{
  "best_global_step": 3050000,
  "best_metric": 0.005603602156043053,
  "best_model_checkpoint": "/mnt/local/plwn-semantic-embeddings/EuroBERT-610m/biencoder/20250812_021454_embedder_sentsplit_train-0.93/checkpoint-3050000",
  "epoch": 5.0,
  "eval_steps": 50000,
  "global_step": 3055260,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.273043865333883e-05,
      "grad_norm": 177.5955810546875,
      "learning_rate": 1e-08,
      "loss": 0.5265,
      "step": 20
    },
    {
      "epoch": 6.546087730667766e-05,
      "grad_norm": 167.79440307617188,
      "learning_rate": 2e-08,
      "loss": 0.4928,
      "step": 40
    },
    {
      "epoch": 9.81913159600165e-05,
      "grad_norm": 181.1426544189453,
      "learning_rate": 3.0000000000000004e-08,
      "loss": 0.5125,
      "step": 60
    },
    {
      "epoch": 0.00013092175461335533,
      "grad_norm": 46.77064895629883,
      "learning_rate": 4e-08,
      "loss": 0.5041,
      "step": 80
    },
    {
      "epoch": 0.00016365219326669415,
      "grad_norm": 87.8071517944336,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 0.494,
      "step": 100
    },
    {
      "epoch": 0.000196382631920033,
      "grad_norm": 83.7109375,
      "learning_rate": 6.000000000000001e-08,
      "loss": 0.4941,
      "step": 120
    },
    {
      "epoch": 0.00022911307057337183,
      "grad_norm": 157.60247802734375,
      "learning_rate": 7e-08,
      "loss": 0.4654,
      "step": 140
    },
    {
      "epoch": 0.00026184350922671065,
      "grad_norm": 76.8175277709961,
      "learning_rate": 8e-08,
      "loss": 0.4564,
      "step": 160
    },
    {
      "epoch": 0.0002945739478800495,
      "grad_norm": 290.5692138671875,
      "learning_rate": 9e-08,
      "loss": 0.4005,
      "step": 180
    },
    {
      "epoch": 0.0003273043865333883,
      "grad_norm": 48.54642868041992,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.4196,
      "step": 200
    },
    {
      "epoch": 0.0003600348251867272,
      "grad_norm": 89.19824981689453,
      "learning_rate": 1.1e-07,
      "loss": 0.3912,
      "step": 220
    },
    {
      "epoch": 0.000392765263840066,
      "grad_norm": 62.10696792602539,
      "learning_rate": 1.2000000000000002e-07,
      "loss": 0.3819,
      "step": 240
    },
    {
      "epoch": 0.00042549570249340483,
      "grad_norm": 87.84744262695312,
      "learning_rate": 1.3e-07,
      "loss": 0.3628,
      "step": 260
    },
    {
      "epoch": 0.00045822614114674365,
      "grad_norm": 220.0338592529297,
      "learning_rate": 1.4e-07,
      "loss": 0.3447,
      "step": 280
    },
    {
      "epoch": 0.0004909565798000825,
      "grad_norm": 68.86678314208984,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 0.3672,
      "step": 300
    },
    {
      "epoch": 0.0005236870184534213,
      "grad_norm": 133.0181884765625,
      "learning_rate": 1.6e-07,
      "loss": 0.3623,
      "step": 320
    },
    {
      "epoch": 0.0005564174571067601,
      "grad_norm": 120.92203521728516,
      "learning_rate": 1.7000000000000001e-07,
      "loss": 0.3375,
      "step": 340
    },
    {
      "epoch": 0.000589147895760099,
      "grad_norm": 57.033084869384766,
      "learning_rate": 1.8e-07,
      "loss": 0.3541,
      "step": 360
    },
    {
      "epoch": 0.0006218783344134378,
      "grad_norm": 93.61563110351562,
      "learning_rate": 1.9e-07,
      "loss": 0.323,
      "step": 380
    },
    {
      "epoch": 0.0006546087730667766,
      "grad_norm": 51.717491149902344,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.3465,
      "step": 400
    },
    {
      "epoch": 0.0006873392117201154,
      "grad_norm": 57.98363494873047,
      "learning_rate": 2.1000000000000003e-07,
      "loss": 0.3246,
      "step": 420
    },
    {
      "epoch": 0.0007200696503734544,
      "grad_norm": 98.5430679321289,
      "learning_rate": 2.2e-07,
      "loss": 0.3272,
      "step": 440
    },
    {
      "epoch": 0.0007528000890267932,
      "grad_norm": 82.47821044921875,
      "learning_rate": 2.3000000000000002e-07,
      "loss": 0.3379,
      "step": 460
    },
    {
      "epoch": 0.000785530527680132,
      "grad_norm": 208.1881866455078,
      "learning_rate": 2.4000000000000003e-07,
      "loss": 0.3197,
      "step": 480
    },
    {
      "epoch": 0.0008182609663334708,
      "grad_norm": 74.90049743652344,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.3335,
      "step": 500
    },
    {
      "epoch": 0.0008509914049868097,
      "grad_norm": 188.8548583984375,
      "learning_rate": 2.6e-07,
      "loss": 0.3299,
      "step": 520
    },
    {
      "epoch": 0.0008837218436401485,
      "grad_norm": 81.29654693603516,
      "learning_rate": 2.7e-07,
      "loss": 0.3371,
      "step": 540
    },
    {
      "epoch": 0.0009164522822934873,
      "grad_norm": 48.546058654785156,
      "learning_rate": 2.8e-07,
      "loss": 0.3169,
      "step": 560
    },
    {
      "epoch": 0.0009491827209468261,
      "grad_norm": 208.53733825683594,
      "learning_rate": 2.9000000000000003e-07,
      "loss": 0.3043,
      "step": 580
    },
    {
      "epoch": 0.000981913159600165,
      "grad_norm": 283.4526672363281,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 0.3008,
      "step": 600
    },
    {
      "epoch": 0.0010146435982535038,
      "grad_norm": 120.55583190917969,
      "learning_rate": 3.1000000000000005e-07,
      "loss": 0.2892,
      "step": 620
    },
    {
      "epoch": 0.0010473740369068426,
      "grad_norm": 41.825340270996094,
      "learning_rate": 3.2e-07,
      "loss": 0.2833,
      "step": 640
    },
    {
      "epoch": 0.0010801044755601814,
      "grad_norm": 61.72878646850586,
      "learning_rate": 3.3e-07,
      "loss": 0.2858,
      "step": 660
    },
    {
      "epoch": 0.0011128349142135203,
      "grad_norm": 147.3242645263672,
      "learning_rate": 3.4000000000000003e-07,
      "loss": 0.3195,
      "step": 680
    },
    {
      "epoch": 0.001145565352866859,
      "grad_norm": 72.35663604736328,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 0.2878,
      "step": 700
    },
    {
      "epoch": 0.001178295791520198,
      "grad_norm": 44.211143493652344,
      "learning_rate": 3.6e-07,
      "loss": 0.2908,
      "step": 720
    },
    {
      "epoch": 0.0012110262301735367,
      "grad_norm": 313.6590576171875,
      "learning_rate": 3.7e-07,
      "loss": 0.2839,
      "step": 740
    },
    {
      "epoch": 0.0012437566688268756,
      "grad_norm": 152.26417541503906,
      "learning_rate": 3.8e-07,
      "loss": 0.2785,
      "step": 760
    },
    {
      "epoch": 0.0012764871074802144,
      "grad_norm": 499.232666015625,
      "learning_rate": 3.9e-07,
      "loss": 0.2685,
      "step": 780
    },
    {
      "epoch": 0.0013092175461335532,
      "grad_norm": 207.45068359375,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.3012,
      "step": 800
    },
    {
      "epoch": 0.001341947984786892,
      "grad_norm": 22.015016555786133,
      "learning_rate": 4.1000000000000004e-07,
      "loss": 0.2789,
      "step": 820
    },
    {
      "epoch": 0.0013746784234402309,
      "grad_norm": 29.9649600982666,
      "learning_rate": 4.2000000000000006e-07,
      "loss": 0.2577,
      "step": 840
    },
    {
      "epoch": 0.0014074088620935697,
      "grad_norm": 54.445457458496094,
      "learning_rate": 4.3e-07,
      "loss": 0.2904,
      "step": 860
    },
    {
      "epoch": 0.0014401393007469087,
      "grad_norm": 310.27935791015625,
      "learning_rate": 4.4e-07,
      "loss": 0.2922,
      "step": 880
    },
    {
      "epoch": 0.0014728697394002475,
      "grad_norm": 157.69140625,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 0.2844,
      "step": 900
    },
    {
      "epoch": 0.0015056001780535864,
      "grad_norm": 45.58254623413086,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 0.2765,
      "step": 920
    },
    {
      "epoch": 0.0015383306167069252,
      "grad_norm": 419.19189453125,
      "learning_rate": 4.7000000000000005e-07,
      "loss": 0.2518,
      "step": 940
    },
    {
      "epoch": 0.001571061055360264,
      "grad_norm": 33.34440994262695,
      "learning_rate": 4.800000000000001e-07,
      "loss": 0.2835,
      "step": 960
    },
    {
      "epoch": 0.0016037914940136028,
      "grad_norm": 138.30210876464844,
      "learning_rate": 4.900000000000001e-07,
      "loss": 0.2605,
      "step": 980
    },
    {
      "epoch": 0.0016365219326669417,
      "grad_norm": 64.69876861572266,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.2727,
      "step": 1000
    },
    {
      "epoch": 0.0016692523713202805,
      "grad_norm": 86.94474029541016,
      "learning_rate": 5.1e-07,
      "loss": 0.2678,
      "step": 1020
    },
    {
      "epoch": 0.0017019828099736193,
      "grad_norm": 82.64227294921875,
      "learning_rate": 5.2e-07,
      "loss": 0.2686,
      "step": 1040
    },
    {
      "epoch": 0.0017347132486269581,
      "grad_norm": 212.56069946289062,
      "learning_rate": 5.3e-07,
      "loss": 0.2532,
      "step": 1060
    },
    {
      "epoch": 0.001767443687280297,
      "grad_norm": 50.38601303100586,
      "learning_rate": 5.4e-07,
      "loss": 0.2494,
      "step": 1080
    },
    {
      "epoch": 0.0018001741259336358,
      "grad_norm": 39.91426086425781,
      "learning_rate": 5.5e-07,
      "loss": 0.2461,
      "step": 1100
    },
    {
      "epoch": 0.0018329045645869746,
      "grad_norm": 41.70036697387695,
      "learning_rate": 5.6e-07,
      "loss": 0.2762,
      "step": 1120
    },
    {
      "epoch": 0.0018656350032403134,
      "grad_norm": 118.65033721923828,
      "learning_rate": 5.7e-07,
      "loss": 0.2548,
      "step": 1140
    },
    {
      "epoch": 0.0018983654418936523,
      "grad_norm": 57.50661087036133,
      "learning_rate": 5.800000000000001e-07,
      "loss": 0.2711,
      "step": 1160
    },
    {
      "epoch": 0.001931095880546991,
      "grad_norm": 301.2096252441406,
      "learning_rate": 5.900000000000001e-07,
      "loss": 0.2442,
      "step": 1180
    },
    {
      "epoch": 0.00196382631920033,
      "grad_norm": 36.78187561035156,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.2289,
      "step": 1200
    },
    {
      "epoch": 0.0019965567578536687,
      "grad_norm": 556.0841674804688,
      "learning_rate": 6.100000000000001e-07,
      "loss": 0.2597,
      "step": 1220
    },
    {
      "epoch": 0.0020292871965070076,
      "grad_norm": 11.620940208435059,
      "learning_rate": 6.200000000000001e-07,
      "loss": 0.2456,
      "step": 1240
    },
    {
      "epoch": 0.0020620176351603464,
      "grad_norm": 24.293729782104492,
      "learning_rate": 6.3e-07,
      "loss": 0.2171,
      "step": 1260
    },
    {
      "epoch": 0.002094748073813685,
      "grad_norm": 66.85969543457031,
      "learning_rate": 6.4e-07,
      "loss": 0.2179,
      "step": 1280
    },
    {
      "epoch": 0.002127478512467024,
      "grad_norm": 44.2943229675293,
      "learning_rate": 6.5e-07,
      "loss": 0.2134,
      "step": 1300
    },
    {
      "epoch": 0.002160208951120363,
      "grad_norm": 14.559679985046387,
      "learning_rate": 6.6e-07,
      "loss": 0.2355,
      "step": 1320
    },
    {
      "epoch": 0.0021929393897737017,
      "grad_norm": 157.42269897460938,
      "learning_rate": 6.7e-07,
      "loss": 0.2338,
      "step": 1340
    },
    {
      "epoch": 0.0022256698284270405,
      "grad_norm": 51.4877815246582,
      "learning_rate": 6.800000000000001e-07,
      "loss": 0.2267,
      "step": 1360
    },
    {
      "epoch": 0.0022584002670803793,
      "grad_norm": 44.06613540649414,
      "learning_rate": 6.900000000000001e-07,
      "loss": 0.2506,
      "step": 1380
    },
    {
      "epoch": 0.002291130705733718,
      "grad_norm": 206.08453369140625,
      "learning_rate": 7.000000000000001e-07,
      "loss": 0.2417,
      "step": 1400
    },
    {
      "epoch": 0.002323861144387057,
      "grad_norm": 140.97723388671875,
      "learning_rate": 7.1e-07,
      "loss": 0.2325,
      "step": 1420
    },
    {
      "epoch": 0.002356591583040396,
      "grad_norm": 37.57798385620117,
      "learning_rate": 7.2e-07,
      "loss": 0.1999,
      "step": 1440
    },
    {
      "epoch": 0.0023893220216937346,
      "grad_norm": 39.57868957519531,
      "learning_rate": 7.3e-07,
      "loss": 0.2377,
      "step": 1460
    },
    {
      "epoch": 0.0024220524603470735,
      "grad_norm": 42.58829116821289,
      "learning_rate": 7.4e-07,
      "loss": 0.2329,
      "step": 1480
    },
    {
      "epoch": 0.0024547828990004123,
      "grad_norm": 38.82683563232422,
      "learning_rate": 7.5e-07,
      "loss": 0.224,
      "step": 1500
    },
    {
      "epoch": 0.002487513337653751,
      "grad_norm": 71.78076934814453,
      "learning_rate": 7.6e-07,
      "loss": 0.1997,
      "step": 1520
    },
    {
      "epoch": 0.00252024377630709,
      "grad_norm": 68.64315032958984,
      "learning_rate": 7.7e-07,
      "loss": 0.2195,
      "step": 1540
    },
    {
      "epoch": 0.0025529742149604288,
      "grad_norm": 48.78083801269531,
      "learning_rate": 7.8e-07,
      "loss": 0.2265,
      "step": 1560
    },
    {
      "epoch": 0.0025857046536137676,
      "grad_norm": 12.177969932556152,
      "learning_rate": 7.900000000000001e-07,
      "loss": 0.2215,
      "step": 1580
    },
    {
      "epoch": 0.0026184350922671064,
      "grad_norm": 91.64373779296875,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.2353,
      "step": 1600
    },
    {
      "epoch": 0.0026511655309204452,
      "grad_norm": 67.97624969482422,
      "learning_rate": 8.100000000000001e-07,
      "loss": 0.2304,
      "step": 1620
    },
    {
      "epoch": 0.002683895969573784,
      "grad_norm": 68.41759490966797,
      "learning_rate": 8.200000000000001e-07,
      "loss": 0.2096,
      "step": 1640
    },
    {
      "epoch": 0.002716626408227123,
      "grad_norm": 44.39082336425781,
      "learning_rate": 8.300000000000001e-07,
      "loss": 0.1953,
      "step": 1660
    },
    {
      "epoch": 0.0027493568468804617,
      "grad_norm": 40.78596496582031,
      "learning_rate": 8.400000000000001e-07,
      "loss": 0.2222,
      "step": 1680
    },
    {
      "epoch": 0.0027820872855338005,
      "grad_norm": 19.19767189025879,
      "learning_rate": 8.500000000000001e-07,
      "loss": 0.2171,
      "step": 1700
    },
    {
      "epoch": 0.0028148177241871393,
      "grad_norm": 23.11960220336914,
      "learning_rate": 8.6e-07,
      "loss": 0.1929,
      "step": 1720
    },
    {
      "epoch": 0.002847548162840478,
      "grad_norm": 30.225969314575195,
      "learning_rate": 8.7e-07,
      "loss": 0.2237,
      "step": 1740
    },
    {
      "epoch": 0.0028802786014938174,
      "grad_norm": 43.64992141723633,
      "learning_rate": 8.8e-07,
      "loss": 0.1992,
      "step": 1760
    },
    {
      "epoch": 0.0029130090401471563,
      "grad_norm": 50.762977600097656,
      "learning_rate": 8.900000000000001e-07,
      "loss": 0.1952,
      "step": 1780
    },
    {
      "epoch": 0.002945739478800495,
      "grad_norm": 35.92936706542969,
      "learning_rate": 9.000000000000001e-07,
      "loss": 0.202,
      "step": 1800
    },
    {
      "epoch": 0.002978469917453834,
      "grad_norm": 16.176042556762695,
      "learning_rate": 9.100000000000001e-07,
      "loss": 0.1863,
      "step": 1820
    },
    {
      "epoch": 0.0030112003561071727,
      "grad_norm": 22.352445602416992,
      "learning_rate": 9.200000000000001e-07,
      "loss": 0.226,
      "step": 1840
    },
    {
      "epoch": 0.0030439307947605116,
      "grad_norm": 43.315589904785156,
      "learning_rate": 9.300000000000001e-07,
      "loss": 0.2036,
      "step": 1860
    },
    {
      "epoch": 0.0030766612334138504,
      "grad_norm": 15.333244323730469,
      "learning_rate": 9.400000000000001e-07,
      "loss": 0.1937,
      "step": 1880
    },
    {
      "epoch": 0.003109391672067189,
      "grad_norm": 29.24872589111328,
      "learning_rate": 9.500000000000001e-07,
      "loss": 0.1914,
      "step": 1900
    },
    {
      "epoch": 0.003142122110720528,
      "grad_norm": 114.98567962646484,
      "learning_rate": 9.600000000000001e-07,
      "loss": 0.2225,
      "step": 1920
    },
    {
      "epoch": 0.003174852549373867,
      "grad_norm": 89.80290222167969,
      "learning_rate": 9.7e-07,
      "loss": 0.2169,
      "step": 1940
    },
    {
      "epoch": 0.0032075829880272057,
      "grad_norm": 37.363197326660156,
      "learning_rate": 9.800000000000001e-07,
      "loss": 0.1971,
      "step": 1960
    },
    {
      "epoch": 0.0032403134266805445,
      "grad_norm": 9.080795288085938,
      "learning_rate": 9.9e-07,
      "loss": 0.1743,
      "step": 1980
    },
    {
      "epoch": 0.0032730438653338833,
      "grad_norm": 11.560807228088379,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.2135,
      "step": 2000
    },
    {
      "epoch": 0.003305774303987222,
      "grad_norm": 26.71611213684082,
      "learning_rate": 1.01e-06,
      "loss": 0.1904,
      "step": 2020
    },
    {
      "epoch": 0.003338504742640561,
      "grad_norm": 29.841007232666016,
      "learning_rate": 1.02e-06,
      "loss": 0.1887,
      "step": 2040
    },
    {
      "epoch": 0.0033712351812939,
      "grad_norm": 38.70259475708008,
      "learning_rate": 1.03e-06,
      "loss": 0.2245,
      "step": 2060
    },
    {
      "epoch": 0.0034039656199472386,
      "grad_norm": 16.649946212768555,
      "learning_rate": 1.04e-06,
      "loss": 0.1888,
      "step": 2080
    },
    {
      "epoch": 0.0034366960586005775,
      "grad_norm": 14.77779483795166,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 0.1985,
      "step": 2100
    },
    {
      "epoch": 0.0034694264972539163,
      "grad_norm": 49.8078498840332,
      "learning_rate": 1.06e-06,
      "loss": 0.1878,
      "step": 2120
    },
    {
      "epoch": 0.003502156935907255,
      "grad_norm": 22.485061645507812,
      "learning_rate": 1.0700000000000001e-06,
      "loss": 0.2267,
      "step": 2140
    },
    {
      "epoch": 0.003534887374560594,
      "grad_norm": 30.685691833496094,
      "learning_rate": 1.08e-06,
      "loss": 0.2027,
      "step": 2160
    },
    {
      "epoch": 0.0035676178132139327,
      "grad_norm": 102.04566192626953,
      "learning_rate": 1.0900000000000002e-06,
      "loss": 0.1868,
      "step": 2180
    },
    {
      "epoch": 0.0036003482518672716,
      "grad_norm": 36.85950469970703,
      "learning_rate": 1.1e-06,
      "loss": 0.1958,
      "step": 2200
    },
    {
      "epoch": 0.0036330786905206104,
      "grad_norm": 57.821136474609375,
      "learning_rate": 1.1100000000000002e-06,
      "loss": 0.169,
      "step": 2220
    },
    {
      "epoch": 0.0036658091291739492,
      "grad_norm": 35.261573791503906,
      "learning_rate": 1.12e-06,
      "loss": 0.1915,
      "step": 2240
    },
    {
      "epoch": 0.003698539567827288,
      "grad_norm": 27.701196670532227,
      "learning_rate": 1.1300000000000002e-06,
      "loss": 0.1819,
      "step": 2260
    },
    {
      "epoch": 0.003731270006480627,
      "grad_norm": 20.537811279296875,
      "learning_rate": 1.14e-06,
      "loss": 0.1712,
      "step": 2280
    },
    {
      "epoch": 0.0037640004451339657,
      "grad_norm": 8.289626121520996,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 0.1692,
      "step": 2300
    },
    {
      "epoch": 0.0037967308837873045,
      "grad_norm": 17.173763275146484,
      "learning_rate": 1.1600000000000001e-06,
      "loss": 0.1643,
      "step": 2320
    },
    {
      "epoch": 0.0038294613224406433,
      "grad_norm": 23.519763946533203,
      "learning_rate": 1.1700000000000002e-06,
      "loss": 0.1854,
      "step": 2340
    },
    {
      "epoch": 0.003862191761093982,
      "grad_norm": 148.3638458251953,
      "learning_rate": 1.1800000000000001e-06,
      "loss": 0.1954,
      "step": 2360
    },
    {
      "epoch": 0.003894922199747321,
      "grad_norm": 91.76126861572266,
      "learning_rate": 1.19e-06,
      "loss": 0.2121,
      "step": 2380
    },
    {
      "epoch": 0.00392765263840066,
      "grad_norm": 32.927276611328125,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.1899,
      "step": 2400
    },
    {
      "epoch": 0.003960383077053999,
      "grad_norm": 13.456506729125977,
      "learning_rate": 1.21e-06,
      "loss": 0.1851,
      "step": 2420
    },
    {
      "epoch": 0.0039931135157073375,
      "grad_norm": 32.03644561767578,
      "learning_rate": 1.2200000000000002e-06,
      "loss": 0.1672,
      "step": 2440
    },
    {
      "epoch": 0.004025843954360677,
      "grad_norm": 23.184246063232422,
      "learning_rate": 1.23e-06,
      "loss": 0.1787,
      "step": 2460
    },
    {
      "epoch": 0.004058574393014015,
      "grad_norm": 10.799433708190918,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 0.1758,
      "step": 2480
    },
    {
      "epoch": 0.004091304831667354,
      "grad_norm": 26.062671661376953,
      "learning_rate": 1.25e-06,
      "loss": 0.2032,
      "step": 2500
    },
    {
      "epoch": 0.004124035270320693,
      "grad_norm": 21.16802215576172,
      "learning_rate": 1.26e-06,
      "loss": 0.1996,
      "step": 2520
    },
    {
      "epoch": 0.004156765708974032,
      "grad_norm": 61.10593795776367,
      "learning_rate": 1.2700000000000001e-06,
      "loss": 0.1803,
      "step": 2540
    },
    {
      "epoch": 0.00418949614762737,
      "grad_norm": 16.791866302490234,
      "learning_rate": 1.28e-06,
      "loss": 0.1852,
      "step": 2560
    },
    {
      "epoch": 0.00422222658628071,
      "grad_norm": 53.38053894042969,
      "learning_rate": 1.2900000000000001e-06,
      "loss": 0.1917,
      "step": 2580
    },
    {
      "epoch": 0.004254957024934048,
      "grad_norm": 31.24004364013672,
      "learning_rate": 1.3e-06,
      "loss": 0.1891,
      "step": 2600
    },
    {
      "epoch": 0.004287687463587387,
      "grad_norm": 17.898347854614258,
      "learning_rate": 1.3100000000000002e-06,
      "loss": 0.2058,
      "step": 2620
    },
    {
      "epoch": 0.004320417902240726,
      "grad_norm": 98.2292251586914,
      "learning_rate": 1.32e-06,
      "loss": 0.2012,
      "step": 2640
    },
    {
      "epoch": 0.004353148340894065,
      "grad_norm": 17.13984489440918,
      "learning_rate": 1.3300000000000002e-06,
      "loss": 0.1798,
      "step": 2660
    },
    {
      "epoch": 0.004385878779547403,
      "grad_norm": 21.41046142578125,
      "learning_rate": 1.34e-06,
      "loss": 0.1886,
      "step": 2680
    },
    {
      "epoch": 0.004418609218200743,
      "grad_norm": 42.748687744140625,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.1674,
      "step": 2700
    },
    {
      "epoch": 0.004451339656854081,
      "grad_norm": 49.389400482177734,
      "learning_rate": 1.3600000000000001e-06,
      "loss": 0.1693,
      "step": 2720
    },
    {
      "epoch": 0.00448407009550742,
      "grad_norm": 50.0696907043457,
      "learning_rate": 1.3700000000000002e-06,
      "loss": 0.1756,
      "step": 2740
    },
    {
      "epoch": 0.004516800534160759,
      "grad_norm": 81.89443969726562,
      "learning_rate": 1.3800000000000001e-06,
      "loss": 0.1846,
      "step": 2760
    },
    {
      "epoch": 0.004549530972814098,
      "grad_norm": 15.481436729431152,
      "learning_rate": 1.3900000000000002e-06,
      "loss": 0.1737,
      "step": 2780
    },
    {
      "epoch": 0.004582261411467436,
      "grad_norm": 192.96337890625,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.1818,
      "step": 2800
    },
    {
      "epoch": 0.004614991850120776,
      "grad_norm": 38.48305130004883,
      "learning_rate": 1.41e-06,
      "loss": 0.1639,
      "step": 2820
    },
    {
      "epoch": 0.004647722288774114,
      "grad_norm": 57.82422637939453,
      "learning_rate": 1.42e-06,
      "loss": 0.1975,
      "step": 2840
    },
    {
      "epoch": 0.004680452727427453,
      "grad_norm": 33.834083557128906,
      "learning_rate": 1.43e-06,
      "loss": 0.1534,
      "step": 2860
    },
    {
      "epoch": 0.004713183166080792,
      "grad_norm": 15.01107406616211,
      "learning_rate": 1.44e-06,
      "loss": 0.1876,
      "step": 2880
    },
    {
      "epoch": 0.004745913604734131,
      "grad_norm": 46.59115982055664,
      "learning_rate": 1.45e-06,
      "loss": 0.1645,
      "step": 2900
    },
    {
      "epoch": 0.004778644043387469,
      "grad_norm": 43.15562057495117,
      "learning_rate": 1.46e-06,
      "loss": 0.1841,
      "step": 2920
    },
    {
      "epoch": 0.0048113744820408085,
      "grad_norm": 31.678932189941406,
      "learning_rate": 1.4700000000000001e-06,
      "loss": 0.166,
      "step": 2940
    },
    {
      "epoch": 0.004844104920694147,
      "grad_norm": 32.060367584228516,
      "learning_rate": 1.48e-06,
      "loss": 0.1793,
      "step": 2960
    },
    {
      "epoch": 0.004876835359347486,
      "grad_norm": 10.942450523376465,
      "learning_rate": 1.4900000000000001e-06,
      "loss": 0.1592,
      "step": 2980
    },
    {
      "epoch": 0.0049095657980008246,
      "grad_norm": 78.21478271484375,
      "learning_rate": 1.5e-06,
      "loss": 0.1728,
      "step": 3000
    },
    {
      "epoch": 0.004942296236654164,
      "grad_norm": 11.852696418762207,
      "learning_rate": 1.5100000000000002e-06,
      "loss": 0.1612,
      "step": 3020
    },
    {
      "epoch": 0.004975026675307502,
      "grad_norm": 23.905858993530273,
      "learning_rate": 1.52e-06,
      "loss": 0.1703,
      "step": 3040
    },
    {
      "epoch": 0.0050077571139608415,
      "grad_norm": 61.07943344116211,
      "learning_rate": 1.5300000000000002e-06,
      "loss": 0.1718,
      "step": 3060
    },
    {
      "epoch": 0.00504048755261418,
      "grad_norm": 22.057945251464844,
      "learning_rate": 1.54e-06,
      "loss": 0.1601,
      "step": 3080
    },
    {
      "epoch": 0.005073217991267519,
      "grad_norm": 14.228303909301758,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 0.1828,
      "step": 3100
    },
    {
      "epoch": 0.0051059484299208575,
      "grad_norm": 16.013357162475586,
      "learning_rate": 1.56e-06,
      "loss": 0.1666,
      "step": 3120
    },
    {
      "epoch": 0.005138678868574197,
      "grad_norm": 267.1432189941406,
      "learning_rate": 1.5700000000000002e-06,
      "loss": 0.1599,
      "step": 3140
    },
    {
      "epoch": 0.005171409307227535,
      "grad_norm": 16.951663970947266,
      "learning_rate": 1.5800000000000001e-06,
      "loss": 0.1484,
      "step": 3160
    },
    {
      "epoch": 0.005204139745880874,
      "grad_norm": 27.55092430114746,
      "learning_rate": 1.5900000000000002e-06,
      "loss": 0.1761,
      "step": 3180
    },
    {
      "epoch": 0.005236870184534213,
      "grad_norm": 15.055983543395996,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.1692,
      "step": 3200
    },
    {
      "epoch": 0.005269600623187552,
      "grad_norm": 23.394210815429688,
      "learning_rate": 1.6100000000000003e-06,
      "loss": 0.149,
      "step": 3220
    },
    {
      "epoch": 0.0053023310618408905,
      "grad_norm": 21.364795684814453,
      "learning_rate": 1.6200000000000002e-06,
      "loss": 0.1637,
      "step": 3240
    },
    {
      "epoch": 0.00533506150049423,
      "grad_norm": 24.654939651489258,
      "learning_rate": 1.6300000000000003e-06,
      "loss": 0.1607,
      "step": 3260
    },
    {
      "epoch": 0.005367791939147568,
      "grad_norm": 19.460132598876953,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 0.1644,
      "step": 3280
    },
    {
      "epoch": 0.005400522377800907,
      "grad_norm": 32.79641342163086,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 0.153,
      "step": 3300
    },
    {
      "epoch": 0.005433252816454246,
      "grad_norm": 27.66579818725586,
      "learning_rate": 1.6600000000000002e-06,
      "loss": 0.1511,
      "step": 3320
    },
    {
      "epoch": 0.005465983255107585,
      "grad_norm": 12.338621139526367,
      "learning_rate": 1.6700000000000003e-06,
      "loss": 0.1446,
      "step": 3340
    },
    {
      "epoch": 0.005498713693760923,
      "grad_norm": 34.25360870361328,
      "learning_rate": 1.6800000000000002e-06,
      "loss": 0.1644,
      "step": 3360
    },
    {
      "epoch": 0.005531444132414263,
      "grad_norm": 17.44060516357422,
      "learning_rate": 1.6900000000000003e-06,
      "loss": 0.1747,
      "step": 3380
    },
    {
      "epoch": 0.005564174571067601,
      "grad_norm": 10.509142875671387,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.1496,
      "step": 3400
    },
    {
      "epoch": 0.00559690500972094,
      "grad_norm": 11.333220481872559,
      "learning_rate": 1.7100000000000004e-06,
      "loss": 0.1668,
      "step": 3420
    },
    {
      "epoch": 0.005629635448374279,
      "grad_norm": 12.434348106384277,
      "learning_rate": 1.72e-06,
      "loss": 0.1624,
      "step": 3440
    },
    {
      "epoch": 0.005662365887027618,
      "grad_norm": 14.072142601013184,
      "learning_rate": 1.73e-06,
      "loss": 0.1534,
      "step": 3460
    },
    {
      "epoch": 0.005695096325680956,
      "grad_norm": 14.113509178161621,
      "learning_rate": 1.74e-06,
      "loss": 0.1615,
      "step": 3480
    },
    {
      "epoch": 0.005727826764334296,
      "grad_norm": 13.418909072875977,
      "learning_rate": 1.75e-06,
      "loss": 0.1784,
      "step": 3500
    },
    {
      "epoch": 0.005760557202987635,
      "grad_norm": 80.36198425292969,
      "learning_rate": 1.76e-06,
      "loss": 0.1554,
      "step": 3520
    },
    {
      "epoch": 0.005793287641640973,
      "grad_norm": 36.682926177978516,
      "learning_rate": 1.77e-06,
      "loss": 0.1856,
      "step": 3540
    },
    {
      "epoch": 0.0058260180802943125,
      "grad_norm": 11.906309127807617,
      "learning_rate": 1.7800000000000001e-06,
      "loss": 0.1764,
      "step": 3560
    },
    {
      "epoch": 0.005858748518947651,
      "grad_norm": 249.68557739257812,
      "learning_rate": 1.79e-06,
      "loss": 0.1624,
      "step": 3580
    },
    {
      "epoch": 0.00589147895760099,
      "grad_norm": 25.57176399230957,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.1742,
      "step": 3600
    },
    {
      "epoch": 0.0059242093962543286,
      "grad_norm": 22.403268814086914,
      "learning_rate": 1.81e-06,
      "loss": 0.154,
      "step": 3620
    },
    {
      "epoch": 0.005956939834907668,
      "grad_norm": 194.7779541015625,
      "learning_rate": 1.8200000000000002e-06,
      "loss": 0.1692,
      "step": 3640
    },
    {
      "epoch": 0.005989670273561006,
      "grad_norm": 22.815673828125,
      "learning_rate": 1.83e-06,
      "loss": 0.1567,
      "step": 3660
    },
    {
      "epoch": 0.0060224007122143455,
      "grad_norm": 33.51043701171875,
      "learning_rate": 1.8400000000000002e-06,
      "loss": 0.1894,
      "step": 3680
    },
    {
      "epoch": 0.006055131150867684,
      "grad_norm": 24.491291046142578,
      "learning_rate": 1.85e-06,
      "loss": 0.1573,
      "step": 3700
    },
    {
      "epoch": 0.006087861589521023,
      "grad_norm": 15.78678035736084,
      "learning_rate": 1.8600000000000002e-06,
      "loss": 0.1631,
      "step": 3720
    },
    {
      "epoch": 0.0061205920281743615,
      "grad_norm": 29.67047882080078,
      "learning_rate": 1.87e-06,
      "loss": 0.1681,
      "step": 3740
    },
    {
      "epoch": 0.006153322466827701,
      "grad_norm": 30.398723602294922,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 0.1809,
      "step": 3760
    },
    {
      "epoch": 0.006186052905481039,
      "grad_norm": 91.07801818847656,
      "learning_rate": 1.8900000000000001e-06,
      "loss": 0.1637,
      "step": 3780
    },
    {
      "epoch": 0.006218783344134378,
      "grad_norm": 10.125450134277344,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.1714,
      "step": 3800
    },
    {
      "epoch": 0.006251513782787717,
      "grad_norm": 10.400639533996582,
      "learning_rate": 1.9100000000000003e-06,
      "loss": 0.1672,
      "step": 3820
    },
    {
      "epoch": 0.006284244221441056,
      "grad_norm": 23.618408203125,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 0.152,
      "step": 3840
    },
    {
      "epoch": 0.0063169746600943944,
      "grad_norm": 176.05877685546875,
      "learning_rate": 1.93e-06,
      "loss": 0.1512,
      "step": 3860
    },
    {
      "epoch": 0.006349705098747734,
      "grad_norm": 26.91118049621582,
      "learning_rate": 1.94e-06,
      "loss": 0.1435,
      "step": 3880
    },
    {
      "epoch": 0.006382435537401072,
      "grad_norm": 36.525535583496094,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 0.1454,
      "step": 3900
    },
    {
      "epoch": 0.006415165976054411,
      "grad_norm": 12.891013145446777,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 0.165,
      "step": 3920
    },
    {
      "epoch": 0.00644789641470775,
      "grad_norm": 17.43115234375,
      "learning_rate": 1.97e-06,
      "loss": 0.1774,
      "step": 3940
    },
    {
      "epoch": 0.006480626853361089,
      "grad_norm": 157.6044921875,
      "learning_rate": 1.98e-06,
      "loss": 0.1481,
      "step": 3960
    },
    {
      "epoch": 0.006513357292014427,
      "grad_norm": 94.08085632324219,
      "learning_rate": 1.9900000000000004e-06,
      "loss": 0.1526,
      "step": 3980
    },
    {
      "epoch": 0.006546087730667767,
      "grad_norm": 17.652626037597656,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.1901,
      "step": 4000
    },
    {
      "epoch": 0.006578818169321105,
      "grad_norm": 174.25926208496094,
      "learning_rate": 2.0100000000000002e-06,
      "loss": 0.1454,
      "step": 4020
    },
    {
      "epoch": 0.006611548607974444,
      "grad_norm": 26.3575382232666,
      "learning_rate": 2.02e-06,
      "loss": 0.1471,
      "step": 4040
    },
    {
      "epoch": 0.006644279046627783,
      "grad_norm": 7.781630516052246,
      "learning_rate": 2.0300000000000005e-06,
      "loss": 0.1554,
      "step": 4060
    },
    {
      "epoch": 0.006677009485281122,
      "grad_norm": 20.270708084106445,
      "learning_rate": 2.04e-06,
      "loss": 0.1499,
      "step": 4080
    },
    {
      "epoch": 0.00670973992393446,
      "grad_norm": 11.047548294067383,
      "learning_rate": 2.05e-06,
      "loss": 0.154,
      "step": 4100
    },
    {
      "epoch": 0.0067424703625878,
      "grad_norm": 10.361346244812012,
      "learning_rate": 2.06e-06,
      "loss": 0.1667,
      "step": 4120
    },
    {
      "epoch": 0.006775200801241138,
      "grad_norm": 19.903369903564453,
      "learning_rate": 2.07e-06,
      "loss": 0.1734,
      "step": 4140
    },
    {
      "epoch": 0.006807931239894477,
      "grad_norm": 17.162296295166016,
      "learning_rate": 2.08e-06,
      "loss": 0.1722,
      "step": 4160
    },
    {
      "epoch": 0.006840661678547816,
      "grad_norm": 69.02982330322266,
      "learning_rate": 2.09e-06,
      "loss": 0.1719,
      "step": 4180
    },
    {
      "epoch": 0.006873392117201155,
      "grad_norm": 9.112802505493164,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.1458,
      "step": 4200
    },
    {
      "epoch": 0.006906122555854493,
      "grad_norm": 37.460514068603516,
      "learning_rate": 2.11e-06,
      "loss": 0.1646,
      "step": 4220
    },
    {
      "epoch": 0.0069388529945078326,
      "grad_norm": 53.56019973754883,
      "learning_rate": 2.12e-06,
      "loss": 0.1232,
      "step": 4240
    },
    {
      "epoch": 0.006971583433161171,
      "grad_norm": 19.376672744750977,
      "learning_rate": 2.13e-06,
      "loss": 0.1708,
      "step": 4260
    },
    {
      "epoch": 0.00700431387181451,
      "grad_norm": 50.209068298339844,
      "learning_rate": 2.1400000000000003e-06,
      "loss": 0.1338,
      "step": 4280
    },
    {
      "epoch": 0.007037044310467849,
      "grad_norm": 12.272220611572266,
      "learning_rate": 2.15e-06,
      "loss": 0.1856,
      "step": 4300
    },
    {
      "epoch": 0.007069774749121188,
      "grad_norm": 33.535255432128906,
      "learning_rate": 2.16e-06,
      "loss": 0.1501,
      "step": 4320
    },
    {
      "epoch": 0.007102505187774526,
      "grad_norm": 15.482816696166992,
      "learning_rate": 2.17e-06,
      "loss": 0.1825,
      "step": 4340
    },
    {
      "epoch": 0.0071352356264278655,
      "grad_norm": 39.580326080322266,
      "learning_rate": 2.1800000000000003e-06,
      "loss": 0.1439,
      "step": 4360
    },
    {
      "epoch": 0.007167966065081204,
      "grad_norm": 30.998973846435547,
      "learning_rate": 2.19e-06,
      "loss": 0.1766,
      "step": 4380
    },
    {
      "epoch": 0.007200696503734543,
      "grad_norm": 21.988039016723633,
      "learning_rate": 2.2e-06,
      "loss": 0.1337,
      "step": 4400
    },
    {
      "epoch": 0.0072334269423878815,
      "grad_norm": 9.846911430358887,
      "learning_rate": 2.21e-06,
      "loss": 0.1609,
      "step": 4420
    },
    {
      "epoch": 0.007266157381041221,
      "grad_norm": 15.973969459533691,
      "learning_rate": 2.2200000000000003e-06,
      "loss": 0.1619,
      "step": 4440
    },
    {
      "epoch": 0.007298887819694559,
      "grad_norm": 15.51915168762207,
      "learning_rate": 2.2300000000000002e-06,
      "loss": 0.1708,
      "step": 4460
    },
    {
      "epoch": 0.0073316182583478984,
      "grad_norm": 16.407611846923828,
      "learning_rate": 2.24e-06,
      "loss": 0.1492,
      "step": 4480
    },
    {
      "epoch": 0.007364348697001237,
      "grad_norm": 18.82525062561035,
      "learning_rate": 2.25e-06,
      "loss": 0.1519,
      "step": 4500
    },
    {
      "epoch": 0.007397079135654576,
      "grad_norm": 21.722463607788086,
      "learning_rate": 2.2600000000000004e-06,
      "loss": 0.176,
      "step": 4520
    },
    {
      "epoch": 0.0074298095743079145,
      "grad_norm": 74.31963348388672,
      "learning_rate": 2.2700000000000003e-06,
      "loss": 0.1409,
      "step": 4540
    },
    {
      "epoch": 0.007462540012961254,
      "grad_norm": 11.692567825317383,
      "learning_rate": 2.28e-06,
      "loss": 0.1706,
      "step": 4560
    },
    {
      "epoch": 0.007495270451614592,
      "grad_norm": 22.931732177734375,
      "learning_rate": 2.29e-06,
      "loss": 0.1506,
      "step": 4580
    },
    {
      "epoch": 0.007528000890267931,
      "grad_norm": 9.140167236328125,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.1663,
      "step": 4600
    },
    {
      "epoch": 0.00756073132892127,
      "grad_norm": 19.006624221801758,
      "learning_rate": 2.3100000000000003e-06,
      "loss": 0.1586,
      "step": 4620
    },
    {
      "epoch": 0.007593461767574609,
      "grad_norm": 12.76396656036377,
      "learning_rate": 2.3200000000000002e-06,
      "loss": 0.151,
      "step": 4640
    },
    {
      "epoch": 0.007626192206227948,
      "grad_norm": 38.51829147338867,
      "learning_rate": 2.33e-06,
      "loss": 0.1544,
      "step": 4660
    },
    {
      "epoch": 0.007658922644881287,
      "grad_norm": 32.420387268066406,
      "learning_rate": 2.3400000000000005e-06,
      "loss": 0.1739,
      "step": 4680
    },
    {
      "epoch": 0.007691653083534626,
      "grad_norm": 14.01970386505127,
      "learning_rate": 2.35e-06,
      "loss": 0.1481,
      "step": 4700
    },
    {
      "epoch": 0.007724383522187964,
      "grad_norm": 4.670835494995117,
      "learning_rate": 2.3600000000000003e-06,
      "loss": 0.1613,
      "step": 4720
    },
    {
      "epoch": 0.007757113960841304,
      "grad_norm": 10.665949821472168,
      "learning_rate": 2.37e-06,
      "loss": 0.1208,
      "step": 4740
    },
    {
      "epoch": 0.007789844399494642,
      "grad_norm": 31.542280197143555,
      "learning_rate": 2.38e-06,
      "loss": 0.1575,
      "step": 4760
    },
    {
      "epoch": 0.00782257483814798,
      "grad_norm": 10.357207298278809,
      "learning_rate": 2.39e-06,
      "loss": 0.1369,
      "step": 4780
    },
    {
      "epoch": 0.00785530527680132,
      "grad_norm": 12.5222806930542,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.1615,
      "step": 4800
    },
    {
      "epoch": 0.007888035715454659,
      "grad_norm": 3.8698947429656982,
      "learning_rate": 2.4100000000000002e-06,
      "loss": 0.1481,
      "step": 4820
    },
    {
      "epoch": 0.007920766154107998,
      "grad_norm": 34.55548858642578,
      "learning_rate": 2.42e-06,
      "loss": 0.1447,
      "step": 4840
    },
    {
      "epoch": 0.007953496592761336,
      "grad_norm": 12.469810485839844,
      "learning_rate": 2.43e-06,
      "loss": 0.1617,
      "step": 4860
    },
    {
      "epoch": 0.007986227031414675,
      "grad_norm": 17.560529708862305,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 0.1151,
      "step": 4880
    },
    {
      "epoch": 0.008018957470068014,
      "grad_norm": 57.80168533325195,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.1443,
      "step": 4900
    },
    {
      "epoch": 0.008051687908721353,
      "grad_norm": 10.74612808227539,
      "learning_rate": 2.46e-06,
      "loss": 0.1695,
      "step": 4920
    },
    {
      "epoch": 0.008084418347374691,
      "grad_norm": 18.823705673217773,
      "learning_rate": 2.47e-06,
      "loss": 0.1582,
      "step": 4940
    },
    {
      "epoch": 0.00811714878602803,
      "grad_norm": 17.0759220123291,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 0.1532,
      "step": 4960
    },
    {
      "epoch": 0.00814987922468137,
      "grad_norm": 8.54672622680664,
      "learning_rate": 2.4900000000000003e-06,
      "loss": 0.1402,
      "step": 4980
    },
    {
      "epoch": 0.008182609663334709,
      "grad_norm": 9.997976303100586,
      "learning_rate": 2.5e-06,
      "loss": 0.1494,
      "step": 5000
    },
    {
      "epoch": 0.008215340101988046,
      "grad_norm": 9.800087928771973,
      "learning_rate": 2.51e-06,
      "loss": 0.149,
      "step": 5020
    },
    {
      "epoch": 0.008248070540641386,
      "grad_norm": 53.02509689331055,
      "learning_rate": 2.52e-06,
      "loss": 0.1275,
      "step": 5040
    },
    {
      "epoch": 0.008280800979294725,
      "grad_norm": 24.719099044799805,
      "learning_rate": 2.5300000000000003e-06,
      "loss": 0.1605,
      "step": 5060
    },
    {
      "epoch": 0.008313531417948064,
      "grad_norm": 12.791399955749512,
      "learning_rate": 2.5400000000000002e-06,
      "loss": 0.1605,
      "step": 5080
    },
    {
      "epoch": 0.008346261856601402,
      "grad_norm": 12.22973346710205,
      "learning_rate": 2.55e-06,
      "loss": 0.137,
      "step": 5100
    },
    {
      "epoch": 0.00837899229525474,
      "grad_norm": 11.067790985107422,
      "learning_rate": 2.56e-06,
      "loss": 0.1397,
      "step": 5120
    },
    {
      "epoch": 0.00841172273390808,
      "grad_norm": 7.624326705932617,
      "learning_rate": 2.5700000000000004e-06,
      "loss": 0.1477,
      "step": 5140
    },
    {
      "epoch": 0.00844445317256142,
      "grad_norm": 9.572830200195312,
      "learning_rate": 2.5800000000000003e-06,
      "loss": 0.1236,
      "step": 5160
    },
    {
      "epoch": 0.008477183611214757,
      "grad_norm": 12.320329666137695,
      "learning_rate": 2.59e-06,
      "loss": 0.1265,
      "step": 5180
    },
    {
      "epoch": 0.008509914049868096,
      "grad_norm": 14.99966049194336,
      "learning_rate": 2.6e-06,
      "loss": 0.1572,
      "step": 5200
    },
    {
      "epoch": 0.008542644488521435,
      "grad_norm": 10.513187408447266,
      "learning_rate": 2.6100000000000004e-06,
      "loss": 0.1462,
      "step": 5220
    },
    {
      "epoch": 0.008575374927174775,
      "grad_norm": 24.639205932617188,
      "learning_rate": 2.6200000000000003e-06,
      "loss": 0.1301,
      "step": 5240
    },
    {
      "epoch": 0.008608105365828112,
      "grad_norm": 21.69621467590332,
      "learning_rate": 2.6300000000000002e-06,
      "loss": 0.1547,
      "step": 5260
    },
    {
      "epoch": 0.008640835804481451,
      "grad_norm": 9.776565551757812,
      "learning_rate": 2.64e-06,
      "loss": 0.17,
      "step": 5280
    },
    {
      "epoch": 0.00867356624313479,
      "grad_norm": 13.347136497497559,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.1495,
      "step": 5300
    },
    {
      "epoch": 0.00870629668178813,
      "grad_norm": 8.345178604125977,
      "learning_rate": 2.6600000000000004e-06,
      "loss": 0.1265,
      "step": 5320
    },
    {
      "epoch": 0.008739027120441467,
      "grad_norm": 12.945493698120117,
      "learning_rate": 2.6700000000000003e-06,
      "loss": 0.119,
      "step": 5340
    },
    {
      "epoch": 0.008771757559094807,
      "grad_norm": 10.66870403289795,
      "learning_rate": 2.68e-06,
      "loss": 0.1406,
      "step": 5360
    },
    {
      "epoch": 0.008804487997748146,
      "grad_norm": 9.876032829284668,
      "learning_rate": 2.6900000000000005e-06,
      "loss": 0.1616,
      "step": 5380
    },
    {
      "epoch": 0.008837218436401485,
      "grad_norm": 9.085099220275879,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.1258,
      "step": 5400
    },
    {
      "epoch": 0.008869948875054823,
      "grad_norm": 19.582984924316406,
      "learning_rate": 2.7100000000000003e-06,
      "loss": 0.1421,
      "step": 5420
    },
    {
      "epoch": 0.008902679313708162,
      "grad_norm": 73.47571563720703,
      "learning_rate": 2.7200000000000002e-06,
      "loss": 0.151,
      "step": 5440
    },
    {
      "epoch": 0.008935409752361501,
      "grad_norm": 7.153264045715332,
      "learning_rate": 2.7300000000000005e-06,
      "loss": 0.1278,
      "step": 5460
    },
    {
      "epoch": 0.00896814019101484,
      "grad_norm": 6.914601802825928,
      "learning_rate": 2.7400000000000004e-06,
      "loss": 0.1406,
      "step": 5480
    },
    {
      "epoch": 0.009000870629668178,
      "grad_norm": 16.58011817932129,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.1307,
      "step": 5500
    },
    {
      "epoch": 0.009033601068321517,
      "grad_norm": 57.23251724243164,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 0.149,
      "step": 5520
    },
    {
      "epoch": 0.009066331506974857,
      "grad_norm": 30.26605224609375,
      "learning_rate": 2.7700000000000006e-06,
      "loss": 0.144,
      "step": 5540
    },
    {
      "epoch": 0.009099061945628196,
      "grad_norm": 244.60195922851562,
      "learning_rate": 2.7800000000000005e-06,
      "loss": 0.1741,
      "step": 5560
    },
    {
      "epoch": 0.009131792384281533,
      "grad_norm": 19.998624801635742,
      "learning_rate": 2.7900000000000004e-06,
      "loss": 0.138,
      "step": 5580
    },
    {
      "epoch": 0.009164522822934873,
      "grad_norm": 11.833396911621094,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.148,
      "step": 5600
    },
    {
      "epoch": 0.009197253261588212,
      "grad_norm": 19.796232223510742,
      "learning_rate": 2.8100000000000006e-06,
      "loss": 0.1382,
      "step": 5620
    },
    {
      "epoch": 0.009229983700241551,
      "grad_norm": 4.935592174530029,
      "learning_rate": 2.82e-06,
      "loss": 0.1667,
      "step": 5640
    },
    {
      "epoch": 0.009262714138894889,
      "grad_norm": 57.180511474609375,
      "learning_rate": 2.83e-06,
      "loss": 0.1383,
      "step": 5660
    },
    {
      "epoch": 0.009295444577548228,
      "grad_norm": 22.933300018310547,
      "learning_rate": 2.84e-06,
      "loss": 0.1518,
      "step": 5680
    },
    {
      "epoch": 0.009328175016201567,
      "grad_norm": 9.908040046691895,
      "learning_rate": 2.85e-06,
      "loss": 0.1253,
      "step": 5700
    },
    {
      "epoch": 0.009360905454854906,
      "grad_norm": 92.0630111694336,
      "learning_rate": 2.86e-06,
      "loss": 0.1532,
      "step": 5720
    },
    {
      "epoch": 0.009393635893508244,
      "grad_norm": 52.177330017089844,
      "learning_rate": 2.87e-06,
      "loss": 0.1373,
      "step": 5740
    },
    {
      "epoch": 0.009426366332161583,
      "grad_norm": 21.92855453491211,
      "learning_rate": 2.88e-06,
      "loss": 0.1429,
      "step": 5760
    },
    {
      "epoch": 0.009459096770814922,
      "grad_norm": 21.896554946899414,
      "learning_rate": 2.89e-06,
      "loss": 0.1334,
      "step": 5780
    },
    {
      "epoch": 0.009491827209468262,
      "grad_norm": 27.007081985473633,
      "learning_rate": 2.9e-06,
      "loss": 0.1302,
      "step": 5800
    },
    {
      "epoch": 0.0095245576481216,
      "grad_norm": 12.072369575500488,
      "learning_rate": 2.91e-06,
      "loss": 0.1391,
      "step": 5820
    },
    {
      "epoch": 0.009557288086774939,
      "grad_norm": 49.1889533996582,
      "learning_rate": 2.92e-06,
      "loss": 0.145,
      "step": 5840
    },
    {
      "epoch": 0.009590018525428278,
      "grad_norm": 13.1773042678833,
      "learning_rate": 2.93e-06,
      "loss": 0.1327,
      "step": 5860
    },
    {
      "epoch": 0.009622748964081617,
      "grad_norm": 15.273100852966309,
      "learning_rate": 2.9400000000000002e-06,
      "loss": 0.1827,
      "step": 5880
    },
    {
      "epoch": 0.009655479402734956,
      "grad_norm": 13.072859764099121,
      "learning_rate": 2.95e-06,
      "loss": 0.131,
      "step": 5900
    },
    {
      "epoch": 0.009688209841388294,
      "grad_norm": 9.741061210632324,
      "learning_rate": 2.96e-06,
      "loss": 0.153,
      "step": 5920
    },
    {
      "epoch": 0.009720940280041633,
      "grad_norm": 9.73202896118164,
      "learning_rate": 2.97e-06,
      "loss": 0.137,
      "step": 5940
    },
    {
      "epoch": 0.009753670718694972,
      "grad_norm": 42.0189208984375,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.1331,
      "step": 5960
    },
    {
      "epoch": 0.009786401157348312,
      "grad_norm": 11.999131202697754,
      "learning_rate": 2.99e-06,
      "loss": 0.1364,
      "step": 5980
    },
    {
      "epoch": 0.009819131596001649,
      "grad_norm": 18.16575050354004,
      "learning_rate": 3e-06,
      "loss": 0.1374,
      "step": 6000
    },
    {
      "epoch": 0.009851862034654988,
      "grad_norm": 5.955016613006592,
      "learning_rate": 3.01e-06,
      "loss": 0.1549,
      "step": 6020
    },
    {
      "epoch": 0.009884592473308328,
      "grad_norm": 11.754526138305664,
      "learning_rate": 3.0200000000000003e-06,
      "loss": 0.1388,
      "step": 6040
    },
    {
      "epoch": 0.009917322911961667,
      "grad_norm": 12.387834548950195,
      "learning_rate": 3.0300000000000002e-06,
      "loss": 0.0936,
      "step": 6060
    },
    {
      "epoch": 0.009950053350615004,
      "grad_norm": 30.531604766845703,
      "learning_rate": 3.04e-06,
      "loss": 0.1341,
      "step": 6080
    },
    {
      "epoch": 0.009982783789268344,
      "grad_norm": 9.018355369567871,
      "learning_rate": 3.05e-06,
      "loss": 0.1234,
      "step": 6100
    },
    {
      "epoch": 0.010015514227921683,
      "grad_norm": 11.648139953613281,
      "learning_rate": 3.0600000000000003e-06,
      "loss": 0.1291,
      "step": 6120
    },
    {
      "epoch": 0.010048244666575022,
      "grad_norm": 19.817808151245117,
      "learning_rate": 3.0700000000000003e-06,
      "loss": 0.1238,
      "step": 6140
    },
    {
      "epoch": 0.01008097510522836,
      "grad_norm": 4.874594688415527,
      "learning_rate": 3.08e-06,
      "loss": 0.1474,
      "step": 6160
    },
    {
      "epoch": 0.010113705543881699,
      "grad_norm": 7.7935614585876465,
      "learning_rate": 3.09e-06,
      "loss": 0.1278,
      "step": 6180
    },
    {
      "epoch": 0.010146435982535038,
      "grad_norm": 85.23173522949219,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.1516,
      "step": 6200
    },
    {
      "epoch": 0.010179166421188377,
      "grad_norm": 28.414262771606445,
      "learning_rate": 3.1100000000000003e-06,
      "loss": 0.1347,
      "step": 6220
    },
    {
      "epoch": 0.010211896859841715,
      "grad_norm": 20.338146209716797,
      "learning_rate": 3.12e-06,
      "loss": 0.1454,
      "step": 6240
    },
    {
      "epoch": 0.010244627298495054,
      "grad_norm": 11.687527656555176,
      "learning_rate": 3.13e-06,
      "loss": 0.1622,
      "step": 6260
    },
    {
      "epoch": 0.010277357737148394,
      "grad_norm": 8.104349136352539,
      "learning_rate": 3.1400000000000004e-06,
      "loss": 0.1437,
      "step": 6280
    },
    {
      "epoch": 0.010310088175801733,
      "grad_norm": 9.287030220031738,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.1421,
      "step": 6300
    },
    {
      "epoch": 0.01034281861445507,
      "grad_norm": 8.914142608642578,
      "learning_rate": 3.1600000000000002e-06,
      "loss": 0.1466,
      "step": 6320
    },
    {
      "epoch": 0.01037554905310841,
      "grad_norm": 8.511141777038574,
      "learning_rate": 3.17e-06,
      "loss": 0.1437,
      "step": 6340
    },
    {
      "epoch": 0.010408279491761749,
      "grad_norm": 15.90783405303955,
      "learning_rate": 3.1800000000000005e-06,
      "loss": 0.1553,
      "step": 6360
    },
    {
      "epoch": 0.010441009930415088,
      "grad_norm": 12.387250900268555,
      "learning_rate": 3.1900000000000004e-06,
      "loss": 0.1454,
      "step": 6380
    },
    {
      "epoch": 0.010473740369068426,
      "grad_norm": 26.024871826171875,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.1452,
      "step": 6400
    },
    {
      "epoch": 0.010506470807721765,
      "grad_norm": 29.02536964416504,
      "learning_rate": 3.21e-06,
      "loss": 0.1218,
      "step": 6420
    },
    {
      "epoch": 0.010539201246375104,
      "grad_norm": 10.880027770996094,
      "learning_rate": 3.2200000000000005e-06,
      "loss": 0.1224,
      "step": 6440
    },
    {
      "epoch": 0.010571931685028443,
      "grad_norm": 7.2482218742370605,
      "learning_rate": 3.2300000000000004e-06,
      "loss": 0.1341,
      "step": 6460
    },
    {
      "epoch": 0.010604662123681781,
      "grad_norm": 10.451786994934082,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 0.139,
      "step": 6480
    },
    {
      "epoch": 0.01063739256233512,
      "grad_norm": 17.248720169067383,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.1309,
      "step": 6500
    },
    {
      "epoch": 0.01067012300098846,
      "grad_norm": 10.257393836975098,
      "learning_rate": 3.2600000000000006e-06,
      "loss": 0.1442,
      "step": 6520
    },
    {
      "epoch": 0.010702853439641799,
      "grad_norm": 7.0504889488220215,
      "learning_rate": 3.2700000000000005e-06,
      "loss": 0.1402,
      "step": 6540
    },
    {
      "epoch": 0.010735583878295136,
      "grad_norm": 9.317627906799316,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 0.1458,
      "step": 6560
    },
    {
      "epoch": 0.010768314316948475,
      "grad_norm": 27.297365188598633,
      "learning_rate": 3.2900000000000003e-06,
      "loss": 0.1615,
      "step": 6580
    },
    {
      "epoch": 0.010801044755601815,
      "grad_norm": 17.35902214050293,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.1221,
      "step": 6600
    },
    {
      "epoch": 0.010833775194255154,
      "grad_norm": 4.305056571960449,
      "learning_rate": 3.3100000000000005e-06,
      "loss": 0.1463,
      "step": 6620
    },
    {
      "epoch": 0.010866505632908492,
      "grad_norm": 8.133285522460938,
      "learning_rate": 3.3200000000000004e-06,
      "loss": 0.1262,
      "step": 6640
    },
    {
      "epoch": 0.01089923607156183,
      "grad_norm": 14.599608421325684,
      "learning_rate": 3.3300000000000003e-06,
      "loss": 0.1323,
      "step": 6660
    },
    {
      "epoch": 0.01093196651021517,
      "grad_norm": 6.09037971496582,
      "learning_rate": 3.3400000000000006e-06,
      "loss": 0.1478,
      "step": 6680
    },
    {
      "epoch": 0.01096469694886851,
      "grad_norm": 13.855401039123535,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.1542,
      "step": 6700
    },
    {
      "epoch": 0.010997427387521847,
      "grad_norm": 13.07607364654541,
      "learning_rate": 3.3600000000000004e-06,
      "loss": 0.1506,
      "step": 6720
    },
    {
      "epoch": 0.011030157826175186,
      "grad_norm": 101.59345245361328,
      "learning_rate": 3.3700000000000003e-06,
      "loss": 0.1614,
      "step": 6740
    },
    {
      "epoch": 0.011062888264828525,
      "grad_norm": 9.881869316101074,
      "learning_rate": 3.3800000000000007e-06,
      "loss": 0.1385,
      "step": 6760
    },
    {
      "epoch": 0.011095618703481865,
      "grad_norm": 18.557199478149414,
      "learning_rate": 3.3900000000000006e-06,
      "loss": 0.1285,
      "step": 6780
    },
    {
      "epoch": 0.011128349142135202,
      "grad_norm": 10.836185455322266,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.1402,
      "step": 6800
    },
    {
      "epoch": 0.011161079580788541,
      "grad_norm": 53.94546890258789,
      "learning_rate": 3.4100000000000004e-06,
      "loss": 0.1394,
      "step": 6820
    },
    {
      "epoch": 0.01119381001944188,
      "grad_norm": 7.751704216003418,
      "learning_rate": 3.4200000000000007e-06,
      "loss": 0.1158,
      "step": 6840
    },
    {
      "epoch": 0.01122654045809522,
      "grad_norm": 8.471748352050781,
      "learning_rate": 3.4300000000000006e-06,
      "loss": 0.1086,
      "step": 6860
    },
    {
      "epoch": 0.011259270896748557,
      "grad_norm": 7.7457499504089355,
      "learning_rate": 3.44e-06,
      "loss": 0.144,
      "step": 6880
    },
    {
      "epoch": 0.011292001335401897,
      "grad_norm": 5.976493835449219,
      "learning_rate": 3.45e-06,
      "loss": 0.1476,
      "step": 6900
    },
    {
      "epoch": 0.011324731774055236,
      "grad_norm": 9.427473068237305,
      "learning_rate": 3.46e-06,
      "loss": 0.13,
      "step": 6920
    },
    {
      "epoch": 0.011357462212708575,
      "grad_norm": 5.273642539978027,
      "learning_rate": 3.4700000000000002e-06,
      "loss": 0.1387,
      "step": 6940
    },
    {
      "epoch": 0.011390192651361913,
      "grad_norm": 16.084562301635742,
      "learning_rate": 3.48e-06,
      "loss": 0.1369,
      "step": 6960
    },
    {
      "epoch": 0.011422923090015252,
      "grad_norm": Infinity,
      "learning_rate": 3.49e-06,
      "loss": 0.177,
      "step": 6980
    },
    {
      "epoch": 0.011455653528668591,
      "grad_norm": 5.9251484870910645,
      "learning_rate": 3.5e-06,
      "loss": 0.1137,
      "step": 7000
    },
    {
      "epoch": 0.01148838396732193,
      "grad_norm": 11.057254791259766,
      "learning_rate": 3.5100000000000003e-06,
      "loss": 0.1279,
      "step": 7020
    },
    {
      "epoch": 0.01152111440597527,
      "grad_norm": 6.143144607543945,
      "learning_rate": 3.52e-06,
      "loss": 0.1348,
      "step": 7040
    },
    {
      "epoch": 0.011553844844628607,
      "grad_norm": 13.650620460510254,
      "learning_rate": 3.53e-06,
      "loss": 0.1569,
      "step": 7060
    },
    {
      "epoch": 0.011586575283281947,
      "grad_norm": 115.32864379882812,
      "learning_rate": 3.54e-06,
      "loss": 0.1389,
      "step": 7080
    },
    {
      "epoch": 0.011619305721935286,
      "grad_norm": 12.292210578918457,
      "learning_rate": 3.5500000000000003e-06,
      "loss": 0.144,
      "step": 7100
    },
    {
      "epoch": 0.011652036160588625,
      "grad_norm": 19.973529815673828,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 0.1437,
      "step": 7120
    },
    {
      "epoch": 0.011684766599241963,
      "grad_norm": 9.321552276611328,
      "learning_rate": 3.57e-06,
      "loss": 0.1498,
      "step": 7140
    },
    {
      "epoch": 0.011717497037895302,
      "grad_norm": 12.293712615966797,
      "learning_rate": 3.58e-06,
      "loss": 0.1503,
      "step": 7160
    },
    {
      "epoch": 0.011750227476548641,
      "grad_norm": 2.926297187805176,
      "learning_rate": 3.5900000000000004e-06,
      "loss": 0.1389,
      "step": 7180
    },
    {
      "epoch": 0.01178295791520198,
      "grad_norm": 6.650996208190918,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.1412,
      "step": 7200
    },
    {
      "epoch": 0.011815688353855318,
      "grad_norm": 11.289887428283691,
      "learning_rate": 3.61e-06,
      "loss": 0.1276,
      "step": 7220
    },
    {
      "epoch": 0.011848418792508657,
      "grad_norm": 40.43123245239258,
      "learning_rate": 3.62e-06,
      "loss": 0.1471,
      "step": 7240
    },
    {
      "epoch": 0.011881149231161996,
      "grad_norm": 15.9927339553833,
      "learning_rate": 3.6300000000000004e-06,
      "loss": 0.1325,
      "step": 7260
    },
    {
      "epoch": 0.011913879669815336,
      "grad_norm": 6.496262073516846,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 0.1372,
      "step": 7280
    },
    {
      "epoch": 0.011946610108468673,
      "grad_norm": 10.388755798339844,
      "learning_rate": 3.65e-06,
      "loss": 0.1351,
      "step": 7300
    },
    {
      "epoch": 0.011979340547122012,
      "grad_norm": 41.0078239440918,
      "learning_rate": 3.66e-06,
      "loss": 0.1378,
      "step": 7320
    },
    {
      "epoch": 0.012012070985775352,
      "grad_norm": 31.96749496459961,
      "learning_rate": 3.6700000000000004e-06,
      "loss": 0.1286,
      "step": 7340
    },
    {
      "epoch": 0.012044801424428691,
      "grad_norm": 3.3050267696380615,
      "learning_rate": 3.6800000000000003e-06,
      "loss": 0.1161,
      "step": 7360
    },
    {
      "epoch": 0.012077531863082028,
      "grad_norm": 5.9075398445129395,
      "learning_rate": 3.6900000000000002e-06,
      "loss": 0.1285,
      "step": 7380
    },
    {
      "epoch": 0.012110262301735368,
      "grad_norm": 7.512358665466309,
      "learning_rate": 3.7e-06,
      "loss": 0.1376,
      "step": 7400
    },
    {
      "epoch": 0.012142992740388707,
      "grad_norm": 10.217489242553711,
      "learning_rate": 3.7100000000000005e-06,
      "loss": 0.1384,
      "step": 7420
    },
    {
      "epoch": 0.012175723179042046,
      "grad_norm": 6.676782131195068,
      "learning_rate": 3.7200000000000004e-06,
      "loss": 0.134,
      "step": 7440
    },
    {
      "epoch": 0.012208453617695384,
      "grad_norm": 7.483094692230225,
      "learning_rate": 3.7300000000000003e-06,
      "loss": 0.139,
      "step": 7460
    },
    {
      "epoch": 0.012241184056348723,
      "grad_norm": 10.75527286529541,
      "learning_rate": 3.74e-06,
      "loss": 0.1425,
      "step": 7480
    },
    {
      "epoch": 0.012273914495002062,
      "grad_norm": 15.121252059936523,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.1306,
      "step": 7500
    },
    {
      "epoch": 0.012306644933655402,
      "grad_norm": 12.183749198913574,
      "learning_rate": 3.7600000000000004e-06,
      "loss": 0.1486,
      "step": 7520
    },
    {
      "epoch": 0.012339375372308739,
      "grad_norm": 15.446149826049805,
      "learning_rate": 3.7700000000000003e-06,
      "loss": 0.1501,
      "step": 7540
    },
    {
      "epoch": 0.012372105810962078,
      "grad_norm": 24.16215705871582,
      "learning_rate": 3.7800000000000002e-06,
      "loss": 0.1329,
      "step": 7560
    },
    {
      "epoch": 0.012404836249615418,
      "grad_norm": 7.962222099304199,
      "learning_rate": 3.79e-06,
      "loss": 0.1117,
      "step": 7580
    },
    {
      "epoch": 0.012437566688268757,
      "grad_norm": 10.371491432189941,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.134,
      "step": 7600
    },
    {
      "epoch": 0.012470297126922094,
      "grad_norm": 13.372879028320312,
      "learning_rate": 3.8100000000000004e-06,
      "loss": 0.1284,
      "step": 7620
    },
    {
      "epoch": 0.012503027565575434,
      "grad_norm": 7.069236755371094,
      "learning_rate": 3.820000000000001e-06,
      "loss": 0.148,
      "step": 7640
    },
    {
      "epoch": 0.012535758004228773,
      "grad_norm": 10.400854110717773,
      "learning_rate": 3.830000000000001e-06,
      "loss": 0.131,
      "step": 7660
    },
    {
      "epoch": 0.012568488442882112,
      "grad_norm": 12.81125259399414,
      "learning_rate": 3.8400000000000005e-06,
      "loss": 0.1242,
      "step": 7680
    },
    {
      "epoch": 0.01260121888153545,
      "grad_norm": 36.902015686035156,
      "learning_rate": 3.85e-06,
      "loss": 0.1573,
      "step": 7700
    },
    {
      "epoch": 0.012633949320188789,
      "grad_norm": 123.07977294921875,
      "learning_rate": 3.86e-06,
      "loss": 0.1313,
      "step": 7720
    },
    {
      "epoch": 0.012666679758842128,
      "grad_norm": 100.70095825195312,
      "learning_rate": 3.87e-06,
      "loss": 0.1269,
      "step": 7740
    },
    {
      "epoch": 0.012699410197495467,
      "grad_norm": 10.58767318725586,
      "learning_rate": 3.88e-06,
      "loss": 0.1311,
      "step": 7760
    },
    {
      "epoch": 0.012732140636148805,
      "grad_norm": 13.56623649597168,
      "learning_rate": 3.89e-06,
      "loss": 0.1093,
      "step": 7780
    },
    {
      "epoch": 0.012764871074802144,
      "grad_norm": 13.357009887695312,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.1107,
      "step": 7800
    },
    {
      "epoch": 0.012797601513455483,
      "grad_norm": 8.885101318359375,
      "learning_rate": 3.910000000000001e-06,
      "loss": 0.1217,
      "step": 7820
    },
    {
      "epoch": 0.012830331952108823,
      "grad_norm": 7.9626312255859375,
      "learning_rate": 3.920000000000001e-06,
      "loss": 0.1508,
      "step": 7840
    },
    {
      "epoch": 0.01286306239076216,
      "grad_norm": 4.356986999511719,
      "learning_rate": 3.9300000000000005e-06,
      "loss": 0.1339,
      "step": 7860
    },
    {
      "epoch": 0.0128957928294155,
      "grad_norm": 15.62414264678955,
      "learning_rate": 3.94e-06,
      "loss": 0.1395,
      "step": 7880
    },
    {
      "epoch": 0.012928523268068839,
      "grad_norm": 13.2130765914917,
      "learning_rate": 3.95e-06,
      "loss": 0.1508,
      "step": 7900
    },
    {
      "epoch": 0.012961253706722178,
      "grad_norm": 14.305927276611328,
      "learning_rate": 3.96e-06,
      "loss": 0.1466,
      "step": 7920
    },
    {
      "epoch": 0.012993984145375516,
      "grad_norm": 9.186273574829102,
      "learning_rate": 3.97e-06,
      "loss": 0.1244,
      "step": 7940
    },
    {
      "epoch": 0.013026714584028855,
      "grad_norm": 38.408912658691406,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.1116,
      "step": 7960
    },
    {
      "epoch": 0.013059445022682194,
      "grad_norm": 31.37411880493164,
      "learning_rate": 3.990000000000001e-06,
      "loss": 0.1362,
      "step": 7980
    },
    {
      "epoch": 0.013092175461335533,
      "grad_norm": 8.25979995727539,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.1301,
      "step": 8000
    },
    {
      "epoch": 0.01312490589998887,
      "grad_norm": 9.684673309326172,
      "learning_rate": 4.0100000000000006e-06,
      "loss": 0.1364,
      "step": 8020
    },
    {
      "epoch": 0.01315763633864221,
      "grad_norm": 15.583831787109375,
      "learning_rate": 4.0200000000000005e-06,
      "loss": 0.12,
      "step": 8040
    },
    {
      "epoch": 0.01319036677729555,
      "grad_norm": 14.681408882141113,
      "learning_rate": 4.03e-06,
      "loss": 0.1308,
      "step": 8060
    },
    {
      "epoch": 0.013223097215948889,
      "grad_norm": 6.194861888885498,
      "learning_rate": 4.04e-06,
      "loss": 0.1169,
      "step": 8080
    },
    {
      "epoch": 0.013255827654602226,
      "grad_norm": 10.680932998657227,
      "learning_rate": 4.05e-06,
      "loss": 0.1291,
      "step": 8100
    },
    {
      "epoch": 0.013288558093255565,
      "grad_norm": 19.037384033203125,
      "learning_rate": 4.060000000000001e-06,
      "loss": 0.1112,
      "step": 8120
    },
    {
      "epoch": 0.013321288531908905,
      "grad_norm": 19.854801177978516,
      "learning_rate": 4.07e-06,
      "loss": 0.1489,
      "step": 8140
    },
    {
      "epoch": 0.013354018970562244,
      "grad_norm": 8.052275657653809,
      "learning_rate": 4.08e-06,
      "loss": 0.1376,
      "step": 8160
    },
    {
      "epoch": 0.013386749409215583,
      "grad_norm": 33.291107177734375,
      "learning_rate": 4.09e-06,
      "loss": 0.153,
      "step": 8180
    },
    {
      "epoch": 0.01341947984786892,
      "grad_norm": 13.239225387573242,
      "learning_rate": 4.1e-06,
      "loss": 0.1345,
      "step": 8200
    },
    {
      "epoch": 0.01345221028652226,
      "grad_norm": 19.45255470275879,
      "learning_rate": 4.1100000000000005e-06,
      "loss": 0.1163,
      "step": 8220
    },
    {
      "epoch": 0.0134849407251756,
      "grad_norm": 3.6668903827667236,
      "learning_rate": 4.12e-06,
      "loss": 0.1408,
      "step": 8240
    },
    {
      "epoch": 0.013517671163828938,
      "grad_norm": 20.433109283447266,
      "learning_rate": 4.13e-06,
      "loss": 0.1176,
      "step": 8260
    },
    {
      "epoch": 0.013550401602482276,
      "grad_norm": 19.74745750427246,
      "learning_rate": 4.14e-06,
      "loss": 0.1387,
      "step": 8280
    },
    {
      "epoch": 0.013583132041135615,
      "grad_norm": 11.160043716430664,
      "learning_rate": 4.15e-06,
      "loss": 0.1261,
      "step": 8300
    },
    {
      "epoch": 0.013615862479788955,
      "grad_norm": 9.564675331115723,
      "learning_rate": 4.16e-06,
      "loss": 0.1501,
      "step": 8320
    },
    {
      "epoch": 0.013648592918442294,
      "grad_norm": 9.270025253295898,
      "learning_rate": 4.17e-06,
      "loss": 0.1638,
      "step": 8340
    },
    {
      "epoch": 0.013681323357095631,
      "grad_norm": 7.911000728607178,
      "learning_rate": 4.18e-06,
      "loss": 0.1302,
      "step": 8360
    },
    {
      "epoch": 0.01371405379574897,
      "grad_norm": 10.710180282592773,
      "learning_rate": 4.1900000000000005e-06,
      "loss": 0.1506,
      "step": 8380
    },
    {
      "epoch": 0.01374678423440231,
      "grad_norm": 8.42337703704834,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.1394,
      "step": 8400
    },
    {
      "epoch": 0.013779514673055649,
      "grad_norm": 6.72131872177124,
      "learning_rate": 4.21e-06,
      "loss": 0.1133,
      "step": 8420
    },
    {
      "epoch": 0.013812245111708987,
      "grad_norm": 11.969120025634766,
      "learning_rate": 4.22e-06,
      "loss": 0.1312,
      "step": 8440
    },
    {
      "epoch": 0.013844975550362326,
      "grad_norm": 10.179827690124512,
      "learning_rate": 4.23e-06,
      "loss": 0.1201,
      "step": 8460
    },
    {
      "epoch": 0.013877705989015665,
      "grad_norm": 11.548558235168457,
      "learning_rate": 4.24e-06,
      "loss": 0.1166,
      "step": 8480
    },
    {
      "epoch": 0.013910436427669004,
      "grad_norm": 4.559553146362305,
      "learning_rate": 4.25e-06,
      "loss": 0.1078,
      "step": 8500
    },
    {
      "epoch": 0.013943166866322342,
      "grad_norm": 5.7927374839782715,
      "learning_rate": 4.26e-06,
      "loss": 0.1387,
      "step": 8520
    },
    {
      "epoch": 0.013975897304975681,
      "grad_norm": 9.414217948913574,
      "learning_rate": 4.270000000000001e-06,
      "loss": 0.1223,
      "step": 8540
    },
    {
      "epoch": 0.01400862774362902,
      "grad_norm": 6.315659999847412,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 0.124,
      "step": 8560
    },
    {
      "epoch": 0.01404135818228236,
      "grad_norm": 1.5119049549102783,
      "learning_rate": 4.2900000000000004e-06,
      "loss": 0.117,
      "step": 8580
    },
    {
      "epoch": 0.014074088620935697,
      "grad_norm": 27.067110061645508,
      "learning_rate": 4.3e-06,
      "loss": 0.1454,
      "step": 8600
    },
    {
      "epoch": 0.014106819059589036,
      "grad_norm": 8.004500389099121,
      "learning_rate": 4.31e-06,
      "loss": 0.1336,
      "step": 8620
    },
    {
      "epoch": 0.014139549498242376,
      "grad_norm": 7.394827842712402,
      "learning_rate": 4.32e-06,
      "loss": 0.1009,
      "step": 8640
    },
    {
      "epoch": 0.014172279936895715,
      "grad_norm": 10.003144264221191,
      "learning_rate": 4.33e-06,
      "loss": 0.1284,
      "step": 8660
    },
    {
      "epoch": 0.014205010375549052,
      "grad_norm": 5.578573703765869,
      "learning_rate": 4.34e-06,
      "loss": 0.1397,
      "step": 8680
    },
    {
      "epoch": 0.014237740814202392,
      "grad_norm": 8.827242851257324,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.1114,
      "step": 8700
    },
    {
      "epoch": 0.014270471252855731,
      "grad_norm": 11.474480628967285,
      "learning_rate": 4.360000000000001e-06,
      "loss": 0.1286,
      "step": 8720
    },
    {
      "epoch": 0.01430320169150907,
      "grad_norm": 16.9912109375,
      "learning_rate": 4.3700000000000005e-06,
      "loss": 0.1243,
      "step": 8740
    },
    {
      "epoch": 0.014335932130162408,
      "grad_norm": 4.753050327301025,
      "learning_rate": 4.38e-06,
      "loss": 0.1094,
      "step": 8760
    },
    {
      "epoch": 0.014368662568815747,
      "grad_norm": 11.170969009399414,
      "learning_rate": 4.39e-06,
      "loss": 0.1295,
      "step": 8780
    },
    {
      "epoch": 0.014401393007469086,
      "grad_norm": 12.774673461914062,
      "learning_rate": 4.4e-06,
      "loss": 0.1203,
      "step": 8800
    },
    {
      "epoch": 0.014434123446122426,
      "grad_norm": 7.401291847229004,
      "learning_rate": 4.41e-06,
      "loss": 0.1318,
      "step": 8820
    },
    {
      "epoch": 0.014466853884775763,
      "grad_norm": 4.624222278594971,
      "learning_rate": 4.42e-06,
      "loss": 0.1302,
      "step": 8840
    },
    {
      "epoch": 0.014499584323429102,
      "grad_norm": 8.047502517700195,
      "learning_rate": 4.430000000000001e-06,
      "loss": 0.125,
      "step": 8860
    },
    {
      "epoch": 0.014532314762082442,
      "grad_norm": 9.63913345336914,
      "learning_rate": 4.440000000000001e-06,
      "loss": 0.1564,
      "step": 8880
    },
    {
      "epoch": 0.01456504520073578,
      "grad_norm": 11.016352653503418,
      "learning_rate": 4.450000000000001e-06,
      "loss": 0.1262,
      "step": 8900
    },
    {
      "epoch": 0.014597775639389118,
      "grad_norm": 10.685239791870117,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.1478,
      "step": 8920
    },
    {
      "epoch": 0.014630506078042458,
      "grad_norm": 7.402393341064453,
      "learning_rate": 4.47e-06,
      "loss": 0.1315,
      "step": 8940
    },
    {
      "epoch": 0.014663236516695797,
      "grad_norm": 7.78855562210083,
      "learning_rate": 4.48e-06,
      "loss": 0.119,
      "step": 8960
    },
    {
      "epoch": 0.014695966955349136,
      "grad_norm": 15.253050804138184,
      "learning_rate": 4.49e-06,
      "loss": 0.1271,
      "step": 8980
    },
    {
      "epoch": 0.014728697394002474,
      "grad_norm": 10.337650299072266,
      "learning_rate": 4.5e-06,
      "loss": 0.1269,
      "step": 9000
    },
    {
      "epoch": 0.014761427832655813,
      "grad_norm": 14.20915699005127,
      "learning_rate": 4.510000000000001e-06,
      "loss": 0.1276,
      "step": 9020
    },
    {
      "epoch": 0.014794158271309152,
      "grad_norm": 10.920132637023926,
      "learning_rate": 4.520000000000001e-06,
      "loss": 0.1316,
      "step": 9040
    },
    {
      "epoch": 0.014826888709962491,
      "grad_norm": 186.19656372070312,
      "learning_rate": 4.530000000000001e-06,
      "loss": 0.1212,
      "step": 9060
    },
    {
      "epoch": 0.014859619148615829,
      "grad_norm": 3.8383829593658447,
      "learning_rate": 4.540000000000001e-06,
      "loss": 0.1304,
      "step": 9080
    },
    {
      "epoch": 0.014892349587269168,
      "grad_norm": 24.936613082885742,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.1273,
      "step": 9100
    },
    {
      "epoch": 0.014925080025922507,
      "grad_norm": 7.965730667114258,
      "learning_rate": 4.56e-06,
      "loss": 0.1197,
      "step": 9120
    },
    {
      "epoch": 0.014957810464575847,
      "grad_norm": 12.110482215881348,
      "learning_rate": 4.57e-06,
      "loss": 0.1436,
      "step": 9140
    },
    {
      "epoch": 0.014990540903229184,
      "grad_norm": 6.697673320770264,
      "learning_rate": 4.58e-06,
      "loss": 0.1347,
      "step": 9160
    },
    {
      "epoch": 0.015023271341882524,
      "grad_norm": 26.178590774536133,
      "learning_rate": 4.590000000000001e-06,
      "loss": 0.144,
      "step": 9180
    },
    {
      "epoch": 0.015056001780535863,
      "grad_norm": 8.794496536254883,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.1502,
      "step": 9200
    },
    {
      "epoch": 0.015088732219189202,
      "grad_norm": 9.967669486999512,
      "learning_rate": 4.610000000000001e-06,
      "loss": 0.1546,
      "step": 9220
    },
    {
      "epoch": 0.01512146265784254,
      "grad_norm": 4.697237491607666,
      "learning_rate": 4.620000000000001e-06,
      "loss": 0.1109,
      "step": 9240
    },
    {
      "epoch": 0.015154193096495879,
      "grad_norm": 9.560781478881836,
      "learning_rate": 4.6300000000000006e-06,
      "loss": 0.1303,
      "step": 9260
    },
    {
      "epoch": 0.015186923535149218,
      "grad_norm": 6.886377334594727,
      "learning_rate": 4.6400000000000005e-06,
      "loss": 0.1273,
      "step": 9280
    },
    {
      "epoch": 0.015219653973802557,
      "grad_norm": 42.96214294433594,
      "learning_rate": 4.65e-06,
      "loss": 0.1335,
      "step": 9300
    },
    {
      "epoch": 0.015252384412455897,
      "grad_norm": 7.8850297927856445,
      "learning_rate": 4.66e-06,
      "loss": 0.1231,
      "step": 9320
    },
    {
      "epoch": 0.015285114851109234,
      "grad_norm": 4.594091892242432,
      "learning_rate": 4.670000000000001e-06,
      "loss": 0.1093,
      "step": 9340
    },
    {
      "epoch": 0.015317845289762573,
      "grad_norm": 20.740604400634766,
      "learning_rate": 4.680000000000001e-06,
      "loss": 0.1056,
      "step": 9360
    },
    {
      "epoch": 0.015350575728415913,
      "grad_norm": 7.131661891937256,
      "learning_rate": 4.69e-06,
      "loss": 0.1381,
      "step": 9380
    },
    {
      "epoch": 0.015383306167069252,
      "grad_norm": 3.9488916397094727,
      "learning_rate": 4.7e-06,
      "loss": 0.1253,
      "step": 9400
    },
    {
      "epoch": 0.01541603660572259,
      "grad_norm": 15.514260292053223,
      "learning_rate": 4.71e-06,
      "loss": 0.1328,
      "step": 9420
    },
    {
      "epoch": 0.015448767044375929,
      "grad_norm": 15.29692268371582,
      "learning_rate": 4.7200000000000005e-06,
      "loss": 0.1285,
      "step": 9440
    },
    {
      "epoch": 0.015481497483029268,
      "grad_norm": 7.738621234893799,
      "learning_rate": 4.7300000000000005e-06,
      "loss": 0.1426,
      "step": 9460
    },
    {
      "epoch": 0.015514227921682607,
      "grad_norm": 6.835037708282471,
      "learning_rate": 4.74e-06,
      "loss": 0.1078,
      "step": 9480
    },
    {
      "epoch": 0.015546958360335945,
      "grad_norm": 19.29600715637207,
      "learning_rate": 4.75e-06,
      "loss": 0.1468,
      "step": 9500
    },
    {
      "epoch": 0.015579688798989284,
      "grad_norm": 24.189821243286133,
      "learning_rate": 4.76e-06,
      "loss": 0.1492,
      "step": 9520
    },
    {
      "epoch": 0.015612419237642623,
      "grad_norm": 3.909679412841797,
      "learning_rate": 4.77e-06,
      "loss": 0.1325,
      "step": 9540
    },
    {
      "epoch": 0.01564514967629596,
      "grad_norm": 10.516539573669434,
      "learning_rate": 4.78e-06,
      "loss": 0.1235,
      "step": 9560
    },
    {
      "epoch": 0.015677880114949302,
      "grad_norm": 15.927177429199219,
      "learning_rate": 4.79e-06,
      "loss": 0.126,
      "step": 9580
    },
    {
      "epoch": 0.01571061055360264,
      "grad_norm": 3.0019803047180176,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.1037,
      "step": 9600
    },
    {
      "epoch": 0.015743340992255977,
      "grad_norm": 3.283231019973755,
      "learning_rate": 4.8100000000000005e-06,
      "loss": 0.1175,
      "step": 9620
    },
    {
      "epoch": 0.015776071430909318,
      "grad_norm": 11.890449523925781,
      "learning_rate": 4.8200000000000004e-06,
      "loss": 0.1285,
      "step": 9640
    },
    {
      "epoch": 0.015808801869562655,
      "grad_norm": 10.419636726379395,
      "learning_rate": 4.83e-06,
      "loss": 0.1223,
      "step": 9660
    },
    {
      "epoch": 0.015841532308215996,
      "grad_norm": 12.968546867370605,
      "learning_rate": 4.84e-06,
      "loss": 0.131,
      "step": 9680
    },
    {
      "epoch": 0.015874262746869334,
      "grad_norm": 6.059152603149414,
      "learning_rate": 4.85e-06,
      "loss": 0.135,
      "step": 9700
    },
    {
      "epoch": 0.01590699318552267,
      "grad_norm": 11.137810707092285,
      "learning_rate": 4.86e-06,
      "loss": 0.136,
      "step": 9720
    },
    {
      "epoch": 0.015939723624176012,
      "grad_norm": 6.605539798736572,
      "learning_rate": 4.87e-06,
      "loss": 0.1122,
      "step": 9740
    },
    {
      "epoch": 0.01597245406282935,
      "grad_norm": 7.660419464111328,
      "learning_rate": 4.880000000000001e-06,
      "loss": 0.1191,
      "step": 9760
    },
    {
      "epoch": 0.016005184501482687,
      "grad_norm": 8.976151466369629,
      "learning_rate": 4.890000000000001e-06,
      "loss": 0.1199,
      "step": 9780
    },
    {
      "epoch": 0.01603791494013603,
      "grad_norm": 6.1989006996154785,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.132,
      "step": 9800
    },
    {
      "epoch": 0.016070645378789366,
      "grad_norm": 18.003416061401367,
      "learning_rate": 4.9100000000000004e-06,
      "loss": 0.1237,
      "step": 9820
    },
    {
      "epoch": 0.016103375817442707,
      "grad_norm": 7.800823211669922,
      "learning_rate": 4.92e-06,
      "loss": 0.1301,
      "step": 9840
    },
    {
      "epoch": 0.016136106256096044,
      "grad_norm": 10.843794822692871,
      "learning_rate": 4.93e-06,
      "loss": 0.1186,
      "step": 9860
    },
    {
      "epoch": 0.016168836694749382,
      "grad_norm": 6.152767181396484,
      "learning_rate": 4.94e-06,
      "loss": 0.135,
      "step": 9880
    },
    {
      "epoch": 0.016201567133402723,
      "grad_norm": 36.35658645629883,
      "learning_rate": 4.95e-06,
      "loss": 0.1462,
      "step": 9900
    },
    {
      "epoch": 0.01623429757205606,
      "grad_norm": 9.00845718383789,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.129,
      "step": 9920
    },
    {
      "epoch": 0.016267028010709398,
      "grad_norm": 8.522400856018066,
      "learning_rate": 4.970000000000001e-06,
      "loss": 0.1436,
      "step": 9940
    },
    {
      "epoch": 0.01629975844936274,
      "grad_norm": 14.67543888092041,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.1153,
      "step": 9960
    },
    {
      "epoch": 0.016332488888016077,
      "grad_norm": 10.989816665649414,
      "learning_rate": 4.9900000000000005e-06,
      "loss": 0.1259,
      "step": 9980
    },
    {
      "epoch": 0.016365219326669418,
      "grad_norm": 11.864877700805664,
      "learning_rate": 5e-06,
      "loss": 0.1365,
      "step": 10000
    },
    {
      "epoch": 0.016397949765322755,
      "grad_norm": 6.9157185554504395,
      "learning_rate": 5.01e-06,
      "loss": 0.1278,
      "step": 10020
    },
    {
      "epoch": 0.016430680203976093,
      "grad_norm": 9.559420585632324,
      "learning_rate": 5.02e-06,
      "loss": 0.1245,
      "step": 10040
    },
    {
      "epoch": 0.016463410642629434,
      "grad_norm": 7.7225518226623535,
      "learning_rate": 5.03e-06,
      "loss": 0.1427,
      "step": 10060
    },
    {
      "epoch": 0.01649614108128277,
      "grad_norm": 8.995641708374023,
      "learning_rate": 5.04e-06,
      "loss": 0.1212,
      "step": 10080
    },
    {
      "epoch": 0.01652887151993611,
      "grad_norm": 5.647816181182861,
      "learning_rate": 5.050000000000001e-06,
      "loss": 0.128,
      "step": 10100
    },
    {
      "epoch": 0.01656160195858945,
      "grad_norm": 16.556331634521484,
      "learning_rate": 5.060000000000001e-06,
      "loss": 0.1267,
      "step": 10120
    },
    {
      "epoch": 0.016594332397242787,
      "grad_norm": 9.721426010131836,
      "learning_rate": 5.070000000000001e-06,
      "loss": 0.0966,
      "step": 10140
    },
    {
      "epoch": 0.016627062835896128,
      "grad_norm": 7.235116481781006,
      "learning_rate": 5.0800000000000005e-06,
      "loss": 0.1559,
      "step": 10160
    },
    {
      "epoch": 0.016659793274549466,
      "grad_norm": 5.426608562469482,
      "learning_rate": 5.09e-06,
      "loss": 0.1179,
      "step": 10180
    },
    {
      "epoch": 0.016692523713202803,
      "grad_norm": 6.218957424163818,
      "learning_rate": 5.1e-06,
      "loss": 0.1291,
      "step": 10200
    },
    {
      "epoch": 0.016725254151856144,
      "grad_norm": 10.165144920349121,
      "learning_rate": 5.11e-06,
      "loss": 0.1404,
      "step": 10220
    },
    {
      "epoch": 0.01675798459050948,
      "grad_norm": 8.192106246948242,
      "learning_rate": 5.12e-06,
      "loss": 0.1325,
      "step": 10240
    },
    {
      "epoch": 0.01679071502916282,
      "grad_norm": 14.605569839477539,
      "learning_rate": 5.130000000000001e-06,
      "loss": 0.133,
      "step": 10260
    },
    {
      "epoch": 0.01682344546781616,
      "grad_norm": 6.413369655609131,
      "learning_rate": 5.140000000000001e-06,
      "loss": 0.1237,
      "step": 10280
    },
    {
      "epoch": 0.016856175906469498,
      "grad_norm": 8.961640357971191,
      "learning_rate": 5.150000000000001e-06,
      "loss": 0.1066,
      "step": 10300
    },
    {
      "epoch": 0.01688890634512284,
      "grad_norm": 27.121992111206055,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 0.1124,
      "step": 10320
    },
    {
      "epoch": 0.016921636783776176,
      "grad_norm": 606.1768798828125,
      "learning_rate": 5.1700000000000005e-06,
      "loss": 0.1491,
      "step": 10340
    },
    {
      "epoch": 0.016954367222429514,
      "grad_norm": 7.638665199279785,
      "learning_rate": 5.18e-06,
      "loss": 0.1136,
      "step": 10360
    },
    {
      "epoch": 0.016987097661082855,
      "grad_norm": 23.173873901367188,
      "learning_rate": 5.19e-06,
      "loss": 0.1303,
      "step": 10380
    },
    {
      "epoch": 0.017019828099736192,
      "grad_norm": 15.799456596374512,
      "learning_rate": 5.2e-06,
      "loss": 0.124,
      "step": 10400
    },
    {
      "epoch": 0.01705255853838953,
      "grad_norm": 14.010171890258789,
      "learning_rate": 5.210000000000001e-06,
      "loss": 0.1166,
      "step": 10420
    },
    {
      "epoch": 0.01708528897704287,
      "grad_norm": 13.99083423614502,
      "learning_rate": 5.220000000000001e-06,
      "loss": 0.1304,
      "step": 10440
    },
    {
      "epoch": 0.01711801941569621,
      "grad_norm": 7.2116899490356445,
      "learning_rate": 5.230000000000001e-06,
      "loss": 0.101,
      "step": 10460
    },
    {
      "epoch": 0.01715074985434955,
      "grad_norm": 5.704728603363037,
      "learning_rate": 5.240000000000001e-06,
      "loss": 0.1306,
      "step": 10480
    },
    {
      "epoch": 0.017183480293002887,
      "grad_norm": 7.744631767272949,
      "learning_rate": 5.2500000000000006e-06,
      "loss": 0.1114,
      "step": 10500
    },
    {
      "epoch": 0.017216210731656224,
      "grad_norm": 8.216776847839355,
      "learning_rate": 5.2600000000000005e-06,
      "loss": 0.1342,
      "step": 10520
    },
    {
      "epoch": 0.017248941170309565,
      "grad_norm": 9.315800666809082,
      "learning_rate": 5.27e-06,
      "loss": 0.114,
      "step": 10540
    },
    {
      "epoch": 0.017281671608962903,
      "grad_norm": 9.949655532836914,
      "learning_rate": 5.28e-06,
      "loss": 0.135,
      "step": 10560
    },
    {
      "epoch": 0.017314402047616244,
      "grad_norm": 22.08950424194336,
      "learning_rate": 5.290000000000001e-06,
      "loss": 0.1255,
      "step": 10580
    },
    {
      "epoch": 0.01734713248626958,
      "grad_norm": 13.617483139038086,
      "learning_rate": 5.300000000000001e-06,
      "loss": 0.1297,
      "step": 10600
    },
    {
      "epoch": 0.01737986292492292,
      "grad_norm": 7.987496852874756,
      "learning_rate": 5.310000000000001e-06,
      "loss": 0.1186,
      "step": 10620
    },
    {
      "epoch": 0.01741259336357626,
      "grad_norm": 12.622431755065918,
      "learning_rate": 5.320000000000001e-06,
      "loss": 0.1194,
      "step": 10640
    },
    {
      "epoch": 0.017445323802229597,
      "grad_norm": 9.514262199401855,
      "learning_rate": 5.330000000000001e-06,
      "loss": 0.1196,
      "step": 10660
    },
    {
      "epoch": 0.017478054240882935,
      "grad_norm": 15.021286964416504,
      "learning_rate": 5.3400000000000005e-06,
      "loss": 0.125,
      "step": 10680
    },
    {
      "epoch": 0.017510784679536276,
      "grad_norm": 14.023000717163086,
      "learning_rate": 5.3500000000000004e-06,
      "loss": 0.1134,
      "step": 10700
    },
    {
      "epoch": 0.017543515118189613,
      "grad_norm": 3.590777635574341,
      "learning_rate": 5.36e-06,
      "loss": 0.1431,
      "step": 10720
    },
    {
      "epoch": 0.017576245556842954,
      "grad_norm": 0.6944119334220886,
      "learning_rate": 5.370000000000001e-06,
      "loss": 0.0966,
      "step": 10740
    },
    {
      "epoch": 0.017608975995496292,
      "grad_norm": 14.446030616760254,
      "learning_rate": 5.380000000000001e-06,
      "loss": 0.1242,
      "step": 10760
    },
    {
      "epoch": 0.01764170643414963,
      "grad_norm": 89.62862396240234,
      "learning_rate": 5.390000000000001e-06,
      "loss": 0.1223,
      "step": 10780
    },
    {
      "epoch": 0.01767443687280297,
      "grad_norm": 10.743759155273438,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.1422,
      "step": 10800
    },
    {
      "epoch": 0.017707167311456308,
      "grad_norm": 6.384023189544678,
      "learning_rate": 5.410000000000001e-06,
      "loss": 0.1151,
      "step": 10820
    },
    {
      "epoch": 0.017739897750109646,
      "grad_norm": 103.59675598144531,
      "learning_rate": 5.420000000000001e-06,
      "loss": 0.1337,
      "step": 10840
    },
    {
      "epoch": 0.017772628188762987,
      "grad_norm": 7.915221214294434,
      "learning_rate": 5.4300000000000005e-06,
      "loss": 0.1127,
      "step": 10860
    },
    {
      "epoch": 0.017805358627416324,
      "grad_norm": 10.855005264282227,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 0.1183,
      "step": 10880
    },
    {
      "epoch": 0.017838089066069665,
      "grad_norm": 2.142184257507324,
      "learning_rate": 5.450000000000001e-06,
      "loss": 0.1306,
      "step": 10900
    },
    {
      "epoch": 0.017870819504723003,
      "grad_norm": 15.504266738891602,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.1323,
      "step": 10920
    },
    {
      "epoch": 0.01790354994337634,
      "grad_norm": 13.113832473754883,
      "learning_rate": 5.470000000000001e-06,
      "loss": 0.124,
      "step": 10940
    },
    {
      "epoch": 0.01793628038202968,
      "grad_norm": 12.234186172485352,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.1152,
      "step": 10960
    },
    {
      "epoch": 0.01796901082068302,
      "grad_norm": 4.765852928161621,
      "learning_rate": 5.490000000000001e-06,
      "loss": 0.1186,
      "step": 10980
    },
    {
      "epoch": 0.018001741259336356,
      "grad_norm": 14.759673118591309,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.1177,
      "step": 11000
    },
    {
      "epoch": 0.018034471697989697,
      "grad_norm": 11.640871047973633,
      "learning_rate": 5.510000000000001e-06,
      "loss": 0.1258,
      "step": 11020
    },
    {
      "epoch": 0.018067202136643035,
      "grad_norm": 7.64703369140625,
      "learning_rate": 5.5200000000000005e-06,
      "loss": 0.1272,
      "step": 11040
    },
    {
      "epoch": 0.018099932575296376,
      "grad_norm": 7.460769176483154,
      "learning_rate": 5.530000000000001e-06,
      "loss": 0.1027,
      "step": 11060
    },
    {
      "epoch": 0.018132663013949713,
      "grad_norm": 45.7253303527832,
      "learning_rate": 5.540000000000001e-06,
      "loss": 0.1218,
      "step": 11080
    },
    {
      "epoch": 0.01816539345260305,
      "grad_norm": 15.246835708618164,
      "learning_rate": 5.550000000000001e-06,
      "loss": 0.1199,
      "step": 11100
    },
    {
      "epoch": 0.01819812389125639,
      "grad_norm": 7.656198501586914,
      "learning_rate": 5.560000000000001e-06,
      "loss": 0.1316,
      "step": 11120
    },
    {
      "epoch": 0.01823085432990973,
      "grad_norm": 5.747116565704346,
      "learning_rate": 5.570000000000001e-06,
      "loss": 0.1156,
      "step": 11140
    },
    {
      "epoch": 0.018263584768563067,
      "grad_norm": 12.99211311340332,
      "learning_rate": 5.580000000000001e-06,
      "loss": 0.1272,
      "step": 11160
    },
    {
      "epoch": 0.018296315207216408,
      "grad_norm": 7.984668731689453,
      "learning_rate": 5.590000000000001e-06,
      "loss": 0.1292,
      "step": 11180
    },
    {
      "epoch": 0.018329045645869745,
      "grad_norm": 4.139256954193115,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.1259,
      "step": 11200
    },
    {
      "epoch": 0.018361776084523086,
      "grad_norm": 22.660436630249023,
      "learning_rate": 5.610000000000001e-06,
      "loss": 0.1156,
      "step": 11220
    },
    {
      "epoch": 0.018394506523176424,
      "grad_norm": 29.53432846069336,
      "learning_rate": 5.620000000000001e-06,
      "loss": 0.1142,
      "step": 11240
    },
    {
      "epoch": 0.01842723696182976,
      "grad_norm": 20.42781639099121,
      "learning_rate": 5.63e-06,
      "loss": 0.1181,
      "step": 11260
    },
    {
      "epoch": 0.018459967400483102,
      "grad_norm": 9.449018478393555,
      "learning_rate": 5.64e-06,
      "loss": 0.1238,
      "step": 11280
    },
    {
      "epoch": 0.01849269783913644,
      "grad_norm": 8.991464614868164,
      "learning_rate": 5.65e-06,
      "loss": 0.1179,
      "step": 11300
    },
    {
      "epoch": 0.018525428277789777,
      "grad_norm": 6.9796953201293945,
      "learning_rate": 5.66e-06,
      "loss": 0.1347,
      "step": 11320
    },
    {
      "epoch": 0.01855815871644312,
      "grad_norm": 21.88351058959961,
      "learning_rate": 5.67e-06,
      "loss": 0.1242,
      "step": 11340
    },
    {
      "epoch": 0.018590889155096456,
      "grad_norm": 6.743653774261475,
      "learning_rate": 5.68e-06,
      "loss": 0.0946,
      "step": 11360
    },
    {
      "epoch": 0.018623619593749797,
      "grad_norm": 11.749059677124023,
      "learning_rate": 5.69e-06,
      "loss": 0.114,
      "step": 11380
    },
    {
      "epoch": 0.018656350032403134,
      "grad_norm": 13.571227073669434,
      "learning_rate": 5.7e-06,
      "loss": 0.1082,
      "step": 11400
    },
    {
      "epoch": 0.018689080471056472,
      "grad_norm": 6.39813232421875,
      "learning_rate": 5.71e-06,
      "loss": 0.113,
      "step": 11420
    },
    {
      "epoch": 0.018721810909709813,
      "grad_norm": 12.218859672546387,
      "learning_rate": 5.72e-06,
      "loss": 0.1216,
      "step": 11440
    },
    {
      "epoch": 0.01875454134836315,
      "grad_norm": 30.416345596313477,
      "learning_rate": 5.73e-06,
      "loss": 0.0998,
      "step": 11460
    },
    {
      "epoch": 0.018787271787016488,
      "grad_norm": 16.454591751098633,
      "learning_rate": 5.74e-06,
      "loss": 0.1245,
      "step": 11480
    },
    {
      "epoch": 0.01882000222566983,
      "grad_norm": 10.131834030151367,
      "learning_rate": 5.75e-06,
      "loss": 0.1479,
      "step": 11500
    },
    {
      "epoch": 0.018852732664323166,
      "grad_norm": 11.705201148986816,
      "learning_rate": 5.76e-06,
      "loss": 0.1333,
      "step": 11520
    },
    {
      "epoch": 0.018885463102976507,
      "grad_norm": 7.636386871337891,
      "learning_rate": 5.77e-06,
      "loss": 0.1053,
      "step": 11540
    },
    {
      "epoch": 0.018918193541629845,
      "grad_norm": 6.214305877685547,
      "learning_rate": 5.78e-06,
      "loss": 0.1249,
      "step": 11560
    },
    {
      "epoch": 0.018950923980283182,
      "grad_norm": 8.111993789672852,
      "learning_rate": 5.7900000000000005e-06,
      "loss": 0.1178,
      "step": 11580
    },
    {
      "epoch": 0.018983654418936523,
      "grad_norm": 5.568321228027344,
      "learning_rate": 5.8e-06,
      "loss": 0.0967,
      "step": 11600
    },
    {
      "epoch": 0.01901638485758986,
      "grad_norm": 5.281013011932373,
      "learning_rate": 5.81e-06,
      "loss": 0.1381,
      "step": 11620
    },
    {
      "epoch": 0.0190491152962432,
      "grad_norm": 8.95651912689209,
      "learning_rate": 5.82e-06,
      "loss": 0.1429,
      "step": 11640
    },
    {
      "epoch": 0.01908184573489654,
      "grad_norm": 23.798431396484375,
      "learning_rate": 5.83e-06,
      "loss": 0.1377,
      "step": 11660
    },
    {
      "epoch": 0.019114576173549877,
      "grad_norm": 15.49697208404541,
      "learning_rate": 5.84e-06,
      "loss": 0.142,
      "step": 11680
    },
    {
      "epoch": 0.019147306612203218,
      "grad_norm": 14.311588287353516,
      "learning_rate": 5.85e-06,
      "loss": 0.1219,
      "step": 11700
    },
    {
      "epoch": 0.019180037050856556,
      "grad_norm": 5.254569053649902,
      "learning_rate": 5.86e-06,
      "loss": 0.1474,
      "step": 11720
    },
    {
      "epoch": 0.019212767489509893,
      "grad_norm": 7.3317036628723145,
      "learning_rate": 5.8700000000000005e-06,
      "loss": 0.1336,
      "step": 11740
    },
    {
      "epoch": 0.019245497928163234,
      "grad_norm": 5.835805892944336,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 0.1169,
      "step": 11760
    },
    {
      "epoch": 0.01927822836681657,
      "grad_norm": 4.193265438079834,
      "learning_rate": 5.89e-06,
      "loss": 0.1098,
      "step": 11780
    },
    {
      "epoch": 0.019310958805469913,
      "grad_norm": 9.098302841186523,
      "learning_rate": 5.9e-06,
      "loss": 0.1115,
      "step": 11800
    },
    {
      "epoch": 0.01934368924412325,
      "grad_norm": 9.469847679138184,
      "learning_rate": 5.91e-06,
      "loss": 0.1336,
      "step": 11820
    },
    {
      "epoch": 0.019376419682776588,
      "grad_norm": 13.592034339904785,
      "learning_rate": 5.92e-06,
      "loss": 0.1396,
      "step": 11840
    },
    {
      "epoch": 0.01940915012142993,
      "grad_norm": 11.764131546020508,
      "learning_rate": 5.93e-06,
      "loss": 0.1149,
      "step": 11860
    },
    {
      "epoch": 0.019441880560083266,
      "grad_norm": 5.016818523406982,
      "learning_rate": 5.94e-06,
      "loss": 0.1286,
      "step": 11880
    },
    {
      "epoch": 0.019474610998736604,
      "grad_norm": 8.536445617675781,
      "learning_rate": 5.950000000000001e-06,
      "loss": 0.1134,
      "step": 11900
    },
    {
      "epoch": 0.019507341437389945,
      "grad_norm": 4.9219465255737305,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.1208,
      "step": 11920
    },
    {
      "epoch": 0.019540071876043282,
      "grad_norm": 13.136774063110352,
      "learning_rate": 5.9700000000000004e-06,
      "loss": 0.1011,
      "step": 11940
    },
    {
      "epoch": 0.019572802314696623,
      "grad_norm": 9.882054328918457,
      "learning_rate": 5.98e-06,
      "loss": 0.1187,
      "step": 11960
    },
    {
      "epoch": 0.01960553275334996,
      "grad_norm": 10.848222732543945,
      "learning_rate": 5.99e-06,
      "loss": 0.1346,
      "step": 11980
    },
    {
      "epoch": 0.019638263192003298,
      "grad_norm": 7.037537574768066,
      "learning_rate": 6e-06,
      "loss": 0.1255,
      "step": 12000
    },
    {
      "epoch": 0.01967099363065664,
      "grad_norm": 15.662566184997559,
      "learning_rate": 6.01e-06,
      "loss": 0.1461,
      "step": 12020
    },
    {
      "epoch": 0.019703724069309977,
      "grad_norm": 8.371037483215332,
      "learning_rate": 6.02e-06,
      "loss": 0.1165,
      "step": 12040
    },
    {
      "epoch": 0.019736454507963314,
      "grad_norm": 12.781886100769043,
      "learning_rate": 6.030000000000001e-06,
      "loss": 0.1056,
      "step": 12060
    },
    {
      "epoch": 0.019769184946616655,
      "grad_norm": 10.76941967010498,
      "learning_rate": 6.040000000000001e-06,
      "loss": 0.1265,
      "step": 12080
    },
    {
      "epoch": 0.019801915385269993,
      "grad_norm": 21.89892578125,
      "learning_rate": 6.0500000000000005e-06,
      "loss": 0.1147,
      "step": 12100
    },
    {
      "epoch": 0.019834645823923334,
      "grad_norm": 7.536247730255127,
      "learning_rate": 6.0600000000000004e-06,
      "loss": 0.1364,
      "step": 12120
    },
    {
      "epoch": 0.01986737626257667,
      "grad_norm": 4.512470245361328,
      "learning_rate": 6.07e-06,
      "loss": 0.1356,
      "step": 12140
    },
    {
      "epoch": 0.01990010670123001,
      "grad_norm": 7.716565132141113,
      "learning_rate": 6.08e-06,
      "loss": 0.1195,
      "step": 12160
    },
    {
      "epoch": 0.01993283713988335,
      "grad_norm": 10.159018516540527,
      "learning_rate": 6.09e-06,
      "loss": 0.1122,
      "step": 12180
    },
    {
      "epoch": 0.019965567578536687,
      "grad_norm": 43.498878479003906,
      "learning_rate": 6.1e-06,
      "loss": 0.1104,
      "step": 12200
    },
    {
      "epoch": 0.019998298017190025,
      "grad_norm": 20.03522491455078,
      "learning_rate": 6.110000000000001e-06,
      "loss": 0.1271,
      "step": 12220
    },
    {
      "epoch": 0.020031028455843366,
      "grad_norm": 5.425199031829834,
      "learning_rate": 6.120000000000001e-06,
      "loss": 0.0889,
      "step": 12240
    },
    {
      "epoch": 0.020063758894496703,
      "grad_norm": 5.374655723571777,
      "learning_rate": 6.130000000000001e-06,
      "loss": 0.117,
      "step": 12260
    },
    {
      "epoch": 0.020096489333150044,
      "grad_norm": 5.102170944213867,
      "learning_rate": 6.1400000000000005e-06,
      "loss": 0.122,
      "step": 12280
    },
    {
      "epoch": 0.020129219771803382,
      "grad_norm": 5.36668062210083,
      "learning_rate": 6.15e-06,
      "loss": 0.1234,
      "step": 12300
    },
    {
      "epoch": 0.02016195021045672,
      "grad_norm": 6.445138931274414,
      "learning_rate": 6.16e-06,
      "loss": 0.1127,
      "step": 12320
    },
    {
      "epoch": 0.02019468064911006,
      "grad_norm": 7.85471773147583,
      "learning_rate": 6.17e-06,
      "loss": 0.1161,
      "step": 12340
    },
    {
      "epoch": 0.020227411087763398,
      "grad_norm": 19.825613021850586,
      "learning_rate": 6.18e-06,
      "loss": 0.1283,
      "step": 12360
    },
    {
      "epoch": 0.020260141526416735,
      "grad_norm": 13.81561279296875,
      "learning_rate": 6.190000000000001e-06,
      "loss": 0.1063,
      "step": 12380
    },
    {
      "epoch": 0.020292871965070076,
      "grad_norm": 7.201405048370361,
      "learning_rate": 6.200000000000001e-06,
      "loss": 0.1084,
      "step": 12400
    },
    {
      "epoch": 0.020325602403723414,
      "grad_norm": 8.625556945800781,
      "learning_rate": 6.210000000000001e-06,
      "loss": 0.1442,
      "step": 12420
    },
    {
      "epoch": 0.020358332842376755,
      "grad_norm": 6.439311981201172,
      "learning_rate": 6.220000000000001e-06,
      "loss": 0.1221,
      "step": 12440
    },
    {
      "epoch": 0.020391063281030092,
      "grad_norm": 6.252732753753662,
      "learning_rate": 6.2300000000000005e-06,
      "loss": 0.1283,
      "step": 12460
    },
    {
      "epoch": 0.02042379371968343,
      "grad_norm": 4.9998674392700195,
      "learning_rate": 6.24e-06,
      "loss": 0.131,
      "step": 12480
    },
    {
      "epoch": 0.02045652415833677,
      "grad_norm": 11.814516067504883,
      "learning_rate": 6.25e-06,
      "loss": 0.1306,
      "step": 12500
    },
    {
      "epoch": 0.02048925459699011,
      "grad_norm": 8.710739135742188,
      "learning_rate": 6.26e-06,
      "loss": 0.1099,
      "step": 12520
    },
    {
      "epoch": 0.020521985035643446,
      "grad_norm": 14.083719253540039,
      "learning_rate": 6.27e-06,
      "loss": 0.1231,
      "step": 12540
    },
    {
      "epoch": 0.020554715474296787,
      "grad_norm": 4.65872859954834,
      "learning_rate": 6.280000000000001e-06,
      "loss": 0.1183,
      "step": 12560
    },
    {
      "epoch": 0.020587445912950125,
      "grad_norm": 5.093535900115967,
      "learning_rate": 6.290000000000001e-06,
      "loss": 0.1308,
      "step": 12580
    },
    {
      "epoch": 0.020620176351603466,
      "grad_norm": 11.057432174682617,
      "learning_rate": 6.300000000000001e-06,
      "loss": 0.1153,
      "step": 12600
    },
    {
      "epoch": 0.020652906790256803,
      "grad_norm": 56.02492141723633,
      "learning_rate": 6.3100000000000006e-06,
      "loss": 0.1233,
      "step": 12620
    },
    {
      "epoch": 0.02068563722891014,
      "grad_norm": 6.565062999725342,
      "learning_rate": 6.3200000000000005e-06,
      "loss": 0.1281,
      "step": 12640
    },
    {
      "epoch": 0.02071836766756348,
      "grad_norm": 3.5712573528289795,
      "learning_rate": 6.33e-06,
      "loss": 0.1117,
      "step": 12660
    },
    {
      "epoch": 0.02075109810621682,
      "grad_norm": 6.933999061584473,
      "learning_rate": 6.34e-06,
      "loss": 0.1139,
      "step": 12680
    },
    {
      "epoch": 0.020783828544870157,
      "grad_norm": 6.234281539916992,
      "learning_rate": 6.35e-06,
      "loss": 0.1206,
      "step": 12700
    },
    {
      "epoch": 0.020816558983523498,
      "grad_norm": 8.749150276184082,
      "learning_rate": 6.360000000000001e-06,
      "loss": 0.1136,
      "step": 12720
    },
    {
      "epoch": 0.020849289422176835,
      "grad_norm": 10.96778392791748,
      "learning_rate": 6.370000000000001e-06,
      "loss": 0.1226,
      "step": 12740
    },
    {
      "epoch": 0.020882019860830176,
      "grad_norm": 3.741872549057007,
      "learning_rate": 6.380000000000001e-06,
      "loss": 0.1173,
      "step": 12760
    },
    {
      "epoch": 0.020914750299483514,
      "grad_norm": 16.1850643157959,
      "learning_rate": 6.390000000000001e-06,
      "loss": 0.1221,
      "step": 12780
    },
    {
      "epoch": 0.02094748073813685,
      "grad_norm": 8.259970664978027,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.1105,
      "step": 12800
    },
    {
      "epoch": 0.020980211176790192,
      "grad_norm": 16.587066650390625,
      "learning_rate": 6.4100000000000005e-06,
      "loss": 0.1306,
      "step": 12820
    },
    {
      "epoch": 0.02101294161544353,
      "grad_norm": 8.15344524383545,
      "learning_rate": 6.42e-06,
      "loss": 0.1109,
      "step": 12840
    },
    {
      "epoch": 0.02104567205409687,
      "grad_norm": 6.646499156951904,
      "learning_rate": 6.43e-06,
      "loss": 0.1414,
      "step": 12860
    },
    {
      "epoch": 0.021078402492750208,
      "grad_norm": 6.990596771240234,
      "learning_rate": 6.440000000000001e-06,
      "loss": 0.1247,
      "step": 12880
    },
    {
      "epoch": 0.021111132931403546,
      "grad_norm": 106.97391510009766,
      "learning_rate": 6.450000000000001e-06,
      "loss": 0.1193,
      "step": 12900
    },
    {
      "epoch": 0.021143863370056887,
      "grad_norm": 15.122036933898926,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.1215,
      "step": 12920
    },
    {
      "epoch": 0.021176593808710224,
      "grad_norm": 5.215033054351807,
      "learning_rate": 6.470000000000001e-06,
      "loss": 0.1177,
      "step": 12940
    },
    {
      "epoch": 0.021209324247363562,
      "grad_norm": 7.9678955078125,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.127,
      "step": 12960
    },
    {
      "epoch": 0.021242054686016903,
      "grad_norm": 11.871441841125488,
      "learning_rate": 6.4900000000000005e-06,
      "loss": 0.1317,
      "step": 12980
    },
    {
      "epoch": 0.02127478512467024,
      "grad_norm": 8.558040618896484,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.1174,
      "step": 13000
    },
    {
      "epoch": 0.02130751556332358,
      "grad_norm": 10.14385986328125,
      "learning_rate": 6.51e-06,
      "loss": 0.1286,
      "step": 13020
    },
    {
      "epoch": 0.02134024600197692,
      "grad_norm": 18.838319778442383,
      "learning_rate": 6.520000000000001e-06,
      "loss": 0.1201,
      "step": 13040
    },
    {
      "epoch": 0.021372976440630256,
      "grad_norm": 5.017125606536865,
      "learning_rate": 6.530000000000001e-06,
      "loss": 0.1075,
      "step": 13060
    },
    {
      "epoch": 0.021405706879283597,
      "grad_norm": 7.418455123901367,
      "learning_rate": 6.540000000000001e-06,
      "loss": 0.1244,
      "step": 13080
    },
    {
      "epoch": 0.021438437317936935,
      "grad_norm": 7.008340835571289,
      "learning_rate": 6.550000000000001e-06,
      "loss": 0.1139,
      "step": 13100
    },
    {
      "epoch": 0.021471167756590272,
      "grad_norm": 7.889460563659668,
      "learning_rate": 6.560000000000001e-06,
      "loss": 0.1365,
      "step": 13120
    },
    {
      "epoch": 0.021503898195243613,
      "grad_norm": 2.3879072666168213,
      "learning_rate": 6.570000000000001e-06,
      "loss": 0.1246,
      "step": 13140
    },
    {
      "epoch": 0.02153662863389695,
      "grad_norm": 10.63464641571045,
      "learning_rate": 6.5800000000000005e-06,
      "loss": 0.1075,
      "step": 13160
    },
    {
      "epoch": 0.021569359072550292,
      "grad_norm": 17.6601505279541,
      "learning_rate": 6.5900000000000004e-06,
      "loss": 0.124,
      "step": 13180
    },
    {
      "epoch": 0.02160208951120363,
      "grad_norm": 5.546273708343506,
      "learning_rate": 6.600000000000001e-06,
      "loss": 0.1267,
      "step": 13200
    },
    {
      "epoch": 0.021634819949856967,
      "grad_norm": 10.74276065826416,
      "learning_rate": 6.610000000000001e-06,
      "loss": 0.1161,
      "step": 13220
    },
    {
      "epoch": 0.021667550388510308,
      "grad_norm": 3.7779436111450195,
      "learning_rate": 6.620000000000001e-06,
      "loss": 0.1301,
      "step": 13240
    },
    {
      "epoch": 0.021700280827163645,
      "grad_norm": 9.073884963989258,
      "learning_rate": 6.630000000000001e-06,
      "loss": 0.134,
      "step": 13260
    },
    {
      "epoch": 0.021733011265816983,
      "grad_norm": 9.139842987060547,
      "learning_rate": 6.640000000000001e-06,
      "loss": 0.1196,
      "step": 13280
    },
    {
      "epoch": 0.021765741704470324,
      "grad_norm": 7.21841287612915,
      "learning_rate": 6.650000000000001e-06,
      "loss": 0.1209,
      "step": 13300
    },
    {
      "epoch": 0.02179847214312366,
      "grad_norm": 21.136877059936523,
      "learning_rate": 6.660000000000001e-06,
      "loss": 0.0914,
      "step": 13320
    },
    {
      "epoch": 0.021831202581777003,
      "grad_norm": 55.73615646362305,
      "learning_rate": 6.6700000000000005e-06,
      "loss": 0.1281,
      "step": 13340
    },
    {
      "epoch": 0.02186393302043034,
      "grad_norm": 10.30917739868164,
      "learning_rate": 6.680000000000001e-06,
      "loss": 0.1508,
      "step": 13360
    },
    {
      "epoch": 0.021896663459083678,
      "grad_norm": 4.156057834625244,
      "learning_rate": 6.690000000000001e-06,
      "loss": 0.1092,
      "step": 13380
    },
    {
      "epoch": 0.02192939389773702,
      "grad_norm": 10.376765251159668,
      "learning_rate": 6.700000000000001e-06,
      "loss": 0.13,
      "step": 13400
    },
    {
      "epoch": 0.021962124336390356,
      "grad_norm": 15.258418083190918,
      "learning_rate": 6.710000000000001e-06,
      "loss": 0.1095,
      "step": 13420
    },
    {
      "epoch": 0.021994854775043694,
      "grad_norm": 5.631412029266357,
      "learning_rate": 6.720000000000001e-06,
      "loss": 0.1216,
      "step": 13440
    },
    {
      "epoch": 0.022027585213697035,
      "grad_norm": 9.253827095031738,
      "learning_rate": 6.730000000000001e-06,
      "loss": 0.0995,
      "step": 13460
    },
    {
      "epoch": 0.022060315652350372,
      "grad_norm": 6.084743499755859,
      "learning_rate": 6.740000000000001e-06,
      "loss": 0.1259,
      "step": 13480
    },
    {
      "epoch": 0.022093046091003713,
      "grad_norm": 3.8553714752197266,
      "learning_rate": 6.750000000000001e-06,
      "loss": 0.1087,
      "step": 13500
    },
    {
      "epoch": 0.02212577652965705,
      "grad_norm": 4.325697898864746,
      "learning_rate": 6.760000000000001e-06,
      "loss": 0.1296,
      "step": 13520
    },
    {
      "epoch": 0.022158506968310388,
      "grad_norm": 7.888118267059326,
      "learning_rate": 6.770000000000001e-06,
      "loss": 0.1414,
      "step": 13540
    },
    {
      "epoch": 0.02219123740696373,
      "grad_norm": 10.10167121887207,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.1204,
      "step": 13560
    },
    {
      "epoch": 0.022223967845617067,
      "grad_norm": 9.176563262939453,
      "learning_rate": 6.790000000000001e-06,
      "loss": 0.1223,
      "step": 13580
    },
    {
      "epoch": 0.022256698284270404,
      "grad_norm": 6.24391508102417,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.1451,
      "step": 13600
    },
    {
      "epoch": 0.022289428722923745,
      "grad_norm": 6.63362979888916,
      "learning_rate": 6.810000000000001e-06,
      "loss": 0.1047,
      "step": 13620
    },
    {
      "epoch": 0.022322159161577083,
      "grad_norm": 8.029703140258789,
      "learning_rate": 6.820000000000001e-06,
      "loss": 0.1139,
      "step": 13640
    },
    {
      "epoch": 0.022354889600230424,
      "grad_norm": 6.473947048187256,
      "learning_rate": 6.830000000000001e-06,
      "loss": 0.1202,
      "step": 13660
    },
    {
      "epoch": 0.02238762003888376,
      "grad_norm": 9.340574264526367,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 0.1215,
      "step": 13680
    },
    {
      "epoch": 0.0224203504775371,
      "grad_norm": 12.460845947265625,
      "learning_rate": 6.850000000000001e-06,
      "loss": 0.1366,
      "step": 13700
    },
    {
      "epoch": 0.02245308091619044,
      "grad_norm": 9.564356803894043,
      "learning_rate": 6.860000000000001e-06,
      "loss": 0.1156,
      "step": 13720
    },
    {
      "epoch": 0.022485811354843777,
      "grad_norm": 4.851902961730957,
      "learning_rate": 6.870000000000001e-06,
      "loss": 0.1023,
      "step": 13740
    },
    {
      "epoch": 0.022518541793497115,
      "grad_norm": 4.39141845703125,
      "learning_rate": 6.88e-06,
      "loss": 0.1356,
      "step": 13760
    },
    {
      "epoch": 0.022551272232150456,
      "grad_norm": 8.424901008605957,
      "learning_rate": 6.89e-06,
      "loss": 0.1236,
      "step": 13780
    },
    {
      "epoch": 0.022584002670803793,
      "grad_norm": 11.181543350219727,
      "learning_rate": 6.9e-06,
      "loss": 0.1447,
      "step": 13800
    },
    {
      "epoch": 0.022616733109457134,
      "grad_norm": 3.19555926322937,
      "learning_rate": 6.91e-06,
      "loss": 0.1352,
      "step": 13820
    },
    {
      "epoch": 0.022649463548110472,
      "grad_norm": 5.181308746337891,
      "learning_rate": 6.92e-06,
      "loss": 0.1103,
      "step": 13840
    },
    {
      "epoch": 0.02268219398676381,
      "grad_norm": 30.135509490966797,
      "learning_rate": 6.93e-06,
      "loss": 0.0932,
      "step": 13860
    },
    {
      "epoch": 0.02271492442541715,
      "grad_norm": 5.730044841766357,
      "learning_rate": 6.9400000000000005e-06,
      "loss": 0.1203,
      "step": 13880
    },
    {
      "epoch": 0.022747654864070488,
      "grad_norm": 6.420878887176514,
      "learning_rate": 6.95e-06,
      "loss": 0.1419,
      "step": 13900
    },
    {
      "epoch": 0.022780385302723825,
      "grad_norm": 19.32124137878418,
      "learning_rate": 6.96e-06,
      "loss": 0.1378,
      "step": 13920
    },
    {
      "epoch": 0.022813115741377166,
      "grad_norm": 9.162346839904785,
      "learning_rate": 6.97e-06,
      "loss": 0.1322,
      "step": 13940
    },
    {
      "epoch": 0.022845846180030504,
      "grad_norm": 14.90809440612793,
      "learning_rate": 6.98e-06,
      "loss": 0.1482,
      "step": 13960
    },
    {
      "epoch": 0.022878576618683845,
      "grad_norm": 10.638283729553223,
      "learning_rate": 6.99e-06,
      "loss": 0.1256,
      "step": 13980
    },
    {
      "epoch": 0.022911307057337182,
      "grad_norm": 13.341506958007812,
      "learning_rate": 7e-06,
      "loss": 0.1209,
      "step": 14000
    },
    {
      "epoch": 0.02294403749599052,
      "grad_norm": 5.000003337860107,
      "learning_rate": 7.01e-06,
      "loss": 0.126,
      "step": 14020
    },
    {
      "epoch": 0.02297676793464386,
      "grad_norm": 8.91609001159668,
      "learning_rate": 7.0200000000000006e-06,
      "loss": 0.1162,
      "step": 14040
    },
    {
      "epoch": 0.0230094983732972,
      "grad_norm": 8.286173820495605,
      "learning_rate": 7.0300000000000005e-06,
      "loss": 0.0996,
      "step": 14060
    },
    {
      "epoch": 0.02304222881195054,
      "grad_norm": 2.784449338912964,
      "learning_rate": 7.04e-06,
      "loss": 0.1475,
      "step": 14080
    },
    {
      "epoch": 0.023074959250603877,
      "grad_norm": 5.826520919799805,
      "learning_rate": 7.05e-06,
      "loss": 0.1112,
      "step": 14100
    },
    {
      "epoch": 0.023107689689257215,
      "grad_norm": 4.906018257141113,
      "learning_rate": 7.06e-06,
      "loss": 0.1032,
      "step": 14120
    },
    {
      "epoch": 0.023140420127910555,
      "grad_norm": 3.3106284141540527,
      "learning_rate": 7.07e-06,
      "loss": 0.1175,
      "step": 14140
    },
    {
      "epoch": 0.023173150566563893,
      "grad_norm": 7.360857009887695,
      "learning_rate": 7.08e-06,
      "loss": 0.1444,
      "step": 14160
    },
    {
      "epoch": 0.02320588100521723,
      "grad_norm": 6.128981590270996,
      "learning_rate": 7.09e-06,
      "loss": 0.107,
      "step": 14180
    },
    {
      "epoch": 0.02323861144387057,
      "grad_norm": 7.797112464904785,
      "learning_rate": 7.100000000000001e-06,
      "loss": 0.096,
      "step": 14200
    },
    {
      "epoch": 0.02327134188252391,
      "grad_norm": 11.853260040283203,
      "learning_rate": 7.1100000000000005e-06,
      "loss": 0.1162,
      "step": 14220
    },
    {
      "epoch": 0.02330407232117725,
      "grad_norm": 11.212519645690918,
      "learning_rate": 7.1200000000000004e-06,
      "loss": 0.1403,
      "step": 14240
    },
    {
      "epoch": 0.023336802759830588,
      "grad_norm": 14.22956371307373,
      "learning_rate": 7.13e-06,
      "loss": 0.1223,
      "step": 14260
    },
    {
      "epoch": 0.023369533198483925,
      "grad_norm": 6.574679851531982,
      "learning_rate": 7.14e-06,
      "loss": 0.1196,
      "step": 14280
    },
    {
      "epoch": 0.023402263637137266,
      "grad_norm": 8.237431526184082,
      "learning_rate": 7.15e-06,
      "loss": 0.1231,
      "step": 14300
    },
    {
      "epoch": 0.023434994075790604,
      "grad_norm": 8.511423110961914,
      "learning_rate": 7.16e-06,
      "loss": 0.1324,
      "step": 14320
    },
    {
      "epoch": 0.02346772451444394,
      "grad_norm": 12.326702117919922,
      "learning_rate": 7.17e-06,
      "loss": 0.1251,
      "step": 14340
    },
    {
      "epoch": 0.023500454953097282,
      "grad_norm": 16.979084014892578,
      "learning_rate": 7.180000000000001e-06,
      "loss": 0.1167,
      "step": 14360
    },
    {
      "epoch": 0.02353318539175062,
      "grad_norm": 6.135566711425781,
      "learning_rate": 7.190000000000001e-06,
      "loss": 0.1369,
      "step": 14380
    },
    {
      "epoch": 0.02356591583040396,
      "grad_norm": 10.0974702835083,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.1224,
      "step": 14400
    },
    {
      "epoch": 0.023598646269057298,
      "grad_norm": 13.67003345489502,
      "learning_rate": 7.2100000000000004e-06,
      "loss": 0.1263,
      "step": 14420
    },
    {
      "epoch": 0.023631376707710636,
      "grad_norm": 7.237561225891113,
      "learning_rate": 7.22e-06,
      "loss": 0.1449,
      "step": 14440
    },
    {
      "epoch": 0.023664107146363977,
      "grad_norm": 4.31002140045166,
      "learning_rate": 7.23e-06,
      "loss": 0.1389,
      "step": 14460
    },
    {
      "epoch": 0.023696837585017314,
      "grad_norm": 4.290844440460205,
      "learning_rate": 7.24e-06,
      "loss": 0.1276,
      "step": 14480
    },
    {
      "epoch": 0.023729568023670652,
      "grad_norm": 9.007561683654785,
      "learning_rate": 7.25e-06,
      "loss": 0.1274,
      "step": 14500
    },
    {
      "epoch": 0.023762298462323993,
      "grad_norm": 5.11496114730835,
      "learning_rate": 7.260000000000001e-06,
      "loss": 0.1379,
      "step": 14520
    },
    {
      "epoch": 0.02379502890097733,
      "grad_norm": 12.170449256896973,
      "learning_rate": 7.270000000000001e-06,
      "loss": 0.1193,
      "step": 14540
    },
    {
      "epoch": 0.02382775933963067,
      "grad_norm": 8.698596000671387,
      "learning_rate": 7.280000000000001e-06,
      "loss": 0.1004,
      "step": 14560
    },
    {
      "epoch": 0.02386048977828401,
      "grad_norm": 6.3362226486206055,
      "learning_rate": 7.2900000000000005e-06,
      "loss": 0.096,
      "step": 14580
    },
    {
      "epoch": 0.023893220216937346,
      "grad_norm": 8.762120246887207,
      "learning_rate": 7.3e-06,
      "loss": 0.1167,
      "step": 14600
    },
    {
      "epoch": 0.023925950655590687,
      "grad_norm": 6.271289825439453,
      "learning_rate": 7.31e-06,
      "loss": 0.1148,
      "step": 14620
    },
    {
      "epoch": 0.023958681094244025,
      "grad_norm": 15.105859756469727,
      "learning_rate": 7.32e-06,
      "loss": 0.1343,
      "step": 14640
    },
    {
      "epoch": 0.023991411532897362,
      "grad_norm": 22.294143676757812,
      "learning_rate": 7.33e-06,
      "loss": 0.1189,
      "step": 14660
    },
    {
      "epoch": 0.024024141971550703,
      "grad_norm": 7.499807834625244,
      "learning_rate": 7.340000000000001e-06,
      "loss": 0.1005,
      "step": 14680
    },
    {
      "epoch": 0.02405687241020404,
      "grad_norm": 64.98603057861328,
      "learning_rate": 7.350000000000001e-06,
      "loss": 0.1076,
      "step": 14700
    },
    {
      "epoch": 0.024089602848857382,
      "grad_norm": 10.042601585388184,
      "learning_rate": 7.360000000000001e-06,
      "loss": 0.1336,
      "step": 14720
    },
    {
      "epoch": 0.02412233328751072,
      "grad_norm": 23.202037811279297,
      "learning_rate": 7.370000000000001e-06,
      "loss": 0.1173,
      "step": 14740
    },
    {
      "epoch": 0.024155063726164057,
      "grad_norm": 8.746512413024902,
      "learning_rate": 7.3800000000000005e-06,
      "loss": 0.1188,
      "step": 14760
    },
    {
      "epoch": 0.024187794164817398,
      "grad_norm": 13.463057518005371,
      "learning_rate": 7.39e-06,
      "loss": 0.11,
      "step": 14780
    },
    {
      "epoch": 0.024220524603470735,
      "grad_norm": 5.841836452484131,
      "learning_rate": 7.4e-06,
      "loss": 0.116,
      "step": 14800
    },
    {
      "epoch": 0.024253255042124073,
      "grad_norm": 10.510108947753906,
      "learning_rate": 7.41e-06,
      "loss": 0.1252,
      "step": 14820
    },
    {
      "epoch": 0.024285985480777414,
      "grad_norm": 6.679780006408691,
      "learning_rate": 7.420000000000001e-06,
      "loss": 0.1144,
      "step": 14840
    },
    {
      "epoch": 0.02431871591943075,
      "grad_norm": 6.771546840667725,
      "learning_rate": 7.430000000000001e-06,
      "loss": 0.0925,
      "step": 14860
    },
    {
      "epoch": 0.024351446358084092,
      "grad_norm": 16.537031173706055,
      "learning_rate": 7.440000000000001e-06,
      "loss": 0.1122,
      "step": 14880
    },
    {
      "epoch": 0.02438417679673743,
      "grad_norm": 10.648541450500488,
      "learning_rate": 7.450000000000001e-06,
      "loss": 0.0962,
      "step": 14900
    },
    {
      "epoch": 0.024416907235390767,
      "grad_norm": 4.561477184295654,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.1054,
      "step": 14920
    },
    {
      "epoch": 0.02444963767404411,
      "grad_norm": 4.211126804351807,
      "learning_rate": 7.4700000000000005e-06,
      "loss": 0.1214,
      "step": 14940
    },
    {
      "epoch": 0.024482368112697446,
      "grad_norm": 4.185244560241699,
      "learning_rate": 7.48e-06,
      "loss": 0.1178,
      "step": 14960
    },
    {
      "epoch": 0.024515098551350784,
      "grad_norm": 8.208000183105469,
      "learning_rate": 7.49e-06,
      "loss": 0.1221,
      "step": 14980
    },
    {
      "epoch": 0.024547828990004125,
      "grad_norm": 7.539600372314453,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.1386,
      "step": 15000
    },
    {
      "epoch": 0.024580559428657462,
      "grad_norm": 5.038051605224609,
      "learning_rate": 7.510000000000001e-06,
      "loss": 0.1168,
      "step": 15020
    },
    {
      "epoch": 0.024613289867310803,
      "grad_norm": 15.81421947479248,
      "learning_rate": 7.520000000000001e-06,
      "loss": 0.1129,
      "step": 15040
    },
    {
      "epoch": 0.02464602030596414,
      "grad_norm": 5.118354797363281,
      "learning_rate": 7.530000000000001e-06,
      "loss": 0.1094,
      "step": 15060
    },
    {
      "epoch": 0.024678750744617478,
      "grad_norm": 3.6816394329071045,
      "learning_rate": 7.540000000000001e-06,
      "loss": 0.111,
      "step": 15080
    },
    {
      "epoch": 0.02471148118327082,
      "grad_norm": 7.416588306427002,
      "learning_rate": 7.5500000000000006e-06,
      "loss": 0.1329,
      "step": 15100
    },
    {
      "epoch": 0.024744211621924157,
      "grad_norm": 9.875120162963867,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 0.1148,
      "step": 15120
    },
    {
      "epoch": 0.024776942060577494,
      "grad_norm": 9.364028930664062,
      "learning_rate": 7.57e-06,
      "loss": 0.1487,
      "step": 15140
    },
    {
      "epoch": 0.024809672499230835,
      "grad_norm": 4.738760471343994,
      "learning_rate": 7.58e-06,
      "loss": 0.1128,
      "step": 15160
    },
    {
      "epoch": 0.024842402937884173,
      "grad_norm": 6.653000831604004,
      "learning_rate": 7.590000000000001e-06,
      "loss": 0.1204,
      "step": 15180
    },
    {
      "epoch": 0.024875133376537514,
      "grad_norm": 15.704257011413574,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.0989,
      "step": 15200
    },
    {
      "epoch": 0.02490786381519085,
      "grad_norm": 5.879464149475098,
      "learning_rate": 7.610000000000001e-06,
      "loss": 0.1122,
      "step": 15220
    },
    {
      "epoch": 0.02494059425384419,
      "grad_norm": 10.674450874328613,
      "learning_rate": 7.620000000000001e-06,
      "loss": 0.1298,
      "step": 15240
    },
    {
      "epoch": 0.02497332469249753,
      "grad_norm": 120.94923400878906,
      "learning_rate": 7.630000000000001e-06,
      "loss": 0.1232,
      "step": 15260
    },
    {
      "epoch": 0.025006055131150867,
      "grad_norm": 5.098135471343994,
      "learning_rate": 7.640000000000001e-06,
      "loss": 0.1126,
      "step": 15280
    },
    {
      "epoch": 0.025038785569804208,
      "grad_norm": 40.88947677612305,
      "learning_rate": 7.650000000000001e-06,
      "loss": 0.1321,
      "step": 15300
    },
    {
      "epoch": 0.025071516008457546,
      "grad_norm": 6.380396842956543,
      "learning_rate": 7.660000000000001e-06,
      "loss": 0.1105,
      "step": 15320
    },
    {
      "epoch": 0.025104246447110883,
      "grad_norm": 6.212552070617676,
      "learning_rate": 7.670000000000001e-06,
      "loss": 0.1147,
      "step": 15340
    },
    {
      "epoch": 0.025136976885764224,
      "grad_norm": 11.00952434539795,
      "learning_rate": 7.680000000000001e-06,
      "loss": 0.1251,
      "step": 15360
    },
    {
      "epoch": 0.025169707324417562,
      "grad_norm": 6.580162048339844,
      "learning_rate": 7.690000000000001e-06,
      "loss": 0.1102,
      "step": 15380
    },
    {
      "epoch": 0.0252024377630709,
      "grad_norm": 2.446596622467041,
      "learning_rate": 7.7e-06,
      "loss": 0.1172,
      "step": 15400
    },
    {
      "epoch": 0.02523516820172424,
      "grad_norm": 3.6338963508605957,
      "learning_rate": 7.71e-06,
      "loss": 0.1516,
      "step": 15420
    },
    {
      "epoch": 0.025267898640377578,
      "grad_norm": 13.765000343322754,
      "learning_rate": 7.72e-06,
      "loss": 0.1112,
      "step": 15440
    },
    {
      "epoch": 0.02530062907903092,
      "grad_norm": 1.7928531169891357,
      "learning_rate": 7.73e-06,
      "loss": 0.1198,
      "step": 15460
    },
    {
      "epoch": 0.025333359517684256,
      "grad_norm": 4.760053634643555,
      "learning_rate": 7.74e-06,
      "loss": 0.0967,
      "step": 15480
    },
    {
      "epoch": 0.025366089956337594,
      "grad_norm": 6.644926071166992,
      "learning_rate": 7.75e-06,
      "loss": 0.0959,
      "step": 15500
    },
    {
      "epoch": 0.025398820394990935,
      "grad_norm": 9.395622253417969,
      "learning_rate": 7.76e-06,
      "loss": 0.1176,
      "step": 15520
    },
    {
      "epoch": 0.025431550833644272,
      "grad_norm": 8.21696949005127,
      "learning_rate": 7.77e-06,
      "loss": 0.1317,
      "step": 15540
    },
    {
      "epoch": 0.02546428127229761,
      "grad_norm": 6.577255725860596,
      "learning_rate": 7.78e-06,
      "loss": 0.1265,
      "step": 15560
    },
    {
      "epoch": 0.02549701171095095,
      "grad_norm": 13.536088943481445,
      "learning_rate": 7.790000000000002e-06,
      "loss": 0.1084,
      "step": 15580
    },
    {
      "epoch": 0.02552974214960429,
      "grad_norm": 4.322021961212158,
      "learning_rate": 7.800000000000002e-06,
      "loss": 0.1309,
      "step": 15600
    },
    {
      "epoch": 0.02556247258825763,
      "grad_norm": 9.328498840332031,
      "learning_rate": 7.810000000000001e-06,
      "loss": 0.1204,
      "step": 15620
    },
    {
      "epoch": 0.025595203026910967,
      "grad_norm": 9.875996589660645,
      "learning_rate": 7.820000000000001e-06,
      "loss": 0.1221,
      "step": 15640
    },
    {
      "epoch": 0.025627933465564304,
      "grad_norm": 9.187349319458008,
      "learning_rate": 7.830000000000001e-06,
      "loss": 0.1098,
      "step": 15660
    },
    {
      "epoch": 0.025660663904217645,
      "grad_norm": 12.399301528930664,
      "learning_rate": 7.840000000000001e-06,
      "loss": 0.1239,
      "step": 15680
    },
    {
      "epoch": 0.025693394342870983,
      "grad_norm": 4.339240550994873,
      "learning_rate": 7.850000000000001e-06,
      "loss": 0.1233,
      "step": 15700
    },
    {
      "epoch": 0.02572612478152432,
      "grad_norm": 8.420233726501465,
      "learning_rate": 7.860000000000001e-06,
      "loss": 0.128,
      "step": 15720
    },
    {
      "epoch": 0.02575885522017766,
      "grad_norm": 9.26225757598877,
      "learning_rate": 7.870000000000001e-06,
      "loss": 0.1418,
      "step": 15740
    },
    {
      "epoch": 0.025791585658831,
      "grad_norm": 9.99677562713623,
      "learning_rate": 7.88e-06,
      "loss": 0.1076,
      "step": 15760
    },
    {
      "epoch": 0.02582431609748434,
      "grad_norm": 13.679707527160645,
      "learning_rate": 7.89e-06,
      "loss": 0.1382,
      "step": 15780
    },
    {
      "epoch": 0.025857046536137678,
      "grad_norm": 10.982840538024902,
      "learning_rate": 7.9e-06,
      "loss": 0.1068,
      "step": 15800
    },
    {
      "epoch": 0.025889776974791015,
      "grad_norm": 4.017818927764893,
      "learning_rate": 7.91e-06,
      "loss": 0.1216,
      "step": 15820
    },
    {
      "epoch": 0.025922507413444356,
      "grad_norm": 7.636927604675293,
      "learning_rate": 7.92e-06,
      "loss": 0.1399,
      "step": 15840
    },
    {
      "epoch": 0.025955237852097694,
      "grad_norm": 5.077426910400391,
      "learning_rate": 7.93e-06,
      "loss": 0.1098,
      "step": 15860
    },
    {
      "epoch": 0.02598796829075103,
      "grad_norm": 8.15695858001709,
      "learning_rate": 7.94e-06,
      "loss": 0.1021,
      "step": 15880
    },
    {
      "epoch": 0.026020698729404372,
      "grad_norm": 5.834659576416016,
      "learning_rate": 7.950000000000002e-06,
      "loss": 0.1194,
      "step": 15900
    },
    {
      "epoch": 0.02605342916805771,
      "grad_norm": 16.123613357543945,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.1079,
      "step": 15920
    },
    {
      "epoch": 0.02608615960671105,
      "grad_norm": 7.314253807067871,
      "learning_rate": 7.970000000000002e-06,
      "loss": 0.1078,
      "step": 15940
    },
    {
      "epoch": 0.026118890045364388,
      "grad_norm": 11.819263458251953,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.1072,
      "step": 15960
    },
    {
      "epoch": 0.026151620484017726,
      "grad_norm": 4.963045597076416,
      "learning_rate": 7.990000000000001e-06,
      "loss": 0.1229,
      "step": 15980
    },
    {
      "epoch": 0.026184350922671067,
      "grad_norm": 5.325921058654785,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.1054,
      "step": 16000
    },
    {
      "epoch": 0.026217081361324404,
      "grad_norm": 6.674448490142822,
      "learning_rate": 8.010000000000001e-06,
      "loss": 0.1136,
      "step": 16020
    },
    {
      "epoch": 0.02624981179997774,
      "grad_norm": 6.8690290451049805,
      "learning_rate": 8.020000000000001e-06,
      "loss": 0.1017,
      "step": 16040
    },
    {
      "epoch": 0.026282542238631083,
      "grad_norm": 9.169642448425293,
      "learning_rate": 8.030000000000001e-06,
      "loss": 0.11,
      "step": 16060
    },
    {
      "epoch": 0.02631527267728442,
      "grad_norm": 3.404538869857788,
      "learning_rate": 8.040000000000001e-06,
      "loss": 0.1458,
      "step": 16080
    },
    {
      "epoch": 0.02634800311593776,
      "grad_norm": 3.4216299057006836,
      "learning_rate": 8.050000000000001e-06,
      "loss": 0.1077,
      "step": 16100
    },
    {
      "epoch": 0.0263807335545911,
      "grad_norm": 5.2212419509887695,
      "learning_rate": 8.06e-06,
      "loss": 0.121,
      "step": 16120
    },
    {
      "epoch": 0.026413463993244436,
      "grad_norm": 7.416235446929932,
      "learning_rate": 8.07e-06,
      "loss": 0.1097,
      "step": 16140
    },
    {
      "epoch": 0.026446194431897777,
      "grad_norm": 8.874210357666016,
      "learning_rate": 8.08e-06,
      "loss": 0.1076,
      "step": 16160
    },
    {
      "epoch": 0.026478924870551115,
      "grad_norm": 45.82361602783203,
      "learning_rate": 8.09e-06,
      "loss": 0.1316,
      "step": 16180
    },
    {
      "epoch": 0.026511655309204452,
      "grad_norm": 16.98866081237793,
      "learning_rate": 8.1e-06,
      "loss": 0.1237,
      "step": 16200
    },
    {
      "epoch": 0.026544385747857793,
      "grad_norm": 32.067134857177734,
      "learning_rate": 8.110000000000002e-06,
      "loss": 0.1251,
      "step": 16220
    },
    {
      "epoch": 0.02657711618651113,
      "grad_norm": 10.72386646270752,
      "learning_rate": 8.120000000000002e-06,
      "loss": 0.1124,
      "step": 16240
    },
    {
      "epoch": 0.026609846625164472,
      "grad_norm": 13.680671691894531,
      "learning_rate": 8.13e-06,
      "loss": 0.1102,
      "step": 16260
    },
    {
      "epoch": 0.02664257706381781,
      "grad_norm": 32.401912689208984,
      "learning_rate": 8.14e-06,
      "loss": 0.1445,
      "step": 16280
    },
    {
      "epoch": 0.026675307502471147,
      "grad_norm": 8.953108787536621,
      "learning_rate": 8.15e-06,
      "loss": 0.1249,
      "step": 16300
    },
    {
      "epoch": 0.026708037941124488,
      "grad_norm": 8.461546897888184,
      "learning_rate": 8.16e-06,
      "loss": 0.142,
      "step": 16320
    },
    {
      "epoch": 0.026740768379777825,
      "grad_norm": 7.334432601928711,
      "learning_rate": 8.17e-06,
      "loss": 0.1164,
      "step": 16340
    },
    {
      "epoch": 0.026773498818431166,
      "grad_norm": 2.672496795654297,
      "learning_rate": 8.18e-06,
      "loss": 0.132,
      "step": 16360
    },
    {
      "epoch": 0.026806229257084504,
      "grad_norm": 12.73007583618164,
      "learning_rate": 8.19e-06,
      "loss": 0.1255,
      "step": 16380
    },
    {
      "epoch": 0.02683895969573784,
      "grad_norm": 53.23210525512695,
      "learning_rate": 8.2e-06,
      "loss": 0.1365,
      "step": 16400
    },
    {
      "epoch": 0.026871690134391182,
      "grad_norm": 4.912330150604248,
      "learning_rate": 8.210000000000001e-06,
      "loss": 0.1183,
      "step": 16420
    },
    {
      "epoch": 0.02690442057304452,
      "grad_norm": 8.849454879760742,
      "learning_rate": 8.220000000000001e-06,
      "loss": 0.1206,
      "step": 16440
    },
    {
      "epoch": 0.026937151011697857,
      "grad_norm": 12.325180053710938,
      "learning_rate": 8.23e-06,
      "loss": 0.1139,
      "step": 16460
    },
    {
      "epoch": 0.0269698814503512,
      "grad_norm": 8.784723281860352,
      "learning_rate": 8.24e-06,
      "loss": 0.1098,
      "step": 16480
    },
    {
      "epoch": 0.027002611889004536,
      "grad_norm": 5.6344218254089355,
      "learning_rate": 8.25e-06,
      "loss": 0.1113,
      "step": 16500
    },
    {
      "epoch": 0.027035342327657877,
      "grad_norm": 19.53543472290039,
      "learning_rate": 8.26e-06,
      "loss": 0.1156,
      "step": 16520
    },
    {
      "epoch": 0.027068072766311214,
      "grad_norm": 6.5031561851501465,
      "learning_rate": 8.27e-06,
      "loss": 0.1294,
      "step": 16540
    },
    {
      "epoch": 0.027100803204964552,
      "grad_norm": 28.543134689331055,
      "learning_rate": 8.28e-06,
      "loss": 0.1005,
      "step": 16560
    },
    {
      "epoch": 0.027133533643617893,
      "grad_norm": 5.272019386291504,
      "learning_rate": 8.29e-06,
      "loss": 0.1227,
      "step": 16580
    },
    {
      "epoch": 0.02716626408227123,
      "grad_norm": 4.578763484954834,
      "learning_rate": 8.3e-06,
      "loss": 0.1216,
      "step": 16600
    },
    {
      "epoch": 0.027198994520924568,
      "grad_norm": 3.8870227336883545,
      "learning_rate": 8.31e-06,
      "loss": 0.116,
      "step": 16620
    },
    {
      "epoch": 0.02723172495957791,
      "grad_norm": 17.338008880615234,
      "learning_rate": 8.32e-06,
      "loss": 0.1312,
      "step": 16640
    },
    {
      "epoch": 0.027264455398231247,
      "grad_norm": 2.9127519130706787,
      "learning_rate": 8.33e-06,
      "loss": 0.104,
      "step": 16660
    },
    {
      "epoch": 0.027297185836884588,
      "grad_norm": 15.717615127563477,
      "learning_rate": 8.34e-06,
      "loss": 0.1126,
      "step": 16680
    },
    {
      "epoch": 0.027329916275537925,
      "grad_norm": 10.031432151794434,
      "learning_rate": 8.35e-06,
      "loss": 0.1238,
      "step": 16700
    },
    {
      "epoch": 0.027362646714191263,
      "grad_norm": 5.106947898864746,
      "learning_rate": 8.36e-06,
      "loss": 0.1145,
      "step": 16720
    },
    {
      "epoch": 0.027395377152844604,
      "grad_norm": 5.095993518829346,
      "learning_rate": 8.370000000000001e-06,
      "loss": 0.1198,
      "step": 16740
    },
    {
      "epoch": 0.02742810759149794,
      "grad_norm": 14.359042167663574,
      "learning_rate": 8.380000000000001e-06,
      "loss": 0.1194,
      "step": 16760
    },
    {
      "epoch": 0.02746083803015128,
      "grad_norm": 6.198281288146973,
      "learning_rate": 8.390000000000001e-06,
      "loss": 0.1154,
      "step": 16780
    },
    {
      "epoch": 0.02749356846880462,
      "grad_norm": 10.730459213256836,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.1275,
      "step": 16800
    },
    {
      "epoch": 0.027526298907457957,
      "grad_norm": 18.43828773498535,
      "learning_rate": 8.41e-06,
      "loss": 0.126,
      "step": 16820
    },
    {
      "epoch": 0.027559029346111298,
      "grad_norm": 17.55718231201172,
      "learning_rate": 8.42e-06,
      "loss": 0.1327,
      "step": 16840
    },
    {
      "epoch": 0.027591759784764636,
      "grad_norm": 9.53339672088623,
      "learning_rate": 8.43e-06,
      "loss": 0.128,
      "step": 16860
    },
    {
      "epoch": 0.027624490223417973,
      "grad_norm": 30.186189651489258,
      "learning_rate": 8.44e-06,
      "loss": 0.1361,
      "step": 16880
    },
    {
      "epoch": 0.027657220662071314,
      "grad_norm": 7.985255718231201,
      "learning_rate": 8.45e-06,
      "loss": 0.1312,
      "step": 16900
    },
    {
      "epoch": 0.02768995110072465,
      "grad_norm": 4.105451583862305,
      "learning_rate": 8.46e-06,
      "loss": 0.1277,
      "step": 16920
    },
    {
      "epoch": 0.02772268153937799,
      "grad_norm": 9.919106483459473,
      "learning_rate": 8.47e-06,
      "loss": 0.106,
      "step": 16940
    },
    {
      "epoch": 0.02775541197803133,
      "grad_norm": 4.998096466064453,
      "learning_rate": 8.48e-06,
      "loss": 0.1344,
      "step": 16960
    },
    {
      "epoch": 0.027788142416684668,
      "grad_norm": 8.529336929321289,
      "learning_rate": 8.49e-06,
      "loss": 0.1302,
      "step": 16980
    },
    {
      "epoch": 0.02782087285533801,
      "grad_norm": 7.189867973327637,
      "learning_rate": 8.5e-06,
      "loss": 0.0996,
      "step": 17000
    },
    {
      "epoch": 0.027853603293991346,
      "grad_norm": 9.0969820022583,
      "learning_rate": 8.51e-06,
      "loss": 0.1328,
      "step": 17020
    },
    {
      "epoch": 0.027886333732644684,
      "grad_norm": 5.966733455657959,
      "learning_rate": 8.52e-06,
      "loss": 0.1161,
      "step": 17040
    },
    {
      "epoch": 0.027919064171298025,
      "grad_norm": 13.493579864501953,
      "learning_rate": 8.530000000000001e-06,
      "loss": 0.1257,
      "step": 17060
    },
    {
      "epoch": 0.027951794609951362,
      "grad_norm": 5.589290142059326,
      "learning_rate": 8.540000000000001e-06,
      "loss": 0.1257,
      "step": 17080
    },
    {
      "epoch": 0.0279845250486047,
      "grad_norm": 5.785459041595459,
      "learning_rate": 8.550000000000001e-06,
      "loss": 0.1231,
      "step": 17100
    },
    {
      "epoch": 0.02801725548725804,
      "grad_norm": 254.62841796875,
      "learning_rate": 8.560000000000001e-06,
      "loss": 0.1087,
      "step": 17120
    },
    {
      "epoch": 0.02804998592591138,
      "grad_norm": 8.149259567260742,
      "learning_rate": 8.570000000000001e-06,
      "loss": 0.1341,
      "step": 17140
    },
    {
      "epoch": 0.02808271636456472,
      "grad_norm": 11.82103157043457,
      "learning_rate": 8.580000000000001e-06,
      "loss": 0.1441,
      "step": 17160
    },
    {
      "epoch": 0.028115446803218057,
      "grad_norm": 6.303851127624512,
      "learning_rate": 8.59e-06,
      "loss": 0.1175,
      "step": 17180
    },
    {
      "epoch": 0.028148177241871394,
      "grad_norm": 22.579193115234375,
      "learning_rate": 8.6e-06,
      "loss": 0.1227,
      "step": 17200
    },
    {
      "epoch": 0.028180907680524735,
      "grad_norm": 10.81120777130127,
      "learning_rate": 8.61e-06,
      "loss": 0.1077,
      "step": 17220
    },
    {
      "epoch": 0.028213638119178073,
      "grad_norm": 4.473446846008301,
      "learning_rate": 8.62e-06,
      "loss": 0.1166,
      "step": 17240
    },
    {
      "epoch": 0.02824636855783141,
      "grad_norm": 16.11052131652832,
      "learning_rate": 8.63e-06,
      "loss": 0.1265,
      "step": 17260
    },
    {
      "epoch": 0.02827909899648475,
      "grad_norm": 4.128331184387207,
      "learning_rate": 8.64e-06,
      "loss": 0.1182,
      "step": 17280
    },
    {
      "epoch": 0.02831182943513809,
      "grad_norm": 5.882116794586182,
      "learning_rate": 8.65e-06,
      "loss": 0.1282,
      "step": 17300
    },
    {
      "epoch": 0.02834455987379143,
      "grad_norm": 7.569882869720459,
      "learning_rate": 8.66e-06,
      "loss": 0.1179,
      "step": 17320
    },
    {
      "epoch": 0.028377290312444767,
      "grad_norm": 8.505380630493164,
      "learning_rate": 8.67e-06,
      "loss": 0.1206,
      "step": 17340
    },
    {
      "epoch": 0.028410020751098105,
      "grad_norm": 3.5168232917785645,
      "learning_rate": 8.68e-06,
      "loss": 0.0957,
      "step": 17360
    },
    {
      "epoch": 0.028442751189751446,
      "grad_norm": 18.793975830078125,
      "learning_rate": 8.690000000000002e-06,
      "loss": 0.1319,
      "step": 17380
    },
    {
      "epoch": 0.028475481628404783,
      "grad_norm": 7.643662452697754,
      "learning_rate": 8.700000000000001e-06,
      "loss": 0.123,
      "step": 17400
    },
    {
      "epoch": 0.02850821206705812,
      "grad_norm": 109.40150451660156,
      "learning_rate": 8.710000000000001e-06,
      "loss": 0.126,
      "step": 17420
    },
    {
      "epoch": 0.028540942505711462,
      "grad_norm": 6.272174835205078,
      "learning_rate": 8.720000000000001e-06,
      "loss": 0.1091,
      "step": 17440
    },
    {
      "epoch": 0.0285736729443648,
      "grad_norm": 10.786972999572754,
      "learning_rate": 8.730000000000001e-06,
      "loss": 0.1427,
      "step": 17460
    },
    {
      "epoch": 0.02860640338301814,
      "grad_norm": 10.771108627319336,
      "learning_rate": 8.740000000000001e-06,
      "loss": 0.1204,
      "step": 17480
    },
    {
      "epoch": 0.028639133821671478,
      "grad_norm": 41.213287353515625,
      "learning_rate": 8.750000000000001e-06,
      "loss": 0.1109,
      "step": 17500
    },
    {
      "epoch": 0.028671864260324816,
      "grad_norm": 6.42552375793457,
      "learning_rate": 8.76e-06,
      "loss": 0.1441,
      "step": 17520
    },
    {
      "epoch": 0.028704594698978157,
      "grad_norm": 3.7349259853363037,
      "learning_rate": 8.77e-06,
      "loss": 0.1272,
      "step": 17540
    },
    {
      "epoch": 0.028737325137631494,
      "grad_norm": 7.345031261444092,
      "learning_rate": 8.78e-06,
      "loss": 0.0937,
      "step": 17560
    },
    {
      "epoch": 0.028770055576284835,
      "grad_norm": 3.795016050338745,
      "learning_rate": 8.79e-06,
      "loss": 0.1179,
      "step": 17580
    },
    {
      "epoch": 0.028802786014938173,
      "grad_norm": 9.248824119567871,
      "learning_rate": 8.8e-06,
      "loss": 0.1194,
      "step": 17600
    },
    {
      "epoch": 0.02883551645359151,
      "grad_norm": 7.514312744140625,
      "learning_rate": 8.81e-06,
      "loss": 0.1215,
      "step": 17620
    },
    {
      "epoch": 0.02886824689224485,
      "grad_norm": 7.936649799346924,
      "learning_rate": 8.82e-06,
      "loss": 0.1211,
      "step": 17640
    },
    {
      "epoch": 0.02890097733089819,
      "grad_norm": 17.654375076293945,
      "learning_rate": 8.83e-06,
      "loss": 0.1063,
      "step": 17660
    },
    {
      "epoch": 0.028933707769551526,
      "grad_norm": 5.088466167449951,
      "learning_rate": 8.84e-06,
      "loss": 0.1234,
      "step": 17680
    },
    {
      "epoch": 0.028966438208204867,
      "grad_norm": 8.96152114868164,
      "learning_rate": 8.85e-06,
      "loss": 0.1233,
      "step": 17700
    },
    {
      "epoch": 0.028999168646858205,
      "grad_norm": 21.7332763671875,
      "learning_rate": 8.860000000000002e-06,
      "loss": 0.1259,
      "step": 17720
    },
    {
      "epoch": 0.029031899085511546,
      "grad_norm": 9.200284957885742,
      "learning_rate": 8.870000000000001e-06,
      "loss": 0.1331,
      "step": 17740
    },
    {
      "epoch": 0.029064629524164883,
      "grad_norm": 8.1887845993042,
      "learning_rate": 8.880000000000001e-06,
      "loss": 0.1231,
      "step": 17760
    },
    {
      "epoch": 0.02909735996281822,
      "grad_norm": 16.53696632385254,
      "learning_rate": 8.890000000000001e-06,
      "loss": 0.1157,
      "step": 17780
    },
    {
      "epoch": 0.02913009040147156,
      "grad_norm": 7.819525718688965,
      "learning_rate": 8.900000000000001e-06,
      "loss": 0.1212,
      "step": 17800
    },
    {
      "epoch": 0.0291628208401249,
      "grad_norm": 16.250532150268555,
      "learning_rate": 8.910000000000001e-06,
      "loss": 0.1389,
      "step": 17820
    },
    {
      "epoch": 0.029195551278778237,
      "grad_norm": 7.265993595123291,
      "learning_rate": 8.920000000000001e-06,
      "loss": 0.0992,
      "step": 17840
    },
    {
      "epoch": 0.029228281717431578,
      "grad_norm": 4.801103115081787,
      "learning_rate": 8.930000000000001e-06,
      "loss": 0.1273,
      "step": 17860
    },
    {
      "epoch": 0.029261012156084915,
      "grad_norm": 6.593010425567627,
      "learning_rate": 8.94e-06,
      "loss": 0.1424,
      "step": 17880
    },
    {
      "epoch": 0.029293742594738256,
      "grad_norm": 6.634941101074219,
      "learning_rate": 8.95e-06,
      "loss": 0.1076,
      "step": 17900
    },
    {
      "epoch": 0.029326473033391594,
      "grad_norm": 7.669236183166504,
      "learning_rate": 8.96e-06,
      "loss": 0.1248,
      "step": 17920
    },
    {
      "epoch": 0.02935920347204493,
      "grad_norm": 4.886868953704834,
      "learning_rate": 8.97e-06,
      "loss": 0.1316,
      "step": 17940
    },
    {
      "epoch": 0.029391933910698272,
      "grad_norm": 49.438377380371094,
      "learning_rate": 8.98e-06,
      "loss": 0.1371,
      "step": 17960
    },
    {
      "epoch": 0.02942466434935161,
      "grad_norm": 12.033476829528809,
      "learning_rate": 8.99e-06,
      "loss": 0.1227,
      "step": 17980
    },
    {
      "epoch": 0.029457394788004947,
      "grad_norm": 8.182933807373047,
      "learning_rate": 9e-06,
      "loss": 0.1353,
      "step": 18000
    },
    {
      "epoch": 0.02949012522665829,
      "grad_norm": 6.198143482208252,
      "learning_rate": 9.01e-06,
      "loss": 0.1427,
      "step": 18020
    },
    {
      "epoch": 0.029522855665311626,
      "grad_norm": 4.676893711090088,
      "learning_rate": 9.020000000000002e-06,
      "loss": 0.1476,
      "step": 18040
    },
    {
      "epoch": 0.029555586103964967,
      "grad_norm": 6.268619537353516,
      "learning_rate": 9.030000000000002e-06,
      "loss": 0.1191,
      "step": 18060
    },
    {
      "epoch": 0.029588316542618304,
      "grad_norm": 6.869527816772461,
      "learning_rate": 9.040000000000002e-06,
      "loss": 0.1235,
      "step": 18080
    },
    {
      "epoch": 0.029621046981271642,
      "grad_norm": 10.599645614624023,
      "learning_rate": 9.050000000000001e-06,
      "loss": 0.1138,
      "step": 18100
    },
    {
      "epoch": 0.029653777419924983,
      "grad_norm": 5.949001789093018,
      "learning_rate": 9.060000000000001e-06,
      "loss": 0.1165,
      "step": 18120
    },
    {
      "epoch": 0.02968650785857832,
      "grad_norm": 5.9183125495910645,
      "learning_rate": 9.070000000000001e-06,
      "loss": 0.1271,
      "step": 18140
    },
    {
      "epoch": 0.029719238297231658,
      "grad_norm": 10.813528060913086,
      "learning_rate": 9.080000000000001e-06,
      "loss": 0.1133,
      "step": 18160
    },
    {
      "epoch": 0.029751968735885,
      "grad_norm": 3.189835786819458,
      "learning_rate": 9.090000000000001e-06,
      "loss": 0.1136,
      "step": 18180
    },
    {
      "epoch": 0.029784699174538336,
      "grad_norm": 47.71329879760742,
      "learning_rate": 9.100000000000001e-06,
      "loss": 0.1457,
      "step": 18200
    },
    {
      "epoch": 0.029817429613191677,
      "grad_norm": 5.866307258605957,
      "learning_rate": 9.110000000000001e-06,
      "loss": 0.1316,
      "step": 18220
    },
    {
      "epoch": 0.029850160051845015,
      "grad_norm": 14.520153045654297,
      "learning_rate": 9.12e-06,
      "loss": 0.1191,
      "step": 18240
    },
    {
      "epoch": 0.029882890490498352,
      "grad_norm": 21.3607234954834,
      "learning_rate": 9.13e-06,
      "loss": 0.1261,
      "step": 18260
    },
    {
      "epoch": 0.029915620929151693,
      "grad_norm": 9.499261856079102,
      "learning_rate": 9.14e-06,
      "loss": 0.1452,
      "step": 18280
    },
    {
      "epoch": 0.02994835136780503,
      "grad_norm": 14.783851623535156,
      "learning_rate": 9.15e-06,
      "loss": 0.1368,
      "step": 18300
    },
    {
      "epoch": 0.02998108180645837,
      "grad_norm": 6.7880120277404785,
      "learning_rate": 9.16e-06,
      "loss": 0.1269,
      "step": 18320
    },
    {
      "epoch": 0.03001381224511171,
      "grad_norm": 11.279929161071777,
      "learning_rate": 9.17e-06,
      "loss": 0.14,
      "step": 18340
    },
    {
      "epoch": 0.030046542683765047,
      "grad_norm": 7.410326957702637,
      "learning_rate": 9.180000000000002e-06,
      "loss": 0.112,
      "step": 18360
    },
    {
      "epoch": 0.030079273122418388,
      "grad_norm": 9.20706558227539,
      "learning_rate": 9.190000000000002e-06,
      "loss": 0.1223,
      "step": 18380
    },
    {
      "epoch": 0.030112003561071726,
      "grad_norm": 17.025833129882812,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.1341,
      "step": 18400
    },
    {
      "epoch": 0.030144733999725063,
      "grad_norm": 11.833775520324707,
      "learning_rate": 9.210000000000002e-06,
      "loss": 0.1149,
      "step": 18420
    },
    {
      "epoch": 0.030177464438378404,
      "grad_norm": 8.466004371643066,
      "learning_rate": 9.220000000000002e-06,
      "loss": 0.1276,
      "step": 18440
    },
    {
      "epoch": 0.03021019487703174,
      "grad_norm": 15.14416217803955,
      "learning_rate": 9.230000000000001e-06,
      "loss": 0.128,
      "step": 18460
    },
    {
      "epoch": 0.03024292531568508,
      "grad_norm": 8.050102233886719,
      "learning_rate": 9.240000000000001e-06,
      "loss": 0.1269,
      "step": 18480
    },
    {
      "epoch": 0.03027565575433842,
      "grad_norm": 5.011148452758789,
      "learning_rate": 9.250000000000001e-06,
      "loss": 0.1024,
      "step": 18500
    },
    {
      "epoch": 0.030308386192991758,
      "grad_norm": 11.357636451721191,
      "learning_rate": 9.260000000000001e-06,
      "loss": 0.1508,
      "step": 18520
    },
    {
      "epoch": 0.0303411166316451,
      "grad_norm": 5.1126909255981445,
      "learning_rate": 9.270000000000001e-06,
      "loss": 0.1358,
      "step": 18540
    },
    {
      "epoch": 0.030373847070298436,
      "grad_norm": 7.315777778625488,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.1168,
      "step": 18560
    },
    {
      "epoch": 0.030406577508951774,
      "grad_norm": 3.9571471214294434,
      "learning_rate": 9.29e-06,
      "loss": 0.1142,
      "step": 18580
    },
    {
      "epoch": 0.030439307947605115,
      "grad_norm": 8.91100788116455,
      "learning_rate": 9.3e-06,
      "loss": 0.1158,
      "step": 18600
    },
    {
      "epoch": 0.030472038386258452,
      "grad_norm": 10.299768447875977,
      "learning_rate": 9.31e-06,
      "loss": 0.1071,
      "step": 18620
    },
    {
      "epoch": 0.030504768824911793,
      "grad_norm": 5.5391621589660645,
      "learning_rate": 9.32e-06,
      "loss": 0.1391,
      "step": 18640
    },
    {
      "epoch": 0.03053749926356513,
      "grad_norm": 6.30645751953125,
      "learning_rate": 9.33e-06,
      "loss": 0.1325,
      "step": 18660
    },
    {
      "epoch": 0.030570229702218468,
      "grad_norm": 6.631401538848877,
      "learning_rate": 9.340000000000002e-06,
      "loss": 0.1193,
      "step": 18680
    },
    {
      "epoch": 0.03060296014087181,
      "grad_norm": 6.178687572479248,
      "learning_rate": 9.350000000000002e-06,
      "loss": 0.1101,
      "step": 18700
    },
    {
      "epoch": 0.030635690579525147,
      "grad_norm": 6.3874030113220215,
      "learning_rate": 9.360000000000002e-06,
      "loss": 0.13,
      "step": 18720
    },
    {
      "epoch": 0.030668421018178484,
      "grad_norm": 11.122551918029785,
      "learning_rate": 9.370000000000002e-06,
      "loss": 0.1085,
      "step": 18740
    },
    {
      "epoch": 0.030701151456831825,
      "grad_norm": 9.01845645904541,
      "learning_rate": 9.38e-06,
      "loss": 0.1332,
      "step": 18760
    },
    {
      "epoch": 0.030733881895485163,
      "grad_norm": 7.886573791503906,
      "learning_rate": 9.39e-06,
      "loss": 0.1326,
      "step": 18780
    },
    {
      "epoch": 0.030766612334138504,
      "grad_norm": 7.267894744873047,
      "learning_rate": 9.4e-06,
      "loss": 0.1104,
      "step": 18800
    },
    {
      "epoch": 0.03079934277279184,
      "grad_norm": 21.28951072692871,
      "learning_rate": 9.41e-06,
      "loss": 0.1019,
      "step": 18820
    },
    {
      "epoch": 0.03083207321144518,
      "grad_norm": 15.626639366149902,
      "learning_rate": 9.42e-06,
      "loss": 0.1201,
      "step": 18840
    },
    {
      "epoch": 0.03086480365009852,
      "grad_norm": 7.1845197677612305,
      "learning_rate": 9.43e-06,
      "loss": 0.1149,
      "step": 18860
    },
    {
      "epoch": 0.030897534088751857,
      "grad_norm": 4.093739032745361,
      "learning_rate": 9.440000000000001e-06,
      "loss": 0.1173,
      "step": 18880
    },
    {
      "epoch": 0.030930264527405195,
      "grad_norm": 9.110708236694336,
      "learning_rate": 9.450000000000001e-06,
      "loss": 0.1215,
      "step": 18900
    },
    {
      "epoch": 0.030962994966058536,
      "grad_norm": 7.1619391441345215,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.1167,
      "step": 18920
    },
    {
      "epoch": 0.030995725404711873,
      "grad_norm": 3.931100845336914,
      "learning_rate": 9.47e-06,
      "loss": 0.1018,
      "step": 18940
    },
    {
      "epoch": 0.031028455843365214,
      "grad_norm": 3.3948135375976562,
      "learning_rate": 9.48e-06,
      "loss": 0.1295,
      "step": 18960
    },
    {
      "epoch": 0.031061186282018552,
      "grad_norm": 9.149520874023438,
      "learning_rate": 9.49e-06,
      "loss": 0.141,
      "step": 18980
    },
    {
      "epoch": 0.03109391672067189,
      "grad_norm": 10.215726852416992,
      "learning_rate": 9.5e-06,
      "loss": 0.126,
      "step": 19000
    },
    {
      "epoch": 0.03112664715932523,
      "grad_norm": 5.60050630569458,
      "learning_rate": 9.51e-06,
      "loss": 0.1249,
      "step": 19020
    },
    {
      "epoch": 0.031159377597978568,
      "grad_norm": 4.888531684875488,
      "learning_rate": 9.52e-06,
      "loss": 0.1063,
      "step": 19040
    },
    {
      "epoch": 0.031192108036631905,
      "grad_norm": 4.034884452819824,
      "learning_rate": 9.53e-06,
      "loss": 0.1299,
      "step": 19060
    },
    {
      "epoch": 0.031224838475285246,
      "grad_norm": 4.514904975891113,
      "learning_rate": 9.54e-06,
      "loss": 0.1502,
      "step": 19080
    },
    {
      "epoch": 0.031257568913938584,
      "grad_norm": 12.446372032165527,
      "learning_rate": 9.55e-06,
      "loss": 0.1246,
      "step": 19100
    },
    {
      "epoch": 0.03129029935259192,
      "grad_norm": 8.596757888793945,
      "learning_rate": 9.56e-06,
      "loss": 0.1113,
      "step": 19120
    },
    {
      "epoch": 0.03132302979124526,
      "grad_norm": 17.965246200561523,
      "learning_rate": 9.57e-06,
      "loss": 0.1417,
      "step": 19140
    },
    {
      "epoch": 0.031355760229898604,
      "grad_norm": 4.151477813720703,
      "learning_rate": 9.58e-06,
      "loss": 0.1182,
      "step": 19160
    },
    {
      "epoch": 0.03138849066855194,
      "grad_norm": 6.6123881340026855,
      "learning_rate": 9.59e-06,
      "loss": 0.139,
      "step": 19180
    },
    {
      "epoch": 0.03142122110720528,
      "grad_norm": 15.86290168762207,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.1168,
      "step": 19200
    },
    {
      "epoch": 0.031453951545858616,
      "grad_norm": 17.08133316040039,
      "learning_rate": 9.610000000000001e-06,
      "loss": 0.1085,
      "step": 19220
    },
    {
      "epoch": 0.031486681984511954,
      "grad_norm": 8.32790756225586,
      "learning_rate": 9.620000000000001e-06,
      "loss": 0.1265,
      "step": 19240
    },
    {
      "epoch": 0.0315194124231653,
      "grad_norm": 18.773944854736328,
      "learning_rate": 9.630000000000001e-06,
      "loss": 0.1192,
      "step": 19260
    },
    {
      "epoch": 0.031552142861818636,
      "grad_norm": 8.027250289916992,
      "learning_rate": 9.640000000000001e-06,
      "loss": 0.1166,
      "step": 19280
    },
    {
      "epoch": 0.03158487330047197,
      "grad_norm": 5.329442024230957,
      "learning_rate": 9.65e-06,
      "loss": 0.1131,
      "step": 19300
    },
    {
      "epoch": 0.03161760373912531,
      "grad_norm": 6.676128387451172,
      "learning_rate": 9.66e-06,
      "loss": 0.0973,
      "step": 19320
    },
    {
      "epoch": 0.03165033417777865,
      "grad_norm": 2.8437163829803467,
      "learning_rate": 9.67e-06,
      "loss": 0.1249,
      "step": 19340
    },
    {
      "epoch": 0.03168306461643199,
      "grad_norm": 3.947483777999878,
      "learning_rate": 9.68e-06,
      "loss": 0.1261,
      "step": 19360
    },
    {
      "epoch": 0.03171579505508533,
      "grad_norm": 6.929433345794678,
      "learning_rate": 9.69e-06,
      "loss": 0.1509,
      "step": 19380
    },
    {
      "epoch": 0.03174852549373867,
      "grad_norm": 12.228487014770508,
      "learning_rate": 9.7e-06,
      "loss": 0.1349,
      "step": 19400
    },
    {
      "epoch": 0.031781255932392005,
      "grad_norm": 8.290756225585938,
      "learning_rate": 9.71e-06,
      "loss": 0.1376,
      "step": 19420
    },
    {
      "epoch": 0.03181398637104534,
      "grad_norm": 5.072436332702637,
      "learning_rate": 9.72e-06,
      "loss": 0.1194,
      "step": 19440
    },
    {
      "epoch": 0.03184671680969868,
      "grad_norm": 5.29913330078125,
      "learning_rate": 9.73e-06,
      "loss": 0.1404,
      "step": 19460
    },
    {
      "epoch": 0.031879447248352025,
      "grad_norm": 6.407949924468994,
      "learning_rate": 9.74e-06,
      "loss": 0.1344,
      "step": 19480
    },
    {
      "epoch": 0.03191217768700536,
      "grad_norm": 7.476822376251221,
      "learning_rate": 9.75e-06,
      "loss": 0.1071,
      "step": 19500
    },
    {
      "epoch": 0.0319449081256587,
      "grad_norm": 5.059877872467041,
      "learning_rate": 9.760000000000001e-06,
      "loss": 0.1152,
      "step": 19520
    },
    {
      "epoch": 0.03197763856431204,
      "grad_norm": 8.697407722473145,
      "learning_rate": 9.770000000000001e-06,
      "loss": 0.1235,
      "step": 19540
    },
    {
      "epoch": 0.032010369002965375,
      "grad_norm": 11.526402473449707,
      "learning_rate": 9.780000000000001e-06,
      "loss": 0.1033,
      "step": 19560
    },
    {
      "epoch": 0.03204309944161872,
      "grad_norm": 13.901144981384277,
      "learning_rate": 9.790000000000001e-06,
      "loss": 0.1199,
      "step": 19580
    },
    {
      "epoch": 0.03207582988027206,
      "grad_norm": 11.595410346984863,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.1148,
      "step": 19600
    },
    {
      "epoch": 0.032108560318925394,
      "grad_norm": 7.193914413452148,
      "learning_rate": 9.810000000000001e-06,
      "loss": 0.1443,
      "step": 19620
    },
    {
      "epoch": 0.03214129075757873,
      "grad_norm": 6.191033363342285,
      "learning_rate": 9.820000000000001e-06,
      "loss": 0.1242,
      "step": 19640
    },
    {
      "epoch": 0.03217402119623207,
      "grad_norm": 5.484636306762695,
      "learning_rate": 9.83e-06,
      "loss": 0.1166,
      "step": 19660
    },
    {
      "epoch": 0.032206751634885414,
      "grad_norm": 5.7635498046875,
      "learning_rate": 9.84e-06,
      "loss": 0.1176,
      "step": 19680
    },
    {
      "epoch": 0.03223948207353875,
      "grad_norm": 5.6122965812683105,
      "learning_rate": 9.85e-06,
      "loss": 0.1237,
      "step": 19700
    },
    {
      "epoch": 0.03227221251219209,
      "grad_norm": 3.688183546066284,
      "learning_rate": 9.86e-06,
      "loss": 0.1088,
      "step": 19720
    },
    {
      "epoch": 0.032304942950845426,
      "grad_norm": 6.573989391326904,
      "learning_rate": 9.87e-06,
      "loss": 0.1083,
      "step": 19740
    },
    {
      "epoch": 0.032337673389498764,
      "grad_norm": 1.6697605848312378,
      "learning_rate": 9.88e-06,
      "loss": 0.1195,
      "step": 19760
    },
    {
      "epoch": 0.0323704038281521,
      "grad_norm": 6.71172571182251,
      "learning_rate": 9.89e-06,
      "loss": 0.1322,
      "step": 19780
    },
    {
      "epoch": 0.032403134266805446,
      "grad_norm": 4.56538200378418,
      "learning_rate": 9.9e-06,
      "loss": 0.1175,
      "step": 19800
    },
    {
      "epoch": 0.03243586470545878,
      "grad_norm": 9.480209350585938,
      "learning_rate": 9.91e-06,
      "loss": 0.1299,
      "step": 19820
    },
    {
      "epoch": 0.03246859514411212,
      "grad_norm": 8.464520454406738,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.0977,
      "step": 19840
    },
    {
      "epoch": 0.03250132558276546,
      "grad_norm": 6.123773574829102,
      "learning_rate": 9.930000000000001e-06,
      "loss": 0.1377,
      "step": 19860
    },
    {
      "epoch": 0.032534056021418796,
      "grad_norm": 5.971806526184082,
      "learning_rate": 9.940000000000001e-06,
      "loss": 0.1142,
      "step": 19880
    },
    {
      "epoch": 0.03256678646007214,
      "grad_norm": 6.847086429595947,
      "learning_rate": 9.950000000000001e-06,
      "loss": 0.1103,
      "step": 19900
    },
    {
      "epoch": 0.03259951689872548,
      "grad_norm": 10.281503677368164,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.1261,
      "step": 19920
    },
    {
      "epoch": 0.032632247337378815,
      "grad_norm": 14.372685432434082,
      "learning_rate": 9.970000000000001e-06,
      "loss": 0.112,
      "step": 19940
    },
    {
      "epoch": 0.03266497777603215,
      "grad_norm": 15.085270881652832,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.118,
      "step": 19960
    },
    {
      "epoch": 0.03269770821468549,
      "grad_norm": 9.460223197937012,
      "learning_rate": 9.990000000000001e-06,
      "loss": 0.1286,
      "step": 19980
    },
    {
      "epoch": 0.032730438653338835,
      "grad_norm": 3.948519229888916,
      "learning_rate": 1e-05,
      "loss": 0.146,
      "step": 20000
    },
    {
      "epoch": 0.03276316909199217,
      "grad_norm": 9.868666648864746,
      "learning_rate": 9.999934107786484e-06,
      "loss": 0.1448,
      "step": 20020
    },
    {
      "epoch": 0.03279589953064551,
      "grad_norm": 3.4179372787475586,
      "learning_rate": 9.999868215572966e-06,
      "loss": 0.1411,
      "step": 20040
    },
    {
      "epoch": 0.03282862996929885,
      "grad_norm": 16.626556396484375,
      "learning_rate": 9.99980232335945e-06,
      "loss": 0.1294,
      "step": 20060
    },
    {
      "epoch": 0.032861360407952185,
      "grad_norm": 4.434700012207031,
      "learning_rate": 9.999736431145932e-06,
      "loss": 0.1351,
      "step": 20080
    },
    {
      "epoch": 0.03289409084660553,
      "grad_norm": 5.321785926818848,
      "learning_rate": 9.999670538932415e-06,
      "loss": 0.1371,
      "step": 20100
    },
    {
      "epoch": 0.03292682128525887,
      "grad_norm": 10.533143043518066,
      "learning_rate": 9.999604646718897e-06,
      "loss": 0.1348,
      "step": 20120
    },
    {
      "epoch": 0.032959551723912205,
      "grad_norm": 9.849065780639648,
      "learning_rate": 9.99953875450538e-06,
      "loss": 0.105,
      "step": 20140
    },
    {
      "epoch": 0.03299228216256554,
      "grad_norm": 4.207522392272949,
      "learning_rate": 9.999472862291863e-06,
      "loss": 0.1247,
      "step": 20160
    },
    {
      "epoch": 0.03302501260121888,
      "grad_norm": 6.308660984039307,
      "learning_rate": 9.999406970078346e-06,
      "loss": 0.1487,
      "step": 20180
    },
    {
      "epoch": 0.03305774303987222,
      "grad_norm": 4.255006313323975,
      "learning_rate": 9.99934107786483e-06,
      "loss": 0.1206,
      "step": 20200
    },
    {
      "epoch": 0.03309047347852556,
      "grad_norm": 7.992818832397461,
      "learning_rate": 9.999275185651312e-06,
      "loss": 0.138,
      "step": 20220
    },
    {
      "epoch": 0.0331232039171789,
      "grad_norm": 10.263778686523438,
      "learning_rate": 9.999209293437795e-06,
      "loss": 0.126,
      "step": 20240
    },
    {
      "epoch": 0.03315593435583224,
      "grad_norm": 6.652631759643555,
      "learning_rate": 9.999143401224279e-06,
      "loss": 0.1397,
      "step": 20260
    },
    {
      "epoch": 0.033188664794485574,
      "grad_norm": 7.020860195159912,
      "learning_rate": 9.999077509010761e-06,
      "loss": 0.1642,
      "step": 20280
    },
    {
      "epoch": 0.03322139523313891,
      "grad_norm": 9.288365364074707,
      "learning_rate": 9.999011616797244e-06,
      "loss": 0.1389,
      "step": 20300
    },
    {
      "epoch": 0.033254125671792256,
      "grad_norm": 7.24210262298584,
      "learning_rate": 9.998945724583726e-06,
      "loss": 0.1346,
      "step": 20320
    },
    {
      "epoch": 0.033286856110445594,
      "grad_norm": 3.601839780807495,
      "learning_rate": 9.99887983237021e-06,
      "loss": 0.1249,
      "step": 20340
    },
    {
      "epoch": 0.03331958654909893,
      "grad_norm": 9.064186096191406,
      "learning_rate": 9.998813940156694e-06,
      "loss": 0.1306,
      "step": 20360
    },
    {
      "epoch": 0.03335231698775227,
      "grad_norm": 15.102355003356934,
      "learning_rate": 9.998748047943175e-06,
      "loss": 0.1033,
      "step": 20380
    },
    {
      "epoch": 0.033385047426405606,
      "grad_norm": 18.07634162902832,
      "learning_rate": 9.998682155729659e-06,
      "loss": 0.1153,
      "step": 20400
    },
    {
      "epoch": 0.03341777786505895,
      "grad_norm": 7.797651767730713,
      "learning_rate": 9.998616263516141e-06,
      "loss": 0.1546,
      "step": 20420
    },
    {
      "epoch": 0.03345050830371229,
      "grad_norm": 9.543376922607422,
      "learning_rate": 9.998550371302624e-06,
      "loss": 0.1116,
      "step": 20440
    },
    {
      "epoch": 0.033483238742365626,
      "grad_norm": 5.321150302886963,
      "learning_rate": 9.998484479089106e-06,
      "loss": 0.1087,
      "step": 20460
    },
    {
      "epoch": 0.03351596918101896,
      "grad_norm": 8.638716697692871,
      "learning_rate": 9.99841858687559e-06,
      "loss": 0.1257,
      "step": 20480
    },
    {
      "epoch": 0.0335486996196723,
      "grad_norm": 9.08183765411377,
      "learning_rate": 9.998352694662072e-06,
      "loss": 0.135,
      "step": 20500
    },
    {
      "epoch": 0.03358143005832564,
      "grad_norm": 8.159281730651855,
      "learning_rate": 9.998286802448555e-06,
      "loss": 0.1283,
      "step": 20520
    },
    {
      "epoch": 0.03361416049697898,
      "grad_norm": 2.5693466663360596,
      "learning_rate": 9.998220910235037e-06,
      "loss": 0.1329,
      "step": 20540
    },
    {
      "epoch": 0.03364689093563232,
      "grad_norm": 4.847701072692871,
      "learning_rate": 9.998155018021521e-06,
      "loss": 0.1012,
      "step": 20560
    },
    {
      "epoch": 0.03367962137428566,
      "grad_norm": 4.414511203765869,
      "learning_rate": 9.998089125808005e-06,
      "loss": 0.1074,
      "step": 20580
    },
    {
      "epoch": 0.033712351812938995,
      "grad_norm": 3.395219326019287,
      "learning_rate": 9.998023233594486e-06,
      "loss": 0.1022,
      "step": 20600
    },
    {
      "epoch": 0.03374508225159233,
      "grad_norm": 11.732605934143066,
      "learning_rate": 9.99795734138097e-06,
      "loss": 0.1178,
      "step": 20620
    },
    {
      "epoch": 0.03377781269024568,
      "grad_norm": 13.217489242553711,
      "learning_rate": 9.997891449167454e-06,
      "loss": 0.1153,
      "step": 20640
    },
    {
      "epoch": 0.033810543128899015,
      "grad_norm": 3.7334036827087402,
      "learning_rate": 9.997825556953935e-06,
      "loss": 0.1384,
      "step": 20660
    },
    {
      "epoch": 0.03384327356755235,
      "grad_norm": 8.947254180908203,
      "learning_rate": 9.997759664740419e-06,
      "loss": 0.1275,
      "step": 20680
    },
    {
      "epoch": 0.03387600400620569,
      "grad_norm": 6.940151691436768,
      "learning_rate": 9.997693772526901e-06,
      "loss": 0.1284,
      "step": 20700
    },
    {
      "epoch": 0.03390873444485903,
      "grad_norm": 1.449174404144287,
      "learning_rate": 9.997627880313385e-06,
      "loss": 0.1025,
      "step": 20720
    },
    {
      "epoch": 0.03394146488351237,
      "grad_norm": 3.4928886890411377,
      "learning_rate": 9.997561988099868e-06,
      "loss": 0.1333,
      "step": 20740
    },
    {
      "epoch": 0.03397419532216571,
      "grad_norm": 3.7748076915740967,
      "learning_rate": 9.99749609588635e-06,
      "loss": 0.1355,
      "step": 20760
    },
    {
      "epoch": 0.03400692576081905,
      "grad_norm": 14.340949058532715,
      "learning_rate": 9.997430203672834e-06,
      "loss": 0.1475,
      "step": 20780
    },
    {
      "epoch": 0.034039656199472385,
      "grad_norm": 9.873478889465332,
      "learning_rate": 9.997364311459315e-06,
      "loss": 0.1084,
      "step": 20800
    },
    {
      "epoch": 0.03407238663812572,
      "grad_norm": 209.62911987304688,
      "learning_rate": 9.997298419245799e-06,
      "loss": 0.1142,
      "step": 20820
    },
    {
      "epoch": 0.03410511707677906,
      "grad_norm": 58.59885025024414,
      "learning_rate": 9.997232527032281e-06,
      "loss": 0.129,
      "step": 20840
    },
    {
      "epoch": 0.034137847515432404,
      "grad_norm": 6.342865467071533,
      "learning_rate": 9.997166634818765e-06,
      "loss": 0.1407,
      "step": 20860
    },
    {
      "epoch": 0.03417057795408574,
      "grad_norm": 6.077548027038574,
      "learning_rate": 9.997100742605246e-06,
      "loss": 0.1438,
      "step": 20880
    },
    {
      "epoch": 0.03420330839273908,
      "grad_norm": 6.51420259475708,
      "learning_rate": 9.99703485039173e-06,
      "loss": 0.1365,
      "step": 20900
    },
    {
      "epoch": 0.03423603883139242,
      "grad_norm": 5.017967224121094,
      "learning_rate": 9.996968958178212e-06,
      "loss": 0.1487,
      "step": 20920
    },
    {
      "epoch": 0.034268769270045754,
      "grad_norm": 5.533971309661865,
      "learning_rate": 9.996903065964696e-06,
      "loss": 0.1144,
      "step": 20940
    },
    {
      "epoch": 0.0343014997086991,
      "grad_norm": 36.9881706237793,
      "learning_rate": 9.996837173751177e-06,
      "loss": 0.1297,
      "step": 20960
    },
    {
      "epoch": 0.034334230147352436,
      "grad_norm": 26.268417358398438,
      "learning_rate": 9.996771281537661e-06,
      "loss": 0.1235,
      "step": 20980
    },
    {
      "epoch": 0.034366960586005774,
      "grad_norm": 6.250568389892578,
      "learning_rate": 9.996705389324145e-06,
      "loss": 0.1195,
      "step": 21000
    },
    {
      "epoch": 0.03439969102465911,
      "grad_norm": 11.85289192199707,
      "learning_rate": 9.996639497110626e-06,
      "loss": 0.1568,
      "step": 21020
    },
    {
      "epoch": 0.03443242146331245,
      "grad_norm": 10.700996398925781,
      "learning_rate": 9.99657360489711e-06,
      "loss": 0.1248,
      "step": 21040
    },
    {
      "epoch": 0.03446515190196579,
      "grad_norm": 11.284136772155762,
      "learning_rate": 9.996507712683594e-06,
      "loss": 0.127,
      "step": 21060
    },
    {
      "epoch": 0.03449788234061913,
      "grad_norm": 19.161609649658203,
      "learning_rate": 9.996441820470076e-06,
      "loss": 0.1179,
      "step": 21080
    },
    {
      "epoch": 0.03453061277927247,
      "grad_norm": 15.377131462097168,
      "learning_rate": 9.996375928256559e-06,
      "loss": 0.1284,
      "step": 21100
    },
    {
      "epoch": 0.034563343217925806,
      "grad_norm": 6.569171905517578,
      "learning_rate": 9.996310036043043e-06,
      "loss": 0.1401,
      "step": 21120
    },
    {
      "epoch": 0.03459607365657914,
      "grad_norm": 11.85591983795166,
      "learning_rate": 9.996244143829525e-06,
      "loss": 0.1182,
      "step": 21140
    },
    {
      "epoch": 0.03462880409523249,
      "grad_norm": 12.105982780456543,
      "learning_rate": 9.996178251616008e-06,
      "loss": 0.1243,
      "step": 21160
    },
    {
      "epoch": 0.034661534533885825,
      "grad_norm": 7.817657470703125,
      "learning_rate": 9.99611235940249e-06,
      "loss": 0.1002,
      "step": 21180
    },
    {
      "epoch": 0.03469426497253916,
      "grad_norm": 11.021803855895996,
      "learning_rate": 9.996046467188974e-06,
      "loss": 0.1234,
      "step": 21200
    },
    {
      "epoch": 0.0347269954111925,
      "grad_norm": 6.265562534332275,
      "learning_rate": 9.995980574975456e-06,
      "loss": 0.1347,
      "step": 21220
    },
    {
      "epoch": 0.03475972584984584,
      "grad_norm": 6.362526893615723,
      "learning_rate": 9.995914682761939e-06,
      "loss": 0.1308,
      "step": 21240
    },
    {
      "epoch": 0.034792456288499175,
      "grad_norm": 4.512825012207031,
      "learning_rate": 9.995848790548421e-06,
      "loss": 0.105,
      "step": 21260
    },
    {
      "epoch": 0.03482518672715252,
      "grad_norm": 6.559502124786377,
      "learning_rate": 9.995782898334905e-06,
      "loss": 0.1185,
      "step": 21280
    },
    {
      "epoch": 0.03485791716580586,
      "grad_norm": 9.155824661254883,
      "learning_rate": 9.995717006121387e-06,
      "loss": 0.1366,
      "step": 21300
    },
    {
      "epoch": 0.034890647604459195,
      "grad_norm": 5.069474220275879,
      "learning_rate": 9.99565111390787e-06,
      "loss": 0.133,
      "step": 21320
    },
    {
      "epoch": 0.03492337804311253,
      "grad_norm": 25.864622116088867,
      "learning_rate": 9.995585221694352e-06,
      "loss": 0.1166,
      "step": 21340
    },
    {
      "epoch": 0.03495610848176587,
      "grad_norm": 4.770773887634277,
      "learning_rate": 9.995519329480836e-06,
      "loss": 0.1408,
      "step": 21360
    },
    {
      "epoch": 0.034988838920419214,
      "grad_norm": 6.903642654418945,
      "learning_rate": 9.99545343726732e-06,
      "loss": 0.1036,
      "step": 21380
    },
    {
      "epoch": 0.03502156935907255,
      "grad_norm": 3.4473559856414795,
      "learning_rate": 9.995387545053801e-06,
      "loss": 0.1157,
      "step": 21400
    },
    {
      "epoch": 0.03505429979772589,
      "grad_norm": 6.098388671875,
      "learning_rate": 9.995321652840285e-06,
      "loss": 0.1081,
      "step": 21420
    },
    {
      "epoch": 0.03508703023637923,
      "grad_norm": 6.751668930053711,
      "learning_rate": 9.995255760626768e-06,
      "loss": 0.1391,
      "step": 21440
    },
    {
      "epoch": 0.035119760675032564,
      "grad_norm": 5.53415060043335,
      "learning_rate": 9.99518986841325e-06,
      "loss": 0.1424,
      "step": 21460
    },
    {
      "epoch": 0.03515249111368591,
      "grad_norm": 9.147188186645508,
      "learning_rate": 9.995123976199734e-06,
      "loss": 0.1278,
      "step": 21480
    },
    {
      "epoch": 0.035185221552339246,
      "grad_norm": 7.8618855476379395,
      "learning_rate": 9.995058083986217e-06,
      "loss": 0.1151,
      "step": 21500
    },
    {
      "epoch": 0.035217951990992584,
      "grad_norm": 6.584446430206299,
      "learning_rate": 9.9949921917727e-06,
      "loss": 0.1218,
      "step": 21520
    },
    {
      "epoch": 0.03525068242964592,
      "grad_norm": 1.6347715854644775,
      "learning_rate": 9.994926299559183e-06,
      "loss": 0.1189,
      "step": 21540
    },
    {
      "epoch": 0.03528341286829926,
      "grad_norm": 5.575261116027832,
      "learning_rate": 9.994860407345665e-06,
      "loss": 0.1228,
      "step": 21560
    },
    {
      "epoch": 0.035316143306952597,
      "grad_norm": 5.001314640045166,
      "learning_rate": 9.994794515132148e-06,
      "loss": 0.107,
      "step": 21580
    },
    {
      "epoch": 0.03534887374560594,
      "grad_norm": 4.661531925201416,
      "learning_rate": 9.99472862291863e-06,
      "loss": 0.1037,
      "step": 21600
    },
    {
      "epoch": 0.03538160418425928,
      "grad_norm": 9.629672050476074,
      "learning_rate": 9.994662730705114e-06,
      "loss": 0.0933,
      "step": 21620
    },
    {
      "epoch": 0.035414334622912616,
      "grad_norm": 11.581317901611328,
      "learning_rate": 9.994596838491596e-06,
      "loss": 0.1171,
      "step": 21640
    },
    {
      "epoch": 0.035447065061565954,
      "grad_norm": 7.822231769561768,
      "learning_rate": 9.99453094627808e-06,
      "loss": 0.1,
      "step": 21660
    },
    {
      "epoch": 0.03547979550021929,
      "grad_norm": 13.617534637451172,
      "learning_rate": 9.994465054064561e-06,
      "loss": 0.1502,
      "step": 21680
    },
    {
      "epoch": 0.035512525938872636,
      "grad_norm": 4.999341011047363,
      "learning_rate": 9.994399161851045e-06,
      "loss": 0.1326,
      "step": 21700
    },
    {
      "epoch": 0.03554525637752597,
      "grad_norm": 85.11231994628906,
      "learning_rate": 9.994333269637527e-06,
      "loss": 0.1156,
      "step": 21720
    },
    {
      "epoch": 0.03557798681617931,
      "grad_norm": 13.155220031738281,
      "learning_rate": 9.99426737742401e-06,
      "loss": 0.1486,
      "step": 21740
    },
    {
      "epoch": 0.03561071725483265,
      "grad_norm": 5.80494499206543,
      "learning_rate": 9.994201485210494e-06,
      "loss": 0.1394,
      "step": 21760
    },
    {
      "epoch": 0.035643447693485986,
      "grad_norm": 2.8796629905700684,
      "learning_rate": 9.994135592996976e-06,
      "loss": 0.1186,
      "step": 21780
    },
    {
      "epoch": 0.03567617813213933,
      "grad_norm": 1.9704078435897827,
      "learning_rate": 9.99406970078346e-06,
      "loss": 0.1245,
      "step": 21800
    },
    {
      "epoch": 0.03570890857079267,
      "grad_norm": 4.28673791885376,
      "learning_rate": 9.994003808569943e-06,
      "loss": 0.1272,
      "step": 21820
    },
    {
      "epoch": 0.035741639009446005,
      "grad_norm": 12.515954971313477,
      "learning_rate": 9.993937916356425e-06,
      "loss": 0.1053,
      "step": 21840
    },
    {
      "epoch": 0.03577436944809934,
      "grad_norm": 4.840087890625,
      "learning_rate": 9.993872024142908e-06,
      "loss": 0.0874,
      "step": 21860
    },
    {
      "epoch": 0.03580709988675268,
      "grad_norm": 5.719210147857666,
      "learning_rate": 9.993806131929392e-06,
      "loss": 0.0931,
      "step": 21880
    },
    {
      "epoch": 0.03583983032540602,
      "grad_norm": 9.621671676635742,
      "learning_rate": 9.993740239715874e-06,
      "loss": 0.1211,
      "step": 21900
    },
    {
      "epoch": 0.03587256076405936,
      "grad_norm": 101.2449951171875,
      "learning_rate": 9.993674347502357e-06,
      "loss": 0.1234,
      "step": 21920
    },
    {
      "epoch": 0.0359052912027127,
      "grad_norm": 3.566633701324463,
      "learning_rate": 9.99360845528884e-06,
      "loss": 0.1113,
      "step": 21940
    },
    {
      "epoch": 0.03593802164136604,
      "grad_norm": 7.5593390464782715,
      "learning_rate": 9.993542563075323e-06,
      "loss": 0.1354,
      "step": 21960
    },
    {
      "epoch": 0.035970752080019375,
      "grad_norm": 4.069851875305176,
      "learning_rate": 9.993476670861805e-06,
      "loss": 0.1195,
      "step": 21980
    },
    {
      "epoch": 0.03600348251867271,
      "grad_norm": 17.663808822631836,
      "learning_rate": 9.993410778648288e-06,
      "loss": 0.1141,
      "step": 22000
    },
    {
      "epoch": 0.03603621295732606,
      "grad_norm": 9.668967247009277,
      "learning_rate": 9.99334488643477e-06,
      "loss": 0.1143,
      "step": 22020
    },
    {
      "epoch": 0.036068943395979394,
      "grad_norm": 5.698126792907715,
      "learning_rate": 9.993278994221254e-06,
      "loss": 0.1152,
      "step": 22040
    },
    {
      "epoch": 0.03610167383463273,
      "grad_norm": 5.837045192718506,
      "learning_rate": 9.993213102007736e-06,
      "loss": 0.1148,
      "step": 22060
    },
    {
      "epoch": 0.03613440427328607,
      "grad_norm": 48.20930099487305,
      "learning_rate": 9.99314720979422e-06,
      "loss": 0.122,
      "step": 22080
    },
    {
      "epoch": 0.03616713471193941,
      "grad_norm": 86.6990737915039,
      "learning_rate": 9.993081317580701e-06,
      "loss": 0.1354,
      "step": 22100
    },
    {
      "epoch": 0.03619986515059275,
      "grad_norm": 9.25520133972168,
      "learning_rate": 9.993015425367185e-06,
      "loss": 0.1318,
      "step": 22120
    },
    {
      "epoch": 0.03623259558924609,
      "grad_norm": 6.140288829803467,
      "learning_rate": 9.992949533153668e-06,
      "loss": 0.1239,
      "step": 22140
    },
    {
      "epoch": 0.036265326027899426,
      "grad_norm": 7.20848274230957,
      "learning_rate": 9.99288364094015e-06,
      "loss": 0.1301,
      "step": 22160
    },
    {
      "epoch": 0.036298056466552764,
      "grad_norm": 6.034334659576416,
      "learning_rate": 9.992817748726634e-06,
      "loss": 0.1176,
      "step": 22180
    },
    {
      "epoch": 0.0363307869052061,
      "grad_norm": 3.1436636447906494,
      "learning_rate": 9.992751856513116e-06,
      "loss": 0.1214,
      "step": 22200
    },
    {
      "epoch": 0.036363517343859446,
      "grad_norm": 2.24460768699646,
      "learning_rate": 9.9926859642996e-06,
      "loss": 0.1168,
      "step": 22220
    },
    {
      "epoch": 0.03639624778251278,
      "grad_norm": 13.318211555480957,
      "learning_rate": 9.992620072086083e-06,
      "loss": 0.1289,
      "step": 22240
    },
    {
      "epoch": 0.03642897822116612,
      "grad_norm": 4.6964216232299805,
      "learning_rate": 9.992554179872565e-06,
      "loss": 0.1002,
      "step": 22260
    },
    {
      "epoch": 0.03646170865981946,
      "grad_norm": 11.33691120147705,
      "learning_rate": 9.992488287659048e-06,
      "loss": 0.1233,
      "step": 22280
    },
    {
      "epoch": 0.036494439098472796,
      "grad_norm": 3.5074708461761475,
      "learning_rate": 9.992422395445532e-06,
      "loss": 0.1201,
      "step": 22300
    },
    {
      "epoch": 0.03652716953712613,
      "grad_norm": 3.2007250785827637,
      "learning_rate": 9.992356503232014e-06,
      "loss": 0.1179,
      "step": 22320
    },
    {
      "epoch": 0.03655989997577948,
      "grad_norm": 9.711212158203125,
      "learning_rate": 9.992290611018497e-06,
      "loss": 0.132,
      "step": 22340
    },
    {
      "epoch": 0.036592630414432815,
      "grad_norm": 6.550482749938965,
      "learning_rate": 9.99222471880498e-06,
      "loss": 0.1291,
      "step": 22360
    },
    {
      "epoch": 0.03662536085308615,
      "grad_norm": 5.160052299499512,
      "learning_rate": 9.992158826591463e-06,
      "loss": 0.1036,
      "step": 22380
    },
    {
      "epoch": 0.03665809129173949,
      "grad_norm": 13.325230598449707,
      "learning_rate": 9.992092934377945e-06,
      "loss": 0.1392,
      "step": 22400
    },
    {
      "epoch": 0.03669082173039283,
      "grad_norm": 8.015325546264648,
      "learning_rate": 9.992027042164428e-06,
      "loss": 0.1175,
      "step": 22420
    },
    {
      "epoch": 0.03672355216904617,
      "grad_norm": 4.805301666259766,
      "learning_rate": 9.99196114995091e-06,
      "loss": 0.1154,
      "step": 22440
    },
    {
      "epoch": 0.03675628260769951,
      "grad_norm": 5.702779769897461,
      "learning_rate": 9.991895257737394e-06,
      "loss": 0.1159,
      "step": 22460
    },
    {
      "epoch": 0.03678901304635285,
      "grad_norm": 13.056098937988281,
      "learning_rate": 9.991829365523877e-06,
      "loss": 0.1252,
      "step": 22480
    },
    {
      "epoch": 0.036821743485006185,
      "grad_norm": 5.036514759063721,
      "learning_rate": 9.99176347331036e-06,
      "loss": 0.1127,
      "step": 22500
    },
    {
      "epoch": 0.03685447392365952,
      "grad_norm": 6.174327850341797,
      "learning_rate": 9.991697581096843e-06,
      "loss": 0.132,
      "step": 22520
    },
    {
      "epoch": 0.03688720436231287,
      "grad_norm": 6.593940734863281,
      "learning_rate": 9.991631688883325e-06,
      "loss": 0.1231,
      "step": 22540
    },
    {
      "epoch": 0.036919934800966205,
      "grad_norm": 14.378271102905273,
      "learning_rate": 9.991565796669808e-06,
      "loss": 0.1224,
      "step": 22560
    },
    {
      "epoch": 0.03695266523961954,
      "grad_norm": 3.1093878746032715,
      "learning_rate": 9.99149990445629e-06,
      "loss": 0.1205,
      "step": 22580
    },
    {
      "epoch": 0.03698539567827288,
      "grad_norm": 4.494521141052246,
      "learning_rate": 9.991434012242774e-06,
      "loss": 0.1118,
      "step": 22600
    },
    {
      "epoch": 0.03701812611692622,
      "grad_norm": 17.625694274902344,
      "learning_rate": 9.991368120029258e-06,
      "loss": 0.1303,
      "step": 22620
    },
    {
      "epoch": 0.037050856555579555,
      "grad_norm": 3.386056661605835,
      "learning_rate": 9.99130222781574e-06,
      "loss": 0.0978,
      "step": 22640
    },
    {
      "epoch": 0.0370835869942329,
      "grad_norm": 6.503330707550049,
      "learning_rate": 9.991236335602223e-06,
      "loss": 0.1023,
      "step": 22660
    },
    {
      "epoch": 0.03711631743288624,
      "grad_norm": 12.731517791748047,
      "learning_rate": 9.991170443388707e-06,
      "loss": 0.1273,
      "step": 22680
    },
    {
      "epoch": 0.037149047871539574,
      "grad_norm": 8.119781494140625,
      "learning_rate": 9.991104551175188e-06,
      "loss": 0.1136,
      "step": 22700
    },
    {
      "epoch": 0.03718177831019291,
      "grad_norm": 6.742973327636719,
      "learning_rate": 9.991038658961672e-06,
      "loss": 0.1178,
      "step": 22720
    },
    {
      "epoch": 0.03721450874884625,
      "grad_norm": 10.909878730773926,
      "learning_rate": 9.990972766748154e-06,
      "loss": 0.1216,
      "step": 22740
    },
    {
      "epoch": 0.037247239187499594,
      "grad_norm": 7.953213691711426,
      "learning_rate": 9.990906874534638e-06,
      "loss": 0.1073,
      "step": 22760
    },
    {
      "epoch": 0.03727996962615293,
      "grad_norm": 7.353416919708252,
      "learning_rate": 9.99084098232112e-06,
      "loss": 0.1344,
      "step": 22780
    },
    {
      "epoch": 0.03731270006480627,
      "grad_norm": 2.0929739475250244,
      "learning_rate": 9.990775090107603e-06,
      "loss": 0.1152,
      "step": 22800
    },
    {
      "epoch": 0.037345430503459606,
      "grad_norm": 7.819360256195068,
      "learning_rate": 9.990709197894087e-06,
      "loss": 0.1175,
      "step": 22820
    },
    {
      "epoch": 0.037378160942112944,
      "grad_norm": 6.804110527038574,
      "learning_rate": 9.990643305680568e-06,
      "loss": 0.1263,
      "step": 22840
    },
    {
      "epoch": 0.03741089138076629,
      "grad_norm": 8.182767868041992,
      "learning_rate": 9.990577413467052e-06,
      "loss": 0.1295,
      "step": 22860
    },
    {
      "epoch": 0.037443621819419626,
      "grad_norm": 4.046440601348877,
      "learning_rate": 9.990511521253534e-06,
      "loss": 0.1076,
      "step": 22880
    },
    {
      "epoch": 0.03747635225807296,
      "grad_norm": 10.887542724609375,
      "learning_rate": 9.990445629040018e-06,
      "loss": 0.1008,
      "step": 22900
    },
    {
      "epoch": 0.0375090826967263,
      "grad_norm": 7.443330764770508,
      "learning_rate": 9.9903797368265e-06,
      "loss": 0.1355,
      "step": 22920
    },
    {
      "epoch": 0.03754181313537964,
      "grad_norm": 5.934266090393066,
      "learning_rate": 9.990313844612983e-06,
      "loss": 0.1053,
      "step": 22940
    },
    {
      "epoch": 0.037574543574032976,
      "grad_norm": 2.852320671081543,
      "learning_rate": 9.990247952399465e-06,
      "loss": 0.109,
      "step": 22960
    },
    {
      "epoch": 0.03760727401268632,
      "grad_norm": 17.029401779174805,
      "learning_rate": 9.990182060185949e-06,
      "loss": 0.1065,
      "step": 22980
    },
    {
      "epoch": 0.03764000445133966,
      "grad_norm": 2.740286111831665,
      "learning_rate": 9.99011616797243e-06,
      "loss": 0.1028,
      "step": 23000
    },
    {
      "epoch": 0.037672734889992995,
      "grad_norm": 4.999754905700684,
      "learning_rate": 9.990050275758914e-06,
      "loss": 0.1201,
      "step": 23020
    },
    {
      "epoch": 0.03770546532864633,
      "grad_norm": 9.75322151184082,
      "learning_rate": 9.989984383545398e-06,
      "loss": 0.1134,
      "step": 23040
    },
    {
      "epoch": 0.03773819576729967,
      "grad_norm": 1.8941247463226318,
      "learning_rate": 9.98991849133188e-06,
      "loss": 0.1008,
      "step": 23060
    },
    {
      "epoch": 0.037770926205953015,
      "grad_norm": 8.213369369506836,
      "learning_rate": 9.989852599118363e-06,
      "loss": 0.1133,
      "step": 23080
    },
    {
      "epoch": 0.03780365664460635,
      "grad_norm": 5.864421844482422,
      "learning_rate": 9.989786706904847e-06,
      "loss": 0.1357,
      "step": 23100
    },
    {
      "epoch": 0.03783638708325969,
      "grad_norm": 2.3998851776123047,
      "learning_rate": 9.989720814691329e-06,
      "loss": 0.0975,
      "step": 23120
    },
    {
      "epoch": 0.03786911752191303,
      "grad_norm": 5.484094619750977,
      "learning_rate": 9.989654922477812e-06,
      "loss": 0.1083,
      "step": 23140
    },
    {
      "epoch": 0.037901847960566365,
      "grad_norm": 2.4524483680725098,
      "learning_rate": 9.989589030264294e-06,
      "loss": 0.1154,
      "step": 23160
    },
    {
      "epoch": 0.03793457839921971,
      "grad_norm": 5.168462753295898,
      "learning_rate": 9.989523138050778e-06,
      "loss": 0.1154,
      "step": 23180
    },
    {
      "epoch": 0.03796730883787305,
      "grad_norm": 8.051518440246582,
      "learning_rate": 9.989457245837261e-06,
      "loss": 0.1309,
      "step": 23200
    },
    {
      "epoch": 0.038000039276526384,
      "grad_norm": 27.870319366455078,
      "learning_rate": 9.989391353623743e-06,
      "loss": 0.1109,
      "step": 23220
    },
    {
      "epoch": 0.03803276971517972,
      "grad_norm": 5.730527877807617,
      "learning_rate": 9.989325461410227e-06,
      "loss": 0.114,
      "step": 23240
    },
    {
      "epoch": 0.03806550015383306,
      "grad_norm": 6.2936177253723145,
      "learning_rate": 9.989259569196709e-06,
      "loss": 0.0994,
      "step": 23260
    },
    {
      "epoch": 0.0380982305924864,
      "grad_norm": 3.062621831893921,
      "learning_rate": 9.989193676983192e-06,
      "loss": 0.1151,
      "step": 23280
    },
    {
      "epoch": 0.03813096103113974,
      "grad_norm": 5.195838451385498,
      "learning_rate": 9.989127784769674e-06,
      "loss": 0.1291,
      "step": 23300
    },
    {
      "epoch": 0.03816369146979308,
      "grad_norm": 14.057100296020508,
      "learning_rate": 9.989061892556158e-06,
      "loss": 0.1181,
      "step": 23320
    },
    {
      "epoch": 0.03819642190844642,
      "grad_norm": 11.11351490020752,
      "learning_rate": 9.98899600034264e-06,
      "loss": 0.1371,
      "step": 23340
    },
    {
      "epoch": 0.038229152347099754,
      "grad_norm": 6.598130702972412,
      "learning_rate": 9.988930108129123e-06,
      "loss": 0.1378,
      "step": 23360
    },
    {
      "epoch": 0.03826188278575309,
      "grad_norm": 3.642446756362915,
      "learning_rate": 9.988864215915605e-06,
      "loss": 0.1154,
      "step": 23380
    },
    {
      "epoch": 0.038294613224406436,
      "grad_norm": 15.7865629196167,
      "learning_rate": 9.988798323702089e-06,
      "loss": 0.1187,
      "step": 23400
    },
    {
      "epoch": 0.038327343663059774,
      "grad_norm": 4.100852966308594,
      "learning_rate": 9.988732431488572e-06,
      "loss": 0.1007,
      "step": 23420
    },
    {
      "epoch": 0.03836007410171311,
      "grad_norm": 7.5573296546936035,
      "learning_rate": 9.988666539275054e-06,
      "loss": 0.121,
      "step": 23440
    },
    {
      "epoch": 0.03839280454036645,
      "grad_norm": 8.663269996643066,
      "learning_rate": 9.988600647061538e-06,
      "loss": 0.1194,
      "step": 23460
    },
    {
      "epoch": 0.038425534979019786,
      "grad_norm": 3.0332906246185303,
      "learning_rate": 9.988534754848021e-06,
      "loss": 0.1356,
      "step": 23480
    },
    {
      "epoch": 0.03845826541767313,
      "grad_norm": 7.05355978012085,
      "learning_rate": 9.988468862634503e-06,
      "loss": 0.1606,
      "step": 23500
    },
    {
      "epoch": 0.03849099585632647,
      "grad_norm": 6.396869659423828,
      "learning_rate": 9.988402970420987e-06,
      "loss": 0.1262,
      "step": 23520
    },
    {
      "epoch": 0.038523726294979806,
      "grad_norm": 5.5366902351379395,
      "learning_rate": 9.98833707820747e-06,
      "loss": 0.1162,
      "step": 23540
    },
    {
      "epoch": 0.03855645673363314,
      "grad_norm": 11.279714584350586,
      "learning_rate": 9.988271185993952e-06,
      "loss": 0.1053,
      "step": 23560
    },
    {
      "epoch": 0.03858918717228648,
      "grad_norm": 5.393213748931885,
      "learning_rate": 9.988205293780436e-06,
      "loss": 0.1127,
      "step": 23580
    },
    {
      "epoch": 0.038621917610939825,
      "grad_norm": 5.905176162719727,
      "learning_rate": 9.988139401566918e-06,
      "loss": 0.1176,
      "step": 23600
    },
    {
      "epoch": 0.03865464804959316,
      "grad_norm": 12.684221267700195,
      "learning_rate": 9.988073509353401e-06,
      "loss": 0.1013,
      "step": 23620
    },
    {
      "epoch": 0.0386873784882465,
      "grad_norm": 83.98444366455078,
      "learning_rate": 9.988007617139883e-06,
      "loss": 0.1256,
      "step": 23640
    },
    {
      "epoch": 0.03872010892689984,
      "grad_norm": 7.323430061340332,
      "learning_rate": 9.987941724926367e-06,
      "loss": 0.0974,
      "step": 23660
    },
    {
      "epoch": 0.038752839365553175,
      "grad_norm": 5.9098920822143555,
      "learning_rate": 9.987875832712849e-06,
      "loss": 0.1323,
      "step": 23680
    },
    {
      "epoch": 0.03878556980420651,
      "grad_norm": 4.636550426483154,
      "learning_rate": 9.987809940499332e-06,
      "loss": 0.1178,
      "step": 23700
    },
    {
      "epoch": 0.03881830024285986,
      "grad_norm": 9.886067390441895,
      "learning_rate": 9.987744048285814e-06,
      "loss": 0.1266,
      "step": 23720
    },
    {
      "epoch": 0.038851030681513195,
      "grad_norm": 7.738467216491699,
      "learning_rate": 9.987678156072298e-06,
      "loss": 0.1419,
      "step": 23740
    },
    {
      "epoch": 0.03888376112016653,
      "grad_norm": 6.757480621337891,
      "learning_rate": 9.98761226385878e-06,
      "loss": 0.1236,
      "step": 23760
    },
    {
      "epoch": 0.03891649155881987,
      "grad_norm": 6.8176679611206055,
      "learning_rate": 9.987546371645263e-06,
      "loss": 0.1484,
      "step": 23780
    },
    {
      "epoch": 0.03894922199747321,
      "grad_norm": 5.5044403076171875,
      "learning_rate": 9.987480479431745e-06,
      "loss": 0.1339,
      "step": 23800
    },
    {
      "epoch": 0.03898195243612655,
      "grad_norm": 7.527084827423096,
      "learning_rate": 9.987414587218229e-06,
      "loss": 0.1166,
      "step": 23820
    },
    {
      "epoch": 0.03901468287477989,
      "grad_norm": 5.565399169921875,
      "learning_rate": 9.987348695004712e-06,
      "loss": 0.0996,
      "step": 23840
    },
    {
      "epoch": 0.03904741331343323,
      "grad_norm": 6.134187698364258,
      "learning_rate": 9.987282802791194e-06,
      "loss": 0.1208,
      "step": 23860
    },
    {
      "epoch": 0.039080143752086564,
      "grad_norm": 5.130614280700684,
      "learning_rate": 9.987216910577678e-06,
      "loss": 0.127,
      "step": 23880
    },
    {
      "epoch": 0.0391128741907399,
      "grad_norm": 41.324951171875,
      "learning_rate": 9.987151018364161e-06,
      "loss": 0.1062,
      "step": 23900
    },
    {
      "epoch": 0.039145604629393246,
      "grad_norm": 10.149264335632324,
      "learning_rate": 9.987085126150645e-06,
      "loss": 0.1109,
      "step": 23920
    },
    {
      "epoch": 0.039178335068046584,
      "grad_norm": 3.727123975753784,
      "learning_rate": 9.987019233937127e-06,
      "loss": 0.1047,
      "step": 23940
    },
    {
      "epoch": 0.03921106550669992,
      "grad_norm": 2.493596076965332,
      "learning_rate": 9.98695334172361e-06,
      "loss": 0.127,
      "step": 23960
    },
    {
      "epoch": 0.03924379594535326,
      "grad_norm": 18.146575927734375,
      "learning_rate": 9.986887449510092e-06,
      "loss": 0.0956,
      "step": 23980
    },
    {
      "epoch": 0.039276526384006596,
      "grad_norm": 7.649442195892334,
      "learning_rate": 9.986821557296576e-06,
      "loss": 0.1354,
      "step": 24000
    },
    {
      "epoch": 0.039309256822659934,
      "grad_norm": 6.119079113006592,
      "learning_rate": 9.986755665083058e-06,
      "loss": 0.1252,
      "step": 24020
    },
    {
      "epoch": 0.03934198726131328,
      "grad_norm": 5.845660209655762,
      "learning_rate": 9.986689772869541e-06,
      "loss": 0.1071,
      "step": 24040
    },
    {
      "epoch": 0.039374717699966616,
      "grad_norm": 8.23033332824707,
      "learning_rate": 9.986623880656023e-06,
      "loss": 0.1119,
      "step": 24060
    },
    {
      "epoch": 0.039407448138619953,
      "grad_norm": 8.469346046447754,
      "learning_rate": 9.986557988442507e-06,
      "loss": 0.1049,
      "step": 24080
    },
    {
      "epoch": 0.03944017857727329,
      "grad_norm": 7.913232326507568,
      "learning_rate": 9.986492096228989e-06,
      "loss": 0.1174,
      "step": 24100
    },
    {
      "epoch": 0.03947290901592663,
      "grad_norm": 14.700368881225586,
      "learning_rate": 9.986426204015472e-06,
      "loss": 0.1237,
      "step": 24120
    },
    {
      "epoch": 0.03950563945457997,
      "grad_norm": 8.237668991088867,
      "learning_rate": 9.986360311801954e-06,
      "loss": 0.1491,
      "step": 24140
    },
    {
      "epoch": 0.03953836989323331,
      "grad_norm": 14.265482902526855,
      "learning_rate": 9.986294419588438e-06,
      "loss": 0.1165,
      "step": 24160
    },
    {
      "epoch": 0.03957110033188665,
      "grad_norm": 12.988314628601074,
      "learning_rate": 9.98622852737492e-06,
      "loss": 0.1329,
      "step": 24180
    },
    {
      "epoch": 0.039603830770539986,
      "grad_norm": 10.614182472229004,
      "learning_rate": 9.986162635161403e-06,
      "loss": 0.1237,
      "step": 24200
    },
    {
      "epoch": 0.03963656120919332,
      "grad_norm": 5.449987411499023,
      "learning_rate": 9.986096742947887e-06,
      "loss": 0.0994,
      "step": 24220
    },
    {
      "epoch": 0.03966929164784667,
      "grad_norm": 5.313726425170898,
      "learning_rate": 9.986030850734369e-06,
      "loss": 0.1365,
      "step": 24240
    },
    {
      "epoch": 0.039702022086500005,
      "grad_norm": 4.50619649887085,
      "learning_rate": 9.985964958520852e-06,
      "loss": 0.136,
      "step": 24260
    },
    {
      "epoch": 0.03973475252515334,
      "grad_norm": 5.088881492614746,
      "learning_rate": 9.985899066307336e-06,
      "loss": 0.1331,
      "step": 24280
    },
    {
      "epoch": 0.03976748296380668,
      "grad_norm": 4.855977535247803,
      "learning_rate": 9.985833174093818e-06,
      "loss": 0.1099,
      "step": 24300
    },
    {
      "epoch": 0.03980021340246002,
      "grad_norm": 21.95375633239746,
      "learning_rate": 9.985767281880301e-06,
      "loss": 0.1156,
      "step": 24320
    },
    {
      "epoch": 0.039832943841113355,
      "grad_norm": 5.687751770019531,
      "learning_rate": 9.985701389666785e-06,
      "loss": 0.1193,
      "step": 24340
    },
    {
      "epoch": 0.0398656742797667,
      "grad_norm": 7.005632400512695,
      "learning_rate": 9.985635497453267e-06,
      "loss": 0.1288,
      "step": 24360
    },
    {
      "epoch": 0.03989840471842004,
      "grad_norm": 6.869323253631592,
      "learning_rate": 9.98556960523975e-06,
      "loss": 0.1309,
      "step": 24380
    },
    {
      "epoch": 0.039931135157073375,
      "grad_norm": 8.497098922729492,
      "learning_rate": 9.985503713026232e-06,
      "loss": 0.1169,
      "step": 24400
    },
    {
      "epoch": 0.03996386559572671,
      "grad_norm": 4.687977313995361,
      "learning_rate": 9.985437820812716e-06,
      "loss": 0.1257,
      "step": 24420
    },
    {
      "epoch": 0.03999659603438005,
      "grad_norm": 9.624258995056152,
      "learning_rate": 9.985371928599198e-06,
      "loss": 0.1087,
      "step": 24440
    },
    {
      "epoch": 0.040029326473033394,
      "grad_norm": 9.024892807006836,
      "learning_rate": 9.985306036385681e-06,
      "loss": 0.1318,
      "step": 24460
    },
    {
      "epoch": 0.04006205691168673,
      "grad_norm": 4.917065620422363,
      "learning_rate": 9.985240144172163e-06,
      "loss": 0.1115,
      "step": 24480
    },
    {
      "epoch": 0.04009478735034007,
      "grad_norm": 7.447591304779053,
      "learning_rate": 9.985174251958647e-06,
      "loss": 0.1166,
      "step": 24500
    },
    {
      "epoch": 0.04012751778899341,
      "grad_norm": 6.575088977813721,
      "learning_rate": 9.985108359745129e-06,
      "loss": 0.1147,
      "step": 24520
    },
    {
      "epoch": 0.040160248227646744,
      "grad_norm": 6.268465995788574,
      "learning_rate": 9.985042467531612e-06,
      "loss": 0.1181,
      "step": 24540
    },
    {
      "epoch": 0.04019297866630009,
      "grad_norm": 8.164259910583496,
      "learning_rate": 9.984976575318094e-06,
      "loss": 0.1412,
      "step": 24560
    },
    {
      "epoch": 0.040225709104953426,
      "grad_norm": 4.529340744018555,
      "learning_rate": 9.984910683104578e-06,
      "loss": 0.1279,
      "step": 24580
    },
    {
      "epoch": 0.040258439543606764,
      "grad_norm": 29.032453536987305,
      "learning_rate": 9.984844790891061e-06,
      "loss": 0.1007,
      "step": 24600
    },
    {
      "epoch": 0.0402911699822601,
      "grad_norm": 7.522282600402832,
      "learning_rate": 9.984778898677543e-06,
      "loss": 0.1153,
      "step": 24620
    },
    {
      "epoch": 0.04032390042091344,
      "grad_norm": 40.374595642089844,
      "learning_rate": 9.984713006464027e-06,
      "loss": 0.1029,
      "step": 24640
    },
    {
      "epoch": 0.04035663085956678,
      "grad_norm": 7.527731895446777,
      "learning_rate": 9.98464711425051e-06,
      "loss": 0.1068,
      "step": 24660
    },
    {
      "epoch": 0.04038936129822012,
      "grad_norm": 15.877562522888184,
      "learning_rate": 9.984581222036992e-06,
      "loss": 0.1238,
      "step": 24680
    },
    {
      "epoch": 0.04042209173687346,
      "grad_norm": 4.298641204833984,
      "learning_rate": 9.984515329823476e-06,
      "loss": 0.1071,
      "step": 24700
    },
    {
      "epoch": 0.040454822175526796,
      "grad_norm": 1.32476007938385,
      "learning_rate": 9.98444943760996e-06,
      "loss": 0.1218,
      "step": 24720
    },
    {
      "epoch": 0.04048755261418013,
      "grad_norm": 5.134268760681152,
      "learning_rate": 9.984383545396441e-06,
      "loss": 0.1235,
      "step": 24740
    },
    {
      "epoch": 0.04052028305283347,
      "grad_norm": 4.347992897033691,
      "learning_rate": 9.984317653182925e-06,
      "loss": 0.1082,
      "step": 24760
    },
    {
      "epoch": 0.040553013491486815,
      "grad_norm": 3.9047083854675293,
      "learning_rate": 9.984251760969407e-06,
      "loss": 0.1057,
      "step": 24780
    },
    {
      "epoch": 0.04058574393014015,
      "grad_norm": 3.118483543395996,
      "learning_rate": 9.98418586875589e-06,
      "loss": 0.1091,
      "step": 24800
    },
    {
      "epoch": 0.04061847436879349,
      "grad_norm": 14.603185653686523,
      "learning_rate": 9.984119976542372e-06,
      "loss": 0.1068,
      "step": 24820
    },
    {
      "epoch": 0.04065120480744683,
      "grad_norm": 11.964546203613281,
      "learning_rate": 9.984054084328856e-06,
      "loss": 0.1135,
      "step": 24840
    },
    {
      "epoch": 0.040683935246100165,
      "grad_norm": 2.897980213165283,
      "learning_rate": 9.983988192115338e-06,
      "loss": 0.114,
      "step": 24860
    },
    {
      "epoch": 0.04071666568475351,
      "grad_norm": 5.05222749710083,
      "learning_rate": 9.983922299901822e-06,
      "loss": 0.1218,
      "step": 24880
    },
    {
      "epoch": 0.04074939612340685,
      "grad_norm": 7.912262916564941,
      "learning_rate": 9.983856407688303e-06,
      "loss": 0.1254,
      "step": 24900
    },
    {
      "epoch": 0.040782126562060185,
      "grad_norm": 3.798737049102783,
      "learning_rate": 9.983790515474787e-06,
      "loss": 0.1173,
      "step": 24920
    },
    {
      "epoch": 0.04081485700071352,
      "grad_norm": 7.676419734954834,
      "learning_rate": 9.98372462326127e-06,
      "loss": 0.1539,
      "step": 24940
    },
    {
      "epoch": 0.04084758743936686,
      "grad_norm": 2.914574384689331,
      "learning_rate": 9.983658731047752e-06,
      "loss": 0.1021,
      "step": 24960
    },
    {
      "epoch": 0.040880317878020205,
      "grad_norm": 6.450516223907471,
      "learning_rate": 9.983592838834236e-06,
      "loss": 0.1217,
      "step": 24980
    },
    {
      "epoch": 0.04091304831667354,
      "grad_norm": 7.718508720397949,
      "learning_rate": 9.983526946620718e-06,
      "loss": 0.1344,
      "step": 25000
    },
    {
      "epoch": 0.04094577875532688,
      "grad_norm": 1.5892976522445679,
      "learning_rate": 9.983461054407202e-06,
      "loss": 0.1343,
      "step": 25020
    },
    {
      "epoch": 0.04097850919398022,
      "grad_norm": 3.0679073333740234,
      "learning_rate": 9.983395162193683e-06,
      "loss": 0.1068,
      "step": 25040
    },
    {
      "epoch": 0.041011239632633555,
      "grad_norm": 20.408597946166992,
      "learning_rate": 9.983329269980167e-06,
      "loss": 0.1277,
      "step": 25060
    },
    {
      "epoch": 0.04104397007128689,
      "grad_norm": 11.275266647338867,
      "learning_rate": 9.98326337776665e-06,
      "loss": 0.0956,
      "step": 25080
    },
    {
      "epoch": 0.04107670050994024,
      "grad_norm": 6.539052963256836,
      "learning_rate": 9.983197485553132e-06,
      "loss": 0.1346,
      "step": 25100
    },
    {
      "epoch": 0.041109430948593574,
      "grad_norm": 3.9162533283233643,
      "learning_rate": 9.983131593339616e-06,
      "loss": 0.1203,
      "step": 25120
    },
    {
      "epoch": 0.04114216138724691,
      "grad_norm": 11.517393112182617,
      "learning_rate": 9.9830657011261e-06,
      "loss": 0.117,
      "step": 25140
    },
    {
      "epoch": 0.04117489182590025,
      "grad_norm": 6.342455863952637,
      "learning_rate": 9.982999808912582e-06,
      "loss": 0.1103,
      "step": 25160
    },
    {
      "epoch": 0.04120762226455359,
      "grad_norm": 5.202421188354492,
      "learning_rate": 9.982933916699065e-06,
      "loss": 0.116,
      "step": 25180
    },
    {
      "epoch": 0.04124035270320693,
      "grad_norm": 7.044403076171875,
      "learning_rate": 9.982868024485547e-06,
      "loss": 0.1517,
      "step": 25200
    },
    {
      "epoch": 0.04127308314186027,
      "grad_norm": 15.871227264404297,
      "learning_rate": 9.98280213227203e-06,
      "loss": 0.1078,
      "step": 25220
    },
    {
      "epoch": 0.041305813580513606,
      "grad_norm": 8.812178611755371,
      "learning_rate": 9.982736240058513e-06,
      "loss": 0.1276,
      "step": 25240
    },
    {
      "epoch": 0.041338544019166944,
      "grad_norm": 3.64548659324646,
      "learning_rate": 9.982670347844996e-06,
      "loss": 0.1041,
      "step": 25260
    },
    {
      "epoch": 0.04137127445782028,
      "grad_norm": 5.63416051864624,
      "learning_rate": 9.98260445563148e-06,
      "loss": 0.1084,
      "step": 25280
    },
    {
      "epoch": 0.041404004896473626,
      "grad_norm": 7.046689033508301,
      "learning_rate": 9.982538563417962e-06,
      "loss": 0.1063,
      "step": 25300
    },
    {
      "epoch": 0.04143673533512696,
      "grad_norm": 8.76753044128418,
      "learning_rate": 9.982472671204445e-06,
      "loss": 0.1118,
      "step": 25320
    },
    {
      "epoch": 0.0414694657737803,
      "grad_norm": 12.40533447265625,
      "learning_rate": 9.982406778990927e-06,
      "loss": 0.1084,
      "step": 25340
    },
    {
      "epoch": 0.04150219621243364,
      "grad_norm": 6.707704544067383,
      "learning_rate": 9.98234088677741e-06,
      "loss": 0.1254,
      "step": 25360
    },
    {
      "epoch": 0.041534926651086976,
      "grad_norm": 6.0255537033081055,
      "learning_rate": 9.982274994563893e-06,
      "loss": 0.1134,
      "step": 25380
    },
    {
      "epoch": 0.04156765708974031,
      "grad_norm": 4.931422233581543,
      "learning_rate": 9.982209102350376e-06,
      "loss": 0.1367,
      "step": 25400
    },
    {
      "epoch": 0.04160038752839366,
      "grad_norm": 11.95343017578125,
      "learning_rate": 9.982143210136858e-06,
      "loss": 0.1443,
      "step": 25420
    },
    {
      "epoch": 0.041633117967046995,
      "grad_norm": 15.526039123535156,
      "learning_rate": 9.982077317923342e-06,
      "loss": 0.1307,
      "step": 25440
    },
    {
      "epoch": 0.04166584840570033,
      "grad_norm": 9.631919860839844,
      "learning_rate": 9.982011425709825e-06,
      "loss": 0.1177,
      "step": 25460
    },
    {
      "epoch": 0.04169857884435367,
      "grad_norm": 5.04653263092041,
      "learning_rate": 9.981945533496307e-06,
      "loss": 0.1435,
      "step": 25480
    },
    {
      "epoch": 0.04173130928300701,
      "grad_norm": 6.820366382598877,
      "learning_rate": 9.98187964128279e-06,
      "loss": 0.1135,
      "step": 25500
    },
    {
      "epoch": 0.04176403972166035,
      "grad_norm": 7.027083396911621,
      "learning_rate": 9.981813749069274e-06,
      "loss": 0.103,
      "step": 25520
    },
    {
      "epoch": 0.04179677016031369,
      "grad_norm": 7.36294412612915,
      "learning_rate": 9.981747856855756e-06,
      "loss": 0.0932,
      "step": 25540
    },
    {
      "epoch": 0.04182950059896703,
      "grad_norm": 8.999945640563965,
      "learning_rate": 9.98168196464224e-06,
      "loss": 0.1007,
      "step": 25560
    },
    {
      "epoch": 0.041862231037620365,
      "grad_norm": 4.832837104797363,
      "learning_rate": 9.981616072428722e-06,
      "loss": 0.1334,
      "step": 25580
    },
    {
      "epoch": 0.0418949614762737,
      "grad_norm": 14.12826156616211,
      "learning_rate": 9.981550180215205e-06,
      "loss": 0.1213,
      "step": 25600
    },
    {
      "epoch": 0.04192769191492705,
      "grad_norm": 3.3703513145446777,
      "learning_rate": 9.981484288001687e-06,
      "loss": 0.1242,
      "step": 25620
    },
    {
      "epoch": 0.041960422353580384,
      "grad_norm": 3.0578958988189697,
      "learning_rate": 9.98141839578817e-06,
      "loss": 0.1077,
      "step": 25640
    },
    {
      "epoch": 0.04199315279223372,
      "grad_norm": 8.928520202636719,
      "learning_rate": 9.981352503574654e-06,
      "loss": 0.1154,
      "step": 25660
    },
    {
      "epoch": 0.04202588323088706,
      "grad_norm": 3.7919564247131348,
      "learning_rate": 9.981286611361136e-06,
      "loss": 0.1063,
      "step": 25680
    },
    {
      "epoch": 0.0420586136695404,
      "grad_norm": 9.418403625488281,
      "learning_rate": 9.98122071914762e-06,
      "loss": 0.1182,
      "step": 25700
    },
    {
      "epoch": 0.04209134410819374,
      "grad_norm": 9.060961723327637,
      "learning_rate": 9.981154826934102e-06,
      "loss": 0.1391,
      "step": 25720
    },
    {
      "epoch": 0.04212407454684708,
      "grad_norm": 5.119697570800781,
      "learning_rate": 9.981088934720585e-06,
      "loss": 0.1013,
      "step": 25740
    },
    {
      "epoch": 0.042156804985500416,
      "grad_norm": 2.601127862930298,
      "learning_rate": 9.981023042507067e-06,
      "loss": 0.1204,
      "step": 25760
    },
    {
      "epoch": 0.042189535424153754,
      "grad_norm": 15.284993171691895,
      "learning_rate": 9.98095715029355e-06,
      "loss": 0.1227,
      "step": 25780
    },
    {
      "epoch": 0.04222226586280709,
      "grad_norm": 7.127123832702637,
      "learning_rate": 9.980891258080033e-06,
      "loss": 0.1089,
      "step": 25800
    },
    {
      "epoch": 0.04225499630146043,
      "grad_norm": 6.652677059173584,
      "learning_rate": 9.980825365866516e-06,
      "loss": 0.1177,
      "step": 25820
    },
    {
      "epoch": 0.042287726740113774,
      "grad_norm": 4.143113136291504,
      "learning_rate": 9.980759473652998e-06,
      "loss": 0.1166,
      "step": 25840
    },
    {
      "epoch": 0.04232045717876711,
      "grad_norm": 3.703550338745117,
      "learning_rate": 9.980693581439482e-06,
      "loss": 0.114,
      "step": 25860
    },
    {
      "epoch": 0.04235318761742045,
      "grad_norm": 6.345151424407959,
      "learning_rate": 9.980627689225965e-06,
      "loss": 0.1274,
      "step": 25880
    },
    {
      "epoch": 0.042385918056073786,
      "grad_norm": 4.246684551239014,
      "learning_rate": 9.980561797012447e-06,
      "loss": 0.124,
      "step": 25900
    },
    {
      "epoch": 0.042418648494727124,
      "grad_norm": 9.770210266113281,
      "learning_rate": 9.98049590479893e-06,
      "loss": 0.1245,
      "step": 25920
    },
    {
      "epoch": 0.04245137893338047,
      "grad_norm": 3.1939666271209717,
      "learning_rate": 9.980430012585414e-06,
      "loss": 0.1443,
      "step": 25940
    },
    {
      "epoch": 0.042484109372033806,
      "grad_norm": 6.228965759277344,
      "learning_rate": 9.980364120371896e-06,
      "loss": 0.1365,
      "step": 25960
    },
    {
      "epoch": 0.04251683981068714,
      "grad_norm": 7.1654462814331055,
      "learning_rate": 9.98029822815838e-06,
      "loss": 0.1147,
      "step": 25980
    },
    {
      "epoch": 0.04254957024934048,
      "grad_norm": 5.070658206939697,
      "learning_rate": 9.980232335944863e-06,
      "loss": 0.11,
      "step": 26000
    },
    {
      "epoch": 0.04258230068799382,
      "grad_norm": 4.889983654022217,
      "learning_rate": 9.980166443731345e-06,
      "loss": 0.1068,
      "step": 26020
    },
    {
      "epoch": 0.04261503112664716,
      "grad_norm": 2.608869791030884,
      "learning_rate": 9.980100551517829e-06,
      "loss": 0.1353,
      "step": 26040
    },
    {
      "epoch": 0.0426477615653005,
      "grad_norm": 9.723832130432129,
      "learning_rate": 9.98003465930431e-06,
      "loss": 0.1063,
      "step": 26060
    },
    {
      "epoch": 0.04268049200395384,
      "grad_norm": 5.159085273742676,
      "learning_rate": 9.979968767090794e-06,
      "loss": 0.127,
      "step": 26080
    },
    {
      "epoch": 0.042713222442607175,
      "grad_norm": 11.869173049926758,
      "learning_rate": 9.979902874877276e-06,
      "loss": 0.1246,
      "step": 26100
    },
    {
      "epoch": 0.04274595288126051,
      "grad_norm": 24.43397331237793,
      "learning_rate": 9.97983698266376e-06,
      "loss": 0.1134,
      "step": 26120
    },
    {
      "epoch": 0.04277868331991385,
      "grad_norm": 3.6753766536712646,
      "learning_rate": 9.979771090450242e-06,
      "loss": 0.1106,
      "step": 26140
    },
    {
      "epoch": 0.042811413758567195,
      "grad_norm": 8.44984245300293,
      "learning_rate": 9.979705198236725e-06,
      "loss": 0.1047,
      "step": 26160
    },
    {
      "epoch": 0.04284414419722053,
      "grad_norm": 6.576009273529053,
      "learning_rate": 9.979639306023207e-06,
      "loss": 0.1364,
      "step": 26180
    },
    {
      "epoch": 0.04287687463587387,
      "grad_norm": 4.757560729980469,
      "learning_rate": 9.97957341380969e-06,
      "loss": 0.1148,
      "step": 26200
    },
    {
      "epoch": 0.04290960507452721,
      "grad_norm": 6.483860015869141,
      "learning_rate": 9.979507521596173e-06,
      "loss": 0.1256,
      "step": 26220
    },
    {
      "epoch": 0.042942335513180545,
      "grad_norm": 10.455313682556152,
      "learning_rate": 9.979441629382656e-06,
      "loss": 0.1098,
      "step": 26240
    },
    {
      "epoch": 0.04297506595183389,
      "grad_norm": 4.976560115814209,
      "learning_rate": 9.97937573716914e-06,
      "loss": 0.1151,
      "step": 26260
    },
    {
      "epoch": 0.04300779639048723,
      "grad_norm": 5.868634223937988,
      "learning_rate": 9.979309844955622e-06,
      "loss": 0.1336,
      "step": 26280
    },
    {
      "epoch": 0.043040526829140564,
      "grad_norm": 7.134174346923828,
      "learning_rate": 9.979243952742105e-06,
      "loss": 0.139,
      "step": 26300
    },
    {
      "epoch": 0.0430732572677939,
      "grad_norm": 12.839372634887695,
      "learning_rate": 9.979178060528589e-06,
      "loss": 0.1285,
      "step": 26320
    },
    {
      "epoch": 0.04310598770644724,
      "grad_norm": 5.951321601867676,
      "learning_rate": 9.97911216831507e-06,
      "loss": 0.1121,
      "step": 26340
    },
    {
      "epoch": 0.043138718145100584,
      "grad_norm": 9.951930046081543,
      "learning_rate": 9.979046276101554e-06,
      "loss": 0.1111,
      "step": 26360
    },
    {
      "epoch": 0.04317144858375392,
      "grad_norm": 6.113275527954102,
      "learning_rate": 9.978980383888038e-06,
      "loss": 0.1385,
      "step": 26380
    },
    {
      "epoch": 0.04320417902240726,
      "grad_norm": 5.439718246459961,
      "learning_rate": 9.97891449167452e-06,
      "loss": 0.1172,
      "step": 26400
    },
    {
      "epoch": 0.043236909461060596,
      "grad_norm": 14.778234481811523,
      "learning_rate": 9.978848599461003e-06,
      "loss": 0.119,
      "step": 26420
    },
    {
      "epoch": 0.043269639899713934,
      "grad_norm": 1.660570502281189,
      "learning_rate": 9.978782707247485e-06,
      "loss": 0.1188,
      "step": 26440
    },
    {
      "epoch": 0.04330237033836727,
      "grad_norm": 8.8650541305542,
      "learning_rate": 9.978716815033969e-06,
      "loss": 0.096,
      "step": 26460
    },
    {
      "epoch": 0.043335100777020616,
      "grad_norm": 7.36091947555542,
      "learning_rate": 9.978650922820451e-06,
      "loss": 0.1344,
      "step": 26480
    },
    {
      "epoch": 0.04336783121567395,
      "grad_norm": 4.853392124176025,
      "learning_rate": 9.978585030606934e-06,
      "loss": 0.123,
      "step": 26500
    },
    {
      "epoch": 0.04340056165432729,
      "grad_norm": 8.332679748535156,
      "learning_rate": 9.978519138393416e-06,
      "loss": 0.1314,
      "step": 26520
    },
    {
      "epoch": 0.04343329209298063,
      "grad_norm": 6.548141956329346,
      "learning_rate": 9.9784532461799e-06,
      "loss": 0.0898,
      "step": 26540
    },
    {
      "epoch": 0.043466022531633966,
      "grad_norm": 4.061398029327393,
      "learning_rate": 9.978387353966382e-06,
      "loss": 0.1088,
      "step": 26560
    },
    {
      "epoch": 0.04349875297028731,
      "grad_norm": 4.19350528717041,
      "learning_rate": 9.978321461752865e-06,
      "loss": 0.1203,
      "step": 26580
    },
    {
      "epoch": 0.04353148340894065,
      "grad_norm": 5.525568008422852,
      "learning_rate": 9.978255569539347e-06,
      "loss": 0.1176,
      "step": 26600
    },
    {
      "epoch": 0.043564213847593986,
      "grad_norm": 4.951714992523193,
      "learning_rate": 9.978189677325831e-06,
      "loss": 0.0985,
      "step": 26620
    },
    {
      "epoch": 0.04359694428624732,
      "grad_norm": 6.954432487487793,
      "learning_rate": 9.978123785112313e-06,
      "loss": 0.0979,
      "step": 26640
    },
    {
      "epoch": 0.04362967472490066,
      "grad_norm": 27.07406234741211,
      "learning_rate": 9.978057892898796e-06,
      "loss": 0.123,
      "step": 26660
    },
    {
      "epoch": 0.043662405163554005,
      "grad_norm": 8.621294975280762,
      "learning_rate": 9.97799200068528e-06,
      "loss": 0.1415,
      "step": 26680
    },
    {
      "epoch": 0.04369513560220734,
      "grad_norm": 4.519504547119141,
      "learning_rate": 9.977926108471764e-06,
      "loss": 0.1387,
      "step": 26700
    },
    {
      "epoch": 0.04372786604086068,
      "grad_norm": 6.0418009757995605,
      "learning_rate": 9.977860216258245e-06,
      "loss": 0.0992,
      "step": 26720
    },
    {
      "epoch": 0.04376059647951402,
      "grad_norm": 3.844146490097046,
      "learning_rate": 9.977794324044729e-06,
      "loss": 0.0962,
      "step": 26740
    },
    {
      "epoch": 0.043793326918167355,
      "grad_norm": 8.116514205932617,
      "learning_rate": 9.977728431831213e-06,
      "loss": 0.1217,
      "step": 26760
    },
    {
      "epoch": 0.04382605735682069,
      "grad_norm": 5.867812156677246,
      "learning_rate": 9.977662539617694e-06,
      "loss": 0.1057,
      "step": 26780
    },
    {
      "epoch": 0.04385878779547404,
      "grad_norm": 5.591709136962891,
      "learning_rate": 9.977596647404178e-06,
      "loss": 0.1162,
      "step": 26800
    },
    {
      "epoch": 0.043891518234127375,
      "grad_norm": 8.258811950683594,
      "learning_rate": 9.97753075519066e-06,
      "loss": 0.1057,
      "step": 26820
    },
    {
      "epoch": 0.04392424867278071,
      "grad_norm": 4.115600109100342,
      "learning_rate": 9.977464862977144e-06,
      "loss": 0.1157,
      "step": 26840
    },
    {
      "epoch": 0.04395697911143405,
      "grad_norm": 17.863534927368164,
      "learning_rate": 9.977398970763625e-06,
      "loss": 0.1022,
      "step": 26860
    },
    {
      "epoch": 0.04398970955008739,
      "grad_norm": 9.192305564880371,
      "learning_rate": 9.977333078550109e-06,
      "loss": 0.1162,
      "step": 26880
    },
    {
      "epoch": 0.04402243998874073,
      "grad_norm": 6.451527118682861,
      "learning_rate": 9.977267186336591e-06,
      "loss": 0.1246,
      "step": 26900
    },
    {
      "epoch": 0.04405517042739407,
      "grad_norm": 7.04578161239624,
      "learning_rate": 9.977201294123075e-06,
      "loss": 0.1182,
      "step": 26920
    },
    {
      "epoch": 0.04408790086604741,
      "grad_norm": 5.7632317543029785,
      "learning_rate": 9.977135401909556e-06,
      "loss": 0.0962,
      "step": 26940
    },
    {
      "epoch": 0.044120631304700744,
      "grad_norm": 3.678884744644165,
      "learning_rate": 9.97706950969604e-06,
      "loss": 0.1099,
      "step": 26960
    },
    {
      "epoch": 0.04415336174335408,
      "grad_norm": 2.9526443481445312,
      "learning_rate": 9.977003617482522e-06,
      "loss": 0.1162,
      "step": 26980
    },
    {
      "epoch": 0.044186092182007426,
      "grad_norm": 4.158766746520996,
      "learning_rate": 9.976937725269005e-06,
      "loss": 0.1214,
      "step": 27000
    },
    {
      "epoch": 0.044218822620660764,
      "grad_norm": 3.2841341495513916,
      "learning_rate": 9.976871833055487e-06,
      "loss": 0.107,
      "step": 27020
    },
    {
      "epoch": 0.0442515530593141,
      "grad_norm": 3.3933398723602295,
      "learning_rate": 9.976805940841971e-06,
      "loss": 0.1068,
      "step": 27040
    },
    {
      "epoch": 0.04428428349796744,
      "grad_norm": 6.211116313934326,
      "learning_rate": 9.976740048628455e-06,
      "loss": 0.1306,
      "step": 27060
    },
    {
      "epoch": 0.044317013936620776,
      "grad_norm": 7.691422462463379,
      "learning_rate": 9.976674156414936e-06,
      "loss": 0.1359,
      "step": 27080
    },
    {
      "epoch": 0.04434974437527412,
      "grad_norm": 6.714026927947998,
      "learning_rate": 9.97660826420142e-06,
      "loss": 0.1182,
      "step": 27100
    },
    {
      "epoch": 0.04438247481392746,
      "grad_norm": 4.5239973068237305,
      "learning_rate": 9.976542371987904e-06,
      "loss": 0.1444,
      "step": 27120
    },
    {
      "epoch": 0.044415205252580796,
      "grad_norm": 12.297517776489258,
      "learning_rate": 9.976476479774385e-06,
      "loss": 0.1189,
      "step": 27140
    },
    {
      "epoch": 0.04444793569123413,
      "grad_norm": 4.391872882843018,
      "learning_rate": 9.976410587560869e-06,
      "loss": 0.1229,
      "step": 27160
    },
    {
      "epoch": 0.04448066612988747,
      "grad_norm": 6.9596052169799805,
      "learning_rate": 9.976344695347353e-06,
      "loss": 0.1192,
      "step": 27180
    },
    {
      "epoch": 0.04451339656854081,
      "grad_norm": 4.44728422164917,
      "learning_rate": 9.976278803133835e-06,
      "loss": 0.1093,
      "step": 27200
    },
    {
      "epoch": 0.04454612700719415,
      "grad_norm": 10.294689178466797,
      "learning_rate": 9.976212910920318e-06,
      "loss": 0.114,
      "step": 27220
    },
    {
      "epoch": 0.04457885744584749,
      "grad_norm": 6.807768821716309,
      "learning_rate": 9.9761470187068e-06,
      "loss": 0.1319,
      "step": 27240
    },
    {
      "epoch": 0.04461158788450083,
      "grad_norm": 15.227958679199219,
      "learning_rate": 9.976081126493284e-06,
      "loss": 0.1295,
      "step": 27260
    },
    {
      "epoch": 0.044644318323154165,
      "grad_norm": 4.479367256164551,
      "learning_rate": 9.976015234279766e-06,
      "loss": 0.1256,
      "step": 27280
    },
    {
      "epoch": 0.0446770487618075,
      "grad_norm": 4.858795642852783,
      "learning_rate": 9.975949342066249e-06,
      "loss": 0.1121,
      "step": 27300
    },
    {
      "epoch": 0.04470977920046085,
      "grad_norm": 3.344428062438965,
      "learning_rate": 9.975883449852731e-06,
      "loss": 0.1217,
      "step": 27320
    },
    {
      "epoch": 0.044742509639114185,
      "grad_norm": 2.039834499359131,
      "learning_rate": 9.975817557639215e-06,
      "loss": 0.0915,
      "step": 27340
    },
    {
      "epoch": 0.04477524007776752,
      "grad_norm": 35.48830795288086,
      "learning_rate": 9.975751665425696e-06,
      "loss": 0.1491,
      "step": 27360
    },
    {
      "epoch": 0.04480797051642086,
      "grad_norm": 9.458203315734863,
      "learning_rate": 9.97568577321218e-06,
      "loss": 0.0947,
      "step": 27380
    },
    {
      "epoch": 0.0448407009550742,
      "grad_norm": 7.268515586853027,
      "learning_rate": 9.975619880998664e-06,
      "loss": 0.1387,
      "step": 27400
    },
    {
      "epoch": 0.04487343139372754,
      "grad_norm": 3.9507012367248535,
      "learning_rate": 9.975553988785146e-06,
      "loss": 0.106,
      "step": 27420
    },
    {
      "epoch": 0.04490616183238088,
      "grad_norm": 2.5886528491973877,
      "learning_rate": 9.975488096571629e-06,
      "loss": 0.1135,
      "step": 27440
    },
    {
      "epoch": 0.04493889227103422,
      "grad_norm": 3.946699380874634,
      "learning_rate": 9.975422204358111e-06,
      "loss": 0.1024,
      "step": 27460
    },
    {
      "epoch": 0.044971622709687555,
      "grad_norm": 5.046215534210205,
      "learning_rate": 9.975356312144595e-06,
      "loss": 0.1226,
      "step": 27480
    },
    {
      "epoch": 0.04500435314834089,
      "grad_norm": 21.509140014648438,
      "learning_rate": 9.975290419931078e-06,
      "loss": 0.1003,
      "step": 27500
    },
    {
      "epoch": 0.04503708358699423,
      "grad_norm": 4.85641622543335,
      "learning_rate": 9.97522452771756e-06,
      "loss": 0.1264,
      "step": 27520
    },
    {
      "epoch": 0.045069814025647574,
      "grad_norm": 4.854999542236328,
      "learning_rate": 9.975158635504044e-06,
      "loss": 0.1023,
      "step": 27540
    },
    {
      "epoch": 0.04510254446430091,
      "grad_norm": 5.07082986831665,
      "learning_rate": 9.975092743290527e-06,
      "loss": 0.1083,
      "step": 27560
    },
    {
      "epoch": 0.04513527490295425,
      "grad_norm": 9.002791404724121,
      "learning_rate": 9.97502685107701e-06,
      "loss": 0.111,
      "step": 27580
    },
    {
      "epoch": 0.04516800534160759,
      "grad_norm": 3.8830504417419434,
      "learning_rate": 9.974960958863493e-06,
      "loss": 0.1109,
      "step": 27600
    },
    {
      "epoch": 0.045200735780260924,
      "grad_norm": 6.2538628578186035,
      "learning_rate": 9.974895066649975e-06,
      "loss": 0.1072,
      "step": 27620
    },
    {
      "epoch": 0.04523346621891427,
      "grad_norm": 6.152276039123535,
      "learning_rate": 9.974829174436458e-06,
      "loss": 0.1089,
      "step": 27640
    },
    {
      "epoch": 0.045266196657567606,
      "grad_norm": 6.128051280975342,
      "learning_rate": 9.97476328222294e-06,
      "loss": 0.0915,
      "step": 27660
    },
    {
      "epoch": 0.045298927096220944,
      "grad_norm": 2.089028835296631,
      "learning_rate": 9.974697390009424e-06,
      "loss": 0.1153,
      "step": 27680
    },
    {
      "epoch": 0.04533165753487428,
      "grad_norm": 4.960931777954102,
      "learning_rate": 9.974631497795906e-06,
      "loss": 0.1176,
      "step": 27700
    },
    {
      "epoch": 0.04536438797352762,
      "grad_norm": 2.3593802452087402,
      "learning_rate": 9.97456560558239e-06,
      "loss": 0.1055,
      "step": 27720
    },
    {
      "epoch": 0.04539711841218096,
      "grad_norm": 7.571661472320557,
      "learning_rate": 9.974499713368871e-06,
      "loss": 0.1275,
      "step": 27740
    },
    {
      "epoch": 0.0454298488508343,
      "grad_norm": 4.757933139801025,
      "learning_rate": 9.974433821155355e-06,
      "loss": 0.1064,
      "step": 27760
    },
    {
      "epoch": 0.04546257928948764,
      "grad_norm": 11.210277557373047,
      "learning_rate": 9.974367928941838e-06,
      "loss": 0.1107,
      "step": 27780
    },
    {
      "epoch": 0.045495309728140976,
      "grad_norm": 7.742449760437012,
      "learning_rate": 9.97430203672832e-06,
      "loss": 0.1356,
      "step": 27800
    },
    {
      "epoch": 0.04552804016679431,
      "grad_norm": 5.040927886962891,
      "learning_rate": 9.974236144514804e-06,
      "loss": 0.1109,
      "step": 27820
    },
    {
      "epoch": 0.04556077060544765,
      "grad_norm": 2.0855612754821777,
      "learning_rate": 9.974170252301286e-06,
      "loss": 0.0961,
      "step": 27840
    },
    {
      "epoch": 0.045593501044100995,
      "grad_norm": 3.7648115158081055,
      "learning_rate": 9.97410436008777e-06,
      "loss": 0.1101,
      "step": 27860
    },
    {
      "epoch": 0.04562623148275433,
      "grad_norm": 6.24547004699707,
      "learning_rate": 9.974038467874251e-06,
      "loss": 0.1107,
      "step": 27880
    },
    {
      "epoch": 0.04565896192140767,
      "grad_norm": 5.112504005432129,
      "learning_rate": 9.973972575660735e-06,
      "loss": 0.1167,
      "step": 27900
    },
    {
      "epoch": 0.04569169236006101,
      "grad_norm": 5.76184606552124,
      "learning_rate": 9.973906683447218e-06,
      "loss": 0.156,
      "step": 27920
    },
    {
      "epoch": 0.045724422798714345,
      "grad_norm": 1.6138229370117188,
      "learning_rate": 9.9738407912337e-06,
      "loss": 0.1136,
      "step": 27940
    },
    {
      "epoch": 0.04575715323736769,
      "grad_norm": 3.200138568878174,
      "learning_rate": 9.973774899020184e-06,
      "loss": 0.1093,
      "step": 27960
    },
    {
      "epoch": 0.04578988367602103,
      "grad_norm": 8.08151912689209,
      "learning_rate": 9.973709006806667e-06,
      "loss": 0.1023,
      "step": 27980
    },
    {
      "epoch": 0.045822614114674365,
      "grad_norm": 7.432888984680176,
      "learning_rate": 9.97364311459315e-06,
      "loss": 0.1288,
      "step": 28000
    },
    {
      "epoch": 0.0458553445533277,
      "grad_norm": 11.465738296508789,
      "learning_rate": 9.973577222379633e-06,
      "loss": 0.1206,
      "step": 28020
    },
    {
      "epoch": 0.04588807499198104,
      "grad_norm": 9.631192207336426,
      "learning_rate": 9.973511330166115e-06,
      "loss": 0.1181,
      "step": 28040
    },
    {
      "epoch": 0.045920805430634384,
      "grad_norm": 7.590926647186279,
      "learning_rate": 9.973445437952598e-06,
      "loss": 0.0875,
      "step": 28060
    },
    {
      "epoch": 0.04595353586928772,
      "grad_norm": 8.903188705444336,
      "learning_rate": 9.97337954573908e-06,
      "loss": 0.1084,
      "step": 28080
    },
    {
      "epoch": 0.04598626630794106,
      "grad_norm": 4.306631565093994,
      "learning_rate": 9.973313653525564e-06,
      "loss": 0.1027,
      "step": 28100
    },
    {
      "epoch": 0.0460189967465944,
      "grad_norm": 3.6251442432403564,
      "learning_rate": 9.973247761312047e-06,
      "loss": 0.1079,
      "step": 28120
    },
    {
      "epoch": 0.046051727185247734,
      "grad_norm": 6.819687366485596,
      "learning_rate": 9.97318186909853e-06,
      "loss": 0.1229,
      "step": 28140
    },
    {
      "epoch": 0.04608445762390108,
      "grad_norm": 4.19286584854126,
      "learning_rate": 9.973115976885013e-06,
      "loss": 0.125,
      "step": 28160
    },
    {
      "epoch": 0.046117188062554416,
      "grad_norm": 8.069479942321777,
      "learning_rate": 9.973050084671495e-06,
      "loss": 0.1149,
      "step": 28180
    },
    {
      "epoch": 0.046149918501207754,
      "grad_norm": 3.6654772758483887,
      "learning_rate": 9.972984192457978e-06,
      "loss": 0.1189,
      "step": 28200
    },
    {
      "epoch": 0.04618264893986109,
      "grad_norm": 1.5150068998336792,
      "learning_rate": 9.97291830024446e-06,
      "loss": 0.1023,
      "step": 28220
    },
    {
      "epoch": 0.04621537937851443,
      "grad_norm": 6.4413862228393555,
      "learning_rate": 9.972852408030944e-06,
      "loss": 0.115,
      "step": 28240
    },
    {
      "epoch": 0.04624810981716777,
      "grad_norm": 9.676337242126465,
      "learning_rate": 9.972786515817426e-06,
      "loss": 0.1251,
      "step": 28260
    },
    {
      "epoch": 0.04628084025582111,
      "grad_norm": 6.527968406677246,
      "learning_rate": 9.97272062360391e-06,
      "loss": 0.1096,
      "step": 28280
    },
    {
      "epoch": 0.04631357069447445,
      "grad_norm": 10.42522144317627,
      "learning_rate": 9.972654731390393e-06,
      "loss": 0.1457,
      "step": 28300
    },
    {
      "epoch": 0.046346301133127786,
      "grad_norm": 2.820821762084961,
      "learning_rate": 9.972588839176875e-06,
      "loss": 0.1041,
      "step": 28320
    },
    {
      "epoch": 0.046379031571781124,
      "grad_norm": 4.4641499519348145,
      "learning_rate": 9.972522946963358e-06,
      "loss": 0.1064,
      "step": 28340
    },
    {
      "epoch": 0.04641176201043446,
      "grad_norm": 6.341438293457031,
      "learning_rate": 9.972457054749842e-06,
      "loss": 0.1005,
      "step": 28360
    },
    {
      "epoch": 0.046444492449087806,
      "grad_norm": 3.483017683029175,
      "learning_rate": 9.972391162536324e-06,
      "loss": 0.1269,
      "step": 28380
    },
    {
      "epoch": 0.04647722288774114,
      "grad_norm": 4.074943542480469,
      "learning_rate": 9.972325270322807e-06,
      "loss": 0.1088,
      "step": 28400
    },
    {
      "epoch": 0.04650995332639448,
      "grad_norm": 10.356833457946777,
      "learning_rate": 9.97225937810929e-06,
      "loss": 0.1217,
      "step": 28420
    },
    {
      "epoch": 0.04654268376504782,
      "grad_norm": 7.4701972007751465,
      "learning_rate": 9.972193485895773e-06,
      "loss": 0.0993,
      "step": 28440
    },
    {
      "epoch": 0.046575414203701156,
      "grad_norm": 6.265692710876465,
      "learning_rate": 9.972127593682256e-06,
      "loss": 0.1047,
      "step": 28460
    },
    {
      "epoch": 0.0466081446423545,
      "grad_norm": 2.9130115509033203,
      "learning_rate": 9.972061701468738e-06,
      "loss": 0.1018,
      "step": 28480
    },
    {
      "epoch": 0.04664087508100784,
      "grad_norm": 5.987173557281494,
      "learning_rate": 9.971995809255222e-06,
      "loss": 0.0922,
      "step": 28500
    },
    {
      "epoch": 0.046673605519661175,
      "grad_norm": 4.9118852615356445,
      "learning_rate": 9.971929917041704e-06,
      "loss": 0.1177,
      "step": 28520
    },
    {
      "epoch": 0.04670633595831451,
      "grad_norm": 9.366340637207031,
      "learning_rate": 9.971864024828187e-06,
      "loss": 0.1109,
      "step": 28540
    },
    {
      "epoch": 0.04673906639696785,
      "grad_norm": 5.527575969696045,
      "learning_rate": 9.97179813261467e-06,
      "loss": 0.1334,
      "step": 28560
    },
    {
      "epoch": 0.04677179683562119,
      "grad_norm": 2.791210889816284,
      "learning_rate": 9.971732240401153e-06,
      "loss": 0.0924,
      "step": 28580
    },
    {
      "epoch": 0.04680452727427453,
      "grad_norm": 1.5366835594177246,
      "learning_rate": 9.971666348187635e-06,
      "loss": 0.1302,
      "step": 28600
    },
    {
      "epoch": 0.04683725771292787,
      "grad_norm": 2.338879108428955,
      "learning_rate": 9.971600455974118e-06,
      "loss": 0.1261,
      "step": 28620
    },
    {
      "epoch": 0.04686998815158121,
      "grad_norm": 6.489835739135742,
      "learning_rate": 9.9715345637606e-06,
      "loss": 0.1193,
      "step": 28640
    },
    {
      "epoch": 0.046902718590234545,
      "grad_norm": 4.440120697021484,
      "learning_rate": 9.971468671547084e-06,
      "loss": 0.1266,
      "step": 28660
    },
    {
      "epoch": 0.04693544902888788,
      "grad_norm": 6.294658660888672,
      "learning_rate": 9.971402779333566e-06,
      "loss": 0.1177,
      "step": 28680
    },
    {
      "epoch": 0.04696817946754123,
      "grad_norm": 5.29378604888916,
      "learning_rate": 9.97133688712005e-06,
      "loss": 0.1175,
      "step": 28700
    },
    {
      "epoch": 0.047000909906194564,
      "grad_norm": 4.531035900115967,
      "learning_rate": 9.971270994906533e-06,
      "loss": 0.1034,
      "step": 28720
    },
    {
      "epoch": 0.0470336403448479,
      "grad_norm": 4.67353630065918,
      "learning_rate": 9.971205102693015e-06,
      "loss": 0.121,
      "step": 28740
    },
    {
      "epoch": 0.04706637078350124,
      "grad_norm": 8.74715518951416,
      "learning_rate": 9.971139210479498e-06,
      "loss": 0.0998,
      "step": 28760
    },
    {
      "epoch": 0.04709910122215458,
      "grad_norm": 14.214385032653809,
      "learning_rate": 9.971073318265982e-06,
      "loss": 0.1112,
      "step": 28780
    },
    {
      "epoch": 0.04713183166080792,
      "grad_norm": 12.743973731994629,
      "learning_rate": 9.971007426052464e-06,
      "loss": 0.1214,
      "step": 28800
    },
    {
      "epoch": 0.04716456209946126,
      "grad_norm": 3.7906930446624756,
      "learning_rate": 9.970941533838947e-06,
      "loss": 0.1115,
      "step": 28820
    },
    {
      "epoch": 0.047197292538114596,
      "grad_norm": 6.580860614776611,
      "learning_rate": 9.970875641625431e-06,
      "loss": 0.1013,
      "step": 28840
    },
    {
      "epoch": 0.047230022976767934,
      "grad_norm": 2.685469388961792,
      "learning_rate": 9.970809749411913e-06,
      "loss": 0.1051,
      "step": 28860
    },
    {
      "epoch": 0.04726275341542127,
      "grad_norm": 7.486483097076416,
      "learning_rate": 9.970743857198397e-06,
      "loss": 0.1021,
      "step": 28880
    },
    {
      "epoch": 0.04729548385407461,
      "grad_norm": 16.267383575439453,
      "learning_rate": 9.970677964984878e-06,
      "loss": 0.1066,
      "step": 28900
    },
    {
      "epoch": 0.04732821429272795,
      "grad_norm": 2.251573324203491,
      "learning_rate": 9.970612072771362e-06,
      "loss": 0.0862,
      "step": 28920
    },
    {
      "epoch": 0.04736094473138129,
      "grad_norm": 9.93088150024414,
      "learning_rate": 9.970546180557844e-06,
      "loss": 0.121,
      "step": 28940
    },
    {
      "epoch": 0.04739367517003463,
      "grad_norm": 6.835384368896484,
      "learning_rate": 9.970480288344328e-06,
      "loss": 0.1042,
      "step": 28960
    },
    {
      "epoch": 0.047426405608687966,
      "grad_norm": 4.954787254333496,
      "learning_rate": 9.97041439613081e-06,
      "loss": 0.1168,
      "step": 28980
    },
    {
      "epoch": 0.047459136047341303,
      "grad_norm": 5.2623796463012695,
      "learning_rate": 9.970348503917293e-06,
      "loss": 0.1138,
      "step": 29000
    },
    {
      "epoch": 0.04749186648599465,
      "grad_norm": 16.61541748046875,
      "learning_rate": 9.970282611703775e-06,
      "loss": 0.1251,
      "step": 29020
    },
    {
      "epoch": 0.047524596924647985,
      "grad_norm": 2.603572368621826,
      "learning_rate": 9.970216719490258e-06,
      "loss": 0.1192,
      "step": 29040
    },
    {
      "epoch": 0.04755732736330132,
      "grad_norm": 3.9634971618652344,
      "learning_rate": 9.97015082727674e-06,
      "loss": 0.0903,
      "step": 29060
    },
    {
      "epoch": 0.04759005780195466,
      "grad_norm": 13.551704406738281,
      "learning_rate": 9.970084935063224e-06,
      "loss": 0.1373,
      "step": 29080
    },
    {
      "epoch": 0.047622788240608,
      "grad_norm": 2.8928399085998535,
      "learning_rate": 9.970019042849708e-06,
      "loss": 0.1093,
      "step": 29100
    },
    {
      "epoch": 0.04765551867926134,
      "grad_norm": 3.36847186088562,
      "learning_rate": 9.96995315063619e-06,
      "loss": 0.1239,
      "step": 29120
    },
    {
      "epoch": 0.04768824911791468,
      "grad_norm": 3.1544058322906494,
      "learning_rate": 9.969887258422673e-06,
      "loss": 0.0917,
      "step": 29140
    },
    {
      "epoch": 0.04772097955656802,
      "grad_norm": 8.973612785339355,
      "learning_rate": 9.969821366209157e-06,
      "loss": 0.103,
      "step": 29160
    },
    {
      "epoch": 0.047753709995221355,
      "grad_norm": 3.1668081283569336,
      "learning_rate": 9.969755473995639e-06,
      "loss": 0.1352,
      "step": 29180
    },
    {
      "epoch": 0.04778644043387469,
      "grad_norm": 4.02304220199585,
      "learning_rate": 9.969689581782122e-06,
      "loss": 0.1092,
      "step": 29200
    },
    {
      "epoch": 0.04781917087252804,
      "grad_norm": 4.412359237670898,
      "learning_rate": 9.969623689568606e-06,
      "loss": 0.1008,
      "step": 29220
    },
    {
      "epoch": 0.047851901311181375,
      "grad_norm": 9.531179428100586,
      "learning_rate": 9.969557797355088e-06,
      "loss": 0.1348,
      "step": 29240
    },
    {
      "epoch": 0.04788463174983471,
      "grad_norm": 3.9463486671447754,
      "learning_rate": 9.969491905141571e-06,
      "loss": 0.108,
      "step": 29260
    },
    {
      "epoch": 0.04791736218848805,
      "grad_norm": 5.439239025115967,
      "learning_rate": 9.969426012928053e-06,
      "loss": 0.1164,
      "step": 29280
    },
    {
      "epoch": 0.04795009262714139,
      "grad_norm": 6.498740196228027,
      "learning_rate": 9.969360120714537e-06,
      "loss": 0.1189,
      "step": 29300
    },
    {
      "epoch": 0.047982823065794725,
      "grad_norm": 4.762739658355713,
      "learning_rate": 9.969294228501019e-06,
      "loss": 0.1009,
      "step": 29320
    },
    {
      "epoch": 0.04801555350444807,
      "grad_norm": 8.017991065979004,
      "learning_rate": 9.969228336287502e-06,
      "loss": 0.0972,
      "step": 29340
    },
    {
      "epoch": 0.04804828394310141,
      "grad_norm": 3.3213396072387695,
      "learning_rate": 9.969162444073984e-06,
      "loss": 0.0793,
      "step": 29360
    },
    {
      "epoch": 0.048081014381754744,
      "grad_norm": 6.151182174682617,
      "learning_rate": 9.969096551860468e-06,
      "loss": 0.1256,
      "step": 29380
    },
    {
      "epoch": 0.04811374482040808,
      "grad_norm": 7.468193531036377,
      "learning_rate": 9.96903065964695e-06,
      "loss": 0.1273,
      "step": 29400
    },
    {
      "epoch": 0.04814647525906142,
      "grad_norm": 5.27855920791626,
      "learning_rate": 9.968964767433433e-06,
      "loss": 0.1308,
      "step": 29420
    },
    {
      "epoch": 0.048179205697714764,
      "grad_norm": 7.590419769287109,
      "learning_rate": 9.968898875219915e-06,
      "loss": 0.1219,
      "step": 29440
    },
    {
      "epoch": 0.0482119361363681,
      "grad_norm": 3.5693466663360596,
      "learning_rate": 9.968832983006399e-06,
      "loss": 0.1034,
      "step": 29460
    },
    {
      "epoch": 0.04824466657502144,
      "grad_norm": 4.240338325500488,
      "learning_rate": 9.968767090792882e-06,
      "loss": 0.1109,
      "step": 29480
    },
    {
      "epoch": 0.048277397013674776,
      "grad_norm": 6.737329483032227,
      "learning_rate": 9.968701198579364e-06,
      "loss": 0.1192,
      "step": 29500
    },
    {
      "epoch": 0.048310127452328114,
      "grad_norm": 15.185169219970703,
      "learning_rate": 9.968635306365848e-06,
      "loss": 0.0994,
      "step": 29520
    },
    {
      "epoch": 0.04834285789098146,
      "grad_norm": 9.138763427734375,
      "learning_rate": 9.968569414152331e-06,
      "loss": 0.1048,
      "step": 29540
    },
    {
      "epoch": 0.048375588329634796,
      "grad_norm": 5.08315896987915,
      "learning_rate": 9.968503521938813e-06,
      "loss": 0.1152,
      "step": 29560
    },
    {
      "epoch": 0.04840831876828813,
      "grad_norm": 1.815224289894104,
      "learning_rate": 9.968437629725297e-06,
      "loss": 0.095,
      "step": 29580
    },
    {
      "epoch": 0.04844104920694147,
      "grad_norm": 2.274041175842285,
      "learning_rate": 9.96837173751178e-06,
      "loss": 0.1029,
      "step": 29600
    },
    {
      "epoch": 0.04847377964559481,
      "grad_norm": 5.200857162475586,
      "learning_rate": 9.968305845298262e-06,
      "loss": 0.1183,
      "step": 29620
    },
    {
      "epoch": 0.048506510084248146,
      "grad_norm": 6.050721645355225,
      "learning_rate": 9.968239953084746e-06,
      "loss": 0.1163,
      "step": 29640
    },
    {
      "epoch": 0.04853924052290149,
      "grad_norm": 3.1150190830230713,
      "learning_rate": 9.968174060871228e-06,
      "loss": 0.1011,
      "step": 29660
    },
    {
      "epoch": 0.04857197096155483,
      "grad_norm": 10.819991111755371,
      "learning_rate": 9.968108168657711e-06,
      "loss": 0.1241,
      "step": 29680
    },
    {
      "epoch": 0.048604701400208165,
      "grad_norm": 4.12315559387207,
      "learning_rate": 9.968042276444193e-06,
      "loss": 0.1071,
      "step": 29700
    },
    {
      "epoch": 0.0486374318388615,
      "grad_norm": 4.083725929260254,
      "learning_rate": 9.967976384230677e-06,
      "loss": 0.1171,
      "step": 29720
    },
    {
      "epoch": 0.04867016227751484,
      "grad_norm": 1.31851327419281,
      "learning_rate": 9.967910492017159e-06,
      "loss": 0.1067,
      "step": 29740
    },
    {
      "epoch": 0.048702892716168185,
      "grad_norm": 5.660862445831299,
      "learning_rate": 9.967844599803642e-06,
      "loss": 0.1098,
      "step": 29760
    },
    {
      "epoch": 0.04873562315482152,
      "grad_norm": 6.289471626281738,
      "learning_rate": 9.967778707590124e-06,
      "loss": 0.0992,
      "step": 29780
    },
    {
      "epoch": 0.04876835359347486,
      "grad_norm": 11.108476638793945,
      "learning_rate": 9.967712815376608e-06,
      "loss": 0.1309,
      "step": 29800
    },
    {
      "epoch": 0.0488010840321282,
      "grad_norm": 21.670166015625,
      "learning_rate": 9.96764692316309e-06,
      "loss": 0.13,
      "step": 29820
    },
    {
      "epoch": 0.048833814470781535,
      "grad_norm": 6.975784778594971,
      "learning_rate": 9.967581030949573e-06,
      "loss": 0.1115,
      "step": 29840
    },
    {
      "epoch": 0.04886654490943488,
      "grad_norm": 5.267386436462402,
      "learning_rate": 9.967515138736057e-06,
      "loss": 0.1384,
      "step": 29860
    },
    {
      "epoch": 0.04889927534808822,
      "grad_norm": 3.9899682998657227,
      "learning_rate": 9.967449246522539e-06,
      "loss": 0.1048,
      "step": 29880
    },
    {
      "epoch": 0.048932005786741554,
      "grad_norm": 8.592329025268555,
      "learning_rate": 9.967383354309022e-06,
      "loss": 0.1148,
      "step": 29900
    },
    {
      "epoch": 0.04896473622539489,
      "grad_norm": 5.735074043273926,
      "learning_rate": 9.967317462095504e-06,
      "loss": 0.1148,
      "step": 29920
    },
    {
      "epoch": 0.04899746666404823,
      "grad_norm": 8.289730072021484,
      "learning_rate": 9.967251569881988e-06,
      "loss": 0.1513,
      "step": 29940
    },
    {
      "epoch": 0.04903019710270157,
      "grad_norm": 22.038284301757812,
      "learning_rate": 9.967185677668471e-06,
      "loss": 0.1127,
      "step": 29960
    },
    {
      "epoch": 0.04906292754135491,
      "grad_norm": 7.269028663635254,
      "learning_rate": 9.967119785454953e-06,
      "loss": 0.1079,
      "step": 29980
    },
    {
      "epoch": 0.04909565798000825,
      "grad_norm": 10.390848159790039,
      "learning_rate": 9.967053893241437e-06,
      "loss": 0.114,
      "step": 30000
    },
    {
      "epoch": 0.04912838841866159,
      "grad_norm": 4.6610212326049805,
      "learning_rate": 9.96698800102792e-06,
      "loss": 0.1193,
      "step": 30020
    },
    {
      "epoch": 0.049161118857314924,
      "grad_norm": 10.308993339538574,
      "learning_rate": 9.966922108814402e-06,
      "loss": 0.0895,
      "step": 30040
    },
    {
      "epoch": 0.04919384929596826,
      "grad_norm": 5.393836975097656,
      "learning_rate": 9.966856216600886e-06,
      "loss": 0.1141,
      "step": 30060
    },
    {
      "epoch": 0.049226579734621606,
      "grad_norm": 9.904547691345215,
      "learning_rate": 9.966790324387368e-06,
      "loss": 0.1234,
      "step": 30080
    },
    {
      "epoch": 0.049259310173274944,
      "grad_norm": 4.20170259475708,
      "learning_rate": 9.966724432173851e-06,
      "loss": 0.1087,
      "step": 30100
    },
    {
      "epoch": 0.04929204061192828,
      "grad_norm": 2.427074432373047,
      "learning_rate": 9.966658539960333e-06,
      "loss": 0.107,
      "step": 30120
    },
    {
      "epoch": 0.04932477105058162,
      "grad_norm": 1.619802474975586,
      "learning_rate": 9.966592647746817e-06,
      "loss": 0.1264,
      "step": 30140
    },
    {
      "epoch": 0.049357501489234956,
      "grad_norm": 7.567395210266113,
      "learning_rate": 9.966526755533299e-06,
      "loss": 0.0967,
      "step": 30160
    },
    {
      "epoch": 0.0493902319278883,
      "grad_norm": 5.9151506423950195,
      "learning_rate": 9.966460863319782e-06,
      "loss": 0.1158,
      "step": 30180
    },
    {
      "epoch": 0.04942296236654164,
      "grad_norm": 12.563018798828125,
      "learning_rate": 9.966394971106264e-06,
      "loss": 0.1097,
      "step": 30200
    },
    {
      "epoch": 0.049455692805194976,
      "grad_norm": 4.0431227684021,
      "learning_rate": 9.966329078892748e-06,
      "loss": 0.0979,
      "step": 30220
    },
    {
      "epoch": 0.04948842324384831,
      "grad_norm": 7.060274124145508,
      "learning_rate": 9.966263186679231e-06,
      "loss": 0.1064,
      "step": 30240
    },
    {
      "epoch": 0.04952115368250165,
      "grad_norm": 4.435366153717041,
      "learning_rate": 9.966197294465713e-06,
      "loss": 0.1301,
      "step": 30260
    },
    {
      "epoch": 0.04955388412115499,
      "grad_norm": 5.100098609924316,
      "learning_rate": 9.966131402252197e-06,
      "loss": 0.1022,
      "step": 30280
    },
    {
      "epoch": 0.04958661455980833,
      "grad_norm": 6.150872707366943,
      "learning_rate": 9.966065510038679e-06,
      "loss": 0.1105,
      "step": 30300
    },
    {
      "epoch": 0.04961934499846167,
      "grad_norm": 2.422039270401001,
      "learning_rate": 9.965999617825162e-06,
      "loss": 0.0945,
      "step": 30320
    },
    {
      "epoch": 0.04965207543711501,
      "grad_norm": 20.661287307739258,
      "learning_rate": 9.965933725611646e-06,
      "loss": 0.1184,
      "step": 30340
    },
    {
      "epoch": 0.049684805875768345,
      "grad_norm": 7.7633514404296875,
      "learning_rate": 9.965867833398128e-06,
      "loss": 0.1074,
      "step": 30360
    },
    {
      "epoch": 0.04971753631442168,
      "grad_norm": 3.8163487911224365,
      "learning_rate": 9.965801941184611e-06,
      "loss": 0.1157,
      "step": 30380
    },
    {
      "epoch": 0.04975026675307503,
      "grad_norm": 1.6903438568115234,
      "learning_rate": 9.965736048971095e-06,
      "loss": 0.1087,
      "step": 30400
    },
    {
      "epoch": 0.049782997191728365,
      "grad_norm": 3.413769006729126,
      "learning_rate": 9.965670156757577e-06,
      "loss": 0.0904,
      "step": 30420
    },
    {
      "epoch": 0.0498157276303817,
      "grad_norm": 2.638054847717285,
      "learning_rate": 9.96560426454406e-06,
      "loss": 0.1036,
      "step": 30440
    },
    {
      "epoch": 0.04984845806903504,
      "grad_norm": 2.9333388805389404,
      "learning_rate": 9.965538372330542e-06,
      "loss": 0.0835,
      "step": 30460
    },
    {
      "epoch": 0.04988118850768838,
      "grad_norm": 7.759299278259277,
      "learning_rate": 9.965472480117026e-06,
      "loss": 0.1109,
      "step": 30480
    },
    {
      "epoch": 0.04991391894634172,
      "grad_norm": 3.330723524093628,
      "learning_rate": 9.965406587903508e-06,
      "loss": 0.1276,
      "step": 30500
    },
    {
      "epoch": 0.04994664938499506,
      "grad_norm": 2.9354283809661865,
      "learning_rate": 9.965340695689991e-06,
      "loss": 0.1082,
      "step": 30520
    },
    {
      "epoch": 0.0499793798236484,
      "grad_norm": 2.7772703170776367,
      "learning_rate": 9.965274803476473e-06,
      "loss": 0.1068,
      "step": 30540
    },
    {
      "epoch": 0.050012110262301734,
      "grad_norm": 10.98317813873291,
      "learning_rate": 9.965208911262957e-06,
      "loss": 0.1087,
      "step": 30560
    },
    {
      "epoch": 0.05004484070095507,
      "grad_norm": 8.79317855834961,
      "learning_rate": 9.96514301904944e-06,
      "loss": 0.1107,
      "step": 30580
    },
    {
      "epoch": 0.050077571139608416,
      "grad_norm": 5.664209842681885,
      "learning_rate": 9.965077126835922e-06,
      "loss": 0.093,
      "step": 30600
    },
    {
      "epoch": 0.050110301578261754,
      "grad_norm": 5.876160621643066,
      "learning_rate": 9.965011234622406e-06,
      "loss": 0.1285,
      "step": 30620
    },
    {
      "epoch": 0.05014303201691509,
      "grad_norm": 3.739854574203491,
      "learning_rate": 9.964945342408888e-06,
      "loss": 0.122,
      "step": 30640
    },
    {
      "epoch": 0.05017576245556843,
      "grad_norm": 6.157211780548096,
      "learning_rate": 9.964879450195371e-06,
      "loss": 0.1236,
      "step": 30660
    },
    {
      "epoch": 0.050208492894221766,
      "grad_norm": 2.9947426319122314,
      "learning_rate": 9.964813557981853e-06,
      "loss": 0.0897,
      "step": 30680
    },
    {
      "epoch": 0.050241223332875104,
      "grad_norm": 3.9403746128082275,
      "learning_rate": 9.964747665768337e-06,
      "loss": 0.0986,
      "step": 30700
    },
    {
      "epoch": 0.05027395377152845,
      "grad_norm": 6.227378845214844,
      "learning_rate": 9.964681773554819e-06,
      "loss": 0.1243,
      "step": 30720
    },
    {
      "epoch": 0.050306684210181786,
      "grad_norm": 4.715326309204102,
      "learning_rate": 9.964615881341302e-06,
      "loss": 0.1088,
      "step": 30740
    },
    {
      "epoch": 0.050339414648835124,
      "grad_norm": 4.223259449005127,
      "learning_rate": 9.964549989127786e-06,
      "loss": 0.1299,
      "step": 30760
    },
    {
      "epoch": 0.05037214508748846,
      "grad_norm": 6.141721248626709,
      "learning_rate": 9.964484096914268e-06,
      "loss": 0.0989,
      "step": 30780
    },
    {
      "epoch": 0.0504048755261418,
      "grad_norm": 4.334139347076416,
      "learning_rate": 9.964418204700751e-06,
      "loss": 0.0983,
      "step": 30800
    },
    {
      "epoch": 0.05043760596479514,
      "grad_norm": 6.234640121459961,
      "learning_rate": 9.964352312487235e-06,
      "loss": 0.1316,
      "step": 30820
    },
    {
      "epoch": 0.05047033640344848,
      "grad_norm": 6.453659534454346,
      "learning_rate": 9.964286420273717e-06,
      "loss": 0.1205,
      "step": 30840
    },
    {
      "epoch": 0.05050306684210182,
      "grad_norm": 3.532181978225708,
      "learning_rate": 9.9642205280602e-06,
      "loss": 0.1099,
      "step": 30860
    },
    {
      "epoch": 0.050535797280755156,
      "grad_norm": 4.014083385467529,
      "learning_rate": 9.964154635846682e-06,
      "loss": 0.1175,
      "step": 30880
    },
    {
      "epoch": 0.05056852771940849,
      "grad_norm": 6.05753755569458,
      "learning_rate": 9.964088743633166e-06,
      "loss": 0.1085,
      "step": 30900
    },
    {
      "epoch": 0.05060125815806184,
      "grad_norm": 5.967837810516357,
      "learning_rate": 9.96402285141965e-06,
      "loss": 0.0895,
      "step": 30920
    },
    {
      "epoch": 0.050633988596715175,
      "grad_norm": 3.2152278423309326,
      "learning_rate": 9.963956959206131e-06,
      "loss": 0.0979,
      "step": 30940
    },
    {
      "epoch": 0.05066671903536851,
      "grad_norm": 4.2375969886779785,
      "learning_rate": 9.963891066992615e-06,
      "loss": 0.1018,
      "step": 30960
    },
    {
      "epoch": 0.05069944947402185,
      "grad_norm": 16.236705780029297,
      "learning_rate": 9.963825174779097e-06,
      "loss": 0.1165,
      "step": 30980
    },
    {
      "epoch": 0.05073217991267519,
      "grad_norm": 7.904604434967041,
      "learning_rate": 9.96375928256558e-06,
      "loss": 0.1083,
      "step": 31000
    },
    {
      "epoch": 0.050764910351328525,
      "grad_norm": 4.383127689361572,
      "learning_rate": 9.963693390352062e-06,
      "loss": 0.1077,
      "step": 31020
    },
    {
      "epoch": 0.05079764078998187,
      "grad_norm": 56.67508316040039,
      "learning_rate": 9.963627498138546e-06,
      "loss": 0.103,
      "step": 31040
    },
    {
      "epoch": 0.05083037122863521,
      "grad_norm": 4.107171058654785,
      "learning_rate": 9.963561605925028e-06,
      "loss": 0.1188,
      "step": 31060
    },
    {
      "epoch": 0.050863101667288545,
      "grad_norm": 4.092708110809326,
      "learning_rate": 9.963495713711511e-06,
      "loss": 0.1082,
      "step": 31080
    },
    {
      "epoch": 0.05089583210594188,
      "grad_norm": 7.970059871673584,
      "learning_rate": 9.963429821497993e-06,
      "loss": 0.1047,
      "step": 31100
    },
    {
      "epoch": 0.05092856254459522,
      "grad_norm": 8.230907440185547,
      "learning_rate": 9.963363929284477e-06,
      "loss": 0.1082,
      "step": 31120
    },
    {
      "epoch": 0.050961292983248564,
      "grad_norm": 7.107726573944092,
      "learning_rate": 9.96329803707096e-06,
      "loss": 0.1239,
      "step": 31140
    },
    {
      "epoch": 0.0509940234219019,
      "grad_norm": 1.6508642435073853,
      "learning_rate": 9.963232144857442e-06,
      "loss": 0.1101,
      "step": 31160
    },
    {
      "epoch": 0.05102675386055524,
      "grad_norm": 12.515149116516113,
      "learning_rate": 9.963166252643926e-06,
      "loss": 0.1093,
      "step": 31180
    },
    {
      "epoch": 0.05105948429920858,
      "grad_norm": 6.629861354827881,
      "learning_rate": 9.96310036043041e-06,
      "loss": 0.0943,
      "step": 31200
    },
    {
      "epoch": 0.051092214737861914,
      "grad_norm": 13.602890968322754,
      "learning_rate": 9.963034468216892e-06,
      "loss": 0.1142,
      "step": 31220
    },
    {
      "epoch": 0.05112494517651526,
      "grad_norm": 6.564298629760742,
      "learning_rate": 9.962968576003375e-06,
      "loss": 0.0987,
      "step": 31240
    },
    {
      "epoch": 0.051157675615168596,
      "grad_norm": 4.110578536987305,
      "learning_rate": 9.962902683789859e-06,
      "loss": 0.1089,
      "step": 31260
    },
    {
      "epoch": 0.051190406053821934,
      "grad_norm": 3.7056331634521484,
      "learning_rate": 9.96283679157634e-06,
      "loss": 0.0861,
      "step": 31280
    },
    {
      "epoch": 0.05122313649247527,
      "grad_norm": 4.413780212402344,
      "learning_rate": 9.962770899362824e-06,
      "loss": 0.1117,
      "step": 31300
    },
    {
      "epoch": 0.05125586693112861,
      "grad_norm": 6.78489351272583,
      "learning_rate": 9.962705007149306e-06,
      "loss": 0.1215,
      "step": 31320
    },
    {
      "epoch": 0.051288597369781946,
      "grad_norm": 6.309818267822266,
      "learning_rate": 9.96263911493579e-06,
      "loss": 0.1114,
      "step": 31340
    },
    {
      "epoch": 0.05132132780843529,
      "grad_norm": 9.659964561462402,
      "learning_rate": 9.962573222722272e-06,
      "loss": 0.1273,
      "step": 31360
    },
    {
      "epoch": 0.05135405824708863,
      "grad_norm": 3.6457722187042236,
      "learning_rate": 9.962507330508755e-06,
      "loss": 0.1008,
      "step": 31380
    },
    {
      "epoch": 0.051386788685741966,
      "grad_norm": 4.748359680175781,
      "learning_rate": 9.962441438295237e-06,
      "loss": 0.0934,
      "step": 31400
    },
    {
      "epoch": 0.0514195191243953,
      "grad_norm": 7.092453479766846,
      "learning_rate": 9.96237554608172e-06,
      "loss": 0.0945,
      "step": 31420
    },
    {
      "epoch": 0.05145224956304864,
      "grad_norm": 3.060256242752075,
      "learning_rate": 9.962309653868202e-06,
      "loss": 0.1098,
      "step": 31440
    },
    {
      "epoch": 0.051484980001701985,
      "grad_norm": 19.58906364440918,
      "learning_rate": 9.962243761654686e-06,
      "loss": 0.0967,
      "step": 31460
    },
    {
      "epoch": 0.05151771044035532,
      "grad_norm": 6.4947896003723145,
      "learning_rate": 9.962177869441168e-06,
      "loss": 0.1081,
      "step": 31480
    },
    {
      "epoch": 0.05155044087900866,
      "grad_norm": 4.807133197784424,
      "learning_rate": 9.962111977227652e-06,
      "loss": 0.0878,
      "step": 31500
    },
    {
      "epoch": 0.051583171317662,
      "grad_norm": 1.60320246219635,
      "learning_rate": 9.962046085014133e-06,
      "loss": 0.1077,
      "step": 31520
    },
    {
      "epoch": 0.051615901756315336,
      "grad_norm": 4.158677101135254,
      "learning_rate": 9.961980192800617e-06,
      "loss": 0.118,
      "step": 31540
    },
    {
      "epoch": 0.05164863219496868,
      "grad_norm": 4.357832908630371,
      "learning_rate": 9.9619143005871e-06,
      "loss": 0.109,
      "step": 31560
    },
    {
      "epoch": 0.05168136263362202,
      "grad_norm": 6.630265235900879,
      "learning_rate": 9.961848408373583e-06,
      "loss": 0.1103,
      "step": 31580
    },
    {
      "epoch": 0.051714093072275355,
      "grad_norm": 4.102973461151123,
      "learning_rate": 9.961782516160066e-06,
      "loss": 0.1259,
      "step": 31600
    },
    {
      "epoch": 0.05174682351092869,
      "grad_norm": 3.7707409858703613,
      "learning_rate": 9.96171662394655e-06,
      "loss": 0.1192,
      "step": 31620
    },
    {
      "epoch": 0.05177955394958203,
      "grad_norm": 7.053915500640869,
      "learning_rate": 9.961650731733033e-06,
      "loss": 0.1084,
      "step": 31640
    },
    {
      "epoch": 0.051812284388235375,
      "grad_norm": 8.203879356384277,
      "learning_rate": 9.961584839519515e-06,
      "loss": 0.1219,
      "step": 31660
    },
    {
      "epoch": 0.05184501482688871,
      "grad_norm": 4.593990802764893,
      "learning_rate": 9.961518947305999e-06,
      "loss": 0.1086,
      "step": 31680
    },
    {
      "epoch": 0.05187774526554205,
      "grad_norm": 3.729405164718628,
      "learning_rate": 9.96145305509248e-06,
      "loss": 0.0973,
      "step": 31700
    },
    {
      "epoch": 0.05191047570419539,
      "grad_norm": 4.520603179931641,
      "learning_rate": 9.961387162878964e-06,
      "loss": 0.0886,
      "step": 31720
    },
    {
      "epoch": 0.051943206142848725,
      "grad_norm": 3.066972017288208,
      "learning_rate": 9.961321270665446e-06,
      "loss": 0.0937,
      "step": 31740
    },
    {
      "epoch": 0.05197593658150206,
      "grad_norm": 13.805850982666016,
      "learning_rate": 9.96125537845193e-06,
      "loss": 0.1225,
      "step": 31760
    },
    {
      "epoch": 0.05200866702015541,
      "grad_norm": 6.8967156410217285,
      "learning_rate": 9.961189486238412e-06,
      "loss": 0.1062,
      "step": 31780
    },
    {
      "epoch": 0.052041397458808744,
      "grad_norm": 3.4823226928710938,
      "learning_rate": 9.961123594024895e-06,
      "loss": 0.1189,
      "step": 31800
    },
    {
      "epoch": 0.05207412789746208,
      "grad_norm": 5.732079982757568,
      "learning_rate": 9.961057701811377e-06,
      "loss": 0.1192,
      "step": 31820
    },
    {
      "epoch": 0.05210685833611542,
      "grad_norm": 3.3840172290802,
      "learning_rate": 9.96099180959786e-06,
      "loss": 0.0913,
      "step": 31840
    },
    {
      "epoch": 0.05213958877476876,
      "grad_norm": 6.120872974395752,
      "learning_rate": 9.960925917384343e-06,
      "loss": 0.0994,
      "step": 31860
    },
    {
      "epoch": 0.0521723192134221,
      "grad_norm": 2.93172550201416,
      "learning_rate": 9.960860025170826e-06,
      "loss": 0.1284,
      "step": 31880
    },
    {
      "epoch": 0.05220504965207544,
      "grad_norm": 4.050378799438477,
      "learning_rate": 9.960794132957308e-06,
      "loss": 0.1102,
      "step": 31900
    },
    {
      "epoch": 0.052237780090728776,
      "grad_norm": 3.9643571376800537,
      "learning_rate": 9.960728240743792e-06,
      "loss": 0.1075,
      "step": 31920
    },
    {
      "epoch": 0.052270510529382114,
      "grad_norm": 6.522019386291504,
      "learning_rate": 9.960662348530275e-06,
      "loss": 0.1066,
      "step": 31940
    },
    {
      "epoch": 0.05230324096803545,
      "grad_norm": 16.045324325561523,
      "learning_rate": 9.960596456316757e-06,
      "loss": 0.1063,
      "step": 31960
    },
    {
      "epoch": 0.052335971406688796,
      "grad_norm": 6.68458890914917,
      "learning_rate": 9.96053056410324e-06,
      "loss": 0.0969,
      "step": 31980
    },
    {
      "epoch": 0.05236870184534213,
      "grad_norm": 9.695175170898438,
      "learning_rate": 9.960464671889724e-06,
      "loss": 0.0964,
      "step": 32000
    },
    {
      "epoch": 0.05240143228399547,
      "grad_norm": 7.533164978027344,
      "learning_rate": 9.960398779676206e-06,
      "loss": 0.1123,
      "step": 32020
    },
    {
      "epoch": 0.05243416272264881,
      "grad_norm": 11.98575496673584,
      "learning_rate": 9.96033288746269e-06,
      "loss": 0.1194,
      "step": 32040
    },
    {
      "epoch": 0.052466893161302146,
      "grad_norm": 31.193805694580078,
      "learning_rate": 9.960266995249173e-06,
      "loss": 0.0988,
      "step": 32060
    },
    {
      "epoch": 0.05249962359995548,
      "grad_norm": 4.910134792327881,
      "learning_rate": 9.960201103035655e-06,
      "loss": 0.1223,
      "step": 32080
    },
    {
      "epoch": 0.05253235403860883,
      "grad_norm": 7.230698108673096,
      "learning_rate": 9.960135210822139e-06,
      "loss": 0.1101,
      "step": 32100
    },
    {
      "epoch": 0.052565084477262165,
      "grad_norm": 2.170320749282837,
      "learning_rate": 9.96006931860862e-06,
      "loss": 0.1122,
      "step": 32120
    },
    {
      "epoch": 0.0525978149159155,
      "grad_norm": 7.691370964050293,
      "learning_rate": 9.960003426395104e-06,
      "loss": 0.1093,
      "step": 32140
    },
    {
      "epoch": 0.05263054535456884,
      "grad_norm": 7.861437797546387,
      "learning_rate": 9.959937534181586e-06,
      "loss": 0.097,
      "step": 32160
    },
    {
      "epoch": 0.05266327579322218,
      "grad_norm": 10.039134979248047,
      "learning_rate": 9.95987164196807e-06,
      "loss": 0.0955,
      "step": 32180
    },
    {
      "epoch": 0.05269600623187552,
      "grad_norm": 4.836733341217041,
      "learning_rate": 9.959805749754552e-06,
      "loss": 0.1134,
      "step": 32200
    },
    {
      "epoch": 0.05272873667052886,
      "grad_norm": 2.9695827960968018,
      "learning_rate": 9.959739857541035e-06,
      "loss": 0.1171,
      "step": 32220
    },
    {
      "epoch": 0.0527614671091822,
      "grad_norm": 3.7906363010406494,
      "learning_rate": 9.959673965327517e-06,
      "loss": 0.0899,
      "step": 32240
    },
    {
      "epoch": 0.052794197547835535,
      "grad_norm": 3.6365857124328613,
      "learning_rate": 9.959608073114e-06,
      "loss": 0.1088,
      "step": 32260
    },
    {
      "epoch": 0.05282692798648887,
      "grad_norm": 3.0151965618133545,
      "learning_rate": 9.959542180900483e-06,
      "loss": 0.0841,
      "step": 32280
    },
    {
      "epoch": 0.05285965842514222,
      "grad_norm": 4.481225490570068,
      "learning_rate": 9.959476288686966e-06,
      "loss": 0.0997,
      "step": 32300
    },
    {
      "epoch": 0.052892388863795554,
      "grad_norm": 3.375838041305542,
      "learning_rate": 9.95941039647345e-06,
      "loss": 0.1171,
      "step": 32320
    },
    {
      "epoch": 0.05292511930244889,
      "grad_norm": 5.6053547859191895,
      "learning_rate": 9.959344504259932e-06,
      "loss": 0.1119,
      "step": 32340
    },
    {
      "epoch": 0.05295784974110223,
      "grad_norm": 3.3310229778289795,
      "learning_rate": 9.959278612046415e-06,
      "loss": 0.0968,
      "step": 32360
    },
    {
      "epoch": 0.05299058017975557,
      "grad_norm": 3.182345151901245,
      "learning_rate": 9.959212719832899e-06,
      "loss": 0.115,
      "step": 32380
    },
    {
      "epoch": 0.053023310618408905,
      "grad_norm": 5.1328125,
      "learning_rate": 9.95914682761938e-06,
      "loss": 0.1055,
      "step": 32400
    },
    {
      "epoch": 0.05305604105706225,
      "grad_norm": 15.172881126403809,
      "learning_rate": 9.959080935405864e-06,
      "loss": 0.1,
      "step": 32420
    },
    {
      "epoch": 0.053088771495715587,
      "grad_norm": 4.307304382324219,
      "learning_rate": 9.959015043192348e-06,
      "loss": 0.095,
      "step": 32440
    },
    {
      "epoch": 0.053121501934368924,
      "grad_norm": 3.29937481880188,
      "learning_rate": 9.95894915097883e-06,
      "loss": 0.1356,
      "step": 32460
    },
    {
      "epoch": 0.05315423237302226,
      "grad_norm": 8.143204689025879,
      "learning_rate": 9.958883258765313e-06,
      "loss": 0.126,
      "step": 32480
    },
    {
      "epoch": 0.0531869628116756,
      "grad_norm": 2.3962111473083496,
      "learning_rate": 9.958817366551795e-06,
      "loss": 0.1063,
      "step": 32500
    },
    {
      "epoch": 0.053219693250328944,
      "grad_norm": 7.517457962036133,
      "learning_rate": 9.958751474338279e-06,
      "loss": 0.116,
      "step": 32520
    },
    {
      "epoch": 0.05325242368898228,
      "grad_norm": 4.802196979522705,
      "learning_rate": 9.95868558212476e-06,
      "loss": 0.11,
      "step": 32540
    },
    {
      "epoch": 0.05328515412763562,
      "grad_norm": 6.941405773162842,
      "learning_rate": 9.958619689911244e-06,
      "loss": 0.1075,
      "step": 32560
    },
    {
      "epoch": 0.053317884566288956,
      "grad_norm": 4.387139797210693,
      "learning_rate": 9.958553797697726e-06,
      "loss": 0.0962,
      "step": 32580
    },
    {
      "epoch": 0.053350615004942294,
      "grad_norm": 7.881738185882568,
      "learning_rate": 9.95848790548421e-06,
      "loss": 0.1072,
      "step": 32600
    },
    {
      "epoch": 0.05338334544359564,
      "grad_norm": 7.829104423522949,
      "learning_rate": 9.958422013270692e-06,
      "loss": 0.0942,
      "step": 32620
    },
    {
      "epoch": 0.053416075882248976,
      "grad_norm": 3.1375584602355957,
      "learning_rate": 9.958356121057175e-06,
      "loss": 0.1203,
      "step": 32640
    },
    {
      "epoch": 0.05344880632090231,
      "grad_norm": 5.3705668449401855,
      "learning_rate": 9.958290228843657e-06,
      "loss": 0.1144,
      "step": 32660
    },
    {
      "epoch": 0.05348153675955565,
      "grad_norm": 5.396871566772461,
      "learning_rate": 9.95822433663014e-06,
      "loss": 0.1129,
      "step": 32680
    },
    {
      "epoch": 0.05351426719820899,
      "grad_norm": 1.7934390306472778,
      "learning_rate": 9.958158444416624e-06,
      "loss": 0.1058,
      "step": 32700
    },
    {
      "epoch": 0.05354699763686233,
      "grad_norm": 5.072107791900635,
      "learning_rate": 9.958092552203106e-06,
      "loss": 0.0995,
      "step": 32720
    },
    {
      "epoch": 0.05357972807551567,
      "grad_norm": 5.034060478210449,
      "learning_rate": 9.95802665998959e-06,
      "loss": 0.1099,
      "step": 32740
    },
    {
      "epoch": 0.05361245851416901,
      "grad_norm": 6.531731605529785,
      "learning_rate": 9.957960767776072e-06,
      "loss": 0.1033,
      "step": 32760
    },
    {
      "epoch": 0.053645188952822345,
      "grad_norm": 5.207347393035889,
      "learning_rate": 9.957894875562555e-06,
      "loss": 0.096,
      "step": 32780
    },
    {
      "epoch": 0.05367791939147568,
      "grad_norm": 9.338519096374512,
      "learning_rate": 9.957828983349039e-06,
      "loss": 0.1106,
      "step": 32800
    },
    {
      "epoch": 0.05371064983012902,
      "grad_norm": 1.2926726341247559,
      "learning_rate": 9.957763091135521e-06,
      "loss": 0.0917,
      "step": 32820
    },
    {
      "epoch": 0.053743380268782365,
      "grad_norm": 5.687063217163086,
      "learning_rate": 9.957697198922004e-06,
      "loss": 0.1042,
      "step": 32840
    },
    {
      "epoch": 0.0537761107074357,
      "grad_norm": 7.144947528839111,
      "learning_rate": 9.957631306708488e-06,
      "loss": 0.096,
      "step": 32860
    },
    {
      "epoch": 0.05380884114608904,
      "grad_norm": 5.407193183898926,
      "learning_rate": 9.95756541449497e-06,
      "loss": 0.1208,
      "step": 32880
    },
    {
      "epoch": 0.05384157158474238,
      "grad_norm": 9.419910430908203,
      "learning_rate": 9.957499522281454e-06,
      "loss": 0.1202,
      "step": 32900
    },
    {
      "epoch": 0.053874302023395715,
      "grad_norm": 9.386337280273438,
      "learning_rate": 9.957433630067935e-06,
      "loss": 0.1078,
      "step": 32920
    },
    {
      "epoch": 0.05390703246204906,
      "grad_norm": 7.892646312713623,
      "learning_rate": 9.957367737854419e-06,
      "loss": 0.088,
      "step": 32940
    },
    {
      "epoch": 0.0539397629007024,
      "grad_norm": 2.9461557865142822,
      "learning_rate": 9.957301845640901e-06,
      "loss": 0.1111,
      "step": 32960
    },
    {
      "epoch": 0.053972493339355734,
      "grad_norm": 6.2108473777771,
      "learning_rate": 9.957235953427384e-06,
      "loss": 0.0973,
      "step": 32980
    },
    {
      "epoch": 0.05400522377800907,
      "grad_norm": 5.555716514587402,
      "learning_rate": 9.957170061213866e-06,
      "loss": 0.1011,
      "step": 33000
    },
    {
      "epoch": 0.05403795421666241,
      "grad_norm": 5.691112518310547,
      "learning_rate": 9.95710416900035e-06,
      "loss": 0.1085,
      "step": 33020
    },
    {
      "epoch": 0.054070684655315754,
      "grad_norm": 4.435163497924805,
      "learning_rate": 9.957038276786834e-06,
      "loss": 0.1014,
      "step": 33040
    },
    {
      "epoch": 0.05410341509396909,
      "grad_norm": 5.373723030090332,
      "learning_rate": 9.956972384573315e-06,
      "loss": 0.1119,
      "step": 33060
    },
    {
      "epoch": 0.05413614553262243,
      "grad_norm": 11.597542762756348,
      "learning_rate": 9.956906492359799e-06,
      "loss": 0.0958,
      "step": 33080
    },
    {
      "epoch": 0.054168875971275766,
      "grad_norm": 2.0356342792510986,
      "learning_rate": 9.956840600146281e-06,
      "loss": 0.11,
      "step": 33100
    },
    {
      "epoch": 0.054201606409929104,
      "grad_norm": 4.186541557312012,
      "learning_rate": 9.956774707932764e-06,
      "loss": 0.1046,
      "step": 33120
    },
    {
      "epoch": 0.05423433684858244,
      "grad_norm": 3.1756818294525146,
      "learning_rate": 9.956708815719246e-06,
      "loss": 0.1195,
      "step": 33140
    },
    {
      "epoch": 0.054267067287235786,
      "grad_norm": 3.393094301223755,
      "learning_rate": 9.95664292350573e-06,
      "loss": 0.0853,
      "step": 33160
    },
    {
      "epoch": 0.05429979772588912,
      "grad_norm": 3.113828182220459,
      "learning_rate": 9.956577031292214e-06,
      "loss": 0.1045,
      "step": 33180
    },
    {
      "epoch": 0.05433252816454246,
      "grad_norm": 9.296442985534668,
      "learning_rate": 9.956511139078695e-06,
      "loss": 0.1153,
      "step": 33200
    },
    {
      "epoch": 0.0543652586031958,
      "grad_norm": 4.330512523651123,
      "learning_rate": 9.956445246865179e-06,
      "loss": 0.1138,
      "step": 33220
    },
    {
      "epoch": 0.054397989041849136,
      "grad_norm": 4.996016025543213,
      "learning_rate": 9.956379354651663e-06,
      "loss": 0.109,
      "step": 33240
    },
    {
      "epoch": 0.05443071948050248,
      "grad_norm": 6.8540544509887695,
      "learning_rate": 9.956313462438145e-06,
      "loss": 0.1085,
      "step": 33260
    },
    {
      "epoch": 0.05446344991915582,
      "grad_norm": 4.8712477684021,
      "learning_rate": 9.956247570224628e-06,
      "loss": 0.0895,
      "step": 33280
    },
    {
      "epoch": 0.054496180357809156,
      "grad_norm": 11.056235313415527,
      "learning_rate": 9.95618167801111e-06,
      "loss": 0.1003,
      "step": 33300
    },
    {
      "epoch": 0.05452891079646249,
      "grad_norm": 2.4899637699127197,
      "learning_rate": 9.956115785797594e-06,
      "loss": 0.0884,
      "step": 33320
    },
    {
      "epoch": 0.05456164123511583,
      "grad_norm": 7.640613079071045,
      "learning_rate": 9.956049893584075e-06,
      "loss": 0.1113,
      "step": 33340
    },
    {
      "epoch": 0.054594371673769175,
      "grad_norm": 2.9468324184417725,
      "learning_rate": 9.955984001370559e-06,
      "loss": 0.1169,
      "step": 33360
    },
    {
      "epoch": 0.05462710211242251,
      "grad_norm": 4.32497501373291,
      "learning_rate": 9.955918109157043e-06,
      "loss": 0.1193,
      "step": 33380
    },
    {
      "epoch": 0.05465983255107585,
      "grad_norm": 3.1882612705230713,
      "learning_rate": 9.955852216943525e-06,
      "loss": 0.1175,
      "step": 33400
    },
    {
      "epoch": 0.05469256298972919,
      "grad_norm": 5.210846900939941,
      "learning_rate": 9.955786324730008e-06,
      "loss": 0.1156,
      "step": 33420
    },
    {
      "epoch": 0.054725293428382525,
      "grad_norm": 21.209339141845703,
      "learning_rate": 9.95572043251649e-06,
      "loss": 0.1199,
      "step": 33440
    },
    {
      "epoch": 0.05475802386703586,
      "grad_norm": 1.7577667236328125,
      "learning_rate": 9.955654540302974e-06,
      "loss": 0.1043,
      "step": 33460
    },
    {
      "epoch": 0.05479075430568921,
      "grad_norm": 2.4976532459259033,
      "learning_rate": 9.955588648089455e-06,
      "loss": 0.105,
      "step": 33480
    },
    {
      "epoch": 0.054823484744342545,
      "grad_norm": 3.005505323410034,
      "learning_rate": 9.955522755875939e-06,
      "loss": 0.124,
      "step": 33500
    },
    {
      "epoch": 0.05485621518299588,
      "grad_norm": 4.109124660491943,
      "learning_rate": 9.955456863662421e-06,
      "loss": 0.1204,
      "step": 33520
    },
    {
      "epoch": 0.05488894562164922,
      "grad_norm": 2.3714003562927246,
      "learning_rate": 9.955390971448905e-06,
      "loss": 0.1251,
      "step": 33540
    },
    {
      "epoch": 0.05492167606030256,
      "grad_norm": 2.24273419380188,
      "learning_rate": 9.955325079235386e-06,
      "loss": 0.1133,
      "step": 33560
    },
    {
      "epoch": 0.0549544064989559,
      "grad_norm": 2.5963058471679688,
      "learning_rate": 9.95525918702187e-06,
      "loss": 0.0987,
      "step": 33580
    },
    {
      "epoch": 0.05498713693760924,
      "grad_norm": 5.8463215827941895,
      "learning_rate": 9.955193294808354e-06,
      "loss": 0.1053,
      "step": 33600
    },
    {
      "epoch": 0.05501986737626258,
      "grad_norm": 3.674529552459717,
      "learning_rate": 9.955127402594836e-06,
      "loss": 0.1183,
      "step": 33620
    },
    {
      "epoch": 0.055052597814915914,
      "grad_norm": 3.995927572250366,
      "learning_rate": 9.955061510381319e-06,
      "loss": 0.0941,
      "step": 33640
    },
    {
      "epoch": 0.05508532825356925,
      "grad_norm": 1.9994316101074219,
      "learning_rate": 9.954995618167803e-06,
      "loss": 0.1005,
      "step": 33660
    },
    {
      "epoch": 0.055118058692222596,
      "grad_norm": 3.0102908611297607,
      "learning_rate": 9.954929725954285e-06,
      "loss": 0.1214,
      "step": 33680
    },
    {
      "epoch": 0.055150789130875934,
      "grad_norm": 7.022325038909912,
      "learning_rate": 9.954863833740768e-06,
      "loss": 0.1014,
      "step": 33700
    },
    {
      "epoch": 0.05518351956952927,
      "grad_norm": 2.467841386795044,
      "learning_rate": 9.95479794152725e-06,
      "loss": 0.0879,
      "step": 33720
    },
    {
      "epoch": 0.05521625000818261,
      "grad_norm": 3.9587371349334717,
      "learning_rate": 9.954732049313734e-06,
      "loss": 0.0949,
      "step": 33740
    },
    {
      "epoch": 0.055248980446835946,
      "grad_norm": 20.808834075927734,
      "learning_rate": 9.954666157100217e-06,
      "loss": 0.1346,
      "step": 33760
    },
    {
      "epoch": 0.05528171088548929,
      "grad_norm": 6.18385648727417,
      "learning_rate": 9.954600264886699e-06,
      "loss": 0.1319,
      "step": 33780
    },
    {
      "epoch": 0.05531444132414263,
      "grad_norm": 4.2125091552734375,
      "learning_rate": 9.954534372673183e-06,
      "loss": 0.0989,
      "step": 33800
    },
    {
      "epoch": 0.055347171762795966,
      "grad_norm": 3.549079656600952,
      "learning_rate": 9.954468480459665e-06,
      "loss": 0.0908,
      "step": 33820
    },
    {
      "epoch": 0.0553799022014493,
      "grad_norm": 4.4052534103393555,
      "learning_rate": 9.954402588246148e-06,
      "loss": 0.1011,
      "step": 33840
    },
    {
      "epoch": 0.05541263264010264,
      "grad_norm": 2.5573952198028564,
      "learning_rate": 9.95433669603263e-06,
      "loss": 0.0736,
      "step": 33860
    },
    {
      "epoch": 0.05544536307875598,
      "grad_norm": 6.518613815307617,
      "learning_rate": 9.954270803819114e-06,
      "loss": 0.1003,
      "step": 33880
    },
    {
      "epoch": 0.05547809351740932,
      "grad_norm": 4.38840389251709,
      "learning_rate": 9.954204911605596e-06,
      "loss": 0.1121,
      "step": 33900
    },
    {
      "epoch": 0.05551082395606266,
      "grad_norm": 7.20564603805542,
      "learning_rate": 9.95413901939208e-06,
      "loss": 0.1015,
      "step": 33920
    },
    {
      "epoch": 0.055543554394716,
      "grad_norm": 2.715505838394165,
      "learning_rate": 9.954073127178561e-06,
      "loss": 0.1297,
      "step": 33940
    },
    {
      "epoch": 0.055576284833369335,
      "grad_norm": 3.4505064487457275,
      "learning_rate": 9.954007234965045e-06,
      "loss": 0.127,
      "step": 33960
    },
    {
      "epoch": 0.05560901527202267,
      "grad_norm": 7.093050479888916,
      "learning_rate": 9.953941342751528e-06,
      "loss": 0.0814,
      "step": 33980
    },
    {
      "epoch": 0.05564174571067602,
      "grad_norm": 3.770312547683716,
      "learning_rate": 9.95387545053801e-06,
      "loss": 0.0814,
      "step": 34000
    },
    {
      "epoch": 0.055674476149329355,
      "grad_norm": 7.707580089569092,
      "learning_rate": 9.953809558324494e-06,
      "loss": 0.1061,
      "step": 34020
    },
    {
      "epoch": 0.05570720658798269,
      "grad_norm": 2.4288442134857178,
      "learning_rate": 9.953743666110977e-06,
      "loss": 0.0832,
      "step": 34040
    },
    {
      "epoch": 0.05573993702663603,
      "grad_norm": 5.691476821899414,
      "learning_rate": 9.95367777389746e-06,
      "loss": 0.1091,
      "step": 34060
    },
    {
      "epoch": 0.05577266746528937,
      "grad_norm": 2.9121274948120117,
      "learning_rate": 9.953611881683943e-06,
      "loss": 0.1114,
      "step": 34080
    },
    {
      "epoch": 0.05580539790394271,
      "grad_norm": 4.877732753753662,
      "learning_rate": 9.953545989470426e-06,
      "loss": 0.115,
      "step": 34100
    },
    {
      "epoch": 0.05583812834259605,
      "grad_norm": 3.1537625789642334,
      "learning_rate": 9.953480097256908e-06,
      "loss": 0.0988,
      "step": 34120
    },
    {
      "epoch": 0.05587085878124939,
      "grad_norm": 3.59788179397583,
      "learning_rate": 9.953414205043392e-06,
      "loss": 0.0944,
      "step": 34140
    },
    {
      "epoch": 0.055903589219902725,
      "grad_norm": 3.183270215988159,
      "learning_rate": 9.953348312829874e-06,
      "loss": 0.1198,
      "step": 34160
    },
    {
      "epoch": 0.05593631965855606,
      "grad_norm": 3.4890031814575195,
      "learning_rate": 9.953282420616357e-06,
      "loss": 0.105,
      "step": 34180
    },
    {
      "epoch": 0.0559690500972094,
      "grad_norm": 5.218791484832764,
      "learning_rate": 9.95321652840284e-06,
      "loss": 0.1118,
      "step": 34200
    },
    {
      "epoch": 0.056001780535862744,
      "grad_norm": 1.922079086303711,
      "learning_rate": 9.953150636189323e-06,
      "loss": 0.1106,
      "step": 34220
    },
    {
      "epoch": 0.05603451097451608,
      "grad_norm": 5.792794227600098,
      "learning_rate": 9.953084743975805e-06,
      "loss": 0.1238,
      "step": 34240
    },
    {
      "epoch": 0.05606724141316942,
      "grad_norm": 5.1020073890686035,
      "learning_rate": 9.953018851762288e-06,
      "loss": 0.1027,
      "step": 34260
    },
    {
      "epoch": 0.05609997185182276,
      "grad_norm": 7.344246864318848,
      "learning_rate": 9.95295295954877e-06,
      "loss": 0.1103,
      "step": 34280
    },
    {
      "epoch": 0.056132702290476094,
      "grad_norm": 4.734985828399658,
      "learning_rate": 9.952887067335254e-06,
      "loss": 0.1002,
      "step": 34300
    },
    {
      "epoch": 0.05616543272912944,
      "grad_norm": 3.44728684425354,
      "learning_rate": 9.952821175121736e-06,
      "loss": 0.119,
      "step": 34320
    },
    {
      "epoch": 0.056198163167782776,
      "grad_norm": 2.6631178855895996,
      "learning_rate": 9.95275528290822e-06,
      "loss": 0.1173,
      "step": 34340
    },
    {
      "epoch": 0.056230893606436114,
      "grad_norm": 5.012568950653076,
      "learning_rate": 9.952689390694701e-06,
      "loss": 0.1208,
      "step": 34360
    },
    {
      "epoch": 0.05626362404508945,
      "grad_norm": 3.4636285305023193,
      "learning_rate": 9.952623498481185e-06,
      "loss": 0.1066,
      "step": 34380
    },
    {
      "epoch": 0.05629635448374279,
      "grad_norm": 8.754688262939453,
      "learning_rate": 9.952557606267668e-06,
      "loss": 0.1058,
      "step": 34400
    },
    {
      "epoch": 0.05632908492239613,
      "grad_norm": 13.844488143920898,
      "learning_rate": 9.952491714054152e-06,
      "loss": 0.1181,
      "step": 34420
    },
    {
      "epoch": 0.05636181536104947,
      "grad_norm": 4.980086326599121,
      "learning_rate": 9.952425821840634e-06,
      "loss": 0.0999,
      "step": 34440
    },
    {
      "epoch": 0.05639454579970281,
      "grad_norm": 6.218501091003418,
      "learning_rate": 9.952359929627117e-06,
      "loss": 0.0977,
      "step": 34460
    },
    {
      "epoch": 0.056427276238356146,
      "grad_norm": 5.211645126342773,
      "learning_rate": 9.952294037413601e-06,
      "loss": 0.1098,
      "step": 34480
    },
    {
      "epoch": 0.05646000667700948,
      "grad_norm": 5.403592586517334,
      "learning_rate": 9.952228145200083e-06,
      "loss": 0.1057,
      "step": 34500
    },
    {
      "epoch": 0.05649273711566282,
      "grad_norm": 2.5564029216766357,
      "learning_rate": 9.952162252986566e-06,
      "loss": 0.1049,
      "step": 34520
    },
    {
      "epoch": 0.056525467554316165,
      "grad_norm": 3.201747417449951,
      "learning_rate": 9.952096360773048e-06,
      "loss": 0.1124,
      "step": 34540
    },
    {
      "epoch": 0.0565581979929695,
      "grad_norm": 1.590458869934082,
      "learning_rate": 9.952030468559532e-06,
      "loss": 0.1016,
      "step": 34560
    },
    {
      "epoch": 0.05659092843162284,
      "grad_norm": 5.344533443450928,
      "learning_rate": 9.951964576346014e-06,
      "loss": 0.1064,
      "step": 34580
    },
    {
      "epoch": 0.05662365887027618,
      "grad_norm": 3.9809517860412598,
      "learning_rate": 9.951898684132497e-06,
      "loss": 0.0927,
      "step": 34600
    },
    {
      "epoch": 0.056656389308929515,
      "grad_norm": 5.192516803741455,
      "learning_rate": 9.95183279191898e-06,
      "loss": 0.0909,
      "step": 34620
    },
    {
      "epoch": 0.05668911974758286,
      "grad_norm": 6.162648677825928,
      "learning_rate": 9.951766899705463e-06,
      "loss": 0.0935,
      "step": 34640
    },
    {
      "epoch": 0.0567218501862362,
      "grad_norm": 1.061295747756958,
      "learning_rate": 9.951701007491945e-06,
      "loss": 0.1095,
      "step": 34660
    },
    {
      "epoch": 0.056754580624889535,
      "grad_norm": 4.838981628417969,
      "learning_rate": 9.951635115278428e-06,
      "loss": 0.107,
      "step": 34680
    },
    {
      "epoch": 0.05678731106354287,
      "grad_norm": 2.671704053878784,
      "learning_rate": 9.95156922306491e-06,
      "loss": 0.0916,
      "step": 34700
    },
    {
      "epoch": 0.05682004150219621,
      "grad_norm": 3.833805561065674,
      "learning_rate": 9.951503330851394e-06,
      "loss": 0.0899,
      "step": 34720
    },
    {
      "epoch": 0.056852771940849554,
      "grad_norm": 5.827686309814453,
      "learning_rate": 9.951437438637876e-06,
      "loss": 0.0901,
      "step": 34740
    },
    {
      "epoch": 0.05688550237950289,
      "grad_norm": 3.8057661056518555,
      "learning_rate": 9.95137154642436e-06,
      "loss": 0.1222,
      "step": 34760
    },
    {
      "epoch": 0.05691823281815623,
      "grad_norm": 5.325723171234131,
      "learning_rate": 9.951305654210843e-06,
      "loss": 0.1145,
      "step": 34780
    },
    {
      "epoch": 0.05695096325680957,
      "grad_norm": 9.274480819702148,
      "learning_rate": 9.951239761997325e-06,
      "loss": 0.1137,
      "step": 34800
    },
    {
      "epoch": 0.056983693695462904,
      "grad_norm": 5.862601280212402,
      "learning_rate": 9.951173869783808e-06,
      "loss": 0.106,
      "step": 34820
    },
    {
      "epoch": 0.05701642413411624,
      "grad_norm": 2.089028835296631,
      "learning_rate": 9.951107977570292e-06,
      "loss": 0.1199,
      "step": 34840
    },
    {
      "epoch": 0.057049154572769586,
      "grad_norm": 1.0465798377990723,
      "learning_rate": 9.951042085356774e-06,
      "loss": 0.0763,
      "step": 34860
    },
    {
      "epoch": 0.057081885011422924,
      "grad_norm": 8.823579788208008,
      "learning_rate": 9.950976193143257e-06,
      "loss": 0.0906,
      "step": 34880
    },
    {
      "epoch": 0.05711461545007626,
      "grad_norm": 5.357288837432861,
      "learning_rate": 9.950910300929741e-06,
      "loss": 0.1112,
      "step": 34900
    },
    {
      "epoch": 0.0571473458887296,
      "grad_norm": 5.087492942810059,
      "learning_rate": 9.950844408716223e-06,
      "loss": 0.1032,
      "step": 34920
    },
    {
      "epoch": 0.05718007632738294,
      "grad_norm": 7.988847255706787,
      "learning_rate": 9.950778516502707e-06,
      "loss": 0.0865,
      "step": 34940
    },
    {
      "epoch": 0.05721280676603628,
      "grad_norm": 3.0133070945739746,
      "learning_rate": 9.950712624289188e-06,
      "loss": 0.1059,
      "step": 34960
    },
    {
      "epoch": 0.05724553720468962,
      "grad_norm": 7.5436530113220215,
      "learning_rate": 9.950646732075672e-06,
      "loss": 0.0939,
      "step": 34980
    },
    {
      "epoch": 0.057278267643342956,
      "grad_norm": 5.152456283569336,
      "learning_rate": 9.950580839862154e-06,
      "loss": 0.1265,
      "step": 35000
    },
    {
      "epoch": 0.057310998081996294,
      "grad_norm": 3.308398962020874,
      "learning_rate": 9.950514947648637e-06,
      "loss": 0.0908,
      "step": 35020
    },
    {
      "epoch": 0.05734372852064963,
      "grad_norm": 7.042655944824219,
      "learning_rate": 9.95044905543512e-06,
      "loss": 0.0859,
      "step": 35040
    },
    {
      "epoch": 0.057376458959302976,
      "grad_norm": 7.2455668449401855,
      "learning_rate": 9.950383163221603e-06,
      "loss": 0.1367,
      "step": 35060
    },
    {
      "epoch": 0.05740918939795631,
      "grad_norm": 3.928305149078369,
      "learning_rate": 9.950317271008085e-06,
      "loss": 0.1096,
      "step": 35080
    },
    {
      "epoch": 0.05744191983660965,
      "grad_norm": 4.111536026000977,
      "learning_rate": 9.950251378794568e-06,
      "loss": 0.1196,
      "step": 35100
    },
    {
      "epoch": 0.05747465027526299,
      "grad_norm": 3.290181875228882,
      "learning_rate": 9.95018548658105e-06,
      "loss": 0.1037,
      "step": 35120
    },
    {
      "epoch": 0.057507380713916326,
      "grad_norm": 5.609055519104004,
      "learning_rate": 9.950119594367534e-06,
      "loss": 0.0998,
      "step": 35140
    },
    {
      "epoch": 0.05754011115256967,
      "grad_norm": 5.739729881286621,
      "learning_rate": 9.950053702154017e-06,
      "loss": 0.1463,
      "step": 35160
    },
    {
      "epoch": 0.05757284159122301,
      "grad_norm": 3.865234136581421,
      "learning_rate": 9.9499878099405e-06,
      "loss": 0.0898,
      "step": 35180
    },
    {
      "epoch": 0.057605572029876345,
      "grad_norm": 3.503906011581421,
      "learning_rate": 9.949921917726983e-06,
      "loss": 0.1151,
      "step": 35200
    },
    {
      "epoch": 0.05763830246852968,
      "grad_norm": 5.4790167808532715,
      "learning_rate": 9.949856025513467e-06,
      "loss": 0.106,
      "step": 35220
    },
    {
      "epoch": 0.05767103290718302,
      "grad_norm": 3.627138376235962,
      "learning_rate": 9.949790133299948e-06,
      "loss": 0.0954,
      "step": 35240
    },
    {
      "epoch": 0.05770376334583636,
      "grad_norm": 3.6044154167175293,
      "learning_rate": 9.949724241086432e-06,
      "loss": 0.0998,
      "step": 35260
    },
    {
      "epoch": 0.0577364937844897,
      "grad_norm": 5.360110759735107,
      "learning_rate": 9.949658348872916e-06,
      "loss": 0.1223,
      "step": 35280
    },
    {
      "epoch": 0.05776922422314304,
      "grad_norm": 3.155236005783081,
      "learning_rate": 9.949592456659398e-06,
      "loss": 0.0798,
      "step": 35300
    },
    {
      "epoch": 0.05780195466179638,
      "grad_norm": 4.103496551513672,
      "learning_rate": 9.949526564445881e-06,
      "loss": 0.1163,
      "step": 35320
    },
    {
      "epoch": 0.057834685100449715,
      "grad_norm": 4.340908527374268,
      "learning_rate": 9.949460672232363e-06,
      "loss": 0.0964,
      "step": 35340
    },
    {
      "epoch": 0.05786741553910305,
      "grad_norm": 3.395678758621216,
      "learning_rate": 9.949394780018847e-06,
      "loss": 0.103,
      "step": 35360
    },
    {
      "epoch": 0.0579001459777564,
      "grad_norm": 12.218718528747559,
      "learning_rate": 9.949328887805328e-06,
      "loss": 0.1054,
      "step": 35380
    },
    {
      "epoch": 0.057932876416409734,
      "grad_norm": 3.847013235092163,
      "learning_rate": 9.949262995591812e-06,
      "loss": 0.0845,
      "step": 35400
    },
    {
      "epoch": 0.05796560685506307,
      "grad_norm": 5.426178932189941,
      "learning_rate": 9.949197103378294e-06,
      "loss": 0.1151,
      "step": 35420
    },
    {
      "epoch": 0.05799833729371641,
      "grad_norm": 4.864559650421143,
      "learning_rate": 9.949131211164778e-06,
      "loss": 0.1196,
      "step": 35440
    },
    {
      "epoch": 0.05803106773236975,
      "grad_norm": 14.410343170166016,
      "learning_rate": 9.94906531895126e-06,
      "loss": 0.12,
      "step": 35460
    },
    {
      "epoch": 0.05806379817102309,
      "grad_norm": 5.783588409423828,
      "learning_rate": 9.948999426737743e-06,
      "loss": 0.0998,
      "step": 35480
    },
    {
      "epoch": 0.05809652860967643,
      "grad_norm": 5.004638671875,
      "learning_rate": 9.948933534524227e-06,
      "loss": 0.123,
      "step": 35500
    },
    {
      "epoch": 0.058129259048329766,
      "grad_norm": 4.332380771636963,
      "learning_rate": 9.948867642310709e-06,
      "loss": 0.0959,
      "step": 35520
    },
    {
      "epoch": 0.058161989486983104,
      "grad_norm": 2.4843432903289795,
      "learning_rate": 9.948801750097192e-06,
      "loss": 0.1178,
      "step": 35540
    },
    {
      "epoch": 0.05819471992563644,
      "grad_norm": 3.277576446533203,
      "learning_rate": 9.948735857883674e-06,
      "loss": 0.1063,
      "step": 35560
    },
    {
      "epoch": 0.05822745036428978,
      "grad_norm": 5.533772945404053,
      "learning_rate": 9.948669965670158e-06,
      "loss": 0.131,
      "step": 35580
    },
    {
      "epoch": 0.05826018080294312,
      "grad_norm": 3.1242048740386963,
      "learning_rate": 9.94860407345664e-06,
      "loss": 0.1063,
      "step": 35600
    },
    {
      "epoch": 0.05829291124159646,
      "grad_norm": 5.995444297790527,
      "learning_rate": 9.948538181243123e-06,
      "loss": 0.0905,
      "step": 35620
    },
    {
      "epoch": 0.0583256416802498,
      "grad_norm": 4.876388072967529,
      "learning_rate": 9.948472289029607e-06,
      "loss": 0.0984,
      "step": 35640
    },
    {
      "epoch": 0.058358372118903136,
      "grad_norm": 6.667263031005859,
      "learning_rate": 9.948406396816089e-06,
      "loss": 0.095,
      "step": 35660
    },
    {
      "epoch": 0.058391102557556473,
      "grad_norm": 9.313288688659668,
      "learning_rate": 9.948340504602572e-06,
      "loss": 0.0989,
      "step": 35680
    },
    {
      "epoch": 0.05842383299620982,
      "grad_norm": 2.5505919456481934,
      "learning_rate": 9.948274612389056e-06,
      "loss": 0.1073,
      "step": 35700
    },
    {
      "epoch": 0.058456563434863155,
      "grad_norm": 5.362619876861572,
      "learning_rate": 9.948208720175538e-06,
      "loss": 0.1134,
      "step": 35720
    },
    {
      "epoch": 0.05848929387351649,
      "grad_norm": 18.35968017578125,
      "learning_rate": 9.948142827962021e-06,
      "loss": 0.0985,
      "step": 35740
    },
    {
      "epoch": 0.05852202431216983,
      "grad_norm": 3.1590380668640137,
      "learning_rate": 9.948076935748503e-06,
      "loss": 0.079,
      "step": 35760
    },
    {
      "epoch": 0.05855475475082317,
      "grad_norm": 2.1187660694122314,
      "learning_rate": 9.948011043534987e-06,
      "loss": 0.1073,
      "step": 35780
    },
    {
      "epoch": 0.05858748518947651,
      "grad_norm": 86.55734252929688,
      "learning_rate": 9.947945151321469e-06,
      "loss": 0.1077,
      "step": 35800
    },
    {
      "epoch": 0.05862021562812985,
      "grad_norm": 5.040222644805908,
      "learning_rate": 9.947879259107952e-06,
      "loss": 0.1074,
      "step": 35820
    },
    {
      "epoch": 0.05865294606678319,
      "grad_norm": 4.090165615081787,
      "learning_rate": 9.947813366894436e-06,
      "loss": 0.1053,
      "step": 35840
    },
    {
      "epoch": 0.058685676505436525,
      "grad_norm": 4.004722595214844,
      "learning_rate": 9.947747474680918e-06,
      "loss": 0.0981,
      "step": 35860
    },
    {
      "epoch": 0.05871840694408986,
      "grad_norm": 5.256595134735107,
      "learning_rate": 9.947681582467401e-06,
      "loss": 0.1203,
      "step": 35880
    },
    {
      "epoch": 0.0587511373827432,
      "grad_norm": 3.0562028884887695,
      "learning_rate": 9.947615690253883e-06,
      "loss": 0.102,
      "step": 35900
    },
    {
      "epoch": 0.058783867821396545,
      "grad_norm": 9.501313209533691,
      "learning_rate": 9.947549798040367e-06,
      "loss": 0.0993,
      "step": 35920
    },
    {
      "epoch": 0.05881659826004988,
      "grad_norm": 4.945890426635742,
      "learning_rate": 9.947483905826849e-06,
      "loss": 0.0918,
      "step": 35940
    },
    {
      "epoch": 0.05884932869870322,
      "grad_norm": 1.7850719690322876,
      "learning_rate": 9.947418013613332e-06,
      "loss": 0.0903,
      "step": 35960
    },
    {
      "epoch": 0.05888205913735656,
      "grad_norm": 6.827622890472412,
      "learning_rate": 9.947352121399814e-06,
      "loss": 0.1057,
      "step": 35980
    },
    {
      "epoch": 0.058914789576009895,
      "grad_norm": 3.495701789855957,
      "learning_rate": 9.947286229186298e-06,
      "loss": 0.1235,
      "step": 36000
    },
    {
      "epoch": 0.05894752001466324,
      "grad_norm": 5.921727657318115,
      "learning_rate": 9.947220336972781e-06,
      "loss": 0.1159,
      "step": 36020
    },
    {
      "epoch": 0.05898025045331658,
      "grad_norm": 3.313769578933716,
      "learning_rate": 9.947154444759263e-06,
      "loss": 0.1125,
      "step": 36040
    },
    {
      "epoch": 0.059012980891969914,
      "grad_norm": 5.274353981018066,
      "learning_rate": 9.947088552545747e-06,
      "loss": 0.1161,
      "step": 36060
    },
    {
      "epoch": 0.05904571133062325,
      "grad_norm": 3.942471981048584,
      "learning_rate": 9.94702266033223e-06,
      "loss": 0.1238,
      "step": 36080
    },
    {
      "epoch": 0.05907844176927659,
      "grad_norm": 2.216161012649536,
      "learning_rate": 9.946956768118712e-06,
      "loss": 0.0875,
      "step": 36100
    },
    {
      "epoch": 0.059111172207929934,
      "grad_norm": 4.220824718475342,
      "learning_rate": 9.946890875905196e-06,
      "loss": 0.1067,
      "step": 36120
    },
    {
      "epoch": 0.05914390264658327,
      "grad_norm": 24.11309814453125,
      "learning_rate": 9.946824983691678e-06,
      "loss": 0.1114,
      "step": 36140
    },
    {
      "epoch": 0.05917663308523661,
      "grad_norm": 9.877793312072754,
      "learning_rate": 9.946759091478161e-06,
      "loss": 0.0914,
      "step": 36160
    },
    {
      "epoch": 0.059209363523889946,
      "grad_norm": 1.9430803060531616,
      "learning_rate": 9.946693199264643e-06,
      "loss": 0.0981,
      "step": 36180
    },
    {
      "epoch": 0.059242093962543284,
      "grad_norm": 2.2880990505218506,
      "learning_rate": 9.946627307051127e-06,
      "loss": 0.0869,
      "step": 36200
    },
    {
      "epoch": 0.05927482440119663,
      "grad_norm": 14.979347229003906,
      "learning_rate": 9.94656141483761e-06,
      "loss": 0.1121,
      "step": 36220
    },
    {
      "epoch": 0.059307554839849966,
      "grad_norm": 3.996129274368286,
      "learning_rate": 9.946495522624092e-06,
      "loss": 0.0754,
      "step": 36240
    },
    {
      "epoch": 0.0593402852785033,
      "grad_norm": 4.159722328186035,
      "learning_rate": 9.946429630410576e-06,
      "loss": 0.1124,
      "step": 36260
    },
    {
      "epoch": 0.05937301571715664,
      "grad_norm": 6.8484039306640625,
      "learning_rate": 9.946363738197058e-06,
      "loss": 0.1,
      "step": 36280
    },
    {
      "epoch": 0.05940574615580998,
      "grad_norm": 11.297541618347168,
      "learning_rate": 9.946297845983541e-06,
      "loss": 0.1084,
      "step": 36300
    },
    {
      "epoch": 0.059438476594463316,
      "grad_norm": 7.285188674926758,
      "learning_rate": 9.946231953770023e-06,
      "loss": 0.1077,
      "step": 36320
    },
    {
      "epoch": 0.05947120703311666,
      "grad_norm": 5.6297407150268555,
      "learning_rate": 9.946166061556507e-06,
      "loss": 0.1073,
      "step": 36340
    },
    {
      "epoch": 0.05950393747177,
      "grad_norm": 3.6708977222442627,
      "learning_rate": 9.946100169342989e-06,
      "loss": 0.1104,
      "step": 36360
    },
    {
      "epoch": 0.059536667910423335,
      "grad_norm": 2.6324045658111572,
      "learning_rate": 9.946034277129472e-06,
      "loss": 0.1057,
      "step": 36380
    },
    {
      "epoch": 0.05956939834907667,
      "grad_norm": 2.6102445125579834,
      "learning_rate": 9.945968384915954e-06,
      "loss": 0.1017,
      "step": 36400
    },
    {
      "epoch": 0.05960212878773001,
      "grad_norm": 5.127422332763672,
      "learning_rate": 9.945902492702438e-06,
      "loss": 0.0989,
      "step": 36420
    },
    {
      "epoch": 0.059634859226383355,
      "grad_norm": 6.678228378295898,
      "learning_rate": 9.945836600488921e-06,
      "loss": 0.0873,
      "step": 36440
    },
    {
      "epoch": 0.05966758966503669,
      "grad_norm": 4.206032752990723,
      "learning_rate": 9.945770708275403e-06,
      "loss": 0.1112,
      "step": 36460
    },
    {
      "epoch": 0.05970032010369003,
      "grad_norm": 7.03749418258667,
      "learning_rate": 9.945704816061887e-06,
      "loss": 0.0778,
      "step": 36480
    },
    {
      "epoch": 0.05973305054234337,
      "grad_norm": 7.31504487991333,
      "learning_rate": 9.94563892384837e-06,
      "loss": 0.0952,
      "step": 36500
    },
    {
      "epoch": 0.059765780980996705,
      "grad_norm": 3.552018404006958,
      "learning_rate": 9.945573031634852e-06,
      "loss": 0.0994,
      "step": 36520
    },
    {
      "epoch": 0.05979851141965005,
      "grad_norm": 5.951589584350586,
      "learning_rate": 9.945507139421336e-06,
      "loss": 0.112,
      "step": 36540
    },
    {
      "epoch": 0.05983124185830339,
      "grad_norm": 2.670886754989624,
      "learning_rate": 9.94544124720782e-06,
      "loss": 0.113,
      "step": 36560
    },
    {
      "epoch": 0.059863972296956725,
      "grad_norm": 3.5795655250549316,
      "learning_rate": 9.945375354994301e-06,
      "loss": 0.0851,
      "step": 36580
    },
    {
      "epoch": 0.05989670273561006,
      "grad_norm": 9.23688793182373,
      "learning_rate": 9.945309462780785e-06,
      "loss": 0.1234,
      "step": 36600
    },
    {
      "epoch": 0.0599294331742634,
      "grad_norm": 7.947453022003174,
      "learning_rate": 9.945243570567267e-06,
      "loss": 0.1133,
      "step": 36620
    },
    {
      "epoch": 0.05996216361291674,
      "grad_norm": 5.201255798339844,
      "learning_rate": 9.94517767835375e-06,
      "loss": 0.0931,
      "step": 36640
    },
    {
      "epoch": 0.05999489405157008,
      "grad_norm": 8.099762916564941,
      "learning_rate": 9.945111786140232e-06,
      "loss": 0.0981,
      "step": 36660
    },
    {
      "epoch": 0.06002762449022342,
      "grad_norm": 4.025953769683838,
      "learning_rate": 9.945045893926716e-06,
      "loss": 0.0975,
      "step": 36680
    },
    {
      "epoch": 0.06006035492887676,
      "grad_norm": 5.304753303527832,
      "learning_rate": 9.944980001713198e-06,
      "loss": 0.1021,
      "step": 36700
    },
    {
      "epoch": 0.060093085367530094,
      "grad_norm": 3.68186354637146,
      "learning_rate": 9.944914109499681e-06,
      "loss": 0.0986,
      "step": 36720
    },
    {
      "epoch": 0.06012581580618343,
      "grad_norm": 4.516232967376709,
      "learning_rate": 9.944848217286163e-06,
      "loss": 0.0881,
      "step": 36740
    },
    {
      "epoch": 0.060158546244836776,
      "grad_norm": 4.442058086395264,
      "learning_rate": 9.944782325072647e-06,
      "loss": 0.0952,
      "step": 36760
    },
    {
      "epoch": 0.060191276683490114,
      "grad_norm": 16.29665184020996,
      "learning_rate": 9.944716432859129e-06,
      "loss": 0.1082,
      "step": 36780
    },
    {
      "epoch": 0.06022400712214345,
      "grad_norm": 10.491327285766602,
      "learning_rate": 9.944650540645612e-06,
      "loss": 0.1163,
      "step": 36800
    },
    {
      "epoch": 0.06025673756079679,
      "grad_norm": 2.4893484115600586,
      "learning_rate": 9.944584648432096e-06,
      "loss": 0.0922,
      "step": 36820
    },
    {
      "epoch": 0.060289467999450126,
      "grad_norm": 4.945775508880615,
      "learning_rate": 9.944518756218578e-06,
      "loss": 0.0949,
      "step": 36840
    },
    {
      "epoch": 0.06032219843810347,
      "grad_norm": 2.4944281578063965,
      "learning_rate": 9.944452864005061e-06,
      "loss": 0.1064,
      "step": 36860
    },
    {
      "epoch": 0.06035492887675681,
      "grad_norm": 12.886380195617676,
      "learning_rate": 9.944386971791545e-06,
      "loss": 0.0993,
      "step": 36880
    },
    {
      "epoch": 0.060387659315410146,
      "grad_norm": 10.804498672485352,
      "learning_rate": 9.944321079578027e-06,
      "loss": 0.1039,
      "step": 36900
    },
    {
      "epoch": 0.06042038975406348,
      "grad_norm": 5.294338226318359,
      "learning_rate": 9.94425518736451e-06,
      "loss": 0.1138,
      "step": 36920
    },
    {
      "epoch": 0.06045312019271682,
      "grad_norm": 3.9442405700683594,
      "learning_rate": 9.944189295150994e-06,
      "loss": 0.1256,
      "step": 36940
    },
    {
      "epoch": 0.06048585063137016,
      "grad_norm": 29.861257553100586,
      "learning_rate": 9.944123402937476e-06,
      "loss": 0.1059,
      "step": 36960
    },
    {
      "epoch": 0.0605185810700235,
      "grad_norm": 8.293338775634766,
      "learning_rate": 9.94405751072396e-06,
      "loss": 0.0843,
      "step": 36980
    },
    {
      "epoch": 0.06055131150867684,
      "grad_norm": 18.493942260742188,
      "learning_rate": 9.943991618510441e-06,
      "loss": 0.1112,
      "step": 37000
    },
    {
      "epoch": 0.06058404194733018,
      "grad_norm": 3.9883625507354736,
      "learning_rate": 9.943925726296925e-06,
      "loss": 0.1182,
      "step": 37020
    },
    {
      "epoch": 0.060616772385983515,
      "grad_norm": 7.275963306427002,
      "learning_rate": 9.943859834083407e-06,
      "loss": 0.1022,
      "step": 37040
    },
    {
      "epoch": 0.06064950282463685,
      "grad_norm": 5.0673136711120605,
      "learning_rate": 9.94379394186989e-06,
      "loss": 0.0969,
      "step": 37060
    },
    {
      "epoch": 0.0606822332632902,
      "grad_norm": 4.668947219848633,
      "learning_rate": 9.943728049656372e-06,
      "loss": 0.0952,
      "step": 37080
    },
    {
      "epoch": 0.060714963701943535,
      "grad_norm": 12.94953441619873,
      "learning_rate": 9.943662157442856e-06,
      "loss": 0.0901,
      "step": 37100
    },
    {
      "epoch": 0.06074769414059687,
      "grad_norm": 2.5108940601348877,
      "learning_rate": 9.943596265229338e-06,
      "loss": 0.0947,
      "step": 37120
    },
    {
      "epoch": 0.06078042457925021,
      "grad_norm": 7.702980041503906,
      "learning_rate": 9.943530373015821e-06,
      "loss": 0.1084,
      "step": 37140
    },
    {
      "epoch": 0.06081315501790355,
      "grad_norm": 5.249200344085693,
      "learning_rate": 9.943464480802303e-06,
      "loss": 0.126,
      "step": 37160
    },
    {
      "epoch": 0.06084588545655689,
      "grad_norm": 3.549809694290161,
      "learning_rate": 9.943398588588787e-06,
      "loss": 0.1077,
      "step": 37180
    },
    {
      "epoch": 0.06087861589521023,
      "grad_norm": 4.305044174194336,
      "learning_rate": 9.94333269637527e-06,
      "loss": 0.0965,
      "step": 37200
    },
    {
      "epoch": 0.06091134633386357,
      "grad_norm": 4.218949794769287,
      "learning_rate": 9.943266804161752e-06,
      "loss": 0.0928,
      "step": 37220
    },
    {
      "epoch": 0.060944076772516904,
      "grad_norm": 3.0017359256744385,
      "learning_rate": 9.943200911948236e-06,
      "loss": 0.1285,
      "step": 37240
    },
    {
      "epoch": 0.06097680721117024,
      "grad_norm": 2.2417454719543457,
      "learning_rate": 9.94313501973472e-06,
      "loss": 0.109,
      "step": 37260
    },
    {
      "epoch": 0.061009537649823586,
      "grad_norm": 4.245341777801514,
      "learning_rate": 9.943069127521201e-06,
      "loss": 0.1002,
      "step": 37280
    },
    {
      "epoch": 0.061042268088476924,
      "grad_norm": 3.933912754058838,
      "learning_rate": 9.943003235307685e-06,
      "loss": 0.0998,
      "step": 37300
    },
    {
      "epoch": 0.06107499852713026,
      "grad_norm": 12.885220527648926,
      "learning_rate": 9.942937343094169e-06,
      "loss": 0.1048,
      "step": 37320
    },
    {
      "epoch": 0.0611077289657836,
      "grad_norm": 5.553091049194336,
      "learning_rate": 9.94287145088065e-06,
      "loss": 0.1267,
      "step": 37340
    },
    {
      "epoch": 0.061140459404436936,
      "grad_norm": 2.827327013015747,
      "learning_rate": 9.942805558667134e-06,
      "loss": 0.0944,
      "step": 37360
    },
    {
      "epoch": 0.061173189843090274,
      "grad_norm": 6.1076273918151855,
      "learning_rate": 9.942739666453616e-06,
      "loss": 0.0879,
      "step": 37380
    },
    {
      "epoch": 0.06120592028174362,
      "grad_norm": 3.7453572750091553,
      "learning_rate": 9.9426737742401e-06,
      "loss": 0.0989,
      "step": 37400
    },
    {
      "epoch": 0.061238650720396956,
      "grad_norm": 5.701714038848877,
      "learning_rate": 9.942607882026581e-06,
      "loss": 0.1194,
      "step": 37420
    },
    {
      "epoch": 0.061271381159050294,
      "grad_norm": 2.486335515975952,
      "learning_rate": 9.942541989813065e-06,
      "loss": 0.0958,
      "step": 37440
    },
    {
      "epoch": 0.06130411159770363,
      "grad_norm": 4.421812057495117,
      "learning_rate": 9.942476097599547e-06,
      "loss": 0.121,
      "step": 37460
    },
    {
      "epoch": 0.06133684203635697,
      "grad_norm": 3.0805306434631348,
      "learning_rate": 9.94241020538603e-06,
      "loss": 0.1014,
      "step": 37480
    },
    {
      "epoch": 0.06136957247501031,
      "grad_norm": 6.200908660888672,
      "learning_rate": 9.942344313172512e-06,
      "loss": 0.1065,
      "step": 37500
    },
    {
      "epoch": 0.06140230291366365,
      "grad_norm": 2.6464381217956543,
      "learning_rate": 9.942278420958996e-06,
      "loss": 0.0974,
      "step": 37520
    },
    {
      "epoch": 0.06143503335231699,
      "grad_norm": 3.097696542739868,
      "learning_rate": 9.942212528745478e-06,
      "loss": 0.1023,
      "step": 37540
    },
    {
      "epoch": 0.061467763790970326,
      "grad_norm": 3.225104331970215,
      "learning_rate": 9.942146636531962e-06,
      "loss": 0.0885,
      "step": 37560
    },
    {
      "epoch": 0.06150049422962366,
      "grad_norm": 2.482691526412964,
      "learning_rate": 9.942080744318443e-06,
      "loss": 0.1094,
      "step": 37580
    },
    {
      "epoch": 0.06153322466827701,
      "grad_norm": 8.894187927246094,
      "learning_rate": 9.942014852104927e-06,
      "loss": 0.1057,
      "step": 37600
    },
    {
      "epoch": 0.061565955106930345,
      "grad_norm": 8.763843536376953,
      "learning_rate": 9.94194895989141e-06,
      "loss": 0.1198,
      "step": 37620
    },
    {
      "epoch": 0.06159868554558368,
      "grad_norm": 2.426478862762451,
      "learning_rate": 9.941883067677892e-06,
      "loss": 0.1072,
      "step": 37640
    },
    {
      "epoch": 0.06163141598423702,
      "grad_norm": 3.5278477668762207,
      "learning_rate": 9.941817175464376e-06,
      "loss": 0.1227,
      "step": 37660
    },
    {
      "epoch": 0.06166414642289036,
      "grad_norm": 3.6831307411193848,
      "learning_rate": 9.94175128325086e-06,
      "loss": 0.0947,
      "step": 37680
    },
    {
      "epoch": 0.061696876861543695,
      "grad_norm": 3.802016019821167,
      "learning_rate": 9.941685391037342e-06,
      "loss": 0.0889,
      "step": 37700
    },
    {
      "epoch": 0.06172960730019704,
      "grad_norm": 5.139464378356934,
      "learning_rate": 9.941619498823825e-06,
      "loss": 0.0901,
      "step": 37720
    },
    {
      "epoch": 0.06176233773885038,
      "grad_norm": 4.586146354675293,
      "learning_rate": 9.941553606610309e-06,
      "loss": 0.1137,
      "step": 37740
    },
    {
      "epoch": 0.061795068177503715,
      "grad_norm": 6.276430130004883,
      "learning_rate": 9.94148771439679e-06,
      "loss": 0.1101,
      "step": 37760
    },
    {
      "epoch": 0.06182779861615705,
      "grad_norm": 3.970414400100708,
      "learning_rate": 9.941421822183274e-06,
      "loss": 0.1099,
      "step": 37780
    },
    {
      "epoch": 0.06186052905481039,
      "grad_norm": 2.6683928966522217,
      "learning_rate": 9.941355929969756e-06,
      "loss": 0.1055,
      "step": 37800
    },
    {
      "epoch": 0.061893259493463734,
      "grad_norm": 2.2108328342437744,
      "learning_rate": 9.94129003775624e-06,
      "loss": 0.0966,
      "step": 37820
    },
    {
      "epoch": 0.06192598993211707,
      "grad_norm": 3.86208438873291,
      "learning_rate": 9.941224145542722e-06,
      "loss": 0.0904,
      "step": 37840
    },
    {
      "epoch": 0.06195872037077041,
      "grad_norm": 3.183316707611084,
      "learning_rate": 9.941158253329205e-06,
      "loss": 0.1002,
      "step": 37860
    },
    {
      "epoch": 0.06199145080942375,
      "grad_norm": 4.146705627441406,
      "learning_rate": 9.941092361115687e-06,
      "loss": 0.0926,
      "step": 37880
    },
    {
      "epoch": 0.062024181248077084,
      "grad_norm": 11.426177024841309,
      "learning_rate": 9.94102646890217e-06,
      "loss": 0.0742,
      "step": 37900
    },
    {
      "epoch": 0.06205691168673043,
      "grad_norm": 5.278810024261475,
      "learning_rate": 9.940960576688653e-06,
      "loss": 0.1128,
      "step": 37920
    },
    {
      "epoch": 0.062089642125383766,
      "grad_norm": 5.692551136016846,
      "learning_rate": 9.940894684475136e-06,
      "loss": 0.0976,
      "step": 37940
    },
    {
      "epoch": 0.062122372564037104,
      "grad_norm": 3.6277928352355957,
      "learning_rate": 9.94082879226162e-06,
      "loss": 0.1184,
      "step": 37960
    },
    {
      "epoch": 0.06215510300269044,
      "grad_norm": 6.927928447723389,
      "learning_rate": 9.940762900048102e-06,
      "loss": 0.1061,
      "step": 37980
    },
    {
      "epoch": 0.06218783344134378,
      "grad_norm": 6.381814479827881,
      "learning_rate": 9.940697007834585e-06,
      "loss": 0.1201,
      "step": 38000
    },
    {
      "epoch": 0.062220563879997116,
      "grad_norm": 6.948544502258301,
      "learning_rate": 9.940631115621067e-06,
      "loss": 0.1215,
      "step": 38020
    },
    {
      "epoch": 0.06225329431865046,
      "grad_norm": 1.6766520738601685,
      "learning_rate": 9.94056522340755e-06,
      "loss": 0.1062,
      "step": 38040
    },
    {
      "epoch": 0.0622860247573038,
      "grad_norm": 3.0241446495056152,
      "learning_rate": 9.940499331194034e-06,
      "loss": 0.1025,
      "step": 38060
    },
    {
      "epoch": 0.062318755195957136,
      "grad_norm": 4.503559589385986,
      "learning_rate": 9.940433438980516e-06,
      "loss": 0.1136,
      "step": 38080
    },
    {
      "epoch": 0.06235148563461047,
      "grad_norm": 4.521332740783691,
      "learning_rate": 9.940367546767e-06,
      "loss": 0.1113,
      "step": 38100
    },
    {
      "epoch": 0.06238421607326381,
      "grad_norm": 5.071451663970947,
      "learning_rate": 9.940301654553483e-06,
      "loss": 0.1036,
      "step": 38120
    },
    {
      "epoch": 0.062416946511917155,
      "grad_norm": 6.497502326965332,
      "learning_rate": 9.940235762339965e-06,
      "loss": 0.103,
      "step": 38140
    },
    {
      "epoch": 0.06244967695057049,
      "grad_norm": 6.7610039710998535,
      "learning_rate": 9.940169870126449e-06,
      "loss": 0.0766,
      "step": 38160
    },
    {
      "epoch": 0.06248240738922383,
      "grad_norm": 0.9716641306877136,
      "learning_rate": 9.94010397791293e-06,
      "loss": 0.0958,
      "step": 38180
    },
    {
      "epoch": 0.06251513782787717,
      "grad_norm": 2.6537935733795166,
      "learning_rate": 9.940038085699414e-06,
      "loss": 0.0858,
      "step": 38200
    },
    {
      "epoch": 0.06254786826653051,
      "grad_norm": 3.1676642894744873,
      "learning_rate": 9.939972193485896e-06,
      "loss": 0.0989,
      "step": 38220
    },
    {
      "epoch": 0.06258059870518384,
      "grad_norm": 2.0980403423309326,
      "learning_rate": 9.93990630127238e-06,
      "loss": 0.0658,
      "step": 38240
    },
    {
      "epoch": 0.06261332914383719,
      "grad_norm": 4.96649169921875,
      "learning_rate": 9.939840409058862e-06,
      "loss": 0.1034,
      "step": 38260
    },
    {
      "epoch": 0.06264605958249052,
      "grad_norm": 5.821681499481201,
      "learning_rate": 9.939774516845345e-06,
      "loss": 0.1188,
      "step": 38280
    },
    {
      "epoch": 0.06267879002114386,
      "grad_norm": 4.387632369995117,
      "learning_rate": 9.939708624631829e-06,
      "loss": 0.1015,
      "step": 38300
    },
    {
      "epoch": 0.06271152045979721,
      "grad_norm": 5.362405776977539,
      "learning_rate": 9.93964273241831e-06,
      "loss": 0.1141,
      "step": 38320
    },
    {
      "epoch": 0.06274425089845054,
      "grad_norm": 4.199353218078613,
      "learning_rate": 9.939576840204794e-06,
      "loss": 0.0968,
      "step": 38340
    },
    {
      "epoch": 0.06277698133710388,
      "grad_norm": 3.8079776763916016,
      "learning_rate": 9.939510947991276e-06,
      "loss": 0.114,
      "step": 38360
    },
    {
      "epoch": 0.06280971177575721,
      "grad_norm": 7.624923229217529,
      "learning_rate": 9.93944505577776e-06,
      "loss": 0.107,
      "step": 38380
    },
    {
      "epoch": 0.06284244221441056,
      "grad_norm": 6.342453479766846,
      "learning_rate": 9.939379163564242e-06,
      "loss": 0.0855,
      "step": 38400
    },
    {
      "epoch": 0.0628751726530639,
      "grad_norm": 3.38985276222229,
      "learning_rate": 9.939313271350725e-06,
      "loss": 0.09,
      "step": 38420
    },
    {
      "epoch": 0.06290790309171723,
      "grad_norm": 2.341754674911499,
      "learning_rate": 9.939247379137207e-06,
      "loss": 0.0833,
      "step": 38440
    },
    {
      "epoch": 0.06294063353037058,
      "grad_norm": 6.51350212097168,
      "learning_rate": 9.93918148692369e-06,
      "loss": 0.0872,
      "step": 38460
    },
    {
      "epoch": 0.06297336396902391,
      "grad_norm": 3.3234283924102783,
      "learning_rate": 9.939115594710174e-06,
      "loss": 0.1124,
      "step": 38480
    },
    {
      "epoch": 0.06300609440767725,
      "grad_norm": 5.4650092124938965,
      "learning_rate": 9.939049702496656e-06,
      "loss": 0.1019,
      "step": 38500
    },
    {
      "epoch": 0.0630388248463306,
      "grad_norm": 4.20452356338501,
      "learning_rate": 9.93898381028314e-06,
      "loss": 0.1113,
      "step": 38520
    },
    {
      "epoch": 0.06307155528498393,
      "grad_norm": 8.499171257019043,
      "learning_rate": 9.938917918069623e-06,
      "loss": 0.1047,
      "step": 38540
    },
    {
      "epoch": 0.06310428572363727,
      "grad_norm": 3.939610242843628,
      "learning_rate": 9.938852025856105e-06,
      "loss": 0.0841,
      "step": 38560
    },
    {
      "epoch": 0.0631370161622906,
      "grad_norm": 3.3608551025390625,
      "learning_rate": 9.938786133642589e-06,
      "loss": 0.091,
      "step": 38580
    },
    {
      "epoch": 0.06316974660094395,
      "grad_norm": 5.823808670043945,
      "learning_rate": 9.93872024142907e-06,
      "loss": 0.0985,
      "step": 38600
    },
    {
      "epoch": 0.06320247703959729,
      "grad_norm": 7.476269245147705,
      "learning_rate": 9.938654349215554e-06,
      "loss": 0.1308,
      "step": 38620
    },
    {
      "epoch": 0.06323520747825062,
      "grad_norm": 3.21272349357605,
      "learning_rate": 9.938588457002036e-06,
      "loss": 0.1071,
      "step": 38640
    },
    {
      "epoch": 0.06326793791690397,
      "grad_norm": 5.823341369628906,
      "learning_rate": 9.93852256478852e-06,
      "loss": 0.0959,
      "step": 38660
    },
    {
      "epoch": 0.0633006683555573,
      "grad_norm": 4.720577239990234,
      "learning_rate": 9.938456672575003e-06,
      "loss": 0.1001,
      "step": 38680
    },
    {
      "epoch": 0.06333339879421064,
      "grad_norm": 3.1087398529052734,
      "learning_rate": 9.938390780361485e-06,
      "loss": 0.1226,
      "step": 38700
    },
    {
      "epoch": 0.06336612923286399,
      "grad_norm": 12.127839088439941,
      "learning_rate": 9.938324888147969e-06,
      "loss": 0.1111,
      "step": 38720
    },
    {
      "epoch": 0.06339885967151732,
      "grad_norm": 11.394267082214355,
      "learning_rate": 9.93825899593445e-06,
      "loss": 0.0941,
      "step": 38740
    },
    {
      "epoch": 0.06343159011017066,
      "grad_norm": 2.301361083984375,
      "learning_rate": 9.938193103720934e-06,
      "loss": 0.0982,
      "step": 38760
    },
    {
      "epoch": 0.06346432054882399,
      "grad_norm": 8.764338493347168,
      "learning_rate": 9.938127211507416e-06,
      "loss": 0.1038,
      "step": 38780
    },
    {
      "epoch": 0.06349705098747734,
      "grad_norm": 6.592196464538574,
      "learning_rate": 9.9380613192939e-06,
      "loss": 0.1119,
      "step": 38800
    },
    {
      "epoch": 0.06352978142613068,
      "grad_norm": 6.366598606109619,
      "learning_rate": 9.937995427080382e-06,
      "loss": 0.1172,
      "step": 38820
    },
    {
      "epoch": 0.06356251186478401,
      "grad_norm": 1.0501781702041626,
      "learning_rate": 9.937929534866865e-06,
      "loss": 0.0992,
      "step": 38840
    },
    {
      "epoch": 0.06359524230343735,
      "grad_norm": 8.827820777893066,
      "learning_rate": 9.937863642653349e-06,
      "loss": 0.0813,
      "step": 38860
    },
    {
      "epoch": 0.06362797274209069,
      "grad_norm": 5.229310989379883,
      "learning_rate": 9.93779775043983e-06,
      "loss": 0.0915,
      "step": 38880
    },
    {
      "epoch": 0.06366070318074403,
      "grad_norm": 3.970607042312622,
      "learning_rate": 9.937731858226314e-06,
      "loss": 0.1195,
      "step": 38900
    },
    {
      "epoch": 0.06369343361939736,
      "grad_norm": 14.272705078125,
      "learning_rate": 9.937665966012798e-06,
      "loss": 0.1211,
      "step": 38920
    },
    {
      "epoch": 0.0637261640580507,
      "grad_norm": 3.9214043617248535,
      "learning_rate": 9.93760007379928e-06,
      "loss": 0.0963,
      "step": 38940
    },
    {
      "epoch": 0.06375889449670405,
      "grad_norm": 1.3846356868743896,
      "learning_rate": 9.937534181585763e-06,
      "loss": 0.1148,
      "step": 38960
    },
    {
      "epoch": 0.06379162493535738,
      "grad_norm": 5.85418176651001,
      "learning_rate": 9.937468289372245e-06,
      "loss": 0.1042,
      "step": 38980
    },
    {
      "epoch": 0.06382435537401072,
      "grad_norm": 4.8306355476379395,
      "learning_rate": 9.937402397158729e-06,
      "loss": 0.0954,
      "step": 39000
    },
    {
      "epoch": 0.06385708581266406,
      "grad_norm": 2.8175721168518066,
      "learning_rate": 9.937336504945213e-06,
      "loss": 0.1011,
      "step": 39020
    },
    {
      "epoch": 0.0638898162513174,
      "grad_norm": 8.251764297485352,
      "learning_rate": 9.937270612731694e-06,
      "loss": 0.0976,
      "step": 39040
    },
    {
      "epoch": 0.06392254668997074,
      "grad_norm": 2.948564291000366,
      "learning_rate": 9.937204720518178e-06,
      "loss": 0.0964,
      "step": 39060
    },
    {
      "epoch": 0.06395527712862407,
      "grad_norm": 2.252868890762329,
      "learning_rate": 9.93713882830466e-06,
      "loss": 0.1116,
      "step": 39080
    },
    {
      "epoch": 0.06398800756727742,
      "grad_norm": 6.526454925537109,
      "learning_rate": 9.937072936091143e-06,
      "loss": 0.0878,
      "step": 39100
    },
    {
      "epoch": 0.06402073800593075,
      "grad_norm": 3.4033944606781006,
      "learning_rate": 9.937007043877625e-06,
      "loss": 0.106,
      "step": 39120
    },
    {
      "epoch": 0.0640534684445841,
      "grad_norm": 6.066571235656738,
      "learning_rate": 9.936941151664109e-06,
      "loss": 0.1167,
      "step": 39140
    },
    {
      "epoch": 0.06408619888323744,
      "grad_norm": 6.6484270095825195,
      "learning_rate": 9.936875259450591e-06,
      "loss": 0.1044,
      "step": 39160
    },
    {
      "epoch": 0.06411892932189077,
      "grad_norm": 4.557886123657227,
      "learning_rate": 9.936809367237074e-06,
      "loss": 0.1021,
      "step": 39180
    },
    {
      "epoch": 0.06415165976054411,
      "grad_norm": 4.849204063415527,
      "learning_rate": 9.936743475023556e-06,
      "loss": 0.1061,
      "step": 39200
    },
    {
      "epoch": 0.06418439019919744,
      "grad_norm": 4.829512596130371,
      "learning_rate": 9.93667758281004e-06,
      "loss": 0.0977,
      "step": 39220
    },
    {
      "epoch": 0.06421712063785079,
      "grad_norm": 1.389768362045288,
      "learning_rate": 9.936611690596522e-06,
      "loss": 0.0931,
      "step": 39240
    },
    {
      "epoch": 0.06424985107650413,
      "grad_norm": 6.008662700653076,
      "learning_rate": 9.936545798383005e-06,
      "loss": 0.1149,
      "step": 39260
    },
    {
      "epoch": 0.06428258151515746,
      "grad_norm": 4.862889289855957,
      "learning_rate": 9.936479906169489e-06,
      "loss": 0.0924,
      "step": 39280
    },
    {
      "epoch": 0.06431531195381081,
      "grad_norm": 14.403728485107422,
      "learning_rate": 9.936414013955971e-06,
      "loss": 0.1188,
      "step": 39300
    },
    {
      "epoch": 0.06434804239246414,
      "grad_norm": 5.369787216186523,
      "learning_rate": 9.936348121742454e-06,
      "loss": 0.0776,
      "step": 39320
    },
    {
      "epoch": 0.06438077283111748,
      "grad_norm": 7.057946681976318,
      "learning_rate": 9.936282229528938e-06,
      "loss": 0.0824,
      "step": 39340
    },
    {
      "epoch": 0.06441350326977083,
      "grad_norm": 3.0976674556732178,
      "learning_rate": 9.93621633731542e-06,
      "loss": 0.0893,
      "step": 39360
    },
    {
      "epoch": 0.06444623370842416,
      "grad_norm": 7.71809720993042,
      "learning_rate": 9.936150445101904e-06,
      "loss": 0.0757,
      "step": 39380
    },
    {
      "epoch": 0.0644789641470775,
      "grad_norm": 3.739114761352539,
      "learning_rate": 9.936084552888387e-06,
      "loss": 0.0962,
      "step": 39400
    },
    {
      "epoch": 0.06451169458573083,
      "grad_norm": 5.643762588500977,
      "learning_rate": 9.936018660674869e-06,
      "loss": 0.076,
      "step": 39420
    },
    {
      "epoch": 0.06454442502438418,
      "grad_norm": 5.981598377227783,
      "learning_rate": 9.935952768461353e-06,
      "loss": 0.1018,
      "step": 39440
    },
    {
      "epoch": 0.06457715546303752,
      "grad_norm": 3.8568217754364014,
      "learning_rate": 9.935886876247834e-06,
      "loss": 0.119,
      "step": 39460
    },
    {
      "epoch": 0.06460988590169085,
      "grad_norm": 3.068298578262329,
      "learning_rate": 9.935820984034318e-06,
      "loss": 0.0978,
      "step": 39480
    },
    {
      "epoch": 0.0646426163403442,
      "grad_norm": 6.23139762878418,
      "learning_rate": 9.9357550918208e-06,
      "loss": 0.1055,
      "step": 39500
    },
    {
      "epoch": 0.06467534677899753,
      "grad_norm": 4.0181732177734375,
      "learning_rate": 9.935689199607284e-06,
      "loss": 0.1024,
      "step": 39520
    },
    {
      "epoch": 0.06470807721765087,
      "grad_norm": 4.423268795013428,
      "learning_rate": 9.935623307393765e-06,
      "loss": 0.0893,
      "step": 39540
    },
    {
      "epoch": 0.0647408076563042,
      "grad_norm": 13.088993072509766,
      "learning_rate": 9.935557415180249e-06,
      "loss": 0.1278,
      "step": 39560
    },
    {
      "epoch": 0.06477353809495755,
      "grad_norm": 2.414902448654175,
      "learning_rate": 9.935491522966731e-06,
      "loss": 0.0795,
      "step": 39580
    },
    {
      "epoch": 0.06480626853361089,
      "grad_norm": 6.118940353393555,
      "learning_rate": 9.935425630753215e-06,
      "loss": 0.0977,
      "step": 39600
    },
    {
      "epoch": 0.06483899897226422,
      "grad_norm": 24.471057891845703,
      "learning_rate": 9.935359738539696e-06,
      "loss": 0.0877,
      "step": 39620
    },
    {
      "epoch": 0.06487172941091757,
      "grad_norm": 3.6266300678253174,
      "learning_rate": 9.93529384632618e-06,
      "loss": 0.0917,
      "step": 39640
    },
    {
      "epoch": 0.0649044598495709,
      "grad_norm": 5.932285308837891,
      "learning_rate": 9.935227954112664e-06,
      "loss": 0.0921,
      "step": 39660
    },
    {
      "epoch": 0.06493719028822424,
      "grad_norm": 4.651463508605957,
      "learning_rate": 9.935162061899145e-06,
      "loss": 0.0907,
      "step": 39680
    },
    {
      "epoch": 0.06496992072687759,
      "grad_norm": 4.450772762298584,
      "learning_rate": 9.935096169685629e-06,
      "loss": 0.1199,
      "step": 39700
    },
    {
      "epoch": 0.06500265116553092,
      "grad_norm": 6.1449151039123535,
      "learning_rate": 9.935030277472113e-06,
      "loss": 0.1037,
      "step": 39720
    },
    {
      "epoch": 0.06503538160418426,
      "grad_norm": 4.343564987182617,
      "learning_rate": 9.934964385258595e-06,
      "loss": 0.1182,
      "step": 39740
    },
    {
      "epoch": 0.06506811204283759,
      "grad_norm": 10.228547096252441,
      "learning_rate": 9.934898493045078e-06,
      "loss": 0.0878,
      "step": 39760
    },
    {
      "epoch": 0.06510084248149094,
      "grad_norm": 4.934319496154785,
      "learning_rate": 9.934832600831562e-06,
      "loss": 0.0909,
      "step": 39780
    },
    {
      "epoch": 0.06513357292014428,
      "grad_norm": 6.206862449645996,
      "learning_rate": 9.934766708618044e-06,
      "loss": 0.0947,
      "step": 39800
    },
    {
      "epoch": 0.06516630335879761,
      "grad_norm": 2.6883931159973145,
      "learning_rate": 9.934700816404527e-06,
      "loss": 0.1179,
      "step": 39820
    },
    {
      "epoch": 0.06519903379745096,
      "grad_norm": 5.63271427154541,
      "learning_rate": 9.934634924191009e-06,
      "loss": 0.1053,
      "step": 39840
    },
    {
      "epoch": 0.06523176423610429,
      "grad_norm": 3.987264633178711,
      "learning_rate": 9.934569031977493e-06,
      "loss": 0.0938,
      "step": 39860
    },
    {
      "epoch": 0.06526449467475763,
      "grad_norm": 5.515331745147705,
      "learning_rate": 9.934503139763975e-06,
      "loss": 0.1019,
      "step": 39880
    },
    {
      "epoch": 0.06529722511341098,
      "grad_norm": 10.631836891174316,
      "learning_rate": 9.934437247550458e-06,
      "loss": 0.1152,
      "step": 39900
    },
    {
      "epoch": 0.0653299555520643,
      "grad_norm": 4.485385417938232,
      "learning_rate": 9.93437135533694e-06,
      "loss": 0.0896,
      "step": 39920
    },
    {
      "epoch": 0.06536268599071765,
      "grad_norm": 5.794275283813477,
      "learning_rate": 9.934305463123424e-06,
      "loss": 0.0852,
      "step": 39940
    },
    {
      "epoch": 0.06539541642937098,
      "grad_norm": 6.170813083648682,
      "learning_rate": 9.934239570909906e-06,
      "loss": 0.1007,
      "step": 39960
    },
    {
      "epoch": 0.06542814686802433,
      "grad_norm": 5.054750442504883,
      "learning_rate": 9.934173678696389e-06,
      "loss": 0.104,
      "step": 39980
    },
    {
      "epoch": 0.06546087730667767,
      "grad_norm": 5.236850738525391,
      "learning_rate": 9.934107786482871e-06,
      "loss": 0.0967,
      "step": 40000
    },
    {
      "epoch": 0.065493607745331,
      "grad_norm": 2.9128289222717285,
      "learning_rate": 9.934041894269355e-06,
      "loss": 0.085,
      "step": 40020
    },
    {
      "epoch": 0.06552633818398435,
      "grad_norm": 3.7967989444732666,
      "learning_rate": 9.933976002055838e-06,
      "loss": 0.1269,
      "step": 40040
    },
    {
      "epoch": 0.06555906862263768,
      "grad_norm": 8.662817001342773,
      "learning_rate": 9.93391010984232e-06,
      "loss": 0.0881,
      "step": 40060
    },
    {
      "epoch": 0.06559179906129102,
      "grad_norm": 9.81338119506836,
      "learning_rate": 9.933844217628804e-06,
      "loss": 0.1152,
      "step": 40080
    },
    {
      "epoch": 0.06562452949994436,
      "grad_norm": 4.248226165771484,
      "learning_rate": 9.933778325415287e-06,
      "loss": 0.0897,
      "step": 40100
    },
    {
      "epoch": 0.0656572599385977,
      "grad_norm": 6.619384765625,
      "learning_rate": 9.933712433201769e-06,
      "loss": 0.1009,
      "step": 40120
    },
    {
      "epoch": 0.06568999037725104,
      "grad_norm": 1.884588360786438,
      "learning_rate": 9.933646540988253e-06,
      "loss": 0.1147,
      "step": 40140
    },
    {
      "epoch": 0.06572272081590437,
      "grad_norm": 7.71109676361084,
      "learning_rate": 9.933580648774736e-06,
      "loss": 0.1018,
      "step": 40160
    },
    {
      "epoch": 0.06575545125455771,
      "grad_norm": 4.879652976989746,
      "learning_rate": 9.933514756561218e-06,
      "loss": 0.1104,
      "step": 40180
    },
    {
      "epoch": 0.06578818169321106,
      "grad_norm": 5.413896560668945,
      "learning_rate": 9.933448864347702e-06,
      "loss": 0.1154,
      "step": 40200
    },
    {
      "epoch": 0.06582091213186439,
      "grad_norm": 4.263503551483154,
      "learning_rate": 9.933382972134184e-06,
      "loss": 0.088,
      "step": 40220
    },
    {
      "epoch": 0.06585364257051773,
      "grad_norm": 5.603999137878418,
      "learning_rate": 9.933317079920667e-06,
      "loss": 0.0871,
      "step": 40240
    },
    {
      "epoch": 0.06588637300917106,
      "grad_norm": 2.2454843521118164,
      "learning_rate": 9.93325118770715e-06,
      "loss": 0.0968,
      "step": 40260
    },
    {
      "epoch": 0.06591910344782441,
      "grad_norm": 4.239742755889893,
      "learning_rate": 9.933185295493633e-06,
      "loss": 0.0973,
      "step": 40280
    },
    {
      "epoch": 0.06595183388647774,
      "grad_norm": 2.951982259750366,
      "learning_rate": 9.933119403280115e-06,
      "loss": 0.0986,
      "step": 40300
    },
    {
      "epoch": 0.06598456432513108,
      "grad_norm": 2.5149495601654053,
      "learning_rate": 9.933053511066598e-06,
      "loss": 0.0777,
      "step": 40320
    },
    {
      "epoch": 0.06601729476378443,
      "grad_norm": 5.350897789001465,
      "learning_rate": 9.93298761885308e-06,
      "loss": 0.0976,
      "step": 40340
    },
    {
      "epoch": 0.06605002520243776,
      "grad_norm": 3.3397083282470703,
      "learning_rate": 9.932921726639564e-06,
      "loss": 0.125,
      "step": 40360
    },
    {
      "epoch": 0.0660827556410911,
      "grad_norm": 5.857470512390137,
      "learning_rate": 9.932855834426046e-06,
      "loss": 0.1159,
      "step": 40380
    },
    {
      "epoch": 0.06611548607974443,
      "grad_norm": 5.712958335876465,
      "learning_rate": 9.93278994221253e-06,
      "loss": 0.093,
      "step": 40400
    },
    {
      "epoch": 0.06614821651839778,
      "grad_norm": 1.2222484350204468,
      "learning_rate": 9.932724049999013e-06,
      "loss": 0.1171,
      "step": 40420
    },
    {
      "epoch": 0.06618094695705112,
      "grad_norm": 2.1386375427246094,
      "learning_rate": 9.932658157785495e-06,
      "loss": 0.1006,
      "step": 40440
    },
    {
      "epoch": 0.06621367739570445,
      "grad_norm": 5.673220634460449,
      "learning_rate": 9.932592265571978e-06,
      "loss": 0.1118,
      "step": 40460
    },
    {
      "epoch": 0.0662464078343578,
      "grad_norm": 3.9851717948913574,
      "learning_rate": 9.93252637335846e-06,
      "loss": 0.0879,
      "step": 40480
    },
    {
      "epoch": 0.06627913827301113,
      "grad_norm": 16.176631927490234,
      "learning_rate": 9.932460481144944e-06,
      "loss": 0.0928,
      "step": 40500
    },
    {
      "epoch": 0.06631186871166447,
      "grad_norm": 7.838040828704834,
      "learning_rate": 9.932394588931427e-06,
      "loss": 0.1166,
      "step": 40520
    },
    {
      "epoch": 0.06634459915031782,
      "grad_norm": 8.538603782653809,
      "learning_rate": 9.93232869671791e-06,
      "loss": 0.1161,
      "step": 40540
    },
    {
      "epoch": 0.06637732958897115,
      "grad_norm": 3.8511431217193604,
      "learning_rate": 9.932262804504393e-06,
      "loss": 0.0769,
      "step": 40560
    },
    {
      "epoch": 0.06641006002762449,
      "grad_norm": 4.958973407745361,
      "learning_rate": 9.932196912290876e-06,
      "loss": 0.1063,
      "step": 40580
    },
    {
      "epoch": 0.06644279046627782,
      "grad_norm": 12.664162635803223,
      "learning_rate": 9.932131020077358e-06,
      "loss": 0.1177,
      "step": 40600
    },
    {
      "epoch": 0.06647552090493117,
      "grad_norm": 1.1511800289154053,
      "learning_rate": 9.932065127863842e-06,
      "loss": 0.0983,
      "step": 40620
    },
    {
      "epoch": 0.06650825134358451,
      "grad_norm": 4.814385890960693,
      "learning_rate": 9.931999235650324e-06,
      "loss": 0.0904,
      "step": 40640
    },
    {
      "epoch": 0.06654098178223784,
      "grad_norm": 2.983950614929199,
      "learning_rate": 9.931933343436807e-06,
      "loss": 0.0971,
      "step": 40660
    },
    {
      "epoch": 0.06657371222089119,
      "grad_norm": 6.533325672149658,
      "learning_rate": 9.93186745122329e-06,
      "loss": 0.1039,
      "step": 40680
    },
    {
      "epoch": 0.06660644265954452,
      "grad_norm": 7.644360065460205,
      "learning_rate": 9.931801559009773e-06,
      "loss": 0.0954,
      "step": 40700
    },
    {
      "epoch": 0.06663917309819786,
      "grad_norm": 5.346760272979736,
      "learning_rate": 9.931735666796255e-06,
      "loss": 0.1126,
      "step": 40720
    },
    {
      "epoch": 0.06667190353685121,
      "grad_norm": 4.0857954025268555,
      "learning_rate": 9.931669774582738e-06,
      "loss": 0.0792,
      "step": 40740
    },
    {
      "epoch": 0.06670463397550454,
      "grad_norm": 3.5398457050323486,
      "learning_rate": 9.931603882369222e-06,
      "loss": 0.0856,
      "step": 40760
    },
    {
      "epoch": 0.06673736441415788,
      "grad_norm": 4.060746192932129,
      "learning_rate": 9.931537990155704e-06,
      "loss": 0.107,
      "step": 40780
    },
    {
      "epoch": 0.06677009485281121,
      "grad_norm": 3.841949462890625,
      "learning_rate": 9.931472097942187e-06,
      "loss": 0.1126,
      "step": 40800
    },
    {
      "epoch": 0.06680282529146456,
      "grad_norm": 2.8018505573272705,
      "learning_rate": 9.93140620572867e-06,
      "loss": 0.1067,
      "step": 40820
    },
    {
      "epoch": 0.0668355557301179,
      "grad_norm": 3.753843069076538,
      "learning_rate": 9.931340313515153e-06,
      "loss": 0.1043,
      "step": 40840
    },
    {
      "epoch": 0.06686828616877123,
      "grad_norm": 3.1117656230926514,
      "learning_rate": 9.931274421301635e-06,
      "loss": 0.1029,
      "step": 40860
    },
    {
      "epoch": 0.06690101660742458,
      "grad_norm": 94.65963745117188,
      "learning_rate": 9.931208529088118e-06,
      "loss": 0.0978,
      "step": 40880
    },
    {
      "epoch": 0.06693374704607791,
      "grad_norm": 1.3858933448791504,
      "learning_rate": 9.931142636874602e-06,
      "loss": 0.0767,
      "step": 40900
    },
    {
      "epoch": 0.06696647748473125,
      "grad_norm": 10.09792709350586,
      "learning_rate": 9.931076744661084e-06,
      "loss": 0.096,
      "step": 40920
    },
    {
      "epoch": 0.0669992079233846,
      "grad_norm": 1.1691861152648926,
      "learning_rate": 9.931010852447567e-06,
      "loss": 0.0869,
      "step": 40940
    },
    {
      "epoch": 0.06703193836203793,
      "grad_norm": 3.525404453277588,
      "learning_rate": 9.930944960234051e-06,
      "loss": 0.0951,
      "step": 40960
    },
    {
      "epoch": 0.06706466880069127,
      "grad_norm": 6.061956405639648,
      "learning_rate": 9.930879068020533e-06,
      "loss": 0.1141,
      "step": 40980
    },
    {
      "epoch": 0.0670973992393446,
      "grad_norm": 5.581182956695557,
      "learning_rate": 9.930813175807016e-06,
      "loss": 0.1022,
      "step": 41000
    },
    {
      "epoch": 0.06713012967799795,
      "grad_norm": 4.841171741485596,
      "learning_rate": 9.930747283593498e-06,
      "loss": 0.0932,
      "step": 41020
    },
    {
      "epoch": 0.06716286011665128,
      "grad_norm": 3.228487253189087,
      "learning_rate": 9.930681391379982e-06,
      "loss": 0.1146,
      "step": 41040
    },
    {
      "epoch": 0.06719559055530462,
      "grad_norm": 4.779016494750977,
      "learning_rate": 9.930615499166464e-06,
      "loss": 0.1014,
      "step": 41060
    },
    {
      "epoch": 0.06722832099395797,
      "grad_norm": 8.248217582702637,
      "learning_rate": 9.930549606952947e-06,
      "loss": 0.0965,
      "step": 41080
    },
    {
      "epoch": 0.0672610514326113,
      "grad_norm": 7.751497268676758,
      "learning_rate": 9.93048371473943e-06,
      "loss": 0.1155,
      "step": 41100
    },
    {
      "epoch": 0.06729378187126464,
      "grad_norm": 2.0205211639404297,
      "learning_rate": 9.930417822525913e-06,
      "loss": 0.1203,
      "step": 41120
    },
    {
      "epoch": 0.06732651230991797,
      "grad_norm": 3.2154977321624756,
      "learning_rate": 9.930351930312396e-06,
      "loss": 0.1031,
      "step": 41140
    },
    {
      "epoch": 0.06735924274857132,
      "grad_norm": 6.179682731628418,
      "learning_rate": 9.930286038098878e-06,
      "loss": 0.1149,
      "step": 41160
    },
    {
      "epoch": 0.06739197318722466,
      "grad_norm": 3.9372479915618896,
      "learning_rate": 9.930220145885362e-06,
      "loss": 0.0894,
      "step": 41180
    },
    {
      "epoch": 0.06742470362587799,
      "grad_norm": 88.69194793701172,
      "learning_rate": 9.930154253671844e-06,
      "loss": 0.1065,
      "step": 41200
    },
    {
      "epoch": 0.06745743406453134,
      "grad_norm": 6.362655162811279,
      "learning_rate": 9.930088361458327e-06,
      "loss": 0.0773,
      "step": 41220
    },
    {
      "epoch": 0.06749016450318467,
      "grad_norm": 3.4691901206970215,
      "learning_rate": 9.93002246924481e-06,
      "loss": 0.1,
      "step": 41240
    },
    {
      "epoch": 0.06752289494183801,
      "grad_norm": 4.107913017272949,
      "learning_rate": 9.929956577031293e-06,
      "loss": 0.0907,
      "step": 41260
    },
    {
      "epoch": 0.06755562538049135,
      "grad_norm": 6.150206565856934,
      "learning_rate": 9.929890684817775e-06,
      "loss": 0.0952,
      "step": 41280
    },
    {
      "epoch": 0.06758835581914469,
      "grad_norm": 5.076859474182129,
      "learning_rate": 9.929824792604258e-06,
      "loss": 0.0737,
      "step": 41300
    },
    {
      "epoch": 0.06762108625779803,
      "grad_norm": 5.4250688552856445,
      "learning_rate": 9.929758900390742e-06,
      "loss": 0.0965,
      "step": 41320
    },
    {
      "epoch": 0.06765381669645136,
      "grad_norm": 10.381474494934082,
      "learning_rate": 9.929693008177224e-06,
      "loss": 0.1113,
      "step": 41340
    },
    {
      "epoch": 0.0676865471351047,
      "grad_norm": 6.968786716461182,
      "learning_rate": 9.929627115963707e-06,
      "loss": 0.1173,
      "step": 41360
    },
    {
      "epoch": 0.06771927757375805,
      "grad_norm": 2.2973268032073975,
      "learning_rate": 9.929561223750191e-06,
      "loss": 0.0987,
      "step": 41380
    },
    {
      "epoch": 0.06775200801241138,
      "grad_norm": 7.164764404296875,
      "learning_rate": 9.929495331536673e-06,
      "loss": 0.0868,
      "step": 41400
    },
    {
      "epoch": 0.06778473845106472,
      "grad_norm": 2.8188412189483643,
      "learning_rate": 9.929429439323157e-06,
      "loss": 0.0927,
      "step": 41420
    },
    {
      "epoch": 0.06781746888971805,
      "grad_norm": 9.849783897399902,
      "learning_rate": 9.929363547109638e-06,
      "loss": 0.103,
      "step": 41440
    },
    {
      "epoch": 0.0678501993283714,
      "grad_norm": 6.0471038818359375,
      "learning_rate": 9.929297654896122e-06,
      "loss": 0.0906,
      "step": 41460
    },
    {
      "epoch": 0.06788292976702474,
      "grad_norm": 1.4647291898727417,
      "learning_rate": 9.929231762682606e-06,
      "loss": 0.0964,
      "step": 41480
    },
    {
      "epoch": 0.06791566020567807,
      "grad_norm": 12.410307884216309,
      "learning_rate": 9.929165870469087e-06,
      "loss": 0.1015,
      "step": 41500
    },
    {
      "epoch": 0.06794839064433142,
      "grad_norm": 4.577717304229736,
      "learning_rate": 9.929099978255571e-06,
      "loss": 0.1207,
      "step": 41520
    },
    {
      "epoch": 0.06798112108298475,
      "grad_norm": 6.157159805297852,
      "learning_rate": 9.929034086042053e-06,
      "loss": 0.0943,
      "step": 41540
    },
    {
      "epoch": 0.0680138515216381,
      "grad_norm": 13.7354097366333,
      "learning_rate": 9.928968193828537e-06,
      "loss": 0.0943,
      "step": 41560
    },
    {
      "epoch": 0.06804658196029144,
      "grad_norm": 2.429076671600342,
      "learning_rate": 9.928902301615018e-06,
      "loss": 0.085,
      "step": 41580
    },
    {
      "epoch": 0.06807931239894477,
      "grad_norm": 4.167870044708252,
      "learning_rate": 9.928836409401502e-06,
      "loss": 0.0846,
      "step": 41600
    },
    {
      "epoch": 0.06811204283759811,
      "grad_norm": 6.502415657043457,
      "learning_rate": 9.928770517187984e-06,
      "loss": 0.1091,
      "step": 41620
    },
    {
      "epoch": 0.06814477327625144,
      "grad_norm": 4.577633380889893,
      "learning_rate": 9.928704624974468e-06,
      "loss": 0.0873,
      "step": 41640
    },
    {
      "epoch": 0.06817750371490479,
      "grad_norm": 3.2978672981262207,
      "learning_rate": 9.92863873276095e-06,
      "loss": 0.0989,
      "step": 41660
    },
    {
      "epoch": 0.06821023415355812,
      "grad_norm": 3.2125425338745117,
      "learning_rate": 9.928572840547433e-06,
      "loss": 0.0956,
      "step": 41680
    },
    {
      "epoch": 0.06824296459221146,
      "grad_norm": 4.932881832122803,
      "learning_rate": 9.928506948333917e-06,
      "loss": 0.0903,
      "step": 41700
    },
    {
      "epoch": 0.06827569503086481,
      "grad_norm": 3.286156415939331,
      "learning_rate": 9.928441056120398e-06,
      "loss": 0.1228,
      "step": 41720
    },
    {
      "epoch": 0.06830842546951814,
      "grad_norm": 6.650174140930176,
      "learning_rate": 9.928375163906882e-06,
      "loss": 0.0767,
      "step": 41740
    },
    {
      "epoch": 0.06834115590817148,
      "grad_norm": 4.55600118637085,
      "learning_rate": 9.928309271693366e-06,
      "loss": 0.0868,
      "step": 41760
    },
    {
      "epoch": 0.06837388634682481,
      "grad_norm": 4.339752197265625,
      "learning_rate": 9.928243379479848e-06,
      "loss": 0.0718,
      "step": 41780
    },
    {
      "epoch": 0.06840661678547816,
      "grad_norm": 9.538969039916992,
      "learning_rate": 9.928177487266331e-06,
      "loss": 0.102,
      "step": 41800
    },
    {
      "epoch": 0.0684393472241315,
      "grad_norm": 3.5703489780426025,
      "learning_rate": 9.928111595052815e-06,
      "loss": 0.1118,
      "step": 41820
    },
    {
      "epoch": 0.06847207766278483,
      "grad_norm": 7.5172929763793945,
      "learning_rate": 9.928045702839297e-06,
      "loss": 0.096,
      "step": 41840
    },
    {
      "epoch": 0.06850480810143818,
      "grad_norm": 4.8700032234191895,
      "learning_rate": 9.92797981062578e-06,
      "loss": 0.1085,
      "step": 41860
    },
    {
      "epoch": 0.06853753854009151,
      "grad_norm": 2.9141111373901367,
      "learning_rate": 9.927913918412262e-06,
      "loss": 0.0935,
      "step": 41880
    },
    {
      "epoch": 0.06857026897874485,
      "grad_norm": 2.44562029838562,
      "learning_rate": 9.927848026198746e-06,
      "loss": 0.1125,
      "step": 41900
    },
    {
      "epoch": 0.0686029994173982,
      "grad_norm": 6.164236545562744,
      "learning_rate": 9.927782133985228e-06,
      "loss": 0.1067,
      "step": 41920
    },
    {
      "epoch": 0.06863572985605153,
      "grad_norm": 1.7560150623321533,
      "learning_rate": 9.927716241771711e-06,
      "loss": 0.0882,
      "step": 41940
    },
    {
      "epoch": 0.06866846029470487,
      "grad_norm": 6.133374214172363,
      "learning_rate": 9.927650349558193e-06,
      "loss": 0.1068,
      "step": 41960
    },
    {
      "epoch": 0.0687011907333582,
      "grad_norm": 2.6571593284606934,
      "learning_rate": 9.927584457344677e-06,
      "loss": 0.0958,
      "step": 41980
    },
    {
      "epoch": 0.06873392117201155,
      "grad_norm": 12.282913208007812,
      "learning_rate": 9.927518565131159e-06,
      "loss": 0.1052,
      "step": 42000
    },
    {
      "epoch": 0.06876665161066489,
      "grad_norm": 4.420263290405273,
      "learning_rate": 9.927452672917642e-06,
      "loss": 0.1117,
      "step": 42020
    },
    {
      "epoch": 0.06879938204931822,
      "grad_norm": 4.85689640045166,
      "learning_rate": 9.927386780704124e-06,
      "loss": 0.1081,
      "step": 42040
    },
    {
      "epoch": 0.06883211248797157,
      "grad_norm": 4.014797210693359,
      "learning_rate": 9.927320888490608e-06,
      "loss": 0.1023,
      "step": 42060
    },
    {
      "epoch": 0.0688648429266249,
      "grad_norm": 4.873965740203857,
      "learning_rate": 9.92725499627709e-06,
      "loss": 0.1036,
      "step": 42080
    },
    {
      "epoch": 0.06889757336527824,
      "grad_norm": 4.53603982925415,
      "learning_rate": 9.927189104063573e-06,
      "loss": 0.0941,
      "step": 42100
    },
    {
      "epoch": 0.06893030380393159,
      "grad_norm": 5.905652046203613,
      "learning_rate": 9.927123211850057e-06,
      "loss": 0.0954,
      "step": 42120
    },
    {
      "epoch": 0.06896303424258492,
      "grad_norm": 3.676973819732666,
      "learning_rate": 9.927057319636539e-06,
      "loss": 0.0959,
      "step": 42140
    },
    {
      "epoch": 0.06899576468123826,
      "grad_norm": 4.714684963226318,
      "learning_rate": 9.926991427423022e-06,
      "loss": 0.0919,
      "step": 42160
    },
    {
      "epoch": 0.06902849511989159,
      "grad_norm": 6.953214645385742,
      "learning_rate": 9.926925535209506e-06,
      "loss": 0.0896,
      "step": 42180
    },
    {
      "epoch": 0.06906122555854494,
      "grad_norm": 2.631845235824585,
      "learning_rate": 9.92685964299599e-06,
      "loss": 0.0962,
      "step": 42200
    },
    {
      "epoch": 0.06909395599719828,
      "grad_norm": 2.8655760288238525,
      "learning_rate": 9.926793750782471e-06,
      "loss": 0.0852,
      "step": 42220
    },
    {
      "epoch": 0.06912668643585161,
      "grad_norm": 2.207674026489258,
      "learning_rate": 9.926727858568955e-06,
      "loss": 0.0874,
      "step": 42240
    },
    {
      "epoch": 0.06915941687450496,
      "grad_norm": 2.7808239459991455,
      "learning_rate": 9.926661966355437e-06,
      "loss": 0.1262,
      "step": 42260
    },
    {
      "epoch": 0.06919214731315829,
      "grad_norm": 5.421111583709717,
      "learning_rate": 9.92659607414192e-06,
      "loss": 0.0974,
      "step": 42280
    },
    {
      "epoch": 0.06922487775181163,
      "grad_norm": 2.5586323738098145,
      "learning_rate": 9.926530181928402e-06,
      "loss": 0.1025,
      "step": 42300
    },
    {
      "epoch": 0.06925760819046498,
      "grad_norm": 5.117640018463135,
      "learning_rate": 9.926464289714886e-06,
      "loss": 0.1187,
      "step": 42320
    },
    {
      "epoch": 0.0692903386291183,
      "grad_norm": 2.4969935417175293,
      "learning_rate": 9.926398397501368e-06,
      "loss": 0.0968,
      "step": 42340
    },
    {
      "epoch": 0.06932306906777165,
      "grad_norm": 6.576619625091553,
      "learning_rate": 9.926332505287851e-06,
      "loss": 0.0992,
      "step": 42360
    },
    {
      "epoch": 0.06935579950642498,
      "grad_norm": 14.71823501586914,
      "learning_rate": 9.926266613074333e-06,
      "loss": 0.0958,
      "step": 42380
    },
    {
      "epoch": 0.06938852994507833,
      "grad_norm": 5.5398173332214355,
      "learning_rate": 9.926200720860817e-06,
      "loss": 0.0972,
      "step": 42400
    },
    {
      "epoch": 0.06942126038373166,
      "grad_norm": 5.419635772705078,
      "learning_rate": 9.926134828647299e-06,
      "loss": 0.0947,
      "step": 42420
    },
    {
      "epoch": 0.069453990822385,
      "grad_norm": 1.9597930908203125,
      "learning_rate": 9.926068936433782e-06,
      "loss": 0.1034,
      "step": 42440
    },
    {
      "epoch": 0.06948672126103835,
      "grad_norm": 4.260391712188721,
      "learning_rate": 9.926003044220264e-06,
      "loss": 0.0936,
      "step": 42460
    },
    {
      "epoch": 0.06951945169969168,
      "grad_norm": 2.900416612625122,
      "learning_rate": 9.925937152006748e-06,
      "loss": 0.1097,
      "step": 42480
    },
    {
      "epoch": 0.06955218213834502,
      "grad_norm": 8.867387771606445,
      "learning_rate": 9.925871259793231e-06,
      "loss": 0.0825,
      "step": 42500
    },
    {
      "epoch": 0.06958491257699835,
      "grad_norm": 3.961594581604004,
      "learning_rate": 9.925805367579713e-06,
      "loss": 0.1045,
      "step": 42520
    },
    {
      "epoch": 0.0696176430156517,
      "grad_norm": 10.047128677368164,
      "learning_rate": 9.925739475366197e-06,
      "loss": 0.1307,
      "step": 42540
    },
    {
      "epoch": 0.06965037345430504,
      "grad_norm": 4.114616870880127,
      "learning_rate": 9.92567358315268e-06,
      "loss": 0.0934,
      "step": 42560
    },
    {
      "epoch": 0.06968310389295837,
      "grad_norm": 7.263952732086182,
      "learning_rate": 9.925607690939162e-06,
      "loss": 0.0979,
      "step": 42580
    },
    {
      "epoch": 0.06971583433161171,
      "grad_norm": 6.775822639465332,
      "learning_rate": 9.925541798725646e-06,
      "loss": 0.0975,
      "step": 42600
    },
    {
      "epoch": 0.06974856477026505,
      "grad_norm": 5.524730682373047,
      "learning_rate": 9.92547590651213e-06,
      "loss": 0.0844,
      "step": 42620
    },
    {
      "epoch": 0.06978129520891839,
      "grad_norm": 5.232898712158203,
      "learning_rate": 9.925410014298611e-06,
      "loss": 0.1049,
      "step": 42640
    },
    {
      "epoch": 0.06981402564757173,
      "grad_norm": 2.293994903564453,
      "learning_rate": 9.925344122085095e-06,
      "loss": 0.1129,
      "step": 42660
    },
    {
      "epoch": 0.06984675608622506,
      "grad_norm": 3.921370267868042,
      "learning_rate": 9.925278229871577e-06,
      "loss": 0.1073,
      "step": 42680
    },
    {
      "epoch": 0.06987948652487841,
      "grad_norm": 4.752767562866211,
      "learning_rate": 9.92521233765806e-06,
      "loss": 0.0796,
      "step": 42700
    },
    {
      "epoch": 0.06991221696353174,
      "grad_norm": 5.2145185470581055,
      "learning_rate": 9.925146445444542e-06,
      "loss": 0.1028,
      "step": 42720
    },
    {
      "epoch": 0.06994494740218508,
      "grad_norm": 8.169921875,
      "learning_rate": 9.925080553231026e-06,
      "loss": 0.0946,
      "step": 42740
    },
    {
      "epoch": 0.06997767784083843,
      "grad_norm": 3.123457908630371,
      "learning_rate": 9.925014661017508e-06,
      "loss": 0.0948,
      "step": 42760
    },
    {
      "epoch": 0.07001040827949176,
      "grad_norm": 4.377923488616943,
      "learning_rate": 9.924948768803991e-06,
      "loss": 0.0915,
      "step": 42780
    },
    {
      "epoch": 0.0700431387181451,
      "grad_norm": 8.337228775024414,
      "learning_rate": 9.924882876590473e-06,
      "loss": 0.0979,
      "step": 42800
    },
    {
      "epoch": 0.07007586915679843,
      "grad_norm": 1.4287320375442505,
      "learning_rate": 9.924816984376957e-06,
      "loss": 0.0896,
      "step": 42820
    },
    {
      "epoch": 0.07010859959545178,
      "grad_norm": 2.1418185234069824,
      "learning_rate": 9.924751092163439e-06,
      "loss": 0.1103,
      "step": 42840
    },
    {
      "epoch": 0.07014133003410512,
      "grad_norm": 2.798438549041748,
      "learning_rate": 9.924685199949922e-06,
      "loss": 0.0965,
      "step": 42860
    },
    {
      "epoch": 0.07017406047275845,
      "grad_norm": 0.8432579040527344,
      "learning_rate": 9.924619307736406e-06,
      "loss": 0.0618,
      "step": 42880
    },
    {
      "epoch": 0.0702067909114118,
      "grad_norm": 2.6993777751922607,
      "learning_rate": 9.924553415522888e-06,
      "loss": 0.0845,
      "step": 42900
    },
    {
      "epoch": 0.07023952135006513,
      "grad_norm": 5.485428333282471,
      "learning_rate": 9.924487523309371e-06,
      "loss": 0.099,
      "step": 42920
    },
    {
      "epoch": 0.07027225178871847,
      "grad_norm": 2.6847896575927734,
      "learning_rate": 9.924421631095855e-06,
      "loss": 0.1019,
      "step": 42940
    },
    {
      "epoch": 0.07030498222737182,
      "grad_norm": 3.660804510116577,
      "learning_rate": 9.924355738882337e-06,
      "loss": 0.0867,
      "step": 42960
    },
    {
      "epoch": 0.07033771266602515,
      "grad_norm": 9.846278190612793,
      "learning_rate": 9.92428984666882e-06,
      "loss": 0.12,
      "step": 42980
    },
    {
      "epoch": 0.07037044310467849,
      "grad_norm": 8.643726348876953,
      "learning_rate": 9.924223954455304e-06,
      "loss": 0.0992,
      "step": 43000
    },
    {
      "epoch": 0.07040317354333182,
      "grad_norm": 1.4998489618301392,
      "learning_rate": 9.924158062241786e-06,
      "loss": 0.0926,
      "step": 43020
    },
    {
      "epoch": 0.07043590398198517,
      "grad_norm": 3.8273019790649414,
      "learning_rate": 9.92409217002827e-06,
      "loss": 0.0996,
      "step": 43040
    },
    {
      "epoch": 0.0704686344206385,
      "grad_norm": 7.411912441253662,
      "learning_rate": 9.924026277814751e-06,
      "loss": 0.0936,
      "step": 43060
    },
    {
      "epoch": 0.07050136485929184,
      "grad_norm": 2.46480131149292,
      "learning_rate": 9.923960385601235e-06,
      "loss": 0.1267,
      "step": 43080
    },
    {
      "epoch": 0.07053409529794519,
      "grad_norm": 6.683405876159668,
      "learning_rate": 9.923894493387717e-06,
      "loss": 0.1041,
      "step": 43100
    },
    {
      "epoch": 0.07056682573659852,
      "grad_norm": 2.7206413745880127,
      "learning_rate": 9.9238286011742e-06,
      "loss": 0.1142,
      "step": 43120
    },
    {
      "epoch": 0.07059955617525186,
      "grad_norm": 4.369013786315918,
      "learning_rate": 9.923762708960682e-06,
      "loss": 0.1064,
      "step": 43140
    },
    {
      "epoch": 0.07063228661390519,
      "grad_norm": 36.0822639465332,
      "learning_rate": 9.923696816747166e-06,
      "loss": 0.1098,
      "step": 43160
    },
    {
      "epoch": 0.07066501705255854,
      "grad_norm": 10.32672119140625,
      "learning_rate": 9.923630924533648e-06,
      "loss": 0.1007,
      "step": 43180
    },
    {
      "epoch": 0.07069774749121188,
      "grad_norm": 1.7617146968841553,
      "learning_rate": 9.923565032320131e-06,
      "loss": 0.0808,
      "step": 43200
    },
    {
      "epoch": 0.07073047792986521,
      "grad_norm": 6.619287014007568,
      "learning_rate": 9.923499140106613e-06,
      "loss": 0.1029,
      "step": 43220
    },
    {
      "epoch": 0.07076320836851856,
      "grad_norm": 3.9565346240997314,
      "learning_rate": 9.923433247893097e-06,
      "loss": 0.1006,
      "step": 43240
    },
    {
      "epoch": 0.07079593880717189,
      "grad_norm": 4.449960231781006,
      "learning_rate": 9.92336735567958e-06,
      "loss": 0.0971,
      "step": 43260
    },
    {
      "epoch": 0.07082866924582523,
      "grad_norm": 0.9593714475631714,
      "learning_rate": 9.923301463466062e-06,
      "loss": 0.0872,
      "step": 43280
    },
    {
      "epoch": 0.07086139968447858,
      "grad_norm": 4.222991943359375,
      "learning_rate": 9.923235571252546e-06,
      "loss": 0.0804,
      "step": 43300
    },
    {
      "epoch": 0.07089413012313191,
      "grad_norm": 5.2111101150512695,
      "learning_rate": 9.923169679039028e-06,
      "loss": 0.0997,
      "step": 43320
    },
    {
      "epoch": 0.07092686056178525,
      "grad_norm": 1.7835267782211304,
      "learning_rate": 9.923103786825511e-06,
      "loss": 0.088,
      "step": 43340
    },
    {
      "epoch": 0.07095959100043858,
      "grad_norm": 2.430506944656372,
      "learning_rate": 9.923037894611995e-06,
      "loss": 0.0969,
      "step": 43360
    },
    {
      "epoch": 0.07099232143909193,
      "grad_norm": 2.0234293937683105,
      "learning_rate": 9.922972002398477e-06,
      "loss": 0.1124,
      "step": 43380
    },
    {
      "epoch": 0.07102505187774527,
      "grad_norm": 1.8981094360351562,
      "learning_rate": 9.92290611018496e-06,
      "loss": 0.0995,
      "step": 43400
    },
    {
      "epoch": 0.0710577823163986,
      "grad_norm": 6.8535966873168945,
      "learning_rate": 9.922840217971444e-06,
      "loss": 0.0809,
      "step": 43420
    },
    {
      "epoch": 0.07109051275505195,
      "grad_norm": 4.509099960327148,
      "learning_rate": 9.922774325757926e-06,
      "loss": 0.0957,
      "step": 43440
    },
    {
      "epoch": 0.07112324319370528,
      "grad_norm": 4.8258161544799805,
      "learning_rate": 9.92270843354441e-06,
      "loss": 0.1185,
      "step": 43460
    },
    {
      "epoch": 0.07115597363235862,
      "grad_norm": 4.524782657623291,
      "learning_rate": 9.922642541330891e-06,
      "loss": 0.0993,
      "step": 43480
    },
    {
      "epoch": 0.07118870407101197,
      "grad_norm": 2.44435977935791,
      "learning_rate": 9.922576649117375e-06,
      "loss": 0.105,
      "step": 43500
    },
    {
      "epoch": 0.0712214345096653,
      "grad_norm": 1.9835139513015747,
      "learning_rate": 9.922510756903857e-06,
      "loss": 0.0957,
      "step": 43520
    },
    {
      "epoch": 0.07125416494831864,
      "grad_norm": 3.5386433601379395,
      "learning_rate": 9.92244486469034e-06,
      "loss": 0.1191,
      "step": 43540
    },
    {
      "epoch": 0.07128689538697197,
      "grad_norm": 3.803469181060791,
      "learning_rate": 9.922378972476822e-06,
      "loss": 0.1164,
      "step": 43560
    },
    {
      "epoch": 0.07131962582562532,
      "grad_norm": 10.703171730041504,
      "learning_rate": 9.922313080263306e-06,
      "loss": 0.0921,
      "step": 43580
    },
    {
      "epoch": 0.07135235626427866,
      "grad_norm": 7.602084159851074,
      "learning_rate": 9.92224718804979e-06,
      "loss": 0.1137,
      "step": 43600
    },
    {
      "epoch": 0.07138508670293199,
      "grad_norm": 1.3139125108718872,
      "learning_rate": 9.922181295836271e-06,
      "loss": 0.0876,
      "step": 43620
    },
    {
      "epoch": 0.07141781714158534,
      "grad_norm": 5.189133644104004,
      "learning_rate": 9.922115403622755e-06,
      "loss": 0.1003,
      "step": 43640
    },
    {
      "epoch": 0.07145054758023867,
      "grad_norm": 22.8918399810791,
      "learning_rate": 9.922049511409237e-06,
      "loss": 0.086,
      "step": 43660
    },
    {
      "epoch": 0.07148327801889201,
      "grad_norm": 2.530036211013794,
      "learning_rate": 9.92198361919572e-06,
      "loss": 0.1117,
      "step": 43680
    },
    {
      "epoch": 0.07151600845754535,
      "grad_norm": 3.8260531425476074,
      "learning_rate": 9.921917726982202e-06,
      "loss": 0.0909,
      "step": 43700
    },
    {
      "epoch": 0.07154873889619869,
      "grad_norm": 1.0904920101165771,
      "learning_rate": 9.921851834768686e-06,
      "loss": 0.0788,
      "step": 43720
    },
    {
      "epoch": 0.07158146933485203,
      "grad_norm": 6.3440399169921875,
      "learning_rate": 9.92178594255517e-06,
      "loss": 0.0972,
      "step": 43740
    },
    {
      "epoch": 0.07161419977350536,
      "grad_norm": 4.119105339050293,
      "learning_rate": 9.921720050341651e-06,
      "loss": 0.0987,
      "step": 43760
    },
    {
      "epoch": 0.0716469302121587,
      "grad_norm": 3.8013813495635986,
      "learning_rate": 9.921654158128135e-06,
      "loss": 0.0768,
      "step": 43780
    },
    {
      "epoch": 0.07167966065081204,
      "grad_norm": 2.86078143119812,
      "learning_rate": 9.921588265914619e-06,
      "loss": 0.1054,
      "step": 43800
    },
    {
      "epoch": 0.07171239108946538,
      "grad_norm": 5.090022563934326,
      "learning_rate": 9.9215223737011e-06,
      "loss": 0.1,
      "step": 43820
    },
    {
      "epoch": 0.07174512152811872,
      "grad_norm": 5.547066688537598,
      "learning_rate": 9.921456481487584e-06,
      "loss": 0.1209,
      "step": 43840
    },
    {
      "epoch": 0.07177785196677205,
      "grad_norm": 10.2150297164917,
      "learning_rate": 9.921390589274066e-06,
      "loss": 0.0972,
      "step": 43860
    },
    {
      "epoch": 0.0718105824054254,
      "grad_norm": 3.3690547943115234,
      "learning_rate": 9.92132469706055e-06,
      "loss": 0.1029,
      "step": 43880
    },
    {
      "epoch": 0.07184331284407873,
      "grad_norm": 3.8577263355255127,
      "learning_rate": 9.921258804847032e-06,
      "loss": 0.1136,
      "step": 43900
    },
    {
      "epoch": 0.07187604328273207,
      "grad_norm": 5.205117225646973,
      "learning_rate": 9.921192912633515e-06,
      "loss": 0.0816,
      "step": 43920
    },
    {
      "epoch": 0.07190877372138542,
      "grad_norm": 2.6369991302490234,
      "learning_rate": 9.921127020419999e-06,
      "loss": 0.0982,
      "step": 43940
    },
    {
      "epoch": 0.07194150416003875,
      "grad_norm": 6.0550150871276855,
      "learning_rate": 9.92106112820648e-06,
      "loss": 0.0993,
      "step": 43960
    },
    {
      "epoch": 0.0719742345986921,
      "grad_norm": 3.8442883491516113,
      "learning_rate": 9.920995235992964e-06,
      "loss": 0.0841,
      "step": 43980
    },
    {
      "epoch": 0.07200696503734542,
      "grad_norm": 6.248653888702393,
      "learning_rate": 9.920929343779446e-06,
      "loss": 0.0871,
      "step": 44000
    },
    {
      "epoch": 0.07203969547599877,
      "grad_norm": 7.7700605392456055,
      "learning_rate": 9.92086345156593e-06,
      "loss": 0.0885,
      "step": 44020
    },
    {
      "epoch": 0.07207242591465211,
      "grad_norm": 4.645401954650879,
      "learning_rate": 9.920797559352412e-06,
      "loss": 0.0749,
      "step": 44040
    },
    {
      "epoch": 0.07210515635330544,
      "grad_norm": 4.896751403808594,
      "learning_rate": 9.920731667138895e-06,
      "loss": 0.0814,
      "step": 44060
    },
    {
      "epoch": 0.07213788679195879,
      "grad_norm": 8.720717430114746,
      "learning_rate": 9.920665774925377e-06,
      "loss": 0.0917,
      "step": 44080
    },
    {
      "epoch": 0.07217061723061212,
      "grad_norm": 3.147691011428833,
      "learning_rate": 9.92059988271186e-06,
      "loss": 0.0868,
      "step": 44100
    },
    {
      "epoch": 0.07220334766926546,
      "grad_norm": 1.5458080768585205,
      "learning_rate": 9.920533990498342e-06,
      "loss": 0.0805,
      "step": 44120
    },
    {
      "epoch": 0.07223607810791881,
      "grad_norm": 1.0464046001434326,
      "learning_rate": 9.920468098284826e-06,
      "loss": 0.0964,
      "step": 44140
    },
    {
      "epoch": 0.07226880854657214,
      "grad_norm": 5.150318622589111,
      "learning_rate": 9.92040220607131e-06,
      "loss": 0.0931,
      "step": 44160
    },
    {
      "epoch": 0.07230153898522548,
      "grad_norm": 2.4471352100372314,
      "learning_rate": 9.920336313857792e-06,
      "loss": 0.0872,
      "step": 44180
    },
    {
      "epoch": 0.07233426942387881,
      "grad_norm": 3.8119654655456543,
      "learning_rate": 9.920270421644275e-06,
      "loss": 0.1041,
      "step": 44200
    },
    {
      "epoch": 0.07236699986253216,
      "grad_norm": 18.19025421142578,
      "learning_rate": 9.920204529430759e-06,
      "loss": 0.0951,
      "step": 44220
    },
    {
      "epoch": 0.0723997303011855,
      "grad_norm": 5.680845260620117,
      "learning_rate": 9.92013863721724e-06,
      "loss": 0.11,
      "step": 44240
    },
    {
      "epoch": 0.07243246073983883,
      "grad_norm": 1.2628449201583862,
      "learning_rate": 9.920072745003724e-06,
      "loss": 0.0909,
      "step": 44260
    },
    {
      "epoch": 0.07246519117849218,
      "grad_norm": 6.966485977172852,
      "learning_rate": 9.920006852790208e-06,
      "loss": 0.094,
      "step": 44280
    },
    {
      "epoch": 0.07249792161714551,
      "grad_norm": 2.2960331439971924,
      "learning_rate": 9.91994096057669e-06,
      "loss": 0.0844,
      "step": 44300
    },
    {
      "epoch": 0.07253065205579885,
      "grad_norm": 13.007739067077637,
      "learning_rate": 9.919875068363173e-06,
      "loss": 0.0893,
      "step": 44320
    },
    {
      "epoch": 0.0725633824944522,
      "grad_norm": 6.354584693908691,
      "learning_rate": 9.919809176149655e-06,
      "loss": 0.0857,
      "step": 44340
    },
    {
      "epoch": 0.07259611293310553,
      "grad_norm": 9.226109504699707,
      "learning_rate": 9.919743283936139e-06,
      "loss": 0.0999,
      "step": 44360
    },
    {
      "epoch": 0.07262884337175887,
      "grad_norm": 7.629047870635986,
      "learning_rate": 9.91967739172262e-06,
      "loss": 0.1112,
      "step": 44380
    },
    {
      "epoch": 0.0726615738104122,
      "grad_norm": 9.602371215820312,
      "learning_rate": 9.919611499509104e-06,
      "loss": 0.098,
      "step": 44400
    },
    {
      "epoch": 0.07269430424906555,
      "grad_norm": 3.208932399749756,
      "learning_rate": 9.919545607295586e-06,
      "loss": 0.1112,
      "step": 44420
    },
    {
      "epoch": 0.07272703468771889,
      "grad_norm": 2.850125789642334,
      "learning_rate": 9.91947971508207e-06,
      "loss": 0.1003,
      "step": 44440
    },
    {
      "epoch": 0.07275976512637222,
      "grad_norm": 7.8070478439331055,
      "learning_rate": 9.919413822868552e-06,
      "loss": 0.1092,
      "step": 44460
    },
    {
      "epoch": 0.07279249556502557,
      "grad_norm": 5.112564563751221,
      "learning_rate": 9.919347930655035e-06,
      "loss": 0.0811,
      "step": 44480
    },
    {
      "epoch": 0.0728252260036789,
      "grad_norm": 3.6784331798553467,
      "learning_rate": 9.919282038441517e-06,
      "loss": 0.0966,
      "step": 44500
    },
    {
      "epoch": 0.07285795644233224,
      "grad_norm": 3.1580214500427246,
      "learning_rate": 9.919216146228e-06,
      "loss": 0.1126,
      "step": 44520
    },
    {
      "epoch": 0.07289068688098557,
      "grad_norm": 5.060790538787842,
      "learning_rate": 9.919150254014484e-06,
      "loss": 0.0829,
      "step": 44540
    },
    {
      "epoch": 0.07292341731963892,
      "grad_norm": 6.910121440887451,
      "learning_rate": 9.919084361800966e-06,
      "loss": 0.0969,
      "step": 44560
    },
    {
      "epoch": 0.07295614775829226,
      "grad_norm": 27.800167083740234,
      "learning_rate": 9.91901846958745e-06,
      "loss": 0.095,
      "step": 44580
    },
    {
      "epoch": 0.07298887819694559,
      "grad_norm": 2.154010772705078,
      "learning_rate": 9.918952577373933e-06,
      "loss": 0.0796,
      "step": 44600
    },
    {
      "epoch": 0.07302160863559894,
      "grad_norm": 10.29953384399414,
      "learning_rate": 9.918886685160415e-06,
      "loss": 0.1081,
      "step": 44620
    },
    {
      "epoch": 0.07305433907425227,
      "grad_norm": 3.4732260704040527,
      "learning_rate": 9.918820792946899e-06,
      "loss": 0.0972,
      "step": 44640
    },
    {
      "epoch": 0.07308706951290561,
      "grad_norm": 3.908747911453247,
      "learning_rate": 9.918754900733382e-06,
      "loss": 0.1029,
      "step": 44660
    },
    {
      "epoch": 0.07311979995155896,
      "grad_norm": 3.8036341667175293,
      "learning_rate": 9.918689008519864e-06,
      "loss": 0.0907,
      "step": 44680
    },
    {
      "epoch": 0.07315253039021229,
      "grad_norm": 4.918139934539795,
      "learning_rate": 9.918623116306348e-06,
      "loss": 0.0875,
      "step": 44700
    },
    {
      "epoch": 0.07318526082886563,
      "grad_norm": 9.099723815917969,
      "learning_rate": 9.91855722409283e-06,
      "loss": 0.0929,
      "step": 44720
    },
    {
      "epoch": 0.07321799126751896,
      "grad_norm": 2.0625805854797363,
      "learning_rate": 9.918491331879313e-06,
      "loss": 0.0958,
      "step": 44740
    },
    {
      "epoch": 0.0732507217061723,
      "grad_norm": 4.0366387367248535,
      "learning_rate": 9.918425439665795e-06,
      "loss": 0.0894,
      "step": 44760
    },
    {
      "epoch": 0.07328345214482565,
      "grad_norm": 3.513423442840576,
      "learning_rate": 9.918359547452279e-06,
      "loss": 0.0879,
      "step": 44780
    },
    {
      "epoch": 0.07331618258347898,
      "grad_norm": 2.805238723754883,
      "learning_rate": 9.91829365523876e-06,
      "loss": 0.0884,
      "step": 44800
    },
    {
      "epoch": 0.07334891302213233,
      "grad_norm": 4.657384872436523,
      "learning_rate": 9.918227763025244e-06,
      "loss": 0.1047,
      "step": 44820
    },
    {
      "epoch": 0.07338164346078566,
      "grad_norm": 2.2284204959869385,
      "learning_rate": 9.918161870811726e-06,
      "loss": 0.0772,
      "step": 44840
    },
    {
      "epoch": 0.073414373899439,
      "grad_norm": 1.0206568241119385,
      "learning_rate": 9.91809597859821e-06,
      "loss": 0.0805,
      "step": 44860
    },
    {
      "epoch": 0.07344710433809234,
      "grad_norm": 3.2143936157226562,
      "learning_rate": 9.918030086384692e-06,
      "loss": 0.1066,
      "step": 44880
    },
    {
      "epoch": 0.07347983477674568,
      "grad_norm": 3.486156940460205,
      "learning_rate": 9.917964194171175e-06,
      "loss": 0.1049,
      "step": 44900
    },
    {
      "epoch": 0.07351256521539902,
      "grad_norm": 98.19830322265625,
      "learning_rate": 9.917898301957657e-06,
      "loss": 0.1064,
      "step": 44920
    },
    {
      "epoch": 0.07354529565405235,
      "grad_norm": 4.012186050415039,
      "learning_rate": 9.91783240974414e-06,
      "loss": 0.0978,
      "step": 44940
    },
    {
      "epoch": 0.0735780260927057,
      "grad_norm": 2.7770566940307617,
      "learning_rate": 9.917766517530624e-06,
      "loss": 0.1039,
      "step": 44960
    },
    {
      "epoch": 0.07361075653135904,
      "grad_norm": 3.935842275619507,
      "learning_rate": 9.917700625317108e-06,
      "loss": 0.115,
      "step": 44980
    },
    {
      "epoch": 0.07364348697001237,
      "grad_norm": 6.090681552886963,
      "learning_rate": 9.91763473310359e-06,
      "loss": 0.113,
      "step": 45000
    },
    {
      "epoch": 0.07367621740866571,
      "grad_norm": 5.045783519744873,
      "learning_rate": 9.917568840890073e-06,
      "loss": 0.1223,
      "step": 45020
    },
    {
      "epoch": 0.07370894784731905,
      "grad_norm": 6.755669116973877,
      "learning_rate": 9.917502948676557e-06,
      "loss": 0.1196,
      "step": 45040
    },
    {
      "epoch": 0.07374167828597239,
      "grad_norm": 3.700324535369873,
      "learning_rate": 9.917437056463039e-06,
      "loss": 0.0897,
      "step": 45060
    },
    {
      "epoch": 0.07377440872462573,
      "grad_norm": 4.975447177886963,
      "learning_rate": 9.917371164249522e-06,
      "loss": 0.1027,
      "step": 45080
    },
    {
      "epoch": 0.07380713916327906,
      "grad_norm": 12.253152847290039,
      "learning_rate": 9.917305272036004e-06,
      "loss": 0.1224,
      "step": 45100
    },
    {
      "epoch": 0.07383986960193241,
      "grad_norm": 1.8790168762207031,
      "learning_rate": 9.917239379822488e-06,
      "loss": 0.0845,
      "step": 45120
    },
    {
      "epoch": 0.07387260004058574,
      "grad_norm": 5.814539432525635,
      "learning_rate": 9.91717348760897e-06,
      "loss": 0.0957,
      "step": 45140
    },
    {
      "epoch": 0.07390533047923908,
      "grad_norm": 5.351069450378418,
      "learning_rate": 9.917107595395453e-06,
      "loss": 0.1078,
      "step": 45160
    },
    {
      "epoch": 0.07393806091789241,
      "grad_norm": 16.26407241821289,
      "learning_rate": 9.917041703181935e-06,
      "loss": 0.0971,
      "step": 45180
    },
    {
      "epoch": 0.07397079135654576,
      "grad_norm": 3.0318803787231445,
      "learning_rate": 9.916975810968419e-06,
      "loss": 0.1026,
      "step": 45200
    },
    {
      "epoch": 0.0740035217951991,
      "grad_norm": 4.112021446228027,
      "learning_rate": 9.9169099187549e-06,
      "loss": 0.0867,
      "step": 45220
    },
    {
      "epoch": 0.07403625223385243,
      "grad_norm": 0.922856330871582,
      "learning_rate": 9.916844026541384e-06,
      "loss": 0.0952,
      "step": 45240
    },
    {
      "epoch": 0.07406898267250578,
      "grad_norm": 5.974740505218506,
      "learning_rate": 9.916778134327866e-06,
      "loss": 0.0988,
      "step": 45260
    },
    {
      "epoch": 0.07410171311115911,
      "grad_norm": 3.744122266769409,
      "learning_rate": 9.91671224211435e-06,
      "loss": 0.0909,
      "step": 45280
    },
    {
      "epoch": 0.07413444354981245,
      "grad_norm": 3.6980462074279785,
      "learning_rate": 9.916646349900832e-06,
      "loss": 0.1125,
      "step": 45300
    },
    {
      "epoch": 0.0741671739884658,
      "grad_norm": 5.475578308105469,
      "learning_rate": 9.916580457687315e-06,
      "loss": 0.1007,
      "step": 45320
    },
    {
      "epoch": 0.07419990442711913,
      "grad_norm": 2.9628398418426514,
      "learning_rate": 9.916514565473799e-06,
      "loss": 0.0661,
      "step": 45340
    },
    {
      "epoch": 0.07423263486577247,
      "grad_norm": 3.065316677093506,
      "learning_rate": 9.91644867326028e-06,
      "loss": 0.1074,
      "step": 45360
    },
    {
      "epoch": 0.0742653653044258,
      "grad_norm": 2.879176139831543,
      "learning_rate": 9.916382781046764e-06,
      "loss": 0.0865,
      "step": 45380
    },
    {
      "epoch": 0.07429809574307915,
      "grad_norm": 1.880800485610962,
      "learning_rate": 9.916316888833248e-06,
      "loss": 0.0817,
      "step": 45400
    },
    {
      "epoch": 0.07433082618173249,
      "grad_norm": 6.832296848297119,
      "learning_rate": 9.91625099661973e-06,
      "loss": 0.1102,
      "step": 45420
    },
    {
      "epoch": 0.07436355662038582,
      "grad_norm": 3.6560726165771484,
      "learning_rate": 9.916185104406213e-06,
      "loss": 0.0967,
      "step": 45440
    },
    {
      "epoch": 0.07439628705903917,
      "grad_norm": 3.8792316913604736,
      "learning_rate": 9.916119212192697e-06,
      "loss": 0.087,
      "step": 45460
    },
    {
      "epoch": 0.0744290174976925,
      "grad_norm": 2.6900553703308105,
      "learning_rate": 9.916053319979179e-06,
      "loss": 0.0959,
      "step": 45480
    },
    {
      "epoch": 0.07446174793634584,
      "grad_norm": 5.483438014984131,
      "learning_rate": 9.915987427765663e-06,
      "loss": 0.0809,
      "step": 45500
    },
    {
      "epoch": 0.07449447837499919,
      "grad_norm": 4.528907775878906,
      "learning_rate": 9.915921535552144e-06,
      "loss": 0.1066,
      "step": 45520
    },
    {
      "epoch": 0.07452720881365252,
      "grad_norm": 3.36852765083313,
      "learning_rate": 9.915855643338628e-06,
      "loss": 0.0789,
      "step": 45540
    },
    {
      "epoch": 0.07455993925230586,
      "grad_norm": 5.374870777130127,
      "learning_rate": 9.91578975112511e-06,
      "loss": 0.1159,
      "step": 45560
    },
    {
      "epoch": 0.07459266969095919,
      "grad_norm": 4.134756565093994,
      "learning_rate": 9.915723858911594e-06,
      "loss": 0.107,
      "step": 45580
    },
    {
      "epoch": 0.07462540012961254,
      "grad_norm": 8.182598114013672,
      "learning_rate": 9.915657966698075e-06,
      "loss": 0.0862,
      "step": 45600
    },
    {
      "epoch": 0.07465813056826588,
      "grad_norm": 8.407455444335938,
      "learning_rate": 9.915592074484559e-06,
      "loss": 0.0894,
      "step": 45620
    },
    {
      "epoch": 0.07469086100691921,
      "grad_norm": 8.953672409057617,
      "learning_rate": 9.915526182271041e-06,
      "loss": 0.1004,
      "step": 45640
    },
    {
      "epoch": 0.07472359144557256,
      "grad_norm": 4.7282795906066895,
      "learning_rate": 9.915460290057524e-06,
      "loss": 0.1015,
      "step": 45660
    },
    {
      "epoch": 0.07475632188422589,
      "grad_norm": 4.887329578399658,
      "learning_rate": 9.915394397844006e-06,
      "loss": 0.1041,
      "step": 45680
    },
    {
      "epoch": 0.07478905232287923,
      "grad_norm": 2.5768964290618896,
      "learning_rate": 9.91532850563049e-06,
      "loss": 0.1139,
      "step": 45700
    },
    {
      "epoch": 0.07482178276153258,
      "grad_norm": 2.0710926055908203,
      "learning_rate": 9.915262613416974e-06,
      "loss": 0.1015,
      "step": 45720
    },
    {
      "epoch": 0.07485451320018591,
      "grad_norm": 7.002739906311035,
      "learning_rate": 9.915196721203455e-06,
      "loss": 0.089,
      "step": 45740
    },
    {
      "epoch": 0.07488724363883925,
      "grad_norm": 7.084076881408691,
      "learning_rate": 9.915130828989939e-06,
      "loss": 0.1088,
      "step": 45760
    },
    {
      "epoch": 0.07491997407749258,
      "grad_norm": 1.9618518352508545,
      "learning_rate": 9.915064936776423e-06,
      "loss": 0.0749,
      "step": 45780
    },
    {
      "epoch": 0.07495270451614593,
      "grad_norm": 4.811603546142578,
      "learning_rate": 9.914999044562904e-06,
      "loss": 0.096,
      "step": 45800
    },
    {
      "epoch": 0.07498543495479927,
      "grad_norm": 2.4516255855560303,
      "learning_rate": 9.914933152349388e-06,
      "loss": 0.0936,
      "step": 45820
    },
    {
      "epoch": 0.0750181653934526,
      "grad_norm": 13.114709854125977,
      "learning_rate": 9.914867260135872e-06,
      "loss": 0.0975,
      "step": 45840
    },
    {
      "epoch": 0.07505089583210595,
      "grad_norm": 3.4521255493164062,
      "learning_rate": 9.914801367922354e-06,
      "loss": 0.0976,
      "step": 45860
    },
    {
      "epoch": 0.07508362627075928,
      "grad_norm": 2.550152540206909,
      "learning_rate": 9.914735475708837e-06,
      "loss": 0.0968,
      "step": 45880
    },
    {
      "epoch": 0.07511635670941262,
      "grad_norm": 6.3373188972473145,
      "learning_rate": 9.914669583495319e-06,
      "loss": 0.1165,
      "step": 45900
    },
    {
      "epoch": 0.07514908714806595,
      "grad_norm": 1.2318439483642578,
      "learning_rate": 9.914603691281803e-06,
      "loss": 0.0896,
      "step": 45920
    },
    {
      "epoch": 0.0751818175867193,
      "grad_norm": 3.8954896926879883,
      "learning_rate": 9.914537799068285e-06,
      "loss": 0.091,
      "step": 45940
    },
    {
      "epoch": 0.07521454802537264,
      "grad_norm": 5.498123645782471,
      "learning_rate": 9.914471906854768e-06,
      "loss": 0.1051,
      "step": 45960
    },
    {
      "epoch": 0.07524727846402597,
      "grad_norm": 3.350128412246704,
      "learning_rate": 9.91440601464125e-06,
      "loss": 0.0994,
      "step": 45980
    },
    {
      "epoch": 0.07528000890267932,
      "grad_norm": 8.362604141235352,
      "learning_rate": 9.914340122427734e-06,
      "loss": 0.0879,
      "step": 46000
    },
    {
      "epoch": 0.07531273934133265,
      "grad_norm": 2.868926763534546,
      "learning_rate": 9.914274230214215e-06,
      "loss": 0.0917,
      "step": 46020
    },
    {
      "epoch": 0.07534546977998599,
      "grad_norm": 3.231926441192627,
      "learning_rate": 9.914208338000699e-06,
      "loss": 0.1109,
      "step": 46040
    },
    {
      "epoch": 0.07537820021863934,
      "grad_norm": 4.436244010925293,
      "learning_rate": 9.914142445787183e-06,
      "loss": 0.0971,
      "step": 46060
    },
    {
      "epoch": 0.07541093065729267,
      "grad_norm": 1.7850360870361328,
      "learning_rate": 9.914076553573665e-06,
      "loss": 0.0928,
      "step": 46080
    },
    {
      "epoch": 0.07544366109594601,
      "grad_norm": 6.3358612060546875,
      "learning_rate": 9.914010661360148e-06,
      "loss": 0.0921,
      "step": 46100
    },
    {
      "epoch": 0.07547639153459934,
      "grad_norm": 2.257627487182617,
      "learning_rate": 9.91394476914663e-06,
      "loss": 0.1051,
      "step": 46120
    },
    {
      "epoch": 0.07550912197325269,
      "grad_norm": 5.990715503692627,
      "learning_rate": 9.913878876933114e-06,
      "loss": 0.1146,
      "step": 46140
    },
    {
      "epoch": 0.07554185241190603,
      "grad_norm": 2.465430736541748,
      "learning_rate": 9.913812984719596e-06,
      "loss": 0.0763,
      "step": 46160
    },
    {
      "epoch": 0.07557458285055936,
      "grad_norm": 5.437900066375732,
      "learning_rate": 9.913747092506079e-06,
      "loss": 0.1025,
      "step": 46180
    },
    {
      "epoch": 0.0756073132892127,
      "grad_norm": 1.353947639465332,
      "learning_rate": 9.913681200292563e-06,
      "loss": 0.0979,
      "step": 46200
    },
    {
      "epoch": 0.07564004372786604,
      "grad_norm": 3.9555914402008057,
      "learning_rate": 9.913615308079045e-06,
      "loss": 0.0889,
      "step": 46220
    },
    {
      "epoch": 0.07567277416651938,
      "grad_norm": 3.603308916091919,
      "learning_rate": 9.913549415865528e-06,
      "loss": 0.0949,
      "step": 46240
    },
    {
      "epoch": 0.07570550460517272,
      "grad_norm": 2.9203574657440186,
      "learning_rate": 9.913483523652012e-06,
      "loss": 0.0959,
      "step": 46260
    },
    {
      "epoch": 0.07573823504382605,
      "grad_norm": 2.5496582984924316,
      "learning_rate": 9.913417631438494e-06,
      "loss": 0.0976,
      "step": 46280
    },
    {
      "epoch": 0.0757709654824794,
      "grad_norm": 3.644348621368408,
      "learning_rate": 9.913351739224977e-06,
      "loss": 0.1038,
      "step": 46300
    },
    {
      "epoch": 0.07580369592113273,
      "grad_norm": 1.7275166511535645,
      "learning_rate": 9.913285847011459e-06,
      "loss": 0.0932,
      "step": 46320
    },
    {
      "epoch": 0.07583642635978607,
      "grad_norm": 18.449392318725586,
      "learning_rate": 9.913219954797943e-06,
      "loss": 0.0938,
      "step": 46340
    },
    {
      "epoch": 0.07586915679843942,
      "grad_norm": 3.052330732345581,
      "learning_rate": 9.913154062584425e-06,
      "loss": 0.0701,
      "step": 46360
    },
    {
      "epoch": 0.07590188723709275,
      "grad_norm": 2.9026994705200195,
      "learning_rate": 9.913088170370908e-06,
      "loss": 0.0816,
      "step": 46380
    },
    {
      "epoch": 0.0759346176757461,
      "grad_norm": 2.1233971118927,
      "learning_rate": 9.913022278157392e-06,
      "loss": 0.1013,
      "step": 46400
    },
    {
      "epoch": 0.07596734811439942,
      "grad_norm": 4.180610656738281,
      "learning_rate": 9.912956385943874e-06,
      "loss": 0.102,
      "step": 46420
    },
    {
      "epoch": 0.07600007855305277,
      "grad_norm": 3.1217267513275146,
      "learning_rate": 9.912890493730357e-06,
      "loss": 0.0961,
      "step": 46440
    },
    {
      "epoch": 0.07603280899170611,
      "grad_norm": 3.541592597961426,
      "learning_rate": 9.912824601516839e-06,
      "loss": 0.0819,
      "step": 46460
    },
    {
      "epoch": 0.07606553943035944,
      "grad_norm": 4.955533981323242,
      "learning_rate": 9.912758709303323e-06,
      "loss": 0.1002,
      "step": 46480
    },
    {
      "epoch": 0.07609826986901279,
      "grad_norm": 5.904676914215088,
      "learning_rate": 9.912692817089805e-06,
      "loss": 0.0847,
      "step": 46500
    },
    {
      "epoch": 0.07613100030766612,
      "grad_norm": 2.844338893890381,
      "learning_rate": 9.912626924876288e-06,
      "loss": 0.0809,
      "step": 46520
    },
    {
      "epoch": 0.07616373074631946,
      "grad_norm": 21.365461349487305,
      "learning_rate": 9.91256103266277e-06,
      "loss": 0.1059,
      "step": 46540
    },
    {
      "epoch": 0.0761964611849728,
      "grad_norm": 20.585338592529297,
      "learning_rate": 9.912495140449254e-06,
      "loss": 0.0984,
      "step": 46560
    },
    {
      "epoch": 0.07622919162362614,
      "grad_norm": 7.640870571136475,
      "learning_rate": 9.912429248235737e-06,
      "loss": 0.0987,
      "step": 46580
    },
    {
      "epoch": 0.07626192206227948,
      "grad_norm": 4.805365085601807,
      "learning_rate": 9.91236335602222e-06,
      "loss": 0.1232,
      "step": 46600
    },
    {
      "epoch": 0.07629465250093281,
      "grad_norm": 3.8189969062805176,
      "learning_rate": 9.912297463808703e-06,
      "loss": 0.0986,
      "step": 46620
    },
    {
      "epoch": 0.07632738293958616,
      "grad_norm": 4.019855976104736,
      "learning_rate": 9.912231571595186e-06,
      "loss": 0.0857,
      "step": 46640
    },
    {
      "epoch": 0.07636011337823949,
      "grad_norm": 4.421505928039551,
      "learning_rate": 9.912165679381668e-06,
      "loss": 0.1149,
      "step": 46660
    },
    {
      "epoch": 0.07639284381689283,
      "grad_norm": 2.5158419609069824,
      "learning_rate": 9.912099787168152e-06,
      "loss": 0.1072,
      "step": 46680
    },
    {
      "epoch": 0.07642557425554618,
      "grad_norm": 2.495978593826294,
      "learning_rate": 9.912033894954634e-06,
      "loss": 0.1047,
      "step": 46700
    },
    {
      "epoch": 0.07645830469419951,
      "grad_norm": 5.102301120758057,
      "learning_rate": 9.911968002741117e-06,
      "loss": 0.084,
      "step": 46720
    },
    {
      "epoch": 0.07649103513285285,
      "grad_norm": 2.409679889678955,
      "learning_rate": 9.911902110527601e-06,
      "loss": 0.0757,
      "step": 46740
    },
    {
      "epoch": 0.07652376557150618,
      "grad_norm": 6.238305568695068,
      "learning_rate": 9.911836218314083e-06,
      "loss": 0.1095,
      "step": 46760
    },
    {
      "epoch": 0.07655649601015953,
      "grad_norm": 5.783995151519775,
      "learning_rate": 9.911770326100566e-06,
      "loss": 0.0954,
      "step": 46780
    },
    {
      "epoch": 0.07658922644881287,
      "grad_norm": 3.620130777359009,
      "learning_rate": 9.911704433887048e-06,
      "loss": 0.0886,
      "step": 46800
    },
    {
      "epoch": 0.0766219568874662,
      "grad_norm": 5.667612552642822,
      "learning_rate": 9.911638541673532e-06,
      "loss": 0.0886,
      "step": 46820
    },
    {
      "epoch": 0.07665468732611955,
      "grad_norm": 3.779024362564087,
      "learning_rate": 9.911572649460014e-06,
      "loss": 0.0939,
      "step": 46840
    },
    {
      "epoch": 0.07668741776477288,
      "grad_norm": 2.188551187515259,
      "learning_rate": 9.911506757246497e-06,
      "loss": 0.0767,
      "step": 46860
    },
    {
      "epoch": 0.07672014820342622,
      "grad_norm": 3.497584581375122,
      "learning_rate": 9.91144086503298e-06,
      "loss": 0.0892,
      "step": 46880
    },
    {
      "epoch": 0.07675287864207957,
      "grad_norm": 3.751699924468994,
      "learning_rate": 9.911374972819463e-06,
      "loss": 0.1019,
      "step": 46900
    },
    {
      "epoch": 0.0767856090807329,
      "grad_norm": 11.062211990356445,
      "learning_rate": 9.911309080605945e-06,
      "loss": 0.0857,
      "step": 46920
    },
    {
      "epoch": 0.07681833951938624,
      "grad_norm": 3.8723042011260986,
      "learning_rate": 9.911243188392428e-06,
      "loss": 0.0917,
      "step": 46940
    },
    {
      "epoch": 0.07685106995803957,
      "grad_norm": 2.289156675338745,
      "learning_rate": 9.91117729617891e-06,
      "loss": 0.0966,
      "step": 46960
    },
    {
      "epoch": 0.07688380039669292,
      "grad_norm": 4.320603370666504,
      "learning_rate": 9.911111403965394e-06,
      "loss": 0.0966,
      "step": 46980
    },
    {
      "epoch": 0.07691653083534626,
      "grad_norm": 4.305558681488037,
      "learning_rate": 9.911045511751877e-06,
      "loss": 0.0903,
      "step": 47000
    },
    {
      "epoch": 0.07694926127399959,
      "grad_norm": 4.631832122802734,
      "learning_rate": 9.91097961953836e-06,
      "loss": 0.0899,
      "step": 47020
    },
    {
      "epoch": 0.07698199171265294,
      "grad_norm": 3.184211254119873,
      "learning_rate": 9.910913727324843e-06,
      "loss": 0.0904,
      "step": 47040
    },
    {
      "epoch": 0.07701472215130627,
      "grad_norm": 3.8167455196380615,
      "learning_rate": 9.910847835111326e-06,
      "loss": 0.1154,
      "step": 47060
    },
    {
      "epoch": 0.07704745258995961,
      "grad_norm": 1.5586626529693604,
      "learning_rate": 9.910781942897808e-06,
      "loss": 0.1117,
      "step": 47080
    },
    {
      "epoch": 0.07708018302861296,
      "grad_norm": 3.9539315700531006,
      "learning_rate": 9.910716050684292e-06,
      "loss": 0.0927,
      "step": 47100
    },
    {
      "epoch": 0.07711291346726629,
      "grad_norm": 3.931910276412964,
      "learning_rate": 9.910650158470775e-06,
      "loss": 0.1093,
      "step": 47120
    },
    {
      "epoch": 0.07714564390591963,
      "grad_norm": 5.066051006317139,
      "learning_rate": 9.910584266257257e-06,
      "loss": 0.1126,
      "step": 47140
    },
    {
      "epoch": 0.07717837434457296,
      "grad_norm": 5.242550373077393,
      "learning_rate": 9.910518374043741e-06,
      "loss": 0.1179,
      "step": 47160
    },
    {
      "epoch": 0.0772111047832263,
      "grad_norm": 4.0361104011535645,
      "learning_rate": 9.910452481830223e-06,
      "loss": 0.0759,
      "step": 47180
    },
    {
      "epoch": 0.07724383522187965,
      "grad_norm": 5.172632217407227,
      "learning_rate": 9.910386589616706e-06,
      "loss": 0.0907,
      "step": 47200
    },
    {
      "epoch": 0.07727656566053298,
      "grad_norm": 16.961206436157227,
      "learning_rate": 9.910320697403188e-06,
      "loss": 0.0903,
      "step": 47220
    },
    {
      "epoch": 0.07730929609918633,
      "grad_norm": 3.347491979598999,
      "learning_rate": 9.910254805189672e-06,
      "loss": 0.1102,
      "step": 47240
    },
    {
      "epoch": 0.07734202653783966,
      "grad_norm": 8.824296951293945,
      "learning_rate": 9.910188912976154e-06,
      "loss": 0.0893,
      "step": 47260
    },
    {
      "epoch": 0.077374756976493,
      "grad_norm": 3.3046000003814697,
      "learning_rate": 9.910123020762637e-06,
      "loss": 0.1007,
      "step": 47280
    },
    {
      "epoch": 0.07740748741514633,
      "grad_norm": 3.428417921066284,
      "learning_rate": 9.91005712854912e-06,
      "loss": 0.0887,
      "step": 47300
    },
    {
      "epoch": 0.07744021785379968,
      "grad_norm": 1.2963300943374634,
      "learning_rate": 9.909991236335603e-06,
      "loss": 0.0972,
      "step": 47320
    },
    {
      "epoch": 0.07747294829245302,
      "grad_norm": 4.7085113525390625,
      "learning_rate": 9.909925344122085e-06,
      "loss": 0.0684,
      "step": 47340
    },
    {
      "epoch": 0.07750567873110635,
      "grad_norm": 3.158837080001831,
      "learning_rate": 9.909859451908568e-06,
      "loss": 0.0951,
      "step": 47360
    },
    {
      "epoch": 0.0775384091697597,
      "grad_norm": 0.8033812642097473,
      "learning_rate": 9.909793559695052e-06,
      "loss": 0.0916,
      "step": 47380
    },
    {
      "epoch": 0.07757113960841303,
      "grad_norm": 4.073744297027588,
      "learning_rate": 9.909727667481534e-06,
      "loss": 0.0877,
      "step": 47400
    },
    {
      "epoch": 0.07760387004706637,
      "grad_norm": 7.36950159072876,
      "learning_rate": 9.909661775268017e-06,
      "loss": 0.1003,
      "step": 47420
    },
    {
      "epoch": 0.07763660048571971,
      "grad_norm": 9.038409233093262,
      "learning_rate": 9.909595883054501e-06,
      "loss": 0.0862,
      "step": 47440
    },
    {
      "epoch": 0.07766933092437305,
      "grad_norm": 8.43056583404541,
      "learning_rate": 9.909529990840983e-06,
      "loss": 0.0871,
      "step": 47460
    },
    {
      "epoch": 0.07770206136302639,
      "grad_norm": 3.4461679458618164,
      "learning_rate": 9.909464098627466e-06,
      "loss": 0.1025,
      "step": 47480
    },
    {
      "epoch": 0.07773479180167972,
      "grad_norm": 9.353697776794434,
      "learning_rate": 9.90939820641395e-06,
      "loss": 0.0923,
      "step": 47500
    },
    {
      "epoch": 0.07776752224033306,
      "grad_norm": 7.783238887786865,
      "learning_rate": 9.909332314200432e-06,
      "loss": 0.1024,
      "step": 47520
    },
    {
      "epoch": 0.07780025267898641,
      "grad_norm": 2.795301914215088,
      "learning_rate": 9.909266421986916e-06,
      "loss": 0.0985,
      "step": 47540
    },
    {
      "epoch": 0.07783298311763974,
      "grad_norm": 3.106816053390503,
      "learning_rate": 9.909200529773397e-06,
      "loss": 0.0943,
      "step": 47560
    },
    {
      "epoch": 0.07786571355629308,
      "grad_norm": 4.8880157470703125,
      "learning_rate": 9.909134637559881e-06,
      "loss": 0.0827,
      "step": 47580
    },
    {
      "epoch": 0.07789844399494641,
      "grad_norm": 1.243208885192871,
      "learning_rate": 9.909068745346363e-06,
      "loss": 0.101,
      "step": 47600
    },
    {
      "epoch": 0.07793117443359976,
      "grad_norm": 3.1299147605895996,
      "learning_rate": 9.909002853132847e-06,
      "loss": 0.1097,
      "step": 47620
    },
    {
      "epoch": 0.0779639048722531,
      "grad_norm": 3.4794743061065674,
      "learning_rate": 9.908936960919328e-06,
      "loss": 0.0957,
      "step": 47640
    },
    {
      "epoch": 0.07799663531090643,
      "grad_norm": 3.4014856815338135,
      "learning_rate": 9.908871068705812e-06,
      "loss": 0.0939,
      "step": 47660
    },
    {
      "epoch": 0.07802936574955978,
      "grad_norm": 4.4729533195495605,
      "learning_rate": 9.908805176492294e-06,
      "loss": 0.0835,
      "step": 47680
    },
    {
      "epoch": 0.07806209618821311,
      "grad_norm": 3.5278983116149902,
      "learning_rate": 9.908739284278777e-06,
      "loss": 0.1143,
      "step": 47700
    },
    {
      "epoch": 0.07809482662686645,
      "grad_norm": 0.8469237685203552,
      "learning_rate": 9.90867339206526e-06,
      "loss": 0.1127,
      "step": 47720
    },
    {
      "epoch": 0.0781275570655198,
      "grad_norm": 1.916814923286438,
      "learning_rate": 9.908607499851743e-06,
      "loss": 0.0765,
      "step": 47740
    },
    {
      "epoch": 0.07816028750417313,
      "grad_norm": 5.181746959686279,
      "learning_rate": 9.908541607638227e-06,
      "loss": 0.114,
      "step": 47760
    },
    {
      "epoch": 0.07819301794282647,
      "grad_norm": 2.1037399768829346,
      "learning_rate": 9.908475715424708e-06,
      "loss": 0.0973,
      "step": 47780
    },
    {
      "epoch": 0.0782257483814798,
      "grad_norm": 7.549102306365967,
      "learning_rate": 9.908409823211192e-06,
      "loss": 0.0837,
      "step": 47800
    },
    {
      "epoch": 0.07825847882013315,
      "grad_norm": 4.314159393310547,
      "learning_rate": 9.908343930997676e-06,
      "loss": 0.1102,
      "step": 47820
    },
    {
      "epoch": 0.07829120925878649,
      "grad_norm": 5.377231121063232,
      "learning_rate": 9.908278038784158e-06,
      "loss": 0.0933,
      "step": 47840
    },
    {
      "epoch": 0.07832393969743982,
      "grad_norm": 3.551072359085083,
      "learning_rate": 9.908212146570641e-06,
      "loss": 0.0897,
      "step": 47860
    },
    {
      "epoch": 0.07835667013609317,
      "grad_norm": 8.090377807617188,
      "learning_rate": 9.908146254357125e-06,
      "loss": 0.0836,
      "step": 47880
    },
    {
      "epoch": 0.0783894005747465,
      "grad_norm": 4.24093770980835,
      "learning_rate": 9.908080362143607e-06,
      "loss": 0.0801,
      "step": 47900
    },
    {
      "epoch": 0.07842213101339984,
      "grad_norm": 6.402015686035156,
      "learning_rate": 9.90801446993009e-06,
      "loss": 0.091,
      "step": 47920
    },
    {
      "epoch": 0.07845486145205319,
      "grad_norm": 3.775745153427124,
      "learning_rate": 9.907948577716572e-06,
      "loss": 0.0847,
      "step": 47940
    },
    {
      "epoch": 0.07848759189070652,
      "grad_norm": 5.479029655456543,
      "learning_rate": 9.907882685503056e-06,
      "loss": 0.1019,
      "step": 47960
    },
    {
      "epoch": 0.07852032232935986,
      "grad_norm": 8.198347091674805,
      "learning_rate": 9.907816793289538e-06,
      "loss": 0.098,
      "step": 47980
    },
    {
      "epoch": 0.07855305276801319,
      "grad_norm": 4.622074127197266,
      "learning_rate": 9.907750901076021e-06,
      "loss": 0.0867,
      "step": 48000
    },
    {
      "epoch": 0.07858578320666654,
      "grad_norm": 6.617368698120117,
      "learning_rate": 9.907685008862503e-06,
      "loss": 0.072,
      "step": 48020
    },
    {
      "epoch": 0.07861851364531987,
      "grad_norm": 4.238549709320068,
      "learning_rate": 9.907619116648987e-06,
      "loss": 0.108,
      "step": 48040
    },
    {
      "epoch": 0.07865124408397321,
      "grad_norm": 4.423222541809082,
      "learning_rate": 9.907553224435468e-06,
      "loss": 0.1155,
      "step": 48060
    },
    {
      "epoch": 0.07868397452262656,
      "grad_norm": 4.31801700592041,
      "learning_rate": 9.907487332221952e-06,
      "loss": 0.0907,
      "step": 48080
    },
    {
      "epoch": 0.07871670496127989,
      "grad_norm": 5.844754695892334,
      "learning_rate": 9.907421440008434e-06,
      "loss": 0.0902,
      "step": 48100
    },
    {
      "epoch": 0.07874943539993323,
      "grad_norm": 4.4203996658325195,
      "learning_rate": 9.907355547794918e-06,
      "loss": 0.1053,
      "step": 48120
    },
    {
      "epoch": 0.07878216583858656,
      "grad_norm": 11.029620170593262,
      "learning_rate": 9.9072896555814e-06,
      "loss": 0.0848,
      "step": 48140
    },
    {
      "epoch": 0.07881489627723991,
      "grad_norm": 6.664504528045654,
      "learning_rate": 9.907223763367883e-06,
      "loss": 0.1106,
      "step": 48160
    },
    {
      "epoch": 0.07884762671589325,
      "grad_norm": 4.46523904800415,
      "learning_rate": 9.907157871154367e-06,
      "loss": 0.0957,
      "step": 48180
    },
    {
      "epoch": 0.07888035715454658,
      "grad_norm": 3.2946200370788574,
      "learning_rate": 9.907091978940849e-06,
      "loss": 0.0734,
      "step": 48200
    },
    {
      "epoch": 0.07891308759319993,
      "grad_norm": 3.02502703666687,
      "learning_rate": 9.907026086727332e-06,
      "loss": 0.0949,
      "step": 48220
    },
    {
      "epoch": 0.07894581803185326,
      "grad_norm": 3.1458468437194824,
      "learning_rate": 9.906960194513816e-06,
      "loss": 0.0728,
      "step": 48240
    },
    {
      "epoch": 0.0789785484705066,
      "grad_norm": 4.94318151473999,
      "learning_rate": 9.906894302300298e-06,
      "loss": 0.0951,
      "step": 48260
    },
    {
      "epoch": 0.07901127890915995,
      "grad_norm": 5.2997918128967285,
      "learning_rate": 9.906828410086781e-06,
      "loss": 0.0857,
      "step": 48280
    },
    {
      "epoch": 0.07904400934781328,
      "grad_norm": 4.144311428070068,
      "learning_rate": 9.906762517873265e-06,
      "loss": 0.1103,
      "step": 48300
    },
    {
      "epoch": 0.07907673978646662,
      "grad_norm": 5.011114597320557,
      "learning_rate": 9.906696625659747e-06,
      "loss": 0.0875,
      "step": 48320
    },
    {
      "epoch": 0.07910947022511995,
      "grad_norm": 7.14378547668457,
      "learning_rate": 9.90663073344623e-06,
      "loss": 0.107,
      "step": 48340
    },
    {
      "epoch": 0.0791422006637733,
      "grad_norm": 2.462073564529419,
      "learning_rate": 9.906564841232712e-06,
      "loss": 0.1141,
      "step": 48360
    },
    {
      "epoch": 0.07917493110242664,
      "grad_norm": 3.3208749294281006,
      "learning_rate": 9.906498949019196e-06,
      "loss": 0.0989,
      "step": 48380
    },
    {
      "epoch": 0.07920766154107997,
      "grad_norm": 7.46345329284668,
      "learning_rate": 9.906433056805678e-06,
      "loss": 0.0973,
      "step": 48400
    },
    {
      "epoch": 0.07924039197973332,
      "grad_norm": 6.528219223022461,
      "learning_rate": 9.906367164592161e-06,
      "loss": 0.0722,
      "step": 48420
    },
    {
      "epoch": 0.07927312241838665,
      "grad_norm": 6.918247222900391,
      "learning_rate": 9.906301272378643e-06,
      "loss": 0.0733,
      "step": 48440
    },
    {
      "epoch": 0.07930585285703999,
      "grad_norm": 5.763236999511719,
      "learning_rate": 9.906235380165127e-06,
      "loss": 0.0945,
      "step": 48460
    },
    {
      "epoch": 0.07933858329569334,
      "grad_norm": 3.151909112930298,
      "learning_rate": 9.906169487951609e-06,
      "loss": 0.0723,
      "step": 48480
    },
    {
      "epoch": 0.07937131373434667,
      "grad_norm": 4.758991718292236,
      "learning_rate": 9.906103595738092e-06,
      "loss": 0.0762,
      "step": 48500
    },
    {
      "epoch": 0.07940404417300001,
      "grad_norm": 2.316204786300659,
      "learning_rate": 9.906037703524576e-06,
      "loss": 0.1128,
      "step": 48520
    },
    {
      "epoch": 0.07943677461165334,
      "grad_norm": 6.085975646972656,
      "learning_rate": 9.905971811311058e-06,
      "loss": 0.095,
      "step": 48540
    },
    {
      "epoch": 0.07946950505030669,
      "grad_norm": 2.8956313133239746,
      "learning_rate": 9.905905919097541e-06,
      "loss": 0.0819,
      "step": 48560
    },
    {
      "epoch": 0.07950223548896003,
      "grad_norm": 3.455780267715454,
      "learning_rate": 9.905840026884023e-06,
      "loss": 0.0794,
      "step": 48580
    },
    {
      "epoch": 0.07953496592761336,
      "grad_norm": 2.1236345767974854,
      "learning_rate": 9.905774134670507e-06,
      "loss": 0.0897,
      "step": 48600
    },
    {
      "epoch": 0.0795676963662667,
      "grad_norm": 5.5522308349609375,
      "learning_rate": 9.90570824245699e-06,
      "loss": 0.1076,
      "step": 48620
    },
    {
      "epoch": 0.07960042680492004,
      "grad_norm": 3.546278476715088,
      "learning_rate": 9.905642350243472e-06,
      "loss": 0.084,
      "step": 48640
    },
    {
      "epoch": 0.07963315724357338,
      "grad_norm": 5.3231048583984375,
      "learning_rate": 9.905576458029956e-06,
      "loss": 0.0965,
      "step": 48660
    },
    {
      "epoch": 0.07966588768222671,
      "grad_norm": 3.002722978591919,
      "learning_rate": 9.90551056581644e-06,
      "loss": 0.0806,
      "step": 48680
    },
    {
      "epoch": 0.07969861812088005,
      "grad_norm": 5.666306495666504,
      "learning_rate": 9.905444673602921e-06,
      "loss": 0.0839,
      "step": 48700
    },
    {
      "epoch": 0.0797313485595334,
      "grad_norm": 5.842062473297119,
      "learning_rate": 9.905378781389405e-06,
      "loss": 0.0825,
      "step": 48720
    },
    {
      "epoch": 0.07976407899818673,
      "grad_norm": 6.117522239685059,
      "learning_rate": 9.905312889175887e-06,
      "loss": 0.0869,
      "step": 48740
    },
    {
      "epoch": 0.07979680943684007,
      "grad_norm": 5.295429706573486,
      "learning_rate": 9.90524699696237e-06,
      "loss": 0.0989,
      "step": 48760
    },
    {
      "epoch": 0.0798295398754934,
      "grad_norm": 3.4862184524536133,
      "learning_rate": 9.905181104748852e-06,
      "loss": 0.088,
      "step": 48780
    },
    {
      "epoch": 0.07986227031414675,
      "grad_norm": 3.8075671195983887,
      "learning_rate": 9.905115212535336e-06,
      "loss": 0.1042,
      "step": 48800
    },
    {
      "epoch": 0.0798950007528001,
      "grad_norm": 4.212061882019043,
      "learning_rate": 9.905049320321818e-06,
      "loss": 0.0748,
      "step": 48820
    },
    {
      "epoch": 0.07992773119145342,
      "grad_norm": 129.67308044433594,
      "learning_rate": 9.904983428108301e-06,
      "loss": 0.1121,
      "step": 48840
    },
    {
      "epoch": 0.07996046163010677,
      "grad_norm": 4.276890277862549,
      "learning_rate": 9.904917535894785e-06,
      "loss": 0.096,
      "step": 48860
    },
    {
      "epoch": 0.0799931920687601,
      "grad_norm": 4.169255256652832,
      "learning_rate": 9.904851643681267e-06,
      "loss": 0.0736,
      "step": 48880
    },
    {
      "epoch": 0.08002592250741344,
      "grad_norm": 2.7993762493133545,
      "learning_rate": 9.90478575146775e-06,
      "loss": 0.0864,
      "step": 48900
    },
    {
      "epoch": 0.08005865294606679,
      "grad_norm": 3.049684762954712,
      "learning_rate": 9.904719859254232e-06,
      "loss": 0.0909,
      "step": 48920
    },
    {
      "epoch": 0.08009138338472012,
      "grad_norm": 1.735497236251831,
      "learning_rate": 9.904653967040716e-06,
      "loss": 0.0967,
      "step": 48940
    },
    {
      "epoch": 0.08012411382337346,
      "grad_norm": 4.103519439697266,
      "learning_rate": 9.904588074827198e-06,
      "loss": 0.1053,
      "step": 48960
    },
    {
      "epoch": 0.0801568442620268,
      "grad_norm": 2.0424270629882812,
      "learning_rate": 9.904522182613681e-06,
      "loss": 0.0743,
      "step": 48980
    },
    {
      "epoch": 0.08018957470068014,
      "grad_norm": 3.3530402183532715,
      "learning_rate": 9.904456290400163e-06,
      "loss": 0.0733,
      "step": 49000
    },
    {
      "epoch": 0.08022230513933348,
      "grad_norm": 2.0171847343444824,
      "learning_rate": 9.904390398186647e-06,
      "loss": 0.0934,
      "step": 49020
    },
    {
      "epoch": 0.08025503557798681,
      "grad_norm": 3.3863186836242676,
      "learning_rate": 9.90432450597313e-06,
      "loss": 0.0965,
      "step": 49040
    },
    {
      "epoch": 0.08028776601664016,
      "grad_norm": 5.485724449157715,
      "learning_rate": 9.904258613759612e-06,
      "loss": 0.0951,
      "step": 49060
    },
    {
      "epoch": 0.08032049645529349,
      "grad_norm": 4.912657737731934,
      "learning_rate": 9.904192721546096e-06,
      "loss": 0.0991,
      "step": 49080
    },
    {
      "epoch": 0.08035322689394683,
      "grad_norm": 3.8076565265655518,
      "learning_rate": 9.90412682933258e-06,
      "loss": 0.0751,
      "step": 49100
    },
    {
      "epoch": 0.08038595733260018,
      "grad_norm": 3.218050003051758,
      "learning_rate": 9.904060937119061e-06,
      "loss": 0.0821,
      "step": 49120
    },
    {
      "epoch": 0.08041868777125351,
      "grad_norm": 8.74461555480957,
      "learning_rate": 9.903995044905545e-06,
      "loss": 0.0877,
      "step": 49140
    },
    {
      "epoch": 0.08045141820990685,
      "grad_norm": 5.667856216430664,
      "learning_rate": 9.903929152692027e-06,
      "loss": 0.0874,
      "step": 49160
    },
    {
      "epoch": 0.08048414864856018,
      "grad_norm": 3.9749724864959717,
      "learning_rate": 9.90386326047851e-06,
      "loss": 0.0946,
      "step": 49180
    },
    {
      "epoch": 0.08051687908721353,
      "grad_norm": 1.130185842514038,
      "learning_rate": 9.903797368264992e-06,
      "loss": 0.0931,
      "step": 49200
    },
    {
      "epoch": 0.08054960952586687,
      "grad_norm": 1.9589077234268188,
      "learning_rate": 9.903731476051476e-06,
      "loss": 0.1151,
      "step": 49220
    },
    {
      "epoch": 0.0805823399645202,
      "grad_norm": 4.825930595397949,
      "learning_rate": 9.90366558383796e-06,
      "loss": 0.1151,
      "step": 49240
    },
    {
      "epoch": 0.08061507040317355,
      "grad_norm": 4.762904644012451,
      "learning_rate": 9.903599691624441e-06,
      "loss": 0.1052,
      "step": 49260
    },
    {
      "epoch": 0.08064780084182688,
      "grad_norm": 4.884278297424316,
      "learning_rate": 9.903533799410925e-06,
      "loss": 0.1048,
      "step": 49280
    },
    {
      "epoch": 0.08068053128048022,
      "grad_norm": 2.492018461227417,
      "learning_rate": 9.903467907197407e-06,
      "loss": 0.0782,
      "step": 49300
    },
    {
      "epoch": 0.08071326171913357,
      "grad_norm": 11.271273612976074,
      "learning_rate": 9.90340201498389e-06,
      "loss": 0.0842,
      "step": 49320
    },
    {
      "epoch": 0.0807459921577869,
      "grad_norm": 14.022377014160156,
      "learning_rate": 9.903336122770372e-06,
      "loss": 0.0756,
      "step": 49340
    },
    {
      "epoch": 0.08077872259644024,
      "grad_norm": 9.321903228759766,
      "learning_rate": 9.903270230556856e-06,
      "loss": 0.103,
      "step": 49360
    },
    {
      "epoch": 0.08081145303509357,
      "grad_norm": 2.5679662227630615,
      "learning_rate": 9.903204338343338e-06,
      "loss": 0.0897,
      "step": 49380
    },
    {
      "epoch": 0.08084418347374692,
      "grad_norm": 3.2114667892456055,
      "learning_rate": 9.903138446129821e-06,
      "loss": 0.0872,
      "step": 49400
    },
    {
      "epoch": 0.08087691391240025,
      "grad_norm": 4.991625785827637,
      "learning_rate": 9.903072553916305e-06,
      "loss": 0.0957,
      "step": 49420
    },
    {
      "epoch": 0.08090964435105359,
      "grad_norm": 4.991522312164307,
      "learning_rate": 9.903006661702787e-06,
      "loss": 0.0897,
      "step": 49440
    },
    {
      "epoch": 0.08094237478970694,
      "grad_norm": 4.712497234344482,
      "learning_rate": 9.90294076948927e-06,
      "loss": 0.0876,
      "step": 49460
    },
    {
      "epoch": 0.08097510522836027,
      "grad_norm": 6.2698516845703125,
      "learning_rate": 9.902874877275754e-06,
      "loss": 0.0945,
      "step": 49480
    },
    {
      "epoch": 0.08100783566701361,
      "grad_norm": 3.5020854473114014,
      "learning_rate": 9.902808985062236e-06,
      "loss": 0.0943,
      "step": 49500
    },
    {
      "epoch": 0.08104056610566694,
      "grad_norm": 3.7942006587982178,
      "learning_rate": 9.90274309284872e-06,
      "loss": 0.0976,
      "step": 49520
    },
    {
      "epoch": 0.08107329654432029,
      "grad_norm": 3.733919143676758,
      "learning_rate": 9.902677200635201e-06,
      "loss": 0.0901,
      "step": 49540
    },
    {
      "epoch": 0.08110602698297363,
      "grad_norm": 5.551589012145996,
      "learning_rate": 9.902611308421685e-06,
      "loss": 0.0848,
      "step": 49560
    },
    {
      "epoch": 0.08113875742162696,
      "grad_norm": 2.359370231628418,
      "learning_rate": 9.902545416208169e-06,
      "loss": 0.1013,
      "step": 49580
    },
    {
      "epoch": 0.0811714878602803,
      "grad_norm": 6.118266582489014,
      "learning_rate": 9.90247952399465e-06,
      "loss": 0.0979,
      "step": 49600
    },
    {
      "epoch": 0.08120421829893364,
      "grad_norm": 5.578919410705566,
      "learning_rate": 9.902413631781134e-06,
      "loss": 0.1167,
      "step": 49620
    },
    {
      "epoch": 0.08123694873758698,
      "grad_norm": 3.0660560131073,
      "learning_rate": 9.902347739567616e-06,
      "loss": 0.0954,
      "step": 49640
    },
    {
      "epoch": 0.08126967917624033,
      "grad_norm": 8.051896095275879,
      "learning_rate": 9.9022818473541e-06,
      "loss": 0.0975,
      "step": 49660
    },
    {
      "epoch": 0.08130240961489366,
      "grad_norm": 1.5131617784500122,
      "learning_rate": 9.902215955140581e-06,
      "loss": 0.0867,
      "step": 49680
    },
    {
      "epoch": 0.081335140053547,
      "grad_norm": 5.260891437530518,
      "learning_rate": 9.902150062927065e-06,
      "loss": 0.0993,
      "step": 49700
    },
    {
      "epoch": 0.08136787049220033,
      "grad_norm": 3.773533821105957,
      "learning_rate": 9.902084170713547e-06,
      "loss": 0.0918,
      "step": 49720
    },
    {
      "epoch": 0.08140060093085368,
      "grad_norm": 2.8646175861358643,
      "learning_rate": 9.90201827850003e-06,
      "loss": 0.0923,
      "step": 49740
    },
    {
      "epoch": 0.08143333136950702,
      "grad_norm": 1.3364558219909668,
      "learning_rate": 9.901952386286512e-06,
      "loss": 0.0787,
      "step": 49760
    },
    {
      "epoch": 0.08146606180816035,
      "grad_norm": 2.514254331588745,
      "learning_rate": 9.901886494072996e-06,
      "loss": 0.0787,
      "step": 49780
    },
    {
      "epoch": 0.0814987922468137,
      "grad_norm": 4.674496173858643,
      "learning_rate": 9.901820601859478e-06,
      "loss": 0.1027,
      "step": 49800
    },
    {
      "epoch": 0.08153152268546703,
      "grad_norm": 2.4905238151550293,
      "learning_rate": 9.901754709645961e-06,
      "loss": 0.0925,
      "step": 49820
    },
    {
      "epoch": 0.08156425312412037,
      "grad_norm": 3.682882308959961,
      "learning_rate": 9.901688817432445e-06,
      "loss": 0.0819,
      "step": 49840
    },
    {
      "epoch": 0.08159698356277371,
      "grad_norm": 3.5395259857177734,
      "learning_rate": 9.901622925218927e-06,
      "loss": 0.0663,
      "step": 49860
    },
    {
      "epoch": 0.08162971400142705,
      "grad_norm": 8.159235954284668,
      "learning_rate": 9.90155703300541e-06,
      "loss": 0.085,
      "step": 49880
    },
    {
      "epoch": 0.08166244444008039,
      "grad_norm": 5.1666412353515625,
      "learning_rate": 9.901491140791894e-06,
      "loss": 0.1004,
      "step": 49900
    },
    {
      "epoch": 0.08169517487873372,
      "grad_norm": 11.587820053100586,
      "learning_rate": 9.901425248578376e-06,
      "loss": 0.0933,
      "step": 49920
    },
    {
      "epoch": 0.08172790531738706,
      "grad_norm": 3.778249979019165,
      "learning_rate": 9.90135935636486e-06,
      "loss": 0.072,
      "step": 49940
    },
    {
      "epoch": 0.08176063575604041,
      "grad_norm": 3.524719715118408,
      "learning_rate": 9.901293464151343e-06,
      "loss": 0.091,
      "step": 49960
    },
    {
      "epoch": 0.08179336619469374,
      "grad_norm": 3.117504835128784,
      "learning_rate": 9.901227571937825e-06,
      "loss": 0.0776,
      "step": 49980
    },
    {
      "epoch": 0.08182609663334708,
      "grad_norm": 12.41788387298584,
      "learning_rate": 9.901161679724309e-06,
      "loss": 0.0988,
      "step": 50000
    },
    {
      "epoch": 0.08182609663334708,
      "eval_loss": 0.045443981885910034,
      "eval_runtime": 6509.9708,
      "eval_samples_per_second": 157.89,
      "eval_steps_per_second": 15.789,
      "eval_sts-dev_pearson_cosine": 0.8855070869522049,
      "eval_sts-dev_spearman_cosine": 0.833517065116008,
      "step": 50000
    },
    {
      "epoch": 0.08185882707200041,
      "grad_norm": 3.9534780979156494,
      "learning_rate": 9.90109578751079e-06,
      "loss": 0.0848,
      "step": 50020
    },
    {
      "epoch": 0.08189155751065376,
      "grad_norm": 7.4297637939453125,
      "learning_rate": 9.901029895297274e-06,
      "loss": 0.0825,
      "step": 50040
    },
    {
      "epoch": 0.08192428794930709,
      "grad_norm": 1.3082503080368042,
      "learning_rate": 9.900964003083756e-06,
      "loss": 0.0751,
      "step": 50060
    },
    {
      "epoch": 0.08195701838796043,
      "grad_norm": 3.790311574935913,
      "learning_rate": 9.90089811087024e-06,
      "loss": 0.0853,
      "step": 50080
    },
    {
      "epoch": 0.08198974882661378,
      "grad_norm": 4.0918755531311035,
      "learning_rate": 9.900832218656721e-06,
      "loss": 0.0762,
      "step": 50100
    },
    {
      "epoch": 0.08202247926526711,
      "grad_norm": 2.624202013015747,
      "learning_rate": 9.900766326443205e-06,
      "loss": 0.0884,
      "step": 50120
    },
    {
      "epoch": 0.08205520970392045,
      "grad_norm": 2.2377734184265137,
      "learning_rate": 9.900700434229687e-06,
      "loss": 0.0908,
      "step": 50140
    },
    {
      "epoch": 0.08208794014257378,
      "grad_norm": 1.988304853439331,
      "learning_rate": 9.90063454201617e-06,
      "loss": 0.091,
      "step": 50160
    },
    {
      "epoch": 0.08212067058122713,
      "grad_norm": 4.598141193389893,
      "learning_rate": 9.900568649802652e-06,
      "loss": 0.0841,
      "step": 50180
    },
    {
      "epoch": 0.08215340101988047,
      "grad_norm": 3.075181007385254,
      "learning_rate": 9.900502757589136e-06,
      "loss": 0.0675,
      "step": 50200
    },
    {
      "epoch": 0.0821861314585338,
      "grad_norm": 4.624607086181641,
      "learning_rate": 9.90043686537562e-06,
      "loss": 0.098,
      "step": 50220
    },
    {
      "epoch": 0.08221886189718715,
      "grad_norm": 6.733607769012451,
      "learning_rate": 9.900370973162102e-06,
      "loss": 0.0856,
      "step": 50240
    },
    {
      "epoch": 0.08225159233584048,
      "grad_norm": 4.319208145141602,
      "learning_rate": 9.900305080948585e-06,
      "loss": 0.0992,
      "step": 50260
    },
    {
      "epoch": 0.08228432277449382,
      "grad_norm": 3.002060651779175,
      "learning_rate": 9.900239188735069e-06,
      "loss": 0.0833,
      "step": 50280
    },
    {
      "epoch": 0.08231705321314717,
      "grad_norm": 2.2809276580810547,
      "learning_rate": 9.90017329652155e-06,
      "loss": 0.1045,
      "step": 50300
    },
    {
      "epoch": 0.0823497836518005,
      "grad_norm": 1.6510180234909058,
      "learning_rate": 9.900107404308034e-06,
      "loss": 0.0693,
      "step": 50320
    },
    {
      "epoch": 0.08238251409045384,
      "grad_norm": 5.958058834075928,
      "learning_rate": 9.900041512094518e-06,
      "loss": 0.1093,
      "step": 50340
    },
    {
      "epoch": 0.08241524452910717,
      "grad_norm": 3.591071128845215,
      "learning_rate": 9.899975619881e-06,
      "loss": 0.0958,
      "step": 50360
    },
    {
      "epoch": 0.08244797496776052,
      "grad_norm": 3.233686923980713,
      "learning_rate": 9.899909727667483e-06,
      "loss": 0.1077,
      "step": 50380
    },
    {
      "epoch": 0.08248070540641386,
      "grad_norm": 12.392102241516113,
      "learning_rate": 9.899843835453965e-06,
      "loss": 0.0983,
      "step": 50400
    },
    {
      "epoch": 0.08251343584506719,
      "grad_norm": 4.016182899475098,
      "learning_rate": 9.899777943240449e-06,
      "loss": 0.0815,
      "step": 50420
    },
    {
      "epoch": 0.08254616628372054,
      "grad_norm": 7.2156081199646,
      "learning_rate": 9.89971205102693e-06,
      "loss": 0.104,
      "step": 50440
    },
    {
      "epoch": 0.08257889672237387,
      "grad_norm": 2.288923740386963,
      "learning_rate": 9.899646158813414e-06,
      "loss": 0.0942,
      "step": 50460
    },
    {
      "epoch": 0.08261162716102721,
      "grad_norm": 1.9998916387557983,
      "learning_rate": 9.899580266599896e-06,
      "loss": 0.0848,
      "step": 50480
    },
    {
      "epoch": 0.08264435759968056,
      "grad_norm": 3.3641581535339355,
      "learning_rate": 9.89951437438638e-06,
      "loss": 0.0886,
      "step": 50500
    },
    {
      "epoch": 0.08267708803833389,
      "grad_norm": 5.372104167938232,
      "learning_rate": 9.899448482172862e-06,
      "loss": 0.0906,
      "step": 50520
    },
    {
      "epoch": 0.08270981847698723,
      "grad_norm": 4.003678321838379,
      "learning_rate": 9.899382589959345e-06,
      "loss": 0.1073,
      "step": 50540
    },
    {
      "epoch": 0.08274254891564056,
      "grad_norm": 2.980616807937622,
      "learning_rate": 9.899316697745827e-06,
      "loss": 0.0869,
      "step": 50560
    },
    {
      "epoch": 0.0827752793542939,
      "grad_norm": 5.586452960968018,
      "learning_rate": 9.89925080553231e-06,
      "loss": 0.0974,
      "step": 50580
    },
    {
      "epoch": 0.08280800979294725,
      "grad_norm": 3.6595990657806396,
      "learning_rate": 9.899184913318794e-06,
      "loss": 0.1011,
      "step": 50600
    },
    {
      "epoch": 0.08284074023160058,
      "grad_norm": 3.4286720752716064,
      "learning_rate": 9.899119021105276e-06,
      "loss": 0.0851,
      "step": 50620
    },
    {
      "epoch": 0.08287347067025393,
      "grad_norm": 7.145613670349121,
      "learning_rate": 9.89905312889176e-06,
      "loss": 0.09,
      "step": 50640
    },
    {
      "epoch": 0.08290620110890726,
      "grad_norm": 3.5174038410186768,
      "learning_rate": 9.898987236678243e-06,
      "loss": 0.0804,
      "step": 50660
    },
    {
      "epoch": 0.0829389315475606,
      "grad_norm": 6.3174004554748535,
      "learning_rate": 9.898921344464725e-06,
      "loss": 0.1041,
      "step": 50680
    },
    {
      "epoch": 0.08297166198621395,
      "grad_norm": 12.555102348327637,
      "learning_rate": 9.898855452251209e-06,
      "loss": 0.0827,
      "step": 50700
    },
    {
      "epoch": 0.08300439242486728,
      "grad_norm": 3.6760921478271484,
      "learning_rate": 9.898789560037692e-06,
      "loss": 0.0964,
      "step": 50720
    },
    {
      "epoch": 0.08303712286352062,
      "grad_norm": 6.410063743591309,
      "learning_rate": 9.898723667824174e-06,
      "loss": 0.0782,
      "step": 50740
    },
    {
      "epoch": 0.08306985330217395,
      "grad_norm": 4.192316055297852,
      "learning_rate": 9.898657775610658e-06,
      "loss": 0.1081,
      "step": 50760
    },
    {
      "epoch": 0.0831025837408273,
      "grad_norm": 4.0829668045043945,
      "learning_rate": 9.89859188339714e-06,
      "loss": 0.0916,
      "step": 50780
    },
    {
      "epoch": 0.08313531417948063,
      "grad_norm": 11.78480052947998,
      "learning_rate": 9.898525991183623e-06,
      "loss": 0.0708,
      "step": 50800
    },
    {
      "epoch": 0.08316804461813397,
      "grad_norm": 2.8347747325897217,
      "learning_rate": 9.898460098970105e-06,
      "loss": 0.0976,
      "step": 50820
    },
    {
      "epoch": 0.08320077505678732,
      "grad_norm": 3.608736991882324,
      "learning_rate": 9.898394206756589e-06,
      "loss": 0.0869,
      "step": 50840
    },
    {
      "epoch": 0.08323350549544065,
      "grad_norm": 5.504910945892334,
      "learning_rate": 9.89832831454307e-06,
      "loss": 0.099,
      "step": 50860
    },
    {
      "epoch": 0.08326623593409399,
      "grad_norm": 6.089844226837158,
      "learning_rate": 9.898262422329554e-06,
      "loss": 0.0916,
      "step": 50880
    },
    {
      "epoch": 0.08329896637274732,
      "grad_norm": 4.49663782119751,
      "learning_rate": 9.898196530116036e-06,
      "loss": 0.0756,
      "step": 50900
    },
    {
      "epoch": 0.08333169681140067,
      "grad_norm": 20.741676330566406,
      "learning_rate": 9.89813063790252e-06,
      "loss": 0.0794,
      "step": 50920
    },
    {
      "epoch": 0.08336442725005401,
      "grad_norm": 24.107345581054688,
      "learning_rate": 9.898064745689002e-06,
      "loss": 0.0958,
      "step": 50940
    },
    {
      "epoch": 0.08339715768870734,
      "grad_norm": 3.5574848651885986,
      "learning_rate": 9.897998853475485e-06,
      "loss": 0.1237,
      "step": 50960
    },
    {
      "epoch": 0.08342988812736069,
      "grad_norm": 4.120969295501709,
      "learning_rate": 9.897932961261969e-06,
      "loss": 0.1151,
      "step": 50980
    },
    {
      "epoch": 0.08346261856601402,
      "grad_norm": 6.071258544921875,
      "learning_rate": 9.89786706904845e-06,
      "loss": 0.0956,
      "step": 51000
    },
    {
      "epoch": 0.08349534900466736,
      "grad_norm": 13.932547569274902,
      "learning_rate": 9.897801176834934e-06,
      "loss": 0.0993,
      "step": 51020
    },
    {
      "epoch": 0.0835280794433207,
      "grad_norm": 3.379134178161621,
      "learning_rate": 9.897735284621416e-06,
      "loss": 0.1049,
      "step": 51040
    },
    {
      "epoch": 0.08356080988197404,
      "grad_norm": 3.683166742324829,
      "learning_rate": 9.8976693924079e-06,
      "loss": 0.0788,
      "step": 51060
    },
    {
      "epoch": 0.08359354032062738,
      "grad_norm": 3.4552884101867676,
      "learning_rate": 9.897603500194383e-06,
      "loss": 0.0865,
      "step": 51080
    },
    {
      "epoch": 0.08362627075928071,
      "grad_norm": 6.178581237792969,
      "learning_rate": 9.897537607980865e-06,
      "loss": 0.0838,
      "step": 51100
    },
    {
      "epoch": 0.08365900119793405,
      "grad_norm": 4.209511756896973,
      "learning_rate": 9.897471715767349e-06,
      "loss": 0.0945,
      "step": 51120
    },
    {
      "epoch": 0.0836917316365874,
      "grad_norm": 5.076707363128662,
      "learning_rate": 9.897405823553832e-06,
      "loss": 0.0962,
      "step": 51140
    },
    {
      "epoch": 0.08372446207524073,
      "grad_norm": 3.417132616043091,
      "learning_rate": 9.897339931340314e-06,
      "loss": 0.1026,
      "step": 51160
    },
    {
      "epoch": 0.08375719251389407,
      "grad_norm": 5.063194751739502,
      "learning_rate": 9.897274039126798e-06,
      "loss": 0.0704,
      "step": 51180
    },
    {
      "epoch": 0.0837899229525474,
      "grad_norm": 3.9693803787231445,
      "learning_rate": 9.89720814691328e-06,
      "loss": 0.0992,
      "step": 51200
    },
    {
      "epoch": 0.08382265339120075,
      "grad_norm": 5.291397571563721,
      "learning_rate": 9.897142254699763e-06,
      "loss": 0.0853,
      "step": 51220
    },
    {
      "epoch": 0.0838553838298541,
      "grad_norm": 2.7976136207580566,
      "learning_rate": 9.897076362486245e-06,
      "loss": 0.0847,
      "step": 51240
    },
    {
      "epoch": 0.08388811426850742,
      "grad_norm": 2.2224199771881104,
      "learning_rate": 9.897010470272729e-06,
      "loss": 0.0892,
      "step": 51260
    },
    {
      "epoch": 0.08392084470716077,
      "grad_norm": 4.592893600463867,
      "learning_rate": 9.89694457805921e-06,
      "loss": 0.0714,
      "step": 51280
    },
    {
      "epoch": 0.0839535751458141,
      "grad_norm": 3.6599295139312744,
      "learning_rate": 9.896878685845694e-06,
      "loss": 0.0982,
      "step": 51300
    },
    {
      "epoch": 0.08398630558446744,
      "grad_norm": 4.830970764160156,
      "learning_rate": 9.896812793632178e-06,
      "loss": 0.0904,
      "step": 51320
    },
    {
      "epoch": 0.08401903602312079,
      "grad_norm": 4.0165300369262695,
      "learning_rate": 9.89674690141866e-06,
      "loss": 0.098,
      "step": 51340
    },
    {
      "epoch": 0.08405176646177412,
      "grad_norm": 4.915994644165039,
      "learning_rate": 9.896681009205143e-06,
      "loss": 0.0916,
      "step": 51360
    },
    {
      "epoch": 0.08408449690042746,
      "grad_norm": 14.755982398986816,
      "learning_rate": 9.896615116991625e-06,
      "loss": 0.1056,
      "step": 51380
    },
    {
      "epoch": 0.0841172273390808,
      "grad_norm": 2.4987404346466064,
      "learning_rate": 9.896549224778109e-06,
      "loss": 0.1039,
      "step": 51400
    },
    {
      "epoch": 0.08414995777773414,
      "grad_norm": 1.7842776775360107,
      "learning_rate": 9.89648333256459e-06,
      "loss": 0.0919,
      "step": 51420
    },
    {
      "epoch": 0.08418268821638748,
      "grad_norm": 7.254619598388672,
      "learning_rate": 9.896417440351074e-06,
      "loss": 0.0777,
      "step": 51440
    },
    {
      "epoch": 0.08421541865504081,
      "grad_norm": 0.9492211937904358,
      "learning_rate": 9.896351548137558e-06,
      "loss": 0.0903,
      "step": 51460
    },
    {
      "epoch": 0.08424814909369416,
      "grad_norm": 2.993607521057129,
      "learning_rate": 9.89628565592404e-06,
      "loss": 0.0926,
      "step": 51480
    },
    {
      "epoch": 0.08428087953234749,
      "grad_norm": 2.1449906826019287,
      "learning_rate": 9.896219763710523e-06,
      "loss": 0.1037,
      "step": 51500
    },
    {
      "epoch": 0.08431360997100083,
      "grad_norm": 3.819734573364258,
      "learning_rate": 9.896153871497007e-06,
      "loss": 0.0897,
      "step": 51520
    },
    {
      "epoch": 0.08434634040965416,
      "grad_norm": 2.659390449523926,
      "learning_rate": 9.896087979283489e-06,
      "loss": 0.0762,
      "step": 51540
    },
    {
      "epoch": 0.08437907084830751,
      "grad_norm": 1.9857004880905151,
      "learning_rate": 9.896022087069973e-06,
      "loss": 0.0783,
      "step": 51560
    },
    {
      "epoch": 0.08441180128696085,
      "grad_norm": 3.6617298126220703,
      "learning_rate": 9.895956194856454e-06,
      "loss": 0.1014,
      "step": 51580
    },
    {
      "epoch": 0.08444453172561418,
      "grad_norm": 3.4375061988830566,
      "learning_rate": 9.895890302642938e-06,
      "loss": 0.0835,
      "step": 51600
    },
    {
      "epoch": 0.08447726216426753,
      "grad_norm": 4.856380939483643,
      "learning_rate": 9.89582441042942e-06,
      "loss": 0.0939,
      "step": 51620
    },
    {
      "epoch": 0.08450999260292086,
      "grad_norm": 2.509032726287842,
      "learning_rate": 9.895758518215903e-06,
      "loss": 0.0847,
      "step": 51640
    },
    {
      "epoch": 0.0845427230415742,
      "grad_norm": 6.173371315002441,
      "learning_rate": 9.895692626002385e-06,
      "loss": 0.0923,
      "step": 51660
    },
    {
      "epoch": 0.08457545348022755,
      "grad_norm": 5.510430812835693,
      "learning_rate": 9.895626733788869e-06,
      "loss": 0.0833,
      "step": 51680
    },
    {
      "epoch": 0.08460818391888088,
      "grad_norm": 4.253340721130371,
      "learning_rate": 9.895560841575353e-06,
      "loss": 0.0769,
      "step": 51700
    },
    {
      "epoch": 0.08464091435753422,
      "grad_norm": 4.802877426147461,
      "learning_rate": 9.895494949361834e-06,
      "loss": 0.0991,
      "step": 51720
    },
    {
      "epoch": 0.08467364479618755,
      "grad_norm": 4.592669486999512,
      "learning_rate": 9.895429057148318e-06,
      "loss": 0.0951,
      "step": 51740
    },
    {
      "epoch": 0.0847063752348409,
      "grad_norm": 3.625356674194336,
      "learning_rate": 9.8953631649348e-06,
      "loss": 0.0903,
      "step": 51760
    },
    {
      "epoch": 0.08473910567349424,
      "grad_norm": 1.2074264287948608,
      "learning_rate": 9.895297272721283e-06,
      "loss": 0.0822,
      "step": 51780
    },
    {
      "epoch": 0.08477183611214757,
      "grad_norm": 4.136786460876465,
      "learning_rate": 9.895231380507765e-06,
      "loss": 0.0938,
      "step": 51800
    },
    {
      "epoch": 0.08480456655080092,
      "grad_norm": 2.40320086479187,
      "learning_rate": 9.895165488294249e-06,
      "loss": 0.093,
      "step": 51820
    },
    {
      "epoch": 0.08483729698945425,
      "grad_norm": 4.567634105682373,
      "learning_rate": 9.895099596080731e-06,
      "loss": 0.1094,
      "step": 51840
    },
    {
      "epoch": 0.08487002742810759,
      "grad_norm": 3.928421974182129,
      "learning_rate": 9.895033703867214e-06,
      "loss": 0.1158,
      "step": 51860
    },
    {
      "epoch": 0.08490275786676094,
      "grad_norm": 3.1233069896698,
      "learning_rate": 9.894967811653698e-06,
      "loss": 0.0908,
      "step": 51880
    },
    {
      "epoch": 0.08493548830541427,
      "grad_norm": 2.0719330310821533,
      "learning_rate": 9.89490191944018e-06,
      "loss": 0.0915,
      "step": 51900
    },
    {
      "epoch": 0.08496821874406761,
      "grad_norm": 5.514596939086914,
      "learning_rate": 9.894836027226664e-06,
      "loss": 0.0804,
      "step": 51920
    },
    {
      "epoch": 0.08500094918272094,
      "grad_norm": 3.9116830825805664,
      "learning_rate": 9.894770135013147e-06,
      "loss": 0.0923,
      "step": 51940
    },
    {
      "epoch": 0.08503367962137429,
      "grad_norm": 4.740574836730957,
      "learning_rate": 9.894704242799629e-06,
      "loss": 0.1008,
      "step": 51960
    },
    {
      "epoch": 0.08506641006002763,
      "grad_norm": 10.651110649108887,
      "learning_rate": 9.894638350586113e-06,
      "loss": 0.0682,
      "step": 51980
    },
    {
      "epoch": 0.08509914049868096,
      "grad_norm": 4.214074611663818,
      "learning_rate": 9.894572458372594e-06,
      "loss": 0.1141,
      "step": 52000
    },
    {
      "epoch": 0.0851318709373343,
      "grad_norm": 2.9177374839782715,
      "learning_rate": 9.894506566159078e-06,
      "loss": 0.0702,
      "step": 52020
    },
    {
      "epoch": 0.08516460137598764,
      "grad_norm": 3.150233745574951,
      "learning_rate": 9.894440673945562e-06,
      "loss": 0.1073,
      "step": 52040
    },
    {
      "epoch": 0.08519733181464098,
      "grad_norm": 2.270505666732788,
      "learning_rate": 9.894374781732044e-06,
      "loss": 0.0888,
      "step": 52060
    },
    {
      "epoch": 0.08523006225329433,
      "grad_norm": 3.2473790645599365,
      "learning_rate": 9.894308889518527e-06,
      "loss": 0.0949,
      "step": 52080
    },
    {
      "epoch": 0.08526279269194766,
      "grad_norm": 3.1785106658935547,
      "learning_rate": 9.894242997305009e-06,
      "loss": 0.0845,
      "step": 52100
    },
    {
      "epoch": 0.085295523130601,
      "grad_norm": 3.9038703441619873,
      "learning_rate": 9.894177105091493e-06,
      "loss": 0.0959,
      "step": 52120
    },
    {
      "epoch": 0.08532825356925433,
      "grad_norm": 2.3078198432922363,
      "learning_rate": 9.894111212877975e-06,
      "loss": 0.0646,
      "step": 52140
    },
    {
      "epoch": 0.08536098400790768,
      "grad_norm": 2.541419267654419,
      "learning_rate": 9.894045320664458e-06,
      "loss": 0.1046,
      "step": 52160
    },
    {
      "epoch": 0.085393714446561,
      "grad_norm": 1.9874087572097778,
      "learning_rate": 9.89397942845094e-06,
      "loss": 0.087,
      "step": 52180
    },
    {
      "epoch": 0.08542644488521435,
      "grad_norm": 6.183743953704834,
      "learning_rate": 9.893913536237424e-06,
      "loss": 0.0752,
      "step": 52200
    },
    {
      "epoch": 0.0854591753238677,
      "grad_norm": 4.221048355102539,
      "learning_rate": 9.893847644023905e-06,
      "loss": 0.1087,
      "step": 52220
    },
    {
      "epoch": 0.08549190576252103,
      "grad_norm": 12.577559471130371,
      "learning_rate": 9.893781751810389e-06,
      "loss": 0.0733,
      "step": 52240
    },
    {
      "epoch": 0.08552463620117437,
      "grad_norm": 4.882591724395752,
      "learning_rate": 9.893715859596873e-06,
      "loss": 0.0766,
      "step": 52260
    },
    {
      "epoch": 0.0855573666398277,
      "grad_norm": 1.8881148099899292,
      "learning_rate": 9.893649967383355e-06,
      "loss": 0.092,
      "step": 52280
    },
    {
      "epoch": 0.08559009707848104,
      "grad_norm": 2.12400484085083,
      "learning_rate": 9.893584075169838e-06,
      "loss": 0.0889,
      "step": 52300
    },
    {
      "epoch": 0.08562282751713439,
      "grad_norm": 4.444852352142334,
      "learning_rate": 9.893518182956322e-06,
      "loss": 0.0731,
      "step": 52320
    },
    {
      "epoch": 0.08565555795578772,
      "grad_norm": 1.3025020360946655,
      "learning_rate": 9.893452290742804e-06,
      "loss": 0.0658,
      "step": 52340
    },
    {
      "epoch": 0.08568828839444106,
      "grad_norm": 1.8071287870407104,
      "learning_rate": 9.893386398529287e-06,
      "loss": 0.0794,
      "step": 52360
    },
    {
      "epoch": 0.0857210188330944,
      "grad_norm": 6.555062294006348,
      "learning_rate": 9.89332050631577e-06,
      "loss": 0.0841,
      "step": 52380
    },
    {
      "epoch": 0.08575374927174774,
      "grad_norm": 10.06155776977539,
      "learning_rate": 9.893254614102253e-06,
      "loss": 0.1103,
      "step": 52400
    },
    {
      "epoch": 0.08578647971040108,
      "grad_norm": 6.518322944641113,
      "learning_rate": 9.893188721888736e-06,
      "loss": 0.1014,
      "step": 52420
    },
    {
      "epoch": 0.08581921014905441,
      "grad_norm": 17.133930206298828,
      "learning_rate": 9.893122829675218e-06,
      "loss": 0.0751,
      "step": 52440
    },
    {
      "epoch": 0.08585194058770776,
      "grad_norm": 3.7111871242523193,
      "learning_rate": 9.893056937461702e-06,
      "loss": 0.0832,
      "step": 52460
    },
    {
      "epoch": 0.08588467102636109,
      "grad_norm": 7.561840057373047,
      "learning_rate": 9.892991045248184e-06,
      "loss": 0.0789,
      "step": 52480
    },
    {
      "epoch": 0.08591740146501443,
      "grad_norm": 9.881902694702148,
      "learning_rate": 9.892925153034667e-06,
      "loss": 0.1027,
      "step": 52500
    },
    {
      "epoch": 0.08595013190366778,
      "grad_norm": 4.294491291046143,
      "learning_rate": 9.892859260821149e-06,
      "loss": 0.071,
      "step": 52520
    },
    {
      "epoch": 0.08598286234232111,
      "grad_norm": 6.795041561126709,
      "learning_rate": 9.892793368607633e-06,
      "loss": 0.0886,
      "step": 52540
    },
    {
      "epoch": 0.08601559278097445,
      "grad_norm": 1.1411420106887817,
      "learning_rate": 9.892727476394115e-06,
      "loss": 0.0842,
      "step": 52560
    },
    {
      "epoch": 0.08604832321962778,
      "grad_norm": 3.414644241333008,
      "learning_rate": 9.892661584180598e-06,
      "loss": 0.1078,
      "step": 52580
    },
    {
      "epoch": 0.08608105365828113,
      "grad_norm": 3.0933051109313965,
      "learning_rate": 9.89259569196708e-06,
      "loss": 0.0826,
      "step": 52600
    },
    {
      "epoch": 0.08611378409693447,
      "grad_norm": 1.4852731227874756,
      "learning_rate": 9.892529799753564e-06,
      "loss": 0.0841,
      "step": 52620
    },
    {
      "epoch": 0.0861465145355878,
      "grad_norm": 4.582865238189697,
      "learning_rate": 9.892463907540046e-06,
      "loss": 0.0899,
      "step": 52640
    },
    {
      "epoch": 0.08617924497424115,
      "grad_norm": 3.29958438873291,
      "learning_rate": 9.892398015326529e-06,
      "loss": 0.098,
      "step": 52660
    },
    {
      "epoch": 0.08621197541289448,
      "grad_norm": 18.41004753112793,
      "learning_rate": 9.892332123113013e-06,
      "loss": 0.0871,
      "step": 52680
    },
    {
      "epoch": 0.08624470585154782,
      "grad_norm": 8.431147575378418,
      "learning_rate": 9.892266230899495e-06,
      "loss": 0.1109,
      "step": 52700
    },
    {
      "epoch": 0.08627743629020117,
      "grad_norm": 3.0687248706817627,
      "learning_rate": 9.892200338685978e-06,
      "loss": 0.1059,
      "step": 52720
    },
    {
      "epoch": 0.0863101667288545,
      "grad_norm": 2.0498313903808594,
      "learning_rate": 9.892134446472462e-06,
      "loss": 0.0733,
      "step": 52740
    },
    {
      "epoch": 0.08634289716750784,
      "grad_norm": 2.8148598670959473,
      "learning_rate": 9.892068554258945e-06,
      "loss": 0.094,
      "step": 52760
    },
    {
      "epoch": 0.08637562760616117,
      "grad_norm": 4.100212097167969,
      "learning_rate": 9.892002662045427e-06,
      "loss": 0.0913,
      "step": 52780
    },
    {
      "epoch": 0.08640835804481452,
      "grad_norm": 7.806318283081055,
      "learning_rate": 9.89193676983191e-06,
      "loss": 0.085,
      "step": 52800
    },
    {
      "epoch": 0.08644108848346786,
      "grad_norm": 7.001810550689697,
      "learning_rate": 9.891870877618393e-06,
      "loss": 0.0988,
      "step": 52820
    },
    {
      "epoch": 0.08647381892212119,
      "grad_norm": 11.56930923461914,
      "learning_rate": 9.891804985404876e-06,
      "loss": 0.0795,
      "step": 52840
    },
    {
      "epoch": 0.08650654936077454,
      "grad_norm": 5.357579708099365,
      "learning_rate": 9.891739093191358e-06,
      "loss": 0.0758,
      "step": 52860
    },
    {
      "epoch": 0.08653927979942787,
      "grad_norm": 0.5680930018424988,
      "learning_rate": 9.891673200977842e-06,
      "loss": 0.0803,
      "step": 52880
    },
    {
      "epoch": 0.08657201023808121,
      "grad_norm": 7.799154758453369,
      "learning_rate": 9.891607308764324e-06,
      "loss": 0.0947,
      "step": 52900
    },
    {
      "epoch": 0.08660474067673454,
      "grad_norm": 3.865812063217163,
      "learning_rate": 9.891541416550807e-06,
      "loss": 0.0751,
      "step": 52920
    },
    {
      "epoch": 0.08663747111538789,
      "grad_norm": 3.8385531902313232,
      "learning_rate": 9.89147552433729e-06,
      "loss": 0.0765,
      "step": 52940
    },
    {
      "epoch": 0.08667020155404123,
      "grad_norm": 3.3557379245758057,
      "learning_rate": 9.891409632123773e-06,
      "loss": 0.0834,
      "step": 52960
    },
    {
      "epoch": 0.08670293199269456,
      "grad_norm": 4.883358955383301,
      "learning_rate": 9.891343739910255e-06,
      "loss": 0.1016,
      "step": 52980
    },
    {
      "epoch": 0.0867356624313479,
      "grad_norm": 8.2744722366333,
      "learning_rate": 9.891277847696738e-06,
      "loss": 0.1096,
      "step": 53000
    },
    {
      "epoch": 0.08676839287000124,
      "grad_norm": 1.9834682941436768,
      "learning_rate": 9.89121195548322e-06,
      "loss": 0.0952,
      "step": 53020
    },
    {
      "epoch": 0.08680112330865458,
      "grad_norm": 8.978466987609863,
      "learning_rate": 9.891146063269704e-06,
      "loss": 0.0937,
      "step": 53040
    },
    {
      "epoch": 0.08683385374730793,
      "grad_norm": 2.757646322250366,
      "learning_rate": 9.891080171056187e-06,
      "loss": 0.0983,
      "step": 53060
    },
    {
      "epoch": 0.08686658418596126,
      "grad_norm": 6.877670764923096,
      "learning_rate": 9.89101427884267e-06,
      "loss": 0.0731,
      "step": 53080
    },
    {
      "epoch": 0.0868993146246146,
      "grad_norm": 3.8403947353363037,
      "learning_rate": 9.890948386629153e-06,
      "loss": 0.0917,
      "step": 53100
    },
    {
      "epoch": 0.08693204506326793,
      "grad_norm": 5.199953556060791,
      "learning_rate": 9.890882494415636e-06,
      "loss": 0.0915,
      "step": 53120
    },
    {
      "epoch": 0.08696477550192128,
      "grad_norm": 3.5652430057525635,
      "learning_rate": 9.890816602202118e-06,
      "loss": 0.0734,
      "step": 53140
    },
    {
      "epoch": 0.08699750594057462,
      "grad_norm": 3.6462409496307373,
      "learning_rate": 9.890750709988602e-06,
      "loss": 0.1004,
      "step": 53160
    },
    {
      "epoch": 0.08703023637922795,
      "grad_norm": 3.201512336730957,
      "learning_rate": 9.890684817775085e-06,
      "loss": 0.083,
      "step": 53180
    },
    {
      "epoch": 0.0870629668178813,
      "grad_norm": 5.128175258636475,
      "learning_rate": 9.890618925561567e-06,
      "loss": 0.0988,
      "step": 53200
    },
    {
      "epoch": 0.08709569725653463,
      "grad_norm": 3.0038366317749023,
      "learning_rate": 9.890553033348051e-06,
      "loss": 0.1049,
      "step": 53220
    },
    {
      "epoch": 0.08712842769518797,
      "grad_norm": 4.470758438110352,
      "learning_rate": 9.890487141134533e-06,
      "loss": 0.0995,
      "step": 53240
    },
    {
      "epoch": 0.08716115813384132,
      "grad_norm": 6.703179836273193,
      "learning_rate": 9.890421248921016e-06,
      "loss": 0.0929,
      "step": 53260
    },
    {
      "epoch": 0.08719388857249465,
      "grad_norm": 1.966712236404419,
      "learning_rate": 9.890355356707498e-06,
      "loss": 0.0898,
      "step": 53280
    },
    {
      "epoch": 0.08722661901114799,
      "grad_norm": 5.313753604888916,
      "learning_rate": 9.890289464493982e-06,
      "loss": 0.0806,
      "step": 53300
    },
    {
      "epoch": 0.08725934944980132,
      "grad_norm": 3.0782880783081055,
      "learning_rate": 9.890223572280464e-06,
      "loss": 0.0957,
      "step": 53320
    },
    {
      "epoch": 0.08729207988845467,
      "grad_norm": 6.423717021942139,
      "learning_rate": 9.890157680066947e-06,
      "loss": 0.083,
      "step": 53340
    },
    {
      "epoch": 0.08732481032710801,
      "grad_norm": 3.472957134246826,
      "learning_rate": 9.89009178785343e-06,
      "loss": 0.0978,
      "step": 53360
    },
    {
      "epoch": 0.08735754076576134,
      "grad_norm": 5.299381256103516,
      "learning_rate": 9.890025895639913e-06,
      "loss": 0.076,
      "step": 53380
    },
    {
      "epoch": 0.08739027120441469,
      "grad_norm": 3.059408664703369,
      "learning_rate": 9.889960003426395e-06,
      "loss": 0.0839,
      "step": 53400
    },
    {
      "epoch": 0.08742300164306802,
      "grad_norm": 4.56890344619751,
      "learning_rate": 9.889894111212878e-06,
      "loss": 0.1076,
      "step": 53420
    },
    {
      "epoch": 0.08745573208172136,
      "grad_norm": 0.8381149172782898,
      "learning_rate": 9.889828218999362e-06,
      "loss": 0.0879,
      "step": 53440
    },
    {
      "epoch": 0.0874884625203747,
      "grad_norm": 4.134371280670166,
      "learning_rate": 9.889762326785844e-06,
      "loss": 0.0975,
      "step": 53460
    },
    {
      "epoch": 0.08752119295902804,
      "grad_norm": 5.0164570808410645,
      "learning_rate": 9.889696434572327e-06,
      "loss": 0.094,
      "step": 53480
    },
    {
      "epoch": 0.08755392339768138,
      "grad_norm": 3.9871363639831543,
      "learning_rate": 9.889630542358811e-06,
      "loss": 0.0811,
      "step": 53500
    },
    {
      "epoch": 0.08758665383633471,
      "grad_norm": 3.5391900539398193,
      "learning_rate": 9.889564650145293e-06,
      "loss": 0.086,
      "step": 53520
    },
    {
      "epoch": 0.08761938427498805,
      "grad_norm": 2.0630664825439453,
      "learning_rate": 9.889498757931776e-06,
      "loss": 0.0815,
      "step": 53540
    },
    {
      "epoch": 0.08765211471364139,
      "grad_norm": 4.94787073135376,
      "learning_rate": 9.88943286571826e-06,
      "loss": 0.1024,
      "step": 53560
    },
    {
      "epoch": 0.08768484515229473,
      "grad_norm": 3.6980812549591064,
      "learning_rate": 9.889366973504742e-06,
      "loss": 0.0844,
      "step": 53580
    },
    {
      "epoch": 0.08771757559094807,
      "grad_norm": 3.3050014972686768,
      "learning_rate": 9.889301081291226e-06,
      "loss": 0.0644,
      "step": 53600
    },
    {
      "epoch": 0.0877503060296014,
      "grad_norm": 9.09607219696045,
      "learning_rate": 9.889235189077707e-06,
      "loss": 0.0916,
      "step": 53620
    },
    {
      "epoch": 0.08778303646825475,
      "grad_norm": 0.8280184864997864,
      "learning_rate": 9.889169296864191e-06,
      "loss": 0.0604,
      "step": 53640
    },
    {
      "epoch": 0.08781576690690808,
      "grad_norm": 2.9215123653411865,
      "learning_rate": 9.889103404650673e-06,
      "loss": 0.1003,
      "step": 53660
    },
    {
      "epoch": 0.08784849734556142,
      "grad_norm": 4.686954021453857,
      "learning_rate": 9.889037512437156e-06,
      "loss": 0.0959,
      "step": 53680
    },
    {
      "epoch": 0.08788122778421477,
      "grad_norm": 2.1671550273895264,
      "learning_rate": 9.888971620223638e-06,
      "loss": 0.0742,
      "step": 53700
    },
    {
      "epoch": 0.0879139582228681,
      "grad_norm": 3.5359058380126953,
      "learning_rate": 9.888905728010122e-06,
      "loss": 0.0934,
      "step": 53720
    },
    {
      "epoch": 0.08794668866152144,
      "grad_norm": 4.076354026794434,
      "learning_rate": 9.888839835796604e-06,
      "loss": 0.0739,
      "step": 53740
    },
    {
      "epoch": 0.08797941910017477,
      "grad_norm": 4.3547773361206055,
      "learning_rate": 9.888773943583087e-06,
      "loss": 0.1087,
      "step": 53760
    },
    {
      "epoch": 0.08801214953882812,
      "grad_norm": 3.8469462394714355,
      "learning_rate": 9.888708051369571e-06,
      "loss": 0.0802,
      "step": 53780
    },
    {
      "epoch": 0.08804487997748146,
      "grad_norm": 3.5220186710357666,
      "learning_rate": 9.888642159156053e-06,
      "loss": 0.0988,
      "step": 53800
    },
    {
      "epoch": 0.0880776104161348,
      "grad_norm": 4.441314220428467,
      "learning_rate": 9.888576266942536e-06,
      "loss": 0.0924,
      "step": 53820
    },
    {
      "epoch": 0.08811034085478814,
      "grad_norm": 2.004211664199829,
      "learning_rate": 9.888510374729018e-06,
      "loss": 0.0744,
      "step": 53840
    },
    {
      "epoch": 0.08814307129344147,
      "grad_norm": 3.642413854598999,
      "learning_rate": 9.888444482515502e-06,
      "loss": 0.0929,
      "step": 53860
    },
    {
      "epoch": 0.08817580173209481,
      "grad_norm": 2.6421523094177246,
      "learning_rate": 9.888378590301984e-06,
      "loss": 0.0853,
      "step": 53880
    },
    {
      "epoch": 0.08820853217074816,
      "grad_norm": 3.268495559692383,
      "learning_rate": 9.888312698088467e-06,
      "loss": 0.1005,
      "step": 53900
    },
    {
      "epoch": 0.08824126260940149,
      "grad_norm": 2.828721046447754,
      "learning_rate": 9.888246805874951e-06,
      "loss": 0.0803,
      "step": 53920
    },
    {
      "epoch": 0.08827399304805483,
      "grad_norm": 2.7006964683532715,
      "learning_rate": 9.888180913661433e-06,
      "loss": 0.1062,
      "step": 53940
    },
    {
      "epoch": 0.08830672348670816,
      "grad_norm": 5.698319435119629,
      "learning_rate": 9.888115021447917e-06,
      "loss": 0.0724,
      "step": 53960
    },
    {
      "epoch": 0.08833945392536151,
      "grad_norm": 2.372600793838501,
      "learning_rate": 9.8880491292344e-06,
      "loss": 0.0853,
      "step": 53980
    },
    {
      "epoch": 0.08837218436401485,
      "grad_norm": 1.132451057434082,
      "learning_rate": 9.887983237020882e-06,
      "loss": 0.1004,
      "step": 54000
    },
    {
      "epoch": 0.08840491480266818,
      "grad_norm": 5.132742404937744,
      "learning_rate": 9.887917344807366e-06,
      "loss": 0.0766,
      "step": 54020
    },
    {
      "epoch": 0.08843764524132153,
      "grad_norm": 11.052901268005371,
      "learning_rate": 9.887851452593847e-06,
      "loss": 0.0869,
      "step": 54040
    },
    {
      "epoch": 0.08847037567997486,
      "grad_norm": 2.269440174102783,
      "learning_rate": 9.887785560380331e-06,
      "loss": 0.0798,
      "step": 54060
    },
    {
      "epoch": 0.0885031061186282,
      "grad_norm": 4.890540599822998,
      "learning_rate": 9.887719668166813e-06,
      "loss": 0.0778,
      "step": 54080
    },
    {
      "epoch": 0.08853583655728155,
      "grad_norm": 5.697045803070068,
      "learning_rate": 9.887653775953297e-06,
      "loss": 0.0951,
      "step": 54100
    },
    {
      "epoch": 0.08856856699593488,
      "grad_norm": 0.5185862183570862,
      "learning_rate": 9.887587883739778e-06,
      "loss": 0.083,
      "step": 54120
    },
    {
      "epoch": 0.08860129743458822,
      "grad_norm": 1.8852852582931519,
      "learning_rate": 9.887521991526262e-06,
      "loss": 0.0794,
      "step": 54140
    },
    {
      "epoch": 0.08863402787324155,
      "grad_norm": 4.774723052978516,
      "learning_rate": 9.887456099312746e-06,
      "loss": 0.0802,
      "step": 54160
    },
    {
      "epoch": 0.0886667583118949,
      "grad_norm": 3.0135819911956787,
      "learning_rate": 9.887390207099228e-06,
      "loss": 0.0892,
      "step": 54180
    },
    {
      "epoch": 0.08869948875054824,
      "grad_norm": 4.1366448402404785,
      "learning_rate": 9.887324314885711e-06,
      "loss": 0.0986,
      "step": 54200
    },
    {
      "epoch": 0.08873221918920157,
      "grad_norm": 3.2309489250183105,
      "learning_rate": 9.887258422672193e-06,
      "loss": 0.0824,
      "step": 54220
    },
    {
      "epoch": 0.08876494962785492,
      "grad_norm": 5.554040908813477,
      "learning_rate": 9.887192530458677e-06,
      "loss": 0.0659,
      "step": 54240
    },
    {
      "epoch": 0.08879768006650825,
      "grad_norm": 5.424490451812744,
      "learning_rate": 9.887126638245158e-06,
      "loss": 0.0924,
      "step": 54260
    },
    {
      "epoch": 0.08883041050516159,
      "grad_norm": 1.8595045804977417,
      "learning_rate": 9.887060746031642e-06,
      "loss": 0.0626,
      "step": 54280
    },
    {
      "epoch": 0.08886314094381492,
      "grad_norm": 6.178706645965576,
      "learning_rate": 9.886994853818126e-06,
      "loss": 0.0963,
      "step": 54300
    },
    {
      "epoch": 0.08889587138246827,
      "grad_norm": 1.7815340757369995,
      "learning_rate": 9.886928961604608e-06,
      "loss": 0.0721,
      "step": 54320
    },
    {
      "epoch": 0.08892860182112161,
      "grad_norm": 2.907898187637329,
      "learning_rate": 9.886863069391091e-06,
      "loss": 0.0932,
      "step": 54340
    },
    {
      "epoch": 0.08896133225977494,
      "grad_norm": 7.755176067352295,
      "learning_rate": 9.886797177177575e-06,
      "loss": 0.0867,
      "step": 54360
    },
    {
      "epoch": 0.08899406269842829,
      "grad_norm": 1.9055951833724976,
      "learning_rate": 9.886731284964057e-06,
      "loss": 0.0839,
      "step": 54380
    },
    {
      "epoch": 0.08902679313708162,
      "grad_norm": 8.735963821411133,
      "learning_rate": 9.88666539275054e-06,
      "loss": 0.0935,
      "step": 54400
    },
    {
      "epoch": 0.08905952357573496,
      "grad_norm": 2.3844892978668213,
      "learning_rate": 9.886599500537022e-06,
      "loss": 0.0861,
      "step": 54420
    },
    {
      "epoch": 0.0890922540143883,
      "grad_norm": 3.1184287071228027,
      "learning_rate": 9.886533608323506e-06,
      "loss": 0.1022,
      "step": 54440
    },
    {
      "epoch": 0.08912498445304164,
      "grad_norm": 2.2494261264801025,
      "learning_rate": 9.886467716109988e-06,
      "loss": 0.079,
      "step": 54460
    },
    {
      "epoch": 0.08915771489169498,
      "grad_norm": 6.270880222320557,
      "learning_rate": 9.886401823896471e-06,
      "loss": 0.0705,
      "step": 54480
    },
    {
      "epoch": 0.08919044533034831,
      "grad_norm": 3.59334659576416,
      "learning_rate": 9.886335931682955e-06,
      "loss": 0.1013,
      "step": 54500
    },
    {
      "epoch": 0.08922317576900166,
      "grad_norm": 3.2438066005706787,
      "learning_rate": 9.886270039469437e-06,
      "loss": 0.1043,
      "step": 54520
    },
    {
      "epoch": 0.089255906207655,
      "grad_norm": 5.636196136474609,
      "learning_rate": 9.88620414725592e-06,
      "loss": 0.0828,
      "step": 54540
    },
    {
      "epoch": 0.08928863664630833,
      "grad_norm": 3.751549243927002,
      "learning_rate": 9.886138255042402e-06,
      "loss": 0.1177,
      "step": 54560
    },
    {
      "epoch": 0.08932136708496168,
      "grad_norm": 3.2303216457366943,
      "learning_rate": 9.886072362828886e-06,
      "loss": 0.0859,
      "step": 54580
    },
    {
      "epoch": 0.089354097523615,
      "grad_norm": 2.4326610565185547,
      "learning_rate": 9.886006470615368e-06,
      "loss": 0.1033,
      "step": 54600
    },
    {
      "epoch": 0.08938682796226835,
      "grad_norm": 2.835463762283325,
      "learning_rate": 9.885940578401851e-06,
      "loss": 0.0791,
      "step": 54620
    },
    {
      "epoch": 0.0894195584009217,
      "grad_norm": 3.276426076889038,
      "learning_rate": 9.885874686188333e-06,
      "loss": 0.085,
      "step": 54640
    },
    {
      "epoch": 0.08945228883957503,
      "grad_norm": 4.0778021812438965,
      "learning_rate": 9.885808793974817e-06,
      "loss": 0.0894,
      "step": 54660
    },
    {
      "epoch": 0.08948501927822837,
      "grad_norm": 3.5927085876464844,
      "learning_rate": 9.885742901761299e-06,
      "loss": 0.1143,
      "step": 54680
    },
    {
      "epoch": 0.0895177497168817,
      "grad_norm": 4.32988166809082,
      "learning_rate": 9.885677009547782e-06,
      "loss": 0.0911,
      "step": 54700
    },
    {
      "epoch": 0.08955048015553504,
      "grad_norm": 4.271224021911621,
      "learning_rate": 9.885611117334266e-06,
      "loss": 0.0716,
      "step": 54720
    },
    {
      "epoch": 0.08958321059418839,
      "grad_norm": 4.032992839813232,
      "learning_rate": 9.885545225120748e-06,
      "loss": 0.0925,
      "step": 54740
    },
    {
      "epoch": 0.08961594103284172,
      "grad_norm": 4.618505477905273,
      "learning_rate": 9.885479332907231e-06,
      "loss": 0.0897,
      "step": 54760
    },
    {
      "epoch": 0.08964867147149506,
      "grad_norm": 3.300706386566162,
      "learning_rate": 9.885413440693715e-06,
      "loss": 0.0917,
      "step": 54780
    },
    {
      "epoch": 0.0896814019101484,
      "grad_norm": 4.409672260284424,
      "learning_rate": 9.885347548480197e-06,
      "loss": 0.1104,
      "step": 54800
    },
    {
      "epoch": 0.08971413234880174,
      "grad_norm": 2.669454336166382,
      "learning_rate": 9.88528165626668e-06,
      "loss": 0.0797,
      "step": 54820
    },
    {
      "epoch": 0.08974686278745508,
      "grad_norm": 5.78820276260376,
      "learning_rate": 9.885215764053164e-06,
      "loss": 0.0801,
      "step": 54840
    },
    {
      "epoch": 0.08977959322610841,
      "grad_norm": 3.2731735706329346,
      "learning_rate": 9.885149871839646e-06,
      "loss": 0.0926,
      "step": 54860
    },
    {
      "epoch": 0.08981232366476176,
      "grad_norm": 4.154721736907959,
      "learning_rate": 9.88508397962613e-06,
      "loss": 0.0827,
      "step": 54880
    },
    {
      "epoch": 0.08984505410341509,
      "grad_norm": 5.173102855682373,
      "learning_rate": 9.885018087412611e-06,
      "loss": 0.1118,
      "step": 54900
    },
    {
      "epoch": 0.08987778454206843,
      "grad_norm": 2.9420056343078613,
      "learning_rate": 9.884952195199095e-06,
      "loss": 0.0911,
      "step": 54920
    },
    {
      "epoch": 0.08991051498072178,
      "grad_norm": 5.385845184326172,
      "learning_rate": 9.884886302985577e-06,
      "loss": 0.0825,
      "step": 54940
    },
    {
      "epoch": 0.08994324541937511,
      "grad_norm": 2.9614174365997314,
      "learning_rate": 9.88482041077206e-06,
      "loss": 0.0687,
      "step": 54960
    },
    {
      "epoch": 0.08997597585802845,
      "grad_norm": 4.392318248748779,
      "learning_rate": 9.884754518558542e-06,
      "loss": 0.0869,
      "step": 54980
    },
    {
      "epoch": 0.09000870629668178,
      "grad_norm": 2.4746439456939697,
      "learning_rate": 9.884688626345026e-06,
      "loss": 0.0857,
      "step": 55000
    },
    {
      "epoch": 0.09004143673533513,
      "grad_norm": 2.935499429702759,
      "learning_rate": 9.884622734131508e-06,
      "loss": 0.0997,
      "step": 55020
    },
    {
      "epoch": 0.09007416717398846,
      "grad_norm": 3.7315051555633545,
      "learning_rate": 9.884556841917991e-06,
      "loss": 0.0872,
      "step": 55040
    },
    {
      "epoch": 0.0901068976126418,
      "grad_norm": 2.274129867553711,
      "learning_rate": 9.884490949704473e-06,
      "loss": 0.074,
      "step": 55060
    },
    {
      "epoch": 0.09013962805129515,
      "grad_norm": 3.3746583461761475,
      "learning_rate": 9.884425057490957e-06,
      "loss": 0.0893,
      "step": 55080
    },
    {
      "epoch": 0.09017235848994848,
      "grad_norm": 4.666585445404053,
      "learning_rate": 9.88435916527744e-06,
      "loss": 0.0888,
      "step": 55100
    },
    {
      "epoch": 0.09020508892860182,
      "grad_norm": 4.989713191986084,
      "learning_rate": 9.884293273063922e-06,
      "loss": 0.0837,
      "step": 55120
    },
    {
      "epoch": 0.09023781936725515,
      "grad_norm": 7.840435028076172,
      "learning_rate": 9.884227380850406e-06,
      "loss": 0.0935,
      "step": 55140
    },
    {
      "epoch": 0.0902705498059085,
      "grad_norm": 17.081653594970703,
      "learning_rate": 9.88416148863689e-06,
      "loss": 0.0904,
      "step": 55160
    },
    {
      "epoch": 0.09030328024456184,
      "grad_norm": 2.851043224334717,
      "learning_rate": 9.884095596423371e-06,
      "loss": 0.0813,
      "step": 55180
    },
    {
      "epoch": 0.09033601068321517,
      "grad_norm": 4.402394771575928,
      "learning_rate": 9.884029704209855e-06,
      "loss": 0.09,
      "step": 55200
    },
    {
      "epoch": 0.09036874112186852,
      "grad_norm": 2.0891635417938232,
      "learning_rate": 9.883963811996338e-06,
      "loss": 0.1051,
      "step": 55220
    },
    {
      "epoch": 0.09040147156052185,
      "grad_norm": 5.35684871673584,
      "learning_rate": 9.88389791978282e-06,
      "loss": 0.0855,
      "step": 55240
    },
    {
      "epoch": 0.09043420199917519,
      "grad_norm": 2.92708683013916,
      "learning_rate": 9.883832027569304e-06,
      "loss": 0.0962,
      "step": 55260
    },
    {
      "epoch": 0.09046693243782854,
      "grad_norm": 2.987598419189453,
      "learning_rate": 9.883766135355786e-06,
      "loss": 0.1056,
      "step": 55280
    },
    {
      "epoch": 0.09049966287648187,
      "grad_norm": 2.7529983520507812,
      "learning_rate": 9.88370024314227e-06,
      "loss": 0.0863,
      "step": 55300
    },
    {
      "epoch": 0.09053239331513521,
      "grad_norm": 7.259894847869873,
      "learning_rate": 9.883634350928751e-06,
      "loss": 0.095,
      "step": 55320
    },
    {
      "epoch": 0.09056512375378854,
      "grad_norm": 2.5137953758239746,
      "learning_rate": 9.883568458715235e-06,
      "loss": 0.1096,
      "step": 55340
    },
    {
      "epoch": 0.09059785419244189,
      "grad_norm": 14.498001098632812,
      "learning_rate": 9.883502566501717e-06,
      "loss": 0.0889,
      "step": 55360
    },
    {
      "epoch": 0.09063058463109523,
      "grad_norm": 3.612852096557617,
      "learning_rate": 9.8834366742882e-06,
      "loss": 0.0871,
      "step": 55380
    },
    {
      "epoch": 0.09066331506974856,
      "grad_norm": 2.614877939224243,
      "learning_rate": 9.883370782074682e-06,
      "loss": 0.0781,
      "step": 55400
    },
    {
      "epoch": 0.0906960455084019,
      "grad_norm": 72.62384033203125,
      "learning_rate": 9.883304889861166e-06,
      "loss": 0.1049,
      "step": 55420
    },
    {
      "epoch": 0.09072877594705524,
      "grad_norm": 3.001952886581421,
      "learning_rate": 9.883238997647648e-06,
      "loss": 0.0843,
      "step": 55440
    },
    {
      "epoch": 0.09076150638570858,
      "grad_norm": 4.42786169052124,
      "learning_rate": 9.883173105434131e-06,
      "loss": 0.1067,
      "step": 55460
    },
    {
      "epoch": 0.09079423682436193,
      "grad_norm": 4.053040981292725,
      "learning_rate": 9.883107213220613e-06,
      "loss": 0.0879,
      "step": 55480
    },
    {
      "epoch": 0.09082696726301526,
      "grad_norm": 1.2442352771759033,
      "learning_rate": 9.883041321007097e-06,
      "loss": 0.0886,
      "step": 55500
    },
    {
      "epoch": 0.0908596977016686,
      "grad_norm": 2.821903944015503,
      "learning_rate": 9.88297542879358e-06,
      "loss": 0.0885,
      "step": 55520
    },
    {
      "epoch": 0.09089242814032193,
      "grad_norm": 6.030549049377441,
      "learning_rate": 9.882909536580064e-06,
      "loss": 0.0768,
      "step": 55540
    },
    {
      "epoch": 0.09092515857897528,
      "grad_norm": 2.8951234817504883,
      "learning_rate": 9.882843644366546e-06,
      "loss": 0.092,
      "step": 55560
    },
    {
      "epoch": 0.09095788901762862,
      "grad_norm": 5.638913631439209,
      "learning_rate": 9.88277775215303e-06,
      "loss": 0.0944,
      "step": 55580
    },
    {
      "epoch": 0.09099061945628195,
      "grad_norm": 4.856852054595947,
      "learning_rate": 9.882711859939513e-06,
      "loss": 0.093,
      "step": 55600
    },
    {
      "epoch": 0.0910233498949353,
      "grad_norm": 7.362081527709961,
      "learning_rate": 9.882645967725995e-06,
      "loss": 0.0821,
      "step": 55620
    },
    {
      "epoch": 0.09105608033358863,
      "grad_norm": 1.1823326349258423,
      "learning_rate": 9.882580075512479e-06,
      "loss": 0.0978,
      "step": 55640
    },
    {
      "epoch": 0.09108881077224197,
      "grad_norm": 2.7773423194885254,
      "learning_rate": 9.88251418329896e-06,
      "loss": 0.1083,
      "step": 55660
    },
    {
      "epoch": 0.0911215412108953,
      "grad_norm": 1.72969388961792,
      "learning_rate": 9.882448291085444e-06,
      "loss": 0.0864,
      "step": 55680
    },
    {
      "epoch": 0.09115427164954865,
      "grad_norm": 1.9381434917449951,
      "learning_rate": 9.882382398871926e-06,
      "loss": 0.0833,
      "step": 55700
    },
    {
      "epoch": 0.09118700208820199,
      "grad_norm": 3.9476730823516846,
      "learning_rate": 9.88231650665841e-06,
      "loss": 0.0725,
      "step": 55720
    },
    {
      "epoch": 0.09121973252685532,
      "grad_norm": 3.1986100673675537,
      "learning_rate": 9.882250614444891e-06,
      "loss": 0.0863,
      "step": 55740
    },
    {
      "epoch": 0.09125246296550867,
      "grad_norm": 3.5572149753570557,
      "learning_rate": 9.882184722231375e-06,
      "loss": 0.0955,
      "step": 55760
    },
    {
      "epoch": 0.091285193404162,
      "grad_norm": 4.041855335235596,
      "learning_rate": 9.882118830017857e-06,
      "loss": 0.0848,
      "step": 55780
    },
    {
      "epoch": 0.09131792384281534,
      "grad_norm": 1.1015472412109375,
      "learning_rate": 9.88205293780434e-06,
      "loss": 0.0701,
      "step": 55800
    },
    {
      "epoch": 0.09135065428146869,
      "grad_norm": 6.963444709777832,
      "learning_rate": 9.881987045590822e-06,
      "loss": 0.068,
      "step": 55820
    },
    {
      "epoch": 0.09138338472012202,
      "grad_norm": 2.1133522987365723,
      "learning_rate": 9.881921153377306e-06,
      "loss": 0.0976,
      "step": 55840
    },
    {
      "epoch": 0.09141611515877536,
      "grad_norm": 7.969969272613525,
      "learning_rate": 9.881855261163788e-06,
      "loss": 0.0674,
      "step": 55860
    },
    {
      "epoch": 0.09144884559742869,
      "grad_norm": 1.9950445890426636,
      "learning_rate": 9.881789368950271e-06,
      "loss": 0.0939,
      "step": 55880
    },
    {
      "epoch": 0.09148157603608204,
      "grad_norm": 3.373471975326538,
      "learning_rate": 9.881723476736755e-06,
      "loss": 0.0823,
      "step": 55900
    },
    {
      "epoch": 0.09151430647473538,
      "grad_norm": 1.9198482036590576,
      "learning_rate": 9.881657584523237e-06,
      "loss": 0.08,
      "step": 55920
    },
    {
      "epoch": 0.09154703691338871,
      "grad_norm": 2.0084152221679688,
      "learning_rate": 9.88159169230972e-06,
      "loss": 0.0803,
      "step": 55940
    },
    {
      "epoch": 0.09157976735204205,
      "grad_norm": 2.5147576332092285,
      "learning_rate": 9.881525800096204e-06,
      "loss": 0.0958,
      "step": 55960
    },
    {
      "epoch": 0.09161249779069539,
      "grad_norm": 8.418922424316406,
      "learning_rate": 9.881459907882686e-06,
      "loss": 0.1179,
      "step": 55980
    },
    {
      "epoch": 0.09164522822934873,
      "grad_norm": 2.393165111541748,
      "learning_rate": 9.88139401566917e-06,
      "loss": 0.0975,
      "step": 56000
    },
    {
      "epoch": 0.09167795866800207,
      "grad_norm": 1.803880214691162,
      "learning_rate": 9.881328123455653e-06,
      "loss": 0.0859,
      "step": 56020
    },
    {
      "epoch": 0.0917106891066554,
      "grad_norm": 4.558469295501709,
      "learning_rate": 9.881262231242135e-06,
      "loss": 0.0804,
      "step": 56040
    },
    {
      "epoch": 0.09174341954530875,
      "grad_norm": 2.0375921726226807,
      "learning_rate": 9.881196339028619e-06,
      "loss": 0.086,
      "step": 56060
    },
    {
      "epoch": 0.09177614998396208,
      "grad_norm": 1.5981026887893677,
      "learning_rate": 9.8811304468151e-06,
      "loss": 0.0852,
      "step": 56080
    },
    {
      "epoch": 0.09180888042261542,
      "grad_norm": 18.05819320678711,
      "learning_rate": 9.881064554601584e-06,
      "loss": 0.077,
      "step": 56100
    },
    {
      "epoch": 0.09184161086126877,
      "grad_norm": 2.6096956729888916,
      "learning_rate": 9.880998662388066e-06,
      "loss": 0.0747,
      "step": 56120
    },
    {
      "epoch": 0.0918743412999221,
      "grad_norm": 1.4467735290527344,
      "learning_rate": 9.88093277017455e-06,
      "loss": 0.0732,
      "step": 56140
    },
    {
      "epoch": 0.09190707173857544,
      "grad_norm": 2.6750237941741943,
      "learning_rate": 9.880866877961031e-06,
      "loss": 0.0852,
      "step": 56160
    },
    {
      "epoch": 0.09193980217722877,
      "grad_norm": 4.076870441436768,
      "learning_rate": 9.880800985747515e-06,
      "loss": 0.077,
      "step": 56180
    },
    {
      "epoch": 0.09197253261588212,
      "grad_norm": 3.1173572540283203,
      "learning_rate": 9.880735093533997e-06,
      "loss": 0.0793,
      "step": 56200
    },
    {
      "epoch": 0.09200526305453546,
      "grad_norm": 4.06015682220459,
      "learning_rate": 9.88066920132048e-06,
      "loss": 0.0921,
      "step": 56220
    },
    {
      "epoch": 0.0920379934931888,
      "grad_norm": 2.9483466148376465,
      "learning_rate": 9.880603309106964e-06,
      "loss": 0.0759,
      "step": 56240
    },
    {
      "epoch": 0.09207072393184214,
      "grad_norm": 6.026981830596924,
      "learning_rate": 9.880537416893446e-06,
      "loss": 0.0721,
      "step": 56260
    },
    {
      "epoch": 0.09210345437049547,
      "grad_norm": 2.4702165126800537,
      "learning_rate": 9.88047152467993e-06,
      "loss": 0.0942,
      "step": 56280
    },
    {
      "epoch": 0.09213618480914881,
      "grad_norm": 8.463037490844727,
      "learning_rate": 9.880405632466411e-06,
      "loss": 0.0715,
      "step": 56300
    },
    {
      "epoch": 0.09216891524780216,
      "grad_norm": 3.856088161468506,
      "learning_rate": 9.880339740252895e-06,
      "loss": 0.086,
      "step": 56320
    },
    {
      "epoch": 0.09220164568645549,
      "grad_norm": 1.5331135988235474,
      "learning_rate": 9.880273848039379e-06,
      "loss": 0.1026,
      "step": 56340
    },
    {
      "epoch": 0.09223437612510883,
      "grad_norm": 4.112022876739502,
      "learning_rate": 9.88020795582586e-06,
      "loss": 0.0885,
      "step": 56360
    },
    {
      "epoch": 0.09226710656376216,
      "grad_norm": 3.580949068069458,
      "learning_rate": 9.880142063612344e-06,
      "loss": 0.1049,
      "step": 56380
    },
    {
      "epoch": 0.09229983700241551,
      "grad_norm": 5.637147426605225,
      "learning_rate": 9.880076171398828e-06,
      "loss": 0.0927,
      "step": 56400
    },
    {
      "epoch": 0.09233256744106884,
      "grad_norm": 3.612698554992676,
      "learning_rate": 9.88001027918531e-06,
      "loss": 0.0715,
      "step": 56420
    },
    {
      "epoch": 0.09236529787972218,
      "grad_norm": 8.349162101745605,
      "learning_rate": 9.879944386971793e-06,
      "loss": 0.0916,
      "step": 56440
    },
    {
      "epoch": 0.09239802831837553,
      "grad_norm": 3.900970935821533,
      "learning_rate": 9.879878494758275e-06,
      "loss": 0.0879,
      "step": 56460
    },
    {
      "epoch": 0.09243075875702886,
      "grad_norm": 4.713089942932129,
      "learning_rate": 9.879812602544759e-06,
      "loss": 0.0771,
      "step": 56480
    },
    {
      "epoch": 0.0924634891956822,
      "grad_norm": 2.518850803375244,
      "learning_rate": 9.87974671033124e-06,
      "loss": 0.0957,
      "step": 56500
    },
    {
      "epoch": 0.09249621963433553,
      "grad_norm": 2.5935850143432617,
      "learning_rate": 9.879680818117724e-06,
      "loss": 0.0874,
      "step": 56520
    },
    {
      "epoch": 0.09252895007298888,
      "grad_norm": 2.9757444858551025,
      "learning_rate": 9.879614925904206e-06,
      "loss": 0.1023,
      "step": 56540
    },
    {
      "epoch": 0.09256168051164222,
      "grad_norm": 7.275620460510254,
      "learning_rate": 9.87954903369069e-06,
      "loss": 0.0922,
      "step": 56560
    },
    {
      "epoch": 0.09259441095029555,
      "grad_norm": 6.9810872077941895,
      "learning_rate": 9.879483141477172e-06,
      "loss": 0.1076,
      "step": 56580
    },
    {
      "epoch": 0.0926271413889489,
      "grad_norm": 2.299438714981079,
      "learning_rate": 9.879417249263655e-06,
      "loss": 0.0949,
      "step": 56600
    },
    {
      "epoch": 0.09265987182760223,
      "grad_norm": 0.7972704172134399,
      "learning_rate": 9.879351357050139e-06,
      "loss": 0.0814,
      "step": 56620
    },
    {
      "epoch": 0.09269260226625557,
      "grad_norm": 3.7042622566223145,
      "learning_rate": 9.87928546483662e-06,
      "loss": 0.1043,
      "step": 56640
    },
    {
      "epoch": 0.09272533270490892,
      "grad_norm": 2.432155132293701,
      "learning_rate": 9.879219572623104e-06,
      "loss": 0.0864,
      "step": 56660
    },
    {
      "epoch": 0.09275806314356225,
      "grad_norm": 1.1846585273742676,
      "learning_rate": 9.879153680409586e-06,
      "loss": 0.0832,
      "step": 56680
    },
    {
      "epoch": 0.09279079358221559,
      "grad_norm": 3.4643094539642334,
      "learning_rate": 9.87908778819607e-06,
      "loss": 0.0883,
      "step": 56700
    },
    {
      "epoch": 0.09282352402086892,
      "grad_norm": 1.0939160585403442,
      "learning_rate": 9.879021895982552e-06,
      "loss": 0.0903,
      "step": 56720
    },
    {
      "epoch": 0.09285625445952227,
      "grad_norm": 7.819724082946777,
      "learning_rate": 9.878956003769035e-06,
      "loss": 0.1003,
      "step": 56740
    },
    {
      "epoch": 0.09288898489817561,
      "grad_norm": 1.582091212272644,
      "learning_rate": 9.878890111555519e-06,
      "loss": 0.0716,
      "step": 56760
    },
    {
      "epoch": 0.09292171533682894,
      "grad_norm": 6.450622081756592,
      "learning_rate": 9.878824219342e-06,
      "loss": 0.0772,
      "step": 56780
    },
    {
      "epoch": 0.09295444577548229,
      "grad_norm": 8.593973159790039,
      "learning_rate": 9.878758327128484e-06,
      "loss": 0.0884,
      "step": 56800
    },
    {
      "epoch": 0.09298717621413562,
      "grad_norm": 2.9190776348114014,
      "learning_rate": 9.878692434914968e-06,
      "loss": 0.09,
      "step": 56820
    },
    {
      "epoch": 0.09301990665278896,
      "grad_norm": 5.6090474128723145,
      "learning_rate": 9.87862654270145e-06,
      "loss": 0.079,
      "step": 56840
    },
    {
      "epoch": 0.0930526370914423,
      "grad_norm": 4.7590131759643555,
      "learning_rate": 9.878560650487933e-06,
      "loss": 0.1102,
      "step": 56860
    },
    {
      "epoch": 0.09308536753009564,
      "grad_norm": 2.3283531665802,
      "learning_rate": 9.878494758274415e-06,
      "loss": 0.0806,
      "step": 56880
    },
    {
      "epoch": 0.09311809796874898,
      "grad_norm": 4.404531002044678,
      "learning_rate": 9.878428866060899e-06,
      "loss": 0.088,
      "step": 56900
    },
    {
      "epoch": 0.09315082840740231,
      "grad_norm": 3.385246753692627,
      "learning_rate": 9.87836297384738e-06,
      "loss": 0.0784,
      "step": 56920
    },
    {
      "epoch": 0.09318355884605566,
      "grad_norm": 4.261177062988281,
      "learning_rate": 9.878297081633864e-06,
      "loss": 0.0822,
      "step": 56940
    },
    {
      "epoch": 0.093216289284709,
      "grad_norm": 1.2454824447631836,
      "learning_rate": 9.878231189420348e-06,
      "loss": 0.0862,
      "step": 56960
    },
    {
      "epoch": 0.09324901972336233,
      "grad_norm": 2.208601236343384,
      "learning_rate": 9.87816529720683e-06,
      "loss": 0.0851,
      "step": 56980
    },
    {
      "epoch": 0.09328175016201568,
      "grad_norm": 5.010698318481445,
      "learning_rate": 9.878099404993313e-06,
      "loss": 0.1008,
      "step": 57000
    },
    {
      "epoch": 0.093314480600669,
      "grad_norm": 1.7144014835357666,
      "learning_rate": 9.878033512779795e-06,
      "loss": 0.0857,
      "step": 57020
    },
    {
      "epoch": 0.09334721103932235,
      "grad_norm": 5.025374412536621,
      "learning_rate": 9.877967620566279e-06,
      "loss": 0.0834,
      "step": 57040
    },
    {
      "epoch": 0.09337994147797568,
      "grad_norm": 2.8784496784210205,
      "learning_rate": 9.87790172835276e-06,
      "loss": 0.0811,
      "step": 57060
    },
    {
      "epoch": 0.09341267191662903,
      "grad_norm": 1.8962594270706177,
      "learning_rate": 9.877835836139244e-06,
      "loss": 0.0979,
      "step": 57080
    },
    {
      "epoch": 0.09344540235528237,
      "grad_norm": 3.0299699306488037,
      "learning_rate": 9.877769943925726e-06,
      "loss": 0.0812,
      "step": 57100
    },
    {
      "epoch": 0.0934781327939357,
      "grad_norm": 0.9326967597007751,
      "learning_rate": 9.87770405171221e-06,
      "loss": 0.0955,
      "step": 57120
    },
    {
      "epoch": 0.09351086323258904,
      "grad_norm": 1.9715884923934937,
      "learning_rate": 9.877638159498693e-06,
      "loss": 0.0924,
      "step": 57140
    },
    {
      "epoch": 0.09354359367124238,
      "grad_norm": 4.718605041503906,
      "learning_rate": 9.877572267285175e-06,
      "loss": 0.1189,
      "step": 57160
    },
    {
      "epoch": 0.09357632410989572,
      "grad_norm": 4.571564674377441,
      "learning_rate": 9.877506375071659e-06,
      "loss": 0.1064,
      "step": 57180
    },
    {
      "epoch": 0.09360905454854906,
      "grad_norm": 1.4446241855621338,
      "learning_rate": 9.877440482858142e-06,
      "loss": 0.1078,
      "step": 57200
    },
    {
      "epoch": 0.0936417849872024,
      "grad_norm": 6.4891133308410645,
      "learning_rate": 9.877374590644624e-06,
      "loss": 0.0876,
      "step": 57220
    },
    {
      "epoch": 0.09367451542585574,
      "grad_norm": 8.66517448425293,
      "learning_rate": 9.877308698431108e-06,
      "loss": 0.0848,
      "step": 57240
    },
    {
      "epoch": 0.09370724586450907,
      "grad_norm": 2.0147976875305176,
      "learning_rate": 9.87724280621759e-06,
      "loss": 0.0704,
      "step": 57260
    },
    {
      "epoch": 0.09373997630316241,
      "grad_norm": 12.947086334228516,
      "learning_rate": 9.877176914004073e-06,
      "loss": 0.0784,
      "step": 57280
    },
    {
      "epoch": 0.09377270674181576,
      "grad_norm": 3.110478639602661,
      "learning_rate": 9.877111021790557e-06,
      "loss": 0.0857,
      "step": 57300
    },
    {
      "epoch": 0.09380543718046909,
      "grad_norm": 3.1981091499328613,
      "learning_rate": 9.877045129577039e-06,
      "loss": 0.0707,
      "step": 57320
    },
    {
      "epoch": 0.09383816761912243,
      "grad_norm": 2.723785638809204,
      "learning_rate": 9.876979237363522e-06,
      "loss": 0.0974,
      "step": 57340
    },
    {
      "epoch": 0.09387089805777576,
      "grad_norm": 2.5694522857666016,
      "learning_rate": 9.876913345150004e-06,
      "loss": 0.1087,
      "step": 57360
    },
    {
      "epoch": 0.09390362849642911,
      "grad_norm": 2.898763656616211,
      "learning_rate": 9.876847452936488e-06,
      "loss": 0.0957,
      "step": 57380
    },
    {
      "epoch": 0.09393635893508245,
      "grad_norm": 4.507735729217529,
      "learning_rate": 9.87678156072297e-06,
      "loss": 0.0862,
      "step": 57400
    },
    {
      "epoch": 0.09396908937373578,
      "grad_norm": 1.275635838508606,
      "learning_rate": 9.876715668509453e-06,
      "loss": 0.0735,
      "step": 57420
    },
    {
      "epoch": 0.09400181981238913,
      "grad_norm": 6.414534568786621,
      "learning_rate": 9.876649776295935e-06,
      "loss": 0.1042,
      "step": 57440
    },
    {
      "epoch": 0.09403455025104246,
      "grad_norm": 6.207351207733154,
      "learning_rate": 9.876583884082419e-06,
      "loss": 0.1078,
      "step": 57460
    },
    {
      "epoch": 0.0940672806896958,
      "grad_norm": 4.73927640914917,
      "learning_rate": 9.8765179918689e-06,
      "loss": 0.0734,
      "step": 57480
    },
    {
      "epoch": 0.09410001112834915,
      "grad_norm": 2.180302143096924,
      "learning_rate": 9.876452099655384e-06,
      "loss": 0.0949,
      "step": 57500
    },
    {
      "epoch": 0.09413274156700248,
      "grad_norm": 5.227784156799316,
      "learning_rate": 9.876386207441866e-06,
      "loss": 0.102,
      "step": 57520
    },
    {
      "epoch": 0.09416547200565582,
      "grad_norm": 3.2957675457000732,
      "learning_rate": 9.87632031522835e-06,
      "loss": 0.0823,
      "step": 57540
    },
    {
      "epoch": 0.09419820244430915,
      "grad_norm": 5.764142990112305,
      "learning_rate": 9.876254423014833e-06,
      "loss": 0.0797,
      "step": 57560
    },
    {
      "epoch": 0.0942309328829625,
      "grad_norm": 3.5336532592773438,
      "learning_rate": 9.876188530801315e-06,
      "loss": 0.0791,
      "step": 57580
    },
    {
      "epoch": 0.09426366332161584,
      "grad_norm": 4.262814521789551,
      "learning_rate": 9.876122638587799e-06,
      "loss": 0.0747,
      "step": 57600
    },
    {
      "epoch": 0.09429639376026917,
      "grad_norm": 4.109720230102539,
      "learning_rate": 9.876056746374282e-06,
      "loss": 0.0986,
      "step": 57620
    },
    {
      "epoch": 0.09432912419892252,
      "grad_norm": 11.559442520141602,
      "learning_rate": 9.875990854160764e-06,
      "loss": 0.1027,
      "step": 57640
    },
    {
      "epoch": 0.09436185463757585,
      "grad_norm": 0.7833722829818726,
      "learning_rate": 9.875924961947248e-06,
      "loss": 0.0759,
      "step": 57660
    },
    {
      "epoch": 0.09439458507622919,
      "grad_norm": 13.642595291137695,
      "learning_rate": 9.875859069733732e-06,
      "loss": 0.0914,
      "step": 57680
    },
    {
      "epoch": 0.09442731551488254,
      "grad_norm": 1.8013497591018677,
      "learning_rate": 9.875793177520213e-06,
      "loss": 0.085,
      "step": 57700
    },
    {
      "epoch": 0.09446004595353587,
      "grad_norm": 2.3143694400787354,
      "learning_rate": 9.875727285306697e-06,
      "loss": 0.0771,
      "step": 57720
    },
    {
      "epoch": 0.09449277639218921,
      "grad_norm": 3.968130350112915,
      "learning_rate": 9.875661393093179e-06,
      "loss": 0.0874,
      "step": 57740
    },
    {
      "epoch": 0.09452550683084254,
      "grad_norm": 2.9280686378479004,
      "learning_rate": 9.875595500879662e-06,
      "loss": 0.0887,
      "step": 57760
    },
    {
      "epoch": 0.09455823726949589,
      "grad_norm": 1.7653344869613647,
      "learning_rate": 9.875529608666144e-06,
      "loss": 0.0772,
      "step": 57780
    },
    {
      "epoch": 0.09459096770814922,
      "grad_norm": 11.918485641479492,
      "learning_rate": 9.875463716452628e-06,
      "loss": 0.086,
      "step": 57800
    },
    {
      "epoch": 0.09462369814680256,
      "grad_norm": 5.511264324188232,
      "learning_rate": 9.87539782423911e-06,
      "loss": 0.0894,
      "step": 57820
    },
    {
      "epoch": 0.0946564285854559,
      "grad_norm": 2.616316318511963,
      "learning_rate": 9.875331932025593e-06,
      "loss": 0.0817,
      "step": 57840
    },
    {
      "epoch": 0.09468915902410924,
      "grad_norm": 3.104931354522705,
      "learning_rate": 9.875266039812075e-06,
      "loss": 0.0971,
      "step": 57860
    },
    {
      "epoch": 0.09472188946276258,
      "grad_norm": 2.681675672531128,
      "learning_rate": 9.875200147598559e-06,
      "loss": 0.1083,
      "step": 57880
    },
    {
      "epoch": 0.09475461990141591,
      "grad_norm": 2.2288293838500977,
      "learning_rate": 9.87513425538504e-06,
      "loss": 0.1005,
      "step": 57900
    },
    {
      "epoch": 0.09478735034006926,
      "grad_norm": 7.194356918334961,
      "learning_rate": 9.875068363171524e-06,
      "loss": 0.0766,
      "step": 57920
    },
    {
      "epoch": 0.0948200807787226,
      "grad_norm": 4.499239921569824,
      "learning_rate": 9.875002470958008e-06,
      "loss": 0.0869,
      "step": 57940
    },
    {
      "epoch": 0.09485281121737593,
      "grad_norm": 3.75744891166687,
      "learning_rate": 9.87493657874449e-06,
      "loss": 0.0868,
      "step": 57960
    },
    {
      "epoch": 0.09488554165602928,
      "grad_norm": 5.359435558319092,
      "learning_rate": 9.874870686530973e-06,
      "loss": 0.1003,
      "step": 57980
    },
    {
      "epoch": 0.09491827209468261,
      "grad_norm": 1.453687310218811,
      "learning_rate": 9.874804794317457e-06,
      "loss": 0.0923,
      "step": 58000
    },
    {
      "epoch": 0.09495100253333595,
      "grad_norm": 7.247391223907471,
      "learning_rate": 9.874738902103939e-06,
      "loss": 0.0781,
      "step": 58020
    },
    {
      "epoch": 0.0949837329719893,
      "grad_norm": 5.457358360290527,
      "learning_rate": 9.874673009890423e-06,
      "loss": 0.083,
      "step": 58040
    },
    {
      "epoch": 0.09501646341064263,
      "grad_norm": 3.8619117736816406,
      "learning_rate": 9.874607117676906e-06,
      "loss": 0.1017,
      "step": 58060
    },
    {
      "epoch": 0.09504919384929597,
      "grad_norm": 6.972201824188232,
      "learning_rate": 9.874541225463388e-06,
      "loss": 0.0795,
      "step": 58080
    },
    {
      "epoch": 0.0950819242879493,
      "grad_norm": 2.9968180656433105,
      "learning_rate": 9.874475333249872e-06,
      "loss": 0.0773,
      "step": 58100
    },
    {
      "epoch": 0.09511465472660265,
      "grad_norm": 10.8342866897583,
      "learning_rate": 9.874409441036353e-06,
      "loss": 0.0887,
      "step": 58120
    },
    {
      "epoch": 0.09514738516525599,
      "grad_norm": 1.55564546585083,
      "learning_rate": 9.874343548822837e-06,
      "loss": 0.0722,
      "step": 58140
    },
    {
      "epoch": 0.09518011560390932,
      "grad_norm": 1.0200493335723877,
      "learning_rate": 9.874277656609319e-06,
      "loss": 0.075,
      "step": 58160
    },
    {
      "epoch": 0.09521284604256267,
      "grad_norm": 3.2063164710998535,
      "learning_rate": 9.874211764395803e-06,
      "loss": 0.0806,
      "step": 58180
    },
    {
      "epoch": 0.095245576481216,
      "grad_norm": 2.4430503845214844,
      "learning_rate": 9.874145872182284e-06,
      "loss": 0.0786,
      "step": 58200
    },
    {
      "epoch": 0.09527830691986934,
      "grad_norm": 2.214521646499634,
      "learning_rate": 9.874079979968768e-06,
      "loss": 0.0809,
      "step": 58220
    },
    {
      "epoch": 0.09531103735852268,
      "grad_norm": 9.468746185302734,
      "learning_rate": 9.87401408775525e-06,
      "loss": 0.1098,
      "step": 58240
    },
    {
      "epoch": 0.09534376779717602,
      "grad_norm": 2.2747602462768555,
      "learning_rate": 9.873948195541734e-06,
      "loss": 0.0771,
      "step": 58260
    },
    {
      "epoch": 0.09537649823582936,
      "grad_norm": 9.616325378417969,
      "learning_rate": 9.873882303328215e-06,
      "loss": 0.0813,
      "step": 58280
    },
    {
      "epoch": 0.09540922867448269,
      "grad_norm": 3.2818422317504883,
      "learning_rate": 9.873816411114699e-06,
      "loss": 0.084,
      "step": 58300
    },
    {
      "epoch": 0.09544195911313604,
      "grad_norm": 2.528547525405884,
      "learning_rate": 9.873750518901183e-06,
      "loss": 0.091,
      "step": 58320
    },
    {
      "epoch": 0.09547468955178938,
      "grad_norm": 4.063212871551514,
      "learning_rate": 9.873684626687664e-06,
      "loss": 0.1015,
      "step": 58340
    },
    {
      "epoch": 0.09550741999044271,
      "grad_norm": 1.8938508033752441,
      "learning_rate": 9.873618734474148e-06,
      "loss": 0.0995,
      "step": 58360
    },
    {
      "epoch": 0.09554015042909605,
      "grad_norm": 9.195660591125488,
      "learning_rate": 9.873552842260632e-06,
      "loss": 0.1046,
      "step": 58380
    },
    {
      "epoch": 0.09557288086774939,
      "grad_norm": 4.419467449188232,
      "learning_rate": 9.873486950047114e-06,
      "loss": 0.0854,
      "step": 58400
    },
    {
      "epoch": 0.09560561130640273,
      "grad_norm": 2.337029218673706,
      "learning_rate": 9.873421057833597e-06,
      "loss": 0.0961,
      "step": 58420
    },
    {
      "epoch": 0.09563834174505607,
      "grad_norm": 5.364217758178711,
      "learning_rate": 9.87335516562008e-06,
      "loss": 0.0967,
      "step": 58440
    },
    {
      "epoch": 0.0956710721837094,
      "grad_norm": 2.685333728790283,
      "learning_rate": 9.873289273406563e-06,
      "loss": 0.0943,
      "step": 58460
    },
    {
      "epoch": 0.09570380262236275,
      "grad_norm": 3.771651029586792,
      "learning_rate": 9.873223381193046e-06,
      "loss": 0.0942,
      "step": 58480
    },
    {
      "epoch": 0.09573653306101608,
      "grad_norm": 3.6105539798736572,
      "learning_rate": 9.873157488979528e-06,
      "loss": 0.106,
      "step": 58500
    },
    {
      "epoch": 0.09576926349966942,
      "grad_norm": 4.39670991897583,
      "learning_rate": 9.873091596766012e-06,
      "loss": 0.0883,
      "step": 58520
    },
    {
      "epoch": 0.09580199393832275,
      "grad_norm": 7.576918601989746,
      "learning_rate": 9.873025704552494e-06,
      "loss": 0.0843,
      "step": 58540
    },
    {
      "epoch": 0.0958347243769761,
      "grad_norm": 3.677105665206909,
      "learning_rate": 9.872959812338977e-06,
      "loss": 0.0834,
      "step": 58560
    },
    {
      "epoch": 0.09586745481562944,
      "grad_norm": 1.4129773378372192,
      "learning_rate": 9.872893920125459e-06,
      "loss": 0.0918,
      "step": 58580
    },
    {
      "epoch": 0.09590018525428277,
      "grad_norm": 18.606327056884766,
      "learning_rate": 9.872828027911943e-06,
      "loss": 0.0817,
      "step": 58600
    },
    {
      "epoch": 0.09593291569293612,
      "grad_norm": 1.5486003160476685,
      "learning_rate": 9.872762135698425e-06,
      "loss": 0.0792,
      "step": 58620
    },
    {
      "epoch": 0.09596564613158945,
      "grad_norm": 6.035514831542969,
      "learning_rate": 9.872696243484908e-06,
      "loss": 0.0965,
      "step": 58640
    },
    {
      "epoch": 0.0959983765702428,
      "grad_norm": 5.404582500457764,
      "learning_rate": 9.87263035127139e-06,
      "loss": 0.0901,
      "step": 58660
    },
    {
      "epoch": 0.09603110700889614,
      "grad_norm": 2.875938653945923,
      "learning_rate": 9.872564459057874e-06,
      "loss": 0.0804,
      "step": 58680
    },
    {
      "epoch": 0.09606383744754947,
      "grad_norm": 4.243218898773193,
      "learning_rate": 9.872498566844355e-06,
      "loss": 0.0743,
      "step": 58700
    },
    {
      "epoch": 0.09609656788620281,
      "grad_norm": 3.8565449714660645,
      "learning_rate": 9.872432674630839e-06,
      "loss": 0.0993,
      "step": 58720
    },
    {
      "epoch": 0.09612929832485614,
      "grad_norm": 2.7969255447387695,
      "learning_rate": 9.872366782417323e-06,
      "loss": 0.0883,
      "step": 58740
    },
    {
      "epoch": 0.09616202876350949,
      "grad_norm": 5.210634231567383,
      "learning_rate": 9.872300890203805e-06,
      "loss": 0.0894,
      "step": 58760
    },
    {
      "epoch": 0.09619475920216283,
      "grad_norm": 5.595044136047363,
      "learning_rate": 9.872234997990288e-06,
      "loss": 0.0777,
      "step": 58780
    },
    {
      "epoch": 0.09622748964081616,
      "grad_norm": 2.1718032360076904,
      "learning_rate": 9.872169105776772e-06,
      "loss": 0.0971,
      "step": 58800
    },
    {
      "epoch": 0.09626022007946951,
      "grad_norm": 5.703341484069824,
      "learning_rate": 9.872103213563254e-06,
      "loss": 0.0859,
      "step": 58820
    },
    {
      "epoch": 0.09629295051812284,
      "grad_norm": 5.467666149139404,
      "learning_rate": 9.872037321349737e-06,
      "loss": 0.0903,
      "step": 58840
    },
    {
      "epoch": 0.09632568095677618,
      "grad_norm": 3.3931894302368164,
      "learning_rate": 9.87197142913622e-06,
      "loss": 0.0979,
      "step": 58860
    },
    {
      "epoch": 0.09635841139542953,
      "grad_norm": 5.751612663269043,
      "learning_rate": 9.871905536922703e-06,
      "loss": 0.0997,
      "step": 58880
    },
    {
      "epoch": 0.09639114183408286,
      "grad_norm": 4.129360198974609,
      "learning_rate": 9.871839644709186e-06,
      "loss": 0.0945,
      "step": 58900
    },
    {
      "epoch": 0.0964238722727362,
      "grad_norm": 4.122641086578369,
      "learning_rate": 9.871773752495668e-06,
      "loss": 0.1097,
      "step": 58920
    },
    {
      "epoch": 0.09645660271138953,
      "grad_norm": 2.9131362438201904,
      "learning_rate": 9.871707860282152e-06,
      "loss": 0.0802,
      "step": 58940
    },
    {
      "epoch": 0.09648933315004288,
      "grad_norm": 4.527230739593506,
      "learning_rate": 9.871641968068634e-06,
      "loss": 0.087,
      "step": 58960
    },
    {
      "epoch": 0.09652206358869622,
      "grad_norm": 2.406935453414917,
      "learning_rate": 9.871576075855117e-06,
      "loss": 0.0692,
      "step": 58980
    },
    {
      "epoch": 0.09655479402734955,
      "grad_norm": 3.0299673080444336,
      "learning_rate": 9.871510183641599e-06,
      "loss": 0.0952,
      "step": 59000
    },
    {
      "epoch": 0.0965875244660029,
      "grad_norm": 3.851451873779297,
      "learning_rate": 9.871444291428083e-06,
      "loss": 0.0713,
      "step": 59020
    },
    {
      "epoch": 0.09662025490465623,
      "grad_norm": 12.316381454467773,
      "learning_rate": 9.871378399214565e-06,
      "loss": 0.0848,
      "step": 59040
    },
    {
      "epoch": 0.09665298534330957,
      "grad_norm": 3.2326292991638184,
      "learning_rate": 9.871312507001048e-06,
      "loss": 0.0883,
      "step": 59060
    },
    {
      "epoch": 0.09668571578196292,
      "grad_norm": 2.707523822784424,
      "learning_rate": 9.871246614787532e-06,
      "loss": 0.1034,
      "step": 59080
    },
    {
      "epoch": 0.09671844622061625,
      "grad_norm": 3.143031358718872,
      "learning_rate": 9.871180722574014e-06,
      "loss": 0.0986,
      "step": 59100
    },
    {
      "epoch": 0.09675117665926959,
      "grad_norm": 3.388827323913574,
      "learning_rate": 9.871114830360497e-06,
      "loss": 0.0843,
      "step": 59120
    },
    {
      "epoch": 0.09678390709792292,
      "grad_norm": 2.389836311340332,
      "learning_rate": 9.871048938146979e-06,
      "loss": 0.0984,
      "step": 59140
    },
    {
      "epoch": 0.09681663753657627,
      "grad_norm": 3.070573568344116,
      "learning_rate": 9.870983045933463e-06,
      "loss": 0.0841,
      "step": 59160
    },
    {
      "epoch": 0.0968493679752296,
      "grad_norm": 1.836690068244934,
      "learning_rate": 9.870917153719946e-06,
      "loss": 0.0823,
      "step": 59180
    },
    {
      "epoch": 0.09688209841388294,
      "grad_norm": 3.2379987239837646,
      "learning_rate": 9.870851261506428e-06,
      "loss": 0.0753,
      "step": 59200
    },
    {
      "epoch": 0.09691482885253629,
      "grad_norm": 2.951327085494995,
      "learning_rate": 9.870785369292912e-06,
      "loss": 0.1013,
      "step": 59220
    },
    {
      "epoch": 0.09694755929118962,
      "grad_norm": 4.220248699188232,
      "learning_rate": 9.870719477079395e-06,
      "loss": 0.0892,
      "step": 59240
    },
    {
      "epoch": 0.09698028972984296,
      "grad_norm": 2.332916259765625,
      "learning_rate": 9.870653584865877e-06,
      "loss": 0.0798,
      "step": 59260
    },
    {
      "epoch": 0.09701302016849629,
      "grad_norm": 7.488060474395752,
      "learning_rate": 9.870587692652361e-06,
      "loss": 0.0934,
      "step": 59280
    },
    {
      "epoch": 0.09704575060714964,
      "grad_norm": 5.634520053863525,
      "learning_rate": 9.870521800438843e-06,
      "loss": 0.0849,
      "step": 59300
    },
    {
      "epoch": 0.09707848104580298,
      "grad_norm": 4.821596622467041,
      "learning_rate": 9.870455908225326e-06,
      "loss": 0.1032,
      "step": 59320
    },
    {
      "epoch": 0.09711121148445631,
      "grad_norm": 1.357086181640625,
      "learning_rate": 9.870390016011808e-06,
      "loss": 0.0928,
      "step": 59340
    },
    {
      "epoch": 0.09714394192310966,
      "grad_norm": 8.169440269470215,
      "learning_rate": 9.870324123798292e-06,
      "loss": 0.0879,
      "step": 59360
    },
    {
      "epoch": 0.09717667236176299,
      "grad_norm": 4.274595260620117,
      "learning_rate": 9.870258231584774e-06,
      "loss": 0.0876,
      "step": 59380
    },
    {
      "epoch": 0.09720940280041633,
      "grad_norm": 3.606180429458618,
      "learning_rate": 9.870192339371257e-06,
      "loss": 0.0879,
      "step": 59400
    },
    {
      "epoch": 0.09724213323906968,
      "grad_norm": 1.856575846672058,
      "learning_rate": 9.870126447157741e-06,
      "loss": 0.0913,
      "step": 59420
    },
    {
      "epoch": 0.097274863677723,
      "grad_norm": 8.964384078979492,
      "learning_rate": 9.870060554944223e-06,
      "loss": 0.0937,
      "step": 59440
    },
    {
      "epoch": 0.09730759411637635,
      "grad_norm": 3.4269235134124756,
      "learning_rate": 9.869994662730706e-06,
      "loss": 0.1166,
      "step": 59460
    },
    {
      "epoch": 0.09734032455502968,
      "grad_norm": 1.5916919708251953,
      "learning_rate": 9.869928770517188e-06,
      "loss": 0.0726,
      "step": 59480
    },
    {
      "epoch": 0.09737305499368303,
      "grad_norm": 9.03477668762207,
      "learning_rate": 9.869862878303672e-06,
      "loss": 0.0996,
      "step": 59500
    },
    {
      "epoch": 0.09740578543233637,
      "grad_norm": 3.238842725753784,
      "learning_rate": 9.869796986090154e-06,
      "loss": 0.0873,
      "step": 59520
    },
    {
      "epoch": 0.0974385158709897,
      "grad_norm": 4.0985636711120605,
      "learning_rate": 9.869731093876637e-06,
      "loss": 0.0653,
      "step": 59540
    },
    {
      "epoch": 0.09747124630964304,
      "grad_norm": 4.318750381469727,
      "learning_rate": 9.86966520166312e-06,
      "loss": 0.0809,
      "step": 59560
    },
    {
      "epoch": 0.09750397674829638,
      "grad_norm": 3.9541940689086914,
      "learning_rate": 9.869599309449603e-06,
      "loss": 0.0914,
      "step": 59580
    },
    {
      "epoch": 0.09753670718694972,
      "grad_norm": 1.1891504526138306,
      "learning_rate": 9.869533417236086e-06,
      "loss": 0.0884,
      "step": 59600
    },
    {
      "epoch": 0.09756943762560306,
      "grad_norm": 3.8658857345581055,
      "learning_rate": 9.869467525022568e-06,
      "loss": 0.078,
      "step": 59620
    },
    {
      "epoch": 0.0976021680642564,
      "grad_norm": 5.601024150848389,
      "learning_rate": 9.869401632809052e-06,
      "loss": 0.0835,
      "step": 59640
    },
    {
      "epoch": 0.09763489850290974,
      "grad_norm": 3.2295761108398438,
      "learning_rate": 9.869335740595535e-06,
      "loss": 0.0924,
      "step": 59660
    },
    {
      "epoch": 0.09766762894156307,
      "grad_norm": 4.310055732727051,
      "learning_rate": 9.869269848382017e-06,
      "loss": 0.0888,
      "step": 59680
    },
    {
      "epoch": 0.09770035938021641,
      "grad_norm": 3.877657175064087,
      "learning_rate": 9.869203956168501e-06,
      "loss": 0.094,
      "step": 59700
    },
    {
      "epoch": 0.09773308981886976,
      "grad_norm": 6.504143714904785,
      "learning_rate": 9.869138063954983e-06,
      "loss": 0.0813,
      "step": 59720
    },
    {
      "epoch": 0.09776582025752309,
      "grad_norm": 3.017930030822754,
      "learning_rate": 9.869072171741466e-06,
      "loss": 0.0777,
      "step": 59740
    },
    {
      "epoch": 0.09779855069617643,
      "grad_norm": 2.6879827976226807,
      "learning_rate": 9.86900627952795e-06,
      "loss": 0.07,
      "step": 59760
    },
    {
      "epoch": 0.09783128113482976,
      "grad_norm": 4.308539867401123,
      "learning_rate": 9.868940387314432e-06,
      "loss": 0.0989,
      "step": 59780
    },
    {
      "epoch": 0.09786401157348311,
      "grad_norm": 2.5749170780181885,
      "learning_rate": 9.868874495100915e-06,
      "loss": 0.0856,
      "step": 59800
    },
    {
      "epoch": 0.09789674201213645,
      "grad_norm": 1.755948543548584,
      "learning_rate": 9.868808602887397e-06,
      "loss": 0.0761,
      "step": 59820
    },
    {
      "epoch": 0.09792947245078978,
      "grad_norm": 6.125967025756836,
      "learning_rate": 9.868742710673881e-06,
      "loss": 0.0877,
      "step": 59840
    },
    {
      "epoch": 0.09796220288944313,
      "grad_norm": 4.12894868850708,
      "learning_rate": 9.868676818460363e-06,
      "loss": 0.1284,
      "step": 59860
    },
    {
      "epoch": 0.09799493332809646,
      "grad_norm": 2.3581652641296387,
      "learning_rate": 9.868610926246846e-06,
      "loss": 0.0785,
      "step": 59880
    },
    {
      "epoch": 0.0980276637667498,
      "grad_norm": 3.6083476543426514,
      "learning_rate": 9.868545034033328e-06,
      "loss": 0.0666,
      "step": 59900
    },
    {
      "epoch": 0.09806039420540313,
      "grad_norm": 2.479447364807129,
      "learning_rate": 9.868479141819812e-06,
      "loss": 0.0741,
      "step": 59920
    },
    {
      "epoch": 0.09809312464405648,
      "grad_norm": 4.368551254272461,
      "learning_rate": 9.868413249606294e-06,
      "loss": 0.0983,
      "step": 59940
    },
    {
      "epoch": 0.09812585508270982,
      "grad_norm": 0.5971171259880066,
      "learning_rate": 9.868347357392777e-06,
      "loss": 0.0779,
      "step": 59960
    },
    {
      "epoch": 0.09815858552136315,
      "grad_norm": 8.562223434448242,
      "learning_rate": 9.868281465179261e-06,
      "loss": 0.0773,
      "step": 59980
    },
    {
      "epoch": 0.0981913159600165,
      "grad_norm": 1.8265849351882935,
      "learning_rate": 9.868215572965743e-06,
      "loss": 0.0822,
      "step": 60000
    },
    {
      "epoch": 0.09822404639866983,
      "grad_norm": 4.395759582519531,
      "learning_rate": 9.868149680752226e-06,
      "loss": 0.1043,
      "step": 60020
    },
    {
      "epoch": 0.09825677683732317,
      "grad_norm": 11.241462707519531,
      "learning_rate": 9.86808378853871e-06,
      "loss": 0.0795,
      "step": 60040
    },
    {
      "epoch": 0.09828950727597652,
      "grad_norm": 3.3997397422790527,
      "learning_rate": 9.868017896325192e-06,
      "loss": 0.1002,
      "step": 60060
    },
    {
      "epoch": 0.09832223771462985,
      "grad_norm": 13.31643009185791,
      "learning_rate": 9.867952004111676e-06,
      "loss": 0.1049,
      "step": 60080
    },
    {
      "epoch": 0.09835496815328319,
      "grad_norm": 4.214633464813232,
      "learning_rate": 9.867886111898157e-06,
      "loss": 0.077,
      "step": 60100
    },
    {
      "epoch": 0.09838769859193652,
      "grad_norm": 13.665872573852539,
      "learning_rate": 9.867820219684641e-06,
      "loss": 0.0763,
      "step": 60120
    },
    {
      "epoch": 0.09842042903058987,
      "grad_norm": 2.056861639022827,
      "learning_rate": 9.867754327471125e-06,
      "loss": 0.0694,
      "step": 60140
    },
    {
      "epoch": 0.09845315946924321,
      "grad_norm": 1.5580233335494995,
      "learning_rate": 9.867688435257607e-06,
      "loss": 0.0774,
      "step": 60160
    },
    {
      "epoch": 0.09848588990789654,
      "grad_norm": 6.6901397705078125,
      "learning_rate": 9.86762254304409e-06,
      "loss": 0.0979,
      "step": 60180
    },
    {
      "epoch": 0.09851862034654989,
      "grad_norm": 1.9873154163360596,
      "learning_rate": 9.867556650830572e-06,
      "loss": 0.0849,
      "step": 60200
    },
    {
      "epoch": 0.09855135078520322,
      "grad_norm": 5.147111415863037,
      "learning_rate": 9.867490758617056e-06,
      "loss": 0.0781,
      "step": 60220
    },
    {
      "epoch": 0.09858408122385656,
      "grad_norm": 3.6672589778900146,
      "learning_rate": 9.867424866403537e-06,
      "loss": 0.0789,
      "step": 60240
    },
    {
      "epoch": 0.0986168116625099,
      "grad_norm": 4.794676303863525,
      "learning_rate": 9.867358974190021e-06,
      "loss": 0.0845,
      "step": 60260
    },
    {
      "epoch": 0.09864954210116324,
      "grad_norm": 7.736025810241699,
      "learning_rate": 9.867293081976503e-06,
      "loss": 0.0666,
      "step": 60280
    },
    {
      "epoch": 0.09868227253981658,
      "grad_norm": 1.8395036458969116,
      "learning_rate": 9.867227189762987e-06,
      "loss": 0.0849,
      "step": 60300
    },
    {
      "epoch": 0.09871500297846991,
      "grad_norm": 1.8842978477478027,
      "learning_rate": 9.867161297549468e-06,
      "loss": 0.0911,
      "step": 60320
    },
    {
      "epoch": 0.09874773341712326,
      "grad_norm": 4.020163059234619,
      "learning_rate": 9.867095405335952e-06,
      "loss": 0.0938,
      "step": 60340
    },
    {
      "epoch": 0.0987804638557766,
      "grad_norm": 4.115949630737305,
      "learning_rate": 9.867029513122434e-06,
      "loss": 0.0791,
      "step": 60360
    },
    {
      "epoch": 0.09881319429442993,
      "grad_norm": 2.5709261894226074,
      "learning_rate": 9.866963620908917e-06,
      "loss": 0.0882,
      "step": 60380
    },
    {
      "epoch": 0.09884592473308328,
      "grad_norm": 5.720667839050293,
      "learning_rate": 9.866897728695401e-06,
      "loss": 0.0694,
      "step": 60400
    },
    {
      "epoch": 0.0988786551717366,
      "grad_norm": 18.399492263793945,
      "learning_rate": 9.866831836481883e-06,
      "loss": 0.0877,
      "step": 60420
    },
    {
      "epoch": 0.09891138561038995,
      "grad_norm": 3.1902570724487305,
      "learning_rate": 9.866765944268367e-06,
      "loss": 0.0757,
      "step": 60440
    },
    {
      "epoch": 0.0989441160490433,
      "grad_norm": 2.0956547260284424,
      "learning_rate": 9.86670005205485e-06,
      "loss": 0.0915,
      "step": 60460
    },
    {
      "epoch": 0.09897684648769663,
      "grad_norm": 1.8100497722625732,
      "learning_rate": 9.866634159841334e-06,
      "loss": 0.0733,
      "step": 60480
    },
    {
      "epoch": 0.09900957692634997,
      "grad_norm": 3.7002642154693604,
      "learning_rate": 9.866568267627816e-06,
      "loss": 0.0933,
      "step": 60500
    },
    {
      "epoch": 0.0990423073650033,
      "grad_norm": 2.1367697715759277,
      "learning_rate": 9.8665023754143e-06,
      "loss": 0.08,
      "step": 60520
    },
    {
      "epoch": 0.09907503780365665,
      "grad_norm": 5.109748840332031,
      "learning_rate": 9.866436483200781e-06,
      "loss": 0.0775,
      "step": 60540
    },
    {
      "epoch": 0.09910776824230998,
      "grad_norm": 3.640446186065674,
      "learning_rate": 9.866370590987265e-06,
      "loss": 0.0761,
      "step": 60560
    },
    {
      "epoch": 0.09914049868096332,
      "grad_norm": 1.2221769094467163,
      "learning_rate": 9.866304698773747e-06,
      "loss": 0.0848,
      "step": 60580
    },
    {
      "epoch": 0.09917322911961667,
      "grad_norm": 1.3082664012908936,
      "learning_rate": 9.86623880656023e-06,
      "loss": 0.0828,
      "step": 60600
    },
    {
      "epoch": 0.09920595955827,
      "grad_norm": 2.176442861557007,
      "learning_rate": 9.866172914346712e-06,
      "loss": 0.0823,
      "step": 60620
    },
    {
      "epoch": 0.09923868999692334,
      "grad_norm": 1.4062626361846924,
      "learning_rate": 9.866107022133196e-06,
      "loss": 0.0798,
      "step": 60640
    },
    {
      "epoch": 0.09927142043557667,
      "grad_norm": 1.9849952459335327,
      "learning_rate": 9.866041129919678e-06,
      "loss": 0.0855,
      "step": 60660
    },
    {
      "epoch": 0.09930415087423002,
      "grad_norm": 7.85307502746582,
      "learning_rate": 9.865975237706161e-06,
      "loss": 0.0887,
      "step": 60680
    },
    {
      "epoch": 0.09933688131288336,
      "grad_norm": 3.147400140762329,
      "learning_rate": 9.865909345492643e-06,
      "loss": 0.0837,
      "step": 60700
    },
    {
      "epoch": 0.09936961175153669,
      "grad_norm": 2.8327906131744385,
      "learning_rate": 9.865843453279127e-06,
      "loss": 0.0785,
      "step": 60720
    },
    {
      "epoch": 0.09940234219019004,
      "grad_norm": 4.154290676116943,
      "learning_rate": 9.865777561065608e-06,
      "loss": 0.0674,
      "step": 60740
    },
    {
      "epoch": 0.09943507262884337,
      "grad_norm": 7.989344596862793,
      "learning_rate": 9.865711668852092e-06,
      "loss": 0.0821,
      "step": 60760
    },
    {
      "epoch": 0.09946780306749671,
      "grad_norm": 3.5239949226379395,
      "learning_rate": 9.865645776638576e-06,
      "loss": 0.0794,
      "step": 60780
    },
    {
      "epoch": 0.09950053350615005,
      "grad_norm": 3.65185284614563,
      "learning_rate": 9.865579884425058e-06,
      "loss": 0.0908,
      "step": 60800
    },
    {
      "epoch": 0.09953326394480339,
      "grad_norm": 6.505690097808838,
      "learning_rate": 9.865513992211541e-06,
      "loss": 0.0764,
      "step": 60820
    },
    {
      "epoch": 0.09956599438345673,
      "grad_norm": 2.9506590366363525,
      "learning_rate": 9.865448099998025e-06,
      "loss": 0.0838,
      "step": 60840
    },
    {
      "epoch": 0.09959872482211006,
      "grad_norm": 3.2015626430511475,
      "learning_rate": 9.865382207784507e-06,
      "loss": 0.0809,
      "step": 60860
    },
    {
      "epoch": 0.0996314552607634,
      "grad_norm": 2.9495818614959717,
      "learning_rate": 9.86531631557099e-06,
      "loss": 0.0947,
      "step": 60880
    },
    {
      "epoch": 0.09966418569941675,
      "grad_norm": 2.924175262451172,
      "learning_rate": 9.865250423357474e-06,
      "loss": 0.0916,
      "step": 60900
    },
    {
      "epoch": 0.09969691613807008,
      "grad_norm": 1.8180121183395386,
      "learning_rate": 9.865184531143956e-06,
      "loss": 0.0872,
      "step": 60920
    },
    {
      "epoch": 0.09972964657672342,
      "grad_norm": 3.6070427894592285,
      "learning_rate": 9.86511863893044e-06,
      "loss": 0.0862,
      "step": 60940
    },
    {
      "epoch": 0.09976237701537675,
      "grad_norm": 5.00179386138916,
      "learning_rate": 9.865052746716921e-06,
      "loss": 0.0782,
      "step": 60960
    },
    {
      "epoch": 0.0997951074540301,
      "grad_norm": 2.879455327987671,
      "learning_rate": 9.864986854503405e-06,
      "loss": 0.092,
      "step": 60980
    },
    {
      "epoch": 0.09982783789268344,
      "grad_norm": 2.940955877304077,
      "learning_rate": 9.864920962289887e-06,
      "loss": 0.0981,
      "step": 61000
    },
    {
      "epoch": 0.09986056833133677,
      "grad_norm": 3.085035562515259,
      "learning_rate": 9.86485507007637e-06,
      "loss": 0.0785,
      "step": 61020
    },
    {
      "epoch": 0.09989329876999012,
      "grad_norm": 4.194743633270264,
      "learning_rate": 9.864789177862852e-06,
      "loss": 0.0857,
      "step": 61040
    },
    {
      "epoch": 0.09992602920864345,
      "grad_norm": 2.377591848373413,
      "learning_rate": 9.864723285649336e-06,
      "loss": 0.077,
      "step": 61060
    },
    {
      "epoch": 0.0999587596472968,
      "grad_norm": 2.6612448692321777,
      "learning_rate": 9.864657393435818e-06,
      "loss": 0.0944,
      "step": 61080
    },
    {
      "epoch": 0.09999149008595014,
      "grad_norm": 4.013981342315674,
      "learning_rate": 9.864591501222301e-06,
      "loss": 0.1025,
      "step": 61100
    },
    {
      "epoch": 0.10002422052460347,
      "grad_norm": 2.144000291824341,
      "learning_rate": 9.864525609008783e-06,
      "loss": 0.0782,
      "step": 61120
    },
    {
      "epoch": 0.10005695096325681,
      "grad_norm": 2.606419801712036,
      "learning_rate": 9.864459716795267e-06,
      "loss": 0.0756,
      "step": 61140
    },
    {
      "epoch": 0.10008968140191014,
      "grad_norm": 2.5931947231292725,
      "learning_rate": 9.86439382458175e-06,
      "loss": 0.087,
      "step": 61160
    },
    {
      "epoch": 0.10012241184056349,
      "grad_norm": 3.2232344150543213,
      "learning_rate": 9.864327932368232e-06,
      "loss": 0.0846,
      "step": 61180
    },
    {
      "epoch": 0.10015514227921683,
      "grad_norm": 3.682368516921997,
      "learning_rate": 9.864262040154716e-06,
      "loss": 0.0767,
      "step": 61200
    },
    {
      "epoch": 0.10018787271787016,
      "grad_norm": 5.032368183135986,
      "learning_rate": 9.8641961479412e-06,
      "loss": 0.0857,
      "step": 61220
    },
    {
      "epoch": 0.10022060315652351,
      "grad_norm": 0.7904443740844727,
      "learning_rate": 9.864130255727681e-06,
      "loss": 0.1025,
      "step": 61240
    },
    {
      "epoch": 0.10025333359517684,
      "grad_norm": 3.052126407623291,
      "learning_rate": 9.864064363514165e-06,
      "loss": 0.0884,
      "step": 61260
    },
    {
      "epoch": 0.10028606403383018,
      "grad_norm": 3.387789249420166,
      "learning_rate": 9.863998471300648e-06,
      "loss": 0.0993,
      "step": 61280
    },
    {
      "epoch": 0.10031879447248351,
      "grad_norm": 2.4367432594299316,
      "learning_rate": 9.86393257908713e-06,
      "loss": 0.0991,
      "step": 61300
    },
    {
      "epoch": 0.10035152491113686,
      "grad_norm": 65.48635864257812,
      "learning_rate": 9.863866686873614e-06,
      "loss": 0.0908,
      "step": 61320
    },
    {
      "epoch": 0.1003842553497902,
      "grad_norm": 2.938469171524048,
      "learning_rate": 9.863800794660096e-06,
      "loss": 0.0777,
      "step": 61340
    },
    {
      "epoch": 0.10041698578844353,
      "grad_norm": 5.182320594787598,
      "learning_rate": 9.86373490244658e-06,
      "loss": 0.0943,
      "step": 61360
    },
    {
      "epoch": 0.10044971622709688,
      "grad_norm": 2.3982694149017334,
      "learning_rate": 9.863669010233061e-06,
      "loss": 0.1023,
      "step": 61380
    },
    {
      "epoch": 0.10048244666575021,
      "grad_norm": 2.6318626403808594,
      "learning_rate": 9.863603118019545e-06,
      "loss": 0.0745,
      "step": 61400
    },
    {
      "epoch": 0.10051517710440355,
      "grad_norm": 3.760944128036499,
      "learning_rate": 9.863537225806027e-06,
      "loss": 0.096,
      "step": 61420
    },
    {
      "epoch": 0.1005479075430569,
      "grad_norm": 3.3985390663146973,
      "learning_rate": 9.86347133359251e-06,
      "loss": 0.0723,
      "step": 61440
    },
    {
      "epoch": 0.10058063798171023,
      "grad_norm": 3.556439161300659,
      "learning_rate": 9.863405441378992e-06,
      "loss": 0.0925,
      "step": 61460
    },
    {
      "epoch": 0.10061336842036357,
      "grad_norm": 2.9994049072265625,
      "learning_rate": 9.863339549165476e-06,
      "loss": 0.0826,
      "step": 61480
    },
    {
      "epoch": 0.1006460988590169,
      "grad_norm": 3.3246474266052246,
      "learning_rate": 9.863273656951958e-06,
      "loss": 0.0754,
      "step": 61500
    },
    {
      "epoch": 0.10067882929767025,
      "grad_norm": 1.865745186805725,
      "learning_rate": 9.863207764738441e-06,
      "loss": 0.0706,
      "step": 61520
    },
    {
      "epoch": 0.10071155973632359,
      "grad_norm": 3.574780225753784,
      "learning_rate": 9.863141872524925e-06,
      "loss": 0.0769,
      "step": 61540
    },
    {
      "epoch": 0.10074429017497692,
      "grad_norm": 5.393348217010498,
      "learning_rate": 9.863075980311407e-06,
      "loss": 0.0956,
      "step": 61560
    },
    {
      "epoch": 0.10077702061363027,
      "grad_norm": 3.2281622886657715,
      "learning_rate": 9.86301008809789e-06,
      "loss": 0.0723,
      "step": 61580
    },
    {
      "epoch": 0.1008097510522836,
      "grad_norm": 5.800656795501709,
      "learning_rate": 9.862944195884372e-06,
      "loss": 0.0977,
      "step": 61600
    },
    {
      "epoch": 0.10084248149093694,
      "grad_norm": 8.247782707214355,
      "learning_rate": 9.862878303670856e-06,
      "loss": 0.1042,
      "step": 61620
    },
    {
      "epoch": 0.10087521192959029,
      "grad_norm": 11.98727035522461,
      "learning_rate": 9.86281241145734e-06,
      "loss": 0.0771,
      "step": 61640
    },
    {
      "epoch": 0.10090794236824362,
      "grad_norm": 2.6442883014678955,
      "learning_rate": 9.862746519243821e-06,
      "loss": 0.0945,
      "step": 61660
    },
    {
      "epoch": 0.10094067280689696,
      "grad_norm": 3.0595266819000244,
      "learning_rate": 9.862680627030305e-06,
      "loss": 0.0781,
      "step": 61680
    },
    {
      "epoch": 0.10097340324555029,
      "grad_norm": 7.536149501800537,
      "learning_rate": 9.862614734816788e-06,
      "loss": 0.0873,
      "step": 61700
    },
    {
      "epoch": 0.10100613368420364,
      "grad_norm": 1.8254843950271606,
      "learning_rate": 9.86254884260327e-06,
      "loss": 0.0966,
      "step": 61720
    },
    {
      "epoch": 0.10103886412285698,
      "grad_norm": 3.327019214630127,
      "learning_rate": 9.862482950389754e-06,
      "loss": 0.0654,
      "step": 61740
    },
    {
      "epoch": 0.10107159456151031,
      "grad_norm": 36.15378189086914,
      "learning_rate": 9.862417058176236e-06,
      "loss": 0.0697,
      "step": 61760
    },
    {
      "epoch": 0.10110432500016366,
      "grad_norm": 1.0630989074707031,
      "learning_rate": 9.86235116596272e-06,
      "loss": 0.0691,
      "step": 61780
    },
    {
      "epoch": 0.10113705543881699,
      "grad_norm": 5.159254550933838,
      "learning_rate": 9.862285273749201e-06,
      "loss": 0.0932,
      "step": 61800
    },
    {
      "epoch": 0.10116978587747033,
      "grad_norm": 4.414698123931885,
      "learning_rate": 9.862219381535685e-06,
      "loss": 0.0696,
      "step": 61820
    },
    {
      "epoch": 0.10120251631612368,
      "grad_norm": 2.8002991676330566,
      "learning_rate": 9.862153489322167e-06,
      "loss": 0.0845,
      "step": 61840
    },
    {
      "epoch": 0.101235246754777,
      "grad_norm": 1.8948818445205688,
      "learning_rate": 9.86208759710865e-06,
      "loss": 0.0794,
      "step": 61860
    },
    {
      "epoch": 0.10126797719343035,
      "grad_norm": 0.8375496864318848,
      "learning_rate": 9.862021704895134e-06,
      "loss": 0.0836,
      "step": 61880
    },
    {
      "epoch": 0.10130070763208368,
      "grad_norm": 5.4284749031066895,
      "learning_rate": 9.861955812681616e-06,
      "loss": 0.0988,
      "step": 61900
    },
    {
      "epoch": 0.10133343807073703,
      "grad_norm": 3.957958698272705,
      "learning_rate": 9.8618899204681e-06,
      "loss": 0.0711,
      "step": 61920
    },
    {
      "epoch": 0.10136616850939037,
      "grad_norm": 3.1166458129882812,
      "learning_rate": 9.861824028254581e-06,
      "loss": 0.0725,
      "step": 61940
    },
    {
      "epoch": 0.1013988989480437,
      "grad_norm": 6.436363697052002,
      "learning_rate": 9.861758136041065e-06,
      "loss": 0.0875,
      "step": 61960
    },
    {
      "epoch": 0.10143162938669704,
      "grad_norm": 2.6705615520477295,
      "learning_rate": 9.861692243827547e-06,
      "loss": 0.0823,
      "step": 61980
    },
    {
      "epoch": 0.10146435982535038,
      "grad_norm": 3.599468469619751,
      "learning_rate": 9.86162635161403e-06,
      "loss": 0.0831,
      "step": 62000
    },
    {
      "epoch": 0.10149709026400372,
      "grad_norm": 2.0171515941619873,
      "learning_rate": 9.861560459400514e-06,
      "loss": 0.0953,
      "step": 62020
    },
    {
      "epoch": 0.10152982070265705,
      "grad_norm": 3.7572035789489746,
      "learning_rate": 9.861494567186996e-06,
      "loss": 0.1049,
      "step": 62040
    },
    {
      "epoch": 0.1015625511413104,
      "grad_norm": 2.6047415733337402,
      "learning_rate": 9.86142867497348e-06,
      "loss": 0.1026,
      "step": 62060
    },
    {
      "epoch": 0.10159528157996374,
      "grad_norm": 3.313504219055176,
      "learning_rate": 9.861362782759963e-06,
      "loss": 0.0828,
      "step": 62080
    },
    {
      "epoch": 0.10162801201861707,
      "grad_norm": 2.424280881881714,
      "learning_rate": 9.861296890546445e-06,
      "loss": 0.0785,
      "step": 62100
    },
    {
      "epoch": 0.10166074245727041,
      "grad_norm": 5.418919563293457,
      "learning_rate": 9.861230998332929e-06,
      "loss": 0.107,
      "step": 62120
    },
    {
      "epoch": 0.10169347289592374,
      "grad_norm": 2.8228302001953125,
      "learning_rate": 9.86116510611941e-06,
      "loss": 0.0845,
      "step": 62140
    },
    {
      "epoch": 0.10172620333457709,
      "grad_norm": 4.071574687957764,
      "learning_rate": 9.861099213905894e-06,
      "loss": 0.0931,
      "step": 62160
    },
    {
      "epoch": 0.10175893377323043,
      "grad_norm": 2.927358865737915,
      "learning_rate": 9.861033321692376e-06,
      "loss": 0.0765,
      "step": 62180
    },
    {
      "epoch": 0.10179166421188376,
      "grad_norm": 4.516827583312988,
      "learning_rate": 9.86096742947886e-06,
      "loss": 0.0904,
      "step": 62200
    },
    {
      "epoch": 0.10182439465053711,
      "grad_norm": 3.9419760704040527,
      "learning_rate": 9.860901537265343e-06,
      "loss": 0.0713,
      "step": 62220
    },
    {
      "epoch": 0.10185712508919044,
      "grad_norm": 5.468240737915039,
      "learning_rate": 9.860835645051825e-06,
      "loss": 0.0805,
      "step": 62240
    },
    {
      "epoch": 0.10188985552784378,
      "grad_norm": 2.452993869781494,
      "learning_rate": 9.860769752838309e-06,
      "loss": 0.0921,
      "step": 62260
    },
    {
      "epoch": 0.10192258596649713,
      "grad_norm": 2.9027254581451416,
      "learning_rate": 9.86070386062479e-06,
      "loss": 0.0816,
      "step": 62280
    },
    {
      "epoch": 0.10195531640515046,
      "grad_norm": 6.6148576736450195,
      "learning_rate": 9.860637968411274e-06,
      "loss": 0.0743,
      "step": 62300
    },
    {
      "epoch": 0.1019880468438038,
      "grad_norm": 1.930790662765503,
      "learning_rate": 9.860572076197756e-06,
      "loss": 0.0892,
      "step": 62320
    },
    {
      "epoch": 0.10202077728245713,
      "grad_norm": 1.2548638582229614,
      "learning_rate": 9.86050618398424e-06,
      "loss": 0.0778,
      "step": 62340
    },
    {
      "epoch": 0.10205350772111048,
      "grad_norm": 4.115673542022705,
      "learning_rate": 9.860440291770721e-06,
      "loss": 0.0887,
      "step": 62360
    },
    {
      "epoch": 0.10208623815976382,
      "grad_norm": 3.42170786857605,
      "learning_rate": 9.860374399557205e-06,
      "loss": 0.0865,
      "step": 62380
    },
    {
      "epoch": 0.10211896859841715,
      "grad_norm": 1.1454198360443115,
      "learning_rate": 9.860308507343687e-06,
      "loss": 0.0615,
      "step": 62400
    },
    {
      "epoch": 0.1021516990370705,
      "grad_norm": 12.376587867736816,
      "learning_rate": 9.86024261513017e-06,
      "loss": 0.09,
      "step": 62420
    },
    {
      "epoch": 0.10218442947572383,
      "grad_norm": 5.105517864227295,
      "learning_rate": 9.860176722916654e-06,
      "loss": 0.1078,
      "step": 62440
    },
    {
      "epoch": 0.10221715991437717,
      "grad_norm": 3.1879544258117676,
      "learning_rate": 9.860110830703136e-06,
      "loss": 0.108,
      "step": 62460
    },
    {
      "epoch": 0.10224989035303052,
      "grad_norm": 1.8476606607437134,
      "learning_rate": 9.86004493848962e-06,
      "loss": 0.063,
      "step": 62480
    },
    {
      "epoch": 0.10228262079168385,
      "grad_norm": 6.132774353027344,
      "learning_rate": 9.859979046276103e-06,
      "loss": 0.1004,
      "step": 62500
    },
    {
      "epoch": 0.10231535123033719,
      "grad_norm": 2.8249638080596924,
      "learning_rate": 9.859913154062585e-06,
      "loss": 0.0741,
      "step": 62520
    },
    {
      "epoch": 0.10234808166899052,
      "grad_norm": 5.954491138458252,
      "learning_rate": 9.859847261849069e-06,
      "loss": 0.0722,
      "step": 62540
    },
    {
      "epoch": 0.10238081210764387,
      "grad_norm": 3.848782777786255,
      "learning_rate": 9.85978136963555e-06,
      "loss": 0.0723,
      "step": 62560
    },
    {
      "epoch": 0.10241354254629721,
      "grad_norm": 2.701688528060913,
      "learning_rate": 9.859715477422034e-06,
      "loss": 0.0961,
      "step": 62580
    },
    {
      "epoch": 0.10244627298495054,
      "grad_norm": 6.175139904022217,
      "learning_rate": 9.859649585208518e-06,
      "loss": 0.0848,
      "step": 62600
    },
    {
      "epoch": 0.10247900342360389,
      "grad_norm": 3.275784730911255,
      "learning_rate": 9.859583692995e-06,
      "loss": 0.0746,
      "step": 62620
    },
    {
      "epoch": 0.10251173386225722,
      "grad_norm": 1.7852627038955688,
      "learning_rate": 9.859517800781483e-06,
      "loss": 0.0788,
      "step": 62640
    },
    {
      "epoch": 0.10254446430091056,
      "grad_norm": 8.513428688049316,
      "learning_rate": 9.859451908567965e-06,
      "loss": 0.0701,
      "step": 62660
    },
    {
      "epoch": 0.10257719473956389,
      "grad_norm": 2.988673686981201,
      "learning_rate": 9.859386016354449e-06,
      "loss": 0.0784,
      "step": 62680
    },
    {
      "epoch": 0.10260992517821724,
      "grad_norm": 3.355020523071289,
      "learning_rate": 9.85932012414093e-06,
      "loss": 0.077,
      "step": 62700
    },
    {
      "epoch": 0.10264265561687058,
      "grad_norm": 3.056964874267578,
      "learning_rate": 9.859254231927414e-06,
      "loss": 0.0843,
      "step": 62720
    },
    {
      "epoch": 0.10267538605552391,
      "grad_norm": 0.7579419612884521,
      "learning_rate": 9.859188339713896e-06,
      "loss": 0.0745,
      "step": 62740
    },
    {
      "epoch": 0.10270811649417726,
      "grad_norm": 7.78302001953125,
      "learning_rate": 9.85912244750038e-06,
      "loss": 0.0901,
      "step": 62760
    },
    {
      "epoch": 0.10274084693283059,
      "grad_norm": 3.641606569290161,
      "learning_rate": 9.859056555286862e-06,
      "loss": 0.0787,
      "step": 62780
    },
    {
      "epoch": 0.10277357737148393,
      "grad_norm": 3.776343822479248,
      "learning_rate": 9.858990663073345e-06,
      "loss": 0.0773,
      "step": 62800
    },
    {
      "epoch": 0.10280630781013728,
      "grad_norm": 3.9005231857299805,
      "learning_rate": 9.858924770859829e-06,
      "loss": 0.0875,
      "step": 62820
    },
    {
      "epoch": 0.1028390382487906,
      "grad_norm": 3.560912847518921,
      "learning_rate": 9.85885887864631e-06,
      "loss": 0.0924,
      "step": 62840
    },
    {
      "epoch": 0.10287176868744395,
      "grad_norm": 1.7676526308059692,
      "learning_rate": 9.858792986432794e-06,
      "loss": 0.0915,
      "step": 62860
    },
    {
      "epoch": 0.10290449912609728,
      "grad_norm": 12.585232734680176,
      "learning_rate": 9.858727094219278e-06,
      "loss": 0.0729,
      "step": 62880
    },
    {
      "epoch": 0.10293722956475063,
      "grad_norm": 2.45768141746521,
      "learning_rate": 9.85866120200576e-06,
      "loss": 0.0941,
      "step": 62900
    },
    {
      "epoch": 0.10296996000340397,
      "grad_norm": 4.607007026672363,
      "learning_rate": 9.858595309792243e-06,
      "loss": 0.0812,
      "step": 62920
    },
    {
      "epoch": 0.1030026904420573,
      "grad_norm": 5.649052143096924,
      "learning_rate": 9.858529417578727e-06,
      "loss": 0.1002,
      "step": 62940
    },
    {
      "epoch": 0.10303542088071065,
      "grad_norm": 6.623944282531738,
      "learning_rate": 9.858463525365209e-06,
      "loss": 0.0727,
      "step": 62960
    },
    {
      "epoch": 0.10306815131936398,
      "grad_norm": 7.5495219230651855,
      "learning_rate": 9.858397633151692e-06,
      "loss": 0.0892,
      "step": 62980
    },
    {
      "epoch": 0.10310088175801732,
      "grad_norm": 6.646795272827148,
      "learning_rate": 9.858331740938174e-06,
      "loss": 0.0848,
      "step": 63000
    },
    {
      "epoch": 0.10313361219667067,
      "grad_norm": 8.370187759399414,
      "learning_rate": 9.858265848724658e-06,
      "loss": 0.0805,
      "step": 63020
    },
    {
      "epoch": 0.103166342635324,
      "grad_norm": 5.297788619995117,
      "learning_rate": 9.85819995651114e-06,
      "loss": 0.0728,
      "step": 63040
    },
    {
      "epoch": 0.10319907307397734,
      "grad_norm": 1.3397493362426758,
      "learning_rate": 9.858134064297623e-06,
      "loss": 0.071,
      "step": 63060
    },
    {
      "epoch": 0.10323180351263067,
      "grad_norm": 5.423744201660156,
      "learning_rate": 9.858068172084105e-06,
      "loss": 0.0928,
      "step": 63080
    },
    {
      "epoch": 0.10326453395128402,
      "grad_norm": 5.748130798339844,
      "learning_rate": 9.858002279870589e-06,
      "loss": 0.0873,
      "step": 63100
    },
    {
      "epoch": 0.10329726438993736,
      "grad_norm": 2.9201862812042236,
      "learning_rate": 9.85793638765707e-06,
      "loss": 0.1025,
      "step": 63120
    },
    {
      "epoch": 0.10332999482859069,
      "grad_norm": 2.350173234939575,
      "learning_rate": 9.857870495443554e-06,
      "loss": 0.0852,
      "step": 63140
    },
    {
      "epoch": 0.10336272526724403,
      "grad_norm": 2.539804220199585,
      "learning_rate": 9.857804603230036e-06,
      "loss": 0.0818,
      "step": 63160
    },
    {
      "epoch": 0.10339545570589737,
      "grad_norm": 3.4145638942718506,
      "learning_rate": 9.85773871101652e-06,
      "loss": 0.0728,
      "step": 63180
    },
    {
      "epoch": 0.10342818614455071,
      "grad_norm": 2.234154224395752,
      "learning_rate": 9.857672818803002e-06,
      "loss": 0.0957,
      "step": 63200
    },
    {
      "epoch": 0.10346091658320405,
      "grad_norm": 2.246901273727417,
      "learning_rate": 9.857606926589485e-06,
      "loss": 0.0928,
      "step": 63220
    },
    {
      "epoch": 0.10349364702185739,
      "grad_norm": 2.210350751876831,
      "learning_rate": 9.857541034375969e-06,
      "loss": 0.0901,
      "step": 63240
    },
    {
      "epoch": 0.10352637746051073,
      "grad_norm": 1.9427146911621094,
      "learning_rate": 9.857475142162452e-06,
      "loss": 0.0758,
      "step": 63260
    },
    {
      "epoch": 0.10355910789916406,
      "grad_norm": 6.613433837890625,
      "learning_rate": 9.857409249948934e-06,
      "loss": 0.0991,
      "step": 63280
    },
    {
      "epoch": 0.1035918383378174,
      "grad_norm": 1.5910084247589111,
      "learning_rate": 9.857343357735418e-06,
      "loss": 0.0824,
      "step": 63300
    },
    {
      "epoch": 0.10362456877647075,
      "grad_norm": 2.1500301361083984,
      "learning_rate": 9.857277465521901e-06,
      "loss": 0.0849,
      "step": 63320
    },
    {
      "epoch": 0.10365729921512408,
      "grad_norm": 4.312437057495117,
      "learning_rate": 9.857211573308383e-06,
      "loss": 0.0769,
      "step": 63340
    },
    {
      "epoch": 0.10369002965377742,
      "grad_norm": 2.977788209915161,
      "learning_rate": 9.857145681094867e-06,
      "loss": 0.0955,
      "step": 63360
    },
    {
      "epoch": 0.10372276009243075,
      "grad_norm": 2.761972188949585,
      "learning_rate": 9.857079788881349e-06,
      "loss": 0.0711,
      "step": 63380
    },
    {
      "epoch": 0.1037554905310841,
      "grad_norm": 3.5066521167755127,
      "learning_rate": 9.857013896667832e-06,
      "loss": 0.0745,
      "step": 63400
    },
    {
      "epoch": 0.10378822096973743,
      "grad_norm": 1.1414690017700195,
      "learning_rate": 9.856948004454314e-06,
      "loss": 0.0685,
      "step": 63420
    },
    {
      "epoch": 0.10382095140839077,
      "grad_norm": 1.908700942993164,
      "learning_rate": 9.856882112240798e-06,
      "loss": 0.0616,
      "step": 63440
    },
    {
      "epoch": 0.10385368184704412,
      "grad_norm": 1.6045597791671753,
      "learning_rate": 9.85681622002728e-06,
      "loss": 0.0789,
      "step": 63460
    },
    {
      "epoch": 0.10388641228569745,
      "grad_norm": 4.079545021057129,
      "learning_rate": 9.856750327813763e-06,
      "loss": 0.0723,
      "step": 63480
    },
    {
      "epoch": 0.1039191427243508,
      "grad_norm": 2.1032705307006836,
      "learning_rate": 9.856684435600245e-06,
      "loss": 0.0777,
      "step": 63500
    },
    {
      "epoch": 0.10395187316300412,
      "grad_norm": 3.019312858581543,
      "learning_rate": 9.856618543386729e-06,
      "loss": 0.1064,
      "step": 63520
    },
    {
      "epoch": 0.10398460360165747,
      "grad_norm": 8.667119026184082,
      "learning_rate": 9.85655265117321e-06,
      "loss": 0.0935,
      "step": 63540
    },
    {
      "epoch": 0.10401733404031081,
      "grad_norm": 7.967508792877197,
      "learning_rate": 9.856486758959694e-06,
      "loss": 0.0812,
      "step": 63560
    },
    {
      "epoch": 0.10405006447896414,
      "grad_norm": 3.0243444442749023,
      "learning_rate": 9.856420866746176e-06,
      "loss": 0.0733,
      "step": 63580
    },
    {
      "epoch": 0.10408279491761749,
      "grad_norm": 2.5818264484405518,
      "learning_rate": 9.85635497453266e-06,
      "loss": 0.0858,
      "step": 63600
    },
    {
      "epoch": 0.10411552535627082,
      "grad_norm": 0.9431999921798706,
      "learning_rate": 9.856289082319143e-06,
      "loss": 0.0896,
      "step": 63620
    },
    {
      "epoch": 0.10414825579492416,
      "grad_norm": 3.345116138458252,
      "learning_rate": 9.856223190105625e-06,
      "loss": 0.0774,
      "step": 63640
    },
    {
      "epoch": 0.10418098623357751,
      "grad_norm": 3.1904046535491943,
      "learning_rate": 9.856157297892109e-06,
      "loss": 0.0712,
      "step": 63660
    },
    {
      "epoch": 0.10421371667223084,
      "grad_norm": 7.178798675537109,
      "learning_rate": 9.856091405678592e-06,
      "loss": 0.0924,
      "step": 63680
    },
    {
      "epoch": 0.10424644711088418,
      "grad_norm": 3.1911723613739014,
      "learning_rate": 9.856025513465074e-06,
      "loss": 0.0814,
      "step": 63700
    },
    {
      "epoch": 0.10427917754953751,
      "grad_norm": 2.3628597259521484,
      "learning_rate": 9.855959621251558e-06,
      "loss": 0.0977,
      "step": 63720
    },
    {
      "epoch": 0.10431190798819086,
      "grad_norm": 1.6982216835021973,
      "learning_rate": 9.855893729038041e-06,
      "loss": 0.0808,
      "step": 63740
    },
    {
      "epoch": 0.1043446384268442,
      "grad_norm": 2.173257827758789,
      "learning_rate": 9.855827836824523e-06,
      "loss": 0.0726,
      "step": 63760
    },
    {
      "epoch": 0.10437736886549753,
      "grad_norm": 0.9554072022438049,
      "learning_rate": 9.855761944611007e-06,
      "loss": 0.0892,
      "step": 63780
    },
    {
      "epoch": 0.10441009930415088,
      "grad_norm": 2.0869359970092773,
      "learning_rate": 9.855696052397489e-06,
      "loss": 0.0864,
      "step": 63800
    },
    {
      "epoch": 0.10444282974280421,
      "grad_norm": 17.673927307128906,
      "learning_rate": 9.855630160183972e-06,
      "loss": 0.0829,
      "step": 63820
    },
    {
      "epoch": 0.10447556018145755,
      "grad_norm": 3.0209968090057373,
      "learning_rate": 9.855564267970454e-06,
      "loss": 0.0752,
      "step": 63840
    },
    {
      "epoch": 0.1045082906201109,
      "grad_norm": 6.033916473388672,
      "learning_rate": 9.855498375756938e-06,
      "loss": 0.0886,
      "step": 63860
    },
    {
      "epoch": 0.10454102105876423,
      "grad_norm": 2.054673910140991,
      "learning_rate": 9.85543248354342e-06,
      "loss": 0.0723,
      "step": 63880
    },
    {
      "epoch": 0.10457375149741757,
      "grad_norm": 4.39602518081665,
      "learning_rate": 9.855366591329903e-06,
      "loss": 0.0774,
      "step": 63900
    },
    {
      "epoch": 0.1046064819360709,
      "grad_norm": 3.2385385036468506,
      "learning_rate": 9.855300699116385e-06,
      "loss": 0.0828,
      "step": 63920
    },
    {
      "epoch": 0.10463921237472425,
      "grad_norm": 8.366942405700684,
      "learning_rate": 9.855234806902869e-06,
      "loss": 0.098,
      "step": 63940
    },
    {
      "epoch": 0.10467194281337759,
      "grad_norm": 2.883284091949463,
      "learning_rate": 9.85516891468935e-06,
      "loss": 0.0868,
      "step": 63960
    },
    {
      "epoch": 0.10470467325203092,
      "grad_norm": 4.467164516448975,
      "learning_rate": 9.855103022475834e-06,
      "loss": 0.084,
      "step": 63980
    },
    {
      "epoch": 0.10473740369068427,
      "grad_norm": 4.158154487609863,
      "learning_rate": 9.855037130262318e-06,
      "loss": 0.0785,
      "step": 64000
    },
    {
      "epoch": 0.1047701341293376,
      "grad_norm": 4.074821949005127,
      "learning_rate": 9.8549712380488e-06,
      "loss": 0.0943,
      "step": 64020
    },
    {
      "epoch": 0.10480286456799094,
      "grad_norm": 1.3348511457443237,
      "learning_rate": 9.854905345835283e-06,
      "loss": 0.0747,
      "step": 64040
    },
    {
      "epoch": 0.10483559500664427,
      "grad_norm": 3.4818572998046875,
      "learning_rate": 9.854839453621767e-06,
      "loss": 0.0875,
      "step": 64060
    },
    {
      "epoch": 0.10486832544529762,
      "grad_norm": 1.0354533195495605,
      "learning_rate": 9.854773561408249e-06,
      "loss": 0.0755,
      "step": 64080
    },
    {
      "epoch": 0.10490105588395096,
      "grad_norm": 2.055809736251831,
      "learning_rate": 9.854707669194732e-06,
      "loss": 0.0851,
      "step": 64100
    },
    {
      "epoch": 0.10493378632260429,
      "grad_norm": 4.482025623321533,
      "learning_rate": 9.854641776981216e-06,
      "loss": 0.0925,
      "step": 64120
    },
    {
      "epoch": 0.10496651676125764,
      "grad_norm": 3.954291820526123,
      "learning_rate": 9.854575884767698e-06,
      "loss": 0.0842,
      "step": 64140
    },
    {
      "epoch": 0.10499924719991097,
      "grad_norm": 3.2812113761901855,
      "learning_rate": 9.854509992554182e-06,
      "loss": 0.0745,
      "step": 64160
    },
    {
      "epoch": 0.10503197763856431,
      "grad_norm": 0.6004838943481445,
      "learning_rate": 9.854444100340663e-06,
      "loss": 0.0747,
      "step": 64180
    },
    {
      "epoch": 0.10506470807721766,
      "grad_norm": 1.142020583152771,
      "learning_rate": 9.854378208127147e-06,
      "loss": 0.0928,
      "step": 64200
    },
    {
      "epoch": 0.10509743851587099,
      "grad_norm": 3.8870222568511963,
      "learning_rate": 9.854312315913629e-06,
      "loss": 0.0902,
      "step": 64220
    },
    {
      "epoch": 0.10513016895452433,
      "grad_norm": 8.98450756072998,
      "learning_rate": 9.854246423700113e-06,
      "loss": 0.0908,
      "step": 64240
    },
    {
      "epoch": 0.10516289939317766,
      "grad_norm": 13.65620231628418,
      "learning_rate": 9.854180531486594e-06,
      "loss": 0.0845,
      "step": 64260
    },
    {
      "epoch": 0.105195629831831,
      "grad_norm": 3.557929515838623,
      "learning_rate": 9.854114639273078e-06,
      "loss": 0.0941,
      "step": 64280
    },
    {
      "epoch": 0.10522836027048435,
      "grad_norm": 2.880107879638672,
      "learning_rate": 9.85404874705956e-06,
      "loss": 0.0718,
      "step": 64300
    },
    {
      "epoch": 0.10526109070913768,
      "grad_norm": 1.5198813676834106,
      "learning_rate": 9.853982854846043e-06,
      "loss": 0.0865,
      "step": 64320
    },
    {
      "epoch": 0.10529382114779103,
      "grad_norm": 7.060075283050537,
      "learning_rate": 9.853916962632527e-06,
      "loss": 0.0733,
      "step": 64340
    },
    {
      "epoch": 0.10532655158644436,
      "grad_norm": 3.000654458999634,
      "learning_rate": 9.853851070419009e-06,
      "loss": 0.0982,
      "step": 64360
    },
    {
      "epoch": 0.1053592820250977,
      "grad_norm": 2.4864652156829834,
      "learning_rate": 9.853785178205493e-06,
      "loss": 0.0807,
      "step": 64380
    },
    {
      "epoch": 0.10539201246375104,
      "grad_norm": 1.625002384185791,
      "learning_rate": 9.853719285991974e-06,
      "loss": 0.072,
      "step": 64400
    },
    {
      "epoch": 0.10542474290240438,
      "grad_norm": 3.7522494792938232,
      "learning_rate": 9.853653393778458e-06,
      "loss": 0.081,
      "step": 64420
    },
    {
      "epoch": 0.10545747334105772,
      "grad_norm": 10.198802947998047,
      "learning_rate": 9.85358750156494e-06,
      "loss": 0.0923,
      "step": 64440
    },
    {
      "epoch": 0.10549020377971105,
      "grad_norm": 2.1846439838409424,
      "learning_rate": 9.853521609351423e-06,
      "loss": 0.0873,
      "step": 64460
    },
    {
      "epoch": 0.1055229342183644,
      "grad_norm": 3.7413077354431152,
      "learning_rate": 9.853455717137907e-06,
      "loss": 0.0646,
      "step": 64480
    },
    {
      "epoch": 0.10555566465701774,
      "grad_norm": 3.660010576248169,
      "learning_rate": 9.853389824924389e-06,
      "loss": 0.0862,
      "step": 64500
    },
    {
      "epoch": 0.10558839509567107,
      "grad_norm": 3.121626853942871,
      "learning_rate": 9.853323932710873e-06,
      "loss": 0.0903,
      "step": 64520
    },
    {
      "epoch": 0.10562112553432441,
      "grad_norm": 2.1864500045776367,
      "learning_rate": 9.853258040497356e-06,
      "loss": 0.0827,
      "step": 64540
    },
    {
      "epoch": 0.10565385597297774,
      "grad_norm": 6.1064677238464355,
      "learning_rate": 9.853192148283838e-06,
      "loss": 0.0815,
      "step": 64560
    },
    {
      "epoch": 0.10568658641163109,
      "grad_norm": 4.738467216491699,
      "learning_rate": 9.853126256070322e-06,
      "loss": 0.0976,
      "step": 64580
    },
    {
      "epoch": 0.10571931685028443,
      "grad_norm": 4.454445838928223,
      "learning_rate": 9.853060363856804e-06,
      "loss": 0.08,
      "step": 64600
    },
    {
      "epoch": 0.10575204728893776,
      "grad_norm": 3.3907344341278076,
      "learning_rate": 9.852994471643287e-06,
      "loss": 0.0966,
      "step": 64620
    },
    {
      "epoch": 0.10578477772759111,
      "grad_norm": 2.751945734024048,
      "learning_rate": 9.852928579429769e-06,
      "loss": 0.0645,
      "step": 64640
    },
    {
      "epoch": 0.10581750816624444,
      "grad_norm": 5.073198318481445,
      "learning_rate": 9.852862687216253e-06,
      "loss": 0.072,
      "step": 64660
    },
    {
      "epoch": 0.10585023860489778,
      "grad_norm": 2.2513723373413086,
      "learning_rate": 9.852796795002734e-06,
      "loss": 0.0971,
      "step": 64680
    },
    {
      "epoch": 0.10588296904355113,
      "grad_norm": 3.164773941040039,
      "learning_rate": 9.852730902789218e-06,
      "loss": 0.0908,
      "step": 64700
    },
    {
      "epoch": 0.10591569948220446,
      "grad_norm": 2.205850839614868,
      "learning_rate": 9.852665010575702e-06,
      "loss": 0.0937,
      "step": 64720
    },
    {
      "epoch": 0.1059484299208578,
      "grad_norm": 5.168729305267334,
      "learning_rate": 9.852599118362184e-06,
      "loss": 0.0944,
      "step": 64740
    },
    {
      "epoch": 0.10598116035951113,
      "grad_norm": 2.321126937866211,
      "learning_rate": 9.852533226148667e-06,
      "loss": 0.0776,
      "step": 64760
    },
    {
      "epoch": 0.10601389079816448,
      "grad_norm": 5.369662761688232,
      "learning_rate": 9.852467333935149e-06,
      "loss": 0.0976,
      "step": 64780
    },
    {
      "epoch": 0.10604662123681781,
      "grad_norm": 2.6366584300994873,
      "learning_rate": 9.852401441721633e-06,
      "loss": 0.073,
      "step": 64800
    },
    {
      "epoch": 0.10607935167547115,
      "grad_norm": 7.9806742668151855,
      "learning_rate": 9.852335549508115e-06,
      "loss": 0.0708,
      "step": 64820
    },
    {
      "epoch": 0.1061120821141245,
      "grad_norm": 3.0404715538024902,
      "learning_rate": 9.852269657294598e-06,
      "loss": 0.0906,
      "step": 64840
    },
    {
      "epoch": 0.10614481255277783,
      "grad_norm": 2.6303751468658447,
      "learning_rate": 9.852203765081082e-06,
      "loss": 0.0819,
      "step": 64860
    },
    {
      "epoch": 0.10617754299143117,
      "grad_norm": 3.3979625701904297,
      "learning_rate": 9.852137872867564e-06,
      "loss": 0.0731,
      "step": 64880
    },
    {
      "epoch": 0.1062102734300845,
      "grad_norm": 1.8950886726379395,
      "learning_rate": 9.852071980654047e-06,
      "loss": 0.0909,
      "step": 64900
    },
    {
      "epoch": 0.10624300386873785,
      "grad_norm": 2.0194756984710693,
      "learning_rate": 9.85200608844053e-06,
      "loss": 0.0705,
      "step": 64920
    },
    {
      "epoch": 0.10627573430739119,
      "grad_norm": 6.669308662414551,
      "learning_rate": 9.851940196227013e-06,
      "loss": 0.0907,
      "step": 64940
    },
    {
      "epoch": 0.10630846474604452,
      "grad_norm": 1.5304005146026611,
      "learning_rate": 9.851874304013496e-06,
      "loss": 0.0887,
      "step": 64960
    },
    {
      "epoch": 0.10634119518469787,
      "grad_norm": 4.579463481903076,
      "learning_rate": 9.851808411799978e-06,
      "loss": 0.0916,
      "step": 64980
    },
    {
      "epoch": 0.1063739256233512,
      "grad_norm": 4.063445091247559,
      "learning_rate": 9.851742519586462e-06,
      "loss": 0.0712,
      "step": 65000
    },
    {
      "epoch": 0.10640665606200454,
      "grad_norm": 4.213898658752441,
      "learning_rate": 9.851676627372944e-06,
      "loss": 0.0816,
      "step": 65020
    },
    {
      "epoch": 0.10643938650065789,
      "grad_norm": 12.02647590637207,
      "learning_rate": 9.851610735159427e-06,
      "loss": 0.0746,
      "step": 65040
    },
    {
      "epoch": 0.10647211693931122,
      "grad_norm": 3.986671209335327,
      "learning_rate": 9.85154484294591e-06,
      "loss": 0.0859,
      "step": 65060
    },
    {
      "epoch": 0.10650484737796456,
      "grad_norm": 3.576698064804077,
      "learning_rate": 9.851478950732393e-06,
      "loss": 0.0682,
      "step": 65080
    },
    {
      "epoch": 0.10653757781661789,
      "grad_norm": 9.857576370239258,
      "learning_rate": 9.851413058518876e-06,
      "loss": 0.0981,
      "step": 65100
    },
    {
      "epoch": 0.10657030825527124,
      "grad_norm": 1.9445327520370483,
      "learning_rate": 9.851347166305358e-06,
      "loss": 0.0932,
      "step": 65120
    },
    {
      "epoch": 0.10660303869392458,
      "grad_norm": 4.260919094085693,
      "learning_rate": 9.851281274091842e-06,
      "loss": 0.0776,
      "step": 65140
    },
    {
      "epoch": 0.10663576913257791,
      "grad_norm": 4.8115339279174805,
      "learning_rate": 9.851215381878324e-06,
      "loss": 0.0931,
      "step": 65160
    },
    {
      "epoch": 0.10666849957123126,
      "grad_norm": 2.0756044387817383,
      "learning_rate": 9.851149489664807e-06,
      "loss": 0.0688,
      "step": 65180
    },
    {
      "epoch": 0.10670123000988459,
      "grad_norm": 2.4566569328308105,
      "learning_rate": 9.851083597451289e-06,
      "loss": 0.0805,
      "step": 65200
    },
    {
      "epoch": 0.10673396044853793,
      "grad_norm": 2.119500160217285,
      "learning_rate": 9.851017705237773e-06,
      "loss": 0.0684,
      "step": 65220
    },
    {
      "epoch": 0.10676669088719128,
      "grad_norm": 1.5118606090545654,
      "learning_rate": 9.850951813024255e-06,
      "loss": 0.0993,
      "step": 65240
    },
    {
      "epoch": 0.1067994213258446,
      "grad_norm": 4.07548189163208,
      "learning_rate": 9.850885920810738e-06,
      "loss": 0.092,
      "step": 65260
    },
    {
      "epoch": 0.10683215176449795,
      "grad_norm": 2.5028953552246094,
      "learning_rate": 9.850820028597222e-06,
      "loss": 0.0905,
      "step": 65280
    },
    {
      "epoch": 0.10686488220315128,
      "grad_norm": 3.5004258155822754,
      "learning_rate": 9.850754136383704e-06,
      "loss": 0.0914,
      "step": 65300
    },
    {
      "epoch": 0.10689761264180463,
      "grad_norm": 3.298586368560791,
      "learning_rate": 9.850688244170187e-06,
      "loss": 0.0771,
      "step": 65320
    },
    {
      "epoch": 0.10693034308045797,
      "grad_norm": 1.8088114261627197,
      "learning_rate": 9.85062235195667e-06,
      "loss": 0.0934,
      "step": 65340
    },
    {
      "epoch": 0.1069630735191113,
      "grad_norm": 8.192654609680176,
      "learning_rate": 9.850556459743153e-06,
      "loss": 0.0815,
      "step": 65360
    },
    {
      "epoch": 0.10699580395776465,
      "grad_norm": 3.245506763458252,
      "learning_rate": 9.850490567529636e-06,
      "loss": 0.0982,
      "step": 65380
    },
    {
      "epoch": 0.10702853439641798,
      "grad_norm": 2.5868170261383057,
      "learning_rate": 9.85042467531612e-06,
      "loss": 0.0722,
      "step": 65400
    },
    {
      "epoch": 0.10706126483507132,
      "grad_norm": 0.9304549694061279,
      "learning_rate": 9.850358783102602e-06,
      "loss": 0.0996,
      "step": 65420
    },
    {
      "epoch": 0.10709399527372467,
      "grad_norm": 1.2259114980697632,
      "learning_rate": 9.850292890889085e-06,
      "loss": 0.076,
      "step": 65440
    },
    {
      "epoch": 0.107126725712378,
      "grad_norm": 7.495715618133545,
      "learning_rate": 9.850226998675567e-06,
      "loss": 0.0805,
      "step": 65460
    },
    {
      "epoch": 0.10715945615103134,
      "grad_norm": 4.998600959777832,
      "learning_rate": 9.850161106462051e-06,
      "loss": 0.0662,
      "step": 65480
    },
    {
      "epoch": 0.10719218658968467,
      "grad_norm": 3.9517390727996826,
      "learning_rate": 9.850095214248533e-06,
      "loss": 0.0672,
      "step": 65500
    },
    {
      "epoch": 0.10722491702833802,
      "grad_norm": 2.3524749279022217,
      "learning_rate": 9.850029322035016e-06,
      "loss": 0.0772,
      "step": 65520
    },
    {
      "epoch": 0.10725764746699135,
      "grad_norm": 2.9585561752319336,
      "learning_rate": 9.849963429821498e-06,
      "loss": 0.073,
      "step": 65540
    },
    {
      "epoch": 0.10729037790564469,
      "grad_norm": 3.6747844219207764,
      "learning_rate": 9.849897537607982e-06,
      "loss": 0.076,
      "step": 65560
    },
    {
      "epoch": 0.10732310834429803,
      "grad_norm": 4.340322971343994,
      "learning_rate": 9.849831645394464e-06,
      "loss": 0.085,
      "step": 65580
    },
    {
      "epoch": 0.10735583878295137,
      "grad_norm": 8.357513427734375,
      "learning_rate": 9.849765753180947e-06,
      "loss": 0.1022,
      "step": 65600
    },
    {
      "epoch": 0.10738856922160471,
      "grad_norm": 11.885339736938477,
      "learning_rate": 9.84969986096743e-06,
      "loss": 0.085,
      "step": 65620
    },
    {
      "epoch": 0.10742129966025804,
      "grad_norm": 3.3699684143066406,
      "learning_rate": 9.849633968753913e-06,
      "loss": 0.0851,
      "step": 65640
    },
    {
      "epoch": 0.10745403009891139,
      "grad_norm": 8.172669410705566,
      "learning_rate": 9.849568076540396e-06,
      "loss": 0.0832,
      "step": 65660
    },
    {
      "epoch": 0.10748676053756473,
      "grad_norm": 3.6850478649139404,
      "learning_rate": 9.849502184326878e-06,
      "loss": 0.0801,
      "step": 65680
    },
    {
      "epoch": 0.10751949097621806,
      "grad_norm": 1.538267970085144,
      "learning_rate": 9.849436292113362e-06,
      "loss": 0.0743,
      "step": 65700
    },
    {
      "epoch": 0.1075522214148714,
      "grad_norm": 3.6390292644500732,
      "learning_rate": 9.849370399899845e-06,
      "loss": 0.0743,
      "step": 65720
    },
    {
      "epoch": 0.10758495185352474,
      "grad_norm": 5.680239677429199,
      "learning_rate": 9.849304507686327e-06,
      "loss": 0.0888,
      "step": 65740
    },
    {
      "epoch": 0.10761768229217808,
      "grad_norm": 6.644551753997803,
      "learning_rate": 9.849238615472811e-06,
      "loss": 0.0964,
      "step": 65760
    },
    {
      "epoch": 0.10765041273083142,
      "grad_norm": 8.074342727661133,
      "learning_rate": 9.849172723259294e-06,
      "loss": 0.0994,
      "step": 65780
    },
    {
      "epoch": 0.10768314316948475,
      "grad_norm": 7.55112361907959,
      "learning_rate": 9.849106831045776e-06,
      "loss": 0.0995,
      "step": 65800
    },
    {
      "epoch": 0.1077158736081381,
      "grad_norm": 2.7095181941986084,
      "learning_rate": 9.84904093883226e-06,
      "loss": 0.0737,
      "step": 65820
    },
    {
      "epoch": 0.10774860404679143,
      "grad_norm": 5.081606864929199,
      "learning_rate": 9.848975046618742e-06,
      "loss": 0.0813,
      "step": 65840
    },
    {
      "epoch": 0.10778133448544477,
      "grad_norm": 2.5700290203094482,
      "learning_rate": 9.848909154405225e-06,
      "loss": 0.0708,
      "step": 65860
    },
    {
      "epoch": 0.10781406492409812,
      "grad_norm": 3.515169382095337,
      "learning_rate": 9.848843262191707e-06,
      "loss": 0.0845,
      "step": 65880
    },
    {
      "epoch": 0.10784679536275145,
      "grad_norm": 5.3274335861206055,
      "learning_rate": 9.848777369978191e-06,
      "loss": 0.0871,
      "step": 65900
    },
    {
      "epoch": 0.1078795258014048,
      "grad_norm": 7.681429862976074,
      "learning_rate": 9.848711477764673e-06,
      "loss": 0.0849,
      "step": 65920
    },
    {
      "epoch": 0.10791225624005812,
      "grad_norm": 3.227518320083618,
      "learning_rate": 9.848645585551156e-06,
      "loss": 0.06,
      "step": 65940
    },
    {
      "epoch": 0.10794498667871147,
      "grad_norm": 1.2699472904205322,
      "learning_rate": 9.848579693337638e-06,
      "loss": 0.09,
      "step": 65960
    },
    {
      "epoch": 0.10797771711736481,
      "grad_norm": 3.480790376663208,
      "learning_rate": 9.848513801124122e-06,
      "loss": 0.0864,
      "step": 65980
    },
    {
      "epoch": 0.10801044755601814,
      "grad_norm": 1.4169213771820068,
      "learning_rate": 9.848447908910604e-06,
      "loss": 0.0869,
      "step": 66000
    },
    {
      "epoch": 0.10804317799467149,
      "grad_norm": 2.1825485229492188,
      "learning_rate": 9.848382016697087e-06,
      "loss": 0.0848,
      "step": 66020
    },
    {
      "epoch": 0.10807590843332482,
      "grad_norm": 2.97072696685791,
      "learning_rate": 9.848316124483571e-06,
      "loss": 0.074,
      "step": 66040
    },
    {
      "epoch": 0.10810863887197816,
      "grad_norm": 4.388003349304199,
      "learning_rate": 9.848250232270053e-06,
      "loss": 0.0937,
      "step": 66060
    },
    {
      "epoch": 0.10814136931063151,
      "grad_norm": 3.7382051944732666,
      "learning_rate": 9.848184340056536e-06,
      "loss": 0.0847,
      "step": 66080
    },
    {
      "epoch": 0.10817409974928484,
      "grad_norm": 4.567509174346924,
      "learning_rate": 9.84811844784302e-06,
      "loss": 0.0745,
      "step": 66100
    },
    {
      "epoch": 0.10820683018793818,
      "grad_norm": 3.3957300186157227,
      "learning_rate": 9.848052555629502e-06,
      "loss": 0.0752,
      "step": 66120
    },
    {
      "epoch": 0.10823956062659151,
      "grad_norm": 1.729562520980835,
      "learning_rate": 9.847986663415985e-06,
      "loss": 0.0791,
      "step": 66140
    },
    {
      "epoch": 0.10827229106524486,
      "grad_norm": 12.619423866271973,
      "learning_rate": 9.847920771202469e-06,
      "loss": 0.0747,
      "step": 66160
    },
    {
      "epoch": 0.10830502150389819,
      "grad_norm": 1.9918071031570435,
      "learning_rate": 9.847854878988951e-06,
      "loss": 0.0833,
      "step": 66180
    },
    {
      "epoch": 0.10833775194255153,
      "grad_norm": 4.0547308921813965,
      "learning_rate": 9.847788986775435e-06,
      "loss": 0.082,
      "step": 66200
    },
    {
      "epoch": 0.10837048238120488,
      "grad_norm": 2.4895315170288086,
      "learning_rate": 9.847723094561916e-06,
      "loss": 0.0813,
      "step": 66220
    },
    {
      "epoch": 0.10840321281985821,
      "grad_norm": 7.107756614685059,
      "learning_rate": 9.8476572023484e-06,
      "loss": 0.076,
      "step": 66240
    },
    {
      "epoch": 0.10843594325851155,
      "grad_norm": 0.5844257473945618,
      "learning_rate": 9.847591310134882e-06,
      "loss": 0.0706,
      "step": 66260
    },
    {
      "epoch": 0.10846867369716488,
      "grad_norm": 6.302941799163818,
      "learning_rate": 9.847525417921366e-06,
      "loss": 0.0991,
      "step": 66280
    },
    {
      "epoch": 0.10850140413581823,
      "grad_norm": 2.950603485107422,
      "learning_rate": 9.847459525707847e-06,
      "loss": 0.0974,
      "step": 66300
    },
    {
      "epoch": 0.10853413457447157,
      "grad_norm": 3.177090644836426,
      "learning_rate": 9.847393633494331e-06,
      "loss": 0.0759,
      "step": 66320
    },
    {
      "epoch": 0.1085668650131249,
      "grad_norm": 7.684020042419434,
      "learning_rate": 9.847327741280813e-06,
      "loss": 0.0724,
      "step": 66340
    },
    {
      "epoch": 0.10859959545177825,
      "grad_norm": 3.6151111125946045,
      "learning_rate": 9.847261849067296e-06,
      "loss": 0.0736,
      "step": 66360
    },
    {
      "epoch": 0.10863232589043158,
      "grad_norm": 3.4376473426818848,
      "learning_rate": 9.847195956853778e-06,
      "loss": 0.0823,
      "step": 66380
    },
    {
      "epoch": 0.10866505632908492,
      "grad_norm": 1.6233776807785034,
      "learning_rate": 9.847130064640262e-06,
      "loss": 0.088,
      "step": 66400
    },
    {
      "epoch": 0.10869778676773827,
      "grad_norm": 2.263686418533325,
      "learning_rate": 9.847064172426744e-06,
      "loss": 0.0888,
      "step": 66420
    },
    {
      "epoch": 0.1087305172063916,
      "grad_norm": 4.755277156829834,
      "learning_rate": 9.846998280213227e-06,
      "loss": 0.0854,
      "step": 66440
    },
    {
      "epoch": 0.10876324764504494,
      "grad_norm": 5.881462574005127,
      "learning_rate": 9.846932387999711e-06,
      "loss": 0.0794,
      "step": 66460
    },
    {
      "epoch": 0.10879597808369827,
      "grad_norm": 6.651036739349365,
      "learning_rate": 9.846866495786193e-06,
      "loss": 0.0713,
      "step": 66480
    },
    {
      "epoch": 0.10882870852235162,
      "grad_norm": 6.294381618499756,
      "learning_rate": 9.846800603572677e-06,
      "loss": 0.078,
      "step": 66500
    },
    {
      "epoch": 0.10886143896100496,
      "grad_norm": 4.712299823760986,
      "learning_rate": 9.84673471135916e-06,
      "loss": 0.0711,
      "step": 66520
    },
    {
      "epoch": 0.10889416939965829,
      "grad_norm": 4.621307373046875,
      "learning_rate": 9.846668819145642e-06,
      "loss": 0.087,
      "step": 66540
    },
    {
      "epoch": 0.10892689983831164,
      "grad_norm": 5.417215824127197,
      "learning_rate": 9.846602926932126e-06,
      "loss": 0.0869,
      "step": 66560
    },
    {
      "epoch": 0.10895963027696497,
      "grad_norm": 16.0477237701416,
      "learning_rate": 9.84653703471861e-06,
      "loss": 0.0811,
      "step": 66580
    },
    {
      "epoch": 0.10899236071561831,
      "grad_norm": 4.8468828201293945,
      "learning_rate": 9.846471142505091e-06,
      "loss": 0.0764,
      "step": 66600
    },
    {
      "epoch": 0.10902509115427166,
      "grad_norm": 2.223253011703491,
      "learning_rate": 9.846405250291575e-06,
      "loss": 0.0959,
      "step": 66620
    },
    {
      "epoch": 0.10905782159292499,
      "grad_norm": 2.43521785736084,
      "learning_rate": 9.846339358078057e-06,
      "loss": 0.0898,
      "step": 66640
    },
    {
      "epoch": 0.10909055203157833,
      "grad_norm": 3.230100393295288,
      "learning_rate": 9.84627346586454e-06,
      "loss": 0.0768,
      "step": 66660
    },
    {
      "epoch": 0.10912328247023166,
      "grad_norm": 4.487438678741455,
      "learning_rate": 9.846207573651022e-06,
      "loss": 0.0896,
      "step": 66680
    },
    {
      "epoch": 0.109156012908885,
      "grad_norm": 4.811901569366455,
      "learning_rate": 9.846141681437506e-06,
      "loss": 0.0915,
      "step": 66700
    },
    {
      "epoch": 0.10918874334753835,
      "grad_norm": 11.113654136657715,
      "learning_rate": 9.846075789223987e-06,
      "loss": 0.084,
      "step": 66720
    },
    {
      "epoch": 0.10922147378619168,
      "grad_norm": 5.592864036560059,
      "learning_rate": 9.846009897010471e-06,
      "loss": 0.093,
      "step": 66740
    },
    {
      "epoch": 0.10925420422484503,
      "grad_norm": 8.452320098876953,
      "learning_rate": 9.845944004796953e-06,
      "loss": 0.0672,
      "step": 66760
    },
    {
      "epoch": 0.10928693466349836,
      "grad_norm": 2.392333984375,
      "learning_rate": 9.845878112583437e-06,
      "loss": 0.0659,
      "step": 66780
    },
    {
      "epoch": 0.1093196651021517,
      "grad_norm": 8.695066452026367,
      "learning_rate": 9.84581222036992e-06,
      "loss": 0.0814,
      "step": 66800
    },
    {
      "epoch": 0.10935239554080504,
      "grad_norm": 2.6096384525299072,
      "learning_rate": 9.845746328156402e-06,
      "loss": 0.0811,
      "step": 66820
    },
    {
      "epoch": 0.10938512597945838,
      "grad_norm": 0.7862392663955688,
      "learning_rate": 9.845680435942886e-06,
      "loss": 0.0788,
      "step": 66840
    },
    {
      "epoch": 0.10941785641811172,
      "grad_norm": 4.226408004760742,
      "learning_rate": 9.845614543729368e-06,
      "loss": 0.0777,
      "step": 66860
    },
    {
      "epoch": 0.10945058685676505,
      "grad_norm": 5.010457515716553,
      "learning_rate": 9.845548651515851e-06,
      "loss": 0.0974,
      "step": 66880
    },
    {
      "epoch": 0.1094833172954184,
      "grad_norm": 2.6246588230133057,
      "learning_rate": 9.845482759302335e-06,
      "loss": 0.0886,
      "step": 66900
    },
    {
      "epoch": 0.10951604773407173,
      "grad_norm": 2.4128785133361816,
      "learning_rate": 9.845416867088817e-06,
      "loss": 0.0932,
      "step": 66920
    },
    {
      "epoch": 0.10954877817272507,
      "grad_norm": 3.1775519847869873,
      "learning_rate": 9.8453509748753e-06,
      "loss": 0.0708,
      "step": 66940
    },
    {
      "epoch": 0.10958150861137841,
      "grad_norm": 3.7744545936584473,
      "learning_rate": 9.845285082661784e-06,
      "loss": 0.0694,
      "step": 66960
    },
    {
      "epoch": 0.10961423905003174,
      "grad_norm": 3.7441210746765137,
      "learning_rate": 9.845219190448266e-06,
      "loss": 0.0827,
      "step": 66980
    },
    {
      "epoch": 0.10964696948868509,
      "grad_norm": 3.5851633548736572,
      "learning_rate": 9.84515329823475e-06,
      "loss": 0.0911,
      "step": 67000
    },
    {
      "epoch": 0.10967969992733842,
      "grad_norm": 1.2717993259429932,
      "learning_rate": 9.845087406021231e-06,
      "loss": 0.0692,
      "step": 67020
    },
    {
      "epoch": 0.10971243036599176,
      "grad_norm": 2.328819513320923,
      "learning_rate": 9.845021513807715e-06,
      "loss": 0.1122,
      "step": 67040
    },
    {
      "epoch": 0.10974516080464511,
      "grad_norm": 3.380849838256836,
      "learning_rate": 9.844955621594197e-06,
      "loss": 0.0719,
      "step": 67060
    },
    {
      "epoch": 0.10977789124329844,
      "grad_norm": 4.719742774963379,
      "learning_rate": 9.84488972938068e-06,
      "loss": 0.0649,
      "step": 67080
    },
    {
      "epoch": 0.10981062168195178,
      "grad_norm": 2.041355848312378,
      "learning_rate": 9.844823837167162e-06,
      "loss": 0.088,
      "step": 67100
    },
    {
      "epoch": 0.10984335212060511,
      "grad_norm": 3.9889516830444336,
      "learning_rate": 9.844757944953646e-06,
      "loss": 0.0708,
      "step": 67120
    },
    {
      "epoch": 0.10987608255925846,
      "grad_norm": 4.039348602294922,
      "learning_rate": 9.844692052740128e-06,
      "loss": 0.0788,
      "step": 67140
    },
    {
      "epoch": 0.1099088129979118,
      "grad_norm": 3.25697922706604,
      "learning_rate": 9.844626160526611e-06,
      "loss": 0.0899,
      "step": 67160
    },
    {
      "epoch": 0.10994154343656513,
      "grad_norm": 1.6812125444412231,
      "learning_rate": 9.844560268313095e-06,
      "loss": 0.0793,
      "step": 67180
    },
    {
      "epoch": 0.10997427387521848,
      "grad_norm": 6.157547950744629,
      "learning_rate": 9.844494376099577e-06,
      "loss": 0.099,
      "step": 67200
    },
    {
      "epoch": 0.11000700431387181,
      "grad_norm": 3.515195369720459,
      "learning_rate": 9.84442848388606e-06,
      "loss": 0.09,
      "step": 67220
    },
    {
      "epoch": 0.11003973475252515,
      "grad_norm": 3.2619171142578125,
      "learning_rate": 9.844362591672542e-06,
      "loss": 0.068,
      "step": 67240
    },
    {
      "epoch": 0.1100724651911785,
      "grad_norm": 5.24716854095459,
      "learning_rate": 9.844296699459026e-06,
      "loss": 0.0857,
      "step": 67260
    },
    {
      "epoch": 0.11010519562983183,
      "grad_norm": 2.5688698291778564,
      "learning_rate": 9.844230807245508e-06,
      "loss": 0.074,
      "step": 67280
    },
    {
      "epoch": 0.11013792606848517,
      "grad_norm": 2.4542267322540283,
      "learning_rate": 9.844164915031991e-06,
      "loss": 0.0778,
      "step": 67300
    },
    {
      "epoch": 0.1101706565071385,
      "grad_norm": 2.203705072402954,
      "learning_rate": 9.844099022818475e-06,
      "loss": 0.0818,
      "step": 67320
    },
    {
      "epoch": 0.11020338694579185,
      "grad_norm": 5.44384765625,
      "learning_rate": 9.844033130604957e-06,
      "loss": 0.0924,
      "step": 67340
    },
    {
      "epoch": 0.11023611738444519,
      "grad_norm": 4.70678186416626,
      "learning_rate": 9.84396723839144e-06,
      "loss": 0.0827,
      "step": 67360
    },
    {
      "epoch": 0.11026884782309852,
      "grad_norm": 3.824144124984741,
      "learning_rate": 9.843901346177924e-06,
      "loss": 0.0894,
      "step": 67380
    },
    {
      "epoch": 0.11030157826175187,
      "grad_norm": 3.9825704097747803,
      "learning_rate": 9.843835453964406e-06,
      "loss": 0.0817,
      "step": 67400
    },
    {
      "epoch": 0.1103343087004052,
      "grad_norm": 4.285409927368164,
      "learning_rate": 9.84376956175089e-06,
      "loss": 0.0844,
      "step": 67420
    },
    {
      "epoch": 0.11036703913905854,
      "grad_norm": 3.595896005630493,
      "learning_rate": 9.843703669537371e-06,
      "loss": 0.0751,
      "step": 67440
    },
    {
      "epoch": 0.11039976957771189,
      "grad_norm": 4.28822660446167,
      "learning_rate": 9.843637777323855e-06,
      "loss": 0.0923,
      "step": 67460
    },
    {
      "epoch": 0.11043250001636522,
      "grad_norm": 3.33300518989563,
      "learning_rate": 9.843571885110337e-06,
      "loss": 0.072,
      "step": 67480
    },
    {
      "epoch": 0.11046523045501856,
      "grad_norm": 2.158888578414917,
      "learning_rate": 9.84350599289682e-06,
      "loss": 0.0811,
      "step": 67500
    },
    {
      "epoch": 0.11049796089367189,
      "grad_norm": 3.191394090652466,
      "learning_rate": 9.843440100683304e-06,
      "loss": 0.0793,
      "step": 67520
    },
    {
      "epoch": 0.11053069133232524,
      "grad_norm": 2.9592278003692627,
      "learning_rate": 9.843374208469786e-06,
      "loss": 0.0879,
      "step": 67540
    },
    {
      "epoch": 0.11056342177097858,
      "grad_norm": 3.238255023956299,
      "learning_rate": 9.84330831625627e-06,
      "loss": 0.0922,
      "step": 67560
    },
    {
      "epoch": 0.11059615220963191,
      "grad_norm": 4.336159706115723,
      "learning_rate": 9.843242424042751e-06,
      "loss": 0.0751,
      "step": 67580
    },
    {
      "epoch": 0.11062888264828526,
      "grad_norm": 2.1918671131134033,
      "learning_rate": 9.843176531829235e-06,
      "loss": 0.0742,
      "step": 67600
    },
    {
      "epoch": 0.11066161308693859,
      "grad_norm": 2.1084768772125244,
      "learning_rate": 9.843110639615717e-06,
      "loss": 0.0725,
      "step": 67620
    },
    {
      "epoch": 0.11069434352559193,
      "grad_norm": 3.9971816539764404,
      "learning_rate": 9.8430447474022e-06,
      "loss": 0.0762,
      "step": 67640
    },
    {
      "epoch": 0.11072707396424526,
      "grad_norm": 5.304965496063232,
      "learning_rate": 9.842978855188682e-06,
      "loss": 0.0796,
      "step": 67660
    },
    {
      "epoch": 0.1107598044028986,
      "grad_norm": 2.365542411804199,
      "learning_rate": 9.842912962975166e-06,
      "loss": 0.0745,
      "step": 67680
    },
    {
      "epoch": 0.11079253484155195,
      "grad_norm": 39.04296112060547,
      "learning_rate": 9.84284707076165e-06,
      "loss": 0.1022,
      "step": 67700
    },
    {
      "epoch": 0.11082526528020528,
      "grad_norm": 2.6601033210754395,
      "learning_rate": 9.842781178548131e-06,
      "loss": 0.0899,
      "step": 67720
    },
    {
      "epoch": 0.11085799571885863,
      "grad_norm": 3.3876383304595947,
      "learning_rate": 9.842715286334615e-06,
      "loss": 0.0897,
      "step": 67740
    },
    {
      "epoch": 0.11089072615751196,
      "grad_norm": 3.850581407546997,
      "learning_rate": 9.842649394121098e-06,
      "loss": 0.0917,
      "step": 67760
    },
    {
      "epoch": 0.1109234565961653,
      "grad_norm": 1.2306829690933228,
      "learning_rate": 9.84258350190758e-06,
      "loss": 0.083,
      "step": 67780
    },
    {
      "epoch": 0.11095618703481865,
      "grad_norm": 1.0736113786697388,
      "learning_rate": 9.842517609694064e-06,
      "loss": 0.0627,
      "step": 67800
    },
    {
      "epoch": 0.11098891747347198,
      "grad_norm": 8.949044227600098,
      "learning_rate": 9.842451717480546e-06,
      "loss": 0.0714,
      "step": 67820
    },
    {
      "epoch": 0.11102164791212532,
      "grad_norm": 2.1326065063476562,
      "learning_rate": 9.84238582526703e-06,
      "loss": 0.0859,
      "step": 67840
    },
    {
      "epoch": 0.11105437835077865,
      "grad_norm": 3.1712634563446045,
      "learning_rate": 9.842319933053513e-06,
      "loss": 0.0767,
      "step": 67860
    },
    {
      "epoch": 0.111087108789432,
      "grad_norm": 3.6301801204681396,
      "learning_rate": 9.842254040839995e-06,
      "loss": 0.0684,
      "step": 67880
    },
    {
      "epoch": 0.11111983922808534,
      "grad_norm": 7.597687721252441,
      "learning_rate": 9.842188148626478e-06,
      "loss": 0.0897,
      "step": 67900
    },
    {
      "epoch": 0.11115256966673867,
      "grad_norm": 2.4530656337738037,
      "learning_rate": 9.84212225641296e-06,
      "loss": 0.0635,
      "step": 67920
    },
    {
      "epoch": 0.11118530010539202,
      "grad_norm": 11.335349082946777,
      "learning_rate": 9.842056364199444e-06,
      "loss": 0.0861,
      "step": 67940
    },
    {
      "epoch": 0.11121803054404535,
      "grad_norm": 2.896667718887329,
      "learning_rate": 9.841990471985926e-06,
      "loss": 0.1006,
      "step": 67960
    },
    {
      "epoch": 0.11125076098269869,
      "grad_norm": 2.808471918106079,
      "learning_rate": 9.84192457977241e-06,
      "loss": 0.073,
      "step": 67980
    },
    {
      "epoch": 0.11128349142135203,
      "grad_norm": 6.154181480407715,
      "learning_rate": 9.841858687558891e-06,
      "loss": 0.0911,
      "step": 68000
    },
    {
      "epoch": 0.11131622186000537,
      "grad_norm": 10.262094497680664,
      "learning_rate": 9.841792795345375e-06,
      "loss": 0.0821,
      "step": 68020
    },
    {
      "epoch": 0.11134895229865871,
      "grad_norm": 2.31481671333313,
      "learning_rate": 9.841726903131857e-06,
      "loss": 0.077,
      "step": 68040
    },
    {
      "epoch": 0.11138168273731204,
      "grad_norm": 7.045236110687256,
      "learning_rate": 9.84166101091834e-06,
      "loss": 0.0872,
      "step": 68060
    },
    {
      "epoch": 0.11141441317596538,
      "grad_norm": 2.3700199127197266,
      "learning_rate": 9.841595118704822e-06,
      "loss": 0.0715,
      "step": 68080
    },
    {
      "epoch": 0.11144714361461873,
      "grad_norm": 5.464462757110596,
      "learning_rate": 9.841529226491306e-06,
      "loss": 0.0679,
      "step": 68100
    },
    {
      "epoch": 0.11147987405327206,
      "grad_norm": 4.0911455154418945,
      "learning_rate": 9.84146333427779e-06,
      "loss": 0.086,
      "step": 68120
    },
    {
      "epoch": 0.1115126044919254,
      "grad_norm": 7.042687892913818,
      "learning_rate": 9.841397442064271e-06,
      "loss": 0.114,
      "step": 68140
    },
    {
      "epoch": 0.11154533493057874,
      "grad_norm": 1.2637406587600708,
      "learning_rate": 9.841331549850755e-06,
      "loss": 0.0738,
      "step": 68160
    },
    {
      "epoch": 0.11157806536923208,
      "grad_norm": 1.0422470569610596,
      "learning_rate": 9.841265657637239e-06,
      "loss": 0.0836,
      "step": 68180
    },
    {
      "epoch": 0.11161079580788542,
      "grad_norm": 2.643129825592041,
      "learning_rate": 9.84119976542372e-06,
      "loss": 0.0857,
      "step": 68200
    },
    {
      "epoch": 0.11164352624653875,
      "grad_norm": 5.1199541091918945,
      "learning_rate": 9.841133873210204e-06,
      "loss": 0.0855,
      "step": 68220
    },
    {
      "epoch": 0.1116762566851921,
      "grad_norm": 2.7856175899505615,
      "learning_rate": 9.841067980996688e-06,
      "loss": 0.0911,
      "step": 68240
    },
    {
      "epoch": 0.11170898712384543,
      "grad_norm": 2.5398168563842773,
      "learning_rate": 9.84100208878317e-06,
      "loss": 0.0826,
      "step": 68260
    },
    {
      "epoch": 0.11174171756249877,
      "grad_norm": 2.0581650733947754,
      "learning_rate": 9.840936196569653e-06,
      "loss": 0.0595,
      "step": 68280
    },
    {
      "epoch": 0.1117744480011521,
      "grad_norm": 4.122814178466797,
      "learning_rate": 9.840870304356135e-06,
      "loss": 0.1021,
      "step": 68300
    },
    {
      "epoch": 0.11180717843980545,
      "grad_norm": 6.068617820739746,
      "learning_rate": 9.840804412142619e-06,
      "loss": 0.1004,
      "step": 68320
    },
    {
      "epoch": 0.1118399088784588,
      "grad_norm": 0.868384599685669,
      "learning_rate": 9.8407385199291e-06,
      "loss": 0.0653,
      "step": 68340
    },
    {
      "epoch": 0.11187263931711212,
      "grad_norm": 2.3396129608154297,
      "learning_rate": 9.840672627715584e-06,
      "loss": 0.0843,
      "step": 68360
    },
    {
      "epoch": 0.11190536975576547,
      "grad_norm": 2.607060194015503,
      "learning_rate": 9.840606735502066e-06,
      "loss": 0.0868,
      "step": 68380
    },
    {
      "epoch": 0.1119381001944188,
      "grad_norm": 3.5002691745758057,
      "learning_rate": 9.84054084328855e-06,
      "loss": 0.0724,
      "step": 68400
    },
    {
      "epoch": 0.11197083063307214,
      "grad_norm": 3.871485948562622,
      "learning_rate": 9.840474951075031e-06,
      "loss": 0.0747,
      "step": 68420
    },
    {
      "epoch": 0.11200356107172549,
      "grad_norm": 8.518383026123047,
      "learning_rate": 9.840409058861515e-06,
      "loss": 0.0818,
      "step": 68440
    },
    {
      "epoch": 0.11203629151037882,
      "grad_norm": 3.8581433296203613,
      "learning_rate": 9.840343166647997e-06,
      "loss": 0.0837,
      "step": 68460
    },
    {
      "epoch": 0.11206902194903216,
      "grad_norm": 1.8419688940048218,
      "learning_rate": 9.84027727443448e-06,
      "loss": 0.0946,
      "step": 68480
    },
    {
      "epoch": 0.1121017523876855,
      "grad_norm": 2.5977771282196045,
      "learning_rate": 9.840211382220964e-06,
      "loss": 0.0775,
      "step": 68500
    },
    {
      "epoch": 0.11213448282633884,
      "grad_norm": 4.1341447830200195,
      "learning_rate": 9.840145490007446e-06,
      "loss": 0.1067,
      "step": 68520
    },
    {
      "epoch": 0.11216721326499218,
      "grad_norm": 7.688470840454102,
      "learning_rate": 9.84007959779393e-06,
      "loss": 0.0914,
      "step": 68540
    },
    {
      "epoch": 0.11219994370364551,
      "grad_norm": 2.8780770301818848,
      "learning_rate": 9.840013705580413e-06,
      "loss": 0.0615,
      "step": 68560
    },
    {
      "epoch": 0.11223267414229886,
      "grad_norm": 1.701923131942749,
      "learning_rate": 9.839947813366895e-06,
      "loss": 0.075,
      "step": 68580
    },
    {
      "epoch": 0.11226540458095219,
      "grad_norm": 9.526090621948242,
      "learning_rate": 9.839881921153379e-06,
      "loss": 0.0756,
      "step": 68600
    },
    {
      "epoch": 0.11229813501960553,
      "grad_norm": 2.451164484024048,
      "learning_rate": 9.839816028939862e-06,
      "loss": 0.0798,
      "step": 68620
    },
    {
      "epoch": 0.11233086545825888,
      "grad_norm": 4.401909828186035,
      "learning_rate": 9.839750136726344e-06,
      "loss": 0.0888,
      "step": 68640
    },
    {
      "epoch": 0.11236359589691221,
      "grad_norm": 2.8969223499298096,
      "learning_rate": 9.839684244512828e-06,
      "loss": 0.0823,
      "step": 68660
    },
    {
      "epoch": 0.11239632633556555,
      "grad_norm": 5.888068675994873,
      "learning_rate": 9.83961835229931e-06,
      "loss": 0.0913,
      "step": 68680
    },
    {
      "epoch": 0.11242905677421888,
      "grad_norm": 3.0475194454193115,
      "learning_rate": 9.839552460085793e-06,
      "loss": 0.0863,
      "step": 68700
    },
    {
      "epoch": 0.11246178721287223,
      "grad_norm": 5.2224225997924805,
      "learning_rate": 9.839486567872275e-06,
      "loss": 0.0767,
      "step": 68720
    },
    {
      "epoch": 0.11249451765152557,
      "grad_norm": 3.202993154525757,
      "learning_rate": 9.839420675658759e-06,
      "loss": 0.0751,
      "step": 68740
    },
    {
      "epoch": 0.1125272480901789,
      "grad_norm": 4.024164199829102,
      "learning_rate": 9.83935478344524e-06,
      "loss": 0.0817,
      "step": 68760
    },
    {
      "epoch": 0.11255997852883225,
      "grad_norm": 2.701364755630493,
      "learning_rate": 9.839288891231724e-06,
      "loss": 0.0717,
      "step": 68780
    },
    {
      "epoch": 0.11259270896748558,
      "grad_norm": 1.7277600765228271,
      "learning_rate": 9.839222999018206e-06,
      "loss": 0.0797,
      "step": 68800
    },
    {
      "epoch": 0.11262543940613892,
      "grad_norm": 3.993260383605957,
      "learning_rate": 9.83915710680469e-06,
      "loss": 0.0921,
      "step": 68820
    },
    {
      "epoch": 0.11265816984479227,
      "grad_norm": 3.2413525581359863,
      "learning_rate": 9.839091214591171e-06,
      "loss": 0.1059,
      "step": 68840
    },
    {
      "epoch": 0.1126909002834456,
      "grad_norm": 6.559226036071777,
      "learning_rate": 9.839025322377655e-06,
      "loss": 0.0766,
      "step": 68860
    },
    {
      "epoch": 0.11272363072209894,
      "grad_norm": 3.8028297424316406,
      "learning_rate": 9.838959430164139e-06,
      "loss": 0.0904,
      "step": 68880
    },
    {
      "epoch": 0.11275636116075227,
      "grad_norm": 7.547632694244385,
      "learning_rate": 9.83889353795062e-06,
      "loss": 0.0973,
      "step": 68900
    },
    {
      "epoch": 0.11278909159940562,
      "grad_norm": 4.661667823791504,
      "learning_rate": 9.838827645737104e-06,
      "loss": 0.0891,
      "step": 68920
    },
    {
      "epoch": 0.11282182203805896,
      "grad_norm": 4.144139766693115,
      "learning_rate": 9.838761753523588e-06,
      "loss": 0.0751,
      "step": 68940
    },
    {
      "epoch": 0.11285455247671229,
      "grad_norm": 6.766888618469238,
      "learning_rate": 9.83869586131007e-06,
      "loss": 0.0912,
      "step": 68960
    },
    {
      "epoch": 0.11288728291536564,
      "grad_norm": 0.7505074739456177,
      "learning_rate": 9.838629969096553e-06,
      "loss": 0.0645,
      "step": 68980
    },
    {
      "epoch": 0.11292001335401897,
      "grad_norm": 6.077606201171875,
      "learning_rate": 9.838564076883037e-06,
      "loss": 0.0969,
      "step": 69000
    },
    {
      "epoch": 0.11295274379267231,
      "grad_norm": 5.5114240646362305,
      "learning_rate": 9.838498184669519e-06,
      "loss": 0.0835,
      "step": 69020
    },
    {
      "epoch": 0.11298547423132564,
      "grad_norm": 4.769702434539795,
      "learning_rate": 9.838432292456002e-06,
      "loss": 0.0986,
      "step": 69040
    },
    {
      "epoch": 0.11301820466997899,
      "grad_norm": 3.796638250350952,
      "learning_rate": 9.838366400242484e-06,
      "loss": 0.0785,
      "step": 69060
    },
    {
      "epoch": 0.11305093510863233,
      "grad_norm": 2.696392059326172,
      "learning_rate": 9.838300508028968e-06,
      "loss": 0.0674,
      "step": 69080
    },
    {
      "epoch": 0.11308366554728566,
      "grad_norm": 3.762697458267212,
      "learning_rate": 9.83823461581545e-06,
      "loss": 0.0791,
      "step": 69100
    },
    {
      "epoch": 0.113116395985939,
      "grad_norm": 1.6940537691116333,
      "learning_rate": 9.838168723601933e-06,
      "loss": 0.07,
      "step": 69120
    },
    {
      "epoch": 0.11314912642459234,
      "grad_norm": 2.871441602706909,
      "learning_rate": 9.838102831388415e-06,
      "loss": 0.0848,
      "step": 69140
    },
    {
      "epoch": 0.11318185686324568,
      "grad_norm": 19.104034423828125,
      "learning_rate": 9.838036939174899e-06,
      "loss": 0.0808,
      "step": 69160
    },
    {
      "epoch": 0.11321458730189903,
      "grad_norm": 1.9852080345153809,
      "learning_rate": 9.83797104696138e-06,
      "loss": 0.077,
      "step": 69180
    },
    {
      "epoch": 0.11324731774055236,
      "grad_norm": 4.205296039581299,
      "learning_rate": 9.837905154747864e-06,
      "loss": 0.0768,
      "step": 69200
    },
    {
      "epoch": 0.1132800481792057,
      "grad_norm": 2.6026110649108887,
      "learning_rate": 9.837839262534346e-06,
      "loss": 0.094,
      "step": 69220
    },
    {
      "epoch": 0.11331277861785903,
      "grad_norm": 3.3578357696533203,
      "learning_rate": 9.83777337032083e-06,
      "loss": 0.087,
      "step": 69240
    },
    {
      "epoch": 0.11334550905651238,
      "grad_norm": 2.643195390701294,
      "learning_rate": 9.837707478107313e-06,
      "loss": 0.095,
      "step": 69260
    },
    {
      "epoch": 0.11337823949516572,
      "grad_norm": 4.125986576080322,
      "learning_rate": 9.837641585893795e-06,
      "loss": 0.0781,
      "step": 69280
    },
    {
      "epoch": 0.11341096993381905,
      "grad_norm": 12.34237289428711,
      "learning_rate": 9.837575693680279e-06,
      "loss": 0.0777,
      "step": 69300
    },
    {
      "epoch": 0.1134437003724724,
      "grad_norm": 9.436317443847656,
      "learning_rate": 9.83750980146676e-06,
      "loss": 0.0885,
      "step": 69320
    },
    {
      "epoch": 0.11347643081112573,
      "grad_norm": 5.46121072769165,
      "learning_rate": 9.837443909253244e-06,
      "loss": 0.0907,
      "step": 69340
    },
    {
      "epoch": 0.11350916124977907,
      "grad_norm": 1.2762937545776367,
      "learning_rate": 9.837378017039728e-06,
      "loss": 0.063,
      "step": 69360
    },
    {
      "epoch": 0.11354189168843241,
      "grad_norm": 3.6235475540161133,
      "learning_rate": 9.83731212482621e-06,
      "loss": 0.0825,
      "step": 69380
    },
    {
      "epoch": 0.11357462212708574,
      "grad_norm": 4.324809551239014,
      "learning_rate": 9.837246232612693e-06,
      "loss": 0.0815,
      "step": 69400
    },
    {
      "epoch": 0.11360735256573909,
      "grad_norm": 3.5962352752685547,
      "learning_rate": 9.837180340399177e-06,
      "loss": 0.0804,
      "step": 69420
    },
    {
      "epoch": 0.11364008300439242,
      "grad_norm": 3.906982660293579,
      "learning_rate": 9.837114448185659e-06,
      "loss": 0.0883,
      "step": 69440
    },
    {
      "epoch": 0.11367281344304576,
      "grad_norm": 2.338822603225708,
      "learning_rate": 9.837048555972142e-06,
      "loss": 0.098,
      "step": 69460
    },
    {
      "epoch": 0.11370554388169911,
      "grad_norm": 3.665138006210327,
      "learning_rate": 9.836982663758624e-06,
      "loss": 0.1136,
      "step": 69480
    },
    {
      "epoch": 0.11373827432035244,
      "grad_norm": 5.3276519775390625,
      "learning_rate": 9.836916771545108e-06,
      "loss": 0.0843,
      "step": 69500
    },
    {
      "epoch": 0.11377100475900578,
      "grad_norm": 2.2703096866607666,
      "learning_rate": 9.83685087933159e-06,
      "loss": 0.0876,
      "step": 69520
    },
    {
      "epoch": 0.11380373519765911,
      "grad_norm": 2.975619077682495,
      "learning_rate": 9.836784987118073e-06,
      "loss": 0.0961,
      "step": 69540
    },
    {
      "epoch": 0.11383646563631246,
      "grad_norm": 2.3219494819641113,
      "learning_rate": 9.836719094904555e-06,
      "loss": 0.1122,
      "step": 69560
    },
    {
      "epoch": 0.1138691960749658,
      "grad_norm": 4.735830307006836,
      "learning_rate": 9.836653202691039e-06,
      "loss": 0.0748,
      "step": 69580
    },
    {
      "epoch": 0.11390192651361913,
      "grad_norm": 3.079540491104126,
      "learning_rate": 9.83658731047752e-06,
      "loss": 0.0836,
      "step": 69600
    },
    {
      "epoch": 0.11393465695227248,
      "grad_norm": 3.073575258255005,
      "learning_rate": 9.836521418264004e-06,
      "loss": 0.0678,
      "step": 69620
    },
    {
      "epoch": 0.11396738739092581,
      "grad_norm": 2.9979231357574463,
      "learning_rate": 9.836455526050488e-06,
      "loss": 0.0942,
      "step": 69640
    },
    {
      "epoch": 0.11400011782957915,
      "grad_norm": 5.707431316375732,
      "learning_rate": 9.83638963383697e-06,
      "loss": 0.091,
      "step": 69660
    },
    {
      "epoch": 0.11403284826823248,
      "grad_norm": 5.185136795043945,
      "learning_rate": 9.836323741623453e-06,
      "loss": 0.0841,
      "step": 69680
    },
    {
      "epoch": 0.11406557870688583,
      "grad_norm": 2.45988392829895,
      "learning_rate": 9.836257849409935e-06,
      "loss": 0.0779,
      "step": 69700
    },
    {
      "epoch": 0.11409830914553917,
      "grad_norm": 1.714118242263794,
      "learning_rate": 9.836191957196419e-06,
      "loss": 0.0873,
      "step": 69720
    },
    {
      "epoch": 0.1141310395841925,
      "grad_norm": 5.885463237762451,
      "learning_rate": 9.836126064982902e-06,
      "loss": 0.067,
      "step": 69740
    },
    {
      "epoch": 0.11416377002284585,
      "grad_norm": 3.7628605365753174,
      "learning_rate": 9.836060172769384e-06,
      "loss": 0.0794,
      "step": 69760
    },
    {
      "epoch": 0.11419650046149918,
      "grad_norm": 3.0934252738952637,
      "learning_rate": 9.835994280555868e-06,
      "loss": 0.0767,
      "step": 69780
    },
    {
      "epoch": 0.11422923090015252,
      "grad_norm": 4.276547908782959,
      "learning_rate": 9.835928388342351e-06,
      "loss": 0.0782,
      "step": 69800
    },
    {
      "epoch": 0.11426196133880587,
      "grad_norm": 2.15596342086792,
      "learning_rate": 9.835862496128833e-06,
      "loss": 0.0774,
      "step": 69820
    },
    {
      "epoch": 0.1142946917774592,
      "grad_norm": 2.451028823852539,
      "learning_rate": 9.835796603915317e-06,
      "loss": 0.0722,
      "step": 69840
    },
    {
      "epoch": 0.11432742221611254,
      "grad_norm": 3.1693334579467773,
      "learning_rate": 9.835730711701799e-06,
      "loss": 0.0754,
      "step": 69860
    },
    {
      "epoch": 0.11436015265476587,
      "grad_norm": 5.14016580581665,
      "learning_rate": 9.835664819488282e-06,
      "loss": 0.0868,
      "step": 69880
    },
    {
      "epoch": 0.11439288309341922,
      "grad_norm": 2.3958349227905273,
      "learning_rate": 9.835598927274764e-06,
      "loss": 0.0902,
      "step": 69900
    },
    {
      "epoch": 0.11442561353207256,
      "grad_norm": 2.9707181453704834,
      "learning_rate": 9.835533035061248e-06,
      "loss": 0.0691,
      "step": 69920
    },
    {
      "epoch": 0.11445834397072589,
      "grad_norm": 1.0947778224945068,
      "learning_rate": 9.83546714284773e-06,
      "loss": 0.0905,
      "step": 69940
    },
    {
      "epoch": 0.11449107440937924,
      "grad_norm": 2.6215546131134033,
      "learning_rate": 9.835401250634213e-06,
      "loss": 0.0917,
      "step": 69960
    },
    {
      "epoch": 0.11452380484803257,
      "grad_norm": 3.139328956604004,
      "learning_rate": 9.835335358420697e-06,
      "loss": 0.0803,
      "step": 69980
    },
    {
      "epoch": 0.11455653528668591,
      "grad_norm": 9.38162899017334,
      "learning_rate": 9.835269466207179e-06,
      "loss": 0.0895,
      "step": 70000
    },
    {
      "epoch": 0.11458926572533926,
      "grad_norm": 5.2453203201293945,
      "learning_rate": 9.835203573993662e-06,
      "loss": 0.0866,
      "step": 70020
    },
    {
      "epoch": 0.11462199616399259,
      "grad_norm": 2.576051950454712,
      "learning_rate": 9.835137681780144e-06,
      "loss": 0.0761,
      "step": 70040
    },
    {
      "epoch": 0.11465472660264593,
      "grad_norm": 5.484974384307861,
      "learning_rate": 9.835071789566628e-06,
      "loss": 0.0752,
      "step": 70060
    },
    {
      "epoch": 0.11468745704129926,
      "grad_norm": 4.611161231994629,
      "learning_rate": 9.83500589735311e-06,
      "loss": 0.0717,
      "step": 70080
    },
    {
      "epoch": 0.1147201874799526,
      "grad_norm": 3.6258492469787598,
      "learning_rate": 9.834940005139593e-06,
      "loss": 0.088,
      "step": 70100
    },
    {
      "epoch": 0.11475291791860595,
      "grad_norm": 12.849776268005371,
      "learning_rate": 9.834874112926075e-06,
      "loss": 0.0762,
      "step": 70120
    },
    {
      "epoch": 0.11478564835725928,
      "grad_norm": 5.821876049041748,
      "learning_rate": 9.834808220712559e-06,
      "loss": 0.0926,
      "step": 70140
    },
    {
      "epoch": 0.11481837879591263,
      "grad_norm": 2.8446147441864014,
      "learning_rate": 9.834742328499042e-06,
      "loss": 0.0796,
      "step": 70160
    },
    {
      "epoch": 0.11485110923456596,
      "grad_norm": 4.02296257019043,
      "learning_rate": 9.834676436285524e-06,
      "loss": 0.0692,
      "step": 70180
    },
    {
      "epoch": 0.1148838396732193,
      "grad_norm": 1.6270806789398193,
      "learning_rate": 9.834610544072008e-06,
      "loss": 0.0689,
      "step": 70200
    },
    {
      "epoch": 0.11491657011187265,
      "grad_norm": 3.3956305980682373,
      "learning_rate": 9.834544651858492e-06,
      "loss": 0.075,
      "step": 70220
    },
    {
      "epoch": 0.11494930055052598,
      "grad_norm": 1.6261372566223145,
      "learning_rate": 9.834478759644973e-06,
      "loss": 0.0618,
      "step": 70240
    },
    {
      "epoch": 0.11498203098917932,
      "grad_norm": 4.308150291442871,
      "learning_rate": 9.834412867431457e-06,
      "loss": 0.081,
      "step": 70260
    },
    {
      "epoch": 0.11501476142783265,
      "grad_norm": 2.3012640476226807,
      "learning_rate": 9.834346975217939e-06,
      "loss": 0.0849,
      "step": 70280
    },
    {
      "epoch": 0.115047491866486,
      "grad_norm": 4.994641304016113,
      "learning_rate": 9.834281083004422e-06,
      "loss": 0.0839,
      "step": 70300
    },
    {
      "epoch": 0.11508022230513934,
      "grad_norm": 2.187878370285034,
      "learning_rate": 9.834215190790906e-06,
      "loss": 0.0698,
      "step": 70320
    },
    {
      "epoch": 0.11511295274379267,
      "grad_norm": 5.3392558097839355,
      "learning_rate": 9.834149298577388e-06,
      "loss": 0.0728,
      "step": 70340
    },
    {
      "epoch": 0.11514568318244602,
      "grad_norm": 3.1473388671875,
      "learning_rate": 9.834083406363872e-06,
      "loss": 0.079,
      "step": 70360
    },
    {
      "epoch": 0.11517841362109935,
      "grad_norm": 2.7005746364593506,
      "learning_rate": 9.834017514150353e-06,
      "loss": 0.0685,
      "step": 70380
    },
    {
      "epoch": 0.11521114405975269,
      "grad_norm": 1.5551906824111938,
      "learning_rate": 9.833951621936837e-06,
      "loss": 0.0677,
      "step": 70400
    },
    {
      "epoch": 0.11524387449840602,
      "grad_norm": 1.2256815433502197,
      "learning_rate": 9.833885729723319e-06,
      "loss": 0.0652,
      "step": 70420
    },
    {
      "epoch": 0.11527660493705937,
      "grad_norm": 4.083843231201172,
      "learning_rate": 9.833819837509802e-06,
      "loss": 0.0994,
      "step": 70440
    },
    {
      "epoch": 0.11530933537571271,
      "grad_norm": 1.5441746711730957,
      "learning_rate": 9.833753945296284e-06,
      "loss": 0.0847,
      "step": 70460
    },
    {
      "epoch": 0.11534206581436604,
      "grad_norm": 5.360360145568848,
      "learning_rate": 9.833688053082768e-06,
      "loss": 0.0895,
      "step": 70480
    },
    {
      "epoch": 0.11537479625301938,
      "grad_norm": 1.3156085014343262,
      "learning_rate": 9.83362216086925e-06,
      "loss": 0.0817,
      "step": 70500
    },
    {
      "epoch": 0.11540752669167272,
      "grad_norm": 3.9766931533813477,
      "learning_rate": 9.833556268655733e-06,
      "loss": 0.0799,
      "step": 70520
    },
    {
      "epoch": 0.11544025713032606,
      "grad_norm": 2.3593907356262207,
      "learning_rate": 9.833490376442217e-06,
      "loss": 0.073,
      "step": 70540
    },
    {
      "epoch": 0.1154729875689794,
      "grad_norm": 2.27919864654541,
      "learning_rate": 9.833424484228699e-06,
      "loss": 0.0801,
      "step": 70560
    },
    {
      "epoch": 0.11550571800763274,
      "grad_norm": 4.094422817230225,
      "learning_rate": 9.833358592015183e-06,
      "loss": 0.0864,
      "step": 70580
    },
    {
      "epoch": 0.11553844844628608,
      "grad_norm": 2.293459415435791,
      "learning_rate": 9.833292699801666e-06,
      "loss": 0.0738,
      "step": 70600
    },
    {
      "epoch": 0.11557117888493941,
      "grad_norm": 2.362342596054077,
      "learning_rate": 9.833226807588148e-06,
      "loss": 0.0968,
      "step": 70620
    },
    {
      "epoch": 0.11560390932359275,
      "grad_norm": 1.810653805732727,
      "learning_rate": 9.833160915374632e-06,
      "loss": 0.0769,
      "step": 70640
    },
    {
      "epoch": 0.1156366397622461,
      "grad_norm": 3.1706199645996094,
      "learning_rate": 9.833095023161113e-06,
      "loss": 0.0867,
      "step": 70660
    },
    {
      "epoch": 0.11566937020089943,
      "grad_norm": 4.56528377532959,
      "learning_rate": 9.833029130947597e-06,
      "loss": 0.0843,
      "step": 70680
    },
    {
      "epoch": 0.11570210063955277,
      "grad_norm": 4.536510944366455,
      "learning_rate": 9.83296323873408e-06,
      "loss": 0.0867,
      "step": 70700
    },
    {
      "epoch": 0.1157348310782061,
      "grad_norm": 2.366956949234009,
      "learning_rate": 9.832897346520563e-06,
      "loss": 0.0683,
      "step": 70720
    },
    {
      "epoch": 0.11576756151685945,
      "grad_norm": 0.9185577630996704,
      "learning_rate": 9.832831454307046e-06,
      "loss": 0.0666,
      "step": 70740
    },
    {
      "epoch": 0.1158002919555128,
      "grad_norm": 7.943125247955322,
      "learning_rate": 9.832765562093528e-06,
      "loss": 0.0764,
      "step": 70760
    },
    {
      "epoch": 0.11583302239416612,
      "grad_norm": 17.78662109375,
      "learning_rate": 9.832699669880012e-06,
      "loss": 0.083,
      "step": 70780
    },
    {
      "epoch": 0.11586575283281947,
      "grad_norm": 5.631485939025879,
      "learning_rate": 9.832633777666494e-06,
      "loss": 0.0953,
      "step": 70800
    },
    {
      "epoch": 0.1158984832714728,
      "grad_norm": 2.305562734603882,
      "learning_rate": 9.832567885452977e-06,
      "loss": 0.0829,
      "step": 70820
    },
    {
      "epoch": 0.11593121371012614,
      "grad_norm": 2.3578498363494873,
      "learning_rate": 9.832501993239459e-06,
      "loss": 0.1019,
      "step": 70840
    },
    {
      "epoch": 0.11596394414877949,
      "grad_norm": 1.9047518968582153,
      "learning_rate": 9.832436101025943e-06,
      "loss": 0.0723,
      "step": 70860
    },
    {
      "epoch": 0.11599667458743282,
      "grad_norm": 1.5391632318496704,
      "learning_rate": 9.832370208812424e-06,
      "loss": 0.0559,
      "step": 70880
    },
    {
      "epoch": 0.11602940502608616,
      "grad_norm": 4.450434684753418,
      "learning_rate": 9.832304316598908e-06,
      "loss": 0.0815,
      "step": 70900
    },
    {
      "epoch": 0.1160621354647395,
      "grad_norm": 4.209890842437744,
      "learning_rate": 9.83223842438539e-06,
      "loss": 0.0754,
      "step": 70920
    },
    {
      "epoch": 0.11609486590339284,
      "grad_norm": 3.265993118286133,
      "learning_rate": 9.832172532171874e-06,
      "loss": 0.0754,
      "step": 70940
    },
    {
      "epoch": 0.11612759634204618,
      "grad_norm": 7.031725883483887,
      "learning_rate": 9.832106639958357e-06,
      "loss": 0.0811,
      "step": 70960
    },
    {
      "epoch": 0.11616032678069951,
      "grad_norm": 2.3651492595672607,
      "learning_rate": 9.832040747744839e-06,
      "loss": 0.0802,
      "step": 70980
    },
    {
      "epoch": 0.11619305721935286,
      "grad_norm": 0.5619605183601379,
      "learning_rate": 9.831974855531323e-06,
      "loss": 0.0656,
      "step": 71000
    },
    {
      "epoch": 0.11622578765800619,
      "grad_norm": 2.749824047088623,
      "learning_rate": 9.831908963317806e-06,
      "loss": 0.0916,
      "step": 71020
    },
    {
      "epoch": 0.11625851809665953,
      "grad_norm": 11.324130058288574,
      "learning_rate": 9.83184307110429e-06,
      "loss": 0.0909,
      "step": 71040
    },
    {
      "epoch": 0.11629124853531288,
      "grad_norm": 4.917883396148682,
      "learning_rate": 9.831777178890772e-06,
      "loss": 0.103,
      "step": 71060
    },
    {
      "epoch": 0.11632397897396621,
      "grad_norm": 1.9267698526382446,
      "learning_rate": 9.831711286677255e-06,
      "loss": 0.0854,
      "step": 71080
    },
    {
      "epoch": 0.11635670941261955,
      "grad_norm": 2.5979785919189453,
      "learning_rate": 9.831645394463737e-06,
      "loss": 0.0819,
      "step": 71100
    },
    {
      "epoch": 0.11638943985127288,
      "grad_norm": 10.358926773071289,
      "learning_rate": 9.83157950225022e-06,
      "loss": 0.1022,
      "step": 71120
    },
    {
      "epoch": 0.11642217028992623,
      "grad_norm": 3.7635741233825684,
      "learning_rate": 9.831513610036703e-06,
      "loss": 0.0733,
      "step": 71140
    },
    {
      "epoch": 0.11645490072857956,
      "grad_norm": 3.1541764736175537,
      "learning_rate": 9.831447717823186e-06,
      "loss": 0.0794,
      "step": 71160
    },
    {
      "epoch": 0.1164876311672329,
      "grad_norm": 0.4219098389148712,
      "learning_rate": 9.831381825609668e-06,
      "loss": 0.0941,
      "step": 71180
    },
    {
      "epoch": 0.11652036160588625,
      "grad_norm": 3.941295862197876,
      "learning_rate": 9.831315933396152e-06,
      "loss": 0.0666,
      "step": 71200
    },
    {
      "epoch": 0.11655309204453958,
      "grad_norm": 6.087751865386963,
      "learning_rate": 9.831250041182634e-06,
      "loss": 0.0912,
      "step": 71220
    },
    {
      "epoch": 0.11658582248319292,
      "grad_norm": 0.8788993954658508,
      "learning_rate": 9.831184148969117e-06,
      "loss": 0.0771,
      "step": 71240
    },
    {
      "epoch": 0.11661855292184625,
      "grad_norm": 3.814671039581299,
      "learning_rate": 9.831118256755599e-06,
      "loss": 0.0682,
      "step": 71260
    },
    {
      "epoch": 0.1166512833604996,
      "grad_norm": 4.134276866912842,
      "learning_rate": 9.831052364542083e-06,
      "loss": 0.084,
      "step": 71280
    },
    {
      "epoch": 0.11668401379915294,
      "grad_norm": 6.187695026397705,
      "learning_rate": 9.830986472328565e-06,
      "loss": 0.0885,
      "step": 71300
    },
    {
      "epoch": 0.11671674423780627,
      "grad_norm": 1.8821446895599365,
      "learning_rate": 9.830920580115048e-06,
      "loss": 0.089,
      "step": 71320
    },
    {
      "epoch": 0.11674947467645962,
      "grad_norm": 8.782118797302246,
      "learning_rate": 9.830854687901532e-06,
      "loss": 0.0843,
      "step": 71340
    },
    {
      "epoch": 0.11678220511511295,
      "grad_norm": 3.2012693881988525,
      "learning_rate": 9.830788795688014e-06,
      "loss": 0.0676,
      "step": 71360
    },
    {
      "epoch": 0.11681493555376629,
      "grad_norm": 13.405035972595215,
      "learning_rate": 9.830722903474497e-06,
      "loss": 0.0979,
      "step": 71380
    },
    {
      "epoch": 0.11684766599241964,
      "grad_norm": 8.282715797424316,
      "learning_rate": 9.83065701126098e-06,
      "loss": 0.094,
      "step": 71400
    },
    {
      "epoch": 0.11688039643107297,
      "grad_norm": 4.4399213790893555,
      "learning_rate": 9.830591119047463e-06,
      "loss": 0.0818,
      "step": 71420
    },
    {
      "epoch": 0.11691312686972631,
      "grad_norm": 4.6700968742370605,
      "learning_rate": 9.830525226833946e-06,
      "loss": 0.0937,
      "step": 71440
    },
    {
      "epoch": 0.11694585730837964,
      "grad_norm": 2.1062095165252686,
      "learning_rate": 9.83045933462043e-06,
      "loss": 0.0823,
      "step": 71460
    },
    {
      "epoch": 0.11697858774703299,
      "grad_norm": 13.91811752319336,
      "learning_rate": 9.830393442406912e-06,
      "loss": 0.0772,
      "step": 71480
    },
    {
      "epoch": 0.11701131818568633,
      "grad_norm": 4.384855270385742,
      "learning_rate": 9.830327550193395e-06,
      "loss": 0.096,
      "step": 71500
    },
    {
      "epoch": 0.11704404862433966,
      "grad_norm": 2.6210618019104004,
      "learning_rate": 9.830261657979877e-06,
      "loss": 0.0871,
      "step": 71520
    },
    {
      "epoch": 0.117076779062993,
      "grad_norm": 14.845714569091797,
      "learning_rate": 9.83019576576636e-06,
      "loss": 0.0937,
      "step": 71540
    },
    {
      "epoch": 0.11710950950164634,
      "grad_norm": 3.073533296585083,
      "learning_rate": 9.830129873552843e-06,
      "loss": 0.0724,
      "step": 71560
    },
    {
      "epoch": 0.11714223994029968,
      "grad_norm": 4.357224464416504,
      "learning_rate": 9.830063981339326e-06,
      "loss": 0.0818,
      "step": 71580
    },
    {
      "epoch": 0.11717497037895303,
      "grad_norm": 7.444167137145996,
      "learning_rate": 9.829998089125808e-06,
      "loss": 0.0758,
      "step": 71600
    },
    {
      "epoch": 0.11720770081760636,
      "grad_norm": 3.9515109062194824,
      "learning_rate": 9.829932196912292e-06,
      "loss": 0.0698,
      "step": 71620
    },
    {
      "epoch": 0.1172404312562597,
      "grad_norm": 5.42885160446167,
      "learning_rate": 9.829866304698774e-06,
      "loss": 0.0857,
      "step": 71640
    },
    {
      "epoch": 0.11727316169491303,
      "grad_norm": 2.8234989643096924,
      "learning_rate": 9.829800412485257e-06,
      "loss": 0.065,
      "step": 71660
    },
    {
      "epoch": 0.11730589213356638,
      "grad_norm": 2.291546583175659,
      "learning_rate": 9.829734520271739e-06,
      "loss": 0.0844,
      "step": 71680
    },
    {
      "epoch": 0.11733862257221972,
      "grad_norm": 3.3663249015808105,
      "learning_rate": 9.829668628058223e-06,
      "loss": 0.0673,
      "step": 71700
    },
    {
      "epoch": 0.11737135301087305,
      "grad_norm": 5.051304817199707,
      "learning_rate": 9.829602735844706e-06,
      "loss": 0.0811,
      "step": 71720
    },
    {
      "epoch": 0.1174040834495264,
      "grad_norm": 4.731973171234131,
      "learning_rate": 9.829536843631188e-06,
      "loss": 0.0913,
      "step": 71740
    },
    {
      "epoch": 0.11743681388817973,
      "grad_norm": 2.9955692291259766,
      "learning_rate": 9.829470951417672e-06,
      "loss": 0.0962,
      "step": 71760
    },
    {
      "epoch": 0.11746954432683307,
      "grad_norm": 1.5887172222137451,
      "learning_rate": 9.829405059204155e-06,
      "loss": 0.0674,
      "step": 71780
    },
    {
      "epoch": 0.1175022747654864,
      "grad_norm": 3.4456288814544678,
      "learning_rate": 9.829339166990637e-06,
      "loss": 0.1003,
      "step": 71800
    },
    {
      "epoch": 0.11753500520413974,
      "grad_norm": 2.9126787185668945,
      "learning_rate": 9.829273274777121e-06,
      "loss": 0.0712,
      "step": 71820
    },
    {
      "epoch": 0.11756773564279309,
      "grad_norm": 3.874530553817749,
      "learning_rate": 9.829207382563604e-06,
      "loss": 0.0674,
      "step": 71840
    },
    {
      "epoch": 0.11760046608144642,
      "grad_norm": 5.725199222564697,
      "learning_rate": 9.829141490350086e-06,
      "loss": 0.0768,
      "step": 71860
    },
    {
      "epoch": 0.11763319652009976,
      "grad_norm": 3.0429201126098633,
      "learning_rate": 9.82907559813657e-06,
      "loss": 0.0787,
      "step": 71880
    },
    {
      "epoch": 0.1176659269587531,
      "grad_norm": 6.235903739929199,
      "learning_rate": 9.829009705923052e-06,
      "loss": 0.0914,
      "step": 71900
    },
    {
      "epoch": 0.11769865739740644,
      "grad_norm": 2.395681142807007,
      "learning_rate": 9.828943813709535e-06,
      "loss": 0.0841,
      "step": 71920
    },
    {
      "epoch": 0.11773138783605978,
      "grad_norm": 1.4488463401794434,
      "learning_rate": 9.828877921496017e-06,
      "loss": 0.0655,
      "step": 71940
    },
    {
      "epoch": 0.11776411827471311,
      "grad_norm": 2.7701902389526367,
      "learning_rate": 9.828812029282501e-06,
      "loss": 0.0864,
      "step": 71960
    },
    {
      "epoch": 0.11779684871336646,
      "grad_norm": 2.4178085327148438,
      "learning_rate": 9.828746137068983e-06,
      "loss": 0.0892,
      "step": 71980
    },
    {
      "epoch": 0.11782957915201979,
      "grad_norm": 4.278617858886719,
      "learning_rate": 9.828680244855466e-06,
      "loss": 0.0884,
      "step": 72000
    },
    {
      "epoch": 0.11786230959067313,
      "grad_norm": 6.008898735046387,
      "learning_rate": 9.828614352641948e-06,
      "loss": 0.0732,
      "step": 72020
    },
    {
      "epoch": 0.11789504002932648,
      "grad_norm": 3.956624984741211,
      "learning_rate": 9.828548460428432e-06,
      "loss": 0.08,
      "step": 72040
    },
    {
      "epoch": 0.11792777046797981,
      "grad_norm": 3.6389973163604736,
      "learning_rate": 9.828482568214914e-06,
      "loss": 0.0845,
      "step": 72060
    },
    {
      "epoch": 0.11796050090663315,
      "grad_norm": 4.888363361358643,
      "learning_rate": 9.828416676001397e-06,
      "loss": 0.0778,
      "step": 72080
    },
    {
      "epoch": 0.11799323134528648,
      "grad_norm": 1.4312527179718018,
      "learning_rate": 9.828350783787881e-06,
      "loss": 0.0757,
      "step": 72100
    },
    {
      "epoch": 0.11802596178393983,
      "grad_norm": 5.816091537475586,
      "learning_rate": 9.828284891574363e-06,
      "loss": 0.0682,
      "step": 72120
    },
    {
      "epoch": 0.11805869222259317,
      "grad_norm": 2.9807815551757812,
      "learning_rate": 9.828218999360846e-06,
      "loss": 0.077,
      "step": 72140
    },
    {
      "epoch": 0.1180914226612465,
      "grad_norm": 1.296269416809082,
      "learning_rate": 9.828153107147328e-06,
      "loss": 0.0754,
      "step": 72160
    },
    {
      "epoch": 0.11812415309989985,
      "grad_norm": 2.3861567974090576,
      "learning_rate": 9.828087214933812e-06,
      "loss": 0.0778,
      "step": 72180
    },
    {
      "epoch": 0.11815688353855318,
      "grad_norm": 0.9765966534614563,
      "learning_rate": 9.828021322720295e-06,
      "loss": 0.0648,
      "step": 72200
    },
    {
      "epoch": 0.11818961397720652,
      "grad_norm": 3.032108783721924,
      "learning_rate": 9.827955430506777e-06,
      "loss": 0.0844,
      "step": 72220
    },
    {
      "epoch": 0.11822234441585987,
      "grad_norm": 3.1008594036102295,
      "learning_rate": 9.827889538293261e-06,
      "loss": 0.0746,
      "step": 72240
    },
    {
      "epoch": 0.1182550748545132,
      "grad_norm": 7.728011131286621,
      "learning_rate": 9.827823646079745e-06,
      "loss": 0.0724,
      "step": 72260
    },
    {
      "epoch": 0.11828780529316654,
      "grad_norm": 2.9022579193115234,
      "learning_rate": 9.827757753866226e-06,
      "loss": 0.0801,
      "step": 72280
    },
    {
      "epoch": 0.11832053573181987,
      "grad_norm": 5.908649444580078,
      "learning_rate": 9.82769186165271e-06,
      "loss": 0.0876,
      "step": 72300
    },
    {
      "epoch": 0.11835326617047322,
      "grad_norm": 3.960819721221924,
      "learning_rate": 9.827625969439192e-06,
      "loss": 0.0638,
      "step": 72320
    },
    {
      "epoch": 0.11838599660912656,
      "grad_norm": 2.246636152267456,
      "learning_rate": 9.827560077225675e-06,
      "loss": 0.0686,
      "step": 72340
    },
    {
      "epoch": 0.11841872704777989,
      "grad_norm": 3.725071430206299,
      "learning_rate": 9.827494185012157e-06,
      "loss": 0.0826,
      "step": 72360
    },
    {
      "epoch": 0.11845145748643324,
      "grad_norm": 0.5412582755088806,
      "learning_rate": 9.827428292798641e-06,
      "loss": 0.0783,
      "step": 72380
    },
    {
      "epoch": 0.11848418792508657,
      "grad_norm": 2.4803335666656494,
      "learning_rate": 9.827362400585123e-06,
      "loss": 0.0854,
      "step": 72400
    },
    {
      "epoch": 0.11851691836373991,
      "grad_norm": 3.1639177799224854,
      "learning_rate": 9.827296508371606e-06,
      "loss": 0.0629,
      "step": 72420
    },
    {
      "epoch": 0.11854964880239326,
      "grad_norm": 1.9917316436767578,
      "learning_rate": 9.82723061615809e-06,
      "loss": 0.0777,
      "step": 72440
    },
    {
      "epoch": 0.11858237924104659,
      "grad_norm": 4.945435523986816,
      "learning_rate": 9.827164723944572e-06,
      "loss": 0.0945,
      "step": 72460
    },
    {
      "epoch": 0.11861510967969993,
      "grad_norm": 5.001636981964111,
      "learning_rate": 9.827098831731056e-06,
      "loss": 0.087,
      "step": 72480
    },
    {
      "epoch": 0.11864784011835326,
      "grad_norm": 3.73268461227417,
      "learning_rate": 9.827032939517537e-06,
      "loss": 0.0954,
      "step": 72500
    },
    {
      "epoch": 0.1186805705570066,
      "grad_norm": 3.8352444171905518,
      "learning_rate": 9.826967047304021e-06,
      "loss": 0.0854,
      "step": 72520
    },
    {
      "epoch": 0.11871330099565994,
      "grad_norm": 2.8066675662994385,
      "learning_rate": 9.826901155090503e-06,
      "loss": 0.0683,
      "step": 72540
    },
    {
      "epoch": 0.11874603143431328,
      "grad_norm": 3.355156898498535,
      "learning_rate": 9.826835262876986e-06,
      "loss": 0.0806,
      "step": 72560
    },
    {
      "epoch": 0.11877876187296663,
      "grad_norm": 2.4909451007843018,
      "learning_rate": 9.82676937066347e-06,
      "loss": 0.0841,
      "step": 72580
    },
    {
      "epoch": 0.11881149231161996,
      "grad_norm": 2.6765854358673096,
      "learning_rate": 9.826703478449952e-06,
      "loss": 0.0647,
      "step": 72600
    },
    {
      "epoch": 0.1188442227502733,
      "grad_norm": 1.436600923538208,
      "learning_rate": 9.826637586236436e-06,
      "loss": 0.1109,
      "step": 72620
    },
    {
      "epoch": 0.11887695318892663,
      "grad_norm": 1.9746299982070923,
      "learning_rate": 9.826571694022919e-06,
      "loss": 0.0924,
      "step": 72640
    },
    {
      "epoch": 0.11890968362757998,
      "grad_norm": 4.487030506134033,
      "learning_rate": 9.826505801809401e-06,
      "loss": 0.0819,
      "step": 72660
    },
    {
      "epoch": 0.11894241406623332,
      "grad_norm": 4.703734874725342,
      "learning_rate": 9.826439909595885e-06,
      "loss": 0.0867,
      "step": 72680
    },
    {
      "epoch": 0.11897514450488665,
      "grad_norm": 3.269940137863159,
      "learning_rate": 9.826374017382366e-06,
      "loss": 0.0934,
      "step": 72700
    },
    {
      "epoch": 0.11900787494354,
      "grad_norm": 4.526777267456055,
      "learning_rate": 9.82630812516885e-06,
      "loss": 0.0783,
      "step": 72720
    },
    {
      "epoch": 0.11904060538219333,
      "grad_norm": 2.0787692070007324,
      "learning_rate": 9.826242232955332e-06,
      "loss": 0.0835,
      "step": 72740
    },
    {
      "epoch": 0.11907333582084667,
      "grad_norm": 3.8794167041778564,
      "learning_rate": 9.826176340741816e-06,
      "loss": 0.0926,
      "step": 72760
    },
    {
      "epoch": 0.11910606625950002,
      "grad_norm": 1.994451880455017,
      "learning_rate": 9.826110448528299e-06,
      "loss": 0.0875,
      "step": 72780
    },
    {
      "epoch": 0.11913879669815335,
      "grad_norm": 4.700717449188232,
      "learning_rate": 9.826044556314781e-06,
      "loss": 0.0801,
      "step": 72800
    },
    {
      "epoch": 0.11917152713680669,
      "grad_norm": 3.0843513011932373,
      "learning_rate": 9.825978664101265e-06,
      "loss": 0.0658,
      "step": 72820
    },
    {
      "epoch": 0.11920425757546002,
      "grad_norm": 1.8605602979660034,
      "learning_rate": 9.825912771887747e-06,
      "loss": 0.0869,
      "step": 72840
    },
    {
      "epoch": 0.11923698801411337,
      "grad_norm": 1.6664364337921143,
      "learning_rate": 9.82584687967423e-06,
      "loss": 0.0713,
      "step": 72860
    },
    {
      "epoch": 0.11926971845276671,
      "grad_norm": 6.262058258056641,
      "learning_rate": 9.825780987460712e-06,
      "loss": 0.0866,
      "step": 72880
    },
    {
      "epoch": 0.11930244889142004,
      "grad_norm": 2.3549909591674805,
      "learning_rate": 9.825715095247196e-06,
      "loss": 0.07,
      "step": 72900
    },
    {
      "epoch": 0.11933517933007338,
      "grad_norm": 2.9427695274353027,
      "learning_rate": 9.825649203033677e-06,
      "loss": 0.0716,
      "step": 72920
    },
    {
      "epoch": 0.11936790976872672,
      "grad_norm": 13.0538969039917,
      "learning_rate": 9.825583310820161e-06,
      "loss": 0.0758,
      "step": 72940
    },
    {
      "epoch": 0.11940064020738006,
      "grad_norm": 4.690364837646484,
      "learning_rate": 9.825517418606643e-06,
      "loss": 0.086,
      "step": 72960
    },
    {
      "epoch": 0.1194333706460334,
      "grad_norm": 5.173039436340332,
      "learning_rate": 9.825451526393127e-06,
      "loss": 0.0642,
      "step": 72980
    },
    {
      "epoch": 0.11946610108468673,
      "grad_norm": 0.2849231958389282,
      "learning_rate": 9.82538563417961e-06,
      "loss": 0.0704,
      "step": 73000
    },
    {
      "epoch": 0.11949883152334008,
      "grad_norm": 1.9192980527877808,
      "learning_rate": 9.825319741966092e-06,
      "loss": 0.0703,
      "step": 73020
    },
    {
      "epoch": 0.11953156196199341,
      "grad_norm": 2.836124897003174,
      "learning_rate": 9.825253849752576e-06,
      "loss": 0.0879,
      "step": 73040
    },
    {
      "epoch": 0.11956429240064675,
      "grad_norm": 1.5398848056793213,
      "learning_rate": 9.82518795753906e-06,
      "loss": 0.0917,
      "step": 73060
    },
    {
      "epoch": 0.1195970228393001,
      "grad_norm": 2.9230730533599854,
      "learning_rate": 9.825122065325541e-06,
      "loss": 0.0866,
      "step": 73080
    },
    {
      "epoch": 0.11962975327795343,
      "grad_norm": 2.14184308052063,
      "learning_rate": 9.825056173112025e-06,
      "loss": 0.068,
      "step": 73100
    },
    {
      "epoch": 0.11966248371660677,
      "grad_norm": 13.523097038269043,
      "learning_rate": 9.824990280898507e-06,
      "loss": 0.0783,
      "step": 73120
    },
    {
      "epoch": 0.1196952141552601,
      "grad_norm": 2.2885854244232178,
      "learning_rate": 9.82492438868499e-06,
      "loss": 0.0831,
      "step": 73140
    },
    {
      "epoch": 0.11972794459391345,
      "grad_norm": 2.022108554840088,
      "learning_rate": 9.824858496471474e-06,
      "loss": 0.0697,
      "step": 73160
    },
    {
      "epoch": 0.11976067503256678,
      "grad_norm": 4.5188164710998535,
      "learning_rate": 9.824792604257956e-06,
      "loss": 0.0883,
      "step": 73180
    },
    {
      "epoch": 0.11979340547122012,
      "grad_norm": 4.898582935333252,
      "learning_rate": 9.82472671204444e-06,
      "loss": 0.0819,
      "step": 73200
    },
    {
      "epoch": 0.11982613590987347,
      "grad_norm": 2.718080997467041,
      "learning_rate": 9.824660819830921e-06,
      "loss": 0.075,
      "step": 73220
    },
    {
      "epoch": 0.1198588663485268,
      "grad_norm": 4.575155258178711,
      "learning_rate": 9.824594927617405e-06,
      "loss": 0.0826,
      "step": 73240
    },
    {
      "epoch": 0.11989159678718014,
      "grad_norm": 1.8821381330490112,
      "learning_rate": 9.824529035403887e-06,
      "loss": 0.088,
      "step": 73260
    },
    {
      "epoch": 0.11992432722583347,
      "grad_norm": 4.716497898101807,
      "learning_rate": 9.82446314319037e-06,
      "loss": 0.0855,
      "step": 73280
    },
    {
      "epoch": 0.11995705766448682,
      "grad_norm": 4.7839155197143555,
      "learning_rate": 9.824397250976852e-06,
      "loss": 0.0693,
      "step": 73300
    },
    {
      "epoch": 0.11998978810314016,
      "grad_norm": 3.1806352138519287,
      "learning_rate": 9.824331358763336e-06,
      "loss": 0.0847,
      "step": 73320
    },
    {
      "epoch": 0.1200225185417935,
      "grad_norm": 2.415797710418701,
      "learning_rate": 9.824265466549818e-06,
      "loss": 0.0881,
      "step": 73340
    },
    {
      "epoch": 0.12005524898044684,
      "grad_norm": 7.461503028869629,
      "learning_rate": 9.824199574336301e-06,
      "loss": 0.0798,
      "step": 73360
    },
    {
      "epoch": 0.12008797941910017,
      "grad_norm": 1.5177465677261353,
      "learning_rate": 9.824133682122785e-06,
      "loss": 0.0763,
      "step": 73380
    },
    {
      "epoch": 0.12012070985775351,
      "grad_norm": 1.768959641456604,
      "learning_rate": 9.824067789909267e-06,
      "loss": 0.0729,
      "step": 73400
    },
    {
      "epoch": 0.12015344029640686,
      "grad_norm": 2.0554704666137695,
      "learning_rate": 9.82400189769575e-06,
      "loss": 0.0826,
      "step": 73420
    },
    {
      "epoch": 0.12018617073506019,
      "grad_norm": 11.227752685546875,
      "learning_rate": 9.823936005482234e-06,
      "loss": 0.0956,
      "step": 73440
    },
    {
      "epoch": 0.12021890117371353,
      "grad_norm": 3.482424259185791,
      "learning_rate": 9.823870113268716e-06,
      "loss": 0.0753,
      "step": 73460
    },
    {
      "epoch": 0.12025163161236686,
      "grad_norm": 1.0946460962295532,
      "learning_rate": 9.8238042210552e-06,
      "loss": 0.0825,
      "step": 73480
    },
    {
      "epoch": 0.12028436205102021,
      "grad_norm": 1.8982516527175903,
      "learning_rate": 9.823738328841683e-06,
      "loss": 0.0796,
      "step": 73500
    },
    {
      "epoch": 0.12031709248967355,
      "grad_norm": 5.193548679351807,
      "learning_rate": 9.823672436628165e-06,
      "loss": 0.0638,
      "step": 73520
    },
    {
      "epoch": 0.12034982292832688,
      "grad_norm": 3.7972030639648438,
      "learning_rate": 9.823606544414648e-06,
      "loss": 0.0613,
      "step": 73540
    },
    {
      "epoch": 0.12038255336698023,
      "grad_norm": 2.0127627849578857,
      "learning_rate": 9.82354065220113e-06,
      "loss": 0.07,
      "step": 73560
    },
    {
      "epoch": 0.12041528380563356,
      "grad_norm": 2.8567113876342773,
      "learning_rate": 9.823474759987614e-06,
      "loss": 0.0715,
      "step": 73580
    },
    {
      "epoch": 0.1204480142442869,
      "grad_norm": 3.593153715133667,
      "learning_rate": 9.823408867774096e-06,
      "loss": 0.1004,
      "step": 73600
    },
    {
      "epoch": 0.12048074468294025,
      "grad_norm": 6.878698825836182,
      "learning_rate": 9.82334297556058e-06,
      "loss": 0.0841,
      "step": 73620
    },
    {
      "epoch": 0.12051347512159358,
      "grad_norm": 2.8439652919769287,
      "learning_rate": 9.823277083347061e-06,
      "loss": 0.0832,
      "step": 73640
    },
    {
      "epoch": 0.12054620556024692,
      "grad_norm": 2.530097484588623,
      "learning_rate": 9.823211191133545e-06,
      "loss": 0.0727,
      "step": 73660
    },
    {
      "epoch": 0.12057893599890025,
      "grad_norm": 4.365729808807373,
      "learning_rate": 9.823145298920027e-06,
      "loss": 0.0773,
      "step": 73680
    },
    {
      "epoch": 0.1206116664375536,
      "grad_norm": 8.8096284866333,
      "learning_rate": 9.82307940670651e-06,
      "loss": 0.0773,
      "step": 73700
    },
    {
      "epoch": 0.12064439687620694,
      "grad_norm": 4.117755889892578,
      "learning_rate": 9.823013514492992e-06,
      "loss": 0.0816,
      "step": 73720
    },
    {
      "epoch": 0.12067712731486027,
      "grad_norm": 4.137965679168701,
      "learning_rate": 9.822947622279476e-06,
      "loss": 0.0835,
      "step": 73740
    },
    {
      "epoch": 0.12070985775351362,
      "grad_norm": 6.470649242401123,
      "learning_rate": 9.822881730065958e-06,
      "loss": 0.0845,
      "step": 73760
    },
    {
      "epoch": 0.12074258819216695,
      "grad_norm": 2.8854544162750244,
      "learning_rate": 9.822815837852441e-06,
      "loss": 0.0745,
      "step": 73780
    },
    {
      "epoch": 0.12077531863082029,
      "grad_norm": 4.840182304382324,
      "learning_rate": 9.822749945638925e-06,
      "loss": 0.0753,
      "step": 73800
    },
    {
      "epoch": 0.12080804906947364,
      "grad_norm": 2.0471653938293457,
      "learning_rate": 9.822684053425408e-06,
      "loss": 0.0798,
      "step": 73820
    },
    {
      "epoch": 0.12084077950812697,
      "grad_norm": 18.27364730834961,
      "learning_rate": 9.82261816121189e-06,
      "loss": 0.0913,
      "step": 73840
    },
    {
      "epoch": 0.12087350994678031,
      "grad_norm": 4.187452793121338,
      "learning_rate": 9.822552268998374e-06,
      "loss": 0.0731,
      "step": 73860
    },
    {
      "epoch": 0.12090624038543364,
      "grad_norm": 14.756621360778809,
      "learning_rate": 9.822486376784857e-06,
      "loss": 0.0861,
      "step": 73880
    },
    {
      "epoch": 0.12093897082408699,
      "grad_norm": 5.358825206756592,
      "learning_rate": 9.82242048457134e-06,
      "loss": 0.086,
      "step": 73900
    },
    {
      "epoch": 0.12097170126274032,
      "grad_norm": 15.506087303161621,
      "learning_rate": 9.822354592357823e-06,
      "loss": 0.0775,
      "step": 73920
    },
    {
      "epoch": 0.12100443170139366,
      "grad_norm": 3.1848342418670654,
      "learning_rate": 9.822288700144305e-06,
      "loss": 0.0848,
      "step": 73940
    },
    {
      "epoch": 0.121037162140047,
      "grad_norm": 1.4159306287765503,
      "learning_rate": 9.822222807930788e-06,
      "loss": 0.0743,
      "step": 73960
    },
    {
      "epoch": 0.12106989257870034,
      "grad_norm": 1.158510446548462,
      "learning_rate": 9.82215691571727e-06,
      "loss": 0.0821,
      "step": 73980
    },
    {
      "epoch": 0.12110262301735368,
      "grad_norm": 10.218517303466797,
      "learning_rate": 9.822091023503754e-06,
      "loss": 0.0714,
      "step": 74000
    },
    {
      "epoch": 0.12113535345600701,
      "grad_norm": 2.5594820976257324,
      "learning_rate": 9.822025131290236e-06,
      "loss": 0.0795,
      "step": 74020
    },
    {
      "epoch": 0.12116808389466036,
      "grad_norm": 1.6618165969848633,
      "learning_rate": 9.82195923907672e-06,
      "loss": 0.0805,
      "step": 74040
    },
    {
      "epoch": 0.1212008143333137,
      "grad_norm": 2.659822463989258,
      "learning_rate": 9.821893346863201e-06,
      "loss": 0.0772,
      "step": 74060
    },
    {
      "epoch": 0.12123354477196703,
      "grad_norm": 2.448744773864746,
      "learning_rate": 9.821827454649685e-06,
      "loss": 0.0712,
      "step": 74080
    },
    {
      "epoch": 0.12126627521062038,
      "grad_norm": 3.0537023544311523,
      "learning_rate": 9.821761562436167e-06,
      "loss": 0.0802,
      "step": 74100
    },
    {
      "epoch": 0.1212990056492737,
      "grad_norm": 5.619791507720947,
      "learning_rate": 9.82169567022265e-06,
      "loss": 0.0793,
      "step": 74120
    },
    {
      "epoch": 0.12133173608792705,
      "grad_norm": 4.603172779083252,
      "learning_rate": 9.821629778009132e-06,
      "loss": 0.0674,
      "step": 74140
    },
    {
      "epoch": 0.1213644665265804,
      "grad_norm": 2.8155789375305176,
      "learning_rate": 9.821563885795616e-06,
      "loss": 0.0719,
      "step": 74160
    },
    {
      "epoch": 0.12139719696523373,
      "grad_norm": 0.8795742988586426,
      "learning_rate": 9.8214979935821e-06,
      "loss": 0.0718,
      "step": 74180
    },
    {
      "epoch": 0.12142992740388707,
      "grad_norm": 3.9837822914123535,
      "learning_rate": 9.821432101368581e-06,
      "loss": 0.0898,
      "step": 74200
    },
    {
      "epoch": 0.1214626578425404,
      "grad_norm": 3.7177252769470215,
      "learning_rate": 9.821366209155065e-06,
      "loss": 0.0738,
      "step": 74220
    },
    {
      "epoch": 0.12149538828119374,
      "grad_norm": 5.453120231628418,
      "learning_rate": 9.821300316941548e-06,
      "loss": 0.0826,
      "step": 74240
    },
    {
      "epoch": 0.12152811871984709,
      "grad_norm": 1.2199156284332275,
      "learning_rate": 9.82123442472803e-06,
      "loss": 0.0737,
      "step": 74260
    },
    {
      "epoch": 0.12156084915850042,
      "grad_norm": 3.751913547515869,
      "learning_rate": 9.821168532514514e-06,
      "loss": 0.0856,
      "step": 74280
    },
    {
      "epoch": 0.12159357959715376,
      "grad_norm": 5.4385786056518555,
      "learning_rate": 9.821102640300998e-06,
      "loss": 0.0697,
      "step": 74300
    },
    {
      "epoch": 0.1216263100358071,
      "grad_norm": 3.1533267498016357,
      "learning_rate": 9.82103674808748e-06,
      "loss": 0.0949,
      "step": 74320
    },
    {
      "epoch": 0.12165904047446044,
      "grad_norm": 1.264424204826355,
      "learning_rate": 9.820970855873963e-06,
      "loss": 0.0773,
      "step": 74340
    },
    {
      "epoch": 0.12169177091311378,
      "grad_norm": 3.4452741146087646,
      "learning_rate": 9.820904963660445e-06,
      "loss": 0.0804,
      "step": 74360
    },
    {
      "epoch": 0.12172450135176711,
      "grad_norm": 7.606507778167725,
      "learning_rate": 9.820839071446928e-06,
      "loss": 0.0599,
      "step": 74380
    },
    {
      "epoch": 0.12175723179042046,
      "grad_norm": 1.514135718345642,
      "learning_rate": 9.82077317923341e-06,
      "loss": 0.0778,
      "step": 74400
    },
    {
      "epoch": 0.12178996222907379,
      "grad_norm": 3.3415346145629883,
      "learning_rate": 9.820707287019894e-06,
      "loss": 0.0801,
      "step": 74420
    },
    {
      "epoch": 0.12182269266772713,
      "grad_norm": 2.447052240371704,
      "learning_rate": 9.820641394806376e-06,
      "loss": 0.0964,
      "step": 74440
    },
    {
      "epoch": 0.12185542310638048,
      "grad_norm": 2.159222364425659,
      "learning_rate": 9.82057550259286e-06,
      "loss": 0.0672,
      "step": 74460
    },
    {
      "epoch": 0.12188815354503381,
      "grad_norm": 1.9797379970550537,
      "learning_rate": 9.820509610379341e-06,
      "loss": 0.0632,
      "step": 74480
    },
    {
      "epoch": 0.12192088398368715,
      "grad_norm": 2.957418203353882,
      "learning_rate": 9.820443718165825e-06,
      "loss": 0.0656,
      "step": 74500
    },
    {
      "epoch": 0.12195361442234048,
      "grad_norm": 3.1812872886657715,
      "learning_rate": 9.820377825952307e-06,
      "loss": 0.0657,
      "step": 74520
    },
    {
      "epoch": 0.12198634486099383,
      "grad_norm": 14.104419708251953,
      "learning_rate": 9.82031193373879e-06,
      "loss": 0.0908,
      "step": 74540
    },
    {
      "epoch": 0.12201907529964717,
      "grad_norm": 3.8991851806640625,
      "learning_rate": 9.820246041525274e-06,
      "loss": 0.0897,
      "step": 74560
    },
    {
      "epoch": 0.1220518057383005,
      "grad_norm": 6.0205159187316895,
      "learning_rate": 9.820180149311756e-06,
      "loss": 0.0902,
      "step": 74580
    },
    {
      "epoch": 0.12208453617695385,
      "grad_norm": 3.7878949642181396,
      "learning_rate": 9.82011425709824e-06,
      "loss": 0.072,
      "step": 74600
    },
    {
      "epoch": 0.12211726661560718,
      "grad_norm": 2.5294485092163086,
      "learning_rate": 9.820048364884723e-06,
      "loss": 0.0877,
      "step": 74620
    },
    {
      "epoch": 0.12214999705426052,
      "grad_norm": 1.8928661346435547,
      "learning_rate": 9.819982472671205e-06,
      "loss": 0.0774,
      "step": 74640
    },
    {
      "epoch": 0.12218272749291385,
      "grad_norm": 2.812856674194336,
      "learning_rate": 9.819916580457689e-06,
      "loss": 0.0793,
      "step": 74660
    },
    {
      "epoch": 0.1222154579315672,
      "grad_norm": 5.228830814361572,
      "learning_rate": 9.819850688244172e-06,
      "loss": 0.0751,
      "step": 74680
    },
    {
      "epoch": 0.12224818837022054,
      "grad_norm": 3.7034716606140137,
      "learning_rate": 9.819784796030654e-06,
      "loss": 0.0836,
      "step": 74700
    },
    {
      "epoch": 0.12228091880887387,
      "grad_norm": 4.628665447235107,
      "learning_rate": 9.819718903817138e-06,
      "loss": 0.0837,
      "step": 74720
    },
    {
      "epoch": 0.12231364924752722,
      "grad_norm": 1.8290979862213135,
      "learning_rate": 9.81965301160362e-06,
      "loss": 0.0919,
      "step": 74740
    },
    {
      "epoch": 0.12234637968618055,
      "grad_norm": 5.454891681671143,
      "learning_rate": 9.819587119390103e-06,
      "loss": 0.0848,
      "step": 74760
    },
    {
      "epoch": 0.12237911012483389,
      "grad_norm": 4.965439319610596,
      "learning_rate": 9.819521227176585e-06,
      "loss": 0.1018,
      "step": 74780
    },
    {
      "epoch": 0.12241184056348724,
      "grad_norm": 2.1741459369659424,
      "learning_rate": 9.819455334963069e-06,
      "loss": 0.0753,
      "step": 74800
    },
    {
      "epoch": 0.12244457100214057,
      "grad_norm": 3.9826114177703857,
      "learning_rate": 9.81938944274955e-06,
      "loss": 0.0738,
      "step": 74820
    },
    {
      "epoch": 0.12247730144079391,
      "grad_norm": 4.670124053955078,
      "learning_rate": 9.819323550536034e-06,
      "loss": 0.0823,
      "step": 74840
    },
    {
      "epoch": 0.12251003187944724,
      "grad_norm": 1.5562710762023926,
      "learning_rate": 9.819257658322516e-06,
      "loss": 0.0713,
      "step": 74860
    },
    {
      "epoch": 0.12254276231810059,
      "grad_norm": 3.4176650047302246,
      "learning_rate": 9.819191766109e-06,
      "loss": 0.0786,
      "step": 74880
    },
    {
      "epoch": 0.12257549275675393,
      "grad_norm": 3.1438093185424805,
      "learning_rate": 9.819125873895483e-06,
      "loss": 0.0888,
      "step": 74900
    },
    {
      "epoch": 0.12260822319540726,
      "grad_norm": 2.3697590827941895,
      "learning_rate": 9.819059981681965e-06,
      "loss": 0.0843,
      "step": 74920
    },
    {
      "epoch": 0.1226409536340606,
      "grad_norm": 4.093346118927002,
      "learning_rate": 9.818994089468449e-06,
      "loss": 0.083,
      "step": 74940
    },
    {
      "epoch": 0.12267368407271394,
      "grad_norm": 5.147003650665283,
      "learning_rate": 9.81892819725493e-06,
      "loss": 0.0789,
      "step": 74960
    },
    {
      "epoch": 0.12270641451136728,
      "grad_norm": 5.810768127441406,
      "learning_rate": 9.818862305041414e-06,
      "loss": 0.0675,
      "step": 74980
    },
    {
      "epoch": 0.12273914495002063,
      "grad_norm": 2.656017541885376,
      "learning_rate": 9.818796412827896e-06,
      "loss": 0.0776,
      "step": 75000
    },
    {
      "epoch": 0.12277187538867396,
      "grad_norm": 1.1806889772415161,
      "learning_rate": 9.81873052061438e-06,
      "loss": 0.0728,
      "step": 75020
    },
    {
      "epoch": 0.1228046058273273,
      "grad_norm": 3.927924871444702,
      "learning_rate": 9.818664628400863e-06,
      "loss": 0.0647,
      "step": 75040
    },
    {
      "epoch": 0.12283733626598063,
      "grad_norm": 6.248209476470947,
      "learning_rate": 9.818598736187345e-06,
      "loss": 0.0776,
      "step": 75060
    },
    {
      "epoch": 0.12287006670463398,
      "grad_norm": 3.1108014583587646,
      "learning_rate": 9.818532843973829e-06,
      "loss": 0.0911,
      "step": 75080
    },
    {
      "epoch": 0.12290279714328732,
      "grad_norm": 3.155144691467285,
      "learning_rate": 9.818466951760312e-06,
      "loss": 0.0907,
      "step": 75100
    },
    {
      "epoch": 0.12293552758194065,
      "grad_norm": 3.269054651260376,
      "learning_rate": 9.818401059546794e-06,
      "loss": 0.0812,
      "step": 75120
    },
    {
      "epoch": 0.122968258020594,
      "grad_norm": 3.235537528991699,
      "learning_rate": 9.818335167333278e-06,
      "loss": 0.0829,
      "step": 75140
    },
    {
      "epoch": 0.12300098845924733,
      "grad_norm": 1.5001717805862427,
      "learning_rate": 9.81826927511976e-06,
      "loss": 0.0826,
      "step": 75160
    },
    {
      "epoch": 0.12303371889790067,
      "grad_norm": 3.265385389328003,
      "learning_rate": 9.818203382906243e-06,
      "loss": 0.0669,
      "step": 75180
    },
    {
      "epoch": 0.12306644933655402,
      "grad_norm": 2.8767545223236084,
      "learning_rate": 9.818137490692725e-06,
      "loss": 0.0932,
      "step": 75200
    },
    {
      "epoch": 0.12309917977520735,
      "grad_norm": 127.72432708740234,
      "learning_rate": 9.818071598479209e-06,
      "loss": 0.0787,
      "step": 75220
    },
    {
      "epoch": 0.12313191021386069,
      "grad_norm": 11.012807846069336,
      "learning_rate": 9.818005706265692e-06,
      "loss": 0.0839,
      "step": 75240
    },
    {
      "epoch": 0.12316464065251402,
      "grad_norm": 5.123934268951416,
      "learning_rate": 9.817939814052174e-06,
      "loss": 0.0697,
      "step": 75260
    },
    {
      "epoch": 0.12319737109116737,
      "grad_norm": 5.634026527404785,
      "learning_rate": 9.817873921838658e-06,
      "loss": 0.0717,
      "step": 75280
    },
    {
      "epoch": 0.1232301015298207,
      "grad_norm": 3.263183116912842,
      "learning_rate": 9.81780802962514e-06,
      "loss": 0.0679,
      "step": 75300
    },
    {
      "epoch": 0.12326283196847404,
      "grad_norm": 2.221050262451172,
      "learning_rate": 9.817742137411623e-06,
      "loss": 0.0922,
      "step": 75320
    },
    {
      "epoch": 0.12329556240712738,
      "grad_norm": 1.085137128829956,
      "learning_rate": 9.817676245198105e-06,
      "loss": 0.0808,
      "step": 75340
    },
    {
      "epoch": 0.12332829284578072,
      "grad_norm": 1.9965355396270752,
      "learning_rate": 9.817610352984589e-06,
      "loss": 0.0716,
      "step": 75360
    },
    {
      "epoch": 0.12336102328443406,
      "grad_norm": 6.1311750411987305,
      "learning_rate": 9.81754446077107e-06,
      "loss": 0.0792,
      "step": 75380
    },
    {
      "epoch": 0.12339375372308739,
      "grad_norm": 2.938302516937256,
      "learning_rate": 9.817478568557554e-06,
      "loss": 0.0783,
      "step": 75400
    },
    {
      "epoch": 0.12342648416174073,
      "grad_norm": 3.087475299835205,
      "learning_rate": 9.817412676344038e-06,
      "loss": 0.0807,
      "step": 75420
    },
    {
      "epoch": 0.12345921460039408,
      "grad_norm": 4.131775856018066,
      "learning_rate": 9.81734678413052e-06,
      "loss": 0.088,
      "step": 75440
    },
    {
      "epoch": 0.12349194503904741,
      "grad_norm": 3.6246414184570312,
      "learning_rate": 9.817280891917003e-06,
      "loss": 0.0642,
      "step": 75460
    },
    {
      "epoch": 0.12352467547770075,
      "grad_norm": 3.0466861724853516,
      "learning_rate": 9.817214999703487e-06,
      "loss": 0.0725,
      "step": 75480
    },
    {
      "epoch": 0.12355740591635408,
      "grad_norm": 5.956827163696289,
      "learning_rate": 9.817149107489969e-06,
      "loss": 0.0799,
      "step": 75500
    },
    {
      "epoch": 0.12359013635500743,
      "grad_norm": 4.219984531402588,
      "learning_rate": 9.817083215276452e-06,
      "loss": 0.0688,
      "step": 75520
    },
    {
      "epoch": 0.12362286679366077,
      "grad_norm": 2.633594512939453,
      "learning_rate": 9.817017323062934e-06,
      "loss": 0.1014,
      "step": 75540
    },
    {
      "epoch": 0.1236555972323141,
      "grad_norm": 4.700972080230713,
      "learning_rate": 9.816951430849418e-06,
      "loss": 0.0683,
      "step": 75560
    },
    {
      "epoch": 0.12368832767096745,
      "grad_norm": 4.51323127746582,
      "learning_rate": 9.8168855386359e-06,
      "loss": 0.0774,
      "step": 75580
    },
    {
      "epoch": 0.12372105810962078,
      "grad_norm": 5.302404880523682,
      "learning_rate": 9.816819646422383e-06,
      "loss": 0.0645,
      "step": 75600
    },
    {
      "epoch": 0.12375378854827412,
      "grad_norm": 3.5468826293945312,
      "learning_rate": 9.816753754208867e-06,
      "loss": 0.0761,
      "step": 75620
    },
    {
      "epoch": 0.12378651898692747,
      "grad_norm": 10.374695777893066,
      "learning_rate": 9.816687861995349e-06,
      "loss": 0.0826,
      "step": 75640
    },
    {
      "epoch": 0.1238192494255808,
      "grad_norm": 1.0257015228271484,
      "learning_rate": 9.816621969781832e-06,
      "loss": 0.0896,
      "step": 75660
    },
    {
      "epoch": 0.12385197986423414,
      "grad_norm": 3.5235328674316406,
      "learning_rate": 9.816556077568314e-06,
      "loss": 0.0797,
      "step": 75680
    },
    {
      "epoch": 0.12388471030288747,
      "grad_norm": 1.1719906330108643,
      "learning_rate": 9.816490185354798e-06,
      "loss": 0.0754,
      "step": 75700
    },
    {
      "epoch": 0.12391744074154082,
      "grad_norm": 2.6655685901641846,
      "learning_rate": 9.81642429314128e-06,
      "loss": 0.0916,
      "step": 75720
    },
    {
      "epoch": 0.12395017118019416,
      "grad_norm": 0.9744066596031189,
      "learning_rate": 9.816358400927763e-06,
      "loss": 0.065,
      "step": 75740
    },
    {
      "epoch": 0.1239829016188475,
      "grad_norm": 4.840150356292725,
      "learning_rate": 9.816292508714245e-06,
      "loss": 0.064,
      "step": 75760
    },
    {
      "epoch": 0.12401563205750084,
      "grad_norm": 2.5601155757904053,
      "learning_rate": 9.816226616500729e-06,
      "loss": 0.0789,
      "step": 75780
    },
    {
      "epoch": 0.12404836249615417,
      "grad_norm": 5.094804286956787,
      "learning_rate": 9.81616072428721e-06,
      "loss": 0.0782,
      "step": 75800
    },
    {
      "epoch": 0.12408109293480751,
      "grad_norm": 4.421080589294434,
      "learning_rate": 9.816094832073694e-06,
      "loss": 0.0791,
      "step": 75820
    },
    {
      "epoch": 0.12411382337346086,
      "grad_norm": 3.654501438140869,
      "learning_rate": 9.816028939860178e-06,
      "loss": 0.1031,
      "step": 75840
    },
    {
      "epoch": 0.12414655381211419,
      "grad_norm": 3.1001315116882324,
      "learning_rate": 9.81596304764666e-06,
      "loss": 0.0768,
      "step": 75860
    },
    {
      "epoch": 0.12417928425076753,
      "grad_norm": 2.0068068504333496,
      "learning_rate": 9.815897155433143e-06,
      "loss": 0.0643,
      "step": 75880
    },
    {
      "epoch": 0.12421201468942086,
      "grad_norm": 3.0795395374298096,
      "learning_rate": 9.815831263219627e-06,
      "loss": 0.0659,
      "step": 75900
    },
    {
      "epoch": 0.12424474512807421,
      "grad_norm": 1.111746072769165,
      "learning_rate": 9.815765371006109e-06,
      "loss": 0.0724,
      "step": 75920
    },
    {
      "epoch": 0.12427747556672755,
      "grad_norm": 2.605401039123535,
      "learning_rate": 9.815699478792592e-06,
      "loss": 0.0774,
      "step": 75940
    },
    {
      "epoch": 0.12431020600538088,
      "grad_norm": 2.0621228218078613,
      "learning_rate": 9.815633586579076e-06,
      "loss": 0.0979,
      "step": 75960
    },
    {
      "epoch": 0.12434293644403423,
      "grad_norm": 1.5905370712280273,
      "learning_rate": 9.815567694365558e-06,
      "loss": 0.0587,
      "step": 75980
    },
    {
      "epoch": 0.12437566688268756,
      "grad_norm": 2.321918249130249,
      "learning_rate": 9.815501802152041e-06,
      "loss": 0.0577,
      "step": 76000
    },
    {
      "epoch": 0.1244083973213409,
      "grad_norm": 1.5887765884399414,
      "learning_rate": 9.815435909938523e-06,
      "loss": 0.0744,
      "step": 76020
    },
    {
      "epoch": 0.12444112775999423,
      "grad_norm": 2.5597012042999268,
      "learning_rate": 9.815370017725007e-06,
      "loss": 0.0602,
      "step": 76040
    },
    {
      "epoch": 0.12447385819864758,
      "grad_norm": 6.152617454528809,
      "learning_rate": 9.815304125511489e-06,
      "loss": 0.078,
      "step": 76060
    },
    {
      "epoch": 0.12450658863730092,
      "grad_norm": 1.9078625440597534,
      "learning_rate": 9.815238233297972e-06,
      "loss": 0.0872,
      "step": 76080
    },
    {
      "epoch": 0.12453931907595425,
      "grad_norm": 4.631965637207031,
      "learning_rate": 9.815172341084454e-06,
      "loss": 0.077,
      "step": 76100
    },
    {
      "epoch": 0.1245720495146076,
      "grad_norm": 4.538061618804932,
      "learning_rate": 9.815106448870938e-06,
      "loss": 0.0874,
      "step": 76120
    },
    {
      "epoch": 0.12460477995326093,
      "grad_norm": 10.018516540527344,
      "learning_rate": 9.81504055665742e-06,
      "loss": 0.0746,
      "step": 76140
    },
    {
      "epoch": 0.12463751039191427,
      "grad_norm": 4.349259853363037,
      "learning_rate": 9.814974664443903e-06,
      "loss": 0.0909,
      "step": 76160
    },
    {
      "epoch": 0.12467024083056762,
      "grad_norm": 5.920527458190918,
      "learning_rate": 9.814908772230385e-06,
      "loss": 0.0906,
      "step": 76180
    },
    {
      "epoch": 0.12470297126922095,
      "grad_norm": 2.6560616493225098,
      "learning_rate": 9.814842880016869e-06,
      "loss": 0.074,
      "step": 76200
    },
    {
      "epoch": 0.12473570170787429,
      "grad_norm": 2.698704719543457,
      "learning_rate": 9.814776987803352e-06,
      "loss": 0.0878,
      "step": 76220
    },
    {
      "epoch": 0.12476843214652762,
      "grad_norm": 1.863159418106079,
      "learning_rate": 9.814711095589834e-06,
      "loss": 0.0633,
      "step": 76240
    },
    {
      "epoch": 0.12480116258518097,
      "grad_norm": 2.917917490005493,
      "learning_rate": 9.814645203376318e-06,
      "loss": 0.0799,
      "step": 76260
    },
    {
      "epoch": 0.12483389302383431,
      "grad_norm": 2.316553831100464,
      "learning_rate": 9.814579311162801e-06,
      "loss": 0.0702,
      "step": 76280
    },
    {
      "epoch": 0.12486662346248764,
      "grad_norm": 3.244556188583374,
      "learning_rate": 9.814513418949283e-06,
      "loss": 0.0963,
      "step": 76300
    },
    {
      "epoch": 0.12489935390114099,
      "grad_norm": 5.788027763366699,
      "learning_rate": 9.814447526735767e-06,
      "loss": 0.0934,
      "step": 76320
    },
    {
      "epoch": 0.12493208433979432,
      "grad_norm": 6.262392997741699,
      "learning_rate": 9.81438163452225e-06,
      "loss": 0.0678,
      "step": 76340
    },
    {
      "epoch": 0.12496481477844766,
      "grad_norm": 5.781911373138428,
      "learning_rate": 9.814315742308732e-06,
      "loss": 0.071,
      "step": 76360
    },
    {
      "epoch": 0.124997545217101,
      "grad_norm": 2.488795757293701,
      "learning_rate": 9.814249850095216e-06,
      "loss": 0.0705,
      "step": 76380
    },
    {
      "epoch": 0.12503027565575434,
      "grad_norm": 1.0862902402877808,
      "learning_rate": 9.814183957881698e-06,
      "loss": 0.0617,
      "step": 76400
    },
    {
      "epoch": 0.12506300609440768,
      "grad_norm": 3.438767194747925,
      "learning_rate": 9.814118065668181e-06,
      "loss": 0.0838,
      "step": 76420
    },
    {
      "epoch": 0.12509573653306102,
      "grad_norm": 2.654587984085083,
      "learning_rate": 9.814052173454663e-06,
      "loss": 0.0784,
      "step": 76440
    },
    {
      "epoch": 0.12512846697171434,
      "grad_norm": 5.8837761878967285,
      "learning_rate": 9.813986281241147e-06,
      "loss": 0.0836,
      "step": 76460
    },
    {
      "epoch": 0.12516119741036769,
      "grad_norm": 2.083400011062622,
      "learning_rate": 9.813920389027629e-06,
      "loss": 0.0849,
      "step": 76480
    },
    {
      "epoch": 0.12519392784902103,
      "grad_norm": 6.055680274963379,
      "learning_rate": 9.813854496814112e-06,
      "loss": 0.0728,
      "step": 76500
    },
    {
      "epoch": 0.12522665828767438,
      "grad_norm": 3.6582448482513428,
      "learning_rate": 9.813788604600594e-06,
      "loss": 0.076,
      "step": 76520
    },
    {
      "epoch": 0.12525938872632772,
      "grad_norm": 2.6043126583099365,
      "learning_rate": 9.813722712387078e-06,
      "loss": 0.0826,
      "step": 76540
    },
    {
      "epoch": 0.12529211916498104,
      "grad_norm": 2.22137713432312,
      "learning_rate": 9.81365682017356e-06,
      "loss": 0.065,
      "step": 76560
    },
    {
      "epoch": 0.12532484960363438,
      "grad_norm": 3.998039722442627,
      "learning_rate": 9.813590927960043e-06,
      "loss": 0.0693,
      "step": 76580
    },
    {
      "epoch": 0.12535758004228773,
      "grad_norm": 5.8727264404296875,
      "learning_rate": 9.813525035746527e-06,
      "loss": 0.0746,
      "step": 76600
    },
    {
      "epoch": 0.12539031048094107,
      "grad_norm": 5.3841633796691895,
      "learning_rate": 9.813459143533009e-06,
      "loss": 0.1104,
      "step": 76620
    },
    {
      "epoch": 0.12542304091959441,
      "grad_norm": 4.339978218078613,
      "learning_rate": 9.813393251319492e-06,
      "loss": 0.0766,
      "step": 76640
    },
    {
      "epoch": 0.12545577135824773,
      "grad_norm": 5.753975868225098,
      "learning_rate": 9.813327359105976e-06,
      "loss": 0.0765,
      "step": 76660
    },
    {
      "epoch": 0.12548850179690108,
      "grad_norm": 1.4105440378189087,
      "learning_rate": 9.813261466892458e-06,
      "loss": 0.0786,
      "step": 76680
    },
    {
      "epoch": 0.12552123223555442,
      "grad_norm": 8.657127380371094,
      "learning_rate": 9.813195574678942e-06,
      "loss": 0.0666,
      "step": 76700
    },
    {
      "epoch": 0.12555396267420776,
      "grad_norm": 4.038013935089111,
      "learning_rate": 9.813129682465425e-06,
      "loss": 0.0855,
      "step": 76720
    },
    {
      "epoch": 0.1255866931128611,
      "grad_norm": 2.2254507541656494,
      "learning_rate": 9.813063790251907e-06,
      "loss": 0.0789,
      "step": 76740
    },
    {
      "epoch": 0.12561942355151443,
      "grad_norm": 2.4855637550354004,
      "learning_rate": 9.81299789803839e-06,
      "loss": 0.0726,
      "step": 76760
    },
    {
      "epoch": 0.12565215399016777,
      "grad_norm": 4.794955253601074,
      "learning_rate": 9.812932005824872e-06,
      "loss": 0.0861,
      "step": 76780
    },
    {
      "epoch": 0.12568488442882111,
      "grad_norm": 3.075104236602783,
      "learning_rate": 9.812866113611356e-06,
      "loss": 0.0748,
      "step": 76800
    },
    {
      "epoch": 0.12571761486747446,
      "grad_norm": 6.249990940093994,
      "learning_rate": 9.812800221397838e-06,
      "loss": 0.0601,
      "step": 76820
    },
    {
      "epoch": 0.1257503453061278,
      "grad_norm": 2.312002420425415,
      "learning_rate": 9.812734329184322e-06,
      "loss": 0.0636,
      "step": 76840
    },
    {
      "epoch": 0.12578307574478112,
      "grad_norm": 4.64054012298584,
      "learning_rate": 9.812668436970803e-06,
      "loss": 0.0847,
      "step": 76860
    },
    {
      "epoch": 0.12581580618343446,
      "grad_norm": 2.8550024032592773,
      "learning_rate": 9.812602544757287e-06,
      "loss": 0.0706,
      "step": 76880
    },
    {
      "epoch": 0.1258485366220878,
      "grad_norm": 2.3347456455230713,
      "learning_rate": 9.812536652543769e-06,
      "loss": 0.0712,
      "step": 76900
    },
    {
      "epoch": 0.12588126706074115,
      "grad_norm": 3.587766170501709,
      "learning_rate": 9.812470760330253e-06,
      "loss": 0.0729,
      "step": 76920
    },
    {
      "epoch": 0.1259139974993945,
      "grad_norm": 5.909341335296631,
      "learning_rate": 9.812404868116734e-06,
      "loss": 0.0641,
      "step": 76940
    },
    {
      "epoch": 0.12594672793804781,
      "grad_norm": 4.429617881774902,
      "learning_rate": 9.812338975903218e-06,
      "loss": 0.0623,
      "step": 76960
    },
    {
      "epoch": 0.12597945837670116,
      "grad_norm": 8.113693237304688,
      "learning_rate": 9.8122730836897e-06,
      "loss": 0.0722,
      "step": 76980
    },
    {
      "epoch": 0.1260121888153545,
      "grad_norm": 2.633657932281494,
      "learning_rate": 9.812207191476183e-06,
      "loss": 0.0642,
      "step": 77000
    },
    {
      "epoch": 0.12604491925400785,
      "grad_norm": 3.9822211265563965,
      "learning_rate": 9.812141299262667e-06,
      "loss": 0.0737,
      "step": 77020
    },
    {
      "epoch": 0.1260776496926612,
      "grad_norm": 3.0862772464752197,
      "learning_rate": 9.812075407049149e-06,
      "loss": 0.0807,
      "step": 77040
    },
    {
      "epoch": 0.1261103801313145,
      "grad_norm": 2.092336893081665,
      "learning_rate": 9.812009514835633e-06,
      "loss": 0.0735,
      "step": 77060
    },
    {
      "epoch": 0.12614311056996785,
      "grad_norm": 11.893513679504395,
      "learning_rate": 9.811943622622116e-06,
      "loss": 0.0654,
      "step": 77080
    },
    {
      "epoch": 0.1261758410086212,
      "grad_norm": 2.196683645248413,
      "learning_rate": 9.811877730408598e-06,
      "loss": 0.071,
      "step": 77100
    },
    {
      "epoch": 0.12620857144727454,
      "grad_norm": 3.1373353004455566,
      "learning_rate": 9.811811838195082e-06,
      "loss": 0.0774,
      "step": 77120
    },
    {
      "epoch": 0.1262413018859279,
      "grad_norm": 4.021506309509277,
      "learning_rate": 9.811745945981565e-06,
      "loss": 0.0643,
      "step": 77140
    },
    {
      "epoch": 0.1262740323245812,
      "grad_norm": 5.863011360168457,
      "learning_rate": 9.811680053768047e-06,
      "loss": 0.0829,
      "step": 77160
    },
    {
      "epoch": 0.12630676276323455,
      "grad_norm": 4.162875175476074,
      "learning_rate": 9.81161416155453e-06,
      "loss": 0.0827,
      "step": 77180
    },
    {
      "epoch": 0.1263394932018879,
      "grad_norm": 2.3141424655914307,
      "learning_rate": 9.811548269341013e-06,
      "loss": 0.0539,
      "step": 77200
    },
    {
      "epoch": 0.12637222364054124,
      "grad_norm": 4.116818428039551,
      "learning_rate": 9.811482377127496e-06,
      "loss": 0.0788,
      "step": 77220
    },
    {
      "epoch": 0.12640495407919458,
      "grad_norm": 3.462364673614502,
      "learning_rate": 9.811416484913978e-06,
      "loss": 0.0617,
      "step": 77240
    },
    {
      "epoch": 0.1264376845178479,
      "grad_norm": 3.724520444869995,
      "learning_rate": 9.811350592700462e-06,
      "loss": 0.0755,
      "step": 77260
    },
    {
      "epoch": 0.12647041495650124,
      "grad_norm": 3.9554636478424072,
      "learning_rate": 9.811284700486944e-06,
      "loss": 0.0927,
      "step": 77280
    },
    {
      "epoch": 0.1265031453951546,
      "grad_norm": 5.570385932922363,
      "learning_rate": 9.811218808273427e-06,
      "loss": 0.071,
      "step": 77300
    },
    {
      "epoch": 0.12653587583380793,
      "grad_norm": 3.4092981815338135,
      "learning_rate": 9.811152916059909e-06,
      "loss": 0.069,
      "step": 77320
    },
    {
      "epoch": 0.12656860627246128,
      "grad_norm": 2.0057897567749023,
      "learning_rate": 9.811087023846393e-06,
      "loss": 0.0771,
      "step": 77340
    },
    {
      "epoch": 0.1266013367111146,
      "grad_norm": 2.7308146953582764,
      "learning_rate": 9.811021131632876e-06,
      "loss": 0.0652,
      "step": 77360
    },
    {
      "epoch": 0.12663406714976794,
      "grad_norm": 2.364133596420288,
      "learning_rate": 9.810955239419358e-06,
      "loss": 0.0756,
      "step": 77380
    },
    {
      "epoch": 0.12666679758842128,
      "grad_norm": 2.20166015625,
      "learning_rate": 9.810889347205842e-06,
      "loss": 0.0777,
      "step": 77400
    },
    {
      "epoch": 0.12669952802707463,
      "grad_norm": 4.626871109008789,
      "learning_rate": 9.810823454992324e-06,
      "loss": 0.0716,
      "step": 77420
    },
    {
      "epoch": 0.12673225846572797,
      "grad_norm": 1.6443026065826416,
      "learning_rate": 9.810757562778807e-06,
      "loss": 0.0802,
      "step": 77440
    },
    {
      "epoch": 0.1267649889043813,
      "grad_norm": 2.808183431625366,
      "learning_rate": 9.81069167056529e-06,
      "loss": 0.0755,
      "step": 77460
    },
    {
      "epoch": 0.12679771934303463,
      "grad_norm": 5.763791561126709,
      "learning_rate": 9.810625778351773e-06,
      "loss": 0.0749,
      "step": 77480
    },
    {
      "epoch": 0.12683044978168798,
      "grad_norm": 3.5771872997283936,
      "learning_rate": 9.810559886138256e-06,
      "loss": 0.0809,
      "step": 77500
    },
    {
      "epoch": 0.12686318022034132,
      "grad_norm": 1.9853408336639404,
      "learning_rate": 9.81049399392474e-06,
      "loss": 0.064,
      "step": 77520
    },
    {
      "epoch": 0.12689591065899467,
      "grad_norm": 1.718870997428894,
      "learning_rate": 9.810428101711222e-06,
      "loss": 0.0627,
      "step": 77540
    },
    {
      "epoch": 0.12692864109764798,
      "grad_norm": 3.1669681072235107,
      "learning_rate": 9.810362209497705e-06,
      "loss": 0.088,
      "step": 77560
    },
    {
      "epoch": 0.12696137153630133,
      "grad_norm": 5.090820789337158,
      "learning_rate": 9.810296317284187e-06,
      "loss": 0.0713,
      "step": 77580
    },
    {
      "epoch": 0.12699410197495467,
      "grad_norm": 4.3253936767578125,
      "learning_rate": 9.81023042507067e-06,
      "loss": 0.0859,
      "step": 77600
    },
    {
      "epoch": 0.12702683241360802,
      "grad_norm": 9.380119323730469,
      "learning_rate": 9.810164532857153e-06,
      "loss": 0.0608,
      "step": 77620
    },
    {
      "epoch": 0.12705956285226136,
      "grad_norm": 3.506716251373291,
      "learning_rate": 9.810098640643636e-06,
      "loss": 0.0874,
      "step": 77640
    },
    {
      "epoch": 0.12709229329091468,
      "grad_norm": 2.285426378250122,
      "learning_rate": 9.810032748430118e-06,
      "loss": 0.0701,
      "step": 77660
    },
    {
      "epoch": 0.12712502372956802,
      "grad_norm": 2.2445569038391113,
      "learning_rate": 9.809966856216602e-06,
      "loss": 0.0862,
      "step": 77680
    },
    {
      "epoch": 0.12715775416822137,
      "grad_norm": 4.606135368347168,
      "learning_rate": 9.809900964003084e-06,
      "loss": 0.0796,
      "step": 77700
    },
    {
      "epoch": 0.1271904846068747,
      "grad_norm": 1.9987635612487793,
      "learning_rate": 9.809835071789567e-06,
      "loss": 0.0621,
      "step": 77720
    },
    {
      "epoch": 0.12722321504552803,
      "grad_norm": 3.097748041152954,
      "learning_rate": 9.80976917957605e-06,
      "loss": 0.0617,
      "step": 77740
    },
    {
      "epoch": 0.12725594548418137,
      "grad_norm": 2.3808481693267822,
      "learning_rate": 9.809703287362533e-06,
      "loss": 0.0716,
      "step": 77760
    },
    {
      "epoch": 0.12728867592283472,
      "grad_norm": 4.021045684814453,
      "learning_rate": 9.809637395149016e-06,
      "loss": 0.0816,
      "step": 77780
    },
    {
      "epoch": 0.12732140636148806,
      "grad_norm": 5.175722599029541,
      "learning_rate": 9.809571502935498e-06,
      "loss": 0.0857,
      "step": 77800
    },
    {
      "epoch": 0.1273541368001414,
      "grad_norm": 3.55182147026062,
      "learning_rate": 9.809505610721982e-06,
      "loss": 0.0688,
      "step": 77820
    },
    {
      "epoch": 0.12738686723879472,
      "grad_norm": 30.276350021362305,
      "learning_rate": 9.809439718508464e-06,
      "loss": 0.0723,
      "step": 77840
    },
    {
      "epoch": 0.12741959767744807,
      "grad_norm": 3.2706873416900635,
      "learning_rate": 9.809373826294947e-06,
      "loss": 0.072,
      "step": 77860
    },
    {
      "epoch": 0.1274523281161014,
      "grad_norm": 0.8350579738616943,
      "learning_rate": 9.80930793408143e-06,
      "loss": 0.0713,
      "step": 77880
    },
    {
      "epoch": 0.12748505855475475,
      "grad_norm": 24.202341079711914,
      "learning_rate": 9.809242041867913e-06,
      "loss": 0.0898,
      "step": 77900
    },
    {
      "epoch": 0.1275177889934081,
      "grad_norm": 2.652259111404419,
      "learning_rate": 9.809176149654396e-06,
      "loss": 0.0573,
      "step": 77920
    },
    {
      "epoch": 0.12755051943206142,
      "grad_norm": 2.083728313446045,
      "learning_rate": 9.80911025744088e-06,
      "loss": 0.0751,
      "step": 77940
    },
    {
      "epoch": 0.12758324987071476,
      "grad_norm": 3.354091167449951,
      "learning_rate": 9.809044365227362e-06,
      "loss": 0.096,
      "step": 77960
    },
    {
      "epoch": 0.1276159803093681,
      "grad_norm": 2.1674997806549072,
      "learning_rate": 9.808978473013845e-06,
      "loss": 0.0704,
      "step": 77980
    },
    {
      "epoch": 0.12764871074802145,
      "grad_norm": 3.5217132568359375,
      "learning_rate": 9.808912580800327e-06,
      "loss": 0.0837,
      "step": 78000
    },
    {
      "epoch": 0.1276814411866748,
      "grad_norm": 5.018423557281494,
      "learning_rate": 9.80884668858681e-06,
      "loss": 0.0708,
      "step": 78020
    },
    {
      "epoch": 0.1277141716253281,
      "grad_norm": 6.1063008308410645,
      "learning_rate": 9.808780796373293e-06,
      "loss": 0.0902,
      "step": 78040
    },
    {
      "epoch": 0.12774690206398145,
      "grad_norm": 1.0330955982208252,
      "learning_rate": 9.808714904159776e-06,
      "loss": 0.0748,
      "step": 78060
    },
    {
      "epoch": 0.1277796325026348,
      "grad_norm": 4.540598392486572,
      "learning_rate": 9.80864901194626e-06,
      "loss": 0.0734,
      "step": 78080
    },
    {
      "epoch": 0.12781236294128814,
      "grad_norm": 5.002918243408203,
      "learning_rate": 9.808583119732742e-06,
      "loss": 0.0831,
      "step": 78100
    },
    {
      "epoch": 0.1278450933799415,
      "grad_norm": 4.960106372833252,
      "learning_rate": 9.808517227519225e-06,
      "loss": 0.0607,
      "step": 78120
    },
    {
      "epoch": 0.1278778238185948,
      "grad_norm": 3.0712192058563232,
      "learning_rate": 9.808451335305707e-06,
      "loss": 0.0796,
      "step": 78140
    },
    {
      "epoch": 0.12791055425724815,
      "grad_norm": 0.6407955884933472,
      "learning_rate": 9.808385443092191e-06,
      "loss": 0.0779,
      "step": 78160
    },
    {
      "epoch": 0.1279432846959015,
      "grad_norm": 4.793454647064209,
      "learning_rate": 9.808319550878673e-06,
      "loss": 0.0906,
      "step": 78180
    },
    {
      "epoch": 0.12797601513455484,
      "grad_norm": 3.2520291805267334,
      "learning_rate": 9.808253658665156e-06,
      "loss": 0.0744,
      "step": 78200
    },
    {
      "epoch": 0.12800874557320818,
      "grad_norm": 1.4988244771957397,
      "learning_rate": 9.808187766451638e-06,
      "loss": 0.0871,
      "step": 78220
    },
    {
      "epoch": 0.1280414760118615,
      "grad_norm": 3.545659065246582,
      "learning_rate": 9.808121874238122e-06,
      "loss": 0.0558,
      "step": 78240
    },
    {
      "epoch": 0.12807420645051484,
      "grad_norm": 3.7006402015686035,
      "learning_rate": 9.808055982024605e-06,
      "loss": 0.084,
      "step": 78260
    },
    {
      "epoch": 0.1281069368891682,
      "grad_norm": 2.197538137435913,
      "learning_rate": 9.807990089811087e-06,
      "loss": 0.062,
      "step": 78280
    },
    {
      "epoch": 0.12813966732782153,
      "grad_norm": 3.485948085784912,
      "learning_rate": 9.807924197597571e-06,
      "loss": 0.0708,
      "step": 78300
    },
    {
      "epoch": 0.12817239776647488,
      "grad_norm": 4.7537078857421875,
      "learning_rate": 9.807858305384054e-06,
      "loss": 0.0627,
      "step": 78320
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 3.161007881164551,
      "learning_rate": 9.807792413170536e-06,
      "loss": 0.073,
      "step": 78340
    },
    {
      "epoch": 0.12823785864378154,
      "grad_norm": 1.833254098892212,
      "learning_rate": 9.80772652095702e-06,
      "loss": 0.0713,
      "step": 78360
    },
    {
      "epoch": 0.12827058908243488,
      "grad_norm": 3.3213462829589844,
      "learning_rate": 9.807660628743502e-06,
      "loss": 0.0763,
      "step": 78380
    },
    {
      "epoch": 0.12830331952108823,
      "grad_norm": 2.9924144744873047,
      "learning_rate": 9.807594736529985e-06,
      "loss": 0.066,
      "step": 78400
    },
    {
      "epoch": 0.12833604995974157,
      "grad_norm": 2.369417905807495,
      "learning_rate": 9.807528844316469e-06,
      "loss": 0.1084,
      "step": 78420
    },
    {
      "epoch": 0.1283687803983949,
      "grad_norm": 5.513235569000244,
      "learning_rate": 9.807462952102951e-06,
      "loss": 0.076,
      "step": 78440
    },
    {
      "epoch": 0.12840151083704823,
      "grad_norm": 5.128241539001465,
      "learning_rate": 9.807397059889434e-06,
      "loss": 0.0613,
      "step": 78460
    },
    {
      "epoch": 0.12843424127570158,
      "grad_norm": 30.30673599243164,
      "learning_rate": 9.807331167675916e-06,
      "loss": 0.0895,
      "step": 78480
    },
    {
      "epoch": 0.12846697171435492,
      "grad_norm": 2.6236274242401123,
      "learning_rate": 9.8072652754624e-06,
      "loss": 0.0671,
      "step": 78500
    },
    {
      "epoch": 0.12849970215300827,
      "grad_norm": 19.623889923095703,
      "learning_rate": 9.807199383248882e-06,
      "loss": 0.0666,
      "step": 78520
    },
    {
      "epoch": 0.12853243259166158,
      "grad_norm": 2.611675977706909,
      "learning_rate": 9.807133491035365e-06,
      "loss": 0.1037,
      "step": 78540
    },
    {
      "epoch": 0.12856516303031493,
      "grad_norm": 0.6076831817626953,
      "learning_rate": 9.807067598821847e-06,
      "loss": 0.0846,
      "step": 78560
    },
    {
      "epoch": 0.12859789346896827,
      "grad_norm": 2.7893009185791016,
      "learning_rate": 9.807001706608331e-06,
      "loss": 0.1012,
      "step": 78580
    },
    {
      "epoch": 0.12863062390762162,
      "grad_norm": 3.454118490219116,
      "learning_rate": 9.806935814394813e-06,
      "loss": 0.0691,
      "step": 78600
    },
    {
      "epoch": 0.12866335434627496,
      "grad_norm": 10.793466567993164,
      "learning_rate": 9.806869922181296e-06,
      "loss": 0.0804,
      "step": 78620
    },
    {
      "epoch": 0.12869608478492828,
      "grad_norm": 1.918137550354004,
      "learning_rate": 9.806804029967778e-06,
      "loss": 0.0794,
      "step": 78640
    },
    {
      "epoch": 0.12872881522358162,
      "grad_norm": 4.165306091308594,
      "learning_rate": 9.806738137754262e-06,
      "loss": 0.0808,
      "step": 78660
    },
    {
      "epoch": 0.12876154566223497,
      "grad_norm": 2.4514076709747314,
      "learning_rate": 9.806672245540745e-06,
      "loss": 0.0707,
      "step": 78680
    },
    {
      "epoch": 0.1287942761008883,
      "grad_norm": 23.420244216918945,
      "learning_rate": 9.806606353327227e-06,
      "loss": 0.0857,
      "step": 78700
    },
    {
      "epoch": 0.12882700653954166,
      "grad_norm": 1.8028737306594849,
      "learning_rate": 9.806540461113711e-06,
      "loss": 0.1061,
      "step": 78720
    },
    {
      "epoch": 0.12885973697819497,
      "grad_norm": 2.7133724689483643,
      "learning_rate": 9.806474568900195e-06,
      "loss": 0.0531,
      "step": 78740
    },
    {
      "epoch": 0.12889246741684832,
      "grad_norm": 8.334735870361328,
      "learning_rate": 9.806408676686678e-06,
      "loss": 0.0883,
      "step": 78760
    },
    {
      "epoch": 0.12892519785550166,
      "grad_norm": 2.4766852855682373,
      "learning_rate": 9.80634278447316e-06,
      "loss": 0.0881,
      "step": 78780
    },
    {
      "epoch": 0.128957928294155,
      "grad_norm": 4.008256912231445,
      "learning_rate": 9.806276892259644e-06,
      "loss": 0.0902,
      "step": 78800
    },
    {
      "epoch": 0.12899065873280835,
      "grad_norm": 4.777058124542236,
      "learning_rate": 9.806211000046126e-06,
      "loss": 0.0799,
      "step": 78820
    },
    {
      "epoch": 0.12902338917146167,
      "grad_norm": 2.3218493461608887,
      "learning_rate": 9.806145107832609e-06,
      "loss": 0.056,
      "step": 78840
    },
    {
      "epoch": 0.129056119610115,
      "grad_norm": 2.1412007808685303,
      "learning_rate": 9.806079215619091e-06,
      "loss": 0.0612,
      "step": 78860
    },
    {
      "epoch": 0.12908885004876836,
      "grad_norm": 3.21097993850708,
      "learning_rate": 9.806013323405575e-06,
      "loss": 0.0739,
      "step": 78880
    },
    {
      "epoch": 0.1291215804874217,
      "grad_norm": 1.141042947769165,
      "learning_rate": 9.805947431192056e-06,
      "loss": 0.0735,
      "step": 78900
    },
    {
      "epoch": 0.12915431092607504,
      "grad_norm": 5.205163955688477,
      "learning_rate": 9.80588153897854e-06,
      "loss": 0.0889,
      "step": 78920
    },
    {
      "epoch": 0.12918704136472836,
      "grad_norm": 0.7801201343536377,
      "learning_rate": 9.805815646765022e-06,
      "loss": 0.0725,
      "step": 78940
    },
    {
      "epoch": 0.1292197718033817,
      "grad_norm": 2.287327766418457,
      "learning_rate": 9.805749754551506e-06,
      "loss": 0.0712,
      "step": 78960
    },
    {
      "epoch": 0.12925250224203505,
      "grad_norm": 4.263204574584961,
      "learning_rate": 9.805683862337987e-06,
      "loss": 0.077,
      "step": 78980
    },
    {
      "epoch": 0.1292852326806884,
      "grad_norm": 1.124347448348999,
      "learning_rate": 9.805617970124471e-06,
      "loss": 0.097,
      "step": 79000
    },
    {
      "epoch": 0.12931796311934174,
      "grad_norm": 7.047555446624756,
      "learning_rate": 9.805552077910953e-06,
      "loss": 0.0597,
      "step": 79020
    },
    {
      "epoch": 0.12935069355799506,
      "grad_norm": 4.893148899078369,
      "learning_rate": 9.805486185697436e-06,
      "loss": 0.0835,
      "step": 79040
    },
    {
      "epoch": 0.1293834239966484,
      "grad_norm": 6.170867919921875,
      "learning_rate": 9.80542029348392e-06,
      "loss": 0.0637,
      "step": 79060
    },
    {
      "epoch": 0.12941615443530174,
      "grad_norm": 4.390518665313721,
      "learning_rate": 9.805354401270402e-06,
      "loss": 0.0594,
      "step": 79080
    },
    {
      "epoch": 0.1294488848739551,
      "grad_norm": 7.714954853057861,
      "learning_rate": 9.805288509056886e-06,
      "loss": 0.0684,
      "step": 79100
    },
    {
      "epoch": 0.1294816153126084,
      "grad_norm": 1.430040955543518,
      "learning_rate": 9.805222616843369e-06,
      "loss": 0.0775,
      "step": 79120
    },
    {
      "epoch": 0.12951434575126175,
      "grad_norm": 3.938488006591797,
      "learning_rate": 9.805156724629851e-06,
      "loss": 0.073,
      "step": 79140
    },
    {
      "epoch": 0.1295470761899151,
      "grad_norm": 1.2569730281829834,
      "learning_rate": 9.805090832416335e-06,
      "loss": 0.0732,
      "step": 79160
    },
    {
      "epoch": 0.12957980662856844,
      "grad_norm": 1.6269670724868774,
      "learning_rate": 9.805024940202818e-06,
      "loss": 0.0903,
      "step": 79180
    },
    {
      "epoch": 0.12961253706722178,
      "grad_norm": 1.9458988904953003,
      "learning_rate": 9.8049590479893e-06,
      "loss": 0.0775,
      "step": 79200
    },
    {
      "epoch": 0.1296452675058751,
      "grad_norm": 2.1559135913848877,
      "learning_rate": 9.804893155775784e-06,
      "loss": 0.0662,
      "step": 79220
    },
    {
      "epoch": 0.12967799794452844,
      "grad_norm": 1.8759160041809082,
      "learning_rate": 9.804827263562266e-06,
      "loss": 0.1139,
      "step": 79240
    },
    {
      "epoch": 0.1297107283831818,
      "grad_norm": 2.9591894149780273,
      "learning_rate": 9.80476137134875e-06,
      "loss": 0.0667,
      "step": 79260
    },
    {
      "epoch": 0.12974345882183513,
      "grad_norm": 3.0678465366363525,
      "learning_rate": 9.804695479135231e-06,
      "loss": 0.0814,
      "step": 79280
    },
    {
      "epoch": 0.12977618926048848,
      "grad_norm": 4.128026485443115,
      "learning_rate": 9.804629586921715e-06,
      "loss": 0.0728,
      "step": 79300
    },
    {
      "epoch": 0.1298089196991418,
      "grad_norm": 2.5136020183563232,
      "learning_rate": 9.804563694708197e-06,
      "loss": 0.0643,
      "step": 79320
    },
    {
      "epoch": 0.12984165013779514,
      "grad_norm": 2.2219247817993164,
      "learning_rate": 9.80449780249468e-06,
      "loss": 0.0623,
      "step": 79340
    },
    {
      "epoch": 0.12987438057644848,
      "grad_norm": 2.2764601707458496,
      "learning_rate": 9.804431910281162e-06,
      "loss": 0.0612,
      "step": 79360
    },
    {
      "epoch": 0.12990711101510183,
      "grad_norm": 3.1889095306396484,
      "learning_rate": 9.804366018067646e-06,
      "loss": 0.0877,
      "step": 79380
    },
    {
      "epoch": 0.12993984145375517,
      "grad_norm": 2.782799243927002,
      "learning_rate": 9.804300125854127e-06,
      "loss": 0.0635,
      "step": 79400
    },
    {
      "epoch": 0.1299725718924085,
      "grad_norm": 3.9806787967681885,
      "learning_rate": 9.804234233640611e-06,
      "loss": 0.0768,
      "step": 79420
    },
    {
      "epoch": 0.13000530233106183,
      "grad_norm": 1.7838724851608276,
      "learning_rate": 9.804168341427095e-06,
      "loss": 0.0643,
      "step": 79440
    },
    {
      "epoch": 0.13003803276971518,
      "grad_norm": 2.765558958053589,
      "learning_rate": 9.804102449213577e-06,
      "loss": 0.0714,
      "step": 79460
    },
    {
      "epoch": 0.13007076320836852,
      "grad_norm": 0.7547448873519897,
      "learning_rate": 9.80403655700006e-06,
      "loss": 0.0668,
      "step": 79480
    },
    {
      "epoch": 0.13010349364702187,
      "grad_norm": 16.48428726196289,
      "learning_rate": 9.803970664786544e-06,
      "loss": 0.0872,
      "step": 79500
    },
    {
      "epoch": 0.13013622408567518,
      "grad_norm": 0.9892758727073669,
      "learning_rate": 9.803904772573026e-06,
      "loss": 0.0741,
      "step": 79520
    },
    {
      "epoch": 0.13016895452432853,
      "grad_norm": 5.390220642089844,
      "learning_rate": 9.80383888035951e-06,
      "loss": 0.0969,
      "step": 79540
    },
    {
      "epoch": 0.13020168496298187,
      "grad_norm": 3.733541250228882,
      "learning_rate": 9.803772988145993e-06,
      "loss": 0.0689,
      "step": 79560
    },
    {
      "epoch": 0.13023441540163522,
      "grad_norm": 2.6960930824279785,
      "learning_rate": 9.803707095932475e-06,
      "loss": 0.0758,
      "step": 79580
    },
    {
      "epoch": 0.13026714584028856,
      "grad_norm": 2.5854673385620117,
      "learning_rate": 9.803641203718958e-06,
      "loss": 0.092,
      "step": 79600
    },
    {
      "epoch": 0.13029987627894188,
      "grad_norm": 2.38498592376709,
      "learning_rate": 9.80357531150544e-06,
      "loss": 0.0812,
      "step": 79620
    },
    {
      "epoch": 0.13033260671759522,
      "grad_norm": 1.980554223060608,
      "learning_rate": 9.803509419291924e-06,
      "loss": 0.0748,
      "step": 79640
    },
    {
      "epoch": 0.13036533715624857,
      "grad_norm": 5.726816177368164,
      "learning_rate": 9.803443527078406e-06,
      "loss": 0.0784,
      "step": 79660
    },
    {
      "epoch": 0.1303980675949019,
      "grad_norm": 1.7353547811508179,
      "learning_rate": 9.80337763486489e-06,
      "loss": 0.0999,
      "step": 79680
    },
    {
      "epoch": 0.13043079803355526,
      "grad_norm": 3.4971694946289062,
      "learning_rate": 9.803311742651371e-06,
      "loss": 0.0768,
      "step": 79700
    },
    {
      "epoch": 0.13046352847220857,
      "grad_norm": 4.003891468048096,
      "learning_rate": 9.803245850437855e-06,
      "loss": 0.0725,
      "step": 79720
    },
    {
      "epoch": 0.13049625891086192,
      "grad_norm": 5.366262912750244,
      "learning_rate": 9.803179958224337e-06,
      "loss": 0.0794,
      "step": 79740
    },
    {
      "epoch": 0.13052898934951526,
      "grad_norm": 3.4820873737335205,
      "learning_rate": 9.80311406601082e-06,
      "loss": 0.0799,
      "step": 79760
    },
    {
      "epoch": 0.1305617197881686,
      "grad_norm": 4.019879341125488,
      "learning_rate": 9.803048173797302e-06,
      "loss": 0.0781,
      "step": 79780
    },
    {
      "epoch": 0.13059445022682195,
      "grad_norm": 3.6320414543151855,
      "learning_rate": 9.802982281583786e-06,
      "loss": 0.0674,
      "step": 79800
    },
    {
      "epoch": 0.13062718066547527,
      "grad_norm": 30.770519256591797,
      "learning_rate": 9.80291638937027e-06,
      "loss": 0.0728,
      "step": 79820
    },
    {
      "epoch": 0.1306599111041286,
      "grad_norm": 2.438926935195923,
      "learning_rate": 9.802850497156751e-06,
      "loss": 0.0724,
      "step": 79840
    },
    {
      "epoch": 0.13069264154278196,
      "grad_norm": 5.2828593254089355,
      "learning_rate": 9.802784604943235e-06,
      "loss": 0.0921,
      "step": 79860
    },
    {
      "epoch": 0.1307253719814353,
      "grad_norm": 2.478186845779419,
      "learning_rate": 9.802718712729717e-06,
      "loss": 0.067,
      "step": 79880
    },
    {
      "epoch": 0.13075810242008865,
      "grad_norm": 3.409841775894165,
      "learning_rate": 9.8026528205162e-06,
      "loss": 0.0819,
      "step": 79900
    },
    {
      "epoch": 0.13079083285874196,
      "grad_norm": 2.1830949783325195,
      "learning_rate": 9.802586928302684e-06,
      "loss": 0.0693,
      "step": 79920
    },
    {
      "epoch": 0.1308235632973953,
      "grad_norm": 2.118906021118164,
      "learning_rate": 9.802521036089166e-06,
      "loss": 0.0619,
      "step": 79940
    },
    {
      "epoch": 0.13085629373604865,
      "grad_norm": 4.386897563934326,
      "learning_rate": 9.80245514387565e-06,
      "loss": 0.0838,
      "step": 79960
    },
    {
      "epoch": 0.130889024174702,
      "grad_norm": 3.2284626960754395,
      "learning_rate": 9.802389251662133e-06,
      "loss": 0.0918,
      "step": 79980
    },
    {
      "epoch": 0.13092175461335534,
      "grad_norm": 2.9080159664154053,
      "learning_rate": 9.802323359448615e-06,
      "loss": 0.0705,
      "step": 80000
    },
    {
      "epoch": 0.13095448505200866,
      "grad_norm": 2.051379680633545,
      "learning_rate": 9.802257467235098e-06,
      "loss": 0.0676,
      "step": 80020
    },
    {
      "epoch": 0.130987215490662,
      "grad_norm": 1.8562167882919312,
      "learning_rate": 9.80219157502158e-06,
      "loss": 0.0703,
      "step": 80040
    },
    {
      "epoch": 0.13101994592931535,
      "grad_norm": 1.9618616104125977,
      "learning_rate": 9.802125682808064e-06,
      "loss": 0.0691,
      "step": 80060
    },
    {
      "epoch": 0.1310526763679687,
      "grad_norm": 4.353242874145508,
      "learning_rate": 9.802059790594546e-06,
      "loss": 0.0846,
      "step": 80080
    },
    {
      "epoch": 0.13108540680662203,
      "grad_norm": 1.1479175090789795,
      "learning_rate": 9.80199389838103e-06,
      "loss": 0.0889,
      "step": 80100
    },
    {
      "epoch": 0.13111813724527535,
      "grad_norm": 3.230323553085327,
      "learning_rate": 9.801928006167511e-06,
      "loss": 0.0683,
      "step": 80120
    },
    {
      "epoch": 0.1311508676839287,
      "grad_norm": 5.439493656158447,
      "learning_rate": 9.801862113953995e-06,
      "loss": 0.0863,
      "step": 80140
    },
    {
      "epoch": 0.13118359812258204,
      "grad_norm": 17.938844680786133,
      "learning_rate": 9.801796221740477e-06,
      "loss": 0.0783,
      "step": 80160
    },
    {
      "epoch": 0.13121632856123538,
      "grad_norm": 3.080965280532837,
      "learning_rate": 9.80173032952696e-06,
      "loss": 0.0845,
      "step": 80180
    },
    {
      "epoch": 0.13124905899988873,
      "grad_norm": 3.766421318054199,
      "learning_rate": 9.801664437313444e-06,
      "loss": 0.0771,
      "step": 80200
    },
    {
      "epoch": 0.13128178943854205,
      "grad_norm": 2.861265182495117,
      "learning_rate": 9.801598545099926e-06,
      "loss": 0.0828,
      "step": 80220
    },
    {
      "epoch": 0.1313145198771954,
      "grad_norm": 3.6081645488739014,
      "learning_rate": 9.80153265288641e-06,
      "loss": 0.0642,
      "step": 80240
    },
    {
      "epoch": 0.13134725031584873,
      "grad_norm": 3.8656349182128906,
      "learning_rate": 9.801466760672891e-06,
      "loss": 0.0739,
      "step": 80260
    },
    {
      "epoch": 0.13137998075450208,
      "grad_norm": 1.7311806678771973,
      "learning_rate": 9.801400868459375e-06,
      "loss": 0.0825,
      "step": 80280
    },
    {
      "epoch": 0.13141271119315542,
      "grad_norm": 1.576305627822876,
      "learning_rate": 9.801334976245858e-06,
      "loss": 0.0889,
      "step": 80300
    },
    {
      "epoch": 0.13144544163180874,
      "grad_norm": 1.1899049282073975,
      "learning_rate": 9.80126908403234e-06,
      "loss": 0.073,
      "step": 80320
    },
    {
      "epoch": 0.13147817207046208,
      "grad_norm": 5.438571453094482,
      "learning_rate": 9.801203191818824e-06,
      "loss": 0.0665,
      "step": 80340
    },
    {
      "epoch": 0.13151090250911543,
      "grad_norm": 6.257987976074219,
      "learning_rate": 9.801137299605307e-06,
      "loss": 0.103,
      "step": 80360
    },
    {
      "epoch": 0.13154363294776877,
      "grad_norm": 6.847076416015625,
      "learning_rate": 9.80107140739179e-06,
      "loss": 0.0729,
      "step": 80380
    },
    {
      "epoch": 0.13157636338642212,
      "grad_norm": 2.9682343006134033,
      "learning_rate": 9.801005515178273e-06,
      "loss": 0.0825,
      "step": 80400
    },
    {
      "epoch": 0.13160909382507543,
      "grad_norm": 4.300792217254639,
      "learning_rate": 9.800939622964755e-06,
      "loss": 0.0861,
      "step": 80420
    },
    {
      "epoch": 0.13164182426372878,
      "grad_norm": 4.327638149261475,
      "learning_rate": 9.800873730751238e-06,
      "loss": 0.0686,
      "step": 80440
    },
    {
      "epoch": 0.13167455470238212,
      "grad_norm": 16.24269676208496,
      "learning_rate": 9.80080783853772e-06,
      "loss": 0.0643,
      "step": 80460
    },
    {
      "epoch": 0.13170728514103547,
      "grad_norm": 2.711986780166626,
      "learning_rate": 9.800741946324204e-06,
      "loss": 0.0841,
      "step": 80480
    },
    {
      "epoch": 0.1317400155796888,
      "grad_norm": 14.193299293518066,
      "learning_rate": 9.800676054110686e-06,
      "loss": 0.0809,
      "step": 80500
    },
    {
      "epoch": 0.13177274601834213,
      "grad_norm": 1.3721270561218262,
      "learning_rate": 9.80061016189717e-06,
      "loss": 0.0667,
      "step": 80520
    },
    {
      "epoch": 0.13180547645699547,
      "grad_norm": 4.0855278968811035,
      "learning_rate": 9.800544269683653e-06,
      "loss": 0.072,
      "step": 80540
    },
    {
      "epoch": 0.13183820689564882,
      "grad_norm": 2.224726676940918,
      "learning_rate": 9.800478377470135e-06,
      "loss": 0.0911,
      "step": 80560
    },
    {
      "epoch": 0.13187093733430216,
      "grad_norm": 1.3167110681533813,
      "learning_rate": 9.800412485256618e-06,
      "loss": 0.0632,
      "step": 80580
    },
    {
      "epoch": 0.13190366777295548,
      "grad_norm": 1.490993857383728,
      "learning_rate": 9.8003465930431e-06,
      "loss": 0.0804,
      "step": 80600
    },
    {
      "epoch": 0.13193639821160882,
      "grad_norm": 2.3352928161621094,
      "learning_rate": 9.800280700829584e-06,
      "loss": 0.0846,
      "step": 80620
    },
    {
      "epoch": 0.13196912865026217,
      "grad_norm": 3.1344733238220215,
      "learning_rate": 9.800214808616066e-06,
      "loss": 0.0786,
      "step": 80640
    },
    {
      "epoch": 0.1320018590889155,
      "grad_norm": 5.147523403167725,
      "learning_rate": 9.80014891640255e-06,
      "loss": 0.0758,
      "step": 80660
    },
    {
      "epoch": 0.13203458952756886,
      "grad_norm": 2.331777572631836,
      "learning_rate": 9.800083024189031e-06,
      "loss": 0.0843,
      "step": 80680
    },
    {
      "epoch": 0.13206731996622217,
      "grad_norm": 2.512706995010376,
      "learning_rate": 9.800017131975515e-06,
      "loss": 0.0658,
      "step": 80700
    },
    {
      "epoch": 0.13210005040487552,
      "grad_norm": 5.774646282196045,
      "learning_rate": 9.799951239761998e-06,
      "loss": 0.0807,
      "step": 80720
    },
    {
      "epoch": 0.13213278084352886,
      "grad_norm": 3.3270933628082275,
      "learning_rate": 9.79988534754848e-06,
      "loss": 0.0884,
      "step": 80740
    },
    {
      "epoch": 0.1321655112821822,
      "grad_norm": 11.503588676452637,
      "learning_rate": 9.799819455334964e-06,
      "loss": 0.0587,
      "step": 80760
    },
    {
      "epoch": 0.13219824172083555,
      "grad_norm": 4.373481750488281,
      "learning_rate": 9.799753563121448e-06,
      "loss": 0.0738,
      "step": 80780
    },
    {
      "epoch": 0.13223097215948887,
      "grad_norm": 1.570777177810669,
      "learning_rate": 9.79968767090793e-06,
      "loss": 0.0748,
      "step": 80800
    },
    {
      "epoch": 0.1322637025981422,
      "grad_norm": 4.11759090423584,
      "learning_rate": 9.799621778694413e-06,
      "loss": 0.09,
      "step": 80820
    },
    {
      "epoch": 0.13229643303679556,
      "grad_norm": 3.1123697757720947,
      "learning_rate": 9.799555886480895e-06,
      "loss": 0.0838,
      "step": 80840
    },
    {
      "epoch": 0.1323291634754489,
      "grad_norm": 3.4192914962768555,
      "learning_rate": 9.799489994267379e-06,
      "loss": 0.0718,
      "step": 80860
    },
    {
      "epoch": 0.13236189391410225,
      "grad_norm": 2.093188762664795,
      "learning_rate": 9.799424102053862e-06,
      "loss": 0.0847,
      "step": 80880
    },
    {
      "epoch": 0.13239462435275556,
      "grad_norm": 1.2125478982925415,
      "learning_rate": 9.799358209840344e-06,
      "loss": 0.0809,
      "step": 80900
    },
    {
      "epoch": 0.1324273547914089,
      "grad_norm": 3.6561007499694824,
      "learning_rate": 9.799292317626828e-06,
      "loss": 0.074,
      "step": 80920
    },
    {
      "epoch": 0.13246008523006225,
      "grad_norm": 1.911837100982666,
      "learning_rate": 9.79922642541331e-06,
      "loss": 0.0887,
      "step": 80940
    },
    {
      "epoch": 0.1324928156687156,
      "grad_norm": 9.753917694091797,
      "learning_rate": 9.799160533199793e-06,
      "loss": 0.0912,
      "step": 80960
    },
    {
      "epoch": 0.13252554610736894,
      "grad_norm": 3.9718780517578125,
      "learning_rate": 9.799094640986275e-06,
      "loss": 0.0675,
      "step": 80980
    },
    {
      "epoch": 0.13255827654602226,
      "grad_norm": 3.128237247467041,
      "learning_rate": 9.799028748772759e-06,
      "loss": 0.0735,
      "step": 81000
    },
    {
      "epoch": 0.1325910069846756,
      "grad_norm": 4.992683410644531,
      "learning_rate": 9.79896285655924e-06,
      "loss": 0.0592,
      "step": 81020
    },
    {
      "epoch": 0.13262373742332895,
      "grad_norm": 3.6600418090820312,
      "learning_rate": 9.798896964345724e-06,
      "loss": 0.0764,
      "step": 81040
    },
    {
      "epoch": 0.1326564678619823,
      "grad_norm": 3.7018253803253174,
      "learning_rate": 9.798831072132206e-06,
      "loss": 0.0759,
      "step": 81060
    },
    {
      "epoch": 0.13268919830063564,
      "grad_norm": 2.054248809814453,
      "learning_rate": 9.79876517991869e-06,
      "loss": 0.0743,
      "step": 81080
    },
    {
      "epoch": 0.13272192873928895,
      "grad_norm": 2.063185214996338,
      "learning_rate": 9.798699287705173e-06,
      "loss": 0.0781,
      "step": 81100
    },
    {
      "epoch": 0.1327546591779423,
      "grad_norm": 2.449514865875244,
      "learning_rate": 9.798633395491655e-06,
      "loss": 0.0698,
      "step": 81120
    },
    {
      "epoch": 0.13278738961659564,
      "grad_norm": 2.7986083030700684,
      "learning_rate": 9.798567503278139e-06,
      "loss": 0.0771,
      "step": 81140
    },
    {
      "epoch": 0.13282012005524899,
      "grad_norm": 7.349058628082275,
      "learning_rate": 9.798501611064622e-06,
      "loss": 0.0642,
      "step": 81160
    },
    {
      "epoch": 0.13285285049390233,
      "grad_norm": 3.3609278202056885,
      "learning_rate": 9.798435718851104e-06,
      "loss": 0.074,
      "step": 81180
    },
    {
      "epoch": 0.13288558093255565,
      "grad_norm": 2.8739075660705566,
      "learning_rate": 9.798369826637588e-06,
      "loss": 0.0907,
      "step": 81200
    },
    {
      "epoch": 0.132918311371209,
      "grad_norm": 1.2841960191726685,
      "learning_rate": 9.798303934424071e-06,
      "loss": 0.0676,
      "step": 81220
    },
    {
      "epoch": 0.13295104180986234,
      "grad_norm": 3.7081754207611084,
      "learning_rate": 9.798238042210553e-06,
      "loss": 0.068,
      "step": 81240
    },
    {
      "epoch": 0.13298377224851568,
      "grad_norm": 3.7938899993896484,
      "learning_rate": 9.798172149997037e-06,
      "loss": 0.0566,
      "step": 81260
    },
    {
      "epoch": 0.13301650268716902,
      "grad_norm": 1.937086582183838,
      "learning_rate": 9.798106257783519e-06,
      "loss": 0.0628,
      "step": 81280
    },
    {
      "epoch": 0.13304923312582234,
      "grad_norm": 2.8217828273773193,
      "learning_rate": 9.798040365570002e-06,
      "loss": 0.0667,
      "step": 81300
    },
    {
      "epoch": 0.13308196356447569,
      "grad_norm": 3.1543936729431152,
      "learning_rate": 9.797974473356484e-06,
      "loss": 0.0792,
      "step": 81320
    },
    {
      "epoch": 0.13311469400312903,
      "grad_norm": 4.0449934005737305,
      "learning_rate": 9.797908581142968e-06,
      "loss": 0.0938,
      "step": 81340
    },
    {
      "epoch": 0.13314742444178237,
      "grad_norm": 3.1677958965301514,
      "learning_rate": 9.79784268892945e-06,
      "loss": 0.0547,
      "step": 81360
    },
    {
      "epoch": 0.13318015488043572,
      "grad_norm": 3.825688362121582,
      "learning_rate": 9.797776796715933e-06,
      "loss": 0.0885,
      "step": 81380
    },
    {
      "epoch": 0.13321288531908904,
      "grad_norm": 2.6532516479492188,
      "learning_rate": 9.797710904502415e-06,
      "loss": 0.0697,
      "step": 81400
    },
    {
      "epoch": 0.13324561575774238,
      "grad_norm": 2.8233041763305664,
      "learning_rate": 9.797645012288899e-06,
      "loss": 0.0668,
      "step": 81420
    },
    {
      "epoch": 0.13327834619639572,
      "grad_norm": 3.452770709991455,
      "learning_rate": 9.79757912007538e-06,
      "loss": 0.075,
      "step": 81440
    },
    {
      "epoch": 0.13331107663504907,
      "grad_norm": 1.203415870666504,
      "learning_rate": 9.797513227861864e-06,
      "loss": 0.0621,
      "step": 81460
    },
    {
      "epoch": 0.13334380707370241,
      "grad_norm": 5.158736705780029,
      "learning_rate": 9.797447335648346e-06,
      "loss": 0.078,
      "step": 81480
    },
    {
      "epoch": 0.13337653751235573,
      "grad_norm": 1.590624213218689,
      "learning_rate": 9.79738144343483e-06,
      "loss": 0.0796,
      "step": 81500
    },
    {
      "epoch": 0.13340926795100908,
      "grad_norm": 4.384251117706299,
      "learning_rate": 9.797315551221313e-06,
      "loss": 0.0881,
      "step": 81520
    },
    {
      "epoch": 0.13344199838966242,
      "grad_norm": 1.9274333715438843,
      "learning_rate": 9.797249659007797e-06,
      "loss": 0.0657,
      "step": 81540
    },
    {
      "epoch": 0.13347472882831576,
      "grad_norm": 1.9634463787078857,
      "learning_rate": 9.797183766794279e-06,
      "loss": 0.0802,
      "step": 81560
    },
    {
      "epoch": 0.1335074592669691,
      "grad_norm": 5.532021522521973,
      "learning_rate": 9.797117874580762e-06,
      "loss": 0.0876,
      "step": 81580
    },
    {
      "epoch": 0.13354018970562243,
      "grad_norm": 3.966283082962036,
      "learning_rate": 9.797051982367246e-06,
      "loss": 0.0792,
      "step": 81600
    },
    {
      "epoch": 0.13357292014427577,
      "grad_norm": 5.735515594482422,
      "learning_rate": 9.796986090153728e-06,
      "loss": 0.0873,
      "step": 81620
    },
    {
      "epoch": 0.13360565058292911,
      "grad_norm": 8.942483901977539,
      "learning_rate": 9.796920197940211e-06,
      "loss": 0.0709,
      "step": 81640
    },
    {
      "epoch": 0.13363838102158246,
      "grad_norm": 1.0429900884628296,
      "learning_rate": 9.796854305726693e-06,
      "loss": 0.0627,
      "step": 81660
    },
    {
      "epoch": 0.1336711114602358,
      "grad_norm": 3.2602782249450684,
      "learning_rate": 9.796788413513177e-06,
      "loss": 0.0807,
      "step": 81680
    },
    {
      "epoch": 0.13370384189888912,
      "grad_norm": 3.531526565551758,
      "learning_rate": 9.796722521299659e-06,
      "loss": 0.0827,
      "step": 81700
    },
    {
      "epoch": 0.13373657233754246,
      "grad_norm": 3.4574413299560547,
      "learning_rate": 9.796656629086142e-06,
      "loss": 0.0696,
      "step": 81720
    },
    {
      "epoch": 0.1337693027761958,
      "grad_norm": 1.7553561925888062,
      "learning_rate": 9.796590736872624e-06,
      "loss": 0.0785,
      "step": 81740
    },
    {
      "epoch": 0.13380203321484915,
      "grad_norm": 3.255958318710327,
      "learning_rate": 9.796524844659108e-06,
      "loss": 0.0767,
      "step": 81760
    },
    {
      "epoch": 0.1338347636535025,
      "grad_norm": 1.9759807586669922,
      "learning_rate": 9.79645895244559e-06,
      "loss": 0.0722,
      "step": 81780
    },
    {
      "epoch": 0.13386749409215581,
      "grad_norm": 4.535257339477539,
      "learning_rate": 9.796393060232073e-06,
      "loss": 0.0945,
      "step": 81800
    },
    {
      "epoch": 0.13390022453080916,
      "grad_norm": 23.909809112548828,
      "learning_rate": 9.796327168018555e-06,
      "loss": 0.0748,
      "step": 81820
    },
    {
      "epoch": 0.1339329549694625,
      "grad_norm": 3.723113775253296,
      "learning_rate": 9.796261275805039e-06,
      "loss": 0.0788,
      "step": 81840
    },
    {
      "epoch": 0.13396568540811585,
      "grad_norm": 6.765805244445801,
      "learning_rate": 9.79619538359152e-06,
      "loss": 0.0808,
      "step": 81860
    },
    {
      "epoch": 0.1339984158467692,
      "grad_norm": 0.36372095346450806,
      "learning_rate": 9.796129491378004e-06,
      "loss": 0.0774,
      "step": 81880
    },
    {
      "epoch": 0.1340311462854225,
      "grad_norm": 2.1765894889831543,
      "learning_rate": 9.796063599164488e-06,
      "loss": 0.0644,
      "step": 81900
    },
    {
      "epoch": 0.13406387672407585,
      "grad_norm": 3.4632728099823,
      "learning_rate": 9.79599770695097e-06,
      "loss": 0.0777,
      "step": 81920
    },
    {
      "epoch": 0.1340966071627292,
      "grad_norm": 2.4160690307617188,
      "learning_rate": 9.795931814737453e-06,
      "loss": 0.0524,
      "step": 81940
    },
    {
      "epoch": 0.13412933760138254,
      "grad_norm": 3.052614688873291,
      "learning_rate": 9.795865922523937e-06,
      "loss": 0.0764,
      "step": 81960
    },
    {
      "epoch": 0.13416206804003586,
      "grad_norm": 4.518344879150391,
      "learning_rate": 9.795800030310419e-06,
      "loss": 0.0708,
      "step": 81980
    },
    {
      "epoch": 0.1341947984786892,
      "grad_norm": 5.052082061767578,
      "learning_rate": 9.795734138096902e-06,
      "loss": 0.0864,
      "step": 82000
    },
    {
      "epoch": 0.13422752891734255,
      "grad_norm": 2.4010143280029297,
      "learning_rate": 9.795668245883386e-06,
      "loss": 0.0776,
      "step": 82020
    },
    {
      "epoch": 0.1342602593559959,
      "grad_norm": 1.2116419076919556,
      "learning_rate": 9.795602353669868e-06,
      "loss": 0.0891,
      "step": 82040
    },
    {
      "epoch": 0.13429298979464924,
      "grad_norm": 2.761164903640747,
      "learning_rate": 9.795536461456351e-06,
      "loss": 0.0839,
      "step": 82060
    },
    {
      "epoch": 0.13432572023330255,
      "grad_norm": 2.4092421531677246,
      "learning_rate": 9.795470569242833e-06,
      "loss": 0.0901,
      "step": 82080
    },
    {
      "epoch": 0.1343584506719559,
      "grad_norm": 2.858309745788574,
      "learning_rate": 9.795404677029317e-06,
      "loss": 0.0644,
      "step": 82100
    },
    {
      "epoch": 0.13439118111060924,
      "grad_norm": 3.850877285003662,
      "learning_rate": 9.795338784815799e-06,
      "loss": 0.0717,
      "step": 82120
    },
    {
      "epoch": 0.1344239115492626,
      "grad_norm": 2.981917142868042,
      "learning_rate": 9.795272892602282e-06,
      "loss": 0.0602,
      "step": 82140
    },
    {
      "epoch": 0.13445664198791593,
      "grad_norm": 2.431025266647339,
      "learning_rate": 9.795207000388764e-06,
      "loss": 0.0702,
      "step": 82160
    },
    {
      "epoch": 0.13448937242656925,
      "grad_norm": 3.211151361465454,
      "learning_rate": 9.795141108175248e-06,
      "loss": 0.0859,
      "step": 82180
    },
    {
      "epoch": 0.1345221028652226,
      "grad_norm": 1.7925901412963867,
      "learning_rate": 9.79507521596173e-06,
      "loss": 0.0864,
      "step": 82200
    },
    {
      "epoch": 0.13455483330387594,
      "grad_norm": 2.5261106491088867,
      "learning_rate": 9.795009323748213e-06,
      "loss": 0.0687,
      "step": 82220
    },
    {
      "epoch": 0.13458756374252928,
      "grad_norm": 1.2373346090316772,
      "learning_rate": 9.794943431534695e-06,
      "loss": 0.0726,
      "step": 82240
    },
    {
      "epoch": 0.13462029418118263,
      "grad_norm": 3.300222873687744,
      "learning_rate": 9.794877539321179e-06,
      "loss": 0.0905,
      "step": 82260
    },
    {
      "epoch": 0.13465302461983594,
      "grad_norm": 6.1856160163879395,
      "learning_rate": 9.794811647107662e-06,
      "loss": 0.0675,
      "step": 82280
    },
    {
      "epoch": 0.1346857550584893,
      "grad_norm": 13.746515274047852,
      "learning_rate": 9.794745754894144e-06,
      "loss": 0.0726,
      "step": 82300
    },
    {
      "epoch": 0.13471848549714263,
      "grad_norm": 3.570760488510132,
      "learning_rate": 9.794679862680628e-06,
      "loss": 0.0864,
      "step": 82320
    },
    {
      "epoch": 0.13475121593579598,
      "grad_norm": 4.145529270172119,
      "learning_rate": 9.794613970467111e-06,
      "loss": 0.0905,
      "step": 82340
    },
    {
      "epoch": 0.13478394637444932,
      "grad_norm": 3.334855794906616,
      "learning_rate": 9.794548078253593e-06,
      "loss": 0.0684,
      "step": 82360
    },
    {
      "epoch": 0.13481667681310264,
      "grad_norm": 5.610708713531494,
      "learning_rate": 9.794482186040077e-06,
      "loss": 0.0671,
      "step": 82380
    },
    {
      "epoch": 0.13484940725175598,
      "grad_norm": 2.3899545669555664,
      "learning_rate": 9.79441629382656e-06,
      "loss": 0.0608,
      "step": 82400
    },
    {
      "epoch": 0.13488213769040933,
      "grad_norm": 0.9091928005218506,
      "learning_rate": 9.794350401613042e-06,
      "loss": 0.0598,
      "step": 82420
    },
    {
      "epoch": 0.13491486812906267,
      "grad_norm": 6.019809722900391,
      "learning_rate": 9.794284509399526e-06,
      "loss": 0.0798,
      "step": 82440
    },
    {
      "epoch": 0.13494759856771602,
      "grad_norm": 6.593603134155273,
      "learning_rate": 9.794218617186008e-06,
      "loss": 0.0705,
      "step": 82460
    },
    {
      "epoch": 0.13498032900636933,
      "grad_norm": 2.8218395709991455,
      "learning_rate": 9.794152724972491e-06,
      "loss": 0.0995,
      "step": 82480
    },
    {
      "epoch": 0.13501305944502268,
      "grad_norm": 3.5311849117279053,
      "learning_rate": 9.794086832758973e-06,
      "loss": 0.0858,
      "step": 82500
    },
    {
      "epoch": 0.13504578988367602,
      "grad_norm": 1.6905936002731323,
      "learning_rate": 9.794020940545457e-06,
      "loss": 0.0845,
      "step": 82520
    },
    {
      "epoch": 0.13507852032232937,
      "grad_norm": 3.8364598751068115,
      "learning_rate": 9.793955048331939e-06,
      "loss": 0.0773,
      "step": 82540
    },
    {
      "epoch": 0.1351112507609827,
      "grad_norm": 2.5532779693603516,
      "learning_rate": 9.793889156118422e-06,
      "loss": 0.063,
      "step": 82560
    },
    {
      "epoch": 0.13514398119963603,
      "grad_norm": 3.9436919689178467,
      "learning_rate": 9.793823263904904e-06,
      "loss": 0.0996,
      "step": 82580
    },
    {
      "epoch": 0.13517671163828937,
      "grad_norm": 1.724241018295288,
      "learning_rate": 9.793757371691388e-06,
      "loss": 0.0975,
      "step": 82600
    },
    {
      "epoch": 0.13520944207694272,
      "grad_norm": 1.0026471614837646,
      "learning_rate": 9.79369147947787e-06,
      "loss": 0.0848,
      "step": 82620
    },
    {
      "epoch": 0.13524217251559606,
      "grad_norm": 1.8919843435287476,
      "learning_rate": 9.793625587264353e-06,
      "loss": 0.067,
      "step": 82640
    },
    {
      "epoch": 0.1352749029542494,
      "grad_norm": 0.9536481499671936,
      "learning_rate": 9.793559695050837e-06,
      "loss": 0.0963,
      "step": 82660
    },
    {
      "epoch": 0.13530763339290272,
      "grad_norm": 4.062337398529053,
      "learning_rate": 9.793493802837319e-06,
      "loss": 0.0796,
      "step": 82680
    },
    {
      "epoch": 0.13534036383155607,
      "grad_norm": 2.565695285797119,
      "learning_rate": 9.793427910623802e-06,
      "loss": 0.0805,
      "step": 82700
    },
    {
      "epoch": 0.1353730942702094,
      "grad_norm": 1.9491727352142334,
      "learning_rate": 9.793362018410284e-06,
      "loss": 0.0744,
      "step": 82720
    },
    {
      "epoch": 0.13540582470886275,
      "grad_norm": 3.5848257541656494,
      "learning_rate": 9.793296126196768e-06,
      "loss": 0.0821,
      "step": 82740
    },
    {
      "epoch": 0.1354385551475161,
      "grad_norm": 4.27608585357666,
      "learning_rate": 9.793230233983251e-06,
      "loss": 0.079,
      "step": 82760
    },
    {
      "epoch": 0.13547128558616942,
      "grad_norm": 3.7379441261291504,
      "learning_rate": 9.793164341769733e-06,
      "loss": 0.0866,
      "step": 82780
    },
    {
      "epoch": 0.13550401602482276,
      "grad_norm": 38.3322639465332,
      "learning_rate": 9.793098449556217e-06,
      "loss": 0.0603,
      "step": 82800
    },
    {
      "epoch": 0.1355367464634761,
      "grad_norm": 2.8320257663726807,
      "learning_rate": 9.7930325573427e-06,
      "loss": 0.0722,
      "step": 82820
    },
    {
      "epoch": 0.13556947690212945,
      "grad_norm": 3.3240227699279785,
      "learning_rate": 9.792966665129182e-06,
      "loss": 0.0628,
      "step": 82840
    },
    {
      "epoch": 0.1356022073407828,
      "grad_norm": 0.691033661365509,
      "learning_rate": 9.792900772915666e-06,
      "loss": 0.0731,
      "step": 82860
    },
    {
      "epoch": 0.1356349377794361,
      "grad_norm": 3.312009572982788,
      "learning_rate": 9.792834880702148e-06,
      "loss": 0.0827,
      "step": 82880
    },
    {
      "epoch": 0.13566766821808945,
      "grad_norm": 14.13448715209961,
      "learning_rate": 9.792768988488632e-06,
      "loss": 0.0621,
      "step": 82900
    },
    {
      "epoch": 0.1357003986567428,
      "grad_norm": 2.760047197341919,
      "learning_rate": 9.792703096275113e-06,
      "loss": 0.0647,
      "step": 82920
    },
    {
      "epoch": 0.13573312909539614,
      "grad_norm": 1.9011327028274536,
      "learning_rate": 9.792637204061597e-06,
      "loss": 0.0866,
      "step": 82940
    },
    {
      "epoch": 0.1357658595340495,
      "grad_norm": 1.3630050420761108,
      "learning_rate": 9.792571311848079e-06,
      "loss": 0.0634,
      "step": 82960
    },
    {
      "epoch": 0.1357985899727028,
      "grad_norm": 4.253749847412109,
      "learning_rate": 9.792505419634562e-06,
      "loss": 0.0849,
      "step": 82980
    },
    {
      "epoch": 0.13583132041135615,
      "grad_norm": 4.392864227294922,
      "learning_rate": 9.792439527421046e-06,
      "loss": 0.0644,
      "step": 83000
    },
    {
      "epoch": 0.1358640508500095,
      "grad_norm": 5.75853157043457,
      "learning_rate": 9.792373635207528e-06,
      "loss": 0.0617,
      "step": 83020
    },
    {
      "epoch": 0.13589678128866284,
      "grad_norm": 3.605811595916748,
      "learning_rate": 9.792307742994012e-06,
      "loss": 0.078,
      "step": 83040
    },
    {
      "epoch": 0.13592951172731618,
      "grad_norm": 1.8188221454620361,
      "learning_rate": 9.792241850780493e-06,
      "loss": 0.0925,
      "step": 83060
    },
    {
      "epoch": 0.1359622421659695,
      "grad_norm": 6.943761825561523,
      "learning_rate": 9.792175958566977e-06,
      "loss": 0.0671,
      "step": 83080
    },
    {
      "epoch": 0.13599497260462284,
      "grad_norm": 1.6520813703536987,
      "learning_rate": 9.792110066353459e-06,
      "loss": 0.0808,
      "step": 83100
    },
    {
      "epoch": 0.1360277030432762,
      "grad_norm": 4.994662761688232,
      "learning_rate": 9.792044174139943e-06,
      "loss": 0.1062,
      "step": 83120
    },
    {
      "epoch": 0.13606043348192953,
      "grad_norm": 3.3168578147888184,
      "learning_rate": 9.791978281926426e-06,
      "loss": 0.0633,
      "step": 83140
    },
    {
      "epoch": 0.13609316392058288,
      "grad_norm": 2.3028156757354736,
      "learning_rate": 9.791912389712908e-06,
      "loss": 0.0747,
      "step": 83160
    },
    {
      "epoch": 0.1361258943592362,
      "grad_norm": 1.9359829425811768,
      "learning_rate": 9.791846497499392e-06,
      "loss": 0.0728,
      "step": 83180
    },
    {
      "epoch": 0.13615862479788954,
      "grad_norm": 10.036152839660645,
      "learning_rate": 9.791780605285875e-06,
      "loss": 0.0717,
      "step": 83200
    },
    {
      "epoch": 0.13619135523654288,
      "grad_norm": 5.507127285003662,
      "learning_rate": 9.791714713072357e-06,
      "loss": 0.0835,
      "step": 83220
    },
    {
      "epoch": 0.13622408567519623,
      "grad_norm": 5.411548614501953,
      "learning_rate": 9.79164882085884e-06,
      "loss": 0.0657,
      "step": 83240
    },
    {
      "epoch": 0.13625681611384957,
      "grad_norm": 1.88785719871521,
      "learning_rate": 9.791582928645323e-06,
      "loss": 0.0698,
      "step": 83260
    },
    {
      "epoch": 0.1362895465525029,
      "grad_norm": 2.352729082107544,
      "learning_rate": 9.791517036431806e-06,
      "loss": 0.0915,
      "step": 83280
    },
    {
      "epoch": 0.13632227699115623,
      "grad_norm": 1.0891375541687012,
      "learning_rate": 9.791451144218288e-06,
      "loss": 0.0823,
      "step": 83300
    },
    {
      "epoch": 0.13635500742980958,
      "grad_norm": 2.287008285522461,
      "learning_rate": 9.791385252004772e-06,
      "loss": 0.0718,
      "step": 83320
    },
    {
      "epoch": 0.13638773786846292,
      "grad_norm": 2.166619300842285,
      "learning_rate": 9.791319359791255e-06,
      "loss": 0.0756,
      "step": 83340
    },
    {
      "epoch": 0.13642046830711624,
      "grad_norm": 2.567322254180908,
      "learning_rate": 9.791253467577737e-06,
      "loss": 0.0805,
      "step": 83360
    },
    {
      "epoch": 0.13645319874576958,
      "grad_norm": 1.2791874408721924,
      "learning_rate": 9.79118757536422e-06,
      "loss": 0.0662,
      "step": 83380
    },
    {
      "epoch": 0.13648592918442293,
      "grad_norm": 2.332172393798828,
      "learning_rate": 9.791121683150703e-06,
      "loss": 0.0677,
      "step": 83400
    },
    {
      "epoch": 0.13651865962307627,
      "grad_norm": 3.806976079940796,
      "learning_rate": 9.791055790937186e-06,
      "loss": 0.0811,
      "step": 83420
    },
    {
      "epoch": 0.13655139006172962,
      "grad_norm": 1.6842046976089478,
      "learning_rate": 9.790989898723668e-06,
      "loss": 0.0949,
      "step": 83440
    },
    {
      "epoch": 0.13658412050038293,
      "grad_norm": 2.6355578899383545,
      "learning_rate": 9.790924006510152e-06,
      "loss": 0.0641,
      "step": 83460
    },
    {
      "epoch": 0.13661685093903628,
      "grad_norm": 4.269084453582764,
      "learning_rate": 9.790858114296634e-06,
      "loss": 0.0672,
      "step": 83480
    },
    {
      "epoch": 0.13664958137768962,
      "grad_norm": 2.827054023742676,
      "learning_rate": 9.790792222083117e-06,
      "loss": 0.0746,
      "step": 83500
    },
    {
      "epoch": 0.13668231181634297,
      "grad_norm": 0.7425021529197693,
      "learning_rate": 9.790726329869599e-06,
      "loss": 0.0861,
      "step": 83520
    },
    {
      "epoch": 0.1367150422549963,
      "grad_norm": 2.5122768878936768,
      "learning_rate": 9.790660437656083e-06,
      "loss": 0.0867,
      "step": 83540
    },
    {
      "epoch": 0.13674777269364963,
      "grad_norm": 2.766794443130493,
      "learning_rate": 9.790594545442566e-06,
      "loss": 0.0512,
      "step": 83560
    },
    {
      "epoch": 0.13678050313230297,
      "grad_norm": 3.3975515365600586,
      "learning_rate": 9.790528653229048e-06,
      "loss": 0.0771,
      "step": 83580
    },
    {
      "epoch": 0.13681323357095632,
      "grad_norm": 5.467060089111328,
      "learning_rate": 9.790462761015532e-06,
      "loss": 0.0807,
      "step": 83600
    },
    {
      "epoch": 0.13684596400960966,
      "grad_norm": 14.078001976013184,
      "learning_rate": 9.790396868802015e-06,
      "loss": 0.0774,
      "step": 83620
    },
    {
      "epoch": 0.136878694448263,
      "grad_norm": 4.4802703857421875,
      "learning_rate": 9.790330976588497e-06,
      "loss": 0.0749,
      "step": 83640
    },
    {
      "epoch": 0.13691142488691632,
      "grad_norm": 1.8919392824172974,
      "learning_rate": 9.79026508437498e-06,
      "loss": 0.0636,
      "step": 83660
    },
    {
      "epoch": 0.13694415532556967,
      "grad_norm": 2.1892142295837402,
      "learning_rate": 9.790199192161463e-06,
      "loss": 0.073,
      "step": 83680
    },
    {
      "epoch": 0.136976885764223,
      "grad_norm": 4.360795497894287,
      "learning_rate": 9.790133299947946e-06,
      "loss": 0.0722,
      "step": 83700
    },
    {
      "epoch": 0.13700961620287636,
      "grad_norm": 1.9234083890914917,
      "learning_rate": 9.79006740773443e-06,
      "loss": 0.0939,
      "step": 83720
    },
    {
      "epoch": 0.1370423466415297,
      "grad_norm": 8.995461463928223,
      "learning_rate": 9.790001515520912e-06,
      "loss": 0.0542,
      "step": 83740
    },
    {
      "epoch": 0.13707507708018302,
      "grad_norm": 3.68446683883667,
      "learning_rate": 9.789935623307395e-06,
      "loss": 0.0639,
      "step": 83760
    },
    {
      "epoch": 0.13710780751883636,
      "grad_norm": 1.5011550188064575,
      "learning_rate": 9.789869731093877e-06,
      "loss": 0.0771,
      "step": 83780
    },
    {
      "epoch": 0.1371405379574897,
      "grad_norm": 4.502035140991211,
      "learning_rate": 9.78980383888036e-06,
      "loss": 0.0846,
      "step": 83800
    },
    {
      "epoch": 0.13717326839614305,
      "grad_norm": 6.497550964355469,
      "learning_rate": 9.789737946666843e-06,
      "loss": 0.0746,
      "step": 83820
    },
    {
      "epoch": 0.1372059988347964,
      "grad_norm": 3.6618685722351074,
      "learning_rate": 9.789672054453326e-06,
      "loss": 0.0866,
      "step": 83840
    },
    {
      "epoch": 0.1372387292734497,
      "grad_norm": 1.976022481918335,
      "learning_rate": 9.789606162239808e-06,
      "loss": 0.085,
      "step": 83860
    },
    {
      "epoch": 0.13727145971210306,
      "grad_norm": 1.388376235961914,
      "learning_rate": 9.789540270026292e-06,
      "loss": 0.0637,
      "step": 83880
    },
    {
      "epoch": 0.1373041901507564,
      "grad_norm": 7.957776069641113,
      "learning_rate": 9.789474377812774e-06,
      "loss": 0.0711,
      "step": 83900
    },
    {
      "epoch": 0.13733692058940974,
      "grad_norm": 4.4780683517456055,
      "learning_rate": 9.789408485599257e-06,
      "loss": 0.0898,
      "step": 83920
    },
    {
      "epoch": 0.1373696510280631,
      "grad_norm": 3.955745220184326,
      "learning_rate": 9.78934259338574e-06,
      "loss": 0.0871,
      "step": 83940
    },
    {
      "epoch": 0.1374023814667164,
      "grad_norm": 4.046377182006836,
      "learning_rate": 9.789276701172223e-06,
      "loss": 0.0901,
      "step": 83960
    },
    {
      "epoch": 0.13743511190536975,
      "grad_norm": 3.893491506576538,
      "learning_rate": 9.789210808958706e-06,
      "loss": 0.0922,
      "step": 83980
    },
    {
      "epoch": 0.1374678423440231,
      "grad_norm": 1.457484245300293,
      "learning_rate": 9.78914491674519e-06,
      "loss": 0.0701,
      "step": 84000
    },
    {
      "epoch": 0.13750057278267644,
      "grad_norm": 1.2292922735214233,
      "learning_rate": 9.789079024531672e-06,
      "loss": 0.0493,
      "step": 84020
    },
    {
      "epoch": 0.13753330322132978,
      "grad_norm": 2.8316922187805176,
      "learning_rate": 9.789013132318155e-06,
      "loss": 0.0636,
      "step": 84040
    },
    {
      "epoch": 0.1375660336599831,
      "grad_norm": 1.2729988098144531,
      "learning_rate": 9.788947240104639e-06,
      "loss": 0.0738,
      "step": 84060
    },
    {
      "epoch": 0.13759876409863644,
      "grad_norm": 3.727602481842041,
      "learning_rate": 9.78888134789112e-06,
      "loss": 0.065,
      "step": 84080
    },
    {
      "epoch": 0.1376314945372898,
      "grad_norm": 4.057519912719727,
      "learning_rate": 9.788815455677604e-06,
      "loss": 0.0676,
      "step": 84100
    },
    {
      "epoch": 0.13766422497594313,
      "grad_norm": 3.012072801589966,
      "learning_rate": 9.788749563464086e-06,
      "loss": 0.0647,
      "step": 84120
    },
    {
      "epoch": 0.13769695541459648,
      "grad_norm": 3.7856061458587646,
      "learning_rate": 9.78868367125057e-06,
      "loss": 0.0709,
      "step": 84140
    },
    {
      "epoch": 0.1377296858532498,
      "grad_norm": 5.882051944732666,
      "learning_rate": 9.788617779037052e-06,
      "loss": 0.0894,
      "step": 84160
    },
    {
      "epoch": 0.13776241629190314,
      "grad_norm": 1.2397137880325317,
      "learning_rate": 9.788551886823535e-06,
      "loss": 0.0803,
      "step": 84180
    },
    {
      "epoch": 0.13779514673055648,
      "grad_norm": 3.2275967597961426,
      "learning_rate": 9.788485994610017e-06,
      "loss": 0.0673,
      "step": 84200
    },
    {
      "epoch": 0.13782787716920983,
      "grad_norm": 2.8453316688537598,
      "learning_rate": 9.7884201023965e-06,
      "loss": 0.0726,
      "step": 84220
    },
    {
      "epoch": 0.13786060760786317,
      "grad_norm": 1.4880516529083252,
      "learning_rate": 9.788354210182983e-06,
      "loss": 0.0504,
      "step": 84240
    },
    {
      "epoch": 0.1378933380465165,
      "grad_norm": 2.6680643558502197,
      "learning_rate": 9.788288317969466e-06,
      "loss": 0.0634,
      "step": 84260
    },
    {
      "epoch": 0.13792606848516983,
      "grad_norm": 2.8712921142578125,
      "learning_rate": 9.788222425755948e-06,
      "loss": 0.0747,
      "step": 84280
    },
    {
      "epoch": 0.13795879892382318,
      "grad_norm": 4.390523433685303,
      "learning_rate": 9.788156533542432e-06,
      "loss": 0.0731,
      "step": 84300
    },
    {
      "epoch": 0.13799152936247652,
      "grad_norm": 4.065722465515137,
      "learning_rate": 9.788090641328915e-06,
      "loss": 0.076,
      "step": 84320
    },
    {
      "epoch": 0.13802425980112987,
      "grad_norm": 1.239918828010559,
      "learning_rate": 9.788024749115397e-06,
      "loss": 0.0717,
      "step": 84340
    },
    {
      "epoch": 0.13805699023978318,
      "grad_norm": 2.911496639251709,
      "learning_rate": 9.78795885690188e-06,
      "loss": 0.057,
      "step": 84360
    },
    {
      "epoch": 0.13808972067843653,
      "grad_norm": 1.5798231363296509,
      "learning_rate": 9.787892964688364e-06,
      "loss": 0.0772,
      "step": 84380
    },
    {
      "epoch": 0.13812245111708987,
      "grad_norm": 2.615286350250244,
      "learning_rate": 9.787827072474846e-06,
      "loss": 0.0947,
      "step": 84400
    },
    {
      "epoch": 0.13815518155574322,
      "grad_norm": 3.0906128883361816,
      "learning_rate": 9.78776118026133e-06,
      "loss": 0.0737,
      "step": 84420
    },
    {
      "epoch": 0.13818791199439656,
      "grad_norm": 3.3786540031433105,
      "learning_rate": 9.787695288047813e-06,
      "loss": 0.0659,
      "step": 84440
    },
    {
      "epoch": 0.13822064243304988,
      "grad_norm": 2.5285770893096924,
      "learning_rate": 9.787629395834295e-06,
      "loss": 0.0823,
      "step": 84460
    },
    {
      "epoch": 0.13825337287170322,
      "grad_norm": 3.1619274616241455,
      "learning_rate": 9.787563503620779e-06,
      "loss": 0.0815,
      "step": 84480
    },
    {
      "epoch": 0.13828610331035657,
      "grad_norm": 3.9413094520568848,
      "learning_rate": 9.787497611407261e-06,
      "loss": 0.0905,
      "step": 84500
    },
    {
      "epoch": 0.1383188337490099,
      "grad_norm": 3.117643117904663,
      "learning_rate": 9.787431719193744e-06,
      "loss": 0.084,
      "step": 84520
    },
    {
      "epoch": 0.13835156418766326,
      "grad_norm": 5.469579696655273,
      "learning_rate": 9.787365826980226e-06,
      "loss": 0.0753,
      "step": 84540
    },
    {
      "epoch": 0.13838429462631657,
      "grad_norm": 1.4152088165283203,
      "learning_rate": 9.78729993476671e-06,
      "loss": 0.069,
      "step": 84560
    },
    {
      "epoch": 0.13841702506496992,
      "grad_norm": 5.735730171203613,
      "learning_rate": 9.787234042553192e-06,
      "loss": 0.0602,
      "step": 84580
    },
    {
      "epoch": 0.13844975550362326,
      "grad_norm": 0.8566242456436157,
      "learning_rate": 9.787168150339675e-06,
      "loss": 0.0817,
      "step": 84600
    },
    {
      "epoch": 0.1384824859422766,
      "grad_norm": 1.9963090419769287,
      "learning_rate": 9.787102258126157e-06,
      "loss": 0.0807,
      "step": 84620
    },
    {
      "epoch": 0.13851521638092995,
      "grad_norm": 2.895751953125,
      "learning_rate": 9.787036365912641e-06,
      "loss": 0.0683,
      "step": 84640
    },
    {
      "epoch": 0.13854794681958327,
      "grad_norm": 6.797922611236572,
      "learning_rate": 9.786970473699123e-06,
      "loss": 0.0843,
      "step": 84660
    },
    {
      "epoch": 0.1385806772582366,
      "grad_norm": 1.7391812801361084,
      "learning_rate": 9.786904581485606e-06,
      "loss": 0.0769,
      "step": 84680
    },
    {
      "epoch": 0.13861340769688996,
      "grad_norm": 2.6789848804473877,
      "learning_rate": 9.786838689272088e-06,
      "loss": 0.0821,
      "step": 84700
    },
    {
      "epoch": 0.1386461381355433,
      "grad_norm": 12.618653297424316,
      "learning_rate": 9.786772797058572e-06,
      "loss": 0.0706,
      "step": 84720
    },
    {
      "epoch": 0.13867886857419662,
      "grad_norm": 2.6378650665283203,
      "learning_rate": 9.786706904845055e-06,
      "loss": 0.0839,
      "step": 84740
    },
    {
      "epoch": 0.13871159901284996,
      "grad_norm": 5.209501266479492,
      "learning_rate": 9.786641012631537e-06,
      "loss": 0.098,
      "step": 84760
    },
    {
      "epoch": 0.1387443294515033,
      "grad_norm": 1.4430841207504272,
      "learning_rate": 9.786575120418021e-06,
      "loss": 0.0646,
      "step": 84780
    },
    {
      "epoch": 0.13877705989015665,
      "grad_norm": 2.158809185028076,
      "learning_rate": 9.786509228204504e-06,
      "loss": 0.078,
      "step": 84800
    },
    {
      "epoch": 0.13880979032881,
      "grad_norm": 3.806103467941284,
      "learning_rate": 9.786443335990986e-06,
      "loss": 0.0609,
      "step": 84820
    },
    {
      "epoch": 0.1388425207674633,
      "grad_norm": 1.1448452472686768,
      "learning_rate": 9.78637744377747e-06,
      "loss": 0.0772,
      "step": 84840
    },
    {
      "epoch": 0.13887525120611666,
      "grad_norm": 5.588442802429199,
      "learning_rate": 9.786311551563954e-06,
      "loss": 0.0914,
      "step": 84860
    },
    {
      "epoch": 0.13890798164477,
      "grad_norm": 2.559786558151245,
      "learning_rate": 9.786245659350435e-06,
      "loss": 0.1012,
      "step": 84880
    },
    {
      "epoch": 0.13894071208342335,
      "grad_norm": 2.899812936782837,
      "learning_rate": 9.786179767136919e-06,
      "loss": 0.0771,
      "step": 84900
    },
    {
      "epoch": 0.1389734425220767,
      "grad_norm": 1.3665071725845337,
      "learning_rate": 9.786113874923401e-06,
      "loss": 0.0796,
      "step": 84920
    },
    {
      "epoch": 0.13900617296073,
      "grad_norm": 2.347775459289551,
      "learning_rate": 9.786047982709885e-06,
      "loss": 0.0758,
      "step": 84940
    },
    {
      "epoch": 0.13903890339938335,
      "grad_norm": 1.106984257698059,
      "learning_rate": 9.785982090496366e-06,
      "loss": 0.063,
      "step": 84960
    },
    {
      "epoch": 0.1390716338380367,
      "grad_norm": 5.92643404006958,
      "learning_rate": 9.78591619828285e-06,
      "loss": 0.0716,
      "step": 84980
    },
    {
      "epoch": 0.13910436427669004,
      "grad_norm": 4.095647811889648,
      "learning_rate": 9.785850306069332e-06,
      "loss": 0.07,
      "step": 85000
    },
    {
      "epoch": 0.13913709471534338,
      "grad_norm": 2.024919033050537,
      "learning_rate": 9.785784413855815e-06,
      "loss": 0.0697,
      "step": 85020
    },
    {
      "epoch": 0.1391698251539967,
      "grad_norm": 3.861448287963867,
      "learning_rate": 9.785718521642297e-06,
      "loss": 0.0719,
      "step": 85040
    },
    {
      "epoch": 0.13920255559265005,
      "grad_norm": 1.4421031475067139,
      "learning_rate": 9.785652629428781e-06,
      "loss": 0.0605,
      "step": 85060
    },
    {
      "epoch": 0.1392352860313034,
      "grad_norm": 2.057588815689087,
      "learning_rate": 9.785586737215263e-06,
      "loss": 0.0846,
      "step": 85080
    },
    {
      "epoch": 0.13926801646995673,
      "grad_norm": 3.782616138458252,
      "learning_rate": 9.785520845001746e-06,
      "loss": 0.0583,
      "step": 85100
    },
    {
      "epoch": 0.13930074690861008,
      "grad_norm": 1.4752626419067383,
      "learning_rate": 9.78545495278823e-06,
      "loss": 0.0738,
      "step": 85120
    },
    {
      "epoch": 0.1393334773472634,
      "grad_norm": 3.4330387115478516,
      "learning_rate": 9.785389060574712e-06,
      "loss": 0.091,
      "step": 85140
    },
    {
      "epoch": 0.13936620778591674,
      "grad_norm": 1.7085769176483154,
      "learning_rate": 9.785323168361196e-06,
      "loss": 0.0778,
      "step": 85160
    },
    {
      "epoch": 0.13939893822457008,
      "grad_norm": 3.272789239883423,
      "learning_rate": 9.785257276147679e-06,
      "loss": 0.0682,
      "step": 85180
    },
    {
      "epoch": 0.13943166866322343,
      "grad_norm": 2.5565385818481445,
      "learning_rate": 9.785191383934161e-06,
      "loss": 0.0814,
      "step": 85200
    },
    {
      "epoch": 0.13946439910187677,
      "grad_norm": 1.0954623222351074,
      "learning_rate": 9.785125491720645e-06,
      "loss": 0.0769,
      "step": 85220
    },
    {
      "epoch": 0.1394971295405301,
      "grad_norm": 1.3736791610717773,
      "learning_rate": 9.785059599507128e-06,
      "loss": 0.0763,
      "step": 85240
    },
    {
      "epoch": 0.13952985997918343,
      "grad_norm": 0.6311280131340027,
      "learning_rate": 9.78499370729361e-06,
      "loss": 0.0854,
      "step": 85260
    },
    {
      "epoch": 0.13956259041783678,
      "grad_norm": 6.702917098999023,
      "learning_rate": 9.784927815080094e-06,
      "loss": 0.0657,
      "step": 85280
    },
    {
      "epoch": 0.13959532085649012,
      "grad_norm": 4.63375186920166,
      "learning_rate": 9.784861922866576e-06,
      "loss": 0.085,
      "step": 85300
    },
    {
      "epoch": 0.13962805129514347,
      "grad_norm": 2.8537018299102783,
      "learning_rate": 9.784796030653059e-06,
      "loss": 0.0696,
      "step": 85320
    },
    {
      "epoch": 0.13966078173379678,
      "grad_norm": 6.772221565246582,
      "learning_rate": 9.784730138439541e-06,
      "loss": 0.0784,
      "step": 85340
    },
    {
      "epoch": 0.13969351217245013,
      "grad_norm": 3.05997371673584,
      "learning_rate": 9.784664246226025e-06,
      "loss": 0.0535,
      "step": 85360
    },
    {
      "epoch": 0.13972624261110347,
      "grad_norm": 11.744264602661133,
      "learning_rate": 9.784598354012506e-06,
      "loss": 0.084,
      "step": 85380
    },
    {
      "epoch": 0.13975897304975682,
      "grad_norm": 2.2229719161987305,
      "learning_rate": 9.78453246179899e-06,
      "loss": 0.0583,
      "step": 85400
    },
    {
      "epoch": 0.13979170348841016,
      "grad_norm": 2.464582681655884,
      "learning_rate": 9.784466569585472e-06,
      "loss": 0.0697,
      "step": 85420
    },
    {
      "epoch": 0.13982443392706348,
      "grad_norm": 2.3364598751068115,
      "learning_rate": 9.784400677371956e-06,
      "loss": 0.0545,
      "step": 85440
    },
    {
      "epoch": 0.13985716436571682,
      "grad_norm": 4.749055862426758,
      "learning_rate": 9.784334785158439e-06,
      "loss": 0.0737,
      "step": 85460
    },
    {
      "epoch": 0.13988989480437017,
      "grad_norm": 6.468872547149658,
      "learning_rate": 9.784268892944921e-06,
      "loss": 0.0745,
      "step": 85480
    },
    {
      "epoch": 0.1399226252430235,
      "grad_norm": 2.734355926513672,
      "learning_rate": 9.784203000731405e-06,
      "loss": 0.0981,
      "step": 85500
    },
    {
      "epoch": 0.13995535568167686,
      "grad_norm": 7.3056464195251465,
      "learning_rate": 9.784137108517887e-06,
      "loss": 0.0788,
      "step": 85520
    },
    {
      "epoch": 0.13998808612033017,
      "grad_norm": 2.203500986099243,
      "learning_rate": 9.78407121630437e-06,
      "loss": 0.0979,
      "step": 85540
    },
    {
      "epoch": 0.14002081655898352,
      "grad_norm": 2.7799911499023438,
      "learning_rate": 9.784005324090852e-06,
      "loss": 0.0854,
      "step": 85560
    },
    {
      "epoch": 0.14005354699763686,
      "grad_norm": 0.7063075304031372,
      "learning_rate": 9.783939431877336e-06,
      "loss": 0.0604,
      "step": 85580
    },
    {
      "epoch": 0.1400862774362902,
      "grad_norm": 3.1205081939697266,
      "learning_rate": 9.78387353966382e-06,
      "loss": 0.0687,
      "step": 85600
    },
    {
      "epoch": 0.14011900787494355,
      "grad_norm": 2.197908878326416,
      "learning_rate": 9.783807647450301e-06,
      "loss": 0.0734,
      "step": 85620
    },
    {
      "epoch": 0.14015173831359687,
      "grad_norm": 1.598961353302002,
      "learning_rate": 9.783741755236785e-06,
      "loss": 0.0833,
      "step": 85640
    },
    {
      "epoch": 0.1401844687522502,
      "grad_norm": 2.7246856689453125,
      "learning_rate": 9.783675863023268e-06,
      "loss": 0.0688,
      "step": 85660
    },
    {
      "epoch": 0.14021719919090356,
      "grad_norm": 3.3825039863586426,
      "learning_rate": 9.78360997080975e-06,
      "loss": 0.0974,
      "step": 85680
    },
    {
      "epoch": 0.1402499296295569,
      "grad_norm": 1.5796539783477783,
      "learning_rate": 9.783544078596234e-06,
      "loss": 0.0635,
      "step": 85700
    },
    {
      "epoch": 0.14028266006821025,
      "grad_norm": 7.183863639831543,
      "learning_rate": 9.783478186382716e-06,
      "loss": 0.0651,
      "step": 85720
    },
    {
      "epoch": 0.14031539050686356,
      "grad_norm": 2.4356656074523926,
      "learning_rate": 9.7834122941692e-06,
      "loss": 0.0937,
      "step": 85740
    },
    {
      "epoch": 0.1403481209455169,
      "grad_norm": 2.0897953510284424,
      "learning_rate": 9.783346401955681e-06,
      "loss": 0.0617,
      "step": 85760
    },
    {
      "epoch": 0.14038085138417025,
      "grad_norm": 4.985583782196045,
      "learning_rate": 9.783280509742165e-06,
      "loss": 0.0628,
      "step": 85780
    },
    {
      "epoch": 0.1404135818228236,
      "grad_norm": 3.8677759170532227,
      "learning_rate": 9.783214617528648e-06,
      "loss": 0.0717,
      "step": 85800
    },
    {
      "epoch": 0.14044631226147694,
      "grad_norm": 13.076006889343262,
      "learning_rate": 9.78314872531513e-06,
      "loss": 0.0726,
      "step": 85820
    },
    {
      "epoch": 0.14047904270013026,
      "grad_norm": 1.9539620876312256,
      "learning_rate": 9.783082833101614e-06,
      "loss": 0.0637,
      "step": 85840
    },
    {
      "epoch": 0.1405117731387836,
      "grad_norm": 2.692251682281494,
      "learning_rate": 9.783016940888096e-06,
      "loss": 0.0867,
      "step": 85860
    },
    {
      "epoch": 0.14054450357743695,
      "grad_norm": 1.5349974632263184,
      "learning_rate": 9.78295104867458e-06,
      "loss": 0.0929,
      "step": 85880
    },
    {
      "epoch": 0.1405772340160903,
      "grad_norm": 3.3463118076324463,
      "learning_rate": 9.782885156461061e-06,
      "loss": 0.0604,
      "step": 85900
    },
    {
      "epoch": 0.14060996445474364,
      "grad_norm": 4.37560510635376,
      "learning_rate": 9.782819264247545e-06,
      "loss": 0.0878,
      "step": 85920
    },
    {
      "epoch": 0.14064269489339695,
      "grad_norm": 3.46990704536438,
      "learning_rate": 9.782753372034027e-06,
      "loss": 0.0679,
      "step": 85940
    },
    {
      "epoch": 0.1406754253320503,
      "grad_norm": 2.262082099914551,
      "learning_rate": 9.78268747982051e-06,
      "loss": 0.0679,
      "step": 85960
    },
    {
      "epoch": 0.14070815577070364,
      "grad_norm": 2.2548351287841797,
      "learning_rate": 9.782621587606994e-06,
      "loss": 0.0576,
      "step": 85980
    },
    {
      "epoch": 0.14074088620935699,
      "grad_norm": 2.4153902530670166,
      "learning_rate": 9.782555695393476e-06,
      "loss": 0.074,
      "step": 86000
    },
    {
      "epoch": 0.14077361664801033,
      "grad_norm": 1.390810251235962,
      "learning_rate": 9.78248980317996e-06,
      "loss": 0.0666,
      "step": 86020
    },
    {
      "epoch": 0.14080634708666365,
      "grad_norm": 0.7769159078598022,
      "learning_rate": 9.782423910966443e-06,
      "loss": 0.0651,
      "step": 86040
    },
    {
      "epoch": 0.140839077525317,
      "grad_norm": 6.167825698852539,
      "learning_rate": 9.782358018752925e-06,
      "loss": 0.0739,
      "step": 86060
    },
    {
      "epoch": 0.14087180796397034,
      "grad_norm": 2.658151626586914,
      "learning_rate": 9.782292126539408e-06,
      "loss": 0.0626,
      "step": 86080
    },
    {
      "epoch": 0.14090453840262368,
      "grad_norm": 2.2101969718933105,
      "learning_rate": 9.78222623432589e-06,
      "loss": 0.0869,
      "step": 86100
    },
    {
      "epoch": 0.140937268841277,
      "grad_norm": 6.209811687469482,
      "learning_rate": 9.782160342112374e-06,
      "loss": 0.0871,
      "step": 86120
    },
    {
      "epoch": 0.14096999927993034,
      "grad_norm": 2.2176406383514404,
      "learning_rate": 9.782094449898856e-06,
      "loss": 0.0715,
      "step": 86140
    },
    {
      "epoch": 0.14100272971858369,
      "grad_norm": 1.7064526081085205,
      "learning_rate": 9.78202855768534e-06,
      "loss": 0.0629,
      "step": 86160
    },
    {
      "epoch": 0.14103546015723703,
      "grad_norm": 1.3106123208999634,
      "learning_rate": 9.781962665471823e-06,
      "loss": 0.0715,
      "step": 86180
    },
    {
      "epoch": 0.14106819059589037,
      "grad_norm": 3.7837886810302734,
      "learning_rate": 9.781896773258305e-06,
      "loss": 0.0732,
      "step": 86200
    },
    {
      "epoch": 0.1411009210345437,
      "grad_norm": 6.456302642822266,
      "learning_rate": 9.781830881044788e-06,
      "loss": 0.0813,
      "step": 86220
    },
    {
      "epoch": 0.14113365147319704,
      "grad_norm": 1.591760516166687,
      "learning_rate": 9.78176498883127e-06,
      "loss": 0.0781,
      "step": 86240
    },
    {
      "epoch": 0.14116638191185038,
      "grad_norm": 3.050253391265869,
      "learning_rate": 9.781699096617754e-06,
      "loss": 0.0837,
      "step": 86260
    },
    {
      "epoch": 0.14119911235050372,
      "grad_norm": 0.9627709984779358,
      "learning_rate": 9.781633204404236e-06,
      "loss": 0.0858,
      "step": 86280
    },
    {
      "epoch": 0.14123184278915707,
      "grad_norm": 1.8123345375061035,
      "learning_rate": 9.78156731219072e-06,
      "loss": 0.0793,
      "step": 86300
    },
    {
      "epoch": 0.14126457322781039,
      "grad_norm": 2.6808149814605713,
      "learning_rate": 9.781501419977201e-06,
      "loss": 0.0896,
      "step": 86320
    },
    {
      "epoch": 0.14129730366646373,
      "grad_norm": 0.6834062933921814,
      "learning_rate": 9.781435527763685e-06,
      "loss": 0.0902,
      "step": 86340
    },
    {
      "epoch": 0.14133003410511707,
      "grad_norm": 2.0969183444976807,
      "learning_rate": 9.781369635550167e-06,
      "loss": 0.1004,
      "step": 86360
    },
    {
      "epoch": 0.14136276454377042,
      "grad_norm": 3.106046438217163,
      "learning_rate": 9.78130374333665e-06,
      "loss": 0.0866,
      "step": 86380
    },
    {
      "epoch": 0.14139549498242376,
      "grad_norm": 2.7899179458618164,
      "learning_rate": 9.781237851123134e-06,
      "loss": 0.0816,
      "step": 86400
    },
    {
      "epoch": 0.14142822542107708,
      "grad_norm": 2.269495964050293,
      "learning_rate": 9.781171958909616e-06,
      "loss": 0.0539,
      "step": 86420
    },
    {
      "epoch": 0.14146095585973043,
      "grad_norm": 0.6610790491104126,
      "learning_rate": 9.7811060666961e-06,
      "loss": 0.0738,
      "step": 86440
    },
    {
      "epoch": 0.14149368629838377,
      "grad_norm": 1.8654018640518188,
      "learning_rate": 9.781040174482583e-06,
      "loss": 0.0882,
      "step": 86460
    },
    {
      "epoch": 0.14152641673703711,
      "grad_norm": 12.111270904541016,
      "learning_rate": 9.780974282269065e-06,
      "loss": 0.0646,
      "step": 86480
    },
    {
      "epoch": 0.14155914717569046,
      "grad_norm": 2.4620678424835205,
      "learning_rate": 9.780908390055548e-06,
      "loss": 0.0804,
      "step": 86500
    },
    {
      "epoch": 0.14159187761434378,
      "grad_norm": 2.8134725093841553,
      "learning_rate": 9.780842497842032e-06,
      "loss": 0.0827,
      "step": 86520
    },
    {
      "epoch": 0.14162460805299712,
      "grad_norm": 2.7957937717437744,
      "learning_rate": 9.780776605628514e-06,
      "loss": 0.069,
      "step": 86540
    },
    {
      "epoch": 0.14165733849165046,
      "grad_norm": 2.905170202255249,
      "learning_rate": 9.780710713414997e-06,
      "loss": 0.0863,
      "step": 86560
    },
    {
      "epoch": 0.1416900689303038,
      "grad_norm": 4.443393707275391,
      "learning_rate": 9.78064482120148e-06,
      "loss": 0.0707,
      "step": 86580
    },
    {
      "epoch": 0.14172279936895715,
      "grad_norm": 4.879427909851074,
      "learning_rate": 9.780578928987963e-06,
      "loss": 0.0648,
      "step": 86600
    },
    {
      "epoch": 0.14175552980761047,
      "grad_norm": 1.1521962881088257,
      "learning_rate": 9.780513036774445e-06,
      "loss": 0.0789,
      "step": 86620
    },
    {
      "epoch": 0.14178826024626381,
      "grad_norm": 2.7539665699005127,
      "learning_rate": 9.780447144560928e-06,
      "loss": 0.0813,
      "step": 86640
    },
    {
      "epoch": 0.14182099068491716,
      "grad_norm": 6.253728866577148,
      "learning_rate": 9.78038125234741e-06,
      "loss": 0.0775,
      "step": 86660
    },
    {
      "epoch": 0.1418537211235705,
      "grad_norm": 2.4163315296173096,
      "learning_rate": 9.780315360133894e-06,
      "loss": 0.0672,
      "step": 86680
    },
    {
      "epoch": 0.14188645156222385,
      "grad_norm": 3.0520901679992676,
      "learning_rate": 9.780249467920376e-06,
      "loss": 0.0691,
      "step": 86700
    },
    {
      "epoch": 0.14191918200087716,
      "grad_norm": 1.798566460609436,
      "learning_rate": 9.78018357570686e-06,
      "loss": 0.0725,
      "step": 86720
    },
    {
      "epoch": 0.1419519124395305,
      "grad_norm": 4.310822486877441,
      "learning_rate": 9.780117683493341e-06,
      "loss": 0.0695,
      "step": 86740
    },
    {
      "epoch": 0.14198464287818385,
      "grad_norm": 2.784435272216797,
      "learning_rate": 9.780051791279825e-06,
      "loss": 0.0908,
      "step": 86760
    },
    {
      "epoch": 0.1420173733168372,
      "grad_norm": 3.720562219619751,
      "learning_rate": 9.779985899066308e-06,
      "loss": 0.0623,
      "step": 86780
    },
    {
      "epoch": 0.14205010375549054,
      "grad_norm": 4.185824871063232,
      "learning_rate": 9.77992000685279e-06,
      "loss": 0.0708,
      "step": 86800
    },
    {
      "epoch": 0.14208283419414386,
      "grad_norm": 3.254925489425659,
      "learning_rate": 9.779854114639274e-06,
      "loss": 0.0921,
      "step": 86820
    },
    {
      "epoch": 0.1421155646327972,
      "grad_norm": 4.33598518371582,
      "learning_rate": 9.779788222425758e-06,
      "loss": 0.0975,
      "step": 86840
    },
    {
      "epoch": 0.14214829507145055,
      "grad_norm": 0.8025978803634644,
      "learning_rate": 9.77972233021224e-06,
      "loss": 0.0616,
      "step": 86860
    },
    {
      "epoch": 0.1421810255101039,
      "grad_norm": 4.258691787719727,
      "learning_rate": 9.779656437998723e-06,
      "loss": 0.0721,
      "step": 86880
    },
    {
      "epoch": 0.14221375594875724,
      "grad_norm": 1.8202342987060547,
      "learning_rate": 9.779590545785207e-06,
      "loss": 0.0763,
      "step": 86900
    },
    {
      "epoch": 0.14224648638741055,
      "grad_norm": 2.4312500953674316,
      "learning_rate": 9.779524653571688e-06,
      "loss": 0.0903,
      "step": 86920
    },
    {
      "epoch": 0.1422792168260639,
      "grad_norm": 4.866488456726074,
      "learning_rate": 9.779458761358172e-06,
      "loss": 0.0681,
      "step": 86940
    },
    {
      "epoch": 0.14231194726471724,
      "grad_norm": 3.326721429824829,
      "learning_rate": 9.779392869144654e-06,
      "loss": 0.056,
      "step": 86960
    },
    {
      "epoch": 0.1423446777033706,
      "grad_norm": 2.03292179107666,
      "learning_rate": 9.779326976931138e-06,
      "loss": 0.0705,
      "step": 86980
    },
    {
      "epoch": 0.14237740814202393,
      "grad_norm": 2.8227992057800293,
      "learning_rate": 9.77926108471762e-06,
      "loss": 0.0913,
      "step": 87000
    },
    {
      "epoch": 0.14241013858067725,
      "grad_norm": 1.2785587310791016,
      "learning_rate": 9.779195192504103e-06,
      "loss": 0.0646,
      "step": 87020
    },
    {
      "epoch": 0.1424428690193306,
      "grad_norm": 0.9735977053642273,
      "learning_rate": 9.779129300290585e-06,
      "loss": 0.0832,
      "step": 87040
    },
    {
      "epoch": 0.14247559945798394,
      "grad_norm": 4.625048637390137,
      "learning_rate": 9.779063408077068e-06,
      "loss": 0.0786,
      "step": 87060
    },
    {
      "epoch": 0.14250832989663728,
      "grad_norm": 3.085477828979492,
      "learning_rate": 9.77899751586355e-06,
      "loss": 0.075,
      "step": 87080
    },
    {
      "epoch": 0.14254106033529063,
      "grad_norm": 5.454555034637451,
      "learning_rate": 9.778931623650034e-06,
      "loss": 0.0747,
      "step": 87100
    },
    {
      "epoch": 0.14257379077394394,
      "grad_norm": 3.874411106109619,
      "learning_rate": 9.778865731436516e-06,
      "loss": 0.0603,
      "step": 87120
    },
    {
      "epoch": 0.1426065212125973,
      "grad_norm": 4.815773010253906,
      "learning_rate": 9.778799839223e-06,
      "loss": 0.0745,
      "step": 87140
    },
    {
      "epoch": 0.14263925165125063,
      "grad_norm": 3.654493570327759,
      "learning_rate": 9.778733947009483e-06,
      "loss": 0.0823,
      "step": 87160
    },
    {
      "epoch": 0.14267198208990398,
      "grad_norm": 2.710814952850342,
      "learning_rate": 9.778668054795965e-06,
      "loss": 0.0627,
      "step": 87180
    },
    {
      "epoch": 0.14270471252855732,
      "grad_norm": 2.831644058227539,
      "learning_rate": 9.778602162582449e-06,
      "loss": 0.091,
      "step": 87200
    },
    {
      "epoch": 0.14273744296721064,
      "grad_norm": 2.325652599334717,
      "learning_rate": 9.778536270368932e-06,
      "loss": 0.066,
      "step": 87220
    },
    {
      "epoch": 0.14277017340586398,
      "grad_norm": 2.4158499240875244,
      "learning_rate": 9.778470378155414e-06,
      "loss": 0.0756,
      "step": 87240
    },
    {
      "epoch": 0.14280290384451733,
      "grad_norm": 1.0635490417480469,
      "learning_rate": 9.778404485941898e-06,
      "loss": 0.0744,
      "step": 87260
    },
    {
      "epoch": 0.14283563428317067,
      "grad_norm": 1.8543962240219116,
      "learning_rate": 9.778338593728381e-06,
      "loss": 0.0662,
      "step": 87280
    },
    {
      "epoch": 0.14286836472182401,
      "grad_norm": 2.895212411880493,
      "learning_rate": 9.778272701514863e-06,
      "loss": 0.0754,
      "step": 87300
    },
    {
      "epoch": 0.14290109516047733,
      "grad_norm": 4.7013936042785645,
      "learning_rate": 9.778206809301347e-06,
      "loss": 0.0754,
      "step": 87320
    },
    {
      "epoch": 0.14293382559913068,
      "grad_norm": 2.2419328689575195,
      "learning_rate": 9.778140917087829e-06,
      "loss": 0.0676,
      "step": 87340
    },
    {
      "epoch": 0.14296655603778402,
      "grad_norm": 3.3973278999328613,
      "learning_rate": 9.778075024874312e-06,
      "loss": 0.078,
      "step": 87360
    },
    {
      "epoch": 0.14299928647643737,
      "grad_norm": 1.5374748706817627,
      "learning_rate": 9.778009132660794e-06,
      "loss": 0.0659,
      "step": 87380
    },
    {
      "epoch": 0.1430320169150907,
      "grad_norm": 2.4257547855377197,
      "learning_rate": 9.777943240447278e-06,
      "loss": 0.0758,
      "step": 87400
    },
    {
      "epoch": 0.14306474735374403,
      "grad_norm": 1.3317203521728516,
      "learning_rate": 9.77787734823376e-06,
      "loss": 0.0706,
      "step": 87420
    },
    {
      "epoch": 0.14309747779239737,
      "grad_norm": 1.5591561794281006,
      "learning_rate": 9.777811456020243e-06,
      "loss": 0.0826,
      "step": 87440
    },
    {
      "epoch": 0.14313020823105072,
      "grad_norm": 2.589388847351074,
      "learning_rate": 9.777745563806725e-06,
      "loss": 0.0755,
      "step": 87460
    },
    {
      "epoch": 0.14316293866970406,
      "grad_norm": 1.9001580476760864,
      "learning_rate": 9.777679671593209e-06,
      "loss": 0.0725,
      "step": 87480
    },
    {
      "epoch": 0.1431956691083574,
      "grad_norm": 1.0917878150939941,
      "learning_rate": 9.77761377937969e-06,
      "loss": 0.0642,
      "step": 87500
    },
    {
      "epoch": 0.14322839954701072,
      "grad_norm": 3.6021006107330322,
      "learning_rate": 9.777547887166174e-06,
      "loss": 0.0728,
      "step": 87520
    },
    {
      "epoch": 0.14326112998566407,
      "grad_norm": 1.5715835094451904,
      "learning_rate": 9.777481994952656e-06,
      "loss": 0.0692,
      "step": 87540
    },
    {
      "epoch": 0.1432938604243174,
      "grad_norm": 11.583961486816406,
      "learning_rate": 9.77741610273914e-06,
      "loss": 0.0765,
      "step": 87560
    },
    {
      "epoch": 0.14332659086297075,
      "grad_norm": 1.8782004117965698,
      "learning_rate": 9.777350210525623e-06,
      "loss": 0.0831,
      "step": 87580
    },
    {
      "epoch": 0.14335932130162407,
      "grad_norm": 1.0593966245651245,
      "learning_rate": 9.777284318312105e-06,
      "loss": 0.0695,
      "step": 87600
    },
    {
      "epoch": 0.14339205174027742,
      "grad_norm": 5.823036193847656,
      "learning_rate": 9.777218426098589e-06,
      "loss": 0.0826,
      "step": 87620
    },
    {
      "epoch": 0.14342478217893076,
      "grad_norm": 2.1591873168945312,
      "learning_rate": 9.777152533885072e-06,
      "loss": 0.0673,
      "step": 87640
    },
    {
      "epoch": 0.1434575126175841,
      "grad_norm": 1.4195500612258911,
      "learning_rate": 9.777086641671554e-06,
      "loss": 0.0889,
      "step": 87660
    },
    {
      "epoch": 0.14349024305623745,
      "grad_norm": 2.830674171447754,
      "learning_rate": 9.777020749458038e-06,
      "loss": 0.0681,
      "step": 87680
    },
    {
      "epoch": 0.14352297349489077,
      "grad_norm": 3.6045730113983154,
      "learning_rate": 9.776954857244521e-06,
      "loss": 0.087,
      "step": 87700
    },
    {
      "epoch": 0.1435557039335441,
      "grad_norm": 2.4268910884857178,
      "learning_rate": 9.776888965031003e-06,
      "loss": 0.0799,
      "step": 87720
    },
    {
      "epoch": 0.14358843437219745,
      "grad_norm": 5.291558742523193,
      "learning_rate": 9.776823072817487e-06,
      "loss": 0.0731,
      "step": 87740
    },
    {
      "epoch": 0.1436211648108508,
      "grad_norm": 1.7621179819107056,
      "learning_rate": 9.776757180603969e-06,
      "loss": 0.0853,
      "step": 87760
    },
    {
      "epoch": 0.14365389524950414,
      "grad_norm": 1.1375751495361328,
      "learning_rate": 9.776691288390452e-06,
      "loss": 0.0623,
      "step": 87780
    },
    {
      "epoch": 0.14368662568815746,
      "grad_norm": 2.123194932937622,
      "learning_rate": 9.776625396176934e-06,
      "loss": 0.0633,
      "step": 87800
    },
    {
      "epoch": 0.1437193561268108,
      "grad_norm": 8.242716789245605,
      "learning_rate": 9.776559503963418e-06,
      "loss": 0.0859,
      "step": 87820
    },
    {
      "epoch": 0.14375208656546415,
      "grad_norm": 3.6961607933044434,
      "learning_rate": 9.7764936117499e-06,
      "loss": 0.0794,
      "step": 87840
    },
    {
      "epoch": 0.1437848170041175,
      "grad_norm": 3.6526834964752197,
      "learning_rate": 9.776427719536383e-06,
      "loss": 0.0706,
      "step": 87860
    },
    {
      "epoch": 0.14381754744277084,
      "grad_norm": 1.9701277017593384,
      "learning_rate": 9.776361827322865e-06,
      "loss": 0.0671,
      "step": 87880
    },
    {
      "epoch": 0.14385027788142415,
      "grad_norm": 3.8356993198394775,
      "learning_rate": 9.776295935109349e-06,
      "loss": 0.0564,
      "step": 87900
    },
    {
      "epoch": 0.1438830083200775,
      "grad_norm": 14.400636672973633,
      "learning_rate": 9.776230042895832e-06,
      "loss": 0.0743,
      "step": 87920
    },
    {
      "epoch": 0.14391573875873084,
      "grad_norm": 1.7107502222061157,
      "learning_rate": 9.776164150682314e-06,
      "loss": 0.0678,
      "step": 87940
    },
    {
      "epoch": 0.1439484691973842,
      "grad_norm": 3.0282375812530518,
      "learning_rate": 9.776098258468798e-06,
      "loss": 0.0775,
      "step": 87960
    },
    {
      "epoch": 0.14398119963603753,
      "grad_norm": 1.9000427722930908,
      "learning_rate": 9.77603236625528e-06,
      "loss": 0.0848,
      "step": 87980
    },
    {
      "epoch": 0.14401393007469085,
      "grad_norm": 3.3982133865356445,
      "learning_rate": 9.775966474041763e-06,
      "loss": 0.0921,
      "step": 88000
    },
    {
      "epoch": 0.1440466605133442,
      "grad_norm": 9.179250717163086,
      "learning_rate": 9.775900581828247e-06,
      "loss": 0.088,
      "step": 88020
    },
    {
      "epoch": 0.14407939095199754,
      "grad_norm": 3.8611888885498047,
      "learning_rate": 9.775834689614729e-06,
      "loss": 0.0759,
      "step": 88040
    },
    {
      "epoch": 0.14411212139065088,
      "grad_norm": 2.108275890350342,
      "learning_rate": 9.775768797401212e-06,
      "loss": 0.0546,
      "step": 88060
    },
    {
      "epoch": 0.14414485182930423,
      "grad_norm": 3.1338202953338623,
      "learning_rate": 9.775702905187696e-06,
      "loss": 0.0796,
      "step": 88080
    },
    {
      "epoch": 0.14417758226795754,
      "grad_norm": 2.548931121826172,
      "learning_rate": 9.775637012974178e-06,
      "loss": 0.078,
      "step": 88100
    },
    {
      "epoch": 0.1442103127066109,
      "grad_norm": 5.070772647857666,
      "learning_rate": 9.775571120760661e-06,
      "loss": 0.0752,
      "step": 88120
    },
    {
      "epoch": 0.14424304314526423,
      "grad_norm": 1.9887603521347046,
      "learning_rate": 9.775505228547143e-06,
      "loss": 0.052,
      "step": 88140
    },
    {
      "epoch": 0.14427577358391758,
      "grad_norm": 3.298694372177124,
      "learning_rate": 9.775439336333627e-06,
      "loss": 0.0872,
      "step": 88160
    },
    {
      "epoch": 0.14430850402257092,
      "grad_norm": 7.281567096710205,
      "learning_rate": 9.775373444120109e-06,
      "loss": 0.0717,
      "step": 88180
    },
    {
      "epoch": 0.14434123446122424,
      "grad_norm": 2.6549763679504395,
      "learning_rate": 9.775307551906592e-06,
      "loss": 0.0655,
      "step": 88200
    },
    {
      "epoch": 0.14437396489987758,
      "grad_norm": 3.357501745223999,
      "learning_rate": 9.775241659693074e-06,
      "loss": 0.08,
      "step": 88220
    },
    {
      "epoch": 0.14440669533853093,
      "grad_norm": 4.5648722648620605,
      "learning_rate": 9.775175767479558e-06,
      "loss": 0.0709,
      "step": 88240
    },
    {
      "epoch": 0.14443942577718427,
      "grad_norm": 3.4437618255615234,
      "learning_rate": 9.775109875266041e-06,
      "loss": 0.0944,
      "step": 88260
    },
    {
      "epoch": 0.14447215621583762,
      "grad_norm": 7.467029094696045,
      "learning_rate": 9.775043983052523e-06,
      "loss": 0.062,
      "step": 88280
    },
    {
      "epoch": 0.14450488665449093,
      "grad_norm": 8.214520454406738,
      "learning_rate": 9.774978090839007e-06,
      "loss": 0.0604,
      "step": 88300
    },
    {
      "epoch": 0.14453761709314428,
      "grad_norm": 2.5143120288848877,
      "learning_rate": 9.774912198625489e-06,
      "loss": 0.08,
      "step": 88320
    },
    {
      "epoch": 0.14457034753179762,
      "grad_norm": 2.2035765647888184,
      "learning_rate": 9.774846306411972e-06,
      "loss": 0.0794,
      "step": 88340
    },
    {
      "epoch": 0.14460307797045097,
      "grad_norm": 0.706323504447937,
      "learning_rate": 9.774780414198454e-06,
      "loss": 0.0823,
      "step": 88360
    },
    {
      "epoch": 0.1446358084091043,
      "grad_norm": 3.137202024459839,
      "learning_rate": 9.774714521984938e-06,
      "loss": 0.0619,
      "step": 88380
    },
    {
      "epoch": 0.14466853884775763,
      "grad_norm": 1.8394824266433716,
      "learning_rate": 9.77464862977142e-06,
      "loss": 0.0735,
      "step": 88400
    },
    {
      "epoch": 0.14470126928641097,
      "grad_norm": 3.176452398300171,
      "learning_rate": 9.774582737557903e-06,
      "loss": 0.069,
      "step": 88420
    },
    {
      "epoch": 0.14473399972506432,
      "grad_norm": 2.5671768188476562,
      "learning_rate": 9.774516845344387e-06,
      "loss": 0.0622,
      "step": 88440
    },
    {
      "epoch": 0.14476673016371766,
      "grad_norm": 3.43817400932312,
      "learning_rate": 9.774450953130869e-06,
      "loss": 0.0821,
      "step": 88460
    },
    {
      "epoch": 0.144799460602371,
      "grad_norm": 3.463803768157959,
      "learning_rate": 9.774385060917352e-06,
      "loss": 0.0899,
      "step": 88480
    },
    {
      "epoch": 0.14483219104102432,
      "grad_norm": 5.425890922546387,
      "learning_rate": 9.774319168703836e-06,
      "loss": 0.0758,
      "step": 88500
    },
    {
      "epoch": 0.14486492147967767,
      "grad_norm": 1.702792763710022,
      "learning_rate": 9.774253276490318e-06,
      "loss": 0.0717,
      "step": 88520
    },
    {
      "epoch": 0.144897651918331,
      "grad_norm": 3.5570642948150635,
      "learning_rate": 9.774187384276801e-06,
      "loss": 0.0785,
      "step": 88540
    },
    {
      "epoch": 0.14493038235698436,
      "grad_norm": 5.5057268142700195,
      "learning_rate": 9.774121492063283e-06,
      "loss": 0.082,
      "step": 88560
    },
    {
      "epoch": 0.1449631127956377,
      "grad_norm": 2.652743339538574,
      "learning_rate": 9.774055599849767e-06,
      "loss": 0.0683,
      "step": 88580
    },
    {
      "epoch": 0.14499584323429102,
      "grad_norm": 2.603578567504883,
      "learning_rate": 9.773989707636249e-06,
      "loss": 0.0538,
      "step": 88600
    },
    {
      "epoch": 0.14502857367294436,
      "grad_norm": 3.6008660793304443,
      "learning_rate": 9.773923815422732e-06,
      "loss": 0.0695,
      "step": 88620
    },
    {
      "epoch": 0.1450613041115977,
      "grad_norm": 8.610675811767578,
      "learning_rate": 9.773857923209216e-06,
      "loss": 0.0672,
      "step": 88640
    },
    {
      "epoch": 0.14509403455025105,
      "grad_norm": 2.613330841064453,
      "learning_rate": 9.773792030995698e-06,
      "loss": 0.0804,
      "step": 88660
    },
    {
      "epoch": 0.1451267649889044,
      "grad_norm": 2.5468318462371826,
      "learning_rate": 9.773726138782181e-06,
      "loss": 0.1063,
      "step": 88680
    },
    {
      "epoch": 0.1451594954275577,
      "grad_norm": 4.930234432220459,
      "learning_rate": 9.773660246568663e-06,
      "loss": 0.0676,
      "step": 88700
    },
    {
      "epoch": 0.14519222586621106,
      "grad_norm": 3.5638742446899414,
      "learning_rate": 9.773594354355147e-06,
      "loss": 0.0632,
      "step": 88720
    },
    {
      "epoch": 0.1452249563048644,
      "grad_norm": 2.441019296646118,
      "learning_rate": 9.773528462141629e-06,
      "loss": 0.0772,
      "step": 88740
    },
    {
      "epoch": 0.14525768674351774,
      "grad_norm": 2.5984253883361816,
      "learning_rate": 9.773462569928112e-06,
      "loss": 0.0569,
      "step": 88760
    },
    {
      "epoch": 0.1452904171821711,
      "grad_norm": 3.1343212127685547,
      "learning_rate": 9.773396677714594e-06,
      "loss": 0.0888,
      "step": 88780
    },
    {
      "epoch": 0.1453231476208244,
      "grad_norm": 4.011336326599121,
      "learning_rate": 9.773330785501078e-06,
      "loss": 0.0772,
      "step": 88800
    },
    {
      "epoch": 0.14535587805947775,
      "grad_norm": 1.4073810577392578,
      "learning_rate": 9.773264893287561e-06,
      "loss": 0.0655,
      "step": 88820
    },
    {
      "epoch": 0.1453886084981311,
      "grad_norm": 1.0157537460327148,
      "learning_rate": 9.773199001074043e-06,
      "loss": 0.0705,
      "step": 88840
    },
    {
      "epoch": 0.14542133893678444,
      "grad_norm": 5.021042823791504,
      "learning_rate": 9.773133108860527e-06,
      "loss": 0.0701,
      "step": 88860
    },
    {
      "epoch": 0.14545406937543778,
      "grad_norm": 1.4289767742156982,
      "learning_rate": 9.77306721664701e-06,
      "loss": 0.0674,
      "step": 88880
    },
    {
      "epoch": 0.1454867998140911,
      "grad_norm": 2.415008544921875,
      "learning_rate": 9.773001324433492e-06,
      "loss": 0.0693,
      "step": 88900
    },
    {
      "epoch": 0.14551953025274444,
      "grad_norm": 1.211029052734375,
      "learning_rate": 9.772935432219976e-06,
      "loss": 0.066,
      "step": 88920
    },
    {
      "epoch": 0.1455522606913978,
      "grad_norm": 1.735855221748352,
      "learning_rate": 9.772869540006458e-06,
      "loss": 0.0681,
      "step": 88940
    },
    {
      "epoch": 0.14558499113005113,
      "grad_norm": 1.8758288621902466,
      "learning_rate": 9.772803647792941e-06,
      "loss": 0.067,
      "step": 88960
    },
    {
      "epoch": 0.14561772156870445,
      "grad_norm": 1.917827844619751,
      "learning_rate": 9.772737755579425e-06,
      "loss": 0.0662,
      "step": 88980
    },
    {
      "epoch": 0.1456504520073578,
      "grad_norm": 2.4545485973358154,
      "learning_rate": 9.772671863365907e-06,
      "loss": 0.0676,
      "step": 89000
    },
    {
      "epoch": 0.14568318244601114,
      "grad_norm": 3.598184585571289,
      "learning_rate": 9.77260597115239e-06,
      "loss": 0.093,
      "step": 89020
    },
    {
      "epoch": 0.14571591288466448,
      "grad_norm": 3.377485990524292,
      "learning_rate": 9.772540078938872e-06,
      "loss": 0.0736,
      "step": 89040
    },
    {
      "epoch": 0.14574864332331783,
      "grad_norm": 2.8886570930480957,
      "learning_rate": 9.772474186725356e-06,
      "loss": 0.06,
      "step": 89060
    },
    {
      "epoch": 0.14578137376197114,
      "grad_norm": 2.0959103107452393,
      "learning_rate": 9.772408294511838e-06,
      "loss": 0.0668,
      "step": 89080
    },
    {
      "epoch": 0.1458141042006245,
      "grad_norm": 2.5538852214813232,
      "learning_rate": 9.772342402298321e-06,
      "loss": 0.0842,
      "step": 89100
    },
    {
      "epoch": 0.14584683463927783,
      "grad_norm": 1.6674959659576416,
      "learning_rate": 9.772276510084803e-06,
      "loss": 0.0671,
      "step": 89120
    },
    {
      "epoch": 0.14587956507793118,
      "grad_norm": 5.140248775482178,
      "learning_rate": 9.772210617871287e-06,
      "loss": 0.051,
      "step": 89140
    },
    {
      "epoch": 0.14591229551658452,
      "grad_norm": 2.527371406555176,
      "learning_rate": 9.772144725657769e-06,
      "loss": 0.0816,
      "step": 89160
    },
    {
      "epoch": 0.14594502595523784,
      "grad_norm": 0.8617477416992188,
      "learning_rate": 9.772078833444252e-06,
      "loss": 0.0744,
      "step": 89180
    },
    {
      "epoch": 0.14597775639389118,
      "grad_norm": 0.854145348072052,
      "learning_rate": 9.772012941230734e-06,
      "loss": 0.0723,
      "step": 89200
    },
    {
      "epoch": 0.14601048683254453,
      "grad_norm": 2.000546932220459,
      "learning_rate": 9.771947049017218e-06,
      "loss": 0.0842,
      "step": 89220
    },
    {
      "epoch": 0.14604321727119787,
      "grad_norm": 1.9344757795333862,
      "learning_rate": 9.771881156803702e-06,
      "loss": 0.0616,
      "step": 89240
    },
    {
      "epoch": 0.14607594770985122,
      "grad_norm": 4.7696452140808105,
      "learning_rate": 9.771815264590183e-06,
      "loss": 0.076,
      "step": 89260
    },
    {
      "epoch": 0.14610867814850453,
      "grad_norm": 1.6618881225585938,
      "learning_rate": 9.771749372376667e-06,
      "loss": 0.0577,
      "step": 89280
    },
    {
      "epoch": 0.14614140858715788,
      "grad_norm": 5.38847541809082,
      "learning_rate": 9.77168348016315e-06,
      "loss": 0.0712,
      "step": 89300
    },
    {
      "epoch": 0.14617413902581122,
      "grad_norm": 2.5728840827941895,
      "learning_rate": 9.771617587949634e-06,
      "loss": 0.0569,
      "step": 89320
    },
    {
      "epoch": 0.14620686946446457,
      "grad_norm": 2.17549204826355,
      "learning_rate": 9.771551695736116e-06,
      "loss": 0.0882,
      "step": 89340
    },
    {
      "epoch": 0.1462395999031179,
      "grad_norm": 3.2540485858917236,
      "learning_rate": 9.7714858035226e-06,
      "loss": 0.0708,
      "step": 89360
    },
    {
      "epoch": 0.14627233034177123,
      "grad_norm": 4.208343982696533,
      "learning_rate": 9.771419911309082e-06,
      "loss": 0.0794,
      "step": 89380
    },
    {
      "epoch": 0.14630506078042457,
      "grad_norm": 4.054118633270264,
      "learning_rate": 9.771354019095565e-06,
      "loss": 0.0778,
      "step": 89400
    },
    {
      "epoch": 0.14633779121907792,
      "grad_norm": 1.730855941772461,
      "learning_rate": 9.771288126882047e-06,
      "loss": 0.0782,
      "step": 89420
    },
    {
      "epoch": 0.14637052165773126,
      "grad_norm": 4.156055450439453,
      "learning_rate": 9.77122223466853e-06,
      "loss": 0.0725,
      "step": 89440
    },
    {
      "epoch": 0.1464032520963846,
      "grad_norm": 1.4985977411270142,
      "learning_rate": 9.771156342455013e-06,
      "loss": 0.0638,
      "step": 89460
    },
    {
      "epoch": 0.14643598253503792,
      "grad_norm": 6.908483982086182,
      "learning_rate": 9.771090450241496e-06,
      "loss": 0.0797,
      "step": 89480
    },
    {
      "epoch": 0.14646871297369127,
      "grad_norm": 0.8931912183761597,
      "learning_rate": 9.771024558027978e-06,
      "loss": 0.0645,
      "step": 89500
    },
    {
      "epoch": 0.1465014434123446,
      "grad_norm": 2.3021562099456787,
      "learning_rate": 9.770958665814462e-06,
      "loss": 0.0737,
      "step": 89520
    },
    {
      "epoch": 0.14653417385099796,
      "grad_norm": 2.760005235671997,
      "learning_rate": 9.770892773600943e-06,
      "loss": 0.0633,
      "step": 89540
    },
    {
      "epoch": 0.1465669042896513,
      "grad_norm": 4.634669303894043,
      "learning_rate": 9.770826881387427e-06,
      "loss": 0.0708,
      "step": 89560
    },
    {
      "epoch": 0.14659963472830462,
      "grad_norm": 2.0357935428619385,
      "learning_rate": 9.770760989173909e-06,
      "loss": 0.0679,
      "step": 89580
    },
    {
      "epoch": 0.14663236516695796,
      "grad_norm": 2.198047637939453,
      "learning_rate": 9.770695096960393e-06,
      "loss": 0.0846,
      "step": 89600
    },
    {
      "epoch": 0.1466650956056113,
      "grad_norm": 0.9676510691642761,
      "learning_rate": 9.770629204746876e-06,
      "loss": 0.0841,
      "step": 89620
    },
    {
      "epoch": 0.14669782604426465,
      "grad_norm": 1.5301233530044556,
      "learning_rate": 9.770563312533358e-06,
      "loss": 0.0676,
      "step": 89640
    },
    {
      "epoch": 0.146730556482918,
      "grad_norm": 5.428422927856445,
      "learning_rate": 9.770497420319842e-06,
      "loss": 0.0786,
      "step": 89660
    },
    {
      "epoch": 0.1467632869215713,
      "grad_norm": 1.215765357017517,
      "learning_rate": 9.770431528106325e-06,
      "loss": 0.0797,
      "step": 89680
    },
    {
      "epoch": 0.14679601736022466,
      "grad_norm": 2.155304193496704,
      "learning_rate": 9.770365635892807e-06,
      "loss": 0.0613,
      "step": 89700
    },
    {
      "epoch": 0.146828747798878,
      "grad_norm": 2.1839964389801025,
      "learning_rate": 9.77029974367929e-06,
      "loss": 0.0814,
      "step": 89720
    },
    {
      "epoch": 0.14686147823753135,
      "grad_norm": 4.185027122497559,
      "learning_rate": 9.770233851465774e-06,
      "loss": 0.0807,
      "step": 89740
    },
    {
      "epoch": 0.1468942086761847,
      "grad_norm": 2.5805320739746094,
      "learning_rate": 9.770167959252256e-06,
      "loss": 0.0582,
      "step": 89760
    },
    {
      "epoch": 0.146926939114838,
      "grad_norm": 2.543107271194458,
      "learning_rate": 9.77010206703874e-06,
      "loss": 0.066,
      "step": 89780
    },
    {
      "epoch": 0.14695966955349135,
      "grad_norm": 3.1455531120300293,
      "learning_rate": 9.770036174825222e-06,
      "loss": 0.0688,
      "step": 89800
    },
    {
      "epoch": 0.1469923999921447,
      "grad_norm": 18.48316192626953,
      "learning_rate": 9.769970282611705e-06,
      "loss": 0.0682,
      "step": 89820
    },
    {
      "epoch": 0.14702513043079804,
      "grad_norm": 2.8181653022766113,
      "learning_rate": 9.769904390398187e-06,
      "loss": 0.0632,
      "step": 89840
    },
    {
      "epoch": 0.14705786086945138,
      "grad_norm": 3.5852417945861816,
      "learning_rate": 9.76983849818467e-06,
      "loss": 0.0745,
      "step": 89860
    },
    {
      "epoch": 0.1470905913081047,
      "grad_norm": 3.924365758895874,
      "learning_rate": 9.769772605971153e-06,
      "loss": 0.0835,
      "step": 89880
    },
    {
      "epoch": 0.14712332174675805,
      "grad_norm": 2.6857590675354004,
      "learning_rate": 9.769706713757636e-06,
      "loss": 0.0926,
      "step": 89900
    },
    {
      "epoch": 0.1471560521854114,
      "grad_norm": 11.173789978027344,
      "learning_rate": 9.769640821544118e-06,
      "loss": 0.0661,
      "step": 89920
    },
    {
      "epoch": 0.14718878262406473,
      "grad_norm": 1.1597919464111328,
      "learning_rate": 9.769574929330602e-06,
      "loss": 0.0627,
      "step": 89940
    },
    {
      "epoch": 0.14722151306271808,
      "grad_norm": 2.5010030269622803,
      "learning_rate": 9.769509037117084e-06,
      "loss": 0.0712,
      "step": 89960
    },
    {
      "epoch": 0.1472542435013714,
      "grad_norm": 2.375112295150757,
      "learning_rate": 9.769443144903567e-06,
      "loss": 0.0601,
      "step": 89980
    },
    {
      "epoch": 0.14728697394002474,
      "grad_norm": 2.218780755996704,
      "learning_rate": 9.76937725269005e-06,
      "loss": 0.0713,
      "step": 90000
    },
    {
      "epoch": 0.14731970437867808,
      "grad_norm": 1.7643393278121948,
      "learning_rate": 9.769311360476533e-06,
      "loss": 0.0663,
      "step": 90020
    },
    {
      "epoch": 0.14735243481733143,
      "grad_norm": 0.8306810259819031,
      "learning_rate": 9.769245468263016e-06,
      "loss": 0.0547,
      "step": 90040
    },
    {
      "epoch": 0.14738516525598477,
      "grad_norm": 3.081134080886841,
      "learning_rate": 9.7691795760495e-06,
      "loss": 0.0782,
      "step": 90060
    },
    {
      "epoch": 0.1474178956946381,
      "grad_norm": 2.1824886798858643,
      "learning_rate": 9.769113683835982e-06,
      "loss": 0.079,
      "step": 90080
    },
    {
      "epoch": 0.14745062613329143,
      "grad_norm": 2.6202733516693115,
      "learning_rate": 9.769047791622465e-06,
      "loss": 0.0676,
      "step": 90100
    },
    {
      "epoch": 0.14748335657194478,
      "grad_norm": 2.3713879585266113,
      "learning_rate": 9.768981899408949e-06,
      "loss": 0.0703,
      "step": 90120
    },
    {
      "epoch": 0.14751608701059812,
      "grad_norm": 1.1928879022598267,
      "learning_rate": 9.76891600719543e-06,
      "loss": 0.0781,
      "step": 90140
    },
    {
      "epoch": 0.14754881744925147,
      "grad_norm": 14.80528450012207,
      "learning_rate": 9.768850114981914e-06,
      "loss": 0.0716,
      "step": 90160
    },
    {
      "epoch": 0.14758154788790478,
      "grad_norm": 8.125964164733887,
      "learning_rate": 9.768784222768396e-06,
      "loss": 0.0738,
      "step": 90180
    },
    {
      "epoch": 0.14761427832655813,
      "grad_norm": 2.8106024265289307,
      "learning_rate": 9.76871833055488e-06,
      "loss": 0.0675,
      "step": 90200
    },
    {
      "epoch": 0.14764700876521147,
      "grad_norm": 1.1427977085113525,
      "learning_rate": 9.768652438341362e-06,
      "loss": 0.0571,
      "step": 90220
    },
    {
      "epoch": 0.14767973920386482,
      "grad_norm": 1.8460386991500854,
      "learning_rate": 9.768586546127845e-06,
      "loss": 0.0734,
      "step": 90240
    },
    {
      "epoch": 0.14771246964251816,
      "grad_norm": 3.8354032039642334,
      "learning_rate": 9.768520653914327e-06,
      "loss": 0.0679,
      "step": 90260
    },
    {
      "epoch": 0.14774520008117148,
      "grad_norm": 4.933863639831543,
      "learning_rate": 9.76845476170081e-06,
      "loss": 0.0689,
      "step": 90280
    },
    {
      "epoch": 0.14777793051982482,
      "grad_norm": 9.831811904907227,
      "learning_rate": 9.768388869487293e-06,
      "loss": 0.0777,
      "step": 90300
    },
    {
      "epoch": 0.14781066095847817,
      "grad_norm": 1.7970268726348877,
      "learning_rate": 9.768322977273776e-06,
      "loss": 0.0781,
      "step": 90320
    },
    {
      "epoch": 0.1478433913971315,
      "grad_norm": 4.224301815032959,
      "learning_rate": 9.768257085060258e-06,
      "loss": 0.0719,
      "step": 90340
    },
    {
      "epoch": 0.14787612183578483,
      "grad_norm": 5.038295745849609,
      "learning_rate": 9.768191192846742e-06,
      "loss": 0.0927,
      "step": 90360
    },
    {
      "epoch": 0.14790885227443817,
      "grad_norm": 4.349096298217773,
      "learning_rate": 9.768125300633225e-06,
      "loss": 0.0849,
      "step": 90380
    },
    {
      "epoch": 0.14794158271309152,
      "grad_norm": 3.699855327606201,
      "learning_rate": 9.768059408419707e-06,
      "loss": 0.0629,
      "step": 90400
    },
    {
      "epoch": 0.14797431315174486,
      "grad_norm": 2.569791793823242,
      "learning_rate": 9.76799351620619e-06,
      "loss": 0.0677,
      "step": 90420
    },
    {
      "epoch": 0.1480070435903982,
      "grad_norm": 1.286117672920227,
      "learning_rate": 9.767927623992673e-06,
      "loss": 0.0652,
      "step": 90440
    },
    {
      "epoch": 0.14803977402905152,
      "grad_norm": 7.195093154907227,
      "learning_rate": 9.767861731779156e-06,
      "loss": 0.0737,
      "step": 90460
    },
    {
      "epoch": 0.14807250446770487,
      "grad_norm": 3.863823175430298,
      "learning_rate": 9.76779583956564e-06,
      "loss": 0.0721,
      "step": 90480
    },
    {
      "epoch": 0.1481052349063582,
      "grad_norm": 1.0443178415298462,
      "learning_rate": 9.767729947352122e-06,
      "loss": 0.0606,
      "step": 90500
    },
    {
      "epoch": 0.14813796534501156,
      "grad_norm": 1.4530365467071533,
      "learning_rate": 9.767664055138605e-06,
      "loss": 0.0676,
      "step": 90520
    },
    {
      "epoch": 0.1481706957836649,
      "grad_norm": 2.3286499977111816,
      "learning_rate": 9.767598162925089e-06,
      "loss": 0.0548,
      "step": 90540
    },
    {
      "epoch": 0.14820342622231822,
      "grad_norm": 2.5457255840301514,
      "learning_rate": 9.76753227071157e-06,
      "loss": 0.0705,
      "step": 90560
    },
    {
      "epoch": 0.14823615666097156,
      "grad_norm": 2.9052340984344482,
      "learning_rate": 9.767466378498054e-06,
      "loss": 0.0889,
      "step": 90580
    },
    {
      "epoch": 0.1482688870996249,
      "grad_norm": 2.3965342044830322,
      "learning_rate": 9.767400486284536e-06,
      "loss": 0.0742,
      "step": 90600
    },
    {
      "epoch": 0.14830161753827825,
      "grad_norm": 6.9252543449401855,
      "learning_rate": 9.76733459407102e-06,
      "loss": 0.0699,
      "step": 90620
    },
    {
      "epoch": 0.1483343479769316,
      "grad_norm": 3.4402544498443604,
      "learning_rate": 9.767268701857502e-06,
      "loss": 0.0864,
      "step": 90640
    },
    {
      "epoch": 0.1483670784155849,
      "grad_norm": 1.82646644115448,
      "learning_rate": 9.767202809643985e-06,
      "loss": 0.0468,
      "step": 90660
    },
    {
      "epoch": 0.14839980885423826,
      "grad_norm": 3.185272455215454,
      "learning_rate": 9.767136917430467e-06,
      "loss": 0.0588,
      "step": 90680
    },
    {
      "epoch": 0.1484325392928916,
      "grad_norm": 1.916298747062683,
      "learning_rate": 9.76707102521695e-06,
      "loss": 0.0698,
      "step": 90700
    },
    {
      "epoch": 0.14846526973154495,
      "grad_norm": 5.663589000701904,
      "learning_rate": 9.767005133003434e-06,
      "loss": 0.0997,
      "step": 90720
    },
    {
      "epoch": 0.1484980001701983,
      "grad_norm": 1.967627763748169,
      "learning_rate": 9.766939240789916e-06,
      "loss": 0.0664,
      "step": 90740
    },
    {
      "epoch": 0.1485307306088516,
      "grad_norm": 3.511873960494995,
      "learning_rate": 9.7668733485764e-06,
      "loss": 0.0638,
      "step": 90760
    },
    {
      "epoch": 0.14856346104750495,
      "grad_norm": 2.573735237121582,
      "learning_rate": 9.766807456362882e-06,
      "loss": 0.0734,
      "step": 90780
    },
    {
      "epoch": 0.1485961914861583,
      "grad_norm": 2.9058902263641357,
      "learning_rate": 9.766741564149365e-06,
      "loss": 0.0839,
      "step": 90800
    },
    {
      "epoch": 0.14862892192481164,
      "grad_norm": 1.3800042867660522,
      "learning_rate": 9.766675671935847e-06,
      "loss": 0.0606,
      "step": 90820
    },
    {
      "epoch": 0.14866165236346499,
      "grad_norm": 3.0351321697235107,
      "learning_rate": 9.766609779722331e-06,
      "loss": 0.0825,
      "step": 90840
    },
    {
      "epoch": 0.1486943828021183,
      "grad_norm": 3.345982074737549,
      "learning_rate": 9.766543887508814e-06,
      "loss": 0.0824,
      "step": 90860
    },
    {
      "epoch": 0.14872711324077165,
      "grad_norm": 5.701935291290283,
      "learning_rate": 9.766477995295296e-06,
      "loss": 0.0591,
      "step": 90880
    },
    {
      "epoch": 0.148759843679425,
      "grad_norm": 3.1071982383728027,
      "learning_rate": 9.76641210308178e-06,
      "loss": 0.0711,
      "step": 90900
    },
    {
      "epoch": 0.14879257411807834,
      "grad_norm": 3.2393264770507812,
      "learning_rate": 9.766346210868264e-06,
      "loss": 0.0697,
      "step": 90920
    },
    {
      "epoch": 0.14882530455673168,
      "grad_norm": 0.9286144971847534,
      "learning_rate": 9.766280318654745e-06,
      "loss": 0.0631,
      "step": 90940
    },
    {
      "epoch": 0.148858034995385,
      "grad_norm": 5.957449436187744,
      "learning_rate": 9.766214426441229e-06,
      "loss": 0.0956,
      "step": 90960
    },
    {
      "epoch": 0.14889076543403834,
      "grad_norm": 4.775489807128906,
      "learning_rate": 9.766148534227711e-06,
      "loss": 0.1054,
      "step": 90980
    },
    {
      "epoch": 0.14892349587269169,
      "grad_norm": 4.151142597198486,
      "learning_rate": 9.766082642014194e-06,
      "loss": 0.0885,
      "step": 91000
    },
    {
      "epoch": 0.14895622631134503,
      "grad_norm": 1.6813008785247803,
      "learning_rate": 9.766016749800676e-06,
      "loss": 0.0664,
      "step": 91020
    },
    {
      "epoch": 0.14898895674999837,
      "grad_norm": 1.5853101015090942,
      "learning_rate": 9.76595085758716e-06,
      "loss": 0.0606,
      "step": 91040
    },
    {
      "epoch": 0.1490216871886517,
      "grad_norm": 3.756033182144165,
      "learning_rate": 9.765884965373642e-06,
      "loss": 0.0633,
      "step": 91060
    },
    {
      "epoch": 0.14905441762730504,
      "grad_norm": 2.005070924758911,
      "learning_rate": 9.765819073160125e-06,
      "loss": 0.0748,
      "step": 91080
    },
    {
      "epoch": 0.14908714806595838,
      "grad_norm": 3.638139009475708,
      "learning_rate": 9.765753180946609e-06,
      "loss": 0.0851,
      "step": 91100
    },
    {
      "epoch": 0.14911987850461172,
      "grad_norm": 4.2181596755981445,
      "learning_rate": 9.765687288733091e-06,
      "loss": 0.0745,
      "step": 91120
    },
    {
      "epoch": 0.14915260894326507,
      "grad_norm": 3.5420095920562744,
      "learning_rate": 9.765621396519575e-06,
      "loss": 0.0731,
      "step": 91140
    },
    {
      "epoch": 0.14918533938191839,
      "grad_norm": 2.9590697288513184,
      "learning_rate": 9.765555504306056e-06,
      "loss": 0.0863,
      "step": 91160
    },
    {
      "epoch": 0.14921806982057173,
      "grad_norm": 4.0883941650390625,
      "learning_rate": 9.76548961209254e-06,
      "loss": 0.0709,
      "step": 91180
    },
    {
      "epoch": 0.14925080025922507,
      "grad_norm": 2.588630199432373,
      "learning_rate": 9.765423719879022e-06,
      "loss": 0.0725,
      "step": 91200
    },
    {
      "epoch": 0.14928353069787842,
      "grad_norm": 2.364980697631836,
      "learning_rate": 9.765357827665505e-06,
      "loss": 0.0744,
      "step": 91220
    },
    {
      "epoch": 0.14931626113653176,
      "grad_norm": 2.4669439792633057,
      "learning_rate": 9.765291935451987e-06,
      "loss": 0.0701,
      "step": 91240
    },
    {
      "epoch": 0.14934899157518508,
      "grad_norm": 3.8972294330596924,
      "learning_rate": 9.765226043238471e-06,
      "loss": 0.0768,
      "step": 91260
    },
    {
      "epoch": 0.14938172201383842,
      "grad_norm": 4.006458282470703,
      "learning_rate": 9.765160151024955e-06,
      "loss": 0.0614,
      "step": 91280
    },
    {
      "epoch": 0.14941445245249177,
      "grad_norm": 6.990957736968994,
      "learning_rate": 9.765094258811436e-06,
      "loss": 0.0786,
      "step": 91300
    },
    {
      "epoch": 0.14944718289114511,
      "grad_norm": 6.198397159576416,
      "learning_rate": 9.76502836659792e-06,
      "loss": 0.0803,
      "step": 91320
    },
    {
      "epoch": 0.14947991332979846,
      "grad_norm": 4.126386642456055,
      "learning_rate": 9.764962474384404e-06,
      "loss": 0.0698,
      "step": 91340
    },
    {
      "epoch": 0.14951264376845178,
      "grad_norm": 3.198499917984009,
      "learning_rate": 9.764896582170885e-06,
      "loss": 0.0638,
      "step": 91360
    },
    {
      "epoch": 0.14954537420710512,
      "grad_norm": 3.430354595184326,
      "learning_rate": 9.764830689957369e-06,
      "loss": 0.0798,
      "step": 91380
    },
    {
      "epoch": 0.14957810464575846,
      "grad_norm": 11.203577995300293,
      "learning_rate": 9.764764797743851e-06,
      "loss": 0.0622,
      "step": 91400
    },
    {
      "epoch": 0.1496108350844118,
      "grad_norm": 4.691079139709473,
      "learning_rate": 9.764698905530335e-06,
      "loss": 0.0698,
      "step": 91420
    },
    {
      "epoch": 0.14964356552306515,
      "grad_norm": 4.247495651245117,
      "learning_rate": 9.764633013316818e-06,
      "loss": 0.1047,
      "step": 91440
    },
    {
      "epoch": 0.14967629596171847,
      "grad_norm": 1.807823657989502,
      "learning_rate": 9.7645671211033e-06,
      "loss": 0.0574,
      "step": 91460
    },
    {
      "epoch": 0.14970902640037181,
      "grad_norm": 2.232548475265503,
      "learning_rate": 9.764501228889784e-06,
      "loss": 0.0846,
      "step": 91480
    },
    {
      "epoch": 0.14974175683902516,
      "grad_norm": 4.09009313583374,
      "learning_rate": 9.764435336676266e-06,
      "loss": 0.0829,
      "step": 91500
    },
    {
      "epoch": 0.1497744872776785,
      "grad_norm": 2.2186100482940674,
      "learning_rate": 9.764369444462749e-06,
      "loss": 0.0665,
      "step": 91520
    },
    {
      "epoch": 0.14980721771633185,
      "grad_norm": 2.9139273166656494,
      "learning_rate": 9.764303552249231e-06,
      "loss": 0.0898,
      "step": 91540
    },
    {
      "epoch": 0.14983994815498516,
      "grad_norm": 2.2813754081726074,
      "learning_rate": 9.764237660035715e-06,
      "loss": 0.0671,
      "step": 91560
    },
    {
      "epoch": 0.1498726785936385,
      "grad_norm": 1.7004985809326172,
      "learning_rate": 9.764171767822196e-06,
      "loss": 0.0702,
      "step": 91580
    },
    {
      "epoch": 0.14990540903229185,
      "grad_norm": 1.5870810747146606,
      "learning_rate": 9.76410587560868e-06,
      "loss": 0.0799,
      "step": 91600
    },
    {
      "epoch": 0.1499381394709452,
      "grad_norm": 2.2295472621917725,
      "learning_rate": 9.764039983395162e-06,
      "loss": 0.0833,
      "step": 91620
    },
    {
      "epoch": 0.14997086990959854,
      "grad_norm": 16.6556453704834,
      "learning_rate": 9.763974091181646e-06,
      "loss": 0.0697,
      "step": 91640
    },
    {
      "epoch": 0.15000360034825186,
      "grad_norm": 2.9347453117370605,
      "learning_rate": 9.763908198968129e-06,
      "loss": 0.0714,
      "step": 91660
    },
    {
      "epoch": 0.1500363307869052,
      "grad_norm": 2.8673136234283447,
      "learning_rate": 9.763842306754611e-06,
      "loss": 0.088,
      "step": 91680
    },
    {
      "epoch": 0.15006906122555855,
      "grad_norm": 3.156984329223633,
      "learning_rate": 9.763776414541095e-06,
      "loss": 0.0605,
      "step": 91700
    },
    {
      "epoch": 0.1501017916642119,
      "grad_norm": 3.1175172328948975,
      "learning_rate": 9.763710522327578e-06,
      "loss": 0.0645,
      "step": 91720
    },
    {
      "epoch": 0.1501345221028652,
      "grad_norm": 2.7849724292755127,
      "learning_rate": 9.76364463011406e-06,
      "loss": 0.0661,
      "step": 91740
    },
    {
      "epoch": 0.15016725254151855,
      "grad_norm": 3.80407977104187,
      "learning_rate": 9.763578737900544e-06,
      "loss": 0.0712,
      "step": 91760
    },
    {
      "epoch": 0.1501999829801719,
      "grad_norm": 2.159510850906372,
      "learning_rate": 9.763512845687027e-06,
      "loss": 0.0771,
      "step": 91780
    },
    {
      "epoch": 0.15023271341882524,
      "grad_norm": 2.815699338912964,
      "learning_rate": 9.763446953473509e-06,
      "loss": 0.0693,
      "step": 91800
    },
    {
      "epoch": 0.1502654438574786,
      "grad_norm": 3.712769031524658,
      "learning_rate": 9.763381061259993e-06,
      "loss": 0.0804,
      "step": 91820
    },
    {
      "epoch": 0.1502981742961319,
      "grad_norm": 1.5249342918395996,
      "learning_rate": 9.763315169046475e-06,
      "loss": 0.0536,
      "step": 91840
    },
    {
      "epoch": 0.15033090473478525,
      "grad_norm": 4.861608982086182,
      "learning_rate": 9.763249276832958e-06,
      "loss": 0.0845,
      "step": 91860
    },
    {
      "epoch": 0.1503636351734386,
      "grad_norm": 2.1856319904327393,
      "learning_rate": 9.76318338461944e-06,
      "loss": 0.0625,
      "step": 91880
    },
    {
      "epoch": 0.15039636561209194,
      "grad_norm": 1.8187649250030518,
      "learning_rate": 9.763117492405924e-06,
      "loss": 0.0597,
      "step": 91900
    },
    {
      "epoch": 0.15042909605074528,
      "grad_norm": 2.5755910873413086,
      "learning_rate": 9.763051600192406e-06,
      "loss": 0.0553,
      "step": 91920
    },
    {
      "epoch": 0.1504618264893986,
      "grad_norm": 1.879320740699768,
      "learning_rate": 9.76298570797889e-06,
      "loss": 0.0868,
      "step": 91940
    },
    {
      "epoch": 0.15049455692805194,
      "grad_norm": 3.979065418243408,
      "learning_rate": 9.762919815765371e-06,
      "loss": 0.0623,
      "step": 91960
    },
    {
      "epoch": 0.1505272873667053,
      "grad_norm": 2.7985260486602783,
      "learning_rate": 9.762853923551855e-06,
      "loss": 0.0834,
      "step": 91980
    },
    {
      "epoch": 0.15056001780535863,
      "grad_norm": 3.101855993270874,
      "learning_rate": 9.762788031338337e-06,
      "loss": 0.0796,
      "step": 92000
    },
    {
      "epoch": 0.15059274824401198,
      "grad_norm": 2.865133762359619,
      "learning_rate": 9.76272213912482e-06,
      "loss": 0.0653,
      "step": 92020
    },
    {
      "epoch": 0.1506254786826653,
      "grad_norm": 2.4609763622283936,
      "learning_rate": 9.762656246911302e-06,
      "loss": 0.0557,
      "step": 92040
    },
    {
      "epoch": 0.15065820912131864,
      "grad_norm": 5.58213996887207,
      "learning_rate": 9.762590354697786e-06,
      "loss": 0.0864,
      "step": 92060
    },
    {
      "epoch": 0.15069093955997198,
      "grad_norm": 2.9817819595336914,
      "learning_rate": 9.76252446248427e-06,
      "loss": 0.0473,
      "step": 92080
    },
    {
      "epoch": 0.15072366999862533,
      "grad_norm": 3.1501121520996094,
      "learning_rate": 9.762458570270753e-06,
      "loss": 0.0815,
      "step": 92100
    },
    {
      "epoch": 0.15075640043727867,
      "grad_norm": 1.9722566604614258,
      "learning_rate": 9.762392678057235e-06,
      "loss": 0.0842,
      "step": 92120
    },
    {
      "epoch": 0.150789130875932,
      "grad_norm": 4.053240776062012,
      "learning_rate": 9.762326785843718e-06,
      "loss": 0.087,
      "step": 92140
    },
    {
      "epoch": 0.15082186131458533,
      "grad_norm": 4.356495380401611,
      "learning_rate": 9.762260893630202e-06,
      "loss": 0.0825,
      "step": 92160
    },
    {
      "epoch": 0.15085459175323868,
      "grad_norm": 3.2236130237579346,
      "learning_rate": 9.762195001416684e-06,
      "loss": 0.0831,
      "step": 92180
    },
    {
      "epoch": 0.15088732219189202,
      "grad_norm": 3.355562210083008,
      "learning_rate": 9.762129109203167e-06,
      "loss": 0.0695,
      "step": 92200
    },
    {
      "epoch": 0.15092005263054536,
      "grad_norm": 2.411396026611328,
      "learning_rate": 9.76206321698965e-06,
      "loss": 0.0671,
      "step": 92220
    },
    {
      "epoch": 0.15095278306919868,
      "grad_norm": 2.610705852508545,
      "learning_rate": 9.761997324776133e-06,
      "loss": 0.0579,
      "step": 92240
    },
    {
      "epoch": 0.15098551350785203,
      "grad_norm": 3.641174554824829,
      "learning_rate": 9.761931432562615e-06,
      "loss": 0.0734,
      "step": 92260
    },
    {
      "epoch": 0.15101824394650537,
      "grad_norm": 2.190164089202881,
      "learning_rate": 9.761865540349098e-06,
      "loss": 0.0816,
      "step": 92280
    },
    {
      "epoch": 0.15105097438515871,
      "grad_norm": 2.7063655853271484,
      "learning_rate": 9.76179964813558e-06,
      "loss": 0.0616,
      "step": 92300
    },
    {
      "epoch": 0.15108370482381206,
      "grad_norm": 2.63120698928833,
      "learning_rate": 9.761733755922064e-06,
      "loss": 0.0603,
      "step": 92320
    },
    {
      "epoch": 0.15111643526246538,
      "grad_norm": 1.7493834495544434,
      "learning_rate": 9.761667863708546e-06,
      "loss": 0.0693,
      "step": 92340
    },
    {
      "epoch": 0.15114916570111872,
      "grad_norm": 3.539825916290283,
      "learning_rate": 9.76160197149503e-06,
      "loss": 0.0679,
      "step": 92360
    },
    {
      "epoch": 0.15118189613977207,
      "grad_norm": 5.625732898712158,
      "learning_rate": 9.761536079281511e-06,
      "loss": 0.0793,
      "step": 92380
    },
    {
      "epoch": 0.1512146265784254,
      "grad_norm": 2.9593300819396973,
      "learning_rate": 9.761470187067995e-06,
      "loss": 0.0599,
      "step": 92400
    },
    {
      "epoch": 0.15124735701707875,
      "grad_norm": 2.903416633605957,
      "learning_rate": 9.761404294854477e-06,
      "loss": 0.0753,
      "step": 92420
    },
    {
      "epoch": 0.15128008745573207,
      "grad_norm": 0.8807337284088135,
      "learning_rate": 9.76133840264096e-06,
      "loss": 0.0706,
      "step": 92440
    },
    {
      "epoch": 0.15131281789438542,
      "grad_norm": 2.1599209308624268,
      "learning_rate": 9.761272510427444e-06,
      "loss": 0.0741,
      "step": 92460
    },
    {
      "epoch": 0.15134554833303876,
      "grad_norm": 2.210716962814331,
      "learning_rate": 9.761206618213926e-06,
      "loss": 0.0772,
      "step": 92480
    },
    {
      "epoch": 0.1513782787716921,
      "grad_norm": 4.785558700561523,
      "learning_rate": 9.76114072600041e-06,
      "loss": 0.0669,
      "step": 92500
    },
    {
      "epoch": 0.15141100921034545,
      "grad_norm": 0.5718818306922913,
      "learning_rate": 9.761074833786893e-06,
      "loss": 0.0611,
      "step": 92520
    },
    {
      "epoch": 0.15144373964899877,
      "grad_norm": 2.8736348152160645,
      "learning_rate": 9.761008941573375e-06,
      "loss": 0.0614,
      "step": 92540
    },
    {
      "epoch": 0.1514764700876521,
      "grad_norm": 4.009939670562744,
      "learning_rate": 9.760943049359858e-06,
      "loss": 0.0735,
      "step": 92560
    },
    {
      "epoch": 0.15150920052630545,
      "grad_norm": 5.851947784423828,
      "learning_rate": 9.760877157146342e-06,
      "loss": 0.0642,
      "step": 92580
    },
    {
      "epoch": 0.1515419309649588,
      "grad_norm": 2.5622692108154297,
      "learning_rate": 9.760811264932824e-06,
      "loss": 0.0642,
      "step": 92600
    },
    {
      "epoch": 0.15157466140361214,
      "grad_norm": 8.405000686645508,
      "learning_rate": 9.760745372719307e-06,
      "loss": 0.0616,
      "step": 92620
    },
    {
      "epoch": 0.15160739184226546,
      "grad_norm": 3.6185760498046875,
      "learning_rate": 9.76067948050579e-06,
      "loss": 0.0762,
      "step": 92640
    },
    {
      "epoch": 0.1516401222809188,
      "grad_norm": 1.9037834405899048,
      "learning_rate": 9.760613588292273e-06,
      "loss": 0.0735,
      "step": 92660
    },
    {
      "epoch": 0.15167285271957215,
      "grad_norm": 0.954001247882843,
      "learning_rate": 9.760547696078755e-06,
      "loss": 0.0586,
      "step": 92680
    },
    {
      "epoch": 0.1517055831582255,
      "grad_norm": 5.374936580657959,
      "learning_rate": 9.760481803865238e-06,
      "loss": 0.0699,
      "step": 92700
    },
    {
      "epoch": 0.15173831359687884,
      "grad_norm": 1.4951555728912354,
      "learning_rate": 9.76041591165172e-06,
      "loss": 0.0807,
      "step": 92720
    },
    {
      "epoch": 0.15177104403553215,
      "grad_norm": 3.623277425765991,
      "learning_rate": 9.760350019438204e-06,
      "loss": 0.071,
      "step": 92740
    },
    {
      "epoch": 0.1518037744741855,
      "grad_norm": 0.7316828966140747,
      "learning_rate": 9.760284127224686e-06,
      "loss": 0.0686,
      "step": 92760
    },
    {
      "epoch": 0.15183650491283884,
      "grad_norm": 2.008253335952759,
      "learning_rate": 9.76021823501117e-06,
      "loss": 0.0495,
      "step": 92780
    },
    {
      "epoch": 0.1518692353514922,
      "grad_norm": 2.8087754249572754,
      "learning_rate": 9.760152342797651e-06,
      "loss": 0.084,
      "step": 92800
    },
    {
      "epoch": 0.15190196579014553,
      "grad_norm": 3.3480916023254395,
      "learning_rate": 9.760086450584135e-06,
      "loss": 0.0781,
      "step": 92820
    },
    {
      "epoch": 0.15193469622879885,
      "grad_norm": 2.6256697177886963,
      "learning_rate": 9.760020558370618e-06,
      "loss": 0.084,
      "step": 92840
    },
    {
      "epoch": 0.1519674266674522,
      "grad_norm": 2.9987845420837402,
      "learning_rate": 9.7599546661571e-06,
      "loss": 0.082,
      "step": 92860
    },
    {
      "epoch": 0.15200015710610554,
      "grad_norm": 9.83336353302002,
      "learning_rate": 9.759888773943584e-06,
      "loss": 0.0658,
      "step": 92880
    },
    {
      "epoch": 0.15203288754475888,
      "grad_norm": 2.231044292449951,
      "learning_rate": 9.759822881730067e-06,
      "loss": 0.0801,
      "step": 92900
    },
    {
      "epoch": 0.15206561798341223,
      "grad_norm": 3.5141141414642334,
      "learning_rate": 9.75975698951655e-06,
      "loss": 0.0566,
      "step": 92920
    },
    {
      "epoch": 0.15209834842206554,
      "grad_norm": 5.569818019866943,
      "learning_rate": 9.759691097303033e-06,
      "loss": 0.0649,
      "step": 92940
    },
    {
      "epoch": 0.1521310788607189,
      "grad_norm": 2.063669443130493,
      "learning_rate": 9.759625205089517e-06,
      "loss": 0.084,
      "step": 92960
    },
    {
      "epoch": 0.15216380929937223,
      "grad_norm": 1.9991052150726318,
      "learning_rate": 9.759559312875998e-06,
      "loss": 0.0947,
      "step": 92980
    },
    {
      "epoch": 0.15219653973802558,
      "grad_norm": 5.538431644439697,
      "learning_rate": 9.759493420662482e-06,
      "loss": 0.0724,
      "step": 93000
    },
    {
      "epoch": 0.15222927017667892,
      "grad_norm": 1.6018328666687012,
      "learning_rate": 9.759427528448964e-06,
      "loss": 0.0605,
      "step": 93020
    },
    {
      "epoch": 0.15226200061533224,
      "grad_norm": 2.9498889446258545,
      "learning_rate": 9.759361636235447e-06,
      "loss": 0.0653,
      "step": 93040
    },
    {
      "epoch": 0.15229473105398558,
      "grad_norm": 1.0524941682815552,
      "learning_rate": 9.75929574402193e-06,
      "loss": 0.0715,
      "step": 93060
    },
    {
      "epoch": 0.15232746149263893,
      "grad_norm": 1.936569333076477,
      "learning_rate": 9.759229851808413e-06,
      "loss": 0.0652,
      "step": 93080
    },
    {
      "epoch": 0.15236019193129227,
      "grad_norm": 1.4042896032333374,
      "learning_rate": 9.759163959594895e-06,
      "loss": 0.0576,
      "step": 93100
    },
    {
      "epoch": 0.1523929223699456,
      "grad_norm": 3.1138916015625,
      "learning_rate": 9.759098067381378e-06,
      "loss": 0.0736,
      "step": 93120
    },
    {
      "epoch": 0.15242565280859893,
      "grad_norm": 5.9945902824401855,
      "learning_rate": 9.75903217516786e-06,
      "loss": 0.0879,
      "step": 93140
    },
    {
      "epoch": 0.15245838324725228,
      "grad_norm": 2.4273767471313477,
      "learning_rate": 9.758966282954344e-06,
      "loss": 0.0727,
      "step": 93160
    },
    {
      "epoch": 0.15249111368590562,
      "grad_norm": 5.401031017303467,
      "learning_rate": 9.758900390740826e-06,
      "loss": 0.0877,
      "step": 93180
    },
    {
      "epoch": 0.15252384412455897,
      "grad_norm": 3.282201051712036,
      "learning_rate": 9.75883449852731e-06,
      "loss": 0.0801,
      "step": 93200
    },
    {
      "epoch": 0.15255657456321228,
      "grad_norm": 0.8061864376068115,
      "learning_rate": 9.758768606313793e-06,
      "loss": 0.0634,
      "step": 93220
    },
    {
      "epoch": 0.15258930500186563,
      "grad_norm": 4.865271091461182,
      "learning_rate": 9.758702714100275e-06,
      "loss": 0.0745,
      "step": 93240
    },
    {
      "epoch": 0.15262203544051897,
      "grad_norm": 3.0141499042510986,
      "learning_rate": 9.758636821886758e-06,
      "loss": 0.0797,
      "step": 93260
    },
    {
      "epoch": 0.15265476587917232,
      "grad_norm": 12.346096992492676,
      "learning_rate": 9.75857092967324e-06,
      "loss": 0.0691,
      "step": 93280
    },
    {
      "epoch": 0.15268749631782566,
      "grad_norm": 4.867372512817383,
      "learning_rate": 9.758505037459724e-06,
      "loss": 0.0689,
      "step": 93300
    },
    {
      "epoch": 0.15272022675647898,
      "grad_norm": 4.697149753570557,
      "learning_rate": 9.758439145246208e-06,
      "loss": 0.0652,
      "step": 93320
    },
    {
      "epoch": 0.15275295719513232,
      "grad_norm": 5.5797295570373535,
      "learning_rate": 9.75837325303269e-06,
      "loss": 0.0737,
      "step": 93340
    },
    {
      "epoch": 0.15278568763378567,
      "grad_norm": 9.198299407958984,
      "learning_rate": 9.758307360819173e-06,
      "loss": 0.0682,
      "step": 93360
    },
    {
      "epoch": 0.152818418072439,
      "grad_norm": 3.153823137283325,
      "learning_rate": 9.758241468605657e-06,
      "loss": 0.0757,
      "step": 93380
    },
    {
      "epoch": 0.15285114851109236,
      "grad_norm": 4.359353542327881,
      "learning_rate": 9.758175576392138e-06,
      "loss": 0.0652,
      "step": 93400
    },
    {
      "epoch": 0.15288387894974567,
      "grad_norm": 2.660412311553955,
      "learning_rate": 9.758109684178622e-06,
      "loss": 0.0864,
      "step": 93420
    },
    {
      "epoch": 0.15291660938839902,
      "grad_norm": 0.4657694101333618,
      "learning_rate": 9.758043791965104e-06,
      "loss": 0.0665,
      "step": 93440
    },
    {
      "epoch": 0.15294933982705236,
      "grad_norm": 3.327414035797119,
      "learning_rate": 9.757977899751588e-06,
      "loss": 0.0571,
      "step": 93460
    },
    {
      "epoch": 0.1529820702657057,
      "grad_norm": 2.204464912414551,
      "learning_rate": 9.75791200753807e-06,
      "loss": 0.08,
      "step": 93480
    },
    {
      "epoch": 0.15301480070435905,
      "grad_norm": 4.356010437011719,
      "learning_rate": 9.757846115324553e-06,
      "loss": 0.0641,
      "step": 93500
    },
    {
      "epoch": 0.15304753114301237,
      "grad_norm": 2.2465226650238037,
      "learning_rate": 9.757780223111035e-06,
      "loss": 0.073,
      "step": 93520
    },
    {
      "epoch": 0.1530802615816657,
      "grad_norm": 1.605436086654663,
      "learning_rate": 9.757714330897519e-06,
      "loss": 0.0731,
      "step": 93540
    },
    {
      "epoch": 0.15311299202031906,
      "grad_norm": 2.55106782913208,
      "learning_rate": 9.757648438684002e-06,
      "loss": 0.0725,
      "step": 93560
    },
    {
      "epoch": 0.1531457224589724,
      "grad_norm": 4.130876541137695,
      "learning_rate": 9.757582546470484e-06,
      "loss": 0.0752,
      "step": 93580
    },
    {
      "epoch": 0.15317845289762574,
      "grad_norm": 4.240662097930908,
      "learning_rate": 9.757516654256968e-06,
      "loss": 0.0647,
      "step": 93600
    },
    {
      "epoch": 0.15321118333627906,
      "grad_norm": 2.2245593070983887,
      "learning_rate": 9.75745076204345e-06,
      "loss": 0.074,
      "step": 93620
    },
    {
      "epoch": 0.1532439137749324,
      "grad_norm": 7.548405170440674,
      "learning_rate": 9.757384869829933e-06,
      "loss": 0.0874,
      "step": 93640
    },
    {
      "epoch": 0.15327664421358575,
      "grad_norm": 1.5124088525772095,
      "learning_rate": 9.757318977616415e-06,
      "loss": 0.0891,
      "step": 93660
    },
    {
      "epoch": 0.1533093746522391,
      "grad_norm": 2.374272108078003,
      "learning_rate": 9.757253085402899e-06,
      "loss": 0.0722,
      "step": 93680
    },
    {
      "epoch": 0.15334210509089244,
      "grad_norm": 1.2108123302459717,
      "learning_rate": 9.757187193189382e-06,
      "loss": 0.0604,
      "step": 93700
    },
    {
      "epoch": 0.15337483552954576,
      "grad_norm": 2.414132833480835,
      "learning_rate": 9.757121300975864e-06,
      "loss": 0.0625,
      "step": 93720
    },
    {
      "epoch": 0.1534075659681991,
      "grad_norm": 1.2968056201934814,
      "learning_rate": 9.757055408762348e-06,
      "loss": 0.0714,
      "step": 93740
    },
    {
      "epoch": 0.15344029640685244,
      "grad_norm": 3.3993029594421387,
      "learning_rate": 9.756989516548831e-06,
      "loss": 0.0709,
      "step": 93760
    },
    {
      "epoch": 0.1534730268455058,
      "grad_norm": 2.5830419063568115,
      "learning_rate": 9.756923624335313e-06,
      "loss": 0.0793,
      "step": 93780
    },
    {
      "epoch": 0.15350575728415913,
      "grad_norm": 2.1453347206115723,
      "learning_rate": 9.756857732121797e-06,
      "loss": 0.076,
      "step": 93800
    },
    {
      "epoch": 0.15353848772281245,
      "grad_norm": 2.841754674911499,
      "learning_rate": 9.756791839908279e-06,
      "loss": 0.077,
      "step": 93820
    },
    {
      "epoch": 0.1535712181614658,
      "grad_norm": 1.7638651132583618,
      "learning_rate": 9.756725947694762e-06,
      "loss": 0.0868,
      "step": 93840
    },
    {
      "epoch": 0.15360394860011914,
      "grad_norm": 3.3147242069244385,
      "learning_rate": 9.756660055481244e-06,
      "loss": 0.0734,
      "step": 93860
    },
    {
      "epoch": 0.15363667903877248,
      "grad_norm": 5.309573650360107,
      "learning_rate": 9.756594163267728e-06,
      "loss": 0.0646,
      "step": 93880
    },
    {
      "epoch": 0.15366940947742583,
      "grad_norm": 4.195741653442383,
      "learning_rate": 9.756528271054211e-06,
      "loss": 0.0797,
      "step": 93900
    },
    {
      "epoch": 0.15370213991607914,
      "grad_norm": 1.3476215600967407,
      "learning_rate": 9.756462378840693e-06,
      "loss": 0.0651,
      "step": 93920
    },
    {
      "epoch": 0.1537348703547325,
      "grad_norm": 9.215045928955078,
      "learning_rate": 9.756396486627177e-06,
      "loss": 0.0784,
      "step": 93940
    },
    {
      "epoch": 0.15376760079338583,
      "grad_norm": 0.9661378264427185,
      "learning_rate": 9.756330594413659e-06,
      "loss": 0.075,
      "step": 93960
    },
    {
      "epoch": 0.15380033123203918,
      "grad_norm": 0.5982688665390015,
      "learning_rate": 9.756264702200142e-06,
      "loss": 0.0723,
      "step": 93980
    },
    {
      "epoch": 0.15383306167069252,
      "grad_norm": 1.4499908685684204,
      "learning_rate": 9.756198809986624e-06,
      "loss": 0.0698,
      "step": 94000
    },
    {
      "epoch": 0.15386579210934584,
      "grad_norm": 0.5945823788642883,
      "learning_rate": 9.756132917773108e-06,
      "loss": 0.0838,
      "step": 94020
    },
    {
      "epoch": 0.15389852254799918,
      "grad_norm": 2.361294746398926,
      "learning_rate": 9.75606702555959e-06,
      "loss": 0.0688,
      "step": 94040
    },
    {
      "epoch": 0.15393125298665253,
      "grad_norm": 3.262983560562134,
      "learning_rate": 9.756001133346073e-06,
      "loss": 0.0647,
      "step": 94060
    },
    {
      "epoch": 0.15396398342530587,
      "grad_norm": 3.194938898086548,
      "learning_rate": 9.755935241132555e-06,
      "loss": 0.0597,
      "step": 94080
    },
    {
      "epoch": 0.15399671386395922,
      "grad_norm": 1.8904763460159302,
      "learning_rate": 9.755869348919039e-06,
      "loss": 0.0687,
      "step": 94100
    },
    {
      "epoch": 0.15402944430261253,
      "grad_norm": 2.946599245071411,
      "learning_rate": 9.755803456705522e-06,
      "loss": 0.0788,
      "step": 94120
    },
    {
      "epoch": 0.15406217474126588,
      "grad_norm": 14.194294929504395,
      "learning_rate": 9.755737564492004e-06,
      "loss": 0.0938,
      "step": 94140
    },
    {
      "epoch": 0.15409490517991922,
      "grad_norm": 3.53381609916687,
      "learning_rate": 9.755671672278488e-06,
      "loss": 0.0767,
      "step": 94160
    },
    {
      "epoch": 0.15412763561857257,
      "grad_norm": 1.1291348934173584,
      "learning_rate": 9.755605780064971e-06,
      "loss": 0.0713,
      "step": 94180
    },
    {
      "epoch": 0.1541603660572259,
      "grad_norm": 13.545075416564941,
      "learning_rate": 9.755539887851453e-06,
      "loss": 0.076,
      "step": 94200
    },
    {
      "epoch": 0.15419309649587923,
      "grad_norm": 2.1256535053253174,
      "learning_rate": 9.755473995637937e-06,
      "loss": 0.0638,
      "step": 94220
    },
    {
      "epoch": 0.15422582693453257,
      "grad_norm": 4.272963047027588,
      "learning_rate": 9.75540810342442e-06,
      "loss": 0.0779,
      "step": 94240
    },
    {
      "epoch": 0.15425855737318592,
      "grad_norm": 5.864796161651611,
      "learning_rate": 9.755342211210902e-06,
      "loss": 0.0635,
      "step": 94260
    },
    {
      "epoch": 0.15429128781183926,
      "grad_norm": 1.6841115951538086,
      "learning_rate": 9.755276318997386e-06,
      "loss": 0.0863,
      "step": 94280
    },
    {
      "epoch": 0.1543240182504926,
      "grad_norm": 4.4438581466674805,
      "learning_rate": 9.755210426783868e-06,
      "loss": 0.0539,
      "step": 94300
    },
    {
      "epoch": 0.15435674868914592,
      "grad_norm": 1.2179142236709595,
      "learning_rate": 9.755144534570351e-06,
      "loss": 0.0525,
      "step": 94320
    },
    {
      "epoch": 0.15438947912779927,
      "grad_norm": 4.233118534088135,
      "learning_rate": 9.755078642356833e-06,
      "loss": 0.079,
      "step": 94340
    },
    {
      "epoch": 0.1544222095664526,
      "grad_norm": 2.2311947345733643,
      "learning_rate": 9.755012750143317e-06,
      "loss": 0.0875,
      "step": 94360
    },
    {
      "epoch": 0.15445494000510596,
      "grad_norm": 2.5082478523254395,
      "learning_rate": 9.754946857929799e-06,
      "loss": 0.0786,
      "step": 94380
    },
    {
      "epoch": 0.1544876704437593,
      "grad_norm": 0.9778560400009155,
      "learning_rate": 9.754880965716282e-06,
      "loss": 0.0764,
      "step": 94400
    },
    {
      "epoch": 0.15452040088241262,
      "grad_norm": 3.6503689289093018,
      "learning_rate": 9.754815073502764e-06,
      "loss": 0.0834,
      "step": 94420
    },
    {
      "epoch": 0.15455313132106596,
      "grad_norm": 1.0290076732635498,
      "learning_rate": 9.754749181289248e-06,
      "loss": 0.0557,
      "step": 94440
    },
    {
      "epoch": 0.1545858617597193,
      "grad_norm": 2.6386971473693848,
      "learning_rate": 9.75468328907573e-06,
      "loss": 0.0715,
      "step": 94460
    },
    {
      "epoch": 0.15461859219837265,
      "grad_norm": 3.4135382175445557,
      "learning_rate": 9.754617396862213e-06,
      "loss": 0.077,
      "step": 94480
    },
    {
      "epoch": 0.154651322637026,
      "grad_norm": 3.168808698654175,
      "learning_rate": 9.754551504648697e-06,
      "loss": 0.0617,
      "step": 94500
    },
    {
      "epoch": 0.1546840530756793,
      "grad_norm": 1.5349714756011963,
      "learning_rate": 9.754485612435179e-06,
      "loss": 0.07,
      "step": 94520
    },
    {
      "epoch": 0.15471678351433266,
      "grad_norm": 5.267190933227539,
      "learning_rate": 9.754419720221662e-06,
      "loss": 0.0749,
      "step": 94540
    },
    {
      "epoch": 0.154749513952986,
      "grad_norm": 6.757484436035156,
      "learning_rate": 9.754353828008146e-06,
      "loss": 0.0709,
      "step": 94560
    },
    {
      "epoch": 0.15478224439163935,
      "grad_norm": 6.121474742889404,
      "learning_rate": 9.754287935794628e-06,
      "loss": 0.0808,
      "step": 94580
    },
    {
      "epoch": 0.15481497483029266,
      "grad_norm": 1.861223578453064,
      "learning_rate": 9.754222043581111e-06,
      "loss": 0.066,
      "step": 94600
    },
    {
      "epoch": 0.154847705268946,
      "grad_norm": 2.7193126678466797,
      "learning_rate": 9.754156151367595e-06,
      "loss": 0.0884,
      "step": 94620
    },
    {
      "epoch": 0.15488043570759935,
      "grad_norm": 2.6351687908172607,
      "learning_rate": 9.754090259154077e-06,
      "loss": 0.0925,
      "step": 94640
    },
    {
      "epoch": 0.1549131661462527,
      "grad_norm": 3.298227310180664,
      "learning_rate": 9.75402436694056e-06,
      "loss": 0.0798,
      "step": 94660
    },
    {
      "epoch": 0.15494589658490604,
      "grad_norm": 2.8881564140319824,
      "learning_rate": 9.753958474727042e-06,
      "loss": 0.0735,
      "step": 94680
    },
    {
      "epoch": 0.15497862702355936,
      "grad_norm": 1.8784574270248413,
      "learning_rate": 9.753892582513526e-06,
      "loss": 0.0659,
      "step": 94700
    },
    {
      "epoch": 0.1550113574622127,
      "grad_norm": 2.3355987071990967,
      "learning_rate": 9.753826690300008e-06,
      "loss": 0.0739,
      "step": 94720
    },
    {
      "epoch": 0.15504408790086605,
      "grad_norm": 2.556218385696411,
      "learning_rate": 9.753760798086491e-06,
      "loss": 0.0736,
      "step": 94740
    },
    {
      "epoch": 0.1550768183395194,
      "grad_norm": 2.1781389713287354,
      "learning_rate": 9.753694905872973e-06,
      "loss": 0.0881,
      "step": 94760
    },
    {
      "epoch": 0.15510954877817273,
      "grad_norm": 2.8982763290405273,
      "learning_rate": 9.753629013659457e-06,
      "loss": 0.0657,
      "step": 94780
    },
    {
      "epoch": 0.15514227921682605,
      "grad_norm": 4.134762287139893,
      "learning_rate": 9.753563121445939e-06,
      "loss": 0.0691,
      "step": 94800
    },
    {
      "epoch": 0.1551750096554794,
      "grad_norm": 2.7691781520843506,
      "learning_rate": 9.753497229232422e-06,
      "loss": 0.0681,
      "step": 94820
    },
    {
      "epoch": 0.15520774009413274,
      "grad_norm": 4.299633026123047,
      "learning_rate": 9.753431337018904e-06,
      "loss": 0.066,
      "step": 94840
    },
    {
      "epoch": 0.15524047053278608,
      "grad_norm": 3.5683650970458984,
      "learning_rate": 9.753365444805388e-06,
      "loss": 0.0641,
      "step": 94860
    },
    {
      "epoch": 0.15527320097143943,
      "grad_norm": 0.9404621124267578,
      "learning_rate": 9.753299552591871e-06,
      "loss": 0.0706,
      "step": 94880
    },
    {
      "epoch": 0.15530593141009275,
      "grad_norm": 2.099909782409668,
      "learning_rate": 9.753233660378353e-06,
      "loss": 0.0829,
      "step": 94900
    },
    {
      "epoch": 0.1553386618487461,
      "grad_norm": 3.3578248023986816,
      "learning_rate": 9.753167768164837e-06,
      "loss": 0.0853,
      "step": 94920
    },
    {
      "epoch": 0.15537139228739943,
      "grad_norm": 1.809014916419983,
      "learning_rate": 9.75310187595132e-06,
      "loss": 0.0773,
      "step": 94940
    },
    {
      "epoch": 0.15540412272605278,
      "grad_norm": 1.6978976726531982,
      "learning_rate": 9.753035983737802e-06,
      "loss": 0.0695,
      "step": 94960
    },
    {
      "epoch": 0.15543685316470612,
      "grad_norm": 3.0486252307891846,
      "learning_rate": 9.752970091524286e-06,
      "loss": 0.0685,
      "step": 94980
    },
    {
      "epoch": 0.15546958360335944,
      "grad_norm": 1.0000251531600952,
      "learning_rate": 9.75290419931077e-06,
      "loss": 0.0696,
      "step": 95000
    },
    {
      "epoch": 0.15550231404201278,
      "grad_norm": 4.004556179046631,
      "learning_rate": 9.752838307097251e-06,
      "loss": 0.0702,
      "step": 95020
    },
    {
      "epoch": 0.15553504448066613,
      "grad_norm": 1.846300721168518,
      "learning_rate": 9.752772414883735e-06,
      "loss": 0.1009,
      "step": 95040
    },
    {
      "epoch": 0.15556777491931947,
      "grad_norm": 5.948353290557861,
      "learning_rate": 9.752706522670217e-06,
      "loss": 0.0947,
      "step": 95060
    },
    {
      "epoch": 0.15560050535797282,
      "grad_norm": 3.1006200313568115,
      "learning_rate": 9.7526406304567e-06,
      "loss": 0.0614,
      "step": 95080
    },
    {
      "epoch": 0.15563323579662613,
      "grad_norm": 2.943173408508301,
      "learning_rate": 9.752574738243182e-06,
      "loss": 0.0759,
      "step": 95100
    },
    {
      "epoch": 0.15566596623527948,
      "grad_norm": 1.041479468345642,
      "learning_rate": 9.752508846029666e-06,
      "loss": 0.0663,
      "step": 95120
    },
    {
      "epoch": 0.15569869667393282,
      "grad_norm": 1.0127029418945312,
      "learning_rate": 9.752442953816148e-06,
      "loss": 0.0801,
      "step": 95140
    },
    {
      "epoch": 0.15573142711258617,
      "grad_norm": 1.7543230056762695,
      "learning_rate": 9.752377061602631e-06,
      "loss": 0.0833,
      "step": 95160
    },
    {
      "epoch": 0.1557641575512395,
      "grad_norm": 2.8346173763275146,
      "learning_rate": 9.752311169389113e-06,
      "loss": 0.0697,
      "step": 95180
    },
    {
      "epoch": 0.15579688798989283,
      "grad_norm": 2.4256298542022705,
      "learning_rate": 9.752245277175597e-06,
      "loss": 0.0792,
      "step": 95200
    },
    {
      "epoch": 0.15582961842854617,
      "grad_norm": 3.364025115966797,
      "learning_rate": 9.752179384962079e-06,
      "loss": 0.0694,
      "step": 95220
    },
    {
      "epoch": 0.15586234886719952,
      "grad_norm": 3.4056074619293213,
      "learning_rate": 9.752113492748562e-06,
      "loss": 0.0764,
      "step": 95240
    },
    {
      "epoch": 0.15589507930585286,
      "grad_norm": 4.785549640655518,
      "learning_rate": 9.752047600535044e-06,
      "loss": 0.1024,
      "step": 95260
    },
    {
      "epoch": 0.1559278097445062,
      "grad_norm": 2.787820816040039,
      "learning_rate": 9.751981708321528e-06,
      "loss": 0.0697,
      "step": 95280
    },
    {
      "epoch": 0.15596054018315952,
      "grad_norm": 2.4607994556427,
      "learning_rate": 9.751915816108011e-06,
      "loss": 0.0666,
      "step": 95300
    },
    {
      "epoch": 0.15599327062181287,
      "grad_norm": 1.4010679721832275,
      "learning_rate": 9.751849923894493e-06,
      "loss": 0.081,
      "step": 95320
    },
    {
      "epoch": 0.1560260010604662,
      "grad_norm": 2.8039357662200928,
      "learning_rate": 9.751784031680977e-06,
      "loss": 0.0757,
      "step": 95340
    },
    {
      "epoch": 0.15605873149911956,
      "grad_norm": 5.846952438354492,
      "learning_rate": 9.75171813946746e-06,
      "loss": 0.0779,
      "step": 95360
    },
    {
      "epoch": 0.1560914619377729,
      "grad_norm": 31.28103256225586,
      "learning_rate": 9.751652247253942e-06,
      "loss": 0.067,
      "step": 95380
    },
    {
      "epoch": 0.15612419237642622,
      "grad_norm": 1.498945951461792,
      "learning_rate": 9.751586355040426e-06,
      "loss": 0.0648,
      "step": 95400
    },
    {
      "epoch": 0.15615692281507956,
      "grad_norm": 3.587059736251831,
      "learning_rate": 9.75152046282691e-06,
      "loss": 0.0836,
      "step": 95420
    },
    {
      "epoch": 0.1561896532537329,
      "grad_norm": 2.1237261295318604,
      "learning_rate": 9.751454570613392e-06,
      "loss": 0.0666,
      "step": 95440
    },
    {
      "epoch": 0.15622238369238625,
      "grad_norm": 2.260577917098999,
      "learning_rate": 9.751388678399875e-06,
      "loss": 0.0647,
      "step": 95460
    },
    {
      "epoch": 0.1562551141310396,
      "grad_norm": 1.2406810522079468,
      "learning_rate": 9.751322786186357e-06,
      "loss": 0.0715,
      "step": 95480
    },
    {
      "epoch": 0.1562878445696929,
      "grad_norm": 3.573594331741333,
      "learning_rate": 9.75125689397284e-06,
      "loss": 0.0721,
      "step": 95500
    },
    {
      "epoch": 0.15632057500834626,
      "grad_norm": 3.3377909660339355,
      "learning_rate": 9.751191001759322e-06,
      "loss": 0.0712,
      "step": 95520
    },
    {
      "epoch": 0.1563533054469996,
      "grad_norm": 3.170438051223755,
      "learning_rate": 9.751125109545806e-06,
      "loss": 0.0742,
      "step": 95540
    },
    {
      "epoch": 0.15638603588565295,
      "grad_norm": 6.026580333709717,
      "learning_rate": 9.751059217332288e-06,
      "loss": 0.0851,
      "step": 95560
    },
    {
      "epoch": 0.1564187663243063,
      "grad_norm": 2.4854438304901123,
      "learning_rate": 9.750993325118772e-06,
      "loss": 0.0848,
      "step": 95580
    },
    {
      "epoch": 0.1564514967629596,
      "grad_norm": 2.2686209678649902,
      "learning_rate": 9.750927432905253e-06,
      "loss": 0.0797,
      "step": 95600
    },
    {
      "epoch": 0.15648422720161295,
      "grad_norm": 0.8513339757919312,
      "learning_rate": 9.750861540691737e-06,
      "loss": 0.0604,
      "step": 95620
    },
    {
      "epoch": 0.1565169576402663,
      "grad_norm": 3.6156344413757324,
      "learning_rate": 9.750795648478219e-06,
      "loss": 0.0812,
      "step": 95640
    },
    {
      "epoch": 0.15654968807891964,
      "grad_norm": 6.4413580894470215,
      "learning_rate": 9.750729756264702e-06,
      "loss": 0.0789,
      "step": 95660
    },
    {
      "epoch": 0.15658241851757299,
      "grad_norm": 2.264709711074829,
      "learning_rate": 9.750663864051186e-06,
      "loss": 0.054,
      "step": 95680
    },
    {
      "epoch": 0.1566151489562263,
      "grad_norm": 3.3218772411346436,
      "learning_rate": 9.750597971837668e-06,
      "loss": 0.0636,
      "step": 95700
    },
    {
      "epoch": 0.15664787939487965,
      "grad_norm": 2.5314321517944336,
      "learning_rate": 9.750532079624152e-06,
      "loss": 0.0873,
      "step": 95720
    },
    {
      "epoch": 0.156680609833533,
      "grad_norm": 2.8069705963134766,
      "learning_rate": 9.750466187410635e-06,
      "loss": 0.0524,
      "step": 95740
    },
    {
      "epoch": 0.15671334027218634,
      "grad_norm": 3.316035032272339,
      "learning_rate": 9.750400295197117e-06,
      "loss": 0.077,
      "step": 95760
    },
    {
      "epoch": 0.15674607071083968,
      "grad_norm": 3.7562286853790283,
      "learning_rate": 9.7503344029836e-06,
      "loss": 0.0853,
      "step": 95780
    },
    {
      "epoch": 0.156778801149493,
      "grad_norm": 1.7480930089950562,
      "learning_rate": 9.750268510770084e-06,
      "loss": 0.0699,
      "step": 95800
    },
    {
      "epoch": 0.15681153158814634,
      "grad_norm": 1.6017266511917114,
      "learning_rate": 9.750202618556566e-06,
      "loss": 0.0731,
      "step": 95820
    },
    {
      "epoch": 0.15684426202679969,
      "grad_norm": 2.650038719177246,
      "learning_rate": 9.75013672634305e-06,
      "loss": 0.0849,
      "step": 95840
    },
    {
      "epoch": 0.15687699246545303,
      "grad_norm": 0.7703133821487427,
      "learning_rate": 9.750070834129532e-06,
      "loss": 0.0693,
      "step": 95860
    },
    {
      "epoch": 0.15690972290410637,
      "grad_norm": 4.516238212585449,
      "learning_rate": 9.750004941916015e-06,
      "loss": 0.0925,
      "step": 95880
    },
    {
      "epoch": 0.1569424533427597,
      "grad_norm": 1.114397406578064,
      "learning_rate": 9.749939049702497e-06,
      "loss": 0.064,
      "step": 95900
    },
    {
      "epoch": 0.15697518378141304,
      "grad_norm": 3.355072021484375,
      "learning_rate": 9.74987315748898e-06,
      "loss": 0.0698,
      "step": 95920
    },
    {
      "epoch": 0.15700791422006638,
      "grad_norm": 3.1152327060699463,
      "learning_rate": 9.749807265275463e-06,
      "loss": 0.0647,
      "step": 95940
    },
    {
      "epoch": 0.15704064465871972,
      "grad_norm": 2.965578079223633,
      "learning_rate": 9.749741373061946e-06,
      "loss": 0.076,
      "step": 95960
    },
    {
      "epoch": 0.15707337509737304,
      "grad_norm": 4.782332897186279,
      "learning_rate": 9.749675480848428e-06,
      "loss": 0.0771,
      "step": 95980
    },
    {
      "epoch": 0.15710610553602639,
      "grad_norm": 4.4445109367370605,
      "learning_rate": 9.749609588634912e-06,
      "loss": 0.0686,
      "step": 96000
    },
    {
      "epoch": 0.15713883597467973,
      "grad_norm": 4.875063896179199,
      "learning_rate": 9.749543696421395e-06,
      "loss": 0.0799,
      "step": 96020
    },
    {
      "epoch": 0.15717156641333307,
      "grad_norm": 3.2227823734283447,
      "learning_rate": 9.749477804207877e-06,
      "loss": 0.0625,
      "step": 96040
    },
    {
      "epoch": 0.15720429685198642,
      "grad_norm": 0.9953708648681641,
      "learning_rate": 9.74941191199436e-06,
      "loss": 0.0715,
      "step": 96060
    },
    {
      "epoch": 0.15723702729063974,
      "grad_norm": 10.502516746520996,
      "learning_rate": 9.749346019780843e-06,
      "loss": 0.0741,
      "step": 96080
    },
    {
      "epoch": 0.15726975772929308,
      "grad_norm": 3.770400285720825,
      "learning_rate": 9.749280127567326e-06,
      "loss": 0.0799,
      "step": 96100
    },
    {
      "epoch": 0.15730248816794642,
      "grad_norm": 5.106734275817871,
      "learning_rate": 9.749214235353808e-06,
      "loss": 0.0542,
      "step": 96120
    },
    {
      "epoch": 0.15733521860659977,
      "grad_norm": 2.6140053272247314,
      "learning_rate": 9.749148343140292e-06,
      "loss": 0.064,
      "step": 96140
    },
    {
      "epoch": 0.1573679490452531,
      "grad_norm": 1.728727102279663,
      "learning_rate": 9.749082450926775e-06,
      "loss": 0.0801,
      "step": 96160
    },
    {
      "epoch": 0.15740067948390643,
      "grad_norm": 0.8931089639663696,
      "learning_rate": 9.749016558713257e-06,
      "loss": 0.0722,
      "step": 96180
    },
    {
      "epoch": 0.15743340992255977,
      "grad_norm": 3.6608378887176514,
      "learning_rate": 9.74895066649974e-06,
      "loss": 0.0678,
      "step": 96200
    },
    {
      "epoch": 0.15746614036121312,
      "grad_norm": 3.7433359622955322,
      "learning_rate": 9.748884774286224e-06,
      "loss": 0.0651,
      "step": 96220
    },
    {
      "epoch": 0.15749887079986646,
      "grad_norm": 2.2455685138702393,
      "learning_rate": 9.748818882072706e-06,
      "loss": 0.0732,
      "step": 96240
    },
    {
      "epoch": 0.1575316012385198,
      "grad_norm": 2.1016652584075928,
      "learning_rate": 9.74875298985919e-06,
      "loss": 0.0765,
      "step": 96260
    },
    {
      "epoch": 0.15756433167717313,
      "grad_norm": 2.779918909072876,
      "learning_rate": 9.748687097645672e-06,
      "loss": 0.074,
      "step": 96280
    },
    {
      "epoch": 0.15759706211582647,
      "grad_norm": 1.9012916088104248,
      "learning_rate": 9.748621205432155e-06,
      "loss": 0.0873,
      "step": 96300
    },
    {
      "epoch": 0.15762979255447981,
      "grad_norm": 4.299501895904541,
      "learning_rate": 9.748555313218637e-06,
      "loss": 0.0714,
      "step": 96320
    },
    {
      "epoch": 0.15766252299313316,
      "grad_norm": 2.259148120880127,
      "learning_rate": 9.74848942100512e-06,
      "loss": 0.0741,
      "step": 96340
    },
    {
      "epoch": 0.1576952534317865,
      "grad_norm": 4.4306230545043945,
      "learning_rate": 9.748423528791604e-06,
      "loss": 0.0693,
      "step": 96360
    },
    {
      "epoch": 0.15772798387043982,
      "grad_norm": 1.2792993783950806,
      "learning_rate": 9.748357636578086e-06,
      "loss": 0.075,
      "step": 96380
    },
    {
      "epoch": 0.15776071430909316,
      "grad_norm": 2.933591365814209,
      "learning_rate": 9.74829174436457e-06,
      "loss": 0.0792,
      "step": 96400
    },
    {
      "epoch": 0.1577934447477465,
      "grad_norm": 4.568138122558594,
      "learning_rate": 9.748225852151052e-06,
      "loss": 0.0752,
      "step": 96420
    },
    {
      "epoch": 0.15782617518639985,
      "grad_norm": 4.168363571166992,
      "learning_rate": 9.748159959937535e-06,
      "loss": 0.0545,
      "step": 96440
    },
    {
      "epoch": 0.1578589056250532,
      "grad_norm": 2.3629584312438965,
      "learning_rate": 9.748094067724017e-06,
      "loss": 0.0775,
      "step": 96460
    },
    {
      "epoch": 0.15789163606370651,
      "grad_norm": 1.1864848136901855,
      "learning_rate": 9.7480281755105e-06,
      "loss": 0.0744,
      "step": 96480
    },
    {
      "epoch": 0.15792436650235986,
      "grad_norm": 2.7447917461395264,
      "learning_rate": 9.747962283296983e-06,
      "loss": 0.0764,
      "step": 96500
    },
    {
      "epoch": 0.1579570969410132,
      "grad_norm": 2.4412918090820312,
      "learning_rate": 9.747896391083466e-06,
      "loss": 0.0773,
      "step": 96520
    },
    {
      "epoch": 0.15798982737966655,
      "grad_norm": 8.485719680786133,
      "learning_rate": 9.74783049886995e-06,
      "loss": 0.0693,
      "step": 96540
    },
    {
      "epoch": 0.1580225578183199,
      "grad_norm": 4.906141757965088,
      "learning_rate": 9.747764606656432e-06,
      "loss": 0.0694,
      "step": 96560
    },
    {
      "epoch": 0.1580552882569732,
      "grad_norm": 5.025180816650391,
      "learning_rate": 9.747698714442915e-06,
      "loss": 0.0652,
      "step": 96580
    },
    {
      "epoch": 0.15808801869562655,
      "grad_norm": 5.593366622924805,
      "learning_rate": 9.747632822229399e-06,
      "loss": 0.0768,
      "step": 96600
    },
    {
      "epoch": 0.1581207491342799,
      "grad_norm": 2.1075551509857178,
      "learning_rate": 9.74756693001588e-06,
      "loss": 0.075,
      "step": 96620
    },
    {
      "epoch": 0.15815347957293324,
      "grad_norm": 0.6396471858024597,
      "learning_rate": 9.747501037802364e-06,
      "loss": 0.0694,
      "step": 96640
    },
    {
      "epoch": 0.1581862100115866,
      "grad_norm": 5.594414710998535,
      "learning_rate": 9.747435145588846e-06,
      "loss": 0.061,
      "step": 96660
    },
    {
      "epoch": 0.1582189404502399,
      "grad_norm": 2.227534055709839,
      "learning_rate": 9.74736925337533e-06,
      "loss": 0.0594,
      "step": 96680
    },
    {
      "epoch": 0.15825167088889325,
      "grad_norm": 2.8835971355438232,
      "learning_rate": 9.747303361161813e-06,
      "loss": 0.0795,
      "step": 96700
    },
    {
      "epoch": 0.1582844013275466,
      "grad_norm": 0.8055634498596191,
      "learning_rate": 9.747237468948295e-06,
      "loss": 0.0811,
      "step": 96720
    },
    {
      "epoch": 0.15831713176619994,
      "grad_norm": 2.9888951778411865,
      "learning_rate": 9.747171576734779e-06,
      "loss": 0.0641,
      "step": 96740
    },
    {
      "epoch": 0.15834986220485328,
      "grad_norm": 6.664957523345947,
      "learning_rate": 9.74710568452126e-06,
      "loss": 0.0739,
      "step": 96760
    },
    {
      "epoch": 0.1583825926435066,
      "grad_norm": 4.338232040405273,
      "learning_rate": 9.747039792307744e-06,
      "loss": 0.0768,
      "step": 96780
    },
    {
      "epoch": 0.15841532308215994,
      "grad_norm": 4.046122074127197,
      "learning_rate": 9.746973900094226e-06,
      "loss": 0.0799,
      "step": 96800
    },
    {
      "epoch": 0.1584480535208133,
      "grad_norm": 4.341841220855713,
      "learning_rate": 9.74690800788071e-06,
      "loss": 0.0654,
      "step": 96820
    },
    {
      "epoch": 0.15848078395946663,
      "grad_norm": 3.5161473751068115,
      "learning_rate": 9.746842115667192e-06,
      "loss": 0.0869,
      "step": 96840
    },
    {
      "epoch": 0.15851351439811998,
      "grad_norm": 5.107428550720215,
      "learning_rate": 9.746776223453675e-06,
      "loss": 0.0732,
      "step": 96860
    },
    {
      "epoch": 0.1585462448367733,
      "grad_norm": 2.329495429992676,
      "learning_rate": 9.746710331240157e-06,
      "loss": 0.08,
      "step": 96880
    },
    {
      "epoch": 0.15857897527542664,
      "grad_norm": 2.4032540321350098,
      "learning_rate": 9.74664443902664e-06,
      "loss": 0.0762,
      "step": 96900
    },
    {
      "epoch": 0.15861170571407998,
      "grad_norm": 1.4758917093276978,
      "learning_rate": 9.746578546813123e-06,
      "loss": 0.066,
      "step": 96920
    },
    {
      "epoch": 0.15864443615273333,
      "grad_norm": 6.957388877868652,
      "learning_rate": 9.746512654599606e-06,
      "loss": 0.0694,
      "step": 96940
    },
    {
      "epoch": 0.15867716659138667,
      "grad_norm": 1.9876861572265625,
      "learning_rate": 9.74644676238609e-06,
      "loss": 0.0818,
      "step": 96960
    },
    {
      "epoch": 0.15870989703004,
      "grad_norm": 1.4389833211898804,
      "learning_rate": 9.746380870172572e-06,
      "loss": 0.0695,
      "step": 96980
    },
    {
      "epoch": 0.15874262746869333,
      "grad_norm": 3.1242942810058594,
      "learning_rate": 9.746314977959055e-06,
      "loss": 0.0774,
      "step": 97000
    },
    {
      "epoch": 0.15877535790734668,
      "grad_norm": 3.410661220550537,
      "learning_rate": 9.746249085745539e-06,
      "loss": 0.0622,
      "step": 97020
    },
    {
      "epoch": 0.15880808834600002,
      "grad_norm": 2.2437362670898438,
      "learning_rate": 9.74618319353202e-06,
      "loss": 0.058,
      "step": 97040
    },
    {
      "epoch": 0.15884081878465336,
      "grad_norm": 4.268531799316406,
      "learning_rate": 9.746117301318504e-06,
      "loss": 0.0697,
      "step": 97060
    },
    {
      "epoch": 0.15887354922330668,
      "grad_norm": 2.2836782932281494,
      "learning_rate": 9.746051409104988e-06,
      "loss": 0.0682,
      "step": 97080
    },
    {
      "epoch": 0.15890627966196003,
      "grad_norm": 2.673412561416626,
      "learning_rate": 9.74598551689147e-06,
      "loss": 0.077,
      "step": 97100
    },
    {
      "epoch": 0.15893901010061337,
      "grad_norm": 3.673816204071045,
      "learning_rate": 9.745919624677953e-06,
      "loss": 0.0671,
      "step": 97120
    },
    {
      "epoch": 0.15897174053926671,
      "grad_norm": 1.726587176322937,
      "learning_rate": 9.745853732464435e-06,
      "loss": 0.0558,
      "step": 97140
    },
    {
      "epoch": 0.15900447097792006,
      "grad_norm": 1.213365077972412,
      "learning_rate": 9.745787840250919e-06,
      "loss": 0.07,
      "step": 97160
    },
    {
      "epoch": 0.15903720141657338,
      "grad_norm": 2.3972532749176025,
      "learning_rate": 9.745721948037401e-06,
      "loss": 0.0816,
      "step": 97180
    },
    {
      "epoch": 0.15906993185522672,
      "grad_norm": 6.400440216064453,
      "learning_rate": 9.745656055823884e-06,
      "loss": 0.0645,
      "step": 97200
    },
    {
      "epoch": 0.15910266229388006,
      "grad_norm": 6.475362777709961,
      "learning_rate": 9.745590163610366e-06,
      "loss": 0.0887,
      "step": 97220
    },
    {
      "epoch": 0.1591353927325334,
      "grad_norm": 0.7747164368629456,
      "learning_rate": 9.74552427139685e-06,
      "loss": 0.0617,
      "step": 97240
    },
    {
      "epoch": 0.15916812317118675,
      "grad_norm": 3.0072221755981445,
      "learning_rate": 9.745458379183332e-06,
      "loss": 0.0645,
      "step": 97260
    },
    {
      "epoch": 0.15920085360984007,
      "grad_norm": 7.205784320831299,
      "learning_rate": 9.745392486969815e-06,
      "loss": 0.0619,
      "step": 97280
    },
    {
      "epoch": 0.15923358404849342,
      "grad_norm": 3.0361974239349365,
      "learning_rate": 9.745326594756297e-06,
      "loss": 0.078,
      "step": 97300
    },
    {
      "epoch": 0.15926631448714676,
      "grad_norm": 6.655880451202393,
      "learning_rate": 9.745260702542781e-06,
      "loss": 0.0738,
      "step": 97320
    },
    {
      "epoch": 0.1592990449258001,
      "grad_norm": 4.406084060668945,
      "learning_rate": 9.745194810329264e-06,
      "loss": 0.0605,
      "step": 97340
    },
    {
      "epoch": 0.15933177536445342,
      "grad_norm": 1.233443260192871,
      "learning_rate": 9.745128918115746e-06,
      "loss": 0.0674,
      "step": 97360
    },
    {
      "epoch": 0.15936450580310677,
      "grad_norm": 3.6964900493621826,
      "learning_rate": 9.74506302590223e-06,
      "loss": 0.0708,
      "step": 97380
    },
    {
      "epoch": 0.1593972362417601,
      "grad_norm": 3.488574504852295,
      "learning_rate": 9.744997133688714e-06,
      "loss": 0.0655,
      "step": 97400
    },
    {
      "epoch": 0.15942996668041345,
      "grad_norm": 3.986199378967285,
      "learning_rate": 9.744931241475195e-06,
      "loss": 0.0719,
      "step": 97420
    },
    {
      "epoch": 0.1594626971190668,
      "grad_norm": 1.4890331029891968,
      "learning_rate": 9.744865349261679e-06,
      "loss": 0.0834,
      "step": 97440
    },
    {
      "epoch": 0.15949542755772012,
      "grad_norm": 2.1501431465148926,
      "learning_rate": 9.744799457048163e-06,
      "loss": 0.0515,
      "step": 97460
    },
    {
      "epoch": 0.15952815799637346,
      "grad_norm": 2.2678115367889404,
      "learning_rate": 9.744733564834645e-06,
      "loss": 0.0771,
      "step": 97480
    },
    {
      "epoch": 0.1595608884350268,
      "grad_norm": 2.1691842079162598,
      "learning_rate": 9.744667672621128e-06,
      "loss": 0.0781,
      "step": 97500
    },
    {
      "epoch": 0.15959361887368015,
      "grad_norm": 1.9291927814483643,
      "learning_rate": 9.74460178040761e-06,
      "loss": 0.0636,
      "step": 97520
    },
    {
      "epoch": 0.1596263493123335,
      "grad_norm": 2.4506494998931885,
      "learning_rate": 9.744535888194094e-06,
      "loss": 0.06,
      "step": 97540
    },
    {
      "epoch": 0.1596590797509868,
      "grad_norm": 1.8078469038009644,
      "learning_rate": 9.744469995980575e-06,
      "loss": 0.0733,
      "step": 97560
    },
    {
      "epoch": 0.15969181018964015,
      "grad_norm": 3.199850559234619,
      "learning_rate": 9.744404103767059e-06,
      "loss": 0.0644,
      "step": 97580
    },
    {
      "epoch": 0.1597245406282935,
      "grad_norm": 4.140484809875488,
      "learning_rate": 9.744338211553541e-06,
      "loss": 0.0586,
      "step": 97600
    },
    {
      "epoch": 0.15975727106694684,
      "grad_norm": 2.1186368465423584,
      "learning_rate": 9.744272319340025e-06,
      "loss": 0.0884,
      "step": 97620
    },
    {
      "epoch": 0.1597900015056002,
      "grad_norm": 1.8721957206726074,
      "learning_rate": 9.744206427126506e-06,
      "loss": 0.0695,
      "step": 97640
    },
    {
      "epoch": 0.1598227319442535,
      "grad_norm": 2.6831281185150146,
      "learning_rate": 9.74414053491299e-06,
      "loss": 0.0801,
      "step": 97660
    },
    {
      "epoch": 0.15985546238290685,
      "grad_norm": 0.9026910662651062,
      "learning_rate": 9.744074642699472e-06,
      "loss": 0.0659,
      "step": 97680
    },
    {
      "epoch": 0.1598881928215602,
      "grad_norm": 1.2059922218322754,
      "learning_rate": 9.744008750485955e-06,
      "loss": 0.0812,
      "step": 97700
    },
    {
      "epoch": 0.15992092326021354,
      "grad_norm": 4.864364147186279,
      "learning_rate": 9.743942858272439e-06,
      "loss": 0.0722,
      "step": 97720
    },
    {
      "epoch": 0.15995365369886688,
      "grad_norm": 4.1650190353393555,
      "learning_rate": 9.743876966058921e-06,
      "loss": 0.0624,
      "step": 97740
    },
    {
      "epoch": 0.1599863841375202,
      "grad_norm": 1.457500696182251,
      "learning_rate": 9.743811073845405e-06,
      "loss": 0.0666,
      "step": 97760
    },
    {
      "epoch": 0.16001911457617354,
      "grad_norm": 0.6477744579315186,
      "learning_rate": 9.743745181631888e-06,
      "loss": 0.0703,
      "step": 97780
    },
    {
      "epoch": 0.1600518450148269,
      "grad_norm": 1.6363067626953125,
      "learning_rate": 9.74367928941837e-06,
      "loss": 0.0573,
      "step": 97800
    },
    {
      "epoch": 0.16008457545348023,
      "grad_norm": 1.2181771993637085,
      "learning_rate": 9.743613397204854e-06,
      "loss": 0.0639,
      "step": 97820
    },
    {
      "epoch": 0.16011730589213358,
      "grad_norm": 1.9349961280822754,
      "learning_rate": 9.743547504991337e-06,
      "loss": 0.0771,
      "step": 97840
    },
    {
      "epoch": 0.1601500363307869,
      "grad_norm": 1.4499547481536865,
      "learning_rate": 9.743481612777819e-06,
      "loss": 0.0771,
      "step": 97860
    },
    {
      "epoch": 0.16018276676944024,
      "grad_norm": 1.6630278825759888,
      "learning_rate": 9.743415720564303e-06,
      "loss": 0.0656,
      "step": 97880
    },
    {
      "epoch": 0.16021549720809358,
      "grad_norm": 2.597433090209961,
      "learning_rate": 9.743349828350785e-06,
      "loss": 0.0847,
      "step": 97900
    },
    {
      "epoch": 0.16024822764674693,
      "grad_norm": 3.7614519596099854,
      "learning_rate": 9.743283936137268e-06,
      "loss": 0.0688,
      "step": 97920
    },
    {
      "epoch": 0.16028095808540027,
      "grad_norm": 5.200447082519531,
      "learning_rate": 9.74321804392375e-06,
      "loss": 0.0775,
      "step": 97940
    },
    {
      "epoch": 0.1603136885240536,
      "grad_norm": 2.3978090286254883,
      "learning_rate": 9.743152151710234e-06,
      "loss": 0.0727,
      "step": 97960
    },
    {
      "epoch": 0.16034641896270693,
      "grad_norm": 3.485156297683716,
      "learning_rate": 9.743086259496716e-06,
      "loss": 0.0621,
      "step": 97980
    },
    {
      "epoch": 0.16037914940136028,
      "grad_norm": 2.3659143447875977,
      "learning_rate": 9.743020367283199e-06,
      "loss": 0.0729,
      "step": 98000
    },
    {
      "epoch": 0.16041187984001362,
      "grad_norm": 3.2691545486450195,
      "learning_rate": 9.742954475069681e-06,
      "loss": 0.0677,
      "step": 98020
    },
    {
      "epoch": 0.16044461027866697,
      "grad_norm": 3.1149916648864746,
      "learning_rate": 9.742888582856165e-06,
      "loss": 0.0529,
      "step": 98040
    },
    {
      "epoch": 0.16047734071732028,
      "grad_norm": 2.254990339279175,
      "learning_rate": 9.742822690642647e-06,
      "loss": 0.068,
      "step": 98060
    },
    {
      "epoch": 0.16051007115597363,
      "grad_norm": 4.574305057525635,
      "learning_rate": 9.74275679842913e-06,
      "loss": 0.069,
      "step": 98080
    },
    {
      "epoch": 0.16054280159462697,
      "grad_norm": 4.141981601715088,
      "learning_rate": 9.742690906215612e-06,
      "loss": 0.0655,
      "step": 98100
    },
    {
      "epoch": 0.16057553203328032,
      "grad_norm": 2.7213401794433594,
      "learning_rate": 9.742625014002096e-06,
      "loss": 0.0695,
      "step": 98120
    },
    {
      "epoch": 0.16060826247193366,
      "grad_norm": 8.083833694458008,
      "learning_rate": 9.74255912178858e-06,
      "loss": 0.0788,
      "step": 98140
    },
    {
      "epoch": 0.16064099291058698,
      "grad_norm": 21.851299285888672,
      "learning_rate": 9.742493229575061e-06,
      "loss": 0.0982,
      "step": 98160
    },
    {
      "epoch": 0.16067372334924032,
      "grad_norm": 1.333159327507019,
      "learning_rate": 9.742427337361545e-06,
      "loss": 0.0704,
      "step": 98180
    },
    {
      "epoch": 0.16070645378789367,
      "grad_norm": 6.403407096862793,
      "learning_rate": 9.742361445148028e-06,
      "loss": 0.0699,
      "step": 98200
    },
    {
      "epoch": 0.160739184226547,
      "grad_norm": 2.625384569168091,
      "learning_rate": 9.74229555293451e-06,
      "loss": 0.0716,
      "step": 98220
    },
    {
      "epoch": 0.16077191466520036,
      "grad_norm": 3.6919636726379395,
      "learning_rate": 9.742229660720994e-06,
      "loss": 0.0682,
      "step": 98240
    },
    {
      "epoch": 0.16080464510385367,
      "grad_norm": 1.480006217956543,
      "learning_rate": 9.742163768507477e-06,
      "loss": 0.0632,
      "step": 98260
    },
    {
      "epoch": 0.16083737554250702,
      "grad_norm": 4.647873401641846,
      "learning_rate": 9.74209787629396e-06,
      "loss": 0.0908,
      "step": 98280
    },
    {
      "epoch": 0.16087010598116036,
      "grad_norm": 2.0111582279205322,
      "learning_rate": 9.742031984080443e-06,
      "loss": 0.0736,
      "step": 98300
    },
    {
      "epoch": 0.1609028364198137,
      "grad_norm": 3.053861379623413,
      "learning_rate": 9.741966091866925e-06,
      "loss": 0.0703,
      "step": 98320
    },
    {
      "epoch": 0.16093556685846705,
      "grad_norm": 2.674576997756958,
      "learning_rate": 9.741900199653408e-06,
      "loss": 0.0741,
      "step": 98340
    },
    {
      "epoch": 0.16096829729712037,
      "grad_norm": 4.183969974517822,
      "learning_rate": 9.74183430743989e-06,
      "loss": 0.0828,
      "step": 98360
    },
    {
      "epoch": 0.1610010277357737,
      "grad_norm": 3.5299580097198486,
      "learning_rate": 9.741768415226374e-06,
      "loss": 0.0749,
      "step": 98380
    },
    {
      "epoch": 0.16103375817442706,
      "grad_norm": 4.174997329711914,
      "learning_rate": 9.741702523012856e-06,
      "loss": 0.0625,
      "step": 98400
    },
    {
      "epoch": 0.1610664886130804,
      "grad_norm": 10.503003120422363,
      "learning_rate": 9.74163663079934e-06,
      "loss": 0.0773,
      "step": 98420
    },
    {
      "epoch": 0.16109921905173374,
      "grad_norm": 5.361988067626953,
      "learning_rate": 9.741570738585821e-06,
      "loss": 0.0631,
      "step": 98440
    },
    {
      "epoch": 0.16113194949038706,
      "grad_norm": 3.5656375885009766,
      "learning_rate": 9.741504846372305e-06,
      "loss": 0.0599,
      "step": 98460
    },
    {
      "epoch": 0.1611646799290404,
      "grad_norm": 1.8420791625976562,
      "learning_rate": 9.741438954158788e-06,
      "loss": 0.0728,
      "step": 98480
    },
    {
      "epoch": 0.16119741036769375,
      "grad_norm": 3.0219216346740723,
      "learning_rate": 9.74137306194527e-06,
      "loss": 0.0796,
      "step": 98500
    },
    {
      "epoch": 0.1612301408063471,
      "grad_norm": 20.82181739807129,
      "learning_rate": 9.741307169731754e-06,
      "loss": 0.0633,
      "step": 98520
    },
    {
      "epoch": 0.16126287124500044,
      "grad_norm": 1.331514596939087,
      "learning_rate": 9.741241277518236e-06,
      "loss": 0.0557,
      "step": 98540
    },
    {
      "epoch": 0.16129560168365376,
      "grad_norm": 1.0227527618408203,
      "learning_rate": 9.74117538530472e-06,
      "loss": 0.0675,
      "step": 98560
    },
    {
      "epoch": 0.1613283321223071,
      "grad_norm": 4.5271711349487305,
      "learning_rate": 9.741109493091203e-06,
      "loss": 0.0925,
      "step": 98580
    },
    {
      "epoch": 0.16136106256096044,
      "grad_norm": 1.9222785234451294,
      "learning_rate": 9.741043600877685e-06,
      "loss": 0.0824,
      "step": 98600
    },
    {
      "epoch": 0.1613937929996138,
      "grad_norm": 1.8604497909545898,
      "learning_rate": 9.740977708664168e-06,
      "loss": 0.0693,
      "step": 98620
    },
    {
      "epoch": 0.16142652343826713,
      "grad_norm": 1.7936393022537231,
      "learning_rate": 9.740911816450652e-06,
      "loss": 0.0787,
      "step": 98640
    },
    {
      "epoch": 0.16145925387692045,
      "grad_norm": 2.3961503505706787,
      "learning_rate": 9.740845924237134e-06,
      "loss": 0.0649,
      "step": 98660
    },
    {
      "epoch": 0.1614919843155738,
      "grad_norm": 1.3712931871414185,
      "learning_rate": 9.740780032023617e-06,
      "loss": 0.0653,
      "step": 98680
    },
    {
      "epoch": 0.16152471475422714,
      "grad_norm": 2.9712724685668945,
      "learning_rate": 9.7407141398101e-06,
      "loss": 0.0956,
      "step": 98700
    },
    {
      "epoch": 0.16155744519288048,
      "grad_norm": 5.301608562469482,
      "learning_rate": 9.740648247596583e-06,
      "loss": 0.0744,
      "step": 98720
    },
    {
      "epoch": 0.1615901756315338,
      "grad_norm": 2.7728538513183594,
      "learning_rate": 9.740582355383065e-06,
      "loss": 0.068,
      "step": 98740
    },
    {
      "epoch": 0.16162290607018714,
      "grad_norm": 8.529300689697266,
      "learning_rate": 9.740516463169548e-06,
      "loss": 0.0747,
      "step": 98760
    },
    {
      "epoch": 0.1616556365088405,
      "grad_norm": 3.6114532947540283,
      "learning_rate": 9.74045057095603e-06,
      "loss": 0.0789,
      "step": 98780
    },
    {
      "epoch": 0.16168836694749383,
      "grad_norm": 3.281510829925537,
      "learning_rate": 9.740384678742514e-06,
      "loss": 0.0772,
      "step": 98800
    },
    {
      "epoch": 0.16172109738614718,
      "grad_norm": 1.774031639099121,
      "learning_rate": 9.740318786528997e-06,
      "loss": 0.0697,
      "step": 98820
    },
    {
      "epoch": 0.1617538278248005,
      "grad_norm": 2.141017198562622,
      "learning_rate": 9.74025289431548e-06,
      "loss": 0.0636,
      "step": 98840
    },
    {
      "epoch": 0.16178655826345384,
      "grad_norm": 2.7697129249572754,
      "learning_rate": 9.740187002101963e-06,
      "loss": 0.0755,
      "step": 98860
    },
    {
      "epoch": 0.16181928870210718,
      "grad_norm": 1.9260176420211792,
      "learning_rate": 9.740121109888445e-06,
      "loss": 0.0662,
      "step": 98880
    },
    {
      "epoch": 0.16185201914076053,
      "grad_norm": 2.6990041732788086,
      "learning_rate": 9.740055217674928e-06,
      "loss": 0.0659,
      "step": 98900
    },
    {
      "epoch": 0.16188474957941387,
      "grad_norm": 2.7413954734802246,
      "learning_rate": 9.73998932546141e-06,
      "loss": 0.0631,
      "step": 98920
    },
    {
      "epoch": 0.1619174800180672,
      "grad_norm": 3.2774014472961426,
      "learning_rate": 9.739923433247894e-06,
      "loss": 0.0696,
      "step": 98940
    },
    {
      "epoch": 0.16195021045672053,
      "grad_norm": 1.5460926294326782,
      "learning_rate": 9.739857541034376e-06,
      "loss": 0.0861,
      "step": 98960
    },
    {
      "epoch": 0.16198294089537388,
      "grad_norm": 2.590189218521118,
      "learning_rate": 9.73979164882086e-06,
      "loss": 0.0673,
      "step": 98980
    },
    {
      "epoch": 0.16201567133402722,
      "grad_norm": 4.480491638183594,
      "learning_rate": 9.739725756607343e-06,
      "loss": 0.0722,
      "step": 99000
    },
    {
      "epoch": 0.16204840177268057,
      "grad_norm": 3.3727800846099854,
      "learning_rate": 9.739659864393825e-06,
      "loss": 0.0747,
      "step": 99020
    },
    {
      "epoch": 0.16208113221133388,
      "grad_norm": 2.2017784118652344,
      "learning_rate": 9.739593972180308e-06,
      "loss": 0.0573,
      "step": 99040
    },
    {
      "epoch": 0.16211386264998723,
      "grad_norm": 2.2752034664154053,
      "learning_rate": 9.739528079966792e-06,
      "loss": 0.087,
      "step": 99060
    },
    {
      "epoch": 0.16214659308864057,
      "grad_norm": 3.002473831176758,
      "learning_rate": 9.739462187753274e-06,
      "loss": 0.0794,
      "step": 99080
    },
    {
      "epoch": 0.16217932352729392,
      "grad_norm": 0.6768580675125122,
      "learning_rate": 9.739396295539757e-06,
      "loss": 0.0619,
      "step": 99100
    },
    {
      "epoch": 0.16221205396594726,
      "grad_norm": 2.5100483894348145,
      "learning_rate": 9.73933040332624e-06,
      "loss": 0.0592,
      "step": 99120
    },
    {
      "epoch": 0.16224478440460058,
      "grad_norm": 2.791689872741699,
      "learning_rate": 9.739264511112723e-06,
      "loss": 0.0752,
      "step": 99140
    },
    {
      "epoch": 0.16227751484325392,
      "grad_norm": 4.277963638305664,
      "learning_rate": 9.739198618899205e-06,
      "loss": 0.0701,
      "step": 99160
    },
    {
      "epoch": 0.16231024528190727,
      "grad_norm": 1.6139131784439087,
      "learning_rate": 9.739132726685688e-06,
      "loss": 0.0806,
      "step": 99180
    },
    {
      "epoch": 0.1623429757205606,
      "grad_norm": 2.0756354331970215,
      "learning_rate": 9.739066834472172e-06,
      "loss": 0.0672,
      "step": 99200
    },
    {
      "epoch": 0.16237570615921396,
      "grad_norm": 2.0313141345977783,
      "learning_rate": 9.739000942258654e-06,
      "loss": 0.0732,
      "step": 99220
    },
    {
      "epoch": 0.16240843659786727,
      "grad_norm": 2.788703680038452,
      "learning_rate": 9.738935050045137e-06,
      "loss": 0.0662,
      "step": 99240
    },
    {
      "epoch": 0.16244116703652062,
      "grad_norm": 4.02210807800293,
      "learning_rate": 9.73886915783162e-06,
      "loss": 0.0666,
      "step": 99260
    },
    {
      "epoch": 0.16247389747517396,
      "grad_norm": 1.4926037788391113,
      "learning_rate": 9.738803265618103e-06,
      "loss": 0.0619,
      "step": 99280
    },
    {
      "epoch": 0.1625066279138273,
      "grad_norm": 2.548508405685425,
      "learning_rate": 9.738737373404585e-06,
      "loss": 0.0532,
      "step": 99300
    },
    {
      "epoch": 0.16253935835248065,
      "grad_norm": 2.1272268295288086,
      "learning_rate": 9.738671481191068e-06,
      "loss": 0.0648,
      "step": 99320
    },
    {
      "epoch": 0.16257208879113397,
      "grad_norm": 2.0190320014953613,
      "learning_rate": 9.73860558897755e-06,
      "loss": 0.0672,
      "step": 99340
    },
    {
      "epoch": 0.1626048192297873,
      "grad_norm": 3.247879981994629,
      "learning_rate": 9.738539696764034e-06,
      "loss": 0.0666,
      "step": 99360
    },
    {
      "epoch": 0.16263754966844066,
      "grad_norm": 1.433437466621399,
      "learning_rate": 9.738473804550517e-06,
      "loss": 0.0581,
      "step": 99380
    },
    {
      "epoch": 0.162670280107094,
      "grad_norm": 3.8291995525360107,
      "learning_rate": 9.738407912337e-06,
      "loss": 0.0634,
      "step": 99400
    },
    {
      "epoch": 0.16270301054574735,
      "grad_norm": 2.7675421237945557,
      "learning_rate": 9.738342020123483e-06,
      "loss": 0.0898,
      "step": 99420
    },
    {
      "epoch": 0.16273574098440066,
      "grad_norm": 2.038525104522705,
      "learning_rate": 9.738276127909967e-06,
      "loss": 0.0884,
      "step": 99440
    },
    {
      "epoch": 0.162768471423054,
      "grad_norm": 2.0649333000183105,
      "learning_rate": 9.738210235696448e-06,
      "loss": 0.0593,
      "step": 99460
    },
    {
      "epoch": 0.16280120186170735,
      "grad_norm": 4.08908224105835,
      "learning_rate": 9.738144343482932e-06,
      "loss": 0.0775,
      "step": 99480
    },
    {
      "epoch": 0.1628339323003607,
      "grad_norm": 2.1788291931152344,
      "learning_rate": 9.738078451269414e-06,
      "loss": 0.0624,
      "step": 99500
    },
    {
      "epoch": 0.16286666273901404,
      "grad_norm": 2.538088798522949,
      "learning_rate": 9.738012559055898e-06,
      "loss": 0.0812,
      "step": 99520
    },
    {
      "epoch": 0.16289939317766736,
      "grad_norm": 1.9967056512832642,
      "learning_rate": 9.737946666842381e-06,
      "loss": 0.0565,
      "step": 99540
    },
    {
      "epoch": 0.1629321236163207,
      "grad_norm": 1.5961360931396484,
      "learning_rate": 9.737880774628863e-06,
      "loss": 0.0737,
      "step": 99560
    },
    {
      "epoch": 0.16296485405497405,
      "grad_norm": 2.651669502258301,
      "learning_rate": 9.737814882415347e-06,
      "loss": 0.088,
      "step": 99580
    },
    {
      "epoch": 0.1629975844936274,
      "grad_norm": 1.5710664987564087,
      "learning_rate": 9.737748990201828e-06,
      "loss": 0.0661,
      "step": 99600
    },
    {
      "epoch": 0.16303031493228073,
      "grad_norm": 2.154043674468994,
      "learning_rate": 9.737683097988312e-06,
      "loss": 0.0665,
      "step": 99620
    },
    {
      "epoch": 0.16306304537093405,
      "grad_norm": 3.3835768699645996,
      "learning_rate": 9.737617205774794e-06,
      "loss": 0.062,
      "step": 99640
    },
    {
      "epoch": 0.1630957758095874,
      "grad_norm": 8.850800514221191,
      "learning_rate": 9.737551313561278e-06,
      "loss": 0.0663,
      "step": 99660
    },
    {
      "epoch": 0.16312850624824074,
      "grad_norm": 4.56876277923584,
      "learning_rate": 9.73748542134776e-06,
      "loss": 0.0646,
      "step": 99680
    },
    {
      "epoch": 0.16316123668689408,
      "grad_norm": 7.353396892547607,
      "learning_rate": 9.737419529134243e-06,
      "loss": 0.0608,
      "step": 99700
    },
    {
      "epoch": 0.16319396712554743,
      "grad_norm": 3.996156930923462,
      "learning_rate": 9.737353636920725e-06,
      "loss": 0.1038,
      "step": 99720
    },
    {
      "epoch": 0.16322669756420075,
      "grad_norm": 2.3334548473358154,
      "learning_rate": 9.737287744707208e-06,
      "loss": 0.0664,
      "step": 99740
    },
    {
      "epoch": 0.1632594280028541,
      "grad_norm": 2.0118443965911865,
      "learning_rate": 9.73722185249369e-06,
      "loss": 0.0653,
      "step": 99760
    },
    {
      "epoch": 0.16329215844150743,
      "grad_norm": 1.5434852838516235,
      "learning_rate": 9.737155960280174e-06,
      "loss": 0.0713,
      "step": 99780
    },
    {
      "epoch": 0.16332488888016078,
      "grad_norm": 2.456401824951172,
      "learning_rate": 9.737090068066658e-06,
      "loss": 0.0555,
      "step": 99800
    },
    {
      "epoch": 0.16335761931881412,
      "grad_norm": 1.9166591167449951,
      "learning_rate": 9.73702417585314e-06,
      "loss": 0.0896,
      "step": 99820
    },
    {
      "epoch": 0.16339034975746744,
      "grad_norm": 4.207624435424805,
      "learning_rate": 9.736958283639623e-06,
      "loss": 0.0681,
      "step": 99840
    },
    {
      "epoch": 0.16342308019612078,
      "grad_norm": 4.302624702453613,
      "learning_rate": 9.736892391426107e-06,
      "loss": 0.0822,
      "step": 99860
    },
    {
      "epoch": 0.16345581063477413,
      "grad_norm": 3.0604918003082275,
      "learning_rate": 9.73682649921259e-06,
      "loss": 0.0671,
      "step": 99880
    },
    {
      "epoch": 0.16348854107342747,
      "grad_norm": 2.6629114151000977,
      "learning_rate": 9.736760606999072e-06,
      "loss": 0.0813,
      "step": 99900
    },
    {
      "epoch": 0.16352127151208082,
      "grad_norm": 3.830793619155884,
      "learning_rate": 9.736694714785556e-06,
      "loss": 0.0757,
      "step": 99920
    },
    {
      "epoch": 0.16355400195073413,
      "grad_norm": 2.5579822063446045,
      "learning_rate": 9.736628822572038e-06,
      "loss": 0.0598,
      "step": 99940
    },
    {
      "epoch": 0.16358673238938748,
      "grad_norm": 2.3358585834503174,
      "learning_rate": 9.736562930358521e-06,
      "loss": 0.0656,
      "step": 99960
    },
    {
      "epoch": 0.16361946282804082,
      "grad_norm": 2.2162060737609863,
      "learning_rate": 9.736497038145003e-06,
      "loss": 0.0609,
      "step": 99980
    },
    {
      "epoch": 0.16365219326669417,
      "grad_norm": 3.5165653228759766,
      "learning_rate": 9.736431145931487e-06,
      "loss": 0.0679,
      "step": 100000
    },
    {
      "epoch": 0.16365219326669417,
      "eval_loss": 0.03518321365118027,
      "eval_runtime": 6509.1113,
      "eval_samples_per_second": 157.911,
      "eval_steps_per_second": 15.791,
      "eval_sts-dev_pearson_cosine": 0.9135128337985804,
      "eval_sts-dev_spearman_cosine": 0.852933775518637,
      "step": 100000
    },
    {
      "epoch": 0.1636849237053475,
      "grad_norm": 3.605445146560669,
      "learning_rate": 9.736365253717969e-06,
      "loss": 0.089,
      "step": 100020
    },
    {
      "epoch": 0.16371765414400083,
      "grad_norm": 2.3609461784362793,
      "learning_rate": 9.736299361504452e-06,
      "loss": 0.0823,
      "step": 100040
    },
    {
      "epoch": 0.16375038458265417,
      "grad_norm": 1.9765868186950684,
      "learning_rate": 9.736233469290934e-06,
      "loss": 0.0665,
      "step": 100060
    },
    {
      "epoch": 0.16378311502130752,
      "grad_norm": 2.697237968444824,
      "learning_rate": 9.736167577077418e-06,
      "loss": 0.0567,
      "step": 100080
    },
    {
      "epoch": 0.16381584545996086,
      "grad_norm": 2.8049378395080566,
      "learning_rate": 9.7361016848639e-06,
      "loss": 0.0679,
      "step": 100100
    },
    {
      "epoch": 0.16384857589861418,
      "grad_norm": 5.4059157371521,
      "learning_rate": 9.736035792650383e-06,
      "loss": 0.0655,
      "step": 100120
    },
    {
      "epoch": 0.16388130633726752,
      "grad_norm": 3.4218060970306396,
      "learning_rate": 9.735969900436865e-06,
      "loss": 0.0705,
      "step": 100140
    },
    {
      "epoch": 0.16391403677592087,
      "grad_norm": 4.446719169616699,
      "learning_rate": 9.735904008223349e-06,
      "loss": 0.073,
      "step": 100160
    },
    {
      "epoch": 0.1639467672145742,
      "grad_norm": 3.1646740436553955,
      "learning_rate": 9.735838116009832e-06,
      "loss": 0.0671,
      "step": 100180
    },
    {
      "epoch": 0.16397949765322756,
      "grad_norm": 1.8443427085876465,
      "learning_rate": 9.735772223796314e-06,
      "loss": 0.061,
      "step": 100200
    },
    {
      "epoch": 0.16401222809188087,
      "grad_norm": 1.4607456922531128,
      "learning_rate": 9.735706331582798e-06,
      "loss": 0.0628,
      "step": 100220
    },
    {
      "epoch": 0.16404495853053422,
      "grad_norm": 1.7957993745803833,
      "learning_rate": 9.735640439369281e-06,
      "loss": 0.0774,
      "step": 100240
    },
    {
      "epoch": 0.16407768896918756,
      "grad_norm": 5.2412028312683105,
      "learning_rate": 9.735574547155763e-06,
      "loss": 0.0684,
      "step": 100260
    },
    {
      "epoch": 0.1641104194078409,
      "grad_norm": 3.0369038581848145,
      "learning_rate": 9.735508654942247e-06,
      "loss": 0.0615,
      "step": 100280
    },
    {
      "epoch": 0.16414314984649425,
      "grad_norm": 2.350841999053955,
      "learning_rate": 9.73544276272873e-06,
      "loss": 0.0808,
      "step": 100300
    },
    {
      "epoch": 0.16417588028514757,
      "grad_norm": 1.8379665613174438,
      "learning_rate": 9.735376870515212e-06,
      "loss": 0.0841,
      "step": 100320
    },
    {
      "epoch": 0.1642086107238009,
      "grad_norm": 2.5544402599334717,
      "learning_rate": 9.735310978301696e-06,
      "loss": 0.0725,
      "step": 100340
    },
    {
      "epoch": 0.16424134116245426,
      "grad_norm": 2.2037055492401123,
      "learning_rate": 9.735245086088178e-06,
      "loss": 0.067,
      "step": 100360
    },
    {
      "epoch": 0.1642740716011076,
      "grad_norm": 8.389617919921875,
      "learning_rate": 9.735179193874661e-06,
      "loss": 0.0706,
      "step": 100380
    },
    {
      "epoch": 0.16430680203976095,
      "grad_norm": 1.1662880182266235,
      "learning_rate": 9.735113301661143e-06,
      "loss": 0.0782,
      "step": 100400
    },
    {
      "epoch": 0.16433953247841426,
      "grad_norm": 5.550693511962891,
      "learning_rate": 9.735047409447627e-06,
      "loss": 0.0734,
      "step": 100420
    },
    {
      "epoch": 0.1643722629170676,
      "grad_norm": 4.234264850616455,
      "learning_rate": 9.734981517234109e-06,
      "loss": 0.086,
      "step": 100440
    },
    {
      "epoch": 0.16440499335572095,
      "grad_norm": 2.8273446559906006,
      "learning_rate": 9.734915625020592e-06,
      "loss": 0.0864,
      "step": 100460
    },
    {
      "epoch": 0.1644377237943743,
      "grad_norm": 3.704988479614258,
      "learning_rate": 9.734849732807074e-06,
      "loss": 0.0811,
      "step": 100480
    },
    {
      "epoch": 0.16447045423302764,
      "grad_norm": 1.6718642711639404,
      "learning_rate": 9.734783840593558e-06,
      "loss": 0.0733,
      "step": 100500
    },
    {
      "epoch": 0.16450318467168096,
      "grad_norm": 1.933358073234558,
      "learning_rate": 9.73471794838004e-06,
      "loss": 0.0585,
      "step": 100520
    },
    {
      "epoch": 0.1645359151103343,
      "grad_norm": 1.7018134593963623,
      "learning_rate": 9.734652056166523e-06,
      "loss": 0.0781,
      "step": 100540
    },
    {
      "epoch": 0.16456864554898765,
      "grad_norm": 2.580397367477417,
      "learning_rate": 9.734586163953007e-06,
      "loss": 0.0677,
      "step": 100560
    },
    {
      "epoch": 0.164601375987641,
      "grad_norm": 2.123643636703491,
      "learning_rate": 9.734520271739489e-06,
      "loss": 0.069,
      "step": 100580
    },
    {
      "epoch": 0.16463410642629434,
      "grad_norm": 3.0446550846099854,
      "learning_rate": 9.734454379525972e-06,
      "loss": 0.0733,
      "step": 100600
    },
    {
      "epoch": 0.16466683686494765,
      "grad_norm": 3.261183738708496,
      "learning_rate": 9.734388487312456e-06,
      "loss": 0.0604,
      "step": 100620
    },
    {
      "epoch": 0.164699567303601,
      "grad_norm": 2.8326289653778076,
      "learning_rate": 9.734322595098938e-06,
      "loss": 0.0648,
      "step": 100640
    },
    {
      "epoch": 0.16473229774225434,
      "grad_norm": 5.157761573791504,
      "learning_rate": 9.734256702885421e-06,
      "loss": 0.069,
      "step": 100660
    },
    {
      "epoch": 0.16476502818090769,
      "grad_norm": 2.5850653648376465,
      "learning_rate": 9.734190810671905e-06,
      "loss": 0.0603,
      "step": 100680
    },
    {
      "epoch": 0.16479775861956103,
      "grad_norm": 1.51223886013031,
      "learning_rate": 9.734124918458387e-06,
      "loss": 0.0697,
      "step": 100700
    },
    {
      "epoch": 0.16483048905821435,
      "grad_norm": 3.658086061477661,
      "learning_rate": 9.73405902624487e-06,
      "loss": 0.0611,
      "step": 100720
    },
    {
      "epoch": 0.1648632194968677,
      "grad_norm": 0.8097449541091919,
      "learning_rate": 9.733993134031352e-06,
      "loss": 0.0655,
      "step": 100740
    },
    {
      "epoch": 0.16489594993552104,
      "grad_norm": 2.8757543563842773,
      "learning_rate": 9.733927241817836e-06,
      "loss": 0.069,
      "step": 100760
    },
    {
      "epoch": 0.16492868037417438,
      "grad_norm": 1.3310998678207397,
      "learning_rate": 9.733861349604318e-06,
      "loss": 0.0589,
      "step": 100780
    },
    {
      "epoch": 0.16496141081282772,
      "grad_norm": 3.484873056411743,
      "learning_rate": 9.733795457390801e-06,
      "loss": 0.0702,
      "step": 100800
    },
    {
      "epoch": 0.16499414125148104,
      "grad_norm": 1.9575694799423218,
      "learning_rate": 9.733729565177283e-06,
      "loss": 0.0645,
      "step": 100820
    },
    {
      "epoch": 0.16502687169013439,
      "grad_norm": 1.477412462234497,
      "learning_rate": 9.733663672963767e-06,
      "loss": 0.0642,
      "step": 100840
    },
    {
      "epoch": 0.16505960212878773,
      "grad_norm": 2.074207305908203,
      "learning_rate": 9.733597780750249e-06,
      "loss": 0.0636,
      "step": 100860
    },
    {
      "epoch": 0.16509233256744107,
      "grad_norm": 1.7778829336166382,
      "learning_rate": 9.733531888536732e-06,
      "loss": 0.0603,
      "step": 100880
    },
    {
      "epoch": 0.16512506300609442,
      "grad_norm": 1.7765337228775024,
      "learning_rate": 9.733465996323214e-06,
      "loss": 0.0757,
      "step": 100900
    },
    {
      "epoch": 0.16515779344474774,
      "grad_norm": 17.185928344726562,
      "learning_rate": 9.733400104109698e-06,
      "loss": 0.0655,
      "step": 100920
    },
    {
      "epoch": 0.16519052388340108,
      "grad_norm": 9.314820289611816,
      "learning_rate": 9.733334211896181e-06,
      "loss": 0.0741,
      "step": 100940
    },
    {
      "epoch": 0.16522325432205442,
      "grad_norm": 3.8713042736053467,
      "learning_rate": 9.733268319682663e-06,
      "loss": 0.0856,
      "step": 100960
    },
    {
      "epoch": 0.16525598476070777,
      "grad_norm": 2.108675718307495,
      "learning_rate": 9.733202427469147e-06,
      "loss": 0.0615,
      "step": 100980
    },
    {
      "epoch": 0.1652887151993611,
      "grad_norm": 1.6470403671264648,
      "learning_rate": 9.733136535255629e-06,
      "loss": 0.072,
      "step": 101000
    },
    {
      "epoch": 0.16532144563801443,
      "grad_norm": 2.5009303092956543,
      "learning_rate": 9.733070643042112e-06,
      "loss": 0.0703,
      "step": 101020
    },
    {
      "epoch": 0.16535417607666777,
      "grad_norm": 4.941928863525391,
      "learning_rate": 9.733004750828596e-06,
      "loss": 0.0594,
      "step": 101040
    },
    {
      "epoch": 0.16538690651532112,
      "grad_norm": 1.6138538122177124,
      "learning_rate": 9.732938858615078e-06,
      "loss": 0.0717,
      "step": 101060
    },
    {
      "epoch": 0.16541963695397446,
      "grad_norm": 1.0730513334274292,
      "learning_rate": 9.732872966401561e-06,
      "loss": 0.0612,
      "step": 101080
    },
    {
      "epoch": 0.1654523673926278,
      "grad_norm": 4.382439136505127,
      "learning_rate": 9.732807074188045e-06,
      "loss": 0.0864,
      "step": 101100
    },
    {
      "epoch": 0.16548509783128112,
      "grad_norm": 2.880411148071289,
      "learning_rate": 9.732741181974527e-06,
      "loss": 0.055,
      "step": 101120
    },
    {
      "epoch": 0.16551782826993447,
      "grad_norm": 5.263011932373047,
      "learning_rate": 9.73267528976101e-06,
      "loss": 0.0686,
      "step": 101140
    },
    {
      "epoch": 0.1655505587085878,
      "grad_norm": 2.339648723602295,
      "learning_rate": 9.732609397547492e-06,
      "loss": 0.065,
      "step": 101160
    },
    {
      "epoch": 0.16558328914724116,
      "grad_norm": 2.497530460357666,
      "learning_rate": 9.732543505333976e-06,
      "loss": 0.0626,
      "step": 101180
    },
    {
      "epoch": 0.1656160195858945,
      "grad_norm": 2.0132341384887695,
      "learning_rate": 9.732477613120458e-06,
      "loss": 0.0555,
      "step": 101200
    },
    {
      "epoch": 0.16564875002454782,
      "grad_norm": 3.739307403564453,
      "learning_rate": 9.732411720906941e-06,
      "loss": 0.0862,
      "step": 101220
    },
    {
      "epoch": 0.16568148046320116,
      "grad_norm": 3.625408172607422,
      "learning_rate": 9.732345828693423e-06,
      "loss": 0.0814,
      "step": 101240
    },
    {
      "epoch": 0.1657142109018545,
      "grad_norm": 2.9195048809051514,
      "learning_rate": 9.732279936479907e-06,
      "loss": 0.0784,
      "step": 101260
    },
    {
      "epoch": 0.16574694134050785,
      "grad_norm": 0.8875823616981506,
      "learning_rate": 9.73221404426639e-06,
      "loss": 0.0701,
      "step": 101280
    },
    {
      "epoch": 0.1657796717791612,
      "grad_norm": 0.5886073112487793,
      "learning_rate": 9.732148152052872e-06,
      "loss": 0.0526,
      "step": 101300
    },
    {
      "epoch": 0.16581240221781451,
      "grad_norm": 1.996667504310608,
      "learning_rate": 9.732082259839356e-06,
      "loss": 0.0717,
      "step": 101320
    },
    {
      "epoch": 0.16584513265646786,
      "grad_norm": 3.0398571491241455,
      "learning_rate": 9.732016367625838e-06,
      "loss": 0.0792,
      "step": 101340
    },
    {
      "epoch": 0.1658778630951212,
      "grad_norm": 2.1786623001098633,
      "learning_rate": 9.731950475412321e-06,
      "loss": 0.061,
      "step": 101360
    },
    {
      "epoch": 0.16591059353377455,
      "grad_norm": 0.4967576563358307,
      "learning_rate": 9.731884583198803e-06,
      "loss": 0.0607,
      "step": 101380
    },
    {
      "epoch": 0.1659433239724279,
      "grad_norm": 2.9435834884643555,
      "learning_rate": 9.731818690985287e-06,
      "loss": 0.0719,
      "step": 101400
    },
    {
      "epoch": 0.1659760544110812,
      "grad_norm": 2.808225154876709,
      "learning_rate": 9.73175279877177e-06,
      "loss": 0.076,
      "step": 101420
    },
    {
      "epoch": 0.16600878484973455,
      "grad_norm": 2.3885586261749268,
      "learning_rate": 9.731686906558252e-06,
      "loss": 0.0714,
      "step": 101440
    },
    {
      "epoch": 0.1660415152883879,
      "grad_norm": 3.586596965789795,
      "learning_rate": 9.731621014344736e-06,
      "loss": 0.0705,
      "step": 101460
    },
    {
      "epoch": 0.16607424572704124,
      "grad_norm": 3.2334868907928467,
      "learning_rate": 9.73155512213122e-06,
      "loss": 0.0845,
      "step": 101480
    },
    {
      "epoch": 0.1661069761656946,
      "grad_norm": 3.53376841545105,
      "learning_rate": 9.731489229917701e-06,
      "loss": 0.0607,
      "step": 101500
    },
    {
      "epoch": 0.1661397066043479,
      "grad_norm": 1.1883206367492676,
      "learning_rate": 9.731423337704185e-06,
      "loss": 0.0788,
      "step": 101520
    },
    {
      "epoch": 0.16617243704300125,
      "grad_norm": 3.712236166000366,
      "learning_rate": 9.731357445490667e-06,
      "loss": 0.0624,
      "step": 101540
    },
    {
      "epoch": 0.1662051674816546,
      "grad_norm": 4.985608100891113,
      "learning_rate": 9.73129155327715e-06,
      "loss": 0.0624,
      "step": 101560
    },
    {
      "epoch": 0.16623789792030794,
      "grad_norm": 1.9147052764892578,
      "learning_rate": 9.731225661063632e-06,
      "loss": 0.0567,
      "step": 101580
    },
    {
      "epoch": 0.16627062835896125,
      "grad_norm": 3.9670422077178955,
      "learning_rate": 9.731159768850116e-06,
      "loss": 0.0528,
      "step": 101600
    },
    {
      "epoch": 0.1663033587976146,
      "grad_norm": 2.9399614334106445,
      "learning_rate": 9.731093876636598e-06,
      "loss": 0.0769,
      "step": 101620
    },
    {
      "epoch": 0.16633608923626794,
      "grad_norm": 2.8810596466064453,
      "learning_rate": 9.731027984423081e-06,
      "loss": 0.0646,
      "step": 101640
    },
    {
      "epoch": 0.1663688196749213,
      "grad_norm": 2.9308836460113525,
      "learning_rate": 9.730962092209565e-06,
      "loss": 0.0677,
      "step": 101660
    },
    {
      "epoch": 0.16640155011357463,
      "grad_norm": 4.3340044021606445,
      "learning_rate": 9.730896199996047e-06,
      "loss": 0.0684,
      "step": 101680
    },
    {
      "epoch": 0.16643428055222795,
      "grad_norm": 3.4691712856292725,
      "learning_rate": 9.73083030778253e-06,
      "loss": 0.0684,
      "step": 101700
    },
    {
      "epoch": 0.1664670109908813,
      "grad_norm": 2.8210391998291016,
      "learning_rate": 9.730764415569012e-06,
      "loss": 0.0693,
      "step": 101720
    },
    {
      "epoch": 0.16649974142953464,
      "grad_norm": 1.8760722875595093,
      "learning_rate": 9.730698523355496e-06,
      "loss": 0.0768,
      "step": 101740
    },
    {
      "epoch": 0.16653247186818798,
      "grad_norm": 6.866466522216797,
      "learning_rate": 9.730632631141978e-06,
      "loss": 0.0822,
      "step": 101760
    },
    {
      "epoch": 0.16656520230684133,
      "grad_norm": 3.05424165725708,
      "learning_rate": 9.730566738928462e-06,
      "loss": 0.0831,
      "step": 101780
    },
    {
      "epoch": 0.16659793274549464,
      "grad_norm": 2.4381072521209717,
      "learning_rate": 9.730500846714943e-06,
      "loss": 0.071,
      "step": 101800
    },
    {
      "epoch": 0.166630663184148,
      "grad_norm": 3.8324496746063232,
      "learning_rate": 9.730434954501427e-06,
      "loss": 0.0731,
      "step": 101820
    },
    {
      "epoch": 0.16666339362280133,
      "grad_norm": 1.716387391090393,
      "learning_rate": 9.73036906228791e-06,
      "loss": 0.0789,
      "step": 101840
    },
    {
      "epoch": 0.16669612406145468,
      "grad_norm": 4.50056791305542,
      "learning_rate": 9.730303170074392e-06,
      "loss": 0.0828,
      "step": 101860
    },
    {
      "epoch": 0.16672885450010802,
      "grad_norm": 0.9805009961128235,
      "learning_rate": 9.730237277860876e-06,
      "loss": 0.0545,
      "step": 101880
    },
    {
      "epoch": 0.16676158493876134,
      "grad_norm": 1.4833998680114746,
      "learning_rate": 9.73017138564736e-06,
      "loss": 0.0731,
      "step": 101900
    },
    {
      "epoch": 0.16679431537741468,
      "grad_norm": 1.3017836809158325,
      "learning_rate": 9.730105493433842e-06,
      "loss": 0.0702,
      "step": 101920
    },
    {
      "epoch": 0.16682704581606803,
      "grad_norm": 3.937891721725464,
      "learning_rate": 9.730039601220325e-06,
      "loss": 0.0899,
      "step": 101940
    },
    {
      "epoch": 0.16685977625472137,
      "grad_norm": 2.2107906341552734,
      "learning_rate": 9.729973709006807e-06,
      "loss": 0.0802,
      "step": 101960
    },
    {
      "epoch": 0.16689250669337471,
      "grad_norm": 2.240009069442749,
      "learning_rate": 9.72990781679329e-06,
      "loss": 0.0671,
      "step": 101980
    },
    {
      "epoch": 0.16692523713202803,
      "grad_norm": 2.3161303997039795,
      "learning_rate": 9.729841924579774e-06,
      "loss": 0.0556,
      "step": 102000
    },
    {
      "epoch": 0.16695796757068138,
      "grad_norm": 2.28420090675354,
      "learning_rate": 9.729776032366256e-06,
      "loss": 0.0721,
      "step": 102020
    },
    {
      "epoch": 0.16699069800933472,
      "grad_norm": 1.2034982442855835,
      "learning_rate": 9.72971014015274e-06,
      "loss": 0.0716,
      "step": 102040
    },
    {
      "epoch": 0.16702342844798806,
      "grad_norm": 3.3664698600769043,
      "learning_rate": 9.729644247939222e-06,
      "loss": 0.0738,
      "step": 102060
    },
    {
      "epoch": 0.1670561588866414,
      "grad_norm": 2.8459813594818115,
      "learning_rate": 9.729578355725705e-06,
      "loss": 0.0623,
      "step": 102080
    },
    {
      "epoch": 0.16708888932529473,
      "grad_norm": 4.043743133544922,
      "learning_rate": 9.729512463512187e-06,
      "loss": 0.0757,
      "step": 102100
    },
    {
      "epoch": 0.16712161976394807,
      "grad_norm": 3.7566325664520264,
      "learning_rate": 9.72944657129867e-06,
      "loss": 0.077,
      "step": 102120
    },
    {
      "epoch": 0.16715435020260141,
      "grad_norm": 3.6004703044891357,
      "learning_rate": 9.729380679085153e-06,
      "loss": 0.0724,
      "step": 102140
    },
    {
      "epoch": 0.16718708064125476,
      "grad_norm": 1.7486920356750488,
      "learning_rate": 9.729314786871636e-06,
      "loss": 0.0681,
      "step": 102160
    },
    {
      "epoch": 0.1672198110799081,
      "grad_norm": 1.250731110572815,
      "learning_rate": 9.729248894658118e-06,
      "loss": 0.0662,
      "step": 102180
    },
    {
      "epoch": 0.16725254151856142,
      "grad_norm": 1.4955873489379883,
      "learning_rate": 9.729183002444602e-06,
      "loss": 0.0658,
      "step": 102200
    },
    {
      "epoch": 0.16728527195721477,
      "grad_norm": 2.7678442001342773,
      "learning_rate": 9.729117110231085e-06,
      "loss": 0.0667,
      "step": 102220
    },
    {
      "epoch": 0.1673180023958681,
      "grad_norm": 2.6082873344421387,
      "learning_rate": 9.729051218017567e-06,
      "loss": 0.0581,
      "step": 102240
    },
    {
      "epoch": 0.16735073283452145,
      "grad_norm": 2.9803786277770996,
      "learning_rate": 9.72898532580405e-06,
      "loss": 0.0749,
      "step": 102260
    },
    {
      "epoch": 0.1673834632731748,
      "grad_norm": 0.7533594965934753,
      "learning_rate": 9.728919433590534e-06,
      "loss": 0.0668,
      "step": 102280
    },
    {
      "epoch": 0.16741619371182812,
      "grad_norm": 1.2594561576843262,
      "learning_rate": 9.728853541377016e-06,
      "loss": 0.0622,
      "step": 102300
    },
    {
      "epoch": 0.16744892415048146,
      "grad_norm": 1.9894465208053589,
      "learning_rate": 9.7287876491635e-06,
      "loss": 0.0651,
      "step": 102320
    },
    {
      "epoch": 0.1674816545891348,
      "grad_norm": 6.021636486053467,
      "learning_rate": 9.728721756949983e-06,
      "loss": 0.0769,
      "step": 102340
    },
    {
      "epoch": 0.16751438502778815,
      "grad_norm": 1.9649500846862793,
      "learning_rate": 9.728655864736465e-06,
      "loss": 0.0837,
      "step": 102360
    },
    {
      "epoch": 0.1675471154664415,
      "grad_norm": 2.4336767196655273,
      "learning_rate": 9.728589972522949e-06,
      "loss": 0.0615,
      "step": 102380
    },
    {
      "epoch": 0.1675798459050948,
      "grad_norm": 2.627410888671875,
      "learning_rate": 9.72852408030943e-06,
      "loss": 0.0871,
      "step": 102400
    },
    {
      "epoch": 0.16761257634374815,
      "grad_norm": 1.4371453523635864,
      "learning_rate": 9.728458188095914e-06,
      "loss": 0.0698,
      "step": 102420
    },
    {
      "epoch": 0.1676453067824015,
      "grad_norm": 4.730973243713379,
      "learning_rate": 9.728392295882396e-06,
      "loss": 0.0611,
      "step": 102440
    },
    {
      "epoch": 0.16767803722105484,
      "grad_norm": 0.7211484313011169,
      "learning_rate": 9.72832640366888e-06,
      "loss": 0.065,
      "step": 102460
    },
    {
      "epoch": 0.1677107676597082,
      "grad_norm": 2.015613555908203,
      "learning_rate": 9.728260511455362e-06,
      "loss": 0.0705,
      "step": 102480
    },
    {
      "epoch": 0.1677434980983615,
      "grad_norm": 5.840516567230225,
      "learning_rate": 9.728194619241845e-06,
      "loss": 0.0757,
      "step": 102500
    },
    {
      "epoch": 0.16777622853701485,
      "grad_norm": 1.0203800201416016,
      "learning_rate": 9.728128727028327e-06,
      "loss": 0.0658,
      "step": 102520
    },
    {
      "epoch": 0.1678089589756682,
      "grad_norm": 2.539999485015869,
      "learning_rate": 9.72806283481481e-06,
      "loss": 0.058,
      "step": 102540
    },
    {
      "epoch": 0.16784168941432154,
      "grad_norm": 3.3103983402252197,
      "learning_rate": 9.727996942601293e-06,
      "loss": 0.0606,
      "step": 102560
    },
    {
      "epoch": 0.16787441985297488,
      "grad_norm": 4.1076884269714355,
      "learning_rate": 9.727931050387776e-06,
      "loss": 0.0664,
      "step": 102580
    },
    {
      "epoch": 0.1679071502916282,
      "grad_norm": 5.429949760437012,
      "learning_rate": 9.727865158174258e-06,
      "loss": 0.0722,
      "step": 102600
    },
    {
      "epoch": 0.16793988073028154,
      "grad_norm": 3.1648852825164795,
      "learning_rate": 9.727799265960742e-06,
      "loss": 0.0575,
      "step": 102620
    },
    {
      "epoch": 0.1679726111689349,
      "grad_norm": 0.9526000022888184,
      "learning_rate": 9.727733373747225e-06,
      "loss": 0.0645,
      "step": 102640
    },
    {
      "epoch": 0.16800534160758823,
      "grad_norm": 1.7942872047424316,
      "learning_rate": 9.727667481533709e-06,
      "loss": 0.0755,
      "step": 102660
    },
    {
      "epoch": 0.16803807204624158,
      "grad_norm": 2.8468966484069824,
      "learning_rate": 9.72760158932019e-06,
      "loss": 0.0779,
      "step": 102680
    },
    {
      "epoch": 0.1680708024848949,
      "grad_norm": 2.5606272220611572,
      "learning_rate": 9.727535697106674e-06,
      "loss": 0.0737,
      "step": 102700
    },
    {
      "epoch": 0.16810353292354824,
      "grad_norm": 3.631066083908081,
      "learning_rate": 9.727469804893158e-06,
      "loss": 0.0551,
      "step": 102720
    },
    {
      "epoch": 0.16813626336220158,
      "grad_norm": 1.9822360277175903,
      "learning_rate": 9.72740391267964e-06,
      "loss": 0.0573,
      "step": 102740
    },
    {
      "epoch": 0.16816899380085493,
      "grad_norm": 1.497841238975525,
      "learning_rate": 9.727338020466123e-06,
      "loss": 0.0614,
      "step": 102760
    },
    {
      "epoch": 0.16820172423950827,
      "grad_norm": 3.1426706314086914,
      "learning_rate": 9.727272128252605e-06,
      "loss": 0.0726,
      "step": 102780
    },
    {
      "epoch": 0.1682344546781616,
      "grad_norm": 3.561890125274658,
      "learning_rate": 9.727206236039089e-06,
      "loss": 0.0809,
      "step": 102800
    },
    {
      "epoch": 0.16826718511681493,
      "grad_norm": 4.6190032958984375,
      "learning_rate": 9.72714034382557e-06,
      "loss": 0.0813,
      "step": 102820
    },
    {
      "epoch": 0.16829991555546828,
      "grad_norm": 7.037205219268799,
      "learning_rate": 9.727074451612054e-06,
      "loss": 0.0732,
      "step": 102840
    },
    {
      "epoch": 0.16833264599412162,
      "grad_norm": 3.043463706970215,
      "learning_rate": 9.727008559398536e-06,
      "loss": 0.0747,
      "step": 102860
    },
    {
      "epoch": 0.16836537643277497,
      "grad_norm": 2.351728916168213,
      "learning_rate": 9.72694266718502e-06,
      "loss": 0.083,
      "step": 102880
    },
    {
      "epoch": 0.16839810687142828,
      "grad_norm": 2.146063804626465,
      "learning_rate": 9.726876774971502e-06,
      "loss": 0.0658,
      "step": 102900
    },
    {
      "epoch": 0.16843083731008163,
      "grad_norm": 3.537998676300049,
      "learning_rate": 9.726810882757985e-06,
      "loss": 0.099,
      "step": 102920
    },
    {
      "epoch": 0.16846356774873497,
      "grad_norm": 0.5547322630882263,
      "learning_rate": 9.726744990544467e-06,
      "loss": 0.0586,
      "step": 102940
    },
    {
      "epoch": 0.16849629818738832,
      "grad_norm": 0.5992001891136169,
      "learning_rate": 9.72667909833095e-06,
      "loss": 0.084,
      "step": 102960
    },
    {
      "epoch": 0.16852902862604163,
      "grad_norm": 1.5766271352767944,
      "learning_rate": 9.726613206117433e-06,
      "loss": 0.0518,
      "step": 102980
    },
    {
      "epoch": 0.16856175906469498,
      "grad_norm": 3.9914181232452393,
      "learning_rate": 9.726547313903916e-06,
      "loss": 0.0601,
      "step": 103000
    },
    {
      "epoch": 0.16859448950334832,
      "grad_norm": 6.397760391235352,
      "learning_rate": 9.7264814216904e-06,
      "loss": 0.0605,
      "step": 103020
    },
    {
      "epoch": 0.16862721994200167,
      "grad_norm": 4.674959182739258,
      "learning_rate": 9.726415529476882e-06,
      "loss": 0.073,
      "step": 103040
    },
    {
      "epoch": 0.168659950380655,
      "grad_norm": 2.0886247158050537,
      "learning_rate": 9.726349637263365e-06,
      "loss": 0.0821,
      "step": 103060
    },
    {
      "epoch": 0.16869268081930833,
      "grad_norm": 1.335660457611084,
      "learning_rate": 9.726283745049849e-06,
      "loss": 0.0557,
      "step": 103080
    },
    {
      "epoch": 0.16872541125796167,
      "grad_norm": 0.4826270639896393,
      "learning_rate": 9.72621785283633e-06,
      "loss": 0.0543,
      "step": 103100
    },
    {
      "epoch": 0.16875814169661502,
      "grad_norm": 4.351720333099365,
      "learning_rate": 9.726151960622814e-06,
      "loss": 0.0677,
      "step": 103120
    },
    {
      "epoch": 0.16879087213526836,
      "grad_norm": 2.1151349544525146,
      "learning_rate": 9.726086068409298e-06,
      "loss": 0.0775,
      "step": 103140
    },
    {
      "epoch": 0.1688236025739217,
      "grad_norm": 2.0218236446380615,
      "learning_rate": 9.72602017619578e-06,
      "loss": 0.0689,
      "step": 103160
    },
    {
      "epoch": 0.16885633301257502,
      "grad_norm": 2.6938278675079346,
      "learning_rate": 9.725954283982263e-06,
      "loss": 0.0734,
      "step": 103180
    },
    {
      "epoch": 0.16888906345122837,
      "grad_norm": 1.791221261024475,
      "learning_rate": 9.725888391768745e-06,
      "loss": 0.0788,
      "step": 103200
    },
    {
      "epoch": 0.1689217938898817,
      "grad_norm": 2.3504478931427,
      "learning_rate": 9.725822499555229e-06,
      "loss": 0.0688,
      "step": 103220
    },
    {
      "epoch": 0.16895452432853506,
      "grad_norm": 2.6788055896759033,
      "learning_rate": 9.72575660734171e-06,
      "loss": 0.0685,
      "step": 103240
    },
    {
      "epoch": 0.1689872547671884,
      "grad_norm": 1.5074125528335571,
      "learning_rate": 9.725690715128194e-06,
      "loss": 0.0743,
      "step": 103260
    },
    {
      "epoch": 0.16901998520584172,
      "grad_norm": 3.154644012451172,
      "learning_rate": 9.725624822914676e-06,
      "loss": 0.0797,
      "step": 103280
    },
    {
      "epoch": 0.16905271564449506,
      "grad_norm": 5.956279754638672,
      "learning_rate": 9.72555893070116e-06,
      "loss": 0.063,
      "step": 103300
    },
    {
      "epoch": 0.1690854460831484,
      "grad_norm": 2.942237377166748,
      "learning_rate": 9.725493038487642e-06,
      "loss": 0.0744,
      "step": 103320
    },
    {
      "epoch": 0.16911817652180175,
      "grad_norm": 6.156153678894043,
      "learning_rate": 9.725427146274125e-06,
      "loss": 0.0676,
      "step": 103340
    },
    {
      "epoch": 0.1691509069604551,
      "grad_norm": 2.332641124725342,
      "learning_rate": 9.725361254060607e-06,
      "loss": 0.0651,
      "step": 103360
    },
    {
      "epoch": 0.1691836373991084,
      "grad_norm": 2.209683895111084,
      "learning_rate": 9.725295361847091e-06,
      "loss": 0.066,
      "step": 103380
    },
    {
      "epoch": 0.16921636783776176,
      "grad_norm": 5.923321723937988,
      "learning_rate": 9.725229469633574e-06,
      "loss": 0.0697,
      "step": 103400
    },
    {
      "epoch": 0.1692490982764151,
      "grad_norm": 2.5994980335235596,
      "learning_rate": 9.725163577420056e-06,
      "loss": 0.0702,
      "step": 103420
    },
    {
      "epoch": 0.16928182871506844,
      "grad_norm": 4.164424896240234,
      "learning_rate": 9.72509768520654e-06,
      "loss": 0.0928,
      "step": 103440
    },
    {
      "epoch": 0.1693145591537218,
      "grad_norm": 0.7208173871040344,
      "learning_rate": 9.725031792993024e-06,
      "loss": 0.0785,
      "step": 103460
    },
    {
      "epoch": 0.1693472895923751,
      "grad_norm": 7.979827404022217,
      "learning_rate": 9.724965900779505e-06,
      "loss": 0.0668,
      "step": 103480
    },
    {
      "epoch": 0.16938002003102845,
      "grad_norm": 3.243168592453003,
      "learning_rate": 9.724900008565989e-06,
      "loss": 0.0586,
      "step": 103500
    },
    {
      "epoch": 0.1694127504696818,
      "grad_norm": 2.187957525253296,
      "learning_rate": 9.724834116352473e-06,
      "loss": 0.0742,
      "step": 103520
    },
    {
      "epoch": 0.16944548090833514,
      "grad_norm": 3.0012145042419434,
      "learning_rate": 9.724768224138954e-06,
      "loss": 0.0801,
      "step": 103540
    },
    {
      "epoch": 0.16947821134698848,
      "grad_norm": 1.66615891456604,
      "learning_rate": 9.724702331925438e-06,
      "loss": 0.0629,
      "step": 103560
    },
    {
      "epoch": 0.1695109417856418,
      "grad_norm": 3.6393167972564697,
      "learning_rate": 9.72463643971192e-06,
      "loss": 0.064,
      "step": 103580
    },
    {
      "epoch": 0.16954367222429514,
      "grad_norm": 2.029339551925659,
      "learning_rate": 9.724570547498404e-06,
      "loss": 0.0708,
      "step": 103600
    },
    {
      "epoch": 0.1695764026629485,
      "grad_norm": 1.6296483278274536,
      "learning_rate": 9.724504655284885e-06,
      "loss": 0.0692,
      "step": 103620
    },
    {
      "epoch": 0.16960913310160183,
      "grad_norm": 3.756138324737549,
      "learning_rate": 9.724438763071369e-06,
      "loss": 0.0801,
      "step": 103640
    },
    {
      "epoch": 0.16964186354025518,
      "grad_norm": 2.1156978607177734,
      "learning_rate": 9.724372870857851e-06,
      "loss": 0.0751,
      "step": 103660
    },
    {
      "epoch": 0.1696745939789085,
      "grad_norm": 3.0898709297180176,
      "learning_rate": 9.724306978644334e-06,
      "loss": 0.054,
      "step": 103680
    },
    {
      "epoch": 0.16970732441756184,
      "grad_norm": 0.5426328778266907,
      "learning_rate": 9.724241086430816e-06,
      "loss": 0.0815,
      "step": 103700
    },
    {
      "epoch": 0.16974005485621518,
      "grad_norm": 1.6593101024627686,
      "learning_rate": 9.7241751942173e-06,
      "loss": 0.0591,
      "step": 103720
    },
    {
      "epoch": 0.16977278529486853,
      "grad_norm": 49.10110855102539,
      "learning_rate": 9.724109302003784e-06,
      "loss": 0.0726,
      "step": 103740
    },
    {
      "epoch": 0.16980551573352187,
      "grad_norm": 1.9052902460098267,
      "learning_rate": 9.724043409790265e-06,
      "loss": 0.065,
      "step": 103760
    },
    {
      "epoch": 0.1698382461721752,
      "grad_norm": 2.307643175125122,
      "learning_rate": 9.723977517576749e-06,
      "loss": 0.0767,
      "step": 103780
    },
    {
      "epoch": 0.16987097661082853,
      "grad_norm": 3.8354849815368652,
      "learning_rate": 9.723911625363231e-06,
      "loss": 0.0604,
      "step": 103800
    },
    {
      "epoch": 0.16990370704948188,
      "grad_norm": 46.6002197265625,
      "learning_rate": 9.723845733149715e-06,
      "loss": 0.0724,
      "step": 103820
    },
    {
      "epoch": 0.16993643748813522,
      "grad_norm": 0.8031031489372253,
      "learning_rate": 9.723779840936196e-06,
      "loss": 0.0733,
      "step": 103840
    },
    {
      "epoch": 0.16996916792678857,
      "grad_norm": 5.273345470428467,
      "learning_rate": 9.72371394872268e-06,
      "loss": 0.0634,
      "step": 103860
    },
    {
      "epoch": 0.17000189836544188,
      "grad_norm": 3.067526340484619,
      "learning_rate": 9.723648056509164e-06,
      "loss": 0.0707,
      "step": 103880
    },
    {
      "epoch": 0.17003462880409523,
      "grad_norm": 3.9443771839141846,
      "learning_rate": 9.723582164295645e-06,
      "loss": 0.0823,
      "step": 103900
    },
    {
      "epoch": 0.17006735924274857,
      "grad_norm": 0.5990777611732483,
      "learning_rate": 9.723516272082129e-06,
      "loss": 0.0628,
      "step": 103920
    },
    {
      "epoch": 0.17010008968140192,
      "grad_norm": 1.6589316129684448,
      "learning_rate": 9.723450379868613e-06,
      "loss": 0.0733,
      "step": 103940
    },
    {
      "epoch": 0.17013282012005526,
      "grad_norm": 5.910182476043701,
      "learning_rate": 9.723384487655095e-06,
      "loss": 0.0745,
      "step": 103960
    },
    {
      "epoch": 0.17016555055870858,
      "grad_norm": 1.5405378341674805,
      "learning_rate": 9.723318595441578e-06,
      "loss": 0.0681,
      "step": 103980
    },
    {
      "epoch": 0.17019828099736192,
      "grad_norm": 2.077244758605957,
      "learning_rate": 9.72325270322806e-06,
      "loss": 0.0743,
      "step": 104000
    },
    {
      "epoch": 0.17023101143601527,
      "grad_norm": 2.9225680828094482,
      "learning_rate": 9.723186811014544e-06,
      "loss": 0.0672,
      "step": 104020
    },
    {
      "epoch": 0.1702637418746686,
      "grad_norm": 3.1182448863983154,
      "learning_rate": 9.723120918801025e-06,
      "loss": 0.0857,
      "step": 104040
    },
    {
      "epoch": 0.17029647231332196,
      "grad_norm": 5.843409061431885,
      "learning_rate": 9.723055026587509e-06,
      "loss": 0.0513,
      "step": 104060
    },
    {
      "epoch": 0.17032920275197527,
      "grad_norm": 8.35721206665039,
      "learning_rate": 9.722989134373991e-06,
      "loss": 0.0822,
      "step": 104080
    },
    {
      "epoch": 0.17036193319062862,
      "grad_norm": 2.8166093826293945,
      "learning_rate": 9.722923242160475e-06,
      "loss": 0.0587,
      "step": 104100
    },
    {
      "epoch": 0.17039466362928196,
      "grad_norm": 0.49163955450057983,
      "learning_rate": 9.722857349946958e-06,
      "loss": 0.0805,
      "step": 104120
    },
    {
      "epoch": 0.1704273940679353,
      "grad_norm": 2.643341064453125,
      "learning_rate": 9.72279145773344e-06,
      "loss": 0.0544,
      "step": 104140
    },
    {
      "epoch": 0.17046012450658865,
      "grad_norm": 1.994946837425232,
      "learning_rate": 9.722725565519924e-06,
      "loss": 0.0565,
      "step": 104160
    },
    {
      "epoch": 0.17049285494524197,
      "grad_norm": 1.229844093322754,
      "learning_rate": 9.722659673306406e-06,
      "loss": 0.0585,
      "step": 104180
    },
    {
      "epoch": 0.1705255853838953,
      "grad_norm": 0.5861645340919495,
      "learning_rate": 9.722593781092889e-06,
      "loss": 0.0808,
      "step": 104200
    },
    {
      "epoch": 0.17055831582254866,
      "grad_norm": 2.905947685241699,
      "learning_rate": 9.722527888879371e-06,
      "loss": 0.0645,
      "step": 104220
    },
    {
      "epoch": 0.170591046261202,
      "grad_norm": 1.6583728790283203,
      "learning_rate": 9.722461996665855e-06,
      "loss": 0.0655,
      "step": 104240
    },
    {
      "epoch": 0.17062377669985535,
      "grad_norm": 4.090461730957031,
      "learning_rate": 9.722396104452338e-06,
      "loss": 0.0713,
      "step": 104260
    },
    {
      "epoch": 0.17065650713850866,
      "grad_norm": 3.0685551166534424,
      "learning_rate": 9.72233021223882e-06,
      "loss": 0.0633,
      "step": 104280
    },
    {
      "epoch": 0.170689237577162,
      "grad_norm": 0.3886663019657135,
      "learning_rate": 9.722264320025304e-06,
      "loss": 0.0586,
      "step": 104300
    },
    {
      "epoch": 0.17072196801581535,
      "grad_norm": 3.7937605381011963,
      "learning_rate": 9.722198427811787e-06,
      "loss": 0.0838,
      "step": 104320
    },
    {
      "epoch": 0.1707546984544687,
      "grad_norm": 1.3267714977264404,
      "learning_rate": 9.722132535598269e-06,
      "loss": 0.0776,
      "step": 104340
    },
    {
      "epoch": 0.170787428893122,
      "grad_norm": 3.5924794673919678,
      "learning_rate": 9.722066643384753e-06,
      "loss": 0.0773,
      "step": 104360
    },
    {
      "epoch": 0.17082015933177536,
      "grad_norm": 5.238176345825195,
      "learning_rate": 9.722000751171235e-06,
      "loss": 0.0663,
      "step": 104380
    },
    {
      "epoch": 0.1708528897704287,
      "grad_norm": 2.636449098587036,
      "learning_rate": 9.721934858957718e-06,
      "loss": 0.0806,
      "step": 104400
    },
    {
      "epoch": 0.17088562020908205,
      "grad_norm": 3.014927387237549,
      "learning_rate": 9.7218689667442e-06,
      "loss": 0.065,
      "step": 104420
    },
    {
      "epoch": 0.1709183506477354,
      "grad_norm": 1.0835864543914795,
      "learning_rate": 9.721803074530684e-06,
      "loss": 0.0576,
      "step": 104440
    },
    {
      "epoch": 0.1709510810863887,
      "grad_norm": 2.350006341934204,
      "learning_rate": 9.721737182317167e-06,
      "loss": 0.0697,
      "step": 104460
    },
    {
      "epoch": 0.17098381152504205,
      "grad_norm": 1.920960783958435,
      "learning_rate": 9.72167129010365e-06,
      "loss": 0.0649,
      "step": 104480
    },
    {
      "epoch": 0.1710165419636954,
      "grad_norm": 1.327040195465088,
      "learning_rate": 9.721605397890133e-06,
      "loss": 0.0606,
      "step": 104500
    },
    {
      "epoch": 0.17104927240234874,
      "grad_norm": 3.072736978530884,
      "learning_rate": 9.721539505676615e-06,
      "loss": 0.0819,
      "step": 104520
    },
    {
      "epoch": 0.17108200284100208,
      "grad_norm": 13.529765129089355,
      "learning_rate": 9.721473613463098e-06,
      "loss": 0.0761,
      "step": 104540
    },
    {
      "epoch": 0.1711147332796554,
      "grad_norm": 1.8850305080413818,
      "learning_rate": 9.72140772124958e-06,
      "loss": 0.0737,
      "step": 104560
    },
    {
      "epoch": 0.17114746371830875,
      "grad_norm": 6.951547622680664,
      "learning_rate": 9.721341829036064e-06,
      "loss": 0.0633,
      "step": 104580
    },
    {
      "epoch": 0.1711801941569621,
      "grad_norm": 3.1329996585845947,
      "learning_rate": 9.721275936822546e-06,
      "loss": 0.0605,
      "step": 104600
    },
    {
      "epoch": 0.17121292459561543,
      "grad_norm": 3.006737232208252,
      "learning_rate": 9.72121004460903e-06,
      "loss": 0.0914,
      "step": 104620
    },
    {
      "epoch": 0.17124565503426878,
      "grad_norm": 1.0879310369491577,
      "learning_rate": 9.721144152395511e-06,
      "loss": 0.0529,
      "step": 104640
    },
    {
      "epoch": 0.1712783854729221,
      "grad_norm": 3.6112747192382812,
      "learning_rate": 9.721078260181995e-06,
      "loss": 0.063,
      "step": 104660
    },
    {
      "epoch": 0.17131111591157544,
      "grad_norm": 1.8946818113327026,
      "learning_rate": 9.721012367968478e-06,
      "loss": 0.0826,
      "step": 104680
    },
    {
      "epoch": 0.17134384635022878,
      "grad_norm": 4.441737651824951,
      "learning_rate": 9.72094647575496e-06,
      "loss": 0.0691,
      "step": 104700
    },
    {
      "epoch": 0.17137657678888213,
      "grad_norm": 1.386013150215149,
      "learning_rate": 9.720880583541444e-06,
      "loss": 0.0554,
      "step": 104720
    },
    {
      "epoch": 0.17140930722753547,
      "grad_norm": 3.646411657333374,
      "learning_rate": 9.720814691327927e-06,
      "loss": 0.0628,
      "step": 104740
    },
    {
      "epoch": 0.1714420376661888,
      "grad_norm": 2.3033030033111572,
      "learning_rate": 9.72074879911441e-06,
      "loss": 0.0698,
      "step": 104760
    },
    {
      "epoch": 0.17147476810484213,
      "grad_norm": 0.47988712787628174,
      "learning_rate": 9.720682906900893e-06,
      "loss": 0.0637,
      "step": 104780
    },
    {
      "epoch": 0.17150749854349548,
      "grad_norm": 4.661169528961182,
      "learning_rate": 9.720617014687376e-06,
      "loss": 0.0758,
      "step": 104800
    },
    {
      "epoch": 0.17154022898214882,
      "grad_norm": 2.745770215988159,
      "learning_rate": 9.720551122473858e-06,
      "loss": 0.0669,
      "step": 104820
    },
    {
      "epoch": 0.17157295942080217,
      "grad_norm": 3.783247947692871,
      "learning_rate": 9.720485230260342e-06,
      "loss": 0.0785,
      "step": 104840
    },
    {
      "epoch": 0.17160568985945548,
      "grad_norm": 1.7825345993041992,
      "learning_rate": 9.720419338046824e-06,
      "loss": 0.0508,
      "step": 104860
    },
    {
      "epoch": 0.17163842029810883,
      "grad_norm": 2.5768468379974365,
      "learning_rate": 9.720353445833307e-06,
      "loss": 0.067,
      "step": 104880
    },
    {
      "epoch": 0.17167115073676217,
      "grad_norm": 2.8134074211120605,
      "learning_rate": 9.72028755361979e-06,
      "loss": 0.0707,
      "step": 104900
    },
    {
      "epoch": 0.17170388117541552,
      "grad_norm": 6.133582592010498,
      "learning_rate": 9.720221661406273e-06,
      "loss": 0.053,
      "step": 104920
    },
    {
      "epoch": 0.17173661161406886,
      "grad_norm": 1.936539888381958,
      "learning_rate": 9.720155769192755e-06,
      "loss": 0.0752,
      "step": 104940
    },
    {
      "epoch": 0.17176934205272218,
      "grad_norm": 4.80610990524292,
      "learning_rate": 9.720089876979238e-06,
      "loss": 0.0689,
      "step": 104960
    },
    {
      "epoch": 0.17180207249137552,
      "grad_norm": 3.9530038833618164,
      "learning_rate": 9.72002398476572e-06,
      "loss": 0.0577,
      "step": 104980
    },
    {
      "epoch": 0.17183480293002887,
      "grad_norm": 3.4056155681610107,
      "learning_rate": 9.719958092552204e-06,
      "loss": 0.0684,
      "step": 105000
    },
    {
      "epoch": 0.1718675333686822,
      "grad_norm": 1.0993767976760864,
      "learning_rate": 9.719892200338686e-06,
      "loss": 0.0722,
      "step": 105020
    },
    {
      "epoch": 0.17190026380733556,
      "grad_norm": 0.8555541038513184,
      "learning_rate": 9.71982630812517e-06,
      "loss": 0.0657,
      "step": 105040
    },
    {
      "epoch": 0.17193299424598887,
      "grad_norm": 7.066176414489746,
      "learning_rate": 9.719760415911653e-06,
      "loss": 0.0834,
      "step": 105060
    },
    {
      "epoch": 0.17196572468464222,
      "grad_norm": 2.9781997203826904,
      "learning_rate": 9.719694523698135e-06,
      "loss": 0.0634,
      "step": 105080
    },
    {
      "epoch": 0.17199845512329556,
      "grad_norm": 1.8878307342529297,
      "learning_rate": 9.719628631484618e-06,
      "loss": 0.0797,
      "step": 105100
    },
    {
      "epoch": 0.1720311855619489,
      "grad_norm": 2.562161684036255,
      "learning_rate": 9.719562739271102e-06,
      "loss": 0.0848,
      "step": 105120
    },
    {
      "epoch": 0.17206391600060225,
      "grad_norm": 4.4257731437683105,
      "learning_rate": 9.719496847057584e-06,
      "loss": 0.0663,
      "step": 105140
    },
    {
      "epoch": 0.17209664643925557,
      "grad_norm": 3.411176919937134,
      "learning_rate": 9.719430954844067e-06,
      "loss": 0.0654,
      "step": 105160
    },
    {
      "epoch": 0.1721293768779089,
      "grad_norm": 1.4701042175292969,
      "learning_rate": 9.719365062630551e-06,
      "loss": 0.0755,
      "step": 105180
    },
    {
      "epoch": 0.17216210731656226,
      "grad_norm": 1.6414624452590942,
      "learning_rate": 9.719299170417033e-06,
      "loss": 0.0678,
      "step": 105200
    },
    {
      "epoch": 0.1721948377552156,
      "grad_norm": 3.687344789505005,
      "learning_rate": 9.719233278203516e-06,
      "loss": 0.0792,
      "step": 105220
    },
    {
      "epoch": 0.17222756819386895,
      "grad_norm": 3.894249200820923,
      "learning_rate": 9.719167385989998e-06,
      "loss": 0.0735,
      "step": 105240
    },
    {
      "epoch": 0.17226029863252226,
      "grad_norm": 14.763866424560547,
      "learning_rate": 9.719101493776482e-06,
      "loss": 0.0786,
      "step": 105260
    },
    {
      "epoch": 0.1722930290711756,
      "grad_norm": 1.0681852102279663,
      "learning_rate": 9.719035601562964e-06,
      "loss": 0.0547,
      "step": 105280
    },
    {
      "epoch": 0.17232575950982895,
      "grad_norm": 4.608088970184326,
      "learning_rate": 9.718969709349447e-06,
      "loss": 0.0722,
      "step": 105300
    },
    {
      "epoch": 0.1723584899484823,
      "grad_norm": 2.708775043487549,
      "learning_rate": 9.71890381713593e-06,
      "loss": 0.0725,
      "step": 105320
    },
    {
      "epoch": 0.17239122038713564,
      "grad_norm": 4.118777275085449,
      "learning_rate": 9.718837924922413e-06,
      "loss": 0.0683,
      "step": 105340
    },
    {
      "epoch": 0.17242395082578896,
      "grad_norm": 2.675088405609131,
      "learning_rate": 9.718772032708895e-06,
      "loss": 0.0681,
      "step": 105360
    },
    {
      "epoch": 0.1724566812644423,
      "grad_norm": 2.2747650146484375,
      "learning_rate": 9.718706140495378e-06,
      "loss": 0.0587,
      "step": 105380
    },
    {
      "epoch": 0.17248941170309565,
      "grad_norm": 2.3081963062286377,
      "learning_rate": 9.71864024828186e-06,
      "loss": 0.0727,
      "step": 105400
    },
    {
      "epoch": 0.172522142141749,
      "grad_norm": 3.6664040088653564,
      "learning_rate": 9.718574356068344e-06,
      "loss": 0.0577,
      "step": 105420
    },
    {
      "epoch": 0.17255487258040234,
      "grad_norm": 3.4993996620178223,
      "learning_rate": 9.718508463854827e-06,
      "loss": 0.0731,
      "step": 105440
    },
    {
      "epoch": 0.17258760301905565,
      "grad_norm": 3.9867169857025146,
      "learning_rate": 9.71844257164131e-06,
      "loss": 0.0835,
      "step": 105460
    },
    {
      "epoch": 0.172620333457709,
      "grad_norm": 1.7225706577301025,
      "learning_rate": 9.718376679427793e-06,
      "loss": 0.0752,
      "step": 105480
    },
    {
      "epoch": 0.17265306389636234,
      "grad_norm": 1.839713215827942,
      "learning_rate": 9.718310787214277e-06,
      "loss": 0.0754,
      "step": 105500
    },
    {
      "epoch": 0.17268579433501569,
      "grad_norm": 3.1923043727874756,
      "learning_rate": 9.718244895000758e-06,
      "loss": 0.0714,
      "step": 105520
    },
    {
      "epoch": 0.17271852477366903,
      "grad_norm": 1.1659603118896484,
      "learning_rate": 9.718179002787242e-06,
      "loss": 0.0664,
      "step": 105540
    },
    {
      "epoch": 0.17275125521232235,
      "grad_norm": 2.3388359546661377,
      "learning_rate": 9.718113110573726e-06,
      "loss": 0.0691,
      "step": 105560
    },
    {
      "epoch": 0.1727839856509757,
      "grad_norm": 15.826678276062012,
      "learning_rate": 9.718047218360207e-06,
      "loss": 0.085,
      "step": 105580
    },
    {
      "epoch": 0.17281671608962904,
      "grad_norm": 0.9760979413986206,
      "learning_rate": 9.717981326146691e-06,
      "loss": 0.0603,
      "step": 105600
    },
    {
      "epoch": 0.17284944652828238,
      "grad_norm": 1.3010272979736328,
      "learning_rate": 9.717915433933173e-06,
      "loss": 0.0619,
      "step": 105620
    },
    {
      "epoch": 0.17288217696693572,
      "grad_norm": 2.5579562187194824,
      "learning_rate": 9.717849541719657e-06,
      "loss": 0.0652,
      "step": 105640
    },
    {
      "epoch": 0.17291490740558904,
      "grad_norm": 1.8571174144744873,
      "learning_rate": 9.717783649506138e-06,
      "loss": 0.0704,
      "step": 105660
    },
    {
      "epoch": 0.17294763784424239,
      "grad_norm": 1.3800840377807617,
      "learning_rate": 9.717717757292622e-06,
      "loss": 0.0721,
      "step": 105680
    },
    {
      "epoch": 0.17298036828289573,
      "grad_norm": 1.6169062852859497,
      "learning_rate": 9.717651865079104e-06,
      "loss": 0.083,
      "step": 105700
    },
    {
      "epoch": 0.17301309872154907,
      "grad_norm": 4.72258996963501,
      "learning_rate": 9.717585972865587e-06,
      "loss": 0.0609,
      "step": 105720
    },
    {
      "epoch": 0.1730458291602024,
      "grad_norm": 2.7101972103118896,
      "learning_rate": 9.71752008065207e-06,
      "loss": 0.0802,
      "step": 105740
    },
    {
      "epoch": 0.17307855959885574,
      "grad_norm": 0.8563096523284912,
      "learning_rate": 9.717454188438553e-06,
      "loss": 0.0702,
      "step": 105760
    },
    {
      "epoch": 0.17311129003750908,
      "grad_norm": 1.684800148010254,
      "learning_rate": 9.717388296225035e-06,
      "loss": 0.0711,
      "step": 105780
    },
    {
      "epoch": 0.17314402047616242,
      "grad_norm": 3.3963327407836914,
      "learning_rate": 9.717322404011518e-06,
      "loss": 0.0645,
      "step": 105800
    },
    {
      "epoch": 0.17317675091481577,
      "grad_norm": 1.2648028135299683,
      "learning_rate": 9.717256511798e-06,
      "loss": 0.0647,
      "step": 105820
    },
    {
      "epoch": 0.17320948135346909,
      "grad_norm": 1.822887659072876,
      "learning_rate": 9.717190619584484e-06,
      "loss": 0.0795,
      "step": 105840
    },
    {
      "epoch": 0.17324221179212243,
      "grad_norm": 3.4157204627990723,
      "learning_rate": 9.717124727370968e-06,
      "loss": 0.0571,
      "step": 105860
    },
    {
      "epoch": 0.17327494223077577,
      "grad_norm": 3.6051528453826904,
      "learning_rate": 9.71705883515745e-06,
      "loss": 0.0862,
      "step": 105880
    },
    {
      "epoch": 0.17330767266942912,
      "grad_norm": 4.908985137939453,
      "learning_rate": 9.716992942943933e-06,
      "loss": 0.0544,
      "step": 105900
    },
    {
      "epoch": 0.17334040310808246,
      "grad_norm": 2.5812573432922363,
      "learning_rate": 9.716927050730417e-06,
      "loss": 0.0595,
      "step": 105920
    },
    {
      "epoch": 0.17337313354673578,
      "grad_norm": 2.5146372318267822,
      "learning_rate": 9.716861158516898e-06,
      "loss": 0.0802,
      "step": 105940
    },
    {
      "epoch": 0.17340586398538912,
      "grad_norm": 3.109506130218506,
      "learning_rate": 9.716795266303382e-06,
      "loss": 0.0651,
      "step": 105960
    },
    {
      "epoch": 0.17343859442404247,
      "grad_norm": 3.853627920150757,
      "learning_rate": 9.716729374089866e-06,
      "loss": 0.0829,
      "step": 105980
    },
    {
      "epoch": 0.1734713248626958,
      "grad_norm": 2.3053455352783203,
      "learning_rate": 9.716663481876348e-06,
      "loss": 0.0921,
      "step": 106000
    },
    {
      "epoch": 0.17350405530134916,
      "grad_norm": 3.264620304107666,
      "learning_rate": 9.716597589662831e-06,
      "loss": 0.0659,
      "step": 106020
    },
    {
      "epoch": 0.17353678574000247,
      "grad_norm": 2.1281399726867676,
      "learning_rate": 9.716531697449313e-06,
      "loss": 0.0582,
      "step": 106040
    },
    {
      "epoch": 0.17356951617865582,
      "grad_norm": 2.705217123031616,
      "learning_rate": 9.716465805235797e-06,
      "loss": 0.0798,
      "step": 106060
    },
    {
      "epoch": 0.17360224661730916,
      "grad_norm": 4.150274753570557,
      "learning_rate": 9.716399913022279e-06,
      "loss": 0.0833,
      "step": 106080
    },
    {
      "epoch": 0.1736349770559625,
      "grad_norm": 2.795225143432617,
      "learning_rate": 9.716334020808762e-06,
      "loss": 0.1012,
      "step": 106100
    },
    {
      "epoch": 0.17366770749461585,
      "grad_norm": 5.769581317901611,
      "learning_rate": 9.716268128595244e-06,
      "loss": 0.0769,
      "step": 106120
    },
    {
      "epoch": 0.17370043793326917,
      "grad_norm": 3.121375560760498,
      "learning_rate": 9.716202236381728e-06,
      "loss": 0.0682,
      "step": 106140
    },
    {
      "epoch": 0.17373316837192251,
      "grad_norm": 3.1760475635528564,
      "learning_rate": 9.71613634416821e-06,
      "loss": 0.0653,
      "step": 106160
    },
    {
      "epoch": 0.17376589881057586,
      "grad_norm": 2.202352523803711,
      "learning_rate": 9.716070451954693e-06,
      "loss": 0.0614,
      "step": 106180
    },
    {
      "epoch": 0.1737986292492292,
      "grad_norm": 31.119136810302734,
      "learning_rate": 9.716004559741177e-06,
      "loss": 0.067,
      "step": 106200
    },
    {
      "epoch": 0.17383135968788255,
      "grad_norm": 2.8056132793426514,
      "learning_rate": 9.715938667527659e-06,
      "loss": 0.0611,
      "step": 106220
    },
    {
      "epoch": 0.17386409012653586,
      "grad_norm": 2.1455583572387695,
      "learning_rate": 9.715872775314142e-06,
      "loss": 0.0541,
      "step": 106240
    },
    {
      "epoch": 0.1738968205651892,
      "grad_norm": 5.755025386810303,
      "learning_rate": 9.715806883100624e-06,
      "loss": 0.0601,
      "step": 106260
    },
    {
      "epoch": 0.17392955100384255,
      "grad_norm": 2.3574299812316895,
      "learning_rate": 9.715740990887108e-06,
      "loss": 0.083,
      "step": 106280
    },
    {
      "epoch": 0.1739622814424959,
      "grad_norm": 0.38089826703071594,
      "learning_rate": 9.715675098673591e-06,
      "loss": 0.066,
      "step": 106300
    },
    {
      "epoch": 0.17399501188114924,
      "grad_norm": 2.693840980529785,
      "learning_rate": 9.715609206460073e-06,
      "loss": 0.0606,
      "step": 106320
    },
    {
      "epoch": 0.17402774231980256,
      "grad_norm": 3.041945457458496,
      "learning_rate": 9.715543314246557e-06,
      "loss": 0.0785,
      "step": 106340
    },
    {
      "epoch": 0.1740604727584559,
      "grad_norm": 2.5334699153900146,
      "learning_rate": 9.71547742203304e-06,
      "loss": 0.055,
      "step": 106360
    },
    {
      "epoch": 0.17409320319710925,
      "grad_norm": 2.173159599304199,
      "learning_rate": 9.715411529819522e-06,
      "loss": 0.0859,
      "step": 106380
    },
    {
      "epoch": 0.1741259336357626,
      "grad_norm": 4.5568342208862305,
      "learning_rate": 9.715345637606006e-06,
      "loss": 0.0791,
      "step": 106400
    },
    {
      "epoch": 0.17415866407441594,
      "grad_norm": 3.662813425064087,
      "learning_rate": 9.715279745392488e-06,
      "loss": 0.0688,
      "step": 106420
    },
    {
      "epoch": 0.17419139451306925,
      "grad_norm": 5.467440128326416,
      "learning_rate": 9.715213853178971e-06,
      "loss": 0.053,
      "step": 106440
    },
    {
      "epoch": 0.1742241249517226,
      "grad_norm": 4.002991199493408,
      "learning_rate": 9.715147960965453e-06,
      "loss": 0.0456,
      "step": 106460
    },
    {
      "epoch": 0.17425685539037594,
      "grad_norm": 3.8132436275482178,
      "learning_rate": 9.715082068751937e-06,
      "loss": 0.0657,
      "step": 106480
    },
    {
      "epoch": 0.1742895858290293,
      "grad_norm": 3.8097171783447266,
      "learning_rate": 9.715016176538419e-06,
      "loss": 0.0644,
      "step": 106500
    },
    {
      "epoch": 0.17432231626768263,
      "grad_norm": 1.475093960762024,
      "learning_rate": 9.714950284324902e-06,
      "loss": 0.0701,
      "step": 106520
    },
    {
      "epoch": 0.17435504670633595,
      "grad_norm": 2.0443451404571533,
      "learning_rate": 9.714884392111384e-06,
      "loss": 0.0527,
      "step": 106540
    },
    {
      "epoch": 0.1743877771449893,
      "grad_norm": 2.597106456756592,
      "learning_rate": 9.714818499897868e-06,
      "loss": 0.0717,
      "step": 106560
    },
    {
      "epoch": 0.17442050758364264,
      "grad_norm": 1.8577051162719727,
      "learning_rate": 9.714752607684351e-06,
      "loss": 0.0442,
      "step": 106580
    },
    {
      "epoch": 0.17445323802229598,
      "grad_norm": 2.663717031478882,
      "learning_rate": 9.714686715470833e-06,
      "loss": 0.0721,
      "step": 106600
    },
    {
      "epoch": 0.17448596846094933,
      "grad_norm": 2.775064706802368,
      "learning_rate": 9.714620823257317e-06,
      "loss": 0.0644,
      "step": 106620
    },
    {
      "epoch": 0.17451869889960264,
      "grad_norm": 1.5264719724655151,
      "learning_rate": 9.714554931043799e-06,
      "loss": 0.0765,
      "step": 106640
    },
    {
      "epoch": 0.174551429338256,
      "grad_norm": 3.976706027984619,
      "learning_rate": 9.714489038830282e-06,
      "loss": 0.0666,
      "step": 106660
    },
    {
      "epoch": 0.17458415977690933,
      "grad_norm": 4.610646724700928,
      "learning_rate": 9.714423146616764e-06,
      "loss": 0.0737,
      "step": 106680
    },
    {
      "epoch": 0.17461689021556268,
      "grad_norm": 5.041335582733154,
      "learning_rate": 9.714357254403248e-06,
      "loss": 0.0824,
      "step": 106700
    },
    {
      "epoch": 0.17464962065421602,
      "grad_norm": 2.943953275680542,
      "learning_rate": 9.714291362189731e-06,
      "loss": 0.0761,
      "step": 106720
    },
    {
      "epoch": 0.17468235109286934,
      "grad_norm": 2.951869249343872,
      "learning_rate": 9.714225469976213e-06,
      "loss": 0.065,
      "step": 106740
    },
    {
      "epoch": 0.17471508153152268,
      "grad_norm": 1.091613531112671,
      "learning_rate": 9.714159577762697e-06,
      "loss": 0.0624,
      "step": 106760
    },
    {
      "epoch": 0.17474781197017603,
      "grad_norm": 4.173067092895508,
      "learning_rate": 9.71409368554918e-06,
      "loss": 0.076,
      "step": 106780
    },
    {
      "epoch": 0.17478054240882937,
      "grad_norm": 3.218721389770508,
      "learning_rate": 9.714027793335662e-06,
      "loss": 0.0735,
      "step": 106800
    },
    {
      "epoch": 0.17481327284748271,
      "grad_norm": 0.3558299243450165,
      "learning_rate": 9.713961901122146e-06,
      "loss": 0.1018,
      "step": 106820
    },
    {
      "epoch": 0.17484600328613603,
      "grad_norm": 1.440813422203064,
      "learning_rate": 9.713896008908628e-06,
      "loss": 0.0688,
      "step": 106840
    },
    {
      "epoch": 0.17487873372478938,
      "grad_norm": 1.8712166547775269,
      "learning_rate": 9.713830116695111e-06,
      "loss": 0.0557,
      "step": 106860
    },
    {
      "epoch": 0.17491146416344272,
      "grad_norm": 1.5744125843048096,
      "learning_rate": 9.713764224481593e-06,
      "loss": 0.0584,
      "step": 106880
    },
    {
      "epoch": 0.17494419460209606,
      "grad_norm": 6.583874225616455,
      "learning_rate": 9.713698332268077e-06,
      "loss": 0.0612,
      "step": 106900
    },
    {
      "epoch": 0.1749769250407494,
      "grad_norm": 4.250274181365967,
      "learning_rate": 9.71363244005456e-06,
      "loss": 0.0652,
      "step": 106920
    },
    {
      "epoch": 0.17500965547940273,
      "grad_norm": 3.309758424758911,
      "learning_rate": 9.713566547841042e-06,
      "loss": 0.0793,
      "step": 106940
    },
    {
      "epoch": 0.17504238591805607,
      "grad_norm": 2.1508677005767822,
      "learning_rate": 9.713500655627526e-06,
      "loss": 0.084,
      "step": 106960
    },
    {
      "epoch": 0.17507511635670941,
      "grad_norm": 2.5806093215942383,
      "learning_rate": 9.713434763414008e-06,
      "loss": 0.0759,
      "step": 106980
    },
    {
      "epoch": 0.17510784679536276,
      "grad_norm": 1.5717004537582397,
      "learning_rate": 9.713368871200491e-06,
      "loss": 0.0782,
      "step": 107000
    },
    {
      "epoch": 0.1751405772340161,
      "grad_norm": 2.906412363052368,
      "learning_rate": 9.713302978986973e-06,
      "loss": 0.0699,
      "step": 107020
    },
    {
      "epoch": 0.17517330767266942,
      "grad_norm": 2.793264865875244,
      "learning_rate": 9.713237086773457e-06,
      "loss": 0.0501,
      "step": 107040
    },
    {
      "epoch": 0.17520603811132276,
      "grad_norm": 2.068411111831665,
      "learning_rate": 9.713171194559939e-06,
      "loss": 0.0551,
      "step": 107060
    },
    {
      "epoch": 0.1752387685499761,
      "grad_norm": 3.773615598678589,
      "learning_rate": 9.713105302346422e-06,
      "loss": 0.0694,
      "step": 107080
    },
    {
      "epoch": 0.17527149898862945,
      "grad_norm": 1.2935450077056885,
      "learning_rate": 9.713039410132906e-06,
      "loss": 0.0663,
      "step": 107100
    },
    {
      "epoch": 0.17530422942728277,
      "grad_norm": 1.8258988857269287,
      "learning_rate": 9.712973517919388e-06,
      "loss": 0.0697,
      "step": 107120
    },
    {
      "epoch": 0.17533695986593612,
      "grad_norm": 1.9281468391418457,
      "learning_rate": 9.712907625705871e-06,
      "loss": 0.0506,
      "step": 107140
    },
    {
      "epoch": 0.17536969030458946,
      "grad_norm": 2.2949352264404297,
      "learning_rate": 9.712841733492355e-06,
      "loss": 0.0459,
      "step": 107160
    },
    {
      "epoch": 0.1754024207432428,
      "grad_norm": 8.080209732055664,
      "learning_rate": 9.712775841278837e-06,
      "loss": 0.0656,
      "step": 107180
    },
    {
      "epoch": 0.17543515118189615,
      "grad_norm": 2.9802780151367188,
      "learning_rate": 9.71270994906532e-06,
      "loss": 0.0789,
      "step": 107200
    },
    {
      "epoch": 0.17546788162054947,
      "grad_norm": 2.710819959640503,
      "learning_rate": 9.712644056851802e-06,
      "loss": 0.0815,
      "step": 107220
    },
    {
      "epoch": 0.1755006120592028,
      "grad_norm": 1.1865383386611938,
      "learning_rate": 9.712578164638286e-06,
      "loss": 0.0642,
      "step": 107240
    },
    {
      "epoch": 0.17553334249785615,
      "grad_norm": 4.386138916015625,
      "learning_rate": 9.71251227242477e-06,
      "loss": 0.0569,
      "step": 107260
    },
    {
      "epoch": 0.1755660729365095,
      "grad_norm": 3.389500617980957,
      "learning_rate": 9.712446380211251e-06,
      "loss": 0.0668,
      "step": 107280
    },
    {
      "epoch": 0.17559880337516284,
      "grad_norm": 1.243593692779541,
      "learning_rate": 9.712380487997735e-06,
      "loss": 0.059,
      "step": 107300
    },
    {
      "epoch": 0.17563153381381616,
      "grad_norm": 1.870561122894287,
      "learning_rate": 9.712314595784217e-06,
      "loss": 0.0584,
      "step": 107320
    },
    {
      "epoch": 0.1756642642524695,
      "grad_norm": 2.9953904151916504,
      "learning_rate": 9.7122487035707e-06,
      "loss": 0.0347,
      "step": 107340
    },
    {
      "epoch": 0.17569699469112285,
      "grad_norm": 5.042973518371582,
      "learning_rate": 9.712182811357182e-06,
      "loss": 0.0681,
      "step": 107360
    },
    {
      "epoch": 0.1757297251297762,
      "grad_norm": 3.2537763118743896,
      "learning_rate": 9.712116919143666e-06,
      "loss": 0.0541,
      "step": 107380
    },
    {
      "epoch": 0.17576245556842954,
      "grad_norm": 2.2266805171966553,
      "learning_rate": 9.712051026930148e-06,
      "loss": 0.0771,
      "step": 107400
    },
    {
      "epoch": 0.17579518600708285,
      "grad_norm": 2.6690514087677,
      "learning_rate": 9.711985134716631e-06,
      "loss": 0.0836,
      "step": 107420
    },
    {
      "epoch": 0.1758279164457362,
      "grad_norm": 3.154573917388916,
      "learning_rate": 9.711919242503113e-06,
      "loss": 0.0625,
      "step": 107440
    },
    {
      "epoch": 0.17586064688438954,
      "grad_norm": 1.7715237140655518,
      "learning_rate": 9.711853350289597e-06,
      "loss": 0.0706,
      "step": 107460
    },
    {
      "epoch": 0.1758933773230429,
      "grad_norm": 2.2302908897399902,
      "learning_rate": 9.711787458076079e-06,
      "loss": 0.0659,
      "step": 107480
    },
    {
      "epoch": 0.17592610776169623,
      "grad_norm": 1.1368193626403809,
      "learning_rate": 9.711721565862562e-06,
      "loss": 0.0506,
      "step": 107500
    },
    {
      "epoch": 0.17595883820034955,
      "grad_norm": 3.4762916564941406,
      "learning_rate": 9.711655673649046e-06,
      "loss": 0.0603,
      "step": 107520
    },
    {
      "epoch": 0.1759915686390029,
      "grad_norm": 2.6736977100372314,
      "learning_rate": 9.711589781435528e-06,
      "loss": 0.0744,
      "step": 107540
    },
    {
      "epoch": 0.17602429907765624,
      "grad_norm": 4.499207496643066,
      "learning_rate": 9.711523889222011e-06,
      "loss": 0.064,
      "step": 107560
    },
    {
      "epoch": 0.17605702951630958,
      "grad_norm": 1.0156400203704834,
      "learning_rate": 9.711457997008495e-06,
      "loss": 0.0527,
      "step": 107580
    },
    {
      "epoch": 0.17608975995496293,
      "grad_norm": 3.1578996181488037,
      "learning_rate": 9.711392104794977e-06,
      "loss": 0.067,
      "step": 107600
    },
    {
      "epoch": 0.17612249039361624,
      "grad_norm": 9.065412521362305,
      "learning_rate": 9.71132621258146e-06,
      "loss": 0.0573,
      "step": 107620
    },
    {
      "epoch": 0.1761552208322696,
      "grad_norm": 0.7521172165870667,
      "learning_rate": 9.711260320367944e-06,
      "loss": 0.0599,
      "step": 107640
    },
    {
      "epoch": 0.17618795127092293,
      "grad_norm": 2.4200243949890137,
      "learning_rate": 9.711194428154426e-06,
      "loss": 0.0574,
      "step": 107660
    },
    {
      "epoch": 0.17622068170957628,
      "grad_norm": 4.591860771179199,
      "learning_rate": 9.71112853594091e-06,
      "loss": 0.0714,
      "step": 107680
    },
    {
      "epoch": 0.17625341214822962,
      "grad_norm": 1.2132385969161987,
      "learning_rate": 9.711062643727391e-06,
      "loss": 0.0696,
      "step": 107700
    },
    {
      "epoch": 0.17628614258688294,
      "grad_norm": 1.8803001642227173,
      "learning_rate": 9.710996751513875e-06,
      "loss": 0.0746,
      "step": 107720
    },
    {
      "epoch": 0.17631887302553628,
      "grad_norm": 4.825801372528076,
      "learning_rate": 9.710930859300357e-06,
      "loss": 0.0561,
      "step": 107740
    },
    {
      "epoch": 0.17635160346418963,
      "grad_norm": 3.2270240783691406,
      "learning_rate": 9.71086496708684e-06,
      "loss": 0.0643,
      "step": 107760
    },
    {
      "epoch": 0.17638433390284297,
      "grad_norm": 1.6874333620071411,
      "learning_rate": 9.710799074873322e-06,
      "loss": 0.0673,
      "step": 107780
    },
    {
      "epoch": 0.17641706434149632,
      "grad_norm": 2.1408283710479736,
      "learning_rate": 9.710733182659806e-06,
      "loss": 0.071,
      "step": 107800
    },
    {
      "epoch": 0.17644979478014963,
      "grad_norm": 3.432217597961426,
      "learning_rate": 9.710667290446288e-06,
      "loss": 0.0761,
      "step": 107820
    },
    {
      "epoch": 0.17648252521880298,
      "grad_norm": 3.181103229522705,
      "learning_rate": 9.710601398232771e-06,
      "loss": 0.0506,
      "step": 107840
    },
    {
      "epoch": 0.17651525565745632,
      "grad_norm": 1.5001405477523804,
      "learning_rate": 9.710535506019253e-06,
      "loss": 0.0601,
      "step": 107860
    },
    {
      "epoch": 0.17654798609610967,
      "grad_norm": 4.606447219848633,
      "learning_rate": 9.710469613805737e-06,
      "loss": 0.0532,
      "step": 107880
    },
    {
      "epoch": 0.176580716534763,
      "grad_norm": 2.193758964538574,
      "learning_rate": 9.71040372159222e-06,
      "loss": 0.0848,
      "step": 107900
    },
    {
      "epoch": 0.17661344697341633,
      "grad_norm": 4.55067253112793,
      "learning_rate": 9.710337829378702e-06,
      "loss": 0.0476,
      "step": 107920
    },
    {
      "epoch": 0.17664617741206967,
      "grad_norm": 5.8469414710998535,
      "learning_rate": 9.710271937165186e-06,
      "loss": 0.0636,
      "step": 107940
    },
    {
      "epoch": 0.17667890785072302,
      "grad_norm": 0.3696379065513611,
      "learning_rate": 9.71020604495167e-06,
      "loss": 0.0929,
      "step": 107960
    },
    {
      "epoch": 0.17671163828937636,
      "grad_norm": 1.3905837535858154,
      "learning_rate": 9.710140152738151e-06,
      "loss": 0.0625,
      "step": 107980
    },
    {
      "epoch": 0.1767443687280297,
      "grad_norm": 2.0996177196502686,
      "learning_rate": 9.710074260524635e-06,
      "loss": 0.0686,
      "step": 108000
    },
    {
      "epoch": 0.17677709916668302,
      "grad_norm": 45.92568588256836,
      "learning_rate": 9.710008368311119e-06,
      "loss": 0.0674,
      "step": 108020
    },
    {
      "epoch": 0.17680982960533637,
      "grad_norm": 1.7336318492889404,
      "learning_rate": 9.7099424760976e-06,
      "loss": 0.0619,
      "step": 108040
    },
    {
      "epoch": 0.1768425600439897,
      "grad_norm": 1.1317672729492188,
      "learning_rate": 9.709876583884084e-06,
      "loss": 0.0771,
      "step": 108060
    },
    {
      "epoch": 0.17687529048264305,
      "grad_norm": 3.025123357772827,
      "learning_rate": 9.709810691670566e-06,
      "loss": 0.0719,
      "step": 108080
    },
    {
      "epoch": 0.1769080209212964,
      "grad_norm": 1.5014536380767822,
      "learning_rate": 9.70974479945705e-06,
      "loss": 0.0626,
      "step": 108100
    },
    {
      "epoch": 0.17694075135994972,
      "grad_norm": 4.209896564483643,
      "learning_rate": 9.709678907243532e-06,
      "loss": 0.0741,
      "step": 108120
    },
    {
      "epoch": 0.17697348179860306,
      "grad_norm": 0.5091542601585388,
      "learning_rate": 9.709613015030015e-06,
      "loss": 0.0739,
      "step": 108140
    },
    {
      "epoch": 0.1770062122372564,
      "grad_norm": 2.120424509048462,
      "learning_rate": 9.709547122816497e-06,
      "loss": 0.0535,
      "step": 108160
    },
    {
      "epoch": 0.17703894267590975,
      "grad_norm": 5.314662933349609,
      "learning_rate": 9.70948123060298e-06,
      "loss": 0.0582,
      "step": 108180
    },
    {
      "epoch": 0.1770716731145631,
      "grad_norm": 3.38318133354187,
      "learning_rate": 9.709415338389462e-06,
      "loss": 0.0547,
      "step": 108200
    },
    {
      "epoch": 0.1771044035532164,
      "grad_norm": 1.9618581533432007,
      "learning_rate": 9.709349446175946e-06,
      "loss": 0.0611,
      "step": 108220
    },
    {
      "epoch": 0.17713713399186976,
      "grad_norm": 0.845295250415802,
      "learning_rate": 9.709283553962428e-06,
      "loss": 0.0702,
      "step": 108240
    },
    {
      "epoch": 0.1771698644305231,
      "grad_norm": 1.3106940984725952,
      "learning_rate": 9.709217661748912e-06,
      "loss": 0.0699,
      "step": 108260
    },
    {
      "epoch": 0.17720259486917644,
      "grad_norm": 1.651619553565979,
      "learning_rate": 9.709151769535395e-06,
      "loss": 0.0833,
      "step": 108280
    },
    {
      "epoch": 0.1772353253078298,
      "grad_norm": 5.836263656616211,
      "learning_rate": 9.709085877321877e-06,
      "loss": 0.0688,
      "step": 108300
    },
    {
      "epoch": 0.1772680557464831,
      "grad_norm": 3.490424156188965,
      "learning_rate": 9.70901998510836e-06,
      "loss": 0.0664,
      "step": 108320
    },
    {
      "epoch": 0.17730078618513645,
      "grad_norm": 2.5129899978637695,
      "learning_rate": 9.708954092894844e-06,
      "loss": 0.0705,
      "step": 108340
    },
    {
      "epoch": 0.1773335166237898,
      "grad_norm": 1.3331751823425293,
      "learning_rate": 9.708888200681326e-06,
      "loss": 0.0699,
      "step": 108360
    },
    {
      "epoch": 0.17736624706244314,
      "grad_norm": 2.4300429821014404,
      "learning_rate": 9.70882230846781e-06,
      "loss": 0.0776,
      "step": 108380
    },
    {
      "epoch": 0.17739897750109648,
      "grad_norm": 2.338371753692627,
      "learning_rate": 9.708756416254293e-06,
      "loss": 0.0652,
      "step": 108400
    },
    {
      "epoch": 0.1774317079397498,
      "grad_norm": 1.2426025867462158,
      "learning_rate": 9.708690524040775e-06,
      "loss": 0.0641,
      "step": 108420
    },
    {
      "epoch": 0.17746443837840314,
      "grad_norm": 0.6865028738975525,
      "learning_rate": 9.708624631827259e-06,
      "loss": 0.0663,
      "step": 108440
    },
    {
      "epoch": 0.1774971688170565,
      "grad_norm": 2.1838483810424805,
      "learning_rate": 9.70855873961374e-06,
      "loss": 0.0631,
      "step": 108460
    },
    {
      "epoch": 0.17752989925570983,
      "grad_norm": 2.545496702194214,
      "learning_rate": 9.708492847400224e-06,
      "loss": 0.0594,
      "step": 108480
    },
    {
      "epoch": 0.17756262969436318,
      "grad_norm": 1.6867713928222656,
      "learning_rate": 9.708426955186706e-06,
      "loss": 0.0554,
      "step": 108500
    },
    {
      "epoch": 0.1775953601330165,
      "grad_norm": 6.632059097290039,
      "learning_rate": 9.70836106297319e-06,
      "loss": 0.0787,
      "step": 108520
    },
    {
      "epoch": 0.17762809057166984,
      "grad_norm": 1.818253517150879,
      "learning_rate": 9.708295170759672e-06,
      "loss": 0.0741,
      "step": 108540
    },
    {
      "epoch": 0.17766082101032318,
      "grad_norm": 5.928689956665039,
      "learning_rate": 9.708229278546155e-06,
      "loss": 0.0858,
      "step": 108560
    },
    {
      "epoch": 0.17769355144897653,
      "grad_norm": 0.9826771020889282,
      "learning_rate": 9.708163386332637e-06,
      "loss": 0.0655,
      "step": 108580
    },
    {
      "epoch": 0.17772628188762984,
      "grad_norm": 1.851407766342163,
      "learning_rate": 9.70809749411912e-06,
      "loss": 0.0622,
      "step": 108600
    },
    {
      "epoch": 0.1777590123262832,
      "grad_norm": 1.3638752698898315,
      "learning_rate": 9.708031601905603e-06,
      "loss": 0.057,
      "step": 108620
    },
    {
      "epoch": 0.17779174276493653,
      "grad_norm": 6.870255947113037,
      "learning_rate": 9.707965709692086e-06,
      "loss": 0.0501,
      "step": 108640
    },
    {
      "epoch": 0.17782447320358988,
      "grad_norm": 1.8251354694366455,
      "learning_rate": 9.707899817478568e-06,
      "loss": 0.0566,
      "step": 108660
    },
    {
      "epoch": 0.17785720364224322,
      "grad_norm": 1.2951650619506836,
      "learning_rate": 9.707833925265052e-06,
      "loss": 0.0504,
      "step": 108680
    },
    {
      "epoch": 0.17788993408089654,
      "grad_norm": 4.044419765472412,
      "learning_rate": 9.707768033051535e-06,
      "loss": 0.047,
      "step": 108700
    },
    {
      "epoch": 0.17792266451954988,
      "grad_norm": 1.3311091661453247,
      "learning_rate": 9.707702140838017e-06,
      "loss": 0.0771,
      "step": 108720
    },
    {
      "epoch": 0.17795539495820323,
      "grad_norm": 2.483452320098877,
      "learning_rate": 9.7076362486245e-06,
      "loss": 0.061,
      "step": 108740
    },
    {
      "epoch": 0.17798812539685657,
      "grad_norm": 7.7946648597717285,
      "learning_rate": 9.707570356410984e-06,
      "loss": 0.0571,
      "step": 108760
    },
    {
      "epoch": 0.17802085583550992,
      "grad_norm": 1.3713268041610718,
      "learning_rate": 9.707504464197466e-06,
      "loss": 0.074,
      "step": 108780
    },
    {
      "epoch": 0.17805358627416323,
      "grad_norm": 3.582308292388916,
      "learning_rate": 9.70743857198395e-06,
      "loss": 0.0668,
      "step": 108800
    },
    {
      "epoch": 0.17808631671281658,
      "grad_norm": 3.282813549041748,
      "learning_rate": 9.707372679770433e-06,
      "loss": 0.0678,
      "step": 108820
    },
    {
      "epoch": 0.17811904715146992,
      "grad_norm": 0.8384778499603271,
      "learning_rate": 9.707306787556915e-06,
      "loss": 0.0617,
      "step": 108840
    },
    {
      "epoch": 0.17815177759012327,
      "grad_norm": 2.8424508571624756,
      "learning_rate": 9.707240895343399e-06,
      "loss": 0.0637,
      "step": 108860
    },
    {
      "epoch": 0.1781845080287766,
      "grad_norm": 4.317929744720459,
      "learning_rate": 9.70717500312988e-06,
      "loss": 0.0763,
      "step": 108880
    },
    {
      "epoch": 0.17821723846742993,
      "grad_norm": 0.6278550028800964,
      "learning_rate": 9.707109110916364e-06,
      "loss": 0.065,
      "step": 108900
    },
    {
      "epoch": 0.17824996890608327,
      "grad_norm": 3.442453384399414,
      "learning_rate": 9.707043218702846e-06,
      "loss": 0.0619,
      "step": 108920
    },
    {
      "epoch": 0.17828269934473662,
      "grad_norm": 8.467491149902344,
      "learning_rate": 9.70697732648933e-06,
      "loss": 0.0736,
      "step": 108940
    },
    {
      "epoch": 0.17831542978338996,
      "grad_norm": 3.3657796382904053,
      "learning_rate": 9.706911434275812e-06,
      "loss": 0.0616,
      "step": 108960
    },
    {
      "epoch": 0.1783481602220433,
      "grad_norm": 1.6586427688598633,
      "learning_rate": 9.706845542062295e-06,
      "loss": 0.0688,
      "step": 108980
    },
    {
      "epoch": 0.17838089066069662,
      "grad_norm": 5.23740816116333,
      "learning_rate": 9.706779649848777e-06,
      "loss": 0.0486,
      "step": 109000
    },
    {
      "epoch": 0.17841362109934997,
      "grad_norm": 4.971273899078369,
      "learning_rate": 9.70671375763526e-06,
      "loss": 0.0588,
      "step": 109020
    },
    {
      "epoch": 0.1784463515380033,
      "grad_norm": 3.5328729152679443,
      "learning_rate": 9.706647865421744e-06,
      "loss": 0.0671,
      "step": 109040
    },
    {
      "epoch": 0.17847908197665666,
      "grad_norm": 0.8580484986305237,
      "learning_rate": 9.706581973208226e-06,
      "loss": 0.0702,
      "step": 109060
    },
    {
      "epoch": 0.17851181241531,
      "grad_norm": 2.4953598976135254,
      "learning_rate": 9.70651608099471e-06,
      "loss": 0.0592,
      "step": 109080
    },
    {
      "epoch": 0.17854454285396332,
      "grad_norm": 1.4840084314346313,
      "learning_rate": 9.706450188781192e-06,
      "loss": 0.0691,
      "step": 109100
    },
    {
      "epoch": 0.17857727329261666,
      "grad_norm": 2.467590093612671,
      "learning_rate": 9.706384296567675e-06,
      "loss": 0.0757,
      "step": 109120
    },
    {
      "epoch": 0.17861000373127,
      "grad_norm": 2.071309804916382,
      "learning_rate": 9.706318404354159e-06,
      "loss": 0.0724,
      "step": 109140
    },
    {
      "epoch": 0.17864273416992335,
      "grad_norm": 2.133943557739258,
      "learning_rate": 9.70625251214064e-06,
      "loss": 0.0575,
      "step": 109160
    },
    {
      "epoch": 0.1786754646085767,
      "grad_norm": 1.5205878019332886,
      "learning_rate": 9.706186619927124e-06,
      "loss": 0.063,
      "step": 109180
    },
    {
      "epoch": 0.17870819504723,
      "grad_norm": 2.2358014583587646,
      "learning_rate": 9.706120727713608e-06,
      "loss": 0.072,
      "step": 109200
    },
    {
      "epoch": 0.17874092548588336,
      "grad_norm": 5.092596530914307,
      "learning_rate": 9.70605483550009e-06,
      "loss": 0.0662,
      "step": 109220
    },
    {
      "epoch": 0.1787736559245367,
      "grad_norm": 2.156049966812134,
      "learning_rate": 9.705988943286573e-06,
      "loss": 0.0605,
      "step": 109240
    },
    {
      "epoch": 0.17880638636319005,
      "grad_norm": 1.9720268249511719,
      "learning_rate": 9.705923051073055e-06,
      "loss": 0.0794,
      "step": 109260
    },
    {
      "epoch": 0.1788391168018434,
      "grad_norm": 5.9115471839904785,
      "learning_rate": 9.705857158859539e-06,
      "loss": 0.0653,
      "step": 109280
    },
    {
      "epoch": 0.1788718472404967,
      "grad_norm": 3.2590115070343018,
      "learning_rate": 9.70579126664602e-06,
      "loss": 0.0632,
      "step": 109300
    },
    {
      "epoch": 0.17890457767915005,
      "grad_norm": 5.050242900848389,
      "learning_rate": 9.705725374432504e-06,
      "loss": 0.0655,
      "step": 109320
    },
    {
      "epoch": 0.1789373081178034,
      "grad_norm": 3.2713067531585693,
      "learning_rate": 9.705659482218986e-06,
      "loss": 0.0622,
      "step": 109340
    },
    {
      "epoch": 0.17897003855645674,
      "grad_norm": 3.3436121940612793,
      "learning_rate": 9.70559359000547e-06,
      "loss": 0.0569,
      "step": 109360
    },
    {
      "epoch": 0.17900276899511008,
      "grad_norm": 4.306865692138672,
      "learning_rate": 9.705527697791953e-06,
      "loss": 0.084,
      "step": 109380
    },
    {
      "epoch": 0.1790354994337634,
      "grad_norm": 2.686697483062744,
      "learning_rate": 9.705461805578435e-06,
      "loss": 0.0636,
      "step": 109400
    },
    {
      "epoch": 0.17906822987241675,
      "grad_norm": 1.714653730392456,
      "learning_rate": 9.705395913364919e-06,
      "loss": 0.0587,
      "step": 109420
    },
    {
      "epoch": 0.1791009603110701,
      "grad_norm": 4.476484298706055,
      "learning_rate": 9.7053300211514e-06,
      "loss": 0.0624,
      "step": 109440
    },
    {
      "epoch": 0.17913369074972343,
      "grad_norm": 1.1960254907608032,
      "learning_rate": 9.705264128937884e-06,
      "loss": 0.0737,
      "step": 109460
    },
    {
      "epoch": 0.17916642118837678,
      "grad_norm": 1.8419532775878906,
      "learning_rate": 9.705198236724366e-06,
      "loss": 0.0567,
      "step": 109480
    },
    {
      "epoch": 0.1791991516270301,
      "grad_norm": 3.3571584224700928,
      "learning_rate": 9.70513234451085e-06,
      "loss": 0.0781,
      "step": 109500
    },
    {
      "epoch": 0.17923188206568344,
      "grad_norm": 1.0489649772644043,
      "learning_rate": 9.705066452297332e-06,
      "loss": 0.084,
      "step": 109520
    },
    {
      "epoch": 0.17926461250433678,
      "grad_norm": 7.144311428070068,
      "learning_rate": 9.705000560083815e-06,
      "loss": 0.0563,
      "step": 109540
    },
    {
      "epoch": 0.17929734294299013,
      "grad_norm": 2.517385721206665,
      "learning_rate": 9.704934667870299e-06,
      "loss": 0.0644,
      "step": 109560
    },
    {
      "epoch": 0.17933007338164347,
      "grad_norm": 2.9887142181396484,
      "learning_rate": 9.70486877565678e-06,
      "loss": 0.0588,
      "step": 109580
    },
    {
      "epoch": 0.1793628038202968,
      "grad_norm": 4.625096321105957,
      "learning_rate": 9.704802883443264e-06,
      "loss": 0.0621,
      "step": 109600
    },
    {
      "epoch": 0.17939553425895013,
      "grad_norm": 2.8427131175994873,
      "learning_rate": 9.704736991229748e-06,
      "loss": 0.0586,
      "step": 109620
    },
    {
      "epoch": 0.17942826469760348,
      "grad_norm": 4.143832206726074,
      "learning_rate": 9.70467109901623e-06,
      "loss": 0.0582,
      "step": 109640
    },
    {
      "epoch": 0.17946099513625682,
      "grad_norm": 8.290376663208008,
      "learning_rate": 9.704605206802713e-06,
      "loss": 0.0624,
      "step": 109660
    },
    {
      "epoch": 0.17949372557491017,
      "grad_norm": 4.862304210662842,
      "learning_rate": 9.704539314589195e-06,
      "loss": 0.0605,
      "step": 109680
    },
    {
      "epoch": 0.17952645601356348,
      "grad_norm": 4.070952892303467,
      "learning_rate": 9.704473422375679e-06,
      "loss": 0.0695,
      "step": 109700
    },
    {
      "epoch": 0.17955918645221683,
      "grad_norm": 1.6196573972702026,
      "learning_rate": 9.704407530162163e-06,
      "loss": 0.0565,
      "step": 109720
    },
    {
      "epoch": 0.17959191689087017,
      "grad_norm": 3.8974244594573975,
      "learning_rate": 9.704341637948644e-06,
      "loss": 0.065,
      "step": 109740
    },
    {
      "epoch": 0.17962464732952352,
      "grad_norm": 2.2672696113586426,
      "learning_rate": 9.704275745735128e-06,
      "loss": 0.0641,
      "step": 109760
    },
    {
      "epoch": 0.17965737776817686,
      "grad_norm": 3.635070562362671,
      "learning_rate": 9.70420985352161e-06,
      "loss": 0.0745,
      "step": 109780
    },
    {
      "epoch": 0.17969010820683018,
      "grad_norm": 4.270194053649902,
      "learning_rate": 9.704143961308094e-06,
      "loss": 0.0547,
      "step": 109800
    },
    {
      "epoch": 0.17972283864548352,
      "grad_norm": 3.6894619464874268,
      "learning_rate": 9.704078069094575e-06,
      "loss": 0.0676,
      "step": 109820
    },
    {
      "epoch": 0.17975556908413687,
      "grad_norm": 2.6797618865966797,
      "learning_rate": 9.704012176881059e-06,
      "loss": 0.0683,
      "step": 109840
    },
    {
      "epoch": 0.1797882995227902,
      "grad_norm": 3.652684450149536,
      "learning_rate": 9.703946284667541e-06,
      "loss": 0.0744,
      "step": 109860
    },
    {
      "epoch": 0.17982102996144356,
      "grad_norm": 4.086041450500488,
      "learning_rate": 9.703880392454024e-06,
      "loss": 0.0591,
      "step": 109880
    },
    {
      "epoch": 0.17985376040009687,
      "grad_norm": 1.8610600233078003,
      "learning_rate": 9.703814500240506e-06,
      "loss": 0.062,
      "step": 109900
    },
    {
      "epoch": 0.17988649083875022,
      "grad_norm": 2.8344407081604004,
      "learning_rate": 9.70374860802699e-06,
      "loss": 0.0675,
      "step": 109920
    },
    {
      "epoch": 0.17991922127740356,
      "grad_norm": 3.420320510864258,
      "learning_rate": 9.703682715813474e-06,
      "loss": 0.071,
      "step": 109940
    },
    {
      "epoch": 0.1799519517160569,
      "grad_norm": 3.2908358573913574,
      "learning_rate": 9.703616823599955e-06,
      "loss": 0.06,
      "step": 109960
    },
    {
      "epoch": 0.17998468215471022,
      "grad_norm": 3.2495784759521484,
      "learning_rate": 9.703550931386439e-06,
      "loss": 0.0505,
      "step": 109980
    },
    {
      "epoch": 0.18001741259336357,
      "grad_norm": 8.808055877685547,
      "learning_rate": 9.703485039172923e-06,
      "loss": 0.0842,
      "step": 110000
    },
    {
      "epoch": 0.1800501430320169,
      "grad_norm": 5.401017665863037,
      "learning_rate": 9.703419146959404e-06,
      "loss": 0.0618,
      "step": 110020
    },
    {
      "epoch": 0.18008287347067026,
      "grad_norm": 0.5840879082679749,
      "learning_rate": 9.703353254745888e-06,
      "loss": 0.0621,
      "step": 110040
    },
    {
      "epoch": 0.1801156039093236,
      "grad_norm": 1.009494423866272,
      "learning_rate": 9.70328736253237e-06,
      "loss": 0.0606,
      "step": 110060
    },
    {
      "epoch": 0.18014833434797692,
      "grad_norm": 2.0749402046203613,
      "learning_rate": 9.703221470318854e-06,
      "loss": 0.0636,
      "step": 110080
    },
    {
      "epoch": 0.18018106478663026,
      "grad_norm": 1.3878289461135864,
      "learning_rate": 9.703155578105337e-06,
      "loss": 0.0619,
      "step": 110100
    },
    {
      "epoch": 0.1802137952252836,
      "grad_norm": 3.906480550765991,
      "learning_rate": 9.703089685891819e-06,
      "loss": 0.0629,
      "step": 110120
    },
    {
      "epoch": 0.18024652566393695,
      "grad_norm": 2.0220704078674316,
      "learning_rate": 9.703023793678303e-06,
      "loss": 0.077,
      "step": 110140
    },
    {
      "epoch": 0.1802792561025903,
      "grad_norm": 3.1662704944610596,
      "learning_rate": 9.702957901464785e-06,
      "loss": 0.0734,
      "step": 110160
    },
    {
      "epoch": 0.1803119865412436,
      "grad_norm": 34.32963943481445,
      "learning_rate": 9.702892009251268e-06,
      "loss": 0.0605,
      "step": 110180
    },
    {
      "epoch": 0.18034471697989696,
      "grad_norm": 3.059358596801758,
      "learning_rate": 9.70282611703775e-06,
      "loss": 0.0774,
      "step": 110200
    },
    {
      "epoch": 0.1803774474185503,
      "grad_norm": 2.9288806915283203,
      "learning_rate": 9.702760224824234e-06,
      "loss": 0.0617,
      "step": 110220
    },
    {
      "epoch": 0.18041017785720365,
      "grad_norm": 2.177212953567505,
      "learning_rate": 9.702694332610715e-06,
      "loss": 0.0646,
      "step": 110240
    },
    {
      "epoch": 0.180442908295857,
      "grad_norm": 1.3174530267715454,
      "learning_rate": 9.702628440397199e-06,
      "loss": 0.0737,
      "step": 110260
    },
    {
      "epoch": 0.1804756387345103,
      "grad_norm": 3.021599531173706,
      "learning_rate": 9.702562548183681e-06,
      "loss": 0.0744,
      "step": 110280
    },
    {
      "epoch": 0.18050836917316365,
      "grad_norm": 4.064413070678711,
      "learning_rate": 9.702496655970165e-06,
      "loss": 0.0627,
      "step": 110300
    },
    {
      "epoch": 0.180541099611817,
      "grad_norm": 1.1607584953308105,
      "learning_rate": 9.702430763756646e-06,
      "loss": 0.0576,
      "step": 110320
    },
    {
      "epoch": 0.18057383005047034,
      "grad_norm": 3.8032422065734863,
      "learning_rate": 9.70236487154313e-06,
      "loss": 0.0613,
      "step": 110340
    },
    {
      "epoch": 0.18060656048912369,
      "grad_norm": 2.7360031604766846,
      "learning_rate": 9.702298979329614e-06,
      "loss": 0.0686,
      "step": 110360
    },
    {
      "epoch": 0.180639290927777,
      "grad_norm": 2.371626138687134,
      "learning_rate": 9.702233087116097e-06,
      "loss": 0.0586,
      "step": 110380
    },
    {
      "epoch": 0.18067202136643035,
      "grad_norm": 2.9921462535858154,
      "learning_rate": 9.702167194902579e-06,
      "loss": 0.0804,
      "step": 110400
    },
    {
      "epoch": 0.1807047518050837,
      "grad_norm": 2.926621675491333,
      "learning_rate": 9.702101302689063e-06,
      "loss": 0.0717,
      "step": 110420
    },
    {
      "epoch": 0.18073748224373704,
      "grad_norm": 1.6523106098175049,
      "learning_rate": 9.702035410475546e-06,
      "loss": 0.0661,
      "step": 110440
    },
    {
      "epoch": 0.18077021268239038,
      "grad_norm": 2.9538652896881104,
      "learning_rate": 9.701969518262028e-06,
      "loss": 0.0693,
      "step": 110460
    },
    {
      "epoch": 0.1808029431210437,
      "grad_norm": 0.819333016872406,
      "learning_rate": 9.701903626048512e-06,
      "loss": 0.0758,
      "step": 110480
    },
    {
      "epoch": 0.18083567355969704,
      "grad_norm": 7.75225305557251,
      "learning_rate": 9.701837733834994e-06,
      "loss": 0.0549,
      "step": 110500
    },
    {
      "epoch": 0.18086840399835039,
      "grad_norm": 2.850140333175659,
      "learning_rate": 9.701771841621477e-06,
      "loss": 0.0658,
      "step": 110520
    },
    {
      "epoch": 0.18090113443700373,
      "grad_norm": 3.1090149879455566,
      "learning_rate": 9.701705949407959e-06,
      "loss": 0.0767,
      "step": 110540
    },
    {
      "epoch": 0.18093386487565707,
      "grad_norm": 0.49199220538139343,
      "learning_rate": 9.701640057194443e-06,
      "loss": 0.0641,
      "step": 110560
    },
    {
      "epoch": 0.1809665953143104,
      "grad_norm": 3.0416321754455566,
      "learning_rate": 9.701574164980925e-06,
      "loss": 0.0609,
      "step": 110580
    },
    {
      "epoch": 0.18099932575296374,
      "grad_norm": 5.694687366485596,
      "learning_rate": 9.701508272767408e-06,
      "loss": 0.0822,
      "step": 110600
    },
    {
      "epoch": 0.18103205619161708,
      "grad_norm": 2.554502010345459,
      "learning_rate": 9.70144238055389e-06,
      "loss": 0.0696,
      "step": 110620
    },
    {
      "epoch": 0.18106478663027042,
      "grad_norm": 3.443251132965088,
      "learning_rate": 9.701376488340374e-06,
      "loss": 0.0806,
      "step": 110640
    },
    {
      "epoch": 0.18109751706892377,
      "grad_norm": 1.924917221069336,
      "learning_rate": 9.701310596126856e-06,
      "loss": 0.0684,
      "step": 110660
    },
    {
      "epoch": 0.18113024750757709,
      "grad_norm": 2.9408576488494873,
      "learning_rate": 9.701244703913339e-06,
      "loss": 0.0767,
      "step": 110680
    },
    {
      "epoch": 0.18116297794623043,
      "grad_norm": 2.598567247390747,
      "learning_rate": 9.701178811699821e-06,
      "loss": 0.0669,
      "step": 110700
    },
    {
      "epoch": 0.18119570838488377,
      "grad_norm": 2.3378400802612305,
      "learning_rate": 9.701112919486305e-06,
      "loss": 0.0572,
      "step": 110720
    },
    {
      "epoch": 0.18122843882353712,
      "grad_norm": 3.4595234394073486,
      "learning_rate": 9.701047027272788e-06,
      "loss": 0.0825,
      "step": 110740
    },
    {
      "epoch": 0.18126116926219046,
      "grad_norm": 2.3678932189941406,
      "learning_rate": 9.70098113505927e-06,
      "loss": 0.0599,
      "step": 110760
    },
    {
      "epoch": 0.18129389970084378,
      "grad_norm": 1.6312882900238037,
      "learning_rate": 9.700915242845754e-06,
      "loss": 0.0599,
      "step": 110780
    },
    {
      "epoch": 0.18132663013949712,
      "grad_norm": 1.3636339902877808,
      "learning_rate": 9.700849350632237e-06,
      "loss": 0.0674,
      "step": 110800
    },
    {
      "epoch": 0.18135936057815047,
      "grad_norm": 1.9298769235610962,
      "learning_rate": 9.70078345841872e-06,
      "loss": 0.0712,
      "step": 110820
    },
    {
      "epoch": 0.1813920910168038,
      "grad_norm": 3.2346463203430176,
      "learning_rate": 9.700717566205203e-06,
      "loss": 0.0683,
      "step": 110840
    },
    {
      "epoch": 0.18142482145545716,
      "grad_norm": 2.9922537803649902,
      "learning_rate": 9.700651673991686e-06,
      "loss": 0.0597,
      "step": 110860
    },
    {
      "epoch": 0.18145755189411047,
      "grad_norm": 2.28998064994812,
      "learning_rate": 9.700585781778168e-06,
      "loss": 0.0513,
      "step": 110880
    },
    {
      "epoch": 0.18149028233276382,
      "grad_norm": 2.8818302154541016,
      "learning_rate": 9.700519889564652e-06,
      "loss": 0.0733,
      "step": 110900
    },
    {
      "epoch": 0.18152301277141716,
      "grad_norm": 2.432396650314331,
      "learning_rate": 9.700453997351134e-06,
      "loss": 0.0687,
      "step": 110920
    },
    {
      "epoch": 0.1815557432100705,
      "grad_norm": 0.7583855390548706,
      "learning_rate": 9.700388105137617e-06,
      "loss": 0.077,
      "step": 110940
    },
    {
      "epoch": 0.18158847364872385,
      "grad_norm": 0.6227602958679199,
      "learning_rate": 9.7003222129241e-06,
      "loss": 0.0726,
      "step": 110960
    },
    {
      "epoch": 0.18162120408737717,
      "grad_norm": 1.979588508605957,
      "learning_rate": 9.700256320710583e-06,
      "loss": 0.0677,
      "step": 110980
    },
    {
      "epoch": 0.1816539345260305,
      "grad_norm": 2.7363698482513428,
      "learning_rate": 9.700190428497065e-06,
      "loss": 0.0608,
      "step": 111000
    },
    {
      "epoch": 0.18168666496468386,
      "grad_norm": 1.258923888206482,
      "learning_rate": 9.700124536283548e-06,
      "loss": 0.0662,
      "step": 111020
    },
    {
      "epoch": 0.1817193954033372,
      "grad_norm": 3.820415496826172,
      "learning_rate": 9.70005864407003e-06,
      "loss": 0.0522,
      "step": 111040
    },
    {
      "epoch": 0.18175212584199055,
      "grad_norm": 8.374078750610352,
      "learning_rate": 9.699992751856514e-06,
      "loss": 0.0546,
      "step": 111060
    },
    {
      "epoch": 0.18178485628064386,
      "grad_norm": 0.7423911094665527,
      "learning_rate": 9.699926859642996e-06,
      "loss": 0.0601,
      "step": 111080
    },
    {
      "epoch": 0.1818175867192972,
      "grad_norm": 1.3926740884780884,
      "learning_rate": 9.69986096742948e-06,
      "loss": 0.0665,
      "step": 111100
    },
    {
      "epoch": 0.18185031715795055,
      "grad_norm": 5.174673080444336,
      "learning_rate": 9.699795075215963e-06,
      "loss": 0.0641,
      "step": 111120
    },
    {
      "epoch": 0.1818830475966039,
      "grad_norm": 11.029179573059082,
      "learning_rate": 9.699729183002445e-06,
      "loss": 0.0628,
      "step": 111140
    },
    {
      "epoch": 0.18191577803525724,
      "grad_norm": 10.115206718444824,
      "learning_rate": 9.699663290788928e-06,
      "loss": 0.0656,
      "step": 111160
    },
    {
      "epoch": 0.18194850847391056,
      "grad_norm": 3.2257795333862305,
      "learning_rate": 9.699597398575412e-06,
      "loss": 0.0693,
      "step": 111180
    },
    {
      "epoch": 0.1819812389125639,
      "grad_norm": 0.6342287063598633,
      "learning_rate": 9.699531506361894e-06,
      "loss": 0.0732,
      "step": 111200
    },
    {
      "epoch": 0.18201396935121725,
      "grad_norm": 4.24736213684082,
      "learning_rate": 9.699465614148377e-06,
      "loss": 0.0623,
      "step": 111220
    },
    {
      "epoch": 0.1820466997898706,
      "grad_norm": 1.731295108795166,
      "learning_rate": 9.699399721934861e-06,
      "loss": 0.0669,
      "step": 111240
    },
    {
      "epoch": 0.18207943022852394,
      "grad_norm": 1.2700765132904053,
      "learning_rate": 9.699333829721343e-06,
      "loss": 0.0863,
      "step": 111260
    },
    {
      "epoch": 0.18211216066717725,
      "grad_norm": 2.596468925476074,
      "learning_rate": 9.699267937507826e-06,
      "loss": 0.0638,
      "step": 111280
    },
    {
      "epoch": 0.1821448911058306,
      "grad_norm": 1.4197598695755005,
      "learning_rate": 9.699202045294308e-06,
      "loss": 0.0592,
      "step": 111300
    },
    {
      "epoch": 0.18217762154448394,
      "grad_norm": 3.3402552604675293,
      "learning_rate": 9.699136153080792e-06,
      "loss": 0.0749,
      "step": 111320
    },
    {
      "epoch": 0.1822103519831373,
      "grad_norm": 4.424317836761475,
      "learning_rate": 9.699070260867274e-06,
      "loss": 0.0674,
      "step": 111340
    },
    {
      "epoch": 0.1822430824217906,
      "grad_norm": 3.3263654708862305,
      "learning_rate": 9.699004368653757e-06,
      "loss": 0.055,
      "step": 111360
    },
    {
      "epoch": 0.18227581286044395,
      "grad_norm": 2.968738555908203,
      "learning_rate": 9.69893847644024e-06,
      "loss": 0.0676,
      "step": 111380
    },
    {
      "epoch": 0.1823085432990973,
      "grad_norm": 3.4890661239624023,
      "learning_rate": 9.698872584226723e-06,
      "loss": 0.0694,
      "step": 111400
    },
    {
      "epoch": 0.18234127373775064,
      "grad_norm": 0.8279820084571838,
      "learning_rate": 9.698806692013205e-06,
      "loss": 0.0671,
      "step": 111420
    },
    {
      "epoch": 0.18237400417640398,
      "grad_norm": 1.763708472251892,
      "learning_rate": 9.698740799799688e-06,
      "loss": 0.0638,
      "step": 111440
    },
    {
      "epoch": 0.1824067346150573,
      "grad_norm": 2.7715601921081543,
      "learning_rate": 9.69867490758617e-06,
      "loss": 0.0631,
      "step": 111460
    },
    {
      "epoch": 0.18243946505371064,
      "grad_norm": 3.3499245643615723,
      "learning_rate": 9.698609015372654e-06,
      "loss": 0.0651,
      "step": 111480
    },
    {
      "epoch": 0.182472195492364,
      "grad_norm": 3.62687349319458,
      "learning_rate": 9.698543123159137e-06,
      "loss": 0.0583,
      "step": 111500
    },
    {
      "epoch": 0.18250492593101733,
      "grad_norm": 1.4762814044952393,
      "learning_rate": 9.69847723094562e-06,
      "loss": 0.0686,
      "step": 111520
    },
    {
      "epoch": 0.18253765636967068,
      "grad_norm": 3.2633607387542725,
      "learning_rate": 9.698411338732103e-06,
      "loss": 0.0598,
      "step": 111540
    },
    {
      "epoch": 0.182570386808324,
      "grad_norm": 1.212436556816101,
      "learning_rate": 9.698345446518585e-06,
      "loss": 0.0699,
      "step": 111560
    },
    {
      "epoch": 0.18260311724697734,
      "grad_norm": 3.019362211227417,
      "learning_rate": 9.698279554305068e-06,
      "loss": 0.0614,
      "step": 111580
    },
    {
      "epoch": 0.18263584768563068,
      "grad_norm": 2.392871856689453,
      "learning_rate": 9.698213662091552e-06,
      "loss": 0.0639,
      "step": 111600
    },
    {
      "epoch": 0.18266857812428403,
      "grad_norm": 2.7461719512939453,
      "learning_rate": 9.698147769878034e-06,
      "loss": 0.0624,
      "step": 111620
    },
    {
      "epoch": 0.18270130856293737,
      "grad_norm": 2.3608627319335938,
      "learning_rate": 9.698081877664517e-06,
      "loss": 0.0625,
      "step": 111640
    },
    {
      "epoch": 0.1827340390015907,
      "grad_norm": 3.3110108375549316,
      "learning_rate": 9.698015985451001e-06,
      "loss": 0.0596,
      "step": 111660
    },
    {
      "epoch": 0.18276676944024403,
      "grad_norm": 2.08781361579895,
      "learning_rate": 9.697950093237483e-06,
      "loss": 0.0519,
      "step": 111680
    },
    {
      "epoch": 0.18279949987889738,
      "grad_norm": 3.0723342895507812,
      "learning_rate": 9.697884201023966e-06,
      "loss": 0.0614,
      "step": 111700
    },
    {
      "epoch": 0.18283223031755072,
      "grad_norm": 3.1921966075897217,
      "learning_rate": 9.697818308810448e-06,
      "loss": 0.0629,
      "step": 111720
    },
    {
      "epoch": 0.18286496075620406,
      "grad_norm": 1.577966332435608,
      "learning_rate": 9.697752416596932e-06,
      "loss": 0.0415,
      "step": 111740
    },
    {
      "epoch": 0.18289769119485738,
      "grad_norm": 2.695404052734375,
      "learning_rate": 9.697686524383414e-06,
      "loss": 0.057,
      "step": 111760
    },
    {
      "epoch": 0.18293042163351073,
      "grad_norm": 1.9219988584518433,
      "learning_rate": 9.697620632169897e-06,
      "loss": 0.0857,
      "step": 111780
    },
    {
      "epoch": 0.18296315207216407,
      "grad_norm": 2.2443180084228516,
      "learning_rate": 9.69755473995638e-06,
      "loss": 0.063,
      "step": 111800
    },
    {
      "epoch": 0.18299588251081741,
      "grad_norm": 3.897512674331665,
      "learning_rate": 9.697488847742863e-06,
      "loss": 0.0685,
      "step": 111820
    },
    {
      "epoch": 0.18302861294947076,
      "grad_norm": 1.271814227104187,
      "learning_rate": 9.697422955529347e-06,
      "loss": 0.0535,
      "step": 111840
    },
    {
      "epoch": 0.18306134338812408,
      "grad_norm": 1.247749924659729,
      "learning_rate": 9.697357063315828e-06,
      "loss": 0.0742,
      "step": 111860
    },
    {
      "epoch": 0.18309407382677742,
      "grad_norm": 3.846005916595459,
      "learning_rate": 9.697291171102312e-06,
      "loss": 0.0623,
      "step": 111880
    },
    {
      "epoch": 0.18312680426543076,
      "grad_norm": 0.9641113877296448,
      "learning_rate": 9.697225278888794e-06,
      "loss": 0.0638,
      "step": 111900
    },
    {
      "epoch": 0.1831595347040841,
      "grad_norm": 2.831343650817871,
      "learning_rate": 9.697159386675277e-06,
      "loss": 0.0599,
      "step": 111920
    },
    {
      "epoch": 0.18319226514273745,
      "grad_norm": 2.8259384632110596,
      "learning_rate": 9.69709349446176e-06,
      "loss": 0.0571,
      "step": 111940
    },
    {
      "epoch": 0.18322499558139077,
      "grad_norm": 2.1918413639068604,
      "learning_rate": 9.697027602248243e-06,
      "loss": 0.076,
      "step": 111960
    },
    {
      "epoch": 0.18325772602004411,
      "grad_norm": 1.0263231992721558,
      "learning_rate": 9.696961710034727e-06,
      "loss": 0.062,
      "step": 111980
    },
    {
      "epoch": 0.18329045645869746,
      "grad_norm": 1.2037410736083984,
      "learning_rate": 9.696895817821208e-06,
      "loss": 0.0612,
      "step": 112000
    },
    {
      "epoch": 0.1833231868973508,
      "grad_norm": 0.816822350025177,
      "learning_rate": 9.696829925607692e-06,
      "loss": 0.0563,
      "step": 112020
    },
    {
      "epoch": 0.18335591733600415,
      "grad_norm": 2.979924201965332,
      "learning_rate": 9.696764033394176e-06,
      "loss": 0.064,
      "step": 112040
    },
    {
      "epoch": 0.18338864777465747,
      "grad_norm": 1.488582730293274,
      "learning_rate": 9.696698141180657e-06,
      "loss": 0.0719,
      "step": 112060
    },
    {
      "epoch": 0.1834213782133108,
      "grad_norm": 0.7161929607391357,
      "learning_rate": 9.696632248967141e-06,
      "loss": 0.0551,
      "step": 112080
    },
    {
      "epoch": 0.18345410865196415,
      "grad_norm": 3.353492498397827,
      "learning_rate": 9.696566356753623e-06,
      "loss": 0.0524,
      "step": 112100
    },
    {
      "epoch": 0.1834868390906175,
      "grad_norm": 1.525086760520935,
      "learning_rate": 9.696500464540107e-06,
      "loss": 0.0786,
      "step": 112120
    },
    {
      "epoch": 0.18351956952927084,
      "grad_norm": 1.5749515295028687,
      "learning_rate": 9.696434572326588e-06,
      "loss": 0.057,
      "step": 112140
    },
    {
      "epoch": 0.18355229996792416,
      "grad_norm": 41.46345520019531,
      "learning_rate": 9.696368680113072e-06,
      "loss": 0.0643,
      "step": 112160
    },
    {
      "epoch": 0.1835850304065775,
      "grad_norm": 2.315511465072632,
      "learning_rate": 9.696302787899554e-06,
      "loss": 0.0696,
      "step": 112180
    },
    {
      "epoch": 0.18361776084523085,
      "grad_norm": 0.8992123007774353,
      "learning_rate": 9.696236895686038e-06,
      "loss": 0.0563,
      "step": 112200
    },
    {
      "epoch": 0.1836504912838842,
      "grad_norm": 3.1187052726745605,
      "learning_rate": 9.696171003472521e-06,
      "loss": 0.0833,
      "step": 112220
    },
    {
      "epoch": 0.18368322172253754,
      "grad_norm": 1.5262588262557983,
      "learning_rate": 9.696105111259003e-06,
      "loss": 0.0662,
      "step": 112240
    },
    {
      "epoch": 0.18371595216119085,
      "grad_norm": 2.301271438598633,
      "learning_rate": 9.696039219045487e-06,
      "loss": 0.0646,
      "step": 112260
    },
    {
      "epoch": 0.1837486825998442,
      "grad_norm": 3.298980712890625,
      "learning_rate": 9.695973326831968e-06,
      "loss": 0.0649,
      "step": 112280
    },
    {
      "epoch": 0.18378141303849754,
      "grad_norm": 3.464939594268799,
      "learning_rate": 9.695907434618452e-06,
      "loss": 0.0798,
      "step": 112300
    },
    {
      "epoch": 0.1838141434771509,
      "grad_norm": 2.7063205242156982,
      "learning_rate": 9.695841542404934e-06,
      "loss": 0.0577,
      "step": 112320
    },
    {
      "epoch": 0.18384687391580423,
      "grad_norm": 2.824937105178833,
      "learning_rate": 9.695775650191418e-06,
      "loss": 0.0651,
      "step": 112340
    },
    {
      "epoch": 0.18387960435445755,
      "grad_norm": 2.6894123554229736,
      "learning_rate": 9.6957097579779e-06,
      "loss": 0.0584,
      "step": 112360
    },
    {
      "epoch": 0.1839123347931109,
      "grad_norm": 1.5541117191314697,
      "learning_rate": 9.695643865764383e-06,
      "loss": 0.0751,
      "step": 112380
    },
    {
      "epoch": 0.18394506523176424,
      "grad_norm": 4.48434591293335,
      "learning_rate": 9.695577973550867e-06,
      "loss": 0.0634,
      "step": 112400
    },
    {
      "epoch": 0.18397779567041758,
      "grad_norm": 2.191723108291626,
      "learning_rate": 9.695512081337349e-06,
      "loss": 0.0538,
      "step": 112420
    },
    {
      "epoch": 0.18401052610907093,
      "grad_norm": 4.636616230010986,
      "learning_rate": 9.695446189123832e-06,
      "loss": 0.0558,
      "step": 112440
    },
    {
      "epoch": 0.18404325654772424,
      "grad_norm": 6.5719804763793945,
      "learning_rate": 9.695380296910316e-06,
      "loss": 0.0706,
      "step": 112460
    },
    {
      "epoch": 0.1840759869863776,
      "grad_norm": 3.520965337753296,
      "learning_rate": 9.695314404696798e-06,
      "loss": 0.0947,
      "step": 112480
    },
    {
      "epoch": 0.18410871742503093,
      "grad_norm": 1.7679129838943481,
      "learning_rate": 9.695248512483281e-06,
      "loss": 0.0708,
      "step": 112500
    },
    {
      "epoch": 0.18414144786368428,
      "grad_norm": 2.58610463142395,
      "learning_rate": 9.695182620269763e-06,
      "loss": 0.0771,
      "step": 112520
    },
    {
      "epoch": 0.18417417830233762,
      "grad_norm": 1.1208430528640747,
      "learning_rate": 9.695116728056247e-06,
      "loss": 0.0696,
      "step": 112540
    },
    {
      "epoch": 0.18420690874099094,
      "grad_norm": 2.5708067417144775,
      "learning_rate": 9.69505083584273e-06,
      "loss": 0.0665,
      "step": 112560
    },
    {
      "epoch": 0.18423963917964428,
      "grad_norm": 3.2167558670043945,
      "learning_rate": 9.694984943629212e-06,
      "loss": 0.0732,
      "step": 112580
    },
    {
      "epoch": 0.18427236961829763,
      "grad_norm": 2.1985790729522705,
      "learning_rate": 9.694919051415696e-06,
      "loss": 0.0672,
      "step": 112600
    },
    {
      "epoch": 0.18430510005695097,
      "grad_norm": 1.3393728733062744,
      "learning_rate": 9.694853159202178e-06,
      "loss": 0.0777,
      "step": 112620
    },
    {
      "epoch": 0.18433783049560432,
      "grad_norm": 4.8821821212768555,
      "learning_rate": 9.694787266988661e-06,
      "loss": 0.0788,
      "step": 112640
    },
    {
      "epoch": 0.18437056093425763,
      "grad_norm": 1.2832962274551392,
      "learning_rate": 9.694721374775143e-06,
      "loss": 0.0672,
      "step": 112660
    },
    {
      "epoch": 0.18440329137291098,
      "grad_norm": 3.179009199142456,
      "learning_rate": 9.694655482561627e-06,
      "loss": 0.0582,
      "step": 112680
    },
    {
      "epoch": 0.18443602181156432,
      "grad_norm": 1.9700300693511963,
      "learning_rate": 9.694589590348109e-06,
      "loss": 0.0653,
      "step": 112700
    },
    {
      "epoch": 0.18446875225021767,
      "grad_norm": 1.7562198638916016,
      "learning_rate": 9.694523698134592e-06,
      "loss": 0.0599,
      "step": 112720
    },
    {
      "epoch": 0.18450148268887098,
      "grad_norm": 5.937231540679932,
      "learning_rate": 9.694457805921074e-06,
      "loss": 0.0557,
      "step": 112740
    },
    {
      "epoch": 0.18453421312752433,
      "grad_norm": 1.241750717163086,
      "learning_rate": 9.694391913707558e-06,
      "loss": 0.0752,
      "step": 112760
    },
    {
      "epoch": 0.18456694356617767,
      "grad_norm": 3.7585792541503906,
      "learning_rate": 9.694326021494041e-06,
      "loss": 0.0776,
      "step": 112780
    },
    {
      "epoch": 0.18459967400483102,
      "grad_norm": 1.8421480655670166,
      "learning_rate": 9.694260129280523e-06,
      "loss": 0.0718,
      "step": 112800
    },
    {
      "epoch": 0.18463240444348436,
      "grad_norm": 2.6695592403411865,
      "learning_rate": 9.694194237067007e-06,
      "loss": 0.0659,
      "step": 112820
    },
    {
      "epoch": 0.18466513488213768,
      "grad_norm": 14.822527885437012,
      "learning_rate": 9.69412834485349e-06,
      "loss": 0.0468,
      "step": 112840
    },
    {
      "epoch": 0.18469786532079102,
      "grad_norm": 3.8886048793792725,
      "learning_rate": 9.694062452639972e-06,
      "loss": 0.0544,
      "step": 112860
    },
    {
      "epoch": 0.18473059575944437,
      "grad_norm": 10.41430950164795,
      "learning_rate": 9.693996560426456e-06,
      "loss": 0.0737,
      "step": 112880
    },
    {
      "epoch": 0.1847633261980977,
      "grad_norm": 1.0296269655227661,
      "learning_rate": 9.69393066821294e-06,
      "loss": 0.0806,
      "step": 112900
    },
    {
      "epoch": 0.18479605663675105,
      "grad_norm": 2.619590997695923,
      "learning_rate": 9.693864775999421e-06,
      "loss": 0.0635,
      "step": 112920
    },
    {
      "epoch": 0.18482878707540437,
      "grad_norm": 2.455406427383423,
      "learning_rate": 9.693798883785905e-06,
      "loss": 0.0656,
      "step": 112940
    },
    {
      "epoch": 0.18486151751405772,
      "grad_norm": 3.1960577964782715,
      "learning_rate": 9.693732991572387e-06,
      "loss": 0.0684,
      "step": 112960
    },
    {
      "epoch": 0.18489424795271106,
      "grad_norm": 2.419973850250244,
      "learning_rate": 9.69366709935887e-06,
      "loss": 0.0639,
      "step": 112980
    },
    {
      "epoch": 0.1849269783913644,
      "grad_norm": 3.675046682357788,
      "learning_rate": 9.693601207145352e-06,
      "loss": 0.0486,
      "step": 113000
    },
    {
      "epoch": 0.18495970883001775,
      "grad_norm": 5.05372953414917,
      "learning_rate": 9.693535314931836e-06,
      "loss": 0.0773,
      "step": 113020
    },
    {
      "epoch": 0.18499243926867107,
      "grad_norm": 2.990311861038208,
      "learning_rate": 9.693469422718318e-06,
      "loss": 0.0765,
      "step": 113040
    },
    {
      "epoch": 0.1850251697073244,
      "grad_norm": 1.2219182252883911,
      "learning_rate": 9.693403530504801e-06,
      "loss": 0.0577,
      "step": 113060
    },
    {
      "epoch": 0.18505790014597776,
      "grad_norm": 3.2047319412231445,
      "learning_rate": 9.693337638291283e-06,
      "loss": 0.0628,
      "step": 113080
    },
    {
      "epoch": 0.1850906305846311,
      "grad_norm": 3.118604898452759,
      "learning_rate": 9.693271746077767e-06,
      "loss": 0.0584,
      "step": 113100
    },
    {
      "epoch": 0.18512336102328444,
      "grad_norm": 2.3553595542907715,
      "learning_rate": 9.693205853864249e-06,
      "loss": 0.0652,
      "step": 113120
    },
    {
      "epoch": 0.18515609146193776,
      "grad_norm": 3.8440780639648438,
      "learning_rate": 9.693139961650732e-06,
      "loss": 0.0639,
      "step": 113140
    },
    {
      "epoch": 0.1851888219005911,
      "grad_norm": 1.0856870412826538,
      "learning_rate": 9.693074069437216e-06,
      "loss": 0.0712,
      "step": 113160
    },
    {
      "epoch": 0.18522155233924445,
      "grad_norm": 3.0839991569519043,
      "learning_rate": 9.693008177223698e-06,
      "loss": 0.0683,
      "step": 113180
    },
    {
      "epoch": 0.1852542827778978,
      "grad_norm": 4.023119926452637,
      "learning_rate": 9.692942285010181e-06,
      "loss": 0.0616,
      "step": 113200
    },
    {
      "epoch": 0.18528701321655114,
      "grad_norm": 3.132882595062256,
      "learning_rate": 9.692876392796665e-06,
      "loss": 0.0635,
      "step": 113220
    },
    {
      "epoch": 0.18531974365520446,
      "grad_norm": 1.835339069366455,
      "learning_rate": 9.692810500583147e-06,
      "loss": 0.0569,
      "step": 113240
    },
    {
      "epoch": 0.1853524740938578,
      "grad_norm": 2.0843751430511475,
      "learning_rate": 9.69274460836963e-06,
      "loss": 0.0584,
      "step": 113260
    },
    {
      "epoch": 0.18538520453251114,
      "grad_norm": 1.7179032564163208,
      "learning_rate": 9.692678716156114e-06,
      "loss": 0.0715,
      "step": 113280
    },
    {
      "epoch": 0.1854179349711645,
      "grad_norm": 1.6557328701019287,
      "learning_rate": 9.692612823942596e-06,
      "loss": 0.0617,
      "step": 113300
    },
    {
      "epoch": 0.18545066540981783,
      "grad_norm": 3.779933452606201,
      "learning_rate": 9.69254693172908e-06,
      "loss": 0.0627,
      "step": 113320
    },
    {
      "epoch": 0.18548339584847115,
      "grad_norm": 3.9988818168640137,
      "learning_rate": 9.692481039515561e-06,
      "loss": 0.0697,
      "step": 113340
    },
    {
      "epoch": 0.1855161262871245,
      "grad_norm": 5.264502048492432,
      "learning_rate": 9.692415147302045e-06,
      "loss": 0.0753,
      "step": 113360
    },
    {
      "epoch": 0.18554885672577784,
      "grad_norm": 4.575830459594727,
      "learning_rate": 9.692349255088527e-06,
      "loss": 0.0573,
      "step": 113380
    },
    {
      "epoch": 0.18558158716443118,
      "grad_norm": 4.281597137451172,
      "learning_rate": 9.69228336287501e-06,
      "loss": 0.0758,
      "step": 113400
    },
    {
      "epoch": 0.18561431760308453,
      "grad_norm": 14.112663269042969,
      "learning_rate": 9.692217470661492e-06,
      "loss": 0.0581,
      "step": 113420
    },
    {
      "epoch": 0.18564704804173784,
      "grad_norm": 1.023123860359192,
      "learning_rate": 9.692151578447976e-06,
      "loss": 0.0807,
      "step": 113440
    },
    {
      "epoch": 0.1856797784803912,
      "grad_norm": 1.2773683071136475,
      "learning_rate": 9.692085686234458e-06,
      "loss": 0.0563,
      "step": 113460
    },
    {
      "epoch": 0.18571250891904453,
      "grad_norm": 1.4665921926498413,
      "learning_rate": 9.692019794020941e-06,
      "loss": 0.0712,
      "step": 113480
    },
    {
      "epoch": 0.18574523935769788,
      "grad_norm": 3.432068347930908,
      "learning_rate": 9.691953901807423e-06,
      "loss": 0.0621,
      "step": 113500
    },
    {
      "epoch": 0.18577796979635122,
      "grad_norm": 1.932812213897705,
      "learning_rate": 9.691888009593907e-06,
      "loss": 0.0643,
      "step": 113520
    },
    {
      "epoch": 0.18581070023500454,
      "grad_norm": 3.499185800552368,
      "learning_rate": 9.691822117380389e-06,
      "loss": 0.0582,
      "step": 113540
    },
    {
      "epoch": 0.18584343067365788,
      "grad_norm": 3.797884464263916,
      "learning_rate": 9.691756225166872e-06,
      "loss": 0.0502,
      "step": 113560
    },
    {
      "epoch": 0.18587616111231123,
      "grad_norm": 10.47771167755127,
      "learning_rate": 9.691690332953356e-06,
      "loss": 0.0681,
      "step": 113580
    },
    {
      "epoch": 0.18590889155096457,
      "grad_norm": 3.2952792644500732,
      "learning_rate": 9.691624440739838e-06,
      "loss": 0.0776,
      "step": 113600
    },
    {
      "epoch": 0.18594162198961792,
      "grad_norm": 5.736794471740723,
      "learning_rate": 9.691558548526321e-06,
      "loss": 0.0612,
      "step": 113620
    },
    {
      "epoch": 0.18597435242827123,
      "grad_norm": 2.728144407272339,
      "learning_rate": 9.691492656312805e-06,
      "loss": 0.0568,
      "step": 113640
    },
    {
      "epoch": 0.18600708286692458,
      "grad_norm": 17.215444564819336,
      "learning_rate": 9.691426764099287e-06,
      "loss": 0.0654,
      "step": 113660
    },
    {
      "epoch": 0.18603981330557792,
      "grad_norm": 1.7352303266525269,
      "learning_rate": 9.69136087188577e-06,
      "loss": 0.0663,
      "step": 113680
    },
    {
      "epoch": 0.18607254374423127,
      "grad_norm": 1.9526888132095337,
      "learning_rate": 9.691294979672254e-06,
      "loss": 0.052,
      "step": 113700
    },
    {
      "epoch": 0.1861052741828846,
      "grad_norm": 2.5782201290130615,
      "learning_rate": 9.691229087458736e-06,
      "loss": 0.0747,
      "step": 113720
    },
    {
      "epoch": 0.18613800462153793,
      "grad_norm": 4.681458473205566,
      "learning_rate": 9.69116319524522e-06,
      "loss": 0.0731,
      "step": 113740
    },
    {
      "epoch": 0.18617073506019127,
      "grad_norm": 1.2024340629577637,
      "learning_rate": 9.691097303031701e-06,
      "loss": 0.0573,
      "step": 113760
    },
    {
      "epoch": 0.18620346549884462,
      "grad_norm": 2.8573968410491943,
      "learning_rate": 9.691031410818185e-06,
      "loss": 0.0495,
      "step": 113780
    },
    {
      "epoch": 0.18623619593749796,
      "grad_norm": 15.504654884338379,
      "learning_rate": 9.690965518604667e-06,
      "loss": 0.082,
      "step": 113800
    },
    {
      "epoch": 0.1862689263761513,
      "grad_norm": null,
      "learning_rate": 9.69089962639115e-06,
      "loss": 0.0555,
      "step": 113820
    },
    {
      "epoch": 0.18630165681480462,
      "grad_norm": 5.044332981109619,
      "learning_rate": 9.690833734177632e-06,
      "loss": 0.0617,
      "step": 113840
    },
    {
      "epoch": 0.18633438725345797,
      "grad_norm": 1.6648880243301392,
      "learning_rate": 9.690767841964116e-06,
      "loss": 0.0637,
      "step": 113860
    },
    {
      "epoch": 0.1863671176921113,
      "grad_norm": 5.501214504241943,
      "learning_rate": 9.690701949750598e-06,
      "loss": 0.0683,
      "step": 113880
    },
    {
      "epoch": 0.18639984813076466,
      "grad_norm": 0.5890182256698608,
      "learning_rate": 9.690636057537081e-06,
      "loss": 0.0739,
      "step": 113900
    },
    {
      "epoch": 0.186432578569418,
      "grad_norm": 0.7661017775535583,
      "learning_rate": 9.690570165323563e-06,
      "loss": 0.0699,
      "step": 113920
    },
    {
      "epoch": 0.18646530900807132,
      "grad_norm": 3.9945578575134277,
      "learning_rate": 9.690504273110047e-06,
      "loss": 0.08,
      "step": 113940
    },
    {
      "epoch": 0.18649803944672466,
      "grad_norm": 3.2991182804107666,
      "learning_rate": 9.69043838089653e-06,
      "loss": 0.0584,
      "step": 113960
    },
    {
      "epoch": 0.186530769885378,
      "grad_norm": 3.288071393966675,
      "learning_rate": 9.690372488683012e-06,
      "loss": 0.0603,
      "step": 113980
    },
    {
      "epoch": 0.18656350032403135,
      "grad_norm": 6.051664352416992,
      "learning_rate": 9.690306596469496e-06,
      "loss": 0.0658,
      "step": 114000
    },
    {
      "epoch": 0.1865962307626847,
      "grad_norm": 2.47680401802063,
      "learning_rate": 9.69024070425598e-06,
      "loss": 0.0586,
      "step": 114020
    },
    {
      "epoch": 0.186628961201338,
      "grad_norm": 3.1594271659851074,
      "learning_rate": 9.690174812042461e-06,
      "loss": 0.0772,
      "step": 114040
    },
    {
      "epoch": 0.18666169163999136,
      "grad_norm": 3.3054542541503906,
      "learning_rate": 9.690108919828945e-06,
      "loss": 0.0823,
      "step": 114060
    },
    {
      "epoch": 0.1866944220786447,
      "grad_norm": 2.8232955932617188,
      "learning_rate": 9.690043027615429e-06,
      "loss": 0.0653,
      "step": 114080
    },
    {
      "epoch": 0.18672715251729805,
      "grad_norm": 3.745211124420166,
      "learning_rate": 9.68997713540191e-06,
      "loss": 0.0635,
      "step": 114100
    },
    {
      "epoch": 0.18675988295595136,
      "grad_norm": 4.215062141418457,
      "learning_rate": 9.689911243188394e-06,
      "loss": 0.0712,
      "step": 114120
    },
    {
      "epoch": 0.1867926133946047,
      "grad_norm": 4.926278114318848,
      "learning_rate": 9.689845350974876e-06,
      "loss": 0.0727,
      "step": 114140
    },
    {
      "epoch": 0.18682534383325805,
      "grad_norm": 1.077793836593628,
      "learning_rate": 9.68977945876136e-06,
      "loss": 0.0536,
      "step": 114160
    },
    {
      "epoch": 0.1868580742719114,
      "grad_norm": 3.811445713043213,
      "learning_rate": 9.689713566547841e-06,
      "loss": 0.0668,
      "step": 114180
    },
    {
      "epoch": 0.18689080471056474,
      "grad_norm": 7.394126892089844,
      "learning_rate": 9.689647674334325e-06,
      "loss": 0.0534,
      "step": 114200
    },
    {
      "epoch": 0.18692353514921806,
      "grad_norm": 2.1607229709625244,
      "learning_rate": 9.689581782120807e-06,
      "loss": 0.06,
      "step": 114220
    },
    {
      "epoch": 0.1869562655878714,
      "grad_norm": 8.110835075378418,
      "learning_rate": 9.68951588990729e-06,
      "loss": 0.0596,
      "step": 114240
    },
    {
      "epoch": 0.18698899602652475,
      "grad_norm": 2.4540696144104004,
      "learning_rate": 9.689449997693772e-06,
      "loss": 0.0684,
      "step": 114260
    },
    {
      "epoch": 0.1870217264651781,
      "grad_norm": 1.2925410270690918,
      "learning_rate": 9.689384105480256e-06,
      "loss": 0.0648,
      "step": 114280
    },
    {
      "epoch": 0.18705445690383143,
      "grad_norm": 5.925526142120361,
      "learning_rate": 9.68931821326674e-06,
      "loss": 0.058,
      "step": 114300
    },
    {
      "epoch": 0.18708718734248475,
      "grad_norm": 3.162290573120117,
      "learning_rate": 9.689252321053221e-06,
      "loss": 0.0648,
      "step": 114320
    },
    {
      "epoch": 0.1871199177811381,
      "grad_norm": 2.0699424743652344,
      "learning_rate": 9.689186428839705e-06,
      "loss": 0.0588,
      "step": 114340
    },
    {
      "epoch": 0.18715264821979144,
      "grad_norm": 2.0589165687561035,
      "learning_rate": 9.689120536626187e-06,
      "loss": 0.0637,
      "step": 114360
    },
    {
      "epoch": 0.18718537865844478,
      "grad_norm": 1.9342238903045654,
      "learning_rate": 9.68905464441267e-06,
      "loss": 0.055,
      "step": 114380
    },
    {
      "epoch": 0.18721810909709813,
      "grad_norm": 3.4107043743133545,
      "learning_rate": 9.688988752199152e-06,
      "loss": 0.0682,
      "step": 114400
    },
    {
      "epoch": 0.18725083953575145,
      "grad_norm": 43.880523681640625,
      "learning_rate": 9.688922859985636e-06,
      "loss": 0.079,
      "step": 114420
    },
    {
      "epoch": 0.1872835699744048,
      "grad_norm": 2.4138073921203613,
      "learning_rate": 9.68885696777212e-06,
      "loss": 0.0574,
      "step": 114440
    },
    {
      "epoch": 0.18731630041305813,
      "grad_norm": 2.9429783821105957,
      "learning_rate": 9.688791075558602e-06,
      "loss": 0.0592,
      "step": 114460
    },
    {
      "epoch": 0.18734903085171148,
      "grad_norm": 6.954011917114258,
      "learning_rate": 9.688725183345085e-06,
      "loss": 0.0564,
      "step": 114480
    },
    {
      "epoch": 0.18738176129036482,
      "grad_norm": 2.325059652328491,
      "learning_rate": 9.688659291131569e-06,
      "loss": 0.0706,
      "step": 114500
    },
    {
      "epoch": 0.18741449172901814,
      "grad_norm": 1.9110435247421265,
      "learning_rate": 9.68859339891805e-06,
      "loss": 0.0417,
      "step": 114520
    },
    {
      "epoch": 0.18744722216767148,
      "grad_norm": 2.5353832244873047,
      "learning_rate": 9.688527506704534e-06,
      "loss": 0.0539,
      "step": 114540
    },
    {
      "epoch": 0.18747995260632483,
      "grad_norm": 2.831149101257324,
      "learning_rate": 9.688461614491016e-06,
      "loss": 0.0669,
      "step": 114560
    },
    {
      "epoch": 0.18751268304497817,
      "grad_norm": 1.6453113555908203,
      "learning_rate": 9.6883957222775e-06,
      "loss": 0.0634,
      "step": 114580
    },
    {
      "epoch": 0.18754541348363152,
      "grad_norm": 1.945611596107483,
      "learning_rate": 9.688329830063982e-06,
      "loss": 0.0594,
      "step": 114600
    },
    {
      "epoch": 0.18757814392228483,
      "grad_norm": 1.9969769716262817,
      "learning_rate": 9.688263937850465e-06,
      "loss": 0.0554,
      "step": 114620
    },
    {
      "epoch": 0.18761087436093818,
      "grad_norm": 6.661465644836426,
      "learning_rate": 9.688198045636947e-06,
      "loss": 0.0658,
      "step": 114640
    },
    {
      "epoch": 0.18764360479959152,
      "grad_norm": 2.795793056488037,
      "learning_rate": 9.68813215342343e-06,
      "loss": 0.0772,
      "step": 114660
    },
    {
      "epoch": 0.18767633523824487,
      "grad_norm": 2.054171085357666,
      "learning_rate": 9.688066261209914e-06,
      "loss": 0.0612,
      "step": 114680
    },
    {
      "epoch": 0.1877090656768982,
      "grad_norm": 2.265648365020752,
      "learning_rate": 9.688000368996396e-06,
      "loss": 0.0624,
      "step": 114700
    },
    {
      "epoch": 0.18774179611555153,
      "grad_norm": 2.4194231033325195,
      "learning_rate": 9.68793447678288e-06,
      "loss": 0.0455,
      "step": 114720
    },
    {
      "epoch": 0.18777452655420487,
      "grad_norm": 4.085155963897705,
      "learning_rate": 9.687868584569362e-06,
      "loss": 0.0641,
      "step": 114740
    },
    {
      "epoch": 0.18780725699285822,
      "grad_norm": 3.154459238052368,
      "learning_rate": 9.687802692355845e-06,
      "loss": 0.0678,
      "step": 114760
    },
    {
      "epoch": 0.18783998743151156,
      "grad_norm": 1.4799906015396118,
      "learning_rate": 9.687736800142327e-06,
      "loss": 0.0674,
      "step": 114780
    },
    {
      "epoch": 0.1878727178701649,
      "grad_norm": 5.732081890106201,
      "learning_rate": 9.68767090792881e-06,
      "loss": 0.0731,
      "step": 114800
    },
    {
      "epoch": 0.18790544830881822,
      "grad_norm": 17.33242416381836,
      "learning_rate": 9.687605015715294e-06,
      "loss": 0.0682,
      "step": 114820
    },
    {
      "epoch": 0.18793817874747157,
      "grad_norm": 3.691382646560669,
      "learning_rate": 9.687539123501776e-06,
      "loss": 0.0615,
      "step": 114840
    },
    {
      "epoch": 0.1879709091861249,
      "grad_norm": 2.9202489852905273,
      "learning_rate": 9.68747323128826e-06,
      "loss": 0.0591,
      "step": 114860
    },
    {
      "epoch": 0.18800363962477826,
      "grad_norm": 2.2260422706604004,
      "learning_rate": 9.687407339074743e-06,
      "loss": 0.0693,
      "step": 114880
    },
    {
      "epoch": 0.1880363700634316,
      "grad_norm": 1.123500943183899,
      "learning_rate": 9.687341446861225e-06,
      "loss": 0.057,
      "step": 114900
    },
    {
      "epoch": 0.18806910050208492,
      "grad_norm": 4.86149787902832,
      "learning_rate": 9.687275554647709e-06,
      "loss": 0.0705,
      "step": 114920
    },
    {
      "epoch": 0.18810183094073826,
      "grad_norm": 1.763411045074463,
      "learning_rate": 9.68720966243419e-06,
      "loss": 0.0664,
      "step": 114940
    },
    {
      "epoch": 0.1881345613793916,
      "grad_norm": 4.152899742126465,
      "learning_rate": 9.687143770220674e-06,
      "loss": 0.0665,
      "step": 114960
    },
    {
      "epoch": 0.18816729181804495,
      "grad_norm": 4.642617225646973,
      "learning_rate": 9.687077878007156e-06,
      "loss": 0.0678,
      "step": 114980
    },
    {
      "epoch": 0.1882000222566983,
      "grad_norm": 3.1809401512145996,
      "learning_rate": 9.68701198579364e-06,
      "loss": 0.0552,
      "step": 115000
    },
    {
      "epoch": 0.1882327526953516,
      "grad_norm": 1.1463758945465088,
      "learning_rate": 9.686946093580123e-06,
      "loss": 0.0554,
      "step": 115020
    },
    {
      "epoch": 0.18826548313400496,
      "grad_norm": 1.598276138305664,
      "learning_rate": 9.686880201366605e-06,
      "loss": 0.086,
      "step": 115040
    },
    {
      "epoch": 0.1882982135726583,
      "grad_norm": 2.105268955230713,
      "learning_rate": 9.686814309153089e-06,
      "loss": 0.0659,
      "step": 115060
    },
    {
      "epoch": 0.18833094401131165,
      "grad_norm": 1.960962176322937,
      "learning_rate": 9.68674841693957e-06,
      "loss": 0.0677,
      "step": 115080
    },
    {
      "epoch": 0.188363674449965,
      "grad_norm": 2.0588366985321045,
      "learning_rate": 9.686682524726054e-06,
      "loss": 0.0735,
      "step": 115100
    },
    {
      "epoch": 0.1883964048886183,
      "grad_norm": 5.954396724700928,
      "learning_rate": 9.686616632512536e-06,
      "loss": 0.083,
      "step": 115120
    },
    {
      "epoch": 0.18842913532727165,
      "grad_norm": 1.3934757709503174,
      "learning_rate": 9.68655074029902e-06,
      "loss": 0.0628,
      "step": 115140
    },
    {
      "epoch": 0.188461865765925,
      "grad_norm": 7.3020243644714355,
      "learning_rate": 9.686484848085502e-06,
      "loss": 0.0744,
      "step": 115160
    },
    {
      "epoch": 0.18849459620457834,
      "grad_norm": 6.03965950012207,
      "learning_rate": 9.686418955871985e-06,
      "loss": 0.0508,
      "step": 115180
    },
    {
      "epoch": 0.18852732664323169,
      "grad_norm": 5.229642868041992,
      "learning_rate": 9.686353063658467e-06,
      "loss": 0.0578,
      "step": 115200
    },
    {
      "epoch": 0.188560057081885,
      "grad_norm": 2.113037586212158,
      "learning_rate": 9.68628717144495e-06,
      "loss": 0.0513,
      "step": 115220
    },
    {
      "epoch": 0.18859278752053835,
      "grad_norm": 2.954721212387085,
      "learning_rate": 9.686221279231434e-06,
      "loss": 0.0743,
      "step": 115240
    },
    {
      "epoch": 0.1886255179591917,
      "grad_norm": 2.4273006916046143,
      "learning_rate": 9.686155387017916e-06,
      "loss": 0.0799,
      "step": 115260
    },
    {
      "epoch": 0.18865824839784504,
      "grad_norm": 3.571251630783081,
      "learning_rate": 9.6860894948044e-06,
      "loss": 0.0675,
      "step": 115280
    },
    {
      "epoch": 0.18869097883649838,
      "grad_norm": 1.445510983467102,
      "learning_rate": 9.686023602590883e-06,
      "loss": 0.0567,
      "step": 115300
    },
    {
      "epoch": 0.1887237092751517,
      "grad_norm": 1.1958199739456177,
      "learning_rate": 9.685957710377365e-06,
      "loss": 0.0653,
      "step": 115320
    },
    {
      "epoch": 0.18875643971380504,
      "grad_norm": 1.317764163017273,
      "learning_rate": 9.685891818163849e-06,
      "loss": 0.082,
      "step": 115340
    },
    {
      "epoch": 0.18878917015245839,
      "grad_norm": 2.4640259742736816,
      "learning_rate": 9.685825925950332e-06,
      "loss": 0.0603,
      "step": 115360
    },
    {
      "epoch": 0.18882190059111173,
      "grad_norm": 29.529775619506836,
      "learning_rate": 9.685760033736814e-06,
      "loss": 0.0588,
      "step": 115380
    },
    {
      "epoch": 0.18885463102976507,
      "grad_norm": 1.650100827217102,
      "learning_rate": 9.685694141523298e-06,
      "loss": 0.0755,
      "step": 115400
    },
    {
      "epoch": 0.1888873614684184,
      "grad_norm": 2.9526925086975098,
      "learning_rate": 9.68562824930978e-06,
      "loss": 0.0732,
      "step": 115420
    },
    {
      "epoch": 0.18892009190707174,
      "grad_norm": 3.5184426307678223,
      "learning_rate": 9.685562357096263e-06,
      "loss": 0.0644,
      "step": 115440
    },
    {
      "epoch": 0.18895282234572508,
      "grad_norm": 1.6281148195266724,
      "learning_rate": 9.685496464882745e-06,
      "loss": 0.0581,
      "step": 115460
    },
    {
      "epoch": 0.18898555278437842,
      "grad_norm": 3.085139513015747,
      "learning_rate": 9.685430572669229e-06,
      "loss": 0.0595,
      "step": 115480
    },
    {
      "epoch": 0.18901828322303177,
      "grad_norm": 2.1484320163726807,
      "learning_rate": 9.68536468045571e-06,
      "loss": 0.0684,
      "step": 115500
    },
    {
      "epoch": 0.18905101366168509,
      "grad_norm": 8.684664726257324,
      "learning_rate": 9.685298788242194e-06,
      "loss": 0.0655,
      "step": 115520
    },
    {
      "epoch": 0.18908374410033843,
      "grad_norm": 1.2159769535064697,
      "learning_rate": 9.685232896028676e-06,
      "loss": 0.0754,
      "step": 115540
    },
    {
      "epoch": 0.18911647453899177,
      "grad_norm": 0.5295152068138123,
      "learning_rate": 9.68516700381516e-06,
      "loss": 0.0615,
      "step": 115560
    },
    {
      "epoch": 0.18914920497764512,
      "grad_norm": 2.510984182357788,
      "learning_rate": 9.685101111601642e-06,
      "loss": 0.0673,
      "step": 115580
    },
    {
      "epoch": 0.18918193541629844,
      "grad_norm": 1.8385891914367676,
      "learning_rate": 9.685035219388125e-06,
      "loss": 0.0594,
      "step": 115600
    },
    {
      "epoch": 0.18921466585495178,
      "grad_norm": 3.991102695465088,
      "learning_rate": 9.684969327174609e-06,
      "loss": 0.0624,
      "step": 115620
    },
    {
      "epoch": 0.18924739629360512,
      "grad_norm": 1.3601970672607422,
      "learning_rate": 9.68490343496109e-06,
      "loss": 0.0657,
      "step": 115640
    },
    {
      "epoch": 0.18928012673225847,
      "grad_norm": 3.0507075786590576,
      "learning_rate": 9.684837542747574e-06,
      "loss": 0.0629,
      "step": 115660
    },
    {
      "epoch": 0.1893128571709118,
      "grad_norm": 2.5179054737091064,
      "learning_rate": 9.684771650534058e-06,
      "loss": 0.0666,
      "step": 115680
    },
    {
      "epoch": 0.18934558760956513,
      "grad_norm": 0.7363642454147339,
      "learning_rate": 9.68470575832054e-06,
      "loss": 0.0709,
      "step": 115700
    },
    {
      "epoch": 0.18937831804821847,
      "grad_norm": 3.120035171508789,
      "learning_rate": 9.684639866107023e-06,
      "loss": 0.0803,
      "step": 115720
    },
    {
      "epoch": 0.18941104848687182,
      "grad_norm": 2.0023605823516846,
      "learning_rate": 9.684573973893507e-06,
      "loss": 0.0503,
      "step": 115740
    },
    {
      "epoch": 0.18944377892552516,
      "grad_norm": 1.0888532400131226,
      "learning_rate": 9.684508081679989e-06,
      "loss": 0.0792,
      "step": 115760
    },
    {
      "epoch": 0.1894765093641785,
      "grad_norm": 1.9552171230316162,
      "learning_rate": 9.684442189466473e-06,
      "loss": 0.0822,
      "step": 115780
    },
    {
      "epoch": 0.18950923980283182,
      "grad_norm": 1.8276745080947876,
      "learning_rate": 9.684376297252954e-06,
      "loss": 0.0629,
      "step": 115800
    },
    {
      "epoch": 0.18954197024148517,
      "grad_norm": 2.1655373573303223,
      "learning_rate": 9.684310405039438e-06,
      "loss": 0.0664,
      "step": 115820
    },
    {
      "epoch": 0.1895747006801385,
      "grad_norm": 4.204871654510498,
      "learning_rate": 9.68424451282592e-06,
      "loss": 0.0677,
      "step": 115840
    },
    {
      "epoch": 0.18960743111879186,
      "grad_norm": 2.1920721530914307,
      "learning_rate": 9.684178620612403e-06,
      "loss": 0.0708,
      "step": 115860
    },
    {
      "epoch": 0.1896401615574452,
      "grad_norm": 2.3682429790496826,
      "learning_rate": 9.684112728398885e-06,
      "loss": 0.0679,
      "step": 115880
    },
    {
      "epoch": 0.18967289199609852,
      "grad_norm": 2.3254518508911133,
      "learning_rate": 9.684046836185369e-06,
      "loss": 0.0598,
      "step": 115900
    },
    {
      "epoch": 0.18970562243475186,
      "grad_norm": 2.7622017860412598,
      "learning_rate": 9.68398094397185e-06,
      "loss": 0.0676,
      "step": 115920
    },
    {
      "epoch": 0.1897383528734052,
      "grad_norm": 4.100261211395264,
      "learning_rate": 9.683915051758334e-06,
      "loss": 0.0704,
      "step": 115940
    },
    {
      "epoch": 0.18977108331205855,
      "grad_norm": 1.2384757995605469,
      "learning_rate": 9.683849159544816e-06,
      "loss": 0.0443,
      "step": 115960
    },
    {
      "epoch": 0.1898038137507119,
      "grad_norm": 4.060442924499512,
      "learning_rate": 9.6837832673313e-06,
      "loss": 0.0659,
      "step": 115980
    },
    {
      "epoch": 0.18983654418936521,
      "grad_norm": 1.8075664043426514,
      "learning_rate": 9.683717375117783e-06,
      "loss": 0.0628,
      "step": 116000
    },
    {
      "epoch": 0.18986927462801856,
      "grad_norm": 2.6902925968170166,
      "learning_rate": 9.683651482904265e-06,
      "loss": 0.065,
      "step": 116020
    },
    {
      "epoch": 0.1899020050666719,
      "grad_norm": 1.0045517683029175,
      "learning_rate": 9.683585590690749e-06,
      "loss": 0.0836,
      "step": 116040
    },
    {
      "epoch": 0.18993473550532525,
      "grad_norm": 3.3574867248535156,
      "learning_rate": 9.683519698477233e-06,
      "loss": 0.0662,
      "step": 116060
    },
    {
      "epoch": 0.1899674659439786,
      "grad_norm": 4.582911491394043,
      "learning_rate": 9.683453806263714e-06,
      "loss": 0.0614,
      "step": 116080
    },
    {
      "epoch": 0.1900001963826319,
      "grad_norm": 4.8851318359375,
      "learning_rate": 9.683387914050198e-06,
      "loss": 0.0603,
      "step": 116100
    },
    {
      "epoch": 0.19003292682128525,
      "grad_norm": 1.7661988735198975,
      "learning_rate": 9.683322021836682e-06,
      "loss": 0.0725,
      "step": 116120
    },
    {
      "epoch": 0.1900656572599386,
      "grad_norm": 1.0879430770874023,
      "learning_rate": 9.683256129623164e-06,
      "loss": 0.0628,
      "step": 116140
    },
    {
      "epoch": 0.19009838769859194,
      "grad_norm": 3.3320939540863037,
      "learning_rate": 9.683190237409647e-06,
      "loss": 0.0675,
      "step": 116160
    },
    {
      "epoch": 0.1901311181372453,
      "grad_norm": 3.6448981761932373,
      "learning_rate": 9.683124345196129e-06,
      "loss": 0.075,
      "step": 116180
    },
    {
      "epoch": 0.1901638485758986,
      "grad_norm": 3.4998531341552734,
      "learning_rate": 9.683058452982613e-06,
      "loss": 0.0653,
      "step": 116200
    },
    {
      "epoch": 0.19019657901455195,
      "grad_norm": 2.741591453552246,
      "learning_rate": 9.682992560769094e-06,
      "loss": 0.0708,
      "step": 116220
    },
    {
      "epoch": 0.1902293094532053,
      "grad_norm": 0.8181182146072388,
      "learning_rate": 9.682926668555578e-06,
      "loss": 0.0713,
      "step": 116240
    },
    {
      "epoch": 0.19026203989185864,
      "grad_norm": 2.09732985496521,
      "learning_rate": 9.68286077634206e-06,
      "loss": 0.0566,
      "step": 116260
    },
    {
      "epoch": 0.19029477033051198,
      "grad_norm": 2.0743191242218018,
      "learning_rate": 9.682794884128544e-06,
      "loss": 0.0653,
      "step": 116280
    },
    {
      "epoch": 0.1903275007691653,
      "grad_norm": 1.5688436031341553,
      "learning_rate": 9.682728991915025e-06,
      "loss": 0.0581,
      "step": 116300
    },
    {
      "epoch": 0.19036023120781864,
      "grad_norm": 1.9840322732925415,
      "learning_rate": 9.682663099701509e-06,
      "loss": 0.0523,
      "step": 116320
    },
    {
      "epoch": 0.190392961646472,
      "grad_norm": 2.5571506023406982,
      "learning_rate": 9.682597207487991e-06,
      "loss": 0.0651,
      "step": 116340
    },
    {
      "epoch": 0.19042569208512533,
      "grad_norm": 1.99705970287323,
      "learning_rate": 9.682531315274474e-06,
      "loss": 0.0603,
      "step": 116360
    },
    {
      "epoch": 0.19045842252377868,
      "grad_norm": 1.2386671304702759,
      "learning_rate": 9.682465423060956e-06,
      "loss": 0.0522,
      "step": 116380
    },
    {
      "epoch": 0.190491152962432,
      "grad_norm": 2.765479326248169,
      "learning_rate": 9.68239953084744e-06,
      "loss": 0.0628,
      "step": 116400
    },
    {
      "epoch": 0.19052388340108534,
      "grad_norm": 2.853663444519043,
      "learning_rate": 9.682333638633924e-06,
      "loss": 0.0551,
      "step": 116420
    },
    {
      "epoch": 0.19055661383973868,
      "grad_norm": 1.9057259559631348,
      "learning_rate": 9.682267746420405e-06,
      "loss": 0.0711,
      "step": 116440
    },
    {
      "epoch": 0.19058934427839203,
      "grad_norm": 1.8331061601638794,
      "learning_rate": 9.682201854206889e-06,
      "loss": 0.0776,
      "step": 116460
    },
    {
      "epoch": 0.19062207471704537,
      "grad_norm": 3.3761284351348877,
      "learning_rate": 9.682135961993373e-06,
      "loss": 0.0738,
      "step": 116480
    },
    {
      "epoch": 0.1906548051556987,
      "grad_norm": 5.0563249588012695,
      "learning_rate": 9.682070069779855e-06,
      "loss": 0.0788,
      "step": 116500
    },
    {
      "epoch": 0.19068753559435203,
      "grad_norm": 4.0470805168151855,
      "learning_rate": 9.682004177566338e-06,
      "loss": 0.0711,
      "step": 116520
    },
    {
      "epoch": 0.19072026603300538,
      "grad_norm": 2.1254465579986572,
      "learning_rate": 9.681938285352822e-06,
      "loss": 0.0505,
      "step": 116540
    },
    {
      "epoch": 0.19075299647165872,
      "grad_norm": 4.907011032104492,
      "learning_rate": 9.681872393139304e-06,
      "loss": 0.0477,
      "step": 116560
    },
    {
      "epoch": 0.19078572691031206,
      "grad_norm": 1.7638745307922363,
      "learning_rate": 9.681806500925787e-06,
      "loss": 0.0596,
      "step": 116580
    },
    {
      "epoch": 0.19081845734896538,
      "grad_norm": 3.803784132003784,
      "learning_rate": 9.681740608712269e-06,
      "loss": 0.0541,
      "step": 116600
    },
    {
      "epoch": 0.19085118778761873,
      "grad_norm": 0.5055763721466064,
      "learning_rate": 9.681674716498753e-06,
      "loss": 0.0627,
      "step": 116620
    },
    {
      "epoch": 0.19088391822627207,
      "grad_norm": 1.4452786445617676,
      "learning_rate": 9.681608824285235e-06,
      "loss": 0.0645,
      "step": 116640
    },
    {
      "epoch": 0.19091664866492541,
      "grad_norm": 1.2300574779510498,
      "learning_rate": 9.681542932071718e-06,
      "loss": 0.0825,
      "step": 116660
    },
    {
      "epoch": 0.19094937910357876,
      "grad_norm": 2.1473214626312256,
      "learning_rate": 9.6814770398582e-06,
      "loss": 0.0652,
      "step": 116680
    },
    {
      "epoch": 0.19098210954223208,
      "grad_norm": 2.502490758895874,
      "learning_rate": 9.681411147644684e-06,
      "loss": 0.0661,
      "step": 116700
    },
    {
      "epoch": 0.19101483998088542,
      "grad_norm": 2.5994672775268555,
      "learning_rate": 9.681345255431166e-06,
      "loss": 0.0602,
      "step": 116720
    },
    {
      "epoch": 0.19104757041953876,
      "grad_norm": 8.372099876403809,
      "learning_rate": 9.681279363217649e-06,
      "loss": 0.0648,
      "step": 116740
    },
    {
      "epoch": 0.1910803008581921,
      "grad_norm": 1.340782642364502,
      "learning_rate": 9.681213471004133e-06,
      "loss": 0.0676,
      "step": 116760
    },
    {
      "epoch": 0.19111303129684545,
      "grad_norm": 1.1684014797210693,
      "learning_rate": 9.681147578790615e-06,
      "loss": 0.0775,
      "step": 116780
    },
    {
      "epoch": 0.19114576173549877,
      "grad_norm": 2.1931920051574707,
      "learning_rate": 9.681081686577098e-06,
      "loss": 0.0567,
      "step": 116800
    },
    {
      "epoch": 0.19117849217415211,
      "grad_norm": 2.278562545776367,
      "learning_rate": 9.68101579436358e-06,
      "loss": 0.0745,
      "step": 116820
    },
    {
      "epoch": 0.19121122261280546,
      "grad_norm": 5.6712846755981445,
      "learning_rate": 9.680949902150064e-06,
      "loss": 0.0799,
      "step": 116840
    },
    {
      "epoch": 0.1912439530514588,
      "grad_norm": 1.4678219556808472,
      "learning_rate": 9.680884009936547e-06,
      "loss": 0.0801,
      "step": 116860
    },
    {
      "epoch": 0.19127668349011215,
      "grad_norm": 1.1971043348312378,
      "learning_rate": 9.680818117723029e-06,
      "loss": 0.0544,
      "step": 116880
    },
    {
      "epoch": 0.19130941392876546,
      "grad_norm": 3.5835657119750977,
      "learning_rate": 9.680752225509513e-06,
      "loss": 0.0802,
      "step": 116900
    },
    {
      "epoch": 0.1913421443674188,
      "grad_norm": 4.53090238571167,
      "learning_rate": 9.680686333295996e-06,
      "loss": 0.0605,
      "step": 116920
    },
    {
      "epoch": 0.19137487480607215,
      "grad_norm": 2.7614798545837402,
      "learning_rate": 9.680620441082478e-06,
      "loss": 0.0518,
      "step": 116940
    },
    {
      "epoch": 0.1914076052447255,
      "grad_norm": 2.5596530437469482,
      "learning_rate": 9.680554548868962e-06,
      "loss": 0.068,
      "step": 116960
    },
    {
      "epoch": 0.19144033568337882,
      "grad_norm": 3.421329975128174,
      "learning_rate": 9.680488656655444e-06,
      "loss": 0.0453,
      "step": 116980
    },
    {
      "epoch": 0.19147306612203216,
      "grad_norm": 4.077820777893066,
      "learning_rate": 9.680422764441927e-06,
      "loss": 0.0725,
      "step": 117000
    },
    {
      "epoch": 0.1915057965606855,
      "grad_norm": 4.82936954498291,
      "learning_rate": 9.680356872228409e-06,
      "loss": 0.0712,
      "step": 117020
    },
    {
      "epoch": 0.19153852699933885,
      "grad_norm": 4.193502426147461,
      "learning_rate": 9.680290980014893e-06,
      "loss": 0.053,
      "step": 117040
    },
    {
      "epoch": 0.1915712574379922,
      "grad_norm": 3.3717312812805176,
      "learning_rate": 9.680225087801375e-06,
      "loss": 0.0724,
      "step": 117060
    },
    {
      "epoch": 0.1916039878766455,
      "grad_norm": 3.910771369934082,
      "learning_rate": 9.680159195587858e-06,
      "loss": 0.0798,
      "step": 117080
    },
    {
      "epoch": 0.19163671831529885,
      "grad_norm": 5.520871639251709,
      "learning_rate": 9.68009330337434e-06,
      "loss": 0.0671,
      "step": 117100
    },
    {
      "epoch": 0.1916694487539522,
      "grad_norm": 2.763202428817749,
      "learning_rate": 9.680027411160824e-06,
      "loss": 0.069,
      "step": 117120
    },
    {
      "epoch": 0.19170217919260554,
      "grad_norm": 3.3065342903137207,
      "learning_rate": 9.679961518947307e-06,
      "loss": 0.0661,
      "step": 117140
    },
    {
      "epoch": 0.1917349096312589,
      "grad_norm": 1.7009626626968384,
      "learning_rate": 9.67989562673379e-06,
      "loss": 0.0695,
      "step": 117160
    },
    {
      "epoch": 0.1917676400699122,
      "grad_norm": 2.7540199756622314,
      "learning_rate": 9.679829734520273e-06,
      "loss": 0.071,
      "step": 117180
    },
    {
      "epoch": 0.19180037050856555,
      "grad_norm": 1.4468462467193604,
      "learning_rate": 9.679763842306755e-06,
      "loss": 0.0579,
      "step": 117200
    },
    {
      "epoch": 0.1918331009472189,
      "grad_norm": 3.2470672130584717,
      "learning_rate": 9.679697950093238e-06,
      "loss": 0.0672,
      "step": 117220
    },
    {
      "epoch": 0.19186583138587224,
      "grad_norm": 4.773086071014404,
      "learning_rate": 9.67963205787972e-06,
      "loss": 0.0545,
      "step": 117240
    },
    {
      "epoch": 0.19189856182452558,
      "grad_norm": 2.181201696395874,
      "learning_rate": 9.679566165666204e-06,
      "loss": 0.0576,
      "step": 117260
    },
    {
      "epoch": 0.1919312922631789,
      "grad_norm": 4.35264253616333,
      "learning_rate": 9.679500273452687e-06,
      "loss": 0.0613,
      "step": 117280
    },
    {
      "epoch": 0.19196402270183224,
      "grad_norm": 2.082029104232788,
      "learning_rate": 9.67943438123917e-06,
      "loss": 0.0814,
      "step": 117300
    },
    {
      "epoch": 0.1919967531404856,
      "grad_norm": 3.241227149963379,
      "learning_rate": 9.679368489025653e-06,
      "loss": 0.0585,
      "step": 117320
    },
    {
      "epoch": 0.19202948357913893,
      "grad_norm": 1.7458316087722778,
      "learning_rate": 9.679302596812136e-06,
      "loss": 0.0737,
      "step": 117340
    },
    {
      "epoch": 0.19206221401779228,
      "grad_norm": 5.315060615539551,
      "learning_rate": 9.679236704598618e-06,
      "loss": 0.0659,
      "step": 117360
    },
    {
      "epoch": 0.1920949444564456,
      "grad_norm": 4.114936351776123,
      "learning_rate": 9.679170812385102e-06,
      "loss": 0.0951,
      "step": 117380
    },
    {
      "epoch": 0.19212767489509894,
      "grad_norm": 0.9815667867660522,
      "learning_rate": 9.679104920171584e-06,
      "loss": 0.0704,
      "step": 117400
    },
    {
      "epoch": 0.19216040533375228,
      "grad_norm": 2.236565589904785,
      "learning_rate": 9.679039027958067e-06,
      "loss": 0.063,
      "step": 117420
    },
    {
      "epoch": 0.19219313577240563,
      "grad_norm": 2.3669283390045166,
      "learning_rate": 9.67897313574455e-06,
      "loss": 0.0643,
      "step": 117440
    },
    {
      "epoch": 0.19222586621105897,
      "grad_norm": 2.39536190032959,
      "learning_rate": 9.678907243531033e-06,
      "loss": 0.0793,
      "step": 117460
    },
    {
      "epoch": 0.1922585966497123,
      "grad_norm": 6.692952632904053,
      "learning_rate": 9.678841351317516e-06,
      "loss": 0.0732,
      "step": 117480
    },
    {
      "epoch": 0.19229132708836563,
      "grad_norm": 1.4926230907440186,
      "learning_rate": 9.678775459103998e-06,
      "loss": 0.0702,
      "step": 117500
    },
    {
      "epoch": 0.19232405752701898,
      "grad_norm": 3.1288905143737793,
      "learning_rate": 9.678709566890482e-06,
      "loss": 0.0546,
      "step": 117520
    },
    {
      "epoch": 0.19235678796567232,
      "grad_norm": 1.8039840459823608,
      "learning_rate": 9.678643674676964e-06,
      "loss": 0.0565,
      "step": 117540
    },
    {
      "epoch": 0.19238951840432567,
      "grad_norm": 7.523128032684326,
      "learning_rate": 9.678577782463447e-06,
      "loss": 0.0439,
      "step": 117560
    },
    {
      "epoch": 0.19242224884297898,
      "grad_norm": 2.9037892818450928,
      "learning_rate": 9.67851189024993e-06,
      "loss": 0.0643,
      "step": 117580
    },
    {
      "epoch": 0.19245497928163233,
      "grad_norm": 2.594003200531006,
      "learning_rate": 9.678445998036413e-06,
      "loss": 0.0921,
      "step": 117600
    },
    {
      "epoch": 0.19248770972028567,
      "grad_norm": 14.90468692779541,
      "learning_rate": 9.678380105822895e-06,
      "loss": 0.0631,
      "step": 117620
    },
    {
      "epoch": 0.19252044015893902,
      "grad_norm": 4.142923355102539,
      "learning_rate": 9.678314213609378e-06,
      "loss": 0.0632,
      "step": 117640
    },
    {
      "epoch": 0.19255317059759236,
      "grad_norm": 2.1244149208068848,
      "learning_rate": 9.678248321395862e-06,
      "loss": 0.0638,
      "step": 117660
    },
    {
      "epoch": 0.19258590103624568,
      "grad_norm": 1.9492664337158203,
      "learning_rate": 9.678182429182344e-06,
      "loss": 0.055,
      "step": 117680
    },
    {
      "epoch": 0.19261863147489902,
      "grad_norm": 2.298987865447998,
      "learning_rate": 9.678116536968827e-06,
      "loss": 0.0698,
      "step": 117700
    },
    {
      "epoch": 0.19265136191355237,
      "grad_norm": 1.1985292434692383,
      "learning_rate": 9.678050644755311e-06,
      "loss": 0.0643,
      "step": 117720
    },
    {
      "epoch": 0.1926840923522057,
      "grad_norm": 2.4118542671203613,
      "learning_rate": 9.677984752541793e-06,
      "loss": 0.0714,
      "step": 117740
    },
    {
      "epoch": 0.19271682279085905,
      "grad_norm": 1.5876390933990479,
      "learning_rate": 9.677918860328276e-06,
      "loss": 0.0698,
      "step": 117760
    },
    {
      "epoch": 0.19274955322951237,
      "grad_norm": 2.6972053050994873,
      "learning_rate": 9.677852968114758e-06,
      "loss": 0.0719,
      "step": 117780
    },
    {
      "epoch": 0.19278228366816572,
      "grad_norm": 5.726757526397705,
      "learning_rate": 9.677787075901242e-06,
      "loss": 0.0852,
      "step": 117800
    },
    {
      "epoch": 0.19281501410681906,
      "grad_norm": 0.46734219789505005,
      "learning_rate": 9.677721183687726e-06,
      "loss": 0.0569,
      "step": 117820
    },
    {
      "epoch": 0.1928477445454724,
      "grad_norm": 3.6892268657684326,
      "learning_rate": 9.677655291474207e-06,
      "loss": 0.0676,
      "step": 117840
    },
    {
      "epoch": 0.19288047498412575,
      "grad_norm": 2.5513391494750977,
      "learning_rate": 9.677589399260691e-06,
      "loss": 0.0641,
      "step": 117860
    },
    {
      "epoch": 0.19291320542277907,
      "grad_norm": 4.716724872589111,
      "learning_rate": 9.677523507047173e-06,
      "loss": 0.0626,
      "step": 117880
    },
    {
      "epoch": 0.1929459358614324,
      "grad_norm": 2.375011682510376,
      "learning_rate": 9.677457614833656e-06,
      "loss": 0.0594,
      "step": 117900
    },
    {
      "epoch": 0.19297866630008575,
      "grad_norm": 5.936117649078369,
      "learning_rate": 9.677391722620138e-06,
      "loss": 0.0643,
      "step": 117920
    },
    {
      "epoch": 0.1930113967387391,
      "grad_norm": 3.1095898151397705,
      "learning_rate": 9.677325830406622e-06,
      "loss": 0.0635,
      "step": 117940
    },
    {
      "epoch": 0.19304412717739244,
      "grad_norm": 3.5758814811706543,
      "learning_rate": 9.677259938193104e-06,
      "loss": 0.073,
      "step": 117960
    },
    {
      "epoch": 0.19307685761604576,
      "grad_norm": 7.30776834487915,
      "learning_rate": 9.677194045979587e-06,
      "loss": 0.0571,
      "step": 117980
    },
    {
      "epoch": 0.1931095880546991,
      "grad_norm": 2.5392704010009766,
      "learning_rate": 9.67712815376607e-06,
      "loss": 0.0748,
      "step": 118000
    },
    {
      "epoch": 0.19314231849335245,
      "grad_norm": 1.4466429948806763,
      "learning_rate": 9.677062261552553e-06,
      "loss": 0.0593,
      "step": 118020
    },
    {
      "epoch": 0.1931750489320058,
      "grad_norm": 2.3756561279296875,
      "learning_rate": 9.676996369339035e-06,
      "loss": 0.0738,
      "step": 118040
    },
    {
      "epoch": 0.19320777937065914,
      "grad_norm": 7.604971408843994,
      "learning_rate": 9.676930477125518e-06,
      "loss": 0.0711,
      "step": 118060
    },
    {
      "epoch": 0.19324050980931246,
      "grad_norm": 3.6943023204803467,
      "learning_rate": 9.676864584912002e-06,
      "loss": 0.0673,
      "step": 118080
    },
    {
      "epoch": 0.1932732402479658,
      "grad_norm": 6.470433712005615,
      "learning_rate": 9.676798692698484e-06,
      "loss": 0.0788,
      "step": 118100
    },
    {
      "epoch": 0.19330597068661914,
      "grad_norm": 1.2657544612884521,
      "learning_rate": 9.676732800484967e-06,
      "loss": 0.0623,
      "step": 118120
    },
    {
      "epoch": 0.1933387011252725,
      "grad_norm": 0.7688669562339783,
      "learning_rate": 9.676666908271451e-06,
      "loss": 0.0658,
      "step": 118140
    },
    {
      "epoch": 0.19337143156392583,
      "grad_norm": 3.260211229324341,
      "learning_rate": 9.676601016057933e-06,
      "loss": 0.0504,
      "step": 118160
    },
    {
      "epoch": 0.19340416200257915,
      "grad_norm": 3.8707990646362305,
      "learning_rate": 9.676535123844417e-06,
      "loss": 0.0777,
      "step": 118180
    },
    {
      "epoch": 0.1934368924412325,
      "grad_norm": 2.1009581089019775,
      "learning_rate": 9.6764692316309e-06,
      "loss": 0.0594,
      "step": 118200
    },
    {
      "epoch": 0.19346962287988584,
      "grad_norm": 2.4734251499176025,
      "learning_rate": 9.676403339417382e-06,
      "loss": 0.0636,
      "step": 118220
    },
    {
      "epoch": 0.19350235331853918,
      "grad_norm": 3.126732349395752,
      "learning_rate": 9.676337447203866e-06,
      "loss": 0.0716,
      "step": 118240
    },
    {
      "epoch": 0.19353508375719253,
      "grad_norm": 2.6848456859588623,
      "learning_rate": 9.676271554990347e-06,
      "loss": 0.0708,
      "step": 118260
    },
    {
      "epoch": 0.19356781419584584,
      "grad_norm": 1.8792954683303833,
      "learning_rate": 9.676205662776831e-06,
      "loss": 0.0631,
      "step": 118280
    },
    {
      "epoch": 0.1936005446344992,
      "grad_norm": 3.446732997894287,
      "learning_rate": 9.676139770563313e-06,
      "loss": 0.0508,
      "step": 118300
    },
    {
      "epoch": 0.19363327507315253,
      "grad_norm": 2.520932912826538,
      "learning_rate": 9.676073878349797e-06,
      "loss": 0.0522,
      "step": 118320
    },
    {
      "epoch": 0.19366600551180588,
      "grad_norm": 1.334028959274292,
      "learning_rate": 9.676007986136278e-06,
      "loss": 0.0475,
      "step": 118340
    },
    {
      "epoch": 0.1936987359504592,
      "grad_norm": 6.223057746887207,
      "learning_rate": 9.675942093922762e-06,
      "loss": 0.0643,
      "step": 118360
    },
    {
      "epoch": 0.19373146638911254,
      "grad_norm": 0.6909803152084351,
      "learning_rate": 9.675876201709244e-06,
      "loss": 0.0501,
      "step": 118380
    },
    {
      "epoch": 0.19376419682776588,
      "grad_norm": 4.254077911376953,
      "learning_rate": 9.675810309495728e-06,
      "loss": 0.0544,
      "step": 118400
    },
    {
      "epoch": 0.19379692726641923,
      "grad_norm": 10.136589050292969,
      "learning_rate": 9.67574441728221e-06,
      "loss": 0.0618,
      "step": 118420
    },
    {
      "epoch": 0.19382965770507257,
      "grad_norm": 7.147480010986328,
      "learning_rate": 9.675678525068693e-06,
      "loss": 0.0675,
      "step": 118440
    },
    {
      "epoch": 0.1938623881437259,
      "grad_norm": 1.6134586334228516,
      "learning_rate": 9.675612632855177e-06,
      "loss": 0.0595,
      "step": 118460
    },
    {
      "epoch": 0.19389511858237923,
      "grad_norm": 1.3075056076049805,
      "learning_rate": 9.675546740641658e-06,
      "loss": 0.0704,
      "step": 118480
    },
    {
      "epoch": 0.19392784902103258,
      "grad_norm": 2.741455078125,
      "learning_rate": 9.675480848428142e-06,
      "loss": 0.0656,
      "step": 118500
    },
    {
      "epoch": 0.19396057945968592,
      "grad_norm": 8.912002563476562,
      "learning_rate": 9.675414956214626e-06,
      "loss": 0.0642,
      "step": 118520
    },
    {
      "epoch": 0.19399330989833927,
      "grad_norm": 1.3090307712554932,
      "learning_rate": 9.675349064001108e-06,
      "loss": 0.0643,
      "step": 118540
    },
    {
      "epoch": 0.19402604033699258,
      "grad_norm": 2.6179490089416504,
      "learning_rate": 9.675283171787591e-06,
      "loss": 0.0691,
      "step": 118560
    },
    {
      "epoch": 0.19405877077564593,
      "grad_norm": 1.3334211111068726,
      "learning_rate": 9.675217279574075e-06,
      "loss": 0.0641,
      "step": 118580
    },
    {
      "epoch": 0.19409150121429927,
      "grad_norm": 10.401701927185059,
      "learning_rate": 9.675151387360557e-06,
      "loss": 0.0625,
      "step": 118600
    },
    {
      "epoch": 0.19412423165295262,
      "grad_norm": 3.29890513420105,
      "learning_rate": 9.67508549514704e-06,
      "loss": 0.0673,
      "step": 118620
    },
    {
      "epoch": 0.19415696209160596,
      "grad_norm": 5.717773914337158,
      "learning_rate": 9.675019602933522e-06,
      "loss": 0.0819,
      "step": 118640
    },
    {
      "epoch": 0.19418969253025928,
      "grad_norm": 4.735054969787598,
      "learning_rate": 9.674953710720006e-06,
      "loss": 0.0804,
      "step": 118660
    },
    {
      "epoch": 0.19422242296891262,
      "grad_norm": 3.82987117767334,
      "learning_rate": 9.674887818506488e-06,
      "loss": 0.0432,
      "step": 118680
    },
    {
      "epoch": 0.19425515340756597,
      "grad_norm": 5.016881942749023,
      "learning_rate": 9.674821926292971e-06,
      "loss": 0.095,
      "step": 118700
    },
    {
      "epoch": 0.1942878838462193,
      "grad_norm": 1.6895503997802734,
      "learning_rate": 9.674756034079453e-06,
      "loss": 0.0638,
      "step": 118720
    },
    {
      "epoch": 0.19432061428487266,
      "grad_norm": 2.4227232933044434,
      "learning_rate": 9.674690141865937e-06,
      "loss": 0.0752,
      "step": 118740
    },
    {
      "epoch": 0.19435334472352597,
      "grad_norm": 1.536299228668213,
      "learning_rate": 9.674624249652419e-06,
      "loss": 0.0524,
      "step": 118760
    },
    {
      "epoch": 0.19438607516217932,
      "grad_norm": 3.770193338394165,
      "learning_rate": 9.674558357438902e-06,
      "loss": 0.0623,
      "step": 118780
    },
    {
      "epoch": 0.19441880560083266,
      "grad_norm": 1.2865514755249023,
      "learning_rate": 9.674492465225384e-06,
      "loss": 0.069,
      "step": 118800
    },
    {
      "epoch": 0.194451536039486,
      "grad_norm": 0.866752028465271,
      "learning_rate": 9.674426573011868e-06,
      "loss": 0.0545,
      "step": 118820
    },
    {
      "epoch": 0.19448426647813935,
      "grad_norm": 2.089050054550171,
      "learning_rate": 9.674360680798351e-06,
      "loss": 0.0493,
      "step": 118840
    },
    {
      "epoch": 0.19451699691679267,
      "grad_norm": 3.9557511806488037,
      "learning_rate": 9.674294788584833e-06,
      "loss": 0.082,
      "step": 118860
    },
    {
      "epoch": 0.194549727355446,
      "grad_norm": 1.491443157196045,
      "learning_rate": 9.674228896371317e-06,
      "loss": 0.0645,
      "step": 118880
    },
    {
      "epoch": 0.19458245779409936,
      "grad_norm": 3.613495349884033,
      "learning_rate": 9.6741630041578e-06,
      "loss": 0.0724,
      "step": 118900
    },
    {
      "epoch": 0.1946151882327527,
      "grad_norm": 3.4637322425842285,
      "learning_rate": 9.674097111944282e-06,
      "loss": 0.077,
      "step": 118920
    },
    {
      "epoch": 0.19464791867140604,
      "grad_norm": 2.6689553260803223,
      "learning_rate": 9.674031219730766e-06,
      "loss": 0.0575,
      "step": 118940
    },
    {
      "epoch": 0.19468064911005936,
      "grad_norm": 3.450990915298462,
      "learning_rate": 9.67396532751725e-06,
      "loss": 0.0891,
      "step": 118960
    },
    {
      "epoch": 0.1947133795487127,
      "grad_norm": 1.0235376358032227,
      "learning_rate": 9.673899435303731e-06,
      "loss": 0.0606,
      "step": 118980
    },
    {
      "epoch": 0.19474610998736605,
      "grad_norm": 1.5991270542144775,
      "learning_rate": 9.673833543090215e-06,
      "loss": 0.0733,
      "step": 119000
    },
    {
      "epoch": 0.1947788404260194,
      "grad_norm": 3.4833602905273438,
      "learning_rate": 9.673767650876697e-06,
      "loss": 0.072,
      "step": 119020
    },
    {
      "epoch": 0.19481157086467274,
      "grad_norm": 1.308937907218933,
      "learning_rate": 9.67370175866318e-06,
      "loss": 0.0664,
      "step": 119040
    },
    {
      "epoch": 0.19484430130332606,
      "grad_norm": 1.7619966268539429,
      "learning_rate": 9.673635866449662e-06,
      "loss": 0.0623,
      "step": 119060
    },
    {
      "epoch": 0.1948770317419794,
      "grad_norm": 8.370177268981934,
      "learning_rate": 9.673569974236146e-06,
      "loss": 0.0706,
      "step": 119080
    },
    {
      "epoch": 0.19490976218063275,
      "grad_norm": 4.39669942855835,
      "learning_rate": 9.673504082022628e-06,
      "loss": 0.0798,
      "step": 119100
    },
    {
      "epoch": 0.1949424926192861,
      "grad_norm": 2.244067907333374,
      "learning_rate": 9.673438189809111e-06,
      "loss": 0.0587,
      "step": 119120
    },
    {
      "epoch": 0.19497522305793943,
      "grad_norm": 1.8906445503234863,
      "learning_rate": 9.673372297595593e-06,
      "loss": 0.0591,
      "step": 119140
    },
    {
      "epoch": 0.19500795349659275,
      "grad_norm": 7.4757280349731445,
      "learning_rate": 9.673306405382077e-06,
      "loss": 0.0625,
      "step": 119160
    },
    {
      "epoch": 0.1950406839352461,
      "grad_norm": 0.5080780982971191,
      "learning_rate": 9.673240513168559e-06,
      "loss": 0.0604,
      "step": 119180
    },
    {
      "epoch": 0.19507341437389944,
      "grad_norm": 1.1555558443069458,
      "learning_rate": 9.673174620955042e-06,
      "loss": 0.0631,
      "step": 119200
    },
    {
      "epoch": 0.19510614481255278,
      "grad_norm": 2.3556368350982666,
      "learning_rate": 9.673108728741526e-06,
      "loss": 0.0654,
      "step": 119220
    },
    {
      "epoch": 0.19513887525120613,
      "grad_norm": 5.230129718780518,
      "learning_rate": 9.673042836528008e-06,
      "loss": 0.0557,
      "step": 119240
    },
    {
      "epoch": 0.19517160568985945,
      "grad_norm": 1.8452099561691284,
      "learning_rate": 9.672976944314491e-06,
      "loss": 0.0679,
      "step": 119260
    },
    {
      "epoch": 0.1952043361285128,
      "grad_norm": 2.9186089038848877,
      "learning_rate": 9.672911052100973e-06,
      "loss": 0.0639,
      "step": 119280
    },
    {
      "epoch": 0.19523706656716613,
      "grad_norm": 4.006695747375488,
      "learning_rate": 9.672845159887457e-06,
      "loss": 0.0582,
      "step": 119300
    },
    {
      "epoch": 0.19526979700581948,
      "grad_norm": 1.173096776008606,
      "learning_rate": 9.67277926767394e-06,
      "loss": 0.0823,
      "step": 119320
    },
    {
      "epoch": 0.19530252744447282,
      "grad_norm": 1.2702324390411377,
      "learning_rate": 9.672713375460422e-06,
      "loss": 0.0584,
      "step": 119340
    },
    {
      "epoch": 0.19533525788312614,
      "grad_norm": 1.4650930166244507,
      "learning_rate": 9.672647483246906e-06,
      "loss": 0.0586,
      "step": 119360
    },
    {
      "epoch": 0.19536798832177948,
      "grad_norm": 1.726833701133728,
      "learning_rate": 9.67258159103339e-06,
      "loss": 0.0584,
      "step": 119380
    },
    {
      "epoch": 0.19540071876043283,
      "grad_norm": 6.950833320617676,
      "learning_rate": 9.672515698819871e-06,
      "loss": 0.0619,
      "step": 119400
    },
    {
      "epoch": 0.19543344919908617,
      "grad_norm": 3.1618359088897705,
      "learning_rate": 9.672449806606355e-06,
      "loss": 0.0625,
      "step": 119420
    },
    {
      "epoch": 0.19546617963773952,
      "grad_norm": 6.558014392852783,
      "learning_rate": 9.672383914392837e-06,
      "loss": 0.0587,
      "step": 119440
    },
    {
      "epoch": 0.19549891007639283,
      "grad_norm": 0.9828362464904785,
      "learning_rate": 9.67231802217932e-06,
      "loss": 0.0436,
      "step": 119460
    },
    {
      "epoch": 0.19553164051504618,
      "grad_norm": 0.6447873115539551,
      "learning_rate": 9.672252129965802e-06,
      "loss": 0.05,
      "step": 119480
    },
    {
      "epoch": 0.19556437095369952,
      "grad_norm": 2.908846616744995,
      "learning_rate": 9.672186237752286e-06,
      "loss": 0.0752,
      "step": 119500
    },
    {
      "epoch": 0.19559710139235287,
      "grad_norm": 0.710318922996521,
      "learning_rate": 9.672120345538768e-06,
      "loss": 0.0709,
      "step": 119520
    },
    {
      "epoch": 0.1956298318310062,
      "grad_norm": 4.88331413269043,
      "learning_rate": 9.672054453325251e-06,
      "loss": 0.075,
      "step": 119540
    },
    {
      "epoch": 0.19566256226965953,
      "grad_norm": 2.431828260421753,
      "learning_rate": 9.671988561111733e-06,
      "loss": 0.0502,
      "step": 119560
    },
    {
      "epoch": 0.19569529270831287,
      "grad_norm": 1.5148966312408447,
      "learning_rate": 9.671922668898217e-06,
      "loss": 0.0687,
      "step": 119580
    },
    {
      "epoch": 0.19572802314696622,
      "grad_norm": 1.6520075798034668,
      "learning_rate": 9.6718567766847e-06,
      "loss": 0.0679,
      "step": 119600
    },
    {
      "epoch": 0.19576075358561956,
      "grad_norm": 2.3662455081939697,
      "learning_rate": 9.671790884471182e-06,
      "loss": 0.067,
      "step": 119620
    },
    {
      "epoch": 0.1957934840242729,
      "grad_norm": 1.7552149295806885,
      "learning_rate": 9.671724992257666e-06,
      "loss": 0.0496,
      "step": 119640
    },
    {
      "epoch": 0.19582621446292622,
      "grad_norm": 0.44923505187034607,
      "learning_rate": 9.671659100044148e-06,
      "loss": 0.0545,
      "step": 119660
    },
    {
      "epoch": 0.19585894490157957,
      "grad_norm": 7.694098472595215,
      "learning_rate": 9.671593207830631e-06,
      "loss": 0.0569,
      "step": 119680
    },
    {
      "epoch": 0.1958916753402329,
      "grad_norm": 2.64766001701355,
      "learning_rate": 9.671527315617115e-06,
      "loss": 0.0554,
      "step": 119700
    },
    {
      "epoch": 0.19592440577888626,
      "grad_norm": 2.9733126163482666,
      "learning_rate": 9.671461423403597e-06,
      "loss": 0.0661,
      "step": 119720
    },
    {
      "epoch": 0.19595713621753957,
      "grad_norm": 1.2268192768096924,
      "learning_rate": 9.67139553119008e-06,
      "loss": 0.0615,
      "step": 119740
    },
    {
      "epoch": 0.19598986665619292,
      "grad_norm": 8.92491340637207,
      "learning_rate": 9.671329638976564e-06,
      "loss": 0.0546,
      "step": 119760
    },
    {
      "epoch": 0.19602259709484626,
      "grad_norm": 1.948377013206482,
      "learning_rate": 9.671263746763046e-06,
      "loss": 0.0773,
      "step": 119780
    },
    {
      "epoch": 0.1960553275334996,
      "grad_norm": 1.7134202718734741,
      "learning_rate": 9.67119785454953e-06,
      "loss": 0.0918,
      "step": 119800
    },
    {
      "epoch": 0.19608805797215295,
      "grad_norm": 4.722006320953369,
      "learning_rate": 9.671131962336011e-06,
      "loss": 0.0521,
      "step": 119820
    },
    {
      "epoch": 0.19612078841080627,
      "grad_norm": 2.753380537033081,
      "learning_rate": 9.671066070122495e-06,
      "loss": 0.0627,
      "step": 119840
    },
    {
      "epoch": 0.1961535188494596,
      "grad_norm": 6.826362609863281,
      "learning_rate": 9.671000177908977e-06,
      "loss": 0.0392,
      "step": 119860
    },
    {
      "epoch": 0.19618624928811296,
      "grad_norm": 1.5462071895599365,
      "learning_rate": 9.67093428569546e-06,
      "loss": 0.0641,
      "step": 119880
    },
    {
      "epoch": 0.1962189797267663,
      "grad_norm": 2.225529909133911,
      "learning_rate": 9.670868393481942e-06,
      "loss": 0.0561,
      "step": 119900
    },
    {
      "epoch": 0.19625171016541965,
      "grad_norm": 1.1748899221420288,
      "learning_rate": 9.670802501268426e-06,
      "loss": 0.0455,
      "step": 119920
    },
    {
      "epoch": 0.19628444060407296,
      "grad_norm": 2.668015480041504,
      "learning_rate": 9.67073660905491e-06,
      "loss": 0.0513,
      "step": 119940
    },
    {
      "epoch": 0.1963171710427263,
      "grad_norm": 2.37095046043396,
      "learning_rate": 9.670670716841391e-06,
      "loss": 0.0586,
      "step": 119960
    },
    {
      "epoch": 0.19634990148137965,
      "grad_norm": 3.145002603530884,
      "learning_rate": 9.670604824627875e-06,
      "loss": 0.0788,
      "step": 119980
    },
    {
      "epoch": 0.196382631920033,
      "grad_norm": 2.1244845390319824,
      "learning_rate": 9.670538932414357e-06,
      "loss": 0.0575,
      "step": 120000
    },
    {
      "epoch": 0.19641536235868634,
      "grad_norm": 2.000352621078491,
      "learning_rate": 9.67047304020084e-06,
      "loss": 0.0615,
      "step": 120020
    },
    {
      "epoch": 0.19644809279733966,
      "grad_norm": 1.4842292070388794,
      "learning_rate": 9.670407147987322e-06,
      "loss": 0.0622,
      "step": 120040
    },
    {
      "epoch": 0.196480823235993,
      "grad_norm": 2.093324899673462,
      "learning_rate": 9.670341255773806e-06,
      "loss": 0.0548,
      "step": 120060
    },
    {
      "epoch": 0.19651355367464635,
      "grad_norm": 0.893266499042511,
      "learning_rate": 9.670275363560288e-06,
      "loss": 0.0747,
      "step": 120080
    },
    {
      "epoch": 0.1965462841132997,
      "grad_norm": 1.517627477645874,
      "learning_rate": 9.670209471346771e-06,
      "loss": 0.0554,
      "step": 120100
    },
    {
      "epoch": 0.19657901455195304,
      "grad_norm": 3.2355525493621826,
      "learning_rate": 9.670143579133255e-06,
      "loss": 0.068,
      "step": 120120
    },
    {
      "epoch": 0.19661174499060635,
      "grad_norm": 2.4980618953704834,
      "learning_rate": 9.670077686919737e-06,
      "loss": 0.0618,
      "step": 120140
    },
    {
      "epoch": 0.1966444754292597,
      "grad_norm": 9.35640811920166,
      "learning_rate": 9.67001179470622e-06,
      "loss": 0.0668,
      "step": 120160
    },
    {
      "epoch": 0.19667720586791304,
      "grad_norm": 4.854251861572266,
      "learning_rate": 9.669945902492704e-06,
      "loss": 0.0678,
      "step": 120180
    },
    {
      "epoch": 0.19670993630656639,
      "grad_norm": 2.4108076095581055,
      "learning_rate": 9.669880010279186e-06,
      "loss": 0.0628,
      "step": 120200
    },
    {
      "epoch": 0.19674266674521973,
      "grad_norm": 1.8860806226730347,
      "learning_rate": 9.66981411806567e-06,
      "loss": 0.0474,
      "step": 120220
    },
    {
      "epoch": 0.19677539718387305,
      "grad_norm": 2.2487518787384033,
      "learning_rate": 9.669748225852151e-06,
      "loss": 0.0584,
      "step": 120240
    },
    {
      "epoch": 0.1968081276225264,
      "grad_norm": 3.93007230758667,
      "learning_rate": 9.669682333638635e-06,
      "loss": 0.0627,
      "step": 120260
    },
    {
      "epoch": 0.19684085806117974,
      "grad_norm": 3.4256808757781982,
      "learning_rate": 9.669616441425119e-06,
      "loss": 0.0574,
      "step": 120280
    },
    {
      "epoch": 0.19687358849983308,
      "grad_norm": 2.1833088397979736,
      "learning_rate": 9.6695505492116e-06,
      "loss": 0.0595,
      "step": 120300
    },
    {
      "epoch": 0.19690631893848642,
      "grad_norm": 3.224698781967163,
      "learning_rate": 9.669484656998084e-06,
      "loss": 0.0699,
      "step": 120320
    },
    {
      "epoch": 0.19693904937713974,
      "grad_norm": 2.177528142929077,
      "learning_rate": 9.669418764784566e-06,
      "loss": 0.0818,
      "step": 120340
    },
    {
      "epoch": 0.19697177981579309,
      "grad_norm": 0.7769082188606262,
      "learning_rate": 9.66935287257105e-06,
      "loss": 0.0849,
      "step": 120360
    },
    {
      "epoch": 0.19700451025444643,
      "grad_norm": 1.59627366065979,
      "learning_rate": 9.669286980357531e-06,
      "loss": 0.0792,
      "step": 120380
    },
    {
      "epoch": 0.19703724069309977,
      "grad_norm": 2.4749040603637695,
      "learning_rate": 9.669221088144015e-06,
      "loss": 0.0484,
      "step": 120400
    },
    {
      "epoch": 0.19706997113175312,
      "grad_norm": 1.2690767049789429,
      "learning_rate": 9.669155195930497e-06,
      "loss": 0.077,
      "step": 120420
    },
    {
      "epoch": 0.19710270157040644,
      "grad_norm": 5.667697429656982,
      "learning_rate": 9.66908930371698e-06,
      "loss": 0.0639,
      "step": 120440
    },
    {
      "epoch": 0.19713543200905978,
      "grad_norm": 4.62340784072876,
      "learning_rate": 9.669023411503462e-06,
      "loss": 0.0667,
      "step": 120460
    },
    {
      "epoch": 0.19716816244771312,
      "grad_norm": 2.4299252033233643,
      "learning_rate": 9.668957519289946e-06,
      "loss": 0.0695,
      "step": 120480
    },
    {
      "epoch": 0.19720089288636647,
      "grad_norm": 2.9849960803985596,
      "learning_rate": 9.66889162707643e-06,
      "loss": 0.0584,
      "step": 120500
    },
    {
      "epoch": 0.1972336233250198,
      "grad_norm": 1.452499508857727,
      "learning_rate": 9.668825734862911e-06,
      "loss": 0.0573,
      "step": 120520
    },
    {
      "epoch": 0.19726635376367313,
      "grad_norm": 4.584425926208496,
      "learning_rate": 9.668759842649395e-06,
      "loss": 0.0588,
      "step": 120540
    },
    {
      "epoch": 0.19729908420232647,
      "grad_norm": 2.0273172855377197,
      "learning_rate": 9.668693950435879e-06,
      "loss": 0.0587,
      "step": 120560
    },
    {
      "epoch": 0.19733181464097982,
      "grad_norm": 2.2083992958068848,
      "learning_rate": 9.66862805822236e-06,
      "loss": 0.0639,
      "step": 120580
    },
    {
      "epoch": 0.19736454507963316,
      "grad_norm": 0.707281231880188,
      "learning_rate": 9.668562166008844e-06,
      "loss": 0.0462,
      "step": 120600
    },
    {
      "epoch": 0.1973972755182865,
      "grad_norm": 5.152617931365967,
      "learning_rate": 9.668496273795326e-06,
      "loss": 0.0646,
      "step": 120620
    },
    {
      "epoch": 0.19743000595693982,
      "grad_norm": 3.950246572494507,
      "learning_rate": 9.66843038158181e-06,
      "loss": 0.0648,
      "step": 120640
    },
    {
      "epoch": 0.19746273639559317,
      "grad_norm": 3.1427018642425537,
      "learning_rate": 9.668364489368293e-06,
      "loss": 0.0654,
      "step": 120660
    },
    {
      "epoch": 0.1974954668342465,
      "grad_norm": 6.885496139526367,
      "learning_rate": 9.668298597154775e-06,
      "loss": 0.0549,
      "step": 120680
    },
    {
      "epoch": 0.19752819727289986,
      "grad_norm": 4.024322509765625,
      "learning_rate": 9.668232704941259e-06,
      "loss": 0.0707,
      "step": 120700
    },
    {
      "epoch": 0.1975609277115532,
      "grad_norm": 2.30558443069458,
      "learning_rate": 9.66816681272774e-06,
      "loss": 0.0601,
      "step": 120720
    },
    {
      "epoch": 0.19759365815020652,
      "grad_norm": 3.64115309715271,
      "learning_rate": 9.668100920514224e-06,
      "loss": 0.0558,
      "step": 120740
    },
    {
      "epoch": 0.19762638858885986,
      "grad_norm": 1.4439678192138672,
      "learning_rate": 9.668035028300706e-06,
      "loss": 0.0581,
      "step": 120760
    },
    {
      "epoch": 0.1976591190275132,
      "grad_norm": 3.935223340988159,
      "learning_rate": 9.66796913608719e-06,
      "loss": 0.0691,
      "step": 120780
    },
    {
      "epoch": 0.19769184946616655,
      "grad_norm": 1.5565500259399414,
      "learning_rate": 9.667903243873672e-06,
      "loss": 0.0802,
      "step": 120800
    },
    {
      "epoch": 0.1977245799048199,
      "grad_norm": 1.1730555295944214,
      "learning_rate": 9.667837351660155e-06,
      "loss": 0.0725,
      "step": 120820
    },
    {
      "epoch": 0.1977573103434732,
      "grad_norm": 0.7624600529670715,
      "learning_rate": 9.667771459446637e-06,
      "loss": 0.0711,
      "step": 120840
    },
    {
      "epoch": 0.19779004078212656,
      "grad_norm": 3.3470003604888916,
      "learning_rate": 9.66770556723312e-06,
      "loss": 0.0597,
      "step": 120860
    },
    {
      "epoch": 0.1978227712207799,
      "grad_norm": 3.6446645259857178,
      "learning_rate": 9.667639675019602e-06,
      "loss": 0.0526,
      "step": 120880
    },
    {
      "epoch": 0.19785550165943325,
      "grad_norm": 1.7112889289855957,
      "learning_rate": 9.667573782806086e-06,
      "loss": 0.0601,
      "step": 120900
    },
    {
      "epoch": 0.1978882320980866,
      "grad_norm": 2.686859607696533,
      "learning_rate": 9.66750789059257e-06,
      "loss": 0.0681,
      "step": 120920
    },
    {
      "epoch": 0.1979209625367399,
      "grad_norm": 2.222644805908203,
      "learning_rate": 9.667441998379053e-06,
      "loss": 0.0568,
      "step": 120940
    },
    {
      "epoch": 0.19795369297539325,
      "grad_norm": 2.610083818435669,
      "learning_rate": 9.667376106165535e-06,
      "loss": 0.0799,
      "step": 120960
    },
    {
      "epoch": 0.1979864234140466,
      "grad_norm": 2.83424711227417,
      "learning_rate": 9.667310213952019e-06,
      "loss": 0.0858,
      "step": 120980
    },
    {
      "epoch": 0.19801915385269994,
      "grad_norm": 2.365220785140991,
      "learning_rate": 9.667244321738502e-06,
      "loss": 0.0748,
      "step": 121000
    },
    {
      "epoch": 0.1980518842913533,
      "grad_norm": 1.1016918420791626,
      "learning_rate": 9.667178429524984e-06,
      "loss": 0.0722,
      "step": 121020
    },
    {
      "epoch": 0.1980846147300066,
      "grad_norm": 2.3908472061157227,
      "learning_rate": 9.667112537311468e-06,
      "loss": 0.0673,
      "step": 121040
    },
    {
      "epoch": 0.19811734516865995,
      "grad_norm": 1.8663147687911987,
      "learning_rate": 9.66704664509795e-06,
      "loss": 0.0614,
      "step": 121060
    },
    {
      "epoch": 0.1981500756073133,
      "grad_norm": 1.862341284751892,
      "learning_rate": 9.666980752884433e-06,
      "loss": 0.0552,
      "step": 121080
    },
    {
      "epoch": 0.19818280604596664,
      "grad_norm": 2.5480329990386963,
      "learning_rate": 9.666914860670915e-06,
      "loss": 0.0804,
      "step": 121100
    },
    {
      "epoch": 0.19821553648461995,
      "grad_norm": 0.5878214240074158,
      "learning_rate": 9.666848968457399e-06,
      "loss": 0.0611,
      "step": 121120
    },
    {
      "epoch": 0.1982482669232733,
      "grad_norm": 0.5268639326095581,
      "learning_rate": 9.66678307624388e-06,
      "loss": 0.0751,
      "step": 121140
    },
    {
      "epoch": 0.19828099736192664,
      "grad_norm": 2.5565288066864014,
      "learning_rate": 9.666717184030364e-06,
      "loss": 0.0632,
      "step": 121160
    },
    {
      "epoch": 0.19831372780058,
      "grad_norm": 2.596017360687256,
      "learning_rate": 9.666651291816846e-06,
      "loss": 0.076,
      "step": 121180
    },
    {
      "epoch": 0.19834645823923333,
      "grad_norm": 3.1215689182281494,
      "learning_rate": 9.66658539960333e-06,
      "loss": 0.0781,
      "step": 121200
    },
    {
      "epoch": 0.19837918867788665,
      "grad_norm": 4.017213344573975,
      "learning_rate": 9.666519507389812e-06,
      "loss": 0.0757,
      "step": 121220
    },
    {
      "epoch": 0.19841191911654,
      "grad_norm": 1.8779851198196411,
      "learning_rate": 9.666453615176295e-06,
      "loss": 0.0558,
      "step": 121240
    },
    {
      "epoch": 0.19844464955519334,
      "grad_norm": 4.414248943328857,
      "learning_rate": 9.666387722962777e-06,
      "loss": 0.0709,
      "step": 121260
    },
    {
      "epoch": 0.19847737999384668,
      "grad_norm": 7.312756061553955,
      "learning_rate": 9.66632183074926e-06,
      "loss": 0.0778,
      "step": 121280
    },
    {
      "epoch": 0.19851011043250003,
      "grad_norm": 2.3613479137420654,
      "learning_rate": 9.666255938535744e-06,
      "loss": 0.0659,
      "step": 121300
    },
    {
      "epoch": 0.19854284087115334,
      "grad_norm": 3.5024728775024414,
      "learning_rate": 9.666190046322226e-06,
      "loss": 0.0506,
      "step": 121320
    },
    {
      "epoch": 0.1985755713098067,
      "grad_norm": 4.503299236297607,
      "learning_rate": 9.66612415410871e-06,
      "loss": 0.0726,
      "step": 121340
    },
    {
      "epoch": 0.19860830174846003,
      "grad_norm": 15.73588752746582,
      "learning_rate": 9.666058261895193e-06,
      "loss": 0.0553,
      "step": 121360
    },
    {
      "epoch": 0.19864103218711338,
      "grad_norm": 0.46554601192474365,
      "learning_rate": 9.665992369681675e-06,
      "loss": 0.0592,
      "step": 121380
    },
    {
      "epoch": 0.19867376262576672,
      "grad_norm": 6.823296546936035,
      "learning_rate": 9.665926477468159e-06,
      "loss": 0.0766,
      "step": 121400
    },
    {
      "epoch": 0.19870649306442004,
      "grad_norm": 0.36384281516075134,
      "learning_rate": 9.665860585254642e-06,
      "loss": 0.053,
      "step": 121420
    },
    {
      "epoch": 0.19873922350307338,
      "grad_norm": 2.3335683345794678,
      "learning_rate": 9.665794693041124e-06,
      "loss": 0.0529,
      "step": 121440
    },
    {
      "epoch": 0.19877195394172673,
      "grad_norm": 3.795632839202881,
      "learning_rate": 9.665728800827608e-06,
      "loss": 0.0665,
      "step": 121460
    },
    {
      "epoch": 0.19880468438038007,
      "grad_norm": 3.3575642108917236,
      "learning_rate": 9.66566290861409e-06,
      "loss": 0.07,
      "step": 121480
    },
    {
      "epoch": 0.19883741481903341,
      "grad_norm": 1.4108564853668213,
      "learning_rate": 9.665597016400573e-06,
      "loss": 0.0531,
      "step": 121500
    },
    {
      "epoch": 0.19887014525768673,
      "grad_norm": 1.00454843044281,
      "learning_rate": 9.665531124187055e-06,
      "loss": 0.0574,
      "step": 121520
    },
    {
      "epoch": 0.19890287569634008,
      "grad_norm": 5.611053466796875,
      "learning_rate": 9.665465231973539e-06,
      "loss": 0.0627,
      "step": 121540
    },
    {
      "epoch": 0.19893560613499342,
      "grad_norm": 7.0262322425842285,
      "learning_rate": 9.66539933976002e-06,
      "loss": 0.0664,
      "step": 121560
    },
    {
      "epoch": 0.19896833657364676,
      "grad_norm": 9.6187744140625,
      "learning_rate": 9.665333447546504e-06,
      "loss": 0.0622,
      "step": 121580
    },
    {
      "epoch": 0.1990010670123001,
      "grad_norm": 1.3962832689285278,
      "learning_rate": 9.665267555332986e-06,
      "loss": 0.0586,
      "step": 121600
    },
    {
      "epoch": 0.19903379745095343,
      "grad_norm": 1.3019226789474487,
      "learning_rate": 9.66520166311947e-06,
      "loss": 0.0586,
      "step": 121620
    },
    {
      "epoch": 0.19906652788960677,
      "grad_norm": 1.7183622121810913,
      "learning_rate": 9.665135770905952e-06,
      "loss": 0.0666,
      "step": 121640
    },
    {
      "epoch": 0.19909925832826011,
      "grad_norm": 1.010117530822754,
      "learning_rate": 9.665069878692435e-06,
      "loss": 0.0719,
      "step": 121660
    },
    {
      "epoch": 0.19913198876691346,
      "grad_norm": 3.2024035453796387,
      "learning_rate": 9.665003986478919e-06,
      "loss": 0.0539,
      "step": 121680
    },
    {
      "epoch": 0.1991647192055668,
      "grad_norm": 1.7199466228485107,
      "learning_rate": 9.6649380942654e-06,
      "loss": 0.0545,
      "step": 121700
    },
    {
      "epoch": 0.19919744964422012,
      "grad_norm": 15.673958778381348,
      "learning_rate": 9.664872202051884e-06,
      "loss": 0.0511,
      "step": 121720
    },
    {
      "epoch": 0.19923018008287346,
      "grad_norm": 3.378530979156494,
      "learning_rate": 9.664806309838368e-06,
      "loss": 0.0626,
      "step": 121740
    },
    {
      "epoch": 0.1992629105215268,
      "grad_norm": 2.20989990234375,
      "learning_rate": 9.66474041762485e-06,
      "loss": 0.062,
      "step": 121760
    },
    {
      "epoch": 0.19929564096018015,
      "grad_norm": 2.3575425148010254,
      "learning_rate": 9.664674525411333e-06,
      "loss": 0.0654,
      "step": 121780
    },
    {
      "epoch": 0.1993283713988335,
      "grad_norm": 1.7056177854537964,
      "learning_rate": 9.664608633197817e-06,
      "loss": 0.0638,
      "step": 121800
    },
    {
      "epoch": 0.19936110183748681,
      "grad_norm": 3.057342767715454,
      "learning_rate": 9.664542740984299e-06,
      "loss": 0.0569,
      "step": 121820
    },
    {
      "epoch": 0.19939383227614016,
      "grad_norm": 1.8507195711135864,
      "learning_rate": 9.664476848770782e-06,
      "loss": 0.0542,
      "step": 121840
    },
    {
      "epoch": 0.1994265627147935,
      "grad_norm": 1.1342836618423462,
      "learning_rate": 9.664410956557264e-06,
      "loss": 0.0689,
      "step": 121860
    },
    {
      "epoch": 0.19945929315344685,
      "grad_norm": 2.3946585655212402,
      "learning_rate": 9.664345064343748e-06,
      "loss": 0.0611,
      "step": 121880
    },
    {
      "epoch": 0.1994920235921002,
      "grad_norm": 2.059685468673706,
      "learning_rate": 9.66427917213023e-06,
      "loss": 0.0717,
      "step": 121900
    },
    {
      "epoch": 0.1995247540307535,
      "grad_norm": 1.8103504180908203,
      "learning_rate": 9.664213279916713e-06,
      "loss": 0.0654,
      "step": 121920
    },
    {
      "epoch": 0.19955748446940685,
      "grad_norm": 4.371260643005371,
      "learning_rate": 9.664147387703195e-06,
      "loss": 0.0676,
      "step": 121940
    },
    {
      "epoch": 0.1995902149080602,
      "grad_norm": 1.1306438446044922,
      "learning_rate": 9.664081495489679e-06,
      "loss": 0.0632,
      "step": 121960
    },
    {
      "epoch": 0.19962294534671354,
      "grad_norm": 1.4984108209609985,
      "learning_rate": 9.66401560327616e-06,
      "loss": 0.0713,
      "step": 121980
    },
    {
      "epoch": 0.1996556757853669,
      "grad_norm": 2.1179261207580566,
      "learning_rate": 9.663949711062644e-06,
      "loss": 0.0699,
      "step": 122000
    },
    {
      "epoch": 0.1996884062240202,
      "grad_norm": 2.7325193881988525,
      "learning_rate": 9.663883818849126e-06,
      "loss": 0.0605,
      "step": 122020
    },
    {
      "epoch": 0.19972113666267355,
      "grad_norm": 16.48809051513672,
      "learning_rate": 9.66381792663561e-06,
      "loss": 0.0779,
      "step": 122040
    },
    {
      "epoch": 0.1997538671013269,
      "grad_norm": 2.1417784690856934,
      "learning_rate": 9.663752034422093e-06,
      "loss": 0.0611,
      "step": 122060
    },
    {
      "epoch": 0.19978659753998024,
      "grad_norm": 1.9625394344329834,
      "learning_rate": 9.663686142208575e-06,
      "loss": 0.0642,
      "step": 122080
    },
    {
      "epoch": 0.19981932797863358,
      "grad_norm": 2.9041430950164795,
      "learning_rate": 9.663620249995059e-06,
      "loss": 0.0617,
      "step": 122100
    },
    {
      "epoch": 0.1998520584172869,
      "grad_norm": 2.08117413520813,
      "learning_rate": 9.66355435778154e-06,
      "loss": 0.0629,
      "step": 122120
    },
    {
      "epoch": 0.19988478885594024,
      "grad_norm": 1.6988141536712646,
      "learning_rate": 9.663488465568024e-06,
      "loss": 0.0726,
      "step": 122140
    },
    {
      "epoch": 0.1999175192945936,
      "grad_norm": 0.816581666469574,
      "learning_rate": 9.663422573354508e-06,
      "loss": 0.064,
      "step": 122160
    },
    {
      "epoch": 0.19995024973324693,
      "grad_norm": 1.1443917751312256,
      "learning_rate": 9.66335668114099e-06,
      "loss": 0.0748,
      "step": 122180
    },
    {
      "epoch": 0.19998298017190028,
      "grad_norm": 5.441790580749512,
      "learning_rate": 9.663290788927473e-06,
      "loss": 0.0742,
      "step": 122200
    },
    {
      "epoch": 0.2000157106105536,
      "grad_norm": 4.617925643920898,
      "learning_rate": 9.663224896713957e-06,
      "loss": 0.0709,
      "step": 122220
    },
    {
      "epoch": 0.20004844104920694,
      "grad_norm": 4.989980697631836,
      "learning_rate": 9.663159004500439e-06,
      "loss": 0.0579,
      "step": 122240
    },
    {
      "epoch": 0.20008117148786028,
      "grad_norm": 3.3611974716186523,
      "learning_rate": 9.663093112286923e-06,
      "loss": 0.0722,
      "step": 122260
    },
    {
      "epoch": 0.20011390192651363,
      "grad_norm": 2.1803853511810303,
      "learning_rate": 9.663027220073404e-06,
      "loss": 0.0586,
      "step": 122280
    },
    {
      "epoch": 0.20014663236516697,
      "grad_norm": 3.4412906169891357,
      "learning_rate": 9.662961327859888e-06,
      "loss": 0.0518,
      "step": 122300
    },
    {
      "epoch": 0.2001793628038203,
      "grad_norm": 2.9285340309143066,
      "learning_rate": 9.66289543564637e-06,
      "loss": 0.0631,
      "step": 122320
    },
    {
      "epoch": 0.20021209324247363,
      "grad_norm": 3.184610605239868,
      "learning_rate": 9.662829543432853e-06,
      "loss": 0.0659,
      "step": 122340
    },
    {
      "epoch": 0.20024482368112698,
      "grad_norm": 1.6628631353378296,
      "learning_rate": 9.662763651219335e-06,
      "loss": 0.062,
      "step": 122360
    },
    {
      "epoch": 0.20027755411978032,
      "grad_norm": 5.418624401092529,
      "learning_rate": 9.662697759005819e-06,
      "loss": 0.0768,
      "step": 122380
    },
    {
      "epoch": 0.20031028455843367,
      "grad_norm": 2.1976070404052734,
      "learning_rate": 9.662631866792303e-06,
      "loss": 0.0593,
      "step": 122400
    },
    {
      "epoch": 0.20034301499708698,
      "grad_norm": 8.392376899719238,
      "learning_rate": 9.662565974578784e-06,
      "loss": 0.0645,
      "step": 122420
    },
    {
      "epoch": 0.20037574543574033,
      "grad_norm": 3.448918104171753,
      "learning_rate": 9.662500082365268e-06,
      "loss": 0.0703,
      "step": 122440
    },
    {
      "epoch": 0.20040847587439367,
      "grad_norm": 0.5829870104789734,
      "learning_rate": 9.66243419015175e-06,
      "loss": 0.0557,
      "step": 122460
    },
    {
      "epoch": 0.20044120631304702,
      "grad_norm": 2.171415090560913,
      "learning_rate": 9.662368297938234e-06,
      "loss": 0.0515,
      "step": 122480
    },
    {
      "epoch": 0.20047393675170036,
      "grad_norm": 2.2691211700439453,
      "learning_rate": 9.662302405724715e-06,
      "loss": 0.0601,
      "step": 122500
    },
    {
      "epoch": 0.20050666719035368,
      "grad_norm": 3.1174874305725098,
      "learning_rate": 9.662236513511199e-06,
      "loss": 0.055,
      "step": 122520
    },
    {
      "epoch": 0.20053939762900702,
      "grad_norm": 3.740879535675049,
      "learning_rate": 9.662170621297683e-06,
      "loss": 0.0767,
      "step": 122540
    },
    {
      "epoch": 0.20057212806766037,
      "grad_norm": 2.0626790523529053,
      "learning_rate": 9.662104729084164e-06,
      "loss": 0.0585,
      "step": 122560
    },
    {
      "epoch": 0.2006048585063137,
      "grad_norm": 1.4690741300582886,
      "learning_rate": 9.662038836870648e-06,
      "loss": 0.0612,
      "step": 122580
    },
    {
      "epoch": 0.20063758894496703,
      "grad_norm": 0.8923214673995972,
      "learning_rate": 9.661972944657132e-06,
      "loss": 0.0488,
      "step": 122600
    },
    {
      "epoch": 0.20067031938362037,
      "grad_norm": 1.7182974815368652,
      "learning_rate": 9.661907052443614e-06,
      "loss": 0.0697,
      "step": 122620
    },
    {
      "epoch": 0.20070304982227372,
      "grad_norm": 3.15054988861084,
      "learning_rate": 9.661841160230097e-06,
      "loss": 0.0575,
      "step": 122640
    },
    {
      "epoch": 0.20073578026092706,
      "grad_norm": 1.2925060987472534,
      "learning_rate": 9.661775268016579e-06,
      "loss": 0.0591,
      "step": 122660
    },
    {
      "epoch": 0.2007685106995804,
      "grad_norm": 2.348726749420166,
      "learning_rate": 9.661709375803063e-06,
      "loss": 0.0483,
      "step": 122680
    },
    {
      "epoch": 0.20080124113823372,
      "grad_norm": 1.8055633306503296,
      "learning_rate": 9.661643483589544e-06,
      "loss": 0.0651,
      "step": 122700
    },
    {
      "epoch": 0.20083397157688707,
      "grad_norm": 2.325059175491333,
      "learning_rate": 9.661577591376028e-06,
      "loss": 0.0689,
      "step": 122720
    },
    {
      "epoch": 0.2008667020155404,
      "grad_norm": 2.2719779014587402,
      "learning_rate": 9.661511699162512e-06,
      "loss": 0.0715,
      "step": 122740
    },
    {
      "epoch": 0.20089943245419375,
      "grad_norm": 1.756600022315979,
      "learning_rate": 9.661445806948994e-06,
      "loss": 0.0594,
      "step": 122760
    },
    {
      "epoch": 0.2009321628928471,
      "grad_norm": 4.216682434082031,
      "learning_rate": 9.661379914735477e-06,
      "loss": 0.0679,
      "step": 122780
    },
    {
      "epoch": 0.20096489333150042,
      "grad_norm": 2.490407943725586,
      "learning_rate": 9.661314022521959e-06,
      "loss": 0.0503,
      "step": 122800
    },
    {
      "epoch": 0.20099762377015376,
      "grad_norm": 2.6625120639801025,
      "learning_rate": 9.661248130308443e-06,
      "loss": 0.0551,
      "step": 122820
    },
    {
      "epoch": 0.2010303542088071,
      "grad_norm": 1.268918752670288,
      "learning_rate": 9.661182238094925e-06,
      "loss": 0.0524,
      "step": 122840
    },
    {
      "epoch": 0.20106308464746045,
      "grad_norm": 1.2128607034683228,
      "learning_rate": 9.661116345881408e-06,
      "loss": 0.0593,
      "step": 122860
    },
    {
      "epoch": 0.2010958150861138,
      "grad_norm": 1.3128330707550049,
      "learning_rate": 9.66105045366789e-06,
      "loss": 0.0661,
      "step": 122880
    },
    {
      "epoch": 0.2011285455247671,
      "grad_norm": 0.9929251074790955,
      "learning_rate": 9.660984561454374e-06,
      "loss": 0.0416,
      "step": 122900
    },
    {
      "epoch": 0.20116127596342046,
      "grad_norm": 0.8680424690246582,
      "learning_rate": 9.660918669240855e-06,
      "loss": 0.06,
      "step": 122920
    },
    {
      "epoch": 0.2011940064020738,
      "grad_norm": 2.6928062438964844,
      "learning_rate": 9.660852777027339e-06,
      "loss": 0.0754,
      "step": 122940
    },
    {
      "epoch": 0.20122673684072714,
      "grad_norm": 3.524306058883667,
      "learning_rate": 9.660786884813823e-06,
      "loss": 0.0605,
      "step": 122960
    },
    {
      "epoch": 0.2012594672793805,
      "grad_norm": 1.8941752910614014,
      "learning_rate": 9.660720992600305e-06,
      "loss": 0.0528,
      "step": 122980
    },
    {
      "epoch": 0.2012921977180338,
      "grad_norm": 1.6557741165161133,
      "learning_rate": 9.660655100386788e-06,
      "loss": 0.0628,
      "step": 123000
    },
    {
      "epoch": 0.20132492815668715,
      "grad_norm": 1.0971393585205078,
      "learning_rate": 9.660589208173272e-06,
      "loss": 0.0658,
      "step": 123020
    },
    {
      "epoch": 0.2013576585953405,
      "grad_norm": 3.4584460258483887,
      "learning_rate": 9.660523315959754e-06,
      "loss": 0.0723,
      "step": 123040
    },
    {
      "epoch": 0.20139038903399384,
      "grad_norm": 3.25311017036438,
      "learning_rate": 9.660457423746237e-06,
      "loss": 0.0646,
      "step": 123060
    },
    {
      "epoch": 0.20142311947264718,
      "grad_norm": 2.1286914348602295,
      "learning_rate": 9.660391531532719e-06,
      "loss": 0.0575,
      "step": 123080
    },
    {
      "epoch": 0.2014558499113005,
      "grad_norm": 7.126840591430664,
      "learning_rate": 9.660325639319203e-06,
      "loss": 0.0894,
      "step": 123100
    },
    {
      "epoch": 0.20148858034995384,
      "grad_norm": 1.7220005989074707,
      "learning_rate": 9.660259747105686e-06,
      "loss": 0.0728,
      "step": 123120
    },
    {
      "epoch": 0.2015213107886072,
      "grad_norm": 4.117365837097168,
      "learning_rate": 9.660193854892168e-06,
      "loss": 0.0596,
      "step": 123140
    },
    {
      "epoch": 0.20155404122726053,
      "grad_norm": 0.854361891746521,
      "learning_rate": 9.660127962678652e-06,
      "loss": 0.058,
      "step": 123160
    },
    {
      "epoch": 0.20158677166591388,
      "grad_norm": 1.6878411769866943,
      "learning_rate": 9.660062070465134e-06,
      "loss": 0.0527,
      "step": 123180
    },
    {
      "epoch": 0.2016195021045672,
      "grad_norm": 4.48769474029541,
      "learning_rate": 9.659996178251617e-06,
      "loss": 0.0623,
      "step": 123200
    },
    {
      "epoch": 0.20165223254322054,
      "grad_norm": 1.5495357513427734,
      "learning_rate": 9.659930286038099e-06,
      "loss": 0.0642,
      "step": 123220
    },
    {
      "epoch": 0.20168496298187388,
      "grad_norm": 1.3393996953964233,
      "learning_rate": 9.659864393824583e-06,
      "loss": 0.0533,
      "step": 123240
    },
    {
      "epoch": 0.20171769342052723,
      "grad_norm": 4.284030914306641,
      "learning_rate": 9.659798501611065e-06,
      "loss": 0.0598,
      "step": 123260
    },
    {
      "epoch": 0.20175042385918057,
      "grad_norm": 1.5973232984542847,
      "learning_rate": 9.659732609397548e-06,
      "loss": 0.0666,
      "step": 123280
    },
    {
      "epoch": 0.2017831542978339,
      "grad_norm": 2.653149366378784,
      "learning_rate": 9.65966671718403e-06,
      "loss": 0.0783,
      "step": 123300
    },
    {
      "epoch": 0.20181588473648723,
      "grad_norm": 12.722042083740234,
      "learning_rate": 9.659600824970514e-06,
      "loss": 0.0453,
      "step": 123320
    },
    {
      "epoch": 0.20184861517514058,
      "grad_norm": 2.4877257347106934,
      "learning_rate": 9.659534932756997e-06,
      "loss": 0.0813,
      "step": 123340
    },
    {
      "epoch": 0.20188134561379392,
      "grad_norm": 1.7917909622192383,
      "learning_rate": 9.659469040543479e-06,
      "loss": 0.0579,
      "step": 123360
    },
    {
      "epoch": 0.20191407605244727,
      "grad_norm": 3.5339114665985107,
      "learning_rate": 9.659403148329963e-06,
      "loss": 0.0651,
      "step": 123380
    },
    {
      "epoch": 0.20194680649110058,
      "grad_norm": 3.8894641399383545,
      "learning_rate": 9.659337256116446e-06,
      "loss": 0.0692,
      "step": 123400
    },
    {
      "epoch": 0.20197953692975393,
      "grad_norm": 2.5431275367736816,
      "learning_rate": 9.659271363902928e-06,
      "loss": 0.0642,
      "step": 123420
    },
    {
      "epoch": 0.20201226736840727,
      "grad_norm": 4.995075702667236,
      "learning_rate": 9.659205471689412e-06,
      "loss": 0.0668,
      "step": 123440
    },
    {
      "epoch": 0.20204499780706062,
      "grad_norm": 1.5474426746368408,
      "learning_rate": 9.659139579475895e-06,
      "loss": 0.0722,
      "step": 123460
    },
    {
      "epoch": 0.20207772824571396,
      "grad_norm": 0.8026766777038574,
      "learning_rate": 9.659073687262377e-06,
      "loss": 0.0735,
      "step": 123480
    },
    {
      "epoch": 0.20211045868436728,
      "grad_norm": 2.8153116703033447,
      "learning_rate": 9.659007795048861e-06,
      "loss": 0.0711,
      "step": 123500
    },
    {
      "epoch": 0.20214318912302062,
      "grad_norm": 4.108283042907715,
      "learning_rate": 9.658941902835343e-06,
      "loss": 0.0647,
      "step": 123520
    },
    {
      "epoch": 0.20217591956167397,
      "grad_norm": 3.582024574279785,
      "learning_rate": 9.658876010621826e-06,
      "loss": 0.0609,
      "step": 123540
    },
    {
      "epoch": 0.2022086500003273,
      "grad_norm": 7.071532249450684,
      "learning_rate": 9.658810118408308e-06,
      "loss": 0.071,
      "step": 123560
    },
    {
      "epoch": 0.20224138043898066,
      "grad_norm": 1.5629377365112305,
      "learning_rate": 9.658744226194792e-06,
      "loss": 0.0693,
      "step": 123580
    },
    {
      "epoch": 0.20227411087763397,
      "grad_norm": 2.1188340187072754,
      "learning_rate": 9.658678333981274e-06,
      "loss": 0.0655,
      "step": 123600
    },
    {
      "epoch": 0.20230684131628732,
      "grad_norm": 3.6971030235290527,
      "learning_rate": 9.658612441767757e-06,
      "loss": 0.0643,
      "step": 123620
    },
    {
      "epoch": 0.20233957175494066,
      "grad_norm": 2.2153916358947754,
      "learning_rate": 9.65854654955424e-06,
      "loss": 0.0596,
      "step": 123640
    },
    {
      "epoch": 0.202372302193594,
      "grad_norm": 0.993754506111145,
      "learning_rate": 9.658480657340723e-06,
      "loss": 0.0646,
      "step": 123660
    },
    {
      "epoch": 0.20240503263224735,
      "grad_norm": 2.3312768936157227,
      "learning_rate": 9.658414765127205e-06,
      "loss": 0.0685,
      "step": 123680
    },
    {
      "epoch": 0.20243776307090067,
      "grad_norm": 1.2714420557022095,
      "learning_rate": 9.658348872913688e-06,
      "loss": 0.0765,
      "step": 123700
    },
    {
      "epoch": 0.202470493509554,
      "grad_norm": 2.303408145904541,
      "learning_rate": 9.658282980700172e-06,
      "loss": 0.0566,
      "step": 123720
    },
    {
      "epoch": 0.20250322394820736,
      "grad_norm": 1.6785333156585693,
      "learning_rate": 9.658217088486654e-06,
      "loss": 0.0779,
      "step": 123740
    },
    {
      "epoch": 0.2025359543868607,
      "grad_norm": 2.4533803462982178,
      "learning_rate": 9.658151196273137e-06,
      "loss": 0.0683,
      "step": 123760
    },
    {
      "epoch": 0.20256868482551404,
      "grad_norm": 0.7819052338600159,
      "learning_rate": 9.658085304059621e-06,
      "loss": 0.0696,
      "step": 123780
    },
    {
      "epoch": 0.20260141526416736,
      "grad_norm": 3.4689762592315674,
      "learning_rate": 9.658019411846103e-06,
      "loss": 0.0637,
      "step": 123800
    },
    {
      "epoch": 0.2026341457028207,
      "grad_norm": 7.822580814361572,
      "learning_rate": 9.657953519632586e-06,
      "loss": 0.0621,
      "step": 123820
    },
    {
      "epoch": 0.20266687614147405,
      "grad_norm": 3.886016845703125,
      "learning_rate": 9.65788762741907e-06,
      "loss": 0.0606,
      "step": 123840
    },
    {
      "epoch": 0.2026996065801274,
      "grad_norm": 1.3759803771972656,
      "learning_rate": 9.657821735205552e-06,
      "loss": 0.0587,
      "step": 123860
    },
    {
      "epoch": 0.20273233701878074,
      "grad_norm": 2.487905979156494,
      "learning_rate": 9.657755842992035e-06,
      "loss": 0.0582,
      "step": 123880
    },
    {
      "epoch": 0.20276506745743406,
      "grad_norm": 23.20622444152832,
      "learning_rate": 9.657689950778517e-06,
      "loss": 0.067,
      "step": 123900
    },
    {
      "epoch": 0.2027977978960874,
      "grad_norm": 2.463041067123413,
      "learning_rate": 9.657624058565001e-06,
      "loss": 0.0698,
      "step": 123920
    },
    {
      "epoch": 0.20283052833474075,
      "grad_norm": 1.5759973526000977,
      "learning_rate": 9.657558166351483e-06,
      "loss": 0.0823,
      "step": 123940
    },
    {
      "epoch": 0.2028632587733941,
      "grad_norm": 0.7457911372184753,
      "learning_rate": 9.657492274137966e-06,
      "loss": 0.0695,
      "step": 123960
    },
    {
      "epoch": 0.2028959892120474,
      "grad_norm": 4.268850326538086,
      "learning_rate": 9.657426381924448e-06,
      "loss": 0.073,
      "step": 123980
    },
    {
      "epoch": 0.20292871965070075,
      "grad_norm": 1.3242930173873901,
      "learning_rate": 9.657360489710932e-06,
      "loss": 0.0693,
      "step": 124000
    },
    {
      "epoch": 0.2029614500893541,
      "grad_norm": 4.617033958435059,
      "learning_rate": 9.657294597497414e-06,
      "loss": 0.0763,
      "step": 124020
    },
    {
      "epoch": 0.20299418052800744,
      "grad_norm": 2.36281681060791,
      "learning_rate": 9.657228705283897e-06,
      "loss": 0.0646,
      "step": 124040
    },
    {
      "epoch": 0.20302691096666078,
      "grad_norm": 3.612722396850586,
      "learning_rate": 9.65716281307038e-06,
      "loss": 0.0434,
      "step": 124060
    },
    {
      "epoch": 0.2030596414053141,
      "grad_norm": 3.5868310928344727,
      "learning_rate": 9.657096920856863e-06,
      "loss": 0.0685,
      "step": 124080
    },
    {
      "epoch": 0.20309237184396745,
      "grad_norm": 9.193137168884277,
      "learning_rate": 9.657031028643345e-06,
      "loss": 0.0681,
      "step": 124100
    },
    {
      "epoch": 0.2031251022826208,
      "grad_norm": 1.4560493230819702,
      "learning_rate": 9.656965136429828e-06,
      "loss": 0.0678,
      "step": 124120
    },
    {
      "epoch": 0.20315783272127413,
      "grad_norm": 4.214592456817627,
      "learning_rate": 9.656899244216312e-06,
      "loss": 0.0729,
      "step": 124140
    },
    {
      "epoch": 0.20319056315992748,
      "grad_norm": 3.082500696182251,
      "learning_rate": 9.656833352002794e-06,
      "loss": 0.0498,
      "step": 124160
    },
    {
      "epoch": 0.2032232935985808,
      "grad_norm": 1.4901248216629028,
      "learning_rate": 9.656767459789277e-06,
      "loss": 0.0628,
      "step": 124180
    },
    {
      "epoch": 0.20325602403723414,
      "grad_norm": 29.181577682495117,
      "learning_rate": 9.656701567575761e-06,
      "loss": 0.0641,
      "step": 124200
    },
    {
      "epoch": 0.20328875447588748,
      "grad_norm": 1.1612745523452759,
      "learning_rate": 9.656635675362243e-06,
      "loss": 0.0642,
      "step": 124220
    },
    {
      "epoch": 0.20332148491454083,
      "grad_norm": 0.4324415326118469,
      "learning_rate": 9.656569783148726e-06,
      "loss": 0.0705,
      "step": 124240
    },
    {
      "epoch": 0.20335421535319417,
      "grad_norm": 1.5270695686340332,
      "learning_rate": 9.65650389093521e-06,
      "loss": 0.066,
      "step": 124260
    },
    {
      "epoch": 0.2033869457918475,
      "grad_norm": 0.8497119545936584,
      "learning_rate": 9.656437998721692e-06,
      "loss": 0.0712,
      "step": 124280
    },
    {
      "epoch": 0.20341967623050083,
      "grad_norm": 3.23827862739563,
      "learning_rate": 9.656372106508176e-06,
      "loss": 0.053,
      "step": 124300
    },
    {
      "epoch": 0.20345240666915418,
      "grad_norm": 4.161468982696533,
      "learning_rate": 9.656306214294657e-06,
      "loss": 0.0479,
      "step": 124320
    },
    {
      "epoch": 0.20348513710780752,
      "grad_norm": 1.5811333656311035,
      "learning_rate": 9.656240322081141e-06,
      "loss": 0.045,
      "step": 124340
    },
    {
      "epoch": 0.20351786754646087,
      "grad_norm": 2.376241445541382,
      "learning_rate": 9.656174429867623e-06,
      "loss": 0.0692,
      "step": 124360
    },
    {
      "epoch": 0.20355059798511418,
      "grad_norm": 1.6904792785644531,
      "learning_rate": 9.656108537654106e-06,
      "loss": 0.0444,
      "step": 124380
    },
    {
      "epoch": 0.20358332842376753,
      "grad_norm": 3.019930124282837,
      "learning_rate": 9.656042645440588e-06,
      "loss": 0.0591,
      "step": 124400
    },
    {
      "epoch": 0.20361605886242087,
      "grad_norm": 2.8644731044769287,
      "learning_rate": 9.655976753227072e-06,
      "loss": 0.0575,
      "step": 124420
    },
    {
      "epoch": 0.20364878930107422,
      "grad_norm": 0.9693022966384888,
      "learning_rate": 9.655910861013554e-06,
      "loss": 0.0809,
      "step": 124440
    },
    {
      "epoch": 0.20368151973972756,
      "grad_norm": 2.386904239654541,
      "learning_rate": 9.655844968800037e-06,
      "loss": 0.0607,
      "step": 124460
    },
    {
      "epoch": 0.20371425017838088,
      "grad_norm": 3.821716547012329,
      "learning_rate": 9.65577907658652e-06,
      "loss": 0.0584,
      "step": 124480
    },
    {
      "epoch": 0.20374698061703422,
      "grad_norm": 2.8203964233398438,
      "learning_rate": 9.655713184373003e-06,
      "loss": 0.0706,
      "step": 124500
    },
    {
      "epoch": 0.20377971105568757,
      "grad_norm": 2.6236627101898193,
      "learning_rate": 9.655647292159487e-06,
      "loss": 0.0706,
      "step": 124520
    },
    {
      "epoch": 0.2038124414943409,
      "grad_norm": 0.974700927734375,
      "learning_rate": 9.655581399945968e-06,
      "loss": 0.0449,
      "step": 124540
    },
    {
      "epoch": 0.20384517193299426,
      "grad_norm": 5.942873477935791,
      "learning_rate": 9.655515507732452e-06,
      "loss": 0.0842,
      "step": 124560
    },
    {
      "epoch": 0.20387790237164757,
      "grad_norm": 5.178988933563232,
      "learning_rate": 9.655449615518936e-06,
      "loss": 0.073,
      "step": 124580
    },
    {
      "epoch": 0.20391063281030092,
      "grad_norm": 2.6336252689361572,
      "learning_rate": 9.655383723305417e-06,
      "loss": 0.0643,
      "step": 124600
    },
    {
      "epoch": 0.20394336324895426,
      "grad_norm": 5.842587471008301,
      "learning_rate": 9.655317831091901e-06,
      "loss": 0.0758,
      "step": 124620
    },
    {
      "epoch": 0.2039760936876076,
      "grad_norm": 2.2325856685638428,
      "learning_rate": 9.655251938878385e-06,
      "loss": 0.0593,
      "step": 124640
    },
    {
      "epoch": 0.20400882412626095,
      "grad_norm": 3.3283159732818604,
      "learning_rate": 9.655186046664867e-06,
      "loss": 0.0674,
      "step": 124660
    },
    {
      "epoch": 0.20404155456491427,
      "grad_norm": 3.777378559112549,
      "learning_rate": 9.65512015445135e-06,
      "loss": 0.0633,
      "step": 124680
    },
    {
      "epoch": 0.2040742850035676,
      "grad_norm": 6.954375267028809,
      "learning_rate": 9.655054262237832e-06,
      "loss": 0.067,
      "step": 124700
    },
    {
      "epoch": 0.20410701544222096,
      "grad_norm": 3.1591522693634033,
      "learning_rate": 9.654988370024316e-06,
      "loss": 0.0749,
      "step": 124720
    },
    {
      "epoch": 0.2041397458808743,
      "grad_norm": 1.158884048461914,
      "learning_rate": 9.654922477810798e-06,
      "loss": 0.0633,
      "step": 124740
    },
    {
      "epoch": 0.20417247631952765,
      "grad_norm": 1.5922420024871826,
      "learning_rate": 9.654856585597281e-06,
      "loss": 0.0545,
      "step": 124760
    },
    {
      "epoch": 0.20420520675818096,
      "grad_norm": 5.144536972045898,
      "learning_rate": 9.654790693383763e-06,
      "loss": 0.0852,
      "step": 124780
    },
    {
      "epoch": 0.2042379371968343,
      "grad_norm": 2.761157989501953,
      "learning_rate": 9.654724801170247e-06,
      "loss": 0.0867,
      "step": 124800
    },
    {
      "epoch": 0.20427066763548765,
      "grad_norm": 1.7949522733688354,
      "learning_rate": 9.654658908956728e-06,
      "loss": 0.0697,
      "step": 124820
    },
    {
      "epoch": 0.204303398074141,
      "grad_norm": 0.9806770086288452,
      "learning_rate": 9.654593016743212e-06,
      "loss": 0.0707,
      "step": 124840
    },
    {
      "epoch": 0.20433612851279434,
      "grad_norm": 20.370649337768555,
      "learning_rate": 9.654527124529696e-06,
      "loss": 0.06,
      "step": 124860
    },
    {
      "epoch": 0.20436885895144766,
      "grad_norm": 2.4434921741485596,
      "learning_rate": 9.654461232316178e-06,
      "loss": 0.0732,
      "step": 124880
    },
    {
      "epoch": 0.204401589390101,
      "grad_norm": 1.2044999599456787,
      "learning_rate": 9.654395340102661e-06,
      "loss": 0.0557,
      "step": 124900
    },
    {
      "epoch": 0.20443431982875435,
      "grad_norm": 1.3593144416809082,
      "learning_rate": 9.654329447889143e-06,
      "loss": 0.0624,
      "step": 124920
    },
    {
      "epoch": 0.2044670502674077,
      "grad_norm": 1.4869468212127686,
      "learning_rate": 9.654263555675627e-06,
      "loss": 0.0837,
      "step": 124940
    },
    {
      "epoch": 0.20449978070606104,
      "grad_norm": 2.860980272293091,
      "learning_rate": 9.654197663462108e-06,
      "loss": 0.0326,
      "step": 124960
    },
    {
      "epoch": 0.20453251114471435,
      "grad_norm": 1.9259575605392456,
      "learning_rate": 9.654131771248592e-06,
      "loss": 0.0561,
      "step": 124980
    },
    {
      "epoch": 0.2045652415833677,
      "grad_norm": 2.01945161819458,
      "learning_rate": 9.654065879035076e-06,
      "loss": 0.0684,
      "step": 125000
    },
    {
      "epoch": 0.20459797202202104,
      "grad_norm": 3.130741834640503,
      "learning_rate": 9.653999986821558e-06,
      "loss": 0.075,
      "step": 125020
    },
    {
      "epoch": 0.20463070246067439,
      "grad_norm": 5.199065208435059,
      "learning_rate": 9.653934094608041e-06,
      "loss": 0.0885,
      "step": 125040
    },
    {
      "epoch": 0.20466343289932773,
      "grad_norm": 1.1241549253463745,
      "learning_rate": 9.653868202394525e-06,
      "loss": 0.0645,
      "step": 125060
    },
    {
      "epoch": 0.20469616333798105,
      "grad_norm": 6.607696533203125,
      "learning_rate": 9.653802310181007e-06,
      "loss": 0.0701,
      "step": 125080
    },
    {
      "epoch": 0.2047288937766344,
      "grad_norm": 2.0243704319000244,
      "learning_rate": 9.65373641796749e-06,
      "loss": 0.0767,
      "step": 125100
    },
    {
      "epoch": 0.20476162421528774,
      "grad_norm": 2.3805882930755615,
      "learning_rate": 9.653670525753972e-06,
      "loss": 0.0699,
      "step": 125120
    },
    {
      "epoch": 0.20479435465394108,
      "grad_norm": 3.648509979248047,
      "learning_rate": 9.653604633540456e-06,
      "loss": 0.0633,
      "step": 125140
    },
    {
      "epoch": 0.20482708509259442,
      "grad_norm": 4.433507442474365,
      "learning_rate": 9.653538741326938e-06,
      "loss": 0.0703,
      "step": 125160
    },
    {
      "epoch": 0.20485981553124774,
      "grad_norm": 3.3411171436309814,
      "learning_rate": 9.653472849113421e-06,
      "loss": 0.0632,
      "step": 125180
    },
    {
      "epoch": 0.20489254596990109,
      "grad_norm": 2.9365367889404297,
      "learning_rate": 9.653406956899905e-06,
      "loss": 0.059,
      "step": 125200
    },
    {
      "epoch": 0.20492527640855443,
      "grad_norm": 1.2836155891418457,
      "learning_rate": 9.653341064686387e-06,
      "loss": 0.0664,
      "step": 125220
    },
    {
      "epoch": 0.20495800684720777,
      "grad_norm": 0.6898733377456665,
      "learning_rate": 9.65327517247287e-06,
      "loss": 0.0695,
      "step": 125240
    },
    {
      "epoch": 0.20499073728586112,
      "grad_norm": 1.1328749656677246,
      "learning_rate": 9.653209280259352e-06,
      "loss": 0.0482,
      "step": 125260
    },
    {
      "epoch": 0.20502346772451444,
      "grad_norm": 2.7355499267578125,
      "learning_rate": 9.653143388045836e-06,
      "loss": 0.0631,
      "step": 125280
    },
    {
      "epoch": 0.20505619816316778,
      "grad_norm": 6.621389389038086,
      "learning_rate": 9.653077495832318e-06,
      "loss": 0.0641,
      "step": 125300
    },
    {
      "epoch": 0.20508892860182112,
      "grad_norm": 2.796236753463745,
      "learning_rate": 9.653011603618801e-06,
      "loss": 0.0418,
      "step": 125320
    },
    {
      "epoch": 0.20512165904047447,
      "grad_norm": 2.7333502769470215,
      "learning_rate": 9.652945711405283e-06,
      "loss": 0.0689,
      "step": 125340
    },
    {
      "epoch": 0.20515438947912779,
      "grad_norm": 1.163620114326477,
      "learning_rate": 9.652879819191767e-06,
      "loss": 0.0528,
      "step": 125360
    },
    {
      "epoch": 0.20518711991778113,
      "grad_norm": 2.1556897163391113,
      "learning_rate": 9.65281392697825e-06,
      "loss": 0.0637,
      "step": 125380
    },
    {
      "epoch": 0.20521985035643447,
      "grad_norm": 2.160560369491577,
      "learning_rate": 9.652748034764732e-06,
      "loss": 0.0655,
      "step": 125400
    },
    {
      "epoch": 0.20525258079508782,
      "grad_norm": 2.109848737716675,
      "learning_rate": 9.652682142551216e-06,
      "loss": 0.0549,
      "step": 125420
    },
    {
      "epoch": 0.20528531123374116,
      "grad_norm": 0.3516615629196167,
      "learning_rate": 9.6526162503377e-06,
      "loss": 0.0647,
      "step": 125440
    },
    {
      "epoch": 0.20531804167239448,
      "grad_norm": 6.602402687072754,
      "learning_rate": 9.652550358124181e-06,
      "loss": 0.0722,
      "step": 125460
    },
    {
      "epoch": 0.20535077211104782,
      "grad_norm": 1.8644099235534668,
      "learning_rate": 9.652484465910665e-06,
      "loss": 0.0556,
      "step": 125480
    },
    {
      "epoch": 0.20538350254970117,
      "grad_norm": 4.727684020996094,
      "learning_rate": 9.652418573697147e-06,
      "loss": 0.0651,
      "step": 125500
    },
    {
      "epoch": 0.2054162329883545,
      "grad_norm": 3.029688835144043,
      "learning_rate": 9.65235268148363e-06,
      "loss": 0.0594,
      "step": 125520
    },
    {
      "epoch": 0.20544896342700786,
      "grad_norm": 2.555441379547119,
      "learning_rate": 9.652286789270112e-06,
      "loss": 0.0636,
      "step": 125540
    },
    {
      "epoch": 0.20548169386566117,
      "grad_norm": 1.050502896308899,
      "learning_rate": 9.652220897056596e-06,
      "loss": 0.0522,
      "step": 125560
    },
    {
      "epoch": 0.20551442430431452,
      "grad_norm": 1.9603638648986816,
      "learning_rate": 9.65215500484308e-06,
      "loss": 0.0627,
      "step": 125580
    },
    {
      "epoch": 0.20554715474296786,
      "grad_norm": 1.0816948413848877,
      "learning_rate": 9.652089112629561e-06,
      "loss": 0.0615,
      "step": 125600
    },
    {
      "epoch": 0.2055798851816212,
      "grad_norm": 2.809504508972168,
      "learning_rate": 9.652023220416045e-06,
      "loss": 0.046,
      "step": 125620
    },
    {
      "epoch": 0.20561261562027455,
      "grad_norm": 2.625147581100464,
      "learning_rate": 9.651957328202527e-06,
      "loss": 0.0557,
      "step": 125640
    },
    {
      "epoch": 0.20564534605892787,
      "grad_norm": 0.5940306186676025,
      "learning_rate": 9.65189143598901e-06,
      "loss": 0.0564,
      "step": 125660
    },
    {
      "epoch": 0.2056780764975812,
      "grad_norm": 2.187978506088257,
      "learning_rate": 9.651825543775492e-06,
      "loss": 0.0632,
      "step": 125680
    },
    {
      "epoch": 0.20571080693623456,
      "grad_norm": 3.3620758056640625,
      "learning_rate": 9.651759651561976e-06,
      "loss": 0.0676,
      "step": 125700
    },
    {
      "epoch": 0.2057435373748879,
      "grad_norm": 3.0554540157318115,
      "learning_rate": 9.651693759348458e-06,
      "loss": 0.0732,
      "step": 125720
    },
    {
      "epoch": 0.20577626781354125,
      "grad_norm": 1.776777744293213,
      "learning_rate": 9.651627867134941e-06,
      "loss": 0.0716,
      "step": 125740
    },
    {
      "epoch": 0.20580899825219456,
      "grad_norm": 1.8254059553146362,
      "learning_rate": 9.651561974921423e-06,
      "loss": 0.0679,
      "step": 125760
    },
    {
      "epoch": 0.2058417286908479,
      "grad_norm": 1.8907899856567383,
      "learning_rate": 9.651496082707907e-06,
      "loss": 0.0548,
      "step": 125780
    },
    {
      "epoch": 0.20587445912950125,
      "grad_norm": 2.9520621299743652,
      "learning_rate": 9.65143019049439e-06,
      "loss": 0.0649,
      "step": 125800
    },
    {
      "epoch": 0.2059071895681546,
      "grad_norm": 0.7565929889678955,
      "learning_rate": 9.651364298280872e-06,
      "loss": 0.0525,
      "step": 125820
    },
    {
      "epoch": 0.20593992000680794,
      "grad_norm": 1.1878217458724976,
      "learning_rate": 9.651298406067356e-06,
      "loss": 0.0659,
      "step": 125840
    },
    {
      "epoch": 0.20597265044546126,
      "grad_norm": 0.47393903136253357,
      "learning_rate": 9.65123251385384e-06,
      "loss": 0.0542,
      "step": 125860
    },
    {
      "epoch": 0.2060053808841146,
      "grad_norm": 2.566063642501831,
      "learning_rate": 9.651166621640321e-06,
      "loss": 0.059,
      "step": 125880
    },
    {
      "epoch": 0.20603811132276795,
      "grad_norm": 1.3693006038665771,
      "learning_rate": 9.651100729426805e-06,
      "loss": 0.0482,
      "step": 125900
    },
    {
      "epoch": 0.2060708417614213,
      "grad_norm": 2.35489559173584,
      "learning_rate": 9.651034837213288e-06,
      "loss": 0.0602,
      "step": 125920
    },
    {
      "epoch": 0.20610357220007464,
      "grad_norm": 2.8426930904388428,
      "learning_rate": 9.65096894499977e-06,
      "loss": 0.0745,
      "step": 125940
    },
    {
      "epoch": 0.20613630263872795,
      "grad_norm": 2.708338975906372,
      "learning_rate": 9.650903052786254e-06,
      "loss": 0.074,
      "step": 125960
    },
    {
      "epoch": 0.2061690330773813,
      "grad_norm": 3.0088672637939453,
      "learning_rate": 9.650837160572736e-06,
      "loss": 0.0584,
      "step": 125980
    },
    {
      "epoch": 0.20620176351603464,
      "grad_norm": 4.845804214477539,
      "learning_rate": 9.65077126835922e-06,
      "loss": 0.0771,
      "step": 126000
    },
    {
      "epoch": 0.206234493954688,
      "grad_norm": 1.1608277559280396,
      "learning_rate": 9.650705376145701e-06,
      "loss": 0.0492,
      "step": 126020
    },
    {
      "epoch": 0.20626722439334133,
      "grad_norm": 4.996984481811523,
      "learning_rate": 9.650639483932185e-06,
      "loss": 0.0686,
      "step": 126040
    },
    {
      "epoch": 0.20629995483199465,
      "grad_norm": 5.673241138458252,
      "learning_rate": 9.650573591718667e-06,
      "loss": 0.0615,
      "step": 126060
    },
    {
      "epoch": 0.206332685270648,
      "grad_norm": 2.9631078243255615,
      "learning_rate": 9.65050769950515e-06,
      "loss": 0.0563,
      "step": 126080
    },
    {
      "epoch": 0.20636541570930134,
      "grad_norm": 2.0079870223999023,
      "learning_rate": 9.650441807291632e-06,
      "loss": 0.0589,
      "step": 126100
    },
    {
      "epoch": 0.20639814614795468,
      "grad_norm": 2.244413137435913,
      "learning_rate": 9.650375915078116e-06,
      "loss": 0.0584,
      "step": 126120
    },
    {
      "epoch": 0.20643087658660803,
      "grad_norm": 2.946190357208252,
      "learning_rate": 9.650310022864598e-06,
      "loss": 0.0553,
      "step": 126140
    },
    {
      "epoch": 0.20646360702526134,
      "grad_norm": 2.619034767150879,
      "learning_rate": 9.650244130651081e-06,
      "loss": 0.0718,
      "step": 126160
    },
    {
      "epoch": 0.2064963374639147,
      "grad_norm": 2.2563912868499756,
      "learning_rate": 9.650178238437565e-06,
      "loss": 0.056,
      "step": 126180
    },
    {
      "epoch": 0.20652906790256803,
      "grad_norm": 1.1843369007110596,
      "learning_rate": 9.650112346224047e-06,
      "loss": 0.0659,
      "step": 126200
    },
    {
      "epoch": 0.20656179834122138,
      "grad_norm": 4.432088375091553,
      "learning_rate": 9.65004645401053e-06,
      "loss": 0.062,
      "step": 126220
    },
    {
      "epoch": 0.20659452877987472,
      "grad_norm": 3.5883824825286865,
      "learning_rate": 9.649980561797014e-06,
      "loss": 0.0653,
      "step": 126240
    },
    {
      "epoch": 0.20662725921852804,
      "grad_norm": 3.340359926223755,
      "learning_rate": 9.649914669583496e-06,
      "loss": 0.0551,
      "step": 126260
    },
    {
      "epoch": 0.20665998965718138,
      "grad_norm": 4.084446430206299,
      "learning_rate": 9.64984877736998e-06,
      "loss": 0.0653,
      "step": 126280
    },
    {
      "epoch": 0.20669272009583473,
      "grad_norm": 2.033377170562744,
      "learning_rate": 9.649782885156463e-06,
      "loss": 0.0669,
      "step": 126300
    },
    {
      "epoch": 0.20672545053448807,
      "grad_norm": 2.76328706741333,
      "learning_rate": 9.649716992942945e-06,
      "loss": 0.0526,
      "step": 126320
    },
    {
      "epoch": 0.20675818097314141,
      "grad_norm": 3.686567783355713,
      "learning_rate": 9.649651100729429e-06,
      "loss": 0.0619,
      "step": 126340
    },
    {
      "epoch": 0.20679091141179473,
      "grad_norm": 1.882871150970459,
      "learning_rate": 9.64958520851591e-06,
      "loss": 0.0698,
      "step": 126360
    },
    {
      "epoch": 0.20682364185044808,
      "grad_norm": 3.7368690967559814,
      "learning_rate": 9.649519316302394e-06,
      "loss": 0.0764,
      "step": 126380
    },
    {
      "epoch": 0.20685637228910142,
      "grad_norm": 2.6471312046051025,
      "learning_rate": 9.649453424088876e-06,
      "loss": 0.0647,
      "step": 126400
    },
    {
      "epoch": 0.20688910272775476,
      "grad_norm": 5.040985584259033,
      "learning_rate": 9.64938753187536e-06,
      "loss": 0.0607,
      "step": 126420
    },
    {
      "epoch": 0.2069218331664081,
      "grad_norm": 2.3193702697753906,
      "learning_rate": 9.649321639661841e-06,
      "loss": 0.0527,
      "step": 126440
    },
    {
      "epoch": 0.20695456360506143,
      "grad_norm": 4.009838581085205,
      "learning_rate": 9.649255747448325e-06,
      "loss": 0.07,
      "step": 126460
    },
    {
      "epoch": 0.20698729404371477,
      "grad_norm": 2.6737494468688965,
      "learning_rate": 9.649189855234807e-06,
      "loss": 0.0633,
      "step": 126480
    },
    {
      "epoch": 0.20702002448236811,
      "grad_norm": 2.765522003173828,
      "learning_rate": 9.64912396302129e-06,
      "loss": 0.0581,
      "step": 126500
    },
    {
      "epoch": 0.20705275492102146,
      "grad_norm": 1.2138980627059937,
      "learning_rate": 9.649058070807772e-06,
      "loss": 0.0856,
      "step": 126520
    },
    {
      "epoch": 0.2070854853596748,
      "grad_norm": 2.503892421722412,
      "learning_rate": 9.648992178594256e-06,
      "loss": 0.072,
      "step": 126540
    },
    {
      "epoch": 0.20711821579832812,
      "grad_norm": 3.2356793880462646,
      "learning_rate": 9.64892628638074e-06,
      "loss": 0.0609,
      "step": 126560
    },
    {
      "epoch": 0.20715094623698146,
      "grad_norm": 1.4630552530288696,
      "learning_rate": 9.648860394167221e-06,
      "loss": 0.0675,
      "step": 126580
    },
    {
      "epoch": 0.2071836766756348,
      "grad_norm": 5.722335338592529,
      "learning_rate": 9.648794501953705e-06,
      "loss": 0.0758,
      "step": 126600
    },
    {
      "epoch": 0.20721640711428815,
      "grad_norm": 4.896772861480713,
      "learning_rate": 9.648728609740189e-06,
      "loss": 0.0879,
      "step": 126620
    },
    {
      "epoch": 0.2072491375529415,
      "grad_norm": 2.824777841567993,
      "learning_rate": 9.64866271752667e-06,
      "loss": 0.0467,
      "step": 126640
    },
    {
      "epoch": 0.20728186799159481,
      "grad_norm": 1.6164336204528809,
      "learning_rate": 9.648596825313154e-06,
      "loss": 0.0516,
      "step": 126660
    },
    {
      "epoch": 0.20731459843024816,
      "grad_norm": 1.1745392084121704,
      "learning_rate": 9.648530933099638e-06,
      "loss": 0.0694,
      "step": 126680
    },
    {
      "epoch": 0.2073473288689015,
      "grad_norm": 2.701134443283081,
      "learning_rate": 9.64846504088612e-06,
      "loss": 0.0636,
      "step": 126700
    },
    {
      "epoch": 0.20738005930755485,
      "grad_norm": 2.982572317123413,
      "learning_rate": 9.648399148672603e-06,
      "loss": 0.0768,
      "step": 126720
    },
    {
      "epoch": 0.20741278974620816,
      "grad_norm": 6.084234714508057,
      "learning_rate": 9.648333256459085e-06,
      "loss": 0.0566,
      "step": 126740
    },
    {
      "epoch": 0.2074455201848615,
      "grad_norm": 1.7611613273620605,
      "learning_rate": 9.648267364245569e-06,
      "loss": 0.0724,
      "step": 126760
    },
    {
      "epoch": 0.20747825062351485,
      "grad_norm": 2.7876510620117188,
      "learning_rate": 9.64820147203205e-06,
      "loss": 0.0624,
      "step": 126780
    },
    {
      "epoch": 0.2075109810621682,
      "grad_norm": 1.3831353187561035,
      "learning_rate": 9.648135579818534e-06,
      "loss": 0.0562,
      "step": 126800
    },
    {
      "epoch": 0.20754371150082154,
      "grad_norm": 3.823408365249634,
      "learning_rate": 9.648069687605016e-06,
      "loss": 0.0552,
      "step": 126820
    },
    {
      "epoch": 0.20757644193947486,
      "grad_norm": 1.7633918523788452,
      "learning_rate": 9.6480037953915e-06,
      "loss": 0.0595,
      "step": 126840
    },
    {
      "epoch": 0.2076091723781282,
      "grad_norm": 1.585544228553772,
      "learning_rate": 9.647937903177981e-06,
      "loss": 0.0626,
      "step": 126860
    },
    {
      "epoch": 0.20764190281678155,
      "grad_norm": 4.055099964141846,
      "learning_rate": 9.647872010964465e-06,
      "loss": 0.0575,
      "step": 126880
    },
    {
      "epoch": 0.2076746332554349,
      "grad_norm": 4.040394306182861,
      "learning_rate": 9.647806118750947e-06,
      "loss": 0.0601,
      "step": 126900
    },
    {
      "epoch": 0.20770736369408824,
      "grad_norm": 1.8642966747283936,
      "learning_rate": 9.64774022653743e-06,
      "loss": 0.0511,
      "step": 126920
    },
    {
      "epoch": 0.20774009413274155,
      "grad_norm": 1.3750123977661133,
      "learning_rate": 9.647674334323912e-06,
      "loss": 0.0693,
      "step": 126940
    },
    {
      "epoch": 0.2077728245713949,
      "grad_norm": 1.5280033349990845,
      "learning_rate": 9.647608442110396e-06,
      "loss": 0.051,
      "step": 126960
    },
    {
      "epoch": 0.20780555501004824,
      "grad_norm": 0.5120836496353149,
      "learning_rate": 9.64754254989688e-06,
      "loss": 0.0751,
      "step": 126980
    },
    {
      "epoch": 0.2078382854487016,
      "grad_norm": 2.846299648284912,
      "learning_rate": 9.647476657683361e-06,
      "loss": 0.0483,
      "step": 127000
    },
    {
      "epoch": 0.20787101588735493,
      "grad_norm": 3.259936571121216,
      "learning_rate": 9.647410765469845e-06,
      "loss": 0.0619,
      "step": 127020
    },
    {
      "epoch": 0.20790374632600825,
      "grad_norm": 1.2961922883987427,
      "learning_rate": 9.647344873256329e-06,
      "loss": 0.0655,
      "step": 127040
    },
    {
      "epoch": 0.2079364767646616,
      "grad_norm": 4.477651119232178,
      "learning_rate": 9.64727898104281e-06,
      "loss": 0.0654,
      "step": 127060
    },
    {
      "epoch": 0.20796920720331494,
      "grad_norm": 4.387113094329834,
      "learning_rate": 9.647213088829294e-06,
      "loss": 0.053,
      "step": 127080
    },
    {
      "epoch": 0.20800193764196828,
      "grad_norm": 3.4316511154174805,
      "learning_rate": 9.647147196615778e-06,
      "loss": 0.0627,
      "step": 127100
    },
    {
      "epoch": 0.20803466808062163,
      "grad_norm": 1.9260121583938599,
      "learning_rate": 9.64708130440226e-06,
      "loss": 0.048,
      "step": 127120
    },
    {
      "epoch": 0.20806739851927494,
      "grad_norm": 0.7341282367706299,
      "learning_rate": 9.647015412188743e-06,
      "loss": 0.0648,
      "step": 127140
    },
    {
      "epoch": 0.2081001289579283,
      "grad_norm": 3.8445968627929688,
      "learning_rate": 9.646949519975225e-06,
      "loss": 0.0819,
      "step": 127160
    },
    {
      "epoch": 0.20813285939658163,
      "grad_norm": 1.6898958683013916,
      "learning_rate": 9.646883627761709e-06,
      "loss": 0.0657,
      "step": 127180
    },
    {
      "epoch": 0.20816558983523498,
      "grad_norm": 3.516249656677246,
      "learning_rate": 9.64681773554819e-06,
      "loss": 0.057,
      "step": 127200
    },
    {
      "epoch": 0.20819832027388832,
      "grad_norm": 1.7864067554473877,
      "learning_rate": 9.646751843334674e-06,
      "loss": 0.0743,
      "step": 127220
    },
    {
      "epoch": 0.20823105071254164,
      "grad_norm": 1.1161996126174927,
      "learning_rate": 9.646685951121156e-06,
      "loss": 0.0603,
      "step": 127240
    },
    {
      "epoch": 0.20826378115119498,
      "grad_norm": 1.1214615106582642,
      "learning_rate": 9.64662005890764e-06,
      "loss": 0.0388,
      "step": 127260
    },
    {
      "epoch": 0.20829651158984833,
      "grad_norm": 2.763733386993408,
      "learning_rate": 9.646554166694122e-06,
      "loss": 0.0576,
      "step": 127280
    },
    {
      "epoch": 0.20832924202850167,
      "grad_norm": 3.8372437953948975,
      "learning_rate": 9.646488274480605e-06,
      "loss": 0.0558,
      "step": 127300
    },
    {
      "epoch": 0.20836197246715502,
      "grad_norm": 2.8663582801818848,
      "learning_rate": 9.646422382267089e-06,
      "loss": 0.0642,
      "step": 127320
    },
    {
      "epoch": 0.20839470290580833,
      "grad_norm": 1.4436476230621338,
      "learning_rate": 9.64635649005357e-06,
      "loss": 0.0566,
      "step": 127340
    },
    {
      "epoch": 0.20842743334446168,
      "grad_norm": 1.366278052330017,
      "learning_rate": 9.646290597840054e-06,
      "loss": 0.0714,
      "step": 127360
    },
    {
      "epoch": 0.20846016378311502,
      "grad_norm": 1.4839352369308472,
      "learning_rate": 9.646224705626536e-06,
      "loss": 0.0911,
      "step": 127380
    },
    {
      "epoch": 0.20849289422176837,
      "grad_norm": 3.2573277950286865,
      "learning_rate": 9.64615881341302e-06,
      "loss": 0.0661,
      "step": 127400
    },
    {
      "epoch": 0.2085256246604217,
      "grad_norm": 1.927013874053955,
      "learning_rate": 9.646092921199503e-06,
      "loss": 0.0757,
      "step": 127420
    },
    {
      "epoch": 0.20855835509907503,
      "grad_norm": 0.23227828741073608,
      "learning_rate": 9.646027028985985e-06,
      "loss": 0.0621,
      "step": 127440
    },
    {
      "epoch": 0.20859108553772837,
      "grad_norm": 1.2728272676467896,
      "learning_rate": 9.645961136772469e-06,
      "loss": 0.061,
      "step": 127460
    },
    {
      "epoch": 0.20862381597638172,
      "grad_norm": 7.160358905792236,
      "learning_rate": 9.645895244558952e-06,
      "loss": 0.0552,
      "step": 127480
    },
    {
      "epoch": 0.20865654641503506,
      "grad_norm": 1.7885063886642456,
      "learning_rate": 9.645829352345434e-06,
      "loss": 0.0588,
      "step": 127500
    },
    {
      "epoch": 0.2086892768536884,
      "grad_norm": 2.752781867980957,
      "learning_rate": 9.645763460131918e-06,
      "loss": 0.0653,
      "step": 127520
    },
    {
      "epoch": 0.20872200729234172,
      "grad_norm": 5.070200443267822,
      "learning_rate": 9.6456975679184e-06,
      "loss": 0.0763,
      "step": 127540
    },
    {
      "epoch": 0.20875473773099507,
      "grad_norm": 3.5599122047424316,
      "learning_rate": 9.645631675704883e-06,
      "loss": 0.0669,
      "step": 127560
    },
    {
      "epoch": 0.2087874681696484,
      "grad_norm": 1.1050705909729004,
      "learning_rate": 9.645565783491365e-06,
      "loss": 0.0618,
      "step": 127580
    },
    {
      "epoch": 0.20882019860830175,
      "grad_norm": 3.072648286819458,
      "learning_rate": 9.645499891277849e-06,
      "loss": 0.0767,
      "step": 127600
    },
    {
      "epoch": 0.2088529290469551,
      "grad_norm": 2.757190465927124,
      "learning_rate": 9.64543399906433e-06,
      "loss": 0.0593,
      "step": 127620
    },
    {
      "epoch": 0.20888565948560842,
      "grad_norm": 2.336644411087036,
      "learning_rate": 9.645368106850814e-06,
      "loss": 0.063,
      "step": 127640
    },
    {
      "epoch": 0.20891838992426176,
      "grad_norm": 1.8677958250045776,
      "learning_rate": 9.645302214637296e-06,
      "loss": 0.0685,
      "step": 127660
    },
    {
      "epoch": 0.2089511203629151,
      "grad_norm": 1.4993534088134766,
      "learning_rate": 9.64523632242378e-06,
      "loss": 0.0574,
      "step": 127680
    },
    {
      "epoch": 0.20898385080156845,
      "grad_norm": 2.6932129859924316,
      "learning_rate": 9.645170430210263e-06,
      "loss": 0.0618,
      "step": 127700
    },
    {
      "epoch": 0.2090165812402218,
      "grad_norm": 1.7259831428527832,
      "learning_rate": 9.645104537996745e-06,
      "loss": 0.0678,
      "step": 127720
    },
    {
      "epoch": 0.2090493116788751,
      "grad_norm": 3.5393753051757812,
      "learning_rate": 9.645038645783229e-06,
      "loss": 0.0636,
      "step": 127740
    },
    {
      "epoch": 0.20908204211752845,
      "grad_norm": 1.1577296257019043,
      "learning_rate": 9.64497275356971e-06,
      "loss": 0.0597,
      "step": 127760
    },
    {
      "epoch": 0.2091147725561818,
      "grad_norm": 0.8782822489738464,
      "learning_rate": 9.644906861356194e-06,
      "loss": 0.0583,
      "step": 127780
    },
    {
      "epoch": 0.20914750299483514,
      "grad_norm": 3.206798791885376,
      "learning_rate": 9.644840969142676e-06,
      "loss": 0.0575,
      "step": 127800
    },
    {
      "epoch": 0.2091802334334885,
      "grad_norm": 2.5547146797180176,
      "learning_rate": 9.64477507692916e-06,
      "loss": 0.0644,
      "step": 127820
    },
    {
      "epoch": 0.2092129638721418,
      "grad_norm": 3.337214946746826,
      "learning_rate": 9.644709184715643e-06,
      "loss": 0.0786,
      "step": 127840
    },
    {
      "epoch": 0.20924569431079515,
      "grad_norm": 0.7578197717666626,
      "learning_rate": 9.644643292502125e-06,
      "loss": 0.0615,
      "step": 127860
    },
    {
      "epoch": 0.2092784247494485,
      "grad_norm": 8.831106185913086,
      "learning_rate": 9.644577400288609e-06,
      "loss": 0.0655,
      "step": 127880
    },
    {
      "epoch": 0.20931115518810184,
      "grad_norm": 5.33793306350708,
      "learning_rate": 9.644511508075092e-06,
      "loss": 0.064,
      "step": 127900
    },
    {
      "epoch": 0.20934388562675518,
      "grad_norm": 4.514750957489014,
      "learning_rate": 9.644445615861574e-06,
      "loss": 0.0587,
      "step": 127920
    },
    {
      "epoch": 0.2093766160654085,
      "grad_norm": 2.952903985977173,
      "learning_rate": 9.644379723648058e-06,
      "loss": 0.0773,
      "step": 127940
    },
    {
      "epoch": 0.20940934650406184,
      "grad_norm": 1.5129165649414062,
      "learning_rate": 9.64431383143454e-06,
      "loss": 0.0535,
      "step": 127960
    },
    {
      "epoch": 0.2094420769427152,
      "grad_norm": 2.008481502532959,
      "learning_rate": 9.644247939221023e-06,
      "loss": 0.0505,
      "step": 127980
    },
    {
      "epoch": 0.20947480738136853,
      "grad_norm": 1.8665434122085571,
      "learning_rate": 9.644182047007505e-06,
      "loss": 0.059,
      "step": 128000
    },
    {
      "epoch": 0.20950753782002188,
      "grad_norm": 1.1851630210876465,
      "learning_rate": 9.644116154793989e-06,
      "loss": 0.0598,
      "step": 128020
    },
    {
      "epoch": 0.2095402682586752,
      "grad_norm": 1.3130309581756592,
      "learning_rate": 9.644050262580472e-06,
      "loss": 0.0632,
      "step": 128040
    },
    {
      "epoch": 0.20957299869732854,
      "grad_norm": 1.290475845336914,
      "learning_rate": 9.643984370366954e-06,
      "loss": 0.0512,
      "step": 128060
    },
    {
      "epoch": 0.20960572913598188,
      "grad_norm": 2.2472994327545166,
      "learning_rate": 9.643918478153438e-06,
      "loss": 0.0674,
      "step": 128080
    },
    {
      "epoch": 0.20963845957463523,
      "grad_norm": 1.3781700134277344,
      "learning_rate": 9.64385258593992e-06,
      "loss": 0.055,
      "step": 128100
    },
    {
      "epoch": 0.20967119001328854,
      "grad_norm": 2.327634334564209,
      "learning_rate": 9.643786693726403e-06,
      "loss": 0.0813,
      "step": 128120
    },
    {
      "epoch": 0.2097039204519419,
      "grad_norm": 2.8751556873321533,
      "learning_rate": 9.643720801512885e-06,
      "loss": 0.0729,
      "step": 128140
    },
    {
      "epoch": 0.20973665089059523,
      "grad_norm": 1.4975477457046509,
      "learning_rate": 9.643654909299369e-06,
      "loss": 0.0637,
      "step": 128160
    },
    {
      "epoch": 0.20976938132924858,
      "grad_norm": 1.988478183746338,
      "learning_rate": 9.64358901708585e-06,
      "loss": 0.0545,
      "step": 128180
    },
    {
      "epoch": 0.20980211176790192,
      "grad_norm": 1.8617932796478271,
      "learning_rate": 9.643523124872334e-06,
      "loss": 0.0592,
      "step": 128200
    },
    {
      "epoch": 0.20983484220655524,
      "grad_norm": 3.1320228576660156,
      "learning_rate": 9.643457232658818e-06,
      "loss": 0.0738,
      "step": 128220
    },
    {
      "epoch": 0.20986757264520858,
      "grad_norm": 5.330245494842529,
      "learning_rate": 9.6433913404453e-06,
      "loss": 0.0536,
      "step": 128240
    },
    {
      "epoch": 0.20990030308386193,
      "grad_norm": 2.086247444152832,
      "learning_rate": 9.643325448231783e-06,
      "loss": 0.063,
      "step": 128260
    },
    {
      "epoch": 0.20993303352251527,
      "grad_norm": 1.567814826965332,
      "learning_rate": 9.643259556018267e-06,
      "loss": 0.0784,
      "step": 128280
    },
    {
      "epoch": 0.20996576396116862,
      "grad_norm": 4.26121187210083,
      "learning_rate": 9.643193663804749e-06,
      "loss": 0.0749,
      "step": 128300
    },
    {
      "epoch": 0.20999849439982193,
      "grad_norm": 1.95146644115448,
      "learning_rate": 9.643127771591232e-06,
      "loss": 0.0786,
      "step": 128320
    },
    {
      "epoch": 0.21003122483847528,
      "grad_norm": 7.442866802215576,
      "learning_rate": 9.643061879377714e-06,
      "loss": 0.0636,
      "step": 128340
    },
    {
      "epoch": 0.21006395527712862,
      "grad_norm": 5.527495384216309,
      "learning_rate": 9.642995987164198e-06,
      "loss": 0.0561,
      "step": 128360
    },
    {
      "epoch": 0.21009668571578197,
      "grad_norm": 3.1565144062042236,
      "learning_rate": 9.642930094950682e-06,
      "loss": 0.0592,
      "step": 128380
    },
    {
      "epoch": 0.2101294161544353,
      "grad_norm": 1.974601149559021,
      "learning_rate": 9.642864202737163e-06,
      "loss": 0.0699,
      "step": 128400
    },
    {
      "epoch": 0.21016214659308863,
      "grad_norm": 4.323048114776611,
      "learning_rate": 9.642798310523647e-06,
      "loss": 0.0607,
      "step": 128420
    },
    {
      "epoch": 0.21019487703174197,
      "grad_norm": 3.5320332050323486,
      "learning_rate": 9.642732418310129e-06,
      "loss": 0.0607,
      "step": 128440
    },
    {
      "epoch": 0.21022760747039532,
      "grad_norm": 1.3149404525756836,
      "learning_rate": 9.642666526096613e-06,
      "loss": 0.0718,
      "step": 128460
    },
    {
      "epoch": 0.21026033790904866,
      "grad_norm": 3.7043652534484863,
      "learning_rate": 9.642600633883094e-06,
      "loss": 0.0615,
      "step": 128480
    },
    {
      "epoch": 0.210293068347702,
      "grad_norm": 0.41734617948532104,
      "learning_rate": 9.642534741669578e-06,
      "loss": 0.0544,
      "step": 128500
    },
    {
      "epoch": 0.21032579878635532,
      "grad_norm": 3.7172937393188477,
      "learning_rate": 9.64246884945606e-06,
      "loss": 0.0663,
      "step": 128520
    },
    {
      "epoch": 0.21035852922500867,
      "grad_norm": 4.215800762176514,
      "learning_rate": 9.642402957242543e-06,
      "loss": 0.0717,
      "step": 128540
    },
    {
      "epoch": 0.210391259663662,
      "grad_norm": 2.0908939838409424,
      "learning_rate": 9.642337065029025e-06,
      "loss": 0.0549,
      "step": 128560
    },
    {
      "epoch": 0.21042399010231536,
      "grad_norm": 1.6107534170150757,
      "learning_rate": 9.642271172815509e-06,
      "loss": 0.0615,
      "step": 128580
    },
    {
      "epoch": 0.2104567205409687,
      "grad_norm": 1.9729042053222656,
      "learning_rate": 9.64220528060199e-06,
      "loss": 0.0623,
      "step": 128600
    },
    {
      "epoch": 0.21048945097962202,
      "grad_norm": 2.4460713863372803,
      "learning_rate": 9.642139388388474e-06,
      "loss": 0.0715,
      "step": 128620
    },
    {
      "epoch": 0.21052218141827536,
      "grad_norm": 2.2173428535461426,
      "learning_rate": 9.642073496174958e-06,
      "loss": 0.0729,
      "step": 128640
    },
    {
      "epoch": 0.2105549118569287,
      "grad_norm": 2.6761116981506348,
      "learning_rate": 9.642007603961442e-06,
      "loss": 0.0614,
      "step": 128660
    },
    {
      "epoch": 0.21058764229558205,
      "grad_norm": 3.254897356033325,
      "learning_rate": 9.641941711747923e-06,
      "loss": 0.0569,
      "step": 128680
    },
    {
      "epoch": 0.2106203727342354,
      "grad_norm": 3.81656551361084,
      "learning_rate": 9.641875819534407e-06,
      "loss": 0.0508,
      "step": 128700
    },
    {
      "epoch": 0.2106531031728887,
      "grad_norm": 7.548002243041992,
      "learning_rate": 9.64180992732089e-06,
      "loss": 0.0568,
      "step": 128720
    },
    {
      "epoch": 0.21068583361154206,
      "grad_norm": 7.419567584991455,
      "learning_rate": 9.641744035107373e-06,
      "loss": 0.0753,
      "step": 128740
    },
    {
      "epoch": 0.2107185640501954,
      "grad_norm": 1.464831829071045,
      "learning_rate": 9.641678142893856e-06,
      "loss": 0.0792,
      "step": 128760
    },
    {
      "epoch": 0.21075129448884874,
      "grad_norm": 3.0149052143096924,
      "learning_rate": 9.641612250680338e-06,
      "loss": 0.0569,
      "step": 128780
    },
    {
      "epoch": 0.2107840249275021,
      "grad_norm": 3.5712788105010986,
      "learning_rate": 9.641546358466822e-06,
      "loss": 0.0635,
      "step": 128800
    },
    {
      "epoch": 0.2108167553661554,
      "grad_norm": 1.7530200481414795,
      "learning_rate": 9.641480466253304e-06,
      "loss": 0.0669,
      "step": 128820
    },
    {
      "epoch": 0.21084948580480875,
      "grad_norm": 1.4718269109725952,
      "learning_rate": 9.641414574039787e-06,
      "loss": 0.0467,
      "step": 128840
    },
    {
      "epoch": 0.2108822162434621,
      "grad_norm": 1.6475584506988525,
      "learning_rate": 9.641348681826269e-06,
      "loss": 0.0878,
      "step": 128860
    },
    {
      "epoch": 0.21091494668211544,
      "grad_norm": 2.509727716445923,
      "learning_rate": 9.641282789612753e-06,
      "loss": 0.0604,
      "step": 128880
    },
    {
      "epoch": 0.21094767712076878,
      "grad_norm": 3.255124568939209,
      "learning_rate": 9.641216897399234e-06,
      "loss": 0.0584,
      "step": 128900
    },
    {
      "epoch": 0.2109804075594221,
      "grad_norm": 1.189630150794983,
      "learning_rate": 9.641151005185718e-06,
      "loss": 0.0472,
      "step": 128920
    },
    {
      "epoch": 0.21101313799807545,
      "grad_norm": 2.323563575744629,
      "learning_rate": 9.6410851129722e-06,
      "loss": 0.067,
      "step": 128940
    },
    {
      "epoch": 0.2110458684367288,
      "grad_norm": 3.2141225337982178,
      "learning_rate": 9.641019220758684e-06,
      "loss": 0.0659,
      "step": 128960
    },
    {
      "epoch": 0.21107859887538213,
      "grad_norm": 7.926151752471924,
      "learning_rate": 9.640953328545165e-06,
      "loss": 0.0613,
      "step": 128980
    },
    {
      "epoch": 0.21111132931403548,
      "grad_norm": 2.2106335163116455,
      "learning_rate": 9.640887436331649e-06,
      "loss": 0.057,
      "step": 129000
    },
    {
      "epoch": 0.2111440597526888,
      "grad_norm": 0.7406363487243652,
      "learning_rate": 9.640821544118133e-06,
      "loss": 0.0535,
      "step": 129020
    },
    {
      "epoch": 0.21117679019134214,
      "grad_norm": 3.340078353881836,
      "learning_rate": 9.640755651904615e-06,
      "loss": 0.0505,
      "step": 129040
    },
    {
      "epoch": 0.21120952062999548,
      "grad_norm": 2.443039894104004,
      "learning_rate": 9.640689759691098e-06,
      "loss": 0.0462,
      "step": 129060
    },
    {
      "epoch": 0.21124225106864883,
      "grad_norm": 2.9245450496673584,
      "learning_rate": 9.640623867477582e-06,
      "loss": 0.0667,
      "step": 129080
    },
    {
      "epoch": 0.21127498150730217,
      "grad_norm": 0.3849027454853058,
      "learning_rate": 9.640557975264064e-06,
      "loss": 0.0651,
      "step": 129100
    },
    {
      "epoch": 0.2113077119459555,
      "grad_norm": 1.8878381252288818,
      "learning_rate": 9.640492083050547e-06,
      "loss": 0.0407,
      "step": 129120
    },
    {
      "epoch": 0.21134044238460883,
      "grad_norm": 3.4720757007598877,
      "learning_rate": 9.64042619083703e-06,
      "loss": 0.0636,
      "step": 129140
    },
    {
      "epoch": 0.21137317282326218,
      "grad_norm": 1.9052655696868896,
      "learning_rate": 9.640360298623513e-06,
      "loss": 0.0495,
      "step": 129160
    },
    {
      "epoch": 0.21140590326191552,
      "grad_norm": 1.1080565452575684,
      "learning_rate": 9.640294406409996e-06,
      "loss": 0.0557,
      "step": 129180
    },
    {
      "epoch": 0.21143863370056887,
      "grad_norm": 4.124522686004639,
      "learning_rate": 9.640228514196478e-06,
      "loss": 0.0645,
      "step": 129200
    },
    {
      "epoch": 0.21147136413922218,
      "grad_norm": 2.5503616333007812,
      "learning_rate": 9.640162621982962e-06,
      "loss": 0.0655,
      "step": 129220
    },
    {
      "epoch": 0.21150409457787553,
      "grad_norm": 1.4681066274642944,
      "learning_rate": 9.640096729769444e-06,
      "loss": 0.0691,
      "step": 129240
    },
    {
      "epoch": 0.21153682501652887,
      "grad_norm": 0.9916309118270874,
      "learning_rate": 9.640030837555927e-06,
      "loss": 0.0624,
      "step": 129260
    },
    {
      "epoch": 0.21156955545518222,
      "grad_norm": 3.9945948123931885,
      "learning_rate": 9.639964945342409e-06,
      "loss": 0.0535,
      "step": 129280
    },
    {
      "epoch": 0.21160228589383556,
      "grad_norm": 4.574992656707764,
      "learning_rate": 9.639899053128893e-06,
      "loss": 0.0733,
      "step": 129300
    },
    {
      "epoch": 0.21163501633248888,
      "grad_norm": 4.672280311584473,
      "learning_rate": 9.639833160915375e-06,
      "loss": 0.0533,
      "step": 129320
    },
    {
      "epoch": 0.21166774677114222,
      "grad_norm": 1.639457106590271,
      "learning_rate": 9.639767268701858e-06,
      "loss": 0.0599,
      "step": 129340
    },
    {
      "epoch": 0.21170047720979557,
      "grad_norm": 2.7811386585235596,
      "learning_rate": 9.63970137648834e-06,
      "loss": 0.0616,
      "step": 129360
    },
    {
      "epoch": 0.2117332076484489,
      "grad_norm": 1.504675269126892,
      "learning_rate": 9.639635484274824e-06,
      "loss": 0.0731,
      "step": 129380
    },
    {
      "epoch": 0.21176593808710226,
      "grad_norm": 2.374565839767456,
      "learning_rate": 9.639569592061307e-06,
      "loss": 0.065,
      "step": 129400
    },
    {
      "epoch": 0.21179866852575557,
      "grad_norm": 6.2509446144104,
      "learning_rate": 9.639503699847789e-06,
      "loss": 0.063,
      "step": 129420
    },
    {
      "epoch": 0.21183139896440892,
      "grad_norm": 3.7780871391296387,
      "learning_rate": 9.639437807634273e-06,
      "loss": 0.0473,
      "step": 129440
    },
    {
      "epoch": 0.21186412940306226,
      "grad_norm": 2.469273328781128,
      "learning_rate": 9.639371915420756e-06,
      "loss": 0.0523,
      "step": 129460
    },
    {
      "epoch": 0.2118968598417156,
      "grad_norm": 1.006765365600586,
      "learning_rate": 9.639306023207238e-06,
      "loss": 0.0546,
      "step": 129480
    },
    {
      "epoch": 0.21192959028036895,
      "grad_norm": 4.602476596832275,
      "learning_rate": 9.639240130993722e-06,
      "loss": 0.044,
      "step": 129500
    },
    {
      "epoch": 0.21196232071902227,
      "grad_norm": 2.397733688354492,
      "learning_rate": 9.639174238780205e-06,
      "loss": 0.0693,
      "step": 129520
    },
    {
      "epoch": 0.2119950511576756,
      "grad_norm": 2.056018829345703,
      "learning_rate": 9.639108346566687e-06,
      "loss": 0.0737,
      "step": 129540
    },
    {
      "epoch": 0.21202778159632896,
      "grad_norm": 2.7664246559143066,
      "learning_rate": 9.63904245435317e-06,
      "loss": 0.0677,
      "step": 129560
    },
    {
      "epoch": 0.2120605120349823,
      "grad_norm": 0.7044146656990051,
      "learning_rate": 9.638976562139653e-06,
      "loss": 0.0595,
      "step": 129580
    },
    {
      "epoch": 0.21209324247363562,
      "grad_norm": 2.5599405765533447,
      "learning_rate": 9.638910669926136e-06,
      "loss": 0.0634,
      "step": 129600
    },
    {
      "epoch": 0.21212597291228896,
      "grad_norm": 3.1769330501556396,
      "learning_rate": 9.638844777712618e-06,
      "loss": 0.0863,
      "step": 129620
    },
    {
      "epoch": 0.2121587033509423,
      "grad_norm": 3.863671064376831,
      "learning_rate": 9.638778885499102e-06,
      "loss": 0.0539,
      "step": 129640
    },
    {
      "epoch": 0.21219143378959565,
      "grad_norm": 4.275214195251465,
      "learning_rate": 9.638712993285584e-06,
      "loss": 0.0699,
      "step": 129660
    },
    {
      "epoch": 0.212224164228249,
      "grad_norm": 1.668078899383545,
      "learning_rate": 9.638647101072067e-06,
      "loss": 0.0566,
      "step": 129680
    },
    {
      "epoch": 0.2122568946669023,
      "grad_norm": 1.1735094785690308,
      "learning_rate": 9.638581208858549e-06,
      "loss": 0.0567,
      "step": 129700
    },
    {
      "epoch": 0.21228962510555566,
      "grad_norm": 3.4118714332580566,
      "learning_rate": 9.638515316645033e-06,
      "loss": 0.0632,
      "step": 129720
    },
    {
      "epoch": 0.212322355544209,
      "grad_norm": 1.6577355861663818,
      "learning_rate": 9.638449424431515e-06,
      "loss": 0.0642,
      "step": 129740
    },
    {
      "epoch": 0.21235508598286235,
      "grad_norm": 4.383780002593994,
      "learning_rate": 9.638383532217998e-06,
      "loss": 0.0617,
      "step": 129760
    },
    {
      "epoch": 0.2123878164215157,
      "grad_norm": 5.240005016326904,
      "learning_rate": 9.638317640004482e-06,
      "loss": 0.0739,
      "step": 129780
    },
    {
      "epoch": 0.212420546860169,
      "grad_norm": 1.8023077249526978,
      "learning_rate": 9.638251747790964e-06,
      "loss": 0.0617,
      "step": 129800
    },
    {
      "epoch": 0.21245327729882235,
      "grad_norm": 2.682347059249878,
      "learning_rate": 9.638185855577447e-06,
      "loss": 0.068,
      "step": 129820
    },
    {
      "epoch": 0.2124860077374757,
      "grad_norm": 2.0453431606292725,
      "learning_rate": 9.63811996336393e-06,
      "loss": 0.0415,
      "step": 129840
    },
    {
      "epoch": 0.21251873817612904,
      "grad_norm": 1.0428286790847778,
      "learning_rate": 9.638054071150413e-06,
      "loss": 0.0664,
      "step": 129860
    },
    {
      "epoch": 0.21255146861478239,
      "grad_norm": 2.525865077972412,
      "learning_rate": 9.637988178936896e-06,
      "loss": 0.0639,
      "step": 129880
    },
    {
      "epoch": 0.2125841990534357,
      "grad_norm": 2.4263124465942383,
      "learning_rate": 9.637922286723378e-06,
      "loss": 0.0598,
      "step": 129900
    },
    {
      "epoch": 0.21261692949208905,
      "grad_norm": 2.0101842880249023,
      "learning_rate": 9.637856394509862e-06,
      "loss": 0.0643,
      "step": 129920
    },
    {
      "epoch": 0.2126496599307424,
      "grad_norm": 1.3714503049850464,
      "learning_rate": 9.637790502296345e-06,
      "loss": 0.0626,
      "step": 129940
    },
    {
      "epoch": 0.21268239036939574,
      "grad_norm": 3.8026914596557617,
      "learning_rate": 9.637724610082827e-06,
      "loss": 0.0846,
      "step": 129960
    },
    {
      "epoch": 0.21271512080804908,
      "grad_norm": 2.400725841522217,
      "learning_rate": 9.637658717869311e-06,
      "loss": 0.0595,
      "step": 129980
    },
    {
      "epoch": 0.2127478512467024,
      "grad_norm": 0.6105261445045471,
      "learning_rate": 9.637592825655793e-06,
      "loss": 0.0563,
      "step": 130000
    },
    {
      "epoch": 0.21278058168535574,
      "grad_norm": 2.0625998973846436,
      "learning_rate": 9.637526933442276e-06,
      "loss": 0.0547,
      "step": 130020
    },
    {
      "epoch": 0.21281331212400909,
      "grad_norm": 1.2856709957122803,
      "learning_rate": 9.637461041228758e-06,
      "loss": 0.0675,
      "step": 130040
    },
    {
      "epoch": 0.21284604256266243,
      "grad_norm": 2.879668951034546,
      "learning_rate": 9.637395149015242e-06,
      "loss": 0.0639,
      "step": 130060
    },
    {
      "epoch": 0.21287877300131577,
      "grad_norm": 2.2015652656555176,
      "learning_rate": 9.637329256801724e-06,
      "loss": 0.0655,
      "step": 130080
    },
    {
      "epoch": 0.2129115034399691,
      "grad_norm": 2.5276741981506348,
      "learning_rate": 9.637263364588207e-06,
      "loss": 0.061,
      "step": 130100
    },
    {
      "epoch": 0.21294423387862244,
      "grad_norm": 1.0996222496032715,
      "learning_rate": 9.63719747237469e-06,
      "loss": 0.0569,
      "step": 130120
    },
    {
      "epoch": 0.21297696431727578,
      "grad_norm": 3.3813071250915527,
      "learning_rate": 9.637131580161173e-06,
      "loss": 0.061,
      "step": 130140
    },
    {
      "epoch": 0.21300969475592912,
      "grad_norm": 1.6123002767562866,
      "learning_rate": 9.637065687947656e-06,
      "loss": 0.064,
      "step": 130160
    },
    {
      "epoch": 0.21304242519458247,
      "grad_norm": 2.0515153408050537,
      "learning_rate": 9.636999795734138e-06,
      "loss": 0.0577,
      "step": 130180
    },
    {
      "epoch": 0.21307515563323579,
      "grad_norm": 2.7947463989257812,
      "learning_rate": 9.636933903520622e-06,
      "loss": 0.0486,
      "step": 130200
    },
    {
      "epoch": 0.21310788607188913,
      "grad_norm": 1.712888240814209,
      "learning_rate": 9.636868011307104e-06,
      "loss": 0.0596,
      "step": 130220
    },
    {
      "epoch": 0.21314061651054247,
      "grad_norm": 3.142916440963745,
      "learning_rate": 9.636802119093587e-06,
      "loss": 0.0751,
      "step": 130240
    },
    {
      "epoch": 0.21317334694919582,
      "grad_norm": 2.201625347137451,
      "learning_rate": 9.636736226880071e-06,
      "loss": 0.0711,
      "step": 130260
    },
    {
      "epoch": 0.21320607738784916,
      "grad_norm": 4.977527618408203,
      "learning_rate": 9.636670334666553e-06,
      "loss": 0.0667,
      "step": 130280
    },
    {
      "epoch": 0.21323880782650248,
      "grad_norm": 3.077820062637329,
      "learning_rate": 9.636604442453036e-06,
      "loss": 0.0639,
      "step": 130300
    },
    {
      "epoch": 0.21327153826515582,
      "grad_norm": 1.7115745544433594,
      "learning_rate": 9.63653855023952e-06,
      "loss": 0.0716,
      "step": 130320
    },
    {
      "epoch": 0.21330426870380917,
      "grad_norm": 3.5119500160217285,
      "learning_rate": 9.636472658026002e-06,
      "loss": 0.0621,
      "step": 130340
    },
    {
      "epoch": 0.2133369991424625,
      "grad_norm": 1.7381550073623657,
      "learning_rate": 9.636406765812485e-06,
      "loss": 0.0891,
      "step": 130360
    },
    {
      "epoch": 0.21336972958111586,
      "grad_norm": 2.633903980255127,
      "learning_rate": 9.636340873598967e-06,
      "loss": 0.0463,
      "step": 130380
    },
    {
      "epoch": 0.21340246001976917,
      "grad_norm": 3.2883052825927734,
      "learning_rate": 9.636274981385451e-06,
      "loss": 0.063,
      "step": 130400
    },
    {
      "epoch": 0.21343519045842252,
      "grad_norm": 0.8439732789993286,
      "learning_rate": 9.636209089171933e-06,
      "loss": 0.0585,
      "step": 130420
    },
    {
      "epoch": 0.21346792089707586,
      "grad_norm": 2.320373773574829,
      "learning_rate": 9.636143196958416e-06,
      "loss": 0.0561,
      "step": 130440
    },
    {
      "epoch": 0.2135006513357292,
      "grad_norm": 1.5893912315368652,
      "learning_rate": 9.636077304744898e-06,
      "loss": 0.0445,
      "step": 130460
    },
    {
      "epoch": 0.21353338177438255,
      "grad_norm": 0.5883187651634216,
      "learning_rate": 9.636011412531382e-06,
      "loss": 0.0608,
      "step": 130480
    },
    {
      "epoch": 0.21356611221303587,
      "grad_norm": 3.0148262977600098,
      "learning_rate": 9.635945520317866e-06,
      "loss": 0.0616,
      "step": 130500
    },
    {
      "epoch": 0.2135988426516892,
      "grad_norm": 2.852233409881592,
      "learning_rate": 9.635879628104347e-06,
      "loss": 0.0592,
      "step": 130520
    },
    {
      "epoch": 0.21363157309034256,
      "grad_norm": 2.9980013370513916,
      "learning_rate": 9.635813735890831e-06,
      "loss": 0.0731,
      "step": 130540
    },
    {
      "epoch": 0.2136643035289959,
      "grad_norm": 1.1746656894683838,
      "learning_rate": 9.635747843677313e-06,
      "loss": 0.0476,
      "step": 130560
    },
    {
      "epoch": 0.21369703396764925,
      "grad_norm": 4.01054048538208,
      "learning_rate": 9.635681951463796e-06,
      "loss": 0.0729,
      "step": 130580
    },
    {
      "epoch": 0.21372976440630256,
      "grad_norm": 1.0786209106445312,
      "learning_rate": 9.635616059250278e-06,
      "loss": 0.0565,
      "step": 130600
    },
    {
      "epoch": 0.2137624948449559,
      "grad_norm": 2.7622859477996826,
      "learning_rate": 9.635550167036762e-06,
      "loss": 0.0667,
      "step": 130620
    },
    {
      "epoch": 0.21379522528360925,
      "grad_norm": 1.3912692070007324,
      "learning_rate": 9.635484274823244e-06,
      "loss": 0.0662,
      "step": 130640
    },
    {
      "epoch": 0.2138279557222626,
      "grad_norm": 1.3688580989837646,
      "learning_rate": 9.635418382609727e-06,
      "loss": 0.0515,
      "step": 130660
    },
    {
      "epoch": 0.21386068616091594,
      "grad_norm": 20.521678924560547,
      "learning_rate": 9.635352490396211e-06,
      "loss": 0.0674,
      "step": 130680
    },
    {
      "epoch": 0.21389341659956926,
      "grad_norm": 2.7530674934387207,
      "learning_rate": 9.635286598182693e-06,
      "loss": 0.057,
      "step": 130700
    },
    {
      "epoch": 0.2139261470382226,
      "grad_norm": 1.9985018968582153,
      "learning_rate": 9.635220705969177e-06,
      "loss": 0.0574,
      "step": 130720
    },
    {
      "epoch": 0.21395887747687595,
      "grad_norm": 1.601143717765808,
      "learning_rate": 9.63515481375566e-06,
      "loss": 0.0618,
      "step": 130740
    },
    {
      "epoch": 0.2139916079155293,
      "grad_norm": 2.811375617980957,
      "learning_rate": 9.635088921542142e-06,
      "loss": 0.0605,
      "step": 130760
    },
    {
      "epoch": 0.21402433835418264,
      "grad_norm": 1.4570527076721191,
      "learning_rate": 9.635023029328626e-06,
      "loss": 0.0605,
      "step": 130780
    },
    {
      "epoch": 0.21405706879283595,
      "grad_norm": 10.013202667236328,
      "learning_rate": 9.634957137115107e-06,
      "loss": 0.071,
      "step": 130800
    },
    {
      "epoch": 0.2140897992314893,
      "grad_norm": 5.5522780418396,
      "learning_rate": 9.634891244901591e-06,
      "loss": 0.0569,
      "step": 130820
    },
    {
      "epoch": 0.21412252967014264,
      "grad_norm": 2.751081943511963,
      "learning_rate": 9.634825352688075e-06,
      "loss": 0.074,
      "step": 130840
    },
    {
      "epoch": 0.214155260108796,
      "grad_norm": 2.225534439086914,
      "learning_rate": 9.634759460474557e-06,
      "loss": 0.069,
      "step": 130860
    },
    {
      "epoch": 0.21418799054744933,
      "grad_norm": 3.197685956954956,
      "learning_rate": 9.63469356826104e-06,
      "loss": 0.0779,
      "step": 130880
    },
    {
      "epoch": 0.21422072098610265,
      "grad_norm": 3.8856899738311768,
      "learning_rate": 9.634627676047522e-06,
      "loss": 0.0586,
      "step": 130900
    },
    {
      "epoch": 0.214253451424756,
      "grad_norm": 3.142355442047119,
      "learning_rate": 9.634561783834006e-06,
      "loss": 0.072,
      "step": 130920
    },
    {
      "epoch": 0.21428618186340934,
      "grad_norm": 2.1094648838043213,
      "learning_rate": 9.634495891620487e-06,
      "loss": 0.062,
      "step": 130940
    },
    {
      "epoch": 0.21431891230206268,
      "grad_norm": 1.9298518896102905,
      "learning_rate": 9.634429999406971e-06,
      "loss": 0.0628,
      "step": 130960
    },
    {
      "epoch": 0.214351642740716,
      "grad_norm": 3.8810975551605225,
      "learning_rate": 9.634364107193453e-06,
      "loss": 0.0767,
      "step": 130980
    },
    {
      "epoch": 0.21438437317936934,
      "grad_norm": 2.933894395828247,
      "learning_rate": 9.634298214979937e-06,
      "loss": 0.0808,
      "step": 131000
    },
    {
      "epoch": 0.2144171036180227,
      "grad_norm": 2.454237937927246,
      "learning_rate": 9.634232322766418e-06,
      "loss": 0.0518,
      "step": 131020
    },
    {
      "epoch": 0.21444983405667603,
      "grad_norm": 2.6615424156188965,
      "learning_rate": 9.634166430552902e-06,
      "loss": 0.0703,
      "step": 131040
    },
    {
      "epoch": 0.21448256449532938,
      "grad_norm": 1.741749882698059,
      "learning_rate": 9.634100538339386e-06,
      "loss": 0.0738,
      "step": 131060
    },
    {
      "epoch": 0.2145152949339827,
      "grad_norm": 2.12642240524292,
      "learning_rate": 9.634034646125868e-06,
      "loss": 0.0588,
      "step": 131080
    },
    {
      "epoch": 0.21454802537263604,
      "grad_norm": 1.9707422256469727,
      "learning_rate": 9.633968753912351e-06,
      "loss": 0.0703,
      "step": 131100
    },
    {
      "epoch": 0.21458075581128938,
      "grad_norm": 1.4976274967193604,
      "learning_rate": 9.633902861698835e-06,
      "loss": 0.0664,
      "step": 131120
    },
    {
      "epoch": 0.21461348624994273,
      "grad_norm": 2.562833070755005,
      "learning_rate": 9.633836969485317e-06,
      "loss": 0.0563,
      "step": 131140
    },
    {
      "epoch": 0.21464621668859607,
      "grad_norm": 3.888183116912842,
      "learning_rate": 9.6337710772718e-06,
      "loss": 0.0854,
      "step": 131160
    },
    {
      "epoch": 0.2146789471272494,
      "grad_norm": 2.2922019958496094,
      "learning_rate": 9.633705185058284e-06,
      "loss": 0.0532,
      "step": 131180
    },
    {
      "epoch": 0.21471167756590273,
      "grad_norm": 7.102322578430176,
      "learning_rate": 9.633639292844766e-06,
      "loss": 0.0706,
      "step": 131200
    },
    {
      "epoch": 0.21474440800455608,
      "grad_norm": 0.33209100365638733,
      "learning_rate": 9.63357340063125e-06,
      "loss": 0.0719,
      "step": 131220
    },
    {
      "epoch": 0.21477713844320942,
      "grad_norm": 2.508125066757202,
      "learning_rate": 9.633507508417731e-06,
      "loss": 0.0559,
      "step": 131240
    },
    {
      "epoch": 0.21480986888186276,
      "grad_norm": 0.8241720199584961,
      "learning_rate": 9.633441616204215e-06,
      "loss": 0.0523,
      "step": 131260
    },
    {
      "epoch": 0.21484259932051608,
      "grad_norm": 1.8170441389083862,
      "learning_rate": 9.633375723990697e-06,
      "loss": 0.0608,
      "step": 131280
    },
    {
      "epoch": 0.21487532975916943,
      "grad_norm": 1.0297515392303467,
      "learning_rate": 9.63330983177718e-06,
      "loss": 0.0652,
      "step": 131300
    },
    {
      "epoch": 0.21490806019782277,
      "grad_norm": 4.745708465576172,
      "learning_rate": 9.633243939563662e-06,
      "loss": 0.071,
      "step": 131320
    },
    {
      "epoch": 0.21494079063647611,
      "grad_norm": 1.1952906847000122,
      "learning_rate": 9.633178047350146e-06,
      "loss": 0.0644,
      "step": 131340
    },
    {
      "epoch": 0.21497352107512946,
      "grad_norm": 3.247858762741089,
      "learning_rate": 9.633112155136628e-06,
      "loss": 0.0632,
      "step": 131360
    },
    {
      "epoch": 0.21500625151378278,
      "grad_norm": 2.215902805328369,
      "learning_rate": 9.633046262923111e-06,
      "loss": 0.0716,
      "step": 131380
    },
    {
      "epoch": 0.21503898195243612,
      "grad_norm": 2.0587029457092285,
      "learning_rate": 9.632980370709593e-06,
      "loss": 0.0604,
      "step": 131400
    },
    {
      "epoch": 0.21507171239108946,
      "grad_norm": 0.9388107061386108,
      "learning_rate": 9.632914478496077e-06,
      "loss": 0.0692,
      "step": 131420
    },
    {
      "epoch": 0.2151044428297428,
      "grad_norm": 5.017520427703857,
      "learning_rate": 9.63284858628256e-06,
      "loss": 0.054,
      "step": 131440
    },
    {
      "epoch": 0.21513717326839615,
      "grad_norm": 2.7374048233032227,
      "learning_rate": 9.632782694069042e-06,
      "loss": 0.0714,
      "step": 131460
    },
    {
      "epoch": 0.21516990370704947,
      "grad_norm": 0.7090265154838562,
      "learning_rate": 9.632716801855526e-06,
      "loss": 0.0549,
      "step": 131480
    },
    {
      "epoch": 0.21520263414570281,
      "grad_norm": 1.352371335029602,
      "learning_rate": 9.63265090964201e-06,
      "loss": 0.0421,
      "step": 131500
    },
    {
      "epoch": 0.21523536458435616,
      "grad_norm": 4.050007343292236,
      "learning_rate": 9.632585017428491e-06,
      "loss": 0.0488,
      "step": 131520
    },
    {
      "epoch": 0.2152680950230095,
      "grad_norm": 3.927131414413452,
      "learning_rate": 9.632519125214975e-06,
      "loss": 0.0724,
      "step": 131540
    },
    {
      "epoch": 0.21530082546166285,
      "grad_norm": 2.16884708404541,
      "learning_rate": 9.632453233001458e-06,
      "loss": 0.0763,
      "step": 131560
    },
    {
      "epoch": 0.21533355590031616,
      "grad_norm": 3.86393141746521,
      "learning_rate": 9.63238734078794e-06,
      "loss": 0.0514,
      "step": 131580
    },
    {
      "epoch": 0.2153662863389695,
      "grad_norm": 3.114086389541626,
      "learning_rate": 9.632321448574424e-06,
      "loss": 0.0487,
      "step": 131600
    },
    {
      "epoch": 0.21539901677762285,
      "grad_norm": 0.38244086503982544,
      "learning_rate": 9.632255556360906e-06,
      "loss": 0.0555,
      "step": 131620
    },
    {
      "epoch": 0.2154317472162762,
      "grad_norm": 2.6530606746673584,
      "learning_rate": 9.63218966414739e-06,
      "loss": 0.0731,
      "step": 131640
    },
    {
      "epoch": 0.21546447765492954,
      "grad_norm": 3.4942073822021484,
      "learning_rate": 9.632123771933871e-06,
      "loss": 0.0586,
      "step": 131660
    },
    {
      "epoch": 0.21549720809358286,
      "grad_norm": 2.2021243572235107,
      "learning_rate": 9.632057879720355e-06,
      "loss": 0.0702,
      "step": 131680
    },
    {
      "epoch": 0.2155299385322362,
      "grad_norm": 4.167094707489014,
      "learning_rate": 9.631991987506837e-06,
      "loss": 0.0666,
      "step": 131700
    },
    {
      "epoch": 0.21556266897088955,
      "grad_norm": 2.919058322906494,
      "learning_rate": 9.63192609529332e-06,
      "loss": 0.0454,
      "step": 131720
    },
    {
      "epoch": 0.2155953994095429,
      "grad_norm": 3.666823625564575,
      "learning_rate": 9.631860203079802e-06,
      "loss": 0.0593,
      "step": 131740
    },
    {
      "epoch": 0.21562812984819624,
      "grad_norm": 2.0081591606140137,
      "learning_rate": 9.631794310866286e-06,
      "loss": 0.0558,
      "step": 131760
    },
    {
      "epoch": 0.21566086028684955,
      "grad_norm": 1.155122995376587,
      "learning_rate": 9.631728418652768e-06,
      "loss": 0.0693,
      "step": 131780
    },
    {
      "epoch": 0.2156935907255029,
      "grad_norm": 2.2965168952941895,
      "learning_rate": 9.631662526439251e-06,
      "loss": 0.0873,
      "step": 131800
    },
    {
      "epoch": 0.21572632116415624,
      "grad_norm": 1.3455076217651367,
      "learning_rate": 9.631596634225733e-06,
      "loss": 0.0671,
      "step": 131820
    },
    {
      "epoch": 0.2157590516028096,
      "grad_norm": 2.0853567123413086,
      "learning_rate": 9.631530742012217e-06,
      "loss": 0.0643,
      "step": 131840
    },
    {
      "epoch": 0.21579178204146293,
      "grad_norm": 4.843384265899658,
      "learning_rate": 9.6314648497987e-06,
      "loss": 0.0689,
      "step": 131860
    },
    {
      "epoch": 0.21582451248011625,
      "grad_norm": 1.8115501403808594,
      "learning_rate": 9.631398957585182e-06,
      "loss": 0.0554,
      "step": 131880
    },
    {
      "epoch": 0.2158572429187696,
      "grad_norm": 0.9605897665023804,
      "learning_rate": 9.631333065371666e-06,
      "loss": 0.0612,
      "step": 131900
    },
    {
      "epoch": 0.21588997335742294,
      "grad_norm": 2.0857746601104736,
      "learning_rate": 9.63126717315815e-06,
      "loss": 0.0594,
      "step": 131920
    },
    {
      "epoch": 0.21592270379607628,
      "grad_norm": 2.5524778366088867,
      "learning_rate": 9.631201280944631e-06,
      "loss": 0.0733,
      "step": 131940
    },
    {
      "epoch": 0.21595543423472963,
      "grad_norm": 3.6262669563293457,
      "learning_rate": 9.631135388731115e-06,
      "loss": 0.0797,
      "step": 131960
    },
    {
      "epoch": 0.21598816467338294,
      "grad_norm": 1.7272617816925049,
      "learning_rate": 9.631069496517598e-06,
      "loss": 0.0607,
      "step": 131980
    },
    {
      "epoch": 0.2160208951120363,
      "grad_norm": 3.5246670246124268,
      "learning_rate": 9.63100360430408e-06,
      "loss": 0.091,
      "step": 132000
    },
    {
      "epoch": 0.21605362555068963,
      "grad_norm": 1.6266981363296509,
      "learning_rate": 9.630937712090564e-06,
      "loss": 0.072,
      "step": 132020
    },
    {
      "epoch": 0.21608635598934298,
      "grad_norm": 0.44550850987434387,
      "learning_rate": 9.630871819877046e-06,
      "loss": 0.0674,
      "step": 132040
    },
    {
      "epoch": 0.21611908642799632,
      "grad_norm": 0.8471530079841614,
      "learning_rate": 9.63080592766353e-06,
      "loss": 0.0562,
      "step": 132060
    },
    {
      "epoch": 0.21615181686664964,
      "grad_norm": 2.882143974304199,
      "learning_rate": 9.630740035450011e-06,
      "loss": 0.0667,
      "step": 132080
    },
    {
      "epoch": 0.21618454730530298,
      "grad_norm": 1.4970355033874512,
      "learning_rate": 9.630674143236495e-06,
      "loss": 0.0849,
      "step": 132100
    },
    {
      "epoch": 0.21621727774395633,
      "grad_norm": 3.3090262413024902,
      "learning_rate": 9.630608251022977e-06,
      "loss": 0.0639,
      "step": 132120
    },
    {
      "epoch": 0.21625000818260967,
      "grad_norm": 13.927351951599121,
      "learning_rate": 9.63054235880946e-06,
      "loss": 0.0692,
      "step": 132140
    },
    {
      "epoch": 0.21628273862126302,
      "grad_norm": 1.8181918859481812,
      "learning_rate": 9.630476466595942e-06,
      "loss": 0.0466,
      "step": 132160
    },
    {
      "epoch": 0.21631546905991633,
      "grad_norm": 3.5666213035583496,
      "learning_rate": 9.630410574382426e-06,
      "loss": 0.0766,
      "step": 132180
    },
    {
      "epoch": 0.21634819949856968,
      "grad_norm": 0.7972708344459534,
      "learning_rate": 9.630344682168908e-06,
      "loss": 0.0553,
      "step": 132200
    },
    {
      "epoch": 0.21638092993722302,
      "grad_norm": 3.615734815597534,
      "learning_rate": 9.630278789955391e-06,
      "loss": 0.0829,
      "step": 132220
    },
    {
      "epoch": 0.21641366037587637,
      "grad_norm": 2.319248914718628,
      "learning_rate": 9.630212897741875e-06,
      "loss": 0.0601,
      "step": 132240
    },
    {
      "epoch": 0.2164463908145297,
      "grad_norm": 2.230588436126709,
      "learning_rate": 9.630147005528357e-06,
      "loss": 0.0633,
      "step": 132260
    },
    {
      "epoch": 0.21647912125318303,
      "grad_norm": 2.1191048622131348,
      "learning_rate": 9.63008111331484e-06,
      "loss": 0.0688,
      "step": 132280
    },
    {
      "epoch": 0.21651185169183637,
      "grad_norm": 1.3858224153518677,
      "learning_rate": 9.630015221101324e-06,
      "loss": 0.0567,
      "step": 132300
    },
    {
      "epoch": 0.21654458213048972,
      "grad_norm": 0.7934946417808533,
      "learning_rate": 9.629949328887806e-06,
      "loss": 0.0634,
      "step": 132320
    },
    {
      "epoch": 0.21657731256914306,
      "grad_norm": 1.9156516790390015,
      "learning_rate": 9.62988343667429e-06,
      "loss": 0.0679,
      "step": 132340
    },
    {
      "epoch": 0.21661004300779638,
      "grad_norm": 0.2973474860191345,
      "learning_rate": 9.629817544460773e-06,
      "loss": 0.0609,
      "step": 132360
    },
    {
      "epoch": 0.21664277344644972,
      "grad_norm": 3.2248544692993164,
      "learning_rate": 9.629751652247255e-06,
      "loss": 0.0662,
      "step": 132380
    },
    {
      "epoch": 0.21667550388510307,
      "grad_norm": 2.290346145629883,
      "learning_rate": 9.629685760033738e-06,
      "loss": 0.0742,
      "step": 132400
    },
    {
      "epoch": 0.2167082343237564,
      "grad_norm": 1.0172356367111206,
      "learning_rate": 9.62961986782022e-06,
      "loss": 0.0679,
      "step": 132420
    },
    {
      "epoch": 0.21674096476240975,
      "grad_norm": 2.548044443130493,
      "learning_rate": 9.629553975606704e-06,
      "loss": 0.0528,
      "step": 132440
    },
    {
      "epoch": 0.21677369520106307,
      "grad_norm": 1.7585101127624512,
      "learning_rate": 9.629488083393186e-06,
      "loss": 0.0598,
      "step": 132460
    },
    {
      "epoch": 0.21680642563971642,
      "grad_norm": 1.8634583950042725,
      "learning_rate": 9.62942219117967e-06,
      "loss": 0.0615,
      "step": 132480
    },
    {
      "epoch": 0.21683915607836976,
      "grad_norm": 2.124478816986084,
      "learning_rate": 9.629356298966151e-06,
      "loss": 0.0515,
      "step": 132500
    },
    {
      "epoch": 0.2168718865170231,
      "grad_norm": 5.393493175506592,
      "learning_rate": 9.629290406752635e-06,
      "loss": 0.089,
      "step": 132520
    },
    {
      "epoch": 0.21690461695567645,
      "grad_norm": 1.7906723022460938,
      "learning_rate": 9.629224514539117e-06,
      "loss": 0.0553,
      "step": 132540
    },
    {
      "epoch": 0.21693734739432977,
      "grad_norm": 3.5132951736450195,
      "learning_rate": 9.6291586223256e-06,
      "loss": 0.0711,
      "step": 132560
    },
    {
      "epoch": 0.2169700778329831,
      "grad_norm": 1.087835669517517,
      "learning_rate": 9.629092730112082e-06,
      "loss": 0.0572,
      "step": 132580
    },
    {
      "epoch": 0.21700280827163645,
      "grad_norm": 1.034635066986084,
      "learning_rate": 9.629026837898566e-06,
      "loss": 0.0431,
      "step": 132600
    },
    {
      "epoch": 0.2170355387102898,
      "grad_norm": 6.187779903411865,
      "learning_rate": 9.62896094568505e-06,
      "loss": 0.0572,
      "step": 132620
    },
    {
      "epoch": 0.21706826914894314,
      "grad_norm": 2.027627468109131,
      "learning_rate": 9.628895053471531e-06,
      "loss": 0.0673,
      "step": 132640
    },
    {
      "epoch": 0.21710099958759646,
      "grad_norm": 2.510042667388916,
      "learning_rate": 9.628829161258015e-06,
      "loss": 0.0523,
      "step": 132660
    },
    {
      "epoch": 0.2171337300262498,
      "grad_norm": 4.979486465454102,
      "learning_rate": 9.628763269044497e-06,
      "loss": 0.0723,
      "step": 132680
    },
    {
      "epoch": 0.21716646046490315,
      "grad_norm": 1.8226436376571655,
      "learning_rate": 9.62869737683098e-06,
      "loss": 0.0548,
      "step": 132700
    },
    {
      "epoch": 0.2171991909035565,
      "grad_norm": 2.709892511367798,
      "learning_rate": 9.628631484617464e-06,
      "loss": 0.0614,
      "step": 132720
    },
    {
      "epoch": 0.21723192134220984,
      "grad_norm": 0.9696078896522522,
      "learning_rate": 9.628565592403946e-06,
      "loss": 0.052,
      "step": 132740
    },
    {
      "epoch": 0.21726465178086315,
      "grad_norm": 9.333660125732422,
      "learning_rate": 9.62849970019043e-06,
      "loss": 0.0632,
      "step": 132760
    },
    {
      "epoch": 0.2172973822195165,
      "grad_norm": 1.938501238822937,
      "learning_rate": 9.628433807976913e-06,
      "loss": 0.0604,
      "step": 132780
    },
    {
      "epoch": 0.21733011265816984,
      "grad_norm": 1.6970596313476562,
      "learning_rate": 9.628367915763395e-06,
      "loss": 0.0589,
      "step": 132800
    },
    {
      "epoch": 0.2173628430968232,
      "grad_norm": 1.960314393043518,
      "learning_rate": 9.628302023549879e-06,
      "loss": 0.0619,
      "step": 132820
    },
    {
      "epoch": 0.21739557353547653,
      "grad_norm": 23.61455726623535,
      "learning_rate": 9.62823613133636e-06,
      "loss": 0.0745,
      "step": 132840
    },
    {
      "epoch": 0.21742830397412985,
      "grad_norm": 2.9284634590148926,
      "learning_rate": 9.628170239122844e-06,
      "loss": 0.0554,
      "step": 132860
    },
    {
      "epoch": 0.2174610344127832,
      "grad_norm": 6.034245014190674,
      "learning_rate": 9.628104346909326e-06,
      "loss": 0.0748,
      "step": 132880
    },
    {
      "epoch": 0.21749376485143654,
      "grad_norm": 2.2798211574554443,
      "learning_rate": 9.62803845469581e-06,
      "loss": 0.0607,
      "step": 132900
    },
    {
      "epoch": 0.21752649529008988,
      "grad_norm": 5.44528865814209,
      "learning_rate": 9.627972562482291e-06,
      "loss": 0.068,
      "step": 132920
    },
    {
      "epoch": 0.21755922572874323,
      "grad_norm": 1.1110042333602905,
      "learning_rate": 9.627906670268775e-06,
      "loss": 0.0587,
      "step": 132940
    },
    {
      "epoch": 0.21759195616739654,
      "grad_norm": 2.2529971599578857,
      "learning_rate": 9.627840778055259e-06,
      "loss": 0.0641,
      "step": 132960
    },
    {
      "epoch": 0.2176246866060499,
      "grad_norm": 2.8479883670806885,
      "learning_rate": 9.62777488584174e-06,
      "loss": 0.0807,
      "step": 132980
    },
    {
      "epoch": 0.21765741704470323,
      "grad_norm": 3.5733537673950195,
      "learning_rate": 9.627708993628224e-06,
      "loss": 0.0567,
      "step": 133000
    },
    {
      "epoch": 0.21769014748335658,
      "grad_norm": 3.149170398712158,
      "learning_rate": 9.627643101414706e-06,
      "loss": 0.0567,
      "step": 133020
    },
    {
      "epoch": 0.21772287792200992,
      "grad_norm": 1.6707004308700562,
      "learning_rate": 9.62757720920119e-06,
      "loss": 0.0638,
      "step": 133040
    },
    {
      "epoch": 0.21775560836066324,
      "grad_norm": 0.9186380505561829,
      "learning_rate": 9.627511316987671e-06,
      "loss": 0.0698,
      "step": 133060
    },
    {
      "epoch": 0.21778833879931658,
      "grad_norm": 2.965099573135376,
      "learning_rate": 9.627445424774155e-06,
      "loss": 0.0879,
      "step": 133080
    },
    {
      "epoch": 0.21782106923796993,
      "grad_norm": 3.4207234382629395,
      "learning_rate": 9.627379532560639e-06,
      "loss": 0.0723,
      "step": 133100
    },
    {
      "epoch": 0.21785379967662327,
      "grad_norm": 1.1041544675827026,
      "learning_rate": 9.62731364034712e-06,
      "loss": 0.0509,
      "step": 133120
    },
    {
      "epoch": 0.21788653011527662,
      "grad_norm": 3.432213544845581,
      "learning_rate": 9.627247748133604e-06,
      "loss": 0.0767,
      "step": 133140
    },
    {
      "epoch": 0.21791926055392993,
      "grad_norm": 1.623849630355835,
      "learning_rate": 9.627181855920088e-06,
      "loss": 0.0524,
      "step": 133160
    },
    {
      "epoch": 0.21795199099258328,
      "grad_norm": 2.7225584983825684,
      "learning_rate": 9.62711596370657e-06,
      "loss": 0.0581,
      "step": 133180
    },
    {
      "epoch": 0.21798472143123662,
      "grad_norm": 0.8779070973396301,
      "learning_rate": 9.627050071493053e-06,
      "loss": 0.0528,
      "step": 133200
    },
    {
      "epoch": 0.21801745186988997,
      "grad_norm": 3.1036832332611084,
      "learning_rate": 9.626984179279535e-06,
      "loss": 0.0609,
      "step": 133220
    },
    {
      "epoch": 0.2180501823085433,
      "grad_norm": 2.3362057209014893,
      "learning_rate": 9.626918287066019e-06,
      "loss": 0.0642,
      "step": 133240
    },
    {
      "epoch": 0.21808291274719663,
      "grad_norm": 1.1871031522750854,
      "learning_rate": 9.6268523948525e-06,
      "loss": 0.0598,
      "step": 133260
    },
    {
      "epoch": 0.21811564318584997,
      "grad_norm": 0.8911901116371155,
      "learning_rate": 9.626786502638984e-06,
      "loss": 0.0515,
      "step": 133280
    },
    {
      "epoch": 0.21814837362450332,
      "grad_norm": 1.2265040874481201,
      "learning_rate": 9.626720610425468e-06,
      "loss": 0.0761,
      "step": 133300
    },
    {
      "epoch": 0.21818110406315666,
      "grad_norm": 1.1596958637237549,
      "learning_rate": 9.62665471821195e-06,
      "loss": 0.0495,
      "step": 133320
    },
    {
      "epoch": 0.21821383450181,
      "grad_norm": 6.010471343994141,
      "learning_rate": 9.626588825998433e-06,
      "loss": 0.0568,
      "step": 133340
    },
    {
      "epoch": 0.21824656494046332,
      "grad_norm": 5.738663196563721,
      "learning_rate": 9.626522933784915e-06,
      "loss": 0.0579,
      "step": 133360
    },
    {
      "epoch": 0.21827929537911667,
      "grad_norm": 3.824744939804077,
      "learning_rate": 9.626457041571399e-06,
      "loss": 0.0425,
      "step": 133380
    },
    {
      "epoch": 0.21831202581777,
      "grad_norm": 2.7133982181549072,
      "learning_rate": 9.62639114935788e-06,
      "loss": 0.0586,
      "step": 133400
    },
    {
      "epoch": 0.21834475625642336,
      "grad_norm": 3.2294681072235107,
      "learning_rate": 9.626325257144364e-06,
      "loss": 0.0758,
      "step": 133420
    },
    {
      "epoch": 0.2183774866950767,
      "grad_norm": 3.942504644393921,
      "learning_rate": 9.626259364930846e-06,
      "loss": 0.0705,
      "step": 133440
    },
    {
      "epoch": 0.21841021713373002,
      "grad_norm": 3.0483033657073975,
      "learning_rate": 9.62619347271733e-06,
      "loss": 0.0486,
      "step": 133460
    },
    {
      "epoch": 0.21844294757238336,
      "grad_norm": 15.66794490814209,
      "learning_rate": 9.626127580503812e-06,
      "loss": 0.0522,
      "step": 133480
    },
    {
      "epoch": 0.2184756780110367,
      "grad_norm": 2.199831008911133,
      "learning_rate": 9.626061688290295e-06,
      "loss": 0.0613,
      "step": 133500
    },
    {
      "epoch": 0.21850840844969005,
      "grad_norm": 2.2646849155426025,
      "learning_rate": 9.625995796076779e-06,
      "loss": 0.0544,
      "step": 133520
    },
    {
      "epoch": 0.2185411388883434,
      "grad_norm": 1.555756688117981,
      "learning_rate": 9.62592990386326e-06,
      "loss": 0.0655,
      "step": 133540
    },
    {
      "epoch": 0.2185738693269967,
      "grad_norm": 3.996903419494629,
      "learning_rate": 9.625864011649744e-06,
      "loss": 0.0586,
      "step": 133560
    },
    {
      "epoch": 0.21860659976565006,
      "grad_norm": 11.094271659851074,
      "learning_rate": 9.625798119436228e-06,
      "loss": 0.0655,
      "step": 133580
    },
    {
      "epoch": 0.2186393302043034,
      "grad_norm": 3.9910836219787598,
      "learning_rate": 9.62573222722271e-06,
      "loss": 0.055,
      "step": 133600
    },
    {
      "epoch": 0.21867206064295674,
      "grad_norm": 3.090301990509033,
      "learning_rate": 9.625666335009193e-06,
      "loss": 0.07,
      "step": 133620
    },
    {
      "epoch": 0.2187047910816101,
      "grad_norm": 3.503664255142212,
      "learning_rate": 9.625600442795675e-06,
      "loss": 0.0641,
      "step": 133640
    },
    {
      "epoch": 0.2187375215202634,
      "grad_norm": 2.56268572807312,
      "learning_rate": 9.625534550582159e-06,
      "loss": 0.0661,
      "step": 133660
    },
    {
      "epoch": 0.21877025195891675,
      "grad_norm": 1.9979232549667358,
      "learning_rate": 9.625468658368642e-06,
      "loss": 0.0633,
      "step": 133680
    },
    {
      "epoch": 0.2188029823975701,
      "grad_norm": 0.6871234178543091,
      "learning_rate": 9.625402766155124e-06,
      "loss": 0.0611,
      "step": 133700
    },
    {
      "epoch": 0.21883571283622344,
      "grad_norm": 1.4135522842407227,
      "learning_rate": 9.625336873941608e-06,
      "loss": 0.055,
      "step": 133720
    },
    {
      "epoch": 0.21886844327487676,
      "grad_norm": 0.8929240703582764,
      "learning_rate": 9.62527098172809e-06,
      "loss": 0.0495,
      "step": 133740
    },
    {
      "epoch": 0.2189011737135301,
      "grad_norm": 2.5446853637695312,
      "learning_rate": 9.625205089514573e-06,
      "loss": 0.0494,
      "step": 133760
    },
    {
      "epoch": 0.21893390415218345,
      "grad_norm": 1.0588799715042114,
      "learning_rate": 9.625139197301055e-06,
      "loss": 0.0454,
      "step": 133780
    },
    {
      "epoch": 0.2189666345908368,
      "grad_norm": 2.6095492839813232,
      "learning_rate": 9.625073305087539e-06,
      "loss": 0.0826,
      "step": 133800
    },
    {
      "epoch": 0.21899936502949013,
      "grad_norm": 2.2270700931549072,
      "learning_rate": 9.62500741287402e-06,
      "loss": 0.0547,
      "step": 133820
    },
    {
      "epoch": 0.21903209546814345,
      "grad_norm": 1.2927143573760986,
      "learning_rate": 9.624941520660504e-06,
      "loss": 0.0629,
      "step": 133840
    },
    {
      "epoch": 0.2190648259067968,
      "grad_norm": 1.682915210723877,
      "learning_rate": 9.624875628446986e-06,
      "loss": 0.0492,
      "step": 133860
    },
    {
      "epoch": 0.21909755634545014,
      "grad_norm": 2.6622045040130615,
      "learning_rate": 9.62480973623347e-06,
      "loss": 0.0528,
      "step": 133880
    },
    {
      "epoch": 0.21913028678410348,
      "grad_norm": 1.2455601692199707,
      "learning_rate": 9.624743844019953e-06,
      "loss": 0.0676,
      "step": 133900
    },
    {
      "epoch": 0.21916301722275683,
      "grad_norm": 3.162506103515625,
      "learning_rate": 9.624677951806435e-06,
      "loss": 0.0857,
      "step": 133920
    },
    {
      "epoch": 0.21919574766141015,
      "grad_norm": 1.6866511106491089,
      "learning_rate": 9.624612059592919e-06,
      "loss": 0.0752,
      "step": 133940
    },
    {
      "epoch": 0.2192284781000635,
      "grad_norm": 1.5948975086212158,
      "learning_rate": 9.624546167379402e-06,
      "loss": 0.0637,
      "step": 133960
    },
    {
      "epoch": 0.21926120853871683,
      "grad_norm": 3.5513057708740234,
      "learning_rate": 9.624480275165884e-06,
      "loss": 0.0783,
      "step": 133980
    },
    {
      "epoch": 0.21929393897737018,
      "grad_norm": 3.1890313625335693,
      "learning_rate": 9.624414382952368e-06,
      "loss": 0.07,
      "step": 134000
    },
    {
      "epoch": 0.21932666941602352,
      "grad_norm": 1.7518882751464844,
      "learning_rate": 9.624348490738851e-06,
      "loss": 0.0587,
      "step": 134020
    },
    {
      "epoch": 0.21935939985467684,
      "grad_norm": 1.5614902973175049,
      "learning_rate": 9.624282598525333e-06,
      "loss": 0.0558,
      "step": 134040
    },
    {
      "epoch": 0.21939213029333018,
      "grad_norm": 2.3542566299438477,
      "learning_rate": 9.624216706311817e-06,
      "loss": 0.0592,
      "step": 134060
    },
    {
      "epoch": 0.21942486073198353,
      "grad_norm": 0.3859822452068329,
      "learning_rate": 9.624150814098299e-06,
      "loss": 0.0513,
      "step": 134080
    },
    {
      "epoch": 0.21945759117063687,
      "grad_norm": 2.3624210357666016,
      "learning_rate": 9.624084921884782e-06,
      "loss": 0.0577,
      "step": 134100
    },
    {
      "epoch": 0.21949032160929022,
      "grad_norm": 1.1165997982025146,
      "learning_rate": 9.624019029671264e-06,
      "loss": 0.0555,
      "step": 134120
    },
    {
      "epoch": 0.21952305204794353,
      "grad_norm": 0.31682562828063965,
      "learning_rate": 9.623953137457748e-06,
      "loss": 0.0637,
      "step": 134140
    },
    {
      "epoch": 0.21955578248659688,
      "grad_norm": 3.012575387954712,
      "learning_rate": 9.62388724524423e-06,
      "loss": 0.0547,
      "step": 134160
    },
    {
      "epoch": 0.21958851292525022,
      "grad_norm": 1.0702630281448364,
      "learning_rate": 9.623821353030713e-06,
      "loss": 0.0591,
      "step": 134180
    },
    {
      "epoch": 0.21962124336390357,
      "grad_norm": 1.3156408071517944,
      "learning_rate": 9.623755460817195e-06,
      "loss": 0.0601,
      "step": 134200
    },
    {
      "epoch": 0.2196539738025569,
      "grad_norm": 3.097043037414551,
      "learning_rate": 9.623689568603679e-06,
      "loss": 0.0617,
      "step": 134220
    },
    {
      "epoch": 0.21968670424121023,
      "grad_norm": 3.4934916496276855,
      "learning_rate": 9.62362367639016e-06,
      "loss": 0.055,
      "step": 134240
    },
    {
      "epoch": 0.21971943467986357,
      "grad_norm": 2.8140175342559814,
      "learning_rate": 9.623557784176644e-06,
      "loss": 0.0735,
      "step": 134260
    },
    {
      "epoch": 0.21975216511851692,
      "grad_norm": 2.355684757232666,
      "learning_rate": 9.623491891963128e-06,
      "loss": 0.0529,
      "step": 134280
    },
    {
      "epoch": 0.21978489555717026,
      "grad_norm": 3.757420539855957,
      "learning_rate": 9.62342599974961e-06,
      "loss": 0.0784,
      "step": 134300
    },
    {
      "epoch": 0.2198176259958236,
      "grad_norm": 3.2190189361572266,
      "learning_rate": 9.623360107536093e-06,
      "loss": 0.0697,
      "step": 134320
    },
    {
      "epoch": 0.21985035643447692,
      "grad_norm": 6.456999778747559,
      "learning_rate": 9.623294215322577e-06,
      "loss": 0.0469,
      "step": 134340
    },
    {
      "epoch": 0.21988308687313027,
      "grad_norm": 2.059988021850586,
      "learning_rate": 9.623228323109059e-06,
      "loss": 0.0831,
      "step": 134360
    },
    {
      "epoch": 0.2199158173117836,
      "grad_norm": 1.6092721223831177,
      "learning_rate": 9.623162430895542e-06,
      "loss": 0.0607,
      "step": 134380
    },
    {
      "epoch": 0.21994854775043696,
      "grad_norm": 3.513697624206543,
      "learning_rate": 9.623096538682026e-06,
      "loss": 0.0636,
      "step": 134400
    },
    {
      "epoch": 0.2199812781890903,
      "grad_norm": 1.4380630254745483,
      "learning_rate": 9.623030646468508e-06,
      "loss": 0.0545,
      "step": 134420
    },
    {
      "epoch": 0.22001400862774362,
      "grad_norm": 4.303861618041992,
      "learning_rate": 9.622964754254992e-06,
      "loss": 0.0651,
      "step": 134440
    },
    {
      "epoch": 0.22004673906639696,
      "grad_norm": 1.204964518547058,
      "learning_rate": 9.622898862041473e-06,
      "loss": 0.0582,
      "step": 134460
    },
    {
      "epoch": 0.2200794695050503,
      "grad_norm": 4.306888103485107,
      "learning_rate": 9.622832969827957e-06,
      "loss": 0.0645,
      "step": 134480
    },
    {
      "epoch": 0.22011219994370365,
      "grad_norm": 1.3594988584518433,
      "learning_rate": 9.622767077614439e-06,
      "loss": 0.0633,
      "step": 134500
    },
    {
      "epoch": 0.220144930382357,
      "grad_norm": 0.7929739952087402,
      "learning_rate": 9.622701185400922e-06,
      "loss": 0.0609,
      "step": 134520
    },
    {
      "epoch": 0.2201776608210103,
      "grad_norm": 3.158566951751709,
      "learning_rate": 9.622635293187404e-06,
      "loss": 0.0782,
      "step": 134540
    },
    {
      "epoch": 0.22021039125966366,
      "grad_norm": 1.9860100746154785,
      "learning_rate": 9.622569400973888e-06,
      "loss": 0.0537,
      "step": 134560
    },
    {
      "epoch": 0.220243121698317,
      "grad_norm": 0.746576189994812,
      "learning_rate": 9.62250350876037e-06,
      "loss": 0.0754,
      "step": 134580
    },
    {
      "epoch": 0.22027585213697035,
      "grad_norm": 5.142512321472168,
      "learning_rate": 9.622437616546853e-06,
      "loss": 0.0668,
      "step": 134600
    },
    {
      "epoch": 0.2203085825756237,
      "grad_norm": 12.639803886413574,
      "learning_rate": 9.622371724333335e-06,
      "loss": 0.0604,
      "step": 134620
    },
    {
      "epoch": 0.220341313014277,
      "grad_norm": 3.659243583679199,
      "learning_rate": 9.622305832119819e-06,
      "loss": 0.0731,
      "step": 134640
    },
    {
      "epoch": 0.22037404345293035,
      "grad_norm": 2.9898762702941895,
      "learning_rate": 9.6222399399063e-06,
      "loss": 0.0648,
      "step": 134660
    },
    {
      "epoch": 0.2204067738915837,
      "grad_norm": 0.841454267501831,
      "learning_rate": 9.622174047692784e-06,
      "loss": 0.0674,
      "step": 134680
    },
    {
      "epoch": 0.22043950433023704,
      "grad_norm": 2.621006488800049,
      "learning_rate": 9.622108155479268e-06,
      "loss": 0.06,
      "step": 134700
    },
    {
      "epoch": 0.22047223476889038,
      "grad_norm": 3.91215443611145,
      "learning_rate": 9.62204226326575e-06,
      "loss": 0.0645,
      "step": 134720
    },
    {
      "epoch": 0.2205049652075437,
      "grad_norm": 0.4385683834552765,
      "learning_rate": 9.621976371052233e-06,
      "loss": 0.063,
      "step": 134740
    },
    {
      "epoch": 0.22053769564619705,
      "grad_norm": 2.2869324684143066,
      "learning_rate": 9.621910478838717e-06,
      "loss": 0.0529,
      "step": 134760
    },
    {
      "epoch": 0.2205704260848504,
      "grad_norm": 0.523755669593811,
      "learning_rate": 9.621844586625199e-06,
      "loss": 0.0609,
      "step": 134780
    },
    {
      "epoch": 0.22060315652350374,
      "grad_norm": 2.595914363861084,
      "learning_rate": 9.621778694411683e-06,
      "loss": 0.0544,
      "step": 134800
    },
    {
      "epoch": 0.22063588696215708,
      "grad_norm": 1.2062005996704102,
      "learning_rate": 9.621712802198166e-06,
      "loss": 0.0656,
      "step": 134820
    },
    {
      "epoch": 0.2206686174008104,
      "grad_norm": 9.769163131713867,
      "learning_rate": 9.621646909984648e-06,
      "loss": 0.0637,
      "step": 134840
    },
    {
      "epoch": 0.22070134783946374,
      "grad_norm": 1.7929893732070923,
      "learning_rate": 9.621581017771132e-06,
      "loss": 0.0597,
      "step": 134860
    },
    {
      "epoch": 0.22073407827811709,
      "grad_norm": 2.2482168674468994,
      "learning_rate": 9.621515125557613e-06,
      "loss": 0.0666,
      "step": 134880
    },
    {
      "epoch": 0.22076680871677043,
      "grad_norm": 1.0486451387405396,
      "learning_rate": 9.621449233344097e-06,
      "loss": 0.0629,
      "step": 134900
    },
    {
      "epoch": 0.22079953915542377,
      "grad_norm": 4.8785176277160645,
      "learning_rate": 9.621383341130579e-06,
      "loss": 0.0704,
      "step": 134920
    },
    {
      "epoch": 0.2208322695940771,
      "grad_norm": 2.025902271270752,
      "learning_rate": 9.621317448917063e-06,
      "loss": 0.0493,
      "step": 134940
    },
    {
      "epoch": 0.22086500003273044,
      "grad_norm": 0.5721666216850281,
      "learning_rate": 9.621251556703544e-06,
      "loss": 0.0605,
      "step": 134960
    },
    {
      "epoch": 0.22089773047138378,
      "grad_norm": 16.04302406311035,
      "learning_rate": 9.621185664490028e-06,
      "loss": 0.076,
      "step": 134980
    },
    {
      "epoch": 0.22093046091003712,
      "grad_norm": 0.6338207721710205,
      "learning_rate": 9.62111977227651e-06,
      "loss": 0.0625,
      "step": 135000
    },
    {
      "epoch": 0.22096319134869047,
      "grad_norm": 0.576640784740448,
      "learning_rate": 9.621053880062993e-06,
      "loss": 0.0483,
      "step": 135020
    },
    {
      "epoch": 0.22099592178734379,
      "grad_norm": 5.159727573394775,
      "learning_rate": 9.620987987849475e-06,
      "loss": 0.06,
      "step": 135040
    },
    {
      "epoch": 0.22102865222599713,
      "grad_norm": 4.598940372467041,
      "learning_rate": 9.620922095635959e-06,
      "loss": 0.0642,
      "step": 135060
    },
    {
      "epoch": 0.22106138266465047,
      "grad_norm": 2.769756555557251,
      "learning_rate": 9.620856203422443e-06,
      "loss": 0.0637,
      "step": 135080
    },
    {
      "epoch": 0.22109411310330382,
      "grad_norm": 2.465487480163574,
      "learning_rate": 9.620790311208924e-06,
      "loss": 0.0779,
      "step": 135100
    },
    {
      "epoch": 0.22112684354195716,
      "grad_norm": 0.919580340385437,
      "learning_rate": 9.620724418995408e-06,
      "loss": 0.0485,
      "step": 135120
    },
    {
      "epoch": 0.22115957398061048,
      "grad_norm": 1.9764689207077026,
      "learning_rate": 9.620658526781892e-06,
      "loss": 0.0772,
      "step": 135140
    },
    {
      "epoch": 0.22119230441926382,
      "grad_norm": 0.9514948129653931,
      "learning_rate": 9.620592634568374e-06,
      "loss": 0.0449,
      "step": 135160
    },
    {
      "epoch": 0.22122503485791717,
      "grad_norm": 2.8924334049224854,
      "learning_rate": 9.620526742354857e-06,
      "loss": 0.0691,
      "step": 135180
    },
    {
      "epoch": 0.2212577652965705,
      "grad_norm": 0.529802143573761,
      "learning_rate": 9.62046085014134e-06,
      "loss": 0.0684,
      "step": 135200
    },
    {
      "epoch": 0.22129049573522383,
      "grad_norm": 3.4320034980773926,
      "learning_rate": 9.620394957927823e-06,
      "loss": 0.0654,
      "step": 135220
    },
    {
      "epoch": 0.22132322617387717,
      "grad_norm": 1.6405545473098755,
      "learning_rate": 9.620329065714306e-06,
      "loss": 0.0617,
      "step": 135240
    },
    {
      "epoch": 0.22135595661253052,
      "grad_norm": 1.432064175605774,
      "learning_rate": 9.620263173500788e-06,
      "loss": 0.0724,
      "step": 135260
    },
    {
      "epoch": 0.22138868705118386,
      "grad_norm": 1.6406712532043457,
      "learning_rate": 9.620197281287272e-06,
      "loss": 0.0617,
      "step": 135280
    },
    {
      "epoch": 0.2214214174898372,
      "grad_norm": 2.564810037612915,
      "learning_rate": 9.620131389073754e-06,
      "loss": 0.0762,
      "step": 135300
    },
    {
      "epoch": 0.22145414792849052,
      "grad_norm": 0.48105522990226746,
      "learning_rate": 9.620065496860237e-06,
      "loss": 0.0645,
      "step": 135320
    },
    {
      "epoch": 0.22148687836714387,
      "grad_norm": 3.438138484954834,
      "learning_rate": 9.619999604646719e-06,
      "loss": 0.0753,
      "step": 135340
    },
    {
      "epoch": 0.2215196088057972,
      "grad_norm": 1.3158013820648193,
      "learning_rate": 9.619933712433203e-06,
      "loss": 0.0472,
      "step": 135360
    },
    {
      "epoch": 0.22155233924445056,
      "grad_norm": 1.4288787841796875,
      "learning_rate": 9.619867820219685e-06,
      "loss": 0.0531,
      "step": 135380
    },
    {
      "epoch": 0.2215850696831039,
      "grad_norm": 2.3781418800354004,
      "learning_rate": 9.619801928006168e-06,
      "loss": 0.0597,
      "step": 135400
    },
    {
      "epoch": 0.22161780012175722,
      "grad_norm": 4.061512470245361,
      "learning_rate": 9.619736035792652e-06,
      "loss": 0.0623,
      "step": 135420
    },
    {
      "epoch": 0.22165053056041056,
      "grad_norm": 1.9355262517929077,
      "learning_rate": 9.619670143579134e-06,
      "loss": 0.0579,
      "step": 135440
    },
    {
      "epoch": 0.2216832609990639,
      "grad_norm": 3.988241672515869,
      "learning_rate": 9.619604251365617e-06,
      "loss": 0.0744,
      "step": 135460
    },
    {
      "epoch": 0.22171599143771725,
      "grad_norm": 1.574479579925537,
      "learning_rate": 9.619538359152099e-06,
      "loss": 0.0482,
      "step": 135480
    },
    {
      "epoch": 0.2217487218763706,
      "grad_norm": 1.3496366739273071,
      "learning_rate": 9.619472466938583e-06,
      "loss": 0.0495,
      "step": 135500
    },
    {
      "epoch": 0.2217814523150239,
      "grad_norm": 1.7059013843536377,
      "learning_rate": 9.619406574725065e-06,
      "loss": 0.0523,
      "step": 135520
    },
    {
      "epoch": 0.22181418275367726,
      "grad_norm": 1.9142088890075684,
      "learning_rate": 9.619340682511548e-06,
      "loss": 0.0641,
      "step": 135540
    },
    {
      "epoch": 0.2218469131923306,
      "grad_norm": 3.0566959381103516,
      "learning_rate": 9.619274790298032e-06,
      "loss": 0.0573,
      "step": 135560
    },
    {
      "epoch": 0.22187964363098395,
      "grad_norm": 2.953944683074951,
      "learning_rate": 9.619208898084514e-06,
      "loss": 0.0609,
      "step": 135580
    },
    {
      "epoch": 0.2219123740696373,
      "grad_norm": 4.885005950927734,
      "learning_rate": 9.619143005870997e-06,
      "loss": 0.05,
      "step": 135600
    },
    {
      "epoch": 0.2219451045082906,
      "grad_norm": 1.028208613395691,
      "learning_rate": 9.61907711365748e-06,
      "loss": 0.0692,
      "step": 135620
    },
    {
      "epoch": 0.22197783494694395,
      "grad_norm": 4.8201727867126465,
      "learning_rate": 9.619011221443963e-06,
      "loss": 0.0759,
      "step": 135640
    },
    {
      "epoch": 0.2220105653855973,
      "grad_norm": 2.147012710571289,
      "learning_rate": 9.618945329230446e-06,
      "loss": 0.0535,
      "step": 135660
    },
    {
      "epoch": 0.22204329582425064,
      "grad_norm": 3.596888303756714,
      "learning_rate": 9.618879437016928e-06,
      "loss": 0.0475,
      "step": 135680
    },
    {
      "epoch": 0.22207602626290399,
      "grad_norm": 1.279416561126709,
      "learning_rate": 9.618813544803412e-06,
      "loss": 0.077,
      "step": 135700
    },
    {
      "epoch": 0.2221087567015573,
      "grad_norm": 23.02398681640625,
      "learning_rate": 9.618747652589894e-06,
      "loss": 0.0769,
      "step": 135720
    },
    {
      "epoch": 0.22214148714021065,
      "grad_norm": 0.8787627816200256,
      "learning_rate": 9.618681760376377e-06,
      "loss": 0.064,
      "step": 135740
    },
    {
      "epoch": 0.222174217578864,
      "grad_norm": 1.2074471712112427,
      "learning_rate": 9.61861586816286e-06,
      "loss": 0.0768,
      "step": 135760
    },
    {
      "epoch": 0.22220694801751734,
      "grad_norm": 1.7510936260223389,
      "learning_rate": 9.618549975949343e-06,
      "loss": 0.0604,
      "step": 135780
    },
    {
      "epoch": 0.22223967845617068,
      "grad_norm": 4.380313873291016,
      "learning_rate": 9.618484083735826e-06,
      "loss": 0.0735,
      "step": 135800
    },
    {
      "epoch": 0.222272408894824,
      "grad_norm": 0.9566521644592285,
      "learning_rate": 9.618418191522308e-06,
      "loss": 0.0522,
      "step": 135820
    },
    {
      "epoch": 0.22230513933347734,
      "grad_norm": 4.35939884185791,
      "learning_rate": 9.618352299308792e-06,
      "loss": 0.0801,
      "step": 135840
    },
    {
      "epoch": 0.2223378697721307,
      "grad_norm": 3.731757402420044,
      "learning_rate": 9.618286407095274e-06,
      "loss": 0.0582,
      "step": 135860
    },
    {
      "epoch": 0.22237060021078403,
      "grad_norm": 2.5232372283935547,
      "learning_rate": 9.618220514881757e-06,
      "loss": 0.0403,
      "step": 135880
    },
    {
      "epoch": 0.22240333064943738,
      "grad_norm": 3.389558792114258,
      "learning_rate": 9.618154622668239e-06,
      "loss": 0.06,
      "step": 135900
    },
    {
      "epoch": 0.2224360610880907,
      "grad_norm": 2.256497383117676,
      "learning_rate": 9.618088730454723e-06,
      "loss": 0.0731,
      "step": 135920
    },
    {
      "epoch": 0.22246879152674404,
      "grad_norm": 3.2773776054382324,
      "learning_rate": 9.618022838241206e-06,
      "loss": 0.0637,
      "step": 135940
    },
    {
      "epoch": 0.22250152196539738,
      "grad_norm": 3.424198627471924,
      "learning_rate": 9.617956946027688e-06,
      "loss": 0.0718,
      "step": 135960
    },
    {
      "epoch": 0.22253425240405073,
      "grad_norm": 3.1423425674438477,
      "learning_rate": 9.617891053814172e-06,
      "loss": 0.0693,
      "step": 135980
    },
    {
      "epoch": 0.22256698284270407,
      "grad_norm": 2.5317392349243164,
      "learning_rate": 9.617825161600655e-06,
      "loss": 0.0751,
      "step": 136000
    },
    {
      "epoch": 0.2225997132813574,
      "grad_norm": 1.798898696899414,
      "learning_rate": 9.617759269387137e-06,
      "loss": 0.061,
      "step": 136020
    },
    {
      "epoch": 0.22263244372001073,
      "grad_norm": 2.8000166416168213,
      "learning_rate": 9.617693377173621e-06,
      "loss": 0.0582,
      "step": 136040
    },
    {
      "epoch": 0.22266517415866408,
      "grad_norm": 2.6050188541412354,
      "learning_rate": 9.617627484960103e-06,
      "loss": 0.0411,
      "step": 136060
    },
    {
      "epoch": 0.22269790459731742,
      "grad_norm": 5.034906387329102,
      "learning_rate": 9.617561592746586e-06,
      "loss": 0.0534,
      "step": 136080
    },
    {
      "epoch": 0.22273063503597076,
      "grad_norm": 1.0999882221221924,
      "learning_rate": 9.617495700533068e-06,
      "loss": 0.0415,
      "step": 136100
    },
    {
      "epoch": 0.22276336547462408,
      "grad_norm": 3.231001615524292,
      "learning_rate": 9.617429808319552e-06,
      "loss": 0.0566,
      "step": 136120
    },
    {
      "epoch": 0.22279609591327743,
      "grad_norm": 1.8587321043014526,
      "learning_rate": 9.617363916106035e-06,
      "loss": 0.0613,
      "step": 136140
    },
    {
      "epoch": 0.22282882635193077,
      "grad_norm": 5.516097068786621,
      "learning_rate": 9.617298023892517e-06,
      "loss": 0.0414,
      "step": 136160
    },
    {
      "epoch": 0.22286155679058411,
      "grad_norm": 7.188494682312012,
      "learning_rate": 9.617232131679001e-06,
      "loss": 0.059,
      "step": 136180
    },
    {
      "epoch": 0.22289428722923746,
      "grad_norm": 1.215187430381775,
      "learning_rate": 9.617166239465483e-06,
      "loss": 0.0525,
      "step": 136200
    },
    {
      "epoch": 0.22292701766789078,
      "grad_norm": 2.9484729766845703,
      "learning_rate": 9.617100347251966e-06,
      "loss": 0.0625,
      "step": 136220
    },
    {
      "epoch": 0.22295974810654412,
      "grad_norm": 0.7846181988716125,
      "learning_rate": 9.617034455038448e-06,
      "loss": 0.0603,
      "step": 136240
    },
    {
      "epoch": 0.22299247854519746,
      "grad_norm": 2.439671039581299,
      "learning_rate": 9.616968562824932e-06,
      "loss": 0.0562,
      "step": 136260
    },
    {
      "epoch": 0.2230252089838508,
      "grad_norm": 2.973550319671631,
      "learning_rate": 9.616902670611414e-06,
      "loss": 0.0547,
      "step": 136280
    },
    {
      "epoch": 0.22305793942250415,
      "grad_norm": 1.6296435594558716,
      "learning_rate": 9.616836778397897e-06,
      "loss": 0.0763,
      "step": 136300
    },
    {
      "epoch": 0.22309066986115747,
      "grad_norm": 1.0292357206344604,
      "learning_rate": 9.61677088618438e-06,
      "loss": 0.0505,
      "step": 136320
    },
    {
      "epoch": 0.22312340029981081,
      "grad_norm": 1.916658878326416,
      "learning_rate": 9.616704993970863e-06,
      "loss": 0.068,
      "step": 136340
    },
    {
      "epoch": 0.22315613073846416,
      "grad_norm": 2.5464630126953125,
      "learning_rate": 9.616639101757346e-06,
      "loss": 0.0889,
      "step": 136360
    },
    {
      "epoch": 0.2231888611771175,
      "grad_norm": 3.7164134979248047,
      "learning_rate": 9.616573209543828e-06,
      "loss": 0.0633,
      "step": 136380
    },
    {
      "epoch": 0.22322159161577085,
      "grad_norm": 4.2077765464782715,
      "learning_rate": 9.616507317330312e-06,
      "loss": 0.0555,
      "step": 136400
    },
    {
      "epoch": 0.22325432205442416,
      "grad_norm": 2.8870043754577637,
      "learning_rate": 9.616441425116795e-06,
      "loss": 0.072,
      "step": 136420
    },
    {
      "epoch": 0.2232870524930775,
      "grad_norm": 1.4060100317001343,
      "learning_rate": 9.616375532903277e-06,
      "loss": 0.0415,
      "step": 136440
    },
    {
      "epoch": 0.22331978293173085,
      "grad_norm": 1.6882961988449097,
      "learning_rate": 9.616309640689761e-06,
      "loss": 0.0755,
      "step": 136460
    },
    {
      "epoch": 0.2233525133703842,
      "grad_norm": 6.424252033233643,
      "learning_rate": 9.616243748476245e-06,
      "loss": 0.0555,
      "step": 136480
    },
    {
      "epoch": 0.22338524380903754,
      "grad_norm": 2.258321762084961,
      "learning_rate": 9.616177856262726e-06,
      "loss": 0.0553,
      "step": 136500
    },
    {
      "epoch": 0.22341797424769086,
      "grad_norm": 2.4157159328460693,
      "learning_rate": 9.61611196404921e-06,
      "loss": 0.0544,
      "step": 136520
    },
    {
      "epoch": 0.2234507046863442,
      "grad_norm": 3.5562570095062256,
      "learning_rate": 9.616046071835692e-06,
      "loss": 0.068,
      "step": 136540
    },
    {
      "epoch": 0.22348343512499755,
      "grad_norm": 2.2744131088256836,
      "learning_rate": 9.615980179622175e-06,
      "loss": 0.0501,
      "step": 136560
    },
    {
      "epoch": 0.2235161655636509,
      "grad_norm": 2.7558436393737793,
      "learning_rate": 9.615914287408657e-06,
      "loss": 0.0574,
      "step": 136580
    },
    {
      "epoch": 0.2235488960023042,
      "grad_norm": 2.5150527954101562,
      "learning_rate": 9.615848395195141e-06,
      "loss": 0.0635,
      "step": 136600
    },
    {
      "epoch": 0.22358162644095755,
      "grad_norm": 1.2602369785308838,
      "learning_rate": 9.615782502981623e-06,
      "loss": 0.0741,
      "step": 136620
    },
    {
      "epoch": 0.2236143568796109,
      "grad_norm": 0.28254446387290955,
      "learning_rate": 9.615716610768106e-06,
      "loss": 0.0593,
      "step": 136640
    },
    {
      "epoch": 0.22364708731826424,
      "grad_norm": 2.3212411403656006,
      "learning_rate": 9.615650718554588e-06,
      "loss": 0.0573,
      "step": 136660
    },
    {
      "epoch": 0.2236798177569176,
      "grad_norm": 2.119447708129883,
      "learning_rate": 9.615584826341072e-06,
      "loss": 0.0523,
      "step": 136680
    },
    {
      "epoch": 0.2237125481955709,
      "grad_norm": 8.114386558532715,
      "learning_rate": 9.615518934127554e-06,
      "loss": 0.0582,
      "step": 136700
    },
    {
      "epoch": 0.22374527863422425,
      "grad_norm": 2.6195859909057617,
      "learning_rate": 9.615453041914037e-06,
      "loss": 0.058,
      "step": 136720
    },
    {
      "epoch": 0.2237780090728776,
      "grad_norm": 2.481595754623413,
      "learning_rate": 9.615387149700521e-06,
      "loss": 0.067,
      "step": 136740
    },
    {
      "epoch": 0.22381073951153094,
      "grad_norm": 2.800678253173828,
      "learning_rate": 9.615321257487003e-06,
      "loss": 0.0713,
      "step": 136760
    },
    {
      "epoch": 0.22384346995018428,
      "grad_norm": 5.213248252868652,
      "learning_rate": 9.615255365273486e-06,
      "loss": 0.055,
      "step": 136780
    },
    {
      "epoch": 0.2238762003888376,
      "grad_norm": 1.676714539527893,
      "learning_rate": 9.61518947305997e-06,
      "loss": 0.0605,
      "step": 136800
    },
    {
      "epoch": 0.22390893082749094,
      "grad_norm": 1.754822850227356,
      "learning_rate": 9.615123580846452e-06,
      "loss": 0.0438,
      "step": 136820
    },
    {
      "epoch": 0.2239416612661443,
      "grad_norm": 3.2813096046447754,
      "learning_rate": 9.615057688632936e-06,
      "loss": 0.0627,
      "step": 136840
    },
    {
      "epoch": 0.22397439170479763,
      "grad_norm": 0.431687593460083,
      "learning_rate": 9.614991796419419e-06,
      "loss": 0.052,
      "step": 136860
    },
    {
      "epoch": 0.22400712214345098,
      "grad_norm": 3.081843614578247,
      "learning_rate": 9.614925904205901e-06,
      "loss": 0.0512,
      "step": 136880
    },
    {
      "epoch": 0.2240398525821043,
      "grad_norm": 1.6910254955291748,
      "learning_rate": 9.614860011992385e-06,
      "loss": 0.0516,
      "step": 136900
    },
    {
      "epoch": 0.22407258302075764,
      "grad_norm": 1.3863682746887207,
      "learning_rate": 9.614794119778866e-06,
      "loss": 0.0616,
      "step": 136920
    },
    {
      "epoch": 0.22410531345941098,
      "grad_norm": 0.906431257724762,
      "learning_rate": 9.61472822756535e-06,
      "loss": 0.0742,
      "step": 136940
    },
    {
      "epoch": 0.22413804389806433,
      "grad_norm": 7.211801052093506,
      "learning_rate": 9.614662335351832e-06,
      "loss": 0.0707,
      "step": 136960
    },
    {
      "epoch": 0.22417077433671767,
      "grad_norm": 5.053874492645264,
      "learning_rate": 9.614596443138316e-06,
      "loss": 0.0762,
      "step": 136980
    },
    {
      "epoch": 0.224203504775371,
      "grad_norm": 7.171563625335693,
      "learning_rate": 9.614530550924797e-06,
      "loss": 0.0663,
      "step": 137000
    },
    {
      "epoch": 0.22423623521402433,
      "grad_norm": 2.7275352478027344,
      "learning_rate": 9.614464658711281e-06,
      "loss": 0.0558,
      "step": 137020
    },
    {
      "epoch": 0.22426896565267768,
      "grad_norm": 3.169044017791748,
      "learning_rate": 9.614398766497763e-06,
      "loss": 0.0746,
      "step": 137040
    },
    {
      "epoch": 0.22430169609133102,
      "grad_norm": 2.0310897827148438,
      "learning_rate": 9.614332874284247e-06,
      "loss": 0.0501,
      "step": 137060
    },
    {
      "epoch": 0.22433442652998437,
      "grad_norm": 4.360069274902344,
      "learning_rate": 9.614266982070728e-06,
      "loss": 0.0584,
      "step": 137080
    },
    {
      "epoch": 0.22436715696863768,
      "grad_norm": 1.4594539403915405,
      "learning_rate": 9.614201089857212e-06,
      "loss": 0.0588,
      "step": 137100
    },
    {
      "epoch": 0.22439988740729103,
      "grad_norm": 4.505132675170898,
      "learning_rate": 9.614135197643696e-06,
      "loss": 0.061,
      "step": 137120
    },
    {
      "epoch": 0.22443261784594437,
      "grad_norm": 4.033030033111572,
      "learning_rate": 9.614069305430177e-06,
      "loss": 0.0555,
      "step": 137140
    },
    {
      "epoch": 0.22446534828459772,
      "grad_norm": 1.1629379987716675,
      "learning_rate": 9.614003413216661e-06,
      "loss": 0.0734,
      "step": 137160
    },
    {
      "epoch": 0.22449807872325106,
      "grad_norm": 2.964165687561035,
      "learning_rate": 9.613937521003145e-06,
      "loss": 0.0544,
      "step": 137180
    },
    {
      "epoch": 0.22453080916190438,
      "grad_norm": 2.0960938930511475,
      "learning_rate": 9.613871628789627e-06,
      "loss": 0.0744,
      "step": 137200
    },
    {
      "epoch": 0.22456353960055772,
      "grad_norm": 0.5370023846626282,
      "learning_rate": 9.61380573657611e-06,
      "loss": 0.046,
      "step": 137220
    },
    {
      "epoch": 0.22459627003921107,
      "grad_norm": 2.3433122634887695,
      "learning_rate": 9.613739844362594e-06,
      "loss": 0.0607,
      "step": 137240
    },
    {
      "epoch": 0.2246290004778644,
      "grad_norm": 1.5488038063049316,
      "learning_rate": 9.613673952149076e-06,
      "loss": 0.0501,
      "step": 137260
    },
    {
      "epoch": 0.22466173091651775,
      "grad_norm": 1.961130142211914,
      "learning_rate": 9.61360805993556e-06,
      "loss": 0.0568,
      "step": 137280
    },
    {
      "epoch": 0.22469446135517107,
      "grad_norm": 0.1604236662387848,
      "learning_rate": 9.613542167722041e-06,
      "loss": 0.0651,
      "step": 137300
    },
    {
      "epoch": 0.22472719179382442,
      "grad_norm": 3.5101733207702637,
      "learning_rate": 9.613476275508525e-06,
      "loss": 0.0691,
      "step": 137320
    },
    {
      "epoch": 0.22475992223247776,
      "grad_norm": 0.8720236420631409,
      "learning_rate": 9.613410383295007e-06,
      "loss": 0.0504,
      "step": 137340
    },
    {
      "epoch": 0.2247926526711311,
      "grad_norm": 2.701449394226074,
      "learning_rate": 9.61334449108149e-06,
      "loss": 0.0646,
      "step": 137360
    },
    {
      "epoch": 0.22482538310978445,
      "grad_norm": 1.6158814430236816,
      "learning_rate": 9.613278598867972e-06,
      "loss": 0.0569,
      "step": 137380
    },
    {
      "epoch": 0.22485811354843777,
      "grad_norm": 1.7554634809494019,
      "learning_rate": 9.613212706654456e-06,
      "loss": 0.058,
      "step": 137400
    },
    {
      "epoch": 0.2248908439870911,
      "grad_norm": 1.8285294771194458,
      "learning_rate": 9.613146814440938e-06,
      "loss": 0.0547,
      "step": 137420
    },
    {
      "epoch": 0.22492357442574445,
      "grad_norm": 1.8667712211608887,
      "learning_rate": 9.613080922227421e-06,
      "loss": 0.0704,
      "step": 137440
    },
    {
      "epoch": 0.2249563048643978,
      "grad_norm": 0.9002320170402527,
      "learning_rate": 9.613015030013903e-06,
      "loss": 0.0705,
      "step": 137460
    },
    {
      "epoch": 0.22498903530305114,
      "grad_norm": 2.341634511947632,
      "learning_rate": 9.612949137800387e-06,
      "loss": 0.0611,
      "step": 137480
    },
    {
      "epoch": 0.22502176574170446,
      "grad_norm": 3.521986484527588,
      "learning_rate": 9.612883245586868e-06,
      "loss": 0.0528,
      "step": 137500
    },
    {
      "epoch": 0.2250544961803578,
      "grad_norm": 1.6856695413589478,
      "learning_rate": 9.612817353373352e-06,
      "loss": 0.054,
      "step": 137520
    },
    {
      "epoch": 0.22508722661901115,
      "grad_norm": 2.9158856868743896,
      "learning_rate": 9.612751461159836e-06,
      "loss": 0.061,
      "step": 137540
    },
    {
      "epoch": 0.2251199570576645,
      "grad_norm": 2.804940700531006,
      "learning_rate": 9.612685568946318e-06,
      "loss": 0.0509,
      "step": 137560
    },
    {
      "epoch": 0.22515268749631784,
      "grad_norm": 3.544420003890991,
      "learning_rate": 9.612619676732801e-06,
      "loss": 0.0604,
      "step": 137580
    },
    {
      "epoch": 0.22518541793497115,
      "grad_norm": 2.4149045944213867,
      "learning_rate": 9.612553784519285e-06,
      "loss": 0.0585,
      "step": 137600
    },
    {
      "epoch": 0.2252181483736245,
      "grad_norm": 2.7398922443389893,
      "learning_rate": 9.612487892305767e-06,
      "loss": 0.0538,
      "step": 137620
    },
    {
      "epoch": 0.22525087881227784,
      "grad_norm": 5.388891696929932,
      "learning_rate": 9.61242200009225e-06,
      "loss": 0.0623,
      "step": 137640
    },
    {
      "epoch": 0.2252836092509312,
      "grad_norm": 2.311023712158203,
      "learning_rate": 9.612356107878734e-06,
      "loss": 0.0634,
      "step": 137660
    },
    {
      "epoch": 0.22531633968958453,
      "grad_norm": 0.893798828125,
      "learning_rate": 9.612290215665216e-06,
      "loss": 0.0692,
      "step": 137680
    },
    {
      "epoch": 0.22534907012823785,
      "grad_norm": 0.6351541876792908,
      "learning_rate": 9.6122243234517e-06,
      "loss": 0.0593,
      "step": 137700
    },
    {
      "epoch": 0.2253818005668912,
      "grad_norm": 1.9721417427062988,
      "learning_rate": 9.612158431238181e-06,
      "loss": 0.0473,
      "step": 137720
    },
    {
      "epoch": 0.22541453100554454,
      "grad_norm": 4.0163116455078125,
      "learning_rate": 9.612092539024665e-06,
      "loss": 0.0766,
      "step": 137740
    },
    {
      "epoch": 0.22544726144419788,
      "grad_norm": 2.103580951690674,
      "learning_rate": 9.612026646811147e-06,
      "loss": 0.0464,
      "step": 137760
    },
    {
      "epoch": 0.22547999188285123,
      "grad_norm": 0.8706488609313965,
      "learning_rate": 9.61196075459763e-06,
      "loss": 0.0368,
      "step": 137780
    },
    {
      "epoch": 0.22551272232150454,
      "grad_norm": 2.8511645793914795,
      "learning_rate": 9.611894862384112e-06,
      "loss": 0.0646,
      "step": 137800
    },
    {
      "epoch": 0.2255454527601579,
      "grad_norm": 2.5087685585021973,
      "learning_rate": 9.611828970170596e-06,
      "loss": 0.0557,
      "step": 137820
    },
    {
      "epoch": 0.22557818319881123,
      "grad_norm": 5.985613822937012,
      "learning_rate": 9.611763077957078e-06,
      "loss": 0.0563,
      "step": 137840
    },
    {
      "epoch": 0.22561091363746458,
      "grad_norm": 5.010105133056641,
      "learning_rate": 9.611697185743561e-06,
      "loss": 0.0753,
      "step": 137860
    },
    {
      "epoch": 0.22564364407611792,
      "grad_norm": 7.285435199737549,
      "learning_rate": 9.611631293530045e-06,
      "loss": 0.0858,
      "step": 137880
    },
    {
      "epoch": 0.22567637451477124,
      "grad_norm": 0.9504488706588745,
      "learning_rate": 9.611565401316527e-06,
      "loss": 0.0676,
      "step": 137900
    },
    {
      "epoch": 0.22570910495342458,
      "grad_norm": 4.053852081298828,
      "learning_rate": 9.61149950910301e-06,
      "loss": 0.0744,
      "step": 137920
    },
    {
      "epoch": 0.22574183539207793,
      "grad_norm": 1.3050620555877686,
      "learning_rate": 9.611433616889492e-06,
      "loss": 0.067,
      "step": 137940
    },
    {
      "epoch": 0.22577456583073127,
      "grad_norm": 4.079358100891113,
      "learning_rate": 9.611367724675976e-06,
      "loss": 0.0667,
      "step": 137960
    },
    {
      "epoch": 0.2258072962693846,
      "grad_norm": 1.7903058528900146,
      "learning_rate": 9.61130183246246e-06,
      "loss": 0.056,
      "step": 137980
    },
    {
      "epoch": 0.22584002670803793,
      "grad_norm": 1.6879708766937256,
      "learning_rate": 9.611235940248941e-06,
      "loss": 0.0581,
      "step": 138000
    },
    {
      "epoch": 0.22587275714669128,
      "grad_norm": 2.747185468673706,
      "learning_rate": 9.611170048035425e-06,
      "loss": 0.061,
      "step": 138020
    },
    {
      "epoch": 0.22590548758534462,
      "grad_norm": 2.0570671558380127,
      "learning_rate": 9.611104155821908e-06,
      "loss": 0.0616,
      "step": 138040
    },
    {
      "epoch": 0.22593821802399797,
      "grad_norm": 1.9901069402694702,
      "learning_rate": 9.61103826360839e-06,
      "loss": 0.0564,
      "step": 138060
    },
    {
      "epoch": 0.22597094846265128,
      "grad_norm": 1.42068350315094,
      "learning_rate": 9.610972371394874e-06,
      "loss": 0.0718,
      "step": 138080
    },
    {
      "epoch": 0.22600367890130463,
      "grad_norm": 3.684814929962158,
      "learning_rate": 9.610906479181356e-06,
      "loss": 0.0684,
      "step": 138100
    },
    {
      "epoch": 0.22603640933995797,
      "grad_norm": 1.7667932510375977,
      "learning_rate": 9.61084058696784e-06,
      "loss": 0.0667,
      "step": 138120
    },
    {
      "epoch": 0.22606913977861132,
      "grad_norm": 3.675260066986084,
      "learning_rate": 9.610774694754321e-06,
      "loss": 0.0582,
      "step": 138140
    },
    {
      "epoch": 0.22610187021726466,
      "grad_norm": 2.783339023590088,
      "learning_rate": 9.610708802540805e-06,
      "loss": 0.0582,
      "step": 138160
    },
    {
      "epoch": 0.22613460065591798,
      "grad_norm": 2.342468738555908,
      "learning_rate": 9.610642910327287e-06,
      "loss": 0.0547,
      "step": 138180
    },
    {
      "epoch": 0.22616733109457132,
      "grad_norm": 1.554131269454956,
      "learning_rate": 9.61057701811377e-06,
      "loss": 0.0663,
      "step": 138200
    },
    {
      "epoch": 0.22620006153322467,
      "grad_norm": 3.4551236629486084,
      "learning_rate": 9.610511125900254e-06,
      "loss": 0.0667,
      "step": 138220
    },
    {
      "epoch": 0.226232791971878,
      "grad_norm": 2.6853597164154053,
      "learning_rate": 9.610445233686736e-06,
      "loss": 0.0652,
      "step": 138240
    },
    {
      "epoch": 0.22626552241053136,
      "grad_norm": 1.7239429950714111,
      "learning_rate": 9.61037934147322e-06,
      "loss": 0.052,
      "step": 138260
    },
    {
      "epoch": 0.22629825284918467,
      "grad_norm": 1.2444335222244263,
      "learning_rate": 9.610313449259701e-06,
      "loss": 0.0515,
      "step": 138280
    },
    {
      "epoch": 0.22633098328783802,
      "grad_norm": 0.9898059964179993,
      "learning_rate": 9.610247557046185e-06,
      "loss": 0.0582,
      "step": 138300
    },
    {
      "epoch": 0.22636371372649136,
      "grad_norm": 2.2158310413360596,
      "learning_rate": 9.610181664832667e-06,
      "loss": 0.0532,
      "step": 138320
    },
    {
      "epoch": 0.2263964441651447,
      "grad_norm": 2.0763192176818848,
      "learning_rate": 9.61011577261915e-06,
      "loss": 0.0734,
      "step": 138340
    },
    {
      "epoch": 0.22642917460379805,
      "grad_norm": 4.258749485015869,
      "learning_rate": 9.610049880405632e-06,
      "loss": 0.0685,
      "step": 138360
    },
    {
      "epoch": 0.22646190504245137,
      "grad_norm": 2.1348559856414795,
      "learning_rate": 9.609983988192116e-06,
      "loss": 0.0596,
      "step": 138380
    },
    {
      "epoch": 0.2264946354811047,
      "grad_norm": 2.3623244762420654,
      "learning_rate": 9.6099180959786e-06,
      "loss": 0.0766,
      "step": 138400
    },
    {
      "epoch": 0.22652736591975806,
      "grad_norm": 3.3373231887817383,
      "learning_rate": 9.609852203765081e-06,
      "loss": 0.0516,
      "step": 138420
    },
    {
      "epoch": 0.2265600963584114,
      "grad_norm": 5.061053276062012,
      "learning_rate": 9.609786311551565e-06,
      "loss": 0.0567,
      "step": 138440
    },
    {
      "epoch": 0.22659282679706474,
      "grad_norm": 2.33705735206604,
      "learning_rate": 9.609720419338048e-06,
      "loss": 0.0653,
      "step": 138460
    },
    {
      "epoch": 0.22662555723571806,
      "grad_norm": 3.732231616973877,
      "learning_rate": 9.60965452712453e-06,
      "loss": 0.0492,
      "step": 138480
    },
    {
      "epoch": 0.2266582876743714,
      "grad_norm": 1.0771479606628418,
      "learning_rate": 9.609588634911014e-06,
      "loss": 0.0609,
      "step": 138500
    },
    {
      "epoch": 0.22669101811302475,
      "grad_norm": 3.7769737243652344,
      "learning_rate": 9.609522742697496e-06,
      "loss": 0.0563,
      "step": 138520
    },
    {
      "epoch": 0.2267237485516781,
      "grad_norm": 1.8812620639801025,
      "learning_rate": 9.60945685048398e-06,
      "loss": 0.0612,
      "step": 138540
    },
    {
      "epoch": 0.22675647899033144,
      "grad_norm": 1.0831255912780762,
      "learning_rate": 9.609390958270461e-06,
      "loss": 0.0643,
      "step": 138560
    },
    {
      "epoch": 0.22678920942898476,
      "grad_norm": 3.2720367908477783,
      "learning_rate": 9.609325066056945e-06,
      "loss": 0.0446,
      "step": 138580
    },
    {
      "epoch": 0.2268219398676381,
      "grad_norm": 2.525040864944458,
      "learning_rate": 9.609259173843428e-06,
      "loss": 0.056,
      "step": 138600
    },
    {
      "epoch": 0.22685467030629144,
      "grad_norm": 3.1469035148620605,
      "learning_rate": 9.60919328162991e-06,
      "loss": 0.065,
      "step": 138620
    },
    {
      "epoch": 0.2268874007449448,
      "grad_norm": 1.3602625131607056,
      "learning_rate": 9.609127389416394e-06,
      "loss": 0.0483,
      "step": 138640
    },
    {
      "epoch": 0.22692013118359813,
      "grad_norm": 3.942493438720703,
      "learning_rate": 9.609061497202876e-06,
      "loss": 0.0736,
      "step": 138660
    },
    {
      "epoch": 0.22695286162225145,
      "grad_norm": 3.948488473892212,
      "learning_rate": 9.60899560498936e-06,
      "loss": 0.0586,
      "step": 138680
    },
    {
      "epoch": 0.2269855920609048,
      "grad_norm": 3.1424636840820312,
      "learning_rate": 9.608929712775841e-06,
      "loss": 0.0487,
      "step": 138700
    },
    {
      "epoch": 0.22701832249955814,
      "grad_norm": 3.1712405681610107,
      "learning_rate": 9.608863820562325e-06,
      "loss": 0.0751,
      "step": 138720
    },
    {
      "epoch": 0.22705105293821148,
      "grad_norm": 5.0531535148620605,
      "learning_rate": 9.608797928348807e-06,
      "loss": 0.0727,
      "step": 138740
    },
    {
      "epoch": 0.22708378337686483,
      "grad_norm": 2.878190279006958,
      "learning_rate": 9.60873203613529e-06,
      "loss": 0.0519,
      "step": 138760
    },
    {
      "epoch": 0.22711651381551815,
      "grad_norm": 0.7236849665641785,
      "learning_rate": 9.608666143921774e-06,
      "loss": 0.0417,
      "step": 138780
    },
    {
      "epoch": 0.2271492442541715,
      "grad_norm": 1.1363900899887085,
      "learning_rate": 9.608600251708256e-06,
      "loss": 0.0569,
      "step": 138800
    },
    {
      "epoch": 0.22718197469282483,
      "grad_norm": 1.27244234085083,
      "learning_rate": 9.60853435949474e-06,
      "loss": 0.0587,
      "step": 138820
    },
    {
      "epoch": 0.22721470513147818,
      "grad_norm": 1.2710598707199097,
      "learning_rate": 9.608468467281223e-06,
      "loss": 0.0654,
      "step": 138840
    },
    {
      "epoch": 0.22724743557013152,
      "grad_norm": 0.9743243455886841,
      "learning_rate": 9.608402575067705e-06,
      "loss": 0.0531,
      "step": 138860
    },
    {
      "epoch": 0.22728016600878484,
      "grad_norm": 2.1560893058776855,
      "learning_rate": 9.608336682854189e-06,
      "loss": 0.0648,
      "step": 138880
    },
    {
      "epoch": 0.22731289644743818,
      "grad_norm": 2.1004276275634766,
      "learning_rate": 9.60827079064067e-06,
      "loss": 0.075,
      "step": 138900
    },
    {
      "epoch": 0.22734562688609153,
      "grad_norm": 1.688085675239563,
      "learning_rate": 9.608204898427154e-06,
      "loss": 0.0593,
      "step": 138920
    },
    {
      "epoch": 0.22737835732474487,
      "grad_norm": 4.911003112792969,
      "learning_rate": 9.608139006213638e-06,
      "loss": 0.0627,
      "step": 138940
    },
    {
      "epoch": 0.22741108776339822,
      "grad_norm": 2.859612226486206,
      "learning_rate": 9.60807311400012e-06,
      "loss": 0.0538,
      "step": 138960
    },
    {
      "epoch": 0.22744381820205153,
      "grad_norm": 0.7782317399978638,
      "learning_rate": 9.608007221786603e-06,
      "loss": 0.0477,
      "step": 138980
    },
    {
      "epoch": 0.22747654864070488,
      "grad_norm": 4.114875316619873,
      "learning_rate": 9.607941329573085e-06,
      "loss": 0.0776,
      "step": 139000
    },
    {
      "epoch": 0.22750927907935822,
      "grad_norm": 1.347411036491394,
      "learning_rate": 9.607875437359569e-06,
      "loss": 0.0607,
      "step": 139020
    },
    {
      "epoch": 0.22754200951801157,
      "grad_norm": 1.8337699174880981,
      "learning_rate": 9.60780954514605e-06,
      "loss": 0.057,
      "step": 139040
    },
    {
      "epoch": 0.2275747399566649,
      "grad_norm": 2.1390347480773926,
      "learning_rate": 9.607743652932534e-06,
      "loss": 0.0599,
      "step": 139060
    },
    {
      "epoch": 0.22760747039531823,
      "grad_norm": 3.9214813709259033,
      "learning_rate": 9.607677760719016e-06,
      "loss": 0.082,
      "step": 139080
    },
    {
      "epoch": 0.22764020083397157,
      "grad_norm": 2.040372610092163,
      "learning_rate": 9.6076118685055e-06,
      "loss": 0.0365,
      "step": 139100
    },
    {
      "epoch": 0.22767293127262492,
      "grad_norm": 5.90853214263916,
      "learning_rate": 9.607545976291981e-06,
      "loss": 0.0587,
      "step": 139120
    },
    {
      "epoch": 0.22770566171127826,
      "grad_norm": 1.7931194305419922,
      "learning_rate": 9.607480084078465e-06,
      "loss": 0.067,
      "step": 139140
    },
    {
      "epoch": 0.2277383921499316,
      "grad_norm": 1.9060364961624146,
      "learning_rate": 9.607414191864947e-06,
      "loss": 0.0612,
      "step": 139160
    },
    {
      "epoch": 0.22777112258858492,
      "grad_norm": 2.9293742179870605,
      "learning_rate": 9.60734829965143e-06,
      "loss": 0.0571,
      "step": 139180
    },
    {
      "epoch": 0.22780385302723827,
      "grad_norm": 1.5017703771591187,
      "learning_rate": 9.607282407437914e-06,
      "loss": 0.052,
      "step": 139200
    },
    {
      "epoch": 0.2278365834658916,
      "grad_norm": 1.3190639019012451,
      "learning_rate": 9.607216515224398e-06,
      "loss": 0.0588,
      "step": 139220
    },
    {
      "epoch": 0.22786931390454496,
      "grad_norm": 2.234907388687134,
      "learning_rate": 9.60715062301088e-06,
      "loss": 0.0529,
      "step": 139240
    },
    {
      "epoch": 0.2279020443431983,
      "grad_norm": 1.407377004623413,
      "learning_rate": 9.607084730797363e-06,
      "loss": 0.0685,
      "step": 139260
    },
    {
      "epoch": 0.22793477478185162,
      "grad_norm": 6.198915958404541,
      "learning_rate": 9.607018838583847e-06,
      "loss": 0.0929,
      "step": 139280
    },
    {
      "epoch": 0.22796750522050496,
      "grad_norm": 3.2707700729370117,
      "learning_rate": 9.606952946370329e-06,
      "loss": 0.0608,
      "step": 139300
    },
    {
      "epoch": 0.2280002356591583,
      "grad_norm": 1.0284113883972168,
      "learning_rate": 9.606887054156812e-06,
      "loss": 0.0487,
      "step": 139320
    },
    {
      "epoch": 0.22803296609781165,
      "grad_norm": 1.2402862310409546,
      "learning_rate": 9.606821161943294e-06,
      "loss": 0.058,
      "step": 139340
    },
    {
      "epoch": 0.22806569653646497,
      "grad_norm": 2.4739668369293213,
      "learning_rate": 9.606755269729778e-06,
      "loss": 0.0637,
      "step": 139360
    },
    {
      "epoch": 0.2280984269751183,
      "grad_norm": 1.7891818284988403,
      "learning_rate": 9.60668937751626e-06,
      "loss": 0.0516,
      "step": 139380
    },
    {
      "epoch": 0.22813115741377166,
      "grad_norm": 6.237272262573242,
      "learning_rate": 9.606623485302743e-06,
      "loss": 0.0721,
      "step": 139400
    },
    {
      "epoch": 0.228163887852425,
      "grad_norm": 2.0997672080993652,
      "learning_rate": 9.606557593089225e-06,
      "loss": 0.0489,
      "step": 139420
    },
    {
      "epoch": 0.22819661829107835,
      "grad_norm": 4.688788890838623,
      "learning_rate": 9.606491700875709e-06,
      "loss": 0.0655,
      "step": 139440
    },
    {
      "epoch": 0.22822934872973166,
      "grad_norm": 0.8824304938316345,
      "learning_rate": 9.60642580866219e-06,
      "loss": 0.0551,
      "step": 139460
    },
    {
      "epoch": 0.228262079168385,
      "grad_norm": 3.071537733078003,
      "learning_rate": 9.606359916448674e-06,
      "loss": 0.0639,
      "step": 139480
    },
    {
      "epoch": 0.22829480960703835,
      "grad_norm": 0.6167427897453308,
      "learning_rate": 9.606294024235156e-06,
      "loss": 0.0562,
      "step": 139500
    },
    {
      "epoch": 0.2283275400456917,
      "grad_norm": 2.0973002910614014,
      "learning_rate": 9.60622813202164e-06,
      "loss": 0.0623,
      "step": 139520
    },
    {
      "epoch": 0.22836027048434504,
      "grad_norm": 1.614781379699707,
      "learning_rate": 9.606162239808121e-06,
      "loss": 0.0701,
      "step": 139540
    },
    {
      "epoch": 0.22839300092299836,
      "grad_norm": 1.5521482229232788,
      "learning_rate": 9.606096347594605e-06,
      "loss": 0.0542,
      "step": 139560
    },
    {
      "epoch": 0.2284257313616517,
      "grad_norm": 2.3046631813049316,
      "learning_rate": 9.606030455381089e-06,
      "loss": 0.0559,
      "step": 139580
    },
    {
      "epoch": 0.22845846180030505,
      "grad_norm": 2.92268705368042,
      "learning_rate": 9.60596456316757e-06,
      "loss": 0.0683,
      "step": 139600
    },
    {
      "epoch": 0.2284911922389584,
      "grad_norm": 3.5486972332000732,
      "learning_rate": 9.605898670954054e-06,
      "loss": 0.0508,
      "step": 139620
    },
    {
      "epoch": 0.22852392267761173,
      "grad_norm": 3.080561637878418,
      "learning_rate": 9.605832778740538e-06,
      "loss": 0.0742,
      "step": 139640
    },
    {
      "epoch": 0.22855665311626505,
      "grad_norm": 1.7506659030914307,
      "learning_rate": 9.60576688652702e-06,
      "loss": 0.051,
      "step": 139660
    },
    {
      "epoch": 0.2285893835549184,
      "grad_norm": 2.7959041595458984,
      "learning_rate": 9.605700994313503e-06,
      "loss": 0.0583,
      "step": 139680
    },
    {
      "epoch": 0.22862211399357174,
      "grad_norm": 2.749497175216675,
      "learning_rate": 9.605635102099987e-06,
      "loss": 0.0459,
      "step": 139700
    },
    {
      "epoch": 0.22865484443222509,
      "grad_norm": 4.235579013824463,
      "learning_rate": 9.605569209886469e-06,
      "loss": 0.052,
      "step": 139720
    },
    {
      "epoch": 0.22868757487087843,
      "grad_norm": 3.0743703842163086,
      "learning_rate": 9.605503317672952e-06,
      "loss": 0.0565,
      "step": 139740
    },
    {
      "epoch": 0.22872030530953175,
      "grad_norm": 2.4337375164031982,
      "learning_rate": 9.605437425459434e-06,
      "loss": 0.0638,
      "step": 139760
    },
    {
      "epoch": 0.2287530357481851,
      "grad_norm": 3.5916242599487305,
      "learning_rate": 9.605371533245918e-06,
      "loss": 0.0577,
      "step": 139780
    },
    {
      "epoch": 0.22878576618683844,
      "grad_norm": 2.4474709033966064,
      "learning_rate": 9.6053056410324e-06,
      "loss": 0.0659,
      "step": 139800
    },
    {
      "epoch": 0.22881849662549178,
      "grad_norm": 2.493288516998291,
      "learning_rate": 9.605239748818883e-06,
      "loss": 0.0382,
      "step": 139820
    },
    {
      "epoch": 0.22885122706414512,
      "grad_norm": 4.406726360321045,
      "learning_rate": 9.605173856605365e-06,
      "loss": 0.072,
      "step": 139840
    },
    {
      "epoch": 0.22888395750279844,
      "grad_norm": 7.200432300567627,
      "learning_rate": 9.605107964391849e-06,
      "loss": 0.0643,
      "step": 139860
    },
    {
      "epoch": 0.22891668794145179,
      "grad_norm": 1.4374831914901733,
      "learning_rate": 9.60504207217833e-06,
      "loss": 0.065,
      "step": 139880
    },
    {
      "epoch": 0.22894941838010513,
      "grad_norm": 2.295503616333008,
      "learning_rate": 9.604976179964814e-06,
      "loss": 0.0472,
      "step": 139900
    },
    {
      "epoch": 0.22898214881875847,
      "grad_norm": 3.3661746978759766,
      "learning_rate": 9.604910287751296e-06,
      "loss": 0.0588,
      "step": 139920
    },
    {
      "epoch": 0.22901487925741182,
      "grad_norm": 2.0511083602905273,
      "learning_rate": 9.60484439553778e-06,
      "loss": 0.0525,
      "step": 139940
    },
    {
      "epoch": 0.22904760969606514,
      "grad_norm": 0.432047963142395,
      "learning_rate": 9.604778503324263e-06,
      "loss": 0.0657,
      "step": 139960
    },
    {
      "epoch": 0.22908034013471848,
      "grad_norm": 3.7495298385620117,
      "learning_rate": 9.604712611110745e-06,
      "loss": 0.0535,
      "step": 139980
    },
    {
      "epoch": 0.22911307057337182,
      "grad_norm": 0.8179539442062378,
      "learning_rate": 9.604646718897229e-06,
      "loss": 0.0609,
      "step": 140000
    },
    {
      "epoch": 0.22914580101202517,
      "grad_norm": 2.6057889461517334,
      "learning_rate": 9.604580826683712e-06,
      "loss": 0.0522,
      "step": 140020
    },
    {
      "epoch": 0.2291785314506785,
      "grad_norm": 1.5551152229309082,
      "learning_rate": 9.604514934470194e-06,
      "loss": 0.0594,
      "step": 140040
    },
    {
      "epoch": 0.22921126188933183,
      "grad_norm": 2.410775899887085,
      "learning_rate": 9.604449042256678e-06,
      "loss": 0.0476,
      "step": 140060
    },
    {
      "epoch": 0.22924399232798517,
      "grad_norm": 1.6687904596328735,
      "learning_rate": 9.604383150043161e-06,
      "loss": 0.0501,
      "step": 140080
    },
    {
      "epoch": 0.22927672276663852,
      "grad_norm": 4.15011739730835,
      "learning_rate": 9.604317257829643e-06,
      "loss": 0.0582,
      "step": 140100
    },
    {
      "epoch": 0.22930945320529186,
      "grad_norm": 1.279313325881958,
      "learning_rate": 9.604251365616127e-06,
      "loss": 0.0501,
      "step": 140120
    },
    {
      "epoch": 0.2293421836439452,
      "grad_norm": 1.0351041555404663,
      "learning_rate": 9.604185473402609e-06,
      "loss": 0.057,
      "step": 140140
    },
    {
      "epoch": 0.22937491408259852,
      "grad_norm": 0.902669370174408,
      "learning_rate": 9.604119581189092e-06,
      "loss": 0.0668,
      "step": 140160
    },
    {
      "epoch": 0.22940764452125187,
      "grad_norm": 2.0733802318573,
      "learning_rate": 9.604053688975574e-06,
      "loss": 0.0627,
      "step": 140180
    },
    {
      "epoch": 0.2294403749599052,
      "grad_norm": 2.327287435531616,
      "learning_rate": 9.603987796762058e-06,
      "loss": 0.0527,
      "step": 140200
    },
    {
      "epoch": 0.22947310539855856,
      "grad_norm": 4.456376075744629,
      "learning_rate": 9.60392190454854e-06,
      "loss": 0.0498,
      "step": 140220
    },
    {
      "epoch": 0.2295058358372119,
      "grad_norm": 1.1901682615280151,
      "learning_rate": 9.603856012335023e-06,
      "loss": 0.0535,
      "step": 140240
    },
    {
      "epoch": 0.22953856627586522,
      "grad_norm": 1.7706671953201294,
      "learning_rate": 9.603790120121505e-06,
      "loss": 0.0576,
      "step": 140260
    },
    {
      "epoch": 0.22957129671451856,
      "grad_norm": 2.297849416732788,
      "learning_rate": 9.603724227907989e-06,
      "loss": 0.0502,
      "step": 140280
    },
    {
      "epoch": 0.2296040271531719,
      "grad_norm": 1.4728766679763794,
      "learning_rate": 9.60365833569447e-06,
      "loss": 0.051,
      "step": 140300
    },
    {
      "epoch": 0.22963675759182525,
      "grad_norm": 2.82525372505188,
      "learning_rate": 9.603592443480954e-06,
      "loss": 0.0608,
      "step": 140320
    },
    {
      "epoch": 0.2296694880304786,
      "grad_norm": 1.7634356021881104,
      "learning_rate": 9.603526551267438e-06,
      "loss": 0.0553,
      "step": 140340
    },
    {
      "epoch": 0.2297022184691319,
      "grad_norm": 1.1798882484436035,
      "learning_rate": 9.60346065905392e-06,
      "loss": 0.0688,
      "step": 140360
    },
    {
      "epoch": 0.22973494890778526,
      "grad_norm": 3.9788949489593506,
      "learning_rate": 9.603394766840403e-06,
      "loss": 0.0682,
      "step": 140380
    },
    {
      "epoch": 0.2297676793464386,
      "grad_norm": 3.0393776893615723,
      "learning_rate": 9.603328874626885e-06,
      "loss": 0.0703,
      "step": 140400
    },
    {
      "epoch": 0.22980040978509195,
      "grad_norm": 2.197962522506714,
      "learning_rate": 9.603262982413369e-06,
      "loss": 0.057,
      "step": 140420
    },
    {
      "epoch": 0.2298331402237453,
      "grad_norm": 1.898543119430542,
      "learning_rate": 9.603197090199852e-06,
      "loss": 0.0664,
      "step": 140440
    },
    {
      "epoch": 0.2298658706623986,
      "grad_norm": 1.472200632095337,
      "learning_rate": 9.603131197986334e-06,
      "loss": 0.0453,
      "step": 140460
    },
    {
      "epoch": 0.22989860110105195,
      "grad_norm": 2.7841551303863525,
      "learning_rate": 9.603065305772818e-06,
      "loss": 0.0534,
      "step": 140480
    },
    {
      "epoch": 0.2299313315397053,
      "grad_norm": 0.8074170351028442,
      "learning_rate": 9.602999413559301e-06,
      "loss": 0.0639,
      "step": 140500
    },
    {
      "epoch": 0.22996406197835864,
      "grad_norm": 4.0925798416137695,
      "learning_rate": 9.602933521345783e-06,
      "loss": 0.0475,
      "step": 140520
    },
    {
      "epoch": 0.22999679241701199,
      "grad_norm": 0.931810736656189,
      "learning_rate": 9.602867629132267e-06,
      "loss": 0.0816,
      "step": 140540
    },
    {
      "epoch": 0.2300295228556653,
      "grad_norm": 2.190044403076172,
      "learning_rate": 9.602801736918749e-06,
      "loss": 0.0673,
      "step": 140560
    },
    {
      "epoch": 0.23006225329431865,
      "grad_norm": 1.8608968257904053,
      "learning_rate": 9.602735844705232e-06,
      "loss": 0.0599,
      "step": 140580
    },
    {
      "epoch": 0.230094983732972,
      "grad_norm": 2.98210072517395,
      "learning_rate": 9.602669952491714e-06,
      "loss": 0.0553,
      "step": 140600
    },
    {
      "epoch": 0.23012771417162534,
      "grad_norm": 3.0367684364318848,
      "learning_rate": 9.602604060278198e-06,
      "loss": 0.0546,
      "step": 140620
    },
    {
      "epoch": 0.23016044461027868,
      "grad_norm": 3.0536224842071533,
      "learning_rate": 9.60253816806468e-06,
      "loss": 0.0825,
      "step": 140640
    },
    {
      "epoch": 0.230193175048932,
      "grad_norm": 4.885120391845703,
      "learning_rate": 9.602472275851163e-06,
      "loss": 0.0795,
      "step": 140660
    },
    {
      "epoch": 0.23022590548758534,
      "grad_norm": 3.416710615158081,
      "learning_rate": 9.602406383637647e-06,
      "loss": 0.0538,
      "step": 140680
    },
    {
      "epoch": 0.2302586359262387,
      "grad_norm": 2.7541658878326416,
      "learning_rate": 9.602340491424129e-06,
      "loss": 0.0506,
      "step": 140700
    },
    {
      "epoch": 0.23029136636489203,
      "grad_norm": 16.11739730834961,
      "learning_rate": 9.602274599210612e-06,
      "loss": 0.0549,
      "step": 140720
    },
    {
      "epoch": 0.23032409680354535,
      "grad_norm": 0.6902374625205994,
      "learning_rate": 9.602208706997094e-06,
      "loss": 0.0519,
      "step": 140740
    },
    {
      "epoch": 0.2303568272421987,
      "grad_norm": 1.6018036603927612,
      "learning_rate": 9.602142814783578e-06,
      "loss": 0.0575,
      "step": 140760
    },
    {
      "epoch": 0.23038955768085204,
      "grad_norm": 2.233138084411621,
      "learning_rate": 9.60207692257006e-06,
      "loss": 0.0591,
      "step": 140780
    },
    {
      "epoch": 0.23042228811950538,
      "grad_norm": 5.422189712524414,
      "learning_rate": 9.602011030356543e-06,
      "loss": 0.0584,
      "step": 140800
    },
    {
      "epoch": 0.23045501855815873,
      "grad_norm": 5.192191123962402,
      "learning_rate": 9.601945138143027e-06,
      "loss": 0.0688,
      "step": 140820
    },
    {
      "epoch": 0.23048774899681204,
      "grad_norm": 1.3291454315185547,
      "learning_rate": 9.601879245929509e-06,
      "loss": 0.0661,
      "step": 140840
    },
    {
      "epoch": 0.2305204794354654,
      "grad_norm": 2.8484582901000977,
      "learning_rate": 9.601813353715992e-06,
      "loss": 0.0481,
      "step": 140860
    },
    {
      "epoch": 0.23055320987411873,
      "grad_norm": 2.4599387645721436,
      "learning_rate": 9.601747461502476e-06,
      "loss": 0.0476,
      "step": 140880
    },
    {
      "epoch": 0.23058594031277208,
      "grad_norm": 1.623700737953186,
      "learning_rate": 9.601681569288958e-06,
      "loss": 0.0641,
      "step": 140900
    },
    {
      "epoch": 0.23061867075142542,
      "grad_norm": 5.510317802429199,
      "learning_rate": 9.601615677075442e-06,
      "loss": 0.0687,
      "step": 140920
    },
    {
      "epoch": 0.23065140119007874,
      "grad_norm": 3.412170886993408,
      "learning_rate": 9.601549784861923e-06,
      "loss": 0.0495,
      "step": 140940
    },
    {
      "epoch": 0.23068413162873208,
      "grad_norm": 3.7346625328063965,
      "learning_rate": 9.601483892648407e-06,
      "loss": 0.0471,
      "step": 140960
    },
    {
      "epoch": 0.23071686206738543,
      "grad_norm": 0.7689668536186218,
      "learning_rate": 9.601418000434889e-06,
      "loss": 0.0684,
      "step": 140980
    },
    {
      "epoch": 0.23074959250603877,
      "grad_norm": 5.794361114501953,
      "learning_rate": 9.601352108221372e-06,
      "loss": 0.064,
      "step": 141000
    },
    {
      "epoch": 0.23078232294469211,
      "grad_norm": 1.9188799858093262,
      "learning_rate": 9.601286216007854e-06,
      "loss": 0.0746,
      "step": 141020
    },
    {
      "epoch": 0.23081505338334543,
      "grad_norm": 4.196186065673828,
      "learning_rate": 9.601220323794338e-06,
      "loss": 0.0668,
      "step": 141040
    },
    {
      "epoch": 0.23084778382199878,
      "grad_norm": 2.775454521179199,
      "learning_rate": 9.601154431580822e-06,
      "loss": 0.0768,
      "step": 141060
    },
    {
      "epoch": 0.23088051426065212,
      "grad_norm": 3.1127264499664307,
      "learning_rate": 9.601088539367303e-06,
      "loss": 0.0783,
      "step": 141080
    },
    {
      "epoch": 0.23091324469930546,
      "grad_norm": 3.1885669231414795,
      "learning_rate": 9.601022647153787e-06,
      "loss": 0.0721,
      "step": 141100
    },
    {
      "epoch": 0.2309459751379588,
      "grad_norm": 1.7259000539779663,
      "learning_rate": 9.600956754940269e-06,
      "loss": 0.0634,
      "step": 141120
    },
    {
      "epoch": 0.23097870557661213,
      "grad_norm": 1.3131505250930786,
      "learning_rate": 9.600890862726753e-06,
      "loss": 0.0546,
      "step": 141140
    },
    {
      "epoch": 0.23101143601526547,
      "grad_norm": 1.31517493724823,
      "learning_rate": 9.600824970513234e-06,
      "loss": 0.0627,
      "step": 141160
    },
    {
      "epoch": 0.23104416645391881,
      "grad_norm": 1.983508586883545,
      "learning_rate": 9.600759078299718e-06,
      "loss": 0.06,
      "step": 141180
    },
    {
      "epoch": 0.23107689689257216,
      "grad_norm": 0.6113737225532532,
      "learning_rate": 9.6006931860862e-06,
      "loss": 0.0649,
      "step": 141200
    },
    {
      "epoch": 0.2311096273312255,
      "grad_norm": 4.387593746185303,
      "learning_rate": 9.600627293872683e-06,
      "loss": 0.0659,
      "step": 141220
    },
    {
      "epoch": 0.23114235776987882,
      "grad_norm": 1.0678210258483887,
      "learning_rate": 9.600561401659167e-06,
      "loss": 0.0408,
      "step": 141240
    },
    {
      "epoch": 0.23117508820853216,
      "grad_norm": 3.743687629699707,
      "learning_rate": 9.600495509445649e-06,
      "loss": 0.0541,
      "step": 141260
    },
    {
      "epoch": 0.2312078186471855,
      "grad_norm": 3.519507646560669,
      "learning_rate": 9.600429617232133e-06,
      "loss": 0.0521,
      "step": 141280
    },
    {
      "epoch": 0.23124054908583885,
      "grad_norm": 0.9666174650192261,
      "learning_rate": 9.600363725018616e-06,
      "loss": 0.049,
      "step": 141300
    },
    {
      "epoch": 0.2312732795244922,
      "grad_norm": 3.400059223175049,
      "learning_rate": 9.600297832805098e-06,
      "loss": 0.0418,
      "step": 141320
    },
    {
      "epoch": 0.23130600996314551,
      "grad_norm": 1.7272001504898071,
      "learning_rate": 9.600231940591582e-06,
      "loss": 0.0619,
      "step": 141340
    },
    {
      "epoch": 0.23133874040179886,
      "grad_norm": 2.714482069015503,
      "learning_rate": 9.600166048378064e-06,
      "loss": 0.0636,
      "step": 141360
    },
    {
      "epoch": 0.2313714708404522,
      "grad_norm": 1.2740305662155151,
      "learning_rate": 9.600100156164547e-06,
      "loss": 0.0532,
      "step": 141380
    },
    {
      "epoch": 0.23140420127910555,
      "grad_norm": 2.508060932159424,
      "learning_rate": 9.60003426395103e-06,
      "loss": 0.0533,
      "step": 141400
    },
    {
      "epoch": 0.2314369317177589,
      "grad_norm": 2.5638651847839355,
      "learning_rate": 9.599968371737513e-06,
      "loss": 0.0662,
      "step": 141420
    },
    {
      "epoch": 0.2314696621564122,
      "grad_norm": 4.3671345710754395,
      "learning_rate": 9.599902479523996e-06,
      "loss": 0.0556,
      "step": 141440
    },
    {
      "epoch": 0.23150239259506555,
      "grad_norm": 0.2946285903453827,
      "learning_rate": 9.599836587310478e-06,
      "loss": 0.0533,
      "step": 141460
    },
    {
      "epoch": 0.2315351230337189,
      "grad_norm": 1.593571424484253,
      "learning_rate": 9.599770695096962e-06,
      "loss": 0.0597,
      "step": 141480
    },
    {
      "epoch": 0.23156785347237224,
      "grad_norm": 2.291003942489624,
      "learning_rate": 9.599704802883444e-06,
      "loss": 0.0665,
      "step": 141500
    },
    {
      "epoch": 0.2316005839110256,
      "grad_norm": 0.6226277351379395,
      "learning_rate": 9.599638910669927e-06,
      "loss": 0.0385,
      "step": 141520
    },
    {
      "epoch": 0.2316333143496789,
      "grad_norm": 2.918135643005371,
      "learning_rate": 9.599573018456409e-06,
      "loss": 0.0655,
      "step": 141540
    },
    {
      "epoch": 0.23166604478833225,
      "grad_norm": 2.719464063644409,
      "learning_rate": 9.599507126242893e-06,
      "loss": 0.0734,
      "step": 141560
    },
    {
      "epoch": 0.2316987752269856,
      "grad_norm": 0.8493427634239197,
      "learning_rate": 9.599441234029374e-06,
      "loss": 0.0616,
      "step": 141580
    },
    {
      "epoch": 0.23173150566563894,
      "grad_norm": 1.3067106008529663,
      "learning_rate": 9.599375341815858e-06,
      "loss": 0.0568,
      "step": 141600
    },
    {
      "epoch": 0.23176423610429228,
      "grad_norm": 1.5294873714447021,
      "learning_rate": 9.599309449602342e-06,
      "loss": 0.0564,
      "step": 141620
    },
    {
      "epoch": 0.2317969665429456,
      "grad_norm": 1.6833109855651855,
      "learning_rate": 9.599243557388824e-06,
      "loss": 0.0572,
      "step": 141640
    },
    {
      "epoch": 0.23182969698159894,
      "grad_norm": 1.792715311050415,
      "learning_rate": 9.599177665175307e-06,
      "loss": 0.059,
      "step": 141660
    },
    {
      "epoch": 0.2318624274202523,
      "grad_norm": 3.9235188961029053,
      "learning_rate": 9.59911177296179e-06,
      "loss": 0.0472,
      "step": 141680
    },
    {
      "epoch": 0.23189515785890563,
      "grad_norm": 1.224531888961792,
      "learning_rate": 9.599045880748273e-06,
      "loss": 0.0616,
      "step": 141700
    },
    {
      "epoch": 0.23192788829755898,
      "grad_norm": 1.0162638425827026,
      "learning_rate": 9.598979988534756e-06,
      "loss": 0.0583,
      "step": 141720
    },
    {
      "epoch": 0.2319606187362123,
      "grad_norm": 0.6144767999649048,
      "learning_rate": 9.59891409632124e-06,
      "loss": 0.0681,
      "step": 141740
    },
    {
      "epoch": 0.23199334917486564,
      "grad_norm": 2.8478317260742188,
      "learning_rate": 9.598848204107722e-06,
      "loss": 0.0565,
      "step": 141760
    },
    {
      "epoch": 0.23202607961351898,
      "grad_norm": 1.8279681205749512,
      "learning_rate": 9.598782311894205e-06,
      "loss": 0.0335,
      "step": 141780
    },
    {
      "epoch": 0.23205881005217233,
      "grad_norm": 2.3746933937072754,
      "learning_rate": 9.598716419680687e-06,
      "loss": 0.0693,
      "step": 141800
    },
    {
      "epoch": 0.23209154049082567,
      "grad_norm": 1.1832157373428345,
      "learning_rate": 9.59865052746717e-06,
      "loss": 0.0526,
      "step": 141820
    },
    {
      "epoch": 0.232124270929479,
      "grad_norm": 1.0963400602340698,
      "learning_rate": 9.598584635253653e-06,
      "loss": 0.0563,
      "step": 141840
    },
    {
      "epoch": 0.23215700136813233,
      "grad_norm": 5.74366569519043,
      "learning_rate": 9.598518743040136e-06,
      "loss": 0.0759,
      "step": 141860
    },
    {
      "epoch": 0.23218973180678568,
      "grad_norm": 2.198411703109741,
      "learning_rate": 9.598452850826618e-06,
      "loss": 0.0521,
      "step": 141880
    },
    {
      "epoch": 0.23222246224543902,
      "grad_norm": 33.13545608520508,
      "learning_rate": 9.598386958613102e-06,
      "loss": 0.0602,
      "step": 141900
    },
    {
      "epoch": 0.23225519268409237,
      "grad_norm": 2.4487247467041016,
      "learning_rate": 9.598321066399584e-06,
      "loss": 0.071,
      "step": 141920
    },
    {
      "epoch": 0.23228792312274568,
      "grad_norm": 1.5537755489349365,
      "learning_rate": 9.598255174186067e-06,
      "loss": 0.0569,
      "step": 141940
    },
    {
      "epoch": 0.23232065356139903,
      "grad_norm": 1.4658654928207397,
      "learning_rate": 9.598189281972549e-06,
      "loss": 0.0536,
      "step": 141960
    },
    {
      "epoch": 0.23235338400005237,
      "grad_norm": 1.1447397470474243,
      "learning_rate": 9.598123389759033e-06,
      "loss": 0.0692,
      "step": 141980
    },
    {
      "epoch": 0.23238611443870572,
      "grad_norm": 5.5566558837890625,
      "learning_rate": 9.598057497545516e-06,
      "loss": 0.0545,
      "step": 142000
    },
    {
      "epoch": 0.23241884487735906,
      "grad_norm": 0.7920957207679749,
      "learning_rate": 9.597991605331998e-06,
      "loss": 0.0675,
      "step": 142020
    },
    {
      "epoch": 0.23245157531601238,
      "grad_norm": 4.544452667236328,
      "learning_rate": 9.597925713118482e-06,
      "loss": 0.0638,
      "step": 142040
    },
    {
      "epoch": 0.23248430575466572,
      "grad_norm": 1.4827051162719727,
      "learning_rate": 9.597859820904965e-06,
      "loss": 0.0496,
      "step": 142060
    },
    {
      "epoch": 0.23251703619331907,
      "grad_norm": 1.7644753456115723,
      "learning_rate": 9.597793928691447e-06,
      "loss": 0.0474,
      "step": 142080
    },
    {
      "epoch": 0.2325497666319724,
      "grad_norm": 2.5110666751861572,
      "learning_rate": 9.59772803647793e-06,
      "loss": 0.0725,
      "step": 142100
    },
    {
      "epoch": 0.23258249707062575,
      "grad_norm": 2.918739080429077,
      "learning_rate": 9.597662144264414e-06,
      "loss": 0.0604,
      "step": 142120
    },
    {
      "epoch": 0.23261522750927907,
      "grad_norm": 3.528045177459717,
      "learning_rate": 9.597596252050896e-06,
      "loss": 0.0677,
      "step": 142140
    },
    {
      "epoch": 0.23264795794793242,
      "grad_norm": 3.109663486480713,
      "learning_rate": 9.59753035983738e-06,
      "loss": 0.0533,
      "step": 142160
    },
    {
      "epoch": 0.23268068838658576,
      "grad_norm": 1.169381856918335,
      "learning_rate": 9.597464467623862e-06,
      "loss": 0.0451,
      "step": 142180
    },
    {
      "epoch": 0.2327134188252391,
      "grad_norm": 2.1854560375213623,
      "learning_rate": 9.597398575410345e-06,
      "loss": 0.0592,
      "step": 142200
    },
    {
      "epoch": 0.23274614926389242,
      "grad_norm": 2.906879186630249,
      "learning_rate": 9.597332683196827e-06,
      "loss": 0.043,
      "step": 142220
    },
    {
      "epoch": 0.23277887970254577,
      "grad_norm": 3.13370418548584,
      "learning_rate": 9.59726679098331e-06,
      "loss": 0.0653,
      "step": 142240
    },
    {
      "epoch": 0.2328116101411991,
      "grad_norm": 2.0358901023864746,
      "learning_rate": 9.597200898769793e-06,
      "loss": 0.0662,
      "step": 142260
    },
    {
      "epoch": 0.23284434057985245,
      "grad_norm": 0.6569080948829651,
      "learning_rate": 9.597135006556276e-06,
      "loss": 0.0724,
      "step": 142280
    },
    {
      "epoch": 0.2328770710185058,
      "grad_norm": 4.376756191253662,
      "learning_rate": 9.597069114342758e-06,
      "loss": 0.0698,
      "step": 142300
    },
    {
      "epoch": 0.23290980145715912,
      "grad_norm": 2.6604435443878174,
      "learning_rate": 9.597003222129242e-06,
      "loss": 0.0667,
      "step": 142320
    },
    {
      "epoch": 0.23294253189581246,
      "grad_norm": 2.024766445159912,
      "learning_rate": 9.596937329915724e-06,
      "loss": 0.0539,
      "step": 142340
    },
    {
      "epoch": 0.2329752623344658,
      "grad_norm": 2.8911478519439697,
      "learning_rate": 9.596871437702207e-06,
      "loss": 0.0488,
      "step": 142360
    },
    {
      "epoch": 0.23300799277311915,
      "grad_norm": 3.1102583408355713,
      "learning_rate": 9.59680554548869e-06,
      "loss": 0.0512,
      "step": 142380
    },
    {
      "epoch": 0.2330407232117725,
      "grad_norm": 2.203280448913574,
      "learning_rate": 9.596739653275173e-06,
      "loss": 0.0682,
      "step": 142400
    },
    {
      "epoch": 0.2330734536504258,
      "grad_norm": 3.760715961456299,
      "learning_rate": 9.596673761061656e-06,
      "loss": 0.0529,
      "step": 142420
    },
    {
      "epoch": 0.23310618408907915,
      "grad_norm": 4.263614177703857,
      "learning_rate": 9.596607868848138e-06,
      "loss": 0.0586,
      "step": 142440
    },
    {
      "epoch": 0.2331389145277325,
      "grad_norm": 4.840023040771484,
      "learning_rate": 9.596541976634622e-06,
      "loss": 0.0657,
      "step": 142460
    },
    {
      "epoch": 0.23317164496638584,
      "grad_norm": 1.4608750343322754,
      "learning_rate": 9.596476084421105e-06,
      "loss": 0.0628,
      "step": 142480
    },
    {
      "epoch": 0.2332043754050392,
      "grad_norm": 4.71592378616333,
      "learning_rate": 9.596410192207587e-06,
      "loss": 0.0579,
      "step": 142500
    },
    {
      "epoch": 0.2332371058436925,
      "grad_norm": 2.242795467376709,
      "learning_rate": 9.596344299994071e-06,
      "loss": 0.0716,
      "step": 142520
    },
    {
      "epoch": 0.23326983628234585,
      "grad_norm": 1.4266237020492554,
      "learning_rate": 9.596278407780554e-06,
      "loss": 0.0654,
      "step": 142540
    },
    {
      "epoch": 0.2333025667209992,
      "grad_norm": 2.4809041023254395,
      "learning_rate": 9.596212515567036e-06,
      "loss": 0.0564,
      "step": 142560
    },
    {
      "epoch": 0.23333529715965254,
      "grad_norm": 3.4079740047454834,
      "learning_rate": 9.59614662335352e-06,
      "loss": 0.0382,
      "step": 142580
    },
    {
      "epoch": 0.23336802759830588,
      "grad_norm": 5.9312896728515625,
      "learning_rate": 9.596080731140002e-06,
      "loss": 0.0814,
      "step": 142600
    },
    {
      "epoch": 0.2334007580369592,
      "grad_norm": 1.112441062927246,
      "learning_rate": 9.596014838926485e-06,
      "loss": 0.0559,
      "step": 142620
    },
    {
      "epoch": 0.23343348847561254,
      "grad_norm": 4.50370454788208,
      "learning_rate": 9.595948946712967e-06,
      "loss": 0.0784,
      "step": 142640
    },
    {
      "epoch": 0.2334662189142659,
      "grad_norm": 14.293669700622559,
      "learning_rate": 9.595883054499451e-06,
      "loss": 0.0524,
      "step": 142660
    },
    {
      "epoch": 0.23349894935291923,
      "grad_norm": 1.4101394414901733,
      "learning_rate": 9.595817162285933e-06,
      "loss": 0.0624,
      "step": 142680
    },
    {
      "epoch": 0.23353167979157258,
      "grad_norm": 1.586524248123169,
      "learning_rate": 9.595751270072416e-06,
      "loss": 0.0602,
      "step": 142700
    },
    {
      "epoch": 0.2335644102302259,
      "grad_norm": 1.7886022329330444,
      "learning_rate": 9.595685377858898e-06,
      "loss": 0.0654,
      "step": 142720
    },
    {
      "epoch": 0.23359714066887924,
      "grad_norm": 1.5969231128692627,
      "learning_rate": 9.595619485645382e-06,
      "loss": 0.0461,
      "step": 142740
    },
    {
      "epoch": 0.23362987110753258,
      "grad_norm": 4.317482948303223,
      "learning_rate": 9.595553593431864e-06,
      "loss": 0.0604,
      "step": 142760
    },
    {
      "epoch": 0.23366260154618593,
      "grad_norm": 1.6613625288009644,
      "learning_rate": 9.595487701218347e-06,
      "loss": 0.0658,
      "step": 142780
    },
    {
      "epoch": 0.23369533198483927,
      "grad_norm": 10.161229133605957,
      "learning_rate": 9.595421809004831e-06,
      "loss": 0.0635,
      "step": 142800
    },
    {
      "epoch": 0.2337280624234926,
      "grad_norm": 1.50420343875885,
      "learning_rate": 9.595355916791313e-06,
      "loss": 0.053,
      "step": 142820
    },
    {
      "epoch": 0.23376079286214593,
      "grad_norm": 0.3583484888076782,
      "learning_rate": 9.595290024577796e-06,
      "loss": 0.0602,
      "step": 142840
    },
    {
      "epoch": 0.23379352330079928,
      "grad_norm": 1.310314655303955,
      "learning_rate": 9.59522413236428e-06,
      "loss": 0.0663,
      "step": 142860
    },
    {
      "epoch": 0.23382625373945262,
      "grad_norm": 1.3731743097305298,
      "learning_rate": 9.595158240150762e-06,
      "loss": 0.0559,
      "step": 142880
    },
    {
      "epoch": 0.23385898417810597,
      "grad_norm": 2.902031183242798,
      "learning_rate": 9.595092347937245e-06,
      "loss": 0.061,
      "step": 142900
    },
    {
      "epoch": 0.23389171461675928,
      "grad_norm": 1.993459701538086,
      "learning_rate": 9.595026455723729e-06,
      "loss": 0.0584,
      "step": 142920
    },
    {
      "epoch": 0.23392444505541263,
      "grad_norm": 2.6648876667022705,
      "learning_rate": 9.594960563510211e-06,
      "loss": 0.0619,
      "step": 142940
    },
    {
      "epoch": 0.23395717549406597,
      "grad_norm": 2.5206048488616943,
      "learning_rate": 9.594894671296695e-06,
      "loss": 0.0506,
      "step": 142960
    },
    {
      "epoch": 0.23398990593271932,
      "grad_norm": 6.134980201721191,
      "learning_rate": 9.594828779083176e-06,
      "loss": 0.0451,
      "step": 142980
    },
    {
      "epoch": 0.23402263637137266,
      "grad_norm": 0.9809520840644836,
      "learning_rate": 9.59476288686966e-06,
      "loss": 0.0522,
      "step": 143000
    },
    {
      "epoch": 0.23405536681002598,
      "grad_norm": 0.8157505989074707,
      "learning_rate": 9.594696994656142e-06,
      "loss": 0.0657,
      "step": 143020
    },
    {
      "epoch": 0.23408809724867932,
      "grad_norm": 1.5050240755081177,
      "learning_rate": 9.594631102442625e-06,
      "loss": 0.0534,
      "step": 143040
    },
    {
      "epoch": 0.23412082768733267,
      "grad_norm": 2.081411361694336,
      "learning_rate": 9.594565210229107e-06,
      "loss": 0.0506,
      "step": 143060
    },
    {
      "epoch": 0.234153558125986,
      "grad_norm": 2.6016037464141846,
      "learning_rate": 9.594499318015591e-06,
      "loss": 0.0595,
      "step": 143080
    },
    {
      "epoch": 0.23418628856463936,
      "grad_norm": 0.33115312457084656,
      "learning_rate": 9.594433425802073e-06,
      "loss": 0.0563,
      "step": 143100
    },
    {
      "epoch": 0.23421901900329267,
      "grad_norm": 4.651998519897461,
      "learning_rate": 9.594367533588556e-06,
      "loss": 0.0494,
      "step": 143120
    },
    {
      "epoch": 0.23425174944194602,
      "grad_norm": 3.2513818740844727,
      "learning_rate": 9.594301641375038e-06,
      "loss": 0.0571,
      "step": 143140
    },
    {
      "epoch": 0.23428447988059936,
      "grad_norm": 8.258429527282715,
      "learning_rate": 9.594235749161522e-06,
      "loss": 0.0626,
      "step": 143160
    },
    {
      "epoch": 0.2343172103192527,
      "grad_norm": 2.478395462036133,
      "learning_rate": 9.594169856948006e-06,
      "loss": 0.0584,
      "step": 143180
    },
    {
      "epoch": 0.23434994075790605,
      "grad_norm": 1.4248971939086914,
      "learning_rate": 9.594103964734487e-06,
      "loss": 0.0488,
      "step": 143200
    },
    {
      "epoch": 0.23438267119655937,
      "grad_norm": 1.585842490196228,
      "learning_rate": 9.594038072520971e-06,
      "loss": 0.065,
      "step": 143220
    },
    {
      "epoch": 0.2344154016352127,
      "grad_norm": 1.6836531162261963,
      "learning_rate": 9.593972180307453e-06,
      "loss": 0.058,
      "step": 143240
    },
    {
      "epoch": 0.23444813207386606,
      "grad_norm": 1.3427001237869263,
      "learning_rate": 9.593906288093936e-06,
      "loss": 0.0843,
      "step": 143260
    },
    {
      "epoch": 0.2344808625125194,
      "grad_norm": 3.3038461208343506,
      "learning_rate": 9.59384039588042e-06,
      "loss": 0.0673,
      "step": 143280
    },
    {
      "epoch": 0.23451359295117274,
      "grad_norm": 2.514556407928467,
      "learning_rate": 9.593774503666902e-06,
      "loss": 0.0651,
      "step": 143300
    },
    {
      "epoch": 0.23454632338982606,
      "grad_norm": 2.3596973419189453,
      "learning_rate": 9.593708611453386e-06,
      "loss": 0.0659,
      "step": 143320
    },
    {
      "epoch": 0.2345790538284794,
      "grad_norm": 1.0675129890441895,
      "learning_rate": 9.593642719239869e-06,
      "loss": 0.0731,
      "step": 143340
    },
    {
      "epoch": 0.23461178426713275,
      "grad_norm": 5.708010673522949,
      "learning_rate": 9.593576827026351e-06,
      "loss": 0.0609,
      "step": 143360
    },
    {
      "epoch": 0.2346445147057861,
      "grad_norm": 1.3236477375030518,
      "learning_rate": 9.593510934812835e-06,
      "loss": 0.0581,
      "step": 143380
    },
    {
      "epoch": 0.23467724514443944,
      "grad_norm": 4.916604518890381,
      "learning_rate": 9.593445042599317e-06,
      "loss": 0.0513,
      "step": 143400
    },
    {
      "epoch": 0.23470997558309276,
      "grad_norm": 2.0610549449920654,
      "learning_rate": 9.5933791503858e-06,
      "loss": 0.051,
      "step": 143420
    },
    {
      "epoch": 0.2347427060217461,
      "grad_norm": 2.09600567817688,
      "learning_rate": 9.593313258172282e-06,
      "loss": 0.0694,
      "step": 143440
    },
    {
      "epoch": 0.23477543646039944,
      "grad_norm": 1.7675491571426392,
      "learning_rate": 9.593247365958766e-06,
      "loss": 0.0601,
      "step": 143460
    },
    {
      "epoch": 0.2348081668990528,
      "grad_norm": 3.911438465118408,
      "learning_rate": 9.593181473745247e-06,
      "loss": 0.055,
      "step": 143480
    },
    {
      "epoch": 0.23484089733770613,
      "grad_norm": 0.40282806754112244,
      "learning_rate": 9.593115581531731e-06,
      "loss": 0.0422,
      "step": 143500
    },
    {
      "epoch": 0.23487362777635945,
      "grad_norm": 1.2718591690063477,
      "learning_rate": 9.593049689318215e-06,
      "loss": 0.0621,
      "step": 143520
    },
    {
      "epoch": 0.2349063582150128,
      "grad_norm": 2.6872973442077637,
      "learning_rate": 9.592983797104697e-06,
      "loss": 0.0596,
      "step": 143540
    },
    {
      "epoch": 0.23493908865366614,
      "grad_norm": 2.183678150177002,
      "learning_rate": 9.59291790489118e-06,
      "loss": 0.0616,
      "step": 143560
    },
    {
      "epoch": 0.23497181909231948,
      "grad_norm": 5.696954250335693,
      "learning_rate": 9.592852012677662e-06,
      "loss": 0.0742,
      "step": 143580
    },
    {
      "epoch": 0.2350045495309728,
      "grad_norm": 3.5251214504241943,
      "learning_rate": 9.592786120464146e-06,
      "loss": 0.0528,
      "step": 143600
    },
    {
      "epoch": 0.23503727996962614,
      "grad_norm": 3.769733190536499,
      "learning_rate": 9.592720228250627e-06,
      "loss": 0.0639,
      "step": 143620
    },
    {
      "epoch": 0.2350700104082795,
      "grad_norm": 4.248138427734375,
      "learning_rate": 9.592654336037111e-06,
      "loss": 0.0672,
      "step": 143640
    },
    {
      "epoch": 0.23510274084693283,
      "grad_norm": 1.8918017148971558,
      "learning_rate": 9.592588443823595e-06,
      "loss": 0.0572,
      "step": 143660
    },
    {
      "epoch": 0.23513547128558618,
      "grad_norm": 2.5907187461853027,
      "learning_rate": 9.592522551610077e-06,
      "loss": 0.0729,
      "step": 143680
    },
    {
      "epoch": 0.2351682017242395,
      "grad_norm": 2.3002076148986816,
      "learning_rate": 9.59245665939656e-06,
      "loss": 0.077,
      "step": 143700
    },
    {
      "epoch": 0.23520093216289284,
      "grad_norm": 2.2936885356903076,
      "learning_rate": 9.592390767183044e-06,
      "loss": 0.0585,
      "step": 143720
    },
    {
      "epoch": 0.23523366260154618,
      "grad_norm": 3.905486822128296,
      "learning_rate": 9.592324874969526e-06,
      "loss": 0.0725,
      "step": 143740
    },
    {
      "epoch": 0.23526639304019953,
      "grad_norm": 0.6935741305351257,
      "learning_rate": 9.59225898275601e-06,
      "loss": 0.0386,
      "step": 143760
    },
    {
      "epoch": 0.23529912347885287,
      "grad_norm": 2.828711986541748,
      "learning_rate": 9.592193090542491e-06,
      "loss": 0.0862,
      "step": 143780
    },
    {
      "epoch": 0.2353318539175062,
      "grad_norm": 4.791533470153809,
      "learning_rate": 9.592127198328975e-06,
      "loss": 0.0612,
      "step": 143800
    },
    {
      "epoch": 0.23536458435615953,
      "grad_norm": 1.275988221168518,
      "learning_rate": 9.592061306115457e-06,
      "loss": 0.0498,
      "step": 143820
    },
    {
      "epoch": 0.23539731479481288,
      "grad_norm": 2.9286065101623535,
      "learning_rate": 9.59199541390194e-06,
      "loss": 0.0577,
      "step": 143840
    },
    {
      "epoch": 0.23543004523346622,
      "grad_norm": 2.070417642593384,
      "learning_rate": 9.591929521688424e-06,
      "loss": 0.0423,
      "step": 143860
    },
    {
      "epoch": 0.23546277567211957,
      "grad_norm": 3.627744197845459,
      "learning_rate": 9.591863629474906e-06,
      "loss": 0.054,
      "step": 143880
    },
    {
      "epoch": 0.23549550611077288,
      "grad_norm": 2.0591490268707275,
      "learning_rate": 9.59179773726139e-06,
      "loss": 0.0506,
      "step": 143900
    },
    {
      "epoch": 0.23552823654942623,
      "grad_norm": 1.6518332958221436,
      "learning_rate": 9.591731845047871e-06,
      "loss": 0.0659,
      "step": 143920
    },
    {
      "epoch": 0.23556096698807957,
      "grad_norm": 2.294407844543457,
      "learning_rate": 9.591665952834355e-06,
      "loss": 0.0525,
      "step": 143940
    },
    {
      "epoch": 0.23559369742673292,
      "grad_norm": 2.193659782409668,
      "learning_rate": 9.591600060620837e-06,
      "loss": 0.0655,
      "step": 143960
    },
    {
      "epoch": 0.23562642786538626,
      "grad_norm": 2.76910138130188,
      "learning_rate": 9.59153416840732e-06,
      "loss": 0.0595,
      "step": 143980
    },
    {
      "epoch": 0.23565915830403958,
      "grad_norm": 2.8908495903015137,
      "learning_rate": 9.591468276193802e-06,
      "loss": 0.0704,
      "step": 144000
    },
    {
      "epoch": 0.23569188874269292,
      "grad_norm": 4.753262996673584,
      "learning_rate": 9.591402383980286e-06,
      "loss": 0.0538,
      "step": 144020
    },
    {
      "epoch": 0.23572461918134627,
      "grad_norm": 3.6043145656585693,
      "learning_rate": 9.591336491766768e-06,
      "loss": 0.0679,
      "step": 144040
    },
    {
      "epoch": 0.2357573496199996,
      "grad_norm": 2.6928675174713135,
      "learning_rate": 9.591270599553251e-06,
      "loss": 0.0508,
      "step": 144060
    },
    {
      "epoch": 0.23579008005865296,
      "grad_norm": 1.32551908493042,
      "learning_rate": 9.591204707339735e-06,
      "loss": 0.0499,
      "step": 144080
    },
    {
      "epoch": 0.23582281049730627,
      "grad_norm": 1.8942651748657227,
      "learning_rate": 9.591138815126217e-06,
      "loss": 0.0631,
      "step": 144100
    },
    {
      "epoch": 0.23585554093595962,
      "grad_norm": 2.850646734237671,
      "learning_rate": 9.5910729229127e-06,
      "loss": 0.0469,
      "step": 144120
    },
    {
      "epoch": 0.23588827137461296,
      "grad_norm": 0.7331434488296509,
      "learning_rate": 9.591007030699184e-06,
      "loss": 0.0758,
      "step": 144140
    },
    {
      "epoch": 0.2359210018132663,
      "grad_norm": 2.906503200531006,
      "learning_rate": 9.590941138485666e-06,
      "loss": 0.065,
      "step": 144160
    },
    {
      "epoch": 0.23595373225191965,
      "grad_norm": 1.5560579299926758,
      "learning_rate": 9.59087524627215e-06,
      "loss": 0.0418,
      "step": 144180
    },
    {
      "epoch": 0.23598646269057297,
      "grad_norm": 1.2695075273513794,
      "learning_rate": 9.590809354058633e-06,
      "loss": 0.0495,
      "step": 144200
    },
    {
      "epoch": 0.2360191931292263,
      "grad_norm": 3.6468536853790283,
      "learning_rate": 9.590743461845115e-06,
      "loss": 0.0575,
      "step": 144220
    },
    {
      "epoch": 0.23605192356787966,
      "grad_norm": 3.467015266418457,
      "learning_rate": 9.590677569631598e-06,
      "loss": 0.0481,
      "step": 144240
    },
    {
      "epoch": 0.236084654006533,
      "grad_norm": 1.7962912321090698,
      "learning_rate": 9.59061167741808e-06,
      "loss": 0.0605,
      "step": 144260
    },
    {
      "epoch": 0.23611738444518635,
      "grad_norm": 2.7795228958129883,
      "learning_rate": 9.590545785204564e-06,
      "loss": 0.0596,
      "step": 144280
    },
    {
      "epoch": 0.23615011488383966,
      "grad_norm": 13.181173324584961,
      "learning_rate": 9.590479892991046e-06,
      "loss": 0.0715,
      "step": 144300
    },
    {
      "epoch": 0.236182845322493,
      "grad_norm": 1.3363487720489502,
      "learning_rate": 9.59041400077753e-06,
      "loss": 0.0607,
      "step": 144320
    },
    {
      "epoch": 0.23621557576114635,
      "grad_norm": 2.0982611179351807,
      "learning_rate": 9.590348108564011e-06,
      "loss": 0.0536,
      "step": 144340
    },
    {
      "epoch": 0.2362483061997997,
      "grad_norm": 1.1533522605895996,
      "learning_rate": 9.590282216350495e-06,
      "loss": 0.0522,
      "step": 144360
    },
    {
      "epoch": 0.23628103663845304,
      "grad_norm": 0.6074631810188293,
      "learning_rate": 9.590216324136977e-06,
      "loss": 0.0479,
      "step": 144380
    },
    {
      "epoch": 0.23631376707710636,
      "grad_norm": 2.952613353729248,
      "learning_rate": 9.59015043192346e-06,
      "loss": 0.0567,
      "step": 144400
    },
    {
      "epoch": 0.2363464975157597,
      "grad_norm": 3.3814339637756348,
      "learning_rate": 9.590084539709942e-06,
      "loss": 0.0487,
      "step": 144420
    },
    {
      "epoch": 0.23637922795441305,
      "grad_norm": 3.4478306770324707,
      "learning_rate": 9.590018647496426e-06,
      "loss": 0.0539,
      "step": 144440
    },
    {
      "epoch": 0.2364119583930664,
      "grad_norm": 1.1043689250946045,
      "learning_rate": 9.58995275528291e-06,
      "loss": 0.0637,
      "step": 144460
    },
    {
      "epoch": 0.23644468883171973,
      "grad_norm": 3.344993829727173,
      "learning_rate": 9.589886863069391e-06,
      "loss": 0.0609,
      "step": 144480
    },
    {
      "epoch": 0.23647741927037305,
      "grad_norm": 0.6779181361198425,
      "learning_rate": 9.589820970855875e-06,
      "loss": 0.055,
      "step": 144500
    },
    {
      "epoch": 0.2365101497090264,
      "grad_norm": 2.383014440536499,
      "learning_rate": 9.589755078642358e-06,
      "loss": 0.0544,
      "step": 144520
    },
    {
      "epoch": 0.23654288014767974,
      "grad_norm": 1.0517927408218384,
      "learning_rate": 9.58968918642884e-06,
      "loss": 0.0594,
      "step": 144540
    },
    {
      "epoch": 0.23657561058633308,
      "grad_norm": 8.314651489257812,
      "learning_rate": 9.589623294215324e-06,
      "loss": 0.0605,
      "step": 144560
    },
    {
      "epoch": 0.23660834102498643,
      "grad_norm": 2.5449204444885254,
      "learning_rate": 9.589557402001807e-06,
      "loss": 0.0578,
      "step": 144580
    },
    {
      "epoch": 0.23664107146363975,
      "grad_norm": 3.3322484493255615,
      "learning_rate": 9.58949150978829e-06,
      "loss": 0.052,
      "step": 144600
    },
    {
      "epoch": 0.2366738019022931,
      "grad_norm": 10.487027168273926,
      "learning_rate": 9.589425617574773e-06,
      "loss": 0.0523,
      "step": 144620
    },
    {
      "epoch": 0.23670653234094644,
      "grad_norm": 2.0591769218444824,
      "learning_rate": 9.589359725361255e-06,
      "loss": 0.0437,
      "step": 144640
    },
    {
      "epoch": 0.23673926277959978,
      "grad_norm": 2.7511045932769775,
      "learning_rate": 9.589293833147738e-06,
      "loss": 0.0601,
      "step": 144660
    },
    {
      "epoch": 0.23677199321825312,
      "grad_norm": 1.4846746921539307,
      "learning_rate": 9.58922794093422e-06,
      "loss": 0.0493,
      "step": 144680
    },
    {
      "epoch": 0.23680472365690644,
      "grad_norm": 3.727602243423462,
      "learning_rate": 9.589162048720704e-06,
      "loss": 0.0507,
      "step": 144700
    },
    {
      "epoch": 0.23683745409555979,
      "grad_norm": 1.5044184923171997,
      "learning_rate": 9.589096156507186e-06,
      "loss": 0.0669,
      "step": 144720
    },
    {
      "epoch": 0.23687018453421313,
      "grad_norm": 2.093487501144409,
      "learning_rate": 9.58903026429367e-06,
      "loss": 0.0611,
      "step": 144740
    },
    {
      "epoch": 0.23690291497286647,
      "grad_norm": 4.508047103881836,
      "learning_rate": 9.588964372080151e-06,
      "loss": 0.0739,
      "step": 144760
    },
    {
      "epoch": 0.23693564541151982,
      "grad_norm": 1.3070966005325317,
      "learning_rate": 9.588898479866635e-06,
      "loss": 0.0485,
      "step": 144780
    },
    {
      "epoch": 0.23696837585017314,
      "grad_norm": 2.1115078926086426,
      "learning_rate": 9.588832587653117e-06,
      "loss": 0.0666,
      "step": 144800
    },
    {
      "epoch": 0.23700110628882648,
      "grad_norm": 11.203545570373535,
      "learning_rate": 9.5887666954396e-06,
      "loss": 0.0485,
      "step": 144820
    },
    {
      "epoch": 0.23703383672747982,
      "grad_norm": 15.357587814331055,
      "learning_rate": 9.588700803226084e-06,
      "loss": 0.0716,
      "step": 144840
    },
    {
      "epoch": 0.23706656716613317,
      "grad_norm": 2.2268340587615967,
      "learning_rate": 9.588634911012566e-06,
      "loss": 0.0627,
      "step": 144860
    },
    {
      "epoch": 0.2370992976047865,
      "grad_norm": 2.6547718048095703,
      "learning_rate": 9.58856901879905e-06,
      "loss": 0.0629,
      "step": 144880
    },
    {
      "epoch": 0.23713202804343983,
      "grad_norm": 2.645843267440796,
      "learning_rate": 9.588503126585533e-06,
      "loss": 0.0821,
      "step": 144900
    },
    {
      "epoch": 0.23716475848209317,
      "grad_norm": 1.7338573932647705,
      "learning_rate": 9.588437234372015e-06,
      "loss": 0.0601,
      "step": 144920
    },
    {
      "epoch": 0.23719748892074652,
      "grad_norm": 2.4107344150543213,
      "learning_rate": 9.588371342158498e-06,
      "loss": 0.0548,
      "step": 144940
    },
    {
      "epoch": 0.23723021935939986,
      "grad_norm": 13.351856231689453,
      "learning_rate": 9.588305449944982e-06,
      "loss": 0.0462,
      "step": 144960
    },
    {
      "epoch": 0.23726294979805318,
      "grad_norm": 4.581035614013672,
      "learning_rate": 9.588239557731464e-06,
      "loss": 0.0565,
      "step": 144980
    },
    {
      "epoch": 0.23729568023670652,
      "grad_norm": 4.726254463195801,
      "learning_rate": 9.588173665517948e-06,
      "loss": 0.0626,
      "step": 145000
    },
    {
      "epoch": 0.23732841067535987,
      "grad_norm": 0.8378092646598816,
      "learning_rate": 9.58810777330443e-06,
      "loss": 0.051,
      "step": 145020
    },
    {
      "epoch": 0.2373611411140132,
      "grad_norm": 2.360957145690918,
      "learning_rate": 9.588041881090913e-06,
      "loss": 0.0541,
      "step": 145040
    },
    {
      "epoch": 0.23739387155266656,
      "grad_norm": 1.9077740907669067,
      "learning_rate": 9.587975988877395e-06,
      "loss": 0.0534,
      "step": 145060
    },
    {
      "epoch": 0.23742660199131987,
      "grad_norm": 3.0266246795654297,
      "learning_rate": 9.587910096663879e-06,
      "loss": 0.0587,
      "step": 145080
    },
    {
      "epoch": 0.23745933242997322,
      "grad_norm": 2.057208776473999,
      "learning_rate": 9.58784420445036e-06,
      "loss": 0.0646,
      "step": 145100
    },
    {
      "epoch": 0.23749206286862656,
      "grad_norm": 1.8053334951400757,
      "learning_rate": 9.587778312236844e-06,
      "loss": 0.0554,
      "step": 145120
    },
    {
      "epoch": 0.2375247933072799,
      "grad_norm": 7.836539268493652,
      "learning_rate": 9.587712420023326e-06,
      "loss": 0.0424,
      "step": 145140
    },
    {
      "epoch": 0.23755752374593325,
      "grad_norm": 3.414364814758301,
      "learning_rate": 9.58764652780981e-06,
      "loss": 0.0589,
      "step": 145160
    },
    {
      "epoch": 0.23759025418458657,
      "grad_norm": 2.5169990062713623,
      "learning_rate": 9.587580635596291e-06,
      "loss": 0.038,
      "step": 145180
    },
    {
      "epoch": 0.2376229846232399,
      "grad_norm": 3.4132180213928223,
      "learning_rate": 9.587514743382775e-06,
      "loss": 0.0468,
      "step": 145200
    },
    {
      "epoch": 0.23765571506189326,
      "grad_norm": 3.1528475284576416,
      "learning_rate": 9.587448851169257e-06,
      "loss": 0.046,
      "step": 145220
    },
    {
      "epoch": 0.2376884455005466,
      "grad_norm": 1.4620779752731323,
      "learning_rate": 9.58738295895574e-06,
      "loss": 0.0607,
      "step": 145240
    },
    {
      "epoch": 0.23772117593919995,
      "grad_norm": 1.3435804843902588,
      "learning_rate": 9.587317066742224e-06,
      "loss": 0.0452,
      "step": 145260
    },
    {
      "epoch": 0.23775390637785326,
      "grad_norm": 2.588440418243408,
      "learning_rate": 9.587251174528706e-06,
      "loss": 0.0619,
      "step": 145280
    },
    {
      "epoch": 0.2377866368165066,
      "grad_norm": 1.0988863706588745,
      "learning_rate": 9.58718528231519e-06,
      "loss": 0.0578,
      "step": 145300
    },
    {
      "epoch": 0.23781936725515995,
      "grad_norm": 3.496239185333252,
      "learning_rate": 9.587119390101673e-06,
      "loss": 0.0588,
      "step": 145320
    },
    {
      "epoch": 0.2378520976938133,
      "grad_norm": 0.42473384737968445,
      "learning_rate": 9.587053497888155e-06,
      "loss": 0.0598,
      "step": 145340
    },
    {
      "epoch": 0.23788482813246664,
      "grad_norm": 2.1387176513671875,
      "learning_rate": 9.586987605674639e-06,
      "loss": 0.0764,
      "step": 145360
    },
    {
      "epoch": 0.23791755857111996,
      "grad_norm": 4.622932434082031,
      "learning_rate": 9.586921713461122e-06,
      "loss": 0.0615,
      "step": 145380
    },
    {
      "epoch": 0.2379502890097733,
      "grad_norm": 1.3315072059631348,
      "learning_rate": 9.586855821247604e-06,
      "loss": 0.0507,
      "step": 145400
    },
    {
      "epoch": 0.23798301944842665,
      "grad_norm": 5.503266334533691,
      "learning_rate": 9.586789929034088e-06,
      "loss": 0.0571,
      "step": 145420
    },
    {
      "epoch": 0.23801574988708,
      "grad_norm": 1.256962537765503,
      "learning_rate": 9.58672403682057e-06,
      "loss": 0.0589,
      "step": 145440
    },
    {
      "epoch": 0.23804848032573334,
      "grad_norm": 0.9145877957344055,
      "learning_rate": 9.586658144607053e-06,
      "loss": 0.0438,
      "step": 145460
    },
    {
      "epoch": 0.23808121076438665,
      "grad_norm": 4.682957172393799,
      "learning_rate": 9.586592252393535e-06,
      "loss": 0.0607,
      "step": 145480
    },
    {
      "epoch": 0.23811394120304,
      "grad_norm": 3.100454807281494,
      "learning_rate": 9.586526360180019e-06,
      "loss": 0.0591,
      "step": 145500
    },
    {
      "epoch": 0.23814667164169334,
      "grad_norm": 1.4139271974563599,
      "learning_rate": 9.5864604679665e-06,
      "loss": 0.0795,
      "step": 145520
    },
    {
      "epoch": 0.23817940208034669,
      "grad_norm": 3.452040195465088,
      "learning_rate": 9.586394575752984e-06,
      "loss": 0.0652,
      "step": 145540
    },
    {
      "epoch": 0.23821213251900003,
      "grad_norm": 2.093832492828369,
      "learning_rate": 9.586328683539466e-06,
      "loss": 0.0595,
      "step": 145560
    },
    {
      "epoch": 0.23824486295765335,
      "grad_norm": 2.4632041454315186,
      "learning_rate": 9.58626279132595e-06,
      "loss": 0.0735,
      "step": 145580
    },
    {
      "epoch": 0.2382775933963067,
      "grad_norm": 1.4119188785552979,
      "learning_rate": 9.586196899112431e-06,
      "loss": 0.0618,
      "step": 145600
    },
    {
      "epoch": 0.23831032383496004,
      "grad_norm": 4.673976421356201,
      "learning_rate": 9.586131006898915e-06,
      "loss": 0.0584,
      "step": 145620
    },
    {
      "epoch": 0.23834305427361338,
      "grad_norm": 1.1819560527801514,
      "learning_rate": 9.586065114685399e-06,
      "loss": 0.056,
      "step": 145640
    },
    {
      "epoch": 0.23837578471226673,
      "grad_norm": 3.080948829650879,
      "learning_rate": 9.58599922247188e-06,
      "loss": 0.0479,
      "step": 145660
    },
    {
      "epoch": 0.23840851515092004,
      "grad_norm": 1.1154741048812866,
      "learning_rate": 9.585933330258364e-06,
      "loss": 0.0615,
      "step": 145680
    },
    {
      "epoch": 0.2384412455895734,
      "grad_norm": 3.291659116744995,
      "learning_rate": 9.585867438044848e-06,
      "loss": 0.0524,
      "step": 145700
    },
    {
      "epoch": 0.23847397602822673,
      "grad_norm": 3.621476650238037,
      "learning_rate": 9.58580154583133e-06,
      "loss": 0.0654,
      "step": 145720
    },
    {
      "epoch": 0.23850670646688008,
      "grad_norm": 1.3527958393096924,
      "learning_rate": 9.585735653617813e-06,
      "loss": 0.0485,
      "step": 145740
    },
    {
      "epoch": 0.23853943690553342,
      "grad_norm": 2.1856086254119873,
      "learning_rate": 9.585669761404297e-06,
      "loss": 0.0619,
      "step": 145760
    },
    {
      "epoch": 0.23857216734418674,
      "grad_norm": 1.0842316150665283,
      "learning_rate": 9.585603869190779e-06,
      "loss": 0.0599,
      "step": 145780
    },
    {
      "epoch": 0.23860489778284008,
      "grad_norm": 3.0782554149627686,
      "learning_rate": 9.585537976977262e-06,
      "loss": 0.0734,
      "step": 145800
    },
    {
      "epoch": 0.23863762822149343,
      "grad_norm": 1.5541155338287354,
      "learning_rate": 9.585472084763744e-06,
      "loss": 0.0514,
      "step": 145820
    },
    {
      "epoch": 0.23867035866014677,
      "grad_norm": 0.9012951254844666,
      "learning_rate": 9.585406192550228e-06,
      "loss": 0.052,
      "step": 145840
    },
    {
      "epoch": 0.23870308909880011,
      "grad_norm": 1.0786595344543457,
      "learning_rate": 9.58534030033671e-06,
      "loss": 0.0585,
      "step": 145860
    },
    {
      "epoch": 0.23873581953745343,
      "grad_norm": 1.3572324514389038,
      "learning_rate": 9.585274408123193e-06,
      "loss": 0.0542,
      "step": 145880
    },
    {
      "epoch": 0.23876854997610678,
      "grad_norm": 2.582291841506958,
      "learning_rate": 9.585208515909675e-06,
      "loss": 0.0653,
      "step": 145900
    },
    {
      "epoch": 0.23880128041476012,
      "grad_norm": 4.002479076385498,
      "learning_rate": 9.585142623696159e-06,
      "loss": 0.0412,
      "step": 145920
    },
    {
      "epoch": 0.23883401085341346,
      "grad_norm": 2.2075705528259277,
      "learning_rate": 9.58507673148264e-06,
      "loss": 0.0564,
      "step": 145940
    },
    {
      "epoch": 0.2388667412920668,
      "grad_norm": 2.070615768432617,
      "learning_rate": 9.585010839269124e-06,
      "loss": 0.052,
      "step": 145960
    },
    {
      "epoch": 0.23889947173072013,
      "grad_norm": 1.3652406930923462,
      "learning_rate": 9.584944947055608e-06,
      "loss": 0.0619,
      "step": 145980
    },
    {
      "epoch": 0.23893220216937347,
      "grad_norm": 1.7309750318527222,
      "learning_rate": 9.58487905484209e-06,
      "loss": 0.0504,
      "step": 146000
    },
    {
      "epoch": 0.23896493260802681,
      "grad_norm": 2.8554999828338623,
      "learning_rate": 9.584813162628573e-06,
      "loss": 0.06,
      "step": 146020
    },
    {
      "epoch": 0.23899766304668016,
      "grad_norm": 5.103314399719238,
      "learning_rate": 9.584747270415055e-06,
      "loss": 0.0524,
      "step": 146040
    },
    {
      "epoch": 0.2390303934853335,
      "grad_norm": 0.7714303135871887,
      "learning_rate": 9.584681378201539e-06,
      "loss": 0.0545,
      "step": 146060
    },
    {
      "epoch": 0.23906312392398682,
      "grad_norm": 2.020940065383911,
      "learning_rate": 9.58461548598802e-06,
      "loss": 0.041,
      "step": 146080
    },
    {
      "epoch": 0.23909585436264016,
      "grad_norm": 3.2957706451416016,
      "learning_rate": 9.584549593774504e-06,
      "loss": 0.074,
      "step": 146100
    },
    {
      "epoch": 0.2391285848012935,
      "grad_norm": 2.823056221008301,
      "learning_rate": 9.584483701560988e-06,
      "loss": 0.0691,
      "step": 146120
    },
    {
      "epoch": 0.23916131523994685,
      "grad_norm": 0.9385611414909363,
      "learning_rate": 9.58441780934747e-06,
      "loss": 0.0487,
      "step": 146140
    },
    {
      "epoch": 0.2391940456786002,
      "grad_norm": 3.856928825378418,
      "learning_rate": 9.584351917133953e-06,
      "loss": 0.0689,
      "step": 146160
    },
    {
      "epoch": 0.23922677611725351,
      "grad_norm": 1.5529896020889282,
      "learning_rate": 9.584286024920437e-06,
      "loss": 0.0529,
      "step": 146180
    },
    {
      "epoch": 0.23925950655590686,
      "grad_norm": 0.7538465261459351,
      "learning_rate": 9.584220132706919e-06,
      "loss": 0.039,
      "step": 146200
    },
    {
      "epoch": 0.2392922369945602,
      "grad_norm": 2.732254981994629,
      "learning_rate": 9.584154240493402e-06,
      "loss": 0.0602,
      "step": 146220
    },
    {
      "epoch": 0.23932496743321355,
      "grad_norm": 9.158520698547363,
      "learning_rate": 9.584088348279884e-06,
      "loss": 0.0683,
      "step": 146240
    },
    {
      "epoch": 0.2393576978718669,
      "grad_norm": 1.967740535736084,
      "learning_rate": 9.584022456066368e-06,
      "loss": 0.0697,
      "step": 146260
    },
    {
      "epoch": 0.2393904283105202,
      "grad_norm": 2.132441759109497,
      "learning_rate": 9.58395656385285e-06,
      "loss": 0.0527,
      "step": 146280
    },
    {
      "epoch": 0.23942315874917355,
      "grad_norm": 2.3155829906463623,
      "learning_rate": 9.583890671639333e-06,
      "loss": 0.0544,
      "step": 146300
    },
    {
      "epoch": 0.2394558891878269,
      "grad_norm": 0.6690428853034973,
      "learning_rate": 9.583824779425817e-06,
      "loss": 0.0435,
      "step": 146320
    },
    {
      "epoch": 0.23948861962648024,
      "grad_norm": 11.699578285217285,
      "learning_rate": 9.583758887212299e-06,
      "loss": 0.0633,
      "step": 146340
    },
    {
      "epoch": 0.23952135006513356,
      "grad_norm": 5.901641368865967,
      "learning_rate": 9.583692994998782e-06,
      "loss": 0.0519,
      "step": 146360
    },
    {
      "epoch": 0.2395540805037869,
      "grad_norm": 1.201819658279419,
      "learning_rate": 9.583627102785264e-06,
      "loss": 0.0449,
      "step": 146380
    },
    {
      "epoch": 0.23958681094244025,
      "grad_norm": 1.6036053895950317,
      "learning_rate": 9.583561210571748e-06,
      "loss": 0.042,
      "step": 146400
    },
    {
      "epoch": 0.2396195413810936,
      "grad_norm": 1.7499052286148071,
      "learning_rate": 9.58349531835823e-06,
      "loss": 0.0529,
      "step": 146420
    },
    {
      "epoch": 0.23965227181974694,
      "grad_norm": 1.6135166883468628,
      "learning_rate": 9.583429426144713e-06,
      "loss": 0.0637,
      "step": 146440
    },
    {
      "epoch": 0.23968500225840025,
      "grad_norm": 4.433717250823975,
      "learning_rate": 9.583363533931195e-06,
      "loss": 0.0663,
      "step": 146460
    },
    {
      "epoch": 0.2397177326970536,
      "grad_norm": 1.2399438619613647,
      "learning_rate": 9.583297641717679e-06,
      "loss": 0.0649,
      "step": 146480
    },
    {
      "epoch": 0.23975046313570694,
      "grad_norm": 4.788046360015869,
      "learning_rate": 9.583231749504162e-06,
      "loss": 0.0547,
      "step": 146500
    },
    {
      "epoch": 0.2397831935743603,
      "grad_norm": 1.0488712787628174,
      "learning_rate": 9.583165857290644e-06,
      "loss": 0.0641,
      "step": 146520
    },
    {
      "epoch": 0.23981592401301363,
      "grad_norm": 8.663869857788086,
      "learning_rate": 9.583099965077128e-06,
      "loss": 0.0445,
      "step": 146540
    },
    {
      "epoch": 0.23984865445166695,
      "grad_norm": 5.846449851989746,
      "learning_rate": 9.583034072863611e-06,
      "loss": 0.0667,
      "step": 146560
    },
    {
      "epoch": 0.2398813848903203,
      "grad_norm": 1.8586450815200806,
      "learning_rate": 9.582968180650093e-06,
      "loss": 0.0744,
      "step": 146580
    },
    {
      "epoch": 0.23991411532897364,
      "grad_norm": 3.8960070610046387,
      "learning_rate": 9.582902288436577e-06,
      "loss": 0.0694,
      "step": 146600
    },
    {
      "epoch": 0.23994684576762698,
      "grad_norm": 0.8669576644897461,
      "learning_rate": 9.582836396223059e-06,
      "loss": 0.0492,
      "step": 146620
    },
    {
      "epoch": 0.23997957620628033,
      "grad_norm": 2.2852206230163574,
      "learning_rate": 9.582770504009542e-06,
      "loss": 0.0437,
      "step": 146640
    },
    {
      "epoch": 0.24001230664493364,
      "grad_norm": 2.3174383640289307,
      "learning_rate": 9.582704611796026e-06,
      "loss": 0.0669,
      "step": 146660
    },
    {
      "epoch": 0.240045037083587,
      "grad_norm": 1.2782782316207886,
      "learning_rate": 9.582638719582508e-06,
      "loss": 0.059,
      "step": 146680
    },
    {
      "epoch": 0.24007776752224033,
      "grad_norm": 1.7492104768753052,
      "learning_rate": 9.582572827368991e-06,
      "loss": 0.0666,
      "step": 146700
    },
    {
      "epoch": 0.24011049796089368,
      "grad_norm": 2.322216272354126,
      "learning_rate": 9.582506935155473e-06,
      "loss": 0.0441,
      "step": 146720
    },
    {
      "epoch": 0.24014322839954702,
      "grad_norm": 0.7335741519927979,
      "learning_rate": 9.582441042941957e-06,
      "loss": 0.026,
      "step": 146740
    },
    {
      "epoch": 0.24017595883820034,
      "grad_norm": 12.586138725280762,
      "learning_rate": 9.582375150728439e-06,
      "loss": 0.0373,
      "step": 146760
    },
    {
      "epoch": 0.24020868927685368,
      "grad_norm": 1.4935556650161743,
      "learning_rate": 9.582309258514922e-06,
      "loss": 0.0554,
      "step": 146780
    },
    {
      "epoch": 0.24024141971550703,
      "grad_norm": 2.1107707023620605,
      "learning_rate": 9.582243366301404e-06,
      "loss": 0.0626,
      "step": 146800
    },
    {
      "epoch": 0.24027415015416037,
      "grad_norm": 1.8720260858535767,
      "learning_rate": 9.582177474087888e-06,
      "loss": 0.0518,
      "step": 146820
    },
    {
      "epoch": 0.24030688059281372,
      "grad_norm": 1.1607880592346191,
      "learning_rate": 9.58211158187437e-06,
      "loss": 0.0643,
      "step": 146840
    },
    {
      "epoch": 0.24033961103146703,
      "grad_norm": 2.852067232131958,
      "learning_rate": 9.582045689660853e-06,
      "loss": 0.0626,
      "step": 146860
    },
    {
      "epoch": 0.24037234147012038,
      "grad_norm": 2.3945155143737793,
      "learning_rate": 9.581979797447335e-06,
      "loss": 0.0563,
      "step": 146880
    },
    {
      "epoch": 0.24040507190877372,
      "grad_norm": 5.353707313537598,
      "learning_rate": 9.581913905233819e-06,
      "loss": 0.079,
      "step": 146900
    },
    {
      "epoch": 0.24043780234742707,
      "grad_norm": 5.2042460441589355,
      "learning_rate": 9.581848013020302e-06,
      "loss": 0.0614,
      "step": 146920
    },
    {
      "epoch": 0.2404705327860804,
      "grad_norm": 1.219384789466858,
      "learning_rate": 9.581782120806784e-06,
      "loss": 0.0518,
      "step": 146940
    },
    {
      "epoch": 0.24050326322473373,
      "grad_norm": 2.1603970527648926,
      "learning_rate": 9.581716228593268e-06,
      "loss": 0.0618,
      "step": 146960
    },
    {
      "epoch": 0.24053599366338707,
      "grad_norm": 1.3052630424499512,
      "learning_rate": 9.581650336379751e-06,
      "loss": 0.0448,
      "step": 146980
    },
    {
      "epoch": 0.24056872410204042,
      "grad_norm": 2.365460157394409,
      "learning_rate": 9.581584444166233e-06,
      "loss": 0.0706,
      "step": 147000
    },
    {
      "epoch": 0.24060145454069376,
      "grad_norm": 2.448561668395996,
      "learning_rate": 9.581518551952717e-06,
      "loss": 0.0678,
      "step": 147020
    },
    {
      "epoch": 0.2406341849793471,
      "grad_norm": 2.5171422958374023,
      "learning_rate": 9.5814526597392e-06,
      "loss": 0.0616,
      "step": 147040
    },
    {
      "epoch": 0.24066691541800042,
      "grad_norm": 1.3399696350097656,
      "learning_rate": 9.581386767525682e-06,
      "loss": 0.0697,
      "step": 147060
    },
    {
      "epoch": 0.24069964585665377,
      "grad_norm": 2.7277567386627197,
      "learning_rate": 9.581320875312166e-06,
      "loss": 0.0555,
      "step": 147080
    },
    {
      "epoch": 0.2407323762953071,
      "grad_norm": 24.27251434326172,
      "learning_rate": 9.581254983098648e-06,
      "loss": 0.0697,
      "step": 147100
    },
    {
      "epoch": 0.24076510673396045,
      "grad_norm": 2.9693713188171387,
      "learning_rate": 9.581189090885132e-06,
      "loss": 0.0465,
      "step": 147120
    },
    {
      "epoch": 0.2407978371726138,
      "grad_norm": 3.293104887008667,
      "learning_rate": 9.581123198671613e-06,
      "loss": 0.0555,
      "step": 147140
    },
    {
      "epoch": 0.24083056761126712,
      "grad_norm": 0.5681620240211487,
      "learning_rate": 9.581057306458097e-06,
      "loss": 0.0594,
      "step": 147160
    },
    {
      "epoch": 0.24086329804992046,
      "grad_norm": 2.714245319366455,
      "learning_rate": 9.580991414244579e-06,
      "loss": 0.0589,
      "step": 147180
    },
    {
      "epoch": 0.2408960284885738,
      "grad_norm": 2.6296913623809814,
      "learning_rate": 9.580925522031062e-06,
      "loss": 0.0623,
      "step": 147200
    },
    {
      "epoch": 0.24092875892722715,
      "grad_norm": 1.4932808876037598,
      "learning_rate": 9.580859629817544e-06,
      "loss": 0.0617,
      "step": 147220
    },
    {
      "epoch": 0.2409614893658805,
      "grad_norm": 1.4402416944503784,
      "learning_rate": 9.580793737604028e-06,
      "loss": 0.0377,
      "step": 147240
    },
    {
      "epoch": 0.2409942198045338,
      "grad_norm": 6.2487006187438965,
      "learning_rate": 9.58072784539051e-06,
      "loss": 0.0608,
      "step": 147260
    },
    {
      "epoch": 0.24102695024318715,
      "grad_norm": 3.004852771759033,
      "learning_rate": 9.580661953176993e-06,
      "loss": 0.0654,
      "step": 147280
    },
    {
      "epoch": 0.2410596806818405,
      "grad_norm": 1.5788573026657104,
      "learning_rate": 9.580596060963477e-06,
      "loss": 0.0583,
      "step": 147300
    },
    {
      "epoch": 0.24109241112049384,
      "grad_norm": 2.3198318481445312,
      "learning_rate": 9.580530168749959e-06,
      "loss": 0.0648,
      "step": 147320
    },
    {
      "epoch": 0.2411251415591472,
      "grad_norm": 1.3864331245422363,
      "learning_rate": 9.580464276536442e-06,
      "loss": 0.0731,
      "step": 147340
    },
    {
      "epoch": 0.2411578719978005,
      "grad_norm": 4.723691463470459,
      "learning_rate": 9.580398384322926e-06,
      "loss": 0.055,
      "step": 147360
    },
    {
      "epoch": 0.24119060243645385,
      "grad_norm": 4.484902858734131,
      "learning_rate": 9.580332492109408e-06,
      "loss": 0.051,
      "step": 147380
    },
    {
      "epoch": 0.2412233328751072,
      "grad_norm": 0.8652536869049072,
      "learning_rate": 9.580266599895892e-06,
      "loss": 0.0492,
      "step": 147400
    },
    {
      "epoch": 0.24125606331376054,
      "grad_norm": 0.881644606590271,
      "learning_rate": 9.580200707682375e-06,
      "loss": 0.0559,
      "step": 147420
    },
    {
      "epoch": 0.24128879375241388,
      "grad_norm": 4.713554382324219,
      "learning_rate": 9.580134815468857e-06,
      "loss": 0.0659,
      "step": 147440
    },
    {
      "epoch": 0.2413215241910672,
      "grad_norm": 2.7606894969940186,
      "learning_rate": 9.58006892325534e-06,
      "loss": 0.0582,
      "step": 147460
    },
    {
      "epoch": 0.24135425462972054,
      "grad_norm": 3.9527082443237305,
      "learning_rate": 9.580003031041823e-06,
      "loss": 0.058,
      "step": 147480
    },
    {
      "epoch": 0.2413869850683739,
      "grad_norm": 1.26620614528656,
      "learning_rate": 9.579937138828306e-06,
      "loss": 0.0483,
      "step": 147500
    },
    {
      "epoch": 0.24141971550702723,
      "grad_norm": 1.429495930671692,
      "learning_rate": 9.579871246614788e-06,
      "loss": 0.0531,
      "step": 147520
    },
    {
      "epoch": 0.24145244594568058,
      "grad_norm": 0.5880086421966553,
      "learning_rate": 9.579805354401272e-06,
      "loss": 0.0522,
      "step": 147540
    },
    {
      "epoch": 0.2414851763843339,
      "grad_norm": 3.0962486267089844,
      "learning_rate": 9.579739462187753e-06,
      "loss": 0.0551,
      "step": 147560
    },
    {
      "epoch": 0.24151790682298724,
      "grad_norm": 1.931931495666504,
      "learning_rate": 9.579673569974237e-06,
      "loss": 0.0621,
      "step": 147580
    },
    {
      "epoch": 0.24155063726164058,
      "grad_norm": 2.534235954284668,
      "learning_rate": 9.579607677760719e-06,
      "loss": 0.0642,
      "step": 147600
    },
    {
      "epoch": 0.24158336770029393,
      "grad_norm": 2.1672511100769043,
      "learning_rate": 9.579541785547203e-06,
      "loss": 0.051,
      "step": 147620
    },
    {
      "epoch": 0.24161609813894727,
      "grad_norm": 1.0851852893829346,
      "learning_rate": 9.579475893333684e-06,
      "loss": 0.061,
      "step": 147640
    },
    {
      "epoch": 0.2416488285776006,
      "grad_norm": 3.613372325897217,
      "learning_rate": 9.579410001120168e-06,
      "loss": 0.0579,
      "step": 147660
    },
    {
      "epoch": 0.24168155901625393,
      "grad_norm": 4.207603931427002,
      "learning_rate": 9.579344108906652e-06,
      "loss": 0.06,
      "step": 147680
    },
    {
      "epoch": 0.24171428945490728,
      "grad_norm": 1.3144370317459106,
      "learning_rate": 9.579278216693134e-06,
      "loss": 0.0569,
      "step": 147700
    },
    {
      "epoch": 0.24174701989356062,
      "grad_norm": 5.069886207580566,
      "learning_rate": 9.579212324479617e-06,
      "loss": 0.0689,
      "step": 147720
    },
    {
      "epoch": 0.24177975033221394,
      "grad_norm": 1.793842077255249,
      "learning_rate": 9.5791464322661e-06,
      "loss": 0.0704,
      "step": 147740
    },
    {
      "epoch": 0.24181248077086728,
      "grad_norm": 1.5325886011123657,
      "learning_rate": 9.579080540052583e-06,
      "loss": 0.0544,
      "step": 147760
    },
    {
      "epoch": 0.24184521120952063,
      "grad_norm": 2.2219526767730713,
      "learning_rate": 9.579014647839066e-06,
      "loss": 0.0601,
      "step": 147780
    },
    {
      "epoch": 0.24187794164817397,
      "grad_norm": 1.535891056060791,
      "learning_rate": 9.57894875562555e-06,
      "loss": 0.059,
      "step": 147800
    },
    {
      "epoch": 0.24191067208682732,
      "grad_norm": 3.5667037963867188,
      "learning_rate": 9.578882863412032e-06,
      "loss": 0.0644,
      "step": 147820
    },
    {
      "epoch": 0.24194340252548063,
      "grad_norm": 4.029683589935303,
      "learning_rate": 9.578816971198515e-06,
      "loss": 0.0674,
      "step": 147840
    },
    {
      "epoch": 0.24197613296413398,
      "grad_norm": 2.3495564460754395,
      "learning_rate": 9.578751078984997e-06,
      "loss": 0.0593,
      "step": 147860
    },
    {
      "epoch": 0.24200886340278732,
      "grad_norm": 5.404991626739502,
      "learning_rate": 9.57868518677148e-06,
      "loss": 0.0603,
      "step": 147880
    },
    {
      "epoch": 0.24204159384144067,
      "grad_norm": 2.8625032901763916,
      "learning_rate": 9.578619294557963e-06,
      "loss": 0.0544,
      "step": 147900
    },
    {
      "epoch": 0.242074324280094,
      "grad_norm": 4.919280052185059,
      "learning_rate": 9.578553402344446e-06,
      "loss": 0.0665,
      "step": 147920
    },
    {
      "epoch": 0.24210705471874733,
      "grad_norm": 2.3889858722686768,
      "learning_rate": 9.578487510130928e-06,
      "loss": 0.0609,
      "step": 147940
    },
    {
      "epoch": 0.24213978515740067,
      "grad_norm": 0.7572718262672424,
      "learning_rate": 9.578421617917412e-06,
      "loss": 0.0479,
      "step": 147960
    },
    {
      "epoch": 0.24217251559605402,
      "grad_norm": 1.0319490432739258,
      "learning_rate": 9.578355725703894e-06,
      "loss": 0.0472,
      "step": 147980
    },
    {
      "epoch": 0.24220524603470736,
      "grad_norm": 2.618403196334839,
      "learning_rate": 9.578289833490377e-06,
      "loss": 0.0551,
      "step": 148000
    },
    {
      "epoch": 0.2422379764733607,
      "grad_norm": 0.6443394422531128,
      "learning_rate": 9.578223941276859e-06,
      "loss": 0.0539,
      "step": 148020
    },
    {
      "epoch": 0.24227070691201402,
      "grad_norm": 2.1016855239868164,
      "learning_rate": 9.578158049063343e-06,
      "loss": 0.058,
      "step": 148040
    },
    {
      "epoch": 0.24230343735066737,
      "grad_norm": 2.6404120922088623,
      "learning_rate": 9.578092156849825e-06,
      "loss": 0.0605,
      "step": 148060
    },
    {
      "epoch": 0.2423361677893207,
      "grad_norm": 1.8217110633850098,
      "learning_rate": 9.578026264636308e-06,
      "loss": 0.07,
      "step": 148080
    },
    {
      "epoch": 0.24236889822797406,
      "grad_norm": 7.289804935455322,
      "learning_rate": 9.577960372422792e-06,
      "loss": 0.0496,
      "step": 148100
    },
    {
      "epoch": 0.2424016286666274,
      "grad_norm": 1.9148783683776855,
      "learning_rate": 9.577894480209274e-06,
      "loss": 0.0726,
      "step": 148120
    },
    {
      "epoch": 0.24243435910528072,
      "grad_norm": 3.0868234634399414,
      "learning_rate": 9.577828587995757e-06,
      "loss": 0.0621,
      "step": 148140
    },
    {
      "epoch": 0.24246708954393406,
      "grad_norm": 5.525757789611816,
      "learning_rate": 9.57776269578224e-06,
      "loss": 0.0732,
      "step": 148160
    },
    {
      "epoch": 0.2424998199825874,
      "grad_norm": 2.7754268646240234,
      "learning_rate": 9.577696803568723e-06,
      "loss": 0.0648,
      "step": 148180
    },
    {
      "epoch": 0.24253255042124075,
      "grad_norm": 2.0713155269622803,
      "learning_rate": 9.577630911355206e-06,
      "loss": 0.059,
      "step": 148200
    },
    {
      "epoch": 0.2425652808598941,
      "grad_norm": 2.60971999168396,
      "learning_rate": 9.57756501914169e-06,
      "loss": 0.0566,
      "step": 148220
    },
    {
      "epoch": 0.2425980112985474,
      "grad_norm": 2.5628843307495117,
      "learning_rate": 9.577499126928172e-06,
      "loss": 0.0592,
      "step": 148240
    },
    {
      "epoch": 0.24263074173720076,
      "grad_norm": 3.2199418544769287,
      "learning_rate": 9.577433234714655e-06,
      "loss": 0.0589,
      "step": 148260
    },
    {
      "epoch": 0.2426634721758541,
      "grad_norm": 0.49440571665763855,
      "learning_rate": 9.577367342501137e-06,
      "loss": 0.0704,
      "step": 148280
    },
    {
      "epoch": 0.24269620261450744,
      "grad_norm": 1.1275889873504639,
      "learning_rate": 9.57730145028762e-06,
      "loss": 0.0671,
      "step": 148300
    },
    {
      "epoch": 0.2427289330531608,
      "grad_norm": 3.664259195327759,
      "learning_rate": 9.577235558074103e-06,
      "loss": 0.0709,
      "step": 148320
    },
    {
      "epoch": 0.2427616634918141,
      "grad_norm": 2.4275400638580322,
      "learning_rate": 9.577169665860586e-06,
      "loss": 0.0594,
      "step": 148340
    },
    {
      "epoch": 0.24279439393046745,
      "grad_norm": 2.003761053085327,
      "learning_rate": 9.577103773647068e-06,
      "loss": 0.0612,
      "step": 148360
    },
    {
      "epoch": 0.2428271243691208,
      "grad_norm": 8.06137466430664,
      "learning_rate": 9.577037881433552e-06,
      "loss": 0.0549,
      "step": 148380
    },
    {
      "epoch": 0.24285985480777414,
      "grad_norm": 1.377731204032898,
      "learning_rate": 9.576971989220034e-06,
      "loss": 0.0549,
      "step": 148400
    },
    {
      "epoch": 0.24289258524642748,
      "grad_norm": 2.7696611881256104,
      "learning_rate": 9.576906097006517e-06,
      "loss": 0.05,
      "step": 148420
    },
    {
      "epoch": 0.2429253156850808,
      "grad_norm": 1.0904536247253418,
      "learning_rate": 9.576840204793e-06,
      "loss": 0.0577,
      "step": 148440
    },
    {
      "epoch": 0.24295804612373414,
      "grad_norm": 1.4931001663208008,
      "learning_rate": 9.576774312579483e-06,
      "loss": 0.0501,
      "step": 148460
    },
    {
      "epoch": 0.2429907765623875,
      "grad_norm": 1.4070758819580078,
      "learning_rate": 9.576708420365966e-06,
      "loss": 0.047,
      "step": 148480
    },
    {
      "epoch": 0.24302350700104083,
      "grad_norm": 1.8749979734420776,
      "learning_rate": 9.576642528152448e-06,
      "loss": 0.0712,
      "step": 148500
    },
    {
      "epoch": 0.24305623743969418,
      "grad_norm": 4.740206241607666,
      "learning_rate": 9.576576635938932e-06,
      "loss": 0.0404,
      "step": 148520
    },
    {
      "epoch": 0.2430889678783475,
      "grad_norm": 1.417576789855957,
      "learning_rate": 9.576510743725415e-06,
      "loss": 0.0523,
      "step": 148540
    },
    {
      "epoch": 0.24312169831700084,
      "grad_norm": 2.658825159072876,
      "learning_rate": 9.576444851511897e-06,
      "loss": 0.0587,
      "step": 148560
    },
    {
      "epoch": 0.24315442875565418,
      "grad_norm": 1.2434542179107666,
      "learning_rate": 9.57637895929838e-06,
      "loss": 0.0548,
      "step": 148580
    },
    {
      "epoch": 0.24318715919430753,
      "grad_norm": 5.453131675720215,
      "learning_rate": 9.576313067084864e-06,
      "loss": 0.0638,
      "step": 148600
    },
    {
      "epoch": 0.24321988963296087,
      "grad_norm": 2.3741142749786377,
      "learning_rate": 9.576247174871346e-06,
      "loss": 0.0725,
      "step": 148620
    },
    {
      "epoch": 0.2432526200716142,
      "grad_norm": 3.052722215652466,
      "learning_rate": 9.57618128265783e-06,
      "loss": 0.0657,
      "step": 148640
    },
    {
      "epoch": 0.24328535051026753,
      "grad_norm": 3.592702627182007,
      "learning_rate": 9.576115390444312e-06,
      "loss": 0.0453,
      "step": 148660
    },
    {
      "epoch": 0.24331808094892088,
      "grad_norm": 1.9576443433761597,
      "learning_rate": 9.576049498230795e-06,
      "loss": 0.0757,
      "step": 148680
    },
    {
      "epoch": 0.24335081138757422,
      "grad_norm": 0.8467056751251221,
      "learning_rate": 9.575983606017277e-06,
      "loss": 0.0452,
      "step": 148700
    },
    {
      "epoch": 0.24338354182622757,
      "grad_norm": 0.3732950985431671,
      "learning_rate": 9.575917713803761e-06,
      "loss": 0.0556,
      "step": 148720
    },
    {
      "epoch": 0.24341627226488088,
      "grad_norm": 7.38003396987915,
      "learning_rate": 9.575851821590243e-06,
      "loss": 0.0729,
      "step": 148740
    },
    {
      "epoch": 0.24344900270353423,
      "grad_norm": 1.5588734149932861,
      "learning_rate": 9.575785929376726e-06,
      "loss": 0.064,
      "step": 148760
    },
    {
      "epoch": 0.24348173314218757,
      "grad_norm": 0.8420317769050598,
      "learning_rate": 9.57572003716321e-06,
      "loss": 0.0543,
      "step": 148780
    },
    {
      "epoch": 0.24351446358084092,
      "grad_norm": 1.8725612163543701,
      "learning_rate": 9.575654144949692e-06,
      "loss": 0.0414,
      "step": 148800
    },
    {
      "epoch": 0.24354719401949426,
      "grad_norm": 4.387650489807129,
      "learning_rate": 9.575588252736175e-06,
      "loss": 0.0699,
      "step": 148820
    },
    {
      "epoch": 0.24357992445814758,
      "grad_norm": 0.9012681245803833,
      "learning_rate": 9.575522360522657e-06,
      "loss": 0.0565,
      "step": 148840
    },
    {
      "epoch": 0.24361265489680092,
      "grad_norm": 2.142094612121582,
      "learning_rate": 9.575456468309141e-06,
      "loss": 0.0681,
      "step": 148860
    },
    {
      "epoch": 0.24364538533545427,
      "grad_norm": 4.486361980438232,
      "learning_rate": 9.575390576095623e-06,
      "loss": 0.0656,
      "step": 148880
    },
    {
      "epoch": 0.2436781157741076,
      "grad_norm": 1.4107624292373657,
      "learning_rate": 9.575324683882106e-06,
      "loss": 0.055,
      "step": 148900
    },
    {
      "epoch": 0.24371084621276096,
      "grad_norm": 2.204771041870117,
      "learning_rate": 9.575258791668588e-06,
      "loss": 0.0471,
      "step": 148920
    },
    {
      "epoch": 0.24374357665141427,
      "grad_norm": 1.4249731302261353,
      "learning_rate": 9.575192899455072e-06,
      "loss": 0.0568,
      "step": 148940
    },
    {
      "epoch": 0.24377630709006762,
      "grad_norm": 1.829007863998413,
      "learning_rate": 9.575127007241555e-06,
      "loss": 0.0592,
      "step": 148960
    },
    {
      "epoch": 0.24380903752872096,
      "grad_norm": 4.348568439483643,
      "learning_rate": 9.575061115028037e-06,
      "loss": 0.0531,
      "step": 148980
    },
    {
      "epoch": 0.2438417679673743,
      "grad_norm": 1.376156210899353,
      "learning_rate": 9.574995222814521e-06,
      "loss": 0.0665,
      "step": 149000
    },
    {
      "epoch": 0.24387449840602765,
      "grad_norm": 0.6539850831031799,
      "learning_rate": 9.574929330601004e-06,
      "loss": 0.0559,
      "step": 149020
    },
    {
      "epoch": 0.24390722884468097,
      "grad_norm": 0.3180088400840759,
      "learning_rate": 9.574863438387486e-06,
      "loss": 0.0535,
      "step": 149040
    },
    {
      "epoch": 0.2439399592833343,
      "grad_norm": 0.5782182216644287,
      "learning_rate": 9.57479754617397e-06,
      "loss": 0.0593,
      "step": 149060
    },
    {
      "epoch": 0.24397268972198766,
      "grad_norm": 1.0838885307312012,
      "learning_rate": 9.574731653960452e-06,
      "loss": 0.0639,
      "step": 149080
    },
    {
      "epoch": 0.244005420160641,
      "grad_norm": 3.349032163619995,
      "learning_rate": 9.574665761746935e-06,
      "loss": 0.0562,
      "step": 149100
    },
    {
      "epoch": 0.24403815059929435,
      "grad_norm": 2.5502798557281494,
      "learning_rate": 9.574599869533417e-06,
      "loss": 0.0614,
      "step": 149120
    },
    {
      "epoch": 0.24407088103794766,
      "grad_norm": 0.6894341111183167,
      "learning_rate": 9.574533977319901e-06,
      "loss": 0.0573,
      "step": 149140
    },
    {
      "epoch": 0.244103611476601,
      "grad_norm": 4.092086315155029,
      "learning_rate": 9.574468085106385e-06,
      "loss": 0.0437,
      "step": 149160
    },
    {
      "epoch": 0.24413634191525435,
      "grad_norm": 5.726505279541016,
      "learning_rate": 9.574402192892866e-06,
      "loss": 0.0571,
      "step": 149180
    },
    {
      "epoch": 0.2441690723539077,
      "grad_norm": 2.4481968879699707,
      "learning_rate": 9.57433630067935e-06,
      "loss": 0.0705,
      "step": 149200
    },
    {
      "epoch": 0.244201802792561,
      "grad_norm": 1.5048097372055054,
      "learning_rate": 9.574270408465832e-06,
      "loss": 0.0598,
      "step": 149220
    },
    {
      "epoch": 0.24423453323121436,
      "grad_norm": 1.9720438718795776,
      "learning_rate": 9.574204516252315e-06,
      "loss": 0.0571,
      "step": 149240
    },
    {
      "epoch": 0.2442672636698677,
      "grad_norm": 2.1755425930023193,
      "learning_rate": 9.574138624038797e-06,
      "loss": 0.0562,
      "step": 149260
    },
    {
      "epoch": 0.24429999410852105,
      "grad_norm": 1.0708973407745361,
      "learning_rate": 9.574072731825281e-06,
      "loss": 0.0586,
      "step": 149280
    },
    {
      "epoch": 0.2443327245471744,
      "grad_norm": 1.3123915195465088,
      "learning_rate": 9.574006839611763e-06,
      "loss": 0.0692,
      "step": 149300
    },
    {
      "epoch": 0.2443654549858277,
      "grad_norm": 2.657074213027954,
      "learning_rate": 9.573940947398246e-06,
      "loss": 0.0597,
      "step": 149320
    },
    {
      "epoch": 0.24439818542448105,
      "grad_norm": 2.4937877655029297,
      "learning_rate": 9.57387505518473e-06,
      "loss": 0.0488,
      "step": 149340
    },
    {
      "epoch": 0.2444309158631344,
      "grad_norm": 0.9897172451019287,
      "learning_rate": 9.573809162971212e-06,
      "loss": 0.0645,
      "step": 149360
    },
    {
      "epoch": 0.24446364630178774,
      "grad_norm": 2.934964179992676,
      "learning_rate": 9.573743270757696e-06,
      "loss": 0.0505,
      "step": 149380
    },
    {
      "epoch": 0.24449637674044108,
      "grad_norm": 3.2548882961273193,
      "learning_rate": 9.573677378544179e-06,
      "loss": 0.0485,
      "step": 149400
    },
    {
      "epoch": 0.2445291071790944,
      "grad_norm": 1.7605693340301514,
      "learning_rate": 9.573611486330661e-06,
      "loss": 0.0451,
      "step": 149420
    },
    {
      "epoch": 0.24456183761774775,
      "grad_norm": 1.3718929290771484,
      "learning_rate": 9.573545594117145e-06,
      "loss": 0.0558,
      "step": 149440
    },
    {
      "epoch": 0.2445945680564011,
      "grad_norm": 1.9557667970657349,
      "learning_rate": 9.573479701903626e-06,
      "loss": 0.0527,
      "step": 149460
    },
    {
      "epoch": 0.24462729849505443,
      "grad_norm": 3.617199420928955,
      "learning_rate": 9.57341380969011e-06,
      "loss": 0.0584,
      "step": 149480
    },
    {
      "epoch": 0.24466002893370778,
      "grad_norm": 6.004734039306641,
      "learning_rate": 9.573347917476594e-06,
      "loss": 0.065,
      "step": 149500
    },
    {
      "epoch": 0.2446927593723611,
      "grad_norm": 1.9666860103607178,
      "learning_rate": 9.573282025263076e-06,
      "loss": 0.0533,
      "step": 149520
    },
    {
      "epoch": 0.24472548981101444,
      "grad_norm": 1.00665283203125,
      "learning_rate": 9.573216133049559e-06,
      "loss": 0.04,
      "step": 149540
    },
    {
      "epoch": 0.24475822024966779,
      "grad_norm": 0.7927422523498535,
      "learning_rate": 9.573150240836041e-06,
      "loss": 0.0571,
      "step": 149560
    },
    {
      "epoch": 0.24479095068832113,
      "grad_norm": 2.062682867050171,
      "learning_rate": 9.573084348622525e-06,
      "loss": 0.0486,
      "step": 149580
    },
    {
      "epoch": 0.24482368112697447,
      "grad_norm": 2.7354559898376465,
      "learning_rate": 9.573018456409006e-06,
      "loss": 0.059,
      "step": 149600
    },
    {
      "epoch": 0.2448564115656278,
      "grad_norm": 3.8332035541534424,
      "learning_rate": 9.57295256419549e-06,
      "loss": 0.046,
      "step": 149620
    },
    {
      "epoch": 0.24488914200428114,
      "grad_norm": 1.6022849082946777,
      "learning_rate": 9.572886671981972e-06,
      "loss": 0.0572,
      "step": 149640
    },
    {
      "epoch": 0.24492187244293448,
      "grad_norm": 0.6142081618309021,
      "learning_rate": 9.572820779768456e-06,
      "loss": 0.0449,
      "step": 149660
    },
    {
      "epoch": 0.24495460288158782,
      "grad_norm": 0.677276074886322,
      "learning_rate": 9.572754887554937e-06,
      "loss": 0.0618,
      "step": 149680
    },
    {
      "epoch": 0.24498733332024117,
      "grad_norm": 1.9472593069076538,
      "learning_rate": 9.572688995341421e-06,
      "loss": 0.0443,
      "step": 149700
    },
    {
      "epoch": 0.24502006375889449,
      "grad_norm": 2.354022741317749,
      "learning_rate": 9.572623103127903e-06,
      "loss": 0.0544,
      "step": 149720
    },
    {
      "epoch": 0.24505279419754783,
      "grad_norm": 4.052523136138916,
      "learning_rate": 9.572557210914387e-06,
      "loss": 0.0618,
      "step": 149740
    },
    {
      "epoch": 0.24508552463620117,
      "grad_norm": 1.3093037605285645,
      "learning_rate": 9.57249131870087e-06,
      "loss": 0.0755,
      "step": 149760
    },
    {
      "epoch": 0.24511825507485452,
      "grad_norm": 2.0938196182250977,
      "learning_rate": 9.572425426487354e-06,
      "loss": 0.0684,
      "step": 149780
    },
    {
      "epoch": 0.24515098551350786,
      "grad_norm": 5.678918838500977,
      "learning_rate": 9.572359534273836e-06,
      "loss": 0.0553,
      "step": 149800
    },
    {
      "epoch": 0.24518371595216118,
      "grad_norm": 0.5165150165557861,
      "learning_rate": 9.57229364206032e-06,
      "loss": 0.059,
      "step": 149820
    },
    {
      "epoch": 0.24521644639081452,
      "grad_norm": 2.2469887733459473,
      "learning_rate": 9.572227749846803e-06,
      "loss": 0.0537,
      "step": 149840
    },
    {
      "epoch": 0.24524917682946787,
      "grad_norm": 1.9611032009124756,
      "learning_rate": 9.572161857633285e-06,
      "loss": 0.0606,
      "step": 149860
    },
    {
      "epoch": 0.2452819072681212,
      "grad_norm": 1.829487919807434,
      "learning_rate": 9.572095965419768e-06,
      "loss": 0.0485,
      "step": 149880
    },
    {
      "epoch": 0.24531463770677456,
      "grad_norm": 2.9105639457702637,
      "learning_rate": 9.57203007320625e-06,
      "loss": 0.0593,
      "step": 149900
    },
    {
      "epoch": 0.24534736814542787,
      "grad_norm": 2.6882483959198,
      "learning_rate": 9.571964180992734e-06,
      "loss": 0.072,
      "step": 149920
    },
    {
      "epoch": 0.24538009858408122,
      "grad_norm": 0.857874870300293,
      "learning_rate": 9.571898288779216e-06,
      "loss": 0.0715,
      "step": 149940
    },
    {
      "epoch": 0.24541282902273456,
      "grad_norm": 4.625515460968018,
      "learning_rate": 9.5718323965657e-06,
      "loss": 0.0555,
      "step": 149960
    },
    {
      "epoch": 0.2454455594613879,
      "grad_norm": 2.436708927154541,
      "learning_rate": 9.571766504352181e-06,
      "loss": 0.0481,
      "step": 149980
    },
    {
      "epoch": 0.24547828990004125,
      "grad_norm": 1.6639631986618042,
      "learning_rate": 9.571700612138665e-06,
      "loss": 0.0687,
      "step": 150000
    },
    {
      "epoch": 0.24547828990004125,
      "eval_loss": 0.0285944826900959,
      "eval_runtime": 6511.2739,
      "eval_samples_per_second": 157.858,
      "eval_steps_per_second": 15.786,
      "eval_sts-dev_pearson_cosine": 0.9296965936517437,
      "eval_sts-dev_spearman_cosine": 0.8622713725553459,
      "step": 150000
    },
    {
      "epoch": 0.24551102033869457,
      "grad_norm": 1.0811253786087036,
      "learning_rate": 9.571634719925147e-06,
      "loss": 0.0576,
      "step": 150020
    },
    {
      "epoch": 0.2455437507773479,
      "grad_norm": 1.2061856985092163,
      "learning_rate": 9.57156882771163e-06,
      "loss": 0.0804,
      "step": 150040
    },
    {
      "epoch": 0.24557648121600126,
      "grad_norm": 1.225507378578186,
      "learning_rate": 9.571502935498112e-06,
      "loss": 0.0671,
      "step": 150060
    },
    {
      "epoch": 0.2456092116546546,
      "grad_norm": 2.2694051265716553,
      "learning_rate": 9.571437043284596e-06,
      "loss": 0.0514,
      "step": 150080
    },
    {
      "epoch": 0.24564194209330795,
      "grad_norm": 1.1258794069290161,
      "learning_rate": 9.571371151071078e-06,
      "loss": 0.043,
      "step": 150100
    },
    {
      "epoch": 0.24567467253196126,
      "grad_norm": 2.502565860748291,
      "learning_rate": 9.571305258857561e-06,
      "loss": 0.0563,
      "step": 150120
    },
    {
      "epoch": 0.2457074029706146,
      "grad_norm": 9.037985801696777,
      "learning_rate": 9.571239366644045e-06,
      "loss": 0.0672,
      "step": 150140
    },
    {
      "epoch": 0.24574013340926795,
      "grad_norm": 2.9204163551330566,
      "learning_rate": 9.571173474430527e-06,
      "loss": 0.0568,
      "step": 150160
    },
    {
      "epoch": 0.2457728638479213,
      "grad_norm": 1.4918324947357178,
      "learning_rate": 9.57110758221701e-06,
      "loss": 0.0479,
      "step": 150180
    },
    {
      "epoch": 0.24580559428657464,
      "grad_norm": 4.791697978973389,
      "learning_rate": 9.571041690003494e-06,
      "loss": 0.0472,
      "step": 150200
    },
    {
      "epoch": 0.24583832472522796,
      "grad_norm": 4.25651216506958,
      "learning_rate": 9.570975797789976e-06,
      "loss": 0.0648,
      "step": 150220
    },
    {
      "epoch": 0.2458710551638813,
      "grad_norm": 3.611889600753784,
      "learning_rate": 9.57090990557646e-06,
      "loss": 0.0678,
      "step": 150240
    },
    {
      "epoch": 0.24590378560253465,
      "grad_norm": 3.3762261867523193,
      "learning_rate": 9.570844013362943e-06,
      "loss": 0.0622,
      "step": 150260
    },
    {
      "epoch": 0.245936516041188,
      "grad_norm": 2.097822904586792,
      "learning_rate": 9.570778121149425e-06,
      "loss": 0.0623,
      "step": 150280
    },
    {
      "epoch": 0.24596924647984134,
      "grad_norm": 1.6854090690612793,
      "learning_rate": 9.570712228935908e-06,
      "loss": 0.0591,
      "step": 150300
    },
    {
      "epoch": 0.24600197691849465,
      "grad_norm": 10.064262390136719,
      "learning_rate": 9.57064633672239e-06,
      "loss": 0.0531,
      "step": 150320
    },
    {
      "epoch": 0.246034707357148,
      "grad_norm": 2.1680071353912354,
      "learning_rate": 9.570580444508874e-06,
      "loss": 0.0645,
      "step": 150340
    },
    {
      "epoch": 0.24606743779580134,
      "grad_norm": 3.0262935161590576,
      "learning_rate": 9.570514552295356e-06,
      "loss": 0.0495,
      "step": 150360
    },
    {
      "epoch": 0.24610016823445469,
      "grad_norm": 2.1186349391937256,
      "learning_rate": 9.57044866008184e-06,
      "loss": 0.0641,
      "step": 150380
    },
    {
      "epoch": 0.24613289867310803,
      "grad_norm": 1.7441414594650269,
      "learning_rate": 9.570382767868321e-06,
      "loss": 0.0597,
      "step": 150400
    },
    {
      "epoch": 0.24616562911176135,
      "grad_norm": 31.091989517211914,
      "learning_rate": 9.570316875654805e-06,
      "loss": 0.0547,
      "step": 150420
    },
    {
      "epoch": 0.2461983595504147,
      "grad_norm": 0.950309693813324,
      "learning_rate": 9.570250983441287e-06,
      "loss": 0.0629,
      "step": 150440
    },
    {
      "epoch": 0.24623108998906804,
      "grad_norm": 0.6760870218276978,
      "learning_rate": 9.57018509122777e-06,
      "loss": 0.0399,
      "step": 150460
    },
    {
      "epoch": 0.24626382042772138,
      "grad_norm": 1.6116855144500732,
      "learning_rate": 9.570119199014252e-06,
      "loss": 0.0429,
      "step": 150480
    },
    {
      "epoch": 0.24629655086637472,
      "grad_norm": 3.0058093070983887,
      "learning_rate": 9.570053306800736e-06,
      "loss": 0.0632,
      "step": 150500
    },
    {
      "epoch": 0.24632928130502804,
      "grad_norm": 2.8298404216766357,
      "learning_rate": 9.56998741458722e-06,
      "loss": 0.0689,
      "step": 150520
    },
    {
      "epoch": 0.24636201174368139,
      "grad_norm": 3.1733896732330322,
      "learning_rate": 9.569921522373701e-06,
      "loss": 0.0449,
      "step": 150540
    },
    {
      "epoch": 0.24639474218233473,
      "grad_norm": 16.969280242919922,
      "learning_rate": 9.569855630160185e-06,
      "loss": 0.0634,
      "step": 150560
    },
    {
      "epoch": 0.24642747262098808,
      "grad_norm": 2.437854051589966,
      "learning_rate": 9.569789737946668e-06,
      "loss": 0.0589,
      "step": 150580
    },
    {
      "epoch": 0.2464602030596414,
      "grad_norm": 1.9777209758758545,
      "learning_rate": 9.56972384573315e-06,
      "loss": 0.0497,
      "step": 150600
    },
    {
      "epoch": 0.24649293349829474,
      "grad_norm": 2.023977756500244,
      "learning_rate": 9.569657953519634e-06,
      "loss": 0.0548,
      "step": 150620
    },
    {
      "epoch": 0.24652566393694808,
      "grad_norm": 2.390442132949829,
      "learning_rate": 9.569592061306117e-06,
      "loss": 0.0625,
      "step": 150640
    },
    {
      "epoch": 0.24655839437560143,
      "grad_norm": 1.0964728593826294,
      "learning_rate": 9.5695261690926e-06,
      "loss": 0.04,
      "step": 150660
    },
    {
      "epoch": 0.24659112481425477,
      "grad_norm": 3.1821577548980713,
      "learning_rate": 9.569460276879083e-06,
      "loss": 0.0653,
      "step": 150680
    },
    {
      "epoch": 0.2466238552529081,
      "grad_norm": 1.2176297903060913,
      "learning_rate": 9.569394384665565e-06,
      "loss": 0.0556,
      "step": 150700
    },
    {
      "epoch": 0.24665658569156143,
      "grad_norm": 0.5024120807647705,
      "learning_rate": 9.569328492452048e-06,
      "loss": 0.0426,
      "step": 150720
    },
    {
      "epoch": 0.24668931613021478,
      "grad_norm": 3.0153515338897705,
      "learning_rate": 9.56926260023853e-06,
      "loss": 0.0545,
      "step": 150740
    },
    {
      "epoch": 0.24672204656886812,
      "grad_norm": 1.4478169679641724,
      "learning_rate": 9.569196708025014e-06,
      "loss": 0.0691,
      "step": 150760
    },
    {
      "epoch": 0.24675477700752146,
      "grad_norm": 0.8884761929512024,
      "learning_rate": 9.569130815811496e-06,
      "loss": 0.0559,
      "step": 150780
    },
    {
      "epoch": 0.24678750744617478,
      "grad_norm": 1.2707637548446655,
      "learning_rate": 9.56906492359798e-06,
      "loss": 0.0522,
      "step": 150800
    },
    {
      "epoch": 0.24682023788482813,
      "grad_norm": 0.7489868402481079,
      "learning_rate": 9.568999031384461e-06,
      "loss": 0.0675,
      "step": 150820
    },
    {
      "epoch": 0.24685296832348147,
      "grad_norm": 0.73211270570755,
      "learning_rate": 9.568933139170945e-06,
      "loss": 0.048,
      "step": 150840
    },
    {
      "epoch": 0.24688569876213481,
      "grad_norm": 1.5933163166046143,
      "learning_rate": 9.568867246957427e-06,
      "loss": 0.0457,
      "step": 150860
    },
    {
      "epoch": 0.24691842920078816,
      "grad_norm": 1.1328843832015991,
      "learning_rate": 9.56880135474391e-06,
      "loss": 0.0601,
      "step": 150880
    },
    {
      "epoch": 0.24695115963944148,
      "grad_norm": 2.352201223373413,
      "learning_rate": 9.568735462530394e-06,
      "loss": 0.0644,
      "step": 150900
    },
    {
      "epoch": 0.24698389007809482,
      "grad_norm": 0.6820464730262756,
      "learning_rate": 9.568669570316876e-06,
      "loss": 0.0557,
      "step": 150920
    },
    {
      "epoch": 0.24701662051674816,
      "grad_norm": 1.850962519645691,
      "learning_rate": 9.56860367810336e-06,
      "loss": 0.0615,
      "step": 150940
    },
    {
      "epoch": 0.2470493509554015,
      "grad_norm": 1.13493013381958,
      "learning_rate": 9.568537785889841e-06,
      "loss": 0.0605,
      "step": 150960
    },
    {
      "epoch": 0.24708208139405485,
      "grad_norm": 1.4442332983016968,
      "learning_rate": 9.568471893676325e-06,
      "loss": 0.0619,
      "step": 150980
    },
    {
      "epoch": 0.24711481183270817,
      "grad_norm": 3.868992805480957,
      "learning_rate": 9.568406001462808e-06,
      "loss": 0.0604,
      "step": 151000
    },
    {
      "epoch": 0.24714754227136151,
      "grad_norm": 5.921952247619629,
      "learning_rate": 9.56834010924929e-06,
      "loss": 0.0542,
      "step": 151020
    },
    {
      "epoch": 0.24718027271001486,
      "grad_norm": 0.5800724029541016,
      "learning_rate": 9.568274217035774e-06,
      "loss": 0.0525,
      "step": 151040
    },
    {
      "epoch": 0.2472130031486682,
      "grad_norm": 2.8485536575317383,
      "learning_rate": 9.568208324822258e-06,
      "loss": 0.0518,
      "step": 151060
    },
    {
      "epoch": 0.24724573358732155,
      "grad_norm": 3.882427453994751,
      "learning_rate": 9.56814243260874e-06,
      "loss": 0.0765,
      "step": 151080
    },
    {
      "epoch": 0.24727846402597486,
      "grad_norm": 1.470487117767334,
      "learning_rate": 9.568076540395223e-06,
      "loss": 0.0614,
      "step": 151100
    },
    {
      "epoch": 0.2473111944646282,
      "grad_norm": 1.9374217987060547,
      "learning_rate": 9.568010648181705e-06,
      "loss": 0.057,
      "step": 151120
    },
    {
      "epoch": 0.24734392490328155,
      "grad_norm": 1.861849308013916,
      "learning_rate": 9.567944755968188e-06,
      "loss": 0.0603,
      "step": 151140
    },
    {
      "epoch": 0.2473766553419349,
      "grad_norm": 2.6757192611694336,
      "learning_rate": 9.56787886375467e-06,
      "loss": 0.0697,
      "step": 151160
    },
    {
      "epoch": 0.24740938578058824,
      "grad_norm": 1.01449453830719,
      "learning_rate": 9.567812971541154e-06,
      "loss": 0.0741,
      "step": 151180
    },
    {
      "epoch": 0.24744211621924156,
      "grad_norm": 3.9278204441070557,
      "learning_rate": 9.567747079327636e-06,
      "loss": 0.0594,
      "step": 151200
    },
    {
      "epoch": 0.2474748466578949,
      "grad_norm": 1.7271645069122314,
      "learning_rate": 9.56768118711412e-06,
      "loss": 0.0502,
      "step": 151220
    },
    {
      "epoch": 0.24750757709654825,
      "grad_norm": 2.977736473083496,
      "learning_rate": 9.567615294900603e-06,
      "loss": 0.0646,
      "step": 151240
    },
    {
      "epoch": 0.2475403075352016,
      "grad_norm": 7.195860385894775,
      "learning_rate": 9.567549402687085e-06,
      "loss": 0.0628,
      "step": 151260
    },
    {
      "epoch": 0.24757303797385494,
      "grad_norm": 1.126296043395996,
      "learning_rate": 9.567483510473568e-06,
      "loss": 0.0611,
      "step": 151280
    },
    {
      "epoch": 0.24760576841250825,
      "grad_norm": 2.030014753341675,
      "learning_rate": 9.56741761826005e-06,
      "loss": 0.064,
      "step": 151300
    },
    {
      "epoch": 0.2476384988511616,
      "grad_norm": 3.8508341312408447,
      "learning_rate": 9.567351726046534e-06,
      "loss": 0.0638,
      "step": 151320
    },
    {
      "epoch": 0.24767122928981494,
      "grad_norm": 1.291395664215088,
      "learning_rate": 9.567285833833016e-06,
      "loss": 0.0374,
      "step": 151340
    },
    {
      "epoch": 0.2477039597284683,
      "grad_norm": 0.8140915036201477,
      "learning_rate": 9.5672199416195e-06,
      "loss": 0.0539,
      "step": 151360
    },
    {
      "epoch": 0.24773669016712163,
      "grad_norm": 1.5680124759674072,
      "learning_rate": 9.567154049405983e-06,
      "loss": 0.0621,
      "step": 151380
    },
    {
      "epoch": 0.24776942060577495,
      "grad_norm": 0.6501150727272034,
      "learning_rate": 9.567088157192465e-06,
      "loss": 0.0641,
      "step": 151400
    },
    {
      "epoch": 0.2478021510444283,
      "grad_norm": 1.9143778085708618,
      "learning_rate": 9.567022264978949e-06,
      "loss": 0.0605,
      "step": 151420
    },
    {
      "epoch": 0.24783488148308164,
      "grad_norm": 1.2499628067016602,
      "learning_rate": 9.566956372765432e-06,
      "loss": 0.0515,
      "step": 151440
    },
    {
      "epoch": 0.24786761192173498,
      "grad_norm": 2.3724403381347656,
      "learning_rate": 9.566890480551914e-06,
      "loss": 0.0654,
      "step": 151460
    },
    {
      "epoch": 0.24790034236038833,
      "grad_norm": 3.897976875305176,
      "learning_rate": 9.566824588338398e-06,
      "loss": 0.0774,
      "step": 151480
    },
    {
      "epoch": 0.24793307279904164,
      "grad_norm": 2.5908122062683105,
      "learning_rate": 9.56675869612488e-06,
      "loss": 0.0569,
      "step": 151500
    },
    {
      "epoch": 0.247965803237695,
      "grad_norm": 1.1164907217025757,
      "learning_rate": 9.566692803911363e-06,
      "loss": 0.0488,
      "step": 151520
    },
    {
      "epoch": 0.24799853367634833,
      "grad_norm": 2.3231778144836426,
      "learning_rate": 9.566626911697845e-06,
      "loss": 0.0373,
      "step": 151540
    },
    {
      "epoch": 0.24803126411500168,
      "grad_norm": 1.9090644121170044,
      "learning_rate": 9.566561019484329e-06,
      "loss": 0.0638,
      "step": 151560
    },
    {
      "epoch": 0.24806399455365502,
      "grad_norm": 3.5304479598999023,
      "learning_rate": 9.56649512727081e-06,
      "loss": 0.0706,
      "step": 151580
    },
    {
      "epoch": 0.24809672499230834,
      "grad_norm": 2.1609575748443604,
      "learning_rate": 9.566429235057294e-06,
      "loss": 0.0502,
      "step": 151600
    },
    {
      "epoch": 0.24812945543096168,
      "grad_norm": 1.4671673774719238,
      "learning_rate": 9.566363342843778e-06,
      "loss": 0.0667,
      "step": 151620
    },
    {
      "epoch": 0.24816218586961503,
      "grad_norm": 2.2774009704589844,
      "learning_rate": 9.56629745063026e-06,
      "loss": 0.06,
      "step": 151640
    },
    {
      "epoch": 0.24819491630826837,
      "grad_norm": 5.659656047821045,
      "learning_rate": 9.566231558416743e-06,
      "loss": 0.0734,
      "step": 151660
    },
    {
      "epoch": 0.24822764674692172,
      "grad_norm": 1.0945242643356323,
      "learning_rate": 9.566165666203225e-06,
      "loss": 0.0573,
      "step": 151680
    },
    {
      "epoch": 0.24826037718557503,
      "grad_norm": 1.374146580696106,
      "learning_rate": 9.566099773989709e-06,
      "loss": 0.0652,
      "step": 151700
    },
    {
      "epoch": 0.24829310762422838,
      "grad_norm": 4.53059720993042,
      "learning_rate": 9.56603388177619e-06,
      "loss": 0.0419,
      "step": 151720
    },
    {
      "epoch": 0.24832583806288172,
      "grad_norm": 3.813778877258301,
      "learning_rate": 9.565967989562674e-06,
      "loss": 0.0506,
      "step": 151740
    },
    {
      "epoch": 0.24835856850153507,
      "grad_norm": 2.273451566696167,
      "learning_rate": 9.565902097349156e-06,
      "loss": 0.059,
      "step": 151760
    },
    {
      "epoch": 0.2483912989401884,
      "grad_norm": 0.7260968089103699,
      "learning_rate": 9.56583620513564e-06,
      "loss": 0.0482,
      "step": 151780
    },
    {
      "epoch": 0.24842402937884173,
      "grad_norm": 5.387504577636719,
      "learning_rate": 9.565770312922123e-06,
      "loss": 0.0651,
      "step": 151800
    },
    {
      "epoch": 0.24845675981749507,
      "grad_norm": 4.516838550567627,
      "learning_rate": 9.565704420708605e-06,
      "loss": 0.0705,
      "step": 151820
    },
    {
      "epoch": 0.24848949025614842,
      "grad_norm": 1.3916064500808716,
      "learning_rate": 9.565638528495089e-06,
      "loss": 0.0612,
      "step": 151840
    },
    {
      "epoch": 0.24852222069480176,
      "grad_norm": 3.718484878540039,
      "learning_rate": 9.565572636281572e-06,
      "loss": 0.0525,
      "step": 151860
    },
    {
      "epoch": 0.2485549511334551,
      "grad_norm": 0.9703806638717651,
      "learning_rate": 9.565506744068054e-06,
      "loss": 0.0484,
      "step": 151880
    },
    {
      "epoch": 0.24858768157210842,
      "grad_norm": 6.458555221557617,
      "learning_rate": 9.565440851854538e-06,
      "loss": 0.0649,
      "step": 151900
    },
    {
      "epoch": 0.24862041201076177,
      "grad_norm": 3.0477471351623535,
      "learning_rate": 9.56537495964102e-06,
      "loss": 0.0615,
      "step": 151920
    },
    {
      "epoch": 0.2486531424494151,
      "grad_norm": 3.9400792121887207,
      "learning_rate": 9.565309067427503e-06,
      "loss": 0.0641,
      "step": 151940
    },
    {
      "epoch": 0.24868587288806845,
      "grad_norm": 1.9919308423995972,
      "learning_rate": 9.565243175213987e-06,
      "loss": 0.0525,
      "step": 151960
    },
    {
      "epoch": 0.24871860332672177,
      "grad_norm": 1.4716566801071167,
      "learning_rate": 9.565177283000469e-06,
      "loss": 0.0665,
      "step": 151980
    },
    {
      "epoch": 0.24875133376537512,
      "grad_norm": 3.100203514099121,
      "learning_rate": 9.565111390786952e-06,
      "loss": 0.0446,
      "step": 152000
    },
    {
      "epoch": 0.24878406420402846,
      "grad_norm": 1.0740853548049927,
      "learning_rate": 9.565045498573434e-06,
      "loss": 0.0732,
      "step": 152020
    },
    {
      "epoch": 0.2488167946426818,
      "grad_norm": 1.8751767873764038,
      "learning_rate": 9.564979606359918e-06,
      "loss": 0.0675,
      "step": 152040
    },
    {
      "epoch": 0.24884952508133515,
      "grad_norm": 0.6113210320472717,
      "learning_rate": 9.5649137141464e-06,
      "loss": 0.0603,
      "step": 152060
    },
    {
      "epoch": 0.24888225551998847,
      "grad_norm": 0.5320490002632141,
      "learning_rate": 9.564847821932883e-06,
      "loss": 0.0541,
      "step": 152080
    },
    {
      "epoch": 0.2489149859586418,
      "grad_norm": 1.8918027877807617,
      "learning_rate": 9.564781929719365e-06,
      "loss": 0.0554,
      "step": 152100
    },
    {
      "epoch": 0.24894771639729515,
      "grad_norm": 1.6355807781219482,
      "learning_rate": 9.564716037505849e-06,
      "loss": 0.0575,
      "step": 152120
    },
    {
      "epoch": 0.2489804468359485,
      "grad_norm": 1.3421369791030884,
      "learning_rate": 9.56465014529233e-06,
      "loss": 0.0561,
      "step": 152140
    },
    {
      "epoch": 0.24901317727460184,
      "grad_norm": 2.035348415374756,
      "learning_rate": 9.564584253078814e-06,
      "loss": 0.048,
      "step": 152160
    },
    {
      "epoch": 0.24904590771325516,
      "grad_norm": 2.5853312015533447,
      "learning_rate": 9.564518360865298e-06,
      "loss": 0.0656,
      "step": 152180
    },
    {
      "epoch": 0.2490786381519085,
      "grad_norm": 2.695915937423706,
      "learning_rate": 9.56445246865178e-06,
      "loss": 0.0643,
      "step": 152200
    },
    {
      "epoch": 0.24911136859056185,
      "grad_norm": 3.5795092582702637,
      "learning_rate": 9.564386576438263e-06,
      "loss": 0.0532,
      "step": 152220
    },
    {
      "epoch": 0.2491440990292152,
      "grad_norm": 1.4160023927688599,
      "learning_rate": 9.564320684224747e-06,
      "loss": 0.0695,
      "step": 152240
    },
    {
      "epoch": 0.24917682946786854,
      "grad_norm": 0.7981837391853333,
      "learning_rate": 9.564254792011229e-06,
      "loss": 0.057,
      "step": 152260
    },
    {
      "epoch": 0.24920955990652185,
      "grad_norm": 1.3096436262130737,
      "learning_rate": 9.564188899797712e-06,
      "loss": 0.0801,
      "step": 152280
    },
    {
      "epoch": 0.2492422903451752,
      "grad_norm": 2.0580506324768066,
      "learning_rate": 9.564123007584196e-06,
      "loss": 0.0417,
      "step": 152300
    },
    {
      "epoch": 0.24927502078382854,
      "grad_norm": 2.0198092460632324,
      "learning_rate": 9.564057115370678e-06,
      "loss": 0.0651,
      "step": 152320
    },
    {
      "epoch": 0.2493077512224819,
      "grad_norm": 2.155298948287964,
      "learning_rate": 9.563991223157161e-06,
      "loss": 0.0525,
      "step": 152340
    },
    {
      "epoch": 0.24934048166113523,
      "grad_norm": 2.507566213607788,
      "learning_rate": 9.563925330943643e-06,
      "loss": 0.0749,
      "step": 152360
    },
    {
      "epoch": 0.24937321209978855,
      "grad_norm": 0.7262559533119202,
      "learning_rate": 9.563859438730127e-06,
      "loss": 0.0544,
      "step": 152380
    },
    {
      "epoch": 0.2494059425384419,
      "grad_norm": 3.594723701477051,
      "learning_rate": 9.563793546516609e-06,
      "loss": 0.0469,
      "step": 152400
    },
    {
      "epoch": 0.24943867297709524,
      "grad_norm": 2.143301010131836,
      "learning_rate": 9.563727654303092e-06,
      "loss": 0.0536,
      "step": 152420
    },
    {
      "epoch": 0.24947140341574858,
      "grad_norm": 1.1014082431793213,
      "learning_rate": 9.563661762089574e-06,
      "loss": 0.0541,
      "step": 152440
    },
    {
      "epoch": 0.24950413385440193,
      "grad_norm": 1.6071391105651855,
      "learning_rate": 9.563595869876058e-06,
      "loss": 0.0635,
      "step": 152460
    },
    {
      "epoch": 0.24953686429305524,
      "grad_norm": 0.9075103998184204,
      "learning_rate": 9.56352997766254e-06,
      "loss": 0.0626,
      "step": 152480
    },
    {
      "epoch": 0.2495695947317086,
      "grad_norm": 2.5978527069091797,
      "learning_rate": 9.563464085449023e-06,
      "loss": 0.0567,
      "step": 152500
    },
    {
      "epoch": 0.24960232517036193,
      "grad_norm": 2.3104846477508545,
      "learning_rate": 9.563398193235505e-06,
      "loss": 0.0561,
      "step": 152520
    },
    {
      "epoch": 0.24963505560901528,
      "grad_norm": 0.7021661996841431,
      "learning_rate": 9.563332301021989e-06,
      "loss": 0.0556,
      "step": 152540
    },
    {
      "epoch": 0.24966778604766862,
      "grad_norm": 1.9032155275344849,
      "learning_rate": 9.563266408808472e-06,
      "loss": 0.072,
      "step": 152560
    },
    {
      "epoch": 0.24970051648632194,
      "grad_norm": 2.5438339710235596,
      "learning_rate": 9.563200516594954e-06,
      "loss": 0.0753,
      "step": 152580
    },
    {
      "epoch": 0.24973324692497528,
      "grad_norm": 3.5626749992370605,
      "learning_rate": 9.563134624381438e-06,
      "loss": 0.0415,
      "step": 152600
    },
    {
      "epoch": 0.24976597736362863,
      "grad_norm": 0.3440219759941101,
      "learning_rate": 9.563068732167921e-06,
      "loss": 0.0521,
      "step": 152620
    },
    {
      "epoch": 0.24979870780228197,
      "grad_norm": 1.529812216758728,
      "learning_rate": 9.563002839954403e-06,
      "loss": 0.0612,
      "step": 152640
    },
    {
      "epoch": 0.24983143824093532,
      "grad_norm": 1.5296580791473389,
      "learning_rate": 9.562936947740887e-06,
      "loss": 0.0613,
      "step": 152660
    },
    {
      "epoch": 0.24986416867958863,
      "grad_norm": 2.0601084232330322,
      "learning_rate": 9.56287105552737e-06,
      "loss": 0.0642,
      "step": 152680
    },
    {
      "epoch": 0.24989689911824198,
      "grad_norm": 2.725247859954834,
      "learning_rate": 9.562805163313852e-06,
      "loss": 0.0672,
      "step": 152700
    },
    {
      "epoch": 0.24992962955689532,
      "grad_norm": 2.9677200317382812,
      "learning_rate": 9.562739271100336e-06,
      "loss": 0.0493,
      "step": 152720
    },
    {
      "epoch": 0.24996235999554867,
      "grad_norm": 3.9952261447906494,
      "learning_rate": 9.562673378886818e-06,
      "loss": 0.0734,
      "step": 152740
    },
    {
      "epoch": 0.249995090434202,
      "grad_norm": 3.977313756942749,
      "learning_rate": 9.562607486673301e-06,
      "loss": 0.0548,
      "step": 152760
    },
    {
      "epoch": 0.25002782087285536,
      "grad_norm": 2.6690025329589844,
      "learning_rate": 9.562541594459783e-06,
      "loss": 0.0474,
      "step": 152780
    },
    {
      "epoch": 0.25006055131150867,
      "grad_norm": 3.6532299518585205,
      "learning_rate": 9.562475702246267e-06,
      "loss": 0.0479,
      "step": 152800
    },
    {
      "epoch": 0.250093281750162,
      "grad_norm": 2.1311426162719727,
      "learning_rate": 9.562409810032749e-06,
      "loss": 0.0524,
      "step": 152820
    },
    {
      "epoch": 0.25012601218881536,
      "grad_norm": 0.9183601140975952,
      "learning_rate": 9.562343917819232e-06,
      "loss": 0.0628,
      "step": 152840
    },
    {
      "epoch": 0.2501587426274687,
      "grad_norm": 15.454980850219727,
      "learning_rate": 9.562278025605714e-06,
      "loss": 0.0561,
      "step": 152860
    },
    {
      "epoch": 0.25019147306612205,
      "grad_norm": 3.8782057762145996,
      "learning_rate": 9.562212133392198e-06,
      "loss": 0.0636,
      "step": 152880
    },
    {
      "epoch": 0.25022420350477537,
      "grad_norm": 0.8015502691268921,
      "learning_rate": 9.56214624117868e-06,
      "loss": 0.0598,
      "step": 152900
    },
    {
      "epoch": 0.2502569339434287,
      "grad_norm": 1.1785930395126343,
      "learning_rate": 9.562080348965163e-06,
      "loss": 0.068,
      "step": 152920
    },
    {
      "epoch": 0.25028966438208206,
      "grad_norm": 1.2619762420654297,
      "learning_rate": 9.562014456751645e-06,
      "loss": 0.0722,
      "step": 152940
    },
    {
      "epoch": 0.25032239482073537,
      "grad_norm": 1.1907353401184082,
      "learning_rate": 9.561948564538129e-06,
      "loss": 0.0516,
      "step": 152960
    },
    {
      "epoch": 0.25035512525938874,
      "grad_norm": 1.6776952743530273,
      "learning_rate": 9.561882672324612e-06,
      "loss": 0.0436,
      "step": 152980
    },
    {
      "epoch": 0.25038785569804206,
      "grad_norm": 5.22493314743042,
      "learning_rate": 9.561816780111094e-06,
      "loss": 0.0477,
      "step": 153000
    },
    {
      "epoch": 0.2504205861366954,
      "grad_norm": 0.7027491927146912,
      "learning_rate": 9.561750887897578e-06,
      "loss": 0.0538,
      "step": 153020
    },
    {
      "epoch": 0.25045331657534875,
      "grad_norm": 3.36773681640625,
      "learning_rate": 9.561684995684061e-06,
      "loss": 0.057,
      "step": 153040
    },
    {
      "epoch": 0.25048604701400207,
      "grad_norm": 2.636496067047119,
      "learning_rate": 9.561619103470543e-06,
      "loss": 0.0667,
      "step": 153060
    },
    {
      "epoch": 0.25051877745265544,
      "grad_norm": 3.35208797454834,
      "learning_rate": 9.561553211257027e-06,
      "loss": 0.0569,
      "step": 153080
    },
    {
      "epoch": 0.25055150789130876,
      "grad_norm": 0.600712239742279,
      "learning_rate": 9.56148731904351e-06,
      "loss": 0.0458,
      "step": 153100
    },
    {
      "epoch": 0.2505842383299621,
      "grad_norm": 1.799240231513977,
      "learning_rate": 9.561421426829992e-06,
      "loss": 0.0577,
      "step": 153120
    },
    {
      "epoch": 0.25061696876861544,
      "grad_norm": 1.453377604484558,
      "learning_rate": 9.561355534616476e-06,
      "loss": 0.0513,
      "step": 153140
    },
    {
      "epoch": 0.25064969920726876,
      "grad_norm": 1.5303434133529663,
      "learning_rate": 9.561289642402958e-06,
      "loss": 0.0601,
      "step": 153160
    },
    {
      "epoch": 0.25068242964592213,
      "grad_norm": 3.1252782344818115,
      "learning_rate": 9.561223750189441e-06,
      "loss": 0.0704,
      "step": 153180
    },
    {
      "epoch": 0.25071516008457545,
      "grad_norm": 1.5114248991012573,
      "learning_rate": 9.561157857975923e-06,
      "loss": 0.0486,
      "step": 153200
    },
    {
      "epoch": 0.25074789052322877,
      "grad_norm": 0.9933601021766663,
      "learning_rate": 9.561091965762407e-06,
      "loss": 0.0467,
      "step": 153220
    },
    {
      "epoch": 0.25078062096188214,
      "grad_norm": 1.2437360286712646,
      "learning_rate": 9.561026073548889e-06,
      "loss": 0.0627,
      "step": 153240
    },
    {
      "epoch": 0.25081335140053546,
      "grad_norm": 5.589747905731201,
      "learning_rate": 9.560960181335372e-06,
      "loss": 0.0529,
      "step": 153260
    },
    {
      "epoch": 0.25084608183918883,
      "grad_norm": 3.251352548599243,
      "learning_rate": 9.560894289121854e-06,
      "loss": 0.0612,
      "step": 153280
    },
    {
      "epoch": 0.25087881227784214,
      "grad_norm": 1.4776896238327026,
      "learning_rate": 9.560828396908338e-06,
      "loss": 0.0621,
      "step": 153300
    },
    {
      "epoch": 0.25091154271649546,
      "grad_norm": 1.551735758781433,
      "learning_rate": 9.56076250469482e-06,
      "loss": 0.0776,
      "step": 153320
    },
    {
      "epoch": 0.25094427315514883,
      "grad_norm": 2.1645467281341553,
      "learning_rate": 9.560696612481303e-06,
      "loss": 0.057,
      "step": 153340
    },
    {
      "epoch": 0.25097700359380215,
      "grad_norm": 0.3291684687137604,
      "learning_rate": 9.560630720267787e-06,
      "loss": 0.0551,
      "step": 153360
    },
    {
      "epoch": 0.2510097340324555,
      "grad_norm": 1.5910706520080566,
      "learning_rate": 9.560564828054269e-06,
      "loss": 0.0621,
      "step": 153380
    },
    {
      "epoch": 0.25104246447110884,
      "grad_norm": 4.30831241607666,
      "learning_rate": 9.560498935840752e-06,
      "loss": 0.0605,
      "step": 153400
    },
    {
      "epoch": 0.25107519490976216,
      "grad_norm": 1.571135401725769,
      "learning_rate": 9.560433043627236e-06,
      "loss": 0.0562,
      "step": 153420
    },
    {
      "epoch": 0.25110792534841553,
      "grad_norm": 2.0103695392608643,
      "learning_rate": 9.560367151413718e-06,
      "loss": 0.0503,
      "step": 153440
    },
    {
      "epoch": 0.25114065578706884,
      "grad_norm": 1.0298141241073608,
      "learning_rate": 9.560301259200202e-06,
      "loss": 0.0537,
      "step": 153460
    },
    {
      "epoch": 0.2511733862257222,
      "grad_norm": 2.714247226715088,
      "learning_rate": 9.560235366986685e-06,
      "loss": 0.0595,
      "step": 153480
    },
    {
      "epoch": 0.25120611666437553,
      "grad_norm": 1.547728180885315,
      "learning_rate": 9.560169474773167e-06,
      "loss": 0.0484,
      "step": 153500
    },
    {
      "epoch": 0.25123884710302885,
      "grad_norm": 1.7551935911178589,
      "learning_rate": 9.56010358255965e-06,
      "loss": 0.0507,
      "step": 153520
    },
    {
      "epoch": 0.2512715775416822,
      "grad_norm": 3.176382064819336,
      "learning_rate": 9.560037690346132e-06,
      "loss": 0.0677,
      "step": 153540
    },
    {
      "epoch": 0.25130430798033554,
      "grad_norm": 7.069536209106445,
      "learning_rate": 9.559971798132616e-06,
      "loss": 0.053,
      "step": 153560
    },
    {
      "epoch": 0.2513370384189889,
      "grad_norm": 0.5913550853729248,
      "learning_rate": 9.559905905919098e-06,
      "loss": 0.0512,
      "step": 153580
    },
    {
      "epoch": 0.25136976885764223,
      "grad_norm": 1.4126032590866089,
      "learning_rate": 9.559840013705582e-06,
      "loss": 0.0662,
      "step": 153600
    },
    {
      "epoch": 0.25140249929629555,
      "grad_norm": 1.9945613145828247,
      "learning_rate": 9.559774121492063e-06,
      "loss": 0.0594,
      "step": 153620
    },
    {
      "epoch": 0.2514352297349489,
      "grad_norm": 1.5141297578811646,
      "learning_rate": 9.559708229278547e-06,
      "loss": 0.0759,
      "step": 153640
    },
    {
      "epoch": 0.25146796017360223,
      "grad_norm": 1.2603729963302612,
      "learning_rate": 9.559642337065029e-06,
      "loss": 0.0562,
      "step": 153660
    },
    {
      "epoch": 0.2515006906122556,
      "grad_norm": 1.7003648281097412,
      "learning_rate": 9.559576444851513e-06,
      "loss": 0.0452,
      "step": 153680
    },
    {
      "epoch": 0.2515334210509089,
      "grad_norm": 3.600395441055298,
      "learning_rate": 9.559510552637996e-06,
      "loss": 0.0585,
      "step": 153700
    },
    {
      "epoch": 0.25156615148956224,
      "grad_norm": 2.0360028743743896,
      "learning_rate": 9.559444660424478e-06,
      "loss": 0.0567,
      "step": 153720
    },
    {
      "epoch": 0.2515988819282156,
      "grad_norm": 1.3093676567077637,
      "learning_rate": 9.559378768210962e-06,
      "loss": 0.0649,
      "step": 153740
    },
    {
      "epoch": 0.25163161236686893,
      "grad_norm": 2.355550765991211,
      "learning_rate": 9.559312875997443e-06,
      "loss": 0.0512,
      "step": 153760
    },
    {
      "epoch": 0.2516643428055223,
      "grad_norm": 2.5104901790618896,
      "learning_rate": 9.559246983783927e-06,
      "loss": 0.0594,
      "step": 153780
    },
    {
      "epoch": 0.2516970732441756,
      "grad_norm": 0.503398060798645,
      "learning_rate": 9.559181091570409e-06,
      "loss": 0.058,
      "step": 153800
    },
    {
      "epoch": 0.25172980368282893,
      "grad_norm": 3.8175086975097656,
      "learning_rate": 9.559115199356893e-06,
      "loss": 0.0579,
      "step": 153820
    },
    {
      "epoch": 0.2517625341214823,
      "grad_norm": 2.503753185272217,
      "learning_rate": 9.559049307143376e-06,
      "loss": 0.0445,
      "step": 153840
    },
    {
      "epoch": 0.2517952645601356,
      "grad_norm": 1.609084963798523,
      "learning_rate": 9.558983414929858e-06,
      "loss": 0.0623,
      "step": 153860
    },
    {
      "epoch": 0.251827994998789,
      "grad_norm": 2.627849578857422,
      "learning_rate": 9.558917522716342e-06,
      "loss": 0.0488,
      "step": 153880
    },
    {
      "epoch": 0.2518607254374423,
      "grad_norm": 2.0720136165618896,
      "learning_rate": 9.558851630502825e-06,
      "loss": 0.0504,
      "step": 153900
    },
    {
      "epoch": 0.25189345587609563,
      "grad_norm": 2.8474271297454834,
      "learning_rate": 9.558785738289307e-06,
      "loss": 0.0621,
      "step": 153920
    },
    {
      "epoch": 0.251926186314749,
      "grad_norm": 0.6489474177360535,
      "learning_rate": 9.55871984607579e-06,
      "loss": 0.0624,
      "step": 153940
    },
    {
      "epoch": 0.2519589167534023,
      "grad_norm": 0.7562652826309204,
      "learning_rate": 9.558653953862273e-06,
      "loss": 0.0466,
      "step": 153960
    },
    {
      "epoch": 0.2519916471920557,
      "grad_norm": 1.3622426986694336,
      "learning_rate": 9.558588061648756e-06,
      "loss": 0.0505,
      "step": 153980
    },
    {
      "epoch": 0.252024377630709,
      "grad_norm": 5.708794116973877,
      "learning_rate": 9.558522169435238e-06,
      "loss": 0.0606,
      "step": 154000
    },
    {
      "epoch": 0.2520571080693623,
      "grad_norm": 2.137424945831299,
      "learning_rate": 9.558456277221722e-06,
      "loss": 0.0599,
      "step": 154020
    },
    {
      "epoch": 0.2520898385080157,
      "grad_norm": 2.3222603797912598,
      "learning_rate": 9.558390385008204e-06,
      "loss": 0.0416,
      "step": 154040
    },
    {
      "epoch": 0.252122568946669,
      "grad_norm": 1.276695966720581,
      "learning_rate": 9.558324492794687e-06,
      "loss": 0.0616,
      "step": 154060
    },
    {
      "epoch": 0.2521552993853224,
      "grad_norm": 2.164891242980957,
      "learning_rate": 9.55825860058117e-06,
      "loss": 0.0586,
      "step": 154080
    },
    {
      "epoch": 0.2521880298239757,
      "grad_norm": 2.351689338684082,
      "learning_rate": 9.558192708367653e-06,
      "loss": 0.0614,
      "step": 154100
    },
    {
      "epoch": 0.252220760262629,
      "grad_norm": 1.369642734527588,
      "learning_rate": 9.558126816154136e-06,
      "loss": 0.047,
      "step": 154120
    },
    {
      "epoch": 0.2522534907012824,
      "grad_norm": 3.36867356300354,
      "learning_rate": 9.558060923940618e-06,
      "loss": 0.0632,
      "step": 154140
    },
    {
      "epoch": 0.2522862211399357,
      "grad_norm": 1.5605887174606323,
      "learning_rate": 9.557995031727102e-06,
      "loss": 0.0469,
      "step": 154160
    },
    {
      "epoch": 0.2523189515785891,
      "grad_norm": 1.820443868637085,
      "learning_rate": 9.557929139513584e-06,
      "loss": 0.0467,
      "step": 154180
    },
    {
      "epoch": 0.2523516820172424,
      "grad_norm": 26.82906150817871,
      "learning_rate": 9.557863247300067e-06,
      "loss": 0.0512,
      "step": 154200
    },
    {
      "epoch": 0.2523844124558957,
      "grad_norm": 2.896944522857666,
      "learning_rate": 9.55779735508655e-06,
      "loss": 0.0579,
      "step": 154220
    },
    {
      "epoch": 0.2524171428945491,
      "grad_norm": 2.100632429122925,
      "learning_rate": 9.557731462873033e-06,
      "loss": 0.0667,
      "step": 154240
    },
    {
      "epoch": 0.2524498733332024,
      "grad_norm": 0.8704169988632202,
      "learning_rate": 9.557665570659516e-06,
      "loss": 0.0677,
      "step": 154260
    },
    {
      "epoch": 0.2524826037718558,
      "grad_norm": 3.5741775035858154,
      "learning_rate": 9.557599678446e-06,
      "loss": 0.0528,
      "step": 154280
    },
    {
      "epoch": 0.2525153342105091,
      "grad_norm": 1.953324794769287,
      "learning_rate": 9.557533786232482e-06,
      "loss": 0.0657,
      "step": 154300
    },
    {
      "epoch": 0.2525480646491624,
      "grad_norm": 3.4818856716156006,
      "learning_rate": 9.557467894018965e-06,
      "loss": 0.0721,
      "step": 154320
    },
    {
      "epoch": 0.2525807950878158,
      "grad_norm": 1.563886284828186,
      "learning_rate": 9.557402001805447e-06,
      "loss": 0.0467,
      "step": 154340
    },
    {
      "epoch": 0.2526135255264691,
      "grad_norm": 5.572690963745117,
      "learning_rate": 9.55733610959193e-06,
      "loss": 0.0501,
      "step": 154360
    },
    {
      "epoch": 0.25264625596512247,
      "grad_norm": 1.9287253618240356,
      "learning_rate": 9.557270217378413e-06,
      "loss": 0.0483,
      "step": 154380
    },
    {
      "epoch": 0.2526789864037758,
      "grad_norm": 1.6662951707839966,
      "learning_rate": 9.557204325164896e-06,
      "loss": 0.066,
      "step": 154400
    },
    {
      "epoch": 0.2527117168424291,
      "grad_norm": 1.0022857189178467,
      "learning_rate": 9.55713843295138e-06,
      "loss": 0.0744,
      "step": 154420
    },
    {
      "epoch": 0.2527444472810825,
      "grad_norm": 1.3965498208999634,
      "learning_rate": 9.557072540737862e-06,
      "loss": 0.056,
      "step": 154440
    },
    {
      "epoch": 0.2527771777197358,
      "grad_norm": 2.6607162952423096,
      "learning_rate": 9.557006648524345e-06,
      "loss": 0.0533,
      "step": 154460
    },
    {
      "epoch": 0.25280990815838916,
      "grad_norm": 3.4337918758392334,
      "learning_rate": 9.556940756310827e-06,
      "loss": 0.0683,
      "step": 154480
    },
    {
      "epoch": 0.2528426385970425,
      "grad_norm": 0.9381970763206482,
      "learning_rate": 9.55687486409731e-06,
      "loss": 0.0659,
      "step": 154500
    },
    {
      "epoch": 0.2528753690356958,
      "grad_norm": 2.5791897773742676,
      "learning_rate": 9.556808971883793e-06,
      "loss": 0.0751,
      "step": 154520
    },
    {
      "epoch": 0.25290809947434917,
      "grad_norm": 2.2910146713256836,
      "learning_rate": 9.556743079670276e-06,
      "loss": 0.0648,
      "step": 154540
    },
    {
      "epoch": 0.2529408299130025,
      "grad_norm": 1.8527013063430786,
      "learning_rate": 9.556677187456758e-06,
      "loss": 0.06,
      "step": 154560
    },
    {
      "epoch": 0.25297356035165586,
      "grad_norm": 3.2190725803375244,
      "learning_rate": 9.556611295243242e-06,
      "loss": 0.051,
      "step": 154580
    },
    {
      "epoch": 0.2530062907903092,
      "grad_norm": 3.145850419998169,
      "learning_rate": 9.556545403029724e-06,
      "loss": 0.0701,
      "step": 154600
    },
    {
      "epoch": 0.2530390212289625,
      "grad_norm": 0.7970063090324402,
      "learning_rate": 9.556479510816207e-06,
      "loss": 0.0584,
      "step": 154620
    },
    {
      "epoch": 0.25307175166761586,
      "grad_norm": 3.0034589767456055,
      "learning_rate": 9.55641361860269e-06,
      "loss": 0.0627,
      "step": 154640
    },
    {
      "epoch": 0.2531044821062692,
      "grad_norm": 1.3429819345474243,
      "learning_rate": 9.556347726389173e-06,
      "loss": 0.0623,
      "step": 154660
    },
    {
      "epoch": 0.25313721254492255,
      "grad_norm": 1.9185422658920288,
      "learning_rate": 9.556281834175656e-06,
      "loss": 0.0584,
      "step": 154680
    },
    {
      "epoch": 0.25316994298357587,
      "grad_norm": 0.9048865437507629,
      "learning_rate": 9.55621594196214e-06,
      "loss": 0.064,
      "step": 154700
    },
    {
      "epoch": 0.2532026734222292,
      "grad_norm": 1.7964541912078857,
      "learning_rate": 9.556150049748622e-06,
      "loss": 0.0559,
      "step": 154720
    },
    {
      "epoch": 0.25323540386088256,
      "grad_norm": 1.9395846128463745,
      "learning_rate": 9.556084157535105e-06,
      "loss": 0.0551,
      "step": 154740
    },
    {
      "epoch": 0.2532681342995359,
      "grad_norm": 1.8189946413040161,
      "learning_rate": 9.556018265321589e-06,
      "loss": 0.0453,
      "step": 154760
    },
    {
      "epoch": 0.25330086473818925,
      "grad_norm": 1.6431962251663208,
      "learning_rate": 9.55595237310807e-06,
      "loss": 0.0581,
      "step": 154780
    },
    {
      "epoch": 0.25333359517684256,
      "grad_norm": 3.1682536602020264,
      "learning_rate": 9.555886480894554e-06,
      "loss": 0.0553,
      "step": 154800
    },
    {
      "epoch": 0.2533663256154959,
      "grad_norm": 2.6600606441497803,
      "learning_rate": 9.555820588681036e-06,
      "loss": 0.0566,
      "step": 154820
    },
    {
      "epoch": 0.25339905605414925,
      "grad_norm": 2.571861743927002,
      "learning_rate": 9.55575469646752e-06,
      "loss": 0.0514,
      "step": 154840
    },
    {
      "epoch": 0.25343178649280257,
      "grad_norm": 2.033900260925293,
      "learning_rate": 9.555688804254002e-06,
      "loss": 0.0688,
      "step": 154860
    },
    {
      "epoch": 0.25346451693145594,
      "grad_norm": 3.025503396987915,
      "learning_rate": 9.555622912040485e-06,
      "loss": 0.0462,
      "step": 154880
    },
    {
      "epoch": 0.25349724737010926,
      "grad_norm": 5.2051873207092285,
      "learning_rate": 9.555557019826967e-06,
      "loss": 0.0629,
      "step": 154900
    },
    {
      "epoch": 0.2535299778087626,
      "grad_norm": 0.8366335034370422,
      "learning_rate": 9.55549112761345e-06,
      "loss": 0.05,
      "step": 154920
    },
    {
      "epoch": 0.25356270824741595,
      "grad_norm": 0.8012365698814392,
      "learning_rate": 9.555425235399933e-06,
      "loss": 0.0692,
      "step": 154940
    },
    {
      "epoch": 0.25359543868606926,
      "grad_norm": 2.196908712387085,
      "learning_rate": 9.555359343186416e-06,
      "loss": 0.0554,
      "step": 154960
    },
    {
      "epoch": 0.25362816912472264,
      "grad_norm": 1.3193187713623047,
      "learning_rate": 9.555293450972898e-06,
      "loss": 0.0506,
      "step": 154980
    },
    {
      "epoch": 0.25366089956337595,
      "grad_norm": 2.6945083141326904,
      "learning_rate": 9.555227558759382e-06,
      "loss": 0.0542,
      "step": 155000
    },
    {
      "epoch": 0.25369363000202927,
      "grad_norm": 1.5013532638549805,
      "learning_rate": 9.555161666545865e-06,
      "loss": 0.0646,
      "step": 155020
    },
    {
      "epoch": 0.25372636044068264,
      "grad_norm": 4.389745712280273,
      "learning_rate": 9.555095774332347e-06,
      "loss": 0.0708,
      "step": 155040
    },
    {
      "epoch": 0.25375909087933596,
      "grad_norm": 2.568730354309082,
      "learning_rate": 9.555029882118831e-06,
      "loss": 0.0562,
      "step": 155060
    },
    {
      "epoch": 0.25379182131798933,
      "grad_norm": 2.3435251712799072,
      "learning_rate": 9.554963989905314e-06,
      "loss": 0.0519,
      "step": 155080
    },
    {
      "epoch": 0.25382455175664265,
      "grad_norm": 2.154092311859131,
      "learning_rate": 9.554898097691796e-06,
      "loss": 0.0644,
      "step": 155100
    },
    {
      "epoch": 0.25385728219529596,
      "grad_norm": 3.6143958568573,
      "learning_rate": 9.55483220547828e-06,
      "loss": 0.0733,
      "step": 155120
    },
    {
      "epoch": 0.25389001263394934,
      "grad_norm": 1.549094319343567,
      "learning_rate": 9.554766313264764e-06,
      "loss": 0.0538,
      "step": 155140
    },
    {
      "epoch": 0.25392274307260265,
      "grad_norm": 1.9358004331588745,
      "learning_rate": 9.554700421051245e-06,
      "loss": 0.0549,
      "step": 155160
    },
    {
      "epoch": 0.253955473511256,
      "grad_norm": 3.6343932151794434,
      "learning_rate": 9.554634528837729e-06,
      "loss": 0.0639,
      "step": 155180
    },
    {
      "epoch": 0.25398820394990934,
      "grad_norm": 2.3966434001922607,
      "learning_rate": 9.554568636624211e-06,
      "loss": 0.0515,
      "step": 155200
    },
    {
      "epoch": 0.25402093438856266,
      "grad_norm": 1.2089835405349731,
      "learning_rate": 9.554502744410694e-06,
      "loss": 0.0438,
      "step": 155220
    },
    {
      "epoch": 0.25405366482721603,
      "grad_norm": 0.6285596489906311,
      "learning_rate": 9.554436852197176e-06,
      "loss": 0.0647,
      "step": 155240
    },
    {
      "epoch": 0.25408639526586935,
      "grad_norm": 3.192760944366455,
      "learning_rate": 9.55437095998366e-06,
      "loss": 0.0564,
      "step": 155260
    },
    {
      "epoch": 0.2541191257045227,
      "grad_norm": 1.6650705337524414,
      "learning_rate": 9.554305067770142e-06,
      "loss": 0.0672,
      "step": 155280
    },
    {
      "epoch": 0.25415185614317604,
      "grad_norm": 3.5063066482543945,
      "learning_rate": 9.554239175556625e-06,
      "loss": 0.0504,
      "step": 155300
    },
    {
      "epoch": 0.25418458658182935,
      "grad_norm": 4.847886085510254,
      "learning_rate": 9.554173283343107e-06,
      "loss": 0.0531,
      "step": 155320
    },
    {
      "epoch": 0.2542173170204827,
      "grad_norm": 2.023552179336548,
      "learning_rate": 9.554107391129591e-06,
      "loss": 0.0596,
      "step": 155340
    },
    {
      "epoch": 0.25425004745913604,
      "grad_norm": 1.0097604990005493,
      "learning_rate": 9.554041498916073e-06,
      "loss": 0.0491,
      "step": 155360
    },
    {
      "epoch": 0.2542827778977894,
      "grad_norm": 4.84196138381958,
      "learning_rate": 9.553975606702556e-06,
      "loss": 0.0455,
      "step": 155380
    },
    {
      "epoch": 0.25431550833644273,
      "grad_norm": 0.9599193930625916,
      "learning_rate": 9.55390971448904e-06,
      "loss": 0.0509,
      "step": 155400
    },
    {
      "epoch": 0.25434823877509605,
      "grad_norm": 2.4407808780670166,
      "learning_rate": 9.553843822275522e-06,
      "loss": 0.0589,
      "step": 155420
    },
    {
      "epoch": 0.2543809692137494,
      "grad_norm": 3.7696726322174072,
      "learning_rate": 9.553777930062005e-06,
      "loss": 0.0527,
      "step": 155440
    },
    {
      "epoch": 0.25441369965240274,
      "grad_norm": 5.682666778564453,
      "learning_rate": 9.553712037848489e-06,
      "loss": 0.0705,
      "step": 155460
    },
    {
      "epoch": 0.25444643009105605,
      "grad_norm": 1.7114710807800293,
      "learning_rate": 9.553646145634971e-06,
      "loss": 0.0491,
      "step": 155480
    },
    {
      "epoch": 0.2544791605297094,
      "grad_norm": 1.2973912954330444,
      "learning_rate": 9.553580253421455e-06,
      "loss": 0.0435,
      "step": 155500
    },
    {
      "epoch": 0.25451189096836274,
      "grad_norm": 2.3177781105041504,
      "learning_rate": 9.553514361207938e-06,
      "loss": 0.0572,
      "step": 155520
    },
    {
      "epoch": 0.2545446214070161,
      "grad_norm": 1.9679865837097168,
      "learning_rate": 9.55344846899442e-06,
      "loss": 0.0549,
      "step": 155540
    },
    {
      "epoch": 0.25457735184566943,
      "grad_norm": 2.6645402908325195,
      "learning_rate": 9.553382576780904e-06,
      "loss": 0.047,
      "step": 155560
    },
    {
      "epoch": 0.25461008228432275,
      "grad_norm": 0.9053478837013245,
      "learning_rate": 9.553316684567385e-06,
      "loss": 0.0635,
      "step": 155580
    },
    {
      "epoch": 0.2546428127229761,
      "grad_norm": 2.4763541221618652,
      "learning_rate": 9.553250792353869e-06,
      "loss": 0.065,
      "step": 155600
    },
    {
      "epoch": 0.25467554316162944,
      "grad_norm": 2.106264591217041,
      "learning_rate": 9.553184900140351e-06,
      "loss": 0.0603,
      "step": 155620
    },
    {
      "epoch": 0.2547082736002828,
      "grad_norm": 4.92207670211792,
      "learning_rate": 9.553119007926835e-06,
      "loss": 0.0649,
      "step": 155640
    },
    {
      "epoch": 0.2547410040389361,
      "grad_norm": 19.009262084960938,
      "learning_rate": 9.553053115713316e-06,
      "loss": 0.0538,
      "step": 155660
    },
    {
      "epoch": 0.25477373447758944,
      "grad_norm": 2.802795171737671,
      "learning_rate": 9.5529872234998e-06,
      "loss": 0.0545,
      "step": 155680
    },
    {
      "epoch": 0.2548064649162428,
      "grad_norm": 1.1243960857391357,
      "learning_rate": 9.552921331286282e-06,
      "loss": 0.0478,
      "step": 155700
    },
    {
      "epoch": 0.25483919535489613,
      "grad_norm": 0.863877534866333,
      "learning_rate": 9.552855439072766e-06,
      "loss": 0.0507,
      "step": 155720
    },
    {
      "epoch": 0.2548719257935495,
      "grad_norm": 2.6920289993286133,
      "learning_rate": 9.552789546859247e-06,
      "loss": 0.0631,
      "step": 155740
    },
    {
      "epoch": 0.2549046562322028,
      "grad_norm": 1.2755370140075684,
      "learning_rate": 9.552723654645731e-06,
      "loss": 0.0485,
      "step": 155760
    },
    {
      "epoch": 0.25493738667085614,
      "grad_norm": 1.5895003080368042,
      "learning_rate": 9.552657762432213e-06,
      "loss": 0.0512,
      "step": 155780
    },
    {
      "epoch": 0.2549701171095095,
      "grad_norm": 1.9882744550704956,
      "learning_rate": 9.552591870218696e-06,
      "loss": 0.0564,
      "step": 155800
    },
    {
      "epoch": 0.2550028475481628,
      "grad_norm": 0.8557612895965576,
      "learning_rate": 9.55252597800518e-06,
      "loss": 0.0734,
      "step": 155820
    },
    {
      "epoch": 0.2550355779868162,
      "grad_norm": 2.387432098388672,
      "learning_rate": 9.552460085791662e-06,
      "loss": 0.056,
      "step": 155840
    },
    {
      "epoch": 0.2550683084254695,
      "grad_norm": 2.1007399559020996,
      "learning_rate": 9.552394193578146e-06,
      "loss": 0.0713,
      "step": 155860
    },
    {
      "epoch": 0.25510103886412283,
      "grad_norm": 2.9263947010040283,
      "learning_rate": 9.552328301364629e-06,
      "loss": 0.0555,
      "step": 155880
    },
    {
      "epoch": 0.2551337693027762,
      "grad_norm": 3.4491095542907715,
      "learning_rate": 9.552262409151111e-06,
      "loss": 0.0516,
      "step": 155900
    },
    {
      "epoch": 0.2551664997414295,
      "grad_norm": 2.133384943008423,
      "learning_rate": 9.552196516937595e-06,
      "loss": 0.0586,
      "step": 155920
    },
    {
      "epoch": 0.2551992301800829,
      "grad_norm": 0.6072445511817932,
      "learning_rate": 9.552130624724078e-06,
      "loss": 0.0638,
      "step": 155940
    },
    {
      "epoch": 0.2552319606187362,
      "grad_norm": 1.7804754972457886,
      "learning_rate": 9.55206473251056e-06,
      "loss": 0.0627,
      "step": 155960
    },
    {
      "epoch": 0.2552646910573895,
      "grad_norm": 1.5071414709091187,
      "learning_rate": 9.551998840297044e-06,
      "loss": 0.0508,
      "step": 155980
    },
    {
      "epoch": 0.2552974214960429,
      "grad_norm": 1.272344708442688,
      "learning_rate": 9.551932948083526e-06,
      "loss": 0.0433,
      "step": 156000
    },
    {
      "epoch": 0.2553301519346962,
      "grad_norm": 4.490813255310059,
      "learning_rate": 9.551867055870009e-06,
      "loss": 0.0605,
      "step": 156020
    },
    {
      "epoch": 0.2553628823733496,
      "grad_norm": 1.4136203527450562,
      "learning_rate": 9.551801163656491e-06,
      "loss": 0.0617,
      "step": 156040
    },
    {
      "epoch": 0.2553956128120029,
      "grad_norm": 1.8903446197509766,
      "learning_rate": 9.551735271442975e-06,
      "loss": 0.0561,
      "step": 156060
    },
    {
      "epoch": 0.2554283432506562,
      "grad_norm": 7.505102157592773,
      "learning_rate": 9.551669379229457e-06,
      "loss": 0.0638,
      "step": 156080
    },
    {
      "epoch": 0.2554610736893096,
      "grad_norm": 4.724079132080078,
      "learning_rate": 9.55160348701594e-06,
      "loss": 0.0512,
      "step": 156100
    },
    {
      "epoch": 0.2554938041279629,
      "grad_norm": 2.1024723052978516,
      "learning_rate": 9.551537594802422e-06,
      "loss": 0.0612,
      "step": 156120
    },
    {
      "epoch": 0.2555265345666163,
      "grad_norm": 3.2209980487823486,
      "learning_rate": 9.551471702588906e-06,
      "loss": 0.0615,
      "step": 156140
    },
    {
      "epoch": 0.2555592650052696,
      "grad_norm": 1.8155759572982788,
      "learning_rate": 9.55140581037539e-06,
      "loss": 0.0427,
      "step": 156160
    },
    {
      "epoch": 0.2555919954439229,
      "grad_norm": 2.29390025138855,
      "learning_rate": 9.551339918161871e-06,
      "loss": 0.0518,
      "step": 156180
    },
    {
      "epoch": 0.2556247258825763,
      "grad_norm": 1.664345383644104,
      "learning_rate": 9.551274025948355e-06,
      "loss": 0.0682,
      "step": 156200
    },
    {
      "epoch": 0.2556574563212296,
      "grad_norm": 0.31755372881889343,
      "learning_rate": 9.551208133734837e-06,
      "loss": 0.0575,
      "step": 156220
    },
    {
      "epoch": 0.255690186759883,
      "grad_norm": 3.854461193084717,
      "learning_rate": 9.55114224152132e-06,
      "loss": 0.0597,
      "step": 156240
    },
    {
      "epoch": 0.2557229171985363,
      "grad_norm": 2.4538373947143555,
      "learning_rate": 9.551076349307804e-06,
      "loss": 0.0579,
      "step": 156260
    },
    {
      "epoch": 0.2557556476371896,
      "grad_norm": 2.5398168563842773,
      "learning_rate": 9.551010457094286e-06,
      "loss": 0.0667,
      "step": 156280
    },
    {
      "epoch": 0.255788378075843,
      "grad_norm": 1.0711814165115356,
      "learning_rate": 9.55094456488077e-06,
      "loss": 0.0476,
      "step": 156300
    },
    {
      "epoch": 0.2558211085144963,
      "grad_norm": 9.289960861206055,
      "learning_rate": 9.550878672667253e-06,
      "loss": 0.062,
      "step": 156320
    },
    {
      "epoch": 0.25585383895314967,
      "grad_norm": 0.8866823315620422,
      "learning_rate": 9.550812780453735e-06,
      "loss": 0.0513,
      "step": 156340
    },
    {
      "epoch": 0.255886569391803,
      "grad_norm": 1.064504861831665,
      "learning_rate": 9.550746888240218e-06,
      "loss": 0.0537,
      "step": 156360
    },
    {
      "epoch": 0.2559192998304563,
      "grad_norm": 9.028231620788574,
      "learning_rate": 9.5506809960267e-06,
      "loss": 0.0674,
      "step": 156380
    },
    {
      "epoch": 0.2559520302691097,
      "grad_norm": 0.9152376055717468,
      "learning_rate": 9.550615103813184e-06,
      "loss": 0.0628,
      "step": 156400
    },
    {
      "epoch": 0.255984760707763,
      "grad_norm": 1.5080456733703613,
      "learning_rate": 9.550549211599666e-06,
      "loss": 0.058,
      "step": 156420
    },
    {
      "epoch": 0.25601749114641636,
      "grad_norm": 2.2021918296813965,
      "learning_rate": 9.55048331938615e-06,
      "loss": 0.0613,
      "step": 156440
    },
    {
      "epoch": 0.2560502215850697,
      "grad_norm": 1.0990917682647705,
      "learning_rate": 9.550417427172631e-06,
      "loss": 0.0564,
      "step": 156460
    },
    {
      "epoch": 0.256082952023723,
      "grad_norm": 1.9964519739151,
      "learning_rate": 9.550351534959115e-06,
      "loss": 0.0559,
      "step": 156480
    },
    {
      "epoch": 0.25611568246237637,
      "grad_norm": 2.5733020305633545,
      "learning_rate": 9.550285642745597e-06,
      "loss": 0.0636,
      "step": 156500
    },
    {
      "epoch": 0.2561484129010297,
      "grad_norm": 2.54636287689209,
      "learning_rate": 9.55021975053208e-06,
      "loss": 0.0644,
      "step": 156520
    },
    {
      "epoch": 0.25618114333968306,
      "grad_norm": 0.998181939125061,
      "learning_rate": 9.550153858318564e-06,
      "loss": 0.0554,
      "step": 156540
    },
    {
      "epoch": 0.2562138737783364,
      "grad_norm": 3.572115659713745,
      "learning_rate": 9.550087966105046e-06,
      "loss": 0.0545,
      "step": 156560
    },
    {
      "epoch": 0.2562466042169897,
      "grad_norm": 3.13018798828125,
      "learning_rate": 9.55002207389153e-06,
      "loss": 0.053,
      "step": 156580
    },
    {
      "epoch": 0.25627933465564307,
      "grad_norm": 2.8918583393096924,
      "learning_rate": 9.549956181678011e-06,
      "loss": 0.05,
      "step": 156600
    },
    {
      "epoch": 0.2563120650942964,
      "grad_norm": 1.7203449010849,
      "learning_rate": 9.549890289464495e-06,
      "loss": 0.0506,
      "step": 156620
    },
    {
      "epoch": 0.25634479553294975,
      "grad_norm": 1.2989411354064941,
      "learning_rate": 9.549824397250977e-06,
      "loss": 0.0522,
      "step": 156640
    },
    {
      "epoch": 0.25637752597160307,
      "grad_norm": 1.2107311487197876,
      "learning_rate": 9.54975850503746e-06,
      "loss": 0.071,
      "step": 156660
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 1.8505765199661255,
      "learning_rate": 9.549692612823944e-06,
      "loss": 0.0751,
      "step": 156680
    },
    {
      "epoch": 0.25644298684890976,
      "grad_norm": 5.740604877471924,
      "learning_rate": 9.549626720610426e-06,
      "loss": 0.0615,
      "step": 156700
    },
    {
      "epoch": 0.2564757172875631,
      "grad_norm": 2.556586742401123,
      "learning_rate": 9.54956082839691e-06,
      "loss": 0.0474,
      "step": 156720
    },
    {
      "epoch": 0.25650844772621645,
      "grad_norm": 3.728196859359741,
      "learning_rate": 9.549494936183393e-06,
      "loss": 0.0626,
      "step": 156740
    },
    {
      "epoch": 0.25654117816486977,
      "grad_norm": 4.09365701675415,
      "learning_rate": 9.549429043969875e-06,
      "loss": 0.0521,
      "step": 156760
    },
    {
      "epoch": 0.2565739086035231,
      "grad_norm": 9.067207336425781,
      "learning_rate": 9.549363151756358e-06,
      "loss": 0.0494,
      "step": 156780
    },
    {
      "epoch": 0.25660663904217645,
      "grad_norm": 1.0394500494003296,
      "learning_rate": 9.54929725954284e-06,
      "loss": 0.0583,
      "step": 156800
    },
    {
      "epoch": 0.25663936948082977,
      "grad_norm": 1.8806359767913818,
      "learning_rate": 9.549231367329324e-06,
      "loss": 0.0597,
      "step": 156820
    },
    {
      "epoch": 0.25667209991948314,
      "grad_norm": 2.6042981147766113,
      "learning_rate": 9.549165475115806e-06,
      "loss": 0.0486,
      "step": 156840
    },
    {
      "epoch": 0.25670483035813646,
      "grad_norm": 10.548815727233887,
      "learning_rate": 9.54909958290229e-06,
      "loss": 0.0538,
      "step": 156860
    },
    {
      "epoch": 0.2567375607967898,
      "grad_norm": 2.165447235107422,
      "learning_rate": 9.549033690688773e-06,
      "loss": 0.0693,
      "step": 156880
    },
    {
      "epoch": 0.25677029123544315,
      "grad_norm": 1.9668248891830444,
      "learning_rate": 9.548967798475255e-06,
      "loss": 0.0635,
      "step": 156900
    },
    {
      "epoch": 0.25680302167409647,
      "grad_norm": 7.287317276000977,
      "learning_rate": 9.548901906261738e-06,
      "loss": 0.053,
      "step": 156920
    },
    {
      "epoch": 0.25683575211274984,
      "grad_norm": 3.967944860458374,
      "learning_rate": 9.54883601404822e-06,
      "loss": 0.0605,
      "step": 156940
    },
    {
      "epoch": 0.25686848255140315,
      "grad_norm": 1.1439355611801147,
      "learning_rate": 9.548770121834704e-06,
      "loss": 0.0675,
      "step": 156960
    },
    {
      "epoch": 0.25690121299005647,
      "grad_norm": 1.6037566661834717,
      "learning_rate": 9.548704229621186e-06,
      "loss": 0.0576,
      "step": 156980
    },
    {
      "epoch": 0.25693394342870984,
      "grad_norm": 1.0999993085861206,
      "learning_rate": 9.54863833740767e-06,
      "loss": 0.0513,
      "step": 157000
    },
    {
      "epoch": 0.25696667386736316,
      "grad_norm": 2.6323211193084717,
      "learning_rate": 9.548572445194151e-06,
      "loss": 0.0539,
      "step": 157020
    },
    {
      "epoch": 0.25699940430601653,
      "grad_norm": 2.529552698135376,
      "learning_rate": 9.548506552980635e-06,
      "loss": 0.0772,
      "step": 157040
    },
    {
      "epoch": 0.25703213474466985,
      "grad_norm": 0.6073001027107239,
      "learning_rate": 9.548440660767118e-06,
      "loss": 0.0593,
      "step": 157060
    },
    {
      "epoch": 0.25706486518332317,
      "grad_norm": 1.5583734512329102,
      "learning_rate": 9.5483747685536e-06,
      "loss": 0.0557,
      "step": 157080
    },
    {
      "epoch": 0.25709759562197654,
      "grad_norm": 1.7740015983581543,
      "learning_rate": 9.548308876340084e-06,
      "loss": 0.0468,
      "step": 157100
    },
    {
      "epoch": 0.25713032606062985,
      "grad_norm": 1.5382925271987915,
      "learning_rate": 9.548242984126567e-06,
      "loss": 0.052,
      "step": 157120
    },
    {
      "epoch": 0.2571630564992832,
      "grad_norm": 1.627392053604126,
      "learning_rate": 9.54817709191305e-06,
      "loss": 0.0549,
      "step": 157140
    },
    {
      "epoch": 0.25719578693793654,
      "grad_norm": 1.4951279163360596,
      "learning_rate": 9.548111199699533e-06,
      "loss": 0.0737,
      "step": 157160
    },
    {
      "epoch": 0.25722851737658986,
      "grad_norm": 0.7297223210334778,
      "learning_rate": 9.548045307486015e-06,
      "loss": 0.0437,
      "step": 157180
    },
    {
      "epoch": 0.25726124781524323,
      "grad_norm": 10.26019287109375,
      "learning_rate": 9.547979415272498e-06,
      "loss": 0.0574,
      "step": 157200
    },
    {
      "epoch": 0.25729397825389655,
      "grad_norm": 2.457047462463379,
      "learning_rate": 9.547913523058982e-06,
      "loss": 0.0514,
      "step": 157220
    },
    {
      "epoch": 0.2573267086925499,
      "grad_norm": 1.8717517852783203,
      "learning_rate": 9.547847630845464e-06,
      "loss": 0.0492,
      "step": 157240
    },
    {
      "epoch": 0.25735943913120324,
      "grad_norm": 1.9772789478302002,
      "learning_rate": 9.547781738631947e-06,
      "loss": 0.0492,
      "step": 157260
    },
    {
      "epoch": 0.25739216956985655,
      "grad_norm": 2.4913477897644043,
      "learning_rate": 9.54771584641843e-06,
      "loss": 0.0486,
      "step": 157280
    },
    {
      "epoch": 0.2574249000085099,
      "grad_norm": 1.7928212881088257,
      "learning_rate": 9.547649954204913e-06,
      "loss": 0.0747,
      "step": 157300
    },
    {
      "epoch": 0.25745763044716324,
      "grad_norm": 2.7255611419677734,
      "learning_rate": 9.547584061991395e-06,
      "loss": 0.0687,
      "step": 157320
    },
    {
      "epoch": 0.2574903608858166,
      "grad_norm": 1.0351780652999878,
      "learning_rate": 9.547518169777878e-06,
      "loss": 0.0472,
      "step": 157340
    },
    {
      "epoch": 0.25752309132446993,
      "grad_norm": 0.6953312754631042,
      "learning_rate": 9.54745227756436e-06,
      "loss": 0.0441,
      "step": 157360
    },
    {
      "epoch": 0.25755582176312325,
      "grad_norm": 1.6794856786727905,
      "learning_rate": 9.547386385350844e-06,
      "loss": 0.044,
      "step": 157380
    },
    {
      "epoch": 0.2575885522017766,
      "grad_norm": 2.3879265785217285,
      "learning_rate": 9.547320493137326e-06,
      "loss": 0.0495,
      "step": 157400
    },
    {
      "epoch": 0.25762128264042994,
      "grad_norm": 1.0771726369857788,
      "learning_rate": 9.54725460092381e-06,
      "loss": 0.0371,
      "step": 157420
    },
    {
      "epoch": 0.2576540130790833,
      "grad_norm": 2.5523860454559326,
      "learning_rate": 9.547188708710291e-06,
      "loss": 0.0523,
      "step": 157440
    },
    {
      "epoch": 0.2576867435177366,
      "grad_norm": 3.149095058441162,
      "learning_rate": 9.547122816496775e-06,
      "loss": 0.0691,
      "step": 157460
    },
    {
      "epoch": 0.25771947395638994,
      "grad_norm": 5.306729793548584,
      "learning_rate": 9.547056924283258e-06,
      "loss": 0.0846,
      "step": 157480
    },
    {
      "epoch": 0.2577522043950433,
      "grad_norm": 1.7202528715133667,
      "learning_rate": 9.546991032069742e-06,
      "loss": 0.0513,
      "step": 157500
    },
    {
      "epoch": 0.25778493483369663,
      "grad_norm": 1.4540988206863403,
      "learning_rate": 9.546925139856224e-06,
      "loss": 0.0573,
      "step": 157520
    },
    {
      "epoch": 0.25781766527235,
      "grad_norm": 2.3986611366271973,
      "learning_rate": 9.546859247642708e-06,
      "loss": 0.0586,
      "step": 157540
    },
    {
      "epoch": 0.2578503957110033,
      "grad_norm": 5.898473739624023,
      "learning_rate": 9.54679335542919e-06,
      "loss": 0.0555,
      "step": 157560
    },
    {
      "epoch": 0.25788312614965664,
      "grad_norm": 2.595400094985962,
      "learning_rate": 9.546727463215673e-06,
      "loss": 0.0361,
      "step": 157580
    },
    {
      "epoch": 0.25791585658831,
      "grad_norm": 3.307314157485962,
      "learning_rate": 9.546661571002157e-06,
      "loss": 0.048,
      "step": 157600
    },
    {
      "epoch": 0.2579485870269633,
      "grad_norm": 1.2679336071014404,
      "learning_rate": 9.546595678788638e-06,
      "loss": 0.0604,
      "step": 157620
    },
    {
      "epoch": 0.2579813174656167,
      "grad_norm": 3.2801589965820312,
      "learning_rate": 9.546529786575122e-06,
      "loss": 0.0505,
      "step": 157640
    },
    {
      "epoch": 0.25801404790427,
      "grad_norm": 0.9537969827651978,
      "learning_rate": 9.546463894361604e-06,
      "loss": 0.0607,
      "step": 157660
    },
    {
      "epoch": 0.25804677834292333,
      "grad_norm": 0.8429065942764282,
      "learning_rate": 9.546398002148088e-06,
      "loss": 0.0505,
      "step": 157680
    },
    {
      "epoch": 0.2580795087815767,
      "grad_norm": 0.978485643863678,
      "learning_rate": 9.54633210993457e-06,
      "loss": 0.0583,
      "step": 157700
    },
    {
      "epoch": 0.25811223922023,
      "grad_norm": 3.312548875808716,
      "learning_rate": 9.546266217721053e-06,
      "loss": 0.063,
      "step": 157720
    },
    {
      "epoch": 0.2581449696588834,
      "grad_norm": 2.9048588275909424,
      "learning_rate": 9.546200325507535e-06,
      "loss": 0.0667,
      "step": 157740
    },
    {
      "epoch": 0.2581777000975367,
      "grad_norm": 2.383505344390869,
      "learning_rate": 9.546134433294019e-06,
      "loss": 0.0565,
      "step": 157760
    },
    {
      "epoch": 0.25821043053619,
      "grad_norm": 4.279575824737549,
      "learning_rate": 9.5460685410805e-06,
      "loss": 0.0704,
      "step": 157780
    },
    {
      "epoch": 0.2582431609748434,
      "grad_norm": 6.240627765655518,
      "learning_rate": 9.546002648866984e-06,
      "loss": 0.0563,
      "step": 157800
    },
    {
      "epoch": 0.2582758914134967,
      "grad_norm": 1.3785731792449951,
      "learning_rate": 9.545936756653466e-06,
      "loss": 0.0485,
      "step": 157820
    },
    {
      "epoch": 0.2583086218521501,
      "grad_norm": 1.5873548984527588,
      "learning_rate": 9.54587086443995e-06,
      "loss": 0.0375,
      "step": 157840
    },
    {
      "epoch": 0.2583413522908034,
      "grad_norm": 2.699875831604004,
      "learning_rate": 9.545804972226433e-06,
      "loss": 0.0591,
      "step": 157860
    },
    {
      "epoch": 0.2583740827294567,
      "grad_norm": 1.7784253358840942,
      "learning_rate": 9.545739080012915e-06,
      "loss": 0.0569,
      "step": 157880
    },
    {
      "epoch": 0.2584068131681101,
      "grad_norm": 1.8629528284072876,
      "learning_rate": 9.545673187799399e-06,
      "loss": 0.0597,
      "step": 157900
    },
    {
      "epoch": 0.2584395436067634,
      "grad_norm": 0.6256996393203735,
      "learning_rate": 9.545607295585882e-06,
      "loss": 0.0682,
      "step": 157920
    },
    {
      "epoch": 0.2584722740454168,
      "grad_norm": 2.0696966648101807,
      "learning_rate": 9.545541403372364e-06,
      "loss": 0.0529,
      "step": 157940
    },
    {
      "epoch": 0.2585050044840701,
      "grad_norm": 1.2830181121826172,
      "learning_rate": 9.545475511158848e-06,
      "loss": 0.0664,
      "step": 157960
    },
    {
      "epoch": 0.2585377349227234,
      "grad_norm": 0.985479474067688,
      "learning_rate": 9.545409618945331e-06,
      "loss": 0.0535,
      "step": 157980
    },
    {
      "epoch": 0.2585704653613768,
      "grad_norm": 3.722376823425293,
      "learning_rate": 9.545343726731813e-06,
      "loss": 0.0557,
      "step": 158000
    },
    {
      "epoch": 0.2586031958000301,
      "grad_norm": 2.4493508338928223,
      "learning_rate": 9.545277834518297e-06,
      "loss": 0.0491,
      "step": 158020
    },
    {
      "epoch": 0.2586359262386835,
      "grad_norm": 2.080482006072998,
      "learning_rate": 9.545211942304779e-06,
      "loss": 0.0542,
      "step": 158040
    },
    {
      "epoch": 0.2586686566773368,
      "grad_norm": 0.8983863592147827,
      "learning_rate": 9.545146050091262e-06,
      "loss": 0.0479,
      "step": 158060
    },
    {
      "epoch": 0.2587013871159901,
      "grad_norm": 3.3662478923797607,
      "learning_rate": 9.545080157877744e-06,
      "loss": 0.0675,
      "step": 158080
    },
    {
      "epoch": 0.2587341175546435,
      "grad_norm": 2.1303884983062744,
      "learning_rate": 9.545014265664228e-06,
      "loss": 0.0544,
      "step": 158100
    },
    {
      "epoch": 0.2587668479932968,
      "grad_norm": 2.1564762592315674,
      "learning_rate": 9.54494837345071e-06,
      "loss": 0.0602,
      "step": 158120
    },
    {
      "epoch": 0.2587995784319502,
      "grad_norm": 2.5189127922058105,
      "learning_rate": 9.544882481237193e-06,
      "loss": 0.0583,
      "step": 158140
    },
    {
      "epoch": 0.2588323088706035,
      "grad_norm": 3.1088144779205322,
      "learning_rate": 9.544816589023675e-06,
      "loss": 0.0516,
      "step": 158160
    },
    {
      "epoch": 0.2588650393092568,
      "grad_norm": 1.9957555532455444,
      "learning_rate": 9.544750696810159e-06,
      "loss": 0.0537,
      "step": 158180
    },
    {
      "epoch": 0.2588977697479102,
      "grad_norm": 0.4966432452201843,
      "learning_rate": 9.54468480459664e-06,
      "loss": 0.0534,
      "step": 158200
    },
    {
      "epoch": 0.2589305001865635,
      "grad_norm": 2.2552576065063477,
      "learning_rate": 9.544618912383124e-06,
      "loss": 0.0447,
      "step": 158220
    },
    {
      "epoch": 0.2589632306252168,
      "grad_norm": 3.4176900386810303,
      "learning_rate": 9.544553020169608e-06,
      "loss": 0.0508,
      "step": 158240
    },
    {
      "epoch": 0.2589959610638702,
      "grad_norm": 2.1608176231384277,
      "learning_rate": 9.54448712795609e-06,
      "loss": 0.0618,
      "step": 158260
    },
    {
      "epoch": 0.2590286915025235,
      "grad_norm": 2.1743905544281006,
      "learning_rate": 9.544421235742573e-06,
      "loss": 0.0514,
      "step": 158280
    },
    {
      "epoch": 0.2590614219411769,
      "grad_norm": 4.84951114654541,
      "learning_rate": 9.544355343529057e-06,
      "loss": 0.0496,
      "step": 158300
    },
    {
      "epoch": 0.2590941523798302,
      "grad_norm": 6.840982913970947,
      "learning_rate": 9.544289451315539e-06,
      "loss": 0.0656,
      "step": 158320
    },
    {
      "epoch": 0.2591268828184835,
      "grad_norm": 1.2535605430603027,
      "learning_rate": 9.544223559102022e-06,
      "loss": 0.0548,
      "step": 158340
    },
    {
      "epoch": 0.2591596132571369,
      "grad_norm": 4.514525890350342,
      "learning_rate": 9.544157666888506e-06,
      "loss": 0.08,
      "step": 158360
    },
    {
      "epoch": 0.2591923436957902,
      "grad_norm": 3.1885390281677246,
      "learning_rate": 9.544091774674988e-06,
      "loss": 0.0532,
      "step": 158380
    },
    {
      "epoch": 0.25922507413444357,
      "grad_norm": 1.6374537944793701,
      "learning_rate": 9.544025882461471e-06,
      "loss": 0.0633,
      "step": 158400
    },
    {
      "epoch": 0.2592578045730969,
      "grad_norm": 2.527316093444824,
      "learning_rate": 9.543959990247953e-06,
      "loss": 0.0642,
      "step": 158420
    },
    {
      "epoch": 0.2592905350117502,
      "grad_norm": 1.6552255153656006,
      "learning_rate": 9.543894098034437e-06,
      "loss": 0.0537,
      "step": 158440
    },
    {
      "epoch": 0.2593232654504036,
      "grad_norm": 1.1804759502410889,
      "learning_rate": 9.543828205820919e-06,
      "loss": 0.0644,
      "step": 158460
    },
    {
      "epoch": 0.2593559958890569,
      "grad_norm": 1.01759672164917,
      "learning_rate": 9.543762313607402e-06,
      "loss": 0.0555,
      "step": 158480
    },
    {
      "epoch": 0.25938872632771026,
      "grad_norm": 1.9375927448272705,
      "learning_rate": 9.543696421393884e-06,
      "loss": 0.0539,
      "step": 158500
    },
    {
      "epoch": 0.2594214567663636,
      "grad_norm": 3.495469570159912,
      "learning_rate": 9.543630529180368e-06,
      "loss": 0.0636,
      "step": 158520
    },
    {
      "epoch": 0.2594541872050169,
      "grad_norm": 2.060168504714966,
      "learning_rate": 9.54356463696685e-06,
      "loss": 0.0522,
      "step": 158540
    },
    {
      "epoch": 0.25948691764367027,
      "grad_norm": 1.1276439428329468,
      "learning_rate": 9.543498744753333e-06,
      "loss": 0.0476,
      "step": 158560
    },
    {
      "epoch": 0.2595196480823236,
      "grad_norm": 3.8586995601654053,
      "learning_rate": 9.543432852539815e-06,
      "loss": 0.0679,
      "step": 158580
    },
    {
      "epoch": 0.25955237852097696,
      "grad_norm": 2.2437663078308105,
      "learning_rate": 9.543366960326299e-06,
      "loss": 0.0737,
      "step": 158600
    },
    {
      "epoch": 0.2595851089596303,
      "grad_norm": 4.257828712463379,
      "learning_rate": 9.54330106811278e-06,
      "loss": 0.0591,
      "step": 158620
    },
    {
      "epoch": 0.2596178393982836,
      "grad_norm": 0.8738788366317749,
      "learning_rate": 9.543235175899264e-06,
      "loss": 0.051,
      "step": 158640
    },
    {
      "epoch": 0.25965056983693696,
      "grad_norm": 1.6332274675369263,
      "learning_rate": 9.543169283685748e-06,
      "loss": 0.0521,
      "step": 158660
    },
    {
      "epoch": 0.2596833002755903,
      "grad_norm": 3.31394100189209,
      "learning_rate": 9.54310339147223e-06,
      "loss": 0.0529,
      "step": 158680
    },
    {
      "epoch": 0.25971603071424365,
      "grad_norm": 1.9157319068908691,
      "learning_rate": 9.543037499258713e-06,
      "loss": 0.0624,
      "step": 158700
    },
    {
      "epoch": 0.25974876115289697,
      "grad_norm": 2.9910192489624023,
      "learning_rate": 9.542971607045197e-06,
      "loss": 0.0536,
      "step": 158720
    },
    {
      "epoch": 0.2597814915915503,
      "grad_norm": 8.20278549194336,
      "learning_rate": 9.542905714831679e-06,
      "loss": 0.0408,
      "step": 158740
    },
    {
      "epoch": 0.25981422203020366,
      "grad_norm": 1.6939045190811157,
      "learning_rate": 9.542839822618162e-06,
      "loss": 0.0707,
      "step": 158760
    },
    {
      "epoch": 0.259846952468857,
      "grad_norm": 4.5916547775268555,
      "learning_rate": 9.542773930404646e-06,
      "loss": 0.0694,
      "step": 158780
    },
    {
      "epoch": 0.25987968290751035,
      "grad_norm": 1.125922679901123,
      "learning_rate": 9.542708038191128e-06,
      "loss": 0.0482,
      "step": 158800
    },
    {
      "epoch": 0.25991241334616366,
      "grad_norm": 4.584476470947266,
      "learning_rate": 9.542642145977611e-06,
      "loss": 0.061,
      "step": 158820
    },
    {
      "epoch": 0.259945143784817,
      "grad_norm": 2.0731730461120605,
      "learning_rate": 9.542576253764093e-06,
      "loss": 0.0632,
      "step": 158840
    },
    {
      "epoch": 0.25997787422347035,
      "grad_norm": 4.054751873016357,
      "learning_rate": 9.542510361550577e-06,
      "loss": 0.0555,
      "step": 158860
    },
    {
      "epoch": 0.26001060466212367,
      "grad_norm": 3.8904385566711426,
      "learning_rate": 9.542444469337059e-06,
      "loss": 0.0735,
      "step": 158880
    },
    {
      "epoch": 0.26004333510077704,
      "grad_norm": 1.945800542831421,
      "learning_rate": 9.542378577123542e-06,
      "loss": 0.045,
      "step": 158900
    },
    {
      "epoch": 0.26007606553943036,
      "grad_norm": 1.8431869745254517,
      "learning_rate": 9.542312684910024e-06,
      "loss": 0.0612,
      "step": 158920
    },
    {
      "epoch": 0.2601087959780837,
      "grad_norm": 3.7564969062805176,
      "learning_rate": 9.542246792696508e-06,
      "loss": 0.053,
      "step": 158940
    },
    {
      "epoch": 0.26014152641673705,
      "grad_norm": 1.9329363107681274,
      "learning_rate": 9.54218090048299e-06,
      "loss": 0.0592,
      "step": 158960
    },
    {
      "epoch": 0.26017425685539036,
      "grad_norm": 1.8002219200134277,
      "learning_rate": 9.542115008269473e-06,
      "loss": 0.054,
      "step": 158980
    },
    {
      "epoch": 0.26020698729404373,
      "grad_norm": 1.3622113466262817,
      "learning_rate": 9.542049116055957e-06,
      "loss": 0.0559,
      "step": 159000
    },
    {
      "epoch": 0.26023971773269705,
      "grad_norm": 2.538038969039917,
      "learning_rate": 9.541983223842439e-06,
      "loss": 0.0518,
      "step": 159020
    },
    {
      "epoch": 0.26027244817135037,
      "grad_norm": 1.59329092502594,
      "learning_rate": 9.541917331628922e-06,
      "loss": 0.0535,
      "step": 159040
    },
    {
      "epoch": 0.26030517861000374,
      "grad_norm": 2.433520793914795,
      "learning_rate": 9.541851439415404e-06,
      "loss": 0.0588,
      "step": 159060
    },
    {
      "epoch": 0.26033790904865706,
      "grad_norm": 2.0104620456695557,
      "learning_rate": 9.541785547201888e-06,
      "loss": 0.0567,
      "step": 159080
    },
    {
      "epoch": 0.26037063948731043,
      "grad_norm": 3.1495449542999268,
      "learning_rate": 9.541719654988371e-06,
      "loss": 0.0649,
      "step": 159100
    },
    {
      "epoch": 0.26040336992596375,
      "grad_norm": 1.12459397315979,
      "learning_rate": 9.541653762774853e-06,
      "loss": 0.0361,
      "step": 159120
    },
    {
      "epoch": 0.26043610036461706,
      "grad_norm": 4.012996196746826,
      "learning_rate": 9.541587870561337e-06,
      "loss": 0.0705,
      "step": 159140
    },
    {
      "epoch": 0.26046883080327043,
      "grad_norm": 3.6608545780181885,
      "learning_rate": 9.54152197834782e-06,
      "loss": 0.0668,
      "step": 159160
    },
    {
      "epoch": 0.26050156124192375,
      "grad_norm": 2.4973676204681396,
      "learning_rate": 9.541456086134302e-06,
      "loss": 0.0581,
      "step": 159180
    },
    {
      "epoch": 0.2605342916805771,
      "grad_norm": 2.697878122329712,
      "learning_rate": 9.541390193920786e-06,
      "loss": 0.0758,
      "step": 159200
    },
    {
      "epoch": 0.26056702211923044,
      "grad_norm": 2.674420118331909,
      "learning_rate": 9.541324301707268e-06,
      "loss": 0.063,
      "step": 159220
    },
    {
      "epoch": 0.26059975255788376,
      "grad_norm": 2.3319342136383057,
      "learning_rate": 9.541258409493751e-06,
      "loss": 0.0665,
      "step": 159240
    },
    {
      "epoch": 0.26063248299653713,
      "grad_norm": 2.148730993270874,
      "learning_rate": 9.541192517280233e-06,
      "loss": 0.0506,
      "step": 159260
    },
    {
      "epoch": 0.26066521343519045,
      "grad_norm": 1.3046518564224243,
      "learning_rate": 9.541126625066717e-06,
      "loss": 0.0552,
      "step": 159280
    },
    {
      "epoch": 0.2606979438738438,
      "grad_norm": 2.156144618988037,
      "learning_rate": 9.541060732853199e-06,
      "loss": 0.0611,
      "step": 159300
    },
    {
      "epoch": 0.26073067431249713,
      "grad_norm": 5.531362056732178,
      "learning_rate": 9.540994840639682e-06,
      "loss": 0.0475,
      "step": 159320
    },
    {
      "epoch": 0.26076340475115045,
      "grad_norm": 1.344061255455017,
      "learning_rate": 9.540928948426166e-06,
      "loss": 0.0593,
      "step": 159340
    },
    {
      "epoch": 0.2607961351898038,
      "grad_norm": 1.6476576328277588,
      "learning_rate": 9.540863056212648e-06,
      "loss": 0.0473,
      "step": 159360
    },
    {
      "epoch": 0.26082886562845714,
      "grad_norm": 3.4165241718292236,
      "learning_rate": 9.540797163999131e-06,
      "loss": 0.0561,
      "step": 159380
    },
    {
      "epoch": 0.2608615960671105,
      "grad_norm": 1.0015121698379517,
      "learning_rate": 9.540731271785613e-06,
      "loss": 0.0461,
      "step": 159400
    },
    {
      "epoch": 0.26089432650576383,
      "grad_norm": 1.088539958000183,
      "learning_rate": 9.540665379572097e-06,
      "loss": 0.0618,
      "step": 159420
    },
    {
      "epoch": 0.26092705694441715,
      "grad_norm": 2.151871919631958,
      "learning_rate": 9.540599487358579e-06,
      "loss": 0.0469,
      "step": 159440
    },
    {
      "epoch": 0.2609597873830705,
      "grad_norm": 3.8205058574676514,
      "learning_rate": 9.540533595145062e-06,
      "loss": 0.0476,
      "step": 159460
    },
    {
      "epoch": 0.26099251782172384,
      "grad_norm": 3.560318946838379,
      "learning_rate": 9.540467702931544e-06,
      "loss": 0.0518,
      "step": 159480
    },
    {
      "epoch": 0.2610252482603772,
      "grad_norm": 0.7922639846801758,
      "learning_rate": 9.540401810718028e-06,
      "loss": 0.0686,
      "step": 159500
    },
    {
      "epoch": 0.2610579786990305,
      "grad_norm": 0.9477920532226562,
      "learning_rate": 9.540335918504511e-06,
      "loss": 0.0562,
      "step": 159520
    },
    {
      "epoch": 0.26109070913768384,
      "grad_norm": 3.6206789016723633,
      "learning_rate": 9.540270026290993e-06,
      "loss": 0.05,
      "step": 159540
    },
    {
      "epoch": 0.2611234395763372,
      "grad_norm": 2.8739702701568604,
      "learning_rate": 9.540204134077477e-06,
      "loss": 0.0832,
      "step": 159560
    },
    {
      "epoch": 0.26115617001499053,
      "grad_norm": 0.7478557825088501,
      "learning_rate": 9.54013824186396e-06,
      "loss": 0.0529,
      "step": 159580
    },
    {
      "epoch": 0.2611889004536439,
      "grad_norm": 2.8661434650421143,
      "learning_rate": 9.540072349650442e-06,
      "loss": 0.0642,
      "step": 159600
    },
    {
      "epoch": 0.2612216308922972,
      "grad_norm": 2.2704081535339355,
      "learning_rate": 9.540006457436926e-06,
      "loss": 0.0562,
      "step": 159620
    },
    {
      "epoch": 0.26125436133095054,
      "grad_norm": 1.8918638229370117,
      "learning_rate": 9.539940565223408e-06,
      "loss": 0.0505,
      "step": 159640
    },
    {
      "epoch": 0.2612870917696039,
      "grad_norm": 1.266278624534607,
      "learning_rate": 9.539874673009891e-06,
      "loss": 0.0497,
      "step": 159660
    },
    {
      "epoch": 0.2613198222082572,
      "grad_norm": 2.036177158355713,
      "learning_rate": 9.539808780796375e-06,
      "loss": 0.0674,
      "step": 159680
    },
    {
      "epoch": 0.2613525526469106,
      "grad_norm": 1.9531981945037842,
      "learning_rate": 9.539742888582857e-06,
      "loss": 0.0636,
      "step": 159700
    },
    {
      "epoch": 0.2613852830855639,
      "grad_norm": 1.7506386041641235,
      "learning_rate": 9.53967699636934e-06,
      "loss": 0.0335,
      "step": 159720
    },
    {
      "epoch": 0.26141801352421723,
      "grad_norm": 0.5168870091438293,
      "learning_rate": 9.539611104155822e-06,
      "loss": 0.0498,
      "step": 159740
    },
    {
      "epoch": 0.2614507439628706,
      "grad_norm": 1.2938729524612427,
      "learning_rate": 9.539545211942306e-06,
      "loss": 0.0645,
      "step": 159760
    },
    {
      "epoch": 0.2614834744015239,
      "grad_norm": 0.896843433380127,
      "learning_rate": 9.539479319728788e-06,
      "loss": 0.0577,
      "step": 159780
    },
    {
      "epoch": 0.2615162048401773,
      "grad_norm": 1.775289535522461,
      "learning_rate": 9.539413427515272e-06,
      "loss": 0.067,
      "step": 159800
    },
    {
      "epoch": 0.2615489352788306,
      "grad_norm": 1.585209608078003,
      "learning_rate": 9.539347535301753e-06,
      "loss": 0.0479,
      "step": 159820
    },
    {
      "epoch": 0.2615816657174839,
      "grad_norm": 3.0397555828094482,
      "learning_rate": 9.539281643088237e-06,
      "loss": 0.0504,
      "step": 159840
    },
    {
      "epoch": 0.2616143961561373,
      "grad_norm": 0.48114120960235596,
      "learning_rate": 9.539215750874719e-06,
      "loss": 0.0589,
      "step": 159860
    },
    {
      "epoch": 0.2616471265947906,
      "grad_norm": 3.510741949081421,
      "learning_rate": 9.539149858661202e-06,
      "loss": 0.0602,
      "step": 159880
    },
    {
      "epoch": 0.261679857033444,
      "grad_norm": 3.718463659286499,
      "learning_rate": 9.539083966447686e-06,
      "loss": 0.0495,
      "step": 159900
    },
    {
      "epoch": 0.2617125874720973,
      "grad_norm": 1.6635828018188477,
      "learning_rate": 9.539018074234168e-06,
      "loss": 0.0539,
      "step": 159920
    },
    {
      "epoch": 0.2617453179107506,
      "grad_norm": 1.4848192930221558,
      "learning_rate": 9.538952182020652e-06,
      "loss": 0.0606,
      "step": 159940
    },
    {
      "epoch": 0.261778048349404,
      "grad_norm": 1.0804932117462158,
      "learning_rate": 9.538886289807135e-06,
      "loss": 0.0667,
      "step": 159960
    },
    {
      "epoch": 0.2618107787880573,
      "grad_norm": 2.9486989974975586,
      "learning_rate": 9.538820397593617e-06,
      "loss": 0.0604,
      "step": 159980
    },
    {
      "epoch": 0.2618435092267107,
      "grad_norm": 1.9377468824386597,
      "learning_rate": 9.5387545053801e-06,
      "loss": 0.0691,
      "step": 160000
    },
    {
      "epoch": 0.261876239665364,
      "grad_norm": 0.8600391745567322,
      "learning_rate": 9.538688613166583e-06,
      "loss": 0.0482,
      "step": 160020
    },
    {
      "epoch": 0.2619089701040173,
      "grad_norm": 2.2455475330352783,
      "learning_rate": 9.538622720953066e-06,
      "loss": 0.0568,
      "step": 160040
    },
    {
      "epoch": 0.2619417005426707,
      "grad_norm": 0.5901936292648315,
      "learning_rate": 9.53855682873955e-06,
      "loss": 0.0631,
      "step": 160060
    },
    {
      "epoch": 0.261974430981324,
      "grad_norm": 1.7935751676559448,
      "learning_rate": 9.538490936526032e-06,
      "loss": 0.0624,
      "step": 160080
    },
    {
      "epoch": 0.2620071614199774,
      "grad_norm": 1.4754995107650757,
      "learning_rate": 9.538425044312515e-06,
      "loss": 0.0518,
      "step": 160100
    },
    {
      "epoch": 0.2620398918586307,
      "grad_norm": 1.998016595840454,
      "learning_rate": 9.538359152098997e-06,
      "loss": 0.0531,
      "step": 160120
    },
    {
      "epoch": 0.262072622297284,
      "grad_norm": 1.7476074695587158,
      "learning_rate": 9.53829325988548e-06,
      "loss": 0.0506,
      "step": 160140
    },
    {
      "epoch": 0.2621053527359374,
      "grad_norm": 2.4352469444274902,
      "learning_rate": 9.538227367671963e-06,
      "loss": 0.0396,
      "step": 160160
    },
    {
      "epoch": 0.2621380831745907,
      "grad_norm": 3.4055418968200684,
      "learning_rate": 9.538161475458446e-06,
      "loss": 0.0633,
      "step": 160180
    },
    {
      "epoch": 0.26217081361324407,
      "grad_norm": 3.5471231937408447,
      "learning_rate": 9.538095583244928e-06,
      "loss": 0.0495,
      "step": 160200
    },
    {
      "epoch": 0.2622035440518974,
      "grad_norm": 3.129171371459961,
      "learning_rate": 9.538029691031412e-06,
      "loss": 0.0478,
      "step": 160220
    },
    {
      "epoch": 0.2622362744905507,
      "grad_norm": 0.7410193085670471,
      "learning_rate": 9.537963798817893e-06,
      "loss": 0.0563,
      "step": 160240
    },
    {
      "epoch": 0.2622690049292041,
      "grad_norm": 1.9025659561157227,
      "learning_rate": 9.537897906604377e-06,
      "loss": 0.0557,
      "step": 160260
    },
    {
      "epoch": 0.2623017353678574,
      "grad_norm": 1.4371776580810547,
      "learning_rate": 9.53783201439086e-06,
      "loss": 0.061,
      "step": 160280
    },
    {
      "epoch": 0.26233446580651076,
      "grad_norm": 1.9377866983413696,
      "learning_rate": 9.537766122177343e-06,
      "loss": 0.0543,
      "step": 160300
    },
    {
      "epoch": 0.2623671962451641,
      "grad_norm": 1.3596606254577637,
      "learning_rate": 9.537700229963826e-06,
      "loss": 0.0524,
      "step": 160320
    },
    {
      "epoch": 0.2623999266838174,
      "grad_norm": 1.771777868270874,
      "learning_rate": 9.53763433775031e-06,
      "loss": 0.054,
      "step": 160340
    },
    {
      "epoch": 0.26243265712247077,
      "grad_norm": 1.0766874551773071,
      "learning_rate": 9.537568445536792e-06,
      "loss": 0.0541,
      "step": 160360
    },
    {
      "epoch": 0.2624653875611241,
      "grad_norm": 1.069408655166626,
      "learning_rate": 9.537502553323275e-06,
      "loss": 0.0559,
      "step": 160380
    },
    {
      "epoch": 0.26249811799977746,
      "grad_norm": 1.0067968368530273,
      "learning_rate": 9.537436661109759e-06,
      "loss": 0.0483,
      "step": 160400
    },
    {
      "epoch": 0.2625308484384308,
      "grad_norm": 0.49267736077308655,
      "learning_rate": 9.53737076889624e-06,
      "loss": 0.0667,
      "step": 160420
    },
    {
      "epoch": 0.2625635788770841,
      "grad_norm": 1.008604884147644,
      "learning_rate": 9.537304876682724e-06,
      "loss": 0.059,
      "step": 160440
    },
    {
      "epoch": 0.26259630931573746,
      "grad_norm": 3.470512628555298,
      "learning_rate": 9.537238984469206e-06,
      "loss": 0.0557,
      "step": 160460
    },
    {
      "epoch": 0.2626290397543908,
      "grad_norm": 2.3513200283050537,
      "learning_rate": 9.53717309225569e-06,
      "loss": 0.0497,
      "step": 160480
    },
    {
      "epoch": 0.26266177019304415,
      "grad_norm": 1.555405616760254,
      "learning_rate": 9.537107200042172e-06,
      "loss": 0.0644,
      "step": 160500
    },
    {
      "epoch": 0.26269450063169747,
      "grad_norm": 2.6658098697662354,
      "learning_rate": 9.537041307828655e-06,
      "loss": 0.0564,
      "step": 160520
    },
    {
      "epoch": 0.2627272310703508,
      "grad_norm": 2.461517333984375,
      "learning_rate": 9.536975415615137e-06,
      "loss": 0.052,
      "step": 160540
    },
    {
      "epoch": 0.26275996150900416,
      "grad_norm": 2.672668695449829,
      "learning_rate": 9.53690952340162e-06,
      "loss": 0.05,
      "step": 160560
    },
    {
      "epoch": 0.2627926919476575,
      "grad_norm": 2.5110585689544678,
      "learning_rate": 9.536843631188103e-06,
      "loss": 0.064,
      "step": 160580
    },
    {
      "epoch": 0.26282542238631085,
      "grad_norm": 0.6994592547416687,
      "learning_rate": 9.536777738974586e-06,
      "loss": 0.0422,
      "step": 160600
    },
    {
      "epoch": 0.26285815282496416,
      "grad_norm": 2.9297235012054443,
      "learning_rate": 9.536711846761068e-06,
      "loss": 0.0709,
      "step": 160620
    },
    {
      "epoch": 0.2628908832636175,
      "grad_norm": 0.8155796527862549,
      "learning_rate": 9.536645954547552e-06,
      "loss": 0.0549,
      "step": 160640
    },
    {
      "epoch": 0.26292361370227085,
      "grad_norm": 1.365517497062683,
      "learning_rate": 9.536580062334034e-06,
      "loss": 0.0758,
      "step": 160660
    },
    {
      "epoch": 0.26295634414092417,
      "grad_norm": 1.5774599313735962,
      "learning_rate": 9.536514170120517e-06,
      "loss": 0.0527,
      "step": 160680
    },
    {
      "epoch": 0.26298907457957754,
      "grad_norm": 1.308756709098816,
      "learning_rate": 9.536448277907e-06,
      "loss": 0.0495,
      "step": 160700
    },
    {
      "epoch": 0.26302180501823086,
      "grad_norm": 2.4010720252990723,
      "learning_rate": 9.536382385693483e-06,
      "loss": 0.0491,
      "step": 160720
    },
    {
      "epoch": 0.2630545354568842,
      "grad_norm": 2.8833508491516113,
      "learning_rate": 9.536316493479966e-06,
      "loss": 0.0605,
      "step": 160740
    },
    {
      "epoch": 0.26308726589553755,
      "grad_norm": 1.4281896352767944,
      "learning_rate": 9.53625060126645e-06,
      "loss": 0.0753,
      "step": 160760
    },
    {
      "epoch": 0.26311999633419086,
      "grad_norm": 1.446312665939331,
      "learning_rate": 9.536184709052932e-06,
      "loss": 0.0561,
      "step": 160780
    },
    {
      "epoch": 0.26315272677284424,
      "grad_norm": 2.211094856262207,
      "learning_rate": 9.536118816839415e-06,
      "loss": 0.0511,
      "step": 160800
    },
    {
      "epoch": 0.26318545721149755,
      "grad_norm": 2.435791254043579,
      "learning_rate": 9.536052924625899e-06,
      "loss": 0.0557,
      "step": 160820
    },
    {
      "epoch": 0.26321818765015087,
      "grad_norm": 1.6212544441223145,
      "learning_rate": 9.53598703241238e-06,
      "loss": 0.0648,
      "step": 160840
    },
    {
      "epoch": 0.26325091808880424,
      "grad_norm": 3.897672176361084,
      "learning_rate": 9.535921140198864e-06,
      "loss": 0.059,
      "step": 160860
    },
    {
      "epoch": 0.26328364852745756,
      "grad_norm": 4.222901344299316,
      "learning_rate": 9.535855247985346e-06,
      "loss": 0.0442,
      "step": 160880
    },
    {
      "epoch": 0.26331637896611093,
      "grad_norm": 3.487264394760132,
      "learning_rate": 9.53578935577183e-06,
      "loss": 0.0647,
      "step": 160900
    },
    {
      "epoch": 0.26334910940476425,
      "grad_norm": 2.540086030960083,
      "learning_rate": 9.535723463558312e-06,
      "loss": 0.0563,
      "step": 160920
    },
    {
      "epoch": 0.26338183984341756,
      "grad_norm": 2.242074728012085,
      "learning_rate": 9.535657571344795e-06,
      "loss": 0.0463,
      "step": 160940
    },
    {
      "epoch": 0.26341457028207094,
      "grad_norm": 1.5480198860168457,
      "learning_rate": 9.535591679131277e-06,
      "loss": 0.0404,
      "step": 160960
    },
    {
      "epoch": 0.26344730072072425,
      "grad_norm": 4.6740288734436035,
      "learning_rate": 9.53552578691776e-06,
      "loss": 0.0667,
      "step": 160980
    },
    {
      "epoch": 0.2634800311593776,
      "grad_norm": 1.10960853099823,
      "learning_rate": 9.535459894704243e-06,
      "loss": 0.0645,
      "step": 161000
    },
    {
      "epoch": 0.26351276159803094,
      "grad_norm": 1.7069884538650513,
      "learning_rate": 9.535394002490726e-06,
      "loss": 0.0456,
      "step": 161020
    },
    {
      "epoch": 0.26354549203668426,
      "grad_norm": 3.485720157623291,
      "learning_rate": 9.535328110277208e-06,
      "loss": 0.0572,
      "step": 161040
    },
    {
      "epoch": 0.26357822247533763,
      "grad_norm": 1.9903799295425415,
      "learning_rate": 9.535262218063692e-06,
      "loss": 0.0466,
      "step": 161060
    },
    {
      "epoch": 0.26361095291399095,
      "grad_norm": 2.90446138381958,
      "learning_rate": 9.535196325850175e-06,
      "loss": 0.0602,
      "step": 161080
    },
    {
      "epoch": 0.26364368335264426,
      "grad_norm": 1.6043049097061157,
      "learning_rate": 9.535130433636657e-06,
      "loss": 0.0587,
      "step": 161100
    },
    {
      "epoch": 0.26367641379129764,
      "grad_norm": 3.057400941848755,
      "learning_rate": 9.53506454142314e-06,
      "loss": 0.0634,
      "step": 161120
    },
    {
      "epoch": 0.26370914422995095,
      "grad_norm": 5.164841651916504,
      "learning_rate": 9.534998649209624e-06,
      "loss": 0.0487,
      "step": 161140
    },
    {
      "epoch": 0.2637418746686043,
      "grad_norm": 2.5215580463409424,
      "learning_rate": 9.534932756996106e-06,
      "loss": 0.0578,
      "step": 161160
    },
    {
      "epoch": 0.26377460510725764,
      "grad_norm": 5.3214545249938965,
      "learning_rate": 9.53486686478259e-06,
      "loss": 0.0457,
      "step": 161180
    },
    {
      "epoch": 0.26380733554591096,
      "grad_norm": 1.3173201084136963,
      "learning_rate": 9.534800972569073e-06,
      "loss": 0.0416,
      "step": 161200
    },
    {
      "epoch": 0.26384006598456433,
      "grad_norm": 1.6518337726593018,
      "learning_rate": 9.534735080355555e-06,
      "loss": 0.0424,
      "step": 161220
    },
    {
      "epoch": 0.26387279642321765,
      "grad_norm": 9.104190826416016,
      "learning_rate": 9.534669188142039e-06,
      "loss": 0.052,
      "step": 161240
    },
    {
      "epoch": 0.263905526861871,
      "grad_norm": 1.4073560237884521,
      "learning_rate": 9.53460329592852e-06,
      "loss": 0.0625,
      "step": 161260
    },
    {
      "epoch": 0.26393825730052434,
      "grad_norm": 4.575629234313965,
      "learning_rate": 9.534537403715004e-06,
      "loss": 0.0529,
      "step": 161280
    },
    {
      "epoch": 0.26397098773917765,
      "grad_norm": 1.566740870475769,
      "learning_rate": 9.534471511501486e-06,
      "loss": 0.0623,
      "step": 161300
    },
    {
      "epoch": 0.264003718177831,
      "grad_norm": 3.7412033081054688,
      "learning_rate": 9.53440561928797e-06,
      "loss": 0.0591,
      "step": 161320
    },
    {
      "epoch": 0.26403644861648434,
      "grad_norm": 2.342013120651245,
      "learning_rate": 9.534339727074452e-06,
      "loss": 0.0623,
      "step": 161340
    },
    {
      "epoch": 0.2640691790551377,
      "grad_norm": 2.091186761856079,
      "learning_rate": 9.534273834860935e-06,
      "loss": 0.0626,
      "step": 161360
    },
    {
      "epoch": 0.26410190949379103,
      "grad_norm": 1.300087809562683,
      "learning_rate": 9.534207942647417e-06,
      "loss": 0.042,
      "step": 161380
    },
    {
      "epoch": 0.26413463993244435,
      "grad_norm": 1.632625937461853,
      "learning_rate": 9.534142050433901e-06,
      "loss": 0.0549,
      "step": 161400
    },
    {
      "epoch": 0.2641673703710977,
      "grad_norm": 1.8499950170516968,
      "learning_rate": 9.534076158220383e-06,
      "loss": 0.0453,
      "step": 161420
    },
    {
      "epoch": 0.26420010080975104,
      "grad_norm": 1.1950011253356934,
      "learning_rate": 9.534010266006866e-06,
      "loss": 0.0725,
      "step": 161440
    },
    {
      "epoch": 0.2642328312484044,
      "grad_norm": 3.252659797668457,
      "learning_rate": 9.53394437379335e-06,
      "loss": 0.0439,
      "step": 161460
    },
    {
      "epoch": 0.2642655616870577,
      "grad_norm": 1.4591193199157715,
      "learning_rate": 9.533878481579832e-06,
      "loss": 0.0539,
      "step": 161480
    },
    {
      "epoch": 0.26429829212571104,
      "grad_norm": 3.7991907596588135,
      "learning_rate": 9.533812589366315e-06,
      "loss": 0.0596,
      "step": 161500
    },
    {
      "epoch": 0.2643310225643644,
      "grad_norm": 2.0893375873565674,
      "learning_rate": 9.533746697152797e-06,
      "loss": 0.0505,
      "step": 161520
    },
    {
      "epoch": 0.26436375300301773,
      "grad_norm": 2.546656370162964,
      "learning_rate": 9.533680804939281e-06,
      "loss": 0.0571,
      "step": 161540
    },
    {
      "epoch": 0.2643964834416711,
      "grad_norm": 2.933748483657837,
      "learning_rate": 9.533614912725764e-06,
      "loss": 0.042,
      "step": 161560
    },
    {
      "epoch": 0.2644292138803244,
      "grad_norm": 2.590974807739258,
      "learning_rate": 9.533549020512246e-06,
      "loss": 0.0508,
      "step": 161580
    },
    {
      "epoch": 0.26446194431897774,
      "grad_norm": 3.008100748062134,
      "learning_rate": 9.53348312829873e-06,
      "loss": 0.0478,
      "step": 161600
    },
    {
      "epoch": 0.2644946747576311,
      "grad_norm": 3.1564385890960693,
      "learning_rate": 9.533417236085214e-06,
      "loss": 0.0496,
      "step": 161620
    },
    {
      "epoch": 0.2645274051962844,
      "grad_norm": 1.2037864923477173,
      "learning_rate": 9.533351343871695e-06,
      "loss": 0.0298,
      "step": 161640
    },
    {
      "epoch": 0.2645601356349378,
      "grad_norm": 2.099055290222168,
      "learning_rate": 9.533285451658179e-06,
      "loss": 0.066,
      "step": 161660
    },
    {
      "epoch": 0.2645928660735911,
      "grad_norm": 1.876025676727295,
      "learning_rate": 9.533219559444661e-06,
      "loss": 0.0634,
      "step": 161680
    },
    {
      "epoch": 0.26462559651224443,
      "grad_norm": 2.4159433841705322,
      "learning_rate": 9.533153667231145e-06,
      "loss": 0.0615,
      "step": 161700
    },
    {
      "epoch": 0.2646583269508978,
      "grad_norm": 5.2633585929870605,
      "learning_rate": 9.533087775017626e-06,
      "loss": 0.05,
      "step": 161720
    },
    {
      "epoch": 0.2646910573895511,
      "grad_norm": 1.9260085821151733,
      "learning_rate": 9.53302188280411e-06,
      "loss": 0.0617,
      "step": 161740
    },
    {
      "epoch": 0.2647237878282045,
      "grad_norm": 1.468223214149475,
      "learning_rate": 9.532955990590592e-06,
      "loss": 0.0447,
      "step": 161760
    },
    {
      "epoch": 0.2647565182668578,
      "grad_norm": 1.9043270349502563,
      "learning_rate": 9.532890098377075e-06,
      "loss": 0.0649,
      "step": 161780
    },
    {
      "epoch": 0.2647892487055111,
      "grad_norm": 1.0735735893249512,
      "learning_rate": 9.532824206163559e-06,
      "loss": 0.0529,
      "step": 161800
    },
    {
      "epoch": 0.2648219791441645,
      "grad_norm": 1.4976673126220703,
      "learning_rate": 9.532758313950041e-06,
      "loss": 0.0618,
      "step": 161820
    },
    {
      "epoch": 0.2648547095828178,
      "grad_norm": 3.7988314628601074,
      "learning_rate": 9.532692421736525e-06,
      "loss": 0.0611,
      "step": 161840
    },
    {
      "epoch": 0.2648874400214712,
      "grad_norm": 1.208587408065796,
      "learning_rate": 9.532626529523006e-06,
      "loss": 0.0579,
      "step": 161860
    },
    {
      "epoch": 0.2649201704601245,
      "grad_norm": 1.8955442905426025,
      "learning_rate": 9.53256063730949e-06,
      "loss": 0.065,
      "step": 161880
    },
    {
      "epoch": 0.2649529008987778,
      "grad_norm": 0.46050789952278137,
      "learning_rate": 9.532494745095972e-06,
      "loss": 0.044,
      "step": 161900
    },
    {
      "epoch": 0.2649856313374312,
      "grad_norm": 2.771545648574829,
      "learning_rate": 9.532428852882455e-06,
      "loss": 0.065,
      "step": 161920
    },
    {
      "epoch": 0.2650183617760845,
      "grad_norm": 1.750075101852417,
      "learning_rate": 9.532362960668939e-06,
      "loss": 0.0555,
      "step": 161940
    },
    {
      "epoch": 0.2650510922147379,
      "grad_norm": 0.5405501127243042,
      "learning_rate": 9.532297068455421e-06,
      "loss": 0.0418,
      "step": 161960
    },
    {
      "epoch": 0.2650838226533912,
      "grad_norm": 3.556281089782715,
      "learning_rate": 9.532231176241905e-06,
      "loss": 0.0496,
      "step": 161980
    },
    {
      "epoch": 0.2651165530920445,
      "grad_norm": 1.6231391429901123,
      "learning_rate": 9.532165284028388e-06,
      "loss": 0.056,
      "step": 162000
    },
    {
      "epoch": 0.2651492835306979,
      "grad_norm": 1.9009627103805542,
      "learning_rate": 9.53209939181487e-06,
      "loss": 0.0555,
      "step": 162020
    },
    {
      "epoch": 0.2651820139693512,
      "grad_norm": 1.1769144535064697,
      "learning_rate": 9.532033499601354e-06,
      "loss": 0.059,
      "step": 162040
    },
    {
      "epoch": 0.2652147444080046,
      "grad_norm": 0.7312498688697815,
      "learning_rate": 9.531967607387836e-06,
      "loss": 0.0593,
      "step": 162060
    },
    {
      "epoch": 0.2652474748466579,
      "grad_norm": 0.657779335975647,
      "learning_rate": 9.531901715174319e-06,
      "loss": 0.0576,
      "step": 162080
    },
    {
      "epoch": 0.2652802052853112,
      "grad_norm": 1.120363712310791,
      "learning_rate": 9.531835822960801e-06,
      "loss": 0.0549,
      "step": 162100
    },
    {
      "epoch": 0.2653129357239646,
      "grad_norm": 2.141343355178833,
      "learning_rate": 9.531769930747285e-06,
      "loss": 0.0541,
      "step": 162120
    },
    {
      "epoch": 0.2653456661626179,
      "grad_norm": 1.1320900917053223,
      "learning_rate": 9.531704038533766e-06,
      "loss": 0.0549,
      "step": 162140
    },
    {
      "epoch": 0.26537839660127127,
      "grad_norm": 1.7558320760726929,
      "learning_rate": 9.53163814632025e-06,
      "loss": 0.045,
      "step": 162160
    },
    {
      "epoch": 0.2654111270399246,
      "grad_norm": 1.595332145690918,
      "learning_rate": 9.531572254106734e-06,
      "loss": 0.0714,
      "step": 162180
    },
    {
      "epoch": 0.2654438574785779,
      "grad_norm": 2.7499823570251465,
      "learning_rate": 9.531506361893216e-06,
      "loss": 0.0541,
      "step": 162200
    },
    {
      "epoch": 0.2654765879172313,
      "grad_norm": 2.378926992416382,
      "learning_rate": 9.531440469679699e-06,
      "loss": 0.0421,
      "step": 162220
    },
    {
      "epoch": 0.2655093183558846,
      "grad_norm": 3.3041586875915527,
      "learning_rate": 9.531374577466181e-06,
      "loss": 0.0501,
      "step": 162240
    },
    {
      "epoch": 0.26554204879453797,
      "grad_norm": 19.028242111206055,
      "learning_rate": 9.531308685252665e-06,
      "loss": 0.0539,
      "step": 162260
    },
    {
      "epoch": 0.2655747792331913,
      "grad_norm": 0.181006520986557,
      "learning_rate": 9.531242793039146e-06,
      "loss": 0.0444,
      "step": 162280
    },
    {
      "epoch": 0.2656075096718446,
      "grad_norm": 4.481729030609131,
      "learning_rate": 9.53117690082563e-06,
      "loss": 0.0525,
      "step": 162300
    },
    {
      "epoch": 0.26564024011049797,
      "grad_norm": 3.8206944465637207,
      "learning_rate": 9.531111008612112e-06,
      "loss": 0.0557,
      "step": 162320
    },
    {
      "epoch": 0.2656729705491513,
      "grad_norm": 1.7623165845870972,
      "learning_rate": 9.531045116398596e-06,
      "loss": 0.0458,
      "step": 162340
    },
    {
      "epoch": 0.26570570098780466,
      "grad_norm": 1.865490436553955,
      "learning_rate": 9.530979224185079e-06,
      "loss": 0.0555,
      "step": 162360
    },
    {
      "epoch": 0.265738431426458,
      "grad_norm": 1.9230691194534302,
      "learning_rate": 9.530913331971561e-06,
      "loss": 0.0602,
      "step": 162380
    },
    {
      "epoch": 0.2657711618651113,
      "grad_norm": 1.091062307357788,
      "learning_rate": 9.530847439758045e-06,
      "loss": 0.0515,
      "step": 162400
    },
    {
      "epoch": 0.26580389230376467,
      "grad_norm": 2.745893955230713,
      "learning_rate": 9.530781547544528e-06,
      "loss": 0.0589,
      "step": 162420
    },
    {
      "epoch": 0.265836622742418,
      "grad_norm": 1.1079763174057007,
      "learning_rate": 9.53071565533101e-06,
      "loss": 0.0558,
      "step": 162440
    },
    {
      "epoch": 0.26586935318107136,
      "grad_norm": 3.727334976196289,
      "learning_rate": 9.530649763117494e-06,
      "loss": 0.0724,
      "step": 162460
    },
    {
      "epoch": 0.26590208361972467,
      "grad_norm": 0.6982540488243103,
      "learning_rate": 9.530583870903976e-06,
      "loss": 0.0562,
      "step": 162480
    },
    {
      "epoch": 0.265934814058378,
      "grad_norm": 1.3790338039398193,
      "learning_rate": 9.53051797869046e-06,
      "loss": 0.0649,
      "step": 162500
    },
    {
      "epoch": 0.26596754449703136,
      "grad_norm": 1.7079633474349976,
      "learning_rate": 9.530452086476943e-06,
      "loss": 0.0472,
      "step": 162520
    },
    {
      "epoch": 0.2660002749356847,
      "grad_norm": 2.5090391635894775,
      "learning_rate": 9.530386194263425e-06,
      "loss": 0.0474,
      "step": 162540
    },
    {
      "epoch": 0.26603300537433805,
      "grad_norm": 1.5805338621139526,
      "learning_rate": 9.530320302049908e-06,
      "loss": 0.0459,
      "step": 162560
    },
    {
      "epoch": 0.26606573581299137,
      "grad_norm": 1.948401689529419,
      "learning_rate": 9.53025440983639e-06,
      "loss": 0.0551,
      "step": 162580
    },
    {
      "epoch": 0.2660984662516447,
      "grad_norm": 1.349694848060608,
      "learning_rate": 9.530188517622874e-06,
      "loss": 0.0482,
      "step": 162600
    },
    {
      "epoch": 0.26613119669029806,
      "grad_norm": 1.572435975074768,
      "learning_rate": 9.530122625409356e-06,
      "loss": 0.0542,
      "step": 162620
    },
    {
      "epoch": 0.26616392712895137,
      "grad_norm": 4.555666923522949,
      "learning_rate": 9.53005673319584e-06,
      "loss": 0.0655,
      "step": 162640
    },
    {
      "epoch": 0.26619665756760474,
      "grad_norm": 1.7007603645324707,
      "learning_rate": 9.529990840982321e-06,
      "loss": 0.0507,
      "step": 162660
    },
    {
      "epoch": 0.26622938800625806,
      "grad_norm": 1.7083395719528198,
      "learning_rate": 9.529924948768805e-06,
      "loss": 0.0427,
      "step": 162680
    },
    {
      "epoch": 0.2662621184449114,
      "grad_norm": 1.151524543762207,
      "learning_rate": 9.529859056555287e-06,
      "loss": 0.0527,
      "step": 162700
    },
    {
      "epoch": 0.26629484888356475,
      "grad_norm": 2.115175247192383,
      "learning_rate": 9.52979316434177e-06,
      "loss": 0.0535,
      "step": 162720
    },
    {
      "epoch": 0.26632757932221807,
      "grad_norm": 1.3881852626800537,
      "learning_rate": 9.529727272128254e-06,
      "loss": 0.0558,
      "step": 162740
    },
    {
      "epoch": 0.26636030976087144,
      "grad_norm": 6.573080062866211,
      "learning_rate": 9.529661379914736e-06,
      "loss": 0.0699,
      "step": 162760
    },
    {
      "epoch": 0.26639304019952476,
      "grad_norm": 1.0085957050323486,
      "learning_rate": 9.52959548770122e-06,
      "loss": 0.0751,
      "step": 162780
    },
    {
      "epoch": 0.26642577063817807,
      "grad_norm": 1.854742407798767,
      "learning_rate": 9.529529595487703e-06,
      "loss": 0.0445,
      "step": 162800
    },
    {
      "epoch": 0.26645850107683144,
      "grad_norm": 1.5644782781600952,
      "learning_rate": 9.529463703274185e-06,
      "loss": 0.0531,
      "step": 162820
    },
    {
      "epoch": 0.26649123151548476,
      "grad_norm": 1.1115537881851196,
      "learning_rate": 9.529397811060668e-06,
      "loss": 0.0605,
      "step": 162840
    },
    {
      "epoch": 0.26652396195413813,
      "grad_norm": 1.6265555620193481,
      "learning_rate": 9.529331918847152e-06,
      "loss": 0.0753,
      "step": 162860
    },
    {
      "epoch": 0.26655669239279145,
      "grad_norm": 0.8218767642974854,
      "learning_rate": 9.529266026633634e-06,
      "loss": 0.0405,
      "step": 162880
    },
    {
      "epoch": 0.26658942283144477,
      "grad_norm": 0.6794264316558838,
      "learning_rate": 9.529200134420117e-06,
      "loss": 0.0506,
      "step": 162900
    },
    {
      "epoch": 0.26662215327009814,
      "grad_norm": 0.0930996760725975,
      "learning_rate": 9.5291342422066e-06,
      "loss": 0.0507,
      "step": 162920
    },
    {
      "epoch": 0.26665488370875146,
      "grad_norm": 1.5926313400268555,
      "learning_rate": 9.529068349993083e-06,
      "loss": 0.0777,
      "step": 162940
    },
    {
      "epoch": 0.26668761414740483,
      "grad_norm": 0.7527074813842773,
      "learning_rate": 9.529002457779565e-06,
      "loss": 0.0515,
      "step": 162960
    },
    {
      "epoch": 0.26672034458605814,
      "grad_norm": 1.5035552978515625,
      "learning_rate": 9.528936565566048e-06,
      "loss": 0.0454,
      "step": 162980
    },
    {
      "epoch": 0.26675307502471146,
      "grad_norm": 1.6028062105178833,
      "learning_rate": 9.52887067335253e-06,
      "loss": 0.0587,
      "step": 163000
    },
    {
      "epoch": 0.26678580546336483,
      "grad_norm": 1.7464016675949097,
      "learning_rate": 9.528804781139014e-06,
      "loss": 0.0579,
      "step": 163020
    },
    {
      "epoch": 0.26681853590201815,
      "grad_norm": 2.943967819213867,
      "learning_rate": 9.528738888925496e-06,
      "loss": 0.0583,
      "step": 163040
    },
    {
      "epoch": 0.2668512663406715,
      "grad_norm": 2.5665252208709717,
      "learning_rate": 9.52867299671198e-06,
      "loss": 0.0406,
      "step": 163060
    },
    {
      "epoch": 0.26688399677932484,
      "grad_norm": 1.2467358112335205,
      "learning_rate": 9.528607104498461e-06,
      "loss": 0.0648,
      "step": 163080
    },
    {
      "epoch": 0.26691672721797816,
      "grad_norm": 1.1688441038131714,
      "learning_rate": 9.528541212284945e-06,
      "loss": 0.0479,
      "step": 163100
    },
    {
      "epoch": 0.26694945765663153,
      "grad_norm": 1.5995792150497437,
      "learning_rate": 9.528475320071428e-06,
      "loss": 0.0746,
      "step": 163120
    },
    {
      "epoch": 0.26698218809528484,
      "grad_norm": 2.231410026550293,
      "learning_rate": 9.52840942785791e-06,
      "loss": 0.072,
      "step": 163140
    },
    {
      "epoch": 0.2670149185339382,
      "grad_norm": 0.9962986707687378,
      "learning_rate": 9.528343535644394e-06,
      "loss": 0.0461,
      "step": 163160
    },
    {
      "epoch": 0.26704764897259153,
      "grad_norm": 0.817957878112793,
      "learning_rate": 9.528277643430877e-06,
      "loss": 0.0485,
      "step": 163180
    },
    {
      "epoch": 0.26708037941124485,
      "grad_norm": 2.0490686893463135,
      "learning_rate": 9.52821175121736e-06,
      "loss": 0.0602,
      "step": 163200
    },
    {
      "epoch": 0.2671131098498982,
      "grad_norm": 2.8803305625915527,
      "learning_rate": 9.528145859003843e-06,
      "loss": 0.0505,
      "step": 163220
    },
    {
      "epoch": 0.26714584028855154,
      "grad_norm": 5.798638820648193,
      "learning_rate": 9.528079966790326e-06,
      "loss": 0.0625,
      "step": 163240
    },
    {
      "epoch": 0.2671785707272049,
      "grad_norm": 0.8278819918632507,
      "learning_rate": 9.528014074576808e-06,
      "loss": 0.0601,
      "step": 163260
    },
    {
      "epoch": 0.26721130116585823,
      "grad_norm": 1.3671634197235107,
      "learning_rate": 9.527948182363292e-06,
      "loss": 0.0682,
      "step": 163280
    },
    {
      "epoch": 0.26724403160451154,
      "grad_norm": 0.7234696745872498,
      "learning_rate": 9.527882290149774e-06,
      "loss": 0.0464,
      "step": 163300
    },
    {
      "epoch": 0.2672767620431649,
      "grad_norm": 2.2785661220550537,
      "learning_rate": 9.527816397936257e-06,
      "loss": 0.0738,
      "step": 163320
    },
    {
      "epoch": 0.26730949248181823,
      "grad_norm": 2.9804656505584717,
      "learning_rate": 9.52775050572274e-06,
      "loss": 0.049,
      "step": 163340
    },
    {
      "epoch": 0.2673422229204716,
      "grad_norm": 1.6956543922424316,
      "learning_rate": 9.527684613509223e-06,
      "loss": 0.0469,
      "step": 163360
    },
    {
      "epoch": 0.2673749533591249,
      "grad_norm": 2.008234739303589,
      "learning_rate": 9.527618721295705e-06,
      "loss": 0.0546,
      "step": 163380
    },
    {
      "epoch": 0.26740768379777824,
      "grad_norm": 0.6794144511222839,
      "learning_rate": 9.527552829082188e-06,
      "loss": 0.0747,
      "step": 163400
    },
    {
      "epoch": 0.2674404142364316,
      "grad_norm": 3.217604398727417,
      "learning_rate": 9.52748693686867e-06,
      "loss": 0.0689,
      "step": 163420
    },
    {
      "epoch": 0.26747314467508493,
      "grad_norm": 7.917726993560791,
      "learning_rate": 9.527421044655154e-06,
      "loss": 0.0562,
      "step": 163440
    },
    {
      "epoch": 0.2675058751137383,
      "grad_norm": 1.7221726179122925,
      "learning_rate": 9.527355152441636e-06,
      "loss": 0.0659,
      "step": 163460
    },
    {
      "epoch": 0.2675386055523916,
      "grad_norm": 6.807310104370117,
      "learning_rate": 9.52728926022812e-06,
      "loss": 0.0516,
      "step": 163480
    },
    {
      "epoch": 0.26757133599104493,
      "grad_norm": 1.3867042064666748,
      "learning_rate": 9.527223368014601e-06,
      "loss": 0.0623,
      "step": 163500
    },
    {
      "epoch": 0.2676040664296983,
      "grad_norm": 2.546105146408081,
      "learning_rate": 9.527157475801085e-06,
      "loss": 0.0637,
      "step": 163520
    },
    {
      "epoch": 0.2676367968683516,
      "grad_norm": 0.30136945843696594,
      "learning_rate": 9.527091583587568e-06,
      "loss": 0.0524,
      "step": 163540
    },
    {
      "epoch": 0.267669527307005,
      "grad_norm": 2.672497272491455,
      "learning_rate": 9.52702569137405e-06,
      "loss": 0.0556,
      "step": 163560
    },
    {
      "epoch": 0.2677022577456583,
      "grad_norm": 2.485142707824707,
      "learning_rate": 9.526959799160534e-06,
      "loss": 0.0566,
      "step": 163580
    },
    {
      "epoch": 0.26773498818431163,
      "grad_norm": 3.0340704917907715,
      "learning_rate": 9.526893906947017e-06,
      "loss": 0.0662,
      "step": 163600
    },
    {
      "epoch": 0.267767718622965,
      "grad_norm": 1.152146339416504,
      "learning_rate": 9.5268280147335e-06,
      "loss": 0.0443,
      "step": 163620
    },
    {
      "epoch": 0.2678004490616183,
      "grad_norm": 1.3622400760650635,
      "learning_rate": 9.526762122519983e-06,
      "loss": 0.0507,
      "step": 163640
    },
    {
      "epoch": 0.2678331795002717,
      "grad_norm": 2.2767999172210693,
      "learning_rate": 9.526696230306467e-06,
      "loss": 0.0406,
      "step": 163660
    },
    {
      "epoch": 0.267865909938925,
      "grad_norm": 1.2907291650772095,
      "learning_rate": 9.526630338092948e-06,
      "loss": 0.0653,
      "step": 163680
    },
    {
      "epoch": 0.2678986403775783,
      "grad_norm": 5.252847671508789,
      "learning_rate": 9.526564445879432e-06,
      "loss": 0.0599,
      "step": 163700
    },
    {
      "epoch": 0.2679313708162317,
      "grad_norm": 8.740495681762695,
      "learning_rate": 9.526498553665914e-06,
      "loss": 0.0536,
      "step": 163720
    },
    {
      "epoch": 0.267964101254885,
      "grad_norm": 4.84222412109375,
      "learning_rate": 9.526432661452398e-06,
      "loss": 0.0581,
      "step": 163740
    },
    {
      "epoch": 0.2679968316935384,
      "grad_norm": 1.4578256607055664,
      "learning_rate": 9.52636676923888e-06,
      "loss": 0.0592,
      "step": 163760
    },
    {
      "epoch": 0.2680295621321917,
      "grad_norm": 1.7432698011398315,
      "learning_rate": 9.526300877025363e-06,
      "loss": 0.0688,
      "step": 163780
    },
    {
      "epoch": 0.268062292570845,
      "grad_norm": 2.9819161891937256,
      "learning_rate": 9.526234984811845e-06,
      "loss": 0.0504,
      "step": 163800
    },
    {
      "epoch": 0.2680950230094984,
      "grad_norm": 0.7353777289390564,
      "learning_rate": 9.526169092598328e-06,
      "loss": 0.0705,
      "step": 163820
    },
    {
      "epoch": 0.2681277534481517,
      "grad_norm": 1.7560926675796509,
      "learning_rate": 9.52610320038481e-06,
      "loss": 0.0462,
      "step": 163840
    },
    {
      "epoch": 0.268160483886805,
      "grad_norm": 3.543325662612915,
      "learning_rate": 9.526037308171294e-06,
      "loss": 0.045,
      "step": 163860
    },
    {
      "epoch": 0.2681932143254584,
      "grad_norm": 6.07576322555542,
      "learning_rate": 9.525971415957776e-06,
      "loss": 0.0652,
      "step": 163880
    },
    {
      "epoch": 0.2682259447641117,
      "grad_norm": 1.8252086639404297,
      "learning_rate": 9.52590552374426e-06,
      "loss": 0.0542,
      "step": 163900
    },
    {
      "epoch": 0.2682586752027651,
      "grad_norm": 2.5661280155181885,
      "learning_rate": 9.525839631530743e-06,
      "loss": 0.0599,
      "step": 163920
    },
    {
      "epoch": 0.2682914056414184,
      "grad_norm": 3.1106221675872803,
      "learning_rate": 9.525773739317225e-06,
      "loss": 0.0513,
      "step": 163940
    },
    {
      "epoch": 0.2683241360800717,
      "grad_norm": 2.1934027671813965,
      "learning_rate": 9.525707847103708e-06,
      "loss": 0.0732,
      "step": 163960
    },
    {
      "epoch": 0.2683568665187251,
      "grad_norm": 3.737623453140259,
      "learning_rate": 9.525641954890192e-06,
      "loss": 0.0542,
      "step": 163980
    },
    {
      "epoch": 0.2683895969573784,
      "grad_norm": 1.6819431781768799,
      "learning_rate": 9.525576062676674e-06,
      "loss": 0.0633,
      "step": 164000
    },
    {
      "epoch": 0.2684223273960318,
      "grad_norm": 1.8888137340545654,
      "learning_rate": 9.525510170463158e-06,
      "loss": 0.0468,
      "step": 164020
    },
    {
      "epoch": 0.2684550578346851,
      "grad_norm": 1.796875,
      "learning_rate": 9.525444278249641e-06,
      "loss": 0.053,
      "step": 164040
    },
    {
      "epoch": 0.2684877882733384,
      "grad_norm": 1.4918103218078613,
      "learning_rate": 9.525378386036123e-06,
      "loss": 0.0458,
      "step": 164060
    },
    {
      "epoch": 0.2685205187119918,
      "grad_norm": 2.4722464084625244,
      "learning_rate": 9.525312493822607e-06,
      "loss": 0.055,
      "step": 164080
    },
    {
      "epoch": 0.2685532491506451,
      "grad_norm": 2.5921919345855713,
      "learning_rate": 9.525246601609089e-06,
      "loss": 0.049,
      "step": 164100
    },
    {
      "epoch": 0.2685859795892985,
      "grad_norm": 0.615174412727356,
      "learning_rate": 9.525180709395572e-06,
      "loss": 0.054,
      "step": 164120
    },
    {
      "epoch": 0.2686187100279518,
      "grad_norm": 0.8479446768760681,
      "learning_rate": 9.525114817182054e-06,
      "loss": 0.0508,
      "step": 164140
    },
    {
      "epoch": 0.2686514404666051,
      "grad_norm": 2.1240081787109375,
      "learning_rate": 9.525048924968538e-06,
      "loss": 0.0621,
      "step": 164160
    },
    {
      "epoch": 0.2686841709052585,
      "grad_norm": 2.9224181175231934,
      "learning_rate": 9.52498303275502e-06,
      "loss": 0.037,
      "step": 164180
    },
    {
      "epoch": 0.2687169013439118,
      "grad_norm": 0.7074977159500122,
      "learning_rate": 9.524917140541503e-06,
      "loss": 0.0554,
      "step": 164200
    },
    {
      "epoch": 0.26874963178256517,
      "grad_norm": 2.031697988510132,
      "learning_rate": 9.524851248327985e-06,
      "loss": 0.0584,
      "step": 164220
    },
    {
      "epoch": 0.2687823622212185,
      "grad_norm": 1.897621750831604,
      "learning_rate": 9.524785356114469e-06,
      "loss": 0.0508,
      "step": 164240
    },
    {
      "epoch": 0.2688150926598718,
      "grad_norm": 3.0017590522766113,
      "learning_rate": 9.524719463900952e-06,
      "loss": 0.0419,
      "step": 164260
    },
    {
      "epoch": 0.2688478230985252,
      "grad_norm": 0.7953546047210693,
      "learning_rate": 9.524653571687434e-06,
      "loss": 0.0553,
      "step": 164280
    },
    {
      "epoch": 0.2688805535371785,
      "grad_norm": 3.6507604122161865,
      "learning_rate": 9.524587679473918e-06,
      "loss": 0.064,
      "step": 164300
    },
    {
      "epoch": 0.26891328397583186,
      "grad_norm": 0.9828212261199951,
      "learning_rate": 9.5245217872604e-06,
      "loss": 0.059,
      "step": 164320
    },
    {
      "epoch": 0.2689460144144852,
      "grad_norm": 2.323702812194824,
      "learning_rate": 9.524455895046883e-06,
      "loss": 0.0486,
      "step": 164340
    },
    {
      "epoch": 0.2689787448531385,
      "grad_norm": 2.4978628158569336,
      "learning_rate": 9.524390002833365e-06,
      "loss": 0.0559,
      "step": 164360
    },
    {
      "epoch": 0.26901147529179187,
      "grad_norm": 1.2884905338287354,
      "learning_rate": 9.524324110619849e-06,
      "loss": 0.0555,
      "step": 164380
    },
    {
      "epoch": 0.2690442057304452,
      "grad_norm": 2.0112807750701904,
      "learning_rate": 9.524258218406332e-06,
      "loss": 0.0438,
      "step": 164400
    },
    {
      "epoch": 0.26907693616909856,
      "grad_norm": 1.6637991666793823,
      "learning_rate": 9.524192326192814e-06,
      "loss": 0.0464,
      "step": 164420
    },
    {
      "epoch": 0.2691096666077519,
      "grad_norm": 1.2924563884735107,
      "learning_rate": 9.524126433979298e-06,
      "loss": 0.0609,
      "step": 164440
    },
    {
      "epoch": 0.2691423970464052,
      "grad_norm": 1.6668943166732788,
      "learning_rate": 9.524060541765781e-06,
      "loss": 0.0562,
      "step": 164460
    },
    {
      "epoch": 0.26917512748505856,
      "grad_norm": 3.495784282684326,
      "learning_rate": 9.523994649552263e-06,
      "loss": 0.0574,
      "step": 164480
    },
    {
      "epoch": 0.2692078579237119,
      "grad_norm": 2.105626106262207,
      "learning_rate": 9.523928757338747e-06,
      "loss": 0.0443,
      "step": 164500
    },
    {
      "epoch": 0.26924058836236525,
      "grad_norm": 2.8290820121765137,
      "learning_rate": 9.523862865125229e-06,
      "loss": 0.0489,
      "step": 164520
    },
    {
      "epoch": 0.26927331880101857,
      "grad_norm": 0.8710267543792725,
      "learning_rate": 9.523796972911712e-06,
      "loss": 0.053,
      "step": 164540
    },
    {
      "epoch": 0.2693060492396719,
      "grad_norm": 2.8831989765167236,
      "learning_rate": 9.523731080698194e-06,
      "loss": 0.0619,
      "step": 164560
    },
    {
      "epoch": 0.26933877967832526,
      "grad_norm": 6.293511390686035,
      "learning_rate": 9.523665188484678e-06,
      "loss": 0.0546,
      "step": 164580
    },
    {
      "epoch": 0.2693715101169786,
      "grad_norm": 3.1992149353027344,
      "learning_rate": 9.52359929627116e-06,
      "loss": 0.0448,
      "step": 164600
    },
    {
      "epoch": 0.26940424055563195,
      "grad_norm": 1.481582522392273,
      "learning_rate": 9.523533404057643e-06,
      "loss": 0.045,
      "step": 164620
    },
    {
      "epoch": 0.26943697099428526,
      "grad_norm": 0.5458257794380188,
      "learning_rate": 9.523467511844127e-06,
      "loss": 0.0404,
      "step": 164640
    },
    {
      "epoch": 0.2694697014329386,
      "grad_norm": 0.6700718998908997,
      "learning_rate": 9.523401619630609e-06,
      "loss": 0.0445,
      "step": 164660
    },
    {
      "epoch": 0.26950243187159195,
      "grad_norm": 8.402071952819824,
      "learning_rate": 9.523335727417092e-06,
      "loss": 0.0571,
      "step": 164680
    },
    {
      "epoch": 0.26953516231024527,
      "grad_norm": 3.1657330989837646,
      "learning_rate": 9.523269835203574e-06,
      "loss": 0.0355,
      "step": 164700
    },
    {
      "epoch": 0.26956789274889864,
      "grad_norm": 0.6417539715766907,
      "learning_rate": 9.523203942990058e-06,
      "loss": 0.0482,
      "step": 164720
    },
    {
      "epoch": 0.26960062318755196,
      "grad_norm": 3.8022139072418213,
      "learning_rate": 9.52313805077654e-06,
      "loss": 0.0621,
      "step": 164740
    },
    {
      "epoch": 0.2696333536262053,
      "grad_norm": 3.342585563659668,
      "learning_rate": 9.523072158563023e-06,
      "loss": 0.0566,
      "step": 164760
    },
    {
      "epoch": 0.26966608406485865,
      "grad_norm": 1.9436078071594238,
      "learning_rate": 9.523006266349507e-06,
      "loss": 0.0448,
      "step": 164780
    },
    {
      "epoch": 0.26969881450351196,
      "grad_norm": 1.1476242542266846,
      "learning_rate": 9.522940374135989e-06,
      "loss": 0.0662,
      "step": 164800
    },
    {
      "epoch": 0.26973154494216534,
      "grad_norm": 4.636447429656982,
      "learning_rate": 9.522874481922472e-06,
      "loss": 0.0558,
      "step": 164820
    },
    {
      "epoch": 0.26976427538081865,
      "grad_norm": 1.6430110931396484,
      "learning_rate": 9.522808589708956e-06,
      "loss": 0.0442,
      "step": 164840
    },
    {
      "epoch": 0.26979700581947197,
      "grad_norm": 0.5923351645469666,
      "learning_rate": 9.522742697495438e-06,
      "loss": 0.0481,
      "step": 164860
    },
    {
      "epoch": 0.26982973625812534,
      "grad_norm": 1.165725827217102,
      "learning_rate": 9.522676805281921e-06,
      "loss": 0.0629,
      "step": 164880
    },
    {
      "epoch": 0.26986246669677866,
      "grad_norm": 1.968703269958496,
      "learning_rate": 9.522610913068403e-06,
      "loss": 0.0439,
      "step": 164900
    },
    {
      "epoch": 0.26989519713543203,
      "grad_norm": 1.529948353767395,
      "learning_rate": 9.522545020854887e-06,
      "loss": 0.0523,
      "step": 164920
    },
    {
      "epoch": 0.26992792757408535,
      "grad_norm": 2.7938358783721924,
      "learning_rate": 9.522479128641369e-06,
      "loss": 0.0452,
      "step": 164940
    },
    {
      "epoch": 0.26996065801273866,
      "grad_norm": 0.45264995098114014,
      "learning_rate": 9.522413236427852e-06,
      "loss": 0.0394,
      "step": 164960
    },
    {
      "epoch": 0.26999338845139204,
      "grad_norm": 1.9671345949172974,
      "learning_rate": 9.522347344214336e-06,
      "loss": 0.0492,
      "step": 164980
    },
    {
      "epoch": 0.27002611889004535,
      "grad_norm": 2.8979732990264893,
      "learning_rate": 9.522281452000818e-06,
      "loss": 0.0587,
      "step": 165000
    },
    {
      "epoch": 0.2700588493286987,
      "grad_norm": 2.7238082885742188,
      "learning_rate": 9.522215559787301e-06,
      "loss": 0.0693,
      "step": 165020
    },
    {
      "epoch": 0.27009157976735204,
      "grad_norm": 3.088984727859497,
      "learning_rate": 9.522149667573783e-06,
      "loss": 0.0434,
      "step": 165040
    },
    {
      "epoch": 0.27012431020600536,
      "grad_norm": 0.3147684633731842,
      "learning_rate": 9.522083775360267e-06,
      "loss": 0.0658,
      "step": 165060
    },
    {
      "epoch": 0.27015704064465873,
      "grad_norm": 0.774106502532959,
      "learning_rate": 9.522017883146749e-06,
      "loss": 0.0442,
      "step": 165080
    },
    {
      "epoch": 0.27018977108331205,
      "grad_norm": 2.9901340007781982,
      "learning_rate": 9.521951990933232e-06,
      "loss": 0.0542,
      "step": 165100
    },
    {
      "epoch": 0.2702225015219654,
      "grad_norm": 1.2538397312164307,
      "learning_rate": 9.521886098719714e-06,
      "loss": 0.0544,
      "step": 165120
    },
    {
      "epoch": 0.27025523196061874,
      "grad_norm": 0.8940694332122803,
      "learning_rate": 9.521820206506198e-06,
      "loss": 0.0721,
      "step": 165140
    },
    {
      "epoch": 0.27028796239927205,
      "grad_norm": 2.803097724914551,
      "learning_rate": 9.52175431429268e-06,
      "loss": 0.0584,
      "step": 165160
    },
    {
      "epoch": 0.2703206928379254,
      "grad_norm": 2.07704758644104,
      "learning_rate": 9.521688422079163e-06,
      "loss": 0.0414,
      "step": 165180
    },
    {
      "epoch": 0.27035342327657874,
      "grad_norm": 0.7739445567131042,
      "learning_rate": 9.521622529865647e-06,
      "loss": 0.0513,
      "step": 165200
    },
    {
      "epoch": 0.2703861537152321,
      "grad_norm": 1.8384507894515991,
      "learning_rate": 9.521556637652129e-06,
      "loss": 0.0528,
      "step": 165220
    },
    {
      "epoch": 0.27041888415388543,
      "grad_norm": 5.994294166564941,
      "learning_rate": 9.521490745438612e-06,
      "loss": 0.0512,
      "step": 165240
    },
    {
      "epoch": 0.27045161459253875,
      "grad_norm": 0.9796431064605713,
      "learning_rate": 9.521424853225096e-06,
      "loss": 0.0486,
      "step": 165260
    },
    {
      "epoch": 0.2704843450311921,
      "grad_norm": 1.438956618309021,
      "learning_rate": 9.521358961011578e-06,
      "loss": 0.0471,
      "step": 165280
    },
    {
      "epoch": 0.27051707546984544,
      "grad_norm": 2.1569314002990723,
      "learning_rate": 9.521293068798061e-06,
      "loss": 0.0603,
      "step": 165300
    },
    {
      "epoch": 0.2705498059084988,
      "grad_norm": 3.1057915687561035,
      "learning_rate": 9.521227176584545e-06,
      "loss": 0.0598,
      "step": 165320
    },
    {
      "epoch": 0.2705825363471521,
      "grad_norm": 1.413896083831787,
      "learning_rate": 9.521161284371027e-06,
      "loss": 0.0532,
      "step": 165340
    },
    {
      "epoch": 0.27061526678580544,
      "grad_norm": 2.0114636421203613,
      "learning_rate": 9.52109539215751e-06,
      "loss": 0.0543,
      "step": 165360
    },
    {
      "epoch": 0.2706479972244588,
      "grad_norm": 2.473567008972168,
      "learning_rate": 9.521029499943992e-06,
      "loss": 0.0538,
      "step": 165380
    },
    {
      "epoch": 0.27068072766311213,
      "grad_norm": 1.0050392150878906,
      "learning_rate": 9.520963607730476e-06,
      "loss": 0.0435,
      "step": 165400
    },
    {
      "epoch": 0.2707134581017655,
      "grad_norm": 0.1612797975540161,
      "learning_rate": 9.520897715516958e-06,
      "loss": 0.0564,
      "step": 165420
    },
    {
      "epoch": 0.2707461885404188,
      "grad_norm": 1.3323558568954468,
      "learning_rate": 9.520831823303441e-06,
      "loss": 0.059,
      "step": 165440
    },
    {
      "epoch": 0.27077891897907214,
      "grad_norm": 2.485219955444336,
      "learning_rate": 9.520765931089923e-06,
      "loss": 0.0647,
      "step": 165460
    },
    {
      "epoch": 0.2708116494177255,
      "grad_norm": 0.21541833877563477,
      "learning_rate": 9.520700038876407e-06,
      "loss": 0.0691,
      "step": 165480
    },
    {
      "epoch": 0.2708443798563788,
      "grad_norm": 0.7289693355560303,
      "learning_rate": 9.520634146662889e-06,
      "loss": 0.0608,
      "step": 165500
    },
    {
      "epoch": 0.2708771102950322,
      "grad_norm": 3.05953311920166,
      "learning_rate": 9.520568254449372e-06,
      "loss": 0.061,
      "step": 165520
    },
    {
      "epoch": 0.2709098407336855,
      "grad_norm": 1.5192227363586426,
      "learning_rate": 9.520502362235854e-06,
      "loss": 0.0558,
      "step": 165540
    },
    {
      "epoch": 0.27094257117233883,
      "grad_norm": 3.310349225997925,
      "learning_rate": 9.520436470022338e-06,
      "loss": 0.0432,
      "step": 165560
    },
    {
      "epoch": 0.2709753016109922,
      "grad_norm": 1.3241486549377441,
      "learning_rate": 9.520370577808821e-06,
      "loss": 0.0607,
      "step": 165580
    },
    {
      "epoch": 0.2710080320496455,
      "grad_norm": 3.0395381450653076,
      "learning_rate": 9.520304685595303e-06,
      "loss": 0.0679,
      "step": 165600
    },
    {
      "epoch": 0.2710407624882989,
      "grad_norm": 2.3833911418914795,
      "learning_rate": 9.520238793381787e-06,
      "loss": 0.0562,
      "step": 165620
    },
    {
      "epoch": 0.2710734929269522,
      "grad_norm": 2.821981430053711,
      "learning_rate": 9.52017290116827e-06,
      "loss": 0.062,
      "step": 165640
    },
    {
      "epoch": 0.2711062233656055,
      "grad_norm": 1.9980299472808838,
      "learning_rate": 9.520107008954752e-06,
      "loss": 0.0561,
      "step": 165660
    },
    {
      "epoch": 0.2711389538042589,
      "grad_norm": 2.0134637355804443,
      "learning_rate": 9.520041116741236e-06,
      "loss": 0.0537,
      "step": 165680
    },
    {
      "epoch": 0.2711716842429122,
      "grad_norm": 1.7220181226730347,
      "learning_rate": 9.51997522452772e-06,
      "loss": 0.052,
      "step": 165700
    },
    {
      "epoch": 0.2712044146815656,
      "grad_norm": 1.4853862524032593,
      "learning_rate": 9.519909332314201e-06,
      "loss": 0.0592,
      "step": 165720
    },
    {
      "epoch": 0.2712371451202189,
      "grad_norm": 2.026857614517212,
      "learning_rate": 9.519843440100685e-06,
      "loss": 0.048,
      "step": 165740
    },
    {
      "epoch": 0.2712698755588722,
      "grad_norm": 0.9216122627258301,
      "learning_rate": 9.519777547887167e-06,
      "loss": 0.0555,
      "step": 165760
    },
    {
      "epoch": 0.2713026059975256,
      "grad_norm": 9.840765953063965,
      "learning_rate": 9.51971165567365e-06,
      "loss": 0.0619,
      "step": 165780
    },
    {
      "epoch": 0.2713353364361789,
      "grad_norm": 1.8164494037628174,
      "learning_rate": 9.519645763460132e-06,
      "loss": 0.0458,
      "step": 165800
    },
    {
      "epoch": 0.2713680668748323,
      "grad_norm": 1.5482679605484009,
      "learning_rate": 9.519579871246616e-06,
      "loss": 0.0456,
      "step": 165820
    },
    {
      "epoch": 0.2714007973134856,
      "grad_norm": 2.5354177951812744,
      "learning_rate": 9.519513979033098e-06,
      "loss": 0.0545,
      "step": 165840
    },
    {
      "epoch": 0.2714335277521389,
      "grad_norm": 2.4471864700317383,
      "learning_rate": 9.519448086819581e-06,
      "loss": 0.038,
      "step": 165860
    },
    {
      "epoch": 0.2714662581907923,
      "grad_norm": 0.6803167462348938,
      "learning_rate": 9.519382194606063e-06,
      "loss": 0.0534,
      "step": 165880
    },
    {
      "epoch": 0.2714989886294456,
      "grad_norm": 2.174297332763672,
      "learning_rate": 9.519316302392547e-06,
      "loss": 0.0631,
      "step": 165900
    },
    {
      "epoch": 0.271531719068099,
      "grad_norm": 2.297605037689209,
      "learning_rate": 9.519250410179029e-06,
      "loss": 0.0417,
      "step": 165920
    },
    {
      "epoch": 0.2715644495067523,
      "grad_norm": 0.9793692827224731,
      "learning_rate": 9.519184517965512e-06,
      "loss": 0.0423,
      "step": 165940
    },
    {
      "epoch": 0.2715971799454056,
      "grad_norm": 1.9826778173446655,
      "learning_rate": 9.519118625751996e-06,
      "loss": 0.0553,
      "step": 165960
    },
    {
      "epoch": 0.271629910384059,
      "grad_norm": 1.1093926429748535,
      "learning_rate": 9.519052733538478e-06,
      "loss": 0.0537,
      "step": 165980
    },
    {
      "epoch": 0.2716626408227123,
      "grad_norm": 1.7387566566467285,
      "learning_rate": 9.518986841324961e-06,
      "loss": 0.058,
      "step": 166000
    },
    {
      "epoch": 0.27169537126136567,
      "grad_norm": 2.2178823947906494,
      "learning_rate": 9.518920949111445e-06,
      "loss": 0.0428,
      "step": 166020
    },
    {
      "epoch": 0.271728101700019,
      "grad_norm": 2.5702733993530273,
      "learning_rate": 9.518855056897927e-06,
      "loss": 0.0489,
      "step": 166040
    },
    {
      "epoch": 0.2717608321386723,
      "grad_norm": 1.555428385734558,
      "learning_rate": 9.51878916468441e-06,
      "loss": 0.0532,
      "step": 166060
    },
    {
      "epoch": 0.2717935625773257,
      "grad_norm": 5.905562877655029,
      "learning_rate": 9.518723272470894e-06,
      "loss": 0.0475,
      "step": 166080
    },
    {
      "epoch": 0.271826293015979,
      "grad_norm": 2.942774534225464,
      "learning_rate": 9.518657380257376e-06,
      "loss": 0.045,
      "step": 166100
    },
    {
      "epoch": 0.27185902345463236,
      "grad_norm": 2.975292682647705,
      "learning_rate": 9.51859148804386e-06,
      "loss": 0.0547,
      "step": 166120
    },
    {
      "epoch": 0.2718917538932857,
      "grad_norm": 2.0737109184265137,
      "learning_rate": 9.518525595830342e-06,
      "loss": 0.0583,
      "step": 166140
    },
    {
      "epoch": 0.271924484331939,
      "grad_norm": 0.6795765161514282,
      "learning_rate": 9.518459703616825e-06,
      "loss": 0.0517,
      "step": 166160
    },
    {
      "epoch": 0.27195721477059237,
      "grad_norm": 1.9151023626327515,
      "learning_rate": 9.518393811403307e-06,
      "loss": 0.0541,
      "step": 166180
    },
    {
      "epoch": 0.2719899452092457,
      "grad_norm": 4.054194927215576,
      "learning_rate": 9.51832791918979e-06,
      "loss": 0.0542,
      "step": 166200
    },
    {
      "epoch": 0.27202267564789906,
      "grad_norm": 1.0865634679794312,
      "learning_rate": 9.518262026976272e-06,
      "loss": 0.0589,
      "step": 166220
    },
    {
      "epoch": 0.2720554060865524,
      "grad_norm": 3.233449697494507,
      "learning_rate": 9.518196134762756e-06,
      "loss": 0.0524,
      "step": 166240
    },
    {
      "epoch": 0.2720881365252057,
      "grad_norm": 1.2790443897247314,
      "learning_rate": 9.518130242549238e-06,
      "loss": 0.0664,
      "step": 166260
    },
    {
      "epoch": 0.27212086696385906,
      "grad_norm": 10.964044570922852,
      "learning_rate": 9.518064350335722e-06,
      "loss": 0.0387,
      "step": 166280
    },
    {
      "epoch": 0.2721535974025124,
      "grad_norm": 2.3275179862976074,
      "learning_rate": 9.517998458122203e-06,
      "loss": 0.0681,
      "step": 166300
    },
    {
      "epoch": 0.27218632784116575,
      "grad_norm": 5.256371021270752,
      "learning_rate": 9.517932565908687e-06,
      "loss": 0.0571,
      "step": 166320
    },
    {
      "epoch": 0.27221905827981907,
      "grad_norm": 1.5935208797454834,
      "learning_rate": 9.517866673695169e-06,
      "loss": 0.0498,
      "step": 166340
    },
    {
      "epoch": 0.2722517887184724,
      "grad_norm": 0.9145708084106445,
      "learning_rate": 9.517800781481653e-06,
      "loss": 0.0594,
      "step": 166360
    },
    {
      "epoch": 0.27228451915712576,
      "grad_norm": 2.104762315750122,
      "learning_rate": 9.517734889268136e-06,
      "loss": 0.0561,
      "step": 166380
    },
    {
      "epoch": 0.2723172495957791,
      "grad_norm": 2.393164873123169,
      "learning_rate": 9.517668997054618e-06,
      "loss": 0.0601,
      "step": 166400
    },
    {
      "epoch": 0.27234998003443245,
      "grad_norm": 1.4808980226516724,
      "learning_rate": 9.517603104841102e-06,
      "loss": 0.0527,
      "step": 166420
    },
    {
      "epoch": 0.27238271047308577,
      "grad_norm": 1.7674638032913208,
      "learning_rate": 9.517537212627585e-06,
      "loss": 0.0529,
      "step": 166440
    },
    {
      "epoch": 0.2724154409117391,
      "grad_norm": 1.3368611335754395,
      "learning_rate": 9.517471320414067e-06,
      "loss": 0.0462,
      "step": 166460
    },
    {
      "epoch": 0.27244817135039245,
      "grad_norm": 2.3524997234344482,
      "learning_rate": 9.51740542820055e-06,
      "loss": 0.0498,
      "step": 166480
    },
    {
      "epoch": 0.27248090178904577,
      "grad_norm": 2.2886340618133545,
      "learning_rate": 9.517339535987034e-06,
      "loss": 0.0575,
      "step": 166500
    },
    {
      "epoch": 0.27251363222769914,
      "grad_norm": 1.7302772998809814,
      "learning_rate": 9.517273643773516e-06,
      "loss": 0.0559,
      "step": 166520
    },
    {
      "epoch": 0.27254636266635246,
      "grad_norm": 0.2799084782600403,
      "learning_rate": 9.51720775156e-06,
      "loss": 0.0781,
      "step": 166540
    },
    {
      "epoch": 0.2725790931050058,
      "grad_norm": 3.7415401935577393,
      "learning_rate": 9.517141859346482e-06,
      "loss": 0.0642,
      "step": 166560
    },
    {
      "epoch": 0.27261182354365915,
      "grad_norm": 0.7362474203109741,
      "learning_rate": 9.517075967132965e-06,
      "loss": 0.0563,
      "step": 166580
    },
    {
      "epoch": 0.27264455398231247,
      "grad_norm": 0.7047861218452454,
      "learning_rate": 9.517010074919447e-06,
      "loss": 0.0691,
      "step": 166600
    },
    {
      "epoch": 0.27267728442096584,
      "grad_norm": 1.686755895614624,
      "learning_rate": 9.51694418270593e-06,
      "loss": 0.0406,
      "step": 166620
    },
    {
      "epoch": 0.27271001485961915,
      "grad_norm": 2.1672210693359375,
      "learning_rate": 9.516878290492413e-06,
      "loss": 0.0484,
      "step": 166640
    },
    {
      "epoch": 0.27274274529827247,
      "grad_norm": 1.5251559019088745,
      "learning_rate": 9.516812398278896e-06,
      "loss": 0.0613,
      "step": 166660
    },
    {
      "epoch": 0.27277547573692584,
      "grad_norm": 0.8104824423789978,
      "learning_rate": 9.516746506065378e-06,
      "loss": 0.0526,
      "step": 166680
    },
    {
      "epoch": 0.27280820617557916,
      "grad_norm": 1.9710097312927246,
      "learning_rate": 9.516680613851862e-06,
      "loss": 0.05,
      "step": 166700
    },
    {
      "epoch": 0.2728409366142325,
      "grad_norm": 1.8466259241104126,
      "learning_rate": 9.516614721638345e-06,
      "loss": 0.0536,
      "step": 166720
    },
    {
      "epoch": 0.27287366705288585,
      "grad_norm": 5.2730512619018555,
      "learning_rate": 9.516548829424827e-06,
      "loss": 0.0444,
      "step": 166740
    },
    {
      "epoch": 0.27290639749153917,
      "grad_norm": 1.6664785146713257,
      "learning_rate": 9.51648293721131e-06,
      "loss": 0.0486,
      "step": 166760
    },
    {
      "epoch": 0.27293912793019254,
      "grad_norm": 1.545536756515503,
      "learning_rate": 9.516417044997793e-06,
      "loss": 0.0626,
      "step": 166780
    },
    {
      "epoch": 0.27297185836884585,
      "grad_norm": 2.2057480812072754,
      "learning_rate": 9.516351152784276e-06,
      "loss": 0.0746,
      "step": 166800
    },
    {
      "epoch": 0.27300458880749917,
      "grad_norm": 2.9734601974487305,
      "learning_rate": 9.51628526057076e-06,
      "loss": 0.0732,
      "step": 166820
    },
    {
      "epoch": 0.27303731924615254,
      "grad_norm": 1.4488190412521362,
      "learning_rate": 9.516219368357242e-06,
      "loss": 0.0419,
      "step": 166840
    },
    {
      "epoch": 0.27307004968480586,
      "grad_norm": 2.903164863586426,
      "learning_rate": 9.516153476143725e-06,
      "loss": 0.0596,
      "step": 166860
    },
    {
      "epoch": 0.27310278012345923,
      "grad_norm": 1.7115063667297363,
      "learning_rate": 9.516087583930209e-06,
      "loss": 0.0662,
      "step": 166880
    },
    {
      "epoch": 0.27313551056211255,
      "grad_norm": 1.4792463779449463,
      "learning_rate": 9.51602169171669e-06,
      "loss": 0.0507,
      "step": 166900
    },
    {
      "epoch": 0.27316824100076587,
      "grad_norm": 1.1786590814590454,
      "learning_rate": 9.515955799503174e-06,
      "loss": 0.0443,
      "step": 166920
    },
    {
      "epoch": 0.27320097143941924,
      "grad_norm": 2.9492790699005127,
      "learning_rate": 9.515889907289656e-06,
      "loss": 0.0487,
      "step": 166940
    },
    {
      "epoch": 0.27323370187807255,
      "grad_norm": 0.7503848671913147,
      "learning_rate": 9.51582401507614e-06,
      "loss": 0.0393,
      "step": 166960
    },
    {
      "epoch": 0.2732664323167259,
      "grad_norm": 4.501891613006592,
      "learning_rate": 9.515758122862622e-06,
      "loss": 0.0499,
      "step": 166980
    },
    {
      "epoch": 0.27329916275537924,
      "grad_norm": 3.2356183528900146,
      "learning_rate": 9.515692230649105e-06,
      "loss": 0.0522,
      "step": 167000
    },
    {
      "epoch": 0.27333189319403256,
      "grad_norm": 6.026544570922852,
      "learning_rate": 9.515626338435587e-06,
      "loss": 0.0526,
      "step": 167020
    },
    {
      "epoch": 0.27336462363268593,
      "grad_norm": 2.162693977355957,
      "learning_rate": 9.51556044622207e-06,
      "loss": 0.057,
      "step": 167040
    },
    {
      "epoch": 0.27339735407133925,
      "grad_norm": 1.0281157493591309,
      "learning_rate": 9.515494554008553e-06,
      "loss": 0.0639,
      "step": 167060
    },
    {
      "epoch": 0.2734300845099926,
      "grad_norm": 1.8505241870880127,
      "learning_rate": 9.515428661795036e-06,
      "loss": 0.0582,
      "step": 167080
    },
    {
      "epoch": 0.27346281494864594,
      "grad_norm": 1.4465411901474,
      "learning_rate": 9.51536276958152e-06,
      "loss": 0.0497,
      "step": 167100
    },
    {
      "epoch": 0.27349554538729925,
      "grad_norm": 1.6888216733932495,
      "learning_rate": 9.515296877368002e-06,
      "loss": 0.0528,
      "step": 167120
    },
    {
      "epoch": 0.2735282758259526,
      "grad_norm": 1.820482850074768,
      "learning_rate": 9.515230985154485e-06,
      "loss": 0.0483,
      "step": 167140
    },
    {
      "epoch": 0.27356100626460594,
      "grad_norm": 1.1273257732391357,
      "learning_rate": 9.515165092940967e-06,
      "loss": 0.0536,
      "step": 167160
    },
    {
      "epoch": 0.2735937367032593,
      "grad_norm": 4.29630708694458,
      "learning_rate": 9.51509920072745e-06,
      "loss": 0.0721,
      "step": 167180
    },
    {
      "epoch": 0.27362646714191263,
      "grad_norm": 0.8553447723388672,
      "learning_rate": 9.515033308513933e-06,
      "loss": 0.067,
      "step": 167200
    },
    {
      "epoch": 0.27365919758056595,
      "grad_norm": 2.745419502258301,
      "learning_rate": 9.514967416300416e-06,
      "loss": 0.0391,
      "step": 167220
    },
    {
      "epoch": 0.2736919280192193,
      "grad_norm": 0.7388609647750854,
      "learning_rate": 9.5149015240869e-06,
      "loss": 0.0594,
      "step": 167240
    },
    {
      "epoch": 0.27372465845787264,
      "grad_norm": 1.8014163970947266,
      "learning_rate": 9.514835631873382e-06,
      "loss": 0.0419,
      "step": 167260
    },
    {
      "epoch": 0.273757388896526,
      "grad_norm": 2.627976655960083,
      "learning_rate": 9.514769739659865e-06,
      "loss": 0.0603,
      "step": 167280
    },
    {
      "epoch": 0.2737901193351793,
      "grad_norm": 2.3102104663848877,
      "learning_rate": 9.514703847446349e-06,
      "loss": 0.0593,
      "step": 167300
    },
    {
      "epoch": 0.27382284977383264,
      "grad_norm": 1.6037614345550537,
      "learning_rate": 9.51463795523283e-06,
      "loss": 0.0518,
      "step": 167320
    },
    {
      "epoch": 0.273855580212486,
      "grad_norm": 1.6707229614257812,
      "learning_rate": 9.514572063019314e-06,
      "loss": 0.048,
      "step": 167340
    },
    {
      "epoch": 0.27388831065113933,
      "grad_norm": 5.768131256103516,
      "learning_rate": 9.514506170805796e-06,
      "loss": 0.0405,
      "step": 167360
    },
    {
      "epoch": 0.2739210410897927,
      "grad_norm": 1.4899977445602417,
      "learning_rate": 9.51444027859228e-06,
      "loss": 0.0515,
      "step": 167380
    },
    {
      "epoch": 0.273953771528446,
      "grad_norm": 2.2952795028686523,
      "learning_rate": 9.514374386378762e-06,
      "loss": 0.0697,
      "step": 167400
    },
    {
      "epoch": 0.27398650196709934,
      "grad_norm": 5.931015968322754,
      "learning_rate": 9.514308494165245e-06,
      "loss": 0.0655,
      "step": 167420
    },
    {
      "epoch": 0.2740192324057527,
      "grad_norm": 5.126631736755371,
      "learning_rate": 9.514242601951729e-06,
      "loss": 0.0393,
      "step": 167440
    },
    {
      "epoch": 0.274051962844406,
      "grad_norm": 5.251955986022949,
      "learning_rate": 9.51417670973821e-06,
      "loss": 0.0545,
      "step": 167460
    },
    {
      "epoch": 0.2740846932830594,
      "grad_norm": 0.7533587217330933,
      "learning_rate": 9.514110817524694e-06,
      "loss": 0.0615,
      "step": 167480
    },
    {
      "epoch": 0.2741174237217127,
      "grad_norm": 1.9759886264801025,
      "learning_rate": 9.514044925311176e-06,
      "loss": 0.062,
      "step": 167500
    },
    {
      "epoch": 0.27415015416036603,
      "grad_norm": 5.203852653503418,
      "learning_rate": 9.51397903309766e-06,
      "loss": 0.0506,
      "step": 167520
    },
    {
      "epoch": 0.2741828845990194,
      "grad_norm": 2.2240254878997803,
      "learning_rate": 9.513913140884142e-06,
      "loss": 0.0502,
      "step": 167540
    },
    {
      "epoch": 0.2742156150376727,
      "grad_norm": 2.3676631450653076,
      "learning_rate": 9.513847248670625e-06,
      "loss": 0.0526,
      "step": 167560
    },
    {
      "epoch": 0.2742483454763261,
      "grad_norm": 2.006115198135376,
      "learning_rate": 9.513781356457107e-06,
      "loss": 0.0487,
      "step": 167580
    },
    {
      "epoch": 0.2742810759149794,
      "grad_norm": 2.2473413944244385,
      "learning_rate": 9.51371546424359e-06,
      "loss": 0.0601,
      "step": 167600
    },
    {
      "epoch": 0.2743138063536327,
      "grad_norm": 1.3251514434814453,
      "learning_rate": 9.513649572030074e-06,
      "loss": 0.0522,
      "step": 167620
    },
    {
      "epoch": 0.2743465367922861,
      "grad_norm": 4.614518642425537,
      "learning_rate": 9.513583679816556e-06,
      "loss": 0.0542,
      "step": 167640
    },
    {
      "epoch": 0.2743792672309394,
      "grad_norm": 1.7187103033065796,
      "learning_rate": 9.51351778760304e-06,
      "loss": 0.0517,
      "step": 167660
    },
    {
      "epoch": 0.2744119976695928,
      "grad_norm": 2.717623472213745,
      "learning_rate": 9.513451895389523e-06,
      "loss": 0.0694,
      "step": 167680
    },
    {
      "epoch": 0.2744447281082461,
      "grad_norm": 2.519989490509033,
      "learning_rate": 9.513386003176005e-06,
      "loss": 0.0548,
      "step": 167700
    },
    {
      "epoch": 0.2744774585468994,
      "grad_norm": 0.8386203646659851,
      "learning_rate": 9.513320110962489e-06,
      "loss": 0.0649,
      "step": 167720
    },
    {
      "epoch": 0.2745101889855528,
      "grad_norm": 0.24852852523326874,
      "learning_rate": 9.513254218748971e-06,
      "loss": 0.0466,
      "step": 167740
    },
    {
      "epoch": 0.2745429194242061,
      "grad_norm": 3.6014204025268555,
      "learning_rate": 9.513188326535454e-06,
      "loss": 0.0426,
      "step": 167760
    },
    {
      "epoch": 0.2745756498628595,
      "grad_norm": 0.9834880828857422,
      "learning_rate": 9.513122434321938e-06,
      "loss": 0.0477,
      "step": 167780
    },
    {
      "epoch": 0.2746083803015128,
      "grad_norm": 1.2775367498397827,
      "learning_rate": 9.51305654210842e-06,
      "loss": 0.0538,
      "step": 167800
    },
    {
      "epoch": 0.2746411107401661,
      "grad_norm": 2.139214038848877,
      "learning_rate": 9.512990649894904e-06,
      "loss": 0.0578,
      "step": 167820
    },
    {
      "epoch": 0.2746738411788195,
      "grad_norm": 3.0283596515655518,
      "learning_rate": 9.512924757681385e-06,
      "loss": 0.0477,
      "step": 167840
    },
    {
      "epoch": 0.2747065716174728,
      "grad_norm": 3.2149412631988525,
      "learning_rate": 9.512858865467869e-06,
      "loss": 0.0679,
      "step": 167860
    },
    {
      "epoch": 0.2747393020561262,
      "grad_norm": 1.93772292137146,
      "learning_rate": 9.512792973254351e-06,
      "loss": 0.0628,
      "step": 167880
    },
    {
      "epoch": 0.2747720324947795,
      "grad_norm": 0.49908435344696045,
      "learning_rate": 9.512727081040834e-06,
      "loss": 0.0499,
      "step": 167900
    },
    {
      "epoch": 0.2748047629334328,
      "grad_norm": 0.8956155180931091,
      "learning_rate": 9.512661188827316e-06,
      "loss": 0.0513,
      "step": 167920
    },
    {
      "epoch": 0.2748374933720862,
      "grad_norm": 1.0319756269454956,
      "learning_rate": 9.5125952966138e-06,
      "loss": 0.0553,
      "step": 167940
    },
    {
      "epoch": 0.2748702238107395,
      "grad_norm": 1.452760934829712,
      "learning_rate": 9.512529404400282e-06,
      "loss": 0.0475,
      "step": 167960
    },
    {
      "epoch": 0.2749029542493929,
      "grad_norm": 2.9222521781921387,
      "learning_rate": 9.512463512186765e-06,
      "loss": 0.053,
      "step": 167980
    },
    {
      "epoch": 0.2749356846880462,
      "grad_norm": 4.290846347808838,
      "learning_rate": 9.512397619973247e-06,
      "loss": 0.0578,
      "step": 168000
    },
    {
      "epoch": 0.2749684151266995,
      "grad_norm": 0.7344008088111877,
      "learning_rate": 9.512331727759731e-06,
      "loss": 0.0421,
      "step": 168020
    },
    {
      "epoch": 0.2750011455653529,
      "grad_norm": 2.787585735321045,
      "learning_rate": 9.512265835546215e-06,
      "loss": 0.0502,
      "step": 168040
    },
    {
      "epoch": 0.2750338760040062,
      "grad_norm": 12.563152313232422,
      "learning_rate": 9.512199943332698e-06,
      "loss": 0.0697,
      "step": 168060
    },
    {
      "epoch": 0.27506660644265957,
      "grad_norm": 1.6247506141662598,
      "learning_rate": 9.51213405111918e-06,
      "loss": 0.0459,
      "step": 168080
    },
    {
      "epoch": 0.2750993368813129,
      "grad_norm": 3.25699782371521,
      "learning_rate": 9.512068158905664e-06,
      "loss": 0.0461,
      "step": 168100
    },
    {
      "epoch": 0.2751320673199662,
      "grad_norm": 2.009929656982422,
      "learning_rate": 9.512002266692145e-06,
      "loss": 0.0638,
      "step": 168120
    },
    {
      "epoch": 0.2751647977586196,
      "grad_norm": 2.8863840103149414,
      "learning_rate": 9.511936374478629e-06,
      "loss": 0.0521,
      "step": 168140
    },
    {
      "epoch": 0.2751975281972729,
      "grad_norm": 2.019794464111328,
      "learning_rate": 9.511870482265113e-06,
      "loss": 0.0607,
      "step": 168160
    },
    {
      "epoch": 0.27523025863592626,
      "grad_norm": 1.6948277950286865,
      "learning_rate": 9.511804590051595e-06,
      "loss": 0.0622,
      "step": 168180
    },
    {
      "epoch": 0.2752629890745796,
      "grad_norm": 3.060957908630371,
      "learning_rate": 9.511738697838078e-06,
      "loss": 0.0558,
      "step": 168200
    },
    {
      "epoch": 0.2752957195132329,
      "grad_norm": 0.8168935775756836,
      "learning_rate": 9.51167280562456e-06,
      "loss": 0.0525,
      "step": 168220
    },
    {
      "epoch": 0.27532844995188627,
      "grad_norm": 1.8921098709106445,
      "learning_rate": 9.511606913411044e-06,
      "loss": 0.041,
      "step": 168240
    },
    {
      "epoch": 0.2753611803905396,
      "grad_norm": 3.0423269271850586,
      "learning_rate": 9.511541021197525e-06,
      "loss": 0.0597,
      "step": 168260
    },
    {
      "epoch": 0.27539391082919296,
      "grad_norm": 0.7458373308181763,
      "learning_rate": 9.511475128984009e-06,
      "loss": 0.0526,
      "step": 168280
    },
    {
      "epoch": 0.2754266412678463,
      "grad_norm": 2.902665853500366,
      "learning_rate": 9.511409236770491e-06,
      "loss": 0.0584,
      "step": 168300
    },
    {
      "epoch": 0.2754593717064996,
      "grad_norm": 1.402976155281067,
      "learning_rate": 9.511343344556975e-06,
      "loss": 0.0545,
      "step": 168320
    },
    {
      "epoch": 0.27549210214515296,
      "grad_norm": 12.897340774536133,
      "learning_rate": 9.511277452343456e-06,
      "loss": 0.0584,
      "step": 168340
    },
    {
      "epoch": 0.2755248325838063,
      "grad_norm": 1.8665014505386353,
      "learning_rate": 9.51121156012994e-06,
      "loss": 0.0425,
      "step": 168360
    },
    {
      "epoch": 0.27555756302245965,
      "grad_norm": 0.7139849662780762,
      "learning_rate": 9.511145667916422e-06,
      "loss": 0.0669,
      "step": 168380
    },
    {
      "epoch": 0.27559029346111297,
      "grad_norm": 0.6883512139320374,
      "learning_rate": 9.511079775702906e-06,
      "loss": 0.0657,
      "step": 168400
    },
    {
      "epoch": 0.2756230238997663,
      "grad_norm": 5.150322437286377,
      "learning_rate": 9.511013883489389e-06,
      "loss": 0.0644,
      "step": 168420
    },
    {
      "epoch": 0.27565575433841966,
      "grad_norm": 2.1912355422973633,
      "learning_rate": 9.510947991275871e-06,
      "loss": 0.06,
      "step": 168440
    },
    {
      "epoch": 0.275688484777073,
      "grad_norm": 3.9428470134735107,
      "learning_rate": 9.510882099062355e-06,
      "loss": 0.0611,
      "step": 168460
    },
    {
      "epoch": 0.27572121521572635,
      "grad_norm": 0.3903256356716156,
      "learning_rate": 9.510816206848838e-06,
      "loss": 0.0686,
      "step": 168480
    },
    {
      "epoch": 0.27575394565437966,
      "grad_norm": 1.4017664194107056,
      "learning_rate": 9.51075031463532e-06,
      "loss": 0.0641,
      "step": 168500
    },
    {
      "epoch": 0.275786676093033,
      "grad_norm": 3.557513952255249,
      "learning_rate": 9.510684422421804e-06,
      "loss": 0.05,
      "step": 168520
    },
    {
      "epoch": 0.27581940653168635,
      "grad_norm": 3.004410743713379,
      "learning_rate": 9.510618530208287e-06,
      "loss": 0.0361,
      "step": 168540
    },
    {
      "epoch": 0.27585213697033967,
      "grad_norm": 1.514117956161499,
      "learning_rate": 9.510552637994769e-06,
      "loss": 0.0383,
      "step": 168560
    },
    {
      "epoch": 0.27588486740899304,
      "grad_norm": 0.5503338575363159,
      "learning_rate": 9.510486745781253e-06,
      "loss": 0.0685,
      "step": 168580
    },
    {
      "epoch": 0.27591759784764636,
      "grad_norm": 1.600080132484436,
      "learning_rate": 9.510420853567735e-06,
      "loss": 0.0469,
      "step": 168600
    },
    {
      "epoch": 0.2759503282862997,
      "grad_norm": 0.716139554977417,
      "learning_rate": 9.510354961354218e-06,
      "loss": 0.0513,
      "step": 168620
    },
    {
      "epoch": 0.27598305872495305,
      "grad_norm": 2.1323037147521973,
      "learning_rate": 9.5102890691407e-06,
      "loss": 0.0504,
      "step": 168640
    },
    {
      "epoch": 0.27601578916360636,
      "grad_norm": 1.153757929801941,
      "learning_rate": 9.510223176927184e-06,
      "loss": 0.0565,
      "step": 168660
    },
    {
      "epoch": 0.27604851960225973,
      "grad_norm": 1.9261928796768188,
      "learning_rate": 9.510157284713666e-06,
      "loss": 0.0537,
      "step": 168680
    },
    {
      "epoch": 0.27608125004091305,
      "grad_norm": 7.433269500732422,
      "learning_rate": 9.510091392500149e-06,
      "loss": 0.069,
      "step": 168700
    },
    {
      "epoch": 0.27611398047956637,
      "grad_norm": 3.160688877105713,
      "learning_rate": 9.510025500286631e-06,
      "loss": 0.0446,
      "step": 168720
    },
    {
      "epoch": 0.27614671091821974,
      "grad_norm": 2.59165620803833,
      "learning_rate": 9.509959608073115e-06,
      "loss": 0.0484,
      "step": 168740
    },
    {
      "epoch": 0.27617944135687306,
      "grad_norm": 1.0024181604385376,
      "learning_rate": 9.509893715859597e-06,
      "loss": 0.064,
      "step": 168760
    },
    {
      "epoch": 0.27621217179552643,
      "grad_norm": 0.6451258659362793,
      "learning_rate": 9.50982782364608e-06,
      "loss": 0.0491,
      "step": 168780
    },
    {
      "epoch": 0.27624490223417975,
      "grad_norm": 0.7634056806564331,
      "learning_rate": 9.509761931432564e-06,
      "loss": 0.0479,
      "step": 168800
    },
    {
      "epoch": 0.27627763267283306,
      "grad_norm": 1.6642547845840454,
      "learning_rate": 9.509696039219046e-06,
      "loss": 0.0556,
      "step": 168820
    },
    {
      "epoch": 0.27631036311148643,
      "grad_norm": 3.535478115081787,
      "learning_rate": 9.50963014700553e-06,
      "loss": 0.0478,
      "step": 168840
    },
    {
      "epoch": 0.27634309355013975,
      "grad_norm": 5.614175796508789,
      "learning_rate": 9.509564254792013e-06,
      "loss": 0.0481,
      "step": 168860
    },
    {
      "epoch": 0.2763758239887931,
      "grad_norm": 2.732146739959717,
      "learning_rate": 9.509498362578495e-06,
      "loss": 0.0593,
      "step": 168880
    },
    {
      "epoch": 0.27640855442744644,
      "grad_norm": 5.448575496673584,
      "learning_rate": 9.509432470364978e-06,
      "loss": 0.0752,
      "step": 168900
    },
    {
      "epoch": 0.27644128486609976,
      "grad_norm": 2.5868449211120605,
      "learning_rate": 9.509366578151462e-06,
      "loss": 0.0437,
      "step": 168920
    },
    {
      "epoch": 0.27647401530475313,
      "grad_norm": 2.848361015319824,
      "learning_rate": 9.509300685937944e-06,
      "loss": 0.062,
      "step": 168940
    },
    {
      "epoch": 0.27650674574340645,
      "grad_norm": 2.278550863265991,
      "learning_rate": 9.509234793724427e-06,
      "loss": 0.0682,
      "step": 168960
    },
    {
      "epoch": 0.2765394761820598,
      "grad_norm": 2.551685333251953,
      "learning_rate": 9.50916890151091e-06,
      "loss": 0.0529,
      "step": 168980
    },
    {
      "epoch": 0.27657220662071313,
      "grad_norm": 2.5522708892822266,
      "learning_rate": 9.509103009297393e-06,
      "loss": 0.0531,
      "step": 169000
    },
    {
      "epoch": 0.27660493705936645,
      "grad_norm": 1.2488267421722412,
      "learning_rate": 9.509037117083875e-06,
      "loss": 0.0529,
      "step": 169020
    },
    {
      "epoch": 0.2766376674980198,
      "grad_norm": 1.8175053596496582,
      "learning_rate": 9.508971224870358e-06,
      "loss": 0.049,
      "step": 169040
    },
    {
      "epoch": 0.27667039793667314,
      "grad_norm": 2.271127700805664,
      "learning_rate": 9.50890533265684e-06,
      "loss": 0.0454,
      "step": 169060
    },
    {
      "epoch": 0.2767031283753265,
      "grad_norm": 0.5483169555664062,
      "learning_rate": 9.508839440443324e-06,
      "loss": 0.0571,
      "step": 169080
    },
    {
      "epoch": 0.27673585881397983,
      "grad_norm": 2.882488489151001,
      "learning_rate": 9.508773548229806e-06,
      "loss": 0.058,
      "step": 169100
    },
    {
      "epoch": 0.27676858925263315,
      "grad_norm": 0.6466789245605469,
      "learning_rate": 9.50870765601629e-06,
      "loss": 0.045,
      "step": 169120
    },
    {
      "epoch": 0.2768013196912865,
      "grad_norm": 0.9847024083137512,
      "learning_rate": 9.508641763802771e-06,
      "loss": 0.0474,
      "step": 169140
    },
    {
      "epoch": 0.27683405012993983,
      "grad_norm": 7.2399210929870605,
      "learning_rate": 9.508575871589255e-06,
      "loss": 0.0563,
      "step": 169160
    },
    {
      "epoch": 0.2768667805685932,
      "grad_norm": 2.869474411010742,
      "learning_rate": 9.508509979375738e-06,
      "loss": 0.064,
      "step": 169180
    },
    {
      "epoch": 0.2768995110072465,
      "grad_norm": 0.5139559507369995,
      "learning_rate": 9.50844408716222e-06,
      "loss": 0.058,
      "step": 169200
    },
    {
      "epoch": 0.27693224144589984,
      "grad_norm": 3.3983120918273926,
      "learning_rate": 9.508378194948704e-06,
      "loss": 0.0618,
      "step": 169220
    },
    {
      "epoch": 0.2769649718845532,
      "grad_norm": 3.1432132720947266,
      "learning_rate": 9.508312302735186e-06,
      "loss": 0.0716,
      "step": 169240
    },
    {
      "epoch": 0.27699770232320653,
      "grad_norm": 5.219401836395264,
      "learning_rate": 9.50824641052167e-06,
      "loss": 0.0459,
      "step": 169260
    },
    {
      "epoch": 0.2770304327618599,
      "grad_norm": 5.317469596862793,
      "learning_rate": 9.508180518308153e-06,
      "loss": 0.0466,
      "step": 169280
    },
    {
      "epoch": 0.2770631632005132,
      "grad_norm": 2.946317434310913,
      "learning_rate": 9.508114626094635e-06,
      "loss": 0.0523,
      "step": 169300
    },
    {
      "epoch": 0.27709589363916654,
      "grad_norm": 4.705124378204346,
      "learning_rate": 9.508048733881118e-06,
      "loss": 0.0791,
      "step": 169320
    },
    {
      "epoch": 0.2771286240778199,
      "grad_norm": 3.260369062423706,
      "learning_rate": 9.507982841667602e-06,
      "loss": 0.0582,
      "step": 169340
    },
    {
      "epoch": 0.2771613545164732,
      "grad_norm": 2.335871934890747,
      "learning_rate": 9.507916949454084e-06,
      "loss": 0.0625,
      "step": 169360
    },
    {
      "epoch": 0.2771940849551266,
      "grad_norm": 2.0050528049468994,
      "learning_rate": 9.507851057240567e-06,
      "loss": 0.0582,
      "step": 169380
    },
    {
      "epoch": 0.2772268153937799,
      "grad_norm": 2.2803561687469482,
      "learning_rate": 9.50778516502705e-06,
      "loss": 0.0549,
      "step": 169400
    },
    {
      "epoch": 0.27725954583243323,
      "grad_norm": 2.033372163772583,
      "learning_rate": 9.507719272813533e-06,
      "loss": 0.044,
      "step": 169420
    },
    {
      "epoch": 0.2772922762710866,
      "grad_norm": 7.927550315856934,
      "learning_rate": 9.507653380600015e-06,
      "loss": 0.0557,
      "step": 169440
    },
    {
      "epoch": 0.2773250067097399,
      "grad_norm": 73.91409301757812,
      "learning_rate": 9.507587488386498e-06,
      "loss": 0.0465,
      "step": 169460
    },
    {
      "epoch": 0.27735773714839324,
      "grad_norm": 1.4870827198028564,
      "learning_rate": 9.50752159617298e-06,
      "loss": 0.0651,
      "step": 169480
    },
    {
      "epoch": 0.2773904675870466,
      "grad_norm": 2.509695053100586,
      "learning_rate": 9.507455703959464e-06,
      "loss": 0.0588,
      "step": 169500
    },
    {
      "epoch": 0.2774231980256999,
      "grad_norm": 1.4077831506729126,
      "learning_rate": 9.507389811745946e-06,
      "loss": 0.0524,
      "step": 169520
    },
    {
      "epoch": 0.2774559284643533,
      "grad_norm": 8.7421293258667,
      "learning_rate": 9.50732391953243e-06,
      "loss": 0.0502,
      "step": 169540
    },
    {
      "epoch": 0.2774886589030066,
      "grad_norm": 1.3915104866027832,
      "learning_rate": 9.507258027318913e-06,
      "loss": 0.0502,
      "step": 169560
    },
    {
      "epoch": 0.27752138934165993,
      "grad_norm": 7.238625526428223,
      "learning_rate": 9.507192135105395e-06,
      "loss": 0.0669,
      "step": 169580
    },
    {
      "epoch": 0.2775541197803133,
      "grad_norm": 1.6538690328598022,
      "learning_rate": 9.507126242891878e-06,
      "loss": 0.0775,
      "step": 169600
    },
    {
      "epoch": 0.2775868502189666,
      "grad_norm": 1.7720513343811035,
      "learning_rate": 9.50706035067836e-06,
      "loss": 0.0535,
      "step": 169620
    },
    {
      "epoch": 0.27761958065762,
      "grad_norm": 0.7051882743835449,
      "learning_rate": 9.506994458464844e-06,
      "loss": 0.036,
      "step": 169640
    },
    {
      "epoch": 0.2776523110962733,
      "grad_norm": 3.104907751083374,
      "learning_rate": 9.506928566251327e-06,
      "loss": 0.0727,
      "step": 169660
    },
    {
      "epoch": 0.2776850415349266,
      "grad_norm": 1.15382719039917,
      "learning_rate": 9.50686267403781e-06,
      "loss": 0.0527,
      "step": 169680
    },
    {
      "epoch": 0.27771777197358,
      "grad_norm": 1.495835781097412,
      "learning_rate": 9.506796781824293e-06,
      "loss": 0.0573,
      "step": 169700
    },
    {
      "epoch": 0.2777505024122333,
      "grad_norm": 1.357833743095398,
      "learning_rate": 9.506730889610777e-06,
      "loss": 0.0541,
      "step": 169720
    },
    {
      "epoch": 0.2777832328508867,
      "grad_norm": 1.660707712173462,
      "learning_rate": 9.506664997397258e-06,
      "loss": 0.0656,
      "step": 169740
    },
    {
      "epoch": 0.27781596328954,
      "grad_norm": 2.8128466606140137,
      "learning_rate": 9.506599105183742e-06,
      "loss": 0.0582,
      "step": 169760
    },
    {
      "epoch": 0.2778486937281933,
      "grad_norm": 3.5664923191070557,
      "learning_rate": 9.506533212970224e-06,
      "loss": 0.0603,
      "step": 169780
    },
    {
      "epoch": 0.2778814241668467,
      "grad_norm": 2.98993182182312,
      "learning_rate": 9.506467320756707e-06,
      "loss": 0.0521,
      "step": 169800
    },
    {
      "epoch": 0.2779141546055,
      "grad_norm": 3.790647506713867,
      "learning_rate": 9.50640142854319e-06,
      "loss": 0.0606,
      "step": 169820
    },
    {
      "epoch": 0.2779468850441534,
      "grad_norm": 3.0732693672180176,
      "learning_rate": 9.506335536329673e-06,
      "loss": 0.067,
      "step": 169840
    },
    {
      "epoch": 0.2779796154828067,
      "grad_norm": 0.455124169588089,
      "learning_rate": 9.506269644116155e-06,
      "loss": 0.0647,
      "step": 169860
    },
    {
      "epoch": 0.27801234592146,
      "grad_norm": 1.7436195611953735,
      "learning_rate": 9.506203751902638e-06,
      "loss": 0.0595,
      "step": 169880
    },
    {
      "epoch": 0.2780450763601134,
      "grad_norm": 2.740776300430298,
      "learning_rate": 9.506137859689122e-06,
      "loss": 0.0541,
      "step": 169900
    },
    {
      "epoch": 0.2780778067987667,
      "grad_norm": 2.1336350440979004,
      "learning_rate": 9.506071967475604e-06,
      "loss": 0.041,
      "step": 169920
    },
    {
      "epoch": 0.2781105372374201,
      "grad_norm": 3.97932505607605,
      "learning_rate": 9.506006075262087e-06,
      "loss": 0.0641,
      "step": 169940
    },
    {
      "epoch": 0.2781432676760734,
      "grad_norm": 1.7478935718536377,
      "learning_rate": 9.50594018304857e-06,
      "loss": 0.0513,
      "step": 169960
    },
    {
      "epoch": 0.2781759981147267,
      "grad_norm": 2.4589591026306152,
      "learning_rate": 9.505874290835053e-06,
      "loss": 0.0639,
      "step": 169980
    },
    {
      "epoch": 0.2782087285533801,
      "grad_norm": 3.7104671001434326,
      "learning_rate": 9.505808398621535e-06,
      "loss": 0.0558,
      "step": 170000
    },
    {
      "epoch": 0.2782414589920334,
      "grad_norm": 3.861769437789917,
      "learning_rate": 9.505742506408018e-06,
      "loss": 0.0561,
      "step": 170020
    },
    {
      "epoch": 0.27827418943068677,
      "grad_norm": 3.6814634799957275,
      "learning_rate": 9.5056766141945e-06,
      "loss": 0.0427,
      "step": 170040
    },
    {
      "epoch": 0.2783069198693401,
      "grad_norm": 1.864568829536438,
      "learning_rate": 9.505610721980984e-06,
      "loss": 0.069,
      "step": 170060
    },
    {
      "epoch": 0.2783396503079934,
      "grad_norm": 2.5441341400146484,
      "learning_rate": 9.505544829767468e-06,
      "loss": 0.0474,
      "step": 170080
    },
    {
      "epoch": 0.2783723807466468,
      "grad_norm": 2.569073438644409,
      "learning_rate": 9.50547893755395e-06,
      "loss": 0.0468,
      "step": 170100
    },
    {
      "epoch": 0.2784051111853001,
      "grad_norm": 1.2264305353164673,
      "learning_rate": 9.505413045340433e-06,
      "loss": 0.0568,
      "step": 170120
    },
    {
      "epoch": 0.27843784162395346,
      "grad_norm": 5.273919105529785,
      "learning_rate": 9.505347153126917e-06,
      "loss": 0.0559,
      "step": 170140
    },
    {
      "epoch": 0.2784705720626068,
      "grad_norm": 6.503942966461182,
      "learning_rate": 9.505281260913398e-06,
      "loss": 0.0647,
      "step": 170160
    },
    {
      "epoch": 0.2785033025012601,
      "grad_norm": 4.436850070953369,
      "learning_rate": 9.505215368699882e-06,
      "loss": 0.0511,
      "step": 170180
    },
    {
      "epoch": 0.27853603293991347,
      "grad_norm": 4.052383899688721,
      "learning_rate": 9.505149476486364e-06,
      "loss": 0.0643,
      "step": 170200
    },
    {
      "epoch": 0.2785687633785668,
      "grad_norm": 3.5044407844543457,
      "learning_rate": 9.505083584272848e-06,
      "loss": 0.0526,
      "step": 170220
    },
    {
      "epoch": 0.27860149381722016,
      "grad_norm": 2.637784719467163,
      "learning_rate": 9.505017692059331e-06,
      "loss": 0.0644,
      "step": 170240
    },
    {
      "epoch": 0.2786342242558735,
      "grad_norm": 1.0813689231872559,
      "learning_rate": 9.504951799845813e-06,
      "loss": 0.0671,
      "step": 170260
    },
    {
      "epoch": 0.2786669546945268,
      "grad_norm": 1.591565728187561,
      "learning_rate": 9.504885907632297e-06,
      "loss": 0.0443,
      "step": 170280
    },
    {
      "epoch": 0.27869968513318016,
      "grad_norm": 1.9113976955413818,
      "learning_rate": 9.504820015418778e-06,
      "loss": 0.0618,
      "step": 170300
    },
    {
      "epoch": 0.2787324155718335,
      "grad_norm": 1.7061240673065186,
      "learning_rate": 9.504754123205262e-06,
      "loss": 0.0467,
      "step": 170320
    },
    {
      "epoch": 0.27876514601048685,
      "grad_norm": 1.7537814378738403,
      "learning_rate": 9.504688230991744e-06,
      "loss": 0.0527,
      "step": 170340
    },
    {
      "epoch": 0.27879787644914017,
      "grad_norm": 3.0405020713806152,
      "learning_rate": 9.504622338778228e-06,
      "loss": 0.0509,
      "step": 170360
    },
    {
      "epoch": 0.2788306068877935,
      "grad_norm": 2.6258492469787598,
      "learning_rate": 9.50455644656471e-06,
      "loss": 0.0627,
      "step": 170380
    },
    {
      "epoch": 0.27886333732644686,
      "grad_norm": 0.8154435157775879,
      "learning_rate": 9.504490554351193e-06,
      "loss": 0.0517,
      "step": 170400
    },
    {
      "epoch": 0.2788960677651002,
      "grad_norm": 1.5460165739059448,
      "learning_rate": 9.504424662137675e-06,
      "loss": 0.046,
      "step": 170420
    },
    {
      "epoch": 0.27892879820375355,
      "grad_norm": 1.905551791191101,
      "learning_rate": 9.504358769924159e-06,
      "loss": 0.0612,
      "step": 170440
    },
    {
      "epoch": 0.27896152864240686,
      "grad_norm": 2.0901851654052734,
      "learning_rate": 9.504292877710642e-06,
      "loss": 0.0542,
      "step": 170460
    },
    {
      "epoch": 0.2789942590810602,
      "grad_norm": 2.1675596237182617,
      "learning_rate": 9.504226985497124e-06,
      "loss": 0.0589,
      "step": 170480
    },
    {
      "epoch": 0.27902698951971355,
      "grad_norm": 2.775536298751831,
      "learning_rate": 9.504161093283608e-06,
      "loss": 0.0642,
      "step": 170500
    },
    {
      "epoch": 0.27905971995836687,
      "grad_norm": 2.357983350753784,
      "learning_rate": 9.504095201070091e-06,
      "loss": 0.0633,
      "step": 170520
    },
    {
      "epoch": 0.27909245039702024,
      "grad_norm": 0.9825782179832458,
      "learning_rate": 9.504029308856573e-06,
      "loss": 0.0719,
      "step": 170540
    },
    {
      "epoch": 0.27912518083567356,
      "grad_norm": 1.821065902709961,
      "learning_rate": 9.503963416643057e-06,
      "loss": 0.0602,
      "step": 170560
    },
    {
      "epoch": 0.2791579112743269,
      "grad_norm": 0.3071233332157135,
      "learning_rate": 9.503897524429539e-06,
      "loss": 0.0411,
      "step": 170580
    },
    {
      "epoch": 0.27919064171298025,
      "grad_norm": 1.212679386138916,
      "learning_rate": 9.503831632216022e-06,
      "loss": 0.0548,
      "step": 170600
    },
    {
      "epoch": 0.27922337215163356,
      "grad_norm": 2.089721202850342,
      "learning_rate": 9.503765740002506e-06,
      "loss": 0.0589,
      "step": 170620
    },
    {
      "epoch": 0.27925610259028694,
      "grad_norm": 0.4290757179260254,
      "learning_rate": 9.503699847788988e-06,
      "loss": 0.0535,
      "step": 170640
    },
    {
      "epoch": 0.27928883302894025,
      "grad_norm": 1.4709326028823853,
      "learning_rate": 9.503633955575471e-06,
      "loss": 0.0494,
      "step": 170660
    },
    {
      "epoch": 0.27932156346759357,
      "grad_norm": 1.2572413682937622,
      "learning_rate": 9.503568063361953e-06,
      "loss": 0.0424,
      "step": 170680
    },
    {
      "epoch": 0.27935429390624694,
      "grad_norm": 5.4777092933654785,
      "learning_rate": 9.503502171148437e-06,
      "loss": 0.0576,
      "step": 170700
    },
    {
      "epoch": 0.27938702434490026,
      "grad_norm": 2.144989490509033,
      "learning_rate": 9.503436278934919e-06,
      "loss": 0.0767,
      "step": 170720
    },
    {
      "epoch": 0.27941975478355363,
      "grad_norm": 2.580568313598633,
      "learning_rate": 9.503370386721402e-06,
      "loss": 0.0628,
      "step": 170740
    },
    {
      "epoch": 0.27945248522220695,
      "grad_norm": 1.337056040763855,
      "learning_rate": 9.503304494507884e-06,
      "loss": 0.0534,
      "step": 170760
    },
    {
      "epoch": 0.27948521566086026,
      "grad_norm": 1.3557777404785156,
      "learning_rate": 9.503238602294368e-06,
      "loss": 0.0516,
      "step": 170780
    },
    {
      "epoch": 0.27951794609951364,
      "grad_norm": 3.485670804977417,
      "learning_rate": 9.50317271008085e-06,
      "loss": 0.0692,
      "step": 170800
    },
    {
      "epoch": 0.27955067653816695,
      "grad_norm": 1.7133466005325317,
      "learning_rate": 9.503106817867333e-06,
      "loss": 0.0453,
      "step": 170820
    },
    {
      "epoch": 0.2795834069768203,
      "grad_norm": 1.577517032623291,
      "learning_rate": 9.503040925653817e-06,
      "loss": 0.0635,
      "step": 170840
    },
    {
      "epoch": 0.27961613741547364,
      "grad_norm": 3.9109582901000977,
      "learning_rate": 9.502975033440299e-06,
      "loss": 0.0539,
      "step": 170860
    },
    {
      "epoch": 0.27964886785412696,
      "grad_norm": 1.1214040517807007,
      "learning_rate": 9.502909141226782e-06,
      "loss": 0.0638,
      "step": 170880
    },
    {
      "epoch": 0.27968159829278033,
      "grad_norm": 1.0348470211029053,
      "learning_rate": 9.502843249013266e-06,
      "loss": 0.0646,
      "step": 170900
    },
    {
      "epoch": 0.27971432873143365,
      "grad_norm": 2.449951171875,
      "learning_rate": 9.502777356799748e-06,
      "loss": 0.0556,
      "step": 170920
    },
    {
      "epoch": 0.279747059170087,
      "grad_norm": 4.793022632598877,
      "learning_rate": 9.502711464586231e-06,
      "loss": 0.054,
      "step": 170940
    },
    {
      "epoch": 0.27977978960874034,
      "grad_norm": 3.430067777633667,
      "learning_rate": 9.502645572372715e-06,
      "loss": 0.0635,
      "step": 170960
    },
    {
      "epoch": 0.27981252004739365,
      "grad_norm": 2.6632080078125,
      "learning_rate": 9.502579680159197e-06,
      "loss": 0.0427,
      "step": 170980
    },
    {
      "epoch": 0.279845250486047,
      "grad_norm": 1.9146941900253296,
      "learning_rate": 9.50251378794568e-06,
      "loss": 0.0551,
      "step": 171000
    },
    {
      "epoch": 0.27987798092470034,
      "grad_norm": 3.0370750427246094,
      "learning_rate": 9.502447895732162e-06,
      "loss": 0.053,
      "step": 171020
    },
    {
      "epoch": 0.2799107113633537,
      "grad_norm": 0.482334703207016,
      "learning_rate": 9.502382003518646e-06,
      "loss": 0.0385,
      "step": 171040
    },
    {
      "epoch": 0.27994344180200703,
      "grad_norm": 3.007297992706299,
      "learning_rate": 9.502316111305128e-06,
      "loss": 0.0481,
      "step": 171060
    },
    {
      "epoch": 0.27997617224066035,
      "grad_norm": 2.7240633964538574,
      "learning_rate": 9.502250219091611e-06,
      "loss": 0.0552,
      "step": 171080
    },
    {
      "epoch": 0.2800089026793137,
      "grad_norm": 2.9138259887695312,
      "learning_rate": 9.502184326878093e-06,
      "loss": 0.0596,
      "step": 171100
    },
    {
      "epoch": 0.28004163311796704,
      "grad_norm": 1.1929353475570679,
      "learning_rate": 9.502118434664577e-06,
      "loss": 0.0521,
      "step": 171120
    },
    {
      "epoch": 0.2800743635566204,
      "grad_norm": 2.525474786758423,
      "learning_rate": 9.502052542451059e-06,
      "loss": 0.0734,
      "step": 171140
    },
    {
      "epoch": 0.2801070939952737,
      "grad_norm": 18.598957061767578,
      "learning_rate": 9.501986650237542e-06,
      "loss": 0.0519,
      "step": 171160
    },
    {
      "epoch": 0.28013982443392704,
      "grad_norm": 0.9054376482963562,
      "learning_rate": 9.501920758024024e-06,
      "loss": 0.0376,
      "step": 171180
    },
    {
      "epoch": 0.2801725548725804,
      "grad_norm": 1.4730480909347534,
      "learning_rate": 9.501854865810508e-06,
      "loss": 0.0749,
      "step": 171200
    },
    {
      "epoch": 0.28020528531123373,
      "grad_norm": 1.5429054498672485,
      "learning_rate": 9.50178897359699e-06,
      "loss": 0.0559,
      "step": 171220
    },
    {
      "epoch": 0.2802380157498871,
      "grad_norm": 1.9930528402328491,
      "learning_rate": 9.501723081383473e-06,
      "loss": 0.0511,
      "step": 171240
    },
    {
      "epoch": 0.2802707461885404,
      "grad_norm": 1.5484308004379272,
      "learning_rate": 9.501657189169957e-06,
      "loss": 0.0618,
      "step": 171260
    },
    {
      "epoch": 0.28030347662719374,
      "grad_norm": 3.1443278789520264,
      "learning_rate": 9.501591296956439e-06,
      "loss": 0.0366,
      "step": 171280
    },
    {
      "epoch": 0.2803362070658471,
      "grad_norm": 3.7206175327301025,
      "learning_rate": 9.501525404742922e-06,
      "loss": 0.07,
      "step": 171300
    },
    {
      "epoch": 0.2803689375045004,
      "grad_norm": 1.2291642427444458,
      "learning_rate": 9.501459512529406e-06,
      "loss": 0.0449,
      "step": 171320
    },
    {
      "epoch": 0.2804016679431538,
      "grad_norm": 1.5455291271209717,
      "learning_rate": 9.501393620315888e-06,
      "loss": 0.0608,
      "step": 171340
    },
    {
      "epoch": 0.2804343983818071,
      "grad_norm": 0.5807052850723267,
      "learning_rate": 9.501327728102371e-06,
      "loss": 0.0506,
      "step": 171360
    },
    {
      "epoch": 0.28046712882046043,
      "grad_norm": 1.4946726560592651,
      "learning_rate": 9.501261835888855e-06,
      "loss": 0.0385,
      "step": 171380
    },
    {
      "epoch": 0.2804998592591138,
      "grad_norm": 2.5095937252044678,
      "learning_rate": 9.501195943675337e-06,
      "loss": 0.0657,
      "step": 171400
    },
    {
      "epoch": 0.2805325896977671,
      "grad_norm": 1.6938691139221191,
      "learning_rate": 9.50113005146182e-06,
      "loss": 0.0607,
      "step": 171420
    },
    {
      "epoch": 0.2805653201364205,
      "grad_norm": 5.191635608673096,
      "learning_rate": 9.501064159248302e-06,
      "loss": 0.0647,
      "step": 171440
    },
    {
      "epoch": 0.2805980505750738,
      "grad_norm": 6.025579929351807,
      "learning_rate": 9.500998267034786e-06,
      "loss": 0.0598,
      "step": 171460
    },
    {
      "epoch": 0.2806307810137271,
      "grad_norm": 3.1040539741516113,
      "learning_rate": 9.500932374821268e-06,
      "loss": 0.0545,
      "step": 171480
    },
    {
      "epoch": 0.2806635114523805,
      "grad_norm": 0.8554497361183167,
      "learning_rate": 9.500866482607751e-06,
      "loss": 0.0661,
      "step": 171500
    },
    {
      "epoch": 0.2806962418910338,
      "grad_norm": 2.6365139484405518,
      "learning_rate": 9.500800590394233e-06,
      "loss": 0.0495,
      "step": 171520
    },
    {
      "epoch": 0.2807289723296872,
      "grad_norm": 2.992765426635742,
      "learning_rate": 9.500734698180717e-06,
      "loss": 0.0612,
      "step": 171540
    },
    {
      "epoch": 0.2807617027683405,
      "grad_norm": 3.9401190280914307,
      "learning_rate": 9.500668805967199e-06,
      "loss": 0.0463,
      "step": 171560
    },
    {
      "epoch": 0.2807944332069938,
      "grad_norm": 1.3500126600265503,
      "learning_rate": 9.500602913753682e-06,
      "loss": 0.0762,
      "step": 171580
    },
    {
      "epoch": 0.2808271636456472,
      "grad_norm": 2.0883843898773193,
      "learning_rate": 9.500537021540164e-06,
      "loss": 0.0389,
      "step": 171600
    },
    {
      "epoch": 0.2808598940843005,
      "grad_norm": 1.8939908742904663,
      "learning_rate": 9.500471129326648e-06,
      "loss": 0.0655,
      "step": 171620
    },
    {
      "epoch": 0.2808926245229539,
      "grad_norm": 2.5714054107666016,
      "learning_rate": 9.500405237113131e-06,
      "loss": 0.0617,
      "step": 171640
    },
    {
      "epoch": 0.2809253549616072,
      "grad_norm": 1.2062371969223022,
      "learning_rate": 9.500339344899613e-06,
      "loss": 0.0589,
      "step": 171660
    },
    {
      "epoch": 0.2809580854002605,
      "grad_norm": 1.3768326044082642,
      "learning_rate": 9.500273452686097e-06,
      "loss": 0.0487,
      "step": 171680
    },
    {
      "epoch": 0.2809908158389139,
      "grad_norm": 0.7532352805137634,
      "learning_rate": 9.50020756047258e-06,
      "loss": 0.0599,
      "step": 171700
    },
    {
      "epoch": 0.2810235462775672,
      "grad_norm": 0.729099452495575,
      "learning_rate": 9.500141668259062e-06,
      "loss": 0.0635,
      "step": 171720
    },
    {
      "epoch": 0.2810562767162206,
      "grad_norm": 2.6301186084747314,
      "learning_rate": 9.500075776045546e-06,
      "loss": 0.062,
      "step": 171740
    },
    {
      "epoch": 0.2810890071548739,
      "grad_norm": 2.68542218208313,
      "learning_rate": 9.50000988383203e-06,
      "loss": 0.058,
      "step": 171760
    },
    {
      "epoch": 0.2811217375935272,
      "grad_norm": 3.6733851432800293,
      "learning_rate": 9.499943991618511e-06,
      "loss": 0.0483,
      "step": 171780
    },
    {
      "epoch": 0.2811544680321806,
      "grad_norm": 1.2092602252960205,
      "learning_rate": 9.499878099404995e-06,
      "loss": 0.0534,
      "step": 171800
    },
    {
      "epoch": 0.2811871984708339,
      "grad_norm": 0.8753149509429932,
      "learning_rate": 9.499812207191477e-06,
      "loss": 0.0606,
      "step": 171820
    },
    {
      "epoch": 0.28121992890948727,
      "grad_norm": 5.437256336212158,
      "learning_rate": 9.49974631497796e-06,
      "loss": 0.0524,
      "step": 171840
    },
    {
      "epoch": 0.2812526593481406,
      "grad_norm": 4.271737575531006,
      "learning_rate": 9.499680422764442e-06,
      "loss": 0.0791,
      "step": 171860
    },
    {
      "epoch": 0.2812853897867939,
      "grad_norm": 4.227427005767822,
      "learning_rate": 9.499614530550926e-06,
      "loss": 0.0632,
      "step": 171880
    },
    {
      "epoch": 0.2813181202254473,
      "grad_norm": 1.3301911354064941,
      "learning_rate": 9.499548638337408e-06,
      "loss": 0.0484,
      "step": 171900
    },
    {
      "epoch": 0.2813508506641006,
      "grad_norm": 0.7691072821617126,
      "learning_rate": 9.499482746123891e-06,
      "loss": 0.0568,
      "step": 171920
    },
    {
      "epoch": 0.28138358110275397,
      "grad_norm": 2.9251954555511475,
      "learning_rate": 9.499416853910373e-06,
      "loss": 0.0686,
      "step": 171940
    },
    {
      "epoch": 0.2814163115414073,
      "grad_norm": 3.6841776371002197,
      "learning_rate": 9.499350961696857e-06,
      "loss": 0.0647,
      "step": 171960
    },
    {
      "epoch": 0.2814490419800606,
      "grad_norm": 1.4701796770095825,
      "learning_rate": 9.499285069483339e-06,
      "loss": 0.0517,
      "step": 171980
    },
    {
      "epoch": 0.28148177241871397,
      "grad_norm": 1.2281363010406494,
      "learning_rate": 9.499219177269822e-06,
      "loss": 0.044,
      "step": 172000
    },
    {
      "epoch": 0.2815145028573673,
      "grad_norm": 6.2329511642456055,
      "learning_rate": 9.499153285056306e-06,
      "loss": 0.0461,
      "step": 172020
    },
    {
      "epoch": 0.28154723329602066,
      "grad_norm": 2.1272096633911133,
      "learning_rate": 9.499087392842788e-06,
      "loss": 0.0639,
      "step": 172040
    },
    {
      "epoch": 0.281579963734674,
      "grad_norm": 1.3522688150405884,
      "learning_rate": 9.499021500629271e-06,
      "loss": 0.0515,
      "step": 172060
    },
    {
      "epoch": 0.2816126941733273,
      "grad_norm": 0.7198584675788879,
      "learning_rate": 9.498955608415753e-06,
      "loss": 0.0499,
      "step": 172080
    },
    {
      "epoch": 0.28164542461198067,
      "grad_norm": 2.9882853031158447,
      "learning_rate": 9.498889716202237e-06,
      "loss": 0.066,
      "step": 172100
    },
    {
      "epoch": 0.281678155050634,
      "grad_norm": 4.5632476806640625,
      "learning_rate": 9.49882382398872e-06,
      "loss": 0.054,
      "step": 172120
    },
    {
      "epoch": 0.28171088548928735,
      "grad_norm": 0.871738851070404,
      "learning_rate": 9.498757931775202e-06,
      "loss": 0.0503,
      "step": 172140
    },
    {
      "epoch": 0.28174361592794067,
      "grad_norm": 1.6541175842285156,
      "learning_rate": 9.498692039561686e-06,
      "loss": 0.0565,
      "step": 172160
    },
    {
      "epoch": 0.281776346366594,
      "grad_norm": 0.5696065425872803,
      "learning_rate": 9.49862614734817e-06,
      "loss": 0.0514,
      "step": 172180
    },
    {
      "epoch": 0.28180907680524736,
      "grad_norm": 1.2024942636489868,
      "learning_rate": 9.498560255134651e-06,
      "loss": 0.0595,
      "step": 172200
    },
    {
      "epoch": 0.2818418072439007,
      "grad_norm": 4.2836713790893555,
      "learning_rate": 9.498494362921135e-06,
      "loss": 0.0516,
      "step": 172220
    },
    {
      "epoch": 0.281874537682554,
      "grad_norm": 2.1385488510131836,
      "learning_rate": 9.498428470707617e-06,
      "loss": 0.0578,
      "step": 172240
    },
    {
      "epoch": 0.28190726812120737,
      "grad_norm": 2.1606688499450684,
      "learning_rate": 9.4983625784941e-06,
      "loss": 0.051,
      "step": 172260
    },
    {
      "epoch": 0.2819399985598607,
      "grad_norm": 2.021965265274048,
      "learning_rate": 9.498296686280582e-06,
      "loss": 0.0471,
      "step": 172280
    },
    {
      "epoch": 0.28197272899851406,
      "grad_norm": 4.778365135192871,
      "learning_rate": 9.498230794067066e-06,
      "loss": 0.0526,
      "step": 172300
    },
    {
      "epoch": 0.28200545943716737,
      "grad_norm": 0.6369528770446777,
      "learning_rate": 9.498164901853548e-06,
      "loss": 0.0588,
      "step": 172320
    },
    {
      "epoch": 0.2820381898758207,
      "grad_norm": 1.7873157262802124,
      "learning_rate": 9.498099009640032e-06,
      "loss": 0.059,
      "step": 172340
    },
    {
      "epoch": 0.28207092031447406,
      "grad_norm": 2.500068187713623,
      "learning_rate": 9.498033117426515e-06,
      "loss": 0.0602,
      "step": 172360
    },
    {
      "epoch": 0.2821036507531274,
      "grad_norm": 2.308835506439209,
      "learning_rate": 9.497967225212997e-06,
      "loss": 0.0491,
      "step": 172380
    },
    {
      "epoch": 0.28213638119178075,
      "grad_norm": 2.7382969856262207,
      "learning_rate": 9.49790133299948e-06,
      "loss": 0.0587,
      "step": 172400
    },
    {
      "epoch": 0.28216911163043407,
      "grad_norm": 3.7502152919769287,
      "learning_rate": 9.497835440785962e-06,
      "loss": 0.0687,
      "step": 172420
    },
    {
      "epoch": 0.2822018420690874,
      "grad_norm": 6.8340864181518555,
      "learning_rate": 9.497769548572446e-06,
      "loss": 0.0567,
      "step": 172440
    },
    {
      "epoch": 0.28223457250774076,
      "grad_norm": 1.3326798677444458,
      "learning_rate": 9.497703656358928e-06,
      "loss": 0.0518,
      "step": 172460
    },
    {
      "epoch": 0.28226730294639407,
      "grad_norm": 4.95932149887085,
      "learning_rate": 9.497637764145412e-06,
      "loss": 0.0564,
      "step": 172480
    },
    {
      "epoch": 0.28230003338504744,
      "grad_norm": 3.897327423095703,
      "learning_rate": 9.497571871931895e-06,
      "loss": 0.0624,
      "step": 172500
    },
    {
      "epoch": 0.28233276382370076,
      "grad_norm": 1.3517924547195435,
      "learning_rate": 9.497505979718377e-06,
      "loss": 0.0489,
      "step": 172520
    },
    {
      "epoch": 0.2823654942623541,
      "grad_norm": 1.5124177932739258,
      "learning_rate": 9.49744008750486e-06,
      "loss": 0.0683,
      "step": 172540
    },
    {
      "epoch": 0.28239822470100745,
      "grad_norm": 2.4954288005828857,
      "learning_rate": 9.497374195291344e-06,
      "loss": 0.0517,
      "step": 172560
    },
    {
      "epoch": 0.28243095513966077,
      "grad_norm": 3.0359272956848145,
      "learning_rate": 9.497308303077826e-06,
      "loss": 0.0538,
      "step": 172580
    },
    {
      "epoch": 0.28246368557831414,
      "grad_norm": 1.393715500831604,
      "learning_rate": 9.49724241086431e-06,
      "loss": 0.0655,
      "step": 172600
    },
    {
      "epoch": 0.28249641601696746,
      "grad_norm": 1.5693120956420898,
      "learning_rate": 9.497176518650792e-06,
      "loss": 0.061,
      "step": 172620
    },
    {
      "epoch": 0.28252914645562077,
      "grad_norm": 2.082981824874878,
      "learning_rate": 9.497110626437275e-06,
      "loss": 0.0477,
      "step": 172640
    },
    {
      "epoch": 0.28256187689427414,
      "grad_norm": 0.9100540280342102,
      "learning_rate": 9.497044734223757e-06,
      "loss": 0.0533,
      "step": 172660
    },
    {
      "epoch": 0.28259460733292746,
      "grad_norm": 3.0964441299438477,
      "learning_rate": 9.49697884201024e-06,
      "loss": 0.0455,
      "step": 172680
    },
    {
      "epoch": 0.28262733777158083,
      "grad_norm": 0.9740391373634338,
      "learning_rate": 9.496912949796724e-06,
      "loss": 0.0561,
      "step": 172700
    },
    {
      "epoch": 0.28266006821023415,
      "grad_norm": 0.7283127307891846,
      "learning_rate": 9.496847057583206e-06,
      "loss": 0.0474,
      "step": 172720
    },
    {
      "epoch": 0.28269279864888747,
      "grad_norm": 2.807722330093384,
      "learning_rate": 9.49678116536969e-06,
      "loss": 0.0677,
      "step": 172740
    },
    {
      "epoch": 0.28272552908754084,
      "grad_norm": 2.0570249557495117,
      "learning_rate": 9.496715273156172e-06,
      "loss": 0.06,
      "step": 172760
    },
    {
      "epoch": 0.28275825952619416,
      "grad_norm": 1.6384716033935547,
      "learning_rate": 9.496649380942655e-06,
      "loss": 0.0517,
      "step": 172780
    },
    {
      "epoch": 0.28279098996484753,
      "grad_norm": 1.4598802328109741,
      "learning_rate": 9.496583488729137e-06,
      "loss": 0.0416,
      "step": 172800
    },
    {
      "epoch": 0.28282372040350084,
      "grad_norm": 0.8173114061355591,
      "learning_rate": 9.49651759651562e-06,
      "loss": 0.0517,
      "step": 172820
    },
    {
      "epoch": 0.28285645084215416,
      "grad_norm": 4.9131879806518555,
      "learning_rate": 9.496451704302103e-06,
      "loss": 0.06,
      "step": 172840
    },
    {
      "epoch": 0.28288918128080753,
      "grad_norm": 4.010931015014648,
      "learning_rate": 9.496385812088586e-06,
      "loss": 0.0637,
      "step": 172860
    },
    {
      "epoch": 0.28292191171946085,
      "grad_norm": 0.6379078030586243,
      "learning_rate": 9.496319919875068e-06,
      "loss": 0.0543,
      "step": 172880
    },
    {
      "epoch": 0.2829546421581142,
      "grad_norm": 0.8180931806564331,
      "learning_rate": 9.496254027661552e-06,
      "loss": 0.0477,
      "step": 172900
    },
    {
      "epoch": 0.28298737259676754,
      "grad_norm": 1.3990141153335571,
      "learning_rate": 9.496188135448035e-06,
      "loss": 0.0509,
      "step": 172920
    },
    {
      "epoch": 0.28302010303542086,
      "grad_norm": 1.4082260131835938,
      "learning_rate": 9.496122243234517e-06,
      "loss": 0.0536,
      "step": 172940
    },
    {
      "epoch": 0.28305283347407423,
      "grad_norm": 1.6914706230163574,
      "learning_rate": 9.496056351021e-06,
      "loss": 0.0555,
      "step": 172960
    },
    {
      "epoch": 0.28308556391272754,
      "grad_norm": 0.6509429812431335,
      "learning_rate": 9.495990458807484e-06,
      "loss": 0.0571,
      "step": 172980
    },
    {
      "epoch": 0.2831182943513809,
      "grad_norm": 2.0781753063201904,
      "learning_rate": 9.495924566593966e-06,
      "loss": 0.0487,
      "step": 173000
    },
    {
      "epoch": 0.28315102479003423,
      "grad_norm": 2.5793862342834473,
      "learning_rate": 9.49585867438045e-06,
      "loss": 0.0447,
      "step": 173020
    },
    {
      "epoch": 0.28318375522868755,
      "grad_norm": 5.063972473144531,
      "learning_rate": 9.495792782166932e-06,
      "loss": 0.0721,
      "step": 173040
    },
    {
      "epoch": 0.2832164856673409,
      "grad_norm": 4.398433685302734,
      "learning_rate": 9.495726889953415e-06,
      "loss": 0.0478,
      "step": 173060
    },
    {
      "epoch": 0.28324921610599424,
      "grad_norm": 1.1373755931854248,
      "learning_rate": 9.495660997739899e-06,
      "loss": 0.0466,
      "step": 173080
    },
    {
      "epoch": 0.2832819465446476,
      "grad_norm": 1.5030916929244995,
      "learning_rate": 9.49559510552638e-06,
      "loss": 0.0529,
      "step": 173100
    },
    {
      "epoch": 0.28331467698330093,
      "grad_norm": 0.49536508321762085,
      "learning_rate": 9.495529213312864e-06,
      "loss": 0.0662,
      "step": 173120
    },
    {
      "epoch": 0.28334740742195424,
      "grad_norm": 2.647015333175659,
      "learning_rate": 9.495463321099346e-06,
      "loss": 0.0627,
      "step": 173140
    },
    {
      "epoch": 0.2833801378606076,
      "grad_norm": 3.106884479522705,
      "learning_rate": 9.49539742888583e-06,
      "loss": 0.0488,
      "step": 173160
    },
    {
      "epoch": 0.28341286829926093,
      "grad_norm": 2.4814157485961914,
      "learning_rate": 9.495331536672312e-06,
      "loss": 0.0586,
      "step": 173180
    },
    {
      "epoch": 0.2834455987379143,
      "grad_norm": 2.9362993240356445,
      "learning_rate": 9.495265644458795e-06,
      "loss": 0.0675,
      "step": 173200
    },
    {
      "epoch": 0.2834783291765676,
      "grad_norm": 4.2251081466674805,
      "learning_rate": 9.495199752245277e-06,
      "loss": 0.0506,
      "step": 173220
    },
    {
      "epoch": 0.28351105961522094,
      "grad_norm": 3.3232901096343994,
      "learning_rate": 9.49513386003176e-06,
      "loss": 0.0569,
      "step": 173240
    },
    {
      "epoch": 0.2835437900538743,
      "grad_norm": 0.7765945792198181,
      "learning_rate": 9.495067967818243e-06,
      "loss": 0.0654,
      "step": 173260
    },
    {
      "epoch": 0.28357652049252763,
      "grad_norm": 1.2607944011688232,
      "learning_rate": 9.495002075604726e-06,
      "loss": 0.0526,
      "step": 173280
    },
    {
      "epoch": 0.283609250931181,
      "grad_norm": 0.7724539041519165,
      "learning_rate": 9.49493618339121e-06,
      "loss": 0.0439,
      "step": 173300
    },
    {
      "epoch": 0.2836419813698343,
      "grad_norm": 1.9599971771240234,
      "learning_rate": 9.494870291177692e-06,
      "loss": 0.0551,
      "step": 173320
    },
    {
      "epoch": 0.28367471180848763,
      "grad_norm": 0.5830015540122986,
      "learning_rate": 9.494804398964175e-06,
      "loss": 0.0521,
      "step": 173340
    },
    {
      "epoch": 0.283707442247141,
      "grad_norm": 2.0947816371917725,
      "learning_rate": 9.494738506750659e-06,
      "loss": 0.0542,
      "step": 173360
    },
    {
      "epoch": 0.2837401726857943,
      "grad_norm": 2.7022268772125244,
      "learning_rate": 9.49467261453714e-06,
      "loss": 0.0518,
      "step": 173380
    },
    {
      "epoch": 0.2837729031244477,
      "grad_norm": 4.003075122833252,
      "learning_rate": 9.494606722323624e-06,
      "loss": 0.0639,
      "step": 173400
    },
    {
      "epoch": 0.283805633563101,
      "grad_norm": 1.269504189491272,
      "learning_rate": 9.494540830110108e-06,
      "loss": 0.0498,
      "step": 173420
    },
    {
      "epoch": 0.28383836400175433,
      "grad_norm": 4.047563076019287,
      "learning_rate": 9.49447493789659e-06,
      "loss": 0.0626,
      "step": 173440
    },
    {
      "epoch": 0.2838710944404077,
      "grad_norm": 1.8560471534729004,
      "learning_rate": 9.494409045683073e-06,
      "loss": 0.0444,
      "step": 173460
    },
    {
      "epoch": 0.283903824879061,
      "grad_norm": 1.8859602212905884,
      "learning_rate": 9.494343153469555e-06,
      "loss": 0.0636,
      "step": 173480
    },
    {
      "epoch": 0.2839365553177144,
      "grad_norm": 1.2994860410690308,
      "learning_rate": 9.494277261256039e-06,
      "loss": 0.0467,
      "step": 173500
    },
    {
      "epoch": 0.2839692857563677,
      "grad_norm": 2.294339656829834,
      "learning_rate": 9.49421136904252e-06,
      "loss": 0.042,
      "step": 173520
    },
    {
      "epoch": 0.284002016195021,
      "grad_norm": 1.2300457954406738,
      "learning_rate": 9.494145476829004e-06,
      "loss": 0.0549,
      "step": 173540
    },
    {
      "epoch": 0.2840347466336744,
      "grad_norm": 8.975757598876953,
      "learning_rate": 9.494079584615486e-06,
      "loss": 0.0654,
      "step": 173560
    },
    {
      "epoch": 0.2840674770723277,
      "grad_norm": 1.8628391027450562,
      "learning_rate": 9.49401369240197e-06,
      "loss": 0.0541,
      "step": 173580
    },
    {
      "epoch": 0.2841002075109811,
      "grad_norm": 3.6559348106384277,
      "learning_rate": 9.493947800188452e-06,
      "loss": 0.0506,
      "step": 173600
    },
    {
      "epoch": 0.2841329379496344,
      "grad_norm": 3.757530927658081,
      "learning_rate": 9.493881907974935e-06,
      "loss": 0.0416,
      "step": 173620
    },
    {
      "epoch": 0.2841656683882877,
      "grad_norm": 7.074733257293701,
      "learning_rate": 9.493816015761417e-06,
      "loss": 0.0573,
      "step": 173640
    },
    {
      "epoch": 0.2841983988269411,
      "grad_norm": 1.1462676525115967,
      "learning_rate": 9.4937501235479e-06,
      "loss": 0.0606,
      "step": 173660
    },
    {
      "epoch": 0.2842311292655944,
      "grad_norm": 2.2688143253326416,
      "learning_rate": 9.493684231334384e-06,
      "loss": 0.0577,
      "step": 173680
    },
    {
      "epoch": 0.2842638597042478,
      "grad_norm": 0.652190625667572,
      "learning_rate": 9.493618339120866e-06,
      "loss": 0.0562,
      "step": 173700
    },
    {
      "epoch": 0.2842965901429011,
      "grad_norm": 1.7697179317474365,
      "learning_rate": 9.49355244690735e-06,
      "loss": 0.0511,
      "step": 173720
    },
    {
      "epoch": 0.2843293205815544,
      "grad_norm": 2.4249255657196045,
      "learning_rate": 9.493486554693833e-06,
      "loss": 0.0622,
      "step": 173740
    },
    {
      "epoch": 0.2843620510202078,
      "grad_norm": 4.250504016876221,
      "learning_rate": 9.493420662480315e-06,
      "loss": 0.0475,
      "step": 173760
    },
    {
      "epoch": 0.2843947814588611,
      "grad_norm": 1.297389268875122,
      "learning_rate": 9.493354770266799e-06,
      "loss": 0.0494,
      "step": 173780
    },
    {
      "epoch": 0.2844275118975145,
      "grad_norm": 2.412976026535034,
      "learning_rate": 9.493288878053283e-06,
      "loss": 0.0482,
      "step": 173800
    },
    {
      "epoch": 0.2844602423361678,
      "grad_norm": 8.245550155639648,
      "learning_rate": 9.493222985839764e-06,
      "loss": 0.0584,
      "step": 173820
    },
    {
      "epoch": 0.2844929727748211,
      "grad_norm": 1.5396136045455933,
      "learning_rate": 9.493157093626248e-06,
      "loss": 0.0425,
      "step": 173840
    },
    {
      "epoch": 0.2845257032134745,
      "grad_norm": 2.7821788787841797,
      "learning_rate": 9.49309120141273e-06,
      "loss": 0.0626,
      "step": 173860
    },
    {
      "epoch": 0.2845584336521278,
      "grad_norm": 2.153550624847412,
      "learning_rate": 9.493025309199213e-06,
      "loss": 0.0586,
      "step": 173880
    },
    {
      "epoch": 0.28459116409078117,
      "grad_norm": 1.3860119581222534,
      "learning_rate": 9.492959416985695e-06,
      "loss": 0.0499,
      "step": 173900
    },
    {
      "epoch": 0.2846238945294345,
      "grad_norm": 1.595323920249939,
      "learning_rate": 9.492893524772179e-06,
      "loss": 0.0583,
      "step": 173920
    },
    {
      "epoch": 0.2846566249680878,
      "grad_norm": 1.6910613775253296,
      "learning_rate": 9.492827632558661e-06,
      "loss": 0.0553,
      "step": 173940
    },
    {
      "epoch": 0.2846893554067412,
      "grad_norm": 2.1853959560394287,
      "learning_rate": 9.492761740345144e-06,
      "loss": 0.048,
      "step": 173960
    },
    {
      "epoch": 0.2847220858453945,
      "grad_norm": 1.0803478956222534,
      "learning_rate": 9.492695848131626e-06,
      "loss": 0.0415,
      "step": 173980
    },
    {
      "epoch": 0.28475481628404786,
      "grad_norm": 1.6052335500717163,
      "learning_rate": 9.49262995591811e-06,
      "loss": 0.0437,
      "step": 174000
    },
    {
      "epoch": 0.2847875467227012,
      "grad_norm": 1.7661564350128174,
      "learning_rate": 9.492564063704592e-06,
      "loss": 0.0408,
      "step": 174020
    },
    {
      "epoch": 0.2848202771613545,
      "grad_norm": 3.160846710205078,
      "learning_rate": 9.492498171491075e-06,
      "loss": 0.0489,
      "step": 174040
    },
    {
      "epoch": 0.28485300760000787,
      "grad_norm": 2.950664758682251,
      "learning_rate": 9.492432279277557e-06,
      "loss": 0.065,
      "step": 174060
    },
    {
      "epoch": 0.2848857380386612,
      "grad_norm": 4.2651047706604,
      "learning_rate": 9.492366387064041e-06,
      "loss": 0.0541,
      "step": 174080
    },
    {
      "epoch": 0.28491846847731456,
      "grad_norm": 2.285473346710205,
      "learning_rate": 9.492300494850524e-06,
      "loss": 0.0632,
      "step": 174100
    },
    {
      "epoch": 0.2849511989159679,
      "grad_norm": 1.9285553693771362,
      "learning_rate": 9.492234602637006e-06,
      "loss": 0.0549,
      "step": 174120
    },
    {
      "epoch": 0.2849839293546212,
      "grad_norm": 1.0816681385040283,
      "learning_rate": 9.49216871042349e-06,
      "loss": 0.0439,
      "step": 174140
    },
    {
      "epoch": 0.28501665979327456,
      "grad_norm": 0.8880852460861206,
      "learning_rate": 9.492102818209974e-06,
      "loss": 0.0674,
      "step": 174160
    },
    {
      "epoch": 0.2850493902319279,
      "grad_norm": 1.5593178272247314,
      "learning_rate": 9.492036925996455e-06,
      "loss": 0.0522,
      "step": 174180
    },
    {
      "epoch": 0.28508212067058125,
      "grad_norm": 3.7272415161132812,
      "learning_rate": 9.491971033782939e-06,
      "loss": 0.0577,
      "step": 174200
    },
    {
      "epoch": 0.28511485110923457,
      "grad_norm": 2.2533349990844727,
      "learning_rate": 9.491905141569423e-06,
      "loss": 0.0603,
      "step": 174220
    },
    {
      "epoch": 0.2851475815478879,
      "grad_norm": 1.6905484199523926,
      "learning_rate": 9.491839249355904e-06,
      "loss": 0.0634,
      "step": 174240
    },
    {
      "epoch": 0.28518031198654126,
      "grad_norm": 3.436427593231201,
      "learning_rate": 9.491773357142388e-06,
      "loss": 0.049,
      "step": 174260
    },
    {
      "epoch": 0.2852130424251946,
      "grad_norm": 1.7864478826522827,
      "learning_rate": 9.49170746492887e-06,
      "loss": 0.0506,
      "step": 174280
    },
    {
      "epoch": 0.28524577286384795,
      "grad_norm": 1.6681571006774902,
      "learning_rate": 9.491641572715354e-06,
      "loss": 0.0644,
      "step": 174300
    },
    {
      "epoch": 0.28527850330250126,
      "grad_norm": 2.5735414028167725,
      "learning_rate": 9.491575680501835e-06,
      "loss": 0.0715,
      "step": 174320
    },
    {
      "epoch": 0.2853112337411546,
      "grad_norm": 3.1014645099639893,
      "learning_rate": 9.491509788288319e-06,
      "loss": 0.0518,
      "step": 174340
    },
    {
      "epoch": 0.28534396417980795,
      "grad_norm": 0.8235708475112915,
      "learning_rate": 9.491443896074801e-06,
      "loss": 0.0556,
      "step": 174360
    },
    {
      "epoch": 0.28537669461846127,
      "grad_norm": 1.1352386474609375,
      "learning_rate": 9.491378003861285e-06,
      "loss": 0.0378,
      "step": 174380
    },
    {
      "epoch": 0.28540942505711464,
      "grad_norm": 9.161324501037598,
      "learning_rate": 9.491312111647766e-06,
      "loss": 0.064,
      "step": 174400
    },
    {
      "epoch": 0.28544215549576796,
      "grad_norm": 2.5641026496887207,
      "learning_rate": 9.49124621943425e-06,
      "loss": 0.0634,
      "step": 174420
    },
    {
      "epoch": 0.2854748859344213,
      "grad_norm": 1.4594528675079346,
      "learning_rate": 9.491180327220732e-06,
      "loss": 0.0655,
      "step": 174440
    },
    {
      "epoch": 0.28550761637307465,
      "grad_norm": 2.5817723274230957,
      "learning_rate": 9.491114435007215e-06,
      "loss": 0.0487,
      "step": 174460
    },
    {
      "epoch": 0.28554034681172796,
      "grad_norm": 3.3286197185516357,
      "learning_rate": 9.491048542793699e-06,
      "loss": 0.0444,
      "step": 174480
    },
    {
      "epoch": 0.28557307725038134,
      "grad_norm": 1.8987237215042114,
      "learning_rate": 9.490982650580181e-06,
      "loss": 0.049,
      "step": 174500
    },
    {
      "epoch": 0.28560580768903465,
      "grad_norm": 4.405591011047363,
      "learning_rate": 9.490916758366665e-06,
      "loss": 0.0539,
      "step": 174520
    },
    {
      "epoch": 0.28563853812768797,
      "grad_norm": 1.1065069437026978,
      "learning_rate": 9.490850866153148e-06,
      "loss": 0.04,
      "step": 174540
    },
    {
      "epoch": 0.28567126856634134,
      "grad_norm": 1.0959581136703491,
      "learning_rate": 9.49078497393963e-06,
      "loss": 0.0523,
      "step": 174560
    },
    {
      "epoch": 0.28570399900499466,
      "grad_norm": 4.740656852722168,
      "learning_rate": 9.490719081726114e-06,
      "loss": 0.0388,
      "step": 174580
    },
    {
      "epoch": 0.28573672944364803,
      "grad_norm": 3.0771398544311523,
      "learning_rate": 9.490653189512597e-06,
      "loss": 0.0294,
      "step": 174600
    },
    {
      "epoch": 0.28576945988230135,
      "grad_norm": 1.5210763216018677,
      "learning_rate": 9.490587297299079e-06,
      "loss": 0.0533,
      "step": 174620
    },
    {
      "epoch": 0.28580219032095466,
      "grad_norm": 1.4678996801376343,
      "learning_rate": 9.490521405085563e-06,
      "loss": 0.0552,
      "step": 174640
    },
    {
      "epoch": 0.28583492075960804,
      "grad_norm": 1.8463306427001953,
      "learning_rate": 9.490455512872045e-06,
      "loss": 0.0549,
      "step": 174660
    },
    {
      "epoch": 0.28586765119826135,
      "grad_norm": 1.5188002586364746,
      "learning_rate": 9.490389620658528e-06,
      "loss": 0.0638,
      "step": 174680
    },
    {
      "epoch": 0.2859003816369147,
      "grad_norm": 1.8008968830108643,
      "learning_rate": 9.49032372844501e-06,
      "loss": 0.0574,
      "step": 174700
    },
    {
      "epoch": 0.28593311207556804,
      "grad_norm": 3.202249765396118,
      "learning_rate": 9.490257836231494e-06,
      "loss": 0.0567,
      "step": 174720
    },
    {
      "epoch": 0.28596584251422136,
      "grad_norm": 2.8617656230926514,
      "learning_rate": 9.490191944017976e-06,
      "loss": 0.0429,
      "step": 174740
    },
    {
      "epoch": 0.28599857295287473,
      "grad_norm": 1.1346423625946045,
      "learning_rate": 9.490126051804459e-06,
      "loss": 0.0344,
      "step": 174760
    },
    {
      "epoch": 0.28603130339152805,
      "grad_norm": 2.9708943367004395,
      "learning_rate": 9.490060159590941e-06,
      "loss": 0.0639,
      "step": 174780
    },
    {
      "epoch": 0.2860640338301814,
      "grad_norm": 1.0941131114959717,
      "learning_rate": 9.489994267377425e-06,
      "loss": 0.0565,
      "step": 174800
    },
    {
      "epoch": 0.28609676426883474,
      "grad_norm": 1.911055564880371,
      "learning_rate": 9.489928375163908e-06,
      "loss": 0.0456,
      "step": 174820
    },
    {
      "epoch": 0.28612949470748805,
      "grad_norm": 2.0381228923797607,
      "learning_rate": 9.48986248295039e-06,
      "loss": 0.0527,
      "step": 174840
    },
    {
      "epoch": 0.2861622251461414,
      "grad_norm": 1.756815791130066,
      "learning_rate": 9.489796590736874e-06,
      "loss": 0.06,
      "step": 174860
    },
    {
      "epoch": 0.28619495558479474,
      "grad_norm": 2.43723726272583,
      "learning_rate": 9.489730698523356e-06,
      "loss": 0.0442,
      "step": 174880
    },
    {
      "epoch": 0.2862276860234481,
      "grad_norm": 3.039355754852295,
      "learning_rate": 9.489664806309839e-06,
      "loss": 0.0501,
      "step": 174900
    },
    {
      "epoch": 0.28626041646210143,
      "grad_norm": 0.9701074957847595,
      "learning_rate": 9.489598914096321e-06,
      "loss": 0.0338,
      "step": 174920
    },
    {
      "epoch": 0.28629314690075475,
      "grad_norm": 0.8618232607841492,
      "learning_rate": 9.489533021882805e-06,
      "loss": 0.0408,
      "step": 174940
    },
    {
      "epoch": 0.2863258773394081,
      "grad_norm": 3.1578187942504883,
      "learning_rate": 9.489467129669288e-06,
      "loss": 0.0558,
      "step": 174960
    },
    {
      "epoch": 0.28635860777806144,
      "grad_norm": 1.9318615198135376,
      "learning_rate": 9.48940123745577e-06,
      "loss": 0.045,
      "step": 174980
    },
    {
      "epoch": 0.2863913382167148,
      "grad_norm": 1.5248713493347168,
      "learning_rate": 9.489335345242254e-06,
      "loss": 0.0535,
      "step": 175000
    },
    {
      "epoch": 0.2864240686553681,
      "grad_norm": 2.413102626800537,
      "learning_rate": 9.489269453028737e-06,
      "loss": 0.0439,
      "step": 175020
    },
    {
      "epoch": 0.28645679909402144,
      "grad_norm": 1.3827555179595947,
      "learning_rate": 9.48920356081522e-06,
      "loss": 0.0503,
      "step": 175040
    },
    {
      "epoch": 0.2864895295326748,
      "grad_norm": 1.1201527118682861,
      "learning_rate": 9.489137668601703e-06,
      "loss": 0.0628,
      "step": 175060
    },
    {
      "epoch": 0.28652225997132813,
      "grad_norm": 4.278746604919434,
      "learning_rate": 9.489071776388185e-06,
      "loss": 0.0529,
      "step": 175080
    },
    {
      "epoch": 0.28655499040998145,
      "grad_norm": 2.4337801933288574,
      "learning_rate": 9.489005884174668e-06,
      "loss": 0.0582,
      "step": 175100
    },
    {
      "epoch": 0.2865877208486348,
      "grad_norm": 2.60206937789917,
      "learning_rate": 9.48893999196115e-06,
      "loss": 0.0494,
      "step": 175120
    },
    {
      "epoch": 0.28662045128728814,
      "grad_norm": 5.889477252960205,
      "learning_rate": 9.488874099747634e-06,
      "loss": 0.0518,
      "step": 175140
    },
    {
      "epoch": 0.2866531817259415,
      "grad_norm": 1.8461384773254395,
      "learning_rate": 9.488808207534117e-06,
      "loss": 0.0616,
      "step": 175160
    },
    {
      "epoch": 0.2866859121645948,
      "grad_norm": 3.8031468391418457,
      "learning_rate": 9.4887423153206e-06,
      "loss": 0.0634,
      "step": 175180
    },
    {
      "epoch": 0.28671864260324814,
      "grad_norm": 2.7847344875335693,
      "learning_rate": 9.488676423107083e-06,
      "loss": 0.0538,
      "step": 175200
    },
    {
      "epoch": 0.2867513730419015,
      "grad_norm": 2.048377752304077,
      "learning_rate": 9.488610530893565e-06,
      "loss": 0.0579,
      "step": 175220
    },
    {
      "epoch": 0.28678410348055483,
      "grad_norm": 2.32623028755188,
      "learning_rate": 9.488544638680048e-06,
      "loss": 0.0682,
      "step": 175240
    },
    {
      "epoch": 0.2868168339192082,
      "grad_norm": 1.295722246170044,
      "learning_rate": 9.48847874646653e-06,
      "loss": 0.0472,
      "step": 175260
    },
    {
      "epoch": 0.2868495643578615,
      "grad_norm": 2.050286293029785,
      "learning_rate": 9.488412854253014e-06,
      "loss": 0.077,
      "step": 175280
    },
    {
      "epoch": 0.28688229479651484,
      "grad_norm": 1.2468856573104858,
      "learning_rate": 9.488346962039496e-06,
      "loss": 0.0558,
      "step": 175300
    },
    {
      "epoch": 0.2869150252351682,
      "grad_norm": 1.0609557628631592,
      "learning_rate": 9.48828106982598e-06,
      "loss": 0.0305,
      "step": 175320
    },
    {
      "epoch": 0.2869477556738215,
      "grad_norm": 2.6341638565063477,
      "learning_rate": 9.488215177612463e-06,
      "loss": 0.0494,
      "step": 175340
    },
    {
      "epoch": 0.2869804861124749,
      "grad_norm": 7.890031814575195,
      "learning_rate": 9.488149285398945e-06,
      "loss": 0.0725,
      "step": 175360
    },
    {
      "epoch": 0.2870132165511282,
      "grad_norm": 0.5691899657249451,
      "learning_rate": 9.488083393185428e-06,
      "loss": 0.0452,
      "step": 175380
    },
    {
      "epoch": 0.28704594698978153,
      "grad_norm": 1.6558233499526978,
      "learning_rate": 9.488017500971912e-06,
      "loss": 0.0638,
      "step": 175400
    },
    {
      "epoch": 0.2870786774284349,
      "grad_norm": 2.7784316539764404,
      "learning_rate": 9.487951608758394e-06,
      "loss": 0.0607,
      "step": 175420
    },
    {
      "epoch": 0.2871114078670882,
      "grad_norm": 0.9316056966781616,
      "learning_rate": 9.487885716544877e-06,
      "loss": 0.0421,
      "step": 175440
    },
    {
      "epoch": 0.2871441383057416,
      "grad_norm": 1.3088181018829346,
      "learning_rate": 9.48781982433136e-06,
      "loss": 0.0476,
      "step": 175460
    },
    {
      "epoch": 0.2871768687443949,
      "grad_norm": 2.219574213027954,
      "learning_rate": 9.487753932117843e-06,
      "loss": 0.0664,
      "step": 175480
    },
    {
      "epoch": 0.2872095991830482,
      "grad_norm": 4.544788837432861,
      "learning_rate": 9.487688039904325e-06,
      "loss": 0.0546,
      "step": 175500
    },
    {
      "epoch": 0.2872423296217016,
      "grad_norm": 2.4732303619384766,
      "learning_rate": 9.487622147690808e-06,
      "loss": 0.0695,
      "step": 175520
    },
    {
      "epoch": 0.2872750600603549,
      "grad_norm": 2.6815707683563232,
      "learning_rate": 9.487556255477292e-06,
      "loss": 0.0513,
      "step": 175540
    },
    {
      "epoch": 0.2873077904990083,
      "grad_norm": 4.3624725341796875,
      "learning_rate": 9.487490363263774e-06,
      "loss": 0.05,
      "step": 175560
    },
    {
      "epoch": 0.2873405209376616,
      "grad_norm": 2.4040122032165527,
      "learning_rate": 9.487424471050257e-06,
      "loss": 0.0725,
      "step": 175580
    },
    {
      "epoch": 0.2873732513763149,
      "grad_norm": 2.227241039276123,
      "learning_rate": 9.48735857883674e-06,
      "loss": 0.0442,
      "step": 175600
    },
    {
      "epoch": 0.2874059818149683,
      "grad_norm": 1.9340951442718506,
      "learning_rate": 9.487292686623223e-06,
      "loss": 0.0498,
      "step": 175620
    },
    {
      "epoch": 0.2874387122536216,
      "grad_norm": 1.2184813022613525,
      "learning_rate": 9.487226794409705e-06,
      "loss": 0.0621,
      "step": 175640
    },
    {
      "epoch": 0.287471442692275,
      "grad_norm": 2.535046100616455,
      "learning_rate": 9.487160902196188e-06,
      "loss": 0.0494,
      "step": 175660
    },
    {
      "epoch": 0.2875041731309283,
      "grad_norm": 2.990757703781128,
      "learning_rate": 9.48709500998267e-06,
      "loss": 0.0585,
      "step": 175680
    },
    {
      "epoch": 0.2875369035695816,
      "grad_norm": 2.903970718383789,
      "learning_rate": 9.487029117769154e-06,
      "loss": 0.0429,
      "step": 175700
    },
    {
      "epoch": 0.287569634008235,
      "grad_norm": 0.8053140044212341,
      "learning_rate": 9.486963225555636e-06,
      "loss": 0.0341,
      "step": 175720
    },
    {
      "epoch": 0.2876023644468883,
      "grad_norm": 2.1833879947662354,
      "learning_rate": 9.48689733334212e-06,
      "loss": 0.0563,
      "step": 175740
    },
    {
      "epoch": 0.2876350948855417,
      "grad_norm": 0.8408417701721191,
      "learning_rate": 9.486831441128603e-06,
      "loss": 0.057,
      "step": 175760
    },
    {
      "epoch": 0.287667825324195,
      "grad_norm": 0.760103166103363,
      "learning_rate": 9.486765548915086e-06,
      "loss": 0.0514,
      "step": 175780
    },
    {
      "epoch": 0.2877005557628483,
      "grad_norm": 1.5494258403778076,
      "learning_rate": 9.486699656701568e-06,
      "loss": 0.0414,
      "step": 175800
    },
    {
      "epoch": 0.2877332862015017,
      "grad_norm": 2.4609053134918213,
      "learning_rate": 9.486633764488052e-06,
      "loss": 0.0434,
      "step": 175820
    },
    {
      "epoch": 0.287766016640155,
      "grad_norm": 1.2892823219299316,
      "learning_rate": 9.486567872274534e-06,
      "loss": 0.0699,
      "step": 175840
    },
    {
      "epoch": 0.28779874707880837,
      "grad_norm": 1.5599799156188965,
      "learning_rate": 9.486501980061017e-06,
      "loss": 0.0614,
      "step": 175860
    },
    {
      "epoch": 0.2878314775174617,
      "grad_norm": 1.370601773262024,
      "learning_rate": 9.486436087847501e-06,
      "loss": 0.0564,
      "step": 175880
    },
    {
      "epoch": 0.287864207956115,
      "grad_norm": 1.8587801456451416,
      "learning_rate": 9.486370195633983e-06,
      "loss": 0.0525,
      "step": 175900
    },
    {
      "epoch": 0.2878969383947684,
      "grad_norm": 1.2996906042099,
      "learning_rate": 9.486304303420466e-06,
      "loss": 0.0643,
      "step": 175920
    },
    {
      "epoch": 0.2879296688334217,
      "grad_norm": 1.8073022365570068,
      "learning_rate": 9.486238411206948e-06,
      "loss": 0.0591,
      "step": 175940
    },
    {
      "epoch": 0.28796239927207506,
      "grad_norm": 1.4360642433166504,
      "learning_rate": 9.486172518993432e-06,
      "loss": 0.0613,
      "step": 175960
    },
    {
      "epoch": 0.2879951297107284,
      "grad_norm": 3.3353288173675537,
      "learning_rate": 9.486106626779914e-06,
      "loss": 0.0554,
      "step": 175980
    },
    {
      "epoch": 0.2880278601493817,
      "grad_norm": 2.246272325515747,
      "learning_rate": 9.486040734566397e-06,
      "loss": 0.0533,
      "step": 176000
    },
    {
      "epoch": 0.28806059058803507,
      "grad_norm": 1.6643729209899902,
      "learning_rate": 9.48597484235288e-06,
      "loss": 0.0618,
      "step": 176020
    },
    {
      "epoch": 0.2880933210266884,
      "grad_norm": 1.3742092847824097,
      "learning_rate": 9.485908950139363e-06,
      "loss": 0.0634,
      "step": 176040
    },
    {
      "epoch": 0.28812605146534176,
      "grad_norm": 2.0631911754608154,
      "learning_rate": 9.485843057925845e-06,
      "loss": 0.0443,
      "step": 176060
    },
    {
      "epoch": 0.2881587819039951,
      "grad_norm": 2.867975950241089,
      "learning_rate": 9.485777165712328e-06,
      "loss": 0.061,
      "step": 176080
    },
    {
      "epoch": 0.2881915123426484,
      "grad_norm": 3.3056132793426514,
      "learning_rate": 9.48571127349881e-06,
      "loss": 0.0572,
      "step": 176100
    },
    {
      "epoch": 0.28822424278130176,
      "grad_norm": 5.575493335723877,
      "learning_rate": 9.485645381285294e-06,
      "loss": 0.0554,
      "step": 176120
    },
    {
      "epoch": 0.2882569732199551,
      "grad_norm": 1.52536141872406,
      "learning_rate": 9.485579489071777e-06,
      "loss": 0.0465,
      "step": 176140
    },
    {
      "epoch": 0.28828970365860845,
      "grad_norm": 5.712409973144531,
      "learning_rate": 9.48551359685826e-06,
      "loss": 0.0648,
      "step": 176160
    },
    {
      "epoch": 0.28832243409726177,
      "grad_norm": 1.3988277912139893,
      "learning_rate": 9.485447704644743e-06,
      "loss": 0.0477,
      "step": 176180
    },
    {
      "epoch": 0.2883551645359151,
      "grad_norm": 1.1055399179458618,
      "learning_rate": 9.485381812431227e-06,
      "loss": 0.0444,
      "step": 176200
    },
    {
      "epoch": 0.28838789497456846,
      "grad_norm": 1.9409995079040527,
      "learning_rate": 9.485315920217708e-06,
      "loss": 0.0606,
      "step": 176220
    },
    {
      "epoch": 0.2884206254132218,
      "grad_norm": 0.7478981614112854,
      "learning_rate": 9.485250028004192e-06,
      "loss": 0.0574,
      "step": 176240
    },
    {
      "epoch": 0.28845335585187515,
      "grad_norm": 2.3603994846343994,
      "learning_rate": 9.485184135790676e-06,
      "loss": 0.0515,
      "step": 176260
    },
    {
      "epoch": 0.28848608629052847,
      "grad_norm": 0.9673510193824768,
      "learning_rate": 9.485118243577157e-06,
      "loss": 0.0536,
      "step": 176280
    },
    {
      "epoch": 0.2885188167291818,
      "grad_norm": 7.239416122436523,
      "learning_rate": 9.485052351363641e-06,
      "loss": 0.0606,
      "step": 176300
    },
    {
      "epoch": 0.28855154716783515,
      "grad_norm": 2.766072988510132,
      "learning_rate": 9.484986459150123e-06,
      "loss": 0.0546,
      "step": 176320
    },
    {
      "epoch": 0.28858427760648847,
      "grad_norm": 2.4842262268066406,
      "learning_rate": 9.484920566936607e-06,
      "loss": 0.065,
      "step": 176340
    },
    {
      "epoch": 0.28861700804514184,
      "grad_norm": 2.6910533905029297,
      "learning_rate": 9.484854674723088e-06,
      "loss": 0.0604,
      "step": 176360
    },
    {
      "epoch": 0.28864973848379516,
      "grad_norm": 4.135145664215088,
      "learning_rate": 9.484788782509572e-06,
      "loss": 0.06,
      "step": 176380
    },
    {
      "epoch": 0.2886824689224485,
      "grad_norm": 1.3029628992080688,
      "learning_rate": 9.484722890296054e-06,
      "loss": 0.0631,
      "step": 176400
    },
    {
      "epoch": 0.28871519936110185,
      "grad_norm": 1.371566653251648,
      "learning_rate": 9.484656998082538e-06,
      "loss": 0.0643,
      "step": 176420
    },
    {
      "epoch": 0.28874792979975517,
      "grad_norm": 2.902200222015381,
      "learning_rate": 9.48459110586902e-06,
      "loss": 0.0634,
      "step": 176440
    },
    {
      "epoch": 0.28878066023840854,
      "grad_norm": 2.156587600708008,
      "learning_rate": 9.484525213655503e-06,
      "loss": 0.0721,
      "step": 176460
    },
    {
      "epoch": 0.28881339067706185,
      "grad_norm": 3.0983357429504395,
      "learning_rate": 9.484459321441985e-06,
      "loss": 0.0494,
      "step": 176480
    },
    {
      "epoch": 0.28884612111571517,
      "grad_norm": 1.5165706872940063,
      "learning_rate": 9.484393429228468e-06,
      "loss": 0.0638,
      "step": 176500
    },
    {
      "epoch": 0.28887885155436854,
      "grad_norm": 1.6635369062423706,
      "learning_rate": 9.484327537014952e-06,
      "loss": 0.0509,
      "step": 176520
    },
    {
      "epoch": 0.28891158199302186,
      "grad_norm": 1.2088927030563354,
      "learning_rate": 9.484261644801434e-06,
      "loss": 0.0443,
      "step": 176540
    },
    {
      "epoch": 0.28894431243167523,
      "grad_norm": 2.7903833389282227,
      "learning_rate": 9.484195752587918e-06,
      "loss": 0.0437,
      "step": 176560
    },
    {
      "epoch": 0.28897704287032855,
      "grad_norm": 1.2742775678634644,
      "learning_rate": 9.484129860374401e-06,
      "loss": 0.07,
      "step": 176580
    },
    {
      "epoch": 0.28900977330898187,
      "grad_norm": 1.2586586475372314,
      "learning_rate": 9.484063968160883e-06,
      "loss": 0.0501,
      "step": 176600
    },
    {
      "epoch": 0.28904250374763524,
      "grad_norm": 1.7705028057098389,
      "learning_rate": 9.483998075947367e-06,
      "loss": 0.0577,
      "step": 176620
    },
    {
      "epoch": 0.28907523418628855,
      "grad_norm": 2.2479727268218994,
      "learning_rate": 9.48393218373385e-06,
      "loss": 0.0702,
      "step": 176640
    },
    {
      "epoch": 0.2891079646249419,
      "grad_norm": 4.189124584197998,
      "learning_rate": 9.483866291520332e-06,
      "loss": 0.055,
      "step": 176660
    },
    {
      "epoch": 0.28914069506359524,
      "grad_norm": 1.2630107402801514,
      "learning_rate": 9.483800399306816e-06,
      "loss": 0.0422,
      "step": 176680
    },
    {
      "epoch": 0.28917342550224856,
      "grad_norm": 3.1335906982421875,
      "learning_rate": 9.483734507093298e-06,
      "loss": 0.0604,
      "step": 176700
    },
    {
      "epoch": 0.28920615594090193,
      "grad_norm": 4.341984748840332,
      "learning_rate": 9.483668614879781e-06,
      "loss": 0.0498,
      "step": 176720
    },
    {
      "epoch": 0.28923888637955525,
      "grad_norm": 0.6648404002189636,
      "learning_rate": 9.483602722666263e-06,
      "loss": 0.0668,
      "step": 176740
    },
    {
      "epoch": 0.2892716168182086,
      "grad_norm": 0.5523581504821777,
      "learning_rate": 9.483536830452747e-06,
      "loss": 0.0576,
      "step": 176760
    },
    {
      "epoch": 0.28930434725686194,
      "grad_norm": 7.777834892272949,
      "learning_rate": 9.483470938239229e-06,
      "loss": 0.0546,
      "step": 176780
    },
    {
      "epoch": 0.28933707769551525,
      "grad_norm": 2.6555871963500977,
      "learning_rate": 9.483405046025712e-06,
      "loss": 0.043,
      "step": 176800
    },
    {
      "epoch": 0.2893698081341686,
      "grad_norm": 1.5213295221328735,
      "learning_rate": 9.483339153812194e-06,
      "loss": 0.0502,
      "step": 176820
    },
    {
      "epoch": 0.28940253857282194,
      "grad_norm": 4.7327799797058105,
      "learning_rate": 9.483273261598678e-06,
      "loss": 0.0518,
      "step": 176840
    },
    {
      "epoch": 0.2894352690114753,
      "grad_norm": 5.714395999908447,
      "learning_rate": 9.48320736938516e-06,
      "loss": 0.0506,
      "step": 176860
    },
    {
      "epoch": 0.28946799945012863,
      "grad_norm": 2.295031785964966,
      "learning_rate": 9.483141477171643e-06,
      "loss": 0.0584,
      "step": 176880
    },
    {
      "epoch": 0.28950072988878195,
      "grad_norm": 2.3325204849243164,
      "learning_rate": 9.483075584958125e-06,
      "loss": 0.0568,
      "step": 176900
    },
    {
      "epoch": 0.2895334603274353,
      "grad_norm": 1.523606777191162,
      "learning_rate": 9.483009692744609e-06,
      "loss": 0.0638,
      "step": 176920
    },
    {
      "epoch": 0.28956619076608864,
      "grad_norm": 1.6012156009674072,
      "learning_rate": 9.482943800531092e-06,
      "loss": 0.0454,
      "step": 176940
    },
    {
      "epoch": 0.289598921204742,
      "grad_norm": 2.023794651031494,
      "learning_rate": 9.482877908317574e-06,
      "loss": 0.0589,
      "step": 176960
    },
    {
      "epoch": 0.2896316516433953,
      "grad_norm": 0.9921860098838806,
      "learning_rate": 9.482812016104058e-06,
      "loss": 0.0575,
      "step": 176980
    },
    {
      "epoch": 0.28966438208204864,
      "grad_norm": 2.7789716720581055,
      "learning_rate": 9.482746123890541e-06,
      "loss": 0.0492,
      "step": 177000
    },
    {
      "epoch": 0.289697112520702,
      "grad_norm": 3.749277114868164,
      "learning_rate": 9.482680231677023e-06,
      "loss": 0.0835,
      "step": 177020
    },
    {
      "epoch": 0.28972984295935533,
      "grad_norm": 3.560152053833008,
      "learning_rate": 9.482614339463507e-06,
      "loss": 0.0589,
      "step": 177040
    },
    {
      "epoch": 0.2897625733980087,
      "grad_norm": 3.9182519912719727,
      "learning_rate": 9.48254844724999e-06,
      "loss": 0.0522,
      "step": 177060
    },
    {
      "epoch": 0.289795303836662,
      "grad_norm": 4.2446417808532715,
      "learning_rate": 9.482482555036472e-06,
      "loss": 0.0514,
      "step": 177080
    },
    {
      "epoch": 0.28982803427531534,
      "grad_norm": 1.8153338432312012,
      "learning_rate": 9.482416662822956e-06,
      "loss": 0.0453,
      "step": 177100
    },
    {
      "epoch": 0.2898607647139687,
      "grad_norm": 2.1456737518310547,
      "learning_rate": 9.482350770609438e-06,
      "loss": 0.0546,
      "step": 177120
    },
    {
      "epoch": 0.289893495152622,
      "grad_norm": 2.1930394172668457,
      "learning_rate": 9.482284878395921e-06,
      "loss": 0.0707,
      "step": 177140
    },
    {
      "epoch": 0.2899262255912754,
      "grad_norm": 0.9321184754371643,
      "learning_rate": 9.482218986182403e-06,
      "loss": 0.0505,
      "step": 177160
    },
    {
      "epoch": 0.2899589560299287,
      "grad_norm": 0.877021074295044,
      "learning_rate": 9.482153093968887e-06,
      "loss": 0.0694,
      "step": 177180
    },
    {
      "epoch": 0.28999168646858203,
      "grad_norm": 2.793755531311035,
      "learning_rate": 9.482087201755369e-06,
      "loss": 0.0529,
      "step": 177200
    },
    {
      "epoch": 0.2900244169072354,
      "grad_norm": 3.4235801696777344,
      "learning_rate": 9.482021309541852e-06,
      "loss": 0.0488,
      "step": 177220
    },
    {
      "epoch": 0.2900571473458887,
      "grad_norm": 2.485841751098633,
      "learning_rate": 9.481955417328334e-06,
      "loss": 0.0502,
      "step": 177240
    },
    {
      "epoch": 0.2900898777845421,
      "grad_norm": 0.8647011518478394,
      "learning_rate": 9.481889525114818e-06,
      "loss": 0.0529,
      "step": 177260
    },
    {
      "epoch": 0.2901226082231954,
      "grad_norm": 2.1649904251098633,
      "learning_rate": 9.481823632901301e-06,
      "loss": 0.0562,
      "step": 177280
    },
    {
      "epoch": 0.2901553386618487,
      "grad_norm": 1.1050779819488525,
      "learning_rate": 9.481757740687783e-06,
      "loss": 0.0413,
      "step": 177300
    },
    {
      "epoch": 0.2901880691005021,
      "grad_norm": 1.942696213722229,
      "learning_rate": 9.481691848474267e-06,
      "loss": 0.0719,
      "step": 177320
    },
    {
      "epoch": 0.2902207995391554,
      "grad_norm": 2.6553850173950195,
      "learning_rate": 9.481625956260749e-06,
      "loss": 0.0574,
      "step": 177340
    },
    {
      "epoch": 0.2902535299778088,
      "grad_norm": 3.2958807945251465,
      "learning_rate": 9.481560064047232e-06,
      "loss": 0.0451,
      "step": 177360
    },
    {
      "epoch": 0.2902862604164621,
      "grad_norm": 1.4666751623153687,
      "learning_rate": 9.481494171833716e-06,
      "loss": 0.0459,
      "step": 177380
    },
    {
      "epoch": 0.2903189908551154,
      "grad_norm": 1.5672590732574463,
      "learning_rate": 9.481428279620198e-06,
      "loss": 0.0635,
      "step": 177400
    },
    {
      "epoch": 0.2903517212937688,
      "grad_norm": 3.608882427215576,
      "learning_rate": 9.481362387406681e-06,
      "loss": 0.0583,
      "step": 177420
    },
    {
      "epoch": 0.2903844517324221,
      "grad_norm": 1.3788357973098755,
      "learning_rate": 9.481296495193165e-06,
      "loss": 0.0444,
      "step": 177440
    },
    {
      "epoch": 0.2904171821710755,
      "grad_norm": 3.224801778793335,
      "learning_rate": 9.481230602979647e-06,
      "loss": 0.0528,
      "step": 177460
    },
    {
      "epoch": 0.2904499126097288,
      "grad_norm": 1.3127291202545166,
      "learning_rate": 9.48116471076613e-06,
      "loss": 0.0566,
      "step": 177480
    },
    {
      "epoch": 0.2904826430483821,
      "grad_norm": 0.4742351174354553,
      "learning_rate": 9.481098818552612e-06,
      "loss": 0.0549,
      "step": 177500
    },
    {
      "epoch": 0.2905153734870355,
      "grad_norm": 3.18278431892395,
      "learning_rate": 9.481032926339096e-06,
      "loss": 0.0574,
      "step": 177520
    },
    {
      "epoch": 0.2905481039256888,
      "grad_norm": 2.7060821056365967,
      "learning_rate": 9.480967034125578e-06,
      "loss": 0.0442,
      "step": 177540
    },
    {
      "epoch": 0.2905808343643422,
      "grad_norm": 0.4127509593963623,
      "learning_rate": 9.480901141912061e-06,
      "loss": 0.0482,
      "step": 177560
    },
    {
      "epoch": 0.2906135648029955,
      "grad_norm": 4.162830352783203,
      "learning_rate": 9.480835249698543e-06,
      "loss": 0.0474,
      "step": 177580
    },
    {
      "epoch": 0.2906462952416488,
      "grad_norm": 0.6951025724411011,
      "learning_rate": 9.480769357485027e-06,
      "loss": 0.0679,
      "step": 177600
    },
    {
      "epoch": 0.2906790256803022,
      "grad_norm": 2.854785442352295,
      "learning_rate": 9.480703465271509e-06,
      "loss": 0.0484,
      "step": 177620
    },
    {
      "epoch": 0.2907117561189555,
      "grad_norm": 2.815530776977539,
      "learning_rate": 9.480637573057992e-06,
      "loss": 0.0808,
      "step": 177640
    },
    {
      "epoch": 0.29074448655760887,
      "grad_norm": 1.4152626991271973,
      "learning_rate": 9.480571680844476e-06,
      "loss": 0.0416,
      "step": 177660
    },
    {
      "epoch": 0.2907772169962622,
      "grad_norm": 2.653322696685791,
      "learning_rate": 9.480505788630958e-06,
      "loss": 0.0526,
      "step": 177680
    },
    {
      "epoch": 0.2908099474349155,
      "grad_norm": 1.127522349357605,
      "learning_rate": 9.480439896417441e-06,
      "loss": 0.0504,
      "step": 177700
    },
    {
      "epoch": 0.2908426778735689,
      "grad_norm": 1.5898600816726685,
      "learning_rate": 9.480374004203923e-06,
      "loss": 0.0712,
      "step": 177720
    },
    {
      "epoch": 0.2908754083122222,
      "grad_norm": 1.346495509147644,
      "learning_rate": 9.480308111990407e-06,
      "loss": 0.0573,
      "step": 177740
    },
    {
      "epoch": 0.29090813875087557,
      "grad_norm": 0.943021297454834,
      "learning_rate": 9.480242219776889e-06,
      "loss": 0.0517,
      "step": 177760
    },
    {
      "epoch": 0.2909408691895289,
      "grad_norm": 2.0237483978271484,
      "learning_rate": 9.480176327563372e-06,
      "loss": 0.0564,
      "step": 177780
    },
    {
      "epoch": 0.2909735996281822,
      "grad_norm": 3.9975337982177734,
      "learning_rate": 9.480110435349856e-06,
      "loss": 0.0609,
      "step": 177800
    },
    {
      "epoch": 0.2910063300668356,
      "grad_norm": 2.2615928649902344,
      "learning_rate": 9.480044543136338e-06,
      "loss": 0.0493,
      "step": 177820
    },
    {
      "epoch": 0.2910390605054889,
      "grad_norm": 4.545421600341797,
      "learning_rate": 9.479978650922821e-06,
      "loss": 0.0461,
      "step": 177840
    },
    {
      "epoch": 0.2910717909441422,
      "grad_norm": 1.2454113960266113,
      "learning_rate": 9.479912758709305e-06,
      "loss": 0.0381,
      "step": 177860
    },
    {
      "epoch": 0.2911045213827956,
      "grad_norm": 2.1435718536376953,
      "learning_rate": 9.479846866495787e-06,
      "loss": 0.0595,
      "step": 177880
    },
    {
      "epoch": 0.2911372518214489,
      "grad_norm": 2.4600512981414795,
      "learning_rate": 9.47978097428227e-06,
      "loss": 0.0496,
      "step": 177900
    },
    {
      "epoch": 0.29116998226010227,
      "grad_norm": 0.8758845329284668,
      "learning_rate": 9.479715082068752e-06,
      "loss": 0.0653,
      "step": 177920
    },
    {
      "epoch": 0.2912027126987556,
      "grad_norm": 0.6676758527755737,
      "learning_rate": 9.479649189855236e-06,
      "loss": 0.0549,
      "step": 177940
    },
    {
      "epoch": 0.2912354431374089,
      "grad_norm": 1.5380687713623047,
      "learning_rate": 9.479583297641718e-06,
      "loss": 0.0617,
      "step": 177960
    },
    {
      "epoch": 0.2912681735760623,
      "grad_norm": 1.2708221673965454,
      "learning_rate": 9.479517405428201e-06,
      "loss": 0.0643,
      "step": 177980
    },
    {
      "epoch": 0.2913009040147156,
      "grad_norm": 2.4018726348876953,
      "learning_rate": 9.479451513214685e-06,
      "loss": 0.059,
      "step": 178000
    },
    {
      "epoch": 0.29133363445336896,
      "grad_norm": 2.1601791381835938,
      "learning_rate": 9.479385621001167e-06,
      "loss": 0.0433,
      "step": 178020
    },
    {
      "epoch": 0.2913663648920223,
      "grad_norm": 0.43207889795303345,
      "learning_rate": 9.47931972878765e-06,
      "loss": 0.0463,
      "step": 178040
    },
    {
      "epoch": 0.2913990953306756,
      "grad_norm": 2.831113815307617,
      "learning_rate": 9.479253836574132e-06,
      "loss": 0.045,
      "step": 178060
    },
    {
      "epoch": 0.29143182576932897,
      "grad_norm": 1.06032133102417,
      "learning_rate": 9.479187944360616e-06,
      "loss": 0.0491,
      "step": 178080
    },
    {
      "epoch": 0.2914645562079823,
      "grad_norm": 1.4051642417907715,
      "learning_rate": 9.479122052147098e-06,
      "loss": 0.0519,
      "step": 178100
    },
    {
      "epoch": 0.29149728664663566,
      "grad_norm": 1.1521823406219482,
      "learning_rate": 9.479056159933581e-06,
      "loss": 0.0459,
      "step": 178120
    },
    {
      "epoch": 0.291530017085289,
      "grad_norm": 2.9547972679138184,
      "learning_rate": 9.478990267720063e-06,
      "loss": 0.055,
      "step": 178140
    },
    {
      "epoch": 0.2915627475239423,
      "grad_norm": 2.376188039779663,
      "learning_rate": 9.478924375506547e-06,
      "loss": 0.0474,
      "step": 178160
    },
    {
      "epoch": 0.29159547796259566,
      "grad_norm": 2.1450464725494385,
      "learning_rate": 9.47885848329303e-06,
      "loss": 0.0461,
      "step": 178180
    },
    {
      "epoch": 0.291628208401249,
      "grad_norm": 1.0427545309066772,
      "learning_rate": 9.478792591079512e-06,
      "loss": 0.0329,
      "step": 178200
    },
    {
      "epoch": 0.29166093883990235,
      "grad_norm": 2.223346471786499,
      "learning_rate": 9.478726698865996e-06,
      "loss": 0.0515,
      "step": 178220
    },
    {
      "epoch": 0.29169366927855567,
      "grad_norm": 2.794602632522583,
      "learning_rate": 9.47866080665248e-06,
      "loss": 0.0638,
      "step": 178240
    },
    {
      "epoch": 0.291726399717209,
      "grad_norm": 2.019886016845703,
      "learning_rate": 9.478594914438961e-06,
      "loss": 0.0466,
      "step": 178260
    },
    {
      "epoch": 0.29175913015586236,
      "grad_norm": 1.3716671466827393,
      "learning_rate": 9.478529022225445e-06,
      "loss": 0.054,
      "step": 178280
    },
    {
      "epoch": 0.2917918605945157,
      "grad_norm": 0.5373806357383728,
      "learning_rate": 9.478463130011927e-06,
      "loss": 0.0416,
      "step": 178300
    },
    {
      "epoch": 0.29182459103316905,
      "grad_norm": 2.5821361541748047,
      "learning_rate": 9.47839723779841e-06,
      "loss": 0.0538,
      "step": 178320
    },
    {
      "epoch": 0.29185732147182236,
      "grad_norm": 1.079424500465393,
      "learning_rate": 9.478331345584894e-06,
      "loss": 0.0559,
      "step": 178340
    },
    {
      "epoch": 0.2918900519104757,
      "grad_norm": 1.4797165393829346,
      "learning_rate": 9.478265453371376e-06,
      "loss": 0.0467,
      "step": 178360
    },
    {
      "epoch": 0.29192278234912905,
      "grad_norm": 1.5630521774291992,
      "learning_rate": 9.47819956115786e-06,
      "loss": 0.0597,
      "step": 178380
    },
    {
      "epoch": 0.29195551278778237,
      "grad_norm": 0.6933746933937073,
      "learning_rate": 9.478133668944341e-06,
      "loss": 0.0516,
      "step": 178400
    },
    {
      "epoch": 0.29198824322643574,
      "grad_norm": 3.8365840911865234,
      "learning_rate": 9.478067776730825e-06,
      "loss": 0.0619,
      "step": 178420
    },
    {
      "epoch": 0.29202097366508906,
      "grad_norm": 1.9488534927368164,
      "learning_rate": 9.478001884517307e-06,
      "loss": 0.0415,
      "step": 178440
    },
    {
      "epoch": 0.2920537041037424,
      "grad_norm": 2.168045997619629,
      "learning_rate": 9.47793599230379e-06,
      "loss": 0.0494,
      "step": 178460
    },
    {
      "epoch": 0.29208643454239575,
      "grad_norm": 3.709257125854492,
      "learning_rate": 9.477870100090272e-06,
      "loss": 0.0517,
      "step": 178480
    },
    {
      "epoch": 0.29211916498104906,
      "grad_norm": 2.351170301437378,
      "learning_rate": 9.477804207876756e-06,
      "loss": 0.0572,
      "step": 178500
    },
    {
      "epoch": 0.29215189541970243,
      "grad_norm": 2.021622657775879,
      "learning_rate": 9.477738315663238e-06,
      "loss": 0.0502,
      "step": 178520
    },
    {
      "epoch": 0.29218462585835575,
      "grad_norm": 1.980158805847168,
      "learning_rate": 9.477672423449721e-06,
      "loss": 0.0505,
      "step": 178540
    },
    {
      "epoch": 0.29221735629700907,
      "grad_norm": 3.477583169937134,
      "learning_rate": 9.477606531236203e-06,
      "loss": 0.0505,
      "step": 178560
    },
    {
      "epoch": 0.29225008673566244,
      "grad_norm": 6.243624687194824,
      "learning_rate": 9.477540639022687e-06,
      "loss": 0.0496,
      "step": 178580
    },
    {
      "epoch": 0.29228281717431576,
      "grad_norm": 2.6051039695739746,
      "learning_rate": 9.47747474680917e-06,
      "loss": 0.0589,
      "step": 178600
    },
    {
      "epoch": 0.29231554761296913,
      "grad_norm": 0.38897132873535156,
      "learning_rate": 9.477408854595654e-06,
      "loss": 0.0483,
      "step": 178620
    },
    {
      "epoch": 0.29234827805162245,
      "grad_norm": 1.888952612876892,
      "learning_rate": 9.477342962382136e-06,
      "loss": 0.0423,
      "step": 178640
    },
    {
      "epoch": 0.29238100849027576,
      "grad_norm": 0.46204233169555664,
      "learning_rate": 9.47727707016862e-06,
      "loss": 0.0634,
      "step": 178660
    },
    {
      "epoch": 0.29241373892892913,
      "grad_norm": 1.7190196514129639,
      "learning_rate": 9.477211177955103e-06,
      "loss": 0.0439,
      "step": 178680
    },
    {
      "epoch": 0.29244646936758245,
      "grad_norm": 2.608808994293213,
      "learning_rate": 9.477145285741585e-06,
      "loss": 0.044,
      "step": 178700
    },
    {
      "epoch": 0.2924791998062358,
      "grad_norm": 1.5140928030014038,
      "learning_rate": 9.477079393528069e-06,
      "loss": 0.0531,
      "step": 178720
    },
    {
      "epoch": 0.29251193024488914,
      "grad_norm": 2.0510246753692627,
      "learning_rate": 9.47701350131455e-06,
      "loss": 0.0437,
      "step": 178740
    },
    {
      "epoch": 0.29254466068354246,
      "grad_norm": 1.4164618253707886,
      "learning_rate": 9.476947609101034e-06,
      "loss": 0.0624,
      "step": 178760
    },
    {
      "epoch": 0.29257739112219583,
      "grad_norm": 2.058713674545288,
      "learning_rate": 9.476881716887516e-06,
      "loss": 0.0451,
      "step": 178780
    },
    {
      "epoch": 0.29261012156084915,
      "grad_norm": 0.6650362014770508,
      "learning_rate": 9.476815824674e-06,
      "loss": 0.0509,
      "step": 178800
    },
    {
      "epoch": 0.2926428519995025,
      "grad_norm": 1.7815121412277222,
      "learning_rate": 9.476749932460482e-06,
      "loss": 0.0502,
      "step": 178820
    },
    {
      "epoch": 0.29267558243815583,
      "grad_norm": 0.28016045689582825,
      "learning_rate": 9.476684040246965e-06,
      "loss": 0.054,
      "step": 178840
    },
    {
      "epoch": 0.29270831287680915,
      "grad_norm": 2.1501004695892334,
      "learning_rate": 9.476618148033447e-06,
      "loss": 0.0698,
      "step": 178860
    },
    {
      "epoch": 0.2927410433154625,
      "grad_norm": 1.266048789024353,
      "learning_rate": 9.47655225581993e-06,
      "loss": 0.0541,
      "step": 178880
    },
    {
      "epoch": 0.29277377375411584,
      "grad_norm": 1.1634055376052856,
      "learning_rate": 9.476486363606412e-06,
      "loss": 0.062,
      "step": 178900
    },
    {
      "epoch": 0.2928065041927692,
      "grad_norm": 1.5422216653823853,
      "learning_rate": 9.476420471392896e-06,
      "loss": 0.0557,
      "step": 178920
    },
    {
      "epoch": 0.29283923463142253,
      "grad_norm": 2.156090021133423,
      "learning_rate": 9.476354579179378e-06,
      "loss": 0.0473,
      "step": 178940
    },
    {
      "epoch": 0.29287196507007585,
      "grad_norm": 1.9688074588775635,
      "learning_rate": 9.476288686965862e-06,
      "loss": 0.0502,
      "step": 178960
    },
    {
      "epoch": 0.2929046955087292,
      "grad_norm": 1.284480333328247,
      "learning_rate": 9.476222794752345e-06,
      "loss": 0.0415,
      "step": 178980
    },
    {
      "epoch": 0.29293742594738253,
      "grad_norm": 3.364741563796997,
      "learning_rate": 9.476156902538827e-06,
      "loss": 0.0703,
      "step": 179000
    },
    {
      "epoch": 0.2929701563860359,
      "grad_norm": 1.208730697631836,
      "learning_rate": 9.47609101032531e-06,
      "loss": 0.0528,
      "step": 179020
    },
    {
      "epoch": 0.2930028868246892,
      "grad_norm": 1.3519288301467896,
      "learning_rate": 9.476025118111794e-06,
      "loss": 0.0449,
      "step": 179040
    },
    {
      "epoch": 0.29303561726334254,
      "grad_norm": 3.766183376312256,
      "learning_rate": 9.475959225898276e-06,
      "loss": 0.0477,
      "step": 179060
    },
    {
      "epoch": 0.2930683477019959,
      "grad_norm": 1.5666422843933105,
      "learning_rate": 9.47589333368476e-06,
      "loss": 0.0516,
      "step": 179080
    },
    {
      "epoch": 0.29310107814064923,
      "grad_norm": 2.590963840484619,
      "learning_rate": 9.475827441471243e-06,
      "loss": 0.0647,
      "step": 179100
    },
    {
      "epoch": 0.2931338085793026,
      "grad_norm": 3.8004159927368164,
      "learning_rate": 9.475761549257725e-06,
      "loss": 0.0583,
      "step": 179120
    },
    {
      "epoch": 0.2931665390179559,
      "grad_norm": 1.5081883668899536,
      "learning_rate": 9.475695657044209e-06,
      "loss": 0.0739,
      "step": 179140
    },
    {
      "epoch": 0.29319926945660924,
      "grad_norm": 1.5238168239593506,
      "learning_rate": 9.47562976483069e-06,
      "loss": 0.0555,
      "step": 179160
    },
    {
      "epoch": 0.2932319998952626,
      "grad_norm": 2.7336623668670654,
      "learning_rate": 9.475563872617174e-06,
      "loss": 0.0563,
      "step": 179180
    },
    {
      "epoch": 0.2932647303339159,
      "grad_norm": 1.493526577949524,
      "learning_rate": 9.475497980403656e-06,
      "loss": 0.0488,
      "step": 179200
    },
    {
      "epoch": 0.2932974607725693,
      "grad_norm": 1.5924772024154663,
      "learning_rate": 9.47543208819014e-06,
      "loss": 0.0586,
      "step": 179220
    },
    {
      "epoch": 0.2933301912112226,
      "grad_norm": 2.194540023803711,
      "learning_rate": 9.475366195976622e-06,
      "loss": 0.0492,
      "step": 179240
    },
    {
      "epoch": 0.29336292164987593,
      "grad_norm": 1.8423385620117188,
      "learning_rate": 9.475300303763105e-06,
      "loss": 0.077,
      "step": 179260
    },
    {
      "epoch": 0.2933956520885293,
      "grad_norm": 4.176454067230225,
      "learning_rate": 9.475234411549587e-06,
      "loss": 0.0588,
      "step": 179280
    },
    {
      "epoch": 0.2934283825271826,
      "grad_norm": 1.6462105512619019,
      "learning_rate": 9.47516851933607e-06,
      "loss": 0.0389,
      "step": 179300
    },
    {
      "epoch": 0.293461112965836,
      "grad_norm": 1.484008550643921,
      "learning_rate": 9.475102627122553e-06,
      "loss": 0.0546,
      "step": 179320
    },
    {
      "epoch": 0.2934938434044893,
      "grad_norm": 3.598170757293701,
      "learning_rate": 9.475036734909036e-06,
      "loss": 0.0696,
      "step": 179340
    },
    {
      "epoch": 0.2935265738431426,
      "grad_norm": 1.7803397178649902,
      "learning_rate": 9.47497084269552e-06,
      "loss": 0.0631,
      "step": 179360
    },
    {
      "epoch": 0.293559304281796,
      "grad_norm": 0.8524945378303528,
      "learning_rate": 9.474904950482002e-06,
      "loss": 0.0596,
      "step": 179380
    },
    {
      "epoch": 0.2935920347204493,
      "grad_norm": 1.1679472923278809,
      "learning_rate": 9.474839058268485e-06,
      "loss": 0.054,
      "step": 179400
    },
    {
      "epoch": 0.2936247651591027,
      "grad_norm": 2.9162604808807373,
      "learning_rate": 9.474773166054969e-06,
      "loss": 0.0543,
      "step": 179420
    },
    {
      "epoch": 0.293657495597756,
      "grad_norm": 1.99216628074646,
      "learning_rate": 9.47470727384145e-06,
      "loss": 0.0537,
      "step": 179440
    },
    {
      "epoch": 0.2936902260364093,
      "grad_norm": 0.30066654086112976,
      "learning_rate": 9.474641381627934e-06,
      "loss": 0.0611,
      "step": 179460
    },
    {
      "epoch": 0.2937229564750627,
      "grad_norm": 1.0873676538467407,
      "learning_rate": 9.474575489414418e-06,
      "loss": 0.0542,
      "step": 179480
    },
    {
      "epoch": 0.293755686913716,
      "grad_norm": 1.3740602731704712,
      "learning_rate": 9.4745095972009e-06,
      "loss": 0.0555,
      "step": 179500
    },
    {
      "epoch": 0.2937884173523694,
      "grad_norm": 2.139469623565674,
      "learning_rate": 9.474443704987383e-06,
      "loss": 0.0468,
      "step": 179520
    },
    {
      "epoch": 0.2938211477910227,
      "grad_norm": 1.9644653797149658,
      "learning_rate": 9.474377812773865e-06,
      "loss": 0.0564,
      "step": 179540
    },
    {
      "epoch": 0.293853878229676,
      "grad_norm": 4.880376815795898,
      "learning_rate": 9.474311920560349e-06,
      "loss": 0.0528,
      "step": 179560
    },
    {
      "epoch": 0.2938866086683294,
      "grad_norm": 0.2689025402069092,
      "learning_rate": 9.47424602834683e-06,
      "loss": 0.0632,
      "step": 179580
    },
    {
      "epoch": 0.2939193391069827,
      "grad_norm": 2.1066720485687256,
      "learning_rate": 9.474180136133314e-06,
      "loss": 0.0584,
      "step": 179600
    },
    {
      "epoch": 0.2939520695456361,
      "grad_norm": 3.155792713165283,
      "learning_rate": 9.474114243919796e-06,
      "loss": 0.0502,
      "step": 179620
    },
    {
      "epoch": 0.2939847999842894,
      "grad_norm": 2.661086320877075,
      "learning_rate": 9.47404835170628e-06,
      "loss": 0.0399,
      "step": 179640
    },
    {
      "epoch": 0.2940175304229427,
      "grad_norm": 2.090014696121216,
      "learning_rate": 9.473982459492762e-06,
      "loss": 0.0411,
      "step": 179660
    },
    {
      "epoch": 0.2940502608615961,
      "grad_norm": 1.3789575099945068,
      "learning_rate": 9.473916567279245e-06,
      "loss": 0.0644,
      "step": 179680
    },
    {
      "epoch": 0.2940829913002494,
      "grad_norm": 4.061273097991943,
      "learning_rate": 9.473850675065727e-06,
      "loss": 0.0573,
      "step": 179700
    },
    {
      "epoch": 0.29411572173890277,
      "grad_norm": 1.8423051834106445,
      "learning_rate": 9.47378478285221e-06,
      "loss": 0.0438,
      "step": 179720
    },
    {
      "epoch": 0.2941484521775561,
      "grad_norm": 1.405773401260376,
      "learning_rate": 9.473718890638694e-06,
      "loss": 0.0445,
      "step": 179740
    },
    {
      "epoch": 0.2941811826162094,
      "grad_norm": 1.398390531539917,
      "learning_rate": 9.473652998425176e-06,
      "loss": 0.0524,
      "step": 179760
    },
    {
      "epoch": 0.2942139130548628,
      "grad_norm": 1.6158233880996704,
      "learning_rate": 9.47358710621166e-06,
      "loss": 0.0355,
      "step": 179780
    },
    {
      "epoch": 0.2942466434935161,
      "grad_norm": 2.5742478370666504,
      "learning_rate": 9.473521213998142e-06,
      "loss": 0.0466,
      "step": 179800
    },
    {
      "epoch": 0.29427937393216946,
      "grad_norm": 1.620905876159668,
      "learning_rate": 9.473455321784625e-06,
      "loss": 0.057,
      "step": 179820
    },
    {
      "epoch": 0.2943121043708228,
      "grad_norm": 2.7542784214019775,
      "learning_rate": 9.473389429571109e-06,
      "loss": 0.0688,
      "step": 179840
    },
    {
      "epoch": 0.2943448348094761,
      "grad_norm": 1.4317939281463623,
      "learning_rate": 9.47332353735759e-06,
      "loss": 0.052,
      "step": 179860
    },
    {
      "epoch": 0.29437756524812947,
      "grad_norm": 0.5254918336868286,
      "learning_rate": 9.473257645144074e-06,
      "loss": 0.0517,
      "step": 179880
    },
    {
      "epoch": 0.2944102956867828,
      "grad_norm": 1.9282907247543335,
      "learning_rate": 9.473191752930558e-06,
      "loss": 0.0472,
      "step": 179900
    },
    {
      "epoch": 0.29444302612543616,
      "grad_norm": 1.531814694404602,
      "learning_rate": 9.47312586071704e-06,
      "loss": 0.051,
      "step": 179920
    },
    {
      "epoch": 0.2944757565640895,
      "grad_norm": 0.611998975276947,
      "learning_rate": 9.473059968503523e-06,
      "loss": 0.0537,
      "step": 179940
    },
    {
      "epoch": 0.2945084870027428,
      "grad_norm": 2.0148158073425293,
      "learning_rate": 9.472994076290005e-06,
      "loss": 0.0544,
      "step": 179960
    },
    {
      "epoch": 0.29454121744139616,
      "grad_norm": 2.105844736099243,
      "learning_rate": 9.472928184076489e-06,
      "loss": 0.0462,
      "step": 179980
    },
    {
      "epoch": 0.2945739478800495,
      "grad_norm": 2.0908143520355225,
      "learning_rate": 9.47286229186297e-06,
      "loss": 0.0455,
      "step": 180000
    },
    {
      "epoch": 0.29460667831870285,
      "grad_norm": 2.151819944381714,
      "learning_rate": 9.472796399649454e-06,
      "loss": 0.0622,
      "step": 180020
    },
    {
      "epoch": 0.29463940875735617,
      "grad_norm": 2.9605488777160645,
      "learning_rate": 9.472730507435936e-06,
      "loss": 0.0621,
      "step": 180040
    },
    {
      "epoch": 0.2946721391960095,
      "grad_norm": 1.1104369163513184,
      "learning_rate": 9.47266461522242e-06,
      "loss": 0.0484,
      "step": 180060
    },
    {
      "epoch": 0.29470486963466286,
      "grad_norm": 1.297437310218811,
      "learning_rate": 9.472598723008902e-06,
      "loss": 0.0586,
      "step": 180080
    },
    {
      "epoch": 0.2947376000733162,
      "grad_norm": 5.3589653968811035,
      "learning_rate": 9.472532830795385e-06,
      "loss": 0.0531,
      "step": 180100
    },
    {
      "epoch": 0.29477033051196955,
      "grad_norm": 2.739518404006958,
      "learning_rate": 9.472466938581869e-06,
      "loss": 0.0591,
      "step": 180120
    },
    {
      "epoch": 0.29480306095062286,
      "grad_norm": 2.6240904331207275,
      "learning_rate": 9.47240104636835e-06,
      "loss": 0.0392,
      "step": 180140
    },
    {
      "epoch": 0.2948357913892762,
      "grad_norm": 3.1570606231689453,
      "learning_rate": 9.472335154154834e-06,
      "loss": 0.0504,
      "step": 180160
    },
    {
      "epoch": 0.29486852182792955,
      "grad_norm": 1.5151119232177734,
      "learning_rate": 9.472269261941316e-06,
      "loss": 0.0645,
      "step": 180180
    },
    {
      "epoch": 0.29490125226658287,
      "grad_norm": 1.1501429080963135,
      "learning_rate": 9.4722033697278e-06,
      "loss": 0.0767,
      "step": 180200
    },
    {
      "epoch": 0.29493398270523624,
      "grad_norm": 1.0050257444381714,
      "learning_rate": 9.472137477514283e-06,
      "loss": 0.0486,
      "step": 180220
    },
    {
      "epoch": 0.29496671314388956,
      "grad_norm": 1.703823447227478,
      "learning_rate": 9.472071585300765e-06,
      "loss": 0.0429,
      "step": 180240
    },
    {
      "epoch": 0.2949994435825429,
      "grad_norm": 3.30214786529541,
      "learning_rate": 9.472005693087249e-06,
      "loss": 0.0456,
      "step": 180260
    },
    {
      "epoch": 0.29503217402119625,
      "grad_norm": 0.9355379343032837,
      "learning_rate": 9.471939800873733e-06,
      "loss": 0.0442,
      "step": 180280
    },
    {
      "epoch": 0.29506490445984956,
      "grad_norm": 1.4035543203353882,
      "learning_rate": 9.471873908660214e-06,
      "loss": 0.0517,
      "step": 180300
    },
    {
      "epoch": 0.29509763489850294,
      "grad_norm": 1.041075348854065,
      "learning_rate": 9.471808016446698e-06,
      "loss": 0.052,
      "step": 180320
    },
    {
      "epoch": 0.29513036533715625,
      "grad_norm": 1.0378228425979614,
      "learning_rate": 9.47174212423318e-06,
      "loss": 0.0682,
      "step": 180340
    },
    {
      "epoch": 0.29516309577580957,
      "grad_norm": 1.1314687728881836,
      "learning_rate": 9.471676232019664e-06,
      "loss": 0.0444,
      "step": 180360
    },
    {
      "epoch": 0.29519582621446294,
      "grad_norm": 3.19931960105896,
      "learning_rate": 9.471610339806145e-06,
      "loss": 0.0515,
      "step": 180380
    },
    {
      "epoch": 0.29522855665311626,
      "grad_norm": 0.935430109500885,
      "learning_rate": 9.471544447592629e-06,
      "loss": 0.0652,
      "step": 180400
    },
    {
      "epoch": 0.29526128709176963,
      "grad_norm": 3.5697691440582275,
      "learning_rate": 9.471478555379111e-06,
      "loss": 0.0536,
      "step": 180420
    },
    {
      "epoch": 0.29529401753042295,
      "grad_norm": 1.6440215110778809,
      "learning_rate": 9.471412663165594e-06,
      "loss": 0.0637,
      "step": 180440
    },
    {
      "epoch": 0.29532674796907626,
      "grad_norm": 0.6040766835212708,
      "learning_rate": 9.471346770952078e-06,
      "loss": 0.0648,
      "step": 180460
    },
    {
      "epoch": 0.29535947840772964,
      "grad_norm": 1.7791718244552612,
      "learning_rate": 9.47128087873856e-06,
      "loss": 0.0549,
      "step": 180480
    },
    {
      "epoch": 0.29539220884638295,
      "grad_norm": 1.1851123571395874,
      "learning_rate": 9.471214986525044e-06,
      "loss": 0.0493,
      "step": 180500
    },
    {
      "epoch": 0.2954249392850363,
      "grad_norm": 2.246565103530884,
      "learning_rate": 9.471149094311525e-06,
      "loss": 0.0591,
      "step": 180520
    },
    {
      "epoch": 0.29545766972368964,
      "grad_norm": 2.4402942657470703,
      "learning_rate": 9.471083202098009e-06,
      "loss": 0.0483,
      "step": 180540
    },
    {
      "epoch": 0.29549040016234296,
      "grad_norm": 2.0413601398468018,
      "learning_rate": 9.471017309884491e-06,
      "loss": 0.0573,
      "step": 180560
    },
    {
      "epoch": 0.29552313060099633,
      "grad_norm": 1.641897439956665,
      "learning_rate": 9.470951417670974e-06,
      "loss": 0.0504,
      "step": 180580
    },
    {
      "epoch": 0.29555586103964965,
      "grad_norm": 1.1160963773727417,
      "learning_rate": 9.470885525457456e-06,
      "loss": 0.06,
      "step": 180600
    },
    {
      "epoch": 0.295588591478303,
      "grad_norm": 0.6687489151954651,
      "learning_rate": 9.47081963324394e-06,
      "loss": 0.0777,
      "step": 180620
    },
    {
      "epoch": 0.29562132191695634,
      "grad_norm": 0.6838686466217041,
      "learning_rate": 9.470753741030424e-06,
      "loss": 0.0478,
      "step": 180640
    },
    {
      "epoch": 0.29565405235560965,
      "grad_norm": 3.023923397064209,
      "learning_rate": 9.470687848816905e-06,
      "loss": 0.0542,
      "step": 180660
    },
    {
      "epoch": 0.295686782794263,
      "grad_norm": 0.7600545287132263,
      "learning_rate": 9.470621956603389e-06,
      "loss": 0.0535,
      "step": 180680
    },
    {
      "epoch": 0.29571951323291634,
      "grad_norm": 1.5737420320510864,
      "learning_rate": 9.470556064389873e-06,
      "loss": 0.0529,
      "step": 180700
    },
    {
      "epoch": 0.29575224367156966,
      "grad_norm": 2.430706262588501,
      "learning_rate": 9.470490172176355e-06,
      "loss": 0.0423,
      "step": 180720
    },
    {
      "epoch": 0.29578497411022303,
      "grad_norm": 0.9379483461380005,
      "learning_rate": 9.470424279962838e-06,
      "loss": 0.0386,
      "step": 180740
    },
    {
      "epoch": 0.29581770454887635,
      "grad_norm": 8.05562686920166,
      "learning_rate": 9.47035838774932e-06,
      "loss": 0.0715,
      "step": 180760
    },
    {
      "epoch": 0.2958504349875297,
      "grad_norm": 2.448397397994995,
      "learning_rate": 9.470292495535804e-06,
      "loss": 0.0574,
      "step": 180780
    },
    {
      "epoch": 0.29588316542618304,
      "grad_norm": 3.276705503463745,
      "learning_rate": 9.470226603322287e-06,
      "loss": 0.0525,
      "step": 180800
    },
    {
      "epoch": 0.29591589586483635,
      "grad_norm": 2.9580368995666504,
      "learning_rate": 9.470160711108769e-06,
      "loss": 0.0545,
      "step": 180820
    },
    {
      "epoch": 0.2959486263034897,
      "grad_norm": 2.9455630779266357,
      "learning_rate": 9.470094818895253e-06,
      "loss": 0.063,
      "step": 180840
    },
    {
      "epoch": 0.29598135674214304,
      "grad_norm": 1.430104374885559,
      "learning_rate": 9.470028926681735e-06,
      "loss": 0.0459,
      "step": 180860
    },
    {
      "epoch": 0.2960140871807964,
      "grad_norm": 4.545012950897217,
      "learning_rate": 9.469963034468218e-06,
      "loss": 0.0617,
      "step": 180880
    },
    {
      "epoch": 0.29604681761944973,
      "grad_norm": 2.1577508449554443,
      "learning_rate": 9.4698971422547e-06,
      "loss": 0.0644,
      "step": 180900
    },
    {
      "epoch": 0.29607954805810305,
      "grad_norm": 1.358586311340332,
      "learning_rate": 9.469831250041184e-06,
      "loss": 0.0536,
      "step": 180920
    },
    {
      "epoch": 0.2961122784967564,
      "grad_norm": 1.8177236318588257,
      "learning_rate": 9.469765357827665e-06,
      "loss": 0.0448,
      "step": 180940
    },
    {
      "epoch": 0.29614500893540974,
      "grad_norm": 3.683720111846924,
      "learning_rate": 9.469699465614149e-06,
      "loss": 0.0616,
      "step": 180960
    },
    {
      "epoch": 0.2961777393740631,
      "grad_norm": 1.4501700401306152,
      "learning_rate": 9.469633573400631e-06,
      "loss": 0.0473,
      "step": 180980
    },
    {
      "epoch": 0.2962104698127164,
      "grad_norm": 2.542675256729126,
      "learning_rate": 9.469567681187115e-06,
      "loss": 0.0473,
      "step": 181000
    },
    {
      "epoch": 0.29624320025136974,
      "grad_norm": 2.6801114082336426,
      "learning_rate": 9.469501788973598e-06,
      "loss": 0.0619,
      "step": 181020
    },
    {
      "epoch": 0.2962759306900231,
      "grad_norm": 2.4784159660339355,
      "learning_rate": 9.46943589676008e-06,
      "loss": 0.0644,
      "step": 181040
    },
    {
      "epoch": 0.29630866112867643,
      "grad_norm": 1.9196110963821411,
      "learning_rate": 9.469370004546564e-06,
      "loss": 0.0484,
      "step": 181060
    },
    {
      "epoch": 0.2963413915673298,
      "grad_norm": 1.2735586166381836,
      "learning_rate": 9.469304112333047e-06,
      "loss": 0.0549,
      "step": 181080
    },
    {
      "epoch": 0.2963741220059831,
      "grad_norm": 2.055605411529541,
      "learning_rate": 9.469238220119529e-06,
      "loss": 0.0458,
      "step": 181100
    },
    {
      "epoch": 0.29640685244463644,
      "grad_norm": 1.7393133640289307,
      "learning_rate": 9.469172327906013e-06,
      "loss": 0.0456,
      "step": 181120
    },
    {
      "epoch": 0.2964395828832898,
      "grad_norm": 1.1797938346862793,
      "learning_rate": 9.469106435692496e-06,
      "loss": 0.0483,
      "step": 181140
    },
    {
      "epoch": 0.2964723133219431,
      "grad_norm": 1.3305583000183105,
      "learning_rate": 9.469040543478978e-06,
      "loss": 0.0559,
      "step": 181160
    },
    {
      "epoch": 0.2965050437605965,
      "grad_norm": 2.996080160140991,
      "learning_rate": 9.468974651265462e-06,
      "loss": 0.0494,
      "step": 181180
    },
    {
      "epoch": 0.2965377741992498,
      "grad_norm": 1.0784475803375244,
      "learning_rate": 9.468908759051944e-06,
      "loss": 0.0512,
      "step": 181200
    },
    {
      "epoch": 0.29657050463790313,
      "grad_norm": 0.9500412344932556,
      "learning_rate": 9.468842866838427e-06,
      "loss": 0.0478,
      "step": 181220
    },
    {
      "epoch": 0.2966032350765565,
      "grad_norm": 2.142160654067993,
      "learning_rate": 9.468776974624909e-06,
      "loss": 0.07,
      "step": 181240
    },
    {
      "epoch": 0.2966359655152098,
      "grad_norm": 3.937021493911743,
      "learning_rate": 9.468711082411393e-06,
      "loss": 0.0637,
      "step": 181260
    },
    {
      "epoch": 0.2966686959538632,
      "grad_norm": 1.301302433013916,
      "learning_rate": 9.468645190197875e-06,
      "loss": 0.0486,
      "step": 181280
    },
    {
      "epoch": 0.2967014263925165,
      "grad_norm": 1.1531381607055664,
      "learning_rate": 9.468579297984358e-06,
      "loss": 0.0582,
      "step": 181300
    },
    {
      "epoch": 0.2967341568311698,
      "grad_norm": 2.0615460872650146,
      "learning_rate": 9.46851340577084e-06,
      "loss": 0.0572,
      "step": 181320
    },
    {
      "epoch": 0.2967668872698232,
      "grad_norm": 2.2000718116760254,
      "learning_rate": 9.468447513557324e-06,
      "loss": 0.0549,
      "step": 181340
    },
    {
      "epoch": 0.2967996177084765,
      "grad_norm": 2.8212625980377197,
      "learning_rate": 9.468381621343806e-06,
      "loss": 0.0484,
      "step": 181360
    },
    {
      "epoch": 0.2968323481471299,
      "grad_norm": 4.2846198081970215,
      "learning_rate": 9.46831572913029e-06,
      "loss": 0.0513,
      "step": 181380
    },
    {
      "epoch": 0.2968650785857832,
      "grad_norm": 2.0589823722839355,
      "learning_rate": 9.468249836916773e-06,
      "loss": 0.0777,
      "step": 181400
    },
    {
      "epoch": 0.2968978090244365,
      "grad_norm": 1.7267866134643555,
      "learning_rate": 9.468183944703255e-06,
      "loss": 0.0603,
      "step": 181420
    },
    {
      "epoch": 0.2969305394630899,
      "grad_norm": 1.734320878982544,
      "learning_rate": 9.468118052489738e-06,
      "loss": 0.061,
      "step": 181440
    },
    {
      "epoch": 0.2969632699017432,
      "grad_norm": 0.43711063265800476,
      "learning_rate": 9.468052160276222e-06,
      "loss": 0.0508,
      "step": 181460
    },
    {
      "epoch": 0.2969960003403966,
      "grad_norm": 1.7242168188095093,
      "learning_rate": 9.467986268062704e-06,
      "loss": 0.0462,
      "step": 181480
    },
    {
      "epoch": 0.2970287307790499,
      "grad_norm": 3.210496664047241,
      "learning_rate": 9.467920375849187e-06,
      "loss": 0.0516,
      "step": 181500
    },
    {
      "epoch": 0.2970614612177032,
      "grad_norm": 1.1178572177886963,
      "learning_rate": 9.467854483635671e-06,
      "loss": 0.0541,
      "step": 181520
    },
    {
      "epoch": 0.2970941916563566,
      "grad_norm": 1.3444725275039673,
      "learning_rate": 9.467788591422153e-06,
      "loss": 0.0552,
      "step": 181540
    },
    {
      "epoch": 0.2971269220950099,
      "grad_norm": 1.4580577611923218,
      "learning_rate": 9.467722699208636e-06,
      "loss": 0.0633,
      "step": 181560
    },
    {
      "epoch": 0.2971596525336633,
      "grad_norm": 2.3352553844451904,
      "learning_rate": 9.467656806995118e-06,
      "loss": 0.0515,
      "step": 181580
    },
    {
      "epoch": 0.2971923829723166,
      "grad_norm": 2.1618404388427734,
      "learning_rate": 9.467590914781602e-06,
      "loss": 0.0467,
      "step": 181600
    },
    {
      "epoch": 0.2972251134109699,
      "grad_norm": 1.3307257890701294,
      "learning_rate": 9.467525022568084e-06,
      "loss": 0.0615,
      "step": 181620
    },
    {
      "epoch": 0.2972578438496233,
      "grad_norm": 1.3222804069519043,
      "learning_rate": 9.467459130354567e-06,
      "loss": 0.0576,
      "step": 181640
    },
    {
      "epoch": 0.2972905742882766,
      "grad_norm": 1.1323038339614868,
      "learning_rate": 9.46739323814105e-06,
      "loss": 0.0524,
      "step": 181660
    },
    {
      "epoch": 0.29732330472692997,
      "grad_norm": 6.791013240814209,
      "learning_rate": 9.467327345927533e-06,
      "loss": 0.0623,
      "step": 181680
    },
    {
      "epoch": 0.2973560351655833,
      "grad_norm": 2.4813623428344727,
      "learning_rate": 9.467261453714015e-06,
      "loss": 0.0373,
      "step": 181700
    },
    {
      "epoch": 0.2973887656042366,
      "grad_norm": 2.065708637237549,
      "learning_rate": 9.467195561500498e-06,
      "loss": 0.0363,
      "step": 181720
    },
    {
      "epoch": 0.29742149604289,
      "grad_norm": 2.8745203018188477,
      "learning_rate": 9.46712966928698e-06,
      "loss": 0.0517,
      "step": 181740
    },
    {
      "epoch": 0.2974542264815433,
      "grad_norm": 1.339646577835083,
      "learning_rate": 9.467063777073464e-06,
      "loss": 0.0702,
      "step": 181760
    },
    {
      "epoch": 0.29748695692019667,
      "grad_norm": 4.966314315795898,
      "learning_rate": 9.466997884859946e-06,
      "loss": 0.0557,
      "step": 181780
    },
    {
      "epoch": 0.29751968735885,
      "grad_norm": 3.8673338890075684,
      "learning_rate": 9.46693199264643e-06,
      "loss": 0.0585,
      "step": 181800
    },
    {
      "epoch": 0.2975524177975033,
      "grad_norm": 2.1966164112091064,
      "learning_rate": 9.466866100432913e-06,
      "loss": 0.0333,
      "step": 181820
    },
    {
      "epoch": 0.29758514823615667,
      "grad_norm": 0.5017494559288025,
      "learning_rate": 9.466800208219395e-06,
      "loss": 0.0615,
      "step": 181840
    },
    {
      "epoch": 0.29761787867481,
      "grad_norm": 5.890752792358398,
      "learning_rate": 9.466734316005878e-06,
      "loss": 0.0534,
      "step": 181860
    },
    {
      "epoch": 0.29765060911346336,
      "grad_norm": 2.8162240982055664,
      "learning_rate": 9.466668423792362e-06,
      "loss": 0.0417,
      "step": 181880
    },
    {
      "epoch": 0.2976833395521167,
      "grad_norm": 3.329777240753174,
      "learning_rate": 9.466602531578844e-06,
      "loss": 0.038,
      "step": 181900
    },
    {
      "epoch": 0.29771606999077,
      "grad_norm": 0.8427661061286926,
      "learning_rate": 9.466536639365327e-06,
      "loss": 0.0476,
      "step": 181920
    },
    {
      "epoch": 0.29774880042942337,
      "grad_norm": 2.3732481002807617,
      "learning_rate": 9.466470747151811e-06,
      "loss": 0.0414,
      "step": 181940
    },
    {
      "epoch": 0.2977815308680767,
      "grad_norm": 3.2847540378570557,
      "learning_rate": 9.466404854938293e-06,
      "loss": 0.0417,
      "step": 181960
    },
    {
      "epoch": 0.29781426130673005,
      "grad_norm": 1.2679574489593506,
      "learning_rate": 9.466338962724776e-06,
      "loss": 0.0493,
      "step": 181980
    },
    {
      "epoch": 0.29784699174538337,
      "grad_norm": 0.9748407602310181,
      "learning_rate": 9.466273070511258e-06,
      "loss": 0.0495,
      "step": 182000
    },
    {
      "epoch": 0.2978797221840367,
      "grad_norm": 1.9053168296813965,
      "learning_rate": 9.466207178297742e-06,
      "loss": 0.0438,
      "step": 182020
    },
    {
      "epoch": 0.29791245262269006,
      "grad_norm": 0.9773237109184265,
      "learning_rate": 9.466141286084224e-06,
      "loss": 0.0485,
      "step": 182040
    },
    {
      "epoch": 0.2979451830613434,
      "grad_norm": 2.199575662612915,
      "learning_rate": 9.466075393870707e-06,
      "loss": 0.055,
      "step": 182060
    },
    {
      "epoch": 0.29797791349999675,
      "grad_norm": 3.7994167804718018,
      "learning_rate": 9.46600950165719e-06,
      "loss": 0.0506,
      "step": 182080
    },
    {
      "epoch": 0.29801064393865007,
      "grad_norm": 0.5223568081855774,
      "learning_rate": 9.465943609443673e-06,
      "loss": 0.0362,
      "step": 182100
    },
    {
      "epoch": 0.2980433743773034,
      "grad_norm": 2.9805941581726074,
      "learning_rate": 9.465877717230155e-06,
      "loss": 0.0518,
      "step": 182120
    },
    {
      "epoch": 0.29807610481595676,
      "grad_norm": 1.4303537607192993,
      "learning_rate": 9.465811825016638e-06,
      "loss": 0.0724,
      "step": 182140
    },
    {
      "epoch": 0.29810883525461007,
      "grad_norm": 1.3813267946243286,
      "learning_rate": 9.46574593280312e-06,
      "loss": 0.061,
      "step": 182160
    },
    {
      "epoch": 0.29814156569326344,
      "grad_norm": 1.2890125513076782,
      "learning_rate": 9.465680040589604e-06,
      "loss": 0.0536,
      "step": 182180
    },
    {
      "epoch": 0.29817429613191676,
      "grad_norm": 1.2032345533370972,
      "learning_rate": 9.465614148376087e-06,
      "loss": 0.0518,
      "step": 182200
    },
    {
      "epoch": 0.2982070265705701,
      "grad_norm": 1.9318749904632568,
      "learning_rate": 9.46554825616257e-06,
      "loss": 0.0563,
      "step": 182220
    },
    {
      "epoch": 0.29823975700922345,
      "grad_norm": 0.5569402575492859,
      "learning_rate": 9.465482363949053e-06,
      "loss": 0.0484,
      "step": 182240
    },
    {
      "epoch": 0.29827248744787677,
      "grad_norm": 1.9752486944198608,
      "learning_rate": 9.465416471735536e-06,
      "loss": 0.0439,
      "step": 182260
    },
    {
      "epoch": 0.29830521788653014,
      "grad_norm": 3.9524242877960205,
      "learning_rate": 9.465350579522018e-06,
      "loss": 0.0512,
      "step": 182280
    },
    {
      "epoch": 0.29833794832518346,
      "grad_norm": 2.4015755653381348,
      "learning_rate": 9.465284687308502e-06,
      "loss": 0.0498,
      "step": 182300
    },
    {
      "epoch": 0.29837067876383677,
      "grad_norm": 1.5760647058486938,
      "learning_rate": 9.465218795094986e-06,
      "loss": 0.0596,
      "step": 182320
    },
    {
      "epoch": 0.29840340920249014,
      "grad_norm": 1.9357643127441406,
      "learning_rate": 9.465152902881467e-06,
      "loss": 0.0673,
      "step": 182340
    },
    {
      "epoch": 0.29843613964114346,
      "grad_norm": 1.296636939048767,
      "learning_rate": 9.465087010667951e-06,
      "loss": 0.0388,
      "step": 182360
    },
    {
      "epoch": 0.29846887007979683,
      "grad_norm": 2.1664819717407227,
      "learning_rate": 9.465021118454433e-06,
      "loss": 0.0531,
      "step": 182380
    },
    {
      "epoch": 0.29850160051845015,
      "grad_norm": 1.6981720924377441,
      "learning_rate": 9.464955226240917e-06,
      "loss": 0.0624,
      "step": 182400
    },
    {
      "epoch": 0.29853433095710347,
      "grad_norm": 2.289616107940674,
      "learning_rate": 9.464889334027398e-06,
      "loss": 0.0628,
      "step": 182420
    },
    {
      "epoch": 0.29856706139575684,
      "grad_norm": 1.1495498418807983,
      "learning_rate": 9.464823441813882e-06,
      "loss": 0.0638,
      "step": 182440
    },
    {
      "epoch": 0.29859979183441016,
      "grad_norm": 0.8426066637039185,
      "learning_rate": 9.464757549600364e-06,
      "loss": 0.05,
      "step": 182460
    },
    {
      "epoch": 0.2986325222730635,
      "grad_norm": 3.397470474243164,
      "learning_rate": 9.464691657386847e-06,
      "loss": 0.0617,
      "step": 182480
    },
    {
      "epoch": 0.29866525271171684,
      "grad_norm": 3.96890926361084,
      "learning_rate": 9.46462576517333e-06,
      "loss": 0.0586,
      "step": 182500
    },
    {
      "epoch": 0.29869798315037016,
      "grad_norm": 3.048203229904175,
      "learning_rate": 9.464559872959813e-06,
      "loss": 0.0605,
      "step": 182520
    },
    {
      "epoch": 0.29873071358902353,
      "grad_norm": 1.739553451538086,
      "learning_rate": 9.464493980746295e-06,
      "loss": 0.0613,
      "step": 182540
    },
    {
      "epoch": 0.29876344402767685,
      "grad_norm": 2.7222812175750732,
      "learning_rate": 9.464428088532778e-06,
      "loss": 0.0642,
      "step": 182560
    },
    {
      "epoch": 0.2987961744663302,
      "grad_norm": 5.799447536468506,
      "learning_rate": 9.464362196319262e-06,
      "loss": 0.057,
      "step": 182580
    },
    {
      "epoch": 0.29882890490498354,
      "grad_norm": 1.7865709066390991,
      "learning_rate": 9.464296304105744e-06,
      "loss": 0.0467,
      "step": 182600
    },
    {
      "epoch": 0.29886163534363686,
      "grad_norm": 2.0049004554748535,
      "learning_rate": 9.464230411892227e-06,
      "loss": 0.0656,
      "step": 182620
    },
    {
      "epoch": 0.29889436578229023,
      "grad_norm": 1.1269673109054565,
      "learning_rate": 9.46416451967871e-06,
      "loss": 0.055,
      "step": 182640
    },
    {
      "epoch": 0.29892709622094354,
      "grad_norm": 1.0557098388671875,
      "learning_rate": 9.464098627465193e-06,
      "loss": 0.0582,
      "step": 182660
    },
    {
      "epoch": 0.2989598266595969,
      "grad_norm": 2.1503286361694336,
      "learning_rate": 9.464032735251677e-06,
      "loss": 0.0605,
      "step": 182680
    },
    {
      "epoch": 0.29899255709825023,
      "grad_norm": 4.417215347290039,
      "learning_rate": 9.463966843038158e-06,
      "loss": 0.0598,
      "step": 182700
    },
    {
      "epoch": 0.29902528753690355,
      "grad_norm": 1.3241770267486572,
      "learning_rate": 9.463900950824642e-06,
      "loss": 0.0596,
      "step": 182720
    },
    {
      "epoch": 0.2990580179755569,
      "grad_norm": 1.4471485614776611,
      "learning_rate": 9.463835058611126e-06,
      "loss": 0.0444,
      "step": 182740
    },
    {
      "epoch": 0.29909074841421024,
      "grad_norm": 2.46095609664917,
      "learning_rate": 9.463769166397608e-06,
      "loss": 0.0636,
      "step": 182760
    },
    {
      "epoch": 0.2991234788528636,
      "grad_norm": 2.383927583694458,
      "learning_rate": 9.463703274184091e-06,
      "loss": 0.0406,
      "step": 182780
    },
    {
      "epoch": 0.29915620929151693,
      "grad_norm": 1.2030894756317139,
      "learning_rate": 9.463637381970573e-06,
      "loss": 0.0472,
      "step": 182800
    },
    {
      "epoch": 0.29918893973017024,
      "grad_norm": 2.466603994369507,
      "learning_rate": 9.463571489757057e-06,
      "loss": 0.0614,
      "step": 182820
    },
    {
      "epoch": 0.2992216701688236,
      "grad_norm": 1.4319833517074585,
      "learning_rate": 9.463505597543538e-06,
      "loss": 0.0589,
      "step": 182840
    },
    {
      "epoch": 0.29925440060747693,
      "grad_norm": 2.6615805625915527,
      "learning_rate": 9.463439705330022e-06,
      "loss": 0.0417,
      "step": 182860
    },
    {
      "epoch": 0.2992871310461303,
      "grad_norm": 16.206796646118164,
      "learning_rate": 9.463373813116504e-06,
      "loss": 0.0559,
      "step": 182880
    },
    {
      "epoch": 0.2993198614847836,
      "grad_norm": 2.0574159622192383,
      "learning_rate": 9.463307920902988e-06,
      "loss": 0.0501,
      "step": 182900
    },
    {
      "epoch": 0.29935259192343694,
      "grad_norm": 0.42422693967819214,
      "learning_rate": 9.463242028689471e-06,
      "loss": 0.0496,
      "step": 182920
    },
    {
      "epoch": 0.2993853223620903,
      "grad_norm": 1.0412037372589111,
      "learning_rate": 9.463176136475953e-06,
      "loss": 0.072,
      "step": 182940
    },
    {
      "epoch": 0.29941805280074363,
      "grad_norm": 4.894329071044922,
      "learning_rate": 9.463110244262437e-06,
      "loss": 0.0525,
      "step": 182960
    },
    {
      "epoch": 0.299450783239397,
      "grad_norm": 5.701852321624756,
      "learning_rate": 9.463044352048919e-06,
      "loss": 0.0936,
      "step": 182980
    },
    {
      "epoch": 0.2994835136780503,
      "grad_norm": 3.3944010734558105,
      "learning_rate": 9.462978459835402e-06,
      "loss": 0.0508,
      "step": 183000
    },
    {
      "epoch": 0.29951624411670363,
      "grad_norm": 1.4367157220840454,
      "learning_rate": 9.462912567621884e-06,
      "loss": 0.0391,
      "step": 183020
    },
    {
      "epoch": 0.299548974555357,
      "grad_norm": 1.53859543800354,
      "learning_rate": 9.462846675408368e-06,
      "loss": 0.0458,
      "step": 183040
    },
    {
      "epoch": 0.2995817049940103,
      "grad_norm": 2.5583832263946533,
      "learning_rate": 9.462780783194851e-06,
      "loss": 0.0521,
      "step": 183060
    },
    {
      "epoch": 0.2996144354326637,
      "grad_norm": 1.237385630607605,
      "learning_rate": 9.462714890981333e-06,
      "loss": 0.0445,
      "step": 183080
    },
    {
      "epoch": 0.299647165871317,
      "grad_norm": 2.450852870941162,
      "learning_rate": 9.462648998767817e-06,
      "loss": 0.0527,
      "step": 183100
    },
    {
      "epoch": 0.29967989630997033,
      "grad_norm": 0.38436275720596313,
      "learning_rate": 9.4625831065543e-06,
      "loss": 0.0498,
      "step": 183120
    },
    {
      "epoch": 0.2997126267486237,
      "grad_norm": 2.142045497894287,
      "learning_rate": 9.462517214340782e-06,
      "loss": 0.0654,
      "step": 183140
    },
    {
      "epoch": 0.299745357187277,
      "grad_norm": 1.1231060028076172,
      "learning_rate": 9.462451322127266e-06,
      "loss": 0.0481,
      "step": 183160
    },
    {
      "epoch": 0.2997780876259304,
      "grad_norm": 1.444814920425415,
      "learning_rate": 9.462385429913748e-06,
      "loss": 0.0521,
      "step": 183180
    },
    {
      "epoch": 0.2998108180645837,
      "grad_norm": 2.4031848907470703,
      "learning_rate": 9.462319537700231e-06,
      "loss": 0.0487,
      "step": 183200
    },
    {
      "epoch": 0.299843548503237,
      "grad_norm": 3.9291532039642334,
      "learning_rate": 9.462253645486713e-06,
      "loss": 0.0401,
      "step": 183220
    },
    {
      "epoch": 0.2998762789418904,
      "grad_norm": 1.818336844444275,
      "learning_rate": 9.462187753273197e-06,
      "loss": 0.0575,
      "step": 183240
    },
    {
      "epoch": 0.2999090093805437,
      "grad_norm": 4.743800640106201,
      "learning_rate": 9.46212186105968e-06,
      "loss": 0.0591,
      "step": 183260
    },
    {
      "epoch": 0.2999417398191971,
      "grad_norm": 0.8394067883491516,
      "learning_rate": 9.462055968846162e-06,
      "loss": 0.0767,
      "step": 183280
    },
    {
      "epoch": 0.2999744702578504,
      "grad_norm": 1.7052792310714722,
      "learning_rate": 9.461990076632646e-06,
      "loss": 0.0543,
      "step": 183300
    },
    {
      "epoch": 0.3000072006965037,
      "grad_norm": 1.3697408437728882,
      "learning_rate": 9.461924184419128e-06,
      "loss": 0.0425,
      "step": 183320
    },
    {
      "epoch": 0.3000399311351571,
      "grad_norm": 0.8476281762123108,
      "learning_rate": 9.461858292205611e-06,
      "loss": 0.0449,
      "step": 183340
    },
    {
      "epoch": 0.3000726615738104,
      "grad_norm": 1.5758363008499146,
      "learning_rate": 9.461792399992093e-06,
      "loss": 0.0421,
      "step": 183360
    },
    {
      "epoch": 0.3001053920124638,
      "grad_norm": 1.7167491912841797,
      "learning_rate": 9.461726507778577e-06,
      "loss": 0.0487,
      "step": 183380
    },
    {
      "epoch": 0.3001381224511171,
      "grad_norm": 1.0779494047164917,
      "learning_rate": 9.461660615565059e-06,
      "loss": 0.0549,
      "step": 183400
    },
    {
      "epoch": 0.3001708528897704,
      "grad_norm": 4.532730579376221,
      "learning_rate": 9.461594723351542e-06,
      "loss": 0.0428,
      "step": 183420
    },
    {
      "epoch": 0.3002035833284238,
      "grad_norm": 0.3648788332939148,
      "learning_rate": 9.461528831138024e-06,
      "loss": 0.0477,
      "step": 183440
    },
    {
      "epoch": 0.3002363137670771,
      "grad_norm": 2.0041418075561523,
      "learning_rate": 9.461462938924508e-06,
      "loss": 0.0405,
      "step": 183460
    },
    {
      "epoch": 0.3002690442057304,
      "grad_norm": 2.9075653553009033,
      "learning_rate": 9.461397046710991e-06,
      "loss": 0.0559,
      "step": 183480
    },
    {
      "epoch": 0.3003017746443838,
      "grad_norm": 1.3125221729278564,
      "learning_rate": 9.461331154497473e-06,
      "loss": 0.0574,
      "step": 183500
    },
    {
      "epoch": 0.3003345050830371,
      "grad_norm": 0.35407590866088867,
      "learning_rate": 9.461265262283957e-06,
      "loss": 0.0518,
      "step": 183520
    },
    {
      "epoch": 0.3003672355216905,
      "grad_norm": 2.3843002319335938,
      "learning_rate": 9.46119937007044e-06,
      "loss": 0.0566,
      "step": 183540
    },
    {
      "epoch": 0.3003999659603438,
      "grad_norm": 0.6704079508781433,
      "learning_rate": 9.461133477856922e-06,
      "loss": 0.0476,
      "step": 183560
    },
    {
      "epoch": 0.3004326963989971,
      "grad_norm": 3.0846352577209473,
      "learning_rate": 9.461067585643406e-06,
      "loss": 0.0419,
      "step": 183580
    },
    {
      "epoch": 0.3004654268376505,
      "grad_norm": 1.9666688442230225,
      "learning_rate": 9.461001693429888e-06,
      "loss": 0.0419,
      "step": 183600
    },
    {
      "epoch": 0.3004981572763038,
      "grad_norm": 1.7525701522827148,
      "learning_rate": 9.460935801216371e-06,
      "loss": 0.0572,
      "step": 183620
    },
    {
      "epoch": 0.3005308877149572,
      "grad_norm": 5.774857521057129,
      "learning_rate": 9.460869909002855e-06,
      "loss": 0.0583,
      "step": 183640
    },
    {
      "epoch": 0.3005636181536105,
      "grad_norm": 1.5871937274932861,
      "learning_rate": 9.460804016789337e-06,
      "loss": 0.0587,
      "step": 183660
    },
    {
      "epoch": 0.3005963485922638,
      "grad_norm": 0.6784637570381165,
      "learning_rate": 9.46073812457582e-06,
      "loss": 0.0437,
      "step": 183680
    },
    {
      "epoch": 0.3006290790309172,
      "grad_norm": 1.6947261095046997,
      "learning_rate": 9.460672232362302e-06,
      "loss": 0.0607,
      "step": 183700
    },
    {
      "epoch": 0.3006618094695705,
      "grad_norm": 5.42533016204834,
      "learning_rate": 9.460606340148786e-06,
      "loss": 0.0645,
      "step": 183720
    },
    {
      "epoch": 0.30069453990822387,
      "grad_norm": 2.914689064025879,
      "learning_rate": 9.460540447935268e-06,
      "loss": 0.0367,
      "step": 183740
    },
    {
      "epoch": 0.3007272703468772,
      "grad_norm": 2.933238983154297,
      "learning_rate": 9.460474555721751e-06,
      "loss": 0.0551,
      "step": 183760
    },
    {
      "epoch": 0.3007600007855305,
      "grad_norm": 0.8588159084320068,
      "learning_rate": 9.460408663508233e-06,
      "loss": 0.0589,
      "step": 183780
    },
    {
      "epoch": 0.3007927312241839,
      "grad_norm": 1.7417123317718506,
      "learning_rate": 9.460342771294717e-06,
      "loss": 0.0635,
      "step": 183800
    },
    {
      "epoch": 0.3008254616628372,
      "grad_norm": 2.274308204650879,
      "learning_rate": 9.460276879081199e-06,
      "loss": 0.0587,
      "step": 183820
    },
    {
      "epoch": 0.30085819210149056,
      "grad_norm": 6.393206596374512,
      "learning_rate": 9.460210986867682e-06,
      "loss": 0.0595,
      "step": 183840
    },
    {
      "epoch": 0.3008909225401439,
      "grad_norm": 1.6045376062393188,
      "learning_rate": 9.460145094654166e-06,
      "loss": 0.0612,
      "step": 183860
    },
    {
      "epoch": 0.3009236529787972,
      "grad_norm": 3.043513774871826,
      "learning_rate": 9.460079202440648e-06,
      "loss": 0.073,
      "step": 183880
    },
    {
      "epoch": 0.30095638341745057,
      "grad_norm": 1.196999192237854,
      "learning_rate": 9.460013310227131e-06,
      "loss": 0.0481,
      "step": 183900
    },
    {
      "epoch": 0.3009891138561039,
      "grad_norm": 2.9670536518096924,
      "learning_rate": 9.459947418013615e-06,
      "loss": 0.0532,
      "step": 183920
    },
    {
      "epoch": 0.30102184429475726,
      "grad_norm": 2.0660643577575684,
      "learning_rate": 9.459881525800097e-06,
      "loss": 0.0441,
      "step": 183940
    },
    {
      "epoch": 0.3010545747334106,
      "grad_norm": 2.494307041168213,
      "learning_rate": 9.45981563358658e-06,
      "loss": 0.0559,
      "step": 183960
    },
    {
      "epoch": 0.3010873051720639,
      "grad_norm": 2.131413459777832,
      "learning_rate": 9.459749741373064e-06,
      "loss": 0.0522,
      "step": 183980
    },
    {
      "epoch": 0.30112003561071726,
      "grad_norm": 1.700393557548523,
      "learning_rate": 9.459683849159546e-06,
      "loss": 0.0498,
      "step": 184000
    },
    {
      "epoch": 0.3011527660493706,
      "grad_norm": 2.642045736312866,
      "learning_rate": 9.45961795694603e-06,
      "loss": 0.0633,
      "step": 184020
    },
    {
      "epoch": 0.30118549648802395,
      "grad_norm": 3.7354366779327393,
      "learning_rate": 9.459552064732511e-06,
      "loss": 0.0482,
      "step": 184040
    },
    {
      "epoch": 0.30121822692667727,
      "grad_norm": 2.2702629566192627,
      "learning_rate": 9.459486172518995e-06,
      "loss": 0.0561,
      "step": 184060
    },
    {
      "epoch": 0.3012509573653306,
      "grad_norm": 1.5463345050811768,
      "learning_rate": 9.459420280305477e-06,
      "loss": 0.0534,
      "step": 184080
    },
    {
      "epoch": 0.30128368780398396,
      "grad_norm": 2.3676846027374268,
      "learning_rate": 9.45935438809196e-06,
      "loss": 0.0505,
      "step": 184100
    },
    {
      "epoch": 0.3013164182426373,
      "grad_norm": 2.08010196685791,
      "learning_rate": 9.459288495878442e-06,
      "loss": 0.0663,
      "step": 184120
    },
    {
      "epoch": 0.30134914868129065,
      "grad_norm": 1.7652480602264404,
      "learning_rate": 9.459222603664926e-06,
      "loss": 0.0441,
      "step": 184140
    },
    {
      "epoch": 0.30138187911994396,
      "grad_norm": 0.955950140953064,
      "learning_rate": 9.459156711451408e-06,
      "loss": 0.0673,
      "step": 184160
    },
    {
      "epoch": 0.3014146095585973,
      "grad_norm": 3.0182406902313232,
      "learning_rate": 9.459090819237891e-06,
      "loss": 0.0508,
      "step": 184180
    },
    {
      "epoch": 0.30144733999725065,
      "grad_norm": 1.3626399040222168,
      "learning_rate": 9.459024927024373e-06,
      "loss": 0.0605,
      "step": 184200
    },
    {
      "epoch": 0.30148007043590397,
      "grad_norm": 0.9116946458816528,
      "learning_rate": 9.458959034810857e-06,
      "loss": 0.0427,
      "step": 184220
    },
    {
      "epoch": 0.30151280087455734,
      "grad_norm": 1.446102261543274,
      "learning_rate": 9.45889314259734e-06,
      "loss": 0.0455,
      "step": 184240
    },
    {
      "epoch": 0.30154553131321066,
      "grad_norm": 1.6557977199554443,
      "learning_rate": 9.458827250383822e-06,
      "loss": 0.0789,
      "step": 184260
    },
    {
      "epoch": 0.301578261751864,
      "grad_norm": 0.9854177832603455,
      "learning_rate": 9.458761358170306e-06,
      "loss": 0.0505,
      "step": 184280
    },
    {
      "epoch": 0.30161099219051735,
      "grad_norm": 1.2001105546951294,
      "learning_rate": 9.45869546595679e-06,
      "loss": 0.0505,
      "step": 184300
    },
    {
      "epoch": 0.30164372262917066,
      "grad_norm": 1.9454997777938843,
      "learning_rate": 9.458629573743271e-06,
      "loss": 0.0537,
      "step": 184320
    },
    {
      "epoch": 0.30167645306782404,
      "grad_norm": 1.1099621057510376,
      "learning_rate": 9.458563681529755e-06,
      "loss": 0.0501,
      "step": 184340
    },
    {
      "epoch": 0.30170918350647735,
      "grad_norm": 1.5288348197937012,
      "learning_rate": 9.458497789316239e-06,
      "loss": 0.0378,
      "step": 184360
    },
    {
      "epoch": 0.30174191394513067,
      "grad_norm": 0.7475644946098328,
      "learning_rate": 9.45843189710272e-06,
      "loss": 0.041,
      "step": 184380
    },
    {
      "epoch": 0.30177464438378404,
      "grad_norm": 1.5831608772277832,
      "learning_rate": 9.458366004889204e-06,
      "loss": 0.0486,
      "step": 184400
    },
    {
      "epoch": 0.30180737482243736,
      "grad_norm": 1.2677010297775269,
      "learning_rate": 9.458300112675686e-06,
      "loss": 0.0598,
      "step": 184420
    },
    {
      "epoch": 0.30184010526109073,
      "grad_norm": 1.1890736818313599,
      "learning_rate": 9.45823422046217e-06,
      "loss": 0.0766,
      "step": 184440
    },
    {
      "epoch": 0.30187283569974405,
      "grad_norm": 1.8537921905517578,
      "learning_rate": 9.458168328248651e-06,
      "loss": 0.0618,
      "step": 184460
    },
    {
      "epoch": 0.30190556613839736,
      "grad_norm": 0.8825829029083252,
      "learning_rate": 9.458102436035135e-06,
      "loss": 0.047,
      "step": 184480
    },
    {
      "epoch": 0.30193829657705074,
      "grad_norm": 1.6876498460769653,
      "learning_rate": 9.458036543821617e-06,
      "loss": 0.0604,
      "step": 184500
    },
    {
      "epoch": 0.30197102701570405,
      "grad_norm": 2.756706476211548,
      "learning_rate": 9.4579706516081e-06,
      "loss": 0.0433,
      "step": 184520
    },
    {
      "epoch": 0.3020037574543574,
      "grad_norm": 1.5558327436447144,
      "learning_rate": 9.457904759394582e-06,
      "loss": 0.0479,
      "step": 184540
    },
    {
      "epoch": 0.30203648789301074,
      "grad_norm": 2.491340160369873,
      "learning_rate": 9.457838867181066e-06,
      "loss": 0.0537,
      "step": 184560
    },
    {
      "epoch": 0.30206921833166406,
      "grad_norm": 1.0782766342163086,
      "learning_rate": 9.457772974967548e-06,
      "loss": 0.0522,
      "step": 184580
    },
    {
      "epoch": 0.30210194877031743,
      "grad_norm": 4.610483646392822,
      "learning_rate": 9.457707082754031e-06,
      "loss": 0.0483,
      "step": 184600
    },
    {
      "epoch": 0.30213467920897075,
      "grad_norm": 3.226468801498413,
      "learning_rate": 9.457641190540513e-06,
      "loss": 0.0529,
      "step": 184620
    },
    {
      "epoch": 0.3021674096476241,
      "grad_norm": 1.6139932870864868,
      "learning_rate": 9.457575298326997e-06,
      "loss": 0.0593,
      "step": 184640
    },
    {
      "epoch": 0.30220014008627744,
      "grad_norm": 2.1670522689819336,
      "learning_rate": 9.45750940611348e-06,
      "loss": 0.0575,
      "step": 184660
    },
    {
      "epoch": 0.30223287052493075,
      "grad_norm": 1.1294052600860596,
      "learning_rate": 9.457443513899962e-06,
      "loss": 0.0396,
      "step": 184680
    },
    {
      "epoch": 0.3022656009635841,
      "grad_norm": 0.40270286798477173,
      "learning_rate": 9.457377621686446e-06,
      "loss": 0.0642,
      "step": 184700
    },
    {
      "epoch": 0.30229833140223744,
      "grad_norm": 1.3808163404464722,
      "learning_rate": 9.45731172947293e-06,
      "loss": 0.057,
      "step": 184720
    },
    {
      "epoch": 0.3023310618408908,
      "grad_norm": 3.586693286895752,
      "learning_rate": 9.457245837259411e-06,
      "loss": 0.0475,
      "step": 184740
    },
    {
      "epoch": 0.30236379227954413,
      "grad_norm": 2.940728187561035,
      "learning_rate": 9.457179945045895e-06,
      "loss": 0.0513,
      "step": 184760
    },
    {
      "epoch": 0.30239652271819745,
      "grad_norm": 1.465196967124939,
      "learning_rate": 9.457114052832379e-06,
      "loss": 0.0587,
      "step": 184780
    },
    {
      "epoch": 0.3024292531568508,
      "grad_norm": 1.2732484340667725,
      "learning_rate": 9.45704816061886e-06,
      "loss": 0.0471,
      "step": 184800
    },
    {
      "epoch": 0.30246198359550414,
      "grad_norm": 1.201802372932434,
      "learning_rate": 9.456982268405344e-06,
      "loss": 0.0508,
      "step": 184820
    },
    {
      "epoch": 0.3024947140341575,
      "grad_norm": 1.2716171741485596,
      "learning_rate": 9.456916376191826e-06,
      "loss": 0.0507,
      "step": 184840
    },
    {
      "epoch": 0.3025274444728108,
      "grad_norm": 2.800645589828491,
      "learning_rate": 9.45685048397831e-06,
      "loss": 0.06,
      "step": 184860
    },
    {
      "epoch": 0.30256017491146414,
      "grad_norm": 0.9216226935386658,
      "learning_rate": 9.456784591764791e-06,
      "loss": 0.0507,
      "step": 184880
    },
    {
      "epoch": 0.3025929053501175,
      "grad_norm": 0.755284309387207,
      "learning_rate": 9.456718699551275e-06,
      "loss": 0.0509,
      "step": 184900
    },
    {
      "epoch": 0.30262563578877083,
      "grad_norm": 0.381491482257843,
      "learning_rate": 9.456652807337757e-06,
      "loss": 0.0307,
      "step": 184920
    },
    {
      "epoch": 0.3026583662274242,
      "grad_norm": 0.4799552857875824,
      "learning_rate": 9.45658691512424e-06,
      "loss": 0.0577,
      "step": 184940
    },
    {
      "epoch": 0.3026910966660775,
      "grad_norm": 2.1588921546936035,
      "learning_rate": 9.456521022910722e-06,
      "loss": 0.0445,
      "step": 184960
    },
    {
      "epoch": 0.30272382710473084,
      "grad_norm": 7.260469913482666,
      "learning_rate": 9.456455130697206e-06,
      "loss": 0.0691,
      "step": 184980
    },
    {
      "epoch": 0.3027565575433842,
      "grad_norm": 1.1285063028335571,
      "learning_rate": 9.456389238483688e-06,
      "loss": 0.0515,
      "step": 185000
    },
    {
      "epoch": 0.3027892879820375,
      "grad_norm": 2.1009340286254883,
      "learning_rate": 9.456323346270172e-06,
      "loss": 0.0445,
      "step": 185020
    },
    {
      "epoch": 0.3028220184206909,
      "grad_norm": 0.36607131361961365,
      "learning_rate": 9.456257454056655e-06,
      "loss": 0.0469,
      "step": 185040
    },
    {
      "epoch": 0.3028547488593442,
      "grad_norm": 1.9630686044692993,
      "learning_rate": 9.456191561843137e-06,
      "loss": 0.0474,
      "step": 185060
    },
    {
      "epoch": 0.30288747929799753,
      "grad_norm": 1.1378458738327026,
      "learning_rate": 9.45612566962962e-06,
      "loss": 0.0445,
      "step": 185080
    },
    {
      "epoch": 0.3029202097366509,
      "grad_norm": 0.6110678911209106,
      "learning_rate": 9.456059777416104e-06,
      "loss": 0.0534,
      "step": 185100
    },
    {
      "epoch": 0.3029529401753042,
      "grad_norm": 1.7833421230316162,
      "learning_rate": 9.455993885202586e-06,
      "loss": 0.0684,
      "step": 185120
    },
    {
      "epoch": 0.3029856706139576,
      "grad_norm": 1.2443197965621948,
      "learning_rate": 9.45592799298907e-06,
      "loss": 0.0664,
      "step": 185140
    },
    {
      "epoch": 0.3030184010526109,
      "grad_norm": 1.7417292594909668,
      "learning_rate": 9.455862100775553e-06,
      "loss": 0.0465,
      "step": 185160
    },
    {
      "epoch": 0.3030511314912642,
      "grad_norm": 0.7070527672767639,
      "learning_rate": 9.455796208562035e-06,
      "loss": 0.0625,
      "step": 185180
    },
    {
      "epoch": 0.3030838619299176,
      "grad_norm": 0.16695989668369293,
      "learning_rate": 9.455730316348519e-06,
      "loss": 0.0442,
      "step": 185200
    },
    {
      "epoch": 0.3031165923685709,
      "grad_norm": 1.345898985862732,
      "learning_rate": 9.455664424135e-06,
      "loss": 0.0604,
      "step": 185220
    },
    {
      "epoch": 0.3031493228072243,
      "grad_norm": 1.2869668006896973,
      "learning_rate": 9.455598531921484e-06,
      "loss": 0.0466,
      "step": 185240
    },
    {
      "epoch": 0.3031820532458776,
      "grad_norm": 2.6034505367279053,
      "learning_rate": 9.455532639707966e-06,
      "loss": 0.0571,
      "step": 185260
    },
    {
      "epoch": 0.3032147836845309,
      "grad_norm": 3.259920120239258,
      "learning_rate": 9.45546674749445e-06,
      "loss": 0.0539,
      "step": 185280
    },
    {
      "epoch": 0.3032475141231843,
      "grad_norm": 4.740086078643799,
      "learning_rate": 9.455400855280932e-06,
      "loss": 0.0582,
      "step": 185300
    },
    {
      "epoch": 0.3032802445618376,
      "grad_norm": 0.5325086116790771,
      "learning_rate": 9.455334963067415e-06,
      "loss": 0.054,
      "step": 185320
    },
    {
      "epoch": 0.303312975000491,
      "grad_norm": 3.111720323562622,
      "learning_rate": 9.455269070853897e-06,
      "loss": 0.0503,
      "step": 185340
    },
    {
      "epoch": 0.3033457054391443,
      "grad_norm": 1.1384235620498657,
      "learning_rate": 9.45520317864038e-06,
      "loss": 0.0657,
      "step": 185360
    },
    {
      "epoch": 0.3033784358777976,
      "grad_norm": 5.506680488586426,
      "learning_rate": 9.455137286426864e-06,
      "loss": 0.0464,
      "step": 185380
    },
    {
      "epoch": 0.303411166316451,
      "grad_norm": 0.8469128608703613,
      "learning_rate": 9.455071394213346e-06,
      "loss": 0.0427,
      "step": 185400
    },
    {
      "epoch": 0.3034438967551043,
      "grad_norm": 0.6829410791397095,
      "learning_rate": 9.45500550199983e-06,
      "loss": 0.0568,
      "step": 185420
    },
    {
      "epoch": 0.3034766271937577,
      "grad_norm": 3.1155598163604736,
      "learning_rate": 9.454939609786312e-06,
      "loss": 0.0603,
      "step": 185440
    },
    {
      "epoch": 0.303509357632411,
      "grad_norm": 1.8504891395568848,
      "learning_rate": 9.454873717572795e-06,
      "loss": 0.0397,
      "step": 185460
    },
    {
      "epoch": 0.3035420880710643,
      "grad_norm": 0.992943525314331,
      "learning_rate": 9.454807825359277e-06,
      "loss": 0.0513,
      "step": 185480
    },
    {
      "epoch": 0.3035748185097177,
      "grad_norm": 1.0205072164535522,
      "learning_rate": 9.45474193314576e-06,
      "loss": 0.0654,
      "step": 185500
    },
    {
      "epoch": 0.303607548948371,
      "grad_norm": 1.9692530632019043,
      "learning_rate": 9.454676040932244e-06,
      "loss": 0.0496,
      "step": 185520
    },
    {
      "epoch": 0.30364027938702437,
      "grad_norm": 1.7093232870101929,
      "learning_rate": 9.454610148718726e-06,
      "loss": 0.061,
      "step": 185540
    },
    {
      "epoch": 0.3036730098256777,
      "grad_norm": 1.993628978729248,
      "learning_rate": 9.45454425650521e-06,
      "loss": 0.0568,
      "step": 185560
    },
    {
      "epoch": 0.303705740264331,
      "grad_norm": 4.140977382659912,
      "learning_rate": 9.454478364291693e-06,
      "loss": 0.0477,
      "step": 185580
    },
    {
      "epoch": 0.3037384707029844,
      "grad_norm": 1.4409117698669434,
      "learning_rate": 9.454412472078175e-06,
      "loss": 0.0607,
      "step": 185600
    },
    {
      "epoch": 0.3037712011416377,
      "grad_norm": 3.710235357284546,
      "learning_rate": 9.454346579864659e-06,
      "loss": 0.0574,
      "step": 185620
    },
    {
      "epoch": 0.30380393158029106,
      "grad_norm": 1.5237810611724854,
      "learning_rate": 9.45428068765114e-06,
      "loss": 0.0576,
      "step": 185640
    },
    {
      "epoch": 0.3038366620189444,
      "grad_norm": 1.7067309617996216,
      "learning_rate": 9.454214795437624e-06,
      "loss": 0.0451,
      "step": 185660
    },
    {
      "epoch": 0.3038693924575977,
      "grad_norm": 5.486518859863281,
      "learning_rate": 9.454148903224106e-06,
      "loss": 0.0446,
      "step": 185680
    },
    {
      "epoch": 0.30390212289625107,
      "grad_norm": 2.1728038787841797,
      "learning_rate": 9.45408301101059e-06,
      "loss": 0.0524,
      "step": 185700
    },
    {
      "epoch": 0.3039348533349044,
      "grad_norm": 1.9829638004302979,
      "learning_rate": 9.454017118797073e-06,
      "loss": 0.0551,
      "step": 185720
    },
    {
      "epoch": 0.30396758377355776,
      "grad_norm": 2.5975844860076904,
      "learning_rate": 9.453951226583555e-06,
      "loss": 0.0584,
      "step": 185740
    },
    {
      "epoch": 0.3040003142122111,
      "grad_norm": 2.0518970489501953,
      "learning_rate": 9.453885334370039e-06,
      "loss": 0.0407,
      "step": 185760
    },
    {
      "epoch": 0.3040330446508644,
      "grad_norm": 3.758516311645508,
      "learning_rate": 9.45381944215652e-06,
      "loss": 0.0391,
      "step": 185780
    },
    {
      "epoch": 0.30406577508951776,
      "grad_norm": 1.8870593309402466,
      "learning_rate": 9.453753549943004e-06,
      "loss": 0.0485,
      "step": 185800
    },
    {
      "epoch": 0.3040985055281711,
      "grad_norm": 3.494267463684082,
      "learning_rate": 9.453687657729486e-06,
      "loss": 0.0584,
      "step": 185820
    },
    {
      "epoch": 0.30413123596682445,
      "grad_norm": 4.712409496307373,
      "learning_rate": 9.45362176551597e-06,
      "loss": 0.072,
      "step": 185840
    },
    {
      "epoch": 0.30416396640547777,
      "grad_norm": 2.142423629760742,
      "learning_rate": 9.453555873302452e-06,
      "loss": 0.0482,
      "step": 185860
    },
    {
      "epoch": 0.3041966968441311,
      "grad_norm": 3.4915659427642822,
      "learning_rate": 9.453489981088935e-06,
      "loss": 0.0515,
      "step": 185880
    },
    {
      "epoch": 0.30422942728278446,
      "grad_norm": 2.2379038333892822,
      "learning_rate": 9.453424088875419e-06,
      "loss": 0.0812,
      "step": 185900
    },
    {
      "epoch": 0.3042621577214378,
      "grad_norm": 2.61915922164917,
      "learning_rate": 9.4533581966619e-06,
      "loss": 0.0414,
      "step": 185920
    },
    {
      "epoch": 0.30429488816009115,
      "grad_norm": 1.3761686086654663,
      "learning_rate": 9.453292304448384e-06,
      "loss": 0.0482,
      "step": 185940
    },
    {
      "epoch": 0.30432761859874446,
      "grad_norm": 0.6357054114341736,
      "learning_rate": 9.453226412234868e-06,
      "loss": 0.0506,
      "step": 185960
    },
    {
      "epoch": 0.3043603490373978,
      "grad_norm": 1.2615488767623901,
      "learning_rate": 9.45316052002135e-06,
      "loss": 0.0549,
      "step": 185980
    },
    {
      "epoch": 0.30439307947605115,
      "grad_norm": 1.031126618385315,
      "learning_rate": 9.453094627807833e-06,
      "loss": 0.051,
      "step": 186000
    },
    {
      "epoch": 0.30442580991470447,
      "grad_norm": 2.5687806606292725,
      "learning_rate": 9.453028735594315e-06,
      "loss": 0.041,
      "step": 186020
    },
    {
      "epoch": 0.30445854035335784,
      "grad_norm": 0.9181137681007385,
      "learning_rate": 9.452962843380799e-06,
      "loss": 0.0468,
      "step": 186040
    },
    {
      "epoch": 0.30449127079201116,
      "grad_norm": 0.4623671770095825,
      "learning_rate": 9.45289695116728e-06,
      "loss": 0.0563,
      "step": 186060
    },
    {
      "epoch": 0.3045240012306645,
      "grad_norm": 2.5791072845458984,
      "learning_rate": 9.452831058953764e-06,
      "loss": 0.0541,
      "step": 186080
    },
    {
      "epoch": 0.30455673166931785,
      "grad_norm": 0.834648072719574,
      "learning_rate": 9.452765166740248e-06,
      "loss": 0.0475,
      "step": 186100
    },
    {
      "epoch": 0.30458946210797117,
      "grad_norm": 1.3119442462921143,
      "learning_rate": 9.45269927452673e-06,
      "loss": 0.0521,
      "step": 186120
    },
    {
      "epoch": 0.30462219254662454,
      "grad_norm": 1.7445920705795288,
      "learning_rate": 9.452633382313213e-06,
      "loss": 0.0605,
      "step": 186140
    },
    {
      "epoch": 0.30465492298527785,
      "grad_norm": 1.3622779846191406,
      "learning_rate": 9.452567490099695e-06,
      "loss": 0.0358,
      "step": 186160
    },
    {
      "epoch": 0.30468765342393117,
      "grad_norm": 1.7594484090805054,
      "learning_rate": 9.452501597886179e-06,
      "loss": 0.0543,
      "step": 186180
    },
    {
      "epoch": 0.30472038386258454,
      "grad_norm": 5.022758483886719,
      "learning_rate": 9.45243570567266e-06,
      "loss": 0.0434,
      "step": 186200
    },
    {
      "epoch": 0.30475311430123786,
      "grad_norm": 0.9609742760658264,
      "learning_rate": 9.452369813459144e-06,
      "loss": 0.0511,
      "step": 186220
    },
    {
      "epoch": 0.3047858447398912,
      "grad_norm": 0.3684481978416443,
      "learning_rate": 9.452303921245626e-06,
      "loss": 0.0442,
      "step": 186240
    },
    {
      "epoch": 0.30481857517854455,
      "grad_norm": 2.4008710384368896,
      "learning_rate": 9.45223802903211e-06,
      "loss": 0.0519,
      "step": 186260
    },
    {
      "epoch": 0.30485130561719787,
      "grad_norm": 0.6232544779777527,
      "learning_rate": 9.452172136818592e-06,
      "loss": 0.0623,
      "step": 186280
    },
    {
      "epoch": 0.30488403605585124,
      "grad_norm": 1.7461857795715332,
      "learning_rate": 9.452106244605075e-06,
      "loss": 0.0448,
      "step": 186300
    },
    {
      "epoch": 0.30491676649450455,
      "grad_norm": 3.580692768096924,
      "learning_rate": 9.452040352391559e-06,
      "loss": 0.0438,
      "step": 186320
    },
    {
      "epoch": 0.30494949693315787,
      "grad_norm": 1.2898004055023193,
      "learning_rate": 9.451974460178042e-06,
      "loss": 0.0678,
      "step": 186340
    },
    {
      "epoch": 0.30498222737181124,
      "grad_norm": 1.3798730373382568,
      "learning_rate": 9.451908567964524e-06,
      "loss": 0.0511,
      "step": 186360
    },
    {
      "epoch": 0.30501495781046456,
      "grad_norm": 2.7095696926116943,
      "learning_rate": 9.451842675751008e-06,
      "loss": 0.0525,
      "step": 186380
    },
    {
      "epoch": 0.30504768824911793,
      "grad_norm": 0.6643586158752441,
      "learning_rate": 9.45177678353749e-06,
      "loss": 0.0598,
      "step": 186400
    },
    {
      "epoch": 0.30508041868777125,
      "grad_norm": 1.0167689323425293,
      "learning_rate": 9.451710891323973e-06,
      "loss": 0.0685,
      "step": 186420
    },
    {
      "epoch": 0.30511314912642457,
      "grad_norm": 1.4082386493682861,
      "learning_rate": 9.451644999110457e-06,
      "loss": 0.0485,
      "step": 186440
    },
    {
      "epoch": 0.30514587956507794,
      "grad_norm": 1.359266996383667,
      "learning_rate": 9.451579106896939e-06,
      "loss": 0.0361,
      "step": 186460
    },
    {
      "epoch": 0.30517861000373125,
      "grad_norm": 1.9574403762817383,
      "learning_rate": 9.451513214683423e-06,
      "loss": 0.0404,
      "step": 186480
    },
    {
      "epoch": 0.3052113404423846,
      "grad_norm": 0.7523412108421326,
      "learning_rate": 9.451447322469904e-06,
      "loss": 0.0641,
      "step": 186500
    },
    {
      "epoch": 0.30524407088103794,
      "grad_norm": 3.699610948562622,
      "learning_rate": 9.451381430256388e-06,
      "loss": 0.0546,
      "step": 186520
    },
    {
      "epoch": 0.30527680131969126,
      "grad_norm": 3.428941249847412,
      "learning_rate": 9.45131553804287e-06,
      "loss": 0.0515,
      "step": 186540
    },
    {
      "epoch": 0.30530953175834463,
      "grad_norm": 0.5186573266983032,
      "learning_rate": 9.451249645829353e-06,
      "loss": 0.0683,
      "step": 186560
    },
    {
      "epoch": 0.30534226219699795,
      "grad_norm": 2.9852888584136963,
      "learning_rate": 9.451183753615835e-06,
      "loss": 0.0429,
      "step": 186580
    },
    {
      "epoch": 0.3053749926356513,
      "grad_norm": 2.4095067977905273,
      "learning_rate": 9.451117861402319e-06,
      "loss": 0.0696,
      "step": 186600
    },
    {
      "epoch": 0.30540772307430464,
      "grad_norm": 3.107228994369507,
      "learning_rate": 9.451051969188801e-06,
      "loss": 0.0617,
      "step": 186620
    },
    {
      "epoch": 0.30544045351295795,
      "grad_norm": 4.083209991455078,
      "learning_rate": 9.450986076975284e-06,
      "loss": 0.0516,
      "step": 186640
    },
    {
      "epoch": 0.3054731839516113,
      "grad_norm": 2.4981117248535156,
      "learning_rate": 9.450920184761766e-06,
      "loss": 0.0665,
      "step": 186660
    },
    {
      "epoch": 0.30550591439026464,
      "grad_norm": 2.9391849040985107,
      "learning_rate": 9.45085429254825e-06,
      "loss": 0.0541,
      "step": 186680
    },
    {
      "epoch": 0.305538644828918,
      "grad_norm": 2.6589865684509277,
      "learning_rate": 9.450788400334734e-06,
      "loss": 0.0624,
      "step": 186700
    },
    {
      "epoch": 0.30557137526757133,
      "grad_norm": 1.2011313438415527,
      "learning_rate": 9.450722508121215e-06,
      "loss": 0.0497,
      "step": 186720
    },
    {
      "epoch": 0.30560410570622465,
      "grad_norm": 1.2907274961471558,
      "learning_rate": 9.450656615907699e-06,
      "loss": 0.0455,
      "step": 186740
    },
    {
      "epoch": 0.305636836144878,
      "grad_norm": 1.649613857269287,
      "learning_rate": 9.450590723694183e-06,
      "loss": 0.0543,
      "step": 186760
    },
    {
      "epoch": 0.30566956658353134,
      "grad_norm": 2.474079132080078,
      "learning_rate": 9.450524831480664e-06,
      "loss": 0.0552,
      "step": 186780
    },
    {
      "epoch": 0.3057022970221847,
      "grad_norm": 3.045891046524048,
      "learning_rate": 9.450458939267148e-06,
      "loss": 0.0553,
      "step": 186800
    },
    {
      "epoch": 0.305735027460838,
      "grad_norm": 0.7627320289611816,
      "learning_rate": 9.450393047053632e-06,
      "loss": 0.0386,
      "step": 186820
    },
    {
      "epoch": 0.30576775789949134,
      "grad_norm": 1.6274656057357788,
      "learning_rate": 9.450327154840114e-06,
      "loss": 0.0601,
      "step": 186840
    },
    {
      "epoch": 0.3058004883381447,
      "grad_norm": 4.066006660461426,
      "learning_rate": 9.450261262626597e-06,
      "loss": 0.0448,
      "step": 186860
    },
    {
      "epoch": 0.30583321877679803,
      "grad_norm": 0.9654325842857361,
      "learning_rate": 9.450195370413079e-06,
      "loss": 0.0547,
      "step": 186880
    },
    {
      "epoch": 0.3058659492154514,
      "grad_norm": 2.0939855575561523,
      "learning_rate": 9.450129478199563e-06,
      "loss": 0.0478,
      "step": 186900
    },
    {
      "epoch": 0.3058986796541047,
      "grad_norm": 2.6318016052246094,
      "learning_rate": 9.450063585986044e-06,
      "loss": 0.0653,
      "step": 186920
    },
    {
      "epoch": 0.30593141009275804,
      "grad_norm": 0.7462786436080933,
      "learning_rate": 9.449997693772528e-06,
      "loss": 0.0543,
      "step": 186940
    },
    {
      "epoch": 0.3059641405314114,
      "grad_norm": 1.0432919263839722,
      "learning_rate": 9.44993180155901e-06,
      "loss": 0.0542,
      "step": 186960
    },
    {
      "epoch": 0.3059968709700647,
      "grad_norm": 1.0350147485733032,
      "learning_rate": 9.449865909345494e-06,
      "loss": 0.058,
      "step": 186980
    },
    {
      "epoch": 0.3060296014087181,
      "grad_norm": 1.4298545122146606,
      "learning_rate": 9.449800017131975e-06,
      "loss": 0.042,
      "step": 187000
    },
    {
      "epoch": 0.3060623318473714,
      "grad_norm": 1.0840692520141602,
      "learning_rate": 9.449734124918459e-06,
      "loss": 0.0408,
      "step": 187020
    },
    {
      "epoch": 0.30609506228602473,
      "grad_norm": 2.638960123062134,
      "learning_rate": 9.449668232704941e-06,
      "loss": 0.0661,
      "step": 187040
    },
    {
      "epoch": 0.3061277927246781,
      "grad_norm": 1.6952145099639893,
      "learning_rate": 9.449602340491425e-06,
      "loss": 0.0372,
      "step": 187060
    },
    {
      "epoch": 0.3061605231633314,
      "grad_norm": 1.85641348361969,
      "learning_rate": 9.449536448277908e-06,
      "loss": 0.0446,
      "step": 187080
    },
    {
      "epoch": 0.3061932536019848,
      "grad_norm": 0.871062695980072,
      "learning_rate": 9.44947055606439e-06,
      "loss": 0.0556,
      "step": 187100
    },
    {
      "epoch": 0.3062259840406381,
      "grad_norm": 0.8356791138648987,
      "learning_rate": 9.449404663850874e-06,
      "loss": 0.0494,
      "step": 187120
    },
    {
      "epoch": 0.3062587144792914,
      "grad_norm": 2.229121446609497,
      "learning_rate": 9.449338771637357e-06,
      "loss": 0.0517,
      "step": 187140
    },
    {
      "epoch": 0.3062914449179448,
      "grad_norm": 1.7500121593475342,
      "learning_rate": 9.449272879423839e-06,
      "loss": 0.0611,
      "step": 187160
    },
    {
      "epoch": 0.3063241753565981,
      "grad_norm": 1.0357547998428345,
      "learning_rate": 9.449206987210323e-06,
      "loss": 0.0373,
      "step": 187180
    },
    {
      "epoch": 0.3063569057952515,
      "grad_norm": 2.0115110874176025,
      "learning_rate": 9.449141094996806e-06,
      "loss": 0.0558,
      "step": 187200
    },
    {
      "epoch": 0.3063896362339048,
      "grad_norm": 5.65366268157959,
      "learning_rate": 9.449075202783288e-06,
      "loss": 0.0516,
      "step": 187220
    },
    {
      "epoch": 0.3064223666725581,
      "grad_norm": 0.804378092288971,
      "learning_rate": 9.449009310569772e-06,
      "loss": 0.0666,
      "step": 187240
    },
    {
      "epoch": 0.3064550971112115,
      "grad_norm": 3.3641581535339355,
      "learning_rate": 9.448943418356254e-06,
      "loss": 0.0589,
      "step": 187260
    },
    {
      "epoch": 0.3064878275498648,
      "grad_norm": 1.7641652822494507,
      "learning_rate": 9.448877526142737e-06,
      "loss": 0.0614,
      "step": 187280
    },
    {
      "epoch": 0.3065205579885182,
      "grad_norm": 1.7149887084960938,
      "learning_rate": 9.448811633929219e-06,
      "loss": 0.0492,
      "step": 187300
    },
    {
      "epoch": 0.3065532884271715,
      "grad_norm": 0.6737316250801086,
      "learning_rate": 9.448745741715703e-06,
      "loss": 0.0424,
      "step": 187320
    },
    {
      "epoch": 0.3065860188658248,
      "grad_norm": 1.867827296257019,
      "learning_rate": 9.448679849502185e-06,
      "loss": 0.05,
      "step": 187340
    },
    {
      "epoch": 0.3066187493044782,
      "grad_norm": 0.8708980083465576,
      "learning_rate": 9.448613957288668e-06,
      "loss": 0.0429,
      "step": 187360
    },
    {
      "epoch": 0.3066514797431315,
      "grad_norm": 1.8745495080947876,
      "learning_rate": 9.44854806507515e-06,
      "loss": 0.0465,
      "step": 187380
    },
    {
      "epoch": 0.3066842101817849,
      "grad_norm": 1.879464864730835,
      "learning_rate": 9.448482172861634e-06,
      "loss": 0.0521,
      "step": 187400
    },
    {
      "epoch": 0.3067169406204382,
      "grad_norm": 0.18147841095924377,
      "learning_rate": 9.448416280648116e-06,
      "loss": 0.0422,
      "step": 187420
    },
    {
      "epoch": 0.3067496710590915,
      "grad_norm": 1.2049413919448853,
      "learning_rate": 9.448350388434599e-06,
      "loss": 0.0484,
      "step": 187440
    },
    {
      "epoch": 0.3067824014977449,
      "grad_norm": 1.9143856763839722,
      "learning_rate": 9.448284496221081e-06,
      "loss": 0.0596,
      "step": 187460
    },
    {
      "epoch": 0.3068151319363982,
      "grad_norm": 1.6449898481369019,
      "learning_rate": 9.448218604007565e-06,
      "loss": 0.0382,
      "step": 187480
    },
    {
      "epoch": 0.30684786237505157,
      "grad_norm": 3.369215965270996,
      "learning_rate": 9.448152711794048e-06,
      "loss": 0.0454,
      "step": 187500
    },
    {
      "epoch": 0.3068805928137049,
      "grad_norm": 2.8034892082214355,
      "learning_rate": 9.44808681958053e-06,
      "loss": 0.0547,
      "step": 187520
    },
    {
      "epoch": 0.3069133232523582,
      "grad_norm": 2.5161118507385254,
      "learning_rate": 9.448020927367014e-06,
      "loss": 0.0605,
      "step": 187540
    },
    {
      "epoch": 0.3069460536910116,
      "grad_norm": 1.5133227109909058,
      "learning_rate": 9.447955035153497e-06,
      "loss": 0.0454,
      "step": 187560
    },
    {
      "epoch": 0.3069787841296649,
      "grad_norm": 3.3009800910949707,
      "learning_rate": 9.447889142939979e-06,
      "loss": 0.0511,
      "step": 187580
    },
    {
      "epoch": 0.30701151456831827,
      "grad_norm": 0.7452313899993896,
      "learning_rate": 9.447823250726463e-06,
      "loss": 0.0445,
      "step": 187600
    },
    {
      "epoch": 0.3070442450069716,
      "grad_norm": 2.2339210510253906,
      "learning_rate": 9.447757358512946e-06,
      "loss": 0.048,
      "step": 187620
    },
    {
      "epoch": 0.3070769754456249,
      "grad_norm": 2.6710379123687744,
      "learning_rate": 9.447691466299428e-06,
      "loss": 0.0601,
      "step": 187640
    },
    {
      "epoch": 0.3071097058842783,
      "grad_norm": 0.3016902804374695,
      "learning_rate": 9.447625574085912e-06,
      "loss": 0.0371,
      "step": 187660
    },
    {
      "epoch": 0.3071424363229316,
      "grad_norm": 2.6837525367736816,
      "learning_rate": 9.447559681872394e-06,
      "loss": 0.0634,
      "step": 187680
    },
    {
      "epoch": 0.30717516676158496,
      "grad_norm": 0.9372446537017822,
      "learning_rate": 9.447493789658877e-06,
      "loss": 0.0585,
      "step": 187700
    },
    {
      "epoch": 0.3072078972002383,
      "grad_norm": 3.0776519775390625,
      "learning_rate": 9.44742789744536e-06,
      "loss": 0.0638,
      "step": 187720
    },
    {
      "epoch": 0.3072406276388916,
      "grad_norm": 1.3656827211380005,
      "learning_rate": 9.447362005231843e-06,
      "loss": 0.0521,
      "step": 187740
    },
    {
      "epoch": 0.30727335807754497,
      "grad_norm": 3.6983213424682617,
      "learning_rate": 9.447296113018325e-06,
      "loss": 0.0529,
      "step": 187760
    },
    {
      "epoch": 0.3073060885161983,
      "grad_norm": 2.724593162536621,
      "learning_rate": 9.447230220804808e-06,
      "loss": 0.0521,
      "step": 187780
    },
    {
      "epoch": 0.30733881895485166,
      "grad_norm": 0.9878355860710144,
      "learning_rate": 9.44716432859129e-06,
      "loss": 0.0461,
      "step": 187800
    },
    {
      "epoch": 0.307371549393505,
      "grad_norm": 1.6280392408370972,
      "learning_rate": 9.447098436377774e-06,
      "loss": 0.0438,
      "step": 187820
    },
    {
      "epoch": 0.3074042798321583,
      "grad_norm": 7.253068923950195,
      "learning_rate": 9.447032544164257e-06,
      "loss": 0.0605,
      "step": 187840
    },
    {
      "epoch": 0.30743701027081166,
      "grad_norm": 1.5598403215408325,
      "learning_rate": 9.44696665195074e-06,
      "loss": 0.0582,
      "step": 187860
    },
    {
      "epoch": 0.307469740709465,
      "grad_norm": 1.799535870552063,
      "learning_rate": 9.446900759737223e-06,
      "loss": 0.0528,
      "step": 187880
    },
    {
      "epoch": 0.30750247114811835,
      "grad_norm": 1.371612548828125,
      "learning_rate": 9.446834867523705e-06,
      "loss": 0.0516,
      "step": 187900
    },
    {
      "epoch": 0.30753520158677167,
      "grad_norm": 1.9707311391830444,
      "learning_rate": 9.446768975310188e-06,
      "loss": 0.065,
      "step": 187920
    },
    {
      "epoch": 0.307567932025425,
      "grad_norm": 1.016690969467163,
      "learning_rate": 9.446703083096672e-06,
      "loss": 0.0494,
      "step": 187940
    },
    {
      "epoch": 0.30760066246407836,
      "grad_norm": 2.8618080615997314,
      "learning_rate": 9.446637190883154e-06,
      "loss": 0.0506,
      "step": 187960
    },
    {
      "epoch": 0.3076333929027317,
      "grad_norm": 1.9089994430541992,
      "learning_rate": 9.446571298669637e-06,
      "loss": 0.0475,
      "step": 187980
    },
    {
      "epoch": 0.30766612334138504,
      "grad_norm": 2.9329230785369873,
      "learning_rate": 9.446505406456121e-06,
      "loss": 0.0478,
      "step": 188000
    },
    {
      "epoch": 0.30769885378003836,
      "grad_norm": 2.7371864318847656,
      "learning_rate": 9.446439514242603e-06,
      "loss": 0.0431,
      "step": 188020
    },
    {
      "epoch": 0.3077315842186917,
      "grad_norm": 2.0164120197296143,
      "learning_rate": 9.446373622029086e-06,
      "loss": 0.0587,
      "step": 188040
    },
    {
      "epoch": 0.30776431465734505,
      "grad_norm": 2.942397117614746,
      "learning_rate": 9.446307729815568e-06,
      "loss": 0.0388,
      "step": 188060
    },
    {
      "epoch": 0.30779704509599837,
      "grad_norm": 5.817633152008057,
      "learning_rate": 9.446241837602052e-06,
      "loss": 0.0686,
      "step": 188080
    },
    {
      "epoch": 0.30782977553465174,
      "grad_norm": 5.064756870269775,
      "learning_rate": 9.446175945388534e-06,
      "loss": 0.0649,
      "step": 188100
    },
    {
      "epoch": 0.30786250597330506,
      "grad_norm": 0.5696905255317688,
      "learning_rate": 9.446110053175017e-06,
      "loss": 0.0377,
      "step": 188120
    },
    {
      "epoch": 0.3078952364119584,
      "grad_norm": 0.6592555046081543,
      "learning_rate": 9.4460441609615e-06,
      "loss": 0.0413,
      "step": 188140
    },
    {
      "epoch": 0.30792796685061175,
      "grad_norm": 11.08248519897461,
      "learning_rate": 9.445978268747983e-06,
      "loss": 0.0489,
      "step": 188160
    },
    {
      "epoch": 0.30796069728926506,
      "grad_norm": 1.0847957134246826,
      "learning_rate": 9.445912376534466e-06,
      "loss": 0.0534,
      "step": 188180
    },
    {
      "epoch": 0.30799342772791843,
      "grad_norm": 1.2027790546417236,
      "learning_rate": 9.445846484320948e-06,
      "loss": 0.0725,
      "step": 188200
    },
    {
      "epoch": 0.30802615816657175,
      "grad_norm": 0.661427915096283,
      "learning_rate": 9.445780592107432e-06,
      "loss": 0.0474,
      "step": 188220
    },
    {
      "epoch": 0.30805888860522507,
      "grad_norm": 0.9971646070480347,
      "learning_rate": 9.445714699893914e-06,
      "loss": 0.0544,
      "step": 188240
    },
    {
      "epoch": 0.30809161904387844,
      "grad_norm": 0.5410302877426147,
      "learning_rate": 9.445648807680397e-06,
      "loss": 0.0652,
      "step": 188260
    },
    {
      "epoch": 0.30812434948253176,
      "grad_norm": 2.2942380905151367,
      "learning_rate": 9.44558291546688e-06,
      "loss": 0.0559,
      "step": 188280
    },
    {
      "epoch": 0.30815707992118513,
      "grad_norm": 3.457645893096924,
      "learning_rate": 9.445517023253363e-06,
      "loss": 0.0552,
      "step": 188300
    },
    {
      "epoch": 0.30818981035983845,
      "grad_norm": 0.31151890754699707,
      "learning_rate": 9.445451131039845e-06,
      "loss": 0.0542,
      "step": 188320
    },
    {
      "epoch": 0.30822254079849176,
      "grad_norm": 2.315378189086914,
      "learning_rate": 9.445385238826328e-06,
      "loss": 0.0442,
      "step": 188340
    },
    {
      "epoch": 0.30825527123714513,
      "grad_norm": 3.418999195098877,
      "learning_rate": 9.445319346612812e-06,
      "loss": 0.0558,
      "step": 188360
    },
    {
      "epoch": 0.30828800167579845,
      "grad_norm": 2.6246886253356934,
      "learning_rate": 9.445253454399294e-06,
      "loss": 0.0452,
      "step": 188380
    },
    {
      "epoch": 0.3083207321144518,
      "grad_norm": 1.2990620136260986,
      "learning_rate": 9.445187562185777e-06,
      "loss": 0.0484,
      "step": 188400
    },
    {
      "epoch": 0.30835346255310514,
      "grad_norm": 2.243431568145752,
      "learning_rate": 9.445121669972261e-06,
      "loss": 0.0502,
      "step": 188420
    },
    {
      "epoch": 0.30838619299175846,
      "grad_norm": 2.100811243057251,
      "learning_rate": 9.445055777758743e-06,
      "loss": 0.0414,
      "step": 188440
    },
    {
      "epoch": 0.30841892343041183,
      "grad_norm": 1.6641286611557007,
      "learning_rate": 9.444989885545226e-06,
      "loss": 0.0508,
      "step": 188460
    },
    {
      "epoch": 0.30845165386906515,
      "grad_norm": 2.7549216747283936,
      "learning_rate": 9.444923993331708e-06,
      "loss": 0.0585,
      "step": 188480
    },
    {
      "epoch": 0.3084843843077185,
      "grad_norm": 1.4463646411895752,
      "learning_rate": 9.444858101118192e-06,
      "loss": 0.0459,
      "step": 188500
    },
    {
      "epoch": 0.30851711474637183,
      "grad_norm": 3.747460126876831,
      "learning_rate": 9.444792208904674e-06,
      "loss": 0.0524,
      "step": 188520
    },
    {
      "epoch": 0.30854984518502515,
      "grad_norm": 1.1514194011688232,
      "learning_rate": 9.444726316691157e-06,
      "loss": 0.0561,
      "step": 188540
    },
    {
      "epoch": 0.3085825756236785,
      "grad_norm": 2.3043429851531982,
      "learning_rate": 9.444660424477641e-06,
      "loss": 0.0686,
      "step": 188560
    },
    {
      "epoch": 0.30861530606233184,
      "grad_norm": 2.3122549057006836,
      "learning_rate": 9.444594532264123e-06,
      "loss": 0.0634,
      "step": 188580
    },
    {
      "epoch": 0.3086480365009852,
      "grad_norm": 1.4910025596618652,
      "learning_rate": 9.444528640050606e-06,
      "loss": 0.0561,
      "step": 188600
    },
    {
      "epoch": 0.30868076693963853,
      "grad_norm": 4.244918346405029,
      "learning_rate": 9.444462747837088e-06,
      "loss": 0.0692,
      "step": 188620
    },
    {
      "epoch": 0.30871349737829185,
      "grad_norm": 1.7043957710266113,
      "learning_rate": 9.444396855623572e-06,
      "loss": 0.0399,
      "step": 188640
    },
    {
      "epoch": 0.3087462278169452,
      "grad_norm": 2.461542844772339,
      "learning_rate": 9.444330963410054e-06,
      "loss": 0.0676,
      "step": 188660
    },
    {
      "epoch": 0.30877895825559853,
      "grad_norm": 1.6575603485107422,
      "learning_rate": 9.444265071196537e-06,
      "loss": 0.0723,
      "step": 188680
    },
    {
      "epoch": 0.3088116886942519,
      "grad_norm": 1.1443825960159302,
      "learning_rate": 9.44419917898302e-06,
      "loss": 0.043,
      "step": 188700
    },
    {
      "epoch": 0.3088444191329052,
      "grad_norm": 0.9055594801902771,
      "learning_rate": 9.444133286769503e-06,
      "loss": 0.0409,
      "step": 188720
    },
    {
      "epoch": 0.30887714957155854,
      "grad_norm": 1.7240898609161377,
      "learning_rate": 9.444067394555987e-06,
      "loss": 0.0546,
      "step": 188740
    },
    {
      "epoch": 0.3089098800102119,
      "grad_norm": 2.7426795959472656,
      "learning_rate": 9.444001502342468e-06,
      "loss": 0.0495,
      "step": 188760
    },
    {
      "epoch": 0.30894261044886523,
      "grad_norm": 1.1552770137786865,
      "learning_rate": 9.443935610128952e-06,
      "loss": 0.0449,
      "step": 188780
    },
    {
      "epoch": 0.3089753408875186,
      "grad_norm": 1.399836778640747,
      "learning_rate": 9.443869717915436e-06,
      "loss": 0.0533,
      "step": 188800
    },
    {
      "epoch": 0.3090080713261719,
      "grad_norm": 2.4311935901641846,
      "learning_rate": 9.443803825701917e-06,
      "loss": 0.0739,
      "step": 188820
    },
    {
      "epoch": 0.30904080176482523,
      "grad_norm": 2.167550563812256,
      "learning_rate": 9.443737933488401e-06,
      "loss": 0.0399,
      "step": 188840
    },
    {
      "epoch": 0.3090735322034786,
      "grad_norm": 1.2720144987106323,
      "learning_rate": 9.443672041274883e-06,
      "loss": 0.0565,
      "step": 188860
    },
    {
      "epoch": 0.3091062626421319,
      "grad_norm": 1.2276575565338135,
      "learning_rate": 9.443606149061367e-06,
      "loss": 0.0373,
      "step": 188880
    },
    {
      "epoch": 0.3091389930807853,
      "grad_norm": 0.6392654180526733,
      "learning_rate": 9.44354025684785e-06,
      "loss": 0.0413,
      "step": 188900
    },
    {
      "epoch": 0.3091717235194386,
      "grad_norm": 0.61251300573349,
      "learning_rate": 9.443474364634332e-06,
      "loss": 0.0589,
      "step": 188920
    },
    {
      "epoch": 0.30920445395809193,
      "grad_norm": 0.6931375861167908,
      "learning_rate": 9.443408472420816e-06,
      "loss": 0.0551,
      "step": 188940
    },
    {
      "epoch": 0.3092371843967453,
      "grad_norm": 1.2282382249832153,
      "learning_rate": 9.443342580207297e-06,
      "loss": 0.0542,
      "step": 188960
    },
    {
      "epoch": 0.3092699148353986,
      "grad_norm": 0.565371572971344,
      "learning_rate": 9.443276687993781e-06,
      "loss": 0.0454,
      "step": 188980
    },
    {
      "epoch": 0.309302645274052,
      "grad_norm": 1.8742434978485107,
      "learning_rate": 9.443210795780263e-06,
      "loss": 0.0553,
      "step": 189000
    },
    {
      "epoch": 0.3093353757127053,
      "grad_norm": 2.049298048019409,
      "learning_rate": 9.443144903566747e-06,
      "loss": 0.0589,
      "step": 189020
    },
    {
      "epoch": 0.3093681061513586,
      "grad_norm": 0.46209022402763367,
      "learning_rate": 9.443079011353228e-06,
      "loss": 0.0494,
      "step": 189040
    },
    {
      "epoch": 0.309400836590012,
      "grad_norm": 0.62241530418396,
      "learning_rate": 9.443013119139712e-06,
      "loss": 0.0528,
      "step": 189060
    },
    {
      "epoch": 0.3094335670286653,
      "grad_norm": 2.7869863510131836,
      "learning_rate": 9.442947226926194e-06,
      "loss": 0.0414,
      "step": 189080
    },
    {
      "epoch": 0.30946629746731863,
      "grad_norm": 2.4727776050567627,
      "learning_rate": 9.442881334712678e-06,
      "loss": 0.0542,
      "step": 189100
    },
    {
      "epoch": 0.309499027905972,
      "grad_norm": 1.6297441720962524,
      "learning_rate": 9.442815442499161e-06,
      "loss": 0.0575,
      "step": 189120
    },
    {
      "epoch": 0.3095317583446253,
      "grad_norm": 1.9478707313537598,
      "learning_rate": 9.442749550285643e-06,
      "loss": 0.0556,
      "step": 189140
    },
    {
      "epoch": 0.3095644887832787,
      "grad_norm": 2.00107741355896,
      "learning_rate": 9.442683658072127e-06,
      "loss": 0.039,
      "step": 189160
    },
    {
      "epoch": 0.309597219221932,
      "grad_norm": 2.846590995788574,
      "learning_rate": 9.44261776585861e-06,
      "loss": 0.0588,
      "step": 189180
    },
    {
      "epoch": 0.3096299496605853,
      "grad_norm": 3.2652933597564697,
      "learning_rate": 9.442551873645092e-06,
      "loss": 0.0532,
      "step": 189200
    },
    {
      "epoch": 0.3096626800992387,
      "grad_norm": 0.6774033308029175,
      "learning_rate": 9.442485981431576e-06,
      "loss": 0.0466,
      "step": 189220
    },
    {
      "epoch": 0.309695410537892,
      "grad_norm": 4.223908424377441,
      "learning_rate": 9.44242008921806e-06,
      "loss": 0.0591,
      "step": 189240
    },
    {
      "epoch": 0.3097281409765454,
      "grad_norm": 1.7022294998168945,
      "learning_rate": 9.442354197004541e-06,
      "loss": 0.069,
      "step": 189260
    },
    {
      "epoch": 0.3097608714151987,
      "grad_norm": 0.9331284761428833,
      "learning_rate": 9.442288304791025e-06,
      "loss": 0.0464,
      "step": 189280
    },
    {
      "epoch": 0.309793601853852,
      "grad_norm": 0.6340921521186829,
      "learning_rate": 9.442222412577507e-06,
      "loss": 0.0465,
      "step": 189300
    },
    {
      "epoch": 0.3098263322925054,
      "grad_norm": 2.638422966003418,
      "learning_rate": 9.44215652036399e-06,
      "loss": 0.0341,
      "step": 189320
    },
    {
      "epoch": 0.3098590627311587,
      "grad_norm": 4.458343982696533,
      "learning_rate": 9.442090628150472e-06,
      "loss": 0.0661,
      "step": 189340
    },
    {
      "epoch": 0.3098917931698121,
      "grad_norm": 0.9502610564231873,
      "learning_rate": 9.442024735936956e-06,
      "loss": 0.0569,
      "step": 189360
    },
    {
      "epoch": 0.3099245236084654,
      "grad_norm": 2.543667793273926,
      "learning_rate": 9.441958843723438e-06,
      "loss": 0.0568,
      "step": 189380
    },
    {
      "epoch": 0.3099572540471187,
      "grad_norm": 3.1978249549865723,
      "learning_rate": 9.441892951509921e-06,
      "loss": 0.0538,
      "step": 189400
    },
    {
      "epoch": 0.3099899844857721,
      "grad_norm": 2.5583388805389404,
      "learning_rate": 9.441827059296403e-06,
      "loss": 0.0388,
      "step": 189420
    },
    {
      "epoch": 0.3100227149244254,
      "grad_norm": 1.358826994895935,
      "learning_rate": 9.441761167082887e-06,
      "loss": 0.0371,
      "step": 189440
    },
    {
      "epoch": 0.3100554453630788,
      "grad_norm": 0.9985232353210449,
      "learning_rate": 9.441695274869369e-06,
      "loss": 0.0499,
      "step": 189460
    },
    {
      "epoch": 0.3100881758017321,
      "grad_norm": 1.607893943786621,
      "learning_rate": 9.441629382655852e-06,
      "loss": 0.0586,
      "step": 189480
    },
    {
      "epoch": 0.3101209062403854,
      "grad_norm": 3.5537149906158447,
      "learning_rate": 9.441563490442334e-06,
      "loss": 0.054,
      "step": 189500
    },
    {
      "epoch": 0.3101536366790388,
      "grad_norm": 3.167546272277832,
      "learning_rate": 9.441497598228818e-06,
      "loss": 0.0511,
      "step": 189520
    },
    {
      "epoch": 0.3101863671176921,
      "grad_norm": 2.649965524673462,
      "learning_rate": 9.441431706015301e-06,
      "loss": 0.0541,
      "step": 189540
    },
    {
      "epoch": 0.31021909755634547,
      "grad_norm": 1.3184046745300293,
      "learning_rate": 9.441365813801783e-06,
      "loss": 0.0414,
      "step": 189560
    },
    {
      "epoch": 0.3102518279949988,
      "grad_norm": 2.366726875305176,
      "learning_rate": 9.441299921588267e-06,
      "loss": 0.0501,
      "step": 189580
    },
    {
      "epoch": 0.3102845584336521,
      "grad_norm": 2.4242513179779053,
      "learning_rate": 9.44123402937475e-06,
      "loss": 0.0544,
      "step": 189600
    },
    {
      "epoch": 0.3103172888723055,
      "grad_norm": 1.8177173137664795,
      "learning_rate": 9.441168137161232e-06,
      "loss": 0.0515,
      "step": 189620
    },
    {
      "epoch": 0.3103500193109588,
      "grad_norm": 1.9051270484924316,
      "learning_rate": 9.441102244947716e-06,
      "loss": 0.033,
      "step": 189640
    },
    {
      "epoch": 0.31038274974961216,
      "grad_norm": 1.7736010551452637,
      "learning_rate": 9.4410363527342e-06,
      "loss": 0.0426,
      "step": 189660
    },
    {
      "epoch": 0.3104154801882655,
      "grad_norm": 3.3812944889068604,
      "learning_rate": 9.440970460520681e-06,
      "loss": 0.0539,
      "step": 189680
    },
    {
      "epoch": 0.3104482106269188,
      "grad_norm": 5.05572509765625,
      "learning_rate": 9.440904568307165e-06,
      "loss": 0.0586,
      "step": 189700
    },
    {
      "epoch": 0.31048094106557217,
      "grad_norm": 3.4352920055389404,
      "learning_rate": 9.440838676093647e-06,
      "loss": 0.05,
      "step": 189720
    },
    {
      "epoch": 0.3105136715042255,
      "grad_norm": 1.6711345911026,
      "learning_rate": 9.44077278388013e-06,
      "loss": 0.0415,
      "step": 189740
    },
    {
      "epoch": 0.31054640194287886,
      "grad_norm": 0.5237457752227783,
      "learning_rate": 9.440706891666612e-06,
      "loss": 0.0398,
      "step": 189760
    },
    {
      "epoch": 0.3105791323815322,
      "grad_norm": 20.80657958984375,
      "learning_rate": 9.440640999453096e-06,
      "loss": 0.0575,
      "step": 189780
    },
    {
      "epoch": 0.3106118628201855,
      "grad_norm": 2.260241985321045,
      "learning_rate": 9.440575107239578e-06,
      "loss": 0.0633,
      "step": 189800
    },
    {
      "epoch": 0.31064459325883886,
      "grad_norm": 2.184400796890259,
      "learning_rate": 9.440509215026061e-06,
      "loss": 0.0419,
      "step": 189820
    },
    {
      "epoch": 0.3106773236974922,
      "grad_norm": 3.68359112739563,
      "learning_rate": 9.440443322812543e-06,
      "loss": 0.0477,
      "step": 189840
    },
    {
      "epoch": 0.31071005413614555,
      "grad_norm": 2.7706823348999023,
      "learning_rate": 9.440377430599027e-06,
      "loss": 0.0605,
      "step": 189860
    },
    {
      "epoch": 0.31074278457479887,
      "grad_norm": 1.564732551574707,
      "learning_rate": 9.440311538385509e-06,
      "loss": 0.0393,
      "step": 189880
    },
    {
      "epoch": 0.3107755150134522,
      "grad_norm": 2.692671060562134,
      "learning_rate": 9.440245646171992e-06,
      "loss": 0.068,
      "step": 189900
    },
    {
      "epoch": 0.31080824545210556,
      "grad_norm": 0.8382638096809387,
      "learning_rate": 9.440179753958476e-06,
      "loss": 0.0618,
      "step": 189920
    },
    {
      "epoch": 0.3108409758907589,
      "grad_norm": 1.158363938331604,
      "learning_rate": 9.440113861744958e-06,
      "loss": 0.0684,
      "step": 189940
    },
    {
      "epoch": 0.31087370632941225,
      "grad_norm": 0.4923146069049835,
      "learning_rate": 9.440047969531441e-06,
      "loss": 0.0673,
      "step": 189960
    },
    {
      "epoch": 0.31090643676806556,
      "grad_norm": 1.6576241254806519,
      "learning_rate": 9.439982077317925e-06,
      "loss": 0.0507,
      "step": 189980
    },
    {
      "epoch": 0.3109391672067189,
      "grad_norm": 1.331276774406433,
      "learning_rate": 9.439916185104407e-06,
      "loss": 0.0559,
      "step": 190000
    },
    {
      "epoch": 0.31097189764537225,
      "grad_norm": 1.4781800508499146,
      "learning_rate": 9.43985029289089e-06,
      "loss": 0.0547,
      "step": 190020
    },
    {
      "epoch": 0.31100462808402557,
      "grad_norm": 3.115236759185791,
      "learning_rate": 9.439784400677374e-06,
      "loss": 0.0762,
      "step": 190040
    },
    {
      "epoch": 0.31103735852267894,
      "grad_norm": 1.5563969612121582,
      "learning_rate": 9.439718508463856e-06,
      "loss": 0.0324,
      "step": 190060
    },
    {
      "epoch": 0.31107008896133226,
      "grad_norm": 2.7924301624298096,
      "learning_rate": 9.43965261625034e-06,
      "loss": 0.0513,
      "step": 190080
    },
    {
      "epoch": 0.3111028193999856,
      "grad_norm": 1.7887511253356934,
      "learning_rate": 9.439586724036821e-06,
      "loss": 0.0468,
      "step": 190100
    },
    {
      "epoch": 0.31113554983863895,
      "grad_norm": 2.752197504043579,
      "learning_rate": 9.439520831823305e-06,
      "loss": 0.0571,
      "step": 190120
    },
    {
      "epoch": 0.31116828027729226,
      "grad_norm": 2.945107936859131,
      "learning_rate": 9.439454939609787e-06,
      "loss": 0.0702,
      "step": 190140
    },
    {
      "epoch": 0.31120101071594564,
      "grad_norm": 0.3317108750343323,
      "learning_rate": 9.43938904739627e-06,
      "loss": 0.0423,
      "step": 190160
    },
    {
      "epoch": 0.31123374115459895,
      "grad_norm": 3.05263352394104,
      "learning_rate": 9.439323155182752e-06,
      "loss": 0.0589,
      "step": 190180
    },
    {
      "epoch": 0.31126647159325227,
      "grad_norm": 2.6364879608154297,
      "learning_rate": 9.439257262969236e-06,
      "loss": 0.0518,
      "step": 190200
    },
    {
      "epoch": 0.31129920203190564,
      "grad_norm": 3.3063437938690186,
      "learning_rate": 9.439191370755718e-06,
      "loss": 0.0575,
      "step": 190220
    },
    {
      "epoch": 0.31133193247055896,
      "grad_norm": 2.494788646697998,
      "learning_rate": 9.439125478542201e-06,
      "loss": 0.051,
      "step": 190240
    },
    {
      "epoch": 0.31136466290921233,
      "grad_norm": 0.6880640387535095,
      "learning_rate": 9.439059586328683e-06,
      "loss": 0.0507,
      "step": 190260
    },
    {
      "epoch": 0.31139739334786565,
      "grad_norm": 2.7995376586914062,
      "learning_rate": 9.438993694115167e-06,
      "loss": 0.0546,
      "step": 190280
    },
    {
      "epoch": 0.31143012378651896,
      "grad_norm": 1.7394472360610962,
      "learning_rate": 9.43892780190165e-06,
      "loss": 0.0368,
      "step": 190300
    },
    {
      "epoch": 0.31146285422517234,
      "grad_norm": 1.8068467378616333,
      "learning_rate": 9.438861909688132e-06,
      "loss": 0.0519,
      "step": 190320
    },
    {
      "epoch": 0.31149558466382565,
      "grad_norm": 2.5215060710906982,
      "learning_rate": 9.438796017474616e-06,
      "loss": 0.0616,
      "step": 190340
    },
    {
      "epoch": 0.311528315102479,
      "grad_norm": 2.4468719959259033,
      "learning_rate": 9.438730125261098e-06,
      "loss": 0.0535,
      "step": 190360
    },
    {
      "epoch": 0.31156104554113234,
      "grad_norm": 0.6860129833221436,
      "learning_rate": 9.438664233047581e-06,
      "loss": 0.0481,
      "step": 190380
    },
    {
      "epoch": 0.31159377597978566,
      "grad_norm": 1.0764834880828857,
      "learning_rate": 9.438598340834065e-06,
      "loss": 0.0652,
      "step": 190400
    },
    {
      "epoch": 0.31162650641843903,
      "grad_norm": 2.3646445274353027,
      "learning_rate": 9.438532448620547e-06,
      "loss": 0.0528,
      "step": 190420
    },
    {
      "epoch": 0.31165923685709235,
      "grad_norm": 2.9988794326782227,
      "learning_rate": 9.43846655640703e-06,
      "loss": 0.0601,
      "step": 190440
    },
    {
      "epoch": 0.3116919672957457,
      "grad_norm": 1.5025501251220703,
      "learning_rate": 9.438400664193514e-06,
      "loss": 0.0502,
      "step": 190460
    },
    {
      "epoch": 0.31172469773439904,
      "grad_norm": 0.4284532964229584,
      "learning_rate": 9.438334771979996e-06,
      "loss": 0.0485,
      "step": 190480
    },
    {
      "epoch": 0.31175742817305235,
      "grad_norm": 2.05222487449646,
      "learning_rate": 9.43826887976648e-06,
      "loss": 0.0696,
      "step": 190500
    },
    {
      "epoch": 0.3117901586117057,
      "grad_norm": 2.050499677658081,
      "learning_rate": 9.438202987552961e-06,
      "loss": 0.0456,
      "step": 190520
    },
    {
      "epoch": 0.31182288905035904,
      "grad_norm": 1.5859485864639282,
      "learning_rate": 9.438137095339445e-06,
      "loss": 0.0667,
      "step": 190540
    },
    {
      "epoch": 0.3118556194890124,
      "grad_norm": 0.2557067573070526,
      "learning_rate": 9.438071203125927e-06,
      "loss": 0.0487,
      "step": 190560
    },
    {
      "epoch": 0.31188834992766573,
      "grad_norm": 1.1284308433532715,
      "learning_rate": 9.43800531091241e-06,
      "loss": 0.07,
      "step": 190580
    },
    {
      "epoch": 0.31192108036631905,
      "grad_norm": 2.030968189239502,
      "learning_rate": 9.437939418698892e-06,
      "loss": 0.0509,
      "step": 190600
    },
    {
      "epoch": 0.3119538108049724,
      "grad_norm": 1.3501461744308472,
      "learning_rate": 9.437873526485376e-06,
      "loss": 0.051,
      "step": 190620
    },
    {
      "epoch": 0.31198654124362574,
      "grad_norm": 0.768211305141449,
      "learning_rate": 9.43780763427186e-06,
      "loss": 0.0589,
      "step": 190640
    },
    {
      "epoch": 0.3120192716822791,
      "grad_norm": 1.0023634433746338,
      "learning_rate": 9.437741742058341e-06,
      "loss": 0.0417,
      "step": 190660
    },
    {
      "epoch": 0.3120520021209324,
      "grad_norm": 1.5622073411941528,
      "learning_rate": 9.437675849844825e-06,
      "loss": 0.0656,
      "step": 190680
    },
    {
      "epoch": 0.31208473255958574,
      "grad_norm": 0.35669195652008057,
      "learning_rate": 9.437609957631307e-06,
      "loss": 0.0496,
      "step": 190700
    },
    {
      "epoch": 0.3121174629982391,
      "grad_norm": 0.9138898253440857,
      "learning_rate": 9.43754406541779e-06,
      "loss": 0.0542,
      "step": 190720
    },
    {
      "epoch": 0.31215019343689243,
      "grad_norm": 1.8363081216812134,
      "learning_rate": 9.437478173204272e-06,
      "loss": 0.0492,
      "step": 190740
    },
    {
      "epoch": 0.3121829238755458,
      "grad_norm": 4.893155574798584,
      "learning_rate": 9.437412280990756e-06,
      "loss": 0.0536,
      "step": 190760
    },
    {
      "epoch": 0.3122156543141991,
      "grad_norm": 2.1202385425567627,
      "learning_rate": 9.43734638877724e-06,
      "loss": 0.0469,
      "step": 190780
    },
    {
      "epoch": 0.31224838475285244,
      "grad_norm": 1.496254324913025,
      "learning_rate": 9.437280496563721e-06,
      "loss": 0.0669,
      "step": 190800
    },
    {
      "epoch": 0.3122811151915058,
      "grad_norm": 2.5893571376800537,
      "learning_rate": 9.437214604350205e-06,
      "loss": 0.0479,
      "step": 190820
    },
    {
      "epoch": 0.3123138456301591,
      "grad_norm": 1.6445682048797607,
      "learning_rate": 9.437148712136689e-06,
      "loss": 0.058,
      "step": 190840
    },
    {
      "epoch": 0.3123465760688125,
      "grad_norm": 1.7939540147781372,
      "learning_rate": 9.43708281992317e-06,
      "loss": 0.052,
      "step": 190860
    },
    {
      "epoch": 0.3123793065074658,
      "grad_norm": 2.4045798778533936,
      "learning_rate": 9.437016927709654e-06,
      "loss": 0.0574,
      "step": 190880
    },
    {
      "epoch": 0.31241203694611913,
      "grad_norm": 1.9095581769943237,
      "learning_rate": 9.436951035496136e-06,
      "loss": 0.062,
      "step": 190900
    },
    {
      "epoch": 0.3124447673847725,
      "grad_norm": 4.074580669403076,
      "learning_rate": 9.43688514328262e-06,
      "loss": 0.057,
      "step": 190920
    },
    {
      "epoch": 0.3124774978234258,
      "grad_norm": 1.2135272026062012,
      "learning_rate": 9.436819251069101e-06,
      "loss": 0.0506,
      "step": 190940
    },
    {
      "epoch": 0.3125102282620792,
      "grad_norm": 7.784948825836182,
      "learning_rate": 9.436753358855585e-06,
      "loss": 0.0589,
      "step": 190960
    },
    {
      "epoch": 0.3125429587007325,
      "grad_norm": 2.681845188140869,
      "learning_rate": 9.436687466642067e-06,
      "loss": 0.0673,
      "step": 190980
    },
    {
      "epoch": 0.3125756891393858,
      "grad_norm": 2.7796077728271484,
      "learning_rate": 9.43662157442855e-06,
      "loss": 0.0576,
      "step": 191000
    },
    {
      "epoch": 0.3126084195780392,
      "grad_norm": 2.1240532398223877,
      "learning_rate": 9.436555682215034e-06,
      "loss": 0.0586,
      "step": 191020
    },
    {
      "epoch": 0.3126411500166925,
      "grad_norm": 0.585669219493866,
      "learning_rate": 9.436489790001516e-06,
      "loss": 0.0472,
      "step": 191040
    },
    {
      "epoch": 0.3126738804553459,
      "grad_norm": 1.7555309534072876,
      "learning_rate": 9.436423897788e-06,
      "loss": 0.0502,
      "step": 191060
    },
    {
      "epoch": 0.3127066108939992,
      "grad_norm": 1.3406141996383667,
      "learning_rate": 9.436358005574481e-06,
      "loss": 0.0617,
      "step": 191080
    },
    {
      "epoch": 0.3127393413326525,
      "grad_norm": 2.9010767936706543,
      "learning_rate": 9.436292113360965e-06,
      "loss": 0.0495,
      "step": 191100
    },
    {
      "epoch": 0.3127720717713059,
      "grad_norm": 1.9687721729278564,
      "learning_rate": 9.436226221147447e-06,
      "loss": 0.0535,
      "step": 191120
    },
    {
      "epoch": 0.3128048022099592,
      "grad_norm": 1.7206934690475464,
      "learning_rate": 9.43616032893393e-06,
      "loss": 0.05,
      "step": 191140
    },
    {
      "epoch": 0.3128375326486126,
      "grad_norm": 1.7181751728057861,
      "learning_rate": 9.436094436720412e-06,
      "loss": 0.0474,
      "step": 191160
    },
    {
      "epoch": 0.3128702630872659,
      "grad_norm": 10.942112922668457,
      "learning_rate": 9.436028544506896e-06,
      "loss": 0.041,
      "step": 191180
    },
    {
      "epoch": 0.3129029935259192,
      "grad_norm": 0.35956206917762756,
      "learning_rate": 9.43596265229338e-06,
      "loss": 0.0484,
      "step": 191200
    },
    {
      "epoch": 0.3129357239645726,
      "grad_norm": 1.139689564704895,
      "learning_rate": 9.435896760079861e-06,
      "loss": 0.0547,
      "step": 191220
    },
    {
      "epoch": 0.3129684544032259,
      "grad_norm": 2.0763885974884033,
      "learning_rate": 9.435830867866345e-06,
      "loss": 0.0372,
      "step": 191240
    },
    {
      "epoch": 0.3130011848418793,
      "grad_norm": 1.2745782136917114,
      "learning_rate": 9.435764975652829e-06,
      "loss": 0.0653,
      "step": 191260
    },
    {
      "epoch": 0.3130339152805326,
      "grad_norm": 9.155213356018066,
      "learning_rate": 9.43569908343931e-06,
      "loss": 0.052,
      "step": 191280
    },
    {
      "epoch": 0.3130666457191859,
      "grad_norm": 13.852677345275879,
      "learning_rate": 9.435633191225794e-06,
      "loss": 0.0647,
      "step": 191300
    },
    {
      "epoch": 0.3130993761578393,
      "grad_norm": 2.4251468181610107,
      "learning_rate": 9.435567299012276e-06,
      "loss": 0.0555,
      "step": 191320
    },
    {
      "epoch": 0.3131321065964926,
      "grad_norm": 0.3443757891654968,
      "learning_rate": 9.43550140679876e-06,
      "loss": 0.0328,
      "step": 191340
    },
    {
      "epoch": 0.31316483703514597,
      "grad_norm": 1.0083633661270142,
      "learning_rate": 9.435435514585243e-06,
      "loss": 0.0572,
      "step": 191360
    },
    {
      "epoch": 0.3131975674737993,
      "grad_norm": 1.2118412256240845,
      "learning_rate": 9.435369622371725e-06,
      "loss": 0.0461,
      "step": 191380
    },
    {
      "epoch": 0.3132302979124526,
      "grad_norm": 0.6988157033920288,
      "learning_rate": 9.435303730158209e-06,
      "loss": 0.0433,
      "step": 191400
    },
    {
      "epoch": 0.313263028351106,
      "grad_norm": 2.208526372909546,
      "learning_rate": 9.43523783794469e-06,
      "loss": 0.0643,
      "step": 191420
    },
    {
      "epoch": 0.3132957587897593,
      "grad_norm": 1.2747749090194702,
      "learning_rate": 9.435171945731174e-06,
      "loss": 0.0435,
      "step": 191440
    },
    {
      "epoch": 0.31332848922841267,
      "grad_norm": 3.1754536628723145,
      "learning_rate": 9.435106053517656e-06,
      "loss": 0.0486,
      "step": 191460
    },
    {
      "epoch": 0.313361219667066,
      "grad_norm": 4.512558937072754,
      "learning_rate": 9.43504016130414e-06,
      "loss": 0.0529,
      "step": 191480
    },
    {
      "epoch": 0.3133939501057193,
      "grad_norm": 1.802713394165039,
      "learning_rate": 9.434974269090622e-06,
      "loss": 0.0417,
      "step": 191500
    },
    {
      "epoch": 0.31342668054437267,
      "grad_norm": 1.033033013343811,
      "learning_rate": 9.434908376877105e-06,
      "loss": 0.0639,
      "step": 191520
    },
    {
      "epoch": 0.313459410983026,
      "grad_norm": 0.8324115872383118,
      "learning_rate": 9.434842484663587e-06,
      "loss": 0.0515,
      "step": 191540
    },
    {
      "epoch": 0.31349214142167936,
      "grad_norm": 2.03898024559021,
      "learning_rate": 9.43477659245007e-06,
      "loss": 0.0618,
      "step": 191560
    },
    {
      "epoch": 0.3135248718603327,
      "grad_norm": 0.5151153206825256,
      "learning_rate": 9.434710700236554e-06,
      "loss": 0.0539,
      "step": 191580
    },
    {
      "epoch": 0.313557602298986,
      "grad_norm": 2.9210703372955322,
      "learning_rate": 9.434644808023036e-06,
      "loss": 0.0484,
      "step": 191600
    },
    {
      "epoch": 0.31359033273763937,
      "grad_norm": 5.007814884185791,
      "learning_rate": 9.43457891580952e-06,
      "loss": 0.0424,
      "step": 191620
    },
    {
      "epoch": 0.3136230631762927,
      "grad_norm": 0.8922635316848755,
      "learning_rate": 9.434513023596003e-06,
      "loss": 0.0463,
      "step": 191640
    },
    {
      "epoch": 0.31365579361494605,
      "grad_norm": 3.618191719055176,
      "learning_rate": 9.434447131382485e-06,
      "loss": 0.0735,
      "step": 191660
    },
    {
      "epoch": 0.31368852405359937,
      "grad_norm": 2.643240451812744,
      "learning_rate": 9.434381239168969e-06,
      "loss": 0.0561,
      "step": 191680
    },
    {
      "epoch": 0.3137212544922527,
      "grad_norm": 1.381603717803955,
      "learning_rate": 9.434315346955452e-06,
      "loss": 0.0394,
      "step": 191700
    },
    {
      "epoch": 0.31375398493090606,
      "grad_norm": 2.5655970573425293,
      "learning_rate": 9.434249454741934e-06,
      "loss": 0.0645,
      "step": 191720
    },
    {
      "epoch": 0.3137867153695594,
      "grad_norm": 1.1107414960861206,
      "learning_rate": 9.434183562528418e-06,
      "loss": 0.0531,
      "step": 191740
    },
    {
      "epoch": 0.31381944580821275,
      "grad_norm": 1.690897822380066,
      "learning_rate": 9.4341176703149e-06,
      "loss": 0.0604,
      "step": 191760
    },
    {
      "epoch": 0.31385217624686607,
      "grad_norm": 1.220062017440796,
      "learning_rate": 9.434051778101383e-06,
      "loss": 0.0514,
      "step": 191780
    },
    {
      "epoch": 0.3138849066855194,
      "grad_norm": 2.5837509632110596,
      "learning_rate": 9.433985885887865e-06,
      "loss": 0.046,
      "step": 191800
    },
    {
      "epoch": 0.31391763712417275,
      "grad_norm": 1.0935293436050415,
      "learning_rate": 9.433919993674349e-06,
      "loss": 0.0495,
      "step": 191820
    },
    {
      "epoch": 0.31395036756282607,
      "grad_norm": 1.7281808853149414,
      "learning_rate": 9.43385410146083e-06,
      "loss": 0.0485,
      "step": 191840
    },
    {
      "epoch": 0.3139830980014794,
      "grad_norm": 0.6126060485839844,
      "learning_rate": 9.433788209247314e-06,
      "loss": 0.0512,
      "step": 191860
    },
    {
      "epoch": 0.31401582844013276,
      "grad_norm": 2.096217155456543,
      "learning_rate": 9.433722317033796e-06,
      "loss": 0.0464,
      "step": 191880
    },
    {
      "epoch": 0.3140485588787861,
      "grad_norm": 5.9593400955200195,
      "learning_rate": 9.43365642482028e-06,
      "loss": 0.0556,
      "step": 191900
    },
    {
      "epoch": 0.31408128931743945,
      "grad_norm": 2.0034332275390625,
      "learning_rate": 9.433590532606762e-06,
      "loss": 0.0495,
      "step": 191920
    },
    {
      "epoch": 0.31411401975609277,
      "grad_norm": 2.2528347969055176,
      "learning_rate": 9.433524640393245e-06,
      "loss": 0.0461,
      "step": 191940
    },
    {
      "epoch": 0.3141467501947461,
      "grad_norm": 1.8715530633926392,
      "learning_rate": 9.433458748179729e-06,
      "loss": 0.0592,
      "step": 191960
    },
    {
      "epoch": 0.31417948063339945,
      "grad_norm": 4.810059547424316,
      "learning_rate": 9.43339285596621e-06,
      "loss": 0.0307,
      "step": 191980
    },
    {
      "epoch": 0.31421221107205277,
      "grad_norm": 1.167860746383667,
      "learning_rate": 9.433326963752694e-06,
      "loss": 0.0358,
      "step": 192000
    },
    {
      "epoch": 0.31424494151070614,
      "grad_norm": 2.437253713607788,
      "learning_rate": 9.433261071539178e-06,
      "loss": 0.0636,
      "step": 192020
    },
    {
      "epoch": 0.31427767194935946,
      "grad_norm": 1.907050609588623,
      "learning_rate": 9.43319517932566e-06,
      "loss": 0.0595,
      "step": 192040
    },
    {
      "epoch": 0.3143104023880128,
      "grad_norm": 1.9532593488693237,
      "learning_rate": 9.433129287112143e-06,
      "loss": 0.043,
      "step": 192060
    },
    {
      "epoch": 0.31434313282666615,
      "grad_norm": 1.7691404819488525,
      "learning_rate": 9.433063394898627e-06,
      "loss": 0.0497,
      "step": 192080
    },
    {
      "epoch": 0.31437586326531947,
      "grad_norm": 1.2578612565994263,
      "learning_rate": 9.432997502685109e-06,
      "loss": 0.0491,
      "step": 192100
    },
    {
      "epoch": 0.31440859370397284,
      "grad_norm": 0.9743456840515137,
      "learning_rate": 9.432931610471592e-06,
      "loss": 0.0431,
      "step": 192120
    },
    {
      "epoch": 0.31444132414262616,
      "grad_norm": 1.4811760187149048,
      "learning_rate": 9.432865718258074e-06,
      "loss": 0.0398,
      "step": 192140
    },
    {
      "epoch": 0.31447405458127947,
      "grad_norm": 9.76873779296875,
      "learning_rate": 9.432799826044558e-06,
      "loss": 0.0637,
      "step": 192160
    },
    {
      "epoch": 0.31450678501993284,
      "grad_norm": 1.9838560819625854,
      "learning_rate": 9.43273393383104e-06,
      "loss": 0.0438,
      "step": 192180
    },
    {
      "epoch": 0.31453951545858616,
      "grad_norm": 1.8417412042617798,
      "learning_rate": 9.432668041617523e-06,
      "loss": 0.0641,
      "step": 192200
    },
    {
      "epoch": 0.31457224589723953,
      "grad_norm": 0.2043628841638565,
      "learning_rate": 9.432602149404005e-06,
      "loss": 0.0454,
      "step": 192220
    },
    {
      "epoch": 0.31460497633589285,
      "grad_norm": 1.3016332387924194,
      "learning_rate": 9.432536257190489e-06,
      "loss": 0.0605,
      "step": 192240
    },
    {
      "epoch": 0.31463770677454617,
      "grad_norm": 1.5993624925613403,
      "learning_rate": 9.43247036497697e-06,
      "loss": 0.0588,
      "step": 192260
    },
    {
      "epoch": 0.31467043721319954,
      "grad_norm": 1.4093629121780396,
      "learning_rate": 9.432404472763454e-06,
      "loss": 0.0398,
      "step": 192280
    },
    {
      "epoch": 0.31470316765185286,
      "grad_norm": 2.252288341522217,
      "learning_rate": 9.432338580549936e-06,
      "loss": 0.0592,
      "step": 192300
    },
    {
      "epoch": 0.3147358980905062,
      "grad_norm": 3.022590398788452,
      "learning_rate": 9.43227268833642e-06,
      "loss": 0.0655,
      "step": 192320
    },
    {
      "epoch": 0.31476862852915954,
      "grad_norm": 0.8643695712089539,
      "learning_rate": 9.432206796122902e-06,
      "loss": 0.0388,
      "step": 192340
    },
    {
      "epoch": 0.31480135896781286,
      "grad_norm": 4.479755878448486,
      "learning_rate": 9.432140903909385e-06,
      "loss": 0.0605,
      "step": 192360
    },
    {
      "epoch": 0.31483408940646623,
      "grad_norm": 2.045215606689453,
      "learning_rate": 9.432075011695869e-06,
      "loss": 0.0584,
      "step": 192380
    },
    {
      "epoch": 0.31486681984511955,
      "grad_norm": 3.6543471813201904,
      "learning_rate": 9.43200911948235e-06,
      "loss": 0.05,
      "step": 192400
    },
    {
      "epoch": 0.3148995502837729,
      "grad_norm": 3.3109142780303955,
      "learning_rate": 9.431943227268834e-06,
      "loss": 0.0566,
      "step": 192420
    },
    {
      "epoch": 0.31493228072242624,
      "grad_norm": 1.0786097049713135,
      "learning_rate": 9.431877335055318e-06,
      "loss": 0.0337,
      "step": 192440
    },
    {
      "epoch": 0.31496501116107956,
      "grad_norm": 1.3577672243118286,
      "learning_rate": 9.4318114428418e-06,
      "loss": 0.0475,
      "step": 192460
    },
    {
      "epoch": 0.3149977415997329,
      "grad_norm": 0.49237900972366333,
      "learning_rate": 9.431745550628283e-06,
      "loss": 0.0477,
      "step": 192480
    },
    {
      "epoch": 0.31503047203838624,
      "grad_norm": 1.6636106967926025,
      "learning_rate": 9.431679658414767e-06,
      "loss": 0.0671,
      "step": 192500
    },
    {
      "epoch": 0.3150632024770396,
      "grad_norm": 1.4232025146484375,
      "learning_rate": 9.431613766201249e-06,
      "loss": 0.0543,
      "step": 192520
    },
    {
      "epoch": 0.31509593291569293,
      "grad_norm": 1.548925757408142,
      "learning_rate": 9.431547873987732e-06,
      "loss": 0.0501,
      "step": 192540
    },
    {
      "epoch": 0.31512866335434625,
      "grad_norm": 1.3780224323272705,
      "learning_rate": 9.431481981774214e-06,
      "loss": 0.051,
      "step": 192560
    },
    {
      "epoch": 0.3151613937929996,
      "grad_norm": 0.2214965969324112,
      "learning_rate": 9.431416089560698e-06,
      "loss": 0.0448,
      "step": 192580
    },
    {
      "epoch": 0.31519412423165294,
      "grad_norm": 1.2798964977264404,
      "learning_rate": 9.43135019734718e-06,
      "loss": 0.0508,
      "step": 192600
    },
    {
      "epoch": 0.3152268546703063,
      "grad_norm": 1.4086246490478516,
      "learning_rate": 9.431284305133663e-06,
      "loss": 0.0496,
      "step": 192620
    },
    {
      "epoch": 0.31525958510895963,
      "grad_norm": 3.4110264778137207,
      "learning_rate": 9.431218412920145e-06,
      "loss": 0.0457,
      "step": 192640
    },
    {
      "epoch": 0.31529231554761294,
      "grad_norm": 1.6919769048690796,
      "learning_rate": 9.431152520706629e-06,
      "loss": 0.0594,
      "step": 192660
    },
    {
      "epoch": 0.3153250459862663,
      "grad_norm": 0.711456298828125,
      "learning_rate": 9.43108662849311e-06,
      "loss": 0.0444,
      "step": 192680
    },
    {
      "epoch": 0.31535777642491963,
      "grad_norm": 3.751718044281006,
      "learning_rate": 9.431020736279594e-06,
      "loss": 0.057,
      "step": 192700
    },
    {
      "epoch": 0.315390506863573,
      "grad_norm": 3.2742862701416016,
      "learning_rate": 9.430954844066076e-06,
      "loss": 0.0483,
      "step": 192720
    },
    {
      "epoch": 0.3154232373022263,
      "grad_norm": 1.7093284130096436,
      "learning_rate": 9.43088895185256e-06,
      "loss": 0.0421,
      "step": 192740
    },
    {
      "epoch": 0.31545596774087964,
      "grad_norm": 2.903709650039673,
      "learning_rate": 9.430823059639043e-06,
      "loss": 0.0428,
      "step": 192760
    },
    {
      "epoch": 0.315488698179533,
      "grad_norm": 2.695058822631836,
      "learning_rate": 9.430757167425525e-06,
      "loss": 0.0455,
      "step": 192780
    },
    {
      "epoch": 0.31552142861818633,
      "grad_norm": 2.1683270931243896,
      "learning_rate": 9.430691275212009e-06,
      "loss": 0.0679,
      "step": 192800
    },
    {
      "epoch": 0.3155541590568397,
      "grad_norm": 2.1018013954162598,
      "learning_rate": 9.430625382998493e-06,
      "loss": 0.0482,
      "step": 192820
    },
    {
      "epoch": 0.315586889495493,
      "grad_norm": 2.826474189758301,
      "learning_rate": 9.430559490784974e-06,
      "loss": 0.0444,
      "step": 192840
    },
    {
      "epoch": 0.31561961993414633,
      "grad_norm": 1.3107470273971558,
      "learning_rate": 9.430493598571458e-06,
      "loss": 0.0534,
      "step": 192860
    },
    {
      "epoch": 0.3156523503727997,
      "grad_norm": 1.0092837810516357,
      "learning_rate": 9.430427706357942e-06,
      "loss": 0.0579,
      "step": 192880
    },
    {
      "epoch": 0.315685080811453,
      "grad_norm": 1.4313710927963257,
      "learning_rate": 9.430361814144423e-06,
      "loss": 0.0532,
      "step": 192900
    },
    {
      "epoch": 0.3157178112501064,
      "grad_norm": 0.43062901496887207,
      "learning_rate": 9.430295921930907e-06,
      "loss": 0.0446,
      "step": 192920
    },
    {
      "epoch": 0.3157505416887597,
      "grad_norm": 1.4159890413284302,
      "learning_rate": 9.430230029717389e-06,
      "loss": 0.0545,
      "step": 192940
    },
    {
      "epoch": 0.31578327212741303,
      "grad_norm": 2.3552327156066895,
      "learning_rate": 9.430164137503873e-06,
      "loss": 0.0446,
      "step": 192960
    },
    {
      "epoch": 0.3158160025660664,
      "grad_norm": 1.6735334396362305,
      "learning_rate": 9.430098245290354e-06,
      "loss": 0.0631,
      "step": 192980
    },
    {
      "epoch": 0.3158487330047197,
      "grad_norm": 0.43487638235092163,
      "learning_rate": 9.430032353076838e-06,
      "loss": 0.0507,
      "step": 193000
    },
    {
      "epoch": 0.3158814634433731,
      "grad_norm": 0.6847342848777771,
      "learning_rate": 9.42996646086332e-06,
      "loss": 0.0466,
      "step": 193020
    },
    {
      "epoch": 0.3159141938820264,
      "grad_norm": 1.456600546836853,
      "learning_rate": 9.429900568649804e-06,
      "loss": 0.0527,
      "step": 193040
    },
    {
      "epoch": 0.3159469243206797,
      "grad_norm": 1.6791388988494873,
      "learning_rate": 9.429834676436285e-06,
      "loss": 0.0704,
      "step": 193060
    },
    {
      "epoch": 0.3159796547593331,
      "grad_norm": 1.1231502294540405,
      "learning_rate": 9.429768784222769e-06,
      "loss": 0.0425,
      "step": 193080
    },
    {
      "epoch": 0.3160123851979864,
      "grad_norm": 7.211317539215088,
      "learning_rate": 9.429702892009251e-06,
      "loss": 0.0697,
      "step": 193100
    },
    {
      "epoch": 0.3160451156366398,
      "grad_norm": 2.783841848373413,
      "learning_rate": 9.429636999795734e-06,
      "loss": 0.0599,
      "step": 193120
    },
    {
      "epoch": 0.3160778460752931,
      "grad_norm": 1.6137052774429321,
      "learning_rate": 9.429571107582218e-06,
      "loss": 0.0673,
      "step": 193140
    },
    {
      "epoch": 0.3161105765139464,
      "grad_norm": 2.71748685836792,
      "learning_rate": 9.4295052153687e-06,
      "loss": 0.0425,
      "step": 193160
    },
    {
      "epoch": 0.3161433069525998,
      "grad_norm": 0.9641721248626709,
      "learning_rate": 9.429439323155184e-06,
      "loss": 0.077,
      "step": 193180
    },
    {
      "epoch": 0.3161760373912531,
      "grad_norm": 1.6533012390136719,
      "learning_rate": 9.429373430941665e-06,
      "loss": 0.057,
      "step": 193200
    },
    {
      "epoch": 0.3162087678299065,
      "grad_norm": 0.6385592222213745,
      "learning_rate": 9.429307538728149e-06,
      "loss": 0.0608,
      "step": 193220
    },
    {
      "epoch": 0.3162414982685598,
      "grad_norm": 1.9226685762405396,
      "learning_rate": 9.429241646514633e-06,
      "loss": 0.045,
      "step": 193240
    },
    {
      "epoch": 0.3162742287072131,
      "grad_norm": 1.7008306980133057,
      "learning_rate": 9.429175754301114e-06,
      "loss": 0.0601,
      "step": 193260
    },
    {
      "epoch": 0.3163069591458665,
      "grad_norm": 1.9280173778533936,
      "learning_rate": 9.429109862087598e-06,
      "loss": 0.053,
      "step": 193280
    },
    {
      "epoch": 0.3163396895845198,
      "grad_norm": 1.667881727218628,
      "learning_rate": 9.429043969874082e-06,
      "loss": 0.0483,
      "step": 193300
    },
    {
      "epoch": 0.3163724200231732,
      "grad_norm": 0.8733946681022644,
      "learning_rate": 9.428978077660564e-06,
      "loss": 0.048,
      "step": 193320
    },
    {
      "epoch": 0.3164051504618265,
      "grad_norm": 2.5267980098724365,
      "learning_rate": 9.428912185447047e-06,
      "loss": 0.0524,
      "step": 193340
    },
    {
      "epoch": 0.3164378809004798,
      "grad_norm": 1.8965599536895752,
      "learning_rate": 9.428846293233529e-06,
      "loss": 0.0449,
      "step": 193360
    },
    {
      "epoch": 0.3164706113391332,
      "grad_norm": 4.195573329925537,
      "learning_rate": 9.428780401020013e-06,
      "loss": 0.0487,
      "step": 193380
    },
    {
      "epoch": 0.3165033417777865,
      "grad_norm": 4.971278190612793,
      "learning_rate": 9.428714508806495e-06,
      "loss": 0.0415,
      "step": 193400
    },
    {
      "epoch": 0.31653607221643987,
      "grad_norm": 0.8956568837165833,
      "learning_rate": 9.428648616592978e-06,
      "loss": 0.0395,
      "step": 193420
    },
    {
      "epoch": 0.3165688026550932,
      "grad_norm": 0.9068835973739624,
      "learning_rate": 9.42858272437946e-06,
      "loss": 0.0575,
      "step": 193440
    },
    {
      "epoch": 0.3166015330937465,
      "grad_norm": 1.1338361501693726,
      "learning_rate": 9.428516832165944e-06,
      "loss": 0.0388,
      "step": 193460
    },
    {
      "epoch": 0.3166342635323999,
      "grad_norm": 0.8729643225669861,
      "learning_rate": 9.428450939952427e-06,
      "loss": 0.0687,
      "step": 193480
    },
    {
      "epoch": 0.3166669939710532,
      "grad_norm": 1.8302993774414062,
      "learning_rate": 9.428385047738909e-06,
      "loss": 0.0452,
      "step": 193500
    },
    {
      "epoch": 0.31669972440970656,
      "grad_norm": 3.289323329925537,
      "learning_rate": 9.428319155525393e-06,
      "loss": 0.0462,
      "step": 193520
    },
    {
      "epoch": 0.3167324548483599,
      "grad_norm": 1.1776468753814697,
      "learning_rate": 9.428253263311875e-06,
      "loss": 0.0501,
      "step": 193540
    },
    {
      "epoch": 0.3167651852870132,
      "grad_norm": 2.275090217590332,
      "learning_rate": 9.428187371098358e-06,
      "loss": 0.0657,
      "step": 193560
    },
    {
      "epoch": 0.31679791572566657,
      "grad_norm": 0.9862239956855774,
      "learning_rate": 9.42812147888484e-06,
      "loss": 0.0517,
      "step": 193580
    },
    {
      "epoch": 0.3168306461643199,
      "grad_norm": 0.9363088011741638,
      "learning_rate": 9.428055586671324e-06,
      "loss": 0.051,
      "step": 193600
    },
    {
      "epoch": 0.31686337660297326,
      "grad_norm": 0.46097609400749207,
      "learning_rate": 9.427989694457807e-06,
      "loss": 0.0387,
      "step": 193620
    },
    {
      "epoch": 0.3168961070416266,
      "grad_norm": 0.7342910170555115,
      "learning_rate": 9.427923802244289e-06,
      "loss": 0.0455,
      "step": 193640
    },
    {
      "epoch": 0.3169288374802799,
      "grad_norm": 2.2690958976745605,
      "learning_rate": 9.427857910030773e-06,
      "loss": 0.0671,
      "step": 193660
    },
    {
      "epoch": 0.31696156791893326,
      "grad_norm": 2.449619770050049,
      "learning_rate": 9.427792017817256e-06,
      "loss": 0.0574,
      "step": 193680
    },
    {
      "epoch": 0.3169942983575866,
      "grad_norm": 2.1016597747802734,
      "learning_rate": 9.427726125603738e-06,
      "loss": 0.0457,
      "step": 193700
    },
    {
      "epoch": 0.31702702879623995,
      "grad_norm": 1.53525972366333,
      "learning_rate": 9.427660233390222e-06,
      "loss": 0.0414,
      "step": 193720
    },
    {
      "epoch": 0.31705975923489327,
      "grad_norm": 2.814681053161621,
      "learning_rate": 9.427594341176704e-06,
      "loss": 0.0515,
      "step": 193740
    },
    {
      "epoch": 0.3170924896735466,
      "grad_norm": 3.8781206607818604,
      "learning_rate": 9.427528448963187e-06,
      "loss": 0.058,
      "step": 193760
    },
    {
      "epoch": 0.31712522011219996,
      "grad_norm": 2.2501614093780518,
      "learning_rate": 9.427462556749669e-06,
      "loss": 0.0445,
      "step": 193780
    },
    {
      "epoch": 0.3171579505508533,
      "grad_norm": 2.036085844039917,
      "learning_rate": 9.427396664536153e-06,
      "loss": 0.053,
      "step": 193800
    },
    {
      "epoch": 0.31719068098950665,
      "grad_norm": 0.6438189744949341,
      "learning_rate": 9.427330772322636e-06,
      "loss": 0.053,
      "step": 193820
    },
    {
      "epoch": 0.31722341142815996,
      "grad_norm": 0.9681190848350525,
      "learning_rate": 9.427264880109118e-06,
      "loss": 0.0419,
      "step": 193840
    },
    {
      "epoch": 0.3172561418668133,
      "grad_norm": 2.661839485168457,
      "learning_rate": 9.427198987895602e-06,
      "loss": 0.0669,
      "step": 193860
    },
    {
      "epoch": 0.31728887230546665,
      "grad_norm": 1.8217219114303589,
      "learning_rate": 9.427133095682084e-06,
      "loss": 0.0599,
      "step": 193880
    },
    {
      "epoch": 0.31732160274411997,
      "grad_norm": 2.631721258163452,
      "learning_rate": 9.427067203468567e-06,
      "loss": 0.0515,
      "step": 193900
    },
    {
      "epoch": 0.31735433318277334,
      "grad_norm": 0.34674781560897827,
      "learning_rate": 9.427001311255049e-06,
      "loss": 0.0528,
      "step": 193920
    },
    {
      "epoch": 0.31738706362142666,
      "grad_norm": 2.2775473594665527,
      "learning_rate": 9.426935419041533e-06,
      "loss": 0.0493,
      "step": 193940
    },
    {
      "epoch": 0.31741979406008,
      "grad_norm": 1.075187087059021,
      "learning_rate": 9.426869526828015e-06,
      "loss": 0.0634,
      "step": 193960
    },
    {
      "epoch": 0.31745252449873335,
      "grad_norm": 5.963957786560059,
      "learning_rate": 9.426803634614498e-06,
      "loss": 0.044,
      "step": 193980
    },
    {
      "epoch": 0.31748525493738666,
      "grad_norm": 2.273953437805176,
      "learning_rate": 9.42673774240098e-06,
      "loss": 0.0428,
      "step": 194000
    },
    {
      "epoch": 0.31751798537604004,
      "grad_norm": 1.849634051322937,
      "learning_rate": 9.426671850187464e-06,
      "loss": 0.0432,
      "step": 194020
    },
    {
      "epoch": 0.31755071581469335,
      "grad_norm": 8.160370826721191,
      "learning_rate": 9.426605957973947e-06,
      "loss": 0.062,
      "step": 194040
    },
    {
      "epoch": 0.31758344625334667,
      "grad_norm": 0.1883736103773117,
      "learning_rate": 9.42654006576043e-06,
      "loss": 0.0616,
      "step": 194060
    },
    {
      "epoch": 0.31761617669200004,
      "grad_norm": 1.4279332160949707,
      "learning_rate": 9.426474173546913e-06,
      "loss": 0.0567,
      "step": 194080
    },
    {
      "epoch": 0.31764890713065336,
      "grad_norm": 0.9289644956588745,
      "learning_rate": 9.426408281333396e-06,
      "loss": 0.0596,
      "step": 194100
    },
    {
      "epoch": 0.31768163756930673,
      "grad_norm": 0.6524251103401184,
      "learning_rate": 9.426342389119878e-06,
      "loss": 0.0507,
      "step": 194120
    },
    {
      "epoch": 0.31771436800796005,
      "grad_norm": 1.1284900903701782,
      "learning_rate": 9.426276496906362e-06,
      "loss": 0.0719,
      "step": 194140
    },
    {
      "epoch": 0.31774709844661336,
      "grad_norm": 4.789248466491699,
      "learning_rate": 9.426210604692845e-06,
      "loss": 0.0521,
      "step": 194160
    },
    {
      "epoch": 0.31777982888526674,
      "grad_norm": 2.3903679847717285,
      "learning_rate": 9.426144712479327e-06,
      "loss": 0.0544,
      "step": 194180
    },
    {
      "epoch": 0.31781255932392005,
      "grad_norm": 1.6606829166412354,
      "learning_rate": 9.426078820265811e-06,
      "loss": 0.0348,
      "step": 194200
    },
    {
      "epoch": 0.3178452897625734,
      "grad_norm": 0.8005521297454834,
      "learning_rate": 9.426012928052293e-06,
      "loss": 0.0496,
      "step": 194220
    },
    {
      "epoch": 0.31787802020122674,
      "grad_norm": 1.7087154388427734,
      "learning_rate": 9.425947035838776e-06,
      "loss": 0.0413,
      "step": 194240
    },
    {
      "epoch": 0.31791075063988006,
      "grad_norm": 1.7923778295516968,
      "learning_rate": 9.425881143625258e-06,
      "loss": 0.0666,
      "step": 194260
    },
    {
      "epoch": 0.31794348107853343,
      "grad_norm": 1.1126610040664673,
      "learning_rate": 9.425815251411742e-06,
      "loss": 0.0484,
      "step": 194280
    },
    {
      "epoch": 0.31797621151718675,
      "grad_norm": 0.662514865398407,
      "learning_rate": 9.425749359198224e-06,
      "loss": 0.0608,
      "step": 194300
    },
    {
      "epoch": 0.3180089419558401,
      "grad_norm": 1.3235770463943481,
      "learning_rate": 9.425683466984707e-06,
      "loss": 0.0356,
      "step": 194320
    },
    {
      "epoch": 0.31804167239449344,
      "grad_norm": 1.637617588043213,
      "learning_rate": 9.42561757477119e-06,
      "loss": 0.0556,
      "step": 194340
    },
    {
      "epoch": 0.31807440283314675,
      "grad_norm": 1.4577900171279907,
      "learning_rate": 9.425551682557673e-06,
      "loss": 0.0555,
      "step": 194360
    },
    {
      "epoch": 0.3181071332718001,
      "grad_norm": 1.0906752347946167,
      "learning_rate": 9.425485790344155e-06,
      "loss": 0.0553,
      "step": 194380
    },
    {
      "epoch": 0.31813986371045344,
      "grad_norm": 1.929850697517395,
      "learning_rate": 9.425419898130638e-06,
      "loss": 0.057,
      "step": 194400
    },
    {
      "epoch": 0.3181725941491068,
      "grad_norm": 1.0334981679916382,
      "learning_rate": 9.425354005917122e-06,
      "loss": 0.0486,
      "step": 194420
    },
    {
      "epoch": 0.31820532458776013,
      "grad_norm": 3.0849597454071045,
      "learning_rate": 9.425288113703604e-06,
      "loss": 0.0406,
      "step": 194440
    },
    {
      "epoch": 0.31823805502641345,
      "grad_norm": 3.604340076446533,
      "learning_rate": 9.425222221490087e-06,
      "loss": 0.046,
      "step": 194460
    },
    {
      "epoch": 0.3182707854650668,
      "grad_norm": 1.0467396974563599,
      "learning_rate": 9.425156329276571e-06,
      "loss": 0.0566,
      "step": 194480
    },
    {
      "epoch": 0.31830351590372014,
      "grad_norm": 1.4263737201690674,
      "learning_rate": 9.425090437063053e-06,
      "loss": 0.0582,
      "step": 194500
    },
    {
      "epoch": 0.3183362463423735,
      "grad_norm": 4.825499057769775,
      "learning_rate": 9.425024544849536e-06,
      "loss": 0.0397,
      "step": 194520
    },
    {
      "epoch": 0.3183689767810268,
      "grad_norm": 1.1210386753082275,
      "learning_rate": 9.42495865263602e-06,
      "loss": 0.0563,
      "step": 194540
    },
    {
      "epoch": 0.31840170721968014,
      "grad_norm": 3.293076992034912,
      "learning_rate": 9.424892760422502e-06,
      "loss": 0.0443,
      "step": 194560
    },
    {
      "epoch": 0.3184344376583335,
      "grad_norm": 0.9480650424957275,
      "learning_rate": 9.424826868208985e-06,
      "loss": 0.0413,
      "step": 194580
    },
    {
      "epoch": 0.31846716809698683,
      "grad_norm": 5.5273661613464355,
      "learning_rate": 9.424760975995467e-06,
      "loss": 0.0648,
      "step": 194600
    },
    {
      "epoch": 0.3184998985356402,
      "grad_norm": 1.1365689039230347,
      "learning_rate": 9.424695083781951e-06,
      "loss": 0.0597,
      "step": 194620
    },
    {
      "epoch": 0.3185326289742935,
      "grad_norm": 2.5576837062835693,
      "learning_rate": 9.424629191568433e-06,
      "loss": 0.0556,
      "step": 194640
    },
    {
      "epoch": 0.31856535941294684,
      "grad_norm": 3.0686614513397217,
      "learning_rate": 9.424563299354916e-06,
      "loss": 0.0404,
      "step": 194660
    },
    {
      "epoch": 0.3185980898516002,
      "grad_norm": 2.4694807529449463,
      "learning_rate": 9.424497407141398e-06,
      "loss": 0.0472,
      "step": 194680
    },
    {
      "epoch": 0.3186308202902535,
      "grad_norm": 0.47858312726020813,
      "learning_rate": 9.424431514927882e-06,
      "loss": 0.0502,
      "step": 194700
    },
    {
      "epoch": 0.31866355072890684,
      "grad_norm": 4.9159135818481445,
      "learning_rate": 9.424365622714364e-06,
      "loss": 0.0508,
      "step": 194720
    },
    {
      "epoch": 0.3186962811675602,
      "grad_norm": 1.1040115356445312,
      "learning_rate": 9.424299730500847e-06,
      "loss": 0.0367,
      "step": 194740
    },
    {
      "epoch": 0.31872901160621353,
      "grad_norm": 3.765554189682007,
      "learning_rate": 9.42423383828733e-06,
      "loss": 0.0449,
      "step": 194760
    },
    {
      "epoch": 0.3187617420448669,
      "grad_norm": 3.7220301628112793,
      "learning_rate": 9.424167946073813e-06,
      "loss": 0.0476,
      "step": 194780
    },
    {
      "epoch": 0.3187944724835202,
      "grad_norm": 0.9769198298454285,
      "learning_rate": 9.424102053860296e-06,
      "loss": 0.0462,
      "step": 194800
    },
    {
      "epoch": 0.31882720292217354,
      "grad_norm": 0.8039570450782776,
      "learning_rate": 9.424036161646778e-06,
      "loss": 0.0586,
      "step": 194820
    },
    {
      "epoch": 0.3188599333608269,
      "grad_norm": 2.3797855377197266,
      "learning_rate": 9.423970269433262e-06,
      "loss": 0.058,
      "step": 194840
    },
    {
      "epoch": 0.3188926637994802,
      "grad_norm": 2.2337372303009033,
      "learning_rate": 9.423904377219746e-06,
      "loss": 0.0462,
      "step": 194860
    },
    {
      "epoch": 0.3189253942381336,
      "grad_norm": 2.2765936851501465,
      "learning_rate": 9.423838485006227e-06,
      "loss": 0.0425,
      "step": 194880
    },
    {
      "epoch": 0.3189581246767869,
      "grad_norm": 1.035493016242981,
      "learning_rate": 9.423772592792711e-06,
      "loss": 0.0572,
      "step": 194900
    },
    {
      "epoch": 0.31899085511544023,
      "grad_norm": 2.4841580390930176,
      "learning_rate": 9.423706700579195e-06,
      "loss": 0.0494,
      "step": 194920
    },
    {
      "epoch": 0.3190235855540936,
      "grad_norm": 2.248692512512207,
      "learning_rate": 9.423640808365676e-06,
      "loss": 0.0411,
      "step": 194940
    },
    {
      "epoch": 0.3190563159927469,
      "grad_norm": 1.1888965368270874,
      "learning_rate": 9.42357491615216e-06,
      "loss": 0.0322,
      "step": 194960
    },
    {
      "epoch": 0.3190890464314003,
      "grad_norm": 1.5009900331497192,
      "learning_rate": 9.423509023938642e-06,
      "loss": 0.038,
      "step": 194980
    },
    {
      "epoch": 0.3191217768700536,
      "grad_norm": 0.9864048361778259,
      "learning_rate": 9.423443131725126e-06,
      "loss": 0.0459,
      "step": 195000
    },
    {
      "epoch": 0.3191545073087069,
      "grad_norm": 7.95999002456665,
      "learning_rate": 9.423377239511607e-06,
      "loss": 0.0694,
      "step": 195020
    },
    {
      "epoch": 0.3191872377473603,
      "grad_norm": 1.897676706314087,
      "learning_rate": 9.423311347298091e-06,
      "loss": 0.0524,
      "step": 195040
    },
    {
      "epoch": 0.3192199681860136,
      "grad_norm": 0.9629898071289062,
      "learning_rate": 9.423245455084573e-06,
      "loss": 0.0564,
      "step": 195060
    },
    {
      "epoch": 0.319252698624667,
      "grad_norm": 1.1055530309677124,
      "learning_rate": 9.423179562871057e-06,
      "loss": 0.0451,
      "step": 195080
    },
    {
      "epoch": 0.3192854290633203,
      "grad_norm": 1.1093659400939941,
      "learning_rate": 9.423113670657538e-06,
      "loss": 0.0436,
      "step": 195100
    },
    {
      "epoch": 0.3193181595019736,
      "grad_norm": 1.5340946912765503,
      "learning_rate": 9.423047778444022e-06,
      "loss": 0.0766,
      "step": 195120
    },
    {
      "epoch": 0.319350889940627,
      "grad_norm": 4.267536640167236,
      "learning_rate": 9.422981886230504e-06,
      "loss": 0.0625,
      "step": 195140
    },
    {
      "epoch": 0.3193836203792803,
      "grad_norm": 1.9362727403640747,
      "learning_rate": 9.422915994016987e-06,
      "loss": 0.057,
      "step": 195160
    },
    {
      "epoch": 0.3194163508179337,
      "grad_norm": 1.2774816751480103,
      "learning_rate": 9.42285010180347e-06,
      "loss": 0.0669,
      "step": 195180
    },
    {
      "epoch": 0.319449081256587,
      "grad_norm": 1.4412513971328735,
      "learning_rate": 9.422784209589953e-06,
      "loss": 0.0544,
      "step": 195200
    },
    {
      "epoch": 0.3194818116952403,
      "grad_norm": 1.724258303642273,
      "learning_rate": 9.422718317376437e-06,
      "loss": 0.0476,
      "step": 195220
    },
    {
      "epoch": 0.3195145421338937,
      "grad_norm": 2.3102285861968994,
      "learning_rate": 9.422652425162918e-06,
      "loss": 0.0441,
      "step": 195240
    },
    {
      "epoch": 0.319547272572547,
      "grad_norm": 2.5945756435394287,
      "learning_rate": 9.422586532949402e-06,
      "loss": 0.055,
      "step": 195260
    },
    {
      "epoch": 0.3195800030112004,
      "grad_norm": 1.6673144102096558,
      "learning_rate": 9.422520640735886e-06,
      "loss": 0.0598,
      "step": 195280
    },
    {
      "epoch": 0.3196127334498537,
      "grad_norm": 2.984025239944458,
      "learning_rate": 9.422454748522368e-06,
      "loss": 0.0494,
      "step": 195300
    },
    {
      "epoch": 0.319645463888507,
      "grad_norm": 2.6782562732696533,
      "learning_rate": 9.422388856308851e-06,
      "loss": 0.0614,
      "step": 195320
    },
    {
      "epoch": 0.3196781943271604,
      "grad_norm": 1.4782129526138306,
      "learning_rate": 9.422322964095335e-06,
      "loss": 0.0465,
      "step": 195340
    },
    {
      "epoch": 0.3197109247658137,
      "grad_norm": 2.1367883682250977,
      "learning_rate": 9.422257071881817e-06,
      "loss": 0.0556,
      "step": 195360
    },
    {
      "epoch": 0.31974365520446707,
      "grad_norm": 1.3468139171600342,
      "learning_rate": 9.4221911796683e-06,
      "loss": 0.0446,
      "step": 195380
    },
    {
      "epoch": 0.3197763856431204,
      "grad_norm": 1.5174075365066528,
      "learning_rate": 9.422125287454782e-06,
      "loss": 0.0385,
      "step": 195400
    },
    {
      "epoch": 0.3198091160817737,
      "grad_norm": 1.2010406255722046,
      "learning_rate": 9.422059395241266e-06,
      "loss": 0.0402,
      "step": 195420
    },
    {
      "epoch": 0.3198418465204271,
      "grad_norm": 1.3749873638153076,
      "learning_rate": 9.421993503027748e-06,
      "loss": 0.0441,
      "step": 195440
    },
    {
      "epoch": 0.3198745769590804,
      "grad_norm": 0.7676972150802612,
      "learning_rate": 9.421927610814231e-06,
      "loss": 0.0559,
      "step": 195460
    },
    {
      "epoch": 0.31990730739773376,
      "grad_norm": 0.4335395097732544,
      "learning_rate": 9.421861718600713e-06,
      "loss": 0.0442,
      "step": 195480
    },
    {
      "epoch": 0.3199400378363871,
      "grad_norm": 0.8770121932029724,
      "learning_rate": 9.421795826387197e-06,
      "loss": 0.0355,
      "step": 195500
    },
    {
      "epoch": 0.3199727682750404,
      "grad_norm": 1.2716354131698608,
      "learning_rate": 9.421729934173678e-06,
      "loss": 0.041,
      "step": 195520
    },
    {
      "epoch": 0.32000549871369377,
      "grad_norm": 1.3253333568572998,
      "learning_rate": 9.421664041960162e-06,
      "loss": 0.0594,
      "step": 195540
    },
    {
      "epoch": 0.3200382291523471,
      "grad_norm": 1.6097822189331055,
      "learning_rate": 9.421598149746644e-06,
      "loss": 0.0625,
      "step": 195560
    },
    {
      "epoch": 0.32007095959100046,
      "grad_norm": 2.957386016845703,
      "learning_rate": 9.421532257533128e-06,
      "loss": 0.0517,
      "step": 195580
    },
    {
      "epoch": 0.3201036900296538,
      "grad_norm": 4.188527584075928,
      "learning_rate": 9.421466365319611e-06,
      "loss": 0.0432,
      "step": 195600
    },
    {
      "epoch": 0.3201364204683071,
      "grad_norm": 2.3326964378356934,
      "learning_rate": 9.421400473106093e-06,
      "loss": 0.0486,
      "step": 195620
    },
    {
      "epoch": 0.32016915090696046,
      "grad_norm": 1.6278756856918335,
      "learning_rate": 9.421334580892577e-06,
      "loss": 0.0501,
      "step": 195640
    },
    {
      "epoch": 0.3202018813456138,
      "grad_norm": 1.3064682483673096,
      "learning_rate": 9.42126868867906e-06,
      "loss": 0.0661,
      "step": 195660
    },
    {
      "epoch": 0.32023461178426715,
      "grad_norm": 2.2358193397521973,
      "learning_rate": 9.421202796465542e-06,
      "loss": 0.0466,
      "step": 195680
    },
    {
      "epoch": 0.32026734222292047,
      "grad_norm": 1.7978088855743408,
      "learning_rate": 9.421136904252026e-06,
      "loss": 0.0431,
      "step": 195700
    },
    {
      "epoch": 0.3203000726615738,
      "grad_norm": 0.8800023198127747,
      "learning_rate": 9.42107101203851e-06,
      "loss": 0.0492,
      "step": 195720
    },
    {
      "epoch": 0.32033280310022716,
      "grad_norm": 2.856353759765625,
      "learning_rate": 9.421005119824991e-06,
      "loss": 0.0385,
      "step": 195740
    },
    {
      "epoch": 0.3203655335388805,
      "grad_norm": 26.041643142700195,
      "learning_rate": 9.420939227611475e-06,
      "loss": 0.0654,
      "step": 195760
    },
    {
      "epoch": 0.32039826397753385,
      "grad_norm": 0.8182676434516907,
      "learning_rate": 9.420873335397957e-06,
      "loss": 0.0391,
      "step": 195780
    },
    {
      "epoch": 0.32043099441618716,
      "grad_norm": 2.696096897125244,
      "learning_rate": 9.42080744318444e-06,
      "loss": 0.0659,
      "step": 195800
    },
    {
      "epoch": 0.3204637248548405,
      "grad_norm": 2.787252187728882,
      "learning_rate": 9.420741550970922e-06,
      "loss": 0.0355,
      "step": 195820
    },
    {
      "epoch": 0.32049645529349385,
      "grad_norm": 2.807049512863159,
      "learning_rate": 9.420675658757406e-06,
      "loss": 0.0406,
      "step": 195840
    },
    {
      "epoch": 0.32052918573214717,
      "grad_norm": 2.9314301013946533,
      "learning_rate": 9.420609766543888e-06,
      "loss": 0.0629,
      "step": 195860
    },
    {
      "epoch": 0.32056191617080054,
      "grad_norm": 1.9661531448364258,
      "learning_rate": 9.420543874330371e-06,
      "loss": 0.053,
      "step": 195880
    },
    {
      "epoch": 0.32059464660945386,
      "grad_norm": 3.7645697593688965,
      "learning_rate": 9.420477982116853e-06,
      "loss": 0.0715,
      "step": 195900
    },
    {
      "epoch": 0.3206273770481072,
      "grad_norm": 2.579172372817993,
      "learning_rate": 9.420412089903337e-06,
      "loss": 0.0371,
      "step": 195920
    },
    {
      "epoch": 0.32066010748676055,
      "grad_norm": 3.2489144802093506,
      "learning_rate": 9.42034619768982e-06,
      "loss": 0.0569,
      "step": 195940
    },
    {
      "epoch": 0.32069283792541387,
      "grad_norm": 1.1264747381210327,
      "learning_rate": 9.420280305476302e-06,
      "loss": 0.0408,
      "step": 195960
    },
    {
      "epoch": 0.32072556836406724,
      "grad_norm": 0.9492139220237732,
      "learning_rate": 9.420214413262786e-06,
      "loss": 0.0454,
      "step": 195980
    },
    {
      "epoch": 0.32075829880272055,
      "grad_norm": 4.400721549987793,
      "learning_rate": 9.420148521049268e-06,
      "loss": 0.0539,
      "step": 196000
    },
    {
      "epoch": 0.32079102924137387,
      "grad_norm": 2.5355751514434814,
      "learning_rate": 9.420082628835751e-06,
      "loss": 0.0545,
      "step": 196020
    },
    {
      "epoch": 0.32082375968002724,
      "grad_norm": 1.3803949356079102,
      "learning_rate": 9.420016736622233e-06,
      "loss": 0.0373,
      "step": 196040
    },
    {
      "epoch": 0.32085649011868056,
      "grad_norm": 1.5401793718338013,
      "learning_rate": 9.419950844408717e-06,
      "loss": 0.0436,
      "step": 196060
    },
    {
      "epoch": 0.32088922055733393,
      "grad_norm": 3.316493034362793,
      "learning_rate": 9.4198849521952e-06,
      "loss": 0.052,
      "step": 196080
    },
    {
      "epoch": 0.32092195099598725,
      "grad_norm": 0.6889505386352539,
      "learning_rate": 9.419819059981682e-06,
      "loss": 0.0687,
      "step": 196100
    },
    {
      "epoch": 0.32095468143464057,
      "grad_norm": 1.7433372735977173,
      "learning_rate": 9.419753167768166e-06,
      "loss": 0.0522,
      "step": 196120
    },
    {
      "epoch": 0.32098741187329394,
      "grad_norm": 0.9405544996261597,
      "learning_rate": 9.41968727555465e-06,
      "loss": 0.0614,
      "step": 196140
    },
    {
      "epoch": 0.32102014231194725,
      "grad_norm": 3.532214879989624,
      "learning_rate": 9.419621383341131e-06,
      "loss": 0.0401,
      "step": 196160
    },
    {
      "epoch": 0.3210528727506006,
      "grad_norm": 0.752164363861084,
      "learning_rate": 9.419555491127615e-06,
      "loss": 0.0608,
      "step": 196180
    },
    {
      "epoch": 0.32108560318925394,
      "grad_norm": 1.6780873537063599,
      "learning_rate": 9.419489598914097e-06,
      "loss": 0.0548,
      "step": 196200
    },
    {
      "epoch": 0.32111833362790726,
      "grad_norm": 1.344942569732666,
      "learning_rate": 9.41942370670058e-06,
      "loss": 0.0578,
      "step": 196220
    },
    {
      "epoch": 0.32115106406656063,
      "grad_norm": 1.1554756164550781,
      "learning_rate": 9.419357814487062e-06,
      "loss": 0.0434,
      "step": 196240
    },
    {
      "epoch": 0.32118379450521395,
      "grad_norm": 3.9192771911621094,
      "learning_rate": 9.419291922273546e-06,
      "loss": 0.0406,
      "step": 196260
    },
    {
      "epoch": 0.3212165249438673,
      "grad_norm": 1.4693275690078735,
      "learning_rate": 9.41922603006003e-06,
      "loss": 0.0368,
      "step": 196280
    },
    {
      "epoch": 0.32124925538252064,
      "grad_norm": 1.6181211471557617,
      "learning_rate": 9.419160137846511e-06,
      "loss": 0.0567,
      "step": 196300
    },
    {
      "epoch": 0.32128198582117395,
      "grad_norm": 2.199345350265503,
      "learning_rate": 9.419094245632995e-06,
      "loss": 0.0738,
      "step": 196320
    },
    {
      "epoch": 0.3213147162598273,
      "grad_norm": 2.9056553840637207,
      "learning_rate": 9.419028353419477e-06,
      "loss": 0.0545,
      "step": 196340
    },
    {
      "epoch": 0.32134744669848064,
      "grad_norm": 2.3058435916900635,
      "learning_rate": 9.41896246120596e-06,
      "loss": 0.052,
      "step": 196360
    },
    {
      "epoch": 0.321380177137134,
      "grad_norm": 3.6598496437072754,
      "learning_rate": 9.418896568992442e-06,
      "loss": 0.0489,
      "step": 196380
    },
    {
      "epoch": 0.32141290757578733,
      "grad_norm": 2.1491000652313232,
      "learning_rate": 9.418830676778926e-06,
      "loss": 0.0499,
      "step": 196400
    },
    {
      "epoch": 0.32144563801444065,
      "grad_norm": 2.8822085857391357,
      "learning_rate": 9.418764784565408e-06,
      "loss": 0.0454,
      "step": 196420
    },
    {
      "epoch": 0.321478368453094,
      "grad_norm": 0.931187093257904,
      "learning_rate": 9.418698892351891e-06,
      "loss": 0.0465,
      "step": 196440
    },
    {
      "epoch": 0.32151109889174734,
      "grad_norm": 2.9578733444213867,
      "learning_rate": 9.418633000138375e-06,
      "loss": 0.0512,
      "step": 196460
    },
    {
      "epoch": 0.3215438293304007,
      "grad_norm": 2.493581533432007,
      "learning_rate": 9.418567107924857e-06,
      "loss": 0.0504,
      "step": 196480
    },
    {
      "epoch": 0.321576559769054,
      "grad_norm": 3.5356831550598145,
      "learning_rate": 9.41850121571134e-06,
      "loss": 0.0569,
      "step": 196500
    },
    {
      "epoch": 0.32160929020770734,
      "grad_norm": 1.0949736833572388,
      "learning_rate": 9.418435323497824e-06,
      "loss": 0.0541,
      "step": 196520
    },
    {
      "epoch": 0.3216420206463607,
      "grad_norm": 2.089240550994873,
      "learning_rate": 9.418369431284306e-06,
      "loss": 0.0495,
      "step": 196540
    },
    {
      "epoch": 0.32167475108501403,
      "grad_norm": 3.816506862640381,
      "learning_rate": 9.41830353907079e-06,
      "loss": 0.0545,
      "step": 196560
    },
    {
      "epoch": 0.3217074815236674,
      "grad_norm": 0.7981206774711609,
      "learning_rate": 9.418237646857271e-06,
      "loss": 0.0532,
      "step": 196580
    },
    {
      "epoch": 0.3217402119623207,
      "grad_norm": 0.5971953868865967,
      "learning_rate": 9.418171754643755e-06,
      "loss": 0.0604,
      "step": 196600
    },
    {
      "epoch": 0.32177294240097404,
      "grad_norm": 2.3899905681610107,
      "learning_rate": 9.418105862430238e-06,
      "loss": 0.0457,
      "step": 196620
    },
    {
      "epoch": 0.3218056728396274,
      "grad_norm": 7.973410129547119,
      "learning_rate": 9.41803997021672e-06,
      "loss": 0.072,
      "step": 196640
    },
    {
      "epoch": 0.3218384032782807,
      "grad_norm": 11.012188911437988,
      "learning_rate": 9.417974078003204e-06,
      "loss": 0.0534,
      "step": 196660
    },
    {
      "epoch": 0.3218711337169341,
      "grad_norm": 2.883928060531616,
      "learning_rate": 9.417908185789686e-06,
      "loss": 0.0454,
      "step": 196680
    },
    {
      "epoch": 0.3219038641555874,
      "grad_norm": 1.3140090703964233,
      "learning_rate": 9.41784229357617e-06,
      "loss": 0.0595,
      "step": 196700
    },
    {
      "epoch": 0.32193659459424073,
      "grad_norm": 1.118187665939331,
      "learning_rate": 9.417776401362651e-06,
      "loss": 0.0475,
      "step": 196720
    },
    {
      "epoch": 0.3219693250328941,
      "grad_norm": 1.373908519744873,
      "learning_rate": 9.417710509149135e-06,
      "loss": 0.052,
      "step": 196740
    },
    {
      "epoch": 0.3220020554715474,
      "grad_norm": 4.426510334014893,
      "learning_rate": 9.417644616935617e-06,
      "loss": 0.06,
      "step": 196760
    },
    {
      "epoch": 0.3220347859102008,
      "grad_norm": 1.518594741821289,
      "learning_rate": 9.4175787247221e-06,
      "loss": 0.0441,
      "step": 196780
    },
    {
      "epoch": 0.3220675163488541,
      "grad_norm": 4.023934841156006,
      "learning_rate": 9.417512832508582e-06,
      "loss": 0.06,
      "step": 196800
    },
    {
      "epoch": 0.3221002467875074,
      "grad_norm": 1.7867461442947388,
      "learning_rate": 9.417446940295066e-06,
      "loss": 0.0487,
      "step": 196820
    },
    {
      "epoch": 0.3221329772261608,
      "grad_norm": 3.467231035232544,
      "learning_rate": 9.417381048081548e-06,
      "loss": 0.0473,
      "step": 196840
    },
    {
      "epoch": 0.3221657076648141,
      "grad_norm": 2.9119367599487305,
      "learning_rate": 9.417315155868031e-06,
      "loss": 0.0558,
      "step": 196860
    },
    {
      "epoch": 0.3221984381034675,
      "grad_norm": 4.534298419952393,
      "learning_rate": 9.417249263654515e-06,
      "loss": 0.041,
      "step": 196880
    },
    {
      "epoch": 0.3222311685421208,
      "grad_norm": 1.4739826917648315,
      "learning_rate": 9.417183371440999e-06,
      "loss": 0.0586,
      "step": 196900
    },
    {
      "epoch": 0.3222638989807741,
      "grad_norm": 1.2065285444259644,
      "learning_rate": 9.41711747922748e-06,
      "loss": 0.0386,
      "step": 196920
    },
    {
      "epoch": 0.3222966294194275,
      "grad_norm": 2.9948878288269043,
      "learning_rate": 9.417051587013964e-06,
      "loss": 0.0444,
      "step": 196940
    },
    {
      "epoch": 0.3223293598580808,
      "grad_norm": 2.9911258220672607,
      "learning_rate": 9.416985694800446e-06,
      "loss": 0.0576,
      "step": 196960
    },
    {
      "epoch": 0.3223620902967342,
      "grad_norm": 2.038224220275879,
      "learning_rate": 9.41691980258693e-06,
      "loss": 0.0571,
      "step": 196980
    },
    {
      "epoch": 0.3223948207353875,
      "grad_norm": 0.7064347267150879,
      "learning_rate": 9.416853910373413e-06,
      "loss": 0.0538,
      "step": 197000
    },
    {
      "epoch": 0.3224275511740408,
      "grad_norm": 1.3890396356582642,
      "learning_rate": 9.416788018159895e-06,
      "loss": 0.0553,
      "step": 197020
    },
    {
      "epoch": 0.3224602816126942,
      "grad_norm": 2.5315592288970947,
      "learning_rate": 9.416722125946379e-06,
      "loss": 0.0545,
      "step": 197040
    },
    {
      "epoch": 0.3224930120513475,
      "grad_norm": 0.5573643445968628,
      "learning_rate": 9.41665623373286e-06,
      "loss": 0.0391,
      "step": 197060
    },
    {
      "epoch": 0.3225257424900009,
      "grad_norm": 1.4358707666397095,
      "learning_rate": 9.416590341519344e-06,
      "loss": 0.0514,
      "step": 197080
    },
    {
      "epoch": 0.3225584729286542,
      "grad_norm": 1.2181458473205566,
      "learning_rate": 9.416524449305826e-06,
      "loss": 0.0624,
      "step": 197100
    },
    {
      "epoch": 0.3225912033673075,
      "grad_norm": 0.7122614979743958,
      "learning_rate": 9.41645855709231e-06,
      "loss": 0.0528,
      "step": 197120
    },
    {
      "epoch": 0.3226239338059609,
      "grad_norm": 1.1678131818771362,
      "learning_rate": 9.416392664878791e-06,
      "loss": 0.0367,
      "step": 197140
    },
    {
      "epoch": 0.3226566642446142,
      "grad_norm": 2.8452560901641846,
      "learning_rate": 9.416326772665275e-06,
      "loss": 0.0555,
      "step": 197160
    },
    {
      "epoch": 0.32268939468326757,
      "grad_norm": 0.7898856401443481,
      "learning_rate": 9.416260880451757e-06,
      "loss": 0.0392,
      "step": 197180
    },
    {
      "epoch": 0.3227221251219209,
      "grad_norm": 2.4942667484283447,
      "learning_rate": 9.41619498823824e-06,
      "loss": 0.05,
      "step": 197200
    },
    {
      "epoch": 0.3227548555605742,
      "grad_norm": 1.123885989189148,
      "learning_rate": 9.416129096024722e-06,
      "loss": 0.0502,
      "step": 197220
    },
    {
      "epoch": 0.3227875859992276,
      "grad_norm": 1.3377389907836914,
      "learning_rate": 9.416063203811206e-06,
      "loss": 0.0331,
      "step": 197240
    },
    {
      "epoch": 0.3228203164378809,
      "grad_norm": 1.3917373418807983,
      "learning_rate": 9.41599731159769e-06,
      "loss": 0.0544,
      "step": 197260
    },
    {
      "epoch": 0.32285304687653427,
      "grad_norm": 1.6043215990066528,
      "learning_rate": 9.415931419384171e-06,
      "loss": 0.0475,
      "step": 197280
    },
    {
      "epoch": 0.3228857773151876,
      "grad_norm": 1.5313533544540405,
      "learning_rate": 9.415865527170655e-06,
      "loss": 0.0579,
      "step": 197300
    },
    {
      "epoch": 0.3229185077538409,
      "grad_norm": 1.22202467918396,
      "learning_rate": 9.415799634957139e-06,
      "loss": 0.0587,
      "step": 197320
    },
    {
      "epoch": 0.32295123819249427,
      "grad_norm": 0.4011479616165161,
      "learning_rate": 9.41573374274362e-06,
      "loss": 0.0474,
      "step": 197340
    },
    {
      "epoch": 0.3229839686311476,
      "grad_norm": 1.413368821144104,
      "learning_rate": 9.415667850530104e-06,
      "loss": 0.061,
      "step": 197360
    },
    {
      "epoch": 0.32301669906980096,
      "grad_norm": 1.502859354019165,
      "learning_rate": 9.415601958316588e-06,
      "loss": 0.043,
      "step": 197380
    },
    {
      "epoch": 0.3230494295084543,
      "grad_norm": 0.627629280090332,
      "learning_rate": 9.41553606610307e-06,
      "loss": 0.0438,
      "step": 197400
    },
    {
      "epoch": 0.3230821599471076,
      "grad_norm": 0.22526372969150543,
      "learning_rate": 9.415470173889553e-06,
      "loss": 0.0408,
      "step": 197420
    },
    {
      "epoch": 0.32311489038576097,
      "grad_norm": 2.707237958908081,
      "learning_rate": 9.415404281676035e-06,
      "loss": 0.0515,
      "step": 197440
    },
    {
      "epoch": 0.3231476208244143,
      "grad_norm": 3.67151141166687,
      "learning_rate": 9.415338389462519e-06,
      "loss": 0.0348,
      "step": 197460
    },
    {
      "epoch": 0.3231803512630676,
      "grad_norm": 3.4782371520996094,
      "learning_rate": 9.415272497249e-06,
      "loss": 0.0544,
      "step": 197480
    },
    {
      "epoch": 0.323213081701721,
      "grad_norm": 4.041981220245361,
      "learning_rate": 9.415206605035484e-06,
      "loss": 0.0495,
      "step": 197500
    },
    {
      "epoch": 0.3232458121403743,
      "grad_norm": 0.8225153088569641,
      "learning_rate": 9.415140712821966e-06,
      "loss": 0.0397,
      "step": 197520
    },
    {
      "epoch": 0.32327854257902766,
      "grad_norm": 1.3028160333633423,
      "learning_rate": 9.41507482060845e-06,
      "loss": 0.0463,
      "step": 197540
    },
    {
      "epoch": 0.323311273017681,
      "grad_norm": 1.8230360746383667,
      "learning_rate": 9.415008928394931e-06,
      "loss": 0.0602,
      "step": 197560
    },
    {
      "epoch": 0.3233440034563343,
      "grad_norm": 0.6539272665977478,
      "learning_rate": 9.414943036181415e-06,
      "loss": 0.0429,
      "step": 197580
    },
    {
      "epoch": 0.32337673389498767,
      "grad_norm": 3.755653142929077,
      "learning_rate": 9.414877143967897e-06,
      "loss": 0.0501,
      "step": 197600
    },
    {
      "epoch": 0.323409464333641,
      "grad_norm": 4.163490295410156,
      "learning_rate": 9.41481125175438e-06,
      "loss": 0.05,
      "step": 197620
    },
    {
      "epoch": 0.32344219477229436,
      "grad_norm": 0.76369708776474,
      "learning_rate": 9.414745359540864e-06,
      "loss": 0.0515,
      "step": 197640
    },
    {
      "epoch": 0.3234749252109477,
      "grad_norm": 2.539315938949585,
      "learning_rate": 9.414679467327346e-06,
      "loss": 0.0474,
      "step": 197660
    },
    {
      "epoch": 0.323507655649601,
      "grad_norm": 0.4633175730705261,
      "learning_rate": 9.41461357511383e-06,
      "loss": 0.0396,
      "step": 197680
    },
    {
      "epoch": 0.32354038608825436,
      "grad_norm": 1.8258179426193237,
      "learning_rate": 9.414547682900313e-06,
      "loss": 0.048,
      "step": 197700
    },
    {
      "epoch": 0.3235731165269077,
      "grad_norm": 4.2443623542785645,
      "learning_rate": 9.414481790686795e-06,
      "loss": 0.0466,
      "step": 197720
    },
    {
      "epoch": 0.32360584696556105,
      "grad_norm": 0.9107050895690918,
      "learning_rate": 9.414415898473279e-06,
      "loss": 0.0548,
      "step": 197740
    },
    {
      "epoch": 0.32363857740421437,
      "grad_norm": 1.6197360754013062,
      "learning_rate": 9.414350006259762e-06,
      "loss": 0.048,
      "step": 197760
    },
    {
      "epoch": 0.3236713078428677,
      "grad_norm": 0.5642671585083008,
      "learning_rate": 9.414284114046244e-06,
      "loss": 0.0416,
      "step": 197780
    },
    {
      "epoch": 0.32370403828152106,
      "grad_norm": 1.0847219228744507,
      "learning_rate": 9.414218221832728e-06,
      "loss": 0.0505,
      "step": 197800
    },
    {
      "epoch": 0.3237367687201744,
      "grad_norm": 3.1591503620147705,
      "learning_rate": 9.41415232961921e-06,
      "loss": 0.0569,
      "step": 197820
    },
    {
      "epoch": 0.32376949915882774,
      "grad_norm": 1.1162772178649902,
      "learning_rate": 9.414086437405693e-06,
      "loss": 0.0613,
      "step": 197840
    },
    {
      "epoch": 0.32380222959748106,
      "grad_norm": 1.3550535440444946,
      "learning_rate": 9.414020545192175e-06,
      "loss": 0.0454,
      "step": 197860
    },
    {
      "epoch": 0.3238349600361344,
      "grad_norm": 5.68312406539917,
      "learning_rate": 9.413954652978659e-06,
      "loss": 0.0399,
      "step": 197880
    },
    {
      "epoch": 0.32386769047478775,
      "grad_norm": 1.4028412103652954,
      "learning_rate": 9.41388876076514e-06,
      "loss": 0.0526,
      "step": 197900
    },
    {
      "epoch": 0.32390042091344107,
      "grad_norm": 1.7239243984222412,
      "learning_rate": 9.413822868551624e-06,
      "loss": 0.0739,
      "step": 197920
    },
    {
      "epoch": 0.32393315135209444,
      "grad_norm": 0.8387121558189392,
      "learning_rate": 9.413756976338106e-06,
      "loss": 0.0491,
      "step": 197940
    },
    {
      "epoch": 0.32396588179074776,
      "grad_norm": 1.5527957677841187,
      "learning_rate": 9.41369108412459e-06,
      "loss": 0.041,
      "step": 197960
    },
    {
      "epoch": 0.3239986122294011,
      "grad_norm": 3.3989102840423584,
      "learning_rate": 9.413625191911072e-06,
      "loss": 0.0514,
      "step": 197980
    },
    {
      "epoch": 0.32403134266805445,
      "grad_norm": 2.6632373332977295,
      "learning_rate": 9.413559299697555e-06,
      "loss": 0.0459,
      "step": 198000
    },
    {
      "epoch": 0.32406407310670776,
      "grad_norm": 1.6411478519439697,
      "learning_rate": 9.413493407484037e-06,
      "loss": 0.0426,
      "step": 198020
    },
    {
      "epoch": 0.32409680354536113,
      "grad_norm": 3.125096559524536,
      "learning_rate": 9.41342751527052e-06,
      "loss": 0.059,
      "step": 198040
    },
    {
      "epoch": 0.32412953398401445,
      "grad_norm": 1.0732661485671997,
      "learning_rate": 9.413361623057004e-06,
      "loss": 0.0567,
      "step": 198060
    },
    {
      "epoch": 0.32416226442266777,
      "grad_norm": 2.156898260116577,
      "learning_rate": 9.413295730843486e-06,
      "loss": 0.0468,
      "step": 198080
    },
    {
      "epoch": 0.32419499486132114,
      "grad_norm": 0.8822950124740601,
      "learning_rate": 9.41322983862997e-06,
      "loss": 0.0629,
      "step": 198100
    },
    {
      "epoch": 0.32422772529997446,
      "grad_norm": 1.5662168264389038,
      "learning_rate": 9.413163946416453e-06,
      "loss": 0.0414,
      "step": 198120
    },
    {
      "epoch": 0.32426045573862783,
      "grad_norm": 1.4092832803726196,
      "learning_rate": 9.413098054202935e-06,
      "loss": 0.0601,
      "step": 198140
    },
    {
      "epoch": 0.32429318617728115,
      "grad_norm": 3.8004097938537598,
      "learning_rate": 9.413032161989419e-06,
      "loss": 0.0424,
      "step": 198160
    },
    {
      "epoch": 0.32432591661593446,
      "grad_norm": 1.9946805238723755,
      "learning_rate": 9.412966269775902e-06,
      "loss": 0.0671,
      "step": 198180
    },
    {
      "epoch": 0.32435864705458783,
      "grad_norm": 2.7225160598754883,
      "learning_rate": 9.412900377562384e-06,
      "loss": 0.037,
      "step": 198200
    },
    {
      "epoch": 0.32439137749324115,
      "grad_norm": 0.98499995470047,
      "learning_rate": 9.412834485348868e-06,
      "loss": 0.0476,
      "step": 198220
    },
    {
      "epoch": 0.3244241079318945,
      "grad_norm": 1.7362524271011353,
      "learning_rate": 9.41276859313535e-06,
      "loss": 0.0553,
      "step": 198240
    },
    {
      "epoch": 0.32445683837054784,
      "grad_norm": 3.1227598190307617,
      "learning_rate": 9.412702700921833e-06,
      "loss": 0.0463,
      "step": 198260
    },
    {
      "epoch": 0.32448956880920116,
      "grad_norm": 3.407078266143799,
      "learning_rate": 9.412636808708315e-06,
      "loss": 0.0604,
      "step": 198280
    },
    {
      "epoch": 0.32452229924785453,
      "grad_norm": 0.720069944858551,
      "learning_rate": 9.412570916494799e-06,
      "loss": 0.0507,
      "step": 198300
    },
    {
      "epoch": 0.32455502968650785,
      "grad_norm": 1.5959737300872803,
      "learning_rate": 9.41250502428128e-06,
      "loss": 0.0412,
      "step": 198320
    },
    {
      "epoch": 0.3245877601251612,
      "grad_norm": 0.3588173985481262,
      "learning_rate": 9.412439132067764e-06,
      "loss": 0.0476,
      "step": 198340
    },
    {
      "epoch": 0.32462049056381453,
      "grad_norm": 1.6768378019332886,
      "learning_rate": 9.412373239854246e-06,
      "loss": 0.0443,
      "step": 198360
    },
    {
      "epoch": 0.32465322100246785,
      "grad_norm": 1.2321966886520386,
      "learning_rate": 9.41230734764073e-06,
      "loss": 0.0544,
      "step": 198380
    },
    {
      "epoch": 0.3246859514411212,
      "grad_norm": 3.6218605041503906,
      "learning_rate": 9.412241455427213e-06,
      "loss": 0.0502,
      "step": 198400
    },
    {
      "epoch": 0.32471868187977454,
      "grad_norm": 1.954299807548523,
      "learning_rate": 9.412175563213695e-06,
      "loss": 0.0433,
      "step": 198420
    },
    {
      "epoch": 0.3247514123184279,
      "grad_norm": 10.528045654296875,
      "learning_rate": 9.412109671000179e-06,
      "loss": 0.0654,
      "step": 198440
    },
    {
      "epoch": 0.32478414275708123,
      "grad_norm": 2.1739847660064697,
      "learning_rate": 9.41204377878666e-06,
      "loss": 0.0507,
      "step": 198460
    },
    {
      "epoch": 0.32481687319573455,
      "grad_norm": 0.6556066870689392,
      "learning_rate": 9.411977886573144e-06,
      "loss": 0.052,
      "step": 198480
    },
    {
      "epoch": 0.3248496036343879,
      "grad_norm": 4.964200496673584,
      "learning_rate": 9.411911994359628e-06,
      "loss": 0.0414,
      "step": 198500
    },
    {
      "epoch": 0.32488233407304123,
      "grad_norm": 1.653588056564331,
      "learning_rate": 9.41184610214611e-06,
      "loss": 0.0446,
      "step": 198520
    },
    {
      "epoch": 0.3249150645116946,
      "grad_norm": 1.3119235038757324,
      "learning_rate": 9.411780209932593e-06,
      "loss": 0.0445,
      "step": 198540
    },
    {
      "epoch": 0.3249477949503479,
      "grad_norm": 7.745993137359619,
      "learning_rate": 9.411714317719077e-06,
      "loss": 0.0569,
      "step": 198560
    },
    {
      "epoch": 0.32498052538900124,
      "grad_norm": 1.4497919082641602,
      "learning_rate": 9.411648425505559e-06,
      "loss": 0.0387,
      "step": 198580
    },
    {
      "epoch": 0.3250132558276546,
      "grad_norm": 1.5935325622558594,
      "learning_rate": 9.411582533292042e-06,
      "loss": 0.0505,
      "step": 198600
    },
    {
      "epoch": 0.32504598626630793,
      "grad_norm": 3.579967975616455,
      "learning_rate": 9.411516641078524e-06,
      "loss": 0.0453,
      "step": 198620
    },
    {
      "epoch": 0.3250787167049613,
      "grad_norm": 1.3691903352737427,
      "learning_rate": 9.411450748865008e-06,
      "loss": 0.0417,
      "step": 198640
    },
    {
      "epoch": 0.3251114471436146,
      "grad_norm": 0.6556674838066101,
      "learning_rate": 9.41138485665149e-06,
      "loss": 0.0442,
      "step": 198660
    },
    {
      "epoch": 0.32514417758226793,
      "grad_norm": 2.818596363067627,
      "learning_rate": 9.411318964437973e-06,
      "loss": 0.0522,
      "step": 198680
    },
    {
      "epoch": 0.3251769080209213,
      "grad_norm": 1.8130178451538086,
      "learning_rate": 9.411253072224455e-06,
      "loss": 0.0556,
      "step": 198700
    },
    {
      "epoch": 0.3252096384595746,
      "grad_norm": 3.4771711826324463,
      "learning_rate": 9.411187180010939e-06,
      "loss": 0.0637,
      "step": 198720
    },
    {
      "epoch": 0.325242368898228,
      "grad_norm": 1.443712592124939,
      "learning_rate": 9.411121287797422e-06,
      "loss": 0.0564,
      "step": 198740
    },
    {
      "epoch": 0.3252750993368813,
      "grad_norm": 0.6554251313209534,
      "learning_rate": 9.411055395583904e-06,
      "loss": 0.0469,
      "step": 198760
    },
    {
      "epoch": 0.32530782977553463,
      "grad_norm": 0.965094268321991,
      "learning_rate": 9.410989503370388e-06,
      "loss": 0.053,
      "step": 198780
    },
    {
      "epoch": 0.325340560214188,
      "grad_norm": 4.386620044708252,
      "learning_rate": 9.41092361115687e-06,
      "loss": 0.0456,
      "step": 198800
    },
    {
      "epoch": 0.3253732906528413,
      "grad_norm": 1.8155242204666138,
      "learning_rate": 9.410857718943353e-06,
      "loss": 0.0529,
      "step": 198820
    },
    {
      "epoch": 0.3254060210914947,
      "grad_norm": 2.8639466762542725,
      "learning_rate": 9.410791826729835e-06,
      "loss": 0.0485,
      "step": 198840
    },
    {
      "epoch": 0.325438751530148,
      "grad_norm": 0.8987303376197815,
      "learning_rate": 9.410725934516319e-06,
      "loss": 0.0543,
      "step": 198860
    },
    {
      "epoch": 0.3254714819688013,
      "grad_norm": 1.2821531295776367,
      "learning_rate": 9.4106600423028e-06,
      "loss": 0.043,
      "step": 198880
    },
    {
      "epoch": 0.3255042124074547,
      "grad_norm": 1.298445224761963,
      "learning_rate": 9.410594150089284e-06,
      "loss": 0.0582,
      "step": 198900
    },
    {
      "epoch": 0.325536942846108,
      "grad_norm": 0.8998410105705261,
      "learning_rate": 9.410528257875768e-06,
      "loss": 0.0427,
      "step": 198920
    },
    {
      "epoch": 0.3255696732847614,
      "grad_norm": 2.247666597366333,
      "learning_rate": 9.41046236566225e-06,
      "loss": 0.0568,
      "step": 198940
    },
    {
      "epoch": 0.3256024037234147,
      "grad_norm": 1.089212417602539,
      "learning_rate": 9.410396473448733e-06,
      "loss": 0.0541,
      "step": 198960
    },
    {
      "epoch": 0.325635134162068,
      "grad_norm": 3.7587056159973145,
      "learning_rate": 9.410330581235217e-06,
      "loss": 0.0452,
      "step": 198980
    },
    {
      "epoch": 0.3256678646007214,
      "grad_norm": 2.5733766555786133,
      "learning_rate": 9.410264689021699e-06,
      "loss": 0.0591,
      "step": 199000
    },
    {
      "epoch": 0.3257005950393747,
      "grad_norm": 1.5833803415298462,
      "learning_rate": 9.410198796808183e-06,
      "loss": 0.053,
      "step": 199020
    },
    {
      "epoch": 0.3257333254780281,
      "grad_norm": 1.0520099401474,
      "learning_rate": 9.410132904594664e-06,
      "loss": 0.0302,
      "step": 199040
    },
    {
      "epoch": 0.3257660559166814,
      "grad_norm": 4.245423316955566,
      "learning_rate": 9.410067012381148e-06,
      "loss": 0.0655,
      "step": 199060
    },
    {
      "epoch": 0.3257987863553347,
      "grad_norm": 0.7301044464111328,
      "learning_rate": 9.41000112016763e-06,
      "loss": 0.0389,
      "step": 199080
    },
    {
      "epoch": 0.3258315167939881,
      "grad_norm": 0.25761285424232483,
      "learning_rate": 9.409935227954113e-06,
      "loss": 0.0533,
      "step": 199100
    },
    {
      "epoch": 0.3258642472326414,
      "grad_norm": 2.5822763442993164,
      "learning_rate": 9.409869335740597e-06,
      "loss": 0.0398,
      "step": 199120
    },
    {
      "epoch": 0.3258969776712948,
      "grad_norm": 3.16837477684021,
      "learning_rate": 9.409803443527079e-06,
      "loss": 0.0476,
      "step": 199140
    },
    {
      "epoch": 0.3259297081099481,
      "grad_norm": 3.628917694091797,
      "learning_rate": 9.409737551313563e-06,
      "loss": 0.0574,
      "step": 199160
    },
    {
      "epoch": 0.3259624385486014,
      "grad_norm": 0.3915441334247589,
      "learning_rate": 9.409671659100044e-06,
      "loss": 0.054,
      "step": 199180
    },
    {
      "epoch": 0.3259951689872548,
      "grad_norm": 1.0173548460006714,
      "learning_rate": 9.409605766886528e-06,
      "loss": 0.0487,
      "step": 199200
    },
    {
      "epoch": 0.3260278994259081,
      "grad_norm": 2.366624116897583,
      "learning_rate": 9.40953987467301e-06,
      "loss": 0.0514,
      "step": 199220
    },
    {
      "epoch": 0.32606062986456147,
      "grad_norm": 0.9226993322372437,
      "learning_rate": 9.409473982459493e-06,
      "loss": 0.0668,
      "step": 199240
    },
    {
      "epoch": 0.3260933603032148,
      "grad_norm": 2.0381431579589844,
      "learning_rate": 9.409408090245975e-06,
      "loss": 0.0438,
      "step": 199260
    },
    {
      "epoch": 0.3261260907418681,
      "grad_norm": 1.6083462238311768,
      "learning_rate": 9.409342198032459e-06,
      "loss": 0.0451,
      "step": 199280
    },
    {
      "epoch": 0.3261588211805215,
      "grad_norm": 0.41258570551872253,
      "learning_rate": 9.409276305818943e-06,
      "loss": 0.053,
      "step": 199300
    },
    {
      "epoch": 0.3261915516191748,
      "grad_norm": 1.3640539646148682,
      "learning_rate": 9.409210413605424e-06,
      "loss": 0.0577,
      "step": 199320
    },
    {
      "epoch": 0.32622428205782816,
      "grad_norm": 3.2497947216033936,
      "learning_rate": 9.409144521391908e-06,
      "loss": 0.0536,
      "step": 199340
    },
    {
      "epoch": 0.3262570124964815,
      "grad_norm": 0.9477925896644592,
      "learning_rate": 9.409078629178392e-06,
      "loss": 0.0448,
      "step": 199360
    },
    {
      "epoch": 0.3262897429351348,
      "grad_norm": 1.0855823755264282,
      "learning_rate": 9.409012736964874e-06,
      "loss": 0.05,
      "step": 199380
    },
    {
      "epoch": 0.32632247337378817,
      "grad_norm": 1.8175972700119019,
      "learning_rate": 9.408946844751357e-06,
      "loss": 0.0507,
      "step": 199400
    },
    {
      "epoch": 0.3263552038124415,
      "grad_norm": 2.5085983276367188,
      "learning_rate": 9.408880952537839e-06,
      "loss": 0.051,
      "step": 199420
    },
    {
      "epoch": 0.32638793425109486,
      "grad_norm": 0.5323766469955444,
      "learning_rate": 9.408815060324323e-06,
      "loss": 0.0497,
      "step": 199440
    },
    {
      "epoch": 0.3264206646897482,
      "grad_norm": 0.7287358641624451,
      "learning_rate": 9.408749168110806e-06,
      "loss": 0.044,
      "step": 199460
    },
    {
      "epoch": 0.3264533951284015,
      "grad_norm": 1.1887669563293457,
      "learning_rate": 9.408683275897288e-06,
      "loss": 0.0319,
      "step": 199480
    },
    {
      "epoch": 0.32648612556705486,
      "grad_norm": 1.6830322742462158,
      "learning_rate": 9.408617383683772e-06,
      "loss": 0.0582,
      "step": 199500
    },
    {
      "epoch": 0.3265188560057082,
      "grad_norm": 0.36228129267692566,
      "learning_rate": 9.408551491470254e-06,
      "loss": 0.058,
      "step": 199520
    },
    {
      "epoch": 0.32655158644436155,
      "grad_norm": 1.3196187019348145,
      "learning_rate": 9.408485599256737e-06,
      "loss": 0.0661,
      "step": 199540
    },
    {
      "epoch": 0.32658431688301487,
      "grad_norm": 5.0337371826171875,
      "learning_rate": 9.408419707043219e-06,
      "loss": 0.0575,
      "step": 199560
    },
    {
      "epoch": 0.3266170473216682,
      "grad_norm": 0.8868515491485596,
      "learning_rate": 9.408353814829703e-06,
      "loss": 0.0398,
      "step": 199580
    },
    {
      "epoch": 0.32664977776032156,
      "grad_norm": 3.494537591934204,
      "learning_rate": 9.408287922616185e-06,
      "loss": 0.0488,
      "step": 199600
    },
    {
      "epoch": 0.3266825081989749,
      "grad_norm": 2.059201717376709,
      "learning_rate": 9.408222030402668e-06,
      "loss": 0.0593,
      "step": 199620
    },
    {
      "epoch": 0.32671523863762825,
      "grad_norm": 1.4336012601852417,
      "learning_rate": 9.40815613818915e-06,
      "loss": 0.0659,
      "step": 199640
    },
    {
      "epoch": 0.32674796907628156,
      "grad_norm": 0.2932310104370117,
      "learning_rate": 9.408090245975634e-06,
      "loss": 0.0578,
      "step": 199660
    },
    {
      "epoch": 0.3267806995149349,
      "grad_norm": 1.4243963956832886,
      "learning_rate": 9.408024353762117e-06,
      "loss": 0.0735,
      "step": 199680
    },
    {
      "epoch": 0.32681342995358825,
      "grad_norm": 2.4155263900756836,
      "learning_rate": 9.407958461548599e-06,
      "loss": 0.0548,
      "step": 199700
    },
    {
      "epoch": 0.32684616039224157,
      "grad_norm": 1.455316185951233,
      "learning_rate": 9.407892569335083e-06,
      "loss": 0.0348,
      "step": 199720
    },
    {
      "epoch": 0.32687889083089494,
      "grad_norm": 1.7692333459854126,
      "learning_rate": 9.407826677121566e-06,
      "loss": 0.0553,
      "step": 199740
    },
    {
      "epoch": 0.32691162126954826,
      "grad_norm": 0.9712076187133789,
      "learning_rate": 9.407760784908048e-06,
      "loss": 0.053,
      "step": 199760
    },
    {
      "epoch": 0.3269443517082016,
      "grad_norm": 1.4506454467773438,
      "learning_rate": 9.407694892694532e-06,
      "loss": 0.0449,
      "step": 199780
    },
    {
      "epoch": 0.32697708214685495,
      "grad_norm": 1.256187915802002,
      "learning_rate": 9.407629000481015e-06,
      "loss": 0.0553,
      "step": 199800
    },
    {
      "epoch": 0.32700981258550826,
      "grad_norm": 4.4112868309021,
      "learning_rate": 9.407563108267497e-06,
      "loss": 0.0646,
      "step": 199820
    },
    {
      "epoch": 0.32704254302416164,
      "grad_norm": 2.463837146759033,
      "learning_rate": 9.40749721605398e-06,
      "loss": 0.0619,
      "step": 199840
    },
    {
      "epoch": 0.32707527346281495,
      "grad_norm": 0.893298327922821,
      "learning_rate": 9.407431323840463e-06,
      "loss": 0.0482,
      "step": 199860
    },
    {
      "epoch": 0.32710800390146827,
      "grad_norm": 6.263863563537598,
      "learning_rate": 9.407365431626946e-06,
      "loss": 0.0443,
      "step": 199880
    },
    {
      "epoch": 0.32714073434012164,
      "grad_norm": 1.9286985397338867,
      "learning_rate": 9.407299539413428e-06,
      "loss": 0.0386,
      "step": 199900
    },
    {
      "epoch": 0.32717346477877496,
      "grad_norm": 2.1803197860717773,
      "learning_rate": 9.407233647199912e-06,
      "loss": 0.0431,
      "step": 199920
    },
    {
      "epoch": 0.32720619521742833,
      "grad_norm": 0.8921597599983215,
      "learning_rate": 9.407167754986394e-06,
      "loss": 0.0455,
      "step": 199940
    },
    {
      "epoch": 0.32723892565608165,
      "grad_norm": 3.208559989929199,
      "learning_rate": 9.407101862772877e-06,
      "loss": 0.0381,
      "step": 199960
    },
    {
      "epoch": 0.32727165609473496,
      "grad_norm": 2.124441146850586,
      "learning_rate": 9.407035970559359e-06,
      "loss": 0.0569,
      "step": 199980
    },
    {
      "epoch": 0.32730438653338834,
      "grad_norm": 0.7178069949150085,
      "learning_rate": 9.406970078345843e-06,
      "loss": 0.048,
      "step": 200000
    },
    {
      "epoch": 0.32730438653338834,
      "eval_loss": 0.025067325681447983,
      "eval_runtime": 6516.2754,
      "eval_samples_per_second": 157.737,
      "eval_steps_per_second": 15.774,
      "eval_sts-dev_pearson_cosine": 0.9388005432239385,
      "eval_sts-dev_spearman_cosine": 0.8676226093963929,
      "step": 200000
    },
    {
      "epoch": 0.32733711697204165,
      "grad_norm": 2.156061887741089,
      "learning_rate": 9.406904186132325e-06,
      "loss": 0.0582,
      "step": 200020
    },
    {
      "epoch": 0.327369847410695,
      "grad_norm": 0.9801411628723145,
      "learning_rate": 9.406838293918808e-06,
      "loss": 0.0513,
      "step": 200040
    },
    {
      "epoch": 0.32740257784934834,
      "grad_norm": 0.6660376787185669,
      "learning_rate": 9.40677240170529e-06,
      "loss": 0.0584,
      "step": 200060
    },
    {
      "epoch": 0.32743530828800166,
      "grad_norm": 0.7824437022209167,
      "learning_rate": 9.406706509491774e-06,
      "loss": 0.0667,
      "step": 200080
    },
    {
      "epoch": 0.32746803872665503,
      "grad_norm": 2.3622894287109375,
      "learning_rate": 9.406640617278257e-06,
      "loss": 0.0471,
      "step": 200100
    },
    {
      "epoch": 0.32750076916530835,
      "grad_norm": 1.657612681388855,
      "learning_rate": 9.406574725064739e-06,
      "loss": 0.0581,
      "step": 200120
    },
    {
      "epoch": 0.3275334996039617,
      "grad_norm": 2.2851617336273193,
      "learning_rate": 9.406508832851223e-06,
      "loss": 0.0609,
      "step": 200140
    },
    {
      "epoch": 0.32756623004261504,
      "grad_norm": 1.7970764636993408,
      "learning_rate": 9.406442940637706e-06,
      "loss": 0.0438,
      "step": 200160
    },
    {
      "epoch": 0.32759896048126835,
      "grad_norm": 0.37924331426620483,
      "learning_rate": 9.406377048424188e-06,
      "loss": 0.056,
      "step": 200180
    },
    {
      "epoch": 0.3276316909199217,
      "grad_norm": 1.8700311183929443,
      "learning_rate": 9.406311156210672e-06,
      "loss": 0.0594,
      "step": 200200
    },
    {
      "epoch": 0.32766442135857504,
      "grad_norm": 1.4282505512237549,
      "learning_rate": 9.406245263997155e-06,
      "loss": 0.05,
      "step": 200220
    },
    {
      "epoch": 0.32769715179722836,
      "grad_norm": 9.458130836486816,
      "learning_rate": 9.406179371783637e-06,
      "loss": 0.044,
      "step": 200240
    },
    {
      "epoch": 0.32772988223588173,
      "grad_norm": 2.1663451194763184,
      "learning_rate": 9.40611347957012e-06,
      "loss": 0.0528,
      "step": 200260
    },
    {
      "epoch": 0.32776261267453505,
      "grad_norm": 1.6258795261383057,
      "learning_rate": 9.406047587356603e-06,
      "loss": 0.0521,
      "step": 200280
    },
    {
      "epoch": 0.3277953431131884,
      "grad_norm": 4.38129186630249,
      "learning_rate": 9.405981695143086e-06,
      "loss": 0.0511,
      "step": 200300
    },
    {
      "epoch": 0.32782807355184174,
      "grad_norm": 1.568132758140564,
      "learning_rate": 9.405915802929568e-06,
      "loss": 0.0352,
      "step": 200320
    },
    {
      "epoch": 0.32786080399049505,
      "grad_norm": 3.093127965927124,
      "learning_rate": 9.405849910716052e-06,
      "loss": 0.054,
      "step": 200340
    },
    {
      "epoch": 0.3278935344291484,
      "grad_norm": 1.3157565593719482,
      "learning_rate": 9.405784018502534e-06,
      "loss": 0.0576,
      "step": 200360
    },
    {
      "epoch": 0.32792626486780174,
      "grad_norm": 1.4885516166687012,
      "learning_rate": 9.405718126289017e-06,
      "loss": 0.0596,
      "step": 200380
    },
    {
      "epoch": 0.3279589953064551,
      "grad_norm": 2.4353673458099365,
      "learning_rate": 9.4056522340755e-06,
      "loss": 0.0573,
      "step": 200400
    },
    {
      "epoch": 0.32799172574510843,
      "grad_norm": 0.7293978333473206,
      "learning_rate": 9.405586341861983e-06,
      "loss": 0.046,
      "step": 200420
    },
    {
      "epoch": 0.32802445618376175,
      "grad_norm": 1.9560526609420776,
      "learning_rate": 9.405520449648465e-06,
      "loss": 0.057,
      "step": 200440
    },
    {
      "epoch": 0.3280571866224151,
      "grad_norm": 2.260014295578003,
      "learning_rate": 9.405454557434948e-06,
      "loss": 0.063,
      "step": 200460
    },
    {
      "epoch": 0.32808991706106844,
      "grad_norm": 1.4739301204681396,
      "learning_rate": 9.405388665221432e-06,
      "loss": 0.0672,
      "step": 200480
    },
    {
      "epoch": 0.3281226474997218,
      "grad_norm": 2.837005138397217,
      "learning_rate": 9.405322773007914e-06,
      "loss": 0.0502,
      "step": 200500
    },
    {
      "epoch": 0.3281553779383751,
      "grad_norm": 2.072441816329956,
      "learning_rate": 9.405256880794397e-06,
      "loss": 0.0599,
      "step": 200520
    },
    {
      "epoch": 0.32818810837702844,
      "grad_norm": 1.698055624961853,
      "learning_rate": 9.405190988580881e-06,
      "loss": 0.0492,
      "step": 200540
    },
    {
      "epoch": 0.3282208388156818,
      "grad_norm": 5.085409641265869,
      "learning_rate": 9.405125096367363e-06,
      "loss": 0.0566,
      "step": 200560
    },
    {
      "epoch": 0.32825356925433513,
      "grad_norm": 0.789597749710083,
      "learning_rate": 9.405059204153846e-06,
      "loss": 0.0582,
      "step": 200580
    },
    {
      "epoch": 0.3282862996929885,
      "grad_norm": 1.7711718082427979,
      "learning_rate": 9.40499331194033e-06,
      "loss": 0.0519,
      "step": 200600
    },
    {
      "epoch": 0.3283190301316418,
      "grad_norm": 2.2049388885498047,
      "learning_rate": 9.404927419726812e-06,
      "loss": 0.0484,
      "step": 200620
    },
    {
      "epoch": 0.32835176057029514,
      "grad_norm": 2.447420358657837,
      "learning_rate": 9.404861527513295e-06,
      "loss": 0.0541,
      "step": 200640
    },
    {
      "epoch": 0.3283844910089485,
      "grad_norm": 4.753474712371826,
      "learning_rate": 9.404795635299777e-06,
      "loss": 0.0601,
      "step": 200660
    },
    {
      "epoch": 0.3284172214476018,
      "grad_norm": 2.2866170406341553,
      "learning_rate": 9.404729743086261e-06,
      "loss": 0.0558,
      "step": 200680
    },
    {
      "epoch": 0.3284499518862552,
      "grad_norm": 1.7961933612823486,
      "learning_rate": 9.404663850872743e-06,
      "loss": 0.055,
      "step": 200700
    },
    {
      "epoch": 0.3284826823249085,
      "grad_norm": 2.3860228061676025,
      "learning_rate": 9.404597958659226e-06,
      "loss": 0.0516,
      "step": 200720
    },
    {
      "epoch": 0.32851541276356183,
      "grad_norm": 2.35665225982666,
      "learning_rate": 9.404532066445708e-06,
      "loss": 0.0481,
      "step": 200740
    },
    {
      "epoch": 0.3285481432022152,
      "grad_norm": 0.9472538232803345,
      "learning_rate": 9.404466174232192e-06,
      "loss": 0.0498,
      "step": 200760
    },
    {
      "epoch": 0.3285808736408685,
      "grad_norm": 1.6648041009902954,
      "learning_rate": 9.404400282018674e-06,
      "loss": 0.0442,
      "step": 200780
    },
    {
      "epoch": 0.3286136040795219,
      "grad_norm": 1.7819877862930298,
      "learning_rate": 9.404334389805157e-06,
      "loss": 0.0624,
      "step": 200800
    },
    {
      "epoch": 0.3286463345181752,
      "grad_norm": 1.2212687730789185,
      "learning_rate": 9.40426849759164e-06,
      "loss": 0.0517,
      "step": 200820
    },
    {
      "epoch": 0.3286790649568285,
      "grad_norm": 0.7648096084594727,
      "learning_rate": 9.404202605378123e-06,
      "loss": 0.0477,
      "step": 200840
    },
    {
      "epoch": 0.3287117953954819,
      "grad_norm": 0.6516290903091431,
      "learning_rate": 9.404136713164606e-06,
      "loss": 0.0512,
      "step": 200860
    },
    {
      "epoch": 0.3287445258341352,
      "grad_norm": 3.3714635372161865,
      "learning_rate": 9.404070820951088e-06,
      "loss": 0.0544,
      "step": 200880
    },
    {
      "epoch": 0.3287772562727886,
      "grad_norm": 3.965728998184204,
      "learning_rate": 9.404004928737572e-06,
      "loss": 0.059,
      "step": 200900
    },
    {
      "epoch": 0.3288099867114419,
      "grad_norm": 0.9707949757575989,
      "learning_rate": 9.403939036524054e-06,
      "loss": 0.0667,
      "step": 200920
    },
    {
      "epoch": 0.3288427171500952,
      "grad_norm": 3.991528272628784,
      "learning_rate": 9.403873144310537e-06,
      "loss": 0.0501,
      "step": 200940
    },
    {
      "epoch": 0.3288754475887486,
      "grad_norm": 1.09275221824646,
      "learning_rate": 9.403807252097021e-06,
      "loss": 0.045,
      "step": 200960
    },
    {
      "epoch": 0.3289081780274019,
      "grad_norm": 5.076295852661133,
      "learning_rate": 9.403741359883503e-06,
      "loss": 0.0413,
      "step": 200980
    },
    {
      "epoch": 0.3289409084660553,
      "grad_norm": 1.1320871114730835,
      "learning_rate": 9.403675467669986e-06,
      "loss": 0.0565,
      "step": 201000
    },
    {
      "epoch": 0.3289736389047086,
      "grad_norm": 1.8725919723510742,
      "learning_rate": 9.40360957545647e-06,
      "loss": 0.0528,
      "step": 201020
    },
    {
      "epoch": 0.3290063693433619,
      "grad_norm": 1.807519793510437,
      "learning_rate": 9.403543683242952e-06,
      "loss": 0.053,
      "step": 201040
    },
    {
      "epoch": 0.3290390997820153,
      "grad_norm": 1.1030787229537964,
      "learning_rate": 9.403477791029436e-06,
      "loss": 0.0644,
      "step": 201060
    },
    {
      "epoch": 0.3290718302206686,
      "grad_norm": 1.0309247970581055,
      "learning_rate": 9.403411898815917e-06,
      "loss": 0.0309,
      "step": 201080
    },
    {
      "epoch": 0.329104560659322,
      "grad_norm": 3.5883748531341553,
      "learning_rate": 9.403346006602401e-06,
      "loss": 0.0619,
      "step": 201100
    },
    {
      "epoch": 0.3291372910979753,
      "grad_norm": 4.562214374542236,
      "learning_rate": 9.403280114388883e-06,
      "loss": 0.05,
      "step": 201120
    },
    {
      "epoch": 0.3291700215366286,
      "grad_norm": 3.203533411026001,
      "learning_rate": 9.403214222175366e-06,
      "loss": 0.0473,
      "step": 201140
    },
    {
      "epoch": 0.329202751975282,
      "grad_norm": 2.0279359817504883,
      "learning_rate": 9.403148329961848e-06,
      "loss": 0.0481,
      "step": 201160
    },
    {
      "epoch": 0.3292354824139353,
      "grad_norm": 3.003324031829834,
      "learning_rate": 9.403082437748332e-06,
      "loss": 0.0517,
      "step": 201180
    },
    {
      "epoch": 0.32926821285258867,
      "grad_norm": 1.1311869621276855,
      "learning_rate": 9.403016545534816e-06,
      "loss": 0.0566,
      "step": 201200
    },
    {
      "epoch": 0.329300943291242,
      "grad_norm": 1.8635491132736206,
      "learning_rate": 9.402950653321297e-06,
      "loss": 0.0461,
      "step": 201220
    },
    {
      "epoch": 0.3293336737298953,
      "grad_norm": 3.4888417720794678,
      "learning_rate": 9.402884761107781e-06,
      "loss": 0.0577,
      "step": 201240
    },
    {
      "epoch": 0.3293664041685487,
      "grad_norm": 2.4572031497955322,
      "learning_rate": 9.402818868894263e-06,
      "loss": 0.0456,
      "step": 201260
    },
    {
      "epoch": 0.329399134607202,
      "grad_norm": 1.7125617265701294,
      "learning_rate": 9.402752976680746e-06,
      "loss": 0.0475,
      "step": 201280
    },
    {
      "epoch": 0.32943186504585537,
      "grad_norm": 1.9636987447738647,
      "learning_rate": 9.402687084467228e-06,
      "loss": 0.0459,
      "step": 201300
    },
    {
      "epoch": 0.3294645954845087,
      "grad_norm": 0.8649392127990723,
      "learning_rate": 9.402621192253712e-06,
      "loss": 0.0422,
      "step": 201320
    },
    {
      "epoch": 0.329497325923162,
      "grad_norm": 0.20065122842788696,
      "learning_rate": 9.402555300040196e-06,
      "loss": 0.0679,
      "step": 201340
    },
    {
      "epoch": 0.32953005636181537,
      "grad_norm": 1.1908541917800903,
      "learning_rate": 9.402489407826677e-06,
      "loss": 0.0483,
      "step": 201360
    },
    {
      "epoch": 0.3295627868004687,
      "grad_norm": 2.145233154296875,
      "learning_rate": 9.402423515613161e-06,
      "loss": 0.0572,
      "step": 201380
    },
    {
      "epoch": 0.32959551723912206,
      "grad_norm": 1.0555181503295898,
      "learning_rate": 9.402357623399645e-06,
      "loss": 0.0468,
      "step": 201400
    },
    {
      "epoch": 0.3296282476777754,
      "grad_norm": 1.0639121532440186,
      "learning_rate": 9.402291731186127e-06,
      "loss": 0.0445,
      "step": 201420
    },
    {
      "epoch": 0.3296609781164287,
      "grad_norm": 1.222049593925476,
      "learning_rate": 9.40222583897261e-06,
      "loss": 0.046,
      "step": 201440
    },
    {
      "epoch": 0.32969370855508207,
      "grad_norm": 2.15559983253479,
      "learning_rate": 9.402159946759092e-06,
      "loss": 0.0417,
      "step": 201460
    },
    {
      "epoch": 0.3297264389937354,
      "grad_norm": 3.1236019134521484,
      "learning_rate": 9.402094054545576e-06,
      "loss": 0.0624,
      "step": 201480
    },
    {
      "epoch": 0.32975916943238875,
      "grad_norm": 3.697333812713623,
      "learning_rate": 9.402028162332057e-06,
      "loss": 0.0556,
      "step": 201500
    },
    {
      "epoch": 0.32979189987104207,
      "grad_norm": 0.7318956851959229,
      "learning_rate": 9.401962270118541e-06,
      "loss": 0.0511,
      "step": 201520
    },
    {
      "epoch": 0.3298246303096954,
      "grad_norm": 0.6948199272155762,
      "learning_rate": 9.401896377905023e-06,
      "loss": 0.0475,
      "step": 201540
    },
    {
      "epoch": 0.32985736074834876,
      "grad_norm": 1.126114845275879,
      "learning_rate": 9.401830485691507e-06,
      "loss": 0.0417,
      "step": 201560
    },
    {
      "epoch": 0.3298900911870021,
      "grad_norm": 0.9014883637428284,
      "learning_rate": 9.40176459347799e-06,
      "loss": 0.0383,
      "step": 201580
    },
    {
      "epoch": 0.32992282162565545,
      "grad_norm": 2.6961331367492676,
      "learning_rate": 9.401698701264472e-06,
      "loss": 0.0495,
      "step": 201600
    },
    {
      "epoch": 0.32995555206430877,
      "grad_norm": 1.061332106590271,
      "learning_rate": 9.401632809050956e-06,
      "loss": 0.0481,
      "step": 201620
    },
    {
      "epoch": 0.3299882825029621,
      "grad_norm": 4.68845272064209,
      "learning_rate": 9.401566916837438e-06,
      "loss": 0.0546,
      "step": 201640
    },
    {
      "epoch": 0.33002101294161545,
      "grad_norm": 0.5238202214241028,
      "learning_rate": 9.401501024623921e-06,
      "loss": 0.0588,
      "step": 201660
    },
    {
      "epoch": 0.33005374338026877,
      "grad_norm": 0.38767942786216736,
      "learning_rate": 9.401435132410403e-06,
      "loss": 0.0426,
      "step": 201680
    },
    {
      "epoch": 0.33008647381892214,
      "grad_norm": 2.0929696559906006,
      "learning_rate": 9.401369240196887e-06,
      "loss": 0.0545,
      "step": 201700
    },
    {
      "epoch": 0.33011920425757546,
      "grad_norm": 1.364861011505127,
      "learning_rate": 9.401303347983368e-06,
      "loss": 0.0598,
      "step": 201720
    },
    {
      "epoch": 0.3301519346962288,
      "grad_norm": 0.5153960585594177,
      "learning_rate": 9.401237455769852e-06,
      "loss": 0.0341,
      "step": 201740
    },
    {
      "epoch": 0.33018466513488215,
      "grad_norm": 1.9153131246566772,
      "learning_rate": 9.401171563556336e-06,
      "loss": 0.0538,
      "step": 201760
    },
    {
      "epoch": 0.33021739557353547,
      "grad_norm": 0.9681320786476135,
      "learning_rate": 9.401105671342818e-06,
      "loss": 0.0551,
      "step": 201780
    },
    {
      "epoch": 0.33025012601218884,
      "grad_norm": 1.6873513460159302,
      "learning_rate": 9.401039779129301e-06,
      "loss": 0.0593,
      "step": 201800
    },
    {
      "epoch": 0.33028285645084215,
      "grad_norm": 0.6820811033248901,
      "learning_rate": 9.400973886915785e-06,
      "loss": 0.0601,
      "step": 201820
    },
    {
      "epoch": 0.33031558688949547,
      "grad_norm": 33.01420974731445,
      "learning_rate": 9.400907994702267e-06,
      "loss": 0.0526,
      "step": 201840
    },
    {
      "epoch": 0.33034831732814884,
      "grad_norm": 1.63428795337677,
      "learning_rate": 9.40084210248875e-06,
      "loss": 0.0514,
      "step": 201860
    },
    {
      "epoch": 0.33038104776680216,
      "grad_norm": 0.9290302991867065,
      "learning_rate": 9.400776210275232e-06,
      "loss": 0.0389,
      "step": 201880
    },
    {
      "epoch": 0.33041377820545553,
      "grad_norm": 1.9472332000732422,
      "learning_rate": 9.400710318061716e-06,
      "loss": 0.0502,
      "step": 201900
    },
    {
      "epoch": 0.33044650864410885,
      "grad_norm": 1.2991421222686768,
      "learning_rate": 9.4006444258482e-06,
      "loss": 0.0456,
      "step": 201920
    },
    {
      "epoch": 0.33047923908276217,
      "grad_norm": 2.8868393898010254,
      "learning_rate": 9.400578533634681e-06,
      "loss": 0.054,
      "step": 201940
    },
    {
      "epoch": 0.33051196952141554,
      "grad_norm": 3.5515778064727783,
      "learning_rate": 9.400512641421165e-06,
      "loss": 0.0558,
      "step": 201960
    },
    {
      "epoch": 0.33054469996006886,
      "grad_norm": 3.962089776992798,
      "learning_rate": 9.400446749207647e-06,
      "loss": 0.0525,
      "step": 201980
    },
    {
      "epoch": 0.3305774303987222,
      "grad_norm": 1.5693857669830322,
      "learning_rate": 9.40038085699413e-06,
      "loss": 0.0539,
      "step": 202000
    },
    {
      "epoch": 0.33061016083737554,
      "grad_norm": 1.779882550239563,
      "learning_rate": 9.400314964780612e-06,
      "loss": 0.0433,
      "step": 202020
    },
    {
      "epoch": 0.33064289127602886,
      "grad_norm": 0.9026069045066833,
      "learning_rate": 9.400249072567096e-06,
      "loss": 0.0362,
      "step": 202040
    },
    {
      "epoch": 0.33067562171468223,
      "grad_norm": 4.397663116455078,
      "learning_rate": 9.400183180353578e-06,
      "loss": 0.0486,
      "step": 202060
    },
    {
      "epoch": 0.33070835215333555,
      "grad_norm": 0.46187910437583923,
      "learning_rate": 9.400117288140061e-06,
      "loss": 0.0439,
      "step": 202080
    },
    {
      "epoch": 0.3307410825919889,
      "grad_norm": 1.947962999343872,
      "learning_rate": 9.400051395926543e-06,
      "loss": 0.0494,
      "step": 202100
    },
    {
      "epoch": 0.33077381303064224,
      "grad_norm": 1.7105746269226074,
      "learning_rate": 9.399985503713027e-06,
      "loss": 0.0405,
      "step": 202120
    },
    {
      "epoch": 0.33080654346929556,
      "grad_norm": 1.9909065961837769,
      "learning_rate": 9.39991961149951e-06,
      "loss": 0.0567,
      "step": 202140
    },
    {
      "epoch": 0.3308392739079489,
      "grad_norm": 2.0382533073425293,
      "learning_rate": 9.399853719285992e-06,
      "loss": 0.0425,
      "step": 202160
    },
    {
      "epoch": 0.33087200434660224,
      "grad_norm": 0.563291072845459,
      "learning_rate": 9.399787827072476e-06,
      "loss": 0.0405,
      "step": 202180
    },
    {
      "epoch": 0.3309047347852556,
      "grad_norm": 0.31398719549179077,
      "learning_rate": 9.39972193485896e-06,
      "loss": 0.044,
      "step": 202200
    },
    {
      "epoch": 0.33093746522390893,
      "grad_norm": 1.6273269653320312,
      "learning_rate": 9.399656042645441e-06,
      "loss": 0.044,
      "step": 202220
    },
    {
      "epoch": 0.33097019566256225,
      "grad_norm": 1.0465686321258545,
      "learning_rate": 9.399590150431925e-06,
      "loss": 0.0403,
      "step": 202240
    },
    {
      "epoch": 0.3310029261012156,
      "grad_norm": 1.3161102533340454,
      "learning_rate": 9.399524258218408e-06,
      "loss": 0.0468,
      "step": 202260
    },
    {
      "epoch": 0.33103565653986894,
      "grad_norm": 1.682244896888733,
      "learning_rate": 9.39945836600489e-06,
      "loss": 0.04,
      "step": 202280
    },
    {
      "epoch": 0.3310683869785223,
      "grad_norm": 2.803748369216919,
      "learning_rate": 9.399392473791374e-06,
      "loss": 0.0504,
      "step": 202300
    },
    {
      "epoch": 0.3311011174171756,
      "grad_norm": 1.2220968008041382,
      "learning_rate": 9.399326581577856e-06,
      "loss": 0.0357,
      "step": 202320
    },
    {
      "epoch": 0.33113384785582894,
      "grad_norm": 3.6180832386016846,
      "learning_rate": 9.39926068936434e-06,
      "loss": 0.0629,
      "step": 202340
    },
    {
      "epoch": 0.3311665782944823,
      "grad_norm": 1.7480043172836304,
      "learning_rate": 9.399194797150821e-06,
      "loss": 0.0566,
      "step": 202360
    },
    {
      "epoch": 0.33119930873313563,
      "grad_norm": 0.9769600033760071,
      "learning_rate": 9.399128904937305e-06,
      "loss": 0.0497,
      "step": 202380
    },
    {
      "epoch": 0.331232039171789,
      "grad_norm": 2.5525054931640625,
      "learning_rate": 9.399063012723787e-06,
      "loss": 0.047,
      "step": 202400
    },
    {
      "epoch": 0.3312647696104423,
      "grad_norm": 3.6055517196655273,
      "learning_rate": 9.39899712051027e-06,
      "loss": 0.0483,
      "step": 202420
    },
    {
      "epoch": 0.33129750004909564,
      "grad_norm": 2.954958915710449,
      "learning_rate": 9.398931228296752e-06,
      "loss": 0.0682,
      "step": 202440
    },
    {
      "epoch": 0.331330230487749,
      "grad_norm": 1.4200962781906128,
      "learning_rate": 9.398865336083236e-06,
      "loss": 0.0591,
      "step": 202460
    },
    {
      "epoch": 0.33136296092640233,
      "grad_norm": 1.2862153053283691,
      "learning_rate": 9.398799443869718e-06,
      "loss": 0.0563,
      "step": 202480
    },
    {
      "epoch": 0.3313956913650557,
      "grad_norm": 0.9576972723007202,
      "learning_rate": 9.398733551656201e-06,
      "loss": 0.0531,
      "step": 202500
    },
    {
      "epoch": 0.331428421803709,
      "grad_norm": 1.0102043151855469,
      "learning_rate": 9.398667659442685e-06,
      "loss": 0.0445,
      "step": 202520
    },
    {
      "epoch": 0.33146115224236233,
      "grad_norm": 1.5729639530181885,
      "learning_rate": 9.398601767229167e-06,
      "loss": 0.0481,
      "step": 202540
    },
    {
      "epoch": 0.3314938826810157,
      "grad_norm": 0.5596566200256348,
      "learning_rate": 9.39853587501565e-06,
      "loss": 0.0386,
      "step": 202560
    },
    {
      "epoch": 0.331526613119669,
      "grad_norm": 2.6325275897979736,
      "learning_rate": 9.398469982802134e-06,
      "loss": 0.0563,
      "step": 202580
    },
    {
      "epoch": 0.3315593435583224,
      "grad_norm": 3.856034994125366,
      "learning_rate": 9.398404090588616e-06,
      "loss": 0.0676,
      "step": 202600
    },
    {
      "epoch": 0.3315920739969757,
      "grad_norm": 0.9275215268135071,
      "learning_rate": 9.3983381983751e-06,
      "loss": 0.0464,
      "step": 202620
    },
    {
      "epoch": 0.33162480443562903,
      "grad_norm": 2.131425142288208,
      "learning_rate": 9.398272306161583e-06,
      "loss": 0.0677,
      "step": 202640
    },
    {
      "epoch": 0.3316575348742824,
      "grad_norm": 0.3821883797645569,
      "learning_rate": 9.398206413948065e-06,
      "loss": 0.0568,
      "step": 202660
    },
    {
      "epoch": 0.3316902653129357,
      "grad_norm": 2.009200096130371,
      "learning_rate": 9.398140521734548e-06,
      "loss": 0.0558,
      "step": 202680
    },
    {
      "epoch": 0.3317229957515891,
      "grad_norm": 2.216829538345337,
      "learning_rate": 9.39807462952103e-06,
      "loss": 0.0475,
      "step": 202700
    },
    {
      "epoch": 0.3317557261902424,
      "grad_norm": 1.7054128646850586,
      "learning_rate": 9.398008737307514e-06,
      "loss": 0.0516,
      "step": 202720
    },
    {
      "epoch": 0.3317884566288957,
      "grad_norm": 0.37511640787124634,
      "learning_rate": 9.397942845093996e-06,
      "loss": 0.0463,
      "step": 202740
    },
    {
      "epoch": 0.3318211870675491,
      "grad_norm": 1.746824026107788,
      "learning_rate": 9.39787695288048e-06,
      "loss": 0.0439,
      "step": 202760
    },
    {
      "epoch": 0.3318539175062024,
      "grad_norm": 1.5265883207321167,
      "learning_rate": 9.397811060666961e-06,
      "loss": 0.069,
      "step": 202780
    },
    {
      "epoch": 0.3318866479448558,
      "grad_norm": 0.7474348545074463,
      "learning_rate": 9.397745168453445e-06,
      "loss": 0.0594,
      "step": 202800
    },
    {
      "epoch": 0.3319193783835091,
      "grad_norm": 2.1713006496429443,
      "learning_rate": 9.397679276239927e-06,
      "loss": 0.0476,
      "step": 202820
    },
    {
      "epoch": 0.3319521088221624,
      "grad_norm": 2.6823854446411133,
      "learning_rate": 9.39761338402641e-06,
      "loss": 0.035,
      "step": 202840
    },
    {
      "epoch": 0.3319848392608158,
      "grad_norm": 1.6451786756515503,
      "learning_rate": 9.397547491812892e-06,
      "loss": 0.068,
      "step": 202860
    },
    {
      "epoch": 0.3320175696994691,
      "grad_norm": 4.653789520263672,
      "learning_rate": 9.397481599599376e-06,
      "loss": 0.0635,
      "step": 202880
    },
    {
      "epoch": 0.3320503001381225,
      "grad_norm": 1.2674845457077026,
      "learning_rate": 9.397415707385858e-06,
      "loss": 0.0507,
      "step": 202900
    },
    {
      "epoch": 0.3320830305767758,
      "grad_norm": 1.7746983766555786,
      "learning_rate": 9.397349815172341e-06,
      "loss": 0.05,
      "step": 202920
    },
    {
      "epoch": 0.3321157610154291,
      "grad_norm": 0.6549530625343323,
      "learning_rate": 9.397283922958825e-06,
      "loss": 0.0501,
      "step": 202940
    },
    {
      "epoch": 0.3321484914540825,
      "grad_norm": 0.5161020159721375,
      "learning_rate": 9.397218030745307e-06,
      "loss": 0.0537,
      "step": 202960
    },
    {
      "epoch": 0.3321812218927358,
      "grad_norm": 2.1617400646209717,
      "learning_rate": 9.39715213853179e-06,
      "loss": 0.0521,
      "step": 202980
    },
    {
      "epoch": 0.3322139523313892,
      "grad_norm": 0.24778957664966583,
      "learning_rate": 9.397086246318274e-06,
      "loss": 0.0483,
      "step": 203000
    },
    {
      "epoch": 0.3322466827700425,
      "grad_norm": 1.2562966346740723,
      "learning_rate": 9.397020354104756e-06,
      "loss": 0.0545,
      "step": 203020
    },
    {
      "epoch": 0.3322794132086958,
      "grad_norm": 6.960099220275879,
      "learning_rate": 9.39695446189124e-06,
      "loss": 0.0442,
      "step": 203040
    },
    {
      "epoch": 0.3323121436473492,
      "grad_norm": 3.224517583847046,
      "learning_rate": 9.396888569677723e-06,
      "loss": 0.0479,
      "step": 203060
    },
    {
      "epoch": 0.3323448740860025,
      "grad_norm": 1.372629165649414,
      "learning_rate": 9.396822677464205e-06,
      "loss": 0.0555,
      "step": 203080
    },
    {
      "epoch": 0.3323776045246558,
      "grad_norm": 6.1662092208862305,
      "learning_rate": 9.396756785250689e-06,
      "loss": 0.0581,
      "step": 203100
    },
    {
      "epoch": 0.3324103349633092,
      "grad_norm": 1.5525860786437988,
      "learning_rate": 9.39669089303717e-06,
      "loss": 0.0503,
      "step": 203120
    },
    {
      "epoch": 0.3324430654019625,
      "grad_norm": 0.5813891291618347,
      "learning_rate": 9.396625000823654e-06,
      "loss": 0.0502,
      "step": 203140
    },
    {
      "epoch": 0.3324757958406159,
      "grad_norm": 4.169534683227539,
      "learning_rate": 9.396559108610136e-06,
      "loss": 0.073,
      "step": 203160
    },
    {
      "epoch": 0.3325085262792692,
      "grad_norm": 4.087433338165283,
      "learning_rate": 9.39649321639662e-06,
      "loss": 0.0469,
      "step": 203180
    },
    {
      "epoch": 0.3325412567179225,
      "grad_norm": 7.250362873077393,
      "learning_rate": 9.396427324183101e-06,
      "loss": 0.0555,
      "step": 203200
    },
    {
      "epoch": 0.3325739871565759,
      "grad_norm": 2.6511402130126953,
      "learning_rate": 9.396361431969585e-06,
      "loss": 0.0571,
      "step": 203220
    },
    {
      "epoch": 0.3326067175952292,
      "grad_norm": 1.783492922782898,
      "learning_rate": 9.396295539756067e-06,
      "loss": 0.062,
      "step": 203240
    },
    {
      "epoch": 0.33263944803388257,
      "grad_norm": 2.0912957191467285,
      "learning_rate": 9.39622964754255e-06,
      "loss": 0.0393,
      "step": 203260
    },
    {
      "epoch": 0.3326721784725359,
      "grad_norm": 0.9683249592781067,
      "learning_rate": 9.396163755329032e-06,
      "loss": 0.0536,
      "step": 203280
    },
    {
      "epoch": 0.3327049089111892,
      "grad_norm": 0.5171132683753967,
      "learning_rate": 9.396097863115516e-06,
      "loss": 0.0542,
      "step": 203300
    },
    {
      "epoch": 0.3327376393498426,
      "grad_norm": 0.6684024333953857,
      "learning_rate": 9.396031970902e-06,
      "loss": 0.037,
      "step": 203320
    },
    {
      "epoch": 0.3327703697884959,
      "grad_norm": 1.0819416046142578,
      "learning_rate": 9.395966078688481e-06,
      "loss": 0.0378,
      "step": 203340
    },
    {
      "epoch": 0.33280310022714926,
      "grad_norm": 0.5977455973625183,
      "learning_rate": 9.395900186474965e-06,
      "loss": 0.0549,
      "step": 203360
    },
    {
      "epoch": 0.3328358306658026,
      "grad_norm": 1.5105820894241333,
      "learning_rate": 9.395834294261449e-06,
      "loss": 0.0657,
      "step": 203380
    },
    {
      "epoch": 0.3328685611044559,
      "grad_norm": 1.5878362655639648,
      "learning_rate": 9.39576840204793e-06,
      "loss": 0.0524,
      "step": 203400
    },
    {
      "epoch": 0.33290129154310927,
      "grad_norm": 2.390889883041382,
      "learning_rate": 9.395702509834414e-06,
      "loss": 0.063,
      "step": 203420
    },
    {
      "epoch": 0.3329340219817626,
      "grad_norm": 1.5364360809326172,
      "learning_rate": 9.395636617620898e-06,
      "loss": 0.0413,
      "step": 203440
    },
    {
      "epoch": 0.33296675242041596,
      "grad_norm": 2.0735812187194824,
      "learning_rate": 9.39557072540738e-06,
      "loss": 0.0421,
      "step": 203460
    },
    {
      "epoch": 0.3329994828590693,
      "grad_norm": 0.5767173767089844,
      "learning_rate": 9.395504833193863e-06,
      "loss": 0.0496,
      "step": 203480
    },
    {
      "epoch": 0.3330322132977226,
      "grad_norm": 2.4457693099975586,
      "learning_rate": 9.395438940980345e-06,
      "loss": 0.0507,
      "step": 203500
    },
    {
      "epoch": 0.33306494373637596,
      "grad_norm": 2.3448359966278076,
      "learning_rate": 9.395373048766829e-06,
      "loss": 0.0394,
      "step": 203520
    },
    {
      "epoch": 0.3330976741750293,
      "grad_norm": 5.377236366271973,
      "learning_rate": 9.39530715655331e-06,
      "loss": 0.0462,
      "step": 203540
    },
    {
      "epoch": 0.33313040461368265,
      "grad_norm": 2.475175142288208,
      "learning_rate": 9.395241264339794e-06,
      "loss": 0.0462,
      "step": 203560
    },
    {
      "epoch": 0.33316313505233597,
      "grad_norm": 0.41618770360946655,
      "learning_rate": 9.395175372126276e-06,
      "loss": 0.0535,
      "step": 203580
    },
    {
      "epoch": 0.3331958654909893,
      "grad_norm": 1.079937219619751,
      "learning_rate": 9.39510947991276e-06,
      "loss": 0.0424,
      "step": 203600
    },
    {
      "epoch": 0.33322859592964266,
      "grad_norm": 1.5137121677398682,
      "learning_rate": 9.395043587699241e-06,
      "loss": 0.0481,
      "step": 203620
    },
    {
      "epoch": 0.333261326368296,
      "grad_norm": 1.9391318559646606,
      "learning_rate": 9.394977695485725e-06,
      "loss": 0.0611,
      "step": 203640
    },
    {
      "epoch": 0.33329405680694935,
      "grad_norm": 1.334269642829895,
      "learning_rate": 9.394911803272209e-06,
      "loss": 0.0477,
      "step": 203660
    },
    {
      "epoch": 0.33332678724560266,
      "grad_norm": 0.284807950258255,
      "learning_rate": 9.39484591105869e-06,
      "loss": 0.0634,
      "step": 203680
    },
    {
      "epoch": 0.333359517684256,
      "grad_norm": 0.6724758744239807,
      "learning_rate": 9.394780018845174e-06,
      "loss": 0.0394,
      "step": 203700
    },
    {
      "epoch": 0.33339224812290935,
      "grad_norm": 3.5216102600097656,
      "learning_rate": 9.394714126631656e-06,
      "loss": 0.0584,
      "step": 203720
    },
    {
      "epoch": 0.33342497856156267,
      "grad_norm": 0.9080886244773865,
      "learning_rate": 9.39464823441814e-06,
      "loss": 0.0559,
      "step": 203740
    },
    {
      "epoch": 0.33345770900021604,
      "grad_norm": 1.3972065448760986,
      "learning_rate": 9.394582342204621e-06,
      "loss": 0.0537,
      "step": 203760
    },
    {
      "epoch": 0.33349043943886936,
      "grad_norm": 1.3326191902160645,
      "learning_rate": 9.394516449991105e-06,
      "loss": 0.0548,
      "step": 203780
    },
    {
      "epoch": 0.3335231698775227,
      "grad_norm": 1.2968631982803345,
      "learning_rate": 9.394450557777589e-06,
      "loss": 0.0502,
      "step": 203800
    },
    {
      "epoch": 0.33355590031617605,
      "grad_norm": 0.8144196271896362,
      "learning_rate": 9.39438466556407e-06,
      "loss": 0.0346,
      "step": 203820
    },
    {
      "epoch": 0.33358863075482936,
      "grad_norm": 1.3254603147506714,
      "learning_rate": 9.394318773350554e-06,
      "loss": 0.0541,
      "step": 203840
    },
    {
      "epoch": 0.33362136119348273,
      "grad_norm": 2.58736252784729,
      "learning_rate": 9.394252881137038e-06,
      "loss": 0.0489,
      "step": 203860
    },
    {
      "epoch": 0.33365409163213605,
      "grad_norm": 1.0835539102554321,
      "learning_rate": 9.39418698892352e-06,
      "loss": 0.0435,
      "step": 203880
    },
    {
      "epoch": 0.33368682207078937,
      "grad_norm": 2.0709400177001953,
      "learning_rate": 9.394121096710003e-06,
      "loss": 0.0484,
      "step": 203900
    },
    {
      "epoch": 0.33371955250944274,
      "grad_norm": 1.5284425020217896,
      "learning_rate": 9.394055204496485e-06,
      "loss": 0.0509,
      "step": 203920
    },
    {
      "epoch": 0.33375228294809606,
      "grad_norm": 6.832344055175781,
      "learning_rate": 9.393989312282969e-06,
      "loss": 0.0404,
      "step": 203940
    },
    {
      "epoch": 0.33378501338674943,
      "grad_norm": 1.1891229152679443,
      "learning_rate": 9.39392342006945e-06,
      "loss": 0.0461,
      "step": 203960
    },
    {
      "epoch": 0.33381774382540275,
      "grad_norm": 3.0233237743377686,
      "learning_rate": 9.393857527855934e-06,
      "loss": 0.0588,
      "step": 203980
    },
    {
      "epoch": 0.33385047426405606,
      "grad_norm": 1.0933737754821777,
      "learning_rate": 9.393791635642416e-06,
      "loss": 0.038,
      "step": 204000
    },
    {
      "epoch": 0.33388320470270944,
      "grad_norm": 3.22794246673584,
      "learning_rate": 9.3937257434289e-06,
      "loss": 0.0367,
      "step": 204020
    },
    {
      "epoch": 0.33391593514136275,
      "grad_norm": 1.7234079837799072,
      "learning_rate": 9.393659851215383e-06,
      "loss": 0.0482,
      "step": 204040
    },
    {
      "epoch": 0.3339486655800161,
      "grad_norm": 2.28761887550354,
      "learning_rate": 9.393593959001865e-06,
      "loss": 0.032,
      "step": 204060
    },
    {
      "epoch": 0.33398139601866944,
      "grad_norm": 1.2760423421859741,
      "learning_rate": 9.393528066788349e-06,
      "loss": 0.0464,
      "step": 204080
    },
    {
      "epoch": 0.33401412645732276,
      "grad_norm": 3.662069320678711,
      "learning_rate": 9.39346217457483e-06,
      "loss": 0.0508,
      "step": 204100
    },
    {
      "epoch": 0.33404685689597613,
      "grad_norm": 5.136407852172852,
      "learning_rate": 9.393396282361314e-06,
      "loss": 0.055,
      "step": 204120
    },
    {
      "epoch": 0.33407958733462945,
      "grad_norm": 2.048267126083374,
      "learning_rate": 9.393330390147796e-06,
      "loss": 0.0411,
      "step": 204140
    },
    {
      "epoch": 0.3341123177732828,
      "grad_norm": 2.3171911239624023,
      "learning_rate": 9.39326449793428e-06,
      "loss": 0.0488,
      "step": 204160
    },
    {
      "epoch": 0.33414504821193614,
      "grad_norm": 1.3382422924041748,
      "learning_rate": 9.393198605720763e-06,
      "loss": 0.0395,
      "step": 204180
    },
    {
      "epoch": 0.33417777865058945,
      "grad_norm": 1.2966539859771729,
      "learning_rate": 9.393132713507245e-06,
      "loss": 0.062,
      "step": 204200
    },
    {
      "epoch": 0.3342105090892428,
      "grad_norm": 1.3293343782424927,
      "learning_rate": 9.393066821293729e-06,
      "loss": 0.0616,
      "step": 204220
    },
    {
      "epoch": 0.33424323952789614,
      "grad_norm": 2.955606460571289,
      "learning_rate": 9.393000929080212e-06,
      "loss": 0.0544,
      "step": 204240
    },
    {
      "epoch": 0.3342759699665495,
      "grad_norm": 2.014728307723999,
      "learning_rate": 9.392935036866694e-06,
      "loss": 0.0612,
      "step": 204260
    },
    {
      "epoch": 0.33430870040520283,
      "grad_norm": 1.4493699073791504,
      "learning_rate": 9.392869144653178e-06,
      "loss": 0.0475,
      "step": 204280
    },
    {
      "epoch": 0.33434143084385615,
      "grad_norm": 2.2165846824645996,
      "learning_rate": 9.39280325243966e-06,
      "loss": 0.0483,
      "step": 204300
    },
    {
      "epoch": 0.3343741612825095,
      "grad_norm": 1.860713005065918,
      "learning_rate": 9.392737360226143e-06,
      "loss": 0.0375,
      "step": 204320
    },
    {
      "epoch": 0.33440689172116284,
      "grad_norm": 6.983010292053223,
      "learning_rate": 9.392671468012625e-06,
      "loss": 0.0484,
      "step": 204340
    },
    {
      "epoch": 0.3344396221598162,
      "grad_norm": 2.304327964782715,
      "learning_rate": 9.392605575799109e-06,
      "loss": 0.0619,
      "step": 204360
    },
    {
      "epoch": 0.3344723525984695,
      "grad_norm": 4.021947860717773,
      "learning_rate": 9.392539683585592e-06,
      "loss": 0.042,
      "step": 204380
    },
    {
      "epoch": 0.33450508303712284,
      "grad_norm": 1.9339863061904907,
      "learning_rate": 9.392473791372074e-06,
      "loss": 0.0503,
      "step": 204400
    },
    {
      "epoch": 0.3345378134757762,
      "grad_norm": 1.0963329076766968,
      "learning_rate": 9.392407899158558e-06,
      "loss": 0.0493,
      "step": 204420
    },
    {
      "epoch": 0.33457054391442953,
      "grad_norm": 9.314824104309082,
      "learning_rate": 9.39234200694504e-06,
      "loss": 0.0666,
      "step": 204440
    },
    {
      "epoch": 0.3346032743530829,
      "grad_norm": 1.6698154211044312,
      "learning_rate": 9.392276114731523e-06,
      "loss": 0.0422,
      "step": 204460
    },
    {
      "epoch": 0.3346360047917362,
      "grad_norm": 2.80186128616333,
      "learning_rate": 9.392210222518005e-06,
      "loss": 0.0353,
      "step": 204480
    },
    {
      "epoch": 0.33466873523038954,
      "grad_norm": 1.6876301765441895,
      "learning_rate": 9.392144330304489e-06,
      "loss": 0.0548,
      "step": 204500
    },
    {
      "epoch": 0.3347014656690429,
      "grad_norm": 1.2352614402770996,
      "learning_rate": 9.39207843809097e-06,
      "loss": 0.0446,
      "step": 204520
    },
    {
      "epoch": 0.3347341961076962,
      "grad_norm": 2.5487966537475586,
      "learning_rate": 9.392012545877454e-06,
      "loss": 0.0406,
      "step": 204540
    },
    {
      "epoch": 0.3347669265463496,
      "grad_norm": 5.942346096038818,
      "learning_rate": 9.391946653663936e-06,
      "loss": 0.0628,
      "step": 204560
    },
    {
      "epoch": 0.3347996569850029,
      "grad_norm": 0.6541810631752014,
      "learning_rate": 9.39188076145042e-06,
      "loss": 0.0478,
      "step": 204580
    },
    {
      "epoch": 0.33483238742365623,
      "grad_norm": 0.8835069537162781,
      "learning_rate": 9.391814869236903e-06,
      "loss": 0.0331,
      "step": 204600
    },
    {
      "epoch": 0.3348651178623096,
      "grad_norm": 1.1944773197174072,
      "learning_rate": 9.391748977023387e-06,
      "loss": 0.0351,
      "step": 204620
    },
    {
      "epoch": 0.3348978483009629,
      "grad_norm": 5.109320163726807,
      "learning_rate": 9.391683084809869e-06,
      "loss": 0.06,
      "step": 204640
    },
    {
      "epoch": 0.3349305787396163,
      "grad_norm": 1.4705842733383179,
      "learning_rate": 9.391617192596352e-06,
      "loss": 0.0473,
      "step": 204660
    },
    {
      "epoch": 0.3349633091782696,
      "grad_norm": 1.956726312637329,
      "learning_rate": 9.391551300382834e-06,
      "loss": 0.0512,
      "step": 204680
    },
    {
      "epoch": 0.3349960396169229,
      "grad_norm": 2.347949504852295,
      "learning_rate": 9.391485408169318e-06,
      "loss": 0.0515,
      "step": 204700
    },
    {
      "epoch": 0.3350287700555763,
      "grad_norm": 1.292475938796997,
      "learning_rate": 9.391419515955801e-06,
      "loss": 0.0496,
      "step": 204720
    },
    {
      "epoch": 0.3350615004942296,
      "grad_norm": 1.896120548248291,
      "learning_rate": 9.391353623742283e-06,
      "loss": 0.0578,
      "step": 204740
    },
    {
      "epoch": 0.335094230932883,
      "grad_norm": 1.3494597673416138,
      "learning_rate": 9.391287731528767e-06,
      "loss": 0.0486,
      "step": 204760
    },
    {
      "epoch": 0.3351269613715363,
      "grad_norm": 4.663101673126221,
      "learning_rate": 9.391221839315249e-06,
      "loss": 0.0415,
      "step": 204780
    },
    {
      "epoch": 0.3351596918101896,
      "grad_norm": 1.773547887802124,
      "learning_rate": 9.391155947101732e-06,
      "loss": 0.0578,
      "step": 204800
    },
    {
      "epoch": 0.335192422248843,
      "grad_norm": 0.5400932431221008,
      "learning_rate": 9.391090054888214e-06,
      "loss": 0.0479,
      "step": 204820
    },
    {
      "epoch": 0.3352251526874963,
      "grad_norm": 1.9141868352890015,
      "learning_rate": 9.391024162674698e-06,
      "loss": 0.0486,
      "step": 204840
    },
    {
      "epoch": 0.3352578831261497,
      "grad_norm": 4.671059608459473,
      "learning_rate": 9.39095827046118e-06,
      "loss": 0.0576,
      "step": 204860
    },
    {
      "epoch": 0.335290613564803,
      "grad_norm": 2.0770890712738037,
      "learning_rate": 9.390892378247663e-06,
      "loss": 0.0438,
      "step": 204880
    },
    {
      "epoch": 0.3353233440034563,
      "grad_norm": 2.5051016807556152,
      "learning_rate": 9.390826486034145e-06,
      "loss": 0.0565,
      "step": 204900
    },
    {
      "epoch": 0.3353560744421097,
      "grad_norm": 6.5871100425720215,
      "learning_rate": 9.390760593820629e-06,
      "loss": 0.0507,
      "step": 204920
    },
    {
      "epoch": 0.335388804880763,
      "grad_norm": 1.6316946744918823,
      "learning_rate": 9.39069470160711e-06,
      "loss": 0.041,
      "step": 204940
    },
    {
      "epoch": 0.3354215353194164,
      "grad_norm": 1.6179282665252686,
      "learning_rate": 9.390628809393594e-06,
      "loss": 0.0475,
      "step": 204960
    },
    {
      "epoch": 0.3354542657580697,
      "grad_norm": 2.5191380977630615,
      "learning_rate": 9.390562917180078e-06,
      "loss": 0.0543,
      "step": 204980
    },
    {
      "epoch": 0.335486996196723,
      "grad_norm": 1.0827319622039795,
      "learning_rate": 9.39049702496656e-06,
      "loss": 0.044,
      "step": 205000
    },
    {
      "epoch": 0.3355197266353764,
      "grad_norm": 1.487236499786377,
      "learning_rate": 9.390431132753043e-06,
      "loss": 0.0508,
      "step": 205020
    },
    {
      "epoch": 0.3355524570740297,
      "grad_norm": 0.7169029116630554,
      "learning_rate": 9.390365240539527e-06,
      "loss": 0.0436,
      "step": 205040
    },
    {
      "epoch": 0.33558518751268307,
      "grad_norm": 1.4328118562698364,
      "learning_rate": 9.390299348326009e-06,
      "loss": 0.0506,
      "step": 205060
    },
    {
      "epoch": 0.3356179179513364,
      "grad_norm": 1.6150158643722534,
      "learning_rate": 9.390233456112492e-06,
      "loss": 0.0422,
      "step": 205080
    },
    {
      "epoch": 0.3356506483899897,
      "grad_norm": 1.1225762367248535,
      "learning_rate": 9.390167563898976e-06,
      "loss": 0.0528,
      "step": 205100
    },
    {
      "epoch": 0.3356833788286431,
      "grad_norm": 0.6106308698654175,
      "learning_rate": 9.390101671685458e-06,
      "loss": 0.0527,
      "step": 205120
    },
    {
      "epoch": 0.3357161092672964,
      "grad_norm": 1.7033621072769165,
      "learning_rate": 9.390035779471942e-06,
      "loss": 0.0639,
      "step": 205140
    },
    {
      "epoch": 0.33574883970594976,
      "grad_norm": 3.803936243057251,
      "learning_rate": 9.389969887258423e-06,
      "loss": 0.0465,
      "step": 205160
    },
    {
      "epoch": 0.3357815701446031,
      "grad_norm": 0.9087032079696655,
      "learning_rate": 9.389903995044907e-06,
      "loss": 0.0451,
      "step": 205180
    },
    {
      "epoch": 0.3358143005832564,
      "grad_norm": 2.85972261428833,
      "learning_rate": 9.389838102831389e-06,
      "loss": 0.0453,
      "step": 205200
    },
    {
      "epoch": 0.33584703102190977,
      "grad_norm": 3.8382315635681152,
      "learning_rate": 9.389772210617872e-06,
      "loss": 0.0591,
      "step": 205220
    },
    {
      "epoch": 0.3358797614605631,
      "grad_norm": 1.940747857093811,
      "learning_rate": 9.389706318404354e-06,
      "loss": 0.0526,
      "step": 205240
    },
    {
      "epoch": 0.33591249189921646,
      "grad_norm": 1.032858967781067,
      "learning_rate": 9.389640426190838e-06,
      "loss": 0.0454,
      "step": 205260
    },
    {
      "epoch": 0.3359452223378698,
      "grad_norm": 2.3691413402557373,
      "learning_rate": 9.38957453397732e-06,
      "loss": 0.0636,
      "step": 205280
    },
    {
      "epoch": 0.3359779527765231,
      "grad_norm": 3.9018125534057617,
      "learning_rate": 9.389508641763803e-06,
      "loss": 0.0578,
      "step": 205300
    },
    {
      "epoch": 0.33601068321517646,
      "grad_norm": 2.092158079147339,
      "learning_rate": 9.389442749550285e-06,
      "loss": 0.0618,
      "step": 205320
    },
    {
      "epoch": 0.3360434136538298,
      "grad_norm": 1.9317967891693115,
      "learning_rate": 9.389376857336769e-06,
      "loss": 0.0574,
      "step": 205340
    },
    {
      "epoch": 0.33607614409248315,
      "grad_norm": 5.996965408325195,
      "learning_rate": 9.389310965123253e-06,
      "loss": 0.0371,
      "step": 205360
    },
    {
      "epoch": 0.33610887453113647,
      "grad_norm": 1.2525984048843384,
      "learning_rate": 9.389245072909734e-06,
      "loss": 0.0494,
      "step": 205380
    },
    {
      "epoch": 0.3361416049697898,
      "grad_norm": 1.6981827020645142,
      "learning_rate": 9.389179180696218e-06,
      "loss": 0.0451,
      "step": 205400
    },
    {
      "epoch": 0.33617433540844316,
      "grad_norm": 0.5579020977020264,
      "learning_rate": 9.389113288482702e-06,
      "loss": 0.0453,
      "step": 205420
    },
    {
      "epoch": 0.3362070658470965,
      "grad_norm": 1.4119566679000854,
      "learning_rate": 9.389047396269183e-06,
      "loss": 0.0522,
      "step": 205440
    },
    {
      "epoch": 0.33623979628574985,
      "grad_norm": 0.5249888300895691,
      "learning_rate": 9.388981504055667e-06,
      "loss": 0.0408,
      "step": 205460
    },
    {
      "epoch": 0.33627252672440316,
      "grad_norm": 1.4726043939590454,
      "learning_rate": 9.38891561184215e-06,
      "loss": 0.0378,
      "step": 205480
    },
    {
      "epoch": 0.3363052571630565,
      "grad_norm": 2.3729944229125977,
      "learning_rate": 9.388849719628633e-06,
      "loss": 0.0282,
      "step": 205500
    },
    {
      "epoch": 0.33633798760170985,
      "grad_norm": 1.7818865776062012,
      "learning_rate": 9.388783827415116e-06,
      "loss": 0.0405,
      "step": 205520
    },
    {
      "epoch": 0.33637071804036317,
      "grad_norm": 3.0265982151031494,
      "learning_rate": 9.388717935201598e-06,
      "loss": 0.055,
      "step": 205540
    },
    {
      "epoch": 0.33640344847901654,
      "grad_norm": 1.0193355083465576,
      "learning_rate": 9.388652042988082e-06,
      "loss": 0.0466,
      "step": 205560
    },
    {
      "epoch": 0.33643617891766986,
      "grad_norm": 1.901449203491211,
      "learning_rate": 9.388586150774563e-06,
      "loss": 0.0632,
      "step": 205580
    },
    {
      "epoch": 0.3364689093563232,
      "grad_norm": 1.383042573928833,
      "learning_rate": 9.388520258561047e-06,
      "loss": 0.0473,
      "step": 205600
    },
    {
      "epoch": 0.33650163979497655,
      "grad_norm": 2.2541489601135254,
      "learning_rate": 9.388454366347529e-06,
      "loss": 0.0718,
      "step": 205620
    },
    {
      "epoch": 0.33653437023362986,
      "grad_norm": 0.9076463580131531,
      "learning_rate": 9.388388474134013e-06,
      "loss": 0.0346,
      "step": 205640
    },
    {
      "epoch": 0.33656710067228324,
      "grad_norm": 2.4029905796051025,
      "learning_rate": 9.388322581920494e-06,
      "loss": 0.0453,
      "step": 205660
    },
    {
      "epoch": 0.33659983111093655,
      "grad_norm": 1.5058343410491943,
      "learning_rate": 9.388256689706978e-06,
      "loss": 0.0537,
      "step": 205680
    },
    {
      "epoch": 0.33663256154958987,
      "grad_norm": 1.9703289270401,
      "learning_rate": 9.38819079749346e-06,
      "loss": 0.072,
      "step": 205700
    },
    {
      "epoch": 0.33666529198824324,
      "grad_norm": 1.3132644891738892,
      "learning_rate": 9.388124905279944e-06,
      "loss": 0.0433,
      "step": 205720
    },
    {
      "epoch": 0.33669802242689656,
      "grad_norm": 4.5190205574035645,
      "learning_rate": 9.388059013066425e-06,
      "loss": 0.059,
      "step": 205740
    },
    {
      "epoch": 0.33673075286554993,
      "grad_norm": 3.766329765319824,
      "learning_rate": 9.387993120852909e-06,
      "loss": 0.0705,
      "step": 205760
    },
    {
      "epoch": 0.33676348330420325,
      "grad_norm": 1.5732132196426392,
      "learning_rate": 9.387927228639393e-06,
      "loss": 0.0488,
      "step": 205780
    },
    {
      "epoch": 0.33679621374285656,
      "grad_norm": 2.6308066844940186,
      "learning_rate": 9.387861336425874e-06,
      "loss": 0.0525,
      "step": 205800
    },
    {
      "epoch": 0.33682894418150994,
      "grad_norm": 3.963906764984131,
      "learning_rate": 9.387795444212358e-06,
      "loss": 0.0535,
      "step": 205820
    },
    {
      "epoch": 0.33686167462016325,
      "grad_norm": 1.8250075578689575,
      "learning_rate": 9.387729551998842e-06,
      "loss": 0.0443,
      "step": 205840
    },
    {
      "epoch": 0.33689440505881657,
      "grad_norm": 1.2203381061553955,
      "learning_rate": 9.387663659785324e-06,
      "loss": 0.0507,
      "step": 205860
    },
    {
      "epoch": 0.33692713549746994,
      "grad_norm": 1.976186990737915,
      "learning_rate": 9.387597767571807e-06,
      "loss": 0.0534,
      "step": 205880
    },
    {
      "epoch": 0.33695986593612326,
      "grad_norm": 1.5462037324905396,
      "learning_rate": 9.38753187535829e-06,
      "loss": 0.0466,
      "step": 205900
    },
    {
      "epoch": 0.33699259637477663,
      "grad_norm": 0.7341775298118591,
      "learning_rate": 9.387465983144773e-06,
      "loss": 0.0451,
      "step": 205920
    },
    {
      "epoch": 0.33702532681342995,
      "grad_norm": 1.6798458099365234,
      "learning_rate": 9.387400090931256e-06,
      "loss": 0.0417,
      "step": 205940
    },
    {
      "epoch": 0.33705805725208327,
      "grad_norm": 0.7353207468986511,
      "learning_rate": 9.387334198717738e-06,
      "loss": 0.0398,
      "step": 205960
    },
    {
      "epoch": 0.33709078769073664,
      "grad_norm": 2.151648759841919,
      "learning_rate": 9.387268306504222e-06,
      "loss": 0.0374,
      "step": 205980
    },
    {
      "epoch": 0.33712351812938995,
      "grad_norm": 1.4813342094421387,
      "learning_rate": 9.387202414290704e-06,
      "loss": 0.0553,
      "step": 206000
    },
    {
      "epoch": 0.3371562485680433,
      "grad_norm": 0.9030972123146057,
      "learning_rate": 9.387136522077187e-06,
      "loss": 0.0462,
      "step": 206020
    },
    {
      "epoch": 0.33718897900669664,
      "grad_norm": 1.613869309425354,
      "learning_rate": 9.387070629863669e-06,
      "loss": 0.05,
      "step": 206040
    },
    {
      "epoch": 0.33722170944534996,
      "grad_norm": 0.9066301584243774,
      "learning_rate": 9.387004737650153e-06,
      "loss": 0.0414,
      "step": 206060
    },
    {
      "epoch": 0.33725443988400333,
      "grad_norm": 1.4172309637069702,
      "learning_rate": 9.386938845436635e-06,
      "loss": 0.0503,
      "step": 206080
    },
    {
      "epoch": 0.33728717032265665,
      "grad_norm": 3.207951545715332,
      "learning_rate": 9.386872953223118e-06,
      "loss": 0.0484,
      "step": 206100
    },
    {
      "epoch": 0.33731990076131,
      "grad_norm": 1.3156603574752808,
      "learning_rate": 9.386807061009602e-06,
      "loss": 0.0425,
      "step": 206120
    },
    {
      "epoch": 0.33735263119996334,
      "grad_norm": 0.8596175909042358,
      "learning_rate": 9.386741168796084e-06,
      "loss": 0.0555,
      "step": 206140
    },
    {
      "epoch": 0.33738536163861665,
      "grad_norm": 2.1612186431884766,
      "learning_rate": 9.386675276582567e-06,
      "loss": 0.0464,
      "step": 206160
    },
    {
      "epoch": 0.33741809207727,
      "grad_norm": 1.9359298944473267,
      "learning_rate": 9.386609384369049e-06,
      "loss": 0.0516,
      "step": 206180
    },
    {
      "epoch": 0.33745082251592334,
      "grad_norm": 1.6783264875411987,
      "learning_rate": 9.386543492155533e-06,
      "loss": 0.0499,
      "step": 206200
    },
    {
      "epoch": 0.3374835529545767,
      "grad_norm": 0.5796356201171875,
      "learning_rate": 9.386477599942016e-06,
      "loss": 0.0335,
      "step": 206220
    },
    {
      "epoch": 0.33751628339323003,
      "grad_norm": 0.9857189059257507,
      "learning_rate": 9.386411707728498e-06,
      "loss": 0.0471,
      "step": 206240
    },
    {
      "epoch": 0.33754901383188335,
      "grad_norm": 4.047596454620361,
      "learning_rate": 9.386345815514982e-06,
      "loss": 0.0571,
      "step": 206260
    },
    {
      "epoch": 0.3375817442705367,
      "grad_norm": 1.49177885055542,
      "learning_rate": 9.386279923301465e-06,
      "loss": 0.0475,
      "step": 206280
    },
    {
      "epoch": 0.33761447470919004,
      "grad_norm": 2.3532874584198,
      "learning_rate": 9.386214031087947e-06,
      "loss": 0.0629,
      "step": 206300
    },
    {
      "epoch": 0.3376472051478434,
      "grad_norm": 0.3920497000217438,
      "learning_rate": 9.38614813887443e-06,
      "loss": 0.0457,
      "step": 206320
    },
    {
      "epoch": 0.3376799355864967,
      "grad_norm": 0.40846309065818787,
      "learning_rate": 9.386082246660913e-06,
      "loss": 0.0544,
      "step": 206340
    },
    {
      "epoch": 0.33771266602515004,
      "grad_norm": 2.4593679904937744,
      "learning_rate": 9.386016354447396e-06,
      "loss": 0.035,
      "step": 206360
    },
    {
      "epoch": 0.3377453964638034,
      "grad_norm": 1.2509852647781372,
      "learning_rate": 9.385950462233878e-06,
      "loss": 0.0484,
      "step": 206380
    },
    {
      "epoch": 0.33777812690245673,
      "grad_norm": 0.9662041068077087,
      "learning_rate": 9.385884570020362e-06,
      "loss": 0.0312,
      "step": 206400
    },
    {
      "epoch": 0.3378108573411101,
      "grad_norm": 0.5869483351707458,
      "learning_rate": 9.385818677806844e-06,
      "loss": 0.0496,
      "step": 206420
    },
    {
      "epoch": 0.3378435877797634,
      "grad_norm": 0.8131958842277527,
      "learning_rate": 9.385752785593327e-06,
      "loss": 0.0464,
      "step": 206440
    },
    {
      "epoch": 0.33787631821841674,
      "grad_norm": 4.3405303955078125,
      "learning_rate": 9.385686893379809e-06,
      "loss": 0.0466,
      "step": 206460
    },
    {
      "epoch": 0.3379090486570701,
      "grad_norm": 1.4944895505905151,
      "learning_rate": 9.385621001166293e-06,
      "loss": 0.0415,
      "step": 206480
    },
    {
      "epoch": 0.3379417790957234,
      "grad_norm": 0.417030394077301,
      "learning_rate": 9.385555108952776e-06,
      "loss": 0.0544,
      "step": 206500
    },
    {
      "epoch": 0.3379745095343768,
      "grad_norm": 2.6661722660064697,
      "learning_rate": 9.385489216739258e-06,
      "loss": 0.0496,
      "step": 206520
    },
    {
      "epoch": 0.3380072399730301,
      "grad_norm": 1.7744837999343872,
      "learning_rate": 9.385423324525742e-06,
      "loss": 0.0436,
      "step": 206540
    },
    {
      "epoch": 0.33803997041168343,
      "grad_norm": 5.564403057098389,
      "learning_rate": 9.385357432312224e-06,
      "loss": 0.0566,
      "step": 206560
    },
    {
      "epoch": 0.3380727008503368,
      "grad_norm": 1.6038591861724854,
      "learning_rate": 9.385291540098707e-06,
      "loss": 0.0429,
      "step": 206580
    },
    {
      "epoch": 0.3381054312889901,
      "grad_norm": 1.0414446592330933,
      "learning_rate": 9.385225647885189e-06,
      "loss": 0.0414,
      "step": 206600
    },
    {
      "epoch": 0.3381381617276435,
      "grad_norm": 1.2085881233215332,
      "learning_rate": 9.385159755671673e-06,
      "loss": 0.0421,
      "step": 206620
    },
    {
      "epoch": 0.3381708921662968,
      "grad_norm": 1.7557166814804077,
      "learning_rate": 9.385093863458156e-06,
      "loss": 0.0421,
      "step": 206640
    },
    {
      "epoch": 0.3382036226049501,
      "grad_norm": 1.6342839002609253,
      "learning_rate": 9.385027971244638e-06,
      "loss": 0.0529,
      "step": 206660
    },
    {
      "epoch": 0.3382363530436035,
      "grad_norm": 1.7073782682418823,
      "learning_rate": 9.384962079031122e-06,
      "loss": 0.0381,
      "step": 206680
    },
    {
      "epoch": 0.3382690834822568,
      "grad_norm": 1.799131989479065,
      "learning_rate": 9.384896186817605e-06,
      "loss": 0.0505,
      "step": 206700
    },
    {
      "epoch": 0.3383018139209102,
      "grad_norm": 1.2799204587936401,
      "learning_rate": 9.384830294604087e-06,
      "loss": 0.0449,
      "step": 206720
    },
    {
      "epoch": 0.3383345443595635,
      "grad_norm": 1.1646138429641724,
      "learning_rate": 9.384764402390571e-06,
      "loss": 0.0508,
      "step": 206740
    },
    {
      "epoch": 0.3383672747982168,
      "grad_norm": 1.3342839479446411,
      "learning_rate": 9.384698510177053e-06,
      "loss": 0.0525,
      "step": 206760
    },
    {
      "epoch": 0.3384000052368702,
      "grad_norm": 1.1606659889221191,
      "learning_rate": 9.384632617963536e-06,
      "loss": 0.0542,
      "step": 206780
    },
    {
      "epoch": 0.3384327356755235,
      "grad_norm": 1.2116355895996094,
      "learning_rate": 9.384566725750018e-06,
      "loss": 0.0475,
      "step": 206800
    },
    {
      "epoch": 0.3384654661141769,
      "grad_norm": 1.5811842679977417,
      "learning_rate": 9.384500833536502e-06,
      "loss": 0.0482,
      "step": 206820
    },
    {
      "epoch": 0.3384981965528302,
      "grad_norm": 1.6485248804092407,
      "learning_rate": 9.384434941322985e-06,
      "loss": 0.0582,
      "step": 206840
    },
    {
      "epoch": 0.3385309269914835,
      "grad_norm": 9.258527755737305,
      "learning_rate": 9.384369049109467e-06,
      "loss": 0.0519,
      "step": 206860
    },
    {
      "epoch": 0.3385636574301369,
      "grad_norm": 0.7411383390426636,
      "learning_rate": 9.384303156895951e-06,
      "loss": 0.059,
      "step": 206880
    },
    {
      "epoch": 0.3385963878687902,
      "grad_norm": 1.071202278137207,
      "learning_rate": 9.384237264682433e-06,
      "loss": 0.0469,
      "step": 206900
    },
    {
      "epoch": 0.3386291183074436,
      "grad_norm": 2.249971866607666,
      "learning_rate": 9.384171372468916e-06,
      "loss": 0.0544,
      "step": 206920
    },
    {
      "epoch": 0.3386618487460969,
      "grad_norm": 1.1941418647766113,
      "learning_rate": 9.384105480255398e-06,
      "loss": 0.0507,
      "step": 206940
    },
    {
      "epoch": 0.3386945791847502,
      "grad_norm": 2.0062618255615234,
      "learning_rate": 9.384039588041882e-06,
      "loss": 0.0556,
      "step": 206960
    },
    {
      "epoch": 0.3387273096234036,
      "grad_norm": 1.374369502067566,
      "learning_rate": 9.383973695828364e-06,
      "loss": 0.0573,
      "step": 206980
    },
    {
      "epoch": 0.3387600400620569,
      "grad_norm": 0.49588310718536377,
      "learning_rate": 9.383907803614847e-06,
      "loss": 0.0532,
      "step": 207000
    },
    {
      "epoch": 0.33879277050071027,
      "grad_norm": 2.1555724143981934,
      "learning_rate": 9.383841911401331e-06,
      "loss": 0.0539,
      "step": 207020
    },
    {
      "epoch": 0.3388255009393636,
      "grad_norm": 1.778212070465088,
      "learning_rate": 9.383776019187813e-06,
      "loss": 0.0593,
      "step": 207040
    },
    {
      "epoch": 0.3388582313780169,
      "grad_norm": 1.9657127857208252,
      "learning_rate": 9.383710126974296e-06,
      "loss": 0.0594,
      "step": 207060
    },
    {
      "epoch": 0.3388909618166703,
      "grad_norm": 1.1933761835098267,
      "learning_rate": 9.38364423476078e-06,
      "loss": 0.0488,
      "step": 207080
    },
    {
      "epoch": 0.3389236922553236,
      "grad_norm": 1.7110120058059692,
      "learning_rate": 9.383578342547262e-06,
      "loss": 0.0497,
      "step": 207100
    },
    {
      "epoch": 0.33895642269397697,
      "grad_norm": 1.9962102174758911,
      "learning_rate": 9.383512450333745e-06,
      "loss": 0.0473,
      "step": 207120
    },
    {
      "epoch": 0.3389891531326303,
      "grad_norm": 2.2858011722564697,
      "learning_rate": 9.383446558120227e-06,
      "loss": 0.0496,
      "step": 207140
    },
    {
      "epoch": 0.3390218835712836,
      "grad_norm": 1.8136134147644043,
      "learning_rate": 9.383380665906711e-06,
      "loss": 0.0426,
      "step": 207160
    },
    {
      "epoch": 0.33905461400993697,
      "grad_norm": 3.2310264110565186,
      "learning_rate": 9.383314773693195e-06,
      "loss": 0.0497,
      "step": 207180
    },
    {
      "epoch": 0.3390873444485903,
      "grad_norm": 1.6755235195159912,
      "learning_rate": 9.383248881479676e-06,
      "loss": 0.041,
      "step": 207200
    },
    {
      "epoch": 0.33912007488724366,
      "grad_norm": 0.9901443719863892,
      "learning_rate": 9.38318298926616e-06,
      "loss": 0.0452,
      "step": 207220
    },
    {
      "epoch": 0.339152805325897,
      "grad_norm": 0.6753031611442566,
      "learning_rate": 9.383117097052642e-06,
      "loss": 0.0442,
      "step": 207240
    },
    {
      "epoch": 0.3391855357645503,
      "grad_norm": 2.129770278930664,
      "learning_rate": 9.383051204839125e-06,
      "loss": 0.0453,
      "step": 207260
    },
    {
      "epoch": 0.33921826620320367,
      "grad_norm": 8.581942558288574,
      "learning_rate": 9.382985312625607e-06,
      "loss": 0.0454,
      "step": 207280
    },
    {
      "epoch": 0.339250996641857,
      "grad_norm": 0.5724717378616333,
      "learning_rate": 9.382919420412091e-06,
      "loss": 0.05,
      "step": 207300
    },
    {
      "epoch": 0.33928372708051036,
      "grad_norm": 0.9231705665588379,
      "learning_rate": 9.382853528198573e-06,
      "loss": 0.0484,
      "step": 207320
    },
    {
      "epoch": 0.3393164575191637,
      "grad_norm": 1.6031609773635864,
      "learning_rate": 9.382787635985056e-06,
      "loss": 0.0325,
      "step": 207340
    },
    {
      "epoch": 0.339349187957817,
      "grad_norm": 0.4295923411846161,
      "learning_rate": 9.382721743771538e-06,
      "loss": 0.0451,
      "step": 207360
    },
    {
      "epoch": 0.33938191839647036,
      "grad_norm": 2.087768793106079,
      "learning_rate": 9.382655851558022e-06,
      "loss": 0.0514,
      "step": 207380
    },
    {
      "epoch": 0.3394146488351237,
      "grad_norm": 2.302424192428589,
      "learning_rate": 9.382589959344506e-06,
      "loss": 0.0613,
      "step": 207400
    },
    {
      "epoch": 0.33944737927377705,
      "grad_norm": 0.6876295208930969,
      "learning_rate": 9.382524067130987e-06,
      "loss": 0.0443,
      "step": 207420
    },
    {
      "epoch": 0.33948010971243037,
      "grad_norm": 1.8553177118301392,
      "learning_rate": 9.382458174917471e-06,
      "loss": 0.0508,
      "step": 207440
    },
    {
      "epoch": 0.3395128401510837,
      "grad_norm": 2.513197183609009,
      "learning_rate": 9.382392282703955e-06,
      "loss": 0.0524,
      "step": 207460
    },
    {
      "epoch": 0.33954557058973706,
      "grad_norm": 1.609761357307434,
      "learning_rate": 9.382326390490436e-06,
      "loss": 0.0367,
      "step": 207480
    },
    {
      "epoch": 0.3395783010283904,
      "grad_norm": 0.8694108724594116,
      "learning_rate": 9.38226049827692e-06,
      "loss": 0.0492,
      "step": 207500
    },
    {
      "epoch": 0.33961103146704374,
      "grad_norm": 2.481896162033081,
      "learning_rate": 9.382194606063402e-06,
      "loss": 0.0579,
      "step": 207520
    },
    {
      "epoch": 0.33964376190569706,
      "grad_norm": 1.1133159399032593,
      "learning_rate": 9.382128713849886e-06,
      "loss": 0.0426,
      "step": 207540
    },
    {
      "epoch": 0.3396764923443504,
      "grad_norm": 1.9713706970214844,
      "learning_rate": 9.382062821636369e-06,
      "loss": 0.057,
      "step": 207560
    },
    {
      "epoch": 0.33970922278300375,
      "grad_norm": 2.328875780105591,
      "learning_rate": 9.381996929422851e-06,
      "loss": 0.06,
      "step": 207580
    },
    {
      "epoch": 0.33974195322165707,
      "grad_norm": 1.3345470428466797,
      "learning_rate": 9.381931037209335e-06,
      "loss": 0.0472,
      "step": 207600
    },
    {
      "epoch": 0.33977468366031044,
      "grad_norm": 1.2534929513931274,
      "learning_rate": 9.381865144995817e-06,
      "loss": 0.0433,
      "step": 207620
    },
    {
      "epoch": 0.33980741409896376,
      "grad_norm": 1.996042251586914,
      "learning_rate": 9.3817992527823e-06,
      "loss": 0.0472,
      "step": 207640
    },
    {
      "epoch": 0.3398401445376171,
      "grad_norm": 0.8589233160018921,
      "learning_rate": 9.381733360568782e-06,
      "loss": 0.0435,
      "step": 207660
    },
    {
      "epoch": 0.33987287497627044,
      "grad_norm": 4.231363773345947,
      "learning_rate": 9.381667468355266e-06,
      "loss": 0.0572,
      "step": 207680
    },
    {
      "epoch": 0.33990560541492376,
      "grad_norm": 1.7733334302902222,
      "learning_rate": 9.381601576141747e-06,
      "loss": 0.053,
      "step": 207700
    },
    {
      "epoch": 0.33993833585357713,
      "grad_norm": 1.12565016746521,
      "learning_rate": 9.381535683928231e-06,
      "loss": 0.0578,
      "step": 207720
    },
    {
      "epoch": 0.33997106629223045,
      "grad_norm": 1.5292881727218628,
      "learning_rate": 9.381469791714713e-06,
      "loss": 0.0448,
      "step": 207740
    },
    {
      "epoch": 0.34000379673088377,
      "grad_norm": 2.672149896621704,
      "learning_rate": 9.381403899501197e-06,
      "loss": 0.0561,
      "step": 207760
    },
    {
      "epoch": 0.34003652716953714,
      "grad_norm": 0.4220222532749176,
      "learning_rate": 9.381338007287678e-06,
      "loss": 0.046,
      "step": 207780
    },
    {
      "epoch": 0.34006925760819046,
      "grad_norm": 0.8590441942214966,
      "learning_rate": 9.381272115074162e-06,
      "loss": 0.048,
      "step": 207800
    },
    {
      "epoch": 0.34010198804684383,
      "grad_norm": 0.9409302473068237,
      "learning_rate": 9.381206222860646e-06,
      "loss": 0.0293,
      "step": 207820
    },
    {
      "epoch": 0.34013471848549715,
      "grad_norm": 1.5539077520370483,
      "learning_rate": 9.381140330647127e-06,
      "loss": 0.0551,
      "step": 207840
    },
    {
      "epoch": 0.34016744892415046,
      "grad_norm": 2.7468695640563965,
      "learning_rate": 9.381074438433611e-06,
      "loss": 0.0404,
      "step": 207860
    },
    {
      "epoch": 0.34020017936280383,
      "grad_norm": 2.0720112323760986,
      "learning_rate": 9.381008546220095e-06,
      "loss": 0.0461,
      "step": 207880
    },
    {
      "epoch": 0.34023290980145715,
      "grad_norm": 2.75125789642334,
      "learning_rate": 9.380942654006577e-06,
      "loss": 0.0567,
      "step": 207900
    },
    {
      "epoch": 0.3402656402401105,
      "grad_norm": 2.2709736824035645,
      "learning_rate": 9.38087676179306e-06,
      "loss": 0.0638,
      "step": 207920
    },
    {
      "epoch": 0.34029837067876384,
      "grad_norm": 0.9747975468635559,
      "learning_rate": 9.380810869579544e-06,
      "loss": 0.0517,
      "step": 207940
    },
    {
      "epoch": 0.34033110111741716,
      "grad_norm": 1.612007975578308,
      "learning_rate": 9.380744977366026e-06,
      "loss": 0.0493,
      "step": 207960
    },
    {
      "epoch": 0.34036383155607053,
      "grad_norm": 1.7456369400024414,
      "learning_rate": 9.38067908515251e-06,
      "loss": 0.0447,
      "step": 207980
    },
    {
      "epoch": 0.34039656199472385,
      "grad_norm": 1.687026023864746,
      "learning_rate": 9.380613192938991e-06,
      "loss": 0.0472,
      "step": 208000
    },
    {
      "epoch": 0.3404292924333772,
      "grad_norm": 2.128448724746704,
      "learning_rate": 9.380547300725475e-06,
      "loss": 0.041,
      "step": 208020
    },
    {
      "epoch": 0.34046202287203053,
      "grad_norm": 0.7188306450843811,
      "learning_rate": 9.380481408511957e-06,
      "loss": 0.0391,
      "step": 208040
    },
    {
      "epoch": 0.34049475331068385,
      "grad_norm": 5.287097930908203,
      "learning_rate": 9.38041551629844e-06,
      "loss": 0.0449,
      "step": 208060
    },
    {
      "epoch": 0.3405274837493372,
      "grad_norm": 3.1410024166107178,
      "learning_rate": 9.380349624084922e-06,
      "loss": 0.0493,
      "step": 208080
    },
    {
      "epoch": 0.34056021418799054,
      "grad_norm": 2.83855938911438,
      "learning_rate": 9.380283731871406e-06,
      "loss": 0.0533,
      "step": 208100
    },
    {
      "epoch": 0.3405929446266439,
      "grad_norm": 2.253066062927246,
      "learning_rate": 9.380217839657888e-06,
      "loss": 0.0522,
      "step": 208120
    },
    {
      "epoch": 0.34062567506529723,
      "grad_norm": 6.523585796356201,
      "learning_rate": 9.380151947444371e-06,
      "loss": 0.0559,
      "step": 208140
    },
    {
      "epoch": 0.34065840550395055,
      "grad_norm": 5.409961223602295,
      "learning_rate": 9.380086055230853e-06,
      "loss": 0.0415,
      "step": 208160
    },
    {
      "epoch": 0.3406911359426039,
      "grad_norm": 2.152754783630371,
      "learning_rate": 9.380020163017337e-06,
      "loss": 0.0469,
      "step": 208180
    },
    {
      "epoch": 0.34072386638125723,
      "grad_norm": 1.3105627298355103,
      "learning_rate": 9.37995427080382e-06,
      "loss": 0.0561,
      "step": 208200
    },
    {
      "epoch": 0.3407565968199106,
      "grad_norm": 2.510241985321045,
      "learning_rate": 9.379888378590302e-06,
      "loss": 0.0491,
      "step": 208220
    },
    {
      "epoch": 0.3407893272585639,
      "grad_norm": 0.4595377445220947,
      "learning_rate": 9.379822486376786e-06,
      "loss": 0.0489,
      "step": 208240
    },
    {
      "epoch": 0.34082205769721724,
      "grad_norm": 2.595921516418457,
      "learning_rate": 9.37975659416327e-06,
      "loss": 0.0536,
      "step": 208260
    },
    {
      "epoch": 0.3408547881358706,
      "grad_norm": 2.295039653778076,
      "learning_rate": 9.379690701949751e-06,
      "loss": 0.0567,
      "step": 208280
    },
    {
      "epoch": 0.34088751857452393,
      "grad_norm": 2.017066478729248,
      "learning_rate": 9.379624809736235e-06,
      "loss": 0.042,
      "step": 208300
    },
    {
      "epoch": 0.3409202490131773,
      "grad_norm": 1.4813562631607056,
      "learning_rate": 9.379558917522718e-06,
      "loss": 0.0465,
      "step": 208320
    },
    {
      "epoch": 0.3409529794518306,
      "grad_norm": 0.6598943471908569,
      "learning_rate": 9.3794930253092e-06,
      "loss": 0.0451,
      "step": 208340
    },
    {
      "epoch": 0.34098570989048393,
      "grad_norm": 2.754257917404175,
      "learning_rate": 9.379427133095684e-06,
      "loss": 0.035,
      "step": 208360
    },
    {
      "epoch": 0.3410184403291373,
      "grad_norm": 2.850597381591797,
      "learning_rate": 9.379361240882166e-06,
      "loss": 0.0509,
      "step": 208380
    },
    {
      "epoch": 0.3410511707677906,
      "grad_norm": 2.189913034439087,
      "learning_rate": 9.37929534866865e-06,
      "loss": 0.0471,
      "step": 208400
    },
    {
      "epoch": 0.341083901206444,
      "grad_norm": 0.9266282916069031,
      "learning_rate": 9.379229456455131e-06,
      "loss": 0.0565,
      "step": 208420
    },
    {
      "epoch": 0.3411166316450973,
      "grad_norm": 2.244401216506958,
      "learning_rate": 9.379163564241615e-06,
      "loss": 0.0381,
      "step": 208440
    },
    {
      "epoch": 0.34114936208375063,
      "grad_norm": 1.6188385486602783,
      "learning_rate": 9.379097672028097e-06,
      "loss": 0.0483,
      "step": 208460
    },
    {
      "epoch": 0.341182092522404,
      "grad_norm": 2.693241834640503,
      "learning_rate": 9.37903177981458e-06,
      "loss": 0.0503,
      "step": 208480
    },
    {
      "epoch": 0.3412148229610573,
      "grad_norm": 3.1846203804016113,
      "learning_rate": 9.378965887601062e-06,
      "loss": 0.0405,
      "step": 208500
    },
    {
      "epoch": 0.3412475533997107,
      "grad_norm": 0.8300545811653137,
      "learning_rate": 9.378899995387546e-06,
      "loss": 0.0624,
      "step": 208520
    },
    {
      "epoch": 0.341280283838364,
      "grad_norm": 1.4946027994155884,
      "learning_rate": 9.378834103174028e-06,
      "loss": 0.0407,
      "step": 208540
    },
    {
      "epoch": 0.3413130142770173,
      "grad_norm": 0.6268404722213745,
      "learning_rate": 9.378768210960511e-06,
      "loss": 0.0404,
      "step": 208560
    },
    {
      "epoch": 0.3413457447156707,
      "grad_norm": 1.0862669944763184,
      "learning_rate": 9.378702318746993e-06,
      "loss": 0.05,
      "step": 208580
    },
    {
      "epoch": 0.341378475154324,
      "grad_norm": 2.0968616008758545,
      "learning_rate": 9.378636426533477e-06,
      "loss": 0.0548,
      "step": 208600
    },
    {
      "epoch": 0.3414112055929774,
      "grad_norm": 0.6506902575492859,
      "learning_rate": 9.37857053431996e-06,
      "loss": 0.0602,
      "step": 208620
    },
    {
      "epoch": 0.3414439360316307,
      "grad_norm": 2.8768038749694824,
      "learning_rate": 9.378504642106442e-06,
      "loss": 0.0382,
      "step": 208640
    },
    {
      "epoch": 0.341476666470284,
      "grad_norm": 1.9009357690811157,
      "learning_rate": 9.378438749892926e-06,
      "loss": 0.0474,
      "step": 208660
    },
    {
      "epoch": 0.3415093969089374,
      "grad_norm": 3.9683077335357666,
      "learning_rate": 9.37837285767941e-06,
      "loss": 0.0618,
      "step": 208680
    },
    {
      "epoch": 0.3415421273475907,
      "grad_norm": 3.7656240463256836,
      "learning_rate": 9.378306965465891e-06,
      "loss": 0.0422,
      "step": 208700
    },
    {
      "epoch": 0.341574857786244,
      "grad_norm": 2.8318824768066406,
      "learning_rate": 9.378241073252375e-06,
      "loss": 0.0372,
      "step": 208720
    },
    {
      "epoch": 0.3416075882248974,
      "grad_norm": 1.7932264804840088,
      "learning_rate": 9.378175181038858e-06,
      "loss": 0.0627,
      "step": 208740
    },
    {
      "epoch": 0.3416403186635507,
      "grad_norm": 0.4671981930732727,
      "learning_rate": 9.37810928882534e-06,
      "loss": 0.0547,
      "step": 208760
    },
    {
      "epoch": 0.3416730491022041,
      "grad_norm": 2.7018015384674072,
      "learning_rate": 9.378043396611824e-06,
      "loss": 0.0659,
      "step": 208780
    },
    {
      "epoch": 0.3417057795408574,
      "grad_norm": 1.2920241355895996,
      "learning_rate": 9.377977504398306e-06,
      "loss": 0.0379,
      "step": 208800
    },
    {
      "epoch": 0.3417385099795107,
      "grad_norm": 2.754477024078369,
      "learning_rate": 9.37791161218479e-06,
      "loss": 0.0559,
      "step": 208820
    },
    {
      "epoch": 0.3417712404181641,
      "grad_norm": 1.829279899597168,
      "learning_rate": 9.377845719971271e-06,
      "loss": 0.0301,
      "step": 208840
    },
    {
      "epoch": 0.3418039708568174,
      "grad_norm": 1.1076487302780151,
      "learning_rate": 9.377779827757755e-06,
      "loss": 0.0325,
      "step": 208860
    },
    {
      "epoch": 0.3418367012954708,
      "grad_norm": 1.6908700466156006,
      "learning_rate": 9.377713935544237e-06,
      "loss": 0.0442,
      "step": 208880
    },
    {
      "epoch": 0.3418694317341241,
      "grad_norm": 4.788415908813477,
      "learning_rate": 9.37764804333072e-06,
      "loss": 0.0519,
      "step": 208900
    },
    {
      "epoch": 0.3419021621727774,
      "grad_norm": 1.4210795164108276,
      "learning_rate": 9.377582151117202e-06,
      "loss": 0.0511,
      "step": 208920
    },
    {
      "epoch": 0.3419348926114308,
      "grad_norm": 1.939922571182251,
      "learning_rate": 9.377516258903686e-06,
      "loss": 0.0647,
      "step": 208940
    },
    {
      "epoch": 0.3419676230500841,
      "grad_norm": 0.8069392442703247,
      "learning_rate": 9.37745036669017e-06,
      "loss": 0.0324,
      "step": 208960
    },
    {
      "epoch": 0.3420003534887375,
      "grad_norm": 1.5664225816726685,
      "learning_rate": 9.377384474476651e-06,
      "loss": 0.0546,
      "step": 208980
    },
    {
      "epoch": 0.3420330839273908,
      "grad_norm": 0.8049575090408325,
      "learning_rate": 9.377318582263135e-06,
      "loss": 0.0537,
      "step": 209000
    },
    {
      "epoch": 0.3420658143660441,
      "grad_norm": 3.2489418983459473,
      "learning_rate": 9.377252690049617e-06,
      "loss": 0.0475,
      "step": 209020
    },
    {
      "epoch": 0.3420985448046975,
      "grad_norm": 1.8451030254364014,
      "learning_rate": 9.3771867978361e-06,
      "loss": 0.0584,
      "step": 209040
    },
    {
      "epoch": 0.3421312752433508,
      "grad_norm": 1.77767014503479,
      "learning_rate": 9.377120905622584e-06,
      "loss": 0.043,
      "step": 209060
    },
    {
      "epoch": 0.34216400568200417,
      "grad_norm": 0.838906466960907,
      "learning_rate": 9.377055013409066e-06,
      "loss": 0.0485,
      "step": 209080
    },
    {
      "epoch": 0.3421967361206575,
      "grad_norm": 0.31555989384651184,
      "learning_rate": 9.37698912119555e-06,
      "loss": 0.0363,
      "step": 209100
    },
    {
      "epoch": 0.3422294665593108,
      "grad_norm": 5.494242191314697,
      "learning_rate": 9.376923228982033e-06,
      "loss": 0.053,
      "step": 209120
    },
    {
      "epoch": 0.3422621969979642,
      "grad_norm": 1.2685304880142212,
      "learning_rate": 9.376857336768515e-06,
      "loss": 0.0518,
      "step": 209140
    },
    {
      "epoch": 0.3422949274366175,
      "grad_norm": 0.8512553572654724,
      "learning_rate": 9.376791444554998e-06,
      "loss": 0.0502,
      "step": 209160
    },
    {
      "epoch": 0.34232765787527086,
      "grad_norm": 2.060182571411133,
      "learning_rate": 9.37672555234148e-06,
      "loss": 0.0593,
      "step": 209180
    },
    {
      "epoch": 0.3423603883139242,
      "grad_norm": 1.4623193740844727,
      "learning_rate": 9.376659660127964e-06,
      "loss": 0.0428,
      "step": 209200
    },
    {
      "epoch": 0.3423931187525775,
      "grad_norm": 2.280606269836426,
      "learning_rate": 9.376593767914446e-06,
      "loss": 0.047,
      "step": 209220
    },
    {
      "epoch": 0.34242584919123087,
      "grad_norm": 1.5054707527160645,
      "learning_rate": 9.37652787570093e-06,
      "loss": 0.0585,
      "step": 209240
    },
    {
      "epoch": 0.3424585796298842,
      "grad_norm": 1.030286192893982,
      "learning_rate": 9.376461983487411e-06,
      "loss": 0.0392,
      "step": 209260
    },
    {
      "epoch": 0.34249131006853756,
      "grad_norm": 1.6373847723007202,
      "learning_rate": 9.376396091273895e-06,
      "loss": 0.0426,
      "step": 209280
    },
    {
      "epoch": 0.3425240405071909,
      "grad_norm": 1.4888579845428467,
      "learning_rate": 9.376330199060378e-06,
      "loss": 0.0398,
      "step": 209300
    },
    {
      "epoch": 0.3425567709458442,
      "grad_norm": 1.4268593788146973,
      "learning_rate": 9.37626430684686e-06,
      "loss": 0.036,
      "step": 209320
    },
    {
      "epoch": 0.34258950138449756,
      "grad_norm": 1.0275781154632568,
      "learning_rate": 9.376198414633344e-06,
      "loss": 0.0376,
      "step": 209340
    },
    {
      "epoch": 0.3426222318231509,
      "grad_norm": 1.8684002161026,
      "learning_rate": 9.376132522419826e-06,
      "loss": 0.0531,
      "step": 209360
    },
    {
      "epoch": 0.34265496226180425,
      "grad_norm": 0.8024661540985107,
      "learning_rate": 9.37606663020631e-06,
      "loss": 0.0523,
      "step": 209380
    },
    {
      "epoch": 0.34268769270045757,
      "grad_norm": 3.048506736755371,
      "learning_rate": 9.376000737992791e-06,
      "loss": 0.0431,
      "step": 209400
    },
    {
      "epoch": 0.3427204231391109,
      "grad_norm": 1.798315405845642,
      "learning_rate": 9.375934845779275e-06,
      "loss": 0.0528,
      "step": 209420
    },
    {
      "epoch": 0.34275315357776426,
      "grad_norm": 1.4467315673828125,
      "learning_rate": 9.375868953565757e-06,
      "loss": 0.0482,
      "step": 209440
    },
    {
      "epoch": 0.3427858840164176,
      "grad_norm": 1.303760051727295,
      "learning_rate": 9.37580306135224e-06,
      "loss": 0.0549,
      "step": 209460
    },
    {
      "epoch": 0.34281861445507095,
      "grad_norm": 3.5317187309265137,
      "learning_rate": 9.375737169138724e-06,
      "loss": 0.0616,
      "step": 209480
    },
    {
      "epoch": 0.34285134489372426,
      "grad_norm": 3.8162429332733154,
      "learning_rate": 9.375671276925206e-06,
      "loss": 0.0624,
      "step": 209500
    },
    {
      "epoch": 0.3428840753323776,
      "grad_norm": 0.8086539506912231,
      "learning_rate": 9.37560538471169e-06,
      "loss": 0.0483,
      "step": 209520
    },
    {
      "epoch": 0.34291680577103095,
      "grad_norm": 6.297855854034424,
      "learning_rate": 9.375539492498173e-06,
      "loss": 0.0568,
      "step": 209540
    },
    {
      "epoch": 0.34294953620968427,
      "grad_norm": 1.184869408607483,
      "learning_rate": 9.375473600284655e-06,
      "loss": 0.0537,
      "step": 209560
    },
    {
      "epoch": 0.34298226664833764,
      "grad_norm": 0.5686990022659302,
      "learning_rate": 9.375407708071139e-06,
      "loss": 0.0455,
      "step": 209580
    },
    {
      "epoch": 0.34301499708699096,
      "grad_norm": 2.6542322635650635,
      "learning_rate": 9.37534181585762e-06,
      "loss": 0.053,
      "step": 209600
    },
    {
      "epoch": 0.3430477275256443,
      "grad_norm": 3.1000866889953613,
      "learning_rate": 9.375275923644104e-06,
      "loss": 0.0683,
      "step": 209620
    },
    {
      "epoch": 0.34308045796429765,
      "grad_norm": 1.3585331439971924,
      "learning_rate": 9.375210031430588e-06,
      "loss": 0.0607,
      "step": 209640
    },
    {
      "epoch": 0.34311318840295096,
      "grad_norm": 5.078923225402832,
      "learning_rate": 9.37514413921707e-06,
      "loss": 0.0522,
      "step": 209660
    },
    {
      "epoch": 0.34314591884160434,
      "grad_norm": 2.314551830291748,
      "learning_rate": 9.375078247003553e-06,
      "loss": 0.0403,
      "step": 209680
    },
    {
      "epoch": 0.34317864928025765,
      "grad_norm": 1.5914247035980225,
      "learning_rate": 9.375012354790035e-06,
      "loss": 0.0558,
      "step": 209700
    },
    {
      "epoch": 0.34321137971891097,
      "grad_norm": 2.0454235076904297,
      "learning_rate": 9.374946462576519e-06,
      "loss": 0.0441,
      "step": 209720
    },
    {
      "epoch": 0.34324411015756434,
      "grad_norm": 3.259082794189453,
      "learning_rate": 9.374880570363e-06,
      "loss": 0.0479,
      "step": 209740
    },
    {
      "epoch": 0.34327684059621766,
      "grad_norm": 0.8797933459281921,
      "learning_rate": 9.374814678149484e-06,
      "loss": 0.0318,
      "step": 209760
    },
    {
      "epoch": 0.34330957103487103,
      "grad_norm": 1.8248605728149414,
      "learning_rate": 9.374748785935966e-06,
      "loss": 0.0624,
      "step": 209780
    },
    {
      "epoch": 0.34334230147352435,
      "grad_norm": 1.7905555963516235,
      "learning_rate": 9.37468289372245e-06,
      "loss": 0.044,
      "step": 209800
    },
    {
      "epoch": 0.34337503191217766,
      "grad_norm": 2.5355100631713867,
      "learning_rate": 9.374617001508931e-06,
      "loss": 0.0435,
      "step": 209820
    },
    {
      "epoch": 0.34340776235083104,
      "grad_norm": 0.46412625908851624,
      "learning_rate": 9.374551109295415e-06,
      "loss": 0.0406,
      "step": 209840
    },
    {
      "epoch": 0.34344049278948435,
      "grad_norm": 1.610931158065796,
      "learning_rate": 9.374485217081899e-06,
      "loss": 0.0455,
      "step": 209860
    },
    {
      "epoch": 0.3434732232281377,
      "grad_norm": 3.2315287590026855,
      "learning_rate": 9.37441932486838e-06,
      "loss": 0.0493,
      "step": 209880
    },
    {
      "epoch": 0.34350595366679104,
      "grad_norm": 1.4676846265792847,
      "learning_rate": 9.374353432654864e-06,
      "loss": 0.0497,
      "step": 209900
    },
    {
      "epoch": 0.34353868410544436,
      "grad_norm": 8.976228713989258,
      "learning_rate": 9.374287540441348e-06,
      "loss": 0.0523,
      "step": 209920
    },
    {
      "epoch": 0.34357141454409773,
      "grad_norm": 1.2457178831100464,
      "learning_rate": 9.37422164822783e-06,
      "loss": 0.0379,
      "step": 209940
    },
    {
      "epoch": 0.34360414498275105,
      "grad_norm": 1.8964734077453613,
      "learning_rate": 9.374155756014313e-06,
      "loss": 0.0505,
      "step": 209960
    },
    {
      "epoch": 0.3436368754214044,
      "grad_norm": 1.4423965215682983,
      "learning_rate": 9.374089863800795e-06,
      "loss": 0.0425,
      "step": 209980
    },
    {
      "epoch": 0.34366960586005774,
      "grad_norm": 1.384359359741211,
      "learning_rate": 9.374023971587279e-06,
      "loss": 0.0547,
      "step": 210000
    },
    {
      "epoch": 0.34370233629871105,
      "grad_norm": 1.5799814462661743,
      "learning_rate": 9.373958079373762e-06,
      "loss": 0.0444,
      "step": 210020
    },
    {
      "epoch": 0.3437350667373644,
      "grad_norm": 1.9873881340026855,
      "learning_rate": 9.373892187160244e-06,
      "loss": 0.0433,
      "step": 210040
    },
    {
      "epoch": 0.34376779717601774,
      "grad_norm": 3.970287799835205,
      "learning_rate": 9.373826294946728e-06,
      "loss": 0.0579,
      "step": 210060
    },
    {
      "epoch": 0.3438005276146711,
      "grad_norm": 2.112429141998291,
      "learning_rate": 9.37376040273321e-06,
      "loss": 0.059,
      "step": 210080
    },
    {
      "epoch": 0.34383325805332443,
      "grad_norm": 2.5128142833709717,
      "learning_rate": 9.373694510519693e-06,
      "loss": 0.0531,
      "step": 210100
    },
    {
      "epoch": 0.34386598849197775,
      "grad_norm": 2.750373125076294,
      "learning_rate": 9.373628618306175e-06,
      "loss": 0.0465,
      "step": 210120
    },
    {
      "epoch": 0.3438987189306311,
      "grad_norm": 1.3047852516174316,
      "learning_rate": 9.373562726092659e-06,
      "loss": 0.0445,
      "step": 210140
    },
    {
      "epoch": 0.34393144936928444,
      "grad_norm": 0.9942764639854431,
      "learning_rate": 9.37349683387914e-06,
      "loss": 0.0468,
      "step": 210160
    },
    {
      "epoch": 0.3439641798079378,
      "grad_norm": 3.302388906478882,
      "learning_rate": 9.373430941665624e-06,
      "loss": 0.0582,
      "step": 210180
    },
    {
      "epoch": 0.3439969102465911,
      "grad_norm": 4.0990190505981445,
      "learning_rate": 9.373365049452106e-06,
      "loss": 0.0519,
      "step": 210200
    },
    {
      "epoch": 0.34402964068524444,
      "grad_norm": 1.407507061958313,
      "learning_rate": 9.37329915723859e-06,
      "loss": 0.0364,
      "step": 210220
    },
    {
      "epoch": 0.3440623711238978,
      "grad_norm": 3.3134117126464844,
      "learning_rate": 9.373233265025073e-06,
      "loss": 0.052,
      "step": 210240
    },
    {
      "epoch": 0.34409510156255113,
      "grad_norm": 1.0336112976074219,
      "learning_rate": 9.373167372811555e-06,
      "loss": 0.0651,
      "step": 210260
    },
    {
      "epoch": 0.3441278320012045,
      "grad_norm": 3.8621339797973633,
      "learning_rate": 9.373101480598039e-06,
      "loss": 0.0379,
      "step": 210280
    },
    {
      "epoch": 0.3441605624398578,
      "grad_norm": 1.336984634399414,
      "learning_rate": 9.373035588384522e-06,
      "loss": 0.0519,
      "step": 210300
    },
    {
      "epoch": 0.34419329287851114,
      "grad_norm": 1.0551337003707886,
      "learning_rate": 9.372969696171004e-06,
      "loss": 0.0383,
      "step": 210320
    },
    {
      "epoch": 0.3442260233171645,
      "grad_norm": 2.041124105453491,
      "learning_rate": 9.372903803957488e-06,
      "loss": 0.0441,
      "step": 210340
    },
    {
      "epoch": 0.3442587537558178,
      "grad_norm": 0.6829906105995178,
      "learning_rate": 9.372837911743971e-06,
      "loss": 0.0517,
      "step": 210360
    },
    {
      "epoch": 0.3442914841944712,
      "grad_norm": 1.7734832763671875,
      "learning_rate": 9.372772019530453e-06,
      "loss": 0.0621,
      "step": 210380
    },
    {
      "epoch": 0.3443242146331245,
      "grad_norm": 2.568528175354004,
      "learning_rate": 9.372706127316937e-06,
      "loss": 0.0594,
      "step": 210400
    },
    {
      "epoch": 0.34435694507177783,
      "grad_norm": 1.2005983591079712,
      "learning_rate": 9.372640235103419e-06,
      "loss": 0.0562,
      "step": 210420
    },
    {
      "epoch": 0.3443896755104312,
      "grad_norm": 2.1500916481018066,
      "learning_rate": 9.372574342889902e-06,
      "loss": 0.0594,
      "step": 210440
    },
    {
      "epoch": 0.3444224059490845,
      "grad_norm": 0.7255975604057312,
      "learning_rate": 9.372508450676384e-06,
      "loss": 0.0442,
      "step": 210460
    },
    {
      "epoch": 0.3444551363877379,
      "grad_norm": 1.255552887916565,
      "learning_rate": 9.372442558462868e-06,
      "loss": 0.0566,
      "step": 210480
    },
    {
      "epoch": 0.3444878668263912,
      "grad_norm": 4.140727996826172,
      "learning_rate": 9.37237666624935e-06,
      "loss": 0.05,
      "step": 210500
    },
    {
      "epoch": 0.3445205972650445,
      "grad_norm": 1.8108278512954712,
      "learning_rate": 9.372310774035833e-06,
      "loss": 0.0546,
      "step": 210520
    },
    {
      "epoch": 0.3445533277036979,
      "grad_norm": 1.1222147941589355,
      "learning_rate": 9.372244881822315e-06,
      "loss": 0.0574,
      "step": 210540
    },
    {
      "epoch": 0.3445860581423512,
      "grad_norm": 1.5393203496932983,
      "learning_rate": 9.372178989608799e-06,
      "loss": 0.0538,
      "step": 210560
    },
    {
      "epoch": 0.3446187885810046,
      "grad_norm": 2.5547845363616943,
      "learning_rate": 9.37211309739528e-06,
      "loss": 0.0377,
      "step": 210580
    },
    {
      "epoch": 0.3446515190196579,
      "grad_norm": 1.5543310642242432,
      "learning_rate": 9.372047205181764e-06,
      "loss": 0.0518,
      "step": 210600
    },
    {
      "epoch": 0.3446842494583112,
      "grad_norm": 1.3702852725982666,
      "learning_rate": 9.371981312968246e-06,
      "loss": 0.048,
      "step": 210620
    },
    {
      "epoch": 0.3447169798969646,
      "grad_norm": 2.3306851387023926,
      "learning_rate": 9.37191542075473e-06,
      "loss": 0.0557,
      "step": 210640
    },
    {
      "epoch": 0.3447497103356179,
      "grad_norm": 0.31035497784614563,
      "learning_rate": 9.371849528541213e-06,
      "loss": 0.0292,
      "step": 210660
    },
    {
      "epoch": 0.3447824407742713,
      "grad_norm": 1.809651494026184,
      "learning_rate": 9.371783636327695e-06,
      "loss": 0.0523,
      "step": 210680
    },
    {
      "epoch": 0.3448151712129246,
      "grad_norm": 2.1411848068237305,
      "learning_rate": 9.371717744114179e-06,
      "loss": 0.0478,
      "step": 210700
    },
    {
      "epoch": 0.3448479016515779,
      "grad_norm": 2.5415260791778564,
      "learning_rate": 9.371651851900662e-06,
      "loss": 0.0428,
      "step": 210720
    },
    {
      "epoch": 0.3448806320902313,
      "grad_norm": 0.2570461630821228,
      "learning_rate": 9.371585959687144e-06,
      "loss": 0.0661,
      "step": 210740
    },
    {
      "epoch": 0.3449133625288846,
      "grad_norm": 2.4324777126312256,
      "learning_rate": 9.371520067473628e-06,
      "loss": 0.0431,
      "step": 210760
    },
    {
      "epoch": 0.344946092967538,
      "grad_norm": 3.4042162895202637,
      "learning_rate": 9.371454175260111e-06,
      "loss": 0.0399,
      "step": 210780
    },
    {
      "epoch": 0.3449788234061913,
      "grad_norm": 7.730203628540039,
      "learning_rate": 9.371388283046593e-06,
      "loss": 0.0552,
      "step": 210800
    },
    {
      "epoch": 0.3450115538448446,
      "grad_norm": 1.1146364212036133,
      "learning_rate": 9.371322390833077e-06,
      "loss": 0.0422,
      "step": 210820
    },
    {
      "epoch": 0.345044284283498,
      "grad_norm": 1.568158745765686,
      "learning_rate": 9.371256498619559e-06,
      "loss": 0.0548,
      "step": 210840
    },
    {
      "epoch": 0.3450770147221513,
      "grad_norm": 6.862572193145752,
      "learning_rate": 9.371190606406042e-06,
      "loss": 0.057,
      "step": 210860
    },
    {
      "epoch": 0.34510974516080467,
      "grad_norm": 1.2377086877822876,
      "learning_rate": 9.371124714192524e-06,
      "loss": 0.0511,
      "step": 210880
    },
    {
      "epoch": 0.345142475599458,
      "grad_norm": 0.998570442199707,
      "learning_rate": 9.371058821979008e-06,
      "loss": 0.0473,
      "step": 210900
    },
    {
      "epoch": 0.3451752060381113,
      "grad_norm": 8.009682655334473,
      "learning_rate": 9.37099292976549e-06,
      "loss": 0.0476,
      "step": 210920
    },
    {
      "epoch": 0.3452079364767647,
      "grad_norm": 4.5032453536987305,
      "learning_rate": 9.370927037551973e-06,
      "loss": 0.0444,
      "step": 210940
    },
    {
      "epoch": 0.345240666915418,
      "grad_norm": 2.0795493125915527,
      "learning_rate": 9.370861145338455e-06,
      "loss": 0.0646,
      "step": 210960
    },
    {
      "epoch": 0.34527339735407137,
      "grad_norm": 0.9735956788063049,
      "learning_rate": 9.370795253124939e-06,
      "loss": 0.0527,
      "step": 210980
    },
    {
      "epoch": 0.3453061277927247,
      "grad_norm": 2.3447957038879395,
      "learning_rate": 9.37072936091142e-06,
      "loss": 0.0365,
      "step": 211000
    },
    {
      "epoch": 0.345338858231378,
      "grad_norm": 2.0587358474731445,
      "learning_rate": 9.370663468697904e-06,
      "loss": 0.0385,
      "step": 211020
    },
    {
      "epoch": 0.34537158867003137,
      "grad_norm": 0.8503880500793457,
      "learning_rate": 9.370597576484388e-06,
      "loss": 0.0536,
      "step": 211040
    },
    {
      "epoch": 0.3454043191086847,
      "grad_norm": 1.201913833618164,
      "learning_rate": 9.37053168427087e-06,
      "loss": 0.0469,
      "step": 211060
    },
    {
      "epoch": 0.34543704954733806,
      "grad_norm": 1.6486570835113525,
      "learning_rate": 9.370465792057353e-06,
      "loss": 0.0354,
      "step": 211080
    },
    {
      "epoch": 0.3454697799859914,
      "grad_norm": 1.5607796907424927,
      "learning_rate": 9.370399899843837e-06,
      "loss": 0.0454,
      "step": 211100
    },
    {
      "epoch": 0.3455025104246447,
      "grad_norm": 0.9437170624732971,
      "learning_rate": 9.370334007630319e-06,
      "loss": 0.0523,
      "step": 211120
    },
    {
      "epoch": 0.34553524086329807,
      "grad_norm": 1.6032475233078003,
      "learning_rate": 9.370268115416802e-06,
      "loss": 0.049,
      "step": 211140
    },
    {
      "epoch": 0.3455679713019514,
      "grad_norm": 2.637190580368042,
      "learning_rate": 9.370202223203286e-06,
      "loss": 0.044,
      "step": 211160
    },
    {
      "epoch": 0.34560070174060475,
      "grad_norm": 1.5919173955917358,
      "learning_rate": 9.370136330989768e-06,
      "loss": 0.0494,
      "step": 211180
    },
    {
      "epoch": 0.34563343217925807,
      "grad_norm": 2.6016056537628174,
      "learning_rate": 9.370070438776251e-06,
      "loss": 0.0493,
      "step": 211200
    },
    {
      "epoch": 0.3456661626179114,
      "grad_norm": 1.6457841396331787,
      "learning_rate": 9.370004546562733e-06,
      "loss": 0.0559,
      "step": 211220
    },
    {
      "epoch": 0.34569889305656476,
      "grad_norm": 2.1457748413085938,
      "learning_rate": 9.369938654349217e-06,
      "loss": 0.0527,
      "step": 211240
    },
    {
      "epoch": 0.3457316234952181,
      "grad_norm": 2.593060255050659,
      "learning_rate": 9.369872762135699e-06,
      "loss": 0.05,
      "step": 211260
    },
    {
      "epoch": 0.34576435393387145,
      "grad_norm": 2.4149014949798584,
      "learning_rate": 9.369806869922182e-06,
      "loss": 0.0609,
      "step": 211280
    },
    {
      "epoch": 0.34579708437252477,
      "grad_norm": 2.433520793914795,
      "learning_rate": 9.369740977708664e-06,
      "loss": 0.0536,
      "step": 211300
    },
    {
      "epoch": 0.3458298148111781,
      "grad_norm": 1.3040111064910889,
      "learning_rate": 9.369675085495148e-06,
      "loss": 0.0359,
      "step": 211320
    },
    {
      "epoch": 0.34586254524983145,
      "grad_norm": 1.5415445566177368,
      "learning_rate": 9.36960919328163e-06,
      "loss": 0.0552,
      "step": 211340
    },
    {
      "epoch": 0.34589527568848477,
      "grad_norm": 1.8817551136016846,
      "learning_rate": 9.369543301068113e-06,
      "loss": 0.036,
      "step": 211360
    },
    {
      "epoch": 0.34592800612713814,
      "grad_norm": 2.1458992958068848,
      "learning_rate": 9.369477408854595e-06,
      "loss": 0.0578,
      "step": 211380
    },
    {
      "epoch": 0.34596073656579146,
      "grad_norm": 0.6624174118041992,
      "learning_rate": 9.369411516641079e-06,
      "loss": 0.0525,
      "step": 211400
    },
    {
      "epoch": 0.3459934670044448,
      "grad_norm": 1.5512940883636475,
      "learning_rate": 9.369345624427562e-06,
      "loss": 0.0514,
      "step": 211420
    },
    {
      "epoch": 0.34602619744309815,
      "grad_norm": 1.511647343635559,
      "learning_rate": 9.369279732214044e-06,
      "loss": 0.06,
      "step": 211440
    },
    {
      "epoch": 0.34605892788175147,
      "grad_norm": 0.9878063797950745,
      "learning_rate": 9.369213840000528e-06,
      "loss": 0.0532,
      "step": 211460
    },
    {
      "epoch": 0.3460916583204048,
      "grad_norm": 1.614771842956543,
      "learning_rate": 9.36914794778701e-06,
      "loss": 0.0381,
      "step": 211480
    },
    {
      "epoch": 0.34612438875905815,
      "grad_norm": 2.605672836303711,
      "learning_rate": 9.369082055573493e-06,
      "loss": 0.0467,
      "step": 211500
    },
    {
      "epoch": 0.34615711919771147,
      "grad_norm": 0.974001944065094,
      "learning_rate": 9.369016163359977e-06,
      "loss": 0.0362,
      "step": 211520
    },
    {
      "epoch": 0.34618984963636484,
      "grad_norm": 0.7057862877845764,
      "learning_rate": 9.368950271146459e-06,
      "loss": 0.0525,
      "step": 211540
    },
    {
      "epoch": 0.34622258007501816,
      "grad_norm": 2.1006925106048584,
      "learning_rate": 9.368884378932942e-06,
      "loss": 0.0536,
      "step": 211560
    },
    {
      "epoch": 0.3462553105136715,
      "grad_norm": 1.9429200887680054,
      "learning_rate": 9.368818486719426e-06,
      "loss": 0.0494,
      "step": 211580
    },
    {
      "epoch": 0.34628804095232485,
      "grad_norm": 1.8939896821975708,
      "learning_rate": 9.368752594505908e-06,
      "loss": 0.0548,
      "step": 211600
    },
    {
      "epoch": 0.34632077139097817,
      "grad_norm": 2.760195732116699,
      "learning_rate": 9.368686702292392e-06,
      "loss": 0.0648,
      "step": 211620
    },
    {
      "epoch": 0.34635350182963154,
      "grad_norm": 1.781798005104065,
      "learning_rate": 9.368620810078873e-06,
      "loss": 0.0595,
      "step": 211640
    },
    {
      "epoch": 0.34638623226828485,
      "grad_norm": 2.3805923461914062,
      "learning_rate": 9.368554917865357e-06,
      "loss": 0.0482,
      "step": 211660
    },
    {
      "epoch": 0.34641896270693817,
      "grad_norm": 0.45264944434165955,
      "learning_rate": 9.368489025651839e-06,
      "loss": 0.0497,
      "step": 211680
    },
    {
      "epoch": 0.34645169314559154,
      "grad_norm": 2.174832344055176,
      "learning_rate": 9.368423133438323e-06,
      "loss": 0.0459,
      "step": 211700
    },
    {
      "epoch": 0.34648442358424486,
      "grad_norm": 2.228090524673462,
      "learning_rate": 9.368357241224804e-06,
      "loss": 0.0399,
      "step": 211720
    },
    {
      "epoch": 0.34651715402289823,
      "grad_norm": 1.442954659461975,
      "learning_rate": 9.368291349011288e-06,
      "loss": 0.0525,
      "step": 211740
    },
    {
      "epoch": 0.34654988446155155,
      "grad_norm": 1.0954123735427856,
      "learning_rate": 9.368225456797772e-06,
      "loss": 0.0478,
      "step": 211760
    },
    {
      "epoch": 0.34658261490020487,
      "grad_norm": 4.83106803894043,
      "learning_rate": 9.368159564584253e-06,
      "loss": 0.0494,
      "step": 211780
    },
    {
      "epoch": 0.34661534533885824,
      "grad_norm": 4.119345188140869,
      "learning_rate": 9.368093672370737e-06,
      "loss": 0.0611,
      "step": 211800
    },
    {
      "epoch": 0.34664807577751156,
      "grad_norm": 1.9083789587020874,
      "learning_rate": 9.368027780157219e-06,
      "loss": 0.0533,
      "step": 211820
    },
    {
      "epoch": 0.3466808062161649,
      "grad_norm": 2.747217893600464,
      "learning_rate": 9.367961887943703e-06,
      "loss": 0.0432,
      "step": 211840
    },
    {
      "epoch": 0.34671353665481824,
      "grad_norm": 2.9054858684539795,
      "learning_rate": 9.367895995730184e-06,
      "loss": 0.0547,
      "step": 211860
    },
    {
      "epoch": 0.34674626709347156,
      "grad_norm": 1.4855226278305054,
      "learning_rate": 9.367830103516668e-06,
      "loss": 0.0508,
      "step": 211880
    },
    {
      "epoch": 0.34677899753212493,
      "grad_norm": 3.9577977657318115,
      "learning_rate": 9.367764211303152e-06,
      "loss": 0.0443,
      "step": 211900
    },
    {
      "epoch": 0.34681172797077825,
      "grad_norm": 2.33355975151062,
      "learning_rate": 9.367698319089633e-06,
      "loss": 0.0435,
      "step": 211920
    },
    {
      "epoch": 0.3468444584094316,
      "grad_norm": 2.2856578826904297,
      "learning_rate": 9.367632426876117e-06,
      "loss": 0.041,
      "step": 211940
    },
    {
      "epoch": 0.34687718884808494,
      "grad_norm": 1.6008429527282715,
      "learning_rate": 9.3675665346626e-06,
      "loss": 0.0554,
      "step": 211960
    },
    {
      "epoch": 0.34690991928673826,
      "grad_norm": 3.314826011657715,
      "learning_rate": 9.367500642449083e-06,
      "loss": 0.048,
      "step": 211980
    },
    {
      "epoch": 0.3469426497253916,
      "grad_norm": 2.163712501525879,
      "learning_rate": 9.367434750235566e-06,
      "loss": 0.048,
      "step": 212000
    },
    {
      "epoch": 0.34697538016404494,
      "grad_norm": 1.891746163368225,
      "learning_rate": 9.367368858022048e-06,
      "loss": 0.0545,
      "step": 212020
    },
    {
      "epoch": 0.3470081106026983,
      "grad_norm": 2.970400810241699,
      "learning_rate": 9.367302965808532e-06,
      "loss": 0.0377,
      "step": 212040
    },
    {
      "epoch": 0.34704084104135163,
      "grad_norm": 2.22759747505188,
      "learning_rate": 9.367237073595014e-06,
      "loss": 0.0495,
      "step": 212060
    },
    {
      "epoch": 0.34707357148000495,
      "grad_norm": 0.9432271718978882,
      "learning_rate": 9.367171181381497e-06,
      "loss": 0.0395,
      "step": 212080
    },
    {
      "epoch": 0.3471063019186583,
      "grad_norm": 2.152879238128662,
      "learning_rate": 9.367105289167979e-06,
      "loss": 0.0506,
      "step": 212100
    },
    {
      "epoch": 0.34713903235731164,
      "grad_norm": 2.576272964477539,
      "learning_rate": 9.367039396954463e-06,
      "loss": 0.0542,
      "step": 212120
    },
    {
      "epoch": 0.347171762795965,
      "grad_norm": 2.0774660110473633,
      "learning_rate": 9.366973504740946e-06,
      "loss": 0.0421,
      "step": 212140
    },
    {
      "epoch": 0.3472044932346183,
      "grad_norm": 1.185667634010315,
      "learning_rate": 9.366907612527428e-06,
      "loss": 0.0464,
      "step": 212160
    },
    {
      "epoch": 0.34723722367327164,
      "grad_norm": 0.6716085076332092,
      "learning_rate": 9.366841720313912e-06,
      "loss": 0.0594,
      "step": 212180
    },
    {
      "epoch": 0.347269954111925,
      "grad_norm": 2.148603677749634,
      "learning_rate": 9.366775828100394e-06,
      "loss": 0.0506,
      "step": 212200
    },
    {
      "epoch": 0.34730268455057833,
      "grad_norm": 2.721595048904419,
      "learning_rate": 9.366709935886877e-06,
      "loss": 0.0431,
      "step": 212220
    },
    {
      "epoch": 0.3473354149892317,
      "grad_norm": 10.263431549072266,
      "learning_rate": 9.366644043673359e-06,
      "loss": 0.0434,
      "step": 212240
    },
    {
      "epoch": 0.347368145427885,
      "grad_norm": 2.403876543045044,
      "learning_rate": 9.366578151459843e-06,
      "loss": 0.0618,
      "step": 212260
    },
    {
      "epoch": 0.34740087586653834,
      "grad_norm": 0.7808877229690552,
      "learning_rate": 9.366512259246325e-06,
      "loss": 0.0461,
      "step": 212280
    },
    {
      "epoch": 0.3474336063051917,
      "grad_norm": 2.299853801727295,
      "learning_rate": 9.366446367032808e-06,
      "loss": 0.04,
      "step": 212300
    },
    {
      "epoch": 0.34746633674384503,
      "grad_norm": 0.58668053150177,
      "learning_rate": 9.366380474819292e-06,
      "loss": 0.0429,
      "step": 212320
    },
    {
      "epoch": 0.3474990671824984,
      "grad_norm": 1.5836132764816284,
      "learning_rate": 9.366314582605774e-06,
      "loss": 0.0606,
      "step": 212340
    },
    {
      "epoch": 0.3475317976211517,
      "grad_norm": 8.797988891601562,
      "learning_rate": 9.366248690392257e-06,
      "loss": 0.0455,
      "step": 212360
    },
    {
      "epoch": 0.34756452805980503,
      "grad_norm": 3.090512752532959,
      "learning_rate": 9.36618279817874e-06,
      "loss": 0.0509,
      "step": 212380
    },
    {
      "epoch": 0.3475972584984584,
      "grad_norm": 0.23978891968727112,
      "learning_rate": 9.366116905965223e-06,
      "loss": 0.0373,
      "step": 212400
    },
    {
      "epoch": 0.3476299889371117,
      "grad_norm": 0.7173750996589661,
      "learning_rate": 9.366051013751706e-06,
      "loss": 0.0448,
      "step": 212420
    },
    {
      "epoch": 0.3476627193757651,
      "grad_norm": 0.7369031310081482,
      "learning_rate": 9.365985121538188e-06,
      "loss": 0.0393,
      "step": 212440
    },
    {
      "epoch": 0.3476954498144184,
      "grad_norm": 3.0712625980377197,
      "learning_rate": 9.365919229324672e-06,
      "loss": 0.0368,
      "step": 212460
    },
    {
      "epoch": 0.34772818025307173,
      "grad_norm": 1.3184922933578491,
      "learning_rate": 9.365853337111155e-06,
      "loss": 0.0368,
      "step": 212480
    },
    {
      "epoch": 0.3477609106917251,
      "grad_norm": 1.5154013633728027,
      "learning_rate": 9.365787444897637e-06,
      "loss": 0.0528,
      "step": 212500
    },
    {
      "epoch": 0.3477936411303784,
      "grad_norm": 1.1331409215927124,
      "learning_rate": 9.36572155268412e-06,
      "loss": 0.0498,
      "step": 212520
    },
    {
      "epoch": 0.3478263715690318,
      "grad_norm": 1.9330503940582275,
      "learning_rate": 9.365655660470603e-06,
      "loss": 0.0484,
      "step": 212540
    },
    {
      "epoch": 0.3478591020076851,
      "grad_norm": 1.6889015436172485,
      "learning_rate": 9.365589768257086e-06,
      "loss": 0.0411,
      "step": 212560
    },
    {
      "epoch": 0.3478918324463384,
      "grad_norm": 2.019141674041748,
      "learning_rate": 9.365523876043568e-06,
      "loss": 0.0443,
      "step": 212580
    },
    {
      "epoch": 0.3479245628849918,
      "grad_norm": 3.6637988090515137,
      "learning_rate": 9.365457983830052e-06,
      "loss": 0.06,
      "step": 212600
    },
    {
      "epoch": 0.3479572933236451,
      "grad_norm": 2.2742714881896973,
      "learning_rate": 9.365392091616534e-06,
      "loss": 0.0388,
      "step": 212620
    },
    {
      "epoch": 0.3479900237622985,
      "grad_norm": 2.9232563972473145,
      "learning_rate": 9.365326199403017e-06,
      "loss": 0.0521,
      "step": 212640
    },
    {
      "epoch": 0.3480227542009518,
      "grad_norm": 1.2474702596664429,
      "learning_rate": 9.365260307189499e-06,
      "loss": 0.0656,
      "step": 212660
    },
    {
      "epoch": 0.3480554846396051,
      "grad_norm": 0.8531402945518494,
      "learning_rate": 9.365194414975983e-06,
      "loss": 0.0534,
      "step": 212680
    },
    {
      "epoch": 0.3480882150782585,
      "grad_norm": 1.789006233215332,
      "learning_rate": 9.365128522762466e-06,
      "loss": 0.0488,
      "step": 212700
    },
    {
      "epoch": 0.3481209455169118,
      "grad_norm": 0.7251757979393005,
      "learning_rate": 9.365062630548948e-06,
      "loss": 0.0536,
      "step": 212720
    },
    {
      "epoch": 0.3481536759555652,
      "grad_norm": 0.49829599261283875,
      "learning_rate": 9.364996738335432e-06,
      "loss": 0.0456,
      "step": 212740
    },
    {
      "epoch": 0.3481864063942185,
      "grad_norm": 3.0017752647399902,
      "learning_rate": 9.364930846121915e-06,
      "loss": 0.0491,
      "step": 212760
    },
    {
      "epoch": 0.3482191368328718,
      "grad_norm": 0.6295199394226074,
      "learning_rate": 9.364864953908397e-06,
      "loss": 0.0532,
      "step": 212780
    },
    {
      "epoch": 0.3482518672715252,
      "grad_norm": 2.871648073196411,
      "learning_rate": 9.36479906169488e-06,
      "loss": 0.0532,
      "step": 212800
    },
    {
      "epoch": 0.3482845977101785,
      "grad_norm": 10.060999870300293,
      "learning_rate": 9.364733169481364e-06,
      "loss": 0.0539,
      "step": 212820
    },
    {
      "epoch": 0.3483173281488319,
      "grad_norm": 2.153953790664673,
      "learning_rate": 9.364667277267846e-06,
      "loss": 0.0422,
      "step": 212840
    },
    {
      "epoch": 0.3483500585874852,
      "grad_norm": 7.8241448402404785,
      "learning_rate": 9.36460138505433e-06,
      "loss": 0.0442,
      "step": 212860
    },
    {
      "epoch": 0.3483827890261385,
      "grad_norm": 2.301530361175537,
      "learning_rate": 9.364535492840812e-06,
      "loss": 0.0411,
      "step": 212880
    },
    {
      "epoch": 0.3484155194647919,
      "grad_norm": 1.5544246435165405,
      "learning_rate": 9.364469600627295e-06,
      "loss": 0.0581,
      "step": 212900
    },
    {
      "epoch": 0.3484482499034452,
      "grad_norm": 2.8183224201202393,
      "learning_rate": 9.364403708413777e-06,
      "loss": 0.0496,
      "step": 212920
    },
    {
      "epoch": 0.34848098034209857,
      "grad_norm": 7.584659099578857,
      "learning_rate": 9.364337816200261e-06,
      "loss": 0.0454,
      "step": 212940
    },
    {
      "epoch": 0.3485137107807519,
      "grad_norm": 1.6592668294906616,
      "learning_rate": 9.364271923986743e-06,
      "loss": 0.0542,
      "step": 212960
    },
    {
      "epoch": 0.3485464412194052,
      "grad_norm": 1.5655285120010376,
      "learning_rate": 9.364206031773226e-06,
      "loss": 0.0486,
      "step": 212980
    },
    {
      "epoch": 0.3485791716580586,
      "grad_norm": 2.889597177505493,
      "learning_rate": 9.364140139559708e-06,
      "loss": 0.0447,
      "step": 213000
    },
    {
      "epoch": 0.3486119020967119,
      "grad_norm": 0.5722610950469971,
      "learning_rate": 9.364074247346192e-06,
      "loss": 0.0525,
      "step": 213020
    },
    {
      "epoch": 0.34864463253536526,
      "grad_norm": 3.2649731636047363,
      "learning_rate": 9.364008355132674e-06,
      "loss": 0.0583,
      "step": 213040
    },
    {
      "epoch": 0.3486773629740186,
      "grad_norm": 1.8799599409103394,
      "learning_rate": 9.363942462919157e-06,
      "loss": 0.0508,
      "step": 213060
    },
    {
      "epoch": 0.3487100934126719,
      "grad_norm": 0.4364425837993622,
      "learning_rate": 9.363876570705641e-06,
      "loss": 0.0404,
      "step": 213080
    },
    {
      "epoch": 0.34874282385132527,
      "grad_norm": 2.871328353881836,
      "learning_rate": 9.363810678492123e-06,
      "loss": 0.057,
      "step": 213100
    },
    {
      "epoch": 0.3487755542899786,
      "grad_norm": 1.22553551197052,
      "learning_rate": 9.363744786278606e-06,
      "loss": 0.0341,
      "step": 213120
    },
    {
      "epoch": 0.34880828472863196,
      "grad_norm": 2.946030378341675,
      "learning_rate": 9.36367889406509e-06,
      "loss": 0.0495,
      "step": 213140
    },
    {
      "epoch": 0.3488410151672853,
      "grad_norm": 2.9252662658691406,
      "learning_rate": 9.363613001851572e-06,
      "loss": 0.0359,
      "step": 213160
    },
    {
      "epoch": 0.3488737456059386,
      "grad_norm": 1.1996783018112183,
      "learning_rate": 9.363547109638055e-06,
      "loss": 0.0329,
      "step": 213180
    },
    {
      "epoch": 0.34890647604459196,
      "grad_norm": 0.9189187288284302,
      "learning_rate": 9.363481217424539e-06,
      "loss": 0.0536,
      "step": 213200
    },
    {
      "epoch": 0.3489392064832453,
      "grad_norm": 0.5945582389831543,
      "learning_rate": 9.363415325211021e-06,
      "loss": 0.0583,
      "step": 213220
    },
    {
      "epoch": 0.34897193692189865,
      "grad_norm": 1.2664564847946167,
      "learning_rate": 9.363349432997504e-06,
      "loss": 0.0526,
      "step": 213240
    },
    {
      "epoch": 0.34900466736055197,
      "grad_norm": 0.9470924735069275,
      "learning_rate": 9.363283540783986e-06,
      "loss": 0.0476,
      "step": 213260
    },
    {
      "epoch": 0.3490373977992053,
      "grad_norm": 1.8632243871688843,
      "learning_rate": 9.36321764857047e-06,
      "loss": 0.0478,
      "step": 213280
    },
    {
      "epoch": 0.34907012823785866,
      "grad_norm": 1.1974660158157349,
      "learning_rate": 9.363151756356952e-06,
      "loss": 0.0513,
      "step": 213300
    },
    {
      "epoch": 0.349102858676512,
      "grad_norm": 3.056772470474243,
      "learning_rate": 9.363085864143435e-06,
      "loss": 0.0567,
      "step": 213320
    },
    {
      "epoch": 0.34913558911516535,
      "grad_norm": 2.245978593826294,
      "learning_rate": 9.363019971929917e-06,
      "loss": 0.0448,
      "step": 213340
    },
    {
      "epoch": 0.34916831955381866,
      "grad_norm": 3.0154812335968018,
      "learning_rate": 9.362954079716401e-06,
      "loss": 0.058,
      "step": 213360
    },
    {
      "epoch": 0.349201049992472,
      "grad_norm": 1.7430875301361084,
      "learning_rate": 9.362888187502883e-06,
      "loss": 0.0452,
      "step": 213380
    },
    {
      "epoch": 0.34923378043112535,
      "grad_norm": 1.0674166679382324,
      "learning_rate": 9.362822295289366e-06,
      "loss": 0.039,
      "step": 213400
    },
    {
      "epoch": 0.34926651086977867,
      "grad_norm": 0.975042998790741,
      "learning_rate": 9.362756403075848e-06,
      "loss": 0.0558,
      "step": 213420
    },
    {
      "epoch": 0.34929924130843204,
      "grad_norm": 1.5663678646087646,
      "learning_rate": 9.362690510862332e-06,
      "loss": 0.0544,
      "step": 213440
    },
    {
      "epoch": 0.34933197174708536,
      "grad_norm": 1.1372531652450562,
      "learning_rate": 9.362624618648814e-06,
      "loss": 0.0406,
      "step": 213460
    },
    {
      "epoch": 0.3493647021857387,
      "grad_norm": 2.285564661026001,
      "learning_rate": 9.362558726435297e-06,
      "loss": 0.0511,
      "step": 213480
    },
    {
      "epoch": 0.34939743262439205,
      "grad_norm": 1.3372732400894165,
      "learning_rate": 9.362492834221781e-06,
      "loss": 0.0529,
      "step": 213500
    },
    {
      "epoch": 0.34943016306304536,
      "grad_norm": 5.5384521484375,
      "learning_rate": 9.362426942008263e-06,
      "loss": 0.0556,
      "step": 213520
    },
    {
      "epoch": 0.34946289350169873,
      "grad_norm": 1.7279084920883179,
      "learning_rate": 9.362361049794746e-06,
      "loss": 0.0491,
      "step": 213540
    },
    {
      "epoch": 0.34949562394035205,
      "grad_norm": 1.233500599861145,
      "learning_rate": 9.36229515758123e-06,
      "loss": 0.0567,
      "step": 213560
    },
    {
      "epoch": 0.34952835437900537,
      "grad_norm": 2.5771896839141846,
      "learning_rate": 9.362229265367712e-06,
      "loss": 0.0535,
      "step": 213580
    },
    {
      "epoch": 0.34956108481765874,
      "grad_norm": 1.626699686050415,
      "learning_rate": 9.362163373154195e-06,
      "loss": 0.042,
      "step": 213600
    },
    {
      "epoch": 0.34959381525631206,
      "grad_norm": 1.8940339088439941,
      "learning_rate": 9.362097480940679e-06,
      "loss": 0.0444,
      "step": 213620
    },
    {
      "epoch": 0.34962654569496543,
      "grad_norm": 0.768841028213501,
      "learning_rate": 9.362031588727161e-06,
      "loss": 0.0381,
      "step": 213640
    },
    {
      "epoch": 0.34965927613361875,
      "grad_norm": 1.1476556062698364,
      "learning_rate": 9.361965696513645e-06,
      "loss": 0.0496,
      "step": 213660
    },
    {
      "epoch": 0.34969200657227206,
      "grad_norm": 2.346996545791626,
      "learning_rate": 9.361899804300126e-06,
      "loss": 0.0483,
      "step": 213680
    },
    {
      "epoch": 0.34972473701092543,
      "grad_norm": 1.1009459495544434,
      "learning_rate": 9.36183391208661e-06,
      "loss": 0.0397,
      "step": 213700
    },
    {
      "epoch": 0.34975746744957875,
      "grad_norm": 1.8710030317306519,
      "learning_rate": 9.361768019873092e-06,
      "loss": 0.0581,
      "step": 213720
    },
    {
      "epoch": 0.3497901978882321,
      "grad_norm": 4.03102445602417,
      "learning_rate": 9.361702127659576e-06,
      "loss": 0.0348,
      "step": 213740
    },
    {
      "epoch": 0.34982292832688544,
      "grad_norm": 2.234792470932007,
      "learning_rate": 9.361636235446057e-06,
      "loss": 0.0456,
      "step": 213760
    },
    {
      "epoch": 0.34985565876553876,
      "grad_norm": 3.4051973819732666,
      "learning_rate": 9.361570343232541e-06,
      "loss": 0.0437,
      "step": 213780
    },
    {
      "epoch": 0.34988838920419213,
      "grad_norm": 2.4935405254364014,
      "learning_rate": 9.361504451019023e-06,
      "loss": 0.028,
      "step": 213800
    },
    {
      "epoch": 0.34992111964284545,
      "grad_norm": 2.6184849739074707,
      "learning_rate": 9.361438558805506e-06,
      "loss": 0.0523,
      "step": 213820
    },
    {
      "epoch": 0.3499538500814988,
      "grad_norm": 0.6415941119194031,
      "learning_rate": 9.361372666591988e-06,
      "loss": 0.064,
      "step": 213840
    },
    {
      "epoch": 0.34998658052015214,
      "grad_norm": 2.115206241607666,
      "learning_rate": 9.361306774378472e-06,
      "loss": 0.0431,
      "step": 213860
    },
    {
      "epoch": 0.35001931095880545,
      "grad_norm": 0.49160128831863403,
      "learning_rate": 9.361240882164956e-06,
      "loss": 0.0343,
      "step": 213880
    },
    {
      "epoch": 0.3500520413974588,
      "grad_norm": 1.8595155477523804,
      "learning_rate": 9.361174989951437e-06,
      "loss": 0.0457,
      "step": 213900
    },
    {
      "epoch": 0.35008477183611214,
      "grad_norm": 1.4194481372833252,
      "learning_rate": 9.361109097737921e-06,
      "loss": 0.0536,
      "step": 213920
    },
    {
      "epoch": 0.3501175022747655,
      "grad_norm": 15.927287101745605,
      "learning_rate": 9.361043205524405e-06,
      "loss": 0.0584,
      "step": 213940
    },
    {
      "epoch": 0.35015023271341883,
      "grad_norm": 1.3733044862747192,
      "learning_rate": 9.360977313310887e-06,
      "loss": 0.0598,
      "step": 213960
    },
    {
      "epoch": 0.35018296315207215,
      "grad_norm": 1.6879838705062866,
      "learning_rate": 9.36091142109737e-06,
      "loss": 0.0446,
      "step": 213980
    },
    {
      "epoch": 0.3502156935907255,
      "grad_norm": 1.567196011543274,
      "learning_rate": 9.360845528883854e-06,
      "loss": 0.0494,
      "step": 214000
    },
    {
      "epoch": 0.35024842402937884,
      "grad_norm": 0.3512870967388153,
      "learning_rate": 9.360779636670336e-06,
      "loss": 0.0324,
      "step": 214020
    },
    {
      "epoch": 0.3502811544680322,
      "grad_norm": 0.7772805094718933,
      "learning_rate": 9.36071374445682e-06,
      "loss": 0.0576,
      "step": 214040
    },
    {
      "epoch": 0.3503138849066855,
      "grad_norm": 0.8484060168266296,
      "learning_rate": 9.360647852243301e-06,
      "loss": 0.0385,
      "step": 214060
    },
    {
      "epoch": 0.35034661534533884,
      "grad_norm": 1.7489742040634155,
      "learning_rate": 9.360581960029785e-06,
      "loss": 0.0439,
      "step": 214080
    },
    {
      "epoch": 0.3503793457839922,
      "grad_norm": 0.6644102334976196,
      "learning_rate": 9.360516067816267e-06,
      "loss": 0.049,
      "step": 214100
    },
    {
      "epoch": 0.35041207622264553,
      "grad_norm": 2.3237948417663574,
      "learning_rate": 9.36045017560275e-06,
      "loss": 0.0448,
      "step": 214120
    },
    {
      "epoch": 0.3504448066612989,
      "grad_norm": 0.5860832333564758,
      "learning_rate": 9.360384283389232e-06,
      "loss": 0.0562,
      "step": 214140
    },
    {
      "epoch": 0.3504775370999522,
      "grad_norm": 0.725975513458252,
      "learning_rate": 9.360318391175716e-06,
      "loss": 0.0529,
      "step": 214160
    },
    {
      "epoch": 0.35051026753860554,
      "grad_norm": 2.4245197772979736,
      "learning_rate": 9.360252498962197e-06,
      "loss": 0.0603,
      "step": 214180
    },
    {
      "epoch": 0.3505429979772589,
      "grad_norm": 5.65798807144165,
      "learning_rate": 9.360186606748681e-06,
      "loss": 0.0545,
      "step": 214200
    },
    {
      "epoch": 0.3505757284159122,
      "grad_norm": 1.6572529077529907,
      "learning_rate": 9.360120714535165e-06,
      "loss": 0.0274,
      "step": 214220
    },
    {
      "epoch": 0.35060845885456554,
      "grad_norm": 4.546576499938965,
      "learning_rate": 9.360054822321647e-06,
      "loss": 0.0526,
      "step": 214240
    },
    {
      "epoch": 0.3506411892932189,
      "grad_norm": 0.8131580352783203,
      "learning_rate": 9.35998893010813e-06,
      "loss": 0.0347,
      "step": 214260
    },
    {
      "epoch": 0.35067391973187223,
      "grad_norm": 0.4408245086669922,
      "learning_rate": 9.359923037894612e-06,
      "loss": 0.0661,
      "step": 214280
    },
    {
      "epoch": 0.3507066501705256,
      "grad_norm": 2.302365779876709,
      "learning_rate": 9.359857145681096e-06,
      "loss": 0.0444,
      "step": 214300
    },
    {
      "epoch": 0.3507393806091789,
      "grad_norm": 1.3393054008483887,
      "learning_rate": 9.359791253467578e-06,
      "loss": 0.0616,
      "step": 214320
    },
    {
      "epoch": 0.35077211104783224,
      "grad_norm": 0.3666502833366394,
      "learning_rate": 9.359725361254061e-06,
      "loss": 0.0521,
      "step": 214340
    },
    {
      "epoch": 0.3508048414864856,
      "grad_norm": 2.1336569786071777,
      "learning_rate": 9.359659469040545e-06,
      "loss": 0.0569,
      "step": 214360
    },
    {
      "epoch": 0.3508375719251389,
      "grad_norm": 2.780839681625366,
      "learning_rate": 9.359593576827027e-06,
      "loss": 0.0549,
      "step": 214380
    },
    {
      "epoch": 0.3508703023637923,
      "grad_norm": 1.222193717956543,
      "learning_rate": 9.35952768461351e-06,
      "loss": 0.0484,
      "step": 214400
    },
    {
      "epoch": 0.3509030328024456,
      "grad_norm": 1.6791987419128418,
      "learning_rate": 9.359461792399994e-06,
      "loss": 0.073,
      "step": 214420
    },
    {
      "epoch": 0.35093576324109893,
      "grad_norm": 1.6484309434890747,
      "learning_rate": 9.359395900186476e-06,
      "loss": 0.0601,
      "step": 214440
    },
    {
      "epoch": 0.3509684936797523,
      "grad_norm": 1.587667465209961,
      "learning_rate": 9.35933000797296e-06,
      "loss": 0.0476,
      "step": 214460
    },
    {
      "epoch": 0.3510012241184056,
      "grad_norm": 1.4600811004638672,
      "learning_rate": 9.359264115759441e-06,
      "loss": 0.0471,
      "step": 214480
    },
    {
      "epoch": 0.351033954557059,
      "grad_norm": 2.9987268447875977,
      "learning_rate": 9.359198223545925e-06,
      "loss": 0.05,
      "step": 214500
    },
    {
      "epoch": 0.3510666849957123,
      "grad_norm": 1.9158985614776611,
      "learning_rate": 9.359132331332407e-06,
      "loss": 0.0449,
      "step": 214520
    },
    {
      "epoch": 0.3510994154343656,
      "grad_norm": 0.6123685240745544,
      "learning_rate": 9.35906643911889e-06,
      "loss": 0.0423,
      "step": 214540
    },
    {
      "epoch": 0.351132145873019,
      "grad_norm": 0.5540263652801514,
      "learning_rate": 9.359000546905372e-06,
      "loss": 0.0507,
      "step": 214560
    },
    {
      "epoch": 0.3511648763116723,
      "grad_norm": 1.7376513481140137,
      "learning_rate": 9.358934654691856e-06,
      "loss": 0.0404,
      "step": 214580
    },
    {
      "epoch": 0.3511976067503257,
      "grad_norm": 2.3021140098571777,
      "learning_rate": 9.35886876247834e-06,
      "loss": 0.0452,
      "step": 214600
    },
    {
      "epoch": 0.351230337188979,
      "grad_norm": 1.285831332206726,
      "learning_rate": 9.358802870264821e-06,
      "loss": 0.0357,
      "step": 214620
    },
    {
      "epoch": 0.3512630676276323,
      "grad_norm": 1.6121947765350342,
      "learning_rate": 9.358736978051305e-06,
      "loss": 0.0635,
      "step": 214640
    },
    {
      "epoch": 0.3512957980662857,
      "grad_norm": 3.0608458518981934,
      "learning_rate": 9.358671085837787e-06,
      "loss": 0.0596,
      "step": 214660
    },
    {
      "epoch": 0.351328528504939,
      "grad_norm": 1.5330873727798462,
      "learning_rate": 9.35860519362427e-06,
      "loss": 0.0466,
      "step": 214680
    },
    {
      "epoch": 0.3513612589435924,
      "grad_norm": 1.2672390937805176,
      "learning_rate": 9.358539301410752e-06,
      "loss": 0.0533,
      "step": 214700
    },
    {
      "epoch": 0.3513939893822457,
      "grad_norm": 0.5115892887115479,
      "learning_rate": 9.358473409197236e-06,
      "loss": 0.0396,
      "step": 214720
    },
    {
      "epoch": 0.351426719820899,
      "grad_norm": 2.9293410778045654,
      "learning_rate": 9.35840751698372e-06,
      "loss": 0.0586,
      "step": 214740
    },
    {
      "epoch": 0.3514594502595524,
      "grad_norm": 4.278543949127197,
      "learning_rate": 9.358341624770201e-06,
      "loss": 0.0493,
      "step": 214760
    },
    {
      "epoch": 0.3514921806982057,
      "grad_norm": 2.844696044921875,
      "learning_rate": 9.358275732556685e-06,
      "loss": 0.0512,
      "step": 214780
    },
    {
      "epoch": 0.3515249111368591,
      "grad_norm": 1.9138423204421997,
      "learning_rate": 9.358209840343168e-06,
      "loss": 0.0462,
      "step": 214800
    },
    {
      "epoch": 0.3515576415755124,
      "grad_norm": 2.2673656940460205,
      "learning_rate": 9.35814394812965e-06,
      "loss": 0.0494,
      "step": 214820
    },
    {
      "epoch": 0.3515903720141657,
      "grad_norm": 0.42383888363838196,
      "learning_rate": 9.358078055916134e-06,
      "loss": 0.0583,
      "step": 214840
    },
    {
      "epoch": 0.3516231024528191,
      "grad_norm": 4.757297039031982,
      "learning_rate": 9.358012163702616e-06,
      "loss": 0.0488,
      "step": 214860
    },
    {
      "epoch": 0.3516558328914724,
      "grad_norm": 0.5365904569625854,
      "learning_rate": 9.3579462714891e-06,
      "loss": 0.0389,
      "step": 214880
    },
    {
      "epoch": 0.35168856333012577,
      "grad_norm": 0.4880671501159668,
      "learning_rate": 9.357880379275581e-06,
      "loss": 0.0422,
      "step": 214900
    },
    {
      "epoch": 0.3517212937687791,
      "grad_norm": 4.779674530029297,
      "learning_rate": 9.357814487062065e-06,
      "loss": 0.052,
      "step": 214920
    },
    {
      "epoch": 0.3517540242074324,
      "grad_norm": 1.353606939315796,
      "learning_rate": 9.357748594848548e-06,
      "loss": 0.045,
      "step": 214940
    },
    {
      "epoch": 0.3517867546460858,
      "grad_norm": 1.1113739013671875,
      "learning_rate": 9.35768270263503e-06,
      "loss": 0.0488,
      "step": 214960
    },
    {
      "epoch": 0.3518194850847391,
      "grad_norm": 2.249495029449463,
      "learning_rate": 9.357616810421514e-06,
      "loss": 0.0505,
      "step": 214980
    },
    {
      "epoch": 0.35185221552339246,
      "grad_norm": 2.904689311981201,
      "learning_rate": 9.357550918207996e-06,
      "loss": 0.0506,
      "step": 215000
    },
    {
      "epoch": 0.3518849459620458,
      "grad_norm": 1.318590760231018,
      "learning_rate": 9.35748502599448e-06,
      "loss": 0.0579,
      "step": 215020
    },
    {
      "epoch": 0.3519176764006991,
      "grad_norm": 1.8664264678955078,
      "learning_rate": 9.357419133780961e-06,
      "loss": 0.0569,
      "step": 215040
    },
    {
      "epoch": 0.35195040683935247,
      "grad_norm": 2.0825767517089844,
      "learning_rate": 9.357353241567445e-06,
      "loss": 0.0402,
      "step": 215060
    },
    {
      "epoch": 0.3519831372780058,
      "grad_norm": 0.4626767039299011,
      "learning_rate": 9.357287349353927e-06,
      "loss": 0.0353,
      "step": 215080
    },
    {
      "epoch": 0.35201586771665916,
      "grad_norm": 0.9568367600440979,
      "learning_rate": 9.35722145714041e-06,
      "loss": 0.0443,
      "step": 215100
    },
    {
      "epoch": 0.3520485981553125,
      "grad_norm": 1.0734074115753174,
      "learning_rate": 9.357155564926892e-06,
      "loss": 0.0429,
      "step": 215120
    },
    {
      "epoch": 0.3520813285939658,
      "grad_norm": 2.1290981769561768,
      "learning_rate": 9.357089672713376e-06,
      "loss": 0.0336,
      "step": 215140
    },
    {
      "epoch": 0.35211405903261916,
      "grad_norm": 0.5226354598999023,
      "learning_rate": 9.35702378049986e-06,
      "loss": 0.0355,
      "step": 215160
    },
    {
      "epoch": 0.3521467894712725,
      "grad_norm": 0.7368055582046509,
      "learning_rate": 9.356957888286343e-06,
      "loss": 0.0389,
      "step": 215180
    },
    {
      "epoch": 0.35217951990992585,
      "grad_norm": 1.2984015941619873,
      "learning_rate": 9.356891996072825e-06,
      "loss": 0.0524,
      "step": 215200
    },
    {
      "epoch": 0.35221225034857917,
      "grad_norm": 1.9067641496658325,
      "learning_rate": 9.356826103859308e-06,
      "loss": 0.0393,
      "step": 215220
    },
    {
      "epoch": 0.3522449807872325,
      "grad_norm": 2.0152406692504883,
      "learning_rate": 9.35676021164579e-06,
      "loss": 0.0465,
      "step": 215240
    },
    {
      "epoch": 0.35227771122588586,
      "grad_norm": 0.4309980273246765,
      "learning_rate": 9.356694319432274e-06,
      "loss": 0.0416,
      "step": 215260
    },
    {
      "epoch": 0.3523104416645392,
      "grad_norm": 2.496251344680786,
      "learning_rate": 9.356628427218757e-06,
      "loss": 0.0669,
      "step": 215280
    },
    {
      "epoch": 0.35234317210319255,
      "grad_norm": 1.7086964845657349,
      "learning_rate": 9.35656253500524e-06,
      "loss": 0.0511,
      "step": 215300
    },
    {
      "epoch": 0.35237590254184586,
      "grad_norm": 1.370336651802063,
      "learning_rate": 9.356496642791723e-06,
      "loss": 0.0492,
      "step": 215320
    },
    {
      "epoch": 0.3524086329804992,
      "grad_norm": 1.2834088802337646,
      "learning_rate": 9.356430750578205e-06,
      "loss": 0.039,
      "step": 215340
    },
    {
      "epoch": 0.35244136341915255,
      "grad_norm": 3.610973596572876,
      "learning_rate": 9.356364858364688e-06,
      "loss": 0.0466,
      "step": 215360
    },
    {
      "epoch": 0.35247409385780587,
      "grad_norm": 3.6956911087036133,
      "learning_rate": 9.35629896615117e-06,
      "loss": 0.0412,
      "step": 215380
    },
    {
      "epoch": 0.35250682429645924,
      "grad_norm": 5.061840534210205,
      "learning_rate": 9.356233073937654e-06,
      "loss": 0.0509,
      "step": 215400
    },
    {
      "epoch": 0.35253955473511256,
      "grad_norm": 3.487607955932617,
      "learning_rate": 9.356167181724136e-06,
      "loss": 0.0574,
      "step": 215420
    },
    {
      "epoch": 0.3525722851737659,
      "grad_norm": 1.9564874172210693,
      "learning_rate": 9.35610128951062e-06,
      "loss": 0.0513,
      "step": 215440
    },
    {
      "epoch": 0.35260501561241925,
      "grad_norm": 1.0189210176467896,
      "learning_rate": 9.356035397297101e-06,
      "loss": 0.037,
      "step": 215460
    },
    {
      "epoch": 0.35263774605107256,
      "grad_norm": 2.866163969039917,
      "learning_rate": 9.355969505083585e-06,
      "loss": 0.0408,
      "step": 215480
    },
    {
      "epoch": 0.35267047648972594,
      "grad_norm": 0.997173547744751,
      "learning_rate": 9.355903612870067e-06,
      "loss": 0.0429,
      "step": 215500
    },
    {
      "epoch": 0.35270320692837925,
      "grad_norm": 1.3571209907531738,
      "learning_rate": 9.35583772065655e-06,
      "loss": 0.0319,
      "step": 215520
    },
    {
      "epoch": 0.35273593736703257,
      "grad_norm": 36.836551666259766,
      "learning_rate": 9.355771828443034e-06,
      "loss": 0.0501,
      "step": 215540
    },
    {
      "epoch": 0.35276866780568594,
      "grad_norm": 0.6692149639129639,
      "learning_rate": 9.355705936229516e-06,
      "loss": 0.0521,
      "step": 215560
    },
    {
      "epoch": 0.35280139824433926,
      "grad_norm": 2.5060718059539795,
      "learning_rate": 9.355640044016e-06,
      "loss": 0.0554,
      "step": 215580
    },
    {
      "epoch": 0.35283412868299263,
      "grad_norm": 1.566245675086975,
      "learning_rate": 9.355574151802483e-06,
      "loss": 0.0526,
      "step": 215600
    },
    {
      "epoch": 0.35286685912164595,
      "grad_norm": 2.3366427421569824,
      "learning_rate": 9.355508259588965e-06,
      "loss": 0.0435,
      "step": 215620
    },
    {
      "epoch": 0.35289958956029926,
      "grad_norm": 2.2022972106933594,
      "learning_rate": 9.355442367375449e-06,
      "loss": 0.0603,
      "step": 215640
    },
    {
      "epoch": 0.35293231999895264,
      "grad_norm": 1.6845396757125854,
      "learning_rate": 9.355376475161932e-06,
      "loss": 0.0299,
      "step": 215660
    },
    {
      "epoch": 0.35296505043760595,
      "grad_norm": 1.595701813697815,
      "learning_rate": 9.355310582948414e-06,
      "loss": 0.036,
      "step": 215680
    },
    {
      "epoch": 0.3529977808762593,
      "grad_norm": 2.119715929031372,
      "learning_rate": 9.355244690734898e-06,
      "loss": 0.0548,
      "step": 215700
    },
    {
      "epoch": 0.35303051131491264,
      "grad_norm": 1.0542091131210327,
      "learning_rate": 9.35517879852138e-06,
      "loss": 0.0649,
      "step": 215720
    },
    {
      "epoch": 0.35306324175356596,
      "grad_norm": 1.4210468530654907,
      "learning_rate": 9.355112906307863e-06,
      "loss": 0.0447,
      "step": 215740
    },
    {
      "epoch": 0.35309597219221933,
      "grad_norm": 1.279274582862854,
      "learning_rate": 9.355047014094345e-06,
      "loss": 0.0484,
      "step": 215760
    },
    {
      "epoch": 0.35312870263087265,
      "grad_norm": 1.1122242212295532,
      "learning_rate": 9.354981121880829e-06,
      "loss": 0.0538,
      "step": 215780
    },
    {
      "epoch": 0.353161433069526,
      "grad_norm": 1.2485337257385254,
      "learning_rate": 9.35491522966731e-06,
      "loss": 0.0448,
      "step": 215800
    },
    {
      "epoch": 0.35319416350817934,
      "grad_norm": 2.69153094291687,
      "learning_rate": 9.354849337453794e-06,
      "loss": 0.0442,
      "step": 215820
    },
    {
      "epoch": 0.35322689394683265,
      "grad_norm": 3.0989346504211426,
      "learning_rate": 9.354783445240276e-06,
      "loss": 0.0577,
      "step": 215840
    },
    {
      "epoch": 0.353259624385486,
      "grad_norm": 5.918093204498291,
      "learning_rate": 9.35471755302676e-06,
      "loss": 0.0506,
      "step": 215860
    },
    {
      "epoch": 0.35329235482413934,
      "grad_norm": 0.9456143379211426,
      "learning_rate": 9.354651660813241e-06,
      "loss": 0.0389,
      "step": 215880
    },
    {
      "epoch": 0.3533250852627927,
      "grad_norm": 3.0268139839172363,
      "learning_rate": 9.354585768599725e-06,
      "loss": 0.0609,
      "step": 215900
    },
    {
      "epoch": 0.35335781570144603,
      "grad_norm": 3.3493692874908447,
      "learning_rate": 9.354519876386209e-06,
      "loss": 0.0519,
      "step": 215920
    },
    {
      "epoch": 0.35339054614009935,
      "grad_norm": 4.512770175933838,
      "learning_rate": 9.35445398417269e-06,
      "loss": 0.0453,
      "step": 215940
    },
    {
      "epoch": 0.3534232765787527,
      "grad_norm": 4.151573657989502,
      "learning_rate": 9.354388091959174e-06,
      "loss": 0.0437,
      "step": 215960
    },
    {
      "epoch": 0.35345600701740604,
      "grad_norm": 1.6268731355667114,
      "learning_rate": 9.354322199745658e-06,
      "loss": 0.0458,
      "step": 215980
    },
    {
      "epoch": 0.3534887374560594,
      "grad_norm": 1.6285743713378906,
      "learning_rate": 9.35425630753214e-06,
      "loss": 0.0443,
      "step": 216000
    },
    {
      "epoch": 0.3535214678947127,
      "grad_norm": 0.8140982389450073,
      "learning_rate": 9.354190415318623e-06,
      "loss": 0.0449,
      "step": 216020
    },
    {
      "epoch": 0.35355419833336604,
      "grad_norm": 0.7987193465232849,
      "learning_rate": 9.354124523105107e-06,
      "loss": 0.0512,
      "step": 216040
    },
    {
      "epoch": 0.3535869287720194,
      "grad_norm": 0.7494825720787048,
      "learning_rate": 9.354058630891589e-06,
      "loss": 0.041,
      "step": 216060
    },
    {
      "epoch": 0.35361965921067273,
      "grad_norm": 0.28222334384918213,
      "learning_rate": 9.353992738678072e-06,
      "loss": 0.0466,
      "step": 216080
    },
    {
      "epoch": 0.3536523896493261,
      "grad_norm": 1.1655312776565552,
      "learning_rate": 9.353926846464554e-06,
      "loss": 0.044,
      "step": 216100
    },
    {
      "epoch": 0.3536851200879794,
      "grad_norm": 1.7184029817581177,
      "learning_rate": 9.353860954251038e-06,
      "loss": 0.0445,
      "step": 216120
    },
    {
      "epoch": 0.35371785052663274,
      "grad_norm": 0.9583636522293091,
      "learning_rate": 9.35379506203752e-06,
      "loss": 0.0567,
      "step": 216140
    },
    {
      "epoch": 0.3537505809652861,
      "grad_norm": 2.3283097743988037,
      "learning_rate": 9.353729169824003e-06,
      "loss": 0.0581,
      "step": 216160
    },
    {
      "epoch": 0.3537833114039394,
      "grad_norm": 0.3365350365638733,
      "learning_rate": 9.353663277610485e-06,
      "loss": 0.0527,
      "step": 216180
    },
    {
      "epoch": 0.3538160418425928,
      "grad_norm": 1.2978532314300537,
      "learning_rate": 9.353597385396969e-06,
      "loss": 0.0347,
      "step": 216200
    },
    {
      "epoch": 0.3538487722812461,
      "grad_norm": 4.348071575164795,
      "learning_rate": 9.35353149318345e-06,
      "loss": 0.0469,
      "step": 216220
    },
    {
      "epoch": 0.35388150271989943,
      "grad_norm": 0.8393914103507996,
      "learning_rate": 9.353465600969934e-06,
      "loss": 0.0545,
      "step": 216240
    },
    {
      "epoch": 0.3539142331585528,
      "grad_norm": 2.214221954345703,
      "learning_rate": 9.353399708756416e-06,
      "loss": 0.0458,
      "step": 216260
    },
    {
      "epoch": 0.3539469635972061,
      "grad_norm": 2.2311689853668213,
      "learning_rate": 9.3533338165429e-06,
      "loss": 0.039,
      "step": 216280
    },
    {
      "epoch": 0.3539796940358595,
      "grad_norm": 1.687697410583496,
      "learning_rate": 9.353267924329381e-06,
      "loss": 0.0532,
      "step": 216300
    },
    {
      "epoch": 0.3540124244745128,
      "grad_norm": 2.849032402038574,
      "learning_rate": 9.353202032115865e-06,
      "loss": 0.0431,
      "step": 216320
    },
    {
      "epoch": 0.3540451549131661,
      "grad_norm": 1.4974961280822754,
      "learning_rate": 9.353136139902349e-06,
      "loss": 0.0458,
      "step": 216340
    },
    {
      "epoch": 0.3540778853518195,
      "grad_norm": 8.497213363647461,
      "learning_rate": 9.35307024768883e-06,
      "loss": 0.0484,
      "step": 216360
    },
    {
      "epoch": 0.3541106157904728,
      "grad_norm": 2.8077986240386963,
      "learning_rate": 9.353004355475314e-06,
      "loss": 0.0489,
      "step": 216380
    },
    {
      "epoch": 0.3541433462291262,
      "grad_norm": 1.2349605560302734,
      "learning_rate": 9.352938463261798e-06,
      "loss": 0.0368,
      "step": 216400
    },
    {
      "epoch": 0.3541760766677795,
      "grad_norm": 1.6910958290100098,
      "learning_rate": 9.35287257104828e-06,
      "loss": 0.0506,
      "step": 216420
    },
    {
      "epoch": 0.3542088071064328,
      "grad_norm": 2.356743335723877,
      "learning_rate": 9.352806678834763e-06,
      "loss": 0.0496,
      "step": 216440
    },
    {
      "epoch": 0.3542415375450862,
      "grad_norm": 0.7736361622810364,
      "learning_rate": 9.352740786621247e-06,
      "loss": 0.0673,
      "step": 216460
    },
    {
      "epoch": 0.3542742679837395,
      "grad_norm": 1.284646987915039,
      "learning_rate": 9.352674894407729e-06,
      "loss": 0.0403,
      "step": 216480
    },
    {
      "epoch": 0.3543069984223929,
      "grad_norm": 1.6480125188827515,
      "learning_rate": 9.352609002194212e-06,
      "loss": 0.0461,
      "step": 216500
    },
    {
      "epoch": 0.3543397288610462,
      "grad_norm": 2.5391762256622314,
      "learning_rate": 9.352543109980694e-06,
      "loss": 0.0619,
      "step": 216520
    },
    {
      "epoch": 0.3543724592996995,
      "grad_norm": 0.8457512855529785,
      "learning_rate": 9.352477217767178e-06,
      "loss": 0.0377,
      "step": 216540
    },
    {
      "epoch": 0.3544051897383529,
      "grad_norm": 1.789667010307312,
      "learning_rate": 9.35241132555366e-06,
      "loss": 0.0388,
      "step": 216560
    },
    {
      "epoch": 0.3544379201770062,
      "grad_norm": 1.0441354513168335,
      "learning_rate": 9.352345433340143e-06,
      "loss": 0.0702,
      "step": 216580
    },
    {
      "epoch": 0.3544706506156596,
      "grad_norm": 1.4447307586669922,
      "learning_rate": 9.352279541126625e-06,
      "loss": 0.0438,
      "step": 216600
    },
    {
      "epoch": 0.3545033810543129,
      "grad_norm": 0.4132455587387085,
      "learning_rate": 9.352213648913109e-06,
      "loss": 0.0358,
      "step": 216620
    },
    {
      "epoch": 0.3545361114929662,
      "grad_norm": 4.058782577514648,
      "learning_rate": 9.35214775669959e-06,
      "loss": 0.0555,
      "step": 216640
    },
    {
      "epoch": 0.3545688419316196,
      "grad_norm": 1.059247374534607,
      "learning_rate": 9.352081864486074e-06,
      "loss": 0.0479,
      "step": 216660
    },
    {
      "epoch": 0.3546015723702729,
      "grad_norm": 0.26019325852394104,
      "learning_rate": 9.352015972272558e-06,
      "loss": 0.0542,
      "step": 216680
    },
    {
      "epoch": 0.35463430280892627,
      "grad_norm": 0.8672162890434265,
      "learning_rate": 9.35195008005904e-06,
      "loss": 0.0495,
      "step": 216700
    },
    {
      "epoch": 0.3546670332475796,
      "grad_norm": 1.011643886566162,
      "learning_rate": 9.351884187845523e-06,
      "loss": 0.0409,
      "step": 216720
    },
    {
      "epoch": 0.3546997636862329,
      "grad_norm": 1.6089130640029907,
      "learning_rate": 9.351818295632005e-06,
      "loss": 0.0439,
      "step": 216740
    },
    {
      "epoch": 0.3547324941248863,
      "grad_norm": 2.1752142906188965,
      "learning_rate": 9.351752403418489e-06,
      "loss": 0.0362,
      "step": 216760
    },
    {
      "epoch": 0.3547652245635396,
      "grad_norm": 1.6686999797821045,
      "learning_rate": 9.351686511204972e-06,
      "loss": 0.0464,
      "step": 216780
    },
    {
      "epoch": 0.35479795500219297,
      "grad_norm": 2.7395596504211426,
      "learning_rate": 9.351620618991454e-06,
      "loss": 0.0446,
      "step": 216800
    },
    {
      "epoch": 0.3548306854408463,
      "grad_norm": 0.6464452147483826,
      "learning_rate": 9.351554726777938e-06,
      "loss": 0.0416,
      "step": 216820
    },
    {
      "epoch": 0.3548634158794996,
      "grad_norm": 0.9941794872283936,
      "learning_rate": 9.351488834564421e-06,
      "loss": 0.0661,
      "step": 216840
    },
    {
      "epoch": 0.35489614631815297,
      "grad_norm": 2.2126874923706055,
      "learning_rate": 9.351422942350903e-06,
      "loss": 0.0518,
      "step": 216860
    },
    {
      "epoch": 0.3549288767568063,
      "grad_norm": 2.5131995677948,
      "learning_rate": 9.351357050137387e-06,
      "loss": 0.05,
      "step": 216880
    },
    {
      "epoch": 0.35496160719545966,
      "grad_norm": 0.3141383230686188,
      "learning_rate": 9.351291157923869e-06,
      "loss": 0.0382,
      "step": 216900
    },
    {
      "epoch": 0.354994337634113,
      "grad_norm": 2.317148447036743,
      "learning_rate": 9.351225265710352e-06,
      "loss": 0.0466,
      "step": 216920
    },
    {
      "epoch": 0.3550270680727663,
      "grad_norm": 3.718078136444092,
      "learning_rate": 9.351159373496834e-06,
      "loss": 0.059,
      "step": 216940
    },
    {
      "epoch": 0.35505979851141967,
      "grad_norm": 1.8818780183792114,
      "learning_rate": 9.351093481283318e-06,
      "loss": 0.0504,
      "step": 216960
    },
    {
      "epoch": 0.355092528950073,
      "grad_norm": 1.8327925205230713,
      "learning_rate": 9.3510275890698e-06,
      "loss": 0.0507,
      "step": 216980
    },
    {
      "epoch": 0.35512525938872636,
      "grad_norm": 2.8272414207458496,
      "learning_rate": 9.350961696856283e-06,
      "loss": 0.0459,
      "step": 217000
    },
    {
      "epoch": 0.35515798982737967,
      "grad_norm": 1.3767930269241333,
      "learning_rate": 9.350895804642765e-06,
      "loss": 0.0426,
      "step": 217020
    },
    {
      "epoch": 0.355190720266033,
      "grad_norm": 2.707526683807373,
      "learning_rate": 9.350829912429249e-06,
      "loss": 0.0522,
      "step": 217040
    },
    {
      "epoch": 0.35522345070468636,
      "grad_norm": 5.932634353637695,
      "learning_rate": 9.350764020215732e-06,
      "loss": 0.037,
      "step": 217060
    },
    {
      "epoch": 0.3552561811433397,
      "grad_norm": 2.0099754333496094,
      "learning_rate": 9.350698128002214e-06,
      "loss": 0.0569,
      "step": 217080
    },
    {
      "epoch": 0.355288911581993,
      "grad_norm": 1.5361695289611816,
      "learning_rate": 9.350632235788698e-06,
      "loss": 0.0508,
      "step": 217100
    },
    {
      "epoch": 0.35532164202064637,
      "grad_norm": 2.074944019317627,
      "learning_rate": 9.35056634357518e-06,
      "loss": 0.0515,
      "step": 217120
    },
    {
      "epoch": 0.3553543724592997,
      "grad_norm": 8.022259712219238,
      "learning_rate": 9.350500451361663e-06,
      "loss": 0.0568,
      "step": 217140
    },
    {
      "epoch": 0.35538710289795306,
      "grad_norm": 2.991706132888794,
      "learning_rate": 9.350434559148145e-06,
      "loss": 0.0572,
      "step": 217160
    },
    {
      "epoch": 0.35541983333660637,
      "grad_norm": 4.832971572875977,
      "learning_rate": 9.350368666934629e-06,
      "loss": 0.0587,
      "step": 217180
    },
    {
      "epoch": 0.3554525637752597,
      "grad_norm": 1.328559160232544,
      "learning_rate": 9.350302774721112e-06,
      "loss": 0.0559,
      "step": 217200
    },
    {
      "epoch": 0.35548529421391306,
      "grad_norm": 11.088279724121094,
      "learning_rate": 9.350236882507594e-06,
      "loss": 0.0556,
      "step": 217220
    },
    {
      "epoch": 0.3555180246525664,
      "grad_norm": 1.2791022062301636,
      "learning_rate": 9.350170990294078e-06,
      "loss": 0.0407,
      "step": 217240
    },
    {
      "epoch": 0.35555075509121975,
      "grad_norm": 0.2485608607530594,
      "learning_rate": 9.350105098080561e-06,
      "loss": 0.0375,
      "step": 217260
    },
    {
      "epoch": 0.35558348552987307,
      "grad_norm": 3.571073532104492,
      "learning_rate": 9.350039205867043e-06,
      "loss": 0.0493,
      "step": 217280
    },
    {
      "epoch": 0.3556162159685264,
      "grad_norm": 0.6428180932998657,
      "learning_rate": 9.349973313653527e-06,
      "loss": 0.0329,
      "step": 217300
    },
    {
      "epoch": 0.35564894640717976,
      "grad_norm": 0.45900672674179077,
      "learning_rate": 9.349907421440009e-06,
      "loss": 0.0437,
      "step": 217320
    },
    {
      "epoch": 0.3556816768458331,
      "grad_norm": 1.9657281637191772,
      "learning_rate": 9.349841529226492e-06,
      "loss": 0.0427,
      "step": 217340
    },
    {
      "epoch": 0.35571440728448644,
      "grad_norm": 0.8682489395141602,
      "learning_rate": 9.349775637012974e-06,
      "loss": 0.0473,
      "step": 217360
    },
    {
      "epoch": 0.35574713772313976,
      "grad_norm": 2.8652937412261963,
      "learning_rate": 9.349709744799458e-06,
      "loss": 0.062,
      "step": 217380
    },
    {
      "epoch": 0.3557798681617931,
      "grad_norm": 0.589560866355896,
      "learning_rate": 9.349643852585941e-06,
      "loss": 0.0341,
      "step": 217400
    },
    {
      "epoch": 0.35581259860044645,
      "grad_norm": 0.9497435092926025,
      "learning_rate": 9.349577960372423e-06,
      "loss": 0.0369,
      "step": 217420
    },
    {
      "epoch": 0.35584532903909977,
      "grad_norm": 1.2186115980148315,
      "learning_rate": 9.349512068158907e-06,
      "loss": 0.0393,
      "step": 217440
    },
    {
      "epoch": 0.35587805947775314,
      "grad_norm": 1.046654462814331,
      "learning_rate": 9.349446175945389e-06,
      "loss": 0.057,
      "step": 217460
    },
    {
      "epoch": 0.35591078991640646,
      "grad_norm": 1.3845527172088623,
      "learning_rate": 9.349380283731872e-06,
      "loss": 0.0468,
      "step": 217480
    },
    {
      "epoch": 0.3559435203550598,
      "grad_norm": 2.2630858421325684,
      "learning_rate": 9.349314391518354e-06,
      "loss": 0.0525,
      "step": 217500
    },
    {
      "epoch": 0.35597625079371314,
      "grad_norm": 0.838860034942627,
      "learning_rate": 9.349248499304838e-06,
      "loss": 0.0454,
      "step": 217520
    },
    {
      "epoch": 0.35600898123236646,
      "grad_norm": 1.4183249473571777,
      "learning_rate": 9.34918260709132e-06,
      "loss": 0.0532,
      "step": 217540
    },
    {
      "epoch": 0.35604171167101983,
      "grad_norm": 2.515441417694092,
      "learning_rate": 9.349116714877803e-06,
      "loss": 0.0528,
      "step": 217560
    },
    {
      "epoch": 0.35607444210967315,
      "grad_norm": 1.7070000171661377,
      "learning_rate": 9.349050822664287e-06,
      "loss": 0.0412,
      "step": 217580
    },
    {
      "epoch": 0.35610717254832647,
      "grad_norm": 0.7937855124473572,
      "learning_rate": 9.348984930450769e-06,
      "loss": 0.0522,
      "step": 217600
    },
    {
      "epoch": 0.35613990298697984,
      "grad_norm": 6.401111602783203,
      "learning_rate": 9.348919038237252e-06,
      "loss": 0.0421,
      "step": 217620
    },
    {
      "epoch": 0.35617263342563316,
      "grad_norm": 1.4749826192855835,
      "learning_rate": 9.348853146023736e-06,
      "loss": 0.06,
      "step": 217640
    },
    {
      "epoch": 0.35620536386428653,
      "grad_norm": 0.6582083702087402,
      "learning_rate": 9.348787253810218e-06,
      "loss": 0.0433,
      "step": 217660
    },
    {
      "epoch": 0.35623809430293985,
      "grad_norm": 1.529820203781128,
      "learning_rate": 9.348721361596702e-06,
      "loss": 0.0389,
      "step": 217680
    },
    {
      "epoch": 0.35627082474159316,
      "grad_norm": 8.98701000213623,
      "learning_rate": 9.348655469383183e-06,
      "loss": 0.0432,
      "step": 217700
    },
    {
      "epoch": 0.35630355518024653,
      "grad_norm": 1.9826147556304932,
      "learning_rate": 9.348589577169667e-06,
      "loss": 0.0641,
      "step": 217720
    },
    {
      "epoch": 0.35633628561889985,
      "grad_norm": 3.243884801864624,
      "learning_rate": 9.34852368495615e-06,
      "loss": 0.0557,
      "step": 217740
    },
    {
      "epoch": 0.3563690160575532,
      "grad_norm": 1.3611570596694946,
      "learning_rate": 9.348457792742632e-06,
      "loss": 0.0523,
      "step": 217760
    },
    {
      "epoch": 0.35640174649620654,
      "grad_norm": 2.233459234237671,
      "learning_rate": 9.348391900529116e-06,
      "loss": 0.052,
      "step": 217780
    },
    {
      "epoch": 0.35643447693485986,
      "grad_norm": 1.7165687084197998,
      "learning_rate": 9.348326008315598e-06,
      "loss": 0.0443,
      "step": 217800
    },
    {
      "epoch": 0.35646720737351323,
      "grad_norm": 0.3871377408504486,
      "learning_rate": 9.348260116102082e-06,
      "loss": 0.0437,
      "step": 217820
    },
    {
      "epoch": 0.35649993781216655,
      "grad_norm": 1.3223662376403809,
      "learning_rate": 9.348194223888563e-06,
      "loss": 0.0473,
      "step": 217840
    },
    {
      "epoch": 0.3565326682508199,
      "grad_norm": 0.7869603037834167,
      "learning_rate": 9.348128331675047e-06,
      "loss": 0.0534,
      "step": 217860
    },
    {
      "epoch": 0.35656539868947323,
      "grad_norm": 1.3428386449813843,
      "learning_rate": 9.348062439461529e-06,
      "loss": 0.0523,
      "step": 217880
    },
    {
      "epoch": 0.35659812912812655,
      "grad_norm": 0.8761573433876038,
      "learning_rate": 9.347996547248012e-06,
      "loss": 0.0479,
      "step": 217900
    },
    {
      "epoch": 0.3566308595667799,
      "grad_norm": 2.753734588623047,
      "learning_rate": 9.347930655034494e-06,
      "loss": 0.0486,
      "step": 217920
    },
    {
      "epoch": 0.35666359000543324,
      "grad_norm": 0.5123151540756226,
      "learning_rate": 9.347864762820978e-06,
      "loss": 0.0635,
      "step": 217940
    },
    {
      "epoch": 0.3566963204440866,
      "grad_norm": 2.560063600540161,
      "learning_rate": 9.347798870607462e-06,
      "loss": 0.0481,
      "step": 217960
    },
    {
      "epoch": 0.35672905088273993,
      "grad_norm": 2.0793659687042236,
      "learning_rate": 9.347732978393943e-06,
      "loss": 0.0465,
      "step": 217980
    },
    {
      "epoch": 0.35676178132139325,
      "grad_norm": 2.396465301513672,
      "learning_rate": 9.347667086180427e-06,
      "loss": 0.0403,
      "step": 218000
    },
    {
      "epoch": 0.3567945117600466,
      "grad_norm": 1.4921952486038208,
      "learning_rate": 9.34760119396691e-06,
      "loss": 0.0408,
      "step": 218020
    },
    {
      "epoch": 0.35682724219869993,
      "grad_norm": 1.8916890621185303,
      "learning_rate": 9.347535301753393e-06,
      "loss": 0.0438,
      "step": 218040
    },
    {
      "epoch": 0.3568599726373533,
      "grad_norm": 2.3289458751678467,
      "learning_rate": 9.347469409539876e-06,
      "loss": 0.0511,
      "step": 218060
    },
    {
      "epoch": 0.3568927030760066,
      "grad_norm": 1.8169515132904053,
      "learning_rate": 9.347403517326358e-06,
      "loss": 0.0463,
      "step": 218080
    },
    {
      "epoch": 0.35692543351465994,
      "grad_norm": 0.4656578004360199,
      "learning_rate": 9.347337625112842e-06,
      "loss": 0.0513,
      "step": 218100
    },
    {
      "epoch": 0.3569581639533133,
      "grad_norm": 2.332315683364868,
      "learning_rate": 9.347271732899325e-06,
      "loss": 0.0649,
      "step": 218120
    },
    {
      "epoch": 0.35699089439196663,
      "grad_norm": 2.187000274658203,
      "learning_rate": 9.347205840685807e-06,
      "loss": 0.0506,
      "step": 218140
    },
    {
      "epoch": 0.35702362483062,
      "grad_norm": 1.41100013256073,
      "learning_rate": 9.34713994847229e-06,
      "loss": 0.0351,
      "step": 218160
    },
    {
      "epoch": 0.3570563552692733,
      "grad_norm": 2.5691030025482178,
      "learning_rate": 9.347074056258773e-06,
      "loss": 0.0364,
      "step": 218180
    },
    {
      "epoch": 0.35708908570792663,
      "grad_norm": 1.1418309211730957,
      "learning_rate": 9.347008164045256e-06,
      "loss": 0.0451,
      "step": 218200
    },
    {
      "epoch": 0.35712181614658,
      "grad_norm": 3.334798574447632,
      "learning_rate": 9.346942271831738e-06,
      "loss": 0.037,
      "step": 218220
    },
    {
      "epoch": 0.3571545465852333,
      "grad_norm": 2.1390936374664307,
      "learning_rate": 9.346876379618222e-06,
      "loss": 0.0513,
      "step": 218240
    },
    {
      "epoch": 0.3571872770238867,
      "grad_norm": 1.328140377998352,
      "learning_rate": 9.346810487404704e-06,
      "loss": 0.0429,
      "step": 218260
    },
    {
      "epoch": 0.35722000746254,
      "grad_norm": 1.168213129043579,
      "learning_rate": 9.346744595191187e-06,
      "loss": 0.0505,
      "step": 218280
    },
    {
      "epoch": 0.35725273790119333,
      "grad_norm": 2.070369005203247,
      "learning_rate": 9.346678702977669e-06,
      "loss": 0.0412,
      "step": 218300
    },
    {
      "epoch": 0.3572854683398467,
      "grad_norm": 0.8871009349822998,
      "learning_rate": 9.346612810764153e-06,
      "loss": 0.0513,
      "step": 218320
    },
    {
      "epoch": 0.3573181987785,
      "grad_norm": 1.3935719728469849,
      "learning_rate": 9.346546918550634e-06,
      "loss": 0.0533,
      "step": 218340
    },
    {
      "epoch": 0.3573509292171534,
      "grad_norm": 1.4003982543945312,
      "learning_rate": 9.346481026337118e-06,
      "loss": 0.0605,
      "step": 218360
    },
    {
      "epoch": 0.3573836596558067,
      "grad_norm": 0.6730547547340393,
      "learning_rate": 9.346415134123602e-06,
      "loss": 0.0283,
      "step": 218380
    },
    {
      "epoch": 0.35741639009446,
      "grad_norm": 2.0341989994049072,
      "learning_rate": 9.346349241910084e-06,
      "loss": 0.0506,
      "step": 218400
    },
    {
      "epoch": 0.3574491205331134,
      "grad_norm": 1.6890677213668823,
      "learning_rate": 9.346283349696567e-06,
      "loss": 0.0394,
      "step": 218420
    },
    {
      "epoch": 0.3574818509717667,
      "grad_norm": 2.3491480350494385,
      "learning_rate": 9.34621745748305e-06,
      "loss": 0.0371,
      "step": 218440
    },
    {
      "epoch": 0.3575145814104201,
      "grad_norm": 1.0301940441131592,
      "learning_rate": 9.346151565269533e-06,
      "loss": 0.0446,
      "step": 218460
    },
    {
      "epoch": 0.3575473118490734,
      "grad_norm": 2.512862205505371,
      "learning_rate": 9.346085673056016e-06,
      "loss": 0.0477,
      "step": 218480
    },
    {
      "epoch": 0.3575800422877267,
      "grad_norm": 0.7408952116966248,
      "learning_rate": 9.3460197808425e-06,
      "loss": 0.0485,
      "step": 218500
    },
    {
      "epoch": 0.3576127727263801,
      "grad_norm": 1.1146973371505737,
      "learning_rate": 9.345953888628982e-06,
      "loss": 0.0502,
      "step": 218520
    },
    {
      "epoch": 0.3576455031650334,
      "grad_norm": 6.1455302238464355,
      "learning_rate": 9.345887996415465e-06,
      "loss": 0.0383,
      "step": 218540
    },
    {
      "epoch": 0.3576782336036868,
      "grad_norm": 1.626452922821045,
      "learning_rate": 9.345822104201947e-06,
      "loss": 0.0572,
      "step": 218560
    },
    {
      "epoch": 0.3577109640423401,
      "grad_norm": 2.599747896194458,
      "learning_rate": 9.34575621198843e-06,
      "loss": 0.0444,
      "step": 218580
    },
    {
      "epoch": 0.3577436944809934,
      "grad_norm": 1.7035552263259888,
      "learning_rate": 9.345690319774913e-06,
      "loss": 0.0473,
      "step": 218600
    },
    {
      "epoch": 0.3577764249196468,
      "grad_norm": 1.9985013008117676,
      "learning_rate": 9.345624427561396e-06,
      "loss": 0.0458,
      "step": 218620
    },
    {
      "epoch": 0.3578091553583001,
      "grad_norm": 1.399915337562561,
      "learning_rate": 9.345558535347878e-06,
      "loss": 0.051,
      "step": 218640
    },
    {
      "epoch": 0.3578418857969535,
      "grad_norm": 2.8077657222747803,
      "learning_rate": 9.345492643134362e-06,
      "loss": 0.0387,
      "step": 218660
    },
    {
      "epoch": 0.3578746162356068,
      "grad_norm": 1.900542974472046,
      "learning_rate": 9.345426750920844e-06,
      "loss": 0.0531,
      "step": 218680
    },
    {
      "epoch": 0.3579073466742601,
      "grad_norm": 0.9923719763755798,
      "learning_rate": 9.345360858707327e-06,
      "loss": 0.0521,
      "step": 218700
    },
    {
      "epoch": 0.3579400771129135,
      "grad_norm": 0.8175198435783386,
      "learning_rate": 9.345294966493809e-06,
      "loss": 0.037,
      "step": 218720
    },
    {
      "epoch": 0.3579728075515668,
      "grad_norm": 2.5523154735565186,
      "learning_rate": 9.345229074280293e-06,
      "loss": 0.0515,
      "step": 218740
    },
    {
      "epoch": 0.35800553799022017,
      "grad_norm": 0.5791957378387451,
      "learning_rate": 9.345163182066776e-06,
      "loss": 0.0676,
      "step": 218760
    },
    {
      "epoch": 0.3580382684288735,
      "grad_norm": 0.6541555523872375,
      "learning_rate": 9.345097289853258e-06,
      "loss": 0.0424,
      "step": 218780
    },
    {
      "epoch": 0.3580709988675268,
      "grad_norm": 1.366585373878479,
      "learning_rate": 9.345031397639742e-06,
      "loss": 0.0431,
      "step": 218800
    },
    {
      "epoch": 0.3581037293061802,
      "grad_norm": 1.3975459337234497,
      "learning_rate": 9.344965505426225e-06,
      "loss": 0.0438,
      "step": 218820
    },
    {
      "epoch": 0.3581364597448335,
      "grad_norm": 1.7262632846832275,
      "learning_rate": 9.344899613212707e-06,
      "loss": 0.0565,
      "step": 218840
    },
    {
      "epoch": 0.35816919018348686,
      "grad_norm": 1.8067502975463867,
      "learning_rate": 9.34483372099919e-06,
      "loss": 0.0416,
      "step": 218860
    },
    {
      "epoch": 0.3582019206221402,
      "grad_norm": 0.5661107301712036,
      "learning_rate": 9.344767828785674e-06,
      "loss": 0.0459,
      "step": 218880
    },
    {
      "epoch": 0.3582346510607935,
      "grad_norm": 3.8687901496887207,
      "learning_rate": 9.344701936572156e-06,
      "loss": 0.0533,
      "step": 218900
    },
    {
      "epoch": 0.35826738149944687,
      "grad_norm": 2.711754560470581,
      "learning_rate": 9.34463604435864e-06,
      "loss": 0.06,
      "step": 218920
    },
    {
      "epoch": 0.3583001119381002,
      "grad_norm": 1.7037289142608643,
      "learning_rate": 9.344570152145122e-06,
      "loss": 0.0401,
      "step": 218940
    },
    {
      "epoch": 0.35833284237675356,
      "grad_norm": 1.8473254442214966,
      "learning_rate": 9.344504259931605e-06,
      "loss": 0.0416,
      "step": 218960
    },
    {
      "epoch": 0.3583655728154069,
      "grad_norm": 11.174174308776855,
      "learning_rate": 9.344438367718087e-06,
      "loss": 0.0656,
      "step": 218980
    },
    {
      "epoch": 0.3583983032540602,
      "grad_norm": 0.6180652379989624,
      "learning_rate": 9.34437247550457e-06,
      "loss": 0.0555,
      "step": 219000
    },
    {
      "epoch": 0.35843103369271356,
      "grad_norm": 3.633143663406372,
      "learning_rate": 9.344306583291053e-06,
      "loss": 0.056,
      "step": 219020
    },
    {
      "epoch": 0.3584637641313669,
      "grad_norm": 1.1548731327056885,
      "learning_rate": 9.344240691077536e-06,
      "loss": 0.0561,
      "step": 219040
    },
    {
      "epoch": 0.35849649457002025,
      "grad_norm": 1.0312100648880005,
      "learning_rate": 9.344174798864018e-06,
      "loss": 0.0366,
      "step": 219060
    },
    {
      "epoch": 0.35852922500867357,
      "grad_norm": 1.6463295221328735,
      "learning_rate": 9.344108906650502e-06,
      "loss": 0.0416,
      "step": 219080
    },
    {
      "epoch": 0.3585619554473269,
      "grad_norm": 0.6992185711860657,
      "learning_rate": 9.344043014436984e-06,
      "loss": 0.045,
      "step": 219100
    },
    {
      "epoch": 0.35859468588598026,
      "grad_norm": 0.6248536705970764,
      "learning_rate": 9.343977122223467e-06,
      "loss": 0.0407,
      "step": 219120
    },
    {
      "epoch": 0.3586274163246336,
      "grad_norm": 0.9846907258033752,
      "learning_rate": 9.34391123000995e-06,
      "loss": 0.046,
      "step": 219140
    },
    {
      "epoch": 0.35866014676328695,
      "grad_norm": 0.9461707472801208,
      "learning_rate": 9.343845337796433e-06,
      "loss": 0.0381,
      "step": 219160
    },
    {
      "epoch": 0.35869287720194026,
      "grad_norm": 3.585475444793701,
      "learning_rate": 9.343779445582916e-06,
      "loss": 0.0571,
      "step": 219180
    },
    {
      "epoch": 0.3587256076405936,
      "grad_norm": 0.1503928005695343,
      "learning_rate": 9.343713553369398e-06,
      "loss": 0.0458,
      "step": 219200
    },
    {
      "epoch": 0.35875833807924695,
      "grad_norm": 2.8523218631744385,
      "learning_rate": 9.343647661155882e-06,
      "loss": 0.0502,
      "step": 219220
    },
    {
      "epoch": 0.35879106851790027,
      "grad_norm": 3.9127838611602783,
      "learning_rate": 9.343581768942365e-06,
      "loss": 0.0396,
      "step": 219240
    },
    {
      "epoch": 0.35882379895655364,
      "grad_norm": 0.3288058638572693,
      "learning_rate": 9.343515876728847e-06,
      "loss": 0.0454,
      "step": 219260
    },
    {
      "epoch": 0.35885652939520696,
      "grad_norm": 1.9218096733093262,
      "learning_rate": 9.343449984515331e-06,
      "loss": 0.0717,
      "step": 219280
    },
    {
      "epoch": 0.3588892598338603,
      "grad_norm": 2.6110618114471436,
      "learning_rate": 9.343384092301814e-06,
      "loss": 0.0406,
      "step": 219300
    },
    {
      "epoch": 0.35892199027251365,
      "grad_norm": 2.452500343322754,
      "learning_rate": 9.343318200088296e-06,
      "loss": 0.0429,
      "step": 219320
    },
    {
      "epoch": 0.35895472071116696,
      "grad_norm": 1.5739470720291138,
      "learning_rate": 9.34325230787478e-06,
      "loss": 0.052,
      "step": 219340
    },
    {
      "epoch": 0.35898745114982034,
      "grad_norm": 1.9930410385131836,
      "learning_rate": 9.343186415661262e-06,
      "loss": 0.0595,
      "step": 219360
    },
    {
      "epoch": 0.35902018158847365,
      "grad_norm": 1.842922568321228,
      "learning_rate": 9.343120523447745e-06,
      "loss": 0.0417,
      "step": 219380
    },
    {
      "epoch": 0.35905291202712697,
      "grad_norm": 0.8316379189491272,
      "learning_rate": 9.343054631234227e-06,
      "loss": 0.0457,
      "step": 219400
    },
    {
      "epoch": 0.35908564246578034,
      "grad_norm": 1.4272637367248535,
      "learning_rate": 9.342988739020711e-06,
      "loss": 0.0444,
      "step": 219420
    },
    {
      "epoch": 0.35911837290443366,
      "grad_norm": 1.3497364521026611,
      "learning_rate": 9.342922846807193e-06,
      "loss": 0.0534,
      "step": 219440
    },
    {
      "epoch": 0.35915110334308703,
      "grad_norm": 7.8107523918151855,
      "learning_rate": 9.342856954593676e-06,
      "loss": 0.0451,
      "step": 219460
    },
    {
      "epoch": 0.35918383378174035,
      "grad_norm": 3.6552302837371826,
      "learning_rate": 9.342791062380158e-06,
      "loss": 0.052,
      "step": 219480
    },
    {
      "epoch": 0.35921656422039366,
      "grad_norm": 1.0625419616699219,
      "learning_rate": 9.342725170166642e-06,
      "loss": 0.0457,
      "step": 219500
    },
    {
      "epoch": 0.35924929465904704,
      "grad_norm": 3.223505735397339,
      "learning_rate": 9.342659277953125e-06,
      "loss": 0.0437,
      "step": 219520
    },
    {
      "epoch": 0.35928202509770035,
      "grad_norm": 0.3824107348918915,
      "learning_rate": 9.342593385739607e-06,
      "loss": 0.0509,
      "step": 219540
    },
    {
      "epoch": 0.3593147555363537,
      "grad_norm": 1.5832310914993286,
      "learning_rate": 9.342527493526091e-06,
      "loss": 0.0492,
      "step": 219560
    },
    {
      "epoch": 0.35934748597500704,
      "grad_norm": 3.312509536743164,
      "learning_rate": 9.342461601312573e-06,
      "loss": 0.0562,
      "step": 219580
    },
    {
      "epoch": 0.35938021641366036,
      "grad_norm": 1.5455073118209839,
      "learning_rate": 9.342395709099056e-06,
      "loss": 0.0513,
      "step": 219600
    },
    {
      "epoch": 0.35941294685231373,
      "grad_norm": 1.7009975910186768,
      "learning_rate": 9.34232981688554e-06,
      "loss": 0.0461,
      "step": 219620
    },
    {
      "epoch": 0.35944567729096705,
      "grad_norm": 2.9525349140167236,
      "learning_rate": 9.342263924672022e-06,
      "loss": 0.0533,
      "step": 219640
    },
    {
      "epoch": 0.3594784077296204,
      "grad_norm": 2.2311055660247803,
      "learning_rate": 9.342198032458505e-06,
      "loss": 0.0413,
      "step": 219660
    },
    {
      "epoch": 0.35951113816827374,
      "grad_norm": 2.0631210803985596,
      "learning_rate": 9.342132140244989e-06,
      "loss": 0.0485,
      "step": 219680
    },
    {
      "epoch": 0.35954386860692705,
      "grad_norm": 2.9759140014648438,
      "learning_rate": 9.342066248031471e-06,
      "loss": 0.0418,
      "step": 219700
    },
    {
      "epoch": 0.3595765990455804,
      "grad_norm": 1.0486301183700562,
      "learning_rate": 9.342000355817955e-06,
      "loss": 0.0508,
      "step": 219720
    },
    {
      "epoch": 0.35960932948423374,
      "grad_norm": 1.7558273077011108,
      "learning_rate": 9.341934463604436e-06,
      "loss": 0.049,
      "step": 219740
    },
    {
      "epoch": 0.3596420599228871,
      "grad_norm": 0.41259363293647766,
      "learning_rate": 9.34186857139092e-06,
      "loss": 0.046,
      "step": 219760
    },
    {
      "epoch": 0.35967479036154043,
      "grad_norm": 1.4463655948638916,
      "learning_rate": 9.341802679177402e-06,
      "loss": 0.0547,
      "step": 219780
    },
    {
      "epoch": 0.35970752080019375,
      "grad_norm": 2.655341148376465,
      "learning_rate": 9.341736786963885e-06,
      "loss": 0.0641,
      "step": 219800
    },
    {
      "epoch": 0.3597402512388471,
      "grad_norm": 1.8651431798934937,
      "learning_rate": 9.341670894750367e-06,
      "loss": 0.0592,
      "step": 219820
    },
    {
      "epoch": 0.35977298167750044,
      "grad_norm": 2.6910433769226074,
      "learning_rate": 9.341605002536851e-06,
      "loss": 0.049,
      "step": 219840
    },
    {
      "epoch": 0.35980571211615375,
      "grad_norm": 2.256904363632202,
      "learning_rate": 9.341539110323335e-06,
      "loss": 0.0479,
      "step": 219860
    },
    {
      "epoch": 0.3598384425548071,
      "grad_norm": 1.4615802764892578,
      "learning_rate": 9.341473218109816e-06,
      "loss": 0.0499,
      "step": 219880
    },
    {
      "epoch": 0.35987117299346044,
      "grad_norm": 0.878689706325531,
      "learning_rate": 9.3414073258963e-06,
      "loss": 0.0423,
      "step": 219900
    },
    {
      "epoch": 0.3599039034321138,
      "grad_norm": 0.8738089203834534,
      "learning_rate": 9.341341433682782e-06,
      "loss": 0.0454,
      "step": 219920
    },
    {
      "epoch": 0.35993663387076713,
      "grad_norm": 1.183798909187317,
      "learning_rate": 9.341275541469266e-06,
      "loss": 0.0431,
      "step": 219940
    },
    {
      "epoch": 0.35996936430942045,
      "grad_norm": 1.6770565509796143,
      "learning_rate": 9.341209649255747e-06,
      "loss": 0.0353,
      "step": 219960
    },
    {
      "epoch": 0.3600020947480738,
      "grad_norm": 0.9662975072860718,
      "learning_rate": 9.341143757042231e-06,
      "loss": 0.048,
      "step": 219980
    },
    {
      "epoch": 0.36003482518672714,
      "grad_norm": 2.836313486099243,
      "learning_rate": 9.341077864828713e-06,
      "loss": 0.0468,
      "step": 220000
    },
    {
      "epoch": 0.3600675556253805,
      "grad_norm": 2.1478703022003174,
      "learning_rate": 9.341011972615196e-06,
      "loss": 0.052,
      "step": 220020
    },
    {
      "epoch": 0.3601002860640338,
      "grad_norm": 0.9934446811676025,
      "learning_rate": 9.34094608040168e-06,
      "loss": 0.0548,
      "step": 220040
    },
    {
      "epoch": 0.36013301650268714,
      "grad_norm": 2.6683218479156494,
      "learning_rate": 9.340880188188162e-06,
      "loss": 0.0553,
      "step": 220060
    },
    {
      "epoch": 0.3601657469413405,
      "grad_norm": 3.6995506286621094,
      "learning_rate": 9.340814295974646e-06,
      "loss": 0.0495,
      "step": 220080
    },
    {
      "epoch": 0.36019847737999383,
      "grad_norm": 0.6017559766769409,
      "learning_rate": 9.340748403761129e-06,
      "loss": 0.0408,
      "step": 220100
    },
    {
      "epoch": 0.3602312078186472,
      "grad_norm": 5.337168216705322,
      "learning_rate": 9.340682511547611e-06,
      "loss": 0.0522,
      "step": 220120
    },
    {
      "epoch": 0.3602639382573005,
      "grad_norm": 0.5184927582740784,
      "learning_rate": 9.340616619334095e-06,
      "loss": 0.0475,
      "step": 220140
    },
    {
      "epoch": 0.36029666869595384,
      "grad_norm": 2.138875961303711,
      "learning_rate": 9.340550727120576e-06,
      "loss": 0.048,
      "step": 220160
    },
    {
      "epoch": 0.3603293991346072,
      "grad_norm": 3.8423750400543213,
      "learning_rate": 9.34048483490706e-06,
      "loss": 0.0479,
      "step": 220180
    },
    {
      "epoch": 0.3603621295732605,
      "grad_norm": 1.4644548892974854,
      "learning_rate": 9.340418942693544e-06,
      "loss": 0.0449,
      "step": 220200
    },
    {
      "epoch": 0.3603948600119139,
      "grad_norm": 1.3505719900131226,
      "learning_rate": 9.340353050480026e-06,
      "loss": 0.0587,
      "step": 220220
    },
    {
      "epoch": 0.3604275904505672,
      "grad_norm": 0.3549611270427704,
      "learning_rate": 9.340287158266509e-06,
      "loss": 0.0434,
      "step": 220240
    },
    {
      "epoch": 0.36046032088922053,
      "grad_norm": 1.7536863088607788,
      "learning_rate": 9.340221266052991e-06,
      "loss": 0.0588,
      "step": 220260
    },
    {
      "epoch": 0.3604930513278739,
      "grad_norm": 2.179119348526001,
      "learning_rate": 9.340155373839475e-06,
      "loss": 0.0596,
      "step": 220280
    },
    {
      "epoch": 0.3605257817665272,
      "grad_norm": 1.73789381980896,
      "learning_rate": 9.340089481625957e-06,
      "loss": 0.0508,
      "step": 220300
    },
    {
      "epoch": 0.3605585122051806,
      "grad_norm": 0.4305591881275177,
      "learning_rate": 9.34002358941244e-06,
      "loss": 0.0703,
      "step": 220320
    },
    {
      "epoch": 0.3605912426438339,
      "grad_norm": 3.633488893508911,
      "learning_rate": 9.339957697198922e-06,
      "loss": 0.046,
      "step": 220340
    },
    {
      "epoch": 0.3606239730824872,
      "grad_norm": 1.8458757400512695,
      "learning_rate": 9.339891804985406e-06,
      "loss": 0.0573,
      "step": 220360
    },
    {
      "epoch": 0.3606567035211406,
      "grad_norm": 3.0969789028167725,
      "learning_rate": 9.339825912771887e-06,
      "loss": 0.041,
      "step": 220380
    },
    {
      "epoch": 0.3606894339597939,
      "grad_norm": 0.7094579339027405,
      "learning_rate": 9.339760020558371e-06,
      "loss": 0.052,
      "step": 220400
    },
    {
      "epoch": 0.3607221643984473,
      "grad_norm": 2.793123960494995,
      "learning_rate": 9.339694128344855e-06,
      "loss": 0.0579,
      "step": 220420
    },
    {
      "epoch": 0.3607548948371006,
      "grad_norm": 4.476149559020996,
      "learning_rate": 9.339628236131337e-06,
      "loss": 0.0614,
      "step": 220440
    },
    {
      "epoch": 0.3607876252757539,
      "grad_norm": 1.5495049953460693,
      "learning_rate": 9.33956234391782e-06,
      "loss": 0.0596,
      "step": 220460
    },
    {
      "epoch": 0.3608203557144073,
      "grad_norm": 5.1500139236450195,
      "learning_rate": 9.339496451704304e-06,
      "loss": 0.0541,
      "step": 220480
    },
    {
      "epoch": 0.3608530861530606,
      "grad_norm": 3.9316189289093018,
      "learning_rate": 9.339430559490786e-06,
      "loss": 0.0396,
      "step": 220500
    },
    {
      "epoch": 0.360885816591714,
      "grad_norm": 0.9674310684204102,
      "learning_rate": 9.33936466727727e-06,
      "loss": 0.0537,
      "step": 220520
    },
    {
      "epoch": 0.3609185470303673,
      "grad_norm": 0.8516092896461487,
      "learning_rate": 9.339298775063751e-06,
      "loss": 0.049,
      "step": 220540
    },
    {
      "epoch": 0.3609512774690206,
      "grad_norm": 0.9594691395759583,
      "learning_rate": 9.339232882850235e-06,
      "loss": 0.0472,
      "step": 220560
    },
    {
      "epoch": 0.360984007907674,
      "grad_norm": 13.305855751037598,
      "learning_rate": 9.339166990636718e-06,
      "loss": 0.0603,
      "step": 220580
    },
    {
      "epoch": 0.3610167383463273,
      "grad_norm": 2.0278613567352295,
      "learning_rate": 9.3391010984232e-06,
      "loss": 0.0523,
      "step": 220600
    },
    {
      "epoch": 0.3610494687849807,
      "grad_norm": 1.855615258216858,
      "learning_rate": 9.339035206209684e-06,
      "loss": 0.0474,
      "step": 220620
    },
    {
      "epoch": 0.361082199223634,
      "grad_norm": 0.7454445958137512,
      "learning_rate": 9.338969313996166e-06,
      "loss": 0.0509,
      "step": 220640
    },
    {
      "epoch": 0.3611149296622873,
      "grad_norm": 0.8257224559783936,
      "learning_rate": 9.33890342178265e-06,
      "loss": 0.0486,
      "step": 220660
    },
    {
      "epoch": 0.3611476601009407,
      "grad_norm": 1.342925786972046,
      "learning_rate": 9.338837529569131e-06,
      "loss": 0.0449,
      "step": 220680
    },
    {
      "epoch": 0.361180390539594,
      "grad_norm": 1.192417860031128,
      "learning_rate": 9.338771637355615e-06,
      "loss": 0.0392,
      "step": 220700
    },
    {
      "epoch": 0.36121312097824737,
      "grad_norm": 2.6957523822784424,
      "learning_rate": 9.338705745142097e-06,
      "loss": 0.0585,
      "step": 220720
    },
    {
      "epoch": 0.3612458514169007,
      "grad_norm": 2.637338638305664,
      "learning_rate": 9.33863985292858e-06,
      "loss": 0.0507,
      "step": 220740
    },
    {
      "epoch": 0.361278581855554,
      "grad_norm": 2.4775912761688232,
      "learning_rate": 9.338573960715062e-06,
      "loss": 0.0535,
      "step": 220760
    },
    {
      "epoch": 0.3613113122942074,
      "grad_norm": 2.811877727508545,
      "learning_rate": 9.338508068501546e-06,
      "loss": 0.0526,
      "step": 220780
    },
    {
      "epoch": 0.3613440427328607,
      "grad_norm": 0.6796925663948059,
      "learning_rate": 9.33844217628803e-06,
      "loss": 0.0335,
      "step": 220800
    },
    {
      "epoch": 0.36137677317151407,
      "grad_norm": 1.867902159690857,
      "learning_rate": 9.338376284074511e-06,
      "loss": 0.0521,
      "step": 220820
    },
    {
      "epoch": 0.3614095036101674,
      "grad_norm": 0.6918364763259888,
      "learning_rate": 9.338310391860995e-06,
      "loss": 0.0377,
      "step": 220840
    },
    {
      "epoch": 0.3614422340488207,
      "grad_norm": 2.2551023960113525,
      "learning_rate": 9.338244499647478e-06,
      "loss": 0.0494,
      "step": 220860
    },
    {
      "epoch": 0.36147496448747407,
      "grad_norm": 0.9349119663238525,
      "learning_rate": 9.33817860743396e-06,
      "loss": 0.0368,
      "step": 220880
    },
    {
      "epoch": 0.3615076949261274,
      "grad_norm": 1.1286355257034302,
      "learning_rate": 9.338112715220444e-06,
      "loss": 0.0548,
      "step": 220900
    },
    {
      "epoch": 0.36154042536478076,
      "grad_norm": 2.394775390625,
      "learning_rate": 9.338046823006927e-06,
      "loss": 0.0543,
      "step": 220920
    },
    {
      "epoch": 0.3615731558034341,
      "grad_norm": 4.066479682922363,
      "learning_rate": 9.33798093079341e-06,
      "loss": 0.0528,
      "step": 220940
    },
    {
      "epoch": 0.3616058862420874,
      "grad_norm": 0.6519984006881714,
      "learning_rate": 9.337915038579893e-06,
      "loss": 0.05,
      "step": 220960
    },
    {
      "epoch": 0.36163861668074077,
      "grad_norm": 1.1369483470916748,
      "learning_rate": 9.337849146366375e-06,
      "loss": 0.0444,
      "step": 220980
    },
    {
      "epoch": 0.3616713471193941,
      "grad_norm": 2.0360217094421387,
      "learning_rate": 9.337783254152858e-06,
      "loss": 0.0481,
      "step": 221000
    },
    {
      "epoch": 0.36170407755804745,
      "grad_norm": 2.0873217582702637,
      "learning_rate": 9.33771736193934e-06,
      "loss": 0.053,
      "step": 221020
    },
    {
      "epoch": 0.36173680799670077,
      "grad_norm": 2.7412848472595215,
      "learning_rate": 9.337651469725824e-06,
      "loss": 0.0531,
      "step": 221040
    },
    {
      "epoch": 0.3617695384353541,
      "grad_norm": 2.1701343059539795,
      "learning_rate": 9.337585577512306e-06,
      "loss": 0.0406,
      "step": 221060
    },
    {
      "epoch": 0.36180226887400746,
      "grad_norm": 1.7509117126464844,
      "learning_rate": 9.33751968529879e-06,
      "loss": 0.0551,
      "step": 221080
    },
    {
      "epoch": 0.3618349993126608,
      "grad_norm": 1.1707385778427124,
      "learning_rate": 9.337453793085271e-06,
      "loss": 0.0539,
      "step": 221100
    },
    {
      "epoch": 0.36186772975131415,
      "grad_norm": 1.4062944650650024,
      "learning_rate": 9.337387900871755e-06,
      "loss": 0.0349,
      "step": 221120
    },
    {
      "epoch": 0.36190046018996747,
      "grad_norm": 1.4240823984146118,
      "learning_rate": 9.337322008658237e-06,
      "loss": 0.0502,
      "step": 221140
    },
    {
      "epoch": 0.3619331906286208,
      "grad_norm": 1.7524092197418213,
      "learning_rate": 9.33725611644472e-06,
      "loss": 0.0424,
      "step": 221160
    },
    {
      "epoch": 0.36196592106727415,
      "grad_norm": 0.8870899677276611,
      "learning_rate": 9.337190224231202e-06,
      "loss": 0.0433,
      "step": 221180
    },
    {
      "epoch": 0.36199865150592747,
      "grad_norm": 0.6200871467590332,
      "learning_rate": 9.337124332017686e-06,
      "loss": 0.0445,
      "step": 221200
    },
    {
      "epoch": 0.36203138194458084,
      "grad_norm": 1.2910398244857788,
      "learning_rate": 9.33705843980417e-06,
      "loss": 0.0572,
      "step": 221220
    },
    {
      "epoch": 0.36206411238323416,
      "grad_norm": 1.8963154554367065,
      "learning_rate": 9.336992547590651e-06,
      "loss": 0.035,
      "step": 221240
    },
    {
      "epoch": 0.3620968428218875,
      "grad_norm": 0.5837424397468567,
      "learning_rate": 9.336926655377135e-06,
      "loss": 0.0518,
      "step": 221260
    },
    {
      "epoch": 0.36212957326054085,
      "grad_norm": 2.0463712215423584,
      "learning_rate": 9.336860763163618e-06,
      "loss": 0.0442,
      "step": 221280
    },
    {
      "epoch": 0.36216230369919417,
      "grad_norm": 1.9719504117965698,
      "learning_rate": 9.3367948709501e-06,
      "loss": 0.0473,
      "step": 221300
    },
    {
      "epoch": 0.36219503413784754,
      "grad_norm": 2.496586799621582,
      "learning_rate": 9.336728978736584e-06,
      "loss": 0.0659,
      "step": 221320
    },
    {
      "epoch": 0.36222776457650085,
      "grad_norm": 0.8061837553977966,
      "learning_rate": 9.336663086523067e-06,
      "loss": 0.0433,
      "step": 221340
    },
    {
      "epoch": 0.36226049501515417,
      "grad_norm": 2.490431308746338,
      "learning_rate": 9.33659719430955e-06,
      "loss": 0.0416,
      "step": 221360
    },
    {
      "epoch": 0.36229322545380754,
      "grad_norm": 1.115357756614685,
      "learning_rate": 9.336531302096033e-06,
      "loss": 0.0494,
      "step": 221380
    },
    {
      "epoch": 0.36232595589246086,
      "grad_norm": 1.0344041585922241,
      "learning_rate": 9.336465409882515e-06,
      "loss": 0.0419,
      "step": 221400
    },
    {
      "epoch": 0.36235868633111423,
      "grad_norm": 2.7841134071350098,
      "learning_rate": 9.336399517668998e-06,
      "loss": 0.0438,
      "step": 221420
    },
    {
      "epoch": 0.36239141676976755,
      "grad_norm": 0.6574536561965942,
      "learning_rate": 9.33633362545548e-06,
      "loss": 0.0526,
      "step": 221440
    },
    {
      "epoch": 0.36242414720842087,
      "grad_norm": 1.49372136592865,
      "learning_rate": 9.336267733241964e-06,
      "loss": 0.0571,
      "step": 221460
    },
    {
      "epoch": 0.36245687764707424,
      "grad_norm": 1.244498372077942,
      "learning_rate": 9.336201841028446e-06,
      "loss": 0.0441,
      "step": 221480
    },
    {
      "epoch": 0.36248960808572755,
      "grad_norm": 0.5200719237327576,
      "learning_rate": 9.33613594881493e-06,
      "loss": 0.0511,
      "step": 221500
    },
    {
      "epoch": 0.3625223385243809,
      "grad_norm": 1.4308197498321533,
      "learning_rate": 9.336070056601411e-06,
      "loss": 0.0451,
      "step": 221520
    },
    {
      "epoch": 0.36255506896303424,
      "grad_norm": 1.9802203178405762,
      "learning_rate": 9.336004164387895e-06,
      "loss": 0.041,
      "step": 221540
    },
    {
      "epoch": 0.36258779940168756,
      "grad_norm": 1.5801492929458618,
      "learning_rate": 9.335938272174377e-06,
      "loss": 0.0472,
      "step": 221560
    },
    {
      "epoch": 0.36262052984034093,
      "grad_norm": 3.5935726165771484,
      "learning_rate": 9.33587237996086e-06,
      "loss": 0.0438,
      "step": 221580
    },
    {
      "epoch": 0.36265326027899425,
      "grad_norm": 0.390010803937912,
      "learning_rate": 9.335806487747344e-06,
      "loss": 0.0508,
      "step": 221600
    },
    {
      "epoch": 0.3626859907176476,
      "grad_norm": 3.4035699367523193,
      "learning_rate": 9.335740595533826e-06,
      "loss": 0.0412,
      "step": 221620
    },
    {
      "epoch": 0.36271872115630094,
      "grad_norm": 2.779667377471924,
      "learning_rate": 9.33567470332031e-06,
      "loss": 0.0525,
      "step": 221640
    },
    {
      "epoch": 0.36275145159495426,
      "grad_norm": 1.408941626548767,
      "learning_rate": 9.335608811106793e-06,
      "loss": 0.054,
      "step": 221660
    },
    {
      "epoch": 0.3627841820336076,
      "grad_norm": 9.908027648925781,
      "learning_rate": 9.335542918893275e-06,
      "loss": 0.0453,
      "step": 221680
    },
    {
      "epoch": 0.36281691247226094,
      "grad_norm": 1.6684645414352417,
      "learning_rate": 9.335477026679758e-06,
      "loss": 0.0503,
      "step": 221700
    },
    {
      "epoch": 0.3628496429109143,
      "grad_norm": 7.07413911819458,
      "learning_rate": 9.335411134466242e-06,
      "loss": 0.0402,
      "step": 221720
    },
    {
      "epoch": 0.36288237334956763,
      "grad_norm": 3.284273386001587,
      "learning_rate": 9.335345242252724e-06,
      "loss": 0.0592,
      "step": 221740
    },
    {
      "epoch": 0.36291510378822095,
      "grad_norm": 0.7910241484642029,
      "learning_rate": 9.335279350039208e-06,
      "loss": 0.055,
      "step": 221760
    },
    {
      "epoch": 0.3629478342268743,
      "grad_norm": 0.9194769859313965,
      "learning_rate": 9.33521345782569e-06,
      "loss": 0.0424,
      "step": 221780
    },
    {
      "epoch": 0.36298056466552764,
      "grad_norm": 1.26409113407135,
      "learning_rate": 9.335147565612173e-06,
      "loss": 0.0535,
      "step": 221800
    },
    {
      "epoch": 0.363013295104181,
      "grad_norm": 0.49600309133529663,
      "learning_rate": 9.335081673398655e-06,
      "loss": 0.0448,
      "step": 221820
    },
    {
      "epoch": 0.3630460255428343,
      "grad_norm": 0.9874999523162842,
      "learning_rate": 9.335015781185138e-06,
      "loss": 0.0621,
      "step": 221840
    },
    {
      "epoch": 0.36307875598148764,
      "grad_norm": 1.049135446548462,
      "learning_rate": 9.33494988897162e-06,
      "loss": 0.0526,
      "step": 221860
    },
    {
      "epoch": 0.363111486420141,
      "grad_norm": 1.5896433591842651,
      "learning_rate": 9.334883996758104e-06,
      "loss": 0.0731,
      "step": 221880
    },
    {
      "epoch": 0.36314421685879433,
      "grad_norm": 1.3737415075302124,
      "learning_rate": 9.334818104544586e-06,
      "loss": 0.0479,
      "step": 221900
    },
    {
      "epoch": 0.3631769472974477,
      "grad_norm": 1.6739767789840698,
      "learning_rate": 9.33475221233107e-06,
      "loss": 0.0406,
      "step": 221920
    },
    {
      "epoch": 0.363209677736101,
      "grad_norm": 1.1580806970596313,
      "learning_rate": 9.334686320117551e-06,
      "loss": 0.0472,
      "step": 221940
    },
    {
      "epoch": 0.36324240817475434,
      "grad_norm": 1.5238134860992432,
      "learning_rate": 9.334620427904035e-06,
      "loss": 0.054,
      "step": 221960
    },
    {
      "epoch": 0.3632751386134077,
      "grad_norm": 3.0868160724639893,
      "learning_rate": 9.334554535690519e-06,
      "loss": 0.0498,
      "step": 221980
    },
    {
      "epoch": 0.363307869052061,
      "grad_norm": 2.6163532733917236,
      "learning_rate": 9.334488643477e-06,
      "loss": 0.0546,
      "step": 222000
    },
    {
      "epoch": 0.3633405994907144,
      "grad_norm": 2.18023419380188,
      "learning_rate": 9.334422751263484e-06,
      "loss": 0.0599,
      "step": 222020
    },
    {
      "epoch": 0.3633733299293677,
      "grad_norm": 1.425751805305481,
      "learning_rate": 9.334356859049966e-06,
      "loss": 0.0646,
      "step": 222040
    },
    {
      "epoch": 0.36340606036802103,
      "grad_norm": 1.5160335302352905,
      "learning_rate": 9.33429096683645e-06,
      "loss": 0.0642,
      "step": 222060
    },
    {
      "epoch": 0.3634387908066744,
      "grad_norm": 1.213189959526062,
      "learning_rate": 9.334225074622933e-06,
      "loss": 0.0506,
      "step": 222080
    },
    {
      "epoch": 0.3634715212453277,
      "grad_norm": 1.2873234748840332,
      "learning_rate": 9.334159182409415e-06,
      "loss": 0.041,
      "step": 222100
    },
    {
      "epoch": 0.3635042516839811,
      "grad_norm": 2.3138155937194824,
      "learning_rate": 9.334093290195899e-06,
      "loss": 0.0476,
      "step": 222120
    },
    {
      "epoch": 0.3635369821226344,
      "grad_norm": 1.939095139503479,
      "learning_rate": 9.334027397982382e-06,
      "loss": 0.0446,
      "step": 222140
    },
    {
      "epoch": 0.36356971256128773,
      "grad_norm": 2.9480087757110596,
      "learning_rate": 9.333961505768864e-06,
      "loss": 0.0544,
      "step": 222160
    },
    {
      "epoch": 0.3636024429999411,
      "grad_norm": 1.732051134109497,
      "learning_rate": 9.333895613555348e-06,
      "loss": 0.0497,
      "step": 222180
    },
    {
      "epoch": 0.3636351734385944,
      "grad_norm": 1.7336421012878418,
      "learning_rate": 9.33382972134183e-06,
      "loss": 0.0495,
      "step": 222200
    },
    {
      "epoch": 0.3636679038772478,
      "grad_norm": 2.380385398864746,
      "learning_rate": 9.333763829128313e-06,
      "loss": 0.0516,
      "step": 222220
    },
    {
      "epoch": 0.3637006343159011,
      "grad_norm": 0.4844742715358734,
      "learning_rate": 9.333697936914795e-06,
      "loss": 0.036,
      "step": 222240
    },
    {
      "epoch": 0.3637333647545544,
      "grad_norm": 0.8832455277442932,
      "learning_rate": 9.333632044701279e-06,
      "loss": 0.0509,
      "step": 222260
    },
    {
      "epoch": 0.3637660951932078,
      "grad_norm": 2.1555309295654297,
      "learning_rate": 9.33356615248776e-06,
      "loss": 0.0589,
      "step": 222280
    },
    {
      "epoch": 0.3637988256318611,
      "grad_norm": 0.5773716568946838,
      "learning_rate": 9.333500260274244e-06,
      "loss": 0.0463,
      "step": 222300
    },
    {
      "epoch": 0.3638315560705145,
      "grad_norm": 1.1509170532226562,
      "learning_rate": 9.333434368060728e-06,
      "loss": 0.0393,
      "step": 222320
    },
    {
      "epoch": 0.3638642865091678,
      "grad_norm": 1.1402770280838013,
      "learning_rate": 9.33336847584721e-06,
      "loss": 0.0575,
      "step": 222340
    },
    {
      "epoch": 0.3638970169478211,
      "grad_norm": 1.1401971578598022,
      "learning_rate": 9.333302583633693e-06,
      "loss": 0.0414,
      "step": 222360
    },
    {
      "epoch": 0.3639297473864745,
      "grad_norm": 0.5919184684753418,
      "learning_rate": 9.333236691420175e-06,
      "loss": 0.0435,
      "step": 222380
    },
    {
      "epoch": 0.3639624778251278,
      "grad_norm": 5.428865909576416,
      "learning_rate": 9.333170799206659e-06,
      "loss": 0.0607,
      "step": 222400
    },
    {
      "epoch": 0.3639952082637812,
      "grad_norm": 2.7259297370910645,
      "learning_rate": 9.33310490699314e-06,
      "loss": 0.0408,
      "step": 222420
    },
    {
      "epoch": 0.3640279387024345,
      "grad_norm": 0.7821038961410522,
      "learning_rate": 9.333039014779624e-06,
      "loss": 0.0447,
      "step": 222440
    },
    {
      "epoch": 0.3640606691410878,
      "grad_norm": 0.4582158625125885,
      "learning_rate": 9.332973122566108e-06,
      "loss": 0.0371,
      "step": 222460
    },
    {
      "epoch": 0.3640933995797412,
      "grad_norm": 3.2673206329345703,
      "learning_rate": 9.33290723035259e-06,
      "loss": 0.0394,
      "step": 222480
    },
    {
      "epoch": 0.3641261300183945,
      "grad_norm": 0.6293086409568787,
      "learning_rate": 9.332841338139073e-06,
      "loss": 0.0457,
      "step": 222500
    },
    {
      "epoch": 0.3641588604570479,
      "grad_norm": 0.5531862378120422,
      "learning_rate": 9.332775445925557e-06,
      "loss": 0.0351,
      "step": 222520
    },
    {
      "epoch": 0.3641915908957012,
      "grad_norm": 1.687769889831543,
      "learning_rate": 9.332709553712039e-06,
      "loss": 0.0375,
      "step": 222540
    },
    {
      "epoch": 0.3642243213343545,
      "grad_norm": 1.6977699995040894,
      "learning_rate": 9.332643661498522e-06,
      "loss": 0.0517,
      "step": 222560
    },
    {
      "epoch": 0.3642570517730079,
      "grad_norm": 1.0567286014556885,
      "learning_rate": 9.332577769285004e-06,
      "loss": 0.0406,
      "step": 222580
    },
    {
      "epoch": 0.3642897822116612,
      "grad_norm": 3.9955410957336426,
      "learning_rate": 9.332511877071488e-06,
      "loss": 0.0443,
      "step": 222600
    },
    {
      "epoch": 0.36432251265031457,
      "grad_norm": 0.6292538642883301,
      "learning_rate": 9.33244598485797e-06,
      "loss": 0.0493,
      "step": 222620
    },
    {
      "epoch": 0.3643552430889679,
      "grad_norm": 4.619073390960693,
      "learning_rate": 9.332380092644453e-06,
      "loss": 0.0436,
      "step": 222640
    },
    {
      "epoch": 0.3643879735276212,
      "grad_norm": 1.574806809425354,
      "learning_rate": 9.332314200430937e-06,
      "loss": 0.0427,
      "step": 222660
    },
    {
      "epoch": 0.3644207039662746,
      "grad_norm": 3.453579902648926,
      "learning_rate": 9.332248308217419e-06,
      "loss": 0.0479,
      "step": 222680
    },
    {
      "epoch": 0.3644534344049279,
      "grad_norm": 0.44527754187583923,
      "learning_rate": 9.332182416003902e-06,
      "loss": 0.0569,
      "step": 222700
    },
    {
      "epoch": 0.3644861648435812,
      "grad_norm": 1.5843144655227661,
      "learning_rate": 9.332116523790384e-06,
      "loss": 0.0321,
      "step": 222720
    },
    {
      "epoch": 0.3645188952822346,
      "grad_norm": 2.9048686027526855,
      "learning_rate": 9.332050631576868e-06,
      "loss": 0.0468,
      "step": 222740
    },
    {
      "epoch": 0.3645516257208879,
      "grad_norm": 1.9283798933029175,
      "learning_rate": 9.33198473936335e-06,
      "loss": 0.0601,
      "step": 222760
    },
    {
      "epoch": 0.36458435615954127,
      "grad_norm": 2.6469874382019043,
      "learning_rate": 9.331918847149833e-06,
      "loss": 0.0464,
      "step": 222780
    },
    {
      "epoch": 0.3646170865981946,
      "grad_norm": 1.6741863489151,
      "learning_rate": 9.331852954936315e-06,
      "loss": 0.0459,
      "step": 222800
    },
    {
      "epoch": 0.3646498170368479,
      "grad_norm": 1.2062910795211792,
      "learning_rate": 9.331787062722799e-06,
      "loss": 0.049,
      "step": 222820
    },
    {
      "epoch": 0.3646825474755013,
      "grad_norm": 1.8204224109649658,
      "learning_rate": 9.33172117050928e-06,
      "loss": 0.0421,
      "step": 222840
    },
    {
      "epoch": 0.3647152779141546,
      "grad_norm": 3.466200351715088,
      "learning_rate": 9.331655278295764e-06,
      "loss": 0.0454,
      "step": 222860
    },
    {
      "epoch": 0.36474800835280796,
      "grad_norm": 0.7518590092658997,
      "learning_rate": 9.331589386082248e-06,
      "loss": 0.0472,
      "step": 222880
    },
    {
      "epoch": 0.3647807387914613,
      "grad_norm": 2.4680404663085938,
      "learning_rate": 9.33152349386873e-06,
      "loss": 0.043,
      "step": 222900
    },
    {
      "epoch": 0.3648134692301146,
      "grad_norm": 1.1993683576583862,
      "learning_rate": 9.331457601655213e-06,
      "loss": 0.0602,
      "step": 222920
    },
    {
      "epoch": 0.36484619966876797,
      "grad_norm": 2.3997585773468018,
      "learning_rate": 9.331391709441697e-06,
      "loss": 0.0532,
      "step": 222940
    },
    {
      "epoch": 0.3648789301074213,
      "grad_norm": 0.28382501006126404,
      "learning_rate": 9.331325817228179e-06,
      "loss": 0.0482,
      "step": 222960
    },
    {
      "epoch": 0.36491166054607466,
      "grad_norm": 1.2764865159988403,
      "learning_rate": 9.331259925014662e-06,
      "loss": 0.0428,
      "step": 222980
    },
    {
      "epoch": 0.364944390984728,
      "grad_norm": 1.660142421722412,
      "learning_rate": 9.331194032801144e-06,
      "loss": 0.0454,
      "step": 223000
    },
    {
      "epoch": 0.3649771214233813,
      "grad_norm": 1.8555948734283447,
      "learning_rate": 9.331128140587628e-06,
      "loss": 0.0488,
      "step": 223020
    },
    {
      "epoch": 0.36500985186203466,
      "grad_norm": 3.0481696128845215,
      "learning_rate": 9.331062248374111e-06,
      "loss": 0.0414,
      "step": 223040
    },
    {
      "epoch": 0.365042582300688,
      "grad_norm": 4.134581565856934,
      "learning_rate": 9.330996356160593e-06,
      "loss": 0.0418,
      "step": 223060
    },
    {
      "epoch": 0.36507531273934135,
      "grad_norm": 0.7639531493186951,
      "learning_rate": 9.330930463947077e-06,
      "loss": 0.0478,
      "step": 223080
    },
    {
      "epoch": 0.36510804317799467,
      "grad_norm": 2.5468761920928955,
      "learning_rate": 9.330864571733559e-06,
      "loss": 0.0405,
      "step": 223100
    },
    {
      "epoch": 0.365140773616648,
      "grad_norm": 1.4659932851791382,
      "learning_rate": 9.330798679520042e-06,
      "loss": 0.0428,
      "step": 223120
    },
    {
      "epoch": 0.36517350405530136,
      "grad_norm": 3.0069241523742676,
      "learning_rate": 9.330732787306524e-06,
      "loss": 0.0533,
      "step": 223140
    },
    {
      "epoch": 0.3652062344939547,
      "grad_norm": 2.1830904483795166,
      "learning_rate": 9.330666895093008e-06,
      "loss": 0.0498,
      "step": 223160
    },
    {
      "epoch": 0.36523896493260805,
      "grad_norm": 1.4300816059112549,
      "learning_rate": 9.33060100287949e-06,
      "loss": 0.0478,
      "step": 223180
    },
    {
      "epoch": 0.36527169537126136,
      "grad_norm": 3.777036428451538,
      "learning_rate": 9.330535110665973e-06,
      "loss": 0.0458,
      "step": 223200
    },
    {
      "epoch": 0.3653044258099147,
      "grad_norm": 6.906420707702637,
      "learning_rate": 9.330469218452455e-06,
      "loss": 0.0397,
      "step": 223220
    },
    {
      "epoch": 0.36533715624856805,
      "grad_norm": 3.5632827281951904,
      "learning_rate": 9.330403326238939e-06,
      "loss": 0.0553,
      "step": 223240
    },
    {
      "epoch": 0.36536988668722137,
      "grad_norm": 15.911371231079102,
      "learning_rate": 9.330337434025422e-06,
      "loss": 0.0551,
      "step": 223260
    },
    {
      "epoch": 0.36540261712587474,
      "grad_norm": 1.641033411026001,
      "learning_rate": 9.330271541811904e-06,
      "loss": 0.0469,
      "step": 223280
    },
    {
      "epoch": 0.36543534756452806,
      "grad_norm": 1.5843286514282227,
      "learning_rate": 9.330205649598388e-06,
      "loss": 0.0421,
      "step": 223300
    },
    {
      "epoch": 0.3654680780031814,
      "grad_norm": 1.7315806150436401,
      "learning_rate": 9.330139757384871e-06,
      "loss": 0.0361,
      "step": 223320
    },
    {
      "epoch": 0.36550080844183475,
      "grad_norm": 2.0053300857543945,
      "learning_rate": 9.330073865171353e-06,
      "loss": 0.0458,
      "step": 223340
    },
    {
      "epoch": 0.36553353888048806,
      "grad_norm": 1.8725547790527344,
      "learning_rate": 9.330007972957837e-06,
      "loss": 0.0441,
      "step": 223360
    },
    {
      "epoch": 0.36556626931914143,
      "grad_norm": 1.3722821474075317,
      "learning_rate": 9.32994208074432e-06,
      "loss": 0.0462,
      "step": 223380
    },
    {
      "epoch": 0.36559899975779475,
      "grad_norm": 0.7507485151290894,
      "learning_rate": 9.329876188530802e-06,
      "loss": 0.0428,
      "step": 223400
    },
    {
      "epoch": 0.36563173019644807,
      "grad_norm": 0.3301739990711212,
      "learning_rate": 9.329810296317286e-06,
      "loss": 0.0347,
      "step": 223420
    },
    {
      "epoch": 0.36566446063510144,
      "grad_norm": 2.4275341033935547,
      "learning_rate": 9.329744404103768e-06,
      "loss": 0.0565,
      "step": 223440
    },
    {
      "epoch": 0.36569719107375476,
      "grad_norm": 0.36699536442756653,
      "learning_rate": 9.329678511890251e-06,
      "loss": 0.0426,
      "step": 223460
    },
    {
      "epoch": 0.36572992151240813,
      "grad_norm": 1.9213758707046509,
      "learning_rate": 9.329612619676733e-06,
      "loss": 0.0681,
      "step": 223480
    },
    {
      "epoch": 0.36576265195106145,
      "grad_norm": 1.835942029953003,
      "learning_rate": 9.329546727463217e-06,
      "loss": 0.0409,
      "step": 223500
    },
    {
      "epoch": 0.36579538238971476,
      "grad_norm": 3.62532639503479,
      "learning_rate": 9.329480835249699e-06,
      "loss": 0.0428,
      "step": 223520
    },
    {
      "epoch": 0.36582811282836813,
      "grad_norm": 4.598780155181885,
      "learning_rate": 9.329414943036182e-06,
      "loss": 0.0417,
      "step": 223540
    },
    {
      "epoch": 0.36586084326702145,
      "grad_norm": 3.2238383293151855,
      "learning_rate": 9.329349050822664e-06,
      "loss": 0.0556,
      "step": 223560
    },
    {
      "epoch": 0.3658935737056748,
      "grad_norm": 1.5308570861816406,
      "learning_rate": 9.329283158609148e-06,
      "loss": 0.0483,
      "step": 223580
    },
    {
      "epoch": 0.36592630414432814,
      "grad_norm": 1.9949774742126465,
      "learning_rate": 9.32921726639563e-06,
      "loss": 0.0459,
      "step": 223600
    },
    {
      "epoch": 0.36595903458298146,
      "grad_norm": 1.9054878950119019,
      "learning_rate": 9.329151374182113e-06,
      "loss": 0.0422,
      "step": 223620
    },
    {
      "epoch": 0.36599176502163483,
      "grad_norm": 0.9599339962005615,
      "learning_rate": 9.329085481968597e-06,
      "loss": 0.0434,
      "step": 223640
    },
    {
      "epoch": 0.36602449546028815,
      "grad_norm": 1.143159031867981,
      "learning_rate": 9.329019589755079e-06,
      "loss": 0.0384,
      "step": 223660
    },
    {
      "epoch": 0.3660572258989415,
      "grad_norm": 2.7253825664520264,
      "learning_rate": 9.328953697541562e-06,
      "loss": 0.0442,
      "step": 223680
    },
    {
      "epoch": 0.36608995633759484,
      "grad_norm": 1.222717523574829,
      "learning_rate": 9.328887805328046e-06,
      "loss": 0.0329,
      "step": 223700
    },
    {
      "epoch": 0.36612268677624815,
      "grad_norm": 2.2143938541412354,
      "learning_rate": 9.328821913114528e-06,
      "loss": 0.0541,
      "step": 223720
    },
    {
      "epoch": 0.3661554172149015,
      "grad_norm": 2.7719948291778564,
      "learning_rate": 9.328756020901011e-06,
      "loss": 0.0427,
      "step": 223740
    },
    {
      "epoch": 0.36618814765355484,
      "grad_norm": 4.42977237701416,
      "learning_rate": 9.328690128687495e-06,
      "loss": 0.0408,
      "step": 223760
    },
    {
      "epoch": 0.3662208780922082,
      "grad_norm": 1.2515193223953247,
      "learning_rate": 9.328624236473977e-06,
      "loss": 0.0405,
      "step": 223780
    },
    {
      "epoch": 0.36625360853086153,
      "grad_norm": 3.1323394775390625,
      "learning_rate": 9.32855834426046e-06,
      "loss": 0.0461,
      "step": 223800
    },
    {
      "epoch": 0.36628633896951485,
      "grad_norm": 2.874196767807007,
      "learning_rate": 9.328492452046942e-06,
      "loss": 0.0426,
      "step": 223820
    },
    {
      "epoch": 0.3663190694081682,
      "grad_norm": 2.782400608062744,
      "learning_rate": 9.328426559833426e-06,
      "loss": 0.0514,
      "step": 223840
    },
    {
      "epoch": 0.36635179984682154,
      "grad_norm": 0.8130149841308594,
      "learning_rate": 9.328360667619908e-06,
      "loss": 0.049,
      "step": 223860
    },
    {
      "epoch": 0.3663845302854749,
      "grad_norm": 1.2985106706619263,
      "learning_rate": 9.328294775406391e-06,
      "loss": 0.0496,
      "step": 223880
    },
    {
      "epoch": 0.3664172607241282,
      "grad_norm": 1.4146264791488647,
      "learning_rate": 9.328228883192873e-06,
      "loss": 0.0438,
      "step": 223900
    },
    {
      "epoch": 0.36644999116278154,
      "grad_norm": 2.2776832580566406,
      "learning_rate": 9.328162990979357e-06,
      "loss": 0.0404,
      "step": 223920
    },
    {
      "epoch": 0.3664827216014349,
      "grad_norm": 0.4602029621601105,
      "learning_rate": 9.328097098765839e-06,
      "loss": 0.0514,
      "step": 223940
    },
    {
      "epoch": 0.36651545204008823,
      "grad_norm": 2.2692971229553223,
      "learning_rate": 9.328031206552322e-06,
      "loss": 0.0472,
      "step": 223960
    },
    {
      "epoch": 0.3665481824787416,
      "grad_norm": 0.9644908308982849,
      "learning_rate": 9.327965314338804e-06,
      "loss": 0.0618,
      "step": 223980
    },
    {
      "epoch": 0.3665809129173949,
      "grad_norm": 2.56282114982605,
      "learning_rate": 9.327899422125288e-06,
      "loss": 0.0638,
      "step": 224000
    },
    {
      "epoch": 0.36661364335604824,
      "grad_norm": 3.9019014835357666,
      "learning_rate": 9.32783352991177e-06,
      "loss": 0.0417,
      "step": 224020
    },
    {
      "epoch": 0.3666463737947016,
      "grad_norm": 0.7873192429542542,
      "learning_rate": 9.327767637698253e-06,
      "loss": 0.0537,
      "step": 224040
    },
    {
      "epoch": 0.3666791042333549,
      "grad_norm": 0.7468077540397644,
      "learning_rate": 9.327701745484737e-06,
      "loss": 0.0463,
      "step": 224060
    },
    {
      "epoch": 0.3667118346720083,
      "grad_norm": 1.0055317878723145,
      "learning_rate": 9.327635853271219e-06,
      "loss": 0.0409,
      "step": 224080
    },
    {
      "epoch": 0.3667445651106616,
      "grad_norm": 3.922703266143799,
      "learning_rate": 9.327569961057702e-06,
      "loss": 0.0488,
      "step": 224100
    },
    {
      "epoch": 0.36677729554931493,
      "grad_norm": 3.9649198055267334,
      "learning_rate": 9.327504068844186e-06,
      "loss": 0.0399,
      "step": 224120
    },
    {
      "epoch": 0.3668100259879683,
      "grad_norm": 1.5677145719528198,
      "learning_rate": 9.327438176630668e-06,
      "loss": 0.0461,
      "step": 224140
    },
    {
      "epoch": 0.3668427564266216,
      "grad_norm": 6.9113945960998535,
      "learning_rate": 9.327372284417152e-06,
      "loss": 0.0421,
      "step": 224160
    },
    {
      "epoch": 0.366875486865275,
      "grad_norm": 2.0348687171936035,
      "learning_rate": 9.327306392203635e-06,
      "loss": 0.0556,
      "step": 224180
    },
    {
      "epoch": 0.3669082173039283,
      "grad_norm": 2.076115846633911,
      "learning_rate": 9.327240499990117e-06,
      "loss": 0.0577,
      "step": 224200
    },
    {
      "epoch": 0.3669409477425816,
      "grad_norm": 0.6403632164001465,
      "learning_rate": 9.3271746077766e-06,
      "loss": 0.0415,
      "step": 224220
    },
    {
      "epoch": 0.366973678181235,
      "grad_norm": 1.0427709817886353,
      "learning_rate": 9.327108715563082e-06,
      "loss": 0.0459,
      "step": 224240
    },
    {
      "epoch": 0.3670064086198883,
      "grad_norm": 5.757593631744385,
      "learning_rate": 9.327042823349566e-06,
      "loss": 0.0485,
      "step": 224260
    },
    {
      "epoch": 0.3670391390585417,
      "grad_norm": 4.096967697143555,
      "learning_rate": 9.326976931136048e-06,
      "loss": 0.0437,
      "step": 224280
    },
    {
      "epoch": 0.367071869497195,
      "grad_norm": 2.1752405166625977,
      "learning_rate": 9.326911038922532e-06,
      "loss": 0.0535,
      "step": 224300
    },
    {
      "epoch": 0.3671045999358483,
      "grad_norm": 1.4897716045379639,
      "learning_rate": 9.326845146709013e-06,
      "loss": 0.0402,
      "step": 224320
    },
    {
      "epoch": 0.3671373303745017,
      "grad_norm": 0.9344280362129211,
      "learning_rate": 9.326779254495497e-06,
      "loss": 0.0595,
      "step": 224340
    },
    {
      "epoch": 0.367170060813155,
      "grad_norm": 1.670982003211975,
      "learning_rate": 9.326713362281979e-06,
      "loss": 0.0484,
      "step": 224360
    },
    {
      "epoch": 0.3672027912518084,
      "grad_norm": 1.3603709936141968,
      "learning_rate": 9.326647470068463e-06,
      "loss": 0.0469,
      "step": 224380
    },
    {
      "epoch": 0.3672355216904617,
      "grad_norm": 1.4413330554962158,
      "learning_rate": 9.326581577854944e-06,
      "loss": 0.0417,
      "step": 224400
    },
    {
      "epoch": 0.367268252129115,
      "grad_norm": 12.420923233032227,
      "learning_rate": 9.326515685641428e-06,
      "loss": 0.0547,
      "step": 224420
    },
    {
      "epoch": 0.3673009825677684,
      "grad_norm": 2.331472158432007,
      "learning_rate": 9.326449793427912e-06,
      "loss": 0.0631,
      "step": 224440
    },
    {
      "epoch": 0.3673337130064217,
      "grad_norm": 2.5428404808044434,
      "learning_rate": 9.326383901214393e-06,
      "loss": 0.0487,
      "step": 224460
    },
    {
      "epoch": 0.3673664434450751,
      "grad_norm": 0.4906970262527466,
      "learning_rate": 9.326318009000877e-06,
      "loss": 0.0581,
      "step": 224480
    },
    {
      "epoch": 0.3673991738837284,
      "grad_norm": 2.2846570014953613,
      "learning_rate": 9.32625211678736e-06,
      "loss": 0.0385,
      "step": 224500
    },
    {
      "epoch": 0.3674319043223817,
      "grad_norm": 1.002395510673523,
      "learning_rate": 9.326186224573843e-06,
      "loss": 0.0495,
      "step": 224520
    },
    {
      "epoch": 0.3674646347610351,
      "grad_norm": 2.043466806411743,
      "learning_rate": 9.326120332360326e-06,
      "loss": 0.0613,
      "step": 224540
    },
    {
      "epoch": 0.3674973651996884,
      "grad_norm": 0.535919189453125,
      "learning_rate": 9.32605444014681e-06,
      "loss": 0.0475,
      "step": 224560
    },
    {
      "epoch": 0.36753009563834177,
      "grad_norm": 1.9558085203170776,
      "learning_rate": 9.325988547933292e-06,
      "loss": 0.0531,
      "step": 224580
    },
    {
      "epoch": 0.3675628260769951,
      "grad_norm": 1.2231868505477905,
      "learning_rate": 9.325922655719775e-06,
      "loss": 0.0587,
      "step": 224600
    },
    {
      "epoch": 0.3675955565156484,
      "grad_norm": 2.358097553253174,
      "learning_rate": 9.325856763506257e-06,
      "loss": 0.0577,
      "step": 224620
    },
    {
      "epoch": 0.3676282869543018,
      "grad_norm": 1.072756290435791,
      "learning_rate": 9.32579087129274e-06,
      "loss": 0.047,
      "step": 224640
    },
    {
      "epoch": 0.3676610173929551,
      "grad_norm": 2.658829689025879,
      "learning_rate": 9.325724979079223e-06,
      "loss": 0.0505,
      "step": 224660
    },
    {
      "epoch": 0.36769374783160846,
      "grad_norm": 16.236513137817383,
      "learning_rate": 9.325659086865706e-06,
      "loss": 0.044,
      "step": 224680
    },
    {
      "epoch": 0.3677264782702618,
      "grad_norm": 0.7957823276519775,
      "learning_rate": 9.325593194652188e-06,
      "loss": 0.0407,
      "step": 224700
    },
    {
      "epoch": 0.3677592087089151,
      "grad_norm": 2.5996267795562744,
      "learning_rate": 9.325527302438672e-06,
      "loss": 0.0665,
      "step": 224720
    },
    {
      "epoch": 0.36779193914756847,
      "grad_norm": 2.125884532928467,
      "learning_rate": 9.325461410225154e-06,
      "loss": 0.0458,
      "step": 224740
    },
    {
      "epoch": 0.3678246695862218,
      "grad_norm": 0.2602901756763458,
      "learning_rate": 9.325395518011637e-06,
      "loss": 0.0488,
      "step": 224760
    },
    {
      "epoch": 0.36785740002487516,
      "grad_norm": 2.02620792388916,
      "learning_rate": 9.32532962579812e-06,
      "loss": 0.0499,
      "step": 224780
    },
    {
      "epoch": 0.3678901304635285,
      "grad_norm": 0.5658647418022156,
      "learning_rate": 9.325263733584603e-06,
      "loss": 0.0268,
      "step": 224800
    },
    {
      "epoch": 0.3679228609021818,
      "grad_norm": 1.7163805961608887,
      "learning_rate": 9.325197841371086e-06,
      "loss": 0.052,
      "step": 224820
    },
    {
      "epoch": 0.36795559134083516,
      "grad_norm": 2.152937889099121,
      "learning_rate": 9.325131949157568e-06,
      "loss": 0.0549,
      "step": 224840
    },
    {
      "epoch": 0.3679883217794885,
      "grad_norm": 2.239624261856079,
      "learning_rate": 9.325066056944052e-06,
      "loss": 0.0421,
      "step": 224860
    },
    {
      "epoch": 0.36802105221814185,
      "grad_norm": 2.56528902053833,
      "learning_rate": 9.325000164730534e-06,
      "loss": 0.0492,
      "step": 224880
    },
    {
      "epoch": 0.36805378265679517,
      "grad_norm": 2.7338712215423584,
      "learning_rate": 9.324934272517017e-06,
      "loss": 0.0582,
      "step": 224900
    },
    {
      "epoch": 0.3680865130954485,
      "grad_norm": 1.8944463729858398,
      "learning_rate": 9.3248683803035e-06,
      "loss": 0.0505,
      "step": 224920
    },
    {
      "epoch": 0.36811924353410186,
      "grad_norm": 1.0026464462280273,
      "learning_rate": 9.324802488089983e-06,
      "loss": 0.0547,
      "step": 224940
    },
    {
      "epoch": 0.3681519739727552,
      "grad_norm": 1.0026799440383911,
      "learning_rate": 9.324736595876466e-06,
      "loss": 0.0424,
      "step": 224960
    },
    {
      "epoch": 0.36818470441140855,
      "grad_norm": 1.3370256423950195,
      "learning_rate": 9.32467070366295e-06,
      "loss": 0.0534,
      "step": 224980
    },
    {
      "epoch": 0.36821743485006186,
      "grad_norm": 1.3067047595977783,
      "learning_rate": 9.324604811449432e-06,
      "loss": 0.0535,
      "step": 225000
    },
    {
      "epoch": 0.3682501652887152,
      "grad_norm": 1.8031898736953735,
      "learning_rate": 9.324538919235915e-06,
      "loss": 0.0602,
      "step": 225020
    },
    {
      "epoch": 0.36828289572736855,
      "grad_norm": 0.6427000761032104,
      "learning_rate": 9.324473027022397e-06,
      "loss": 0.0376,
      "step": 225040
    },
    {
      "epoch": 0.36831562616602187,
      "grad_norm": 0.6076961159706116,
      "learning_rate": 9.32440713480888e-06,
      "loss": 0.0459,
      "step": 225060
    },
    {
      "epoch": 0.36834835660467524,
      "grad_norm": 2.582919120788574,
      "learning_rate": 9.324341242595363e-06,
      "loss": 0.0438,
      "step": 225080
    },
    {
      "epoch": 0.36838108704332856,
      "grad_norm": 3.8659327030181885,
      "learning_rate": 9.324275350381846e-06,
      "loss": 0.0516,
      "step": 225100
    },
    {
      "epoch": 0.3684138174819819,
      "grad_norm": 1.0354987382888794,
      "learning_rate": 9.32420945816833e-06,
      "loss": 0.0344,
      "step": 225120
    },
    {
      "epoch": 0.36844654792063525,
      "grad_norm": 2.651505470275879,
      "learning_rate": 9.324143565954812e-06,
      "loss": 0.0388,
      "step": 225140
    },
    {
      "epoch": 0.36847927835928856,
      "grad_norm": 0.5151607394218445,
      "learning_rate": 9.324077673741295e-06,
      "loss": 0.0376,
      "step": 225160
    },
    {
      "epoch": 0.36851200879794194,
      "grad_norm": 2.6341774463653564,
      "learning_rate": 9.324011781527777e-06,
      "loss": 0.0537,
      "step": 225180
    },
    {
      "epoch": 0.36854473923659525,
      "grad_norm": 1.196919322013855,
      "learning_rate": 9.32394588931426e-06,
      "loss": 0.0482,
      "step": 225200
    },
    {
      "epoch": 0.36857746967524857,
      "grad_norm": 1.1966572999954224,
      "learning_rate": 9.323879997100743e-06,
      "loss": 0.038,
      "step": 225220
    },
    {
      "epoch": 0.36861020011390194,
      "grad_norm": 0.7285971641540527,
      "learning_rate": 9.323814104887226e-06,
      "loss": 0.0453,
      "step": 225240
    },
    {
      "epoch": 0.36864293055255526,
      "grad_norm": 1.8921102285385132,
      "learning_rate": 9.323748212673708e-06,
      "loss": 0.0423,
      "step": 225260
    },
    {
      "epoch": 0.36867566099120863,
      "grad_norm": 3.035867691040039,
      "learning_rate": 9.323682320460192e-06,
      "loss": 0.0393,
      "step": 225280
    },
    {
      "epoch": 0.36870839142986195,
      "grad_norm": 2.2589821815490723,
      "learning_rate": 9.323616428246675e-06,
      "loss": 0.0537,
      "step": 225300
    },
    {
      "epoch": 0.36874112186851526,
      "grad_norm": 0.4655095040798187,
      "learning_rate": 9.323550536033157e-06,
      "loss": 0.0482,
      "step": 225320
    },
    {
      "epoch": 0.36877385230716864,
      "grad_norm": 0.8078891634941101,
      "learning_rate": 9.32348464381964e-06,
      "loss": 0.0419,
      "step": 225340
    },
    {
      "epoch": 0.36880658274582195,
      "grad_norm": 4.22221565246582,
      "learning_rate": 9.323418751606124e-06,
      "loss": 0.0587,
      "step": 225360
    },
    {
      "epoch": 0.3688393131844753,
      "grad_norm": 1.3546006679534912,
      "learning_rate": 9.323352859392606e-06,
      "loss": 0.0548,
      "step": 225380
    },
    {
      "epoch": 0.36887204362312864,
      "grad_norm": 3.372845411300659,
      "learning_rate": 9.32328696717909e-06,
      "loss": 0.0427,
      "step": 225400
    },
    {
      "epoch": 0.36890477406178196,
      "grad_norm": 2.5241847038269043,
      "learning_rate": 9.323221074965572e-06,
      "loss": 0.0564,
      "step": 225420
    },
    {
      "epoch": 0.36893750450043533,
      "grad_norm": 1.4777953624725342,
      "learning_rate": 9.323155182752055e-06,
      "loss": 0.0475,
      "step": 225440
    },
    {
      "epoch": 0.36897023493908865,
      "grad_norm": 1.6905090808868408,
      "learning_rate": 9.323089290538537e-06,
      "loss": 0.0693,
      "step": 225460
    },
    {
      "epoch": 0.36900296537774196,
      "grad_norm": 0.6893759965896606,
      "learning_rate": 9.32302339832502e-06,
      "loss": 0.0547,
      "step": 225480
    },
    {
      "epoch": 0.36903569581639534,
      "grad_norm": 2.532231092453003,
      "learning_rate": 9.322957506111504e-06,
      "loss": 0.0387,
      "step": 225500
    },
    {
      "epoch": 0.36906842625504865,
      "grad_norm": 0.8866450190544128,
      "learning_rate": 9.322891613897986e-06,
      "loss": 0.043,
      "step": 225520
    },
    {
      "epoch": 0.369101156693702,
      "grad_norm": 1.6819798946380615,
      "learning_rate": 9.32282572168447e-06,
      "loss": 0.0495,
      "step": 225540
    },
    {
      "epoch": 0.36913388713235534,
      "grad_norm": 2.092618465423584,
      "learning_rate": 9.322759829470952e-06,
      "loss": 0.0552,
      "step": 225560
    },
    {
      "epoch": 0.36916661757100866,
      "grad_norm": 2.712308883666992,
      "learning_rate": 9.322693937257435e-06,
      "loss": 0.0451,
      "step": 225580
    },
    {
      "epoch": 0.36919934800966203,
      "grad_norm": 1.4092684984207153,
      "learning_rate": 9.322628045043917e-06,
      "loss": 0.0579,
      "step": 225600
    },
    {
      "epoch": 0.36923207844831535,
      "grad_norm": 1.2509397268295288,
      "learning_rate": 9.322562152830401e-06,
      "loss": 0.043,
      "step": 225620
    },
    {
      "epoch": 0.3692648088869687,
      "grad_norm": 1.8106955289840698,
      "learning_rate": 9.322496260616883e-06,
      "loss": 0.0653,
      "step": 225640
    },
    {
      "epoch": 0.36929753932562204,
      "grad_norm": 2.8429694175720215,
      "learning_rate": 9.322430368403366e-06,
      "loss": 0.049,
      "step": 225660
    },
    {
      "epoch": 0.36933026976427535,
      "grad_norm": 0.807279646396637,
      "learning_rate": 9.322364476189848e-06,
      "loss": 0.0486,
      "step": 225680
    },
    {
      "epoch": 0.3693630002029287,
      "grad_norm": 0.44342041015625,
      "learning_rate": 9.322298583976332e-06,
      "loss": 0.043,
      "step": 225700
    },
    {
      "epoch": 0.36939573064158204,
      "grad_norm": 1.9213541746139526,
      "learning_rate": 9.322232691762815e-06,
      "loss": 0.066,
      "step": 225720
    },
    {
      "epoch": 0.3694284610802354,
      "grad_norm": 1.0867441892623901,
      "learning_rate": 9.322166799549299e-06,
      "loss": 0.0503,
      "step": 225740
    },
    {
      "epoch": 0.36946119151888873,
      "grad_norm": 1.3352084159851074,
      "learning_rate": 9.322100907335781e-06,
      "loss": 0.0327,
      "step": 225760
    },
    {
      "epoch": 0.36949392195754205,
      "grad_norm": 1.940380573272705,
      "learning_rate": 9.322035015122264e-06,
      "loss": 0.0522,
      "step": 225780
    },
    {
      "epoch": 0.3695266523961954,
      "grad_norm": 1.4516499042510986,
      "learning_rate": 9.321969122908746e-06,
      "loss": 0.0307,
      "step": 225800
    },
    {
      "epoch": 0.36955938283484874,
      "grad_norm": 2.2337722778320312,
      "learning_rate": 9.32190323069523e-06,
      "loss": 0.0391,
      "step": 225820
    },
    {
      "epoch": 0.3695921132735021,
      "grad_norm": 1.568385362625122,
      "learning_rate": 9.321837338481714e-06,
      "loss": 0.0437,
      "step": 225840
    },
    {
      "epoch": 0.3696248437121554,
      "grad_norm": 1.6450148820877075,
      "learning_rate": 9.321771446268195e-06,
      "loss": 0.052,
      "step": 225860
    },
    {
      "epoch": 0.36965757415080874,
      "grad_norm": 1.5587831735610962,
      "learning_rate": 9.321705554054679e-06,
      "loss": 0.0587,
      "step": 225880
    },
    {
      "epoch": 0.3696903045894621,
      "grad_norm": 1.1028581857681274,
      "learning_rate": 9.321639661841161e-06,
      "loss": 0.0439,
      "step": 225900
    },
    {
      "epoch": 0.36972303502811543,
      "grad_norm": 0.6821123361587524,
      "learning_rate": 9.321573769627644e-06,
      "loss": 0.0376,
      "step": 225920
    },
    {
      "epoch": 0.3697557654667688,
      "grad_norm": 0.8089068531990051,
      "learning_rate": 9.321507877414126e-06,
      "loss": 0.0513,
      "step": 225940
    },
    {
      "epoch": 0.3697884959054221,
      "grad_norm": 1.8087571859359741,
      "learning_rate": 9.32144198520061e-06,
      "loss": 0.0401,
      "step": 225960
    },
    {
      "epoch": 0.36982122634407544,
      "grad_norm": 0.23117318749427795,
      "learning_rate": 9.321376092987092e-06,
      "loss": 0.0481,
      "step": 225980
    },
    {
      "epoch": 0.3698539567827288,
      "grad_norm": 2.720181941986084,
      "learning_rate": 9.321310200773575e-06,
      "loss": 0.0568,
      "step": 226000
    },
    {
      "epoch": 0.3698866872213821,
      "grad_norm": 1.0120996236801147,
      "learning_rate": 9.321244308560057e-06,
      "loss": 0.0398,
      "step": 226020
    },
    {
      "epoch": 0.3699194176600355,
      "grad_norm": 0.666162371635437,
      "learning_rate": 9.321178416346541e-06,
      "loss": 0.0489,
      "step": 226040
    },
    {
      "epoch": 0.3699521480986888,
      "grad_norm": 0.8766946792602539,
      "learning_rate": 9.321112524133023e-06,
      "loss": 0.0454,
      "step": 226060
    },
    {
      "epoch": 0.36998487853734213,
      "grad_norm": 1.8034991025924683,
      "learning_rate": 9.321046631919506e-06,
      "loss": 0.0398,
      "step": 226080
    },
    {
      "epoch": 0.3700176089759955,
      "grad_norm": 0.5080320239067078,
      "learning_rate": 9.32098073970599e-06,
      "loss": 0.0435,
      "step": 226100
    },
    {
      "epoch": 0.3700503394146488,
      "grad_norm": 0.6520667672157288,
      "learning_rate": 9.320914847492472e-06,
      "loss": 0.0467,
      "step": 226120
    },
    {
      "epoch": 0.3700830698533022,
      "grad_norm": 4.024451732635498,
      "learning_rate": 9.320848955278955e-06,
      "loss": 0.0414,
      "step": 226140
    },
    {
      "epoch": 0.3701158002919555,
      "grad_norm": 1.9170498847961426,
      "learning_rate": 9.320783063065439e-06,
      "loss": 0.05,
      "step": 226160
    },
    {
      "epoch": 0.3701485307306088,
      "grad_norm": 1.3631552457809448,
      "learning_rate": 9.320717170851921e-06,
      "loss": 0.026,
      "step": 226180
    },
    {
      "epoch": 0.3701812611692622,
      "grad_norm": 2.5233395099639893,
      "learning_rate": 9.320651278638405e-06,
      "loss": 0.0462,
      "step": 226200
    },
    {
      "epoch": 0.3702139916079155,
      "grad_norm": 2.2961227893829346,
      "learning_rate": 9.320585386424888e-06,
      "loss": 0.0372,
      "step": 226220
    },
    {
      "epoch": 0.3702467220465689,
      "grad_norm": 6.6112165451049805,
      "learning_rate": 9.32051949421137e-06,
      "loss": 0.0505,
      "step": 226240
    },
    {
      "epoch": 0.3702794524852222,
      "grad_norm": 0.8027774095535278,
      "learning_rate": 9.320453601997854e-06,
      "loss": 0.0429,
      "step": 226260
    },
    {
      "epoch": 0.3703121829238755,
      "grad_norm": 1.01347815990448,
      "learning_rate": 9.320387709784336e-06,
      "loss": 0.0527,
      "step": 226280
    },
    {
      "epoch": 0.3703449133625289,
      "grad_norm": 1.4068275690078735,
      "learning_rate": 9.320321817570819e-06,
      "loss": 0.0486,
      "step": 226300
    },
    {
      "epoch": 0.3703776438011822,
      "grad_norm": 1.251084327697754,
      "learning_rate": 9.320255925357301e-06,
      "loss": 0.0597,
      "step": 226320
    },
    {
      "epoch": 0.3704103742398356,
      "grad_norm": 3.0504868030548096,
      "learning_rate": 9.320190033143785e-06,
      "loss": 0.0487,
      "step": 226340
    },
    {
      "epoch": 0.3704431046784889,
      "grad_norm": 4.721763610839844,
      "learning_rate": 9.320124140930266e-06,
      "loss": 0.047,
      "step": 226360
    },
    {
      "epoch": 0.3704758351171422,
      "grad_norm": 1.0434218645095825,
      "learning_rate": 9.32005824871675e-06,
      "loss": 0.0626,
      "step": 226380
    },
    {
      "epoch": 0.3705085655557956,
      "grad_norm": 1.8689748048782349,
      "learning_rate": 9.319992356503232e-06,
      "loss": 0.0437,
      "step": 226400
    },
    {
      "epoch": 0.3705412959944489,
      "grad_norm": 1.1379013061523438,
      "learning_rate": 9.319926464289716e-06,
      "loss": 0.0356,
      "step": 226420
    },
    {
      "epoch": 0.3705740264331023,
      "grad_norm": 3.8550314903259277,
      "learning_rate": 9.319860572076197e-06,
      "loss": 0.054,
      "step": 226440
    },
    {
      "epoch": 0.3706067568717556,
      "grad_norm": 0.8747878074645996,
      "learning_rate": 9.319794679862681e-06,
      "loss": 0.0497,
      "step": 226460
    },
    {
      "epoch": 0.3706394873104089,
      "grad_norm": 3.223853349685669,
      "learning_rate": 9.319728787649165e-06,
      "loss": 0.0574,
      "step": 226480
    },
    {
      "epoch": 0.3706722177490623,
      "grad_norm": 0.9512297511100769,
      "learning_rate": 9.319662895435646e-06,
      "loss": 0.039,
      "step": 226500
    },
    {
      "epoch": 0.3707049481877156,
      "grad_norm": 1.9281293153762817,
      "learning_rate": 9.31959700322213e-06,
      "loss": 0.0502,
      "step": 226520
    },
    {
      "epoch": 0.37073767862636897,
      "grad_norm": 2.356379985809326,
      "learning_rate": 9.319531111008614e-06,
      "loss": 0.0428,
      "step": 226540
    },
    {
      "epoch": 0.3707704090650223,
      "grad_norm": 0.5604202151298523,
      "learning_rate": 9.319465218795096e-06,
      "loss": 0.0414,
      "step": 226560
    },
    {
      "epoch": 0.3708031395036756,
      "grad_norm": 2.277463912963867,
      "learning_rate": 9.319399326581579e-06,
      "loss": 0.0506,
      "step": 226580
    },
    {
      "epoch": 0.370835869942329,
      "grad_norm": 3.6829168796539307,
      "learning_rate": 9.319333434368063e-06,
      "loss": 0.0385,
      "step": 226600
    },
    {
      "epoch": 0.3708686003809823,
      "grad_norm": 3.7665328979492188,
      "learning_rate": 9.319267542154545e-06,
      "loss": 0.0488,
      "step": 226620
    },
    {
      "epoch": 0.37090133081963567,
      "grad_norm": 1.3219342231750488,
      "learning_rate": 9.319201649941028e-06,
      "loss": 0.0499,
      "step": 226640
    },
    {
      "epoch": 0.370934061258289,
      "grad_norm": 1.6053329706192017,
      "learning_rate": 9.31913575772751e-06,
      "loss": 0.0437,
      "step": 226660
    },
    {
      "epoch": 0.3709667916969423,
      "grad_norm": 2.6624269485473633,
      "learning_rate": 9.319069865513994e-06,
      "loss": 0.0474,
      "step": 226680
    },
    {
      "epoch": 0.37099952213559567,
      "grad_norm": 0.3088160455226898,
      "learning_rate": 9.319003973300476e-06,
      "loss": 0.0672,
      "step": 226700
    },
    {
      "epoch": 0.371032252574249,
      "grad_norm": 1.5213878154754639,
      "learning_rate": 9.31893808108696e-06,
      "loss": 0.0424,
      "step": 226720
    },
    {
      "epoch": 0.37106498301290236,
      "grad_norm": 1.8736419677734375,
      "learning_rate": 9.318872188873441e-06,
      "loss": 0.0465,
      "step": 226740
    },
    {
      "epoch": 0.3710977134515557,
      "grad_norm": 3.6968438625335693,
      "learning_rate": 9.318806296659925e-06,
      "loss": 0.0393,
      "step": 226760
    },
    {
      "epoch": 0.371130443890209,
      "grad_norm": 3.085155725479126,
      "learning_rate": 9.318740404446407e-06,
      "loss": 0.0494,
      "step": 226780
    },
    {
      "epoch": 0.37116317432886237,
      "grad_norm": 0.6903036236763,
      "learning_rate": 9.31867451223289e-06,
      "loss": 0.0387,
      "step": 226800
    },
    {
      "epoch": 0.3711959047675157,
      "grad_norm": 0.931659996509552,
      "learning_rate": 9.318608620019372e-06,
      "loss": 0.0406,
      "step": 226820
    },
    {
      "epoch": 0.37122863520616906,
      "grad_norm": 1.6657971143722534,
      "learning_rate": 9.318542727805856e-06,
      "loss": 0.0417,
      "step": 226840
    },
    {
      "epoch": 0.37126136564482237,
      "grad_norm": 1.2984365224838257,
      "learning_rate": 9.318476835592337e-06,
      "loss": 0.0511,
      "step": 226860
    },
    {
      "epoch": 0.3712940960834757,
      "grad_norm": 0.4873727262020111,
      "learning_rate": 9.318410943378821e-06,
      "loss": 0.0434,
      "step": 226880
    },
    {
      "epoch": 0.37132682652212906,
      "grad_norm": 9.07791805267334,
      "learning_rate": 9.318345051165305e-06,
      "loss": 0.0567,
      "step": 226900
    },
    {
      "epoch": 0.3713595569607824,
      "grad_norm": 0.7656258344650269,
      "learning_rate": 9.318279158951787e-06,
      "loss": 0.0473,
      "step": 226920
    },
    {
      "epoch": 0.37139228739943575,
      "grad_norm": 4.447351455688477,
      "learning_rate": 9.31821326673827e-06,
      "loss": 0.0471,
      "step": 226940
    },
    {
      "epoch": 0.37142501783808907,
      "grad_norm": 3.8980913162231445,
      "learning_rate": 9.318147374524754e-06,
      "loss": 0.0379,
      "step": 226960
    },
    {
      "epoch": 0.3714577482767424,
      "grad_norm": 1.2425912618637085,
      "learning_rate": 9.318081482311236e-06,
      "loss": 0.0615,
      "step": 226980
    },
    {
      "epoch": 0.37149047871539576,
      "grad_norm": 3.013584613800049,
      "learning_rate": 9.31801559009772e-06,
      "loss": 0.0405,
      "step": 227000
    },
    {
      "epoch": 0.37152320915404907,
      "grad_norm": 2.0156185626983643,
      "learning_rate": 9.317949697884203e-06,
      "loss": 0.0472,
      "step": 227020
    },
    {
      "epoch": 0.37155593959270244,
      "grad_norm": 1.3494305610656738,
      "learning_rate": 9.317883805670685e-06,
      "loss": 0.05,
      "step": 227040
    },
    {
      "epoch": 0.37158867003135576,
      "grad_norm": 1.703584909439087,
      "learning_rate": 9.317817913457168e-06,
      "loss": 0.0454,
      "step": 227060
    },
    {
      "epoch": 0.3716214004700091,
      "grad_norm": 0.9619463682174683,
      "learning_rate": 9.31775202124365e-06,
      "loss": 0.0468,
      "step": 227080
    },
    {
      "epoch": 0.37165413090866245,
      "grad_norm": 0.35466185212135315,
      "learning_rate": 9.317686129030134e-06,
      "loss": 0.0608,
      "step": 227100
    },
    {
      "epoch": 0.37168686134731577,
      "grad_norm": 0.9178997874259949,
      "learning_rate": 9.317620236816616e-06,
      "loss": 0.0463,
      "step": 227120
    },
    {
      "epoch": 0.37171959178596914,
      "grad_norm": 4.194206714630127,
      "learning_rate": 9.3175543446031e-06,
      "loss": 0.0436,
      "step": 227140
    },
    {
      "epoch": 0.37175232222462246,
      "grad_norm": 1.4697424173355103,
      "learning_rate": 9.317488452389581e-06,
      "loss": 0.0504,
      "step": 227160
    },
    {
      "epoch": 0.3717850526632758,
      "grad_norm": 2.245110511779785,
      "learning_rate": 9.317422560176065e-06,
      "loss": 0.0465,
      "step": 227180
    },
    {
      "epoch": 0.37181778310192914,
      "grad_norm": 0.8982604742050171,
      "learning_rate": 9.317356667962547e-06,
      "loss": 0.0479,
      "step": 227200
    },
    {
      "epoch": 0.37185051354058246,
      "grad_norm": 3.1567671298980713,
      "learning_rate": 9.31729077574903e-06,
      "loss": 0.0542,
      "step": 227220
    },
    {
      "epoch": 0.37188324397923583,
      "grad_norm": 1.7135651111602783,
      "learning_rate": 9.317224883535514e-06,
      "loss": 0.0503,
      "step": 227240
    },
    {
      "epoch": 0.37191597441788915,
      "grad_norm": 2.127030849456787,
      "learning_rate": 9.317158991321996e-06,
      "loss": 0.0445,
      "step": 227260
    },
    {
      "epoch": 0.37194870485654247,
      "grad_norm": 1.1100449562072754,
      "learning_rate": 9.31709309910848e-06,
      "loss": 0.0477,
      "step": 227280
    },
    {
      "epoch": 0.37198143529519584,
      "grad_norm": 2.0238454341888428,
      "learning_rate": 9.317027206894961e-06,
      "loss": 0.0467,
      "step": 227300
    },
    {
      "epoch": 0.37201416573384916,
      "grad_norm": 7.399377346038818,
      "learning_rate": 9.316961314681445e-06,
      "loss": 0.0474,
      "step": 227320
    },
    {
      "epoch": 0.37204689617250253,
      "grad_norm": 2.158965826034546,
      "learning_rate": 9.316895422467928e-06,
      "loss": 0.0432,
      "step": 227340
    },
    {
      "epoch": 0.37207962661115584,
      "grad_norm": 1.202614426612854,
      "learning_rate": 9.31682953025441e-06,
      "loss": 0.0316,
      "step": 227360
    },
    {
      "epoch": 0.37211235704980916,
      "grad_norm": 2.963456630706787,
      "learning_rate": 9.316763638040894e-06,
      "loss": 0.0714,
      "step": 227380
    },
    {
      "epoch": 0.37214508748846253,
      "grad_norm": 0.4291827380657196,
      "learning_rate": 9.316697745827377e-06,
      "loss": 0.0464,
      "step": 227400
    },
    {
      "epoch": 0.37217781792711585,
      "grad_norm": 4.508299350738525,
      "learning_rate": 9.31663185361386e-06,
      "loss": 0.0399,
      "step": 227420
    },
    {
      "epoch": 0.3722105483657692,
      "grad_norm": 8.091543197631836,
      "learning_rate": 9.316565961400343e-06,
      "loss": 0.046,
      "step": 227440
    },
    {
      "epoch": 0.37224327880442254,
      "grad_norm": 2.8324971199035645,
      "learning_rate": 9.316500069186825e-06,
      "loss": 0.0531,
      "step": 227460
    },
    {
      "epoch": 0.37227600924307586,
      "grad_norm": 2.1460788249969482,
      "learning_rate": 9.316434176973308e-06,
      "loss": 0.0477,
      "step": 227480
    },
    {
      "epoch": 0.37230873968172923,
      "grad_norm": 2.423116683959961,
      "learning_rate": 9.31636828475979e-06,
      "loss": 0.0405,
      "step": 227500
    },
    {
      "epoch": 0.37234147012038254,
      "grad_norm": 1.400026559829712,
      "learning_rate": 9.316302392546274e-06,
      "loss": 0.0343,
      "step": 227520
    },
    {
      "epoch": 0.3723742005590359,
      "grad_norm": 0.7094480991363525,
      "learning_rate": 9.316236500332756e-06,
      "loss": 0.0482,
      "step": 227540
    },
    {
      "epoch": 0.37240693099768923,
      "grad_norm": 1.4666597843170166,
      "learning_rate": 9.31617060811924e-06,
      "loss": 0.0462,
      "step": 227560
    },
    {
      "epoch": 0.37243966143634255,
      "grad_norm": 0.22313158214092255,
      "learning_rate": 9.316104715905721e-06,
      "loss": 0.027,
      "step": 227580
    },
    {
      "epoch": 0.3724723918749959,
      "grad_norm": 0.713535726070404,
      "learning_rate": 9.316038823692205e-06,
      "loss": 0.0545,
      "step": 227600
    },
    {
      "epoch": 0.37250512231364924,
      "grad_norm": 1.8664655685424805,
      "learning_rate": 9.315972931478688e-06,
      "loss": 0.0529,
      "step": 227620
    },
    {
      "epoch": 0.3725378527523026,
      "grad_norm": 1.3095355033874512,
      "learning_rate": 9.31590703926517e-06,
      "loss": 0.0463,
      "step": 227640
    },
    {
      "epoch": 0.37257058319095593,
      "grad_norm": 2.383622169494629,
      "learning_rate": 9.315841147051654e-06,
      "loss": 0.0527,
      "step": 227660
    },
    {
      "epoch": 0.37260331362960925,
      "grad_norm": 5.147856712341309,
      "learning_rate": 9.315775254838136e-06,
      "loss": 0.0468,
      "step": 227680
    },
    {
      "epoch": 0.3726360440682626,
      "grad_norm": 0.776573896408081,
      "learning_rate": 9.31570936262462e-06,
      "loss": 0.0498,
      "step": 227700
    },
    {
      "epoch": 0.37266877450691593,
      "grad_norm": 1.8524601459503174,
      "learning_rate": 9.315643470411101e-06,
      "loss": 0.0523,
      "step": 227720
    },
    {
      "epoch": 0.3727015049455693,
      "grad_norm": 1.3019330501556396,
      "learning_rate": 9.315577578197585e-06,
      "loss": 0.053,
      "step": 227740
    },
    {
      "epoch": 0.3727342353842226,
      "grad_norm": 2.376878499984741,
      "learning_rate": 9.315511685984068e-06,
      "loss": 0.0507,
      "step": 227760
    },
    {
      "epoch": 0.37276696582287594,
      "grad_norm": 1.1167852878570557,
      "learning_rate": 9.31544579377055e-06,
      "loss": 0.0386,
      "step": 227780
    },
    {
      "epoch": 0.3727996962615293,
      "grad_norm": 5.019084453582764,
      "learning_rate": 9.315379901557034e-06,
      "loss": 0.0561,
      "step": 227800
    },
    {
      "epoch": 0.37283242670018263,
      "grad_norm": 1.881975531578064,
      "learning_rate": 9.315314009343517e-06,
      "loss": 0.0484,
      "step": 227820
    },
    {
      "epoch": 0.372865157138836,
      "grad_norm": 8.315018653869629,
      "learning_rate": 9.31524811713e-06,
      "loss": 0.0467,
      "step": 227840
    },
    {
      "epoch": 0.3728978875774893,
      "grad_norm": 2.9920928478240967,
      "learning_rate": 9.315182224916483e-06,
      "loss": 0.0473,
      "step": 227860
    },
    {
      "epoch": 0.37293061801614263,
      "grad_norm": 2.435950517654419,
      "learning_rate": 9.315116332702965e-06,
      "loss": 0.0546,
      "step": 227880
    },
    {
      "epoch": 0.372963348454796,
      "grad_norm": 1.1569145917892456,
      "learning_rate": 9.315050440489448e-06,
      "loss": 0.0375,
      "step": 227900
    },
    {
      "epoch": 0.3729960788934493,
      "grad_norm": 2.5751771926879883,
      "learning_rate": 9.31498454827593e-06,
      "loss": 0.0546,
      "step": 227920
    },
    {
      "epoch": 0.3730288093321027,
      "grad_norm": 3.8063840866088867,
      "learning_rate": 9.314918656062414e-06,
      "loss": 0.045,
      "step": 227940
    },
    {
      "epoch": 0.373061539770756,
      "grad_norm": 2.306978225708008,
      "learning_rate": 9.314852763848898e-06,
      "loss": 0.0456,
      "step": 227960
    },
    {
      "epoch": 0.37309427020940933,
      "grad_norm": 2.813307762145996,
      "learning_rate": 9.31478687163538e-06,
      "loss": 0.0424,
      "step": 227980
    },
    {
      "epoch": 0.3731270006480627,
      "grad_norm": 1.4381859302520752,
      "learning_rate": 9.314720979421863e-06,
      "loss": 0.0435,
      "step": 228000
    },
    {
      "epoch": 0.373159731086716,
      "grad_norm": 2.3640565872192383,
      "learning_rate": 9.314655087208345e-06,
      "loss": 0.0371,
      "step": 228020
    },
    {
      "epoch": 0.3731924615253694,
      "grad_norm": 1.0779428482055664,
      "learning_rate": 9.314589194994828e-06,
      "loss": 0.0496,
      "step": 228040
    },
    {
      "epoch": 0.3732251919640227,
      "grad_norm": 1.4491395950317383,
      "learning_rate": 9.31452330278131e-06,
      "loss": 0.0447,
      "step": 228060
    },
    {
      "epoch": 0.373257922402676,
      "grad_norm": 1.7374253273010254,
      "learning_rate": 9.314457410567794e-06,
      "loss": 0.0489,
      "step": 228080
    },
    {
      "epoch": 0.3732906528413294,
      "grad_norm": 4.487820625305176,
      "learning_rate": 9.314391518354276e-06,
      "loss": 0.0521,
      "step": 228100
    },
    {
      "epoch": 0.3733233832799827,
      "grad_norm": 3.810094118118286,
      "learning_rate": 9.31432562614076e-06,
      "loss": 0.0539,
      "step": 228120
    },
    {
      "epoch": 0.3733561137186361,
      "grad_norm": 1.8569300174713135,
      "learning_rate": 9.314259733927243e-06,
      "loss": 0.0443,
      "step": 228140
    },
    {
      "epoch": 0.3733888441572894,
      "grad_norm": 2.1522908210754395,
      "learning_rate": 9.314193841713725e-06,
      "loss": 0.065,
      "step": 228160
    },
    {
      "epoch": 0.3734215745959427,
      "grad_norm": 2.31133770942688,
      "learning_rate": 9.314127949500208e-06,
      "loss": 0.0414,
      "step": 228180
    },
    {
      "epoch": 0.3734543050345961,
      "grad_norm": 5.83654260635376,
      "learning_rate": 9.314062057286692e-06,
      "loss": 0.0497,
      "step": 228200
    },
    {
      "epoch": 0.3734870354732494,
      "grad_norm": 3.434467315673828,
      "learning_rate": 9.313996165073174e-06,
      "loss": 0.0504,
      "step": 228220
    },
    {
      "epoch": 0.3735197659119027,
      "grad_norm": 2.4697115421295166,
      "learning_rate": 9.313930272859658e-06,
      "loss": 0.047,
      "step": 228240
    },
    {
      "epoch": 0.3735524963505561,
      "grad_norm": 1.2450569868087769,
      "learning_rate": 9.31386438064614e-06,
      "loss": 0.0561,
      "step": 228260
    },
    {
      "epoch": 0.3735852267892094,
      "grad_norm": 0.5176671743392944,
      "learning_rate": 9.313798488432623e-06,
      "loss": 0.0388,
      "step": 228280
    },
    {
      "epoch": 0.3736179572278628,
      "grad_norm": 2.027888774871826,
      "learning_rate": 9.313732596219107e-06,
      "loss": 0.0386,
      "step": 228300
    },
    {
      "epoch": 0.3736506876665161,
      "grad_norm": 2.8698410987854004,
      "learning_rate": 9.313666704005589e-06,
      "loss": 0.0496,
      "step": 228320
    },
    {
      "epoch": 0.3736834181051694,
      "grad_norm": 1.040806531906128,
      "learning_rate": 9.313600811792072e-06,
      "loss": 0.0438,
      "step": 228340
    },
    {
      "epoch": 0.3737161485438228,
      "grad_norm": 0.32936525344848633,
      "learning_rate": 9.313534919578554e-06,
      "loss": 0.0494,
      "step": 228360
    },
    {
      "epoch": 0.3737488789824761,
      "grad_norm": 1.580588698387146,
      "learning_rate": 9.313469027365038e-06,
      "loss": 0.0584,
      "step": 228380
    },
    {
      "epoch": 0.3737816094211295,
      "grad_norm": 1.443565011024475,
      "learning_rate": 9.31340313515152e-06,
      "loss": 0.0569,
      "step": 228400
    },
    {
      "epoch": 0.3738143398597828,
      "grad_norm": 0.8590284585952759,
      "learning_rate": 9.313337242938003e-06,
      "loss": 0.0457,
      "step": 228420
    },
    {
      "epoch": 0.3738470702984361,
      "grad_norm": 2.166691303253174,
      "learning_rate": 9.313271350724485e-06,
      "loss": 0.0508,
      "step": 228440
    },
    {
      "epoch": 0.3738798007370895,
      "grad_norm": 0.9974655508995056,
      "learning_rate": 9.313205458510969e-06,
      "loss": 0.0491,
      "step": 228460
    },
    {
      "epoch": 0.3739125311757428,
      "grad_norm": 2.4224231243133545,
      "learning_rate": 9.31313956629745e-06,
      "loss": 0.0457,
      "step": 228480
    },
    {
      "epoch": 0.3739452616143962,
      "grad_norm": 0.8681706190109253,
      "learning_rate": 9.313073674083934e-06,
      "loss": 0.0419,
      "step": 228500
    },
    {
      "epoch": 0.3739779920530495,
      "grad_norm": 2.651285409927368,
      "learning_rate": 9.313007781870418e-06,
      "loss": 0.0475,
      "step": 228520
    },
    {
      "epoch": 0.3740107224917028,
      "grad_norm": 1.3390549421310425,
      "learning_rate": 9.3129418896569e-06,
      "loss": 0.0435,
      "step": 228540
    },
    {
      "epoch": 0.3740434529303562,
      "grad_norm": 2.542757511138916,
      "learning_rate": 9.312875997443383e-06,
      "loss": 0.0533,
      "step": 228560
    },
    {
      "epoch": 0.3740761833690095,
      "grad_norm": 5.502256393432617,
      "learning_rate": 9.312810105229867e-06,
      "loss": 0.0516,
      "step": 228580
    },
    {
      "epoch": 0.37410891380766287,
      "grad_norm": 1.8129719495773315,
      "learning_rate": 9.312744213016349e-06,
      "loss": 0.0498,
      "step": 228600
    },
    {
      "epoch": 0.3741416442463162,
      "grad_norm": 0.5910277962684631,
      "learning_rate": 9.312678320802832e-06,
      "loss": 0.0498,
      "step": 228620
    },
    {
      "epoch": 0.3741743746849695,
      "grad_norm": 2.537715435028076,
      "learning_rate": 9.312612428589316e-06,
      "loss": 0.0415,
      "step": 228640
    },
    {
      "epoch": 0.3742071051236229,
      "grad_norm": 3.0866551399230957,
      "learning_rate": 9.312546536375798e-06,
      "loss": 0.046,
      "step": 228660
    },
    {
      "epoch": 0.3742398355622762,
      "grad_norm": 1.095056414604187,
      "learning_rate": 9.312480644162281e-06,
      "loss": 0.0434,
      "step": 228680
    },
    {
      "epoch": 0.37427256600092956,
      "grad_norm": 1.301259160041809,
      "learning_rate": 9.312414751948763e-06,
      "loss": 0.0398,
      "step": 228700
    },
    {
      "epoch": 0.3743052964395829,
      "grad_norm": 0.955838680267334,
      "learning_rate": 9.312348859735247e-06,
      "loss": 0.035,
      "step": 228720
    },
    {
      "epoch": 0.3743380268782362,
      "grad_norm": 0.8685178160667419,
      "learning_rate": 9.312282967521729e-06,
      "loss": 0.0395,
      "step": 228740
    },
    {
      "epoch": 0.37437075731688957,
      "grad_norm": 0.4654392600059509,
      "learning_rate": 9.312217075308212e-06,
      "loss": 0.0591,
      "step": 228760
    },
    {
      "epoch": 0.3744034877555429,
      "grad_norm": 0.38423675298690796,
      "learning_rate": 9.312151183094694e-06,
      "loss": 0.0435,
      "step": 228780
    },
    {
      "epoch": 0.37443621819419626,
      "grad_norm": 2.416842460632324,
      "learning_rate": 9.312085290881178e-06,
      "loss": 0.0404,
      "step": 228800
    },
    {
      "epoch": 0.3744689486328496,
      "grad_norm": 3.404590129852295,
      "learning_rate": 9.31201939866766e-06,
      "loss": 0.0485,
      "step": 228820
    },
    {
      "epoch": 0.3745016790715029,
      "grad_norm": 0.8705503344535828,
      "learning_rate": 9.311953506454143e-06,
      "loss": 0.0313,
      "step": 228840
    },
    {
      "epoch": 0.37453440951015626,
      "grad_norm": 4.762475967407227,
      "learning_rate": 9.311887614240625e-06,
      "loss": 0.0371,
      "step": 228860
    },
    {
      "epoch": 0.3745671399488096,
      "grad_norm": 1.2997558116912842,
      "learning_rate": 9.311821722027109e-06,
      "loss": 0.0523,
      "step": 228880
    },
    {
      "epoch": 0.37459987038746295,
      "grad_norm": 1.2106953859329224,
      "learning_rate": 9.31175582981359e-06,
      "loss": 0.0473,
      "step": 228900
    },
    {
      "epoch": 0.37463260082611627,
      "grad_norm": 0.795723557472229,
      "learning_rate": 9.311689937600074e-06,
      "loss": 0.0461,
      "step": 228920
    },
    {
      "epoch": 0.3746653312647696,
      "grad_norm": 2.477858304977417,
      "learning_rate": 9.311624045386558e-06,
      "loss": 0.0376,
      "step": 228940
    },
    {
      "epoch": 0.37469806170342296,
      "grad_norm": 1.6184784173965454,
      "learning_rate": 9.31155815317304e-06,
      "loss": 0.0591,
      "step": 228960
    },
    {
      "epoch": 0.3747307921420763,
      "grad_norm": 0.4856734275817871,
      "learning_rate": 9.311492260959523e-06,
      "loss": 0.0515,
      "step": 228980
    },
    {
      "epoch": 0.37476352258072965,
      "grad_norm": 1.3878695964813232,
      "learning_rate": 9.311426368746007e-06,
      "loss": 0.0428,
      "step": 229000
    },
    {
      "epoch": 0.37479625301938296,
      "grad_norm": 0.619236171245575,
      "learning_rate": 9.311360476532489e-06,
      "loss": 0.0408,
      "step": 229020
    },
    {
      "epoch": 0.3748289834580363,
      "grad_norm": 2.085766315460205,
      "learning_rate": 9.311294584318972e-06,
      "loss": 0.0389,
      "step": 229040
    },
    {
      "epoch": 0.37486171389668965,
      "grad_norm": 1.319602370262146,
      "learning_rate": 9.311228692105456e-06,
      "loss": 0.0384,
      "step": 229060
    },
    {
      "epoch": 0.37489444433534297,
      "grad_norm": 0.5923739671707153,
      "learning_rate": 9.311162799891938e-06,
      "loss": 0.0476,
      "step": 229080
    },
    {
      "epoch": 0.37492717477399634,
      "grad_norm": 2.8566079139709473,
      "learning_rate": 9.311096907678421e-06,
      "loss": 0.042,
      "step": 229100
    },
    {
      "epoch": 0.37495990521264966,
      "grad_norm": 3.1387479305267334,
      "learning_rate": 9.311031015464903e-06,
      "loss": 0.0507,
      "step": 229120
    },
    {
      "epoch": 0.374992635651303,
      "grad_norm": 0.3121426999568939,
      "learning_rate": 9.310965123251387e-06,
      "loss": 0.0349,
      "step": 229140
    },
    {
      "epoch": 0.37502536608995635,
      "grad_norm": 0.42598026990890503,
      "learning_rate": 9.310899231037869e-06,
      "loss": 0.0516,
      "step": 229160
    },
    {
      "epoch": 0.37505809652860966,
      "grad_norm": 1.1744152307510376,
      "learning_rate": 9.310833338824352e-06,
      "loss": 0.0533,
      "step": 229180
    },
    {
      "epoch": 0.37509082696726304,
      "grad_norm": 1.571793794631958,
      "learning_rate": 9.310767446610834e-06,
      "loss": 0.0432,
      "step": 229200
    },
    {
      "epoch": 0.37512355740591635,
      "grad_norm": 1.2193504571914673,
      "learning_rate": 9.310701554397318e-06,
      "loss": 0.0509,
      "step": 229220
    },
    {
      "epoch": 0.37515628784456967,
      "grad_norm": 2.0387773513793945,
      "learning_rate": 9.3106356621838e-06,
      "loss": 0.0406,
      "step": 229240
    },
    {
      "epoch": 0.37518901828322304,
      "grad_norm": 1.159705638885498,
      "learning_rate": 9.310569769970283e-06,
      "loss": 0.0532,
      "step": 229260
    },
    {
      "epoch": 0.37522174872187636,
      "grad_norm": 1.9710676670074463,
      "learning_rate": 9.310503877756765e-06,
      "loss": 0.0425,
      "step": 229280
    },
    {
      "epoch": 0.37525447916052973,
      "grad_norm": 3.531625986099243,
      "learning_rate": 9.310437985543249e-06,
      "loss": 0.0447,
      "step": 229300
    },
    {
      "epoch": 0.37528720959918305,
      "grad_norm": 1.709465742111206,
      "learning_rate": 9.310372093329732e-06,
      "loss": 0.0466,
      "step": 229320
    },
    {
      "epoch": 0.37531994003783636,
      "grad_norm": 10.562177658081055,
      "learning_rate": 9.310306201116214e-06,
      "loss": 0.0603,
      "step": 229340
    },
    {
      "epoch": 0.37535267047648974,
      "grad_norm": 0.7325516939163208,
      "learning_rate": 9.310240308902698e-06,
      "loss": 0.0523,
      "step": 229360
    },
    {
      "epoch": 0.37538540091514305,
      "grad_norm": 1.6829644441604614,
      "learning_rate": 9.310174416689181e-06,
      "loss": 0.0562,
      "step": 229380
    },
    {
      "epoch": 0.3754181313537964,
      "grad_norm": 1.8553223609924316,
      "learning_rate": 9.310108524475663e-06,
      "loss": 0.0547,
      "step": 229400
    },
    {
      "epoch": 0.37545086179244974,
      "grad_norm": 0.7362462878227234,
      "learning_rate": 9.310042632262147e-06,
      "loss": 0.035,
      "step": 229420
    },
    {
      "epoch": 0.37548359223110306,
      "grad_norm": 0.5870423316955566,
      "learning_rate": 9.30997674004863e-06,
      "loss": 0.0404,
      "step": 229440
    },
    {
      "epoch": 0.37551632266975643,
      "grad_norm": 0.6924596428871155,
      "learning_rate": 9.309910847835112e-06,
      "loss": 0.0462,
      "step": 229460
    },
    {
      "epoch": 0.37554905310840975,
      "grad_norm": 1.0624494552612305,
      "learning_rate": 9.309844955621596e-06,
      "loss": 0.0571,
      "step": 229480
    },
    {
      "epoch": 0.3755817835470631,
      "grad_norm": 0.994644045829773,
      "learning_rate": 9.309779063408078e-06,
      "loss": 0.0421,
      "step": 229500
    },
    {
      "epoch": 0.37561451398571644,
      "grad_norm": 1.9268218278884888,
      "learning_rate": 9.309713171194561e-06,
      "loss": 0.0433,
      "step": 229520
    },
    {
      "epoch": 0.37564724442436975,
      "grad_norm": 0.6224706172943115,
      "learning_rate": 9.309647278981043e-06,
      "loss": 0.0449,
      "step": 229540
    },
    {
      "epoch": 0.3756799748630231,
      "grad_norm": 1.7926594018936157,
      "learning_rate": 9.309581386767527e-06,
      "loss": 0.0611,
      "step": 229560
    },
    {
      "epoch": 0.37571270530167644,
      "grad_norm": 3.294712781906128,
      "learning_rate": 9.309515494554009e-06,
      "loss": 0.0385,
      "step": 229580
    },
    {
      "epoch": 0.3757454357403298,
      "grad_norm": 5.200161457061768,
      "learning_rate": 9.309449602340492e-06,
      "loss": 0.0423,
      "step": 229600
    },
    {
      "epoch": 0.37577816617898313,
      "grad_norm": 0.7202338576316833,
      "learning_rate": 9.309383710126974e-06,
      "loss": 0.0363,
      "step": 229620
    },
    {
      "epoch": 0.37581089661763645,
      "grad_norm": 2.8349924087524414,
      "learning_rate": 9.309317817913458e-06,
      "loss": 0.0338,
      "step": 229640
    },
    {
      "epoch": 0.3758436270562898,
      "grad_norm": 2.1553289890289307,
      "learning_rate": 9.30925192569994e-06,
      "loss": 0.0545,
      "step": 229660
    },
    {
      "epoch": 0.37587635749494314,
      "grad_norm": 6.910693168640137,
      "learning_rate": 9.309186033486423e-06,
      "loss": 0.051,
      "step": 229680
    },
    {
      "epoch": 0.3759090879335965,
      "grad_norm": 0.3975367248058319,
      "learning_rate": 9.309120141272907e-06,
      "loss": 0.0539,
      "step": 229700
    },
    {
      "epoch": 0.3759418183722498,
      "grad_norm": 0.47426512837409973,
      "learning_rate": 9.309054249059389e-06,
      "loss": 0.0415,
      "step": 229720
    },
    {
      "epoch": 0.37597454881090314,
      "grad_norm": 2.79742169380188,
      "learning_rate": 9.308988356845872e-06,
      "loss": 0.0489,
      "step": 229740
    },
    {
      "epoch": 0.3760072792495565,
      "grad_norm": 0.6194547414779663,
      "learning_rate": 9.308922464632354e-06,
      "loss": 0.0505,
      "step": 229760
    },
    {
      "epoch": 0.37604000968820983,
      "grad_norm": 4.984786033630371,
      "learning_rate": 9.308856572418838e-06,
      "loss": 0.06,
      "step": 229780
    },
    {
      "epoch": 0.3760727401268632,
      "grad_norm": 1.7940130233764648,
      "learning_rate": 9.308790680205321e-06,
      "loss": 0.0405,
      "step": 229800
    },
    {
      "epoch": 0.3761054705655165,
      "grad_norm": 3.355245590209961,
      "learning_rate": 9.308724787991803e-06,
      "loss": 0.0417,
      "step": 229820
    },
    {
      "epoch": 0.37613820100416984,
      "grad_norm": 1.3353519439697266,
      "learning_rate": 9.308658895778287e-06,
      "loss": 0.045,
      "step": 229840
    },
    {
      "epoch": 0.3761709314428232,
      "grad_norm": 1.2913899421691895,
      "learning_rate": 9.30859300356477e-06,
      "loss": 0.0513,
      "step": 229860
    },
    {
      "epoch": 0.3762036618814765,
      "grad_norm": 2.1226518154144287,
      "learning_rate": 9.308527111351252e-06,
      "loss": 0.0536,
      "step": 229880
    },
    {
      "epoch": 0.3762363923201299,
      "grad_norm": 1.7737551927566528,
      "learning_rate": 9.308461219137736e-06,
      "loss": 0.0425,
      "step": 229900
    },
    {
      "epoch": 0.3762691227587832,
      "grad_norm": 0.27861514687538147,
      "learning_rate": 9.308395326924218e-06,
      "loss": 0.0279,
      "step": 229920
    },
    {
      "epoch": 0.37630185319743653,
      "grad_norm": 1.06050443649292,
      "learning_rate": 9.308329434710701e-06,
      "loss": 0.0459,
      "step": 229940
    },
    {
      "epoch": 0.3763345836360899,
      "grad_norm": 2.2330446243286133,
      "learning_rate": 9.308263542497183e-06,
      "loss": 0.0516,
      "step": 229960
    },
    {
      "epoch": 0.3763673140747432,
      "grad_norm": 2.381317377090454,
      "learning_rate": 9.308197650283667e-06,
      "loss": 0.0511,
      "step": 229980
    },
    {
      "epoch": 0.3764000445133966,
      "grad_norm": 1.2058205604553223,
      "learning_rate": 9.308131758070149e-06,
      "loss": 0.047,
      "step": 230000
    },
    {
      "epoch": 0.3764327749520499,
      "grad_norm": 0.6275903582572937,
      "learning_rate": 9.308065865856632e-06,
      "loss": 0.0421,
      "step": 230020
    },
    {
      "epoch": 0.3764655053907032,
      "grad_norm": 32.52424240112305,
      "learning_rate": 9.307999973643114e-06,
      "loss": 0.0494,
      "step": 230040
    },
    {
      "epoch": 0.3764982358293566,
      "grad_norm": 1.8153839111328125,
      "learning_rate": 9.307934081429598e-06,
      "loss": 0.0611,
      "step": 230060
    },
    {
      "epoch": 0.3765309662680099,
      "grad_norm": 4.148895263671875,
      "learning_rate": 9.307868189216081e-06,
      "loss": 0.0441,
      "step": 230080
    },
    {
      "epoch": 0.3765636967066633,
      "grad_norm": 0.9139158129692078,
      "learning_rate": 9.307802297002563e-06,
      "loss": 0.0604,
      "step": 230100
    },
    {
      "epoch": 0.3765964271453166,
      "grad_norm": 1.6961718797683716,
      "learning_rate": 9.307736404789047e-06,
      "loss": 0.0537,
      "step": 230120
    },
    {
      "epoch": 0.3766291575839699,
      "grad_norm": 0.3270442485809326,
      "learning_rate": 9.307670512575529e-06,
      "loss": 0.0507,
      "step": 230140
    },
    {
      "epoch": 0.3766618880226233,
      "grad_norm": 2.2180349826812744,
      "learning_rate": 9.307604620362012e-06,
      "loss": 0.0589,
      "step": 230160
    },
    {
      "epoch": 0.3766946184612766,
      "grad_norm": 3.9764506816864014,
      "learning_rate": 9.307538728148496e-06,
      "loss": 0.0377,
      "step": 230180
    },
    {
      "epoch": 0.37672734889993,
      "grad_norm": 1.9917960166931152,
      "learning_rate": 9.307472835934978e-06,
      "loss": 0.0416,
      "step": 230200
    },
    {
      "epoch": 0.3767600793385833,
      "grad_norm": 0.2250155508518219,
      "learning_rate": 9.307406943721461e-06,
      "loss": 0.0357,
      "step": 230220
    },
    {
      "epoch": 0.3767928097772366,
      "grad_norm": 0.9466310739517212,
      "learning_rate": 9.307341051507945e-06,
      "loss": 0.0408,
      "step": 230240
    },
    {
      "epoch": 0.37682554021589,
      "grad_norm": 1.9071065187454224,
      "learning_rate": 9.307275159294427e-06,
      "loss": 0.0331,
      "step": 230260
    },
    {
      "epoch": 0.3768582706545433,
      "grad_norm": 1.2942527532577515,
      "learning_rate": 9.30720926708091e-06,
      "loss": 0.064,
      "step": 230280
    },
    {
      "epoch": 0.3768910010931967,
      "grad_norm": 1.1352237462997437,
      "learning_rate": 9.307143374867392e-06,
      "loss": 0.0405,
      "step": 230300
    },
    {
      "epoch": 0.37692373153185,
      "grad_norm": 1.8706849813461304,
      "learning_rate": 9.307077482653876e-06,
      "loss": 0.053,
      "step": 230320
    },
    {
      "epoch": 0.3769564619705033,
      "grad_norm": 0.5858607888221741,
      "learning_rate": 9.307011590440358e-06,
      "loss": 0.0398,
      "step": 230340
    },
    {
      "epoch": 0.3769891924091567,
      "grad_norm": 1.5260449647903442,
      "learning_rate": 9.306945698226842e-06,
      "loss": 0.0413,
      "step": 230360
    },
    {
      "epoch": 0.37702192284781,
      "grad_norm": 1.010944128036499,
      "learning_rate": 9.306879806013323e-06,
      "loss": 0.0551,
      "step": 230380
    },
    {
      "epoch": 0.37705465328646337,
      "grad_norm": 1.531248688697815,
      "learning_rate": 9.306813913799807e-06,
      "loss": 0.0326,
      "step": 230400
    },
    {
      "epoch": 0.3770873837251167,
      "grad_norm": 3.390489339828491,
      "learning_rate": 9.30674802158629e-06,
      "loss": 0.05,
      "step": 230420
    },
    {
      "epoch": 0.37712011416377,
      "grad_norm": 2.4664530754089355,
      "learning_rate": 9.306682129372772e-06,
      "loss": 0.0412,
      "step": 230440
    },
    {
      "epoch": 0.3771528446024234,
      "grad_norm": 1.346561074256897,
      "learning_rate": 9.306616237159256e-06,
      "loss": 0.0484,
      "step": 230460
    },
    {
      "epoch": 0.3771855750410767,
      "grad_norm": 1.3924998044967651,
      "learning_rate": 9.306550344945738e-06,
      "loss": 0.0491,
      "step": 230480
    },
    {
      "epoch": 0.37721830547973006,
      "grad_norm": 2.0247223377227783,
      "learning_rate": 9.306484452732222e-06,
      "loss": 0.0573,
      "step": 230500
    },
    {
      "epoch": 0.3772510359183834,
      "grad_norm": 1.2912362813949585,
      "learning_rate": 9.306418560518703e-06,
      "loss": 0.0471,
      "step": 230520
    },
    {
      "epoch": 0.3772837663570367,
      "grad_norm": 0.7631546854972839,
      "learning_rate": 9.306352668305187e-06,
      "loss": 0.0585,
      "step": 230540
    },
    {
      "epoch": 0.37731649679569007,
      "grad_norm": 3.027082681655884,
      "learning_rate": 9.306286776091669e-06,
      "loss": 0.0449,
      "step": 230560
    },
    {
      "epoch": 0.3773492272343434,
      "grad_norm": 1.6008332967758179,
      "learning_rate": 9.306220883878153e-06,
      "loss": 0.0341,
      "step": 230580
    },
    {
      "epoch": 0.37738195767299676,
      "grad_norm": 0.45359519124031067,
      "learning_rate": 9.306154991664636e-06,
      "loss": 0.0559,
      "step": 230600
    },
    {
      "epoch": 0.3774146881116501,
      "grad_norm": 1.2787506580352783,
      "learning_rate": 9.306089099451118e-06,
      "loss": 0.0508,
      "step": 230620
    },
    {
      "epoch": 0.3774474185503034,
      "grad_norm": 1.1776882410049438,
      "learning_rate": 9.306023207237602e-06,
      "loss": 0.0429,
      "step": 230640
    },
    {
      "epoch": 0.37748014898895677,
      "grad_norm": 5.491579532623291,
      "learning_rate": 9.305957315024085e-06,
      "loss": 0.0489,
      "step": 230660
    },
    {
      "epoch": 0.3775128794276101,
      "grad_norm": 0.6903786063194275,
      "learning_rate": 9.305891422810567e-06,
      "loss": 0.0459,
      "step": 230680
    },
    {
      "epoch": 0.37754560986626345,
      "grad_norm": 0.5992743372917175,
      "learning_rate": 9.30582553059705e-06,
      "loss": 0.0552,
      "step": 230700
    },
    {
      "epoch": 0.37757834030491677,
      "grad_norm": 2.824918270111084,
      "learning_rate": 9.305759638383533e-06,
      "loss": 0.038,
      "step": 230720
    },
    {
      "epoch": 0.3776110707435701,
      "grad_norm": 1.7541084289550781,
      "learning_rate": 9.305693746170016e-06,
      "loss": 0.0478,
      "step": 230740
    },
    {
      "epoch": 0.37764380118222346,
      "grad_norm": 2.353628158569336,
      "learning_rate": 9.3056278539565e-06,
      "loss": 0.0361,
      "step": 230760
    },
    {
      "epoch": 0.3776765316208768,
      "grad_norm": 0.9317877888679504,
      "learning_rate": 9.305561961742982e-06,
      "loss": 0.0493,
      "step": 230780
    },
    {
      "epoch": 0.37770926205953015,
      "grad_norm": 1.3251298666000366,
      "learning_rate": 9.305496069529465e-06,
      "loss": 0.0393,
      "step": 230800
    },
    {
      "epoch": 0.37774199249818347,
      "grad_norm": 1.9713096618652344,
      "learning_rate": 9.305430177315947e-06,
      "loss": 0.0374,
      "step": 230820
    },
    {
      "epoch": 0.3777747229368368,
      "grad_norm": 1.6500812768936157,
      "learning_rate": 9.30536428510243e-06,
      "loss": 0.0446,
      "step": 230840
    },
    {
      "epoch": 0.37780745337549015,
      "grad_norm": 1.7076201438903809,
      "learning_rate": 9.305298392888913e-06,
      "loss": 0.0622,
      "step": 230860
    },
    {
      "epoch": 0.37784018381414347,
      "grad_norm": 1.783297061920166,
      "learning_rate": 9.305232500675396e-06,
      "loss": 0.0438,
      "step": 230880
    },
    {
      "epoch": 0.37787291425279684,
      "grad_norm": 1.6040242910385132,
      "learning_rate": 9.305166608461878e-06,
      "loss": 0.0379,
      "step": 230900
    },
    {
      "epoch": 0.37790564469145016,
      "grad_norm": 0.8657284379005432,
      "learning_rate": 9.305100716248362e-06,
      "loss": 0.0559,
      "step": 230920
    },
    {
      "epoch": 0.3779383751301035,
      "grad_norm": 2.0022168159484863,
      "learning_rate": 9.305034824034844e-06,
      "loss": 0.0355,
      "step": 230940
    },
    {
      "epoch": 0.37797110556875685,
      "grad_norm": 2.4171719551086426,
      "learning_rate": 9.304968931821327e-06,
      "loss": 0.0475,
      "step": 230960
    },
    {
      "epoch": 0.37800383600741017,
      "grad_norm": 2.3543426990509033,
      "learning_rate": 9.30490303960781e-06,
      "loss": 0.0548,
      "step": 230980
    },
    {
      "epoch": 0.37803656644606354,
      "grad_norm": 0.7659288048744202,
      "learning_rate": 9.304837147394293e-06,
      "loss": 0.0497,
      "step": 231000
    },
    {
      "epoch": 0.37806929688471685,
      "grad_norm": 7.4911885261535645,
      "learning_rate": 9.304771255180776e-06,
      "loss": 0.0497,
      "step": 231020
    },
    {
      "epoch": 0.37810202732337017,
      "grad_norm": 1.4203228950500488,
      "learning_rate": 9.30470536296726e-06,
      "loss": 0.047,
      "step": 231040
    },
    {
      "epoch": 0.37813475776202354,
      "grad_norm": 2.3135464191436768,
      "learning_rate": 9.304639470753742e-06,
      "loss": 0.0449,
      "step": 231060
    },
    {
      "epoch": 0.37816748820067686,
      "grad_norm": 2.0610365867614746,
      "learning_rate": 9.304573578540225e-06,
      "loss": 0.0589,
      "step": 231080
    },
    {
      "epoch": 0.3782002186393302,
      "grad_norm": 0.7595951557159424,
      "learning_rate": 9.304507686326709e-06,
      "loss": 0.0539,
      "step": 231100
    },
    {
      "epoch": 0.37823294907798355,
      "grad_norm": 0.4603279232978821,
      "learning_rate": 9.30444179411319e-06,
      "loss": 0.0459,
      "step": 231120
    },
    {
      "epoch": 0.37826567951663687,
      "grad_norm": 0.9481830596923828,
      "learning_rate": 9.304375901899674e-06,
      "loss": 0.0392,
      "step": 231140
    },
    {
      "epoch": 0.37829840995529024,
      "grad_norm": 2.222579002380371,
      "learning_rate": 9.304310009686156e-06,
      "loss": 0.0439,
      "step": 231160
    },
    {
      "epoch": 0.37833114039394355,
      "grad_norm": 0.9017466902732849,
      "learning_rate": 9.30424411747264e-06,
      "loss": 0.0373,
      "step": 231180
    },
    {
      "epoch": 0.37836387083259687,
      "grad_norm": 3.6328768730163574,
      "learning_rate": 9.304178225259122e-06,
      "loss": 0.0452,
      "step": 231200
    },
    {
      "epoch": 0.37839660127125024,
      "grad_norm": 1.3373901844024658,
      "learning_rate": 9.304112333045605e-06,
      "loss": 0.0466,
      "step": 231220
    },
    {
      "epoch": 0.37842933170990356,
      "grad_norm": 2.0901377201080322,
      "learning_rate": 9.304046440832087e-06,
      "loss": 0.0519,
      "step": 231240
    },
    {
      "epoch": 0.37846206214855693,
      "grad_norm": 1.9815876483917236,
      "learning_rate": 9.30398054861857e-06,
      "loss": 0.0485,
      "step": 231260
    },
    {
      "epoch": 0.37849479258721025,
      "grad_norm": 4.85589599609375,
      "learning_rate": 9.303914656405053e-06,
      "loss": 0.0442,
      "step": 231280
    },
    {
      "epoch": 0.37852752302586357,
      "grad_norm": 2.0038275718688965,
      "learning_rate": 9.303848764191536e-06,
      "loss": 0.0494,
      "step": 231300
    },
    {
      "epoch": 0.37856025346451694,
      "grad_norm": 0.39021024107933044,
      "learning_rate": 9.303782871978018e-06,
      "loss": 0.0475,
      "step": 231320
    },
    {
      "epoch": 0.37859298390317025,
      "grad_norm": 2.169787883758545,
      "learning_rate": 9.303716979764502e-06,
      "loss": 0.0488,
      "step": 231340
    },
    {
      "epoch": 0.3786257143418236,
      "grad_norm": 2.410252571105957,
      "learning_rate": 9.303651087550985e-06,
      "loss": 0.0578,
      "step": 231360
    },
    {
      "epoch": 0.37865844478047694,
      "grad_norm": 1.884465217590332,
      "learning_rate": 9.303585195337467e-06,
      "loss": 0.0501,
      "step": 231380
    },
    {
      "epoch": 0.37869117521913026,
      "grad_norm": 2.3007936477661133,
      "learning_rate": 9.30351930312395e-06,
      "loss": 0.0541,
      "step": 231400
    },
    {
      "epoch": 0.37872390565778363,
      "grad_norm": 0.3683135509490967,
      "learning_rate": 9.303453410910434e-06,
      "loss": 0.0351,
      "step": 231420
    },
    {
      "epoch": 0.37875663609643695,
      "grad_norm": 1.0753931999206543,
      "learning_rate": 9.303387518696916e-06,
      "loss": 0.0302,
      "step": 231440
    },
    {
      "epoch": 0.3787893665350903,
      "grad_norm": 0.3615048825740814,
      "learning_rate": 9.3033216264834e-06,
      "loss": 0.0571,
      "step": 231460
    },
    {
      "epoch": 0.37882209697374364,
      "grad_norm": 2.04593825340271,
      "learning_rate": 9.303255734269883e-06,
      "loss": 0.0504,
      "step": 231480
    },
    {
      "epoch": 0.37885482741239696,
      "grad_norm": 1.5786547660827637,
      "learning_rate": 9.303189842056365e-06,
      "loss": 0.0448,
      "step": 231500
    },
    {
      "epoch": 0.3788875578510503,
      "grad_norm": 0.897991418838501,
      "learning_rate": 9.303123949842849e-06,
      "loss": 0.0377,
      "step": 231520
    },
    {
      "epoch": 0.37892028828970364,
      "grad_norm": 2.6904184818267822,
      "learning_rate": 9.30305805762933e-06,
      "loss": 0.0347,
      "step": 231540
    },
    {
      "epoch": 0.378953018728357,
      "grad_norm": 5.12240743637085,
      "learning_rate": 9.302992165415814e-06,
      "loss": 0.0498,
      "step": 231560
    },
    {
      "epoch": 0.37898574916701033,
      "grad_norm": 1.483952283859253,
      "learning_rate": 9.302926273202296e-06,
      "loss": 0.0458,
      "step": 231580
    },
    {
      "epoch": 0.37901847960566365,
      "grad_norm": 2.5230553150177,
      "learning_rate": 9.30286038098878e-06,
      "loss": 0.0446,
      "step": 231600
    },
    {
      "epoch": 0.379051210044317,
      "grad_norm": 0.8189907073974609,
      "learning_rate": 9.302794488775262e-06,
      "loss": 0.05,
      "step": 231620
    },
    {
      "epoch": 0.37908394048297034,
      "grad_norm": 1.2477933168411255,
      "learning_rate": 9.302728596561745e-06,
      "loss": 0.0575,
      "step": 231640
    },
    {
      "epoch": 0.3791166709216237,
      "grad_norm": 0.6582034826278687,
      "learning_rate": 9.302662704348227e-06,
      "loss": 0.0413,
      "step": 231660
    },
    {
      "epoch": 0.379149401360277,
      "grad_norm": 1.1823421716690063,
      "learning_rate": 9.30259681213471e-06,
      "loss": 0.038,
      "step": 231680
    },
    {
      "epoch": 0.37918213179893034,
      "grad_norm": 2.2648708820343018,
      "learning_rate": 9.302530919921193e-06,
      "loss": 0.058,
      "step": 231700
    },
    {
      "epoch": 0.3792148622375837,
      "grad_norm": 0.26887819170951843,
      "learning_rate": 9.302465027707676e-06,
      "loss": 0.0497,
      "step": 231720
    },
    {
      "epoch": 0.37924759267623703,
      "grad_norm": 1.8971766233444214,
      "learning_rate": 9.302399135494158e-06,
      "loss": 0.043,
      "step": 231740
    },
    {
      "epoch": 0.3792803231148904,
      "grad_norm": 1.176281213760376,
      "learning_rate": 9.302333243280642e-06,
      "loss": 0.0576,
      "step": 231760
    },
    {
      "epoch": 0.3793130535535437,
      "grad_norm": 1.633065938949585,
      "learning_rate": 9.302267351067125e-06,
      "loss": 0.0511,
      "step": 231780
    },
    {
      "epoch": 0.37934578399219704,
      "grad_norm": 6.368252754211426,
      "learning_rate": 9.302201458853607e-06,
      "loss": 0.0414,
      "step": 231800
    },
    {
      "epoch": 0.3793785144308504,
      "grad_norm": 0.4340287148952484,
      "learning_rate": 9.30213556664009e-06,
      "loss": 0.0541,
      "step": 231820
    },
    {
      "epoch": 0.3794112448695037,
      "grad_norm": 2.9070639610290527,
      "learning_rate": 9.302069674426574e-06,
      "loss": 0.0412,
      "step": 231840
    },
    {
      "epoch": 0.3794439753081571,
      "grad_norm": 2.165984630584717,
      "learning_rate": 9.302003782213056e-06,
      "loss": 0.0421,
      "step": 231860
    },
    {
      "epoch": 0.3794767057468104,
      "grad_norm": 1.328263759613037,
      "learning_rate": 9.30193788999954e-06,
      "loss": 0.0483,
      "step": 231880
    },
    {
      "epoch": 0.37950943618546373,
      "grad_norm": 0.5758046507835388,
      "learning_rate": 9.301871997786023e-06,
      "loss": 0.0426,
      "step": 231900
    },
    {
      "epoch": 0.3795421666241171,
      "grad_norm": 0.7902786135673523,
      "learning_rate": 9.301806105572505e-06,
      "loss": 0.0486,
      "step": 231920
    },
    {
      "epoch": 0.3795748970627704,
      "grad_norm": 1.3402886390686035,
      "learning_rate": 9.301740213358989e-06,
      "loss": 0.0498,
      "step": 231940
    },
    {
      "epoch": 0.3796076275014238,
      "grad_norm": 2.061450958251953,
      "learning_rate": 9.301674321145471e-06,
      "loss": 0.054,
      "step": 231960
    },
    {
      "epoch": 0.3796403579400771,
      "grad_norm": 1.7967829704284668,
      "learning_rate": 9.301608428931954e-06,
      "loss": 0.0521,
      "step": 231980
    },
    {
      "epoch": 0.37967308837873043,
      "grad_norm": 1.9401216506958008,
      "learning_rate": 9.301542536718436e-06,
      "loss": 0.0407,
      "step": 232000
    },
    {
      "epoch": 0.3797058188173838,
      "grad_norm": 0.9680435657501221,
      "learning_rate": 9.30147664450492e-06,
      "loss": 0.0391,
      "step": 232020
    },
    {
      "epoch": 0.3797385492560371,
      "grad_norm": 1.208146572113037,
      "learning_rate": 9.301410752291402e-06,
      "loss": 0.0458,
      "step": 232040
    },
    {
      "epoch": 0.3797712796946905,
      "grad_norm": 5.973991394042969,
      "learning_rate": 9.301344860077885e-06,
      "loss": 0.0567,
      "step": 232060
    },
    {
      "epoch": 0.3798040101333438,
      "grad_norm": 2.326728105545044,
      "learning_rate": 9.301278967864367e-06,
      "loss": 0.0544,
      "step": 232080
    },
    {
      "epoch": 0.3798367405719971,
      "grad_norm": 1.6963322162628174,
      "learning_rate": 9.301213075650851e-06,
      "loss": 0.0393,
      "step": 232100
    },
    {
      "epoch": 0.3798694710106505,
      "grad_norm": 2.2109694480895996,
      "learning_rate": 9.301147183437333e-06,
      "loss": 0.0356,
      "step": 232120
    },
    {
      "epoch": 0.3799022014493038,
      "grad_norm": 0.4640136659145355,
      "learning_rate": 9.301081291223816e-06,
      "loss": 0.0519,
      "step": 232140
    },
    {
      "epoch": 0.3799349318879572,
      "grad_norm": 2.569082021713257,
      "learning_rate": 9.3010153990103e-06,
      "loss": 0.0437,
      "step": 232160
    },
    {
      "epoch": 0.3799676623266105,
      "grad_norm": 1.1571450233459473,
      "learning_rate": 9.300949506796782e-06,
      "loss": 0.0418,
      "step": 232180
    },
    {
      "epoch": 0.3800003927652638,
      "grad_norm": 2.683866024017334,
      "learning_rate": 9.300883614583265e-06,
      "loss": 0.0404,
      "step": 232200
    },
    {
      "epoch": 0.3800331232039172,
      "grad_norm": 5.837057590484619,
      "learning_rate": 9.300817722369749e-06,
      "loss": 0.0524,
      "step": 232220
    },
    {
      "epoch": 0.3800658536425705,
      "grad_norm": 0.82820725440979,
      "learning_rate": 9.300751830156231e-06,
      "loss": 0.0374,
      "step": 232240
    },
    {
      "epoch": 0.3800985840812239,
      "grad_norm": 0.8786436915397644,
      "learning_rate": 9.300685937942714e-06,
      "loss": 0.0606,
      "step": 232260
    },
    {
      "epoch": 0.3801313145198772,
      "grad_norm": 0.6842007637023926,
      "learning_rate": 9.300620045729198e-06,
      "loss": 0.0521,
      "step": 232280
    },
    {
      "epoch": 0.3801640449585305,
      "grad_norm": 2.4059994220733643,
      "learning_rate": 9.30055415351568e-06,
      "loss": 0.0501,
      "step": 232300
    },
    {
      "epoch": 0.3801967753971839,
      "grad_norm": 2.1422221660614014,
      "learning_rate": 9.300488261302164e-06,
      "loss": 0.0378,
      "step": 232320
    },
    {
      "epoch": 0.3802295058358372,
      "grad_norm": 2.2352731227874756,
      "learning_rate": 9.300422369088645e-06,
      "loss": 0.0348,
      "step": 232340
    },
    {
      "epoch": 0.3802622362744906,
      "grad_norm": 1.1978728771209717,
      "learning_rate": 9.300356476875129e-06,
      "loss": 0.0548,
      "step": 232360
    },
    {
      "epoch": 0.3802949667131439,
      "grad_norm": 1.1833897829055786,
      "learning_rate": 9.300290584661611e-06,
      "loss": 0.0436,
      "step": 232380
    },
    {
      "epoch": 0.3803276971517972,
      "grad_norm": 1.8755567073822021,
      "learning_rate": 9.300224692448095e-06,
      "loss": 0.0401,
      "step": 232400
    },
    {
      "epoch": 0.3803604275904506,
      "grad_norm": 2.411170721054077,
      "learning_rate": 9.300158800234576e-06,
      "loss": 0.0466,
      "step": 232420
    },
    {
      "epoch": 0.3803931580291039,
      "grad_norm": 1.3114112615585327,
      "learning_rate": 9.30009290802106e-06,
      "loss": 0.0298,
      "step": 232440
    },
    {
      "epoch": 0.38042588846775727,
      "grad_norm": 0.987669050693512,
      "learning_rate": 9.300027015807542e-06,
      "loss": 0.0466,
      "step": 232460
    },
    {
      "epoch": 0.3804586189064106,
      "grad_norm": 1.2363803386688232,
      "learning_rate": 9.299961123594025e-06,
      "loss": 0.0533,
      "step": 232480
    },
    {
      "epoch": 0.3804913493450639,
      "grad_norm": 2.1637120246887207,
      "learning_rate": 9.299895231380507e-06,
      "loss": 0.0415,
      "step": 232500
    },
    {
      "epoch": 0.3805240797837173,
      "grad_norm": 3.0790562629699707,
      "learning_rate": 9.299829339166991e-06,
      "loss": 0.0526,
      "step": 232520
    },
    {
      "epoch": 0.3805568102223706,
      "grad_norm": 0.871272623538971,
      "learning_rate": 9.299763446953475e-06,
      "loss": 0.0434,
      "step": 232540
    },
    {
      "epoch": 0.38058954066102396,
      "grad_norm": 1.4396469593048096,
      "learning_rate": 9.299697554739956e-06,
      "loss": 0.0339,
      "step": 232560
    },
    {
      "epoch": 0.3806222710996773,
      "grad_norm": 2.1046271324157715,
      "learning_rate": 9.29963166252644e-06,
      "loss": 0.0518,
      "step": 232580
    },
    {
      "epoch": 0.3806550015383306,
      "grad_norm": 1.7558841705322266,
      "learning_rate": 9.299565770312922e-06,
      "loss": 0.0388,
      "step": 232600
    },
    {
      "epoch": 0.38068773197698397,
      "grad_norm": 1.5641722679138184,
      "learning_rate": 9.299499878099406e-06,
      "loss": 0.0504,
      "step": 232620
    },
    {
      "epoch": 0.3807204624156373,
      "grad_norm": 3.525266170501709,
      "learning_rate": 9.299433985885889e-06,
      "loss": 0.0386,
      "step": 232640
    },
    {
      "epoch": 0.38075319285429066,
      "grad_norm": 3.186849594116211,
      "learning_rate": 9.299368093672371e-06,
      "loss": 0.0466,
      "step": 232660
    },
    {
      "epoch": 0.380785923292944,
      "grad_norm": 1.8269338607788086,
      "learning_rate": 9.299302201458855e-06,
      "loss": 0.039,
      "step": 232680
    },
    {
      "epoch": 0.3808186537315973,
      "grad_norm": 0.8464685082435608,
      "learning_rate": 9.299236309245338e-06,
      "loss": 0.038,
      "step": 232700
    },
    {
      "epoch": 0.38085138417025066,
      "grad_norm": 0.4694213271141052,
      "learning_rate": 9.29917041703182e-06,
      "loss": 0.0564,
      "step": 232720
    },
    {
      "epoch": 0.380884114608904,
      "grad_norm": 2.803065538406372,
      "learning_rate": 9.299104524818304e-06,
      "loss": 0.0507,
      "step": 232740
    },
    {
      "epoch": 0.38091684504755735,
      "grad_norm": 1.047130823135376,
      "learning_rate": 9.299038632604786e-06,
      "loss": 0.051,
      "step": 232760
    },
    {
      "epoch": 0.38094957548621067,
      "grad_norm": 0.6053031086921692,
      "learning_rate": 9.298972740391269e-06,
      "loss": 0.0561,
      "step": 232780
    },
    {
      "epoch": 0.380982305924864,
      "grad_norm": 2.582759380340576,
      "learning_rate": 9.298906848177751e-06,
      "loss": 0.0742,
      "step": 232800
    },
    {
      "epoch": 0.38101503636351736,
      "grad_norm": 2.293415069580078,
      "learning_rate": 9.298840955964235e-06,
      "loss": 0.0529,
      "step": 232820
    },
    {
      "epoch": 0.3810477668021707,
      "grad_norm": 2.103203058242798,
      "learning_rate": 9.298775063750716e-06,
      "loss": 0.048,
      "step": 232840
    },
    {
      "epoch": 0.38108049724082405,
      "grad_norm": 2.592945098876953,
      "learning_rate": 9.2987091715372e-06,
      "loss": 0.0528,
      "step": 232860
    },
    {
      "epoch": 0.38111322767947736,
      "grad_norm": 0.8128916621208191,
      "learning_rate": 9.298643279323684e-06,
      "loss": 0.0479,
      "step": 232880
    },
    {
      "epoch": 0.3811459581181307,
      "grad_norm": 2.334401845932007,
      "learning_rate": 9.298577387110166e-06,
      "loss": 0.0524,
      "step": 232900
    },
    {
      "epoch": 0.38117868855678405,
      "grad_norm": 0.7048894762992859,
      "learning_rate": 9.298511494896649e-06,
      "loss": 0.0385,
      "step": 232920
    },
    {
      "epoch": 0.38121141899543737,
      "grad_norm": 2.098055839538574,
      "learning_rate": 9.298445602683131e-06,
      "loss": 0.0447,
      "step": 232940
    },
    {
      "epoch": 0.38124414943409074,
      "grad_norm": 1.3492017984390259,
      "learning_rate": 9.298379710469615e-06,
      "loss": 0.0476,
      "step": 232960
    },
    {
      "epoch": 0.38127687987274406,
      "grad_norm": 4.162798881530762,
      "learning_rate": 9.298313818256097e-06,
      "loss": 0.043,
      "step": 232980
    },
    {
      "epoch": 0.3813096103113974,
      "grad_norm": 2.9053642749786377,
      "learning_rate": 9.29824792604258e-06,
      "loss": 0.0552,
      "step": 233000
    },
    {
      "epoch": 0.38134234075005075,
      "grad_norm": 0.7555140256881714,
      "learning_rate": 9.298182033829064e-06,
      "loss": 0.0374,
      "step": 233020
    },
    {
      "epoch": 0.38137507118870406,
      "grad_norm": 3.950279474258423,
      "learning_rate": 9.298116141615546e-06,
      "loss": 0.0447,
      "step": 233040
    },
    {
      "epoch": 0.38140780162735743,
      "grad_norm": 1.3132323026657104,
      "learning_rate": 9.29805024940203e-06,
      "loss": 0.0465,
      "step": 233060
    },
    {
      "epoch": 0.38144053206601075,
      "grad_norm": 2.6034724712371826,
      "learning_rate": 9.297984357188513e-06,
      "loss": 0.0502,
      "step": 233080
    },
    {
      "epoch": 0.38147326250466407,
      "grad_norm": 5.295726776123047,
      "learning_rate": 9.297918464974995e-06,
      "loss": 0.0523,
      "step": 233100
    },
    {
      "epoch": 0.38150599294331744,
      "grad_norm": 1.9740450382232666,
      "learning_rate": 9.297852572761478e-06,
      "loss": 0.0513,
      "step": 233120
    },
    {
      "epoch": 0.38153872338197076,
      "grad_norm": 1.2227051258087158,
      "learning_rate": 9.29778668054796e-06,
      "loss": 0.0507,
      "step": 233140
    },
    {
      "epoch": 0.38157145382062413,
      "grad_norm": 0.7838605046272278,
      "learning_rate": 9.297720788334444e-06,
      "loss": 0.0478,
      "step": 233160
    },
    {
      "epoch": 0.38160418425927745,
      "grad_norm": 1.7630863189697266,
      "learning_rate": 9.297654896120926e-06,
      "loss": 0.0313,
      "step": 233180
    },
    {
      "epoch": 0.38163691469793076,
      "grad_norm": 1.3700026273727417,
      "learning_rate": 9.29758900390741e-06,
      "loss": 0.0436,
      "step": 233200
    },
    {
      "epoch": 0.38166964513658413,
      "grad_norm": 1.5542479753494263,
      "learning_rate": 9.297523111693893e-06,
      "loss": 0.0423,
      "step": 233220
    },
    {
      "epoch": 0.38170237557523745,
      "grad_norm": 3.687018632888794,
      "learning_rate": 9.297457219480375e-06,
      "loss": 0.0599,
      "step": 233240
    },
    {
      "epoch": 0.3817351060138908,
      "grad_norm": 0.5287010669708252,
      "learning_rate": 9.297391327266858e-06,
      "loss": 0.0564,
      "step": 233260
    },
    {
      "epoch": 0.38176783645254414,
      "grad_norm": 1.1641277074813843,
      "learning_rate": 9.29732543505334e-06,
      "loss": 0.0342,
      "step": 233280
    },
    {
      "epoch": 0.38180056689119746,
      "grad_norm": 1.5896806716918945,
      "learning_rate": 9.297259542839824e-06,
      "loss": 0.0573,
      "step": 233300
    },
    {
      "epoch": 0.38183329732985083,
      "grad_norm": 1.7426207065582275,
      "learning_rate": 9.297193650626306e-06,
      "loss": 0.0428,
      "step": 233320
    },
    {
      "epoch": 0.38186602776850415,
      "grad_norm": 2.916260242462158,
      "learning_rate": 9.29712775841279e-06,
      "loss": 0.0539,
      "step": 233340
    },
    {
      "epoch": 0.3818987582071575,
      "grad_norm": 2.044863224029541,
      "learning_rate": 9.297061866199271e-06,
      "loss": 0.0509,
      "step": 233360
    },
    {
      "epoch": 0.38193148864581083,
      "grad_norm": 11.626672744750977,
      "learning_rate": 9.296995973985755e-06,
      "loss": 0.0268,
      "step": 233380
    },
    {
      "epoch": 0.38196421908446415,
      "grad_norm": 0.7496092319488525,
      "learning_rate": 9.296930081772237e-06,
      "loss": 0.0555,
      "step": 233400
    },
    {
      "epoch": 0.3819969495231175,
      "grad_norm": 2.023353099822998,
      "learning_rate": 9.29686418955872e-06,
      "loss": 0.0438,
      "step": 233420
    },
    {
      "epoch": 0.38202967996177084,
      "grad_norm": 2.928377389907837,
      "learning_rate": 9.296798297345204e-06,
      "loss": 0.044,
      "step": 233440
    },
    {
      "epoch": 0.3820624104004242,
      "grad_norm": 0.37465164065361023,
      "learning_rate": 9.296732405131687e-06,
      "loss": 0.0331,
      "step": 233460
    },
    {
      "epoch": 0.38209514083907753,
      "grad_norm": 5.586175441741943,
      "learning_rate": 9.29666651291817e-06,
      "loss": 0.0511,
      "step": 233480
    },
    {
      "epoch": 0.38212787127773085,
      "grad_norm": 1.6412267684936523,
      "learning_rate": 9.296600620704653e-06,
      "loss": 0.0506,
      "step": 233500
    },
    {
      "epoch": 0.3821606017163842,
      "grad_norm": 1.974104404449463,
      "learning_rate": 9.296534728491135e-06,
      "loss": 0.048,
      "step": 233520
    },
    {
      "epoch": 0.38219333215503754,
      "grad_norm": 1.702796220779419,
      "learning_rate": 9.296468836277618e-06,
      "loss": 0.0417,
      "step": 233540
    },
    {
      "epoch": 0.3822260625936909,
      "grad_norm": 0.24323435127735138,
      "learning_rate": 9.2964029440641e-06,
      "loss": 0.0432,
      "step": 233560
    },
    {
      "epoch": 0.3822587930323442,
      "grad_norm": 1.7430061101913452,
      "learning_rate": 9.296337051850584e-06,
      "loss": 0.0432,
      "step": 233580
    },
    {
      "epoch": 0.38229152347099754,
      "grad_norm": 2.857736349105835,
      "learning_rate": 9.296271159637067e-06,
      "loss": 0.047,
      "step": 233600
    },
    {
      "epoch": 0.3823242539096509,
      "grad_norm": 1.8016964197158813,
      "learning_rate": 9.29620526742355e-06,
      "loss": 0.0421,
      "step": 233620
    },
    {
      "epoch": 0.38235698434830423,
      "grad_norm": 1.835713267326355,
      "learning_rate": 9.296139375210033e-06,
      "loss": 0.0416,
      "step": 233640
    },
    {
      "epoch": 0.3823897147869576,
      "grad_norm": 0.8780317306518555,
      "learning_rate": 9.296073482996515e-06,
      "loss": 0.0541,
      "step": 233660
    },
    {
      "epoch": 0.3824224452256109,
      "grad_norm": 2.052082061767578,
      "learning_rate": 9.296007590782998e-06,
      "loss": 0.049,
      "step": 233680
    },
    {
      "epoch": 0.38245517566426424,
      "grad_norm": 1.3007184267044067,
      "learning_rate": 9.29594169856948e-06,
      "loss": 0.0395,
      "step": 233700
    },
    {
      "epoch": 0.3824879061029176,
      "grad_norm": 2.658799409866333,
      "learning_rate": 9.295875806355964e-06,
      "loss": 0.0399,
      "step": 233720
    },
    {
      "epoch": 0.3825206365415709,
      "grad_norm": 1.7467687129974365,
      "learning_rate": 9.295809914142446e-06,
      "loss": 0.0397,
      "step": 233740
    },
    {
      "epoch": 0.3825533669802243,
      "grad_norm": 1.8745571374893188,
      "learning_rate": 9.29574402192893e-06,
      "loss": 0.0632,
      "step": 233760
    },
    {
      "epoch": 0.3825860974188776,
      "grad_norm": 1.7650375366210938,
      "learning_rate": 9.295678129715411e-06,
      "loss": 0.0412,
      "step": 233780
    },
    {
      "epoch": 0.38261882785753093,
      "grad_norm": 1.8253766298294067,
      "learning_rate": 9.295612237501895e-06,
      "loss": 0.0539,
      "step": 233800
    },
    {
      "epoch": 0.3826515582961843,
      "grad_norm": 1.8264062404632568,
      "learning_rate": 9.295546345288378e-06,
      "loss": 0.0315,
      "step": 233820
    },
    {
      "epoch": 0.3826842887348376,
      "grad_norm": 0.5953019261360168,
      "learning_rate": 9.29548045307486e-06,
      "loss": 0.0573,
      "step": 233840
    },
    {
      "epoch": 0.38271701917349094,
      "grad_norm": 0.906470775604248,
      "learning_rate": 9.295414560861344e-06,
      "loss": 0.0448,
      "step": 233860
    },
    {
      "epoch": 0.3827497496121443,
      "grad_norm": 1.7046762704849243,
      "learning_rate": 9.295348668647827e-06,
      "loss": 0.0428,
      "step": 233880
    },
    {
      "epoch": 0.3827824800507976,
      "grad_norm": 1.374659776687622,
      "learning_rate": 9.29528277643431e-06,
      "loss": 0.0433,
      "step": 233900
    },
    {
      "epoch": 0.382815210489451,
      "grad_norm": Infinity,
      "learning_rate": 9.295216884220793e-06,
      "loss": 0.038,
      "step": 233920
    },
    {
      "epoch": 0.3828479409281043,
      "grad_norm": 2.0168330669403076,
      "learning_rate": 9.295150992007276e-06,
      "loss": 0.0437,
      "step": 233940
    },
    {
      "epoch": 0.38288067136675763,
      "grad_norm": 1.6466553211212158,
      "learning_rate": 9.295085099793758e-06,
      "loss": 0.0473,
      "step": 233960
    },
    {
      "epoch": 0.382913401805411,
      "grad_norm": 0.9443733096122742,
      "learning_rate": 9.295019207580242e-06,
      "loss": 0.0501,
      "step": 233980
    },
    {
      "epoch": 0.3829461322440643,
      "grad_norm": 3.508997917175293,
      "learning_rate": 9.294953315366724e-06,
      "loss": 0.0464,
      "step": 234000
    },
    {
      "epoch": 0.3829788626827177,
      "grad_norm": 1.991581678390503,
      "learning_rate": 9.294887423153207e-06,
      "loss": 0.053,
      "step": 234020
    },
    {
      "epoch": 0.383011593121371,
      "grad_norm": 4.023202896118164,
      "learning_rate": 9.29482153093969e-06,
      "loss": 0.0383,
      "step": 234040
    },
    {
      "epoch": 0.3830443235600243,
      "grad_norm": 1.9954785108566284,
      "learning_rate": 9.294755638726173e-06,
      "loss": 0.0546,
      "step": 234060
    },
    {
      "epoch": 0.3830770539986777,
      "grad_norm": 1.1472278833389282,
      "learning_rate": 9.294689746512655e-06,
      "loss": 0.0357,
      "step": 234080
    },
    {
      "epoch": 0.383109784437331,
      "grad_norm": 0.871740460395813,
      "learning_rate": 9.294623854299138e-06,
      "loss": 0.0534,
      "step": 234100
    },
    {
      "epoch": 0.3831425148759844,
      "grad_norm": 0.34070470929145813,
      "learning_rate": 9.29455796208562e-06,
      "loss": 0.0388,
      "step": 234120
    },
    {
      "epoch": 0.3831752453146377,
      "grad_norm": 3.2701988220214844,
      "learning_rate": 9.294492069872104e-06,
      "loss": 0.0578,
      "step": 234140
    },
    {
      "epoch": 0.383207975753291,
      "grad_norm": 7.277941703796387,
      "learning_rate": 9.294426177658586e-06,
      "loss": 0.0428,
      "step": 234160
    },
    {
      "epoch": 0.3832407061919444,
      "grad_norm": 0.673410177230835,
      "learning_rate": 9.29436028544507e-06,
      "loss": 0.0404,
      "step": 234180
    },
    {
      "epoch": 0.3832734366305977,
      "grad_norm": 0.7281242609024048,
      "learning_rate": 9.294294393231553e-06,
      "loss": 0.0424,
      "step": 234200
    },
    {
      "epoch": 0.3833061670692511,
      "grad_norm": 2.238103151321411,
      "learning_rate": 9.294228501018035e-06,
      "loss": 0.0455,
      "step": 234220
    },
    {
      "epoch": 0.3833388975079044,
      "grad_norm": 8.048355102539062,
      "learning_rate": 9.294162608804518e-06,
      "loss": 0.0519,
      "step": 234240
    },
    {
      "epoch": 0.3833716279465577,
      "grad_norm": 2.3273112773895264,
      "learning_rate": 9.294096716591002e-06,
      "loss": 0.0409,
      "step": 234260
    },
    {
      "epoch": 0.3834043583852111,
      "grad_norm": 0.9278855323791504,
      "learning_rate": 9.294030824377484e-06,
      "loss": 0.0479,
      "step": 234280
    },
    {
      "epoch": 0.3834370888238644,
      "grad_norm": 2.340646982192993,
      "learning_rate": 9.293964932163968e-06,
      "loss": 0.0565,
      "step": 234300
    },
    {
      "epoch": 0.3834698192625178,
      "grad_norm": 1.940735101699829,
      "learning_rate": 9.293899039950451e-06,
      "loss": 0.0498,
      "step": 234320
    },
    {
      "epoch": 0.3835025497011711,
      "grad_norm": 0.9795626997947693,
      "learning_rate": 9.293833147736933e-06,
      "loss": 0.0347,
      "step": 234340
    },
    {
      "epoch": 0.3835352801398244,
      "grad_norm": 3.112828254699707,
      "learning_rate": 9.293767255523417e-06,
      "loss": 0.0598,
      "step": 234360
    },
    {
      "epoch": 0.3835680105784778,
      "grad_norm": 0.7242647409439087,
      "learning_rate": 9.293701363309898e-06,
      "loss": 0.061,
      "step": 234380
    },
    {
      "epoch": 0.3836007410171311,
      "grad_norm": 3.330672264099121,
      "learning_rate": 9.293635471096382e-06,
      "loss": 0.052,
      "step": 234400
    },
    {
      "epoch": 0.38363347145578447,
      "grad_norm": 1.7524040937423706,
      "learning_rate": 9.293569578882864e-06,
      "loss": 0.0404,
      "step": 234420
    },
    {
      "epoch": 0.3836662018944378,
      "grad_norm": 1.2689510583877563,
      "learning_rate": 9.293503686669348e-06,
      "loss": 0.0436,
      "step": 234440
    },
    {
      "epoch": 0.3836989323330911,
      "grad_norm": 1.1129326820373535,
      "learning_rate": 9.29343779445583e-06,
      "loss": 0.0413,
      "step": 234460
    },
    {
      "epoch": 0.3837316627717445,
      "grad_norm": 0.946450412273407,
      "learning_rate": 9.293371902242313e-06,
      "loss": 0.0397,
      "step": 234480
    },
    {
      "epoch": 0.3837643932103978,
      "grad_norm": 0.21992148458957672,
      "learning_rate": 9.293306010028795e-06,
      "loss": 0.0307,
      "step": 234500
    },
    {
      "epoch": 0.38379712364905116,
      "grad_norm": 2.0145647525787354,
      "learning_rate": 9.293240117815278e-06,
      "loss": 0.0443,
      "step": 234520
    },
    {
      "epoch": 0.3838298540877045,
      "grad_norm": 1.0401033163070679,
      "learning_rate": 9.29317422560176e-06,
      "loss": 0.0411,
      "step": 234540
    },
    {
      "epoch": 0.3838625845263578,
      "grad_norm": 2.37391996383667,
      "learning_rate": 9.293108333388244e-06,
      "loss": 0.0383,
      "step": 234560
    },
    {
      "epoch": 0.38389531496501117,
      "grad_norm": 1.812432885169983,
      "learning_rate": 9.293042441174726e-06,
      "loss": 0.0487,
      "step": 234580
    },
    {
      "epoch": 0.3839280454036645,
      "grad_norm": 0.5758847594261169,
      "learning_rate": 9.29297654896121e-06,
      "loss": 0.0657,
      "step": 234600
    },
    {
      "epoch": 0.38396077584231786,
      "grad_norm": 1.7953965663909912,
      "learning_rate": 9.292910656747693e-06,
      "loss": 0.0325,
      "step": 234620
    },
    {
      "epoch": 0.3839935062809712,
      "grad_norm": 1.2433323860168457,
      "learning_rate": 9.292844764534175e-06,
      "loss": 0.0398,
      "step": 234640
    },
    {
      "epoch": 0.3840262367196245,
      "grad_norm": 1.9711995124816895,
      "learning_rate": 9.292778872320659e-06,
      "loss": 0.0538,
      "step": 234660
    },
    {
      "epoch": 0.38405896715827786,
      "grad_norm": 0.6895071268081665,
      "learning_rate": 9.292712980107142e-06,
      "loss": 0.0447,
      "step": 234680
    },
    {
      "epoch": 0.3840916975969312,
      "grad_norm": 1.7760173082351685,
      "learning_rate": 9.292647087893624e-06,
      "loss": 0.049,
      "step": 234700
    },
    {
      "epoch": 0.38412442803558455,
      "grad_norm": 3.6837270259857178,
      "learning_rate": 9.292581195680108e-06,
      "loss": 0.0499,
      "step": 234720
    },
    {
      "epoch": 0.38415715847423787,
      "grad_norm": 2.8016839027404785,
      "learning_rate": 9.292515303466591e-06,
      "loss": 0.045,
      "step": 234740
    },
    {
      "epoch": 0.3841898889128912,
      "grad_norm": 1.7080860137939453,
      "learning_rate": 9.292449411253073e-06,
      "loss": 0.0424,
      "step": 234760
    },
    {
      "epoch": 0.38422261935154456,
      "grad_norm": 0.6789117455482483,
      "learning_rate": 9.292383519039557e-06,
      "loss": 0.0512,
      "step": 234780
    },
    {
      "epoch": 0.3842553497901979,
      "grad_norm": 1.9823979139328003,
      "learning_rate": 9.292317626826039e-06,
      "loss": 0.0517,
      "step": 234800
    },
    {
      "epoch": 0.38428808022885125,
      "grad_norm": 1.892958164215088,
      "learning_rate": 9.292251734612522e-06,
      "loss": 0.0534,
      "step": 234820
    },
    {
      "epoch": 0.38432081066750456,
      "grad_norm": 0.9105638861656189,
      "learning_rate": 9.292185842399004e-06,
      "loss": 0.0301,
      "step": 234840
    },
    {
      "epoch": 0.3843535411061579,
      "grad_norm": 1.6674455404281616,
      "learning_rate": 9.292119950185488e-06,
      "loss": 0.0367,
      "step": 234860
    },
    {
      "epoch": 0.38438627154481125,
      "grad_norm": 1.3468316793441772,
      "learning_rate": 9.29205405797197e-06,
      "loss": 0.0547,
      "step": 234880
    },
    {
      "epoch": 0.38441900198346457,
      "grad_norm": 8.15188217163086,
      "learning_rate": 9.291988165758453e-06,
      "loss": 0.0514,
      "step": 234900
    },
    {
      "epoch": 0.38445173242211794,
      "grad_norm": 1.627510905265808,
      "learning_rate": 9.291922273544935e-06,
      "loss": 0.0458,
      "step": 234920
    },
    {
      "epoch": 0.38448446286077126,
      "grad_norm": 2.803765296936035,
      "learning_rate": 9.291856381331419e-06,
      "loss": 0.0434,
      "step": 234940
    },
    {
      "epoch": 0.3845171932994246,
      "grad_norm": 1.5733420848846436,
      "learning_rate": 9.2917904891179e-06,
      "loss": 0.0598,
      "step": 234960
    },
    {
      "epoch": 0.38454992373807795,
      "grad_norm": 9.874068260192871,
      "learning_rate": 9.291724596904384e-06,
      "loss": 0.0432,
      "step": 234980
    },
    {
      "epoch": 0.38458265417673126,
      "grad_norm": 4.819300651550293,
      "learning_rate": 9.291658704690868e-06,
      "loss": 0.0529,
      "step": 235000
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 1.8915941715240479,
      "learning_rate": 9.29159281247735e-06,
      "loss": 0.0377,
      "step": 235020
    },
    {
      "epoch": 0.38464811505403795,
      "grad_norm": 1.0947725772857666,
      "learning_rate": 9.291526920263833e-06,
      "loss": 0.0478,
      "step": 235040
    },
    {
      "epoch": 0.38468084549269127,
      "grad_norm": 1.8234853744506836,
      "learning_rate": 9.291461028050317e-06,
      "loss": 0.0645,
      "step": 235060
    },
    {
      "epoch": 0.38471357593134464,
      "grad_norm": 1.184916615486145,
      "learning_rate": 9.291395135836799e-06,
      "loss": 0.0545,
      "step": 235080
    },
    {
      "epoch": 0.38474630636999796,
      "grad_norm": 8.153576850891113,
      "learning_rate": 9.291329243623282e-06,
      "loss": 0.0373,
      "step": 235100
    },
    {
      "epoch": 0.38477903680865133,
      "grad_norm": 1.2311140298843384,
      "learning_rate": 9.291263351409766e-06,
      "loss": 0.0528,
      "step": 235120
    },
    {
      "epoch": 0.38481176724730465,
      "grad_norm": 3.456246852874756,
      "learning_rate": 9.291197459196248e-06,
      "loss": 0.0396,
      "step": 235140
    },
    {
      "epoch": 0.38484449768595796,
      "grad_norm": 2.6385133266448975,
      "learning_rate": 9.291131566982731e-06,
      "loss": 0.0403,
      "step": 235160
    },
    {
      "epoch": 0.38487722812461134,
      "grad_norm": 5.325190544128418,
      "learning_rate": 9.291065674769213e-06,
      "loss": 0.0556,
      "step": 235180
    },
    {
      "epoch": 0.38490995856326465,
      "grad_norm": 0.8726911544799805,
      "learning_rate": 9.290999782555697e-06,
      "loss": 0.0465,
      "step": 235200
    },
    {
      "epoch": 0.384942689001918,
      "grad_norm": 0.937376856803894,
      "learning_rate": 9.290933890342179e-06,
      "loss": 0.0464,
      "step": 235220
    },
    {
      "epoch": 0.38497541944057134,
      "grad_norm": 1.2856401205062866,
      "learning_rate": 9.290867998128662e-06,
      "loss": 0.0338,
      "step": 235240
    },
    {
      "epoch": 0.38500814987922466,
      "grad_norm": 3.1356148719787598,
      "learning_rate": 9.290802105915144e-06,
      "loss": 0.0537,
      "step": 235260
    },
    {
      "epoch": 0.38504088031787803,
      "grad_norm": 2.4289472103118896,
      "learning_rate": 9.290736213701628e-06,
      "loss": 0.0467,
      "step": 235280
    },
    {
      "epoch": 0.38507361075653135,
      "grad_norm": 0.9901004433631897,
      "learning_rate": 9.29067032148811e-06,
      "loss": 0.0389,
      "step": 235300
    },
    {
      "epoch": 0.3851063411951847,
      "grad_norm": 0.2741944193840027,
      "learning_rate": 9.290604429274593e-06,
      "loss": 0.0393,
      "step": 235320
    },
    {
      "epoch": 0.38513907163383804,
      "grad_norm": 2.1145288944244385,
      "learning_rate": 9.290538537061077e-06,
      "loss": 0.0372,
      "step": 235340
    },
    {
      "epoch": 0.38517180207249135,
      "grad_norm": 2.1793203353881836,
      "learning_rate": 9.290472644847559e-06,
      "loss": 0.04,
      "step": 235360
    },
    {
      "epoch": 0.3852045325111447,
      "grad_norm": 0.7851155400276184,
      "learning_rate": 9.290406752634042e-06,
      "loss": 0.0353,
      "step": 235380
    },
    {
      "epoch": 0.38523726294979804,
      "grad_norm": 0.3411668837070465,
      "learning_rate": 9.290340860420524e-06,
      "loss": 0.0434,
      "step": 235400
    },
    {
      "epoch": 0.3852699933884514,
      "grad_norm": 1.832818865776062,
      "learning_rate": 9.290274968207008e-06,
      "loss": 0.0489,
      "step": 235420
    },
    {
      "epoch": 0.38530272382710473,
      "grad_norm": 2.097109794616699,
      "learning_rate": 9.29020907599349e-06,
      "loss": 0.0408,
      "step": 235440
    },
    {
      "epoch": 0.38533545426575805,
      "grad_norm": 1.0652780532836914,
      "learning_rate": 9.290143183779973e-06,
      "loss": 0.0523,
      "step": 235460
    },
    {
      "epoch": 0.3853681847044114,
      "grad_norm": 1.6445660591125488,
      "learning_rate": 9.290077291566457e-06,
      "loss": 0.0551,
      "step": 235480
    },
    {
      "epoch": 0.38540091514306474,
      "grad_norm": 0.2155604064464569,
      "learning_rate": 9.290011399352939e-06,
      "loss": 0.0412,
      "step": 235500
    },
    {
      "epoch": 0.3854336455817181,
      "grad_norm": 1.5597236156463623,
      "learning_rate": 9.289945507139422e-06,
      "loss": 0.0414,
      "step": 235520
    },
    {
      "epoch": 0.3854663760203714,
      "grad_norm": 2.544844388961792,
      "learning_rate": 9.289879614925906e-06,
      "loss": 0.0367,
      "step": 235540
    },
    {
      "epoch": 0.38549910645902474,
      "grad_norm": 2.0581114292144775,
      "learning_rate": 9.289813722712388e-06,
      "loss": 0.0496,
      "step": 235560
    },
    {
      "epoch": 0.3855318368976781,
      "grad_norm": 7.124310493469238,
      "learning_rate": 9.289747830498871e-06,
      "loss": 0.0606,
      "step": 235580
    },
    {
      "epoch": 0.38556456733633143,
      "grad_norm": 0.8000308275222778,
      "learning_rate": 9.289681938285353e-06,
      "loss": 0.0412,
      "step": 235600
    },
    {
      "epoch": 0.3855972977749848,
      "grad_norm": 2.7244064807891846,
      "learning_rate": 9.289616046071837e-06,
      "loss": 0.0454,
      "step": 235620
    },
    {
      "epoch": 0.3856300282136381,
      "grad_norm": 1.6554489135742188,
      "learning_rate": 9.289550153858319e-06,
      "loss": 0.0532,
      "step": 235640
    },
    {
      "epoch": 0.38566275865229144,
      "grad_norm": 0.5071291923522949,
      "learning_rate": 9.289484261644802e-06,
      "loss": 0.044,
      "step": 235660
    },
    {
      "epoch": 0.3856954890909448,
      "grad_norm": 0.8215873837471008,
      "learning_rate": 9.289418369431286e-06,
      "loss": 0.0491,
      "step": 235680
    },
    {
      "epoch": 0.3857282195295981,
      "grad_norm": 4.8239641189575195,
      "learning_rate": 9.289352477217768e-06,
      "loss": 0.0451,
      "step": 235700
    },
    {
      "epoch": 0.3857609499682515,
      "grad_norm": 11.634973526000977,
      "learning_rate": 9.289286585004251e-06,
      "loss": 0.0516,
      "step": 235720
    },
    {
      "epoch": 0.3857936804069048,
      "grad_norm": 2.1944737434387207,
      "learning_rate": 9.289220692790733e-06,
      "loss": 0.0533,
      "step": 235740
    },
    {
      "epoch": 0.38582641084555813,
      "grad_norm": 1.10155189037323,
      "learning_rate": 9.289154800577217e-06,
      "loss": 0.0597,
      "step": 235760
    },
    {
      "epoch": 0.3858591412842115,
      "grad_norm": 1.343842625617981,
      "learning_rate": 9.289088908363699e-06,
      "loss": 0.0327,
      "step": 235780
    },
    {
      "epoch": 0.3858918717228648,
      "grad_norm": 1.6952458620071411,
      "learning_rate": 9.289023016150182e-06,
      "loss": 0.0618,
      "step": 235800
    },
    {
      "epoch": 0.3859246021615182,
      "grad_norm": 1.038584589958191,
      "learning_rate": 9.288957123936664e-06,
      "loss": 0.0519,
      "step": 235820
    },
    {
      "epoch": 0.3859573326001715,
      "grad_norm": 3.3658595085144043,
      "learning_rate": 9.288891231723148e-06,
      "loss": 0.0413,
      "step": 235840
    },
    {
      "epoch": 0.3859900630388248,
      "grad_norm": 1.7435643672943115,
      "learning_rate": 9.288825339509631e-06,
      "loss": 0.034,
      "step": 235860
    },
    {
      "epoch": 0.3860227934774782,
      "grad_norm": 1.9710181951522827,
      "learning_rate": 9.288759447296113e-06,
      "loss": 0.0471,
      "step": 235880
    },
    {
      "epoch": 0.3860555239161315,
      "grad_norm": 1.75723135471344,
      "learning_rate": 9.288693555082597e-06,
      "loss": 0.0515,
      "step": 235900
    },
    {
      "epoch": 0.3860882543547849,
      "grad_norm": 1.206767201423645,
      "learning_rate": 9.28862766286908e-06,
      "loss": 0.0436,
      "step": 235920
    },
    {
      "epoch": 0.3861209847934382,
      "grad_norm": 3.502352714538574,
      "learning_rate": 9.288561770655562e-06,
      "loss": 0.0626,
      "step": 235940
    },
    {
      "epoch": 0.3861537152320915,
      "grad_norm": 1.9782750606536865,
      "learning_rate": 9.288495878442046e-06,
      "loss": 0.0518,
      "step": 235960
    },
    {
      "epoch": 0.3861864456707449,
      "grad_norm": 1.6825875043869019,
      "learning_rate": 9.288429986228528e-06,
      "loss": 0.0475,
      "step": 235980
    },
    {
      "epoch": 0.3862191761093982,
      "grad_norm": 1.646125316619873,
      "learning_rate": 9.288364094015011e-06,
      "loss": 0.0561,
      "step": 236000
    },
    {
      "epoch": 0.3862519065480516,
      "grad_norm": 8.14022159576416,
      "learning_rate": 9.288298201801493e-06,
      "loss": 0.0531,
      "step": 236020
    },
    {
      "epoch": 0.3862846369867049,
      "grad_norm": 1.695083498954773,
      "learning_rate": 9.288232309587977e-06,
      "loss": 0.0522,
      "step": 236040
    },
    {
      "epoch": 0.3863173674253582,
      "grad_norm": 1.3238497972488403,
      "learning_rate": 9.28816641737446e-06,
      "loss": 0.0416,
      "step": 236060
    },
    {
      "epoch": 0.3863500978640116,
      "grad_norm": 2.1827337741851807,
      "learning_rate": 9.288100525160942e-06,
      "loss": 0.0464,
      "step": 236080
    },
    {
      "epoch": 0.3863828283026649,
      "grad_norm": 4.6316399574279785,
      "learning_rate": 9.288034632947426e-06,
      "loss": 0.0501,
      "step": 236100
    },
    {
      "epoch": 0.3864155587413183,
      "grad_norm": 7.242539405822754,
      "learning_rate": 9.287968740733908e-06,
      "loss": 0.0435,
      "step": 236120
    },
    {
      "epoch": 0.3864482891799716,
      "grad_norm": 0.6230118870735168,
      "learning_rate": 9.287902848520391e-06,
      "loss": 0.0515,
      "step": 236140
    },
    {
      "epoch": 0.3864810196186249,
      "grad_norm": 0.38797610998153687,
      "learning_rate": 9.287836956306873e-06,
      "loss": 0.0352,
      "step": 236160
    },
    {
      "epoch": 0.3865137500572783,
      "grad_norm": 2.635140895843506,
      "learning_rate": 9.287771064093357e-06,
      "loss": 0.0484,
      "step": 236180
    },
    {
      "epoch": 0.3865464804959316,
      "grad_norm": 1.2819641828536987,
      "learning_rate": 9.287705171879839e-06,
      "loss": 0.0414,
      "step": 236200
    },
    {
      "epoch": 0.38657921093458497,
      "grad_norm": 2.464165687561035,
      "learning_rate": 9.287639279666322e-06,
      "loss": 0.057,
      "step": 236220
    },
    {
      "epoch": 0.3866119413732383,
      "grad_norm": 0.3004477024078369,
      "learning_rate": 9.287573387452806e-06,
      "loss": 0.0604,
      "step": 236240
    },
    {
      "epoch": 0.3866446718118916,
      "grad_norm": 3.741015911102295,
      "learning_rate": 9.287507495239288e-06,
      "loss": 0.0392,
      "step": 236260
    },
    {
      "epoch": 0.386677402250545,
      "grad_norm": 7.0145344734191895,
      "learning_rate": 9.287441603025771e-06,
      "loss": 0.0388,
      "step": 236280
    },
    {
      "epoch": 0.3867101326891983,
      "grad_norm": 4.256527900695801,
      "learning_rate": 9.287375710812255e-06,
      "loss": 0.0512,
      "step": 236300
    },
    {
      "epoch": 0.38674286312785167,
      "grad_norm": 3.779688835144043,
      "learning_rate": 9.287309818598737e-06,
      "loss": 0.0364,
      "step": 236320
    },
    {
      "epoch": 0.386775593566505,
      "grad_norm": 1.4749935865402222,
      "learning_rate": 9.28724392638522e-06,
      "loss": 0.0325,
      "step": 236340
    },
    {
      "epoch": 0.3868083240051583,
      "grad_norm": 1.8417751789093018,
      "learning_rate": 9.287178034171702e-06,
      "loss": 0.0455,
      "step": 236360
    },
    {
      "epoch": 0.38684105444381167,
      "grad_norm": 5.6949543952941895,
      "learning_rate": 9.287112141958186e-06,
      "loss": 0.05,
      "step": 236380
    },
    {
      "epoch": 0.386873784882465,
      "grad_norm": 0.5398330688476562,
      "learning_rate": 9.28704624974467e-06,
      "loss": 0.0341,
      "step": 236400
    },
    {
      "epoch": 0.38690651532111836,
      "grad_norm": 2.682317018508911,
      "learning_rate": 9.286980357531151e-06,
      "loss": 0.0494,
      "step": 236420
    },
    {
      "epoch": 0.3869392457597717,
      "grad_norm": 0.23731298744678497,
      "learning_rate": 9.286914465317635e-06,
      "loss": 0.0282,
      "step": 236440
    },
    {
      "epoch": 0.386971976198425,
      "grad_norm": 1.9352788925170898,
      "learning_rate": 9.286848573104117e-06,
      "loss": 0.0398,
      "step": 236460
    },
    {
      "epoch": 0.38700470663707837,
      "grad_norm": 2.1925048828125,
      "learning_rate": 9.2867826808906e-06,
      "loss": 0.0448,
      "step": 236480
    },
    {
      "epoch": 0.3870374370757317,
      "grad_norm": 0.5659884214401245,
      "learning_rate": 9.286716788677082e-06,
      "loss": 0.0356,
      "step": 236500
    },
    {
      "epoch": 0.38707016751438506,
      "grad_norm": 0.5754527449607849,
      "learning_rate": 9.286650896463566e-06,
      "loss": 0.032,
      "step": 236520
    },
    {
      "epoch": 0.38710289795303837,
      "grad_norm": 0.5986924171447754,
      "learning_rate": 9.286585004250048e-06,
      "loss": 0.043,
      "step": 236540
    },
    {
      "epoch": 0.3871356283916917,
      "grad_norm": 3.292839765548706,
      "learning_rate": 9.286519112036531e-06,
      "loss": 0.0438,
      "step": 236560
    },
    {
      "epoch": 0.38716835883034506,
      "grad_norm": 3.6779441833496094,
      "learning_rate": 9.286453219823013e-06,
      "loss": 0.0502,
      "step": 236580
    },
    {
      "epoch": 0.3872010892689984,
      "grad_norm": 1.0834760665893555,
      "learning_rate": 9.286387327609497e-06,
      "loss": 0.0434,
      "step": 236600
    },
    {
      "epoch": 0.38723381970765175,
      "grad_norm": 1.77200448513031,
      "learning_rate": 9.286321435395979e-06,
      "loss": 0.0561,
      "step": 236620
    },
    {
      "epoch": 0.38726655014630507,
      "grad_norm": 5.234147071838379,
      "learning_rate": 9.286255543182462e-06,
      "loss": 0.0413,
      "step": 236640
    },
    {
      "epoch": 0.3872992805849584,
      "grad_norm": 1.4165657758712769,
      "learning_rate": 9.286189650968946e-06,
      "loss": 0.0499,
      "step": 236660
    },
    {
      "epoch": 0.38733201102361176,
      "grad_norm": 1.5313714742660522,
      "learning_rate": 9.286123758755428e-06,
      "loss": 0.0467,
      "step": 236680
    },
    {
      "epoch": 0.38736474146226507,
      "grad_norm": 0.7750624418258667,
      "learning_rate": 9.286057866541912e-06,
      "loss": 0.0361,
      "step": 236700
    },
    {
      "epoch": 0.3873974719009184,
      "grad_norm": 10.689647674560547,
      "learning_rate": 9.285991974328395e-06,
      "loss": 0.0505,
      "step": 236720
    },
    {
      "epoch": 0.38743020233957176,
      "grad_norm": 0.9014930129051208,
      "learning_rate": 9.285926082114877e-06,
      "loss": 0.0536,
      "step": 236740
    },
    {
      "epoch": 0.3874629327782251,
      "grad_norm": 1.2749552726745605,
      "learning_rate": 9.28586018990136e-06,
      "loss": 0.0506,
      "step": 236760
    },
    {
      "epoch": 0.38749566321687845,
      "grad_norm": 1.2764151096343994,
      "learning_rate": 9.285794297687844e-06,
      "loss": 0.0419,
      "step": 236780
    },
    {
      "epoch": 0.38752839365553177,
      "grad_norm": 0.5677705407142639,
      "learning_rate": 9.285728405474326e-06,
      "loss": 0.0397,
      "step": 236800
    },
    {
      "epoch": 0.3875611240941851,
      "grad_norm": 5.252017021179199,
      "learning_rate": 9.28566251326081e-06,
      "loss": 0.0617,
      "step": 236820
    },
    {
      "epoch": 0.38759385453283846,
      "grad_norm": 2.3285179138183594,
      "learning_rate": 9.285596621047292e-06,
      "loss": 0.0376,
      "step": 236840
    },
    {
      "epoch": 0.38762658497149177,
      "grad_norm": 1.6624788045883179,
      "learning_rate": 9.285530728833775e-06,
      "loss": 0.0364,
      "step": 236860
    },
    {
      "epoch": 0.38765931541014514,
      "grad_norm": 3.099111318588257,
      "learning_rate": 9.285464836620257e-06,
      "loss": 0.035,
      "step": 236880
    },
    {
      "epoch": 0.38769204584879846,
      "grad_norm": 2.4831700325012207,
      "learning_rate": 9.28539894440674e-06,
      "loss": 0.0389,
      "step": 236900
    },
    {
      "epoch": 0.3877247762874518,
      "grad_norm": 1.7835997343063354,
      "learning_rate": 9.285333052193223e-06,
      "loss": 0.0433,
      "step": 236920
    },
    {
      "epoch": 0.38775750672610515,
      "grad_norm": 4.869556427001953,
      "learning_rate": 9.285267159979706e-06,
      "loss": 0.0454,
      "step": 236940
    },
    {
      "epoch": 0.38779023716475847,
      "grad_norm": 0.7925463914871216,
      "learning_rate": 9.285201267766188e-06,
      "loss": 0.0508,
      "step": 236960
    },
    {
      "epoch": 0.38782296760341184,
      "grad_norm": 4.425661087036133,
      "learning_rate": 9.285135375552672e-06,
      "loss": 0.0444,
      "step": 236980
    },
    {
      "epoch": 0.38785569804206516,
      "grad_norm": 0.5272967219352722,
      "learning_rate": 9.285069483339153e-06,
      "loss": 0.0544,
      "step": 237000
    },
    {
      "epoch": 0.3878884284807185,
      "grad_norm": 1.0589476823806763,
      "learning_rate": 9.285003591125637e-06,
      "loss": 0.0562,
      "step": 237020
    },
    {
      "epoch": 0.38792115891937184,
      "grad_norm": 0.3530995845794678,
      "learning_rate": 9.28493769891212e-06,
      "loss": 0.0383,
      "step": 237040
    },
    {
      "epoch": 0.38795388935802516,
      "grad_norm": 2.4195215702056885,
      "learning_rate": 9.284871806698603e-06,
      "loss": 0.0422,
      "step": 237060
    },
    {
      "epoch": 0.38798661979667853,
      "grad_norm": 1.028147578239441,
      "learning_rate": 9.284805914485086e-06,
      "loss": 0.0464,
      "step": 237080
    },
    {
      "epoch": 0.38801935023533185,
      "grad_norm": 3.674025774002075,
      "learning_rate": 9.28474002227157e-06,
      "loss": 0.0393,
      "step": 237100
    },
    {
      "epoch": 0.38805208067398517,
      "grad_norm": 1.0902700424194336,
      "learning_rate": 9.284674130058052e-06,
      "loss": 0.0449,
      "step": 237120
    },
    {
      "epoch": 0.38808481111263854,
      "grad_norm": 1.7409543991088867,
      "learning_rate": 9.284608237844535e-06,
      "loss": 0.0361,
      "step": 237140
    },
    {
      "epoch": 0.38811754155129186,
      "grad_norm": 1.1163122653961182,
      "learning_rate": 9.284542345631019e-06,
      "loss": 0.0478,
      "step": 237160
    },
    {
      "epoch": 0.38815027198994523,
      "grad_norm": 2.2586729526519775,
      "learning_rate": 9.2844764534175e-06,
      "loss": 0.0538,
      "step": 237180
    },
    {
      "epoch": 0.38818300242859854,
      "grad_norm": 2.455143451690674,
      "learning_rate": 9.284410561203984e-06,
      "loss": 0.0562,
      "step": 237200
    },
    {
      "epoch": 0.38821573286725186,
      "grad_norm": 1.474711298942566,
      "learning_rate": 9.284344668990466e-06,
      "loss": 0.0387,
      "step": 237220
    },
    {
      "epoch": 0.38824846330590523,
      "grad_norm": 1.3462811708450317,
      "learning_rate": 9.28427877677695e-06,
      "loss": 0.0458,
      "step": 237240
    },
    {
      "epoch": 0.38828119374455855,
      "grad_norm": 1.7389121055603027,
      "learning_rate": 9.284212884563432e-06,
      "loss": 0.031,
      "step": 237260
    },
    {
      "epoch": 0.3883139241832119,
      "grad_norm": 0.9702582359313965,
      "learning_rate": 9.284146992349915e-06,
      "loss": 0.0326,
      "step": 237280
    },
    {
      "epoch": 0.38834665462186524,
      "grad_norm": 5.701771259307861,
      "learning_rate": 9.284081100136397e-06,
      "loss": 0.0559,
      "step": 237300
    },
    {
      "epoch": 0.38837938506051856,
      "grad_norm": 1.836112380027771,
      "learning_rate": 9.28401520792288e-06,
      "loss": 0.0341,
      "step": 237320
    },
    {
      "epoch": 0.38841211549917193,
      "grad_norm": 0.9895530939102173,
      "learning_rate": 9.283949315709363e-06,
      "loss": 0.0497,
      "step": 237340
    },
    {
      "epoch": 0.38844484593782524,
      "grad_norm": 1.716817021369934,
      "learning_rate": 9.283883423495846e-06,
      "loss": 0.0581,
      "step": 237360
    },
    {
      "epoch": 0.3884775763764786,
      "grad_norm": 1.107958197593689,
      "learning_rate": 9.283817531282328e-06,
      "loss": 0.038,
      "step": 237380
    },
    {
      "epoch": 0.38851030681513193,
      "grad_norm": 0.8959557414054871,
      "learning_rate": 9.283751639068812e-06,
      "loss": 0.0473,
      "step": 237400
    },
    {
      "epoch": 0.38854303725378525,
      "grad_norm": 3.453324556350708,
      "learning_rate": 9.283685746855294e-06,
      "loss": 0.0422,
      "step": 237420
    },
    {
      "epoch": 0.3885757676924386,
      "grad_norm": 2.878563404083252,
      "learning_rate": 9.283619854641777e-06,
      "loss": 0.0641,
      "step": 237440
    },
    {
      "epoch": 0.38860849813109194,
      "grad_norm": 1.4833205938339233,
      "learning_rate": 9.28355396242826e-06,
      "loss": 0.0557,
      "step": 237460
    },
    {
      "epoch": 0.3886412285697453,
      "grad_norm": 3.2447237968444824,
      "learning_rate": 9.283488070214743e-06,
      "loss": 0.0429,
      "step": 237480
    },
    {
      "epoch": 0.38867395900839863,
      "grad_norm": 1.565324068069458,
      "learning_rate": 9.283422178001226e-06,
      "loss": 0.0528,
      "step": 237500
    },
    {
      "epoch": 0.38870668944705195,
      "grad_norm": 2.429500102996826,
      "learning_rate": 9.28335628578771e-06,
      "loss": 0.0399,
      "step": 237520
    },
    {
      "epoch": 0.3887394198857053,
      "grad_norm": 0.45283687114715576,
      "learning_rate": 9.283290393574192e-06,
      "loss": 0.0445,
      "step": 237540
    },
    {
      "epoch": 0.38877215032435863,
      "grad_norm": 3.7462220191955566,
      "learning_rate": 9.283224501360675e-06,
      "loss": 0.0491,
      "step": 237560
    },
    {
      "epoch": 0.388804880763012,
      "grad_norm": 2.0605156421661377,
      "learning_rate": 9.283158609147159e-06,
      "loss": 0.0462,
      "step": 237580
    },
    {
      "epoch": 0.3888376112016653,
      "grad_norm": 1.7472705841064453,
      "learning_rate": 9.28309271693364e-06,
      "loss": 0.0472,
      "step": 237600
    },
    {
      "epoch": 0.38887034164031864,
      "grad_norm": 1.4726909399032593,
      "learning_rate": 9.283026824720124e-06,
      "loss": 0.0409,
      "step": 237620
    },
    {
      "epoch": 0.388903072078972,
      "grad_norm": 2.307283639907837,
      "learning_rate": 9.282960932506606e-06,
      "loss": 0.0437,
      "step": 237640
    },
    {
      "epoch": 0.38893580251762533,
      "grad_norm": 0.5989604592323303,
      "learning_rate": 9.28289504029309e-06,
      "loss": 0.0453,
      "step": 237660
    },
    {
      "epoch": 0.3889685329562787,
      "grad_norm": 1.6506001949310303,
      "learning_rate": 9.282829148079572e-06,
      "loss": 0.0463,
      "step": 237680
    },
    {
      "epoch": 0.389001263394932,
      "grad_norm": 0.9960416555404663,
      "learning_rate": 9.282763255866055e-06,
      "loss": 0.0319,
      "step": 237700
    },
    {
      "epoch": 0.38903399383358533,
      "grad_norm": 0.7921957969665527,
      "learning_rate": 9.282697363652537e-06,
      "loss": 0.0432,
      "step": 237720
    },
    {
      "epoch": 0.3890667242722387,
      "grad_norm": 1.0761386156082153,
      "learning_rate": 9.28263147143902e-06,
      "loss": 0.0475,
      "step": 237740
    },
    {
      "epoch": 0.389099454710892,
      "grad_norm": 0.9121177196502686,
      "learning_rate": 9.282565579225503e-06,
      "loss": 0.0584,
      "step": 237760
    },
    {
      "epoch": 0.3891321851495454,
      "grad_norm": 1.7144792079925537,
      "learning_rate": 9.282499687011986e-06,
      "loss": 0.0525,
      "step": 237780
    },
    {
      "epoch": 0.3891649155881987,
      "grad_norm": 1.8201311826705933,
      "learning_rate": 9.28243379479847e-06,
      "loss": 0.0454,
      "step": 237800
    },
    {
      "epoch": 0.38919764602685203,
      "grad_norm": 1.2604660987854004,
      "learning_rate": 9.282367902584952e-06,
      "loss": 0.0406,
      "step": 237820
    },
    {
      "epoch": 0.3892303764655054,
      "grad_norm": 2.248605251312256,
      "learning_rate": 9.282302010371435e-06,
      "loss": 0.042,
      "step": 237840
    },
    {
      "epoch": 0.3892631069041587,
      "grad_norm": 0.6197342276573181,
      "learning_rate": 9.282236118157917e-06,
      "loss": 0.0406,
      "step": 237860
    },
    {
      "epoch": 0.3892958373428121,
      "grad_norm": 0.7043376564979553,
      "learning_rate": 9.2821702259444e-06,
      "loss": 0.0482,
      "step": 237880
    },
    {
      "epoch": 0.3893285677814654,
      "grad_norm": 1.6206363439559937,
      "learning_rate": 9.282104333730884e-06,
      "loss": 0.0301,
      "step": 237900
    },
    {
      "epoch": 0.3893612982201187,
      "grad_norm": 1.2202671766281128,
      "learning_rate": 9.282038441517366e-06,
      "loss": 0.0606,
      "step": 237920
    },
    {
      "epoch": 0.3893940286587721,
      "grad_norm": 3.7030677795410156,
      "learning_rate": 9.28197254930385e-06,
      "loss": 0.0359,
      "step": 237940
    },
    {
      "epoch": 0.3894267590974254,
      "grad_norm": 1.2274301052093506,
      "learning_rate": 9.281906657090333e-06,
      "loss": 0.038,
      "step": 237960
    },
    {
      "epoch": 0.3894594895360788,
      "grad_norm": 1.9138375520706177,
      "learning_rate": 9.281840764876815e-06,
      "loss": 0.0367,
      "step": 237980
    },
    {
      "epoch": 0.3894922199747321,
      "grad_norm": 0.5805254578590393,
      "learning_rate": 9.281774872663299e-06,
      "loss": 0.0423,
      "step": 238000
    },
    {
      "epoch": 0.3895249504133854,
      "grad_norm": 0.14217667281627655,
      "learning_rate": 9.28170898044978e-06,
      "loss": 0.0484,
      "step": 238020
    },
    {
      "epoch": 0.3895576808520388,
      "grad_norm": 3.3789474964141846,
      "learning_rate": 9.281643088236264e-06,
      "loss": 0.0467,
      "step": 238040
    },
    {
      "epoch": 0.3895904112906921,
      "grad_norm": 2.6951327323913574,
      "learning_rate": 9.281577196022746e-06,
      "loss": 0.0492,
      "step": 238060
    },
    {
      "epoch": 0.3896231417293455,
      "grad_norm": 0.1724616289138794,
      "learning_rate": 9.28151130380923e-06,
      "loss": 0.0581,
      "step": 238080
    },
    {
      "epoch": 0.3896558721679988,
      "grad_norm": 1.488916039466858,
      "learning_rate": 9.281445411595712e-06,
      "loss": 0.0476,
      "step": 238100
    },
    {
      "epoch": 0.3896886026066521,
      "grad_norm": 1.5048165321350098,
      "learning_rate": 9.281379519382195e-06,
      "loss": 0.0437,
      "step": 238120
    },
    {
      "epoch": 0.3897213330453055,
      "grad_norm": 2.297609806060791,
      "learning_rate": 9.281313627168679e-06,
      "loss": 0.039,
      "step": 238140
    },
    {
      "epoch": 0.3897540634839588,
      "grad_norm": 0.7469054460525513,
      "learning_rate": 9.28124773495516e-06,
      "loss": 0.0397,
      "step": 238160
    },
    {
      "epoch": 0.3897867939226122,
      "grad_norm": 1.2339171171188354,
      "learning_rate": 9.281181842741644e-06,
      "loss": 0.0471,
      "step": 238180
    },
    {
      "epoch": 0.3898195243612655,
      "grad_norm": 1.0999261140823364,
      "learning_rate": 9.281115950528126e-06,
      "loss": 0.0467,
      "step": 238200
    },
    {
      "epoch": 0.3898522547999188,
      "grad_norm": 1.5140656232833862,
      "learning_rate": 9.28105005831461e-06,
      "loss": 0.0539,
      "step": 238220
    },
    {
      "epoch": 0.3898849852385722,
      "grad_norm": 0.889013409614563,
      "learning_rate": 9.280984166101092e-06,
      "loss": 0.0456,
      "step": 238240
    },
    {
      "epoch": 0.3899177156772255,
      "grad_norm": 1.1763696670532227,
      "learning_rate": 9.280918273887575e-06,
      "loss": 0.0361,
      "step": 238260
    },
    {
      "epoch": 0.38995044611587887,
      "grad_norm": 1.3475395441055298,
      "learning_rate": 9.280852381674057e-06,
      "loss": 0.0638,
      "step": 238280
    },
    {
      "epoch": 0.3899831765545322,
      "grad_norm": 2.361711025238037,
      "learning_rate": 9.280786489460541e-06,
      "loss": 0.049,
      "step": 238300
    },
    {
      "epoch": 0.3900159069931855,
      "grad_norm": 2.3948256969451904,
      "learning_rate": 9.280720597247024e-06,
      "loss": 0.048,
      "step": 238320
    },
    {
      "epoch": 0.3900486374318389,
      "grad_norm": 5.09000301361084,
      "learning_rate": 9.280654705033506e-06,
      "loss": 0.0473,
      "step": 238340
    },
    {
      "epoch": 0.3900813678704922,
      "grad_norm": 0.7682133316993713,
      "learning_rate": 9.28058881281999e-06,
      "loss": 0.0477,
      "step": 238360
    },
    {
      "epoch": 0.39011409830914556,
      "grad_norm": 1.4791374206542969,
      "learning_rate": 9.280522920606474e-06,
      "loss": 0.033,
      "step": 238380
    },
    {
      "epoch": 0.3901468287477989,
      "grad_norm": 1.4124289751052856,
      "learning_rate": 9.280457028392955e-06,
      "loss": 0.0438,
      "step": 238400
    },
    {
      "epoch": 0.3901795591864522,
      "grad_norm": 1.176086664199829,
      "learning_rate": 9.280391136179439e-06,
      "loss": 0.0632,
      "step": 238420
    },
    {
      "epoch": 0.39021228962510557,
      "grad_norm": 1.1754674911499023,
      "learning_rate": 9.280325243965921e-06,
      "loss": 0.0655,
      "step": 238440
    },
    {
      "epoch": 0.3902450200637589,
      "grad_norm": 2.2227628231048584,
      "learning_rate": 9.280259351752404e-06,
      "loss": 0.0433,
      "step": 238460
    },
    {
      "epoch": 0.39027775050241226,
      "grad_norm": 2.0752251148223877,
      "learning_rate": 9.280193459538886e-06,
      "loss": 0.0358,
      "step": 238480
    },
    {
      "epoch": 0.3903104809410656,
      "grad_norm": 1.0905624628067017,
      "learning_rate": 9.28012756732537e-06,
      "loss": 0.033,
      "step": 238500
    },
    {
      "epoch": 0.3903432113797189,
      "grad_norm": 4.256949424743652,
      "learning_rate": 9.280061675111854e-06,
      "loss": 0.0349,
      "step": 238520
    },
    {
      "epoch": 0.39037594181837226,
      "grad_norm": 2.3055593967437744,
      "learning_rate": 9.279995782898335e-06,
      "loss": 0.0314,
      "step": 238540
    },
    {
      "epoch": 0.3904086722570256,
      "grad_norm": 0.8286653757095337,
      "learning_rate": 9.279929890684819e-06,
      "loss": 0.0443,
      "step": 238560
    },
    {
      "epoch": 0.39044140269567895,
      "grad_norm": 1.344980239868164,
      "learning_rate": 9.279863998471301e-06,
      "loss": 0.0318,
      "step": 238580
    },
    {
      "epoch": 0.39047413313433227,
      "grad_norm": 2.4263174533843994,
      "learning_rate": 9.279798106257785e-06,
      "loss": 0.0337,
      "step": 238600
    },
    {
      "epoch": 0.3905068635729856,
      "grad_norm": 1.5638837814331055,
      "learning_rate": 9.279732214044266e-06,
      "loss": 0.0373,
      "step": 238620
    },
    {
      "epoch": 0.39053959401163896,
      "grad_norm": 0.9845116138458252,
      "learning_rate": 9.27966632183075e-06,
      "loss": 0.04,
      "step": 238640
    },
    {
      "epoch": 0.3905723244502923,
      "grad_norm": 0.41417819261550903,
      "learning_rate": 9.279600429617232e-06,
      "loss": 0.0473,
      "step": 238660
    },
    {
      "epoch": 0.39060505488894565,
      "grad_norm": 0.9559687972068787,
      "learning_rate": 9.279534537403715e-06,
      "loss": 0.0411,
      "step": 238680
    },
    {
      "epoch": 0.39063778532759896,
      "grad_norm": 1.733981966972351,
      "learning_rate": 9.279468645190199e-06,
      "loss": 0.0481,
      "step": 238700
    },
    {
      "epoch": 0.3906705157662523,
      "grad_norm": 5.112467288970947,
      "learning_rate": 9.279402752976681e-06,
      "loss": 0.0336,
      "step": 238720
    },
    {
      "epoch": 0.39070324620490565,
      "grad_norm": 2.7797999382019043,
      "learning_rate": 9.279336860763165e-06,
      "loss": 0.0386,
      "step": 238740
    },
    {
      "epoch": 0.39073597664355897,
      "grad_norm": 0.6712125539779663,
      "learning_rate": 9.279270968549648e-06,
      "loss": 0.0483,
      "step": 238760
    },
    {
      "epoch": 0.39076870708221234,
      "grad_norm": 1.3895875215530396,
      "learning_rate": 9.27920507633613e-06,
      "loss": 0.0404,
      "step": 238780
    },
    {
      "epoch": 0.39080143752086566,
      "grad_norm": 2.400794744491577,
      "learning_rate": 9.279139184122614e-06,
      "loss": 0.0455,
      "step": 238800
    },
    {
      "epoch": 0.390834167959519,
      "grad_norm": 2.5589499473571777,
      "learning_rate": 9.279073291909095e-06,
      "loss": 0.0377,
      "step": 238820
    },
    {
      "epoch": 0.39086689839817235,
      "grad_norm": 2.5883595943450928,
      "learning_rate": 9.279007399695579e-06,
      "loss": 0.0446,
      "step": 238840
    },
    {
      "epoch": 0.39089962883682566,
      "grad_norm": 1.3380787372589111,
      "learning_rate": 9.278941507482063e-06,
      "loss": 0.0496,
      "step": 238860
    },
    {
      "epoch": 0.39093235927547904,
      "grad_norm": 3.282365083694458,
      "learning_rate": 9.278875615268545e-06,
      "loss": 0.0343,
      "step": 238880
    },
    {
      "epoch": 0.39096508971413235,
      "grad_norm": 2.187521457672119,
      "learning_rate": 9.278809723055028e-06,
      "loss": 0.0537,
      "step": 238900
    },
    {
      "epoch": 0.39099782015278567,
      "grad_norm": 0.7185972929000854,
      "learning_rate": 9.27874383084151e-06,
      "loss": 0.0429,
      "step": 238920
    },
    {
      "epoch": 0.39103055059143904,
      "grad_norm": 2.0616910457611084,
      "learning_rate": 9.278677938627994e-06,
      "loss": 0.0549,
      "step": 238940
    },
    {
      "epoch": 0.39106328103009236,
      "grad_norm": 0.9531471133232117,
      "learning_rate": 9.278612046414476e-06,
      "loss": 0.0416,
      "step": 238960
    },
    {
      "epoch": 0.39109601146874573,
      "grad_norm": 0.49655818939208984,
      "learning_rate": 9.278546154200959e-06,
      "loss": 0.0447,
      "step": 238980
    },
    {
      "epoch": 0.39112874190739905,
      "grad_norm": 1.1866450309753418,
      "learning_rate": 9.278480261987441e-06,
      "loss": 0.0538,
      "step": 239000
    },
    {
      "epoch": 0.39116147234605236,
      "grad_norm": 2.7550230026245117,
      "learning_rate": 9.278414369773925e-06,
      "loss": 0.0535,
      "step": 239020
    },
    {
      "epoch": 0.39119420278470574,
      "grad_norm": 1.5385276079177856,
      "learning_rate": 9.278348477560406e-06,
      "loss": 0.0584,
      "step": 239040
    },
    {
      "epoch": 0.39122693322335905,
      "grad_norm": 1.4946784973144531,
      "learning_rate": 9.27828258534689e-06,
      "loss": 0.0426,
      "step": 239060
    },
    {
      "epoch": 0.3912596636620124,
      "grad_norm": 1.8220453262329102,
      "learning_rate": 9.278216693133374e-06,
      "loss": 0.0451,
      "step": 239080
    },
    {
      "epoch": 0.39129239410066574,
      "grad_norm": 3.2301599979400635,
      "learning_rate": 9.278150800919856e-06,
      "loss": 0.0407,
      "step": 239100
    },
    {
      "epoch": 0.39132512453931906,
      "grad_norm": 1.939796805381775,
      "learning_rate": 9.278084908706339e-06,
      "loss": 0.0409,
      "step": 239120
    },
    {
      "epoch": 0.39135785497797243,
      "grad_norm": 0.49227985739707947,
      "learning_rate": 9.278019016492823e-06,
      "loss": 0.0469,
      "step": 239140
    },
    {
      "epoch": 0.39139058541662575,
      "grad_norm": 1.0703808069229126,
      "learning_rate": 9.277953124279305e-06,
      "loss": 0.0391,
      "step": 239160
    },
    {
      "epoch": 0.3914233158552791,
      "grad_norm": 0.7293673157691956,
      "learning_rate": 9.277887232065788e-06,
      "loss": 0.0391,
      "step": 239180
    },
    {
      "epoch": 0.39145604629393244,
      "grad_norm": 0.6300100088119507,
      "learning_rate": 9.277821339852272e-06,
      "loss": 0.0543,
      "step": 239200
    },
    {
      "epoch": 0.39148877673258575,
      "grad_norm": 3.1117968559265137,
      "learning_rate": 9.277755447638754e-06,
      "loss": 0.0586,
      "step": 239220
    },
    {
      "epoch": 0.3915215071712391,
      "grad_norm": 1.8488919734954834,
      "learning_rate": 9.277689555425237e-06,
      "loss": 0.0515,
      "step": 239240
    },
    {
      "epoch": 0.39155423760989244,
      "grad_norm": 3.4701037406921387,
      "learning_rate": 9.277623663211719e-06,
      "loss": 0.0603,
      "step": 239260
    },
    {
      "epoch": 0.3915869680485458,
      "grad_norm": 1.6658638715744019,
      "learning_rate": 9.277557770998203e-06,
      "loss": 0.0501,
      "step": 239280
    },
    {
      "epoch": 0.39161969848719913,
      "grad_norm": 2.2774972915649414,
      "learning_rate": 9.277491878784685e-06,
      "loss": 0.0283,
      "step": 239300
    },
    {
      "epoch": 0.39165242892585245,
      "grad_norm": 6.103366374969482,
      "learning_rate": 9.277425986571168e-06,
      "loss": 0.0477,
      "step": 239320
    },
    {
      "epoch": 0.3916851593645058,
      "grad_norm": 3.958930015563965,
      "learning_rate": 9.27736009435765e-06,
      "loss": 0.0439,
      "step": 239340
    },
    {
      "epoch": 0.39171788980315914,
      "grad_norm": 1.2708356380462646,
      "learning_rate": 9.277294202144134e-06,
      "loss": 0.0411,
      "step": 239360
    },
    {
      "epoch": 0.3917506202418125,
      "grad_norm": 2.346421480178833,
      "learning_rate": 9.277228309930616e-06,
      "loss": 0.0611,
      "step": 239380
    },
    {
      "epoch": 0.3917833506804658,
      "grad_norm": 0.9722020626068115,
      "learning_rate": 9.2771624177171e-06,
      "loss": 0.0383,
      "step": 239400
    },
    {
      "epoch": 0.39181608111911914,
      "grad_norm": 1.751326084136963,
      "learning_rate": 9.277096525503581e-06,
      "loss": 0.0327,
      "step": 239420
    },
    {
      "epoch": 0.3918488115577725,
      "grad_norm": 1.6246678829193115,
      "learning_rate": 9.277030633290065e-06,
      "loss": 0.0398,
      "step": 239440
    },
    {
      "epoch": 0.39188154199642583,
      "grad_norm": 1.8441025018692017,
      "learning_rate": 9.276964741076547e-06,
      "loss": 0.0431,
      "step": 239460
    },
    {
      "epoch": 0.39191427243507915,
      "grad_norm": 1.8615871667861938,
      "learning_rate": 9.27689884886303e-06,
      "loss": 0.0452,
      "step": 239480
    },
    {
      "epoch": 0.3919470028737325,
      "grad_norm": 1.4044800996780396,
      "learning_rate": 9.276832956649514e-06,
      "loss": 0.0533,
      "step": 239500
    },
    {
      "epoch": 0.39197973331238584,
      "grad_norm": 3.2137763500213623,
      "learning_rate": 9.276767064435996e-06,
      "loss": 0.0417,
      "step": 239520
    },
    {
      "epoch": 0.3920124637510392,
      "grad_norm": 2.1714236736297607,
      "learning_rate": 9.27670117222248e-06,
      "loss": 0.0437,
      "step": 239540
    },
    {
      "epoch": 0.3920451941896925,
      "grad_norm": 0.8075653910636902,
      "learning_rate": 9.276635280008963e-06,
      "loss": 0.0491,
      "step": 239560
    },
    {
      "epoch": 0.39207792462834584,
      "grad_norm": 1.595375895500183,
      "learning_rate": 9.276569387795445e-06,
      "loss": 0.0433,
      "step": 239580
    },
    {
      "epoch": 0.3921106550669992,
      "grad_norm": 1.694769263267517,
      "learning_rate": 9.276503495581928e-06,
      "loss": 0.0522,
      "step": 239600
    },
    {
      "epoch": 0.39214338550565253,
      "grad_norm": 1.2592211961746216,
      "learning_rate": 9.276437603368412e-06,
      "loss": 0.0451,
      "step": 239620
    },
    {
      "epoch": 0.3921761159443059,
      "grad_norm": 1.4037681818008423,
      "learning_rate": 9.276371711154894e-06,
      "loss": 0.0527,
      "step": 239640
    },
    {
      "epoch": 0.3922088463829592,
      "grad_norm": 1.3411506414413452,
      "learning_rate": 9.276305818941377e-06,
      "loss": 0.0524,
      "step": 239660
    },
    {
      "epoch": 0.39224157682161254,
      "grad_norm": 1.9732049703598022,
      "learning_rate": 9.27623992672786e-06,
      "loss": 0.046,
      "step": 239680
    },
    {
      "epoch": 0.3922743072602659,
      "grad_norm": 1.9720414876937866,
      "learning_rate": 9.276174034514343e-06,
      "loss": 0.0536,
      "step": 239700
    },
    {
      "epoch": 0.3923070376989192,
      "grad_norm": 1.6763081550598145,
      "learning_rate": 9.276108142300825e-06,
      "loss": 0.0529,
      "step": 239720
    },
    {
      "epoch": 0.3923397681375726,
      "grad_norm": 1.166032075881958,
      "learning_rate": 9.276042250087308e-06,
      "loss": 0.0494,
      "step": 239740
    },
    {
      "epoch": 0.3923724985762259,
      "grad_norm": 1.331676721572876,
      "learning_rate": 9.27597635787379e-06,
      "loss": 0.0473,
      "step": 239760
    },
    {
      "epoch": 0.39240522901487923,
      "grad_norm": 1.1005725860595703,
      "learning_rate": 9.275910465660274e-06,
      "loss": 0.0423,
      "step": 239780
    },
    {
      "epoch": 0.3924379594535326,
      "grad_norm": 1.8871791362762451,
      "learning_rate": 9.275844573446756e-06,
      "loss": 0.0434,
      "step": 239800
    },
    {
      "epoch": 0.3924706898921859,
      "grad_norm": 1.1108630895614624,
      "learning_rate": 9.27577868123324e-06,
      "loss": 0.0346,
      "step": 239820
    },
    {
      "epoch": 0.3925034203308393,
      "grad_norm": 0.4765666425228119,
      "learning_rate": 9.275712789019721e-06,
      "loss": 0.0481,
      "step": 239840
    },
    {
      "epoch": 0.3925361507694926,
      "grad_norm": 1.1361221075057983,
      "learning_rate": 9.275646896806205e-06,
      "loss": 0.0516,
      "step": 239860
    },
    {
      "epoch": 0.3925688812081459,
      "grad_norm": 2.1333420276641846,
      "learning_rate": 9.275581004592688e-06,
      "loss": 0.0433,
      "step": 239880
    },
    {
      "epoch": 0.3926016116467993,
      "grad_norm": 1.5360325574874878,
      "learning_rate": 9.27551511237917e-06,
      "loss": 0.0569,
      "step": 239900
    },
    {
      "epoch": 0.3926343420854526,
      "grad_norm": 9.459602355957031,
      "learning_rate": 9.275449220165654e-06,
      "loss": 0.0607,
      "step": 239920
    },
    {
      "epoch": 0.392667072524106,
      "grad_norm": 1.4137831926345825,
      "learning_rate": 9.275383327952137e-06,
      "loss": 0.036,
      "step": 239940
    },
    {
      "epoch": 0.3926998029627593,
      "grad_norm": 1.7847042083740234,
      "learning_rate": 9.27531743573862e-06,
      "loss": 0.039,
      "step": 239960
    },
    {
      "epoch": 0.3927325334014126,
      "grad_norm": 0.377516508102417,
      "learning_rate": 9.275251543525103e-06,
      "loss": 0.059,
      "step": 239980
    },
    {
      "epoch": 0.392765263840066,
      "grad_norm": 1.4056276082992554,
      "learning_rate": 9.275185651311586e-06,
      "loss": 0.0667,
      "step": 240000
    },
    {
      "epoch": 0.3927979942787193,
      "grad_norm": 3.8675520420074463,
      "learning_rate": 9.275119759098068e-06,
      "loss": 0.048,
      "step": 240020
    },
    {
      "epoch": 0.3928307247173727,
      "grad_norm": 1.0784586668014526,
      "learning_rate": 9.275053866884552e-06,
      "loss": 0.047,
      "step": 240040
    },
    {
      "epoch": 0.392863455156026,
      "grad_norm": 1.758663296699524,
      "learning_rate": 9.274987974671034e-06,
      "loss": 0.0456,
      "step": 240060
    },
    {
      "epoch": 0.3928961855946793,
      "grad_norm": 0.6472897529602051,
      "learning_rate": 9.274922082457517e-06,
      "loss": 0.0444,
      "step": 240080
    },
    {
      "epoch": 0.3929289160333327,
      "grad_norm": 1.4900048971176147,
      "learning_rate": 9.274856190244e-06,
      "loss": 0.0305,
      "step": 240100
    },
    {
      "epoch": 0.392961646471986,
      "grad_norm": 2.335005044937134,
      "learning_rate": 9.274790298030483e-06,
      "loss": 0.0496,
      "step": 240120
    },
    {
      "epoch": 0.3929943769106394,
      "grad_norm": 2.3207454681396484,
      "learning_rate": 9.274724405816965e-06,
      "loss": 0.0455,
      "step": 240140
    },
    {
      "epoch": 0.3930271073492927,
      "grad_norm": 1.0022860765457153,
      "learning_rate": 9.274658513603448e-06,
      "loss": 0.0497,
      "step": 240160
    },
    {
      "epoch": 0.393059837787946,
      "grad_norm": 0.6657879948616028,
      "learning_rate": 9.27459262138993e-06,
      "loss": 0.0337,
      "step": 240180
    },
    {
      "epoch": 0.3930925682265994,
      "grad_norm": 1.8179539442062378,
      "learning_rate": 9.274526729176414e-06,
      "loss": 0.0499,
      "step": 240200
    },
    {
      "epoch": 0.3931252986652527,
      "grad_norm": 5.294517993927002,
      "learning_rate": 9.274460836962896e-06,
      "loss": 0.0483,
      "step": 240220
    },
    {
      "epoch": 0.39315802910390607,
      "grad_norm": 1.6000139713287354,
      "learning_rate": 9.27439494474938e-06,
      "loss": 0.0536,
      "step": 240240
    },
    {
      "epoch": 0.3931907595425594,
      "grad_norm": 0.6554884314537048,
      "learning_rate": 9.274329052535863e-06,
      "loss": 0.0482,
      "step": 240260
    },
    {
      "epoch": 0.3932234899812127,
      "grad_norm": 1.8115108013153076,
      "learning_rate": 9.274263160322345e-06,
      "loss": 0.0355,
      "step": 240280
    },
    {
      "epoch": 0.3932562204198661,
      "grad_norm": 3.539127826690674,
      "learning_rate": 9.274197268108828e-06,
      "loss": 0.0464,
      "step": 240300
    },
    {
      "epoch": 0.3932889508585194,
      "grad_norm": 0.2802570164203644,
      "learning_rate": 9.27413137589531e-06,
      "loss": 0.0526,
      "step": 240320
    },
    {
      "epoch": 0.39332168129717276,
      "grad_norm": 1.3629800081253052,
      "learning_rate": 9.274065483681794e-06,
      "loss": 0.0398,
      "step": 240340
    },
    {
      "epoch": 0.3933544117358261,
      "grad_norm": 3.6733412742614746,
      "learning_rate": 9.273999591468277e-06,
      "loss": 0.0335,
      "step": 240360
    },
    {
      "epoch": 0.3933871421744794,
      "grad_norm": 0.49923190474510193,
      "learning_rate": 9.27393369925476e-06,
      "loss": 0.0417,
      "step": 240380
    },
    {
      "epoch": 0.39341987261313277,
      "grad_norm": 1.4454028606414795,
      "learning_rate": 9.273867807041243e-06,
      "loss": 0.0435,
      "step": 240400
    },
    {
      "epoch": 0.3934526030517861,
      "grad_norm": 3.939953565597534,
      "learning_rate": 9.273801914827727e-06,
      "loss": 0.0391,
      "step": 240420
    },
    {
      "epoch": 0.39348533349043946,
      "grad_norm": 1.245628833770752,
      "learning_rate": 9.273736022614208e-06,
      "loss": 0.0316,
      "step": 240440
    },
    {
      "epoch": 0.3935180639290928,
      "grad_norm": 3.5925025939941406,
      "learning_rate": 9.273670130400692e-06,
      "loss": 0.0525,
      "step": 240460
    },
    {
      "epoch": 0.3935507943677461,
      "grad_norm": 1.7252700328826904,
      "learning_rate": 9.273604238187174e-06,
      "loss": 0.0379,
      "step": 240480
    },
    {
      "epoch": 0.39358352480639947,
      "grad_norm": 1.5255560874938965,
      "learning_rate": 9.273538345973657e-06,
      "loss": 0.0386,
      "step": 240500
    },
    {
      "epoch": 0.3936162552450528,
      "grad_norm": 1.3815170526504517,
      "learning_rate": 9.27347245376014e-06,
      "loss": 0.0506,
      "step": 240520
    },
    {
      "epoch": 0.39364898568370615,
      "grad_norm": 2.5911457538604736,
      "learning_rate": 9.273406561546623e-06,
      "loss": 0.048,
      "step": 240540
    },
    {
      "epoch": 0.39368171612235947,
      "grad_norm": 2.007803201675415,
      "learning_rate": 9.273340669333105e-06,
      "loss": 0.041,
      "step": 240560
    },
    {
      "epoch": 0.3937144465610128,
      "grad_norm": 0.6790440678596497,
      "learning_rate": 9.273274777119588e-06,
      "loss": 0.0422,
      "step": 240580
    },
    {
      "epoch": 0.39374717699966616,
      "grad_norm": 2.0599889755249023,
      "learning_rate": 9.273208884906072e-06,
      "loss": 0.0438,
      "step": 240600
    },
    {
      "epoch": 0.3937799074383195,
      "grad_norm": 0.4025115966796875,
      "learning_rate": 9.273142992692554e-06,
      "loss": 0.0532,
      "step": 240620
    },
    {
      "epoch": 0.39381263787697285,
      "grad_norm": 0.3748440146446228,
      "learning_rate": 9.273077100479038e-06,
      "loss": 0.0488,
      "step": 240640
    },
    {
      "epoch": 0.39384536831562617,
      "grad_norm": 0.9232982993125916,
      "learning_rate": 9.27301120826552e-06,
      "loss": 0.0391,
      "step": 240660
    },
    {
      "epoch": 0.3938780987542795,
      "grad_norm": 1.3711780309677124,
      "learning_rate": 9.272945316052003e-06,
      "loss": 0.0454,
      "step": 240680
    },
    {
      "epoch": 0.39391082919293285,
      "grad_norm": 0.5528669357299805,
      "learning_rate": 9.272879423838485e-06,
      "loss": 0.0365,
      "step": 240700
    },
    {
      "epoch": 0.39394355963158617,
      "grad_norm": 3.5271432399749756,
      "learning_rate": 9.272813531624968e-06,
      "loss": 0.0382,
      "step": 240720
    },
    {
      "epoch": 0.39397629007023954,
      "grad_norm": 0.21217912435531616,
      "learning_rate": 9.272747639411452e-06,
      "loss": 0.053,
      "step": 240740
    },
    {
      "epoch": 0.39400902050889286,
      "grad_norm": 1.2587591409683228,
      "learning_rate": 9.272681747197934e-06,
      "loss": 0.0495,
      "step": 240760
    },
    {
      "epoch": 0.3940417509475462,
      "grad_norm": 3.001810073852539,
      "learning_rate": 9.272615854984418e-06,
      "loss": 0.0365,
      "step": 240780
    },
    {
      "epoch": 0.39407448138619955,
      "grad_norm": 2.8197848796844482,
      "learning_rate": 9.272549962770901e-06,
      "loss": 0.0472,
      "step": 240800
    },
    {
      "epoch": 0.39410721182485287,
      "grad_norm": 3.137880802154541,
      "learning_rate": 9.272484070557383e-06,
      "loss": 0.0309,
      "step": 240820
    },
    {
      "epoch": 0.39413994226350624,
      "grad_norm": 2.216831684112549,
      "learning_rate": 9.272418178343867e-06,
      "loss": 0.0367,
      "step": 240840
    },
    {
      "epoch": 0.39417267270215955,
      "grad_norm": 2.63968563079834,
      "learning_rate": 9.272352286130348e-06,
      "loss": 0.0625,
      "step": 240860
    },
    {
      "epoch": 0.39420540314081287,
      "grad_norm": 1.535887360572815,
      "learning_rate": 9.272286393916832e-06,
      "loss": 0.0261,
      "step": 240880
    },
    {
      "epoch": 0.39423813357946624,
      "grad_norm": 2.0033841133117676,
      "learning_rate": 9.272220501703314e-06,
      "loss": 0.0433,
      "step": 240900
    },
    {
      "epoch": 0.39427086401811956,
      "grad_norm": 1.4252370595932007,
      "learning_rate": 9.272154609489798e-06,
      "loss": 0.0557,
      "step": 240920
    },
    {
      "epoch": 0.39430359445677293,
      "grad_norm": 0.43566960096359253,
      "learning_rate": 9.27208871727628e-06,
      "loss": 0.044,
      "step": 240940
    },
    {
      "epoch": 0.39433632489542625,
      "grad_norm": 0.7598493099212646,
      "learning_rate": 9.272022825062763e-06,
      "loss": 0.0337,
      "step": 240960
    },
    {
      "epoch": 0.39436905533407957,
      "grad_norm": 0.4903170168399811,
      "learning_rate": 9.271956932849247e-06,
      "loss": 0.0447,
      "step": 240980
    },
    {
      "epoch": 0.39440178577273294,
      "grad_norm": 14.023022651672363,
      "learning_rate": 9.271891040635729e-06,
      "loss": 0.0425,
      "step": 241000
    },
    {
      "epoch": 0.39443451621138625,
      "grad_norm": 0.4852624535560608,
      "learning_rate": 9.271825148422212e-06,
      "loss": 0.0483,
      "step": 241020
    },
    {
      "epoch": 0.3944672466500396,
      "grad_norm": 1.2498680353164673,
      "learning_rate": 9.271759256208694e-06,
      "loss": 0.0326,
      "step": 241040
    },
    {
      "epoch": 0.39449997708869294,
      "grad_norm": 2.3901333808898926,
      "learning_rate": 9.271693363995178e-06,
      "loss": 0.0516,
      "step": 241060
    },
    {
      "epoch": 0.39453270752734626,
      "grad_norm": 0.6872141361236572,
      "learning_rate": 9.27162747178166e-06,
      "loss": 0.0443,
      "step": 241080
    },
    {
      "epoch": 0.39456543796599963,
      "grad_norm": 1.9569600820541382,
      "learning_rate": 9.271561579568143e-06,
      "loss": 0.0443,
      "step": 241100
    },
    {
      "epoch": 0.39459816840465295,
      "grad_norm": 1.636646032333374,
      "learning_rate": 9.271495687354625e-06,
      "loss": 0.0511,
      "step": 241120
    },
    {
      "epoch": 0.3946308988433063,
      "grad_norm": 1.3057886362075806,
      "learning_rate": 9.271429795141109e-06,
      "loss": 0.0498,
      "step": 241140
    },
    {
      "epoch": 0.39466362928195964,
      "grad_norm": 3.650566816329956,
      "learning_rate": 9.271363902927592e-06,
      "loss": 0.0336,
      "step": 241160
    },
    {
      "epoch": 0.39469635972061295,
      "grad_norm": 0.13566088676452637,
      "learning_rate": 9.271298010714074e-06,
      "loss": 0.048,
      "step": 241180
    },
    {
      "epoch": 0.3947290901592663,
      "grad_norm": 1.737268090248108,
      "learning_rate": 9.271232118500558e-06,
      "loss": 0.0472,
      "step": 241200
    },
    {
      "epoch": 0.39476182059791964,
      "grad_norm": 0.4635489284992218,
      "learning_rate": 9.271166226287041e-06,
      "loss": 0.0446,
      "step": 241220
    },
    {
      "epoch": 0.394794551036573,
      "grad_norm": 1.8194301128387451,
      "learning_rate": 9.271100334073523e-06,
      "loss": 0.0416,
      "step": 241240
    },
    {
      "epoch": 0.39482728147522633,
      "grad_norm": 3.422379732131958,
      "learning_rate": 9.271034441860007e-06,
      "loss": 0.053,
      "step": 241260
    },
    {
      "epoch": 0.39486001191387965,
      "grad_norm": 1.5352113246917725,
      "learning_rate": 9.270968549646489e-06,
      "loss": 0.0359,
      "step": 241280
    },
    {
      "epoch": 0.394892742352533,
      "grad_norm": 0.9720363616943359,
      "learning_rate": 9.270902657432972e-06,
      "loss": 0.0623,
      "step": 241300
    },
    {
      "epoch": 0.39492547279118634,
      "grad_norm": 1.2348589897155762,
      "learning_rate": 9.270836765219456e-06,
      "loss": 0.0289,
      "step": 241320
    },
    {
      "epoch": 0.3949582032298397,
      "grad_norm": 1.4564529657363892,
      "learning_rate": 9.270770873005938e-06,
      "loss": 0.0402,
      "step": 241340
    },
    {
      "epoch": 0.394990933668493,
      "grad_norm": 1.2255531549453735,
      "learning_rate": 9.270704980792421e-06,
      "loss": 0.0479,
      "step": 241360
    },
    {
      "epoch": 0.39502366410714634,
      "grad_norm": 1.0786089897155762,
      "learning_rate": 9.270639088578903e-06,
      "loss": 0.0509,
      "step": 241380
    },
    {
      "epoch": 0.3950563945457997,
      "grad_norm": 1.592658281326294,
      "learning_rate": 9.270573196365387e-06,
      "loss": 0.041,
      "step": 241400
    },
    {
      "epoch": 0.39508912498445303,
      "grad_norm": 1.1781575679779053,
      "learning_rate": 9.270507304151869e-06,
      "loss": 0.0541,
      "step": 241420
    },
    {
      "epoch": 0.3951218554231064,
      "grad_norm": 0.29241544008255005,
      "learning_rate": 9.270441411938352e-06,
      "loss": 0.0426,
      "step": 241440
    },
    {
      "epoch": 0.3951545858617597,
      "grad_norm": 0.5735398530960083,
      "learning_rate": 9.270375519724834e-06,
      "loss": 0.0493,
      "step": 241460
    },
    {
      "epoch": 0.39518731630041304,
      "grad_norm": 5.172743797302246,
      "learning_rate": 9.270309627511318e-06,
      "loss": 0.0347,
      "step": 241480
    },
    {
      "epoch": 0.3952200467390664,
      "grad_norm": 0.7281337976455688,
      "learning_rate": 9.2702437352978e-06,
      "loss": 0.0358,
      "step": 241500
    },
    {
      "epoch": 0.3952527771777197,
      "grad_norm": 1.070875644683838,
      "learning_rate": 9.270177843084283e-06,
      "loss": 0.0452,
      "step": 241520
    },
    {
      "epoch": 0.3952855076163731,
      "grad_norm": 1.840269684791565,
      "learning_rate": 9.270111950870767e-06,
      "loss": 0.0445,
      "step": 241540
    },
    {
      "epoch": 0.3953182380550264,
      "grad_norm": 1.449950933456421,
      "learning_rate": 9.270046058657249e-06,
      "loss": 0.0552,
      "step": 241560
    },
    {
      "epoch": 0.39535096849367973,
      "grad_norm": 1.2272380590438843,
      "learning_rate": 9.269980166443732e-06,
      "loss": 0.0494,
      "step": 241580
    },
    {
      "epoch": 0.3953836989323331,
      "grad_norm": 1.3214300870895386,
      "learning_rate": 9.269914274230216e-06,
      "loss": 0.0495,
      "step": 241600
    },
    {
      "epoch": 0.3954164293709864,
      "grad_norm": 0.6860986948013306,
      "learning_rate": 9.269848382016698e-06,
      "loss": 0.0428,
      "step": 241620
    },
    {
      "epoch": 0.3954491598096398,
      "grad_norm": 1.2935152053833008,
      "learning_rate": 9.269782489803181e-06,
      "loss": 0.037,
      "step": 241640
    },
    {
      "epoch": 0.3954818902482931,
      "grad_norm": 4.325201988220215,
      "learning_rate": 9.269716597589665e-06,
      "loss": 0.0467,
      "step": 241660
    },
    {
      "epoch": 0.3955146206869464,
      "grad_norm": 1.331357717514038,
      "learning_rate": 9.269650705376147e-06,
      "loss": 0.0578,
      "step": 241680
    },
    {
      "epoch": 0.3955473511255998,
      "grad_norm": 0.1544831395149231,
      "learning_rate": 9.26958481316263e-06,
      "loss": 0.0453,
      "step": 241700
    },
    {
      "epoch": 0.3955800815642531,
      "grad_norm": 1.2208778858184814,
      "learning_rate": 9.269518920949112e-06,
      "loss": 0.0508,
      "step": 241720
    },
    {
      "epoch": 0.3956128120029065,
      "grad_norm": 1.2302008867263794,
      "learning_rate": 9.269453028735596e-06,
      "loss": 0.0461,
      "step": 241740
    },
    {
      "epoch": 0.3956455424415598,
      "grad_norm": 0.8866505026817322,
      "learning_rate": 9.269387136522078e-06,
      "loss": 0.0572,
      "step": 241760
    },
    {
      "epoch": 0.3956782728802131,
      "grad_norm": 0.9239701628684998,
      "learning_rate": 9.269321244308561e-06,
      "loss": 0.0406,
      "step": 241780
    },
    {
      "epoch": 0.3957110033188665,
      "grad_norm": 2.4715209007263184,
      "learning_rate": 9.269255352095043e-06,
      "loss": 0.0427,
      "step": 241800
    },
    {
      "epoch": 0.3957437337575198,
      "grad_norm": 3.9907565116882324,
      "learning_rate": 9.269189459881527e-06,
      "loss": 0.046,
      "step": 241820
    },
    {
      "epoch": 0.3957764641961732,
      "grad_norm": 0.7887654304504395,
      "learning_rate": 9.269123567668009e-06,
      "loss": 0.0531,
      "step": 241840
    },
    {
      "epoch": 0.3958091946348265,
      "grad_norm": 1.7553682327270508,
      "learning_rate": 9.269057675454492e-06,
      "loss": 0.0365,
      "step": 241860
    },
    {
      "epoch": 0.3958419250734798,
      "grad_norm": 2.073509454727173,
      "learning_rate": 9.268991783240974e-06,
      "loss": 0.0466,
      "step": 241880
    },
    {
      "epoch": 0.3958746555121332,
      "grad_norm": 4.111740589141846,
      "learning_rate": 9.268925891027458e-06,
      "loss": 0.0424,
      "step": 241900
    },
    {
      "epoch": 0.3959073859507865,
      "grad_norm": 2.5429272651672363,
      "learning_rate": 9.268859998813941e-06,
      "loss": 0.0495,
      "step": 241920
    },
    {
      "epoch": 0.3959401163894399,
      "grad_norm": 1.3043855428695679,
      "learning_rate": 9.268794106600423e-06,
      "loss": 0.045,
      "step": 241940
    },
    {
      "epoch": 0.3959728468280932,
      "grad_norm": 1.8350911140441895,
      "learning_rate": 9.268728214386907e-06,
      "loss": 0.0524,
      "step": 241960
    },
    {
      "epoch": 0.3960055772667465,
      "grad_norm": 2.482433795928955,
      "learning_rate": 9.26866232217339e-06,
      "loss": 0.0465,
      "step": 241980
    },
    {
      "epoch": 0.3960383077053999,
      "grad_norm": 3.1257710456848145,
      "learning_rate": 9.268596429959872e-06,
      "loss": 0.0516,
      "step": 242000
    },
    {
      "epoch": 0.3960710381440532,
      "grad_norm": 4.742922306060791,
      "learning_rate": 9.268530537746356e-06,
      "loss": 0.0424,
      "step": 242020
    },
    {
      "epoch": 0.3961037685827066,
      "grad_norm": 0.9731509685516357,
      "learning_rate": 9.26846464553284e-06,
      "loss": 0.0361,
      "step": 242040
    },
    {
      "epoch": 0.3961364990213599,
      "grad_norm": 1.406825065612793,
      "learning_rate": 9.268398753319321e-06,
      "loss": 0.0453,
      "step": 242060
    },
    {
      "epoch": 0.3961692294600132,
      "grad_norm": 1.3185441493988037,
      "learning_rate": 9.268332861105805e-06,
      "loss": 0.0606,
      "step": 242080
    },
    {
      "epoch": 0.3962019598986666,
      "grad_norm": 0.61594158411026,
      "learning_rate": 9.268266968892287e-06,
      "loss": 0.0498,
      "step": 242100
    },
    {
      "epoch": 0.3962346903373199,
      "grad_norm": 3.895097017288208,
      "learning_rate": 9.26820107667877e-06,
      "loss": 0.0499,
      "step": 242120
    },
    {
      "epoch": 0.39626742077597327,
      "grad_norm": 7.313085556030273,
      "learning_rate": 9.268135184465252e-06,
      "loss": 0.0534,
      "step": 242140
    },
    {
      "epoch": 0.3963001512146266,
      "grad_norm": 2.778449296951294,
      "learning_rate": 9.268069292251736e-06,
      "loss": 0.0473,
      "step": 242160
    },
    {
      "epoch": 0.3963328816532799,
      "grad_norm": 0.8609729409217834,
      "learning_rate": 9.268003400038218e-06,
      "loss": 0.0367,
      "step": 242180
    },
    {
      "epoch": 0.3963656120919333,
      "grad_norm": 3.1150434017181396,
      "learning_rate": 9.267937507824701e-06,
      "loss": 0.0367,
      "step": 242200
    },
    {
      "epoch": 0.3963983425305866,
      "grad_norm": 1.8742177486419678,
      "learning_rate": 9.267871615611183e-06,
      "loss": 0.0378,
      "step": 242220
    },
    {
      "epoch": 0.3964310729692399,
      "grad_norm": 2.430063009262085,
      "learning_rate": 9.267805723397667e-06,
      "loss": 0.0488,
      "step": 242240
    },
    {
      "epoch": 0.3964638034078933,
      "grad_norm": 1.4101260900497437,
      "learning_rate": 9.267739831184149e-06,
      "loss": 0.0422,
      "step": 242260
    },
    {
      "epoch": 0.3964965338465466,
      "grad_norm": 1.637292742729187,
      "learning_rate": 9.267673938970632e-06,
      "loss": 0.0392,
      "step": 242280
    },
    {
      "epoch": 0.39652926428519997,
      "grad_norm": 1.8668888807296753,
      "learning_rate": 9.267608046757114e-06,
      "loss": 0.0681,
      "step": 242300
    },
    {
      "epoch": 0.3965619947238533,
      "grad_norm": 1.1659793853759766,
      "learning_rate": 9.267542154543598e-06,
      "loss": 0.0403,
      "step": 242320
    },
    {
      "epoch": 0.3965947251625066,
      "grad_norm": 1.545639157295227,
      "learning_rate": 9.267476262330081e-06,
      "loss": 0.0329,
      "step": 242340
    },
    {
      "epoch": 0.39662745560116,
      "grad_norm": 1.4188653230667114,
      "learning_rate": 9.267410370116563e-06,
      "loss": 0.0559,
      "step": 242360
    },
    {
      "epoch": 0.3966601860398133,
      "grad_norm": 1.4746958017349243,
      "learning_rate": 9.267344477903047e-06,
      "loss": 0.0383,
      "step": 242380
    },
    {
      "epoch": 0.39669291647846666,
      "grad_norm": 0.40658411383628845,
      "learning_rate": 9.26727858568953e-06,
      "loss": 0.0427,
      "step": 242400
    },
    {
      "epoch": 0.39672564691712,
      "grad_norm": 2.4323365688323975,
      "learning_rate": 9.267212693476012e-06,
      "loss": 0.0565,
      "step": 242420
    },
    {
      "epoch": 0.3967583773557733,
      "grad_norm": 4.257863521575928,
      "learning_rate": 9.267146801262496e-06,
      "loss": 0.0452,
      "step": 242440
    },
    {
      "epoch": 0.39679110779442667,
      "grad_norm": 2.358267307281494,
      "learning_rate": 9.26708090904898e-06,
      "loss": 0.0382,
      "step": 242460
    },
    {
      "epoch": 0.39682383823308,
      "grad_norm": 1.9824936389923096,
      "learning_rate": 9.267015016835461e-06,
      "loss": 0.0573,
      "step": 242480
    },
    {
      "epoch": 0.39685656867173336,
      "grad_norm": 0.8189141750335693,
      "learning_rate": 9.266949124621945e-06,
      "loss": 0.0309,
      "step": 242500
    },
    {
      "epoch": 0.3968892991103867,
      "grad_norm": 4.538649082183838,
      "learning_rate": 9.266883232408427e-06,
      "loss": 0.0437,
      "step": 242520
    },
    {
      "epoch": 0.39692202954904,
      "grad_norm": 1.983366847038269,
      "learning_rate": 9.26681734019491e-06,
      "loss": 0.04,
      "step": 242540
    },
    {
      "epoch": 0.39695475998769336,
      "grad_norm": 0.6554380655288696,
      "learning_rate": 9.266751447981392e-06,
      "loss": 0.0417,
      "step": 242560
    },
    {
      "epoch": 0.3969874904263467,
      "grad_norm": 2.5817413330078125,
      "learning_rate": 9.266685555767876e-06,
      "loss": 0.0542,
      "step": 242580
    },
    {
      "epoch": 0.39702022086500005,
      "grad_norm": 3.0750443935394287,
      "learning_rate": 9.266619663554358e-06,
      "loss": 0.0577,
      "step": 242600
    },
    {
      "epoch": 0.39705295130365337,
      "grad_norm": 1.649223804473877,
      "learning_rate": 9.266553771340841e-06,
      "loss": 0.0576,
      "step": 242620
    },
    {
      "epoch": 0.3970856817423067,
      "grad_norm": 0.7830869555473328,
      "learning_rate": 9.266487879127323e-06,
      "loss": 0.0321,
      "step": 242640
    },
    {
      "epoch": 0.39711841218096006,
      "grad_norm": 1.0741069316864014,
      "learning_rate": 9.266421986913807e-06,
      "loss": 0.045,
      "step": 242660
    },
    {
      "epoch": 0.3971511426196134,
      "grad_norm": 0.6248376369476318,
      "learning_rate": 9.266356094700289e-06,
      "loss": 0.0406,
      "step": 242680
    },
    {
      "epoch": 0.39718387305826675,
      "grad_norm": 2.0774309635162354,
      "learning_rate": 9.266290202486772e-06,
      "loss": 0.0412,
      "step": 242700
    },
    {
      "epoch": 0.39721660349692006,
      "grad_norm": 0.875414252281189,
      "learning_rate": 9.266224310273256e-06,
      "loss": 0.0389,
      "step": 242720
    },
    {
      "epoch": 0.3972493339355734,
      "grad_norm": 2.0637850761413574,
      "learning_rate": 9.266158418059738e-06,
      "loss": 0.0395,
      "step": 242740
    },
    {
      "epoch": 0.39728206437422675,
      "grad_norm": 1.852598786354065,
      "learning_rate": 9.266092525846221e-06,
      "loss": 0.0439,
      "step": 242760
    },
    {
      "epoch": 0.39731479481288007,
      "grad_norm": 1.0929323434829712,
      "learning_rate": 9.266026633632705e-06,
      "loss": 0.0425,
      "step": 242780
    },
    {
      "epoch": 0.39734752525153344,
      "grad_norm": 1.7392854690551758,
      "learning_rate": 9.265960741419187e-06,
      "loss": 0.045,
      "step": 242800
    },
    {
      "epoch": 0.39738025569018676,
      "grad_norm": 1.7451729774475098,
      "learning_rate": 9.26589484920567e-06,
      "loss": 0.0523,
      "step": 242820
    },
    {
      "epoch": 0.3974129861288401,
      "grad_norm": 1.5629944801330566,
      "learning_rate": 9.265828956992154e-06,
      "loss": 0.0513,
      "step": 242840
    },
    {
      "epoch": 0.39744571656749345,
      "grad_norm": 1.9153224229812622,
      "learning_rate": 9.265763064778636e-06,
      "loss": 0.0463,
      "step": 242860
    },
    {
      "epoch": 0.39747844700614676,
      "grad_norm": 0.9873681664466858,
      "learning_rate": 9.26569717256512e-06,
      "loss": 0.045,
      "step": 242880
    },
    {
      "epoch": 0.39751117744480013,
      "grad_norm": 3.9103171825408936,
      "learning_rate": 9.265631280351602e-06,
      "loss": 0.0654,
      "step": 242900
    },
    {
      "epoch": 0.39754390788345345,
      "grad_norm": 2.558943271636963,
      "learning_rate": 9.265565388138085e-06,
      "loss": 0.051,
      "step": 242920
    },
    {
      "epoch": 0.39757663832210677,
      "grad_norm": 3.371126890182495,
      "learning_rate": 9.265499495924567e-06,
      "loss": 0.0372,
      "step": 242940
    },
    {
      "epoch": 0.39760936876076014,
      "grad_norm": 0.6981062889099121,
      "learning_rate": 9.26543360371105e-06,
      "loss": 0.0501,
      "step": 242960
    },
    {
      "epoch": 0.39764209919941346,
      "grad_norm": 1.393104910850525,
      "learning_rate": 9.265367711497532e-06,
      "loss": 0.0573,
      "step": 242980
    },
    {
      "epoch": 0.39767482963806683,
      "grad_norm": 0.4361740052700043,
      "learning_rate": 9.265301819284016e-06,
      "loss": 0.0424,
      "step": 243000
    },
    {
      "epoch": 0.39770756007672015,
      "grad_norm": 0.8831126093864441,
      "learning_rate": 9.265235927070498e-06,
      "loss": 0.0516,
      "step": 243020
    },
    {
      "epoch": 0.39774029051537346,
      "grad_norm": 5.259583473205566,
      "learning_rate": 9.265170034856982e-06,
      "loss": 0.0486,
      "step": 243040
    },
    {
      "epoch": 0.39777302095402683,
      "grad_norm": 1.7055267095565796,
      "learning_rate": 9.265104142643463e-06,
      "loss": 0.0635,
      "step": 243060
    },
    {
      "epoch": 0.39780575139268015,
      "grad_norm": 2.9234728813171387,
      "learning_rate": 9.265038250429947e-06,
      "loss": 0.0485,
      "step": 243080
    },
    {
      "epoch": 0.3978384818313335,
      "grad_norm": 2.47680926322937,
      "learning_rate": 9.26497235821643e-06,
      "loss": 0.0456,
      "step": 243100
    },
    {
      "epoch": 0.39787121226998684,
      "grad_norm": 2.6970832347869873,
      "learning_rate": 9.264906466002912e-06,
      "loss": 0.0493,
      "step": 243120
    },
    {
      "epoch": 0.39790394270864016,
      "grad_norm": 1.9920237064361572,
      "learning_rate": 9.264840573789396e-06,
      "loss": 0.0483,
      "step": 243140
    },
    {
      "epoch": 0.39793667314729353,
      "grad_norm": 3.224001169204712,
      "learning_rate": 9.264774681575878e-06,
      "loss": 0.041,
      "step": 243160
    },
    {
      "epoch": 0.39796940358594685,
      "grad_norm": 3.2271225452423096,
      "learning_rate": 9.264708789362362e-06,
      "loss": 0.0377,
      "step": 243180
    },
    {
      "epoch": 0.3980021340246002,
      "grad_norm": 0.35768789052963257,
      "learning_rate": 9.264642897148845e-06,
      "loss": 0.0353,
      "step": 243200
    },
    {
      "epoch": 0.39803486446325353,
      "grad_norm": 1.2825205326080322,
      "learning_rate": 9.264577004935327e-06,
      "loss": 0.0432,
      "step": 243220
    },
    {
      "epoch": 0.39806759490190685,
      "grad_norm": 0.5102591514587402,
      "learning_rate": 9.26451111272181e-06,
      "loss": 0.0397,
      "step": 243240
    },
    {
      "epoch": 0.3981003253405602,
      "grad_norm": 2.396564483642578,
      "learning_rate": 9.264445220508294e-06,
      "loss": 0.0441,
      "step": 243260
    },
    {
      "epoch": 0.39813305577921354,
      "grad_norm": 8.73640251159668,
      "learning_rate": 9.264379328294776e-06,
      "loss": 0.0367,
      "step": 243280
    },
    {
      "epoch": 0.3981657862178669,
      "grad_norm": 1.7828431129455566,
      "learning_rate": 9.26431343608126e-06,
      "loss": 0.0587,
      "step": 243300
    },
    {
      "epoch": 0.39819851665652023,
      "grad_norm": 1.0799686908721924,
      "learning_rate": 9.264247543867742e-06,
      "loss": 0.042,
      "step": 243320
    },
    {
      "epoch": 0.39823124709517355,
      "grad_norm": 2.258444309234619,
      "learning_rate": 9.264181651654225e-06,
      "loss": 0.0475,
      "step": 243340
    },
    {
      "epoch": 0.3982639775338269,
      "grad_norm": 2.3802804946899414,
      "learning_rate": 9.264115759440707e-06,
      "loss": 0.0464,
      "step": 243360
    },
    {
      "epoch": 0.39829670797248024,
      "grad_norm": 2.509063720703125,
      "learning_rate": 9.26404986722719e-06,
      "loss": 0.0315,
      "step": 243380
    },
    {
      "epoch": 0.3983294384111336,
      "grad_norm": 1.9970152378082275,
      "learning_rate": 9.263983975013673e-06,
      "loss": 0.0444,
      "step": 243400
    },
    {
      "epoch": 0.3983621688497869,
      "grad_norm": 1.46367609500885,
      "learning_rate": 9.263918082800156e-06,
      "loss": 0.0383,
      "step": 243420
    },
    {
      "epoch": 0.39839489928844024,
      "grad_norm": 0.23527495563030243,
      "learning_rate": 9.26385219058664e-06,
      "loss": 0.0397,
      "step": 243440
    },
    {
      "epoch": 0.3984276297270936,
      "grad_norm": 0.652826726436615,
      "learning_rate": 9.263786298373122e-06,
      "loss": 0.0467,
      "step": 243460
    },
    {
      "epoch": 0.39846036016574693,
      "grad_norm": 1.6036863327026367,
      "learning_rate": 9.263720406159605e-06,
      "loss": 0.0466,
      "step": 243480
    },
    {
      "epoch": 0.3984930906044003,
      "grad_norm": 1.0273984670639038,
      "learning_rate": 9.263654513946087e-06,
      "loss": 0.042,
      "step": 243500
    },
    {
      "epoch": 0.3985258210430536,
      "grad_norm": 0.2141103595495224,
      "learning_rate": 9.26358862173257e-06,
      "loss": 0.0523,
      "step": 243520
    },
    {
      "epoch": 0.39855855148170694,
      "grad_norm": 3.0686988830566406,
      "learning_rate": 9.263522729519053e-06,
      "loss": 0.0374,
      "step": 243540
    },
    {
      "epoch": 0.3985912819203603,
      "grad_norm": 1.4631210565567017,
      "learning_rate": 9.263456837305536e-06,
      "loss": 0.0537,
      "step": 243560
    },
    {
      "epoch": 0.3986240123590136,
      "grad_norm": 1.4115196466445923,
      "learning_rate": 9.26339094509202e-06,
      "loss": 0.0401,
      "step": 243580
    },
    {
      "epoch": 0.398656742797667,
      "grad_norm": 2.714694023132324,
      "learning_rate": 9.263325052878502e-06,
      "loss": 0.077,
      "step": 243600
    },
    {
      "epoch": 0.3986894732363203,
      "grad_norm": 2.3891613483428955,
      "learning_rate": 9.263259160664985e-06,
      "loss": 0.0416,
      "step": 243620
    },
    {
      "epoch": 0.39872220367497363,
      "grad_norm": 1.3379989862442017,
      "learning_rate": 9.263193268451469e-06,
      "loss": 0.0482,
      "step": 243640
    },
    {
      "epoch": 0.398754934113627,
      "grad_norm": 2.3743131160736084,
      "learning_rate": 9.26312737623795e-06,
      "loss": 0.0494,
      "step": 243660
    },
    {
      "epoch": 0.3987876645522803,
      "grad_norm": 0.7216672301292419,
      "learning_rate": 9.263061484024434e-06,
      "loss": 0.0517,
      "step": 243680
    },
    {
      "epoch": 0.3988203949909337,
      "grad_norm": 1.6694546937942505,
      "learning_rate": 9.262995591810916e-06,
      "loss": 0.0512,
      "step": 243700
    },
    {
      "epoch": 0.398853125429587,
      "grad_norm": 8.155677795410156,
      "learning_rate": 9.2629296995974e-06,
      "loss": 0.0553,
      "step": 243720
    },
    {
      "epoch": 0.3988858558682403,
      "grad_norm": 0.7992409467697144,
      "learning_rate": 9.262863807383882e-06,
      "loss": 0.0427,
      "step": 243740
    },
    {
      "epoch": 0.3989185863068937,
      "grad_norm": 1.6418427228927612,
      "learning_rate": 9.262797915170365e-06,
      "loss": 0.0543,
      "step": 243760
    },
    {
      "epoch": 0.398951316745547,
      "grad_norm": 1.9311060905456543,
      "learning_rate": 9.262732022956849e-06,
      "loss": 0.0613,
      "step": 243780
    },
    {
      "epoch": 0.3989840471842004,
      "grad_norm": 3.5256147384643555,
      "learning_rate": 9.26266613074333e-06,
      "loss": 0.0506,
      "step": 243800
    },
    {
      "epoch": 0.3990167776228537,
      "grad_norm": 1.6293387413024902,
      "learning_rate": 9.262600238529814e-06,
      "loss": 0.0581,
      "step": 243820
    },
    {
      "epoch": 0.399049508061507,
      "grad_norm": 2.8204517364501953,
      "learning_rate": 9.262534346316296e-06,
      "loss": 0.0344,
      "step": 243840
    },
    {
      "epoch": 0.3990822385001604,
      "grad_norm": 4.613969802856445,
      "learning_rate": 9.26246845410278e-06,
      "loss": 0.0391,
      "step": 243860
    },
    {
      "epoch": 0.3991149689388137,
      "grad_norm": 0.5217189788818359,
      "learning_rate": 9.262402561889262e-06,
      "loss": 0.0416,
      "step": 243880
    },
    {
      "epoch": 0.3991476993774671,
      "grad_norm": 3.029872417449951,
      "learning_rate": 9.262336669675745e-06,
      "loss": 0.04,
      "step": 243900
    },
    {
      "epoch": 0.3991804298161204,
      "grad_norm": 1.5273017883300781,
      "learning_rate": 9.262270777462227e-06,
      "loss": 0.0453,
      "step": 243920
    },
    {
      "epoch": 0.3992131602547737,
      "grad_norm": 0.716325581073761,
      "learning_rate": 9.26220488524871e-06,
      "loss": 0.025,
      "step": 243940
    },
    {
      "epoch": 0.3992458906934271,
      "grad_norm": 1.268092393875122,
      "learning_rate": 9.262138993035193e-06,
      "loss": 0.0425,
      "step": 243960
    },
    {
      "epoch": 0.3992786211320804,
      "grad_norm": 1.806766390800476,
      "learning_rate": 9.262073100821676e-06,
      "loss": 0.0471,
      "step": 243980
    },
    {
      "epoch": 0.3993113515707338,
      "grad_norm": 20.30697250366211,
      "learning_rate": 9.26200720860816e-06,
      "loss": 0.0599,
      "step": 244000
    },
    {
      "epoch": 0.3993440820093871,
      "grad_norm": 1.8603975772857666,
      "learning_rate": 9.261941316394643e-06,
      "loss": 0.0467,
      "step": 244020
    },
    {
      "epoch": 0.3993768124480404,
      "grad_norm": 0.6850066781044006,
      "learning_rate": 9.261875424181125e-06,
      "loss": 0.0403,
      "step": 244040
    },
    {
      "epoch": 0.3994095428866938,
      "grad_norm": 2.613312244415283,
      "learning_rate": 9.261809531967609e-06,
      "loss": 0.0537,
      "step": 244060
    },
    {
      "epoch": 0.3994422733253471,
      "grad_norm": 0.6773618459701538,
      "learning_rate": 9.26174363975409e-06,
      "loss": 0.0436,
      "step": 244080
    },
    {
      "epoch": 0.39947500376400047,
      "grad_norm": 0.7500964999198914,
      "learning_rate": 9.261677747540574e-06,
      "loss": 0.0491,
      "step": 244100
    },
    {
      "epoch": 0.3995077342026538,
      "grad_norm": 1.9941377639770508,
      "learning_rate": 9.261611855327058e-06,
      "loss": 0.0405,
      "step": 244120
    },
    {
      "epoch": 0.3995404646413071,
      "grad_norm": 3.907578468322754,
      "learning_rate": 9.26154596311354e-06,
      "loss": 0.0529,
      "step": 244140
    },
    {
      "epoch": 0.3995731950799605,
      "grad_norm": 0.8897901177406311,
      "learning_rate": 9.261480070900023e-06,
      "loss": 0.0369,
      "step": 244160
    },
    {
      "epoch": 0.3996059255186138,
      "grad_norm": 1.6530098915100098,
      "learning_rate": 9.261414178686505e-06,
      "loss": 0.0548,
      "step": 244180
    },
    {
      "epoch": 0.39963865595726716,
      "grad_norm": 1.3105748891830444,
      "learning_rate": 9.261348286472989e-06,
      "loss": 0.0462,
      "step": 244200
    },
    {
      "epoch": 0.3996713863959205,
      "grad_norm": 0.5570579767227173,
      "learning_rate": 9.26128239425947e-06,
      "loss": 0.0396,
      "step": 244220
    },
    {
      "epoch": 0.3997041168345738,
      "grad_norm": 3.009976387023926,
      "learning_rate": 9.261216502045954e-06,
      "loss": 0.0506,
      "step": 244240
    },
    {
      "epoch": 0.39973684727322717,
      "grad_norm": 1.901102066040039,
      "learning_rate": 9.261150609832436e-06,
      "loss": 0.0437,
      "step": 244260
    },
    {
      "epoch": 0.3997695777118805,
      "grad_norm": 1.563639521598816,
      "learning_rate": 9.26108471761892e-06,
      "loss": 0.0513,
      "step": 244280
    },
    {
      "epoch": 0.39980230815053386,
      "grad_norm": 2.608809232711792,
      "learning_rate": 9.261018825405402e-06,
      "loss": 0.0581,
      "step": 244300
    },
    {
      "epoch": 0.3998350385891872,
      "grad_norm": 0.8636685013771057,
      "learning_rate": 9.260952933191885e-06,
      "loss": 0.0465,
      "step": 244320
    },
    {
      "epoch": 0.3998677690278405,
      "grad_norm": 1.7992675304412842,
      "learning_rate": 9.260887040978367e-06,
      "loss": 0.0542,
      "step": 244340
    },
    {
      "epoch": 0.39990049946649386,
      "grad_norm": 1.4577610492706299,
      "learning_rate": 9.26082114876485e-06,
      "loss": 0.0445,
      "step": 244360
    },
    {
      "epoch": 0.3999332299051472,
      "grad_norm": 2.799866199493408,
      "learning_rate": 9.260755256551334e-06,
      "loss": 0.0341,
      "step": 244380
    },
    {
      "epoch": 0.39996596034380055,
      "grad_norm": 2.2378902435302734,
      "learning_rate": 9.260689364337816e-06,
      "loss": 0.0612,
      "step": 244400
    },
    {
      "epoch": 0.39999869078245387,
      "grad_norm": 2.8294644355773926,
      "learning_rate": 9.2606234721243e-06,
      "loss": 0.0346,
      "step": 244420
    },
    {
      "epoch": 0.4000314212211072,
      "grad_norm": 1.5205848217010498,
      "learning_rate": 9.260557579910783e-06,
      "loss": 0.0481,
      "step": 244440
    },
    {
      "epoch": 0.40006415165976056,
      "grad_norm": 5.009101390838623,
      "learning_rate": 9.260491687697265e-06,
      "loss": 0.0538,
      "step": 244460
    },
    {
      "epoch": 0.4000968820984139,
      "grad_norm": 1.338841438293457,
      "learning_rate": 9.260425795483749e-06,
      "loss": 0.043,
      "step": 244480
    },
    {
      "epoch": 0.40012961253706725,
      "grad_norm": 0.7958154678344727,
      "learning_rate": 9.260359903270233e-06,
      "loss": 0.0333,
      "step": 244500
    },
    {
      "epoch": 0.40016234297572056,
      "grad_norm": 4.71167516708374,
      "learning_rate": 9.260294011056714e-06,
      "loss": 0.0498,
      "step": 244520
    },
    {
      "epoch": 0.4001950734143739,
      "grad_norm": 1.6388157606124878,
      "learning_rate": 9.260228118843198e-06,
      "loss": 0.0547,
      "step": 244540
    },
    {
      "epoch": 0.40022780385302725,
      "grad_norm": 3.643066883087158,
      "learning_rate": 9.26016222662968e-06,
      "loss": 0.0621,
      "step": 244560
    },
    {
      "epoch": 0.40026053429168057,
      "grad_norm": 3.7103006839752197,
      "learning_rate": 9.260096334416163e-06,
      "loss": 0.0352,
      "step": 244580
    },
    {
      "epoch": 0.40029326473033394,
      "grad_norm": 1.368241548538208,
      "learning_rate": 9.260030442202645e-06,
      "loss": 0.0396,
      "step": 244600
    },
    {
      "epoch": 0.40032599516898726,
      "grad_norm": 0.4064524173736572,
      "learning_rate": 9.259964549989129e-06,
      "loss": 0.0376,
      "step": 244620
    },
    {
      "epoch": 0.4003587256076406,
      "grad_norm": 1.156654715538025,
      "learning_rate": 9.259898657775611e-06,
      "loss": 0.0418,
      "step": 244640
    },
    {
      "epoch": 0.40039145604629395,
      "grad_norm": 2.704122304916382,
      "learning_rate": 9.259832765562094e-06,
      "loss": 0.0498,
      "step": 244660
    },
    {
      "epoch": 0.40042418648494726,
      "grad_norm": 0.42343977093696594,
      "learning_rate": 9.259766873348576e-06,
      "loss": 0.0484,
      "step": 244680
    },
    {
      "epoch": 0.40045691692360064,
      "grad_norm": 5.6887311935424805,
      "learning_rate": 9.25970098113506e-06,
      "loss": 0.0555,
      "step": 244700
    },
    {
      "epoch": 0.40048964736225395,
      "grad_norm": 1.350743293762207,
      "learning_rate": 9.259635088921542e-06,
      "loss": 0.0254,
      "step": 244720
    },
    {
      "epoch": 0.40052237780090727,
      "grad_norm": 1.497096061706543,
      "learning_rate": 9.259569196708025e-06,
      "loss": 0.043,
      "step": 244740
    },
    {
      "epoch": 0.40055510823956064,
      "grad_norm": 1.5124119520187378,
      "learning_rate": 9.259503304494509e-06,
      "loss": 0.0486,
      "step": 244760
    },
    {
      "epoch": 0.40058783867821396,
      "grad_norm": 1.77470862865448,
      "learning_rate": 9.259437412280991e-06,
      "loss": 0.0434,
      "step": 244780
    },
    {
      "epoch": 0.40062056911686733,
      "grad_norm": 1.21762216091156,
      "learning_rate": 9.259371520067474e-06,
      "loss": 0.0387,
      "step": 244800
    },
    {
      "epoch": 0.40065329955552065,
      "grad_norm": 4.342606067657471,
      "learning_rate": 9.259305627853958e-06,
      "loss": 0.0563,
      "step": 244820
    },
    {
      "epoch": 0.40068602999417396,
      "grad_norm": 0.6526904702186584,
      "learning_rate": 9.25923973564044e-06,
      "loss": 0.05,
      "step": 244840
    },
    {
      "epoch": 0.40071876043282734,
      "grad_norm": 0.7999624013900757,
      "learning_rate": 9.259173843426924e-06,
      "loss": 0.0416,
      "step": 244860
    },
    {
      "epoch": 0.40075149087148065,
      "grad_norm": 3.4350006580352783,
      "learning_rate": 9.259107951213407e-06,
      "loss": 0.0491,
      "step": 244880
    },
    {
      "epoch": 0.400784221310134,
      "grad_norm": 1.2292765378952026,
      "learning_rate": 9.259042058999889e-06,
      "loss": 0.0444,
      "step": 244900
    },
    {
      "epoch": 0.40081695174878734,
      "grad_norm": 0.9320297837257385,
      "learning_rate": 9.258976166786373e-06,
      "loss": 0.0468,
      "step": 244920
    },
    {
      "epoch": 0.40084968218744066,
      "grad_norm": 1.2730510234832764,
      "learning_rate": 9.258910274572855e-06,
      "loss": 0.0442,
      "step": 244940
    },
    {
      "epoch": 0.40088241262609403,
      "grad_norm": 2.8002467155456543,
      "learning_rate": 9.258844382359338e-06,
      "loss": 0.0415,
      "step": 244960
    },
    {
      "epoch": 0.40091514306474735,
      "grad_norm": 0.8215935826301575,
      "learning_rate": 9.25877849014582e-06,
      "loss": 0.0444,
      "step": 244980
    },
    {
      "epoch": 0.4009478735034007,
      "grad_norm": 1.644514560699463,
      "learning_rate": 9.258712597932304e-06,
      "loss": 0.0447,
      "step": 245000
    },
    {
      "epoch": 0.40098060394205404,
      "grad_norm": 2.5816969871520996,
      "learning_rate": 9.258646705718785e-06,
      "loss": 0.056,
      "step": 245020
    },
    {
      "epoch": 0.40101333438070735,
      "grad_norm": 1.079454779624939,
      "learning_rate": 9.258580813505269e-06,
      "loss": 0.0351,
      "step": 245040
    },
    {
      "epoch": 0.4010460648193607,
      "grad_norm": 0.6542276740074158,
      "learning_rate": 9.258514921291751e-06,
      "loss": 0.0421,
      "step": 245060
    },
    {
      "epoch": 0.40107879525801404,
      "grad_norm": 0.7366079688072205,
      "learning_rate": 9.258449029078235e-06,
      "loss": 0.0353,
      "step": 245080
    },
    {
      "epoch": 0.40111152569666736,
      "grad_norm": 1.3065329790115356,
      "learning_rate": 9.258383136864716e-06,
      "loss": 0.046,
      "step": 245100
    },
    {
      "epoch": 0.40114425613532073,
      "grad_norm": 3.1161837577819824,
      "learning_rate": 9.2583172446512e-06,
      "loss": 0.0484,
      "step": 245120
    },
    {
      "epoch": 0.40117698657397405,
      "grad_norm": 1.6600439548492432,
      "learning_rate": 9.258251352437682e-06,
      "loss": 0.0478,
      "step": 245140
    },
    {
      "epoch": 0.4012097170126274,
      "grad_norm": 0.21956664323806763,
      "learning_rate": 9.258185460224165e-06,
      "loss": 0.0477,
      "step": 245160
    },
    {
      "epoch": 0.40124244745128074,
      "grad_norm": 1.9053975343704224,
      "learning_rate": 9.258119568010649e-06,
      "loss": 0.0425,
      "step": 245180
    },
    {
      "epoch": 0.40127517788993405,
      "grad_norm": 2.0388827323913574,
      "learning_rate": 9.258053675797131e-06,
      "loss": 0.0528,
      "step": 245200
    },
    {
      "epoch": 0.4013079083285874,
      "grad_norm": 2.5669400691986084,
      "learning_rate": 9.257987783583615e-06,
      "loss": 0.045,
      "step": 245220
    },
    {
      "epoch": 0.40134063876724074,
      "grad_norm": 1.1188534498214722,
      "learning_rate": 9.257921891370098e-06,
      "loss": 0.0468,
      "step": 245240
    },
    {
      "epoch": 0.4013733692058941,
      "grad_norm": 1.1866958141326904,
      "learning_rate": 9.25785599915658e-06,
      "loss": 0.0463,
      "step": 245260
    },
    {
      "epoch": 0.40140609964454743,
      "grad_norm": 1.4476861953735352,
      "learning_rate": 9.257790106943064e-06,
      "loss": 0.0456,
      "step": 245280
    },
    {
      "epoch": 0.40143883008320075,
      "grad_norm": 1.0767771005630493,
      "learning_rate": 9.257724214729547e-06,
      "loss": 0.0527,
      "step": 245300
    },
    {
      "epoch": 0.4014715605218541,
      "grad_norm": 2.1848111152648926,
      "learning_rate": 9.257658322516029e-06,
      "loss": 0.0469,
      "step": 245320
    },
    {
      "epoch": 0.40150429096050744,
      "grad_norm": 3.368640899658203,
      "learning_rate": 9.257592430302513e-06,
      "loss": 0.0414,
      "step": 245340
    },
    {
      "epoch": 0.4015370213991608,
      "grad_norm": 1.153822898864746,
      "learning_rate": 9.257526538088995e-06,
      "loss": 0.0368,
      "step": 245360
    },
    {
      "epoch": 0.4015697518378141,
      "grad_norm": 3.5219948291778564,
      "learning_rate": 9.257460645875478e-06,
      "loss": 0.0549,
      "step": 245380
    },
    {
      "epoch": 0.40160248227646744,
      "grad_norm": 1.4674615859985352,
      "learning_rate": 9.25739475366196e-06,
      "loss": 0.0348,
      "step": 245400
    },
    {
      "epoch": 0.4016352127151208,
      "grad_norm": 4.274380207061768,
      "learning_rate": 9.257328861448444e-06,
      "loss": 0.0436,
      "step": 245420
    },
    {
      "epoch": 0.40166794315377413,
      "grad_norm": 2.67777419090271,
      "learning_rate": 9.257262969234926e-06,
      "loss": 0.0406,
      "step": 245440
    },
    {
      "epoch": 0.4017006735924275,
      "grad_norm": 1.854426622390747,
      "learning_rate": 9.257197077021409e-06,
      "loss": 0.0399,
      "step": 245460
    },
    {
      "epoch": 0.4017334040310808,
      "grad_norm": 1.2721242904663086,
      "learning_rate": 9.257131184807891e-06,
      "loss": 0.0527,
      "step": 245480
    },
    {
      "epoch": 0.40176613446973414,
      "grad_norm": 1.9765568971633911,
      "learning_rate": 9.257065292594375e-06,
      "loss": 0.0544,
      "step": 245500
    },
    {
      "epoch": 0.4017988649083875,
      "grad_norm": 1.3447808027267456,
      "learning_rate": 9.256999400380857e-06,
      "loss": 0.0463,
      "step": 245520
    },
    {
      "epoch": 0.4018315953470408,
      "grad_norm": 4.359926700592041,
      "learning_rate": 9.25693350816734e-06,
      "loss": 0.0334,
      "step": 245540
    },
    {
      "epoch": 0.4018643257856942,
      "grad_norm": 0.6686754822731018,
      "learning_rate": 9.256867615953824e-06,
      "loss": 0.0471,
      "step": 245560
    },
    {
      "epoch": 0.4018970562243475,
      "grad_norm": 1.615020751953125,
      "learning_rate": 9.256801723740306e-06,
      "loss": 0.0339,
      "step": 245580
    },
    {
      "epoch": 0.40192978666300083,
      "grad_norm": 4.814601898193359,
      "learning_rate": 9.25673583152679e-06,
      "loss": 0.0432,
      "step": 245600
    },
    {
      "epoch": 0.4019625171016542,
      "grad_norm": 0.7255545258522034,
      "learning_rate": 9.256669939313273e-06,
      "loss": 0.0367,
      "step": 245620
    },
    {
      "epoch": 0.4019952475403075,
      "grad_norm": 1.4863470792770386,
      "learning_rate": 9.256604047099755e-06,
      "loss": 0.0335,
      "step": 245640
    },
    {
      "epoch": 0.4020279779789609,
      "grad_norm": 2.036161184310913,
      "learning_rate": 9.256538154886238e-06,
      "loss": 0.0431,
      "step": 245660
    },
    {
      "epoch": 0.4020607084176142,
      "grad_norm": 2.511423349380493,
      "learning_rate": 9.256472262672722e-06,
      "loss": 0.0348,
      "step": 245680
    },
    {
      "epoch": 0.4020934388562675,
      "grad_norm": 0.9698283672332764,
      "learning_rate": 9.256406370459204e-06,
      "loss": 0.041,
      "step": 245700
    },
    {
      "epoch": 0.4021261692949209,
      "grad_norm": 2.4397809505462646,
      "learning_rate": 9.256340478245687e-06,
      "loss": 0.0543,
      "step": 245720
    },
    {
      "epoch": 0.4021588997335742,
      "grad_norm": 3.6970319747924805,
      "learning_rate": 9.25627458603217e-06,
      "loss": 0.0475,
      "step": 245740
    },
    {
      "epoch": 0.4021916301722276,
      "grad_norm": 3.0924341678619385,
      "learning_rate": 9.256208693818653e-06,
      "loss": 0.0398,
      "step": 245760
    },
    {
      "epoch": 0.4022243606108809,
      "grad_norm": 1.8373173475265503,
      "learning_rate": 9.256142801605135e-06,
      "loss": 0.0544,
      "step": 245780
    },
    {
      "epoch": 0.4022570910495342,
      "grad_norm": 0.9279187321662903,
      "learning_rate": 9.256076909391618e-06,
      "loss": 0.0401,
      "step": 245800
    },
    {
      "epoch": 0.4022898214881876,
      "grad_norm": 0.23890537023544312,
      "learning_rate": 9.2560110171781e-06,
      "loss": 0.029,
      "step": 245820
    },
    {
      "epoch": 0.4023225519268409,
      "grad_norm": 1.9802873134613037,
      "learning_rate": 9.255945124964584e-06,
      "loss": 0.0397,
      "step": 245840
    },
    {
      "epoch": 0.4023552823654943,
      "grad_norm": 1.0314457416534424,
      "learning_rate": 9.255879232751066e-06,
      "loss": 0.0399,
      "step": 245860
    },
    {
      "epoch": 0.4023880128041476,
      "grad_norm": 1.2972694635391235,
      "learning_rate": 9.25581334053755e-06,
      "loss": 0.0349,
      "step": 245880
    },
    {
      "epoch": 0.4024207432428009,
      "grad_norm": 0.7740322351455688,
      "learning_rate": 9.255747448324033e-06,
      "loss": 0.0407,
      "step": 245900
    },
    {
      "epoch": 0.4024534736814543,
      "grad_norm": 1.4167184829711914,
      "learning_rate": 9.255681556110515e-06,
      "loss": 0.0504,
      "step": 245920
    },
    {
      "epoch": 0.4024862041201076,
      "grad_norm": 1.6473830938339233,
      "learning_rate": 9.255615663896998e-06,
      "loss": 0.0398,
      "step": 245940
    },
    {
      "epoch": 0.402518934558761,
      "grad_norm": 3.1909615993499756,
      "learning_rate": 9.25554977168348e-06,
      "loss": 0.0534,
      "step": 245960
    },
    {
      "epoch": 0.4025516649974143,
      "grad_norm": 1.449249267578125,
      "learning_rate": 9.255483879469964e-06,
      "loss": 0.0524,
      "step": 245980
    },
    {
      "epoch": 0.4025843954360676,
      "grad_norm": 1.2443510293960571,
      "learning_rate": 9.255417987256446e-06,
      "loss": 0.0525,
      "step": 246000
    },
    {
      "epoch": 0.402617125874721,
      "grad_norm": 2.1028761863708496,
      "learning_rate": 9.25535209504293e-06,
      "loss": 0.0388,
      "step": 246020
    },
    {
      "epoch": 0.4026498563133743,
      "grad_norm": 0.7826230525970459,
      "learning_rate": 9.255286202829413e-06,
      "loss": 0.0314,
      "step": 246040
    },
    {
      "epoch": 0.40268258675202767,
      "grad_norm": 1.905097484588623,
      "learning_rate": 9.255220310615895e-06,
      "loss": 0.0502,
      "step": 246060
    },
    {
      "epoch": 0.402715317190681,
      "grad_norm": 0.8865349292755127,
      "learning_rate": 9.255154418402378e-06,
      "loss": 0.0364,
      "step": 246080
    },
    {
      "epoch": 0.4027480476293343,
      "grad_norm": 0.25721919536590576,
      "learning_rate": 9.255088526188862e-06,
      "loss": 0.0492,
      "step": 246100
    },
    {
      "epoch": 0.4027807780679877,
      "grad_norm": 1.073196530342102,
      "learning_rate": 9.255022633975344e-06,
      "loss": 0.0566,
      "step": 246120
    },
    {
      "epoch": 0.402813508506641,
      "grad_norm": 0.526698648929596,
      "learning_rate": 9.254956741761827e-06,
      "loss": 0.0446,
      "step": 246140
    },
    {
      "epoch": 0.40284623894529437,
      "grad_norm": 1.2289260625839233,
      "learning_rate": 9.25489084954831e-06,
      "loss": 0.0411,
      "step": 246160
    },
    {
      "epoch": 0.4028789693839477,
      "grad_norm": 2.300276756286621,
      "learning_rate": 9.254824957334793e-06,
      "loss": 0.0396,
      "step": 246180
    },
    {
      "epoch": 0.402911699822601,
      "grad_norm": 2.3438992500305176,
      "learning_rate": 9.254759065121275e-06,
      "loss": 0.0331,
      "step": 246200
    },
    {
      "epoch": 0.40294443026125437,
      "grad_norm": 2.3212978839874268,
      "learning_rate": 9.254693172907758e-06,
      "loss": 0.0325,
      "step": 246220
    },
    {
      "epoch": 0.4029771606999077,
      "grad_norm": 1.7579505443572998,
      "learning_rate": 9.254627280694242e-06,
      "loss": 0.0461,
      "step": 246240
    },
    {
      "epoch": 0.40300989113856106,
      "grad_norm": 1.498218297958374,
      "learning_rate": 9.254561388480724e-06,
      "loss": 0.0581,
      "step": 246260
    },
    {
      "epoch": 0.4030426215772144,
      "grad_norm": 1.6752482652664185,
      "learning_rate": 9.254495496267207e-06,
      "loss": 0.0375,
      "step": 246280
    },
    {
      "epoch": 0.4030753520158677,
      "grad_norm": 0.6267818212509155,
      "learning_rate": 9.25442960405369e-06,
      "loss": 0.0448,
      "step": 246300
    },
    {
      "epoch": 0.40310808245452107,
      "grad_norm": 1.7742314338684082,
      "learning_rate": 9.254363711840173e-06,
      "loss": 0.0572,
      "step": 246320
    },
    {
      "epoch": 0.4031408128931744,
      "grad_norm": 3.584028720855713,
      "learning_rate": 9.254297819626655e-06,
      "loss": 0.0433,
      "step": 246340
    },
    {
      "epoch": 0.40317354333182776,
      "grad_norm": 1.545845627784729,
      "learning_rate": 9.254231927413138e-06,
      "loss": 0.0462,
      "step": 246360
    },
    {
      "epoch": 0.40320627377048107,
      "grad_norm": 1.6713751554489136,
      "learning_rate": 9.25416603519962e-06,
      "loss": 0.0459,
      "step": 246380
    },
    {
      "epoch": 0.4032390042091344,
      "grad_norm": 0.7036022543907166,
      "learning_rate": 9.254100142986104e-06,
      "loss": 0.0369,
      "step": 246400
    },
    {
      "epoch": 0.40327173464778776,
      "grad_norm": 0.6574353575706482,
      "learning_rate": 9.254034250772587e-06,
      "loss": 0.0537,
      "step": 246420
    },
    {
      "epoch": 0.4033044650864411,
      "grad_norm": 1.5436530113220215,
      "learning_rate": 9.25396835855907e-06,
      "loss": 0.0464,
      "step": 246440
    },
    {
      "epoch": 0.40333719552509445,
      "grad_norm": 2.9648118019104004,
      "learning_rate": 9.253902466345553e-06,
      "loss": 0.0457,
      "step": 246460
    },
    {
      "epoch": 0.40336992596374777,
      "grad_norm": 2.272918939590454,
      "learning_rate": 9.253836574132036e-06,
      "loss": 0.0427,
      "step": 246480
    },
    {
      "epoch": 0.4034026564024011,
      "grad_norm": 0.661529541015625,
      "learning_rate": 9.253770681918518e-06,
      "loss": 0.0388,
      "step": 246500
    },
    {
      "epoch": 0.40343538684105446,
      "grad_norm": 1.0731524229049683,
      "learning_rate": 9.253704789705002e-06,
      "loss": 0.0322,
      "step": 246520
    },
    {
      "epoch": 0.40346811727970777,
      "grad_norm": 1.8606421947479248,
      "learning_rate": 9.253638897491484e-06,
      "loss": 0.0484,
      "step": 246540
    },
    {
      "epoch": 0.40350084771836114,
      "grad_norm": 1.9644840955734253,
      "learning_rate": 9.253573005277967e-06,
      "loss": 0.0488,
      "step": 246560
    },
    {
      "epoch": 0.40353357815701446,
      "grad_norm": 1.8252360820770264,
      "learning_rate": 9.25350711306445e-06,
      "loss": 0.0447,
      "step": 246580
    },
    {
      "epoch": 0.4035663085956678,
      "grad_norm": 9.254501342773438,
      "learning_rate": 9.253441220850933e-06,
      "loss": 0.0458,
      "step": 246600
    },
    {
      "epoch": 0.40359903903432115,
      "grad_norm": 2.1018168926239014,
      "learning_rate": 9.253375328637417e-06,
      "loss": 0.0459,
      "step": 246620
    },
    {
      "epoch": 0.40363176947297447,
      "grad_norm": 0.44321030378341675,
      "learning_rate": 9.253309436423898e-06,
      "loss": 0.0493,
      "step": 246640
    },
    {
      "epoch": 0.40366449991162784,
      "grad_norm": 2.8492321968078613,
      "learning_rate": 9.253243544210382e-06,
      "loss": 0.0376,
      "step": 246660
    },
    {
      "epoch": 0.40369723035028116,
      "grad_norm": 1.6951841115951538,
      "learning_rate": 9.253177651996864e-06,
      "loss": 0.0372,
      "step": 246680
    },
    {
      "epoch": 0.40372996078893447,
      "grad_norm": 2.1325175762176514,
      "learning_rate": 9.253111759783347e-06,
      "loss": 0.051,
      "step": 246700
    },
    {
      "epoch": 0.40376269122758784,
      "grad_norm": 2.5226528644561768,
      "learning_rate": 9.25304586756983e-06,
      "loss": 0.0488,
      "step": 246720
    },
    {
      "epoch": 0.40379542166624116,
      "grad_norm": 0.7238993644714355,
      "learning_rate": 9.252979975356313e-06,
      "loss": 0.0414,
      "step": 246740
    },
    {
      "epoch": 0.40382815210489453,
      "grad_norm": 0.8359217643737793,
      "learning_rate": 9.252914083142795e-06,
      "loss": 0.0498,
      "step": 246760
    },
    {
      "epoch": 0.40386088254354785,
      "grad_norm": 1.349731206893921,
      "learning_rate": 9.252848190929278e-06,
      "loss": 0.0375,
      "step": 246780
    },
    {
      "epoch": 0.40389361298220117,
      "grad_norm": 1.8925474882125854,
      "learning_rate": 9.252782298715762e-06,
      "loss": 0.0537,
      "step": 246800
    },
    {
      "epoch": 0.40392634342085454,
      "grad_norm": 0.8490273356437683,
      "learning_rate": 9.252716406502244e-06,
      "loss": 0.0434,
      "step": 246820
    },
    {
      "epoch": 0.40395907385950786,
      "grad_norm": 0.9565551280975342,
      "learning_rate": 9.252650514288727e-06,
      "loss": 0.0457,
      "step": 246840
    },
    {
      "epoch": 0.40399180429816123,
      "grad_norm": 0.33181774616241455,
      "learning_rate": 9.252584622075211e-06,
      "loss": 0.039,
      "step": 246860
    },
    {
      "epoch": 0.40402453473681454,
      "grad_norm": 0.5690715312957764,
      "learning_rate": 9.252518729861693e-06,
      "loss": 0.0558,
      "step": 246880
    },
    {
      "epoch": 0.40405726517546786,
      "grad_norm": 1.5274091958999634,
      "learning_rate": 9.252452837648177e-06,
      "loss": 0.0511,
      "step": 246900
    },
    {
      "epoch": 0.40408999561412123,
      "grad_norm": 0.4893495440483093,
      "learning_rate": 9.252386945434658e-06,
      "loss": 0.0532,
      "step": 246920
    },
    {
      "epoch": 0.40412272605277455,
      "grad_norm": 2.0743956565856934,
      "learning_rate": 9.252321053221142e-06,
      "loss": 0.0375,
      "step": 246940
    },
    {
      "epoch": 0.4041554564914279,
      "grad_norm": 0.9716597199440002,
      "learning_rate": 9.252255161007626e-06,
      "loss": 0.0294,
      "step": 246960
    },
    {
      "epoch": 0.40418818693008124,
      "grad_norm": 0.9514495134353638,
      "learning_rate": 9.252189268794108e-06,
      "loss": 0.0476,
      "step": 246980
    },
    {
      "epoch": 0.40422091736873456,
      "grad_norm": 1.4531491994857788,
      "learning_rate": 9.252123376580591e-06,
      "loss": 0.05,
      "step": 247000
    },
    {
      "epoch": 0.40425364780738793,
      "grad_norm": 5.063310146331787,
      "learning_rate": 9.252057484367073e-06,
      "loss": 0.0529,
      "step": 247020
    },
    {
      "epoch": 0.40428637824604124,
      "grad_norm": 0.5635960698127747,
      "learning_rate": 9.251991592153557e-06,
      "loss": 0.0543,
      "step": 247040
    },
    {
      "epoch": 0.4043191086846946,
      "grad_norm": 0.878400444984436,
      "learning_rate": 9.251925699940038e-06,
      "loss": 0.0441,
      "step": 247060
    },
    {
      "epoch": 0.40435183912334793,
      "grad_norm": 4.333266735076904,
      "learning_rate": 9.251859807726522e-06,
      "loss": 0.0665,
      "step": 247080
    },
    {
      "epoch": 0.40438456956200125,
      "grad_norm": 1.5763055086135864,
      "learning_rate": 9.251793915513004e-06,
      "loss": 0.0444,
      "step": 247100
    },
    {
      "epoch": 0.4044173000006546,
      "grad_norm": 4.953612804412842,
      "learning_rate": 9.251728023299488e-06,
      "loss": 0.0491,
      "step": 247120
    },
    {
      "epoch": 0.40445003043930794,
      "grad_norm": 3.588644027709961,
      "learning_rate": 9.25166213108597e-06,
      "loss": 0.0492,
      "step": 247140
    },
    {
      "epoch": 0.4044827608779613,
      "grad_norm": 3.421342372894287,
      "learning_rate": 9.251596238872453e-06,
      "loss": 0.0466,
      "step": 247160
    },
    {
      "epoch": 0.40451549131661463,
      "grad_norm": 2.620553970336914,
      "learning_rate": 9.251530346658935e-06,
      "loss": 0.0564,
      "step": 247180
    },
    {
      "epoch": 0.40454822175526794,
      "grad_norm": 2.9807846546173096,
      "learning_rate": 9.251464454445418e-06,
      "loss": 0.0498,
      "step": 247200
    },
    {
      "epoch": 0.4045809521939213,
      "grad_norm": 2.098379373550415,
      "learning_rate": 9.251398562231902e-06,
      "loss": 0.0449,
      "step": 247220
    },
    {
      "epoch": 0.40461368263257463,
      "grad_norm": 2.2477190494537354,
      "learning_rate": 9.251332670018384e-06,
      "loss": 0.0426,
      "step": 247240
    },
    {
      "epoch": 0.404646413071228,
      "grad_norm": 0.5254431962966919,
      "learning_rate": 9.251266777804868e-06,
      "loss": 0.0369,
      "step": 247260
    },
    {
      "epoch": 0.4046791435098813,
      "grad_norm": 0.5181785821914673,
      "learning_rate": 9.251200885591351e-06,
      "loss": 0.0317,
      "step": 247280
    },
    {
      "epoch": 0.40471187394853464,
      "grad_norm": 3.95216965675354,
      "learning_rate": 9.251134993377833e-06,
      "loss": 0.0371,
      "step": 247300
    },
    {
      "epoch": 0.404744604387188,
      "grad_norm": 3.8316683769226074,
      "learning_rate": 9.251069101164317e-06,
      "loss": 0.0386,
      "step": 247320
    },
    {
      "epoch": 0.40477733482584133,
      "grad_norm": 2.1400632858276367,
      "learning_rate": 9.2510032089508e-06,
      "loss": 0.0398,
      "step": 247340
    },
    {
      "epoch": 0.4048100652644947,
      "grad_norm": 3.6898319721221924,
      "learning_rate": 9.250937316737282e-06,
      "loss": 0.0411,
      "step": 247360
    },
    {
      "epoch": 0.404842795703148,
      "grad_norm": 1.860810399055481,
      "learning_rate": 9.250871424523766e-06,
      "loss": 0.0355,
      "step": 247380
    },
    {
      "epoch": 0.40487552614180133,
      "grad_norm": 3.4494824409484863,
      "learning_rate": 9.250805532310248e-06,
      "loss": 0.0483,
      "step": 247400
    },
    {
      "epoch": 0.4049082565804547,
      "grad_norm": 2.1177895069122314,
      "learning_rate": 9.250739640096731e-06,
      "loss": 0.043,
      "step": 247420
    },
    {
      "epoch": 0.404940987019108,
      "grad_norm": 1.103683352470398,
      "learning_rate": 9.250673747883213e-06,
      "loss": 0.0494,
      "step": 247440
    },
    {
      "epoch": 0.4049737174577614,
      "grad_norm": 1.65420401096344,
      "learning_rate": 9.250607855669697e-06,
      "loss": 0.0462,
      "step": 247460
    },
    {
      "epoch": 0.4050064478964147,
      "grad_norm": 1.1552356481552124,
      "learning_rate": 9.250541963456179e-06,
      "loss": 0.0258,
      "step": 247480
    },
    {
      "epoch": 0.40503917833506803,
      "grad_norm": 1.0244649648666382,
      "learning_rate": 9.250476071242662e-06,
      "loss": 0.0359,
      "step": 247500
    },
    {
      "epoch": 0.4050719087737214,
      "grad_norm": 1.109201431274414,
      "learning_rate": 9.250410179029144e-06,
      "loss": 0.0453,
      "step": 247520
    },
    {
      "epoch": 0.4051046392123747,
      "grad_norm": 1.0688329935073853,
      "learning_rate": 9.250344286815628e-06,
      "loss": 0.0513,
      "step": 247540
    },
    {
      "epoch": 0.4051373696510281,
      "grad_norm": 0.7837015390396118,
      "learning_rate": 9.25027839460211e-06,
      "loss": 0.0499,
      "step": 247560
    },
    {
      "epoch": 0.4051701000896814,
      "grad_norm": 2.2461814880371094,
      "learning_rate": 9.250212502388593e-06,
      "loss": 0.0475,
      "step": 247580
    },
    {
      "epoch": 0.4052028305283347,
      "grad_norm": 0.4885653257369995,
      "learning_rate": 9.250146610175077e-06,
      "loss": 0.038,
      "step": 247600
    },
    {
      "epoch": 0.4052355609669881,
      "grad_norm": 1.099688172340393,
      "learning_rate": 9.250080717961559e-06,
      "loss": 0.0313,
      "step": 247620
    },
    {
      "epoch": 0.4052682914056414,
      "grad_norm": 0.9684656858444214,
      "learning_rate": 9.250014825748042e-06,
      "loss": 0.0311,
      "step": 247640
    },
    {
      "epoch": 0.4053010218442948,
      "grad_norm": 1.5316545963287354,
      "learning_rate": 9.249948933534526e-06,
      "loss": 0.0447,
      "step": 247660
    },
    {
      "epoch": 0.4053337522829481,
      "grad_norm": 1.6289809942245483,
      "learning_rate": 9.249883041321008e-06,
      "loss": 0.044,
      "step": 247680
    },
    {
      "epoch": 0.4053664827216014,
      "grad_norm": 2.648313283920288,
      "learning_rate": 9.249817149107491e-06,
      "loss": 0.0662,
      "step": 247700
    },
    {
      "epoch": 0.4053992131602548,
      "grad_norm": 1.3174628019332886,
      "learning_rate": 9.249751256893975e-06,
      "loss": 0.0337,
      "step": 247720
    },
    {
      "epoch": 0.4054319435989081,
      "grad_norm": 0.18112726509571075,
      "learning_rate": 9.249685364680457e-06,
      "loss": 0.0289,
      "step": 247740
    },
    {
      "epoch": 0.4054646740375615,
      "grad_norm": 0.7127997279167175,
      "learning_rate": 9.24961947246694e-06,
      "loss": 0.0442,
      "step": 247760
    },
    {
      "epoch": 0.4054974044762148,
      "grad_norm": 2.4704084396362305,
      "learning_rate": 9.249553580253422e-06,
      "loss": 0.0369,
      "step": 247780
    },
    {
      "epoch": 0.4055301349148681,
      "grad_norm": 3.6480467319488525,
      "learning_rate": 9.249487688039906e-06,
      "loss": 0.042,
      "step": 247800
    },
    {
      "epoch": 0.4055628653535215,
      "grad_norm": 2.2517151832580566,
      "learning_rate": 9.249421795826388e-06,
      "loss": 0.0419,
      "step": 247820
    },
    {
      "epoch": 0.4055955957921748,
      "grad_norm": 1.1877720355987549,
      "learning_rate": 9.249355903612871e-06,
      "loss": 0.0409,
      "step": 247840
    },
    {
      "epoch": 0.4056283262308281,
      "grad_norm": 1.2340301275253296,
      "learning_rate": 9.249290011399353e-06,
      "loss": 0.0392,
      "step": 247860
    },
    {
      "epoch": 0.4056610566694815,
      "grad_norm": 3.2755911350250244,
      "learning_rate": 9.249224119185837e-06,
      "loss": 0.0624,
      "step": 247880
    },
    {
      "epoch": 0.4056937871081348,
      "grad_norm": 1.9758129119873047,
      "learning_rate": 9.249158226972319e-06,
      "loss": 0.0539,
      "step": 247900
    },
    {
      "epoch": 0.4057265175467882,
      "grad_norm": 0.7917603254318237,
      "learning_rate": 9.249092334758802e-06,
      "loss": 0.0439,
      "step": 247920
    },
    {
      "epoch": 0.4057592479854415,
      "grad_norm": 2.1825966835021973,
      "learning_rate": 9.249026442545284e-06,
      "loss": 0.0542,
      "step": 247940
    },
    {
      "epoch": 0.4057919784240948,
      "grad_norm": 0.863079309463501,
      "learning_rate": 9.248960550331768e-06,
      "loss": 0.04,
      "step": 247960
    },
    {
      "epoch": 0.4058247088627482,
      "grad_norm": 2.349370241165161,
      "learning_rate": 9.24889465811825e-06,
      "loss": 0.0363,
      "step": 247980
    },
    {
      "epoch": 0.4058574393014015,
      "grad_norm": 0.9831697344779968,
      "learning_rate": 9.248828765904733e-06,
      "loss": 0.0451,
      "step": 248000
    },
    {
      "epoch": 0.4058901697400549,
      "grad_norm": 3.813260316848755,
      "learning_rate": 9.248762873691217e-06,
      "loss": 0.0578,
      "step": 248020
    },
    {
      "epoch": 0.4059229001787082,
      "grad_norm": 0.5188937187194824,
      "learning_rate": 9.248696981477699e-06,
      "loss": 0.0459,
      "step": 248040
    },
    {
      "epoch": 0.4059556306173615,
      "grad_norm": 3.58951997756958,
      "learning_rate": 9.248631089264182e-06,
      "loss": 0.0373,
      "step": 248060
    },
    {
      "epoch": 0.4059883610560149,
      "grad_norm": 2.1609981060028076,
      "learning_rate": 9.248565197050666e-06,
      "loss": 0.056,
      "step": 248080
    },
    {
      "epoch": 0.4060210914946682,
      "grad_norm": 1.5698959827423096,
      "learning_rate": 9.248499304837148e-06,
      "loss": 0.0509,
      "step": 248100
    },
    {
      "epoch": 0.40605382193332157,
      "grad_norm": 0.9897040724754333,
      "learning_rate": 9.248433412623631e-06,
      "loss": 0.0451,
      "step": 248120
    },
    {
      "epoch": 0.4060865523719749,
      "grad_norm": 1.7042943239212036,
      "learning_rate": 9.248367520410115e-06,
      "loss": 0.0451,
      "step": 248140
    },
    {
      "epoch": 0.4061192828106282,
      "grad_norm": 1.7569549083709717,
      "learning_rate": 9.248301628196597e-06,
      "loss": 0.0445,
      "step": 248160
    },
    {
      "epoch": 0.4061520132492816,
      "grad_norm": 3.029695987701416,
      "learning_rate": 9.24823573598308e-06,
      "loss": 0.0504,
      "step": 248180
    },
    {
      "epoch": 0.4061847436879349,
      "grad_norm": 1.9281752109527588,
      "learning_rate": 9.248169843769562e-06,
      "loss": 0.0417,
      "step": 248200
    },
    {
      "epoch": 0.40621747412658826,
      "grad_norm": 0.4183305501937866,
      "learning_rate": 9.248103951556046e-06,
      "loss": 0.0531,
      "step": 248220
    },
    {
      "epoch": 0.4062502045652416,
      "grad_norm": 1.1642422676086426,
      "learning_rate": 9.248038059342528e-06,
      "loss": 0.0643,
      "step": 248240
    },
    {
      "epoch": 0.4062829350038949,
      "grad_norm": 1.4379832744598389,
      "learning_rate": 9.247972167129011e-06,
      "loss": 0.0289,
      "step": 248260
    },
    {
      "epoch": 0.40631566544254827,
      "grad_norm": 2.9501469135284424,
      "learning_rate": 9.247906274915493e-06,
      "loss": 0.0399,
      "step": 248280
    },
    {
      "epoch": 0.4063483958812016,
      "grad_norm": 3.1397297382354736,
      "learning_rate": 9.247840382701977e-06,
      "loss": 0.039,
      "step": 248300
    },
    {
      "epoch": 0.40638112631985496,
      "grad_norm": 1.3892903327941895,
      "learning_rate": 9.247774490488459e-06,
      "loss": 0.0342,
      "step": 248320
    },
    {
      "epoch": 0.4064138567585083,
      "grad_norm": 3.913773536682129,
      "learning_rate": 9.247708598274942e-06,
      "loss": 0.0368,
      "step": 248340
    },
    {
      "epoch": 0.4064465871971616,
      "grad_norm": 0.5104948282241821,
      "learning_rate": 9.247642706061426e-06,
      "loss": 0.0576,
      "step": 248360
    },
    {
      "epoch": 0.40647931763581496,
      "grad_norm": 1.1692836284637451,
      "learning_rate": 9.247576813847908e-06,
      "loss": 0.0418,
      "step": 248380
    },
    {
      "epoch": 0.4065120480744683,
      "grad_norm": 3.1208579540252686,
      "learning_rate": 9.247510921634391e-06,
      "loss": 0.0562,
      "step": 248400
    },
    {
      "epoch": 0.40654477851312165,
      "grad_norm": 1.2826673984527588,
      "learning_rate": 9.247445029420873e-06,
      "loss": 0.0481,
      "step": 248420
    },
    {
      "epoch": 0.40657750895177497,
      "grad_norm": 1.8706014156341553,
      "learning_rate": 9.247379137207357e-06,
      "loss": 0.0476,
      "step": 248440
    },
    {
      "epoch": 0.4066102393904283,
      "grad_norm": 0.8694700002670288,
      "learning_rate": 9.24731324499384e-06,
      "loss": 0.038,
      "step": 248460
    },
    {
      "epoch": 0.40664296982908166,
      "grad_norm": 0.9873691201210022,
      "learning_rate": 9.247247352780322e-06,
      "loss": 0.0398,
      "step": 248480
    },
    {
      "epoch": 0.406675700267735,
      "grad_norm": 1.2773600816726685,
      "learning_rate": 9.247181460566806e-06,
      "loss": 0.0389,
      "step": 248500
    },
    {
      "epoch": 0.40670843070638835,
      "grad_norm": 1.729327917098999,
      "learning_rate": 9.24711556835329e-06,
      "loss": 0.0446,
      "step": 248520
    },
    {
      "epoch": 0.40674116114504166,
      "grad_norm": 3.500767469406128,
      "learning_rate": 9.247049676139771e-06,
      "loss": 0.0352,
      "step": 248540
    },
    {
      "epoch": 0.406773891583695,
      "grad_norm": 5.953273773193359,
      "learning_rate": 9.246983783926255e-06,
      "loss": 0.0484,
      "step": 248560
    },
    {
      "epoch": 0.40680662202234835,
      "grad_norm": 2.7703356742858887,
      "learning_rate": 9.246917891712737e-06,
      "loss": 0.047,
      "step": 248580
    },
    {
      "epoch": 0.40683935246100167,
      "grad_norm": 1.0071203708648682,
      "learning_rate": 9.24685199949922e-06,
      "loss": 0.0329,
      "step": 248600
    },
    {
      "epoch": 0.40687208289965504,
      "grad_norm": 0.628131091594696,
      "learning_rate": 9.246786107285702e-06,
      "loss": 0.0407,
      "step": 248620
    },
    {
      "epoch": 0.40690481333830836,
      "grad_norm": 1.119911789894104,
      "learning_rate": 9.246720215072186e-06,
      "loss": 0.052,
      "step": 248640
    },
    {
      "epoch": 0.4069375437769617,
      "grad_norm": 0.8944388031959534,
      "learning_rate": 9.246654322858668e-06,
      "loss": 0.047,
      "step": 248660
    },
    {
      "epoch": 0.40697027421561505,
      "grad_norm": 1.9223843812942505,
      "learning_rate": 9.246588430645151e-06,
      "loss": 0.0519,
      "step": 248680
    },
    {
      "epoch": 0.40700300465426836,
      "grad_norm": 6.021173477172852,
      "learning_rate": 9.246522538431635e-06,
      "loss": 0.0415,
      "step": 248700
    },
    {
      "epoch": 0.40703573509292174,
      "grad_norm": 0.7805957198143005,
      "learning_rate": 9.246456646218117e-06,
      "loss": 0.0403,
      "step": 248720
    },
    {
      "epoch": 0.40706846553157505,
      "grad_norm": 1.008790135383606,
      "learning_rate": 9.2463907540046e-06,
      "loss": 0.0404,
      "step": 248740
    },
    {
      "epoch": 0.40710119597022837,
      "grad_norm": 1.0703216791152954,
      "learning_rate": 9.246324861791082e-06,
      "loss": 0.0475,
      "step": 248760
    },
    {
      "epoch": 0.40713392640888174,
      "grad_norm": 0.3482688069343567,
      "learning_rate": 9.246258969577566e-06,
      "loss": 0.0421,
      "step": 248780
    },
    {
      "epoch": 0.40716665684753506,
      "grad_norm": 3.8994698524475098,
      "learning_rate": 9.246193077364048e-06,
      "loss": 0.0376,
      "step": 248800
    },
    {
      "epoch": 0.40719938728618843,
      "grad_norm": 1.87943434715271,
      "learning_rate": 9.246127185150531e-06,
      "loss": 0.0409,
      "step": 248820
    },
    {
      "epoch": 0.40723211772484175,
      "grad_norm": 1.6437640190124512,
      "learning_rate": 9.246061292937013e-06,
      "loss": 0.0432,
      "step": 248840
    },
    {
      "epoch": 0.40726484816349506,
      "grad_norm": 0.3355359733104706,
      "learning_rate": 9.245995400723497e-06,
      "loss": 0.0496,
      "step": 248860
    },
    {
      "epoch": 0.40729757860214844,
      "grad_norm": 0.3152633011341095,
      "learning_rate": 9.24592950850998e-06,
      "loss": 0.0438,
      "step": 248880
    },
    {
      "epoch": 0.40733030904080175,
      "grad_norm": 2.573774814605713,
      "learning_rate": 9.245863616296462e-06,
      "loss": 0.047,
      "step": 248900
    },
    {
      "epoch": 0.4073630394794551,
      "grad_norm": 0.6894024610519409,
      "learning_rate": 9.245797724082946e-06,
      "loss": 0.0567,
      "step": 248920
    },
    {
      "epoch": 0.40739576991810844,
      "grad_norm": 0.4925884008407593,
      "learning_rate": 9.24573183186943e-06,
      "loss": 0.0497,
      "step": 248940
    },
    {
      "epoch": 0.40742850035676176,
      "grad_norm": 1.4303030967712402,
      "learning_rate": 9.245665939655911e-06,
      "loss": 0.0478,
      "step": 248960
    },
    {
      "epoch": 0.40746123079541513,
      "grad_norm": 1.6756579875946045,
      "learning_rate": 9.245600047442395e-06,
      "loss": 0.0292,
      "step": 248980
    },
    {
      "epoch": 0.40749396123406845,
      "grad_norm": 1.7055563926696777,
      "learning_rate": 9.245534155228877e-06,
      "loss": 0.0408,
      "step": 249000
    },
    {
      "epoch": 0.4075266916727218,
      "grad_norm": 0.8295144438743591,
      "learning_rate": 9.24546826301536e-06,
      "loss": 0.0388,
      "step": 249020
    },
    {
      "epoch": 0.40755942211137514,
      "grad_norm": 0.6695336103439331,
      "learning_rate": 9.245402370801842e-06,
      "loss": 0.0406,
      "step": 249040
    },
    {
      "epoch": 0.40759215255002845,
      "grad_norm": 1.0639574527740479,
      "learning_rate": 9.245336478588326e-06,
      "loss": 0.0462,
      "step": 249060
    },
    {
      "epoch": 0.4076248829886818,
      "grad_norm": 1.534115195274353,
      "learning_rate": 9.24527058637481e-06,
      "loss": 0.0395,
      "step": 249080
    },
    {
      "epoch": 0.40765761342733514,
      "grad_norm": 0.43095096945762634,
      "learning_rate": 9.245204694161291e-06,
      "loss": 0.0382,
      "step": 249100
    },
    {
      "epoch": 0.4076903438659885,
      "grad_norm": 3.4861302375793457,
      "learning_rate": 9.245138801947775e-06,
      "loss": 0.0664,
      "step": 249120
    },
    {
      "epoch": 0.40772307430464183,
      "grad_norm": 0.7236407995223999,
      "learning_rate": 9.245072909734257e-06,
      "loss": 0.047,
      "step": 249140
    },
    {
      "epoch": 0.40775580474329515,
      "grad_norm": 0.5588610172271729,
      "learning_rate": 9.24500701752074e-06,
      "loss": 0.0493,
      "step": 249160
    },
    {
      "epoch": 0.4077885351819485,
      "grad_norm": 0.6233925223350525,
      "learning_rate": 9.244941125307222e-06,
      "loss": 0.0493,
      "step": 249180
    },
    {
      "epoch": 0.40782126562060184,
      "grad_norm": 4.393259525299072,
      "learning_rate": 9.244875233093706e-06,
      "loss": 0.0509,
      "step": 249200
    },
    {
      "epoch": 0.4078539960592552,
      "grad_norm": 0.7489737272262573,
      "learning_rate": 9.244809340880188e-06,
      "loss": 0.0444,
      "step": 249220
    },
    {
      "epoch": 0.4078867264979085,
      "grad_norm": 0.770259439945221,
      "learning_rate": 9.244743448666672e-06,
      "loss": 0.044,
      "step": 249240
    },
    {
      "epoch": 0.40791945693656184,
      "grad_norm": 1.030672550201416,
      "learning_rate": 9.244677556453155e-06,
      "loss": 0.0488,
      "step": 249260
    },
    {
      "epoch": 0.4079521873752152,
      "grad_norm": 0.6169223785400391,
      "learning_rate": 9.244611664239637e-06,
      "loss": 0.0376,
      "step": 249280
    },
    {
      "epoch": 0.40798491781386853,
      "grad_norm": 1.0055654048919678,
      "learning_rate": 9.24454577202612e-06,
      "loss": 0.0302,
      "step": 249300
    },
    {
      "epoch": 0.4080176482525219,
      "grad_norm": 1.9758027791976929,
      "learning_rate": 9.244479879812604e-06,
      "loss": 0.0419,
      "step": 249320
    },
    {
      "epoch": 0.4080503786911752,
      "grad_norm": 1.5329598188400269,
      "learning_rate": 9.244413987599086e-06,
      "loss": 0.0559,
      "step": 249340
    },
    {
      "epoch": 0.40808310912982854,
      "grad_norm": 0.8945331573486328,
      "learning_rate": 9.24434809538557e-06,
      "loss": 0.0381,
      "step": 249360
    },
    {
      "epoch": 0.4081158395684819,
      "grad_norm": 1.556671380996704,
      "learning_rate": 9.244282203172052e-06,
      "loss": 0.046,
      "step": 249380
    },
    {
      "epoch": 0.4081485700071352,
      "grad_norm": 0.44628140330314636,
      "learning_rate": 9.244216310958535e-06,
      "loss": 0.057,
      "step": 249400
    },
    {
      "epoch": 0.4081813004457886,
      "grad_norm": 3.4963414669036865,
      "learning_rate": 9.244150418745019e-06,
      "loss": 0.045,
      "step": 249420
    },
    {
      "epoch": 0.4082140308844419,
      "grad_norm": 2.3285155296325684,
      "learning_rate": 9.2440845265315e-06,
      "loss": 0.0447,
      "step": 249440
    },
    {
      "epoch": 0.40824676132309523,
      "grad_norm": 0.8702280521392822,
      "learning_rate": 9.244018634317984e-06,
      "loss": 0.0403,
      "step": 249460
    },
    {
      "epoch": 0.4082794917617486,
      "grad_norm": 1.3889036178588867,
      "learning_rate": 9.243952742104466e-06,
      "loss": 0.0368,
      "step": 249480
    },
    {
      "epoch": 0.4083122222004019,
      "grad_norm": 2.1872904300689697,
      "learning_rate": 9.24388684989095e-06,
      "loss": 0.0443,
      "step": 249500
    },
    {
      "epoch": 0.4083449526390553,
      "grad_norm": 1.1501497030258179,
      "learning_rate": 9.243820957677432e-06,
      "loss": 0.062,
      "step": 249520
    },
    {
      "epoch": 0.4083776830777086,
      "grad_norm": 5.794909477233887,
      "learning_rate": 9.243755065463915e-06,
      "loss": 0.062,
      "step": 249540
    },
    {
      "epoch": 0.4084104135163619,
      "grad_norm": 3.3457727432250977,
      "learning_rate": 9.243689173250397e-06,
      "loss": 0.0587,
      "step": 249560
    },
    {
      "epoch": 0.4084431439550153,
      "grad_norm": 2.038708209991455,
      "learning_rate": 9.24362328103688e-06,
      "loss": 0.0504,
      "step": 249580
    },
    {
      "epoch": 0.4084758743936686,
      "grad_norm": 2.116240978240967,
      "learning_rate": 9.243557388823363e-06,
      "loss": 0.0519,
      "step": 249600
    },
    {
      "epoch": 0.408508604832322,
      "grad_norm": 1.371406078338623,
      "learning_rate": 9.243491496609846e-06,
      "loss": 0.0408,
      "step": 249620
    },
    {
      "epoch": 0.4085413352709753,
      "grad_norm": 0.9671628475189209,
      "learning_rate": 9.24342560439633e-06,
      "loss": 0.0399,
      "step": 249640
    },
    {
      "epoch": 0.4085740657096286,
      "grad_norm": 1.463164210319519,
      "learning_rate": 9.243359712182812e-06,
      "loss": 0.0413,
      "step": 249660
    },
    {
      "epoch": 0.408606796148282,
      "grad_norm": 0.5906134843826294,
      "learning_rate": 9.243293819969295e-06,
      "loss": 0.0566,
      "step": 249680
    },
    {
      "epoch": 0.4086395265869353,
      "grad_norm": 2.4977991580963135,
      "learning_rate": 9.243227927755779e-06,
      "loss": 0.0494,
      "step": 249700
    },
    {
      "epoch": 0.4086722570255887,
      "grad_norm": 1.5769938230514526,
      "learning_rate": 9.24316203554226e-06,
      "loss": 0.0413,
      "step": 249720
    },
    {
      "epoch": 0.408704987464242,
      "grad_norm": 0.9210249185562134,
      "learning_rate": 9.243096143328744e-06,
      "loss": 0.0346,
      "step": 249740
    },
    {
      "epoch": 0.4087377179028953,
      "grad_norm": 0.4636191129684448,
      "learning_rate": 9.243030251115228e-06,
      "loss": 0.0464,
      "step": 249760
    },
    {
      "epoch": 0.4087704483415487,
      "grad_norm": 3.3060529232025146,
      "learning_rate": 9.24296435890171e-06,
      "loss": 0.0462,
      "step": 249780
    },
    {
      "epoch": 0.408803178780202,
      "grad_norm": 1.1419954299926758,
      "learning_rate": 9.242898466688193e-06,
      "loss": 0.0402,
      "step": 249800
    },
    {
      "epoch": 0.4088359092188554,
      "grad_norm": 3.3702025413513184,
      "learning_rate": 9.242832574474675e-06,
      "loss": 0.0487,
      "step": 249820
    },
    {
      "epoch": 0.4088686396575087,
      "grad_norm": 2.3645074367523193,
      "learning_rate": 9.242766682261159e-06,
      "loss": 0.0637,
      "step": 249840
    },
    {
      "epoch": 0.408901370096162,
      "grad_norm": 2.0248074531555176,
      "learning_rate": 9.24270079004764e-06,
      "loss": 0.0419,
      "step": 249860
    },
    {
      "epoch": 0.4089341005348154,
      "grad_norm": 1.1920509338378906,
      "learning_rate": 9.242634897834124e-06,
      "loss": 0.0381,
      "step": 249880
    },
    {
      "epoch": 0.4089668309734687,
      "grad_norm": 0.7275077104568481,
      "learning_rate": 9.242569005620606e-06,
      "loss": 0.052,
      "step": 249900
    },
    {
      "epoch": 0.40899956141212207,
      "grad_norm": 1.4682908058166504,
      "learning_rate": 9.24250311340709e-06,
      "loss": 0.0545,
      "step": 249920
    },
    {
      "epoch": 0.4090322918507754,
      "grad_norm": 0.6139883399009705,
      "learning_rate": 9.242437221193572e-06,
      "loss": 0.0495,
      "step": 249940
    },
    {
      "epoch": 0.4090650222894287,
      "grad_norm": 2.752223491668701,
      "learning_rate": 9.242371328980055e-06,
      "loss": 0.0338,
      "step": 249960
    },
    {
      "epoch": 0.4090977527280821,
      "grad_norm": 5.499786376953125,
      "learning_rate": 9.242305436766537e-06,
      "loss": 0.0669,
      "step": 249980
    },
    {
      "epoch": 0.4091304831667354,
      "grad_norm": 1.304995059967041,
      "learning_rate": 9.24223954455302e-06,
      "loss": 0.0429,
      "step": 250000
    },
    {
      "epoch": 0.4091304831667354,
      "eval_loss": 0.022548284381628036,
      "eval_runtime": 6496.1131,
      "eval_samples_per_second": 158.227,
      "eval_steps_per_second": 15.823,
      "eval_sts-dev_pearson_cosine": 0.9459475520848367,
      "eval_sts-dev_spearman_cosine": 0.8727141145927279,
      "step": 250000
    },
    {
      "epoch": 0.40916321360538876,
      "grad_norm": 1.9508514404296875,
      "learning_rate": 9.242173652339503e-06,
      "loss": 0.0541,
      "step": 250020
    },
    {
      "epoch": 0.4091959440440421,
      "grad_norm": 0.9629577994346619,
      "learning_rate": 9.242107760125986e-06,
      "loss": 0.0357,
      "step": 250040
    },
    {
      "epoch": 0.4092286744826954,
      "grad_norm": 0.8773587942123413,
      "learning_rate": 9.24204186791247e-06,
      "loss": 0.0364,
      "step": 250060
    },
    {
      "epoch": 0.40926140492134877,
      "grad_norm": 0.7319911122322083,
      "learning_rate": 9.241975975698952e-06,
      "loss": 0.0343,
      "step": 250080
    },
    {
      "epoch": 0.4092941353600021,
      "grad_norm": 1.8514982461929321,
      "learning_rate": 9.241910083485435e-06,
      "loss": 0.0366,
      "step": 250100
    },
    {
      "epoch": 0.40932686579865546,
      "grad_norm": 1.938934326171875,
      "learning_rate": 9.241844191271919e-06,
      "loss": 0.036,
      "step": 250120
    },
    {
      "epoch": 0.4093595962373088,
      "grad_norm": 1.0241316556930542,
      "learning_rate": 9.2417782990584e-06,
      "loss": 0.0507,
      "step": 250140
    },
    {
      "epoch": 0.4093923266759621,
      "grad_norm": 1.27263605594635,
      "learning_rate": 9.241712406844884e-06,
      "loss": 0.0525,
      "step": 250160
    },
    {
      "epoch": 0.40942505711461546,
      "grad_norm": 0.7178109884262085,
      "learning_rate": 9.241646514631368e-06,
      "loss": 0.0513,
      "step": 250180
    },
    {
      "epoch": 0.4094577875532688,
      "grad_norm": 1.1280412673950195,
      "learning_rate": 9.24158062241785e-06,
      "loss": 0.0546,
      "step": 250200
    },
    {
      "epoch": 0.40949051799192215,
      "grad_norm": 5.209552764892578,
      "learning_rate": 9.241514730204333e-06,
      "loss": 0.0419,
      "step": 250220
    },
    {
      "epoch": 0.40952324843057547,
      "grad_norm": 1.6259244680404663,
      "learning_rate": 9.241448837990815e-06,
      "loss": 0.0464,
      "step": 250240
    },
    {
      "epoch": 0.4095559788692288,
      "grad_norm": 1.4938287734985352,
      "learning_rate": 9.241382945777299e-06,
      "loss": 0.0638,
      "step": 250260
    },
    {
      "epoch": 0.40958870930788216,
      "grad_norm": 1.4271968603134155,
      "learning_rate": 9.24131705356378e-06,
      "loss": 0.0445,
      "step": 250280
    },
    {
      "epoch": 0.4096214397465355,
      "grad_norm": 5.807096004486084,
      "learning_rate": 9.241251161350264e-06,
      "loss": 0.0545,
      "step": 250300
    },
    {
      "epoch": 0.40965417018518885,
      "grad_norm": 2.1183383464813232,
      "learning_rate": 9.241185269136746e-06,
      "loss": 0.04,
      "step": 250320
    },
    {
      "epoch": 0.40968690062384217,
      "grad_norm": 2.8791637420654297,
      "learning_rate": 9.24111937692323e-06,
      "loss": 0.0448,
      "step": 250340
    },
    {
      "epoch": 0.4097196310624955,
      "grad_norm": 2.412479877471924,
      "learning_rate": 9.241053484709712e-06,
      "loss": 0.0331,
      "step": 250360
    },
    {
      "epoch": 0.40975236150114885,
      "grad_norm": 2.8006815910339355,
      "learning_rate": 9.240987592496195e-06,
      "loss": 0.0422,
      "step": 250380
    },
    {
      "epoch": 0.40978509193980217,
      "grad_norm": 4.149197101593018,
      "learning_rate": 9.240921700282677e-06,
      "loss": 0.0446,
      "step": 250400
    },
    {
      "epoch": 0.40981782237845554,
      "grad_norm": 2.8148388862609863,
      "learning_rate": 9.24085580806916e-06,
      "loss": 0.042,
      "step": 250420
    },
    {
      "epoch": 0.40985055281710886,
      "grad_norm": 0.7912032008171082,
      "learning_rate": 9.240789915855644e-06,
      "loss": 0.0476,
      "step": 250440
    },
    {
      "epoch": 0.4098832832557622,
      "grad_norm": 2.8300557136535645,
      "learning_rate": 9.240724023642126e-06,
      "loss": 0.0425,
      "step": 250460
    },
    {
      "epoch": 0.40991601369441555,
      "grad_norm": 2.6792826652526855,
      "learning_rate": 9.24065813142861e-06,
      "loss": 0.0474,
      "step": 250480
    },
    {
      "epoch": 0.40994874413306887,
      "grad_norm": 0.9505677819252014,
      "learning_rate": 9.240592239215093e-06,
      "loss": 0.0385,
      "step": 250500
    },
    {
      "epoch": 0.40998147457172224,
      "grad_norm": 0.5459058880805969,
      "learning_rate": 9.240526347001575e-06,
      "loss": 0.037,
      "step": 250520
    },
    {
      "epoch": 0.41001420501037555,
      "grad_norm": 2.1524481773376465,
      "learning_rate": 9.240460454788059e-06,
      "loss": 0.0444,
      "step": 250540
    },
    {
      "epoch": 0.41004693544902887,
      "grad_norm": 0.5344428420066833,
      "learning_rate": 9.240394562574542e-06,
      "loss": 0.0374,
      "step": 250560
    },
    {
      "epoch": 0.41007966588768224,
      "grad_norm": 1.0777897834777832,
      "learning_rate": 9.240328670361024e-06,
      "loss": 0.0427,
      "step": 250580
    },
    {
      "epoch": 0.41011239632633556,
      "grad_norm": 0.35505619645118713,
      "learning_rate": 9.240262778147508e-06,
      "loss": 0.0328,
      "step": 250600
    },
    {
      "epoch": 0.41014512676498893,
      "grad_norm": 3.4040565490722656,
      "learning_rate": 9.24019688593399e-06,
      "loss": 0.0527,
      "step": 250620
    },
    {
      "epoch": 0.41017785720364225,
      "grad_norm": 0.8295028209686279,
      "learning_rate": 9.240130993720473e-06,
      "loss": 0.0637,
      "step": 250640
    },
    {
      "epoch": 0.41021058764229557,
      "grad_norm": 1.8757871389389038,
      "learning_rate": 9.240065101506955e-06,
      "loss": 0.0408,
      "step": 250660
    },
    {
      "epoch": 0.41024331808094894,
      "grad_norm": 0.4244605004787445,
      "learning_rate": 9.239999209293439e-06,
      "loss": 0.0349,
      "step": 250680
    },
    {
      "epoch": 0.41027604851960225,
      "grad_norm": 0.5786147117614746,
      "learning_rate": 9.23993331707992e-06,
      "loss": 0.0336,
      "step": 250700
    },
    {
      "epoch": 0.41030877895825557,
      "grad_norm": 1.3310816287994385,
      "learning_rate": 9.239867424866404e-06,
      "loss": 0.0513,
      "step": 250720
    },
    {
      "epoch": 0.41034150939690894,
      "grad_norm": 1.4282348155975342,
      "learning_rate": 9.239801532652886e-06,
      "loss": 0.0599,
      "step": 250740
    },
    {
      "epoch": 0.41037423983556226,
      "grad_norm": 0.6729655861854553,
      "learning_rate": 9.23973564043937e-06,
      "loss": 0.031,
      "step": 250760
    },
    {
      "epoch": 0.41040697027421563,
      "grad_norm": 2.1212780475616455,
      "learning_rate": 9.239669748225852e-06,
      "loss": 0.0377,
      "step": 250780
    },
    {
      "epoch": 0.41043970071286895,
      "grad_norm": 1.028940200805664,
      "learning_rate": 9.239603856012335e-06,
      "loss": 0.0398,
      "step": 250800
    },
    {
      "epoch": 0.41047243115152227,
      "grad_norm": 0.9515811204910278,
      "learning_rate": 9.239537963798819e-06,
      "loss": 0.0475,
      "step": 250820
    },
    {
      "epoch": 0.41050516159017564,
      "grad_norm": 1.6041291952133179,
      "learning_rate": 9.239472071585301e-06,
      "loss": 0.0389,
      "step": 250840
    },
    {
      "epoch": 0.41053789202882895,
      "grad_norm": 1.9372130632400513,
      "learning_rate": 9.239406179371784e-06,
      "loss": 0.0472,
      "step": 250860
    },
    {
      "epoch": 0.4105706224674823,
      "grad_norm": 1.75129234790802,
      "learning_rate": 9.239340287158266e-06,
      "loss": 0.0413,
      "step": 250880
    },
    {
      "epoch": 0.41060335290613564,
      "grad_norm": 0.9459596276283264,
      "learning_rate": 9.23927439494475e-06,
      "loss": 0.0376,
      "step": 250900
    },
    {
      "epoch": 0.41063608334478896,
      "grad_norm": 3.014800786972046,
      "learning_rate": 9.239208502731234e-06,
      "loss": 0.043,
      "step": 250920
    },
    {
      "epoch": 0.41066881378344233,
      "grad_norm": 2.0527570247650146,
      "learning_rate": 9.239142610517715e-06,
      "loss": 0.0557,
      "step": 250940
    },
    {
      "epoch": 0.41070154422209565,
      "grad_norm": 1.6495462656021118,
      "learning_rate": 9.239076718304199e-06,
      "loss": 0.0288,
      "step": 250960
    },
    {
      "epoch": 0.410734274660749,
      "grad_norm": 2.5989863872528076,
      "learning_rate": 9.239010826090683e-06,
      "loss": 0.0564,
      "step": 250980
    },
    {
      "epoch": 0.41076700509940234,
      "grad_norm": 0.9506586194038391,
      "learning_rate": 9.238944933877164e-06,
      "loss": 0.0386,
      "step": 251000
    },
    {
      "epoch": 0.41079973553805565,
      "grad_norm": 2.311612367630005,
      "learning_rate": 9.238879041663648e-06,
      "loss": 0.0369,
      "step": 251020
    },
    {
      "epoch": 0.410832465976709,
      "grad_norm": 1.939982533454895,
      "learning_rate": 9.23881314945013e-06,
      "loss": 0.0516,
      "step": 251040
    },
    {
      "epoch": 0.41086519641536234,
      "grad_norm": 1.6574333906173706,
      "learning_rate": 9.238747257236614e-06,
      "loss": 0.0516,
      "step": 251060
    },
    {
      "epoch": 0.4108979268540157,
      "grad_norm": 2.127923011779785,
      "learning_rate": 9.238681365023095e-06,
      "loss": 0.0553,
      "step": 251080
    },
    {
      "epoch": 0.41093065729266903,
      "grad_norm": 0.5358351469039917,
      "learning_rate": 9.238615472809579e-06,
      "loss": 0.0426,
      "step": 251100
    },
    {
      "epoch": 0.41096338773132235,
      "grad_norm": 0.7923257350921631,
      "learning_rate": 9.238549580596061e-06,
      "loss": 0.041,
      "step": 251120
    },
    {
      "epoch": 0.4109961181699757,
      "grad_norm": 2.3176794052124023,
      "learning_rate": 9.238483688382544e-06,
      "loss": 0.0439,
      "step": 251140
    },
    {
      "epoch": 0.41102884860862904,
      "grad_norm": 1.0961601734161377,
      "learning_rate": 9.238417796169028e-06,
      "loss": 0.0353,
      "step": 251160
    },
    {
      "epoch": 0.4110615790472824,
      "grad_norm": 2.357788562774658,
      "learning_rate": 9.23835190395551e-06,
      "loss": 0.0499,
      "step": 251180
    },
    {
      "epoch": 0.4110943094859357,
      "grad_norm": 1.749077558517456,
      "learning_rate": 9.238286011741994e-06,
      "loss": 0.0478,
      "step": 251200
    },
    {
      "epoch": 0.41112703992458904,
      "grad_norm": 1.7327829599380493,
      "learning_rate": 9.238220119528475e-06,
      "loss": 0.0387,
      "step": 251220
    },
    {
      "epoch": 0.4111597703632424,
      "grad_norm": 1.2742562294006348,
      "learning_rate": 9.238154227314959e-06,
      "loss": 0.0439,
      "step": 251240
    },
    {
      "epoch": 0.41119250080189573,
      "grad_norm": 2.380690336227417,
      "learning_rate": 9.238088335101441e-06,
      "loss": 0.0452,
      "step": 251260
    },
    {
      "epoch": 0.4112252312405491,
      "grad_norm": 3.332414150238037,
      "learning_rate": 9.238022442887925e-06,
      "loss": 0.0559,
      "step": 251280
    },
    {
      "epoch": 0.4112579616792024,
      "grad_norm": 2.7077574729919434,
      "learning_rate": 9.237956550674408e-06,
      "loss": 0.0415,
      "step": 251300
    },
    {
      "epoch": 0.41129069211785574,
      "grad_norm": 2.2925972938537598,
      "learning_rate": 9.23789065846089e-06,
      "loss": 0.0464,
      "step": 251320
    },
    {
      "epoch": 0.4113234225565091,
      "grad_norm": 1.0262799263000488,
      "learning_rate": 9.237824766247374e-06,
      "loss": 0.0364,
      "step": 251340
    },
    {
      "epoch": 0.4113561529951624,
      "grad_norm": 3.0521926879882812,
      "learning_rate": 9.237758874033857e-06,
      "loss": 0.0457,
      "step": 251360
    },
    {
      "epoch": 0.4113888834338158,
      "grad_norm": 0.7193456292152405,
      "learning_rate": 9.237692981820339e-06,
      "loss": 0.0406,
      "step": 251380
    },
    {
      "epoch": 0.4114216138724691,
      "grad_norm": 2.094693660736084,
      "learning_rate": 9.237627089606823e-06,
      "loss": 0.055,
      "step": 251400
    },
    {
      "epoch": 0.41145434431112243,
      "grad_norm": 0.26266536116600037,
      "learning_rate": 9.237561197393305e-06,
      "loss": 0.0373,
      "step": 251420
    },
    {
      "epoch": 0.4114870747497758,
      "grad_norm": 4.0401291847229,
      "learning_rate": 9.237495305179788e-06,
      "loss": 0.0456,
      "step": 251440
    },
    {
      "epoch": 0.4115198051884291,
      "grad_norm": 0.23706671595573425,
      "learning_rate": 9.23742941296627e-06,
      "loss": 0.0284,
      "step": 251460
    },
    {
      "epoch": 0.4115525356270825,
      "grad_norm": 2.8422980308532715,
      "learning_rate": 9.237363520752754e-06,
      "loss": 0.0382,
      "step": 251480
    },
    {
      "epoch": 0.4115852660657358,
      "grad_norm": 2.1490042209625244,
      "learning_rate": 9.237297628539235e-06,
      "loss": 0.0485,
      "step": 251500
    },
    {
      "epoch": 0.4116179965043891,
      "grad_norm": 0.7043893933296204,
      "learning_rate": 9.237231736325719e-06,
      "loss": 0.0405,
      "step": 251520
    },
    {
      "epoch": 0.4116507269430425,
      "grad_norm": 2.535893678665161,
      "learning_rate": 9.237165844112203e-06,
      "loss": 0.0485,
      "step": 251540
    },
    {
      "epoch": 0.4116834573816958,
      "grad_norm": 1.4641332626342773,
      "learning_rate": 9.237099951898685e-06,
      "loss": 0.046,
      "step": 251560
    },
    {
      "epoch": 0.4117161878203492,
      "grad_norm": 2.8053910732269287,
      "learning_rate": 9.237034059685168e-06,
      "loss": 0.0427,
      "step": 251580
    },
    {
      "epoch": 0.4117489182590025,
      "grad_norm": 2.21109676361084,
      "learning_rate": 9.23696816747165e-06,
      "loss": 0.0704,
      "step": 251600
    },
    {
      "epoch": 0.4117816486976558,
      "grad_norm": 0.9243322610855103,
      "learning_rate": 9.236902275258134e-06,
      "loss": 0.0616,
      "step": 251620
    },
    {
      "epoch": 0.4118143791363092,
      "grad_norm": 0.9422513246536255,
      "learning_rate": 9.236836383044616e-06,
      "loss": 0.0356,
      "step": 251640
    },
    {
      "epoch": 0.4118471095749625,
      "grad_norm": 1.9710890054702759,
      "learning_rate": 9.236770490831099e-06,
      "loss": 0.0383,
      "step": 251660
    },
    {
      "epoch": 0.4118798400136159,
      "grad_norm": 0.7890617251396179,
      "learning_rate": 9.236704598617581e-06,
      "loss": 0.0484,
      "step": 251680
    },
    {
      "epoch": 0.4119125704522692,
      "grad_norm": 1.550573706626892,
      "learning_rate": 9.236638706404065e-06,
      "loss": 0.0365,
      "step": 251700
    },
    {
      "epoch": 0.4119453008909225,
      "grad_norm": 4.984825611114502,
      "learning_rate": 9.236572814190548e-06,
      "loss": 0.0399,
      "step": 251720
    },
    {
      "epoch": 0.4119780313295759,
      "grad_norm": 1.9570140838623047,
      "learning_rate": 9.236506921977032e-06,
      "loss": 0.0501,
      "step": 251740
    },
    {
      "epoch": 0.4120107617682292,
      "grad_norm": 0.8644285202026367,
      "learning_rate": 9.236441029763514e-06,
      "loss": 0.0427,
      "step": 251760
    },
    {
      "epoch": 0.4120434922068826,
      "grad_norm": 0.7168620824813843,
      "learning_rate": 9.236375137549997e-06,
      "loss": 0.049,
      "step": 251780
    },
    {
      "epoch": 0.4120762226455359,
      "grad_norm": 2.3885791301727295,
      "learning_rate": 9.236309245336479e-06,
      "loss": 0.0412,
      "step": 251800
    },
    {
      "epoch": 0.4121089530841892,
      "grad_norm": 1.0395736694335938,
      "learning_rate": 9.236243353122963e-06,
      "loss": 0.0376,
      "step": 251820
    },
    {
      "epoch": 0.4121416835228426,
      "grad_norm": 1.5026121139526367,
      "learning_rate": 9.236177460909445e-06,
      "loss": 0.0511,
      "step": 251840
    },
    {
      "epoch": 0.4121744139614959,
      "grad_norm": 2.5054361820220947,
      "learning_rate": 9.236111568695928e-06,
      "loss": 0.0444,
      "step": 251860
    },
    {
      "epoch": 0.4122071444001493,
      "grad_norm": 0.7795582413673401,
      "learning_rate": 9.236045676482412e-06,
      "loss": 0.0439,
      "step": 251880
    },
    {
      "epoch": 0.4122398748388026,
      "grad_norm": 3.4694015979766846,
      "learning_rate": 9.235979784268894e-06,
      "loss": 0.0578,
      "step": 251900
    },
    {
      "epoch": 0.4122726052774559,
      "grad_norm": 0.23403304815292358,
      "learning_rate": 9.235913892055377e-06,
      "loss": 0.045,
      "step": 251920
    },
    {
      "epoch": 0.4123053357161093,
      "grad_norm": 1.5056871175765991,
      "learning_rate": 9.23584799984186e-06,
      "loss": 0.0463,
      "step": 251940
    },
    {
      "epoch": 0.4123380661547626,
      "grad_norm": 1.8902990818023682,
      "learning_rate": 9.235782107628343e-06,
      "loss": 0.0557,
      "step": 251960
    },
    {
      "epoch": 0.41237079659341597,
      "grad_norm": 1.2216309309005737,
      "learning_rate": 9.235716215414825e-06,
      "loss": 0.0527,
      "step": 251980
    },
    {
      "epoch": 0.4124035270320693,
      "grad_norm": 0.9147565960884094,
      "learning_rate": 9.235650323201308e-06,
      "loss": 0.0505,
      "step": 252000
    },
    {
      "epoch": 0.4124362574707226,
      "grad_norm": 1.7743737697601318,
      "learning_rate": 9.23558443098779e-06,
      "loss": 0.0415,
      "step": 252020
    },
    {
      "epoch": 0.412468987909376,
      "grad_norm": 3.666825532913208,
      "learning_rate": 9.235518538774274e-06,
      "loss": 0.0511,
      "step": 252040
    },
    {
      "epoch": 0.4125017183480293,
      "grad_norm": 1.1100221872329712,
      "learning_rate": 9.235452646560756e-06,
      "loss": 0.0438,
      "step": 252060
    },
    {
      "epoch": 0.41253444878668266,
      "grad_norm": 2.2231359481811523,
      "learning_rate": 9.23538675434724e-06,
      "loss": 0.0653,
      "step": 252080
    },
    {
      "epoch": 0.412567179225336,
      "grad_norm": 0.8226480484008789,
      "learning_rate": 9.235320862133723e-06,
      "loss": 0.0559,
      "step": 252100
    },
    {
      "epoch": 0.4125999096639893,
      "grad_norm": 2.586564540863037,
      "learning_rate": 9.235254969920205e-06,
      "loss": 0.0518,
      "step": 252120
    },
    {
      "epoch": 0.41263264010264267,
      "grad_norm": 1.371279001235962,
      "learning_rate": 9.235189077706688e-06,
      "loss": 0.0588,
      "step": 252140
    },
    {
      "epoch": 0.412665370541296,
      "grad_norm": 1.2547211647033691,
      "learning_rate": 9.235123185493172e-06,
      "loss": 0.0499,
      "step": 252160
    },
    {
      "epoch": 0.41269810097994936,
      "grad_norm": 1.4944732189178467,
      "learning_rate": 9.235057293279654e-06,
      "loss": 0.0495,
      "step": 252180
    },
    {
      "epoch": 0.4127308314186027,
      "grad_norm": 2.41973876953125,
      "learning_rate": 9.234991401066137e-06,
      "loss": 0.0475,
      "step": 252200
    },
    {
      "epoch": 0.412763561857256,
      "grad_norm": 1.079485535621643,
      "learning_rate": 9.234925508852621e-06,
      "loss": 0.0395,
      "step": 252220
    },
    {
      "epoch": 0.41279629229590936,
      "grad_norm": 12.771942138671875,
      "learning_rate": 9.234859616639103e-06,
      "loss": 0.0424,
      "step": 252240
    },
    {
      "epoch": 0.4128290227345627,
      "grad_norm": 1.4397473335266113,
      "learning_rate": 9.234793724425586e-06,
      "loss": 0.042,
      "step": 252260
    },
    {
      "epoch": 0.41286175317321605,
      "grad_norm": 1.9528666734695435,
      "learning_rate": 9.234727832212068e-06,
      "loss": 0.0554,
      "step": 252280
    },
    {
      "epoch": 0.41289448361186937,
      "grad_norm": 0.9353030323982239,
      "learning_rate": 9.234661939998552e-06,
      "loss": 0.0384,
      "step": 252300
    },
    {
      "epoch": 0.4129272140505227,
      "grad_norm": 1.7010128498077393,
      "learning_rate": 9.234596047785034e-06,
      "loss": 0.0435,
      "step": 252320
    },
    {
      "epoch": 0.41295994448917606,
      "grad_norm": 2.2302896976470947,
      "learning_rate": 9.234530155571517e-06,
      "loss": 0.0502,
      "step": 252340
    },
    {
      "epoch": 0.4129926749278294,
      "grad_norm": 1.5405263900756836,
      "learning_rate": 9.234464263358e-06,
      "loss": 0.049,
      "step": 252360
    },
    {
      "epoch": 0.41302540536648275,
      "grad_norm": 1.3783687353134155,
      "learning_rate": 9.234398371144483e-06,
      "loss": 0.0336,
      "step": 252380
    },
    {
      "epoch": 0.41305813580513606,
      "grad_norm": 1.1818292140960693,
      "learning_rate": 9.234332478930965e-06,
      "loss": 0.0539,
      "step": 252400
    },
    {
      "epoch": 0.4130908662437894,
      "grad_norm": 1.3006677627563477,
      "learning_rate": 9.234266586717448e-06,
      "loss": 0.0599,
      "step": 252420
    },
    {
      "epoch": 0.41312359668244275,
      "grad_norm": 0.46524083614349365,
      "learning_rate": 9.23420069450393e-06,
      "loss": 0.04,
      "step": 252440
    },
    {
      "epoch": 0.41315632712109607,
      "grad_norm": 0.9160014390945435,
      "learning_rate": 9.234134802290414e-06,
      "loss": 0.0463,
      "step": 252460
    },
    {
      "epoch": 0.41318905755974944,
      "grad_norm": 1.491392731666565,
      "learning_rate": 9.234068910076897e-06,
      "loss": 0.0459,
      "step": 252480
    },
    {
      "epoch": 0.41322178799840276,
      "grad_norm": 1.453912377357483,
      "learning_rate": 9.23400301786338e-06,
      "loss": 0.0285,
      "step": 252500
    },
    {
      "epoch": 0.4132545184370561,
      "grad_norm": 0.3844643533229828,
      "learning_rate": 9.233937125649863e-06,
      "loss": 0.0586,
      "step": 252520
    },
    {
      "epoch": 0.41328724887570945,
      "grad_norm": 2.672161340713501,
      "learning_rate": 9.233871233436346e-06,
      "loss": 0.0482,
      "step": 252540
    },
    {
      "epoch": 0.41331997931436276,
      "grad_norm": 1.3927580118179321,
      "learning_rate": 9.233805341222828e-06,
      "loss": 0.0392,
      "step": 252560
    },
    {
      "epoch": 0.41335270975301613,
      "grad_norm": 2.098726511001587,
      "learning_rate": 9.233739449009312e-06,
      "loss": 0.062,
      "step": 252580
    },
    {
      "epoch": 0.41338544019166945,
      "grad_norm": 0.9125306606292725,
      "learning_rate": 9.233673556795795e-06,
      "loss": 0.0475,
      "step": 252600
    },
    {
      "epoch": 0.41341817063032277,
      "grad_norm": 1.482164740562439,
      "learning_rate": 9.233607664582277e-06,
      "loss": 0.0435,
      "step": 252620
    },
    {
      "epoch": 0.41345090106897614,
      "grad_norm": 2.5898725986480713,
      "learning_rate": 9.233541772368761e-06,
      "loss": 0.0422,
      "step": 252640
    },
    {
      "epoch": 0.41348363150762946,
      "grad_norm": 0.7339158654212952,
      "learning_rate": 9.233475880155243e-06,
      "loss": 0.0408,
      "step": 252660
    },
    {
      "epoch": 0.41351636194628283,
      "grad_norm": 1.1738457679748535,
      "learning_rate": 9.233409987941726e-06,
      "loss": 0.0412,
      "step": 252680
    },
    {
      "epoch": 0.41354909238493615,
      "grad_norm": 4.209043979644775,
      "learning_rate": 9.233344095728208e-06,
      "loss": 0.0414,
      "step": 252700
    },
    {
      "epoch": 0.41358182282358946,
      "grad_norm": 1.796380877494812,
      "learning_rate": 9.233278203514692e-06,
      "loss": 0.0496,
      "step": 252720
    },
    {
      "epoch": 0.41361455326224283,
      "grad_norm": 0.9489835500717163,
      "learning_rate": 9.233212311301174e-06,
      "loss": 0.0513,
      "step": 252740
    },
    {
      "epoch": 0.41364728370089615,
      "grad_norm": 2.3174991607666016,
      "learning_rate": 9.233146419087657e-06,
      "loss": 0.0596,
      "step": 252760
    },
    {
      "epoch": 0.4136800141395495,
      "grad_norm": 0.4948346018791199,
      "learning_rate": 9.23308052687414e-06,
      "loss": 0.0354,
      "step": 252780
    },
    {
      "epoch": 0.41371274457820284,
      "grad_norm": 0.9974362850189209,
      "learning_rate": 9.233014634660623e-06,
      "loss": 0.0515,
      "step": 252800
    },
    {
      "epoch": 0.41374547501685616,
      "grad_norm": 0.42126980423927307,
      "learning_rate": 9.232948742447105e-06,
      "loss": 0.0308,
      "step": 252820
    },
    {
      "epoch": 0.41377820545550953,
      "grad_norm": 0.683165431022644,
      "learning_rate": 9.232882850233588e-06,
      "loss": 0.0494,
      "step": 252840
    },
    {
      "epoch": 0.41381093589416285,
      "grad_norm": 1.2462387084960938,
      "learning_rate": 9.23281695802007e-06,
      "loss": 0.0447,
      "step": 252860
    },
    {
      "epoch": 0.4138436663328162,
      "grad_norm": 1.1534830331802368,
      "learning_rate": 9.232751065806554e-06,
      "loss": 0.0438,
      "step": 252880
    },
    {
      "epoch": 0.41387639677146953,
      "grad_norm": 1.1444933414459229,
      "learning_rate": 9.232685173593037e-06,
      "loss": 0.0478,
      "step": 252900
    },
    {
      "epoch": 0.41390912721012285,
      "grad_norm": 0.5504673719406128,
      "learning_rate": 9.23261928137952e-06,
      "loss": 0.0446,
      "step": 252920
    },
    {
      "epoch": 0.4139418576487762,
      "grad_norm": 1.1050816774368286,
      "learning_rate": 9.232553389166003e-06,
      "loss": 0.0499,
      "step": 252940
    },
    {
      "epoch": 0.41397458808742954,
      "grad_norm": 2.362304449081421,
      "learning_rate": 9.232487496952487e-06,
      "loss": 0.0558,
      "step": 252960
    },
    {
      "epoch": 0.4140073185260829,
      "grad_norm": 1.0634351968765259,
      "learning_rate": 9.232421604738968e-06,
      "loss": 0.0437,
      "step": 252980
    },
    {
      "epoch": 0.41404004896473623,
      "grad_norm": 3.7767832279205322,
      "learning_rate": 9.232355712525452e-06,
      "loss": 0.0464,
      "step": 253000
    },
    {
      "epoch": 0.41407277940338955,
      "grad_norm": 1.4220035076141357,
      "learning_rate": 9.232289820311936e-06,
      "loss": 0.0421,
      "step": 253020
    },
    {
      "epoch": 0.4141055098420429,
      "grad_norm": 4.696931838989258,
      "learning_rate": 9.232223928098417e-06,
      "loss": 0.0527,
      "step": 253040
    },
    {
      "epoch": 0.41413824028069623,
      "grad_norm": 1.581866979598999,
      "learning_rate": 9.232158035884901e-06,
      "loss": 0.0474,
      "step": 253060
    },
    {
      "epoch": 0.4141709707193496,
      "grad_norm": 2.152444839477539,
      "learning_rate": 9.232092143671383e-06,
      "loss": 0.0459,
      "step": 253080
    },
    {
      "epoch": 0.4142037011580029,
      "grad_norm": 1.8514593839645386,
      "learning_rate": 9.232026251457867e-06,
      "loss": 0.0567,
      "step": 253100
    },
    {
      "epoch": 0.41423643159665624,
      "grad_norm": 0.9506243467330933,
      "learning_rate": 9.231960359244348e-06,
      "loss": 0.038,
      "step": 253120
    },
    {
      "epoch": 0.4142691620353096,
      "grad_norm": 1.345456838607788,
      "learning_rate": 9.231894467030832e-06,
      "loss": 0.0474,
      "step": 253140
    },
    {
      "epoch": 0.41430189247396293,
      "grad_norm": 0.8327076435089111,
      "learning_rate": 9.231828574817314e-06,
      "loss": 0.0416,
      "step": 253160
    },
    {
      "epoch": 0.4143346229126163,
      "grad_norm": 0.2926196753978729,
      "learning_rate": 9.231762682603797e-06,
      "loss": 0.0468,
      "step": 253180
    },
    {
      "epoch": 0.4143673533512696,
      "grad_norm": 0.9021556973457336,
      "learning_rate": 9.23169679039028e-06,
      "loss": 0.0427,
      "step": 253200
    },
    {
      "epoch": 0.41440008378992294,
      "grad_norm": 1.3559468984603882,
      "learning_rate": 9.231630898176763e-06,
      "loss": 0.043,
      "step": 253220
    },
    {
      "epoch": 0.4144328142285763,
      "grad_norm": 0.6851988434791565,
      "learning_rate": 9.231565005963245e-06,
      "loss": 0.0479,
      "step": 253240
    },
    {
      "epoch": 0.4144655446672296,
      "grad_norm": 0.5794975757598877,
      "learning_rate": 9.231499113749728e-06,
      "loss": 0.0468,
      "step": 253260
    },
    {
      "epoch": 0.414498275105883,
      "grad_norm": 1.62412691116333,
      "learning_rate": 9.231433221536212e-06,
      "loss": 0.0472,
      "step": 253280
    },
    {
      "epoch": 0.4145310055445363,
      "grad_norm": 2.042844772338867,
      "learning_rate": 9.231367329322694e-06,
      "loss": 0.0582,
      "step": 253300
    },
    {
      "epoch": 0.41456373598318963,
      "grad_norm": 0.9340193867683411,
      "learning_rate": 9.231301437109178e-06,
      "loss": 0.0486,
      "step": 253320
    },
    {
      "epoch": 0.414596466421843,
      "grad_norm": 8.369351387023926,
      "learning_rate": 9.231235544895661e-06,
      "loss": 0.059,
      "step": 253340
    },
    {
      "epoch": 0.4146291968604963,
      "grad_norm": 2.0994598865509033,
      "learning_rate": 9.231169652682143e-06,
      "loss": 0.0364,
      "step": 253360
    },
    {
      "epoch": 0.4146619272991497,
      "grad_norm": 2.32845139503479,
      "learning_rate": 9.231103760468627e-06,
      "loss": 0.049,
      "step": 253380
    },
    {
      "epoch": 0.414694657737803,
      "grad_norm": 1.8960033655166626,
      "learning_rate": 9.23103786825511e-06,
      "loss": 0.0407,
      "step": 253400
    },
    {
      "epoch": 0.4147273881764563,
      "grad_norm": 2.964306354522705,
      "learning_rate": 9.230971976041592e-06,
      "loss": 0.0561,
      "step": 253420
    },
    {
      "epoch": 0.4147601186151097,
      "grad_norm": 0.1779787391424179,
      "learning_rate": 9.230906083828076e-06,
      "loss": 0.0435,
      "step": 253440
    },
    {
      "epoch": 0.414792849053763,
      "grad_norm": 1.9125128984451294,
      "learning_rate": 9.230840191614558e-06,
      "loss": 0.0575,
      "step": 253460
    },
    {
      "epoch": 0.41482557949241633,
      "grad_norm": 1.8591101169586182,
      "learning_rate": 9.230774299401041e-06,
      "loss": 0.0585,
      "step": 253480
    },
    {
      "epoch": 0.4148583099310697,
      "grad_norm": 1.218131422996521,
      "learning_rate": 9.230708407187523e-06,
      "loss": 0.0379,
      "step": 253500
    },
    {
      "epoch": 0.414891040369723,
      "grad_norm": 8.228353500366211,
      "learning_rate": 9.230642514974007e-06,
      "loss": 0.0457,
      "step": 253520
    },
    {
      "epoch": 0.4149237708083764,
      "grad_norm": 0.9483508467674255,
      "learning_rate": 9.230576622760489e-06,
      "loss": 0.0353,
      "step": 253540
    },
    {
      "epoch": 0.4149565012470297,
      "grad_norm": 1.4931418895721436,
      "learning_rate": 9.230510730546972e-06,
      "loss": 0.0505,
      "step": 253560
    },
    {
      "epoch": 0.414989231685683,
      "grad_norm": 2.4146156311035156,
      "learning_rate": 9.230444838333454e-06,
      "loss": 0.0324,
      "step": 253580
    },
    {
      "epoch": 0.4150219621243364,
      "grad_norm": 1.5770723819732666,
      "learning_rate": 9.230378946119938e-06,
      "loss": 0.0567,
      "step": 253600
    },
    {
      "epoch": 0.4150546925629897,
      "grad_norm": 0.45744457840919495,
      "learning_rate": 9.230313053906421e-06,
      "loss": 0.0455,
      "step": 253620
    },
    {
      "epoch": 0.4150874230016431,
      "grad_norm": 1.2479429244995117,
      "learning_rate": 9.230247161692903e-06,
      "loss": 0.036,
      "step": 253640
    },
    {
      "epoch": 0.4151201534402964,
      "grad_norm": 3.8698644638061523,
      "learning_rate": 9.230181269479387e-06,
      "loss": 0.0507,
      "step": 253660
    },
    {
      "epoch": 0.4151528838789497,
      "grad_norm": 0.5043141841888428,
      "learning_rate": 9.230115377265869e-06,
      "loss": 0.0359,
      "step": 253680
    },
    {
      "epoch": 0.4151856143176031,
      "grad_norm": 2.2186031341552734,
      "learning_rate": 9.230049485052352e-06,
      "loss": 0.0427,
      "step": 253700
    },
    {
      "epoch": 0.4152183447562564,
      "grad_norm": 2.0911433696746826,
      "learning_rate": 9.229983592838834e-06,
      "loss": 0.0451,
      "step": 253720
    },
    {
      "epoch": 0.4152510751949098,
      "grad_norm": 2.3033194541931152,
      "learning_rate": 9.229917700625318e-06,
      "loss": 0.0393,
      "step": 253740
    },
    {
      "epoch": 0.4152838056335631,
      "grad_norm": 2.2485954761505127,
      "learning_rate": 9.229851808411801e-06,
      "loss": 0.0452,
      "step": 253760
    },
    {
      "epoch": 0.4153165360722164,
      "grad_norm": 1.5624396800994873,
      "learning_rate": 9.229785916198283e-06,
      "loss": 0.0402,
      "step": 253780
    },
    {
      "epoch": 0.4153492665108698,
      "grad_norm": 0.7777231931686401,
      "learning_rate": 9.229720023984767e-06,
      "loss": 0.0511,
      "step": 253800
    },
    {
      "epoch": 0.4153819969495231,
      "grad_norm": 6.00596809387207,
      "learning_rate": 9.22965413177125e-06,
      "loss": 0.0471,
      "step": 253820
    },
    {
      "epoch": 0.4154147273881765,
      "grad_norm": 2.8971211910247803,
      "learning_rate": 9.229588239557732e-06,
      "loss": 0.0534,
      "step": 253840
    },
    {
      "epoch": 0.4154474578268298,
      "grad_norm": 0.9396077394485474,
      "learning_rate": 9.229522347344216e-06,
      "loss": 0.0571,
      "step": 253860
    },
    {
      "epoch": 0.4154801882654831,
      "grad_norm": 2.2843244075775146,
      "learning_rate": 9.229456455130698e-06,
      "loss": 0.0292,
      "step": 253880
    },
    {
      "epoch": 0.4155129187041365,
      "grad_norm": 0.9904575943946838,
      "learning_rate": 9.229390562917181e-06,
      "loss": 0.0428,
      "step": 253900
    },
    {
      "epoch": 0.4155456491427898,
      "grad_norm": 1.5285786390304565,
      "learning_rate": 9.229324670703663e-06,
      "loss": 0.0485,
      "step": 253920
    },
    {
      "epoch": 0.41557837958144317,
      "grad_norm": 3.0539627075195312,
      "learning_rate": 9.229258778490147e-06,
      "loss": 0.0462,
      "step": 253940
    },
    {
      "epoch": 0.4156111100200965,
      "grad_norm": 2.9027132987976074,
      "learning_rate": 9.229192886276629e-06,
      "loss": 0.0388,
      "step": 253960
    },
    {
      "epoch": 0.4156438404587498,
      "grad_norm": 2.5443835258483887,
      "learning_rate": 9.229126994063112e-06,
      "loss": 0.0344,
      "step": 253980
    },
    {
      "epoch": 0.4156765708974032,
      "grad_norm": 2.8205161094665527,
      "learning_rate": 9.229061101849596e-06,
      "loss": 0.0468,
      "step": 254000
    },
    {
      "epoch": 0.4157093013360565,
      "grad_norm": 2.2859325408935547,
      "learning_rate": 9.228995209636078e-06,
      "loss": 0.0432,
      "step": 254020
    },
    {
      "epoch": 0.41574203177470986,
      "grad_norm": 5.597476482391357,
      "learning_rate": 9.228929317422561e-06,
      "loss": 0.0484,
      "step": 254040
    },
    {
      "epoch": 0.4157747622133632,
      "grad_norm": 4.279721260070801,
      "learning_rate": 9.228863425209043e-06,
      "loss": 0.0382,
      "step": 254060
    },
    {
      "epoch": 0.4158074926520165,
      "grad_norm": 1.1661635637283325,
      "learning_rate": 9.228797532995527e-06,
      "loss": 0.0398,
      "step": 254080
    },
    {
      "epoch": 0.41584022309066987,
      "grad_norm": 2.20729923248291,
      "learning_rate": 9.228731640782009e-06,
      "loss": 0.0516,
      "step": 254100
    },
    {
      "epoch": 0.4158729535293232,
      "grad_norm": 1.3249067068099976,
      "learning_rate": 9.228665748568492e-06,
      "loss": 0.0329,
      "step": 254120
    },
    {
      "epoch": 0.41590568396797656,
      "grad_norm": 1.8172709941864014,
      "learning_rate": 9.228599856354976e-06,
      "loss": 0.0506,
      "step": 254140
    },
    {
      "epoch": 0.4159384144066299,
      "grad_norm": 1.669434905052185,
      "learning_rate": 9.228533964141458e-06,
      "loss": 0.0382,
      "step": 254160
    },
    {
      "epoch": 0.4159711448452832,
      "grad_norm": 1.5235068798065186,
      "learning_rate": 9.228468071927941e-06,
      "loss": 0.0497,
      "step": 254180
    },
    {
      "epoch": 0.41600387528393656,
      "grad_norm": 2.336737871170044,
      "learning_rate": 9.228402179714425e-06,
      "loss": 0.0456,
      "step": 254200
    },
    {
      "epoch": 0.4160366057225899,
      "grad_norm": 1.821639060974121,
      "learning_rate": 9.228336287500907e-06,
      "loss": 0.0446,
      "step": 254220
    },
    {
      "epoch": 0.41606933616124325,
      "grad_norm": 4.115572452545166,
      "learning_rate": 9.22827039528739e-06,
      "loss": 0.0487,
      "step": 254240
    },
    {
      "epoch": 0.41610206659989657,
      "grad_norm": 3.433027505874634,
      "learning_rate": 9.228204503073872e-06,
      "loss": 0.0508,
      "step": 254260
    },
    {
      "epoch": 0.4161347970385499,
      "grad_norm": 1.7662639617919922,
      "learning_rate": 9.228138610860356e-06,
      "loss": 0.0452,
      "step": 254280
    },
    {
      "epoch": 0.41616752747720326,
      "grad_norm": 1.3696171045303345,
      "learning_rate": 9.228072718646838e-06,
      "loss": 0.0418,
      "step": 254300
    },
    {
      "epoch": 0.4162002579158566,
      "grad_norm": 1.8323774337768555,
      "learning_rate": 9.228006826433321e-06,
      "loss": 0.0469,
      "step": 254320
    },
    {
      "epoch": 0.41623298835450995,
      "grad_norm": 3.3623576164245605,
      "learning_rate": 9.227940934219805e-06,
      "loss": 0.0436,
      "step": 254340
    },
    {
      "epoch": 0.41626571879316326,
      "grad_norm": 1.8403955698013306,
      "learning_rate": 9.227875042006287e-06,
      "loss": 0.0419,
      "step": 254360
    },
    {
      "epoch": 0.4162984492318166,
      "grad_norm": 0.8925825953483582,
      "learning_rate": 9.22780914979277e-06,
      "loss": 0.0498,
      "step": 254380
    },
    {
      "epoch": 0.41633117967046995,
      "grad_norm": 1.0216399431228638,
      "learning_rate": 9.227743257579252e-06,
      "loss": 0.0404,
      "step": 254400
    },
    {
      "epoch": 0.41636391010912327,
      "grad_norm": 0.7880818247795105,
      "learning_rate": 9.227677365365736e-06,
      "loss": 0.0486,
      "step": 254420
    },
    {
      "epoch": 0.41639664054777664,
      "grad_norm": 1.4149268865585327,
      "learning_rate": 9.227611473152218e-06,
      "loss": 0.0383,
      "step": 254440
    },
    {
      "epoch": 0.41642937098642996,
      "grad_norm": 0.4008333086967468,
      "learning_rate": 9.227545580938701e-06,
      "loss": 0.0458,
      "step": 254460
    },
    {
      "epoch": 0.4164621014250833,
      "grad_norm": 0.9887388348579407,
      "learning_rate": 9.227479688725183e-06,
      "loss": 0.0336,
      "step": 254480
    },
    {
      "epoch": 0.41649483186373665,
      "grad_norm": 1.2557635307312012,
      "learning_rate": 9.227413796511667e-06,
      "loss": 0.0391,
      "step": 254500
    },
    {
      "epoch": 0.41652756230238996,
      "grad_norm": 0.4836178719997406,
      "learning_rate": 9.22734790429815e-06,
      "loss": 0.0393,
      "step": 254520
    },
    {
      "epoch": 0.41656029274104334,
      "grad_norm": 2.482884168624878,
      "learning_rate": 9.227282012084632e-06,
      "loss": 0.0463,
      "step": 254540
    },
    {
      "epoch": 0.41659302317969665,
      "grad_norm": 7.386944770812988,
      "learning_rate": 9.227216119871116e-06,
      "loss": 0.058,
      "step": 254560
    },
    {
      "epoch": 0.41662575361834997,
      "grad_norm": 1.8239697217941284,
      "learning_rate": 9.2271502276576e-06,
      "loss": 0.0503,
      "step": 254580
    },
    {
      "epoch": 0.41665848405700334,
      "grad_norm": 2.1633565425872803,
      "learning_rate": 9.227084335444081e-06,
      "loss": 0.0397,
      "step": 254600
    },
    {
      "epoch": 0.41669121449565666,
      "grad_norm": 2.1960597038269043,
      "learning_rate": 9.227018443230565e-06,
      "loss": 0.0514,
      "step": 254620
    },
    {
      "epoch": 0.41672394493431003,
      "grad_norm": 0.872573733329773,
      "learning_rate": 9.226952551017047e-06,
      "loss": 0.0455,
      "step": 254640
    },
    {
      "epoch": 0.41675667537296335,
      "grad_norm": 1.9298179149627686,
      "learning_rate": 9.22688665880353e-06,
      "loss": 0.0433,
      "step": 254660
    },
    {
      "epoch": 0.41678940581161666,
      "grad_norm": 2.8940987586975098,
      "learning_rate": 9.226820766590014e-06,
      "loss": 0.0491,
      "step": 254680
    },
    {
      "epoch": 0.41682213625027004,
      "grad_norm": 1.0231122970581055,
      "learning_rate": 9.226754874376496e-06,
      "loss": 0.0532,
      "step": 254700
    },
    {
      "epoch": 0.41685486668892335,
      "grad_norm": 1.7940319776535034,
      "learning_rate": 9.22668898216298e-06,
      "loss": 0.0528,
      "step": 254720
    },
    {
      "epoch": 0.4168875971275767,
      "grad_norm": 1.9516607522964478,
      "learning_rate": 9.226623089949461e-06,
      "loss": 0.0551,
      "step": 254740
    },
    {
      "epoch": 0.41692032756623004,
      "grad_norm": 0.8165837526321411,
      "learning_rate": 9.226557197735945e-06,
      "loss": 0.0464,
      "step": 254760
    },
    {
      "epoch": 0.41695305800488336,
      "grad_norm": 1.09158194065094,
      "learning_rate": 9.226491305522427e-06,
      "loss": 0.0353,
      "step": 254780
    },
    {
      "epoch": 0.41698578844353673,
      "grad_norm": 0.6190826892852783,
      "learning_rate": 9.22642541330891e-06,
      "loss": 0.0469,
      "step": 254800
    },
    {
      "epoch": 0.41701851888219005,
      "grad_norm": 0.5894678235054016,
      "learning_rate": 9.226359521095392e-06,
      "loss": 0.0474,
      "step": 254820
    },
    {
      "epoch": 0.4170512493208434,
      "grad_norm": 1.733628273010254,
      "learning_rate": 9.226293628881876e-06,
      "loss": 0.0406,
      "step": 254840
    },
    {
      "epoch": 0.41708397975949674,
      "grad_norm": 0.42991113662719727,
      "learning_rate": 9.226227736668358e-06,
      "loss": 0.0393,
      "step": 254860
    },
    {
      "epoch": 0.41711671019815005,
      "grad_norm": 1.1819723844528198,
      "learning_rate": 9.226161844454841e-06,
      "loss": 0.0459,
      "step": 254880
    },
    {
      "epoch": 0.4171494406368034,
      "grad_norm": 2.674865245819092,
      "learning_rate": 9.226095952241323e-06,
      "loss": 0.039,
      "step": 254900
    },
    {
      "epoch": 0.41718217107545674,
      "grad_norm": 1.2911525964736938,
      "learning_rate": 9.226030060027807e-06,
      "loss": 0.0496,
      "step": 254920
    },
    {
      "epoch": 0.4172149015141101,
      "grad_norm": 5.429457187652588,
      "learning_rate": 9.22596416781429e-06,
      "loss": 0.041,
      "step": 254940
    },
    {
      "epoch": 0.41724763195276343,
      "grad_norm": 1.8715972900390625,
      "learning_rate": 9.225898275600772e-06,
      "loss": 0.0385,
      "step": 254960
    },
    {
      "epoch": 0.41728036239141675,
      "grad_norm": 0.8660235404968262,
      "learning_rate": 9.225832383387256e-06,
      "loss": 0.0424,
      "step": 254980
    },
    {
      "epoch": 0.4173130928300701,
      "grad_norm": 1.3980984687805176,
      "learning_rate": 9.22576649117374e-06,
      "loss": 0.0431,
      "step": 255000
    },
    {
      "epoch": 0.41734582326872344,
      "grad_norm": 2.532057046890259,
      "learning_rate": 9.225700598960221e-06,
      "loss": 0.0441,
      "step": 255020
    },
    {
      "epoch": 0.4173785537073768,
      "grad_norm": 4.091770172119141,
      "learning_rate": 9.225634706746705e-06,
      "loss": 0.0318,
      "step": 255040
    },
    {
      "epoch": 0.4174112841460301,
      "grad_norm": 1.2156331539154053,
      "learning_rate": 9.225568814533189e-06,
      "loss": 0.0428,
      "step": 255060
    },
    {
      "epoch": 0.41744401458468344,
      "grad_norm": 1.2617970705032349,
      "learning_rate": 9.22550292231967e-06,
      "loss": 0.0424,
      "step": 255080
    },
    {
      "epoch": 0.4174767450233368,
      "grad_norm": 0.47644928097724915,
      "learning_rate": 9.225437030106154e-06,
      "loss": 0.0403,
      "step": 255100
    },
    {
      "epoch": 0.41750947546199013,
      "grad_norm": 1.5211211442947388,
      "learning_rate": 9.225371137892636e-06,
      "loss": 0.0524,
      "step": 255120
    },
    {
      "epoch": 0.4175422059006435,
      "grad_norm": 2.035236120223999,
      "learning_rate": 9.22530524567912e-06,
      "loss": 0.0503,
      "step": 255140
    },
    {
      "epoch": 0.4175749363392968,
      "grad_norm": 1.2215741872787476,
      "learning_rate": 9.225239353465601e-06,
      "loss": 0.045,
      "step": 255160
    },
    {
      "epoch": 0.41760766677795014,
      "grad_norm": 0.7838522791862488,
      "learning_rate": 9.225173461252085e-06,
      "loss": 0.0404,
      "step": 255180
    },
    {
      "epoch": 0.4176403972166035,
      "grad_norm": 4.114278793334961,
      "learning_rate": 9.225107569038567e-06,
      "loss": 0.05,
      "step": 255200
    },
    {
      "epoch": 0.4176731276552568,
      "grad_norm": 3.9757893085479736,
      "learning_rate": 9.22504167682505e-06,
      "loss": 0.0506,
      "step": 255220
    },
    {
      "epoch": 0.4177058580939102,
      "grad_norm": 2.320141553878784,
      "learning_rate": 9.224975784611532e-06,
      "loss": 0.0561,
      "step": 255240
    },
    {
      "epoch": 0.4177385885325635,
      "grad_norm": 0.8514189124107361,
      "learning_rate": 9.224909892398016e-06,
      "loss": 0.0511,
      "step": 255260
    },
    {
      "epoch": 0.41777131897121683,
      "grad_norm": 0.9182587265968323,
      "learning_rate": 9.224844000184498e-06,
      "loss": 0.0433,
      "step": 255280
    },
    {
      "epoch": 0.4178040494098702,
      "grad_norm": 3.6297152042388916,
      "learning_rate": 9.224778107970981e-06,
      "loss": 0.0353,
      "step": 255300
    },
    {
      "epoch": 0.4178367798485235,
      "grad_norm": 1.0852056741714478,
      "learning_rate": 9.224712215757465e-06,
      "loss": 0.0412,
      "step": 255320
    },
    {
      "epoch": 0.4178695102871769,
      "grad_norm": 1.0407767295837402,
      "learning_rate": 9.224646323543947e-06,
      "loss": 0.0469,
      "step": 255340
    },
    {
      "epoch": 0.4179022407258302,
      "grad_norm": 1.024457573890686,
      "learning_rate": 9.22458043133043e-06,
      "loss": 0.0557,
      "step": 255360
    },
    {
      "epoch": 0.4179349711644835,
      "grad_norm": 0.5206118226051331,
      "learning_rate": 9.224514539116914e-06,
      "loss": 0.0443,
      "step": 255380
    },
    {
      "epoch": 0.4179677016031369,
      "grad_norm": 1.5527913570404053,
      "learning_rate": 9.224448646903396e-06,
      "loss": 0.0436,
      "step": 255400
    },
    {
      "epoch": 0.4180004320417902,
      "grad_norm": 2.4736294746398926,
      "learning_rate": 9.22438275468988e-06,
      "loss": 0.0408,
      "step": 255420
    },
    {
      "epoch": 0.4180331624804436,
      "grad_norm": 1.7179499864578247,
      "learning_rate": 9.224316862476363e-06,
      "loss": 0.0499,
      "step": 255440
    },
    {
      "epoch": 0.4180658929190969,
      "grad_norm": 2.4089348316192627,
      "learning_rate": 9.224250970262845e-06,
      "loss": 0.0469,
      "step": 255460
    },
    {
      "epoch": 0.4180986233577502,
      "grad_norm": 1.427016019821167,
      "learning_rate": 9.224185078049329e-06,
      "loss": 0.0435,
      "step": 255480
    },
    {
      "epoch": 0.4181313537964036,
      "grad_norm": 1.5231815576553345,
      "learning_rate": 9.22411918583581e-06,
      "loss": 0.0455,
      "step": 255500
    },
    {
      "epoch": 0.4181640842350569,
      "grad_norm": 1.0671228170394897,
      "learning_rate": 9.224053293622294e-06,
      "loss": 0.0435,
      "step": 255520
    },
    {
      "epoch": 0.4181968146737103,
      "grad_norm": 2.3112025260925293,
      "learning_rate": 9.223987401408776e-06,
      "loss": 0.0631,
      "step": 255540
    },
    {
      "epoch": 0.4182295451123636,
      "grad_norm": 2.3344149589538574,
      "learning_rate": 9.22392150919526e-06,
      "loss": 0.042,
      "step": 255560
    },
    {
      "epoch": 0.4182622755510169,
      "grad_norm": 1.7289586067199707,
      "learning_rate": 9.223855616981742e-06,
      "loss": 0.0406,
      "step": 255580
    },
    {
      "epoch": 0.4182950059896703,
      "grad_norm": 0.635361909866333,
      "learning_rate": 9.223789724768225e-06,
      "loss": 0.0477,
      "step": 255600
    },
    {
      "epoch": 0.4183277364283236,
      "grad_norm": 3.03861927986145,
      "learning_rate": 9.223723832554707e-06,
      "loss": 0.0459,
      "step": 255620
    },
    {
      "epoch": 0.418360466866977,
      "grad_norm": 3.8333003520965576,
      "learning_rate": 9.22365794034119e-06,
      "loss": 0.05,
      "step": 255640
    },
    {
      "epoch": 0.4183931973056303,
      "grad_norm": 4.333985805511475,
      "learning_rate": 9.223592048127672e-06,
      "loss": 0.0473,
      "step": 255660
    },
    {
      "epoch": 0.4184259277442836,
      "grad_norm": 1.8344659805297852,
      "learning_rate": 9.223526155914156e-06,
      "loss": 0.0535,
      "step": 255680
    },
    {
      "epoch": 0.418458658182937,
      "grad_norm": 2.610046863555908,
      "learning_rate": 9.223460263700638e-06,
      "loss": 0.0528,
      "step": 255700
    },
    {
      "epoch": 0.4184913886215903,
      "grad_norm": 1.3400285243988037,
      "learning_rate": 9.223394371487122e-06,
      "loss": 0.0288,
      "step": 255720
    },
    {
      "epoch": 0.41852411906024367,
      "grad_norm": 0.2559327185153961,
      "learning_rate": 9.223328479273605e-06,
      "loss": 0.058,
      "step": 255740
    },
    {
      "epoch": 0.418556849498897,
      "grad_norm": 1.7660503387451172,
      "learning_rate": 9.223262587060087e-06,
      "loss": 0.0546,
      "step": 255760
    },
    {
      "epoch": 0.4185895799375503,
      "grad_norm": 2.6933369636535645,
      "learning_rate": 9.22319669484657e-06,
      "loss": 0.055,
      "step": 255780
    },
    {
      "epoch": 0.4186223103762037,
      "grad_norm": 1.8969030380249023,
      "learning_rate": 9.223130802633054e-06,
      "loss": 0.0367,
      "step": 255800
    },
    {
      "epoch": 0.418655040814857,
      "grad_norm": 1.1695070266723633,
      "learning_rate": 9.223064910419536e-06,
      "loss": 0.0389,
      "step": 255820
    },
    {
      "epoch": 0.41868777125351037,
      "grad_norm": 2.685093402862549,
      "learning_rate": 9.22299901820602e-06,
      "loss": 0.0414,
      "step": 255840
    },
    {
      "epoch": 0.4187205016921637,
      "grad_norm": 1.5090467929840088,
      "learning_rate": 9.222933125992503e-06,
      "loss": 0.0437,
      "step": 255860
    },
    {
      "epoch": 0.418753232130817,
      "grad_norm": 4.199524879455566,
      "learning_rate": 9.222867233778985e-06,
      "loss": 0.0492,
      "step": 255880
    },
    {
      "epoch": 0.41878596256947037,
      "grad_norm": 1.9116538763046265,
      "learning_rate": 9.222801341565469e-06,
      "loss": 0.0412,
      "step": 255900
    },
    {
      "epoch": 0.4188186930081237,
      "grad_norm": 1.2314131259918213,
      "learning_rate": 9.22273544935195e-06,
      "loss": 0.0388,
      "step": 255920
    },
    {
      "epoch": 0.41885142344677706,
      "grad_norm": 1.1084133386611938,
      "learning_rate": 9.222669557138434e-06,
      "loss": 0.0445,
      "step": 255940
    },
    {
      "epoch": 0.4188841538854304,
      "grad_norm": 1.0089609622955322,
      "learning_rate": 9.222603664924916e-06,
      "loss": 0.0321,
      "step": 255960
    },
    {
      "epoch": 0.4189168843240837,
      "grad_norm": 1.8119686841964722,
      "learning_rate": 9.2225377727114e-06,
      "loss": 0.0499,
      "step": 255980
    },
    {
      "epoch": 0.41894961476273707,
      "grad_norm": 0.9036187529563904,
      "learning_rate": 9.222471880497882e-06,
      "loss": 0.0432,
      "step": 256000
    },
    {
      "epoch": 0.4189823452013904,
      "grad_norm": 1.697182297706604,
      "learning_rate": 9.222405988284365e-06,
      "loss": 0.0393,
      "step": 256020
    },
    {
      "epoch": 0.41901507564004375,
      "grad_norm": 1.981282114982605,
      "learning_rate": 9.222340096070847e-06,
      "loss": 0.0357,
      "step": 256040
    },
    {
      "epoch": 0.41904780607869707,
      "grad_norm": 1.5615143775939941,
      "learning_rate": 9.22227420385733e-06,
      "loss": 0.0632,
      "step": 256060
    },
    {
      "epoch": 0.4190805365173504,
      "grad_norm": 0.44771260023117065,
      "learning_rate": 9.222208311643814e-06,
      "loss": 0.0375,
      "step": 256080
    },
    {
      "epoch": 0.41911326695600376,
      "grad_norm": 1.1155517101287842,
      "learning_rate": 9.222142419430296e-06,
      "loss": 0.0419,
      "step": 256100
    },
    {
      "epoch": 0.4191459973946571,
      "grad_norm": 0.8748801946640015,
      "learning_rate": 9.22207652721678e-06,
      "loss": 0.0537,
      "step": 256120
    },
    {
      "epoch": 0.41917872783331045,
      "grad_norm": 1.0023596286773682,
      "learning_rate": 9.222010635003262e-06,
      "loss": 0.0557,
      "step": 256140
    },
    {
      "epoch": 0.41921145827196377,
      "grad_norm": 0.899109423160553,
      "learning_rate": 9.221944742789745e-06,
      "loss": 0.0469,
      "step": 256160
    },
    {
      "epoch": 0.4192441887106171,
      "grad_norm": 1.6224534511566162,
      "learning_rate": 9.221878850576229e-06,
      "loss": 0.0378,
      "step": 256180
    },
    {
      "epoch": 0.41927691914927046,
      "grad_norm": 0.4039962887763977,
      "learning_rate": 9.22181295836271e-06,
      "loss": 0.0344,
      "step": 256200
    },
    {
      "epoch": 0.41930964958792377,
      "grad_norm": 1.857738733291626,
      "learning_rate": 9.221747066149194e-06,
      "loss": 0.0466,
      "step": 256220
    },
    {
      "epoch": 0.4193423800265771,
      "grad_norm": 2.2137951850891113,
      "learning_rate": 9.221681173935678e-06,
      "loss": 0.0424,
      "step": 256240
    },
    {
      "epoch": 0.41937511046523046,
      "grad_norm": 1.0615113973617554,
      "learning_rate": 9.22161528172216e-06,
      "loss": 0.032,
      "step": 256260
    },
    {
      "epoch": 0.4194078409038838,
      "grad_norm": 0.9167525172233582,
      "learning_rate": 9.221549389508643e-06,
      "loss": 0.043,
      "step": 256280
    },
    {
      "epoch": 0.41944057134253715,
      "grad_norm": 1.8761894702911377,
      "learning_rate": 9.221483497295125e-06,
      "loss": 0.0392,
      "step": 256300
    },
    {
      "epoch": 0.41947330178119047,
      "grad_norm": 3.162259817123413,
      "learning_rate": 9.221417605081609e-06,
      "loss": 0.0405,
      "step": 256320
    },
    {
      "epoch": 0.4195060322198438,
      "grad_norm": 1.8401081562042236,
      "learning_rate": 9.22135171286809e-06,
      "loss": 0.0469,
      "step": 256340
    },
    {
      "epoch": 0.41953876265849716,
      "grad_norm": 2.7612452507019043,
      "learning_rate": 9.221285820654574e-06,
      "loss": 0.0548,
      "step": 256360
    },
    {
      "epoch": 0.41957149309715047,
      "grad_norm": 1.0050357580184937,
      "learning_rate": 9.221219928441056e-06,
      "loss": 0.0416,
      "step": 256380
    },
    {
      "epoch": 0.41960422353580384,
      "grad_norm": 2.5795934200286865,
      "learning_rate": 9.22115403622754e-06,
      "loss": 0.0342,
      "step": 256400
    },
    {
      "epoch": 0.41963695397445716,
      "grad_norm": 1.9613357782363892,
      "learning_rate": 9.221088144014022e-06,
      "loss": 0.0501,
      "step": 256420
    },
    {
      "epoch": 0.4196696844131105,
      "grad_norm": 1.0603443384170532,
      "learning_rate": 9.221022251800505e-06,
      "loss": 0.0376,
      "step": 256440
    },
    {
      "epoch": 0.41970241485176385,
      "grad_norm": 2.0246834754943848,
      "learning_rate": 9.220956359586989e-06,
      "loss": 0.0537,
      "step": 256460
    },
    {
      "epoch": 0.41973514529041717,
      "grad_norm": 1.0614858865737915,
      "learning_rate": 9.22089046737347e-06,
      "loss": 0.0352,
      "step": 256480
    },
    {
      "epoch": 0.41976787572907054,
      "grad_norm": 2.5726213455200195,
      "learning_rate": 9.220824575159954e-06,
      "loss": 0.0644,
      "step": 256500
    },
    {
      "epoch": 0.41980060616772386,
      "grad_norm": 0.6849545836448669,
      "learning_rate": 9.220758682946436e-06,
      "loss": 0.0488,
      "step": 256520
    },
    {
      "epoch": 0.41983333660637717,
      "grad_norm": 1.6116094589233398,
      "learning_rate": 9.22069279073292e-06,
      "loss": 0.0526,
      "step": 256540
    },
    {
      "epoch": 0.41986606704503054,
      "grad_norm": 0.9614681005477905,
      "learning_rate": 9.220626898519402e-06,
      "loss": 0.0479,
      "step": 256560
    },
    {
      "epoch": 0.41989879748368386,
      "grad_norm": 1.1988950967788696,
      "learning_rate": 9.220561006305885e-06,
      "loss": 0.0502,
      "step": 256580
    },
    {
      "epoch": 0.41993152792233723,
      "grad_norm": 6.887413501739502,
      "learning_rate": 9.220495114092369e-06,
      "loss": 0.0393,
      "step": 256600
    },
    {
      "epoch": 0.41996425836099055,
      "grad_norm": 0.7465015053749084,
      "learning_rate": 9.22042922187885e-06,
      "loss": 0.0585,
      "step": 256620
    },
    {
      "epoch": 0.41999698879964387,
      "grad_norm": 1.5180208683013916,
      "learning_rate": 9.220363329665334e-06,
      "loss": 0.0473,
      "step": 256640
    },
    {
      "epoch": 0.42002971923829724,
      "grad_norm": 2.8860604763031006,
      "learning_rate": 9.220297437451818e-06,
      "loss": 0.0509,
      "step": 256660
    },
    {
      "epoch": 0.42006244967695056,
      "grad_norm": 2.8310272693634033,
      "learning_rate": 9.2202315452383e-06,
      "loss": 0.0608,
      "step": 256680
    },
    {
      "epoch": 0.42009518011560393,
      "grad_norm": 1.0251474380493164,
      "learning_rate": 9.220165653024783e-06,
      "loss": 0.0373,
      "step": 256700
    },
    {
      "epoch": 0.42012791055425724,
      "grad_norm": 1.746262550354004,
      "learning_rate": 9.220099760811265e-06,
      "loss": 0.0446,
      "step": 256720
    },
    {
      "epoch": 0.42016064099291056,
      "grad_norm": 1.5796730518341064,
      "learning_rate": 9.220033868597749e-06,
      "loss": 0.0453,
      "step": 256740
    },
    {
      "epoch": 0.42019337143156393,
      "grad_norm": 1.9424861669540405,
      "learning_rate": 9.21996797638423e-06,
      "loss": 0.0494,
      "step": 256760
    },
    {
      "epoch": 0.42022610187021725,
      "grad_norm": 2.330996036529541,
      "learning_rate": 9.219902084170714e-06,
      "loss": 0.0505,
      "step": 256780
    },
    {
      "epoch": 0.4202588323088706,
      "grad_norm": 2.260761260986328,
      "learning_rate": 9.219836191957198e-06,
      "loss": 0.0448,
      "step": 256800
    },
    {
      "epoch": 0.42029156274752394,
      "grad_norm": 3.1196374893188477,
      "learning_rate": 9.21977029974368e-06,
      "loss": 0.0462,
      "step": 256820
    },
    {
      "epoch": 0.42032429318617726,
      "grad_norm": 2.147028684616089,
      "learning_rate": 9.219704407530163e-06,
      "loss": 0.0486,
      "step": 256840
    },
    {
      "epoch": 0.42035702362483063,
      "grad_norm": 1.5886516571044922,
      "learning_rate": 9.219638515316645e-06,
      "loss": 0.0508,
      "step": 256860
    },
    {
      "epoch": 0.42038975406348394,
      "grad_norm": 0.7841046452522278,
      "learning_rate": 9.219572623103129e-06,
      "loss": 0.0378,
      "step": 256880
    },
    {
      "epoch": 0.4204224845021373,
      "grad_norm": 1.7180229425430298,
      "learning_rate": 9.21950673088961e-06,
      "loss": 0.0474,
      "step": 256900
    },
    {
      "epoch": 0.42045521494079063,
      "grad_norm": 0.37966910004615784,
      "learning_rate": 9.219440838676094e-06,
      "loss": 0.0421,
      "step": 256920
    },
    {
      "epoch": 0.42048794537944395,
      "grad_norm": 2.4157514572143555,
      "learning_rate": 9.219374946462576e-06,
      "loss": 0.0383,
      "step": 256940
    },
    {
      "epoch": 0.4205206758180973,
      "grad_norm": 2.9769680500030518,
      "learning_rate": 9.21930905424906e-06,
      "loss": 0.0404,
      "step": 256960
    },
    {
      "epoch": 0.42055340625675064,
      "grad_norm": 3.196694850921631,
      "learning_rate": 9.219243162035543e-06,
      "loss": 0.0494,
      "step": 256980
    },
    {
      "epoch": 0.420586136695404,
      "grad_norm": 1.43342125415802,
      "learning_rate": 9.219177269822025e-06,
      "loss": 0.0352,
      "step": 257000
    },
    {
      "epoch": 0.42061886713405733,
      "grad_norm": 0.9635405540466309,
      "learning_rate": 9.219111377608509e-06,
      "loss": 0.0361,
      "step": 257020
    },
    {
      "epoch": 0.42065159757271064,
      "grad_norm": 0.3943694829940796,
      "learning_rate": 9.219045485394993e-06,
      "loss": 0.0357,
      "step": 257040
    },
    {
      "epoch": 0.420684328011364,
      "grad_norm": 1.4317291975021362,
      "learning_rate": 9.218979593181474e-06,
      "loss": 0.0528,
      "step": 257060
    },
    {
      "epoch": 0.42071705845001733,
      "grad_norm": 2.2289254665374756,
      "learning_rate": 9.218913700967958e-06,
      "loss": 0.0544,
      "step": 257080
    },
    {
      "epoch": 0.4207497888886707,
      "grad_norm": 4.211126804351807,
      "learning_rate": 9.21884780875444e-06,
      "loss": 0.0462,
      "step": 257100
    },
    {
      "epoch": 0.420782519327324,
      "grad_norm": 1.016479253768921,
      "learning_rate": 9.218781916540923e-06,
      "loss": 0.0416,
      "step": 257120
    },
    {
      "epoch": 0.42081524976597734,
      "grad_norm": 1.953431248664856,
      "learning_rate": 9.218716024327407e-06,
      "loss": 0.0328,
      "step": 257140
    },
    {
      "epoch": 0.4208479802046307,
      "grad_norm": 6.650067329406738,
      "learning_rate": 9.218650132113889e-06,
      "loss": 0.061,
      "step": 257160
    },
    {
      "epoch": 0.42088071064328403,
      "grad_norm": 1.0235251188278198,
      "learning_rate": 9.218584239900373e-06,
      "loss": 0.0303,
      "step": 257180
    },
    {
      "epoch": 0.4209134410819374,
      "grad_norm": 2.455658435821533,
      "learning_rate": 9.218518347686854e-06,
      "loss": 0.0578,
      "step": 257200
    },
    {
      "epoch": 0.4209461715205907,
      "grad_norm": 1.3558765649795532,
      "learning_rate": 9.218452455473338e-06,
      "loss": 0.0437,
      "step": 257220
    },
    {
      "epoch": 0.42097890195924403,
      "grad_norm": 14.96578311920166,
      "learning_rate": 9.21838656325982e-06,
      "loss": 0.0481,
      "step": 257240
    },
    {
      "epoch": 0.4210116323978974,
      "grad_norm": 1.7521815299987793,
      "learning_rate": 9.218320671046304e-06,
      "loss": 0.0355,
      "step": 257260
    },
    {
      "epoch": 0.4210443628365507,
      "grad_norm": 1.839707374572754,
      "learning_rate": 9.218254778832785e-06,
      "loss": 0.0409,
      "step": 257280
    },
    {
      "epoch": 0.4210770932752041,
      "grad_norm": 1.486207365989685,
      "learning_rate": 9.218188886619269e-06,
      "loss": 0.0476,
      "step": 257300
    },
    {
      "epoch": 0.4211098237138574,
      "grad_norm": 0.2301265299320221,
      "learning_rate": 9.218122994405751e-06,
      "loss": 0.0423,
      "step": 257320
    },
    {
      "epoch": 0.42114255415251073,
      "grad_norm": 2.3128390312194824,
      "learning_rate": 9.218057102192234e-06,
      "loss": 0.0521,
      "step": 257340
    },
    {
      "epoch": 0.4211752845911641,
      "grad_norm": 1.57318115234375,
      "learning_rate": 9.217991209978718e-06,
      "loss": 0.0395,
      "step": 257360
    },
    {
      "epoch": 0.4212080150298174,
      "grad_norm": 4.661962032318115,
      "learning_rate": 9.2179253177652e-06,
      "loss": 0.0509,
      "step": 257380
    },
    {
      "epoch": 0.4212407454684708,
      "grad_norm": 1.2170617580413818,
      "learning_rate": 9.217859425551684e-06,
      "loss": 0.037,
      "step": 257400
    },
    {
      "epoch": 0.4212734759071241,
      "grad_norm": 1.4981786012649536,
      "learning_rate": 9.217793533338167e-06,
      "loss": 0.0538,
      "step": 257420
    },
    {
      "epoch": 0.4213062063457774,
      "grad_norm": 2.8634228706359863,
      "learning_rate": 9.217727641124649e-06,
      "loss": 0.0517,
      "step": 257440
    },
    {
      "epoch": 0.4213389367844308,
      "grad_norm": 0.48590323328971863,
      "learning_rate": 9.217661748911133e-06,
      "loss": 0.0586,
      "step": 257460
    },
    {
      "epoch": 0.4213716672230841,
      "grad_norm": 3.400362968444824,
      "learning_rate": 9.217595856697614e-06,
      "loss": 0.0414,
      "step": 257480
    },
    {
      "epoch": 0.4214043976617375,
      "grad_norm": 3.236027956008911,
      "learning_rate": 9.217529964484098e-06,
      "loss": 0.041,
      "step": 257500
    },
    {
      "epoch": 0.4214371281003908,
      "grad_norm": 0.8872908353805542,
      "learning_rate": 9.217464072270582e-06,
      "loss": 0.0332,
      "step": 257520
    },
    {
      "epoch": 0.4214698585390441,
      "grad_norm": 0.971750795841217,
      "learning_rate": 9.217398180057064e-06,
      "loss": 0.0477,
      "step": 257540
    },
    {
      "epoch": 0.4215025889776975,
      "grad_norm": 1.1008292436599731,
      "learning_rate": 9.217332287843547e-06,
      "loss": 0.0274,
      "step": 257560
    },
    {
      "epoch": 0.4215353194163508,
      "grad_norm": 0.4026224613189697,
      "learning_rate": 9.217266395630029e-06,
      "loss": 0.0377,
      "step": 257580
    },
    {
      "epoch": 0.4215680498550042,
      "grad_norm": 1.2064886093139648,
      "learning_rate": 9.217200503416513e-06,
      "loss": 0.0444,
      "step": 257600
    },
    {
      "epoch": 0.4216007802936575,
      "grad_norm": 1.8542473316192627,
      "learning_rate": 9.217134611202995e-06,
      "loss": 0.0448,
      "step": 257620
    },
    {
      "epoch": 0.4216335107323108,
      "grad_norm": 3.273728847503662,
      "learning_rate": 9.217068718989478e-06,
      "loss": 0.0493,
      "step": 257640
    },
    {
      "epoch": 0.4216662411709642,
      "grad_norm": 2.124518394470215,
      "learning_rate": 9.21700282677596e-06,
      "loss": 0.0506,
      "step": 257660
    },
    {
      "epoch": 0.4216989716096175,
      "grad_norm": 1.08392333984375,
      "learning_rate": 9.216936934562444e-06,
      "loss": 0.0563,
      "step": 257680
    },
    {
      "epoch": 0.4217317020482709,
      "grad_norm": 1.7437480688095093,
      "learning_rate": 9.216871042348925e-06,
      "loss": 0.046,
      "step": 257700
    },
    {
      "epoch": 0.4217644324869242,
      "grad_norm": 1.1628392934799194,
      "learning_rate": 9.216805150135409e-06,
      "loss": 0.0333,
      "step": 257720
    },
    {
      "epoch": 0.4217971629255775,
      "grad_norm": 1.161583423614502,
      "learning_rate": 9.216739257921891e-06,
      "loss": 0.032,
      "step": 257740
    },
    {
      "epoch": 0.4218298933642309,
      "grad_norm": 2.090604782104492,
      "learning_rate": 9.216673365708375e-06,
      "loss": 0.0489,
      "step": 257760
    },
    {
      "epoch": 0.4218626238028842,
      "grad_norm": 1.5636413097381592,
      "learning_rate": 9.216607473494858e-06,
      "loss": 0.0434,
      "step": 257780
    },
    {
      "epoch": 0.42189535424153757,
      "grad_norm": 2.8775837421417236,
      "learning_rate": 9.21654158128134e-06,
      "loss": 0.0467,
      "step": 257800
    },
    {
      "epoch": 0.4219280846801909,
      "grad_norm": 0.5928090810775757,
      "learning_rate": 9.216475689067824e-06,
      "loss": 0.0262,
      "step": 257820
    },
    {
      "epoch": 0.4219608151188442,
      "grad_norm": 1.5277483463287354,
      "learning_rate": 9.216409796854307e-06,
      "loss": 0.0476,
      "step": 257840
    },
    {
      "epoch": 0.4219935455574976,
      "grad_norm": 2.1958351135253906,
      "learning_rate": 9.216343904640789e-06,
      "loss": 0.0541,
      "step": 257860
    },
    {
      "epoch": 0.4220262759961509,
      "grad_norm": 1.9779547452926636,
      "learning_rate": 9.216278012427273e-06,
      "loss": 0.0258,
      "step": 257880
    },
    {
      "epoch": 0.42205900643480426,
      "grad_norm": 1.1964699029922485,
      "learning_rate": 9.216212120213756e-06,
      "loss": 0.0562,
      "step": 257900
    },
    {
      "epoch": 0.4220917368734576,
      "grad_norm": 0.3769979476928711,
      "learning_rate": 9.216146228000238e-06,
      "loss": 0.0329,
      "step": 257920
    },
    {
      "epoch": 0.4221244673121109,
      "grad_norm": 1.4945335388183594,
      "learning_rate": 9.216080335786722e-06,
      "loss": 0.0438,
      "step": 257940
    },
    {
      "epoch": 0.42215719775076427,
      "grad_norm": 2.030911684036255,
      "learning_rate": 9.216014443573204e-06,
      "loss": 0.0403,
      "step": 257960
    },
    {
      "epoch": 0.4221899281894176,
      "grad_norm": 0.9455031752586365,
      "learning_rate": 9.215948551359687e-06,
      "loss": 0.0407,
      "step": 257980
    },
    {
      "epoch": 0.42222265862807096,
      "grad_norm": 0.49131539463996887,
      "learning_rate": 9.215882659146169e-06,
      "loss": 0.0457,
      "step": 258000
    },
    {
      "epoch": 0.4222553890667243,
      "grad_norm": 1.0919387340545654,
      "learning_rate": 9.215816766932653e-06,
      "loss": 0.0344,
      "step": 258020
    },
    {
      "epoch": 0.4222881195053776,
      "grad_norm": 1.1133142709732056,
      "learning_rate": 9.215750874719135e-06,
      "loss": 0.0487,
      "step": 258040
    },
    {
      "epoch": 0.42232084994403096,
      "grad_norm": 0.662365734577179,
      "learning_rate": 9.215684982505618e-06,
      "loss": 0.0486,
      "step": 258060
    },
    {
      "epoch": 0.4223535803826843,
      "grad_norm": 0.6848410964012146,
      "learning_rate": 9.2156190902921e-06,
      "loss": 0.0409,
      "step": 258080
    },
    {
      "epoch": 0.42238631082133765,
      "grad_norm": 2.783827781677246,
      "learning_rate": 9.215553198078584e-06,
      "loss": 0.0501,
      "step": 258100
    },
    {
      "epoch": 0.42241904125999097,
      "grad_norm": 0.7231836915016174,
      "learning_rate": 9.215487305865066e-06,
      "loss": 0.0449,
      "step": 258120
    },
    {
      "epoch": 0.4224517716986443,
      "grad_norm": 3.3823859691619873,
      "learning_rate": 9.215421413651549e-06,
      "loss": 0.0407,
      "step": 258140
    },
    {
      "epoch": 0.42248450213729766,
      "grad_norm": 1.4144278764724731,
      "learning_rate": 9.215355521438033e-06,
      "loss": 0.0452,
      "step": 258160
    },
    {
      "epoch": 0.422517232575951,
      "grad_norm": 2.706508159637451,
      "learning_rate": 9.215289629224515e-06,
      "loss": 0.038,
      "step": 258180
    },
    {
      "epoch": 0.42254996301460435,
      "grad_norm": 0.7344327569007874,
      "learning_rate": 9.215223737010998e-06,
      "loss": 0.0465,
      "step": 258200
    },
    {
      "epoch": 0.42258269345325766,
      "grad_norm": 3.943459987640381,
      "learning_rate": 9.215157844797482e-06,
      "loss": 0.0424,
      "step": 258220
    },
    {
      "epoch": 0.422615423891911,
      "grad_norm": 0.41729727387428284,
      "learning_rate": 9.215091952583964e-06,
      "loss": 0.0588,
      "step": 258240
    },
    {
      "epoch": 0.42264815433056435,
      "grad_norm": 3.6488091945648193,
      "learning_rate": 9.215026060370447e-06,
      "loss": 0.0454,
      "step": 258260
    },
    {
      "epoch": 0.42268088476921767,
      "grad_norm": 0.16854321956634521,
      "learning_rate": 9.214960168156931e-06,
      "loss": 0.0402,
      "step": 258280
    },
    {
      "epoch": 0.42271361520787104,
      "grad_norm": 1.691780686378479,
      "learning_rate": 9.214894275943413e-06,
      "loss": 0.051,
      "step": 258300
    },
    {
      "epoch": 0.42274634564652436,
      "grad_norm": 0.9044376015663147,
      "learning_rate": 9.214828383729896e-06,
      "loss": 0.047,
      "step": 258320
    },
    {
      "epoch": 0.4227790760851777,
      "grad_norm": 4.056341648101807,
      "learning_rate": 9.214762491516378e-06,
      "loss": 0.0493,
      "step": 258340
    },
    {
      "epoch": 0.42281180652383105,
      "grad_norm": 1.6923514604568481,
      "learning_rate": 9.214696599302862e-06,
      "loss": 0.0316,
      "step": 258360
    },
    {
      "epoch": 0.42284453696248436,
      "grad_norm": 0.7532914876937866,
      "learning_rate": 9.214630707089344e-06,
      "loss": 0.0471,
      "step": 258380
    },
    {
      "epoch": 0.42287726740113774,
      "grad_norm": 0.9819841384887695,
      "learning_rate": 9.214564814875827e-06,
      "loss": 0.0377,
      "step": 258400
    },
    {
      "epoch": 0.42290999783979105,
      "grad_norm": 2.1227049827575684,
      "learning_rate": 9.21449892266231e-06,
      "loss": 0.0396,
      "step": 258420
    },
    {
      "epoch": 0.42294272827844437,
      "grad_norm": 3.231895685195923,
      "learning_rate": 9.214433030448793e-06,
      "loss": 0.0421,
      "step": 258440
    },
    {
      "epoch": 0.42297545871709774,
      "grad_norm": 1.0818040370941162,
      "learning_rate": 9.214367138235275e-06,
      "loss": 0.0406,
      "step": 258460
    },
    {
      "epoch": 0.42300818915575106,
      "grad_norm": 1.197198748588562,
      "learning_rate": 9.214301246021758e-06,
      "loss": 0.0426,
      "step": 258480
    },
    {
      "epoch": 0.42304091959440443,
      "grad_norm": 1.4773350954055786,
      "learning_rate": 9.21423535380824e-06,
      "loss": 0.0409,
      "step": 258500
    },
    {
      "epoch": 0.42307365003305775,
      "grad_norm": 0.4043121039867401,
      "learning_rate": 9.214169461594724e-06,
      "loss": 0.0432,
      "step": 258520
    },
    {
      "epoch": 0.42310638047171106,
      "grad_norm": 0.8095245957374573,
      "learning_rate": 9.214103569381206e-06,
      "loss": 0.0479,
      "step": 258540
    },
    {
      "epoch": 0.42313911091036444,
      "grad_norm": 1.124673843383789,
      "learning_rate": 9.21403767716769e-06,
      "loss": 0.0398,
      "step": 258560
    },
    {
      "epoch": 0.42317184134901775,
      "grad_norm": 0.9142150282859802,
      "learning_rate": 9.213971784954173e-06,
      "loss": 0.0572,
      "step": 258580
    },
    {
      "epoch": 0.4232045717876711,
      "grad_norm": 1.0015143156051636,
      "learning_rate": 9.213905892740655e-06,
      "loss": 0.0415,
      "step": 258600
    },
    {
      "epoch": 0.42323730222632444,
      "grad_norm": 3.345777750015259,
      "learning_rate": 9.213840000527138e-06,
      "loss": 0.0412,
      "step": 258620
    },
    {
      "epoch": 0.42327003266497776,
      "grad_norm": 0.7568203210830688,
      "learning_rate": 9.213774108313622e-06,
      "loss": 0.0444,
      "step": 258640
    },
    {
      "epoch": 0.42330276310363113,
      "grad_norm": 1.614537000656128,
      "learning_rate": 9.213708216100104e-06,
      "loss": 0.0566,
      "step": 258660
    },
    {
      "epoch": 0.42333549354228445,
      "grad_norm": 2.105238914489746,
      "learning_rate": 9.213642323886587e-06,
      "loss": 0.0361,
      "step": 258680
    },
    {
      "epoch": 0.4233682239809378,
      "grad_norm": 1.8210119009017944,
      "learning_rate": 9.213576431673071e-06,
      "loss": 0.0434,
      "step": 258700
    },
    {
      "epoch": 0.42340095441959114,
      "grad_norm": 1.857820749282837,
      "learning_rate": 9.213510539459553e-06,
      "loss": 0.0587,
      "step": 258720
    },
    {
      "epoch": 0.42343368485824445,
      "grad_norm": 2.371870517730713,
      "learning_rate": 9.213444647246036e-06,
      "loss": 0.0459,
      "step": 258740
    },
    {
      "epoch": 0.4234664152968978,
      "grad_norm": 1.4420561790466309,
      "learning_rate": 9.213378755032518e-06,
      "loss": 0.0389,
      "step": 258760
    },
    {
      "epoch": 0.42349914573555114,
      "grad_norm": 1.7286580801010132,
      "learning_rate": 9.213312862819002e-06,
      "loss": 0.0475,
      "step": 258780
    },
    {
      "epoch": 0.4235318761742045,
      "grad_norm": 1.911483883857727,
      "learning_rate": 9.213246970605484e-06,
      "loss": 0.0388,
      "step": 258800
    },
    {
      "epoch": 0.42356460661285783,
      "grad_norm": 2.768219232559204,
      "learning_rate": 9.213181078391967e-06,
      "loss": 0.0429,
      "step": 258820
    },
    {
      "epoch": 0.42359733705151115,
      "grad_norm": 0.8866472840309143,
      "learning_rate": 9.21311518617845e-06,
      "loss": 0.041,
      "step": 258840
    },
    {
      "epoch": 0.4236300674901645,
      "grad_norm": 0.5299223065376282,
      "learning_rate": 9.213049293964933e-06,
      "loss": 0.0439,
      "step": 258860
    },
    {
      "epoch": 0.42366279792881784,
      "grad_norm": 1.630552887916565,
      "learning_rate": 9.212983401751415e-06,
      "loss": 0.0574,
      "step": 258880
    },
    {
      "epoch": 0.4236955283674712,
      "grad_norm": 0.8483691811561584,
      "learning_rate": 9.212917509537898e-06,
      "loss": 0.0513,
      "step": 258900
    },
    {
      "epoch": 0.4237282588061245,
      "grad_norm": 3.377815008163452,
      "learning_rate": 9.212851617324382e-06,
      "loss": 0.0449,
      "step": 258920
    },
    {
      "epoch": 0.42376098924477784,
      "grad_norm": 1.1365935802459717,
      "learning_rate": 9.212785725110864e-06,
      "loss": 0.0422,
      "step": 258940
    },
    {
      "epoch": 0.4237937196834312,
      "grad_norm": 1.3630746603012085,
      "learning_rate": 9.212719832897347e-06,
      "loss": 0.0566,
      "step": 258960
    },
    {
      "epoch": 0.42382645012208453,
      "grad_norm": 3.6264994144439697,
      "learning_rate": 9.21265394068383e-06,
      "loss": 0.0438,
      "step": 258980
    },
    {
      "epoch": 0.4238591805607379,
      "grad_norm": 0.3885459899902344,
      "learning_rate": 9.212588048470313e-06,
      "loss": 0.0451,
      "step": 259000
    },
    {
      "epoch": 0.4238919109993912,
      "grad_norm": 0.7578482627868652,
      "learning_rate": 9.212522156256796e-06,
      "loss": 0.0406,
      "step": 259020
    },
    {
      "epoch": 0.42392464143804454,
      "grad_norm": 2.0824544429779053,
      "learning_rate": 9.212456264043278e-06,
      "loss": 0.0411,
      "step": 259040
    },
    {
      "epoch": 0.4239573718766979,
      "grad_norm": 0.834942638874054,
      "learning_rate": 9.212390371829762e-06,
      "loss": 0.0344,
      "step": 259060
    },
    {
      "epoch": 0.4239901023153512,
      "grad_norm": 0.8520922660827637,
      "learning_rate": 9.212324479616246e-06,
      "loss": 0.0507,
      "step": 259080
    },
    {
      "epoch": 0.42402283275400454,
      "grad_norm": 2.8024063110351562,
      "learning_rate": 9.212258587402727e-06,
      "loss": 0.0378,
      "step": 259100
    },
    {
      "epoch": 0.4240555631926579,
      "grad_norm": 0.9062613844871521,
      "learning_rate": 9.212192695189211e-06,
      "loss": 0.0413,
      "step": 259120
    },
    {
      "epoch": 0.42408829363131123,
      "grad_norm": 0.4954894483089447,
      "learning_rate": 9.212126802975693e-06,
      "loss": 0.0437,
      "step": 259140
    },
    {
      "epoch": 0.4241210240699646,
      "grad_norm": 2.1474215984344482,
      "learning_rate": 9.212060910762176e-06,
      "loss": 0.0413,
      "step": 259160
    },
    {
      "epoch": 0.4241537545086179,
      "grad_norm": 0.7185123562812805,
      "learning_rate": 9.211995018548658e-06,
      "loss": 0.0393,
      "step": 259180
    },
    {
      "epoch": 0.42418648494727124,
      "grad_norm": 0.8198637962341309,
      "learning_rate": 9.211929126335142e-06,
      "loss": 0.0381,
      "step": 259200
    },
    {
      "epoch": 0.4242192153859246,
      "grad_norm": 0.887305736541748,
      "learning_rate": 9.211863234121624e-06,
      "loss": 0.0596,
      "step": 259220
    },
    {
      "epoch": 0.4242519458245779,
      "grad_norm": 1.2511183023452759,
      "learning_rate": 9.211797341908107e-06,
      "loss": 0.0472,
      "step": 259240
    },
    {
      "epoch": 0.4242846762632313,
      "grad_norm": 2.0241482257843018,
      "learning_rate": 9.211731449694591e-06,
      "loss": 0.0517,
      "step": 259260
    },
    {
      "epoch": 0.4243174067018846,
      "grad_norm": 0.8670743107795715,
      "learning_rate": 9.211665557481073e-06,
      "loss": 0.0556,
      "step": 259280
    },
    {
      "epoch": 0.42435013714053793,
      "grad_norm": 0.49904921650886536,
      "learning_rate": 9.211599665267557e-06,
      "loss": 0.0324,
      "step": 259300
    },
    {
      "epoch": 0.4243828675791913,
      "grad_norm": 2.6631507873535156,
      "learning_rate": 9.211533773054038e-06,
      "loss": 0.0466,
      "step": 259320
    },
    {
      "epoch": 0.4244155980178446,
      "grad_norm": 0.7631985545158386,
      "learning_rate": 9.211467880840522e-06,
      "loss": 0.0533,
      "step": 259340
    },
    {
      "epoch": 0.424448328456498,
      "grad_norm": 0.8723867535591125,
      "learning_rate": 9.211401988627004e-06,
      "loss": 0.0511,
      "step": 259360
    },
    {
      "epoch": 0.4244810588951513,
      "grad_norm": 3.132044553756714,
      "learning_rate": 9.211336096413487e-06,
      "loss": 0.0522,
      "step": 259380
    },
    {
      "epoch": 0.4245137893338046,
      "grad_norm": 4.335181713104248,
      "learning_rate": 9.21127020419997e-06,
      "loss": 0.038,
      "step": 259400
    },
    {
      "epoch": 0.424546519772458,
      "grad_norm": 1.3152728080749512,
      "learning_rate": 9.211204311986453e-06,
      "loss": 0.0431,
      "step": 259420
    },
    {
      "epoch": 0.4245792502111113,
      "grad_norm": 2.286867380142212,
      "learning_rate": 9.211138419772937e-06,
      "loss": 0.0469,
      "step": 259440
    },
    {
      "epoch": 0.4246119806497647,
      "grad_norm": 1.4075024127960205,
      "learning_rate": 9.211072527559418e-06,
      "loss": 0.0444,
      "step": 259460
    },
    {
      "epoch": 0.424644711088418,
      "grad_norm": 1.625749945640564,
      "learning_rate": 9.211006635345902e-06,
      "loss": 0.0486,
      "step": 259480
    },
    {
      "epoch": 0.4246774415270713,
      "grad_norm": 0.4466105103492737,
      "learning_rate": 9.210940743132386e-06,
      "loss": 0.0498,
      "step": 259500
    },
    {
      "epoch": 0.4247101719657247,
      "grad_norm": 2.5533759593963623,
      "learning_rate": 9.210874850918867e-06,
      "loss": 0.0428,
      "step": 259520
    },
    {
      "epoch": 0.424742902404378,
      "grad_norm": 2.1910507678985596,
      "learning_rate": 9.210808958705351e-06,
      "loss": 0.0464,
      "step": 259540
    },
    {
      "epoch": 0.4247756328430314,
      "grad_norm": 2.7402260303497314,
      "learning_rate": 9.210743066491833e-06,
      "loss": 0.0412,
      "step": 259560
    },
    {
      "epoch": 0.4248083632816847,
      "grad_norm": 0.7681423425674438,
      "learning_rate": 9.210677174278317e-06,
      "loss": 0.0389,
      "step": 259580
    },
    {
      "epoch": 0.424841093720338,
      "grad_norm": 2.356508731842041,
      "learning_rate": 9.2106112820648e-06,
      "loss": 0.055,
      "step": 259600
    },
    {
      "epoch": 0.4248738241589914,
      "grad_norm": 1.5189090967178345,
      "learning_rate": 9.210545389851282e-06,
      "loss": 0.0524,
      "step": 259620
    },
    {
      "epoch": 0.4249065545976447,
      "grad_norm": 2.3383264541625977,
      "learning_rate": 9.210479497637766e-06,
      "loss": 0.0333,
      "step": 259640
    },
    {
      "epoch": 0.4249392850362981,
      "grad_norm": 2.170818328857422,
      "learning_rate": 9.210413605424248e-06,
      "loss": 0.0394,
      "step": 259660
    },
    {
      "epoch": 0.4249720154749514,
      "grad_norm": 1.2107443809509277,
      "learning_rate": 9.210347713210731e-06,
      "loss": 0.0457,
      "step": 259680
    },
    {
      "epoch": 0.4250047459136047,
      "grad_norm": 0.9586893320083618,
      "learning_rate": 9.210281820997213e-06,
      "loss": 0.0418,
      "step": 259700
    },
    {
      "epoch": 0.4250374763522581,
      "grad_norm": 1.7683659791946411,
      "learning_rate": 9.210215928783697e-06,
      "loss": 0.0485,
      "step": 259720
    },
    {
      "epoch": 0.4250702067909114,
      "grad_norm": 1.2145646810531616,
      "learning_rate": 9.210150036570178e-06,
      "loss": 0.0465,
      "step": 259740
    },
    {
      "epoch": 0.42510293722956477,
      "grad_norm": 3.1440927982330322,
      "learning_rate": 9.210084144356662e-06,
      "loss": 0.0514,
      "step": 259760
    },
    {
      "epoch": 0.4251356676682181,
      "grad_norm": 1.7078677415847778,
      "learning_rate": 9.210018252143144e-06,
      "loss": 0.0376,
      "step": 259780
    },
    {
      "epoch": 0.4251683981068714,
      "grad_norm": 3.6776134967803955,
      "learning_rate": 9.209952359929628e-06,
      "loss": 0.0414,
      "step": 259800
    },
    {
      "epoch": 0.4252011285455248,
      "grad_norm": 1.6259914636611938,
      "learning_rate": 9.209886467716111e-06,
      "loss": 0.0318,
      "step": 259820
    },
    {
      "epoch": 0.4252338589841781,
      "grad_norm": 1.5483595132827759,
      "learning_rate": 9.209820575502593e-06,
      "loss": 0.0472,
      "step": 259840
    },
    {
      "epoch": 0.42526658942283146,
      "grad_norm": 1.8003509044647217,
      "learning_rate": 9.209754683289077e-06,
      "loss": 0.0453,
      "step": 259860
    },
    {
      "epoch": 0.4252993198614848,
      "grad_norm": 1.3799076080322266,
      "learning_rate": 9.20968879107556e-06,
      "loss": 0.0458,
      "step": 259880
    },
    {
      "epoch": 0.4253320503001381,
      "grad_norm": 0.5329322218894958,
      "learning_rate": 9.209622898862042e-06,
      "loss": 0.0446,
      "step": 259900
    },
    {
      "epoch": 0.42536478073879147,
      "grad_norm": 2.2516536712646484,
      "learning_rate": 9.209557006648526e-06,
      "loss": 0.0388,
      "step": 259920
    },
    {
      "epoch": 0.4253975111774448,
      "grad_norm": 0.2035674750804901,
      "learning_rate": 9.209491114435008e-06,
      "loss": 0.0355,
      "step": 259940
    },
    {
      "epoch": 0.42543024161609816,
      "grad_norm": 1.4071584939956665,
      "learning_rate": 9.209425222221491e-06,
      "loss": 0.0537,
      "step": 259960
    },
    {
      "epoch": 0.4254629720547515,
      "grad_norm": 0.7591899633407593,
      "learning_rate": 9.209359330007975e-06,
      "loss": 0.0325,
      "step": 259980
    },
    {
      "epoch": 0.4254957024934048,
      "grad_norm": 0.44710612297058105,
      "learning_rate": 9.209293437794457e-06,
      "loss": 0.0467,
      "step": 260000
    },
    {
      "epoch": 0.42552843293205816,
      "grad_norm": 2.5980477333068848,
      "learning_rate": 9.20922754558094e-06,
      "loss": 0.0406,
      "step": 260020
    },
    {
      "epoch": 0.4255611633707115,
      "grad_norm": 0.2850552499294281,
      "learning_rate": 9.209161653367422e-06,
      "loss": 0.0504,
      "step": 260040
    },
    {
      "epoch": 0.42559389380936485,
      "grad_norm": 1.5731630325317383,
      "learning_rate": 9.209095761153906e-06,
      "loss": 0.0651,
      "step": 260060
    },
    {
      "epoch": 0.42562662424801817,
      "grad_norm": 0.42968136072158813,
      "learning_rate": 9.209029868940388e-06,
      "loss": 0.0449,
      "step": 260080
    },
    {
      "epoch": 0.4256593546866715,
      "grad_norm": 2.5346944332122803,
      "learning_rate": 9.208963976726871e-06,
      "loss": 0.0573,
      "step": 260100
    },
    {
      "epoch": 0.42569208512532486,
      "grad_norm": 1.9842870235443115,
      "learning_rate": 9.208898084513353e-06,
      "loss": 0.051,
      "step": 260120
    },
    {
      "epoch": 0.4257248155639782,
      "grad_norm": 2.2878916263580322,
      "learning_rate": 9.208832192299837e-06,
      "loss": 0.0413,
      "step": 260140
    },
    {
      "epoch": 0.42575754600263155,
      "grad_norm": 0.8944396376609802,
      "learning_rate": 9.208766300086319e-06,
      "loss": 0.0453,
      "step": 260160
    },
    {
      "epoch": 0.42579027644128487,
      "grad_norm": 0.7286210656166077,
      "learning_rate": 9.208700407872802e-06,
      "loss": 0.0416,
      "step": 260180
    },
    {
      "epoch": 0.4258230068799382,
      "grad_norm": 6.272665977478027,
      "learning_rate": 9.208634515659286e-06,
      "loss": 0.0456,
      "step": 260200
    },
    {
      "epoch": 0.42585573731859155,
      "grad_norm": 1.18181574344635,
      "learning_rate": 9.208568623445768e-06,
      "loss": 0.0423,
      "step": 260220
    },
    {
      "epoch": 0.42588846775724487,
      "grad_norm": 1.5736795663833618,
      "learning_rate": 9.208502731232251e-06,
      "loss": 0.0408,
      "step": 260240
    },
    {
      "epoch": 0.42592119819589824,
      "grad_norm": 0.7693618535995483,
      "learning_rate": 9.208436839018735e-06,
      "loss": 0.0349,
      "step": 260260
    },
    {
      "epoch": 0.42595392863455156,
      "grad_norm": 1.4712761640548706,
      "learning_rate": 9.208370946805217e-06,
      "loss": 0.0403,
      "step": 260280
    },
    {
      "epoch": 0.4259866590732049,
      "grad_norm": 1.0065897703170776,
      "learning_rate": 9.2083050545917e-06,
      "loss": 0.0392,
      "step": 260300
    },
    {
      "epoch": 0.42601938951185825,
      "grad_norm": 2.5875864028930664,
      "learning_rate": 9.208239162378184e-06,
      "loss": 0.0491,
      "step": 260320
    },
    {
      "epoch": 0.42605211995051157,
      "grad_norm": 1.6995402574539185,
      "learning_rate": 9.208173270164666e-06,
      "loss": 0.0568,
      "step": 260340
    },
    {
      "epoch": 0.42608485038916494,
      "grad_norm": 0.7135134339332581,
      "learning_rate": 9.20810737795115e-06,
      "loss": 0.0329,
      "step": 260360
    },
    {
      "epoch": 0.42611758082781825,
      "grad_norm": 1.4090385437011719,
      "learning_rate": 9.208041485737631e-06,
      "loss": 0.0446,
      "step": 260380
    },
    {
      "epoch": 0.42615031126647157,
      "grad_norm": 0.7146360874176025,
      "learning_rate": 9.207975593524115e-06,
      "loss": 0.0403,
      "step": 260400
    },
    {
      "epoch": 0.42618304170512494,
      "grad_norm": 0.3248846232891083,
      "learning_rate": 9.207909701310597e-06,
      "loss": 0.039,
      "step": 260420
    },
    {
      "epoch": 0.42621577214377826,
      "grad_norm": 1.5484740734100342,
      "learning_rate": 9.20784380909708e-06,
      "loss": 0.044,
      "step": 260440
    },
    {
      "epoch": 0.42624850258243163,
      "grad_norm": 3.863495111465454,
      "learning_rate": 9.207777916883562e-06,
      "loss": 0.0516,
      "step": 260460
    },
    {
      "epoch": 0.42628123302108495,
      "grad_norm": 1.148374319076538,
      "learning_rate": 9.207712024670046e-06,
      "loss": 0.0527,
      "step": 260480
    },
    {
      "epoch": 0.42631396345973827,
      "grad_norm": 1.176056981086731,
      "learning_rate": 9.207646132456528e-06,
      "loss": 0.0342,
      "step": 260500
    },
    {
      "epoch": 0.42634669389839164,
      "grad_norm": 1.384785532951355,
      "learning_rate": 9.207580240243011e-06,
      "loss": 0.0417,
      "step": 260520
    },
    {
      "epoch": 0.42637942433704495,
      "grad_norm": 0.4656153619289398,
      "learning_rate": 9.207514348029493e-06,
      "loss": 0.039,
      "step": 260540
    },
    {
      "epoch": 0.4264121547756983,
      "grad_norm": 1.9781864881515503,
      "learning_rate": 9.207448455815977e-06,
      "loss": 0.0515,
      "step": 260560
    },
    {
      "epoch": 0.42644488521435164,
      "grad_norm": 0.8280761241912842,
      "learning_rate": 9.207382563602459e-06,
      "loss": 0.0392,
      "step": 260580
    },
    {
      "epoch": 0.42647761565300496,
      "grad_norm": 2.453503370285034,
      "learning_rate": 9.207316671388942e-06,
      "loss": 0.0527,
      "step": 260600
    },
    {
      "epoch": 0.42651034609165833,
      "grad_norm": 3.5466253757476807,
      "learning_rate": 9.207250779175426e-06,
      "loss": 0.0372,
      "step": 260620
    },
    {
      "epoch": 0.42654307653031165,
      "grad_norm": 1.1168886423110962,
      "learning_rate": 9.207184886961908e-06,
      "loss": 0.0388,
      "step": 260640
    },
    {
      "epoch": 0.426575806968965,
      "grad_norm": 8.03754711151123,
      "learning_rate": 9.207118994748391e-06,
      "loss": 0.0405,
      "step": 260660
    },
    {
      "epoch": 0.42660853740761834,
      "grad_norm": 0.7650065422058105,
      "learning_rate": 9.207053102534875e-06,
      "loss": 0.0436,
      "step": 260680
    },
    {
      "epoch": 0.42664126784627165,
      "grad_norm": 2.216966390609741,
      "learning_rate": 9.206987210321357e-06,
      "loss": 0.0311,
      "step": 260700
    },
    {
      "epoch": 0.426673998284925,
      "grad_norm": 4.447022914886475,
      "learning_rate": 9.20692131810784e-06,
      "loss": 0.0482,
      "step": 260720
    },
    {
      "epoch": 0.42670672872357834,
      "grad_norm": 4.8921380043029785,
      "learning_rate": 9.206855425894324e-06,
      "loss": 0.0388,
      "step": 260740
    },
    {
      "epoch": 0.4267394591622317,
      "grad_norm": 4.374130725860596,
      "learning_rate": 9.206789533680806e-06,
      "loss": 0.0409,
      "step": 260760
    },
    {
      "epoch": 0.42677218960088503,
      "grad_norm": 0.6213160157203674,
      "learning_rate": 9.20672364146729e-06,
      "loss": 0.0412,
      "step": 260780
    },
    {
      "epoch": 0.42680492003953835,
      "grad_norm": 4.1079559326171875,
      "learning_rate": 9.206657749253771e-06,
      "loss": 0.0501,
      "step": 260800
    },
    {
      "epoch": 0.4268376504781917,
      "grad_norm": 0.7384431958198547,
      "learning_rate": 9.206591857040255e-06,
      "loss": 0.0495,
      "step": 260820
    },
    {
      "epoch": 0.42687038091684504,
      "grad_norm": 2.297858238220215,
      "learning_rate": 9.206525964826737e-06,
      "loss": 0.0383,
      "step": 260840
    },
    {
      "epoch": 0.4269031113554984,
      "grad_norm": 1.7739137411117554,
      "learning_rate": 9.20646007261322e-06,
      "loss": 0.0524,
      "step": 260860
    },
    {
      "epoch": 0.4269358417941517,
      "grad_norm": 1.060221791267395,
      "learning_rate": 9.206394180399702e-06,
      "loss": 0.0447,
      "step": 260880
    },
    {
      "epoch": 0.42696857223280504,
      "grad_norm": 1.390950322151184,
      "learning_rate": 9.206328288186186e-06,
      "loss": 0.0468,
      "step": 260900
    },
    {
      "epoch": 0.4270013026714584,
      "grad_norm": 2.6325502395629883,
      "learning_rate": 9.206262395972668e-06,
      "loss": 0.0584,
      "step": 260920
    },
    {
      "epoch": 0.42703403311011173,
      "grad_norm": 3.1051230430603027,
      "learning_rate": 9.206196503759151e-06,
      "loss": 0.0558,
      "step": 260940
    },
    {
      "epoch": 0.4270667635487651,
      "grad_norm": 1.415204405784607,
      "learning_rate": 9.206130611545633e-06,
      "loss": 0.0368,
      "step": 260960
    },
    {
      "epoch": 0.4270994939874184,
      "grad_norm": 1.483757734298706,
      "learning_rate": 9.206064719332117e-06,
      "loss": 0.046,
      "step": 260980
    },
    {
      "epoch": 0.42713222442607174,
      "grad_norm": 2.1568355560302734,
      "learning_rate": 9.2059988271186e-06,
      "loss": 0.0475,
      "step": 261000
    },
    {
      "epoch": 0.4271649548647251,
      "grad_norm": 1.0682848691940308,
      "learning_rate": 9.205932934905082e-06,
      "loss": 0.0365,
      "step": 261020
    },
    {
      "epoch": 0.4271976853033784,
      "grad_norm": 1.6600686311721802,
      "learning_rate": 9.205867042691566e-06,
      "loss": 0.057,
      "step": 261040
    },
    {
      "epoch": 0.4272304157420318,
      "grad_norm": 3.010187864303589,
      "learning_rate": 9.20580115047805e-06,
      "loss": 0.0399,
      "step": 261060
    },
    {
      "epoch": 0.4272631461806851,
      "grad_norm": 1.3351835012435913,
      "learning_rate": 9.205735258264531e-06,
      "loss": 0.0521,
      "step": 261080
    },
    {
      "epoch": 0.42729587661933843,
      "grad_norm": 1.7583963871002197,
      "learning_rate": 9.205669366051015e-06,
      "loss": 0.0459,
      "step": 261100
    },
    {
      "epoch": 0.4273286070579918,
      "grad_norm": 1.3651026487350464,
      "learning_rate": 9.205603473837499e-06,
      "loss": 0.0432,
      "step": 261120
    },
    {
      "epoch": 0.4273613374966451,
      "grad_norm": 0.7104716897010803,
      "learning_rate": 9.20553758162398e-06,
      "loss": 0.0345,
      "step": 261140
    },
    {
      "epoch": 0.4273940679352985,
      "grad_norm": 1.9069640636444092,
      "learning_rate": 9.205471689410464e-06,
      "loss": 0.041,
      "step": 261160
    },
    {
      "epoch": 0.4274267983739518,
      "grad_norm": 1.150815725326538,
      "learning_rate": 9.205405797196946e-06,
      "loss": 0.0569,
      "step": 261180
    },
    {
      "epoch": 0.4274595288126051,
      "grad_norm": 0.9728620648384094,
      "learning_rate": 9.20533990498343e-06,
      "loss": 0.0422,
      "step": 261200
    },
    {
      "epoch": 0.4274922592512585,
      "grad_norm": 1.8989999294281006,
      "learning_rate": 9.205274012769911e-06,
      "loss": 0.0518,
      "step": 261220
    },
    {
      "epoch": 0.4275249896899118,
      "grad_norm": 3.5225374698638916,
      "learning_rate": 9.205208120556395e-06,
      "loss": 0.0548,
      "step": 261240
    },
    {
      "epoch": 0.4275577201285652,
      "grad_norm": 2.065934896469116,
      "learning_rate": 9.205142228342877e-06,
      "loss": 0.0462,
      "step": 261260
    },
    {
      "epoch": 0.4275904505672185,
      "grad_norm": 0.9947091937065125,
      "learning_rate": 9.20507633612936e-06,
      "loss": 0.042,
      "step": 261280
    },
    {
      "epoch": 0.4276231810058718,
      "grad_norm": 3.886127233505249,
      "learning_rate": 9.205010443915842e-06,
      "loss": 0.0425,
      "step": 261300
    },
    {
      "epoch": 0.4276559114445252,
      "grad_norm": 3.4498367309570312,
      "learning_rate": 9.204944551702326e-06,
      "loss": 0.0457,
      "step": 261320
    },
    {
      "epoch": 0.4276886418831785,
      "grad_norm": 1.346260905265808,
      "learning_rate": 9.204878659488808e-06,
      "loss": 0.0413,
      "step": 261340
    },
    {
      "epoch": 0.4277213723218319,
      "grad_norm": 0.4107513129711151,
      "learning_rate": 9.204812767275291e-06,
      "loss": 0.0456,
      "step": 261360
    },
    {
      "epoch": 0.4277541027604852,
      "grad_norm": 2.0621657371520996,
      "learning_rate": 9.204746875061775e-06,
      "loss": 0.0435,
      "step": 261380
    },
    {
      "epoch": 0.4277868331991385,
      "grad_norm": 1.1377605199813843,
      "learning_rate": 9.204680982848257e-06,
      "loss": 0.0465,
      "step": 261400
    },
    {
      "epoch": 0.4278195636377919,
      "grad_norm": 3.4470622539520264,
      "learning_rate": 9.20461509063474e-06,
      "loss": 0.0376,
      "step": 261420
    },
    {
      "epoch": 0.4278522940764452,
      "grad_norm": 0.18408270180225372,
      "learning_rate": 9.204549198421222e-06,
      "loss": 0.0342,
      "step": 261440
    },
    {
      "epoch": 0.4278850245150986,
      "grad_norm": 2.2870397567749023,
      "learning_rate": 9.204483306207706e-06,
      "loss": 0.0391,
      "step": 261460
    },
    {
      "epoch": 0.4279177549537519,
      "grad_norm": 1.978378176689148,
      "learning_rate": 9.20441741399419e-06,
      "loss": 0.0414,
      "step": 261480
    },
    {
      "epoch": 0.4279504853924052,
      "grad_norm": 0.6262688636779785,
      "learning_rate": 9.204351521780671e-06,
      "loss": 0.0402,
      "step": 261500
    },
    {
      "epoch": 0.4279832158310586,
      "grad_norm": 0.927516758441925,
      "learning_rate": 9.204285629567155e-06,
      "loss": 0.0438,
      "step": 261520
    },
    {
      "epoch": 0.4280159462697119,
      "grad_norm": 0.7833852171897888,
      "learning_rate": 9.204219737353639e-06,
      "loss": 0.0487,
      "step": 261540
    },
    {
      "epoch": 0.42804867670836527,
      "grad_norm": 1.9375380277633667,
      "learning_rate": 9.20415384514012e-06,
      "loss": 0.0321,
      "step": 261560
    },
    {
      "epoch": 0.4280814071470186,
      "grad_norm": 2.3334977626800537,
      "learning_rate": 9.204087952926604e-06,
      "loss": 0.0402,
      "step": 261580
    },
    {
      "epoch": 0.4281141375856719,
      "grad_norm": 0.41410332918167114,
      "learning_rate": 9.204022060713086e-06,
      "loss": 0.0494,
      "step": 261600
    },
    {
      "epoch": 0.4281468680243253,
      "grad_norm": 0.8827625513076782,
      "learning_rate": 9.20395616849957e-06,
      "loss": 0.0509,
      "step": 261620
    },
    {
      "epoch": 0.4281795984629786,
      "grad_norm": 0.6901891827583313,
      "learning_rate": 9.203890276286051e-06,
      "loss": 0.041,
      "step": 261640
    },
    {
      "epoch": 0.42821232890163197,
      "grad_norm": 1.642928957939148,
      "learning_rate": 9.203824384072535e-06,
      "loss": 0.0394,
      "step": 261660
    },
    {
      "epoch": 0.4282450593402853,
      "grad_norm": 1.290151834487915,
      "learning_rate": 9.203758491859017e-06,
      "loss": 0.0404,
      "step": 261680
    },
    {
      "epoch": 0.4282777897789386,
      "grad_norm": 2.3870341777801514,
      "learning_rate": 9.2036925996455e-06,
      "loss": 0.0327,
      "step": 261700
    },
    {
      "epoch": 0.428310520217592,
      "grad_norm": 0.5947611331939697,
      "learning_rate": 9.203626707431984e-06,
      "loss": 0.0359,
      "step": 261720
    },
    {
      "epoch": 0.4283432506562453,
      "grad_norm": 1.1326320171356201,
      "learning_rate": 9.203560815218466e-06,
      "loss": 0.0563,
      "step": 261740
    },
    {
      "epoch": 0.42837598109489866,
      "grad_norm": 0.48878130316734314,
      "learning_rate": 9.20349492300495e-06,
      "loss": 0.0283,
      "step": 261760
    },
    {
      "epoch": 0.428408711533552,
      "grad_norm": 2.0696678161621094,
      "learning_rate": 9.203429030791431e-06,
      "loss": 0.0581,
      "step": 261780
    },
    {
      "epoch": 0.4284414419722053,
      "grad_norm": 1.406166434288025,
      "learning_rate": 9.203363138577915e-06,
      "loss": 0.0395,
      "step": 261800
    },
    {
      "epoch": 0.42847417241085867,
      "grad_norm": 4.741546154022217,
      "learning_rate": 9.203297246364397e-06,
      "loss": 0.0345,
      "step": 261820
    },
    {
      "epoch": 0.428506902849512,
      "grad_norm": 0.547703742980957,
      "learning_rate": 9.20323135415088e-06,
      "loss": 0.0524,
      "step": 261840
    },
    {
      "epoch": 0.4285396332881653,
      "grad_norm": 2.0530781745910645,
      "learning_rate": 9.203165461937364e-06,
      "loss": 0.0414,
      "step": 261860
    },
    {
      "epoch": 0.4285723637268187,
      "grad_norm": 0.9164189696311951,
      "learning_rate": 9.203099569723846e-06,
      "loss": 0.032,
      "step": 261880
    },
    {
      "epoch": 0.428605094165472,
      "grad_norm": 2.454259157180786,
      "learning_rate": 9.20303367751033e-06,
      "loss": 0.0469,
      "step": 261900
    },
    {
      "epoch": 0.42863782460412536,
      "grad_norm": 0.13551443815231323,
      "learning_rate": 9.202967785296813e-06,
      "loss": 0.0459,
      "step": 261920
    },
    {
      "epoch": 0.4286705550427787,
      "grad_norm": 3.465367078781128,
      "learning_rate": 9.202901893083295e-06,
      "loss": 0.0527,
      "step": 261940
    },
    {
      "epoch": 0.428703285481432,
      "grad_norm": 3.9360382556915283,
      "learning_rate": 9.202836000869779e-06,
      "loss": 0.045,
      "step": 261960
    },
    {
      "epoch": 0.42873601592008537,
      "grad_norm": 1.4720412492752075,
      "learning_rate": 9.20277010865626e-06,
      "loss": 0.0475,
      "step": 261980
    },
    {
      "epoch": 0.4287687463587387,
      "grad_norm": 1.023677945137024,
      "learning_rate": 9.202704216442744e-06,
      "loss": 0.0429,
      "step": 262000
    },
    {
      "epoch": 0.42880147679739206,
      "grad_norm": 0.7981402277946472,
      "learning_rate": 9.202638324229226e-06,
      "loss": 0.0482,
      "step": 262020
    },
    {
      "epoch": 0.4288342072360454,
      "grad_norm": 1.5230365991592407,
      "learning_rate": 9.20257243201571e-06,
      "loss": 0.0593,
      "step": 262040
    },
    {
      "epoch": 0.4288669376746987,
      "grad_norm": 1.3678114414215088,
      "learning_rate": 9.202506539802192e-06,
      "loss": 0.0496,
      "step": 262060
    },
    {
      "epoch": 0.42889966811335206,
      "grad_norm": 0.3514823019504547,
      "learning_rate": 9.202440647588675e-06,
      "loss": 0.0465,
      "step": 262080
    },
    {
      "epoch": 0.4289323985520054,
      "grad_norm": 0.9855796694755554,
      "learning_rate": 9.202374755375159e-06,
      "loss": 0.0507,
      "step": 262100
    },
    {
      "epoch": 0.42896512899065875,
      "grad_norm": 2.0484402179718018,
      "learning_rate": 9.20230886316164e-06,
      "loss": 0.0445,
      "step": 262120
    },
    {
      "epoch": 0.42899785942931207,
      "grad_norm": 3.2516837120056152,
      "learning_rate": 9.202242970948124e-06,
      "loss": 0.035,
      "step": 262140
    },
    {
      "epoch": 0.4290305898679654,
      "grad_norm": 0.94426429271698,
      "learning_rate": 9.202177078734606e-06,
      "loss": 0.0341,
      "step": 262160
    },
    {
      "epoch": 0.42906332030661876,
      "grad_norm": 3.127578020095825,
      "learning_rate": 9.20211118652109e-06,
      "loss": 0.0482,
      "step": 262180
    },
    {
      "epoch": 0.4290960507452721,
      "grad_norm": 1.8048348426818848,
      "learning_rate": 9.202045294307572e-06,
      "loss": 0.0434,
      "step": 262200
    },
    {
      "epoch": 0.42912878118392545,
      "grad_norm": 1.1112817525863647,
      "learning_rate": 9.201979402094055e-06,
      "loss": 0.046,
      "step": 262220
    },
    {
      "epoch": 0.42916151162257876,
      "grad_norm": 2.04171085357666,
      "learning_rate": 9.201913509880537e-06,
      "loss": 0.048,
      "step": 262240
    },
    {
      "epoch": 0.4291942420612321,
      "grad_norm": 1.5504628419876099,
      "learning_rate": 9.20184761766702e-06,
      "loss": 0.0569,
      "step": 262260
    },
    {
      "epoch": 0.42922697249988545,
      "grad_norm": 0.536194920539856,
      "learning_rate": 9.201781725453504e-06,
      "loss": 0.0431,
      "step": 262280
    },
    {
      "epoch": 0.42925970293853877,
      "grad_norm": 0.6329912543296814,
      "learning_rate": 9.201715833239988e-06,
      "loss": 0.0526,
      "step": 262300
    },
    {
      "epoch": 0.42929243337719214,
      "grad_norm": 0.7848960161209106,
      "learning_rate": 9.20164994102647e-06,
      "loss": 0.0537,
      "step": 262320
    },
    {
      "epoch": 0.42932516381584546,
      "grad_norm": 4.767159461975098,
      "learning_rate": 9.201584048812953e-06,
      "loss": 0.0376,
      "step": 262340
    },
    {
      "epoch": 0.4293578942544988,
      "grad_norm": 2.6201870441436768,
      "learning_rate": 9.201518156599435e-06,
      "loss": 0.0381,
      "step": 262360
    },
    {
      "epoch": 0.42939062469315215,
      "grad_norm": 0.8344930410385132,
      "learning_rate": 9.201452264385919e-06,
      "loss": 0.0365,
      "step": 262380
    },
    {
      "epoch": 0.42942335513180546,
      "grad_norm": 1.3379696607589722,
      "learning_rate": 9.2013863721724e-06,
      "loss": 0.0493,
      "step": 262400
    },
    {
      "epoch": 0.42945608557045883,
      "grad_norm": 2.555934429168701,
      "learning_rate": 9.201320479958884e-06,
      "loss": 0.0409,
      "step": 262420
    },
    {
      "epoch": 0.42948881600911215,
      "grad_norm": 2.892583131790161,
      "learning_rate": 9.201254587745368e-06,
      "loss": 0.0446,
      "step": 262440
    },
    {
      "epoch": 0.42952154644776547,
      "grad_norm": 1.4499437808990479,
      "learning_rate": 9.20118869553185e-06,
      "loss": 0.0425,
      "step": 262460
    },
    {
      "epoch": 0.42955427688641884,
      "grad_norm": 5.808080196380615,
      "learning_rate": 9.201122803318333e-06,
      "loss": 0.0612,
      "step": 262480
    },
    {
      "epoch": 0.42958700732507216,
      "grad_norm": 3.131196975708008,
      "learning_rate": 9.201056911104815e-06,
      "loss": 0.0512,
      "step": 262500
    },
    {
      "epoch": 0.42961973776372553,
      "grad_norm": 2.0680196285247803,
      "learning_rate": 9.200991018891299e-06,
      "loss": 0.0435,
      "step": 262520
    },
    {
      "epoch": 0.42965246820237885,
      "grad_norm": 1.9254412651062012,
      "learning_rate": 9.20092512667778e-06,
      "loss": 0.0405,
      "step": 262540
    },
    {
      "epoch": 0.42968519864103216,
      "grad_norm": 2.0461549758911133,
      "learning_rate": 9.200859234464264e-06,
      "loss": 0.0446,
      "step": 262560
    },
    {
      "epoch": 0.42971792907968553,
      "grad_norm": 2.8333864212036133,
      "learning_rate": 9.200793342250746e-06,
      "loss": 0.0476,
      "step": 262580
    },
    {
      "epoch": 0.42975065951833885,
      "grad_norm": 2.8460981845855713,
      "learning_rate": 9.20072745003723e-06,
      "loss": 0.0432,
      "step": 262600
    },
    {
      "epoch": 0.4297833899569922,
      "grad_norm": 1.065335988998413,
      "learning_rate": 9.200661557823712e-06,
      "loss": 0.0485,
      "step": 262620
    },
    {
      "epoch": 0.42981612039564554,
      "grad_norm": 1.8185330629348755,
      "learning_rate": 9.200595665610195e-06,
      "loss": 0.047,
      "step": 262640
    },
    {
      "epoch": 0.42984885083429886,
      "grad_norm": 1.5529201030731201,
      "learning_rate": 9.200529773396679e-06,
      "loss": 0.0472,
      "step": 262660
    },
    {
      "epoch": 0.42988158127295223,
      "grad_norm": 2.34421706199646,
      "learning_rate": 9.20046388118316e-06,
      "loss": 0.049,
      "step": 262680
    },
    {
      "epoch": 0.42991431171160555,
      "grad_norm": 1.6421291828155518,
      "learning_rate": 9.200397988969644e-06,
      "loss": 0.0397,
      "step": 262700
    },
    {
      "epoch": 0.4299470421502589,
      "grad_norm": 3.311746835708618,
      "learning_rate": 9.200332096756128e-06,
      "loss": 0.0476,
      "step": 262720
    },
    {
      "epoch": 0.42997977258891223,
      "grad_norm": 1.6184654235839844,
      "learning_rate": 9.20026620454261e-06,
      "loss": 0.0336,
      "step": 262740
    },
    {
      "epoch": 0.43001250302756555,
      "grad_norm": 1.3810597658157349,
      "learning_rate": 9.200200312329093e-06,
      "loss": 0.0528,
      "step": 262760
    },
    {
      "epoch": 0.4300452334662189,
      "grad_norm": 2.7350914478302,
      "learning_rate": 9.200134420115577e-06,
      "loss": 0.0335,
      "step": 262780
    },
    {
      "epoch": 0.43007796390487224,
      "grad_norm": 1.8981372117996216,
      "learning_rate": 9.200068527902059e-06,
      "loss": 0.0473,
      "step": 262800
    },
    {
      "epoch": 0.4301106943435256,
      "grad_norm": 1.419606328010559,
      "learning_rate": 9.200002635688542e-06,
      "loss": 0.0424,
      "step": 262820
    },
    {
      "epoch": 0.43014342478217893,
      "grad_norm": 2.453873634338379,
      "learning_rate": 9.199936743475024e-06,
      "loss": 0.04,
      "step": 262840
    },
    {
      "epoch": 0.43017615522083225,
      "grad_norm": 3.856532335281372,
      "learning_rate": 9.199870851261508e-06,
      "loss": 0.0447,
      "step": 262860
    },
    {
      "epoch": 0.4302088856594856,
      "grad_norm": 0.7448190450668335,
      "learning_rate": 9.19980495904799e-06,
      "loss": 0.0443,
      "step": 262880
    },
    {
      "epoch": 0.43024161609813893,
      "grad_norm": 2.157336473464966,
      "learning_rate": 9.199739066834473e-06,
      "loss": 0.0393,
      "step": 262900
    },
    {
      "epoch": 0.4302743465367923,
      "grad_norm": 0.9768017530441284,
      "learning_rate": 9.199673174620955e-06,
      "loss": 0.0272,
      "step": 262920
    },
    {
      "epoch": 0.4303070769754456,
      "grad_norm": 1.164320945739746,
      "learning_rate": 9.199607282407439e-06,
      "loss": 0.0463,
      "step": 262940
    },
    {
      "epoch": 0.43033980741409894,
      "grad_norm": 3.565974473953247,
      "learning_rate": 9.19954139019392e-06,
      "loss": 0.0403,
      "step": 262960
    },
    {
      "epoch": 0.4303725378527523,
      "grad_norm": 1.1180825233459473,
      "learning_rate": 9.199475497980404e-06,
      "loss": 0.035,
      "step": 262980
    },
    {
      "epoch": 0.43040526829140563,
      "grad_norm": 2.404207468032837,
      "learning_rate": 9.199409605766886e-06,
      "loss": 0.0629,
      "step": 263000
    },
    {
      "epoch": 0.430437998730059,
      "grad_norm": 2.4660773277282715,
      "learning_rate": 9.19934371355337e-06,
      "loss": 0.042,
      "step": 263020
    },
    {
      "epoch": 0.4304707291687123,
      "grad_norm": 1.7626038789749146,
      "learning_rate": 9.199277821339853e-06,
      "loss": 0.054,
      "step": 263040
    },
    {
      "epoch": 0.43050345960736563,
      "grad_norm": 1.3882386684417725,
      "learning_rate": 9.199211929126335e-06,
      "loss": 0.0544,
      "step": 263060
    },
    {
      "epoch": 0.430536190046019,
      "grad_norm": 0.7484573721885681,
      "learning_rate": 9.199146036912819e-06,
      "loss": 0.0393,
      "step": 263080
    },
    {
      "epoch": 0.4305689204846723,
      "grad_norm": 5.094721794128418,
      "learning_rate": 9.199080144699302e-06,
      "loss": 0.04,
      "step": 263100
    },
    {
      "epoch": 0.4306016509233257,
      "grad_norm": 2.8812968730926514,
      "learning_rate": 9.199014252485784e-06,
      "loss": 0.045,
      "step": 263120
    },
    {
      "epoch": 0.430634381361979,
      "grad_norm": 1.1967923641204834,
      "learning_rate": 9.198948360272268e-06,
      "loss": 0.0488,
      "step": 263140
    },
    {
      "epoch": 0.43066711180063233,
      "grad_norm": 2.225996732711792,
      "learning_rate": 9.198882468058752e-06,
      "loss": 0.0342,
      "step": 263160
    },
    {
      "epoch": 0.4306998422392857,
      "grad_norm": 0.2435150444507599,
      "learning_rate": 9.198816575845233e-06,
      "loss": 0.0398,
      "step": 263180
    },
    {
      "epoch": 0.430732572677939,
      "grad_norm": 2.5132627487182617,
      "learning_rate": 9.198750683631717e-06,
      "loss": 0.0629,
      "step": 263200
    },
    {
      "epoch": 0.4307653031165924,
      "grad_norm": 1.4397608041763306,
      "learning_rate": 9.198684791418199e-06,
      "loss": 0.0378,
      "step": 263220
    },
    {
      "epoch": 0.4307980335552457,
      "grad_norm": 0.4208882451057434,
      "learning_rate": 9.198618899204683e-06,
      "loss": 0.0401,
      "step": 263240
    },
    {
      "epoch": 0.430830763993899,
      "grad_norm": 1.668886423110962,
      "learning_rate": 9.198553006991164e-06,
      "loss": 0.0559,
      "step": 263260
    },
    {
      "epoch": 0.4308634944325524,
      "grad_norm": 1.61671781539917,
      "learning_rate": 9.198487114777648e-06,
      "loss": 0.038,
      "step": 263280
    },
    {
      "epoch": 0.4308962248712057,
      "grad_norm": 1.5360604524612427,
      "learning_rate": 9.19842122256413e-06,
      "loss": 0.0355,
      "step": 263300
    },
    {
      "epoch": 0.4309289553098591,
      "grad_norm": 2.2539870738983154,
      "learning_rate": 9.198355330350613e-06,
      "loss": 0.049,
      "step": 263320
    },
    {
      "epoch": 0.4309616857485124,
      "grad_norm": 2.7892367839813232,
      "learning_rate": 9.198289438137095e-06,
      "loss": 0.0435,
      "step": 263340
    },
    {
      "epoch": 0.4309944161871657,
      "grad_norm": 1.933353066444397,
      "learning_rate": 9.198223545923579e-06,
      "loss": 0.0523,
      "step": 263360
    },
    {
      "epoch": 0.4310271466258191,
      "grad_norm": 1.5376192331314087,
      "learning_rate": 9.19815765371006e-06,
      "loss": 0.0496,
      "step": 263380
    },
    {
      "epoch": 0.4310598770644724,
      "grad_norm": 1.316622257232666,
      "learning_rate": 9.198091761496544e-06,
      "loss": 0.0466,
      "step": 263400
    },
    {
      "epoch": 0.4310926075031258,
      "grad_norm": 1.6376404762268066,
      "learning_rate": 9.198025869283026e-06,
      "loss": 0.0467,
      "step": 263420
    },
    {
      "epoch": 0.4311253379417791,
      "grad_norm": 3.167290687561035,
      "learning_rate": 9.19795997706951e-06,
      "loss": 0.0476,
      "step": 263440
    },
    {
      "epoch": 0.4311580683804324,
      "grad_norm": 0.952354907989502,
      "learning_rate": 9.197894084855993e-06,
      "loss": 0.0337,
      "step": 263460
    },
    {
      "epoch": 0.4311907988190858,
      "grad_norm": 2.1319363117218018,
      "learning_rate": 9.197828192642475e-06,
      "loss": 0.0413,
      "step": 263480
    },
    {
      "epoch": 0.4312235292577391,
      "grad_norm": 1.130342721939087,
      "learning_rate": 9.197762300428959e-06,
      "loss": 0.0469,
      "step": 263500
    },
    {
      "epoch": 0.4312562596963925,
      "grad_norm": 0.7390049695968628,
      "learning_rate": 9.197696408215443e-06,
      "loss": 0.0401,
      "step": 263520
    },
    {
      "epoch": 0.4312889901350458,
      "grad_norm": 0.957801878452301,
      "learning_rate": 9.197630516001924e-06,
      "loss": 0.0507,
      "step": 263540
    },
    {
      "epoch": 0.4313217205736991,
      "grad_norm": 1.61326003074646,
      "learning_rate": 9.197564623788408e-06,
      "loss": 0.0405,
      "step": 263560
    },
    {
      "epoch": 0.4313544510123525,
      "grad_norm": 5.283932685852051,
      "learning_rate": 9.197498731574892e-06,
      "loss": 0.0459,
      "step": 263580
    },
    {
      "epoch": 0.4313871814510058,
      "grad_norm": 1.1697540283203125,
      "learning_rate": 9.197432839361374e-06,
      "loss": 0.0525,
      "step": 263600
    },
    {
      "epoch": 0.43141991188965917,
      "grad_norm": 0.2885565757751465,
      "learning_rate": 9.197366947147857e-06,
      "loss": 0.0379,
      "step": 263620
    },
    {
      "epoch": 0.4314526423283125,
      "grad_norm": 1.2424460649490356,
      "learning_rate": 9.197301054934339e-06,
      "loss": 0.0351,
      "step": 263640
    },
    {
      "epoch": 0.4314853727669658,
      "grad_norm": 0.7436986565589905,
      "learning_rate": 9.197235162720823e-06,
      "loss": 0.0404,
      "step": 263660
    },
    {
      "epoch": 0.4315181032056192,
      "grad_norm": 0.7253952622413635,
      "learning_rate": 9.197169270507304e-06,
      "loss": 0.0455,
      "step": 263680
    },
    {
      "epoch": 0.4315508336442725,
      "grad_norm": 3.5649242401123047,
      "learning_rate": 9.197103378293788e-06,
      "loss": 0.0375,
      "step": 263700
    },
    {
      "epoch": 0.43158356408292586,
      "grad_norm": 1.5439590215682983,
      "learning_rate": 9.19703748608027e-06,
      "loss": 0.0555,
      "step": 263720
    },
    {
      "epoch": 0.4316162945215792,
      "grad_norm": 0.7455043792724609,
      "learning_rate": 9.196971593866754e-06,
      "loss": 0.039,
      "step": 263740
    },
    {
      "epoch": 0.4316490249602325,
      "grad_norm": 0.5439433455467224,
      "learning_rate": 9.196905701653235e-06,
      "loss": 0.0406,
      "step": 263760
    },
    {
      "epoch": 0.43168175539888587,
      "grad_norm": 0.8565512895584106,
      "learning_rate": 9.196839809439719e-06,
      "loss": 0.0594,
      "step": 263780
    },
    {
      "epoch": 0.4317144858375392,
      "grad_norm": 2.8456645011901855,
      "learning_rate": 9.196773917226201e-06,
      "loss": 0.0514,
      "step": 263800
    },
    {
      "epoch": 0.43174721627619256,
      "grad_norm": 1.188429355621338,
      "learning_rate": 9.196708025012684e-06,
      "loss": 0.0392,
      "step": 263820
    },
    {
      "epoch": 0.4317799467148459,
      "grad_norm": 0.7096337080001831,
      "learning_rate": 9.196642132799168e-06,
      "loss": 0.0465,
      "step": 263840
    },
    {
      "epoch": 0.4318126771534992,
      "grad_norm": 6.375095844268799,
      "learning_rate": 9.19657624058565e-06,
      "loss": 0.0361,
      "step": 263860
    },
    {
      "epoch": 0.43184540759215256,
      "grad_norm": 4.233947277069092,
      "learning_rate": 9.196510348372134e-06,
      "loss": 0.0522,
      "step": 263880
    },
    {
      "epoch": 0.4318781380308059,
      "grad_norm": 0.5415369868278503,
      "learning_rate": 9.196444456158617e-06,
      "loss": 0.0365,
      "step": 263900
    },
    {
      "epoch": 0.43191086846945925,
      "grad_norm": 1.744927167892456,
      "learning_rate": 9.196378563945099e-06,
      "loss": 0.0384,
      "step": 263920
    },
    {
      "epoch": 0.43194359890811257,
      "grad_norm": 0.6570571660995483,
      "learning_rate": 9.196312671731583e-06,
      "loss": 0.0425,
      "step": 263940
    },
    {
      "epoch": 0.4319763293467659,
      "grad_norm": 1.851022720336914,
      "learning_rate": 9.196246779518066e-06,
      "loss": 0.0384,
      "step": 263960
    },
    {
      "epoch": 0.43200905978541926,
      "grad_norm": 1.5051994323730469,
      "learning_rate": 9.196180887304548e-06,
      "loss": 0.0353,
      "step": 263980
    },
    {
      "epoch": 0.4320417902240726,
      "grad_norm": 1.2132662534713745,
      "learning_rate": 9.196114995091032e-06,
      "loss": 0.0468,
      "step": 264000
    },
    {
      "epoch": 0.43207452066272595,
      "grad_norm": 1.2429478168487549,
      "learning_rate": 9.196049102877514e-06,
      "loss": 0.0539,
      "step": 264020
    },
    {
      "epoch": 0.43210725110137926,
      "grad_norm": 1.2050361633300781,
      "learning_rate": 9.195983210663997e-06,
      "loss": 0.035,
      "step": 264040
    },
    {
      "epoch": 0.4321399815400326,
      "grad_norm": 1.4236183166503906,
      "learning_rate": 9.195917318450479e-06,
      "loss": 0.0483,
      "step": 264060
    },
    {
      "epoch": 0.43217271197868595,
      "grad_norm": 2.926602840423584,
      "learning_rate": 9.195851426236963e-06,
      "loss": 0.0401,
      "step": 264080
    },
    {
      "epoch": 0.43220544241733927,
      "grad_norm": 1.6883118152618408,
      "learning_rate": 9.195785534023445e-06,
      "loss": 0.059,
      "step": 264100
    },
    {
      "epoch": 0.43223817285599264,
      "grad_norm": 0.8041443228721619,
      "learning_rate": 9.195719641809928e-06,
      "loss": 0.0376,
      "step": 264120
    },
    {
      "epoch": 0.43227090329464596,
      "grad_norm": 1.1108222007751465,
      "learning_rate": 9.19565374959641e-06,
      "loss": 0.0458,
      "step": 264140
    },
    {
      "epoch": 0.4323036337332993,
      "grad_norm": 3.0188260078430176,
      "learning_rate": 9.195587857382894e-06,
      "loss": 0.0376,
      "step": 264160
    },
    {
      "epoch": 0.43233636417195265,
      "grad_norm": 2.3547284603118896,
      "learning_rate": 9.195521965169377e-06,
      "loss": 0.0347,
      "step": 264180
    },
    {
      "epoch": 0.43236909461060596,
      "grad_norm": 6.096123218536377,
      "learning_rate": 9.195456072955859e-06,
      "loss": 0.0447,
      "step": 264200
    },
    {
      "epoch": 0.43240182504925934,
      "grad_norm": 1.9222677946090698,
      "learning_rate": 9.195390180742343e-06,
      "loss": 0.0343,
      "step": 264220
    },
    {
      "epoch": 0.43243455548791265,
      "grad_norm": 1.7045435905456543,
      "learning_rate": 9.195324288528825e-06,
      "loss": 0.0289,
      "step": 264240
    },
    {
      "epoch": 0.43246728592656597,
      "grad_norm": 2.405778408050537,
      "learning_rate": 9.195258396315308e-06,
      "loss": 0.0507,
      "step": 264260
    },
    {
      "epoch": 0.43250001636521934,
      "grad_norm": 2.7563693523406982,
      "learning_rate": 9.19519250410179e-06,
      "loss": 0.0472,
      "step": 264280
    },
    {
      "epoch": 0.43253274680387266,
      "grad_norm": 1.089401125907898,
      "learning_rate": 9.195126611888274e-06,
      "loss": 0.0468,
      "step": 264300
    },
    {
      "epoch": 0.43256547724252603,
      "grad_norm": 1.6485618352890015,
      "learning_rate": 9.195060719674757e-06,
      "loss": 0.0422,
      "step": 264320
    },
    {
      "epoch": 0.43259820768117935,
      "grad_norm": 1.4653544425964355,
      "learning_rate": 9.194994827461239e-06,
      "loss": 0.0459,
      "step": 264340
    },
    {
      "epoch": 0.43263093811983266,
      "grad_norm": 1.8405414819717407,
      "learning_rate": 9.194928935247723e-06,
      "loss": 0.0575,
      "step": 264360
    },
    {
      "epoch": 0.43266366855848604,
      "grad_norm": 2.1498119831085205,
      "learning_rate": 9.194863043034206e-06,
      "loss": 0.0466,
      "step": 264380
    },
    {
      "epoch": 0.43269639899713935,
      "grad_norm": 0.5283355712890625,
      "learning_rate": 9.194797150820688e-06,
      "loss": 0.0411,
      "step": 264400
    },
    {
      "epoch": 0.4327291294357927,
      "grad_norm": 0.9149037003517151,
      "learning_rate": 9.194731258607172e-06,
      "loss": 0.0307,
      "step": 264420
    },
    {
      "epoch": 0.43276185987444604,
      "grad_norm": 1.4846731424331665,
      "learning_rate": 9.194665366393654e-06,
      "loss": 0.0501,
      "step": 264440
    },
    {
      "epoch": 0.43279459031309936,
      "grad_norm": 0.5755509734153748,
      "learning_rate": 9.194599474180137e-06,
      "loss": 0.0346,
      "step": 264460
    },
    {
      "epoch": 0.43282732075175273,
      "grad_norm": 3.617401361465454,
      "learning_rate": 9.194533581966619e-06,
      "loss": 0.0361,
      "step": 264480
    },
    {
      "epoch": 0.43286005119040605,
      "grad_norm": 0.39756861329078674,
      "learning_rate": 9.194467689753103e-06,
      "loss": 0.0393,
      "step": 264500
    },
    {
      "epoch": 0.4328927816290594,
      "grad_norm": 2.649623394012451,
      "learning_rate": 9.194401797539585e-06,
      "loss": 0.0561,
      "step": 264520
    },
    {
      "epoch": 0.43292551206771274,
      "grad_norm": 1.1833370923995972,
      "learning_rate": 9.194335905326068e-06,
      "loss": 0.0399,
      "step": 264540
    },
    {
      "epoch": 0.43295824250636605,
      "grad_norm": 1.7868494987487793,
      "learning_rate": 9.194270013112552e-06,
      "loss": 0.0366,
      "step": 264560
    },
    {
      "epoch": 0.4329909729450194,
      "grad_norm": 0.8970911502838135,
      "learning_rate": 9.194204120899034e-06,
      "loss": 0.0417,
      "step": 264580
    },
    {
      "epoch": 0.43302370338367274,
      "grad_norm": 1.9122908115386963,
      "learning_rate": 9.194138228685517e-06,
      "loss": 0.0413,
      "step": 264600
    },
    {
      "epoch": 0.4330564338223261,
      "grad_norm": 0.3614485263824463,
      "learning_rate": 9.194072336472e-06,
      "loss": 0.0536,
      "step": 264620
    },
    {
      "epoch": 0.43308916426097943,
      "grad_norm": 1.9324748516082764,
      "learning_rate": 9.194006444258483e-06,
      "loss": 0.045,
      "step": 264640
    },
    {
      "epoch": 0.43312189469963275,
      "grad_norm": 0.7742593288421631,
      "learning_rate": 9.193940552044965e-06,
      "loss": 0.0521,
      "step": 264660
    },
    {
      "epoch": 0.4331546251382861,
      "grad_norm": 0.8313838243484497,
      "learning_rate": 9.193874659831448e-06,
      "loss": 0.06,
      "step": 264680
    },
    {
      "epoch": 0.43318735557693944,
      "grad_norm": 4.031198501586914,
      "learning_rate": 9.193808767617932e-06,
      "loss": 0.0384,
      "step": 264700
    },
    {
      "epoch": 0.43322008601559275,
      "grad_norm": 1.8304518461227417,
      "learning_rate": 9.193742875404414e-06,
      "loss": 0.0676,
      "step": 264720
    },
    {
      "epoch": 0.4332528164542461,
      "grad_norm": 3.1796233654022217,
      "learning_rate": 9.193676983190897e-06,
      "loss": 0.042,
      "step": 264740
    },
    {
      "epoch": 0.43328554689289944,
      "grad_norm": 0.6637126803398132,
      "learning_rate": 9.193611090977381e-06,
      "loss": 0.0444,
      "step": 264760
    },
    {
      "epoch": 0.4333182773315528,
      "grad_norm": 2.4464597702026367,
      "learning_rate": 9.193545198763863e-06,
      "loss": 0.0383,
      "step": 264780
    },
    {
      "epoch": 0.43335100777020613,
      "grad_norm": 1.2214394807815552,
      "learning_rate": 9.193479306550346e-06,
      "loss": 0.039,
      "step": 264800
    },
    {
      "epoch": 0.43338373820885945,
      "grad_norm": 0.7984173893928528,
      "learning_rate": 9.193413414336828e-06,
      "loss": 0.0399,
      "step": 264820
    },
    {
      "epoch": 0.4334164686475128,
      "grad_norm": 5.099844932556152,
      "learning_rate": 9.193347522123312e-06,
      "loss": 0.0472,
      "step": 264840
    },
    {
      "epoch": 0.43344919908616614,
      "grad_norm": 0.49256935715675354,
      "learning_rate": 9.193281629909794e-06,
      "loss": 0.0313,
      "step": 264860
    },
    {
      "epoch": 0.4334819295248195,
      "grad_norm": 3.9734761714935303,
      "learning_rate": 9.193215737696277e-06,
      "loss": 0.0533,
      "step": 264880
    },
    {
      "epoch": 0.4335146599634728,
      "grad_norm": 1.4841917753219604,
      "learning_rate": 9.193149845482761e-06,
      "loss": 0.0348,
      "step": 264900
    },
    {
      "epoch": 0.43354739040212614,
      "grad_norm": 2.1115827560424805,
      "learning_rate": 9.193083953269243e-06,
      "loss": 0.0449,
      "step": 264920
    },
    {
      "epoch": 0.4335801208407795,
      "grad_norm": 1.6982290744781494,
      "learning_rate": 9.193018061055726e-06,
      "loss": 0.0582,
      "step": 264940
    },
    {
      "epoch": 0.43361285127943283,
      "grad_norm": 4.169015884399414,
      "learning_rate": 9.192952168842208e-06,
      "loss": 0.0431,
      "step": 264960
    },
    {
      "epoch": 0.4336455817180862,
      "grad_norm": 0.6257604956626892,
      "learning_rate": 9.192886276628692e-06,
      "loss": 0.0289,
      "step": 264980
    },
    {
      "epoch": 0.4336783121567395,
      "grad_norm": 1.591625452041626,
      "learning_rate": 9.192820384415174e-06,
      "loss": 0.043,
      "step": 265000
    },
    {
      "epoch": 0.43371104259539284,
      "grad_norm": 0.7525875568389893,
      "learning_rate": 9.192754492201657e-06,
      "loss": 0.0402,
      "step": 265020
    },
    {
      "epoch": 0.4337437730340462,
      "grad_norm": 2.2391157150268555,
      "learning_rate": 9.19268859998814e-06,
      "loss": 0.041,
      "step": 265040
    },
    {
      "epoch": 0.4337765034726995,
      "grad_norm": 8.034976959228516,
      "learning_rate": 9.192622707774623e-06,
      "loss": 0.0464,
      "step": 265060
    },
    {
      "epoch": 0.4338092339113529,
      "grad_norm": 1.2297089099884033,
      "learning_rate": 9.192556815561106e-06,
      "loss": 0.0413,
      "step": 265080
    },
    {
      "epoch": 0.4338419643500062,
      "grad_norm": 2.3410849571228027,
      "learning_rate": 9.192490923347588e-06,
      "loss": 0.0357,
      "step": 265100
    },
    {
      "epoch": 0.43387469478865953,
      "grad_norm": 1.9968258142471313,
      "learning_rate": 9.192425031134072e-06,
      "loss": 0.0403,
      "step": 265120
    },
    {
      "epoch": 0.4339074252273129,
      "grad_norm": 1.1375783681869507,
      "learning_rate": 9.192359138920555e-06,
      "loss": 0.0325,
      "step": 265140
    },
    {
      "epoch": 0.4339401556659662,
      "grad_norm": 0.9177013635635376,
      "learning_rate": 9.192293246707037e-06,
      "loss": 0.0455,
      "step": 265160
    },
    {
      "epoch": 0.4339728861046196,
      "grad_norm": 1.049340844154358,
      "learning_rate": 9.192227354493521e-06,
      "loss": 0.0333,
      "step": 265180
    },
    {
      "epoch": 0.4340056165432729,
      "grad_norm": 4.552072525024414,
      "learning_rate": 9.192161462280003e-06,
      "loss": 0.0525,
      "step": 265200
    },
    {
      "epoch": 0.4340383469819262,
      "grad_norm": 2.270662307739258,
      "learning_rate": 9.192095570066486e-06,
      "loss": 0.0348,
      "step": 265220
    },
    {
      "epoch": 0.4340710774205796,
      "grad_norm": 1.7314711809158325,
      "learning_rate": 9.19202967785297e-06,
      "loss": 0.0442,
      "step": 265240
    },
    {
      "epoch": 0.4341038078592329,
      "grad_norm": 1.7884283065795898,
      "learning_rate": 9.191963785639452e-06,
      "loss": 0.035,
      "step": 265260
    },
    {
      "epoch": 0.4341365382978863,
      "grad_norm": 0.5151946544647217,
      "learning_rate": 9.191897893425936e-06,
      "loss": 0.0378,
      "step": 265280
    },
    {
      "epoch": 0.4341692687365396,
      "grad_norm": 2.5417888164520264,
      "learning_rate": 9.191832001212417e-06,
      "loss": 0.0319,
      "step": 265300
    },
    {
      "epoch": 0.4342019991751929,
      "grad_norm": 0.822321891784668,
      "learning_rate": 9.191766108998901e-06,
      "loss": 0.055,
      "step": 265320
    },
    {
      "epoch": 0.4342347296138463,
      "grad_norm": 0.7327497005462646,
      "learning_rate": 9.191700216785383e-06,
      "loss": 0.0418,
      "step": 265340
    },
    {
      "epoch": 0.4342674600524996,
      "grad_norm": 1.5691980123519897,
      "learning_rate": 9.191634324571866e-06,
      "loss": 0.0326,
      "step": 265360
    },
    {
      "epoch": 0.434300190491153,
      "grad_norm": 2.4553756713867188,
      "learning_rate": 9.191568432358348e-06,
      "loss": 0.053,
      "step": 265380
    },
    {
      "epoch": 0.4343329209298063,
      "grad_norm": 2.7970728874206543,
      "learning_rate": 9.191502540144832e-06,
      "loss": 0.0414,
      "step": 265400
    },
    {
      "epoch": 0.4343656513684596,
      "grad_norm": 1.8633164167404175,
      "learning_rate": 9.191436647931314e-06,
      "loss": 0.035,
      "step": 265420
    },
    {
      "epoch": 0.434398381807113,
      "grad_norm": 2.528634786605835,
      "learning_rate": 9.191370755717797e-06,
      "loss": 0.0347,
      "step": 265440
    },
    {
      "epoch": 0.4344311122457663,
      "grad_norm": 2.889026165008545,
      "learning_rate": 9.19130486350428e-06,
      "loss": 0.0677,
      "step": 265460
    },
    {
      "epoch": 0.4344638426844197,
      "grad_norm": 1.2202427387237549,
      "learning_rate": 9.191238971290763e-06,
      "loss": 0.0303,
      "step": 265480
    },
    {
      "epoch": 0.434496573123073,
      "grad_norm": 1.1449494361877441,
      "learning_rate": 9.191173079077246e-06,
      "loss": 0.0468,
      "step": 265500
    },
    {
      "epoch": 0.4345293035617263,
      "grad_norm": 1.12672758102417,
      "learning_rate": 9.191107186863728e-06,
      "loss": 0.0352,
      "step": 265520
    },
    {
      "epoch": 0.4345620340003797,
      "grad_norm": 0.620305597782135,
      "learning_rate": 9.191041294650212e-06,
      "loss": 0.0421,
      "step": 265540
    },
    {
      "epoch": 0.434594764439033,
      "grad_norm": 1.4830156564712524,
      "learning_rate": 9.190975402436696e-06,
      "loss": 0.0315,
      "step": 265560
    },
    {
      "epoch": 0.43462749487768637,
      "grad_norm": 1.5962417125701904,
      "learning_rate": 9.190909510223177e-06,
      "loss": 0.0592,
      "step": 265580
    },
    {
      "epoch": 0.4346602253163397,
      "grad_norm": 1.1865549087524414,
      "learning_rate": 9.190843618009661e-06,
      "loss": 0.0422,
      "step": 265600
    },
    {
      "epoch": 0.434692955754993,
      "grad_norm": 1.6420196294784546,
      "learning_rate": 9.190777725796145e-06,
      "loss": 0.0427,
      "step": 265620
    },
    {
      "epoch": 0.4347256861936464,
      "grad_norm": 0.62712162733078,
      "learning_rate": 9.190711833582627e-06,
      "loss": 0.0305,
      "step": 265640
    },
    {
      "epoch": 0.4347584166322997,
      "grad_norm": 2.693387269973755,
      "learning_rate": 9.19064594136911e-06,
      "loss": 0.0399,
      "step": 265660
    },
    {
      "epoch": 0.43479114707095307,
      "grad_norm": 2.180514097213745,
      "learning_rate": 9.190580049155592e-06,
      "loss": 0.0583,
      "step": 265680
    },
    {
      "epoch": 0.4348238775096064,
      "grad_norm": 0.808388888835907,
      "learning_rate": 9.190514156942076e-06,
      "loss": 0.029,
      "step": 265700
    },
    {
      "epoch": 0.4348566079482597,
      "grad_norm": 1.5947186946868896,
      "learning_rate": 9.190448264728557e-06,
      "loss": 0.0375,
      "step": 265720
    },
    {
      "epoch": 0.43488933838691307,
      "grad_norm": 3.821582317352295,
      "learning_rate": 9.190382372515041e-06,
      "loss": 0.0467,
      "step": 265740
    },
    {
      "epoch": 0.4349220688255664,
      "grad_norm": 1.0292390584945679,
      "learning_rate": 9.190316480301523e-06,
      "loss": 0.0598,
      "step": 265760
    },
    {
      "epoch": 0.43495479926421976,
      "grad_norm": 4.325755596160889,
      "learning_rate": 9.190250588088007e-06,
      "loss": 0.0441,
      "step": 265780
    },
    {
      "epoch": 0.4349875297028731,
      "grad_norm": 1.7750135660171509,
      "learning_rate": 9.190184695874488e-06,
      "loss": 0.0404,
      "step": 265800
    },
    {
      "epoch": 0.4350202601415264,
      "grad_norm": 1.1785812377929688,
      "learning_rate": 9.190118803660972e-06,
      "loss": 0.0472,
      "step": 265820
    },
    {
      "epoch": 0.43505299058017977,
      "grad_norm": 1.0925030708312988,
      "learning_rate": 9.190052911447454e-06,
      "loss": 0.0446,
      "step": 265840
    },
    {
      "epoch": 0.4350857210188331,
      "grad_norm": 0.6903366446495056,
      "learning_rate": 9.189987019233938e-06,
      "loss": 0.0541,
      "step": 265860
    },
    {
      "epoch": 0.43511845145748645,
      "grad_norm": 2.289562463760376,
      "learning_rate": 9.189921127020421e-06,
      "loss": 0.0384,
      "step": 265880
    },
    {
      "epoch": 0.43515118189613977,
      "grad_norm": 4.157536029815674,
      "learning_rate": 9.189855234806903e-06,
      "loss": 0.0557,
      "step": 265900
    },
    {
      "epoch": 0.4351839123347931,
      "grad_norm": 2.0229673385620117,
      "learning_rate": 9.189789342593387e-06,
      "loss": 0.038,
      "step": 265920
    },
    {
      "epoch": 0.43521664277344646,
      "grad_norm": 13.301681518554688,
      "learning_rate": 9.18972345037987e-06,
      "loss": 0.0477,
      "step": 265940
    },
    {
      "epoch": 0.4352493732120998,
      "grad_norm": 2.471229076385498,
      "learning_rate": 9.189657558166352e-06,
      "loss": 0.0581,
      "step": 265960
    },
    {
      "epoch": 0.43528210365075315,
      "grad_norm": 2.1380553245544434,
      "learning_rate": 9.189591665952836e-06,
      "loss": 0.0474,
      "step": 265980
    },
    {
      "epoch": 0.43531483408940647,
      "grad_norm": 1.2130225896835327,
      "learning_rate": 9.18952577373932e-06,
      "loss": 0.0608,
      "step": 266000
    },
    {
      "epoch": 0.4353475645280598,
      "grad_norm": 2.6138370037078857,
      "learning_rate": 9.189459881525801e-06,
      "loss": 0.0498,
      "step": 266020
    },
    {
      "epoch": 0.43538029496671315,
      "grad_norm": 0.6803359985351562,
      "learning_rate": 9.189393989312285e-06,
      "loss": 0.0326,
      "step": 266040
    },
    {
      "epoch": 0.43541302540536647,
      "grad_norm": 1.045873761177063,
      "learning_rate": 9.189328097098767e-06,
      "loss": 0.0569,
      "step": 266060
    },
    {
      "epoch": 0.43544575584401984,
      "grad_norm": 2.1527516841888428,
      "learning_rate": 9.18926220488525e-06,
      "loss": 0.0409,
      "step": 266080
    },
    {
      "epoch": 0.43547848628267316,
      "grad_norm": 1.3132901191711426,
      "learning_rate": 9.189196312671732e-06,
      "loss": 0.0368,
      "step": 266100
    },
    {
      "epoch": 0.4355112167213265,
      "grad_norm": 1.4611328840255737,
      "learning_rate": 9.189130420458216e-06,
      "loss": 0.034,
      "step": 266120
    },
    {
      "epoch": 0.43554394715997985,
      "grad_norm": 0.7187782526016235,
      "learning_rate": 9.189064528244698e-06,
      "loss": 0.0407,
      "step": 266140
    },
    {
      "epoch": 0.43557667759863317,
      "grad_norm": 0.5633650422096252,
      "learning_rate": 9.188998636031181e-06,
      "loss": 0.0271,
      "step": 266160
    },
    {
      "epoch": 0.43560940803728654,
      "grad_norm": 5.82265567779541,
      "learning_rate": 9.188932743817663e-06,
      "loss": 0.0556,
      "step": 266180
    },
    {
      "epoch": 0.43564213847593986,
      "grad_norm": 3.3833682537078857,
      "learning_rate": 9.188866851604147e-06,
      "loss": 0.0395,
      "step": 266200
    },
    {
      "epoch": 0.43567486891459317,
      "grad_norm": 2.823669910430908,
      "learning_rate": 9.188800959390629e-06,
      "loss": 0.0465,
      "step": 266220
    },
    {
      "epoch": 0.43570759935324654,
      "grad_norm": 1.3640332221984863,
      "learning_rate": 9.188735067177112e-06,
      "loss": 0.046,
      "step": 266240
    },
    {
      "epoch": 0.43574032979189986,
      "grad_norm": 3.609879732131958,
      "learning_rate": 9.188669174963594e-06,
      "loss": 0.0355,
      "step": 266260
    },
    {
      "epoch": 0.43577306023055323,
      "grad_norm": 14.133325576782227,
      "learning_rate": 9.188603282750078e-06,
      "loss": 0.0314,
      "step": 266280
    },
    {
      "epoch": 0.43580579066920655,
      "grad_norm": 2.0124778747558594,
      "learning_rate": 9.188537390536561e-06,
      "loss": 0.0376,
      "step": 266300
    },
    {
      "epoch": 0.43583852110785987,
      "grad_norm": 0.6183141469955444,
      "learning_rate": 9.188471498323043e-06,
      "loss": 0.0261,
      "step": 266320
    },
    {
      "epoch": 0.43587125154651324,
      "grad_norm": 3.1976640224456787,
      "learning_rate": 9.188405606109527e-06,
      "loss": 0.0436,
      "step": 266340
    },
    {
      "epoch": 0.43590398198516656,
      "grad_norm": 2.3459339141845703,
      "learning_rate": 9.18833971389601e-06,
      "loss": 0.0378,
      "step": 266360
    },
    {
      "epoch": 0.4359367124238199,
      "grad_norm": 1.5533736944198608,
      "learning_rate": 9.188273821682492e-06,
      "loss": 0.0529,
      "step": 266380
    },
    {
      "epoch": 0.43596944286247324,
      "grad_norm": 9.02111530303955,
      "learning_rate": 9.188207929468976e-06,
      "loss": 0.0432,
      "step": 266400
    },
    {
      "epoch": 0.43600217330112656,
      "grad_norm": 1.808663010597229,
      "learning_rate": 9.18814203725546e-06,
      "loss": 0.0488,
      "step": 266420
    },
    {
      "epoch": 0.43603490373977993,
      "grad_norm": 1.267159342765808,
      "learning_rate": 9.188076145041941e-06,
      "loss": 0.0369,
      "step": 266440
    },
    {
      "epoch": 0.43606763417843325,
      "grad_norm": 0.42399099469184875,
      "learning_rate": 9.188010252828425e-06,
      "loss": 0.0325,
      "step": 266460
    },
    {
      "epoch": 0.4361003646170866,
      "grad_norm": 0.5899146795272827,
      "learning_rate": 9.187944360614907e-06,
      "loss": 0.0381,
      "step": 266480
    },
    {
      "epoch": 0.43613309505573994,
      "grad_norm": 1.4361910820007324,
      "learning_rate": 9.18787846840139e-06,
      "loss": 0.0416,
      "step": 266500
    },
    {
      "epoch": 0.43616582549439326,
      "grad_norm": 1.5170890092849731,
      "learning_rate": 9.187812576187872e-06,
      "loss": 0.0463,
      "step": 266520
    },
    {
      "epoch": 0.43619855593304663,
      "grad_norm": 1.2053546905517578,
      "learning_rate": 9.187746683974356e-06,
      "loss": 0.0412,
      "step": 266540
    },
    {
      "epoch": 0.43623128637169994,
      "grad_norm": 1.2653307914733887,
      "learning_rate": 9.187680791760838e-06,
      "loss": 0.0349,
      "step": 266560
    },
    {
      "epoch": 0.4362640168103533,
      "grad_norm": 1.0084577798843384,
      "learning_rate": 9.187614899547321e-06,
      "loss": 0.0309,
      "step": 266580
    },
    {
      "epoch": 0.43629674724900663,
      "grad_norm": 1.3091678619384766,
      "learning_rate": 9.187549007333803e-06,
      "loss": 0.0425,
      "step": 266600
    },
    {
      "epoch": 0.43632947768765995,
      "grad_norm": 0.896315336227417,
      "learning_rate": 9.187483115120287e-06,
      "loss": 0.0467,
      "step": 266620
    },
    {
      "epoch": 0.4363622081263133,
      "grad_norm": 1.9699971675872803,
      "learning_rate": 9.18741722290677e-06,
      "loss": 0.0396,
      "step": 266640
    },
    {
      "epoch": 0.43639493856496664,
      "grad_norm": 2.787202835083008,
      "learning_rate": 9.187351330693252e-06,
      "loss": 0.0363,
      "step": 266660
    },
    {
      "epoch": 0.43642766900362,
      "grad_norm": 2.311121940612793,
      "learning_rate": 9.187285438479736e-06,
      "loss": 0.0613,
      "step": 266680
    },
    {
      "epoch": 0.43646039944227333,
      "grad_norm": 0.7914164662361145,
      "learning_rate": 9.187219546266218e-06,
      "loss": 0.0344,
      "step": 266700
    },
    {
      "epoch": 0.43649312988092664,
      "grad_norm": 3.0747110843658447,
      "learning_rate": 9.187153654052701e-06,
      "loss": 0.0454,
      "step": 266720
    },
    {
      "epoch": 0.43652586031958,
      "grad_norm": 2.2279090881347656,
      "learning_rate": 9.187087761839185e-06,
      "loss": 0.0484,
      "step": 266740
    },
    {
      "epoch": 0.43655859075823333,
      "grad_norm": 0.9065371751785278,
      "learning_rate": 9.187021869625667e-06,
      "loss": 0.0488,
      "step": 266760
    },
    {
      "epoch": 0.4365913211968867,
      "grad_norm": 2.331965446472168,
      "learning_rate": 9.18695597741215e-06,
      "loss": 0.0419,
      "step": 266780
    },
    {
      "epoch": 0.43662405163554,
      "grad_norm": 0.6520853638648987,
      "learning_rate": 9.186890085198634e-06,
      "loss": 0.0432,
      "step": 266800
    },
    {
      "epoch": 0.43665678207419334,
      "grad_norm": 1.5060230493545532,
      "learning_rate": 9.186824192985116e-06,
      "loss": 0.0499,
      "step": 266820
    },
    {
      "epoch": 0.4366895125128467,
      "grad_norm": 1.2673640251159668,
      "learning_rate": 9.1867583007716e-06,
      "loss": 0.0525,
      "step": 266840
    },
    {
      "epoch": 0.43672224295150003,
      "grad_norm": 11.04910659790039,
      "learning_rate": 9.186692408558081e-06,
      "loss": 0.0506,
      "step": 266860
    },
    {
      "epoch": 0.4367549733901534,
      "grad_norm": 1.0226291418075562,
      "learning_rate": 9.186626516344565e-06,
      "loss": 0.0339,
      "step": 266880
    },
    {
      "epoch": 0.4367877038288067,
      "grad_norm": 3.3963840007781982,
      "learning_rate": 9.186560624131047e-06,
      "loss": 0.0319,
      "step": 266900
    },
    {
      "epoch": 0.43682043426746003,
      "grad_norm": 2.2697606086730957,
      "learning_rate": 9.18649473191753e-06,
      "loss": 0.0321,
      "step": 266920
    },
    {
      "epoch": 0.4368531647061134,
      "grad_norm": 3.0056581497192383,
      "learning_rate": 9.186428839704012e-06,
      "loss": 0.0513,
      "step": 266940
    },
    {
      "epoch": 0.4368858951447667,
      "grad_norm": 1.1644855737686157,
      "learning_rate": 9.186362947490496e-06,
      "loss": 0.0461,
      "step": 266960
    },
    {
      "epoch": 0.4369186255834201,
      "grad_norm": 4.121541976928711,
      "learning_rate": 9.186297055276978e-06,
      "loss": 0.0468,
      "step": 266980
    },
    {
      "epoch": 0.4369513560220734,
      "grad_norm": 1.6483553647994995,
      "learning_rate": 9.186231163063461e-06,
      "loss": 0.0444,
      "step": 267000
    },
    {
      "epoch": 0.43698408646072673,
      "grad_norm": 12.300952911376953,
      "learning_rate": 9.186165270849945e-06,
      "loss": 0.0493,
      "step": 267020
    },
    {
      "epoch": 0.4370168168993801,
      "grad_norm": 2.2334578037261963,
      "learning_rate": 9.186099378636427e-06,
      "loss": 0.0538,
      "step": 267040
    },
    {
      "epoch": 0.4370495473380334,
      "grad_norm": 1.4616516828536987,
      "learning_rate": 9.18603348642291e-06,
      "loss": 0.0496,
      "step": 267060
    },
    {
      "epoch": 0.4370822777766868,
      "grad_norm": 1.012913703918457,
      "learning_rate": 9.185967594209392e-06,
      "loss": 0.0326,
      "step": 267080
    },
    {
      "epoch": 0.4371150082153401,
      "grad_norm": 2.516308546066284,
      "learning_rate": 9.185901701995876e-06,
      "loss": 0.0335,
      "step": 267100
    },
    {
      "epoch": 0.4371477386539934,
      "grad_norm": 0.31065884232521057,
      "learning_rate": 9.185835809782358e-06,
      "loss": 0.0572,
      "step": 267120
    },
    {
      "epoch": 0.4371804690926468,
      "grad_norm": 1.2546217441558838,
      "learning_rate": 9.185769917568841e-06,
      "loss": 0.0346,
      "step": 267140
    },
    {
      "epoch": 0.4372131995313001,
      "grad_norm": 0.7945267558097839,
      "learning_rate": 9.185704025355325e-06,
      "loss": 0.0291,
      "step": 267160
    },
    {
      "epoch": 0.4372459299699535,
      "grad_norm": 7.2128095626831055,
      "learning_rate": 9.185638133141807e-06,
      "loss": 0.0384,
      "step": 267180
    },
    {
      "epoch": 0.4372786604086068,
      "grad_norm": 0.9217888712882996,
      "learning_rate": 9.18557224092829e-06,
      "loss": 0.0465,
      "step": 267200
    },
    {
      "epoch": 0.4373113908472601,
      "grad_norm": 1.8784207105636597,
      "learning_rate": 9.185506348714774e-06,
      "loss": 0.0412,
      "step": 267220
    },
    {
      "epoch": 0.4373441212859135,
      "grad_norm": 0.9452951550483704,
      "learning_rate": 9.185440456501256e-06,
      "loss": 0.0478,
      "step": 267240
    },
    {
      "epoch": 0.4373768517245668,
      "grad_norm": 0.38258615136146545,
      "learning_rate": 9.18537456428774e-06,
      "loss": 0.043,
      "step": 267260
    },
    {
      "epoch": 0.4374095821632202,
      "grad_norm": 0.7479756474494934,
      "learning_rate": 9.185308672074221e-06,
      "loss": 0.0448,
      "step": 267280
    },
    {
      "epoch": 0.4374423126018735,
      "grad_norm": 2.6313939094543457,
      "learning_rate": 9.185242779860705e-06,
      "loss": 0.0424,
      "step": 267300
    },
    {
      "epoch": 0.4374750430405268,
      "grad_norm": 1.8896740674972534,
      "learning_rate": 9.185176887647187e-06,
      "loss": 0.0473,
      "step": 267320
    },
    {
      "epoch": 0.4375077734791802,
      "grad_norm": 0.601424515247345,
      "learning_rate": 9.18511099543367e-06,
      "loss": 0.0355,
      "step": 267340
    },
    {
      "epoch": 0.4375405039178335,
      "grad_norm": 1.1575852632522583,
      "learning_rate": 9.185045103220154e-06,
      "loss": 0.0411,
      "step": 267360
    },
    {
      "epoch": 0.4375732343564869,
      "grad_norm": 0.47074300050735474,
      "learning_rate": 9.184979211006636e-06,
      "loss": 0.0566,
      "step": 267380
    },
    {
      "epoch": 0.4376059647951402,
      "grad_norm": 1.992278814315796,
      "learning_rate": 9.18491331879312e-06,
      "loss": 0.0383,
      "step": 267400
    },
    {
      "epoch": 0.4376386952337935,
      "grad_norm": 0.8057448863983154,
      "learning_rate": 9.184847426579601e-06,
      "loss": 0.0342,
      "step": 267420
    },
    {
      "epoch": 0.4376714256724469,
      "grad_norm": 0.2717205882072449,
      "learning_rate": 9.184781534366085e-06,
      "loss": 0.0379,
      "step": 267440
    },
    {
      "epoch": 0.4377041561111002,
      "grad_norm": 2.2724738121032715,
      "learning_rate": 9.184715642152567e-06,
      "loss": 0.0569,
      "step": 267460
    },
    {
      "epoch": 0.4377368865497535,
      "grad_norm": 1.3948462009429932,
      "learning_rate": 9.18464974993905e-06,
      "loss": 0.0398,
      "step": 267480
    },
    {
      "epoch": 0.4377696169884069,
      "grad_norm": 0.21187616884708405,
      "learning_rate": 9.184583857725532e-06,
      "loss": 0.0503,
      "step": 267500
    },
    {
      "epoch": 0.4378023474270602,
      "grad_norm": 1.1374536752700806,
      "learning_rate": 9.184517965512016e-06,
      "loss": 0.0431,
      "step": 267520
    },
    {
      "epoch": 0.4378350778657136,
      "grad_norm": 1.0080227851867676,
      "learning_rate": 9.1844520732985e-06,
      "loss": 0.0286,
      "step": 267540
    },
    {
      "epoch": 0.4378678083043669,
      "grad_norm": 1.4881800413131714,
      "learning_rate": 9.184386181084981e-06,
      "loss": 0.0532,
      "step": 267560
    },
    {
      "epoch": 0.4379005387430202,
      "grad_norm": 0.5669293999671936,
      "learning_rate": 9.184320288871465e-06,
      "loss": 0.0484,
      "step": 267580
    },
    {
      "epoch": 0.4379332691816736,
      "grad_norm": 1.6293020248413086,
      "learning_rate": 9.184254396657949e-06,
      "loss": 0.0443,
      "step": 267600
    },
    {
      "epoch": 0.4379659996203269,
      "grad_norm": 0.40547892451286316,
      "learning_rate": 9.18418850444443e-06,
      "loss": 0.0437,
      "step": 267620
    },
    {
      "epoch": 0.43799873005898027,
      "grad_norm": 2.075087785720825,
      "learning_rate": 9.184122612230914e-06,
      "loss": 0.0271,
      "step": 267640
    },
    {
      "epoch": 0.4380314604976336,
      "grad_norm": 1.2913237810134888,
      "learning_rate": 9.184056720017396e-06,
      "loss": 0.0436,
      "step": 267660
    },
    {
      "epoch": 0.4380641909362869,
      "grad_norm": 1.4672733545303345,
      "learning_rate": 9.18399082780388e-06,
      "loss": 0.0444,
      "step": 267680
    },
    {
      "epoch": 0.4380969213749403,
      "grad_norm": 2.013359785079956,
      "learning_rate": 9.183924935590363e-06,
      "loss": 0.0425,
      "step": 267700
    },
    {
      "epoch": 0.4381296518135936,
      "grad_norm": 0.31783515214920044,
      "learning_rate": 9.183859043376845e-06,
      "loss": 0.0495,
      "step": 267720
    },
    {
      "epoch": 0.43816238225224696,
      "grad_norm": 0.7255528569221497,
      "learning_rate": 9.183793151163329e-06,
      "loss": 0.0382,
      "step": 267740
    },
    {
      "epoch": 0.4381951126909003,
      "grad_norm": 1.3846004009246826,
      "learning_rate": 9.18372725894981e-06,
      "loss": 0.0388,
      "step": 267760
    },
    {
      "epoch": 0.4382278431295536,
      "grad_norm": 1.215255856513977,
      "learning_rate": 9.183661366736294e-06,
      "loss": 0.0426,
      "step": 267780
    },
    {
      "epoch": 0.43826057356820697,
      "grad_norm": 1.050882339477539,
      "learning_rate": 9.183595474522776e-06,
      "loss": 0.0405,
      "step": 267800
    },
    {
      "epoch": 0.4382933040068603,
      "grad_norm": 1.0727638006210327,
      "learning_rate": 9.18352958230926e-06,
      "loss": 0.0394,
      "step": 267820
    },
    {
      "epoch": 0.43832603444551366,
      "grad_norm": 0.4671942889690399,
      "learning_rate": 9.183463690095741e-06,
      "loss": 0.0391,
      "step": 267840
    },
    {
      "epoch": 0.438358764884167,
      "grad_norm": 1.2413769960403442,
      "learning_rate": 9.183397797882225e-06,
      "loss": 0.0569,
      "step": 267860
    },
    {
      "epoch": 0.4383914953228203,
      "grad_norm": 2.7673425674438477,
      "learning_rate": 9.183331905668707e-06,
      "loss": 0.0494,
      "step": 267880
    },
    {
      "epoch": 0.43842422576147366,
      "grad_norm": 1.6766496896743774,
      "learning_rate": 9.18326601345519e-06,
      "loss": 0.0502,
      "step": 267900
    },
    {
      "epoch": 0.438456956200127,
      "grad_norm": 1.9101543426513672,
      "learning_rate": 9.183200121241674e-06,
      "loss": 0.044,
      "step": 267920
    },
    {
      "epoch": 0.43848968663878035,
      "grad_norm": 2.3972551822662354,
      "learning_rate": 9.183134229028156e-06,
      "loss": 0.0398,
      "step": 267940
    },
    {
      "epoch": 0.43852241707743367,
      "grad_norm": 1.372367262840271,
      "learning_rate": 9.18306833681464e-06,
      "loss": 0.0322,
      "step": 267960
    },
    {
      "epoch": 0.438555147516087,
      "grad_norm": 1.1233956813812256,
      "learning_rate": 9.183002444601123e-06,
      "loss": 0.0309,
      "step": 267980
    },
    {
      "epoch": 0.43858787795474036,
      "grad_norm": 2.9459118843078613,
      "learning_rate": 9.182936552387605e-06,
      "loss": 0.0336,
      "step": 268000
    },
    {
      "epoch": 0.4386206083933937,
      "grad_norm": 1.0022820234298706,
      "learning_rate": 9.182870660174089e-06,
      "loss": 0.0449,
      "step": 268020
    },
    {
      "epoch": 0.43865333883204705,
      "grad_norm": 0.7444366812705994,
      "learning_rate": 9.18280476796057e-06,
      "loss": 0.0391,
      "step": 268040
    },
    {
      "epoch": 0.43868606927070036,
      "grad_norm": 0.2953988015651703,
      "learning_rate": 9.182738875747054e-06,
      "loss": 0.0526,
      "step": 268060
    },
    {
      "epoch": 0.4387187997093537,
      "grad_norm": 2.716106653213501,
      "learning_rate": 9.182672983533538e-06,
      "loss": 0.046,
      "step": 268080
    },
    {
      "epoch": 0.43875153014800705,
      "grad_norm": 1.4868253469467163,
      "learning_rate": 9.18260709132002e-06,
      "loss": 0.0391,
      "step": 268100
    },
    {
      "epoch": 0.43878426058666037,
      "grad_norm": 0.2830282747745514,
      "learning_rate": 9.182541199106503e-06,
      "loss": 0.0276,
      "step": 268120
    },
    {
      "epoch": 0.43881699102531374,
      "grad_norm": 1.0204267501831055,
      "learning_rate": 9.182475306892985e-06,
      "loss": 0.049,
      "step": 268140
    },
    {
      "epoch": 0.43884972146396706,
      "grad_norm": 0.40305227041244507,
      "learning_rate": 9.182409414679469e-06,
      "loss": 0.0469,
      "step": 268160
    },
    {
      "epoch": 0.4388824519026204,
      "grad_norm": 1.7622921466827393,
      "learning_rate": 9.18234352246595e-06,
      "loss": 0.0271,
      "step": 268180
    },
    {
      "epoch": 0.43891518234127375,
      "grad_norm": 1.327609896659851,
      "learning_rate": 9.182277630252434e-06,
      "loss": 0.0442,
      "step": 268200
    },
    {
      "epoch": 0.43894791277992706,
      "grad_norm": 2.4460320472717285,
      "learning_rate": 9.182211738038916e-06,
      "loss": 0.0383,
      "step": 268220
    },
    {
      "epoch": 0.43898064321858044,
      "grad_norm": 0.9198466539382935,
      "learning_rate": 9.1821458458254e-06,
      "loss": 0.0361,
      "step": 268240
    },
    {
      "epoch": 0.43901337365723375,
      "grad_norm": 0.9424929022789001,
      "learning_rate": 9.182079953611882e-06,
      "loss": 0.0452,
      "step": 268260
    },
    {
      "epoch": 0.43904610409588707,
      "grad_norm": 2.2593820095062256,
      "learning_rate": 9.182014061398365e-06,
      "loss": 0.034,
      "step": 268280
    },
    {
      "epoch": 0.43907883453454044,
      "grad_norm": 0.18434137105941772,
      "learning_rate": 9.181948169184847e-06,
      "loss": 0.0483,
      "step": 268300
    },
    {
      "epoch": 0.43911156497319376,
      "grad_norm": 1.6594327688217163,
      "learning_rate": 9.18188227697133e-06,
      "loss": 0.0437,
      "step": 268320
    },
    {
      "epoch": 0.43914429541184713,
      "grad_norm": 1.181809902191162,
      "learning_rate": 9.181816384757814e-06,
      "loss": 0.037,
      "step": 268340
    },
    {
      "epoch": 0.43917702585050045,
      "grad_norm": 2.4555041790008545,
      "learning_rate": 9.181750492544296e-06,
      "loss": 0.0522,
      "step": 268360
    },
    {
      "epoch": 0.43920975628915376,
      "grad_norm": 0.830388605594635,
      "learning_rate": 9.18168460033078e-06,
      "loss": 0.0583,
      "step": 268380
    },
    {
      "epoch": 0.43924248672780714,
      "grad_norm": 0.7872136235237122,
      "learning_rate": 9.181618708117263e-06,
      "loss": 0.0456,
      "step": 268400
    },
    {
      "epoch": 0.43927521716646045,
      "grad_norm": 7.506450653076172,
      "learning_rate": 9.181552815903745e-06,
      "loss": 0.0493,
      "step": 268420
    },
    {
      "epoch": 0.4393079476051138,
      "grad_norm": 1.3247407674789429,
      "learning_rate": 9.181486923690229e-06,
      "loss": 0.0513,
      "step": 268440
    },
    {
      "epoch": 0.43934067804376714,
      "grad_norm": 2.9667184352874756,
      "learning_rate": 9.181421031476712e-06,
      "loss": 0.048,
      "step": 268460
    },
    {
      "epoch": 0.43937340848242046,
      "grad_norm": 1.6024436950683594,
      "learning_rate": 9.181355139263194e-06,
      "loss": 0.0508,
      "step": 268480
    },
    {
      "epoch": 0.43940613892107383,
      "grad_norm": 1.4463835954666138,
      "learning_rate": 9.181289247049678e-06,
      "loss": 0.0256,
      "step": 268500
    },
    {
      "epoch": 0.43943886935972715,
      "grad_norm": 5.2224955558776855,
      "learning_rate": 9.18122335483616e-06,
      "loss": 0.0512,
      "step": 268520
    },
    {
      "epoch": 0.4394715997983805,
      "grad_norm": 0.41720062494277954,
      "learning_rate": 9.181157462622643e-06,
      "loss": 0.043,
      "step": 268540
    },
    {
      "epoch": 0.43950433023703384,
      "grad_norm": 0.7385215759277344,
      "learning_rate": 9.181091570409125e-06,
      "loss": 0.0371,
      "step": 268560
    },
    {
      "epoch": 0.43953706067568715,
      "grad_norm": 0.7245786786079407,
      "learning_rate": 9.181025678195609e-06,
      "loss": 0.042,
      "step": 268580
    },
    {
      "epoch": 0.4395697911143405,
      "grad_norm": 2.5277175903320312,
      "learning_rate": 9.18095978598209e-06,
      "loss": 0.0372,
      "step": 268600
    },
    {
      "epoch": 0.43960252155299384,
      "grad_norm": 11.004438400268555,
      "learning_rate": 9.180893893768574e-06,
      "loss": 0.0512,
      "step": 268620
    },
    {
      "epoch": 0.4396352519916472,
      "grad_norm": 1.1225323677062988,
      "learning_rate": 9.180828001555056e-06,
      "loss": 0.0508,
      "step": 268640
    },
    {
      "epoch": 0.43966798243030053,
      "grad_norm": 2.3199005126953125,
      "learning_rate": 9.18076210934154e-06,
      "loss": 0.0508,
      "step": 268660
    },
    {
      "epoch": 0.43970071286895385,
      "grad_norm": 2.151216506958008,
      "learning_rate": 9.180696217128022e-06,
      "loss": 0.0582,
      "step": 268680
    },
    {
      "epoch": 0.4397334433076072,
      "grad_norm": 0.6587785482406616,
      "learning_rate": 9.180630324914505e-06,
      "loss": 0.0435,
      "step": 268700
    },
    {
      "epoch": 0.43976617374626054,
      "grad_norm": 0.7403159141540527,
      "learning_rate": 9.180564432700989e-06,
      "loss": 0.04,
      "step": 268720
    },
    {
      "epoch": 0.4397989041849139,
      "grad_norm": 0.9817855358123779,
      "learning_rate": 9.18049854048747e-06,
      "loss": 0.0475,
      "step": 268740
    },
    {
      "epoch": 0.4398316346235672,
      "grad_norm": 1.4224913120269775,
      "learning_rate": 9.180432648273954e-06,
      "loss": 0.0461,
      "step": 268760
    },
    {
      "epoch": 0.43986436506222054,
      "grad_norm": 1.3569406270980835,
      "learning_rate": 9.180366756060438e-06,
      "loss": 0.0436,
      "step": 268780
    },
    {
      "epoch": 0.4398970955008739,
      "grad_norm": 0.533096969127655,
      "learning_rate": 9.18030086384692e-06,
      "loss": 0.0355,
      "step": 268800
    },
    {
      "epoch": 0.43992982593952723,
      "grad_norm": 0.7806104421615601,
      "learning_rate": 9.180234971633403e-06,
      "loss": 0.0379,
      "step": 268820
    },
    {
      "epoch": 0.4399625563781806,
      "grad_norm": 1.9977433681488037,
      "learning_rate": 9.180169079419887e-06,
      "loss": 0.0424,
      "step": 268840
    },
    {
      "epoch": 0.4399952868168339,
      "grad_norm": 2.393653392791748,
      "learning_rate": 9.180103187206369e-06,
      "loss": 0.047,
      "step": 268860
    },
    {
      "epoch": 0.44002801725548724,
      "grad_norm": 0.34742188453674316,
      "learning_rate": 9.180037294992852e-06,
      "loss": 0.0389,
      "step": 268880
    },
    {
      "epoch": 0.4400607476941406,
      "grad_norm": 1.8313261270523071,
      "learning_rate": 9.179971402779334e-06,
      "loss": 0.048,
      "step": 268900
    },
    {
      "epoch": 0.4400934781327939,
      "grad_norm": 0.5244176387786865,
      "learning_rate": 9.179905510565818e-06,
      "loss": 0.0459,
      "step": 268920
    },
    {
      "epoch": 0.4401262085714473,
      "grad_norm": 0.4569733142852783,
      "learning_rate": 9.1798396183523e-06,
      "loss": 0.036,
      "step": 268940
    },
    {
      "epoch": 0.4401589390101006,
      "grad_norm": 1.681061029434204,
      "learning_rate": 9.179773726138783e-06,
      "loss": 0.0375,
      "step": 268960
    },
    {
      "epoch": 0.44019166944875393,
      "grad_norm": 1.874534249305725,
      "learning_rate": 9.179707833925265e-06,
      "loss": 0.0426,
      "step": 268980
    },
    {
      "epoch": 0.4402243998874073,
      "grad_norm": 0.8890466690063477,
      "learning_rate": 9.179641941711749e-06,
      "loss": 0.041,
      "step": 269000
    },
    {
      "epoch": 0.4402571303260606,
      "grad_norm": 2.525437593460083,
      "learning_rate": 9.17957604949823e-06,
      "loss": 0.0534,
      "step": 269020
    },
    {
      "epoch": 0.440289860764714,
      "grad_norm": 1.4873789548873901,
      "learning_rate": 9.179510157284714e-06,
      "loss": 0.0515,
      "step": 269040
    },
    {
      "epoch": 0.4403225912033673,
      "grad_norm": 1.1865063905715942,
      "learning_rate": 9.179444265071196e-06,
      "loss": 0.0466,
      "step": 269060
    },
    {
      "epoch": 0.4403553216420206,
      "grad_norm": 2.099616527557373,
      "learning_rate": 9.17937837285768e-06,
      "loss": 0.0491,
      "step": 269080
    },
    {
      "epoch": 0.440388052080674,
      "grad_norm": 2.62963604927063,
      "learning_rate": 9.179312480644163e-06,
      "loss": 0.0345,
      "step": 269100
    },
    {
      "epoch": 0.4404207825193273,
      "grad_norm": 0.7936460375785828,
      "learning_rate": 9.179246588430645e-06,
      "loss": 0.0334,
      "step": 269120
    },
    {
      "epoch": 0.4404535129579807,
      "grad_norm": 0.7453510761260986,
      "learning_rate": 9.179180696217129e-06,
      "loss": 0.057,
      "step": 269140
    },
    {
      "epoch": 0.440486243396634,
      "grad_norm": 0.30619683861732483,
      "learning_rate": 9.17911480400361e-06,
      "loss": 0.0541,
      "step": 269160
    },
    {
      "epoch": 0.4405189738352873,
      "grad_norm": 0.6900954246520996,
      "learning_rate": 9.179048911790094e-06,
      "loss": 0.0494,
      "step": 269180
    },
    {
      "epoch": 0.4405517042739407,
      "grad_norm": 1.260840654373169,
      "learning_rate": 9.178983019576578e-06,
      "loss": 0.0372,
      "step": 269200
    },
    {
      "epoch": 0.440584434712594,
      "grad_norm": 1.7262970209121704,
      "learning_rate": 9.17891712736306e-06,
      "loss": 0.0457,
      "step": 269220
    },
    {
      "epoch": 0.4406171651512474,
      "grad_norm": 0.53739333152771,
      "learning_rate": 9.178851235149543e-06,
      "loss": 0.0384,
      "step": 269240
    },
    {
      "epoch": 0.4406498955899007,
      "grad_norm": 0.655971348285675,
      "learning_rate": 9.178785342936027e-06,
      "loss": 0.0377,
      "step": 269260
    },
    {
      "epoch": 0.440682626028554,
      "grad_norm": 1.2028285264968872,
      "learning_rate": 9.178719450722509e-06,
      "loss": 0.0396,
      "step": 269280
    },
    {
      "epoch": 0.4407153564672074,
      "grad_norm": 0.7305539846420288,
      "learning_rate": 9.178653558508992e-06,
      "loss": 0.0452,
      "step": 269300
    },
    {
      "epoch": 0.4407480869058607,
      "grad_norm": 1.9679127931594849,
      "learning_rate": 9.178587666295474e-06,
      "loss": 0.0346,
      "step": 269320
    },
    {
      "epoch": 0.4407808173445141,
      "grad_norm": 2.1679255962371826,
      "learning_rate": 9.178521774081958e-06,
      "loss": 0.0482,
      "step": 269340
    },
    {
      "epoch": 0.4408135477831674,
      "grad_norm": 1.3372067213058472,
      "learning_rate": 9.17845588186844e-06,
      "loss": 0.0405,
      "step": 269360
    },
    {
      "epoch": 0.4408462782218207,
      "grad_norm": 1.519374966621399,
      "learning_rate": 9.178389989654923e-06,
      "loss": 0.0414,
      "step": 269380
    },
    {
      "epoch": 0.4408790086604741,
      "grad_norm": 1.4829319715499878,
      "learning_rate": 9.178324097441405e-06,
      "loss": 0.0436,
      "step": 269400
    },
    {
      "epoch": 0.4409117390991274,
      "grad_norm": 2.288370370864868,
      "learning_rate": 9.178258205227889e-06,
      "loss": 0.0389,
      "step": 269420
    },
    {
      "epoch": 0.44094446953778077,
      "grad_norm": 2.042269468307495,
      "learning_rate": 9.17819231301437e-06,
      "loss": 0.0373,
      "step": 269440
    },
    {
      "epoch": 0.4409771999764341,
      "grad_norm": 3.266505241394043,
      "learning_rate": 9.178126420800854e-06,
      "loss": 0.0482,
      "step": 269460
    },
    {
      "epoch": 0.4410099304150874,
      "grad_norm": 1.7661793231964111,
      "learning_rate": 9.178060528587338e-06,
      "loss": 0.0423,
      "step": 269480
    },
    {
      "epoch": 0.4410426608537408,
      "grad_norm": 4.107864856719971,
      "learning_rate": 9.17799463637382e-06,
      "loss": 0.0447,
      "step": 269500
    },
    {
      "epoch": 0.4410753912923941,
      "grad_norm": 2.6559908390045166,
      "learning_rate": 9.177928744160303e-06,
      "loss": 0.0443,
      "step": 269520
    },
    {
      "epoch": 0.44110812173104746,
      "grad_norm": 1.7654691934585571,
      "learning_rate": 9.177862851946785e-06,
      "loss": 0.0497,
      "step": 269540
    },
    {
      "epoch": 0.4411408521697008,
      "grad_norm": 2.536208391189575,
      "learning_rate": 9.177796959733269e-06,
      "loss": 0.0544,
      "step": 269560
    },
    {
      "epoch": 0.4411735826083541,
      "grad_norm": 0.6539469361305237,
      "learning_rate": 9.177731067519753e-06,
      "loss": 0.0388,
      "step": 269580
    },
    {
      "epoch": 0.44120631304700747,
      "grad_norm": 2.7035458087921143,
      "learning_rate": 9.177665175306234e-06,
      "loss": 0.0617,
      "step": 269600
    },
    {
      "epoch": 0.4412390434856608,
      "grad_norm": 2.7396740913391113,
      "learning_rate": 9.177599283092718e-06,
      "loss": 0.041,
      "step": 269620
    },
    {
      "epoch": 0.44127177392431416,
      "grad_norm": 1.5801783800125122,
      "learning_rate": 9.177533390879202e-06,
      "loss": 0.0422,
      "step": 269640
    },
    {
      "epoch": 0.4413045043629675,
      "grad_norm": 1.7512071132659912,
      "learning_rate": 9.177467498665683e-06,
      "loss": 0.0404,
      "step": 269660
    },
    {
      "epoch": 0.4413372348016208,
      "grad_norm": 1.2177200317382812,
      "learning_rate": 9.177401606452167e-06,
      "loss": 0.0447,
      "step": 269680
    },
    {
      "epoch": 0.44136996524027416,
      "grad_norm": 2.272582530975342,
      "learning_rate": 9.177335714238649e-06,
      "loss": 0.0407,
      "step": 269700
    },
    {
      "epoch": 0.4414026956789275,
      "grad_norm": 0.8236798048019409,
      "learning_rate": 9.177269822025133e-06,
      "loss": 0.0369,
      "step": 269720
    },
    {
      "epoch": 0.44143542611758085,
      "grad_norm": 2.3392062187194824,
      "learning_rate": 9.177203929811614e-06,
      "loss": 0.0374,
      "step": 269740
    },
    {
      "epoch": 0.44146815655623417,
      "grad_norm": 0.5999717712402344,
      "learning_rate": 9.177138037598098e-06,
      "loss": 0.0401,
      "step": 269760
    },
    {
      "epoch": 0.4415008869948875,
      "grad_norm": 0.7670711278915405,
      "learning_rate": 9.17707214538458e-06,
      "loss": 0.0315,
      "step": 269780
    },
    {
      "epoch": 0.44153361743354086,
      "grad_norm": 3.7172532081604004,
      "learning_rate": 9.177006253171063e-06,
      "loss": 0.0513,
      "step": 269800
    },
    {
      "epoch": 0.4415663478721942,
      "grad_norm": 1.8740553855895996,
      "learning_rate": 9.176940360957547e-06,
      "loss": 0.0384,
      "step": 269820
    },
    {
      "epoch": 0.44159907831084755,
      "grad_norm": 3.8430702686309814,
      "learning_rate": 9.176874468744029e-06,
      "loss": 0.0364,
      "step": 269840
    },
    {
      "epoch": 0.44163180874950086,
      "grad_norm": 1.6531161069869995,
      "learning_rate": 9.176808576530513e-06,
      "loss": 0.0356,
      "step": 269860
    },
    {
      "epoch": 0.4416645391881542,
      "grad_norm": 0.1422099620103836,
      "learning_rate": 9.176742684316994e-06,
      "loss": 0.0315,
      "step": 269880
    },
    {
      "epoch": 0.44169726962680755,
      "grad_norm": 0.7922623753547668,
      "learning_rate": 9.176676792103478e-06,
      "loss": 0.0468,
      "step": 269900
    },
    {
      "epoch": 0.44173000006546087,
      "grad_norm": 0.8744741678237915,
      "learning_rate": 9.17661089988996e-06,
      "loss": 0.0415,
      "step": 269920
    },
    {
      "epoch": 0.44176273050411424,
      "grad_norm": 3.5040056705474854,
      "learning_rate": 9.176545007676444e-06,
      "loss": 0.0488,
      "step": 269940
    },
    {
      "epoch": 0.44179546094276756,
      "grad_norm": 0.9828771352767944,
      "learning_rate": 9.176479115462925e-06,
      "loss": 0.034,
      "step": 269960
    },
    {
      "epoch": 0.4418281913814209,
      "grad_norm": 1.1030991077423096,
      "learning_rate": 9.176413223249409e-06,
      "loss": 0.0476,
      "step": 269980
    },
    {
      "epoch": 0.44186092182007425,
      "grad_norm": 0.7671075463294983,
      "learning_rate": 9.176347331035893e-06,
      "loss": 0.0408,
      "step": 270000
    },
    {
      "epoch": 0.44189365225872757,
      "grad_norm": 3.248664379119873,
      "learning_rate": 9.176281438822374e-06,
      "loss": 0.0433,
      "step": 270020
    },
    {
      "epoch": 0.44192638269738094,
      "grad_norm": 1.084293246269226,
      "learning_rate": 9.176215546608858e-06,
      "loss": 0.0435,
      "step": 270040
    },
    {
      "epoch": 0.44195911313603425,
      "grad_norm": 0.6877020001411438,
      "learning_rate": 9.176149654395342e-06,
      "loss": 0.045,
      "step": 270060
    },
    {
      "epoch": 0.44199184357468757,
      "grad_norm": 1.973479986190796,
      "learning_rate": 9.176083762181824e-06,
      "loss": 0.0513,
      "step": 270080
    },
    {
      "epoch": 0.44202457401334094,
      "grad_norm": 1.3142627477645874,
      "learning_rate": 9.176017869968307e-06,
      "loss": 0.0469,
      "step": 270100
    },
    {
      "epoch": 0.44205730445199426,
      "grad_norm": 3.851361036300659,
      "learning_rate": 9.175951977754789e-06,
      "loss": 0.0412,
      "step": 270120
    },
    {
      "epoch": 0.44209003489064763,
      "grad_norm": 0.7017328143119812,
      "learning_rate": 9.175886085541273e-06,
      "loss": 0.0369,
      "step": 270140
    },
    {
      "epoch": 0.44212276532930095,
      "grad_norm": 7.674973487854004,
      "learning_rate": 9.175820193327756e-06,
      "loss": 0.0417,
      "step": 270160
    },
    {
      "epoch": 0.44215549576795427,
      "grad_norm": 3.0643858909606934,
      "learning_rate": 9.175754301114238e-06,
      "loss": 0.0387,
      "step": 270180
    },
    {
      "epoch": 0.44218822620660764,
      "grad_norm": 2.9183549880981445,
      "learning_rate": 9.175688408900722e-06,
      "loss": 0.0429,
      "step": 270200
    },
    {
      "epoch": 0.44222095664526095,
      "grad_norm": 0.18508227169513702,
      "learning_rate": 9.175622516687204e-06,
      "loss": 0.05,
      "step": 270220
    },
    {
      "epoch": 0.4422536870839143,
      "grad_norm": 1.1145141124725342,
      "learning_rate": 9.175556624473687e-06,
      "loss": 0.0346,
      "step": 270240
    },
    {
      "epoch": 0.44228641752256764,
      "grad_norm": 0.6105425953865051,
      "learning_rate": 9.175490732260169e-06,
      "loss": 0.042,
      "step": 270260
    },
    {
      "epoch": 0.44231914796122096,
      "grad_norm": 2.1322097778320312,
      "learning_rate": 9.175424840046653e-06,
      "loss": 0.0329,
      "step": 270280
    },
    {
      "epoch": 0.44235187839987433,
      "grad_norm": 1.46822190284729,
      "learning_rate": 9.175358947833135e-06,
      "loss": 0.0433,
      "step": 270300
    },
    {
      "epoch": 0.44238460883852765,
      "grad_norm": 1.0861294269561768,
      "learning_rate": 9.175293055619618e-06,
      "loss": 0.0507,
      "step": 270320
    },
    {
      "epoch": 0.44241733927718097,
      "grad_norm": 2.3152084350585938,
      "learning_rate": 9.1752271634061e-06,
      "loss": 0.0471,
      "step": 270340
    },
    {
      "epoch": 0.44245006971583434,
      "grad_norm": 1.0404151678085327,
      "learning_rate": 9.175161271192584e-06,
      "loss": 0.0432,
      "step": 270360
    },
    {
      "epoch": 0.44248280015448765,
      "grad_norm": 1.575558066368103,
      "learning_rate": 9.175095378979067e-06,
      "loss": 0.0461,
      "step": 270380
    },
    {
      "epoch": 0.442515530593141,
      "grad_norm": 1.3808062076568604,
      "learning_rate": 9.175029486765549e-06,
      "loss": 0.0361,
      "step": 270400
    },
    {
      "epoch": 0.44254826103179434,
      "grad_norm": 1.5654863119125366,
      "learning_rate": 9.174963594552033e-06,
      "loss": 0.0567,
      "step": 270420
    },
    {
      "epoch": 0.44258099147044766,
      "grad_norm": 0.9434766173362732,
      "learning_rate": 9.174897702338516e-06,
      "loss": 0.0341,
      "step": 270440
    },
    {
      "epoch": 0.44261372190910103,
      "grad_norm": 2.145686626434326,
      "learning_rate": 9.174831810124998e-06,
      "loss": 0.0398,
      "step": 270460
    },
    {
      "epoch": 0.44264645234775435,
      "grad_norm": 1.6016803979873657,
      "learning_rate": 9.174765917911482e-06,
      "loss": 0.054,
      "step": 270480
    },
    {
      "epoch": 0.4426791827864077,
      "grad_norm": 2.01981258392334,
      "learning_rate": 9.174700025697964e-06,
      "loss": 0.0409,
      "step": 270500
    },
    {
      "epoch": 0.44271191322506104,
      "grad_norm": 2.773149251937866,
      "learning_rate": 9.174634133484447e-06,
      "loss": 0.0357,
      "step": 270520
    },
    {
      "epoch": 0.44274464366371435,
      "grad_norm": 1.272505760192871,
      "learning_rate": 9.17456824127093e-06,
      "loss": 0.0472,
      "step": 270540
    },
    {
      "epoch": 0.4427773741023677,
      "grad_norm": 0.6784366369247437,
      "learning_rate": 9.174502349057413e-06,
      "loss": 0.0518,
      "step": 270560
    },
    {
      "epoch": 0.44281010454102104,
      "grad_norm": 0.8539638519287109,
      "learning_rate": 9.174436456843896e-06,
      "loss": 0.0398,
      "step": 270580
    },
    {
      "epoch": 0.4428428349796744,
      "grad_norm": 0.6454281806945801,
      "learning_rate": 9.174370564630378e-06,
      "loss": 0.0518,
      "step": 270600
    },
    {
      "epoch": 0.44287556541832773,
      "grad_norm": 3.7726552486419678,
      "learning_rate": 9.174304672416862e-06,
      "loss": 0.0465,
      "step": 270620
    },
    {
      "epoch": 0.44290829585698105,
      "grad_norm": 1.0778834819793701,
      "learning_rate": 9.174238780203344e-06,
      "loss": 0.037,
      "step": 270640
    },
    {
      "epoch": 0.4429410262956344,
      "grad_norm": 1.453650951385498,
      "learning_rate": 9.174172887989827e-06,
      "loss": 0.0571,
      "step": 270660
    },
    {
      "epoch": 0.44297375673428774,
      "grad_norm": 2.479980707168579,
      "learning_rate": 9.174106995776309e-06,
      "loss": 0.0507,
      "step": 270680
    },
    {
      "epoch": 0.4430064871729411,
      "grad_norm": 1.5319123268127441,
      "learning_rate": 9.174041103562793e-06,
      "loss": 0.0401,
      "step": 270700
    },
    {
      "epoch": 0.4430392176115944,
      "grad_norm": 0.34569957852363586,
      "learning_rate": 9.173975211349275e-06,
      "loss": 0.0406,
      "step": 270720
    },
    {
      "epoch": 0.44307194805024774,
      "grad_norm": 2.5180675983428955,
      "learning_rate": 9.173909319135758e-06,
      "loss": 0.046,
      "step": 270740
    },
    {
      "epoch": 0.4431046784889011,
      "grad_norm": 1.9270139932632446,
      "learning_rate": 9.173843426922242e-06,
      "loss": 0.0516,
      "step": 270760
    },
    {
      "epoch": 0.44313740892755443,
      "grad_norm": 2.4674723148345947,
      "learning_rate": 9.173777534708724e-06,
      "loss": 0.047,
      "step": 270780
    },
    {
      "epoch": 0.4431701393662078,
      "grad_norm": 1.068373680114746,
      "learning_rate": 9.173711642495207e-06,
      "loss": 0.0406,
      "step": 270800
    },
    {
      "epoch": 0.4432028698048611,
      "grad_norm": 0.7058926224708557,
      "learning_rate": 9.17364575028169e-06,
      "loss": 0.0481,
      "step": 270820
    },
    {
      "epoch": 0.44323560024351444,
      "grad_norm": 0.864883303642273,
      "learning_rate": 9.173579858068173e-06,
      "loss": 0.0456,
      "step": 270840
    },
    {
      "epoch": 0.4432683306821678,
      "grad_norm": 2.205094337463379,
      "learning_rate": 9.173513965854656e-06,
      "loss": 0.0575,
      "step": 270860
    },
    {
      "epoch": 0.4433010611208211,
      "grad_norm": 1.1637365818023682,
      "learning_rate": 9.17344807364114e-06,
      "loss": 0.0644,
      "step": 270880
    },
    {
      "epoch": 0.4433337915594745,
      "grad_norm": 2.0160369873046875,
      "learning_rate": 9.173382181427622e-06,
      "loss": 0.0383,
      "step": 270900
    },
    {
      "epoch": 0.4433665219981278,
      "grad_norm": 1.257277250289917,
      "learning_rate": 9.173316289214105e-06,
      "loss": 0.04,
      "step": 270920
    },
    {
      "epoch": 0.44339925243678113,
      "grad_norm": 2.155684471130371,
      "learning_rate": 9.173250397000587e-06,
      "loss": 0.0296,
      "step": 270940
    },
    {
      "epoch": 0.4434319828754345,
      "grad_norm": 1.6235663890838623,
      "learning_rate": 9.173184504787071e-06,
      "loss": 0.043,
      "step": 270960
    },
    {
      "epoch": 0.4434647133140878,
      "grad_norm": 0.363322913646698,
      "learning_rate": 9.173118612573553e-06,
      "loss": 0.0293,
      "step": 270980
    },
    {
      "epoch": 0.4434974437527412,
      "grad_norm": 0.25624895095825195,
      "learning_rate": 9.173052720360036e-06,
      "loss": 0.0343,
      "step": 271000
    },
    {
      "epoch": 0.4435301741913945,
      "grad_norm": 1.7826790809631348,
      "learning_rate": 9.172986828146518e-06,
      "loss": 0.0453,
      "step": 271020
    },
    {
      "epoch": 0.4435629046300478,
      "grad_norm": 0.7833800315856934,
      "learning_rate": 9.172920935933002e-06,
      "loss": 0.0378,
      "step": 271040
    },
    {
      "epoch": 0.4435956350687012,
      "grad_norm": 2.021141767501831,
      "learning_rate": 9.172855043719484e-06,
      "loss": 0.0478,
      "step": 271060
    },
    {
      "epoch": 0.4436283655073545,
      "grad_norm": 2.712045907974243,
      "learning_rate": 9.172789151505967e-06,
      "loss": 0.0396,
      "step": 271080
    },
    {
      "epoch": 0.4436610959460079,
      "grad_norm": 0.7262065410614014,
      "learning_rate": 9.17272325929245e-06,
      "loss": 0.0408,
      "step": 271100
    },
    {
      "epoch": 0.4436938263846612,
      "grad_norm": 2.0289230346679688,
      "learning_rate": 9.172657367078933e-06,
      "loss": 0.0396,
      "step": 271120
    },
    {
      "epoch": 0.4437265568233145,
      "grad_norm": 2.8084070682525635,
      "learning_rate": 9.172591474865415e-06,
      "loss": 0.0441,
      "step": 271140
    },
    {
      "epoch": 0.4437592872619679,
      "grad_norm": 0.9293684959411621,
      "learning_rate": 9.172525582651898e-06,
      "loss": 0.0498,
      "step": 271160
    },
    {
      "epoch": 0.4437920177006212,
      "grad_norm": 0.8281539082527161,
      "learning_rate": 9.172459690438382e-06,
      "loss": 0.0428,
      "step": 271180
    },
    {
      "epoch": 0.4438247481392746,
      "grad_norm": 1.1705838441848755,
      "learning_rate": 9.172393798224864e-06,
      "loss": 0.0414,
      "step": 271200
    },
    {
      "epoch": 0.4438574785779279,
      "grad_norm": 2.1282877922058105,
      "learning_rate": 9.172327906011347e-06,
      "loss": 0.0581,
      "step": 271220
    },
    {
      "epoch": 0.4438902090165812,
      "grad_norm": 2.301299810409546,
      "learning_rate": 9.172262013797831e-06,
      "loss": 0.0385,
      "step": 271240
    },
    {
      "epoch": 0.4439229394552346,
      "grad_norm": 2.071073293685913,
      "learning_rate": 9.172196121584313e-06,
      "loss": 0.0541,
      "step": 271260
    },
    {
      "epoch": 0.4439556698938879,
      "grad_norm": 2.0783793926239014,
      "learning_rate": 9.172130229370796e-06,
      "loss": 0.0414,
      "step": 271280
    },
    {
      "epoch": 0.4439884003325413,
      "grad_norm": 2.9443037509918213,
      "learning_rate": 9.17206433715728e-06,
      "loss": 0.0381,
      "step": 271300
    },
    {
      "epoch": 0.4440211307711946,
      "grad_norm": 2.2639777660369873,
      "learning_rate": 9.171998444943762e-06,
      "loss": 0.0376,
      "step": 271320
    },
    {
      "epoch": 0.4440538612098479,
      "grad_norm": 0.9262267351150513,
      "learning_rate": 9.171932552730245e-06,
      "loss": 0.0402,
      "step": 271340
    },
    {
      "epoch": 0.4440865916485013,
      "grad_norm": 1.0703411102294922,
      "learning_rate": 9.171866660516727e-06,
      "loss": 0.0407,
      "step": 271360
    },
    {
      "epoch": 0.4441193220871546,
      "grad_norm": 0.40034326910972595,
      "learning_rate": 9.171800768303211e-06,
      "loss": 0.0336,
      "step": 271380
    },
    {
      "epoch": 0.44415205252580797,
      "grad_norm": 1.6945194005966187,
      "learning_rate": 9.171734876089693e-06,
      "loss": 0.0401,
      "step": 271400
    },
    {
      "epoch": 0.4441847829644613,
      "grad_norm": 1.8537862300872803,
      "learning_rate": 9.171668983876176e-06,
      "loss": 0.0477,
      "step": 271420
    },
    {
      "epoch": 0.4442175134031146,
      "grad_norm": 5.941469192504883,
      "learning_rate": 9.171603091662658e-06,
      "loss": 0.0458,
      "step": 271440
    },
    {
      "epoch": 0.444250243841768,
      "grad_norm": 1.7668845653533936,
      "learning_rate": 9.171537199449142e-06,
      "loss": 0.042,
      "step": 271460
    },
    {
      "epoch": 0.4442829742804213,
      "grad_norm": 3.4762253761291504,
      "learning_rate": 9.171471307235624e-06,
      "loss": 0.0398,
      "step": 271480
    },
    {
      "epoch": 0.44431570471907467,
      "grad_norm": 1.0862873792648315,
      "learning_rate": 9.171405415022107e-06,
      "loss": 0.0524,
      "step": 271500
    },
    {
      "epoch": 0.444348435157728,
      "grad_norm": 1.7892812490463257,
      "learning_rate": 9.17133952280859e-06,
      "loss": 0.0444,
      "step": 271520
    },
    {
      "epoch": 0.4443811655963813,
      "grad_norm": 1.1078978776931763,
      "learning_rate": 9.171273630595073e-06,
      "loss": 0.0481,
      "step": 271540
    },
    {
      "epoch": 0.4444138960350347,
      "grad_norm": 1.5490704774856567,
      "learning_rate": 9.171207738381556e-06,
      "loss": 0.0445,
      "step": 271560
    },
    {
      "epoch": 0.444446626473688,
      "grad_norm": 0.6616212725639343,
      "learning_rate": 9.171141846168038e-06,
      "loss": 0.0438,
      "step": 271580
    },
    {
      "epoch": 0.44447935691234136,
      "grad_norm": 2.146303653717041,
      "learning_rate": 9.171075953954522e-06,
      "loss": 0.034,
      "step": 271600
    },
    {
      "epoch": 0.4445120873509947,
      "grad_norm": 10.328540802001953,
      "learning_rate": 9.171010061741006e-06,
      "loss": 0.0521,
      "step": 271620
    },
    {
      "epoch": 0.444544817789648,
      "grad_norm": 2.4938321113586426,
      "learning_rate": 9.170944169527487e-06,
      "loss": 0.0379,
      "step": 271640
    },
    {
      "epoch": 0.44457754822830137,
      "grad_norm": 0.6083002686500549,
      "learning_rate": 9.170878277313971e-06,
      "loss": 0.0346,
      "step": 271660
    },
    {
      "epoch": 0.4446102786669547,
      "grad_norm": 2.0136337280273438,
      "learning_rate": 9.170812385100455e-06,
      "loss": 0.0387,
      "step": 271680
    },
    {
      "epoch": 0.44464300910560806,
      "grad_norm": 0.3705828785896301,
      "learning_rate": 9.170746492886936e-06,
      "loss": 0.0521,
      "step": 271700
    },
    {
      "epoch": 0.4446757395442614,
      "grad_norm": 1.678145170211792,
      "learning_rate": 9.17068060067342e-06,
      "loss": 0.049,
      "step": 271720
    },
    {
      "epoch": 0.4447084699829147,
      "grad_norm": 1.8200339078903198,
      "learning_rate": 9.170614708459902e-06,
      "loss": 0.0452,
      "step": 271740
    },
    {
      "epoch": 0.44474120042156806,
      "grad_norm": 1.2667124271392822,
      "learning_rate": 9.170548816246386e-06,
      "loss": 0.0417,
      "step": 271760
    },
    {
      "epoch": 0.4447739308602214,
      "grad_norm": 2.190603733062744,
      "learning_rate": 9.170482924032867e-06,
      "loss": 0.0328,
      "step": 271780
    },
    {
      "epoch": 0.44480666129887475,
      "grad_norm": 0.8930115699768066,
      "learning_rate": 9.170417031819351e-06,
      "loss": 0.0481,
      "step": 271800
    },
    {
      "epoch": 0.44483939173752807,
      "grad_norm": 1.3735756874084473,
      "learning_rate": 9.170351139605833e-06,
      "loss": 0.0442,
      "step": 271820
    },
    {
      "epoch": 0.4448721221761814,
      "grad_norm": 1.6208105087280273,
      "learning_rate": 9.170285247392316e-06,
      "loss": 0.0326,
      "step": 271840
    },
    {
      "epoch": 0.44490485261483476,
      "grad_norm": 1.6905561685562134,
      "learning_rate": 9.170219355178798e-06,
      "loss": 0.0246,
      "step": 271860
    },
    {
      "epoch": 0.4449375830534881,
      "grad_norm": 1.5602749586105347,
      "learning_rate": 9.170153462965282e-06,
      "loss": 0.0507,
      "step": 271880
    },
    {
      "epoch": 0.44497031349214144,
      "grad_norm": 1.262511134147644,
      "learning_rate": 9.170087570751764e-06,
      "loss": 0.0378,
      "step": 271900
    },
    {
      "epoch": 0.44500304393079476,
      "grad_norm": 2.6307194232940674,
      "learning_rate": 9.170021678538247e-06,
      "loss": 0.0396,
      "step": 271920
    },
    {
      "epoch": 0.4450357743694481,
      "grad_norm": 3.105480909347534,
      "learning_rate": 9.169955786324731e-06,
      "loss": 0.0476,
      "step": 271940
    },
    {
      "epoch": 0.44506850480810145,
      "grad_norm": 3.99287748336792,
      "learning_rate": 9.169889894111213e-06,
      "loss": 0.0445,
      "step": 271960
    },
    {
      "epoch": 0.44510123524675477,
      "grad_norm": 2.584439277648926,
      "learning_rate": 9.169824001897697e-06,
      "loss": 0.047,
      "step": 271980
    },
    {
      "epoch": 0.44513396568540814,
      "grad_norm": 2.2785048484802246,
      "learning_rate": 9.169758109684178e-06,
      "loss": 0.0376,
      "step": 272000
    },
    {
      "epoch": 0.44516669612406146,
      "grad_norm": 0.3663891553878784,
      "learning_rate": 9.169692217470662e-06,
      "loss": 0.0363,
      "step": 272020
    },
    {
      "epoch": 0.4451994265627148,
      "grad_norm": 3.149043083190918,
      "learning_rate": 9.169626325257146e-06,
      "loss": 0.052,
      "step": 272040
    },
    {
      "epoch": 0.44523215700136815,
      "grad_norm": 0.5008667707443237,
      "learning_rate": 9.169560433043627e-06,
      "loss": 0.0453,
      "step": 272060
    },
    {
      "epoch": 0.44526488744002146,
      "grad_norm": 0.5692165493965149,
      "learning_rate": 9.169494540830111e-06,
      "loss": 0.0334,
      "step": 272080
    },
    {
      "epoch": 0.44529761787867483,
      "grad_norm": 1.6355525255203247,
      "learning_rate": 9.169428648616595e-06,
      "loss": 0.0605,
      "step": 272100
    },
    {
      "epoch": 0.44533034831732815,
      "grad_norm": 2.5234391689300537,
      "learning_rate": 9.169362756403077e-06,
      "loss": 0.0428,
      "step": 272120
    },
    {
      "epoch": 0.44536307875598147,
      "grad_norm": 2.067366361618042,
      "learning_rate": 9.16929686418956e-06,
      "loss": 0.0358,
      "step": 272140
    },
    {
      "epoch": 0.44539580919463484,
      "grad_norm": 3.2220466136932373,
      "learning_rate": 9.169230971976042e-06,
      "loss": 0.0345,
      "step": 272160
    },
    {
      "epoch": 0.44542853963328816,
      "grad_norm": 1.956400752067566,
      "learning_rate": 9.169165079762526e-06,
      "loss": 0.0346,
      "step": 272180
    },
    {
      "epoch": 0.44546127007194153,
      "grad_norm": 0.6018822193145752,
      "learning_rate": 9.169099187549008e-06,
      "loss": 0.0434,
      "step": 272200
    },
    {
      "epoch": 0.44549400051059485,
      "grad_norm": 2.1981923580169678,
      "learning_rate": 9.169033295335491e-06,
      "loss": 0.0462,
      "step": 272220
    },
    {
      "epoch": 0.44552673094924816,
      "grad_norm": 0.843346893787384,
      "learning_rate": 9.168967403121973e-06,
      "loss": 0.0366,
      "step": 272240
    },
    {
      "epoch": 0.44555946138790153,
      "grad_norm": 0.37612783908843994,
      "learning_rate": 9.168901510908457e-06,
      "loss": 0.0539,
      "step": 272260
    },
    {
      "epoch": 0.44559219182655485,
      "grad_norm": 1.3057911396026611,
      "learning_rate": 9.16883561869494e-06,
      "loss": 0.0369,
      "step": 272280
    },
    {
      "epoch": 0.4456249222652082,
      "grad_norm": 0.5010384917259216,
      "learning_rate": 9.168769726481422e-06,
      "loss": 0.0391,
      "step": 272300
    },
    {
      "epoch": 0.44565765270386154,
      "grad_norm": 2.6026363372802734,
      "learning_rate": 9.168703834267906e-06,
      "loss": 0.05,
      "step": 272320
    },
    {
      "epoch": 0.44569038314251486,
      "grad_norm": 0.9783769249916077,
      "learning_rate": 9.168637942054388e-06,
      "loss": 0.0359,
      "step": 272340
    },
    {
      "epoch": 0.44572311358116823,
      "grad_norm": 0.9643893837928772,
      "learning_rate": 9.168572049840871e-06,
      "loss": 0.0313,
      "step": 272360
    },
    {
      "epoch": 0.44575584401982155,
      "grad_norm": 1.2099249362945557,
      "learning_rate": 9.168506157627353e-06,
      "loss": 0.0381,
      "step": 272380
    },
    {
      "epoch": 0.4457885744584749,
      "grad_norm": 1.6951526403427124,
      "learning_rate": 9.168440265413837e-06,
      "loss": 0.0388,
      "step": 272400
    },
    {
      "epoch": 0.44582130489712823,
      "grad_norm": 2.795325756072998,
      "learning_rate": 9.16837437320032e-06,
      "loss": 0.0286,
      "step": 272420
    },
    {
      "epoch": 0.44585403533578155,
      "grad_norm": 4.2831315994262695,
      "learning_rate": 9.168308480986802e-06,
      "loss": 0.0621,
      "step": 272440
    },
    {
      "epoch": 0.4458867657744349,
      "grad_norm": 1.7879692316055298,
      "learning_rate": 9.168242588773286e-06,
      "loss": 0.0367,
      "step": 272460
    },
    {
      "epoch": 0.44591949621308824,
      "grad_norm": 2.449445962905884,
      "learning_rate": 9.16817669655977e-06,
      "loss": 0.0309,
      "step": 272480
    },
    {
      "epoch": 0.4459522266517416,
      "grad_norm": 1.571802020072937,
      "learning_rate": 9.168110804346251e-06,
      "loss": 0.0389,
      "step": 272500
    },
    {
      "epoch": 0.44598495709039493,
      "grad_norm": 0.8900780081748962,
      "learning_rate": 9.168044912132735e-06,
      "loss": 0.0447,
      "step": 272520
    },
    {
      "epoch": 0.44601768752904825,
      "grad_norm": 2.5164246559143066,
      "learning_rate": 9.167979019919217e-06,
      "loss": 0.0319,
      "step": 272540
    },
    {
      "epoch": 0.4460504179677016,
      "grad_norm": 1.9384034872055054,
      "learning_rate": 9.1679131277057e-06,
      "loss": 0.0449,
      "step": 272560
    },
    {
      "epoch": 0.44608314840635493,
      "grad_norm": 1.804456114768982,
      "learning_rate": 9.167847235492182e-06,
      "loss": 0.0415,
      "step": 272580
    },
    {
      "epoch": 0.4461158788450083,
      "grad_norm": 5.619157314300537,
      "learning_rate": 9.167781343278666e-06,
      "loss": 0.049,
      "step": 272600
    },
    {
      "epoch": 0.4461486092836616,
      "grad_norm": 0.7750800251960754,
      "learning_rate": 9.16771545106515e-06,
      "loss": 0.0389,
      "step": 272620
    },
    {
      "epoch": 0.44618133972231494,
      "grad_norm": 1.3856734037399292,
      "learning_rate": 9.167649558851631e-06,
      "loss": 0.0418,
      "step": 272640
    },
    {
      "epoch": 0.4462140701609683,
      "grad_norm": 2.1818735599517822,
      "learning_rate": 9.167583666638115e-06,
      "loss": 0.0451,
      "step": 272660
    },
    {
      "epoch": 0.44624680059962163,
      "grad_norm": 0.8231576681137085,
      "learning_rate": 9.167517774424597e-06,
      "loss": 0.0519,
      "step": 272680
    },
    {
      "epoch": 0.446279531038275,
      "grad_norm": 2.558962821960449,
      "learning_rate": 9.16745188221108e-06,
      "loss": 0.0519,
      "step": 272700
    },
    {
      "epoch": 0.4463122614769283,
      "grad_norm": 0.8154116272926331,
      "learning_rate": 9.167385989997562e-06,
      "loss": 0.0379,
      "step": 272720
    },
    {
      "epoch": 0.44634499191558163,
      "grad_norm": 0.9141525626182556,
      "learning_rate": 9.167320097784046e-06,
      "loss": 0.0519,
      "step": 272740
    },
    {
      "epoch": 0.446377722354235,
      "grad_norm": 1.7803655862808228,
      "learning_rate": 9.167254205570528e-06,
      "loss": 0.0571,
      "step": 272760
    },
    {
      "epoch": 0.4464104527928883,
      "grad_norm": 1.2812830209732056,
      "learning_rate": 9.167188313357011e-06,
      "loss": 0.045,
      "step": 272780
    },
    {
      "epoch": 0.4464431832315417,
      "grad_norm": 0.4881989061832428,
      "learning_rate": 9.167122421143493e-06,
      "loss": 0.0384,
      "step": 272800
    },
    {
      "epoch": 0.446475913670195,
      "grad_norm": 1.234471082687378,
      "learning_rate": 9.167056528929977e-06,
      "loss": 0.0401,
      "step": 272820
    },
    {
      "epoch": 0.44650864410884833,
      "grad_norm": 1.2515164613723755,
      "learning_rate": 9.16699063671646e-06,
      "loss": 0.0424,
      "step": 272840
    },
    {
      "epoch": 0.4465413745475017,
      "grad_norm": 1.0047073364257812,
      "learning_rate": 9.166924744502944e-06,
      "loss": 0.0405,
      "step": 272860
    },
    {
      "epoch": 0.446574104986155,
      "grad_norm": 0.852783203125,
      "learning_rate": 9.166858852289426e-06,
      "loss": 0.0408,
      "step": 272880
    },
    {
      "epoch": 0.4466068354248084,
      "grad_norm": 2.12445330619812,
      "learning_rate": 9.16679296007591e-06,
      "loss": 0.0362,
      "step": 272900
    },
    {
      "epoch": 0.4466395658634617,
      "grad_norm": 1.5227543115615845,
      "learning_rate": 9.166727067862391e-06,
      "loss": 0.0318,
      "step": 272920
    },
    {
      "epoch": 0.446672296302115,
      "grad_norm": 0.4631950855255127,
      "learning_rate": 9.166661175648875e-06,
      "loss": 0.0436,
      "step": 272940
    },
    {
      "epoch": 0.4467050267407684,
      "grad_norm": 3.8347952365875244,
      "learning_rate": 9.166595283435357e-06,
      "loss": 0.0518,
      "step": 272960
    },
    {
      "epoch": 0.4467377571794217,
      "grad_norm": 7.741352081298828,
      "learning_rate": 9.16652939122184e-06,
      "loss": 0.0351,
      "step": 272980
    },
    {
      "epoch": 0.4467704876180751,
      "grad_norm": 2.354036808013916,
      "learning_rate": 9.166463499008324e-06,
      "loss": 0.0507,
      "step": 273000
    },
    {
      "epoch": 0.4468032180567284,
      "grad_norm": 1.0743427276611328,
      "learning_rate": 9.166397606794806e-06,
      "loss": 0.0378,
      "step": 273020
    },
    {
      "epoch": 0.4468359484953817,
      "grad_norm": 1.8756422996520996,
      "learning_rate": 9.16633171458129e-06,
      "loss": 0.0291,
      "step": 273040
    },
    {
      "epoch": 0.4468686789340351,
      "grad_norm": 0.8711757659912109,
      "learning_rate": 9.166265822367771e-06,
      "loss": 0.0511,
      "step": 273060
    },
    {
      "epoch": 0.4469014093726884,
      "grad_norm": 0.6608485579490662,
      "learning_rate": 9.166199930154255e-06,
      "loss": 0.0412,
      "step": 273080
    },
    {
      "epoch": 0.4469341398113417,
      "grad_norm": 0.40432459115982056,
      "learning_rate": 9.166134037940737e-06,
      "loss": 0.0368,
      "step": 273100
    },
    {
      "epoch": 0.4469668702499951,
      "grad_norm": 1.7546805143356323,
      "learning_rate": 9.16606814572722e-06,
      "loss": 0.0429,
      "step": 273120
    },
    {
      "epoch": 0.4469996006886484,
      "grad_norm": 4.419473171234131,
      "learning_rate": 9.166002253513702e-06,
      "loss": 0.0586,
      "step": 273140
    },
    {
      "epoch": 0.4470323311273018,
      "grad_norm": 1.0554025173187256,
      "learning_rate": 9.165936361300186e-06,
      "loss": 0.0379,
      "step": 273160
    },
    {
      "epoch": 0.4470650615659551,
      "grad_norm": 0.3250506520271301,
      "learning_rate": 9.165870469086668e-06,
      "loss": 0.0288,
      "step": 273180
    },
    {
      "epoch": 0.4470977920046084,
      "grad_norm": 0.6812494993209839,
      "learning_rate": 9.165804576873151e-06,
      "loss": 0.0415,
      "step": 273200
    },
    {
      "epoch": 0.4471305224432618,
      "grad_norm": 1.6920883655548096,
      "learning_rate": 9.165738684659635e-06,
      "loss": 0.0422,
      "step": 273220
    },
    {
      "epoch": 0.4471632528819151,
      "grad_norm": 1.0467360019683838,
      "learning_rate": 9.165672792446117e-06,
      "loss": 0.0424,
      "step": 273240
    },
    {
      "epoch": 0.4471959833205685,
      "grad_norm": 1.2429012060165405,
      "learning_rate": 9.1656069002326e-06,
      "loss": 0.0528,
      "step": 273260
    },
    {
      "epoch": 0.4472287137592218,
      "grad_norm": 0.8534144759178162,
      "learning_rate": 9.165541008019084e-06,
      "loss": 0.0525,
      "step": 273280
    },
    {
      "epoch": 0.4472614441978751,
      "grad_norm": 0.4481663107872009,
      "learning_rate": 9.165475115805566e-06,
      "loss": 0.04,
      "step": 273300
    },
    {
      "epoch": 0.4472941746365285,
      "grad_norm": 1.549350380897522,
      "learning_rate": 9.16540922359205e-06,
      "loss": 0.0502,
      "step": 273320
    },
    {
      "epoch": 0.4473269050751818,
      "grad_norm": 1.0688422918319702,
      "learning_rate": 9.165343331378533e-06,
      "loss": 0.0488,
      "step": 273340
    },
    {
      "epoch": 0.4473596355138352,
      "grad_norm": 0.6843125224113464,
      "learning_rate": 9.165277439165015e-06,
      "loss": 0.0317,
      "step": 273360
    },
    {
      "epoch": 0.4473923659524885,
      "grad_norm": 4.78195858001709,
      "learning_rate": 9.165211546951498e-06,
      "loss": 0.0553,
      "step": 273380
    },
    {
      "epoch": 0.4474250963911418,
      "grad_norm": 0.2649399936199188,
      "learning_rate": 9.16514565473798e-06,
      "loss": 0.0441,
      "step": 273400
    },
    {
      "epoch": 0.4474578268297952,
      "grad_norm": 1.106519341468811,
      "learning_rate": 9.165079762524464e-06,
      "loss": 0.043,
      "step": 273420
    },
    {
      "epoch": 0.4474905572684485,
      "grad_norm": 1.435258150100708,
      "learning_rate": 9.165013870310946e-06,
      "loss": 0.0476,
      "step": 273440
    },
    {
      "epoch": 0.44752328770710187,
      "grad_norm": 3.6830034255981445,
      "learning_rate": 9.16494797809743e-06,
      "loss": 0.0552,
      "step": 273460
    },
    {
      "epoch": 0.4475560181457552,
      "grad_norm": 2.2559540271759033,
      "learning_rate": 9.164882085883911e-06,
      "loss": 0.0335,
      "step": 273480
    },
    {
      "epoch": 0.4475887485844085,
      "grad_norm": 1.7299290895462036,
      "learning_rate": 9.164816193670395e-06,
      "loss": 0.042,
      "step": 273500
    },
    {
      "epoch": 0.4476214790230619,
      "grad_norm": 3.3038206100463867,
      "learning_rate": 9.164750301456877e-06,
      "loss": 0.0376,
      "step": 273520
    },
    {
      "epoch": 0.4476542094617152,
      "grad_norm": 1.704276204109192,
      "learning_rate": 9.16468440924336e-06,
      "loss": 0.0431,
      "step": 273540
    },
    {
      "epoch": 0.44768693990036856,
      "grad_norm": 0.6000993847846985,
      "learning_rate": 9.164618517029842e-06,
      "loss": 0.0402,
      "step": 273560
    },
    {
      "epoch": 0.4477196703390219,
      "grad_norm": 1.350968599319458,
      "learning_rate": 9.164552624816326e-06,
      "loss": 0.0408,
      "step": 273580
    },
    {
      "epoch": 0.4477524007776752,
      "grad_norm": 0.5831809639930725,
      "learning_rate": 9.16448673260281e-06,
      "loss": 0.0375,
      "step": 273600
    },
    {
      "epoch": 0.44778513121632857,
      "grad_norm": 1.073241114616394,
      "learning_rate": 9.164420840389291e-06,
      "loss": 0.046,
      "step": 273620
    },
    {
      "epoch": 0.4478178616549819,
      "grad_norm": 0.477385550737381,
      "learning_rate": 9.164354948175775e-06,
      "loss": 0.0355,
      "step": 273640
    },
    {
      "epoch": 0.44785059209363526,
      "grad_norm": 3.8592987060546875,
      "learning_rate": 9.164289055962259e-06,
      "loss": 0.0474,
      "step": 273660
    },
    {
      "epoch": 0.4478833225322886,
      "grad_norm": 1.499363660812378,
      "learning_rate": 9.16422316374874e-06,
      "loss": 0.0364,
      "step": 273680
    },
    {
      "epoch": 0.4479160529709419,
      "grad_norm": 1.4970057010650635,
      "learning_rate": 9.164157271535224e-06,
      "loss": 0.0589,
      "step": 273700
    },
    {
      "epoch": 0.44794878340959526,
      "grad_norm": 1.6121935844421387,
      "learning_rate": 9.164091379321708e-06,
      "loss": 0.0461,
      "step": 273720
    },
    {
      "epoch": 0.4479815138482486,
      "grad_norm": 1.0811307430267334,
      "learning_rate": 9.16402548710819e-06,
      "loss": 0.0402,
      "step": 273740
    },
    {
      "epoch": 0.44801424428690195,
      "grad_norm": 2.9219508171081543,
      "learning_rate": 9.163959594894673e-06,
      "loss": 0.0465,
      "step": 273760
    },
    {
      "epoch": 0.44804697472555527,
      "grad_norm": 1.5452178716659546,
      "learning_rate": 9.163893702681155e-06,
      "loss": 0.0416,
      "step": 273780
    },
    {
      "epoch": 0.4480797051642086,
      "grad_norm": 0.31788140535354614,
      "learning_rate": 9.163827810467639e-06,
      "loss": 0.0424,
      "step": 273800
    },
    {
      "epoch": 0.44811243560286196,
      "grad_norm": 0.5417172312736511,
      "learning_rate": 9.16376191825412e-06,
      "loss": 0.0392,
      "step": 273820
    },
    {
      "epoch": 0.4481451660415153,
      "grad_norm": 0.7754842638969421,
      "learning_rate": 9.163696026040604e-06,
      "loss": 0.0662,
      "step": 273840
    },
    {
      "epoch": 0.44817789648016865,
      "grad_norm": 1.3528542518615723,
      "learning_rate": 9.163630133827086e-06,
      "loss": 0.0467,
      "step": 273860
    },
    {
      "epoch": 0.44821062691882196,
      "grad_norm": 0.13651849329471588,
      "learning_rate": 9.16356424161357e-06,
      "loss": 0.0373,
      "step": 273880
    },
    {
      "epoch": 0.4482433573574753,
      "grad_norm": 1.7078646421432495,
      "learning_rate": 9.163498349400051e-06,
      "loss": 0.0355,
      "step": 273900
    },
    {
      "epoch": 0.44827608779612865,
      "grad_norm": 1.7700856924057007,
      "learning_rate": 9.163432457186535e-06,
      "loss": 0.0408,
      "step": 273920
    },
    {
      "epoch": 0.44830881823478197,
      "grad_norm": 0.7887254357337952,
      "learning_rate": 9.163366564973017e-06,
      "loss": 0.0442,
      "step": 273940
    },
    {
      "epoch": 0.44834154867343534,
      "grad_norm": 0.96416836977005,
      "learning_rate": 9.1633006727595e-06,
      "loss": 0.0415,
      "step": 273960
    },
    {
      "epoch": 0.44837427911208866,
      "grad_norm": 2.4367527961730957,
      "learning_rate": 9.163234780545982e-06,
      "loss": 0.0349,
      "step": 273980
    },
    {
      "epoch": 0.448407009550742,
      "grad_norm": 4.313111782073975,
      "learning_rate": 9.163168888332466e-06,
      "loss": 0.0485,
      "step": 274000
    },
    {
      "epoch": 0.44843973998939535,
      "grad_norm": 0.6624035239219666,
      "learning_rate": 9.16310299611895e-06,
      "loss": 0.0397,
      "step": 274020
    },
    {
      "epoch": 0.44847247042804866,
      "grad_norm": 3.1806247234344482,
      "learning_rate": 9.163037103905431e-06,
      "loss": 0.0414,
      "step": 274040
    },
    {
      "epoch": 0.44850520086670204,
      "grad_norm": 2.6901895999908447,
      "learning_rate": 9.162971211691915e-06,
      "loss": 0.0632,
      "step": 274060
    },
    {
      "epoch": 0.44853793130535535,
      "grad_norm": 0.9889597296714783,
      "learning_rate": 9.162905319478399e-06,
      "loss": 0.053,
      "step": 274080
    },
    {
      "epoch": 0.44857066174400867,
      "grad_norm": 3.646897554397583,
      "learning_rate": 9.16283942726488e-06,
      "loss": 0.0473,
      "step": 274100
    },
    {
      "epoch": 0.44860339218266204,
      "grad_norm": 0.7109391093254089,
      "learning_rate": 9.162773535051364e-06,
      "loss": 0.0413,
      "step": 274120
    },
    {
      "epoch": 0.44863612262131536,
      "grad_norm": 1.277660608291626,
      "learning_rate": 9.162707642837848e-06,
      "loss": 0.0311,
      "step": 274140
    },
    {
      "epoch": 0.44866885305996873,
      "grad_norm": 1.0539577007293701,
      "learning_rate": 9.16264175062433e-06,
      "loss": 0.0416,
      "step": 274160
    },
    {
      "epoch": 0.44870158349862205,
      "grad_norm": 2.4177818298339844,
      "learning_rate": 9.162575858410813e-06,
      "loss": 0.0411,
      "step": 274180
    },
    {
      "epoch": 0.44873431393727536,
      "grad_norm": 1.4262394905090332,
      "learning_rate": 9.162509966197295e-06,
      "loss": 0.0499,
      "step": 274200
    },
    {
      "epoch": 0.44876704437592874,
      "grad_norm": 3.7710602283477783,
      "learning_rate": 9.162444073983779e-06,
      "loss": 0.0521,
      "step": 274220
    },
    {
      "epoch": 0.44879977481458205,
      "grad_norm": 0.9934567213058472,
      "learning_rate": 9.16237818177026e-06,
      "loss": 0.063,
      "step": 274240
    },
    {
      "epoch": 0.4488325052532354,
      "grad_norm": 2.023902416229248,
      "learning_rate": 9.162312289556744e-06,
      "loss": 0.0374,
      "step": 274260
    },
    {
      "epoch": 0.44886523569188874,
      "grad_norm": 1.5801132917404175,
      "learning_rate": 9.162246397343226e-06,
      "loss": 0.0443,
      "step": 274280
    },
    {
      "epoch": 0.44889796613054206,
      "grad_norm": 0.4701145589351654,
      "learning_rate": 9.16218050512971e-06,
      "loss": 0.0398,
      "step": 274300
    },
    {
      "epoch": 0.44893069656919543,
      "grad_norm": 1.5107905864715576,
      "learning_rate": 9.162114612916191e-06,
      "loss": 0.0483,
      "step": 274320
    },
    {
      "epoch": 0.44896342700784875,
      "grad_norm": 1.5531798601150513,
      "learning_rate": 9.162048720702675e-06,
      "loss": 0.039,
      "step": 274340
    },
    {
      "epoch": 0.4489961574465021,
      "grad_norm": 2.664738178253174,
      "learning_rate": 9.161982828489157e-06,
      "loss": 0.0379,
      "step": 274360
    },
    {
      "epoch": 0.44902888788515544,
      "grad_norm": 1.1235207319259644,
      "learning_rate": 9.16191693627564e-06,
      "loss": 0.0421,
      "step": 274380
    },
    {
      "epoch": 0.44906161832380875,
      "grad_norm": 0.8230503797531128,
      "learning_rate": 9.161851044062124e-06,
      "loss": 0.0428,
      "step": 274400
    },
    {
      "epoch": 0.4490943487624621,
      "grad_norm": 2.6021640300750732,
      "learning_rate": 9.161785151848606e-06,
      "loss": 0.0429,
      "step": 274420
    },
    {
      "epoch": 0.44912707920111544,
      "grad_norm": 4.980271339416504,
      "learning_rate": 9.16171925963509e-06,
      "loss": 0.0419,
      "step": 274440
    },
    {
      "epoch": 0.4491598096397688,
      "grad_norm": 2.683344602584839,
      "learning_rate": 9.161653367421573e-06,
      "loss": 0.0557,
      "step": 274460
    },
    {
      "epoch": 0.44919254007842213,
      "grad_norm": 0.36142876744270325,
      "learning_rate": 9.161587475208055e-06,
      "loss": 0.0344,
      "step": 274480
    },
    {
      "epoch": 0.44922527051707545,
      "grad_norm": 0.564651370048523,
      "learning_rate": 9.161521582994539e-06,
      "loss": 0.0428,
      "step": 274500
    },
    {
      "epoch": 0.4492580009557288,
      "grad_norm": 1.5608175992965698,
      "learning_rate": 9.161455690781022e-06,
      "loss": 0.035,
      "step": 274520
    },
    {
      "epoch": 0.44929073139438214,
      "grad_norm": 2.365248680114746,
      "learning_rate": 9.161389798567504e-06,
      "loss": 0.0474,
      "step": 274540
    },
    {
      "epoch": 0.4493234618330355,
      "grad_norm": 1.6135324239730835,
      "learning_rate": 9.161323906353988e-06,
      "loss": 0.0329,
      "step": 274560
    },
    {
      "epoch": 0.4493561922716888,
      "grad_norm": 0.3041180670261383,
      "learning_rate": 9.16125801414047e-06,
      "loss": 0.0404,
      "step": 274580
    },
    {
      "epoch": 0.44938892271034214,
      "grad_norm": 1.356798768043518,
      "learning_rate": 9.161192121926953e-06,
      "loss": 0.048,
      "step": 274600
    },
    {
      "epoch": 0.4494216531489955,
      "grad_norm": 1.3924585580825806,
      "learning_rate": 9.161126229713435e-06,
      "loss": 0.0507,
      "step": 274620
    },
    {
      "epoch": 0.44945438358764883,
      "grad_norm": 0.9630769491195679,
      "learning_rate": 9.161060337499919e-06,
      "loss": 0.0534,
      "step": 274640
    },
    {
      "epoch": 0.4494871140263022,
      "grad_norm": 1.9827548265457153,
      "learning_rate": 9.1609944452864e-06,
      "loss": 0.0429,
      "step": 274660
    },
    {
      "epoch": 0.4495198444649555,
      "grad_norm": 2.4225308895111084,
      "learning_rate": 9.160928553072884e-06,
      "loss": 0.0544,
      "step": 274680
    },
    {
      "epoch": 0.44955257490360884,
      "grad_norm": 1.2483218908309937,
      "learning_rate": 9.160862660859366e-06,
      "loss": 0.0517,
      "step": 274700
    },
    {
      "epoch": 0.4495853053422622,
      "grad_norm": 0.41372790932655334,
      "learning_rate": 9.16079676864585e-06,
      "loss": 0.0339,
      "step": 274720
    },
    {
      "epoch": 0.4496180357809155,
      "grad_norm": 4.8421478271484375,
      "learning_rate": 9.160730876432333e-06,
      "loss": 0.0583,
      "step": 274740
    },
    {
      "epoch": 0.4496507662195689,
      "grad_norm": 2.370347738265991,
      "learning_rate": 9.160664984218815e-06,
      "loss": 0.0444,
      "step": 274760
    },
    {
      "epoch": 0.4496834966582222,
      "grad_norm": 0.8431330323219299,
      "learning_rate": 9.160599092005299e-06,
      "loss": 0.0431,
      "step": 274780
    },
    {
      "epoch": 0.44971622709687553,
      "grad_norm": 1.147711992263794,
      "learning_rate": 9.16053319979178e-06,
      "loss": 0.0463,
      "step": 274800
    },
    {
      "epoch": 0.4497489575355289,
      "grad_norm": 1.9210586547851562,
      "learning_rate": 9.160467307578264e-06,
      "loss": 0.0418,
      "step": 274820
    },
    {
      "epoch": 0.4497816879741822,
      "grad_norm": 42.670494079589844,
      "learning_rate": 9.160401415364746e-06,
      "loss": 0.048,
      "step": 274840
    },
    {
      "epoch": 0.4498144184128356,
      "grad_norm": 1.315071702003479,
      "learning_rate": 9.16033552315123e-06,
      "loss": 0.0344,
      "step": 274860
    },
    {
      "epoch": 0.4498471488514889,
      "grad_norm": 1.4029195308685303,
      "learning_rate": 9.160269630937713e-06,
      "loss": 0.0462,
      "step": 274880
    },
    {
      "epoch": 0.4498798792901422,
      "grad_norm": 0.201985701918602,
      "learning_rate": 9.160203738724195e-06,
      "loss": 0.0372,
      "step": 274900
    },
    {
      "epoch": 0.4499126097287956,
      "grad_norm": 1.0233443975448608,
      "learning_rate": 9.160137846510679e-06,
      "loss": 0.0518,
      "step": 274920
    },
    {
      "epoch": 0.4499453401674489,
      "grad_norm": 3.497152328491211,
      "learning_rate": 9.160071954297162e-06,
      "loss": 0.0422,
      "step": 274940
    },
    {
      "epoch": 0.4499780706061023,
      "grad_norm": 1.9691429138183594,
      "learning_rate": 9.160006062083644e-06,
      "loss": 0.0361,
      "step": 274960
    },
    {
      "epoch": 0.4500108010447556,
      "grad_norm": 4.184508800506592,
      "learning_rate": 9.159940169870128e-06,
      "loss": 0.0441,
      "step": 274980
    },
    {
      "epoch": 0.4500435314834089,
      "grad_norm": 1.7615470886230469,
      "learning_rate": 9.15987427765661e-06,
      "loss": 0.0551,
      "step": 275000
    },
    {
      "epoch": 0.4500762619220623,
      "grad_norm": 4.87101936340332,
      "learning_rate": 9.159808385443093e-06,
      "loss": 0.0451,
      "step": 275020
    },
    {
      "epoch": 0.4501089923607156,
      "grad_norm": 0.43812593817710876,
      "learning_rate": 9.159742493229575e-06,
      "loss": 0.0255,
      "step": 275040
    },
    {
      "epoch": 0.450141722799369,
      "grad_norm": 0.9495427012443542,
      "learning_rate": 9.159676601016059e-06,
      "loss": 0.0351,
      "step": 275060
    },
    {
      "epoch": 0.4501744532380223,
      "grad_norm": 2.0577683448791504,
      "learning_rate": 9.159610708802542e-06,
      "loss": 0.0368,
      "step": 275080
    },
    {
      "epoch": 0.4502071836766756,
      "grad_norm": 3.99048113822937,
      "learning_rate": 9.159544816589024e-06,
      "loss": 0.0441,
      "step": 275100
    },
    {
      "epoch": 0.450239914115329,
      "grad_norm": 1.5430907011032104,
      "learning_rate": 9.159478924375508e-06,
      "loss": 0.0461,
      "step": 275120
    },
    {
      "epoch": 0.4502726445539823,
      "grad_norm": 0.9980822801589966,
      "learning_rate": 9.15941303216199e-06,
      "loss": 0.0547,
      "step": 275140
    },
    {
      "epoch": 0.4503053749926357,
      "grad_norm": 1.5456938743591309,
      "learning_rate": 9.159347139948473e-06,
      "loss": 0.0369,
      "step": 275160
    },
    {
      "epoch": 0.450338105431289,
      "grad_norm": 1.0703277587890625,
      "learning_rate": 9.159281247734955e-06,
      "loss": 0.0301,
      "step": 275180
    },
    {
      "epoch": 0.4503708358699423,
      "grad_norm": 0.8209633827209473,
      "learning_rate": 9.159215355521439e-06,
      "loss": 0.0599,
      "step": 275200
    },
    {
      "epoch": 0.4504035663085957,
      "grad_norm": 0.8177977204322815,
      "learning_rate": 9.15914946330792e-06,
      "loss": 0.0413,
      "step": 275220
    },
    {
      "epoch": 0.450436296747249,
      "grad_norm": 0.7188168168067932,
      "learning_rate": 9.159083571094404e-06,
      "loss": 0.0463,
      "step": 275240
    },
    {
      "epoch": 0.45046902718590237,
      "grad_norm": 2.2898406982421875,
      "learning_rate": 9.159017678880888e-06,
      "loss": 0.0476,
      "step": 275260
    },
    {
      "epoch": 0.4505017576245557,
      "grad_norm": 2.198408365249634,
      "learning_rate": 9.15895178666737e-06,
      "loss": 0.0469,
      "step": 275280
    },
    {
      "epoch": 0.450534488063209,
      "grad_norm": 2.043363571166992,
      "learning_rate": 9.158885894453853e-06,
      "loss": 0.0514,
      "step": 275300
    },
    {
      "epoch": 0.4505672185018624,
      "grad_norm": 1.3699579238891602,
      "learning_rate": 9.158820002240337e-06,
      "loss": 0.0468,
      "step": 275320
    },
    {
      "epoch": 0.4505999489405157,
      "grad_norm": 1.4955532550811768,
      "learning_rate": 9.158754110026819e-06,
      "loss": 0.0398,
      "step": 275340
    },
    {
      "epoch": 0.45063267937916907,
      "grad_norm": 2.0775790214538574,
      "learning_rate": 9.158688217813302e-06,
      "loss": 0.0438,
      "step": 275360
    },
    {
      "epoch": 0.4506654098178224,
      "grad_norm": 0.3363634943962097,
      "learning_rate": 9.158622325599784e-06,
      "loss": 0.0562,
      "step": 275380
    },
    {
      "epoch": 0.4506981402564757,
      "grad_norm": 0.702505350112915,
      "learning_rate": 9.158556433386268e-06,
      "loss": 0.037,
      "step": 275400
    },
    {
      "epoch": 0.45073087069512907,
      "grad_norm": 1.6119645833969116,
      "learning_rate": 9.15849054117275e-06,
      "loss": 0.0505,
      "step": 275420
    },
    {
      "epoch": 0.4507636011337824,
      "grad_norm": 1.537226676940918,
      "learning_rate": 9.158424648959233e-06,
      "loss": 0.0474,
      "step": 275440
    },
    {
      "epoch": 0.45079633157243576,
      "grad_norm": 2.934950351715088,
      "learning_rate": 9.158358756745717e-06,
      "loss": 0.0453,
      "step": 275460
    },
    {
      "epoch": 0.4508290620110891,
      "grad_norm": 0.9680265188217163,
      "learning_rate": 9.158292864532199e-06,
      "loss": 0.0391,
      "step": 275480
    },
    {
      "epoch": 0.4508617924497424,
      "grad_norm": 0.4333651661872864,
      "learning_rate": 9.158226972318682e-06,
      "loss": 0.0335,
      "step": 275500
    },
    {
      "epoch": 0.45089452288839577,
      "grad_norm": 0.9875586032867432,
      "learning_rate": 9.158161080105164e-06,
      "loss": 0.0284,
      "step": 275520
    },
    {
      "epoch": 0.4509272533270491,
      "grad_norm": 1.2383832931518555,
      "learning_rate": 9.158095187891648e-06,
      "loss": 0.051,
      "step": 275540
    },
    {
      "epoch": 0.45095998376570245,
      "grad_norm": 8.948843955993652,
      "learning_rate": 9.15802929567813e-06,
      "loss": 0.0391,
      "step": 275560
    },
    {
      "epoch": 0.45099271420435577,
      "grad_norm": 0.9576166272163391,
      "learning_rate": 9.157963403464613e-06,
      "loss": 0.0368,
      "step": 275580
    },
    {
      "epoch": 0.4510254446430091,
      "grad_norm": 2.739386796951294,
      "learning_rate": 9.157897511251095e-06,
      "loss": 0.0468,
      "step": 275600
    },
    {
      "epoch": 0.45105817508166246,
      "grad_norm": 4.584391117095947,
      "learning_rate": 9.157831619037579e-06,
      "loss": 0.0357,
      "step": 275620
    },
    {
      "epoch": 0.4510909055203158,
      "grad_norm": 3.1094181537628174,
      "learning_rate": 9.157765726824062e-06,
      "loss": 0.0445,
      "step": 275640
    },
    {
      "epoch": 0.45112363595896915,
      "grad_norm": 1.271445393562317,
      "learning_rate": 9.157699834610544e-06,
      "loss": 0.0471,
      "step": 275660
    },
    {
      "epoch": 0.45115636639762247,
      "grad_norm": 1.5148594379425049,
      "learning_rate": 9.157633942397028e-06,
      "loss": 0.0496,
      "step": 275680
    },
    {
      "epoch": 0.4511890968362758,
      "grad_norm": 1.5674372911453247,
      "learning_rate": 9.157568050183512e-06,
      "loss": 0.0459,
      "step": 275700
    },
    {
      "epoch": 0.45122182727492915,
      "grad_norm": 0.21668694913387299,
      "learning_rate": 9.157502157969993e-06,
      "loss": 0.0376,
      "step": 275720
    },
    {
      "epoch": 0.45125455771358247,
      "grad_norm": 1.9880839586257935,
      "learning_rate": 9.157436265756477e-06,
      "loss": 0.0467,
      "step": 275740
    },
    {
      "epoch": 0.45128728815223584,
      "grad_norm": 0.8100703954696655,
      "learning_rate": 9.157370373542959e-06,
      "loss": 0.0409,
      "step": 275760
    },
    {
      "epoch": 0.45132001859088916,
      "grad_norm": 6.40650749206543,
      "learning_rate": 9.157304481329442e-06,
      "loss": 0.0423,
      "step": 275780
    },
    {
      "epoch": 0.4513527490295425,
      "grad_norm": 2.3537180423736572,
      "learning_rate": 9.157238589115926e-06,
      "loss": 0.0451,
      "step": 275800
    },
    {
      "epoch": 0.45138547946819585,
      "grad_norm": 1.3539481163024902,
      "learning_rate": 9.157172696902408e-06,
      "loss": 0.0407,
      "step": 275820
    },
    {
      "epoch": 0.45141820990684917,
      "grad_norm": 1.3492926359176636,
      "learning_rate": 9.157106804688892e-06,
      "loss": 0.0422,
      "step": 275840
    },
    {
      "epoch": 0.4514509403455025,
      "grad_norm": 1.3084642887115479,
      "learning_rate": 9.157040912475373e-06,
      "loss": 0.0564,
      "step": 275860
    },
    {
      "epoch": 0.45148367078415585,
      "grad_norm": 0.3376356363296509,
      "learning_rate": 9.156975020261857e-06,
      "loss": 0.036,
      "step": 275880
    },
    {
      "epoch": 0.45151640122280917,
      "grad_norm": 1.7555568218231201,
      "learning_rate": 9.156909128048339e-06,
      "loss": 0.0401,
      "step": 275900
    },
    {
      "epoch": 0.45154913166146254,
      "grad_norm": 1.9226493835449219,
      "learning_rate": 9.156843235834823e-06,
      "loss": 0.0363,
      "step": 275920
    },
    {
      "epoch": 0.45158186210011586,
      "grad_norm": 1.2426540851593018,
      "learning_rate": 9.156777343621304e-06,
      "loss": 0.0518,
      "step": 275940
    },
    {
      "epoch": 0.4516145925387692,
      "grad_norm": 2.0225861072540283,
      "learning_rate": 9.156711451407788e-06,
      "loss": 0.0452,
      "step": 275960
    },
    {
      "epoch": 0.45164732297742255,
      "grad_norm": 0.8512201905250549,
      "learning_rate": 9.15664555919427e-06,
      "loss": 0.0556,
      "step": 275980
    },
    {
      "epoch": 0.45168005341607587,
      "grad_norm": 2.351288080215454,
      "learning_rate": 9.156579666980753e-06,
      "loss": 0.041,
      "step": 276000
    },
    {
      "epoch": 0.45171278385472924,
      "grad_norm": 1.5847551822662354,
      "learning_rate": 9.156513774767235e-06,
      "loss": 0.0407,
      "step": 276020
    },
    {
      "epoch": 0.45174551429338256,
      "grad_norm": 0.6180952787399292,
      "learning_rate": 9.156447882553719e-06,
      "loss": 0.0367,
      "step": 276040
    },
    {
      "epoch": 0.45177824473203587,
      "grad_norm": 2.787235975265503,
      "learning_rate": 9.156381990340203e-06,
      "loss": 0.0446,
      "step": 276060
    },
    {
      "epoch": 0.45181097517068924,
      "grad_norm": 1.2046129703521729,
      "learning_rate": 9.156316098126684e-06,
      "loss": 0.0441,
      "step": 276080
    },
    {
      "epoch": 0.45184370560934256,
      "grad_norm": 3.416144609451294,
      "learning_rate": 9.156250205913168e-06,
      "loss": 0.0351,
      "step": 276100
    },
    {
      "epoch": 0.45187643604799593,
      "grad_norm": 0.8620383143424988,
      "learning_rate": 9.156184313699652e-06,
      "loss": 0.0418,
      "step": 276120
    },
    {
      "epoch": 0.45190916648664925,
      "grad_norm": 0.6134428977966309,
      "learning_rate": 9.156118421486133e-06,
      "loss": 0.0437,
      "step": 276140
    },
    {
      "epoch": 0.45194189692530257,
      "grad_norm": 0.2995082139968872,
      "learning_rate": 9.156052529272617e-06,
      "loss": 0.0378,
      "step": 276160
    },
    {
      "epoch": 0.45197462736395594,
      "grad_norm": 0.18473082780838013,
      "learning_rate": 9.1559866370591e-06,
      "loss": 0.0497,
      "step": 276180
    },
    {
      "epoch": 0.45200735780260926,
      "grad_norm": 0.5919588804244995,
      "learning_rate": 9.155920744845583e-06,
      "loss": 0.0508,
      "step": 276200
    },
    {
      "epoch": 0.4520400882412626,
      "grad_norm": 1.508537769317627,
      "learning_rate": 9.155854852632066e-06,
      "loss": 0.0392,
      "step": 276220
    },
    {
      "epoch": 0.45207281867991594,
      "grad_norm": 1.0989190340042114,
      "learning_rate": 9.155788960418548e-06,
      "loss": 0.034,
      "step": 276240
    },
    {
      "epoch": 0.45210554911856926,
      "grad_norm": 1.070156455039978,
      "learning_rate": 9.155723068205032e-06,
      "loss": 0.0545,
      "step": 276260
    },
    {
      "epoch": 0.45213827955722263,
      "grad_norm": 1.0863021612167358,
      "learning_rate": 9.155657175991514e-06,
      "loss": 0.031,
      "step": 276280
    },
    {
      "epoch": 0.45217100999587595,
      "grad_norm": 5.227461814880371,
      "learning_rate": 9.155591283777997e-06,
      "loss": 0.0471,
      "step": 276300
    },
    {
      "epoch": 0.4522037404345293,
      "grad_norm": 2.0557615756988525,
      "learning_rate": 9.155525391564479e-06,
      "loss": 0.0382,
      "step": 276320
    },
    {
      "epoch": 0.45223647087318264,
      "grad_norm": 0.9308983087539673,
      "learning_rate": 9.155459499350963e-06,
      "loss": 0.0344,
      "step": 276340
    },
    {
      "epoch": 0.45226920131183596,
      "grad_norm": 0.9301203489303589,
      "learning_rate": 9.155393607137444e-06,
      "loss": 0.048,
      "step": 276360
    },
    {
      "epoch": 0.4523019317504893,
      "grad_norm": 2.8351502418518066,
      "learning_rate": 9.155327714923928e-06,
      "loss": 0.0447,
      "step": 276380
    },
    {
      "epoch": 0.45233466218914264,
      "grad_norm": 1.6035957336425781,
      "learning_rate": 9.15526182271041e-06,
      "loss": 0.0375,
      "step": 276400
    },
    {
      "epoch": 0.452367392627796,
      "grad_norm": 1.5777313709259033,
      "learning_rate": 9.155195930496894e-06,
      "loss": 0.0379,
      "step": 276420
    },
    {
      "epoch": 0.45240012306644933,
      "grad_norm": 0.7451174259185791,
      "learning_rate": 9.155130038283377e-06,
      "loss": 0.0388,
      "step": 276440
    },
    {
      "epoch": 0.45243285350510265,
      "grad_norm": 1.3787415027618408,
      "learning_rate": 9.155064146069859e-06,
      "loss": 0.0406,
      "step": 276460
    },
    {
      "epoch": 0.452465583943756,
      "grad_norm": 2.132453680038452,
      "learning_rate": 9.154998253856343e-06,
      "loss": 0.0332,
      "step": 276480
    },
    {
      "epoch": 0.45249831438240934,
      "grad_norm": 3.1216561794281006,
      "learning_rate": 9.154932361642826e-06,
      "loss": 0.0473,
      "step": 276500
    },
    {
      "epoch": 0.4525310448210627,
      "grad_norm": 0.6257511377334595,
      "learning_rate": 9.154866469429308e-06,
      "loss": 0.0395,
      "step": 276520
    },
    {
      "epoch": 0.45256377525971603,
      "grad_norm": 2.022247791290283,
      "learning_rate": 9.154800577215792e-06,
      "loss": 0.0507,
      "step": 276540
    },
    {
      "epoch": 0.45259650569836934,
      "grad_norm": 1.5962233543395996,
      "learning_rate": 9.154734685002275e-06,
      "loss": 0.0522,
      "step": 276560
    },
    {
      "epoch": 0.4526292361370227,
      "grad_norm": 0.6207567453384399,
      "learning_rate": 9.154668792788757e-06,
      "loss": 0.0524,
      "step": 276580
    },
    {
      "epoch": 0.45266196657567603,
      "grad_norm": 1.266496181488037,
      "learning_rate": 9.15460290057524e-06,
      "loss": 0.054,
      "step": 276600
    },
    {
      "epoch": 0.4526946970143294,
      "grad_norm": 2.2352025508880615,
      "learning_rate": 9.154537008361723e-06,
      "loss": 0.0347,
      "step": 276620
    },
    {
      "epoch": 0.4527274274529827,
      "grad_norm": 0.7465254068374634,
      "learning_rate": 9.154471116148206e-06,
      "loss": 0.0305,
      "step": 276640
    },
    {
      "epoch": 0.45276015789163604,
      "grad_norm": 1.578717827796936,
      "learning_rate": 9.154405223934688e-06,
      "loss": 0.0404,
      "step": 276660
    },
    {
      "epoch": 0.4527928883302894,
      "grad_norm": 0.6956402063369751,
      "learning_rate": 9.154339331721172e-06,
      "loss": 0.0336,
      "step": 276680
    },
    {
      "epoch": 0.45282561876894273,
      "grad_norm": 1.993295431137085,
      "learning_rate": 9.154273439507654e-06,
      "loss": 0.0373,
      "step": 276700
    },
    {
      "epoch": 0.4528583492075961,
      "grad_norm": 1.1467366218566895,
      "learning_rate": 9.154207547294137e-06,
      "loss": 0.0534,
      "step": 276720
    },
    {
      "epoch": 0.4528910796462494,
      "grad_norm": 1.3794435262680054,
      "learning_rate": 9.154141655080619e-06,
      "loss": 0.0362,
      "step": 276740
    },
    {
      "epoch": 0.45292381008490273,
      "grad_norm": 0.778360903263092,
      "learning_rate": 9.154075762867103e-06,
      "loss": 0.0467,
      "step": 276760
    },
    {
      "epoch": 0.4529565405235561,
      "grad_norm": 2.1543633937835693,
      "learning_rate": 9.154009870653585e-06,
      "loss": 0.0545,
      "step": 276780
    },
    {
      "epoch": 0.4529892709622094,
      "grad_norm": 3.476086139678955,
      "learning_rate": 9.153943978440068e-06,
      "loss": 0.031,
      "step": 276800
    },
    {
      "epoch": 0.4530220014008628,
      "grad_norm": 1.9257146120071411,
      "learning_rate": 9.15387808622655e-06,
      "loss": 0.0518,
      "step": 276820
    },
    {
      "epoch": 0.4530547318395161,
      "grad_norm": 1.7749091386795044,
      "learning_rate": 9.153812194013034e-06,
      "loss": 0.0398,
      "step": 276840
    },
    {
      "epoch": 0.45308746227816943,
      "grad_norm": 3.652740955352783,
      "learning_rate": 9.153746301799517e-06,
      "loss": 0.039,
      "step": 276860
    },
    {
      "epoch": 0.4531201927168228,
      "grad_norm": 1.0251364707946777,
      "learning_rate": 9.153680409585999e-06,
      "loss": 0.0399,
      "step": 276880
    },
    {
      "epoch": 0.4531529231554761,
      "grad_norm": 18.248065948486328,
      "learning_rate": 9.153614517372483e-06,
      "loss": 0.0357,
      "step": 276900
    },
    {
      "epoch": 0.4531856535941295,
      "grad_norm": 2.4383020401000977,
      "learning_rate": 9.153548625158966e-06,
      "loss": 0.0407,
      "step": 276920
    },
    {
      "epoch": 0.4532183840327828,
      "grad_norm": 2.066239356994629,
      "learning_rate": 9.153482732945448e-06,
      "loss": 0.0453,
      "step": 276940
    },
    {
      "epoch": 0.4532511144714361,
      "grad_norm": 0.9684649705886841,
      "learning_rate": 9.153416840731932e-06,
      "loss": 0.0459,
      "step": 276960
    },
    {
      "epoch": 0.4532838449100895,
      "grad_norm": 2.590632677078247,
      "learning_rate": 9.153350948518415e-06,
      "loss": 0.0285,
      "step": 276980
    },
    {
      "epoch": 0.4533165753487428,
      "grad_norm": 1.3876359462738037,
      "learning_rate": 9.153285056304897e-06,
      "loss": 0.0402,
      "step": 277000
    },
    {
      "epoch": 0.4533493057873962,
      "grad_norm": 1.6849486827850342,
      "learning_rate": 9.15321916409138e-06,
      "loss": 0.0504,
      "step": 277020
    },
    {
      "epoch": 0.4533820362260495,
      "grad_norm": 0.915664792060852,
      "learning_rate": 9.153153271877863e-06,
      "loss": 0.0463,
      "step": 277040
    },
    {
      "epoch": 0.4534147666647028,
      "grad_norm": 0.9252426028251648,
      "learning_rate": 9.153087379664346e-06,
      "loss": 0.0426,
      "step": 277060
    },
    {
      "epoch": 0.4534474971033562,
      "grad_norm": 1.638565182685852,
      "learning_rate": 9.153021487450828e-06,
      "loss": 0.0551,
      "step": 277080
    },
    {
      "epoch": 0.4534802275420095,
      "grad_norm": 1.2294871807098389,
      "learning_rate": 9.152955595237312e-06,
      "loss": 0.0397,
      "step": 277100
    },
    {
      "epoch": 0.4535129579806629,
      "grad_norm": 1.6991571187973022,
      "learning_rate": 9.152889703023794e-06,
      "loss": 0.0425,
      "step": 277120
    },
    {
      "epoch": 0.4535456884193162,
      "grad_norm": 0.7351983189582825,
      "learning_rate": 9.152823810810277e-06,
      "loss": 0.0423,
      "step": 277140
    },
    {
      "epoch": 0.4535784188579695,
      "grad_norm": 2.4707043170928955,
      "learning_rate": 9.152757918596759e-06,
      "loss": 0.0613,
      "step": 277160
    },
    {
      "epoch": 0.4536111492966229,
      "grad_norm": 1.2612611055374146,
      "learning_rate": 9.152692026383243e-06,
      "loss": 0.0402,
      "step": 277180
    },
    {
      "epoch": 0.4536438797352762,
      "grad_norm": 1.4125860929489136,
      "learning_rate": 9.152626134169726e-06,
      "loss": 0.0447,
      "step": 277200
    },
    {
      "epoch": 0.4536766101739296,
      "grad_norm": 2.038644552230835,
      "learning_rate": 9.152560241956208e-06,
      "loss": 0.0431,
      "step": 277220
    },
    {
      "epoch": 0.4537093406125829,
      "grad_norm": 0.9136983752250671,
      "learning_rate": 9.152494349742692e-06,
      "loss": 0.0306,
      "step": 277240
    },
    {
      "epoch": 0.4537420710512362,
      "grad_norm": 1.1841485500335693,
      "learning_rate": 9.152428457529174e-06,
      "loss": 0.0385,
      "step": 277260
    },
    {
      "epoch": 0.4537748014898896,
      "grad_norm": 1.4831416606903076,
      "learning_rate": 9.152362565315657e-06,
      "loss": 0.0451,
      "step": 277280
    },
    {
      "epoch": 0.4538075319285429,
      "grad_norm": 2.4121899604797363,
      "learning_rate": 9.152296673102141e-06,
      "loss": 0.0399,
      "step": 277300
    },
    {
      "epoch": 0.45384026236719627,
      "grad_norm": 1.276467204093933,
      "learning_rate": 9.152230780888623e-06,
      "loss": 0.0382,
      "step": 277320
    },
    {
      "epoch": 0.4538729928058496,
      "grad_norm": 1.1604982614517212,
      "learning_rate": 9.152164888675106e-06,
      "loss": 0.0428,
      "step": 277340
    },
    {
      "epoch": 0.4539057232445029,
      "grad_norm": 0.99016934633255,
      "learning_rate": 9.15209899646159e-06,
      "loss": 0.0368,
      "step": 277360
    },
    {
      "epoch": 0.4539384536831563,
      "grad_norm": 0.9886412024497986,
      "learning_rate": 9.152033104248072e-06,
      "loss": 0.0443,
      "step": 277380
    },
    {
      "epoch": 0.4539711841218096,
      "grad_norm": 0.5207551121711731,
      "learning_rate": 9.151967212034555e-06,
      "loss": 0.0385,
      "step": 277400
    },
    {
      "epoch": 0.45400391456046296,
      "grad_norm": 8.76768684387207,
      "learning_rate": 9.151901319821037e-06,
      "loss": 0.0372,
      "step": 277420
    },
    {
      "epoch": 0.4540366449991163,
      "grad_norm": 1.080142855644226,
      "learning_rate": 9.151835427607521e-06,
      "loss": 0.0303,
      "step": 277440
    },
    {
      "epoch": 0.4540693754377696,
      "grad_norm": 1.6764745712280273,
      "learning_rate": 9.151769535394003e-06,
      "loss": 0.0303,
      "step": 277460
    },
    {
      "epoch": 0.45410210587642297,
      "grad_norm": 1.953776240348816,
      "learning_rate": 9.151703643180486e-06,
      "loss": 0.0427,
      "step": 277480
    },
    {
      "epoch": 0.4541348363150763,
      "grad_norm": 4.152990818023682,
      "learning_rate": 9.151637750966968e-06,
      "loss": 0.0526,
      "step": 277500
    },
    {
      "epoch": 0.45416756675372966,
      "grad_norm": 0.6963655352592468,
      "learning_rate": 9.151571858753452e-06,
      "loss": 0.0383,
      "step": 277520
    },
    {
      "epoch": 0.454200297192383,
      "grad_norm": 3.395453453063965,
      "learning_rate": 9.151505966539934e-06,
      "loss": 0.0528,
      "step": 277540
    },
    {
      "epoch": 0.4542330276310363,
      "grad_norm": 0.896440863609314,
      "learning_rate": 9.151440074326417e-06,
      "loss": 0.0363,
      "step": 277560
    },
    {
      "epoch": 0.45426575806968966,
      "grad_norm": 2.6987907886505127,
      "learning_rate": 9.151374182112901e-06,
      "loss": 0.0415,
      "step": 277580
    },
    {
      "epoch": 0.454298488508343,
      "grad_norm": 0.8386688232421875,
      "learning_rate": 9.151308289899383e-06,
      "loss": 0.0444,
      "step": 277600
    },
    {
      "epoch": 0.45433121894699635,
      "grad_norm": 1.9293395280838013,
      "learning_rate": 9.151242397685866e-06,
      "loss": 0.0379,
      "step": 277620
    },
    {
      "epoch": 0.45436394938564967,
      "grad_norm": 2.814800500869751,
      "learning_rate": 9.151176505472348e-06,
      "loss": 0.0379,
      "step": 277640
    },
    {
      "epoch": 0.454396679824303,
      "grad_norm": 0.9291067123413086,
      "learning_rate": 9.151110613258832e-06,
      "loss": 0.0258,
      "step": 277660
    },
    {
      "epoch": 0.45442941026295636,
      "grad_norm": 2.8991050720214844,
      "learning_rate": 9.151044721045314e-06,
      "loss": 0.0327,
      "step": 277680
    },
    {
      "epoch": 0.4544621407016097,
      "grad_norm": 0.09367883950471878,
      "learning_rate": 9.150978828831797e-06,
      "loss": 0.044,
      "step": 277700
    },
    {
      "epoch": 0.45449487114026305,
      "grad_norm": 2.172952890396118,
      "learning_rate": 9.150912936618281e-06,
      "loss": 0.0451,
      "step": 277720
    },
    {
      "epoch": 0.45452760157891636,
      "grad_norm": 1.498284101486206,
      "learning_rate": 9.150847044404763e-06,
      "loss": 0.0304,
      "step": 277740
    },
    {
      "epoch": 0.4545603320175697,
      "grad_norm": 1.8379486799240112,
      "learning_rate": 9.150781152191246e-06,
      "loss": 0.0377,
      "step": 277760
    },
    {
      "epoch": 0.45459306245622305,
      "grad_norm": 1.1277108192443848,
      "learning_rate": 9.15071525997773e-06,
      "loss": 0.049,
      "step": 277780
    },
    {
      "epoch": 0.45462579289487637,
      "grad_norm": 1.9824038743972778,
      "learning_rate": 9.150649367764212e-06,
      "loss": 0.0517,
      "step": 277800
    },
    {
      "epoch": 0.45465852333352974,
      "grad_norm": 1.8835309743881226,
      "learning_rate": 9.150583475550695e-06,
      "loss": 0.0492,
      "step": 277820
    },
    {
      "epoch": 0.45469125377218306,
      "grad_norm": 0.43270307779312134,
      "learning_rate": 9.150517583337177e-06,
      "loss": 0.0376,
      "step": 277840
    },
    {
      "epoch": 0.4547239842108364,
      "grad_norm": 1.019991159439087,
      "learning_rate": 9.150451691123661e-06,
      "loss": 0.052,
      "step": 277860
    },
    {
      "epoch": 0.45475671464948975,
      "grad_norm": 1.6454137563705444,
      "learning_rate": 9.150385798910143e-06,
      "loss": 0.0393,
      "step": 277880
    },
    {
      "epoch": 0.45478944508814306,
      "grad_norm": 0.7684754133224487,
      "learning_rate": 9.150319906696626e-06,
      "loss": 0.0441,
      "step": 277900
    },
    {
      "epoch": 0.45482217552679644,
      "grad_norm": 3.174375057220459,
      "learning_rate": 9.15025401448311e-06,
      "loss": 0.07,
      "step": 277920
    },
    {
      "epoch": 0.45485490596544975,
      "grad_norm": 0.5924788117408752,
      "learning_rate": 9.150188122269592e-06,
      "loss": 0.0442,
      "step": 277940
    },
    {
      "epoch": 0.45488763640410307,
      "grad_norm": 1.7201145887374878,
      "learning_rate": 9.150122230056076e-06,
      "loss": 0.0364,
      "step": 277960
    },
    {
      "epoch": 0.45492036684275644,
      "grad_norm": 0.527487576007843,
      "learning_rate": 9.150056337842557e-06,
      "loss": 0.0491,
      "step": 277980
    },
    {
      "epoch": 0.45495309728140976,
      "grad_norm": 2.023261070251465,
      "learning_rate": 9.149990445629041e-06,
      "loss": 0.0283,
      "step": 278000
    },
    {
      "epoch": 0.45498582772006313,
      "grad_norm": 3.5842812061309814,
      "learning_rate": 9.149924553415523e-06,
      "loss": 0.0447,
      "step": 278020
    },
    {
      "epoch": 0.45501855815871645,
      "grad_norm": 1.726665735244751,
      "learning_rate": 9.149858661202006e-06,
      "loss": 0.042,
      "step": 278040
    },
    {
      "epoch": 0.45505128859736976,
      "grad_norm": 1.5724059343338013,
      "learning_rate": 9.149792768988488e-06,
      "loss": 0.0284,
      "step": 278060
    },
    {
      "epoch": 0.45508401903602314,
      "grad_norm": 1.4889787435531616,
      "learning_rate": 9.149726876774972e-06,
      "loss": 0.0497,
      "step": 278080
    },
    {
      "epoch": 0.45511674947467645,
      "grad_norm": 3.758744478225708,
      "learning_rate": 9.149660984561456e-06,
      "loss": 0.0434,
      "step": 278100
    },
    {
      "epoch": 0.4551494799133298,
      "grad_norm": 2.006551504135132,
      "learning_rate": 9.149595092347937e-06,
      "loss": 0.0313,
      "step": 278120
    },
    {
      "epoch": 0.45518221035198314,
      "grad_norm": 2.2481422424316406,
      "learning_rate": 9.149529200134421e-06,
      "loss": 0.0418,
      "step": 278140
    },
    {
      "epoch": 0.45521494079063646,
      "grad_norm": 4.636415958404541,
      "learning_rate": 9.149463307920905e-06,
      "loss": 0.0522,
      "step": 278160
    },
    {
      "epoch": 0.45524767122928983,
      "grad_norm": 1.0715631246566772,
      "learning_rate": 9.149397415707387e-06,
      "loss": 0.0428,
      "step": 278180
    },
    {
      "epoch": 0.45528040166794315,
      "grad_norm": 0.3381357491016388,
      "learning_rate": 9.14933152349387e-06,
      "loss": 0.0317,
      "step": 278200
    },
    {
      "epoch": 0.4553131321065965,
      "grad_norm": 1.3921481370925903,
      "learning_rate": 9.149265631280352e-06,
      "loss": 0.0417,
      "step": 278220
    },
    {
      "epoch": 0.45534586254524984,
      "grad_norm": 0.597468912601471,
      "learning_rate": 9.149199739066836e-06,
      "loss": 0.0458,
      "step": 278240
    },
    {
      "epoch": 0.45537859298390315,
      "grad_norm": 1.207545518875122,
      "learning_rate": 9.14913384685332e-06,
      "loss": 0.0406,
      "step": 278260
    },
    {
      "epoch": 0.4554113234225565,
      "grad_norm": 1.786908507347107,
      "learning_rate": 9.149067954639801e-06,
      "loss": 0.0378,
      "step": 278280
    },
    {
      "epoch": 0.45544405386120984,
      "grad_norm": 1.1430221796035767,
      "learning_rate": 9.149002062426285e-06,
      "loss": 0.0317,
      "step": 278300
    },
    {
      "epoch": 0.4554767842998632,
      "grad_norm": 2.8106181621551514,
      "learning_rate": 9.148936170212767e-06,
      "loss": 0.0547,
      "step": 278320
    },
    {
      "epoch": 0.45550951473851653,
      "grad_norm": 0.6368600130081177,
      "learning_rate": 9.14887027799925e-06,
      "loss": 0.0433,
      "step": 278340
    },
    {
      "epoch": 0.45554224517716985,
      "grad_norm": 2.273648977279663,
      "learning_rate": 9.148804385785732e-06,
      "loss": 0.039,
      "step": 278360
    },
    {
      "epoch": 0.4555749756158232,
      "grad_norm": 1.3673405647277832,
      "learning_rate": 9.148738493572216e-06,
      "loss": 0.0531,
      "step": 278380
    },
    {
      "epoch": 0.45560770605447654,
      "grad_norm": 0.7757723331451416,
      "learning_rate": 9.148672601358697e-06,
      "loss": 0.048,
      "step": 278400
    },
    {
      "epoch": 0.4556404364931299,
      "grad_norm": 3.686628818511963,
      "learning_rate": 9.148606709145181e-06,
      "loss": 0.0318,
      "step": 278420
    },
    {
      "epoch": 0.4556731669317832,
      "grad_norm": 1.4078471660614014,
      "learning_rate": 9.148540816931663e-06,
      "loss": 0.0362,
      "step": 278440
    },
    {
      "epoch": 0.45570589737043654,
      "grad_norm": 6.895261764526367,
      "learning_rate": 9.148474924718147e-06,
      "loss": 0.0405,
      "step": 278460
    },
    {
      "epoch": 0.4557386278090899,
      "grad_norm": 1.8212436437606812,
      "learning_rate": 9.14840903250463e-06,
      "loss": 0.0438,
      "step": 278480
    },
    {
      "epoch": 0.45577135824774323,
      "grad_norm": 0.704203724861145,
      "learning_rate": 9.148343140291112e-06,
      "loss": 0.0515,
      "step": 278500
    },
    {
      "epoch": 0.4558040886863966,
      "grad_norm": 1.3137058019638062,
      "learning_rate": 9.148277248077596e-06,
      "loss": 0.0405,
      "step": 278520
    },
    {
      "epoch": 0.4558368191250499,
      "grad_norm": 17.93468475341797,
      "learning_rate": 9.14821135586408e-06,
      "loss": 0.0362,
      "step": 278540
    },
    {
      "epoch": 0.45586954956370324,
      "grad_norm": 2.5319957733154297,
      "learning_rate": 9.148145463650561e-06,
      "loss": 0.0499,
      "step": 278560
    },
    {
      "epoch": 0.4559022800023566,
      "grad_norm": 0.5157380104064941,
      "learning_rate": 9.148079571437045e-06,
      "loss": 0.0385,
      "step": 278580
    },
    {
      "epoch": 0.4559350104410099,
      "grad_norm": 4.03812313079834,
      "learning_rate": 9.148013679223528e-06,
      "loss": 0.0316,
      "step": 278600
    },
    {
      "epoch": 0.4559677408796633,
      "grad_norm": 0.42438045144081116,
      "learning_rate": 9.14794778701001e-06,
      "loss": 0.0355,
      "step": 278620
    },
    {
      "epoch": 0.4560004713183166,
      "grad_norm": 1.4346907138824463,
      "learning_rate": 9.147881894796494e-06,
      "loss": 0.0462,
      "step": 278640
    },
    {
      "epoch": 0.45603320175696993,
      "grad_norm": 6.309817314147949,
      "learning_rate": 9.147816002582976e-06,
      "loss": 0.0484,
      "step": 278660
    },
    {
      "epoch": 0.4560659321956233,
      "grad_norm": 6.1847147941589355,
      "learning_rate": 9.14775011036946e-06,
      "loss": 0.0527,
      "step": 278680
    },
    {
      "epoch": 0.4560986626342766,
      "grad_norm": 0.7782581448554993,
      "learning_rate": 9.147684218155941e-06,
      "loss": 0.0395,
      "step": 278700
    },
    {
      "epoch": 0.45613139307292994,
      "grad_norm": 1.3126665353775024,
      "learning_rate": 9.147618325942425e-06,
      "loss": 0.0342,
      "step": 278720
    },
    {
      "epoch": 0.4561641235115833,
      "grad_norm": 2.0390853881835938,
      "learning_rate": 9.147552433728907e-06,
      "loss": 0.0416,
      "step": 278740
    },
    {
      "epoch": 0.4561968539502366,
      "grad_norm": 2.530264139175415,
      "learning_rate": 9.14748654151539e-06,
      "loss": 0.0511,
      "step": 278760
    },
    {
      "epoch": 0.45622958438889,
      "grad_norm": 1.56690514087677,
      "learning_rate": 9.147420649301872e-06,
      "loss": 0.0435,
      "step": 278780
    },
    {
      "epoch": 0.4562623148275433,
      "grad_norm": 0.6554725170135498,
      "learning_rate": 9.147354757088356e-06,
      "loss": 0.0365,
      "step": 278800
    },
    {
      "epoch": 0.45629504526619663,
      "grad_norm": 2.4793310165405273,
      "learning_rate": 9.147288864874838e-06,
      "loss": 0.0472,
      "step": 278820
    },
    {
      "epoch": 0.45632777570485,
      "grad_norm": 1.0226455926895142,
      "learning_rate": 9.147222972661321e-06,
      "loss": 0.0549,
      "step": 278840
    },
    {
      "epoch": 0.4563605061435033,
      "grad_norm": 4.88218355178833,
      "learning_rate": 9.147157080447803e-06,
      "loss": 0.0493,
      "step": 278860
    },
    {
      "epoch": 0.4563932365821567,
      "grad_norm": 0.9311745762825012,
      "learning_rate": 9.147091188234287e-06,
      "loss": 0.0418,
      "step": 278880
    },
    {
      "epoch": 0.45642596702081,
      "grad_norm": 4.2793450355529785,
      "learning_rate": 9.14702529602077e-06,
      "loss": 0.0437,
      "step": 278900
    },
    {
      "epoch": 0.4564586974594633,
      "grad_norm": 1.382157802581787,
      "learning_rate": 9.146959403807252e-06,
      "loss": 0.0419,
      "step": 278920
    },
    {
      "epoch": 0.4564914278981167,
      "grad_norm": 2.4853761196136475,
      "learning_rate": 9.146893511593736e-06,
      "loss": 0.0428,
      "step": 278940
    },
    {
      "epoch": 0.45652415833677,
      "grad_norm": 2.7857723236083984,
      "learning_rate": 9.14682761938022e-06,
      "loss": 0.0335,
      "step": 278960
    },
    {
      "epoch": 0.4565568887754234,
      "grad_norm": 2.0253119468688965,
      "learning_rate": 9.146761727166701e-06,
      "loss": 0.0503,
      "step": 278980
    },
    {
      "epoch": 0.4565896192140767,
      "grad_norm": 1.6077678203582764,
      "learning_rate": 9.146695834953185e-06,
      "loss": 0.0476,
      "step": 279000
    },
    {
      "epoch": 0.45662234965273,
      "grad_norm": 2.796217203140259,
      "learning_rate": 9.146629942739668e-06,
      "loss": 0.0338,
      "step": 279020
    },
    {
      "epoch": 0.4566550800913834,
      "grad_norm": 3.875758171081543,
      "learning_rate": 9.14656405052615e-06,
      "loss": 0.0472,
      "step": 279040
    },
    {
      "epoch": 0.4566878105300367,
      "grad_norm": 1.970180869102478,
      "learning_rate": 9.146498158312634e-06,
      "loss": 0.0432,
      "step": 279060
    },
    {
      "epoch": 0.4567205409686901,
      "grad_norm": 2.359825372695923,
      "learning_rate": 9.146432266099116e-06,
      "loss": 0.0571,
      "step": 279080
    },
    {
      "epoch": 0.4567532714073434,
      "grad_norm": 1.7312326431274414,
      "learning_rate": 9.1463663738856e-06,
      "loss": 0.0267,
      "step": 279100
    },
    {
      "epoch": 0.4567860018459967,
      "grad_norm": 2.908717155456543,
      "learning_rate": 9.146300481672081e-06,
      "loss": 0.0519,
      "step": 279120
    },
    {
      "epoch": 0.4568187322846501,
      "grad_norm": 1.754073143005371,
      "learning_rate": 9.146234589458565e-06,
      "loss": 0.0396,
      "step": 279140
    },
    {
      "epoch": 0.4568514627233034,
      "grad_norm": 2.6788249015808105,
      "learning_rate": 9.146168697245047e-06,
      "loss": 0.0357,
      "step": 279160
    },
    {
      "epoch": 0.4568841931619568,
      "grad_norm": 1.351940631866455,
      "learning_rate": 9.14610280503153e-06,
      "loss": 0.0638,
      "step": 279180
    },
    {
      "epoch": 0.4569169236006101,
      "grad_norm": 1.6731815338134766,
      "learning_rate": 9.146036912818012e-06,
      "loss": 0.0489,
      "step": 279200
    },
    {
      "epoch": 0.4569496540392634,
      "grad_norm": 5.420557498931885,
      "learning_rate": 9.145971020604496e-06,
      "loss": 0.0497,
      "step": 279220
    },
    {
      "epoch": 0.4569823844779168,
      "grad_norm": 0.7132809162139893,
      "learning_rate": 9.145905128390978e-06,
      "loss": 0.0413,
      "step": 279240
    },
    {
      "epoch": 0.4570151149165701,
      "grad_norm": 0.9313358664512634,
      "learning_rate": 9.145839236177461e-06,
      "loss": 0.0395,
      "step": 279260
    },
    {
      "epoch": 0.45704784535522347,
      "grad_norm": 3.71114444732666,
      "learning_rate": 9.145773343963945e-06,
      "loss": 0.0302,
      "step": 279280
    },
    {
      "epoch": 0.4570805757938768,
      "grad_norm": 1.1567002534866333,
      "learning_rate": 9.145707451750427e-06,
      "loss": 0.0534,
      "step": 279300
    },
    {
      "epoch": 0.4571133062325301,
      "grad_norm": 1.7518000602722168,
      "learning_rate": 9.14564155953691e-06,
      "loss": 0.0424,
      "step": 279320
    },
    {
      "epoch": 0.4571460366711835,
      "grad_norm": 1.5118117332458496,
      "learning_rate": 9.145575667323394e-06,
      "loss": 0.0448,
      "step": 279340
    },
    {
      "epoch": 0.4571787671098368,
      "grad_norm": 0.7814252972602844,
      "learning_rate": 9.145509775109876e-06,
      "loss": 0.0459,
      "step": 279360
    },
    {
      "epoch": 0.45721149754849016,
      "grad_norm": 1.7843035459518433,
      "learning_rate": 9.14544388289636e-06,
      "loss": 0.0399,
      "step": 279380
    },
    {
      "epoch": 0.4572442279871435,
      "grad_norm": 1.503918170928955,
      "learning_rate": 9.145377990682843e-06,
      "loss": 0.0574,
      "step": 279400
    },
    {
      "epoch": 0.4572769584257968,
      "grad_norm": 1.4620171785354614,
      "learning_rate": 9.145312098469325e-06,
      "loss": 0.0412,
      "step": 279420
    },
    {
      "epoch": 0.45730968886445017,
      "grad_norm": 1.9571360349655151,
      "learning_rate": 9.145246206255808e-06,
      "loss": 0.0415,
      "step": 279440
    },
    {
      "epoch": 0.4573424193031035,
      "grad_norm": 1.3729217052459717,
      "learning_rate": 9.14518031404229e-06,
      "loss": 0.0614,
      "step": 279460
    },
    {
      "epoch": 0.45737514974175686,
      "grad_norm": 3.9704716205596924,
      "learning_rate": 9.145114421828774e-06,
      "loss": 0.0451,
      "step": 279480
    },
    {
      "epoch": 0.4574078801804102,
      "grad_norm": 2.37595796585083,
      "learning_rate": 9.145048529615256e-06,
      "loss": 0.0379,
      "step": 279500
    },
    {
      "epoch": 0.4574406106190635,
      "grad_norm": 2.6466798782348633,
      "learning_rate": 9.14498263740174e-06,
      "loss": 0.051,
      "step": 279520
    },
    {
      "epoch": 0.45747334105771686,
      "grad_norm": 0.19704808294773102,
      "learning_rate": 9.144916745188221e-06,
      "loss": 0.044,
      "step": 279540
    },
    {
      "epoch": 0.4575060714963702,
      "grad_norm": 2.594444513320923,
      "learning_rate": 9.144850852974705e-06,
      "loss": 0.0349,
      "step": 279560
    },
    {
      "epoch": 0.45753880193502355,
      "grad_norm": 0.7183381915092468,
      "learning_rate": 9.144784960761187e-06,
      "loss": 0.0274,
      "step": 279580
    },
    {
      "epoch": 0.45757153237367687,
      "grad_norm": 0.8093183040618896,
      "learning_rate": 9.14471906854767e-06,
      "loss": 0.0411,
      "step": 279600
    },
    {
      "epoch": 0.4576042628123302,
      "grad_norm": 3.519638776779175,
      "learning_rate": 9.144653176334152e-06,
      "loss": 0.0529,
      "step": 279620
    },
    {
      "epoch": 0.45763699325098356,
      "grad_norm": 0.31698551774024963,
      "learning_rate": 9.144587284120636e-06,
      "loss": 0.04,
      "step": 279640
    },
    {
      "epoch": 0.4576697236896369,
      "grad_norm": 3.80999755859375,
      "learning_rate": 9.14452139190712e-06,
      "loss": 0.0428,
      "step": 279660
    },
    {
      "epoch": 0.45770245412829025,
      "grad_norm": 2.003602981567383,
      "learning_rate": 9.144455499693601e-06,
      "loss": 0.0424,
      "step": 279680
    },
    {
      "epoch": 0.45773518456694356,
      "grad_norm": 1.2148504257202148,
      "learning_rate": 9.144389607480085e-06,
      "loss": 0.0565,
      "step": 279700
    },
    {
      "epoch": 0.4577679150055969,
      "grad_norm": 3.6056666374206543,
      "learning_rate": 9.144323715266567e-06,
      "loss": 0.0386,
      "step": 279720
    },
    {
      "epoch": 0.45780064544425025,
      "grad_norm": 0.6007727980613708,
      "learning_rate": 9.14425782305305e-06,
      "loss": 0.0402,
      "step": 279740
    },
    {
      "epoch": 0.45783337588290357,
      "grad_norm": 0.5762848854064941,
      "learning_rate": 9.144191930839534e-06,
      "loss": 0.039,
      "step": 279760
    },
    {
      "epoch": 0.45786610632155694,
      "grad_norm": 3.8770487308502197,
      "learning_rate": 9.144126038626016e-06,
      "loss": 0.0363,
      "step": 279780
    },
    {
      "epoch": 0.45789883676021026,
      "grad_norm": 3.3753654956817627,
      "learning_rate": 9.1440601464125e-06,
      "loss": 0.0422,
      "step": 279800
    },
    {
      "epoch": 0.4579315671988636,
      "grad_norm": 1.8232753276824951,
      "learning_rate": 9.143994254198983e-06,
      "loss": 0.0298,
      "step": 279820
    },
    {
      "epoch": 0.45796429763751695,
      "grad_norm": 2.5708391666412354,
      "learning_rate": 9.143928361985465e-06,
      "loss": 0.0489,
      "step": 279840
    },
    {
      "epoch": 0.45799702807617027,
      "grad_norm": 3.8028154373168945,
      "learning_rate": 9.143862469771948e-06,
      "loss": 0.033,
      "step": 279860
    },
    {
      "epoch": 0.45802975851482364,
      "grad_norm": 1.505208969116211,
      "learning_rate": 9.14379657755843e-06,
      "loss": 0.0334,
      "step": 279880
    },
    {
      "epoch": 0.45806248895347695,
      "grad_norm": 1.0326194763183594,
      "learning_rate": 9.143730685344914e-06,
      "loss": 0.0404,
      "step": 279900
    },
    {
      "epoch": 0.45809521939213027,
      "grad_norm": 5.21704626083374,
      "learning_rate": 9.143664793131396e-06,
      "loss": 0.0409,
      "step": 279920
    },
    {
      "epoch": 0.45812794983078364,
      "grad_norm": 2.7768936157226562,
      "learning_rate": 9.14359890091788e-06,
      "loss": 0.0435,
      "step": 279940
    },
    {
      "epoch": 0.45816068026943696,
      "grad_norm": 0.6171200275421143,
      "learning_rate": 9.143533008704361e-06,
      "loss": 0.0523,
      "step": 279960
    },
    {
      "epoch": 0.45819341070809033,
      "grad_norm": 0.4764716923236847,
      "learning_rate": 9.143467116490845e-06,
      "loss": 0.0587,
      "step": 279980
    },
    {
      "epoch": 0.45822614114674365,
      "grad_norm": 1.2522053718566895,
      "learning_rate": 9.143401224277327e-06,
      "loss": 0.0285,
      "step": 280000
    },
    {
      "epoch": 0.45825887158539697,
      "grad_norm": 3.4484355449676514,
      "learning_rate": 9.14333533206381e-06,
      "loss": 0.0445,
      "step": 280020
    },
    {
      "epoch": 0.45829160202405034,
      "grad_norm": 1.275178074836731,
      "learning_rate": 9.143269439850294e-06,
      "loss": 0.0433,
      "step": 280040
    },
    {
      "epoch": 0.45832433246270365,
      "grad_norm": 1.3250620365142822,
      "learning_rate": 9.143203547636776e-06,
      "loss": 0.0418,
      "step": 280060
    },
    {
      "epoch": 0.458357062901357,
      "grad_norm": 0.790204644203186,
      "learning_rate": 9.14313765542326e-06,
      "loss": 0.052,
      "step": 280080
    },
    {
      "epoch": 0.45838979334001034,
      "grad_norm": 1.5422778129577637,
      "learning_rate": 9.143071763209741e-06,
      "loss": 0.0392,
      "step": 280100
    },
    {
      "epoch": 0.45842252377866366,
      "grad_norm": 2.639561891555786,
      "learning_rate": 9.143005870996225e-06,
      "loss": 0.0412,
      "step": 280120
    },
    {
      "epoch": 0.45845525421731703,
      "grad_norm": 1.6271005868911743,
      "learning_rate": 9.142939978782709e-06,
      "loss": 0.0436,
      "step": 280140
    },
    {
      "epoch": 0.45848798465597035,
      "grad_norm": 0.5749402046203613,
      "learning_rate": 9.14287408656919e-06,
      "loss": 0.0389,
      "step": 280160
    },
    {
      "epoch": 0.4585207150946237,
      "grad_norm": 3.8842060565948486,
      "learning_rate": 9.142808194355674e-06,
      "loss": 0.0424,
      "step": 280180
    },
    {
      "epoch": 0.45855344553327704,
      "grad_norm": 0.3986191749572754,
      "learning_rate": 9.142742302142158e-06,
      "loss": 0.043,
      "step": 280200
    },
    {
      "epoch": 0.45858617597193035,
      "grad_norm": 0.9828438758850098,
      "learning_rate": 9.14267640992864e-06,
      "loss": 0.0471,
      "step": 280220
    },
    {
      "epoch": 0.4586189064105837,
      "grad_norm": 2.0108256340026855,
      "learning_rate": 9.142610517715123e-06,
      "loss": 0.0455,
      "step": 280240
    },
    {
      "epoch": 0.45865163684923704,
      "grad_norm": 2.366506576538086,
      "learning_rate": 9.142544625501605e-06,
      "loss": 0.0386,
      "step": 280260
    },
    {
      "epoch": 0.4586843672878904,
      "grad_norm": 3.420322895050049,
      "learning_rate": 9.142478733288089e-06,
      "loss": 0.0499,
      "step": 280280
    },
    {
      "epoch": 0.45871709772654373,
      "grad_norm": 1.8891583681106567,
      "learning_rate": 9.14241284107457e-06,
      "loss": 0.0522,
      "step": 280300
    },
    {
      "epoch": 0.45874982816519705,
      "grad_norm": 2.0094621181488037,
      "learning_rate": 9.142346948861054e-06,
      "loss": 0.0517,
      "step": 280320
    },
    {
      "epoch": 0.4587825586038504,
      "grad_norm": 1.5094188451766968,
      "learning_rate": 9.142281056647536e-06,
      "loss": 0.0374,
      "step": 280340
    },
    {
      "epoch": 0.45881528904250374,
      "grad_norm": 1.4443610906600952,
      "learning_rate": 9.14221516443402e-06,
      "loss": 0.038,
      "step": 280360
    },
    {
      "epoch": 0.4588480194811571,
      "grad_norm": 1.8517459630966187,
      "learning_rate": 9.142149272220503e-06,
      "loss": 0.0476,
      "step": 280380
    },
    {
      "epoch": 0.4588807499198104,
      "grad_norm": 2.404001474380493,
      "learning_rate": 9.142083380006985e-06,
      "loss": 0.0393,
      "step": 280400
    },
    {
      "epoch": 0.45891348035846374,
      "grad_norm": 3.1400022506713867,
      "learning_rate": 9.142017487793469e-06,
      "loss": 0.0396,
      "step": 280420
    },
    {
      "epoch": 0.4589462107971171,
      "grad_norm": 1.3752998113632202,
      "learning_rate": 9.14195159557995e-06,
      "loss": 0.0423,
      "step": 280440
    },
    {
      "epoch": 0.45897894123577043,
      "grad_norm": 1.147382140159607,
      "learning_rate": 9.141885703366434e-06,
      "loss": 0.0355,
      "step": 280460
    },
    {
      "epoch": 0.4590116716744238,
      "grad_norm": 0.290914922952652,
      "learning_rate": 9.141819811152916e-06,
      "loss": 0.0315,
      "step": 280480
    },
    {
      "epoch": 0.4590444021130771,
      "grad_norm": 1.0240041017532349,
      "learning_rate": 9.1417539189394e-06,
      "loss": 0.0421,
      "step": 280500
    },
    {
      "epoch": 0.45907713255173044,
      "grad_norm": 2.0503616333007812,
      "learning_rate": 9.141688026725881e-06,
      "loss": 0.034,
      "step": 280520
    },
    {
      "epoch": 0.4591098629903838,
      "grad_norm": 8.030922889709473,
      "learning_rate": 9.141622134512365e-06,
      "loss": 0.05,
      "step": 280540
    },
    {
      "epoch": 0.4591425934290371,
      "grad_norm": 2.2815256118774414,
      "learning_rate": 9.141556242298849e-06,
      "loss": 0.0365,
      "step": 280560
    },
    {
      "epoch": 0.4591753238676905,
      "grad_norm": 0.8913887143135071,
      "learning_rate": 9.141490350085332e-06,
      "loss": 0.053,
      "step": 280580
    },
    {
      "epoch": 0.4592080543063438,
      "grad_norm": 1.2126370668411255,
      "learning_rate": 9.141424457871814e-06,
      "loss": 0.0477,
      "step": 280600
    },
    {
      "epoch": 0.45924078474499713,
      "grad_norm": 2.0391101837158203,
      "learning_rate": 9.141358565658298e-06,
      "loss": 0.0406,
      "step": 280620
    },
    {
      "epoch": 0.4592735151836505,
      "grad_norm": 1.3281012773513794,
      "learning_rate": 9.14129267344478e-06,
      "loss": 0.028,
      "step": 280640
    },
    {
      "epoch": 0.4593062456223038,
      "grad_norm": 0.5765339136123657,
      "learning_rate": 9.141226781231263e-06,
      "loss": 0.0646,
      "step": 280660
    },
    {
      "epoch": 0.4593389760609572,
      "grad_norm": 1.581024169921875,
      "learning_rate": 9.141160889017745e-06,
      "loss": 0.0394,
      "step": 280680
    },
    {
      "epoch": 0.4593717064996105,
      "grad_norm": 1.1693919897079468,
      "learning_rate": 9.141094996804229e-06,
      "loss": 0.04,
      "step": 280700
    },
    {
      "epoch": 0.4594044369382638,
      "grad_norm": 0.7151432633399963,
      "learning_rate": 9.141029104590712e-06,
      "loss": 0.052,
      "step": 280720
    },
    {
      "epoch": 0.4594371673769172,
      "grad_norm": 0.5949947834014893,
      "learning_rate": 9.140963212377194e-06,
      "loss": 0.0344,
      "step": 280740
    },
    {
      "epoch": 0.4594698978155705,
      "grad_norm": 2.046800136566162,
      "learning_rate": 9.140897320163678e-06,
      "loss": 0.0579,
      "step": 280760
    },
    {
      "epoch": 0.4595026282542239,
      "grad_norm": 2.7107226848602295,
      "learning_rate": 9.14083142795016e-06,
      "loss": 0.0453,
      "step": 280780
    },
    {
      "epoch": 0.4595353586928772,
      "grad_norm": 1.2142231464385986,
      "learning_rate": 9.140765535736643e-06,
      "loss": 0.0433,
      "step": 280800
    },
    {
      "epoch": 0.4595680891315305,
      "grad_norm": 1.5667051076889038,
      "learning_rate": 9.140699643523125e-06,
      "loss": 0.0461,
      "step": 280820
    },
    {
      "epoch": 0.4596008195701839,
      "grad_norm": 0.5345444083213806,
      "learning_rate": 9.140633751309609e-06,
      "loss": 0.0377,
      "step": 280840
    },
    {
      "epoch": 0.4596335500088372,
      "grad_norm": 1.2647000551223755,
      "learning_rate": 9.14056785909609e-06,
      "loss": 0.0345,
      "step": 280860
    },
    {
      "epoch": 0.4596662804474906,
      "grad_norm": 0.49551087617874146,
      "learning_rate": 9.140501966882574e-06,
      "loss": 0.03,
      "step": 280880
    },
    {
      "epoch": 0.4596990108861439,
      "grad_norm": 0.9675561189651489,
      "learning_rate": 9.140436074669056e-06,
      "loss": 0.0585,
      "step": 280900
    },
    {
      "epoch": 0.4597317413247972,
      "grad_norm": 1.4634169340133667,
      "learning_rate": 9.14037018245554e-06,
      "loss": 0.0419,
      "step": 280920
    },
    {
      "epoch": 0.4597644717634506,
      "grad_norm": 3.8423635959625244,
      "learning_rate": 9.140304290242023e-06,
      "loss": 0.036,
      "step": 280940
    },
    {
      "epoch": 0.4597972022021039,
      "grad_norm": 0.9210798740386963,
      "learning_rate": 9.140238398028505e-06,
      "loss": 0.0519,
      "step": 280960
    },
    {
      "epoch": 0.4598299326407573,
      "grad_norm": 0.7571033239364624,
      "learning_rate": 9.140172505814989e-06,
      "loss": 0.0428,
      "step": 280980
    },
    {
      "epoch": 0.4598626630794106,
      "grad_norm": 1.5345333814620972,
      "learning_rate": 9.140106613601472e-06,
      "loss": 0.0332,
      "step": 281000
    },
    {
      "epoch": 0.4598953935180639,
      "grad_norm": 1.49140202999115,
      "learning_rate": 9.140040721387954e-06,
      "loss": 0.0312,
      "step": 281020
    },
    {
      "epoch": 0.4599281239567173,
      "grad_norm": 2.5472311973571777,
      "learning_rate": 9.139974829174438e-06,
      "loss": 0.0495,
      "step": 281040
    },
    {
      "epoch": 0.4599608543953706,
      "grad_norm": 1.150584101676941,
      "learning_rate": 9.139908936960921e-06,
      "loss": 0.035,
      "step": 281060
    },
    {
      "epoch": 0.45999358483402397,
      "grad_norm": 1.070090651512146,
      "learning_rate": 9.139843044747403e-06,
      "loss": 0.0507,
      "step": 281080
    },
    {
      "epoch": 0.4600263152726773,
      "grad_norm": 1.4081591367721558,
      "learning_rate": 9.139777152533887e-06,
      "loss": 0.0412,
      "step": 281100
    },
    {
      "epoch": 0.4600590457113306,
      "grad_norm": 1.3618298768997192,
      "learning_rate": 9.139711260320369e-06,
      "loss": 0.0433,
      "step": 281120
    },
    {
      "epoch": 0.460091776149984,
      "grad_norm": 1.8237888813018799,
      "learning_rate": 9.139645368106852e-06,
      "loss": 0.0373,
      "step": 281140
    },
    {
      "epoch": 0.4601245065886373,
      "grad_norm": 0.603546679019928,
      "learning_rate": 9.139579475893334e-06,
      "loss": 0.0392,
      "step": 281160
    },
    {
      "epoch": 0.46015723702729067,
      "grad_norm": 2.218773603439331,
      "learning_rate": 9.139513583679818e-06,
      "loss": 0.0437,
      "step": 281180
    },
    {
      "epoch": 0.460189967465944,
      "grad_norm": 0.9464309215545654,
      "learning_rate": 9.1394476914663e-06,
      "loss": 0.0427,
      "step": 281200
    },
    {
      "epoch": 0.4602226979045973,
      "grad_norm": 0.7691574692726135,
      "learning_rate": 9.139381799252783e-06,
      "loss": 0.0416,
      "step": 281220
    },
    {
      "epoch": 0.46025542834325067,
      "grad_norm": 1.201771855354309,
      "learning_rate": 9.139315907039265e-06,
      "loss": 0.0299,
      "step": 281240
    },
    {
      "epoch": 0.460288158781904,
      "grad_norm": 1.5317586660385132,
      "learning_rate": 9.139250014825749e-06,
      "loss": 0.0429,
      "step": 281260
    },
    {
      "epoch": 0.46032088922055736,
      "grad_norm": 2.8557074069976807,
      "learning_rate": 9.13918412261223e-06,
      "loss": 0.0458,
      "step": 281280
    },
    {
      "epoch": 0.4603536196592107,
      "grad_norm": 2.8572661876678467,
      "learning_rate": 9.139118230398714e-06,
      "loss": 0.042,
      "step": 281300
    },
    {
      "epoch": 0.460386350097864,
      "grad_norm": 0.9125268459320068,
      "learning_rate": 9.139052338185198e-06,
      "loss": 0.0458,
      "step": 281320
    },
    {
      "epoch": 0.46041908053651737,
      "grad_norm": 1.5702555179595947,
      "learning_rate": 9.13898644597168e-06,
      "loss": 0.0385,
      "step": 281340
    },
    {
      "epoch": 0.4604518109751707,
      "grad_norm": 3.5146877765655518,
      "learning_rate": 9.138920553758163e-06,
      "loss": 0.0356,
      "step": 281360
    },
    {
      "epoch": 0.46048454141382406,
      "grad_norm": 1.8527030944824219,
      "learning_rate": 9.138854661544647e-06,
      "loss": 0.0525,
      "step": 281380
    },
    {
      "epoch": 0.4605172718524774,
      "grad_norm": 0.47500601410865784,
      "learning_rate": 9.138788769331129e-06,
      "loss": 0.0289,
      "step": 281400
    },
    {
      "epoch": 0.4605500022911307,
      "grad_norm": 1.9724690914154053,
      "learning_rate": 9.138722877117612e-06,
      "loss": 0.0422,
      "step": 281420
    },
    {
      "epoch": 0.46058273272978406,
      "grad_norm": 1.8448598384857178,
      "learning_rate": 9.138656984904096e-06,
      "loss": 0.0456,
      "step": 281440
    },
    {
      "epoch": 0.4606154631684374,
      "grad_norm": 0.5053879618644714,
      "learning_rate": 9.138591092690578e-06,
      "loss": 0.0269,
      "step": 281460
    },
    {
      "epoch": 0.4606481936070907,
      "grad_norm": 0.2394779771566391,
      "learning_rate": 9.138525200477061e-06,
      "loss": 0.0488,
      "step": 281480
    },
    {
      "epoch": 0.46068092404574407,
      "grad_norm": 0.6465387940406799,
      "learning_rate": 9.138459308263543e-06,
      "loss": 0.0379,
      "step": 281500
    },
    {
      "epoch": 0.4607136544843974,
      "grad_norm": 1.3941515684127808,
      "learning_rate": 9.138393416050027e-06,
      "loss": 0.0422,
      "step": 281520
    },
    {
      "epoch": 0.46074638492305076,
      "grad_norm": 0.7382220029830933,
      "learning_rate": 9.138327523836509e-06,
      "loss": 0.0417,
      "step": 281540
    },
    {
      "epoch": 0.4607791153617041,
      "grad_norm": 0.9972453713417053,
      "learning_rate": 9.138261631622992e-06,
      "loss": 0.0418,
      "step": 281560
    },
    {
      "epoch": 0.4608118458003574,
      "grad_norm": 1.1170850992202759,
      "learning_rate": 9.138195739409474e-06,
      "loss": 0.0471,
      "step": 281580
    },
    {
      "epoch": 0.46084457623901076,
      "grad_norm": 0.30509158968925476,
      "learning_rate": 9.138129847195958e-06,
      "loss": 0.0362,
      "step": 281600
    },
    {
      "epoch": 0.4608773066776641,
      "grad_norm": 0.9800385236740112,
      "learning_rate": 9.13806395498244e-06,
      "loss": 0.0299,
      "step": 281620
    },
    {
      "epoch": 0.46091003711631745,
      "grad_norm": 0.2926095426082611,
      "learning_rate": 9.137998062768923e-06,
      "loss": 0.0436,
      "step": 281640
    },
    {
      "epoch": 0.46094276755497077,
      "grad_norm": 0.5903380513191223,
      "learning_rate": 9.137932170555405e-06,
      "loss": 0.0381,
      "step": 281660
    },
    {
      "epoch": 0.4609754979936241,
      "grad_norm": 0.7756305932998657,
      "learning_rate": 9.137866278341889e-06,
      "loss": 0.0444,
      "step": 281680
    },
    {
      "epoch": 0.46100822843227746,
      "grad_norm": 1.7759981155395508,
      "learning_rate": 9.13780038612837e-06,
      "loss": 0.0485,
      "step": 281700
    },
    {
      "epoch": 0.4610409588709308,
      "grad_norm": 4.588903427124023,
      "learning_rate": 9.137734493914854e-06,
      "loss": 0.0404,
      "step": 281720
    },
    {
      "epoch": 0.46107368930958414,
      "grad_norm": 1.5964252948760986,
      "learning_rate": 9.137668601701338e-06,
      "loss": 0.0441,
      "step": 281740
    },
    {
      "epoch": 0.46110641974823746,
      "grad_norm": 1.3003100156784058,
      "learning_rate": 9.13760270948782e-06,
      "loss": 0.0348,
      "step": 281760
    },
    {
      "epoch": 0.4611391501868908,
      "grad_norm": 1.9327428340911865,
      "learning_rate": 9.137536817274303e-06,
      "loss": 0.0554,
      "step": 281780
    },
    {
      "epoch": 0.46117188062554415,
      "grad_norm": 7.106172561645508,
      "learning_rate": 9.137470925060787e-06,
      "loss": 0.034,
      "step": 281800
    },
    {
      "epoch": 0.46120461106419747,
      "grad_norm": 1.653531789779663,
      "learning_rate": 9.137405032847269e-06,
      "loss": 0.0408,
      "step": 281820
    },
    {
      "epoch": 0.46123734150285084,
      "grad_norm": 0.26894140243530273,
      "learning_rate": 9.137339140633752e-06,
      "loss": 0.0392,
      "step": 281840
    },
    {
      "epoch": 0.46127007194150416,
      "grad_norm": 3.3040614128112793,
      "learning_rate": 9.137273248420236e-06,
      "loss": 0.0491,
      "step": 281860
    },
    {
      "epoch": 0.4613028023801575,
      "grad_norm": 1.2800837755203247,
      "learning_rate": 9.137207356206718e-06,
      "loss": 0.0438,
      "step": 281880
    },
    {
      "epoch": 0.46133553281881085,
      "grad_norm": 0.9263732433319092,
      "learning_rate": 9.137141463993202e-06,
      "loss": 0.0333,
      "step": 281900
    },
    {
      "epoch": 0.46136826325746416,
      "grad_norm": 2.016634225845337,
      "learning_rate": 9.137075571779683e-06,
      "loss": 0.0385,
      "step": 281920
    },
    {
      "epoch": 0.46140099369611753,
      "grad_norm": 0.9547151923179626,
      "learning_rate": 9.137009679566167e-06,
      "loss": 0.047,
      "step": 281940
    },
    {
      "epoch": 0.46143372413477085,
      "grad_norm": 1.722937822341919,
      "learning_rate": 9.136943787352649e-06,
      "loss": 0.0484,
      "step": 281960
    },
    {
      "epoch": 0.46146645457342417,
      "grad_norm": 0.576410710811615,
      "learning_rate": 9.136877895139132e-06,
      "loss": 0.0564,
      "step": 281980
    },
    {
      "epoch": 0.46149918501207754,
      "grad_norm": 1.7350223064422607,
      "learning_rate": 9.136812002925614e-06,
      "loss": 0.0411,
      "step": 282000
    },
    {
      "epoch": 0.46153191545073086,
      "grad_norm": 0.5248175859451294,
      "learning_rate": 9.136746110712098e-06,
      "loss": 0.0394,
      "step": 282020
    },
    {
      "epoch": 0.46156464588938423,
      "grad_norm": 1.1141235828399658,
      "learning_rate": 9.13668021849858e-06,
      "loss": 0.0388,
      "step": 282040
    },
    {
      "epoch": 0.46159737632803755,
      "grad_norm": 2.319478988647461,
      "learning_rate": 9.136614326285063e-06,
      "loss": 0.0453,
      "step": 282060
    },
    {
      "epoch": 0.46163010676669086,
      "grad_norm": 1.0548735857009888,
      "learning_rate": 9.136548434071545e-06,
      "loss": 0.0362,
      "step": 282080
    },
    {
      "epoch": 0.46166283720534423,
      "grad_norm": 1.8014708757400513,
      "learning_rate": 9.136482541858029e-06,
      "loss": 0.0384,
      "step": 282100
    },
    {
      "epoch": 0.46169556764399755,
      "grad_norm": 1.987820029258728,
      "learning_rate": 9.136416649644512e-06,
      "loss": 0.0386,
      "step": 282120
    },
    {
      "epoch": 0.4617282980826509,
      "grad_norm": 2.0047802925109863,
      "learning_rate": 9.136350757430994e-06,
      "loss": 0.0344,
      "step": 282140
    },
    {
      "epoch": 0.46176102852130424,
      "grad_norm": 0.5936174988746643,
      "learning_rate": 9.136284865217478e-06,
      "loss": 0.0419,
      "step": 282160
    },
    {
      "epoch": 0.46179375895995756,
      "grad_norm": 2.552523612976074,
      "learning_rate": 9.136218973003962e-06,
      "loss": 0.0313,
      "step": 282180
    },
    {
      "epoch": 0.46182648939861093,
      "grad_norm": 0.6283700466156006,
      "learning_rate": 9.136153080790443e-06,
      "loss": 0.0427,
      "step": 282200
    },
    {
      "epoch": 0.46185921983726425,
      "grad_norm": 2.0735409259796143,
      "learning_rate": 9.136087188576927e-06,
      "loss": 0.0514,
      "step": 282220
    },
    {
      "epoch": 0.4618919502759176,
      "grad_norm": 1.093584656715393,
      "learning_rate": 9.13602129636341e-06,
      "loss": 0.0334,
      "step": 282240
    },
    {
      "epoch": 0.46192468071457093,
      "grad_norm": 2.6262972354888916,
      "learning_rate": 9.135955404149893e-06,
      "loss": 0.0359,
      "step": 282260
    },
    {
      "epoch": 0.46195741115322425,
      "grad_norm": 2.2019968032836914,
      "learning_rate": 9.135889511936376e-06,
      "loss": 0.0577,
      "step": 282280
    },
    {
      "epoch": 0.4619901415918776,
      "grad_norm": 3.1316568851470947,
      "learning_rate": 9.135823619722858e-06,
      "loss": 0.0374,
      "step": 282300
    },
    {
      "epoch": 0.46202287203053094,
      "grad_norm": 2.648350715637207,
      "learning_rate": 9.135757727509342e-06,
      "loss": 0.0445,
      "step": 282320
    },
    {
      "epoch": 0.4620556024691843,
      "grad_norm": 2.695466995239258,
      "learning_rate": 9.135691835295823e-06,
      "loss": 0.0302,
      "step": 282340
    },
    {
      "epoch": 0.46208833290783763,
      "grad_norm": 0.37304776906967163,
      "learning_rate": 9.135625943082307e-06,
      "loss": 0.035,
      "step": 282360
    },
    {
      "epoch": 0.46212106334649095,
      "grad_norm": 1.0866906642913818,
      "learning_rate": 9.135560050868789e-06,
      "loss": 0.0522,
      "step": 282380
    },
    {
      "epoch": 0.4621537937851443,
      "grad_norm": 0.6513810753822327,
      "learning_rate": 9.135494158655273e-06,
      "loss": 0.0377,
      "step": 282400
    },
    {
      "epoch": 0.46218652422379763,
      "grad_norm": 1.7702769041061401,
      "learning_rate": 9.135428266441754e-06,
      "loss": 0.0332,
      "step": 282420
    },
    {
      "epoch": 0.462219254662451,
      "grad_norm": 1.628584623336792,
      "learning_rate": 9.135362374228238e-06,
      "loss": 0.0457,
      "step": 282440
    },
    {
      "epoch": 0.4622519851011043,
      "grad_norm": 1.7530537843704224,
      "learning_rate": 9.13529648201472e-06,
      "loss": 0.0403,
      "step": 282460
    },
    {
      "epoch": 0.46228471553975764,
      "grad_norm": 0.5622864365577698,
      "learning_rate": 9.135230589801203e-06,
      "loss": 0.0525,
      "step": 282480
    },
    {
      "epoch": 0.462317445978411,
      "grad_norm": 0.8206424713134766,
      "learning_rate": 9.135164697587687e-06,
      "loss": 0.0462,
      "step": 282500
    },
    {
      "epoch": 0.46235017641706433,
      "grad_norm": 0.36027947068214417,
      "learning_rate": 9.135098805374169e-06,
      "loss": 0.0379,
      "step": 282520
    },
    {
      "epoch": 0.4623829068557177,
      "grad_norm": 1.363855004310608,
      "learning_rate": 9.135032913160653e-06,
      "loss": 0.0369,
      "step": 282540
    },
    {
      "epoch": 0.462415637294371,
      "grad_norm": 0.7792915105819702,
      "learning_rate": 9.134967020947134e-06,
      "loss": 0.0458,
      "step": 282560
    },
    {
      "epoch": 0.46244836773302433,
      "grad_norm": 1.7206281423568726,
      "learning_rate": 9.134901128733618e-06,
      "loss": 0.0363,
      "step": 282580
    },
    {
      "epoch": 0.4624810981716777,
      "grad_norm": 2.0176568031311035,
      "learning_rate": 9.134835236520102e-06,
      "loss": 0.0359,
      "step": 282600
    },
    {
      "epoch": 0.462513828610331,
      "grad_norm": 0.6736932992935181,
      "learning_rate": 9.134769344306584e-06,
      "loss": 0.0371,
      "step": 282620
    },
    {
      "epoch": 0.4625465590489844,
      "grad_norm": 1.1498076915740967,
      "learning_rate": 9.134703452093067e-06,
      "loss": 0.0345,
      "step": 282640
    },
    {
      "epoch": 0.4625792894876377,
      "grad_norm": 1.543305516242981,
      "learning_rate": 9.13463755987955e-06,
      "loss": 0.0534,
      "step": 282660
    },
    {
      "epoch": 0.46261201992629103,
      "grad_norm": 0.5083763003349304,
      "learning_rate": 9.134571667666033e-06,
      "loss": 0.0546,
      "step": 282680
    },
    {
      "epoch": 0.4626447503649444,
      "grad_norm": 1.7430428266525269,
      "learning_rate": 9.134505775452516e-06,
      "loss": 0.0379,
      "step": 282700
    },
    {
      "epoch": 0.4626774808035977,
      "grad_norm": 0.7522258162498474,
      "learning_rate": 9.134439883238998e-06,
      "loss": 0.06,
      "step": 282720
    },
    {
      "epoch": 0.4627102112422511,
      "grad_norm": 0.9007613658905029,
      "learning_rate": 9.134373991025482e-06,
      "loss": 0.0299,
      "step": 282740
    },
    {
      "epoch": 0.4627429416809044,
      "grad_norm": 1.0389357805252075,
      "learning_rate": 9.134308098811964e-06,
      "loss": 0.0555,
      "step": 282760
    },
    {
      "epoch": 0.4627756721195577,
      "grad_norm": 0.987237811088562,
      "learning_rate": 9.134242206598447e-06,
      "loss": 0.0422,
      "step": 282780
    },
    {
      "epoch": 0.4628084025582111,
      "grad_norm": 0.6982335448265076,
      "learning_rate": 9.134176314384929e-06,
      "loss": 0.0473,
      "step": 282800
    },
    {
      "epoch": 0.4628411329968644,
      "grad_norm": 1.010202169418335,
      "learning_rate": 9.134110422171413e-06,
      "loss": 0.036,
      "step": 282820
    },
    {
      "epoch": 0.4628738634355178,
      "grad_norm": 0.957530677318573,
      "learning_rate": 9.134044529957896e-06,
      "loss": 0.0382,
      "step": 282840
    },
    {
      "epoch": 0.4629065938741711,
      "grad_norm": 0.7676241993904114,
      "learning_rate": 9.133978637744378e-06,
      "loss": 0.0462,
      "step": 282860
    },
    {
      "epoch": 0.4629393243128244,
      "grad_norm": 1.5941274166107178,
      "learning_rate": 9.133912745530862e-06,
      "loss": 0.0519,
      "step": 282880
    },
    {
      "epoch": 0.4629720547514778,
      "grad_norm": 2.5343098640441895,
      "learning_rate": 9.133846853317344e-06,
      "loss": 0.0423,
      "step": 282900
    },
    {
      "epoch": 0.4630047851901311,
      "grad_norm": 4.001073360443115,
      "learning_rate": 9.133780961103827e-06,
      "loss": 0.0414,
      "step": 282920
    },
    {
      "epoch": 0.4630375156287845,
      "grad_norm": 0.7650620937347412,
      "learning_rate": 9.133715068890309e-06,
      "loss": 0.0387,
      "step": 282940
    },
    {
      "epoch": 0.4630702460674378,
      "grad_norm": 2.384845018386841,
      "learning_rate": 9.133649176676793e-06,
      "loss": 0.0443,
      "step": 282960
    },
    {
      "epoch": 0.4631029765060911,
      "grad_norm": 1.508721947669983,
      "learning_rate": 9.133583284463276e-06,
      "loss": 0.045,
      "step": 282980
    },
    {
      "epoch": 0.4631357069447445,
      "grad_norm": 1.4849560260772705,
      "learning_rate": 9.133517392249758e-06,
      "loss": 0.0406,
      "step": 283000
    },
    {
      "epoch": 0.4631684373833978,
      "grad_norm": 1.5433810949325562,
      "learning_rate": 9.133451500036242e-06,
      "loss": 0.0391,
      "step": 283020
    },
    {
      "epoch": 0.4632011678220512,
      "grad_norm": 3.9114990234375,
      "learning_rate": 9.133385607822725e-06,
      "loss": 0.0387,
      "step": 283040
    },
    {
      "epoch": 0.4632338982607045,
      "grad_norm": 8.172052383422852,
      "learning_rate": 9.133319715609207e-06,
      "loss": 0.0475,
      "step": 283060
    },
    {
      "epoch": 0.4632666286993578,
      "grad_norm": 1.728724718093872,
      "learning_rate": 9.13325382339569e-06,
      "loss": 0.0407,
      "step": 283080
    },
    {
      "epoch": 0.4632993591380112,
      "grad_norm": 2.027276039123535,
      "learning_rate": 9.133187931182173e-06,
      "loss": 0.0572,
      "step": 283100
    },
    {
      "epoch": 0.4633320895766645,
      "grad_norm": 0.645340621471405,
      "learning_rate": 9.133122038968656e-06,
      "loss": 0.048,
      "step": 283120
    },
    {
      "epoch": 0.46336482001531787,
      "grad_norm": 1.6089298725128174,
      "learning_rate": 9.133056146755138e-06,
      "loss": 0.0376,
      "step": 283140
    },
    {
      "epoch": 0.4633975504539712,
      "grad_norm": 1.9137507677078247,
      "learning_rate": 9.132990254541622e-06,
      "loss": 0.0315,
      "step": 283160
    },
    {
      "epoch": 0.4634302808926245,
      "grad_norm": 0.5264012813568115,
      "learning_rate": 9.132924362328105e-06,
      "loss": 0.042,
      "step": 283180
    },
    {
      "epoch": 0.4634630113312779,
      "grad_norm": 0.6570164561271667,
      "learning_rate": 9.132858470114587e-06,
      "loss": 0.0483,
      "step": 283200
    },
    {
      "epoch": 0.4634957417699312,
      "grad_norm": 2.358297348022461,
      "learning_rate": 9.13279257790107e-06,
      "loss": 0.0358,
      "step": 283220
    },
    {
      "epoch": 0.46352847220858456,
      "grad_norm": 0.9001222252845764,
      "learning_rate": 9.132726685687553e-06,
      "loss": 0.0332,
      "step": 283240
    },
    {
      "epoch": 0.4635612026472379,
      "grad_norm": 1.2696378231048584,
      "learning_rate": 9.132660793474036e-06,
      "loss": 0.0429,
      "step": 283260
    },
    {
      "epoch": 0.4635939330858912,
      "grad_norm": 0.42670467495918274,
      "learning_rate": 9.132594901260518e-06,
      "loss": 0.0403,
      "step": 283280
    },
    {
      "epoch": 0.46362666352454457,
      "grad_norm": 3.0649075508117676,
      "learning_rate": 9.132529009047002e-06,
      "loss": 0.0514,
      "step": 283300
    },
    {
      "epoch": 0.4636593939631979,
      "grad_norm": 1.1466801166534424,
      "learning_rate": 9.132463116833484e-06,
      "loss": 0.0391,
      "step": 283320
    },
    {
      "epoch": 0.46369212440185126,
      "grad_norm": 0.818665087223053,
      "learning_rate": 9.132397224619967e-06,
      "loss": 0.045,
      "step": 283340
    },
    {
      "epoch": 0.4637248548405046,
      "grad_norm": 2.17149019241333,
      "learning_rate": 9.13233133240645e-06,
      "loss": 0.036,
      "step": 283360
    },
    {
      "epoch": 0.4637575852791579,
      "grad_norm": 1.4859106540679932,
      "learning_rate": 9.132265440192933e-06,
      "loss": 0.0442,
      "step": 283380
    },
    {
      "epoch": 0.46379031571781126,
      "grad_norm": 0.44307756423950195,
      "learning_rate": 9.132199547979416e-06,
      "loss": 0.0462,
      "step": 283400
    },
    {
      "epoch": 0.4638230461564646,
      "grad_norm": 2.3726861476898193,
      "learning_rate": 9.1321336557659e-06,
      "loss": 0.0505,
      "step": 283420
    },
    {
      "epoch": 0.46385577659511795,
      "grad_norm": 3.8162758350372314,
      "learning_rate": 9.132067763552382e-06,
      "loss": 0.0473,
      "step": 283440
    },
    {
      "epoch": 0.46388850703377127,
      "grad_norm": 4.951822280883789,
      "learning_rate": 9.132001871338865e-06,
      "loss": 0.0367,
      "step": 283460
    },
    {
      "epoch": 0.4639212374724246,
      "grad_norm": 1.4749298095703125,
      "learning_rate": 9.131935979125347e-06,
      "loss": 0.0554,
      "step": 283480
    },
    {
      "epoch": 0.46395396791107796,
      "grad_norm": 2.3834891319274902,
      "learning_rate": 9.131870086911831e-06,
      "loss": 0.0489,
      "step": 283500
    },
    {
      "epoch": 0.4639866983497313,
      "grad_norm": 0.349092572927475,
      "learning_rate": 9.131804194698313e-06,
      "loss": 0.0254,
      "step": 283520
    },
    {
      "epoch": 0.46401942878838465,
      "grad_norm": 0.30074313282966614,
      "learning_rate": 9.131738302484796e-06,
      "loss": 0.0684,
      "step": 283540
    },
    {
      "epoch": 0.46405215922703796,
      "grad_norm": 2.001671314239502,
      "learning_rate": 9.13167241027128e-06,
      "loss": 0.0356,
      "step": 283560
    },
    {
      "epoch": 0.4640848896656913,
      "grad_norm": 1.615662693977356,
      "learning_rate": 9.131606518057762e-06,
      "loss": 0.038,
      "step": 283580
    },
    {
      "epoch": 0.46411762010434465,
      "grad_norm": 1.4577077627182007,
      "learning_rate": 9.131540625844245e-06,
      "loss": 0.0463,
      "step": 283600
    },
    {
      "epoch": 0.46415035054299797,
      "grad_norm": 0.48374316096305847,
      "learning_rate": 9.131474733630727e-06,
      "loss": 0.0498,
      "step": 283620
    },
    {
      "epoch": 0.46418308098165134,
      "grad_norm": 5.050787448883057,
      "learning_rate": 9.131408841417211e-06,
      "loss": 0.0388,
      "step": 283640
    },
    {
      "epoch": 0.46421581142030466,
      "grad_norm": 1.031486988067627,
      "learning_rate": 9.131342949203693e-06,
      "loss": 0.06,
      "step": 283660
    },
    {
      "epoch": 0.464248541858958,
      "grad_norm": 3.194707155227661,
      "learning_rate": 9.131277056990176e-06,
      "loss": 0.0602,
      "step": 283680
    },
    {
      "epoch": 0.46428127229761135,
      "grad_norm": 1.415614366531372,
      "learning_rate": 9.131211164776658e-06,
      "loss": 0.0486,
      "step": 283700
    },
    {
      "epoch": 0.46431400273626466,
      "grad_norm": 3.6186447143554688,
      "learning_rate": 9.131145272563142e-06,
      "loss": 0.0399,
      "step": 283720
    },
    {
      "epoch": 0.46434673317491804,
      "grad_norm": 2.489626407623291,
      "learning_rate": 9.131079380349624e-06,
      "loss": 0.0326,
      "step": 283740
    },
    {
      "epoch": 0.46437946361357135,
      "grad_norm": 0.9893047213554382,
      "learning_rate": 9.131013488136107e-06,
      "loss": 0.0354,
      "step": 283760
    },
    {
      "epoch": 0.46441219405222467,
      "grad_norm": 0.7784968614578247,
      "learning_rate": 9.130947595922591e-06,
      "loss": 0.0335,
      "step": 283780
    },
    {
      "epoch": 0.46444492449087804,
      "grad_norm": 1.929268717765808,
      "learning_rate": 9.130881703709073e-06,
      "loss": 0.034,
      "step": 283800
    },
    {
      "epoch": 0.46447765492953136,
      "grad_norm": 1.1628667116165161,
      "learning_rate": 9.130815811495556e-06,
      "loss": 0.0441,
      "step": 283820
    },
    {
      "epoch": 0.46451038536818473,
      "grad_norm": 0.618718683719635,
      "learning_rate": 9.13074991928204e-06,
      "loss": 0.0468,
      "step": 283840
    },
    {
      "epoch": 0.46454311580683805,
      "grad_norm": 1.2272858619689941,
      "learning_rate": 9.130684027068522e-06,
      "loss": 0.0322,
      "step": 283860
    },
    {
      "epoch": 0.46457584624549136,
      "grad_norm": 3.2203283309936523,
      "learning_rate": 9.130618134855005e-06,
      "loss": 0.0477,
      "step": 283880
    },
    {
      "epoch": 0.46460857668414474,
      "grad_norm": 1.0449177026748657,
      "learning_rate": 9.130552242641489e-06,
      "loss": 0.0419,
      "step": 283900
    },
    {
      "epoch": 0.46464130712279805,
      "grad_norm": 2.477466583251953,
      "learning_rate": 9.130486350427971e-06,
      "loss": 0.0387,
      "step": 283920
    },
    {
      "epoch": 0.4646740375614514,
      "grad_norm": 1.804390788078308,
      "learning_rate": 9.130420458214455e-06,
      "loss": 0.0689,
      "step": 283940
    },
    {
      "epoch": 0.46470676800010474,
      "grad_norm": 2.9462757110595703,
      "learning_rate": 9.130354566000936e-06,
      "loss": 0.0352,
      "step": 283960
    },
    {
      "epoch": 0.46473949843875806,
      "grad_norm": 0.6250555515289307,
      "learning_rate": 9.13028867378742e-06,
      "loss": 0.0454,
      "step": 283980
    },
    {
      "epoch": 0.46477222887741143,
      "grad_norm": 0.7659887075424194,
      "learning_rate": 9.130222781573902e-06,
      "loss": 0.0363,
      "step": 284000
    },
    {
      "epoch": 0.46480495931606475,
      "grad_norm": 1.8757520914077759,
      "learning_rate": 9.130156889360385e-06,
      "loss": 0.0495,
      "step": 284020
    },
    {
      "epoch": 0.4648376897547181,
      "grad_norm": 7.082867622375488,
      "learning_rate": 9.130090997146867e-06,
      "loss": 0.0459,
      "step": 284040
    },
    {
      "epoch": 0.46487042019337144,
      "grad_norm": 1.048455834388733,
      "learning_rate": 9.130025104933351e-06,
      "loss": 0.0356,
      "step": 284060
    },
    {
      "epoch": 0.46490315063202475,
      "grad_norm": 1.3119451999664307,
      "learning_rate": 9.129959212719833e-06,
      "loss": 0.0365,
      "step": 284080
    },
    {
      "epoch": 0.4649358810706781,
      "grad_norm": 1.254544973373413,
      "learning_rate": 9.129893320506316e-06,
      "loss": 0.0356,
      "step": 284100
    },
    {
      "epoch": 0.46496861150933144,
      "grad_norm": 1.8067094087600708,
      "learning_rate": 9.129827428292798e-06,
      "loss": 0.0391,
      "step": 284120
    },
    {
      "epoch": 0.4650013419479848,
      "grad_norm": 0.6388307213783264,
      "learning_rate": 9.129761536079282e-06,
      "loss": 0.0401,
      "step": 284140
    },
    {
      "epoch": 0.46503407238663813,
      "grad_norm": 1.685626745223999,
      "learning_rate": 9.129695643865765e-06,
      "loss": 0.0578,
      "step": 284160
    },
    {
      "epoch": 0.46506680282529145,
      "grad_norm": 2.391139030456543,
      "learning_rate": 9.129629751652247e-06,
      "loss": 0.044,
      "step": 284180
    },
    {
      "epoch": 0.4650995332639448,
      "grad_norm": 1.7886375188827515,
      "learning_rate": 9.129563859438731e-06,
      "loss": 0.0553,
      "step": 284200
    },
    {
      "epoch": 0.46513226370259814,
      "grad_norm": 1.0808159112930298,
      "learning_rate": 9.129497967225215e-06,
      "loss": 0.0441,
      "step": 284220
    },
    {
      "epoch": 0.4651649941412515,
      "grad_norm": 1.9848252534866333,
      "learning_rate": 9.129432075011696e-06,
      "loss": 0.0323,
      "step": 284240
    },
    {
      "epoch": 0.4651977245799048,
      "grad_norm": 1.4336997270584106,
      "learning_rate": 9.12936618279818e-06,
      "loss": 0.0451,
      "step": 284260
    },
    {
      "epoch": 0.46523045501855814,
      "grad_norm": 0.6219251155853271,
      "learning_rate": 9.129300290584664e-06,
      "loss": 0.0446,
      "step": 284280
    },
    {
      "epoch": 0.4652631854572115,
      "grad_norm": 0.8179513812065125,
      "learning_rate": 9.129234398371146e-06,
      "loss": 0.0295,
      "step": 284300
    },
    {
      "epoch": 0.46529591589586483,
      "grad_norm": 2.5233922004699707,
      "learning_rate": 9.129168506157629e-06,
      "loss": 0.0379,
      "step": 284320
    },
    {
      "epoch": 0.46532864633451815,
      "grad_norm": 2.7798824310302734,
      "learning_rate": 9.129102613944111e-06,
      "loss": 0.0538,
      "step": 284340
    },
    {
      "epoch": 0.4653613767731715,
      "grad_norm": 5.069563865661621,
      "learning_rate": 9.129036721730595e-06,
      "loss": 0.0358,
      "step": 284360
    },
    {
      "epoch": 0.46539410721182484,
      "grad_norm": 0.4975166618824005,
      "learning_rate": 9.128970829517076e-06,
      "loss": 0.0385,
      "step": 284380
    },
    {
      "epoch": 0.4654268376504782,
      "grad_norm": 0.8136131763458252,
      "learning_rate": 9.12890493730356e-06,
      "loss": 0.0313,
      "step": 284400
    },
    {
      "epoch": 0.4654595680891315,
      "grad_norm": 0.6920756101608276,
      "learning_rate": 9.128839045090042e-06,
      "loss": 0.0301,
      "step": 284420
    },
    {
      "epoch": 0.46549229852778484,
      "grad_norm": 2.0178189277648926,
      "learning_rate": 9.128773152876526e-06,
      "loss": 0.0474,
      "step": 284440
    },
    {
      "epoch": 0.4655250289664382,
      "grad_norm": 1.0139868259429932,
      "learning_rate": 9.128707260663007e-06,
      "loss": 0.0552,
      "step": 284460
    },
    {
      "epoch": 0.46555775940509153,
      "grad_norm": 1.1746776103973389,
      "learning_rate": 9.128641368449491e-06,
      "loss": 0.0459,
      "step": 284480
    },
    {
      "epoch": 0.4655904898437449,
      "grad_norm": 1.3815817832946777,
      "learning_rate": 9.128575476235973e-06,
      "loss": 0.0454,
      "step": 284500
    },
    {
      "epoch": 0.4656232202823982,
      "grad_norm": 1.2202845811843872,
      "learning_rate": 9.128509584022457e-06,
      "loss": 0.0386,
      "step": 284520
    },
    {
      "epoch": 0.46565595072105154,
      "grad_norm": 2.2139296531677246,
      "learning_rate": 9.128443691808938e-06,
      "loss": 0.0562,
      "step": 284540
    },
    {
      "epoch": 0.4656886811597049,
      "grad_norm": 1.4543163776397705,
      "learning_rate": 9.128377799595422e-06,
      "loss": 0.0419,
      "step": 284560
    },
    {
      "epoch": 0.4657214115983582,
      "grad_norm": 2.1748905181884766,
      "learning_rate": 9.128311907381906e-06,
      "loss": 0.0502,
      "step": 284580
    },
    {
      "epoch": 0.4657541420370116,
      "grad_norm": 3.0668091773986816,
      "learning_rate": 9.128246015168387e-06,
      "loss": 0.0438,
      "step": 284600
    },
    {
      "epoch": 0.4657868724756649,
      "grad_norm": 2.237978219985962,
      "learning_rate": 9.128180122954871e-06,
      "loss": 0.0519,
      "step": 284620
    },
    {
      "epoch": 0.46581960291431823,
      "grad_norm": 2.875682830810547,
      "learning_rate": 9.128114230741355e-06,
      "loss": 0.0271,
      "step": 284640
    },
    {
      "epoch": 0.4658523333529716,
      "grad_norm": 1.4398595094680786,
      "learning_rate": 9.128048338527837e-06,
      "loss": 0.04,
      "step": 284660
    },
    {
      "epoch": 0.4658850637916249,
      "grad_norm": 0.383813738822937,
      "learning_rate": 9.12798244631432e-06,
      "loss": 0.0504,
      "step": 284680
    },
    {
      "epoch": 0.4659177942302783,
      "grad_norm": 0.4913516342639923,
      "learning_rate": 9.127916554100804e-06,
      "loss": 0.0336,
      "step": 284700
    },
    {
      "epoch": 0.4659505246689316,
      "grad_norm": 0.8051224946975708,
      "learning_rate": 9.127850661887286e-06,
      "loss": 0.0499,
      "step": 284720
    },
    {
      "epoch": 0.4659832551075849,
      "grad_norm": 1.4248771667480469,
      "learning_rate": 9.12778476967377e-06,
      "loss": 0.04,
      "step": 284740
    },
    {
      "epoch": 0.4660159855462383,
      "grad_norm": 0.6796965003013611,
      "learning_rate": 9.127718877460251e-06,
      "loss": 0.0434,
      "step": 284760
    },
    {
      "epoch": 0.4660487159848916,
      "grad_norm": 0.1595890372991562,
      "learning_rate": 9.127652985246735e-06,
      "loss": 0.0361,
      "step": 284780
    },
    {
      "epoch": 0.466081446423545,
      "grad_norm": 1.1620280742645264,
      "learning_rate": 9.127587093033217e-06,
      "loss": 0.0475,
      "step": 284800
    },
    {
      "epoch": 0.4661141768621983,
      "grad_norm": 3.603440523147583,
      "learning_rate": 9.1275212008197e-06,
      "loss": 0.0462,
      "step": 284820
    },
    {
      "epoch": 0.4661469073008516,
      "grad_norm": 1.1577659845352173,
      "learning_rate": 9.127455308606182e-06,
      "loss": 0.0388,
      "step": 284840
    },
    {
      "epoch": 0.466179637739505,
      "grad_norm": 1.6064194440841675,
      "learning_rate": 9.127389416392666e-06,
      "loss": 0.0478,
      "step": 284860
    },
    {
      "epoch": 0.4662123681781583,
      "grad_norm": 3.047961473464966,
      "learning_rate": 9.127323524179148e-06,
      "loss": 0.0496,
      "step": 284880
    },
    {
      "epoch": 0.4662450986168117,
      "grad_norm": 1.804659128189087,
      "learning_rate": 9.127257631965631e-06,
      "loss": 0.0332,
      "step": 284900
    },
    {
      "epoch": 0.466277829055465,
      "grad_norm": 0.5498261451721191,
      "learning_rate": 9.127191739752113e-06,
      "loss": 0.0333,
      "step": 284920
    },
    {
      "epoch": 0.4663105594941183,
      "grad_norm": 1.5040006637573242,
      "learning_rate": 9.127125847538597e-06,
      "loss": 0.0441,
      "step": 284940
    },
    {
      "epoch": 0.4663432899327717,
      "grad_norm": 0.5235508680343628,
      "learning_rate": 9.12705995532508e-06,
      "loss": 0.045,
      "step": 284960
    },
    {
      "epoch": 0.466376020371425,
      "grad_norm": 2.1597235202789307,
      "learning_rate": 9.126994063111562e-06,
      "loss": 0.0436,
      "step": 284980
    },
    {
      "epoch": 0.4664087508100784,
      "grad_norm": 2.228282928466797,
      "learning_rate": 9.126928170898046e-06,
      "loss": 0.0382,
      "step": 285000
    },
    {
      "epoch": 0.4664414812487317,
      "grad_norm": 7.260009288787842,
      "learning_rate": 9.12686227868453e-06,
      "loss": 0.0464,
      "step": 285020
    },
    {
      "epoch": 0.466474211687385,
      "grad_norm": 0.4181883931159973,
      "learning_rate": 9.126796386471011e-06,
      "loss": 0.0462,
      "step": 285040
    },
    {
      "epoch": 0.4665069421260384,
      "grad_norm": 1.3962271213531494,
      "learning_rate": 9.126730494257495e-06,
      "loss": 0.0371,
      "step": 285060
    },
    {
      "epoch": 0.4665396725646917,
      "grad_norm": 3.048501491546631,
      "learning_rate": 9.126664602043978e-06,
      "loss": 0.0429,
      "step": 285080
    },
    {
      "epoch": 0.46657240300334507,
      "grad_norm": 1.6356489658355713,
      "learning_rate": 9.12659870983046e-06,
      "loss": 0.0351,
      "step": 285100
    },
    {
      "epoch": 0.4666051334419984,
      "grad_norm": 1.5969488620758057,
      "learning_rate": 9.126532817616944e-06,
      "loss": 0.0448,
      "step": 285120
    },
    {
      "epoch": 0.4666378638806517,
      "grad_norm": 1.8378114700317383,
      "learning_rate": 9.126466925403426e-06,
      "loss": 0.0565,
      "step": 285140
    },
    {
      "epoch": 0.4666705943193051,
      "grad_norm": 4.458197116851807,
      "learning_rate": 9.12640103318991e-06,
      "loss": 0.0334,
      "step": 285160
    },
    {
      "epoch": 0.4667033247579584,
      "grad_norm": 0.5251180529594421,
      "learning_rate": 9.126335140976391e-06,
      "loss": 0.0391,
      "step": 285180
    },
    {
      "epoch": 0.46673605519661177,
      "grad_norm": 3.464862585067749,
      "learning_rate": 9.126269248762875e-06,
      "loss": 0.0436,
      "step": 285200
    },
    {
      "epoch": 0.4667687856352651,
      "grad_norm": 1.5717612504959106,
      "learning_rate": 9.126203356549357e-06,
      "loss": 0.0434,
      "step": 285220
    },
    {
      "epoch": 0.4668015160739184,
      "grad_norm": 0.6153327822685242,
      "learning_rate": 9.12613746433584e-06,
      "loss": 0.0391,
      "step": 285240
    },
    {
      "epoch": 0.46683424651257177,
      "grad_norm": 1.5878041982650757,
      "learning_rate": 9.126071572122322e-06,
      "loss": 0.0392,
      "step": 285260
    },
    {
      "epoch": 0.4668669769512251,
      "grad_norm": 0.5246673226356506,
      "learning_rate": 9.126005679908806e-06,
      "loss": 0.0441,
      "step": 285280
    },
    {
      "epoch": 0.46689970738987846,
      "grad_norm": 1.3849140405654907,
      "learning_rate": 9.12593978769529e-06,
      "loss": 0.0484,
      "step": 285300
    },
    {
      "epoch": 0.4669324378285318,
      "grad_norm": 1.6381431818008423,
      "learning_rate": 9.125873895481771e-06,
      "loss": 0.0389,
      "step": 285320
    },
    {
      "epoch": 0.4669651682671851,
      "grad_norm": 1.8705800771713257,
      "learning_rate": 9.125808003268255e-06,
      "loss": 0.0387,
      "step": 285340
    },
    {
      "epoch": 0.46699789870583847,
      "grad_norm": 2.343040704727173,
      "learning_rate": 9.125742111054737e-06,
      "loss": 0.0372,
      "step": 285360
    },
    {
      "epoch": 0.4670306291444918,
      "grad_norm": 0.4605591595172882,
      "learning_rate": 9.12567621884122e-06,
      "loss": 0.0486,
      "step": 285380
    },
    {
      "epoch": 0.46706335958314515,
      "grad_norm": 0.35891973972320557,
      "learning_rate": 9.125610326627702e-06,
      "loss": 0.0606,
      "step": 285400
    },
    {
      "epoch": 0.46709609002179847,
      "grad_norm": 2.426398754119873,
      "learning_rate": 9.125544434414186e-06,
      "loss": 0.0432,
      "step": 285420
    },
    {
      "epoch": 0.4671288204604518,
      "grad_norm": 0.9534787535667419,
      "learning_rate": 9.12547854220067e-06,
      "loss": 0.0437,
      "step": 285440
    },
    {
      "epoch": 0.46716155089910516,
      "grad_norm": 0.5927351713180542,
      "learning_rate": 9.125412649987151e-06,
      "loss": 0.0439,
      "step": 285460
    },
    {
      "epoch": 0.4671942813377585,
      "grad_norm": 1.048755407333374,
      "learning_rate": 9.125346757773635e-06,
      "loss": 0.0393,
      "step": 285480
    },
    {
      "epoch": 0.46722701177641185,
      "grad_norm": 1.8052974939346313,
      "learning_rate": 9.125280865560118e-06,
      "loss": 0.0394,
      "step": 285500
    },
    {
      "epoch": 0.46725974221506517,
      "grad_norm": 1.407776951789856,
      "learning_rate": 9.1252149733466e-06,
      "loss": 0.0321,
      "step": 285520
    },
    {
      "epoch": 0.4672924726537185,
      "grad_norm": 2.152576208114624,
      "learning_rate": 9.125149081133084e-06,
      "loss": 0.0431,
      "step": 285540
    },
    {
      "epoch": 0.46732520309237185,
      "grad_norm": 2.0513269901275635,
      "learning_rate": 9.125083188919566e-06,
      "loss": 0.0478,
      "step": 285560
    },
    {
      "epoch": 0.46735793353102517,
      "grad_norm": 1.0482598543167114,
      "learning_rate": 9.12501729670605e-06,
      "loss": 0.0558,
      "step": 285580
    },
    {
      "epoch": 0.46739066396967854,
      "grad_norm": 1.8458442687988281,
      "learning_rate": 9.124951404492531e-06,
      "loss": 0.0351,
      "step": 285600
    },
    {
      "epoch": 0.46742339440833186,
      "grad_norm": 1.4820220470428467,
      "learning_rate": 9.124885512279015e-06,
      "loss": 0.0473,
      "step": 285620
    },
    {
      "epoch": 0.4674561248469852,
      "grad_norm": 1.0616371631622314,
      "learning_rate": 9.124819620065498e-06,
      "loss": 0.0312,
      "step": 285640
    },
    {
      "epoch": 0.46748885528563855,
      "grad_norm": 0.5730849504470825,
      "learning_rate": 9.12475372785198e-06,
      "loss": 0.0302,
      "step": 285660
    },
    {
      "epoch": 0.46752158572429187,
      "grad_norm": 0.5550984740257263,
      "learning_rate": 9.124687835638464e-06,
      "loss": 0.0388,
      "step": 285680
    },
    {
      "epoch": 0.46755431616294524,
      "grad_norm": 1.9010119438171387,
      "learning_rate": 9.124621943424946e-06,
      "loss": 0.0441,
      "step": 285700
    },
    {
      "epoch": 0.46758704660159855,
      "grad_norm": 1.1543033123016357,
      "learning_rate": 9.12455605121143e-06,
      "loss": 0.046,
      "step": 285720
    },
    {
      "epoch": 0.46761977704025187,
      "grad_norm": 0.4557934105396271,
      "learning_rate": 9.124490158997911e-06,
      "loss": 0.03,
      "step": 285740
    },
    {
      "epoch": 0.46765250747890524,
      "grad_norm": 1.1147465705871582,
      "learning_rate": 9.124424266784395e-06,
      "loss": 0.041,
      "step": 285760
    },
    {
      "epoch": 0.46768523791755856,
      "grad_norm": 0.3064897954463959,
      "learning_rate": 9.124358374570877e-06,
      "loss": 0.0376,
      "step": 285780
    },
    {
      "epoch": 0.46771796835621193,
      "grad_norm": 5.222365856170654,
      "learning_rate": 9.12429248235736e-06,
      "loss": 0.0507,
      "step": 285800
    },
    {
      "epoch": 0.46775069879486525,
      "grad_norm": 1.3444595336914062,
      "learning_rate": 9.124226590143844e-06,
      "loss": 0.0407,
      "step": 285820
    },
    {
      "epoch": 0.46778342923351857,
      "grad_norm": 1.3559132814407349,
      "learning_rate": 9.124160697930326e-06,
      "loss": 0.0442,
      "step": 285840
    },
    {
      "epoch": 0.46781615967217194,
      "grad_norm": 0.5328594446182251,
      "learning_rate": 9.12409480571681e-06,
      "loss": 0.0593,
      "step": 285860
    },
    {
      "epoch": 0.46784889011082526,
      "grad_norm": 1.1045746803283691,
      "learning_rate": 9.124028913503293e-06,
      "loss": 0.0277,
      "step": 285880
    },
    {
      "epoch": 0.4678816205494786,
      "grad_norm": 2.4997994899749756,
      "learning_rate": 9.123963021289775e-06,
      "loss": 0.0485,
      "step": 285900
    },
    {
      "epoch": 0.46791435098813194,
      "grad_norm": 0.5340875387191772,
      "learning_rate": 9.123897129076258e-06,
      "loss": 0.0425,
      "step": 285920
    },
    {
      "epoch": 0.46794708142678526,
      "grad_norm": 2.070723533630371,
      "learning_rate": 9.12383123686274e-06,
      "loss": 0.0527,
      "step": 285940
    },
    {
      "epoch": 0.46797981186543863,
      "grad_norm": 3.3732516765594482,
      "learning_rate": 9.123765344649224e-06,
      "loss": 0.0461,
      "step": 285960
    },
    {
      "epoch": 0.46801254230409195,
      "grad_norm": 2.5171453952789307,
      "learning_rate": 9.123699452435706e-06,
      "loss": 0.033,
      "step": 285980
    },
    {
      "epoch": 0.4680452727427453,
      "grad_norm": 0.5739578604698181,
      "learning_rate": 9.12363356022219e-06,
      "loss": 0.0512,
      "step": 286000
    },
    {
      "epoch": 0.46807800318139864,
      "grad_norm": 1.406793475151062,
      "learning_rate": 9.123567668008673e-06,
      "loss": 0.0428,
      "step": 286020
    },
    {
      "epoch": 0.46811073362005196,
      "grad_norm": 1.0246009826660156,
      "learning_rate": 9.123501775795155e-06,
      "loss": 0.0284,
      "step": 286040
    },
    {
      "epoch": 0.4681434640587053,
      "grad_norm": 1.2213248014450073,
      "learning_rate": 9.123435883581638e-06,
      "loss": 0.0549,
      "step": 286060
    },
    {
      "epoch": 0.46817619449735864,
      "grad_norm": 1.4431405067443848,
      "learning_rate": 9.12336999136812e-06,
      "loss": 0.0449,
      "step": 286080
    },
    {
      "epoch": 0.468208924936012,
      "grad_norm": 1.0546540021896362,
      "learning_rate": 9.123304099154604e-06,
      "loss": 0.0398,
      "step": 286100
    },
    {
      "epoch": 0.46824165537466533,
      "grad_norm": 2.5677084922790527,
      "learning_rate": 9.123238206941086e-06,
      "loss": 0.0311,
      "step": 286120
    },
    {
      "epoch": 0.46827438581331865,
      "grad_norm": 1.5838594436645508,
      "learning_rate": 9.12317231472757e-06,
      "loss": 0.0383,
      "step": 286140
    },
    {
      "epoch": 0.468307116251972,
      "grad_norm": 0.8141213655471802,
      "learning_rate": 9.123106422514051e-06,
      "loss": 0.0426,
      "step": 286160
    },
    {
      "epoch": 0.46833984669062534,
      "grad_norm": 2.7846364974975586,
      "learning_rate": 9.123040530300535e-06,
      "loss": 0.0328,
      "step": 286180
    },
    {
      "epoch": 0.4683725771292787,
      "grad_norm": 1.2067615985870361,
      "learning_rate": 9.122974638087019e-06,
      "loss": 0.0449,
      "step": 286200
    },
    {
      "epoch": 0.468405307567932,
      "grad_norm": 1.2169033288955688,
      "learning_rate": 9.1229087458735e-06,
      "loss": 0.0381,
      "step": 286220
    },
    {
      "epoch": 0.46843803800658534,
      "grad_norm": 0.7393982410430908,
      "learning_rate": 9.122842853659984e-06,
      "loss": 0.0524,
      "step": 286240
    },
    {
      "epoch": 0.4684707684452387,
      "grad_norm": 1.5571825504302979,
      "learning_rate": 9.122776961446468e-06,
      "loss": 0.0458,
      "step": 286260
    },
    {
      "epoch": 0.46850349888389203,
      "grad_norm": 1.0707813501358032,
      "learning_rate": 9.12271106923295e-06,
      "loss": 0.0416,
      "step": 286280
    },
    {
      "epoch": 0.4685362293225454,
      "grad_norm": 1.975435495376587,
      "learning_rate": 9.122645177019433e-06,
      "loss": 0.0367,
      "step": 286300
    },
    {
      "epoch": 0.4685689597611987,
      "grad_norm": 2.2027080059051514,
      "learning_rate": 9.122579284805915e-06,
      "loss": 0.0536,
      "step": 286320
    },
    {
      "epoch": 0.46860169019985204,
      "grad_norm": 2.200690269470215,
      "learning_rate": 9.122513392592399e-06,
      "loss": 0.0306,
      "step": 286340
    },
    {
      "epoch": 0.4686344206385054,
      "grad_norm": 0.6419016718864441,
      "learning_rate": 9.122447500378882e-06,
      "loss": 0.0348,
      "step": 286360
    },
    {
      "epoch": 0.46866715107715873,
      "grad_norm": 0.8991008400917053,
      "learning_rate": 9.122381608165364e-06,
      "loss": 0.0487,
      "step": 286380
    },
    {
      "epoch": 0.4686998815158121,
      "grad_norm": 1.1906793117523193,
      "learning_rate": 9.122315715951848e-06,
      "loss": 0.0418,
      "step": 286400
    },
    {
      "epoch": 0.4687326119544654,
      "grad_norm": 2.0845489501953125,
      "learning_rate": 9.12224982373833e-06,
      "loss": 0.0447,
      "step": 286420
    },
    {
      "epoch": 0.46876534239311873,
      "grad_norm": 0.6779146194458008,
      "learning_rate": 9.122183931524813e-06,
      "loss": 0.0319,
      "step": 286440
    },
    {
      "epoch": 0.4687980728317721,
      "grad_norm": 1.573196530342102,
      "learning_rate": 9.122118039311295e-06,
      "loss": 0.0508,
      "step": 286460
    },
    {
      "epoch": 0.4688308032704254,
      "grad_norm": 2.1739683151245117,
      "learning_rate": 9.122052147097779e-06,
      "loss": 0.0423,
      "step": 286480
    },
    {
      "epoch": 0.4688635337090788,
      "grad_norm": 0.9905170798301697,
      "learning_rate": 9.12198625488426e-06,
      "loss": 0.042,
      "step": 286500
    },
    {
      "epoch": 0.4688962641477321,
      "grad_norm": 1.4187456369400024,
      "learning_rate": 9.121920362670744e-06,
      "loss": 0.0301,
      "step": 286520
    },
    {
      "epoch": 0.46892899458638543,
      "grad_norm": 1.363783597946167,
      "learning_rate": 9.121854470457226e-06,
      "loss": 0.037,
      "step": 286540
    },
    {
      "epoch": 0.4689617250250388,
      "grad_norm": 2.317220449447632,
      "learning_rate": 9.12178857824371e-06,
      "loss": 0.049,
      "step": 286560
    },
    {
      "epoch": 0.4689944554636921,
      "grad_norm": 1.0561178922653198,
      "learning_rate": 9.121722686030191e-06,
      "loss": 0.053,
      "step": 286580
    },
    {
      "epoch": 0.4690271859023455,
      "grad_norm": 0.8255758285522461,
      "learning_rate": 9.121656793816675e-06,
      "loss": 0.0282,
      "step": 286600
    },
    {
      "epoch": 0.4690599163409988,
      "grad_norm": 2.1703381538391113,
      "learning_rate": 9.121590901603159e-06,
      "loss": 0.0369,
      "step": 286620
    },
    {
      "epoch": 0.4690926467796521,
      "grad_norm": 1.627601981163025,
      "learning_rate": 9.12152500938964e-06,
      "loss": 0.0476,
      "step": 286640
    },
    {
      "epoch": 0.4691253772183055,
      "grad_norm": 1.3850420713424683,
      "learning_rate": 9.121459117176124e-06,
      "loss": 0.0394,
      "step": 286660
    },
    {
      "epoch": 0.4691581076569588,
      "grad_norm": 0.6817646622657776,
      "learning_rate": 9.121393224962608e-06,
      "loss": 0.0317,
      "step": 286680
    },
    {
      "epoch": 0.4691908380956122,
      "grad_norm": 2.0981030464172363,
      "learning_rate": 9.12132733274909e-06,
      "loss": 0.0552,
      "step": 286700
    },
    {
      "epoch": 0.4692235685342655,
      "grad_norm": 0.20267321169376373,
      "learning_rate": 9.121261440535573e-06,
      "loss": 0.0329,
      "step": 286720
    },
    {
      "epoch": 0.4692562989729188,
      "grad_norm": 0.8232319355010986,
      "learning_rate": 9.121195548322057e-06,
      "loss": 0.0299,
      "step": 286740
    },
    {
      "epoch": 0.4692890294115722,
      "grad_norm": 3.1821682453155518,
      "learning_rate": 9.121129656108539e-06,
      "loss": 0.0485,
      "step": 286760
    },
    {
      "epoch": 0.4693217598502255,
      "grad_norm": 1.4259915351867676,
      "learning_rate": 9.121063763895022e-06,
      "loss": 0.0467,
      "step": 286780
    },
    {
      "epoch": 0.4693544902888789,
      "grad_norm": 2.1110212802886963,
      "learning_rate": 9.120997871681504e-06,
      "loss": 0.0411,
      "step": 286800
    },
    {
      "epoch": 0.4693872207275322,
      "grad_norm": 1.851975440979004,
      "learning_rate": 9.120931979467988e-06,
      "loss": 0.0423,
      "step": 286820
    },
    {
      "epoch": 0.4694199511661855,
      "grad_norm": 0.30622634291648865,
      "learning_rate": 9.12086608725447e-06,
      "loss": 0.0417,
      "step": 286840
    },
    {
      "epoch": 0.4694526816048389,
      "grad_norm": 1.0998300313949585,
      "learning_rate": 9.120800195040953e-06,
      "loss": 0.0373,
      "step": 286860
    },
    {
      "epoch": 0.4694854120434922,
      "grad_norm": 2.8233091831207275,
      "learning_rate": 9.120734302827435e-06,
      "loss": 0.0409,
      "step": 286880
    },
    {
      "epoch": 0.4695181424821456,
      "grad_norm": 2.009432554244995,
      "learning_rate": 9.120668410613919e-06,
      "loss": 0.0376,
      "step": 286900
    },
    {
      "epoch": 0.4695508729207989,
      "grad_norm": 0.3387524485588074,
      "learning_rate": 9.1206025184004e-06,
      "loss": 0.043,
      "step": 286920
    },
    {
      "epoch": 0.4695836033594522,
      "grad_norm": 0.41342660784721375,
      "learning_rate": 9.120536626186884e-06,
      "loss": 0.0414,
      "step": 286940
    },
    {
      "epoch": 0.4696163337981056,
      "grad_norm": 0.5974715948104858,
      "learning_rate": 9.120470733973366e-06,
      "loss": 0.031,
      "step": 286960
    },
    {
      "epoch": 0.4696490642367589,
      "grad_norm": 3.562305450439453,
      "learning_rate": 9.12040484175985e-06,
      "loss": 0.0449,
      "step": 286980
    },
    {
      "epoch": 0.46968179467541227,
      "grad_norm": 1.4478411674499512,
      "learning_rate": 9.120338949546333e-06,
      "loss": 0.0518,
      "step": 287000
    },
    {
      "epoch": 0.4697145251140656,
      "grad_norm": 1.0628533363342285,
      "learning_rate": 9.120273057332815e-06,
      "loss": 0.0488,
      "step": 287020
    },
    {
      "epoch": 0.4697472555527189,
      "grad_norm": 0.5264691114425659,
      "learning_rate": 9.120207165119299e-06,
      "loss": 0.0438,
      "step": 287040
    },
    {
      "epoch": 0.4697799859913723,
      "grad_norm": 0.6107372045516968,
      "learning_rate": 9.120141272905782e-06,
      "loss": 0.0421,
      "step": 287060
    },
    {
      "epoch": 0.4698127164300256,
      "grad_norm": 1.512771487236023,
      "learning_rate": 9.120075380692264e-06,
      "loss": 0.0372,
      "step": 287080
    },
    {
      "epoch": 0.4698454468686789,
      "grad_norm": 0.6815367341041565,
      "learning_rate": 9.120009488478748e-06,
      "loss": 0.0386,
      "step": 287100
    },
    {
      "epoch": 0.4698781773073323,
      "grad_norm": 2.0375442504882812,
      "learning_rate": 9.119943596265231e-06,
      "loss": 0.051,
      "step": 287120
    },
    {
      "epoch": 0.4699109077459856,
      "grad_norm": 1.6398348808288574,
      "learning_rate": 9.119877704051713e-06,
      "loss": 0.04,
      "step": 287140
    },
    {
      "epoch": 0.46994363818463897,
      "grad_norm": 1.8186200857162476,
      "learning_rate": 9.119811811838197e-06,
      "loss": 0.034,
      "step": 287160
    },
    {
      "epoch": 0.4699763686232923,
      "grad_norm": 0.3691413998603821,
      "learning_rate": 9.119745919624679e-06,
      "loss": 0.0399,
      "step": 287180
    },
    {
      "epoch": 0.4700090990619456,
      "grad_norm": 1.9293291568756104,
      "learning_rate": 9.119680027411162e-06,
      "loss": 0.0475,
      "step": 287200
    },
    {
      "epoch": 0.470041829500599,
      "grad_norm": 1.9353275299072266,
      "learning_rate": 9.119614135197644e-06,
      "loss": 0.0416,
      "step": 287220
    },
    {
      "epoch": 0.4700745599392523,
      "grad_norm": 1.3895446062088013,
      "learning_rate": 9.119548242984128e-06,
      "loss": 0.0347,
      "step": 287240
    },
    {
      "epoch": 0.47010729037790566,
      "grad_norm": 1.5396536588668823,
      "learning_rate": 9.11948235077061e-06,
      "loss": 0.0305,
      "step": 287260
    },
    {
      "epoch": 0.470140020816559,
      "grad_norm": 1.3768508434295654,
      "learning_rate": 9.119416458557093e-06,
      "loss": 0.0442,
      "step": 287280
    },
    {
      "epoch": 0.4701727512552123,
      "grad_norm": 1.4605700969696045,
      "learning_rate": 9.119350566343575e-06,
      "loss": 0.0425,
      "step": 287300
    },
    {
      "epoch": 0.47020548169386567,
      "grad_norm": 0.6060217022895813,
      "learning_rate": 9.119284674130059e-06,
      "loss": 0.0504,
      "step": 287320
    },
    {
      "epoch": 0.470238212132519,
      "grad_norm": 2.7678442001342773,
      "learning_rate": 9.11921878191654e-06,
      "loss": 0.0322,
      "step": 287340
    },
    {
      "epoch": 0.47027094257117236,
      "grad_norm": 0.6771437525749207,
      "learning_rate": 9.119152889703024e-06,
      "loss": 0.0415,
      "step": 287360
    },
    {
      "epoch": 0.4703036730098257,
      "grad_norm": 0.6961469054222107,
      "learning_rate": 9.119086997489506e-06,
      "loss": 0.0473,
      "step": 287380
    },
    {
      "epoch": 0.470336403448479,
      "grad_norm": 1.0609179735183716,
      "learning_rate": 9.11902110527599e-06,
      "loss": 0.0399,
      "step": 287400
    },
    {
      "epoch": 0.47036913388713236,
      "grad_norm": 0.9072968363761902,
      "learning_rate": 9.118955213062473e-06,
      "loss": 0.038,
      "step": 287420
    },
    {
      "epoch": 0.4704018643257857,
      "grad_norm": 0.712417721748352,
      "learning_rate": 9.118889320848955e-06,
      "loss": 0.0382,
      "step": 287440
    },
    {
      "epoch": 0.47043459476443905,
      "grad_norm": 2.6282222270965576,
      "learning_rate": 9.118823428635439e-06,
      "loss": 0.044,
      "step": 287460
    },
    {
      "epoch": 0.47046732520309237,
      "grad_norm": 2.03480863571167,
      "learning_rate": 9.118757536421922e-06,
      "loss": 0.0424,
      "step": 287480
    },
    {
      "epoch": 0.4705000556417457,
      "grad_norm": 1.8844205141067505,
      "learning_rate": 9.118691644208404e-06,
      "loss": 0.0385,
      "step": 287500
    },
    {
      "epoch": 0.47053278608039906,
      "grad_norm": 8.371224403381348,
      "learning_rate": 9.118625751994888e-06,
      "loss": 0.0393,
      "step": 287520
    },
    {
      "epoch": 0.4705655165190524,
      "grad_norm": 0.18358218669891357,
      "learning_rate": 9.118559859781371e-06,
      "loss": 0.028,
      "step": 287540
    },
    {
      "epoch": 0.47059824695770575,
      "grad_norm": 0.9707417488098145,
      "learning_rate": 9.118493967567853e-06,
      "loss": 0.038,
      "step": 287560
    },
    {
      "epoch": 0.47063097739635906,
      "grad_norm": 2.658597469329834,
      "learning_rate": 9.118428075354337e-06,
      "loss": 0.0399,
      "step": 287580
    },
    {
      "epoch": 0.4706637078350124,
      "grad_norm": 1.9683011770248413,
      "learning_rate": 9.118362183140819e-06,
      "loss": 0.0501,
      "step": 287600
    },
    {
      "epoch": 0.47069643827366575,
      "grad_norm": 1.615002155303955,
      "learning_rate": 9.118296290927302e-06,
      "loss": 0.0525,
      "step": 287620
    },
    {
      "epoch": 0.47072916871231907,
      "grad_norm": 2.1061062812805176,
      "learning_rate": 9.118230398713784e-06,
      "loss": 0.0507,
      "step": 287640
    },
    {
      "epoch": 0.47076189915097244,
      "grad_norm": 2.1793994903564453,
      "learning_rate": 9.118164506500268e-06,
      "loss": 0.0327,
      "step": 287660
    },
    {
      "epoch": 0.47079462958962576,
      "grad_norm": 9.816680908203125,
      "learning_rate": 9.11809861428675e-06,
      "loss": 0.0389,
      "step": 287680
    },
    {
      "epoch": 0.4708273600282791,
      "grad_norm": 1.7326589822769165,
      "learning_rate": 9.118032722073233e-06,
      "loss": 0.0323,
      "step": 287700
    },
    {
      "epoch": 0.47086009046693245,
      "grad_norm": 1.2812414169311523,
      "learning_rate": 9.117966829859715e-06,
      "loss": 0.0403,
      "step": 287720
    },
    {
      "epoch": 0.47089282090558576,
      "grad_norm": 2.1387786865234375,
      "learning_rate": 9.117900937646199e-06,
      "loss": 0.0519,
      "step": 287740
    },
    {
      "epoch": 0.47092555134423913,
      "grad_norm": 5.268518924713135,
      "learning_rate": 9.117835045432682e-06,
      "loss": 0.031,
      "step": 287760
    },
    {
      "epoch": 0.47095828178289245,
      "grad_norm": 0.5532621145248413,
      "learning_rate": 9.117769153219164e-06,
      "loss": 0.0286,
      "step": 287780
    },
    {
      "epoch": 0.47099101222154577,
      "grad_norm": 1.9776531457901,
      "learning_rate": 9.117703261005648e-06,
      "loss": 0.0386,
      "step": 287800
    },
    {
      "epoch": 0.47102374266019914,
      "grad_norm": 1.0383516550064087,
      "learning_rate": 9.11763736879213e-06,
      "loss": 0.0367,
      "step": 287820
    },
    {
      "epoch": 0.47105647309885246,
      "grad_norm": 0.727394163608551,
      "learning_rate": 9.117571476578613e-06,
      "loss": 0.0365,
      "step": 287840
    },
    {
      "epoch": 0.47108920353750583,
      "grad_norm": 1.2409723997116089,
      "learning_rate": 9.117505584365097e-06,
      "loss": 0.0286,
      "step": 287860
    },
    {
      "epoch": 0.47112193397615915,
      "grad_norm": 1.0921953916549683,
      "learning_rate": 9.117439692151579e-06,
      "loss": 0.0397,
      "step": 287880
    },
    {
      "epoch": 0.47115466441481246,
      "grad_norm": 2.392244815826416,
      "learning_rate": 9.117373799938062e-06,
      "loss": 0.0436,
      "step": 287900
    },
    {
      "epoch": 0.47118739485346584,
      "grad_norm": 1.034806728363037,
      "learning_rate": 9.117307907724546e-06,
      "loss": 0.0416,
      "step": 287920
    },
    {
      "epoch": 0.47122012529211915,
      "grad_norm": 3.4825704097747803,
      "learning_rate": 9.117242015511028e-06,
      "loss": 0.0433,
      "step": 287940
    },
    {
      "epoch": 0.4712528557307725,
      "grad_norm": 0.5717702507972717,
      "learning_rate": 9.117176123297511e-06,
      "loss": 0.0394,
      "step": 287960
    },
    {
      "epoch": 0.47128558616942584,
      "grad_norm": 1.0622659921646118,
      "learning_rate": 9.117110231083993e-06,
      "loss": 0.0386,
      "step": 287980
    },
    {
      "epoch": 0.47131831660807916,
      "grad_norm": 1.2607966661453247,
      "learning_rate": 9.117044338870477e-06,
      "loss": 0.0304,
      "step": 288000
    },
    {
      "epoch": 0.47135104704673253,
      "grad_norm": 0.6932875514030457,
      "learning_rate": 9.116978446656959e-06,
      "loss": 0.0239,
      "step": 288020
    },
    {
      "epoch": 0.47138377748538585,
      "grad_norm": 0.8764803409576416,
      "learning_rate": 9.116912554443442e-06,
      "loss": 0.0332,
      "step": 288040
    },
    {
      "epoch": 0.4714165079240392,
      "grad_norm": 0.5627075433731079,
      "learning_rate": 9.116846662229924e-06,
      "loss": 0.0371,
      "step": 288060
    },
    {
      "epoch": 0.47144923836269254,
      "grad_norm": 1.5342999696731567,
      "learning_rate": 9.116780770016408e-06,
      "loss": 0.0414,
      "step": 288080
    },
    {
      "epoch": 0.47148196880134585,
      "grad_norm": 1.537133812904358,
      "learning_rate": 9.116714877802891e-06,
      "loss": 0.0446,
      "step": 288100
    },
    {
      "epoch": 0.4715146992399992,
      "grad_norm": 0.7122849225997925,
      "learning_rate": 9.116648985589373e-06,
      "loss": 0.0426,
      "step": 288120
    },
    {
      "epoch": 0.47154742967865254,
      "grad_norm": 1.428636908531189,
      "learning_rate": 9.116583093375857e-06,
      "loss": 0.0373,
      "step": 288140
    },
    {
      "epoch": 0.4715801601173059,
      "grad_norm": 2.4909565448760986,
      "learning_rate": 9.116517201162339e-06,
      "loss": 0.0438,
      "step": 288160
    },
    {
      "epoch": 0.47161289055595923,
      "grad_norm": 0.3515729010105133,
      "learning_rate": 9.116451308948822e-06,
      "loss": 0.036,
      "step": 288180
    },
    {
      "epoch": 0.47164562099461255,
      "grad_norm": 0.4376154839992523,
      "learning_rate": 9.116385416735304e-06,
      "loss": 0.0386,
      "step": 288200
    },
    {
      "epoch": 0.4716783514332659,
      "grad_norm": 1.6545625925064087,
      "learning_rate": 9.116319524521788e-06,
      "loss": 0.0503,
      "step": 288220
    },
    {
      "epoch": 0.47171108187191924,
      "grad_norm": 2.9982473850250244,
      "learning_rate": 9.11625363230827e-06,
      "loss": 0.0402,
      "step": 288240
    },
    {
      "epoch": 0.4717438123105726,
      "grad_norm": 1.3655728101730347,
      "learning_rate": 9.116187740094753e-06,
      "loss": 0.0447,
      "step": 288260
    },
    {
      "epoch": 0.4717765427492259,
      "grad_norm": 2.30122971534729,
      "learning_rate": 9.116121847881237e-06,
      "loss": 0.0484,
      "step": 288280
    },
    {
      "epoch": 0.47180927318787924,
      "grad_norm": 0.9277076721191406,
      "learning_rate": 9.116055955667719e-06,
      "loss": 0.0389,
      "step": 288300
    },
    {
      "epoch": 0.4718420036265326,
      "grad_norm": 1.0471181869506836,
      "learning_rate": 9.115990063454202e-06,
      "loss": 0.0435,
      "step": 288320
    },
    {
      "epoch": 0.47187473406518593,
      "grad_norm": 3.524458646774292,
      "learning_rate": 9.115924171240686e-06,
      "loss": 0.049,
      "step": 288340
    },
    {
      "epoch": 0.4719074645038393,
      "grad_norm": 1.7802456617355347,
      "learning_rate": 9.115858279027168e-06,
      "loss": 0.0396,
      "step": 288360
    },
    {
      "epoch": 0.4719401949424926,
      "grad_norm": 1.2008005380630493,
      "learning_rate": 9.115792386813652e-06,
      "loss": 0.0398,
      "step": 288380
    },
    {
      "epoch": 0.47197292538114594,
      "grad_norm": 0.3431185483932495,
      "learning_rate": 9.115726494600133e-06,
      "loss": 0.0395,
      "step": 288400
    },
    {
      "epoch": 0.4720056558197993,
      "grad_norm": 1.6660854816436768,
      "learning_rate": 9.115660602386617e-06,
      "loss": 0.0537,
      "step": 288420
    },
    {
      "epoch": 0.4720383862584526,
      "grad_norm": 1.7296122312545776,
      "learning_rate": 9.115594710173099e-06,
      "loss": 0.0519,
      "step": 288440
    },
    {
      "epoch": 0.472071116697106,
      "grad_norm": 6.6674113273620605,
      "learning_rate": 9.115528817959582e-06,
      "loss": 0.0434,
      "step": 288460
    },
    {
      "epoch": 0.4721038471357593,
      "grad_norm": 3.1878015995025635,
      "learning_rate": 9.115462925746066e-06,
      "loss": 0.0365,
      "step": 288480
    },
    {
      "epoch": 0.47213657757441263,
      "grad_norm": 1.546344518661499,
      "learning_rate": 9.115397033532548e-06,
      "loss": 0.0392,
      "step": 288500
    },
    {
      "epoch": 0.472169308013066,
      "grad_norm": 1.3227571249008179,
      "learning_rate": 9.115331141319032e-06,
      "loss": 0.0334,
      "step": 288520
    },
    {
      "epoch": 0.4722020384517193,
      "grad_norm": 0.8309344053268433,
      "learning_rate": 9.115265249105513e-06,
      "loss": 0.036,
      "step": 288540
    },
    {
      "epoch": 0.4722347688903727,
      "grad_norm": 1.4721816778182983,
      "learning_rate": 9.115199356891997e-06,
      "loss": 0.0628,
      "step": 288560
    },
    {
      "epoch": 0.472267499329026,
      "grad_norm": 0.8491482138633728,
      "learning_rate": 9.115133464678479e-06,
      "loss": 0.0329,
      "step": 288580
    },
    {
      "epoch": 0.4723002297676793,
      "grad_norm": 1.0346308946609497,
      "learning_rate": 9.115067572464963e-06,
      "loss": 0.041,
      "step": 288600
    },
    {
      "epoch": 0.4723329602063327,
      "grad_norm": 3.4405100345611572,
      "learning_rate": 9.115001680251444e-06,
      "loss": 0.0464,
      "step": 288620
    },
    {
      "epoch": 0.472365690644986,
      "grad_norm": 1.0135200023651123,
      "learning_rate": 9.114935788037928e-06,
      "loss": 0.0476,
      "step": 288640
    },
    {
      "epoch": 0.4723984210836394,
      "grad_norm": 2.248300075531006,
      "learning_rate": 9.114869895824412e-06,
      "loss": 0.0551,
      "step": 288660
    },
    {
      "epoch": 0.4724311515222927,
      "grad_norm": 1.763096570968628,
      "learning_rate": 9.114804003610893e-06,
      "loss": 0.0328,
      "step": 288680
    },
    {
      "epoch": 0.472463881960946,
      "grad_norm": 0.9254676103591919,
      "learning_rate": 9.114738111397377e-06,
      "loss": 0.0379,
      "step": 288700
    },
    {
      "epoch": 0.4724966123995994,
      "grad_norm": 1.2926969528198242,
      "learning_rate": 9.11467221918386e-06,
      "loss": 0.0415,
      "step": 288720
    },
    {
      "epoch": 0.4725293428382527,
      "grad_norm": 2.4843695163726807,
      "learning_rate": 9.114606326970343e-06,
      "loss": 0.0471,
      "step": 288740
    },
    {
      "epoch": 0.4725620732769061,
      "grad_norm": 0.9225667715072632,
      "learning_rate": 9.114540434756826e-06,
      "loss": 0.0375,
      "step": 288760
    },
    {
      "epoch": 0.4725948037155594,
      "grad_norm": 0.929009735584259,
      "learning_rate": 9.114474542543308e-06,
      "loss": 0.0412,
      "step": 288780
    },
    {
      "epoch": 0.4726275341542127,
      "grad_norm": 1.3493999242782593,
      "learning_rate": 9.114408650329792e-06,
      "loss": 0.0384,
      "step": 288800
    },
    {
      "epoch": 0.4726602645928661,
      "grad_norm": 1.2988439798355103,
      "learning_rate": 9.114342758116275e-06,
      "loss": 0.0473,
      "step": 288820
    },
    {
      "epoch": 0.4726929950315194,
      "grad_norm": 2.2199809551239014,
      "learning_rate": 9.114276865902757e-06,
      "loss": 0.0456,
      "step": 288840
    },
    {
      "epoch": 0.4727257254701728,
      "grad_norm": 0.8055204749107361,
      "learning_rate": 9.11421097368924e-06,
      "loss": 0.0337,
      "step": 288860
    },
    {
      "epoch": 0.4727584559088261,
      "grad_norm": 1.1082720756530762,
      "learning_rate": 9.114145081475723e-06,
      "loss": 0.0452,
      "step": 288880
    },
    {
      "epoch": 0.4727911863474794,
      "grad_norm": 3.7992050647735596,
      "learning_rate": 9.114079189262206e-06,
      "loss": 0.0418,
      "step": 288900
    },
    {
      "epoch": 0.4728239167861328,
      "grad_norm": 1.2363905906677246,
      "learning_rate": 9.114013297048688e-06,
      "loss": 0.0425,
      "step": 288920
    },
    {
      "epoch": 0.4728566472247861,
      "grad_norm": 1.9073704481124878,
      "learning_rate": 9.113947404835172e-06,
      "loss": 0.0356,
      "step": 288940
    },
    {
      "epoch": 0.47288937766343947,
      "grad_norm": 2.447190523147583,
      "learning_rate": 9.113881512621654e-06,
      "loss": 0.049,
      "step": 288960
    },
    {
      "epoch": 0.4729221081020928,
      "grad_norm": 2.469655990600586,
      "learning_rate": 9.113815620408137e-06,
      "loss": 0.0397,
      "step": 288980
    },
    {
      "epoch": 0.4729548385407461,
      "grad_norm": 1.6844974756240845,
      "learning_rate": 9.113749728194619e-06,
      "loss": 0.0442,
      "step": 289000
    },
    {
      "epoch": 0.4729875689793995,
      "grad_norm": 1.8560930490493774,
      "learning_rate": 9.113683835981103e-06,
      "loss": 0.0482,
      "step": 289020
    },
    {
      "epoch": 0.4730202994180528,
      "grad_norm": 0.6796895861625671,
      "learning_rate": 9.113617943767586e-06,
      "loss": 0.0469,
      "step": 289040
    },
    {
      "epoch": 0.47305302985670616,
      "grad_norm": 0.1598690003156662,
      "learning_rate": 9.113552051554068e-06,
      "loss": 0.0567,
      "step": 289060
    },
    {
      "epoch": 0.4730857602953595,
      "grad_norm": 0.7048044800758362,
      "learning_rate": 9.113486159340552e-06,
      "loss": 0.0327,
      "step": 289080
    },
    {
      "epoch": 0.4731184907340128,
      "grad_norm": 1.2481153011322021,
      "learning_rate": 9.113420267127035e-06,
      "loss": 0.0308,
      "step": 289100
    },
    {
      "epoch": 0.47315122117266617,
      "grad_norm": 2.1464040279388428,
      "learning_rate": 9.113354374913517e-06,
      "loss": 0.0369,
      "step": 289120
    },
    {
      "epoch": 0.4731839516113195,
      "grad_norm": 1.6685165166854858,
      "learning_rate": 9.1132884827e-06,
      "loss": 0.0392,
      "step": 289140
    },
    {
      "epoch": 0.47321668204997286,
      "grad_norm": 2.033238172531128,
      "learning_rate": 9.113222590486484e-06,
      "loss": 0.0464,
      "step": 289160
    },
    {
      "epoch": 0.4732494124886262,
      "grad_norm": 3.376650810241699,
      "learning_rate": 9.113156698272966e-06,
      "loss": 0.0455,
      "step": 289180
    },
    {
      "epoch": 0.4732821429272795,
      "grad_norm": 1.0909264087677002,
      "learning_rate": 9.11309080605945e-06,
      "loss": 0.0392,
      "step": 289200
    },
    {
      "epoch": 0.47331487336593286,
      "grad_norm": 1.7101287841796875,
      "learning_rate": 9.113024913845932e-06,
      "loss": 0.0412,
      "step": 289220
    },
    {
      "epoch": 0.4733476038045862,
      "grad_norm": 1.727566123008728,
      "learning_rate": 9.112959021632415e-06,
      "loss": 0.0489,
      "step": 289240
    },
    {
      "epoch": 0.47338033424323955,
      "grad_norm": 2.218491554260254,
      "learning_rate": 9.112893129418897e-06,
      "loss": 0.044,
      "step": 289260
    },
    {
      "epoch": 0.47341306468189287,
      "grad_norm": 2.6204352378845215,
      "learning_rate": 9.11282723720538e-06,
      "loss": 0.047,
      "step": 289280
    },
    {
      "epoch": 0.4734457951205462,
      "grad_norm": 2.3045520782470703,
      "learning_rate": 9.112761344991863e-06,
      "loss": 0.0492,
      "step": 289300
    },
    {
      "epoch": 0.47347852555919956,
      "grad_norm": 2.311549425125122,
      "learning_rate": 9.112695452778346e-06,
      "loss": 0.0456,
      "step": 289320
    },
    {
      "epoch": 0.4735112559978529,
      "grad_norm": 0.8460783362388611,
      "learning_rate": 9.112629560564828e-06,
      "loss": 0.0613,
      "step": 289340
    },
    {
      "epoch": 0.47354398643650625,
      "grad_norm": 1.4664973020553589,
      "learning_rate": 9.112563668351312e-06,
      "loss": 0.0441,
      "step": 289360
    },
    {
      "epoch": 0.47357671687515956,
      "grad_norm": 2.0707175731658936,
      "learning_rate": 9.112497776137794e-06,
      "loss": 0.0338,
      "step": 289380
    },
    {
      "epoch": 0.4736094473138129,
      "grad_norm": 0.5961049199104309,
      "learning_rate": 9.112431883924277e-06,
      "loss": 0.0504,
      "step": 289400
    },
    {
      "epoch": 0.47364217775246625,
      "grad_norm": 0.8625052571296692,
      "learning_rate": 9.112365991710759e-06,
      "loss": 0.0408,
      "step": 289420
    },
    {
      "epoch": 0.47367490819111957,
      "grad_norm": 0.7244458794593811,
      "learning_rate": 9.112300099497243e-06,
      "loss": 0.0556,
      "step": 289440
    },
    {
      "epoch": 0.47370763862977294,
      "grad_norm": 5.376404285430908,
      "learning_rate": 9.112234207283726e-06,
      "loss": 0.0323,
      "step": 289460
    },
    {
      "epoch": 0.47374036906842626,
      "grad_norm": 1.0468506813049316,
      "learning_rate": 9.112168315070208e-06,
      "loss": 0.0595,
      "step": 289480
    },
    {
      "epoch": 0.4737730995070796,
      "grad_norm": 0.24863141775131226,
      "learning_rate": 9.112102422856692e-06,
      "loss": 0.0437,
      "step": 289500
    },
    {
      "epoch": 0.47380582994573295,
      "grad_norm": 2.0396370887756348,
      "learning_rate": 9.112036530643175e-06,
      "loss": 0.047,
      "step": 289520
    },
    {
      "epoch": 0.47383856038438626,
      "grad_norm": 3.3254141807556152,
      "learning_rate": 9.111970638429657e-06,
      "loss": 0.0417,
      "step": 289540
    },
    {
      "epoch": 0.47387129082303964,
      "grad_norm": 2.588202476501465,
      "learning_rate": 9.11190474621614e-06,
      "loss": 0.0371,
      "step": 289560
    },
    {
      "epoch": 0.47390402126169295,
      "grad_norm": 4.166726589202881,
      "learning_rate": 9.111838854002624e-06,
      "loss": 0.0505,
      "step": 289580
    },
    {
      "epoch": 0.47393675170034627,
      "grad_norm": 1.0914331674575806,
      "learning_rate": 9.111772961789106e-06,
      "loss": 0.0437,
      "step": 289600
    },
    {
      "epoch": 0.47396948213899964,
      "grad_norm": 3.298198699951172,
      "learning_rate": 9.11170706957559e-06,
      "loss": 0.0432,
      "step": 289620
    },
    {
      "epoch": 0.47400221257765296,
      "grad_norm": 0.6185903549194336,
      "learning_rate": 9.111641177362072e-06,
      "loss": 0.0512,
      "step": 289640
    },
    {
      "epoch": 0.47403494301630633,
      "grad_norm": 3.6162946224212646,
      "learning_rate": 9.111575285148555e-06,
      "loss": 0.046,
      "step": 289660
    },
    {
      "epoch": 0.47406767345495965,
      "grad_norm": 1.4937729835510254,
      "learning_rate": 9.111509392935037e-06,
      "loss": 0.0404,
      "step": 289680
    },
    {
      "epoch": 0.47410040389361296,
      "grad_norm": 1.1390182971954346,
      "learning_rate": 9.11144350072152e-06,
      "loss": 0.0391,
      "step": 289700
    },
    {
      "epoch": 0.47413313433226634,
      "grad_norm": 0.6213340163230896,
      "learning_rate": 9.111377608508003e-06,
      "loss": 0.0437,
      "step": 289720
    },
    {
      "epoch": 0.47416586477091965,
      "grad_norm": 1.7431626319885254,
      "learning_rate": 9.111311716294486e-06,
      "loss": 0.0361,
      "step": 289740
    },
    {
      "epoch": 0.474198595209573,
      "grad_norm": 0.8825559616088867,
      "learning_rate": 9.111245824080968e-06,
      "loss": 0.0274,
      "step": 289760
    },
    {
      "epoch": 0.47423132564822634,
      "grad_norm": 0.9191168546676636,
      "learning_rate": 9.111179931867452e-06,
      "loss": 0.0352,
      "step": 289780
    },
    {
      "epoch": 0.47426405608687966,
      "grad_norm": 0.9842787384986877,
      "learning_rate": 9.111114039653934e-06,
      "loss": 0.0355,
      "step": 289800
    },
    {
      "epoch": 0.47429678652553303,
      "grad_norm": 0.4556737542152405,
      "learning_rate": 9.111048147440417e-06,
      "loss": 0.0477,
      "step": 289820
    },
    {
      "epoch": 0.47432951696418635,
      "grad_norm": 2.5408518314361572,
      "learning_rate": 9.110982255226901e-06,
      "loss": 0.04,
      "step": 289840
    },
    {
      "epoch": 0.47436224740283967,
      "grad_norm": 1.2164733409881592,
      "learning_rate": 9.110916363013383e-06,
      "loss": 0.0589,
      "step": 289860
    },
    {
      "epoch": 0.47439497784149304,
      "grad_norm": 1.9603211879730225,
      "learning_rate": 9.110850470799866e-06,
      "loss": 0.0406,
      "step": 289880
    },
    {
      "epoch": 0.47442770828014635,
      "grad_norm": 2.095583438873291,
      "learning_rate": 9.11078457858635e-06,
      "loss": 0.0278,
      "step": 289900
    },
    {
      "epoch": 0.4744604387187997,
      "grad_norm": 1.497512698173523,
      "learning_rate": 9.110718686372832e-06,
      "loss": 0.034,
      "step": 289920
    },
    {
      "epoch": 0.47449316915745304,
      "grad_norm": 2.457366704940796,
      "learning_rate": 9.110652794159315e-06,
      "loss": 0.0485,
      "step": 289940
    },
    {
      "epoch": 0.47452589959610636,
      "grad_norm": 0.93975430727005,
      "learning_rate": 9.110586901945799e-06,
      "loss": 0.0426,
      "step": 289960
    },
    {
      "epoch": 0.47455863003475973,
      "grad_norm": 0.7588430643081665,
      "learning_rate": 9.110521009732281e-06,
      "loss": 0.0414,
      "step": 289980
    },
    {
      "epoch": 0.47459136047341305,
      "grad_norm": 1.6374729871749878,
      "learning_rate": 9.110455117518764e-06,
      "loss": 0.0351,
      "step": 290000
    },
    {
      "epoch": 0.4746240909120664,
      "grad_norm": 1.2329998016357422,
      "learning_rate": 9.110389225305246e-06,
      "loss": 0.0516,
      "step": 290020
    },
    {
      "epoch": 0.47465682135071974,
      "grad_norm": 1.7323957681655884,
      "learning_rate": 9.11032333309173e-06,
      "loss": 0.0278,
      "step": 290040
    },
    {
      "epoch": 0.47468955178937305,
      "grad_norm": 1.995503544807434,
      "learning_rate": 9.110257440878212e-06,
      "loss": 0.0446,
      "step": 290060
    },
    {
      "epoch": 0.4747222822280264,
      "grad_norm": 0.5629022717475891,
      "learning_rate": 9.110191548664695e-06,
      "loss": 0.0422,
      "step": 290080
    },
    {
      "epoch": 0.47475501266667974,
      "grad_norm": 1.3145548105239868,
      "learning_rate": 9.110125656451177e-06,
      "loss": 0.0373,
      "step": 290100
    },
    {
      "epoch": 0.4747877431053331,
      "grad_norm": 1.4821820259094238,
      "learning_rate": 9.110059764237661e-06,
      "loss": 0.0369,
      "step": 290120
    },
    {
      "epoch": 0.47482047354398643,
      "grad_norm": 0.8187030553817749,
      "learning_rate": 9.109993872024143e-06,
      "loss": 0.0319,
      "step": 290140
    },
    {
      "epoch": 0.47485320398263975,
      "grad_norm": 0.18507324159145355,
      "learning_rate": 9.109927979810626e-06,
      "loss": 0.0438,
      "step": 290160
    },
    {
      "epoch": 0.4748859344212931,
      "grad_norm": 0.9625216126441956,
      "learning_rate": 9.109862087597108e-06,
      "loss": 0.0478,
      "step": 290180
    },
    {
      "epoch": 0.47491866485994644,
      "grad_norm": 0.6266039609909058,
      "learning_rate": 9.109796195383592e-06,
      "loss": 0.0332,
      "step": 290200
    },
    {
      "epoch": 0.4749513952985998,
      "grad_norm": 0.8403836488723755,
      "learning_rate": 9.109730303170075e-06,
      "loss": 0.0184,
      "step": 290220
    },
    {
      "epoch": 0.4749841257372531,
      "grad_norm": 2.591496706008911,
      "learning_rate": 9.109664410956557e-06,
      "loss": 0.0386,
      "step": 290240
    },
    {
      "epoch": 0.47501685617590644,
      "grad_norm": 0.7041311860084534,
      "learning_rate": 9.109598518743041e-06,
      "loss": 0.0324,
      "step": 290260
    },
    {
      "epoch": 0.4750495866145598,
      "grad_norm": 1.3349263668060303,
      "learning_rate": 9.109532626529523e-06,
      "loss": 0.0344,
      "step": 290280
    },
    {
      "epoch": 0.47508231705321313,
      "grad_norm": 1.2619422674179077,
      "learning_rate": 9.109466734316006e-06,
      "loss": 0.0398,
      "step": 290300
    },
    {
      "epoch": 0.4751150474918665,
      "grad_norm": 0.963580846786499,
      "learning_rate": 9.10940084210249e-06,
      "loss": 0.0405,
      "step": 290320
    },
    {
      "epoch": 0.4751477779305198,
      "grad_norm": 1.6435465812683105,
      "learning_rate": 9.109334949888972e-06,
      "loss": 0.0423,
      "step": 290340
    },
    {
      "epoch": 0.47518050836917314,
      "grad_norm": 0.5350788235664368,
      "learning_rate": 9.109269057675455e-06,
      "loss": 0.0479,
      "step": 290360
    },
    {
      "epoch": 0.4752132388078265,
      "grad_norm": 0.8569130897521973,
      "learning_rate": 9.109203165461939e-06,
      "loss": 0.0346,
      "step": 290380
    },
    {
      "epoch": 0.4752459692464798,
      "grad_norm": 1.837065577507019,
      "learning_rate": 9.109137273248421e-06,
      "loss": 0.0421,
      "step": 290400
    },
    {
      "epoch": 0.4752786996851332,
      "grad_norm": 1.394731879234314,
      "learning_rate": 9.109071381034905e-06,
      "loss": 0.0453,
      "step": 290420
    },
    {
      "epoch": 0.4753114301237865,
      "grad_norm": 1.5246105194091797,
      "learning_rate": 9.109005488821386e-06,
      "loss": 0.0324,
      "step": 290440
    },
    {
      "epoch": 0.47534416056243983,
      "grad_norm": 1.8480496406555176,
      "learning_rate": 9.10893959660787e-06,
      "loss": 0.0457,
      "step": 290460
    },
    {
      "epoch": 0.4753768910010932,
      "grad_norm": 0.36564579606056213,
      "learning_rate": 9.108873704394352e-06,
      "loss": 0.0362,
      "step": 290480
    },
    {
      "epoch": 0.4754096214397465,
      "grad_norm": 1.4134948253631592,
      "learning_rate": 9.108807812180835e-06,
      "loss": 0.0404,
      "step": 290500
    },
    {
      "epoch": 0.4754423518783999,
      "grad_norm": 1.8540064096450806,
      "learning_rate": 9.108741919967317e-06,
      "loss": 0.0508,
      "step": 290520
    },
    {
      "epoch": 0.4754750823170532,
      "grad_norm": 2.553485155105591,
      "learning_rate": 9.108676027753801e-06,
      "loss": 0.0354,
      "step": 290540
    },
    {
      "epoch": 0.4755078127557065,
      "grad_norm": 0.5370932817459106,
      "learning_rate": 9.108610135540285e-06,
      "loss": 0.0522,
      "step": 290560
    },
    {
      "epoch": 0.4755405431943599,
      "grad_norm": 1.4158886671066284,
      "learning_rate": 9.108544243326766e-06,
      "loss": 0.035,
      "step": 290580
    },
    {
      "epoch": 0.4755732736330132,
      "grad_norm": 1.0143136978149414,
      "learning_rate": 9.10847835111325e-06,
      "loss": 0.0471,
      "step": 290600
    },
    {
      "epoch": 0.4756060040716666,
      "grad_norm": 0.4451799988746643,
      "learning_rate": 9.108412458899732e-06,
      "loss": 0.0313,
      "step": 290620
    },
    {
      "epoch": 0.4756387345103199,
      "grad_norm": 0.9008345007896423,
      "learning_rate": 9.108346566686216e-06,
      "loss": 0.0321,
      "step": 290640
    },
    {
      "epoch": 0.4756714649489732,
      "grad_norm": 3.0171329975128174,
      "learning_rate": 9.108280674472697e-06,
      "loss": 0.042,
      "step": 290660
    },
    {
      "epoch": 0.4757041953876266,
      "grad_norm": 0.6349284052848816,
      "learning_rate": 9.108214782259181e-06,
      "loss": 0.0413,
      "step": 290680
    },
    {
      "epoch": 0.4757369258262799,
      "grad_norm": 2.0221447944641113,
      "learning_rate": 9.108148890045665e-06,
      "loss": 0.0501,
      "step": 290700
    },
    {
      "epoch": 0.4757696562649333,
      "grad_norm": 1.390199065208435,
      "learning_rate": 9.108082997832146e-06,
      "loss": 0.053,
      "step": 290720
    },
    {
      "epoch": 0.4758023867035866,
      "grad_norm": 1.9436379671096802,
      "learning_rate": 9.10801710561863e-06,
      "loss": 0.0378,
      "step": 290740
    },
    {
      "epoch": 0.4758351171422399,
      "grad_norm": 1.6062449216842651,
      "learning_rate": 9.107951213405114e-06,
      "loss": 0.0381,
      "step": 290760
    },
    {
      "epoch": 0.4758678475808933,
      "grad_norm": 2.847482204437256,
      "learning_rate": 9.107885321191596e-06,
      "loss": 0.0313,
      "step": 290780
    },
    {
      "epoch": 0.4759005780195466,
      "grad_norm": 1.2834250926971436,
      "learning_rate": 9.107819428978079e-06,
      "loss": 0.0425,
      "step": 290800
    },
    {
      "epoch": 0.4759333084582,
      "grad_norm": 0.49671289324760437,
      "learning_rate": 9.107753536764561e-06,
      "loss": 0.0281,
      "step": 290820
    },
    {
      "epoch": 0.4759660388968533,
      "grad_norm": 0.33209463953971863,
      "learning_rate": 9.107687644551045e-06,
      "loss": 0.0415,
      "step": 290840
    },
    {
      "epoch": 0.4759987693355066,
      "grad_norm": 1.3688671588897705,
      "learning_rate": 9.107621752337527e-06,
      "loss": 0.0495,
      "step": 290860
    },
    {
      "epoch": 0.47603149977416,
      "grad_norm": 1.535826563835144,
      "learning_rate": 9.10755586012401e-06,
      "loss": 0.051,
      "step": 290880
    },
    {
      "epoch": 0.4760642302128133,
      "grad_norm": 2.563106060028076,
      "learning_rate": 9.107489967910492e-06,
      "loss": 0.0396,
      "step": 290900
    },
    {
      "epoch": 0.47609696065146667,
      "grad_norm": 0.8593422174453735,
      "learning_rate": 9.107424075696976e-06,
      "loss": 0.0409,
      "step": 290920
    },
    {
      "epoch": 0.47612969109012,
      "grad_norm": 1.2099334001541138,
      "learning_rate": 9.10735818348346e-06,
      "loss": 0.0376,
      "step": 290940
    },
    {
      "epoch": 0.4761624215287733,
      "grad_norm": 1.6602946519851685,
      "learning_rate": 9.107292291269941e-06,
      "loss": 0.0518,
      "step": 290960
    },
    {
      "epoch": 0.4761951519674267,
      "grad_norm": 0.3066882789134979,
      "learning_rate": 9.107226399056425e-06,
      "loss": 0.0522,
      "step": 290980
    },
    {
      "epoch": 0.47622788240608,
      "grad_norm": 3.2247915267944336,
      "learning_rate": 9.107160506842907e-06,
      "loss": 0.0448,
      "step": 291000
    },
    {
      "epoch": 0.47626061284473337,
      "grad_norm": 0.9286414980888367,
      "learning_rate": 9.10709461462939e-06,
      "loss": 0.0439,
      "step": 291020
    },
    {
      "epoch": 0.4762933432833867,
      "grad_norm": 4.306798934936523,
      "learning_rate": 9.107028722415872e-06,
      "loss": 0.05,
      "step": 291040
    },
    {
      "epoch": 0.47632607372204,
      "grad_norm": 0.9928284883499146,
      "learning_rate": 9.106962830202356e-06,
      "loss": 0.0465,
      "step": 291060
    },
    {
      "epoch": 0.47635880416069337,
      "grad_norm": 2.3477470874786377,
      "learning_rate": 9.106896937988837e-06,
      "loss": 0.0379,
      "step": 291080
    },
    {
      "epoch": 0.4763915345993467,
      "grad_norm": 1.72347092628479,
      "learning_rate": 9.106831045775321e-06,
      "loss": 0.0381,
      "step": 291100
    },
    {
      "epoch": 0.47642426503800006,
      "grad_norm": 5.152987003326416,
      "learning_rate": 9.106765153561805e-06,
      "loss": 0.0475,
      "step": 291120
    },
    {
      "epoch": 0.4764569954766534,
      "grad_norm": 1.0795021057128906,
      "learning_rate": 9.106699261348288e-06,
      "loss": 0.0433,
      "step": 291140
    },
    {
      "epoch": 0.4764897259153067,
      "grad_norm": 1.0452337265014648,
      "learning_rate": 9.10663336913477e-06,
      "loss": 0.0463,
      "step": 291160
    },
    {
      "epoch": 0.47652245635396007,
      "grad_norm": 2.5342721939086914,
      "learning_rate": 9.106567476921254e-06,
      "loss": 0.0356,
      "step": 291180
    },
    {
      "epoch": 0.4765551867926134,
      "grad_norm": 0.8196765184402466,
      "learning_rate": 9.106501584707736e-06,
      "loss": 0.0417,
      "step": 291200
    },
    {
      "epoch": 0.47658791723126676,
      "grad_norm": 2.1947684288024902,
      "learning_rate": 9.10643569249422e-06,
      "loss": 0.0322,
      "step": 291220
    },
    {
      "epoch": 0.4766206476699201,
      "grad_norm": 0.6259059906005859,
      "learning_rate": 9.106369800280701e-06,
      "loss": 0.0441,
      "step": 291240
    },
    {
      "epoch": 0.4766533781085734,
      "grad_norm": 1.847282886505127,
      "learning_rate": 9.106303908067185e-06,
      "loss": 0.0398,
      "step": 291260
    },
    {
      "epoch": 0.47668610854722676,
      "grad_norm": 3.261279582977295,
      "learning_rate": 9.106238015853668e-06,
      "loss": 0.0495,
      "step": 291280
    },
    {
      "epoch": 0.4767188389858801,
      "grad_norm": 2.6053526401519775,
      "learning_rate": 9.10617212364015e-06,
      "loss": 0.0393,
      "step": 291300
    },
    {
      "epoch": 0.47675156942453345,
      "grad_norm": 0.9833301901817322,
      "learning_rate": 9.106106231426634e-06,
      "loss": 0.0462,
      "step": 291320
    },
    {
      "epoch": 0.47678429986318677,
      "grad_norm": 0.40343761444091797,
      "learning_rate": 9.106040339213116e-06,
      "loss": 0.0552,
      "step": 291340
    },
    {
      "epoch": 0.4768170303018401,
      "grad_norm": 2.240851402282715,
      "learning_rate": 9.1059744469996e-06,
      "loss": 0.029,
      "step": 291360
    },
    {
      "epoch": 0.47684976074049346,
      "grad_norm": 5.866394996643066,
      "learning_rate": 9.105908554786081e-06,
      "loss": 0.0401,
      "step": 291380
    },
    {
      "epoch": 0.4768824911791468,
      "grad_norm": 0.939910888671875,
      "learning_rate": 9.105842662572565e-06,
      "loss": 0.0578,
      "step": 291400
    },
    {
      "epoch": 0.47691522161780014,
      "grad_norm": 0.7435741424560547,
      "learning_rate": 9.105776770359047e-06,
      "loss": 0.0416,
      "step": 291420
    },
    {
      "epoch": 0.47694795205645346,
      "grad_norm": 0.4884408712387085,
      "learning_rate": 9.10571087814553e-06,
      "loss": 0.0417,
      "step": 291440
    },
    {
      "epoch": 0.4769806824951068,
      "grad_norm": 0.7186105251312256,
      "learning_rate": 9.105644985932012e-06,
      "loss": 0.0418,
      "step": 291460
    },
    {
      "epoch": 0.47701341293376015,
      "grad_norm": 1.6203495264053345,
      "learning_rate": 9.105579093718496e-06,
      "loss": 0.0595,
      "step": 291480
    },
    {
      "epoch": 0.47704614337241347,
      "grad_norm": 0.6074581146240234,
      "learning_rate": 9.10551320150498e-06,
      "loss": 0.0552,
      "step": 291500
    },
    {
      "epoch": 0.47707887381106684,
      "grad_norm": 0.6946496963500977,
      "learning_rate": 9.105447309291461e-06,
      "loss": 0.0357,
      "step": 291520
    },
    {
      "epoch": 0.47711160424972016,
      "grad_norm": 3.1064865589141846,
      "learning_rate": 9.105381417077945e-06,
      "loss": 0.0437,
      "step": 291540
    },
    {
      "epoch": 0.4771443346883735,
      "grad_norm": 1.6412851810455322,
      "learning_rate": 9.105315524864428e-06,
      "loss": 0.0556,
      "step": 291560
    },
    {
      "epoch": 0.47717706512702684,
      "grad_norm": 2.361670970916748,
      "learning_rate": 9.10524963265091e-06,
      "loss": 0.0546,
      "step": 291580
    },
    {
      "epoch": 0.47720979556568016,
      "grad_norm": 1.1332730054855347,
      "learning_rate": 9.105183740437394e-06,
      "loss": 0.0457,
      "step": 291600
    },
    {
      "epoch": 0.47724252600433353,
      "grad_norm": 2.9868013858795166,
      "learning_rate": 9.105117848223877e-06,
      "loss": 0.0424,
      "step": 291620
    },
    {
      "epoch": 0.47727525644298685,
      "grad_norm": 1.1242817640304565,
      "learning_rate": 9.10505195601036e-06,
      "loss": 0.0435,
      "step": 291640
    },
    {
      "epoch": 0.47730798688164017,
      "grad_norm": 1.1161017417907715,
      "learning_rate": 9.104986063796843e-06,
      "loss": 0.0487,
      "step": 291660
    },
    {
      "epoch": 0.47734071732029354,
      "grad_norm": 1.293127417564392,
      "learning_rate": 9.104920171583325e-06,
      "loss": 0.053,
      "step": 291680
    },
    {
      "epoch": 0.47737344775894686,
      "grad_norm": 2.086953639984131,
      "learning_rate": 9.104854279369808e-06,
      "loss": 0.0358,
      "step": 291700
    },
    {
      "epoch": 0.47740617819760023,
      "grad_norm": 2.251904010772705,
      "learning_rate": 9.10478838715629e-06,
      "loss": 0.0396,
      "step": 291720
    },
    {
      "epoch": 0.47743890863625355,
      "grad_norm": 1.0654748678207397,
      "learning_rate": 9.104722494942774e-06,
      "loss": 0.0384,
      "step": 291740
    },
    {
      "epoch": 0.47747163907490686,
      "grad_norm": 0.7983400225639343,
      "learning_rate": 9.104656602729256e-06,
      "loss": 0.0415,
      "step": 291760
    },
    {
      "epoch": 0.47750436951356023,
      "grad_norm": 0.7768393754959106,
      "learning_rate": 9.10459071051574e-06,
      "loss": 0.0453,
      "step": 291780
    },
    {
      "epoch": 0.47753709995221355,
      "grad_norm": 1.4524791240692139,
      "learning_rate": 9.104524818302221e-06,
      "loss": 0.0474,
      "step": 291800
    },
    {
      "epoch": 0.4775698303908669,
      "grad_norm": 3.3530752658843994,
      "learning_rate": 9.104458926088705e-06,
      "loss": 0.0529,
      "step": 291820
    },
    {
      "epoch": 0.47760256082952024,
      "grad_norm": 1.8357553482055664,
      "learning_rate": 9.104393033875187e-06,
      "loss": 0.0422,
      "step": 291840
    },
    {
      "epoch": 0.47763529126817356,
      "grad_norm": 0.9651163220405579,
      "learning_rate": 9.10432714166167e-06,
      "loss": 0.0369,
      "step": 291860
    },
    {
      "epoch": 0.47766802170682693,
      "grad_norm": 2.157052516937256,
      "learning_rate": 9.104261249448154e-06,
      "loss": 0.0369,
      "step": 291880
    },
    {
      "epoch": 0.47770075214548025,
      "grad_norm": 1.0056129693984985,
      "learning_rate": 9.104195357234636e-06,
      "loss": 0.0426,
      "step": 291900
    },
    {
      "epoch": 0.4777334825841336,
      "grad_norm": 1.3038326501846313,
      "learning_rate": 9.10412946502112e-06,
      "loss": 0.0411,
      "step": 291920
    },
    {
      "epoch": 0.47776621302278693,
      "grad_norm": 0.26418453454971313,
      "learning_rate": 9.104063572807603e-06,
      "loss": 0.0361,
      "step": 291940
    },
    {
      "epoch": 0.47779894346144025,
      "grad_norm": 13.123638153076172,
      "learning_rate": 9.103997680594085e-06,
      "loss": 0.0384,
      "step": 291960
    },
    {
      "epoch": 0.4778316739000936,
      "grad_norm": 1.7744908332824707,
      "learning_rate": 9.103931788380568e-06,
      "loss": 0.05,
      "step": 291980
    },
    {
      "epoch": 0.47786440433874694,
      "grad_norm": 0.7024470567703247,
      "learning_rate": 9.103865896167052e-06,
      "loss": 0.0329,
      "step": 292000
    },
    {
      "epoch": 0.4778971347774003,
      "grad_norm": 0.44691184163093567,
      "learning_rate": 9.103800003953534e-06,
      "loss": 0.0493,
      "step": 292020
    },
    {
      "epoch": 0.47792986521605363,
      "grad_norm": 2.062086343765259,
      "learning_rate": 9.103734111740017e-06,
      "loss": 0.0311,
      "step": 292040
    },
    {
      "epoch": 0.47796259565470695,
      "grad_norm": 2.4070398807525635,
      "learning_rate": 9.1036682195265e-06,
      "loss": 0.0273,
      "step": 292060
    },
    {
      "epoch": 0.4779953260933603,
      "grad_norm": 1.0668034553527832,
      "learning_rate": 9.103602327312983e-06,
      "loss": 0.0433,
      "step": 292080
    },
    {
      "epoch": 0.47802805653201363,
      "grad_norm": 0.673554539680481,
      "learning_rate": 9.103536435099465e-06,
      "loss": 0.0345,
      "step": 292100
    },
    {
      "epoch": 0.478060786970667,
      "grad_norm": 0.8613057136535645,
      "learning_rate": 9.103470542885948e-06,
      "loss": 0.0391,
      "step": 292120
    },
    {
      "epoch": 0.4780935174093203,
      "grad_norm": 0.6752690076828003,
      "learning_rate": 9.10340465067243e-06,
      "loss": 0.0551,
      "step": 292140
    },
    {
      "epoch": 0.47812624784797364,
      "grad_norm": 1.3831440210342407,
      "learning_rate": 9.103338758458914e-06,
      "loss": 0.0374,
      "step": 292160
    },
    {
      "epoch": 0.478158978286627,
      "grad_norm": 0.7845044136047363,
      "learning_rate": 9.103272866245396e-06,
      "loss": 0.0351,
      "step": 292180
    },
    {
      "epoch": 0.47819170872528033,
      "grad_norm": 2.7650821208953857,
      "learning_rate": 9.10320697403188e-06,
      "loss": 0.0589,
      "step": 292200
    },
    {
      "epoch": 0.4782244391639337,
      "grad_norm": 1.9633724689483643,
      "learning_rate": 9.103141081818361e-06,
      "loss": 0.0388,
      "step": 292220
    },
    {
      "epoch": 0.478257169602587,
      "grad_norm": 0.31431305408477783,
      "learning_rate": 9.103075189604845e-06,
      "loss": 0.0393,
      "step": 292240
    },
    {
      "epoch": 0.47828990004124033,
      "grad_norm": 2.2959766387939453,
      "learning_rate": 9.103009297391327e-06,
      "loss": 0.0368,
      "step": 292260
    },
    {
      "epoch": 0.4783226304798937,
      "grad_norm": 1.2702245712280273,
      "learning_rate": 9.10294340517781e-06,
      "loss": 0.0458,
      "step": 292280
    },
    {
      "epoch": 0.478355360918547,
      "grad_norm": 1.7119911909103394,
      "learning_rate": 9.102877512964294e-06,
      "loss": 0.038,
      "step": 292300
    },
    {
      "epoch": 0.4783880913572004,
      "grad_norm": 1.4210724830627441,
      "learning_rate": 9.102811620750776e-06,
      "loss": 0.0403,
      "step": 292320
    },
    {
      "epoch": 0.4784208217958537,
      "grad_norm": 3.0798285007476807,
      "learning_rate": 9.10274572853726e-06,
      "loss": 0.04,
      "step": 292340
    },
    {
      "epoch": 0.47845355223450703,
      "grad_norm": 1.2196857929229736,
      "learning_rate": 9.102679836323743e-06,
      "loss": 0.0343,
      "step": 292360
    },
    {
      "epoch": 0.4784862826731604,
      "grad_norm": 1.095695972442627,
      "learning_rate": 9.102613944110225e-06,
      "loss": 0.0371,
      "step": 292380
    },
    {
      "epoch": 0.4785190131118137,
      "grad_norm": 0.7764724493026733,
      "learning_rate": 9.102548051896708e-06,
      "loss": 0.034,
      "step": 292400
    },
    {
      "epoch": 0.4785517435504671,
      "grad_norm": 0.32628360390663147,
      "learning_rate": 9.102482159683192e-06,
      "loss": 0.0298,
      "step": 292420
    },
    {
      "epoch": 0.4785844739891204,
      "grad_norm": 1.396579384803772,
      "learning_rate": 9.102416267469674e-06,
      "loss": 0.0384,
      "step": 292440
    },
    {
      "epoch": 0.4786172044277737,
      "grad_norm": 0.5605176091194153,
      "learning_rate": 9.102350375256158e-06,
      "loss": 0.0553,
      "step": 292460
    },
    {
      "epoch": 0.4786499348664271,
      "grad_norm": 1.6407763957977295,
      "learning_rate": 9.10228448304264e-06,
      "loss": 0.0505,
      "step": 292480
    },
    {
      "epoch": 0.4786826653050804,
      "grad_norm": 2.708575963973999,
      "learning_rate": 9.102218590829123e-06,
      "loss": 0.0243,
      "step": 292500
    },
    {
      "epoch": 0.4787153957437338,
      "grad_norm": 0.4902137517929077,
      "learning_rate": 9.102152698615605e-06,
      "loss": 0.0197,
      "step": 292520
    },
    {
      "epoch": 0.4787481261823871,
      "grad_norm": 1.8950510025024414,
      "learning_rate": 9.102086806402089e-06,
      "loss": 0.0364,
      "step": 292540
    },
    {
      "epoch": 0.4787808566210404,
      "grad_norm": 4.033817768096924,
      "learning_rate": 9.10202091418857e-06,
      "loss": 0.0483,
      "step": 292560
    },
    {
      "epoch": 0.4788135870596938,
      "grad_norm": 2.360495090484619,
      "learning_rate": 9.101955021975054e-06,
      "loss": 0.0457,
      "step": 292580
    },
    {
      "epoch": 0.4788463174983471,
      "grad_norm": 0.8760396838188171,
      "learning_rate": 9.101889129761536e-06,
      "loss": 0.0424,
      "step": 292600
    },
    {
      "epoch": 0.4788790479370005,
      "grad_norm": 2.003556966781616,
      "learning_rate": 9.10182323754802e-06,
      "loss": 0.0491,
      "step": 292620
    },
    {
      "epoch": 0.4789117783756538,
      "grad_norm": 3.2739505767822266,
      "learning_rate": 9.101757345334501e-06,
      "loss": 0.0331,
      "step": 292640
    },
    {
      "epoch": 0.4789445088143071,
      "grad_norm": 1.409620761871338,
      "learning_rate": 9.101691453120985e-06,
      "loss": 0.0465,
      "step": 292660
    },
    {
      "epoch": 0.4789772392529605,
      "grad_norm": 4.48078727722168,
      "learning_rate": 9.101625560907469e-06,
      "loss": 0.0469,
      "step": 292680
    },
    {
      "epoch": 0.4790099696916138,
      "grad_norm": 1.2580121755599976,
      "learning_rate": 9.10155966869395e-06,
      "loss": 0.0286,
      "step": 292700
    },
    {
      "epoch": 0.4790427001302671,
      "grad_norm": 1.1202707290649414,
      "learning_rate": 9.101493776480434e-06,
      "loss": 0.0294,
      "step": 292720
    },
    {
      "epoch": 0.4790754305689205,
      "grad_norm": 1.3387062549591064,
      "learning_rate": 9.101427884266918e-06,
      "loss": 0.0414,
      "step": 292740
    },
    {
      "epoch": 0.4791081610075738,
      "grad_norm": 0.2672528922557831,
      "learning_rate": 9.1013619920534e-06,
      "loss": 0.0365,
      "step": 292760
    },
    {
      "epoch": 0.4791408914462272,
      "grad_norm": 0.9371476173400879,
      "learning_rate": 9.101296099839883e-06,
      "loss": 0.0359,
      "step": 292780
    },
    {
      "epoch": 0.4791736218848805,
      "grad_norm": 4.22537088394165,
      "learning_rate": 9.101230207626367e-06,
      "loss": 0.0471,
      "step": 292800
    },
    {
      "epoch": 0.4792063523235338,
      "grad_norm": 1.2949014902114868,
      "learning_rate": 9.101164315412849e-06,
      "loss": 0.0396,
      "step": 292820
    },
    {
      "epoch": 0.4792390827621872,
      "grad_norm": 1.5984654426574707,
      "learning_rate": 9.101098423199332e-06,
      "loss": 0.0454,
      "step": 292840
    },
    {
      "epoch": 0.4792718132008405,
      "grad_norm": 8.007771492004395,
      "learning_rate": 9.101032530985814e-06,
      "loss": 0.0586,
      "step": 292860
    },
    {
      "epoch": 0.4793045436394939,
      "grad_norm": 2.5393965244293213,
      "learning_rate": 9.100966638772298e-06,
      "loss": 0.0501,
      "step": 292880
    },
    {
      "epoch": 0.4793372740781472,
      "grad_norm": 3.0142321586608887,
      "learning_rate": 9.10090074655878e-06,
      "loss": 0.0303,
      "step": 292900
    },
    {
      "epoch": 0.4793700045168005,
      "grad_norm": 3.053706645965576,
      "learning_rate": 9.100834854345263e-06,
      "loss": 0.0376,
      "step": 292920
    },
    {
      "epoch": 0.4794027349554539,
      "grad_norm": 1.7609091997146606,
      "learning_rate": 9.100768962131745e-06,
      "loss": 0.0376,
      "step": 292940
    },
    {
      "epoch": 0.4794354653941072,
      "grad_norm": 0.3684093654155731,
      "learning_rate": 9.100703069918229e-06,
      "loss": 0.0346,
      "step": 292960
    },
    {
      "epoch": 0.47946819583276057,
      "grad_norm": 0.8882220387458801,
      "learning_rate": 9.10063717770471e-06,
      "loss": 0.0528,
      "step": 292980
    },
    {
      "epoch": 0.4795009262714139,
      "grad_norm": 2.4355051517486572,
      "learning_rate": 9.100571285491194e-06,
      "loss": 0.037,
      "step": 293000
    },
    {
      "epoch": 0.4795336567100672,
      "grad_norm": 0.8173329830169678,
      "learning_rate": 9.100505393277676e-06,
      "loss": 0.0571,
      "step": 293020
    },
    {
      "epoch": 0.4795663871487206,
      "grad_norm": 0.39945173263549805,
      "learning_rate": 9.10043950106416e-06,
      "loss": 0.0397,
      "step": 293040
    },
    {
      "epoch": 0.4795991175873739,
      "grad_norm": 1.5199047327041626,
      "learning_rate": 9.100373608850643e-06,
      "loss": 0.0348,
      "step": 293060
    },
    {
      "epoch": 0.47963184802602726,
      "grad_norm": 2.1188771724700928,
      "learning_rate": 9.100307716637125e-06,
      "loss": 0.0384,
      "step": 293080
    },
    {
      "epoch": 0.4796645784646806,
      "grad_norm": 1.0885354280471802,
      "learning_rate": 9.100241824423609e-06,
      "loss": 0.0472,
      "step": 293100
    },
    {
      "epoch": 0.4796973089033339,
      "grad_norm": 3.09783935546875,
      "learning_rate": 9.10017593221009e-06,
      "loss": 0.0332,
      "step": 293120
    },
    {
      "epoch": 0.47973003934198727,
      "grad_norm": 2.187560796737671,
      "learning_rate": 9.100110039996574e-06,
      "loss": 0.0374,
      "step": 293140
    },
    {
      "epoch": 0.4797627697806406,
      "grad_norm": 2.7516438961029053,
      "learning_rate": 9.100044147783058e-06,
      "loss": 0.0366,
      "step": 293160
    },
    {
      "epoch": 0.47979550021929396,
      "grad_norm": 0.3930667042732239,
      "learning_rate": 9.09997825556954e-06,
      "loss": 0.0424,
      "step": 293180
    },
    {
      "epoch": 0.4798282306579473,
      "grad_norm": 1.1717475652694702,
      "learning_rate": 9.099912363356023e-06,
      "loss": 0.0409,
      "step": 293200
    },
    {
      "epoch": 0.4798609610966006,
      "grad_norm": 0.9987185001373291,
      "learning_rate": 9.099846471142507e-06,
      "loss": 0.0429,
      "step": 293220
    },
    {
      "epoch": 0.47989369153525396,
      "grad_norm": 0.8555759191513062,
      "learning_rate": 9.099780578928989e-06,
      "loss": 0.0539,
      "step": 293240
    },
    {
      "epoch": 0.4799264219739073,
      "grad_norm": 3.106415271759033,
      "learning_rate": 9.099714686715472e-06,
      "loss": 0.0485,
      "step": 293260
    },
    {
      "epoch": 0.47995915241256065,
      "grad_norm": 1.543548822402954,
      "learning_rate": 9.099648794501954e-06,
      "loss": 0.042,
      "step": 293280
    },
    {
      "epoch": 0.47999188285121397,
      "grad_norm": 2.1469132900238037,
      "learning_rate": 9.099582902288438e-06,
      "loss": 0.0449,
      "step": 293300
    },
    {
      "epoch": 0.4800246132898673,
      "grad_norm": 1.2743682861328125,
      "learning_rate": 9.09951701007492e-06,
      "loss": 0.0365,
      "step": 293320
    },
    {
      "epoch": 0.48005734372852066,
      "grad_norm": 1.6728787422180176,
      "learning_rate": 9.099451117861403e-06,
      "loss": 0.0378,
      "step": 293340
    },
    {
      "epoch": 0.480090074167174,
      "grad_norm": 0.31086844205856323,
      "learning_rate": 9.099385225647885e-06,
      "loss": 0.0426,
      "step": 293360
    },
    {
      "epoch": 0.48012280460582735,
      "grad_norm": 2.8316280841827393,
      "learning_rate": 9.099319333434369e-06,
      "loss": 0.0409,
      "step": 293380
    },
    {
      "epoch": 0.48015553504448066,
      "grad_norm": 0.7987734079360962,
      "learning_rate": 9.099253441220852e-06,
      "loss": 0.0431,
      "step": 293400
    },
    {
      "epoch": 0.480188265483134,
      "grad_norm": 1.5421936511993408,
      "learning_rate": 9.099187549007334e-06,
      "loss": 0.0509,
      "step": 293420
    },
    {
      "epoch": 0.48022099592178735,
      "grad_norm": 2.2751386165618896,
      "learning_rate": 9.099121656793818e-06,
      "loss": 0.0529,
      "step": 293440
    },
    {
      "epoch": 0.48025372636044067,
      "grad_norm": 3.5225110054016113,
      "learning_rate": 9.0990557645803e-06,
      "loss": 0.0404,
      "step": 293460
    },
    {
      "epoch": 0.48028645679909404,
      "grad_norm": 2.552849769592285,
      "learning_rate": 9.098989872366783e-06,
      "loss": 0.0507,
      "step": 293480
    },
    {
      "epoch": 0.48031918723774736,
      "grad_norm": 2.3843748569488525,
      "learning_rate": 9.098923980153265e-06,
      "loss": 0.0373,
      "step": 293500
    },
    {
      "epoch": 0.4803519176764007,
      "grad_norm": 2.2803211212158203,
      "learning_rate": 9.098858087939749e-06,
      "loss": 0.0427,
      "step": 293520
    },
    {
      "epoch": 0.48038464811505405,
      "grad_norm": 0.3093762695789337,
      "learning_rate": 9.098792195726232e-06,
      "loss": 0.0322,
      "step": 293540
    },
    {
      "epoch": 0.48041737855370736,
      "grad_norm": 0.6322012543678284,
      "learning_rate": 9.098726303512714e-06,
      "loss": 0.0405,
      "step": 293560
    },
    {
      "epoch": 0.48045010899236074,
      "grad_norm": 1.6415034532546997,
      "learning_rate": 9.098660411299198e-06,
      "loss": 0.05,
      "step": 293580
    },
    {
      "epoch": 0.48048283943101405,
      "grad_norm": 1.4828722476959229,
      "learning_rate": 9.098594519085681e-06,
      "loss": 0.0351,
      "step": 293600
    },
    {
      "epoch": 0.48051556986966737,
      "grad_norm": 0.6669511795043945,
      "learning_rate": 9.098528626872163e-06,
      "loss": 0.0353,
      "step": 293620
    },
    {
      "epoch": 0.48054830030832074,
      "grad_norm": 2.9275355339050293,
      "learning_rate": 9.098462734658647e-06,
      "loss": 0.034,
      "step": 293640
    },
    {
      "epoch": 0.48058103074697406,
      "grad_norm": 2.49867844581604,
      "learning_rate": 9.098396842445129e-06,
      "loss": 0.0391,
      "step": 293660
    },
    {
      "epoch": 0.48061376118562743,
      "grad_norm": 1.0617696046829224,
      "learning_rate": 9.098330950231612e-06,
      "loss": 0.0277,
      "step": 293680
    },
    {
      "epoch": 0.48064649162428075,
      "grad_norm": 1.0641803741455078,
      "learning_rate": 9.098265058018094e-06,
      "loss": 0.0441,
      "step": 293700
    },
    {
      "epoch": 0.48067922206293406,
      "grad_norm": 1.982783317565918,
      "learning_rate": 9.098199165804578e-06,
      "loss": 0.0338,
      "step": 293720
    },
    {
      "epoch": 0.48071195250158744,
      "grad_norm": 3.9636902809143066,
      "learning_rate": 9.098133273591061e-06,
      "loss": 0.0384,
      "step": 293740
    },
    {
      "epoch": 0.48074468294024075,
      "grad_norm": 0.6264902949333191,
      "learning_rate": 9.098067381377543e-06,
      "loss": 0.044,
      "step": 293760
    },
    {
      "epoch": 0.4807774133788941,
      "grad_norm": 1.108064889907837,
      "learning_rate": 9.098001489164027e-06,
      "loss": 0.0485,
      "step": 293780
    },
    {
      "epoch": 0.48081014381754744,
      "grad_norm": 0.5656499862670898,
      "learning_rate": 9.097935596950509e-06,
      "loss": 0.0254,
      "step": 293800
    },
    {
      "epoch": 0.48084287425620076,
      "grad_norm": 1.5833491086959839,
      "learning_rate": 9.097869704736992e-06,
      "loss": 0.0341,
      "step": 293820
    },
    {
      "epoch": 0.48087560469485413,
      "grad_norm": 3.1203982830047607,
      "learning_rate": 9.097803812523474e-06,
      "loss": 0.039,
      "step": 293840
    },
    {
      "epoch": 0.48090833513350745,
      "grad_norm": 0.9139994382858276,
      "learning_rate": 9.097737920309958e-06,
      "loss": 0.0247,
      "step": 293860
    },
    {
      "epoch": 0.4809410655721608,
      "grad_norm": 0.9735878109931946,
      "learning_rate": 9.09767202809644e-06,
      "loss": 0.0541,
      "step": 293880
    },
    {
      "epoch": 0.48097379601081414,
      "grad_norm": 0.7360042333602905,
      "learning_rate": 9.097606135882923e-06,
      "loss": 0.0374,
      "step": 293900
    },
    {
      "epoch": 0.48100652644946745,
      "grad_norm": 2.448056697845459,
      "learning_rate": 9.097540243669407e-06,
      "loss": 0.0446,
      "step": 293920
    },
    {
      "epoch": 0.4810392568881208,
      "grad_norm": 2.359691619873047,
      "learning_rate": 9.097474351455889e-06,
      "loss": 0.0298,
      "step": 293940
    },
    {
      "epoch": 0.48107198732677414,
      "grad_norm": 1.30057692527771,
      "learning_rate": 9.097408459242372e-06,
      "loss": 0.037,
      "step": 293960
    },
    {
      "epoch": 0.4811047177654275,
      "grad_norm": 0.8807966113090515,
      "learning_rate": 9.097342567028856e-06,
      "loss": 0.0433,
      "step": 293980
    },
    {
      "epoch": 0.48113744820408083,
      "grad_norm": 1.4925612211227417,
      "learning_rate": 9.097276674815338e-06,
      "loss": 0.0311,
      "step": 294000
    },
    {
      "epoch": 0.48117017864273415,
      "grad_norm": 0.15199138224124908,
      "learning_rate": 9.097210782601821e-06,
      "loss": 0.0383,
      "step": 294020
    },
    {
      "epoch": 0.4812029090813875,
      "grad_norm": 1.06351900100708,
      "learning_rate": 9.097144890388303e-06,
      "loss": 0.0463,
      "step": 294040
    },
    {
      "epoch": 0.48123563952004084,
      "grad_norm": 1.9320244789123535,
      "learning_rate": 9.097078998174787e-06,
      "loss": 0.0336,
      "step": 294060
    },
    {
      "epoch": 0.4812683699586942,
      "grad_norm": 1.6234725713729858,
      "learning_rate": 9.09701310596127e-06,
      "loss": 0.0516,
      "step": 294080
    },
    {
      "epoch": 0.4813011003973475,
      "grad_norm": 1.4386903047561646,
      "learning_rate": 9.096947213747752e-06,
      "loss": 0.0434,
      "step": 294100
    },
    {
      "epoch": 0.48133383083600084,
      "grad_norm": 0.9441224932670593,
      "learning_rate": 9.096881321534236e-06,
      "loss": 0.0353,
      "step": 294120
    },
    {
      "epoch": 0.4813665612746542,
      "grad_norm": 1.731484293937683,
      "learning_rate": 9.096815429320718e-06,
      "loss": 0.0388,
      "step": 294140
    },
    {
      "epoch": 0.48139929171330753,
      "grad_norm": 0.6686043739318848,
      "learning_rate": 9.096749537107201e-06,
      "loss": 0.0308,
      "step": 294160
    },
    {
      "epoch": 0.4814320221519609,
      "grad_norm": 0.3357160985469818,
      "learning_rate": 9.096683644893683e-06,
      "loss": 0.055,
      "step": 294180
    },
    {
      "epoch": 0.4814647525906142,
      "grad_norm": 1.7364864349365234,
      "learning_rate": 9.096617752680167e-06,
      "loss": 0.0382,
      "step": 294200
    },
    {
      "epoch": 0.48149748302926754,
      "grad_norm": 0.7177051901817322,
      "learning_rate": 9.096551860466649e-06,
      "loss": 0.0391,
      "step": 294220
    },
    {
      "epoch": 0.4815302134679209,
      "grad_norm": 1.5973076820373535,
      "learning_rate": 9.096485968253132e-06,
      "loss": 0.0473,
      "step": 294240
    },
    {
      "epoch": 0.4815629439065742,
      "grad_norm": 0.8908619284629822,
      "learning_rate": 9.096420076039614e-06,
      "loss": 0.0463,
      "step": 294260
    },
    {
      "epoch": 0.4815956743452276,
      "grad_norm": 2.9830071926116943,
      "learning_rate": 9.096354183826098e-06,
      "loss": 0.0354,
      "step": 294280
    },
    {
      "epoch": 0.4816284047838809,
      "grad_norm": 2.9022374153137207,
      "learning_rate": 9.09628829161258e-06,
      "loss": 0.0461,
      "step": 294300
    },
    {
      "epoch": 0.48166113522253423,
      "grad_norm": 4.826310157775879,
      "learning_rate": 9.096222399399063e-06,
      "loss": 0.0497,
      "step": 294320
    },
    {
      "epoch": 0.4816938656611876,
      "grad_norm": 4.6424102783203125,
      "learning_rate": 9.096156507185547e-06,
      "loss": 0.0505,
      "step": 294340
    },
    {
      "epoch": 0.4817265960998409,
      "grad_norm": 1.0656163692474365,
      "learning_rate": 9.096090614972029e-06,
      "loss": 0.0321,
      "step": 294360
    },
    {
      "epoch": 0.4817593265384943,
      "grad_norm": 0.966258704662323,
      "learning_rate": 9.096024722758512e-06,
      "loss": 0.0461,
      "step": 294380
    },
    {
      "epoch": 0.4817920569771476,
      "grad_norm": 1.6829372644424438,
      "learning_rate": 9.095958830544996e-06,
      "loss": 0.0275,
      "step": 294400
    },
    {
      "epoch": 0.4818247874158009,
      "grad_norm": 2.5495009422302246,
      "learning_rate": 9.095892938331478e-06,
      "loss": 0.0394,
      "step": 294420
    },
    {
      "epoch": 0.4818575178544543,
      "grad_norm": 3.905705451965332,
      "learning_rate": 9.095827046117961e-06,
      "loss": 0.0428,
      "step": 294440
    },
    {
      "epoch": 0.4818902482931076,
      "grad_norm": 0.9376145005226135,
      "learning_rate": 9.095761153904445e-06,
      "loss": 0.0402,
      "step": 294460
    },
    {
      "epoch": 0.481922978731761,
      "grad_norm": 2.122976541519165,
      "learning_rate": 9.095695261690927e-06,
      "loss": 0.0371,
      "step": 294480
    },
    {
      "epoch": 0.4819557091704143,
      "grad_norm": 2.434436321258545,
      "learning_rate": 9.09562936947741e-06,
      "loss": 0.0361,
      "step": 294500
    },
    {
      "epoch": 0.4819884396090676,
      "grad_norm": 2.164414882659912,
      "learning_rate": 9.095563477263892e-06,
      "loss": 0.0482,
      "step": 294520
    },
    {
      "epoch": 0.482021170047721,
      "grad_norm": 1.0053943395614624,
      "learning_rate": 9.095497585050376e-06,
      "loss": 0.0308,
      "step": 294540
    },
    {
      "epoch": 0.4820539004863743,
      "grad_norm": 2.6367344856262207,
      "learning_rate": 9.095431692836858e-06,
      "loss": 0.038,
      "step": 294560
    },
    {
      "epoch": 0.4820866309250277,
      "grad_norm": 0.7875930666923523,
      "learning_rate": 9.095365800623342e-06,
      "loss": 0.0449,
      "step": 294580
    },
    {
      "epoch": 0.482119361363681,
      "grad_norm": 3.5620198249816895,
      "learning_rate": 9.095299908409823e-06,
      "loss": 0.0444,
      "step": 294600
    },
    {
      "epoch": 0.4821520918023343,
      "grad_norm": 0.6630274653434753,
      "learning_rate": 9.095234016196307e-06,
      "loss": 0.0402,
      "step": 294620
    },
    {
      "epoch": 0.4821848222409877,
      "grad_norm": 7.9255266189575195,
      "learning_rate": 9.095168123982789e-06,
      "loss": 0.0357,
      "step": 294640
    },
    {
      "epoch": 0.482217552679641,
      "grad_norm": 3.6454601287841797,
      "learning_rate": 9.095102231769272e-06,
      "loss": 0.0368,
      "step": 294660
    },
    {
      "epoch": 0.4822502831182944,
      "grad_norm": 1.071577787399292,
      "learning_rate": 9.095036339555754e-06,
      "loss": 0.0502,
      "step": 294680
    },
    {
      "epoch": 0.4822830135569477,
      "grad_norm": 0.941344141960144,
      "learning_rate": 9.094970447342238e-06,
      "loss": 0.04,
      "step": 294700
    },
    {
      "epoch": 0.482315743995601,
      "grad_norm": 0.8928764462471008,
      "learning_rate": 9.094904555128722e-06,
      "loss": 0.032,
      "step": 294720
    },
    {
      "epoch": 0.4823484744342544,
      "grad_norm": 2.9059669971466064,
      "learning_rate": 9.094838662915203e-06,
      "loss": 0.0441,
      "step": 294740
    },
    {
      "epoch": 0.4823812048729077,
      "grad_norm": 2.134793519973755,
      "learning_rate": 9.094772770701687e-06,
      "loss": 0.0542,
      "step": 294760
    },
    {
      "epoch": 0.48241393531156107,
      "grad_norm": 1.0684070587158203,
      "learning_rate": 9.09470687848817e-06,
      "loss": 0.0314,
      "step": 294780
    },
    {
      "epoch": 0.4824466657502144,
      "grad_norm": 0.6031386256217957,
      "learning_rate": 9.094640986274652e-06,
      "loss": 0.0429,
      "step": 294800
    },
    {
      "epoch": 0.4824793961888677,
      "grad_norm": 0.7940046787261963,
      "learning_rate": 9.094575094061136e-06,
      "loss": 0.0371,
      "step": 294820
    },
    {
      "epoch": 0.4825121266275211,
      "grad_norm": 2.056225299835205,
      "learning_rate": 9.09450920184762e-06,
      "loss": 0.0438,
      "step": 294840
    },
    {
      "epoch": 0.4825448570661744,
      "grad_norm": 0.519488513469696,
      "learning_rate": 9.094443309634102e-06,
      "loss": 0.0444,
      "step": 294860
    },
    {
      "epoch": 0.48257758750482777,
      "grad_norm": 0.6175560355186462,
      "learning_rate": 9.094377417420585e-06,
      "loss": 0.0361,
      "step": 294880
    },
    {
      "epoch": 0.4826103179434811,
      "grad_norm": 1.007922887802124,
      "learning_rate": 9.094311525207067e-06,
      "loss": 0.0313,
      "step": 294900
    },
    {
      "epoch": 0.4826430483821344,
      "grad_norm": 0.9521949887275696,
      "learning_rate": 9.09424563299355e-06,
      "loss": 0.0431,
      "step": 294920
    },
    {
      "epoch": 0.48267577882078777,
      "grad_norm": 0.573840320110321,
      "learning_rate": 9.094179740780033e-06,
      "loss": 0.0488,
      "step": 294940
    },
    {
      "epoch": 0.4827085092594411,
      "grad_norm": 2.016309976577759,
      "learning_rate": 9.094113848566516e-06,
      "loss": 0.0406,
      "step": 294960
    },
    {
      "epoch": 0.48274123969809446,
      "grad_norm": 0.709386944770813,
      "learning_rate": 9.094047956352998e-06,
      "loss": 0.04,
      "step": 294980
    },
    {
      "epoch": 0.4827739701367478,
      "grad_norm": 1.155044674873352,
      "learning_rate": 9.093982064139482e-06,
      "loss": 0.0346,
      "step": 295000
    },
    {
      "epoch": 0.4828067005754011,
      "grad_norm": 2.942761182785034,
      "learning_rate": 9.093916171925963e-06,
      "loss": 0.0398,
      "step": 295020
    },
    {
      "epoch": 0.48283943101405447,
      "grad_norm": 1.5813394784927368,
      "learning_rate": 9.093850279712447e-06,
      "loss": 0.044,
      "step": 295040
    },
    {
      "epoch": 0.4828721614527078,
      "grad_norm": 1.2294330596923828,
      "learning_rate": 9.093784387498929e-06,
      "loss": 0.0379,
      "step": 295060
    },
    {
      "epoch": 0.48290489189136115,
      "grad_norm": 0.8095505237579346,
      "learning_rate": 9.093718495285413e-06,
      "loss": 0.0323,
      "step": 295080
    },
    {
      "epoch": 0.48293762233001447,
      "grad_norm": 1.8032995462417603,
      "learning_rate": 9.093652603071894e-06,
      "loss": 0.0429,
      "step": 295100
    },
    {
      "epoch": 0.4829703527686678,
      "grad_norm": 1.438066840171814,
      "learning_rate": 9.093586710858378e-06,
      "loss": 0.0358,
      "step": 295120
    },
    {
      "epoch": 0.48300308320732116,
      "grad_norm": 0.5836100578308105,
      "learning_rate": 9.093520818644862e-06,
      "loss": 0.0374,
      "step": 295140
    },
    {
      "epoch": 0.4830358136459745,
      "grad_norm": 1.433047890663147,
      "learning_rate": 9.093454926431344e-06,
      "loss": 0.0377,
      "step": 295160
    },
    {
      "epoch": 0.48306854408462785,
      "grad_norm": 1.709914207458496,
      "learning_rate": 9.093389034217827e-06,
      "loss": 0.0314,
      "step": 295180
    },
    {
      "epoch": 0.48310127452328117,
      "grad_norm": 8.215436935424805,
      "learning_rate": 9.09332314200431e-06,
      "loss": 0.0336,
      "step": 295200
    },
    {
      "epoch": 0.4831340049619345,
      "grad_norm": 2.440814733505249,
      "learning_rate": 9.093257249790793e-06,
      "loss": 0.0365,
      "step": 295220
    },
    {
      "epoch": 0.48316673540058785,
      "grad_norm": 1.5107728242874146,
      "learning_rate": 9.093191357577276e-06,
      "loss": 0.0285,
      "step": 295240
    },
    {
      "epoch": 0.48319946583924117,
      "grad_norm": 1.0147451162338257,
      "learning_rate": 9.09312546536376e-06,
      "loss": 0.0375,
      "step": 295260
    },
    {
      "epoch": 0.48323219627789454,
      "grad_norm": 0.2983475625514984,
      "learning_rate": 9.093059573150242e-06,
      "loss": 0.0455,
      "step": 295280
    },
    {
      "epoch": 0.48326492671654786,
      "grad_norm": 1.3283683061599731,
      "learning_rate": 9.092993680936725e-06,
      "loss": 0.047,
      "step": 295300
    },
    {
      "epoch": 0.4832976571552012,
      "grad_norm": 0.6756284236907959,
      "learning_rate": 9.092927788723207e-06,
      "loss": 0.0502,
      "step": 295320
    },
    {
      "epoch": 0.48333038759385455,
      "grad_norm": 1.2963112592697144,
      "learning_rate": 9.09286189650969e-06,
      "loss": 0.0355,
      "step": 295340
    },
    {
      "epoch": 0.48336311803250787,
      "grad_norm": 1.2125509977340698,
      "learning_rate": 9.092796004296173e-06,
      "loss": 0.0437,
      "step": 295360
    },
    {
      "epoch": 0.48339584847116124,
      "grad_norm": 0.8704033493995667,
      "learning_rate": 9.092730112082656e-06,
      "loss": 0.035,
      "step": 295380
    },
    {
      "epoch": 0.48342857890981455,
      "grad_norm": 5.920471668243408,
      "learning_rate": 9.092664219869138e-06,
      "loss": 0.0406,
      "step": 295400
    },
    {
      "epoch": 0.48346130934846787,
      "grad_norm": 0.7909554839134216,
      "learning_rate": 9.092598327655622e-06,
      "loss": 0.0431,
      "step": 295420
    },
    {
      "epoch": 0.48349403978712124,
      "grad_norm": 0.4391317069530487,
      "learning_rate": 9.092532435442104e-06,
      "loss": 0.0406,
      "step": 295440
    },
    {
      "epoch": 0.48352677022577456,
      "grad_norm": 2.2303266525268555,
      "learning_rate": 9.092466543228587e-06,
      "loss": 0.053,
      "step": 295460
    },
    {
      "epoch": 0.4835595006644279,
      "grad_norm": 0.6243427395820618,
      "learning_rate": 9.092400651015069e-06,
      "loss": 0.0361,
      "step": 295480
    },
    {
      "epoch": 0.48359223110308125,
      "grad_norm": 2.7439382076263428,
      "learning_rate": 9.092334758801553e-06,
      "loss": 0.0535,
      "step": 295500
    },
    {
      "epoch": 0.48362496154173457,
      "grad_norm": 1.1298850774765015,
      "learning_rate": 9.092268866588036e-06,
      "loss": 0.0527,
      "step": 295520
    },
    {
      "epoch": 0.48365769198038794,
      "grad_norm": 1.7823379039764404,
      "learning_rate": 9.092202974374518e-06,
      "loss": 0.0329,
      "step": 295540
    },
    {
      "epoch": 0.48369042241904125,
      "grad_norm": 0.327351450920105,
      "learning_rate": 9.092137082161002e-06,
      "loss": 0.031,
      "step": 295560
    },
    {
      "epoch": 0.48372315285769457,
      "grad_norm": 0.8961813449859619,
      "learning_rate": 9.092071189947485e-06,
      "loss": 0.07,
      "step": 295580
    },
    {
      "epoch": 0.48375588329634794,
      "grad_norm": 1.1813093423843384,
      "learning_rate": 9.092005297733967e-06,
      "loss": 0.0376,
      "step": 295600
    },
    {
      "epoch": 0.48378861373500126,
      "grad_norm": 1.4593486785888672,
      "learning_rate": 9.09193940552045e-06,
      "loss": 0.0373,
      "step": 295620
    },
    {
      "epoch": 0.48382134417365463,
      "grad_norm": 0.7575263977050781,
      "learning_rate": 9.091873513306934e-06,
      "loss": 0.044,
      "step": 295640
    },
    {
      "epoch": 0.48385407461230795,
      "grad_norm": 1.1215238571166992,
      "learning_rate": 9.091807621093416e-06,
      "loss": 0.0289,
      "step": 295660
    },
    {
      "epoch": 0.48388680505096127,
      "grad_norm": 3.6913228034973145,
      "learning_rate": 9.0917417288799e-06,
      "loss": 0.0227,
      "step": 295680
    },
    {
      "epoch": 0.48391953548961464,
      "grad_norm": 1.5642101764678955,
      "learning_rate": 9.091675836666382e-06,
      "loss": 0.035,
      "step": 295700
    },
    {
      "epoch": 0.48395226592826796,
      "grad_norm": 4.956078052520752,
      "learning_rate": 9.091609944452865e-06,
      "loss": 0.039,
      "step": 295720
    },
    {
      "epoch": 0.4839849963669213,
      "grad_norm": 1.3913463354110718,
      "learning_rate": 9.091544052239347e-06,
      "loss": 0.0449,
      "step": 295740
    },
    {
      "epoch": 0.48401772680557464,
      "grad_norm": 1.551382303237915,
      "learning_rate": 9.09147816002583e-06,
      "loss": 0.0362,
      "step": 295760
    },
    {
      "epoch": 0.48405045724422796,
      "grad_norm": 0.9094139933586121,
      "learning_rate": 9.091412267812313e-06,
      "loss": 0.0345,
      "step": 295780
    },
    {
      "epoch": 0.48408318768288133,
      "grad_norm": 0.631846010684967,
      "learning_rate": 9.091346375598796e-06,
      "loss": 0.0514,
      "step": 295800
    },
    {
      "epoch": 0.48411591812153465,
      "grad_norm": 1.898438811302185,
      "learning_rate": 9.091280483385278e-06,
      "loss": 0.0403,
      "step": 295820
    },
    {
      "epoch": 0.484148648560188,
      "grad_norm": 1.7191892862319946,
      "learning_rate": 9.091214591171762e-06,
      "loss": 0.0277,
      "step": 295840
    },
    {
      "epoch": 0.48418137899884134,
      "grad_norm": 0.284183531999588,
      "learning_rate": 9.091148698958245e-06,
      "loss": 0.0311,
      "step": 295860
    },
    {
      "epoch": 0.48421410943749466,
      "grad_norm": 4.6706719398498535,
      "learning_rate": 9.091082806744727e-06,
      "loss": 0.0455,
      "step": 295880
    },
    {
      "epoch": 0.484246839876148,
      "grad_norm": 3.1876275539398193,
      "learning_rate": 9.09101691453121e-06,
      "loss": 0.0425,
      "step": 295900
    },
    {
      "epoch": 0.48427957031480134,
      "grad_norm": 0.9907189011573792,
      "learning_rate": 9.090951022317693e-06,
      "loss": 0.0451,
      "step": 295920
    },
    {
      "epoch": 0.4843123007534547,
      "grad_norm": 0.5401191711425781,
      "learning_rate": 9.090885130104176e-06,
      "loss": 0.0262,
      "step": 295940
    },
    {
      "epoch": 0.48434503119210803,
      "grad_norm": 0.5197334885597229,
      "learning_rate": 9.090819237890658e-06,
      "loss": 0.0296,
      "step": 295960
    },
    {
      "epoch": 0.48437776163076135,
      "grad_norm": 3.422576904296875,
      "learning_rate": 9.090753345677142e-06,
      "loss": 0.0424,
      "step": 295980
    },
    {
      "epoch": 0.4844104920694147,
      "grad_norm": 1.0591334104537964,
      "learning_rate": 9.090687453463625e-06,
      "loss": 0.0432,
      "step": 296000
    },
    {
      "epoch": 0.48444322250806804,
      "grad_norm": 1.2129098176956177,
      "learning_rate": 9.090621561250107e-06,
      "loss": 0.0473,
      "step": 296020
    },
    {
      "epoch": 0.4844759529467214,
      "grad_norm": 0.23388059437274933,
      "learning_rate": 9.09055566903659e-06,
      "loss": 0.0393,
      "step": 296040
    },
    {
      "epoch": 0.4845086833853747,
      "grad_norm": 0.41942986845970154,
      "learning_rate": 9.090489776823074e-06,
      "loss": 0.0291,
      "step": 296060
    },
    {
      "epoch": 0.48454141382402804,
      "grad_norm": 0.49564969539642334,
      "learning_rate": 9.090423884609556e-06,
      "loss": 0.035,
      "step": 296080
    },
    {
      "epoch": 0.4845741442626814,
      "grad_norm": 1.1474758386611938,
      "learning_rate": 9.09035799239604e-06,
      "loss": 0.0413,
      "step": 296100
    },
    {
      "epoch": 0.48460687470133473,
      "grad_norm": 1.6178075075149536,
      "learning_rate": 9.090292100182522e-06,
      "loss": 0.0388,
      "step": 296120
    },
    {
      "epoch": 0.4846396051399881,
      "grad_norm": 0.4994572103023529,
      "learning_rate": 9.090226207969005e-06,
      "loss": 0.0351,
      "step": 296140
    },
    {
      "epoch": 0.4846723355786414,
      "grad_norm": 1.205075740814209,
      "learning_rate": 9.090160315755487e-06,
      "loss": 0.042,
      "step": 296160
    },
    {
      "epoch": 0.48470506601729474,
      "grad_norm": 0.8866998553276062,
      "learning_rate": 9.090094423541971e-06,
      "loss": 0.0485,
      "step": 296180
    },
    {
      "epoch": 0.4847377964559481,
      "grad_norm": 2.2177646160125732,
      "learning_rate": 9.090028531328454e-06,
      "loss": 0.0386,
      "step": 296200
    },
    {
      "epoch": 0.48477052689460143,
      "grad_norm": 1.6243252754211426,
      "learning_rate": 9.089962639114936e-06,
      "loss": 0.0437,
      "step": 296220
    },
    {
      "epoch": 0.4848032573332548,
      "grad_norm": 1.193677544593811,
      "learning_rate": 9.08989674690142e-06,
      "loss": 0.031,
      "step": 296240
    },
    {
      "epoch": 0.4848359877719081,
      "grad_norm": 0.6273325681686401,
      "learning_rate": 9.089830854687902e-06,
      "loss": 0.0392,
      "step": 296260
    },
    {
      "epoch": 0.48486871821056143,
      "grad_norm": 0.25642168521881104,
      "learning_rate": 9.089764962474385e-06,
      "loss": 0.0446,
      "step": 296280
    },
    {
      "epoch": 0.4849014486492148,
      "grad_norm": 0.6340298056602478,
      "learning_rate": 9.089699070260867e-06,
      "loss": 0.0343,
      "step": 296300
    },
    {
      "epoch": 0.4849341790878681,
      "grad_norm": 0.675931453704834,
      "learning_rate": 9.089633178047351e-06,
      "loss": 0.0383,
      "step": 296320
    },
    {
      "epoch": 0.4849669095265215,
      "grad_norm": 3.5223257541656494,
      "learning_rate": 9.089567285833833e-06,
      "loss": 0.0447,
      "step": 296340
    },
    {
      "epoch": 0.4849996399651748,
      "grad_norm": 1.7802796363830566,
      "learning_rate": 9.089501393620316e-06,
      "loss": 0.0518,
      "step": 296360
    },
    {
      "epoch": 0.48503237040382813,
      "grad_norm": 1.586646556854248,
      "learning_rate": 9.0894355014068e-06,
      "loss": 0.0539,
      "step": 296380
    },
    {
      "epoch": 0.4850651008424815,
      "grad_norm": 0.8965623378753662,
      "learning_rate": 9.089369609193282e-06,
      "loss": 0.0533,
      "step": 296400
    },
    {
      "epoch": 0.4850978312811348,
      "grad_norm": 1.9408087730407715,
      "learning_rate": 9.089303716979765e-06,
      "loss": 0.0369,
      "step": 296420
    },
    {
      "epoch": 0.4851305617197882,
      "grad_norm": 1.4318078756332397,
      "learning_rate": 9.089237824766249e-06,
      "loss": 0.0366,
      "step": 296440
    },
    {
      "epoch": 0.4851632921584415,
      "grad_norm": 0.9115373492240906,
      "learning_rate": 9.089171932552731e-06,
      "loss": 0.0303,
      "step": 296460
    },
    {
      "epoch": 0.4851960225970948,
      "grad_norm": 3.270358085632324,
      "learning_rate": 9.089106040339214e-06,
      "loss": 0.0423,
      "step": 296480
    },
    {
      "epoch": 0.4852287530357482,
      "grad_norm": 2.7207601070404053,
      "learning_rate": 9.089040148125696e-06,
      "loss": 0.0335,
      "step": 296500
    },
    {
      "epoch": 0.4852614834744015,
      "grad_norm": 3.9557244777679443,
      "learning_rate": 9.08897425591218e-06,
      "loss": 0.0462,
      "step": 296520
    },
    {
      "epoch": 0.4852942139130549,
      "grad_norm": 0.6010644435882568,
      "learning_rate": 9.088908363698662e-06,
      "loss": 0.0358,
      "step": 296540
    },
    {
      "epoch": 0.4853269443517082,
      "grad_norm": 1.301953911781311,
      "learning_rate": 9.088842471485145e-06,
      "loss": 0.0328,
      "step": 296560
    },
    {
      "epoch": 0.4853596747903615,
      "grad_norm": 2.855872631072998,
      "learning_rate": 9.088776579271629e-06,
      "loss": 0.0514,
      "step": 296580
    },
    {
      "epoch": 0.4853924052290149,
      "grad_norm": 0.20930169522762299,
      "learning_rate": 9.088710687058111e-06,
      "loss": 0.0401,
      "step": 296600
    },
    {
      "epoch": 0.4854251356676682,
      "grad_norm": 0.37928441166877747,
      "learning_rate": 9.088644794844595e-06,
      "loss": 0.0576,
      "step": 296620
    },
    {
      "epoch": 0.4854578661063216,
      "grad_norm": 0.9917553663253784,
      "learning_rate": 9.088578902631076e-06,
      "loss": 0.0502,
      "step": 296640
    },
    {
      "epoch": 0.4854905965449749,
      "grad_norm": 4.405543327331543,
      "learning_rate": 9.08851301041756e-06,
      "loss": 0.0398,
      "step": 296660
    },
    {
      "epoch": 0.4855233269836282,
      "grad_norm": 0.37161117792129517,
      "learning_rate": 9.088447118204042e-06,
      "loss": 0.035,
      "step": 296680
    },
    {
      "epoch": 0.4855560574222816,
      "grad_norm": 1.4337365627288818,
      "learning_rate": 9.088381225990525e-06,
      "loss": 0.0354,
      "step": 296700
    },
    {
      "epoch": 0.4855887878609349,
      "grad_norm": 2.0438930988311768,
      "learning_rate": 9.088315333777007e-06,
      "loss": 0.0363,
      "step": 296720
    },
    {
      "epoch": 0.4856215182995883,
      "grad_norm": 0.9753355979919434,
      "learning_rate": 9.088249441563491e-06,
      "loss": 0.0306,
      "step": 296740
    },
    {
      "epoch": 0.4856542487382416,
      "grad_norm": 1.3878333568572998,
      "learning_rate": 9.088183549349975e-06,
      "loss": 0.0331,
      "step": 296760
    },
    {
      "epoch": 0.4856869791768949,
      "grad_norm": 1.6414211988449097,
      "learning_rate": 9.088117657136456e-06,
      "loss": 0.0386,
      "step": 296780
    },
    {
      "epoch": 0.4857197096155483,
      "grad_norm": 1.2955015897750854,
      "learning_rate": 9.08805176492294e-06,
      "loss": 0.0498,
      "step": 296800
    },
    {
      "epoch": 0.4857524400542016,
      "grad_norm": 0.9996711611747742,
      "learning_rate": 9.087985872709424e-06,
      "loss": 0.0374,
      "step": 296820
    },
    {
      "epoch": 0.48578517049285497,
      "grad_norm": 0.7455025911331177,
      "learning_rate": 9.087919980495906e-06,
      "loss": 0.0486,
      "step": 296840
    },
    {
      "epoch": 0.4858179009315083,
      "grad_norm": 0.26287731528282166,
      "learning_rate": 9.087854088282389e-06,
      "loss": 0.0443,
      "step": 296860
    },
    {
      "epoch": 0.4858506313701616,
      "grad_norm": 1.4978916645050049,
      "learning_rate": 9.087788196068871e-06,
      "loss": 0.0446,
      "step": 296880
    },
    {
      "epoch": 0.485883361808815,
      "grad_norm": 1.8477157354354858,
      "learning_rate": 9.087722303855355e-06,
      "loss": 0.0433,
      "step": 296900
    },
    {
      "epoch": 0.4859160922474683,
      "grad_norm": 0.7273598313331604,
      "learning_rate": 9.087656411641838e-06,
      "loss": 0.0376,
      "step": 296920
    },
    {
      "epoch": 0.48594882268612166,
      "grad_norm": 5.371476173400879,
      "learning_rate": 9.08759051942832e-06,
      "loss": 0.0605,
      "step": 296940
    },
    {
      "epoch": 0.485981553124775,
      "grad_norm": 1.56083345413208,
      "learning_rate": 9.087524627214804e-06,
      "loss": 0.0512,
      "step": 296960
    },
    {
      "epoch": 0.4860142835634283,
      "grad_norm": 2.076232433319092,
      "learning_rate": 9.087458735001286e-06,
      "loss": 0.045,
      "step": 296980
    },
    {
      "epoch": 0.48604701400208167,
      "grad_norm": 0.9244659543037415,
      "learning_rate": 9.087392842787769e-06,
      "loss": 0.0353,
      "step": 297000
    },
    {
      "epoch": 0.486079744440735,
      "grad_norm": 2.891630172729492,
      "learning_rate": 9.087326950574251e-06,
      "loss": 0.0541,
      "step": 297020
    },
    {
      "epoch": 0.48611247487938836,
      "grad_norm": 0.14869405329227448,
      "learning_rate": 9.087261058360735e-06,
      "loss": 0.0265,
      "step": 297040
    },
    {
      "epoch": 0.4861452053180417,
      "grad_norm": 0.5793935656547546,
      "learning_rate": 9.087195166147216e-06,
      "loss": 0.0431,
      "step": 297060
    },
    {
      "epoch": 0.486177935756695,
      "grad_norm": 4.750380516052246,
      "learning_rate": 9.0871292739337e-06,
      "loss": 0.0479,
      "step": 297080
    },
    {
      "epoch": 0.48621066619534836,
      "grad_norm": 0.7236461639404297,
      "learning_rate": 9.087063381720182e-06,
      "loss": 0.0401,
      "step": 297100
    },
    {
      "epoch": 0.4862433966340017,
      "grad_norm": 0.6381039023399353,
      "learning_rate": 9.086997489506666e-06,
      "loss": 0.0596,
      "step": 297120
    },
    {
      "epoch": 0.48627612707265505,
      "grad_norm": 0.5706369876861572,
      "learning_rate": 9.086931597293147e-06,
      "loss": 0.0318,
      "step": 297140
    },
    {
      "epoch": 0.48630885751130837,
      "grad_norm": 1.4862396717071533,
      "learning_rate": 9.086865705079631e-06,
      "loss": 0.0473,
      "step": 297160
    },
    {
      "epoch": 0.4863415879499617,
      "grad_norm": 0.5775795578956604,
      "learning_rate": 9.086799812866115e-06,
      "loss": 0.0428,
      "step": 297180
    },
    {
      "epoch": 0.48637431838861506,
      "grad_norm": 0.650761604309082,
      "learning_rate": 9.086733920652597e-06,
      "loss": 0.0415,
      "step": 297200
    },
    {
      "epoch": 0.4864070488272684,
      "grad_norm": 1.975329041481018,
      "learning_rate": 9.08666802843908e-06,
      "loss": 0.0445,
      "step": 297220
    },
    {
      "epoch": 0.48643977926592175,
      "grad_norm": 0.7175801992416382,
      "learning_rate": 9.086602136225564e-06,
      "loss": 0.0405,
      "step": 297240
    },
    {
      "epoch": 0.48647250970457506,
      "grad_norm": 1.2651976346969604,
      "learning_rate": 9.086536244012046e-06,
      "loss": 0.0322,
      "step": 297260
    },
    {
      "epoch": 0.4865052401432284,
      "grad_norm": 0.7000679969787598,
      "learning_rate": 9.08647035179853e-06,
      "loss": 0.0387,
      "step": 297280
    },
    {
      "epoch": 0.48653797058188175,
      "grad_norm": 3.7109460830688477,
      "learning_rate": 9.086404459585013e-06,
      "loss": 0.048,
      "step": 297300
    },
    {
      "epoch": 0.48657070102053507,
      "grad_norm": 1.8205711841583252,
      "learning_rate": 9.086338567371495e-06,
      "loss": 0.0438,
      "step": 297320
    },
    {
      "epoch": 0.48660343145918844,
      "grad_norm": 1.224490761756897,
      "learning_rate": 9.086272675157978e-06,
      "loss": 0.0407,
      "step": 297340
    },
    {
      "epoch": 0.48663616189784176,
      "grad_norm": 0.6536194682121277,
      "learning_rate": 9.08620678294446e-06,
      "loss": 0.0365,
      "step": 297360
    },
    {
      "epoch": 0.4866688923364951,
      "grad_norm": 0.3828968107700348,
      "learning_rate": 9.086140890730944e-06,
      "loss": 0.0493,
      "step": 297380
    },
    {
      "epoch": 0.48670162277514845,
      "grad_norm": 1.2802380323410034,
      "learning_rate": 9.086074998517426e-06,
      "loss": 0.0398,
      "step": 297400
    },
    {
      "epoch": 0.48673435321380176,
      "grad_norm": 1.2428911924362183,
      "learning_rate": 9.08600910630391e-06,
      "loss": 0.0385,
      "step": 297420
    },
    {
      "epoch": 0.48676708365245513,
      "grad_norm": 2.491419792175293,
      "learning_rate": 9.085943214090391e-06,
      "loss": 0.0428,
      "step": 297440
    },
    {
      "epoch": 0.48679981409110845,
      "grad_norm": 1.1490533351898193,
      "learning_rate": 9.085877321876875e-06,
      "loss": 0.0367,
      "step": 297460
    },
    {
      "epoch": 0.48683254452976177,
      "grad_norm": 3.4292752742767334,
      "learning_rate": 9.085811429663357e-06,
      "loss": 0.0395,
      "step": 297480
    },
    {
      "epoch": 0.48686527496841514,
      "grad_norm": 1.1940696239471436,
      "learning_rate": 9.08574553744984e-06,
      "loss": 0.0247,
      "step": 297500
    },
    {
      "epoch": 0.48689800540706846,
      "grad_norm": 2.2375049591064453,
      "learning_rate": 9.085679645236322e-06,
      "loss": 0.0318,
      "step": 297520
    },
    {
      "epoch": 0.48693073584572183,
      "grad_norm": 1.012105941772461,
      "learning_rate": 9.085613753022806e-06,
      "loss": 0.0243,
      "step": 297540
    },
    {
      "epoch": 0.48696346628437515,
      "grad_norm": 2.3081142902374268,
      "learning_rate": 9.08554786080929e-06,
      "loss": 0.0376,
      "step": 297560
    },
    {
      "epoch": 0.48699619672302846,
      "grad_norm": 3.5898945331573486,
      "learning_rate": 9.085481968595771e-06,
      "loss": 0.0485,
      "step": 297580
    },
    {
      "epoch": 0.48702892716168183,
      "grad_norm": 2.945812463760376,
      "learning_rate": 9.085416076382255e-06,
      "loss": 0.0358,
      "step": 297600
    },
    {
      "epoch": 0.48706165760033515,
      "grad_norm": 1.2076719999313354,
      "learning_rate": 9.085350184168738e-06,
      "loss": 0.0381,
      "step": 297620
    },
    {
      "epoch": 0.4870943880389885,
      "grad_norm": 1.8692827224731445,
      "learning_rate": 9.08528429195522e-06,
      "loss": 0.0507,
      "step": 297640
    },
    {
      "epoch": 0.48712711847764184,
      "grad_norm": 1.279566764831543,
      "learning_rate": 9.085218399741704e-06,
      "loss": 0.0396,
      "step": 297660
    },
    {
      "epoch": 0.48715984891629516,
      "grad_norm": 1.3865742683410645,
      "learning_rate": 9.085152507528187e-06,
      "loss": 0.0446,
      "step": 297680
    },
    {
      "epoch": 0.48719257935494853,
      "grad_norm": 0.4592648148536682,
      "learning_rate": 9.08508661531467e-06,
      "loss": 0.0368,
      "step": 297700
    },
    {
      "epoch": 0.48722530979360185,
      "grad_norm": 2.609896421432495,
      "learning_rate": 9.085020723101153e-06,
      "loss": 0.0386,
      "step": 297720
    },
    {
      "epoch": 0.4872580402322552,
      "grad_norm": 0.9519140124320984,
      "learning_rate": 9.084954830887635e-06,
      "loss": 0.0414,
      "step": 297740
    },
    {
      "epoch": 0.48729077067090854,
      "grad_norm": 0.9172130227088928,
      "learning_rate": 9.084888938674118e-06,
      "loss": 0.0318,
      "step": 297760
    },
    {
      "epoch": 0.48732350110956185,
      "grad_norm": 1.3651384115219116,
      "learning_rate": 9.0848230464606e-06,
      "loss": 0.0347,
      "step": 297780
    },
    {
      "epoch": 0.4873562315482152,
      "grad_norm": 3.0737740993499756,
      "learning_rate": 9.084757154247084e-06,
      "loss": 0.0382,
      "step": 297800
    },
    {
      "epoch": 0.48738896198686854,
      "grad_norm": 2.5794641971588135,
      "learning_rate": 9.084691262033566e-06,
      "loss": 0.0461,
      "step": 297820
    },
    {
      "epoch": 0.4874216924255219,
      "grad_norm": 0.9010353088378906,
      "learning_rate": 9.08462536982005e-06,
      "loss": 0.038,
      "step": 297840
    },
    {
      "epoch": 0.48745442286417523,
      "grad_norm": 1.3382527828216553,
      "learning_rate": 9.084559477606531e-06,
      "loss": 0.0435,
      "step": 297860
    },
    {
      "epoch": 0.48748715330282855,
      "grad_norm": 4.651123046875,
      "learning_rate": 9.084493585393015e-06,
      "loss": 0.0412,
      "step": 297880
    },
    {
      "epoch": 0.4875198837414819,
      "grad_norm": 2.218529462814331,
      "learning_rate": 9.084427693179497e-06,
      "loss": 0.0456,
      "step": 297900
    },
    {
      "epoch": 0.48755261418013524,
      "grad_norm": 0.8226881623268127,
      "learning_rate": 9.08436180096598e-06,
      "loss": 0.0361,
      "step": 297920
    },
    {
      "epoch": 0.4875853446187886,
      "grad_norm": 1.2886433601379395,
      "learning_rate": 9.084295908752462e-06,
      "loss": 0.0437,
      "step": 297940
    },
    {
      "epoch": 0.4876180750574419,
      "grad_norm": 1.766842007637024,
      "learning_rate": 9.084230016538946e-06,
      "loss": 0.0461,
      "step": 297960
    },
    {
      "epoch": 0.48765080549609524,
      "grad_norm": 0.47699204087257385,
      "learning_rate": 9.08416412432543e-06,
      "loss": 0.047,
      "step": 297980
    },
    {
      "epoch": 0.4876835359347486,
      "grad_norm": 3.5036802291870117,
      "learning_rate": 9.084098232111911e-06,
      "loss": 0.0373,
      "step": 298000
    },
    {
      "epoch": 0.48771626637340193,
      "grad_norm": 0.6425483822822571,
      "learning_rate": 9.084032339898395e-06,
      "loss": 0.0339,
      "step": 298020
    },
    {
      "epoch": 0.4877489968120553,
      "grad_norm": 1.4225211143493652,
      "learning_rate": 9.083966447684878e-06,
      "loss": 0.0385,
      "step": 298040
    },
    {
      "epoch": 0.4877817272507086,
      "grad_norm": 0.6987637281417847,
      "learning_rate": 9.08390055547136e-06,
      "loss": 0.0428,
      "step": 298060
    },
    {
      "epoch": 0.48781445768936194,
      "grad_norm": 0.44806358218193054,
      "learning_rate": 9.083834663257844e-06,
      "loss": 0.0552,
      "step": 298080
    },
    {
      "epoch": 0.4878471881280153,
      "grad_norm": 1.5408744812011719,
      "learning_rate": 9.083768771044327e-06,
      "loss": 0.05,
      "step": 298100
    },
    {
      "epoch": 0.4878799185666686,
      "grad_norm": 1.5824494361877441,
      "learning_rate": 9.08370287883081e-06,
      "loss": 0.042,
      "step": 298120
    },
    {
      "epoch": 0.487912649005322,
      "grad_norm": 1.643056869506836,
      "learning_rate": 9.083636986617293e-06,
      "loss": 0.0346,
      "step": 298140
    },
    {
      "epoch": 0.4879453794439753,
      "grad_norm": 0.6838515996932983,
      "learning_rate": 9.083571094403775e-06,
      "loss": 0.0326,
      "step": 298160
    },
    {
      "epoch": 0.48797810988262863,
      "grad_norm": 1.056502342224121,
      "learning_rate": 9.083505202190258e-06,
      "loss": 0.0334,
      "step": 298180
    },
    {
      "epoch": 0.488010840321282,
      "grad_norm": 5.685348033905029,
      "learning_rate": 9.08343930997674e-06,
      "loss": 0.0418,
      "step": 298200
    },
    {
      "epoch": 0.4880435707599353,
      "grad_norm": 1.8365492820739746,
      "learning_rate": 9.083373417763224e-06,
      "loss": 0.0392,
      "step": 298220
    },
    {
      "epoch": 0.4880763011985887,
      "grad_norm": 0.27348989248275757,
      "learning_rate": 9.083307525549706e-06,
      "loss": 0.0525,
      "step": 298240
    },
    {
      "epoch": 0.488109031637242,
      "grad_norm": 1.8896849155426025,
      "learning_rate": 9.08324163333619e-06,
      "loss": 0.0323,
      "step": 298260
    },
    {
      "epoch": 0.4881417620758953,
      "grad_norm": 1.2726311683654785,
      "learning_rate": 9.083175741122671e-06,
      "loss": 0.039,
      "step": 298280
    },
    {
      "epoch": 0.4881744925145487,
      "grad_norm": 2.688494920730591,
      "learning_rate": 9.083109848909155e-06,
      "loss": 0.0422,
      "step": 298300
    },
    {
      "epoch": 0.488207222953202,
      "grad_norm": 1.6703591346740723,
      "learning_rate": 9.083043956695638e-06,
      "loss": 0.0377,
      "step": 298320
    },
    {
      "epoch": 0.48823995339185533,
      "grad_norm": 0.9081196784973145,
      "learning_rate": 9.08297806448212e-06,
      "loss": 0.0433,
      "step": 298340
    },
    {
      "epoch": 0.4882726838305087,
      "grad_norm": 2.3354289531707764,
      "learning_rate": 9.082912172268604e-06,
      "loss": 0.0438,
      "step": 298360
    },
    {
      "epoch": 0.488305414269162,
      "grad_norm": 1.4384123086929321,
      "learning_rate": 9.082846280055086e-06,
      "loss": 0.0405,
      "step": 298380
    },
    {
      "epoch": 0.4883381447078154,
      "grad_norm": 0.389863520860672,
      "learning_rate": 9.08278038784157e-06,
      "loss": 0.0417,
      "step": 298400
    },
    {
      "epoch": 0.4883708751464687,
      "grad_norm": 1.532284140586853,
      "learning_rate": 9.082714495628053e-06,
      "loss": 0.0487,
      "step": 298420
    },
    {
      "epoch": 0.488403605585122,
      "grad_norm": 0.8016870021820068,
      "learning_rate": 9.082648603414535e-06,
      "loss": 0.0401,
      "step": 298440
    },
    {
      "epoch": 0.4884363360237754,
      "grad_norm": 2.2466185092926025,
      "learning_rate": 9.082582711201018e-06,
      "loss": 0.042,
      "step": 298460
    },
    {
      "epoch": 0.4884690664624287,
      "grad_norm": 1.8413890600204468,
      "learning_rate": 9.082516818987502e-06,
      "loss": 0.0385,
      "step": 298480
    },
    {
      "epoch": 0.4885017969010821,
      "grad_norm": 1.1580744981765747,
      "learning_rate": 9.082450926773984e-06,
      "loss": 0.0428,
      "step": 298500
    },
    {
      "epoch": 0.4885345273397354,
      "grad_norm": 0.6225228309631348,
      "learning_rate": 9.082385034560468e-06,
      "loss": 0.0449,
      "step": 298520
    },
    {
      "epoch": 0.4885672577783887,
      "grad_norm": 6.722506999969482,
      "learning_rate": 9.08231914234695e-06,
      "loss": 0.0452,
      "step": 298540
    },
    {
      "epoch": 0.4885999882170421,
      "grad_norm": 3.1512198448181152,
      "learning_rate": 9.082253250133433e-06,
      "loss": 0.0452,
      "step": 298560
    },
    {
      "epoch": 0.4886327186556954,
      "grad_norm": 0.261700838804245,
      "learning_rate": 9.082187357919915e-06,
      "loss": 0.0356,
      "step": 298580
    },
    {
      "epoch": 0.4886654490943488,
      "grad_norm": 1.895126461982727,
      "learning_rate": 9.082121465706398e-06,
      "loss": 0.0389,
      "step": 298600
    },
    {
      "epoch": 0.4886981795330021,
      "grad_norm": 1.2544865608215332,
      "learning_rate": 9.08205557349288e-06,
      "loss": 0.044,
      "step": 298620
    },
    {
      "epoch": 0.4887309099716554,
      "grad_norm": 0.7141340374946594,
      "learning_rate": 9.081989681279364e-06,
      "loss": 0.0418,
      "step": 298640
    },
    {
      "epoch": 0.4887636404103088,
      "grad_norm": 1.7150039672851562,
      "learning_rate": 9.081923789065848e-06,
      "loss": 0.0403,
      "step": 298660
    },
    {
      "epoch": 0.4887963708489621,
      "grad_norm": 4.716135025024414,
      "learning_rate": 9.08185789685233e-06,
      "loss": 0.0431,
      "step": 298680
    },
    {
      "epoch": 0.4888291012876155,
      "grad_norm": 1.0282564163208008,
      "learning_rate": 9.081792004638813e-06,
      "loss": 0.0307,
      "step": 298700
    },
    {
      "epoch": 0.4888618317262688,
      "grad_norm": 1.0840164422988892,
      "learning_rate": 9.081726112425295e-06,
      "loss": 0.0377,
      "step": 298720
    },
    {
      "epoch": 0.4888945621649221,
      "grad_norm": 1.772995114326477,
      "learning_rate": 9.081660220211778e-06,
      "loss": 0.0414,
      "step": 298740
    },
    {
      "epoch": 0.4889272926035755,
      "grad_norm": 1.3198500871658325,
      "learning_rate": 9.08159432799826e-06,
      "loss": 0.0432,
      "step": 298760
    },
    {
      "epoch": 0.4889600230422288,
      "grad_norm": 0.7423235774040222,
      "learning_rate": 9.081528435784744e-06,
      "loss": 0.0248,
      "step": 298780
    },
    {
      "epoch": 0.48899275348088217,
      "grad_norm": 2.6398181915283203,
      "learning_rate": 9.081462543571226e-06,
      "loss": 0.033,
      "step": 298800
    },
    {
      "epoch": 0.4890254839195355,
      "grad_norm": 1.4824482202529907,
      "learning_rate": 9.08139665135771e-06,
      "loss": 0.0378,
      "step": 298820
    },
    {
      "epoch": 0.4890582143581888,
      "grad_norm": 2.748950481414795,
      "learning_rate": 9.081330759144193e-06,
      "loss": 0.0393,
      "step": 298840
    },
    {
      "epoch": 0.4890909447968422,
      "grad_norm": 0.9887334704399109,
      "learning_rate": 9.081264866930677e-06,
      "loss": 0.0262,
      "step": 298860
    },
    {
      "epoch": 0.4891236752354955,
      "grad_norm": 0.6626264452934265,
      "learning_rate": 9.081198974717159e-06,
      "loss": 0.0406,
      "step": 298880
    },
    {
      "epoch": 0.48915640567414886,
      "grad_norm": 3.832764148712158,
      "learning_rate": 9.081133082503642e-06,
      "loss": 0.0419,
      "step": 298900
    },
    {
      "epoch": 0.4891891361128022,
      "grad_norm": 4.398448944091797,
      "learning_rate": 9.081067190290124e-06,
      "loss": 0.0442,
      "step": 298920
    },
    {
      "epoch": 0.4892218665514555,
      "grad_norm": 2.098123788833618,
      "learning_rate": 9.081001298076608e-06,
      "loss": 0.0479,
      "step": 298940
    },
    {
      "epoch": 0.48925459699010887,
      "grad_norm": 0.49342185258865356,
      "learning_rate": 9.08093540586309e-06,
      "loss": 0.0375,
      "step": 298960
    },
    {
      "epoch": 0.4892873274287622,
      "grad_norm": 0.9700933694839478,
      "learning_rate": 9.080869513649573e-06,
      "loss": 0.0381,
      "step": 298980
    },
    {
      "epoch": 0.48932005786741556,
      "grad_norm": 2.196977138519287,
      "learning_rate": 9.080803621436055e-06,
      "loss": 0.0428,
      "step": 299000
    },
    {
      "epoch": 0.4893527883060689,
      "grad_norm": 3.372771739959717,
      "learning_rate": 9.080737729222539e-06,
      "loss": 0.046,
      "step": 299020
    },
    {
      "epoch": 0.4893855187447222,
      "grad_norm": 1.4911798238754272,
      "learning_rate": 9.080671837009022e-06,
      "loss": 0.0342,
      "step": 299040
    },
    {
      "epoch": 0.48941824918337556,
      "grad_norm": 0.9975934624671936,
      "learning_rate": 9.080605944795504e-06,
      "loss": 0.0312,
      "step": 299060
    },
    {
      "epoch": 0.4894509796220289,
      "grad_norm": 2.2239253520965576,
      "learning_rate": 9.080540052581988e-06,
      "loss": 0.0401,
      "step": 299080
    },
    {
      "epoch": 0.48948371006068225,
      "grad_norm": 2.320770740509033,
      "learning_rate": 9.08047416036847e-06,
      "loss": 0.0404,
      "step": 299100
    },
    {
      "epoch": 0.48951644049933557,
      "grad_norm": 0.9828121066093445,
      "learning_rate": 9.080408268154953e-06,
      "loss": 0.0435,
      "step": 299120
    },
    {
      "epoch": 0.4895491709379889,
      "grad_norm": 4.059603691101074,
      "learning_rate": 9.080342375941435e-06,
      "loss": 0.0472,
      "step": 299140
    },
    {
      "epoch": 0.48958190137664226,
      "grad_norm": 0.418638676404953,
      "learning_rate": 9.080276483727919e-06,
      "loss": 0.0474,
      "step": 299160
    },
    {
      "epoch": 0.4896146318152956,
      "grad_norm": 1.1267049312591553,
      "learning_rate": 9.0802105915144e-06,
      "loss": 0.0525,
      "step": 299180
    },
    {
      "epoch": 0.48964736225394895,
      "grad_norm": 0.9337425231933594,
      "learning_rate": 9.080144699300884e-06,
      "loss": 0.0438,
      "step": 299200
    },
    {
      "epoch": 0.48968009269260226,
      "grad_norm": 0.3609614074230194,
      "learning_rate": 9.080078807087368e-06,
      "loss": 0.0374,
      "step": 299220
    },
    {
      "epoch": 0.4897128231312556,
      "grad_norm": 1.9775680303573608,
      "learning_rate": 9.08001291487385e-06,
      "loss": 0.0394,
      "step": 299240
    },
    {
      "epoch": 0.48974555356990895,
      "grad_norm": 0.854874849319458,
      "learning_rate": 9.079947022660333e-06,
      "loss": 0.0386,
      "step": 299260
    },
    {
      "epoch": 0.48977828400856227,
      "grad_norm": 1.1315001249313354,
      "learning_rate": 9.079881130446817e-06,
      "loss": 0.0301,
      "step": 299280
    },
    {
      "epoch": 0.48981101444721564,
      "grad_norm": 1.3827335834503174,
      "learning_rate": 9.079815238233299e-06,
      "loss": 0.0299,
      "step": 299300
    },
    {
      "epoch": 0.48984374488586896,
      "grad_norm": 2.227648973464966,
      "learning_rate": 9.079749346019782e-06,
      "loss": 0.0352,
      "step": 299320
    },
    {
      "epoch": 0.4898764753245223,
      "grad_norm": 0.8376485705375671,
      "learning_rate": 9.079683453806264e-06,
      "loss": 0.0415,
      "step": 299340
    },
    {
      "epoch": 0.48990920576317565,
      "grad_norm": 1.5604472160339355,
      "learning_rate": 9.079617561592748e-06,
      "loss": 0.0461,
      "step": 299360
    },
    {
      "epoch": 0.48994193620182896,
      "grad_norm": 0.5309671759605408,
      "learning_rate": 9.079551669379231e-06,
      "loss": 0.0472,
      "step": 299380
    },
    {
      "epoch": 0.48997466664048234,
      "grad_norm": 1.3895716667175293,
      "learning_rate": 9.079485777165713e-06,
      "loss": 0.043,
      "step": 299400
    },
    {
      "epoch": 0.49000739707913565,
      "grad_norm": 1.5424896478652954,
      "learning_rate": 9.079419884952197e-06,
      "loss": 0.0368,
      "step": 299420
    },
    {
      "epoch": 0.49004012751778897,
      "grad_norm": 1.2248997688293457,
      "learning_rate": 9.079353992738679e-06,
      "loss": 0.0541,
      "step": 299440
    },
    {
      "epoch": 0.49007285795644234,
      "grad_norm": 0.5882405042648315,
      "learning_rate": 9.079288100525162e-06,
      "loss": 0.0451,
      "step": 299460
    },
    {
      "epoch": 0.49010558839509566,
      "grad_norm": 3.6773269176483154,
      "learning_rate": 9.079222208311644e-06,
      "loss": 0.0458,
      "step": 299480
    },
    {
      "epoch": 0.49013831883374903,
      "grad_norm": 4.994022846221924,
      "learning_rate": 9.079156316098128e-06,
      "loss": 0.0497,
      "step": 299500
    },
    {
      "epoch": 0.49017104927240235,
      "grad_norm": 2.567584276199341,
      "learning_rate": 9.07909042388461e-06,
      "loss": 0.0305,
      "step": 299520
    },
    {
      "epoch": 0.49020377971105566,
      "grad_norm": 1.2999200820922852,
      "learning_rate": 9.079024531671093e-06,
      "loss": 0.0524,
      "step": 299540
    },
    {
      "epoch": 0.49023651014970904,
      "grad_norm": 1.0290862321853638,
      "learning_rate": 9.078958639457575e-06,
      "loss": 0.0352,
      "step": 299560
    },
    {
      "epoch": 0.49026924058836235,
      "grad_norm": 1.6766293048858643,
      "learning_rate": 9.078892747244059e-06,
      "loss": 0.0399,
      "step": 299580
    },
    {
      "epoch": 0.4903019710270157,
      "grad_norm": 0.6986256837844849,
      "learning_rate": 9.078826855030542e-06,
      "loss": 0.0416,
      "step": 299600
    },
    {
      "epoch": 0.49033470146566904,
      "grad_norm": 1.0738945007324219,
      "learning_rate": 9.078760962817024e-06,
      "loss": 0.0449,
      "step": 299620
    },
    {
      "epoch": 0.49036743190432236,
      "grad_norm": 1.2444019317626953,
      "learning_rate": 9.078695070603508e-06,
      "loss": 0.041,
      "step": 299640
    },
    {
      "epoch": 0.49040016234297573,
      "grad_norm": 1.3631335496902466,
      "learning_rate": 9.078629178389991e-06,
      "loss": 0.0382,
      "step": 299660
    },
    {
      "epoch": 0.49043289278162905,
      "grad_norm": 1.7412787675857544,
      "learning_rate": 9.078563286176473e-06,
      "loss": 0.0324,
      "step": 299680
    },
    {
      "epoch": 0.4904656232202824,
      "grad_norm": 1.0703184604644775,
      "learning_rate": 9.078497393962957e-06,
      "loss": 0.0523,
      "step": 299700
    },
    {
      "epoch": 0.49049835365893574,
      "grad_norm": 0.4451267719268799,
      "learning_rate": 9.07843150174944e-06,
      "loss": 0.0427,
      "step": 299720
    },
    {
      "epoch": 0.49053108409758905,
      "grad_norm": 0.929598331451416,
      "learning_rate": 9.078365609535922e-06,
      "loss": 0.0385,
      "step": 299740
    },
    {
      "epoch": 0.4905638145362424,
      "grad_norm": 1.323105812072754,
      "learning_rate": 9.078299717322406e-06,
      "loss": 0.0438,
      "step": 299760
    },
    {
      "epoch": 0.49059654497489574,
      "grad_norm": 0.8768003582954407,
      "learning_rate": 9.078233825108888e-06,
      "loss": 0.0414,
      "step": 299780
    },
    {
      "epoch": 0.4906292754135491,
      "grad_norm": 3.0694191455841064,
      "learning_rate": 9.078167932895371e-06,
      "loss": 0.0372,
      "step": 299800
    },
    {
      "epoch": 0.49066200585220243,
      "grad_norm": 2.0363855361938477,
      "learning_rate": 9.078102040681853e-06,
      "loss": 0.0456,
      "step": 299820
    },
    {
      "epoch": 0.49069473629085575,
      "grad_norm": 1.4266419410705566,
      "learning_rate": 9.078036148468337e-06,
      "loss": 0.0381,
      "step": 299840
    },
    {
      "epoch": 0.4907274667295091,
      "grad_norm": 1.80471670627594,
      "learning_rate": 9.077970256254819e-06,
      "loss": 0.0326,
      "step": 299860
    },
    {
      "epoch": 0.49076019716816244,
      "grad_norm": 1.5689467191696167,
      "learning_rate": 9.077904364041302e-06,
      "loss": 0.0343,
      "step": 299880
    },
    {
      "epoch": 0.4907929276068158,
      "grad_norm": 2.590071201324463,
      "learning_rate": 9.077838471827784e-06,
      "loss": 0.0414,
      "step": 299900
    },
    {
      "epoch": 0.4908256580454691,
      "grad_norm": 0.5600570440292358,
      "learning_rate": 9.077772579614268e-06,
      "loss": 0.0497,
      "step": 299920
    },
    {
      "epoch": 0.49085838848412244,
      "grad_norm": 9.040401458740234,
      "learning_rate": 9.07770668740075e-06,
      "loss": 0.0474,
      "step": 299940
    },
    {
      "epoch": 0.4908911189227758,
      "grad_norm": 3.664637565612793,
      "learning_rate": 9.077640795187233e-06,
      "loss": 0.0325,
      "step": 299960
    },
    {
      "epoch": 0.49092384936142913,
      "grad_norm": 0.9282178282737732,
      "learning_rate": 9.077574902973715e-06,
      "loss": 0.0398,
      "step": 299980
    },
    {
      "epoch": 0.4909565798000825,
      "grad_norm": 0.8513465523719788,
      "learning_rate": 9.077509010760199e-06,
      "loss": 0.0362,
      "step": 300000
    },
    {
      "epoch": 0.4909565798000825,
      "eval_loss": 0.02028754912316799,
      "eval_runtime": 6513.4438,
      "eval_samples_per_second": 157.806,
      "eval_steps_per_second": 15.781,
      "eval_sts-dev_pearson_cosine": 0.9510416260745636,
      "eval_sts-dev_spearman_cosine": 0.8749504245778962,
      "step": 300000
    },
    {
      "epoch": 0.4909893102387358,
      "grad_norm": 2.53583025932312,
      "learning_rate": 9.077443118546682e-06,
      "loss": 0.0319,
      "step": 300020
    },
    {
      "epoch": 0.49102204067738914,
      "grad_norm": 2.79226016998291,
      "learning_rate": 9.077377226333164e-06,
      "loss": 0.0451,
      "step": 300040
    },
    {
      "epoch": 0.4910547711160425,
      "grad_norm": 1.6315131187438965,
      "learning_rate": 9.077311334119648e-06,
      "loss": 0.0456,
      "step": 300060
    },
    {
      "epoch": 0.4910875015546958,
      "grad_norm": 3.709214687347412,
      "learning_rate": 9.077245441906131e-06,
      "loss": 0.0427,
      "step": 300080
    },
    {
      "epoch": 0.4911202319933492,
      "grad_norm": 1.0045324563980103,
      "learning_rate": 9.077179549692613e-06,
      "loss": 0.0447,
      "step": 300100
    },
    {
      "epoch": 0.4911529624320025,
      "grad_norm": 2.3116133213043213,
      "learning_rate": 9.077113657479097e-06,
      "loss": 0.0605,
      "step": 300120
    },
    {
      "epoch": 0.49118569287065583,
      "grad_norm": 1.12122642993927,
      "learning_rate": 9.07704776526558e-06,
      "loss": 0.0376,
      "step": 300140
    },
    {
      "epoch": 0.4912184233093092,
      "grad_norm": 1.9222794771194458,
      "learning_rate": 9.076981873052062e-06,
      "loss": 0.049,
      "step": 300160
    },
    {
      "epoch": 0.4912511537479625,
      "grad_norm": 1.0406880378723145,
      "learning_rate": 9.076915980838546e-06,
      "loss": 0.0355,
      "step": 300180
    },
    {
      "epoch": 0.4912838841866159,
      "grad_norm": 1.2545514106750488,
      "learning_rate": 9.076850088625028e-06,
      "loss": 0.0273,
      "step": 300200
    },
    {
      "epoch": 0.4913166146252692,
      "grad_norm": 0.46641993522644043,
      "learning_rate": 9.076784196411511e-06,
      "loss": 0.0275,
      "step": 300220
    },
    {
      "epoch": 0.4913493450639225,
      "grad_norm": 1.0316704511642456,
      "learning_rate": 9.076718304197993e-06,
      "loss": 0.0481,
      "step": 300240
    },
    {
      "epoch": 0.4913820755025759,
      "grad_norm": 2.6144981384277344,
      "learning_rate": 9.076652411984477e-06,
      "loss": 0.0382,
      "step": 300260
    },
    {
      "epoch": 0.4914148059412292,
      "grad_norm": 0.9580233097076416,
      "learning_rate": 9.076586519770959e-06,
      "loss": 0.0522,
      "step": 300280
    },
    {
      "epoch": 0.4914475363798826,
      "grad_norm": 1.9046685695648193,
      "learning_rate": 9.076520627557442e-06,
      "loss": 0.0314,
      "step": 300300
    },
    {
      "epoch": 0.4914802668185359,
      "grad_norm": 2.0845742225646973,
      "learning_rate": 9.076454735343924e-06,
      "loss": 0.0347,
      "step": 300320
    },
    {
      "epoch": 0.4915129972571892,
      "grad_norm": 2.0024197101593018,
      "learning_rate": 9.076388843130408e-06,
      "loss": 0.0484,
      "step": 300340
    },
    {
      "epoch": 0.4915457276958426,
      "grad_norm": 0.46475136280059814,
      "learning_rate": 9.07632295091689e-06,
      "loss": 0.04,
      "step": 300360
    },
    {
      "epoch": 0.4915784581344959,
      "grad_norm": 1.6855229139328003,
      "learning_rate": 9.076257058703373e-06,
      "loss": 0.0408,
      "step": 300380
    },
    {
      "epoch": 0.4916111885731493,
      "grad_norm": 0.9811789989471436,
      "learning_rate": 9.076191166489857e-06,
      "loss": 0.0425,
      "step": 300400
    },
    {
      "epoch": 0.4916439190118026,
      "grad_norm": 1.6044037342071533,
      "learning_rate": 9.076125274276339e-06,
      "loss": 0.028,
      "step": 300420
    },
    {
      "epoch": 0.4916766494504559,
      "grad_norm": 1.0992968082427979,
      "learning_rate": 9.076059382062822e-06,
      "loss": 0.0463,
      "step": 300440
    },
    {
      "epoch": 0.4917093798891093,
      "grad_norm": 1.41958487033844,
      "learning_rate": 9.075993489849306e-06,
      "loss": 0.0376,
      "step": 300460
    },
    {
      "epoch": 0.4917421103277626,
      "grad_norm": 1.2056002616882324,
      "learning_rate": 9.075927597635788e-06,
      "loss": 0.0508,
      "step": 300480
    },
    {
      "epoch": 0.491774840766416,
      "grad_norm": 0.49144247174263,
      "learning_rate": 9.075861705422271e-06,
      "loss": 0.033,
      "step": 300500
    },
    {
      "epoch": 0.4918075712050693,
      "grad_norm": 0.7517480850219727,
      "learning_rate": 9.075795813208755e-06,
      "loss": 0.044,
      "step": 300520
    },
    {
      "epoch": 0.4918403016437226,
      "grad_norm": 3.428948402404785,
      "learning_rate": 9.075729920995237e-06,
      "loss": 0.0382,
      "step": 300540
    },
    {
      "epoch": 0.491873032082376,
      "grad_norm": 0.9156874418258667,
      "learning_rate": 9.07566402878172e-06,
      "loss": 0.0302,
      "step": 300560
    },
    {
      "epoch": 0.4919057625210293,
      "grad_norm": 1.0105366706848145,
      "learning_rate": 9.075598136568202e-06,
      "loss": 0.0359,
      "step": 300580
    },
    {
      "epoch": 0.49193849295968267,
      "grad_norm": 0.8375393152236938,
      "learning_rate": 9.075532244354686e-06,
      "loss": 0.0484,
      "step": 300600
    },
    {
      "epoch": 0.491971223398336,
      "grad_norm": 1.8250688314437866,
      "learning_rate": 9.075466352141168e-06,
      "loss": 0.0305,
      "step": 300620
    },
    {
      "epoch": 0.4920039538369893,
      "grad_norm": 1.402354121208191,
      "learning_rate": 9.075400459927651e-06,
      "loss": 0.0343,
      "step": 300640
    },
    {
      "epoch": 0.4920366842756427,
      "grad_norm": 0.8544846773147583,
      "learning_rate": 9.075334567714133e-06,
      "loss": 0.0407,
      "step": 300660
    },
    {
      "epoch": 0.492069414714296,
      "grad_norm": 1.0196256637573242,
      "learning_rate": 9.075268675500617e-06,
      "loss": 0.0458,
      "step": 300680
    },
    {
      "epoch": 0.49210214515294937,
      "grad_norm": 0.9064697027206421,
      "learning_rate": 9.075202783287099e-06,
      "loss": 0.0364,
      "step": 300700
    },
    {
      "epoch": 0.4921348755916027,
      "grad_norm": 1.329199194908142,
      "learning_rate": 9.075136891073582e-06,
      "loss": 0.0417,
      "step": 300720
    },
    {
      "epoch": 0.492167606030256,
      "grad_norm": 12.270214080810547,
      "learning_rate": 9.075070998860064e-06,
      "loss": 0.0418,
      "step": 300740
    },
    {
      "epoch": 0.49220033646890937,
      "grad_norm": 1.0242685079574585,
      "learning_rate": 9.075005106646548e-06,
      "loss": 0.038,
      "step": 300760
    },
    {
      "epoch": 0.4922330669075627,
      "grad_norm": 3.0403473377227783,
      "learning_rate": 9.074939214433031e-06,
      "loss": 0.0387,
      "step": 300780
    },
    {
      "epoch": 0.49226579734621606,
      "grad_norm": 1.8880478143692017,
      "learning_rate": 9.074873322219513e-06,
      "loss": 0.0319,
      "step": 300800
    },
    {
      "epoch": 0.4922985277848694,
      "grad_norm": 0.2866111695766449,
      "learning_rate": 9.074807430005997e-06,
      "loss": 0.0357,
      "step": 300820
    },
    {
      "epoch": 0.4923312582235227,
      "grad_norm": 1.1300958395004272,
      "learning_rate": 9.074741537792479e-06,
      "loss": 0.0328,
      "step": 300840
    },
    {
      "epoch": 0.49236398866217607,
      "grad_norm": 18.38517951965332,
      "learning_rate": 9.074675645578962e-06,
      "loss": 0.0451,
      "step": 300860
    },
    {
      "epoch": 0.4923967191008294,
      "grad_norm": 6.564613342285156,
      "learning_rate": 9.074609753365446e-06,
      "loss": 0.0336,
      "step": 300880
    },
    {
      "epoch": 0.49242944953948276,
      "grad_norm": 2.5935544967651367,
      "learning_rate": 9.074543861151928e-06,
      "loss": 0.0394,
      "step": 300900
    },
    {
      "epoch": 0.49246217997813607,
      "grad_norm": 1.2417243719100952,
      "learning_rate": 9.074477968938412e-06,
      "loss": 0.0557,
      "step": 300920
    },
    {
      "epoch": 0.4924949104167894,
      "grad_norm": 1.438803791999817,
      "learning_rate": 9.074412076724895e-06,
      "loss": 0.0527,
      "step": 300940
    },
    {
      "epoch": 0.49252764085544276,
      "grad_norm": 1.2741764783859253,
      "learning_rate": 9.074346184511377e-06,
      "loss": 0.0455,
      "step": 300960
    },
    {
      "epoch": 0.4925603712940961,
      "grad_norm": 3.014026641845703,
      "learning_rate": 9.07428029229786e-06,
      "loss": 0.0488,
      "step": 300980
    },
    {
      "epoch": 0.49259310173274945,
      "grad_norm": 0.7887553572654724,
      "learning_rate": 9.074214400084342e-06,
      "loss": 0.0289,
      "step": 301000
    },
    {
      "epoch": 0.49262583217140277,
      "grad_norm": 0.4073992371559143,
      "learning_rate": 9.074148507870826e-06,
      "loss": 0.0324,
      "step": 301020
    },
    {
      "epoch": 0.4926585626100561,
      "grad_norm": 0.3117491602897644,
      "learning_rate": 9.074082615657308e-06,
      "loss": 0.0434,
      "step": 301040
    },
    {
      "epoch": 0.49269129304870946,
      "grad_norm": 1.6681492328643799,
      "learning_rate": 9.074016723443792e-06,
      "loss": 0.0359,
      "step": 301060
    },
    {
      "epoch": 0.49272402348736277,
      "grad_norm": 2.0304322242736816,
      "learning_rate": 9.073950831230273e-06,
      "loss": 0.0321,
      "step": 301080
    },
    {
      "epoch": 0.4927567539260161,
      "grad_norm": 1.7602007389068604,
      "learning_rate": 9.073884939016757e-06,
      "loss": 0.04,
      "step": 301100
    },
    {
      "epoch": 0.49278948436466946,
      "grad_norm": 1.495846152305603,
      "learning_rate": 9.07381904680324e-06,
      "loss": 0.0486,
      "step": 301120
    },
    {
      "epoch": 0.4928222148033228,
      "grad_norm": 2.6495532989501953,
      "learning_rate": 9.073753154589723e-06,
      "loss": 0.0453,
      "step": 301140
    },
    {
      "epoch": 0.49285494524197615,
      "grad_norm": 2.3736634254455566,
      "learning_rate": 9.073687262376206e-06,
      "loss": 0.0446,
      "step": 301160
    },
    {
      "epoch": 0.49288767568062947,
      "grad_norm": 1.2873082160949707,
      "learning_rate": 9.073621370162688e-06,
      "loss": 0.0603,
      "step": 301180
    },
    {
      "epoch": 0.4929204061192828,
      "grad_norm": 1.2612942457199097,
      "learning_rate": 9.073555477949172e-06,
      "loss": 0.0398,
      "step": 301200
    },
    {
      "epoch": 0.49295313655793616,
      "grad_norm": 2.7130749225616455,
      "learning_rate": 9.073489585735653e-06,
      "loss": 0.0435,
      "step": 301220
    },
    {
      "epoch": 0.4929858669965895,
      "grad_norm": 3.981332540512085,
      "learning_rate": 9.073423693522137e-06,
      "loss": 0.0416,
      "step": 301240
    },
    {
      "epoch": 0.49301859743524284,
      "grad_norm": 14.21507740020752,
      "learning_rate": 9.07335780130862e-06,
      "loss": 0.0325,
      "step": 301260
    },
    {
      "epoch": 0.49305132787389616,
      "grad_norm": 1.1029622554779053,
      "learning_rate": 9.073291909095103e-06,
      "loss": 0.0433,
      "step": 301280
    },
    {
      "epoch": 0.4930840583125495,
      "grad_norm": 2.880936622619629,
      "learning_rate": 9.073226016881586e-06,
      "loss": 0.0519,
      "step": 301300
    },
    {
      "epoch": 0.49311678875120285,
      "grad_norm": 1.337031602859497,
      "learning_rate": 9.07316012466807e-06,
      "loss": 0.0488,
      "step": 301320
    },
    {
      "epoch": 0.49314951918985617,
      "grad_norm": 0.496368408203125,
      "learning_rate": 9.073094232454552e-06,
      "loss": 0.0474,
      "step": 301340
    },
    {
      "epoch": 0.49318224962850954,
      "grad_norm": 3.061458110809326,
      "learning_rate": 9.073028340241035e-06,
      "loss": 0.0454,
      "step": 301360
    },
    {
      "epoch": 0.49321498006716286,
      "grad_norm": 1.2590794563293457,
      "learning_rate": 9.072962448027517e-06,
      "loss": 0.0419,
      "step": 301380
    },
    {
      "epoch": 0.4932477105058162,
      "grad_norm": 3.1755969524383545,
      "learning_rate": 9.072896555814e-06,
      "loss": 0.0317,
      "step": 301400
    },
    {
      "epoch": 0.49328044094446954,
      "grad_norm": 0.6627570390701294,
      "learning_rate": 9.072830663600483e-06,
      "loss": 0.0471,
      "step": 301420
    },
    {
      "epoch": 0.49331317138312286,
      "grad_norm": 0.3435269594192505,
      "learning_rate": 9.072764771386966e-06,
      "loss": 0.0488,
      "step": 301440
    },
    {
      "epoch": 0.49334590182177623,
      "grad_norm": 3.7769765853881836,
      "learning_rate": 9.072698879173448e-06,
      "loss": 0.0359,
      "step": 301460
    },
    {
      "epoch": 0.49337863226042955,
      "grad_norm": 0.8940232396125793,
      "learning_rate": 9.072632986959932e-06,
      "loss": 0.0455,
      "step": 301480
    },
    {
      "epoch": 0.49341136269908287,
      "grad_norm": 1.074705719947815,
      "learning_rate": 9.072567094746415e-06,
      "loss": 0.0601,
      "step": 301500
    },
    {
      "epoch": 0.49344409313773624,
      "grad_norm": 0.6221285462379456,
      "learning_rate": 9.072501202532897e-06,
      "loss": 0.0386,
      "step": 301520
    },
    {
      "epoch": 0.49347682357638956,
      "grad_norm": 2.467154026031494,
      "learning_rate": 9.07243531031938e-06,
      "loss": 0.0351,
      "step": 301540
    },
    {
      "epoch": 0.49350955401504293,
      "grad_norm": 1.233951449394226,
      "learning_rate": 9.072369418105863e-06,
      "loss": 0.0393,
      "step": 301560
    },
    {
      "epoch": 0.49354228445369625,
      "grad_norm": 1.4474542140960693,
      "learning_rate": 9.072303525892346e-06,
      "loss": 0.0363,
      "step": 301580
    },
    {
      "epoch": 0.49357501489234956,
      "grad_norm": 0.7394615411758423,
      "learning_rate": 9.072237633678828e-06,
      "loss": 0.0393,
      "step": 301600
    },
    {
      "epoch": 0.49360774533100293,
      "grad_norm": 1.527805209159851,
      "learning_rate": 9.072171741465312e-06,
      "loss": 0.0385,
      "step": 301620
    },
    {
      "epoch": 0.49364047576965625,
      "grad_norm": 0.8310337066650391,
      "learning_rate": 9.072105849251795e-06,
      "loss": 0.0329,
      "step": 301640
    },
    {
      "epoch": 0.4936732062083096,
      "grad_norm": 0.8095423579216003,
      "learning_rate": 9.072039957038277e-06,
      "loss": 0.0366,
      "step": 301660
    },
    {
      "epoch": 0.49370593664696294,
      "grad_norm": 1.2461191415786743,
      "learning_rate": 9.07197406482476e-06,
      "loss": 0.0442,
      "step": 301680
    },
    {
      "epoch": 0.49373866708561626,
      "grad_norm": 0.862402081489563,
      "learning_rate": 9.071908172611244e-06,
      "loss": 0.0282,
      "step": 301700
    },
    {
      "epoch": 0.49377139752426963,
      "grad_norm": 0.4390559196472168,
      "learning_rate": 9.071842280397726e-06,
      "loss": 0.0315,
      "step": 301720
    },
    {
      "epoch": 0.49380412796292295,
      "grad_norm": 0.790119469165802,
      "learning_rate": 9.07177638818421e-06,
      "loss": 0.0523,
      "step": 301740
    },
    {
      "epoch": 0.4938368584015763,
      "grad_norm": 0.8550615310668945,
      "learning_rate": 9.071710495970692e-06,
      "loss": 0.0338,
      "step": 301760
    },
    {
      "epoch": 0.49386958884022963,
      "grad_norm": 0.7785792946815491,
      "learning_rate": 9.071644603757175e-06,
      "loss": 0.0371,
      "step": 301780
    },
    {
      "epoch": 0.49390231927888295,
      "grad_norm": 4.15115213394165,
      "learning_rate": 9.071578711543657e-06,
      "loss": 0.0447,
      "step": 301800
    },
    {
      "epoch": 0.4939350497175363,
      "grad_norm": 1.7382293939590454,
      "learning_rate": 9.07151281933014e-06,
      "loss": 0.0373,
      "step": 301820
    },
    {
      "epoch": 0.49396778015618964,
      "grad_norm": 0.5566248297691345,
      "learning_rate": 9.071446927116624e-06,
      "loss": 0.0468,
      "step": 301840
    },
    {
      "epoch": 0.494000510594843,
      "grad_norm": 2.6804473400115967,
      "learning_rate": 9.071381034903106e-06,
      "loss": 0.051,
      "step": 301860
    },
    {
      "epoch": 0.49403324103349633,
      "grad_norm": 1.7200638055801392,
      "learning_rate": 9.07131514268959e-06,
      "loss": 0.0529,
      "step": 301880
    },
    {
      "epoch": 0.49406597147214965,
      "grad_norm": 3.161940813064575,
      "learning_rate": 9.071249250476072e-06,
      "loss": 0.0436,
      "step": 301900
    },
    {
      "epoch": 0.494098701910803,
      "grad_norm": 2.2885372638702393,
      "learning_rate": 9.071183358262555e-06,
      "loss": 0.048,
      "step": 301920
    },
    {
      "epoch": 0.49413143234945633,
      "grad_norm": 7.2455034255981445,
      "learning_rate": 9.071117466049037e-06,
      "loss": 0.0355,
      "step": 301940
    },
    {
      "epoch": 0.4941641627881097,
      "grad_norm": 2.779860734939575,
      "learning_rate": 9.07105157383552e-06,
      "loss": 0.0377,
      "step": 301960
    },
    {
      "epoch": 0.494196893226763,
      "grad_norm": 1.172309160232544,
      "learning_rate": 9.070985681622003e-06,
      "loss": 0.0397,
      "step": 301980
    },
    {
      "epoch": 0.49422962366541634,
      "grad_norm": 2.200381278991699,
      "learning_rate": 9.070919789408486e-06,
      "loss": 0.0314,
      "step": 302000
    },
    {
      "epoch": 0.4942623541040697,
      "grad_norm": 1.5369848012924194,
      "learning_rate": 9.070853897194968e-06,
      "loss": 0.0466,
      "step": 302020
    },
    {
      "epoch": 0.49429508454272303,
      "grad_norm": 2.0371909141540527,
      "learning_rate": 9.070788004981452e-06,
      "loss": 0.029,
      "step": 302040
    },
    {
      "epoch": 0.4943278149813764,
      "grad_norm": 1.0083374977111816,
      "learning_rate": 9.070722112767935e-06,
      "loss": 0.0311,
      "step": 302060
    },
    {
      "epoch": 0.4943605454200297,
      "grad_norm": 2.020075798034668,
      "learning_rate": 9.070656220554417e-06,
      "loss": 0.0294,
      "step": 302080
    },
    {
      "epoch": 0.49439327585868303,
      "grad_norm": 1.8855305910110474,
      "learning_rate": 9.0705903283409e-06,
      "loss": 0.0444,
      "step": 302100
    },
    {
      "epoch": 0.4944260062973364,
      "grad_norm": 2.817295789718628,
      "learning_rate": 9.070524436127384e-06,
      "loss": 0.0397,
      "step": 302120
    },
    {
      "epoch": 0.4944587367359897,
      "grad_norm": 1.3910636901855469,
      "learning_rate": 9.070458543913866e-06,
      "loss": 0.0399,
      "step": 302140
    },
    {
      "epoch": 0.4944914671746431,
      "grad_norm": 0.40713560581207275,
      "learning_rate": 9.07039265170035e-06,
      "loss": 0.0256,
      "step": 302160
    },
    {
      "epoch": 0.4945241976132964,
      "grad_norm": 0.7502368092536926,
      "learning_rate": 9.070326759486833e-06,
      "loss": 0.0356,
      "step": 302180
    },
    {
      "epoch": 0.49455692805194973,
      "grad_norm": 1.8700039386749268,
      "learning_rate": 9.070260867273315e-06,
      "loss": 0.0347,
      "step": 302200
    },
    {
      "epoch": 0.4945896584906031,
      "grad_norm": 1.3003960847854614,
      "learning_rate": 9.070194975059799e-06,
      "loss": 0.0414,
      "step": 302220
    },
    {
      "epoch": 0.4946223889292564,
      "grad_norm": 1.5430108308792114,
      "learning_rate": 9.07012908284628e-06,
      "loss": 0.0373,
      "step": 302240
    },
    {
      "epoch": 0.4946551193679098,
      "grad_norm": 1.3562393188476562,
      "learning_rate": 9.070063190632764e-06,
      "loss": 0.041,
      "step": 302260
    },
    {
      "epoch": 0.4946878498065631,
      "grad_norm": 1.2479974031448364,
      "learning_rate": 9.069997298419246e-06,
      "loss": 0.0364,
      "step": 302280
    },
    {
      "epoch": 0.4947205802452164,
      "grad_norm": 0.5628613829612732,
      "learning_rate": 9.06993140620573e-06,
      "loss": 0.029,
      "step": 302300
    },
    {
      "epoch": 0.4947533106838698,
      "grad_norm": 1.336245059967041,
      "learning_rate": 9.069865513992212e-06,
      "loss": 0.0434,
      "step": 302320
    },
    {
      "epoch": 0.4947860411225231,
      "grad_norm": 1.6325396299362183,
      "learning_rate": 9.069799621778695e-06,
      "loss": 0.0467,
      "step": 302340
    },
    {
      "epoch": 0.4948187715611765,
      "grad_norm": 1.4520659446716309,
      "learning_rate": 9.069733729565177e-06,
      "loss": 0.0459,
      "step": 302360
    },
    {
      "epoch": 0.4948515019998298,
      "grad_norm": 1.8733245134353638,
      "learning_rate": 9.06966783735166e-06,
      "loss": 0.0364,
      "step": 302380
    },
    {
      "epoch": 0.4948842324384831,
      "grad_norm": 0.2905120849609375,
      "learning_rate": 9.069601945138143e-06,
      "loss": 0.0411,
      "step": 302400
    },
    {
      "epoch": 0.4949169628771365,
      "grad_norm": 0.9965269565582275,
      "learning_rate": 9.069536052924626e-06,
      "loss": 0.0383,
      "step": 302420
    },
    {
      "epoch": 0.4949496933157898,
      "grad_norm": 0.31328868865966797,
      "learning_rate": 9.06947016071111e-06,
      "loss": 0.0421,
      "step": 302440
    },
    {
      "epoch": 0.4949824237544432,
      "grad_norm": 1.2491143941879272,
      "learning_rate": 9.069404268497592e-06,
      "loss": 0.0314,
      "step": 302460
    },
    {
      "epoch": 0.4950151541930965,
      "grad_norm": 0.8713375329971313,
      "learning_rate": 9.069338376284075e-06,
      "loss": 0.0317,
      "step": 302480
    },
    {
      "epoch": 0.4950478846317498,
      "grad_norm": 1.1728012561798096,
      "learning_rate": 9.069272484070559e-06,
      "loss": 0.037,
      "step": 302500
    },
    {
      "epoch": 0.4950806150704032,
      "grad_norm": 1.0605052709579468,
      "learning_rate": 9.069206591857041e-06,
      "loss": 0.0412,
      "step": 302520
    },
    {
      "epoch": 0.4951133455090565,
      "grad_norm": 1.0641857385635376,
      "learning_rate": 9.069140699643524e-06,
      "loss": 0.0355,
      "step": 302540
    },
    {
      "epoch": 0.4951460759477099,
      "grad_norm": 0.43594270944595337,
      "learning_rate": 9.069074807430008e-06,
      "loss": 0.0509,
      "step": 302560
    },
    {
      "epoch": 0.4951788063863632,
      "grad_norm": 1.7527971267700195,
      "learning_rate": 9.06900891521649e-06,
      "loss": 0.035,
      "step": 302580
    },
    {
      "epoch": 0.4952115368250165,
      "grad_norm": 1.0362253189086914,
      "learning_rate": 9.068943023002974e-06,
      "loss": 0.0396,
      "step": 302600
    },
    {
      "epoch": 0.4952442672636699,
      "grad_norm": 6.69355583190918,
      "learning_rate": 9.068877130789455e-06,
      "loss": 0.0386,
      "step": 302620
    },
    {
      "epoch": 0.4952769977023232,
      "grad_norm": 0.5749902129173279,
      "learning_rate": 9.068811238575939e-06,
      "loss": 0.0428,
      "step": 302640
    },
    {
      "epoch": 0.49530972814097657,
      "grad_norm": 1.6875985860824585,
      "learning_rate": 9.068745346362421e-06,
      "loss": 0.0419,
      "step": 302660
    },
    {
      "epoch": 0.4953424585796299,
      "grad_norm": 1.5578560829162598,
      "learning_rate": 9.068679454148904e-06,
      "loss": 0.0417,
      "step": 302680
    },
    {
      "epoch": 0.4953751890182832,
      "grad_norm": 1.2694674730300903,
      "learning_rate": 9.068613561935386e-06,
      "loss": 0.0358,
      "step": 302700
    },
    {
      "epoch": 0.4954079194569366,
      "grad_norm": 0.7290039658546448,
      "learning_rate": 9.06854766972187e-06,
      "loss": 0.0324,
      "step": 302720
    },
    {
      "epoch": 0.4954406498955899,
      "grad_norm": 2.8940529823303223,
      "learning_rate": 9.068481777508352e-06,
      "loss": 0.041,
      "step": 302740
    },
    {
      "epoch": 0.49547338033424326,
      "grad_norm": 1.5169562101364136,
      "learning_rate": 9.068415885294835e-06,
      "loss": 0.0412,
      "step": 302760
    },
    {
      "epoch": 0.4955061107728966,
      "grad_norm": 2.5570895671844482,
      "learning_rate": 9.068349993081317e-06,
      "loss": 0.0405,
      "step": 302780
    },
    {
      "epoch": 0.4955388412115499,
      "grad_norm": 3.1266143321990967,
      "learning_rate": 9.068284100867801e-06,
      "loss": 0.0247,
      "step": 302800
    },
    {
      "epoch": 0.49557157165020327,
      "grad_norm": 0.3767353892326355,
      "learning_rate": 9.068218208654283e-06,
      "loss": 0.0365,
      "step": 302820
    },
    {
      "epoch": 0.4956043020888566,
      "grad_norm": 0.6528254747390747,
      "learning_rate": 9.068152316440766e-06,
      "loss": 0.0545,
      "step": 302840
    },
    {
      "epoch": 0.49563703252750996,
      "grad_norm": 1.434699535369873,
      "learning_rate": 9.06808642422725e-06,
      "loss": 0.0449,
      "step": 302860
    },
    {
      "epoch": 0.4956697629661633,
      "grad_norm": 2.0773494243621826,
      "learning_rate": 9.068020532013732e-06,
      "loss": 0.0322,
      "step": 302880
    },
    {
      "epoch": 0.4957024934048166,
      "grad_norm": 0.9111396670341492,
      "learning_rate": 9.067954639800215e-06,
      "loss": 0.044,
      "step": 302900
    },
    {
      "epoch": 0.49573522384346996,
      "grad_norm": 0.663515031337738,
      "learning_rate": 9.067888747586699e-06,
      "loss": 0.0451,
      "step": 302920
    },
    {
      "epoch": 0.4957679542821233,
      "grad_norm": 0.9899376034736633,
      "learning_rate": 9.067822855373181e-06,
      "loss": 0.0408,
      "step": 302940
    },
    {
      "epoch": 0.49580068472077665,
      "grad_norm": 3.9594309329986572,
      "learning_rate": 9.067756963159665e-06,
      "loss": 0.0391,
      "step": 302960
    },
    {
      "epoch": 0.49583341515942997,
      "grad_norm": 2.032959222793579,
      "learning_rate": 9.067691070946148e-06,
      "loss": 0.0433,
      "step": 302980
    },
    {
      "epoch": 0.4958661455980833,
      "grad_norm": 2.019716262817383,
      "learning_rate": 9.06762517873263e-06,
      "loss": 0.0378,
      "step": 303000
    },
    {
      "epoch": 0.49589887603673666,
      "grad_norm": 1.131119728088379,
      "learning_rate": 9.067559286519114e-06,
      "loss": 0.0425,
      "step": 303020
    },
    {
      "epoch": 0.49593160647539,
      "grad_norm": 1.287872076034546,
      "learning_rate": 9.067493394305595e-06,
      "loss": 0.0354,
      "step": 303040
    },
    {
      "epoch": 0.49596433691404335,
      "grad_norm": 1.5803608894348145,
      "learning_rate": 9.067427502092079e-06,
      "loss": 0.0494,
      "step": 303060
    },
    {
      "epoch": 0.49599706735269666,
      "grad_norm": 4.1918535232543945,
      "learning_rate": 9.067361609878561e-06,
      "loss": 0.0546,
      "step": 303080
    },
    {
      "epoch": 0.49602979779135,
      "grad_norm": 3.3323590755462646,
      "learning_rate": 9.067295717665045e-06,
      "loss": 0.0466,
      "step": 303100
    },
    {
      "epoch": 0.49606252823000335,
      "grad_norm": 2.249800682067871,
      "learning_rate": 9.067229825451526e-06,
      "loss": 0.0435,
      "step": 303120
    },
    {
      "epoch": 0.49609525866865667,
      "grad_norm": 2.816058874130249,
      "learning_rate": 9.06716393323801e-06,
      "loss": 0.0414,
      "step": 303140
    },
    {
      "epoch": 0.49612798910731004,
      "grad_norm": 1.4208688735961914,
      "learning_rate": 9.067098041024492e-06,
      "loss": 0.0478,
      "step": 303160
    },
    {
      "epoch": 0.49616071954596336,
      "grad_norm": 0.34215137362480164,
      "learning_rate": 9.067032148810976e-06,
      "loss": 0.0355,
      "step": 303180
    },
    {
      "epoch": 0.4961934499846167,
      "grad_norm": 2.6325392723083496,
      "learning_rate": 9.066966256597457e-06,
      "loss": 0.0342,
      "step": 303200
    },
    {
      "epoch": 0.49622618042327005,
      "grad_norm": 0.4746303856372833,
      "learning_rate": 9.066900364383941e-06,
      "loss": 0.0474,
      "step": 303220
    },
    {
      "epoch": 0.49625891086192336,
      "grad_norm": 1.1024465560913086,
      "learning_rate": 9.066834472170425e-06,
      "loss": 0.0423,
      "step": 303240
    },
    {
      "epoch": 0.49629164130057674,
      "grad_norm": 0.7887421250343323,
      "learning_rate": 9.066768579956906e-06,
      "loss": 0.0532,
      "step": 303260
    },
    {
      "epoch": 0.49632437173923005,
      "grad_norm": 2.1785552501678467,
      "learning_rate": 9.06670268774339e-06,
      "loss": 0.0455,
      "step": 303280
    },
    {
      "epoch": 0.49635710217788337,
      "grad_norm": 0.6915152072906494,
      "learning_rate": 9.066636795529874e-06,
      "loss": 0.0436,
      "step": 303300
    },
    {
      "epoch": 0.49638983261653674,
      "grad_norm": 0.5993791818618774,
      "learning_rate": 9.066570903316356e-06,
      "loss": 0.0354,
      "step": 303320
    },
    {
      "epoch": 0.49642256305519006,
      "grad_norm": 0.6506661176681519,
      "learning_rate": 9.066505011102839e-06,
      "loss": 0.0371,
      "step": 303340
    },
    {
      "epoch": 0.49645529349384343,
      "grad_norm": 2.1431872844696045,
      "learning_rate": 9.066439118889323e-06,
      "loss": 0.0365,
      "step": 303360
    },
    {
      "epoch": 0.49648802393249675,
      "grad_norm": 11.836151123046875,
      "learning_rate": 9.066373226675805e-06,
      "loss": 0.0319,
      "step": 303380
    },
    {
      "epoch": 0.49652075437115006,
      "grad_norm": 2.396639823913574,
      "learning_rate": 9.066307334462288e-06,
      "loss": 0.045,
      "step": 303400
    },
    {
      "epoch": 0.49655348480980344,
      "grad_norm": 1.3159502744674683,
      "learning_rate": 9.06624144224877e-06,
      "loss": 0.0477,
      "step": 303420
    },
    {
      "epoch": 0.49658621524845675,
      "grad_norm": 2.177312135696411,
      "learning_rate": 9.066175550035254e-06,
      "loss": 0.037,
      "step": 303440
    },
    {
      "epoch": 0.4966189456871101,
      "grad_norm": 1.7203869819641113,
      "learning_rate": 9.066109657821736e-06,
      "loss": 0.0411,
      "step": 303460
    },
    {
      "epoch": 0.49665167612576344,
      "grad_norm": 1.497103214263916,
      "learning_rate": 9.066043765608219e-06,
      "loss": 0.0371,
      "step": 303480
    },
    {
      "epoch": 0.49668440656441676,
      "grad_norm": 1.0489932298660278,
      "learning_rate": 9.065977873394701e-06,
      "loss": 0.0453,
      "step": 303500
    },
    {
      "epoch": 0.49671713700307013,
      "grad_norm": 0.5466406941413879,
      "learning_rate": 9.065911981181185e-06,
      "loss": 0.0423,
      "step": 303520
    },
    {
      "epoch": 0.49674986744172345,
      "grad_norm": 19.15047264099121,
      "learning_rate": 9.065846088967667e-06,
      "loss": 0.039,
      "step": 303540
    },
    {
      "epoch": 0.4967825978803768,
      "grad_norm": 0.9221139550209045,
      "learning_rate": 9.06578019675415e-06,
      "loss": 0.0302,
      "step": 303560
    },
    {
      "epoch": 0.49681532831903014,
      "grad_norm": 2.23954701423645,
      "learning_rate": 9.065714304540634e-06,
      "loss": 0.0289,
      "step": 303580
    },
    {
      "epoch": 0.49684805875768345,
      "grad_norm": 0.43687471747398376,
      "learning_rate": 9.065648412327116e-06,
      "loss": 0.0515,
      "step": 303600
    },
    {
      "epoch": 0.4968807891963368,
      "grad_norm": 2.4368696212768555,
      "learning_rate": 9.0655825201136e-06,
      "loss": 0.0448,
      "step": 303620
    },
    {
      "epoch": 0.49691351963499014,
      "grad_norm": 5.190976142883301,
      "learning_rate": 9.065516627900081e-06,
      "loss": 0.0354,
      "step": 303640
    },
    {
      "epoch": 0.4969462500736435,
      "grad_norm": 0.11370743066072464,
      "learning_rate": 9.065450735686565e-06,
      "loss": 0.0295,
      "step": 303660
    },
    {
      "epoch": 0.49697898051229683,
      "grad_norm": 1.612597107887268,
      "learning_rate": 9.065384843473047e-06,
      "loss": 0.0357,
      "step": 303680
    },
    {
      "epoch": 0.49701171095095015,
      "grad_norm": 0.5472192764282227,
      "learning_rate": 9.06531895125953e-06,
      "loss": 0.0282,
      "step": 303700
    },
    {
      "epoch": 0.4970444413896035,
      "grad_norm": 1.7739412784576416,
      "learning_rate": 9.065253059046014e-06,
      "loss": 0.0317,
      "step": 303720
    },
    {
      "epoch": 0.49707717182825684,
      "grad_norm": 2.0053751468658447,
      "learning_rate": 9.065187166832496e-06,
      "loss": 0.0579,
      "step": 303740
    },
    {
      "epoch": 0.4971099022669102,
      "grad_norm": 2.8344943523406982,
      "learning_rate": 9.06512127461898e-06,
      "loss": 0.047,
      "step": 303760
    },
    {
      "epoch": 0.4971426327055635,
      "grad_norm": 2.6628198623657227,
      "learning_rate": 9.065055382405463e-06,
      "loss": 0.0406,
      "step": 303780
    },
    {
      "epoch": 0.49717536314421684,
      "grad_norm": 0.2287369668483734,
      "learning_rate": 9.064989490191945e-06,
      "loss": 0.0302,
      "step": 303800
    },
    {
      "epoch": 0.4972080935828702,
      "grad_norm": 1.2100000381469727,
      "learning_rate": 9.064923597978428e-06,
      "loss": 0.0504,
      "step": 303820
    },
    {
      "epoch": 0.49724082402152353,
      "grad_norm": 2.7465152740478516,
      "learning_rate": 9.06485770576491e-06,
      "loss": 0.0492,
      "step": 303840
    },
    {
      "epoch": 0.49727355446017685,
      "grad_norm": 1.6827263832092285,
      "learning_rate": 9.064791813551394e-06,
      "loss": 0.0265,
      "step": 303860
    },
    {
      "epoch": 0.4973062848988302,
      "grad_norm": 2.170804262161255,
      "learning_rate": 9.064725921337876e-06,
      "loss": 0.0349,
      "step": 303880
    },
    {
      "epoch": 0.49733901533748354,
      "grad_norm": 2.600654125213623,
      "learning_rate": 9.06466002912436e-06,
      "loss": 0.0467,
      "step": 303900
    },
    {
      "epoch": 0.4973717457761369,
      "grad_norm": 0.7618139982223511,
      "learning_rate": 9.064594136910841e-06,
      "loss": 0.0401,
      "step": 303920
    },
    {
      "epoch": 0.4974044762147902,
      "grad_norm": 2.45904541015625,
      "learning_rate": 9.064528244697325e-06,
      "loss": 0.045,
      "step": 303940
    },
    {
      "epoch": 0.49743720665344354,
      "grad_norm": 0.2899603545665741,
      "learning_rate": 9.064462352483808e-06,
      "loss": 0.0474,
      "step": 303960
    },
    {
      "epoch": 0.4974699370920969,
      "grad_norm": 3.081616163253784,
      "learning_rate": 9.06439646027029e-06,
      "loss": 0.046,
      "step": 303980
    },
    {
      "epoch": 0.49750266753075023,
      "grad_norm": 1.1997771263122559,
      "learning_rate": 9.064330568056774e-06,
      "loss": 0.0386,
      "step": 304000
    },
    {
      "epoch": 0.4975353979694036,
      "grad_norm": 1.1939173936843872,
      "learning_rate": 9.064264675843256e-06,
      "loss": 0.0519,
      "step": 304020
    },
    {
      "epoch": 0.4975681284080569,
      "grad_norm": 1.5562440156936646,
      "learning_rate": 9.06419878362974e-06,
      "loss": 0.0359,
      "step": 304040
    },
    {
      "epoch": 0.49760085884671024,
      "grad_norm": 1.2003318071365356,
      "learning_rate": 9.064132891416221e-06,
      "loss": 0.0373,
      "step": 304060
    },
    {
      "epoch": 0.4976335892853636,
      "grad_norm": 4.222716331481934,
      "learning_rate": 9.064066999202705e-06,
      "loss": 0.0356,
      "step": 304080
    },
    {
      "epoch": 0.4976663197240169,
      "grad_norm": 0.36132195591926575,
      "learning_rate": 9.064001106989188e-06,
      "loss": 0.0448,
      "step": 304100
    },
    {
      "epoch": 0.4976990501626703,
      "grad_norm": 1.2045725584030151,
      "learning_rate": 9.06393521477567e-06,
      "loss": 0.0417,
      "step": 304120
    },
    {
      "epoch": 0.4977317806013236,
      "grad_norm": 0.3697420358657837,
      "learning_rate": 9.063869322562154e-06,
      "loss": 0.0469,
      "step": 304140
    },
    {
      "epoch": 0.49776451103997693,
      "grad_norm": 2.042189359664917,
      "learning_rate": 9.063803430348637e-06,
      "loss": 0.0345,
      "step": 304160
    },
    {
      "epoch": 0.4977972414786303,
      "grad_norm": 1.076731562614441,
      "learning_rate": 9.06373753813512e-06,
      "loss": 0.0486,
      "step": 304180
    },
    {
      "epoch": 0.4978299719172836,
      "grad_norm": 0.8799540996551514,
      "learning_rate": 9.063671645921603e-06,
      "loss": 0.0489,
      "step": 304200
    },
    {
      "epoch": 0.497862702355937,
      "grad_norm": 1.212526798248291,
      "learning_rate": 9.063605753708085e-06,
      "loss": 0.0384,
      "step": 304220
    },
    {
      "epoch": 0.4978954327945903,
      "grad_norm": 0.6588561534881592,
      "learning_rate": 9.063539861494568e-06,
      "loss": 0.0394,
      "step": 304240
    },
    {
      "epoch": 0.4979281632332436,
      "grad_norm": 0.48483842611312866,
      "learning_rate": 9.06347396928105e-06,
      "loss": 0.0305,
      "step": 304260
    },
    {
      "epoch": 0.497960893671897,
      "grad_norm": 1.6090116500854492,
      "learning_rate": 9.063408077067534e-06,
      "loss": 0.036,
      "step": 304280
    },
    {
      "epoch": 0.4979936241105503,
      "grad_norm": 1.8706231117248535,
      "learning_rate": 9.063342184854017e-06,
      "loss": 0.0438,
      "step": 304300
    },
    {
      "epoch": 0.4980263545492037,
      "grad_norm": 0.20439249277114868,
      "learning_rate": 9.0632762926405e-06,
      "loss": 0.0333,
      "step": 304320
    },
    {
      "epoch": 0.498059084987857,
      "grad_norm": 0.7019810676574707,
      "learning_rate": 9.063210400426983e-06,
      "loss": 0.0373,
      "step": 304340
    },
    {
      "epoch": 0.4980918154265103,
      "grad_norm": 2.1604092121124268,
      "learning_rate": 9.063144508213465e-06,
      "loss": 0.0537,
      "step": 304360
    },
    {
      "epoch": 0.4981245458651637,
      "grad_norm": 3.0333735942840576,
      "learning_rate": 9.063078615999948e-06,
      "loss": 0.0329,
      "step": 304380
    },
    {
      "epoch": 0.498157276303817,
      "grad_norm": 1.350873351097107,
      "learning_rate": 9.06301272378643e-06,
      "loss": 0.0401,
      "step": 304400
    },
    {
      "epoch": 0.4981900067424704,
      "grad_norm": 1.3190672397613525,
      "learning_rate": 9.062946831572914e-06,
      "loss": 0.032,
      "step": 304420
    },
    {
      "epoch": 0.4982227371811237,
      "grad_norm": 1.6958476305007935,
      "learning_rate": 9.062880939359396e-06,
      "loss": 0.0319,
      "step": 304440
    },
    {
      "epoch": 0.498255467619777,
      "grad_norm": 2.1852829456329346,
      "learning_rate": 9.06281504714588e-06,
      "loss": 0.039,
      "step": 304460
    },
    {
      "epoch": 0.4982881980584304,
      "grad_norm": 0.5516746044158936,
      "learning_rate": 9.062749154932363e-06,
      "loss": 0.0501,
      "step": 304480
    },
    {
      "epoch": 0.4983209284970837,
      "grad_norm": 1.5812525749206543,
      "learning_rate": 9.062683262718845e-06,
      "loss": 0.0479,
      "step": 304500
    },
    {
      "epoch": 0.4983536589357371,
      "grad_norm": 0.38175585865974426,
      "learning_rate": 9.062617370505328e-06,
      "loss": 0.0365,
      "step": 304520
    },
    {
      "epoch": 0.4983863893743904,
      "grad_norm": 1.1094775199890137,
      "learning_rate": 9.062551478291812e-06,
      "loss": 0.0547,
      "step": 304540
    },
    {
      "epoch": 0.4984191198130437,
      "grad_norm": 3.506028652191162,
      "learning_rate": 9.062485586078294e-06,
      "loss": 0.0278,
      "step": 304560
    },
    {
      "epoch": 0.4984518502516971,
      "grad_norm": 0.33194437623023987,
      "learning_rate": 9.062419693864777e-06,
      "loss": 0.0347,
      "step": 304580
    },
    {
      "epoch": 0.4984845806903504,
      "grad_norm": 1.6131744384765625,
      "learning_rate": 9.06235380165126e-06,
      "loss": 0.0521,
      "step": 304600
    },
    {
      "epoch": 0.49851731112900377,
      "grad_norm": 0.6656123995780945,
      "learning_rate": 9.062287909437743e-06,
      "loss": 0.0321,
      "step": 304620
    },
    {
      "epoch": 0.4985500415676571,
      "grad_norm": 0.44924771785736084,
      "learning_rate": 9.062222017224227e-06,
      "loss": 0.0337,
      "step": 304640
    },
    {
      "epoch": 0.4985827720063104,
      "grad_norm": 1.9015510082244873,
      "learning_rate": 9.062156125010708e-06,
      "loss": 0.0325,
      "step": 304660
    },
    {
      "epoch": 0.4986155024449638,
      "grad_norm": 1.432188630104065,
      "learning_rate": 9.062090232797192e-06,
      "loss": 0.034,
      "step": 304680
    },
    {
      "epoch": 0.4986482328836171,
      "grad_norm": 2.666188955307007,
      "learning_rate": 9.062024340583674e-06,
      "loss": 0.0398,
      "step": 304700
    },
    {
      "epoch": 0.49868096332227047,
      "grad_norm": 1.5985488891601562,
      "learning_rate": 9.061958448370157e-06,
      "loss": 0.0355,
      "step": 304720
    },
    {
      "epoch": 0.4987136937609238,
      "grad_norm": 0.5670514106750488,
      "learning_rate": 9.06189255615664e-06,
      "loss": 0.0347,
      "step": 304740
    },
    {
      "epoch": 0.4987464241995771,
      "grad_norm": 0.31162554025650024,
      "learning_rate": 9.061826663943123e-06,
      "loss": 0.0439,
      "step": 304760
    },
    {
      "epoch": 0.49877915463823047,
      "grad_norm": 0.3464927077293396,
      "learning_rate": 9.061760771729605e-06,
      "loss": 0.0332,
      "step": 304780
    },
    {
      "epoch": 0.4988118850768838,
      "grad_norm": 2.482278823852539,
      "learning_rate": 9.061694879516088e-06,
      "loss": 0.0414,
      "step": 304800
    },
    {
      "epoch": 0.49884461551553716,
      "grad_norm": 0.2867002487182617,
      "learning_rate": 9.06162898730257e-06,
      "loss": 0.0366,
      "step": 304820
    },
    {
      "epoch": 0.4988773459541905,
      "grad_norm": 0.2693931758403778,
      "learning_rate": 9.061563095089054e-06,
      "loss": 0.0289,
      "step": 304840
    },
    {
      "epoch": 0.4989100763928438,
      "grad_norm": 0.48754820227622986,
      "learning_rate": 9.061497202875536e-06,
      "loss": 0.0359,
      "step": 304860
    },
    {
      "epoch": 0.49894280683149717,
      "grad_norm": 2.3182120323181152,
      "learning_rate": 9.06143131066202e-06,
      "loss": 0.0309,
      "step": 304880
    },
    {
      "epoch": 0.4989755372701505,
      "grad_norm": 4.3689751625061035,
      "learning_rate": 9.061365418448503e-06,
      "loss": 0.0515,
      "step": 304900
    },
    {
      "epoch": 0.49900826770880385,
      "grad_norm": 1.5310348272323608,
      "learning_rate": 9.061299526234985e-06,
      "loss": 0.0395,
      "step": 304920
    },
    {
      "epoch": 0.49904099814745717,
      "grad_norm": 2.5888376235961914,
      "learning_rate": 9.061233634021468e-06,
      "loss": 0.045,
      "step": 304940
    },
    {
      "epoch": 0.4990737285861105,
      "grad_norm": 5.076847076416016,
      "learning_rate": 9.061167741807952e-06,
      "loss": 0.0518,
      "step": 304960
    },
    {
      "epoch": 0.49910645902476386,
      "grad_norm": 1.596428632736206,
      "learning_rate": 9.061101849594434e-06,
      "loss": 0.0437,
      "step": 304980
    },
    {
      "epoch": 0.4991391894634172,
      "grad_norm": 0.1674574464559555,
      "learning_rate": 9.061035957380918e-06,
      "loss": 0.031,
      "step": 305000
    },
    {
      "epoch": 0.49917191990207055,
      "grad_norm": 0.8393369317054749,
      "learning_rate": 9.060970065167401e-06,
      "loss": 0.0499,
      "step": 305020
    },
    {
      "epoch": 0.49920465034072387,
      "grad_norm": 0.8695768117904663,
      "learning_rate": 9.060904172953883e-06,
      "loss": 0.0543,
      "step": 305040
    },
    {
      "epoch": 0.4992373807793772,
      "grad_norm": 1.6712549924850464,
      "learning_rate": 9.060838280740367e-06,
      "loss": 0.0448,
      "step": 305060
    },
    {
      "epoch": 0.49927011121803055,
      "grad_norm": 1.46030592918396,
      "learning_rate": 9.060772388526848e-06,
      "loss": 0.0376,
      "step": 305080
    },
    {
      "epoch": 0.49930284165668387,
      "grad_norm": 1.273976445198059,
      "learning_rate": 9.060706496313332e-06,
      "loss": 0.0275,
      "step": 305100
    },
    {
      "epoch": 0.49933557209533724,
      "grad_norm": 0.40095990896224976,
      "learning_rate": 9.060640604099814e-06,
      "loss": 0.0408,
      "step": 305120
    },
    {
      "epoch": 0.49936830253399056,
      "grad_norm": 1.127404808998108,
      "learning_rate": 9.060574711886298e-06,
      "loss": 0.0447,
      "step": 305140
    },
    {
      "epoch": 0.4994010329726439,
      "grad_norm": 2.1865341663360596,
      "learning_rate": 9.06050881967278e-06,
      "loss": 0.0389,
      "step": 305160
    },
    {
      "epoch": 0.49943376341129725,
      "grad_norm": 0.6411809325218201,
      "learning_rate": 9.060442927459263e-06,
      "loss": 0.0372,
      "step": 305180
    },
    {
      "epoch": 0.49946649384995057,
      "grad_norm": 1.371932864189148,
      "learning_rate": 9.060377035245745e-06,
      "loss": 0.05,
      "step": 305200
    },
    {
      "epoch": 0.49949922428860394,
      "grad_norm": 0.9825401306152344,
      "learning_rate": 9.060311143032229e-06,
      "loss": 0.0367,
      "step": 305220
    },
    {
      "epoch": 0.49953195472725725,
      "grad_norm": 3.515704870223999,
      "learning_rate": 9.06024525081871e-06,
      "loss": 0.0447,
      "step": 305240
    },
    {
      "epoch": 0.49956468516591057,
      "grad_norm": 1.125933051109314,
      "learning_rate": 9.060179358605194e-06,
      "loss": 0.0445,
      "step": 305260
    },
    {
      "epoch": 0.49959741560456394,
      "grad_norm": 1.3613883256912231,
      "learning_rate": 9.060113466391678e-06,
      "loss": 0.0424,
      "step": 305280
    },
    {
      "epoch": 0.49963014604321726,
      "grad_norm": 2.227626323699951,
      "learning_rate": 9.06004757417816e-06,
      "loss": 0.0424,
      "step": 305300
    },
    {
      "epoch": 0.49966287648187063,
      "grad_norm": 1.3971184492111206,
      "learning_rate": 9.059981681964643e-06,
      "loss": 0.0415,
      "step": 305320
    },
    {
      "epoch": 0.49969560692052395,
      "grad_norm": 1.5703731775283813,
      "learning_rate": 9.059915789751127e-06,
      "loss": 0.0471,
      "step": 305340
    },
    {
      "epoch": 0.49972833735917727,
      "grad_norm": 0.4580319821834564,
      "learning_rate": 9.059849897537609e-06,
      "loss": 0.0331,
      "step": 305360
    },
    {
      "epoch": 0.49976106779783064,
      "grad_norm": 1.075725793838501,
      "learning_rate": 9.059784005324092e-06,
      "loss": 0.049,
      "step": 305380
    },
    {
      "epoch": 0.49979379823648395,
      "grad_norm": 1.1716195344924927,
      "learning_rate": 9.059718113110576e-06,
      "loss": 0.0508,
      "step": 305400
    },
    {
      "epoch": 0.4998265286751373,
      "grad_norm": 1.03000009059906,
      "learning_rate": 9.059652220897058e-06,
      "loss": 0.041,
      "step": 305420
    },
    {
      "epoch": 0.49985925911379064,
      "grad_norm": 0.9697290062904358,
      "learning_rate": 9.059586328683541e-06,
      "loss": 0.0432,
      "step": 305440
    },
    {
      "epoch": 0.49989198955244396,
      "grad_norm": 0.998406708240509,
      "learning_rate": 9.059520436470023e-06,
      "loss": 0.0426,
      "step": 305460
    },
    {
      "epoch": 0.49992471999109733,
      "grad_norm": 0.6298475861549377,
      "learning_rate": 9.059454544256507e-06,
      "loss": 0.0371,
      "step": 305480
    },
    {
      "epoch": 0.49995745042975065,
      "grad_norm": 1.0059454441070557,
      "learning_rate": 9.059388652042989e-06,
      "loss": 0.0299,
      "step": 305500
    },
    {
      "epoch": 0.499990180868404,
      "grad_norm": 2.867908239364624,
      "learning_rate": 9.059322759829472e-06,
      "loss": 0.0506,
      "step": 305520
    },
    {
      "epoch": 0.5000229113070573,
      "grad_norm": 3.73000168800354,
      "learning_rate": 9.059256867615954e-06,
      "loss": 0.0448,
      "step": 305540
    },
    {
      "epoch": 0.5000556417457107,
      "grad_norm": 2.219144821166992,
      "learning_rate": 9.059190975402438e-06,
      "loss": 0.0352,
      "step": 305560
    },
    {
      "epoch": 0.500088372184364,
      "grad_norm": 0.9350537061691284,
      "learning_rate": 9.05912508318892e-06,
      "loss": 0.0528,
      "step": 305580
    },
    {
      "epoch": 0.5001211026230173,
      "grad_norm": 3.5491111278533936,
      "learning_rate": 9.059059190975403e-06,
      "loss": 0.0497,
      "step": 305600
    },
    {
      "epoch": 0.5001538330616707,
      "grad_norm": 1.4438562393188477,
      "learning_rate": 9.058993298761885e-06,
      "loss": 0.032,
      "step": 305620
    },
    {
      "epoch": 0.500186563500324,
      "grad_norm": 0.3846772611141205,
      "learning_rate": 9.058927406548369e-06,
      "loss": 0.0388,
      "step": 305640
    },
    {
      "epoch": 0.5002192939389773,
      "grad_norm": 1.1532946825027466,
      "learning_rate": 9.05886151433485e-06,
      "loss": 0.0455,
      "step": 305660
    },
    {
      "epoch": 0.5002520243776307,
      "grad_norm": 3.2483866214752197,
      "learning_rate": 9.058795622121334e-06,
      "loss": 0.0453,
      "step": 305680
    },
    {
      "epoch": 0.5002847548162841,
      "grad_norm": 1.5422097444534302,
      "learning_rate": 9.058729729907818e-06,
      "loss": 0.0383,
      "step": 305700
    },
    {
      "epoch": 0.5003174852549374,
      "grad_norm": 1.4513952732086182,
      "learning_rate": 9.0586638376943e-06,
      "loss": 0.0427,
      "step": 305720
    },
    {
      "epoch": 0.5003502156935907,
      "grad_norm": 0.22289876639842987,
      "learning_rate": 9.058597945480783e-06,
      "loss": 0.0461,
      "step": 305740
    },
    {
      "epoch": 0.5003829461322441,
      "grad_norm": 0.2610519230365753,
      "learning_rate": 9.058532053267267e-06,
      "loss": 0.0257,
      "step": 305760
    },
    {
      "epoch": 0.5004156765708974,
      "grad_norm": 0.5776357650756836,
      "learning_rate": 9.058466161053749e-06,
      "loss": 0.0452,
      "step": 305780
    },
    {
      "epoch": 0.5004484070095507,
      "grad_norm": 2.285245656967163,
      "learning_rate": 9.058400268840232e-06,
      "loss": 0.0423,
      "step": 305800
    },
    {
      "epoch": 0.5004811374482041,
      "grad_norm": 1.0016720294952393,
      "learning_rate": 9.058334376626716e-06,
      "loss": 0.0307,
      "step": 305820
    },
    {
      "epoch": 0.5005138678868574,
      "grad_norm": 0.6358940601348877,
      "learning_rate": 9.058268484413198e-06,
      "loss": 0.0454,
      "step": 305840
    },
    {
      "epoch": 0.5005465983255107,
      "grad_norm": 0.9380146265029907,
      "learning_rate": 9.058202592199681e-06,
      "loss": 0.0306,
      "step": 305860
    },
    {
      "epoch": 0.5005793287641641,
      "grad_norm": 1.7642933130264282,
      "learning_rate": 9.058136699986163e-06,
      "loss": 0.0411,
      "step": 305880
    },
    {
      "epoch": 0.5006120592028175,
      "grad_norm": 2.242238759994507,
      "learning_rate": 9.058070807772647e-06,
      "loss": 0.0326,
      "step": 305900
    },
    {
      "epoch": 0.5006447896414707,
      "grad_norm": 1.6757299900054932,
      "learning_rate": 9.058004915559129e-06,
      "loss": 0.0492,
      "step": 305920
    },
    {
      "epoch": 0.5006775200801241,
      "grad_norm": 0.9744938611984253,
      "learning_rate": 9.057939023345612e-06,
      "loss": 0.0427,
      "step": 305940
    },
    {
      "epoch": 0.5007102505187775,
      "grad_norm": 2.244035243988037,
      "learning_rate": 9.057873131132094e-06,
      "loss": 0.0419,
      "step": 305960
    },
    {
      "epoch": 0.5007429809574307,
      "grad_norm": 1.3441938161849976,
      "learning_rate": 9.057807238918578e-06,
      "loss": 0.0406,
      "step": 305980
    },
    {
      "epoch": 0.5007757113960841,
      "grad_norm": 3.861611843109131,
      "learning_rate": 9.05774134670506e-06,
      "loss": 0.0481,
      "step": 306000
    },
    {
      "epoch": 0.5008084418347375,
      "grad_norm": 1.5932447910308838,
      "learning_rate": 9.057675454491543e-06,
      "loss": 0.0416,
      "step": 306020
    },
    {
      "epoch": 0.5008411722733908,
      "grad_norm": 0.31376540660858154,
      "learning_rate": 9.057609562278027e-06,
      "loss": 0.0603,
      "step": 306040
    },
    {
      "epoch": 0.5008739027120441,
      "grad_norm": 1.5261778831481934,
      "learning_rate": 9.057543670064509e-06,
      "loss": 0.03,
      "step": 306060
    },
    {
      "epoch": 0.5009066331506975,
      "grad_norm": 0.6564860343933105,
      "learning_rate": 9.057477777850992e-06,
      "loss": 0.0466,
      "step": 306080
    },
    {
      "epoch": 0.5009393635893509,
      "grad_norm": 0.9386239051818848,
      "learning_rate": 9.057411885637474e-06,
      "loss": 0.0231,
      "step": 306100
    },
    {
      "epoch": 0.5009720940280041,
      "grad_norm": 1.7614960670471191,
      "learning_rate": 9.057345993423958e-06,
      "loss": 0.0429,
      "step": 306120
    },
    {
      "epoch": 0.5010048244666575,
      "grad_norm": 5.979888916015625,
      "learning_rate": 9.057280101210441e-06,
      "loss": 0.0384,
      "step": 306140
    },
    {
      "epoch": 0.5010375549053109,
      "grad_norm": 0.830232560634613,
      "learning_rate": 9.057214208996923e-06,
      "loss": 0.0329,
      "step": 306160
    },
    {
      "epoch": 0.5010702853439641,
      "grad_norm": 0.7895821928977966,
      "learning_rate": 9.057148316783407e-06,
      "loss": 0.0308,
      "step": 306180
    },
    {
      "epoch": 0.5011030157826175,
      "grad_norm": 2.585646152496338,
      "learning_rate": 9.05708242456989e-06,
      "loss": 0.0414,
      "step": 306200
    },
    {
      "epoch": 0.5011357462212709,
      "grad_norm": 0.3984840512275696,
      "learning_rate": 9.057016532356372e-06,
      "loss": 0.0353,
      "step": 306220
    },
    {
      "epoch": 0.5011684766599241,
      "grad_norm": 1.169142484664917,
      "learning_rate": 9.056950640142856e-06,
      "loss": 0.0519,
      "step": 306240
    },
    {
      "epoch": 0.5012012070985775,
      "grad_norm": 2.1111180782318115,
      "learning_rate": 9.056884747929338e-06,
      "loss": 0.0382,
      "step": 306260
    },
    {
      "epoch": 0.5012339375372309,
      "grad_norm": 2.3397328853607178,
      "learning_rate": 9.056818855715821e-06,
      "loss": 0.0529,
      "step": 306280
    },
    {
      "epoch": 0.5012666679758843,
      "grad_norm": 0.7976564764976501,
      "learning_rate": 9.056752963502303e-06,
      "loss": 0.0402,
      "step": 306300
    },
    {
      "epoch": 0.5012993984145375,
      "grad_norm": 1.2535853385925293,
      "learning_rate": 9.056687071288787e-06,
      "loss": 0.0399,
      "step": 306320
    },
    {
      "epoch": 0.5013321288531909,
      "grad_norm": 0.4474506676197052,
      "learning_rate": 9.056621179075269e-06,
      "loss": 0.0369,
      "step": 306340
    },
    {
      "epoch": 0.5013648592918443,
      "grad_norm": 2.3285114765167236,
      "learning_rate": 9.056555286861752e-06,
      "loss": 0.0334,
      "step": 306360
    },
    {
      "epoch": 0.5013975897304975,
      "grad_norm": 1.2758159637451172,
      "learning_rate": 9.056489394648234e-06,
      "loss": 0.0399,
      "step": 306380
    },
    {
      "epoch": 0.5014303201691509,
      "grad_norm": 1.572962999343872,
      "learning_rate": 9.056423502434718e-06,
      "loss": 0.0406,
      "step": 306400
    },
    {
      "epoch": 0.5014630506078043,
      "grad_norm": 1.9300038814544678,
      "learning_rate": 9.056357610221201e-06,
      "loss": 0.0494,
      "step": 306420
    },
    {
      "epoch": 0.5014957810464575,
      "grad_norm": 0.4288163483142853,
      "learning_rate": 9.056291718007683e-06,
      "loss": 0.0371,
      "step": 306440
    },
    {
      "epoch": 0.5015285114851109,
      "grad_norm": 0.9944183826446533,
      "learning_rate": 9.056225825794167e-06,
      "loss": 0.0452,
      "step": 306460
    },
    {
      "epoch": 0.5015612419237643,
      "grad_norm": 1.5094056129455566,
      "learning_rate": 9.056159933580649e-06,
      "loss": 0.0384,
      "step": 306480
    },
    {
      "epoch": 0.5015939723624177,
      "grad_norm": 3.3482439517974854,
      "learning_rate": 9.056094041367132e-06,
      "loss": 0.038,
      "step": 306500
    },
    {
      "epoch": 0.5016267028010709,
      "grad_norm": 1.3407360315322876,
      "learning_rate": 9.056028149153614e-06,
      "loss": 0.0371,
      "step": 306520
    },
    {
      "epoch": 0.5016594332397243,
      "grad_norm": 1.478832721710205,
      "learning_rate": 9.055962256940098e-06,
      "loss": 0.0428,
      "step": 306540
    },
    {
      "epoch": 0.5016921636783777,
      "grad_norm": 2.3012535572052,
      "learning_rate": 9.055896364726581e-06,
      "loss": 0.0357,
      "step": 306560
    },
    {
      "epoch": 0.5017248941170309,
      "grad_norm": 1.7102022171020508,
      "learning_rate": 9.055830472513063e-06,
      "loss": 0.0557,
      "step": 306580
    },
    {
      "epoch": 0.5017576245556843,
      "grad_norm": 3.8711295127868652,
      "learning_rate": 9.055764580299547e-06,
      "loss": 0.0358,
      "step": 306600
    },
    {
      "epoch": 0.5017903549943377,
      "grad_norm": 2.859938859939575,
      "learning_rate": 9.05569868808603e-06,
      "loss": 0.0274,
      "step": 306620
    },
    {
      "epoch": 0.5018230854329909,
      "grad_norm": 1.2461358308792114,
      "learning_rate": 9.055632795872512e-06,
      "loss": 0.0417,
      "step": 306640
    },
    {
      "epoch": 0.5018558158716443,
      "grad_norm": 1.0390254259109497,
      "learning_rate": 9.055566903658996e-06,
      "loss": 0.0507,
      "step": 306660
    },
    {
      "epoch": 0.5018885463102977,
      "grad_norm": 1.3181589841842651,
      "learning_rate": 9.055501011445478e-06,
      "loss": 0.0436,
      "step": 306680
    },
    {
      "epoch": 0.501921276748951,
      "grad_norm": 1.7436081171035767,
      "learning_rate": 9.055435119231961e-06,
      "loss": 0.0451,
      "step": 306700
    },
    {
      "epoch": 0.5019540071876043,
      "grad_norm": 1.2383798360824585,
      "learning_rate": 9.055369227018443e-06,
      "loss": 0.0529,
      "step": 306720
    },
    {
      "epoch": 0.5019867376262577,
      "grad_norm": 1.1387075185775757,
      "learning_rate": 9.055303334804927e-06,
      "loss": 0.0373,
      "step": 306740
    },
    {
      "epoch": 0.502019468064911,
      "grad_norm": 0.8818227648735046,
      "learning_rate": 9.05523744259141e-06,
      "loss": 0.0306,
      "step": 306760
    },
    {
      "epoch": 0.5020521985035643,
      "grad_norm": 2.4601194858551025,
      "learning_rate": 9.055171550377892e-06,
      "loss": 0.0369,
      "step": 306780
    },
    {
      "epoch": 0.5020849289422177,
      "grad_norm": 2.3019750118255615,
      "learning_rate": 9.055105658164376e-06,
      "loss": 0.042,
      "step": 306800
    },
    {
      "epoch": 0.502117659380871,
      "grad_norm": 2.6450400352478027,
      "learning_rate": 9.055039765950858e-06,
      "loss": 0.0444,
      "step": 306820
    },
    {
      "epoch": 0.5021503898195243,
      "grad_norm": 1.023985743522644,
      "learning_rate": 9.054973873737341e-06,
      "loss": 0.0377,
      "step": 306840
    },
    {
      "epoch": 0.5021831202581777,
      "grad_norm": 2.247546911239624,
      "learning_rate": 9.054907981523823e-06,
      "loss": 0.0376,
      "step": 306860
    },
    {
      "epoch": 0.5022158506968311,
      "grad_norm": 1.0977857112884521,
      "learning_rate": 9.054842089310307e-06,
      "loss": 0.0501,
      "step": 306880
    },
    {
      "epoch": 0.5022485811354844,
      "grad_norm": 0.8135057687759399,
      "learning_rate": 9.054776197096789e-06,
      "loss": 0.0284,
      "step": 306900
    },
    {
      "epoch": 0.5022813115741377,
      "grad_norm": 0.4054219424724579,
      "learning_rate": 9.054710304883272e-06,
      "loss": 0.0295,
      "step": 306920
    },
    {
      "epoch": 0.5023140420127911,
      "grad_norm": 0.6496223211288452,
      "learning_rate": 9.054644412669756e-06,
      "loss": 0.0362,
      "step": 306940
    },
    {
      "epoch": 0.5023467724514444,
      "grad_norm": 0.8923179507255554,
      "learning_rate": 9.054578520456238e-06,
      "loss": 0.0455,
      "step": 306960
    },
    {
      "epoch": 0.5023795028900977,
      "grad_norm": 2.2870590686798096,
      "learning_rate": 9.054512628242721e-06,
      "loss": 0.0334,
      "step": 306980
    },
    {
      "epoch": 0.5024122333287511,
      "grad_norm": 1.2480450868606567,
      "learning_rate": 9.054446736029205e-06,
      "loss": 0.0289,
      "step": 307000
    },
    {
      "epoch": 0.5024449637674044,
      "grad_norm": 0.7157282829284668,
      "learning_rate": 9.054380843815687e-06,
      "loss": 0.0434,
      "step": 307020
    },
    {
      "epoch": 0.5024776942060577,
      "grad_norm": 2.2038028240203857,
      "learning_rate": 9.05431495160217e-06,
      "loss": 0.0365,
      "step": 307040
    },
    {
      "epoch": 0.5025104246447111,
      "grad_norm": 2.529094934463501,
      "learning_rate": 9.054249059388652e-06,
      "loss": 0.0403,
      "step": 307060
    },
    {
      "epoch": 0.5025431550833644,
      "grad_norm": 1.138156771659851,
      "learning_rate": 9.054183167175136e-06,
      "loss": 0.0534,
      "step": 307080
    },
    {
      "epoch": 0.5025758855220178,
      "grad_norm": 0.5240262150764465,
      "learning_rate": 9.05411727496162e-06,
      "loss": 0.0469,
      "step": 307100
    },
    {
      "epoch": 0.5026086159606711,
      "grad_norm": 0.9919873476028442,
      "learning_rate": 9.054051382748101e-06,
      "loss": 0.0352,
      "step": 307120
    },
    {
      "epoch": 0.5026413463993245,
      "grad_norm": 1.0732977390289307,
      "learning_rate": 9.053985490534585e-06,
      "loss": 0.0461,
      "step": 307140
    },
    {
      "epoch": 0.5026740768379778,
      "grad_norm": 1.096372127532959,
      "learning_rate": 9.053919598321067e-06,
      "loss": 0.0236,
      "step": 307160
    },
    {
      "epoch": 0.5027068072766311,
      "grad_norm": 0.7525196075439453,
      "learning_rate": 9.05385370610755e-06,
      "loss": 0.0413,
      "step": 307180
    },
    {
      "epoch": 0.5027395377152845,
      "grad_norm": 0.5865190029144287,
      "learning_rate": 9.053787813894032e-06,
      "loss": 0.0296,
      "step": 307200
    },
    {
      "epoch": 0.5027722681539378,
      "grad_norm": 0.7114737033843994,
      "learning_rate": 9.053721921680516e-06,
      "loss": 0.0535,
      "step": 307220
    },
    {
      "epoch": 0.5028049985925911,
      "grad_norm": 2.81640625,
      "learning_rate": 9.053656029466998e-06,
      "loss": 0.0494,
      "step": 307240
    },
    {
      "epoch": 0.5028377290312445,
      "grad_norm": 1.8104431629180908,
      "learning_rate": 9.053590137253482e-06,
      "loss": 0.0438,
      "step": 307260
    },
    {
      "epoch": 0.5028704594698978,
      "grad_norm": 4.269319534301758,
      "learning_rate": 9.053524245039963e-06,
      "loss": 0.052,
      "step": 307280
    },
    {
      "epoch": 0.5029031899085512,
      "grad_norm": 0.8301098346710205,
      "learning_rate": 9.053458352826447e-06,
      "loss": 0.0383,
      "step": 307300
    },
    {
      "epoch": 0.5029359203472045,
      "grad_norm": 3.1380133628845215,
      "learning_rate": 9.05339246061293e-06,
      "loss": 0.0422,
      "step": 307320
    },
    {
      "epoch": 0.5029686507858578,
      "grad_norm": 1.5841971635818481,
      "learning_rate": 9.053326568399412e-06,
      "loss": 0.046,
      "step": 307340
    },
    {
      "epoch": 0.5030013812245112,
      "grad_norm": 1.4212242364883423,
      "learning_rate": 9.053260676185896e-06,
      "loss": 0.0636,
      "step": 307360
    },
    {
      "epoch": 0.5030341116631645,
      "grad_norm": 0.8920035362243652,
      "learning_rate": 9.05319478397238e-06,
      "loss": 0.0469,
      "step": 307380
    },
    {
      "epoch": 0.5030668421018178,
      "grad_norm": 1.2548983097076416,
      "learning_rate": 9.053128891758862e-06,
      "loss": 0.0354,
      "step": 307400
    },
    {
      "epoch": 0.5030995725404712,
      "grad_norm": 1.3959509134292603,
      "learning_rate": 9.053062999545345e-06,
      "loss": 0.0555,
      "step": 307420
    },
    {
      "epoch": 0.5031323029791245,
      "grad_norm": 0.8593437075614929,
      "learning_rate": 9.052997107331827e-06,
      "loss": 0.0301,
      "step": 307440
    },
    {
      "epoch": 0.5031650334177779,
      "grad_norm": 0.5216627717018127,
      "learning_rate": 9.05293121511831e-06,
      "loss": 0.0451,
      "step": 307460
    },
    {
      "epoch": 0.5031977638564312,
      "grad_norm": 1.4472192525863647,
      "learning_rate": 9.052865322904794e-06,
      "loss": 0.0387,
      "step": 307480
    },
    {
      "epoch": 0.5032304942950846,
      "grad_norm": 1.841779112815857,
      "learning_rate": 9.052799430691276e-06,
      "loss": 0.0552,
      "step": 307500
    },
    {
      "epoch": 0.5032632247337379,
      "grad_norm": 6.50394868850708,
      "learning_rate": 9.05273353847776e-06,
      "loss": 0.0354,
      "step": 307520
    },
    {
      "epoch": 0.5032959551723912,
      "grad_norm": 2.088090658187866,
      "learning_rate": 9.052667646264242e-06,
      "loss": 0.0423,
      "step": 307540
    },
    {
      "epoch": 0.5033286856110446,
      "grad_norm": 0.8615710735321045,
      "learning_rate": 9.052601754050725e-06,
      "loss": 0.0464,
      "step": 307560
    },
    {
      "epoch": 0.5033614160496979,
      "grad_norm": 1.4536622762680054,
      "learning_rate": 9.052535861837207e-06,
      "loss": 0.0499,
      "step": 307580
    },
    {
      "epoch": 0.5033941464883512,
      "grad_norm": 1.2517857551574707,
      "learning_rate": 9.05246996962369e-06,
      "loss": 0.0518,
      "step": 307600
    },
    {
      "epoch": 0.5034268769270046,
      "grad_norm": 2.3975281715393066,
      "learning_rate": 9.052404077410173e-06,
      "loss": 0.0454,
      "step": 307620
    },
    {
      "epoch": 0.5034596073656579,
      "grad_norm": 0.9446606040000916,
      "learning_rate": 9.052338185196656e-06,
      "loss": 0.0398,
      "step": 307640
    },
    {
      "epoch": 0.5034923378043112,
      "grad_norm": 1.9220929145812988,
      "learning_rate": 9.052272292983138e-06,
      "loss": 0.0356,
      "step": 307660
    },
    {
      "epoch": 0.5035250682429646,
      "grad_norm": 0.3149143159389496,
      "learning_rate": 9.052206400769622e-06,
      "loss": 0.0458,
      "step": 307680
    },
    {
      "epoch": 0.503557798681618,
      "grad_norm": 1.9193594455718994,
      "learning_rate": 9.052140508556103e-06,
      "loss": 0.0404,
      "step": 307700
    },
    {
      "epoch": 0.5035905291202712,
      "grad_norm": 1.6216603517532349,
      "learning_rate": 9.052074616342587e-06,
      "loss": 0.0457,
      "step": 307720
    },
    {
      "epoch": 0.5036232595589246,
      "grad_norm": 1.328980803489685,
      "learning_rate": 9.05200872412907e-06,
      "loss": 0.0523,
      "step": 307740
    },
    {
      "epoch": 0.503655989997578,
      "grad_norm": 1.8132312297821045,
      "learning_rate": 9.051942831915553e-06,
      "loss": 0.0387,
      "step": 307760
    },
    {
      "epoch": 0.5036887204362313,
      "grad_norm": 1.7648802995681763,
      "learning_rate": 9.051876939702036e-06,
      "loss": 0.0432,
      "step": 307780
    },
    {
      "epoch": 0.5037214508748846,
      "grad_norm": 0.6511331796646118,
      "learning_rate": 9.05181104748852e-06,
      "loss": 0.0351,
      "step": 307800
    },
    {
      "epoch": 0.503754181313538,
      "grad_norm": 0.7315685749053955,
      "learning_rate": 9.051745155275002e-06,
      "loss": 0.0383,
      "step": 307820
    },
    {
      "epoch": 0.5037869117521913,
      "grad_norm": 2.0136919021606445,
      "learning_rate": 9.051679263061485e-06,
      "loss": 0.0484,
      "step": 307840
    },
    {
      "epoch": 0.5038196421908446,
      "grad_norm": 0.9861794114112854,
      "learning_rate": 9.051613370847969e-06,
      "loss": 0.0447,
      "step": 307860
    },
    {
      "epoch": 0.503852372629498,
      "grad_norm": 0.9356203079223633,
      "learning_rate": 9.05154747863445e-06,
      "loss": 0.0439,
      "step": 307880
    },
    {
      "epoch": 0.5038851030681514,
      "grad_norm": 1.2901939153671265,
      "learning_rate": 9.051481586420934e-06,
      "loss": 0.0405,
      "step": 307900
    },
    {
      "epoch": 0.5039178335068046,
      "grad_norm": 0.9486628174781799,
      "learning_rate": 9.051415694207416e-06,
      "loss": 0.0471,
      "step": 307920
    },
    {
      "epoch": 0.503950563945458,
      "grad_norm": 1.8657631874084473,
      "learning_rate": 9.0513498019939e-06,
      "loss": 0.0384,
      "step": 307940
    },
    {
      "epoch": 0.5039832943841114,
      "grad_norm": 0.7278915643692017,
      "learning_rate": 9.051283909780382e-06,
      "loss": 0.0371,
      "step": 307960
    },
    {
      "epoch": 0.5040160248227646,
      "grad_norm": 2.4908385276794434,
      "learning_rate": 9.051218017566865e-06,
      "loss": 0.0405,
      "step": 307980
    },
    {
      "epoch": 0.504048755261418,
      "grad_norm": 0.8565801978111267,
      "learning_rate": 9.051152125353347e-06,
      "loss": 0.0442,
      "step": 308000
    },
    {
      "epoch": 0.5040814857000714,
      "grad_norm": 1.7349718809127808,
      "learning_rate": 9.05108623313983e-06,
      "loss": 0.0439,
      "step": 308020
    },
    {
      "epoch": 0.5041142161387246,
      "grad_norm": 0.4593159258365631,
      "learning_rate": 9.051020340926313e-06,
      "loss": 0.0338,
      "step": 308040
    },
    {
      "epoch": 0.504146946577378,
      "grad_norm": 1.1457504034042358,
      "learning_rate": 9.050954448712796e-06,
      "loss": 0.0367,
      "step": 308060
    },
    {
      "epoch": 0.5041796770160314,
      "grad_norm": 0.6225464344024658,
      "learning_rate": 9.050888556499278e-06,
      "loss": 0.0446,
      "step": 308080
    },
    {
      "epoch": 0.5042124074546847,
      "grad_norm": 1.1721056699752808,
      "learning_rate": 9.050822664285762e-06,
      "loss": 0.0405,
      "step": 308100
    },
    {
      "epoch": 0.504245137893338,
      "grad_norm": 2.456807851791382,
      "learning_rate": 9.050756772072245e-06,
      "loss": 0.0537,
      "step": 308120
    },
    {
      "epoch": 0.5042778683319914,
      "grad_norm": 1.4176689386367798,
      "learning_rate": 9.050690879858727e-06,
      "loss": 0.0364,
      "step": 308140
    },
    {
      "epoch": 0.5043105987706448,
      "grad_norm": 1.8490406274795532,
      "learning_rate": 9.05062498764521e-06,
      "loss": 0.0364,
      "step": 308160
    },
    {
      "epoch": 0.504343329209298,
      "grad_norm": 2.7597901821136475,
      "learning_rate": 9.050559095431694e-06,
      "loss": 0.0384,
      "step": 308180
    },
    {
      "epoch": 0.5043760596479514,
      "grad_norm": 1.558849811553955,
      "learning_rate": 9.050493203218176e-06,
      "loss": 0.0389,
      "step": 308200
    },
    {
      "epoch": 0.5044087900866048,
      "grad_norm": 0.849368691444397,
      "learning_rate": 9.05042731100466e-06,
      "loss": 0.0255,
      "step": 308220
    },
    {
      "epoch": 0.504441520525258,
      "grad_norm": 2.4056026935577393,
      "learning_rate": 9.050361418791143e-06,
      "loss": 0.0425,
      "step": 308240
    },
    {
      "epoch": 0.5044742509639114,
      "grad_norm": 0.6953132748603821,
      "learning_rate": 9.050295526577625e-06,
      "loss": 0.0478,
      "step": 308260
    },
    {
      "epoch": 0.5045069814025648,
      "grad_norm": 1.9261256456375122,
      "learning_rate": 9.050229634364109e-06,
      "loss": 0.0339,
      "step": 308280
    },
    {
      "epoch": 0.504539711841218,
      "grad_norm": 0.4874187409877777,
      "learning_rate": 9.05016374215059e-06,
      "loss": 0.0483,
      "step": 308300
    },
    {
      "epoch": 0.5045724422798714,
      "grad_norm": 0.32270514965057373,
      "learning_rate": 9.050097849937074e-06,
      "loss": 0.0433,
      "step": 308320
    },
    {
      "epoch": 0.5046051727185248,
      "grad_norm": 3.6310019493103027,
      "learning_rate": 9.050031957723556e-06,
      "loss": 0.0434,
      "step": 308340
    },
    {
      "epoch": 0.5046379031571782,
      "grad_norm": 1.2274798154830933,
      "learning_rate": 9.04996606551004e-06,
      "loss": 0.0475,
      "step": 308360
    },
    {
      "epoch": 0.5046706335958314,
      "grad_norm": 1.3913315534591675,
      "learning_rate": 9.049900173296522e-06,
      "loss": 0.0404,
      "step": 308380
    },
    {
      "epoch": 0.5047033640344848,
      "grad_norm": 0.3107271194458008,
      "learning_rate": 9.049834281083005e-06,
      "loss": 0.0221,
      "step": 308400
    },
    {
      "epoch": 0.5047360944731382,
      "grad_norm": 1.6642087697982788,
      "learning_rate": 9.049768388869487e-06,
      "loss": 0.0393,
      "step": 308420
    },
    {
      "epoch": 0.5047688249117914,
      "grad_norm": 0.5928738713264465,
      "learning_rate": 9.04970249665597e-06,
      "loss": 0.0327,
      "step": 308440
    },
    {
      "epoch": 0.5048015553504448,
      "grad_norm": 0.44180914759635925,
      "learning_rate": 9.049636604442453e-06,
      "loss": 0.0347,
      "step": 308460
    },
    {
      "epoch": 0.5048342857890982,
      "grad_norm": 1.4861994981765747,
      "learning_rate": 9.049570712228936e-06,
      "loss": 0.0485,
      "step": 308480
    },
    {
      "epoch": 0.5048670162277514,
      "grad_norm": 1.4755624532699585,
      "learning_rate": 9.049504820015418e-06,
      "loss": 0.0341,
      "step": 308500
    },
    {
      "epoch": 0.5048997466664048,
      "grad_norm": 1.9235808849334717,
      "learning_rate": 9.049438927801902e-06,
      "loss": 0.0401,
      "step": 308520
    },
    {
      "epoch": 0.5049324771050582,
      "grad_norm": 0.387617290019989,
      "learning_rate": 9.049373035588385e-06,
      "loss": 0.0422,
      "step": 308540
    },
    {
      "epoch": 0.5049652075437115,
      "grad_norm": 0.5879181623458862,
      "learning_rate": 9.049307143374867e-06,
      "loss": 0.029,
      "step": 308560
    },
    {
      "epoch": 0.5049979379823648,
      "grad_norm": 1.0429338216781616,
      "learning_rate": 9.04924125116135e-06,
      "loss": 0.0502,
      "step": 308580
    },
    {
      "epoch": 0.5050306684210182,
      "grad_norm": 2.2035014629364014,
      "learning_rate": 9.049175358947834e-06,
      "loss": 0.0408,
      "step": 308600
    },
    {
      "epoch": 0.5050633988596716,
      "grad_norm": 0.49925294518470764,
      "learning_rate": 9.049109466734316e-06,
      "loss": 0.0426,
      "step": 308620
    },
    {
      "epoch": 0.5050961292983248,
      "grad_norm": 1.1325101852416992,
      "learning_rate": 9.0490435745208e-06,
      "loss": 0.0335,
      "step": 308640
    },
    {
      "epoch": 0.5051288597369782,
      "grad_norm": 1.419499158859253,
      "learning_rate": 9.048977682307283e-06,
      "loss": 0.0395,
      "step": 308660
    },
    {
      "epoch": 0.5051615901756316,
      "grad_norm": 0.37096384167671204,
      "learning_rate": 9.048911790093765e-06,
      "loss": 0.0379,
      "step": 308680
    },
    {
      "epoch": 0.5051943206142848,
      "grad_norm": 3.48512864112854,
      "learning_rate": 9.048845897880249e-06,
      "loss": 0.0342,
      "step": 308700
    },
    {
      "epoch": 0.5052270510529382,
      "grad_norm": 1.6887235641479492,
      "learning_rate": 9.04878000566673e-06,
      "loss": 0.0522,
      "step": 308720
    },
    {
      "epoch": 0.5052597814915916,
      "grad_norm": 0.9880996346473694,
      "learning_rate": 9.048714113453214e-06,
      "loss": 0.0223,
      "step": 308740
    },
    {
      "epoch": 0.5052925119302449,
      "grad_norm": 1.5385714769363403,
      "learning_rate": 9.048648221239696e-06,
      "loss": 0.0365,
      "step": 308760
    },
    {
      "epoch": 0.5053252423688982,
      "grad_norm": 0.7840672135353088,
      "learning_rate": 9.04858232902618e-06,
      "loss": 0.0401,
      "step": 308780
    },
    {
      "epoch": 0.5053579728075516,
      "grad_norm": 1.9209591150283813,
      "learning_rate": 9.048516436812662e-06,
      "loss": 0.0511,
      "step": 308800
    },
    {
      "epoch": 0.5053907032462049,
      "grad_norm": 0.9223907589912415,
      "learning_rate": 9.048450544599145e-06,
      "loss": 0.0364,
      "step": 308820
    },
    {
      "epoch": 0.5054234336848582,
      "grad_norm": 0.4299704432487488,
      "learning_rate": 9.048384652385627e-06,
      "loss": 0.0415,
      "step": 308840
    },
    {
      "epoch": 0.5054561641235116,
      "grad_norm": 1.4692353010177612,
      "learning_rate": 9.048318760172111e-06,
      "loss": 0.0392,
      "step": 308860
    },
    {
      "epoch": 0.505488894562165,
      "grad_norm": 0.8463982939720154,
      "learning_rate": 9.048252867958594e-06,
      "loss": 0.0423,
      "step": 308880
    },
    {
      "epoch": 0.5055216250008182,
      "grad_norm": 1.1540462970733643,
      "learning_rate": 9.048186975745076e-06,
      "loss": 0.0481,
      "step": 308900
    },
    {
      "epoch": 0.5055543554394716,
      "grad_norm": 3.364946126937866,
      "learning_rate": 9.04812108353156e-06,
      "loss": 0.0393,
      "step": 308920
    },
    {
      "epoch": 0.505587085878125,
      "grad_norm": 0.3759726583957672,
      "learning_rate": 9.048055191318042e-06,
      "loss": 0.0359,
      "step": 308940
    },
    {
      "epoch": 0.5056198163167783,
      "grad_norm": 1.2560945749282837,
      "learning_rate": 9.047989299104525e-06,
      "loss": 0.035,
      "step": 308960
    },
    {
      "epoch": 0.5056525467554316,
      "grad_norm": 1.3353180885314941,
      "learning_rate": 9.047923406891009e-06,
      "loss": 0.0439,
      "step": 308980
    },
    {
      "epoch": 0.505685277194085,
      "grad_norm": 0.5552321076393127,
      "learning_rate": 9.047857514677491e-06,
      "loss": 0.0456,
      "step": 309000
    },
    {
      "epoch": 0.5057180076327383,
      "grad_norm": 1.4007989168167114,
      "learning_rate": 9.047791622463974e-06,
      "loss": 0.0339,
      "step": 309020
    },
    {
      "epoch": 0.5057507380713916,
      "grad_norm": 1.6554579734802246,
      "learning_rate": 9.047725730250458e-06,
      "loss": 0.0346,
      "step": 309040
    },
    {
      "epoch": 0.505783468510045,
      "grad_norm": 0.8658870458602905,
      "learning_rate": 9.04765983803694e-06,
      "loss": 0.0257,
      "step": 309060
    },
    {
      "epoch": 0.5058161989486983,
      "grad_norm": 1.0981669425964355,
      "learning_rate": 9.047593945823424e-06,
      "loss": 0.0445,
      "step": 309080
    },
    {
      "epoch": 0.5058489293873516,
      "grad_norm": 2.52777361869812,
      "learning_rate": 9.047528053609905e-06,
      "loss": 0.0333,
      "step": 309100
    },
    {
      "epoch": 0.505881659826005,
      "grad_norm": 0.09103358536958694,
      "learning_rate": 9.047462161396389e-06,
      "loss": 0.0266,
      "step": 309120
    },
    {
      "epoch": 0.5059143902646583,
      "grad_norm": 4.432956695556641,
      "learning_rate": 9.047396269182871e-06,
      "loss": 0.0579,
      "step": 309140
    },
    {
      "epoch": 0.5059471207033117,
      "grad_norm": 1.7069021463394165,
      "learning_rate": 9.047330376969355e-06,
      "loss": 0.0497,
      "step": 309160
    },
    {
      "epoch": 0.505979851141965,
      "grad_norm": 1.4693100452423096,
      "learning_rate": 9.047264484755836e-06,
      "loss": 0.035,
      "step": 309180
    },
    {
      "epoch": 0.5060125815806183,
      "grad_norm": 1.7726198434829712,
      "learning_rate": 9.04719859254232e-06,
      "loss": 0.0473,
      "step": 309200
    },
    {
      "epoch": 0.5060453120192717,
      "grad_norm": 0.515209436416626,
      "learning_rate": 9.047132700328804e-06,
      "loss": 0.0439,
      "step": 309220
    },
    {
      "epoch": 0.506078042457925,
      "grad_norm": 2.574817419052124,
      "learning_rate": 9.047066808115285e-06,
      "loss": 0.039,
      "step": 309240
    },
    {
      "epoch": 0.5061107728965784,
      "grad_norm": 1.2591474056243896,
      "learning_rate": 9.047000915901769e-06,
      "loss": 0.0458,
      "step": 309260
    },
    {
      "epoch": 0.5061435033352317,
      "grad_norm": 3.524393081665039,
      "learning_rate": 9.046935023688251e-06,
      "loss": 0.0388,
      "step": 309280
    },
    {
      "epoch": 0.506176233773885,
      "grad_norm": 1.7306835651397705,
      "learning_rate": 9.046869131474735e-06,
      "loss": 0.0404,
      "step": 309300
    },
    {
      "epoch": 0.5062089642125384,
      "grad_norm": 0.23502005636692047,
      "learning_rate": 9.046803239261216e-06,
      "loss": 0.0371,
      "step": 309320
    },
    {
      "epoch": 0.5062416946511917,
      "grad_norm": 1.8551983833312988,
      "learning_rate": 9.0467373470477e-06,
      "loss": 0.0433,
      "step": 309340
    },
    {
      "epoch": 0.5062744250898451,
      "grad_norm": 1.6004220247268677,
      "learning_rate": 9.046671454834182e-06,
      "loss": 0.0408,
      "step": 309360
    },
    {
      "epoch": 0.5063071555284984,
      "grad_norm": 1.2101212739944458,
      "learning_rate": 9.046605562620665e-06,
      "loss": 0.0355,
      "step": 309380
    },
    {
      "epoch": 0.5063398859671517,
      "grad_norm": 1.0149009227752686,
      "learning_rate": 9.046539670407149e-06,
      "loss": 0.0315,
      "step": 309400
    },
    {
      "epoch": 0.5063726164058051,
      "grad_norm": 1.100108027458191,
      "learning_rate": 9.046473778193633e-06,
      "loss": 0.0355,
      "step": 309420
    },
    {
      "epoch": 0.5064053468444584,
      "grad_norm": 0.8398935794830322,
      "learning_rate": 9.046407885980115e-06,
      "loss": 0.0403,
      "step": 309440
    },
    {
      "epoch": 0.5064380772831117,
      "grad_norm": 1.5285069942474365,
      "learning_rate": 9.046341993766598e-06,
      "loss": 0.0253,
      "step": 309460
    },
    {
      "epoch": 0.5064708077217651,
      "grad_norm": 0.5300800800323486,
      "learning_rate": 9.04627610155308e-06,
      "loss": 0.039,
      "step": 309480
    },
    {
      "epoch": 0.5065035381604184,
      "grad_norm": 1.0026953220367432,
      "learning_rate": 9.046210209339564e-06,
      "loss": 0.0294,
      "step": 309500
    },
    {
      "epoch": 0.5065362685990717,
      "grad_norm": 1.2206724882125854,
      "learning_rate": 9.046144317126046e-06,
      "loss": 0.0407,
      "step": 309520
    },
    {
      "epoch": 0.5065689990377251,
      "grad_norm": 1.675228238105774,
      "learning_rate": 9.046078424912529e-06,
      "loss": 0.0434,
      "step": 309540
    },
    {
      "epoch": 0.5066017294763785,
      "grad_norm": 6.653317451477051,
      "learning_rate": 9.046012532699013e-06,
      "loss": 0.0418,
      "step": 309560
    },
    {
      "epoch": 0.5066344599150318,
      "grad_norm": 2.044515609741211,
      "learning_rate": 9.045946640485495e-06,
      "loss": 0.0394,
      "step": 309580
    },
    {
      "epoch": 0.5066671903536851,
      "grad_norm": 0.6568916440010071,
      "learning_rate": 9.045880748271978e-06,
      "loss": 0.0402,
      "step": 309600
    },
    {
      "epoch": 0.5066999207923385,
      "grad_norm": 1.8774220943450928,
      "learning_rate": 9.04581485605846e-06,
      "loss": 0.0563,
      "step": 309620
    },
    {
      "epoch": 0.5067326512309918,
      "grad_norm": 3.276890277862549,
      "learning_rate": 9.045748963844944e-06,
      "loss": 0.0537,
      "step": 309640
    },
    {
      "epoch": 0.5067653816696451,
      "grad_norm": 0.9003670811653137,
      "learning_rate": 9.045683071631426e-06,
      "loss": 0.0535,
      "step": 309660
    },
    {
      "epoch": 0.5067981121082985,
      "grad_norm": Infinity,
      "learning_rate": 9.045617179417909e-06,
      "loss": 0.0347,
      "step": 309680
    },
    {
      "epoch": 0.5068308425469518,
      "grad_norm": 0.42466551065444946,
      "learning_rate": 9.045551287204391e-06,
      "loss": 0.0495,
      "step": 309700
    },
    {
      "epoch": 0.5068635729856051,
      "grad_norm": 1.48037850856781,
      "learning_rate": 9.045485394990875e-06,
      "loss": 0.0567,
      "step": 309720
    },
    {
      "epoch": 0.5068963034242585,
      "grad_norm": 1.9138758182525635,
      "learning_rate": 9.045419502777356e-06,
      "loss": 0.0475,
      "step": 309740
    },
    {
      "epoch": 0.5069290338629119,
      "grad_norm": 0.4965818226337433,
      "learning_rate": 9.04535361056384e-06,
      "loss": 0.0485,
      "step": 309760
    },
    {
      "epoch": 0.5069617643015651,
      "grad_norm": 0.7072632312774658,
      "learning_rate": 9.045287718350324e-06,
      "loss": 0.044,
      "step": 309780
    },
    {
      "epoch": 0.5069944947402185,
      "grad_norm": 1.8190817832946777,
      "learning_rate": 9.045221826136806e-06,
      "loss": 0.0393,
      "step": 309800
    },
    {
      "epoch": 0.5070272251788719,
      "grad_norm": 1.200150966644287,
      "learning_rate": 9.045155933923289e-06,
      "loss": 0.0408,
      "step": 309820
    },
    {
      "epoch": 0.5070599556175251,
      "grad_norm": 1.3903484344482422,
      "learning_rate": 9.045090041709773e-06,
      "loss": 0.0379,
      "step": 309840
    },
    {
      "epoch": 0.5070926860561785,
      "grad_norm": 0.822375476360321,
      "learning_rate": 9.045024149496255e-06,
      "loss": 0.043,
      "step": 309860
    },
    {
      "epoch": 0.5071254164948319,
      "grad_norm": 1.6224958896636963,
      "learning_rate": 9.044958257282738e-06,
      "loss": 0.0335,
      "step": 309880
    },
    {
      "epoch": 0.5071581469334852,
      "grad_norm": 2.7148587703704834,
      "learning_rate": 9.04489236506922e-06,
      "loss": 0.0348,
      "step": 309900
    },
    {
      "epoch": 0.5071908773721385,
      "grad_norm": 1.0583348274230957,
      "learning_rate": 9.044826472855704e-06,
      "loss": 0.0341,
      "step": 309920
    },
    {
      "epoch": 0.5072236078107919,
      "grad_norm": 0.8758670687675476,
      "learning_rate": 9.044760580642187e-06,
      "loss": 0.0342,
      "step": 309940
    },
    {
      "epoch": 0.5072563382494453,
      "grad_norm": 1.9521814584732056,
      "learning_rate": 9.04469468842867e-06,
      "loss": 0.0451,
      "step": 309960
    },
    {
      "epoch": 0.5072890686880985,
      "grad_norm": 1.1050233840942383,
      "learning_rate": 9.044628796215153e-06,
      "loss": 0.0513,
      "step": 309980
    },
    {
      "epoch": 0.5073217991267519,
      "grad_norm": 1.5915820598602295,
      "learning_rate": 9.044562904001635e-06,
      "loss": 0.0319,
      "step": 310000
    },
    {
      "epoch": 0.5073545295654053,
      "grad_norm": 0.3922107517719269,
      "learning_rate": 9.044497011788118e-06,
      "loss": 0.043,
      "step": 310020
    },
    {
      "epoch": 0.5073872600040585,
      "grad_norm": 1.1595834493637085,
      "learning_rate": 9.0444311195746e-06,
      "loss": 0.0458,
      "step": 310040
    },
    {
      "epoch": 0.5074199904427119,
      "grad_norm": 1.2879644632339478,
      "learning_rate": 9.044365227361084e-06,
      "loss": 0.0436,
      "step": 310060
    },
    {
      "epoch": 0.5074527208813653,
      "grad_norm": 0.8508642315864563,
      "learning_rate": 9.044299335147566e-06,
      "loss": 0.0472,
      "step": 310080
    },
    {
      "epoch": 0.5074854513200185,
      "grad_norm": 1.715070366859436,
      "learning_rate": 9.04423344293405e-06,
      "loss": 0.04,
      "step": 310100
    },
    {
      "epoch": 0.5075181817586719,
      "grad_norm": 1.0773838758468628,
      "learning_rate": 9.044167550720531e-06,
      "loss": 0.0379,
      "step": 310120
    },
    {
      "epoch": 0.5075509121973253,
      "grad_norm": 0.8939279913902283,
      "learning_rate": 9.044101658507015e-06,
      "loss": 0.0392,
      "step": 310140
    },
    {
      "epoch": 0.5075836426359787,
      "grad_norm": 0.40157631039619446,
      "learning_rate": 9.044035766293498e-06,
      "loss": 0.0303,
      "step": 310160
    },
    {
      "epoch": 0.5076163730746319,
      "grad_norm": 1.2047451734542847,
      "learning_rate": 9.04396987407998e-06,
      "loss": 0.0347,
      "step": 310180
    },
    {
      "epoch": 0.5076491035132853,
      "grad_norm": 3.492278814315796,
      "learning_rate": 9.043903981866464e-06,
      "loss": 0.032,
      "step": 310200
    },
    {
      "epoch": 0.5076818339519387,
      "grad_norm": 1.8280768394470215,
      "learning_rate": 9.043838089652947e-06,
      "loss": 0.0434,
      "step": 310220
    },
    {
      "epoch": 0.5077145643905919,
      "grad_norm": 2.3925113677978516,
      "learning_rate": 9.04377219743943e-06,
      "loss": 0.0305,
      "step": 310240
    },
    {
      "epoch": 0.5077472948292453,
      "grad_norm": 1.0168516635894775,
      "learning_rate": 9.043706305225913e-06,
      "loss": 0.042,
      "step": 310260
    },
    {
      "epoch": 0.5077800252678987,
      "grad_norm": 3.593228816986084,
      "learning_rate": 9.043640413012396e-06,
      "loss": 0.0439,
      "step": 310280
    },
    {
      "epoch": 0.5078127557065519,
      "grad_norm": 12.454222679138184,
      "learning_rate": 9.043574520798878e-06,
      "loss": 0.0326,
      "step": 310300
    },
    {
      "epoch": 0.5078454861452053,
      "grad_norm": 0.707102358341217,
      "learning_rate": 9.043508628585362e-06,
      "loss": 0.035,
      "step": 310320
    },
    {
      "epoch": 0.5078782165838587,
      "grad_norm": 1.525578498840332,
      "learning_rate": 9.043442736371844e-06,
      "loss": 0.0451,
      "step": 310340
    },
    {
      "epoch": 0.507910947022512,
      "grad_norm": 1.1758525371551514,
      "learning_rate": 9.043376844158327e-06,
      "loss": 0.0438,
      "step": 310360
    },
    {
      "epoch": 0.5079436774611653,
      "grad_norm": 2.299485445022583,
      "learning_rate": 9.04331095194481e-06,
      "loss": 0.0331,
      "step": 310380
    },
    {
      "epoch": 0.5079764078998187,
      "grad_norm": 1.2798320055007935,
      "learning_rate": 9.043245059731293e-06,
      "loss": 0.0328,
      "step": 310400
    },
    {
      "epoch": 0.508009138338472,
      "grad_norm": 1.1843279600143433,
      "learning_rate": 9.043179167517775e-06,
      "loss": 0.0306,
      "step": 310420
    },
    {
      "epoch": 0.5080418687771253,
      "grad_norm": 0.8665689826011658,
      "learning_rate": 9.043113275304258e-06,
      "loss": 0.0327,
      "step": 310440
    },
    {
      "epoch": 0.5080745992157787,
      "grad_norm": 0.4447391629219055,
      "learning_rate": 9.04304738309074e-06,
      "loss": 0.0477,
      "step": 310460
    },
    {
      "epoch": 0.5081073296544321,
      "grad_norm": 2.4334022998809814,
      "learning_rate": 9.042981490877224e-06,
      "loss": 0.0362,
      "step": 310480
    },
    {
      "epoch": 0.5081400600930853,
      "grad_norm": 0.4668141007423401,
      "learning_rate": 9.042915598663706e-06,
      "loss": 0.0318,
      "step": 310500
    },
    {
      "epoch": 0.5081727905317387,
      "grad_norm": 4.010202884674072,
      "learning_rate": 9.04284970645019e-06,
      "loss": 0.0431,
      "step": 310520
    },
    {
      "epoch": 0.5082055209703921,
      "grad_norm": 4.300736427307129,
      "learning_rate": 9.042783814236671e-06,
      "loss": 0.0408,
      "step": 310540
    },
    {
      "epoch": 0.5082382514090454,
      "grad_norm": 1.956085205078125,
      "learning_rate": 9.042717922023155e-06,
      "loss": 0.0351,
      "step": 310560
    },
    {
      "epoch": 0.5082709818476987,
      "grad_norm": 2.3106818199157715,
      "learning_rate": 9.042652029809638e-06,
      "loss": 0.0481,
      "step": 310580
    },
    {
      "epoch": 0.5083037122863521,
      "grad_norm": 3.106757879257202,
      "learning_rate": 9.04258613759612e-06,
      "loss": 0.0537,
      "step": 310600
    },
    {
      "epoch": 0.5083364427250054,
      "grad_norm": 0.525102972984314,
      "learning_rate": 9.042520245382604e-06,
      "loss": 0.0407,
      "step": 310620
    },
    {
      "epoch": 0.5083691731636587,
      "grad_norm": 2.132002353668213,
      "learning_rate": 9.042454353169087e-06,
      "loss": 0.0392,
      "step": 310640
    },
    {
      "epoch": 0.5084019036023121,
      "grad_norm": 0.9972260594367981,
      "learning_rate": 9.04238846095557e-06,
      "loss": 0.0447,
      "step": 310660
    },
    {
      "epoch": 0.5084346340409654,
      "grad_norm": 1.7106449604034424,
      "learning_rate": 9.042322568742053e-06,
      "loss": 0.0479,
      "step": 310680
    },
    {
      "epoch": 0.5084673644796187,
      "grad_norm": 0.6014581322669983,
      "learning_rate": 9.042256676528536e-06,
      "loss": 0.0358,
      "step": 310700
    },
    {
      "epoch": 0.5085000949182721,
      "grad_norm": 2.828270196914673,
      "learning_rate": 9.042190784315018e-06,
      "loss": 0.0423,
      "step": 310720
    },
    {
      "epoch": 0.5085328253569255,
      "grad_norm": 0.5510549545288086,
      "learning_rate": 9.042124892101502e-06,
      "loss": 0.0389,
      "step": 310740
    },
    {
      "epoch": 0.5085655557955788,
      "grad_norm": 19.348064422607422,
      "learning_rate": 9.042058999887984e-06,
      "loss": 0.0509,
      "step": 310760
    },
    {
      "epoch": 0.5085982862342321,
      "grad_norm": 1.0987128019332886,
      "learning_rate": 9.041993107674467e-06,
      "loss": 0.0387,
      "step": 310780
    },
    {
      "epoch": 0.5086310166728855,
      "grad_norm": 1.3940091133117676,
      "learning_rate": 9.04192721546095e-06,
      "loss": 0.0447,
      "step": 310800
    },
    {
      "epoch": 0.5086637471115388,
      "grad_norm": 0.8655134439468384,
      "learning_rate": 9.041861323247433e-06,
      "loss": 0.0417,
      "step": 310820
    },
    {
      "epoch": 0.5086964775501921,
      "grad_norm": 1.1019997596740723,
      "learning_rate": 9.041795431033915e-06,
      "loss": 0.0352,
      "step": 310840
    },
    {
      "epoch": 0.5087292079888455,
      "grad_norm": 1.2405744791030884,
      "learning_rate": 9.041729538820398e-06,
      "loss": 0.0437,
      "step": 310860
    },
    {
      "epoch": 0.5087619384274988,
      "grad_norm": 1.7193719148635864,
      "learning_rate": 9.04166364660688e-06,
      "loss": 0.043,
      "step": 310880
    },
    {
      "epoch": 0.5087946688661521,
      "grad_norm": 1.0076277256011963,
      "learning_rate": 9.041597754393364e-06,
      "loss": 0.0352,
      "step": 310900
    },
    {
      "epoch": 0.5088273993048055,
      "grad_norm": 1.0807361602783203,
      "learning_rate": 9.041531862179846e-06,
      "loss": 0.0389,
      "step": 310920
    },
    {
      "epoch": 0.5088601297434588,
      "grad_norm": 1.271188497543335,
      "learning_rate": 9.04146596996633e-06,
      "loss": 0.0427,
      "step": 310940
    },
    {
      "epoch": 0.5088928601821121,
      "grad_norm": 6.866033554077148,
      "learning_rate": 9.041400077752813e-06,
      "loss": 0.0379,
      "step": 310960
    },
    {
      "epoch": 0.5089255906207655,
      "grad_norm": 0.8777685165405273,
      "learning_rate": 9.041334185539295e-06,
      "loss": 0.0465,
      "step": 310980
    },
    {
      "epoch": 0.5089583210594189,
      "grad_norm": 1.1995279788970947,
      "learning_rate": 9.041268293325778e-06,
      "loss": 0.0317,
      "step": 311000
    },
    {
      "epoch": 0.5089910514980722,
      "grad_norm": 1.5216102600097656,
      "learning_rate": 9.041202401112262e-06,
      "loss": 0.0321,
      "step": 311020
    },
    {
      "epoch": 0.5090237819367255,
      "grad_norm": 0.9680169224739075,
      "learning_rate": 9.041136508898744e-06,
      "loss": 0.0248,
      "step": 311040
    },
    {
      "epoch": 0.5090565123753789,
      "grad_norm": 1.5469485521316528,
      "learning_rate": 9.041070616685227e-06,
      "loss": 0.0346,
      "step": 311060
    },
    {
      "epoch": 0.5090892428140322,
      "grad_norm": 1.3871427774429321,
      "learning_rate": 9.041004724471711e-06,
      "loss": 0.0363,
      "step": 311080
    },
    {
      "epoch": 0.5091219732526855,
      "grad_norm": 0.5236133337020874,
      "learning_rate": 9.040938832258193e-06,
      "loss": 0.0334,
      "step": 311100
    },
    {
      "epoch": 0.5091547036913389,
      "grad_norm": 1.737546443939209,
      "learning_rate": 9.040872940044677e-06,
      "loss": 0.0452,
      "step": 311120
    },
    {
      "epoch": 0.5091874341299922,
      "grad_norm": 1.571824550628662,
      "learning_rate": 9.040807047831158e-06,
      "loss": 0.0471,
      "step": 311140
    },
    {
      "epoch": 0.5092201645686455,
      "grad_norm": 1.7702960968017578,
      "learning_rate": 9.040741155617642e-06,
      "loss": 0.0357,
      "step": 311160
    },
    {
      "epoch": 0.5092528950072989,
      "grad_norm": 1.1412030458450317,
      "learning_rate": 9.040675263404124e-06,
      "loss": 0.0475,
      "step": 311180
    },
    {
      "epoch": 0.5092856254459522,
      "grad_norm": 1.8042471408843994,
      "learning_rate": 9.040609371190608e-06,
      "loss": 0.045,
      "step": 311200
    },
    {
      "epoch": 0.5093183558846056,
      "grad_norm": 2.8782594203948975,
      "learning_rate": 9.04054347897709e-06,
      "loss": 0.0449,
      "step": 311220
    },
    {
      "epoch": 0.5093510863232589,
      "grad_norm": 1.5839505195617676,
      "learning_rate": 9.040477586763573e-06,
      "loss": 0.0379,
      "step": 311240
    },
    {
      "epoch": 0.5093838167619122,
      "grad_norm": 2.408616542816162,
      "learning_rate": 9.040411694550055e-06,
      "loss": 0.0396,
      "step": 311260
    },
    {
      "epoch": 0.5094165472005656,
      "grad_norm": 1.002785563468933,
      "learning_rate": 9.040345802336538e-06,
      "loss": 0.0368,
      "step": 311280
    },
    {
      "epoch": 0.5094492776392189,
      "grad_norm": 1.5828619003295898,
      "learning_rate": 9.04027991012302e-06,
      "loss": 0.0302,
      "step": 311300
    },
    {
      "epoch": 0.5094820080778723,
      "grad_norm": 0.7763901948928833,
      "learning_rate": 9.040214017909504e-06,
      "loss": 0.0424,
      "step": 311320
    },
    {
      "epoch": 0.5095147385165256,
      "grad_norm": 2.1057379245758057,
      "learning_rate": 9.040148125695988e-06,
      "loss": 0.0316,
      "step": 311340
    },
    {
      "epoch": 0.5095474689551789,
      "grad_norm": 3.3104352951049805,
      "learning_rate": 9.04008223348247e-06,
      "loss": 0.0516,
      "step": 311360
    },
    {
      "epoch": 0.5095801993938323,
      "grad_norm": 2.17785906791687,
      "learning_rate": 9.040016341268953e-06,
      "loss": 0.0422,
      "step": 311380
    },
    {
      "epoch": 0.5096129298324856,
      "grad_norm": 1.0520962476730347,
      "learning_rate": 9.039950449055435e-06,
      "loss": 0.0447,
      "step": 311400
    },
    {
      "epoch": 0.509645660271139,
      "grad_norm": 2.4264421463012695,
      "learning_rate": 9.039884556841918e-06,
      "loss": 0.0319,
      "step": 311420
    },
    {
      "epoch": 0.5096783907097923,
      "grad_norm": 3.3459227085113525,
      "learning_rate": 9.039818664628402e-06,
      "loss": 0.0432,
      "step": 311440
    },
    {
      "epoch": 0.5097111211484456,
      "grad_norm": 2.047658681869507,
      "learning_rate": 9.039752772414884e-06,
      "loss": 0.0401,
      "step": 311460
    },
    {
      "epoch": 0.509743851587099,
      "grad_norm": 0.5222190618515015,
      "learning_rate": 9.039686880201368e-06,
      "loss": 0.0416,
      "step": 311480
    },
    {
      "epoch": 0.5097765820257523,
      "grad_norm": 1.7057626247406006,
      "learning_rate": 9.039620987987851e-06,
      "loss": 0.0478,
      "step": 311500
    },
    {
      "epoch": 0.5098093124644056,
      "grad_norm": 1.5484906435012817,
      "learning_rate": 9.039555095774333e-06,
      "loss": 0.0437,
      "step": 311520
    },
    {
      "epoch": 0.509842042903059,
      "grad_norm": 1.736423373222351,
      "learning_rate": 9.039489203560817e-06,
      "loss": 0.039,
      "step": 311540
    },
    {
      "epoch": 0.5098747733417123,
      "grad_norm": 8.123231887817383,
      "learning_rate": 9.039423311347299e-06,
      "loss": 0.0502,
      "step": 311560
    },
    {
      "epoch": 0.5099075037803656,
      "grad_norm": 0.9619476199150085,
      "learning_rate": 9.039357419133782e-06,
      "loss": 0.0391,
      "step": 311580
    },
    {
      "epoch": 0.509940234219019,
      "grad_norm": 1.632257342338562,
      "learning_rate": 9.039291526920264e-06,
      "loss": 0.0421,
      "step": 311600
    },
    {
      "epoch": 0.5099729646576724,
      "grad_norm": 0.7861005663871765,
      "learning_rate": 9.039225634706748e-06,
      "loss": 0.0479,
      "step": 311620
    },
    {
      "epoch": 0.5100056950963257,
      "grad_norm": 1.4802993535995483,
      "learning_rate": 9.03915974249323e-06,
      "loss": 0.0379,
      "step": 311640
    },
    {
      "epoch": 0.510038425534979,
      "grad_norm": 0.5880458950996399,
      "learning_rate": 9.039093850279713e-06,
      "loss": 0.0359,
      "step": 311660
    },
    {
      "epoch": 0.5100711559736324,
      "grad_norm": 1.197724461555481,
      "learning_rate": 9.039027958066197e-06,
      "loss": 0.0557,
      "step": 311680
    },
    {
      "epoch": 0.5101038864122857,
      "grad_norm": 0.4011431932449341,
      "learning_rate": 9.038962065852679e-06,
      "loss": 0.0359,
      "step": 311700
    },
    {
      "epoch": 0.510136616850939,
      "grad_norm": 1.9402885437011719,
      "learning_rate": 9.038896173639162e-06,
      "loss": 0.0426,
      "step": 311720
    },
    {
      "epoch": 0.5101693472895924,
      "grad_norm": 2.0674474239349365,
      "learning_rate": 9.038830281425644e-06,
      "loss": 0.0341,
      "step": 311740
    },
    {
      "epoch": 0.5102020777282457,
      "grad_norm": 0.7789903879165649,
      "learning_rate": 9.038764389212128e-06,
      "loss": 0.0346,
      "step": 311760
    },
    {
      "epoch": 0.510234808166899,
      "grad_norm": 1.1667057275772095,
      "learning_rate": 9.03869849699861e-06,
      "loss": 0.0393,
      "step": 311780
    },
    {
      "epoch": 0.5102675386055524,
      "grad_norm": 0.2495802789926529,
      "learning_rate": 9.038632604785093e-06,
      "loss": 0.0337,
      "step": 311800
    },
    {
      "epoch": 0.5103002690442058,
      "grad_norm": 0.6187620162963867,
      "learning_rate": 9.038566712571577e-06,
      "loss": 0.0528,
      "step": 311820
    },
    {
      "epoch": 0.510332999482859,
      "grad_norm": 0.6465054154396057,
      "learning_rate": 9.038500820358059e-06,
      "loss": 0.0321,
      "step": 311840
    },
    {
      "epoch": 0.5103657299215124,
      "grad_norm": 2.135765552520752,
      "learning_rate": 9.038434928144542e-06,
      "loss": 0.0374,
      "step": 311860
    },
    {
      "epoch": 0.5103984603601658,
      "grad_norm": 2.663076400756836,
      "learning_rate": 9.038369035931026e-06,
      "loss": 0.0533,
      "step": 311880
    },
    {
      "epoch": 0.510431190798819,
      "grad_norm": 0.9204961061477661,
      "learning_rate": 9.038303143717508e-06,
      "loss": 0.0369,
      "step": 311900
    },
    {
      "epoch": 0.5104639212374724,
      "grad_norm": 3.3348965644836426,
      "learning_rate": 9.038237251503991e-06,
      "loss": 0.031,
      "step": 311920
    },
    {
      "epoch": 0.5104966516761258,
      "grad_norm": 1.418459415435791,
      "learning_rate": 9.038171359290473e-06,
      "loss": 0.0487,
      "step": 311940
    },
    {
      "epoch": 0.510529382114779,
      "grad_norm": 0.7208772897720337,
      "learning_rate": 9.038105467076957e-06,
      "loss": 0.0413,
      "step": 311960
    },
    {
      "epoch": 0.5105621125534324,
      "grad_norm": 1.3177909851074219,
      "learning_rate": 9.038039574863439e-06,
      "loss": 0.0329,
      "step": 311980
    },
    {
      "epoch": 0.5105948429920858,
      "grad_norm": 1.1619360446929932,
      "learning_rate": 9.037973682649922e-06,
      "loss": 0.0425,
      "step": 312000
    },
    {
      "epoch": 0.5106275734307392,
      "grad_norm": 1.1595121622085571,
      "learning_rate": 9.037907790436404e-06,
      "loss": 0.0319,
      "step": 312020
    },
    {
      "epoch": 0.5106603038693924,
      "grad_norm": 0.5455670952796936,
      "learning_rate": 9.037841898222888e-06,
      "loss": 0.0431,
      "step": 312040
    },
    {
      "epoch": 0.5106930343080458,
      "grad_norm": 5.622011661529541,
      "learning_rate": 9.037776006009371e-06,
      "loss": 0.0416,
      "step": 312060
    },
    {
      "epoch": 0.5107257647466992,
      "grad_norm": 1.4331036806106567,
      "learning_rate": 9.037710113795853e-06,
      "loss": 0.0427,
      "step": 312080
    },
    {
      "epoch": 0.5107584951853524,
      "grad_norm": 1.8332023620605469,
      "learning_rate": 9.037644221582337e-06,
      "loss": 0.047,
      "step": 312100
    },
    {
      "epoch": 0.5107912256240058,
      "grad_norm": 4.44780969619751,
      "learning_rate": 9.037578329368819e-06,
      "loss": 0.049,
      "step": 312120
    },
    {
      "epoch": 0.5108239560626592,
      "grad_norm": 1.4213591814041138,
      "learning_rate": 9.037512437155302e-06,
      "loss": 0.0391,
      "step": 312140
    },
    {
      "epoch": 0.5108566865013124,
      "grad_norm": 1.0666440725326538,
      "learning_rate": 9.037446544941784e-06,
      "loss": 0.0466,
      "step": 312160
    },
    {
      "epoch": 0.5108894169399658,
      "grad_norm": 4.875565528869629,
      "learning_rate": 9.037380652728268e-06,
      "loss": 0.061,
      "step": 312180
    },
    {
      "epoch": 0.5109221473786192,
      "grad_norm": 3.060511589050293,
      "learning_rate": 9.037314760514751e-06,
      "loss": 0.0511,
      "step": 312200
    },
    {
      "epoch": 0.5109548778172726,
      "grad_norm": 3.016636848449707,
      "learning_rate": 9.037248868301233e-06,
      "loss": 0.0533,
      "step": 312220
    },
    {
      "epoch": 0.5109876082559258,
      "grad_norm": 1.1159473657608032,
      "learning_rate": 9.037182976087717e-06,
      "loss": 0.0468,
      "step": 312240
    },
    {
      "epoch": 0.5110203386945792,
      "grad_norm": 0.6496890187263489,
      "learning_rate": 9.0371170838742e-06,
      "loss": 0.0376,
      "step": 312260
    },
    {
      "epoch": 0.5110530691332326,
      "grad_norm": 1.350865125656128,
      "learning_rate": 9.037051191660682e-06,
      "loss": 0.0494,
      "step": 312280
    },
    {
      "epoch": 0.5110857995718858,
      "grad_norm": 1.5994060039520264,
      "learning_rate": 9.036985299447166e-06,
      "loss": 0.0516,
      "step": 312300
    },
    {
      "epoch": 0.5111185300105392,
      "grad_norm": 1.6000070571899414,
      "learning_rate": 9.036919407233648e-06,
      "loss": 0.0552,
      "step": 312320
    },
    {
      "epoch": 0.5111512604491926,
      "grad_norm": 2.925466299057007,
      "learning_rate": 9.036853515020131e-06,
      "loss": 0.0324,
      "step": 312340
    },
    {
      "epoch": 0.5111839908878458,
      "grad_norm": 2.7441415786743164,
      "learning_rate": 9.036787622806613e-06,
      "loss": 0.0293,
      "step": 312360
    },
    {
      "epoch": 0.5112167213264992,
      "grad_norm": 1.5585554838180542,
      "learning_rate": 9.036721730593097e-06,
      "loss": 0.0536,
      "step": 312380
    },
    {
      "epoch": 0.5112494517651526,
      "grad_norm": 0.9216800332069397,
      "learning_rate": 9.03665583837958e-06,
      "loss": 0.0453,
      "step": 312400
    },
    {
      "epoch": 0.511282182203806,
      "grad_norm": 1.252858281135559,
      "learning_rate": 9.036589946166062e-06,
      "loss": 0.0586,
      "step": 312420
    },
    {
      "epoch": 0.5113149126424592,
      "grad_norm": 0.6771236062049866,
      "learning_rate": 9.036524053952546e-06,
      "loss": 0.0431,
      "step": 312440
    },
    {
      "epoch": 0.5113476430811126,
      "grad_norm": 1.809885025024414,
      "learning_rate": 9.036458161739028e-06,
      "loss": 0.038,
      "step": 312460
    },
    {
      "epoch": 0.511380373519766,
      "grad_norm": 0.611870288848877,
      "learning_rate": 9.036392269525511e-06,
      "loss": 0.0238,
      "step": 312480
    },
    {
      "epoch": 0.5114131039584192,
      "grad_norm": 0.9587184190750122,
      "learning_rate": 9.036326377311993e-06,
      "loss": 0.0488,
      "step": 312500
    },
    {
      "epoch": 0.5114458343970726,
      "grad_norm": 1.633570671081543,
      "learning_rate": 9.036260485098477e-06,
      "loss": 0.032,
      "step": 312520
    },
    {
      "epoch": 0.511478564835726,
      "grad_norm": 2.0994837284088135,
      "learning_rate": 9.036194592884959e-06,
      "loss": 0.033,
      "step": 312540
    },
    {
      "epoch": 0.5115112952743792,
      "grad_norm": 0.4790278673171997,
      "learning_rate": 9.036128700671442e-06,
      "loss": 0.0467,
      "step": 312560
    },
    {
      "epoch": 0.5115440257130326,
      "grad_norm": 3.173509359359741,
      "learning_rate": 9.036062808457924e-06,
      "loss": 0.0533,
      "step": 312580
    },
    {
      "epoch": 0.511576756151686,
      "grad_norm": 2.585231065750122,
      "learning_rate": 9.035996916244408e-06,
      "loss": 0.0432,
      "step": 312600
    },
    {
      "epoch": 0.5116094865903393,
      "grad_norm": 0.8110806941986084,
      "learning_rate": 9.035931024030891e-06,
      "loss": 0.0417,
      "step": 312620
    },
    {
      "epoch": 0.5116422170289926,
      "grad_norm": 1.2278472185134888,
      "learning_rate": 9.035865131817373e-06,
      "loss": 0.0366,
      "step": 312640
    },
    {
      "epoch": 0.511674947467646,
      "grad_norm": 1.6634390354156494,
      "learning_rate": 9.035799239603857e-06,
      "loss": 0.0378,
      "step": 312660
    },
    {
      "epoch": 0.5117076779062993,
      "grad_norm": 0.7342395186424255,
      "learning_rate": 9.03573334739034e-06,
      "loss": 0.0547,
      "step": 312680
    },
    {
      "epoch": 0.5117404083449526,
      "grad_norm": 2.8045246601104736,
      "learning_rate": 9.035667455176822e-06,
      "loss": 0.0397,
      "step": 312700
    },
    {
      "epoch": 0.511773138783606,
      "grad_norm": 1.4012956619262695,
      "learning_rate": 9.035601562963306e-06,
      "loss": 0.0397,
      "step": 312720
    },
    {
      "epoch": 0.5118058692222593,
      "grad_norm": 0.624884307384491,
      "learning_rate": 9.03553567074979e-06,
      "loss": 0.0334,
      "step": 312740
    },
    {
      "epoch": 0.5118385996609126,
      "grad_norm": 0.44447311758995056,
      "learning_rate": 9.035469778536271e-06,
      "loss": 0.0414,
      "step": 312760
    },
    {
      "epoch": 0.511871330099566,
      "grad_norm": 0.5236548185348511,
      "learning_rate": 9.035403886322755e-06,
      "loss": 0.0422,
      "step": 312780
    },
    {
      "epoch": 0.5119040605382194,
      "grad_norm": 0.908234715461731,
      "learning_rate": 9.035337994109237e-06,
      "loss": 0.0322,
      "step": 312800
    },
    {
      "epoch": 0.5119367909768727,
      "grad_norm": 0.9829170107841492,
      "learning_rate": 9.03527210189572e-06,
      "loss": 0.0378,
      "step": 312820
    },
    {
      "epoch": 0.511969521415526,
      "grad_norm": 9.168429374694824,
      "learning_rate": 9.035206209682202e-06,
      "loss": 0.045,
      "step": 312840
    },
    {
      "epoch": 0.5120022518541794,
      "grad_norm": 0.5891264081001282,
      "learning_rate": 9.035140317468686e-06,
      "loss": 0.0362,
      "step": 312860
    },
    {
      "epoch": 0.5120349822928327,
      "grad_norm": 1.3139491081237793,
      "learning_rate": 9.035074425255168e-06,
      "loss": 0.0355,
      "step": 312880
    },
    {
      "epoch": 0.512067712731486,
      "grad_norm": 1.0894547700881958,
      "learning_rate": 9.035008533041651e-06,
      "loss": 0.0384,
      "step": 312900
    },
    {
      "epoch": 0.5121004431701394,
      "grad_norm": 0.23982998728752136,
      "learning_rate": 9.034942640828133e-06,
      "loss": 0.0291,
      "step": 312920
    },
    {
      "epoch": 0.5121331736087927,
      "grad_norm": 1.5488849878311157,
      "learning_rate": 9.034876748614617e-06,
      "loss": 0.0404,
      "step": 312940
    },
    {
      "epoch": 0.512165904047446,
      "grad_norm": 0.7072211503982544,
      "learning_rate": 9.034810856401099e-06,
      "loss": 0.0378,
      "step": 312960
    },
    {
      "epoch": 0.5121986344860994,
      "grad_norm": 2.382639169692993,
      "learning_rate": 9.034744964187582e-06,
      "loss": 0.0588,
      "step": 312980
    },
    {
      "epoch": 0.5122313649247527,
      "grad_norm": 1.6546701192855835,
      "learning_rate": 9.034679071974066e-06,
      "loss": 0.0416,
      "step": 313000
    },
    {
      "epoch": 0.5122640953634061,
      "grad_norm": 1.3043056726455688,
      "learning_rate": 9.034613179760548e-06,
      "loss": 0.042,
      "step": 313020
    },
    {
      "epoch": 0.5122968258020594,
      "grad_norm": 0.43505245447158813,
      "learning_rate": 9.034547287547031e-06,
      "loss": 0.0406,
      "step": 313040
    },
    {
      "epoch": 0.5123295562407127,
      "grad_norm": 1.7163923978805542,
      "learning_rate": 9.034481395333515e-06,
      "loss": 0.0275,
      "step": 313060
    },
    {
      "epoch": 0.5123622866793661,
      "grad_norm": 0.779271125793457,
      "learning_rate": 9.034415503119997e-06,
      "loss": 0.035,
      "step": 313080
    },
    {
      "epoch": 0.5123950171180194,
      "grad_norm": 1.2660331726074219,
      "learning_rate": 9.03434961090648e-06,
      "loss": 0.0388,
      "step": 313100
    },
    {
      "epoch": 0.5124277475566728,
      "grad_norm": 0.41795989871025085,
      "learning_rate": 9.034283718692964e-06,
      "loss": 0.0474,
      "step": 313120
    },
    {
      "epoch": 0.5124604779953261,
      "grad_norm": 0.850533127784729,
      "learning_rate": 9.034217826479446e-06,
      "loss": 0.0319,
      "step": 313140
    },
    {
      "epoch": 0.5124932084339794,
      "grad_norm": 3.1790502071380615,
      "learning_rate": 9.03415193426593e-06,
      "loss": 0.0433,
      "step": 313160
    },
    {
      "epoch": 0.5125259388726328,
      "grad_norm": 1.0528886318206787,
      "learning_rate": 9.034086042052411e-06,
      "loss": 0.0439,
      "step": 313180
    },
    {
      "epoch": 0.5125586693112861,
      "grad_norm": 0.6454387307167053,
      "learning_rate": 9.034020149838895e-06,
      "loss": 0.0361,
      "step": 313200
    },
    {
      "epoch": 0.5125913997499395,
      "grad_norm": 0.6451325416564941,
      "learning_rate": 9.033954257625377e-06,
      "loss": 0.0402,
      "step": 313220
    },
    {
      "epoch": 0.5126241301885928,
      "grad_norm": 0.43336814641952515,
      "learning_rate": 9.03388836541186e-06,
      "loss": 0.0258,
      "step": 313240
    },
    {
      "epoch": 0.5126568606272461,
      "grad_norm": 0.7802265882492065,
      "learning_rate": 9.033822473198342e-06,
      "loss": 0.0495,
      "step": 313260
    },
    {
      "epoch": 0.5126895910658995,
      "grad_norm": 0.27932652831077576,
      "learning_rate": 9.033756580984826e-06,
      "loss": 0.0379,
      "step": 313280
    },
    {
      "epoch": 0.5127223215045528,
      "grad_norm": 2.2366814613342285,
      "learning_rate": 9.033690688771308e-06,
      "loss": 0.0486,
      "step": 313300
    },
    {
      "epoch": 0.5127550519432061,
      "grad_norm": 0.3440706431865692,
      "learning_rate": 9.033624796557791e-06,
      "loss": 0.0376,
      "step": 313320
    },
    {
      "epoch": 0.5127877823818595,
      "grad_norm": 0.5006771683692932,
      "learning_rate": 9.033558904344273e-06,
      "loss": 0.0497,
      "step": 313340
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.7420441508293152,
      "learning_rate": 9.033493012130757e-06,
      "loss": 0.0307,
      "step": 313360
    },
    {
      "epoch": 0.5128532432591661,
      "grad_norm": 4.385673522949219,
      "learning_rate": 9.033427119917239e-06,
      "loss": 0.0408,
      "step": 313380
    },
    {
      "epoch": 0.5128859736978195,
      "grad_norm": 0.4874773621559143,
      "learning_rate": 9.033361227703722e-06,
      "loss": 0.0411,
      "step": 313400
    },
    {
      "epoch": 0.5129187041364729,
      "grad_norm": 1.772342562675476,
      "learning_rate": 9.033295335490206e-06,
      "loss": 0.0521,
      "step": 313420
    },
    {
      "epoch": 0.5129514345751262,
      "grad_norm": 2.897710084915161,
      "learning_rate": 9.033229443276688e-06,
      "loss": 0.0429,
      "step": 313440
    },
    {
      "epoch": 0.5129841650137795,
      "grad_norm": 2.04805588722229,
      "learning_rate": 9.033163551063171e-06,
      "loss": 0.0392,
      "step": 313460
    },
    {
      "epoch": 0.5130168954524329,
      "grad_norm": 0.871182382106781,
      "learning_rate": 9.033097658849655e-06,
      "loss": 0.0291,
      "step": 313480
    },
    {
      "epoch": 0.5130496258910862,
      "grad_norm": 0.9439361691474915,
      "learning_rate": 9.033031766636137e-06,
      "loss": 0.0333,
      "step": 313500
    },
    {
      "epoch": 0.5130823563297395,
      "grad_norm": 0.5336719155311584,
      "learning_rate": 9.03296587442262e-06,
      "loss": 0.036,
      "step": 313520
    },
    {
      "epoch": 0.5131150867683929,
      "grad_norm": 0.7818902730941772,
      "learning_rate": 9.032899982209104e-06,
      "loss": 0.0327,
      "step": 313540
    },
    {
      "epoch": 0.5131478172070462,
      "grad_norm": 0.8366332054138184,
      "learning_rate": 9.032834089995586e-06,
      "loss": 0.0423,
      "step": 313560
    },
    {
      "epoch": 0.5131805476456995,
      "grad_norm": 0.8359109163284302,
      "learning_rate": 9.03276819778207e-06,
      "loss": 0.0318,
      "step": 313580
    },
    {
      "epoch": 0.5132132780843529,
      "grad_norm": 1.5450001955032349,
      "learning_rate": 9.032702305568552e-06,
      "loss": 0.0341,
      "step": 313600
    },
    {
      "epoch": 0.5132460085230063,
      "grad_norm": 1.3087776899337769,
      "learning_rate": 9.032636413355035e-06,
      "loss": 0.0272,
      "step": 313620
    },
    {
      "epoch": 0.5132787389616595,
      "grad_norm": 1.6952515840530396,
      "learning_rate": 9.032570521141517e-06,
      "loss": 0.0433,
      "step": 313640
    },
    {
      "epoch": 0.5133114694003129,
      "grad_norm": 1.2174322605133057,
      "learning_rate": 9.032504628928e-06,
      "loss": 0.0361,
      "step": 313660
    },
    {
      "epoch": 0.5133441998389663,
      "grad_norm": 0.8474768996238708,
      "learning_rate": 9.032438736714482e-06,
      "loss": 0.0348,
      "step": 313680
    },
    {
      "epoch": 0.5133769302776195,
      "grad_norm": 1.2331938743591309,
      "learning_rate": 9.032372844500966e-06,
      "loss": 0.0402,
      "step": 313700
    },
    {
      "epoch": 0.5134096607162729,
      "grad_norm": 0.6557847857475281,
      "learning_rate": 9.032306952287448e-06,
      "loss": 0.0311,
      "step": 313720
    },
    {
      "epoch": 0.5134423911549263,
      "grad_norm": 0.526513397693634,
      "learning_rate": 9.032241060073932e-06,
      "loss": 0.0443,
      "step": 313740
    },
    {
      "epoch": 0.5134751215935796,
      "grad_norm": 0.8283641338348389,
      "learning_rate": 9.032175167860413e-06,
      "loss": 0.0359,
      "step": 313760
    },
    {
      "epoch": 0.5135078520322329,
      "grad_norm": 1.3231425285339355,
      "learning_rate": 9.032109275646897e-06,
      "loss": 0.0409,
      "step": 313780
    },
    {
      "epoch": 0.5135405824708863,
      "grad_norm": 1.265950083732605,
      "learning_rate": 9.03204338343338e-06,
      "loss": 0.0395,
      "step": 313800
    },
    {
      "epoch": 0.5135733129095396,
      "grad_norm": 1.2938575744628906,
      "learning_rate": 9.031977491219863e-06,
      "loss": 0.0371,
      "step": 313820
    },
    {
      "epoch": 0.5136060433481929,
      "grad_norm": 2.781858205795288,
      "learning_rate": 9.031911599006346e-06,
      "loss": 0.0501,
      "step": 313840
    },
    {
      "epoch": 0.5136387737868463,
      "grad_norm": null,
      "learning_rate": 9.03184570679283e-06,
      "loss": 0.0354,
      "step": 313860
    },
    {
      "epoch": 0.5136715042254997,
      "grad_norm": 2.3105695247650146,
      "learning_rate": 9.031779814579312e-06,
      "loss": 0.0361,
      "step": 313880
    },
    {
      "epoch": 0.5137042346641529,
      "grad_norm": 1.6223341226577759,
      "learning_rate": 9.031713922365795e-06,
      "loss": 0.0312,
      "step": 313900
    },
    {
      "epoch": 0.5137369651028063,
      "grad_norm": 0.6704955697059631,
      "learning_rate": 9.031648030152279e-06,
      "loss": 0.039,
      "step": 313920
    },
    {
      "epoch": 0.5137696955414597,
      "grad_norm": 1.7750519514083862,
      "learning_rate": 9.03158213793876e-06,
      "loss": 0.0385,
      "step": 313940
    },
    {
      "epoch": 0.5138024259801129,
      "grad_norm": 2.492501974105835,
      "learning_rate": 9.031516245725244e-06,
      "loss": 0.058,
      "step": 313960
    },
    {
      "epoch": 0.5138351564187663,
      "grad_norm": 0.8717024922370911,
      "learning_rate": 9.031450353511726e-06,
      "loss": 0.0445,
      "step": 313980
    },
    {
      "epoch": 0.5138678868574197,
      "grad_norm": 1.8146675825119019,
      "learning_rate": 9.03138446129821e-06,
      "loss": 0.0466,
      "step": 314000
    },
    {
      "epoch": 0.513900617296073,
      "grad_norm": 1.3062143325805664,
      "learning_rate": 9.031318569084692e-06,
      "loss": 0.0487,
      "step": 314020
    },
    {
      "epoch": 0.5139333477347263,
      "grad_norm": 0.5329374670982361,
      "learning_rate": 9.031252676871175e-06,
      "loss": 0.0356,
      "step": 314040
    },
    {
      "epoch": 0.5139660781733797,
      "grad_norm": 1.4880720376968384,
      "learning_rate": 9.031186784657657e-06,
      "loss": 0.039,
      "step": 314060
    },
    {
      "epoch": 0.5139988086120331,
      "grad_norm": 0.9119477272033691,
      "learning_rate": 9.03112089244414e-06,
      "loss": 0.0389,
      "step": 314080
    },
    {
      "epoch": 0.5140315390506863,
      "grad_norm": 2.1852481365203857,
      "learning_rate": 9.031055000230623e-06,
      "loss": 0.0334,
      "step": 314100
    },
    {
      "epoch": 0.5140642694893397,
      "grad_norm": 1.9811030626296997,
      "learning_rate": 9.030989108017106e-06,
      "loss": 0.0416,
      "step": 314120
    },
    {
      "epoch": 0.5140969999279931,
      "grad_norm": 1.3005917072296143,
      "learning_rate": 9.03092321580359e-06,
      "loss": 0.0386,
      "step": 314140
    },
    {
      "epoch": 0.5141297303666463,
      "grad_norm": 2.5670692920684814,
      "learning_rate": 9.030857323590072e-06,
      "loss": 0.037,
      "step": 314160
    },
    {
      "epoch": 0.5141624608052997,
      "grad_norm": 1.8250086307525635,
      "learning_rate": 9.030791431376555e-06,
      "loss": 0.0387,
      "step": 314180
    },
    {
      "epoch": 0.5141951912439531,
      "grad_norm": 2.089672803878784,
      "learning_rate": 9.030725539163037e-06,
      "loss": 0.0461,
      "step": 314200
    },
    {
      "epoch": 0.5142279216826063,
      "grad_norm": 1.6254053115844727,
      "learning_rate": 9.03065964694952e-06,
      "loss": 0.0457,
      "step": 314220
    },
    {
      "epoch": 0.5142606521212597,
      "grad_norm": 1.7972018718719482,
      "learning_rate": 9.030593754736003e-06,
      "loss": 0.0362,
      "step": 314240
    },
    {
      "epoch": 0.5142933825599131,
      "grad_norm": 1.0897961854934692,
      "learning_rate": 9.030527862522486e-06,
      "loss": 0.0415,
      "step": 314260
    },
    {
      "epoch": 0.5143261129985665,
      "grad_norm": 1.0539649724960327,
      "learning_rate": 9.03046197030897e-06,
      "loss": 0.0353,
      "step": 314280
    },
    {
      "epoch": 0.5143588434372197,
      "grad_norm": 1.0611140727996826,
      "learning_rate": 9.030396078095452e-06,
      "loss": 0.048,
      "step": 314300
    },
    {
      "epoch": 0.5143915738758731,
      "grad_norm": 0.5534748435020447,
      "learning_rate": 9.030330185881935e-06,
      "loss": 0.0306,
      "step": 314320
    },
    {
      "epoch": 0.5144243043145265,
      "grad_norm": 2.000955581665039,
      "learning_rate": 9.030264293668419e-06,
      "loss": 0.0425,
      "step": 314340
    },
    {
      "epoch": 0.5144570347531797,
      "grad_norm": 1.5047343969345093,
      "learning_rate": 9.0301984014549e-06,
      "loss": 0.0303,
      "step": 314360
    },
    {
      "epoch": 0.5144897651918331,
      "grad_norm": 1.3528962135314941,
      "learning_rate": 9.030132509241384e-06,
      "loss": 0.0403,
      "step": 314380
    },
    {
      "epoch": 0.5145224956304865,
      "grad_norm": 1.0541565418243408,
      "learning_rate": 9.030066617027866e-06,
      "loss": 0.0471,
      "step": 314400
    },
    {
      "epoch": 0.5145552260691397,
      "grad_norm": 0.20864993333816528,
      "learning_rate": 9.03000072481435e-06,
      "loss": 0.0311,
      "step": 314420
    },
    {
      "epoch": 0.5145879565077931,
      "grad_norm": 1.041077733039856,
      "learning_rate": 9.029934832600832e-06,
      "loss": 0.0484,
      "step": 314440
    },
    {
      "epoch": 0.5146206869464465,
      "grad_norm": 4.99671745300293,
      "learning_rate": 9.029868940387315e-06,
      "loss": 0.0455,
      "step": 314460
    },
    {
      "epoch": 0.5146534173850998,
      "grad_norm": 1.4963246583938599,
      "learning_rate": 9.029803048173797e-06,
      "loss": 0.0379,
      "step": 314480
    },
    {
      "epoch": 0.5146861478237531,
      "grad_norm": 3.8829345703125,
      "learning_rate": 9.02973715596028e-06,
      "loss": 0.0528,
      "step": 314500
    },
    {
      "epoch": 0.5147188782624065,
      "grad_norm": 1.536821722984314,
      "learning_rate": 9.029671263746764e-06,
      "loss": 0.0323,
      "step": 314520
    },
    {
      "epoch": 0.5147516087010598,
      "grad_norm": 2.5135419368743896,
      "learning_rate": 9.029605371533246e-06,
      "loss": 0.0479,
      "step": 314540
    },
    {
      "epoch": 0.5147843391397131,
      "grad_norm": 3.4379794597625732,
      "learning_rate": 9.02953947931973e-06,
      "loss": 0.0438,
      "step": 314560
    },
    {
      "epoch": 0.5148170695783665,
      "grad_norm": 1.4077446460723877,
      "learning_rate": 9.029473587106212e-06,
      "loss": 0.0292,
      "step": 314580
    },
    {
      "epoch": 0.5148498000170199,
      "grad_norm": 1.02043616771698,
      "learning_rate": 9.029407694892695e-06,
      "loss": 0.0379,
      "step": 314600
    },
    {
      "epoch": 0.5148825304556731,
      "grad_norm": 1.4657347202301025,
      "learning_rate": 9.029341802679177e-06,
      "loss": 0.0396,
      "step": 314620
    },
    {
      "epoch": 0.5149152608943265,
      "grad_norm": 2.1897470951080322,
      "learning_rate": 9.02927591046566e-06,
      "loss": 0.0323,
      "step": 314640
    },
    {
      "epoch": 0.5149479913329799,
      "grad_norm": 1.9853827953338623,
      "learning_rate": 9.029210018252144e-06,
      "loss": 0.0319,
      "step": 314660
    },
    {
      "epoch": 0.5149807217716332,
      "grad_norm": 0.19321103394031525,
      "learning_rate": 9.029144126038626e-06,
      "loss": 0.0331,
      "step": 314680
    },
    {
      "epoch": 0.5150134522102865,
      "grad_norm": 1.0415409803390503,
      "learning_rate": 9.02907823382511e-06,
      "loss": 0.0327,
      "step": 314700
    },
    {
      "epoch": 0.5150461826489399,
      "grad_norm": 0.3183930218219757,
      "learning_rate": 9.029012341611593e-06,
      "loss": 0.0343,
      "step": 314720
    },
    {
      "epoch": 0.5150789130875932,
      "grad_norm": 2.762913465499878,
      "learning_rate": 9.028946449398075e-06,
      "loss": 0.0519,
      "step": 314740
    },
    {
      "epoch": 0.5151116435262465,
      "grad_norm": 1.3102071285247803,
      "learning_rate": 9.028880557184559e-06,
      "loss": 0.0418,
      "step": 314760
    },
    {
      "epoch": 0.5151443739648999,
      "grad_norm": 0.9076740741729736,
      "learning_rate": 9.02881466497104e-06,
      "loss": 0.045,
      "step": 314780
    },
    {
      "epoch": 0.5151771044035532,
      "grad_norm": 0.6669500470161438,
      "learning_rate": 9.028748772757524e-06,
      "loss": 0.03,
      "step": 314800
    },
    {
      "epoch": 0.5152098348422065,
      "grad_norm": 0.25039106607437134,
      "learning_rate": 9.028682880544006e-06,
      "loss": 0.0383,
      "step": 314820
    },
    {
      "epoch": 0.5152425652808599,
      "grad_norm": 2.097212314605713,
      "learning_rate": 9.02861698833049e-06,
      "loss": 0.0381,
      "step": 314840
    },
    {
      "epoch": 0.5152752957195132,
      "grad_norm": 3.1275949478149414,
      "learning_rate": 9.028551096116973e-06,
      "loss": 0.0542,
      "step": 314860
    },
    {
      "epoch": 0.5153080261581666,
      "grad_norm": 0.6341622471809387,
      "learning_rate": 9.028485203903455e-06,
      "loss": 0.0399,
      "step": 314880
    },
    {
      "epoch": 0.5153407565968199,
      "grad_norm": 1.910362958908081,
      "learning_rate": 9.028419311689939e-06,
      "loss": 0.0461,
      "step": 314900
    },
    {
      "epoch": 0.5153734870354733,
      "grad_norm": 2.2688426971435547,
      "learning_rate": 9.02835341947642e-06,
      "loss": 0.0382,
      "step": 314920
    },
    {
      "epoch": 0.5154062174741266,
      "grad_norm": 0.7784935832023621,
      "learning_rate": 9.028287527262904e-06,
      "loss": 0.0278,
      "step": 314940
    },
    {
      "epoch": 0.5154389479127799,
      "grad_norm": 0.3437270224094391,
      "learning_rate": 9.028221635049386e-06,
      "loss": 0.0448,
      "step": 314960
    },
    {
      "epoch": 0.5154716783514333,
      "grad_norm": 1.6005797386169434,
      "learning_rate": 9.02815574283587e-06,
      "loss": 0.0422,
      "step": 314980
    },
    {
      "epoch": 0.5155044087900866,
      "grad_norm": 0.6141555309295654,
      "learning_rate": 9.028089850622352e-06,
      "loss": 0.0434,
      "step": 315000
    },
    {
      "epoch": 0.5155371392287399,
      "grad_norm": 2.200821876525879,
      "learning_rate": 9.028023958408835e-06,
      "loss": 0.0318,
      "step": 315020
    },
    {
      "epoch": 0.5155698696673933,
      "grad_norm": 2.131504774093628,
      "learning_rate": 9.027958066195319e-06,
      "loss": 0.0568,
      "step": 315040
    },
    {
      "epoch": 0.5156026001060466,
      "grad_norm": 0.80250084400177,
      "learning_rate": 9.0278921739818e-06,
      "loss": 0.0496,
      "step": 315060
    },
    {
      "epoch": 0.5156353305447,
      "grad_norm": 0.5383778214454651,
      "learning_rate": 9.027826281768284e-06,
      "loss": 0.0346,
      "step": 315080
    },
    {
      "epoch": 0.5156680609833533,
      "grad_norm": 0.8053162097930908,
      "learning_rate": 9.027760389554768e-06,
      "loss": 0.034,
      "step": 315100
    },
    {
      "epoch": 0.5157007914220066,
      "grad_norm": 2.589773416519165,
      "learning_rate": 9.02769449734125e-06,
      "loss": 0.03,
      "step": 315120
    },
    {
      "epoch": 0.51573352186066,
      "grad_norm": 2.124617338180542,
      "learning_rate": 9.027628605127733e-06,
      "loss": 0.0306,
      "step": 315140
    },
    {
      "epoch": 0.5157662522993133,
      "grad_norm": 1.228898048400879,
      "learning_rate": 9.027562712914215e-06,
      "loss": 0.0405,
      "step": 315160
    },
    {
      "epoch": 0.5157989827379666,
      "grad_norm": 0.7687616348266602,
      "learning_rate": 9.027496820700699e-06,
      "loss": 0.0373,
      "step": 315180
    },
    {
      "epoch": 0.51583171317662,
      "grad_norm": 4.508130073547363,
      "learning_rate": 9.027430928487183e-06,
      "loss": 0.0333,
      "step": 315200
    },
    {
      "epoch": 0.5158644436152733,
      "grad_norm": 1.208848237991333,
      "learning_rate": 9.027365036273664e-06,
      "loss": 0.05,
      "step": 315220
    },
    {
      "epoch": 0.5158971740539267,
      "grad_norm": 0.6930237412452698,
      "learning_rate": 9.027299144060148e-06,
      "loss": 0.0403,
      "step": 315240
    },
    {
      "epoch": 0.51592990449258,
      "grad_norm": 3.6559557914733887,
      "learning_rate": 9.02723325184663e-06,
      "loss": 0.0381,
      "step": 315260
    },
    {
      "epoch": 0.5159626349312334,
      "grad_norm": 0.5296887755393982,
      "learning_rate": 9.027167359633114e-06,
      "loss": 0.0369,
      "step": 315280
    },
    {
      "epoch": 0.5159953653698867,
      "grad_norm": 1.2053120136260986,
      "learning_rate": 9.027101467419595e-06,
      "loss": 0.0411,
      "step": 315300
    },
    {
      "epoch": 0.51602809580854,
      "grad_norm": 0.3917660713195801,
      "learning_rate": 9.027035575206079e-06,
      "loss": 0.0333,
      "step": 315320
    },
    {
      "epoch": 0.5160608262471934,
      "grad_norm": 2.913240909576416,
      "learning_rate": 9.026969682992561e-06,
      "loss": 0.0435,
      "step": 315340
    },
    {
      "epoch": 0.5160935566858467,
      "grad_norm": 0.521186351776123,
      "learning_rate": 9.026903790779044e-06,
      "loss": 0.0325,
      "step": 315360
    },
    {
      "epoch": 0.5161262871245,
      "grad_norm": 6.332528114318848,
      "learning_rate": 9.026837898565526e-06,
      "loss": 0.0496,
      "step": 315380
    },
    {
      "epoch": 0.5161590175631534,
      "grad_norm": 0.889028787612915,
      "learning_rate": 9.02677200635201e-06,
      "loss": 0.0355,
      "step": 315400
    },
    {
      "epoch": 0.5161917480018067,
      "grad_norm": 2.0877814292907715,
      "learning_rate": 9.026706114138492e-06,
      "loss": 0.0527,
      "step": 315420
    },
    {
      "epoch": 0.51622447844046,
      "grad_norm": 0.3493770360946655,
      "learning_rate": 9.026640221924975e-06,
      "loss": 0.0388,
      "step": 315440
    },
    {
      "epoch": 0.5162572088791134,
      "grad_norm": 3.5832948684692383,
      "learning_rate": 9.026574329711459e-06,
      "loss": 0.035,
      "step": 315460
    },
    {
      "epoch": 0.5162899393177668,
      "grad_norm": 1.8505597114562988,
      "learning_rate": 9.026508437497941e-06,
      "loss": 0.0356,
      "step": 315480
    },
    {
      "epoch": 0.51632266975642,
      "grad_norm": 1.6411362886428833,
      "learning_rate": 9.026442545284425e-06,
      "loss": 0.0439,
      "step": 315500
    },
    {
      "epoch": 0.5163554001950734,
      "grad_norm": 2.213778495788574,
      "learning_rate": 9.026376653070908e-06,
      "loss": 0.0412,
      "step": 315520
    },
    {
      "epoch": 0.5163881306337268,
      "grad_norm": 0.7505209445953369,
      "learning_rate": 9.02631076085739e-06,
      "loss": 0.0427,
      "step": 315540
    },
    {
      "epoch": 0.51642086107238,
      "grad_norm": 1.8541786670684814,
      "learning_rate": 9.026244868643874e-06,
      "loss": 0.0445,
      "step": 315560
    },
    {
      "epoch": 0.5164535915110334,
      "grad_norm": 0.6924136877059937,
      "learning_rate": 9.026178976430357e-06,
      "loss": 0.0354,
      "step": 315580
    },
    {
      "epoch": 0.5164863219496868,
      "grad_norm": 1.3316057920455933,
      "learning_rate": 9.026113084216839e-06,
      "loss": 0.0402,
      "step": 315600
    },
    {
      "epoch": 0.5165190523883401,
      "grad_norm": 2.2213070392608643,
      "learning_rate": 9.026047192003323e-06,
      "loss": 0.0417,
      "step": 315620
    },
    {
      "epoch": 0.5165517828269934,
      "grad_norm": 5.108406066894531,
      "learning_rate": 9.025981299789805e-06,
      "loss": 0.0463,
      "step": 315640
    },
    {
      "epoch": 0.5165845132656468,
      "grad_norm": 0.13490238785743713,
      "learning_rate": 9.025915407576288e-06,
      "loss": 0.0317,
      "step": 315660
    },
    {
      "epoch": 0.5166172437043002,
      "grad_norm": 0.5801191329956055,
      "learning_rate": 9.02584951536277e-06,
      "loss": 0.0399,
      "step": 315680
    },
    {
      "epoch": 0.5166499741429534,
      "grad_norm": 0.520014226436615,
      "learning_rate": 9.025783623149254e-06,
      "loss": 0.0433,
      "step": 315700
    },
    {
      "epoch": 0.5166827045816068,
      "grad_norm": 1.4552819728851318,
      "learning_rate": 9.025717730935735e-06,
      "loss": 0.0495,
      "step": 315720
    },
    {
      "epoch": 0.5167154350202602,
      "grad_norm": 1.073774814605713,
      "learning_rate": 9.025651838722219e-06,
      "loss": 0.0424,
      "step": 315740
    },
    {
      "epoch": 0.5167481654589134,
      "grad_norm": 0.5959606766700745,
      "learning_rate": 9.025585946508701e-06,
      "loss": 0.0332,
      "step": 315760
    },
    {
      "epoch": 0.5167808958975668,
      "grad_norm": 2.973477840423584,
      "learning_rate": 9.025520054295185e-06,
      "loss": 0.0468,
      "step": 315780
    },
    {
      "epoch": 0.5168136263362202,
      "grad_norm": 1.2275490760803223,
      "learning_rate": 9.025454162081666e-06,
      "loss": 0.0429,
      "step": 315800
    },
    {
      "epoch": 0.5168463567748734,
      "grad_norm": 0.7826539874076843,
      "learning_rate": 9.02538826986815e-06,
      "loss": 0.0496,
      "step": 315820
    },
    {
      "epoch": 0.5168790872135268,
      "grad_norm": 0.592837393283844,
      "learning_rate": 9.025322377654634e-06,
      "loss": 0.0302,
      "step": 315840
    },
    {
      "epoch": 0.5169118176521802,
      "grad_norm": 0.935714066028595,
      "learning_rate": 9.025256485441116e-06,
      "loss": 0.0379,
      "step": 315860
    },
    {
      "epoch": 0.5169445480908336,
      "grad_norm": 2.5753111839294434,
      "learning_rate": 9.025190593227599e-06,
      "loss": 0.0311,
      "step": 315880
    },
    {
      "epoch": 0.5169772785294868,
      "grad_norm": 5.631018161773682,
      "learning_rate": 9.025124701014083e-06,
      "loss": 0.0311,
      "step": 315900
    },
    {
      "epoch": 0.5170100089681402,
      "grad_norm": 0.5467972159385681,
      "learning_rate": 9.025058808800565e-06,
      "loss": 0.0408,
      "step": 315920
    },
    {
      "epoch": 0.5170427394067936,
      "grad_norm": 0.8737727403640747,
      "learning_rate": 9.024992916587048e-06,
      "loss": 0.028,
      "step": 315940
    },
    {
      "epoch": 0.5170754698454468,
      "grad_norm": 0.41149643063545227,
      "learning_rate": 9.024927024373532e-06,
      "loss": 0.0204,
      "step": 315960
    },
    {
      "epoch": 0.5171082002841002,
      "grad_norm": 0.7580580711364746,
      "learning_rate": 9.024861132160014e-06,
      "loss": 0.0447,
      "step": 315980
    },
    {
      "epoch": 0.5171409307227536,
      "grad_norm": 6.850315093994141,
      "learning_rate": 9.024795239946497e-06,
      "loss": 0.0391,
      "step": 316000
    },
    {
      "epoch": 0.5171736611614068,
      "grad_norm": 1.133108139038086,
      "learning_rate": 9.024729347732979e-06,
      "loss": 0.0423,
      "step": 316020
    },
    {
      "epoch": 0.5172063916000602,
      "grad_norm": 1.330884337425232,
      "learning_rate": 9.024663455519463e-06,
      "loss": 0.0421,
      "step": 316040
    },
    {
      "epoch": 0.5172391220387136,
      "grad_norm": 0.5843513607978821,
      "learning_rate": 9.024597563305945e-06,
      "loss": 0.0261,
      "step": 316060
    },
    {
      "epoch": 0.517271852477367,
      "grad_norm": 0.9336981773376465,
      "learning_rate": 9.024531671092428e-06,
      "loss": 0.0487,
      "step": 316080
    },
    {
      "epoch": 0.5173045829160202,
      "grad_norm": 11.148728370666504,
      "learning_rate": 9.02446577887891e-06,
      "loss": 0.0391,
      "step": 316100
    },
    {
      "epoch": 0.5173373133546736,
      "grad_norm": 1.3668867349624634,
      "learning_rate": 9.024399886665394e-06,
      "loss": 0.0423,
      "step": 316120
    },
    {
      "epoch": 0.517370043793327,
      "grad_norm": 1.5629534721374512,
      "learning_rate": 9.024333994451876e-06,
      "loss": 0.0341,
      "step": 316140
    },
    {
      "epoch": 0.5174027742319802,
      "grad_norm": 1.5361524820327759,
      "learning_rate": 9.024268102238359e-06,
      "loss": 0.0346,
      "step": 316160
    },
    {
      "epoch": 0.5174355046706336,
      "grad_norm": 1.0416170358657837,
      "learning_rate": 9.024202210024841e-06,
      "loss": 0.039,
      "step": 316180
    },
    {
      "epoch": 0.517468235109287,
      "grad_norm": 1.4699842929840088,
      "learning_rate": 9.024136317811325e-06,
      "loss": 0.0448,
      "step": 316200
    },
    {
      "epoch": 0.5175009655479402,
      "grad_norm": 0.5146211385726929,
      "learning_rate": 9.024070425597807e-06,
      "loss": 0.0419,
      "step": 316220
    },
    {
      "epoch": 0.5175336959865936,
      "grad_norm": 1.6733770370483398,
      "learning_rate": 9.02400453338429e-06,
      "loss": 0.0498,
      "step": 316240
    },
    {
      "epoch": 0.517566426425247,
      "grad_norm": 1.926384449005127,
      "learning_rate": 9.023938641170774e-06,
      "loss": 0.0312,
      "step": 316260
    },
    {
      "epoch": 0.5175991568639003,
      "grad_norm": 1.1711210012435913,
      "learning_rate": 9.023872748957256e-06,
      "loss": 0.0384,
      "step": 316280
    },
    {
      "epoch": 0.5176318873025536,
      "grad_norm": 1.4742401838302612,
      "learning_rate": 9.02380685674374e-06,
      "loss": 0.0441,
      "step": 316300
    },
    {
      "epoch": 0.517664617741207,
      "grad_norm": 2.617335557937622,
      "learning_rate": 9.023740964530223e-06,
      "loss": 0.0504,
      "step": 316320
    },
    {
      "epoch": 0.5176973481798604,
      "grad_norm": 0.3048124313354492,
      "learning_rate": 9.023675072316705e-06,
      "loss": 0.0427,
      "step": 316340
    },
    {
      "epoch": 0.5177300786185136,
      "grad_norm": 1.3984179496765137,
      "learning_rate": 9.023609180103188e-06,
      "loss": 0.0352,
      "step": 316360
    },
    {
      "epoch": 0.517762809057167,
      "grad_norm": 0.5028473734855652,
      "learning_rate": 9.023543287889672e-06,
      "loss": 0.0374,
      "step": 316380
    },
    {
      "epoch": 0.5177955394958204,
      "grad_norm": 0.8255643844604492,
      "learning_rate": 9.023477395676154e-06,
      "loss": 0.045,
      "step": 316400
    },
    {
      "epoch": 0.5178282699344736,
      "grad_norm": 3.657041549682617,
      "learning_rate": 9.023411503462637e-06,
      "loss": 0.0273,
      "step": 316420
    },
    {
      "epoch": 0.517861000373127,
      "grad_norm": 1.108648419380188,
      "learning_rate": 9.02334561124912e-06,
      "loss": 0.0445,
      "step": 316440
    },
    {
      "epoch": 0.5178937308117804,
      "grad_norm": 0.5930710434913635,
      "learning_rate": 9.023279719035603e-06,
      "loss": 0.03,
      "step": 316460
    },
    {
      "epoch": 0.5179264612504336,
      "grad_norm": 1.3306106328964233,
      "learning_rate": 9.023213826822085e-06,
      "loss": 0.0318,
      "step": 316480
    },
    {
      "epoch": 0.517959191689087,
      "grad_norm": 0.948613703250885,
      "learning_rate": 9.023147934608568e-06,
      "loss": 0.0458,
      "step": 316500
    },
    {
      "epoch": 0.5179919221277404,
      "grad_norm": 3.293750286102295,
      "learning_rate": 9.02308204239505e-06,
      "loss": 0.0365,
      "step": 316520
    },
    {
      "epoch": 0.5180246525663937,
      "grad_norm": 2.4535861015319824,
      "learning_rate": 9.023016150181534e-06,
      "loss": 0.035,
      "step": 316540
    },
    {
      "epoch": 0.518057383005047,
      "grad_norm": 3.7895591259002686,
      "learning_rate": 9.022950257968016e-06,
      "loss": 0.0333,
      "step": 316560
    },
    {
      "epoch": 0.5180901134437004,
      "grad_norm": 1.6130609512329102,
      "learning_rate": 9.0228843657545e-06,
      "loss": 0.0371,
      "step": 316580
    },
    {
      "epoch": 0.5181228438823537,
      "grad_norm": 0.9144231677055359,
      "learning_rate": 9.022818473540983e-06,
      "loss": 0.0412,
      "step": 316600
    },
    {
      "epoch": 0.518155574321007,
      "grad_norm": 0.2353544384241104,
      "learning_rate": 9.022752581327465e-06,
      "loss": 0.0333,
      "step": 316620
    },
    {
      "epoch": 0.5181883047596604,
      "grad_norm": 0.9491857886314392,
      "learning_rate": 9.022686689113948e-06,
      "loss": 0.0346,
      "step": 316640
    },
    {
      "epoch": 0.5182210351983138,
      "grad_norm": 4.302774906158447,
      "learning_rate": 9.02262079690043e-06,
      "loss": 0.0453,
      "step": 316660
    },
    {
      "epoch": 0.518253765636967,
      "grad_norm": 0.6996484398841858,
      "learning_rate": 9.022554904686914e-06,
      "loss": 0.0405,
      "step": 316680
    },
    {
      "epoch": 0.5182864960756204,
      "grad_norm": 1.7252906560897827,
      "learning_rate": 9.022489012473397e-06,
      "loss": 0.0392,
      "step": 316700
    },
    {
      "epoch": 0.5183192265142738,
      "grad_norm": 2.078514337539673,
      "learning_rate": 9.02242312025988e-06,
      "loss": 0.0497,
      "step": 316720
    },
    {
      "epoch": 0.5183519569529271,
      "grad_norm": 2.222622871398926,
      "learning_rate": 9.022357228046363e-06,
      "loss": 0.0354,
      "step": 316740
    },
    {
      "epoch": 0.5183846873915804,
      "grad_norm": 0.7872390747070312,
      "learning_rate": 9.022291335832846e-06,
      "loss": 0.0466,
      "step": 316760
    },
    {
      "epoch": 0.5184174178302338,
      "grad_norm": 0.6195213794708252,
      "learning_rate": 9.022225443619328e-06,
      "loss": 0.0385,
      "step": 316780
    },
    {
      "epoch": 0.5184501482688871,
      "grad_norm": 1.3115174770355225,
      "learning_rate": 9.022159551405812e-06,
      "loss": 0.0371,
      "step": 316800
    },
    {
      "epoch": 0.5184828787075404,
      "grad_norm": 0.8260689377784729,
      "learning_rate": 9.022093659192294e-06,
      "loss": 0.0343,
      "step": 316820
    },
    {
      "epoch": 0.5185156091461938,
      "grad_norm": 1.6789840459823608,
      "learning_rate": 9.022027766978777e-06,
      "loss": 0.0348,
      "step": 316840
    },
    {
      "epoch": 0.5185483395848471,
      "grad_norm": 10.553204536437988,
      "learning_rate": 9.02196187476526e-06,
      "loss": 0.0438,
      "step": 316860
    },
    {
      "epoch": 0.5185810700235004,
      "grad_norm": 0.7327789068222046,
      "learning_rate": 9.021895982551743e-06,
      "loss": 0.0366,
      "step": 316880
    },
    {
      "epoch": 0.5186138004621538,
      "grad_norm": 1.0609883069992065,
      "learning_rate": 9.021830090338225e-06,
      "loss": 0.0344,
      "step": 316900
    },
    {
      "epoch": 0.5186465309008071,
      "grad_norm": 0.8965516686439514,
      "learning_rate": 9.021764198124708e-06,
      "loss": 0.0522,
      "step": 316920
    },
    {
      "epoch": 0.5186792613394605,
      "grad_norm": 1.387307047843933,
      "learning_rate": 9.02169830591119e-06,
      "loss": 0.0496,
      "step": 316940
    },
    {
      "epoch": 0.5187119917781138,
      "grad_norm": 0.9214701652526855,
      "learning_rate": 9.021632413697674e-06,
      "loss": 0.0334,
      "step": 316960
    },
    {
      "epoch": 0.5187447222167672,
      "grad_norm": 0.9218108654022217,
      "learning_rate": 9.021566521484157e-06,
      "loss": 0.0395,
      "step": 316980
    },
    {
      "epoch": 0.5187774526554205,
      "grad_norm": 1.2920360565185547,
      "learning_rate": 9.02150062927064e-06,
      "loss": 0.0432,
      "step": 317000
    },
    {
      "epoch": 0.5188101830940738,
      "grad_norm": 2.0800106525421143,
      "learning_rate": 9.021434737057123e-06,
      "loss": 0.0563,
      "step": 317020
    },
    {
      "epoch": 0.5188429135327272,
      "grad_norm": 0.5748978853225708,
      "learning_rate": 9.021368844843605e-06,
      "loss": 0.0357,
      "step": 317040
    },
    {
      "epoch": 0.5188756439713805,
      "grad_norm": 0.32877209782600403,
      "learning_rate": 9.021302952630088e-06,
      "loss": 0.0498,
      "step": 317060
    },
    {
      "epoch": 0.5189083744100338,
      "grad_norm": 2.2587692737579346,
      "learning_rate": 9.02123706041657e-06,
      "loss": 0.045,
      "step": 317080
    },
    {
      "epoch": 0.5189411048486872,
      "grad_norm": 1.140244483947754,
      "learning_rate": 9.021171168203054e-06,
      "loss": 0.0522,
      "step": 317100
    },
    {
      "epoch": 0.5189738352873405,
      "grad_norm": 2.3140065670013428,
      "learning_rate": 9.021105275989537e-06,
      "loss": 0.0422,
      "step": 317120
    },
    {
      "epoch": 0.5190065657259939,
      "grad_norm": 1.563991665840149,
      "learning_rate": 9.02103938377602e-06,
      "loss": 0.0585,
      "step": 317140
    },
    {
      "epoch": 0.5190392961646472,
      "grad_norm": 2.2946419715881348,
      "learning_rate": 9.020973491562503e-06,
      "loss": 0.0342,
      "step": 317160
    },
    {
      "epoch": 0.5190720266033005,
      "grad_norm": 1.42460298538208,
      "learning_rate": 9.020907599348987e-06,
      "loss": 0.0503,
      "step": 317180
    },
    {
      "epoch": 0.5191047570419539,
      "grad_norm": 1.3430006504058838,
      "learning_rate": 9.020841707135468e-06,
      "loss": 0.0415,
      "step": 317200
    },
    {
      "epoch": 0.5191374874806072,
      "grad_norm": 2.385040044784546,
      "learning_rate": 9.020775814921952e-06,
      "loss": 0.0518,
      "step": 317220
    },
    {
      "epoch": 0.5191702179192605,
      "grad_norm": 0.7835164070129395,
      "learning_rate": 9.020709922708434e-06,
      "loss": 0.0327,
      "step": 317240
    },
    {
      "epoch": 0.5192029483579139,
      "grad_norm": 1.5915080308914185,
      "learning_rate": 9.020644030494917e-06,
      "loss": 0.0324,
      "step": 317260
    },
    {
      "epoch": 0.5192356787965672,
      "grad_norm": 0.499040424823761,
      "learning_rate": 9.0205781382814e-06,
      "loss": 0.0388,
      "step": 317280
    },
    {
      "epoch": 0.5192684092352206,
      "grad_norm": 0.8536046743392944,
      "learning_rate": 9.020512246067883e-06,
      "loss": 0.0309,
      "step": 317300
    },
    {
      "epoch": 0.5193011396738739,
      "grad_norm": 0.2832075357437134,
      "learning_rate": 9.020446353854367e-06,
      "loss": 0.0291,
      "step": 317320
    },
    {
      "epoch": 0.5193338701125273,
      "grad_norm": 1.6519356966018677,
      "learning_rate": 9.020380461640848e-06,
      "loss": 0.0409,
      "step": 317340
    },
    {
      "epoch": 0.5193666005511806,
      "grad_norm": 1.8253700733184814,
      "learning_rate": 9.020314569427332e-06,
      "loss": 0.0316,
      "step": 317360
    },
    {
      "epoch": 0.5193993309898339,
      "grad_norm": 2.1573855876922607,
      "learning_rate": 9.020248677213814e-06,
      "loss": 0.0434,
      "step": 317380
    },
    {
      "epoch": 0.5194320614284873,
      "grad_norm": 1.5274319648742676,
      "learning_rate": 9.020182785000297e-06,
      "loss": 0.0393,
      "step": 317400
    },
    {
      "epoch": 0.5194647918671406,
      "grad_norm": 0.9647273421287537,
      "learning_rate": 9.02011689278678e-06,
      "loss": 0.0482,
      "step": 317420
    },
    {
      "epoch": 0.5194975223057939,
      "grad_norm": 2.5850181579589844,
      "learning_rate": 9.020051000573263e-06,
      "loss": 0.0272,
      "step": 317440
    },
    {
      "epoch": 0.5195302527444473,
      "grad_norm": 0.521724283695221,
      "learning_rate": 9.019985108359745e-06,
      "loss": 0.0249,
      "step": 317460
    },
    {
      "epoch": 0.5195629831831006,
      "grad_norm": 2.3078877925872803,
      "learning_rate": 9.019919216146228e-06,
      "loss": 0.0459,
      "step": 317480
    },
    {
      "epoch": 0.5195957136217539,
      "grad_norm": 2.3124330043792725,
      "learning_rate": 9.019853323932712e-06,
      "loss": 0.029,
      "step": 317500
    },
    {
      "epoch": 0.5196284440604073,
      "grad_norm": 1.9220958948135376,
      "learning_rate": 9.019787431719194e-06,
      "loss": 0.0391,
      "step": 317520
    },
    {
      "epoch": 0.5196611744990607,
      "grad_norm": 2.235239267349243,
      "learning_rate": 9.019721539505678e-06,
      "loss": 0.0333,
      "step": 317540
    },
    {
      "epoch": 0.519693904937714,
      "grad_norm": 1.8043177127838135,
      "learning_rate": 9.019655647292161e-06,
      "loss": 0.0411,
      "step": 317560
    },
    {
      "epoch": 0.5197266353763673,
      "grad_norm": 0.9397274851799011,
      "learning_rate": 9.019589755078643e-06,
      "loss": 0.0276,
      "step": 317580
    },
    {
      "epoch": 0.5197593658150207,
      "grad_norm": 0.23626579344272614,
      "learning_rate": 9.019523862865127e-06,
      "loss": 0.0469,
      "step": 317600
    },
    {
      "epoch": 0.519792096253674,
      "grad_norm": 1.106522560119629,
      "learning_rate": 9.019457970651608e-06,
      "loss": 0.0426,
      "step": 317620
    },
    {
      "epoch": 0.5198248266923273,
      "grad_norm": 1.1333189010620117,
      "learning_rate": 9.019392078438092e-06,
      "loss": 0.0535,
      "step": 317640
    },
    {
      "epoch": 0.5198575571309807,
      "grad_norm": 0.5650050044059753,
      "learning_rate": 9.019326186224576e-06,
      "loss": 0.037,
      "step": 317660
    },
    {
      "epoch": 0.519890287569634,
      "grad_norm": 2.4597549438476562,
      "learning_rate": 9.019260294011058e-06,
      "loss": 0.0424,
      "step": 317680
    },
    {
      "epoch": 0.5199230180082873,
      "grad_norm": 3.3637561798095703,
      "learning_rate": 9.019194401797541e-06,
      "loss": 0.0338,
      "step": 317700
    },
    {
      "epoch": 0.5199557484469407,
      "grad_norm": 2.2000622749328613,
      "learning_rate": 9.019128509584023e-06,
      "loss": 0.0322,
      "step": 317720
    },
    {
      "epoch": 0.5199884788855941,
      "grad_norm": 2.625462770462036,
      "learning_rate": 9.019062617370507e-06,
      "loss": 0.0478,
      "step": 317740
    },
    {
      "epoch": 0.5200212093242473,
      "grad_norm": 1.3196704387664795,
      "learning_rate": 9.018996725156988e-06,
      "loss": 0.0351,
      "step": 317760
    },
    {
      "epoch": 0.5200539397629007,
      "grad_norm": 0.9415411949157715,
      "learning_rate": 9.018930832943472e-06,
      "loss": 0.0425,
      "step": 317780
    },
    {
      "epoch": 0.5200866702015541,
      "grad_norm": 0.8698438405990601,
      "learning_rate": 9.018864940729954e-06,
      "loss": 0.0349,
      "step": 317800
    },
    {
      "epoch": 0.5201194006402073,
      "grad_norm": 0.7566709518432617,
      "learning_rate": 9.018799048516438e-06,
      "loss": 0.0394,
      "step": 317820
    },
    {
      "epoch": 0.5201521310788607,
      "grad_norm": 2.1362156867980957,
      "learning_rate": 9.01873315630292e-06,
      "loss": 0.036,
      "step": 317840
    },
    {
      "epoch": 0.5201848615175141,
      "grad_norm": 1.5616194009780884,
      "learning_rate": 9.018667264089403e-06,
      "loss": 0.0437,
      "step": 317860
    },
    {
      "epoch": 0.5202175919561673,
      "grad_norm": 1.40570867061615,
      "learning_rate": 9.018601371875887e-06,
      "loss": 0.0397,
      "step": 317880
    },
    {
      "epoch": 0.5202503223948207,
      "grad_norm": 2.503061294555664,
      "learning_rate": 9.018535479662369e-06,
      "loss": 0.0429,
      "step": 317900
    },
    {
      "epoch": 0.5202830528334741,
      "grad_norm": 1.8921136856079102,
      "learning_rate": 9.018469587448852e-06,
      "loss": 0.0349,
      "step": 317920
    },
    {
      "epoch": 0.5203157832721275,
      "grad_norm": 1.3739336729049683,
      "learning_rate": 9.018403695235336e-06,
      "loss": 0.0362,
      "step": 317940
    },
    {
      "epoch": 0.5203485137107807,
      "grad_norm": 1.0982654094696045,
      "learning_rate": 9.018337803021818e-06,
      "loss": 0.0439,
      "step": 317960
    },
    {
      "epoch": 0.5203812441494341,
      "grad_norm": 0.5050243735313416,
      "learning_rate": 9.018271910808301e-06,
      "loss": 0.0394,
      "step": 317980
    },
    {
      "epoch": 0.5204139745880875,
      "grad_norm": 0.8817142248153687,
      "learning_rate": 9.018206018594783e-06,
      "loss": 0.0374,
      "step": 318000
    },
    {
      "epoch": 0.5204467050267407,
      "grad_norm": 0.9963856339454651,
      "learning_rate": 9.018140126381267e-06,
      "loss": 0.0347,
      "step": 318020
    },
    {
      "epoch": 0.5204794354653941,
      "grad_norm": 1.776133418083191,
      "learning_rate": 9.01807423416775e-06,
      "loss": 0.0438,
      "step": 318040
    },
    {
      "epoch": 0.5205121659040475,
      "grad_norm": 1.7664299011230469,
      "learning_rate": 9.018008341954232e-06,
      "loss": 0.0289,
      "step": 318060
    },
    {
      "epoch": 0.5205448963427007,
      "grad_norm": 1.7362887859344482,
      "learning_rate": 9.017942449740716e-06,
      "loss": 0.0475,
      "step": 318080
    },
    {
      "epoch": 0.5205776267813541,
      "grad_norm": 2.0489559173583984,
      "learning_rate": 9.017876557527198e-06,
      "loss": 0.0427,
      "step": 318100
    },
    {
      "epoch": 0.5206103572200075,
      "grad_norm": 1.296920895576477,
      "learning_rate": 9.017810665313681e-06,
      "loss": 0.0451,
      "step": 318120
    },
    {
      "epoch": 0.5206430876586609,
      "grad_norm": 1.4091041088104248,
      "learning_rate": 9.017744773100163e-06,
      "loss": 0.0312,
      "step": 318140
    },
    {
      "epoch": 0.5206758180973141,
      "grad_norm": 2.346484899520874,
      "learning_rate": 9.017678880886647e-06,
      "loss": 0.0495,
      "step": 318160
    },
    {
      "epoch": 0.5207085485359675,
      "grad_norm": 0.6627151966094971,
      "learning_rate": 9.017612988673129e-06,
      "loss": 0.0375,
      "step": 318180
    },
    {
      "epoch": 0.5207412789746209,
      "grad_norm": 0.8070592284202576,
      "learning_rate": 9.017547096459612e-06,
      "loss": 0.0368,
      "step": 318200
    },
    {
      "epoch": 0.5207740094132741,
      "grad_norm": 1.5931848287582397,
      "learning_rate": 9.017481204246094e-06,
      "loss": 0.0476,
      "step": 318220
    },
    {
      "epoch": 0.5208067398519275,
      "grad_norm": 1.7671793699264526,
      "learning_rate": 9.017415312032578e-06,
      "loss": 0.0488,
      "step": 318240
    },
    {
      "epoch": 0.5208394702905809,
      "grad_norm": 1.3710222244262695,
      "learning_rate": 9.01734941981906e-06,
      "loss": 0.0512,
      "step": 318260
    },
    {
      "epoch": 0.5208722007292341,
      "grad_norm": 1.3704010248184204,
      "learning_rate": 9.017283527605543e-06,
      "loss": 0.0418,
      "step": 318280
    },
    {
      "epoch": 0.5209049311678875,
      "grad_norm": 3.231964111328125,
      "learning_rate": 9.017217635392027e-06,
      "loss": 0.0589,
      "step": 318300
    },
    {
      "epoch": 0.5209376616065409,
      "grad_norm": 2.3930230140686035,
      "learning_rate": 9.017151743178509e-06,
      "loss": 0.0545,
      "step": 318320
    },
    {
      "epoch": 0.5209703920451942,
      "grad_norm": 1.1355509757995605,
      "learning_rate": 9.017085850964992e-06,
      "loss": 0.0461,
      "step": 318340
    },
    {
      "epoch": 0.5210031224838475,
      "grad_norm": 0.9419263005256653,
      "learning_rate": 9.017019958751476e-06,
      "loss": 0.0426,
      "step": 318360
    },
    {
      "epoch": 0.5210358529225009,
      "grad_norm": 1.4092276096343994,
      "learning_rate": 9.016954066537958e-06,
      "loss": 0.029,
      "step": 318380
    },
    {
      "epoch": 0.5210685833611542,
      "grad_norm": 1.3352841138839722,
      "learning_rate": 9.016888174324441e-06,
      "loss": 0.0451,
      "step": 318400
    },
    {
      "epoch": 0.5211013137998075,
      "grad_norm": 1.3649652004241943,
      "learning_rate": 9.016822282110925e-06,
      "loss": 0.0288,
      "step": 318420
    },
    {
      "epoch": 0.5211340442384609,
      "grad_norm": 0.9893254637718201,
      "learning_rate": 9.016756389897407e-06,
      "loss": 0.031,
      "step": 318440
    },
    {
      "epoch": 0.5211667746771143,
      "grad_norm": 1.726054072380066,
      "learning_rate": 9.01669049768389e-06,
      "loss": 0.0531,
      "step": 318460
    },
    {
      "epoch": 0.5211995051157675,
      "grad_norm": 0.474016010761261,
      "learning_rate": 9.016624605470372e-06,
      "loss": 0.033,
      "step": 318480
    },
    {
      "epoch": 0.5212322355544209,
      "grad_norm": 3.087495803833008,
      "learning_rate": 9.016558713256856e-06,
      "loss": 0.0409,
      "step": 318500
    },
    {
      "epoch": 0.5212649659930743,
      "grad_norm": 2.061244010925293,
      "learning_rate": 9.016492821043338e-06,
      "loss": 0.0575,
      "step": 318520
    },
    {
      "epoch": 0.5212976964317276,
      "grad_norm": 0.6650508642196655,
      "learning_rate": 9.016426928829821e-06,
      "loss": 0.052,
      "step": 318540
    },
    {
      "epoch": 0.5213304268703809,
      "grad_norm": 2.4488959312438965,
      "learning_rate": 9.016361036616303e-06,
      "loss": 0.0443,
      "step": 318560
    },
    {
      "epoch": 0.5213631573090343,
      "grad_norm": 1.7000658512115479,
      "learning_rate": 9.016295144402787e-06,
      "loss": 0.0449,
      "step": 318580
    },
    {
      "epoch": 0.5213958877476876,
      "grad_norm": 2.263639450073242,
      "learning_rate": 9.016229252189269e-06,
      "loss": 0.039,
      "step": 318600
    },
    {
      "epoch": 0.5214286181863409,
      "grad_norm": 2.9772489070892334,
      "learning_rate": 9.016163359975752e-06,
      "loss": 0.0473,
      "step": 318620
    },
    {
      "epoch": 0.5214613486249943,
      "grad_norm": 2.388667345046997,
      "learning_rate": 9.016097467762234e-06,
      "loss": 0.046,
      "step": 318640
    },
    {
      "epoch": 0.5214940790636476,
      "grad_norm": 0.8655823469161987,
      "learning_rate": 9.016031575548718e-06,
      "loss": 0.0476,
      "step": 318660
    },
    {
      "epoch": 0.5215268095023009,
      "grad_norm": 1.6802645921707153,
      "learning_rate": 9.015965683335201e-06,
      "loss": 0.0294,
      "step": 318680
    },
    {
      "epoch": 0.5215595399409543,
      "grad_norm": 1.595047116279602,
      "learning_rate": 9.015899791121683e-06,
      "loss": 0.0389,
      "step": 318700
    },
    {
      "epoch": 0.5215922703796076,
      "grad_norm": 1.3274657726287842,
      "learning_rate": 9.015833898908167e-06,
      "loss": 0.0238,
      "step": 318720
    },
    {
      "epoch": 0.521625000818261,
      "grad_norm": 0.9026981592178345,
      "learning_rate": 9.01576800669465e-06,
      "loss": 0.0378,
      "step": 318740
    },
    {
      "epoch": 0.5216577312569143,
      "grad_norm": 0.8125737905502319,
      "learning_rate": 9.015702114481132e-06,
      "loss": 0.0314,
      "step": 318760
    },
    {
      "epoch": 0.5216904616955677,
      "grad_norm": 2.4937620162963867,
      "learning_rate": 9.015636222267616e-06,
      "loss": 0.0358,
      "step": 318780
    },
    {
      "epoch": 0.521723192134221,
      "grad_norm": 1.2014323472976685,
      "learning_rate": 9.0155703300541e-06,
      "loss": 0.0407,
      "step": 318800
    },
    {
      "epoch": 0.5217559225728743,
      "grad_norm": 1.579755187034607,
      "learning_rate": 9.015504437840581e-06,
      "loss": 0.0317,
      "step": 318820
    },
    {
      "epoch": 0.5217886530115277,
      "grad_norm": 1.1955177783966064,
      "learning_rate": 9.015438545627065e-06,
      "loss": 0.0454,
      "step": 318840
    },
    {
      "epoch": 0.521821383450181,
      "grad_norm": 4.477705955505371,
      "learning_rate": 9.015372653413547e-06,
      "loss": 0.037,
      "step": 318860
    },
    {
      "epoch": 0.5218541138888343,
      "grad_norm": 1.963728666305542,
      "learning_rate": 9.01530676120003e-06,
      "loss": 0.0407,
      "step": 318880
    },
    {
      "epoch": 0.5218868443274877,
      "grad_norm": 2.672060966491699,
      "learning_rate": 9.015240868986512e-06,
      "loss": 0.0355,
      "step": 318900
    },
    {
      "epoch": 0.521919574766141,
      "grad_norm": 2.5572595596313477,
      "learning_rate": 9.015174976772996e-06,
      "loss": 0.0476,
      "step": 318920
    },
    {
      "epoch": 0.5219523052047944,
      "grad_norm": 2.1789939403533936,
      "learning_rate": 9.015109084559478e-06,
      "loss": 0.04,
      "step": 318940
    },
    {
      "epoch": 0.5219850356434477,
      "grad_norm": 0.37374773621559143,
      "learning_rate": 9.015043192345961e-06,
      "loss": 0.0327,
      "step": 318960
    },
    {
      "epoch": 0.522017766082101,
      "grad_norm": 0.9330331087112427,
      "learning_rate": 9.014977300132443e-06,
      "loss": 0.0474,
      "step": 318980
    },
    {
      "epoch": 0.5220504965207544,
      "grad_norm": 1.0337250232696533,
      "learning_rate": 9.014911407918927e-06,
      "loss": 0.0321,
      "step": 319000
    },
    {
      "epoch": 0.5220832269594077,
      "grad_norm": 0.6898930668830872,
      "learning_rate": 9.014845515705409e-06,
      "loss": 0.0327,
      "step": 319020
    },
    {
      "epoch": 0.522115957398061,
      "grad_norm": 0.6098156571388245,
      "learning_rate": 9.014779623491892e-06,
      "loss": 0.035,
      "step": 319040
    },
    {
      "epoch": 0.5221486878367144,
      "grad_norm": 0.7198912501335144,
      "learning_rate": 9.014713731278376e-06,
      "loss": 0.0401,
      "step": 319060
    },
    {
      "epoch": 0.5221814182753677,
      "grad_norm": 0.3239566683769226,
      "learning_rate": 9.014647839064858e-06,
      "loss": 0.0347,
      "step": 319080
    },
    {
      "epoch": 0.522214148714021,
      "grad_norm": 0.1973259150981903,
      "learning_rate": 9.014581946851341e-06,
      "loss": 0.029,
      "step": 319100
    },
    {
      "epoch": 0.5222468791526744,
      "grad_norm": 5.516305446624756,
      "learning_rate": 9.014516054637823e-06,
      "loss": 0.0458,
      "step": 319120
    },
    {
      "epoch": 0.5222796095913278,
      "grad_norm": 1.2522350549697876,
      "learning_rate": 9.014450162424307e-06,
      "loss": 0.0419,
      "step": 319140
    },
    {
      "epoch": 0.5223123400299811,
      "grad_norm": 1.665762186050415,
      "learning_rate": 9.01438427021079e-06,
      "loss": 0.0468,
      "step": 319160
    },
    {
      "epoch": 0.5223450704686344,
      "grad_norm": 1.931452989578247,
      "learning_rate": 9.014318377997272e-06,
      "loss": 0.0506,
      "step": 319180
    },
    {
      "epoch": 0.5223778009072878,
      "grad_norm": 2.473789930343628,
      "learning_rate": 9.014252485783756e-06,
      "loss": 0.0482,
      "step": 319200
    },
    {
      "epoch": 0.5224105313459411,
      "grad_norm": 0.4185521602630615,
      "learning_rate": 9.01418659357024e-06,
      "loss": 0.0296,
      "step": 319220
    },
    {
      "epoch": 0.5224432617845944,
      "grad_norm": 2.524737596511841,
      "learning_rate": 9.014120701356721e-06,
      "loss": 0.0382,
      "step": 319240
    },
    {
      "epoch": 0.5224759922232478,
      "grad_norm": 2.948909044265747,
      "learning_rate": 9.014054809143205e-06,
      "loss": 0.0503,
      "step": 319260
    },
    {
      "epoch": 0.5225087226619011,
      "grad_norm": 0.8876862525939941,
      "learning_rate": 9.013988916929687e-06,
      "loss": 0.0452,
      "step": 319280
    },
    {
      "epoch": 0.5225414531005544,
      "grad_norm": 1.2597166299819946,
      "learning_rate": 9.01392302471617e-06,
      "loss": 0.0496,
      "step": 319300
    },
    {
      "epoch": 0.5225741835392078,
      "grad_norm": 0.8246714472770691,
      "learning_rate": 9.013857132502652e-06,
      "loss": 0.0297,
      "step": 319320
    },
    {
      "epoch": 0.5226069139778611,
      "grad_norm": 1.0826258659362793,
      "learning_rate": 9.013791240289136e-06,
      "loss": 0.0514,
      "step": 319340
    },
    {
      "epoch": 0.5226396444165144,
      "grad_norm": 0.9049518704414368,
      "learning_rate": 9.013725348075618e-06,
      "loss": 0.0294,
      "step": 319360
    },
    {
      "epoch": 0.5226723748551678,
      "grad_norm": 0.8585758209228516,
      "learning_rate": 9.013659455862101e-06,
      "loss": 0.0406,
      "step": 319380
    },
    {
      "epoch": 0.5227051052938212,
      "grad_norm": 0.7781081795692444,
      "learning_rate": 9.013593563648583e-06,
      "loss": 0.0305,
      "step": 319400
    },
    {
      "epoch": 0.5227378357324745,
      "grad_norm": 1.2177923917770386,
      "learning_rate": 9.013527671435067e-06,
      "loss": 0.053,
      "step": 319420
    },
    {
      "epoch": 0.5227705661711278,
      "grad_norm": 2.732328176498413,
      "learning_rate": 9.01346177922155e-06,
      "loss": 0.048,
      "step": 319440
    },
    {
      "epoch": 0.5228032966097812,
      "grad_norm": 1.0547688007354736,
      "learning_rate": 9.013395887008032e-06,
      "loss": 0.0343,
      "step": 319460
    },
    {
      "epoch": 0.5228360270484345,
      "grad_norm": 0.629000186920166,
      "learning_rate": 9.013329994794516e-06,
      "loss": 0.0386,
      "step": 319480
    },
    {
      "epoch": 0.5228687574870878,
      "grad_norm": 1.0839613676071167,
      "learning_rate": 9.013264102580998e-06,
      "loss": 0.0461,
      "step": 319500
    },
    {
      "epoch": 0.5229014879257412,
      "grad_norm": 1.2788047790527344,
      "learning_rate": 9.013198210367481e-06,
      "loss": 0.0571,
      "step": 319520
    },
    {
      "epoch": 0.5229342183643945,
      "grad_norm": 1.7585512399673462,
      "learning_rate": 9.013132318153965e-06,
      "loss": 0.0421,
      "step": 319540
    },
    {
      "epoch": 0.5229669488030478,
      "grad_norm": 0.333967387676239,
      "learning_rate": 9.013066425940447e-06,
      "loss": 0.0298,
      "step": 319560
    },
    {
      "epoch": 0.5229996792417012,
      "grad_norm": 1.55924391746521,
      "learning_rate": 9.01300053372693e-06,
      "loss": 0.0366,
      "step": 319580
    },
    {
      "epoch": 0.5230324096803546,
      "grad_norm": 1.5925323963165283,
      "learning_rate": 9.012934641513414e-06,
      "loss": 0.0362,
      "step": 319600
    },
    {
      "epoch": 0.5230651401190078,
      "grad_norm": 1.587932825088501,
      "learning_rate": 9.012868749299896e-06,
      "loss": 0.0417,
      "step": 319620
    },
    {
      "epoch": 0.5230978705576612,
      "grad_norm": 3.8536875247955322,
      "learning_rate": 9.01280285708638e-06,
      "loss": 0.0427,
      "step": 319640
    },
    {
      "epoch": 0.5231306009963146,
      "grad_norm": 0.5356642007827759,
      "learning_rate": 9.012736964872861e-06,
      "loss": 0.034,
      "step": 319660
    },
    {
      "epoch": 0.5231633314349678,
      "grad_norm": 0.627072274684906,
      "learning_rate": 9.012671072659345e-06,
      "loss": 0.0341,
      "step": 319680
    },
    {
      "epoch": 0.5231960618736212,
      "grad_norm": 1.5935707092285156,
      "learning_rate": 9.012605180445827e-06,
      "loss": 0.0406,
      "step": 319700
    },
    {
      "epoch": 0.5232287923122746,
      "grad_norm": 1.0874147415161133,
      "learning_rate": 9.01253928823231e-06,
      "loss": 0.0409,
      "step": 319720
    },
    {
      "epoch": 0.5232615227509279,
      "grad_norm": 2.7029056549072266,
      "learning_rate": 9.012473396018792e-06,
      "loss": 0.0461,
      "step": 319740
    },
    {
      "epoch": 0.5232942531895812,
      "grad_norm": 0.7333705425262451,
      "learning_rate": 9.012407503805276e-06,
      "loss": 0.0319,
      "step": 319760
    },
    {
      "epoch": 0.5233269836282346,
      "grad_norm": 1.2222294807434082,
      "learning_rate": 9.01234161159176e-06,
      "loss": 0.0375,
      "step": 319780
    },
    {
      "epoch": 0.523359714066888,
      "grad_norm": 1.4252246618270874,
      "learning_rate": 9.012275719378242e-06,
      "loss": 0.0322,
      "step": 319800
    },
    {
      "epoch": 0.5233924445055412,
      "grad_norm": 3.364194393157959,
      "learning_rate": 9.012209827164725e-06,
      "loss": 0.0435,
      "step": 319820
    },
    {
      "epoch": 0.5234251749441946,
      "grad_norm": 0.8000859022140503,
      "learning_rate": 9.012143934951207e-06,
      "loss": 0.0521,
      "step": 319840
    },
    {
      "epoch": 0.523457905382848,
      "grad_norm": 2.6989798545837402,
      "learning_rate": 9.01207804273769e-06,
      "loss": 0.0375,
      "step": 319860
    },
    {
      "epoch": 0.5234906358215012,
      "grad_norm": 0.6964321732521057,
      "learning_rate": 9.012012150524172e-06,
      "loss": 0.0405,
      "step": 319880
    },
    {
      "epoch": 0.5235233662601546,
      "grad_norm": 2.3656511306762695,
      "learning_rate": 9.011946258310656e-06,
      "loss": 0.0294,
      "step": 319900
    },
    {
      "epoch": 0.523556096698808,
      "grad_norm": 3.4389052391052246,
      "learning_rate": 9.011880366097138e-06,
      "loss": 0.0538,
      "step": 319920
    },
    {
      "epoch": 0.5235888271374612,
      "grad_norm": 1.9232455492019653,
      "learning_rate": 9.011814473883622e-06,
      "loss": 0.0322,
      "step": 319940
    },
    {
      "epoch": 0.5236215575761146,
      "grad_norm": 0.2807486355304718,
      "learning_rate": 9.011748581670105e-06,
      "loss": 0.0462,
      "step": 319960
    },
    {
      "epoch": 0.523654288014768,
      "grad_norm": 4.8702545166015625,
      "learning_rate": 9.011682689456589e-06,
      "loss": 0.036,
      "step": 319980
    },
    {
      "epoch": 0.5236870184534214,
      "grad_norm": 4.243731498718262,
      "learning_rate": 9.01161679724307e-06,
      "loss": 0.0459,
      "step": 320000
    },
    {
      "epoch": 0.5237197488920746,
      "grad_norm": 32.127872467041016,
      "learning_rate": 9.011550905029554e-06,
      "loss": 0.0427,
      "step": 320020
    },
    {
      "epoch": 0.523752479330728,
      "grad_norm": 0.5808183550834656,
      "learning_rate": 9.011485012816036e-06,
      "loss": 0.0314,
      "step": 320040
    },
    {
      "epoch": 0.5237852097693814,
      "grad_norm": 1.1891425848007202,
      "learning_rate": 9.01141912060252e-06,
      "loss": 0.0506,
      "step": 320060
    },
    {
      "epoch": 0.5238179402080346,
      "grad_norm": 1.058440923690796,
      "learning_rate": 9.011353228389002e-06,
      "loss": 0.0511,
      "step": 320080
    },
    {
      "epoch": 0.523850670646688,
      "grad_norm": 1.0328704118728638,
      "learning_rate": 9.011287336175485e-06,
      "loss": 0.0326,
      "step": 320100
    },
    {
      "epoch": 0.5238834010853414,
      "grad_norm": 0.6759063005447388,
      "learning_rate": 9.011221443961969e-06,
      "loss": 0.0474,
      "step": 320120
    },
    {
      "epoch": 0.5239161315239946,
      "grad_norm": 0.3720221221446991,
      "learning_rate": 9.01115555174845e-06,
      "loss": 0.0484,
      "step": 320140
    },
    {
      "epoch": 0.523948861962648,
      "grad_norm": 1.4223324060440063,
      "learning_rate": 9.011089659534934e-06,
      "loss": 0.0372,
      "step": 320160
    },
    {
      "epoch": 0.5239815924013014,
      "grad_norm": 0.21935026347637177,
      "learning_rate": 9.011023767321416e-06,
      "loss": 0.0281,
      "step": 320180
    },
    {
      "epoch": 0.5240143228399547,
      "grad_norm": 0.9308416843414307,
      "learning_rate": 9.0109578751079e-06,
      "loss": 0.0368,
      "step": 320200
    },
    {
      "epoch": 0.524047053278608,
      "grad_norm": 3.2662906646728516,
      "learning_rate": 9.010891982894382e-06,
      "loss": 0.0312,
      "step": 320220
    },
    {
      "epoch": 0.5240797837172614,
      "grad_norm": 1.2856749296188354,
      "learning_rate": 9.010826090680865e-06,
      "loss": 0.0529,
      "step": 320240
    },
    {
      "epoch": 0.5241125141559148,
      "grad_norm": 0.30793488025665283,
      "learning_rate": 9.010760198467347e-06,
      "loss": 0.0552,
      "step": 320260
    },
    {
      "epoch": 0.524145244594568,
      "grad_norm": 2.495248317718506,
      "learning_rate": 9.01069430625383e-06,
      "loss": 0.0408,
      "step": 320280
    },
    {
      "epoch": 0.5241779750332214,
      "grad_norm": 1.6541677713394165,
      "learning_rate": 9.010628414040313e-06,
      "loss": 0.0336,
      "step": 320300
    },
    {
      "epoch": 0.5242107054718748,
      "grad_norm": 2.075795888900757,
      "learning_rate": 9.010562521826796e-06,
      "loss": 0.0276,
      "step": 320320
    },
    {
      "epoch": 0.524243435910528,
      "grad_norm": 2.4563493728637695,
      "learning_rate": 9.01049662961328e-06,
      "loss": 0.0355,
      "step": 320340
    },
    {
      "epoch": 0.5242761663491814,
      "grad_norm": 2.3174684047698975,
      "learning_rate": 9.010430737399762e-06,
      "loss": 0.0377,
      "step": 320360
    },
    {
      "epoch": 0.5243088967878348,
      "grad_norm": 0.5574278831481934,
      "learning_rate": 9.010364845186245e-06,
      "loss": 0.0398,
      "step": 320380
    },
    {
      "epoch": 0.5243416272264881,
      "grad_norm": 0.3324914276599884,
      "learning_rate": 9.010298952972729e-06,
      "loss": 0.0389,
      "step": 320400
    },
    {
      "epoch": 0.5243743576651414,
      "grad_norm": 2.164320707321167,
      "learning_rate": 9.01023306075921e-06,
      "loss": 0.0496,
      "step": 320420
    },
    {
      "epoch": 0.5244070881037948,
      "grad_norm": 2.033217191696167,
      "learning_rate": 9.010167168545694e-06,
      "loss": 0.0469,
      "step": 320440
    },
    {
      "epoch": 0.5244398185424481,
      "grad_norm": 2.899845600128174,
      "learning_rate": 9.010101276332176e-06,
      "loss": 0.0305,
      "step": 320460
    },
    {
      "epoch": 0.5244725489811014,
      "grad_norm": 0.7273141145706177,
      "learning_rate": 9.01003538411866e-06,
      "loss": 0.0352,
      "step": 320480
    },
    {
      "epoch": 0.5245052794197548,
      "grad_norm": 0.48000699281692505,
      "learning_rate": 9.009969491905143e-06,
      "loss": 0.0448,
      "step": 320500
    },
    {
      "epoch": 0.5245380098584081,
      "grad_norm": 5.597713947296143,
      "learning_rate": 9.009903599691625e-06,
      "loss": 0.0348,
      "step": 320520
    },
    {
      "epoch": 0.5245707402970614,
      "grad_norm": 1.0392102003097534,
      "learning_rate": 9.009837707478109e-06,
      "loss": 0.0372,
      "step": 320540
    },
    {
      "epoch": 0.5246034707357148,
      "grad_norm": 1.1159536838531494,
      "learning_rate": 9.00977181526459e-06,
      "loss": 0.0416,
      "step": 320560
    },
    {
      "epoch": 0.5246362011743682,
      "grad_norm": 1.389185905456543,
      "learning_rate": 9.009705923051074e-06,
      "loss": 0.0404,
      "step": 320580
    },
    {
      "epoch": 0.5246689316130215,
      "grad_norm": 6.6160359382629395,
      "learning_rate": 9.009640030837556e-06,
      "loss": 0.0384,
      "step": 320600
    },
    {
      "epoch": 0.5247016620516748,
      "grad_norm": 1.078518271446228,
      "learning_rate": 9.00957413862404e-06,
      "loss": 0.0339,
      "step": 320620
    },
    {
      "epoch": 0.5247343924903282,
      "grad_norm": 1.8164763450622559,
      "learning_rate": 9.009508246410522e-06,
      "loss": 0.0361,
      "step": 320640
    },
    {
      "epoch": 0.5247671229289815,
      "grad_norm": 2.4744608402252197,
      "learning_rate": 9.009442354197005e-06,
      "loss": 0.0455,
      "step": 320660
    },
    {
      "epoch": 0.5247998533676348,
      "grad_norm": 2.2066195011138916,
      "learning_rate": 9.009376461983487e-06,
      "loss": 0.0357,
      "step": 320680
    },
    {
      "epoch": 0.5248325838062882,
      "grad_norm": 0.7878374457359314,
      "learning_rate": 9.00931056976997e-06,
      "loss": 0.0359,
      "step": 320700
    },
    {
      "epoch": 0.5248653142449415,
      "grad_norm": 0.5740084648132324,
      "learning_rate": 9.009244677556454e-06,
      "loss": 0.0331,
      "step": 320720
    },
    {
      "epoch": 0.5248980446835948,
      "grad_norm": 1.0096607208251953,
      "learning_rate": 9.009178785342936e-06,
      "loss": 0.0401,
      "step": 320740
    },
    {
      "epoch": 0.5249307751222482,
      "grad_norm": 0.47275257110595703,
      "learning_rate": 9.00911289312942e-06,
      "loss": 0.0381,
      "step": 320760
    },
    {
      "epoch": 0.5249635055609015,
      "grad_norm": 1.088036298751831,
      "learning_rate": 9.009047000915903e-06,
      "loss": 0.0327,
      "step": 320780
    },
    {
      "epoch": 0.5249962359995549,
      "grad_norm": 2.336615562438965,
      "learning_rate": 9.008981108702385e-06,
      "loss": 0.049,
      "step": 320800
    },
    {
      "epoch": 0.5250289664382082,
      "grad_norm": 1.5030932426452637,
      "learning_rate": 9.008915216488869e-06,
      "loss": 0.0426,
      "step": 320820
    },
    {
      "epoch": 0.5250616968768616,
      "grad_norm": 6.411459922790527,
      "learning_rate": 9.008849324275352e-06,
      "loss": 0.046,
      "step": 320840
    },
    {
      "epoch": 0.5250944273155149,
      "grad_norm": 0.6090806126594543,
      "learning_rate": 9.008783432061834e-06,
      "loss": 0.0388,
      "step": 320860
    },
    {
      "epoch": 0.5251271577541682,
      "grad_norm": 2.28304386138916,
      "learning_rate": 9.008717539848318e-06,
      "loss": 0.0448,
      "step": 320880
    },
    {
      "epoch": 0.5251598881928216,
      "grad_norm": 7.114634037017822,
      "learning_rate": 9.0086516476348e-06,
      "loss": 0.044,
      "step": 320900
    },
    {
      "epoch": 0.5251926186314749,
      "grad_norm": 0.979476273059845,
      "learning_rate": 9.008585755421283e-06,
      "loss": 0.0408,
      "step": 320920
    },
    {
      "epoch": 0.5252253490701282,
      "grad_norm": 1.109557032585144,
      "learning_rate": 9.008519863207765e-06,
      "loss": 0.0362,
      "step": 320940
    },
    {
      "epoch": 0.5252580795087816,
      "grad_norm": 1.476980209350586,
      "learning_rate": 9.008453970994249e-06,
      "loss": 0.0482,
      "step": 320960
    },
    {
      "epoch": 0.5252908099474349,
      "grad_norm": 0.39155521988868713,
      "learning_rate": 9.00838807878073e-06,
      "loss": 0.044,
      "step": 320980
    },
    {
      "epoch": 0.5253235403860883,
      "grad_norm": 0.20231492817401886,
      "learning_rate": 9.008322186567214e-06,
      "loss": 0.0467,
      "step": 321000
    },
    {
      "epoch": 0.5253562708247416,
      "grad_norm": 3.1938681602478027,
      "learning_rate": 9.008256294353696e-06,
      "loss": 0.0396,
      "step": 321020
    },
    {
      "epoch": 0.5253890012633949,
      "grad_norm": 1.6853569746017456,
      "learning_rate": 9.00819040214018e-06,
      "loss": 0.034,
      "step": 321040
    },
    {
      "epoch": 0.5254217317020483,
      "grad_norm": 1.3603317737579346,
      "learning_rate": 9.008124509926662e-06,
      "loss": 0.0444,
      "step": 321060
    },
    {
      "epoch": 0.5254544621407016,
      "grad_norm": 0.4649627208709717,
      "learning_rate": 9.008058617713145e-06,
      "loss": 0.035,
      "step": 321080
    },
    {
      "epoch": 0.525487192579355,
      "grad_norm": 0.8736140727996826,
      "learning_rate": 9.007992725499627e-06,
      "loss": 0.0341,
      "step": 321100
    },
    {
      "epoch": 0.5255199230180083,
      "grad_norm": 1.2880395650863647,
      "learning_rate": 9.00792683328611e-06,
      "loss": 0.0463,
      "step": 321120
    },
    {
      "epoch": 0.5255526534566616,
      "grad_norm": 1.170261025428772,
      "learning_rate": 9.007860941072594e-06,
      "loss": 0.0424,
      "step": 321140
    },
    {
      "epoch": 0.525585383895315,
      "grad_norm": 0.24385353922843933,
      "learning_rate": 9.007795048859076e-06,
      "loss": 0.0413,
      "step": 321160
    },
    {
      "epoch": 0.5256181143339683,
      "grad_norm": 0.9368621110916138,
      "learning_rate": 9.00772915664556e-06,
      "loss": 0.051,
      "step": 321180
    },
    {
      "epoch": 0.5256508447726217,
      "grad_norm": 1.6880214214324951,
      "learning_rate": 9.007663264432043e-06,
      "loss": 0.0362,
      "step": 321200
    },
    {
      "epoch": 0.525683575211275,
      "grad_norm": 0.8350426554679871,
      "learning_rate": 9.007597372218525e-06,
      "loss": 0.0445,
      "step": 321220
    },
    {
      "epoch": 0.5257163056499283,
      "grad_norm": 0.8166918754577637,
      "learning_rate": 9.007531480005009e-06,
      "loss": 0.0263,
      "step": 321240
    },
    {
      "epoch": 0.5257490360885817,
      "grad_norm": 1.0709993839263916,
      "learning_rate": 9.007465587791493e-06,
      "loss": 0.0372,
      "step": 321260
    },
    {
      "epoch": 0.525781766527235,
      "grad_norm": 3.195805549621582,
      "learning_rate": 9.007399695577974e-06,
      "loss": 0.0471,
      "step": 321280
    },
    {
      "epoch": 0.5258144969658883,
      "grad_norm": 0.8936901092529297,
      "learning_rate": 9.007333803364458e-06,
      "loss": 0.0308,
      "step": 321300
    },
    {
      "epoch": 0.5258472274045417,
      "grad_norm": 2.078638792037964,
      "learning_rate": 9.00726791115094e-06,
      "loss": 0.0425,
      "step": 321320
    },
    {
      "epoch": 0.525879957843195,
      "grad_norm": 0.37912654876708984,
      "learning_rate": 9.007202018937423e-06,
      "loss": 0.0343,
      "step": 321340
    },
    {
      "epoch": 0.5259126882818483,
      "grad_norm": 1.2708014249801636,
      "learning_rate": 9.007136126723905e-06,
      "loss": 0.0324,
      "step": 321360
    },
    {
      "epoch": 0.5259454187205017,
      "grad_norm": 1.453118085861206,
      "learning_rate": 9.007070234510389e-06,
      "loss": 0.0319,
      "step": 321380
    },
    {
      "epoch": 0.5259781491591551,
      "grad_norm": 2.3935530185699463,
      "learning_rate": 9.007004342296871e-06,
      "loss": 0.0333,
      "step": 321400
    },
    {
      "epoch": 0.5260108795978083,
      "grad_norm": 5.750962734222412,
      "learning_rate": 9.006938450083354e-06,
      "loss": 0.0539,
      "step": 321420
    },
    {
      "epoch": 0.5260436100364617,
      "grad_norm": 0.8190248012542725,
      "learning_rate": 9.006872557869836e-06,
      "loss": 0.0385,
      "step": 321440
    },
    {
      "epoch": 0.5260763404751151,
      "grad_norm": 0.9304693341255188,
      "learning_rate": 9.00680666565632e-06,
      "loss": 0.0373,
      "step": 321460
    },
    {
      "epoch": 0.5261090709137684,
      "grad_norm": 0.9885850548744202,
      "learning_rate": 9.006740773442802e-06,
      "loss": 0.0375,
      "step": 321480
    },
    {
      "epoch": 0.5261418013524217,
      "grad_norm": 3.9156479835510254,
      "learning_rate": 9.006674881229285e-06,
      "loss": 0.0409,
      "step": 321500
    },
    {
      "epoch": 0.5261745317910751,
      "grad_norm": 3.1758828163146973,
      "learning_rate": 9.006608989015769e-06,
      "loss": 0.0384,
      "step": 321520
    },
    {
      "epoch": 0.5262072622297284,
      "grad_norm": 1.9032845497131348,
      "learning_rate": 9.006543096802251e-06,
      "loss": 0.0353,
      "step": 321540
    },
    {
      "epoch": 0.5262399926683817,
      "grad_norm": 4.755101203918457,
      "learning_rate": 9.006477204588734e-06,
      "loss": 0.0288,
      "step": 321560
    },
    {
      "epoch": 0.5262727231070351,
      "grad_norm": 0.5661947727203369,
      "learning_rate": 9.006411312375218e-06,
      "loss": 0.0392,
      "step": 321580
    },
    {
      "epoch": 0.5263054535456885,
      "grad_norm": 1.9170150756835938,
      "learning_rate": 9.0063454201617e-06,
      "loss": 0.0449,
      "step": 321600
    },
    {
      "epoch": 0.5263381839843417,
      "grad_norm": 0.6307896375656128,
      "learning_rate": 9.006279527948184e-06,
      "loss": 0.0455,
      "step": 321620
    },
    {
      "epoch": 0.5263709144229951,
      "grad_norm": 0.8113442659378052,
      "learning_rate": 9.006213635734667e-06,
      "loss": 0.0362,
      "step": 321640
    },
    {
      "epoch": 0.5264036448616485,
      "grad_norm": 1.0429439544677734,
      "learning_rate": 9.006147743521149e-06,
      "loss": 0.0437,
      "step": 321660
    },
    {
      "epoch": 0.5264363753003017,
      "grad_norm": 0.411929190158844,
      "learning_rate": 9.006081851307633e-06,
      "loss": 0.0387,
      "step": 321680
    },
    {
      "epoch": 0.5264691057389551,
      "grad_norm": 0.3422516882419586,
      "learning_rate": 9.006015959094114e-06,
      "loss": 0.0428,
      "step": 321700
    },
    {
      "epoch": 0.5265018361776085,
      "grad_norm": 0.4590636193752289,
      "learning_rate": 9.005950066880598e-06,
      "loss": 0.0407,
      "step": 321720
    },
    {
      "epoch": 0.5265345666162617,
      "grad_norm": 0.2826850116252899,
      "learning_rate": 9.00588417466708e-06,
      "loss": 0.0434,
      "step": 321740
    },
    {
      "epoch": 0.5265672970549151,
      "grad_norm": 1.9817583560943604,
      "learning_rate": 9.005818282453564e-06,
      "loss": 0.0339,
      "step": 321760
    },
    {
      "epoch": 0.5266000274935685,
      "grad_norm": 1.5724302530288696,
      "learning_rate": 9.005752390240045e-06,
      "loss": 0.0387,
      "step": 321780
    },
    {
      "epoch": 0.5266327579322219,
      "grad_norm": 2.5544824600219727,
      "learning_rate": 9.005686498026529e-06,
      "loss": 0.045,
      "step": 321800
    },
    {
      "epoch": 0.5266654883708751,
      "grad_norm": 1.9778621196746826,
      "learning_rate": 9.005620605813011e-06,
      "loss": 0.0361,
      "step": 321820
    },
    {
      "epoch": 0.5266982188095285,
      "grad_norm": 1.0710816383361816,
      "learning_rate": 9.005554713599495e-06,
      "loss": 0.0344,
      "step": 321840
    },
    {
      "epoch": 0.5267309492481819,
      "grad_norm": 0.6187646389007568,
      "learning_rate": 9.005488821385976e-06,
      "loss": 0.0398,
      "step": 321860
    },
    {
      "epoch": 0.5267636796868351,
      "grad_norm": 1.6056478023529053,
      "learning_rate": 9.00542292917246e-06,
      "loss": 0.0426,
      "step": 321880
    },
    {
      "epoch": 0.5267964101254885,
      "grad_norm": 3.641784906387329,
      "learning_rate": 9.005357036958944e-06,
      "loss": 0.0517,
      "step": 321900
    },
    {
      "epoch": 0.5268291405641419,
      "grad_norm": 0.6454817056655884,
      "learning_rate": 9.005291144745425e-06,
      "loss": 0.0292,
      "step": 321920
    },
    {
      "epoch": 0.5268618710027951,
      "grad_norm": 1.2808841466903687,
      "learning_rate": 9.005225252531909e-06,
      "loss": 0.0299,
      "step": 321940
    },
    {
      "epoch": 0.5268946014414485,
      "grad_norm": 2.0236449241638184,
      "learning_rate": 9.005159360318391e-06,
      "loss": 0.0419,
      "step": 321960
    },
    {
      "epoch": 0.5269273318801019,
      "grad_norm": 1.5347696542739868,
      "learning_rate": 9.005093468104875e-06,
      "loss": 0.049,
      "step": 321980
    },
    {
      "epoch": 0.5269600623187553,
      "grad_norm": 0.8825255632400513,
      "learning_rate": 9.005027575891358e-06,
      "loss": 0.0464,
      "step": 322000
    },
    {
      "epoch": 0.5269927927574085,
      "grad_norm": 0.7538043260574341,
      "learning_rate": 9.00496168367784e-06,
      "loss": 0.038,
      "step": 322020
    },
    {
      "epoch": 0.5270255231960619,
      "grad_norm": 0.7157748937606812,
      "learning_rate": 9.004895791464324e-06,
      "loss": 0.0429,
      "step": 322040
    },
    {
      "epoch": 0.5270582536347153,
      "grad_norm": 1.0392173528671265,
      "learning_rate": 9.004829899250807e-06,
      "loss": 0.042,
      "step": 322060
    },
    {
      "epoch": 0.5270909840733685,
      "grad_norm": 1.8105895519256592,
      "learning_rate": 9.004764007037289e-06,
      "loss": 0.0371,
      "step": 322080
    },
    {
      "epoch": 0.5271237145120219,
      "grad_norm": 0.8991886377334595,
      "learning_rate": 9.004698114823773e-06,
      "loss": 0.0368,
      "step": 322100
    },
    {
      "epoch": 0.5271564449506753,
      "grad_norm": 3.143153190612793,
      "learning_rate": 9.004632222610255e-06,
      "loss": 0.0384,
      "step": 322120
    },
    {
      "epoch": 0.5271891753893285,
      "grad_norm": 2.065329074859619,
      "learning_rate": 9.004566330396738e-06,
      "loss": 0.0341,
      "step": 322140
    },
    {
      "epoch": 0.5272219058279819,
      "grad_norm": 1.6263214349746704,
      "learning_rate": 9.00450043818322e-06,
      "loss": 0.0421,
      "step": 322160
    },
    {
      "epoch": 0.5272546362666353,
      "grad_norm": 1.949426531791687,
      "learning_rate": 9.004434545969704e-06,
      "loss": 0.0489,
      "step": 322180
    },
    {
      "epoch": 0.5272873667052885,
      "grad_norm": 0.7919332385063171,
      "learning_rate": 9.004368653756186e-06,
      "loss": 0.0423,
      "step": 322200
    },
    {
      "epoch": 0.5273200971439419,
      "grad_norm": 2.8325161933898926,
      "learning_rate": 9.004302761542669e-06,
      "loss": 0.0491,
      "step": 322220
    },
    {
      "epoch": 0.5273528275825953,
      "grad_norm": 0.19986742734909058,
      "learning_rate": 9.004236869329153e-06,
      "loss": 0.0317,
      "step": 322240
    },
    {
      "epoch": 0.5273855580212486,
      "grad_norm": 1.2373058795928955,
      "learning_rate": 9.004170977115635e-06,
      "loss": 0.0396,
      "step": 322260
    },
    {
      "epoch": 0.5274182884599019,
      "grad_norm": 4.695120334625244,
      "learning_rate": 9.004105084902118e-06,
      "loss": 0.0486,
      "step": 322280
    },
    {
      "epoch": 0.5274510188985553,
      "grad_norm": 2.293971061706543,
      "learning_rate": 9.0040391926886e-06,
      "loss": 0.0433,
      "step": 322300
    },
    {
      "epoch": 0.5274837493372087,
      "grad_norm": 1.6647714376449585,
      "learning_rate": 9.003973300475084e-06,
      "loss": 0.0443,
      "step": 322320
    },
    {
      "epoch": 0.5275164797758619,
      "grad_norm": 1.7254737615585327,
      "learning_rate": 9.003907408261566e-06,
      "loss": 0.0475,
      "step": 322340
    },
    {
      "epoch": 0.5275492102145153,
      "grad_norm": 0.9261146187782288,
      "learning_rate": 9.003841516048049e-06,
      "loss": 0.0336,
      "step": 322360
    },
    {
      "epoch": 0.5275819406531687,
      "grad_norm": 0.905698299407959,
      "learning_rate": 9.003775623834533e-06,
      "loss": 0.0393,
      "step": 322380
    },
    {
      "epoch": 0.5276146710918219,
      "grad_norm": 0.879324734210968,
      "learning_rate": 9.003709731621015e-06,
      "loss": 0.0453,
      "step": 322400
    },
    {
      "epoch": 0.5276474015304753,
      "grad_norm": 1.4336943626403809,
      "learning_rate": 9.003643839407498e-06,
      "loss": 0.0388,
      "step": 322420
    },
    {
      "epoch": 0.5276801319691287,
      "grad_norm": 1.0605032444000244,
      "learning_rate": 9.003577947193982e-06,
      "loss": 0.0376,
      "step": 322440
    },
    {
      "epoch": 0.527712862407782,
      "grad_norm": 1.6812251806259155,
      "learning_rate": 9.003512054980464e-06,
      "loss": 0.0421,
      "step": 322460
    },
    {
      "epoch": 0.5277455928464353,
      "grad_norm": 0.6599750518798828,
      "learning_rate": 9.003446162766947e-06,
      "loss": 0.0389,
      "step": 322480
    },
    {
      "epoch": 0.5277783232850887,
      "grad_norm": 0.7727558612823486,
      "learning_rate": 9.00338027055343e-06,
      "loss": 0.0333,
      "step": 322500
    },
    {
      "epoch": 0.527811053723742,
      "grad_norm": 0.924167811870575,
      "learning_rate": 9.003314378339913e-06,
      "loss": 0.0302,
      "step": 322520
    },
    {
      "epoch": 0.5278437841623953,
      "grad_norm": 2.413177490234375,
      "learning_rate": 9.003248486126395e-06,
      "loss": 0.037,
      "step": 322540
    },
    {
      "epoch": 0.5278765146010487,
      "grad_norm": 0.48840585350990295,
      "learning_rate": 9.003182593912878e-06,
      "loss": 0.0289,
      "step": 322560
    },
    {
      "epoch": 0.527909245039702,
      "grad_norm": 1.9074457883834839,
      "learning_rate": 9.003116701699362e-06,
      "loss": 0.028,
      "step": 322580
    },
    {
      "epoch": 0.5279419754783553,
      "grad_norm": 2.191323757171631,
      "learning_rate": 9.003050809485844e-06,
      "loss": 0.0398,
      "step": 322600
    },
    {
      "epoch": 0.5279747059170087,
      "grad_norm": 2.4822518825531006,
      "learning_rate": 9.002984917272327e-06,
      "loss": 0.0351,
      "step": 322620
    },
    {
      "epoch": 0.528007436355662,
      "grad_norm": 1.3792164325714111,
      "learning_rate": 9.00291902505881e-06,
      "loss": 0.0287,
      "step": 322640
    },
    {
      "epoch": 0.5280401667943154,
      "grad_norm": 1.436574101448059,
      "learning_rate": 9.002853132845293e-06,
      "loss": 0.0369,
      "step": 322660
    },
    {
      "epoch": 0.5280728972329687,
      "grad_norm": 3.3924598693847656,
      "learning_rate": 9.002787240631775e-06,
      "loss": 0.0355,
      "step": 322680
    },
    {
      "epoch": 0.5281056276716221,
      "grad_norm": 4.713785648345947,
      "learning_rate": 9.002721348418258e-06,
      "loss": 0.0586,
      "step": 322700
    },
    {
      "epoch": 0.5281383581102754,
      "grad_norm": 1.4857609272003174,
      "learning_rate": 9.00265545620474e-06,
      "loss": 0.0479,
      "step": 322720
    },
    {
      "epoch": 0.5281710885489287,
      "grad_norm": 1.1297541856765747,
      "learning_rate": 9.002589563991224e-06,
      "loss": 0.025,
      "step": 322740
    },
    {
      "epoch": 0.5282038189875821,
      "grad_norm": 2.656796932220459,
      "learning_rate": 9.002523671777707e-06,
      "loss": 0.0444,
      "step": 322760
    },
    {
      "epoch": 0.5282365494262354,
      "grad_norm": 0.66395503282547,
      "learning_rate": 9.00245777956419e-06,
      "loss": 0.0441,
      "step": 322780
    },
    {
      "epoch": 0.5282692798648887,
      "grad_norm": 0.36976924538612366,
      "learning_rate": 9.002391887350673e-06,
      "loss": 0.0321,
      "step": 322800
    },
    {
      "epoch": 0.5283020103035421,
      "grad_norm": 0.5184118151664734,
      "learning_rate": 9.002325995137156e-06,
      "loss": 0.0407,
      "step": 322820
    },
    {
      "epoch": 0.5283347407421954,
      "grad_norm": 0.8787347078323364,
      "learning_rate": 9.002260102923638e-06,
      "loss": 0.0411,
      "step": 322840
    },
    {
      "epoch": 0.5283674711808488,
      "grad_norm": 2.7847518920898438,
      "learning_rate": 9.002194210710122e-06,
      "loss": 0.0394,
      "step": 322860
    },
    {
      "epoch": 0.5284002016195021,
      "grad_norm": 3.7943947315216064,
      "learning_rate": 9.002128318496604e-06,
      "loss": 0.0302,
      "step": 322880
    },
    {
      "epoch": 0.5284329320581554,
      "grad_norm": 2.406744956970215,
      "learning_rate": 9.002062426283087e-06,
      "loss": 0.0285,
      "step": 322900
    },
    {
      "epoch": 0.5284656624968088,
      "grad_norm": 0.9438866972923279,
      "learning_rate": 9.00199653406957e-06,
      "loss": 0.045,
      "step": 322920
    },
    {
      "epoch": 0.5284983929354621,
      "grad_norm": 1.9930444955825806,
      "learning_rate": 9.001930641856053e-06,
      "loss": 0.041,
      "step": 322940
    },
    {
      "epoch": 0.5285311233741155,
      "grad_norm": 0.35761573910713196,
      "learning_rate": 9.001864749642536e-06,
      "loss": 0.0365,
      "step": 322960
    },
    {
      "epoch": 0.5285638538127688,
      "grad_norm": 1.2405236959457397,
      "learning_rate": 9.001798857429018e-06,
      "loss": 0.0367,
      "step": 322980
    },
    {
      "epoch": 0.5285965842514221,
      "grad_norm": 2.4076952934265137,
      "learning_rate": 9.001732965215502e-06,
      "loss": 0.0404,
      "step": 323000
    },
    {
      "epoch": 0.5286293146900755,
      "grad_norm": 0.3407476842403412,
      "learning_rate": 9.001667073001984e-06,
      "loss": 0.0356,
      "step": 323020
    },
    {
      "epoch": 0.5286620451287288,
      "grad_norm": 1.0508760213851929,
      "learning_rate": 9.001601180788467e-06,
      "loss": 0.0484,
      "step": 323040
    },
    {
      "epoch": 0.5286947755673822,
      "grad_norm": 1.1819382905960083,
      "learning_rate": 9.00153528857495e-06,
      "loss": 0.0423,
      "step": 323060
    },
    {
      "epoch": 0.5287275060060355,
      "grad_norm": 1.6625694036483765,
      "learning_rate": 9.001469396361433e-06,
      "loss": 0.0373,
      "step": 323080
    },
    {
      "epoch": 0.5287602364446888,
      "grad_norm": 1.4619678258895874,
      "learning_rate": 9.001403504147915e-06,
      "loss": 0.0354,
      "step": 323100
    },
    {
      "epoch": 0.5287929668833422,
      "grad_norm": 3.144800901412964,
      "learning_rate": 9.001337611934398e-06,
      "loss": 0.0373,
      "step": 323120
    },
    {
      "epoch": 0.5288256973219955,
      "grad_norm": 0.8678011894226074,
      "learning_rate": 9.00127171972088e-06,
      "loss": 0.0223,
      "step": 323140
    },
    {
      "epoch": 0.5288584277606488,
      "grad_norm": 0.18681952357292175,
      "learning_rate": 9.001205827507364e-06,
      "loss": 0.0303,
      "step": 323160
    },
    {
      "epoch": 0.5288911581993022,
      "grad_norm": 0.45367881655693054,
      "learning_rate": 9.001139935293847e-06,
      "loss": 0.062,
      "step": 323180
    },
    {
      "epoch": 0.5289238886379555,
      "grad_norm": 1.3331331014633179,
      "learning_rate": 9.00107404308033e-06,
      "loss": 0.0433,
      "step": 323200
    },
    {
      "epoch": 0.5289566190766088,
      "grad_norm": 1.7914156913757324,
      "learning_rate": 9.001008150866813e-06,
      "loss": 0.0478,
      "step": 323220
    },
    {
      "epoch": 0.5289893495152622,
      "grad_norm": 0.9324905276298523,
      "learning_rate": 9.000942258653296e-06,
      "loss": 0.0445,
      "step": 323240
    },
    {
      "epoch": 0.5290220799539156,
      "grad_norm": 0.882290244102478,
      "learning_rate": 9.000876366439778e-06,
      "loss": 0.0374,
      "step": 323260
    },
    {
      "epoch": 0.5290548103925689,
      "grad_norm": 1.263942003250122,
      "learning_rate": 9.000810474226262e-06,
      "loss": 0.0457,
      "step": 323280
    },
    {
      "epoch": 0.5290875408312222,
      "grad_norm": 1.4343159198760986,
      "learning_rate": 9.000744582012746e-06,
      "loss": 0.0422,
      "step": 323300
    },
    {
      "epoch": 0.5291202712698756,
      "grad_norm": 2.7142069339752197,
      "learning_rate": 9.000678689799227e-06,
      "loss": 0.0447,
      "step": 323320
    },
    {
      "epoch": 0.5291530017085289,
      "grad_norm": 1.5309269428253174,
      "learning_rate": 9.000612797585711e-06,
      "loss": 0.0428,
      "step": 323340
    },
    {
      "epoch": 0.5291857321471822,
      "grad_norm": 1.4320733547210693,
      "learning_rate": 9.000546905372193e-06,
      "loss": 0.0457,
      "step": 323360
    },
    {
      "epoch": 0.5292184625858356,
      "grad_norm": 1.22394597530365,
      "learning_rate": 9.000481013158676e-06,
      "loss": 0.0323,
      "step": 323380
    },
    {
      "epoch": 0.5292511930244889,
      "grad_norm": 0.4229423999786377,
      "learning_rate": 9.000415120945158e-06,
      "loss": 0.0315,
      "step": 323400
    },
    {
      "epoch": 0.5292839234631422,
      "grad_norm": 1.2862721681594849,
      "learning_rate": 9.000349228731642e-06,
      "loss": 0.0331,
      "step": 323420
    },
    {
      "epoch": 0.5293166539017956,
      "grad_norm": 0.7886723875999451,
      "learning_rate": 9.000283336518124e-06,
      "loss": 0.0358,
      "step": 323440
    },
    {
      "epoch": 0.529349384340449,
      "grad_norm": 1.5365411043167114,
      "learning_rate": 9.000217444304607e-06,
      "loss": 0.0294,
      "step": 323460
    },
    {
      "epoch": 0.5293821147791022,
      "grad_norm": 1.2204490900039673,
      "learning_rate": 9.00015155209109e-06,
      "loss": 0.0341,
      "step": 323480
    },
    {
      "epoch": 0.5294148452177556,
      "grad_norm": 0.3617977797985077,
      "learning_rate": 9.000085659877573e-06,
      "loss": 0.031,
      "step": 323500
    },
    {
      "epoch": 0.529447575656409,
      "grad_norm": 0.5203657150268555,
      "learning_rate": 9.000019767664055e-06,
      "loss": 0.0378,
      "step": 323520
    },
    {
      "epoch": 0.5294803060950622,
      "grad_norm": 0.5711348652839661,
      "learning_rate": 8.999953875450538e-06,
      "loss": 0.0311,
      "step": 323540
    },
    {
      "epoch": 0.5295130365337156,
      "grad_norm": 1.8704595565795898,
      "learning_rate": 8.999887983237022e-06,
      "loss": 0.0397,
      "step": 323560
    },
    {
      "epoch": 0.529545766972369,
      "grad_norm": 1.10552179813385,
      "learning_rate": 8.999822091023504e-06,
      "loss": 0.0325,
      "step": 323580
    },
    {
      "epoch": 0.5295784974110223,
      "grad_norm": 0.5421335101127625,
      "learning_rate": 8.999756198809987e-06,
      "loss": 0.0419,
      "step": 323600
    },
    {
      "epoch": 0.5296112278496756,
      "grad_norm": 0.86719810962677,
      "learning_rate": 8.999690306596471e-06,
      "loss": 0.0273,
      "step": 323620
    },
    {
      "epoch": 0.529643958288329,
      "grad_norm": 1.8884940147399902,
      "learning_rate": 8.999624414382953e-06,
      "loss": 0.0489,
      "step": 323640
    },
    {
      "epoch": 0.5296766887269824,
      "grad_norm": 1.8173272609710693,
      "learning_rate": 8.999558522169437e-06,
      "loss": 0.0454,
      "step": 323660
    },
    {
      "epoch": 0.5297094191656356,
      "grad_norm": 3.6206772327423096,
      "learning_rate": 8.99949262995592e-06,
      "loss": 0.0405,
      "step": 323680
    },
    {
      "epoch": 0.529742149604289,
      "grad_norm": 2.0353164672851562,
      "learning_rate": 8.999426737742402e-06,
      "loss": 0.0368,
      "step": 323700
    },
    {
      "epoch": 0.5297748800429424,
      "grad_norm": 2.040999174118042,
      "learning_rate": 8.999360845528886e-06,
      "loss": 0.0396,
      "step": 323720
    },
    {
      "epoch": 0.5298076104815956,
      "grad_norm": 0.8379381895065308,
      "learning_rate": 8.999294953315367e-06,
      "loss": 0.0466,
      "step": 323740
    },
    {
      "epoch": 0.529840340920249,
      "grad_norm": 3.3904266357421875,
      "learning_rate": 8.999229061101851e-06,
      "loss": 0.0339,
      "step": 323760
    },
    {
      "epoch": 0.5298730713589024,
      "grad_norm": 1.6890660524368286,
      "learning_rate": 8.999163168888333e-06,
      "loss": 0.0289,
      "step": 323780
    },
    {
      "epoch": 0.5299058017975556,
      "grad_norm": 0.7139122486114502,
      "learning_rate": 8.999097276674817e-06,
      "loss": 0.0414,
      "step": 323800
    },
    {
      "epoch": 0.529938532236209,
      "grad_norm": 0.8187339305877686,
      "learning_rate": 8.999031384461298e-06,
      "loss": 0.0553,
      "step": 323820
    },
    {
      "epoch": 0.5299712626748624,
      "grad_norm": 1.691314458847046,
      "learning_rate": 8.998965492247782e-06,
      "loss": 0.0402,
      "step": 323840
    },
    {
      "epoch": 0.5300039931135158,
      "grad_norm": 2.389847993850708,
      "learning_rate": 8.998899600034264e-06,
      "loss": 0.0344,
      "step": 323860
    },
    {
      "epoch": 0.530036723552169,
      "grad_norm": 1.5664489269256592,
      "learning_rate": 8.998833707820748e-06,
      "loss": 0.0409,
      "step": 323880
    },
    {
      "epoch": 0.5300694539908224,
      "grad_norm": 1.092448353767395,
      "learning_rate": 8.99876781560723e-06,
      "loss": 0.0426,
      "step": 323900
    },
    {
      "epoch": 0.5301021844294758,
      "grad_norm": 1.7655199766159058,
      "learning_rate": 8.998701923393713e-06,
      "loss": 0.0416,
      "step": 323920
    },
    {
      "epoch": 0.530134914868129,
      "grad_norm": 2.206120014190674,
      "learning_rate": 8.998636031180195e-06,
      "loss": 0.0446,
      "step": 323940
    },
    {
      "epoch": 0.5301676453067824,
      "grad_norm": 0.24565385282039642,
      "learning_rate": 8.998570138966678e-06,
      "loss": 0.0443,
      "step": 323960
    },
    {
      "epoch": 0.5302003757454358,
      "grad_norm": 0.4151856601238251,
      "learning_rate": 8.998504246753162e-06,
      "loss": 0.0315,
      "step": 323980
    },
    {
      "epoch": 0.530233106184089,
      "grad_norm": 0.6586160659790039,
      "learning_rate": 8.998438354539644e-06,
      "loss": 0.0339,
      "step": 324000
    },
    {
      "epoch": 0.5302658366227424,
      "grad_norm": 2.0077528953552246,
      "learning_rate": 8.998372462326128e-06,
      "loss": 0.0319,
      "step": 324020
    },
    {
      "epoch": 0.5302985670613958,
      "grad_norm": 0.6652262210845947,
      "learning_rate": 8.998306570112611e-06,
      "loss": 0.0453,
      "step": 324040
    },
    {
      "epoch": 0.5303312975000491,
      "grad_norm": 1.5759251117706299,
      "learning_rate": 8.998240677899093e-06,
      "loss": 0.0404,
      "step": 324060
    },
    {
      "epoch": 0.5303640279387024,
      "grad_norm": 4.299671173095703,
      "learning_rate": 8.998174785685577e-06,
      "loss": 0.04,
      "step": 324080
    },
    {
      "epoch": 0.5303967583773558,
      "grad_norm": 2.2159836292266846,
      "learning_rate": 8.99810889347206e-06,
      "loss": 0.0374,
      "step": 324100
    },
    {
      "epoch": 0.5304294888160092,
      "grad_norm": 3.9766016006469727,
      "learning_rate": 8.998043001258542e-06,
      "loss": 0.035,
      "step": 324120
    },
    {
      "epoch": 0.5304622192546624,
      "grad_norm": 0.48447754979133606,
      "learning_rate": 8.997977109045026e-06,
      "loss": 0.0507,
      "step": 324140
    },
    {
      "epoch": 0.5304949496933158,
      "grad_norm": 2.6934897899627686,
      "learning_rate": 8.997911216831508e-06,
      "loss": 0.0492,
      "step": 324160
    },
    {
      "epoch": 0.5305276801319692,
      "grad_norm": 0.6807888746261597,
      "learning_rate": 8.997845324617991e-06,
      "loss": 0.0434,
      "step": 324180
    },
    {
      "epoch": 0.5305604105706224,
      "grad_norm": 1.0319393873214722,
      "learning_rate": 8.997779432404473e-06,
      "loss": 0.0212,
      "step": 324200
    },
    {
      "epoch": 0.5305931410092758,
      "grad_norm": 1.6570247411727905,
      "learning_rate": 8.997713540190957e-06,
      "loss": 0.0386,
      "step": 324220
    },
    {
      "epoch": 0.5306258714479292,
      "grad_norm": 0.8260483741760254,
      "learning_rate": 8.997647647977439e-06,
      "loss": 0.0479,
      "step": 324240
    },
    {
      "epoch": 0.5306586018865825,
      "grad_norm": 1.2675594091415405,
      "learning_rate": 8.997581755763922e-06,
      "loss": 0.0414,
      "step": 324260
    },
    {
      "epoch": 0.5306913323252358,
      "grad_norm": 1.1422852277755737,
      "learning_rate": 8.997515863550404e-06,
      "loss": 0.0439,
      "step": 324280
    },
    {
      "epoch": 0.5307240627638892,
      "grad_norm": 0.8962958455085754,
      "learning_rate": 8.997449971336888e-06,
      "loss": 0.0481,
      "step": 324300
    },
    {
      "epoch": 0.5307567932025425,
      "grad_norm": 1.2738324403762817,
      "learning_rate": 8.99738407912337e-06,
      "loss": 0.0377,
      "step": 324320
    },
    {
      "epoch": 0.5307895236411958,
      "grad_norm": 1.3174896240234375,
      "learning_rate": 8.997318186909853e-06,
      "loss": 0.0372,
      "step": 324340
    },
    {
      "epoch": 0.5308222540798492,
      "grad_norm": 1.5415345430374146,
      "learning_rate": 8.997252294696337e-06,
      "loss": 0.0419,
      "step": 324360
    },
    {
      "epoch": 0.5308549845185025,
      "grad_norm": 2.6264193058013916,
      "learning_rate": 8.997186402482819e-06,
      "loss": 0.0423,
      "step": 324380
    },
    {
      "epoch": 0.5308877149571558,
      "grad_norm": 0.47106897830963135,
      "learning_rate": 8.997120510269302e-06,
      "loss": 0.0409,
      "step": 324400
    },
    {
      "epoch": 0.5309204453958092,
      "grad_norm": 1.5446618795394897,
      "learning_rate": 8.997054618055786e-06,
      "loss": 0.0358,
      "step": 324420
    },
    {
      "epoch": 0.5309531758344626,
      "grad_norm": 1.1512699127197266,
      "learning_rate": 8.996988725842268e-06,
      "loss": 0.0238,
      "step": 324440
    },
    {
      "epoch": 0.5309859062731159,
      "grad_norm": 0.3871457874774933,
      "learning_rate": 8.996922833628751e-06,
      "loss": 0.0432,
      "step": 324460
    },
    {
      "epoch": 0.5310186367117692,
      "grad_norm": 7.898376941680908,
      "learning_rate": 8.996856941415235e-06,
      "loss": 0.0498,
      "step": 324480
    },
    {
      "epoch": 0.5310513671504226,
      "grad_norm": 0.8048083186149597,
      "learning_rate": 8.996791049201717e-06,
      "loss": 0.028,
      "step": 324500
    },
    {
      "epoch": 0.5310840975890759,
      "grad_norm": 4.572351932525635,
      "learning_rate": 8.9967251569882e-06,
      "loss": 0.041,
      "step": 324520
    },
    {
      "epoch": 0.5311168280277292,
      "grad_norm": 0.3410393297672272,
      "learning_rate": 8.996659264774682e-06,
      "loss": 0.0391,
      "step": 324540
    },
    {
      "epoch": 0.5311495584663826,
      "grad_norm": 0.8605272173881531,
      "learning_rate": 8.996593372561166e-06,
      "loss": 0.0387,
      "step": 324560
    },
    {
      "epoch": 0.5311822889050359,
      "grad_norm": 1.4184913635253906,
      "learning_rate": 8.996527480347648e-06,
      "loss": 0.0421,
      "step": 324580
    },
    {
      "epoch": 0.5312150193436892,
      "grad_norm": 1.366952896118164,
      "learning_rate": 8.996461588134131e-06,
      "loss": 0.027,
      "step": 324600
    },
    {
      "epoch": 0.5312477497823426,
      "grad_norm": 0.3289727568626404,
      "learning_rate": 8.996395695920613e-06,
      "loss": 0.0291,
      "step": 324620
    },
    {
      "epoch": 0.5312804802209959,
      "grad_norm": 1.7568440437316895,
      "learning_rate": 8.996329803707097e-06,
      "loss": 0.0391,
      "step": 324640
    },
    {
      "epoch": 0.5313132106596493,
      "grad_norm": 1.1977214813232422,
      "learning_rate": 8.996263911493579e-06,
      "loss": 0.0492,
      "step": 324660
    },
    {
      "epoch": 0.5313459410983026,
      "grad_norm": 0.22769182920455933,
      "learning_rate": 8.996198019280062e-06,
      "loss": 0.0391,
      "step": 324680
    },
    {
      "epoch": 0.531378671536956,
      "grad_norm": 2.2905237674713135,
      "learning_rate": 8.996132127066546e-06,
      "loss": 0.0554,
      "step": 324700
    },
    {
      "epoch": 0.5314114019756093,
      "grad_norm": 0.6169307231903076,
      "learning_rate": 8.996066234853028e-06,
      "loss": 0.0578,
      "step": 324720
    },
    {
      "epoch": 0.5314441324142626,
      "grad_norm": 1.6067790985107422,
      "learning_rate": 8.996000342639511e-06,
      "loss": 0.0376,
      "step": 324740
    },
    {
      "epoch": 0.531476862852916,
      "grad_norm": 0.837751030921936,
      "learning_rate": 8.995934450425993e-06,
      "loss": 0.0345,
      "step": 324760
    },
    {
      "epoch": 0.5315095932915693,
      "grad_norm": 0.8765996098518372,
      "learning_rate": 8.995868558212477e-06,
      "loss": 0.0388,
      "step": 324780
    },
    {
      "epoch": 0.5315423237302226,
      "grad_norm": 0.7072005271911621,
      "learning_rate": 8.995802665998959e-06,
      "loss": 0.0379,
      "step": 324800
    },
    {
      "epoch": 0.531575054168876,
      "grad_norm": 1.3437474966049194,
      "learning_rate": 8.995736773785442e-06,
      "loss": 0.0384,
      "step": 324820
    },
    {
      "epoch": 0.5316077846075293,
      "grad_norm": 0.14121094346046448,
      "learning_rate": 8.995670881571926e-06,
      "loss": 0.0346,
      "step": 324840
    },
    {
      "epoch": 0.5316405150461826,
      "grad_norm": 2.0545499324798584,
      "learning_rate": 8.995604989358408e-06,
      "loss": 0.0451,
      "step": 324860
    },
    {
      "epoch": 0.531673245484836,
      "grad_norm": 2.1291587352752686,
      "learning_rate": 8.995539097144891e-06,
      "loss": 0.034,
      "step": 324880
    },
    {
      "epoch": 0.5317059759234893,
      "grad_norm": 2.579554557800293,
      "learning_rate": 8.995473204931375e-06,
      "loss": 0.0392,
      "step": 324900
    },
    {
      "epoch": 0.5317387063621427,
      "grad_norm": 1.407498836517334,
      "learning_rate": 8.995407312717857e-06,
      "loss": 0.0438,
      "step": 324920
    },
    {
      "epoch": 0.531771436800796,
      "grad_norm": 1.6868126392364502,
      "learning_rate": 8.99534142050434e-06,
      "loss": 0.0294,
      "step": 324940
    },
    {
      "epoch": 0.5318041672394493,
      "grad_norm": 0.6048122048377991,
      "learning_rate": 8.995275528290822e-06,
      "loss": 0.0353,
      "step": 324960
    },
    {
      "epoch": 0.5318368976781027,
      "grad_norm": 2.6706323623657227,
      "learning_rate": 8.995209636077306e-06,
      "loss": 0.0341,
      "step": 324980
    },
    {
      "epoch": 0.531869628116756,
      "grad_norm": 1.6309666633605957,
      "learning_rate": 8.995143743863788e-06,
      "loss": 0.0341,
      "step": 325000
    },
    {
      "epoch": 0.5319023585554093,
      "grad_norm": 5.270913124084473,
      "learning_rate": 8.995077851650271e-06,
      "loss": 0.0383,
      "step": 325020
    },
    {
      "epoch": 0.5319350889940627,
      "grad_norm": 2.0975217819213867,
      "learning_rate": 8.995011959436755e-06,
      "loss": 0.0475,
      "step": 325040
    },
    {
      "epoch": 0.531967819432716,
      "grad_norm": 1.8061662912368774,
      "learning_rate": 8.994946067223237e-06,
      "loss": 0.0425,
      "step": 325060
    },
    {
      "epoch": 0.5320005498713694,
      "grad_norm": 3.6444029808044434,
      "learning_rate": 8.99488017500972e-06,
      "loss": 0.0363,
      "step": 325080
    },
    {
      "epoch": 0.5320332803100227,
      "grad_norm": 1.2466100454330444,
      "learning_rate": 8.994814282796202e-06,
      "loss": 0.0436,
      "step": 325100
    },
    {
      "epoch": 0.5320660107486761,
      "grad_norm": 0.9527602791786194,
      "learning_rate": 8.994748390582686e-06,
      "loss": 0.033,
      "step": 325120
    },
    {
      "epoch": 0.5320987411873294,
      "grad_norm": 1.3984549045562744,
      "learning_rate": 8.994682498369168e-06,
      "loss": 0.0312,
      "step": 325140
    },
    {
      "epoch": 0.5321314716259827,
      "grad_norm": 2.2279255390167236,
      "learning_rate": 8.994616606155651e-06,
      "loss": 0.0347,
      "step": 325160
    },
    {
      "epoch": 0.5321642020646361,
      "grad_norm": 0.6648785471916199,
      "learning_rate": 8.994550713942133e-06,
      "loss": 0.0415,
      "step": 325180
    },
    {
      "epoch": 0.5321969325032894,
      "grad_norm": 1.0545865297317505,
      "learning_rate": 8.994484821728617e-06,
      "loss": 0.0393,
      "step": 325200
    },
    {
      "epoch": 0.5322296629419427,
      "grad_norm": 2.603876829147339,
      "learning_rate": 8.9944189295151e-06,
      "loss": 0.0343,
      "step": 325220
    },
    {
      "epoch": 0.5322623933805961,
      "grad_norm": 0.5806171298027039,
      "learning_rate": 8.994353037301582e-06,
      "loss": 0.034,
      "step": 325240
    },
    {
      "epoch": 0.5322951238192494,
      "grad_norm": 2.8342461585998535,
      "learning_rate": 8.994287145088066e-06,
      "loss": 0.0343,
      "step": 325260
    },
    {
      "epoch": 0.5323278542579027,
      "grad_norm": 1.0626780986785889,
      "learning_rate": 8.99422125287455e-06,
      "loss": 0.0464,
      "step": 325280
    },
    {
      "epoch": 0.5323605846965561,
      "grad_norm": 1.2210636138916016,
      "learning_rate": 8.994155360661031e-06,
      "loss": 0.053,
      "step": 325300
    },
    {
      "epoch": 0.5323933151352095,
      "grad_norm": 1.6685523986816406,
      "learning_rate": 8.994089468447515e-06,
      "loss": 0.0376,
      "step": 325320
    },
    {
      "epoch": 0.5324260455738627,
      "grad_norm": 1.6796153783798218,
      "learning_rate": 8.994023576233997e-06,
      "loss": 0.0403,
      "step": 325340
    },
    {
      "epoch": 0.5324587760125161,
      "grad_norm": 0.7003259062767029,
      "learning_rate": 8.99395768402048e-06,
      "loss": 0.0379,
      "step": 325360
    },
    {
      "epoch": 0.5324915064511695,
      "grad_norm": 2.9764251708984375,
      "learning_rate": 8.993891791806962e-06,
      "loss": 0.0412,
      "step": 325380
    },
    {
      "epoch": 0.5325242368898228,
      "grad_norm": 0.671876847743988,
      "learning_rate": 8.993825899593446e-06,
      "loss": 0.0397,
      "step": 325400
    },
    {
      "epoch": 0.5325569673284761,
      "grad_norm": 0.6305327415466309,
      "learning_rate": 8.99376000737993e-06,
      "loss": 0.0381,
      "step": 325420
    },
    {
      "epoch": 0.5325896977671295,
      "grad_norm": 1.5118639469146729,
      "learning_rate": 8.993694115166411e-06,
      "loss": 0.0328,
      "step": 325440
    },
    {
      "epoch": 0.5326224282057828,
      "grad_norm": 1.1486462354660034,
      "learning_rate": 8.993628222952895e-06,
      "loss": 0.0448,
      "step": 325460
    },
    {
      "epoch": 0.5326551586444361,
      "grad_norm": 0.7780742049217224,
      "learning_rate": 8.993562330739377e-06,
      "loss": 0.0427,
      "step": 325480
    },
    {
      "epoch": 0.5326878890830895,
      "grad_norm": 6.541658878326416,
      "learning_rate": 8.99349643852586e-06,
      "loss": 0.0333,
      "step": 325500
    },
    {
      "epoch": 0.5327206195217429,
      "grad_norm": 1.4978538751602173,
      "learning_rate": 8.993430546312342e-06,
      "loss": 0.0406,
      "step": 325520
    },
    {
      "epoch": 0.5327533499603961,
      "grad_norm": 0.3293530344963074,
      "learning_rate": 8.993364654098826e-06,
      "loss": 0.0436,
      "step": 325540
    },
    {
      "epoch": 0.5327860803990495,
      "grad_norm": 0.9318905472755432,
      "learning_rate": 8.993298761885308e-06,
      "loss": 0.0411,
      "step": 325560
    },
    {
      "epoch": 0.5328188108377029,
      "grad_norm": 1.783927083015442,
      "learning_rate": 8.993232869671791e-06,
      "loss": 0.0447,
      "step": 325580
    },
    {
      "epoch": 0.5328515412763561,
      "grad_norm": 0.6423382759094238,
      "learning_rate": 8.993166977458275e-06,
      "loss": 0.0429,
      "step": 325600
    },
    {
      "epoch": 0.5328842717150095,
      "grad_norm": 1.4910001754760742,
      "learning_rate": 8.993101085244757e-06,
      "loss": 0.0475,
      "step": 325620
    },
    {
      "epoch": 0.5329170021536629,
      "grad_norm": 1.0691910982131958,
      "learning_rate": 8.99303519303124e-06,
      "loss": 0.0448,
      "step": 325640
    },
    {
      "epoch": 0.5329497325923161,
      "grad_norm": 1.3072443008422852,
      "learning_rate": 8.992969300817724e-06,
      "loss": 0.0421,
      "step": 325660
    },
    {
      "epoch": 0.5329824630309695,
      "grad_norm": 1.4795012474060059,
      "learning_rate": 8.992903408604206e-06,
      "loss": 0.0428,
      "step": 325680
    },
    {
      "epoch": 0.5330151934696229,
      "grad_norm": 0.9715490937232971,
      "learning_rate": 8.99283751639069e-06,
      "loss": 0.0417,
      "step": 325700
    },
    {
      "epoch": 0.5330479239082763,
      "grad_norm": 0.7402406334877014,
      "learning_rate": 8.992771624177171e-06,
      "loss": 0.0302,
      "step": 325720
    },
    {
      "epoch": 0.5330806543469295,
      "grad_norm": 0.3260197937488556,
      "learning_rate": 8.992705731963655e-06,
      "loss": 0.0404,
      "step": 325740
    },
    {
      "epoch": 0.5331133847855829,
      "grad_norm": 2.251227378845215,
      "learning_rate": 8.992639839750139e-06,
      "loss": 0.0337,
      "step": 325760
    },
    {
      "epoch": 0.5331461152242363,
      "grad_norm": 1.5278881788253784,
      "learning_rate": 8.99257394753662e-06,
      "loss": 0.0379,
      "step": 325780
    },
    {
      "epoch": 0.5331788456628895,
      "grad_norm": 0.9413650035858154,
      "learning_rate": 8.992508055323104e-06,
      "loss": 0.0417,
      "step": 325800
    },
    {
      "epoch": 0.5332115761015429,
      "grad_norm": 4.436686038970947,
      "learning_rate": 8.992442163109586e-06,
      "loss": 0.0399,
      "step": 325820
    },
    {
      "epoch": 0.5332443065401963,
      "grad_norm": 1.4098875522613525,
      "learning_rate": 8.99237627089607e-06,
      "loss": 0.029,
      "step": 325840
    },
    {
      "epoch": 0.5332770369788495,
      "grad_norm": 2.2995588779449463,
      "learning_rate": 8.992310378682551e-06,
      "loss": 0.0426,
      "step": 325860
    },
    {
      "epoch": 0.5333097674175029,
      "grad_norm": 1.5661859512329102,
      "learning_rate": 8.992244486469035e-06,
      "loss": 0.0453,
      "step": 325880
    },
    {
      "epoch": 0.5333424978561563,
      "grad_norm": 3.436816930770874,
      "learning_rate": 8.992178594255517e-06,
      "loss": 0.0348,
      "step": 325900
    },
    {
      "epoch": 0.5333752282948097,
      "grad_norm": 0.6265367269515991,
      "learning_rate": 8.992112702042e-06,
      "loss": 0.0477,
      "step": 325920
    },
    {
      "epoch": 0.5334079587334629,
      "grad_norm": 0.3677104413509369,
      "learning_rate": 8.992046809828482e-06,
      "loss": 0.0362,
      "step": 325940
    },
    {
      "epoch": 0.5334406891721163,
      "grad_norm": 2.169969320297241,
      "learning_rate": 8.991980917614966e-06,
      "loss": 0.0418,
      "step": 325960
    },
    {
      "epoch": 0.5334734196107697,
      "grad_norm": 0.7990769147872925,
      "learning_rate": 8.991915025401448e-06,
      "loss": 0.0397,
      "step": 325980
    },
    {
      "epoch": 0.5335061500494229,
      "grad_norm": 0.9683198928833008,
      "learning_rate": 8.991849133187931e-06,
      "loss": 0.0276,
      "step": 326000
    },
    {
      "epoch": 0.5335388804880763,
      "grad_norm": 3.2797229290008545,
      "learning_rate": 8.991783240974415e-06,
      "loss": 0.043,
      "step": 326020
    },
    {
      "epoch": 0.5335716109267297,
      "grad_norm": 0.673926055431366,
      "learning_rate": 8.991717348760897e-06,
      "loss": 0.0324,
      "step": 326040
    },
    {
      "epoch": 0.5336043413653829,
      "grad_norm": 2.5572588443756104,
      "learning_rate": 8.99165145654738e-06,
      "loss": 0.0366,
      "step": 326060
    },
    {
      "epoch": 0.5336370718040363,
      "grad_norm": 0.698058009147644,
      "learning_rate": 8.991585564333864e-06,
      "loss": 0.0356,
      "step": 326080
    },
    {
      "epoch": 0.5336698022426897,
      "grad_norm": 0.16478006541728973,
      "learning_rate": 8.991519672120346e-06,
      "loss": 0.0361,
      "step": 326100
    },
    {
      "epoch": 0.533702532681343,
      "grad_norm": 1.0745532512664795,
      "learning_rate": 8.99145377990683e-06,
      "loss": 0.0319,
      "step": 326120
    },
    {
      "epoch": 0.5337352631199963,
      "grad_norm": 0.579503059387207,
      "learning_rate": 8.991387887693313e-06,
      "loss": 0.0549,
      "step": 326140
    },
    {
      "epoch": 0.5337679935586497,
      "grad_norm": 0.5771440863609314,
      "learning_rate": 8.991321995479795e-06,
      "loss": 0.0352,
      "step": 326160
    },
    {
      "epoch": 0.533800723997303,
      "grad_norm": 4.591537952423096,
      "learning_rate": 8.991256103266279e-06,
      "loss": 0.0303,
      "step": 326180
    },
    {
      "epoch": 0.5338334544359563,
      "grad_norm": 1.0429167747497559,
      "learning_rate": 8.99119021105276e-06,
      "loss": 0.0294,
      "step": 326200
    },
    {
      "epoch": 0.5338661848746097,
      "grad_norm": 1.0052850246429443,
      "learning_rate": 8.991124318839244e-06,
      "loss": 0.0385,
      "step": 326220
    },
    {
      "epoch": 0.5338989153132631,
      "grad_norm": 0.4610958993434906,
      "learning_rate": 8.991058426625726e-06,
      "loss": 0.0477,
      "step": 326240
    },
    {
      "epoch": 0.5339316457519163,
      "grad_norm": 3.1463332176208496,
      "learning_rate": 8.99099253441221e-06,
      "loss": 0.0482,
      "step": 326260
    },
    {
      "epoch": 0.5339643761905697,
      "grad_norm": 1.518057107925415,
      "learning_rate": 8.990926642198692e-06,
      "loss": 0.0365,
      "step": 326280
    },
    {
      "epoch": 0.5339971066292231,
      "grad_norm": 1.83777916431427,
      "learning_rate": 8.990860749985175e-06,
      "loss": 0.04,
      "step": 326300
    },
    {
      "epoch": 0.5340298370678764,
      "grad_norm": 1.646027684211731,
      "learning_rate": 8.990794857771657e-06,
      "loss": 0.0461,
      "step": 326320
    },
    {
      "epoch": 0.5340625675065297,
      "grad_norm": 2.778012990951538,
      "learning_rate": 8.99072896555814e-06,
      "loss": 0.0393,
      "step": 326340
    },
    {
      "epoch": 0.5340952979451831,
      "grad_norm": 1.1972883939743042,
      "learning_rate": 8.990663073344622e-06,
      "loss": 0.0264,
      "step": 326360
    },
    {
      "epoch": 0.5341280283838364,
      "grad_norm": 0.6341660022735596,
      "learning_rate": 8.990597181131106e-06,
      "loss": 0.0413,
      "step": 326380
    },
    {
      "epoch": 0.5341607588224897,
      "grad_norm": 0.2838922441005707,
      "learning_rate": 8.99053128891759e-06,
      "loss": 0.0396,
      "step": 326400
    },
    {
      "epoch": 0.5341934892611431,
      "grad_norm": 1.5621167421340942,
      "learning_rate": 8.990465396704072e-06,
      "loss": 0.0342,
      "step": 326420
    },
    {
      "epoch": 0.5342262196997964,
      "grad_norm": 6.54834508895874,
      "learning_rate": 8.990399504490555e-06,
      "loss": 0.0476,
      "step": 326440
    },
    {
      "epoch": 0.5342589501384497,
      "grad_norm": 3.5085341930389404,
      "learning_rate": 8.990333612277039e-06,
      "loss": 0.0471,
      "step": 326460
    },
    {
      "epoch": 0.5342916805771031,
      "grad_norm": 0.9718847870826721,
      "learning_rate": 8.99026772006352e-06,
      "loss": 0.0407,
      "step": 326480
    },
    {
      "epoch": 0.5343244110157565,
      "grad_norm": 2.4351491928100586,
      "learning_rate": 8.990201827850004e-06,
      "loss": 0.0351,
      "step": 326500
    },
    {
      "epoch": 0.5343571414544098,
      "grad_norm": 2.683776617050171,
      "learning_rate": 8.990135935636488e-06,
      "loss": 0.0389,
      "step": 326520
    },
    {
      "epoch": 0.5343898718930631,
      "grad_norm": 1.785623550415039,
      "learning_rate": 8.99007004342297e-06,
      "loss": 0.031,
      "step": 326540
    },
    {
      "epoch": 0.5344226023317165,
      "grad_norm": 0.9219524264335632,
      "learning_rate": 8.990004151209453e-06,
      "loss": 0.0399,
      "step": 326560
    },
    {
      "epoch": 0.5344553327703698,
      "grad_norm": 1.1511269807815552,
      "learning_rate": 8.989938258995935e-06,
      "loss": 0.0511,
      "step": 326580
    },
    {
      "epoch": 0.5344880632090231,
      "grad_norm": 0.4716595411300659,
      "learning_rate": 8.989872366782419e-06,
      "loss": 0.0379,
      "step": 326600
    },
    {
      "epoch": 0.5345207936476765,
      "grad_norm": 0.9083720445632935,
      "learning_rate": 8.9898064745689e-06,
      "loss": 0.0369,
      "step": 326620
    },
    {
      "epoch": 0.5345535240863298,
      "grad_norm": 0.8259773254394531,
      "learning_rate": 8.989740582355384e-06,
      "loss": 0.0424,
      "step": 326640
    },
    {
      "epoch": 0.5345862545249831,
      "grad_norm": 1.6066884994506836,
      "learning_rate": 8.989674690141866e-06,
      "loss": 0.042,
      "step": 326660
    },
    {
      "epoch": 0.5346189849636365,
      "grad_norm": 1.5858879089355469,
      "learning_rate": 8.98960879792835e-06,
      "loss": 0.0474,
      "step": 326680
    },
    {
      "epoch": 0.5346517154022898,
      "grad_norm": 3.288529634475708,
      "learning_rate": 8.989542905714832e-06,
      "loss": 0.0419,
      "step": 326700
    },
    {
      "epoch": 0.5346844458409432,
      "grad_norm": 0.8460831046104431,
      "learning_rate": 8.989477013501315e-06,
      "loss": 0.0334,
      "step": 326720
    },
    {
      "epoch": 0.5347171762795965,
      "grad_norm": 0.9326014518737793,
      "learning_rate": 8.989411121287797e-06,
      "loss": 0.0279,
      "step": 326740
    },
    {
      "epoch": 0.5347499067182498,
      "grad_norm": 2.6275272369384766,
      "learning_rate": 8.98934522907428e-06,
      "loss": 0.0324,
      "step": 326760
    },
    {
      "epoch": 0.5347826371569032,
      "grad_norm": 3.7409589290618896,
      "learning_rate": 8.989279336860763e-06,
      "loss": 0.0441,
      "step": 326780
    },
    {
      "epoch": 0.5348153675955565,
      "grad_norm": 3.3927884101867676,
      "learning_rate": 8.989213444647246e-06,
      "loss": 0.0579,
      "step": 326800
    },
    {
      "epoch": 0.5348480980342099,
      "grad_norm": 1.8440303802490234,
      "learning_rate": 8.98914755243373e-06,
      "loss": 0.0341,
      "step": 326820
    },
    {
      "epoch": 0.5348808284728632,
      "grad_norm": 2.1060163974761963,
      "learning_rate": 8.989081660220212e-06,
      "loss": 0.0447,
      "step": 326840
    },
    {
      "epoch": 0.5349135589115165,
      "grad_norm": 0.7534621357917786,
      "learning_rate": 8.989015768006695e-06,
      "loss": 0.0385,
      "step": 326860
    },
    {
      "epoch": 0.5349462893501699,
      "grad_norm": 0.6632914543151855,
      "learning_rate": 8.988949875793179e-06,
      "loss": 0.0411,
      "step": 326880
    },
    {
      "epoch": 0.5349790197888232,
      "grad_norm": 0.25139787793159485,
      "learning_rate": 8.98888398357966e-06,
      "loss": 0.0372,
      "step": 326900
    },
    {
      "epoch": 0.5350117502274766,
      "grad_norm": 3.123502492904663,
      "learning_rate": 8.988818091366144e-06,
      "loss": 0.0472,
      "step": 326920
    },
    {
      "epoch": 0.5350444806661299,
      "grad_norm": 1.5920453071594238,
      "learning_rate": 8.988752199152628e-06,
      "loss": 0.0352,
      "step": 326940
    },
    {
      "epoch": 0.5350772111047832,
      "grad_norm": 2.0993447303771973,
      "learning_rate": 8.98868630693911e-06,
      "loss": 0.0483,
      "step": 326960
    },
    {
      "epoch": 0.5351099415434366,
      "grad_norm": 1.849195957183838,
      "learning_rate": 8.988620414725593e-06,
      "loss": 0.0366,
      "step": 326980
    },
    {
      "epoch": 0.5351426719820899,
      "grad_norm": 1.2622705698013306,
      "learning_rate": 8.988554522512075e-06,
      "loss": 0.0347,
      "step": 327000
    },
    {
      "epoch": 0.5351754024207432,
      "grad_norm": 1.8493521213531494,
      "learning_rate": 8.988488630298559e-06,
      "loss": 0.0374,
      "step": 327020
    },
    {
      "epoch": 0.5352081328593966,
      "grad_norm": 3.270228624343872,
      "learning_rate": 8.98842273808504e-06,
      "loss": 0.0404,
      "step": 327040
    },
    {
      "epoch": 0.5352408632980499,
      "grad_norm": 0.5656513571739197,
      "learning_rate": 8.988356845871524e-06,
      "loss": 0.0322,
      "step": 327060
    },
    {
      "epoch": 0.5352735937367032,
      "grad_norm": 0.7909623980522156,
      "learning_rate": 8.988290953658006e-06,
      "loss": 0.0527,
      "step": 327080
    },
    {
      "epoch": 0.5353063241753566,
      "grad_norm": 1.2089588642120361,
      "learning_rate": 8.98822506144449e-06,
      "loss": 0.035,
      "step": 327100
    },
    {
      "epoch": 0.53533905461401,
      "grad_norm": 1.0779386758804321,
      "learning_rate": 8.988159169230972e-06,
      "loss": 0.0466,
      "step": 327120
    },
    {
      "epoch": 0.5353717850526633,
      "grad_norm": 3.489647626876831,
      "learning_rate": 8.988093277017455e-06,
      "loss": 0.0271,
      "step": 327140
    },
    {
      "epoch": 0.5354045154913166,
      "grad_norm": 1.0603306293487549,
      "learning_rate": 8.988027384803939e-06,
      "loss": 0.0309,
      "step": 327160
    },
    {
      "epoch": 0.53543724592997,
      "grad_norm": 2.195289134979248,
      "learning_rate": 8.98796149259042e-06,
      "loss": 0.0282,
      "step": 327180
    },
    {
      "epoch": 0.5354699763686233,
      "grad_norm": 2.5388145446777344,
      "learning_rate": 8.987895600376904e-06,
      "loss": 0.0405,
      "step": 327200
    },
    {
      "epoch": 0.5355027068072766,
      "grad_norm": 1.1497236490249634,
      "learning_rate": 8.987829708163386e-06,
      "loss": 0.0482,
      "step": 327220
    },
    {
      "epoch": 0.53553543724593,
      "grad_norm": 1.6784764528274536,
      "learning_rate": 8.98776381594987e-06,
      "loss": 0.0331,
      "step": 327240
    },
    {
      "epoch": 0.5355681676845833,
      "grad_norm": 2.20801043510437,
      "learning_rate": 8.987697923736353e-06,
      "loss": 0.0353,
      "step": 327260
    },
    {
      "epoch": 0.5356008981232366,
      "grad_norm": 2.798788070678711,
      "learning_rate": 8.987632031522835e-06,
      "loss": 0.0308,
      "step": 327280
    },
    {
      "epoch": 0.53563362856189,
      "grad_norm": 2.2567245960235596,
      "learning_rate": 8.987566139309319e-06,
      "loss": 0.0368,
      "step": 327300
    },
    {
      "epoch": 0.5356663590005434,
      "grad_norm": 1.0364176034927368,
      "learning_rate": 8.987500247095802e-06,
      "loss": 0.0685,
      "step": 327320
    },
    {
      "epoch": 0.5356990894391966,
      "grad_norm": 3.1149861812591553,
      "learning_rate": 8.987434354882284e-06,
      "loss": 0.0366,
      "step": 327340
    },
    {
      "epoch": 0.53573181987785,
      "grad_norm": 2.23771595954895,
      "learning_rate": 8.987368462668768e-06,
      "loss": 0.0414,
      "step": 327360
    },
    {
      "epoch": 0.5357645503165034,
      "grad_norm": 1.1808326244354248,
      "learning_rate": 8.98730257045525e-06,
      "loss": 0.0383,
      "step": 327380
    },
    {
      "epoch": 0.5357972807551566,
      "grad_norm": 1.0368026494979858,
      "learning_rate": 8.987236678241733e-06,
      "loss": 0.0447,
      "step": 327400
    },
    {
      "epoch": 0.53583001119381,
      "grad_norm": 0.43339264392852783,
      "learning_rate": 8.987170786028215e-06,
      "loss": 0.0337,
      "step": 327420
    },
    {
      "epoch": 0.5358627416324634,
      "grad_norm": 0.48557180166244507,
      "learning_rate": 8.987104893814699e-06,
      "loss": 0.0374,
      "step": 327440
    },
    {
      "epoch": 0.5358954720711167,
      "grad_norm": 1.045962929725647,
      "learning_rate": 8.98703900160118e-06,
      "loss": 0.0478,
      "step": 327460
    },
    {
      "epoch": 0.53592820250977,
      "grad_norm": 3.0640172958374023,
      "learning_rate": 8.986973109387664e-06,
      "loss": 0.0321,
      "step": 327480
    },
    {
      "epoch": 0.5359609329484234,
      "grad_norm": 2.09698486328125,
      "learning_rate": 8.986907217174146e-06,
      "loss": 0.0313,
      "step": 327500
    },
    {
      "epoch": 0.5359936633870768,
      "grad_norm": 1.6342943906784058,
      "learning_rate": 8.98684132496063e-06,
      "loss": 0.0374,
      "step": 327520
    },
    {
      "epoch": 0.53602639382573,
      "grad_norm": 0.9760821461677551,
      "learning_rate": 8.986775432747113e-06,
      "loss": 0.0392,
      "step": 327540
    },
    {
      "epoch": 0.5360591242643834,
      "grad_norm": 0.76967853307724,
      "learning_rate": 8.986709540533595e-06,
      "loss": 0.0334,
      "step": 327560
    },
    {
      "epoch": 0.5360918547030368,
      "grad_norm": 0.6546884775161743,
      "learning_rate": 8.986643648320079e-06,
      "loss": 0.0367,
      "step": 327580
    },
    {
      "epoch": 0.53612458514169,
      "grad_norm": 1.3983182907104492,
      "learning_rate": 8.98657775610656e-06,
      "loss": 0.0414,
      "step": 327600
    },
    {
      "epoch": 0.5361573155803434,
      "grad_norm": 1.1714638471603394,
      "learning_rate": 8.986511863893044e-06,
      "loss": 0.0425,
      "step": 327620
    },
    {
      "epoch": 0.5361900460189968,
      "grad_norm": 2.4604179859161377,
      "learning_rate": 8.986445971679526e-06,
      "loss": 0.0307,
      "step": 327640
    },
    {
      "epoch": 0.53622277645765,
      "grad_norm": 0.7693201303482056,
      "learning_rate": 8.98638007946601e-06,
      "loss": 0.0278,
      "step": 327660
    },
    {
      "epoch": 0.5362555068963034,
      "grad_norm": 0.6792362928390503,
      "learning_rate": 8.986314187252493e-06,
      "loss": 0.0437,
      "step": 327680
    },
    {
      "epoch": 0.5362882373349568,
      "grad_norm": 4.002545356750488,
      "learning_rate": 8.986248295038977e-06,
      "loss": 0.0306,
      "step": 327700
    },
    {
      "epoch": 0.53632096777361,
      "grad_norm": 2.274942398071289,
      "learning_rate": 8.986182402825459e-06,
      "loss": 0.0442,
      "step": 327720
    },
    {
      "epoch": 0.5363536982122634,
      "grad_norm": 1.6566057205200195,
      "learning_rate": 8.986116510611943e-06,
      "loss": 0.0404,
      "step": 327740
    },
    {
      "epoch": 0.5363864286509168,
      "grad_norm": 1.3335964679718018,
      "learning_rate": 8.986050618398424e-06,
      "loss": 0.0299,
      "step": 327760
    },
    {
      "epoch": 0.5364191590895702,
      "grad_norm": 1.5920052528381348,
      "learning_rate": 8.985984726184908e-06,
      "loss": 0.0446,
      "step": 327780
    },
    {
      "epoch": 0.5364518895282234,
      "grad_norm": 0.7558775544166565,
      "learning_rate": 8.98591883397139e-06,
      "loss": 0.0357,
      "step": 327800
    },
    {
      "epoch": 0.5364846199668768,
      "grad_norm": 0.46811041235923767,
      "learning_rate": 8.985852941757874e-06,
      "loss": 0.0387,
      "step": 327820
    },
    {
      "epoch": 0.5365173504055302,
      "grad_norm": 1.3368077278137207,
      "learning_rate": 8.985787049544355e-06,
      "loss": 0.0381,
      "step": 327840
    },
    {
      "epoch": 0.5365500808441834,
      "grad_norm": 1.7344073057174683,
      "learning_rate": 8.985721157330839e-06,
      "loss": 0.0273,
      "step": 327860
    },
    {
      "epoch": 0.5365828112828368,
      "grad_norm": 1.7840640544891357,
      "learning_rate": 8.985655265117323e-06,
      "loss": 0.0377,
      "step": 327880
    },
    {
      "epoch": 0.5366155417214902,
      "grad_norm": 0.7411927580833435,
      "learning_rate": 8.985589372903804e-06,
      "loss": 0.0367,
      "step": 327900
    },
    {
      "epoch": 0.5366482721601434,
      "grad_norm": 1.4538087844848633,
      "learning_rate": 8.985523480690288e-06,
      "loss": 0.0403,
      "step": 327920
    },
    {
      "epoch": 0.5366810025987968,
      "grad_norm": 0.7279150485992432,
      "learning_rate": 8.98545758847677e-06,
      "loss": 0.0376,
      "step": 327940
    },
    {
      "epoch": 0.5367137330374502,
      "grad_norm": 1.652414321899414,
      "learning_rate": 8.985391696263254e-06,
      "loss": 0.0489,
      "step": 327960
    },
    {
      "epoch": 0.5367464634761036,
      "grad_norm": 6.654200077056885,
      "learning_rate": 8.985325804049735e-06,
      "loss": 0.0411,
      "step": 327980
    },
    {
      "epoch": 0.5367791939147568,
      "grad_norm": 4.379964828491211,
      "learning_rate": 8.985259911836219e-06,
      "loss": 0.04,
      "step": 328000
    },
    {
      "epoch": 0.5368119243534102,
      "grad_norm": 2.703171730041504,
      "learning_rate": 8.985194019622701e-06,
      "loss": 0.0399,
      "step": 328020
    },
    {
      "epoch": 0.5368446547920636,
      "grad_norm": 1.047318935394287,
      "learning_rate": 8.985128127409184e-06,
      "loss": 0.0438,
      "step": 328040
    },
    {
      "epoch": 0.5368773852307168,
      "grad_norm": 0.8300256729125977,
      "learning_rate": 8.985062235195668e-06,
      "loss": 0.041,
      "step": 328060
    },
    {
      "epoch": 0.5369101156693702,
      "grad_norm": 2.5918359756469727,
      "learning_rate": 8.98499634298215e-06,
      "loss": 0.0478,
      "step": 328080
    },
    {
      "epoch": 0.5369428461080236,
      "grad_norm": 0.7493977546691895,
      "learning_rate": 8.984930450768634e-06,
      "loss": 0.051,
      "step": 328100
    },
    {
      "epoch": 0.5369755765466768,
      "grad_norm": 0.7495531439781189,
      "learning_rate": 8.984864558555117e-06,
      "loss": 0.0447,
      "step": 328120
    },
    {
      "epoch": 0.5370083069853302,
      "grad_norm": 0.47331100702285767,
      "learning_rate": 8.984798666341599e-06,
      "loss": 0.0506,
      "step": 328140
    },
    {
      "epoch": 0.5370410374239836,
      "grad_norm": 0.6150824427604675,
      "learning_rate": 8.984732774128083e-06,
      "loss": 0.0273,
      "step": 328160
    },
    {
      "epoch": 0.5370737678626369,
      "grad_norm": 7.697697162628174,
      "learning_rate": 8.984666881914565e-06,
      "loss": 0.047,
      "step": 328180
    },
    {
      "epoch": 0.5371064983012902,
      "grad_norm": 0.39337772130966187,
      "learning_rate": 8.984600989701048e-06,
      "loss": 0.0369,
      "step": 328200
    },
    {
      "epoch": 0.5371392287399436,
      "grad_norm": 0.9050031304359436,
      "learning_rate": 8.984535097487532e-06,
      "loss": 0.0389,
      "step": 328220
    },
    {
      "epoch": 0.537171959178597,
      "grad_norm": 2.5649259090423584,
      "learning_rate": 8.984469205274014e-06,
      "loss": 0.0416,
      "step": 328240
    },
    {
      "epoch": 0.5372046896172502,
      "grad_norm": 1.0287537574768066,
      "learning_rate": 8.984403313060497e-06,
      "loss": 0.032,
      "step": 328260
    },
    {
      "epoch": 0.5372374200559036,
      "grad_norm": 1.658210039138794,
      "learning_rate": 8.984337420846979e-06,
      "loss": 0.0451,
      "step": 328280
    },
    {
      "epoch": 0.537270150494557,
      "grad_norm": 2.4810872077941895,
      "learning_rate": 8.984271528633463e-06,
      "loss": 0.0422,
      "step": 328300
    },
    {
      "epoch": 0.5373028809332102,
      "grad_norm": 1.0290223360061646,
      "learning_rate": 8.984205636419945e-06,
      "loss": 0.0354,
      "step": 328320
    },
    {
      "epoch": 0.5373356113718636,
      "grad_norm": 2.5223710536956787,
      "learning_rate": 8.984139744206428e-06,
      "loss": 0.0417,
      "step": 328340
    },
    {
      "epoch": 0.537368341810517,
      "grad_norm": 1.9474722146987915,
      "learning_rate": 8.98407385199291e-06,
      "loss": 0.0279,
      "step": 328360
    },
    {
      "epoch": 0.5374010722491703,
      "grad_norm": 0.2686637341976166,
      "learning_rate": 8.984007959779394e-06,
      "loss": 0.0367,
      "step": 328380
    },
    {
      "epoch": 0.5374338026878236,
      "grad_norm": 1.9376137256622314,
      "learning_rate": 8.983942067565875e-06,
      "loss": 0.0326,
      "step": 328400
    },
    {
      "epoch": 0.537466533126477,
      "grad_norm": 1.0614200830459595,
      "learning_rate": 8.983876175352359e-06,
      "loss": 0.0285,
      "step": 328420
    },
    {
      "epoch": 0.5374992635651303,
      "grad_norm": 3.108616352081299,
      "learning_rate": 8.983810283138843e-06,
      "loss": 0.0518,
      "step": 328440
    },
    {
      "epoch": 0.5375319940037836,
      "grad_norm": 1.6496837139129639,
      "learning_rate": 8.983744390925325e-06,
      "loss": 0.0298,
      "step": 328460
    },
    {
      "epoch": 0.537564724442437,
      "grad_norm": 1.6433830261230469,
      "learning_rate": 8.983678498711808e-06,
      "loss": 0.0395,
      "step": 328480
    },
    {
      "epoch": 0.5375974548810903,
      "grad_norm": 2.4262239933013916,
      "learning_rate": 8.983612606498292e-06,
      "loss": 0.04,
      "step": 328500
    },
    {
      "epoch": 0.5376301853197436,
      "grad_norm": 1.4967268705368042,
      "learning_rate": 8.983546714284774e-06,
      "loss": 0.0356,
      "step": 328520
    },
    {
      "epoch": 0.537662915758397,
      "grad_norm": 1.5936199426651,
      "learning_rate": 8.983480822071257e-06,
      "loss": 0.0416,
      "step": 328540
    },
    {
      "epoch": 0.5376956461970503,
      "grad_norm": 2.638458728790283,
      "learning_rate": 8.98341492985774e-06,
      "loss": 0.0398,
      "step": 328560
    },
    {
      "epoch": 0.5377283766357037,
      "grad_norm": 1.2571463584899902,
      "learning_rate": 8.983349037644223e-06,
      "loss": 0.0468,
      "step": 328580
    },
    {
      "epoch": 0.537761107074357,
      "grad_norm": 1.274356722831726,
      "learning_rate": 8.983283145430706e-06,
      "loss": 0.0319,
      "step": 328600
    },
    {
      "epoch": 0.5377938375130104,
      "grad_norm": 0.6610080599784851,
      "learning_rate": 8.983217253217188e-06,
      "loss": 0.0313,
      "step": 328620
    },
    {
      "epoch": 0.5378265679516637,
      "grad_norm": 2.0428755283355713,
      "learning_rate": 8.983151361003672e-06,
      "loss": 0.0372,
      "step": 328640
    },
    {
      "epoch": 0.537859298390317,
      "grad_norm": 1.3048168420791626,
      "learning_rate": 8.983085468790154e-06,
      "loss": 0.042,
      "step": 328660
    },
    {
      "epoch": 0.5378920288289704,
      "grad_norm": 0.5512959957122803,
      "learning_rate": 8.983019576576637e-06,
      "loss": 0.0429,
      "step": 328680
    },
    {
      "epoch": 0.5379247592676237,
      "grad_norm": 0.5986296534538269,
      "learning_rate": 8.982953684363119e-06,
      "loss": 0.0309,
      "step": 328700
    },
    {
      "epoch": 0.537957489706277,
      "grad_norm": 2.1154592037200928,
      "learning_rate": 8.982887792149603e-06,
      "loss": 0.0332,
      "step": 328720
    },
    {
      "epoch": 0.5379902201449304,
      "grad_norm": 2.847672700881958,
      "learning_rate": 8.982821899936085e-06,
      "loss": 0.0401,
      "step": 328740
    },
    {
      "epoch": 0.5380229505835837,
      "grad_norm": 1.5881259441375732,
      "learning_rate": 8.982756007722568e-06,
      "loss": 0.0494,
      "step": 328760
    },
    {
      "epoch": 0.5380556810222371,
      "grad_norm": 2.864306688308716,
      "learning_rate": 8.98269011550905e-06,
      "loss": 0.0343,
      "step": 328780
    },
    {
      "epoch": 0.5380884114608904,
      "grad_norm": 0.517799973487854,
      "learning_rate": 8.982624223295534e-06,
      "loss": 0.0299,
      "step": 328800
    },
    {
      "epoch": 0.5381211418995437,
      "grad_norm": 1.0645668506622314,
      "learning_rate": 8.982558331082016e-06,
      "loss": 0.0331,
      "step": 328820
    },
    {
      "epoch": 0.5381538723381971,
      "grad_norm": 0.37702250480651855,
      "learning_rate": 8.9824924388685e-06,
      "loss": 0.0358,
      "step": 328840
    },
    {
      "epoch": 0.5381866027768504,
      "grad_norm": 0.8363581299781799,
      "learning_rate": 8.982426546654983e-06,
      "loss": 0.0351,
      "step": 328860
    },
    {
      "epoch": 0.5382193332155037,
      "grad_norm": 0.39905861020088196,
      "learning_rate": 8.982360654441465e-06,
      "loss": 0.0418,
      "step": 328880
    },
    {
      "epoch": 0.5382520636541571,
      "grad_norm": 1.7614986896514893,
      "learning_rate": 8.982294762227948e-06,
      "loss": 0.0242,
      "step": 328900
    },
    {
      "epoch": 0.5382847940928104,
      "grad_norm": 1.2828407287597656,
      "learning_rate": 8.982228870014432e-06,
      "loss": 0.0394,
      "step": 328920
    },
    {
      "epoch": 0.5383175245314638,
      "grad_norm": 1.7912909984588623,
      "learning_rate": 8.982162977800914e-06,
      "loss": 0.0506,
      "step": 328940
    },
    {
      "epoch": 0.5383502549701171,
      "grad_norm": 0.5382999777793884,
      "learning_rate": 8.982097085587397e-06,
      "loss": 0.0215,
      "step": 328960
    },
    {
      "epoch": 0.5383829854087705,
      "grad_norm": 0.547338604927063,
      "learning_rate": 8.982031193373881e-06,
      "loss": 0.0339,
      "step": 328980
    },
    {
      "epoch": 0.5384157158474238,
      "grad_norm": 2.3300657272338867,
      "learning_rate": 8.981965301160363e-06,
      "loss": 0.0306,
      "step": 329000
    },
    {
      "epoch": 0.5384484462860771,
      "grad_norm": 0.9298762679100037,
      "learning_rate": 8.981899408946846e-06,
      "loss": 0.0427,
      "step": 329020
    },
    {
      "epoch": 0.5384811767247305,
      "grad_norm": 4.332140922546387,
      "learning_rate": 8.981833516733328e-06,
      "loss": 0.0425,
      "step": 329040
    },
    {
      "epoch": 0.5385139071633838,
      "grad_norm": 1.0594258308410645,
      "learning_rate": 8.981767624519812e-06,
      "loss": 0.0375,
      "step": 329060
    },
    {
      "epoch": 0.5385466376020371,
      "grad_norm": 0.7274330258369446,
      "learning_rate": 8.981701732306294e-06,
      "loss": 0.0433,
      "step": 329080
    },
    {
      "epoch": 0.5385793680406905,
      "grad_norm": 1.3526180982589722,
      "learning_rate": 8.981635840092777e-06,
      "loss": 0.0424,
      "step": 329100
    },
    {
      "epoch": 0.5386120984793438,
      "grad_norm": 3.286093235015869,
      "learning_rate": 8.98156994787926e-06,
      "loss": 0.0386,
      "step": 329120
    },
    {
      "epoch": 0.5386448289179971,
      "grad_norm": 1.7474400997161865,
      "learning_rate": 8.981504055665743e-06,
      "loss": 0.0459,
      "step": 329140
    },
    {
      "epoch": 0.5386775593566505,
      "grad_norm": 1.2329612970352173,
      "learning_rate": 8.981438163452225e-06,
      "loss": 0.0284,
      "step": 329160
    },
    {
      "epoch": 0.5387102897953039,
      "grad_norm": 2.130232572555542,
      "learning_rate": 8.981372271238708e-06,
      "loss": 0.0381,
      "step": 329180
    },
    {
      "epoch": 0.5387430202339571,
      "grad_norm": 1.00212824344635,
      "learning_rate": 8.98130637902519e-06,
      "loss": 0.0295,
      "step": 329200
    },
    {
      "epoch": 0.5387757506726105,
      "grad_norm": 2.715855836868286,
      "learning_rate": 8.981240486811674e-06,
      "loss": 0.0368,
      "step": 329220
    },
    {
      "epoch": 0.5388084811112639,
      "grad_norm": 1.991686463356018,
      "learning_rate": 8.981174594598157e-06,
      "loss": 0.0398,
      "step": 329240
    },
    {
      "epoch": 0.5388412115499172,
      "grad_norm": 1.2367626428604126,
      "learning_rate": 8.98110870238464e-06,
      "loss": 0.0386,
      "step": 329260
    },
    {
      "epoch": 0.5388739419885705,
      "grad_norm": 2.3278913497924805,
      "learning_rate": 8.981042810171123e-06,
      "loss": 0.034,
      "step": 329280
    },
    {
      "epoch": 0.5389066724272239,
      "grad_norm": 2.646087408065796,
      "learning_rate": 8.980976917957606e-06,
      "loss": 0.0373,
      "step": 329300
    },
    {
      "epoch": 0.5389394028658772,
      "grad_norm": 2.450828790664673,
      "learning_rate": 8.980911025744088e-06,
      "loss": 0.0511,
      "step": 329320
    },
    {
      "epoch": 0.5389721333045305,
      "grad_norm": 1.4306933879852295,
      "learning_rate": 8.980845133530572e-06,
      "loss": 0.0447,
      "step": 329340
    },
    {
      "epoch": 0.5390048637431839,
      "grad_norm": 0.8578580021858215,
      "learning_rate": 8.980779241317055e-06,
      "loss": 0.0362,
      "step": 329360
    },
    {
      "epoch": 0.5390375941818373,
      "grad_norm": 23.877788543701172,
      "learning_rate": 8.980713349103537e-06,
      "loss": 0.0344,
      "step": 329380
    },
    {
      "epoch": 0.5390703246204905,
      "grad_norm": 1.7014881372451782,
      "learning_rate": 8.980647456890021e-06,
      "loss": 0.039,
      "step": 329400
    },
    {
      "epoch": 0.5391030550591439,
      "grad_norm": 0.5969281196594238,
      "learning_rate": 8.980581564676503e-06,
      "loss": 0.0447,
      "step": 329420
    },
    {
      "epoch": 0.5391357854977973,
      "grad_norm": 3.484988212585449,
      "learning_rate": 8.980515672462986e-06,
      "loss": 0.0324,
      "step": 329440
    },
    {
      "epoch": 0.5391685159364505,
      "grad_norm": 1.0582516193389893,
      "learning_rate": 8.980449780249468e-06,
      "loss": 0.0421,
      "step": 329460
    },
    {
      "epoch": 0.5392012463751039,
      "grad_norm": 1.1415935754776,
      "learning_rate": 8.980383888035952e-06,
      "loss": 0.0286,
      "step": 329480
    },
    {
      "epoch": 0.5392339768137573,
      "grad_norm": 1.885498285293579,
      "learning_rate": 8.980317995822434e-06,
      "loss": 0.0348,
      "step": 329500
    },
    {
      "epoch": 0.5392667072524105,
      "grad_norm": 2.062818765640259,
      "learning_rate": 8.980252103608917e-06,
      "loss": 0.0468,
      "step": 329520
    },
    {
      "epoch": 0.5392994376910639,
      "grad_norm": 0.32008013129234314,
      "learning_rate": 8.9801862113954e-06,
      "loss": 0.036,
      "step": 329540
    },
    {
      "epoch": 0.5393321681297173,
      "grad_norm": 2.259479284286499,
      "learning_rate": 8.980120319181883e-06,
      "loss": 0.0536,
      "step": 329560
    },
    {
      "epoch": 0.5393648985683707,
      "grad_norm": 2.6416938304901123,
      "learning_rate": 8.980054426968365e-06,
      "loss": 0.0519,
      "step": 329580
    },
    {
      "epoch": 0.5393976290070239,
      "grad_norm": 1.3789520263671875,
      "learning_rate": 8.979988534754848e-06,
      "loss": 0.0475,
      "step": 329600
    },
    {
      "epoch": 0.5394303594456773,
      "grad_norm": 0.9701138734817505,
      "learning_rate": 8.979922642541332e-06,
      "loss": 0.0419,
      "step": 329620
    },
    {
      "epoch": 0.5394630898843307,
      "grad_norm": 1.5096521377563477,
      "learning_rate": 8.979856750327814e-06,
      "loss": 0.033,
      "step": 329640
    },
    {
      "epoch": 0.5394958203229839,
      "grad_norm": 1.33819580078125,
      "learning_rate": 8.979790858114297e-06,
      "loss": 0.0381,
      "step": 329660
    },
    {
      "epoch": 0.5395285507616373,
      "grad_norm": 0.5633381009101868,
      "learning_rate": 8.97972496590078e-06,
      "loss": 0.0545,
      "step": 329680
    },
    {
      "epoch": 0.5395612812002907,
      "grad_norm": 0.6253991723060608,
      "learning_rate": 8.979659073687263e-06,
      "loss": 0.0411,
      "step": 329700
    },
    {
      "epoch": 0.5395940116389439,
      "grad_norm": 3.156360626220703,
      "learning_rate": 8.979593181473746e-06,
      "loss": 0.0483,
      "step": 329720
    },
    {
      "epoch": 0.5396267420775973,
      "grad_norm": 0.1322893649339676,
      "learning_rate": 8.979527289260228e-06,
      "loss": 0.0309,
      "step": 329740
    },
    {
      "epoch": 0.5396594725162507,
      "grad_norm": 1.569535493850708,
      "learning_rate": 8.979461397046712e-06,
      "loss": 0.0387,
      "step": 329760
    },
    {
      "epoch": 0.539692202954904,
      "grad_norm": 1.2521170377731323,
      "learning_rate": 8.979395504833196e-06,
      "loss": 0.0347,
      "step": 329780
    },
    {
      "epoch": 0.5397249333935573,
      "grad_norm": 4.619496822357178,
      "learning_rate": 8.979329612619677e-06,
      "loss": 0.0393,
      "step": 329800
    },
    {
      "epoch": 0.5397576638322107,
      "grad_norm": 1.0597110986709595,
      "learning_rate": 8.979263720406161e-06,
      "loss": 0.0465,
      "step": 329820
    },
    {
      "epoch": 0.5397903942708641,
      "grad_norm": 1.3761565685272217,
      "learning_rate": 8.979197828192643e-06,
      "loss": 0.0375,
      "step": 329840
    },
    {
      "epoch": 0.5398231247095173,
      "grad_norm": 1.0145212411880493,
      "learning_rate": 8.979131935979127e-06,
      "loss": 0.0417,
      "step": 329860
    },
    {
      "epoch": 0.5398558551481707,
      "grad_norm": 19.42719268798828,
      "learning_rate": 8.979066043765608e-06,
      "loss": 0.0378,
      "step": 329880
    },
    {
      "epoch": 0.5398885855868241,
      "grad_norm": 1.6063783168792725,
      "learning_rate": 8.979000151552092e-06,
      "loss": 0.0283,
      "step": 329900
    },
    {
      "epoch": 0.5399213160254773,
      "grad_norm": 2.20414662361145,
      "learning_rate": 8.978934259338574e-06,
      "loss": 0.0472,
      "step": 329920
    },
    {
      "epoch": 0.5399540464641307,
      "grad_norm": 1.6632145643234253,
      "learning_rate": 8.978868367125057e-06,
      "loss": 0.0396,
      "step": 329940
    },
    {
      "epoch": 0.5399867769027841,
      "grad_norm": 1.2164299488067627,
      "learning_rate": 8.97880247491154e-06,
      "loss": 0.0392,
      "step": 329960
    },
    {
      "epoch": 0.5400195073414374,
      "grad_norm": 1.2491185665130615,
      "learning_rate": 8.978736582698023e-06,
      "loss": 0.0332,
      "step": 329980
    },
    {
      "epoch": 0.5400522377800907,
      "grad_norm": 0.9863672256469727,
      "learning_rate": 8.978670690484507e-06,
      "loss": 0.0383,
      "step": 330000
    },
    {
      "epoch": 0.5400849682187441,
      "grad_norm": 1.6012581586837769,
      "learning_rate": 8.978604798270988e-06,
      "loss": 0.0529,
      "step": 330020
    },
    {
      "epoch": 0.5401176986573974,
      "grad_norm": 1.4910969734191895,
      "learning_rate": 8.978538906057472e-06,
      "loss": 0.0468,
      "step": 330040
    },
    {
      "epoch": 0.5401504290960507,
      "grad_norm": 0.5801841020584106,
      "learning_rate": 8.978473013843954e-06,
      "loss": 0.0365,
      "step": 330060
    },
    {
      "epoch": 0.5401831595347041,
      "grad_norm": 2.2395741939544678,
      "learning_rate": 8.978407121630437e-06,
      "loss": 0.0449,
      "step": 330080
    },
    {
      "epoch": 0.5402158899733575,
      "grad_norm": 2.0155956745147705,
      "learning_rate": 8.978341229416921e-06,
      "loss": 0.0434,
      "step": 330100
    },
    {
      "epoch": 0.5402486204120107,
      "grad_norm": 0.7984115481376648,
      "learning_rate": 8.978275337203403e-06,
      "loss": 0.0439,
      "step": 330120
    },
    {
      "epoch": 0.5402813508506641,
      "grad_norm": 1.3898520469665527,
      "learning_rate": 8.978209444989887e-06,
      "loss": 0.0299,
      "step": 330140
    },
    {
      "epoch": 0.5403140812893175,
      "grad_norm": 1.9722990989685059,
      "learning_rate": 8.97814355277637e-06,
      "loss": 0.0483,
      "step": 330160
    },
    {
      "epoch": 0.5403468117279708,
      "grad_norm": 0.8304810523986816,
      "learning_rate": 8.978077660562852e-06,
      "loss": 0.0464,
      "step": 330180
    },
    {
      "epoch": 0.5403795421666241,
      "grad_norm": 1.7759674787521362,
      "learning_rate": 8.978011768349336e-06,
      "loss": 0.0362,
      "step": 330200
    },
    {
      "epoch": 0.5404122726052775,
      "grad_norm": 0.8624102473258972,
      "learning_rate": 8.977945876135818e-06,
      "loss": 0.035,
      "step": 330220
    },
    {
      "epoch": 0.5404450030439308,
      "grad_norm": 0.726600170135498,
      "learning_rate": 8.977879983922301e-06,
      "loss": 0.0369,
      "step": 330240
    },
    {
      "epoch": 0.5404777334825841,
      "grad_norm": 0.8226944208145142,
      "learning_rate": 8.977814091708783e-06,
      "loss": 0.0415,
      "step": 330260
    },
    {
      "epoch": 0.5405104639212375,
      "grad_norm": 0.224448099732399,
      "learning_rate": 8.977748199495267e-06,
      "loss": 0.0347,
      "step": 330280
    },
    {
      "epoch": 0.5405431943598908,
      "grad_norm": 5.948868751525879,
      "learning_rate": 8.977682307281748e-06,
      "loss": 0.0533,
      "step": 330300
    },
    {
      "epoch": 0.5405759247985441,
      "grad_norm": 0.5491560697555542,
      "learning_rate": 8.977616415068232e-06,
      "loss": 0.0377,
      "step": 330320
    },
    {
      "epoch": 0.5406086552371975,
      "grad_norm": 2.7293739318847656,
      "learning_rate": 8.977550522854716e-06,
      "loss": 0.0287,
      "step": 330340
    },
    {
      "epoch": 0.5406413856758508,
      "grad_norm": 1.6239545345306396,
      "learning_rate": 8.977484630641198e-06,
      "loss": 0.032,
      "step": 330360
    },
    {
      "epoch": 0.5406741161145042,
      "grad_norm": 0.7455852627754211,
      "learning_rate": 8.977418738427681e-06,
      "loss": 0.0409,
      "step": 330380
    },
    {
      "epoch": 0.5407068465531575,
      "grad_norm": 1.7709577083587646,
      "learning_rate": 8.977352846214163e-06,
      "loss": 0.0347,
      "step": 330400
    },
    {
      "epoch": 0.5407395769918109,
      "grad_norm": 7.1478166580200195,
      "learning_rate": 8.977286954000647e-06,
      "loss": 0.0555,
      "step": 330420
    },
    {
      "epoch": 0.5407723074304642,
      "grad_norm": 0.8555284142494202,
      "learning_rate": 8.977221061787129e-06,
      "loss": 0.04,
      "step": 330440
    },
    {
      "epoch": 0.5408050378691175,
      "grad_norm": 1.485290765762329,
      "learning_rate": 8.977155169573612e-06,
      "loss": 0.0387,
      "step": 330460
    },
    {
      "epoch": 0.5408377683077709,
      "grad_norm": 0.5720416903495789,
      "learning_rate": 8.977089277360096e-06,
      "loss": 0.0459,
      "step": 330480
    },
    {
      "epoch": 0.5408704987464242,
      "grad_norm": 0.9375726580619812,
      "learning_rate": 8.977023385146578e-06,
      "loss": 0.0387,
      "step": 330500
    },
    {
      "epoch": 0.5409032291850775,
      "grad_norm": 1.000815510749817,
      "learning_rate": 8.976957492933061e-06,
      "loss": 0.037,
      "step": 330520
    },
    {
      "epoch": 0.5409359596237309,
      "grad_norm": 1.8839406967163086,
      "learning_rate": 8.976891600719545e-06,
      "loss": 0.0302,
      "step": 330540
    },
    {
      "epoch": 0.5409686900623842,
      "grad_norm": 1.0354845523834229,
      "learning_rate": 8.976825708506027e-06,
      "loss": 0.0439,
      "step": 330560
    },
    {
      "epoch": 0.5410014205010375,
      "grad_norm": 1.1988073587417603,
      "learning_rate": 8.97675981629251e-06,
      "loss": 0.0273,
      "step": 330580
    },
    {
      "epoch": 0.5410341509396909,
      "grad_norm": 1.2703102827072144,
      "learning_rate": 8.976693924078992e-06,
      "loss": 0.0357,
      "step": 330600
    },
    {
      "epoch": 0.5410668813783442,
      "grad_norm": 0.8903508186340332,
      "learning_rate": 8.976628031865476e-06,
      "loss": 0.0478,
      "step": 330620
    },
    {
      "epoch": 0.5410996118169976,
      "grad_norm": 1.7402405738830566,
      "learning_rate": 8.976562139651958e-06,
      "loss": 0.0424,
      "step": 330640
    },
    {
      "epoch": 0.5411323422556509,
      "grad_norm": 0.5680590867996216,
      "learning_rate": 8.976496247438441e-06,
      "loss": 0.0415,
      "step": 330660
    },
    {
      "epoch": 0.5411650726943042,
      "grad_norm": 0.6646387577056885,
      "learning_rate": 8.976430355224925e-06,
      "loss": 0.024,
      "step": 330680
    },
    {
      "epoch": 0.5411978031329576,
      "grad_norm": 1.8617688417434692,
      "learning_rate": 8.976364463011407e-06,
      "loss": 0.0322,
      "step": 330700
    },
    {
      "epoch": 0.5412305335716109,
      "grad_norm": 2.038086414337158,
      "learning_rate": 8.97629857079789e-06,
      "loss": 0.029,
      "step": 330720
    },
    {
      "epoch": 0.5412632640102643,
      "grad_norm": 0.26957982778549194,
      "learning_rate": 8.976232678584372e-06,
      "loss": 0.0328,
      "step": 330740
    },
    {
      "epoch": 0.5412959944489176,
      "grad_norm": 1.4598798751831055,
      "learning_rate": 8.976166786370856e-06,
      "loss": 0.0346,
      "step": 330760
    },
    {
      "epoch": 0.5413287248875709,
      "grad_norm": 3.7878177165985107,
      "learning_rate": 8.976100894157338e-06,
      "loss": 0.0406,
      "step": 330780
    },
    {
      "epoch": 0.5413614553262243,
      "grad_norm": 1.1102524995803833,
      "learning_rate": 8.976035001943821e-06,
      "loss": 0.0398,
      "step": 330800
    },
    {
      "epoch": 0.5413941857648776,
      "grad_norm": 5.664592742919922,
      "learning_rate": 8.975969109730303e-06,
      "loss": 0.0374,
      "step": 330820
    },
    {
      "epoch": 0.541426916203531,
      "grad_norm": 0.8664678335189819,
      "learning_rate": 8.975903217516787e-06,
      "loss": 0.0369,
      "step": 330840
    },
    {
      "epoch": 0.5414596466421843,
      "grad_norm": 0.3233896493911743,
      "learning_rate": 8.975837325303269e-06,
      "loss": 0.0417,
      "step": 330860
    },
    {
      "epoch": 0.5414923770808376,
      "grad_norm": 1.2830026149749756,
      "learning_rate": 8.975771433089752e-06,
      "loss": 0.0324,
      "step": 330880
    },
    {
      "epoch": 0.541525107519491,
      "grad_norm": 2.2741873264312744,
      "learning_rate": 8.975705540876236e-06,
      "loss": 0.0545,
      "step": 330900
    },
    {
      "epoch": 0.5415578379581443,
      "grad_norm": 1.8338936567306519,
      "learning_rate": 8.975639648662718e-06,
      "loss": 0.0371,
      "step": 330920
    },
    {
      "epoch": 0.5415905683967976,
      "grad_norm": 0.6407508850097656,
      "learning_rate": 8.975573756449201e-06,
      "loss": 0.0372,
      "step": 330940
    },
    {
      "epoch": 0.541623298835451,
      "grad_norm": 0.38008126616477966,
      "learning_rate": 8.975507864235685e-06,
      "loss": 0.044,
      "step": 330960
    },
    {
      "epoch": 0.5416560292741043,
      "grad_norm": 1.9236128330230713,
      "learning_rate": 8.975441972022167e-06,
      "loss": 0.0341,
      "step": 330980
    },
    {
      "epoch": 0.5416887597127577,
      "grad_norm": 1.9496761560440063,
      "learning_rate": 8.97537607980865e-06,
      "loss": 0.0439,
      "step": 331000
    },
    {
      "epoch": 0.541721490151411,
      "grad_norm": 1.4349949359893799,
      "learning_rate": 8.975310187595132e-06,
      "loss": 0.0347,
      "step": 331020
    },
    {
      "epoch": 0.5417542205900644,
      "grad_norm": 0.9580225348472595,
      "learning_rate": 8.975244295381616e-06,
      "loss": 0.0332,
      "step": 331040
    },
    {
      "epoch": 0.5417869510287177,
      "grad_norm": 0.5593944787979126,
      "learning_rate": 8.9751784031681e-06,
      "loss": 0.0308,
      "step": 331060
    },
    {
      "epoch": 0.541819681467371,
      "grad_norm": 0.3873814344406128,
      "learning_rate": 8.975112510954581e-06,
      "loss": 0.0394,
      "step": 331080
    },
    {
      "epoch": 0.5418524119060244,
      "grad_norm": 0.9836968779563904,
      "learning_rate": 8.975046618741065e-06,
      "loss": 0.0402,
      "step": 331100
    },
    {
      "epoch": 0.5418851423446777,
      "grad_norm": 1.404059886932373,
      "learning_rate": 8.974980726527547e-06,
      "loss": 0.0339,
      "step": 331120
    },
    {
      "epoch": 0.541917872783331,
      "grad_norm": 1.9889616966247559,
      "learning_rate": 8.97491483431403e-06,
      "loss": 0.0466,
      "step": 331140
    },
    {
      "epoch": 0.5419506032219844,
      "grad_norm": 1.1943939924240112,
      "learning_rate": 8.974848942100512e-06,
      "loss": 0.0371,
      "step": 331160
    },
    {
      "epoch": 0.5419833336606377,
      "grad_norm": 0.9256513118743896,
      "learning_rate": 8.974783049886996e-06,
      "loss": 0.0333,
      "step": 331180
    },
    {
      "epoch": 0.542016064099291,
      "grad_norm": 1.0161782503128052,
      "learning_rate": 8.974717157673478e-06,
      "loss": 0.0266,
      "step": 331200
    },
    {
      "epoch": 0.5420487945379444,
      "grad_norm": 0.8991899490356445,
      "learning_rate": 8.974651265459961e-06,
      "loss": 0.0387,
      "step": 331220
    },
    {
      "epoch": 0.5420815249765978,
      "grad_norm": 0.9774160385131836,
      "learning_rate": 8.974585373246443e-06,
      "loss": 0.045,
      "step": 331240
    },
    {
      "epoch": 0.542114255415251,
      "grad_norm": 2.151844024658203,
      "learning_rate": 8.974519481032927e-06,
      "loss": 0.0357,
      "step": 331260
    },
    {
      "epoch": 0.5421469858539044,
      "grad_norm": 6.518030166625977,
      "learning_rate": 8.97445358881941e-06,
      "loss": 0.0417,
      "step": 331280
    },
    {
      "epoch": 0.5421797162925578,
      "grad_norm": 0.537883460521698,
      "learning_rate": 8.974387696605892e-06,
      "loss": 0.0292,
      "step": 331300
    },
    {
      "epoch": 0.542212446731211,
      "grad_norm": 0.5348183512687683,
      "learning_rate": 8.974321804392376e-06,
      "loss": 0.0349,
      "step": 331320
    },
    {
      "epoch": 0.5422451771698644,
      "grad_norm": 1.857779860496521,
      "learning_rate": 8.97425591217886e-06,
      "loss": 0.0354,
      "step": 331340
    },
    {
      "epoch": 0.5422779076085178,
      "grad_norm": 1.433858036994934,
      "learning_rate": 8.974190019965341e-06,
      "loss": 0.0338,
      "step": 331360
    },
    {
      "epoch": 0.5423106380471711,
      "grad_norm": 2.1096856594085693,
      "learning_rate": 8.974124127751825e-06,
      "loss": 0.0317,
      "step": 331380
    },
    {
      "epoch": 0.5423433684858244,
      "grad_norm": 0.7657150626182556,
      "learning_rate": 8.974058235538308e-06,
      "loss": 0.0357,
      "step": 331400
    },
    {
      "epoch": 0.5423760989244778,
      "grad_norm": 1.402982473373413,
      "learning_rate": 8.97399234332479e-06,
      "loss": 0.0379,
      "step": 331420
    },
    {
      "epoch": 0.5424088293631312,
      "grad_norm": 1.7650994062423706,
      "learning_rate": 8.973926451111274e-06,
      "loss": 0.0382,
      "step": 331440
    },
    {
      "epoch": 0.5424415598017844,
      "grad_norm": 0.9663903117179871,
      "learning_rate": 8.973860558897756e-06,
      "loss": 0.0317,
      "step": 331460
    },
    {
      "epoch": 0.5424742902404378,
      "grad_norm": 2.4389612674713135,
      "learning_rate": 8.97379466668424e-06,
      "loss": 0.0255,
      "step": 331480
    },
    {
      "epoch": 0.5425070206790912,
      "grad_norm": 1.2846627235412598,
      "learning_rate": 8.973728774470721e-06,
      "loss": 0.0318,
      "step": 331500
    },
    {
      "epoch": 0.5425397511177444,
      "grad_norm": 0.9130985140800476,
      "learning_rate": 8.973662882257205e-06,
      "loss": 0.0402,
      "step": 331520
    },
    {
      "epoch": 0.5425724815563978,
      "grad_norm": 0.836534321308136,
      "learning_rate": 8.973596990043687e-06,
      "loss": 0.0383,
      "step": 331540
    },
    {
      "epoch": 0.5426052119950512,
      "grad_norm": 0.9559293389320374,
      "learning_rate": 8.97353109783017e-06,
      "loss": 0.0389,
      "step": 331560
    },
    {
      "epoch": 0.5426379424337044,
      "grad_norm": 1.3224494457244873,
      "learning_rate": 8.973465205616652e-06,
      "loss": 0.0429,
      "step": 331580
    },
    {
      "epoch": 0.5426706728723578,
      "grad_norm": 1.1716501712799072,
      "learning_rate": 8.973399313403136e-06,
      "loss": 0.0325,
      "step": 331600
    },
    {
      "epoch": 0.5427034033110112,
      "grad_norm": 1.5518505573272705,
      "learning_rate": 8.973333421189618e-06,
      "loss": 0.0379,
      "step": 331620
    },
    {
      "epoch": 0.5427361337496646,
      "grad_norm": 0.5465868711471558,
      "learning_rate": 8.973267528976101e-06,
      "loss": 0.0284,
      "step": 331640
    },
    {
      "epoch": 0.5427688641883178,
      "grad_norm": 0.2487715780735016,
      "learning_rate": 8.973201636762583e-06,
      "loss": 0.0416,
      "step": 331660
    },
    {
      "epoch": 0.5428015946269712,
      "grad_norm": 0.8205496668815613,
      "learning_rate": 8.973135744549067e-06,
      "loss": 0.0254,
      "step": 331680
    },
    {
      "epoch": 0.5428343250656246,
      "grad_norm": 1.7132130861282349,
      "learning_rate": 8.97306985233555e-06,
      "loss": 0.0293,
      "step": 331700
    },
    {
      "epoch": 0.5428670555042778,
      "grad_norm": 0.21913021802902222,
      "learning_rate": 8.973003960122032e-06,
      "loss": 0.0537,
      "step": 331720
    },
    {
      "epoch": 0.5428997859429312,
      "grad_norm": 0.39626166224479675,
      "learning_rate": 8.972938067908516e-06,
      "loss": 0.0331,
      "step": 331740
    },
    {
      "epoch": 0.5429325163815846,
      "grad_norm": 1.2203658819198608,
      "learning_rate": 8.972872175695e-06,
      "loss": 0.0356,
      "step": 331760
    },
    {
      "epoch": 0.5429652468202378,
      "grad_norm": 3.7736356258392334,
      "learning_rate": 8.972806283481481e-06,
      "loss": 0.049,
      "step": 331780
    },
    {
      "epoch": 0.5429979772588912,
      "grad_norm": 0.6451684832572937,
      "learning_rate": 8.972740391267965e-06,
      "loss": 0.0426,
      "step": 331800
    },
    {
      "epoch": 0.5430307076975446,
      "grad_norm": 4.05162239074707,
      "learning_rate": 8.972674499054449e-06,
      "loss": 0.0491,
      "step": 331820
    },
    {
      "epoch": 0.543063438136198,
      "grad_norm": 0.4189039170742035,
      "learning_rate": 8.97260860684093e-06,
      "loss": 0.0354,
      "step": 331840
    },
    {
      "epoch": 0.5430961685748512,
      "grad_norm": 2.1882855892181396,
      "learning_rate": 8.972542714627414e-06,
      "loss": 0.0274,
      "step": 331860
    },
    {
      "epoch": 0.5431288990135046,
      "grad_norm": 1.7970136404037476,
      "learning_rate": 8.972476822413896e-06,
      "loss": 0.0321,
      "step": 331880
    },
    {
      "epoch": 0.543161629452158,
      "grad_norm": 1.21853768825531,
      "learning_rate": 8.97241093020038e-06,
      "loss": 0.0346,
      "step": 331900
    },
    {
      "epoch": 0.5431943598908112,
      "grad_norm": 1.3439377546310425,
      "learning_rate": 8.972345037986861e-06,
      "loss": 0.033,
      "step": 331920
    },
    {
      "epoch": 0.5432270903294646,
      "grad_norm": 1.6968191862106323,
      "learning_rate": 8.972279145773345e-06,
      "loss": 0.0428,
      "step": 331940
    },
    {
      "epoch": 0.543259820768118,
      "grad_norm": 0.6735855340957642,
      "learning_rate": 8.972213253559827e-06,
      "loss": 0.026,
      "step": 331960
    },
    {
      "epoch": 0.5432925512067712,
      "grad_norm": 0.6803213357925415,
      "learning_rate": 8.97214736134631e-06,
      "loss": 0.052,
      "step": 331980
    },
    {
      "epoch": 0.5433252816454246,
      "grad_norm": 1.9178441762924194,
      "learning_rate": 8.972081469132792e-06,
      "loss": 0.0311,
      "step": 332000
    },
    {
      "epoch": 0.543358012084078,
      "grad_norm": 1.1117820739746094,
      "learning_rate": 8.972015576919276e-06,
      "loss": 0.0409,
      "step": 332020
    },
    {
      "epoch": 0.5433907425227313,
      "grad_norm": 0.9631839394569397,
      "learning_rate": 8.971949684705758e-06,
      "loss": 0.0475,
      "step": 332040
    },
    {
      "epoch": 0.5434234729613846,
      "grad_norm": 1.0417673587799072,
      "learning_rate": 8.971883792492241e-06,
      "loss": 0.0408,
      "step": 332060
    },
    {
      "epoch": 0.543456203400038,
      "grad_norm": 0.10917074233293533,
      "learning_rate": 8.971817900278725e-06,
      "loss": 0.0428,
      "step": 332080
    },
    {
      "epoch": 0.5434889338386913,
      "grad_norm": 1.665522575378418,
      "learning_rate": 8.971752008065207e-06,
      "loss": 0.0367,
      "step": 332100
    },
    {
      "epoch": 0.5435216642773446,
      "grad_norm": 0.550377607345581,
      "learning_rate": 8.97168611585169e-06,
      "loss": 0.0351,
      "step": 332120
    },
    {
      "epoch": 0.543554394715998,
      "grad_norm": 0.5851130485534668,
      "learning_rate": 8.971620223638174e-06,
      "loss": 0.0393,
      "step": 332140
    },
    {
      "epoch": 0.5435871251546514,
      "grad_norm": 2.031721830368042,
      "learning_rate": 8.971554331424656e-06,
      "loss": 0.0417,
      "step": 332160
    },
    {
      "epoch": 0.5436198555933046,
      "grad_norm": 1.790213942527771,
      "learning_rate": 8.97148843921114e-06,
      "loss": 0.0423,
      "step": 332180
    },
    {
      "epoch": 0.543652586031958,
      "grad_norm": 10.915375709533691,
      "learning_rate": 8.971422546997623e-06,
      "loss": 0.0377,
      "step": 332200
    },
    {
      "epoch": 0.5436853164706114,
      "grad_norm": 0.5706985592842102,
      "learning_rate": 8.971356654784105e-06,
      "loss": 0.042,
      "step": 332220
    },
    {
      "epoch": 0.5437180469092647,
      "grad_norm": 1.0025652647018433,
      "learning_rate": 8.971290762570589e-06,
      "loss": 0.0369,
      "step": 332240
    },
    {
      "epoch": 0.543750777347918,
      "grad_norm": 0.6449366211891174,
      "learning_rate": 8.97122487035707e-06,
      "loss": 0.0459,
      "step": 332260
    },
    {
      "epoch": 0.5437835077865714,
      "grad_norm": 6.0812859535217285,
      "learning_rate": 8.971158978143554e-06,
      "loss": 0.0357,
      "step": 332280
    },
    {
      "epoch": 0.5438162382252247,
      "grad_norm": 4.9538655281066895,
      "learning_rate": 8.971093085930036e-06,
      "loss": 0.0436,
      "step": 332300
    },
    {
      "epoch": 0.543848968663878,
      "grad_norm": 1.3775074481964111,
      "learning_rate": 8.97102719371652e-06,
      "loss": 0.0352,
      "step": 332320
    },
    {
      "epoch": 0.5438816991025314,
      "grad_norm": 1.610912561416626,
      "learning_rate": 8.970961301503001e-06,
      "loss": 0.0329,
      "step": 332340
    },
    {
      "epoch": 0.5439144295411847,
      "grad_norm": 2.078630208969116,
      "learning_rate": 8.970895409289485e-06,
      "loss": 0.0359,
      "step": 332360
    },
    {
      "epoch": 0.543947159979838,
      "grad_norm": 5.126528739929199,
      "learning_rate": 8.970829517075967e-06,
      "loss": 0.0376,
      "step": 332380
    },
    {
      "epoch": 0.5439798904184914,
      "grad_norm": 0.9343587160110474,
      "learning_rate": 8.97076362486245e-06,
      "loss": 0.0408,
      "step": 332400
    },
    {
      "epoch": 0.5440126208571447,
      "grad_norm": 1.1691462993621826,
      "learning_rate": 8.970697732648932e-06,
      "loss": 0.0378,
      "step": 332420
    },
    {
      "epoch": 0.5440453512957981,
      "grad_norm": 1.0945476293563843,
      "learning_rate": 8.970631840435416e-06,
      "loss": 0.0454,
      "step": 332440
    },
    {
      "epoch": 0.5440780817344514,
      "grad_norm": 2.527714967727661,
      "learning_rate": 8.9705659482219e-06,
      "loss": 0.0402,
      "step": 332460
    },
    {
      "epoch": 0.5441108121731048,
      "grad_norm": 1.8016126155853271,
      "learning_rate": 8.970500056008382e-06,
      "loss": 0.0425,
      "step": 332480
    },
    {
      "epoch": 0.5441435426117581,
      "grad_norm": 3.524869918823242,
      "learning_rate": 8.970434163794865e-06,
      "loss": 0.0444,
      "step": 332500
    },
    {
      "epoch": 0.5441762730504114,
      "grad_norm": 0.4316956400871277,
      "learning_rate": 8.970368271581347e-06,
      "loss": 0.038,
      "step": 332520
    },
    {
      "epoch": 0.5442090034890648,
      "grad_norm": 2.2635769844055176,
      "learning_rate": 8.97030237936783e-06,
      "loss": 0.0394,
      "step": 332540
    },
    {
      "epoch": 0.5442417339277181,
      "grad_norm": 1.0703719854354858,
      "learning_rate": 8.970236487154314e-06,
      "loss": 0.0369,
      "step": 332560
    },
    {
      "epoch": 0.5442744643663714,
      "grad_norm": 0.7998769879341125,
      "learning_rate": 8.970170594940796e-06,
      "loss": 0.0263,
      "step": 332580
    },
    {
      "epoch": 0.5443071948050248,
      "grad_norm": 0.393924742937088,
      "learning_rate": 8.97010470272728e-06,
      "loss": 0.0241,
      "step": 332600
    },
    {
      "epoch": 0.5443399252436781,
      "grad_norm": 2.8352761268615723,
      "learning_rate": 8.970038810513763e-06,
      "loss": 0.0415,
      "step": 332620
    },
    {
      "epoch": 0.5443726556823315,
      "grad_norm": 1.2471132278442383,
      "learning_rate": 8.969972918300245e-06,
      "loss": 0.0574,
      "step": 332640
    },
    {
      "epoch": 0.5444053861209848,
      "grad_norm": 2.1444509029388428,
      "learning_rate": 8.969907026086729e-06,
      "loss": 0.0347,
      "step": 332660
    },
    {
      "epoch": 0.5444381165596381,
      "grad_norm": 2.222984790802002,
      "learning_rate": 8.96984113387321e-06,
      "loss": 0.0387,
      "step": 332680
    },
    {
      "epoch": 0.5444708469982915,
      "grad_norm": 3.207702398300171,
      "learning_rate": 8.969775241659694e-06,
      "loss": 0.0337,
      "step": 332700
    },
    {
      "epoch": 0.5445035774369448,
      "grad_norm": 0.8982239961624146,
      "learning_rate": 8.969709349446176e-06,
      "loss": 0.0369,
      "step": 332720
    },
    {
      "epoch": 0.5445363078755981,
      "grad_norm": 0.5166094899177551,
      "learning_rate": 8.96964345723266e-06,
      "loss": 0.0387,
      "step": 332740
    },
    {
      "epoch": 0.5445690383142515,
      "grad_norm": 0.9454465508460999,
      "learning_rate": 8.969577565019142e-06,
      "loss": 0.0424,
      "step": 332760
    },
    {
      "epoch": 0.5446017687529048,
      "grad_norm": 1.1906675100326538,
      "learning_rate": 8.969511672805625e-06,
      "loss": 0.038,
      "step": 332780
    },
    {
      "epoch": 0.5446344991915582,
      "grad_norm": 1.8740566968917847,
      "learning_rate": 8.969445780592109e-06,
      "loss": 0.03,
      "step": 332800
    },
    {
      "epoch": 0.5446672296302115,
      "grad_norm": 1.2690058946609497,
      "learning_rate": 8.96937988837859e-06,
      "loss": 0.0355,
      "step": 332820
    },
    {
      "epoch": 0.5446999600688649,
      "grad_norm": 2.010822296142578,
      "learning_rate": 8.969313996165074e-06,
      "loss": 0.0457,
      "step": 332840
    },
    {
      "epoch": 0.5447326905075182,
      "grad_norm": 0.7949016690254211,
      "learning_rate": 8.969248103951556e-06,
      "loss": 0.0403,
      "step": 332860
    },
    {
      "epoch": 0.5447654209461715,
      "grad_norm": 0.34972083568573,
      "learning_rate": 8.96918221173804e-06,
      "loss": 0.0338,
      "step": 332880
    },
    {
      "epoch": 0.5447981513848249,
      "grad_norm": 4.305199146270752,
      "learning_rate": 8.969116319524522e-06,
      "loss": 0.0383,
      "step": 332900
    },
    {
      "epoch": 0.5448308818234782,
      "grad_norm": 1.4383975267410278,
      "learning_rate": 8.969050427311005e-06,
      "loss": 0.0478,
      "step": 332920
    },
    {
      "epoch": 0.5448636122621315,
      "grad_norm": 1.6670091152191162,
      "learning_rate": 8.968984535097489e-06,
      "loss": 0.0321,
      "step": 332940
    },
    {
      "epoch": 0.5448963427007849,
      "grad_norm": 0.7217456698417664,
      "learning_rate": 8.96891864288397e-06,
      "loss": 0.039,
      "step": 332960
    },
    {
      "epoch": 0.5449290731394382,
      "grad_norm": 0.3853898048400879,
      "learning_rate": 8.968852750670454e-06,
      "loss": 0.0294,
      "step": 332980
    },
    {
      "epoch": 0.5449618035780915,
      "grad_norm": 2.7774972915649414,
      "learning_rate": 8.968786858456938e-06,
      "loss": 0.0421,
      "step": 333000
    },
    {
      "epoch": 0.5449945340167449,
      "grad_norm": 2.1102852821350098,
      "learning_rate": 8.96872096624342e-06,
      "loss": 0.0321,
      "step": 333020
    },
    {
      "epoch": 0.5450272644553983,
      "grad_norm": 0.6705310344696045,
      "learning_rate": 8.968655074029903e-06,
      "loss": 0.0404,
      "step": 333040
    },
    {
      "epoch": 0.5450599948940515,
      "grad_norm": 1.6213459968566895,
      "learning_rate": 8.968589181816385e-06,
      "loss": 0.0387,
      "step": 333060
    },
    {
      "epoch": 0.5450927253327049,
      "grad_norm": 7.62994384765625,
      "learning_rate": 8.968523289602869e-06,
      "loss": 0.0411,
      "step": 333080
    },
    {
      "epoch": 0.5451254557713583,
      "grad_norm": 0.7315056324005127,
      "learning_rate": 8.96845739738935e-06,
      "loss": 0.0445,
      "step": 333100
    },
    {
      "epoch": 0.5451581862100116,
      "grad_norm": 1.0889523029327393,
      "learning_rate": 8.968391505175834e-06,
      "loss": 0.0261,
      "step": 333120
    },
    {
      "epoch": 0.5451909166486649,
      "grad_norm": 1.8167740106582642,
      "learning_rate": 8.968325612962318e-06,
      "loss": 0.0368,
      "step": 333140
    },
    {
      "epoch": 0.5452236470873183,
      "grad_norm": 1.2019239664077759,
      "learning_rate": 8.9682597207488e-06,
      "loss": 0.037,
      "step": 333160
    },
    {
      "epoch": 0.5452563775259716,
      "grad_norm": 0.6994615197181702,
      "learning_rate": 8.968193828535283e-06,
      "loss": 0.0384,
      "step": 333180
    },
    {
      "epoch": 0.5452891079646249,
      "grad_norm": 2.384798288345337,
      "learning_rate": 8.968127936321765e-06,
      "loss": 0.0377,
      "step": 333200
    },
    {
      "epoch": 0.5453218384032783,
      "grad_norm": 0.6612632870674133,
      "learning_rate": 8.968062044108249e-06,
      "loss": 0.0297,
      "step": 333220
    },
    {
      "epoch": 0.5453545688419317,
      "grad_norm": 1.2749552726745605,
      "learning_rate": 8.96799615189473e-06,
      "loss": 0.0374,
      "step": 333240
    },
    {
      "epoch": 0.5453872992805849,
      "grad_norm": 1.686181664466858,
      "learning_rate": 8.967930259681214e-06,
      "loss": 0.0287,
      "step": 333260
    },
    {
      "epoch": 0.5454200297192383,
      "grad_norm": 1.4439154863357544,
      "learning_rate": 8.967864367467696e-06,
      "loss": 0.0418,
      "step": 333280
    },
    {
      "epoch": 0.5454527601578917,
      "grad_norm": 1.010648488998413,
      "learning_rate": 8.96779847525418e-06,
      "loss": 0.0461,
      "step": 333300
    },
    {
      "epoch": 0.5454854905965449,
      "grad_norm": 2.3811025619506836,
      "learning_rate": 8.967732583040663e-06,
      "loss": 0.0377,
      "step": 333320
    },
    {
      "epoch": 0.5455182210351983,
      "grad_norm": 0.9254149794578552,
      "learning_rate": 8.967666690827145e-06,
      "loss": 0.0407,
      "step": 333340
    },
    {
      "epoch": 0.5455509514738517,
      "grad_norm": 1.2793340682983398,
      "learning_rate": 8.967600798613629e-06,
      "loss": 0.036,
      "step": 333360
    },
    {
      "epoch": 0.545583681912505,
      "grad_norm": 0.4975568950176239,
      "learning_rate": 8.967534906400112e-06,
      "loss": 0.0359,
      "step": 333380
    },
    {
      "epoch": 0.5456164123511583,
      "grad_norm": 2.124960422515869,
      "learning_rate": 8.967469014186594e-06,
      "loss": 0.039,
      "step": 333400
    },
    {
      "epoch": 0.5456491427898117,
      "grad_norm": 1.4476100206375122,
      "learning_rate": 8.967403121973078e-06,
      "loss": 0.0413,
      "step": 333420
    },
    {
      "epoch": 0.545681873228465,
      "grad_norm": 1.3438977003097534,
      "learning_rate": 8.96733722975956e-06,
      "loss": 0.0368,
      "step": 333440
    },
    {
      "epoch": 0.5457146036671183,
      "grad_norm": 1.3406198024749756,
      "learning_rate": 8.967271337546043e-06,
      "loss": 0.0394,
      "step": 333460
    },
    {
      "epoch": 0.5457473341057717,
      "grad_norm": 1.0882210731506348,
      "learning_rate": 8.967205445332525e-06,
      "loss": 0.0392,
      "step": 333480
    },
    {
      "epoch": 0.5457800645444251,
      "grad_norm": 2.2627711296081543,
      "learning_rate": 8.967139553119009e-06,
      "loss": 0.0336,
      "step": 333500
    },
    {
      "epoch": 0.5458127949830783,
      "grad_norm": 1.4990675449371338,
      "learning_rate": 8.967073660905492e-06,
      "loss": 0.041,
      "step": 333520
    },
    {
      "epoch": 0.5458455254217317,
      "grad_norm": 0.29865363240242004,
      "learning_rate": 8.967007768691974e-06,
      "loss": 0.0388,
      "step": 333540
    },
    {
      "epoch": 0.5458782558603851,
      "grad_norm": 1.242926001548767,
      "learning_rate": 8.966941876478458e-06,
      "loss": 0.0343,
      "step": 333560
    },
    {
      "epoch": 0.5459109862990383,
      "grad_norm": 1.075952172279358,
      "learning_rate": 8.96687598426494e-06,
      "loss": 0.0467,
      "step": 333580
    },
    {
      "epoch": 0.5459437167376917,
      "grad_norm": 1.34804105758667,
      "learning_rate": 8.966810092051423e-06,
      "loss": 0.0434,
      "step": 333600
    },
    {
      "epoch": 0.5459764471763451,
      "grad_norm": 0.8342357873916626,
      "learning_rate": 8.966744199837905e-06,
      "loss": 0.0362,
      "step": 333620
    },
    {
      "epoch": 0.5460091776149983,
      "grad_norm": 1.066033959388733,
      "learning_rate": 8.966678307624389e-06,
      "loss": 0.0388,
      "step": 333640
    },
    {
      "epoch": 0.5460419080536517,
      "grad_norm": 1.760301947593689,
      "learning_rate": 8.96661241541087e-06,
      "loss": 0.0302,
      "step": 333660
    },
    {
      "epoch": 0.5460746384923051,
      "grad_norm": 1.5114150047302246,
      "learning_rate": 8.966546523197354e-06,
      "loss": 0.0338,
      "step": 333680
    },
    {
      "epoch": 0.5461073689309585,
      "grad_norm": 1.0202769041061401,
      "learning_rate": 8.966480630983836e-06,
      "loss": 0.035,
      "step": 333700
    },
    {
      "epoch": 0.5461400993696117,
      "grad_norm": 0.4778885841369629,
      "learning_rate": 8.96641473877032e-06,
      "loss": 0.0316,
      "step": 333720
    },
    {
      "epoch": 0.5461728298082651,
      "grad_norm": 0.9931638240814209,
      "learning_rate": 8.966348846556803e-06,
      "loss": 0.0347,
      "step": 333740
    },
    {
      "epoch": 0.5462055602469185,
      "grad_norm": 3.3041892051696777,
      "learning_rate": 8.966282954343285e-06,
      "loss": 0.046,
      "step": 333760
    },
    {
      "epoch": 0.5462382906855717,
      "grad_norm": 1.368299961090088,
      "learning_rate": 8.966217062129769e-06,
      "loss": 0.0386,
      "step": 333780
    },
    {
      "epoch": 0.5462710211242251,
      "grad_norm": 1.4729530811309814,
      "learning_rate": 8.966151169916252e-06,
      "loss": 0.0427,
      "step": 333800
    },
    {
      "epoch": 0.5463037515628785,
      "grad_norm": 0.636819064617157,
      "learning_rate": 8.966085277702734e-06,
      "loss": 0.0425,
      "step": 333820
    },
    {
      "epoch": 0.5463364820015317,
      "grad_norm": 2.3099615573883057,
      "learning_rate": 8.966019385489218e-06,
      "loss": 0.0348,
      "step": 333840
    },
    {
      "epoch": 0.5463692124401851,
      "grad_norm": 1.4845938682556152,
      "learning_rate": 8.965953493275702e-06,
      "loss": 0.0281,
      "step": 333860
    },
    {
      "epoch": 0.5464019428788385,
      "grad_norm": 1.5467571020126343,
      "learning_rate": 8.965887601062183e-06,
      "loss": 0.0355,
      "step": 333880
    },
    {
      "epoch": 0.5464346733174918,
      "grad_norm": 1.36806321144104,
      "learning_rate": 8.965821708848667e-06,
      "loss": 0.0628,
      "step": 333900
    },
    {
      "epoch": 0.5464674037561451,
      "grad_norm": 5.555110931396484,
      "learning_rate": 8.965755816635149e-06,
      "loss": 0.0357,
      "step": 333920
    },
    {
      "epoch": 0.5465001341947985,
      "grad_norm": 1.928868055343628,
      "learning_rate": 8.965689924421633e-06,
      "loss": 0.049,
      "step": 333940
    },
    {
      "epoch": 0.5465328646334519,
      "grad_norm": 0.6919645667076111,
      "learning_rate": 8.965624032208114e-06,
      "loss": 0.053,
      "step": 333960
    },
    {
      "epoch": 0.5465655950721051,
      "grad_norm": 0.6845467686653137,
      "learning_rate": 8.965558139994598e-06,
      "loss": 0.0322,
      "step": 333980
    },
    {
      "epoch": 0.5465983255107585,
      "grad_norm": 1.597718358039856,
      "learning_rate": 8.96549224778108e-06,
      "loss": 0.044,
      "step": 334000
    },
    {
      "epoch": 0.5466310559494119,
      "grad_norm": 1.2899712324142456,
      "learning_rate": 8.965426355567563e-06,
      "loss": 0.0312,
      "step": 334020
    },
    {
      "epoch": 0.5466637863880651,
      "grad_norm": 1.3871409893035889,
      "learning_rate": 8.965360463354045e-06,
      "loss": 0.0439,
      "step": 334040
    },
    {
      "epoch": 0.5466965168267185,
      "grad_norm": 0.63660728931427,
      "learning_rate": 8.965294571140529e-06,
      "loss": 0.0328,
      "step": 334060
    },
    {
      "epoch": 0.5467292472653719,
      "grad_norm": 1.4383480548858643,
      "learning_rate": 8.965228678927011e-06,
      "loss": 0.0378,
      "step": 334080
    },
    {
      "epoch": 0.5467619777040252,
      "grad_norm": 0.6736664772033691,
      "learning_rate": 8.965162786713494e-06,
      "loss": 0.0413,
      "step": 334100
    },
    {
      "epoch": 0.5467947081426785,
      "grad_norm": 0.5794800519943237,
      "learning_rate": 8.965096894499978e-06,
      "loss": 0.034,
      "step": 334120
    },
    {
      "epoch": 0.5468274385813319,
      "grad_norm": 0.846928060054779,
      "learning_rate": 8.96503100228646e-06,
      "loss": 0.0345,
      "step": 334140
    },
    {
      "epoch": 0.5468601690199852,
      "grad_norm": 1.3792353868484497,
      "learning_rate": 8.964965110072944e-06,
      "loss": 0.0438,
      "step": 334160
    },
    {
      "epoch": 0.5468928994586385,
      "grad_norm": 1.7955900430679321,
      "learning_rate": 8.964899217859427e-06,
      "loss": 0.0296,
      "step": 334180
    },
    {
      "epoch": 0.5469256298972919,
      "grad_norm": 2.401549816131592,
      "learning_rate": 8.964833325645909e-06,
      "loss": 0.0395,
      "step": 334200
    },
    {
      "epoch": 0.5469583603359452,
      "grad_norm": 1.5179600715637207,
      "learning_rate": 8.964767433432393e-06,
      "loss": 0.0441,
      "step": 334220
    },
    {
      "epoch": 0.5469910907745985,
      "grad_norm": 1.432805061340332,
      "learning_rate": 8.964701541218876e-06,
      "loss": 0.0392,
      "step": 334240
    },
    {
      "epoch": 0.5470238212132519,
      "grad_norm": 1.0329205989837646,
      "learning_rate": 8.964635649005358e-06,
      "loss": 0.0492,
      "step": 334260
    },
    {
      "epoch": 0.5470565516519053,
      "grad_norm": 0.971979558467865,
      "learning_rate": 8.964569756791842e-06,
      "loss": 0.0331,
      "step": 334280
    },
    {
      "epoch": 0.5470892820905586,
      "grad_norm": 1.7008235454559326,
      "learning_rate": 8.964503864578324e-06,
      "loss": 0.0347,
      "step": 334300
    },
    {
      "epoch": 0.5471220125292119,
      "grad_norm": 1.176363229751587,
      "learning_rate": 8.964437972364807e-06,
      "loss": 0.029,
      "step": 334320
    },
    {
      "epoch": 0.5471547429678653,
      "grad_norm": 1.1539421081542969,
      "learning_rate": 8.964372080151289e-06,
      "loss": 0.0502,
      "step": 334340
    },
    {
      "epoch": 0.5471874734065186,
      "grad_norm": 1.6285256147384644,
      "learning_rate": 8.964306187937773e-06,
      "loss": 0.0473,
      "step": 334360
    },
    {
      "epoch": 0.5472202038451719,
      "grad_norm": 1.1206603050231934,
      "learning_rate": 8.964240295724254e-06,
      "loss": 0.0511,
      "step": 334380
    },
    {
      "epoch": 0.5472529342838253,
      "grad_norm": 2.0295441150665283,
      "learning_rate": 8.964174403510738e-06,
      "loss": 0.0276,
      "step": 334400
    },
    {
      "epoch": 0.5472856647224786,
      "grad_norm": 1.5945323705673218,
      "learning_rate": 8.96410851129722e-06,
      "loss": 0.0418,
      "step": 334420
    },
    {
      "epoch": 0.5473183951611319,
      "grad_norm": 0.4973371624946594,
      "learning_rate": 8.964042619083704e-06,
      "loss": 0.041,
      "step": 334440
    },
    {
      "epoch": 0.5473511255997853,
      "grad_norm": 1.5456939935684204,
      "learning_rate": 8.963976726870185e-06,
      "loss": 0.0447,
      "step": 334460
    },
    {
      "epoch": 0.5473838560384386,
      "grad_norm": 2.474158525466919,
      "learning_rate": 8.963910834656669e-06,
      "loss": 0.0377,
      "step": 334480
    },
    {
      "epoch": 0.547416586477092,
      "grad_norm": 3.44901180267334,
      "learning_rate": 8.963844942443151e-06,
      "loss": 0.0398,
      "step": 334500
    },
    {
      "epoch": 0.5474493169157453,
      "grad_norm": 0.25489169359207153,
      "learning_rate": 8.963779050229635e-06,
      "loss": 0.0317,
      "step": 334520
    },
    {
      "epoch": 0.5474820473543986,
      "grad_norm": 0.3540612459182739,
      "learning_rate": 8.963713158016118e-06,
      "loss": 0.0309,
      "step": 334540
    },
    {
      "epoch": 0.547514777793052,
      "grad_norm": 1.1663557291030884,
      "learning_rate": 8.9636472658026e-06,
      "loss": 0.0385,
      "step": 334560
    },
    {
      "epoch": 0.5475475082317053,
      "grad_norm": 0.5431115031242371,
      "learning_rate": 8.963581373589084e-06,
      "loss": 0.0319,
      "step": 334580
    },
    {
      "epoch": 0.5475802386703587,
      "grad_norm": 0.8235801458358765,
      "learning_rate": 8.963515481375567e-06,
      "loss": 0.0376,
      "step": 334600
    },
    {
      "epoch": 0.547612969109012,
      "grad_norm": 2.2316696643829346,
      "learning_rate": 8.963449589162049e-06,
      "loss": 0.0386,
      "step": 334620
    },
    {
      "epoch": 0.5476456995476653,
      "grad_norm": 1.5926690101623535,
      "learning_rate": 8.963383696948533e-06,
      "loss": 0.04,
      "step": 334640
    },
    {
      "epoch": 0.5476784299863187,
      "grad_norm": 1.4170560836791992,
      "learning_rate": 8.963317804735016e-06,
      "loss": 0.0422,
      "step": 334660
    },
    {
      "epoch": 0.547711160424972,
      "grad_norm": 2.029869794845581,
      "learning_rate": 8.963251912521498e-06,
      "loss": 0.0329,
      "step": 334680
    },
    {
      "epoch": 0.5477438908636254,
      "grad_norm": 3.080021381378174,
      "learning_rate": 8.963186020307982e-06,
      "loss": 0.0399,
      "step": 334700
    },
    {
      "epoch": 0.5477766213022787,
      "grad_norm": 0.5292752385139465,
      "learning_rate": 8.963120128094464e-06,
      "loss": 0.0293,
      "step": 334720
    },
    {
      "epoch": 0.547809351740932,
      "grad_norm": 0.6331883668899536,
      "learning_rate": 8.963054235880947e-06,
      "loss": 0.0398,
      "step": 334740
    },
    {
      "epoch": 0.5478420821795854,
      "grad_norm": 2.8075528144836426,
      "learning_rate": 8.962988343667429e-06,
      "loss": 0.0438,
      "step": 334760
    },
    {
      "epoch": 0.5478748126182387,
      "grad_norm": 3.7604548931121826,
      "learning_rate": 8.962922451453913e-06,
      "loss": 0.0366,
      "step": 334780
    },
    {
      "epoch": 0.547907543056892,
      "grad_norm": 1.6744040250778198,
      "learning_rate": 8.962856559240395e-06,
      "loss": 0.0332,
      "step": 334800
    },
    {
      "epoch": 0.5479402734955454,
      "grad_norm": 1.276239037513733,
      "learning_rate": 8.962790667026878e-06,
      "loss": 0.0549,
      "step": 334820
    },
    {
      "epoch": 0.5479730039341987,
      "grad_norm": 0.5278752446174622,
      "learning_rate": 8.96272477481336e-06,
      "loss": 0.0292,
      "step": 334840
    },
    {
      "epoch": 0.548005734372852,
      "grad_norm": 2.362839698791504,
      "learning_rate": 8.962658882599844e-06,
      "loss": 0.0384,
      "step": 334860
    },
    {
      "epoch": 0.5480384648115054,
      "grad_norm": 0.2990768849849701,
      "learning_rate": 8.962592990386326e-06,
      "loss": 0.0365,
      "step": 334880
    },
    {
      "epoch": 0.5480711952501588,
      "grad_norm": 2.4933929443359375,
      "learning_rate": 8.962527098172809e-06,
      "loss": 0.0395,
      "step": 334900
    },
    {
      "epoch": 0.548103925688812,
      "grad_norm": 1.8957074880599976,
      "learning_rate": 8.962461205959293e-06,
      "loss": 0.0476,
      "step": 334920
    },
    {
      "epoch": 0.5481366561274654,
      "grad_norm": 2.1014597415924072,
      "learning_rate": 8.962395313745775e-06,
      "loss": 0.0453,
      "step": 334940
    },
    {
      "epoch": 0.5481693865661188,
      "grad_norm": 0.5723299384117126,
      "learning_rate": 8.962329421532258e-06,
      "loss": 0.0318,
      "step": 334960
    },
    {
      "epoch": 0.5482021170047721,
      "grad_norm": 0.7821228504180908,
      "learning_rate": 8.962263529318742e-06,
      "loss": 0.0374,
      "step": 334980
    },
    {
      "epoch": 0.5482348474434254,
      "grad_norm": 1.8794361352920532,
      "learning_rate": 8.962197637105224e-06,
      "loss": 0.0529,
      "step": 335000
    },
    {
      "epoch": 0.5482675778820788,
      "grad_norm": 1.4488543272018433,
      "learning_rate": 8.962131744891707e-06,
      "loss": 0.0298,
      "step": 335020
    },
    {
      "epoch": 0.5483003083207321,
      "grad_norm": 2.5297651290893555,
      "learning_rate": 8.96206585267819e-06,
      "loss": 0.0399,
      "step": 335040
    },
    {
      "epoch": 0.5483330387593854,
      "grad_norm": 1.2021375894546509,
      "learning_rate": 8.961999960464673e-06,
      "loss": 0.0426,
      "step": 335060
    },
    {
      "epoch": 0.5483657691980388,
      "grad_norm": 2.027358293533325,
      "learning_rate": 8.961934068251156e-06,
      "loss": 0.0364,
      "step": 335080
    },
    {
      "epoch": 0.5483984996366922,
      "grad_norm": 1.0954055786132812,
      "learning_rate": 8.961868176037638e-06,
      "loss": 0.0367,
      "step": 335100
    },
    {
      "epoch": 0.5484312300753454,
      "grad_norm": 2.654125452041626,
      "learning_rate": 8.961802283824122e-06,
      "loss": 0.0306,
      "step": 335120
    },
    {
      "epoch": 0.5484639605139988,
      "grad_norm": 2.1004867553710938,
      "learning_rate": 8.961736391610604e-06,
      "loss": 0.0448,
      "step": 335140
    },
    {
      "epoch": 0.5484966909526522,
      "grad_norm": 0.27516159415245056,
      "learning_rate": 8.961670499397087e-06,
      "loss": 0.0309,
      "step": 335160
    },
    {
      "epoch": 0.5485294213913054,
      "grad_norm": 1.4451230764389038,
      "learning_rate": 8.96160460718357e-06,
      "loss": 0.0421,
      "step": 335180
    },
    {
      "epoch": 0.5485621518299588,
      "grad_norm": 0.8709158301353455,
      "learning_rate": 8.961538714970053e-06,
      "loss": 0.0366,
      "step": 335200
    },
    {
      "epoch": 0.5485948822686122,
      "grad_norm": 1.2798216342926025,
      "learning_rate": 8.961472822756535e-06,
      "loss": 0.0448,
      "step": 335220
    },
    {
      "epoch": 0.5486276127072655,
      "grad_norm": 0.7373136878013611,
      "learning_rate": 8.961406930543018e-06,
      "loss": 0.0308,
      "step": 335240
    },
    {
      "epoch": 0.5486603431459188,
      "grad_norm": 0.8852985501289368,
      "learning_rate": 8.961341038329502e-06,
      "loss": 0.0359,
      "step": 335260
    },
    {
      "epoch": 0.5486930735845722,
      "grad_norm": 2.740800380706787,
      "learning_rate": 8.961275146115984e-06,
      "loss": 0.0352,
      "step": 335280
    },
    {
      "epoch": 0.5487258040232256,
      "grad_norm": 1.423003911972046,
      "learning_rate": 8.961209253902467e-06,
      "loss": 0.0277,
      "step": 335300
    },
    {
      "epoch": 0.5487585344618788,
      "grad_norm": 0.3392758369445801,
      "learning_rate": 8.96114336168895e-06,
      "loss": 0.0259,
      "step": 335320
    },
    {
      "epoch": 0.5487912649005322,
      "grad_norm": 3.690211534500122,
      "learning_rate": 8.961077469475433e-06,
      "loss": 0.0448,
      "step": 335340
    },
    {
      "epoch": 0.5488239953391856,
      "grad_norm": 1.0653520822525024,
      "learning_rate": 8.961011577261915e-06,
      "loss": 0.0373,
      "step": 335360
    },
    {
      "epoch": 0.5488567257778388,
      "grad_norm": 1.2393088340759277,
      "learning_rate": 8.960945685048398e-06,
      "loss": 0.0361,
      "step": 335380
    },
    {
      "epoch": 0.5488894562164922,
      "grad_norm": 1.2040687799453735,
      "learning_rate": 8.960879792834882e-06,
      "loss": 0.0302,
      "step": 335400
    },
    {
      "epoch": 0.5489221866551456,
      "grad_norm": 1.2841713428497314,
      "learning_rate": 8.960813900621364e-06,
      "loss": 0.0336,
      "step": 335420
    },
    {
      "epoch": 0.5489549170937988,
      "grad_norm": 0.4347924590110779,
      "learning_rate": 8.960748008407847e-06,
      "loss": 0.0423,
      "step": 335440
    },
    {
      "epoch": 0.5489876475324522,
      "grad_norm": 0.56084144115448,
      "learning_rate": 8.960682116194331e-06,
      "loss": 0.0322,
      "step": 335460
    },
    {
      "epoch": 0.5490203779711056,
      "grad_norm": 1.3626246452331543,
      "learning_rate": 8.960616223980813e-06,
      "loss": 0.046,
      "step": 335480
    },
    {
      "epoch": 0.549053108409759,
      "grad_norm": 2.2656660079956055,
      "learning_rate": 8.960550331767296e-06,
      "loss": 0.051,
      "step": 335500
    },
    {
      "epoch": 0.5490858388484122,
      "grad_norm": 2.9622082710266113,
      "learning_rate": 8.960484439553778e-06,
      "loss": 0.0468,
      "step": 335520
    },
    {
      "epoch": 0.5491185692870656,
      "grad_norm": 1.366283655166626,
      "learning_rate": 8.960418547340262e-06,
      "loss": 0.034,
      "step": 335540
    },
    {
      "epoch": 0.549151299725719,
      "grad_norm": 7.430610179901123,
      "learning_rate": 8.960352655126744e-06,
      "loss": 0.0397,
      "step": 335560
    },
    {
      "epoch": 0.5491840301643722,
      "grad_norm": 2.3459019660949707,
      "learning_rate": 8.960286762913227e-06,
      "loss": 0.0473,
      "step": 335580
    },
    {
      "epoch": 0.5492167606030256,
      "grad_norm": 1.1671273708343506,
      "learning_rate": 8.960220870699711e-06,
      "loss": 0.0379,
      "step": 335600
    },
    {
      "epoch": 0.549249491041679,
      "grad_norm": 3.26604962348938,
      "learning_rate": 8.960154978486193e-06,
      "loss": 0.0436,
      "step": 335620
    },
    {
      "epoch": 0.5492822214803322,
      "grad_norm": 1.032629370689392,
      "learning_rate": 8.960089086272676e-06,
      "loss": 0.0445,
      "step": 335640
    },
    {
      "epoch": 0.5493149519189856,
      "grad_norm": 1.7710713148117065,
      "learning_rate": 8.960023194059158e-06,
      "loss": 0.0465,
      "step": 335660
    },
    {
      "epoch": 0.549347682357639,
      "grad_norm": 0.997576117515564,
      "learning_rate": 8.959957301845642e-06,
      "loss": 0.0429,
      "step": 335680
    },
    {
      "epoch": 0.5493804127962924,
      "grad_norm": 1.8666934967041016,
      "learning_rate": 8.959891409632124e-06,
      "loss": 0.0514,
      "step": 335700
    },
    {
      "epoch": 0.5494131432349456,
      "grad_norm": 0.30143171548843384,
      "learning_rate": 8.959825517418607e-06,
      "loss": 0.0414,
      "step": 335720
    },
    {
      "epoch": 0.549445873673599,
      "grad_norm": 4.733361721038818,
      "learning_rate": 8.95975962520509e-06,
      "loss": 0.0331,
      "step": 335740
    },
    {
      "epoch": 0.5494786041122524,
      "grad_norm": 1.042739748954773,
      "learning_rate": 8.959693732991573e-06,
      "loss": 0.0393,
      "step": 335760
    },
    {
      "epoch": 0.5495113345509056,
      "grad_norm": 0.6820273399353027,
      "learning_rate": 8.959627840778056e-06,
      "loss": 0.0444,
      "step": 335780
    },
    {
      "epoch": 0.549544064989559,
      "grad_norm": 5.307973384857178,
      "learning_rate": 8.959561948564538e-06,
      "loss": 0.0406,
      "step": 335800
    },
    {
      "epoch": 0.5495767954282124,
      "grad_norm": 1.3701200485229492,
      "learning_rate": 8.959496056351022e-06,
      "loss": 0.0386,
      "step": 335820
    },
    {
      "epoch": 0.5496095258668656,
      "grad_norm": 1.38232421875,
      "learning_rate": 8.959430164137506e-06,
      "loss": 0.0323,
      "step": 335840
    },
    {
      "epoch": 0.549642256305519,
      "grad_norm": 2.1658005714416504,
      "learning_rate": 8.959364271923987e-06,
      "loss": 0.0363,
      "step": 335860
    },
    {
      "epoch": 0.5496749867441724,
      "grad_norm": 0.7072117328643799,
      "learning_rate": 8.959298379710471e-06,
      "loss": 0.0363,
      "step": 335880
    },
    {
      "epoch": 0.5497077171828257,
      "grad_norm": 3.4017457962036133,
      "learning_rate": 8.959232487496953e-06,
      "loss": 0.0327,
      "step": 335900
    },
    {
      "epoch": 0.549740447621479,
      "grad_norm": 1.5462344884872437,
      "learning_rate": 8.959166595283436e-06,
      "loss": 0.028,
      "step": 335920
    },
    {
      "epoch": 0.5497731780601324,
      "grad_norm": 1.7483134269714355,
      "learning_rate": 8.959100703069918e-06,
      "loss": 0.0341,
      "step": 335940
    },
    {
      "epoch": 0.5498059084987857,
      "grad_norm": 1.1017640829086304,
      "learning_rate": 8.959034810856402e-06,
      "loss": 0.0394,
      "step": 335960
    },
    {
      "epoch": 0.549838638937439,
      "grad_norm": 1.1753038167953491,
      "learning_rate": 8.958968918642886e-06,
      "loss": 0.0352,
      "step": 335980
    },
    {
      "epoch": 0.5498713693760924,
      "grad_norm": 1.3087860345840454,
      "learning_rate": 8.958903026429367e-06,
      "loss": 0.0414,
      "step": 336000
    },
    {
      "epoch": 0.5499040998147458,
      "grad_norm": 1.0811316967010498,
      "learning_rate": 8.958837134215851e-06,
      "loss": 0.0393,
      "step": 336020
    },
    {
      "epoch": 0.549936830253399,
      "grad_norm": 1.2093905210494995,
      "learning_rate": 8.958771242002333e-06,
      "loss": 0.0313,
      "step": 336040
    },
    {
      "epoch": 0.5499695606920524,
      "grad_norm": 2.023289680480957,
      "learning_rate": 8.958705349788816e-06,
      "loss": 0.0326,
      "step": 336060
    },
    {
      "epoch": 0.5500022911307058,
      "grad_norm": 2.845653772354126,
      "learning_rate": 8.958639457575298e-06,
      "loss": 0.0509,
      "step": 336080
    },
    {
      "epoch": 0.550035021569359,
      "grad_norm": 2.1516265869140625,
      "learning_rate": 8.958573565361782e-06,
      "loss": 0.0321,
      "step": 336100
    },
    {
      "epoch": 0.5500677520080124,
      "grad_norm": 0.45207905769348145,
      "learning_rate": 8.958507673148264e-06,
      "loss": 0.0352,
      "step": 336120
    },
    {
      "epoch": 0.5501004824466658,
      "grad_norm": 0.8419932126998901,
      "learning_rate": 8.958441780934747e-06,
      "loss": 0.0488,
      "step": 336140
    },
    {
      "epoch": 0.5501332128853191,
      "grad_norm": 1.6630761623382568,
      "learning_rate": 8.958375888721231e-06,
      "loss": 0.0456,
      "step": 336160
    },
    {
      "epoch": 0.5501659433239724,
      "grad_norm": 0.5543729066848755,
      "learning_rate": 8.958309996507713e-06,
      "loss": 0.0473,
      "step": 336180
    },
    {
      "epoch": 0.5501986737626258,
      "grad_norm": 5.796133995056152,
      "learning_rate": 8.958244104294197e-06,
      "loss": 0.0532,
      "step": 336200
    },
    {
      "epoch": 0.5502314042012791,
      "grad_norm": 0.8652993440628052,
      "learning_rate": 8.95817821208068e-06,
      "loss": 0.0466,
      "step": 336220
    },
    {
      "epoch": 0.5502641346399324,
      "grad_norm": 9.727548599243164,
      "learning_rate": 8.958112319867162e-06,
      "loss": 0.0292,
      "step": 336240
    },
    {
      "epoch": 0.5502968650785858,
      "grad_norm": 1.7733348608016968,
      "learning_rate": 8.958046427653646e-06,
      "loss": 0.0442,
      "step": 336260
    },
    {
      "epoch": 0.5503295955172391,
      "grad_norm": 1.1382771730422974,
      "learning_rate": 8.957980535440127e-06,
      "loss": 0.0353,
      "step": 336280
    },
    {
      "epoch": 0.5503623259558924,
      "grad_norm": 1.889609456062317,
      "learning_rate": 8.957914643226611e-06,
      "loss": 0.0364,
      "step": 336300
    },
    {
      "epoch": 0.5503950563945458,
      "grad_norm": 1.1766211986541748,
      "learning_rate": 8.957848751013095e-06,
      "loss": 0.0523,
      "step": 336320
    },
    {
      "epoch": 0.5504277868331992,
      "grad_norm": 0.9724447727203369,
      "learning_rate": 8.957782858799577e-06,
      "loss": 0.0334,
      "step": 336340
    },
    {
      "epoch": 0.5504605172718525,
      "grad_norm": 1.0101624727249146,
      "learning_rate": 8.95771696658606e-06,
      "loss": 0.0293,
      "step": 336360
    },
    {
      "epoch": 0.5504932477105058,
      "grad_norm": 2.300299882888794,
      "learning_rate": 8.957651074372542e-06,
      "loss": 0.0455,
      "step": 336380
    },
    {
      "epoch": 0.5505259781491592,
      "grad_norm": 2.247467279434204,
      "learning_rate": 8.957585182159026e-06,
      "loss": 0.0515,
      "step": 336400
    },
    {
      "epoch": 0.5505587085878125,
      "grad_norm": 0.7188522219657898,
      "learning_rate": 8.957519289945507e-06,
      "loss": 0.0487,
      "step": 336420
    },
    {
      "epoch": 0.5505914390264658,
      "grad_norm": 1.9107626676559448,
      "learning_rate": 8.957453397731991e-06,
      "loss": 0.0346,
      "step": 336440
    },
    {
      "epoch": 0.5506241694651192,
      "grad_norm": 1.4405030012130737,
      "learning_rate": 8.957387505518473e-06,
      "loss": 0.0448,
      "step": 336460
    },
    {
      "epoch": 0.5506568999037725,
      "grad_norm": 0.7919397354125977,
      "learning_rate": 8.957321613304957e-06,
      "loss": 0.0299,
      "step": 336480
    },
    {
      "epoch": 0.5506896303424258,
      "grad_norm": 0.6187182664871216,
      "learning_rate": 8.957255721091438e-06,
      "loss": 0.0294,
      "step": 336500
    },
    {
      "epoch": 0.5507223607810792,
      "grad_norm": 2.8058624267578125,
      "learning_rate": 8.957189828877922e-06,
      "loss": 0.0528,
      "step": 336520
    },
    {
      "epoch": 0.5507550912197325,
      "grad_norm": 1.5439549684524536,
      "learning_rate": 8.957123936664404e-06,
      "loss": 0.0484,
      "step": 336540
    },
    {
      "epoch": 0.5507878216583859,
      "grad_norm": 1.4271889925003052,
      "learning_rate": 8.957058044450888e-06,
      "loss": 0.0396,
      "step": 336560
    },
    {
      "epoch": 0.5508205520970392,
      "grad_norm": 2.608731985092163,
      "learning_rate": 8.956992152237371e-06,
      "loss": 0.0383,
      "step": 336580
    },
    {
      "epoch": 0.5508532825356925,
      "grad_norm": 1.6948992013931274,
      "learning_rate": 8.956926260023853e-06,
      "loss": 0.0363,
      "step": 336600
    },
    {
      "epoch": 0.5508860129743459,
      "grad_norm": 1.487390398979187,
      "learning_rate": 8.956860367810337e-06,
      "loss": 0.0311,
      "step": 336620
    },
    {
      "epoch": 0.5509187434129992,
      "grad_norm": 1.1896682977676392,
      "learning_rate": 8.95679447559682e-06,
      "loss": 0.0352,
      "step": 336640
    },
    {
      "epoch": 0.5509514738516526,
      "grad_norm": 0.2871546149253845,
      "learning_rate": 8.956728583383302e-06,
      "loss": 0.03,
      "step": 336660
    },
    {
      "epoch": 0.5509842042903059,
      "grad_norm": 2.4782233238220215,
      "learning_rate": 8.956662691169786e-06,
      "loss": 0.0295,
      "step": 336680
    },
    {
      "epoch": 0.5510169347289592,
      "grad_norm": 0.3359970152378082,
      "learning_rate": 8.95659679895627e-06,
      "loss": 0.041,
      "step": 336700
    },
    {
      "epoch": 0.5510496651676126,
      "grad_norm": 0.5120400190353394,
      "learning_rate": 8.956530906742751e-06,
      "loss": 0.0386,
      "step": 336720
    },
    {
      "epoch": 0.5510823956062659,
      "grad_norm": 1.5369982719421387,
      "learning_rate": 8.956465014529235e-06,
      "loss": 0.0458,
      "step": 336740
    },
    {
      "epoch": 0.5511151260449193,
      "grad_norm": 1.099131464958191,
      "learning_rate": 8.956399122315717e-06,
      "loss": 0.0351,
      "step": 336760
    },
    {
      "epoch": 0.5511478564835726,
      "grad_norm": 1.3514407873153687,
      "learning_rate": 8.9563332301022e-06,
      "loss": 0.0286,
      "step": 336780
    },
    {
      "epoch": 0.5511805869222259,
      "grad_norm": 0.9134442210197449,
      "learning_rate": 8.956267337888682e-06,
      "loss": 0.0302,
      "step": 336800
    },
    {
      "epoch": 0.5512133173608793,
      "grad_norm": 6.652400016784668,
      "learning_rate": 8.956201445675166e-06,
      "loss": 0.0298,
      "step": 336820
    },
    {
      "epoch": 0.5512460477995326,
      "grad_norm": 0.7130510210990906,
      "learning_rate": 8.956135553461648e-06,
      "loss": 0.0328,
      "step": 336840
    },
    {
      "epoch": 0.5512787782381859,
      "grad_norm": 2.2825469970703125,
      "learning_rate": 8.956069661248131e-06,
      "loss": 0.0331,
      "step": 336860
    },
    {
      "epoch": 0.5513115086768393,
      "grad_norm": 2.515033483505249,
      "learning_rate": 8.956003769034613e-06,
      "loss": 0.0477,
      "step": 336880
    },
    {
      "epoch": 0.5513442391154926,
      "grad_norm": 0.5726534724235535,
      "learning_rate": 8.955937876821097e-06,
      "loss": 0.0447,
      "step": 336900
    },
    {
      "epoch": 0.551376969554146,
      "grad_norm": 0.7846868634223938,
      "learning_rate": 8.955871984607579e-06,
      "loss": 0.037,
      "step": 336920
    },
    {
      "epoch": 0.5514096999927993,
      "grad_norm": 1.0051796436309814,
      "learning_rate": 8.955806092394062e-06,
      "loss": 0.0449,
      "step": 336940
    },
    {
      "epoch": 0.5514424304314527,
      "grad_norm": 0.8136740922927856,
      "learning_rate": 8.955740200180546e-06,
      "loss": 0.037,
      "step": 336960
    },
    {
      "epoch": 0.551475160870106,
      "grad_norm": 0.641372799873352,
      "learning_rate": 8.955674307967028e-06,
      "loss": 0.0413,
      "step": 336980
    },
    {
      "epoch": 0.5515078913087593,
      "grad_norm": 2.799107313156128,
      "learning_rate": 8.955608415753511e-06,
      "loss": 0.0339,
      "step": 337000
    },
    {
      "epoch": 0.5515406217474127,
      "grad_norm": 1.0019068717956543,
      "learning_rate": 8.955542523539995e-06,
      "loss": 0.0261,
      "step": 337020
    },
    {
      "epoch": 0.551573352186066,
      "grad_norm": 1.3580206632614136,
      "learning_rate": 8.955476631326477e-06,
      "loss": 0.0271,
      "step": 337040
    },
    {
      "epoch": 0.5516060826247193,
      "grad_norm": 0.8605878949165344,
      "learning_rate": 8.95541073911296e-06,
      "loss": 0.0404,
      "step": 337060
    },
    {
      "epoch": 0.5516388130633727,
      "grad_norm": 1.300781011581421,
      "learning_rate": 8.955344846899444e-06,
      "loss": 0.0377,
      "step": 337080
    },
    {
      "epoch": 0.551671543502026,
      "grad_norm": 1.5383766889572144,
      "learning_rate": 8.955278954685926e-06,
      "loss": 0.0458,
      "step": 337100
    },
    {
      "epoch": 0.5517042739406793,
      "grad_norm": 2.8891921043395996,
      "learning_rate": 8.95521306247241e-06,
      "loss": 0.0354,
      "step": 337120
    },
    {
      "epoch": 0.5517370043793327,
      "grad_norm": 0.5600993633270264,
      "learning_rate": 8.955147170258891e-06,
      "loss": 0.0375,
      "step": 337140
    },
    {
      "epoch": 0.5517697348179861,
      "grad_norm": 1.2183479070663452,
      "learning_rate": 8.955081278045375e-06,
      "loss": 0.0399,
      "step": 337160
    },
    {
      "epoch": 0.5518024652566393,
      "grad_norm": 0.21508660912513733,
      "learning_rate": 8.955015385831857e-06,
      "loss": 0.0378,
      "step": 337180
    },
    {
      "epoch": 0.5518351956952927,
      "grad_norm": 0.4669654965400696,
      "learning_rate": 8.95494949361834e-06,
      "loss": 0.0417,
      "step": 337200
    },
    {
      "epoch": 0.5518679261339461,
      "grad_norm": 0.737693727016449,
      "learning_rate": 8.954883601404822e-06,
      "loss": 0.0379,
      "step": 337220
    },
    {
      "epoch": 0.5519006565725993,
      "grad_norm": 1.926387071609497,
      "learning_rate": 8.954817709191306e-06,
      "loss": 0.0491,
      "step": 337240
    },
    {
      "epoch": 0.5519333870112527,
      "grad_norm": 1.6772085428237915,
      "learning_rate": 8.954751816977788e-06,
      "loss": 0.0302,
      "step": 337260
    },
    {
      "epoch": 0.5519661174499061,
      "grad_norm": 0.24722518026828766,
      "learning_rate": 8.954685924764271e-06,
      "loss": 0.0356,
      "step": 337280
    },
    {
      "epoch": 0.5519988478885594,
      "grad_norm": 2.232761859893799,
      "learning_rate": 8.954620032550753e-06,
      "loss": 0.0416,
      "step": 337300
    },
    {
      "epoch": 0.5520315783272127,
      "grad_norm": 1.4686837196350098,
      "learning_rate": 8.954554140337237e-06,
      "loss": 0.0346,
      "step": 337320
    },
    {
      "epoch": 0.5520643087658661,
      "grad_norm": 0.6257555484771729,
      "learning_rate": 8.954488248123719e-06,
      "loss": 0.0415,
      "step": 337340
    },
    {
      "epoch": 0.5520970392045195,
      "grad_norm": 2.9053359031677246,
      "learning_rate": 8.954422355910202e-06,
      "loss": 0.0408,
      "step": 337360
    },
    {
      "epoch": 0.5521297696431727,
      "grad_norm": 2.5167157649993896,
      "learning_rate": 8.954356463696686e-06,
      "loss": 0.0343,
      "step": 337380
    },
    {
      "epoch": 0.5521625000818261,
      "grad_norm": 1.3531230688095093,
      "learning_rate": 8.954290571483168e-06,
      "loss": 0.0468,
      "step": 337400
    },
    {
      "epoch": 0.5521952305204795,
      "grad_norm": 1.5472217798233032,
      "learning_rate": 8.954224679269651e-06,
      "loss": 0.0318,
      "step": 337420
    },
    {
      "epoch": 0.5522279609591327,
      "grad_norm": 1.5836467742919922,
      "learning_rate": 8.954158787056135e-06,
      "loss": 0.0438,
      "step": 337440
    },
    {
      "epoch": 0.5522606913977861,
      "grad_norm": 1.5120669603347778,
      "learning_rate": 8.954092894842617e-06,
      "loss": 0.0349,
      "step": 337460
    },
    {
      "epoch": 0.5522934218364395,
      "grad_norm": 1.1133743524551392,
      "learning_rate": 8.9540270026291e-06,
      "loss": 0.0356,
      "step": 337480
    },
    {
      "epoch": 0.5523261522750927,
      "grad_norm": 3.4795098304748535,
      "learning_rate": 8.953961110415584e-06,
      "loss": 0.0524,
      "step": 337500
    },
    {
      "epoch": 0.5523588827137461,
      "grad_norm": 1.1261581182479858,
      "learning_rate": 8.953895218202066e-06,
      "loss": 0.0379,
      "step": 337520
    },
    {
      "epoch": 0.5523916131523995,
      "grad_norm": 0.8604537844657898,
      "learning_rate": 8.95382932598855e-06,
      "loss": 0.0411,
      "step": 337540
    },
    {
      "epoch": 0.5524243435910529,
      "grad_norm": 1.5883594751358032,
      "learning_rate": 8.953763433775031e-06,
      "loss": 0.0323,
      "step": 337560
    },
    {
      "epoch": 0.5524570740297061,
      "grad_norm": 1.0272717475891113,
      "learning_rate": 8.953697541561515e-06,
      "loss": 0.0395,
      "step": 337580
    },
    {
      "epoch": 0.5524898044683595,
      "grad_norm": 1.091164231300354,
      "learning_rate": 8.953631649347997e-06,
      "loss": 0.0358,
      "step": 337600
    },
    {
      "epoch": 0.5525225349070129,
      "grad_norm": 1.856626272201538,
      "learning_rate": 8.95356575713448e-06,
      "loss": 0.0293,
      "step": 337620
    },
    {
      "epoch": 0.5525552653456661,
      "grad_norm": 1.2658385038375854,
      "learning_rate": 8.953499864920962e-06,
      "loss": 0.0396,
      "step": 337640
    },
    {
      "epoch": 0.5525879957843195,
      "grad_norm": 0.7292788028717041,
      "learning_rate": 8.953433972707446e-06,
      "loss": 0.0395,
      "step": 337660
    },
    {
      "epoch": 0.5526207262229729,
      "grad_norm": 11.363317489624023,
      "learning_rate": 8.953368080493928e-06,
      "loss": 0.0477,
      "step": 337680
    },
    {
      "epoch": 0.5526534566616261,
      "grad_norm": 0.3462959825992584,
      "learning_rate": 8.953302188280411e-06,
      "loss": 0.0231,
      "step": 337700
    },
    {
      "epoch": 0.5526861871002795,
      "grad_norm": 0.3742324411869049,
      "learning_rate": 8.953236296066895e-06,
      "loss": 0.033,
      "step": 337720
    },
    {
      "epoch": 0.5527189175389329,
      "grad_norm": 3.268587589263916,
      "learning_rate": 8.953170403853377e-06,
      "loss": 0.0495,
      "step": 337740
    },
    {
      "epoch": 0.5527516479775862,
      "grad_norm": 0.704423189163208,
      "learning_rate": 8.95310451163986e-06,
      "loss": 0.0423,
      "step": 337760
    },
    {
      "epoch": 0.5527843784162395,
      "grad_norm": 1.3132773637771606,
      "learning_rate": 8.953038619426342e-06,
      "loss": 0.0373,
      "step": 337780
    },
    {
      "epoch": 0.5528171088548929,
      "grad_norm": 1.1373515129089355,
      "learning_rate": 8.952972727212826e-06,
      "loss": 0.0324,
      "step": 337800
    },
    {
      "epoch": 0.5528498392935463,
      "grad_norm": 1.0801091194152832,
      "learning_rate": 8.95290683499931e-06,
      "loss": 0.0476,
      "step": 337820
    },
    {
      "epoch": 0.5528825697321995,
      "grad_norm": 1.3940643072128296,
      "learning_rate": 8.952840942785791e-06,
      "loss": 0.0356,
      "step": 337840
    },
    {
      "epoch": 0.5529153001708529,
      "grad_norm": 1.9792914390563965,
      "learning_rate": 8.952775050572275e-06,
      "loss": 0.0576,
      "step": 337860
    },
    {
      "epoch": 0.5529480306095063,
      "grad_norm": 1.6439450979232788,
      "learning_rate": 8.952709158358759e-06,
      "loss": 0.0475,
      "step": 337880
    },
    {
      "epoch": 0.5529807610481595,
      "grad_norm": 1.1407724618911743,
      "learning_rate": 8.95264326614524e-06,
      "loss": 0.0345,
      "step": 337900
    },
    {
      "epoch": 0.5530134914868129,
      "grad_norm": 1.757071852684021,
      "learning_rate": 8.952577373931724e-06,
      "loss": 0.031,
      "step": 337920
    },
    {
      "epoch": 0.5530462219254663,
      "grad_norm": 1.6441398859024048,
      "learning_rate": 8.952511481718206e-06,
      "loss": 0.0326,
      "step": 337940
    },
    {
      "epoch": 0.5530789523641196,
      "grad_norm": 1.6037522554397583,
      "learning_rate": 8.95244558950469e-06,
      "loss": 0.0362,
      "step": 337960
    },
    {
      "epoch": 0.5531116828027729,
      "grad_norm": 1.3715708255767822,
      "learning_rate": 8.952379697291171e-06,
      "loss": 0.0328,
      "step": 337980
    },
    {
      "epoch": 0.5531444132414263,
      "grad_norm": 0.2731521725654602,
      "learning_rate": 8.952313805077655e-06,
      "loss": 0.0275,
      "step": 338000
    },
    {
      "epoch": 0.5531771436800796,
      "grad_norm": 0.7994775176048279,
      "learning_rate": 8.952247912864137e-06,
      "loss": 0.0371,
      "step": 338020
    },
    {
      "epoch": 0.5532098741187329,
      "grad_norm": 0.8776769042015076,
      "learning_rate": 8.95218202065062e-06,
      "loss": 0.0405,
      "step": 338040
    },
    {
      "epoch": 0.5532426045573863,
      "grad_norm": 2.0155932903289795,
      "learning_rate": 8.952116128437104e-06,
      "loss": 0.0365,
      "step": 338060
    },
    {
      "epoch": 0.5532753349960396,
      "grad_norm": 3.065809488296509,
      "learning_rate": 8.952050236223586e-06,
      "loss": 0.0418,
      "step": 338080
    },
    {
      "epoch": 0.5533080654346929,
      "grad_norm": 2.251521348953247,
      "learning_rate": 8.95198434401007e-06,
      "loss": 0.0499,
      "step": 338100
    },
    {
      "epoch": 0.5533407958733463,
      "grad_norm": 1.3032320737838745,
      "learning_rate": 8.951918451796551e-06,
      "loss": 0.0329,
      "step": 338120
    },
    {
      "epoch": 0.5533735263119997,
      "grad_norm": 1.3125125169754028,
      "learning_rate": 8.951852559583035e-06,
      "loss": 0.0353,
      "step": 338140
    },
    {
      "epoch": 0.553406256750653,
      "grad_norm": 1.9340144395828247,
      "learning_rate": 8.951786667369517e-06,
      "loss": 0.0259,
      "step": 338160
    },
    {
      "epoch": 0.5534389871893063,
      "grad_norm": 2.3262171745300293,
      "learning_rate": 8.951720775156e-06,
      "loss": 0.0356,
      "step": 338180
    },
    {
      "epoch": 0.5534717176279597,
      "grad_norm": 4.362644672393799,
      "learning_rate": 8.951654882942482e-06,
      "loss": 0.0387,
      "step": 338200
    },
    {
      "epoch": 0.553504448066613,
      "grad_norm": 0.18638445436954498,
      "learning_rate": 8.951588990728966e-06,
      "loss": 0.048,
      "step": 338220
    },
    {
      "epoch": 0.5535371785052663,
      "grad_norm": 0.4259120523929596,
      "learning_rate": 8.95152309851545e-06,
      "loss": 0.0454,
      "step": 338240
    },
    {
      "epoch": 0.5535699089439197,
      "grad_norm": 1.6105612516403198,
      "learning_rate": 8.951457206301933e-06,
      "loss": 0.0485,
      "step": 338260
    },
    {
      "epoch": 0.553602639382573,
      "grad_norm": 1.0195523500442505,
      "learning_rate": 8.951391314088415e-06,
      "loss": 0.0237,
      "step": 338280
    },
    {
      "epoch": 0.5536353698212263,
      "grad_norm": 4.934205532073975,
      "learning_rate": 8.951325421874899e-06,
      "loss": 0.0492,
      "step": 338300
    },
    {
      "epoch": 0.5536681002598797,
      "grad_norm": 0.4026886224746704,
      "learning_rate": 8.95125952966138e-06,
      "loss": 0.0355,
      "step": 338320
    },
    {
      "epoch": 0.553700830698533,
      "grad_norm": 1.6064175367355347,
      "learning_rate": 8.951193637447864e-06,
      "loss": 0.0366,
      "step": 338340
    },
    {
      "epoch": 0.5537335611371864,
      "grad_norm": 1.9865458011627197,
      "learning_rate": 8.951127745234346e-06,
      "loss": 0.0443,
      "step": 338360
    },
    {
      "epoch": 0.5537662915758397,
      "grad_norm": 1.279940128326416,
      "learning_rate": 8.95106185302083e-06,
      "loss": 0.036,
      "step": 338380
    },
    {
      "epoch": 0.553799022014493,
      "grad_norm": 0.4910249412059784,
      "learning_rate": 8.950995960807311e-06,
      "loss": 0.0343,
      "step": 338400
    },
    {
      "epoch": 0.5538317524531464,
      "grad_norm": 0.3543066680431366,
      "learning_rate": 8.950930068593795e-06,
      "loss": 0.0368,
      "step": 338420
    },
    {
      "epoch": 0.5538644828917997,
      "grad_norm": 1.9332804679870605,
      "learning_rate": 8.950864176380279e-06,
      "loss": 0.0333,
      "step": 338440
    },
    {
      "epoch": 0.553897213330453,
      "grad_norm": 1.0061376094818115,
      "learning_rate": 8.95079828416676e-06,
      "loss": 0.0415,
      "step": 338460
    },
    {
      "epoch": 0.5539299437691064,
      "grad_norm": 2.154484510421753,
      "learning_rate": 8.950732391953244e-06,
      "loss": 0.05,
      "step": 338480
    },
    {
      "epoch": 0.5539626742077597,
      "grad_norm": 1.0536713600158691,
      "learning_rate": 8.950666499739726e-06,
      "loss": 0.0467,
      "step": 338500
    },
    {
      "epoch": 0.5539954046464131,
      "grad_norm": 2.51383376121521,
      "learning_rate": 8.95060060752621e-06,
      "loss": 0.0445,
      "step": 338520
    },
    {
      "epoch": 0.5540281350850664,
      "grad_norm": 0.9052255153656006,
      "learning_rate": 8.950534715312691e-06,
      "loss": 0.0326,
      "step": 338540
    },
    {
      "epoch": 0.5540608655237198,
      "grad_norm": 1.6185561418533325,
      "learning_rate": 8.950468823099175e-06,
      "loss": 0.0347,
      "step": 338560
    },
    {
      "epoch": 0.5540935959623731,
      "grad_norm": 1.4264851808547974,
      "learning_rate": 8.950402930885657e-06,
      "loss": 0.0561,
      "step": 338580
    },
    {
      "epoch": 0.5541263264010264,
      "grad_norm": 2.8114194869995117,
      "learning_rate": 8.95033703867214e-06,
      "loss": 0.0477,
      "step": 338600
    },
    {
      "epoch": 0.5541590568396798,
      "grad_norm": 1.160189151763916,
      "learning_rate": 8.950271146458624e-06,
      "loss": 0.0333,
      "step": 338620
    },
    {
      "epoch": 0.5541917872783331,
      "grad_norm": 18.537376403808594,
      "learning_rate": 8.950205254245106e-06,
      "loss": 0.0497,
      "step": 338640
    },
    {
      "epoch": 0.5542245177169864,
      "grad_norm": 2.0647573471069336,
      "learning_rate": 8.95013936203159e-06,
      "loss": 0.0434,
      "step": 338660
    },
    {
      "epoch": 0.5542572481556398,
      "grad_norm": 0.9991083145141602,
      "learning_rate": 8.950073469818073e-06,
      "loss": 0.0357,
      "step": 338680
    },
    {
      "epoch": 0.5542899785942931,
      "grad_norm": 2.454209089279175,
      "learning_rate": 8.950007577604555e-06,
      "loss": 0.0433,
      "step": 338700
    },
    {
      "epoch": 0.5543227090329464,
      "grad_norm": 0.8569703698158264,
      "learning_rate": 8.949941685391039e-06,
      "loss": 0.0409,
      "step": 338720
    },
    {
      "epoch": 0.5543554394715998,
      "grad_norm": 0.6055179834365845,
      "learning_rate": 8.94987579317752e-06,
      "loss": 0.0302,
      "step": 338740
    },
    {
      "epoch": 0.5543881699102532,
      "grad_norm": 0.6491643786430359,
      "learning_rate": 8.949809900964004e-06,
      "loss": 0.0266,
      "step": 338760
    },
    {
      "epoch": 0.5544209003489065,
      "grad_norm": 3.8597090244293213,
      "learning_rate": 8.949744008750488e-06,
      "loss": 0.0387,
      "step": 338780
    },
    {
      "epoch": 0.5544536307875598,
      "grad_norm": 1.2242110967636108,
      "learning_rate": 8.94967811653697e-06,
      "loss": 0.0329,
      "step": 338800
    },
    {
      "epoch": 0.5544863612262132,
      "grad_norm": 0.8063276410102844,
      "learning_rate": 8.949612224323453e-06,
      "loss": 0.042,
      "step": 338820
    },
    {
      "epoch": 0.5545190916648665,
      "grad_norm": 0.7622461915016174,
      "learning_rate": 8.949546332109935e-06,
      "loss": 0.0514,
      "step": 338840
    },
    {
      "epoch": 0.5545518221035198,
      "grad_norm": 1.8844542503356934,
      "learning_rate": 8.949480439896419e-06,
      "loss": 0.0407,
      "step": 338860
    },
    {
      "epoch": 0.5545845525421732,
      "grad_norm": 0.9760271906852722,
      "learning_rate": 8.9494145476829e-06,
      "loss": 0.0243,
      "step": 338880
    },
    {
      "epoch": 0.5546172829808265,
      "grad_norm": 0.593017041683197,
      "learning_rate": 8.949348655469384e-06,
      "loss": 0.0355,
      "step": 338900
    },
    {
      "epoch": 0.5546500134194798,
      "grad_norm": 2.7971699237823486,
      "learning_rate": 8.949282763255866e-06,
      "loss": 0.0504,
      "step": 338920
    },
    {
      "epoch": 0.5546827438581332,
      "grad_norm": 1.1213219165802002,
      "learning_rate": 8.94921687104235e-06,
      "loss": 0.0559,
      "step": 338940
    },
    {
      "epoch": 0.5547154742967865,
      "grad_norm": 0.3971625566482544,
      "learning_rate": 8.949150978828832e-06,
      "loss": 0.0385,
      "step": 338960
    },
    {
      "epoch": 0.5547482047354398,
      "grad_norm": 1.4185471534729004,
      "learning_rate": 8.949085086615315e-06,
      "loss": 0.0291,
      "step": 338980
    },
    {
      "epoch": 0.5547809351740932,
      "grad_norm": 1.154829978942871,
      "learning_rate": 8.949019194401799e-06,
      "loss": 0.0393,
      "step": 339000
    },
    {
      "epoch": 0.5548136656127466,
      "grad_norm": 1.683933138847351,
      "learning_rate": 8.94895330218828e-06,
      "loss": 0.0301,
      "step": 339020
    },
    {
      "epoch": 0.5548463960513998,
      "grad_norm": 0.7180256843566895,
      "learning_rate": 8.948887409974764e-06,
      "loss": 0.0445,
      "step": 339040
    },
    {
      "epoch": 0.5548791264900532,
      "grad_norm": 1.2396286725997925,
      "learning_rate": 8.948821517761248e-06,
      "loss": 0.0425,
      "step": 339060
    },
    {
      "epoch": 0.5549118569287066,
      "grad_norm": 0.8894615173339844,
      "learning_rate": 8.94875562554773e-06,
      "loss": 0.0335,
      "step": 339080
    },
    {
      "epoch": 0.5549445873673599,
      "grad_norm": 0.5555915832519531,
      "learning_rate": 8.948689733334213e-06,
      "loss": 0.0429,
      "step": 339100
    },
    {
      "epoch": 0.5549773178060132,
      "grad_norm": 2.568091869354248,
      "learning_rate": 8.948623841120697e-06,
      "loss": 0.0478,
      "step": 339120
    },
    {
      "epoch": 0.5550100482446666,
      "grad_norm": 0.9152657389640808,
      "learning_rate": 8.948557948907179e-06,
      "loss": 0.0481,
      "step": 339140
    },
    {
      "epoch": 0.5550427786833199,
      "grad_norm": 0.5257192850112915,
      "learning_rate": 8.948492056693662e-06,
      "loss": 0.0364,
      "step": 339160
    },
    {
      "epoch": 0.5550755091219732,
      "grad_norm": 0.39057457447052,
      "learning_rate": 8.948426164480144e-06,
      "loss": 0.0243,
      "step": 339180
    },
    {
      "epoch": 0.5551082395606266,
      "grad_norm": 1.5082770586013794,
      "learning_rate": 8.948360272266628e-06,
      "loss": 0.0386,
      "step": 339200
    },
    {
      "epoch": 0.55514096999928,
      "grad_norm": 0.9481176137924194,
      "learning_rate": 8.94829438005311e-06,
      "loss": 0.0391,
      "step": 339220
    },
    {
      "epoch": 0.5551737004379332,
      "grad_norm": 2.798449754714966,
      "learning_rate": 8.948228487839593e-06,
      "loss": 0.0478,
      "step": 339240
    },
    {
      "epoch": 0.5552064308765866,
      "grad_norm": 1.5197713375091553,
      "learning_rate": 8.948162595626075e-06,
      "loss": 0.0383,
      "step": 339260
    },
    {
      "epoch": 0.55523916131524,
      "grad_norm": 1.2507920265197754,
      "learning_rate": 8.948096703412559e-06,
      "loss": 0.0405,
      "step": 339280
    },
    {
      "epoch": 0.5552718917538932,
      "grad_norm": 0.5884032845497131,
      "learning_rate": 8.94803081119904e-06,
      "loss": 0.0372,
      "step": 339300
    },
    {
      "epoch": 0.5553046221925466,
      "grad_norm": 2.6239309310913086,
      "learning_rate": 8.947964918985524e-06,
      "loss": 0.0406,
      "step": 339320
    },
    {
      "epoch": 0.5553373526312,
      "grad_norm": 1.463365077972412,
      "learning_rate": 8.947899026772006e-06,
      "loss": 0.0433,
      "step": 339340
    },
    {
      "epoch": 0.5553700830698532,
      "grad_norm": 2.158046245574951,
      "learning_rate": 8.94783313455849e-06,
      "loss": 0.0304,
      "step": 339360
    },
    {
      "epoch": 0.5554028135085066,
      "grad_norm": 0.41504719853401184,
      "learning_rate": 8.947767242344972e-06,
      "loss": 0.0391,
      "step": 339380
    },
    {
      "epoch": 0.55543554394716,
      "grad_norm": 0.7646835446357727,
      "learning_rate": 8.947701350131455e-06,
      "loss": 0.0458,
      "step": 339400
    },
    {
      "epoch": 0.5554682743858134,
      "grad_norm": 1.14145028591156,
      "learning_rate": 8.947635457917939e-06,
      "loss": 0.0357,
      "step": 339420
    },
    {
      "epoch": 0.5555010048244666,
      "grad_norm": 0.2936299741268158,
      "learning_rate": 8.94756956570442e-06,
      "loss": 0.0293,
      "step": 339440
    },
    {
      "epoch": 0.55553373526312,
      "grad_norm": 0.9931887984275818,
      "learning_rate": 8.947503673490904e-06,
      "loss": 0.0567,
      "step": 339460
    },
    {
      "epoch": 0.5555664657017734,
      "grad_norm": 3.0658299922943115,
      "learning_rate": 8.947437781277388e-06,
      "loss": 0.0414,
      "step": 339480
    },
    {
      "epoch": 0.5555991961404266,
      "grad_norm": 2.3871521949768066,
      "learning_rate": 8.94737188906387e-06,
      "loss": 0.0321,
      "step": 339500
    },
    {
      "epoch": 0.55563192657908,
      "grad_norm": 0.40660935640335083,
      "learning_rate": 8.947305996850353e-06,
      "loss": 0.0404,
      "step": 339520
    },
    {
      "epoch": 0.5556646570177334,
      "grad_norm": 1.1241785287857056,
      "learning_rate": 8.947240104636837e-06,
      "loss": 0.0329,
      "step": 339540
    },
    {
      "epoch": 0.5556973874563866,
      "grad_norm": 1.4006177186965942,
      "learning_rate": 8.947174212423319e-06,
      "loss": 0.0383,
      "step": 339560
    },
    {
      "epoch": 0.55573011789504,
      "grad_norm": 0.2831382751464844,
      "learning_rate": 8.947108320209802e-06,
      "loss": 0.0308,
      "step": 339580
    },
    {
      "epoch": 0.5557628483336934,
      "grad_norm": 2.695545196533203,
      "learning_rate": 8.947042427996284e-06,
      "loss": 0.0351,
      "step": 339600
    },
    {
      "epoch": 0.5557955787723468,
      "grad_norm": 1.3488306999206543,
      "learning_rate": 8.946976535782768e-06,
      "loss": 0.0354,
      "step": 339620
    },
    {
      "epoch": 0.555828309211,
      "grad_norm": 0.180673286318779,
      "learning_rate": 8.94691064356925e-06,
      "loss": 0.0405,
      "step": 339640
    },
    {
      "epoch": 0.5558610396496534,
      "grad_norm": 1.2752071619033813,
      "learning_rate": 8.946844751355733e-06,
      "loss": 0.0439,
      "step": 339660
    },
    {
      "epoch": 0.5558937700883068,
      "grad_norm": 0.2218141406774521,
      "learning_rate": 8.946778859142215e-06,
      "loss": 0.0296,
      "step": 339680
    },
    {
      "epoch": 0.55592650052696,
      "grad_norm": 0.36533987522125244,
      "learning_rate": 8.946712966928699e-06,
      "loss": 0.0322,
      "step": 339700
    },
    {
      "epoch": 0.5559592309656134,
      "grad_norm": 1.3901240825653076,
      "learning_rate": 8.94664707471518e-06,
      "loss": 0.0326,
      "step": 339720
    },
    {
      "epoch": 0.5559919614042668,
      "grad_norm": 0.551240086555481,
      "learning_rate": 8.946581182501664e-06,
      "loss": 0.0429,
      "step": 339740
    },
    {
      "epoch": 0.55602469184292,
      "grad_norm": 0.9353721141815186,
      "learning_rate": 8.946515290288146e-06,
      "loss": 0.0417,
      "step": 339760
    },
    {
      "epoch": 0.5560574222815734,
      "grad_norm": 1.1428724527359009,
      "learning_rate": 8.94644939807463e-06,
      "loss": 0.0444,
      "step": 339780
    },
    {
      "epoch": 0.5560901527202268,
      "grad_norm": 0.5299622416496277,
      "learning_rate": 8.946383505861113e-06,
      "loss": 0.0297,
      "step": 339800
    },
    {
      "epoch": 0.5561228831588801,
      "grad_norm": 0.4819084107875824,
      "learning_rate": 8.946317613647595e-06,
      "loss": 0.024,
      "step": 339820
    },
    {
      "epoch": 0.5561556135975334,
      "grad_norm": 1.809045672416687,
      "learning_rate": 8.946251721434079e-06,
      "loss": 0.0387,
      "step": 339840
    },
    {
      "epoch": 0.5561883440361868,
      "grad_norm": 0.9096884727478027,
      "learning_rate": 8.946185829220562e-06,
      "loss": 0.0347,
      "step": 339860
    },
    {
      "epoch": 0.5562210744748401,
      "grad_norm": 0.389749675989151,
      "learning_rate": 8.946119937007044e-06,
      "loss": 0.0337,
      "step": 339880
    },
    {
      "epoch": 0.5562538049134934,
      "grad_norm": 0.28521963953971863,
      "learning_rate": 8.946054044793528e-06,
      "loss": 0.0415,
      "step": 339900
    },
    {
      "epoch": 0.5562865353521468,
      "grad_norm": 1.6659555435180664,
      "learning_rate": 8.945988152580012e-06,
      "loss": 0.0417,
      "step": 339920
    },
    {
      "epoch": 0.5563192657908002,
      "grad_norm": 1.6909477710723877,
      "learning_rate": 8.945922260366493e-06,
      "loss": 0.0316,
      "step": 339940
    },
    {
      "epoch": 0.5563519962294534,
      "grad_norm": 0.47010084986686707,
      "learning_rate": 8.945856368152977e-06,
      "loss": 0.0329,
      "step": 339960
    },
    {
      "epoch": 0.5563847266681068,
      "grad_norm": 2.945697069168091,
      "learning_rate": 8.945790475939459e-06,
      "loss": 0.043,
      "step": 339980
    },
    {
      "epoch": 0.5564174571067602,
      "grad_norm": 0.21264931559562683,
      "learning_rate": 8.945724583725942e-06,
      "loss": 0.0376,
      "step": 340000
    },
    {
      "epoch": 0.5564501875454135,
      "grad_norm": 1.6684842109680176,
      "learning_rate": 8.945658691512424e-06,
      "loss": 0.0383,
      "step": 340020
    },
    {
      "epoch": 0.5564829179840668,
      "grad_norm": 0.8385951519012451,
      "learning_rate": 8.945592799298908e-06,
      "loss": 0.0358,
      "step": 340040
    },
    {
      "epoch": 0.5565156484227202,
      "grad_norm": 1.4499708414077759,
      "learning_rate": 8.94552690708539e-06,
      "loss": 0.034,
      "step": 340060
    },
    {
      "epoch": 0.5565483788613735,
      "grad_norm": 3.5798819065093994,
      "learning_rate": 8.945461014871873e-06,
      "loss": 0.0377,
      "step": 340080
    },
    {
      "epoch": 0.5565811093000268,
      "grad_norm": 0.8436384797096252,
      "learning_rate": 8.945395122658355e-06,
      "loss": 0.0306,
      "step": 340100
    },
    {
      "epoch": 0.5566138397386802,
      "grad_norm": 1.9274935722351074,
      "learning_rate": 8.945329230444839e-06,
      "loss": 0.048,
      "step": 340120
    },
    {
      "epoch": 0.5566465701773335,
      "grad_norm": 1.4169751405715942,
      "learning_rate": 8.94526333823132e-06,
      "loss": 0.0371,
      "step": 340140
    },
    {
      "epoch": 0.5566793006159868,
      "grad_norm": 2.604004383087158,
      "learning_rate": 8.945197446017804e-06,
      "loss": 0.0467,
      "step": 340160
    },
    {
      "epoch": 0.5567120310546402,
      "grad_norm": 1.4192708730697632,
      "learning_rate": 8.945131553804288e-06,
      "loss": 0.0412,
      "step": 340180
    },
    {
      "epoch": 0.5567447614932935,
      "grad_norm": 1.0619107484817505,
      "learning_rate": 8.94506566159077e-06,
      "loss": 0.0281,
      "step": 340200
    },
    {
      "epoch": 0.5567774919319469,
      "grad_norm": 0.9131797552108765,
      "learning_rate": 8.944999769377253e-06,
      "loss": 0.0434,
      "step": 340220
    },
    {
      "epoch": 0.5568102223706002,
      "grad_norm": 0.33948206901550293,
      "learning_rate": 8.944933877163735e-06,
      "loss": 0.0326,
      "step": 340240
    },
    {
      "epoch": 0.5568429528092536,
      "grad_norm": 1.2699040174484253,
      "learning_rate": 8.944867984950219e-06,
      "loss": 0.0384,
      "step": 340260
    },
    {
      "epoch": 0.5568756832479069,
      "grad_norm": 2.63460636138916,
      "learning_rate": 8.944802092736703e-06,
      "loss": 0.037,
      "step": 340280
    },
    {
      "epoch": 0.5569084136865602,
      "grad_norm": 2.3856871128082275,
      "learning_rate": 8.944736200523184e-06,
      "loss": 0.0412,
      "step": 340300
    },
    {
      "epoch": 0.5569411441252136,
      "grad_norm": 0.8169398903846741,
      "learning_rate": 8.944670308309668e-06,
      "loss": 0.0302,
      "step": 340320
    },
    {
      "epoch": 0.5569738745638669,
      "grad_norm": 1.1595674753189087,
      "learning_rate": 8.944604416096152e-06,
      "loss": 0.0434,
      "step": 340340
    },
    {
      "epoch": 0.5570066050025202,
      "grad_norm": 0.87703937292099,
      "learning_rate": 8.944538523882633e-06,
      "loss": 0.0439,
      "step": 340360
    },
    {
      "epoch": 0.5570393354411736,
      "grad_norm": 1.025383472442627,
      "learning_rate": 8.944472631669117e-06,
      "loss": 0.0398,
      "step": 340380
    },
    {
      "epoch": 0.5570720658798269,
      "grad_norm": 0.8220434188842773,
      "learning_rate": 8.944406739455599e-06,
      "loss": 0.0332,
      "step": 340400
    },
    {
      "epoch": 0.5571047963184803,
      "grad_norm": 0.7265562415122986,
      "learning_rate": 8.944340847242083e-06,
      "loss": 0.0411,
      "step": 340420
    },
    {
      "epoch": 0.5571375267571336,
      "grad_norm": 8.063077926635742,
      "learning_rate": 8.944274955028564e-06,
      "loss": 0.0337,
      "step": 340440
    },
    {
      "epoch": 0.5571702571957869,
      "grad_norm": 1.8045296669006348,
      "learning_rate": 8.944209062815048e-06,
      "loss": 0.0456,
      "step": 340460
    },
    {
      "epoch": 0.5572029876344403,
      "grad_norm": 0.804387092590332,
      "learning_rate": 8.94414317060153e-06,
      "loss": 0.0424,
      "step": 340480
    },
    {
      "epoch": 0.5572357180730936,
      "grad_norm": 1.6524587869644165,
      "learning_rate": 8.944077278388014e-06,
      "loss": 0.0349,
      "step": 340500
    },
    {
      "epoch": 0.557268448511747,
      "grad_norm": 0.6635529398918152,
      "learning_rate": 8.944011386174497e-06,
      "loss": 0.0343,
      "step": 340520
    },
    {
      "epoch": 0.5573011789504003,
      "grad_norm": 1.2966187000274658,
      "learning_rate": 8.943945493960979e-06,
      "loss": 0.0415,
      "step": 340540
    },
    {
      "epoch": 0.5573339093890536,
      "grad_norm": 2.7711892127990723,
      "learning_rate": 8.943879601747463e-06,
      "loss": 0.0332,
      "step": 340560
    },
    {
      "epoch": 0.557366639827707,
      "grad_norm": 0.7090953588485718,
      "learning_rate": 8.943813709533944e-06,
      "loss": 0.0445,
      "step": 340580
    },
    {
      "epoch": 0.5573993702663603,
      "grad_norm": 3.6134016513824463,
      "learning_rate": 8.943747817320428e-06,
      "loss": 0.0311,
      "step": 340600
    },
    {
      "epoch": 0.5574321007050137,
      "grad_norm": 0.4969077706336975,
      "learning_rate": 8.94368192510691e-06,
      "loss": 0.0507,
      "step": 340620
    },
    {
      "epoch": 0.557464831143667,
      "grad_norm": 1.707381010055542,
      "learning_rate": 8.943616032893394e-06,
      "loss": 0.043,
      "step": 340640
    },
    {
      "epoch": 0.5574975615823203,
      "grad_norm": 2.189293146133423,
      "learning_rate": 8.943550140679877e-06,
      "loss": 0.0441,
      "step": 340660
    },
    {
      "epoch": 0.5575302920209737,
      "grad_norm": 1.0758470296859741,
      "learning_rate": 8.943484248466359e-06,
      "loss": 0.0276,
      "step": 340680
    },
    {
      "epoch": 0.557563022459627,
      "grad_norm": 0.6964243054389954,
      "learning_rate": 8.943418356252843e-06,
      "loss": 0.0304,
      "step": 340700
    },
    {
      "epoch": 0.5575957528982803,
      "grad_norm": 2.5343685150146484,
      "learning_rate": 8.943352464039326e-06,
      "loss": 0.029,
      "step": 340720
    },
    {
      "epoch": 0.5576284833369337,
      "grad_norm": 0.6795945167541504,
      "learning_rate": 8.943286571825808e-06,
      "loss": 0.0397,
      "step": 340740
    },
    {
      "epoch": 0.557661213775587,
      "grad_norm": 2.980340003967285,
      "learning_rate": 8.943220679612292e-06,
      "loss": 0.0376,
      "step": 340760
    },
    {
      "epoch": 0.5576939442142403,
      "grad_norm": 1.9584463834762573,
      "learning_rate": 8.943154787398774e-06,
      "loss": 0.0484,
      "step": 340780
    },
    {
      "epoch": 0.5577266746528937,
      "grad_norm": 2.1076114177703857,
      "learning_rate": 8.943088895185257e-06,
      "loss": 0.0323,
      "step": 340800
    },
    {
      "epoch": 0.5577594050915471,
      "grad_norm": 0.7133158445358276,
      "learning_rate": 8.943023002971739e-06,
      "loss": 0.0378,
      "step": 340820
    },
    {
      "epoch": 0.5577921355302004,
      "grad_norm": 0.6628012657165527,
      "learning_rate": 8.942957110758223e-06,
      "loss": 0.0262,
      "step": 340840
    },
    {
      "epoch": 0.5578248659688537,
      "grad_norm": 3.9072048664093018,
      "learning_rate": 8.942891218544705e-06,
      "loss": 0.0429,
      "step": 340860
    },
    {
      "epoch": 0.5578575964075071,
      "grad_norm": 0.7545860409736633,
      "learning_rate": 8.942825326331188e-06,
      "loss": 0.0414,
      "step": 340880
    },
    {
      "epoch": 0.5578903268461604,
      "grad_norm": 1.0541563034057617,
      "learning_rate": 8.942759434117672e-06,
      "loss": 0.0324,
      "step": 340900
    },
    {
      "epoch": 0.5579230572848137,
      "grad_norm": 1.6691555976867676,
      "learning_rate": 8.942693541904154e-06,
      "loss": 0.0494,
      "step": 340920
    },
    {
      "epoch": 0.5579557877234671,
      "grad_norm": 0.6500768661499023,
      "learning_rate": 8.942627649690637e-06,
      "loss": 0.0411,
      "step": 340940
    },
    {
      "epoch": 0.5579885181621204,
      "grad_norm": 0.7536451816558838,
      "learning_rate": 8.942561757477119e-06,
      "loss": 0.0413,
      "step": 340960
    },
    {
      "epoch": 0.5580212486007737,
      "grad_norm": 0.5970316529273987,
      "learning_rate": 8.942495865263603e-06,
      "loss": 0.037,
      "step": 340980
    },
    {
      "epoch": 0.5580539790394271,
      "grad_norm": 0.4084009528160095,
      "learning_rate": 8.942429973050085e-06,
      "loss": 0.0398,
      "step": 341000
    },
    {
      "epoch": 0.5580867094780805,
      "grad_norm": 1.88467276096344,
      "learning_rate": 8.942364080836568e-06,
      "loss": 0.0444,
      "step": 341020
    },
    {
      "epoch": 0.5581194399167337,
      "grad_norm": 0.7974078059196472,
      "learning_rate": 8.942298188623052e-06,
      "loss": 0.0538,
      "step": 341040
    },
    {
      "epoch": 0.5581521703553871,
      "grad_norm": 1.5636342763900757,
      "learning_rate": 8.942232296409534e-06,
      "loss": 0.0406,
      "step": 341060
    },
    {
      "epoch": 0.5581849007940405,
      "grad_norm": 0.35967108607292175,
      "learning_rate": 8.942166404196017e-06,
      "loss": 0.0403,
      "step": 341080
    },
    {
      "epoch": 0.5582176312326937,
      "grad_norm": 1.0565385818481445,
      "learning_rate": 8.9421005119825e-06,
      "loss": 0.0359,
      "step": 341100
    },
    {
      "epoch": 0.5582503616713471,
      "grad_norm": 1.0352834463119507,
      "learning_rate": 8.942034619768983e-06,
      "loss": 0.0406,
      "step": 341120
    },
    {
      "epoch": 0.5582830921100005,
      "grad_norm": 0.8239680528640747,
      "learning_rate": 8.941968727555466e-06,
      "loss": 0.0275,
      "step": 341140
    },
    {
      "epoch": 0.5583158225486538,
      "grad_norm": 0.4544637203216553,
      "learning_rate": 8.941902835341948e-06,
      "loss": 0.0339,
      "step": 341160
    },
    {
      "epoch": 0.5583485529873071,
      "grad_norm": 0.7797783017158508,
      "learning_rate": 8.941836943128432e-06,
      "loss": 0.041,
      "step": 341180
    },
    {
      "epoch": 0.5583812834259605,
      "grad_norm": 0.4673832654953003,
      "learning_rate": 8.941771050914914e-06,
      "loss": 0.0365,
      "step": 341200
    },
    {
      "epoch": 0.5584140138646139,
      "grad_norm": 0.8934168815612793,
      "learning_rate": 8.941705158701397e-06,
      "loss": 0.043,
      "step": 341220
    },
    {
      "epoch": 0.5584467443032671,
      "grad_norm": 0.7014402747154236,
      "learning_rate": 8.94163926648788e-06,
      "loss": 0.0417,
      "step": 341240
    },
    {
      "epoch": 0.5584794747419205,
      "grad_norm": 1.8389222621917725,
      "learning_rate": 8.941573374274363e-06,
      "loss": 0.0462,
      "step": 341260
    },
    {
      "epoch": 0.5585122051805739,
      "grad_norm": 0.8573166728019714,
      "learning_rate": 8.941507482060846e-06,
      "loss": 0.0307,
      "step": 341280
    },
    {
      "epoch": 0.5585449356192271,
      "grad_norm": 1.1791733503341675,
      "learning_rate": 8.941441589847328e-06,
      "loss": 0.0356,
      "step": 341300
    },
    {
      "epoch": 0.5585776660578805,
      "grad_norm": 1.7874250411987305,
      "learning_rate": 8.941375697633812e-06,
      "loss": 0.0364,
      "step": 341320
    },
    {
      "epoch": 0.5586103964965339,
      "grad_norm": 0.3848394453525543,
      "learning_rate": 8.941309805420294e-06,
      "loss": 0.0484,
      "step": 341340
    },
    {
      "epoch": 0.5586431269351871,
      "grad_norm": 0.7197118401527405,
      "learning_rate": 8.941243913206777e-06,
      "loss": 0.0356,
      "step": 341360
    },
    {
      "epoch": 0.5586758573738405,
      "grad_norm": 0.6535822153091431,
      "learning_rate": 8.941178020993259e-06,
      "loss": 0.0298,
      "step": 341380
    },
    {
      "epoch": 0.5587085878124939,
      "grad_norm": 3.2396886348724365,
      "learning_rate": 8.941112128779743e-06,
      "loss": 0.0387,
      "step": 341400
    },
    {
      "epoch": 0.5587413182511473,
      "grad_norm": 2.4049432277679443,
      "learning_rate": 8.941046236566225e-06,
      "loss": 0.034,
      "step": 341420
    },
    {
      "epoch": 0.5587740486898005,
      "grad_norm": 0.9707196354866028,
      "learning_rate": 8.940980344352708e-06,
      "loss": 0.0322,
      "step": 341440
    },
    {
      "epoch": 0.5588067791284539,
      "grad_norm": 1.0521095991134644,
      "learning_rate": 8.940914452139192e-06,
      "loss": 0.0345,
      "step": 341460
    },
    {
      "epoch": 0.5588395095671073,
      "grad_norm": 0.6695153117179871,
      "learning_rate": 8.940848559925674e-06,
      "loss": 0.0351,
      "step": 341480
    },
    {
      "epoch": 0.5588722400057605,
      "grad_norm": 0.6791746616363525,
      "learning_rate": 8.940782667712157e-06,
      "loss": 0.0391,
      "step": 341500
    },
    {
      "epoch": 0.5589049704444139,
      "grad_norm": 1.6843498945236206,
      "learning_rate": 8.940716775498641e-06,
      "loss": 0.0348,
      "step": 341520
    },
    {
      "epoch": 0.5589377008830673,
      "grad_norm": 1.215833306312561,
      "learning_rate": 8.940650883285123e-06,
      "loss": 0.0384,
      "step": 341540
    },
    {
      "epoch": 0.5589704313217205,
      "grad_norm": 1.1608858108520508,
      "learning_rate": 8.940584991071606e-06,
      "loss": 0.0432,
      "step": 341560
    },
    {
      "epoch": 0.5590031617603739,
      "grad_norm": 0.902108907699585,
      "learning_rate": 8.94051909885809e-06,
      "loss": 0.0412,
      "step": 341580
    },
    {
      "epoch": 0.5590358921990273,
      "grad_norm": 0.3632652759552002,
      "learning_rate": 8.940453206644572e-06,
      "loss": 0.0348,
      "step": 341600
    },
    {
      "epoch": 0.5590686226376806,
      "grad_norm": 5.034004211425781,
      "learning_rate": 8.940387314431055e-06,
      "loss": 0.0414,
      "step": 341620
    },
    {
      "epoch": 0.5591013530763339,
      "grad_norm": 8.255760192871094,
      "learning_rate": 8.940321422217537e-06,
      "loss": 0.0518,
      "step": 341640
    },
    {
      "epoch": 0.5591340835149873,
      "grad_norm": 1.074232578277588,
      "learning_rate": 8.940255530004021e-06,
      "loss": 0.0494,
      "step": 341660
    },
    {
      "epoch": 0.5591668139536407,
      "grad_norm": 0.4641328454017639,
      "learning_rate": 8.940189637790503e-06,
      "loss": 0.0414,
      "step": 341680
    },
    {
      "epoch": 0.5591995443922939,
      "grad_norm": 0.8804718852043152,
      "learning_rate": 8.940123745576986e-06,
      "loss": 0.0328,
      "step": 341700
    },
    {
      "epoch": 0.5592322748309473,
      "grad_norm": 1.648209810256958,
      "learning_rate": 8.940057853363468e-06,
      "loss": 0.0491,
      "step": 341720
    },
    {
      "epoch": 0.5592650052696007,
      "grad_norm": 1.4115769863128662,
      "learning_rate": 8.939991961149952e-06,
      "loss": 0.0359,
      "step": 341740
    },
    {
      "epoch": 0.5592977357082539,
      "grad_norm": 1.7730863094329834,
      "learning_rate": 8.939926068936434e-06,
      "loss": 0.0436,
      "step": 341760
    },
    {
      "epoch": 0.5593304661469073,
      "grad_norm": 1.6588544845581055,
      "learning_rate": 8.939860176722917e-06,
      "loss": 0.0457,
      "step": 341780
    },
    {
      "epoch": 0.5593631965855607,
      "grad_norm": 1.0223448276519775,
      "learning_rate": 8.9397942845094e-06,
      "loss": 0.0291,
      "step": 341800
    },
    {
      "epoch": 0.5593959270242139,
      "grad_norm": 0.9564504027366638,
      "learning_rate": 8.939728392295883e-06,
      "loss": 0.0371,
      "step": 341820
    },
    {
      "epoch": 0.5594286574628673,
      "grad_norm": 0.8337931036949158,
      "learning_rate": 8.939662500082366e-06,
      "loss": 0.0355,
      "step": 341840
    },
    {
      "epoch": 0.5594613879015207,
      "grad_norm": 1.1095645427703857,
      "learning_rate": 8.939596607868848e-06,
      "loss": 0.0384,
      "step": 341860
    },
    {
      "epoch": 0.559494118340174,
      "grad_norm": 1.3955508470535278,
      "learning_rate": 8.939530715655332e-06,
      "loss": 0.0412,
      "step": 341880
    },
    {
      "epoch": 0.5595268487788273,
      "grad_norm": 0.31855255365371704,
      "learning_rate": 8.939464823441815e-06,
      "loss": 0.0361,
      "step": 341900
    },
    {
      "epoch": 0.5595595792174807,
      "grad_norm": 2.1983911991119385,
      "learning_rate": 8.939398931228297e-06,
      "loss": 0.0224,
      "step": 341920
    },
    {
      "epoch": 0.559592309656134,
      "grad_norm": 1.578474521636963,
      "learning_rate": 8.939333039014781e-06,
      "loss": 0.0317,
      "step": 341940
    },
    {
      "epoch": 0.5596250400947873,
      "grad_norm": 0.865045964717865,
      "learning_rate": 8.939267146801265e-06,
      "loss": 0.0443,
      "step": 341960
    },
    {
      "epoch": 0.5596577705334407,
      "grad_norm": 0.7323518395423889,
      "learning_rate": 8.939201254587746e-06,
      "loss": 0.0327,
      "step": 341980
    },
    {
      "epoch": 0.559690500972094,
      "grad_norm": 2.669591188430786,
      "learning_rate": 8.93913536237423e-06,
      "loss": 0.0313,
      "step": 342000
    },
    {
      "epoch": 0.5597232314107473,
      "grad_norm": 1.7609175443649292,
      "learning_rate": 8.939069470160712e-06,
      "loss": 0.0341,
      "step": 342020
    },
    {
      "epoch": 0.5597559618494007,
      "grad_norm": 1.2858500480651855,
      "learning_rate": 8.939003577947195e-06,
      "loss": 0.0334,
      "step": 342040
    },
    {
      "epoch": 0.5597886922880541,
      "grad_norm": 0.8464424014091492,
      "learning_rate": 8.938937685733677e-06,
      "loss": 0.0302,
      "step": 342060
    },
    {
      "epoch": 0.5598214227267074,
      "grad_norm": 0.5356599688529968,
      "learning_rate": 8.938871793520161e-06,
      "loss": 0.0291,
      "step": 342080
    },
    {
      "epoch": 0.5598541531653607,
      "grad_norm": 0.4092959463596344,
      "learning_rate": 8.938805901306643e-06,
      "loss": 0.0281,
      "step": 342100
    },
    {
      "epoch": 0.5598868836040141,
      "grad_norm": 1.1073732376098633,
      "learning_rate": 8.938740009093126e-06,
      "loss": 0.0322,
      "step": 342120
    },
    {
      "epoch": 0.5599196140426674,
      "grad_norm": 0.338660329580307,
      "learning_rate": 8.938674116879608e-06,
      "loss": 0.0415,
      "step": 342140
    },
    {
      "epoch": 0.5599523444813207,
      "grad_norm": 1.4148004055023193,
      "learning_rate": 8.938608224666092e-06,
      "loss": 0.0352,
      "step": 342160
    },
    {
      "epoch": 0.5599850749199741,
      "grad_norm": 0.787813663482666,
      "learning_rate": 8.938542332452574e-06,
      "loss": 0.0249,
      "step": 342180
    },
    {
      "epoch": 0.5600178053586274,
      "grad_norm": 4.793123245239258,
      "learning_rate": 8.938476440239057e-06,
      "loss": 0.0333,
      "step": 342200
    },
    {
      "epoch": 0.5600505357972807,
      "grad_norm": 1.0717004537582397,
      "learning_rate": 8.93841054802554e-06,
      "loss": 0.0281,
      "step": 342220
    },
    {
      "epoch": 0.5600832662359341,
      "grad_norm": 1.2373428344726562,
      "learning_rate": 8.938344655812023e-06,
      "loss": 0.0337,
      "step": 342240
    },
    {
      "epoch": 0.5601159966745874,
      "grad_norm": 0.637514054775238,
      "learning_rate": 8.938278763598506e-06,
      "loss": 0.0483,
      "step": 342260
    },
    {
      "epoch": 0.5601487271132408,
      "grad_norm": 2.476498603820801,
      "learning_rate": 8.938212871384988e-06,
      "loss": 0.0288,
      "step": 342280
    },
    {
      "epoch": 0.5601814575518941,
      "grad_norm": 2.37606143951416,
      "learning_rate": 8.938146979171472e-06,
      "loss": 0.0451,
      "step": 342300
    },
    {
      "epoch": 0.5602141879905475,
      "grad_norm": 1.9830403327941895,
      "learning_rate": 8.938081086957956e-06,
      "loss": 0.0363,
      "step": 342320
    },
    {
      "epoch": 0.5602469184292008,
      "grad_norm": 1.2481482028961182,
      "learning_rate": 8.938015194744437e-06,
      "loss": 0.0422,
      "step": 342340
    },
    {
      "epoch": 0.5602796488678541,
      "grad_norm": 1.510942816734314,
      "learning_rate": 8.937949302530921e-06,
      "loss": 0.0572,
      "step": 342360
    },
    {
      "epoch": 0.5603123793065075,
      "grad_norm": 1.1787489652633667,
      "learning_rate": 8.937883410317405e-06,
      "loss": 0.0346,
      "step": 342380
    },
    {
      "epoch": 0.5603451097451608,
      "grad_norm": 1.6778268814086914,
      "learning_rate": 8.937817518103886e-06,
      "loss": 0.0411,
      "step": 342400
    },
    {
      "epoch": 0.5603778401838141,
      "grad_norm": 0.5225347280502319,
      "learning_rate": 8.93775162589037e-06,
      "loss": 0.0396,
      "step": 342420
    },
    {
      "epoch": 0.5604105706224675,
      "grad_norm": 1.908006191253662,
      "learning_rate": 8.937685733676852e-06,
      "loss": 0.0339,
      "step": 342440
    },
    {
      "epoch": 0.5604433010611208,
      "grad_norm": 3.080876350402832,
      "learning_rate": 8.937619841463336e-06,
      "loss": 0.0341,
      "step": 342460
    },
    {
      "epoch": 0.5604760314997742,
      "grad_norm": 1.7621936798095703,
      "learning_rate": 8.937553949249817e-06,
      "loss": 0.0395,
      "step": 342480
    },
    {
      "epoch": 0.5605087619384275,
      "grad_norm": 0.9227372407913208,
      "learning_rate": 8.937488057036301e-06,
      "loss": 0.0421,
      "step": 342500
    },
    {
      "epoch": 0.5605414923770808,
      "grad_norm": 2.0250489711761475,
      "learning_rate": 8.937422164822783e-06,
      "loss": 0.0405,
      "step": 342520
    },
    {
      "epoch": 0.5605742228157342,
      "grad_norm": 1.7798031568527222,
      "learning_rate": 8.937356272609267e-06,
      "loss": 0.0412,
      "step": 342540
    },
    {
      "epoch": 0.5606069532543875,
      "grad_norm": 0.41106411814689636,
      "learning_rate": 8.937290380395748e-06,
      "loss": 0.0348,
      "step": 342560
    },
    {
      "epoch": 0.5606396836930408,
      "grad_norm": 0.8549059629440308,
      "learning_rate": 8.937224488182232e-06,
      "loss": 0.0329,
      "step": 342580
    },
    {
      "epoch": 0.5606724141316942,
      "grad_norm": 1.4611338376998901,
      "learning_rate": 8.937158595968714e-06,
      "loss": 0.0364,
      "step": 342600
    },
    {
      "epoch": 0.5607051445703475,
      "grad_norm": 6.390625476837158,
      "learning_rate": 8.937092703755197e-06,
      "loss": 0.0376,
      "step": 342620
    },
    {
      "epoch": 0.5607378750090009,
      "grad_norm": 2.4265494346618652,
      "learning_rate": 8.937026811541681e-06,
      "loss": 0.0281,
      "step": 342640
    },
    {
      "epoch": 0.5607706054476542,
      "grad_norm": 0.6082596182823181,
      "learning_rate": 8.936960919328163e-06,
      "loss": 0.0399,
      "step": 342660
    },
    {
      "epoch": 0.5608033358863076,
      "grad_norm": 4.9210028648376465,
      "learning_rate": 8.936895027114647e-06,
      "loss": 0.0214,
      "step": 342680
    },
    {
      "epoch": 0.5608360663249609,
      "grad_norm": 1.1676474809646606,
      "learning_rate": 8.93682913490113e-06,
      "loss": 0.0394,
      "step": 342700
    },
    {
      "epoch": 0.5608687967636142,
      "grad_norm": 2.15627121925354,
      "learning_rate": 8.936763242687612e-06,
      "loss": 0.0359,
      "step": 342720
    },
    {
      "epoch": 0.5609015272022676,
      "grad_norm": 2.841458559036255,
      "learning_rate": 8.936697350474096e-06,
      "loss": 0.0452,
      "step": 342740
    },
    {
      "epoch": 0.5609342576409209,
      "grad_norm": 1.3262486457824707,
      "learning_rate": 8.93663145826058e-06,
      "loss": 0.0344,
      "step": 342760
    },
    {
      "epoch": 0.5609669880795742,
      "grad_norm": 0.8909768462181091,
      "learning_rate": 8.936565566047061e-06,
      "loss": 0.0502,
      "step": 342780
    },
    {
      "epoch": 0.5609997185182276,
      "grad_norm": 0.19700300693511963,
      "learning_rate": 8.936499673833545e-06,
      "loss": 0.0428,
      "step": 342800
    },
    {
      "epoch": 0.5610324489568809,
      "grad_norm": 0.9006645083427429,
      "learning_rate": 8.936433781620027e-06,
      "loss": 0.0428,
      "step": 342820
    },
    {
      "epoch": 0.5610651793955342,
      "grad_norm": 1.1342206001281738,
      "learning_rate": 8.93636788940651e-06,
      "loss": 0.0323,
      "step": 342840
    },
    {
      "epoch": 0.5610979098341876,
      "grad_norm": 0.6244850158691406,
      "learning_rate": 8.936301997192992e-06,
      "loss": 0.0154,
      "step": 342860
    },
    {
      "epoch": 0.561130640272841,
      "grad_norm": 0.4072920083999634,
      "learning_rate": 8.936236104979476e-06,
      "loss": 0.0486,
      "step": 342880
    },
    {
      "epoch": 0.5611633707114942,
      "grad_norm": 0.6175569891929626,
      "learning_rate": 8.936170212765958e-06,
      "loss": 0.0478,
      "step": 342900
    },
    {
      "epoch": 0.5611961011501476,
      "grad_norm": 1.5490436553955078,
      "learning_rate": 8.936104320552441e-06,
      "loss": 0.0368,
      "step": 342920
    },
    {
      "epoch": 0.561228831588801,
      "grad_norm": 1.6017485857009888,
      "learning_rate": 8.936038428338923e-06,
      "loss": 0.0358,
      "step": 342940
    },
    {
      "epoch": 0.5612615620274543,
      "grad_norm": 1.7993851900100708,
      "learning_rate": 8.935972536125407e-06,
      "loss": 0.0523,
      "step": 342960
    },
    {
      "epoch": 0.5612942924661076,
      "grad_norm": 0.5249359607696533,
      "learning_rate": 8.935906643911888e-06,
      "loss": 0.037,
      "step": 342980
    },
    {
      "epoch": 0.561327022904761,
      "grad_norm": 3.75167179107666,
      "learning_rate": 8.935840751698372e-06,
      "loss": 0.0409,
      "step": 343000
    },
    {
      "epoch": 0.5613597533434143,
      "grad_norm": 1.1657828092575073,
      "learning_rate": 8.935774859484856e-06,
      "loss": 0.0403,
      "step": 343020
    },
    {
      "epoch": 0.5613924837820676,
      "grad_norm": 2.0060198307037354,
      "learning_rate": 8.935708967271338e-06,
      "loss": 0.04,
      "step": 343040
    },
    {
      "epoch": 0.561425214220721,
      "grad_norm": 1.1771568059921265,
      "learning_rate": 8.935643075057821e-06,
      "loss": 0.0321,
      "step": 343060
    },
    {
      "epoch": 0.5614579446593744,
      "grad_norm": 0.6303865909576416,
      "learning_rate": 8.935577182844303e-06,
      "loss": 0.0352,
      "step": 343080
    },
    {
      "epoch": 0.5614906750980276,
      "grad_norm": 0.6328374147415161,
      "learning_rate": 8.935511290630787e-06,
      "loss": 0.0288,
      "step": 343100
    },
    {
      "epoch": 0.561523405536681,
      "grad_norm": 1.1808584928512573,
      "learning_rate": 8.93544539841727e-06,
      "loss": 0.0319,
      "step": 343120
    },
    {
      "epoch": 0.5615561359753344,
      "grad_norm": 2.0465359687805176,
      "learning_rate": 8.935379506203752e-06,
      "loss": 0.0491,
      "step": 343140
    },
    {
      "epoch": 0.5615888664139876,
      "grad_norm": 0.9014163613319397,
      "learning_rate": 8.935313613990236e-06,
      "loss": 0.0327,
      "step": 343160
    },
    {
      "epoch": 0.561621596852641,
      "grad_norm": 1.1784058809280396,
      "learning_rate": 8.93524772177672e-06,
      "loss": 0.024,
      "step": 343180
    },
    {
      "epoch": 0.5616543272912944,
      "grad_norm": 1.1610758304595947,
      "learning_rate": 8.935181829563201e-06,
      "loss": 0.0335,
      "step": 343200
    },
    {
      "epoch": 0.5616870577299476,
      "grad_norm": 1.0470833778381348,
      "learning_rate": 8.935115937349685e-06,
      "loss": 0.0377,
      "step": 343220
    },
    {
      "epoch": 0.561719788168601,
      "grad_norm": 0.36640316247940063,
      "learning_rate": 8.935050045136167e-06,
      "loss": 0.0359,
      "step": 343240
    },
    {
      "epoch": 0.5617525186072544,
      "grad_norm": 2.189483165740967,
      "learning_rate": 8.93498415292265e-06,
      "loss": 0.0246,
      "step": 343260
    },
    {
      "epoch": 0.5617852490459078,
      "grad_norm": 0.7002224922180176,
      "learning_rate": 8.934918260709132e-06,
      "loss": 0.0384,
      "step": 343280
    },
    {
      "epoch": 0.561817979484561,
      "grad_norm": 1.1057932376861572,
      "learning_rate": 8.934852368495616e-06,
      "loss": 0.0318,
      "step": 343300
    },
    {
      "epoch": 0.5618507099232144,
      "grad_norm": 1.572005271911621,
      "learning_rate": 8.934786476282098e-06,
      "loss": 0.0318,
      "step": 343320
    },
    {
      "epoch": 0.5618834403618678,
      "grad_norm": 3.3839218616485596,
      "learning_rate": 8.934720584068581e-06,
      "loss": 0.0375,
      "step": 343340
    },
    {
      "epoch": 0.561916170800521,
      "grad_norm": 1.1515032052993774,
      "learning_rate": 8.934654691855065e-06,
      "loss": 0.0412,
      "step": 343360
    },
    {
      "epoch": 0.5619489012391744,
      "grad_norm": 0.41582998633384705,
      "learning_rate": 8.934588799641547e-06,
      "loss": 0.0316,
      "step": 343380
    },
    {
      "epoch": 0.5619816316778278,
      "grad_norm": 3.2644474506378174,
      "learning_rate": 8.93452290742803e-06,
      "loss": 0.0401,
      "step": 343400
    },
    {
      "epoch": 0.562014362116481,
      "grad_norm": 0.6346616744995117,
      "learning_rate": 8.934457015214512e-06,
      "loss": 0.0318,
      "step": 343420
    },
    {
      "epoch": 0.5620470925551344,
      "grad_norm": 1.2726119756698608,
      "learning_rate": 8.934391123000996e-06,
      "loss": 0.0331,
      "step": 343440
    },
    {
      "epoch": 0.5620798229937878,
      "grad_norm": 1.1007020473480225,
      "learning_rate": 8.934325230787478e-06,
      "loss": 0.0298,
      "step": 343460
    },
    {
      "epoch": 0.5621125534324412,
      "grad_norm": 1.1230977773666382,
      "learning_rate": 8.934259338573961e-06,
      "loss": 0.031,
      "step": 343480
    },
    {
      "epoch": 0.5621452838710944,
      "grad_norm": 2.75834321975708,
      "learning_rate": 8.934193446360445e-06,
      "loss": 0.0353,
      "step": 343500
    },
    {
      "epoch": 0.5621780143097478,
      "grad_norm": 1.160509467124939,
      "learning_rate": 8.934127554146927e-06,
      "loss": 0.0458,
      "step": 343520
    },
    {
      "epoch": 0.5622107447484012,
      "grad_norm": 2.9269633293151855,
      "learning_rate": 8.93406166193341e-06,
      "loss": 0.0422,
      "step": 343540
    },
    {
      "epoch": 0.5622434751870544,
      "grad_norm": 1.338107943534851,
      "learning_rate": 8.933995769719894e-06,
      "loss": 0.0343,
      "step": 343560
    },
    {
      "epoch": 0.5622762056257078,
      "grad_norm": 1.7623977661132812,
      "learning_rate": 8.933929877506376e-06,
      "loss": 0.0327,
      "step": 343580
    },
    {
      "epoch": 0.5623089360643612,
      "grad_norm": 2.8997716903686523,
      "learning_rate": 8.93386398529286e-06,
      "loss": 0.0384,
      "step": 343600
    },
    {
      "epoch": 0.5623416665030144,
      "grad_norm": 4.20888090133667,
      "learning_rate": 8.933798093079341e-06,
      "loss": 0.0477,
      "step": 343620
    },
    {
      "epoch": 0.5623743969416678,
      "grad_norm": 0.8620542883872986,
      "learning_rate": 8.933732200865825e-06,
      "loss": 0.0432,
      "step": 343640
    },
    {
      "epoch": 0.5624071273803212,
      "grad_norm": 0.5303148627281189,
      "learning_rate": 8.933666308652307e-06,
      "loss": 0.0469,
      "step": 343660
    },
    {
      "epoch": 0.5624398578189745,
      "grad_norm": 2.11873197555542,
      "learning_rate": 8.93360041643879e-06,
      "loss": 0.0359,
      "step": 343680
    },
    {
      "epoch": 0.5624725882576278,
      "grad_norm": 0.5618978142738342,
      "learning_rate": 8.933534524225274e-06,
      "loss": 0.0386,
      "step": 343700
    },
    {
      "epoch": 0.5625053186962812,
      "grad_norm": 0.9418631196022034,
      "learning_rate": 8.933468632011756e-06,
      "loss": 0.0408,
      "step": 343720
    },
    {
      "epoch": 0.5625380491349345,
      "grad_norm": 1.8631283044815063,
      "learning_rate": 8.93340273979824e-06,
      "loss": 0.0266,
      "step": 343740
    },
    {
      "epoch": 0.5625707795735878,
      "grad_norm": 1.4868197441101074,
      "learning_rate": 8.933336847584721e-06,
      "loss": 0.0263,
      "step": 343760
    },
    {
      "epoch": 0.5626035100122412,
      "grad_norm": 1.4546518325805664,
      "learning_rate": 8.933270955371205e-06,
      "loss": 0.0347,
      "step": 343780
    },
    {
      "epoch": 0.5626362404508946,
      "grad_norm": 1.7355458736419678,
      "learning_rate": 8.933205063157687e-06,
      "loss": 0.0301,
      "step": 343800
    },
    {
      "epoch": 0.5626689708895478,
      "grad_norm": 0.1781732738018036,
      "learning_rate": 8.93313917094417e-06,
      "loss": 0.0478,
      "step": 343820
    },
    {
      "epoch": 0.5627017013282012,
      "grad_norm": 1.3077744245529175,
      "learning_rate": 8.933073278730652e-06,
      "loss": 0.0283,
      "step": 343840
    },
    {
      "epoch": 0.5627344317668546,
      "grad_norm": 1.6542011499404907,
      "learning_rate": 8.933007386517136e-06,
      "loss": 0.03,
      "step": 343860
    },
    {
      "epoch": 0.5627671622055079,
      "grad_norm": 1.8808834552764893,
      "learning_rate": 8.93294149430362e-06,
      "loss": 0.0441,
      "step": 343880
    },
    {
      "epoch": 0.5627998926441612,
      "grad_norm": 0.9236618280410767,
      "learning_rate": 8.932875602090101e-06,
      "loss": 0.0215,
      "step": 343900
    },
    {
      "epoch": 0.5628326230828146,
      "grad_norm": 1.7422224283218384,
      "learning_rate": 8.932809709876585e-06,
      "loss": 0.0416,
      "step": 343920
    },
    {
      "epoch": 0.5628653535214679,
      "grad_norm": 0.17623896896839142,
      "learning_rate": 8.932743817663068e-06,
      "loss": 0.0398,
      "step": 343940
    },
    {
      "epoch": 0.5628980839601212,
      "grad_norm": 2.812659978866577,
      "learning_rate": 8.93267792544955e-06,
      "loss": 0.035,
      "step": 343960
    },
    {
      "epoch": 0.5629308143987746,
      "grad_norm": 0.29581418633461,
      "learning_rate": 8.932612033236034e-06,
      "loss": 0.0461,
      "step": 343980
    },
    {
      "epoch": 0.5629635448374279,
      "grad_norm": 0.46556389331817627,
      "learning_rate": 8.932546141022516e-06,
      "loss": 0.0385,
      "step": 344000
    },
    {
      "epoch": 0.5629962752760812,
      "grad_norm": 3.1195664405822754,
      "learning_rate": 8.932480248809e-06,
      "loss": 0.0331,
      "step": 344020
    },
    {
      "epoch": 0.5630290057147346,
      "grad_norm": 1.4447494745254517,
      "learning_rate": 8.932414356595483e-06,
      "loss": 0.0369,
      "step": 344040
    },
    {
      "epoch": 0.563061736153388,
      "grad_norm": 2.146732807159424,
      "learning_rate": 8.932348464381965e-06,
      "loss": 0.0284,
      "step": 344060
    },
    {
      "epoch": 0.5630944665920413,
      "grad_norm": 0.9284661412239075,
      "learning_rate": 8.932282572168448e-06,
      "loss": 0.0527,
      "step": 344080
    },
    {
      "epoch": 0.5631271970306946,
      "grad_norm": 1.9642601013183594,
      "learning_rate": 8.93221667995493e-06,
      "loss": 0.0349,
      "step": 344100
    },
    {
      "epoch": 0.563159927469348,
      "grad_norm": 0.9622101783752441,
      "learning_rate": 8.932150787741414e-06,
      "loss": 0.0317,
      "step": 344120
    },
    {
      "epoch": 0.5631926579080013,
      "grad_norm": 1.065152883529663,
      "learning_rate": 8.932084895527896e-06,
      "loss": 0.0277,
      "step": 344140
    },
    {
      "epoch": 0.5632253883466546,
      "grad_norm": 0.8475267291069031,
      "learning_rate": 8.93201900331438e-06,
      "loss": 0.0314,
      "step": 344160
    },
    {
      "epoch": 0.563258118785308,
      "grad_norm": 1.6826187372207642,
      "learning_rate": 8.931953111100861e-06,
      "loss": 0.0364,
      "step": 344180
    },
    {
      "epoch": 0.5632908492239613,
      "grad_norm": 1.4471861124038696,
      "learning_rate": 8.931887218887345e-06,
      "loss": 0.0406,
      "step": 344200
    },
    {
      "epoch": 0.5633235796626146,
      "grad_norm": 0.21481665968894958,
      "learning_rate": 8.931821326673827e-06,
      "loss": 0.0304,
      "step": 344220
    },
    {
      "epoch": 0.563356310101268,
      "grad_norm": 1.5825260877609253,
      "learning_rate": 8.93175543446031e-06,
      "loss": 0.0533,
      "step": 344240
    },
    {
      "epoch": 0.5633890405399213,
      "grad_norm": 0.30052247643470764,
      "learning_rate": 8.931689542246792e-06,
      "loss": 0.0382,
      "step": 344260
    },
    {
      "epoch": 0.5634217709785747,
      "grad_norm": 1.0371959209442139,
      "learning_rate": 8.931623650033276e-06,
      "loss": 0.0337,
      "step": 344280
    },
    {
      "epoch": 0.563454501417228,
      "grad_norm": 1.6593152284622192,
      "learning_rate": 8.93155775781976e-06,
      "loss": 0.0386,
      "step": 344300
    },
    {
      "epoch": 0.5634872318558813,
      "grad_norm": 1.2839080095291138,
      "learning_rate": 8.931491865606241e-06,
      "loss": 0.0321,
      "step": 344320
    },
    {
      "epoch": 0.5635199622945347,
      "grad_norm": 1.2484322786331177,
      "learning_rate": 8.931425973392725e-06,
      "loss": 0.0512,
      "step": 344340
    },
    {
      "epoch": 0.563552692733188,
      "grad_norm": 1.2996184825897217,
      "learning_rate": 8.931360081179209e-06,
      "loss": 0.0387,
      "step": 344360
    },
    {
      "epoch": 0.5635854231718413,
      "grad_norm": 1.4432438611984253,
      "learning_rate": 8.93129418896569e-06,
      "loss": 0.0453,
      "step": 344380
    },
    {
      "epoch": 0.5636181536104947,
      "grad_norm": 0.25127914547920227,
      "learning_rate": 8.931228296752174e-06,
      "loss": 0.0278,
      "step": 344400
    },
    {
      "epoch": 0.563650884049148,
      "grad_norm": 2.4918649196624756,
      "learning_rate": 8.931162404538658e-06,
      "loss": 0.0454,
      "step": 344420
    },
    {
      "epoch": 0.5636836144878014,
      "grad_norm": 3.1850311756134033,
      "learning_rate": 8.93109651232514e-06,
      "loss": 0.0324,
      "step": 344440
    },
    {
      "epoch": 0.5637163449264547,
      "grad_norm": 0.668979823589325,
      "learning_rate": 8.931030620111623e-06,
      "loss": 0.0348,
      "step": 344460
    },
    {
      "epoch": 0.563749075365108,
      "grad_norm": 1.3154139518737793,
      "learning_rate": 8.930964727898105e-06,
      "loss": 0.0572,
      "step": 344480
    },
    {
      "epoch": 0.5637818058037614,
      "grad_norm": 0.4131883978843689,
      "learning_rate": 8.930898835684589e-06,
      "loss": 0.0414,
      "step": 344500
    },
    {
      "epoch": 0.5638145362424147,
      "grad_norm": 0.4155324399471283,
      "learning_rate": 8.93083294347107e-06,
      "loss": 0.0403,
      "step": 344520
    },
    {
      "epoch": 0.5638472666810681,
      "grad_norm": 0.4149110019207001,
      "learning_rate": 8.930767051257554e-06,
      "loss": 0.031,
      "step": 344540
    },
    {
      "epoch": 0.5638799971197214,
      "grad_norm": 0.8282859921455383,
      "learning_rate": 8.930701159044036e-06,
      "loss": 0.0366,
      "step": 344560
    },
    {
      "epoch": 0.5639127275583747,
      "grad_norm": 2.355835199356079,
      "learning_rate": 8.93063526683052e-06,
      "loss": 0.0366,
      "step": 344580
    },
    {
      "epoch": 0.5639454579970281,
      "grad_norm": 2.424159526824951,
      "learning_rate": 8.930569374617001e-06,
      "loss": 0.043,
      "step": 344600
    },
    {
      "epoch": 0.5639781884356814,
      "grad_norm": 0.873989999294281,
      "learning_rate": 8.930503482403485e-06,
      "loss": 0.0431,
      "step": 344620
    },
    {
      "epoch": 0.5640109188743347,
      "grad_norm": 1.856889009475708,
      "learning_rate": 8.930437590189967e-06,
      "loss": 0.0317,
      "step": 344640
    },
    {
      "epoch": 0.5640436493129881,
      "grad_norm": 2.6267333030700684,
      "learning_rate": 8.93037169797645e-06,
      "loss": 0.033,
      "step": 344660
    },
    {
      "epoch": 0.5640763797516414,
      "grad_norm": 0.4206046164035797,
      "learning_rate": 8.930305805762934e-06,
      "loss": 0.029,
      "step": 344680
    },
    {
      "epoch": 0.5641091101902947,
      "grad_norm": 0.5560159087181091,
      "learning_rate": 8.930239913549416e-06,
      "loss": 0.0358,
      "step": 344700
    },
    {
      "epoch": 0.5641418406289481,
      "grad_norm": 1.2257373332977295,
      "learning_rate": 8.9301740213359e-06,
      "loss": 0.0354,
      "step": 344720
    },
    {
      "epoch": 0.5641745710676015,
      "grad_norm": 1.3758196830749512,
      "learning_rate": 8.930108129122383e-06,
      "loss": 0.0404,
      "step": 344740
    },
    {
      "epoch": 0.5642073015062548,
      "grad_norm": 0.5258913636207581,
      "learning_rate": 8.930042236908865e-06,
      "loss": 0.0485,
      "step": 344760
    },
    {
      "epoch": 0.5642400319449081,
      "grad_norm": 1.5810339450836182,
      "learning_rate": 8.929976344695349e-06,
      "loss": 0.0321,
      "step": 344780
    },
    {
      "epoch": 0.5642727623835615,
      "grad_norm": 0.6072449088096619,
      "learning_rate": 8.929910452481832e-06,
      "loss": 0.0303,
      "step": 344800
    },
    {
      "epoch": 0.5643054928222148,
      "grad_norm": 0.40443935990333557,
      "learning_rate": 8.929844560268314e-06,
      "loss": 0.0274,
      "step": 344820
    },
    {
      "epoch": 0.5643382232608681,
      "grad_norm": 0.7252234220504761,
      "learning_rate": 8.929778668054798e-06,
      "loss": 0.0304,
      "step": 344840
    },
    {
      "epoch": 0.5643709536995215,
      "grad_norm": 1.222843885421753,
      "learning_rate": 8.92971277584128e-06,
      "loss": 0.0494,
      "step": 344860
    },
    {
      "epoch": 0.5644036841381748,
      "grad_norm": 0.4626460373401642,
      "learning_rate": 8.929646883627763e-06,
      "loss": 0.0363,
      "step": 344880
    },
    {
      "epoch": 0.5644364145768281,
      "grad_norm": 1.7762465476989746,
      "learning_rate": 8.929580991414245e-06,
      "loss": 0.0298,
      "step": 344900
    },
    {
      "epoch": 0.5644691450154815,
      "grad_norm": 0.9160351753234863,
      "learning_rate": 8.929515099200729e-06,
      "loss": 0.0331,
      "step": 344920
    },
    {
      "epoch": 0.5645018754541349,
      "grad_norm": 1.0786701440811157,
      "learning_rate": 8.92944920698721e-06,
      "loss": 0.032,
      "step": 344940
    },
    {
      "epoch": 0.5645346058927881,
      "grad_norm": 1.470880389213562,
      "learning_rate": 8.929383314773694e-06,
      "loss": 0.0438,
      "step": 344960
    },
    {
      "epoch": 0.5645673363314415,
      "grad_norm": 0.9093220233917236,
      "learning_rate": 8.929317422560176e-06,
      "loss": 0.0351,
      "step": 344980
    },
    {
      "epoch": 0.5646000667700949,
      "grad_norm": 0.3678516149520874,
      "learning_rate": 8.92925153034666e-06,
      "loss": 0.0439,
      "step": 345000
    },
    {
      "epoch": 0.5646327972087481,
      "grad_norm": 1.8249917030334473,
      "learning_rate": 8.929185638133141e-06,
      "loss": 0.0431,
      "step": 345020
    },
    {
      "epoch": 0.5646655276474015,
      "grad_norm": 0.9593760371208191,
      "learning_rate": 8.929119745919625e-06,
      "loss": 0.0422,
      "step": 345040
    },
    {
      "epoch": 0.5646982580860549,
      "grad_norm": 9.825118064880371,
      "learning_rate": 8.929053853706107e-06,
      "loss": 0.0373,
      "step": 345060
    },
    {
      "epoch": 0.5647309885247082,
      "grad_norm": 1.4692366123199463,
      "learning_rate": 8.92898796149259e-06,
      "loss": 0.0332,
      "step": 345080
    },
    {
      "epoch": 0.5647637189633615,
      "grad_norm": 0.7394764423370361,
      "learning_rate": 8.928922069279074e-06,
      "loss": 0.0356,
      "step": 345100
    },
    {
      "epoch": 0.5647964494020149,
      "grad_norm": 0.8014992475509644,
      "learning_rate": 8.928856177065556e-06,
      "loss": 0.0243,
      "step": 345120
    },
    {
      "epoch": 0.5648291798406683,
      "grad_norm": 1.0154224634170532,
      "learning_rate": 8.92879028485204e-06,
      "loss": 0.0304,
      "step": 345140
    },
    {
      "epoch": 0.5648619102793215,
      "grad_norm": 0.9551942348480225,
      "learning_rate": 8.928724392638523e-06,
      "loss": 0.0248,
      "step": 345160
    },
    {
      "epoch": 0.5648946407179749,
      "grad_norm": 1.5178273916244507,
      "learning_rate": 8.928658500425005e-06,
      "loss": 0.0438,
      "step": 345180
    },
    {
      "epoch": 0.5649273711566283,
      "grad_norm": 1.129818320274353,
      "learning_rate": 8.928592608211489e-06,
      "loss": 0.0328,
      "step": 345200
    },
    {
      "epoch": 0.5649601015952815,
      "grad_norm": 1.1479815244674683,
      "learning_rate": 8.928526715997972e-06,
      "loss": 0.0395,
      "step": 345220
    },
    {
      "epoch": 0.5649928320339349,
      "grad_norm": 1.5753538608551025,
      "learning_rate": 8.928460823784454e-06,
      "loss": 0.0494,
      "step": 345240
    },
    {
      "epoch": 0.5650255624725883,
      "grad_norm": 1.1149859428405762,
      "learning_rate": 8.928394931570938e-06,
      "loss": 0.0427,
      "step": 345260
    },
    {
      "epoch": 0.5650582929112415,
      "grad_norm": 1.1896016597747803,
      "learning_rate": 8.92832903935742e-06,
      "loss": 0.0367,
      "step": 345280
    },
    {
      "epoch": 0.5650910233498949,
      "grad_norm": 1.1812207698822021,
      "learning_rate": 8.928263147143903e-06,
      "loss": 0.0368,
      "step": 345300
    },
    {
      "epoch": 0.5651237537885483,
      "grad_norm": 0.49047523736953735,
      "learning_rate": 8.928197254930385e-06,
      "loss": 0.0428,
      "step": 345320
    },
    {
      "epoch": 0.5651564842272017,
      "grad_norm": 1.541605830192566,
      "learning_rate": 8.928131362716869e-06,
      "loss": 0.0356,
      "step": 345340
    },
    {
      "epoch": 0.5651892146658549,
      "grad_norm": 1.9890679121017456,
      "learning_rate": 8.92806547050335e-06,
      "loss": 0.0525,
      "step": 345360
    },
    {
      "epoch": 0.5652219451045083,
      "grad_norm": 1.4657924175262451,
      "learning_rate": 8.927999578289834e-06,
      "loss": 0.0415,
      "step": 345380
    },
    {
      "epoch": 0.5652546755431617,
      "grad_norm": 0.38690951466560364,
      "learning_rate": 8.927933686076316e-06,
      "loss": 0.0355,
      "step": 345400
    },
    {
      "epoch": 0.5652874059818149,
      "grad_norm": 0.6709775328636169,
      "learning_rate": 8.9278677938628e-06,
      "loss": 0.027,
      "step": 345420
    },
    {
      "epoch": 0.5653201364204683,
      "grad_norm": 1.2272323369979858,
      "learning_rate": 8.927801901649282e-06,
      "loss": 0.0478,
      "step": 345440
    },
    {
      "epoch": 0.5653528668591217,
      "grad_norm": 2.3549468517303467,
      "learning_rate": 8.927736009435765e-06,
      "loss": 0.0273,
      "step": 345460
    },
    {
      "epoch": 0.5653855972977749,
      "grad_norm": 1.9706735610961914,
      "learning_rate": 8.927670117222249e-06,
      "loss": 0.0333,
      "step": 345480
    },
    {
      "epoch": 0.5654183277364283,
      "grad_norm": 1.9897795915603638,
      "learning_rate": 8.92760422500873e-06,
      "loss": 0.0368,
      "step": 345500
    },
    {
      "epoch": 0.5654510581750817,
      "grad_norm": 1.5015052556991577,
      "learning_rate": 8.927538332795214e-06,
      "loss": 0.0362,
      "step": 345520
    },
    {
      "epoch": 0.565483788613735,
      "grad_norm": 1.4723703861236572,
      "learning_rate": 8.927472440581698e-06,
      "loss": 0.0427,
      "step": 345540
    },
    {
      "epoch": 0.5655165190523883,
      "grad_norm": 1.910461187362671,
      "learning_rate": 8.92740654836818e-06,
      "loss": 0.0411,
      "step": 345560
    },
    {
      "epoch": 0.5655492494910417,
      "grad_norm": 0.9673514366149902,
      "learning_rate": 8.927340656154663e-06,
      "loss": 0.0345,
      "step": 345580
    },
    {
      "epoch": 0.5655819799296951,
      "grad_norm": 1.2110977172851562,
      "learning_rate": 8.927274763941147e-06,
      "loss": 0.0405,
      "step": 345600
    },
    {
      "epoch": 0.5656147103683483,
      "grad_norm": 0.6252190470695496,
      "learning_rate": 8.927208871727629e-06,
      "loss": 0.0407,
      "step": 345620
    },
    {
      "epoch": 0.5656474408070017,
      "grad_norm": 1.0194811820983887,
      "learning_rate": 8.927142979514112e-06,
      "loss": 0.0323,
      "step": 345640
    },
    {
      "epoch": 0.5656801712456551,
      "grad_norm": 1.414641261100769,
      "learning_rate": 8.927077087300594e-06,
      "loss": 0.0476,
      "step": 345660
    },
    {
      "epoch": 0.5657129016843083,
      "grad_norm": 1.3647751808166504,
      "learning_rate": 8.927011195087078e-06,
      "loss": 0.039,
      "step": 345680
    },
    {
      "epoch": 0.5657456321229617,
      "grad_norm": 1.4341425895690918,
      "learning_rate": 8.92694530287356e-06,
      "loss": 0.0358,
      "step": 345700
    },
    {
      "epoch": 0.5657783625616151,
      "grad_norm": 0.3014814853668213,
      "learning_rate": 8.926879410660043e-06,
      "loss": 0.0407,
      "step": 345720
    },
    {
      "epoch": 0.5658110930002684,
      "grad_norm": 1.435469150543213,
      "learning_rate": 8.926813518446525e-06,
      "loss": 0.0321,
      "step": 345740
    },
    {
      "epoch": 0.5658438234389217,
      "grad_norm": 0.2846817970275879,
      "learning_rate": 8.926747626233009e-06,
      "loss": 0.0303,
      "step": 345760
    },
    {
      "epoch": 0.5658765538775751,
      "grad_norm": 5.679852485656738,
      "learning_rate": 8.92668173401949e-06,
      "loss": 0.0448,
      "step": 345780
    },
    {
      "epoch": 0.5659092843162284,
      "grad_norm": 0.3239680230617523,
      "learning_rate": 8.926615841805974e-06,
      "loss": 0.0337,
      "step": 345800
    },
    {
      "epoch": 0.5659420147548817,
      "grad_norm": 0.7487832903862,
      "learning_rate": 8.926549949592458e-06,
      "loss": 0.0316,
      "step": 345820
    },
    {
      "epoch": 0.5659747451935351,
      "grad_norm": 0.7066607475280762,
      "learning_rate": 8.92648405737894e-06,
      "loss": 0.0339,
      "step": 345840
    },
    {
      "epoch": 0.5660074756321885,
      "grad_norm": 2.5660054683685303,
      "learning_rate": 8.926418165165423e-06,
      "loss": 0.0504,
      "step": 345860
    },
    {
      "epoch": 0.5660402060708417,
      "grad_norm": 0.6178694367408752,
      "learning_rate": 8.926352272951905e-06,
      "loss": 0.0357,
      "step": 345880
    },
    {
      "epoch": 0.5660729365094951,
      "grad_norm": 7.560938835144043,
      "learning_rate": 8.926286380738389e-06,
      "loss": 0.0349,
      "step": 345900
    },
    {
      "epoch": 0.5661056669481485,
      "grad_norm": 3.2067275047302246,
      "learning_rate": 8.92622048852487e-06,
      "loss": 0.043,
      "step": 345920
    },
    {
      "epoch": 0.5661383973868018,
      "grad_norm": 2.615894079208374,
      "learning_rate": 8.926154596311354e-06,
      "loss": 0.0375,
      "step": 345940
    },
    {
      "epoch": 0.5661711278254551,
      "grad_norm": 2.7919585704803467,
      "learning_rate": 8.926088704097838e-06,
      "loss": 0.0503,
      "step": 345960
    },
    {
      "epoch": 0.5662038582641085,
      "grad_norm": 0.8485460877418518,
      "learning_rate": 8.926022811884321e-06,
      "loss": 0.0269,
      "step": 345980
    },
    {
      "epoch": 0.5662365887027618,
      "grad_norm": 1.8197264671325684,
      "learning_rate": 8.925956919670803e-06,
      "loss": 0.0425,
      "step": 346000
    },
    {
      "epoch": 0.5662693191414151,
      "grad_norm": 0.5943450927734375,
      "learning_rate": 8.925891027457287e-06,
      "loss": 0.05,
      "step": 346020
    },
    {
      "epoch": 0.5663020495800685,
      "grad_norm": 1.7125720977783203,
      "learning_rate": 8.925825135243769e-06,
      "loss": 0.0419,
      "step": 346040
    },
    {
      "epoch": 0.5663347800187218,
      "grad_norm": 1.1301997900009155,
      "learning_rate": 8.925759243030252e-06,
      "loss": 0.0251,
      "step": 346060
    },
    {
      "epoch": 0.5663675104573751,
      "grad_norm": 0.41899964213371277,
      "learning_rate": 8.925693350816734e-06,
      "loss": 0.0311,
      "step": 346080
    },
    {
      "epoch": 0.5664002408960285,
      "grad_norm": 3.8402748107910156,
      "learning_rate": 8.925627458603218e-06,
      "loss": 0.0407,
      "step": 346100
    },
    {
      "epoch": 0.5664329713346818,
      "grad_norm": 0.7873921394348145,
      "learning_rate": 8.9255615663897e-06,
      "loss": 0.0366,
      "step": 346120
    },
    {
      "epoch": 0.5664657017733352,
      "grad_norm": 1.2714110612869263,
      "learning_rate": 8.925495674176183e-06,
      "loss": 0.0403,
      "step": 346140
    },
    {
      "epoch": 0.5664984322119885,
      "grad_norm": 0.9797111749649048,
      "learning_rate": 8.925429781962667e-06,
      "loss": 0.0353,
      "step": 346160
    },
    {
      "epoch": 0.5665311626506419,
      "grad_norm": 1.1179267168045044,
      "learning_rate": 8.925363889749149e-06,
      "loss": 0.0392,
      "step": 346180
    },
    {
      "epoch": 0.5665638930892952,
      "grad_norm": 1.5885462760925293,
      "learning_rate": 8.925297997535632e-06,
      "loss": 0.0343,
      "step": 346200
    },
    {
      "epoch": 0.5665966235279485,
      "grad_norm": 0.8648990392684937,
      "learning_rate": 8.925232105322114e-06,
      "loss": 0.0327,
      "step": 346220
    },
    {
      "epoch": 0.5666293539666019,
      "grad_norm": 0.4568229019641876,
      "learning_rate": 8.925166213108598e-06,
      "loss": 0.033,
      "step": 346240
    },
    {
      "epoch": 0.5666620844052552,
      "grad_norm": 0.633272647857666,
      "learning_rate": 8.92510032089508e-06,
      "loss": 0.0431,
      "step": 346260
    },
    {
      "epoch": 0.5666948148439085,
      "grad_norm": 0.39252427220344543,
      "learning_rate": 8.925034428681563e-06,
      "loss": 0.0365,
      "step": 346280
    },
    {
      "epoch": 0.5667275452825619,
      "grad_norm": 1.0339009761810303,
      "learning_rate": 8.924968536468045e-06,
      "loss": 0.0241,
      "step": 346300
    },
    {
      "epoch": 0.5667602757212152,
      "grad_norm": 3.5159029960632324,
      "learning_rate": 8.924902644254529e-06,
      "loss": 0.054,
      "step": 346320
    },
    {
      "epoch": 0.5667930061598686,
      "grad_norm": 0.08956450968980789,
      "learning_rate": 8.924836752041012e-06,
      "loss": 0.0534,
      "step": 346340
    },
    {
      "epoch": 0.5668257365985219,
      "grad_norm": 1.939309000968933,
      "learning_rate": 8.924770859827494e-06,
      "loss": 0.0407,
      "step": 346360
    },
    {
      "epoch": 0.5668584670371752,
      "grad_norm": 2.9667506217956543,
      "learning_rate": 8.924704967613978e-06,
      "loss": 0.0409,
      "step": 346380
    },
    {
      "epoch": 0.5668911974758286,
      "grad_norm": 0.3752081096172333,
      "learning_rate": 8.924639075400462e-06,
      "loss": 0.0406,
      "step": 346400
    },
    {
      "epoch": 0.5669239279144819,
      "grad_norm": 1.9815349578857422,
      "learning_rate": 8.924573183186943e-06,
      "loss": 0.0461,
      "step": 346420
    },
    {
      "epoch": 0.5669566583531352,
      "grad_norm": 0.5512640476226807,
      "learning_rate": 8.924507290973427e-06,
      "loss": 0.0338,
      "step": 346440
    },
    {
      "epoch": 0.5669893887917886,
      "grad_norm": 1.0337156057357788,
      "learning_rate": 8.924441398759909e-06,
      "loss": 0.0422,
      "step": 346460
    },
    {
      "epoch": 0.5670221192304419,
      "grad_norm": 0.6724374294281006,
      "learning_rate": 8.924375506546393e-06,
      "loss": 0.0367,
      "step": 346480
    },
    {
      "epoch": 0.5670548496690953,
      "grad_norm": 2.5925514698028564,
      "learning_rate": 8.924309614332874e-06,
      "loss": 0.0375,
      "step": 346500
    },
    {
      "epoch": 0.5670875801077486,
      "grad_norm": 2.864190101623535,
      "learning_rate": 8.924243722119358e-06,
      "loss": 0.0376,
      "step": 346520
    },
    {
      "epoch": 0.567120310546402,
      "grad_norm": 1.0518161058425903,
      "learning_rate": 8.924177829905842e-06,
      "loss": 0.0263,
      "step": 346540
    },
    {
      "epoch": 0.5671530409850553,
      "grad_norm": 1.1212303638458252,
      "learning_rate": 8.924111937692323e-06,
      "loss": 0.0323,
      "step": 346560
    },
    {
      "epoch": 0.5671857714237086,
      "grad_norm": 0.5705785751342773,
      "learning_rate": 8.924046045478807e-06,
      "loss": 0.0399,
      "step": 346580
    },
    {
      "epoch": 0.567218501862362,
      "grad_norm": 0.22410623729228973,
      "learning_rate": 8.923980153265289e-06,
      "loss": 0.0409,
      "step": 346600
    },
    {
      "epoch": 0.5672512323010153,
      "grad_norm": 0.7639012336730957,
      "learning_rate": 8.923914261051773e-06,
      "loss": 0.0351,
      "step": 346620
    },
    {
      "epoch": 0.5672839627396686,
      "grad_norm": 20.147863388061523,
      "learning_rate": 8.923848368838254e-06,
      "loss": 0.0351,
      "step": 346640
    },
    {
      "epoch": 0.567316693178322,
      "grad_norm": 1.9004005193710327,
      "learning_rate": 8.923782476624738e-06,
      "loss": 0.0325,
      "step": 346660
    },
    {
      "epoch": 0.5673494236169753,
      "grad_norm": 0.3900829255580902,
      "learning_rate": 8.92371658441122e-06,
      "loss": 0.0345,
      "step": 346680
    },
    {
      "epoch": 0.5673821540556286,
      "grad_norm": 2.4094314575195312,
      "learning_rate": 8.923650692197703e-06,
      "loss": 0.0343,
      "step": 346700
    },
    {
      "epoch": 0.567414884494282,
      "grad_norm": 1.0391813516616821,
      "learning_rate": 8.923584799984187e-06,
      "loss": 0.031,
      "step": 346720
    },
    {
      "epoch": 0.5674476149329354,
      "grad_norm": 0.9256232976913452,
      "learning_rate": 8.923518907770669e-06,
      "loss": 0.0305,
      "step": 346740
    },
    {
      "epoch": 0.5674803453715886,
      "grad_norm": 12.838180541992188,
      "learning_rate": 8.923453015557153e-06,
      "loss": 0.0372,
      "step": 346760
    },
    {
      "epoch": 0.567513075810242,
      "grad_norm": 0.34252431988716125,
      "learning_rate": 8.923387123343636e-06,
      "loss": 0.0366,
      "step": 346780
    },
    {
      "epoch": 0.5675458062488954,
      "grad_norm": 1.7843784093856812,
      "learning_rate": 8.923321231130118e-06,
      "loss": 0.0368,
      "step": 346800
    },
    {
      "epoch": 0.5675785366875487,
      "grad_norm": 2.3228447437286377,
      "learning_rate": 8.923255338916602e-06,
      "loss": 0.0362,
      "step": 346820
    },
    {
      "epoch": 0.567611267126202,
      "grad_norm": 1.5973680019378662,
      "learning_rate": 8.923189446703084e-06,
      "loss": 0.039,
      "step": 346840
    },
    {
      "epoch": 0.5676439975648554,
      "grad_norm": 6.467409133911133,
      "learning_rate": 8.923123554489567e-06,
      "loss": 0.0412,
      "step": 346860
    },
    {
      "epoch": 0.5676767280035087,
      "grad_norm": 0.45171239972114563,
      "learning_rate": 8.92305766227605e-06,
      "loss": 0.0468,
      "step": 346880
    },
    {
      "epoch": 0.567709458442162,
      "grad_norm": 0.9793182015419006,
      "learning_rate": 8.922991770062533e-06,
      "loss": 0.0406,
      "step": 346900
    },
    {
      "epoch": 0.5677421888808154,
      "grad_norm": 1.1811985969543457,
      "learning_rate": 8.922925877849016e-06,
      "loss": 0.0443,
      "step": 346920
    },
    {
      "epoch": 0.5677749193194688,
      "grad_norm": 1.0574795007705688,
      "learning_rate": 8.922859985635498e-06,
      "loss": 0.0264,
      "step": 346940
    },
    {
      "epoch": 0.567807649758122,
      "grad_norm": 0.8606570959091187,
      "learning_rate": 8.922794093421982e-06,
      "loss": 0.0313,
      "step": 346960
    },
    {
      "epoch": 0.5678403801967754,
      "grad_norm": 1.354095220565796,
      "learning_rate": 8.922728201208464e-06,
      "loss": 0.0568,
      "step": 346980
    },
    {
      "epoch": 0.5678731106354288,
      "grad_norm": 0.4744204878807068,
      "learning_rate": 8.922662308994947e-06,
      "loss": 0.0411,
      "step": 347000
    },
    {
      "epoch": 0.567905841074082,
      "grad_norm": 0.25858262181282043,
      "learning_rate": 8.922596416781429e-06,
      "loss": 0.0355,
      "step": 347020
    },
    {
      "epoch": 0.5679385715127354,
      "grad_norm": 2.669480085372925,
      "learning_rate": 8.922530524567913e-06,
      "loss": 0.0455,
      "step": 347040
    },
    {
      "epoch": 0.5679713019513888,
      "grad_norm": 2.220418691635132,
      "learning_rate": 8.922464632354395e-06,
      "loss": 0.0337,
      "step": 347060
    },
    {
      "epoch": 0.568004032390042,
      "grad_norm": 1.6134297847747803,
      "learning_rate": 8.922398740140878e-06,
      "loss": 0.0411,
      "step": 347080
    },
    {
      "epoch": 0.5680367628286954,
      "grad_norm": 0.4140097498893738,
      "learning_rate": 8.92233284792736e-06,
      "loss": 0.0369,
      "step": 347100
    },
    {
      "epoch": 0.5680694932673488,
      "grad_norm": 1.0813606977462769,
      "learning_rate": 8.922266955713844e-06,
      "loss": 0.0412,
      "step": 347120
    },
    {
      "epoch": 0.5681022237060022,
      "grad_norm": 7.453429698944092,
      "learning_rate": 8.922201063500327e-06,
      "loss": 0.0383,
      "step": 347140
    },
    {
      "epoch": 0.5681349541446554,
      "grad_norm": 0.8785494565963745,
      "learning_rate": 8.922135171286809e-06,
      "loss": 0.0473,
      "step": 347160
    },
    {
      "epoch": 0.5681676845833088,
      "grad_norm": 0.8029040098190308,
      "learning_rate": 8.922069279073293e-06,
      "loss": 0.0377,
      "step": 347180
    },
    {
      "epoch": 0.5682004150219622,
      "grad_norm": 0.8452149629592896,
      "learning_rate": 8.922003386859776e-06,
      "loss": 0.0382,
      "step": 347200
    },
    {
      "epoch": 0.5682331454606154,
      "grad_norm": 0.6505728960037231,
      "learning_rate": 8.921937494646258e-06,
      "loss": 0.0334,
      "step": 347220
    },
    {
      "epoch": 0.5682658758992688,
      "grad_norm": 0.5252856016159058,
      "learning_rate": 8.921871602432742e-06,
      "loss": 0.0496,
      "step": 347240
    },
    {
      "epoch": 0.5682986063379222,
      "grad_norm": 1.36757493019104,
      "learning_rate": 8.921805710219225e-06,
      "loss": 0.0462,
      "step": 347260
    },
    {
      "epoch": 0.5683313367765754,
      "grad_norm": 1.439937949180603,
      "learning_rate": 8.921739818005707e-06,
      "loss": 0.0333,
      "step": 347280
    },
    {
      "epoch": 0.5683640672152288,
      "grad_norm": 1.5793989896774292,
      "learning_rate": 8.92167392579219e-06,
      "loss": 0.0198,
      "step": 347300
    },
    {
      "epoch": 0.5683967976538822,
      "grad_norm": 2.931389570236206,
      "learning_rate": 8.921608033578673e-06,
      "loss": 0.0391,
      "step": 347320
    },
    {
      "epoch": 0.5684295280925354,
      "grad_norm": 2.387057304382324,
      "learning_rate": 8.921542141365156e-06,
      "loss": 0.0381,
      "step": 347340
    },
    {
      "epoch": 0.5684622585311888,
      "grad_norm": 12.782302856445312,
      "learning_rate": 8.921476249151638e-06,
      "loss": 0.0308,
      "step": 347360
    },
    {
      "epoch": 0.5684949889698422,
      "grad_norm": 2.6078808307647705,
      "learning_rate": 8.921410356938122e-06,
      "loss": 0.0347,
      "step": 347380
    },
    {
      "epoch": 0.5685277194084956,
      "grad_norm": 0.5799100399017334,
      "learning_rate": 8.921344464724604e-06,
      "loss": 0.0329,
      "step": 347400
    },
    {
      "epoch": 0.5685604498471488,
      "grad_norm": 1.0864676237106323,
      "learning_rate": 8.921278572511087e-06,
      "loss": 0.0374,
      "step": 347420
    },
    {
      "epoch": 0.5685931802858022,
      "grad_norm": 1.840583324432373,
      "learning_rate": 8.921212680297569e-06,
      "loss": 0.0269,
      "step": 347440
    },
    {
      "epoch": 0.5686259107244556,
      "grad_norm": 1.853811502456665,
      "learning_rate": 8.921146788084053e-06,
      "loss": 0.0325,
      "step": 347460
    },
    {
      "epoch": 0.5686586411631088,
      "grad_norm": 1.8156452178955078,
      "learning_rate": 8.921080895870535e-06,
      "loss": 0.046,
      "step": 347480
    },
    {
      "epoch": 0.5686913716017622,
      "grad_norm": 0.6616948246955872,
      "learning_rate": 8.921015003657018e-06,
      "loss": 0.048,
      "step": 347500
    },
    {
      "epoch": 0.5687241020404156,
      "grad_norm": 1.4254028797149658,
      "learning_rate": 8.920949111443502e-06,
      "loss": 0.0387,
      "step": 347520
    },
    {
      "epoch": 0.5687568324790688,
      "grad_norm": 1.3817747831344604,
      "learning_rate": 8.920883219229984e-06,
      "loss": 0.0453,
      "step": 347540
    },
    {
      "epoch": 0.5687895629177222,
      "grad_norm": 0.7874102592468262,
      "learning_rate": 8.920817327016467e-06,
      "loss": 0.0499,
      "step": 347560
    },
    {
      "epoch": 0.5688222933563756,
      "grad_norm": 3.210958242416382,
      "learning_rate": 8.92075143480295e-06,
      "loss": 0.0427,
      "step": 347580
    },
    {
      "epoch": 0.568855023795029,
      "grad_norm": 4.296656608581543,
      "learning_rate": 8.920685542589433e-06,
      "loss": 0.0369,
      "step": 347600
    },
    {
      "epoch": 0.5688877542336822,
      "grad_norm": 2.5163607597351074,
      "learning_rate": 8.920619650375916e-06,
      "loss": 0.0387,
      "step": 347620
    },
    {
      "epoch": 0.5689204846723356,
      "grad_norm": 2.7722949981689453,
      "learning_rate": 8.9205537581624e-06,
      "loss": 0.0396,
      "step": 347640
    },
    {
      "epoch": 0.568953215110989,
      "grad_norm": 0.41603320837020874,
      "learning_rate": 8.920487865948882e-06,
      "loss": 0.0296,
      "step": 347660
    },
    {
      "epoch": 0.5689859455496422,
      "grad_norm": 1.2226704359054565,
      "learning_rate": 8.920421973735365e-06,
      "loss": 0.0396,
      "step": 347680
    },
    {
      "epoch": 0.5690186759882956,
      "grad_norm": 1.0866986513137817,
      "learning_rate": 8.920356081521847e-06,
      "loss": 0.0313,
      "step": 347700
    },
    {
      "epoch": 0.569051406426949,
      "grad_norm": 2.3797662258148193,
      "learning_rate": 8.92029018930833e-06,
      "loss": 0.0342,
      "step": 347720
    },
    {
      "epoch": 0.5690841368656022,
      "grad_norm": 1.7717748880386353,
      "learning_rate": 8.920224297094813e-06,
      "loss": 0.0347,
      "step": 347740
    },
    {
      "epoch": 0.5691168673042556,
      "grad_norm": 1.1575266122817993,
      "learning_rate": 8.920158404881296e-06,
      "loss": 0.0337,
      "step": 347760
    },
    {
      "epoch": 0.569149597742909,
      "grad_norm": 1.6860642433166504,
      "learning_rate": 8.920092512667778e-06,
      "loss": 0.0417,
      "step": 347780
    },
    {
      "epoch": 0.5691823281815623,
      "grad_norm": 2.0513508319854736,
      "learning_rate": 8.920026620454262e-06,
      "loss": 0.0526,
      "step": 347800
    },
    {
      "epoch": 0.5692150586202156,
      "grad_norm": 1.8164609670639038,
      "learning_rate": 8.919960728240744e-06,
      "loss": 0.0333,
      "step": 347820
    },
    {
      "epoch": 0.569247789058869,
      "grad_norm": 2.397071599960327,
      "learning_rate": 8.919894836027227e-06,
      "loss": 0.0463,
      "step": 347840
    },
    {
      "epoch": 0.5692805194975223,
      "grad_norm": 1.2243260145187378,
      "learning_rate": 8.91982894381371e-06,
      "loss": 0.0289,
      "step": 347860
    },
    {
      "epoch": 0.5693132499361756,
      "grad_norm": 1.5007851123809814,
      "learning_rate": 8.919763051600193e-06,
      "loss": 0.0373,
      "step": 347880
    },
    {
      "epoch": 0.569345980374829,
      "grad_norm": 1.7420333623886108,
      "learning_rate": 8.919697159386675e-06,
      "loss": 0.031,
      "step": 347900
    },
    {
      "epoch": 0.5693787108134823,
      "grad_norm": 0.41188687086105347,
      "learning_rate": 8.919631267173158e-06,
      "loss": 0.0366,
      "step": 347920
    },
    {
      "epoch": 0.5694114412521356,
      "grad_norm": 0.9508479833602905,
      "learning_rate": 8.919565374959642e-06,
      "loss": 0.0343,
      "step": 347940
    },
    {
      "epoch": 0.569444171690789,
      "grad_norm": 0.5326718091964722,
      "learning_rate": 8.919499482746124e-06,
      "loss": 0.042,
      "step": 347960
    },
    {
      "epoch": 0.5694769021294424,
      "grad_norm": 6.1265645027160645,
      "learning_rate": 8.919433590532607e-06,
      "loss": 0.0394,
      "step": 347980
    },
    {
      "epoch": 0.5695096325680957,
      "grad_norm": 1.5168640613555908,
      "learning_rate": 8.919367698319091e-06,
      "loss": 0.0246,
      "step": 348000
    },
    {
      "epoch": 0.569542363006749,
      "grad_norm": 0.527619481086731,
      "learning_rate": 8.919301806105573e-06,
      "loss": 0.0486,
      "step": 348020
    },
    {
      "epoch": 0.5695750934454024,
      "grad_norm": 2.217935562133789,
      "learning_rate": 8.919235913892056e-06,
      "loss": 0.0415,
      "step": 348040
    },
    {
      "epoch": 0.5696078238840557,
      "grad_norm": 1.7563718557357788,
      "learning_rate": 8.91917002167854e-06,
      "loss": 0.0385,
      "step": 348060
    },
    {
      "epoch": 0.569640554322709,
      "grad_norm": 1.4601175785064697,
      "learning_rate": 8.919104129465022e-06,
      "loss": 0.0395,
      "step": 348080
    },
    {
      "epoch": 0.5696732847613624,
      "grad_norm": 1.2761473655700684,
      "learning_rate": 8.919038237251505e-06,
      "loss": 0.0317,
      "step": 348100
    },
    {
      "epoch": 0.5697060152000157,
      "grad_norm": 1.0920947790145874,
      "learning_rate": 8.918972345037987e-06,
      "loss": 0.0263,
      "step": 348120
    },
    {
      "epoch": 0.569738745638669,
      "grad_norm": 1.1444705724716187,
      "learning_rate": 8.918906452824471e-06,
      "loss": 0.03,
      "step": 348140
    },
    {
      "epoch": 0.5697714760773224,
      "grad_norm": 1.1197137832641602,
      "learning_rate": 8.918840560610953e-06,
      "loss": 0.0416,
      "step": 348160
    },
    {
      "epoch": 0.5698042065159757,
      "grad_norm": 0.9118466973304749,
      "learning_rate": 8.918774668397436e-06,
      "loss": 0.0392,
      "step": 348180
    },
    {
      "epoch": 0.5698369369546291,
      "grad_norm": 0.9809705018997192,
      "learning_rate": 8.918708776183918e-06,
      "loss": 0.0396,
      "step": 348200
    },
    {
      "epoch": 0.5698696673932824,
      "grad_norm": 1.4364912509918213,
      "learning_rate": 8.918642883970402e-06,
      "loss": 0.035,
      "step": 348220
    },
    {
      "epoch": 0.5699023978319357,
      "grad_norm": 1.211097002029419,
      "learning_rate": 8.918576991756884e-06,
      "loss": 0.0475,
      "step": 348240
    },
    {
      "epoch": 0.5699351282705891,
      "grad_norm": 2.1890015602111816,
      "learning_rate": 8.918511099543367e-06,
      "loss": 0.0371,
      "step": 348260
    },
    {
      "epoch": 0.5699678587092424,
      "grad_norm": 0.853516697883606,
      "learning_rate": 8.918445207329851e-06,
      "loss": 0.0325,
      "step": 348280
    },
    {
      "epoch": 0.5700005891478958,
      "grad_norm": 0.4766671657562256,
      "learning_rate": 8.918379315116333e-06,
      "loss": 0.0389,
      "step": 348300
    },
    {
      "epoch": 0.5700333195865491,
      "grad_norm": 1.0417982339859009,
      "learning_rate": 8.918313422902816e-06,
      "loss": 0.0331,
      "step": 348320
    },
    {
      "epoch": 0.5700660500252024,
      "grad_norm": 1.0783864259719849,
      "learning_rate": 8.918247530689298e-06,
      "loss": 0.0287,
      "step": 348340
    },
    {
      "epoch": 0.5700987804638558,
      "grad_norm": 0.5680493116378784,
      "learning_rate": 8.918181638475782e-06,
      "loss": 0.0316,
      "step": 348360
    },
    {
      "epoch": 0.5701315109025091,
      "grad_norm": 1.07314932346344,
      "learning_rate": 8.918115746262265e-06,
      "loss": 0.034,
      "step": 348380
    },
    {
      "epoch": 0.5701642413411625,
      "grad_norm": 0.5007374882698059,
      "learning_rate": 8.918049854048747e-06,
      "loss": 0.0397,
      "step": 348400
    },
    {
      "epoch": 0.5701969717798158,
      "grad_norm": 1.4712365865707397,
      "learning_rate": 8.917983961835231e-06,
      "loss": 0.0354,
      "step": 348420
    },
    {
      "epoch": 0.5702297022184691,
      "grad_norm": 1.0837535858154297,
      "learning_rate": 8.917918069621715e-06,
      "loss": 0.034,
      "step": 348440
    },
    {
      "epoch": 0.5702624326571225,
      "grad_norm": 3.356602430343628,
      "learning_rate": 8.917852177408196e-06,
      "loss": 0.0587,
      "step": 348460
    },
    {
      "epoch": 0.5702951630957758,
      "grad_norm": 0.7849405407905579,
      "learning_rate": 8.91778628519468e-06,
      "loss": 0.0352,
      "step": 348480
    },
    {
      "epoch": 0.5703278935344291,
      "grad_norm": 0.7428562045097351,
      "learning_rate": 8.917720392981162e-06,
      "loss": 0.0431,
      "step": 348500
    },
    {
      "epoch": 0.5703606239730825,
      "grad_norm": 0.5936079621315002,
      "learning_rate": 8.917654500767646e-06,
      "loss": 0.0403,
      "step": 348520
    },
    {
      "epoch": 0.5703933544117358,
      "grad_norm": 1.4498014450073242,
      "learning_rate": 8.917588608554127e-06,
      "loss": 0.0445,
      "step": 348540
    },
    {
      "epoch": 0.5704260848503891,
      "grad_norm": 0.6432034969329834,
      "learning_rate": 8.917522716340611e-06,
      "loss": 0.0284,
      "step": 348560
    },
    {
      "epoch": 0.5704588152890425,
      "grad_norm": 2.232001304626465,
      "learning_rate": 8.917456824127093e-06,
      "loss": 0.0379,
      "step": 348580
    },
    {
      "epoch": 0.5704915457276959,
      "grad_norm": 0.6593748331069946,
      "learning_rate": 8.917390931913576e-06,
      "loss": 0.0388,
      "step": 348600
    },
    {
      "epoch": 0.5705242761663492,
      "grad_norm": 1.968356966972351,
      "learning_rate": 8.91732503970006e-06,
      "loss": 0.0305,
      "step": 348620
    },
    {
      "epoch": 0.5705570066050025,
      "grad_norm": 0.26343774795532227,
      "learning_rate": 8.917259147486542e-06,
      "loss": 0.0362,
      "step": 348640
    },
    {
      "epoch": 0.5705897370436559,
      "grad_norm": 0.5778923630714417,
      "learning_rate": 8.917193255273026e-06,
      "loss": 0.0432,
      "step": 348660
    },
    {
      "epoch": 0.5706224674823092,
      "grad_norm": 1.237155795097351,
      "learning_rate": 8.917127363059507e-06,
      "loss": 0.0408,
      "step": 348680
    },
    {
      "epoch": 0.5706551979209625,
      "grad_norm": 1.786973476409912,
      "learning_rate": 8.917061470845991e-06,
      "loss": 0.0303,
      "step": 348700
    },
    {
      "epoch": 0.5706879283596159,
      "grad_norm": 1.486517310142517,
      "learning_rate": 8.916995578632473e-06,
      "loss": 0.0404,
      "step": 348720
    },
    {
      "epoch": 0.5707206587982692,
      "grad_norm": 6.714296817779541,
      "learning_rate": 8.916929686418956e-06,
      "loss": 0.0516,
      "step": 348740
    },
    {
      "epoch": 0.5707533892369225,
      "grad_norm": 0.5806589722633362,
      "learning_rate": 8.91686379420544e-06,
      "loss": 0.0421,
      "step": 348760
    },
    {
      "epoch": 0.5707861196755759,
      "grad_norm": 1.2415310144424438,
      "learning_rate": 8.916797901991922e-06,
      "loss": 0.0285,
      "step": 348780
    },
    {
      "epoch": 0.5708188501142293,
      "grad_norm": 1.8888167142868042,
      "learning_rate": 8.916732009778406e-06,
      "loss": 0.0295,
      "step": 348800
    },
    {
      "epoch": 0.5708515805528825,
      "grad_norm": 1.4262373447418213,
      "learning_rate": 8.916666117564889e-06,
      "loss": 0.0382,
      "step": 348820
    },
    {
      "epoch": 0.5708843109915359,
      "grad_norm": 0.3929004967212677,
      "learning_rate": 8.916600225351371e-06,
      "loss": 0.028,
      "step": 348840
    },
    {
      "epoch": 0.5709170414301893,
      "grad_norm": 1.2291337251663208,
      "learning_rate": 8.916534333137855e-06,
      "loss": 0.0326,
      "step": 348860
    },
    {
      "epoch": 0.5709497718688425,
      "grad_norm": 5.943287372589111,
      "learning_rate": 8.916468440924337e-06,
      "loss": 0.0399,
      "step": 348880
    },
    {
      "epoch": 0.5709825023074959,
      "grad_norm": 1.0475916862487793,
      "learning_rate": 8.91640254871082e-06,
      "loss": 0.0425,
      "step": 348900
    },
    {
      "epoch": 0.5710152327461493,
      "grad_norm": 0.8617593050003052,
      "learning_rate": 8.916336656497302e-06,
      "loss": 0.0388,
      "step": 348920
    },
    {
      "epoch": 0.5710479631848026,
      "grad_norm": 0.7293753027915955,
      "learning_rate": 8.916270764283786e-06,
      "loss": 0.0374,
      "step": 348940
    },
    {
      "epoch": 0.5710806936234559,
      "grad_norm": 1.7412760257720947,
      "learning_rate": 8.916204872070267e-06,
      "loss": 0.0345,
      "step": 348960
    },
    {
      "epoch": 0.5711134240621093,
      "grad_norm": 1.1178691387176514,
      "learning_rate": 8.916138979856751e-06,
      "loss": 0.0338,
      "step": 348980
    },
    {
      "epoch": 0.5711461545007627,
      "grad_norm": 0.3690154552459717,
      "learning_rate": 8.916073087643235e-06,
      "loss": 0.0359,
      "step": 349000
    },
    {
      "epoch": 0.5711788849394159,
      "grad_norm": 1.421966791152954,
      "learning_rate": 8.916007195429717e-06,
      "loss": 0.0492,
      "step": 349020
    },
    {
      "epoch": 0.5712116153780693,
      "grad_norm": 0.8875179886817932,
      "learning_rate": 8.9159413032162e-06,
      "loss": 0.0439,
      "step": 349040
    },
    {
      "epoch": 0.5712443458167227,
      "grad_norm": 1.0748075246810913,
      "learning_rate": 8.915875411002682e-06,
      "loss": 0.029,
      "step": 349060
    },
    {
      "epoch": 0.5712770762553759,
      "grad_norm": 2.726590156555176,
      "learning_rate": 8.915809518789166e-06,
      "loss": 0.0437,
      "step": 349080
    },
    {
      "epoch": 0.5713098066940293,
      "grad_norm": 3.16615891456604,
      "learning_rate": 8.915743626575648e-06,
      "loss": 0.0354,
      "step": 349100
    },
    {
      "epoch": 0.5713425371326827,
      "grad_norm": 1.3970661163330078,
      "learning_rate": 8.915677734362131e-06,
      "loss": 0.036,
      "step": 349120
    },
    {
      "epoch": 0.5713752675713359,
      "grad_norm": 0.9507176280021667,
      "learning_rate": 8.915611842148613e-06,
      "loss": 0.0348,
      "step": 349140
    },
    {
      "epoch": 0.5714079980099893,
      "grad_norm": 1.0708825588226318,
      "learning_rate": 8.915545949935097e-06,
      "loss": 0.0314,
      "step": 349160
    },
    {
      "epoch": 0.5714407284486427,
      "grad_norm": 1.0380407571792603,
      "learning_rate": 8.91548005772158e-06,
      "loss": 0.0375,
      "step": 349180
    },
    {
      "epoch": 0.5714734588872961,
      "grad_norm": 1.0978270769119263,
      "learning_rate": 8.915414165508062e-06,
      "loss": 0.0407,
      "step": 349200
    },
    {
      "epoch": 0.5715061893259493,
      "grad_norm": 0.6008336544036865,
      "learning_rate": 8.915348273294546e-06,
      "loss": 0.033,
      "step": 349220
    },
    {
      "epoch": 0.5715389197646027,
      "grad_norm": 0.4413905143737793,
      "learning_rate": 8.91528238108103e-06,
      "loss": 0.0305,
      "step": 349240
    },
    {
      "epoch": 0.5715716502032561,
      "grad_norm": 0.8178796172142029,
      "learning_rate": 8.915216488867511e-06,
      "loss": 0.0327,
      "step": 349260
    },
    {
      "epoch": 0.5716043806419093,
      "grad_norm": 2.3531057834625244,
      "learning_rate": 8.915150596653995e-06,
      "loss": 0.0369,
      "step": 349280
    },
    {
      "epoch": 0.5716371110805627,
      "grad_norm": 1.3605663776397705,
      "learning_rate": 8.915084704440477e-06,
      "loss": 0.041,
      "step": 349300
    },
    {
      "epoch": 0.5716698415192161,
      "grad_norm": 1.1162419319152832,
      "learning_rate": 8.91501881222696e-06,
      "loss": 0.0367,
      "step": 349320
    },
    {
      "epoch": 0.5717025719578693,
      "grad_norm": 0.7567890286445618,
      "learning_rate": 8.914952920013444e-06,
      "loss": 0.0472,
      "step": 349340
    },
    {
      "epoch": 0.5717353023965227,
      "grad_norm": 1.180422306060791,
      "learning_rate": 8.914887027799926e-06,
      "loss": 0.0437,
      "step": 349360
    },
    {
      "epoch": 0.5717680328351761,
      "grad_norm": 3.2787234783172607,
      "learning_rate": 8.91482113558641e-06,
      "loss": 0.0543,
      "step": 349380
    },
    {
      "epoch": 0.5718007632738294,
      "grad_norm": 1.3833369016647339,
      "learning_rate": 8.914755243372891e-06,
      "loss": 0.0317,
      "step": 349400
    },
    {
      "epoch": 0.5718334937124827,
      "grad_norm": 2.6051583290100098,
      "learning_rate": 8.914689351159375e-06,
      "loss": 0.0418,
      "step": 349420
    },
    {
      "epoch": 0.5718662241511361,
      "grad_norm": 1.274572491645813,
      "learning_rate": 8.914623458945857e-06,
      "loss": 0.0285,
      "step": 349440
    },
    {
      "epoch": 0.5718989545897895,
      "grad_norm": 0.7626854181289673,
      "learning_rate": 8.91455756673234e-06,
      "loss": 0.0346,
      "step": 349460
    },
    {
      "epoch": 0.5719316850284427,
      "grad_norm": 0.7750642895698547,
      "learning_rate": 8.914491674518822e-06,
      "loss": 0.036,
      "step": 349480
    },
    {
      "epoch": 0.5719644154670961,
      "grad_norm": 0.4307252764701843,
      "learning_rate": 8.914425782305306e-06,
      "loss": 0.0371,
      "step": 349500
    },
    {
      "epoch": 0.5719971459057495,
      "grad_norm": 0.71602863073349,
      "learning_rate": 8.914359890091788e-06,
      "loss": 0.041,
      "step": 349520
    },
    {
      "epoch": 0.5720298763444027,
      "grad_norm": 0.642398476600647,
      "learning_rate": 8.914293997878271e-06,
      "loss": 0.0435,
      "step": 349540
    },
    {
      "epoch": 0.5720626067830561,
      "grad_norm": 0.9036122560501099,
      "learning_rate": 8.914228105664755e-06,
      "loss": 0.0368,
      "step": 349560
    },
    {
      "epoch": 0.5720953372217095,
      "grad_norm": 0.6925013661384583,
      "learning_rate": 8.914162213451237e-06,
      "loss": 0.0318,
      "step": 349580
    },
    {
      "epoch": 0.5721280676603628,
      "grad_norm": 2.5821237564086914,
      "learning_rate": 8.91409632123772e-06,
      "loss": 0.0418,
      "step": 349600
    },
    {
      "epoch": 0.5721607980990161,
      "grad_norm": 0.8067620396614075,
      "learning_rate": 8.914030429024204e-06,
      "loss": 0.0449,
      "step": 349620
    },
    {
      "epoch": 0.5721935285376695,
      "grad_norm": 2.9203104972839355,
      "learning_rate": 8.913964536810686e-06,
      "loss": 0.0302,
      "step": 349640
    },
    {
      "epoch": 0.5722262589763228,
      "grad_norm": 2.2435669898986816,
      "learning_rate": 8.91389864459717e-06,
      "loss": 0.0301,
      "step": 349660
    },
    {
      "epoch": 0.5722589894149761,
      "grad_norm": 1.6930103302001953,
      "learning_rate": 8.913832752383653e-06,
      "loss": 0.0546,
      "step": 349680
    },
    {
      "epoch": 0.5722917198536295,
      "grad_norm": 0.45351535081863403,
      "learning_rate": 8.913766860170135e-06,
      "loss": 0.0295,
      "step": 349700
    },
    {
      "epoch": 0.5723244502922828,
      "grad_norm": 1.4485700130462646,
      "learning_rate": 8.913700967956618e-06,
      "loss": 0.0468,
      "step": 349720
    },
    {
      "epoch": 0.5723571807309361,
      "grad_norm": 0.24131961166858673,
      "learning_rate": 8.9136350757431e-06,
      "loss": 0.0369,
      "step": 349740
    },
    {
      "epoch": 0.5723899111695895,
      "grad_norm": 1.7951043844223022,
      "learning_rate": 8.913569183529584e-06,
      "loss": 0.0434,
      "step": 349760
    },
    {
      "epoch": 0.5724226416082429,
      "grad_norm": 6.599536895751953,
      "learning_rate": 8.913503291316066e-06,
      "loss": 0.0371,
      "step": 349780
    },
    {
      "epoch": 0.5724553720468962,
      "grad_norm": 1.1500362157821655,
      "learning_rate": 8.91343739910255e-06,
      "loss": 0.0405,
      "step": 349800
    },
    {
      "epoch": 0.5724881024855495,
      "grad_norm": 1.8555200099945068,
      "learning_rate": 8.913371506889031e-06,
      "loss": 0.0346,
      "step": 349820
    },
    {
      "epoch": 0.5725208329242029,
      "grad_norm": 0.37986335158348083,
      "learning_rate": 8.913305614675515e-06,
      "loss": 0.041,
      "step": 349840
    },
    {
      "epoch": 0.5725535633628562,
      "grad_norm": 0.5496407151222229,
      "learning_rate": 8.913239722461997e-06,
      "loss": 0.03,
      "step": 349860
    },
    {
      "epoch": 0.5725862938015095,
      "grad_norm": 0.6985371708869934,
      "learning_rate": 8.91317383024848e-06,
      "loss": 0.0303,
      "step": 349880
    },
    {
      "epoch": 0.5726190242401629,
      "grad_norm": 9.30382251739502,
      "learning_rate": 8.913107938034962e-06,
      "loss": 0.035,
      "step": 349900
    },
    {
      "epoch": 0.5726517546788162,
      "grad_norm": 1.1076065301895142,
      "learning_rate": 8.913042045821446e-06,
      "loss": 0.0391,
      "step": 349920
    },
    {
      "epoch": 0.5726844851174695,
      "grad_norm": 0.35323938727378845,
      "learning_rate": 8.912976153607928e-06,
      "loss": 0.0368,
      "step": 349940
    },
    {
      "epoch": 0.5727172155561229,
      "grad_norm": 3.0501818656921387,
      "learning_rate": 8.912910261394411e-06,
      "loss": 0.0407,
      "step": 349960
    },
    {
      "epoch": 0.5727499459947762,
      "grad_norm": 1.6661406755447388,
      "learning_rate": 8.912844369180895e-06,
      "loss": 0.0391,
      "step": 349980
    },
    {
      "epoch": 0.5727826764334296,
      "grad_norm": 0.6751202344894409,
      "learning_rate": 8.912778476967377e-06,
      "loss": 0.0278,
      "step": 350000
    },
    {
      "epoch": 0.5727826764334296,
      "eval_loss": 0.018645556643605232,
      "eval_runtime": 6506.1571,
      "eval_samples_per_second": 157.982,
      "eval_steps_per_second": 15.798,
      "eval_sts-dev_pearson_cosine": 0.9553039495733493,
      "eval_sts-dev_spearman_cosine": 0.8776681772255568,
      "step": 350000
    },
    {
      "epoch": 0.5728154068720829,
      "grad_norm": 2.2135164737701416,
      "learning_rate": 8.91271258475386e-06,
      "loss": 0.0361,
      "step": 350020
    },
    {
      "epoch": 0.5728481373107362,
      "grad_norm": 0.7044419050216675,
      "learning_rate": 8.912646692540344e-06,
      "loss": 0.0453,
      "step": 350040
    },
    {
      "epoch": 0.5728808677493896,
      "grad_norm": 1.2346982955932617,
      "learning_rate": 8.912580800326826e-06,
      "loss": 0.0539,
      "step": 350060
    },
    {
      "epoch": 0.5729135981880429,
      "grad_norm": 1.1022741794586182,
      "learning_rate": 8.91251490811331e-06,
      "loss": 0.0316,
      "step": 350080
    },
    {
      "epoch": 0.5729463286266963,
      "grad_norm": 2.2178382873535156,
      "learning_rate": 8.912449015899793e-06,
      "loss": 0.0351,
      "step": 350100
    },
    {
      "epoch": 0.5729790590653496,
      "grad_norm": 0.6802586317062378,
      "learning_rate": 8.912383123686275e-06,
      "loss": 0.0481,
      "step": 350120
    },
    {
      "epoch": 0.5730117895040029,
      "grad_norm": 1.103101372718811,
      "learning_rate": 8.912317231472758e-06,
      "loss": 0.0389,
      "step": 350140
    },
    {
      "epoch": 0.5730445199426563,
      "grad_norm": 1.1846197843551636,
      "learning_rate": 8.91225133925924e-06,
      "loss": 0.0459,
      "step": 350160
    },
    {
      "epoch": 0.5730772503813096,
      "grad_norm": 1.1501588821411133,
      "learning_rate": 8.912185447045724e-06,
      "loss": 0.0304,
      "step": 350180
    },
    {
      "epoch": 0.5731099808199629,
      "grad_norm": 1.0414575338363647,
      "learning_rate": 8.912119554832206e-06,
      "loss": 0.0403,
      "step": 350200
    },
    {
      "epoch": 0.5731427112586163,
      "grad_norm": 1.5326671600341797,
      "learning_rate": 8.91205366261869e-06,
      "loss": 0.0297,
      "step": 350220
    },
    {
      "epoch": 0.5731754416972696,
      "grad_norm": 2.0850830078125,
      "learning_rate": 8.911987770405171e-06,
      "loss": 0.0336,
      "step": 350240
    },
    {
      "epoch": 0.573208172135923,
      "grad_norm": 1.3081328868865967,
      "learning_rate": 8.911921878191655e-06,
      "loss": 0.0407,
      "step": 350260
    },
    {
      "epoch": 0.5732409025745763,
      "grad_norm": 2.016575336456299,
      "learning_rate": 8.911855985978137e-06,
      "loss": 0.0356,
      "step": 350280
    },
    {
      "epoch": 0.5732736330132296,
      "grad_norm": 1.6221396923065186,
      "learning_rate": 8.91179009376462e-06,
      "loss": 0.0316,
      "step": 350300
    },
    {
      "epoch": 0.573306363451883,
      "grad_norm": 3.704875946044922,
      "learning_rate": 8.911724201551102e-06,
      "loss": 0.0368,
      "step": 350320
    },
    {
      "epoch": 0.5733390938905363,
      "grad_norm": 1.137015461921692,
      "learning_rate": 8.911658309337586e-06,
      "loss": 0.0464,
      "step": 350340
    },
    {
      "epoch": 0.5733718243291896,
      "grad_norm": 1.8937489986419678,
      "learning_rate": 8.91159241712407e-06,
      "loss": 0.0358,
      "step": 350360
    },
    {
      "epoch": 0.573404554767843,
      "grad_norm": 0.45832598209381104,
      "learning_rate": 8.911526524910551e-06,
      "loss": 0.0276,
      "step": 350380
    },
    {
      "epoch": 0.5734372852064963,
      "grad_norm": 6.361567497253418,
      "learning_rate": 8.911460632697035e-06,
      "loss": 0.0389,
      "step": 350400
    },
    {
      "epoch": 0.5734700156451497,
      "grad_norm": 0.324960321187973,
      "learning_rate": 8.911394740483518e-06,
      "loss": 0.0418,
      "step": 350420
    },
    {
      "epoch": 0.573502746083803,
      "grad_norm": 0.5080987215042114,
      "learning_rate": 8.91132884827e-06,
      "loss": 0.0194,
      "step": 350440
    },
    {
      "epoch": 0.5735354765224564,
      "grad_norm": 1.4827247858047485,
      "learning_rate": 8.911262956056484e-06,
      "loss": 0.0371,
      "step": 350460
    },
    {
      "epoch": 0.5735682069611097,
      "grad_norm": 3.3424437046051025,
      "learning_rate": 8.911197063842968e-06,
      "loss": 0.0416,
      "step": 350480
    },
    {
      "epoch": 0.573600937399763,
      "grad_norm": 3.4368057250976562,
      "learning_rate": 8.91113117162945e-06,
      "loss": 0.034,
      "step": 350500
    },
    {
      "epoch": 0.5736336678384164,
      "grad_norm": 2.857341766357422,
      "learning_rate": 8.911065279415933e-06,
      "loss": 0.0285,
      "step": 350520
    },
    {
      "epoch": 0.5736663982770697,
      "grad_norm": 0.357749879360199,
      "learning_rate": 8.910999387202415e-06,
      "loss": 0.0406,
      "step": 350540
    },
    {
      "epoch": 0.573699128715723,
      "grad_norm": 9.192910194396973,
      "learning_rate": 8.910933494988899e-06,
      "loss": 0.0438,
      "step": 350560
    },
    {
      "epoch": 0.5737318591543764,
      "grad_norm": 1.3861634731292725,
      "learning_rate": 8.91086760277538e-06,
      "loss": 0.0367,
      "step": 350580
    },
    {
      "epoch": 0.5737645895930297,
      "grad_norm": 0.38521161675453186,
      "learning_rate": 8.910801710561864e-06,
      "loss": 0.0342,
      "step": 350600
    },
    {
      "epoch": 0.573797320031683,
      "grad_norm": 0.5603404641151428,
      "learning_rate": 8.910735818348346e-06,
      "loss": 0.0405,
      "step": 350620
    },
    {
      "epoch": 0.5738300504703364,
      "grad_norm": 0.5600486993789673,
      "learning_rate": 8.91066992613483e-06,
      "loss": 0.0341,
      "step": 350640
    },
    {
      "epoch": 0.5738627809089898,
      "grad_norm": 6.010827541351318,
      "learning_rate": 8.910604033921311e-06,
      "loss": 0.0447,
      "step": 350660
    },
    {
      "epoch": 0.573895511347643,
      "grad_norm": 0.7134346961975098,
      "learning_rate": 8.910538141707795e-06,
      "loss": 0.0326,
      "step": 350680
    },
    {
      "epoch": 0.5739282417862964,
      "grad_norm": 1.418845534324646,
      "learning_rate": 8.910472249494277e-06,
      "loss": 0.036,
      "step": 350700
    },
    {
      "epoch": 0.5739609722249498,
      "grad_norm": 0.9578920602798462,
      "learning_rate": 8.91040635728076e-06,
      "loss": 0.0354,
      "step": 350720
    },
    {
      "epoch": 0.5739937026636031,
      "grad_norm": 0.5026111006736755,
      "learning_rate": 8.910340465067244e-06,
      "loss": 0.034,
      "step": 350740
    },
    {
      "epoch": 0.5740264331022564,
      "grad_norm": 3.4714369773864746,
      "learning_rate": 8.910274572853726e-06,
      "loss": 0.0292,
      "step": 350760
    },
    {
      "epoch": 0.5740591635409098,
      "grad_norm": 0.29771021008491516,
      "learning_rate": 8.91020868064021e-06,
      "loss": 0.0381,
      "step": 350780
    },
    {
      "epoch": 0.5740918939795631,
      "grad_norm": 0.5752297043800354,
      "learning_rate": 8.910142788426691e-06,
      "loss": 0.0367,
      "step": 350800
    },
    {
      "epoch": 0.5741246244182164,
      "grad_norm": 1.066219687461853,
      "learning_rate": 8.910076896213175e-06,
      "loss": 0.0374,
      "step": 350820
    },
    {
      "epoch": 0.5741573548568698,
      "grad_norm": 0.7771419286727905,
      "learning_rate": 8.910011003999659e-06,
      "loss": 0.0357,
      "step": 350840
    },
    {
      "epoch": 0.5741900852955232,
      "grad_norm": 1.1005058288574219,
      "learning_rate": 8.90994511178614e-06,
      "loss": 0.0375,
      "step": 350860
    },
    {
      "epoch": 0.5742228157341764,
      "grad_norm": 3.294006586074829,
      "learning_rate": 8.909879219572624e-06,
      "loss": 0.0306,
      "step": 350880
    },
    {
      "epoch": 0.5742555461728298,
      "grad_norm": 1.640993595123291,
      "learning_rate": 8.909813327359108e-06,
      "loss": 0.0326,
      "step": 350900
    },
    {
      "epoch": 0.5742882766114832,
      "grad_norm": 1.5217002630233765,
      "learning_rate": 8.90974743514559e-06,
      "loss": 0.0368,
      "step": 350920
    },
    {
      "epoch": 0.5743210070501364,
      "grad_norm": 1.2687504291534424,
      "learning_rate": 8.909681542932073e-06,
      "loss": 0.0442,
      "step": 350940
    },
    {
      "epoch": 0.5743537374887898,
      "grad_norm": 0.36524099111557007,
      "learning_rate": 8.909615650718555e-06,
      "loss": 0.0286,
      "step": 350960
    },
    {
      "epoch": 0.5743864679274432,
      "grad_norm": 1.1029784679412842,
      "learning_rate": 8.909549758505039e-06,
      "loss": 0.0414,
      "step": 350980
    },
    {
      "epoch": 0.5744191983660965,
      "grad_norm": 1.9800351858139038,
      "learning_rate": 8.90948386629152e-06,
      "loss": 0.0406,
      "step": 351000
    },
    {
      "epoch": 0.5744519288047498,
      "grad_norm": 2.292449712753296,
      "learning_rate": 8.909417974078004e-06,
      "loss": 0.0331,
      "step": 351020
    },
    {
      "epoch": 0.5744846592434032,
      "grad_norm": 1.7981551885604858,
      "learning_rate": 8.909352081864486e-06,
      "loss": 0.0457,
      "step": 351040
    },
    {
      "epoch": 0.5745173896820566,
      "grad_norm": 1.0213557481765747,
      "learning_rate": 8.90928618965097e-06,
      "loss": 0.0378,
      "step": 351060
    },
    {
      "epoch": 0.5745501201207098,
      "grad_norm": 2.033433437347412,
      "learning_rate": 8.909220297437453e-06,
      "loss": 0.0353,
      "step": 351080
    },
    {
      "epoch": 0.5745828505593632,
      "grad_norm": 5.545825958251953,
      "learning_rate": 8.909154405223935e-06,
      "loss": 0.035,
      "step": 351100
    },
    {
      "epoch": 0.5746155809980166,
      "grad_norm": 0.5323852896690369,
      "learning_rate": 8.909088513010419e-06,
      "loss": 0.0302,
      "step": 351120
    },
    {
      "epoch": 0.5746483114366698,
      "grad_norm": 3.449213743209839,
      "learning_rate": 8.9090226207969e-06,
      "loss": 0.047,
      "step": 351140
    },
    {
      "epoch": 0.5746810418753232,
      "grad_norm": 1.0719581842422485,
      "learning_rate": 8.908956728583384e-06,
      "loss": 0.0391,
      "step": 351160
    },
    {
      "epoch": 0.5747137723139766,
      "grad_norm": 0.4564255177974701,
      "learning_rate": 8.908890836369866e-06,
      "loss": 0.0242,
      "step": 351180
    },
    {
      "epoch": 0.5747465027526298,
      "grad_norm": 1.5486985445022583,
      "learning_rate": 8.90882494415635e-06,
      "loss": 0.0309,
      "step": 351200
    },
    {
      "epoch": 0.5747792331912832,
      "grad_norm": 1.5621789693832397,
      "learning_rate": 8.908759051942833e-06,
      "loss": 0.0458,
      "step": 351220
    },
    {
      "epoch": 0.5748119636299366,
      "grad_norm": 1.8484171628952026,
      "learning_rate": 8.908693159729315e-06,
      "loss": 0.0357,
      "step": 351240
    },
    {
      "epoch": 0.57484469406859,
      "grad_norm": 1.3604763746261597,
      "learning_rate": 8.908627267515799e-06,
      "loss": 0.0421,
      "step": 351260
    },
    {
      "epoch": 0.5748774245072432,
      "grad_norm": 0.7812476754188538,
      "learning_rate": 8.908561375302282e-06,
      "loss": 0.0325,
      "step": 351280
    },
    {
      "epoch": 0.5749101549458966,
      "grad_norm": 1.7163816690444946,
      "learning_rate": 8.908495483088764e-06,
      "loss": 0.0269,
      "step": 351300
    },
    {
      "epoch": 0.57494288538455,
      "grad_norm": 0.7535122632980347,
      "learning_rate": 8.908429590875248e-06,
      "loss": 0.0345,
      "step": 351320
    },
    {
      "epoch": 0.5749756158232032,
      "grad_norm": 0.671783447265625,
      "learning_rate": 8.90836369866173e-06,
      "loss": 0.0287,
      "step": 351340
    },
    {
      "epoch": 0.5750083462618566,
      "grad_norm": 1.4659817218780518,
      "learning_rate": 8.908297806448213e-06,
      "loss": 0.0451,
      "step": 351360
    },
    {
      "epoch": 0.57504107670051,
      "grad_norm": 1.3508566617965698,
      "learning_rate": 8.908231914234695e-06,
      "loss": 0.0541,
      "step": 351380
    },
    {
      "epoch": 0.5750738071391632,
      "grad_norm": 0.32883262634277344,
      "learning_rate": 8.908166022021179e-06,
      "loss": 0.0274,
      "step": 351400
    },
    {
      "epoch": 0.5751065375778166,
      "grad_norm": 2.402219533920288,
      "learning_rate": 8.90810012980766e-06,
      "loss": 0.047,
      "step": 351420
    },
    {
      "epoch": 0.57513926801647,
      "grad_norm": 1.3231446743011475,
      "learning_rate": 8.908034237594144e-06,
      "loss": 0.0377,
      "step": 351440
    },
    {
      "epoch": 0.5751719984551233,
      "grad_norm": 0.678429126739502,
      "learning_rate": 8.907968345380628e-06,
      "loss": 0.0423,
      "step": 351460
    },
    {
      "epoch": 0.5752047288937766,
      "grad_norm": 0.9433557391166687,
      "learning_rate": 8.90790245316711e-06,
      "loss": 0.045,
      "step": 351480
    },
    {
      "epoch": 0.57523745933243,
      "grad_norm": 1.3136341571807861,
      "learning_rate": 8.907836560953593e-06,
      "loss": 0.039,
      "step": 351500
    },
    {
      "epoch": 0.5752701897710834,
      "grad_norm": 0.910994291305542,
      "learning_rate": 8.907770668740075e-06,
      "loss": 0.0362,
      "step": 351520
    },
    {
      "epoch": 0.5753029202097366,
      "grad_norm": 2.4376165866851807,
      "learning_rate": 8.907704776526559e-06,
      "loss": 0.0393,
      "step": 351540
    },
    {
      "epoch": 0.57533565064839,
      "grad_norm": 0.33978700637817383,
      "learning_rate": 8.90763888431304e-06,
      "loss": 0.0402,
      "step": 351560
    },
    {
      "epoch": 0.5753683810870434,
      "grad_norm": 1.3922019004821777,
      "learning_rate": 8.907572992099524e-06,
      "loss": 0.0327,
      "step": 351580
    },
    {
      "epoch": 0.5754011115256966,
      "grad_norm": 0.5244755148887634,
      "learning_rate": 8.907507099886008e-06,
      "loss": 0.0483,
      "step": 351600
    },
    {
      "epoch": 0.57543384196435,
      "grad_norm": 0.5907754898071289,
      "learning_rate": 8.90744120767249e-06,
      "loss": 0.0234,
      "step": 351620
    },
    {
      "epoch": 0.5754665724030034,
      "grad_norm": 1.8767805099487305,
      "learning_rate": 8.907375315458973e-06,
      "loss": 0.0337,
      "step": 351640
    },
    {
      "epoch": 0.5754993028416567,
      "grad_norm": 1.0556659698486328,
      "learning_rate": 8.907309423245457e-06,
      "loss": 0.0376,
      "step": 351660
    },
    {
      "epoch": 0.57553203328031,
      "grad_norm": 1.1427100896835327,
      "learning_rate": 8.907243531031939e-06,
      "loss": 0.0391,
      "step": 351680
    },
    {
      "epoch": 0.5755647637189634,
      "grad_norm": 1.3523154258728027,
      "learning_rate": 8.907177638818422e-06,
      "loss": 0.0359,
      "step": 351700
    },
    {
      "epoch": 0.5755974941576167,
      "grad_norm": 1.3416452407836914,
      "learning_rate": 8.907111746604904e-06,
      "loss": 0.0274,
      "step": 351720
    },
    {
      "epoch": 0.57563022459627,
      "grad_norm": 1.05637788772583,
      "learning_rate": 8.907045854391388e-06,
      "loss": 0.0563,
      "step": 351740
    },
    {
      "epoch": 0.5756629550349234,
      "grad_norm": 1.012536883354187,
      "learning_rate": 8.90697996217787e-06,
      "loss": 0.0308,
      "step": 351760
    },
    {
      "epoch": 0.5756956854735767,
      "grad_norm": 0.8864477276802063,
      "learning_rate": 8.906914069964353e-06,
      "loss": 0.0298,
      "step": 351780
    },
    {
      "epoch": 0.57572841591223,
      "grad_norm": 2.7251126766204834,
      "learning_rate": 8.906848177750837e-06,
      "loss": 0.0384,
      "step": 351800
    },
    {
      "epoch": 0.5757611463508834,
      "grad_norm": 2.5862550735473633,
      "learning_rate": 8.906782285537319e-06,
      "loss": 0.0349,
      "step": 351820
    },
    {
      "epoch": 0.5757938767895368,
      "grad_norm": 1.3484902381896973,
      "learning_rate": 8.906716393323802e-06,
      "loss": 0.0257,
      "step": 351840
    },
    {
      "epoch": 0.5758266072281901,
      "grad_norm": 0.85395747423172,
      "learning_rate": 8.906650501110284e-06,
      "loss": 0.045,
      "step": 351860
    },
    {
      "epoch": 0.5758593376668434,
      "grad_norm": 1.1510814428329468,
      "learning_rate": 8.906584608896768e-06,
      "loss": 0.0417,
      "step": 351880
    },
    {
      "epoch": 0.5758920681054968,
      "grad_norm": 1.368376612663269,
      "learning_rate": 8.90651871668325e-06,
      "loss": 0.0263,
      "step": 351900
    },
    {
      "epoch": 0.5759247985441501,
      "grad_norm": 0.6447779536247253,
      "learning_rate": 8.906452824469733e-06,
      "loss": 0.04,
      "step": 351920
    },
    {
      "epoch": 0.5759575289828034,
      "grad_norm": 1.6288756132125854,
      "learning_rate": 8.906386932256215e-06,
      "loss": 0.0431,
      "step": 351940
    },
    {
      "epoch": 0.5759902594214568,
      "grad_norm": 3.018754720687866,
      "learning_rate": 8.906321040042699e-06,
      "loss": 0.032,
      "step": 351960
    },
    {
      "epoch": 0.5760229898601101,
      "grad_norm": 1.071109652519226,
      "learning_rate": 8.90625514782918e-06,
      "loss": 0.038,
      "step": 351980
    },
    {
      "epoch": 0.5760557202987634,
      "grad_norm": 0.34827426075935364,
      "learning_rate": 8.906189255615664e-06,
      "loss": 0.0434,
      "step": 352000
    },
    {
      "epoch": 0.5760884507374168,
      "grad_norm": 0.5402259230613708,
      "learning_rate": 8.906123363402148e-06,
      "loss": 0.0478,
      "step": 352020
    },
    {
      "epoch": 0.5761211811760701,
      "grad_norm": 0.29313135147094727,
      "learning_rate": 8.90605747118863e-06,
      "loss": 0.026,
      "step": 352040
    },
    {
      "epoch": 0.5761539116147235,
      "grad_norm": 0.822844386100769,
      "learning_rate": 8.905991578975113e-06,
      "loss": 0.04,
      "step": 352060
    },
    {
      "epoch": 0.5761866420533768,
      "grad_norm": 1.9093888998031616,
      "learning_rate": 8.905925686761597e-06,
      "loss": 0.0417,
      "step": 352080
    },
    {
      "epoch": 0.5762193724920301,
      "grad_norm": 3.253887414932251,
      "learning_rate": 8.905859794548079e-06,
      "loss": 0.0415,
      "step": 352100
    },
    {
      "epoch": 0.5762521029306835,
      "grad_norm": 1.2562527656555176,
      "learning_rate": 8.905793902334562e-06,
      "loss": 0.0448,
      "step": 352120
    },
    {
      "epoch": 0.5762848333693368,
      "grad_norm": 0.36692970991134644,
      "learning_rate": 8.905728010121046e-06,
      "loss": 0.0314,
      "step": 352140
    },
    {
      "epoch": 0.5763175638079902,
      "grad_norm": 1.1886192560195923,
      "learning_rate": 8.905662117907528e-06,
      "loss": 0.0352,
      "step": 352160
    },
    {
      "epoch": 0.5763502942466435,
      "grad_norm": 0.6647033095359802,
      "learning_rate": 8.905596225694011e-06,
      "loss": 0.0355,
      "step": 352180
    },
    {
      "epoch": 0.5763830246852968,
      "grad_norm": 3.8803226947784424,
      "learning_rate": 8.905530333480493e-06,
      "loss": 0.0435,
      "step": 352200
    },
    {
      "epoch": 0.5764157551239502,
      "grad_norm": 12.596317291259766,
      "learning_rate": 8.905464441266977e-06,
      "loss": 0.0503,
      "step": 352220
    },
    {
      "epoch": 0.5764484855626035,
      "grad_norm": 0.42411020398139954,
      "learning_rate": 8.905398549053459e-06,
      "loss": 0.0213,
      "step": 352240
    },
    {
      "epoch": 0.5764812160012569,
      "grad_norm": 0.6714393496513367,
      "learning_rate": 8.905332656839942e-06,
      "loss": 0.03,
      "step": 352260
    },
    {
      "epoch": 0.5765139464399102,
      "grad_norm": 3.923781156539917,
      "learning_rate": 8.905266764626424e-06,
      "loss": 0.0385,
      "step": 352280
    },
    {
      "epoch": 0.5765466768785635,
      "grad_norm": 1.9109740257263184,
      "learning_rate": 8.905200872412908e-06,
      "loss": 0.0284,
      "step": 352300
    },
    {
      "epoch": 0.5765794073172169,
      "grad_norm": 0.9146280288696289,
      "learning_rate": 8.90513498019939e-06,
      "loss": 0.0397,
      "step": 352320
    },
    {
      "epoch": 0.5766121377558702,
      "grad_norm": 1.382890224456787,
      "learning_rate": 8.905069087985873e-06,
      "loss": 0.037,
      "step": 352340
    },
    {
      "epoch": 0.5766448681945235,
      "grad_norm": 0.27423685789108276,
      "learning_rate": 8.905003195772355e-06,
      "loss": 0.0347,
      "step": 352360
    },
    {
      "epoch": 0.5766775986331769,
      "grad_norm": 2.181608200073242,
      "learning_rate": 8.904937303558839e-06,
      "loss": 0.0332,
      "step": 352380
    },
    {
      "epoch": 0.5767103290718302,
      "grad_norm": 4.158947467803955,
      "learning_rate": 8.904871411345322e-06,
      "loss": 0.048,
      "step": 352400
    },
    {
      "epoch": 0.5767430595104835,
      "grad_norm": 1.5886900424957275,
      "learning_rate": 8.904805519131804e-06,
      "loss": 0.0317,
      "step": 352420
    },
    {
      "epoch": 0.5767757899491369,
      "grad_norm": 1.892808198928833,
      "learning_rate": 8.904739626918288e-06,
      "loss": 0.038,
      "step": 352440
    },
    {
      "epoch": 0.5768085203877903,
      "grad_norm": 2.7970964908599854,
      "learning_rate": 8.904673734704772e-06,
      "loss": 0.0335,
      "step": 352460
    },
    {
      "epoch": 0.5768412508264436,
      "grad_norm": 0.9898622035980225,
      "learning_rate": 8.904607842491253e-06,
      "loss": 0.036,
      "step": 352480
    },
    {
      "epoch": 0.5768739812650969,
      "grad_norm": 0.1308983713388443,
      "learning_rate": 8.904541950277737e-06,
      "loss": 0.033,
      "step": 352500
    },
    {
      "epoch": 0.5769067117037503,
      "grad_norm": 1.2696473598480225,
      "learning_rate": 8.90447605806422e-06,
      "loss": 0.0304,
      "step": 352520
    },
    {
      "epoch": 0.5769394421424036,
      "grad_norm": 1.3433846235275269,
      "learning_rate": 8.904410165850702e-06,
      "loss": 0.0299,
      "step": 352540
    },
    {
      "epoch": 0.5769721725810569,
      "grad_norm": 2.0150487422943115,
      "learning_rate": 8.904344273637186e-06,
      "loss": 0.0293,
      "step": 352560
    },
    {
      "epoch": 0.5770049030197103,
      "grad_norm": 1.0409711599349976,
      "learning_rate": 8.904278381423668e-06,
      "loss": 0.0465,
      "step": 352580
    },
    {
      "epoch": 0.5770376334583636,
      "grad_norm": 0.8079350590705872,
      "learning_rate": 8.904212489210152e-06,
      "loss": 0.0327,
      "step": 352600
    },
    {
      "epoch": 0.5770703638970169,
      "grad_norm": 1.684437870979309,
      "learning_rate": 8.904146596996633e-06,
      "loss": 0.0313,
      "step": 352620
    },
    {
      "epoch": 0.5771030943356703,
      "grad_norm": 0.58342444896698,
      "learning_rate": 8.904080704783117e-06,
      "loss": 0.0355,
      "step": 352640
    },
    {
      "epoch": 0.5771358247743237,
      "grad_norm": 3.138184070587158,
      "learning_rate": 8.904014812569599e-06,
      "loss": 0.044,
      "step": 352660
    },
    {
      "epoch": 0.5771685552129769,
      "grad_norm": 0.5895683765411377,
      "learning_rate": 8.903948920356082e-06,
      "loss": 0.0483,
      "step": 352680
    },
    {
      "epoch": 0.5772012856516303,
      "grad_norm": 0.8151348829269409,
      "learning_rate": 8.903883028142564e-06,
      "loss": 0.0307,
      "step": 352700
    },
    {
      "epoch": 0.5772340160902837,
      "grad_norm": 0.7057161927223206,
      "learning_rate": 8.903817135929048e-06,
      "loss": 0.0466,
      "step": 352720
    },
    {
      "epoch": 0.577266746528937,
      "grad_norm": 0.2173074334859848,
      "learning_rate": 8.90375124371553e-06,
      "loss": 0.0258,
      "step": 352740
    },
    {
      "epoch": 0.5772994769675903,
      "grad_norm": 3.061042070388794,
      "learning_rate": 8.903685351502013e-06,
      "loss": 0.0388,
      "step": 352760
    },
    {
      "epoch": 0.5773322074062437,
      "grad_norm": 0.9974915385246277,
      "learning_rate": 8.903619459288495e-06,
      "loss": 0.0363,
      "step": 352780
    },
    {
      "epoch": 0.577364937844897,
      "grad_norm": 0.386100709438324,
      "learning_rate": 8.903553567074979e-06,
      "loss": 0.0405,
      "step": 352800
    },
    {
      "epoch": 0.5773976682835503,
      "grad_norm": 0.28340691328048706,
      "learning_rate": 8.903487674861463e-06,
      "loss": 0.0349,
      "step": 352820
    },
    {
      "epoch": 0.5774303987222037,
      "grad_norm": 1.4302945137023926,
      "learning_rate": 8.903421782647944e-06,
      "loss": 0.0328,
      "step": 352840
    },
    {
      "epoch": 0.577463129160857,
      "grad_norm": 1.910957932472229,
      "learning_rate": 8.903355890434428e-06,
      "loss": 0.0375,
      "step": 352860
    },
    {
      "epoch": 0.5774958595995103,
      "grad_norm": 4.701864719390869,
      "learning_rate": 8.903289998220912e-06,
      "loss": 0.0376,
      "step": 352880
    },
    {
      "epoch": 0.5775285900381637,
      "grad_norm": 2.4424052238464355,
      "learning_rate": 8.903224106007393e-06,
      "loss": 0.0454,
      "step": 352900
    },
    {
      "epoch": 0.5775613204768171,
      "grad_norm": 0.3698064684867859,
      "learning_rate": 8.903158213793877e-06,
      "loss": 0.0306,
      "step": 352920
    },
    {
      "epoch": 0.5775940509154703,
      "grad_norm": 1.2458215951919556,
      "learning_rate": 8.90309232158036e-06,
      "loss": 0.0287,
      "step": 352940
    },
    {
      "epoch": 0.5776267813541237,
      "grad_norm": 0.9377558827400208,
      "learning_rate": 8.903026429366843e-06,
      "loss": 0.0358,
      "step": 352960
    },
    {
      "epoch": 0.5776595117927771,
      "grad_norm": 0.8413373827934265,
      "learning_rate": 8.902960537153326e-06,
      "loss": 0.0368,
      "step": 352980
    },
    {
      "epoch": 0.5776922422314303,
      "grad_norm": 9.07368278503418,
      "learning_rate": 8.902894644939808e-06,
      "loss": 0.0273,
      "step": 353000
    },
    {
      "epoch": 0.5777249726700837,
      "grad_norm": 4.912447452545166,
      "learning_rate": 8.902828752726292e-06,
      "loss": 0.0378,
      "step": 353020
    },
    {
      "epoch": 0.5777577031087371,
      "grad_norm": 0.6790598034858704,
      "learning_rate": 8.902762860512773e-06,
      "loss": 0.0386,
      "step": 353040
    },
    {
      "epoch": 0.5777904335473903,
      "grad_norm": 0.7116686105728149,
      "learning_rate": 8.902696968299257e-06,
      "loss": 0.0402,
      "step": 353060
    },
    {
      "epoch": 0.5778231639860437,
      "grad_norm": 3.727912425994873,
      "learning_rate": 8.902631076085739e-06,
      "loss": 0.0305,
      "step": 353080
    },
    {
      "epoch": 0.5778558944246971,
      "grad_norm": 2.06103515625,
      "learning_rate": 8.902565183872223e-06,
      "loss": 0.0404,
      "step": 353100
    },
    {
      "epoch": 0.5778886248633505,
      "grad_norm": 1.2697337865829468,
      "learning_rate": 8.902499291658704e-06,
      "loss": 0.043,
      "step": 353120
    },
    {
      "epoch": 0.5779213553020037,
      "grad_norm": 0.7779617309570312,
      "learning_rate": 8.902433399445188e-06,
      "loss": 0.0283,
      "step": 353140
    },
    {
      "epoch": 0.5779540857406571,
      "grad_norm": 1.066867709159851,
      "learning_rate": 8.90236750723167e-06,
      "loss": 0.03,
      "step": 353160
    },
    {
      "epoch": 0.5779868161793105,
      "grad_norm": 0.893855631351471,
      "learning_rate": 8.902301615018154e-06,
      "loss": 0.0417,
      "step": 353180
    },
    {
      "epoch": 0.5780195466179637,
      "grad_norm": 1.4694154262542725,
      "learning_rate": 8.902235722804637e-06,
      "loss": 0.028,
      "step": 353200
    },
    {
      "epoch": 0.5780522770566171,
      "grad_norm": 0.6181262731552124,
      "learning_rate": 8.902169830591119e-06,
      "loss": 0.0299,
      "step": 353220
    },
    {
      "epoch": 0.5780850074952705,
      "grad_norm": 0.5707810521125793,
      "learning_rate": 8.902103938377603e-06,
      "loss": 0.0362,
      "step": 353240
    },
    {
      "epoch": 0.5781177379339237,
      "grad_norm": 1.6105592250823975,
      "learning_rate": 8.902038046164086e-06,
      "loss": 0.0365,
      "step": 353260
    },
    {
      "epoch": 0.5781504683725771,
      "grad_norm": 2.6832869052886963,
      "learning_rate": 8.901972153950568e-06,
      "loss": 0.051,
      "step": 353280
    },
    {
      "epoch": 0.5781831988112305,
      "grad_norm": 1.0528030395507812,
      "learning_rate": 8.901906261737052e-06,
      "loss": 0.0317,
      "step": 353300
    },
    {
      "epoch": 0.5782159292498839,
      "grad_norm": 0.8252617120742798,
      "learning_rate": 8.901840369523535e-06,
      "loss": 0.0354,
      "step": 353320
    },
    {
      "epoch": 0.5782486596885371,
      "grad_norm": 0.7641430497169495,
      "learning_rate": 8.901774477310017e-06,
      "loss": 0.0436,
      "step": 353340
    },
    {
      "epoch": 0.5782813901271905,
      "grad_norm": 0.3842519521713257,
      "learning_rate": 8.9017085850965e-06,
      "loss": 0.0259,
      "step": 353360
    },
    {
      "epoch": 0.5783141205658439,
      "grad_norm": 1.0776736736297607,
      "learning_rate": 8.901642692882983e-06,
      "loss": 0.0353,
      "step": 353380
    },
    {
      "epoch": 0.5783468510044971,
      "grad_norm": 1.419503092765808,
      "learning_rate": 8.901576800669466e-06,
      "loss": 0.0365,
      "step": 353400
    },
    {
      "epoch": 0.5783795814431505,
      "grad_norm": 1.8365229368209839,
      "learning_rate": 8.901510908455948e-06,
      "loss": 0.04,
      "step": 353420
    },
    {
      "epoch": 0.5784123118818039,
      "grad_norm": 0.49742308259010315,
      "learning_rate": 8.901445016242432e-06,
      "loss": 0.0354,
      "step": 353440
    },
    {
      "epoch": 0.5784450423204571,
      "grad_norm": 0.6027986407279968,
      "learning_rate": 8.901379124028914e-06,
      "loss": 0.0302,
      "step": 353460
    },
    {
      "epoch": 0.5784777727591105,
      "grad_norm": 0.6378294825553894,
      "learning_rate": 8.901313231815397e-06,
      "loss": 0.0459,
      "step": 353480
    },
    {
      "epoch": 0.5785105031977639,
      "grad_norm": 1.6655335426330566,
      "learning_rate": 8.901247339601879e-06,
      "loss": 0.0386,
      "step": 353500
    },
    {
      "epoch": 0.5785432336364172,
      "grad_norm": 2.3310866355895996,
      "learning_rate": 8.901181447388363e-06,
      "loss": 0.0341,
      "step": 353520
    },
    {
      "epoch": 0.5785759640750705,
      "grad_norm": 0.9652696251869202,
      "learning_rate": 8.901115555174846e-06,
      "loss": 0.0351,
      "step": 353540
    },
    {
      "epoch": 0.5786086945137239,
      "grad_norm": 0.915545642375946,
      "learning_rate": 8.901049662961328e-06,
      "loss": 0.0361,
      "step": 353560
    },
    {
      "epoch": 0.5786414249523772,
      "grad_norm": 0.22023539245128632,
      "learning_rate": 8.900983770747812e-06,
      "loss": 0.0408,
      "step": 353580
    },
    {
      "epoch": 0.5786741553910305,
      "grad_norm": 1.5534076690673828,
      "learning_rate": 8.900917878534294e-06,
      "loss": 0.0382,
      "step": 353600
    },
    {
      "epoch": 0.5787068858296839,
      "grad_norm": 1.2689828872680664,
      "learning_rate": 8.900851986320777e-06,
      "loss": 0.0371,
      "step": 353620
    },
    {
      "epoch": 0.5787396162683373,
      "grad_norm": 0.33282470703125,
      "learning_rate": 8.900786094107259e-06,
      "loss": 0.0401,
      "step": 353640
    },
    {
      "epoch": 0.5787723467069905,
      "grad_norm": 1.2632936239242554,
      "learning_rate": 8.900720201893743e-06,
      "loss": 0.0442,
      "step": 353660
    },
    {
      "epoch": 0.5788050771456439,
      "grad_norm": 2.6033194065093994,
      "learning_rate": 8.900654309680226e-06,
      "loss": 0.0431,
      "step": 353680
    },
    {
      "epoch": 0.5788378075842973,
      "grad_norm": 2.475968360900879,
      "learning_rate": 8.900588417466708e-06,
      "loss": 0.0391,
      "step": 353700
    },
    {
      "epoch": 0.5788705380229506,
      "grad_norm": 0.44346871972084045,
      "learning_rate": 8.900522525253192e-06,
      "loss": 0.0357,
      "step": 353720
    },
    {
      "epoch": 0.5789032684616039,
      "grad_norm": 0.5977334380149841,
      "learning_rate": 8.900456633039675e-06,
      "loss": 0.0278,
      "step": 353740
    },
    {
      "epoch": 0.5789359989002573,
      "grad_norm": 1.8077019453048706,
      "learning_rate": 8.900390740826157e-06,
      "loss": 0.0344,
      "step": 353760
    },
    {
      "epoch": 0.5789687293389106,
      "grad_norm": 1.0024791955947876,
      "learning_rate": 8.90032484861264e-06,
      "loss": 0.026,
      "step": 353780
    },
    {
      "epoch": 0.5790014597775639,
      "grad_norm": 1.2887954711914062,
      "learning_rate": 8.900258956399123e-06,
      "loss": 0.0342,
      "step": 353800
    },
    {
      "epoch": 0.5790341902162173,
      "grad_norm": 0.507764995098114,
      "learning_rate": 8.900193064185606e-06,
      "loss": 0.0376,
      "step": 353820
    },
    {
      "epoch": 0.5790669206548706,
      "grad_norm": 0.7853555679321289,
      "learning_rate": 8.900127171972088e-06,
      "loss": 0.0398,
      "step": 353840
    },
    {
      "epoch": 0.5790996510935239,
      "grad_norm": 1.7506893873214722,
      "learning_rate": 8.900061279758572e-06,
      "loss": 0.0393,
      "step": 353860
    },
    {
      "epoch": 0.5791323815321773,
      "grad_norm": 0.8737329840660095,
      "learning_rate": 8.899995387545054e-06,
      "loss": 0.0341,
      "step": 353880
    },
    {
      "epoch": 0.5791651119708306,
      "grad_norm": 0.21623662114143372,
      "learning_rate": 8.899929495331537e-06,
      "loss": 0.0295,
      "step": 353900
    },
    {
      "epoch": 0.579197842409484,
      "grad_norm": 0.7339192628860474,
      "learning_rate": 8.89986360311802e-06,
      "loss": 0.0317,
      "step": 353920
    },
    {
      "epoch": 0.5792305728481373,
      "grad_norm": 0.9485852122306824,
      "learning_rate": 8.899797710904503e-06,
      "loss": 0.0281,
      "step": 353940
    },
    {
      "epoch": 0.5792633032867907,
      "grad_norm": 1.571119785308838,
      "learning_rate": 8.899731818690986e-06,
      "loss": 0.0617,
      "step": 353960
    },
    {
      "epoch": 0.579296033725444,
      "grad_norm": 0.9603990316390991,
      "learning_rate": 8.899665926477468e-06,
      "loss": 0.0434,
      "step": 353980
    },
    {
      "epoch": 0.5793287641640973,
      "grad_norm": 0.8285213708877563,
      "learning_rate": 8.899600034263952e-06,
      "loss": 0.0349,
      "step": 354000
    },
    {
      "epoch": 0.5793614946027507,
      "grad_norm": 1.6065144538879395,
      "learning_rate": 8.899534142050434e-06,
      "loss": 0.0389,
      "step": 354020
    },
    {
      "epoch": 0.579394225041404,
      "grad_norm": 3.937459707260132,
      "learning_rate": 8.899468249836917e-06,
      "loss": 0.0397,
      "step": 354040
    },
    {
      "epoch": 0.5794269554800573,
      "grad_norm": 0.8571350574493408,
      "learning_rate": 8.8994023576234e-06,
      "loss": 0.036,
      "step": 354060
    },
    {
      "epoch": 0.5794596859187107,
      "grad_norm": 4.119422435760498,
      "learning_rate": 8.899336465409883e-06,
      "loss": 0.0399,
      "step": 354080
    },
    {
      "epoch": 0.579492416357364,
      "grad_norm": 0.813816487789154,
      "learning_rate": 8.899270573196366e-06,
      "loss": 0.0322,
      "step": 354100
    },
    {
      "epoch": 0.5795251467960174,
      "grad_norm": 1.0462396144866943,
      "learning_rate": 8.89920468098285e-06,
      "loss": 0.0328,
      "step": 354120
    },
    {
      "epoch": 0.5795578772346707,
      "grad_norm": 1.184130311012268,
      "learning_rate": 8.899138788769332e-06,
      "loss": 0.0271,
      "step": 354140
    },
    {
      "epoch": 0.579590607673324,
      "grad_norm": 0.4832012951374054,
      "learning_rate": 8.899072896555815e-06,
      "loss": 0.0406,
      "step": 354160
    },
    {
      "epoch": 0.5796233381119774,
      "grad_norm": 5.581504821777344,
      "learning_rate": 8.899007004342297e-06,
      "loss": 0.053,
      "step": 354180
    },
    {
      "epoch": 0.5796560685506307,
      "grad_norm": 0.7723917961120605,
      "learning_rate": 8.898941112128781e-06,
      "loss": 0.0395,
      "step": 354200
    },
    {
      "epoch": 0.579688798989284,
      "grad_norm": 1.0502475500106812,
      "learning_rate": 8.898875219915263e-06,
      "loss": 0.0252,
      "step": 354220
    },
    {
      "epoch": 0.5797215294279374,
      "grad_norm": 3.7005324363708496,
      "learning_rate": 8.898809327701746e-06,
      "loss": 0.0384,
      "step": 354240
    },
    {
      "epoch": 0.5797542598665907,
      "grad_norm": 3.5906612873077393,
      "learning_rate": 8.89874343548823e-06,
      "loss": 0.0354,
      "step": 354260
    },
    {
      "epoch": 0.579786990305244,
      "grad_norm": 0.41156113147735596,
      "learning_rate": 8.898677543274712e-06,
      "loss": 0.0317,
      "step": 354280
    },
    {
      "epoch": 0.5798197207438974,
      "grad_norm": 3.772766590118408,
      "learning_rate": 8.898611651061195e-06,
      "loss": 0.0425,
      "step": 354300
    },
    {
      "epoch": 0.5798524511825508,
      "grad_norm": 1.1086041927337646,
      "learning_rate": 8.898545758847677e-06,
      "loss": 0.0341,
      "step": 354320
    },
    {
      "epoch": 0.5798851816212041,
      "grad_norm": 2.2426884174346924,
      "learning_rate": 8.898479866634161e-06,
      "loss": 0.0509,
      "step": 354340
    },
    {
      "epoch": 0.5799179120598574,
      "grad_norm": 0.7360921502113342,
      "learning_rate": 8.898413974420643e-06,
      "loss": 0.0453,
      "step": 354360
    },
    {
      "epoch": 0.5799506424985108,
      "grad_norm": 1.117427945137024,
      "learning_rate": 8.898348082207126e-06,
      "loss": 0.0396,
      "step": 354380
    },
    {
      "epoch": 0.5799833729371641,
      "grad_norm": 0.690683126449585,
      "learning_rate": 8.898282189993608e-06,
      "loss": 0.0437,
      "step": 354400
    },
    {
      "epoch": 0.5800161033758174,
      "grad_norm": 2.739237070083618,
      "learning_rate": 8.898216297780092e-06,
      "loss": 0.0348,
      "step": 354420
    },
    {
      "epoch": 0.5800488338144708,
      "grad_norm": 0.6202136278152466,
      "learning_rate": 8.898150405566575e-06,
      "loss": 0.0398,
      "step": 354440
    },
    {
      "epoch": 0.5800815642531241,
      "grad_norm": 2.599233865737915,
      "learning_rate": 8.898084513353057e-06,
      "loss": 0.039,
      "step": 354460
    },
    {
      "epoch": 0.5801142946917774,
      "grad_norm": 1.4659826755523682,
      "learning_rate": 8.898018621139541e-06,
      "loss": 0.0318,
      "step": 354480
    },
    {
      "epoch": 0.5801470251304308,
      "grad_norm": 0.9515412449836731,
      "learning_rate": 8.897952728926025e-06,
      "loss": 0.049,
      "step": 354500
    },
    {
      "epoch": 0.5801797555690842,
      "grad_norm": 0.6397678256034851,
      "learning_rate": 8.897886836712506e-06,
      "loss": 0.0428,
      "step": 354520
    },
    {
      "epoch": 0.5802124860077374,
      "grad_norm": 1.199720859527588,
      "learning_rate": 8.89782094449899e-06,
      "loss": 0.0309,
      "step": 354540
    },
    {
      "epoch": 0.5802452164463908,
      "grad_norm": 0.7045406103134155,
      "learning_rate": 8.897755052285472e-06,
      "loss": 0.045,
      "step": 354560
    },
    {
      "epoch": 0.5802779468850442,
      "grad_norm": 1.6074714660644531,
      "learning_rate": 8.897689160071955e-06,
      "loss": 0.0255,
      "step": 354580
    },
    {
      "epoch": 0.5803106773236975,
      "grad_norm": 1.2559820413589478,
      "learning_rate": 8.897623267858439e-06,
      "loss": 0.0369,
      "step": 354600
    },
    {
      "epoch": 0.5803434077623508,
      "grad_norm": 3.164031744003296,
      "learning_rate": 8.897557375644921e-06,
      "loss": 0.0417,
      "step": 354620
    },
    {
      "epoch": 0.5803761382010042,
      "grad_norm": 0.640693724155426,
      "learning_rate": 8.897491483431405e-06,
      "loss": 0.0277,
      "step": 354640
    },
    {
      "epoch": 0.5804088686396575,
      "grad_norm": 2.6226625442504883,
      "learning_rate": 8.897425591217886e-06,
      "loss": 0.035,
      "step": 354660
    },
    {
      "epoch": 0.5804415990783108,
      "grad_norm": 5.090999126434326,
      "learning_rate": 8.89735969900437e-06,
      "loss": 0.0384,
      "step": 354680
    },
    {
      "epoch": 0.5804743295169642,
      "grad_norm": 0.8916618227958679,
      "learning_rate": 8.897293806790852e-06,
      "loss": 0.0362,
      "step": 354700
    },
    {
      "epoch": 0.5805070599556176,
      "grad_norm": 1.4196211099624634,
      "learning_rate": 8.897227914577335e-06,
      "loss": 0.0423,
      "step": 354720
    },
    {
      "epoch": 0.5805397903942708,
      "grad_norm": 0.41399091482162476,
      "learning_rate": 8.897162022363817e-06,
      "loss": 0.0311,
      "step": 354740
    },
    {
      "epoch": 0.5805725208329242,
      "grad_norm": 1.012262225151062,
      "learning_rate": 8.897096130150301e-06,
      "loss": 0.0386,
      "step": 354760
    },
    {
      "epoch": 0.5806052512715776,
      "grad_norm": 0.39251136779785156,
      "learning_rate": 8.897030237936783e-06,
      "loss": 0.0339,
      "step": 354780
    },
    {
      "epoch": 0.5806379817102308,
      "grad_norm": 0.6024481654167175,
      "learning_rate": 8.896964345723266e-06,
      "loss": 0.0357,
      "step": 354800
    },
    {
      "epoch": 0.5806707121488842,
      "grad_norm": 1.2190245389938354,
      "learning_rate": 8.896898453509748e-06,
      "loss": 0.0314,
      "step": 354820
    },
    {
      "epoch": 0.5807034425875376,
      "grad_norm": 0.6586222648620605,
      "learning_rate": 8.896832561296232e-06,
      "loss": 0.0262,
      "step": 354840
    },
    {
      "epoch": 0.5807361730261908,
      "grad_norm": 2.5363824367523193,
      "learning_rate": 8.896766669082716e-06,
      "loss": 0.0511,
      "step": 354860
    },
    {
      "epoch": 0.5807689034648442,
      "grad_norm": 0.7689511775970459,
      "learning_rate": 8.896700776869197e-06,
      "loss": 0.0438,
      "step": 354880
    },
    {
      "epoch": 0.5808016339034976,
      "grad_norm": 1.530070185661316,
      "learning_rate": 8.896634884655681e-06,
      "loss": 0.0442,
      "step": 354900
    },
    {
      "epoch": 0.580834364342151,
      "grad_norm": 1.5369789600372314,
      "learning_rate": 8.896568992442165e-06,
      "loss": 0.047,
      "step": 354920
    },
    {
      "epoch": 0.5808670947808042,
      "grad_norm": 0.9095448851585388,
      "learning_rate": 8.896503100228646e-06,
      "loss": 0.0416,
      "step": 354940
    },
    {
      "epoch": 0.5808998252194576,
      "grad_norm": 0.7500932812690735,
      "learning_rate": 8.89643720801513e-06,
      "loss": 0.0306,
      "step": 354960
    },
    {
      "epoch": 0.580932555658111,
      "grad_norm": 0.8392055034637451,
      "learning_rate": 8.896371315801614e-06,
      "loss": 0.0372,
      "step": 354980
    },
    {
      "epoch": 0.5809652860967642,
      "grad_norm": 1.4937094449996948,
      "learning_rate": 8.896305423588096e-06,
      "loss": 0.0355,
      "step": 355000
    },
    {
      "epoch": 0.5809980165354176,
      "grad_norm": 0.2783174216747284,
      "learning_rate": 8.896239531374579e-06,
      "loss": 0.0395,
      "step": 355020
    },
    {
      "epoch": 0.581030746974071,
      "grad_norm": 0.3167651295661926,
      "learning_rate": 8.896173639161061e-06,
      "loss": 0.0305,
      "step": 355040
    },
    {
      "epoch": 0.5810634774127242,
      "grad_norm": 1.674301028251648,
      "learning_rate": 8.896107746947545e-06,
      "loss": 0.0306,
      "step": 355060
    },
    {
      "epoch": 0.5810962078513776,
      "grad_norm": 1.902038812637329,
      "learning_rate": 8.896041854734027e-06,
      "loss": 0.0345,
      "step": 355080
    },
    {
      "epoch": 0.581128938290031,
      "grad_norm": 1.2535876035690308,
      "learning_rate": 8.89597596252051e-06,
      "loss": 0.0436,
      "step": 355100
    },
    {
      "epoch": 0.5811616687286844,
      "grad_norm": 0.7238590121269226,
      "learning_rate": 8.895910070306992e-06,
      "loss": 0.0292,
      "step": 355120
    },
    {
      "epoch": 0.5811943991673376,
      "grad_norm": 0.5540726184844971,
      "learning_rate": 8.895844178093476e-06,
      "loss": 0.0287,
      "step": 355140
    },
    {
      "epoch": 0.581227129605991,
      "grad_norm": 1.2630090713500977,
      "learning_rate": 8.895778285879957e-06,
      "loss": 0.0331,
      "step": 355160
    },
    {
      "epoch": 0.5812598600446444,
      "grad_norm": 0.886360764503479,
      "learning_rate": 8.895712393666441e-06,
      "loss": 0.0388,
      "step": 355180
    },
    {
      "epoch": 0.5812925904832976,
      "grad_norm": 1.725637674331665,
      "learning_rate": 8.895646501452923e-06,
      "loss": 0.0433,
      "step": 355200
    },
    {
      "epoch": 0.581325320921951,
      "grad_norm": 2.841967821121216,
      "learning_rate": 8.895580609239407e-06,
      "loss": 0.0516,
      "step": 355220
    },
    {
      "epoch": 0.5813580513606044,
      "grad_norm": 0.927354633808136,
      "learning_rate": 8.89551471702589e-06,
      "loss": 0.0477,
      "step": 355240
    },
    {
      "epoch": 0.5813907817992576,
      "grad_norm": 0.9516481757164001,
      "learning_rate": 8.895448824812372e-06,
      "loss": 0.0312,
      "step": 355260
    },
    {
      "epoch": 0.581423512237911,
      "grad_norm": 2.019447088241577,
      "learning_rate": 8.895382932598856e-06,
      "loss": 0.0459,
      "step": 355280
    },
    {
      "epoch": 0.5814562426765644,
      "grad_norm": 1.199865460395813,
      "learning_rate": 8.89531704038534e-06,
      "loss": 0.0351,
      "step": 355300
    },
    {
      "epoch": 0.5814889731152177,
      "grad_norm": 0.884772777557373,
      "learning_rate": 8.895251148171821e-06,
      "loss": 0.0294,
      "step": 355320
    },
    {
      "epoch": 0.581521703553871,
      "grad_norm": 2.064821720123291,
      "learning_rate": 8.895185255958305e-06,
      "loss": 0.0451,
      "step": 355340
    },
    {
      "epoch": 0.5815544339925244,
      "grad_norm": 2.0233263969421387,
      "learning_rate": 8.895119363744788e-06,
      "loss": 0.03,
      "step": 355360
    },
    {
      "epoch": 0.5815871644311777,
      "grad_norm": 0.4443717300891876,
      "learning_rate": 8.89505347153127e-06,
      "loss": 0.0395,
      "step": 355380
    },
    {
      "epoch": 0.581619894869831,
      "grad_norm": 0.6659021377563477,
      "learning_rate": 8.894987579317754e-06,
      "loss": 0.0413,
      "step": 355400
    },
    {
      "epoch": 0.5816526253084844,
      "grad_norm": 0.5667813420295715,
      "learning_rate": 8.894921687104236e-06,
      "loss": 0.0262,
      "step": 355420
    },
    {
      "epoch": 0.5816853557471378,
      "grad_norm": 1.0877208709716797,
      "learning_rate": 8.89485579489072e-06,
      "loss": 0.0327,
      "step": 355440
    },
    {
      "epoch": 0.581718086185791,
      "grad_norm": 2.2397518157958984,
      "learning_rate": 8.894789902677201e-06,
      "loss": 0.0258,
      "step": 355460
    },
    {
      "epoch": 0.5817508166244444,
      "grad_norm": 0.6141345500946045,
      "learning_rate": 8.894724010463685e-06,
      "loss": 0.0471,
      "step": 355480
    },
    {
      "epoch": 0.5817835470630978,
      "grad_norm": 1.736042857170105,
      "learning_rate": 8.894658118250167e-06,
      "loss": 0.0301,
      "step": 355500
    },
    {
      "epoch": 0.5818162775017511,
      "grad_norm": 2.051535129547119,
      "learning_rate": 8.89459222603665e-06,
      "loss": 0.0356,
      "step": 355520
    },
    {
      "epoch": 0.5818490079404044,
      "grad_norm": 1.858778953552246,
      "learning_rate": 8.894526333823132e-06,
      "loss": 0.0394,
      "step": 355540
    },
    {
      "epoch": 0.5818817383790578,
      "grad_norm": 5.667964935302734,
      "learning_rate": 8.894460441609616e-06,
      "loss": 0.0327,
      "step": 355560
    },
    {
      "epoch": 0.5819144688177111,
      "grad_norm": 0.2746323049068451,
      "learning_rate": 8.894394549396098e-06,
      "loss": 0.0435,
      "step": 355580
    },
    {
      "epoch": 0.5819471992563644,
      "grad_norm": 1.690562129020691,
      "learning_rate": 8.894328657182581e-06,
      "loss": 0.0327,
      "step": 355600
    },
    {
      "epoch": 0.5819799296950178,
      "grad_norm": 1.0588877201080322,
      "learning_rate": 8.894262764969063e-06,
      "loss": 0.0366,
      "step": 355620
    },
    {
      "epoch": 0.5820126601336711,
      "grad_norm": 1.1295067071914673,
      "learning_rate": 8.894196872755547e-06,
      "loss": 0.0325,
      "step": 355640
    },
    {
      "epoch": 0.5820453905723244,
      "grad_norm": 0.7988102436065674,
      "learning_rate": 8.89413098054203e-06,
      "loss": 0.0455,
      "step": 355660
    },
    {
      "epoch": 0.5820781210109778,
      "grad_norm": 1.6630537509918213,
      "learning_rate": 8.894065088328512e-06,
      "loss": 0.0402,
      "step": 355680
    },
    {
      "epoch": 0.5821108514496312,
      "grad_norm": 1.0672098398208618,
      "learning_rate": 8.893999196114996e-06,
      "loss": 0.0297,
      "step": 355700
    },
    {
      "epoch": 0.5821435818882844,
      "grad_norm": 0.24951627850532532,
      "learning_rate": 8.89393330390148e-06,
      "loss": 0.0523,
      "step": 355720
    },
    {
      "epoch": 0.5821763123269378,
      "grad_norm": 1.012829065322876,
      "learning_rate": 8.893867411687961e-06,
      "loss": 0.0461,
      "step": 355740
    },
    {
      "epoch": 0.5822090427655912,
      "grad_norm": 0.8446205258369446,
      "learning_rate": 8.893801519474445e-06,
      "loss": 0.0305,
      "step": 355760
    },
    {
      "epoch": 0.5822417732042445,
      "grad_norm": 0.7382487058639526,
      "learning_rate": 8.893735627260928e-06,
      "loss": 0.0358,
      "step": 355780
    },
    {
      "epoch": 0.5822745036428978,
      "grad_norm": 1.7206227779388428,
      "learning_rate": 8.89366973504741e-06,
      "loss": 0.0382,
      "step": 355800
    },
    {
      "epoch": 0.5823072340815512,
      "grad_norm": 1.4093002080917358,
      "learning_rate": 8.893603842833894e-06,
      "loss": 0.034,
      "step": 355820
    },
    {
      "epoch": 0.5823399645202045,
      "grad_norm": 0.3537442684173584,
      "learning_rate": 8.893537950620376e-06,
      "loss": 0.0343,
      "step": 355840
    },
    {
      "epoch": 0.5823726949588578,
      "grad_norm": 1.11539626121521,
      "learning_rate": 8.89347205840686e-06,
      "loss": 0.0396,
      "step": 355860
    },
    {
      "epoch": 0.5824054253975112,
      "grad_norm": 1.237797737121582,
      "learning_rate": 8.893406166193341e-06,
      "loss": 0.0302,
      "step": 355880
    },
    {
      "epoch": 0.5824381558361645,
      "grad_norm": 0.5990162491798401,
      "learning_rate": 8.893340273979825e-06,
      "loss": 0.0364,
      "step": 355900
    },
    {
      "epoch": 0.5824708862748178,
      "grad_norm": 1.5306636095046997,
      "learning_rate": 8.893274381766307e-06,
      "loss": 0.0289,
      "step": 355920
    },
    {
      "epoch": 0.5825036167134712,
      "grad_norm": 1.5488221645355225,
      "learning_rate": 8.89320848955279e-06,
      "loss": 0.0306,
      "step": 355940
    },
    {
      "epoch": 0.5825363471521245,
      "grad_norm": 0.5506057143211365,
      "learning_rate": 8.893142597339272e-06,
      "loss": 0.021,
      "step": 355960
    },
    {
      "epoch": 0.5825690775907779,
      "grad_norm": 3.0138983726501465,
      "learning_rate": 8.893076705125756e-06,
      "loss": 0.0338,
      "step": 355980
    },
    {
      "epoch": 0.5826018080294312,
      "grad_norm": 2.4861984252929688,
      "learning_rate": 8.893010812912238e-06,
      "loss": 0.0266,
      "step": 356000
    },
    {
      "epoch": 0.5826345384680846,
      "grad_norm": 1.3692500591278076,
      "learning_rate": 8.892944920698721e-06,
      "loss": 0.0324,
      "step": 356020
    },
    {
      "epoch": 0.5826672689067379,
      "grad_norm": 0.4661886692047119,
      "learning_rate": 8.892879028485205e-06,
      "loss": 0.0404,
      "step": 356040
    },
    {
      "epoch": 0.5826999993453912,
      "grad_norm": 4.527353286743164,
      "learning_rate": 8.892813136271687e-06,
      "loss": 0.0417,
      "step": 356060
    },
    {
      "epoch": 0.5827327297840446,
      "grad_norm": 1.30978262424469,
      "learning_rate": 8.89274724405817e-06,
      "loss": 0.0329,
      "step": 356080
    },
    {
      "epoch": 0.5827654602226979,
      "grad_norm": 0.39371296763420105,
      "learning_rate": 8.892681351844654e-06,
      "loss": 0.041,
      "step": 356100
    },
    {
      "epoch": 0.5827981906613512,
      "grad_norm": 0.9403220415115356,
      "learning_rate": 8.892615459631136e-06,
      "loss": 0.0356,
      "step": 356120
    },
    {
      "epoch": 0.5828309211000046,
      "grad_norm": 1.441387414932251,
      "learning_rate": 8.89254956741762e-06,
      "loss": 0.0259,
      "step": 356140
    },
    {
      "epoch": 0.5828636515386579,
      "grad_norm": 1.0139110088348389,
      "learning_rate": 8.892483675204103e-06,
      "loss": 0.032,
      "step": 356160
    },
    {
      "epoch": 0.5828963819773113,
      "grad_norm": 0.4002377986907959,
      "learning_rate": 8.892417782990585e-06,
      "loss": 0.0318,
      "step": 356180
    },
    {
      "epoch": 0.5829291124159646,
      "grad_norm": 1.2889496088027954,
      "learning_rate": 8.892351890777068e-06,
      "loss": 0.0258,
      "step": 356200
    },
    {
      "epoch": 0.5829618428546179,
      "grad_norm": 0.9727782607078552,
      "learning_rate": 8.89228599856355e-06,
      "loss": 0.0313,
      "step": 356220
    },
    {
      "epoch": 0.5829945732932713,
      "grad_norm": 0.5628517866134644,
      "learning_rate": 8.892220106350034e-06,
      "loss": 0.0269,
      "step": 356240
    },
    {
      "epoch": 0.5830273037319246,
      "grad_norm": 2.585531234741211,
      "learning_rate": 8.892154214136516e-06,
      "loss": 0.0346,
      "step": 356260
    },
    {
      "epoch": 0.583060034170578,
      "grad_norm": 1.6430071592330933,
      "learning_rate": 8.892088321923e-06,
      "loss": 0.036,
      "step": 356280
    },
    {
      "epoch": 0.5830927646092313,
      "grad_norm": 0.41242051124572754,
      "learning_rate": 8.892022429709481e-06,
      "loss": 0.0327,
      "step": 356300
    },
    {
      "epoch": 0.5831254950478846,
      "grad_norm": 1.3589776754379272,
      "learning_rate": 8.891956537495965e-06,
      "loss": 0.0268,
      "step": 356320
    },
    {
      "epoch": 0.583158225486538,
      "grad_norm": 0.20262601971626282,
      "learning_rate": 8.891890645282447e-06,
      "loss": 0.0388,
      "step": 356340
    },
    {
      "epoch": 0.5831909559251913,
      "grad_norm": 3.026294708251953,
      "learning_rate": 8.89182475306893e-06,
      "loss": 0.04,
      "step": 356360
    },
    {
      "epoch": 0.5832236863638447,
      "grad_norm": 1.5800095796585083,
      "learning_rate": 8.891758860855414e-06,
      "loss": 0.0503,
      "step": 356380
    },
    {
      "epoch": 0.583256416802498,
      "grad_norm": 1.834344744682312,
      "learning_rate": 8.891692968641896e-06,
      "loss": 0.0378,
      "step": 356400
    },
    {
      "epoch": 0.5832891472411513,
      "grad_norm": 0.510074257850647,
      "learning_rate": 8.89162707642838e-06,
      "loss": 0.0312,
      "step": 356420
    },
    {
      "epoch": 0.5833218776798047,
      "grad_norm": 1.3912409543991089,
      "learning_rate": 8.891561184214861e-06,
      "loss": 0.0406,
      "step": 356440
    },
    {
      "epoch": 0.583354608118458,
      "grad_norm": 0.4047735929489136,
      "learning_rate": 8.891495292001345e-06,
      "loss": 0.0288,
      "step": 356460
    },
    {
      "epoch": 0.5833873385571113,
      "grad_norm": 0.7377676963806152,
      "learning_rate": 8.891429399787827e-06,
      "loss": 0.0307,
      "step": 356480
    },
    {
      "epoch": 0.5834200689957647,
      "grad_norm": 1.078562617301941,
      "learning_rate": 8.89136350757431e-06,
      "loss": 0.034,
      "step": 356500
    },
    {
      "epoch": 0.583452799434418,
      "grad_norm": 1.8584611415863037,
      "learning_rate": 8.891297615360794e-06,
      "loss": 0.036,
      "step": 356520
    },
    {
      "epoch": 0.5834855298730713,
      "grad_norm": 0.4681123197078705,
      "learning_rate": 8.891231723147278e-06,
      "loss": 0.037,
      "step": 356540
    },
    {
      "epoch": 0.5835182603117247,
      "grad_norm": 0.6850155591964722,
      "learning_rate": 8.89116583093376e-06,
      "loss": 0.0348,
      "step": 356560
    },
    {
      "epoch": 0.5835509907503781,
      "grad_norm": 1.2588828802108765,
      "learning_rate": 8.891099938720243e-06,
      "loss": 0.0488,
      "step": 356580
    },
    {
      "epoch": 0.5835837211890313,
      "grad_norm": 0.8704819679260254,
      "learning_rate": 8.891034046506725e-06,
      "loss": 0.0325,
      "step": 356600
    },
    {
      "epoch": 0.5836164516276847,
      "grad_norm": 0.6786360740661621,
      "learning_rate": 8.890968154293208e-06,
      "loss": 0.0297,
      "step": 356620
    },
    {
      "epoch": 0.5836491820663381,
      "grad_norm": 0.2502948045730591,
      "learning_rate": 8.89090226207969e-06,
      "loss": 0.0395,
      "step": 356640
    },
    {
      "epoch": 0.5836819125049914,
      "grad_norm": 0.4226028025150299,
      "learning_rate": 8.890836369866174e-06,
      "loss": 0.0374,
      "step": 356660
    },
    {
      "epoch": 0.5837146429436447,
      "grad_norm": 1.594709038734436,
      "learning_rate": 8.890770477652656e-06,
      "loss": 0.0417,
      "step": 356680
    },
    {
      "epoch": 0.5837473733822981,
      "grad_norm": 0.5470890402793884,
      "learning_rate": 8.89070458543914e-06,
      "loss": 0.0328,
      "step": 356700
    },
    {
      "epoch": 0.5837801038209514,
      "grad_norm": 0.5913740992546082,
      "learning_rate": 8.890638693225623e-06,
      "loss": 0.0333,
      "step": 356720
    },
    {
      "epoch": 0.5838128342596047,
      "grad_norm": 1.5079069137573242,
      "learning_rate": 8.890572801012105e-06,
      "loss": 0.0325,
      "step": 356740
    },
    {
      "epoch": 0.5838455646982581,
      "grad_norm": 0.4948898255825043,
      "learning_rate": 8.890506908798588e-06,
      "loss": 0.0261,
      "step": 356760
    },
    {
      "epoch": 0.5838782951369115,
      "grad_norm": 0.7458814382553101,
      "learning_rate": 8.89044101658507e-06,
      "loss": 0.0444,
      "step": 356780
    },
    {
      "epoch": 0.5839110255755647,
      "grad_norm": 0.3948257863521576,
      "learning_rate": 8.890375124371554e-06,
      "loss": 0.0434,
      "step": 356800
    },
    {
      "epoch": 0.5839437560142181,
      "grad_norm": 0.7141537070274353,
      "learning_rate": 8.890309232158036e-06,
      "loss": 0.0361,
      "step": 356820
    },
    {
      "epoch": 0.5839764864528715,
      "grad_norm": 2.960902452468872,
      "learning_rate": 8.89024333994452e-06,
      "loss": 0.041,
      "step": 356840
    },
    {
      "epoch": 0.5840092168915247,
      "grad_norm": 1.4489879608154297,
      "learning_rate": 8.890177447731001e-06,
      "loss": 0.038,
      "step": 356860
    },
    {
      "epoch": 0.5840419473301781,
      "grad_norm": 0.4296557605266571,
      "learning_rate": 8.890111555517485e-06,
      "loss": 0.0376,
      "step": 356880
    },
    {
      "epoch": 0.5840746777688315,
      "grad_norm": 2.1294233798980713,
      "learning_rate": 8.890045663303969e-06,
      "loss": 0.0429,
      "step": 356900
    },
    {
      "epoch": 0.5841074082074847,
      "grad_norm": 1.4736216068267822,
      "learning_rate": 8.88997977109045e-06,
      "loss": 0.0512,
      "step": 356920
    },
    {
      "epoch": 0.5841401386461381,
      "grad_norm": 0.9366223812103271,
      "learning_rate": 8.889913878876934e-06,
      "loss": 0.0315,
      "step": 356940
    },
    {
      "epoch": 0.5841728690847915,
      "grad_norm": 0.5471771359443665,
      "learning_rate": 8.889847986663418e-06,
      "loss": 0.0323,
      "step": 356960
    },
    {
      "epoch": 0.5842055995234449,
      "grad_norm": 1.0028667449951172,
      "learning_rate": 8.8897820944499e-06,
      "loss": 0.0264,
      "step": 356980
    },
    {
      "epoch": 0.5842383299620981,
      "grad_norm": 1.3912757635116577,
      "learning_rate": 8.889716202236383e-06,
      "loss": 0.0262,
      "step": 357000
    },
    {
      "epoch": 0.5842710604007515,
      "grad_norm": 1.2993475198745728,
      "learning_rate": 8.889650310022865e-06,
      "loss": 0.0436,
      "step": 357020
    },
    {
      "epoch": 0.5843037908394049,
      "grad_norm": 0.6793332099914551,
      "learning_rate": 8.889584417809349e-06,
      "loss": 0.0364,
      "step": 357040
    },
    {
      "epoch": 0.5843365212780581,
      "grad_norm": 0.7707147598266602,
      "learning_rate": 8.889518525595832e-06,
      "loss": 0.0394,
      "step": 357060
    },
    {
      "epoch": 0.5843692517167115,
      "grad_norm": 0.8614999651908875,
      "learning_rate": 8.889452633382314e-06,
      "loss": 0.0292,
      "step": 357080
    },
    {
      "epoch": 0.5844019821553649,
      "grad_norm": 0.27759772539138794,
      "learning_rate": 8.889386741168798e-06,
      "loss": 0.0382,
      "step": 357100
    },
    {
      "epoch": 0.5844347125940181,
      "grad_norm": 0.22481557726860046,
      "learning_rate": 8.88932084895528e-06,
      "loss": 0.0332,
      "step": 357120
    },
    {
      "epoch": 0.5844674430326715,
      "grad_norm": 1.6946913003921509,
      "learning_rate": 8.889254956741763e-06,
      "loss": 0.0433,
      "step": 357140
    },
    {
      "epoch": 0.5845001734713249,
      "grad_norm": 0.9481992721557617,
      "learning_rate": 8.889189064528245e-06,
      "loss": 0.0327,
      "step": 357160
    },
    {
      "epoch": 0.5845329039099783,
      "grad_norm": 1.0300145149230957,
      "learning_rate": 8.889123172314729e-06,
      "loss": 0.031,
      "step": 357180
    },
    {
      "epoch": 0.5845656343486315,
      "grad_norm": 0.6460293531417847,
      "learning_rate": 8.88905728010121e-06,
      "loss": 0.0409,
      "step": 357200
    },
    {
      "epoch": 0.5845983647872849,
      "grad_norm": 1.7504903078079224,
      "learning_rate": 8.888991387887694e-06,
      "loss": 0.0373,
      "step": 357220
    },
    {
      "epoch": 0.5846310952259383,
      "grad_norm": 2.7292046546936035,
      "learning_rate": 8.888925495674176e-06,
      "loss": 0.0474,
      "step": 357240
    },
    {
      "epoch": 0.5846638256645915,
      "grad_norm": 4.835075855255127,
      "learning_rate": 8.88885960346066e-06,
      "loss": 0.033,
      "step": 357260
    },
    {
      "epoch": 0.5846965561032449,
      "grad_norm": 2.0409657955169678,
      "learning_rate": 8.888793711247143e-06,
      "loss": 0.0377,
      "step": 357280
    },
    {
      "epoch": 0.5847292865418983,
      "grad_norm": 1.4416460990905762,
      "learning_rate": 8.888727819033625e-06,
      "loss": 0.0418,
      "step": 357300
    },
    {
      "epoch": 0.5847620169805515,
      "grad_norm": 1.445237636566162,
      "learning_rate": 8.888661926820109e-06,
      "loss": 0.0298,
      "step": 357320
    },
    {
      "epoch": 0.5847947474192049,
      "grad_norm": 0.5381118655204773,
      "learning_rate": 8.888596034606592e-06,
      "loss": 0.0357,
      "step": 357340
    },
    {
      "epoch": 0.5848274778578583,
      "grad_norm": 1.5103299617767334,
      "learning_rate": 8.888530142393074e-06,
      "loss": 0.0442,
      "step": 357360
    },
    {
      "epoch": 0.5848602082965116,
      "grad_norm": 4.282952785491943,
      "learning_rate": 8.888464250179558e-06,
      "loss": 0.0357,
      "step": 357380
    },
    {
      "epoch": 0.5848929387351649,
      "grad_norm": 2.1715335845947266,
      "learning_rate": 8.88839835796604e-06,
      "loss": 0.0304,
      "step": 357400
    },
    {
      "epoch": 0.5849256691738183,
      "grad_norm": 0.5707017183303833,
      "learning_rate": 8.888332465752523e-06,
      "loss": 0.0445,
      "step": 357420
    },
    {
      "epoch": 0.5849583996124716,
      "grad_norm": 1.610264539718628,
      "learning_rate": 8.888266573539007e-06,
      "loss": 0.0287,
      "step": 357440
    },
    {
      "epoch": 0.5849911300511249,
      "grad_norm": 1.6056835651397705,
      "learning_rate": 8.888200681325489e-06,
      "loss": 0.0406,
      "step": 357460
    },
    {
      "epoch": 0.5850238604897783,
      "grad_norm": 0.3241610825061798,
      "learning_rate": 8.888134789111972e-06,
      "loss": 0.0479,
      "step": 357480
    },
    {
      "epoch": 0.5850565909284317,
      "grad_norm": 5.153896331787109,
      "learning_rate": 8.888068896898454e-06,
      "loss": 0.0503,
      "step": 357500
    },
    {
      "epoch": 0.5850893213670849,
      "grad_norm": 0.49871528148651123,
      "learning_rate": 8.888003004684938e-06,
      "loss": 0.0447,
      "step": 357520
    },
    {
      "epoch": 0.5851220518057383,
      "grad_norm": 2.121201276779175,
      "learning_rate": 8.88793711247142e-06,
      "loss": 0.0457,
      "step": 357540
    },
    {
      "epoch": 0.5851547822443917,
      "grad_norm": 0.26007726788520813,
      "learning_rate": 8.887871220257903e-06,
      "loss": 0.0293,
      "step": 357560
    },
    {
      "epoch": 0.585187512683045,
      "grad_norm": 1.5821993350982666,
      "learning_rate": 8.887805328044385e-06,
      "loss": 0.0381,
      "step": 357580
    },
    {
      "epoch": 0.5852202431216983,
      "grad_norm": 1.826473355293274,
      "learning_rate": 8.887739435830869e-06,
      "loss": 0.0446,
      "step": 357600
    },
    {
      "epoch": 0.5852529735603517,
      "grad_norm": 1.9074373245239258,
      "learning_rate": 8.88767354361735e-06,
      "loss": 0.0338,
      "step": 357620
    },
    {
      "epoch": 0.585285703999005,
      "grad_norm": 0.8294195532798767,
      "learning_rate": 8.887607651403834e-06,
      "loss": 0.0383,
      "step": 357640
    },
    {
      "epoch": 0.5853184344376583,
      "grad_norm": 0.4406054615974426,
      "learning_rate": 8.887541759190316e-06,
      "loss": 0.0461,
      "step": 357660
    },
    {
      "epoch": 0.5853511648763117,
      "grad_norm": 1.4428699016571045,
      "learning_rate": 8.8874758669768e-06,
      "loss": 0.0367,
      "step": 357680
    },
    {
      "epoch": 0.585383895314965,
      "grad_norm": 1.9513447284698486,
      "learning_rate": 8.887409974763283e-06,
      "loss": 0.0462,
      "step": 357700
    },
    {
      "epoch": 0.5854166257536183,
      "grad_norm": 0.7345249652862549,
      "learning_rate": 8.887344082549765e-06,
      "loss": 0.045,
      "step": 357720
    },
    {
      "epoch": 0.5854493561922717,
      "grad_norm": 1.8066730499267578,
      "learning_rate": 8.887278190336249e-06,
      "loss": 0.0246,
      "step": 357740
    },
    {
      "epoch": 0.585482086630925,
      "grad_norm": 1.4974024295806885,
      "learning_rate": 8.887212298122732e-06,
      "loss": 0.0347,
      "step": 357760
    },
    {
      "epoch": 0.5855148170695784,
      "grad_norm": 1.484366774559021,
      "learning_rate": 8.887146405909214e-06,
      "loss": 0.0378,
      "step": 357780
    },
    {
      "epoch": 0.5855475475082317,
      "grad_norm": 0.5135911107063293,
      "learning_rate": 8.887080513695698e-06,
      "loss": 0.0307,
      "step": 357800
    },
    {
      "epoch": 0.585580277946885,
      "grad_norm": 1.3168073892593384,
      "learning_rate": 8.887014621482181e-06,
      "loss": 0.0421,
      "step": 357820
    },
    {
      "epoch": 0.5856130083855384,
      "grad_norm": 1.0581605434417725,
      "learning_rate": 8.886948729268663e-06,
      "loss": 0.0372,
      "step": 357840
    },
    {
      "epoch": 0.5856457388241917,
      "grad_norm": 5.725719928741455,
      "learning_rate": 8.886882837055147e-06,
      "loss": 0.0532,
      "step": 357860
    },
    {
      "epoch": 0.5856784692628451,
      "grad_norm": 0.32905471324920654,
      "learning_rate": 8.886816944841629e-06,
      "loss": 0.029,
      "step": 357880
    },
    {
      "epoch": 0.5857111997014984,
      "grad_norm": 3.5677645206451416,
      "learning_rate": 8.886751052628112e-06,
      "loss": 0.0262,
      "step": 357900
    },
    {
      "epoch": 0.5857439301401517,
      "grad_norm": 2.5846216678619385,
      "learning_rate": 8.886685160414594e-06,
      "loss": 0.0305,
      "step": 357920
    },
    {
      "epoch": 0.5857766605788051,
      "grad_norm": 3.05971622467041,
      "learning_rate": 8.886619268201078e-06,
      "loss": 0.0359,
      "step": 357940
    },
    {
      "epoch": 0.5858093910174584,
      "grad_norm": 1.4586284160614014,
      "learning_rate": 8.88655337598756e-06,
      "loss": 0.0367,
      "step": 357960
    },
    {
      "epoch": 0.5858421214561118,
      "grad_norm": 1.1307048797607422,
      "learning_rate": 8.886487483774043e-06,
      "loss": 0.0393,
      "step": 357980
    },
    {
      "epoch": 0.5858748518947651,
      "grad_norm": 1.0207149982452393,
      "learning_rate": 8.886421591560525e-06,
      "loss": 0.0308,
      "step": 358000
    },
    {
      "epoch": 0.5859075823334184,
      "grad_norm": 0.679367184638977,
      "learning_rate": 8.886355699347009e-06,
      "loss": 0.0333,
      "step": 358020
    },
    {
      "epoch": 0.5859403127720718,
      "grad_norm": 1.2399325370788574,
      "learning_rate": 8.88628980713349e-06,
      "loss": 0.0331,
      "step": 358040
    },
    {
      "epoch": 0.5859730432107251,
      "grad_norm": 1.0052459239959717,
      "learning_rate": 8.886223914919974e-06,
      "loss": 0.0396,
      "step": 358060
    },
    {
      "epoch": 0.5860057736493784,
      "grad_norm": 0.949436604976654,
      "learning_rate": 8.886158022706458e-06,
      "loss": 0.0422,
      "step": 358080
    },
    {
      "epoch": 0.5860385040880318,
      "grad_norm": 0.7303012013435364,
      "learning_rate": 8.88609213049294e-06,
      "loss": 0.0252,
      "step": 358100
    },
    {
      "epoch": 0.5860712345266851,
      "grad_norm": 0.24634820222854614,
      "learning_rate": 8.886026238279423e-06,
      "loss": 0.0314,
      "step": 358120
    },
    {
      "epoch": 0.5861039649653385,
      "grad_norm": 1.2718642950057983,
      "learning_rate": 8.885960346065907e-06,
      "loss": 0.0297,
      "step": 358140
    },
    {
      "epoch": 0.5861366954039918,
      "grad_norm": 0.8960434794425964,
      "learning_rate": 8.885894453852389e-06,
      "loss": 0.0312,
      "step": 358160
    },
    {
      "epoch": 0.5861694258426452,
      "grad_norm": 1.226859450340271,
      "learning_rate": 8.885828561638872e-06,
      "loss": 0.0371,
      "step": 358180
    },
    {
      "epoch": 0.5862021562812985,
      "grad_norm": 3.9667506217956543,
      "learning_rate": 8.885762669425356e-06,
      "loss": 0.0262,
      "step": 358200
    },
    {
      "epoch": 0.5862348867199518,
      "grad_norm": 0.5398544669151306,
      "learning_rate": 8.885696777211838e-06,
      "loss": 0.0441,
      "step": 358220
    },
    {
      "epoch": 0.5862676171586052,
      "grad_norm": 1.9801348447799683,
      "learning_rate": 8.885630884998321e-06,
      "loss": 0.0318,
      "step": 358240
    },
    {
      "epoch": 0.5863003475972585,
      "grad_norm": 0.41488978266716003,
      "learning_rate": 8.885564992784803e-06,
      "loss": 0.0496,
      "step": 358260
    },
    {
      "epoch": 0.5863330780359118,
      "grad_norm": 2.4292120933532715,
      "learning_rate": 8.885499100571287e-06,
      "loss": 0.0409,
      "step": 358280
    },
    {
      "epoch": 0.5863658084745652,
      "grad_norm": 1.6550596952438354,
      "learning_rate": 8.885433208357769e-06,
      "loss": 0.0327,
      "step": 358300
    },
    {
      "epoch": 0.5863985389132185,
      "grad_norm": 0.6241673231124878,
      "learning_rate": 8.885367316144252e-06,
      "loss": 0.0374,
      "step": 358320
    },
    {
      "epoch": 0.5864312693518718,
      "grad_norm": 1.6009217500686646,
      "learning_rate": 8.885301423930734e-06,
      "loss": 0.0386,
      "step": 358340
    },
    {
      "epoch": 0.5864639997905252,
      "grad_norm": 1.0274393558502197,
      "learning_rate": 8.885235531717218e-06,
      "loss": 0.0356,
      "step": 358360
    },
    {
      "epoch": 0.5864967302291786,
      "grad_norm": 0.7281943559646606,
      "learning_rate": 8.8851696395037e-06,
      "loss": 0.0407,
      "step": 358380
    },
    {
      "epoch": 0.5865294606678318,
      "grad_norm": 1.9351364374160767,
      "learning_rate": 8.885103747290183e-06,
      "loss": 0.0455,
      "step": 358400
    },
    {
      "epoch": 0.5865621911064852,
      "grad_norm": 0.43695276975631714,
      "learning_rate": 8.885037855076665e-06,
      "loss": 0.0402,
      "step": 358420
    },
    {
      "epoch": 0.5865949215451386,
      "grad_norm": 2.206468105316162,
      "learning_rate": 8.884971962863149e-06,
      "loss": 0.0434,
      "step": 358440
    },
    {
      "epoch": 0.5866276519837919,
      "grad_norm": 0.5718386173248291,
      "learning_rate": 8.88490607064963e-06,
      "loss": 0.0318,
      "step": 358460
    },
    {
      "epoch": 0.5866603824224452,
      "grad_norm": 1.0062509775161743,
      "learning_rate": 8.884840178436114e-06,
      "loss": 0.0377,
      "step": 358480
    },
    {
      "epoch": 0.5866931128610986,
      "grad_norm": 1.1230871677398682,
      "learning_rate": 8.884774286222598e-06,
      "loss": 0.0332,
      "step": 358500
    },
    {
      "epoch": 0.5867258432997519,
      "grad_norm": 0.9195370078086853,
      "learning_rate": 8.88470839400908e-06,
      "loss": 0.0435,
      "step": 358520
    },
    {
      "epoch": 0.5867585737384052,
      "grad_norm": 1.726190209388733,
      "learning_rate": 8.884642501795563e-06,
      "loss": 0.0264,
      "step": 358540
    },
    {
      "epoch": 0.5867913041770586,
      "grad_norm": 1.6177281141281128,
      "learning_rate": 8.884576609582047e-06,
      "loss": 0.0335,
      "step": 358560
    },
    {
      "epoch": 0.5868240346157119,
      "grad_norm": 0.8738150596618652,
      "learning_rate": 8.884510717368529e-06,
      "loss": 0.0272,
      "step": 358580
    },
    {
      "epoch": 0.5868567650543652,
      "grad_norm": 1.8438748121261597,
      "learning_rate": 8.884444825155012e-06,
      "loss": 0.0381,
      "step": 358600
    },
    {
      "epoch": 0.5868894954930186,
      "grad_norm": 2.082240104675293,
      "learning_rate": 8.884378932941496e-06,
      "loss": 0.0284,
      "step": 358620
    },
    {
      "epoch": 0.586922225931672,
      "grad_norm": 1.2774409055709839,
      "learning_rate": 8.884313040727978e-06,
      "loss": 0.047,
      "step": 358640
    },
    {
      "epoch": 0.5869549563703252,
      "grad_norm": 0.6840344667434692,
      "learning_rate": 8.884247148514461e-06,
      "loss": 0.038,
      "step": 358660
    },
    {
      "epoch": 0.5869876868089786,
      "grad_norm": 1.6053640842437744,
      "learning_rate": 8.884181256300943e-06,
      "loss": 0.0304,
      "step": 358680
    },
    {
      "epoch": 0.587020417247632,
      "grad_norm": 1.6327617168426514,
      "learning_rate": 8.884115364087427e-06,
      "loss": 0.0399,
      "step": 358700
    },
    {
      "epoch": 0.5870531476862852,
      "grad_norm": 2.642014265060425,
      "learning_rate": 8.884049471873909e-06,
      "loss": 0.0383,
      "step": 358720
    },
    {
      "epoch": 0.5870858781249386,
      "grad_norm": 0.7763763666152954,
      "learning_rate": 8.883983579660392e-06,
      "loss": 0.0346,
      "step": 358740
    },
    {
      "epoch": 0.587118608563592,
      "grad_norm": 0.7173309922218323,
      "learning_rate": 8.883917687446874e-06,
      "loss": 0.026,
      "step": 358760
    },
    {
      "epoch": 0.5871513390022453,
      "grad_norm": 0.7115435004234314,
      "learning_rate": 8.883851795233358e-06,
      "loss": 0.0296,
      "step": 358780
    },
    {
      "epoch": 0.5871840694408986,
      "grad_norm": 0.872693657875061,
      "learning_rate": 8.88378590301984e-06,
      "loss": 0.0445,
      "step": 358800
    },
    {
      "epoch": 0.587216799879552,
      "grad_norm": 0.28908777236938477,
      "learning_rate": 8.883720010806323e-06,
      "loss": 0.0296,
      "step": 358820
    },
    {
      "epoch": 0.5872495303182054,
      "grad_norm": 4.0989203453063965,
      "learning_rate": 8.883654118592807e-06,
      "loss": 0.0439,
      "step": 358840
    },
    {
      "epoch": 0.5872822607568586,
      "grad_norm": 1.0798791646957397,
      "learning_rate": 8.883588226379289e-06,
      "loss": 0.0395,
      "step": 358860
    },
    {
      "epoch": 0.587314991195512,
      "grad_norm": 0.9996169805526733,
      "learning_rate": 8.883522334165772e-06,
      "loss": 0.0402,
      "step": 358880
    },
    {
      "epoch": 0.5873477216341654,
      "grad_norm": 3.160956382751465,
      "learning_rate": 8.883456441952254e-06,
      "loss": 0.0324,
      "step": 358900
    },
    {
      "epoch": 0.5873804520728186,
      "grad_norm": 1.2855349779129028,
      "learning_rate": 8.883390549738738e-06,
      "loss": 0.0416,
      "step": 358920
    },
    {
      "epoch": 0.587413182511472,
      "grad_norm": 1.9194387197494507,
      "learning_rate": 8.883324657525222e-06,
      "loss": 0.0425,
      "step": 358940
    },
    {
      "epoch": 0.5874459129501254,
      "grad_norm": 1.060180425643921,
      "learning_rate": 8.883258765311703e-06,
      "loss": 0.0336,
      "step": 358960
    },
    {
      "epoch": 0.5874786433887786,
      "grad_norm": 1.9276344776153564,
      "learning_rate": 8.883192873098187e-06,
      "loss": 0.0396,
      "step": 358980
    },
    {
      "epoch": 0.587511373827432,
      "grad_norm": 0.8750149607658386,
      "learning_rate": 8.88312698088467e-06,
      "loss": 0.0318,
      "step": 359000
    },
    {
      "epoch": 0.5875441042660854,
      "grad_norm": 3.287781000137329,
      "learning_rate": 8.883061088671152e-06,
      "loss": 0.0427,
      "step": 359020
    },
    {
      "epoch": 0.5875768347047388,
      "grad_norm": 1.7469041347503662,
      "learning_rate": 8.882995196457636e-06,
      "loss": 0.0283,
      "step": 359040
    },
    {
      "epoch": 0.587609565143392,
      "grad_norm": 1.5076169967651367,
      "learning_rate": 8.882929304244118e-06,
      "loss": 0.0471,
      "step": 359060
    },
    {
      "epoch": 0.5876422955820454,
      "grad_norm": 1.701146125793457,
      "learning_rate": 8.882863412030602e-06,
      "loss": 0.0412,
      "step": 359080
    },
    {
      "epoch": 0.5876750260206988,
      "grad_norm": 0.9838380217552185,
      "learning_rate": 8.882797519817083e-06,
      "loss": 0.0481,
      "step": 359100
    },
    {
      "epoch": 0.587707756459352,
      "grad_norm": 0.9120108485221863,
      "learning_rate": 8.882731627603567e-06,
      "loss": 0.0388,
      "step": 359120
    },
    {
      "epoch": 0.5877404868980054,
      "grad_norm": 0.41185566782951355,
      "learning_rate": 8.882665735390049e-06,
      "loss": 0.0391,
      "step": 359140
    },
    {
      "epoch": 0.5877732173366588,
      "grad_norm": 2.5531280040740967,
      "learning_rate": 8.882599843176533e-06,
      "loss": 0.0322,
      "step": 359160
    },
    {
      "epoch": 0.587805947775312,
      "grad_norm": 0.9117220640182495,
      "learning_rate": 8.882533950963016e-06,
      "loss": 0.0385,
      "step": 359180
    },
    {
      "epoch": 0.5878386782139654,
      "grad_norm": 0.936583399772644,
      "learning_rate": 8.882468058749498e-06,
      "loss": 0.0261,
      "step": 359200
    },
    {
      "epoch": 0.5878714086526188,
      "grad_norm": 0.6799331903457642,
      "learning_rate": 8.882402166535982e-06,
      "loss": 0.0283,
      "step": 359220
    },
    {
      "epoch": 0.5879041390912721,
      "grad_norm": 0.3668719232082367,
      "learning_rate": 8.882336274322463e-06,
      "loss": 0.0266,
      "step": 359240
    },
    {
      "epoch": 0.5879368695299254,
      "grad_norm": 0.7916178107261658,
      "learning_rate": 8.882270382108947e-06,
      "loss": 0.0396,
      "step": 359260
    },
    {
      "epoch": 0.5879695999685788,
      "grad_norm": 1.0628448724746704,
      "learning_rate": 8.882204489895429e-06,
      "loss": 0.0257,
      "step": 359280
    },
    {
      "epoch": 0.5880023304072322,
      "grad_norm": 1.530165195465088,
      "learning_rate": 8.882138597681913e-06,
      "loss": 0.0331,
      "step": 359300
    },
    {
      "epoch": 0.5880350608458854,
      "grad_norm": 4.013701438903809,
      "learning_rate": 8.882072705468396e-06,
      "loss": 0.0466,
      "step": 359320
    },
    {
      "epoch": 0.5880677912845388,
      "grad_norm": 1.3130241632461548,
      "learning_rate": 8.882006813254878e-06,
      "loss": 0.0486,
      "step": 359340
    },
    {
      "epoch": 0.5881005217231922,
      "grad_norm": 1.6724878549575806,
      "learning_rate": 8.881940921041362e-06,
      "loss": 0.0313,
      "step": 359360
    },
    {
      "epoch": 0.5881332521618454,
      "grad_norm": 0.849271833896637,
      "learning_rate": 8.881875028827845e-06,
      "loss": 0.0359,
      "step": 359380
    },
    {
      "epoch": 0.5881659826004988,
      "grad_norm": 1.011224389076233,
      "learning_rate": 8.881809136614327e-06,
      "loss": 0.035,
      "step": 359400
    },
    {
      "epoch": 0.5881987130391522,
      "grad_norm": 4.020320415496826,
      "learning_rate": 8.88174324440081e-06,
      "loss": 0.0358,
      "step": 359420
    },
    {
      "epoch": 0.5882314434778055,
      "grad_norm": 0.3671395182609558,
      "learning_rate": 8.881677352187293e-06,
      "loss": 0.0388,
      "step": 359440
    },
    {
      "epoch": 0.5882641739164588,
      "grad_norm": 2.1587367057800293,
      "learning_rate": 8.881611459973776e-06,
      "loss": 0.0603,
      "step": 359460
    },
    {
      "epoch": 0.5882969043551122,
      "grad_norm": 1.2053277492523193,
      "learning_rate": 8.881545567760258e-06,
      "loss": 0.0296,
      "step": 359480
    },
    {
      "epoch": 0.5883296347937655,
      "grad_norm": 0.5234797596931458,
      "learning_rate": 8.881479675546742e-06,
      "loss": 0.0349,
      "step": 359500
    },
    {
      "epoch": 0.5883623652324188,
      "grad_norm": 1.0117366313934326,
      "learning_rate": 8.881413783333225e-06,
      "loss": 0.0279,
      "step": 359520
    },
    {
      "epoch": 0.5883950956710722,
      "grad_norm": 1.3849068880081177,
      "learning_rate": 8.881347891119707e-06,
      "loss": 0.0482,
      "step": 359540
    },
    {
      "epoch": 0.5884278261097255,
      "grad_norm": 1.6719926595687866,
      "learning_rate": 8.88128199890619e-06,
      "loss": 0.0391,
      "step": 359560
    },
    {
      "epoch": 0.5884605565483788,
      "grad_norm": 1.6100449562072754,
      "learning_rate": 8.881216106692673e-06,
      "loss": 0.0314,
      "step": 359580
    },
    {
      "epoch": 0.5884932869870322,
      "grad_norm": 0.6504416465759277,
      "learning_rate": 8.881150214479156e-06,
      "loss": 0.03,
      "step": 359600
    },
    {
      "epoch": 0.5885260174256856,
      "grad_norm": 2.0716190338134766,
      "learning_rate": 8.881084322265638e-06,
      "loss": 0.0365,
      "step": 359620
    },
    {
      "epoch": 0.5885587478643389,
      "grad_norm": 2.952721118927002,
      "learning_rate": 8.881018430052122e-06,
      "loss": 0.0363,
      "step": 359640
    },
    {
      "epoch": 0.5885914783029922,
      "grad_norm": 0.447569340467453,
      "learning_rate": 8.880952537838604e-06,
      "loss": 0.0361,
      "step": 359660
    },
    {
      "epoch": 0.5886242087416456,
      "grad_norm": 2.76305890083313,
      "learning_rate": 8.880886645625087e-06,
      "loss": 0.0293,
      "step": 359680
    },
    {
      "epoch": 0.5886569391802989,
      "grad_norm": 2.4237701892852783,
      "learning_rate": 8.880820753411569e-06,
      "loss": 0.0342,
      "step": 359700
    },
    {
      "epoch": 0.5886896696189522,
      "grad_norm": 6.878786087036133,
      "learning_rate": 8.880754861198053e-06,
      "loss": 0.0325,
      "step": 359720
    },
    {
      "epoch": 0.5887224000576056,
      "grad_norm": 1.7277357578277588,
      "learning_rate": 8.880688968984536e-06,
      "loss": 0.0398,
      "step": 359740
    },
    {
      "epoch": 0.5887551304962589,
      "grad_norm": 3.139846086502075,
      "learning_rate": 8.880623076771018e-06,
      "loss": 0.0566,
      "step": 359760
    },
    {
      "epoch": 0.5887878609349122,
      "grad_norm": 0.5146377086639404,
      "learning_rate": 8.880557184557502e-06,
      "loss": 0.031,
      "step": 359780
    },
    {
      "epoch": 0.5888205913735656,
      "grad_norm": 2.326719045639038,
      "learning_rate": 8.880491292343985e-06,
      "loss": 0.0422,
      "step": 359800
    },
    {
      "epoch": 0.5888533218122189,
      "grad_norm": 0.9964901208877563,
      "learning_rate": 8.880425400130467e-06,
      "loss": 0.0307,
      "step": 359820
    },
    {
      "epoch": 0.5888860522508723,
      "grad_norm": 1.3508918285369873,
      "learning_rate": 8.88035950791695e-06,
      "loss": 0.0353,
      "step": 359840
    },
    {
      "epoch": 0.5889187826895256,
      "grad_norm": 1.3272451162338257,
      "learning_rate": 8.880293615703433e-06,
      "loss": 0.0295,
      "step": 359860
    },
    {
      "epoch": 0.588951513128179,
      "grad_norm": 2.914030075073242,
      "learning_rate": 8.880227723489916e-06,
      "loss": 0.0404,
      "step": 359880
    },
    {
      "epoch": 0.5889842435668323,
      "grad_norm": 0.2906992435455322,
      "learning_rate": 8.8801618312764e-06,
      "loss": 0.0388,
      "step": 359900
    },
    {
      "epoch": 0.5890169740054856,
      "grad_norm": 2.5186378955841064,
      "learning_rate": 8.880095939062882e-06,
      "loss": 0.04,
      "step": 359920
    },
    {
      "epoch": 0.589049704444139,
      "grad_norm": 0.7210748791694641,
      "learning_rate": 8.880030046849365e-06,
      "loss": 0.0314,
      "step": 359940
    },
    {
      "epoch": 0.5890824348827923,
      "grad_norm": 2.0218605995178223,
      "learning_rate": 8.879964154635847e-06,
      "loss": 0.0296,
      "step": 359960
    },
    {
      "epoch": 0.5891151653214456,
      "grad_norm": 0.5263038277626038,
      "learning_rate": 8.87989826242233e-06,
      "loss": 0.0309,
      "step": 359980
    },
    {
      "epoch": 0.589147895760099,
      "grad_norm": 1.9486163854599,
      "learning_rate": 8.879832370208813e-06,
      "loss": 0.0453,
      "step": 360000
    },
    {
      "epoch": 0.5891806261987523,
      "grad_norm": 0.3578684628009796,
      "learning_rate": 8.879766477995296e-06,
      "loss": 0.0429,
      "step": 360020
    },
    {
      "epoch": 0.5892133566374057,
      "grad_norm": 1.282785177230835,
      "learning_rate": 8.879700585781778e-06,
      "loss": 0.0332,
      "step": 360040
    },
    {
      "epoch": 0.589246087076059,
      "grad_norm": 0.3411412835121155,
      "learning_rate": 8.879634693568262e-06,
      "loss": 0.0307,
      "step": 360060
    },
    {
      "epoch": 0.5892788175147123,
      "grad_norm": 1.4493069648742676,
      "learning_rate": 8.879568801354744e-06,
      "loss": 0.0352,
      "step": 360080
    },
    {
      "epoch": 0.5893115479533657,
      "grad_norm": 1.0157897472381592,
      "learning_rate": 8.879502909141227e-06,
      "loss": 0.029,
      "step": 360100
    },
    {
      "epoch": 0.589344278392019,
      "grad_norm": 1.708426833152771,
      "learning_rate": 8.87943701692771e-06,
      "loss": 0.0299,
      "step": 360120
    },
    {
      "epoch": 0.5893770088306723,
      "grad_norm": 0.819500207901001,
      "learning_rate": 8.879371124714193e-06,
      "loss": 0.0338,
      "step": 360140
    },
    {
      "epoch": 0.5894097392693257,
      "grad_norm": 0.5707365274429321,
      "learning_rate": 8.879305232500676e-06,
      "loss": 0.0303,
      "step": 360160
    },
    {
      "epoch": 0.589442469707979,
      "grad_norm": 0.4542442262172699,
      "learning_rate": 8.87923934028716e-06,
      "loss": 0.0394,
      "step": 360180
    },
    {
      "epoch": 0.5894752001466323,
      "grad_norm": 2.0864710807800293,
      "learning_rate": 8.879173448073642e-06,
      "loss": 0.0383,
      "step": 360200
    },
    {
      "epoch": 0.5895079305852857,
      "grad_norm": 0.724166214466095,
      "learning_rate": 8.879107555860125e-06,
      "loss": 0.042,
      "step": 360220
    },
    {
      "epoch": 0.5895406610239391,
      "grad_norm": 0.991239070892334,
      "learning_rate": 8.879041663646609e-06,
      "loss": 0.034,
      "step": 360240
    },
    {
      "epoch": 0.5895733914625924,
      "grad_norm": 2.433087110519409,
      "learning_rate": 8.87897577143309e-06,
      "loss": 0.0299,
      "step": 360260
    },
    {
      "epoch": 0.5896061219012457,
      "grad_norm": 3.1785569190979004,
      "learning_rate": 8.878909879219574e-06,
      "loss": 0.0261,
      "step": 360280
    },
    {
      "epoch": 0.5896388523398991,
      "grad_norm": 0.5922318696975708,
      "learning_rate": 8.878843987006056e-06,
      "loss": 0.0431,
      "step": 360300
    },
    {
      "epoch": 0.5896715827785524,
      "grad_norm": 8.446544647216797,
      "learning_rate": 8.87877809479254e-06,
      "loss": 0.034,
      "step": 360320
    },
    {
      "epoch": 0.5897043132172057,
      "grad_norm": 1.0255111455917358,
      "learning_rate": 8.878712202579022e-06,
      "loss": 0.0395,
      "step": 360340
    },
    {
      "epoch": 0.5897370436558591,
      "grad_norm": 0.9798215627670288,
      "learning_rate": 8.878646310365505e-06,
      "loss": 0.0399,
      "step": 360360
    },
    {
      "epoch": 0.5897697740945124,
      "grad_norm": 0.9201107025146484,
      "learning_rate": 8.878580418151987e-06,
      "loss": 0.0303,
      "step": 360380
    },
    {
      "epoch": 0.5898025045331657,
      "grad_norm": 0.4715317189693451,
      "learning_rate": 8.878514525938471e-06,
      "loss": 0.0465,
      "step": 360400
    },
    {
      "epoch": 0.5898352349718191,
      "grad_norm": 1.2535629272460938,
      "learning_rate": 8.878448633724953e-06,
      "loss": 0.0415,
      "step": 360420
    },
    {
      "epoch": 0.5898679654104725,
      "grad_norm": 1.2045105695724487,
      "learning_rate": 8.878382741511436e-06,
      "loss": 0.0368,
      "step": 360440
    },
    {
      "epoch": 0.5899006958491257,
      "grad_norm": 0.478763222694397,
      "learning_rate": 8.878316849297918e-06,
      "loss": 0.0341,
      "step": 360460
    },
    {
      "epoch": 0.5899334262877791,
      "grad_norm": 2.4010112285614014,
      "learning_rate": 8.878250957084402e-06,
      "loss": 0.0284,
      "step": 360480
    },
    {
      "epoch": 0.5899661567264325,
      "grad_norm": 1.1115070581436157,
      "learning_rate": 8.878185064870884e-06,
      "loss": 0.0268,
      "step": 360500
    },
    {
      "epoch": 0.5899988871650858,
      "grad_norm": 2.8507022857666016,
      "learning_rate": 8.878119172657367e-06,
      "loss": 0.0488,
      "step": 360520
    },
    {
      "epoch": 0.5900316176037391,
      "grad_norm": 1.0448229312896729,
      "learning_rate": 8.878053280443851e-06,
      "loss": 0.0337,
      "step": 360540
    },
    {
      "epoch": 0.5900643480423925,
      "grad_norm": 1.270897388458252,
      "learning_rate": 8.877987388230333e-06,
      "loss": 0.0401,
      "step": 360560
    },
    {
      "epoch": 0.5900970784810458,
      "grad_norm": 1.0057820081710815,
      "learning_rate": 8.877921496016816e-06,
      "loss": 0.0285,
      "step": 360580
    },
    {
      "epoch": 0.5901298089196991,
      "grad_norm": 3.023719549179077,
      "learning_rate": 8.8778556038033e-06,
      "loss": 0.0318,
      "step": 360600
    },
    {
      "epoch": 0.5901625393583525,
      "grad_norm": 0.5572087168693542,
      "learning_rate": 8.877789711589782e-06,
      "loss": 0.0286,
      "step": 360620
    },
    {
      "epoch": 0.5901952697970059,
      "grad_norm": 1.4338260889053345,
      "learning_rate": 8.877723819376265e-06,
      "loss": 0.0382,
      "step": 360640
    },
    {
      "epoch": 0.5902280002356591,
      "grad_norm": 0.9482830166816711,
      "learning_rate": 8.877657927162749e-06,
      "loss": 0.0419,
      "step": 360660
    },
    {
      "epoch": 0.5902607306743125,
      "grad_norm": 0.9645163416862488,
      "learning_rate": 8.877592034949231e-06,
      "loss": 0.0339,
      "step": 360680
    },
    {
      "epoch": 0.5902934611129659,
      "grad_norm": 0.35223791003227234,
      "learning_rate": 8.877526142735714e-06,
      "loss": 0.0323,
      "step": 360700
    },
    {
      "epoch": 0.5903261915516191,
      "grad_norm": 0.40475934743881226,
      "learning_rate": 8.877460250522196e-06,
      "loss": 0.0501,
      "step": 360720
    },
    {
      "epoch": 0.5903589219902725,
      "grad_norm": 0.8303666710853577,
      "learning_rate": 8.87739435830868e-06,
      "loss": 0.0308,
      "step": 360740
    },
    {
      "epoch": 0.5903916524289259,
      "grad_norm": 0.8161411285400391,
      "learning_rate": 8.877328466095162e-06,
      "loss": 0.0269,
      "step": 360760
    },
    {
      "epoch": 0.5904243828675791,
      "grad_norm": 3.141366481781006,
      "learning_rate": 8.877262573881645e-06,
      "loss": 0.0399,
      "step": 360780
    },
    {
      "epoch": 0.5904571133062325,
      "grad_norm": 1.470715880393982,
      "learning_rate": 8.877196681668127e-06,
      "loss": 0.0319,
      "step": 360800
    },
    {
      "epoch": 0.5904898437448859,
      "grad_norm": 0.771521806716919,
      "learning_rate": 8.877130789454611e-06,
      "loss": 0.0281,
      "step": 360820
    },
    {
      "epoch": 0.5905225741835393,
      "grad_norm": 0.20604446530342102,
      "learning_rate": 8.877064897241093e-06,
      "loss": 0.0356,
      "step": 360840
    },
    {
      "epoch": 0.5905553046221925,
      "grad_norm": 1.3355393409729004,
      "learning_rate": 8.876999005027576e-06,
      "loss": 0.0522,
      "step": 360860
    },
    {
      "epoch": 0.5905880350608459,
      "grad_norm": 1.1705020666122437,
      "learning_rate": 8.876933112814058e-06,
      "loss": 0.0398,
      "step": 360880
    },
    {
      "epoch": 0.5906207654994993,
      "grad_norm": 3.2309465408325195,
      "learning_rate": 8.876867220600542e-06,
      "loss": 0.0331,
      "step": 360900
    },
    {
      "epoch": 0.5906534959381525,
      "grad_norm": 1.6486787796020508,
      "learning_rate": 8.876801328387025e-06,
      "loss": 0.0348,
      "step": 360920
    },
    {
      "epoch": 0.5906862263768059,
      "grad_norm": 3.080374002456665,
      "learning_rate": 8.876735436173507e-06,
      "loss": 0.043,
      "step": 360940
    },
    {
      "epoch": 0.5907189568154593,
      "grad_norm": 0.9954433441162109,
      "learning_rate": 8.876669543959991e-06,
      "loss": 0.037,
      "step": 360960
    },
    {
      "epoch": 0.5907516872541125,
      "grad_norm": 1.7459819316864014,
      "learning_rate": 8.876603651746475e-06,
      "loss": 0.0441,
      "step": 360980
    },
    {
      "epoch": 0.5907844176927659,
      "grad_norm": 0.8016812205314636,
      "learning_rate": 8.876537759532956e-06,
      "loss": 0.0483,
      "step": 361000
    },
    {
      "epoch": 0.5908171481314193,
      "grad_norm": 0.8167745471000671,
      "learning_rate": 8.87647186731944e-06,
      "loss": 0.0376,
      "step": 361020
    },
    {
      "epoch": 0.5908498785700727,
      "grad_norm": 1.944266676902771,
      "learning_rate": 8.876405975105924e-06,
      "loss": 0.0453,
      "step": 361040
    },
    {
      "epoch": 0.5908826090087259,
      "grad_norm": 1.391660451889038,
      "learning_rate": 8.876340082892405e-06,
      "loss": 0.0341,
      "step": 361060
    },
    {
      "epoch": 0.5909153394473793,
      "grad_norm": 1.043528437614441,
      "learning_rate": 8.876274190678889e-06,
      "loss": 0.0378,
      "step": 361080
    },
    {
      "epoch": 0.5909480698860327,
      "grad_norm": 0.4697840213775635,
      "learning_rate": 8.876208298465371e-06,
      "loss": 0.0292,
      "step": 361100
    },
    {
      "epoch": 0.5909808003246859,
      "grad_norm": 1.0020630359649658,
      "learning_rate": 8.876142406251855e-06,
      "loss": 0.0315,
      "step": 361120
    },
    {
      "epoch": 0.5910135307633393,
      "grad_norm": 1.5440127849578857,
      "learning_rate": 8.876076514038336e-06,
      "loss": 0.0492,
      "step": 361140
    },
    {
      "epoch": 0.5910462612019927,
      "grad_norm": 0.12595511972904205,
      "learning_rate": 8.87601062182482e-06,
      "loss": 0.0237,
      "step": 361160
    },
    {
      "epoch": 0.5910789916406459,
      "grad_norm": 1.356997013092041,
      "learning_rate": 8.875944729611302e-06,
      "loss": 0.0379,
      "step": 361180
    },
    {
      "epoch": 0.5911117220792993,
      "grad_norm": 0.9178532958030701,
      "learning_rate": 8.875878837397786e-06,
      "loss": 0.0381,
      "step": 361200
    },
    {
      "epoch": 0.5911444525179527,
      "grad_norm": 3.5989015102386475,
      "learning_rate": 8.875812945184267e-06,
      "loss": 0.0326,
      "step": 361220
    },
    {
      "epoch": 0.591177182956606,
      "grad_norm": 0.6689327955245972,
      "learning_rate": 8.875747052970751e-06,
      "loss": 0.0323,
      "step": 361240
    },
    {
      "epoch": 0.5912099133952593,
      "grad_norm": 0.17822985351085663,
      "learning_rate": 8.875681160757233e-06,
      "loss": 0.0338,
      "step": 361260
    },
    {
      "epoch": 0.5912426438339127,
      "grad_norm": 1.9450702667236328,
      "learning_rate": 8.875615268543716e-06,
      "loss": 0.042,
      "step": 361280
    },
    {
      "epoch": 0.591275374272566,
      "grad_norm": 0.7701297402381897,
      "learning_rate": 8.8755493763302e-06,
      "loss": 0.0417,
      "step": 361300
    },
    {
      "epoch": 0.5913081047112193,
      "grad_norm": 1.437059760093689,
      "learning_rate": 8.875483484116682e-06,
      "loss": 0.0346,
      "step": 361320
    },
    {
      "epoch": 0.5913408351498727,
      "grad_norm": 1.3550711870193481,
      "learning_rate": 8.875417591903166e-06,
      "loss": 0.0446,
      "step": 361340
    },
    {
      "epoch": 0.591373565588526,
      "grad_norm": 0.8559374213218689,
      "learning_rate": 8.875351699689647e-06,
      "loss": 0.0425,
      "step": 361360
    },
    {
      "epoch": 0.5914062960271793,
      "grad_norm": 0.5362967252731323,
      "learning_rate": 8.875285807476131e-06,
      "loss": 0.038,
      "step": 361380
    },
    {
      "epoch": 0.5914390264658327,
      "grad_norm": 2.648203134536743,
      "learning_rate": 8.875219915262615e-06,
      "loss": 0.0421,
      "step": 361400
    },
    {
      "epoch": 0.5914717569044861,
      "grad_norm": 1.0913397073745728,
      "learning_rate": 8.875154023049097e-06,
      "loss": 0.0307,
      "step": 361420
    },
    {
      "epoch": 0.5915044873431393,
      "grad_norm": 0.9229966402053833,
      "learning_rate": 8.87508813083558e-06,
      "loss": 0.038,
      "step": 361440
    },
    {
      "epoch": 0.5915372177817927,
      "grad_norm": 1.532179355621338,
      "learning_rate": 8.875022238622064e-06,
      "loss": 0.0407,
      "step": 361460
    },
    {
      "epoch": 0.5915699482204461,
      "grad_norm": 2.091749429702759,
      "learning_rate": 8.874956346408546e-06,
      "loss": 0.0428,
      "step": 361480
    },
    {
      "epoch": 0.5916026786590994,
      "grad_norm": 1.525439977645874,
      "learning_rate": 8.87489045419503e-06,
      "loss": 0.032,
      "step": 361500
    },
    {
      "epoch": 0.5916354090977527,
      "grad_norm": 2.540498971939087,
      "learning_rate": 8.874824561981511e-06,
      "loss": 0.0391,
      "step": 361520
    },
    {
      "epoch": 0.5916681395364061,
      "grad_norm": 1.8195942640304565,
      "learning_rate": 8.874758669767995e-06,
      "loss": 0.0387,
      "step": 361540
    },
    {
      "epoch": 0.5917008699750594,
      "grad_norm": 1.77519690990448,
      "learning_rate": 8.874692777554477e-06,
      "loss": 0.0342,
      "step": 361560
    },
    {
      "epoch": 0.5917336004137127,
      "grad_norm": 0.9691839814186096,
      "learning_rate": 8.87462688534096e-06,
      "loss": 0.0325,
      "step": 361580
    },
    {
      "epoch": 0.5917663308523661,
      "grad_norm": 2.9783082008361816,
      "learning_rate": 8.874560993127442e-06,
      "loss": 0.0523,
      "step": 361600
    },
    {
      "epoch": 0.5917990612910194,
      "grad_norm": 2.8819451332092285,
      "learning_rate": 8.874495100913926e-06,
      "loss": 0.0399,
      "step": 361620
    },
    {
      "epoch": 0.5918317917296727,
      "grad_norm": 1.9635467529296875,
      "learning_rate": 8.87442920870041e-06,
      "loss": 0.0408,
      "step": 361640
    },
    {
      "epoch": 0.5918645221683261,
      "grad_norm": 0.4691492021083832,
      "learning_rate": 8.874363316486891e-06,
      "loss": 0.0356,
      "step": 361660
    },
    {
      "epoch": 0.5918972526069795,
      "grad_norm": 1.934665560722351,
      "learning_rate": 8.874297424273375e-06,
      "loss": 0.0406,
      "step": 361680
    },
    {
      "epoch": 0.5919299830456328,
      "grad_norm": 1.4847537279129028,
      "learning_rate": 8.874231532059857e-06,
      "loss": 0.0432,
      "step": 361700
    },
    {
      "epoch": 0.5919627134842861,
      "grad_norm": 0.7310218811035156,
      "learning_rate": 8.87416563984634e-06,
      "loss": 0.0484,
      "step": 361720
    },
    {
      "epoch": 0.5919954439229395,
      "grad_norm": 0.8685556650161743,
      "learning_rate": 8.874099747632822e-06,
      "loss": 0.0471,
      "step": 361740
    },
    {
      "epoch": 0.5920281743615928,
      "grad_norm": 1.2220821380615234,
      "learning_rate": 8.874033855419306e-06,
      "loss": 0.0341,
      "step": 361760
    },
    {
      "epoch": 0.5920609048002461,
      "grad_norm": 0.40155646204948425,
      "learning_rate": 8.87396796320579e-06,
      "loss": 0.0371,
      "step": 361780
    },
    {
      "epoch": 0.5920936352388995,
      "grad_norm": 3.96270489692688,
      "learning_rate": 8.873902070992271e-06,
      "loss": 0.0399,
      "step": 361800
    },
    {
      "epoch": 0.5921263656775528,
      "grad_norm": 0.9546728730201721,
      "learning_rate": 8.873836178778755e-06,
      "loss": 0.0311,
      "step": 361820
    },
    {
      "epoch": 0.5921590961162061,
      "grad_norm": 1.3568603992462158,
      "learning_rate": 8.873770286565238e-06,
      "loss": 0.0274,
      "step": 361840
    },
    {
      "epoch": 0.5921918265548595,
      "grad_norm": 0.3942820727825165,
      "learning_rate": 8.87370439435172e-06,
      "loss": 0.0412,
      "step": 361860
    },
    {
      "epoch": 0.5922245569935128,
      "grad_norm": 0.5082517862319946,
      "learning_rate": 8.873638502138204e-06,
      "loss": 0.0302,
      "step": 361880
    },
    {
      "epoch": 0.5922572874321662,
      "grad_norm": 0.42625927925109863,
      "learning_rate": 8.873572609924686e-06,
      "loss": 0.0327,
      "step": 361900
    },
    {
      "epoch": 0.5922900178708195,
      "grad_norm": 2.9366300106048584,
      "learning_rate": 8.87350671771117e-06,
      "loss": 0.0514,
      "step": 361920
    },
    {
      "epoch": 0.5923227483094728,
      "grad_norm": 0.4590793550014496,
      "learning_rate": 8.873440825497651e-06,
      "loss": 0.039,
      "step": 361940
    },
    {
      "epoch": 0.5923554787481262,
      "grad_norm": 0.546295166015625,
      "learning_rate": 8.873374933284135e-06,
      "loss": 0.0323,
      "step": 361960
    },
    {
      "epoch": 0.5923882091867795,
      "grad_norm": 0.4239377975463867,
      "learning_rate": 8.873309041070617e-06,
      "loss": 0.0464,
      "step": 361980
    },
    {
      "epoch": 0.5924209396254329,
      "grad_norm": 1.3875993490219116,
      "learning_rate": 8.8732431488571e-06,
      "loss": 0.0501,
      "step": 362000
    },
    {
      "epoch": 0.5924536700640862,
      "grad_norm": 0.7507469654083252,
      "learning_rate": 8.873177256643584e-06,
      "loss": 0.0367,
      "step": 362020
    },
    {
      "epoch": 0.5924864005027395,
      "grad_norm": 2.4450740814208984,
      "learning_rate": 8.873111364430066e-06,
      "loss": 0.0626,
      "step": 362040
    },
    {
      "epoch": 0.5925191309413929,
      "grad_norm": 0.6854196190834045,
      "learning_rate": 8.87304547221655e-06,
      "loss": 0.0397,
      "step": 362060
    },
    {
      "epoch": 0.5925518613800462,
      "grad_norm": 1.3733683824539185,
      "learning_rate": 8.872979580003031e-06,
      "loss": 0.0409,
      "step": 362080
    },
    {
      "epoch": 0.5925845918186996,
      "grad_norm": 1.3110746145248413,
      "learning_rate": 8.872913687789515e-06,
      "loss": 0.0329,
      "step": 362100
    },
    {
      "epoch": 0.5926173222573529,
      "grad_norm": 0.9031506180763245,
      "learning_rate": 8.872847795575997e-06,
      "loss": 0.0334,
      "step": 362120
    },
    {
      "epoch": 0.5926500526960062,
      "grad_norm": 1.9392677545547485,
      "learning_rate": 8.87278190336248e-06,
      "loss": 0.0303,
      "step": 362140
    },
    {
      "epoch": 0.5926827831346596,
      "grad_norm": 1.8570749759674072,
      "learning_rate": 8.872716011148964e-06,
      "loss": 0.0339,
      "step": 362160
    },
    {
      "epoch": 0.5927155135733129,
      "grad_norm": 0.5269585847854614,
      "learning_rate": 8.872650118935446e-06,
      "loss": 0.0346,
      "step": 362180
    },
    {
      "epoch": 0.5927482440119662,
      "grad_norm": 1.357820987701416,
      "learning_rate": 8.87258422672193e-06,
      "loss": 0.0336,
      "step": 362200
    },
    {
      "epoch": 0.5927809744506196,
      "grad_norm": 0.2729280889034271,
      "learning_rate": 8.872518334508413e-06,
      "loss": 0.0356,
      "step": 362220
    },
    {
      "epoch": 0.5928137048892729,
      "grad_norm": 1.0867387056350708,
      "learning_rate": 8.872452442294895e-06,
      "loss": 0.0279,
      "step": 362240
    },
    {
      "epoch": 0.5928464353279262,
      "grad_norm": 1.313645839691162,
      "learning_rate": 8.872386550081378e-06,
      "loss": 0.0409,
      "step": 362260
    },
    {
      "epoch": 0.5928791657665796,
      "grad_norm": 1.9943723678588867,
      "learning_rate": 8.87232065786786e-06,
      "loss": 0.0323,
      "step": 362280
    },
    {
      "epoch": 0.592911896205233,
      "grad_norm": 1.006601095199585,
      "learning_rate": 8.872254765654344e-06,
      "loss": 0.0402,
      "step": 362300
    },
    {
      "epoch": 0.5929446266438863,
      "grad_norm": 4.2884931564331055,
      "learning_rate": 8.872188873440826e-06,
      "loss": 0.0429,
      "step": 362320
    },
    {
      "epoch": 0.5929773570825396,
      "grad_norm": 1.4185471534729004,
      "learning_rate": 8.87212298122731e-06,
      "loss": 0.031,
      "step": 362340
    },
    {
      "epoch": 0.593010087521193,
      "grad_norm": 2.407731294631958,
      "learning_rate": 8.872057089013793e-06,
      "loss": 0.0354,
      "step": 362360
    },
    {
      "epoch": 0.5930428179598463,
      "grad_norm": 0.9312223196029663,
      "learning_rate": 8.871991196800275e-06,
      "loss": 0.0381,
      "step": 362380
    },
    {
      "epoch": 0.5930755483984996,
      "grad_norm": 0.4262562692165375,
      "learning_rate": 8.871925304586758e-06,
      "loss": 0.0394,
      "step": 362400
    },
    {
      "epoch": 0.593108278837153,
      "grad_norm": 1.0642147064208984,
      "learning_rate": 8.87185941237324e-06,
      "loss": 0.0371,
      "step": 362420
    },
    {
      "epoch": 0.5931410092758063,
      "grad_norm": 2.7325501441955566,
      "learning_rate": 8.871793520159724e-06,
      "loss": 0.0409,
      "step": 362440
    },
    {
      "epoch": 0.5931737397144596,
      "grad_norm": 1.1191844940185547,
      "learning_rate": 8.871727627946206e-06,
      "loss": 0.0284,
      "step": 362460
    },
    {
      "epoch": 0.593206470153113,
      "grad_norm": 0.5581691861152649,
      "learning_rate": 8.87166173573269e-06,
      "loss": 0.0447,
      "step": 362480
    },
    {
      "epoch": 0.5932392005917664,
      "grad_norm": 1.1414495706558228,
      "learning_rate": 8.871595843519171e-06,
      "loss": 0.0364,
      "step": 362500
    },
    {
      "epoch": 0.5932719310304196,
      "grad_norm": 0.4879065752029419,
      "learning_rate": 8.871529951305655e-06,
      "loss": 0.0315,
      "step": 362520
    },
    {
      "epoch": 0.593304661469073,
      "grad_norm": 2.0764710903167725,
      "learning_rate": 8.871464059092137e-06,
      "loss": 0.0297,
      "step": 362540
    },
    {
      "epoch": 0.5933373919077264,
      "grad_norm": 1.587540864944458,
      "learning_rate": 8.87139816687862e-06,
      "loss": 0.0351,
      "step": 362560
    },
    {
      "epoch": 0.5933701223463796,
      "grad_norm": 0.3368659019470215,
      "learning_rate": 8.871332274665104e-06,
      "loss": 0.0404,
      "step": 362580
    },
    {
      "epoch": 0.593402852785033,
      "grad_norm": 1.7648999691009521,
      "learning_rate": 8.871266382451586e-06,
      "loss": 0.0334,
      "step": 362600
    },
    {
      "epoch": 0.5934355832236864,
      "grad_norm": 2.629298210144043,
      "learning_rate": 8.87120049023807e-06,
      "loss": 0.026,
      "step": 362620
    },
    {
      "epoch": 0.5934683136623397,
      "grad_norm": 2.344370126724243,
      "learning_rate": 8.871134598024553e-06,
      "loss": 0.0334,
      "step": 362640
    },
    {
      "epoch": 0.593501044100993,
      "grad_norm": 1.033656358718872,
      "learning_rate": 8.871068705811035e-06,
      "loss": 0.0321,
      "step": 362660
    },
    {
      "epoch": 0.5935337745396464,
      "grad_norm": 2.9136810302734375,
      "learning_rate": 8.871002813597518e-06,
      "loss": 0.0384,
      "step": 362680
    },
    {
      "epoch": 0.5935665049782998,
      "grad_norm": 0.8305720090866089,
      "learning_rate": 8.870936921384002e-06,
      "loss": 0.0327,
      "step": 362700
    },
    {
      "epoch": 0.593599235416953,
      "grad_norm": 1.8907732963562012,
      "learning_rate": 8.870871029170484e-06,
      "loss": 0.0322,
      "step": 362720
    },
    {
      "epoch": 0.5936319658556064,
      "grad_norm": 1.207341194152832,
      "learning_rate": 8.870805136956967e-06,
      "loss": 0.0381,
      "step": 362740
    },
    {
      "epoch": 0.5936646962942598,
      "grad_norm": 0.9465813040733337,
      "learning_rate": 8.87073924474345e-06,
      "loss": 0.0231,
      "step": 362760
    },
    {
      "epoch": 0.593697426732913,
      "grad_norm": 2.1814639568328857,
      "learning_rate": 8.870673352529933e-06,
      "loss": 0.0388,
      "step": 362780
    },
    {
      "epoch": 0.5937301571715664,
      "grad_norm": 1.995468258857727,
      "learning_rate": 8.870607460316415e-06,
      "loss": 0.0318,
      "step": 362800
    },
    {
      "epoch": 0.5937628876102198,
      "grad_norm": 2.015899181365967,
      "learning_rate": 8.870541568102898e-06,
      "loss": 0.0388,
      "step": 362820
    },
    {
      "epoch": 0.593795618048873,
      "grad_norm": 0.9140145778656006,
      "learning_rate": 8.87047567588938e-06,
      "loss": 0.0259,
      "step": 362840
    },
    {
      "epoch": 0.5938283484875264,
      "grad_norm": 0.21092912554740906,
      "learning_rate": 8.870409783675864e-06,
      "loss": 0.0346,
      "step": 362860
    },
    {
      "epoch": 0.5938610789261798,
      "grad_norm": 1.5583362579345703,
      "learning_rate": 8.870343891462346e-06,
      "loss": 0.0532,
      "step": 362880
    },
    {
      "epoch": 0.5938938093648332,
      "grad_norm": 1.926285743713379,
      "learning_rate": 8.87027799924883e-06,
      "loss": 0.0334,
      "step": 362900
    },
    {
      "epoch": 0.5939265398034864,
      "grad_norm": 1.267581582069397,
      "learning_rate": 8.870212107035311e-06,
      "loss": 0.0378,
      "step": 362920
    },
    {
      "epoch": 0.5939592702421398,
      "grad_norm": 3.2603580951690674,
      "learning_rate": 8.870146214821795e-06,
      "loss": 0.0343,
      "step": 362940
    },
    {
      "epoch": 0.5939920006807932,
      "grad_norm": 1.4582444429397583,
      "learning_rate": 8.870080322608278e-06,
      "loss": 0.0387,
      "step": 362960
    },
    {
      "epoch": 0.5940247311194464,
      "grad_norm": 0.3392758071422577,
      "learning_rate": 8.87001443039476e-06,
      "loss": 0.0295,
      "step": 362980
    },
    {
      "epoch": 0.5940574615580998,
      "grad_norm": 1.471268653869629,
      "learning_rate": 8.869948538181244e-06,
      "loss": 0.0329,
      "step": 363000
    },
    {
      "epoch": 0.5940901919967532,
      "grad_norm": 0.4710361659526825,
      "learning_rate": 8.869882645967728e-06,
      "loss": 0.0372,
      "step": 363020
    },
    {
      "epoch": 0.5941229224354064,
      "grad_norm": 1.317899227142334,
      "learning_rate": 8.86981675375421e-06,
      "loss": 0.036,
      "step": 363040
    },
    {
      "epoch": 0.5941556528740598,
      "grad_norm": 0.8855060935020447,
      "learning_rate": 8.869750861540693e-06,
      "loss": 0.0308,
      "step": 363060
    },
    {
      "epoch": 0.5941883833127132,
      "grad_norm": 1.7486991882324219,
      "learning_rate": 8.869684969327177e-06,
      "loss": 0.0369,
      "step": 363080
    },
    {
      "epoch": 0.5942211137513665,
      "grad_norm": 1.619043231010437,
      "learning_rate": 8.869619077113659e-06,
      "loss": 0.0329,
      "step": 363100
    },
    {
      "epoch": 0.5942538441900198,
      "grad_norm": 0.6258323192596436,
      "learning_rate": 8.869553184900142e-06,
      "loss": 0.0465,
      "step": 363120
    },
    {
      "epoch": 0.5942865746286732,
      "grad_norm": 1.7214734554290771,
      "learning_rate": 8.869487292686624e-06,
      "loss": 0.0342,
      "step": 363140
    },
    {
      "epoch": 0.5943193050673266,
      "grad_norm": 2.142202138900757,
      "learning_rate": 8.869421400473108e-06,
      "loss": 0.0419,
      "step": 363160
    },
    {
      "epoch": 0.5943520355059798,
      "grad_norm": 1.9972915649414062,
      "learning_rate": 8.86935550825959e-06,
      "loss": 0.0466,
      "step": 363180
    },
    {
      "epoch": 0.5943847659446332,
      "grad_norm": 4.7945027351379395,
      "learning_rate": 8.869289616046073e-06,
      "loss": 0.0385,
      "step": 363200
    },
    {
      "epoch": 0.5944174963832866,
      "grad_norm": 1.100007176399231,
      "learning_rate": 8.869223723832555e-06,
      "loss": 0.0483,
      "step": 363220
    },
    {
      "epoch": 0.5944502268219398,
      "grad_norm": 1.1748188734054565,
      "learning_rate": 8.869157831619039e-06,
      "loss": 0.0442,
      "step": 363240
    },
    {
      "epoch": 0.5944829572605932,
      "grad_norm": 0.5314587354660034,
      "learning_rate": 8.86909193940552e-06,
      "loss": 0.0438,
      "step": 363260
    },
    {
      "epoch": 0.5945156876992466,
      "grad_norm": 1.3955011367797852,
      "learning_rate": 8.869026047192004e-06,
      "loss": 0.0471,
      "step": 363280
    },
    {
      "epoch": 0.5945484181378999,
      "grad_norm": 0.8512316942214966,
      "learning_rate": 8.868960154978486e-06,
      "loss": 0.0369,
      "step": 363300
    },
    {
      "epoch": 0.5945811485765532,
      "grad_norm": 0.11136627942323685,
      "learning_rate": 8.86889426276497e-06,
      "loss": 0.0295,
      "step": 363320
    },
    {
      "epoch": 0.5946138790152066,
      "grad_norm": 2.00249981880188,
      "learning_rate": 8.868828370551451e-06,
      "loss": 0.0386,
      "step": 363340
    },
    {
      "epoch": 0.5946466094538599,
      "grad_norm": 2.231902837753296,
      "learning_rate": 8.868762478337935e-06,
      "loss": 0.0345,
      "step": 363360
    },
    {
      "epoch": 0.5946793398925132,
      "grad_norm": 2.298945188522339,
      "learning_rate": 8.868696586124419e-06,
      "loss": 0.031,
      "step": 363380
    },
    {
      "epoch": 0.5947120703311666,
      "grad_norm": 3.6512093544006348,
      "learning_rate": 8.8686306939109e-06,
      "loss": 0.0424,
      "step": 363400
    },
    {
      "epoch": 0.59474480076982,
      "grad_norm": 1.2010159492492676,
      "learning_rate": 8.868564801697384e-06,
      "loss": 0.0402,
      "step": 363420
    },
    {
      "epoch": 0.5947775312084732,
      "grad_norm": 1.2894123792648315,
      "learning_rate": 8.868498909483868e-06,
      "loss": 0.055,
      "step": 363440
    },
    {
      "epoch": 0.5948102616471266,
      "grad_norm": 2.261003255844116,
      "learning_rate": 8.86843301727035e-06,
      "loss": 0.0414,
      "step": 363460
    },
    {
      "epoch": 0.59484299208578,
      "grad_norm": 1.1975339651107788,
      "learning_rate": 8.868367125056833e-06,
      "loss": 0.0474,
      "step": 363480
    },
    {
      "epoch": 0.5948757225244333,
      "grad_norm": 0.6025441288948059,
      "learning_rate": 8.868301232843317e-06,
      "loss": 0.0283,
      "step": 363500
    },
    {
      "epoch": 0.5949084529630866,
      "grad_norm": 1.1389755010604858,
      "learning_rate": 8.868235340629799e-06,
      "loss": 0.0346,
      "step": 363520
    },
    {
      "epoch": 0.59494118340174,
      "grad_norm": 0.9379865527153015,
      "learning_rate": 8.868169448416282e-06,
      "loss": 0.0506,
      "step": 363540
    },
    {
      "epoch": 0.5949739138403933,
      "grad_norm": 1.2961770296096802,
      "learning_rate": 8.868103556202764e-06,
      "loss": 0.033,
      "step": 363560
    },
    {
      "epoch": 0.5950066442790466,
      "grad_norm": 0.41962364315986633,
      "learning_rate": 8.868037663989248e-06,
      "loss": 0.0361,
      "step": 363580
    },
    {
      "epoch": 0.5950393747177,
      "grad_norm": 0.22022376954555511,
      "learning_rate": 8.86797177177573e-06,
      "loss": 0.0363,
      "step": 363600
    },
    {
      "epoch": 0.5950721051563533,
      "grad_norm": 1.2667877674102783,
      "learning_rate": 8.867905879562213e-06,
      "loss": 0.0313,
      "step": 363620
    },
    {
      "epoch": 0.5951048355950066,
      "grad_norm": 0.9173898100852966,
      "learning_rate": 8.867839987348695e-06,
      "loss": 0.0213,
      "step": 363640
    },
    {
      "epoch": 0.59513756603366,
      "grad_norm": 0.31965649127960205,
      "learning_rate": 8.867774095135179e-06,
      "loss": 0.0383,
      "step": 363660
    },
    {
      "epoch": 0.5951702964723133,
      "grad_norm": 0.6110767126083374,
      "learning_rate": 8.86770820292166e-06,
      "loss": 0.0457,
      "step": 363680
    },
    {
      "epoch": 0.5952030269109667,
      "grad_norm": 0.8412606120109558,
      "learning_rate": 8.867642310708144e-06,
      "loss": 0.0316,
      "step": 363700
    },
    {
      "epoch": 0.59523575734962,
      "grad_norm": 1.7787259817123413,
      "learning_rate": 8.867576418494626e-06,
      "loss": 0.0373,
      "step": 363720
    },
    {
      "epoch": 0.5952684877882733,
      "grad_norm": 2.0915699005126953,
      "learning_rate": 8.86751052628111e-06,
      "loss": 0.0343,
      "step": 363740
    },
    {
      "epoch": 0.5953012182269267,
      "grad_norm": 0.4255344271659851,
      "learning_rate": 8.867444634067593e-06,
      "loss": 0.0329,
      "step": 363760
    },
    {
      "epoch": 0.59533394866558,
      "grad_norm": 1.8381191492080688,
      "learning_rate": 8.867378741854075e-06,
      "loss": 0.0336,
      "step": 363780
    },
    {
      "epoch": 0.5953666791042334,
      "grad_norm": 0.8201766014099121,
      "learning_rate": 8.867312849640559e-06,
      "loss": 0.0321,
      "step": 363800
    },
    {
      "epoch": 0.5953994095428867,
      "grad_norm": 2.474829912185669,
      "learning_rate": 8.867246957427042e-06,
      "loss": 0.0415,
      "step": 363820
    },
    {
      "epoch": 0.59543213998154,
      "grad_norm": 0.7597877979278564,
      "learning_rate": 8.867181065213524e-06,
      "loss": 0.0436,
      "step": 363840
    },
    {
      "epoch": 0.5954648704201934,
      "grad_norm": 0.5899341106414795,
      "learning_rate": 8.867115173000008e-06,
      "loss": 0.0397,
      "step": 363860
    },
    {
      "epoch": 0.5954976008588467,
      "grad_norm": 2.2602410316467285,
      "learning_rate": 8.867049280786491e-06,
      "loss": 0.0358,
      "step": 363880
    },
    {
      "epoch": 0.5955303312975001,
      "grad_norm": 1.4080370664596558,
      "learning_rate": 8.866983388572973e-06,
      "loss": 0.0388,
      "step": 363900
    },
    {
      "epoch": 0.5955630617361534,
      "grad_norm": 0.4375050961971283,
      "learning_rate": 8.866917496359457e-06,
      "loss": 0.0578,
      "step": 363920
    },
    {
      "epoch": 0.5955957921748067,
      "grad_norm": 1.0802230834960938,
      "learning_rate": 8.866851604145939e-06,
      "loss": 0.0284,
      "step": 363940
    },
    {
      "epoch": 0.5956285226134601,
      "grad_norm": 0.4586149752140045,
      "learning_rate": 8.866785711932422e-06,
      "loss": 0.0381,
      "step": 363960
    },
    {
      "epoch": 0.5956612530521134,
      "grad_norm": 0.9540100693702698,
      "learning_rate": 8.866719819718904e-06,
      "loss": 0.0316,
      "step": 363980
    },
    {
      "epoch": 0.5956939834907667,
      "grad_norm": 0.2341976761817932,
      "learning_rate": 8.866653927505388e-06,
      "loss": 0.0274,
      "step": 364000
    },
    {
      "epoch": 0.5957267139294201,
      "grad_norm": 1.9524527788162231,
      "learning_rate": 8.86658803529187e-06,
      "loss": 0.0403,
      "step": 364020
    },
    {
      "epoch": 0.5957594443680734,
      "grad_norm": 3.287997245788574,
      "learning_rate": 8.866522143078353e-06,
      "loss": 0.0406,
      "step": 364040
    },
    {
      "epoch": 0.5957921748067267,
      "grad_norm": 1.6808075904846191,
      "learning_rate": 8.866456250864835e-06,
      "loss": 0.0355,
      "step": 364060
    },
    {
      "epoch": 0.5958249052453801,
      "grad_norm": 1.9160610437393188,
      "learning_rate": 8.866390358651319e-06,
      "loss": 0.0406,
      "step": 364080
    },
    {
      "epoch": 0.5958576356840334,
      "grad_norm": 2.653151750564575,
      "learning_rate": 8.866324466437802e-06,
      "loss": 0.0374,
      "step": 364100
    },
    {
      "epoch": 0.5958903661226868,
      "grad_norm": 0.5095150470733643,
      "learning_rate": 8.866258574224284e-06,
      "loss": 0.0305,
      "step": 364120
    },
    {
      "epoch": 0.5959230965613401,
      "grad_norm": 1.6788220405578613,
      "learning_rate": 8.866192682010768e-06,
      "loss": 0.0302,
      "step": 364140
    },
    {
      "epoch": 0.5959558269999935,
      "grad_norm": 0.7840241193771362,
      "learning_rate": 8.86612678979725e-06,
      "loss": 0.0269,
      "step": 364160
    },
    {
      "epoch": 0.5959885574386468,
      "grad_norm": 1.2458213567733765,
      "learning_rate": 8.866060897583733e-06,
      "loss": 0.0256,
      "step": 364180
    },
    {
      "epoch": 0.5960212878773001,
      "grad_norm": 1.4673082828521729,
      "learning_rate": 8.865995005370215e-06,
      "loss": 0.0385,
      "step": 364200
    },
    {
      "epoch": 0.5960540183159535,
      "grad_norm": 1.3790124654769897,
      "learning_rate": 8.865929113156699e-06,
      "loss": 0.0418,
      "step": 364220
    },
    {
      "epoch": 0.5960867487546068,
      "grad_norm": 1.0200214385986328,
      "learning_rate": 8.865863220943182e-06,
      "loss": 0.0353,
      "step": 364240
    },
    {
      "epoch": 0.5961194791932601,
      "grad_norm": 0.8408417105674744,
      "learning_rate": 8.865797328729664e-06,
      "loss": 0.0418,
      "step": 364260
    },
    {
      "epoch": 0.5961522096319135,
      "grad_norm": 1.6010862588882446,
      "learning_rate": 8.865731436516148e-06,
      "loss": 0.0424,
      "step": 364280
    },
    {
      "epoch": 0.5961849400705668,
      "grad_norm": 2.6122677326202393,
      "learning_rate": 8.865665544302631e-06,
      "loss": 0.0512,
      "step": 364300
    },
    {
      "epoch": 0.5962176705092201,
      "grad_norm": 1.4260965585708618,
      "learning_rate": 8.865599652089113e-06,
      "loss": 0.0279,
      "step": 364320
    },
    {
      "epoch": 0.5962504009478735,
      "grad_norm": 1.0333561897277832,
      "learning_rate": 8.865533759875597e-06,
      "loss": 0.0283,
      "step": 364340
    },
    {
      "epoch": 0.5962831313865269,
      "grad_norm": 0.9387226104736328,
      "learning_rate": 8.865467867662079e-06,
      "loss": 0.0395,
      "step": 364360
    },
    {
      "epoch": 0.5963158618251801,
      "grad_norm": 0.6354104280471802,
      "learning_rate": 8.865401975448562e-06,
      "loss": 0.0359,
      "step": 364380
    },
    {
      "epoch": 0.5963485922638335,
      "grad_norm": 0.8222338557243347,
      "learning_rate": 8.865336083235044e-06,
      "loss": 0.0431,
      "step": 364400
    },
    {
      "epoch": 0.5963813227024869,
      "grad_norm": 1.6559467315673828,
      "learning_rate": 8.865270191021528e-06,
      "loss": 0.0332,
      "step": 364420
    },
    {
      "epoch": 0.5964140531411402,
      "grad_norm": 1.4939993619918823,
      "learning_rate": 8.86520429880801e-06,
      "loss": 0.0448,
      "step": 364440
    },
    {
      "epoch": 0.5964467835797935,
      "grad_norm": 2.2298123836517334,
      "learning_rate": 8.865138406594493e-06,
      "loss": 0.0374,
      "step": 364460
    },
    {
      "epoch": 0.5964795140184469,
      "grad_norm": 1.8495057821273804,
      "learning_rate": 8.865072514380977e-06,
      "loss": 0.0303,
      "step": 364480
    },
    {
      "epoch": 0.5965122444571002,
      "grad_norm": 1.949196457862854,
      "learning_rate": 8.865006622167459e-06,
      "loss": 0.0407,
      "step": 364500
    },
    {
      "epoch": 0.5965449748957535,
      "grad_norm": 1.2721134424209595,
      "learning_rate": 8.864940729953942e-06,
      "loss": 0.0268,
      "step": 364520
    },
    {
      "epoch": 0.5965777053344069,
      "grad_norm": 0.9535636901855469,
      "learning_rate": 8.864874837740424e-06,
      "loss": 0.0342,
      "step": 364540
    },
    {
      "epoch": 0.5966104357730603,
      "grad_norm": 2.7566230297088623,
      "learning_rate": 8.864808945526908e-06,
      "loss": 0.0365,
      "step": 364560
    },
    {
      "epoch": 0.5966431662117135,
      "grad_norm": 1.7754414081573486,
      "learning_rate": 8.86474305331339e-06,
      "loss": 0.0317,
      "step": 364580
    },
    {
      "epoch": 0.5966758966503669,
      "grad_norm": 1.3483353853225708,
      "learning_rate": 8.864677161099873e-06,
      "loss": 0.0407,
      "step": 364600
    },
    {
      "epoch": 0.5967086270890203,
      "grad_norm": 0.8566721081733704,
      "learning_rate": 8.864611268886357e-06,
      "loss": 0.0435,
      "step": 364620
    },
    {
      "epoch": 0.5967413575276735,
      "grad_norm": 1.1471813917160034,
      "learning_rate": 8.864545376672839e-06,
      "loss": 0.0368,
      "step": 364640
    },
    {
      "epoch": 0.5967740879663269,
      "grad_norm": 0.8143293857574463,
      "learning_rate": 8.864479484459322e-06,
      "loss": 0.0377,
      "step": 364660
    },
    {
      "epoch": 0.5968068184049803,
      "grad_norm": 1.040635347366333,
      "learning_rate": 8.864413592245806e-06,
      "loss": 0.0347,
      "step": 364680
    },
    {
      "epoch": 0.5968395488436335,
      "grad_norm": 0.5677160620689392,
      "learning_rate": 8.864347700032288e-06,
      "loss": 0.0287,
      "step": 364700
    },
    {
      "epoch": 0.5968722792822869,
      "grad_norm": 3.167459726333618,
      "learning_rate": 8.864281807818771e-06,
      "loss": 0.0471,
      "step": 364720
    },
    {
      "epoch": 0.5969050097209403,
      "grad_norm": 0.9766055345535278,
      "learning_rate": 8.864215915605253e-06,
      "loss": 0.031,
      "step": 364740
    },
    {
      "epoch": 0.5969377401595937,
      "grad_norm": 0.9419293999671936,
      "learning_rate": 8.864150023391737e-06,
      "loss": 0.0407,
      "step": 364760
    },
    {
      "epoch": 0.5969704705982469,
      "grad_norm": 0.362588107585907,
      "learning_rate": 8.864084131178219e-06,
      "loss": 0.0462,
      "step": 364780
    },
    {
      "epoch": 0.5970032010369003,
      "grad_norm": 1.7238837480545044,
      "learning_rate": 8.864018238964702e-06,
      "loss": 0.0258,
      "step": 364800
    },
    {
      "epoch": 0.5970359314755537,
      "grad_norm": 1.3029484748840332,
      "learning_rate": 8.863952346751186e-06,
      "loss": 0.0311,
      "step": 364820
    },
    {
      "epoch": 0.5970686619142069,
      "grad_norm": 0.6851518154144287,
      "learning_rate": 8.863886454537668e-06,
      "loss": 0.0396,
      "step": 364840
    },
    {
      "epoch": 0.5971013923528603,
      "grad_norm": 1.2421739101409912,
      "learning_rate": 8.863820562324151e-06,
      "loss": 0.0289,
      "step": 364860
    },
    {
      "epoch": 0.5971341227915137,
      "grad_norm": 4.013026237487793,
      "learning_rate": 8.863754670110633e-06,
      "loss": 0.0531,
      "step": 364880
    },
    {
      "epoch": 0.5971668532301669,
      "grad_norm": 2.4411401748657227,
      "learning_rate": 8.863688777897117e-06,
      "loss": 0.027,
      "step": 364900
    },
    {
      "epoch": 0.5971995836688203,
      "grad_norm": 0.9593501091003418,
      "learning_rate": 8.863622885683599e-06,
      "loss": 0.0354,
      "step": 364920
    },
    {
      "epoch": 0.5972323141074737,
      "grad_norm": 0.9847521185874939,
      "learning_rate": 8.863556993470082e-06,
      "loss": 0.0382,
      "step": 364940
    },
    {
      "epoch": 0.597265044546127,
      "grad_norm": 0.6068388223648071,
      "learning_rate": 8.863491101256564e-06,
      "loss": 0.038,
      "step": 364960
    },
    {
      "epoch": 0.5972977749847803,
      "grad_norm": 3.6618502140045166,
      "learning_rate": 8.863425209043048e-06,
      "loss": 0.0382,
      "step": 364980
    },
    {
      "epoch": 0.5973305054234337,
      "grad_norm": 1.0260350704193115,
      "learning_rate": 8.863359316829531e-06,
      "loss": 0.044,
      "step": 365000
    },
    {
      "epoch": 0.5973632358620871,
      "grad_norm": 0.2238311767578125,
      "learning_rate": 8.863293424616013e-06,
      "loss": 0.0302,
      "step": 365020
    },
    {
      "epoch": 0.5973959663007403,
      "grad_norm": 0.5568323731422424,
      "learning_rate": 8.863227532402497e-06,
      "loss": 0.0501,
      "step": 365040
    },
    {
      "epoch": 0.5974286967393937,
      "grad_norm": 0.33166614174842834,
      "learning_rate": 8.86316164018898e-06,
      "loss": 0.0438,
      "step": 365060
    },
    {
      "epoch": 0.5974614271780471,
      "grad_norm": 0.5090544819831848,
      "learning_rate": 8.863095747975462e-06,
      "loss": 0.0265,
      "step": 365080
    },
    {
      "epoch": 0.5974941576167003,
      "grad_norm": 0.9176403284072876,
      "learning_rate": 8.863029855761946e-06,
      "loss": 0.035,
      "step": 365100
    },
    {
      "epoch": 0.5975268880553537,
      "grad_norm": 0.9755039811134338,
      "learning_rate": 8.862963963548428e-06,
      "loss": 0.039,
      "step": 365120
    },
    {
      "epoch": 0.5975596184940071,
      "grad_norm": 0.8517544865608215,
      "learning_rate": 8.862898071334912e-06,
      "loss": 0.0395,
      "step": 365140
    },
    {
      "epoch": 0.5975923489326604,
      "grad_norm": 3.05318021774292,
      "learning_rate": 8.862832179121395e-06,
      "loss": 0.0439,
      "step": 365160
    },
    {
      "epoch": 0.5976250793713137,
      "grad_norm": 0.774602472782135,
      "learning_rate": 8.862766286907877e-06,
      "loss": 0.0386,
      "step": 365180
    },
    {
      "epoch": 0.5976578098099671,
      "grad_norm": 1.4232964515686035,
      "learning_rate": 8.86270039469436e-06,
      "loss": 0.0418,
      "step": 365200
    },
    {
      "epoch": 0.5976905402486204,
      "grad_norm": 1.0478403568267822,
      "learning_rate": 8.862634502480842e-06,
      "loss": 0.0367,
      "step": 365220
    },
    {
      "epoch": 0.5977232706872737,
      "grad_norm": 1.7174371480941772,
      "learning_rate": 8.862568610267326e-06,
      "loss": 0.0296,
      "step": 365240
    },
    {
      "epoch": 0.5977560011259271,
      "grad_norm": 1.4020110368728638,
      "learning_rate": 8.862502718053808e-06,
      "loss": 0.0409,
      "step": 365260
    },
    {
      "epoch": 0.5977887315645805,
      "grad_norm": 0.45037034153938293,
      "learning_rate": 8.862436825840292e-06,
      "loss": 0.0252,
      "step": 365280
    },
    {
      "epoch": 0.5978214620032337,
      "grad_norm": 1.0562148094177246,
      "learning_rate": 8.862370933626773e-06,
      "loss": 0.0461,
      "step": 365300
    },
    {
      "epoch": 0.5978541924418871,
      "grad_norm": 0.3336910307407379,
      "learning_rate": 8.862305041413257e-06,
      "loss": 0.0264,
      "step": 365320
    },
    {
      "epoch": 0.5978869228805405,
      "grad_norm": 1.0450383424758911,
      "learning_rate": 8.862239149199739e-06,
      "loss": 0.027,
      "step": 365340
    },
    {
      "epoch": 0.5979196533191938,
      "grad_norm": 4.2449517250061035,
      "learning_rate": 8.862173256986222e-06,
      "loss": 0.0272,
      "step": 365360
    },
    {
      "epoch": 0.5979523837578471,
      "grad_norm": 1.9175182580947876,
      "learning_rate": 8.862107364772704e-06,
      "loss": 0.0366,
      "step": 365380
    },
    {
      "epoch": 0.5979851141965005,
      "grad_norm": 1.0449498891830444,
      "learning_rate": 8.862041472559188e-06,
      "loss": 0.0376,
      "step": 365400
    },
    {
      "epoch": 0.5980178446351538,
      "grad_norm": 0.18986289203166962,
      "learning_rate": 8.861975580345672e-06,
      "loss": 0.036,
      "step": 365420
    },
    {
      "epoch": 0.5980505750738071,
      "grad_norm": 0.9278404712677002,
      "learning_rate": 8.861909688132153e-06,
      "loss": 0.0444,
      "step": 365440
    },
    {
      "epoch": 0.5980833055124605,
      "grad_norm": 1.0829733610153198,
      "learning_rate": 8.861843795918637e-06,
      "loss": 0.0313,
      "step": 365460
    },
    {
      "epoch": 0.5981160359511138,
      "grad_norm": 0.8011190891265869,
      "learning_rate": 8.86177790370512e-06,
      "loss": 0.0345,
      "step": 365480
    },
    {
      "epoch": 0.5981487663897671,
      "grad_norm": 1.0389034748077393,
      "learning_rate": 8.861712011491603e-06,
      "loss": 0.0424,
      "step": 365500
    },
    {
      "epoch": 0.5981814968284205,
      "grad_norm": 1.7638070583343506,
      "learning_rate": 8.861646119278086e-06,
      "loss": 0.031,
      "step": 365520
    },
    {
      "epoch": 0.5982142272670739,
      "grad_norm": 0.3253380358219147,
      "learning_rate": 8.86158022706457e-06,
      "loss": 0.0306,
      "step": 365540
    },
    {
      "epoch": 0.5982469577057272,
      "grad_norm": 1.1180543899536133,
      "learning_rate": 8.861514334851052e-06,
      "loss": 0.0352,
      "step": 365560
    },
    {
      "epoch": 0.5982796881443805,
      "grad_norm": 1.2134215831756592,
      "learning_rate": 8.861448442637535e-06,
      "loss": 0.0377,
      "step": 365580
    },
    {
      "epoch": 0.5983124185830339,
      "grad_norm": 0.8986313343048096,
      "learning_rate": 8.861382550424017e-06,
      "loss": 0.0294,
      "step": 365600
    },
    {
      "epoch": 0.5983451490216872,
      "grad_norm": 2.1242587566375732,
      "learning_rate": 8.8613166582105e-06,
      "loss": 0.0558,
      "step": 365620
    },
    {
      "epoch": 0.5983778794603405,
      "grad_norm": 1.132965326309204,
      "learning_rate": 8.861250765996983e-06,
      "loss": 0.0331,
      "step": 365640
    },
    {
      "epoch": 0.5984106098989939,
      "grad_norm": 0.6362162232398987,
      "learning_rate": 8.861184873783466e-06,
      "loss": 0.0416,
      "step": 365660
    },
    {
      "epoch": 0.5984433403376472,
      "grad_norm": 2.2229065895080566,
      "learning_rate": 8.861118981569948e-06,
      "loss": 0.0358,
      "step": 365680
    },
    {
      "epoch": 0.5984760707763005,
      "grad_norm": 1.2069684267044067,
      "learning_rate": 8.861053089356432e-06,
      "loss": 0.0572,
      "step": 365700
    },
    {
      "epoch": 0.5985088012149539,
      "grad_norm": 0.6275297403335571,
      "learning_rate": 8.860987197142914e-06,
      "loss": 0.0362,
      "step": 365720
    },
    {
      "epoch": 0.5985415316536072,
      "grad_norm": 2.218986988067627,
      "learning_rate": 8.860921304929397e-06,
      "loss": 0.0286,
      "step": 365740
    },
    {
      "epoch": 0.5985742620922606,
      "grad_norm": 1.8110803365707397,
      "learning_rate": 8.860855412715879e-06,
      "loss": 0.0372,
      "step": 365760
    },
    {
      "epoch": 0.5986069925309139,
      "grad_norm": 0.7767966985702515,
      "learning_rate": 8.860789520502363e-06,
      "loss": 0.0483,
      "step": 365780
    },
    {
      "epoch": 0.5986397229695672,
      "grad_norm": 0.7489929795265198,
      "learning_rate": 8.860723628288846e-06,
      "loss": 0.0383,
      "step": 365800
    },
    {
      "epoch": 0.5986724534082206,
      "grad_norm": 3.669039249420166,
      "learning_rate": 8.860657736075328e-06,
      "loss": 0.0479,
      "step": 365820
    },
    {
      "epoch": 0.5987051838468739,
      "grad_norm": 0.5749764442443848,
      "learning_rate": 8.860591843861812e-06,
      "loss": 0.0303,
      "step": 365840
    },
    {
      "epoch": 0.5987379142855273,
      "grad_norm": 0.3219130039215088,
      "learning_rate": 8.860525951648295e-06,
      "loss": 0.0413,
      "step": 365860
    },
    {
      "epoch": 0.5987706447241806,
      "grad_norm": 0.18220314383506775,
      "learning_rate": 8.860460059434777e-06,
      "loss": 0.0386,
      "step": 365880
    },
    {
      "epoch": 0.5988033751628339,
      "grad_norm": 1.829958200454712,
      "learning_rate": 8.86039416722126e-06,
      "loss": 0.0306,
      "step": 365900
    },
    {
      "epoch": 0.5988361056014873,
      "grad_norm": 0.3745500445365906,
      "learning_rate": 8.860328275007744e-06,
      "loss": 0.0358,
      "step": 365920
    },
    {
      "epoch": 0.5988688360401406,
      "grad_norm": 1.2393052577972412,
      "learning_rate": 8.860262382794226e-06,
      "loss": 0.0333,
      "step": 365940
    },
    {
      "epoch": 0.598901566478794,
      "grad_norm": 1.1309832334518433,
      "learning_rate": 8.86019649058071e-06,
      "loss": 0.0397,
      "step": 365960
    },
    {
      "epoch": 0.5989342969174473,
      "grad_norm": 1.0518349409103394,
      "learning_rate": 8.860130598367192e-06,
      "loss": 0.0345,
      "step": 365980
    },
    {
      "epoch": 0.5989670273561006,
      "grad_norm": 1.872939109802246,
      "learning_rate": 8.860064706153675e-06,
      "loss": 0.0344,
      "step": 366000
    },
    {
      "epoch": 0.598999757794754,
      "grad_norm": 4.825409889221191,
      "learning_rate": 8.859998813940157e-06,
      "loss": 0.0393,
      "step": 366020
    },
    {
      "epoch": 0.5990324882334073,
      "grad_norm": 0.17544853687286377,
      "learning_rate": 8.85993292172664e-06,
      "loss": 0.0429,
      "step": 366040
    },
    {
      "epoch": 0.5990652186720606,
      "grad_norm": 0.8951764106750488,
      "learning_rate": 8.859867029513123e-06,
      "loss": 0.0333,
      "step": 366060
    },
    {
      "epoch": 0.599097949110714,
      "grad_norm": 1.8409641981124878,
      "learning_rate": 8.859801137299606e-06,
      "loss": 0.0314,
      "step": 366080
    },
    {
      "epoch": 0.5991306795493673,
      "grad_norm": 0.8068642616271973,
      "learning_rate": 8.859735245086088e-06,
      "loss": 0.0329,
      "step": 366100
    },
    {
      "epoch": 0.5991634099880206,
      "grad_norm": 1.4891260862350464,
      "learning_rate": 8.859669352872572e-06,
      "loss": 0.036,
      "step": 366120
    },
    {
      "epoch": 0.599196140426674,
      "grad_norm": 1.169450283050537,
      "learning_rate": 8.859603460659054e-06,
      "loss": 0.0381,
      "step": 366140
    },
    {
      "epoch": 0.5992288708653274,
      "grad_norm": 0.645108163356781,
      "learning_rate": 8.859537568445537e-06,
      "loss": 0.0409,
      "step": 366160
    },
    {
      "epoch": 0.5992616013039807,
      "grad_norm": 1.7277756929397583,
      "learning_rate": 8.859471676232019e-06,
      "loss": 0.0302,
      "step": 366180
    },
    {
      "epoch": 0.599294331742634,
      "grad_norm": 2.122210741043091,
      "learning_rate": 8.859405784018503e-06,
      "loss": 0.0439,
      "step": 366200
    },
    {
      "epoch": 0.5993270621812874,
      "grad_norm": 0.7058828473091125,
      "learning_rate": 8.859339891804986e-06,
      "loss": 0.0457,
      "step": 366220
    },
    {
      "epoch": 0.5993597926199407,
      "grad_norm": 0.6087273359298706,
      "learning_rate": 8.859273999591468e-06,
      "loss": 0.0419,
      "step": 366240
    },
    {
      "epoch": 0.599392523058594,
      "grad_norm": 1.2792326211929321,
      "learning_rate": 8.859208107377952e-06,
      "loss": 0.0448,
      "step": 366260
    },
    {
      "epoch": 0.5994252534972474,
      "grad_norm": 1.328244924545288,
      "learning_rate": 8.859142215164435e-06,
      "loss": 0.028,
      "step": 366280
    },
    {
      "epoch": 0.5994579839359007,
      "grad_norm": 0.734711229801178,
      "learning_rate": 8.859076322950917e-06,
      "loss": 0.0388,
      "step": 366300
    },
    {
      "epoch": 0.599490714374554,
      "grad_norm": 10.1633939743042,
      "learning_rate": 8.8590104307374e-06,
      "loss": 0.0358,
      "step": 366320
    },
    {
      "epoch": 0.5995234448132074,
      "grad_norm": 3.7208445072174072,
      "learning_rate": 8.858944538523884e-06,
      "loss": 0.0338,
      "step": 366340
    },
    {
      "epoch": 0.5995561752518608,
      "grad_norm": 2.0842881202697754,
      "learning_rate": 8.858878646310366e-06,
      "loss": 0.0315,
      "step": 366360
    },
    {
      "epoch": 0.599588905690514,
      "grad_norm": 0.7210730314254761,
      "learning_rate": 8.85881275409685e-06,
      "loss": 0.0278,
      "step": 366380
    },
    {
      "epoch": 0.5996216361291674,
      "grad_norm": 0.29753655195236206,
      "learning_rate": 8.858746861883332e-06,
      "loss": 0.0381,
      "step": 366400
    },
    {
      "epoch": 0.5996543665678208,
      "grad_norm": 0.9888700246810913,
      "learning_rate": 8.858680969669815e-06,
      "loss": 0.0372,
      "step": 366420
    },
    {
      "epoch": 0.599687097006474,
      "grad_norm": 0.8430702686309814,
      "learning_rate": 8.858615077456297e-06,
      "loss": 0.0404,
      "step": 366440
    },
    {
      "epoch": 0.5997198274451274,
      "grad_norm": 0.763154149055481,
      "learning_rate": 8.85854918524278e-06,
      "loss": 0.0287,
      "step": 366460
    },
    {
      "epoch": 0.5997525578837808,
      "grad_norm": 0.5599402189254761,
      "learning_rate": 8.858483293029263e-06,
      "loss": 0.0361,
      "step": 366480
    },
    {
      "epoch": 0.599785288322434,
      "grad_norm": 0.5930078625679016,
      "learning_rate": 8.858417400815746e-06,
      "loss": 0.0339,
      "step": 366500
    },
    {
      "epoch": 0.5998180187610874,
      "grad_norm": 0.9959543943405151,
      "learning_rate": 8.858351508602228e-06,
      "loss": 0.0459,
      "step": 366520
    },
    {
      "epoch": 0.5998507491997408,
      "grad_norm": 3.919050931930542,
      "learning_rate": 8.858285616388712e-06,
      "loss": 0.0438,
      "step": 366540
    },
    {
      "epoch": 0.5998834796383942,
      "grad_norm": 0.3722400367259979,
      "learning_rate": 8.858219724175195e-06,
      "loss": 0.0623,
      "step": 366560
    },
    {
      "epoch": 0.5999162100770474,
      "grad_norm": 0.7270599007606506,
      "learning_rate": 8.858153831961677e-06,
      "loss": 0.0424,
      "step": 366580
    },
    {
      "epoch": 0.5999489405157008,
      "grad_norm": 3.279583692550659,
      "learning_rate": 8.85808793974816e-06,
      "loss": 0.0332,
      "step": 366600
    },
    {
      "epoch": 0.5999816709543542,
      "grad_norm": 2.0469419956207275,
      "learning_rate": 8.858022047534643e-06,
      "loss": 0.0286,
      "step": 366620
    },
    {
      "epoch": 0.6000144013930074,
      "grad_norm": 1.6811320781707764,
      "learning_rate": 8.857956155321126e-06,
      "loss": 0.0422,
      "step": 366640
    },
    {
      "epoch": 0.6000471318316608,
      "grad_norm": 1.2424156665802002,
      "learning_rate": 8.85789026310761e-06,
      "loss": 0.0353,
      "step": 366660
    },
    {
      "epoch": 0.6000798622703142,
      "grad_norm": 1.4926379919052124,
      "learning_rate": 8.857824370894092e-06,
      "loss": 0.0336,
      "step": 366680
    },
    {
      "epoch": 0.6001125927089674,
      "grad_norm": 1.2769097089767456,
      "learning_rate": 8.857758478680575e-06,
      "loss": 0.0351,
      "step": 366700
    },
    {
      "epoch": 0.6001453231476208,
      "grad_norm": 0.7662103176116943,
      "learning_rate": 8.857692586467059e-06,
      "loss": 0.0384,
      "step": 366720
    },
    {
      "epoch": 0.6001780535862742,
      "grad_norm": 1.3189246654510498,
      "learning_rate": 8.857626694253541e-06,
      "loss": 0.0464,
      "step": 366740
    },
    {
      "epoch": 0.6002107840249276,
      "grad_norm": 1.7047919034957886,
      "learning_rate": 8.857560802040024e-06,
      "loss": 0.0323,
      "step": 366760
    },
    {
      "epoch": 0.6002435144635808,
      "grad_norm": 2.1398394107818604,
      "learning_rate": 8.857494909826506e-06,
      "loss": 0.0428,
      "step": 366780
    },
    {
      "epoch": 0.6002762449022342,
      "grad_norm": 0.6943286657333374,
      "learning_rate": 8.85742901761299e-06,
      "loss": 0.0463,
      "step": 366800
    },
    {
      "epoch": 0.6003089753408876,
      "grad_norm": 4.32710599899292,
      "learning_rate": 8.857363125399472e-06,
      "loss": 0.0424,
      "step": 366820
    },
    {
      "epoch": 0.6003417057795408,
      "grad_norm": 1.42470383644104,
      "learning_rate": 8.857297233185955e-06,
      "loss": 0.0408,
      "step": 366840
    },
    {
      "epoch": 0.6003744362181942,
      "grad_norm": 0.9544546604156494,
      "learning_rate": 8.857231340972437e-06,
      "loss": 0.0388,
      "step": 366860
    },
    {
      "epoch": 0.6004071666568476,
      "grad_norm": 1.4733456373214722,
      "learning_rate": 8.857165448758921e-06,
      "loss": 0.0303,
      "step": 366880
    },
    {
      "epoch": 0.6004398970955008,
      "grad_norm": 2.308220386505127,
      "learning_rate": 8.857099556545403e-06,
      "loss": 0.0296,
      "step": 366900
    },
    {
      "epoch": 0.6004726275341542,
      "grad_norm": 0.24707284569740295,
      "learning_rate": 8.857033664331886e-06,
      "loss": 0.0495,
      "step": 366920
    },
    {
      "epoch": 0.6005053579728076,
      "grad_norm": 1.518540859222412,
      "learning_rate": 8.85696777211837e-06,
      "loss": 0.0339,
      "step": 366940
    },
    {
      "epoch": 0.6005380884114608,
      "grad_norm": 0.6006766557693481,
      "learning_rate": 8.856901879904852e-06,
      "loss": 0.0328,
      "step": 366960
    },
    {
      "epoch": 0.6005708188501142,
      "grad_norm": 1.3648476600646973,
      "learning_rate": 8.856835987691335e-06,
      "loss": 0.0394,
      "step": 366980
    },
    {
      "epoch": 0.6006035492887676,
      "grad_norm": 1.1168289184570312,
      "learning_rate": 8.856770095477817e-06,
      "loss": 0.0293,
      "step": 367000
    },
    {
      "epoch": 0.600636279727421,
      "grad_norm": 0.2240571528673172,
      "learning_rate": 8.856704203264301e-06,
      "loss": 0.0266,
      "step": 367020
    },
    {
      "epoch": 0.6006690101660742,
      "grad_norm": 1.4916882514953613,
      "learning_rate": 8.856638311050783e-06,
      "loss": 0.0364,
      "step": 367040
    },
    {
      "epoch": 0.6007017406047276,
      "grad_norm": 5.014556407928467,
      "learning_rate": 8.856572418837266e-06,
      "loss": 0.0419,
      "step": 367060
    },
    {
      "epoch": 0.600734471043381,
      "grad_norm": 1.376075267791748,
      "learning_rate": 8.85650652662375e-06,
      "loss": 0.0381,
      "step": 367080
    },
    {
      "epoch": 0.6007672014820342,
      "grad_norm": 1.0370690822601318,
      "learning_rate": 8.856440634410234e-06,
      "loss": 0.0333,
      "step": 367100
    },
    {
      "epoch": 0.6007999319206876,
      "grad_norm": 0.8207594156265259,
      "learning_rate": 8.856374742196715e-06,
      "loss": 0.0504,
      "step": 367120
    },
    {
      "epoch": 0.600832662359341,
      "grad_norm": 0.7261782884597778,
      "learning_rate": 8.856308849983199e-06,
      "loss": 0.033,
      "step": 367140
    },
    {
      "epoch": 0.6008653927979942,
      "grad_norm": 0.531606912612915,
      "learning_rate": 8.856242957769681e-06,
      "loss": 0.0249,
      "step": 367160
    },
    {
      "epoch": 0.6008981232366476,
      "grad_norm": 2.672290802001953,
      "learning_rate": 8.856177065556165e-06,
      "loss": 0.0475,
      "step": 367180
    },
    {
      "epoch": 0.600930853675301,
      "grad_norm": 0.6501591801643372,
      "learning_rate": 8.856111173342646e-06,
      "loss": 0.0389,
      "step": 367200
    },
    {
      "epoch": 0.6009635841139543,
      "grad_norm": 1.4994617700576782,
      "learning_rate": 8.85604528112913e-06,
      "loss": 0.0321,
      "step": 367220
    },
    {
      "epoch": 0.6009963145526076,
      "grad_norm": 4.797268867492676,
      "learning_rate": 8.855979388915612e-06,
      "loss": 0.0368,
      "step": 367240
    },
    {
      "epoch": 0.601029044991261,
      "grad_norm": 1.9787607192993164,
      "learning_rate": 8.855913496702095e-06,
      "loss": 0.0452,
      "step": 367260
    },
    {
      "epoch": 0.6010617754299143,
      "grad_norm": 1.820029616355896,
      "learning_rate": 8.855847604488579e-06,
      "loss": 0.0367,
      "step": 367280
    },
    {
      "epoch": 0.6010945058685676,
      "grad_norm": 1.5153238773345947,
      "learning_rate": 8.855781712275061e-06,
      "loss": 0.0326,
      "step": 367300
    },
    {
      "epoch": 0.601127236307221,
      "grad_norm": 1.4449682235717773,
      "learning_rate": 8.855715820061545e-06,
      "loss": 0.0312,
      "step": 367320
    },
    {
      "epoch": 0.6011599667458744,
      "grad_norm": 0.768672525882721,
      "learning_rate": 8.855649927848026e-06,
      "loss": 0.0357,
      "step": 367340
    },
    {
      "epoch": 0.6011926971845276,
      "grad_norm": 2.6436614990234375,
      "learning_rate": 8.85558403563451e-06,
      "loss": 0.0304,
      "step": 367360
    },
    {
      "epoch": 0.601225427623181,
      "grad_norm": 1.0266392230987549,
      "learning_rate": 8.855518143420992e-06,
      "loss": 0.0277,
      "step": 367380
    },
    {
      "epoch": 0.6012581580618344,
      "grad_norm": 1.637968897819519,
      "learning_rate": 8.855452251207476e-06,
      "loss": 0.0526,
      "step": 367400
    },
    {
      "epoch": 0.6012908885004877,
      "grad_norm": 1.6022480726242065,
      "learning_rate": 8.855386358993957e-06,
      "loss": 0.0348,
      "step": 367420
    },
    {
      "epoch": 0.601323618939141,
      "grad_norm": 2.675823926925659,
      "learning_rate": 8.855320466780441e-06,
      "loss": 0.0201,
      "step": 367440
    },
    {
      "epoch": 0.6013563493777944,
      "grad_norm": 1.3131932020187378,
      "learning_rate": 8.855254574566925e-06,
      "loss": 0.0356,
      "step": 367460
    },
    {
      "epoch": 0.6013890798164477,
      "grad_norm": 0.4674813449382782,
      "learning_rate": 8.855188682353406e-06,
      "loss": 0.0354,
      "step": 367480
    },
    {
      "epoch": 0.601421810255101,
      "grad_norm": 1.7503223419189453,
      "learning_rate": 8.85512279013989e-06,
      "loss": 0.0437,
      "step": 367500
    },
    {
      "epoch": 0.6014545406937544,
      "grad_norm": 0.6400895714759827,
      "learning_rate": 8.855056897926374e-06,
      "loss": 0.0298,
      "step": 367520
    },
    {
      "epoch": 0.6014872711324077,
      "grad_norm": 1.4749404191970825,
      "learning_rate": 8.854991005712856e-06,
      "loss": 0.0408,
      "step": 367540
    },
    {
      "epoch": 0.601520001571061,
      "grad_norm": 0.430158406496048,
      "learning_rate": 8.854925113499339e-06,
      "loss": 0.0295,
      "step": 367560
    },
    {
      "epoch": 0.6015527320097144,
      "grad_norm": 2.359618902206421,
      "learning_rate": 8.854859221285821e-06,
      "loss": 0.0237,
      "step": 367580
    },
    {
      "epoch": 0.6015854624483677,
      "grad_norm": 3.5467965602874756,
      "learning_rate": 8.854793329072305e-06,
      "loss": 0.0322,
      "step": 367600
    },
    {
      "epoch": 0.6016181928870211,
      "grad_norm": 0.4035995602607727,
      "learning_rate": 8.854727436858788e-06,
      "loss": 0.038,
      "step": 367620
    },
    {
      "epoch": 0.6016509233256744,
      "grad_norm": 1.717146873474121,
      "learning_rate": 8.85466154464527e-06,
      "loss": 0.0537,
      "step": 367640
    },
    {
      "epoch": 0.6016836537643278,
      "grad_norm": 0.2702656388282776,
      "learning_rate": 8.854595652431754e-06,
      "loss": 0.038,
      "step": 367660
    },
    {
      "epoch": 0.6017163842029811,
      "grad_norm": 0.6780228018760681,
      "learning_rate": 8.854529760218236e-06,
      "loss": 0.0335,
      "step": 367680
    },
    {
      "epoch": 0.6017491146416344,
      "grad_norm": 0.4182687997817993,
      "learning_rate": 8.854463868004719e-06,
      "loss": 0.0275,
      "step": 367700
    },
    {
      "epoch": 0.6017818450802878,
      "grad_norm": 0.9537389278411865,
      "learning_rate": 8.854397975791201e-06,
      "loss": 0.0419,
      "step": 367720
    },
    {
      "epoch": 0.6018145755189411,
      "grad_norm": 0.9737532138824463,
      "learning_rate": 8.854332083577685e-06,
      "loss": 0.0313,
      "step": 367740
    },
    {
      "epoch": 0.6018473059575944,
      "grad_norm": 1.34153413772583,
      "learning_rate": 8.854266191364167e-06,
      "loss": 0.0289,
      "step": 367760
    },
    {
      "epoch": 0.6018800363962478,
      "grad_norm": 0.6588217616081238,
      "learning_rate": 8.85420029915065e-06,
      "loss": 0.0384,
      "step": 367780
    },
    {
      "epoch": 0.6019127668349011,
      "grad_norm": 0.46711987257003784,
      "learning_rate": 8.854134406937132e-06,
      "loss": 0.0272,
      "step": 367800
    },
    {
      "epoch": 0.6019454972735545,
      "grad_norm": 1.4513423442840576,
      "learning_rate": 8.854068514723616e-06,
      "loss": 0.0325,
      "step": 367820
    },
    {
      "epoch": 0.6019782277122078,
      "grad_norm": 1.134568214416504,
      "learning_rate": 8.8540026225101e-06,
      "loss": 0.0469,
      "step": 367840
    },
    {
      "epoch": 0.6020109581508611,
      "grad_norm": 0.30894386768341064,
      "learning_rate": 8.853936730296581e-06,
      "loss": 0.0375,
      "step": 367860
    },
    {
      "epoch": 0.6020436885895145,
      "grad_norm": 0.8721423745155334,
      "learning_rate": 8.853870838083065e-06,
      "loss": 0.0339,
      "step": 367880
    },
    {
      "epoch": 0.6020764190281678,
      "grad_norm": 1.2051875591278076,
      "learning_rate": 8.853804945869548e-06,
      "loss": 0.0239,
      "step": 367900
    },
    {
      "epoch": 0.6021091494668211,
      "grad_norm": 4.487891674041748,
      "learning_rate": 8.85373905365603e-06,
      "loss": 0.035,
      "step": 367920
    },
    {
      "epoch": 0.6021418799054745,
      "grad_norm": 2.151034116744995,
      "learning_rate": 8.853673161442514e-06,
      "loss": 0.0406,
      "step": 367940
    },
    {
      "epoch": 0.6021746103441278,
      "grad_norm": 0.4692702889442444,
      "learning_rate": 8.853607269228996e-06,
      "loss": 0.0367,
      "step": 367960
    },
    {
      "epoch": 0.6022073407827812,
      "grad_norm": 1.7584216594696045,
      "learning_rate": 8.85354137701548e-06,
      "loss": 0.0444,
      "step": 367980
    },
    {
      "epoch": 0.6022400712214345,
      "grad_norm": 0.8967640995979309,
      "learning_rate": 8.853475484801963e-06,
      "loss": 0.0391,
      "step": 368000
    },
    {
      "epoch": 0.6022728016600879,
      "grad_norm": 1.8085283041000366,
      "learning_rate": 8.853409592588445e-06,
      "loss": 0.0277,
      "step": 368020
    },
    {
      "epoch": 0.6023055320987412,
      "grad_norm": 0.6266322731971741,
      "learning_rate": 8.853343700374928e-06,
      "loss": 0.043,
      "step": 368040
    },
    {
      "epoch": 0.6023382625373945,
      "grad_norm": 0.730421781539917,
      "learning_rate": 8.85327780816141e-06,
      "loss": 0.0296,
      "step": 368060
    },
    {
      "epoch": 0.6023709929760479,
      "grad_norm": 0.9528756141662598,
      "learning_rate": 8.853211915947894e-06,
      "loss": 0.0302,
      "step": 368080
    },
    {
      "epoch": 0.6024037234147012,
      "grad_norm": 4.539196968078613,
      "learning_rate": 8.853146023734376e-06,
      "loss": 0.0339,
      "step": 368100
    },
    {
      "epoch": 0.6024364538533545,
      "grad_norm": 2.234607458114624,
      "learning_rate": 8.85308013152086e-06,
      "loss": 0.0496,
      "step": 368120
    },
    {
      "epoch": 0.6024691842920079,
      "grad_norm": 1.563908338546753,
      "learning_rate": 8.853014239307341e-06,
      "loss": 0.0378,
      "step": 368140
    },
    {
      "epoch": 0.6025019147306612,
      "grad_norm": 4.193044662475586,
      "learning_rate": 8.852948347093825e-06,
      "loss": 0.0371,
      "step": 368160
    },
    {
      "epoch": 0.6025346451693145,
      "grad_norm": 1.0811785459518433,
      "learning_rate": 8.852882454880307e-06,
      "loss": 0.0321,
      "step": 368180
    },
    {
      "epoch": 0.6025673756079679,
      "grad_norm": 2.201535940170288,
      "learning_rate": 8.85281656266679e-06,
      "loss": 0.0476,
      "step": 368200
    },
    {
      "epoch": 0.6026001060466213,
      "grad_norm": 1.4635334014892578,
      "learning_rate": 8.852750670453272e-06,
      "loss": 0.0281,
      "step": 368220
    },
    {
      "epoch": 0.6026328364852745,
      "grad_norm": 1.3876582384109497,
      "learning_rate": 8.852684778239756e-06,
      "loss": 0.0357,
      "step": 368240
    },
    {
      "epoch": 0.6026655669239279,
      "grad_norm": 1.1533393859863281,
      "learning_rate": 8.85261888602624e-06,
      "loss": 0.0332,
      "step": 368260
    },
    {
      "epoch": 0.6026982973625813,
      "grad_norm": 2.6030025482177734,
      "learning_rate": 8.852552993812721e-06,
      "loss": 0.0365,
      "step": 368280
    },
    {
      "epoch": 0.6027310278012346,
      "grad_norm": 2.0948994159698486,
      "learning_rate": 8.852487101599205e-06,
      "loss": 0.046,
      "step": 368300
    },
    {
      "epoch": 0.6027637582398879,
      "grad_norm": 1.7586865425109863,
      "learning_rate": 8.852421209385688e-06,
      "loss": 0.0278,
      "step": 368320
    },
    {
      "epoch": 0.6027964886785413,
      "grad_norm": 2.5329301357269287,
      "learning_rate": 8.85235531717217e-06,
      "loss": 0.0476,
      "step": 368340
    },
    {
      "epoch": 0.6028292191171946,
      "grad_norm": 1.7897101640701294,
      "learning_rate": 8.852289424958654e-06,
      "loss": 0.0395,
      "step": 368360
    },
    {
      "epoch": 0.6028619495558479,
      "grad_norm": 0.6840510368347168,
      "learning_rate": 8.852223532745137e-06,
      "loss": 0.0248,
      "step": 368380
    },
    {
      "epoch": 0.6028946799945013,
      "grad_norm": 0.6293777227401733,
      "learning_rate": 8.85215764053162e-06,
      "loss": 0.0448,
      "step": 368400
    },
    {
      "epoch": 0.6029274104331547,
      "grad_norm": 1.9264483451843262,
      "learning_rate": 8.852091748318103e-06,
      "loss": 0.034,
      "step": 368420
    },
    {
      "epoch": 0.6029601408718079,
      "grad_norm": 0.8961892127990723,
      "learning_rate": 8.852025856104585e-06,
      "loss": 0.0349,
      "step": 368440
    },
    {
      "epoch": 0.6029928713104613,
      "grad_norm": 1.2106804847717285,
      "learning_rate": 8.851959963891068e-06,
      "loss": 0.0378,
      "step": 368460
    },
    {
      "epoch": 0.6030256017491147,
      "grad_norm": 1.2850879430770874,
      "learning_rate": 8.85189407167755e-06,
      "loss": 0.0305,
      "step": 368480
    },
    {
      "epoch": 0.6030583321877679,
      "grad_norm": 1.9053981304168701,
      "learning_rate": 8.851828179464034e-06,
      "loss": 0.024,
      "step": 368500
    },
    {
      "epoch": 0.6030910626264213,
      "grad_norm": 0.6280418038368225,
      "learning_rate": 8.851762287250516e-06,
      "loss": 0.032,
      "step": 368520
    },
    {
      "epoch": 0.6031237930650747,
      "grad_norm": 4.974253177642822,
      "learning_rate": 8.851696395037e-06,
      "loss": 0.0318,
      "step": 368540
    },
    {
      "epoch": 0.603156523503728,
      "grad_norm": 6.88668155670166,
      "learning_rate": 8.851630502823481e-06,
      "loss": 0.0419,
      "step": 368560
    },
    {
      "epoch": 0.6031892539423813,
      "grad_norm": 1.9869987964630127,
      "learning_rate": 8.851564610609965e-06,
      "loss": 0.0389,
      "step": 368580
    },
    {
      "epoch": 0.6032219843810347,
      "grad_norm": 0.8262758851051331,
      "learning_rate": 8.851498718396447e-06,
      "loss": 0.0356,
      "step": 368600
    },
    {
      "epoch": 0.6032547148196881,
      "grad_norm": 2.0645596981048584,
      "learning_rate": 8.85143282618293e-06,
      "loss": 0.0464,
      "step": 368620
    },
    {
      "epoch": 0.6032874452583413,
      "grad_norm": 1.4239046573638916,
      "learning_rate": 8.851366933969414e-06,
      "loss": 0.04,
      "step": 368640
    },
    {
      "epoch": 0.6033201756969947,
      "grad_norm": 0.4493481516838074,
      "learning_rate": 8.851301041755896e-06,
      "loss": 0.0395,
      "step": 368660
    },
    {
      "epoch": 0.6033529061356481,
      "grad_norm": 0.593411922454834,
      "learning_rate": 8.85123514954238e-06,
      "loss": 0.0346,
      "step": 368680
    },
    {
      "epoch": 0.6033856365743013,
      "grad_norm": 0.31688714027404785,
      "learning_rate": 8.851169257328863e-06,
      "loss": 0.046,
      "step": 368700
    },
    {
      "epoch": 0.6034183670129547,
      "grad_norm": 0.9504968523979187,
      "learning_rate": 8.851103365115345e-06,
      "loss": 0.025,
      "step": 368720
    },
    {
      "epoch": 0.6034510974516081,
      "grad_norm": 2.461183547973633,
      "learning_rate": 8.851037472901828e-06,
      "loss": 0.0281,
      "step": 368740
    },
    {
      "epoch": 0.6034838278902613,
      "grad_norm": 0.6357726454734802,
      "learning_rate": 8.850971580688312e-06,
      "loss": 0.0466,
      "step": 368760
    },
    {
      "epoch": 0.6035165583289147,
      "grad_norm": 1.0077269077301025,
      "learning_rate": 8.850905688474794e-06,
      "loss": 0.0261,
      "step": 368780
    },
    {
      "epoch": 0.6035492887675681,
      "grad_norm": 1.1944169998168945,
      "learning_rate": 8.850839796261277e-06,
      "loss": 0.0392,
      "step": 368800
    },
    {
      "epoch": 0.6035820192062215,
      "grad_norm": 8.940544128417969,
      "learning_rate": 8.85077390404776e-06,
      "loss": 0.0342,
      "step": 368820
    },
    {
      "epoch": 0.6036147496448747,
      "grad_norm": 2.236818790435791,
      "learning_rate": 8.850708011834243e-06,
      "loss": 0.0277,
      "step": 368840
    },
    {
      "epoch": 0.6036474800835281,
      "grad_norm": 0.6649709343910217,
      "learning_rate": 8.850642119620725e-06,
      "loss": 0.0342,
      "step": 368860
    },
    {
      "epoch": 0.6036802105221815,
      "grad_norm": 2.850236177444458,
      "learning_rate": 8.850576227407208e-06,
      "loss": 0.0406,
      "step": 368880
    },
    {
      "epoch": 0.6037129409608347,
      "grad_norm": 0.17144961655139923,
      "learning_rate": 8.85051033519369e-06,
      "loss": 0.047,
      "step": 368900
    },
    {
      "epoch": 0.6037456713994881,
      "grad_norm": 2.6210668087005615,
      "learning_rate": 8.850444442980174e-06,
      "loss": 0.0358,
      "step": 368920
    },
    {
      "epoch": 0.6037784018381415,
      "grad_norm": 1.326928973197937,
      "learning_rate": 8.850378550766656e-06,
      "loss": 0.0415,
      "step": 368940
    },
    {
      "epoch": 0.6038111322767947,
      "grad_norm": 1.9688713550567627,
      "learning_rate": 8.85031265855314e-06,
      "loss": 0.0318,
      "step": 368960
    },
    {
      "epoch": 0.6038438627154481,
      "grad_norm": 2.212855339050293,
      "learning_rate": 8.850246766339621e-06,
      "loss": 0.0519,
      "step": 368980
    },
    {
      "epoch": 0.6038765931541015,
      "grad_norm": 1.40545654296875,
      "learning_rate": 8.850180874126105e-06,
      "loss": 0.0375,
      "step": 369000
    },
    {
      "epoch": 0.6039093235927548,
      "grad_norm": 0.4532710313796997,
      "learning_rate": 8.850114981912588e-06,
      "loss": 0.0348,
      "step": 369020
    },
    {
      "epoch": 0.6039420540314081,
      "grad_norm": 2.042020320892334,
      "learning_rate": 8.85004908969907e-06,
      "loss": 0.0447,
      "step": 369040
    },
    {
      "epoch": 0.6039747844700615,
      "grad_norm": 1.3562356233596802,
      "learning_rate": 8.849983197485554e-06,
      "loss": 0.0318,
      "step": 369060
    },
    {
      "epoch": 0.6040075149087148,
      "grad_norm": 0.4056580066680908,
      "learning_rate": 8.849917305272036e-06,
      "loss": 0.0306,
      "step": 369080
    },
    {
      "epoch": 0.6040402453473681,
      "grad_norm": 5.533121585845947,
      "learning_rate": 8.84985141305852e-06,
      "loss": 0.0382,
      "step": 369100
    },
    {
      "epoch": 0.6040729757860215,
      "grad_norm": 1.514090895652771,
      "learning_rate": 8.849785520845003e-06,
      "loss": 0.0391,
      "step": 369120
    },
    {
      "epoch": 0.6041057062246749,
      "grad_norm": 1.3797639608383179,
      "learning_rate": 8.849719628631485e-06,
      "loss": 0.0417,
      "step": 369140
    },
    {
      "epoch": 0.6041384366633281,
      "grad_norm": 1.6506648063659668,
      "learning_rate": 8.849653736417968e-06,
      "loss": 0.0465,
      "step": 369160
    },
    {
      "epoch": 0.6041711671019815,
      "grad_norm": 8.435386657714844,
      "learning_rate": 8.849587844204452e-06,
      "loss": 0.0443,
      "step": 369180
    },
    {
      "epoch": 0.6042038975406349,
      "grad_norm": 0.4681294560432434,
      "learning_rate": 8.849521951990934e-06,
      "loss": 0.0273,
      "step": 369200
    },
    {
      "epoch": 0.6042366279792882,
      "grad_norm": 0.4867115914821625,
      "learning_rate": 8.849456059777418e-06,
      "loss": 0.0342,
      "step": 369220
    },
    {
      "epoch": 0.6042693584179415,
      "grad_norm": 2.4311366081237793,
      "learning_rate": 8.8493901675639e-06,
      "loss": 0.0403,
      "step": 369240
    },
    {
      "epoch": 0.6043020888565949,
      "grad_norm": 0.8974113464355469,
      "learning_rate": 8.849324275350383e-06,
      "loss": 0.0381,
      "step": 369260
    },
    {
      "epoch": 0.6043348192952482,
      "grad_norm": 0.8365866541862488,
      "learning_rate": 8.849258383136865e-06,
      "loss": 0.0288,
      "step": 369280
    },
    {
      "epoch": 0.6043675497339015,
      "grad_norm": 2.5180203914642334,
      "learning_rate": 8.849192490923348e-06,
      "loss": 0.0379,
      "step": 369300
    },
    {
      "epoch": 0.6044002801725549,
      "grad_norm": 0.39966970682144165,
      "learning_rate": 8.84912659870983e-06,
      "loss": 0.0388,
      "step": 369320
    },
    {
      "epoch": 0.6044330106112082,
      "grad_norm": 3.1904664039611816,
      "learning_rate": 8.849060706496314e-06,
      "loss": 0.0448,
      "step": 369340
    },
    {
      "epoch": 0.6044657410498615,
      "grad_norm": 1.6867711544036865,
      "learning_rate": 8.848994814282796e-06,
      "loss": 0.0287,
      "step": 369360
    },
    {
      "epoch": 0.6044984714885149,
      "grad_norm": 0.06135893985629082,
      "learning_rate": 8.84892892206928e-06,
      "loss": 0.0379,
      "step": 369380
    },
    {
      "epoch": 0.6045312019271682,
      "grad_norm": 3.034196615219116,
      "learning_rate": 8.848863029855763e-06,
      "loss": 0.0466,
      "step": 369400
    },
    {
      "epoch": 0.6045639323658216,
      "grad_norm": 1.2247140407562256,
      "learning_rate": 8.848797137642245e-06,
      "loss": 0.0349,
      "step": 369420
    },
    {
      "epoch": 0.6045966628044749,
      "grad_norm": 1.6004306077957153,
      "learning_rate": 8.848731245428729e-06,
      "loss": 0.0415,
      "step": 369440
    },
    {
      "epoch": 0.6046293932431283,
      "grad_norm": 1.1207985877990723,
      "learning_rate": 8.84866535321521e-06,
      "loss": 0.0326,
      "step": 369460
    },
    {
      "epoch": 0.6046621236817816,
      "grad_norm": 1.228851079940796,
      "learning_rate": 8.848599461001694e-06,
      "loss": 0.0299,
      "step": 369480
    },
    {
      "epoch": 0.6046948541204349,
      "grad_norm": 0.8822138905525208,
      "learning_rate": 8.848533568788178e-06,
      "loss": 0.0387,
      "step": 369500
    },
    {
      "epoch": 0.6047275845590883,
      "grad_norm": 2.341418504714966,
      "learning_rate": 8.84846767657466e-06,
      "loss": 0.0422,
      "step": 369520
    },
    {
      "epoch": 0.6047603149977416,
      "grad_norm": 0.8766626119613647,
      "learning_rate": 8.848401784361143e-06,
      "loss": 0.0364,
      "step": 369540
    },
    {
      "epoch": 0.6047930454363949,
      "grad_norm": 0.6345072984695435,
      "learning_rate": 8.848335892147627e-06,
      "loss": 0.0339,
      "step": 369560
    },
    {
      "epoch": 0.6048257758750483,
      "grad_norm": 0.32135194540023804,
      "learning_rate": 8.848269999934109e-06,
      "loss": 0.0339,
      "step": 369580
    },
    {
      "epoch": 0.6048585063137016,
      "grad_norm": 1.766616702079773,
      "learning_rate": 8.848204107720592e-06,
      "loss": 0.0269,
      "step": 369600
    },
    {
      "epoch": 0.604891236752355,
      "grad_norm": 0.6241326332092285,
      "learning_rate": 8.848138215507074e-06,
      "loss": 0.04,
      "step": 369620
    },
    {
      "epoch": 0.6049239671910083,
      "grad_norm": 1.7803839445114136,
      "learning_rate": 8.848072323293558e-06,
      "loss": 0.0424,
      "step": 369640
    },
    {
      "epoch": 0.6049566976296616,
      "grad_norm": 0.6030198335647583,
      "learning_rate": 8.84800643108004e-06,
      "loss": 0.038,
      "step": 369660
    },
    {
      "epoch": 0.604989428068315,
      "grad_norm": 1.038923740386963,
      "learning_rate": 8.847940538866523e-06,
      "loss": 0.0357,
      "step": 369680
    },
    {
      "epoch": 0.6050221585069683,
      "grad_norm": 0.7712751626968384,
      "learning_rate": 8.847874646653005e-06,
      "loss": 0.0407,
      "step": 369700
    },
    {
      "epoch": 0.6050548889456216,
      "grad_norm": 1.0451390743255615,
      "learning_rate": 8.847808754439489e-06,
      "loss": 0.0281,
      "step": 369720
    },
    {
      "epoch": 0.605087619384275,
      "grad_norm": 0.7880561351776123,
      "learning_rate": 8.847742862225972e-06,
      "loss": 0.0384,
      "step": 369740
    },
    {
      "epoch": 0.6051203498229283,
      "grad_norm": 0.7325701117515564,
      "learning_rate": 8.847676970012454e-06,
      "loss": 0.0319,
      "step": 369760
    },
    {
      "epoch": 0.6051530802615817,
      "grad_norm": 1.8035249710083008,
      "learning_rate": 8.847611077798938e-06,
      "loss": 0.0396,
      "step": 369780
    },
    {
      "epoch": 0.605185810700235,
      "grad_norm": 1.0352888107299805,
      "learning_rate": 8.84754518558542e-06,
      "loss": 0.0424,
      "step": 369800
    },
    {
      "epoch": 0.6052185411388883,
      "grad_norm": 0.7291631102561951,
      "learning_rate": 8.847479293371903e-06,
      "loss": 0.0331,
      "step": 369820
    },
    {
      "epoch": 0.6052512715775417,
      "grad_norm": 0.638388991355896,
      "learning_rate": 8.847413401158385e-06,
      "loss": 0.0334,
      "step": 369840
    },
    {
      "epoch": 0.605284002016195,
      "grad_norm": 0.2766450047492981,
      "learning_rate": 8.847347508944869e-06,
      "loss": 0.0334,
      "step": 369860
    },
    {
      "epoch": 0.6053167324548484,
      "grad_norm": 3.480036973953247,
      "learning_rate": 8.847281616731352e-06,
      "loss": 0.0355,
      "step": 369880
    },
    {
      "epoch": 0.6053494628935017,
      "grad_norm": 0.17997661232948303,
      "learning_rate": 8.847215724517834e-06,
      "loss": 0.0361,
      "step": 369900
    },
    {
      "epoch": 0.605382193332155,
      "grad_norm": 0.5991906523704529,
      "learning_rate": 8.847149832304318e-06,
      "loss": 0.0325,
      "step": 369920
    },
    {
      "epoch": 0.6054149237708084,
      "grad_norm": 0.8630810379981995,
      "learning_rate": 8.847083940090801e-06,
      "loss": 0.0392,
      "step": 369940
    },
    {
      "epoch": 0.6054476542094617,
      "grad_norm": 1.185330867767334,
      "learning_rate": 8.847018047877283e-06,
      "loss": 0.045,
      "step": 369960
    },
    {
      "epoch": 0.605480384648115,
      "grad_norm": 2.0236105918884277,
      "learning_rate": 8.846952155663767e-06,
      "loss": 0.0403,
      "step": 369980
    },
    {
      "epoch": 0.6055131150867684,
      "grad_norm": 0.4521158039569855,
      "learning_rate": 8.846886263450249e-06,
      "loss": 0.0425,
      "step": 370000
    },
    {
      "epoch": 0.6055458455254217,
      "grad_norm": 0.563054621219635,
      "learning_rate": 8.846820371236732e-06,
      "loss": 0.037,
      "step": 370020
    },
    {
      "epoch": 0.605578575964075,
      "grad_norm": 0.3865296542644501,
      "learning_rate": 8.846754479023214e-06,
      "loss": 0.0408,
      "step": 370040
    },
    {
      "epoch": 0.6056113064027284,
      "grad_norm": 1.0031670331954956,
      "learning_rate": 8.846688586809698e-06,
      "loss": 0.0476,
      "step": 370060
    },
    {
      "epoch": 0.6056440368413818,
      "grad_norm": 0.3483555316925049,
      "learning_rate": 8.846622694596181e-06,
      "loss": 0.0228,
      "step": 370080
    },
    {
      "epoch": 0.605676767280035,
      "grad_norm": 1.5974897146224976,
      "learning_rate": 8.846556802382663e-06,
      "loss": 0.0393,
      "step": 370100
    },
    {
      "epoch": 0.6057094977186884,
      "grad_norm": 1.4674237966537476,
      "learning_rate": 8.846490910169147e-06,
      "loss": 0.039,
      "step": 370120
    },
    {
      "epoch": 0.6057422281573418,
      "grad_norm": 1.1849721670150757,
      "learning_rate": 8.846425017955629e-06,
      "loss": 0.044,
      "step": 370140
    },
    {
      "epoch": 0.6057749585959951,
      "grad_norm": 1.0973879098892212,
      "learning_rate": 8.846359125742112e-06,
      "loss": 0.0478,
      "step": 370160
    },
    {
      "epoch": 0.6058076890346484,
      "grad_norm": 0.796331524848938,
      "learning_rate": 8.846293233528594e-06,
      "loss": 0.032,
      "step": 370180
    },
    {
      "epoch": 0.6058404194733018,
      "grad_norm": 0.2301715463399887,
      "learning_rate": 8.846227341315078e-06,
      "loss": 0.0337,
      "step": 370200
    },
    {
      "epoch": 0.6058731499119551,
      "grad_norm": 1.0045747756958008,
      "learning_rate": 8.84616144910156e-06,
      "loss": 0.0333,
      "step": 370220
    },
    {
      "epoch": 0.6059058803506084,
      "grad_norm": 0.7795597314834595,
      "learning_rate": 8.846095556888043e-06,
      "loss": 0.0351,
      "step": 370240
    },
    {
      "epoch": 0.6059386107892618,
      "grad_norm": 0.82316654920578,
      "learning_rate": 8.846029664674525e-06,
      "loss": 0.0363,
      "step": 370260
    },
    {
      "epoch": 0.6059713412279152,
      "grad_norm": 0.4891568124294281,
      "learning_rate": 8.845963772461009e-06,
      "loss": 0.0312,
      "step": 370280
    },
    {
      "epoch": 0.6060040716665684,
      "grad_norm": 1.2070165872573853,
      "learning_rate": 8.845897880247492e-06,
      "loss": 0.0362,
      "step": 370300
    },
    {
      "epoch": 0.6060368021052218,
      "grad_norm": 2.0678200721740723,
      "learning_rate": 8.845831988033974e-06,
      "loss": 0.0381,
      "step": 370320
    },
    {
      "epoch": 0.6060695325438752,
      "grad_norm": 1.0803661346435547,
      "learning_rate": 8.845766095820458e-06,
      "loss": 0.036,
      "step": 370340
    },
    {
      "epoch": 0.6061022629825285,
      "grad_norm": 0.7159910202026367,
      "learning_rate": 8.845700203606941e-06,
      "loss": 0.0317,
      "step": 370360
    },
    {
      "epoch": 0.6061349934211818,
      "grad_norm": 1.3013778924942017,
      "learning_rate": 8.845634311393423e-06,
      "loss": 0.034,
      "step": 370380
    },
    {
      "epoch": 0.6061677238598352,
      "grad_norm": 0.8127827048301697,
      "learning_rate": 8.845568419179907e-06,
      "loss": 0.0368,
      "step": 370400
    },
    {
      "epoch": 0.6062004542984885,
      "grad_norm": 2.049326181411743,
      "learning_rate": 8.845502526966389e-06,
      "loss": 0.0423,
      "step": 370420
    },
    {
      "epoch": 0.6062331847371418,
      "grad_norm": 1.176937222480774,
      "learning_rate": 8.845436634752872e-06,
      "loss": 0.0386,
      "step": 370440
    },
    {
      "epoch": 0.6062659151757952,
      "grad_norm": 2.057588577270508,
      "learning_rate": 8.845370742539356e-06,
      "loss": 0.0348,
      "step": 370460
    },
    {
      "epoch": 0.6062986456144486,
      "grad_norm": 1.056774616241455,
      "learning_rate": 8.845304850325838e-06,
      "loss": 0.0365,
      "step": 370480
    },
    {
      "epoch": 0.6063313760531018,
      "grad_norm": 6.833432197570801,
      "learning_rate": 8.845238958112321e-06,
      "loss": 0.0299,
      "step": 370500
    },
    {
      "epoch": 0.6063641064917552,
      "grad_norm": 0.18216723203659058,
      "learning_rate": 8.845173065898803e-06,
      "loss": 0.0439,
      "step": 370520
    },
    {
      "epoch": 0.6063968369304086,
      "grad_norm": 1.2740439176559448,
      "learning_rate": 8.845107173685287e-06,
      "loss": 0.0263,
      "step": 370540
    },
    {
      "epoch": 0.6064295673690618,
      "grad_norm": 0.588431715965271,
      "learning_rate": 8.845041281471769e-06,
      "loss": 0.0342,
      "step": 370560
    },
    {
      "epoch": 0.6064622978077152,
      "grad_norm": 0.6691229939460754,
      "learning_rate": 8.844975389258252e-06,
      "loss": 0.0314,
      "step": 370580
    },
    {
      "epoch": 0.6064950282463686,
      "grad_norm": 0.7682489156723022,
      "learning_rate": 8.844909497044734e-06,
      "loss": 0.0347,
      "step": 370600
    },
    {
      "epoch": 0.6065277586850218,
      "grad_norm": 0.8889361023902893,
      "learning_rate": 8.844843604831218e-06,
      "loss": 0.0384,
      "step": 370620
    },
    {
      "epoch": 0.6065604891236752,
      "grad_norm": 1.956680417060852,
      "learning_rate": 8.8447777126177e-06,
      "loss": 0.0188,
      "step": 370640
    },
    {
      "epoch": 0.6065932195623286,
      "grad_norm": 10.193772315979004,
      "learning_rate": 8.844711820404183e-06,
      "loss": 0.0299,
      "step": 370660
    },
    {
      "epoch": 0.606625950000982,
      "grad_norm": 1.5290197134017944,
      "learning_rate": 8.844645928190667e-06,
      "loss": 0.0316,
      "step": 370680
    },
    {
      "epoch": 0.6066586804396352,
      "grad_norm": 0.35184597969055176,
      "learning_rate": 8.844580035977149e-06,
      "loss": 0.035,
      "step": 370700
    },
    {
      "epoch": 0.6066914108782886,
      "grad_norm": 1.5702619552612305,
      "learning_rate": 8.844514143763632e-06,
      "loss": 0.0329,
      "step": 370720
    },
    {
      "epoch": 0.606724141316942,
      "grad_norm": 1.3836642503738403,
      "learning_rate": 8.844448251550116e-06,
      "loss": 0.0252,
      "step": 370740
    },
    {
      "epoch": 0.6067568717555952,
      "grad_norm": 0.5444176197052002,
      "learning_rate": 8.844382359336598e-06,
      "loss": 0.0367,
      "step": 370760
    },
    {
      "epoch": 0.6067896021942486,
      "grad_norm": 2.3809192180633545,
      "learning_rate": 8.844316467123081e-06,
      "loss": 0.0474,
      "step": 370780
    },
    {
      "epoch": 0.606822332632902,
      "grad_norm": 4.246231555938721,
      "learning_rate": 8.844250574909565e-06,
      "loss": 0.0368,
      "step": 370800
    },
    {
      "epoch": 0.6068550630715552,
      "grad_norm": 1.3464993238449097,
      "learning_rate": 8.844184682696047e-06,
      "loss": 0.0327,
      "step": 370820
    },
    {
      "epoch": 0.6068877935102086,
      "grad_norm": 0.7086213231086731,
      "learning_rate": 8.84411879048253e-06,
      "loss": 0.0354,
      "step": 370840
    },
    {
      "epoch": 0.606920523948862,
      "grad_norm": 1.2839908599853516,
      "learning_rate": 8.844052898269012e-06,
      "loss": 0.0421,
      "step": 370860
    },
    {
      "epoch": 0.6069532543875154,
      "grad_norm": 1.161309838294983,
      "learning_rate": 8.843987006055496e-06,
      "loss": 0.0399,
      "step": 370880
    },
    {
      "epoch": 0.6069859848261686,
      "grad_norm": 0.8390875458717346,
      "learning_rate": 8.843921113841978e-06,
      "loss": 0.0501,
      "step": 370900
    },
    {
      "epoch": 0.607018715264822,
      "grad_norm": 0.7584209442138672,
      "learning_rate": 8.843855221628461e-06,
      "loss": 0.0322,
      "step": 370920
    },
    {
      "epoch": 0.6070514457034754,
      "grad_norm": 2.5195648670196533,
      "learning_rate": 8.843789329414943e-06,
      "loss": 0.046,
      "step": 370940
    },
    {
      "epoch": 0.6070841761421286,
      "grad_norm": 0.5493484139442444,
      "learning_rate": 8.843723437201427e-06,
      "loss": 0.0378,
      "step": 370960
    },
    {
      "epoch": 0.607116906580782,
      "grad_norm": 0.06495493650436401,
      "learning_rate": 8.843657544987909e-06,
      "loss": 0.033,
      "step": 370980
    },
    {
      "epoch": 0.6071496370194354,
      "grad_norm": 0.9090807437896729,
      "learning_rate": 8.843591652774392e-06,
      "loss": 0.0327,
      "step": 371000
    },
    {
      "epoch": 0.6071823674580886,
      "grad_norm": 2.283350706100464,
      "learning_rate": 8.843525760560874e-06,
      "loss": 0.0346,
      "step": 371020
    },
    {
      "epoch": 0.607215097896742,
      "grad_norm": 1.4336273670196533,
      "learning_rate": 8.843459868347358e-06,
      "loss": 0.036,
      "step": 371040
    },
    {
      "epoch": 0.6072478283353954,
      "grad_norm": 1.8462010622024536,
      "learning_rate": 8.84339397613384e-06,
      "loss": 0.0368,
      "step": 371060
    },
    {
      "epoch": 0.6072805587740487,
      "grad_norm": 0.6554203629493713,
      "learning_rate": 8.843328083920323e-06,
      "loss": 0.035,
      "step": 371080
    },
    {
      "epoch": 0.607313289212702,
      "grad_norm": 2.812753438949585,
      "learning_rate": 8.843262191706807e-06,
      "loss": 0.0443,
      "step": 371100
    },
    {
      "epoch": 0.6073460196513554,
      "grad_norm": 0.8850661516189575,
      "learning_rate": 8.843196299493289e-06,
      "loss": 0.0301,
      "step": 371120
    },
    {
      "epoch": 0.6073787500900087,
      "grad_norm": 1.6140849590301514,
      "learning_rate": 8.843130407279772e-06,
      "loss": 0.0371,
      "step": 371140
    },
    {
      "epoch": 0.607411480528662,
      "grad_norm": 2.814737558364868,
      "learning_rate": 8.843064515066256e-06,
      "loss": 0.0407,
      "step": 371160
    },
    {
      "epoch": 0.6074442109673154,
      "grad_norm": 0.7482482194900513,
      "learning_rate": 8.842998622852738e-06,
      "loss": 0.0484,
      "step": 371180
    },
    {
      "epoch": 0.6074769414059688,
      "grad_norm": 0.6279591917991638,
      "learning_rate": 8.842932730639221e-06,
      "loss": 0.0254,
      "step": 371200
    },
    {
      "epoch": 0.607509671844622,
      "grad_norm": 4.073598861694336,
      "learning_rate": 8.842866838425705e-06,
      "loss": 0.0431,
      "step": 371220
    },
    {
      "epoch": 0.6075424022832754,
      "grad_norm": 5.64381217956543,
      "learning_rate": 8.842800946212187e-06,
      "loss": 0.0375,
      "step": 371240
    },
    {
      "epoch": 0.6075751327219288,
      "grad_norm": 1.4670910835266113,
      "learning_rate": 8.84273505399867e-06,
      "loss": 0.0513,
      "step": 371260
    },
    {
      "epoch": 0.6076078631605821,
      "grad_norm": 0.8807713389396667,
      "learning_rate": 8.842669161785152e-06,
      "loss": 0.0405,
      "step": 371280
    },
    {
      "epoch": 0.6076405935992354,
      "grad_norm": 2.9418225288391113,
      "learning_rate": 8.842603269571636e-06,
      "loss": 0.0341,
      "step": 371300
    },
    {
      "epoch": 0.6076733240378888,
      "grad_norm": 1.7874516248703003,
      "learning_rate": 8.842537377358118e-06,
      "loss": 0.049,
      "step": 371320
    },
    {
      "epoch": 0.6077060544765421,
      "grad_norm": 2.3257815837860107,
      "learning_rate": 8.842471485144601e-06,
      "loss": 0.0327,
      "step": 371340
    },
    {
      "epoch": 0.6077387849151954,
      "grad_norm": 0.48628443479537964,
      "learning_rate": 8.842405592931083e-06,
      "loss": 0.0295,
      "step": 371360
    },
    {
      "epoch": 0.6077715153538488,
      "grad_norm": 0.774696946144104,
      "learning_rate": 8.842339700717567e-06,
      "loss": 0.0331,
      "step": 371380
    },
    {
      "epoch": 0.6078042457925021,
      "grad_norm": 0.5772687196731567,
      "learning_rate": 8.842273808504049e-06,
      "loss": 0.042,
      "step": 371400
    },
    {
      "epoch": 0.6078369762311554,
      "grad_norm": 1.2605112791061401,
      "learning_rate": 8.842207916290532e-06,
      "loss": 0.0306,
      "step": 371420
    },
    {
      "epoch": 0.6078697066698088,
      "grad_norm": 1.1443183422088623,
      "learning_rate": 8.842142024077014e-06,
      "loss": 0.0267,
      "step": 371440
    },
    {
      "epoch": 0.6079024371084621,
      "grad_norm": 3.3833978176116943,
      "learning_rate": 8.842076131863498e-06,
      "loss": 0.0393,
      "step": 371460
    },
    {
      "epoch": 0.6079351675471155,
      "grad_norm": 1.0351314544677734,
      "learning_rate": 8.842010239649982e-06,
      "loss": 0.0407,
      "step": 371480
    },
    {
      "epoch": 0.6079678979857688,
      "grad_norm": 0.7431915998458862,
      "learning_rate": 8.841944347436463e-06,
      "loss": 0.027,
      "step": 371500
    },
    {
      "epoch": 0.6080006284244222,
      "grad_norm": 1.7561063766479492,
      "learning_rate": 8.841878455222947e-06,
      "loss": 0.0327,
      "step": 371520
    },
    {
      "epoch": 0.6080333588630755,
      "grad_norm": 1.4355318546295166,
      "learning_rate": 8.84181256300943e-06,
      "loss": 0.0329,
      "step": 371540
    },
    {
      "epoch": 0.6080660893017288,
      "grad_norm": 2.144411325454712,
      "learning_rate": 8.841746670795912e-06,
      "loss": 0.0393,
      "step": 371560
    },
    {
      "epoch": 0.6080988197403822,
      "grad_norm": 0.8948083519935608,
      "learning_rate": 8.841680778582396e-06,
      "loss": 0.0367,
      "step": 371580
    },
    {
      "epoch": 0.6081315501790355,
      "grad_norm": 1.5502816438674927,
      "learning_rate": 8.84161488636888e-06,
      "loss": 0.0466,
      "step": 371600
    },
    {
      "epoch": 0.6081642806176888,
      "grad_norm": 2.2081375122070312,
      "learning_rate": 8.841548994155362e-06,
      "loss": 0.0359,
      "step": 371620
    },
    {
      "epoch": 0.6081970110563422,
      "grad_norm": 2.828932046890259,
      "learning_rate": 8.841483101941845e-06,
      "loss": 0.032,
      "step": 371640
    },
    {
      "epoch": 0.6082297414949955,
      "grad_norm": 0.3195360600948334,
      "learning_rate": 8.841417209728327e-06,
      "loss": 0.0281,
      "step": 371660
    },
    {
      "epoch": 0.6082624719336489,
      "grad_norm": 0.5338629484176636,
      "learning_rate": 8.84135131751481e-06,
      "loss": 0.0288,
      "step": 371680
    },
    {
      "epoch": 0.6082952023723022,
      "grad_norm": 1.4522031545639038,
      "learning_rate": 8.841285425301292e-06,
      "loss": 0.0502,
      "step": 371700
    },
    {
      "epoch": 0.6083279328109555,
      "grad_norm": 1.742577314376831,
      "learning_rate": 8.841219533087776e-06,
      "loss": 0.0311,
      "step": 371720
    },
    {
      "epoch": 0.6083606632496089,
      "grad_norm": 1.154674768447876,
      "learning_rate": 8.841153640874258e-06,
      "loss": 0.0213,
      "step": 371740
    },
    {
      "epoch": 0.6083933936882622,
      "grad_norm": 1.5661619901657104,
      "learning_rate": 8.841087748660742e-06,
      "loss": 0.0256,
      "step": 371760
    },
    {
      "epoch": 0.6084261241269155,
      "grad_norm": 3.6630101203918457,
      "learning_rate": 8.841021856447223e-06,
      "loss": 0.0509,
      "step": 371780
    },
    {
      "epoch": 0.6084588545655689,
      "grad_norm": 1.8486803770065308,
      "learning_rate": 8.840955964233707e-06,
      "loss": 0.0399,
      "step": 371800
    },
    {
      "epoch": 0.6084915850042222,
      "grad_norm": 0.5968304872512817,
      "learning_rate": 8.840890072020189e-06,
      "loss": 0.0388,
      "step": 371820
    },
    {
      "epoch": 0.6085243154428756,
      "grad_norm": 1.6234850883483887,
      "learning_rate": 8.840824179806673e-06,
      "loss": 0.0418,
      "step": 371840
    },
    {
      "epoch": 0.6085570458815289,
      "grad_norm": 1.0624788999557495,
      "learning_rate": 8.840758287593156e-06,
      "loss": 0.038,
      "step": 371860
    },
    {
      "epoch": 0.6085897763201823,
      "grad_norm": 0.34531086683273315,
      "learning_rate": 8.840692395379638e-06,
      "loss": 0.0279,
      "step": 371880
    },
    {
      "epoch": 0.6086225067588356,
      "grad_norm": 4.1358771324157715,
      "learning_rate": 8.840626503166122e-06,
      "loss": 0.0376,
      "step": 371900
    },
    {
      "epoch": 0.6086552371974889,
      "grad_norm": 2.024458169937134,
      "learning_rate": 8.840560610952603e-06,
      "loss": 0.0331,
      "step": 371920
    },
    {
      "epoch": 0.6086879676361423,
      "grad_norm": 2.288236379623413,
      "learning_rate": 8.840494718739087e-06,
      "loss": 0.0353,
      "step": 371940
    },
    {
      "epoch": 0.6087206980747956,
      "grad_norm": 1.598663091659546,
      "learning_rate": 8.84042882652557e-06,
      "loss": 0.0466,
      "step": 371960
    },
    {
      "epoch": 0.6087534285134489,
      "grad_norm": 1.789319634437561,
      "learning_rate": 8.840362934312053e-06,
      "loss": 0.0364,
      "step": 371980
    },
    {
      "epoch": 0.6087861589521023,
      "grad_norm": 1.5428199768066406,
      "learning_rate": 8.840297042098536e-06,
      "loss": 0.035,
      "step": 372000
    },
    {
      "epoch": 0.6088188893907556,
      "grad_norm": 2.787423849105835,
      "learning_rate": 8.84023114988502e-06,
      "loss": 0.0509,
      "step": 372020
    },
    {
      "epoch": 0.6088516198294089,
      "grad_norm": 1.5418927669525146,
      "learning_rate": 8.840165257671502e-06,
      "loss": 0.0313,
      "step": 372040
    },
    {
      "epoch": 0.6088843502680623,
      "grad_norm": 1.4899581670761108,
      "learning_rate": 8.840099365457985e-06,
      "loss": 0.0339,
      "step": 372060
    },
    {
      "epoch": 0.6089170807067157,
      "grad_norm": 1.3211712837219238,
      "learning_rate": 8.840033473244467e-06,
      "loss": 0.0382,
      "step": 372080
    },
    {
      "epoch": 0.608949811145369,
      "grad_norm": 0.5094437003135681,
      "learning_rate": 8.83996758103095e-06,
      "loss": 0.0367,
      "step": 372100
    },
    {
      "epoch": 0.6089825415840223,
      "grad_norm": 0.8007943034172058,
      "learning_rate": 8.839901688817433e-06,
      "loss": 0.0281,
      "step": 372120
    },
    {
      "epoch": 0.6090152720226757,
      "grad_norm": 0.4873429536819458,
      "learning_rate": 8.839835796603916e-06,
      "loss": 0.033,
      "step": 372140
    },
    {
      "epoch": 0.609048002461329,
      "grad_norm": 0.8014602661132812,
      "learning_rate": 8.839769904390398e-06,
      "loss": 0.027,
      "step": 372160
    },
    {
      "epoch": 0.6090807328999823,
      "grad_norm": 1.8826274871826172,
      "learning_rate": 8.839704012176882e-06,
      "loss": 0.0311,
      "step": 372180
    },
    {
      "epoch": 0.6091134633386357,
      "grad_norm": 0.5212364196777344,
      "learning_rate": 8.839638119963365e-06,
      "loss": 0.0358,
      "step": 372200
    },
    {
      "epoch": 0.609146193777289,
      "grad_norm": 0.7290996313095093,
      "learning_rate": 8.839572227749847e-06,
      "loss": 0.0497,
      "step": 372220
    },
    {
      "epoch": 0.6091789242159423,
      "grad_norm": 1.6983494758605957,
      "learning_rate": 8.83950633553633e-06,
      "loss": 0.0329,
      "step": 372240
    },
    {
      "epoch": 0.6092116546545957,
      "grad_norm": 1.696913480758667,
      "learning_rate": 8.839440443322813e-06,
      "loss": 0.045,
      "step": 372260
    },
    {
      "epoch": 0.6092443850932491,
      "grad_norm": 0.9163705110549927,
      "learning_rate": 8.839374551109296e-06,
      "loss": 0.0286,
      "step": 372280
    },
    {
      "epoch": 0.6092771155319023,
      "grad_norm": 3.349443197250366,
      "learning_rate": 8.839308658895778e-06,
      "loss": 0.0428,
      "step": 372300
    },
    {
      "epoch": 0.6093098459705557,
      "grad_norm": 0.5987571477890015,
      "learning_rate": 8.839242766682262e-06,
      "loss": 0.0241,
      "step": 372320
    },
    {
      "epoch": 0.6093425764092091,
      "grad_norm": 0.4255512058734894,
      "learning_rate": 8.839176874468745e-06,
      "loss": 0.0299,
      "step": 372340
    },
    {
      "epoch": 0.6093753068478623,
      "grad_norm": 0.5856485962867737,
      "learning_rate": 8.839110982255227e-06,
      "loss": 0.0383,
      "step": 372360
    },
    {
      "epoch": 0.6094080372865157,
      "grad_norm": 0.8816548585891724,
      "learning_rate": 8.83904509004171e-06,
      "loss": 0.0288,
      "step": 372380
    },
    {
      "epoch": 0.6094407677251691,
      "grad_norm": 0.4503994584083557,
      "learning_rate": 8.838979197828194e-06,
      "loss": 0.0321,
      "step": 372400
    },
    {
      "epoch": 0.6094734981638223,
      "grad_norm": 0.9932576417922974,
      "learning_rate": 8.838913305614676e-06,
      "loss": 0.0531,
      "step": 372420
    },
    {
      "epoch": 0.6095062286024757,
      "grad_norm": 0.3707660436630249,
      "learning_rate": 8.83884741340116e-06,
      "loss": 0.0281,
      "step": 372440
    },
    {
      "epoch": 0.6095389590411291,
      "grad_norm": 1.2034419775009155,
      "learning_rate": 8.838781521187642e-06,
      "loss": 0.0331,
      "step": 372460
    },
    {
      "epoch": 0.6095716894797824,
      "grad_norm": 1.192012071609497,
      "learning_rate": 8.838715628974125e-06,
      "loss": 0.0308,
      "step": 372480
    },
    {
      "epoch": 0.6096044199184357,
      "grad_norm": 1.4318982362747192,
      "learning_rate": 8.838649736760607e-06,
      "loss": 0.0455,
      "step": 372500
    },
    {
      "epoch": 0.6096371503570891,
      "grad_norm": 2.422372341156006,
      "learning_rate": 8.83858384454709e-06,
      "loss": 0.0303,
      "step": 372520
    },
    {
      "epoch": 0.6096698807957425,
      "grad_norm": 0.496224969625473,
      "learning_rate": 8.838517952333574e-06,
      "loss": 0.0271,
      "step": 372540
    },
    {
      "epoch": 0.6097026112343957,
      "grad_norm": 1.4847378730773926,
      "learning_rate": 8.838452060120056e-06,
      "loss": 0.0272,
      "step": 372560
    },
    {
      "epoch": 0.6097353416730491,
      "grad_norm": 0.930992066860199,
      "learning_rate": 8.83838616790654e-06,
      "loss": 0.0398,
      "step": 372580
    },
    {
      "epoch": 0.6097680721117025,
      "grad_norm": 0.3022134006023407,
      "learning_rate": 8.838320275693022e-06,
      "loss": 0.0367,
      "step": 372600
    },
    {
      "epoch": 0.6098008025503557,
      "grad_norm": 0.4749892055988312,
      "learning_rate": 8.838254383479505e-06,
      "loss": 0.0269,
      "step": 372620
    },
    {
      "epoch": 0.6098335329890091,
      "grad_norm": 1.1187453269958496,
      "learning_rate": 8.838188491265987e-06,
      "loss": 0.0461,
      "step": 372640
    },
    {
      "epoch": 0.6098662634276625,
      "grad_norm": 2.9160211086273193,
      "learning_rate": 8.83812259905247e-06,
      "loss": 0.0518,
      "step": 372660
    },
    {
      "epoch": 0.6098989938663157,
      "grad_norm": 0.8151524662971497,
      "learning_rate": 8.838056706838953e-06,
      "loss": 0.0391,
      "step": 372680
    },
    {
      "epoch": 0.6099317243049691,
      "grad_norm": 1.9120436906814575,
      "learning_rate": 8.837990814625436e-06,
      "loss": 0.0381,
      "step": 372700
    },
    {
      "epoch": 0.6099644547436225,
      "grad_norm": 1.1415334939956665,
      "learning_rate": 8.83792492241192e-06,
      "loss": 0.0316,
      "step": 372720
    },
    {
      "epoch": 0.6099971851822759,
      "grad_norm": 3.4479386806488037,
      "learning_rate": 8.837859030198402e-06,
      "loss": 0.0445,
      "step": 372740
    },
    {
      "epoch": 0.6100299156209291,
      "grad_norm": 1.2252483367919922,
      "learning_rate": 8.837793137984885e-06,
      "loss": 0.044,
      "step": 372760
    },
    {
      "epoch": 0.6100626460595825,
      "grad_norm": 4.355083465576172,
      "learning_rate": 8.837727245771369e-06,
      "loss": 0.0283,
      "step": 372780
    },
    {
      "epoch": 0.6100953764982359,
      "grad_norm": 1.3429713249206543,
      "learning_rate": 8.83766135355785e-06,
      "loss": 0.0343,
      "step": 372800
    },
    {
      "epoch": 0.6101281069368891,
      "grad_norm": 1.7025688886642456,
      "learning_rate": 8.837595461344334e-06,
      "loss": 0.045,
      "step": 372820
    },
    {
      "epoch": 0.6101608373755425,
      "grad_norm": 2.424548387527466,
      "learning_rate": 8.837529569130816e-06,
      "loss": 0.0256,
      "step": 372840
    },
    {
      "epoch": 0.6101935678141959,
      "grad_norm": 1.7791281938552856,
      "learning_rate": 8.8374636769173e-06,
      "loss": 0.0394,
      "step": 372860
    },
    {
      "epoch": 0.6102262982528491,
      "grad_norm": 1.8021299839019775,
      "learning_rate": 8.837397784703782e-06,
      "loss": 0.0467,
      "step": 372880
    },
    {
      "epoch": 0.6102590286915025,
      "grad_norm": 2.1039841175079346,
      "learning_rate": 8.837331892490265e-06,
      "loss": 0.0496,
      "step": 372900
    },
    {
      "epoch": 0.6102917591301559,
      "grad_norm": 1.2160634994506836,
      "learning_rate": 8.837266000276749e-06,
      "loss": 0.0386,
      "step": 372920
    },
    {
      "epoch": 0.6103244895688092,
      "grad_norm": 0.7823880910873413,
      "learning_rate": 8.83720010806323e-06,
      "loss": 0.0299,
      "step": 372940
    },
    {
      "epoch": 0.6103572200074625,
      "grad_norm": 1.1884807348251343,
      "learning_rate": 8.837134215849714e-06,
      "loss": 0.0323,
      "step": 372960
    },
    {
      "epoch": 0.6103899504461159,
      "grad_norm": 0.9881445169448853,
      "learning_rate": 8.837068323636196e-06,
      "loss": 0.0352,
      "step": 372980
    },
    {
      "epoch": 0.6104226808847693,
      "grad_norm": 0.33802303671836853,
      "learning_rate": 8.83700243142268e-06,
      "loss": 0.032,
      "step": 373000
    },
    {
      "epoch": 0.6104554113234225,
      "grad_norm": 1.2915863990783691,
      "learning_rate": 8.836936539209162e-06,
      "loss": 0.0252,
      "step": 373020
    },
    {
      "epoch": 0.6104881417620759,
      "grad_norm": 0.8405905961990356,
      "learning_rate": 8.836870646995645e-06,
      "loss": 0.0386,
      "step": 373040
    },
    {
      "epoch": 0.6105208722007293,
      "grad_norm": 0.6757828593254089,
      "learning_rate": 8.836804754782127e-06,
      "loss": 0.0476,
      "step": 373060
    },
    {
      "epoch": 0.6105536026393825,
      "grad_norm": 0.6059618592262268,
      "learning_rate": 8.836738862568611e-06,
      "loss": 0.0248,
      "step": 373080
    },
    {
      "epoch": 0.6105863330780359,
      "grad_norm": 0.7224711179733276,
      "learning_rate": 8.836672970355093e-06,
      "loss": 0.0467,
      "step": 373100
    },
    {
      "epoch": 0.6106190635166893,
      "grad_norm": 2.403164863586426,
      "learning_rate": 8.836607078141576e-06,
      "loss": 0.0509,
      "step": 373120
    },
    {
      "epoch": 0.6106517939553426,
      "grad_norm": 0.3505803048610687,
      "learning_rate": 8.83654118592806e-06,
      "loss": 0.033,
      "step": 373140
    },
    {
      "epoch": 0.6106845243939959,
      "grad_norm": 1.1227607727050781,
      "learning_rate": 8.836475293714542e-06,
      "loss": 0.0302,
      "step": 373160
    },
    {
      "epoch": 0.6107172548326493,
      "grad_norm": 1.1944855451583862,
      "learning_rate": 8.836409401501025e-06,
      "loss": 0.0344,
      "step": 373180
    },
    {
      "epoch": 0.6107499852713026,
      "grad_norm": 0.9919944405555725,
      "learning_rate": 8.836343509287509e-06,
      "loss": 0.0436,
      "step": 373200
    },
    {
      "epoch": 0.6107827157099559,
      "grad_norm": 1.3170623779296875,
      "learning_rate": 8.836277617073991e-06,
      "loss": 0.0319,
      "step": 373220
    },
    {
      "epoch": 0.6108154461486093,
      "grad_norm": 2.4285926818847656,
      "learning_rate": 8.836211724860474e-06,
      "loss": 0.0414,
      "step": 373240
    },
    {
      "epoch": 0.6108481765872626,
      "grad_norm": 1.4527732133865356,
      "learning_rate": 8.836145832646958e-06,
      "loss": 0.0344,
      "step": 373260
    },
    {
      "epoch": 0.6108809070259159,
      "grad_norm": 1.5353385210037231,
      "learning_rate": 8.83607994043344e-06,
      "loss": 0.0413,
      "step": 373280
    },
    {
      "epoch": 0.6109136374645693,
      "grad_norm": 0.5744847059249878,
      "learning_rate": 8.836014048219924e-06,
      "loss": 0.0363,
      "step": 373300
    },
    {
      "epoch": 0.6109463679032227,
      "grad_norm": 2.2874789237976074,
      "learning_rate": 8.835948156006405e-06,
      "loss": 0.0425,
      "step": 373320
    },
    {
      "epoch": 0.610979098341876,
      "grad_norm": 4.1744537353515625,
      "learning_rate": 8.835882263792889e-06,
      "loss": 0.0449,
      "step": 373340
    },
    {
      "epoch": 0.6110118287805293,
      "grad_norm": 0.4625625014305115,
      "learning_rate": 8.835816371579371e-06,
      "loss": 0.0375,
      "step": 373360
    },
    {
      "epoch": 0.6110445592191827,
      "grad_norm": 1.1338825225830078,
      "learning_rate": 8.835750479365854e-06,
      "loss": 0.0289,
      "step": 373380
    },
    {
      "epoch": 0.611077289657836,
      "grad_norm": 0.8499294519424438,
      "learning_rate": 8.835684587152336e-06,
      "loss": 0.0384,
      "step": 373400
    },
    {
      "epoch": 0.6111100200964893,
      "grad_norm": 2.416358232498169,
      "learning_rate": 8.83561869493882e-06,
      "loss": 0.0511,
      "step": 373420
    },
    {
      "epoch": 0.6111427505351427,
      "grad_norm": 0.7909805774688721,
      "learning_rate": 8.835552802725302e-06,
      "loss": 0.03,
      "step": 373440
    },
    {
      "epoch": 0.611175480973796,
      "grad_norm": 0.9792816042900085,
      "learning_rate": 8.835486910511785e-06,
      "loss": 0.0306,
      "step": 373460
    },
    {
      "epoch": 0.6112082114124493,
      "grad_norm": 2.4796652793884277,
      "learning_rate": 8.835421018298267e-06,
      "loss": 0.0455,
      "step": 373480
    },
    {
      "epoch": 0.6112409418511027,
      "grad_norm": 0.6047706604003906,
      "learning_rate": 8.835355126084751e-06,
      "loss": 0.0334,
      "step": 373500
    },
    {
      "epoch": 0.611273672289756,
      "grad_norm": 0.5897772908210754,
      "learning_rate": 8.835289233871235e-06,
      "loss": 0.0353,
      "step": 373520
    },
    {
      "epoch": 0.6113064027284094,
      "grad_norm": 1.6397873163223267,
      "learning_rate": 8.835223341657716e-06,
      "loss": 0.0258,
      "step": 373540
    },
    {
      "epoch": 0.6113391331670627,
      "grad_norm": 0.9558752775192261,
      "learning_rate": 8.8351574494442e-06,
      "loss": 0.0363,
      "step": 373560
    },
    {
      "epoch": 0.611371863605716,
      "grad_norm": 0.6192553639411926,
      "learning_rate": 8.835091557230684e-06,
      "loss": 0.0413,
      "step": 373580
    },
    {
      "epoch": 0.6114045940443694,
      "grad_norm": 1.639401912689209,
      "learning_rate": 8.835025665017165e-06,
      "loss": 0.0492,
      "step": 373600
    },
    {
      "epoch": 0.6114373244830227,
      "grad_norm": 1.228029489517212,
      "learning_rate": 8.834959772803649e-06,
      "loss": 0.0261,
      "step": 373620
    },
    {
      "epoch": 0.611470054921676,
      "grad_norm": 0.6200376152992249,
      "learning_rate": 8.834893880590133e-06,
      "loss": 0.0392,
      "step": 373640
    },
    {
      "epoch": 0.6115027853603294,
      "grad_norm": 1.584894061088562,
      "learning_rate": 8.834827988376615e-06,
      "loss": 0.0261,
      "step": 373660
    },
    {
      "epoch": 0.6115355157989827,
      "grad_norm": 0.5844178795814514,
      "learning_rate": 8.834762096163098e-06,
      "loss": 0.0423,
      "step": 373680
    },
    {
      "epoch": 0.6115682462376361,
      "grad_norm": 2.727957010269165,
      "learning_rate": 8.83469620394958e-06,
      "loss": 0.0318,
      "step": 373700
    },
    {
      "epoch": 0.6116009766762894,
      "grad_norm": 1.339616060256958,
      "learning_rate": 8.834630311736064e-06,
      "loss": 0.0306,
      "step": 373720
    },
    {
      "epoch": 0.6116337071149428,
      "grad_norm": 1.0616710186004639,
      "learning_rate": 8.834564419522546e-06,
      "loss": 0.0393,
      "step": 373740
    },
    {
      "epoch": 0.6116664375535961,
      "grad_norm": 1.667402744293213,
      "learning_rate": 8.834498527309029e-06,
      "loss": 0.0264,
      "step": 373760
    },
    {
      "epoch": 0.6116991679922494,
      "grad_norm": 0.6242619752883911,
      "learning_rate": 8.834432635095511e-06,
      "loss": 0.0455,
      "step": 373780
    },
    {
      "epoch": 0.6117318984309028,
      "grad_norm": 1.2468008995056152,
      "learning_rate": 8.834366742881995e-06,
      "loss": 0.0385,
      "step": 373800
    },
    {
      "epoch": 0.6117646288695561,
      "grad_norm": 2.5008792877197266,
      "learning_rate": 8.834300850668476e-06,
      "loss": 0.0501,
      "step": 373820
    },
    {
      "epoch": 0.6117973593082094,
      "grad_norm": 1.2426458597183228,
      "learning_rate": 8.83423495845496e-06,
      "loss": 0.0289,
      "step": 373840
    },
    {
      "epoch": 0.6118300897468628,
      "grad_norm": 0.9906349182128906,
      "learning_rate": 8.834169066241442e-06,
      "loss": 0.0355,
      "step": 373860
    },
    {
      "epoch": 0.6118628201855161,
      "grad_norm": 1.394780158996582,
      "learning_rate": 8.834103174027926e-06,
      "loss": 0.0332,
      "step": 373880
    },
    {
      "epoch": 0.6118955506241694,
      "grad_norm": 0.28157609701156616,
      "learning_rate": 8.834037281814407e-06,
      "loss": 0.0399,
      "step": 373900
    },
    {
      "epoch": 0.6119282810628228,
      "grad_norm": 1.8415827751159668,
      "learning_rate": 8.833971389600891e-06,
      "loss": 0.0382,
      "step": 373920
    },
    {
      "epoch": 0.6119610115014762,
      "grad_norm": 0.8794082403182983,
      "learning_rate": 8.833905497387375e-06,
      "loss": 0.037,
      "step": 373940
    },
    {
      "epoch": 0.6119937419401295,
      "grad_norm": 0.6708359718322754,
      "learning_rate": 8.833839605173856e-06,
      "loss": 0.0294,
      "step": 373960
    },
    {
      "epoch": 0.6120264723787828,
      "grad_norm": 1.2248347997665405,
      "learning_rate": 8.83377371296034e-06,
      "loss": 0.0376,
      "step": 373980
    },
    {
      "epoch": 0.6120592028174362,
      "grad_norm": 1.2241309881210327,
      "learning_rate": 8.833707820746824e-06,
      "loss": 0.0305,
      "step": 374000
    },
    {
      "epoch": 0.6120919332560895,
      "grad_norm": 1.2228087186813354,
      "learning_rate": 8.833641928533306e-06,
      "loss": 0.0408,
      "step": 374020
    },
    {
      "epoch": 0.6121246636947428,
      "grad_norm": 1.2131681442260742,
      "learning_rate": 8.833576036319789e-06,
      "loss": 0.0283,
      "step": 374040
    },
    {
      "epoch": 0.6121573941333962,
      "grad_norm": 1.223711371421814,
      "learning_rate": 8.833510144106273e-06,
      "loss": 0.0422,
      "step": 374060
    },
    {
      "epoch": 0.6121901245720495,
      "grad_norm": 0.8865966200828552,
      "learning_rate": 8.833444251892755e-06,
      "loss": 0.0341,
      "step": 374080
    },
    {
      "epoch": 0.6122228550107028,
      "grad_norm": 0.7657243609428406,
      "learning_rate": 8.833378359679238e-06,
      "loss": 0.0254,
      "step": 374100
    },
    {
      "epoch": 0.6122555854493562,
      "grad_norm": 1.813679575920105,
      "learning_rate": 8.83331246746572e-06,
      "loss": 0.0396,
      "step": 374120
    },
    {
      "epoch": 0.6122883158880096,
      "grad_norm": 0.9015851616859436,
      "learning_rate": 8.833246575252204e-06,
      "loss": 0.0382,
      "step": 374140
    },
    {
      "epoch": 0.6123210463266628,
      "grad_norm": 0.5429168343544006,
      "learning_rate": 8.833180683038686e-06,
      "loss": 0.0347,
      "step": 374160
    },
    {
      "epoch": 0.6123537767653162,
      "grad_norm": 0.2365594357252121,
      "learning_rate": 8.83311479082517e-06,
      "loss": 0.0304,
      "step": 374180
    },
    {
      "epoch": 0.6123865072039696,
      "grad_norm": 1.8509011268615723,
      "learning_rate": 8.833048898611651e-06,
      "loss": 0.0319,
      "step": 374200
    },
    {
      "epoch": 0.6124192376426228,
      "grad_norm": 0.9347090721130371,
      "learning_rate": 8.832983006398135e-06,
      "loss": 0.0322,
      "step": 374220
    },
    {
      "epoch": 0.6124519680812762,
      "grad_norm": 4.108166694641113,
      "learning_rate": 8.832917114184617e-06,
      "loss": 0.0324,
      "step": 374240
    },
    {
      "epoch": 0.6124846985199296,
      "grad_norm": 1.3541516065597534,
      "learning_rate": 8.8328512219711e-06,
      "loss": 0.0324,
      "step": 374260
    },
    {
      "epoch": 0.6125174289585829,
      "grad_norm": 0.7235707640647888,
      "learning_rate": 8.832785329757582e-06,
      "loss": 0.0398,
      "step": 374280
    },
    {
      "epoch": 0.6125501593972362,
      "grad_norm": 0.5299726724624634,
      "learning_rate": 8.832719437544066e-06,
      "loss": 0.0323,
      "step": 374300
    },
    {
      "epoch": 0.6125828898358896,
      "grad_norm": 0.6451207995414734,
      "learning_rate": 8.83265354533055e-06,
      "loss": 0.041,
      "step": 374320
    },
    {
      "epoch": 0.612615620274543,
      "grad_norm": 0.9180921316146851,
      "learning_rate": 8.832587653117031e-06,
      "loss": 0.0294,
      "step": 374340
    },
    {
      "epoch": 0.6126483507131962,
      "grad_norm": 1.5540951490402222,
      "learning_rate": 8.832521760903515e-06,
      "loss": 0.0346,
      "step": 374360
    },
    {
      "epoch": 0.6126810811518496,
      "grad_norm": 0.5544844269752502,
      "learning_rate": 8.832455868689998e-06,
      "loss": 0.0308,
      "step": 374380
    },
    {
      "epoch": 0.612713811590503,
      "grad_norm": 1.7062158584594727,
      "learning_rate": 8.83238997647648e-06,
      "loss": 0.0385,
      "step": 374400
    },
    {
      "epoch": 0.6127465420291562,
      "grad_norm": 1.3599339723587036,
      "learning_rate": 8.832324084262964e-06,
      "loss": 0.0389,
      "step": 374420
    },
    {
      "epoch": 0.6127792724678096,
      "grad_norm": 0.30206960439682007,
      "learning_rate": 8.832258192049447e-06,
      "loss": 0.0261,
      "step": 374440
    },
    {
      "epoch": 0.612812002906463,
      "grad_norm": 1.4121845960617065,
      "learning_rate": 8.83219229983593e-06,
      "loss": 0.0358,
      "step": 374460
    },
    {
      "epoch": 0.6128447333451162,
      "grad_norm": 1.3892543315887451,
      "learning_rate": 8.832126407622413e-06,
      "loss": 0.0311,
      "step": 374480
    },
    {
      "epoch": 0.6128774637837696,
      "grad_norm": 0.18306010961532593,
      "learning_rate": 8.832060515408895e-06,
      "loss": 0.0399,
      "step": 374500
    },
    {
      "epoch": 0.612910194222423,
      "grad_norm": 2.194850444793701,
      "learning_rate": 8.831994623195378e-06,
      "loss": 0.037,
      "step": 374520
    },
    {
      "epoch": 0.6129429246610764,
      "grad_norm": 0.5975512266159058,
      "learning_rate": 8.83192873098186e-06,
      "loss": 0.02,
      "step": 374540
    },
    {
      "epoch": 0.6129756550997296,
      "grad_norm": 0.2462066262960434,
      "learning_rate": 8.831862838768344e-06,
      "loss": 0.0403,
      "step": 374560
    },
    {
      "epoch": 0.613008385538383,
      "grad_norm": 6.6614670753479,
      "learning_rate": 8.831796946554826e-06,
      "loss": 0.0296,
      "step": 374580
    },
    {
      "epoch": 0.6130411159770364,
      "grad_norm": 1.0934760570526123,
      "learning_rate": 8.83173105434131e-06,
      "loss": 0.0246,
      "step": 374600
    },
    {
      "epoch": 0.6130738464156896,
      "grad_norm": 1.8441565036773682,
      "learning_rate": 8.831665162127791e-06,
      "loss": 0.039,
      "step": 374620
    },
    {
      "epoch": 0.613106576854343,
      "grad_norm": 0.6098780632019043,
      "learning_rate": 8.831599269914275e-06,
      "loss": 0.0355,
      "step": 374640
    },
    {
      "epoch": 0.6131393072929964,
      "grad_norm": 0.8907705545425415,
      "learning_rate": 8.831533377700758e-06,
      "loss": 0.0386,
      "step": 374660
    },
    {
      "epoch": 0.6131720377316496,
      "grad_norm": 0.5361139178276062,
      "learning_rate": 8.83146748548724e-06,
      "loss": 0.0324,
      "step": 374680
    },
    {
      "epoch": 0.613204768170303,
      "grad_norm": 0.932873547077179,
      "learning_rate": 8.831401593273724e-06,
      "loss": 0.0291,
      "step": 374700
    },
    {
      "epoch": 0.6132374986089564,
      "grad_norm": 1.397097110748291,
      "learning_rate": 8.831335701060206e-06,
      "loss": 0.0273,
      "step": 374720
    },
    {
      "epoch": 0.6132702290476097,
      "grad_norm": 0.7472434043884277,
      "learning_rate": 8.83126980884669e-06,
      "loss": 0.0384,
      "step": 374740
    },
    {
      "epoch": 0.613302959486263,
      "grad_norm": 0.7198522090911865,
      "learning_rate": 8.831203916633171e-06,
      "loss": 0.0431,
      "step": 374760
    },
    {
      "epoch": 0.6133356899249164,
      "grad_norm": 0.8509058952331543,
      "learning_rate": 8.831138024419655e-06,
      "loss": 0.0355,
      "step": 374780
    },
    {
      "epoch": 0.6133684203635698,
      "grad_norm": 1.0344539880752563,
      "learning_rate": 8.831072132206138e-06,
      "loss": 0.0343,
      "step": 374800
    },
    {
      "epoch": 0.613401150802223,
      "grad_norm": 1.1618220806121826,
      "learning_rate": 8.831006239992622e-06,
      "loss": 0.0414,
      "step": 374820
    },
    {
      "epoch": 0.6134338812408764,
      "grad_norm": 1.5585722923278809,
      "learning_rate": 8.830940347779104e-06,
      "loss": 0.0349,
      "step": 374840
    },
    {
      "epoch": 0.6134666116795298,
      "grad_norm": 1.0753757953643799,
      "learning_rate": 8.830874455565587e-06,
      "loss": 0.0265,
      "step": 374860
    },
    {
      "epoch": 0.613499342118183,
      "grad_norm": 1.9072747230529785,
      "learning_rate": 8.83080856335207e-06,
      "loss": 0.0391,
      "step": 374880
    },
    {
      "epoch": 0.6135320725568364,
      "grad_norm": 1.4174261093139648,
      "learning_rate": 8.830742671138553e-06,
      "loss": 0.0313,
      "step": 374900
    },
    {
      "epoch": 0.6135648029954898,
      "grad_norm": 1.486236572265625,
      "learning_rate": 8.830676778925035e-06,
      "loss": 0.0416,
      "step": 374920
    },
    {
      "epoch": 0.6135975334341431,
      "grad_norm": 0.7481825351715088,
      "learning_rate": 8.830610886711518e-06,
      "loss": 0.0318,
      "step": 374940
    },
    {
      "epoch": 0.6136302638727964,
      "grad_norm": 0.5081890821456909,
      "learning_rate": 8.830544994498e-06,
      "loss": 0.039,
      "step": 374960
    },
    {
      "epoch": 0.6136629943114498,
      "grad_norm": 0.7498681545257568,
      "learning_rate": 8.830479102284484e-06,
      "loss": 0.0372,
      "step": 374980
    },
    {
      "epoch": 0.6136957247501031,
      "grad_norm": 1.9702367782592773,
      "learning_rate": 8.830413210070967e-06,
      "loss": 0.0415,
      "step": 375000
    },
    {
      "epoch": 0.6137284551887564,
      "grad_norm": 0.39241471886634827,
      "learning_rate": 8.83034731785745e-06,
      "loss": 0.0294,
      "step": 375020
    },
    {
      "epoch": 0.6137611856274098,
      "grad_norm": 1.025826096534729,
      "learning_rate": 8.830281425643933e-06,
      "loss": 0.0312,
      "step": 375040
    },
    {
      "epoch": 0.6137939160660631,
      "grad_norm": 3.819894552230835,
      "learning_rate": 8.830215533430415e-06,
      "loss": 0.0372,
      "step": 375060
    },
    {
      "epoch": 0.6138266465047164,
      "grad_norm": 0.09693332016468048,
      "learning_rate": 8.830149641216898e-06,
      "loss": 0.0353,
      "step": 375080
    },
    {
      "epoch": 0.6138593769433698,
      "grad_norm": 1.7903074026107788,
      "learning_rate": 8.83008374900338e-06,
      "loss": 0.0345,
      "step": 375100
    },
    {
      "epoch": 0.6138921073820232,
      "grad_norm": 1.046134352684021,
      "learning_rate": 8.830017856789864e-06,
      "loss": 0.0351,
      "step": 375120
    },
    {
      "epoch": 0.6139248378206765,
      "grad_norm": 0.4822646975517273,
      "learning_rate": 8.829951964576346e-06,
      "loss": 0.036,
      "step": 375140
    },
    {
      "epoch": 0.6139575682593298,
      "grad_norm": 2.6565303802490234,
      "learning_rate": 8.82988607236283e-06,
      "loss": 0.0427,
      "step": 375160
    },
    {
      "epoch": 0.6139902986979832,
      "grad_norm": 1.2367453575134277,
      "learning_rate": 8.829820180149313e-06,
      "loss": 0.0319,
      "step": 375180
    },
    {
      "epoch": 0.6140230291366365,
      "grad_norm": 0.9054353833198547,
      "learning_rate": 8.829754287935795e-06,
      "loss": 0.0372,
      "step": 375200
    },
    {
      "epoch": 0.6140557595752898,
      "grad_norm": 1.3100146055221558,
      "learning_rate": 8.829688395722278e-06,
      "loss": 0.0214,
      "step": 375220
    },
    {
      "epoch": 0.6140884900139432,
      "grad_norm": 0.33709466457366943,
      "learning_rate": 8.829622503508762e-06,
      "loss": 0.0444,
      "step": 375240
    },
    {
      "epoch": 0.6141212204525965,
      "grad_norm": 2.44620418548584,
      "learning_rate": 8.829556611295244e-06,
      "loss": 0.0397,
      "step": 375260
    },
    {
      "epoch": 0.6141539508912498,
      "grad_norm": 1.0322197675704956,
      "learning_rate": 8.829490719081727e-06,
      "loss": 0.0422,
      "step": 375280
    },
    {
      "epoch": 0.6141866813299032,
      "grad_norm": 1.123966097831726,
      "learning_rate": 8.82942482686821e-06,
      "loss": 0.0268,
      "step": 375300
    },
    {
      "epoch": 0.6142194117685565,
      "grad_norm": 1.4616366624832153,
      "learning_rate": 8.829358934654693e-06,
      "loss": 0.036,
      "step": 375320
    },
    {
      "epoch": 0.6142521422072098,
      "grad_norm": 1.244447112083435,
      "learning_rate": 8.829293042441175e-06,
      "loss": 0.0421,
      "step": 375340
    },
    {
      "epoch": 0.6142848726458632,
      "grad_norm": 1.2722917795181274,
      "learning_rate": 8.829227150227658e-06,
      "loss": 0.0349,
      "step": 375360
    },
    {
      "epoch": 0.6143176030845166,
      "grad_norm": 0.9900149703025818,
      "learning_rate": 8.829161258014142e-06,
      "loss": 0.0449,
      "step": 375380
    },
    {
      "epoch": 0.6143503335231699,
      "grad_norm": 3.206181049346924,
      "learning_rate": 8.829095365800624e-06,
      "loss": 0.048,
      "step": 375400
    },
    {
      "epoch": 0.6143830639618232,
      "grad_norm": 1.0028096437454224,
      "learning_rate": 8.829029473587108e-06,
      "loss": 0.0294,
      "step": 375420
    },
    {
      "epoch": 0.6144157944004766,
      "grad_norm": 1.8515727519989014,
      "learning_rate": 8.82896358137359e-06,
      "loss": 0.0358,
      "step": 375440
    },
    {
      "epoch": 0.6144485248391299,
      "grad_norm": 1.5426182746887207,
      "learning_rate": 8.828897689160073e-06,
      "loss": 0.0447,
      "step": 375460
    },
    {
      "epoch": 0.6144812552777832,
      "grad_norm": 1.3034141063690186,
      "learning_rate": 8.828831796946555e-06,
      "loss": 0.0298,
      "step": 375480
    },
    {
      "epoch": 0.6145139857164366,
      "grad_norm": 1.1376266479492188,
      "learning_rate": 8.828765904733038e-06,
      "loss": 0.0283,
      "step": 375500
    },
    {
      "epoch": 0.6145467161550899,
      "grad_norm": 1.5164403915405273,
      "learning_rate": 8.82870001251952e-06,
      "loss": 0.0267,
      "step": 375520
    },
    {
      "epoch": 0.6145794465937432,
      "grad_norm": 1.9437474012374878,
      "learning_rate": 8.828634120306004e-06,
      "loss": 0.0316,
      "step": 375540
    },
    {
      "epoch": 0.6146121770323966,
      "grad_norm": 1.1405538320541382,
      "learning_rate": 8.828568228092488e-06,
      "loss": 0.026,
      "step": 375560
    },
    {
      "epoch": 0.6146449074710499,
      "grad_norm": 1.8602672815322876,
      "learning_rate": 8.82850233587897e-06,
      "loss": 0.0294,
      "step": 375580
    },
    {
      "epoch": 0.6146776379097033,
      "grad_norm": 1.6733856201171875,
      "learning_rate": 8.828436443665453e-06,
      "loss": 0.033,
      "step": 375600
    },
    {
      "epoch": 0.6147103683483566,
      "grad_norm": 1.2678714990615845,
      "learning_rate": 8.828370551451937e-06,
      "loss": 0.0254,
      "step": 375620
    },
    {
      "epoch": 0.61474309878701,
      "grad_norm": 0.22551296651363373,
      "learning_rate": 8.828304659238418e-06,
      "loss": 0.0342,
      "step": 375640
    },
    {
      "epoch": 0.6147758292256633,
      "grad_norm": 0.691950261592865,
      "learning_rate": 8.828238767024902e-06,
      "loss": 0.039,
      "step": 375660
    },
    {
      "epoch": 0.6148085596643166,
      "grad_norm": 1.420907974243164,
      "learning_rate": 8.828172874811384e-06,
      "loss": 0.047,
      "step": 375680
    },
    {
      "epoch": 0.61484129010297,
      "grad_norm": 1.8020964860916138,
      "learning_rate": 8.828106982597868e-06,
      "loss": 0.0441,
      "step": 375700
    },
    {
      "epoch": 0.6148740205416233,
      "grad_norm": 1.3114835023880005,
      "learning_rate": 8.828041090384351e-06,
      "loss": 0.035,
      "step": 375720
    },
    {
      "epoch": 0.6149067509802766,
      "grad_norm": 1.4381972551345825,
      "learning_rate": 8.827975198170833e-06,
      "loss": 0.0333,
      "step": 375740
    },
    {
      "epoch": 0.61493948141893,
      "grad_norm": 0.9316144585609436,
      "learning_rate": 8.827909305957317e-06,
      "loss": 0.0372,
      "step": 375760
    },
    {
      "epoch": 0.6149722118575833,
      "grad_norm": 1.2212332487106323,
      "learning_rate": 8.827843413743799e-06,
      "loss": 0.0408,
      "step": 375780
    },
    {
      "epoch": 0.6150049422962367,
      "grad_norm": 3.1960091590881348,
      "learning_rate": 8.827777521530282e-06,
      "loss": 0.0496,
      "step": 375800
    },
    {
      "epoch": 0.61503767273489,
      "grad_norm": 0.7115856409072876,
      "learning_rate": 8.827711629316764e-06,
      "loss": 0.0445,
      "step": 375820
    },
    {
      "epoch": 0.6150704031735433,
      "grad_norm": 1.0014476776123047,
      "learning_rate": 8.827645737103248e-06,
      "loss": 0.0364,
      "step": 375840
    },
    {
      "epoch": 0.6151031336121967,
      "grad_norm": 1.6708985567092896,
      "learning_rate": 8.82757984488973e-06,
      "loss": 0.0411,
      "step": 375860
    },
    {
      "epoch": 0.61513586405085,
      "grad_norm": 0.8948214650154114,
      "learning_rate": 8.827513952676213e-06,
      "loss": 0.0278,
      "step": 375880
    },
    {
      "epoch": 0.6151685944895033,
      "grad_norm": 0.4210827052593231,
      "learning_rate": 8.827448060462695e-06,
      "loss": 0.0441,
      "step": 375900
    },
    {
      "epoch": 0.6152013249281567,
      "grad_norm": 1.1954959630966187,
      "learning_rate": 8.827382168249179e-06,
      "loss": 0.0408,
      "step": 375920
    },
    {
      "epoch": 0.61523405536681,
      "grad_norm": 0.9309098124504089,
      "learning_rate": 8.82731627603566e-06,
      "loss": 0.0195,
      "step": 375940
    },
    {
      "epoch": 0.6152667858054633,
      "grad_norm": 1.3262487649917603,
      "learning_rate": 8.827250383822144e-06,
      "loss": 0.0295,
      "step": 375960
    },
    {
      "epoch": 0.6152995162441167,
      "grad_norm": 1.3837738037109375,
      "learning_rate": 8.827184491608628e-06,
      "loss": 0.0355,
      "step": 375980
    },
    {
      "epoch": 0.6153322466827701,
      "grad_norm": 0.5126792192459106,
      "learning_rate": 8.82711859939511e-06,
      "loss": 0.033,
      "step": 376000
    },
    {
      "epoch": 0.6153649771214234,
      "grad_norm": 0.6570810079574585,
      "learning_rate": 8.827052707181593e-06,
      "loss": 0.0316,
      "step": 376020
    },
    {
      "epoch": 0.6153977075600767,
      "grad_norm": 1.488451600074768,
      "learning_rate": 8.826986814968077e-06,
      "loss": 0.0444,
      "step": 376040
    },
    {
      "epoch": 0.6154304379987301,
      "grad_norm": 5.255941867828369,
      "learning_rate": 8.826920922754559e-06,
      "loss": 0.0372,
      "step": 376060
    },
    {
      "epoch": 0.6154631684373834,
      "grad_norm": 1.2951804399490356,
      "learning_rate": 8.826855030541042e-06,
      "loss": 0.0476,
      "step": 376080
    },
    {
      "epoch": 0.6154958988760367,
      "grad_norm": 0.2859887480735779,
      "learning_rate": 8.826789138327526e-06,
      "loss": 0.037,
      "step": 376100
    },
    {
      "epoch": 0.6155286293146901,
      "grad_norm": 0.5144352912902832,
      "learning_rate": 8.826723246114008e-06,
      "loss": 0.0297,
      "step": 376120
    },
    {
      "epoch": 0.6155613597533434,
      "grad_norm": 0.7318134903907776,
      "learning_rate": 8.826657353900491e-06,
      "loss": 0.0354,
      "step": 376140
    },
    {
      "epoch": 0.6155940901919967,
      "grad_norm": 2.293917179107666,
      "learning_rate": 8.826591461686973e-06,
      "loss": 0.0332,
      "step": 376160
    },
    {
      "epoch": 0.6156268206306501,
      "grad_norm": 0.4216081202030182,
      "learning_rate": 8.826525569473457e-06,
      "loss": 0.0277,
      "step": 376180
    },
    {
      "epoch": 0.6156595510693035,
      "grad_norm": 1.1087126731872559,
      "learning_rate": 8.826459677259939e-06,
      "loss": 0.0392,
      "step": 376200
    },
    {
      "epoch": 0.6156922815079567,
      "grad_norm": 3.3215606212615967,
      "learning_rate": 8.826393785046422e-06,
      "loss": 0.0336,
      "step": 376220
    },
    {
      "epoch": 0.6157250119466101,
      "grad_norm": 0.8201338052749634,
      "learning_rate": 8.826327892832904e-06,
      "loss": 0.0334,
      "step": 376240
    },
    {
      "epoch": 0.6157577423852635,
      "grad_norm": 0.7814756631851196,
      "learning_rate": 8.826262000619388e-06,
      "loss": 0.0304,
      "step": 376260
    },
    {
      "epoch": 0.6157904728239167,
      "grad_norm": 2.402387857437134,
      "learning_rate": 8.82619610840587e-06,
      "loss": 0.0519,
      "step": 376280
    },
    {
      "epoch": 0.6158232032625701,
      "grad_norm": 0.5855184197425842,
      "learning_rate": 8.826130216192353e-06,
      "loss": 0.0309,
      "step": 376300
    },
    {
      "epoch": 0.6158559337012235,
      "grad_norm": 1.8306505680084229,
      "learning_rate": 8.826064323978835e-06,
      "loss": 0.0385,
      "step": 376320
    },
    {
      "epoch": 0.6158886641398768,
      "grad_norm": 1.2270382642745972,
      "learning_rate": 8.825998431765319e-06,
      "loss": 0.0231,
      "step": 376340
    },
    {
      "epoch": 0.6159213945785301,
      "grad_norm": 0.8928114175796509,
      "learning_rate": 8.825932539551802e-06,
      "loss": 0.0303,
      "step": 376360
    },
    {
      "epoch": 0.6159541250171835,
      "grad_norm": 0.8200329542160034,
      "learning_rate": 8.825866647338284e-06,
      "loss": 0.0329,
      "step": 376380
    },
    {
      "epoch": 0.6159868554558369,
      "grad_norm": 1.214430809020996,
      "learning_rate": 8.825800755124768e-06,
      "loss": 0.0377,
      "step": 376400
    },
    {
      "epoch": 0.6160195858944901,
      "grad_norm": 1.6239356994628906,
      "learning_rate": 8.825734862911251e-06,
      "loss": 0.0298,
      "step": 376420
    },
    {
      "epoch": 0.6160523163331435,
      "grad_norm": 0.5676780939102173,
      "learning_rate": 8.825668970697733e-06,
      "loss": 0.0385,
      "step": 376440
    },
    {
      "epoch": 0.6160850467717969,
      "grad_norm": 2.6705329418182373,
      "learning_rate": 8.825603078484217e-06,
      "loss": 0.0385,
      "step": 376460
    },
    {
      "epoch": 0.6161177772104501,
      "grad_norm": 1.853620171546936,
      "learning_rate": 8.8255371862707e-06,
      "loss": 0.0377,
      "step": 376480
    },
    {
      "epoch": 0.6161505076491035,
      "grad_norm": 1.7769280672073364,
      "learning_rate": 8.825471294057182e-06,
      "loss": 0.0379,
      "step": 376500
    },
    {
      "epoch": 0.6161832380877569,
      "grad_norm": 0.9203168153762817,
      "learning_rate": 8.825405401843666e-06,
      "loss": 0.0382,
      "step": 376520
    },
    {
      "epoch": 0.6162159685264101,
      "grad_norm": 0.5046605467796326,
      "learning_rate": 8.825339509630148e-06,
      "loss": 0.0286,
      "step": 376540
    },
    {
      "epoch": 0.6162486989650635,
      "grad_norm": 2.379101276397705,
      "learning_rate": 8.825273617416631e-06,
      "loss": 0.0536,
      "step": 376560
    },
    {
      "epoch": 0.6162814294037169,
      "grad_norm": 1.1618024110794067,
      "learning_rate": 8.825207725203113e-06,
      "loss": 0.0412,
      "step": 376580
    },
    {
      "epoch": 0.6163141598423703,
      "grad_norm": 0.9908413290977478,
      "learning_rate": 8.825141832989597e-06,
      "loss": 0.0307,
      "step": 376600
    },
    {
      "epoch": 0.6163468902810235,
      "grad_norm": 8.72143840789795,
      "learning_rate": 8.825075940776079e-06,
      "loss": 0.0386,
      "step": 376620
    },
    {
      "epoch": 0.6163796207196769,
      "grad_norm": 1.7161741256713867,
      "learning_rate": 8.825010048562562e-06,
      "loss": 0.0279,
      "step": 376640
    },
    {
      "epoch": 0.6164123511583303,
      "grad_norm": 2.719773292541504,
      "learning_rate": 8.824944156349044e-06,
      "loss": 0.0314,
      "step": 376660
    },
    {
      "epoch": 0.6164450815969835,
      "grad_norm": 2.5577011108398438,
      "learning_rate": 8.824878264135528e-06,
      "loss": 0.0498,
      "step": 376680
    },
    {
      "epoch": 0.6164778120356369,
      "grad_norm": 6.147876262664795,
      "learning_rate": 8.82481237192201e-06,
      "loss": 0.0383,
      "step": 376700
    },
    {
      "epoch": 0.6165105424742903,
      "grad_norm": 2.7575197219848633,
      "learning_rate": 8.824746479708493e-06,
      "loss": 0.0303,
      "step": 376720
    },
    {
      "epoch": 0.6165432729129435,
      "grad_norm": 3.0984203815460205,
      "learning_rate": 8.824680587494975e-06,
      "loss": 0.0408,
      "step": 376740
    },
    {
      "epoch": 0.6165760033515969,
      "grad_norm": 0.8279898762702942,
      "learning_rate": 8.824614695281459e-06,
      "loss": 0.0309,
      "step": 376760
    },
    {
      "epoch": 0.6166087337902503,
      "grad_norm": 2.1549103260040283,
      "learning_rate": 8.824548803067942e-06,
      "loss": 0.0488,
      "step": 376780
    },
    {
      "epoch": 0.6166414642289036,
      "grad_norm": 1.398404598236084,
      "learning_rate": 8.824482910854424e-06,
      "loss": 0.0296,
      "step": 376800
    },
    {
      "epoch": 0.6166741946675569,
      "grad_norm": 1.0970468521118164,
      "learning_rate": 8.824417018640908e-06,
      "loss": 0.0329,
      "step": 376820
    },
    {
      "epoch": 0.6167069251062103,
      "grad_norm": 1.1993299722671509,
      "learning_rate": 8.824351126427391e-06,
      "loss": 0.0273,
      "step": 376840
    },
    {
      "epoch": 0.6167396555448637,
      "grad_norm": 0.7446510791778564,
      "learning_rate": 8.824285234213873e-06,
      "loss": 0.0419,
      "step": 376860
    },
    {
      "epoch": 0.6167723859835169,
      "grad_norm": 0.7636898159980774,
      "learning_rate": 8.824219342000357e-06,
      "loss": 0.0398,
      "step": 376880
    },
    {
      "epoch": 0.6168051164221703,
      "grad_norm": 1.3891301155090332,
      "learning_rate": 8.82415344978684e-06,
      "loss": 0.0307,
      "step": 376900
    },
    {
      "epoch": 0.6168378468608237,
      "grad_norm": 0.9218955039978027,
      "learning_rate": 8.824087557573322e-06,
      "loss": 0.0375,
      "step": 376920
    },
    {
      "epoch": 0.6168705772994769,
      "grad_norm": 0.4574989676475525,
      "learning_rate": 8.824021665359806e-06,
      "loss": 0.0327,
      "step": 376940
    },
    {
      "epoch": 0.6169033077381303,
      "grad_norm": 2.222325563430786,
      "learning_rate": 8.823955773146288e-06,
      "loss": 0.038,
      "step": 376960
    },
    {
      "epoch": 0.6169360381767837,
      "grad_norm": 0.44829657673835754,
      "learning_rate": 8.823889880932771e-06,
      "loss": 0.0329,
      "step": 376980
    },
    {
      "epoch": 0.616968768615437,
      "grad_norm": 0.19359587132930756,
      "learning_rate": 8.823823988719253e-06,
      "loss": 0.0424,
      "step": 377000
    },
    {
      "epoch": 0.6170014990540903,
      "grad_norm": 1.6117204427719116,
      "learning_rate": 8.823758096505737e-06,
      "loss": 0.0321,
      "step": 377020
    },
    {
      "epoch": 0.6170342294927437,
      "grad_norm": 2.463428020477295,
      "learning_rate": 8.823692204292219e-06,
      "loss": 0.0286,
      "step": 377040
    },
    {
      "epoch": 0.617066959931397,
      "grad_norm": 0.7536623477935791,
      "learning_rate": 8.823626312078702e-06,
      "loss": 0.0348,
      "step": 377060
    },
    {
      "epoch": 0.6170996903700503,
      "grad_norm": 0.4625183045864105,
      "learning_rate": 8.823560419865184e-06,
      "loss": 0.0406,
      "step": 377080
    },
    {
      "epoch": 0.6171324208087037,
      "grad_norm": 3.613187074661255,
      "learning_rate": 8.823494527651668e-06,
      "loss": 0.0337,
      "step": 377100
    },
    {
      "epoch": 0.617165151247357,
      "grad_norm": 2.715766668319702,
      "learning_rate": 8.823428635438151e-06,
      "loss": 0.0315,
      "step": 377120
    },
    {
      "epoch": 0.6171978816860103,
      "grad_norm": 1.3465005159378052,
      "learning_rate": 8.823362743224633e-06,
      "loss": 0.036,
      "step": 377140
    },
    {
      "epoch": 0.6172306121246637,
      "grad_norm": 2.678603172302246,
      "learning_rate": 8.823296851011117e-06,
      "loss": 0.0333,
      "step": 377160
    },
    {
      "epoch": 0.617263342563317,
      "grad_norm": 0.24818173050880432,
      "learning_rate": 8.823230958797599e-06,
      "loss": 0.027,
      "step": 377180
    },
    {
      "epoch": 0.6172960730019704,
      "grad_norm": 0.9310929775238037,
      "learning_rate": 8.823165066584082e-06,
      "loss": 0.0359,
      "step": 377200
    },
    {
      "epoch": 0.6173288034406237,
      "grad_norm": 0.998950183391571,
      "learning_rate": 8.823099174370566e-06,
      "loss": 0.0391,
      "step": 377220
    },
    {
      "epoch": 0.6173615338792771,
      "grad_norm": 2.6493654251098633,
      "learning_rate": 8.823033282157048e-06,
      "loss": 0.0399,
      "step": 377240
    },
    {
      "epoch": 0.6173942643179304,
      "grad_norm": 0.45061251521110535,
      "learning_rate": 8.822967389943531e-06,
      "loss": 0.03,
      "step": 377260
    },
    {
      "epoch": 0.6174269947565837,
      "grad_norm": 1.6342312097549438,
      "learning_rate": 8.822901497730015e-06,
      "loss": 0.0401,
      "step": 377280
    },
    {
      "epoch": 0.6174597251952371,
      "grad_norm": 1.4931148290634155,
      "learning_rate": 8.822835605516497e-06,
      "loss": 0.0257,
      "step": 377300
    },
    {
      "epoch": 0.6174924556338904,
      "grad_norm": 0.5554714798927307,
      "learning_rate": 8.82276971330298e-06,
      "loss": 0.0222,
      "step": 377320
    },
    {
      "epoch": 0.6175251860725437,
      "grad_norm": 1.3362385034561157,
      "learning_rate": 8.822703821089462e-06,
      "loss": 0.0326,
      "step": 377340
    },
    {
      "epoch": 0.6175579165111971,
      "grad_norm": 0.4030790627002716,
      "learning_rate": 8.822637928875946e-06,
      "loss": 0.0232,
      "step": 377360
    },
    {
      "epoch": 0.6175906469498504,
      "grad_norm": 0.51877760887146,
      "learning_rate": 8.822572036662428e-06,
      "loss": 0.0456,
      "step": 377380
    },
    {
      "epoch": 0.6176233773885038,
      "grad_norm": 1.3845741748809814,
      "learning_rate": 8.822506144448911e-06,
      "loss": 0.0332,
      "step": 377400
    },
    {
      "epoch": 0.6176561078271571,
      "grad_norm": 1.9383889436721802,
      "learning_rate": 8.822440252235393e-06,
      "loss": 0.0223,
      "step": 377420
    },
    {
      "epoch": 0.6176888382658104,
      "grad_norm": 0.40620431303977966,
      "learning_rate": 8.822374360021877e-06,
      "loss": 0.035,
      "step": 377440
    },
    {
      "epoch": 0.6177215687044638,
      "grad_norm": 0.879830539226532,
      "learning_rate": 8.822308467808359e-06,
      "loss": 0.0439,
      "step": 377460
    },
    {
      "epoch": 0.6177542991431171,
      "grad_norm": 0.3900492191314697,
      "learning_rate": 8.822242575594842e-06,
      "loss": 0.0336,
      "step": 377480
    },
    {
      "epoch": 0.6177870295817705,
      "grad_norm": 0.46729400753974915,
      "learning_rate": 8.822176683381326e-06,
      "loss": 0.0267,
      "step": 377500
    },
    {
      "epoch": 0.6178197600204238,
      "grad_norm": 1.6642695665359497,
      "learning_rate": 8.822110791167808e-06,
      "loss": 0.0349,
      "step": 377520
    },
    {
      "epoch": 0.6178524904590771,
      "grad_norm": 1.2937029600143433,
      "learning_rate": 8.822044898954291e-06,
      "loss": 0.0384,
      "step": 377540
    },
    {
      "epoch": 0.6178852208977305,
      "grad_norm": 2.7100253105163574,
      "learning_rate": 8.821979006740773e-06,
      "loss": 0.0498,
      "step": 377560
    },
    {
      "epoch": 0.6179179513363838,
      "grad_norm": 1.8429310321807861,
      "learning_rate": 8.821913114527257e-06,
      "loss": 0.0376,
      "step": 377580
    },
    {
      "epoch": 0.6179506817750372,
      "grad_norm": 2.2302796840667725,
      "learning_rate": 8.82184722231374e-06,
      "loss": 0.0356,
      "step": 377600
    },
    {
      "epoch": 0.6179834122136905,
      "grad_norm": 0.7473365664482117,
      "learning_rate": 8.821781330100222e-06,
      "loss": 0.0382,
      "step": 377620
    },
    {
      "epoch": 0.6180161426523438,
      "grad_norm": 1.9728965759277344,
      "learning_rate": 8.821715437886706e-06,
      "loss": 0.0275,
      "step": 377640
    },
    {
      "epoch": 0.6180488730909972,
      "grad_norm": 0.6884370446205139,
      "learning_rate": 8.82164954567319e-06,
      "loss": 0.032,
      "step": 377660
    },
    {
      "epoch": 0.6180816035296505,
      "grad_norm": 1.2134499549865723,
      "learning_rate": 8.821583653459671e-06,
      "loss": 0.0378,
      "step": 377680
    },
    {
      "epoch": 0.6181143339683038,
      "grad_norm": 0.9869769215583801,
      "learning_rate": 8.821517761246155e-06,
      "loss": 0.0294,
      "step": 377700
    },
    {
      "epoch": 0.6181470644069572,
      "grad_norm": 0.909440815448761,
      "learning_rate": 8.821451869032637e-06,
      "loss": 0.0495,
      "step": 377720
    },
    {
      "epoch": 0.6181797948456105,
      "grad_norm": 1.9073574542999268,
      "learning_rate": 8.82138597681912e-06,
      "loss": 0.0259,
      "step": 377740
    },
    {
      "epoch": 0.6182125252842638,
      "grad_norm": 2.621918201446533,
      "learning_rate": 8.821320084605602e-06,
      "loss": 0.03,
      "step": 377760
    },
    {
      "epoch": 0.6182452557229172,
      "grad_norm": 0.5015835165977478,
      "learning_rate": 8.821254192392086e-06,
      "loss": 0.0281,
      "step": 377780
    },
    {
      "epoch": 0.6182779861615706,
      "grad_norm": 1.5678468942642212,
      "learning_rate": 8.821188300178568e-06,
      "loss": 0.0306,
      "step": 377800
    },
    {
      "epoch": 0.6183107166002239,
      "grad_norm": 6.21850061416626,
      "learning_rate": 8.821122407965052e-06,
      "loss": 0.0491,
      "step": 377820
    },
    {
      "epoch": 0.6183434470388772,
      "grad_norm": 1.0010539293289185,
      "learning_rate": 8.821056515751535e-06,
      "loss": 0.0353,
      "step": 377840
    },
    {
      "epoch": 0.6183761774775306,
      "grad_norm": 3.4867501258850098,
      "learning_rate": 8.820990623538017e-06,
      "loss": 0.0317,
      "step": 377860
    },
    {
      "epoch": 0.6184089079161839,
      "grad_norm": 1.0273300409317017,
      "learning_rate": 8.8209247313245e-06,
      "loss": 0.0401,
      "step": 377880
    },
    {
      "epoch": 0.6184416383548372,
      "grad_norm": 1.9504634141921997,
      "learning_rate": 8.820858839110982e-06,
      "loss": 0.0335,
      "step": 377900
    },
    {
      "epoch": 0.6184743687934906,
      "grad_norm": 0.5597811937332153,
      "learning_rate": 8.820792946897466e-06,
      "loss": 0.0287,
      "step": 377920
    },
    {
      "epoch": 0.6185070992321439,
      "grad_norm": 1.9796581268310547,
      "learning_rate": 8.820727054683948e-06,
      "loss": 0.0358,
      "step": 377940
    },
    {
      "epoch": 0.6185398296707972,
      "grad_norm": 2.492004871368408,
      "learning_rate": 8.820661162470432e-06,
      "loss": 0.0376,
      "step": 377960
    },
    {
      "epoch": 0.6185725601094506,
      "grad_norm": 1.8224042654037476,
      "learning_rate": 8.820595270256913e-06,
      "loss": 0.0482,
      "step": 377980
    },
    {
      "epoch": 0.618605290548104,
      "grad_norm": 1.1013296842575073,
      "learning_rate": 8.820529378043397e-06,
      "loss": 0.0432,
      "step": 378000
    },
    {
      "epoch": 0.6186380209867572,
      "grad_norm": 1.3195128440856934,
      "learning_rate": 8.82046348582988e-06,
      "loss": 0.0368,
      "step": 378020
    },
    {
      "epoch": 0.6186707514254106,
      "grad_norm": 2.9259402751922607,
      "learning_rate": 8.820397593616363e-06,
      "loss": 0.0372,
      "step": 378040
    },
    {
      "epoch": 0.618703481864064,
      "grad_norm": 0.6270114183425903,
      "learning_rate": 8.820331701402846e-06,
      "loss": 0.0386,
      "step": 378060
    },
    {
      "epoch": 0.6187362123027172,
      "grad_norm": 1.0037901401519775,
      "learning_rate": 8.82026580918933e-06,
      "loss": 0.0262,
      "step": 378080
    },
    {
      "epoch": 0.6187689427413706,
      "grad_norm": 1.4457333087921143,
      "learning_rate": 8.820199916975812e-06,
      "loss": 0.0434,
      "step": 378100
    },
    {
      "epoch": 0.618801673180024,
      "grad_norm": 1.3562488555908203,
      "learning_rate": 8.820134024762295e-06,
      "loss": 0.0381,
      "step": 378120
    },
    {
      "epoch": 0.6188344036186773,
      "grad_norm": 1.4005701541900635,
      "learning_rate": 8.820068132548777e-06,
      "loss": 0.0432,
      "step": 378140
    },
    {
      "epoch": 0.6188671340573306,
      "grad_norm": 0.5706954598426819,
      "learning_rate": 8.82000224033526e-06,
      "loss": 0.0418,
      "step": 378160
    },
    {
      "epoch": 0.618899864495984,
      "grad_norm": 1.3937273025512695,
      "learning_rate": 8.819936348121744e-06,
      "loss": 0.0314,
      "step": 378180
    },
    {
      "epoch": 0.6189325949346373,
      "grad_norm": 0.6006250977516174,
      "learning_rate": 8.819870455908226e-06,
      "loss": 0.0403,
      "step": 378200
    },
    {
      "epoch": 0.6189653253732906,
      "grad_norm": 1.7025840282440186,
      "learning_rate": 8.81980456369471e-06,
      "loss": 0.0385,
      "step": 378220
    },
    {
      "epoch": 0.618998055811944,
      "grad_norm": 0.470319002866745,
      "learning_rate": 8.819738671481192e-06,
      "loss": 0.0269,
      "step": 378240
    },
    {
      "epoch": 0.6190307862505974,
      "grad_norm": 0.7188738584518433,
      "learning_rate": 8.819672779267675e-06,
      "loss": 0.0422,
      "step": 378260
    },
    {
      "epoch": 0.6190635166892506,
      "grad_norm": 1.755337119102478,
      "learning_rate": 8.819606887054157e-06,
      "loss": 0.0383,
      "step": 378280
    },
    {
      "epoch": 0.619096247127904,
      "grad_norm": 0.5586517453193665,
      "learning_rate": 8.81954099484064e-06,
      "loss": 0.0404,
      "step": 378300
    },
    {
      "epoch": 0.6191289775665574,
      "grad_norm": 0.5667635798454285,
      "learning_rate": 8.819475102627123e-06,
      "loss": 0.0325,
      "step": 378320
    },
    {
      "epoch": 0.6191617080052106,
      "grad_norm": 1.3194479942321777,
      "learning_rate": 8.819409210413606e-06,
      "loss": 0.0263,
      "step": 378340
    },
    {
      "epoch": 0.619194438443864,
      "grad_norm": 1.1000511646270752,
      "learning_rate": 8.819343318200088e-06,
      "loss": 0.0288,
      "step": 378360
    },
    {
      "epoch": 0.6192271688825174,
      "grad_norm": 3.7979631423950195,
      "learning_rate": 8.819277425986572e-06,
      "loss": 0.0437,
      "step": 378380
    },
    {
      "epoch": 0.6192598993211706,
      "grad_norm": 0.6416857838630676,
      "learning_rate": 8.819211533773055e-06,
      "loss": 0.032,
      "step": 378400
    },
    {
      "epoch": 0.619292629759824,
      "grad_norm": 1.8525521755218506,
      "learning_rate": 8.819145641559537e-06,
      "loss": 0.0401,
      "step": 378420
    },
    {
      "epoch": 0.6193253601984774,
      "grad_norm": 0.6227959990501404,
      "learning_rate": 8.81907974934602e-06,
      "loss": 0.0453,
      "step": 378440
    },
    {
      "epoch": 0.6193580906371308,
      "grad_norm": 0.757074773311615,
      "learning_rate": 8.819013857132504e-06,
      "loss": 0.0267,
      "step": 378460
    },
    {
      "epoch": 0.619390821075784,
      "grad_norm": 0.1690477877855301,
      "learning_rate": 8.818947964918986e-06,
      "loss": 0.0518,
      "step": 378480
    },
    {
      "epoch": 0.6194235515144374,
      "grad_norm": 4.067193984985352,
      "learning_rate": 8.81888207270547e-06,
      "loss": 0.0426,
      "step": 378500
    },
    {
      "epoch": 0.6194562819530908,
      "grad_norm": 0.7739656567573547,
      "learning_rate": 8.818816180491953e-06,
      "loss": 0.0345,
      "step": 378520
    },
    {
      "epoch": 0.619489012391744,
      "grad_norm": 2.0967273712158203,
      "learning_rate": 8.818750288278435e-06,
      "loss": 0.0365,
      "step": 378540
    },
    {
      "epoch": 0.6195217428303974,
      "grad_norm": 1.001359224319458,
      "learning_rate": 8.818684396064919e-06,
      "loss": 0.0514,
      "step": 378560
    },
    {
      "epoch": 0.6195544732690508,
      "grad_norm": 1.3281571865081787,
      "learning_rate": 8.8186185038514e-06,
      "loss": 0.0297,
      "step": 378580
    },
    {
      "epoch": 0.619587203707704,
      "grad_norm": 1.167714238166809,
      "learning_rate": 8.818552611637884e-06,
      "loss": 0.0362,
      "step": 378600
    },
    {
      "epoch": 0.6196199341463574,
      "grad_norm": 0.5839204788208008,
      "learning_rate": 8.818486719424366e-06,
      "loss": 0.0277,
      "step": 378620
    },
    {
      "epoch": 0.6196526645850108,
      "grad_norm": 2.908317804336548,
      "learning_rate": 8.81842082721085e-06,
      "loss": 0.0384,
      "step": 378640
    },
    {
      "epoch": 0.6196853950236642,
      "grad_norm": 2.8643267154693604,
      "learning_rate": 8.818354934997332e-06,
      "loss": 0.031,
      "step": 378660
    },
    {
      "epoch": 0.6197181254623174,
      "grad_norm": 1.3195905685424805,
      "learning_rate": 8.818289042783815e-06,
      "loss": 0.0381,
      "step": 378680
    },
    {
      "epoch": 0.6197508559009708,
      "grad_norm": 2.614015817642212,
      "learning_rate": 8.818223150570297e-06,
      "loss": 0.0312,
      "step": 378700
    },
    {
      "epoch": 0.6197835863396242,
      "grad_norm": 1.4558426141738892,
      "learning_rate": 8.81815725835678e-06,
      "loss": 0.0312,
      "step": 378720
    },
    {
      "epoch": 0.6198163167782774,
      "grad_norm": 0.3519449234008789,
      "learning_rate": 8.818091366143263e-06,
      "loss": 0.0316,
      "step": 378740
    },
    {
      "epoch": 0.6198490472169308,
      "grad_norm": 1.1937353610992432,
      "learning_rate": 8.818025473929746e-06,
      "loss": 0.0489,
      "step": 378760
    },
    {
      "epoch": 0.6198817776555842,
      "grad_norm": 2.1745786666870117,
      "learning_rate": 8.817959581716228e-06,
      "loss": 0.0627,
      "step": 378780
    },
    {
      "epoch": 0.6199145080942374,
      "grad_norm": 0.9788427948951721,
      "learning_rate": 8.817893689502712e-06,
      "loss": 0.0364,
      "step": 378800
    },
    {
      "epoch": 0.6199472385328908,
      "grad_norm": 0.6993206739425659,
      "learning_rate": 8.817827797289195e-06,
      "loss": 0.0347,
      "step": 378820
    },
    {
      "epoch": 0.6199799689715442,
      "grad_norm": 0.4587109386920929,
      "learning_rate": 8.817761905075677e-06,
      "loss": 0.0384,
      "step": 378840
    },
    {
      "epoch": 0.6200126994101975,
      "grad_norm": 1.0259069204330444,
      "learning_rate": 8.81769601286216e-06,
      "loss": 0.0283,
      "step": 378860
    },
    {
      "epoch": 0.6200454298488508,
      "grad_norm": 1.3400906324386597,
      "learning_rate": 8.817630120648644e-06,
      "loss": 0.0285,
      "step": 378880
    },
    {
      "epoch": 0.6200781602875042,
      "grad_norm": 1.632431149482727,
      "learning_rate": 8.817564228435126e-06,
      "loss": 0.0356,
      "step": 378900
    },
    {
      "epoch": 0.6201108907261575,
      "grad_norm": 2.421313524246216,
      "learning_rate": 8.81749833622161e-06,
      "loss": 0.0463,
      "step": 378920
    },
    {
      "epoch": 0.6201436211648108,
      "grad_norm": 1.8631924390792847,
      "learning_rate": 8.817432444008093e-06,
      "loss": 0.0396,
      "step": 378940
    },
    {
      "epoch": 0.6201763516034642,
      "grad_norm": 0.4020039737224579,
      "learning_rate": 8.817366551794575e-06,
      "loss": 0.0412,
      "step": 378960
    },
    {
      "epoch": 0.6202090820421176,
      "grad_norm": 1.2259308099746704,
      "learning_rate": 8.817300659581059e-06,
      "loss": 0.0413,
      "step": 378980
    },
    {
      "epoch": 0.6202418124807708,
      "grad_norm": 5.331111431121826,
      "learning_rate": 8.81723476736754e-06,
      "loss": 0.0222,
      "step": 379000
    },
    {
      "epoch": 0.6202745429194242,
      "grad_norm": 1.6622097492218018,
      "learning_rate": 8.817168875154024e-06,
      "loss": 0.0355,
      "step": 379020
    },
    {
      "epoch": 0.6203072733580776,
      "grad_norm": 1.1829073429107666,
      "learning_rate": 8.817102982940506e-06,
      "loss": 0.0256,
      "step": 379040
    },
    {
      "epoch": 0.6203400037967309,
      "grad_norm": 4.885733127593994,
      "learning_rate": 8.81703709072699e-06,
      "loss": 0.0327,
      "step": 379060
    },
    {
      "epoch": 0.6203727342353842,
      "grad_norm": 0.9574297070503235,
      "learning_rate": 8.816971198513472e-06,
      "loss": 0.0265,
      "step": 379080
    },
    {
      "epoch": 0.6204054646740376,
      "grad_norm": 2.7723538875579834,
      "learning_rate": 8.816905306299955e-06,
      "loss": 0.0448,
      "step": 379100
    },
    {
      "epoch": 0.6204381951126909,
      "grad_norm": 1.2487155199050903,
      "learning_rate": 8.816839414086437e-06,
      "loss": 0.0336,
      "step": 379120
    },
    {
      "epoch": 0.6204709255513442,
      "grad_norm": 0.48294302821159363,
      "learning_rate": 8.81677352187292e-06,
      "loss": 0.0317,
      "step": 379140
    },
    {
      "epoch": 0.6205036559899976,
      "grad_norm": 1.4518145322799683,
      "learning_rate": 8.816707629659403e-06,
      "loss": 0.0475,
      "step": 379160
    },
    {
      "epoch": 0.6205363864286509,
      "grad_norm": 2.981131076812744,
      "learning_rate": 8.816641737445886e-06,
      "loss": 0.034,
      "step": 379180
    },
    {
      "epoch": 0.6205691168673042,
      "grad_norm": 1.0828875303268433,
      "learning_rate": 8.81657584523237e-06,
      "loss": 0.0361,
      "step": 379200
    },
    {
      "epoch": 0.6206018473059576,
      "grad_norm": 2.199333906173706,
      "learning_rate": 8.816509953018852e-06,
      "loss": 0.0339,
      "step": 379220
    },
    {
      "epoch": 0.620634577744611,
      "grad_norm": 0.623958945274353,
      "learning_rate": 8.816444060805335e-06,
      "loss": 0.0326,
      "step": 379240
    },
    {
      "epoch": 0.6206673081832643,
      "grad_norm": 0.9346476793289185,
      "learning_rate": 8.816378168591819e-06,
      "loss": 0.0495,
      "step": 379260
    },
    {
      "epoch": 0.6207000386219176,
      "grad_norm": 1.08397376537323,
      "learning_rate": 8.8163122763783e-06,
      "loss": 0.0422,
      "step": 379280
    },
    {
      "epoch": 0.620732769060571,
      "grad_norm": 1.3410921096801758,
      "learning_rate": 8.816246384164784e-06,
      "loss": 0.0315,
      "step": 379300
    },
    {
      "epoch": 0.6207654994992243,
      "grad_norm": 0.8714115619659424,
      "learning_rate": 8.816180491951268e-06,
      "loss": 0.0402,
      "step": 379320
    },
    {
      "epoch": 0.6207982299378776,
      "grad_norm": 2.386603355407715,
      "learning_rate": 8.81611459973775e-06,
      "loss": 0.0473,
      "step": 379340
    },
    {
      "epoch": 0.620830960376531,
      "grad_norm": 0.5404723882675171,
      "learning_rate": 8.816048707524233e-06,
      "loss": 0.0379,
      "step": 379360
    },
    {
      "epoch": 0.6208636908151843,
      "grad_norm": 1.1925705671310425,
      "learning_rate": 8.815982815310715e-06,
      "loss": 0.0369,
      "step": 379380
    },
    {
      "epoch": 0.6208964212538376,
      "grad_norm": 1.19515860080719,
      "learning_rate": 8.815916923097199e-06,
      "loss": 0.034,
      "step": 379400
    },
    {
      "epoch": 0.620929151692491,
      "grad_norm": 1.4301449060440063,
      "learning_rate": 8.815851030883681e-06,
      "loss": 0.0458,
      "step": 379420
    },
    {
      "epoch": 0.6209618821311443,
      "grad_norm": 1.0276159048080444,
      "learning_rate": 8.815785138670164e-06,
      "loss": 0.0322,
      "step": 379440
    },
    {
      "epoch": 0.6209946125697977,
      "grad_norm": 0.2090219110250473,
      "learning_rate": 8.815719246456646e-06,
      "loss": 0.032,
      "step": 379460
    },
    {
      "epoch": 0.621027343008451,
      "grad_norm": 2.19726824760437,
      "learning_rate": 8.81565335424313e-06,
      "loss": 0.0404,
      "step": 379480
    },
    {
      "epoch": 0.6210600734471043,
      "grad_norm": 2.3702001571655273,
      "learning_rate": 8.815587462029612e-06,
      "loss": 0.0416,
      "step": 379500
    },
    {
      "epoch": 0.6210928038857577,
      "grad_norm": 1.1075060367584229,
      "learning_rate": 8.815521569816095e-06,
      "loss": 0.0315,
      "step": 379520
    },
    {
      "epoch": 0.621125534324411,
      "grad_norm": 1.2190630435943604,
      "learning_rate": 8.815455677602577e-06,
      "loss": 0.0239,
      "step": 379540
    },
    {
      "epoch": 0.6211582647630643,
      "grad_norm": 1.577635407447815,
      "learning_rate": 8.815389785389061e-06,
      "loss": 0.0287,
      "step": 379560
    },
    {
      "epoch": 0.6211909952017177,
      "grad_norm": 2.9518797397613525,
      "learning_rate": 8.815323893175544e-06,
      "loss": 0.0351,
      "step": 379580
    },
    {
      "epoch": 0.621223725640371,
      "grad_norm": 0.5271841883659363,
      "learning_rate": 8.815258000962026e-06,
      "loss": 0.0292,
      "step": 379600
    },
    {
      "epoch": 0.6212564560790244,
      "grad_norm": 0.6570878624916077,
      "learning_rate": 8.81519210874851e-06,
      "loss": 0.0334,
      "step": 379620
    },
    {
      "epoch": 0.6212891865176777,
      "grad_norm": 0.9678937792778015,
      "learning_rate": 8.815126216534992e-06,
      "loss": 0.0314,
      "step": 379640
    },
    {
      "epoch": 0.6213219169563311,
      "grad_norm": 0.41368067264556885,
      "learning_rate": 8.815060324321475e-06,
      "loss": 0.0365,
      "step": 379660
    },
    {
      "epoch": 0.6213546473949844,
      "grad_norm": 0.43925032019615173,
      "learning_rate": 8.814994432107959e-06,
      "loss": 0.0297,
      "step": 379680
    },
    {
      "epoch": 0.6213873778336377,
      "grad_norm": 0.5185566544532776,
      "learning_rate": 8.814928539894441e-06,
      "loss": 0.0299,
      "step": 379700
    },
    {
      "epoch": 0.6214201082722911,
      "grad_norm": 2.056330442428589,
      "learning_rate": 8.814862647680924e-06,
      "loss": 0.0304,
      "step": 379720
    },
    {
      "epoch": 0.6214528387109444,
      "grad_norm": 0.4376215636730194,
      "learning_rate": 8.814796755467408e-06,
      "loss": 0.0375,
      "step": 379740
    },
    {
      "epoch": 0.6214855691495977,
      "grad_norm": 1.7273335456848145,
      "learning_rate": 8.81473086325389e-06,
      "loss": 0.0384,
      "step": 379760
    },
    {
      "epoch": 0.6215182995882511,
      "grad_norm": 1.0958110094070435,
      "learning_rate": 8.814664971040374e-06,
      "loss": 0.0284,
      "step": 379780
    },
    {
      "epoch": 0.6215510300269044,
      "grad_norm": 2.0002214908599854,
      "learning_rate": 8.814599078826855e-06,
      "loss": 0.0363,
      "step": 379800
    },
    {
      "epoch": 0.6215837604655577,
      "grad_norm": 1.0087299346923828,
      "learning_rate": 8.814533186613339e-06,
      "loss": 0.0327,
      "step": 379820
    },
    {
      "epoch": 0.6216164909042111,
      "grad_norm": 1.4017770290374756,
      "learning_rate": 8.814467294399821e-06,
      "loss": 0.0515,
      "step": 379840
    },
    {
      "epoch": 0.6216492213428645,
      "grad_norm": 0.9904226660728455,
      "learning_rate": 8.814401402186305e-06,
      "loss": 0.031,
      "step": 379860
    },
    {
      "epoch": 0.6216819517815177,
      "grad_norm": 0.8206269145011902,
      "learning_rate": 8.814335509972786e-06,
      "loss": 0.0258,
      "step": 379880
    },
    {
      "epoch": 0.6217146822201711,
      "grad_norm": 1.7520201206207275,
      "learning_rate": 8.81426961775927e-06,
      "loss": 0.0395,
      "step": 379900
    },
    {
      "epoch": 0.6217474126588245,
      "grad_norm": 1.2763442993164062,
      "learning_rate": 8.814203725545752e-06,
      "loss": 0.044,
      "step": 379920
    },
    {
      "epoch": 0.6217801430974778,
      "grad_norm": 0.37610557675361633,
      "learning_rate": 8.814137833332235e-06,
      "loss": 0.0297,
      "step": 379940
    },
    {
      "epoch": 0.6218128735361311,
      "grad_norm": 1.247024655342102,
      "learning_rate": 8.814071941118719e-06,
      "loss": 0.0389,
      "step": 379960
    },
    {
      "epoch": 0.6218456039747845,
      "grad_norm": 3.4817683696746826,
      "learning_rate": 8.814006048905201e-06,
      "loss": 0.0284,
      "step": 379980
    },
    {
      "epoch": 0.6218783344134378,
      "grad_norm": 8.41766357421875,
      "learning_rate": 8.813940156691685e-06,
      "loss": 0.0398,
      "step": 380000
    },
    {
      "epoch": 0.6219110648520911,
      "grad_norm": 1.5913513898849487,
      "learning_rate": 8.813874264478166e-06,
      "loss": 0.0442,
      "step": 380020
    },
    {
      "epoch": 0.6219437952907445,
      "grad_norm": 0.3358806371688843,
      "learning_rate": 8.81380837226465e-06,
      "loss": 0.047,
      "step": 380040
    },
    {
      "epoch": 0.6219765257293979,
      "grad_norm": 1.3046398162841797,
      "learning_rate": 8.813742480051134e-06,
      "loss": 0.031,
      "step": 380060
    },
    {
      "epoch": 0.6220092561680511,
      "grad_norm": 0.9781603813171387,
      "learning_rate": 8.813676587837616e-06,
      "loss": 0.0387,
      "step": 380080
    },
    {
      "epoch": 0.6220419866067045,
      "grad_norm": 0.3934136927127838,
      "learning_rate": 8.813610695624099e-06,
      "loss": 0.0529,
      "step": 380100
    },
    {
      "epoch": 0.6220747170453579,
      "grad_norm": 0.9317978620529175,
      "learning_rate": 8.813544803410583e-06,
      "loss": 0.0358,
      "step": 380120
    },
    {
      "epoch": 0.6221074474840111,
      "grad_norm": 2.192662000656128,
      "learning_rate": 8.813478911197065e-06,
      "loss": 0.0261,
      "step": 380140
    },
    {
      "epoch": 0.6221401779226645,
      "grad_norm": 0.7771278619766235,
      "learning_rate": 8.813413018983548e-06,
      "loss": 0.0313,
      "step": 380160
    },
    {
      "epoch": 0.6221729083613179,
      "grad_norm": 1.2069759368896484,
      "learning_rate": 8.81334712677003e-06,
      "loss": 0.0264,
      "step": 380180
    },
    {
      "epoch": 0.6222056387999712,
      "grad_norm": 1.4910833835601807,
      "learning_rate": 8.813281234556514e-06,
      "loss": 0.0329,
      "step": 380200
    },
    {
      "epoch": 0.6222383692386245,
      "grad_norm": 1.2120921611785889,
      "learning_rate": 8.813215342342996e-06,
      "loss": 0.0283,
      "step": 380220
    },
    {
      "epoch": 0.6222710996772779,
      "grad_norm": 2.5087976455688477,
      "learning_rate": 8.813149450129479e-06,
      "loss": 0.0364,
      "step": 380240
    },
    {
      "epoch": 0.6223038301159313,
      "grad_norm": 0.4858190715312958,
      "learning_rate": 8.813083557915961e-06,
      "loss": 0.0384,
      "step": 380260
    },
    {
      "epoch": 0.6223365605545845,
      "grad_norm": 1.0432101488113403,
      "learning_rate": 8.813017665702445e-06,
      "loss": 0.0341,
      "step": 380280
    },
    {
      "epoch": 0.6223692909932379,
      "grad_norm": 10.942705154418945,
      "learning_rate": 8.812951773488928e-06,
      "loss": 0.0378,
      "step": 380300
    },
    {
      "epoch": 0.6224020214318913,
      "grad_norm": 0.5361773371696472,
      "learning_rate": 8.81288588127541e-06,
      "loss": 0.0319,
      "step": 380320
    },
    {
      "epoch": 0.6224347518705445,
      "grad_norm": 0.8985428810119629,
      "learning_rate": 8.812819989061894e-06,
      "loss": 0.0245,
      "step": 380340
    },
    {
      "epoch": 0.6224674823091979,
      "grad_norm": 8.560176849365234,
      "learning_rate": 8.812754096848376e-06,
      "loss": 0.0355,
      "step": 380360
    },
    {
      "epoch": 0.6225002127478513,
      "grad_norm": 1.900067925453186,
      "learning_rate": 8.812688204634859e-06,
      "loss": 0.0385,
      "step": 380380
    },
    {
      "epoch": 0.6225329431865045,
      "grad_norm": 2.8012948036193848,
      "learning_rate": 8.812622312421341e-06,
      "loss": 0.0416,
      "step": 380400
    },
    {
      "epoch": 0.6225656736251579,
      "grad_norm": 0.6077089309692383,
      "learning_rate": 8.812556420207825e-06,
      "loss": 0.0382,
      "step": 380420
    },
    {
      "epoch": 0.6225984040638113,
      "grad_norm": 0.9361763000488281,
      "learning_rate": 8.812490527994308e-06,
      "loss": 0.045,
      "step": 380440
    },
    {
      "epoch": 0.6226311345024647,
      "grad_norm": 1.8658630847930908,
      "learning_rate": 8.81242463578079e-06,
      "loss": 0.0412,
      "step": 380460
    },
    {
      "epoch": 0.6226638649411179,
      "grad_norm": 2.3723628520965576,
      "learning_rate": 8.812358743567274e-06,
      "loss": 0.0491,
      "step": 380480
    },
    {
      "epoch": 0.6226965953797713,
      "grad_norm": 0.7885697484016418,
      "learning_rate": 8.812292851353757e-06,
      "loss": 0.0383,
      "step": 380500
    },
    {
      "epoch": 0.6227293258184247,
      "grad_norm": 1.0535900592803955,
      "learning_rate": 8.81222695914024e-06,
      "loss": 0.0331,
      "step": 380520
    },
    {
      "epoch": 0.6227620562570779,
      "grad_norm": 0.509918749332428,
      "learning_rate": 8.812161066926723e-06,
      "loss": 0.0277,
      "step": 380540
    },
    {
      "epoch": 0.6227947866957313,
      "grad_norm": 0.8107106685638428,
      "learning_rate": 8.812095174713205e-06,
      "loss": 0.0382,
      "step": 380560
    },
    {
      "epoch": 0.6228275171343847,
      "grad_norm": 1.399154782295227,
      "learning_rate": 8.812029282499688e-06,
      "loss": 0.0411,
      "step": 380580
    },
    {
      "epoch": 0.6228602475730379,
      "grad_norm": 0.5054699778556824,
      "learning_rate": 8.81196339028617e-06,
      "loss": 0.0401,
      "step": 380600
    },
    {
      "epoch": 0.6228929780116913,
      "grad_norm": 1.8123364448547363,
      "learning_rate": 8.811897498072654e-06,
      "loss": 0.0428,
      "step": 380620
    },
    {
      "epoch": 0.6229257084503447,
      "grad_norm": 0.9359804391860962,
      "learning_rate": 8.811831605859137e-06,
      "loss": 0.0457,
      "step": 380640
    },
    {
      "epoch": 0.622958438888998,
      "grad_norm": 2.291097640991211,
      "learning_rate": 8.81176571364562e-06,
      "loss": 0.037,
      "step": 380660
    },
    {
      "epoch": 0.6229911693276513,
      "grad_norm": 0.8430635929107666,
      "learning_rate": 8.811699821432103e-06,
      "loss": 0.0275,
      "step": 380680
    },
    {
      "epoch": 0.6230238997663047,
      "grad_norm": 0.9919478297233582,
      "learning_rate": 8.811633929218585e-06,
      "loss": 0.0302,
      "step": 380700
    },
    {
      "epoch": 0.623056630204958,
      "grad_norm": 1.3057291507720947,
      "learning_rate": 8.811568037005068e-06,
      "loss": 0.0427,
      "step": 380720
    },
    {
      "epoch": 0.6230893606436113,
      "grad_norm": 5.369714736938477,
      "learning_rate": 8.81150214479155e-06,
      "loss": 0.0259,
      "step": 380740
    },
    {
      "epoch": 0.6231220910822647,
      "grad_norm": 0.8781946301460266,
      "learning_rate": 8.811436252578034e-06,
      "loss": 0.0382,
      "step": 380760
    },
    {
      "epoch": 0.6231548215209181,
      "grad_norm": 0.4535585045814514,
      "learning_rate": 8.811370360364516e-06,
      "loss": 0.0462,
      "step": 380780
    },
    {
      "epoch": 0.6231875519595713,
      "grad_norm": 0.8352216482162476,
      "learning_rate": 8.811304468151e-06,
      "loss": 0.0349,
      "step": 380800
    },
    {
      "epoch": 0.6232202823982247,
      "grad_norm": 1.1974070072174072,
      "learning_rate": 8.811238575937481e-06,
      "loss": 0.0352,
      "step": 380820
    },
    {
      "epoch": 0.6232530128368781,
      "grad_norm": 2.9884581565856934,
      "learning_rate": 8.811172683723965e-06,
      "loss": 0.0374,
      "step": 380840
    },
    {
      "epoch": 0.6232857432755313,
      "grad_norm": 0.4644445776939392,
      "learning_rate": 8.811106791510448e-06,
      "loss": 0.0393,
      "step": 380860
    },
    {
      "epoch": 0.6233184737141847,
      "grad_norm": 3.866187572479248,
      "learning_rate": 8.81104089929693e-06,
      "loss": 0.0345,
      "step": 380880
    },
    {
      "epoch": 0.6233512041528381,
      "grad_norm": 1.3733068704605103,
      "learning_rate": 8.810975007083414e-06,
      "loss": 0.0441,
      "step": 380900
    },
    {
      "epoch": 0.6233839345914914,
      "grad_norm": 2.214731216430664,
      "learning_rate": 8.810909114869897e-06,
      "loss": 0.0317,
      "step": 380920
    },
    {
      "epoch": 0.6234166650301447,
      "grad_norm": 2.1028480529785156,
      "learning_rate": 8.81084322265638e-06,
      "loss": 0.0371,
      "step": 380940
    },
    {
      "epoch": 0.6234493954687981,
      "grad_norm": 0.9406535029411316,
      "learning_rate": 8.810777330442863e-06,
      "loss": 0.0269,
      "step": 380960
    },
    {
      "epoch": 0.6234821259074514,
      "grad_norm": 0.9248241186141968,
      "learning_rate": 8.810711438229345e-06,
      "loss": 0.029,
      "step": 380980
    },
    {
      "epoch": 0.6235148563461047,
      "grad_norm": 1.1561365127563477,
      "learning_rate": 8.810645546015828e-06,
      "loss": 0.0307,
      "step": 381000
    },
    {
      "epoch": 0.6235475867847581,
      "grad_norm": 0.2887299954891205,
      "learning_rate": 8.810579653802312e-06,
      "loss": 0.0328,
      "step": 381020
    },
    {
      "epoch": 0.6235803172234115,
      "grad_norm": 1.5110986232757568,
      "learning_rate": 8.810513761588794e-06,
      "loss": 0.0393,
      "step": 381040
    },
    {
      "epoch": 0.6236130476620647,
      "grad_norm": 0.9348236918449402,
      "learning_rate": 8.810447869375277e-06,
      "loss": 0.0447,
      "step": 381060
    },
    {
      "epoch": 0.6236457781007181,
      "grad_norm": 3.2468953132629395,
      "learning_rate": 8.81038197716176e-06,
      "loss": 0.0241,
      "step": 381080
    },
    {
      "epoch": 0.6236785085393715,
      "grad_norm": 1.0232422351837158,
      "learning_rate": 8.810316084948243e-06,
      "loss": 0.0242,
      "step": 381100
    },
    {
      "epoch": 0.6237112389780248,
      "grad_norm": 7.62849235534668,
      "learning_rate": 8.810250192734725e-06,
      "loss": 0.0302,
      "step": 381120
    },
    {
      "epoch": 0.6237439694166781,
      "grad_norm": 1.2641018629074097,
      "learning_rate": 8.810184300521208e-06,
      "loss": 0.0328,
      "step": 381140
    },
    {
      "epoch": 0.6237766998553315,
      "grad_norm": 3.6412858963012695,
      "learning_rate": 8.81011840830769e-06,
      "loss": 0.0358,
      "step": 381160
    },
    {
      "epoch": 0.6238094302939848,
      "grad_norm": 0.4522773027420044,
      "learning_rate": 8.810052516094174e-06,
      "loss": 0.0452,
      "step": 381180
    },
    {
      "epoch": 0.6238421607326381,
      "grad_norm": 3.034541606903076,
      "learning_rate": 8.809986623880656e-06,
      "loss": 0.0322,
      "step": 381200
    },
    {
      "epoch": 0.6238748911712915,
      "grad_norm": 0.9634844660758972,
      "learning_rate": 8.80992073166714e-06,
      "loss": 0.0365,
      "step": 381220
    },
    {
      "epoch": 0.6239076216099448,
      "grad_norm": 0.805525004863739,
      "learning_rate": 8.809854839453623e-06,
      "loss": 0.027,
      "step": 381240
    },
    {
      "epoch": 0.6239403520485981,
      "grad_norm": 0.2051856964826584,
      "learning_rate": 8.809788947240105e-06,
      "loss": 0.0324,
      "step": 381260
    },
    {
      "epoch": 0.6239730824872515,
      "grad_norm": 0.28876063227653503,
      "learning_rate": 8.809723055026588e-06,
      "loss": 0.036,
      "step": 381280
    },
    {
      "epoch": 0.6240058129259048,
      "grad_norm": 0.9760788083076477,
      "learning_rate": 8.809657162813072e-06,
      "loss": 0.0374,
      "step": 381300
    },
    {
      "epoch": 0.6240385433645582,
      "grad_norm": 2.0580551624298096,
      "learning_rate": 8.809591270599554e-06,
      "loss": 0.0365,
      "step": 381320
    },
    {
      "epoch": 0.6240712738032115,
      "grad_norm": 0.6786676049232483,
      "learning_rate": 8.809525378386037e-06,
      "loss": 0.0329,
      "step": 381340
    },
    {
      "epoch": 0.6241040042418649,
      "grad_norm": 1.142781138420105,
      "learning_rate": 8.809459486172521e-06,
      "loss": 0.0397,
      "step": 381360
    },
    {
      "epoch": 0.6241367346805182,
      "grad_norm": 1.7002633810043335,
      "learning_rate": 8.809393593959003e-06,
      "loss": 0.0363,
      "step": 381380
    },
    {
      "epoch": 0.6241694651191715,
      "grad_norm": 0.668043851852417,
      "learning_rate": 8.809327701745486e-06,
      "loss": 0.0354,
      "step": 381400
    },
    {
      "epoch": 0.6242021955578249,
      "grad_norm": 1.0333669185638428,
      "learning_rate": 8.809261809531968e-06,
      "loss": 0.0514,
      "step": 381420
    },
    {
      "epoch": 0.6242349259964782,
      "grad_norm": 0.5553584098815918,
      "learning_rate": 8.809195917318452e-06,
      "loss": 0.034,
      "step": 381440
    },
    {
      "epoch": 0.6242676564351315,
      "grad_norm": 1.5705534219741821,
      "learning_rate": 8.809130025104934e-06,
      "loss": 0.036,
      "step": 381460
    },
    {
      "epoch": 0.6243003868737849,
      "grad_norm": 2.7189812660217285,
      "learning_rate": 8.809064132891417e-06,
      "loss": 0.0362,
      "step": 381480
    },
    {
      "epoch": 0.6243331173124382,
      "grad_norm": 1.1933223009109497,
      "learning_rate": 8.8089982406779e-06,
      "loss": 0.0234,
      "step": 381500
    },
    {
      "epoch": 0.6243658477510916,
      "grad_norm": 1.6223002672195435,
      "learning_rate": 8.808932348464383e-06,
      "loss": 0.028,
      "step": 381520
    },
    {
      "epoch": 0.6243985781897449,
      "grad_norm": 3.497490882873535,
      "learning_rate": 8.808866456250865e-06,
      "loss": 0.0479,
      "step": 381540
    },
    {
      "epoch": 0.6244313086283982,
      "grad_norm": 1.027403473854065,
      "learning_rate": 8.808800564037348e-06,
      "loss": 0.0441,
      "step": 381560
    },
    {
      "epoch": 0.6244640390670516,
      "grad_norm": 1.7594635486602783,
      "learning_rate": 8.80873467182383e-06,
      "loss": 0.0304,
      "step": 381580
    },
    {
      "epoch": 0.6244967695057049,
      "grad_norm": 1.246897578239441,
      "learning_rate": 8.808668779610314e-06,
      "loss": 0.0376,
      "step": 381600
    },
    {
      "epoch": 0.6245294999443582,
      "grad_norm": 0.5200256109237671,
      "learning_rate": 8.808602887396796e-06,
      "loss": 0.0289,
      "step": 381620
    },
    {
      "epoch": 0.6245622303830116,
      "grad_norm": 1.612392783164978,
      "learning_rate": 8.80853699518328e-06,
      "loss": 0.0351,
      "step": 381640
    },
    {
      "epoch": 0.6245949608216649,
      "grad_norm": 1.1275367736816406,
      "learning_rate": 8.808471102969763e-06,
      "loss": 0.0402,
      "step": 381660
    },
    {
      "epoch": 0.6246276912603183,
      "grad_norm": 1.258239507675171,
      "learning_rate": 8.808405210756245e-06,
      "loss": 0.0369,
      "step": 381680
    },
    {
      "epoch": 0.6246604216989716,
      "grad_norm": 0.9902604222297668,
      "learning_rate": 8.808339318542728e-06,
      "loss": 0.0342,
      "step": 381700
    },
    {
      "epoch": 0.624693152137625,
      "grad_norm": 0.5986449718475342,
      "learning_rate": 8.808273426329212e-06,
      "loss": 0.0348,
      "step": 381720
    },
    {
      "epoch": 0.6247258825762783,
      "grad_norm": 2.1446402072906494,
      "learning_rate": 8.808207534115694e-06,
      "loss": 0.0429,
      "step": 381740
    },
    {
      "epoch": 0.6247586130149316,
      "grad_norm": 2.113814353942871,
      "learning_rate": 8.808141641902178e-06,
      "loss": 0.035,
      "step": 381760
    },
    {
      "epoch": 0.624791343453585,
      "grad_norm": 0.6796018481254578,
      "learning_rate": 8.808075749688661e-06,
      "loss": 0.0369,
      "step": 381780
    },
    {
      "epoch": 0.6248240738922383,
      "grad_norm": 0.9172894358634949,
      "learning_rate": 8.808009857475143e-06,
      "loss": 0.033,
      "step": 381800
    },
    {
      "epoch": 0.6248568043308916,
      "grad_norm": 1.05357825756073,
      "learning_rate": 8.807943965261627e-06,
      "loss": 0.0327,
      "step": 381820
    },
    {
      "epoch": 0.624889534769545,
      "grad_norm": 5.978453159332275,
      "learning_rate": 8.807878073048108e-06,
      "loss": 0.0484,
      "step": 381840
    },
    {
      "epoch": 0.6249222652081983,
      "grad_norm": 0.53505939245224,
      "learning_rate": 8.807812180834592e-06,
      "loss": 0.0252,
      "step": 381860
    },
    {
      "epoch": 0.6249549956468516,
      "grad_norm": 1.27992582321167,
      "learning_rate": 8.807746288621074e-06,
      "loss": 0.0335,
      "step": 381880
    },
    {
      "epoch": 0.624987726085505,
      "grad_norm": 0.5283344388008118,
      "learning_rate": 8.807680396407558e-06,
      "loss": 0.0346,
      "step": 381900
    },
    {
      "epoch": 0.6250204565241584,
      "grad_norm": 1.3358303308486938,
      "learning_rate": 8.80761450419404e-06,
      "loss": 0.0414,
      "step": 381920
    },
    {
      "epoch": 0.6250531869628116,
      "grad_norm": 0.804055392742157,
      "learning_rate": 8.807548611980523e-06,
      "loss": 0.0213,
      "step": 381940
    },
    {
      "epoch": 0.625085917401465,
      "grad_norm": 0.6161623597145081,
      "learning_rate": 8.807482719767005e-06,
      "loss": 0.0357,
      "step": 381960
    },
    {
      "epoch": 0.6251186478401184,
      "grad_norm": 0.33807870745658875,
      "learning_rate": 8.807416827553488e-06,
      "loss": 0.0442,
      "step": 381980
    },
    {
      "epoch": 0.6251513782787717,
      "grad_norm": 1.618050217628479,
      "learning_rate": 8.80735093533997e-06,
      "loss": 0.0309,
      "step": 382000
    },
    {
      "epoch": 0.625184108717425,
      "grad_norm": 0.5708260536193848,
      "learning_rate": 8.807285043126454e-06,
      "loss": 0.0247,
      "step": 382020
    },
    {
      "epoch": 0.6252168391560784,
      "grad_norm": 1.1331340074539185,
      "learning_rate": 8.807219150912938e-06,
      "loss": 0.0377,
      "step": 382040
    },
    {
      "epoch": 0.6252495695947317,
      "grad_norm": 0.6152474880218506,
      "learning_rate": 8.80715325869942e-06,
      "loss": 0.0342,
      "step": 382060
    },
    {
      "epoch": 0.625282300033385,
      "grad_norm": 0.5879475474357605,
      "learning_rate": 8.807087366485903e-06,
      "loss": 0.054,
      "step": 382080
    },
    {
      "epoch": 0.6253150304720384,
      "grad_norm": 1.300726294517517,
      "learning_rate": 8.807021474272387e-06,
      "loss": 0.0304,
      "step": 382100
    },
    {
      "epoch": 0.6253477609106918,
      "grad_norm": 0.733055830001831,
      "learning_rate": 8.806955582058869e-06,
      "loss": 0.0253,
      "step": 382120
    },
    {
      "epoch": 0.625380491349345,
      "grad_norm": 0.5370354652404785,
      "learning_rate": 8.806889689845352e-06,
      "loss": 0.0385,
      "step": 382140
    },
    {
      "epoch": 0.6254132217879984,
      "grad_norm": 1.8423347473144531,
      "learning_rate": 8.806823797631836e-06,
      "loss": 0.0386,
      "step": 382160
    },
    {
      "epoch": 0.6254459522266518,
      "grad_norm": 0.5727786421775818,
      "learning_rate": 8.806757905418318e-06,
      "loss": 0.0328,
      "step": 382180
    },
    {
      "epoch": 0.625478682665305,
      "grad_norm": 2.726937770843506,
      "learning_rate": 8.806692013204801e-06,
      "loss": 0.0376,
      "step": 382200
    },
    {
      "epoch": 0.6255114131039584,
      "grad_norm": 0.6057122349739075,
      "learning_rate": 8.806626120991283e-06,
      "loss": 0.0258,
      "step": 382220
    },
    {
      "epoch": 0.6255441435426118,
      "grad_norm": 0.1829804629087448,
      "learning_rate": 8.806560228777767e-06,
      "loss": 0.0373,
      "step": 382240
    },
    {
      "epoch": 0.625576873981265,
      "grad_norm": 0.866689920425415,
      "learning_rate": 8.806494336564249e-06,
      "loss": 0.0331,
      "step": 382260
    },
    {
      "epoch": 0.6256096044199184,
      "grad_norm": Infinity,
      "learning_rate": 8.806428444350732e-06,
      "loss": 0.0372,
      "step": 382280
    },
    {
      "epoch": 0.6256423348585718,
      "grad_norm": 1.1374447345733643,
      "learning_rate": 8.806362552137214e-06,
      "loss": 0.0297,
      "step": 382300
    },
    {
      "epoch": 0.6256750652972252,
      "grad_norm": 1.412558674812317,
      "learning_rate": 8.806296659923698e-06,
      "loss": 0.0345,
      "step": 382320
    },
    {
      "epoch": 0.6257077957358784,
      "grad_norm": 0.6857057213783264,
      "learning_rate": 8.80623076771018e-06,
      "loss": 0.0254,
      "step": 382340
    },
    {
      "epoch": 0.6257405261745318,
      "grad_norm": 0.40901562571525574,
      "learning_rate": 8.806164875496663e-06,
      "loss": 0.0354,
      "step": 382360
    },
    {
      "epoch": 0.6257732566131852,
      "grad_norm": 1.3842042684555054,
      "learning_rate": 8.806098983283145e-06,
      "loss": 0.0302,
      "step": 382380
    },
    {
      "epoch": 0.6258059870518384,
      "grad_norm": 1.2913098335266113,
      "learning_rate": 8.806033091069629e-06,
      "loss": 0.029,
      "step": 382400
    },
    {
      "epoch": 0.6258387174904918,
      "grad_norm": 4.868739128112793,
      "learning_rate": 8.805967198856112e-06,
      "loss": 0.0475,
      "step": 382420
    },
    {
      "epoch": 0.6258714479291452,
      "grad_norm": 0.525276780128479,
      "learning_rate": 8.805901306642594e-06,
      "loss": 0.0305,
      "step": 382440
    },
    {
      "epoch": 0.6259041783677984,
      "grad_norm": 2.582807779312134,
      "learning_rate": 8.805835414429078e-06,
      "loss": 0.0378,
      "step": 382460
    },
    {
      "epoch": 0.6259369088064518,
      "grad_norm": 1.205717921257019,
      "learning_rate": 8.80576952221556e-06,
      "loss": 0.0453,
      "step": 382480
    },
    {
      "epoch": 0.6259696392451052,
      "grad_norm": 1.2558141946792603,
      "learning_rate": 8.805703630002043e-06,
      "loss": 0.0379,
      "step": 382500
    },
    {
      "epoch": 0.6260023696837586,
      "grad_norm": 1.110508680343628,
      "learning_rate": 8.805637737788527e-06,
      "loss": 0.0287,
      "step": 382520
    },
    {
      "epoch": 0.6260351001224118,
      "grad_norm": 1.422056794166565,
      "learning_rate": 8.805571845575009e-06,
      "loss": 0.0321,
      "step": 382540
    },
    {
      "epoch": 0.6260678305610652,
      "grad_norm": 1.9402318000793457,
      "learning_rate": 8.805505953361492e-06,
      "loss": 0.0303,
      "step": 382560
    },
    {
      "epoch": 0.6261005609997186,
      "grad_norm": 0.9303567409515381,
      "learning_rate": 8.805440061147976e-06,
      "loss": 0.0363,
      "step": 382580
    },
    {
      "epoch": 0.6261332914383718,
      "grad_norm": 0.675416886806488,
      "learning_rate": 8.805374168934458e-06,
      "loss": 0.0414,
      "step": 382600
    },
    {
      "epoch": 0.6261660218770252,
      "grad_norm": 0.8035568594932556,
      "learning_rate": 8.805308276720941e-06,
      "loss": 0.0403,
      "step": 382620
    },
    {
      "epoch": 0.6261987523156786,
      "grad_norm": 3.0341672897338867,
      "learning_rate": 8.805242384507423e-06,
      "loss": 0.0372,
      "step": 382640
    },
    {
      "epoch": 0.6262314827543318,
      "grad_norm": 0.24049034714698792,
      "learning_rate": 8.805176492293907e-06,
      "loss": 0.0459,
      "step": 382660
    },
    {
      "epoch": 0.6262642131929852,
      "grad_norm": 0.7722115516662598,
      "learning_rate": 8.805110600080389e-06,
      "loss": 0.0444,
      "step": 382680
    },
    {
      "epoch": 0.6262969436316386,
      "grad_norm": 2.654853343963623,
      "learning_rate": 8.805044707866872e-06,
      "loss": 0.046,
      "step": 382700
    },
    {
      "epoch": 0.6263296740702919,
      "grad_norm": 0.4453188180923462,
      "learning_rate": 8.804978815653354e-06,
      "loss": 0.0386,
      "step": 382720
    },
    {
      "epoch": 0.6263624045089452,
      "grad_norm": 1.875982642173767,
      "learning_rate": 8.804912923439838e-06,
      "loss": 0.0376,
      "step": 382740
    },
    {
      "epoch": 0.6263951349475986,
      "grad_norm": 0.7212408185005188,
      "learning_rate": 8.804847031226321e-06,
      "loss": 0.0323,
      "step": 382760
    },
    {
      "epoch": 0.626427865386252,
      "grad_norm": 1.0116453170776367,
      "learning_rate": 8.804781139012803e-06,
      "loss": 0.0427,
      "step": 382780
    },
    {
      "epoch": 0.6264605958249052,
      "grad_norm": 0.6532541513442993,
      "learning_rate": 8.804715246799287e-06,
      "loss": 0.0277,
      "step": 382800
    },
    {
      "epoch": 0.6264933262635586,
      "grad_norm": 1.6363345384597778,
      "learning_rate": 8.804649354585769e-06,
      "loss": 0.0382,
      "step": 382820
    },
    {
      "epoch": 0.626526056702212,
      "grad_norm": 0.6166199445724487,
      "learning_rate": 8.804583462372252e-06,
      "loss": 0.0452,
      "step": 382840
    },
    {
      "epoch": 0.6265587871408652,
      "grad_norm": 0.6467674374580383,
      "learning_rate": 8.804517570158734e-06,
      "loss": 0.0308,
      "step": 382860
    },
    {
      "epoch": 0.6265915175795186,
      "grad_norm": 3.740147352218628,
      "learning_rate": 8.804451677945218e-06,
      "loss": 0.0416,
      "step": 382880
    },
    {
      "epoch": 0.626624248018172,
      "grad_norm": 0.9274053573608398,
      "learning_rate": 8.804385785731701e-06,
      "loss": 0.0376,
      "step": 382900
    },
    {
      "epoch": 0.6266569784568253,
      "grad_norm": 0.3327268660068512,
      "learning_rate": 8.804319893518183e-06,
      "loss": 0.0292,
      "step": 382920
    },
    {
      "epoch": 0.6266897088954786,
      "grad_norm": 3.1907670497894287,
      "learning_rate": 8.804254001304667e-06,
      "loss": 0.03,
      "step": 382940
    },
    {
      "epoch": 0.626722439334132,
      "grad_norm": 1.3747957944869995,
      "learning_rate": 8.80418810909115e-06,
      "loss": 0.0315,
      "step": 382960
    },
    {
      "epoch": 0.6267551697727853,
      "grad_norm": 0.18466322124004364,
      "learning_rate": 8.804122216877632e-06,
      "loss": 0.0305,
      "step": 382980
    },
    {
      "epoch": 0.6267879002114386,
      "grad_norm": 1.2127928733825684,
      "learning_rate": 8.804056324664116e-06,
      "loss": 0.0319,
      "step": 383000
    },
    {
      "epoch": 0.626820630650092,
      "grad_norm": 0.7502550482749939,
      "learning_rate": 8.803990432450598e-06,
      "loss": 0.0326,
      "step": 383020
    },
    {
      "epoch": 0.6268533610887453,
      "grad_norm": 0.8273717164993286,
      "learning_rate": 8.803924540237081e-06,
      "loss": 0.0345,
      "step": 383040
    },
    {
      "epoch": 0.6268860915273986,
      "grad_norm": 0.6885294914245605,
      "learning_rate": 8.803858648023563e-06,
      "loss": 0.0344,
      "step": 383060
    },
    {
      "epoch": 0.626918821966052,
      "grad_norm": 1.2427316904067993,
      "learning_rate": 8.803792755810047e-06,
      "loss": 0.0409,
      "step": 383080
    },
    {
      "epoch": 0.6269515524047053,
      "grad_norm": 0.6191160082817078,
      "learning_rate": 8.80372686359653e-06,
      "loss": 0.0373,
      "step": 383100
    },
    {
      "epoch": 0.6269842828433587,
      "grad_norm": 0.7243102788925171,
      "learning_rate": 8.803660971383012e-06,
      "loss": 0.037,
      "step": 383120
    },
    {
      "epoch": 0.627017013282012,
      "grad_norm": 0.9710973501205444,
      "learning_rate": 8.803595079169496e-06,
      "loss": 0.0326,
      "step": 383140
    },
    {
      "epoch": 0.6270497437206654,
      "grad_norm": 0.39134296774864197,
      "learning_rate": 8.803529186955978e-06,
      "loss": 0.0213,
      "step": 383160
    },
    {
      "epoch": 0.6270824741593187,
      "grad_norm": 0.7987668514251709,
      "learning_rate": 8.803463294742461e-06,
      "loss": 0.0317,
      "step": 383180
    },
    {
      "epoch": 0.627115204597972,
      "grad_norm": 0.8300485610961914,
      "learning_rate": 8.803397402528943e-06,
      "loss": 0.0231,
      "step": 383200
    },
    {
      "epoch": 0.6271479350366254,
      "grad_norm": 0.4728811979293823,
      "learning_rate": 8.803331510315427e-06,
      "loss": 0.0487,
      "step": 383220
    },
    {
      "epoch": 0.6271806654752787,
      "grad_norm": 1.345276117324829,
      "learning_rate": 8.803265618101909e-06,
      "loss": 0.04,
      "step": 383240
    },
    {
      "epoch": 0.627213395913932,
      "grad_norm": 1.166176676750183,
      "learning_rate": 8.803199725888392e-06,
      "loss": 0.0298,
      "step": 383260
    },
    {
      "epoch": 0.6272461263525854,
      "grad_norm": 0.4351591467857361,
      "learning_rate": 8.803133833674876e-06,
      "loss": 0.0358,
      "step": 383280
    },
    {
      "epoch": 0.6272788567912387,
      "grad_norm": 2.2597782611846924,
      "learning_rate": 8.803067941461358e-06,
      "loss": 0.0398,
      "step": 383300
    },
    {
      "epoch": 0.6273115872298921,
      "grad_norm": 0.4005964696407318,
      "learning_rate": 8.803002049247841e-06,
      "loss": 0.0379,
      "step": 383320
    },
    {
      "epoch": 0.6273443176685454,
      "grad_norm": 3.0663812160491943,
      "learning_rate": 8.802936157034325e-06,
      "loss": 0.039,
      "step": 383340
    },
    {
      "epoch": 0.6273770481071987,
      "grad_norm": 1.4795069694519043,
      "learning_rate": 8.802870264820807e-06,
      "loss": 0.0295,
      "step": 383360
    },
    {
      "epoch": 0.6274097785458521,
      "grad_norm": 0.31201058626174927,
      "learning_rate": 8.80280437260729e-06,
      "loss": 0.0238,
      "step": 383380
    },
    {
      "epoch": 0.6274425089845054,
      "grad_norm": 1.451899766921997,
      "learning_rate": 8.802738480393772e-06,
      "loss": 0.0311,
      "step": 383400
    },
    {
      "epoch": 0.6274752394231587,
      "grad_norm": 0.6512359976768494,
      "learning_rate": 8.802672588180256e-06,
      "loss": 0.04,
      "step": 383420
    },
    {
      "epoch": 0.6275079698618121,
      "grad_norm": 0.5508325099945068,
      "learning_rate": 8.802606695966738e-06,
      "loss": 0.0315,
      "step": 383440
    },
    {
      "epoch": 0.6275407003004654,
      "grad_norm": 2.2437665462493896,
      "learning_rate": 8.802540803753221e-06,
      "loss": 0.0325,
      "step": 383460
    },
    {
      "epoch": 0.6275734307391188,
      "grad_norm": 1.6025643348693848,
      "learning_rate": 8.802474911539705e-06,
      "loss": 0.028,
      "step": 383480
    },
    {
      "epoch": 0.6276061611777721,
      "grad_norm": 0.5866665244102478,
      "learning_rate": 8.802409019326187e-06,
      "loss": 0.0553,
      "step": 383500
    },
    {
      "epoch": 0.6276388916164255,
      "grad_norm": 3.098358154296875,
      "learning_rate": 8.80234312711267e-06,
      "loss": 0.0311,
      "step": 383520
    },
    {
      "epoch": 0.6276716220550788,
      "grad_norm": 0.30599310994148254,
      "learning_rate": 8.802277234899152e-06,
      "loss": 0.0333,
      "step": 383540
    },
    {
      "epoch": 0.6277043524937321,
      "grad_norm": 0.8763003945350647,
      "learning_rate": 8.802211342685636e-06,
      "loss": 0.0509,
      "step": 383560
    },
    {
      "epoch": 0.6277370829323855,
      "grad_norm": 1.5979480743408203,
      "learning_rate": 8.802145450472118e-06,
      "loss": 0.0407,
      "step": 383580
    },
    {
      "epoch": 0.6277698133710388,
      "grad_norm": 3.301833391189575,
      "learning_rate": 8.802079558258601e-06,
      "loss": 0.029,
      "step": 383600
    },
    {
      "epoch": 0.6278025438096921,
      "grad_norm": 2.9074392318725586,
      "learning_rate": 8.802013666045083e-06,
      "loss": 0.0417,
      "step": 383620
    },
    {
      "epoch": 0.6278352742483455,
      "grad_norm": 3.239861488342285,
      "learning_rate": 8.801947773831567e-06,
      "loss": 0.031,
      "step": 383640
    },
    {
      "epoch": 0.6278680046869988,
      "grad_norm": 1.0038045644760132,
      "learning_rate": 8.801881881618049e-06,
      "loss": 0.0401,
      "step": 383660
    },
    {
      "epoch": 0.6279007351256521,
      "grad_norm": 1.3283672332763672,
      "learning_rate": 8.801815989404532e-06,
      "loss": 0.0407,
      "step": 383680
    },
    {
      "epoch": 0.6279334655643055,
      "grad_norm": 0.21653105318546295,
      "learning_rate": 8.801750097191016e-06,
      "loss": 0.0261,
      "step": 383700
    },
    {
      "epoch": 0.6279661960029588,
      "grad_norm": 1.6014286279678345,
      "learning_rate": 8.801684204977498e-06,
      "loss": 0.0356,
      "step": 383720
    },
    {
      "epoch": 0.6279989264416121,
      "grad_norm": 1.1452027559280396,
      "learning_rate": 8.801618312763981e-06,
      "loss": 0.0266,
      "step": 383740
    },
    {
      "epoch": 0.6280316568802655,
      "grad_norm": 1.6490790843963623,
      "learning_rate": 8.801552420550465e-06,
      "loss": 0.0417,
      "step": 383760
    },
    {
      "epoch": 0.6280643873189189,
      "grad_norm": 1.5328571796417236,
      "learning_rate": 8.801486528336947e-06,
      "loss": 0.0426,
      "step": 383780
    },
    {
      "epoch": 0.6280971177575722,
      "grad_norm": 0.5544687509536743,
      "learning_rate": 8.80142063612343e-06,
      "loss": 0.0304,
      "step": 383800
    },
    {
      "epoch": 0.6281298481962255,
      "grad_norm": 0.5836811065673828,
      "learning_rate": 8.801354743909914e-06,
      "loss": 0.0302,
      "step": 383820
    },
    {
      "epoch": 0.6281625786348789,
      "grad_norm": 0.2378256618976593,
      "learning_rate": 8.801288851696396e-06,
      "loss": 0.0298,
      "step": 383840
    },
    {
      "epoch": 0.6281953090735322,
      "grad_norm": 0.4837585985660553,
      "learning_rate": 8.80122295948288e-06,
      "loss": 0.0371,
      "step": 383860
    },
    {
      "epoch": 0.6282280395121855,
      "grad_norm": 2.650240182876587,
      "learning_rate": 8.801157067269361e-06,
      "loss": 0.0323,
      "step": 383880
    },
    {
      "epoch": 0.6282607699508389,
      "grad_norm": 2.4832661151885986,
      "learning_rate": 8.801091175055845e-06,
      "loss": 0.0424,
      "step": 383900
    },
    {
      "epoch": 0.6282935003894922,
      "grad_norm": 0.5136935710906982,
      "learning_rate": 8.801025282842327e-06,
      "loss": 0.0339,
      "step": 383920
    },
    {
      "epoch": 0.6283262308281455,
      "grad_norm": 2.84847354888916,
      "learning_rate": 8.80095939062881e-06,
      "loss": 0.0431,
      "step": 383940
    },
    {
      "epoch": 0.6283589612667989,
      "grad_norm": 0.4499645531177521,
      "learning_rate": 8.800893498415292e-06,
      "loss": 0.0427,
      "step": 383960
    },
    {
      "epoch": 0.6283916917054523,
      "grad_norm": 1.934946060180664,
      "learning_rate": 8.800827606201776e-06,
      "loss": 0.0407,
      "step": 383980
    },
    {
      "epoch": 0.6284244221441055,
      "grad_norm": 1.8627067804336548,
      "learning_rate": 8.800761713988258e-06,
      "loss": 0.0313,
      "step": 384000
    },
    {
      "epoch": 0.6284571525827589,
      "grad_norm": 3.538379430770874,
      "learning_rate": 8.800695821774741e-06,
      "loss": 0.0265,
      "step": 384020
    },
    {
      "epoch": 0.6284898830214123,
      "grad_norm": 0.7600488066673279,
      "learning_rate": 8.800629929561223e-06,
      "loss": 0.0399,
      "step": 384040
    },
    {
      "epoch": 0.6285226134600655,
      "grad_norm": 0.7180598378181458,
      "learning_rate": 8.800564037347707e-06,
      "loss": 0.0369,
      "step": 384060
    },
    {
      "epoch": 0.6285553438987189,
      "grad_norm": 0.8525444865226746,
      "learning_rate": 8.80049814513419e-06,
      "loss": 0.0411,
      "step": 384080
    },
    {
      "epoch": 0.6285880743373723,
      "grad_norm": 4.67479133605957,
      "learning_rate": 8.800432252920672e-06,
      "loss": 0.0375,
      "step": 384100
    },
    {
      "epoch": 0.6286208047760256,
      "grad_norm": 0.6999431848526001,
      "learning_rate": 8.800366360707156e-06,
      "loss": 0.0329,
      "step": 384120
    },
    {
      "epoch": 0.6286535352146789,
      "grad_norm": 2.411294460296631,
      "learning_rate": 8.80030046849364e-06,
      "loss": 0.0474,
      "step": 384140
    },
    {
      "epoch": 0.6286862656533323,
      "grad_norm": 2.0199687480926514,
      "learning_rate": 8.800234576280122e-06,
      "loss": 0.0304,
      "step": 384160
    },
    {
      "epoch": 0.6287189960919857,
      "grad_norm": 0.7200403809547424,
      "learning_rate": 8.800168684066605e-06,
      "loss": 0.0369,
      "step": 384180
    },
    {
      "epoch": 0.6287517265306389,
      "grad_norm": 0.9950710535049438,
      "learning_rate": 8.800102791853089e-06,
      "loss": 0.036,
      "step": 384200
    },
    {
      "epoch": 0.6287844569692923,
      "grad_norm": 1.315679669380188,
      "learning_rate": 8.80003689963957e-06,
      "loss": 0.0375,
      "step": 384220
    },
    {
      "epoch": 0.6288171874079457,
      "grad_norm": 1.5911822319030762,
      "learning_rate": 8.799971007426054e-06,
      "loss": 0.0499,
      "step": 384240
    },
    {
      "epoch": 0.6288499178465989,
      "grad_norm": 0.8928829431533813,
      "learning_rate": 8.799905115212536e-06,
      "loss": 0.0333,
      "step": 384260
    },
    {
      "epoch": 0.6288826482852523,
      "grad_norm": 2.042978525161743,
      "learning_rate": 8.79983922299902e-06,
      "loss": 0.0323,
      "step": 384280
    },
    {
      "epoch": 0.6289153787239057,
      "grad_norm": 3.801727056503296,
      "learning_rate": 8.799773330785502e-06,
      "loss": 0.0402,
      "step": 384300
    },
    {
      "epoch": 0.6289481091625589,
      "grad_norm": 1.9351928234100342,
      "learning_rate": 8.799707438571985e-06,
      "loss": 0.0366,
      "step": 384320
    },
    {
      "epoch": 0.6289808396012123,
      "grad_norm": 1.002389669418335,
      "learning_rate": 8.799641546358467e-06,
      "loss": 0.0328,
      "step": 384340
    },
    {
      "epoch": 0.6290135700398657,
      "grad_norm": 0.7045352458953857,
      "learning_rate": 8.79957565414495e-06,
      "loss": 0.0514,
      "step": 384360
    },
    {
      "epoch": 0.6290463004785191,
      "grad_norm": 5.415339469909668,
      "learning_rate": 8.799509761931433e-06,
      "loss": 0.0372,
      "step": 384380
    },
    {
      "epoch": 0.6290790309171723,
      "grad_norm": 0.7313079237937927,
      "learning_rate": 8.799443869717916e-06,
      "loss": 0.0416,
      "step": 384400
    },
    {
      "epoch": 0.6291117613558257,
      "grad_norm": 0.5847287774085999,
      "learning_rate": 8.799377977504398e-06,
      "loss": 0.0377,
      "step": 384420
    },
    {
      "epoch": 0.6291444917944791,
      "grad_norm": 0.9711441993713379,
      "learning_rate": 8.799312085290882e-06,
      "loss": 0.0359,
      "step": 384440
    },
    {
      "epoch": 0.6291772222331323,
      "grad_norm": 0.23987780511379242,
      "learning_rate": 8.799246193077363e-06,
      "loss": 0.0378,
      "step": 384460
    },
    {
      "epoch": 0.6292099526717857,
      "grad_norm": 2.6685006618499756,
      "learning_rate": 8.799180300863847e-06,
      "loss": 0.0306,
      "step": 384480
    },
    {
      "epoch": 0.6292426831104391,
      "grad_norm": 2.822568893432617,
      "learning_rate": 8.79911440865033e-06,
      "loss": 0.0388,
      "step": 384500
    },
    {
      "epoch": 0.6292754135490923,
      "grad_norm": 1.3796895742416382,
      "learning_rate": 8.799048516436813e-06,
      "loss": 0.0386,
      "step": 384520
    },
    {
      "epoch": 0.6293081439877457,
      "grad_norm": 0.9464880228042603,
      "learning_rate": 8.798982624223296e-06,
      "loss": 0.0352,
      "step": 384540
    },
    {
      "epoch": 0.6293408744263991,
      "grad_norm": 1.2781161069869995,
      "learning_rate": 8.79891673200978e-06,
      "loss": 0.0355,
      "step": 384560
    },
    {
      "epoch": 0.6293736048650524,
      "grad_norm": 0.9145380258560181,
      "learning_rate": 8.798850839796262e-06,
      "loss": 0.032,
      "step": 384580
    },
    {
      "epoch": 0.6294063353037057,
      "grad_norm": 1.5130095481872559,
      "learning_rate": 8.798784947582745e-06,
      "loss": 0.0345,
      "step": 384600
    },
    {
      "epoch": 0.6294390657423591,
      "grad_norm": 3.833146572113037,
      "learning_rate": 8.798719055369229e-06,
      "loss": 0.0373,
      "step": 384620
    },
    {
      "epoch": 0.6294717961810125,
      "grad_norm": 0.16882272064685822,
      "learning_rate": 8.79865316315571e-06,
      "loss": 0.0285,
      "step": 384640
    },
    {
      "epoch": 0.6295045266196657,
      "grad_norm": 1.112545371055603,
      "learning_rate": 8.798587270942194e-06,
      "loss": 0.0434,
      "step": 384660
    },
    {
      "epoch": 0.6295372570583191,
      "grad_norm": 0.5113058686256409,
      "learning_rate": 8.798521378728676e-06,
      "loss": 0.0328,
      "step": 384680
    },
    {
      "epoch": 0.6295699874969725,
      "grad_norm": 1.6555229425430298,
      "learning_rate": 8.79845548651516e-06,
      "loss": 0.0353,
      "step": 384700
    },
    {
      "epoch": 0.6296027179356257,
      "grad_norm": 0.792965829372406,
      "learning_rate": 8.798389594301642e-06,
      "loss": 0.0382,
      "step": 384720
    },
    {
      "epoch": 0.6296354483742791,
      "grad_norm": 0.5851159691810608,
      "learning_rate": 8.798323702088125e-06,
      "loss": 0.0377,
      "step": 384740
    },
    {
      "epoch": 0.6296681788129325,
      "grad_norm": 0.5037193894386292,
      "learning_rate": 8.798257809874607e-06,
      "loss": 0.0383,
      "step": 384760
    },
    {
      "epoch": 0.6297009092515858,
      "grad_norm": 1.0711861848831177,
      "learning_rate": 8.79819191766109e-06,
      "loss": 0.0316,
      "step": 384780
    },
    {
      "epoch": 0.6297336396902391,
      "grad_norm": 3.4794814586639404,
      "learning_rate": 8.798126025447573e-06,
      "loss": 0.0341,
      "step": 384800
    },
    {
      "epoch": 0.6297663701288925,
      "grad_norm": 0.9359208941459656,
      "learning_rate": 8.798060133234056e-06,
      "loss": 0.0371,
      "step": 384820
    },
    {
      "epoch": 0.6297991005675458,
      "grad_norm": 0.7374436259269714,
      "learning_rate": 8.797994241020538e-06,
      "loss": 0.0285,
      "step": 384840
    },
    {
      "epoch": 0.6298318310061991,
      "grad_norm": 0.9157986640930176,
      "learning_rate": 8.797928348807022e-06,
      "loss": 0.0288,
      "step": 384860
    },
    {
      "epoch": 0.6298645614448525,
      "grad_norm": 0.49486565589904785,
      "learning_rate": 8.797862456593505e-06,
      "loss": 0.0446,
      "step": 384880
    },
    {
      "epoch": 0.6298972918835058,
      "grad_norm": 3.9812099933624268,
      "learning_rate": 8.797796564379987e-06,
      "loss": 0.0413,
      "step": 384900
    },
    {
      "epoch": 0.6299300223221591,
      "grad_norm": 1.649292230606079,
      "learning_rate": 8.79773067216647e-06,
      "loss": 0.0414,
      "step": 384920
    },
    {
      "epoch": 0.6299627527608125,
      "grad_norm": 5.292860507965088,
      "learning_rate": 8.797664779952954e-06,
      "loss": 0.046,
      "step": 384940
    },
    {
      "epoch": 0.6299954831994659,
      "grad_norm": 2.68168568611145,
      "learning_rate": 8.797598887739436e-06,
      "loss": 0.0423,
      "step": 384960
    },
    {
      "epoch": 0.6300282136381192,
      "grad_norm": 1.4345331192016602,
      "learning_rate": 8.79753299552592e-06,
      "loss": 0.0374,
      "step": 384980
    },
    {
      "epoch": 0.6300609440767725,
      "grad_norm": 1.6947522163391113,
      "learning_rate": 8.797467103312403e-06,
      "loss": 0.0369,
      "step": 385000
    },
    {
      "epoch": 0.6300936745154259,
      "grad_norm": 1.1567497253417969,
      "learning_rate": 8.797401211098885e-06,
      "loss": 0.0365,
      "step": 385020
    },
    {
      "epoch": 0.6301264049540792,
      "grad_norm": 2.0581696033477783,
      "learning_rate": 8.797335318885369e-06,
      "loss": 0.0297,
      "step": 385040
    },
    {
      "epoch": 0.6301591353927325,
      "grad_norm": 1.638510823249817,
      "learning_rate": 8.79726942667185e-06,
      "loss": 0.0458,
      "step": 385060
    },
    {
      "epoch": 0.6301918658313859,
      "grad_norm": 0.8287516832351685,
      "learning_rate": 8.797203534458334e-06,
      "loss": 0.0362,
      "step": 385080
    },
    {
      "epoch": 0.6302245962700392,
      "grad_norm": 1.1712487936019897,
      "learning_rate": 8.797137642244816e-06,
      "loss": 0.0406,
      "step": 385100
    },
    {
      "epoch": 0.6302573267086925,
      "grad_norm": 1.8591853380203247,
      "learning_rate": 8.7970717500313e-06,
      "loss": 0.0424,
      "step": 385120
    },
    {
      "epoch": 0.6302900571473459,
      "grad_norm": 1.4065768718719482,
      "learning_rate": 8.797005857817782e-06,
      "loss": 0.0417,
      "step": 385140
    },
    {
      "epoch": 0.6303227875859992,
      "grad_norm": 0.5925549268722534,
      "learning_rate": 8.796939965604265e-06,
      "loss": 0.0376,
      "step": 385160
    },
    {
      "epoch": 0.6303555180246526,
      "grad_norm": 0.3290519416332245,
      "learning_rate": 8.796874073390747e-06,
      "loss": 0.0302,
      "step": 385180
    },
    {
      "epoch": 0.6303882484633059,
      "grad_norm": 0.9188979864120483,
      "learning_rate": 8.79680818117723e-06,
      "loss": 0.0333,
      "step": 385200
    },
    {
      "epoch": 0.6304209789019593,
      "grad_norm": 0.14242874085903168,
      "learning_rate": 8.796742288963714e-06,
      "loss": 0.0294,
      "step": 385220
    },
    {
      "epoch": 0.6304537093406126,
      "grad_norm": 2.701942205429077,
      "learning_rate": 8.796676396750196e-06,
      "loss": 0.0429,
      "step": 385240
    },
    {
      "epoch": 0.6304864397792659,
      "grad_norm": 0.40607354044914246,
      "learning_rate": 8.79661050453668e-06,
      "loss": 0.0419,
      "step": 385260
    },
    {
      "epoch": 0.6305191702179193,
      "grad_norm": 1.919924259185791,
      "learning_rate": 8.796544612323162e-06,
      "loss": 0.0359,
      "step": 385280
    },
    {
      "epoch": 0.6305519006565726,
      "grad_norm": 2.4099433422088623,
      "learning_rate": 8.796478720109645e-06,
      "loss": 0.0464,
      "step": 385300
    },
    {
      "epoch": 0.6305846310952259,
      "grad_norm": 0.5907853841781616,
      "learning_rate": 8.796412827896127e-06,
      "loss": 0.0395,
      "step": 385320
    },
    {
      "epoch": 0.6306173615338793,
      "grad_norm": 0.468144953250885,
      "learning_rate": 8.79634693568261e-06,
      "loss": 0.0339,
      "step": 385340
    },
    {
      "epoch": 0.6306500919725326,
      "grad_norm": 2.161813735961914,
      "learning_rate": 8.796281043469094e-06,
      "loss": 0.0259,
      "step": 385360
    },
    {
      "epoch": 0.630682822411186,
      "grad_norm": 1.3857022523880005,
      "learning_rate": 8.796215151255578e-06,
      "loss": 0.0358,
      "step": 385380
    },
    {
      "epoch": 0.6307155528498393,
      "grad_norm": 0.9606049060821533,
      "learning_rate": 8.79614925904206e-06,
      "loss": 0.0344,
      "step": 385400
    },
    {
      "epoch": 0.6307482832884926,
      "grad_norm": 0.8746187686920166,
      "learning_rate": 8.796083366828543e-06,
      "loss": 0.034,
      "step": 385420
    },
    {
      "epoch": 0.630781013727146,
      "grad_norm": 1.1693028211593628,
      "learning_rate": 8.796017474615025e-06,
      "loss": 0.044,
      "step": 385440
    },
    {
      "epoch": 0.6308137441657993,
      "grad_norm": 1.5011028051376343,
      "learning_rate": 8.795951582401509e-06,
      "loss": 0.0269,
      "step": 385460
    },
    {
      "epoch": 0.6308464746044526,
      "grad_norm": 0.6845325827598572,
      "learning_rate": 8.79588569018799e-06,
      "loss": 0.0449,
      "step": 385480
    },
    {
      "epoch": 0.630879205043106,
      "grad_norm": 1.0711439847946167,
      "learning_rate": 8.795819797974474e-06,
      "loss": 0.0308,
      "step": 385500
    },
    {
      "epoch": 0.6309119354817593,
      "grad_norm": 1.7914773225784302,
      "learning_rate": 8.795753905760956e-06,
      "loss": 0.034,
      "step": 385520
    },
    {
      "epoch": 0.6309446659204127,
      "grad_norm": 0.3995770215988159,
      "learning_rate": 8.79568801354744e-06,
      "loss": 0.0389,
      "step": 385540
    },
    {
      "epoch": 0.630977396359066,
      "grad_norm": 0.3036941885948181,
      "learning_rate": 8.795622121333923e-06,
      "loss": 0.038,
      "step": 385560
    },
    {
      "epoch": 0.6310101267977194,
      "grad_norm": 1.9345518350601196,
      "learning_rate": 8.795556229120405e-06,
      "loss": 0.033,
      "step": 385580
    },
    {
      "epoch": 0.6310428572363727,
      "grad_norm": 1.6444998979568481,
      "learning_rate": 8.795490336906889e-06,
      "loss": 0.0503,
      "step": 385600
    },
    {
      "epoch": 0.631075587675026,
      "grad_norm": 0.45221006870269775,
      "learning_rate": 8.79542444469337e-06,
      "loss": 0.0508,
      "step": 385620
    },
    {
      "epoch": 0.6311083181136794,
      "grad_norm": 1.0771182775497437,
      "learning_rate": 8.795358552479854e-06,
      "loss": 0.0336,
      "step": 385640
    },
    {
      "epoch": 0.6311410485523327,
      "grad_norm": 5.818704128265381,
      "learning_rate": 8.795292660266336e-06,
      "loss": 0.0399,
      "step": 385660
    },
    {
      "epoch": 0.631173778990986,
      "grad_norm": 1.5324641466140747,
      "learning_rate": 8.79522676805282e-06,
      "loss": 0.0307,
      "step": 385680
    },
    {
      "epoch": 0.6312065094296394,
      "grad_norm": 0.8530952334403992,
      "learning_rate": 8.795160875839302e-06,
      "loss": 0.0283,
      "step": 385700
    },
    {
      "epoch": 0.6312392398682927,
      "grad_norm": 2.788747549057007,
      "learning_rate": 8.795094983625785e-06,
      "loss": 0.0462,
      "step": 385720
    },
    {
      "epoch": 0.631271970306946,
      "grad_norm": 1.3276969194412231,
      "learning_rate": 8.795029091412269e-06,
      "loss": 0.0289,
      "step": 385740
    },
    {
      "epoch": 0.6313047007455994,
      "grad_norm": 0.10339522361755371,
      "learning_rate": 8.794963199198751e-06,
      "loss": 0.0317,
      "step": 385760
    },
    {
      "epoch": 0.6313374311842528,
      "grad_norm": 0.5063777565956116,
      "learning_rate": 8.794897306985234e-06,
      "loss": 0.0214,
      "step": 385780
    },
    {
      "epoch": 0.631370161622906,
      "grad_norm": 0.7294512391090393,
      "learning_rate": 8.794831414771718e-06,
      "loss": 0.0344,
      "step": 385800
    },
    {
      "epoch": 0.6314028920615594,
      "grad_norm": 0.4897614121437073,
      "learning_rate": 8.7947655225582e-06,
      "loss": 0.0384,
      "step": 385820
    },
    {
      "epoch": 0.6314356225002128,
      "grad_norm": 1.430608868598938,
      "learning_rate": 8.794699630344684e-06,
      "loss": 0.0271,
      "step": 385840
    },
    {
      "epoch": 0.631468352938866,
      "grad_norm": 1.2377415895462036,
      "learning_rate": 8.794633738131165e-06,
      "loss": 0.0342,
      "step": 385860
    },
    {
      "epoch": 0.6315010833775194,
      "grad_norm": 0.29724767804145813,
      "learning_rate": 8.794567845917649e-06,
      "loss": 0.028,
      "step": 385880
    },
    {
      "epoch": 0.6315338138161728,
      "grad_norm": 0.6779447197914124,
      "learning_rate": 8.794501953704131e-06,
      "loss": 0.0596,
      "step": 385900
    },
    {
      "epoch": 0.6315665442548261,
      "grad_norm": 0.3288224935531616,
      "learning_rate": 8.794436061490614e-06,
      "loss": 0.0371,
      "step": 385920
    },
    {
      "epoch": 0.6315992746934794,
      "grad_norm": 1.0896321535110474,
      "learning_rate": 8.794370169277098e-06,
      "loss": 0.0337,
      "step": 385940
    },
    {
      "epoch": 0.6316320051321328,
      "grad_norm": 0.9616267681121826,
      "learning_rate": 8.79430427706358e-06,
      "loss": 0.0324,
      "step": 385960
    },
    {
      "epoch": 0.6316647355707862,
      "grad_norm": 2.451343059539795,
      "learning_rate": 8.794238384850064e-06,
      "loss": 0.0362,
      "step": 385980
    },
    {
      "epoch": 0.6316974660094394,
      "grad_norm": 1.8048491477966309,
      "learning_rate": 8.794172492636545e-06,
      "loss": 0.0272,
      "step": 386000
    },
    {
      "epoch": 0.6317301964480928,
      "grad_norm": 0.33150535821914673,
      "learning_rate": 8.794106600423029e-06,
      "loss": 0.0449,
      "step": 386020
    },
    {
      "epoch": 0.6317629268867462,
      "grad_norm": 1.0736563205718994,
      "learning_rate": 8.794040708209511e-06,
      "loss": 0.0388,
      "step": 386040
    },
    {
      "epoch": 0.6317956573253994,
      "grad_norm": 1.4882510900497437,
      "learning_rate": 8.793974815995995e-06,
      "loss": 0.032,
      "step": 386060
    },
    {
      "epoch": 0.6318283877640528,
      "grad_norm": 0.5797619819641113,
      "learning_rate": 8.793908923782476e-06,
      "loss": 0.0317,
      "step": 386080
    },
    {
      "epoch": 0.6318611182027062,
      "grad_norm": 0.4398361146450043,
      "learning_rate": 8.79384303156896e-06,
      "loss": 0.0316,
      "step": 386100
    },
    {
      "epoch": 0.6318938486413594,
      "grad_norm": 0.910875141620636,
      "learning_rate": 8.793777139355444e-06,
      "loss": 0.0476,
      "step": 386120
    },
    {
      "epoch": 0.6319265790800128,
      "grad_norm": 0.4743026793003082,
      "learning_rate": 8.793711247141925e-06,
      "loss": 0.0415,
      "step": 386140
    },
    {
      "epoch": 0.6319593095186662,
      "grad_norm": 0.9096959233283997,
      "learning_rate": 8.793645354928409e-06,
      "loss": 0.0323,
      "step": 386160
    },
    {
      "epoch": 0.6319920399573196,
      "grad_norm": 1.187590479850769,
      "learning_rate": 8.793579462714893e-06,
      "loss": 0.0497,
      "step": 386180
    },
    {
      "epoch": 0.6320247703959728,
      "grad_norm": 1.2819340229034424,
      "learning_rate": 8.793513570501375e-06,
      "loss": 0.0388,
      "step": 386200
    },
    {
      "epoch": 0.6320575008346262,
      "grad_norm": 0.993074893951416,
      "learning_rate": 8.793447678287858e-06,
      "loss": 0.0269,
      "step": 386220
    },
    {
      "epoch": 0.6320902312732796,
      "grad_norm": 0.9323781728744507,
      "learning_rate": 8.79338178607434e-06,
      "loss": 0.0308,
      "step": 386240
    },
    {
      "epoch": 0.6321229617119328,
      "grad_norm": 0.9530290365219116,
      "learning_rate": 8.793315893860824e-06,
      "loss": 0.0363,
      "step": 386260
    },
    {
      "epoch": 0.6321556921505862,
      "grad_norm": 2.003044843673706,
      "learning_rate": 8.793250001647307e-06,
      "loss": 0.0334,
      "step": 386280
    },
    {
      "epoch": 0.6321884225892396,
      "grad_norm": 0.7075967192649841,
      "learning_rate": 8.793184109433789e-06,
      "loss": 0.0332,
      "step": 386300
    },
    {
      "epoch": 0.6322211530278928,
      "grad_norm": 1.675626277923584,
      "learning_rate": 8.793118217220273e-06,
      "loss": 0.035,
      "step": 386320
    },
    {
      "epoch": 0.6322538834665462,
      "grad_norm": 0.32852625846862793,
      "learning_rate": 8.793052325006755e-06,
      "loss": 0.0456,
      "step": 386340
    },
    {
      "epoch": 0.6322866139051996,
      "grad_norm": 0.5053362250328064,
      "learning_rate": 8.792986432793238e-06,
      "loss": 0.0321,
      "step": 386360
    },
    {
      "epoch": 0.632319344343853,
      "grad_norm": 1.8605237007141113,
      "learning_rate": 8.79292054057972e-06,
      "loss": 0.0411,
      "step": 386380
    },
    {
      "epoch": 0.6323520747825062,
      "grad_norm": 0.47413116693496704,
      "learning_rate": 8.792854648366204e-06,
      "loss": 0.031,
      "step": 386400
    },
    {
      "epoch": 0.6323848052211596,
      "grad_norm": 2.811356544494629,
      "learning_rate": 8.792788756152686e-06,
      "loss": 0.0322,
      "step": 386420
    },
    {
      "epoch": 0.632417535659813,
      "grad_norm": 0.7447865605354309,
      "learning_rate": 8.792722863939169e-06,
      "loss": 0.0353,
      "step": 386440
    },
    {
      "epoch": 0.6324502660984662,
      "grad_norm": 1.1054013967514038,
      "learning_rate": 8.792656971725651e-06,
      "loss": 0.0347,
      "step": 386460
    },
    {
      "epoch": 0.6324829965371196,
      "grad_norm": 0.3034622371196747,
      "learning_rate": 8.792591079512135e-06,
      "loss": 0.0358,
      "step": 386480
    },
    {
      "epoch": 0.632515726975773,
      "grad_norm": 1.986734390258789,
      "learning_rate": 8.792525187298616e-06,
      "loss": 0.032,
      "step": 386500
    },
    {
      "epoch": 0.6325484574144262,
      "grad_norm": 0.9014042019844055,
      "learning_rate": 8.7924592950851e-06,
      "loss": 0.044,
      "step": 386520
    },
    {
      "epoch": 0.6325811878530796,
      "grad_norm": 0.852011501789093,
      "learning_rate": 8.792393402871584e-06,
      "loss": 0.0372,
      "step": 386540
    },
    {
      "epoch": 0.632613918291733,
      "grad_norm": 1.106911301612854,
      "learning_rate": 8.792327510658066e-06,
      "loss": 0.0382,
      "step": 386560
    },
    {
      "epoch": 0.6326466487303862,
      "grad_norm": 0.7441660165786743,
      "learning_rate": 8.792261618444549e-06,
      "loss": 0.0263,
      "step": 386580
    },
    {
      "epoch": 0.6326793791690396,
      "grad_norm": 0.2599225342273712,
      "learning_rate": 8.792195726231033e-06,
      "loss": 0.0332,
      "step": 386600
    },
    {
      "epoch": 0.632712109607693,
      "grad_norm": 0.3911162316799164,
      "learning_rate": 8.792129834017515e-06,
      "loss": 0.0363,
      "step": 386620
    },
    {
      "epoch": 0.6327448400463463,
      "grad_norm": 0.5876170992851257,
      "learning_rate": 8.792063941803998e-06,
      "loss": 0.0244,
      "step": 386640
    },
    {
      "epoch": 0.6327775704849996,
      "grad_norm": 2.2371466159820557,
      "learning_rate": 8.791998049590482e-06,
      "loss": 0.0259,
      "step": 386660
    },
    {
      "epoch": 0.632810300923653,
      "grad_norm": 1.0179870128631592,
      "learning_rate": 8.791932157376964e-06,
      "loss": 0.0302,
      "step": 386680
    },
    {
      "epoch": 0.6328430313623064,
      "grad_norm": 1.9765276908874512,
      "learning_rate": 8.791866265163447e-06,
      "loss": 0.047,
      "step": 386700
    },
    {
      "epoch": 0.6328757618009596,
      "grad_norm": 1.082761526107788,
      "learning_rate": 8.791800372949929e-06,
      "loss": 0.0445,
      "step": 386720
    },
    {
      "epoch": 0.632908492239613,
      "grad_norm": 1.4457111358642578,
      "learning_rate": 8.791734480736413e-06,
      "loss": 0.03,
      "step": 386740
    },
    {
      "epoch": 0.6329412226782664,
      "grad_norm": 0.7205390930175781,
      "learning_rate": 8.791668588522895e-06,
      "loss": 0.0282,
      "step": 386760
    },
    {
      "epoch": 0.6329739531169196,
      "grad_norm": 0.354525089263916,
      "learning_rate": 8.791602696309378e-06,
      "loss": 0.0364,
      "step": 386780
    },
    {
      "epoch": 0.633006683555573,
      "grad_norm": 1.743484377861023,
      "learning_rate": 8.79153680409586e-06,
      "loss": 0.0381,
      "step": 386800
    },
    {
      "epoch": 0.6330394139942264,
      "grad_norm": 0.678246796131134,
      "learning_rate": 8.791470911882344e-06,
      "loss": 0.0291,
      "step": 386820
    },
    {
      "epoch": 0.6330721444328797,
      "grad_norm": 0.6414291858673096,
      "learning_rate": 8.791405019668826e-06,
      "loss": 0.0327,
      "step": 386840
    },
    {
      "epoch": 0.633104874871533,
      "grad_norm": 2.975700855255127,
      "learning_rate": 8.79133912745531e-06,
      "loss": 0.0389,
      "step": 386860
    },
    {
      "epoch": 0.6331376053101864,
      "grad_norm": 1.1571996212005615,
      "learning_rate": 8.791273235241791e-06,
      "loss": 0.0478,
      "step": 386880
    },
    {
      "epoch": 0.6331703357488397,
      "grad_norm": 0.4087332487106323,
      "learning_rate": 8.791207343028275e-06,
      "loss": 0.0274,
      "step": 386900
    },
    {
      "epoch": 0.633203066187493,
      "grad_norm": 1.109797477722168,
      "learning_rate": 8.791141450814758e-06,
      "loss": 0.0271,
      "step": 386920
    },
    {
      "epoch": 0.6332357966261464,
      "grad_norm": 0.20624884963035583,
      "learning_rate": 8.79107555860124e-06,
      "loss": 0.0307,
      "step": 386940
    },
    {
      "epoch": 0.6332685270647997,
      "grad_norm": 0.4075179398059845,
      "learning_rate": 8.791009666387724e-06,
      "loss": 0.0305,
      "step": 386960
    },
    {
      "epoch": 0.633301257503453,
      "grad_norm": 0.4783403277397156,
      "learning_rate": 8.790943774174207e-06,
      "loss": 0.036,
      "step": 386980
    },
    {
      "epoch": 0.6333339879421064,
      "grad_norm": 0.7779062390327454,
      "learning_rate": 8.79087788196069e-06,
      "loss": 0.0395,
      "step": 387000
    },
    {
      "epoch": 0.6333667183807598,
      "grad_norm": 0.681479275226593,
      "learning_rate": 8.790811989747173e-06,
      "loss": 0.0312,
      "step": 387020
    },
    {
      "epoch": 0.6333994488194131,
      "grad_norm": 0.8741452097892761,
      "learning_rate": 8.790746097533656e-06,
      "loss": 0.0362,
      "step": 387040
    },
    {
      "epoch": 0.6334321792580664,
      "grad_norm": 1.08255136013031,
      "learning_rate": 8.790680205320138e-06,
      "loss": 0.0316,
      "step": 387060
    },
    {
      "epoch": 0.6334649096967198,
      "grad_norm": 0.969882607460022,
      "learning_rate": 8.790614313106622e-06,
      "loss": 0.0331,
      "step": 387080
    },
    {
      "epoch": 0.6334976401353731,
      "grad_norm": 1.4221395254135132,
      "learning_rate": 8.790548420893104e-06,
      "loss": 0.0357,
      "step": 387100
    },
    {
      "epoch": 0.6335303705740264,
      "grad_norm": 3.8192193508148193,
      "learning_rate": 8.790482528679587e-06,
      "loss": 0.0309,
      "step": 387120
    },
    {
      "epoch": 0.6335631010126798,
      "grad_norm": 0.20673389732837677,
      "learning_rate": 8.79041663646607e-06,
      "loss": 0.0265,
      "step": 387140
    },
    {
      "epoch": 0.6335958314513331,
      "grad_norm": 0.6603655219078064,
      "learning_rate": 8.790350744252553e-06,
      "loss": 0.0262,
      "step": 387160
    },
    {
      "epoch": 0.6336285618899864,
      "grad_norm": 0.1343347728252411,
      "learning_rate": 8.790284852039035e-06,
      "loss": 0.0356,
      "step": 387180
    },
    {
      "epoch": 0.6336612923286398,
      "grad_norm": 1.1019655466079712,
      "learning_rate": 8.790218959825518e-06,
      "loss": 0.0424,
      "step": 387200
    },
    {
      "epoch": 0.6336940227672931,
      "grad_norm": 1.0245155096054077,
      "learning_rate": 8.790153067612e-06,
      "loss": 0.0333,
      "step": 387220
    },
    {
      "epoch": 0.6337267532059465,
      "grad_norm": 1.6882902383804321,
      "learning_rate": 8.790087175398484e-06,
      "loss": 0.0322,
      "step": 387240
    },
    {
      "epoch": 0.6337594836445998,
      "grad_norm": 1.1529871225357056,
      "learning_rate": 8.790021283184966e-06,
      "loss": 0.0384,
      "step": 387260
    },
    {
      "epoch": 0.6337922140832531,
      "grad_norm": 4.568589210510254,
      "learning_rate": 8.78995539097145e-06,
      "loss": 0.026,
      "step": 387280
    },
    {
      "epoch": 0.6338249445219065,
      "grad_norm": 0.8260684013366699,
      "learning_rate": 8.789889498757931e-06,
      "loss": 0.0368,
      "step": 387300
    },
    {
      "epoch": 0.6338576749605598,
      "grad_norm": 0.7325334548950195,
      "learning_rate": 8.789823606544415e-06,
      "loss": 0.0404,
      "step": 387320
    },
    {
      "epoch": 0.6338904053992132,
      "grad_norm": 1.0561771392822266,
      "learning_rate": 8.789757714330898e-06,
      "loss": 0.044,
      "step": 387340
    },
    {
      "epoch": 0.6339231358378665,
      "grad_norm": 1.0429351329803467,
      "learning_rate": 8.78969182211738e-06,
      "loss": 0.03,
      "step": 387360
    },
    {
      "epoch": 0.6339558662765198,
      "grad_norm": 1.3727689981460571,
      "learning_rate": 8.789625929903864e-06,
      "loss": 0.0449,
      "step": 387380
    },
    {
      "epoch": 0.6339885967151732,
      "grad_norm": 0.5953449606895447,
      "learning_rate": 8.789560037690347e-06,
      "loss": 0.0307,
      "step": 387400
    },
    {
      "epoch": 0.6340213271538265,
      "grad_norm": 1.0960359573364258,
      "learning_rate": 8.78949414547683e-06,
      "loss": 0.0315,
      "step": 387420
    },
    {
      "epoch": 0.6340540575924799,
      "grad_norm": 4.359765529632568,
      "learning_rate": 8.789428253263313e-06,
      "loss": 0.0271,
      "step": 387440
    },
    {
      "epoch": 0.6340867880311332,
      "grad_norm": 1.6673144102096558,
      "learning_rate": 8.789362361049796e-06,
      "loss": 0.0352,
      "step": 387460
    },
    {
      "epoch": 0.6341195184697865,
      "grad_norm": 0.9559906125068665,
      "learning_rate": 8.789296468836278e-06,
      "loss": 0.0558,
      "step": 387480
    },
    {
      "epoch": 0.6341522489084399,
      "grad_norm": 1.601717472076416,
      "learning_rate": 8.789230576622762e-06,
      "loss": 0.0399,
      "step": 387500
    },
    {
      "epoch": 0.6341849793470932,
      "grad_norm": 2.7374634742736816,
      "learning_rate": 8.789164684409244e-06,
      "loss": 0.044,
      "step": 387520
    },
    {
      "epoch": 0.6342177097857465,
      "grad_norm": 0.4775022566318512,
      "learning_rate": 8.789098792195727e-06,
      "loss": 0.0322,
      "step": 387540
    },
    {
      "epoch": 0.6342504402243999,
      "grad_norm": 3.855648994445801,
      "learning_rate": 8.78903289998221e-06,
      "loss": 0.0379,
      "step": 387560
    },
    {
      "epoch": 0.6342831706630532,
      "grad_norm": 1.1575109958648682,
      "learning_rate": 8.788967007768693e-06,
      "loss": 0.0386,
      "step": 387580
    },
    {
      "epoch": 0.6343159011017065,
      "grad_norm": 1.6192445755004883,
      "learning_rate": 8.788901115555175e-06,
      "loss": 0.0477,
      "step": 387600
    },
    {
      "epoch": 0.6343486315403599,
      "grad_norm": 0.5434444546699524,
      "learning_rate": 8.788835223341658e-06,
      "loss": 0.0446,
      "step": 387620
    },
    {
      "epoch": 0.6343813619790133,
      "grad_norm": 1.4806243181228638,
      "learning_rate": 8.78876933112814e-06,
      "loss": 0.0439,
      "step": 387640
    },
    {
      "epoch": 0.6344140924176666,
      "grad_norm": 1.8905681371688843,
      "learning_rate": 8.788703438914624e-06,
      "loss": 0.0434,
      "step": 387660
    },
    {
      "epoch": 0.6344468228563199,
      "grad_norm": 1.258213758468628,
      "learning_rate": 8.788637546701107e-06,
      "loss": 0.0452,
      "step": 387680
    },
    {
      "epoch": 0.6344795532949733,
      "grad_norm": 1.7208870649337769,
      "learning_rate": 8.78857165448759e-06,
      "loss": 0.0332,
      "step": 387700
    },
    {
      "epoch": 0.6345122837336266,
      "grad_norm": 0.48189473152160645,
      "learning_rate": 8.788505762274073e-06,
      "loss": 0.0265,
      "step": 387720
    },
    {
      "epoch": 0.6345450141722799,
      "grad_norm": 1.6755927801132202,
      "learning_rate": 8.788439870060555e-06,
      "loss": 0.0284,
      "step": 387740
    },
    {
      "epoch": 0.6345777446109333,
      "grad_norm": 3.3541462421417236,
      "learning_rate": 8.788373977847038e-06,
      "loss": 0.0409,
      "step": 387760
    },
    {
      "epoch": 0.6346104750495866,
      "grad_norm": 0.9125484228134155,
      "learning_rate": 8.788308085633522e-06,
      "loss": 0.0418,
      "step": 387780
    },
    {
      "epoch": 0.6346432054882399,
      "grad_norm": 1.4570140838623047,
      "learning_rate": 8.788242193420004e-06,
      "loss": 0.0381,
      "step": 387800
    },
    {
      "epoch": 0.6346759359268933,
      "grad_norm": 1.7144691944122314,
      "learning_rate": 8.788176301206487e-06,
      "loss": 0.0329,
      "step": 387820
    },
    {
      "epoch": 0.6347086663655467,
      "grad_norm": 2.0436112880706787,
      "learning_rate": 8.788110408992971e-06,
      "loss": 0.0377,
      "step": 387840
    },
    {
      "epoch": 0.6347413968041999,
      "grad_norm": 1.2908713817596436,
      "learning_rate": 8.788044516779453e-06,
      "loss": 0.0492,
      "step": 387860
    },
    {
      "epoch": 0.6347741272428533,
      "grad_norm": 1.6150630712509155,
      "learning_rate": 8.787978624565937e-06,
      "loss": 0.0288,
      "step": 387880
    },
    {
      "epoch": 0.6348068576815067,
      "grad_norm": 2.3872504234313965,
      "learning_rate": 8.787912732352418e-06,
      "loss": 0.0319,
      "step": 387900
    },
    {
      "epoch": 0.63483958812016,
      "grad_norm": 0.4743330776691437,
      "learning_rate": 8.787846840138902e-06,
      "loss": 0.0393,
      "step": 387920
    },
    {
      "epoch": 0.6348723185588133,
      "grad_norm": 1.041591763496399,
      "learning_rate": 8.787780947925384e-06,
      "loss": 0.0358,
      "step": 387940
    },
    {
      "epoch": 0.6349050489974667,
      "grad_norm": 1.888442873954773,
      "learning_rate": 8.787715055711867e-06,
      "loss": 0.0356,
      "step": 387960
    },
    {
      "epoch": 0.63493777943612,
      "grad_norm": 0.31603261828422546,
      "learning_rate": 8.78764916349835e-06,
      "loss": 0.0321,
      "step": 387980
    },
    {
      "epoch": 0.6349705098747733,
      "grad_norm": 0.4273313879966736,
      "learning_rate": 8.787583271284833e-06,
      "loss": 0.0317,
      "step": 388000
    },
    {
      "epoch": 0.6350032403134267,
      "grad_norm": 0.6237717866897583,
      "learning_rate": 8.787517379071317e-06,
      "loss": 0.0337,
      "step": 388020
    },
    {
      "epoch": 0.6350359707520801,
      "grad_norm": 2.720386505126953,
      "learning_rate": 8.787451486857798e-06,
      "loss": 0.0508,
      "step": 388040
    },
    {
      "epoch": 0.6350687011907333,
      "grad_norm": 0.8336261510848999,
      "learning_rate": 8.787385594644282e-06,
      "loss": 0.036,
      "step": 388060
    },
    {
      "epoch": 0.6351014316293867,
      "grad_norm": 0.8123838901519775,
      "learning_rate": 8.787319702430764e-06,
      "loss": 0.0379,
      "step": 388080
    },
    {
      "epoch": 0.6351341620680401,
      "grad_norm": 0.587012767791748,
      "learning_rate": 8.787253810217248e-06,
      "loss": 0.0407,
      "step": 388100
    },
    {
      "epoch": 0.6351668925066933,
      "grad_norm": 1.09939706325531,
      "learning_rate": 8.78718791800373e-06,
      "loss": 0.0398,
      "step": 388120
    },
    {
      "epoch": 0.6351996229453467,
      "grad_norm": 1.8264530897140503,
      "learning_rate": 8.787122025790213e-06,
      "loss": 0.0335,
      "step": 388140
    },
    {
      "epoch": 0.6352323533840001,
      "grad_norm": 2.754328966140747,
      "learning_rate": 8.787056133576697e-06,
      "loss": 0.0288,
      "step": 388160
    },
    {
      "epoch": 0.6352650838226533,
      "grad_norm": 0.23326906561851501,
      "learning_rate": 8.786990241363178e-06,
      "loss": 0.018,
      "step": 388180
    },
    {
      "epoch": 0.6352978142613067,
      "grad_norm": 1.4572232961654663,
      "learning_rate": 8.786924349149662e-06,
      "loss": 0.0417,
      "step": 388200
    },
    {
      "epoch": 0.6353305446999601,
      "grad_norm": 1.2444522380828857,
      "learning_rate": 8.786858456936146e-06,
      "loss": 0.0342,
      "step": 388220
    },
    {
      "epoch": 0.6353632751386135,
      "grad_norm": 2.7459235191345215,
      "learning_rate": 8.786792564722628e-06,
      "loss": 0.0363,
      "step": 388240
    },
    {
      "epoch": 0.6353960055772667,
      "grad_norm": 3.186847448348999,
      "learning_rate": 8.786726672509111e-06,
      "loss": 0.0368,
      "step": 388260
    },
    {
      "epoch": 0.6354287360159201,
      "grad_norm": 1.250044822692871,
      "learning_rate": 8.786660780295593e-06,
      "loss": 0.0307,
      "step": 388280
    },
    {
      "epoch": 0.6354614664545735,
      "grad_norm": 2.8272862434387207,
      "learning_rate": 8.786594888082077e-06,
      "loss": 0.0301,
      "step": 388300
    },
    {
      "epoch": 0.6354941968932267,
      "grad_norm": 0.9637138843536377,
      "learning_rate": 8.786528995868558e-06,
      "loss": 0.0335,
      "step": 388320
    },
    {
      "epoch": 0.6355269273318801,
      "grad_norm": 1.005326509475708,
      "learning_rate": 8.786463103655042e-06,
      "loss": 0.038,
      "step": 388340
    },
    {
      "epoch": 0.6355596577705335,
      "grad_norm": 0.711068868637085,
      "learning_rate": 8.786397211441524e-06,
      "loss": 0.0404,
      "step": 388360
    },
    {
      "epoch": 0.6355923882091867,
      "grad_norm": 1.9173128604888916,
      "learning_rate": 8.786331319228008e-06,
      "loss": 0.0441,
      "step": 388380
    },
    {
      "epoch": 0.6356251186478401,
      "grad_norm": 1.3906928300857544,
      "learning_rate": 8.786265427014491e-06,
      "loss": 0.0264,
      "step": 388400
    },
    {
      "epoch": 0.6356578490864935,
      "grad_norm": 3.36755108833313,
      "learning_rate": 8.786199534800973e-06,
      "loss": 0.0472,
      "step": 388420
    },
    {
      "epoch": 0.6356905795251468,
      "grad_norm": 3.0437512397766113,
      "learning_rate": 8.786133642587457e-06,
      "loss": 0.0339,
      "step": 388440
    },
    {
      "epoch": 0.6357233099638001,
      "grad_norm": 0.777528703212738,
      "learning_rate": 8.786067750373939e-06,
      "loss": 0.0343,
      "step": 388460
    },
    {
      "epoch": 0.6357560404024535,
      "grad_norm": 1.4489318132400513,
      "learning_rate": 8.786001858160422e-06,
      "loss": 0.026,
      "step": 388480
    },
    {
      "epoch": 0.6357887708411069,
      "grad_norm": 1.7649130821228027,
      "learning_rate": 8.785935965946904e-06,
      "loss": 0.0384,
      "step": 388500
    },
    {
      "epoch": 0.6358215012797601,
      "grad_norm": 0.8584762811660767,
      "learning_rate": 8.785870073733388e-06,
      "loss": 0.0241,
      "step": 388520
    },
    {
      "epoch": 0.6358542317184135,
      "grad_norm": 1.0352623462677002,
      "learning_rate": 8.78580418151987e-06,
      "loss": 0.0394,
      "step": 388540
    },
    {
      "epoch": 0.6358869621570669,
      "grad_norm": 1.4572408199310303,
      "learning_rate": 8.785738289306353e-06,
      "loss": 0.0385,
      "step": 388560
    },
    {
      "epoch": 0.6359196925957201,
      "grad_norm": 0.5662677884101868,
      "learning_rate": 8.785672397092837e-06,
      "loss": 0.0402,
      "step": 388580
    },
    {
      "epoch": 0.6359524230343735,
      "grad_norm": 1.5686099529266357,
      "learning_rate": 8.785606504879319e-06,
      "loss": 0.0278,
      "step": 388600
    },
    {
      "epoch": 0.6359851534730269,
      "grad_norm": 1.223050594329834,
      "learning_rate": 8.785540612665802e-06,
      "loss": 0.0385,
      "step": 388620
    },
    {
      "epoch": 0.6360178839116802,
      "grad_norm": 0.892493486404419,
      "learning_rate": 8.785474720452286e-06,
      "loss": 0.03,
      "step": 388640
    },
    {
      "epoch": 0.6360506143503335,
      "grad_norm": 1.1119062900543213,
      "learning_rate": 8.785408828238768e-06,
      "loss": 0.0397,
      "step": 388660
    },
    {
      "epoch": 0.6360833447889869,
      "grad_norm": 0.5607866048812866,
      "learning_rate": 8.785342936025251e-06,
      "loss": 0.0433,
      "step": 388680
    },
    {
      "epoch": 0.6361160752276402,
      "grad_norm": 0.5944909453392029,
      "learning_rate": 8.785277043811733e-06,
      "loss": 0.0261,
      "step": 388700
    },
    {
      "epoch": 0.6361488056662935,
      "grad_norm": 1.6397262811660767,
      "learning_rate": 8.785211151598217e-06,
      "loss": 0.0318,
      "step": 388720
    },
    {
      "epoch": 0.6361815361049469,
      "grad_norm": 1.7568470239639282,
      "learning_rate": 8.7851452593847e-06,
      "loss": 0.0354,
      "step": 388740
    },
    {
      "epoch": 0.6362142665436002,
      "grad_norm": 0.4205484688282013,
      "learning_rate": 8.785079367171182e-06,
      "loss": 0.038,
      "step": 388760
    },
    {
      "epoch": 0.6362469969822535,
      "grad_norm": 0.718805193901062,
      "learning_rate": 8.785013474957666e-06,
      "loss": 0.0369,
      "step": 388780
    },
    {
      "epoch": 0.6362797274209069,
      "grad_norm": 3.323493480682373,
      "learning_rate": 8.784947582744148e-06,
      "loss": 0.035,
      "step": 388800
    },
    {
      "epoch": 0.6363124578595603,
      "grad_norm": 1.6213068962097168,
      "learning_rate": 8.784881690530631e-06,
      "loss": 0.0354,
      "step": 388820
    },
    {
      "epoch": 0.6363451882982136,
      "grad_norm": 0.9534494876861572,
      "learning_rate": 8.784815798317113e-06,
      "loss": 0.0306,
      "step": 388840
    },
    {
      "epoch": 0.6363779187368669,
      "grad_norm": 3.8824336528778076,
      "learning_rate": 8.784749906103597e-06,
      "loss": 0.0331,
      "step": 388860
    },
    {
      "epoch": 0.6364106491755203,
      "grad_norm": 1.0835211277008057,
      "learning_rate": 8.784684013890079e-06,
      "loss": 0.024,
      "step": 388880
    },
    {
      "epoch": 0.6364433796141736,
      "grad_norm": 0.6422291398048401,
      "learning_rate": 8.784618121676562e-06,
      "loss": 0.0319,
      "step": 388900
    },
    {
      "epoch": 0.6364761100528269,
      "grad_norm": 4.211513519287109,
      "learning_rate": 8.784552229463044e-06,
      "loss": 0.0436,
      "step": 388920
    },
    {
      "epoch": 0.6365088404914803,
      "grad_norm": 1.242559790611267,
      "learning_rate": 8.784486337249528e-06,
      "loss": 0.0424,
      "step": 388940
    },
    {
      "epoch": 0.6365415709301336,
      "grad_norm": 0.8773688077926636,
      "learning_rate": 8.784420445036011e-06,
      "loss": 0.034,
      "step": 388960
    },
    {
      "epoch": 0.6365743013687869,
      "grad_norm": 1.6983201503753662,
      "learning_rate": 8.784354552822493e-06,
      "loss": 0.03,
      "step": 388980
    },
    {
      "epoch": 0.6366070318074403,
      "grad_norm": 0.318829745054245,
      "learning_rate": 8.784288660608977e-06,
      "loss": 0.037,
      "step": 389000
    },
    {
      "epoch": 0.6366397622460936,
      "grad_norm": 1.2706758975982666,
      "learning_rate": 8.78422276839546e-06,
      "loss": 0.0356,
      "step": 389020
    },
    {
      "epoch": 0.636672492684747,
      "grad_norm": 0.37897539138793945,
      "learning_rate": 8.784156876181942e-06,
      "loss": 0.0285,
      "step": 389040
    },
    {
      "epoch": 0.6367052231234003,
      "grad_norm": 6.236710548400879,
      "learning_rate": 8.784090983968426e-06,
      "loss": 0.0329,
      "step": 389060
    },
    {
      "epoch": 0.6367379535620536,
      "grad_norm": 0.5663692951202393,
      "learning_rate": 8.78402509175491e-06,
      "loss": 0.0379,
      "step": 389080
    },
    {
      "epoch": 0.636770684000707,
      "grad_norm": 1.0954430103302002,
      "learning_rate": 8.783959199541391e-06,
      "loss": 0.0479,
      "step": 389100
    },
    {
      "epoch": 0.6368034144393603,
      "grad_norm": 1.1753883361816406,
      "learning_rate": 8.783893307327875e-06,
      "loss": 0.0279,
      "step": 389120
    },
    {
      "epoch": 0.6368361448780137,
      "grad_norm": 0.7661649584770203,
      "learning_rate": 8.783827415114357e-06,
      "loss": 0.027,
      "step": 389140
    },
    {
      "epoch": 0.636868875316667,
      "grad_norm": 0.9284095764160156,
      "learning_rate": 8.78376152290084e-06,
      "loss": 0.0309,
      "step": 389160
    },
    {
      "epoch": 0.6369016057553203,
      "grad_norm": 2.618166446685791,
      "learning_rate": 8.783695630687322e-06,
      "loss": 0.0431,
      "step": 389180
    },
    {
      "epoch": 0.6369343361939737,
      "grad_norm": 1.4827369451522827,
      "learning_rate": 8.783629738473806e-06,
      "loss": 0.0351,
      "step": 389200
    },
    {
      "epoch": 0.636967066632627,
      "grad_norm": 1.610452651977539,
      "learning_rate": 8.783563846260288e-06,
      "loss": 0.0351,
      "step": 389220
    },
    {
      "epoch": 0.6369997970712804,
      "grad_norm": 0.574610710144043,
      "learning_rate": 8.783497954046771e-06,
      "loss": 0.0261,
      "step": 389240
    },
    {
      "epoch": 0.6370325275099337,
      "grad_norm": 2.920525550842285,
      "learning_rate": 8.783432061833253e-06,
      "loss": 0.0343,
      "step": 389260
    },
    {
      "epoch": 0.637065257948587,
      "grad_norm": 0.6380356550216675,
      "learning_rate": 8.783366169619737e-06,
      "loss": 0.0339,
      "step": 389280
    },
    {
      "epoch": 0.6370979883872404,
      "grad_norm": 1.8794664144515991,
      "learning_rate": 8.783300277406219e-06,
      "loss": 0.0297,
      "step": 389300
    },
    {
      "epoch": 0.6371307188258937,
      "grad_norm": 0.6531050801277161,
      "learning_rate": 8.783234385192702e-06,
      "loss": 0.0327,
      "step": 389320
    },
    {
      "epoch": 0.637163449264547,
      "grad_norm": 1.1032545566558838,
      "learning_rate": 8.783168492979184e-06,
      "loss": 0.0282,
      "step": 389340
    },
    {
      "epoch": 0.6371961797032004,
      "grad_norm": 0.4715043306350708,
      "learning_rate": 8.783102600765668e-06,
      "loss": 0.0499,
      "step": 389360
    },
    {
      "epoch": 0.6372289101418537,
      "grad_norm": 1.0968900918960571,
      "learning_rate": 8.783036708552151e-06,
      "loss": 0.0387,
      "step": 389380
    },
    {
      "epoch": 0.637261640580507,
      "grad_norm": 1.5148688554763794,
      "learning_rate": 8.782970816338633e-06,
      "loss": 0.0331,
      "step": 389400
    },
    {
      "epoch": 0.6372943710191604,
      "grad_norm": 1.565636396408081,
      "learning_rate": 8.782904924125117e-06,
      "loss": 0.0321,
      "step": 389420
    },
    {
      "epoch": 0.6373271014578137,
      "grad_norm": 1.4618102312088013,
      "learning_rate": 8.7828390319116e-06,
      "loss": 0.0402,
      "step": 389440
    },
    {
      "epoch": 0.637359831896467,
      "grad_norm": 0.8222981095314026,
      "learning_rate": 8.782773139698082e-06,
      "loss": 0.045,
      "step": 389460
    },
    {
      "epoch": 0.6373925623351204,
      "grad_norm": 2.0713002681732178,
      "learning_rate": 8.782707247484566e-06,
      "loss": 0.0285,
      "step": 389480
    },
    {
      "epoch": 0.6374252927737738,
      "grad_norm": 0.8313344120979309,
      "learning_rate": 8.78264135527105e-06,
      "loss": 0.0452,
      "step": 389500
    },
    {
      "epoch": 0.6374580232124271,
      "grad_norm": 2.0153684616088867,
      "learning_rate": 8.782575463057531e-06,
      "loss": 0.0358,
      "step": 389520
    },
    {
      "epoch": 0.6374907536510804,
      "grad_norm": 0.8422183990478516,
      "learning_rate": 8.782509570844015e-06,
      "loss": 0.0335,
      "step": 389540
    },
    {
      "epoch": 0.6375234840897338,
      "grad_norm": 0.862076997756958,
      "learning_rate": 8.782443678630497e-06,
      "loss": 0.035,
      "step": 389560
    },
    {
      "epoch": 0.6375562145283871,
      "grad_norm": 1.9146300554275513,
      "learning_rate": 8.78237778641698e-06,
      "loss": 0.0282,
      "step": 389580
    },
    {
      "epoch": 0.6375889449670404,
      "grad_norm": 0.7314562797546387,
      "learning_rate": 8.782311894203462e-06,
      "loss": 0.0247,
      "step": 389600
    },
    {
      "epoch": 0.6376216754056938,
      "grad_norm": 1.2312206029891968,
      "learning_rate": 8.782246001989946e-06,
      "loss": 0.0251,
      "step": 389620
    },
    {
      "epoch": 0.6376544058443471,
      "grad_norm": 4.812543869018555,
      "learning_rate": 8.782180109776428e-06,
      "loss": 0.0432,
      "step": 389640
    },
    {
      "epoch": 0.6376871362830004,
      "grad_norm": 0.5192971229553223,
      "learning_rate": 8.782114217562911e-06,
      "loss": 0.0221,
      "step": 389660
    },
    {
      "epoch": 0.6377198667216538,
      "grad_norm": 5.189918041229248,
      "learning_rate": 8.782048325349393e-06,
      "loss": 0.0349,
      "step": 389680
    },
    {
      "epoch": 0.6377525971603072,
      "grad_norm": 0.8044979572296143,
      "learning_rate": 8.781982433135877e-06,
      "loss": 0.045,
      "step": 389700
    },
    {
      "epoch": 0.6377853275989604,
      "grad_norm": 0.6390044093132019,
      "learning_rate": 8.781916540922359e-06,
      "loss": 0.0315,
      "step": 389720
    },
    {
      "epoch": 0.6378180580376138,
      "grad_norm": 1.6472748517990112,
      "learning_rate": 8.781850648708842e-06,
      "loss": 0.0465,
      "step": 389740
    },
    {
      "epoch": 0.6378507884762672,
      "grad_norm": 0.5723243951797485,
      "learning_rate": 8.781784756495326e-06,
      "loss": 0.0424,
      "step": 389760
    },
    {
      "epoch": 0.6378835189149205,
      "grad_norm": 1.4031298160552979,
      "learning_rate": 8.781718864281808e-06,
      "loss": 0.0384,
      "step": 389780
    },
    {
      "epoch": 0.6379162493535738,
      "grad_norm": 0.446540892124176,
      "learning_rate": 8.781652972068291e-06,
      "loss": 0.0338,
      "step": 389800
    },
    {
      "epoch": 0.6379489797922272,
      "grad_norm": 0.642686665058136,
      "learning_rate": 8.781587079854775e-06,
      "loss": 0.0393,
      "step": 389820
    },
    {
      "epoch": 0.6379817102308805,
      "grad_norm": 3.182379961013794,
      "learning_rate": 8.781521187641257e-06,
      "loss": 0.0329,
      "step": 389840
    },
    {
      "epoch": 0.6380144406695338,
      "grad_norm": 1.3986142873764038,
      "learning_rate": 8.78145529542774e-06,
      "loss": 0.0345,
      "step": 389860
    },
    {
      "epoch": 0.6380471711081872,
      "grad_norm": 1.8965578079223633,
      "learning_rate": 8.781389403214224e-06,
      "loss": 0.0422,
      "step": 389880
    },
    {
      "epoch": 0.6380799015468406,
      "grad_norm": 1.131857991218567,
      "learning_rate": 8.781323511000706e-06,
      "loss": 0.028,
      "step": 389900
    },
    {
      "epoch": 0.6381126319854938,
      "grad_norm": 1.9742025136947632,
      "learning_rate": 8.78125761878719e-06,
      "loss": 0.0398,
      "step": 389920
    },
    {
      "epoch": 0.6381453624241472,
      "grad_norm": 0.8447095155715942,
      "learning_rate": 8.781191726573671e-06,
      "loss": 0.0369,
      "step": 389940
    },
    {
      "epoch": 0.6381780928628006,
      "grad_norm": 0.5032299757003784,
      "learning_rate": 8.781125834360155e-06,
      "loss": 0.0263,
      "step": 389960
    },
    {
      "epoch": 0.6382108233014538,
      "grad_norm": 0.9490402936935425,
      "learning_rate": 8.781059942146637e-06,
      "loss": 0.0282,
      "step": 389980
    },
    {
      "epoch": 0.6382435537401072,
      "grad_norm": 2.0300350189208984,
      "learning_rate": 8.78099404993312e-06,
      "loss": 0.0319,
      "step": 390000
    },
    {
      "epoch": 0.6382762841787606,
      "grad_norm": 3.9930827617645264,
      "learning_rate": 8.780928157719602e-06,
      "loss": 0.0379,
      "step": 390020
    },
    {
      "epoch": 0.6383090146174139,
      "grad_norm": 0.5700880289077759,
      "learning_rate": 8.780862265506086e-06,
      "loss": 0.0292,
      "step": 390040
    },
    {
      "epoch": 0.6383417450560672,
      "grad_norm": 0.8390404582023621,
      "learning_rate": 8.780796373292568e-06,
      "loss": 0.0286,
      "step": 390060
    },
    {
      "epoch": 0.6383744754947206,
      "grad_norm": 0.4950704574584961,
      "learning_rate": 8.780730481079051e-06,
      "loss": 0.0388,
      "step": 390080
    },
    {
      "epoch": 0.638407205933374,
      "grad_norm": 1.2650307416915894,
      "learning_rate": 8.780664588865533e-06,
      "loss": 0.0324,
      "step": 390100
    },
    {
      "epoch": 0.6384399363720272,
      "grad_norm": 1.8690659999847412,
      "learning_rate": 8.780598696652017e-06,
      "loss": 0.0218,
      "step": 390120
    },
    {
      "epoch": 0.6384726668106806,
      "grad_norm": 0.5290512442588806,
      "learning_rate": 8.7805328044385e-06,
      "loss": 0.0317,
      "step": 390140
    },
    {
      "epoch": 0.638505397249334,
      "grad_norm": 0.8582062721252441,
      "learning_rate": 8.780466912224982e-06,
      "loss": 0.0334,
      "step": 390160
    },
    {
      "epoch": 0.6385381276879872,
      "grad_norm": 0.31382089853286743,
      "learning_rate": 8.780401020011466e-06,
      "loss": 0.0309,
      "step": 390180
    },
    {
      "epoch": 0.6385708581266406,
      "grad_norm": 1.3214167356491089,
      "learning_rate": 8.780335127797948e-06,
      "loss": 0.0385,
      "step": 390200
    },
    {
      "epoch": 0.638603588565294,
      "grad_norm": 0.35385051369667053,
      "learning_rate": 8.780269235584431e-06,
      "loss": 0.0335,
      "step": 390220
    },
    {
      "epoch": 0.6386363190039472,
      "grad_norm": 0.748347818851471,
      "learning_rate": 8.780203343370915e-06,
      "loss": 0.0465,
      "step": 390240
    },
    {
      "epoch": 0.6386690494426006,
      "grad_norm": 0.6848519444465637,
      "learning_rate": 8.780137451157397e-06,
      "loss": 0.0312,
      "step": 390260
    },
    {
      "epoch": 0.638701779881254,
      "grad_norm": 0.6796048283576965,
      "learning_rate": 8.78007155894388e-06,
      "loss": 0.0395,
      "step": 390280
    },
    {
      "epoch": 0.6387345103199074,
      "grad_norm": 1.7825610637664795,
      "learning_rate": 8.780005666730364e-06,
      "loss": 0.0272,
      "step": 390300
    },
    {
      "epoch": 0.6387672407585606,
      "grad_norm": 0.8613002300262451,
      "learning_rate": 8.779939774516846e-06,
      "loss": 0.0303,
      "step": 390320
    },
    {
      "epoch": 0.638799971197214,
      "grad_norm": 1.3808503150939941,
      "learning_rate": 8.77987388230333e-06,
      "loss": 0.0399,
      "step": 390340
    },
    {
      "epoch": 0.6388327016358674,
      "grad_norm": 1.0329091548919678,
      "learning_rate": 8.779807990089812e-06,
      "loss": 0.0378,
      "step": 390360
    },
    {
      "epoch": 0.6388654320745206,
      "grad_norm": 0.4736851453781128,
      "learning_rate": 8.779742097876295e-06,
      "loss": 0.036,
      "step": 390380
    },
    {
      "epoch": 0.638898162513174,
      "grad_norm": 1.3201276063919067,
      "learning_rate": 8.779676205662777e-06,
      "loss": 0.0332,
      "step": 390400
    },
    {
      "epoch": 0.6389308929518274,
      "grad_norm": 1.0450536012649536,
      "learning_rate": 8.77961031344926e-06,
      "loss": 0.0361,
      "step": 390420
    },
    {
      "epoch": 0.6389636233904806,
      "grad_norm": 1.5295979976654053,
      "learning_rate": 8.779544421235742e-06,
      "loss": 0.0356,
      "step": 390440
    },
    {
      "epoch": 0.638996353829134,
      "grad_norm": 1.572701096534729,
      "learning_rate": 8.779478529022226e-06,
      "loss": 0.03,
      "step": 390460
    },
    {
      "epoch": 0.6390290842677874,
      "grad_norm": 1.8142826557159424,
      "learning_rate": 8.77941263680871e-06,
      "loss": 0.0354,
      "step": 390480
    },
    {
      "epoch": 0.6390618147064407,
      "grad_norm": 1.4927421808242798,
      "learning_rate": 8.779346744595192e-06,
      "loss": 0.0304,
      "step": 390500
    },
    {
      "epoch": 0.639094545145094,
      "grad_norm": 3.2401936054229736,
      "learning_rate": 8.779280852381675e-06,
      "loss": 0.0358,
      "step": 390520
    },
    {
      "epoch": 0.6391272755837474,
      "grad_norm": 0.865332841873169,
      "learning_rate": 8.779214960168157e-06,
      "loss": 0.0394,
      "step": 390540
    },
    {
      "epoch": 0.6391600060224008,
      "grad_norm": 0.15340113639831543,
      "learning_rate": 8.77914906795464e-06,
      "loss": 0.0311,
      "step": 390560
    },
    {
      "epoch": 0.639192736461054,
      "grad_norm": 1.8710793256759644,
      "learning_rate": 8.779083175741122e-06,
      "loss": 0.0355,
      "step": 390580
    },
    {
      "epoch": 0.6392254668997074,
      "grad_norm": 0.6023090481758118,
      "learning_rate": 8.779017283527606e-06,
      "loss": 0.029,
      "step": 390600
    },
    {
      "epoch": 0.6392581973383608,
      "grad_norm": 1.4431649446487427,
      "learning_rate": 8.77895139131409e-06,
      "loss": 0.0314,
      "step": 390620
    },
    {
      "epoch": 0.639290927777014,
      "grad_norm": 1.4508936405181885,
      "learning_rate": 8.778885499100572e-06,
      "loss": 0.0357,
      "step": 390640
    },
    {
      "epoch": 0.6393236582156674,
      "grad_norm": 1.1784355640411377,
      "learning_rate": 8.778819606887055e-06,
      "loss": 0.0403,
      "step": 390660
    },
    {
      "epoch": 0.6393563886543208,
      "grad_norm": 2.0852997303009033,
      "learning_rate": 8.778753714673539e-06,
      "loss": 0.0552,
      "step": 390680
    },
    {
      "epoch": 0.6393891190929741,
      "grad_norm": 1.0665295124053955,
      "learning_rate": 8.77868782246002e-06,
      "loss": 0.0435,
      "step": 390700
    },
    {
      "epoch": 0.6394218495316274,
      "grad_norm": 1.264182686805725,
      "learning_rate": 8.778621930246504e-06,
      "loss": 0.0291,
      "step": 390720
    },
    {
      "epoch": 0.6394545799702808,
      "grad_norm": 0.3300958573818207,
      "learning_rate": 8.778556038032986e-06,
      "loss": 0.0442,
      "step": 390740
    },
    {
      "epoch": 0.6394873104089341,
      "grad_norm": 1.3199037313461304,
      "learning_rate": 8.77849014581947e-06,
      "loss": 0.041,
      "step": 390760
    },
    {
      "epoch": 0.6395200408475874,
      "grad_norm": 0.7502720952033997,
      "learning_rate": 8.778424253605952e-06,
      "loss": 0.0329,
      "step": 390780
    },
    {
      "epoch": 0.6395527712862408,
      "grad_norm": 0.22469815611839294,
      "learning_rate": 8.778358361392435e-06,
      "loss": 0.0338,
      "step": 390800
    },
    {
      "epoch": 0.6395855017248941,
      "grad_norm": 0.9470593333244324,
      "learning_rate": 8.778292469178917e-06,
      "loss": 0.0356,
      "step": 390820
    },
    {
      "epoch": 0.6396182321635474,
      "grad_norm": 1.0204466581344604,
      "learning_rate": 8.7782265769654e-06,
      "loss": 0.0415,
      "step": 390840
    },
    {
      "epoch": 0.6396509626022008,
      "grad_norm": 1.846388578414917,
      "learning_rate": 8.778160684751884e-06,
      "loss": 0.0336,
      "step": 390860
    },
    {
      "epoch": 0.6396836930408542,
      "grad_norm": 0.33875876665115356,
      "learning_rate": 8.778094792538366e-06,
      "loss": 0.0339,
      "step": 390880
    },
    {
      "epoch": 0.6397164234795075,
      "grad_norm": 1.3735052347183228,
      "learning_rate": 8.77802890032485e-06,
      "loss": 0.0443,
      "step": 390900
    },
    {
      "epoch": 0.6397491539181608,
      "grad_norm": 0.32709193229675293,
      "learning_rate": 8.777963008111332e-06,
      "loss": 0.0449,
      "step": 390920
    },
    {
      "epoch": 0.6397818843568142,
      "grad_norm": 1.240533471107483,
      "learning_rate": 8.777897115897815e-06,
      "loss": 0.0374,
      "step": 390940
    },
    {
      "epoch": 0.6398146147954675,
      "grad_norm": 0.5956702828407288,
      "learning_rate": 8.777831223684297e-06,
      "loss": 0.0389,
      "step": 390960
    },
    {
      "epoch": 0.6398473452341208,
      "grad_norm": 0.6795110106468201,
      "learning_rate": 8.77776533147078e-06,
      "loss": 0.0313,
      "step": 390980
    },
    {
      "epoch": 0.6398800756727742,
      "grad_norm": 2.9205610752105713,
      "learning_rate": 8.777699439257264e-06,
      "loss": 0.0368,
      "step": 391000
    },
    {
      "epoch": 0.6399128061114275,
      "grad_norm": 0.7438228130340576,
      "learning_rate": 8.777633547043746e-06,
      "loss": 0.0404,
      "step": 391020
    },
    {
      "epoch": 0.6399455365500808,
      "grad_norm": 1.6355286836624146,
      "learning_rate": 8.77756765483023e-06,
      "loss": 0.0297,
      "step": 391040
    },
    {
      "epoch": 0.6399782669887342,
      "grad_norm": 1.530295968055725,
      "learning_rate": 8.777501762616713e-06,
      "loss": 0.0376,
      "step": 391060
    },
    {
      "epoch": 0.6400109974273875,
      "grad_norm": 1.4126938581466675,
      "learning_rate": 8.777435870403195e-06,
      "loss": 0.0346,
      "step": 391080
    },
    {
      "epoch": 0.6400437278660409,
      "grad_norm": 1.5195387601852417,
      "learning_rate": 8.777369978189679e-06,
      "loss": 0.0311,
      "step": 391100
    },
    {
      "epoch": 0.6400764583046942,
      "grad_norm": 3.4517719745635986,
      "learning_rate": 8.77730408597616e-06,
      "loss": 0.0317,
      "step": 391120
    },
    {
      "epoch": 0.6401091887433475,
      "grad_norm": 1.2455010414123535,
      "learning_rate": 8.777238193762644e-06,
      "loss": 0.0295,
      "step": 391140
    },
    {
      "epoch": 0.6401419191820009,
      "grad_norm": 1.170359492301941,
      "learning_rate": 8.777172301549126e-06,
      "loss": 0.0516,
      "step": 391160
    },
    {
      "epoch": 0.6401746496206542,
      "grad_norm": 0.8704727292060852,
      "learning_rate": 8.77710640933561e-06,
      "loss": 0.0284,
      "step": 391180
    },
    {
      "epoch": 0.6402073800593076,
      "grad_norm": 0.3205070197582245,
      "learning_rate": 8.777040517122093e-06,
      "loss": 0.0341,
      "step": 391200
    },
    {
      "epoch": 0.6402401104979609,
      "grad_norm": 2.280972957611084,
      "learning_rate": 8.776974624908575e-06,
      "loss": 0.0429,
      "step": 391220
    },
    {
      "epoch": 0.6402728409366142,
      "grad_norm": 1.6794242858886719,
      "learning_rate": 8.776908732695059e-06,
      "loss": 0.0214,
      "step": 391240
    },
    {
      "epoch": 0.6403055713752676,
      "grad_norm": 2.406078815460205,
      "learning_rate": 8.77684284048154e-06,
      "loss": 0.038,
      "step": 391260
    },
    {
      "epoch": 0.6403383018139209,
      "grad_norm": 0.2679747939109802,
      "learning_rate": 8.776776948268024e-06,
      "loss": 0.0335,
      "step": 391280
    },
    {
      "epoch": 0.6403710322525743,
      "grad_norm": 0.3064042031764984,
      "learning_rate": 8.776711056054506e-06,
      "loss": 0.0297,
      "step": 391300
    },
    {
      "epoch": 0.6404037626912276,
      "grad_norm": 0.8996783494949341,
      "learning_rate": 8.77664516384099e-06,
      "loss": 0.0302,
      "step": 391320
    },
    {
      "epoch": 0.6404364931298809,
      "grad_norm": 0.9592686891555786,
      "learning_rate": 8.776579271627472e-06,
      "loss": 0.0293,
      "step": 391340
    },
    {
      "epoch": 0.6404692235685343,
      "grad_norm": 1.8739967346191406,
      "learning_rate": 8.776513379413955e-06,
      "loss": 0.0443,
      "step": 391360
    },
    {
      "epoch": 0.6405019540071876,
      "grad_norm": 0.295475572347641,
      "learning_rate": 8.776447487200437e-06,
      "loss": 0.0293,
      "step": 391380
    },
    {
      "epoch": 0.6405346844458409,
      "grad_norm": 1.6355443000793457,
      "learning_rate": 8.77638159498692e-06,
      "loss": 0.027,
      "step": 391400
    },
    {
      "epoch": 0.6405674148844943,
      "grad_norm": 3.1090455055236816,
      "learning_rate": 8.776315702773404e-06,
      "loss": 0.0496,
      "step": 391420
    },
    {
      "epoch": 0.6406001453231476,
      "grad_norm": 0.5615649223327637,
      "learning_rate": 8.776249810559886e-06,
      "loss": 0.033,
      "step": 391440
    },
    {
      "epoch": 0.640632875761801,
      "grad_norm": 4.423083782196045,
      "learning_rate": 8.77618391834637e-06,
      "loss": 0.0327,
      "step": 391460
    },
    {
      "epoch": 0.6406656062004543,
      "grad_norm": 0.8757742047309875,
      "learning_rate": 8.776118026132853e-06,
      "loss": 0.0274,
      "step": 391480
    },
    {
      "epoch": 0.6406983366391077,
      "grad_norm": 0.6826450228691101,
      "learning_rate": 8.776052133919335e-06,
      "loss": 0.0361,
      "step": 391500
    },
    {
      "epoch": 0.640731067077761,
      "grad_norm": 1.4353255033493042,
      "learning_rate": 8.775986241705819e-06,
      "loss": 0.038,
      "step": 391520
    },
    {
      "epoch": 0.6407637975164143,
      "grad_norm": 2.564300298690796,
      "learning_rate": 8.775920349492302e-06,
      "loss": 0.0413,
      "step": 391540
    },
    {
      "epoch": 0.6407965279550677,
      "grad_norm": 1.980619192123413,
      "learning_rate": 8.775854457278784e-06,
      "loss": 0.0465,
      "step": 391560
    },
    {
      "epoch": 0.640829258393721,
      "grad_norm": 2.1406054496765137,
      "learning_rate": 8.775788565065268e-06,
      "loss": 0.0395,
      "step": 391580
    },
    {
      "epoch": 0.6408619888323743,
      "grad_norm": 1.0327856540679932,
      "learning_rate": 8.77572267285175e-06,
      "loss": 0.0335,
      "step": 391600
    },
    {
      "epoch": 0.6408947192710277,
      "grad_norm": 1.025204062461853,
      "learning_rate": 8.775656780638233e-06,
      "loss": 0.042,
      "step": 391620
    },
    {
      "epoch": 0.640927449709681,
      "grad_norm": 0.2129576951265335,
      "learning_rate": 8.775590888424715e-06,
      "loss": 0.0386,
      "step": 391640
    },
    {
      "epoch": 0.6409601801483343,
      "grad_norm": 1.4169732332229614,
      "learning_rate": 8.775524996211199e-06,
      "loss": 0.0448,
      "step": 391660
    },
    {
      "epoch": 0.6409929105869877,
      "grad_norm": 2.6729681491851807,
      "learning_rate": 8.77545910399768e-06,
      "loss": 0.048,
      "step": 391680
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 1.5481630563735962,
      "learning_rate": 8.775393211784164e-06,
      "loss": 0.0342,
      "step": 391700
    },
    {
      "epoch": 0.6410583714642943,
      "grad_norm": 0.5266445875167847,
      "learning_rate": 8.775327319570646e-06,
      "loss": 0.0362,
      "step": 391720
    },
    {
      "epoch": 0.6410911019029477,
      "grad_norm": 2.0402374267578125,
      "learning_rate": 8.77526142735713e-06,
      "loss": 0.0402,
      "step": 391740
    },
    {
      "epoch": 0.6411238323416011,
      "grad_norm": 0.41185447573661804,
      "learning_rate": 8.775195535143612e-06,
      "loss": 0.0299,
      "step": 391760
    },
    {
      "epoch": 0.6411565627802543,
      "grad_norm": 0.528393566608429,
      "learning_rate": 8.775129642930095e-06,
      "loss": 0.0437,
      "step": 391780
    },
    {
      "epoch": 0.6411892932189077,
      "grad_norm": 2.3008108139038086,
      "learning_rate": 8.775063750716579e-06,
      "loss": 0.044,
      "step": 391800
    },
    {
      "epoch": 0.6412220236575611,
      "grad_norm": 1.1362299919128418,
      "learning_rate": 8.77499785850306e-06,
      "loss": 0.0379,
      "step": 391820
    },
    {
      "epoch": 0.6412547540962144,
      "grad_norm": 1.9151694774627686,
      "learning_rate": 8.774931966289544e-06,
      "loss": 0.0509,
      "step": 391840
    },
    {
      "epoch": 0.6412874845348677,
      "grad_norm": 0.6951583623886108,
      "learning_rate": 8.774866074076028e-06,
      "loss": 0.0302,
      "step": 391860
    },
    {
      "epoch": 0.6413202149735211,
      "grad_norm": 1.4357023239135742,
      "learning_rate": 8.77480018186251e-06,
      "loss": 0.0447,
      "step": 391880
    },
    {
      "epoch": 0.6413529454121745,
      "grad_norm": 1.318271279335022,
      "learning_rate": 8.774734289648993e-06,
      "loss": 0.0412,
      "step": 391900
    },
    {
      "epoch": 0.6413856758508277,
      "grad_norm": 2.9820141792297363,
      "learning_rate": 8.774668397435477e-06,
      "loss": 0.0373,
      "step": 391920
    },
    {
      "epoch": 0.6414184062894811,
      "grad_norm": 0.9617183804512024,
      "learning_rate": 8.774602505221959e-06,
      "loss": 0.0269,
      "step": 391940
    },
    {
      "epoch": 0.6414511367281345,
      "grad_norm": 0.6340910792350769,
      "learning_rate": 8.774536613008443e-06,
      "loss": 0.031,
      "step": 391960
    },
    {
      "epoch": 0.6414838671667877,
      "grad_norm": 0.8541669845581055,
      "learning_rate": 8.774470720794924e-06,
      "loss": 0.0412,
      "step": 391980
    },
    {
      "epoch": 0.6415165976054411,
      "grad_norm": 0.8195111155509949,
      "learning_rate": 8.774404828581408e-06,
      "loss": 0.039,
      "step": 392000
    },
    {
      "epoch": 0.6415493280440945,
      "grad_norm": 0.42470747232437134,
      "learning_rate": 8.77433893636789e-06,
      "loss": 0.0272,
      "step": 392020
    },
    {
      "epoch": 0.6415820584827477,
      "grad_norm": 0.2871536314487457,
      "learning_rate": 8.774273044154373e-06,
      "loss": 0.029,
      "step": 392040
    },
    {
      "epoch": 0.6416147889214011,
      "grad_norm": 0.9613838195800781,
      "learning_rate": 8.774207151940855e-06,
      "loss": 0.0326,
      "step": 392060
    },
    {
      "epoch": 0.6416475193600545,
      "grad_norm": 3.7269139289855957,
      "learning_rate": 8.774141259727339e-06,
      "loss": 0.0379,
      "step": 392080
    },
    {
      "epoch": 0.6416802497987077,
      "grad_norm": 1.150836706161499,
      "learning_rate": 8.774075367513821e-06,
      "loss": 0.0327,
      "step": 392100
    },
    {
      "epoch": 0.6417129802373611,
      "grad_norm": 2.7553184032440186,
      "learning_rate": 8.774009475300304e-06,
      "loss": 0.0429,
      "step": 392120
    },
    {
      "epoch": 0.6417457106760145,
      "grad_norm": 0.1831815391778946,
      "learning_rate": 8.773943583086786e-06,
      "loss": 0.0436,
      "step": 392140
    },
    {
      "epoch": 0.6417784411146679,
      "grad_norm": 1.0557245016098022,
      "learning_rate": 8.77387769087327e-06,
      "loss": 0.0487,
      "step": 392160
    },
    {
      "epoch": 0.6418111715533211,
      "grad_norm": 2.6571452617645264,
      "learning_rate": 8.773811798659752e-06,
      "loss": 0.0391,
      "step": 392180
    },
    {
      "epoch": 0.6418439019919745,
      "grad_norm": 1.2740578651428223,
      "learning_rate": 8.773745906446235e-06,
      "loss": 0.0373,
      "step": 392200
    },
    {
      "epoch": 0.6418766324306279,
      "grad_norm": 1.7477153539657593,
      "learning_rate": 8.773680014232719e-06,
      "loss": 0.0405,
      "step": 392220
    },
    {
      "epoch": 0.6419093628692811,
      "grad_norm": 0.5472638607025146,
      "learning_rate": 8.773614122019201e-06,
      "loss": 0.038,
      "step": 392240
    },
    {
      "epoch": 0.6419420933079345,
      "grad_norm": 1.2617884874343872,
      "learning_rate": 8.773548229805684e-06,
      "loss": 0.0351,
      "step": 392260
    },
    {
      "epoch": 0.6419748237465879,
      "grad_norm": 1.9667949676513672,
      "learning_rate": 8.773482337592168e-06,
      "loss": 0.0444,
      "step": 392280
    },
    {
      "epoch": 0.6420075541852411,
      "grad_norm": 1.2099714279174805,
      "learning_rate": 8.77341644537865e-06,
      "loss": 0.0371,
      "step": 392300
    },
    {
      "epoch": 0.6420402846238945,
      "grad_norm": 0.33121350407600403,
      "learning_rate": 8.773350553165134e-06,
      "loss": 0.0316,
      "step": 392320
    },
    {
      "epoch": 0.6420730150625479,
      "grad_norm": 0.9519491195678711,
      "learning_rate": 8.773284660951617e-06,
      "loss": 0.0268,
      "step": 392340
    },
    {
      "epoch": 0.6421057455012013,
      "grad_norm": 1.5333144664764404,
      "learning_rate": 8.773218768738099e-06,
      "loss": 0.0402,
      "step": 392360
    },
    {
      "epoch": 0.6421384759398545,
      "grad_norm": 3.3294177055358887,
      "learning_rate": 8.773152876524583e-06,
      "loss": 0.0364,
      "step": 392380
    },
    {
      "epoch": 0.6421712063785079,
      "grad_norm": 2.1667304039001465,
      "learning_rate": 8.773086984311065e-06,
      "loss": 0.0369,
      "step": 392400
    },
    {
      "epoch": 0.6422039368171613,
      "grad_norm": 0.36469972133636475,
      "learning_rate": 8.773021092097548e-06,
      "loss": 0.0409,
      "step": 392420
    },
    {
      "epoch": 0.6422366672558145,
      "grad_norm": 1.191335916519165,
      "learning_rate": 8.77295519988403e-06,
      "loss": 0.046,
      "step": 392440
    },
    {
      "epoch": 0.6422693976944679,
      "grad_norm": 1.6810914278030396,
      "learning_rate": 8.772889307670514e-06,
      "loss": 0.027,
      "step": 392460
    },
    {
      "epoch": 0.6423021281331213,
      "grad_norm": 2.1680216789245605,
      "learning_rate": 8.772823415456995e-06,
      "loss": 0.042,
      "step": 392480
    },
    {
      "epoch": 0.6423348585717745,
      "grad_norm": 1.796911597251892,
      "learning_rate": 8.772757523243479e-06,
      "loss": 0.02,
      "step": 392500
    },
    {
      "epoch": 0.6423675890104279,
      "grad_norm": 1.5621273517608643,
      "learning_rate": 8.772691631029961e-06,
      "loss": 0.0317,
      "step": 392520
    },
    {
      "epoch": 0.6424003194490813,
      "grad_norm": 1.7442753314971924,
      "learning_rate": 8.772625738816445e-06,
      "loss": 0.0415,
      "step": 392540
    },
    {
      "epoch": 0.6424330498877346,
      "grad_norm": 1.1680936813354492,
      "learning_rate": 8.772559846602926e-06,
      "loss": 0.0325,
      "step": 392560
    },
    {
      "epoch": 0.6424657803263879,
      "grad_norm": 1.9598950147628784,
      "learning_rate": 8.77249395438941e-06,
      "loss": 0.0359,
      "step": 392580
    },
    {
      "epoch": 0.6424985107650413,
      "grad_norm": 1.7086801528930664,
      "learning_rate": 8.772428062175894e-06,
      "loss": 0.0316,
      "step": 392600
    },
    {
      "epoch": 0.6425312412036946,
      "grad_norm": 0.5180283188819885,
      "learning_rate": 8.772362169962375e-06,
      "loss": 0.0378,
      "step": 392620
    },
    {
      "epoch": 0.6425639716423479,
      "grad_norm": 0.9247266054153442,
      "learning_rate": 8.772296277748859e-06,
      "loss": 0.0332,
      "step": 392640
    },
    {
      "epoch": 0.6425967020810013,
      "grad_norm": 1.5879896879196167,
      "learning_rate": 8.772230385535343e-06,
      "loss": 0.0355,
      "step": 392660
    },
    {
      "epoch": 0.6426294325196547,
      "grad_norm": 1.8675144910812378,
      "learning_rate": 8.772164493321825e-06,
      "loss": 0.0323,
      "step": 392680
    },
    {
      "epoch": 0.6426621629583079,
      "grad_norm": 1.6817550659179688,
      "learning_rate": 8.772098601108308e-06,
      "loss": 0.033,
      "step": 392700
    },
    {
      "epoch": 0.6426948933969613,
      "grad_norm": 1.2851792573928833,
      "learning_rate": 8.772032708894792e-06,
      "loss": 0.0278,
      "step": 392720
    },
    {
      "epoch": 0.6427276238356147,
      "grad_norm": 1.542839765548706,
      "learning_rate": 8.771966816681274e-06,
      "loss": 0.024,
      "step": 392740
    },
    {
      "epoch": 0.642760354274268,
      "grad_norm": 1.7426977157592773,
      "learning_rate": 8.771900924467757e-06,
      "loss": 0.0407,
      "step": 392760
    },
    {
      "epoch": 0.6427930847129213,
      "grad_norm": 2.391061305999756,
      "learning_rate": 8.771835032254239e-06,
      "loss": 0.0433,
      "step": 392780
    },
    {
      "epoch": 0.6428258151515747,
      "grad_norm": 1.0653079748153687,
      "learning_rate": 8.771769140040723e-06,
      "loss": 0.0258,
      "step": 392800
    },
    {
      "epoch": 0.642858545590228,
      "grad_norm": 6.255350112915039,
      "learning_rate": 8.771703247827205e-06,
      "loss": 0.0288,
      "step": 392820
    },
    {
      "epoch": 0.6428912760288813,
      "grad_norm": 1.5278353691101074,
      "learning_rate": 8.771637355613688e-06,
      "loss": 0.0334,
      "step": 392840
    },
    {
      "epoch": 0.6429240064675347,
      "grad_norm": 1.6009434461593628,
      "learning_rate": 8.77157146340017e-06,
      "loss": 0.0419,
      "step": 392860
    },
    {
      "epoch": 0.642956736906188,
      "grad_norm": 1.1318840980529785,
      "learning_rate": 8.771505571186654e-06,
      "loss": 0.0441,
      "step": 392880
    },
    {
      "epoch": 0.6429894673448413,
      "grad_norm": 0.7239748239517212,
      "learning_rate": 8.771439678973136e-06,
      "loss": 0.0376,
      "step": 392900
    },
    {
      "epoch": 0.6430221977834947,
      "grad_norm": 0.7228056192398071,
      "learning_rate": 8.771373786759619e-06,
      "loss": 0.0239,
      "step": 392920
    },
    {
      "epoch": 0.643054928222148,
      "grad_norm": 0.9547116756439209,
      "learning_rate": 8.771307894546101e-06,
      "loss": 0.0292,
      "step": 392940
    },
    {
      "epoch": 0.6430876586608014,
      "grad_norm": 0.2420683354139328,
      "learning_rate": 8.771242002332585e-06,
      "loss": 0.0301,
      "step": 392960
    },
    {
      "epoch": 0.6431203890994547,
      "grad_norm": 2.2352089881896973,
      "learning_rate": 8.771176110119068e-06,
      "loss": 0.0378,
      "step": 392980
    },
    {
      "epoch": 0.643153119538108,
      "grad_norm": 0.6662793755531311,
      "learning_rate": 8.77111021790555e-06,
      "loss": 0.0278,
      "step": 393000
    },
    {
      "epoch": 0.6431858499767614,
      "grad_norm": 1.6685396432876587,
      "learning_rate": 8.771044325692034e-06,
      "loss": 0.0278,
      "step": 393020
    },
    {
      "epoch": 0.6432185804154147,
      "grad_norm": 0.34010058641433716,
      "learning_rate": 8.770978433478516e-06,
      "loss": 0.03,
      "step": 393040
    },
    {
      "epoch": 0.6432513108540681,
      "grad_norm": 1.8691414594650269,
      "learning_rate": 8.770912541265e-06,
      "loss": 0.0422,
      "step": 393060
    },
    {
      "epoch": 0.6432840412927214,
      "grad_norm": 2.2831084728240967,
      "learning_rate": 8.770846649051483e-06,
      "loss": 0.0279,
      "step": 393080
    },
    {
      "epoch": 0.6433167717313747,
      "grad_norm": 0.906754195690155,
      "learning_rate": 8.770780756837966e-06,
      "loss": 0.0304,
      "step": 393100
    },
    {
      "epoch": 0.6433495021700281,
      "grad_norm": 3.1094019412994385,
      "learning_rate": 8.770714864624448e-06,
      "loss": 0.0503,
      "step": 393120
    },
    {
      "epoch": 0.6433822326086814,
      "grad_norm": 0.9735237956047058,
      "learning_rate": 8.770648972410932e-06,
      "loss": 0.0357,
      "step": 393140
    },
    {
      "epoch": 0.6434149630473348,
      "grad_norm": 0.7573155760765076,
      "learning_rate": 8.770583080197414e-06,
      "loss": 0.0328,
      "step": 393160
    },
    {
      "epoch": 0.6434476934859881,
      "grad_norm": 2.510404348373413,
      "learning_rate": 8.770517187983897e-06,
      "loss": 0.0296,
      "step": 393180
    },
    {
      "epoch": 0.6434804239246414,
      "grad_norm": 1.134445309638977,
      "learning_rate": 8.77045129577038e-06,
      "loss": 0.0425,
      "step": 393200
    },
    {
      "epoch": 0.6435131543632948,
      "grad_norm": 0.21109049022197723,
      "learning_rate": 8.770385403556863e-06,
      "loss": 0.0344,
      "step": 393220
    },
    {
      "epoch": 0.6435458848019481,
      "grad_norm": 2.0624914169311523,
      "learning_rate": 8.770319511343345e-06,
      "loss": 0.0298,
      "step": 393240
    },
    {
      "epoch": 0.6435786152406014,
      "grad_norm": 1.517333984375,
      "learning_rate": 8.770253619129828e-06,
      "loss": 0.0272,
      "step": 393260
    },
    {
      "epoch": 0.6436113456792548,
      "grad_norm": 0.9246255159378052,
      "learning_rate": 8.77018772691631e-06,
      "loss": 0.0322,
      "step": 393280
    },
    {
      "epoch": 0.6436440761179081,
      "grad_norm": 3.589012861251831,
      "learning_rate": 8.770121834702794e-06,
      "loss": 0.0416,
      "step": 393300
    },
    {
      "epoch": 0.6436768065565615,
      "grad_norm": 1.6732707023620605,
      "learning_rate": 8.770055942489277e-06,
      "loss": 0.0391,
      "step": 393320
    },
    {
      "epoch": 0.6437095369952148,
      "grad_norm": 0.6283190250396729,
      "learning_rate": 8.76999005027576e-06,
      "loss": 0.0295,
      "step": 393340
    },
    {
      "epoch": 0.6437422674338682,
      "grad_norm": 0.6391416192054749,
      "learning_rate": 8.769924158062243e-06,
      "loss": 0.0262,
      "step": 393360
    },
    {
      "epoch": 0.6437749978725215,
      "grad_norm": 0.21807853877544403,
      "learning_rate": 8.769858265848725e-06,
      "loss": 0.0267,
      "step": 393380
    },
    {
      "epoch": 0.6438077283111748,
      "grad_norm": 0.7175295948982239,
      "learning_rate": 8.769792373635208e-06,
      "loss": 0.0348,
      "step": 393400
    },
    {
      "epoch": 0.6438404587498282,
      "grad_norm": 1.1022419929504395,
      "learning_rate": 8.76972648142169e-06,
      "loss": 0.0434,
      "step": 393420
    },
    {
      "epoch": 0.6438731891884815,
      "grad_norm": 1.3296809196472168,
      "learning_rate": 8.769660589208174e-06,
      "loss": 0.0384,
      "step": 393440
    },
    {
      "epoch": 0.6439059196271348,
      "grad_norm": 3.3664512634277344,
      "learning_rate": 8.769594696994657e-06,
      "loss": 0.0414,
      "step": 393460
    },
    {
      "epoch": 0.6439386500657882,
      "grad_norm": 0.8098931908607483,
      "learning_rate": 8.76952880478114e-06,
      "loss": 0.0221,
      "step": 393480
    },
    {
      "epoch": 0.6439713805044415,
      "grad_norm": 2.702468156814575,
      "learning_rate": 8.769462912567623e-06,
      "loss": 0.0316,
      "step": 393500
    },
    {
      "epoch": 0.6440041109430948,
      "grad_norm": 1.002753496170044,
      "learning_rate": 8.769397020354106e-06,
      "loss": 0.0239,
      "step": 393520
    },
    {
      "epoch": 0.6440368413817482,
      "grad_norm": 1.0684473514556885,
      "learning_rate": 8.769331128140588e-06,
      "loss": 0.0278,
      "step": 393540
    },
    {
      "epoch": 0.6440695718204016,
      "grad_norm": 0.45281991362571716,
      "learning_rate": 8.769265235927072e-06,
      "loss": 0.0312,
      "step": 393560
    },
    {
      "epoch": 0.6441023022590548,
      "grad_norm": 2.939419984817505,
      "learning_rate": 8.769199343713554e-06,
      "loss": 0.0377,
      "step": 393580
    },
    {
      "epoch": 0.6441350326977082,
      "grad_norm": 0.7608291506767273,
      "learning_rate": 8.769133451500037e-06,
      "loss": 0.0295,
      "step": 393600
    },
    {
      "epoch": 0.6441677631363616,
      "grad_norm": 1.9394410848617554,
      "learning_rate": 8.76906755928652e-06,
      "loss": 0.0463,
      "step": 393620
    },
    {
      "epoch": 0.6442004935750149,
      "grad_norm": 0.329967200756073,
      "learning_rate": 8.769001667073003e-06,
      "loss": 0.0318,
      "step": 393640
    },
    {
      "epoch": 0.6442332240136682,
      "grad_norm": 4.1016764640808105,
      "learning_rate": 8.768935774859486e-06,
      "loss": 0.0261,
      "step": 393660
    },
    {
      "epoch": 0.6442659544523216,
      "grad_norm": 0.7941122055053711,
      "learning_rate": 8.768869882645968e-06,
      "loss": 0.0278,
      "step": 393680
    },
    {
      "epoch": 0.6442986848909749,
      "grad_norm": 0.19443480670452118,
      "learning_rate": 8.768803990432452e-06,
      "loss": 0.0387,
      "step": 393700
    },
    {
      "epoch": 0.6443314153296282,
      "grad_norm": 0.9675576686859131,
      "learning_rate": 8.768738098218934e-06,
      "loss": 0.0306,
      "step": 393720
    },
    {
      "epoch": 0.6443641457682816,
      "grad_norm": 2.025754690170288,
      "learning_rate": 8.768672206005417e-06,
      "loss": 0.031,
      "step": 393740
    },
    {
      "epoch": 0.644396876206935,
      "grad_norm": 0.48550379276275635,
      "learning_rate": 8.7686063137919e-06,
      "loss": 0.0395,
      "step": 393760
    },
    {
      "epoch": 0.6444296066455882,
      "grad_norm": 1.3135193586349487,
      "learning_rate": 8.768540421578383e-06,
      "loss": 0.0359,
      "step": 393780
    },
    {
      "epoch": 0.6444623370842416,
      "grad_norm": 1.142860770225525,
      "learning_rate": 8.768474529364865e-06,
      "loss": 0.0271,
      "step": 393800
    },
    {
      "epoch": 0.644495067522895,
      "grad_norm": 0.22247380018234253,
      "learning_rate": 8.768408637151348e-06,
      "loss": 0.0312,
      "step": 393820
    },
    {
      "epoch": 0.6445277979615482,
      "grad_norm": 0.35714656114578247,
      "learning_rate": 8.768342744937832e-06,
      "loss": 0.0176,
      "step": 393840
    },
    {
      "epoch": 0.6445605284002016,
      "grad_norm": 0.5208939909934998,
      "learning_rate": 8.768276852724314e-06,
      "loss": 0.039,
      "step": 393860
    },
    {
      "epoch": 0.644593258838855,
      "grad_norm": 2.5179507732391357,
      "learning_rate": 8.768210960510797e-06,
      "loss": 0.0384,
      "step": 393880
    },
    {
      "epoch": 0.6446259892775082,
      "grad_norm": 0.9701586961746216,
      "learning_rate": 8.768145068297281e-06,
      "loss": 0.0396,
      "step": 393900
    },
    {
      "epoch": 0.6446587197161616,
      "grad_norm": 2.0922324657440186,
      "learning_rate": 8.768079176083763e-06,
      "loss": 0.0437,
      "step": 393920
    },
    {
      "epoch": 0.644691450154815,
      "grad_norm": 6.912407875061035,
      "learning_rate": 8.768013283870246e-06,
      "loss": 0.0405,
      "step": 393940
    },
    {
      "epoch": 0.6447241805934684,
      "grad_norm": 1.2652000188827515,
      "learning_rate": 8.767947391656728e-06,
      "loss": 0.0378,
      "step": 393960
    },
    {
      "epoch": 0.6447569110321216,
      "grad_norm": 0.7681226134300232,
      "learning_rate": 8.767881499443212e-06,
      "loss": 0.0325,
      "step": 393980
    },
    {
      "epoch": 0.644789641470775,
      "grad_norm": 1.6775195598602295,
      "learning_rate": 8.767815607229696e-06,
      "loss": 0.0296,
      "step": 394000
    },
    {
      "epoch": 0.6448223719094284,
      "grad_norm": 0.7147045135498047,
      "learning_rate": 8.767749715016177e-06,
      "loss": 0.0281,
      "step": 394020
    },
    {
      "epoch": 0.6448551023480816,
      "grad_norm": 0.825846254825592,
      "learning_rate": 8.767683822802661e-06,
      "loss": 0.0314,
      "step": 394040
    },
    {
      "epoch": 0.644887832786735,
      "grad_norm": 1.451526165008545,
      "learning_rate": 8.767617930589143e-06,
      "loss": 0.0351,
      "step": 394060
    },
    {
      "epoch": 0.6449205632253884,
      "grad_norm": 1.4466670751571655,
      "learning_rate": 8.767552038375627e-06,
      "loss": 0.0437,
      "step": 394080
    },
    {
      "epoch": 0.6449532936640416,
      "grad_norm": 0.5881111025810242,
      "learning_rate": 8.767486146162108e-06,
      "loss": 0.0328,
      "step": 394100
    },
    {
      "epoch": 0.644986024102695,
      "grad_norm": 1.4387751817703247,
      "learning_rate": 8.767420253948592e-06,
      "loss": 0.0361,
      "step": 394120
    },
    {
      "epoch": 0.6450187545413484,
      "grad_norm": 1.4176639318466187,
      "learning_rate": 8.767354361735074e-06,
      "loss": 0.0256,
      "step": 394140
    },
    {
      "epoch": 0.6450514849800018,
      "grad_norm": 0.9032477736473083,
      "learning_rate": 8.767288469521557e-06,
      "loss": 0.02,
      "step": 394160
    },
    {
      "epoch": 0.645084215418655,
      "grad_norm": 0.27692633867263794,
      "learning_rate": 8.76722257730804e-06,
      "loss": 0.0302,
      "step": 394180
    },
    {
      "epoch": 0.6451169458573084,
      "grad_norm": 1.1760315895080566,
      "learning_rate": 8.767156685094523e-06,
      "loss": 0.024,
      "step": 394200
    },
    {
      "epoch": 0.6451496762959618,
      "grad_norm": 0.7868952751159668,
      "learning_rate": 8.767090792881005e-06,
      "loss": 0.0381,
      "step": 394220
    },
    {
      "epoch": 0.645182406734615,
      "grad_norm": 0.3695533871650696,
      "learning_rate": 8.767024900667488e-06,
      "loss": 0.0326,
      "step": 394240
    },
    {
      "epoch": 0.6452151371732684,
      "grad_norm": 2.5823166370391846,
      "learning_rate": 8.766959008453972e-06,
      "loss": 0.0482,
      "step": 394260
    },
    {
      "epoch": 0.6452478676119218,
      "grad_norm": 0.3518458604812622,
      "learning_rate": 8.766893116240454e-06,
      "loss": 0.0283,
      "step": 394280
    },
    {
      "epoch": 0.645280598050575,
      "grad_norm": 1.3041350841522217,
      "learning_rate": 8.766827224026937e-06,
      "loss": 0.0345,
      "step": 394300
    },
    {
      "epoch": 0.6453133284892284,
      "grad_norm": 0.5344918966293335,
      "learning_rate": 8.766761331813421e-06,
      "loss": 0.0363,
      "step": 394320
    },
    {
      "epoch": 0.6453460589278818,
      "grad_norm": 0.48628437519073486,
      "learning_rate": 8.766695439599903e-06,
      "loss": 0.0334,
      "step": 394340
    },
    {
      "epoch": 0.6453787893665351,
      "grad_norm": 0.5352947115898132,
      "learning_rate": 8.766629547386387e-06,
      "loss": 0.0214,
      "step": 394360
    },
    {
      "epoch": 0.6454115198051884,
      "grad_norm": 0.960515558719635,
      "learning_rate": 8.76656365517287e-06,
      "loss": 0.0256,
      "step": 394380
    },
    {
      "epoch": 0.6454442502438418,
      "grad_norm": 0.7493060827255249,
      "learning_rate": 8.766497762959352e-06,
      "loss": 0.0331,
      "step": 394400
    },
    {
      "epoch": 0.6454769806824951,
      "grad_norm": 1.6594990491867065,
      "learning_rate": 8.766431870745836e-06,
      "loss": 0.0451,
      "step": 394420
    },
    {
      "epoch": 0.6455097111211484,
      "grad_norm": 1.4578077793121338,
      "learning_rate": 8.766365978532318e-06,
      "loss": 0.0289,
      "step": 394440
    },
    {
      "epoch": 0.6455424415598018,
      "grad_norm": 3.9115078449249268,
      "learning_rate": 8.766300086318801e-06,
      "loss": 0.0353,
      "step": 394460
    },
    {
      "epoch": 0.6455751719984552,
      "grad_norm": 0.5517195463180542,
      "learning_rate": 8.766234194105283e-06,
      "loss": 0.0346,
      "step": 394480
    },
    {
      "epoch": 0.6456079024371084,
      "grad_norm": 0.8952023983001709,
      "learning_rate": 8.766168301891767e-06,
      "loss": 0.041,
      "step": 394500
    },
    {
      "epoch": 0.6456406328757618,
      "grad_norm": 1.6485068798065186,
      "learning_rate": 8.766102409678248e-06,
      "loss": 0.0337,
      "step": 394520
    },
    {
      "epoch": 0.6456733633144152,
      "grad_norm": 6.76128625869751,
      "learning_rate": 8.766036517464732e-06,
      "loss": 0.0475,
      "step": 394540
    },
    {
      "epoch": 0.6457060937530685,
      "grad_norm": 0.45985960960388184,
      "learning_rate": 8.765970625251214e-06,
      "loss": 0.0355,
      "step": 394560
    },
    {
      "epoch": 0.6457388241917218,
      "grad_norm": 1.3845490217208862,
      "learning_rate": 8.765904733037698e-06,
      "loss": 0.0427,
      "step": 394580
    },
    {
      "epoch": 0.6457715546303752,
      "grad_norm": 0.31573957204818726,
      "learning_rate": 8.76583884082418e-06,
      "loss": 0.0188,
      "step": 394600
    },
    {
      "epoch": 0.6458042850690285,
      "grad_norm": 2.785250663757324,
      "learning_rate": 8.765772948610663e-06,
      "loss": 0.0273,
      "step": 394620
    },
    {
      "epoch": 0.6458370155076818,
      "grad_norm": 1.367566704750061,
      "learning_rate": 8.765707056397147e-06,
      "loss": 0.0367,
      "step": 394640
    },
    {
      "epoch": 0.6458697459463352,
      "grad_norm": 3.0213539600372314,
      "learning_rate": 8.765641164183628e-06,
      "loss": 0.0314,
      "step": 394660
    },
    {
      "epoch": 0.6459024763849885,
      "grad_norm": 0.7795050740242004,
      "learning_rate": 8.765575271970112e-06,
      "loss": 0.0337,
      "step": 394680
    },
    {
      "epoch": 0.6459352068236418,
      "grad_norm": 4.524180889129639,
      "learning_rate": 8.765509379756596e-06,
      "loss": 0.0347,
      "step": 394700
    },
    {
      "epoch": 0.6459679372622952,
      "grad_norm": 0.34132644534111023,
      "learning_rate": 8.765443487543078e-06,
      "loss": 0.0413,
      "step": 394720
    },
    {
      "epoch": 0.6460006677009485,
      "grad_norm": 0.9153178930282593,
      "learning_rate": 8.765377595329561e-06,
      "loss": 0.0381,
      "step": 394740
    },
    {
      "epoch": 0.6460333981396019,
      "grad_norm": 0.2554769814014435,
      "learning_rate": 8.765311703116045e-06,
      "loss": 0.0261,
      "step": 394760
    },
    {
      "epoch": 0.6460661285782552,
      "grad_norm": 0.5691908597946167,
      "learning_rate": 8.765245810902527e-06,
      "loss": 0.0392,
      "step": 394780
    },
    {
      "epoch": 0.6460988590169086,
      "grad_norm": 0.720833957195282,
      "learning_rate": 8.76517991868901e-06,
      "loss": 0.0388,
      "step": 394800
    },
    {
      "epoch": 0.6461315894555619,
      "grad_norm": 1.3141230344772339,
      "learning_rate": 8.765114026475492e-06,
      "loss": 0.0409,
      "step": 394820
    },
    {
      "epoch": 0.6461643198942152,
      "grad_norm": 0.40922054648399353,
      "learning_rate": 8.765048134261976e-06,
      "loss": 0.032,
      "step": 394840
    },
    {
      "epoch": 0.6461970503328686,
      "grad_norm": 0.6311283111572266,
      "learning_rate": 8.764982242048458e-06,
      "loss": 0.028,
      "step": 394860
    },
    {
      "epoch": 0.6462297807715219,
      "grad_norm": 1.5786197185516357,
      "learning_rate": 8.764916349834941e-06,
      "loss": 0.0306,
      "step": 394880
    },
    {
      "epoch": 0.6462625112101752,
      "grad_norm": 1.2086118459701538,
      "learning_rate": 8.764850457621423e-06,
      "loss": 0.0315,
      "step": 394900
    },
    {
      "epoch": 0.6462952416488286,
      "grad_norm": 0.9144669771194458,
      "learning_rate": 8.764784565407907e-06,
      "loss": 0.0326,
      "step": 394920
    },
    {
      "epoch": 0.6463279720874819,
      "grad_norm": 1.2181535959243774,
      "learning_rate": 8.764718673194389e-06,
      "loss": 0.0429,
      "step": 394940
    },
    {
      "epoch": 0.6463607025261352,
      "grad_norm": 1.7600985765457153,
      "learning_rate": 8.764652780980872e-06,
      "loss": 0.0416,
      "step": 394960
    },
    {
      "epoch": 0.6463934329647886,
      "grad_norm": 0.441183477640152,
      "learning_rate": 8.764586888767354e-06,
      "loss": 0.0344,
      "step": 394980
    },
    {
      "epoch": 0.646426163403442,
      "grad_norm": 0.5420768857002258,
      "learning_rate": 8.764520996553838e-06,
      "loss": 0.0311,
      "step": 395000
    },
    {
      "epoch": 0.6464588938420953,
      "grad_norm": 0.7597171664237976,
      "learning_rate": 8.76445510434032e-06,
      "loss": 0.0242,
      "step": 395020
    },
    {
      "epoch": 0.6464916242807486,
      "grad_norm": 1.4741147756576538,
      "learning_rate": 8.764389212126803e-06,
      "loss": 0.0306,
      "step": 395040
    },
    {
      "epoch": 0.646524354719402,
      "grad_norm": 0.3620906174182892,
      "learning_rate": 8.764323319913287e-06,
      "loss": 0.0343,
      "step": 395060
    },
    {
      "epoch": 0.6465570851580553,
      "grad_norm": 1.9763219356536865,
      "learning_rate": 8.764257427699769e-06,
      "loss": 0.0277,
      "step": 395080
    },
    {
      "epoch": 0.6465898155967086,
      "grad_norm": 0.4208528399467468,
      "learning_rate": 8.764191535486252e-06,
      "loss": 0.0366,
      "step": 395100
    },
    {
      "epoch": 0.646622546035362,
      "grad_norm": 1.18742036819458,
      "learning_rate": 8.764125643272736e-06,
      "loss": 0.039,
      "step": 395120
    },
    {
      "epoch": 0.6466552764740153,
      "grad_norm": 1.1630512475967407,
      "learning_rate": 8.764059751059218e-06,
      "loss": 0.0298,
      "step": 395140
    },
    {
      "epoch": 0.6466880069126686,
      "grad_norm": 2.470691442489624,
      "learning_rate": 8.763993858845701e-06,
      "loss": 0.0265,
      "step": 395160
    },
    {
      "epoch": 0.646720737351322,
      "grad_norm": 1.11345636844635,
      "learning_rate": 8.763927966632185e-06,
      "loss": 0.0465,
      "step": 395180
    },
    {
      "epoch": 0.6467534677899753,
      "grad_norm": 0.8151748180389404,
      "learning_rate": 8.763862074418667e-06,
      "loss": 0.0394,
      "step": 395200
    },
    {
      "epoch": 0.6467861982286287,
      "grad_norm": 0.4305364787578583,
      "learning_rate": 8.76379618220515e-06,
      "loss": 0.0358,
      "step": 395220
    },
    {
      "epoch": 0.646818928667282,
      "grad_norm": 1.5518451929092407,
      "learning_rate": 8.763730289991632e-06,
      "loss": 0.0373,
      "step": 395240
    },
    {
      "epoch": 0.6468516591059353,
      "grad_norm": 1.4285414218902588,
      "learning_rate": 8.763664397778116e-06,
      "loss": 0.0421,
      "step": 395260
    },
    {
      "epoch": 0.6468843895445887,
      "grad_norm": 0.4277280271053314,
      "learning_rate": 8.763598505564598e-06,
      "loss": 0.0331,
      "step": 395280
    },
    {
      "epoch": 0.646917119983242,
      "grad_norm": 0.44685304164886475,
      "learning_rate": 8.763532613351081e-06,
      "loss": 0.0357,
      "step": 395300
    },
    {
      "epoch": 0.6469498504218953,
      "grad_norm": 0.7177153825759888,
      "learning_rate": 8.763466721137563e-06,
      "loss": 0.0396,
      "step": 395320
    },
    {
      "epoch": 0.6469825808605487,
      "grad_norm": 3.523747444152832,
      "learning_rate": 8.763400828924047e-06,
      "loss": 0.0352,
      "step": 395340
    },
    {
      "epoch": 0.647015311299202,
      "grad_norm": 0.7714736461639404,
      "learning_rate": 8.763334936710529e-06,
      "loss": 0.0436,
      "step": 395360
    },
    {
      "epoch": 0.6470480417378554,
      "grad_norm": 0.9040360450744629,
      "learning_rate": 8.763269044497012e-06,
      "loss": 0.033,
      "step": 395380
    },
    {
      "epoch": 0.6470807721765087,
      "grad_norm": 0.9941769242286682,
      "learning_rate": 8.763203152283494e-06,
      "loss": 0.0446,
      "step": 395400
    },
    {
      "epoch": 0.6471135026151621,
      "grad_norm": 0.657826840877533,
      "learning_rate": 8.763137260069978e-06,
      "loss": 0.031,
      "step": 395420
    },
    {
      "epoch": 0.6471462330538154,
      "grad_norm": 0.7424790859222412,
      "learning_rate": 8.763071367856461e-06,
      "loss": 0.0363,
      "step": 395440
    },
    {
      "epoch": 0.6471789634924687,
      "grad_norm": 0.7417104840278625,
      "learning_rate": 8.763005475642943e-06,
      "loss": 0.038,
      "step": 395460
    },
    {
      "epoch": 0.6472116939311221,
      "grad_norm": 0.2116144895553589,
      "learning_rate": 8.762939583429427e-06,
      "loss": 0.0313,
      "step": 395480
    },
    {
      "epoch": 0.6472444243697754,
      "grad_norm": 3.2468020915985107,
      "learning_rate": 8.76287369121591e-06,
      "loss": 0.0329,
      "step": 395500
    },
    {
      "epoch": 0.6472771548084287,
      "grad_norm": 0.18153709173202515,
      "learning_rate": 8.762807799002392e-06,
      "loss": 0.0303,
      "step": 395520
    },
    {
      "epoch": 0.6473098852470821,
      "grad_norm": 1.8129247426986694,
      "learning_rate": 8.762741906788876e-06,
      "loss": 0.0346,
      "step": 395540
    },
    {
      "epoch": 0.6473426156857354,
      "grad_norm": 0.49655240774154663,
      "learning_rate": 8.76267601457536e-06,
      "loss": 0.0383,
      "step": 395560
    },
    {
      "epoch": 0.6473753461243887,
      "grad_norm": 0.9252051115036011,
      "learning_rate": 8.762610122361841e-06,
      "loss": 0.0286,
      "step": 395580
    },
    {
      "epoch": 0.6474080765630421,
      "grad_norm": 1.0560643672943115,
      "learning_rate": 8.762544230148325e-06,
      "loss": 0.0407,
      "step": 395600
    },
    {
      "epoch": 0.6474408070016955,
      "grad_norm": 0.8676447868347168,
      "learning_rate": 8.762478337934807e-06,
      "loss": 0.0265,
      "step": 395620
    },
    {
      "epoch": 0.6474735374403487,
      "grad_norm": 2.4688334465026855,
      "learning_rate": 8.76241244572129e-06,
      "loss": 0.0363,
      "step": 395640
    },
    {
      "epoch": 0.6475062678790021,
      "grad_norm": 0.9690667390823364,
      "learning_rate": 8.762346553507772e-06,
      "loss": 0.0474,
      "step": 395660
    },
    {
      "epoch": 0.6475389983176555,
      "grad_norm": 0.6638085842132568,
      "learning_rate": 8.762280661294256e-06,
      "loss": 0.0319,
      "step": 395680
    },
    {
      "epoch": 0.6475717287563088,
      "grad_norm": 2.243431329727173,
      "learning_rate": 8.762214769080738e-06,
      "loss": 0.0375,
      "step": 395700
    },
    {
      "epoch": 0.6476044591949621,
      "grad_norm": 1.2759475708007812,
      "learning_rate": 8.762148876867221e-06,
      "loss": 0.0359,
      "step": 395720
    },
    {
      "epoch": 0.6476371896336155,
      "grad_norm": 0.38451650738716125,
      "learning_rate": 8.762082984653703e-06,
      "loss": 0.0308,
      "step": 395740
    },
    {
      "epoch": 0.6476699200722688,
      "grad_norm": 0.3227495551109314,
      "learning_rate": 8.762017092440187e-06,
      "loss": 0.0171,
      "step": 395760
    },
    {
      "epoch": 0.6477026505109221,
      "grad_norm": 1.7822158336639404,
      "learning_rate": 8.76195120022667e-06,
      "loss": 0.0343,
      "step": 395780
    },
    {
      "epoch": 0.6477353809495755,
      "grad_norm": 0.5588994026184082,
      "learning_rate": 8.761885308013152e-06,
      "loss": 0.0339,
      "step": 395800
    },
    {
      "epoch": 0.6477681113882289,
      "grad_norm": 1.4261474609375,
      "learning_rate": 8.761819415799636e-06,
      "loss": 0.0354,
      "step": 395820
    },
    {
      "epoch": 0.6478008418268821,
      "grad_norm": 1.782277226448059,
      "learning_rate": 8.761753523586118e-06,
      "loss": 0.0342,
      "step": 395840
    },
    {
      "epoch": 0.6478335722655355,
      "grad_norm": 0.9254538416862488,
      "learning_rate": 8.761687631372601e-06,
      "loss": 0.0296,
      "step": 395860
    },
    {
      "epoch": 0.6478663027041889,
      "grad_norm": 1.7286508083343506,
      "learning_rate": 8.761621739159085e-06,
      "loss": 0.0252,
      "step": 395880
    },
    {
      "epoch": 0.6478990331428421,
      "grad_norm": 0.4886958599090576,
      "learning_rate": 8.761555846945567e-06,
      "loss": 0.0267,
      "step": 395900
    },
    {
      "epoch": 0.6479317635814955,
      "grad_norm": 0.9495382308959961,
      "learning_rate": 8.76148995473205e-06,
      "loss": 0.0357,
      "step": 395920
    },
    {
      "epoch": 0.6479644940201489,
      "grad_norm": 0.2574637830257416,
      "learning_rate": 8.761424062518534e-06,
      "loss": 0.0281,
      "step": 395940
    },
    {
      "epoch": 0.6479972244588021,
      "grad_norm": 4.929962158203125,
      "learning_rate": 8.761358170305016e-06,
      "loss": 0.0392,
      "step": 395960
    },
    {
      "epoch": 0.6480299548974555,
      "grad_norm": 0.756353497505188,
      "learning_rate": 8.7612922780915e-06,
      "loss": 0.0335,
      "step": 395980
    },
    {
      "epoch": 0.6480626853361089,
      "grad_norm": 0.767744779586792,
      "learning_rate": 8.761226385877981e-06,
      "loss": 0.042,
      "step": 396000
    },
    {
      "epoch": 0.6480954157747623,
      "grad_norm": 2.5288891792297363,
      "learning_rate": 8.761160493664465e-06,
      "loss": 0.0401,
      "step": 396020
    },
    {
      "epoch": 0.6481281462134155,
      "grad_norm": 0.6162725687026978,
      "learning_rate": 8.761094601450947e-06,
      "loss": 0.0293,
      "step": 396040
    },
    {
      "epoch": 0.6481608766520689,
      "grad_norm": 1.2597118616104126,
      "learning_rate": 8.76102870923743e-06,
      "loss": 0.0315,
      "step": 396060
    },
    {
      "epoch": 0.6481936070907223,
      "grad_norm": 0.30996087193489075,
      "learning_rate": 8.760962817023912e-06,
      "loss": 0.0309,
      "step": 396080
    },
    {
      "epoch": 0.6482263375293755,
      "grad_norm": 1.3674801588058472,
      "learning_rate": 8.760896924810396e-06,
      "loss": 0.0297,
      "step": 396100
    },
    {
      "epoch": 0.6482590679680289,
      "grad_norm": 0.20410466194152832,
      "learning_rate": 8.76083103259688e-06,
      "loss": 0.0282,
      "step": 396120
    },
    {
      "epoch": 0.6482917984066823,
      "grad_norm": 3.640054702758789,
      "learning_rate": 8.760765140383361e-06,
      "loss": 0.0431,
      "step": 396140
    },
    {
      "epoch": 0.6483245288453355,
      "grad_norm": 2.1769304275512695,
      "learning_rate": 8.760699248169845e-06,
      "loss": 0.041,
      "step": 396160
    },
    {
      "epoch": 0.6483572592839889,
      "grad_norm": 1.2086697816848755,
      "learning_rate": 8.760633355956327e-06,
      "loss": 0.0378,
      "step": 396180
    },
    {
      "epoch": 0.6483899897226423,
      "grad_norm": 0.8971160054206848,
      "learning_rate": 8.76056746374281e-06,
      "loss": 0.0404,
      "step": 396200
    },
    {
      "epoch": 0.6484227201612957,
      "grad_norm": 1.0098888874053955,
      "learning_rate": 8.760501571529292e-06,
      "loss": 0.0366,
      "step": 396220
    },
    {
      "epoch": 0.6484554505999489,
      "grad_norm": 1.1406991481781006,
      "learning_rate": 8.760435679315776e-06,
      "loss": 0.0248,
      "step": 396240
    },
    {
      "epoch": 0.6484881810386023,
      "grad_norm": 2.4182770252227783,
      "learning_rate": 8.760369787102258e-06,
      "loss": 0.0369,
      "step": 396260
    },
    {
      "epoch": 0.6485209114772557,
      "grad_norm": 0.7977961897850037,
      "learning_rate": 8.760303894888741e-06,
      "loss": 0.0385,
      "step": 396280
    },
    {
      "epoch": 0.6485536419159089,
      "grad_norm": 0.47619032859802246,
      "learning_rate": 8.760238002675225e-06,
      "loss": 0.0335,
      "step": 396300
    },
    {
      "epoch": 0.6485863723545623,
      "grad_norm": 2.0282649993896484,
      "learning_rate": 8.760172110461707e-06,
      "loss": 0.032,
      "step": 396320
    },
    {
      "epoch": 0.6486191027932157,
      "grad_norm": 0.6991869807243347,
      "learning_rate": 8.76010621824819e-06,
      "loss": 0.041,
      "step": 396340
    },
    {
      "epoch": 0.6486518332318689,
      "grad_norm": 1.1115288734436035,
      "learning_rate": 8.760040326034674e-06,
      "loss": 0.0268,
      "step": 396360
    },
    {
      "epoch": 0.6486845636705223,
      "grad_norm": 0.614408016204834,
      "learning_rate": 8.759974433821156e-06,
      "loss": 0.0404,
      "step": 396380
    },
    {
      "epoch": 0.6487172941091757,
      "grad_norm": 1.4260295629501343,
      "learning_rate": 8.75990854160764e-06,
      "loss": 0.0287,
      "step": 396400
    },
    {
      "epoch": 0.648750024547829,
      "grad_norm": 0.5752909183502197,
      "learning_rate": 8.759842649394121e-06,
      "loss": 0.0318,
      "step": 396420
    },
    {
      "epoch": 0.6487827549864823,
      "grad_norm": 0.9554964900016785,
      "learning_rate": 8.759776757180605e-06,
      "loss": 0.041,
      "step": 396440
    },
    {
      "epoch": 0.6488154854251357,
      "grad_norm": 0.48157185316085815,
      "learning_rate": 8.759710864967087e-06,
      "loss": 0.0324,
      "step": 396460
    },
    {
      "epoch": 0.648848215863789,
      "grad_norm": 0.17753320932388306,
      "learning_rate": 8.75964497275357e-06,
      "loss": 0.0343,
      "step": 396480
    },
    {
      "epoch": 0.6488809463024423,
      "grad_norm": 2.9099228382110596,
      "learning_rate": 8.759579080540054e-06,
      "loss": 0.0419,
      "step": 396500
    },
    {
      "epoch": 0.6489136767410957,
      "grad_norm": 1.9112772941589355,
      "learning_rate": 8.759513188326536e-06,
      "loss": 0.0293,
      "step": 396520
    },
    {
      "epoch": 0.648946407179749,
      "grad_norm": 0.2966967225074768,
      "learning_rate": 8.75944729611302e-06,
      "loss": 0.0286,
      "step": 396540
    },
    {
      "epoch": 0.6489791376184023,
      "grad_norm": 0.31233692169189453,
      "learning_rate": 8.759381403899501e-06,
      "loss": 0.03,
      "step": 396560
    },
    {
      "epoch": 0.6490118680570557,
      "grad_norm": 3.824549674987793,
      "learning_rate": 8.759315511685985e-06,
      "loss": 0.0298,
      "step": 396580
    },
    {
      "epoch": 0.6490445984957091,
      "grad_norm": 8.539060592651367,
      "learning_rate": 8.759249619472467e-06,
      "loss": 0.0299,
      "step": 396600
    },
    {
      "epoch": 0.6490773289343624,
      "grad_norm": 1.4420660734176636,
      "learning_rate": 8.75918372725895e-06,
      "loss": 0.0238,
      "step": 396620
    },
    {
      "epoch": 0.6491100593730157,
      "grad_norm": 1.1362770795822144,
      "learning_rate": 8.759117835045432e-06,
      "loss": 0.0367,
      "step": 396640
    },
    {
      "epoch": 0.6491427898116691,
      "grad_norm": 2.0737876892089844,
      "learning_rate": 8.759051942831916e-06,
      "loss": 0.0451,
      "step": 396660
    },
    {
      "epoch": 0.6491755202503224,
      "grad_norm": 0.7997537851333618,
      "learning_rate": 8.7589860506184e-06,
      "loss": 0.0404,
      "step": 396680
    },
    {
      "epoch": 0.6492082506889757,
      "grad_norm": 0.9920926690101624,
      "learning_rate": 8.758920158404882e-06,
      "loss": 0.0334,
      "step": 396700
    },
    {
      "epoch": 0.6492409811276291,
      "grad_norm": 1.046481966972351,
      "learning_rate": 8.758854266191365e-06,
      "loss": 0.0337,
      "step": 396720
    },
    {
      "epoch": 0.6492737115662824,
      "grad_norm": 0.5164774060249329,
      "learning_rate": 8.758788373977849e-06,
      "loss": 0.0254,
      "step": 396740
    },
    {
      "epoch": 0.6493064420049357,
      "grad_norm": 0.5355434417724609,
      "learning_rate": 8.75872248176433e-06,
      "loss": 0.0436,
      "step": 396760
    },
    {
      "epoch": 0.6493391724435891,
      "grad_norm": 1.2937027215957642,
      "learning_rate": 8.758656589550814e-06,
      "loss": 0.0256,
      "step": 396780
    },
    {
      "epoch": 0.6493719028822424,
      "grad_norm": 0.5803550481796265,
      "learning_rate": 8.758590697337296e-06,
      "loss": 0.0306,
      "step": 396800
    },
    {
      "epoch": 0.6494046333208958,
      "grad_norm": 1.8639293909072876,
      "learning_rate": 8.75852480512378e-06,
      "loss": 0.051,
      "step": 396820
    },
    {
      "epoch": 0.6494373637595491,
      "grad_norm": 0.2939944267272949,
      "learning_rate": 8.758458912910263e-06,
      "loss": 0.0344,
      "step": 396840
    },
    {
      "epoch": 0.6494700941982025,
      "grad_norm": 1.227929711341858,
      "learning_rate": 8.758393020696745e-06,
      "loss": 0.0369,
      "step": 396860
    },
    {
      "epoch": 0.6495028246368558,
      "grad_norm": 0.9811832904815674,
      "learning_rate": 8.758327128483229e-06,
      "loss": 0.0245,
      "step": 396880
    },
    {
      "epoch": 0.6495355550755091,
      "grad_norm": 0.5793059468269348,
      "learning_rate": 8.75826123626971e-06,
      "loss": 0.0279,
      "step": 396900
    },
    {
      "epoch": 0.6495682855141625,
      "grad_norm": 0.12719300389289856,
      "learning_rate": 8.758195344056194e-06,
      "loss": 0.0316,
      "step": 396920
    },
    {
      "epoch": 0.6496010159528158,
      "grad_norm": 1.3370696306228638,
      "learning_rate": 8.758129451842676e-06,
      "loss": 0.0296,
      "step": 396940
    },
    {
      "epoch": 0.6496337463914691,
      "grad_norm": 1.9506893157958984,
      "learning_rate": 8.75806355962916e-06,
      "loss": 0.041,
      "step": 396960
    },
    {
      "epoch": 0.6496664768301225,
      "grad_norm": 1.9005597829818726,
      "learning_rate": 8.757997667415642e-06,
      "loss": 0.0323,
      "step": 396980
    },
    {
      "epoch": 0.6496992072687758,
      "grad_norm": 1.0744428634643555,
      "learning_rate": 8.757931775202125e-06,
      "loss": 0.0443,
      "step": 397000
    },
    {
      "epoch": 0.6497319377074292,
      "grad_norm": 1.2470998764038086,
      "learning_rate": 8.757865882988607e-06,
      "loss": 0.0264,
      "step": 397020
    },
    {
      "epoch": 0.6497646681460825,
      "grad_norm": 1.799364447593689,
      "learning_rate": 8.75779999077509e-06,
      "loss": 0.0324,
      "step": 397040
    },
    {
      "epoch": 0.6497973985847358,
      "grad_norm": 1.7177155017852783,
      "learning_rate": 8.757734098561573e-06,
      "loss": 0.0401,
      "step": 397060
    },
    {
      "epoch": 0.6498301290233892,
      "grad_norm": 1.3299329280853271,
      "learning_rate": 8.757668206348056e-06,
      "loss": 0.025,
      "step": 397080
    },
    {
      "epoch": 0.6498628594620425,
      "grad_norm": 0.6643086671829224,
      "learning_rate": 8.75760231413454e-06,
      "loss": 0.029,
      "step": 397100
    },
    {
      "epoch": 0.6498955899006958,
      "grad_norm": 0.6260772943496704,
      "learning_rate": 8.757536421921022e-06,
      "loss": 0.0363,
      "step": 397120
    },
    {
      "epoch": 0.6499283203393492,
      "grad_norm": 0.24831511080265045,
      "learning_rate": 8.757470529707505e-06,
      "loss": 0.0292,
      "step": 397140
    },
    {
      "epoch": 0.6499610507780025,
      "grad_norm": 2.09092378616333,
      "learning_rate": 8.757404637493989e-06,
      "loss": 0.0237,
      "step": 397160
    },
    {
      "epoch": 0.6499937812166559,
      "grad_norm": 0.6699127554893494,
      "learning_rate": 8.75733874528047e-06,
      "loss": 0.0345,
      "step": 397180
    },
    {
      "epoch": 0.6500265116553092,
      "grad_norm": 2.599642515182495,
      "learning_rate": 8.757272853066954e-06,
      "loss": 0.0377,
      "step": 397200
    },
    {
      "epoch": 0.6500592420939626,
      "grad_norm": 1.3079115152359009,
      "learning_rate": 8.757206960853438e-06,
      "loss": 0.0397,
      "step": 397220
    },
    {
      "epoch": 0.6500919725326159,
      "grad_norm": 2.8307902812957764,
      "learning_rate": 8.75714106863992e-06,
      "loss": 0.0388,
      "step": 397240
    },
    {
      "epoch": 0.6501247029712692,
      "grad_norm": 4.565562725067139,
      "learning_rate": 8.757075176426403e-06,
      "loss": 0.0286,
      "step": 397260
    },
    {
      "epoch": 0.6501574334099226,
      "grad_norm": 0.8430092334747314,
      "learning_rate": 8.757009284212885e-06,
      "loss": 0.0329,
      "step": 397280
    },
    {
      "epoch": 0.6501901638485759,
      "grad_norm": 1.606960415840149,
      "learning_rate": 8.756943391999369e-06,
      "loss": 0.0289,
      "step": 397300
    },
    {
      "epoch": 0.6502228942872292,
      "grad_norm": 0.941402018070221,
      "learning_rate": 8.75687749978585e-06,
      "loss": 0.0298,
      "step": 397320
    },
    {
      "epoch": 0.6502556247258826,
      "grad_norm": 0.9705835580825806,
      "learning_rate": 8.756811607572334e-06,
      "loss": 0.0256,
      "step": 397340
    },
    {
      "epoch": 0.6502883551645359,
      "grad_norm": 2.7079055309295654,
      "learning_rate": 8.756745715358816e-06,
      "loss": 0.0451,
      "step": 397360
    },
    {
      "epoch": 0.6503210856031892,
      "grad_norm": 0.4135590195655823,
      "learning_rate": 8.7566798231453e-06,
      "loss": 0.0278,
      "step": 397380
    },
    {
      "epoch": 0.6503538160418426,
      "grad_norm": 0.6495686173439026,
      "learning_rate": 8.756613930931782e-06,
      "loss": 0.0379,
      "step": 397400
    },
    {
      "epoch": 0.650386546480496,
      "grad_norm": 0.6559935808181763,
      "learning_rate": 8.756548038718265e-06,
      "loss": 0.0416,
      "step": 397420
    },
    {
      "epoch": 0.6504192769191492,
      "grad_norm": 0.6159514784812927,
      "learning_rate": 8.756482146504747e-06,
      "loss": 0.0384,
      "step": 397440
    },
    {
      "epoch": 0.6504520073578026,
      "grad_norm": 1.9086719751358032,
      "learning_rate": 8.75641625429123e-06,
      "loss": 0.0496,
      "step": 397460
    },
    {
      "epoch": 0.650484737796456,
      "grad_norm": 1.0012542009353638,
      "learning_rate": 8.756350362077714e-06,
      "loss": 0.0406,
      "step": 397480
    },
    {
      "epoch": 0.6505174682351093,
      "grad_norm": 4.056854724884033,
      "learning_rate": 8.756284469864196e-06,
      "loss": 0.0362,
      "step": 397500
    },
    {
      "epoch": 0.6505501986737626,
      "grad_norm": 0.43341872096061707,
      "learning_rate": 8.75621857765068e-06,
      "loss": 0.0339,
      "step": 397520
    },
    {
      "epoch": 0.650582929112416,
      "grad_norm": 0.6956416964530945,
      "learning_rate": 8.756152685437163e-06,
      "loss": 0.0397,
      "step": 397540
    },
    {
      "epoch": 0.6506156595510693,
      "grad_norm": 1.2940698862075806,
      "learning_rate": 8.756086793223645e-06,
      "loss": 0.0383,
      "step": 397560
    },
    {
      "epoch": 0.6506483899897226,
      "grad_norm": 1.2878375053405762,
      "learning_rate": 8.756020901010129e-06,
      "loss": 0.037,
      "step": 397580
    },
    {
      "epoch": 0.650681120428376,
      "grad_norm": 1.1025722026824951,
      "learning_rate": 8.755955008796612e-06,
      "loss": 0.03,
      "step": 397600
    },
    {
      "epoch": 0.6507138508670294,
      "grad_norm": 1.2387977838516235,
      "learning_rate": 8.755889116583094e-06,
      "loss": 0.0389,
      "step": 397620
    },
    {
      "epoch": 0.6507465813056826,
      "grad_norm": 1.3700217008590698,
      "learning_rate": 8.755823224369578e-06,
      "loss": 0.0274,
      "step": 397640
    },
    {
      "epoch": 0.650779311744336,
      "grad_norm": 0.9848290681838989,
      "learning_rate": 8.75575733215606e-06,
      "loss": 0.0396,
      "step": 397660
    },
    {
      "epoch": 0.6508120421829894,
      "grad_norm": 11.575844764709473,
      "learning_rate": 8.755691439942543e-06,
      "loss": 0.0446,
      "step": 397680
    },
    {
      "epoch": 0.6508447726216426,
      "grad_norm": 0.34499239921569824,
      "learning_rate": 8.755625547729025e-06,
      "loss": 0.0274,
      "step": 397700
    },
    {
      "epoch": 0.650877503060296,
      "grad_norm": 3.0684423446655273,
      "learning_rate": 8.755559655515509e-06,
      "loss": 0.0375,
      "step": 397720
    },
    {
      "epoch": 0.6509102334989494,
      "grad_norm": 0.3823489546775818,
      "learning_rate": 8.75549376330199e-06,
      "loss": 0.0454,
      "step": 397740
    },
    {
      "epoch": 0.6509429639376026,
      "grad_norm": 2.2425055503845215,
      "learning_rate": 8.755427871088474e-06,
      "loss": 0.0445,
      "step": 397760
    },
    {
      "epoch": 0.650975694376256,
      "grad_norm": 1.906474232673645,
      "learning_rate": 8.755361978874956e-06,
      "loss": 0.0399,
      "step": 397780
    },
    {
      "epoch": 0.6510084248149094,
      "grad_norm": 0.5344812870025635,
      "learning_rate": 8.75529608666144e-06,
      "loss": 0.022,
      "step": 397800
    },
    {
      "epoch": 0.6510411552535627,
      "grad_norm": 0.8123753070831299,
      "learning_rate": 8.755230194447922e-06,
      "loss": 0.026,
      "step": 397820
    },
    {
      "epoch": 0.651073885692216,
      "grad_norm": 0.7562267780303955,
      "learning_rate": 8.755164302234405e-06,
      "loss": 0.0285,
      "step": 397840
    },
    {
      "epoch": 0.6511066161308694,
      "grad_norm": 0.4133564531803131,
      "learning_rate": 8.755098410020887e-06,
      "loss": 0.0333,
      "step": 397860
    },
    {
      "epoch": 0.6511393465695228,
      "grad_norm": 0.9985086917877197,
      "learning_rate": 8.75503251780737e-06,
      "loss": 0.0397,
      "step": 397880
    },
    {
      "epoch": 0.651172077008176,
      "grad_norm": 1.0371110439300537,
      "learning_rate": 8.754966625593854e-06,
      "loss": 0.0227,
      "step": 397900
    },
    {
      "epoch": 0.6512048074468294,
      "grad_norm": 0.7800493836402893,
      "learning_rate": 8.754900733380336e-06,
      "loss": 0.0244,
      "step": 397920
    },
    {
      "epoch": 0.6512375378854828,
      "grad_norm": 0.5656806230545044,
      "learning_rate": 8.75483484116682e-06,
      "loss": 0.0279,
      "step": 397940
    },
    {
      "epoch": 0.651270268324136,
      "grad_norm": 1.3641738891601562,
      "learning_rate": 8.754768948953303e-06,
      "loss": 0.0522,
      "step": 397960
    },
    {
      "epoch": 0.6513029987627894,
      "grad_norm": 1.2384192943572998,
      "learning_rate": 8.754703056739785e-06,
      "loss": 0.0318,
      "step": 397980
    },
    {
      "epoch": 0.6513357292014428,
      "grad_norm": 1.5759159326553345,
      "learning_rate": 8.754637164526269e-06,
      "loss": 0.0388,
      "step": 398000
    },
    {
      "epoch": 0.651368459640096,
      "grad_norm": 1.3264539241790771,
      "learning_rate": 8.754571272312752e-06,
      "loss": 0.0407,
      "step": 398020
    },
    {
      "epoch": 0.6514011900787494,
      "grad_norm": 0.9816566109657288,
      "learning_rate": 8.754505380099234e-06,
      "loss": 0.0276,
      "step": 398040
    },
    {
      "epoch": 0.6514339205174028,
      "grad_norm": 3.545480489730835,
      "learning_rate": 8.754439487885718e-06,
      "loss": 0.0345,
      "step": 398060
    },
    {
      "epoch": 0.6514666509560562,
      "grad_norm": 0.758898913860321,
      "learning_rate": 8.7543735956722e-06,
      "loss": 0.0301,
      "step": 398080
    },
    {
      "epoch": 0.6514993813947094,
      "grad_norm": 3.2907068729400635,
      "learning_rate": 8.754307703458683e-06,
      "loss": 0.028,
      "step": 398100
    },
    {
      "epoch": 0.6515321118333628,
      "grad_norm": 0.5407682657241821,
      "learning_rate": 8.754241811245165e-06,
      "loss": 0.0417,
      "step": 398120
    },
    {
      "epoch": 0.6515648422720162,
      "grad_norm": 1.3726394176483154,
      "learning_rate": 8.754175919031649e-06,
      "loss": 0.0367,
      "step": 398140
    },
    {
      "epoch": 0.6515975727106694,
      "grad_norm": 1.4429901838302612,
      "learning_rate": 8.75411002681813e-06,
      "loss": 0.0416,
      "step": 398160
    },
    {
      "epoch": 0.6516303031493228,
      "grad_norm": 2.1866419315338135,
      "learning_rate": 8.754044134604614e-06,
      "loss": 0.0311,
      "step": 398180
    },
    {
      "epoch": 0.6516630335879762,
      "grad_norm": 1.882710337638855,
      "learning_rate": 8.753978242391096e-06,
      "loss": 0.0309,
      "step": 398200
    },
    {
      "epoch": 0.6516957640266294,
      "grad_norm": 1.818289041519165,
      "learning_rate": 8.75391235017758e-06,
      "loss": 0.0344,
      "step": 398220
    },
    {
      "epoch": 0.6517284944652828,
      "grad_norm": 2.7439920902252197,
      "learning_rate": 8.753846457964063e-06,
      "loss": 0.0295,
      "step": 398240
    },
    {
      "epoch": 0.6517612249039362,
      "grad_norm": 0.7564177513122559,
      "learning_rate": 8.753780565750545e-06,
      "loss": 0.0426,
      "step": 398260
    },
    {
      "epoch": 0.6517939553425895,
      "grad_norm": 2.7900829315185547,
      "learning_rate": 8.753714673537029e-06,
      "loss": 0.0365,
      "step": 398280
    },
    {
      "epoch": 0.6518266857812428,
      "grad_norm": 1.338804841041565,
      "learning_rate": 8.753648781323511e-06,
      "loss": 0.0463,
      "step": 398300
    },
    {
      "epoch": 0.6518594162198962,
      "grad_norm": 1.1747695207595825,
      "learning_rate": 8.753582889109994e-06,
      "loss": 0.0374,
      "step": 398320
    },
    {
      "epoch": 0.6518921466585496,
      "grad_norm": 1.2588953971862793,
      "learning_rate": 8.753516996896478e-06,
      "loss": 0.0358,
      "step": 398340
    },
    {
      "epoch": 0.6519248770972028,
      "grad_norm": 0.6465264558792114,
      "learning_rate": 8.75345110468296e-06,
      "loss": 0.0303,
      "step": 398360
    },
    {
      "epoch": 0.6519576075358562,
      "grad_norm": 1.152040719985962,
      "learning_rate": 8.753385212469444e-06,
      "loss": 0.0365,
      "step": 398380
    },
    {
      "epoch": 0.6519903379745096,
      "grad_norm": 2.237008810043335,
      "learning_rate": 8.753319320255927e-06,
      "loss": 0.0456,
      "step": 398400
    },
    {
      "epoch": 0.6520230684131628,
      "grad_norm": 6.747768878936768,
      "learning_rate": 8.753253428042409e-06,
      "loss": 0.0505,
      "step": 398420
    },
    {
      "epoch": 0.6520557988518162,
      "grad_norm": 2.026792287826538,
      "learning_rate": 8.753187535828893e-06,
      "loss": 0.0354,
      "step": 398440
    },
    {
      "epoch": 0.6520885292904696,
      "grad_norm": 2.28205943107605,
      "learning_rate": 8.753121643615374e-06,
      "loss": 0.031,
      "step": 398460
    },
    {
      "epoch": 0.6521212597291229,
      "grad_norm": 1.1433435678482056,
      "learning_rate": 8.753055751401858e-06,
      "loss": 0.0317,
      "step": 398480
    },
    {
      "epoch": 0.6521539901677762,
      "grad_norm": 0.526221513748169,
      "learning_rate": 8.75298985918834e-06,
      "loss": 0.0298,
      "step": 398500
    },
    {
      "epoch": 0.6521867206064296,
      "grad_norm": 1.9827195405960083,
      "learning_rate": 8.752923966974824e-06,
      "loss": 0.0372,
      "step": 398520
    },
    {
      "epoch": 0.6522194510450829,
      "grad_norm": 2.5110647678375244,
      "learning_rate": 8.752858074761305e-06,
      "loss": 0.0328,
      "step": 398540
    },
    {
      "epoch": 0.6522521814837362,
      "grad_norm": 0.6157577037811279,
      "learning_rate": 8.752792182547789e-06,
      "loss": 0.0443,
      "step": 398560
    },
    {
      "epoch": 0.6522849119223896,
      "grad_norm": 0.3007493317127228,
      "learning_rate": 8.752726290334273e-06,
      "loss": 0.0307,
      "step": 398580
    },
    {
      "epoch": 0.652317642361043,
      "grad_norm": 0.515252411365509,
      "learning_rate": 8.752660398120754e-06,
      "loss": 0.0285,
      "step": 398600
    },
    {
      "epoch": 0.6523503727996962,
      "grad_norm": 2.048041582107544,
      "learning_rate": 8.752594505907238e-06,
      "loss": 0.0308,
      "step": 398620
    },
    {
      "epoch": 0.6523831032383496,
      "grad_norm": 1.0091348886489868,
      "learning_rate": 8.75252861369372e-06,
      "loss": 0.0396,
      "step": 398640
    },
    {
      "epoch": 0.652415833677003,
      "grad_norm": 2.797440767288208,
      "learning_rate": 8.752462721480204e-06,
      "loss": 0.0342,
      "step": 398660
    },
    {
      "epoch": 0.6524485641156563,
      "grad_norm": 0.48217883706092834,
      "learning_rate": 8.752396829266685e-06,
      "loss": 0.0297,
      "step": 398680
    },
    {
      "epoch": 0.6524812945543096,
      "grad_norm": 0.954842746257782,
      "learning_rate": 8.752330937053169e-06,
      "loss": 0.0394,
      "step": 398700
    },
    {
      "epoch": 0.652514024992963,
      "grad_norm": 0.6244201064109802,
      "learning_rate": 8.752265044839653e-06,
      "loss": 0.0363,
      "step": 398720
    },
    {
      "epoch": 0.6525467554316163,
      "grad_norm": 1.0209076404571533,
      "learning_rate": 8.752199152626135e-06,
      "loss": 0.0393,
      "step": 398740
    },
    {
      "epoch": 0.6525794858702696,
      "grad_norm": 2.6242785453796387,
      "learning_rate": 8.752133260412618e-06,
      "loss": 0.0389,
      "step": 398760
    },
    {
      "epoch": 0.652612216308923,
      "grad_norm": 1.6260809898376465,
      "learning_rate": 8.752067368199102e-06,
      "loss": 0.0374,
      "step": 398780
    },
    {
      "epoch": 0.6526449467475763,
      "grad_norm": 0.5120384097099304,
      "learning_rate": 8.752001475985584e-06,
      "loss": 0.036,
      "step": 398800
    },
    {
      "epoch": 0.6526776771862296,
      "grad_norm": 3.0440707206726074,
      "learning_rate": 8.751935583772067e-06,
      "loss": 0.0411,
      "step": 398820
    },
    {
      "epoch": 0.652710407624883,
      "grad_norm": 1.3414037227630615,
      "learning_rate": 8.751869691558549e-06,
      "loss": 0.0337,
      "step": 398840
    },
    {
      "epoch": 0.6527431380635363,
      "grad_norm": 1.6908904314041138,
      "learning_rate": 8.751803799345033e-06,
      "loss": 0.0366,
      "step": 398860
    },
    {
      "epoch": 0.6527758685021897,
      "grad_norm": 0.6227617263793945,
      "learning_rate": 8.751737907131515e-06,
      "loss": 0.0349,
      "step": 398880
    },
    {
      "epoch": 0.652808598940843,
      "grad_norm": 1.5548839569091797,
      "learning_rate": 8.751672014917998e-06,
      "loss": 0.0266,
      "step": 398900
    },
    {
      "epoch": 0.6528413293794963,
      "grad_norm": 3.8965070247650146,
      "learning_rate": 8.75160612270448e-06,
      "loss": 0.0329,
      "step": 398920
    },
    {
      "epoch": 0.6528740598181497,
      "grad_norm": 1.4646378755569458,
      "learning_rate": 8.751540230490964e-06,
      "loss": 0.0338,
      "step": 398940
    },
    {
      "epoch": 0.652906790256803,
      "grad_norm": 1.3943607807159424,
      "learning_rate": 8.751474338277447e-06,
      "loss": 0.0329,
      "step": 398960
    },
    {
      "epoch": 0.6529395206954564,
      "grad_norm": 1.3095595836639404,
      "learning_rate": 8.751408446063929e-06,
      "loss": 0.0303,
      "step": 398980
    },
    {
      "epoch": 0.6529722511341097,
      "grad_norm": 1.9612174034118652,
      "learning_rate": 8.751342553850413e-06,
      "loss": 0.0429,
      "step": 399000
    },
    {
      "epoch": 0.653004981572763,
      "grad_norm": 0.5087392330169678,
      "learning_rate": 8.751276661636895e-06,
      "loss": 0.0282,
      "step": 399020
    },
    {
      "epoch": 0.6530377120114164,
      "grad_norm": 1.6233409643173218,
      "learning_rate": 8.751210769423378e-06,
      "loss": 0.0404,
      "step": 399040
    },
    {
      "epoch": 0.6530704424500697,
      "grad_norm": 0.9445124864578247,
      "learning_rate": 8.75114487720986e-06,
      "loss": 0.0247,
      "step": 399060
    },
    {
      "epoch": 0.6531031728887231,
      "grad_norm": 0.8592886924743652,
      "learning_rate": 8.751078984996344e-06,
      "loss": 0.0317,
      "step": 399080
    },
    {
      "epoch": 0.6531359033273764,
      "grad_norm": 1.1125506162643433,
      "learning_rate": 8.751013092782826e-06,
      "loss": 0.0262,
      "step": 399100
    },
    {
      "epoch": 0.6531686337660297,
      "grad_norm": 1.4837173223495483,
      "learning_rate": 8.750947200569309e-06,
      "loss": 0.0281,
      "step": 399120
    },
    {
      "epoch": 0.6532013642046831,
      "grad_norm": 0.8138779401779175,
      "learning_rate": 8.750881308355793e-06,
      "loss": 0.0247,
      "step": 399140
    },
    {
      "epoch": 0.6532340946433364,
      "grad_norm": 1.3376284837722778,
      "learning_rate": 8.750815416142275e-06,
      "loss": 0.0372,
      "step": 399160
    },
    {
      "epoch": 0.6532668250819897,
      "grad_norm": 0.7052133679389954,
      "learning_rate": 8.750749523928758e-06,
      "loss": 0.0318,
      "step": 399180
    },
    {
      "epoch": 0.6532995555206431,
      "grad_norm": 1.9180552959442139,
      "learning_rate": 8.750683631715242e-06,
      "loss": 0.0325,
      "step": 399200
    },
    {
      "epoch": 0.6533322859592964,
      "grad_norm": 1.3018977642059326,
      "learning_rate": 8.750617739501724e-06,
      "loss": 0.0349,
      "step": 399220
    },
    {
      "epoch": 0.6533650163979497,
      "grad_norm": 2.7378950119018555,
      "learning_rate": 8.750551847288207e-06,
      "loss": 0.0268,
      "step": 399240
    },
    {
      "epoch": 0.6533977468366031,
      "grad_norm": 1.6281789541244507,
      "learning_rate": 8.750485955074689e-06,
      "loss": 0.0299,
      "step": 399260
    },
    {
      "epoch": 0.6534304772752565,
      "grad_norm": 0.6106274127960205,
      "learning_rate": 8.750420062861173e-06,
      "loss": 0.0283,
      "step": 399280
    },
    {
      "epoch": 0.6534632077139098,
      "grad_norm": 0.5285264849662781,
      "learning_rate": 8.750354170647656e-06,
      "loss": 0.0429,
      "step": 399300
    },
    {
      "epoch": 0.6534959381525631,
      "grad_norm": 0.8137276768684387,
      "learning_rate": 8.750288278434138e-06,
      "loss": 0.041,
      "step": 399320
    },
    {
      "epoch": 0.6535286685912165,
      "grad_norm": 0.321514755487442,
      "learning_rate": 8.750222386220622e-06,
      "loss": 0.0379,
      "step": 399340
    },
    {
      "epoch": 0.6535613990298698,
      "grad_norm": 0.9489532709121704,
      "learning_rate": 8.750156494007104e-06,
      "loss": 0.0418,
      "step": 399360
    },
    {
      "epoch": 0.6535941294685231,
      "grad_norm": 1.2278907299041748,
      "learning_rate": 8.750090601793587e-06,
      "loss": 0.0462,
      "step": 399380
    },
    {
      "epoch": 0.6536268599071765,
      "grad_norm": 1.0931323766708374,
      "learning_rate": 8.75002470958007e-06,
      "loss": 0.0397,
      "step": 399400
    },
    {
      "epoch": 0.6536595903458298,
      "grad_norm": 0.32741454243659973,
      "learning_rate": 8.749958817366553e-06,
      "loss": 0.0336,
      "step": 399420
    },
    {
      "epoch": 0.6536923207844831,
      "grad_norm": 1.6090950965881348,
      "learning_rate": 8.749892925153035e-06,
      "loss": 0.0479,
      "step": 399440
    },
    {
      "epoch": 0.6537250512231365,
      "grad_norm": 0.9624406695365906,
      "learning_rate": 8.749827032939518e-06,
      "loss": 0.028,
      "step": 399460
    },
    {
      "epoch": 0.6537577816617899,
      "grad_norm": 0.9035755395889282,
      "learning_rate": 8.749761140726e-06,
      "loss": 0.0278,
      "step": 399480
    },
    {
      "epoch": 0.6537905121004431,
      "grad_norm": 0.5706042647361755,
      "learning_rate": 8.749695248512484e-06,
      "loss": 0.0252,
      "step": 399500
    },
    {
      "epoch": 0.6538232425390965,
      "grad_norm": 0.6893337965011597,
      "learning_rate": 8.749629356298967e-06,
      "loss": 0.0341,
      "step": 399520
    },
    {
      "epoch": 0.6538559729777499,
      "grad_norm": 0.5823720693588257,
      "learning_rate": 8.74956346408545e-06,
      "loss": 0.0442,
      "step": 399540
    },
    {
      "epoch": 0.6538887034164031,
      "grad_norm": 0.4358319640159607,
      "learning_rate": 8.749497571871933e-06,
      "loss": 0.0349,
      "step": 399560
    },
    {
      "epoch": 0.6539214338550565,
      "grad_norm": 0.4567476511001587,
      "learning_rate": 8.749431679658416e-06,
      "loss": 0.0286,
      "step": 399580
    },
    {
      "epoch": 0.6539541642937099,
      "grad_norm": 0.9940798282623291,
      "learning_rate": 8.749365787444898e-06,
      "loss": 0.0419,
      "step": 399600
    },
    {
      "epoch": 0.6539868947323632,
      "grad_norm": 0.9993065595626831,
      "learning_rate": 8.749299895231382e-06,
      "loss": 0.0395,
      "step": 399620
    },
    {
      "epoch": 0.6540196251710165,
      "grad_norm": 0.7412497401237488,
      "learning_rate": 8.749234003017865e-06,
      "loss": 0.039,
      "step": 399640
    },
    {
      "epoch": 0.6540523556096699,
      "grad_norm": 0.703970193862915,
      "learning_rate": 8.749168110804347e-06,
      "loss": 0.0317,
      "step": 399660
    },
    {
      "epoch": 0.6540850860483233,
      "grad_norm": 1.567432165145874,
      "learning_rate": 8.749102218590831e-06,
      "loss": 0.038,
      "step": 399680
    },
    {
      "epoch": 0.6541178164869765,
      "grad_norm": 4.199395179748535,
      "learning_rate": 8.749036326377313e-06,
      "loss": 0.0355,
      "step": 399700
    },
    {
      "epoch": 0.6541505469256299,
      "grad_norm": 1.2854349613189697,
      "learning_rate": 8.748970434163796e-06,
      "loss": 0.0338,
      "step": 399720
    },
    {
      "epoch": 0.6541832773642833,
      "grad_norm": 1.3045971393585205,
      "learning_rate": 8.748904541950278e-06,
      "loss": 0.028,
      "step": 399740
    },
    {
      "epoch": 0.6542160078029365,
      "grad_norm": 1.803045630455017,
      "learning_rate": 8.748838649736762e-06,
      "loss": 0.0388,
      "step": 399760
    },
    {
      "epoch": 0.6542487382415899,
      "grad_norm": 0.24857541918754578,
      "learning_rate": 8.748772757523244e-06,
      "loss": 0.0285,
      "step": 399780
    },
    {
      "epoch": 0.6542814686802433,
      "grad_norm": 0.16422046720981598,
      "learning_rate": 8.748706865309727e-06,
      "loss": 0.0521,
      "step": 399800
    },
    {
      "epoch": 0.6543141991188965,
      "grad_norm": 1.315004825592041,
      "learning_rate": 8.74864097309621e-06,
      "loss": 0.0331,
      "step": 399820
    },
    {
      "epoch": 0.6543469295575499,
      "grad_norm": 1.9323862791061401,
      "learning_rate": 8.748575080882693e-06,
      "loss": 0.0662,
      "step": 399840
    },
    {
      "epoch": 0.6543796599962033,
      "grad_norm": 1.4716246128082275,
      "learning_rate": 8.748509188669175e-06,
      "loss": 0.0327,
      "step": 399860
    },
    {
      "epoch": 0.6544123904348567,
      "grad_norm": 0.33851227164268494,
      "learning_rate": 8.748443296455658e-06,
      "loss": 0.034,
      "step": 399880
    },
    {
      "epoch": 0.6544451208735099,
      "grad_norm": 0.43253380060195923,
      "learning_rate": 8.74837740424214e-06,
      "loss": 0.0352,
      "step": 399900
    },
    {
      "epoch": 0.6544778513121633,
      "grad_norm": 1.2151226997375488,
      "learning_rate": 8.748311512028624e-06,
      "loss": 0.0299,
      "step": 399920
    },
    {
      "epoch": 0.6545105817508167,
      "grad_norm": 0.5080254077911377,
      "learning_rate": 8.748245619815107e-06,
      "loss": 0.0235,
      "step": 399940
    },
    {
      "epoch": 0.6545433121894699,
      "grad_norm": 0.6356027126312256,
      "learning_rate": 8.74817972760159e-06,
      "loss": 0.0226,
      "step": 399960
    },
    {
      "epoch": 0.6545760426281233,
      "grad_norm": 2.3947994709014893,
      "learning_rate": 8.748113835388073e-06,
      "loss": 0.0348,
      "step": 399980
    },
    {
      "epoch": 0.6546087730667767,
      "grad_norm": 0.6389948129653931,
      "learning_rate": 8.748047943174556e-06,
      "loss": 0.0442,
      "step": 400000
    },
    {
      "epoch": 0.6546087730667767,
      "eval_loss": 0.017211738973855972,
      "eval_runtime": 6500.4144,
      "eval_samples_per_second": 158.122,
      "eval_steps_per_second": 15.812,
      "eval_sts-dev_pearson_cosine": 0.9590091126850876,
      "eval_sts-dev_spearman_cosine": 0.879515997408928,
      "step": 400000
    },
    {
      "epoch": 0.6546415035054299,
      "grad_norm": 0.16619376838207245,
      "learning_rate": 8.747982050961038e-06,
      "loss": 0.0331,
      "step": 400020
    },
    {
      "epoch": 0.6546742339440833,
      "grad_norm": 1.3319566249847412,
      "learning_rate": 8.747916158747522e-06,
      "loss": 0.0319,
      "step": 400040
    },
    {
      "epoch": 0.6547069643827367,
      "grad_norm": 0.3784010410308838,
      "learning_rate": 8.747850266534005e-06,
      "loss": 0.0267,
      "step": 400060
    },
    {
      "epoch": 0.65473969482139,
      "grad_norm": 1.069568395614624,
      "learning_rate": 8.747784374320487e-06,
      "loss": 0.0428,
      "step": 400080
    },
    {
      "epoch": 0.6547724252600433,
      "grad_norm": 0.7369615435600281,
      "learning_rate": 8.747718482106971e-06,
      "loss": 0.0278,
      "step": 400100
    },
    {
      "epoch": 0.6548051556986967,
      "grad_norm": 1.1687424182891846,
      "learning_rate": 8.747652589893453e-06,
      "loss": 0.0327,
      "step": 400120
    },
    {
      "epoch": 0.6548378861373501,
      "grad_norm": 0.6924857497215271,
      "learning_rate": 8.747586697679936e-06,
      "loss": 0.0319,
      "step": 400140
    },
    {
      "epoch": 0.6548706165760033,
      "grad_norm": 0.6392734050750732,
      "learning_rate": 8.747520805466418e-06,
      "loss": 0.0412,
      "step": 400160
    },
    {
      "epoch": 0.6549033470146567,
      "grad_norm": 0.6094045042991638,
      "learning_rate": 8.747454913252902e-06,
      "loss": 0.0363,
      "step": 400180
    },
    {
      "epoch": 0.6549360774533101,
      "grad_norm": 0.6271216869354248,
      "learning_rate": 8.747389021039384e-06,
      "loss": 0.0478,
      "step": 400200
    },
    {
      "epoch": 0.6549688078919633,
      "grad_norm": 1.7439043521881104,
      "learning_rate": 8.747323128825867e-06,
      "loss": 0.0238,
      "step": 400220
    },
    {
      "epoch": 0.6550015383306167,
      "grad_norm": 1.7732653617858887,
      "learning_rate": 8.74725723661235e-06,
      "loss": 0.0305,
      "step": 400240
    },
    {
      "epoch": 0.6550342687692701,
      "grad_norm": 2.39658522605896,
      "learning_rate": 8.747191344398833e-06,
      "loss": 0.0378,
      "step": 400260
    },
    {
      "epoch": 0.6550669992079234,
      "grad_norm": 1.3760851621627808,
      "learning_rate": 8.747125452185315e-06,
      "loss": 0.0416,
      "step": 400280
    },
    {
      "epoch": 0.6550997296465767,
      "grad_norm": 1.855747103691101,
      "learning_rate": 8.747059559971798e-06,
      "loss": 0.0231,
      "step": 400300
    },
    {
      "epoch": 0.6551324600852301,
      "grad_norm": 1.0193390846252441,
      "learning_rate": 8.746993667758282e-06,
      "loss": 0.0376,
      "step": 400320
    },
    {
      "epoch": 0.6551651905238834,
      "grad_norm": 0.5428394675254822,
      "learning_rate": 8.746927775544764e-06,
      "loss": 0.0401,
      "step": 400340
    },
    {
      "epoch": 0.6551979209625367,
      "grad_norm": 1.6226204633712769,
      "learning_rate": 8.746861883331247e-06,
      "loss": 0.0346,
      "step": 400360
    },
    {
      "epoch": 0.6552306514011901,
      "grad_norm": 3.8333189487457275,
      "learning_rate": 8.746795991117731e-06,
      "loss": 0.0466,
      "step": 400380
    },
    {
      "epoch": 0.6552633818398435,
      "grad_norm": 1.2174056768417358,
      "learning_rate": 8.746730098904213e-06,
      "loss": 0.037,
      "step": 400400
    },
    {
      "epoch": 0.6552961122784967,
      "grad_norm": 1.7463188171386719,
      "learning_rate": 8.746664206690697e-06,
      "loss": 0.0365,
      "step": 400420
    },
    {
      "epoch": 0.6553288427171501,
      "grad_norm": 0.974270761013031,
      "learning_rate": 8.74659831447718e-06,
      "loss": 0.0265,
      "step": 400440
    },
    {
      "epoch": 0.6553615731558035,
      "grad_norm": 0.49824169278144836,
      "learning_rate": 8.746532422263662e-06,
      "loss": 0.0355,
      "step": 400460
    },
    {
      "epoch": 0.6553943035944567,
      "grad_norm": 0.4648345410823822,
      "learning_rate": 8.746466530050146e-06,
      "loss": 0.037,
      "step": 400480
    },
    {
      "epoch": 0.6554270340331101,
      "grad_norm": 0.5451686382293701,
      "learning_rate": 8.746400637836627e-06,
      "loss": 0.0363,
      "step": 400500
    },
    {
      "epoch": 0.6554597644717635,
      "grad_norm": 1.3716824054718018,
      "learning_rate": 8.746334745623111e-06,
      "loss": 0.0343,
      "step": 400520
    },
    {
      "epoch": 0.6554924949104168,
      "grad_norm": 0.6431211829185486,
      "learning_rate": 8.746268853409593e-06,
      "loss": 0.0359,
      "step": 400540
    },
    {
      "epoch": 0.6555252253490701,
      "grad_norm": 0.6364778280258179,
      "learning_rate": 8.746202961196077e-06,
      "loss": 0.0341,
      "step": 400560
    },
    {
      "epoch": 0.6555579557877235,
      "grad_norm": 0.3426719307899475,
      "learning_rate": 8.746137068982558e-06,
      "loss": 0.0395,
      "step": 400580
    },
    {
      "epoch": 0.6555906862263768,
      "grad_norm": 0.5938016176223755,
      "learning_rate": 8.746071176769042e-06,
      "loss": 0.0379,
      "step": 400600
    },
    {
      "epoch": 0.6556234166650301,
      "grad_norm": 1.4576884508132935,
      "learning_rate": 8.746005284555524e-06,
      "loss": 0.0355,
      "step": 400620
    },
    {
      "epoch": 0.6556561471036835,
      "grad_norm": 0.6323593854904175,
      "learning_rate": 8.745939392342007e-06,
      "loss": 0.0389,
      "step": 400640
    },
    {
      "epoch": 0.6556888775423368,
      "grad_norm": 0.7720882296562195,
      "learning_rate": 8.74587350012849e-06,
      "loss": 0.0218,
      "step": 400660
    },
    {
      "epoch": 0.6557216079809901,
      "grad_norm": 0.3093770146369934,
      "learning_rate": 8.745807607914973e-06,
      "loss": 0.0405,
      "step": 400680
    },
    {
      "epoch": 0.6557543384196435,
      "grad_norm": 0.3300448954105377,
      "learning_rate": 8.745741715701457e-06,
      "loss": 0.0369,
      "step": 400700
    },
    {
      "epoch": 0.6557870688582969,
      "grad_norm": 0.8119030594825745,
      "learning_rate": 8.745675823487938e-06,
      "loss": 0.0387,
      "step": 400720
    },
    {
      "epoch": 0.6558197992969502,
      "grad_norm": 0.55275958776474,
      "learning_rate": 8.745609931274422e-06,
      "loss": 0.0353,
      "step": 400740
    },
    {
      "epoch": 0.6558525297356035,
      "grad_norm": 0.7243263125419617,
      "learning_rate": 8.745544039060904e-06,
      "loss": 0.0266,
      "step": 400760
    },
    {
      "epoch": 0.6558852601742569,
      "grad_norm": 0.8736827373504639,
      "learning_rate": 8.745478146847388e-06,
      "loss": 0.0256,
      "step": 400780
    },
    {
      "epoch": 0.6559179906129102,
      "grad_norm": 1.5572055578231812,
      "learning_rate": 8.745412254633871e-06,
      "loss": 0.0361,
      "step": 400800
    },
    {
      "epoch": 0.6559507210515635,
      "grad_norm": 1.211233139038086,
      "learning_rate": 8.745346362420353e-06,
      "loss": 0.0463,
      "step": 400820
    },
    {
      "epoch": 0.6559834514902169,
      "grad_norm": 0.23396308720111847,
      "learning_rate": 8.745280470206837e-06,
      "loss": 0.0466,
      "step": 400840
    },
    {
      "epoch": 0.6560161819288702,
      "grad_norm": 0.8253134489059448,
      "learning_rate": 8.74521457799332e-06,
      "loss": 0.0317,
      "step": 400860
    },
    {
      "epoch": 0.6560489123675235,
      "grad_norm": 1.475647211074829,
      "learning_rate": 8.745148685779802e-06,
      "loss": 0.0257,
      "step": 400880
    },
    {
      "epoch": 0.6560816428061769,
      "grad_norm": 0.6577889919281006,
      "learning_rate": 8.745082793566286e-06,
      "loss": 0.037,
      "step": 400900
    },
    {
      "epoch": 0.6561143732448302,
      "grad_norm": 0.4137327969074249,
      "learning_rate": 8.745016901352768e-06,
      "loss": 0.0324,
      "step": 400920
    },
    {
      "epoch": 0.6561471036834836,
      "grad_norm": 2.146470069885254,
      "learning_rate": 8.744951009139251e-06,
      "loss": 0.0307,
      "step": 400940
    },
    {
      "epoch": 0.6561798341221369,
      "grad_norm": 1.623063087463379,
      "learning_rate": 8.744885116925733e-06,
      "loss": 0.0251,
      "step": 400960
    },
    {
      "epoch": 0.6562125645607902,
      "grad_norm": 1.1243101358413696,
      "learning_rate": 8.744819224712217e-06,
      "loss": 0.0308,
      "step": 400980
    },
    {
      "epoch": 0.6562452949994436,
      "grad_norm": 1.9739420413970947,
      "learning_rate": 8.744753332498699e-06,
      "loss": 0.0334,
      "step": 401000
    },
    {
      "epoch": 0.6562780254380969,
      "grad_norm": 3.952003002166748,
      "learning_rate": 8.744687440285182e-06,
      "loss": 0.0415,
      "step": 401020
    },
    {
      "epoch": 0.6563107558767503,
      "grad_norm": 2.9912638664245605,
      "learning_rate": 8.744621548071666e-06,
      "loss": 0.0303,
      "step": 401040
    },
    {
      "epoch": 0.6563434863154036,
      "grad_norm": 0.4388529360294342,
      "learning_rate": 8.744555655858148e-06,
      "loss": 0.0361,
      "step": 401060
    },
    {
      "epoch": 0.6563762167540569,
      "grad_norm": 0.2837122082710266,
      "learning_rate": 8.744489763644631e-06,
      "loss": 0.0463,
      "step": 401080
    },
    {
      "epoch": 0.6564089471927103,
      "grad_norm": 0.32114288210868835,
      "learning_rate": 8.744423871431113e-06,
      "loss": 0.0325,
      "step": 401100
    },
    {
      "epoch": 0.6564416776313636,
      "grad_norm": 0.12251661717891693,
      "learning_rate": 8.744357979217597e-06,
      "loss": 0.0371,
      "step": 401120
    },
    {
      "epoch": 0.656474408070017,
      "grad_norm": 1.6295908689498901,
      "learning_rate": 8.744292087004079e-06,
      "loss": 0.0411,
      "step": 401140
    },
    {
      "epoch": 0.6565071385086703,
      "grad_norm": 2.838383674621582,
      "learning_rate": 8.744226194790562e-06,
      "loss": 0.0271,
      "step": 401160
    },
    {
      "epoch": 0.6565398689473236,
      "grad_norm": 1.4841190576553345,
      "learning_rate": 8.744160302577046e-06,
      "loss": 0.034,
      "step": 401180
    },
    {
      "epoch": 0.656572599385977,
      "grad_norm": 0.5855181217193604,
      "learning_rate": 8.744094410363528e-06,
      "loss": 0.0308,
      "step": 401200
    },
    {
      "epoch": 0.6566053298246303,
      "grad_norm": 3.202437400817871,
      "learning_rate": 8.744028518150011e-06,
      "loss": 0.0402,
      "step": 401220
    },
    {
      "epoch": 0.6566380602632836,
      "grad_norm": 1.3756659030914307,
      "learning_rate": 8.743962625936495e-06,
      "loss": 0.0299,
      "step": 401240
    },
    {
      "epoch": 0.656670790701937,
      "grad_norm": 1.127303123474121,
      "learning_rate": 8.743896733722977e-06,
      "loss": 0.0255,
      "step": 401260
    },
    {
      "epoch": 0.6567035211405903,
      "grad_norm": 1.62688148021698,
      "learning_rate": 8.74383084150946e-06,
      "loss": 0.0313,
      "step": 401280
    },
    {
      "epoch": 0.6567362515792436,
      "grad_norm": 1.2248197793960571,
      "learning_rate": 8.743764949295942e-06,
      "loss": 0.0508,
      "step": 401300
    },
    {
      "epoch": 0.656768982017897,
      "grad_norm": 0.7109394073486328,
      "learning_rate": 8.743699057082426e-06,
      "loss": 0.0414,
      "step": 401320
    },
    {
      "epoch": 0.6568017124565504,
      "grad_norm": 3.263237237930298,
      "learning_rate": 8.743633164868908e-06,
      "loss": 0.0269,
      "step": 401340
    },
    {
      "epoch": 0.6568344428952037,
      "grad_norm": 0.21225009858608246,
      "learning_rate": 8.743567272655391e-06,
      "loss": 0.0303,
      "step": 401360
    },
    {
      "epoch": 0.656867173333857,
      "grad_norm": 0.4026845097541809,
      "learning_rate": 8.743501380441873e-06,
      "loss": 0.0366,
      "step": 401380
    },
    {
      "epoch": 0.6568999037725104,
      "grad_norm": 1.019481897354126,
      "learning_rate": 8.743435488228357e-06,
      "loss": 0.0401,
      "step": 401400
    },
    {
      "epoch": 0.6569326342111637,
      "grad_norm": 3.0241434574127197,
      "learning_rate": 8.74336959601484e-06,
      "loss": 0.0266,
      "step": 401420
    },
    {
      "epoch": 0.656965364649817,
      "grad_norm": 2.608344793319702,
      "learning_rate": 8.743303703801322e-06,
      "loss": 0.0413,
      "step": 401440
    },
    {
      "epoch": 0.6569980950884704,
      "grad_norm": 1.2564268112182617,
      "learning_rate": 8.743237811587806e-06,
      "loss": 0.0301,
      "step": 401460
    },
    {
      "epoch": 0.6570308255271237,
      "grad_norm": 2.012375593185425,
      "learning_rate": 8.743171919374288e-06,
      "loss": 0.0264,
      "step": 401480
    },
    {
      "epoch": 0.657063555965777,
      "grad_norm": 0.3323803246021271,
      "learning_rate": 8.743106027160771e-06,
      "loss": 0.0318,
      "step": 401500
    },
    {
      "epoch": 0.6570962864044304,
      "grad_norm": 0.6879235506057739,
      "learning_rate": 8.743040134947253e-06,
      "loss": 0.0252,
      "step": 401520
    },
    {
      "epoch": 0.6571290168430838,
      "grad_norm": 1.1383005380630493,
      "learning_rate": 8.742974242733737e-06,
      "loss": 0.0323,
      "step": 401540
    },
    {
      "epoch": 0.657161747281737,
      "grad_norm": 2.186436414718628,
      "learning_rate": 8.74290835052022e-06,
      "loss": 0.0336,
      "step": 401560
    },
    {
      "epoch": 0.6571944777203904,
      "grad_norm": 1.2766326665878296,
      "learning_rate": 8.742842458306702e-06,
      "loss": 0.0361,
      "step": 401580
    },
    {
      "epoch": 0.6572272081590438,
      "grad_norm": 1.528725028038025,
      "learning_rate": 8.742776566093186e-06,
      "loss": 0.0356,
      "step": 401600
    },
    {
      "epoch": 0.657259938597697,
      "grad_norm": 0.35670188069343567,
      "learning_rate": 8.74271067387967e-06,
      "loss": 0.0331,
      "step": 401620
    },
    {
      "epoch": 0.6572926690363504,
      "grad_norm": 0.22753185033798218,
      "learning_rate": 8.742644781666151e-06,
      "loss": 0.0337,
      "step": 401640
    },
    {
      "epoch": 0.6573253994750038,
      "grad_norm": 0.46126216650009155,
      "learning_rate": 8.742578889452635e-06,
      "loss": 0.0422,
      "step": 401660
    },
    {
      "epoch": 0.657358129913657,
      "grad_norm": 0.272589772939682,
      "learning_rate": 8.742512997239117e-06,
      "loss": 0.0286,
      "step": 401680
    },
    {
      "epoch": 0.6573908603523104,
      "grad_norm": 2.4159138202667236,
      "learning_rate": 8.7424471050256e-06,
      "loss": 0.0348,
      "step": 401700
    },
    {
      "epoch": 0.6574235907909638,
      "grad_norm": 1.6868536472320557,
      "learning_rate": 8.742381212812082e-06,
      "loss": 0.0282,
      "step": 401720
    },
    {
      "epoch": 0.6574563212296172,
      "grad_norm": 0.36216217279434204,
      "learning_rate": 8.742315320598566e-06,
      "loss": 0.0314,
      "step": 401740
    },
    {
      "epoch": 0.6574890516682704,
      "grad_norm": 5.4529571533203125,
      "learning_rate": 8.74224942838505e-06,
      "loss": 0.0332,
      "step": 401760
    },
    {
      "epoch": 0.6575217821069238,
      "grad_norm": 1.1747212409973145,
      "learning_rate": 8.742183536171531e-06,
      "loss": 0.0489,
      "step": 401780
    },
    {
      "epoch": 0.6575545125455772,
      "grad_norm": 1.900010585784912,
      "learning_rate": 8.742117643958015e-06,
      "loss": 0.0406,
      "step": 401800
    },
    {
      "epoch": 0.6575872429842304,
      "grad_norm": 0.76679927110672,
      "learning_rate": 8.742051751744497e-06,
      "loss": 0.0309,
      "step": 401820
    },
    {
      "epoch": 0.6576199734228838,
      "grad_norm": 2.537330150604248,
      "learning_rate": 8.74198585953098e-06,
      "loss": 0.0347,
      "step": 401840
    },
    {
      "epoch": 0.6576527038615372,
      "grad_norm": 2.178097724914551,
      "learning_rate": 8.741919967317462e-06,
      "loss": 0.0292,
      "step": 401860
    },
    {
      "epoch": 0.6576854343001904,
      "grad_norm": 1.3816595077514648,
      "learning_rate": 8.741854075103946e-06,
      "loss": 0.0322,
      "step": 401880
    },
    {
      "epoch": 0.6577181647388438,
      "grad_norm": 0.451619416475296,
      "learning_rate": 8.741788182890428e-06,
      "loss": 0.0369,
      "step": 401900
    },
    {
      "epoch": 0.6577508951774972,
      "grad_norm": 0.6294505000114441,
      "learning_rate": 8.741722290676911e-06,
      "loss": 0.0316,
      "step": 401920
    },
    {
      "epoch": 0.6577836256161506,
      "grad_norm": 0.9999940395355225,
      "learning_rate": 8.741656398463393e-06,
      "loss": 0.0417,
      "step": 401940
    },
    {
      "epoch": 0.6578163560548038,
      "grad_norm": 1.5417536497116089,
      "learning_rate": 8.741590506249877e-06,
      "loss": 0.0371,
      "step": 401960
    },
    {
      "epoch": 0.6578490864934572,
      "grad_norm": 0.7118582129478455,
      "learning_rate": 8.74152461403636e-06,
      "loss": 0.0324,
      "step": 401980
    },
    {
      "epoch": 0.6578818169321106,
      "grad_norm": 1.2082223892211914,
      "learning_rate": 8.741458721822842e-06,
      "loss": 0.0333,
      "step": 402000
    },
    {
      "epoch": 0.6579145473707638,
      "grad_norm": 2.4214730262756348,
      "learning_rate": 8.741392829609326e-06,
      "loss": 0.0322,
      "step": 402020
    },
    {
      "epoch": 0.6579472778094172,
      "grad_norm": 1.950488567352295,
      "learning_rate": 8.74132693739581e-06,
      "loss": 0.0326,
      "step": 402040
    },
    {
      "epoch": 0.6579800082480706,
      "grad_norm": 0.5691583752632141,
      "learning_rate": 8.741261045182291e-06,
      "loss": 0.0402,
      "step": 402060
    },
    {
      "epoch": 0.6580127386867238,
      "grad_norm": 0.709557056427002,
      "learning_rate": 8.741195152968775e-06,
      "loss": 0.031,
      "step": 402080
    },
    {
      "epoch": 0.6580454691253772,
      "grad_norm": 0.6268230080604553,
      "learning_rate": 8.741129260755259e-06,
      "loss": 0.0303,
      "step": 402100
    },
    {
      "epoch": 0.6580781995640306,
      "grad_norm": 0.6667370200157166,
      "learning_rate": 8.74106336854174e-06,
      "loss": 0.0335,
      "step": 402120
    },
    {
      "epoch": 0.658110930002684,
      "grad_norm": 5.904857158660889,
      "learning_rate": 8.740997476328224e-06,
      "loss": 0.0366,
      "step": 402140
    },
    {
      "epoch": 0.6581436604413372,
      "grad_norm": 2.4029324054718018,
      "learning_rate": 8.740931584114706e-06,
      "loss": 0.0383,
      "step": 402160
    },
    {
      "epoch": 0.6581763908799906,
      "grad_norm": 0.9092519879341125,
      "learning_rate": 8.74086569190119e-06,
      "loss": 0.0304,
      "step": 402180
    },
    {
      "epoch": 0.658209121318644,
      "grad_norm": 1.1229453086853027,
      "learning_rate": 8.740799799687671e-06,
      "loss": 0.0361,
      "step": 402200
    },
    {
      "epoch": 0.6582418517572972,
      "grad_norm": 2.964580774307251,
      "learning_rate": 8.740733907474155e-06,
      "loss": 0.0339,
      "step": 402220
    },
    {
      "epoch": 0.6582745821959506,
      "grad_norm": 2.099966287612915,
      "learning_rate": 8.740668015260637e-06,
      "loss": 0.0289,
      "step": 402240
    },
    {
      "epoch": 0.658307312634604,
      "grad_norm": 1.6584666967391968,
      "learning_rate": 8.74060212304712e-06,
      "loss": 0.039,
      "step": 402260
    },
    {
      "epoch": 0.6583400430732572,
      "grad_norm": 0.8921546936035156,
      "learning_rate": 8.740536230833602e-06,
      "loss": 0.032,
      "step": 402280
    },
    {
      "epoch": 0.6583727735119106,
      "grad_norm": 0.3492790460586548,
      "learning_rate": 8.740470338620086e-06,
      "loss": 0.0313,
      "step": 402300
    },
    {
      "epoch": 0.658405503950564,
      "grad_norm": 1.751627802848816,
      "learning_rate": 8.740404446406568e-06,
      "loss": 0.0338,
      "step": 402320
    },
    {
      "epoch": 0.6584382343892173,
      "grad_norm": 0.43754011392593384,
      "learning_rate": 8.740338554193051e-06,
      "loss": 0.0262,
      "step": 402340
    },
    {
      "epoch": 0.6584709648278706,
      "grad_norm": 0.8748875856399536,
      "learning_rate": 8.740272661979535e-06,
      "loss": 0.0346,
      "step": 402360
    },
    {
      "epoch": 0.658503695266524,
      "grad_norm": 6.605196475982666,
      "learning_rate": 8.740206769766017e-06,
      "loss": 0.0434,
      "step": 402380
    },
    {
      "epoch": 0.6585364257051773,
      "grad_norm": 1.9677776098251343,
      "learning_rate": 8.7401408775525e-06,
      "loss": 0.0404,
      "step": 402400
    },
    {
      "epoch": 0.6585691561438306,
      "grad_norm": 2.1878867149353027,
      "learning_rate": 8.740074985338984e-06,
      "loss": 0.0336,
      "step": 402420
    },
    {
      "epoch": 0.658601886582484,
      "grad_norm": 0.2606726586818695,
      "learning_rate": 8.740009093125466e-06,
      "loss": 0.0342,
      "step": 402440
    },
    {
      "epoch": 0.6586346170211373,
      "grad_norm": 2.0544593334198,
      "learning_rate": 8.73994320091195e-06,
      "loss": 0.0287,
      "step": 402460
    },
    {
      "epoch": 0.6586673474597906,
      "grad_norm": 0.6103911399841309,
      "learning_rate": 8.739877308698433e-06,
      "loss": 0.0315,
      "step": 402480
    },
    {
      "epoch": 0.658700077898444,
      "grad_norm": 1.23912513256073,
      "learning_rate": 8.739811416484915e-06,
      "loss": 0.0493,
      "step": 402500
    },
    {
      "epoch": 0.6587328083370974,
      "grad_norm": 1.8921512365341187,
      "learning_rate": 8.739745524271399e-06,
      "loss": 0.0485,
      "step": 402520
    },
    {
      "epoch": 0.6587655387757507,
      "grad_norm": 0.7258275151252747,
      "learning_rate": 8.73967963205788e-06,
      "loss": 0.0314,
      "step": 402540
    },
    {
      "epoch": 0.658798269214404,
      "grad_norm": 0.5253123044967651,
      "learning_rate": 8.739613739844364e-06,
      "loss": 0.0294,
      "step": 402560
    },
    {
      "epoch": 0.6588309996530574,
      "grad_norm": 0.6888947486877441,
      "learning_rate": 8.739547847630846e-06,
      "loss": 0.0375,
      "step": 402580
    },
    {
      "epoch": 0.6588637300917107,
      "grad_norm": 1.2221612930297852,
      "learning_rate": 8.73948195541733e-06,
      "loss": 0.0417,
      "step": 402600
    },
    {
      "epoch": 0.658896460530364,
      "grad_norm": 1.094373345375061,
      "learning_rate": 8.739416063203811e-06,
      "loss": 0.0253,
      "step": 402620
    },
    {
      "epoch": 0.6589291909690174,
      "grad_norm": 1.1584824323654175,
      "learning_rate": 8.739350170990295e-06,
      "loss": 0.0376,
      "step": 402640
    },
    {
      "epoch": 0.6589619214076707,
      "grad_norm": 1.3021137714385986,
      "learning_rate": 8.739284278776777e-06,
      "loss": 0.0286,
      "step": 402660
    },
    {
      "epoch": 0.658994651846324,
      "grad_norm": 0.6897766590118408,
      "learning_rate": 8.73921838656326e-06,
      "loss": 0.0401,
      "step": 402680
    },
    {
      "epoch": 0.6590273822849774,
      "grad_norm": 0.3032079041004181,
      "learning_rate": 8.739152494349742e-06,
      "loss": 0.0315,
      "step": 402700
    },
    {
      "epoch": 0.6590601127236307,
      "grad_norm": 1.5482349395751953,
      "learning_rate": 8.739086602136226e-06,
      "loss": 0.0497,
      "step": 402720
    },
    {
      "epoch": 0.6590928431622841,
      "grad_norm": 2.95422625541687,
      "learning_rate": 8.739020709922708e-06,
      "loss": 0.0543,
      "step": 402740
    },
    {
      "epoch": 0.6591255736009374,
      "grad_norm": 0.6284604072570801,
      "learning_rate": 8.738954817709191e-06,
      "loss": 0.0337,
      "step": 402760
    },
    {
      "epoch": 0.6591583040395907,
      "grad_norm": 2.917945146560669,
      "learning_rate": 8.738888925495675e-06,
      "loss": 0.0358,
      "step": 402780
    },
    {
      "epoch": 0.6591910344782441,
      "grad_norm": 1.3086460828781128,
      "learning_rate": 8.738823033282157e-06,
      "loss": 0.0412,
      "step": 402800
    },
    {
      "epoch": 0.6592237649168974,
      "grad_norm": 0.3723764419555664,
      "learning_rate": 8.73875714106864e-06,
      "loss": 0.0301,
      "step": 402820
    },
    {
      "epoch": 0.6592564953555508,
      "grad_norm": 1.8449703454971313,
      "learning_rate": 8.738691248855124e-06,
      "loss": 0.0407,
      "step": 402840
    },
    {
      "epoch": 0.6592892257942041,
      "grad_norm": 0.415146142244339,
      "learning_rate": 8.738625356641606e-06,
      "loss": 0.0299,
      "step": 402860
    },
    {
      "epoch": 0.6593219562328574,
      "grad_norm": 0.45503076910972595,
      "learning_rate": 8.73855946442809e-06,
      "loss": 0.0289,
      "step": 402880
    },
    {
      "epoch": 0.6593546866715108,
      "grad_norm": 0.9269095063209534,
      "learning_rate": 8.738493572214573e-06,
      "loss": 0.0428,
      "step": 402900
    },
    {
      "epoch": 0.6593874171101641,
      "grad_norm": 0.6928108930587769,
      "learning_rate": 8.738427680001055e-06,
      "loss": 0.0331,
      "step": 402920
    },
    {
      "epoch": 0.6594201475488175,
      "grad_norm": 0.8146666288375854,
      "learning_rate": 8.738361787787539e-06,
      "loss": 0.0296,
      "step": 402940
    },
    {
      "epoch": 0.6594528779874708,
      "grad_norm": 2.8464417457580566,
      "learning_rate": 8.73829589557402e-06,
      "loss": 0.0292,
      "step": 402960
    },
    {
      "epoch": 0.6594856084261241,
      "grad_norm": 0.613619863986969,
      "learning_rate": 8.738230003360504e-06,
      "loss": 0.0349,
      "step": 402980
    },
    {
      "epoch": 0.6595183388647775,
      "grad_norm": 0.6061961054801941,
      "learning_rate": 8.738164111146986e-06,
      "loss": 0.0339,
      "step": 403000
    },
    {
      "epoch": 0.6595510693034308,
      "grad_norm": 1.0725281238555908,
      "learning_rate": 8.73809821893347e-06,
      "loss": 0.044,
      "step": 403020
    },
    {
      "epoch": 0.6595837997420841,
      "grad_norm": 1.79901921749115,
      "learning_rate": 8.738032326719952e-06,
      "loss": 0.0326,
      "step": 403040
    },
    {
      "epoch": 0.6596165301807375,
      "grad_norm": 1.9597939252853394,
      "learning_rate": 8.737966434506435e-06,
      "loss": 0.0446,
      "step": 403060
    },
    {
      "epoch": 0.6596492606193908,
      "grad_norm": 1.0198044776916504,
      "learning_rate": 8.737900542292917e-06,
      "loss": 0.04,
      "step": 403080
    },
    {
      "epoch": 0.6596819910580441,
      "grad_norm": 0.8407673239707947,
      "learning_rate": 8.7378346500794e-06,
      "loss": 0.0381,
      "step": 403100
    },
    {
      "epoch": 0.6597147214966975,
      "grad_norm": 1.084011197090149,
      "learning_rate": 8.737768757865882e-06,
      "loss": 0.0307,
      "step": 403120
    },
    {
      "epoch": 0.6597474519353509,
      "grad_norm": 2.2339205741882324,
      "learning_rate": 8.737702865652366e-06,
      "loss": 0.0427,
      "step": 403140
    },
    {
      "epoch": 0.6597801823740042,
      "grad_norm": 0.9313863515853882,
      "learning_rate": 8.73763697343885e-06,
      "loss": 0.0306,
      "step": 403160
    },
    {
      "epoch": 0.6598129128126575,
      "grad_norm": 1.286599040031433,
      "learning_rate": 8.737571081225332e-06,
      "loss": 0.0491,
      "step": 403180
    },
    {
      "epoch": 0.6598456432513109,
      "grad_norm": 1.2855358123779297,
      "learning_rate": 8.737505189011815e-06,
      "loss": 0.0265,
      "step": 403200
    },
    {
      "epoch": 0.6598783736899642,
      "grad_norm": 2.0110340118408203,
      "learning_rate": 8.737439296798299e-06,
      "loss": 0.0402,
      "step": 403220
    },
    {
      "epoch": 0.6599111041286175,
      "grad_norm": 3.903073310852051,
      "learning_rate": 8.73737340458478e-06,
      "loss": 0.031,
      "step": 403240
    },
    {
      "epoch": 0.6599438345672709,
      "grad_norm": 1.9258180856704712,
      "learning_rate": 8.737307512371264e-06,
      "loss": 0.0313,
      "step": 403260
    },
    {
      "epoch": 0.6599765650059242,
      "grad_norm": 0.44828665256500244,
      "learning_rate": 8.737241620157748e-06,
      "loss": 0.0353,
      "step": 403280
    },
    {
      "epoch": 0.6600092954445775,
      "grad_norm": 0.8837262988090515,
      "learning_rate": 8.73717572794423e-06,
      "loss": 0.0268,
      "step": 403300
    },
    {
      "epoch": 0.6600420258832309,
      "grad_norm": 1.5619068145751953,
      "learning_rate": 8.737109835730713e-06,
      "loss": 0.0361,
      "step": 403320
    },
    {
      "epoch": 0.6600747563218842,
      "grad_norm": 2.2142419815063477,
      "learning_rate": 8.737043943517195e-06,
      "loss": 0.0429,
      "step": 403340
    },
    {
      "epoch": 0.6601074867605375,
      "grad_norm": 0.6349369883537292,
      "learning_rate": 8.736978051303679e-06,
      "loss": 0.0343,
      "step": 403360
    },
    {
      "epoch": 0.6601402171991909,
      "grad_norm": 1.6208628416061401,
      "learning_rate": 8.73691215909016e-06,
      "loss": 0.0464,
      "step": 403380
    },
    {
      "epoch": 0.6601729476378443,
      "grad_norm": 1.9242794513702393,
      "learning_rate": 8.736846266876644e-06,
      "loss": 0.0367,
      "step": 403400
    },
    {
      "epoch": 0.6602056780764975,
      "grad_norm": 1.3360456228256226,
      "learning_rate": 8.736780374663126e-06,
      "loss": 0.0331,
      "step": 403420
    },
    {
      "epoch": 0.6602384085151509,
      "grad_norm": 2.6175928115844727,
      "learning_rate": 8.73671448244961e-06,
      "loss": 0.0311,
      "step": 403440
    },
    {
      "epoch": 0.6602711389538043,
      "grad_norm": 1.8720924854278564,
      "learning_rate": 8.736648590236092e-06,
      "loss": 0.0335,
      "step": 403460
    },
    {
      "epoch": 0.6603038693924576,
      "grad_norm": 0.9199132323265076,
      "learning_rate": 8.736582698022575e-06,
      "loss": 0.0349,
      "step": 403480
    },
    {
      "epoch": 0.6603365998311109,
      "grad_norm": 0.9746326208114624,
      "learning_rate": 8.736516805809059e-06,
      "loss": 0.041,
      "step": 403500
    },
    {
      "epoch": 0.6603693302697643,
      "grad_norm": 1.2683062553405762,
      "learning_rate": 8.73645091359554e-06,
      "loss": 0.0308,
      "step": 403520
    },
    {
      "epoch": 0.6604020607084176,
      "grad_norm": 0.19861498475074768,
      "learning_rate": 8.736385021382024e-06,
      "loss": 0.0336,
      "step": 403540
    },
    {
      "epoch": 0.6604347911470709,
      "grad_norm": 3.0551977157592773,
      "learning_rate": 8.736319129168506e-06,
      "loss": 0.0431,
      "step": 403560
    },
    {
      "epoch": 0.6604675215857243,
      "grad_norm": 0.5074866414070129,
      "learning_rate": 8.73625323695499e-06,
      "loss": 0.0435,
      "step": 403580
    },
    {
      "epoch": 0.6605002520243777,
      "grad_norm": 2.0183796882629395,
      "learning_rate": 8.736187344741472e-06,
      "loss": 0.0328,
      "step": 403600
    },
    {
      "epoch": 0.6605329824630309,
      "grad_norm": 0.358720600605011,
      "learning_rate": 8.736121452527955e-06,
      "loss": 0.0288,
      "step": 403620
    },
    {
      "epoch": 0.6605657129016843,
      "grad_norm": 0.314337819814682,
      "learning_rate": 8.736055560314439e-06,
      "loss": 0.0453,
      "step": 403640
    },
    {
      "epoch": 0.6605984433403377,
      "grad_norm": 1.5314040184020996,
      "learning_rate": 8.735989668100922e-06,
      "loss": 0.0358,
      "step": 403660
    },
    {
      "epoch": 0.6606311737789909,
      "grad_norm": 1.8282687664031982,
      "learning_rate": 8.735923775887404e-06,
      "loss": 0.0348,
      "step": 403680
    },
    {
      "epoch": 0.6606639042176443,
      "grad_norm": 1.3948708772659302,
      "learning_rate": 8.735857883673888e-06,
      "loss": 0.0374,
      "step": 403700
    },
    {
      "epoch": 0.6606966346562977,
      "grad_norm": 2.679013729095459,
      "learning_rate": 8.73579199146037e-06,
      "loss": 0.035,
      "step": 403720
    },
    {
      "epoch": 0.660729365094951,
      "grad_norm": 0.11038248986005783,
      "learning_rate": 8.735726099246853e-06,
      "loss": 0.0266,
      "step": 403740
    },
    {
      "epoch": 0.6607620955336043,
      "grad_norm": 3.3016517162323,
      "learning_rate": 8.735660207033335e-06,
      "loss": 0.0401,
      "step": 403760
    },
    {
      "epoch": 0.6607948259722577,
      "grad_norm": 1.1147279739379883,
      "learning_rate": 8.735594314819819e-06,
      "loss": 0.0471,
      "step": 403780
    },
    {
      "epoch": 0.6608275564109111,
      "grad_norm": 1.0414131879806519,
      "learning_rate": 8.7355284226063e-06,
      "loss": 0.0288,
      "step": 403800
    },
    {
      "epoch": 0.6608602868495643,
      "grad_norm": 1.764088749885559,
      "learning_rate": 8.735462530392784e-06,
      "loss": 0.0304,
      "step": 403820
    },
    {
      "epoch": 0.6608930172882177,
      "grad_norm": 0.31201863288879395,
      "learning_rate": 8.735396638179266e-06,
      "loss": 0.0349,
      "step": 403840
    },
    {
      "epoch": 0.6609257477268711,
      "grad_norm": 0.9525706768035889,
      "learning_rate": 8.73533074596575e-06,
      "loss": 0.0296,
      "step": 403860
    },
    {
      "epoch": 0.6609584781655243,
      "grad_norm": 0.30459001660346985,
      "learning_rate": 8.735264853752233e-06,
      "loss": 0.024,
      "step": 403880
    },
    {
      "epoch": 0.6609912086041777,
      "grad_norm": 0.5222476124763489,
      "learning_rate": 8.735198961538715e-06,
      "loss": 0.0447,
      "step": 403900
    },
    {
      "epoch": 0.6610239390428311,
      "grad_norm": 1.5476998090744019,
      "learning_rate": 8.735133069325199e-06,
      "loss": 0.03,
      "step": 403920
    },
    {
      "epoch": 0.6610566694814843,
      "grad_norm": 1.6064605712890625,
      "learning_rate": 8.73506717711168e-06,
      "loss": 0.0326,
      "step": 403940
    },
    {
      "epoch": 0.6610893999201377,
      "grad_norm": 0.8855584859848022,
      "learning_rate": 8.735001284898164e-06,
      "loss": 0.0402,
      "step": 403960
    },
    {
      "epoch": 0.6611221303587911,
      "grad_norm": 1.3895232677459717,
      "learning_rate": 8.734935392684646e-06,
      "loss": 0.0428,
      "step": 403980
    },
    {
      "epoch": 0.6611548607974445,
      "grad_norm": 1.5556374788284302,
      "learning_rate": 8.73486950047113e-06,
      "loss": 0.0391,
      "step": 404000
    },
    {
      "epoch": 0.6611875912360977,
      "grad_norm": 0.7610146403312683,
      "learning_rate": 8.734803608257613e-06,
      "loss": 0.0306,
      "step": 404020
    },
    {
      "epoch": 0.6612203216747511,
      "grad_norm": 0.5223143696784973,
      "learning_rate": 8.734737716044095e-06,
      "loss": 0.02,
      "step": 404040
    },
    {
      "epoch": 0.6612530521134045,
      "grad_norm": 1.516268014907837,
      "learning_rate": 8.734671823830579e-06,
      "loss": 0.0434,
      "step": 404060
    },
    {
      "epoch": 0.6612857825520577,
      "grad_norm": 1.8221416473388672,
      "learning_rate": 8.734605931617062e-06,
      "loss": 0.0293,
      "step": 404080
    },
    {
      "epoch": 0.6613185129907111,
      "grad_norm": 1.5572530031204224,
      "learning_rate": 8.734540039403544e-06,
      "loss": 0.0457,
      "step": 404100
    },
    {
      "epoch": 0.6613512434293645,
      "grad_norm": 2.268226385116577,
      "learning_rate": 8.734474147190028e-06,
      "loss": 0.0341,
      "step": 404120
    },
    {
      "epoch": 0.6613839738680177,
      "grad_norm": 1.6102229356765747,
      "learning_rate": 8.73440825497651e-06,
      "loss": 0.0379,
      "step": 404140
    },
    {
      "epoch": 0.6614167043066711,
      "grad_norm": 4.112458229064941,
      "learning_rate": 8.734342362762993e-06,
      "loss": 0.0434,
      "step": 404160
    },
    {
      "epoch": 0.6614494347453245,
      "grad_norm": 1.452073574066162,
      "learning_rate": 8.734276470549475e-06,
      "loss": 0.0296,
      "step": 404180
    },
    {
      "epoch": 0.6614821651839778,
      "grad_norm": 0.7485783696174622,
      "learning_rate": 8.734210578335959e-06,
      "loss": 0.0379,
      "step": 404200
    },
    {
      "epoch": 0.6615148956226311,
      "grad_norm": 1.4842294454574585,
      "learning_rate": 8.734144686122442e-06,
      "loss": 0.0351,
      "step": 404220
    },
    {
      "epoch": 0.6615476260612845,
      "grad_norm": 0.17376503348350525,
      "learning_rate": 8.734078793908924e-06,
      "loss": 0.0329,
      "step": 404240
    },
    {
      "epoch": 0.6615803564999378,
      "grad_norm": 1.3625245094299316,
      "learning_rate": 8.734012901695408e-06,
      "loss": 0.0292,
      "step": 404260
    },
    {
      "epoch": 0.6616130869385911,
      "grad_norm": 1.82719087600708,
      "learning_rate": 8.73394700948189e-06,
      "loss": 0.0411,
      "step": 404280
    },
    {
      "epoch": 0.6616458173772445,
      "grad_norm": 1.528785228729248,
      "learning_rate": 8.733881117268373e-06,
      "loss": 0.0442,
      "step": 404300
    },
    {
      "epoch": 0.6616785478158979,
      "grad_norm": 2.0078418254852295,
      "learning_rate": 8.733815225054855e-06,
      "loss": 0.0306,
      "step": 404320
    },
    {
      "epoch": 0.6617112782545511,
      "grad_norm": 0.6893339157104492,
      "learning_rate": 8.733749332841339e-06,
      "loss": 0.0321,
      "step": 404340
    },
    {
      "epoch": 0.6617440086932045,
      "grad_norm": 0.22170336544513702,
      "learning_rate": 8.73368344062782e-06,
      "loss": 0.0281,
      "step": 404360
    },
    {
      "epoch": 0.6617767391318579,
      "grad_norm": 0.7829872369766235,
      "learning_rate": 8.733617548414304e-06,
      "loss": 0.0393,
      "step": 404380
    },
    {
      "epoch": 0.6618094695705112,
      "grad_norm": 1.0125607252120972,
      "learning_rate": 8.733551656200788e-06,
      "loss": 0.033,
      "step": 404400
    },
    {
      "epoch": 0.6618422000091645,
      "grad_norm": 0.6621111631393433,
      "learning_rate": 8.73348576398727e-06,
      "loss": 0.0339,
      "step": 404420
    },
    {
      "epoch": 0.6618749304478179,
      "grad_norm": 0.8373797535896301,
      "learning_rate": 8.733419871773753e-06,
      "loss": 0.0258,
      "step": 404440
    },
    {
      "epoch": 0.6619076608864712,
      "grad_norm": 0.28149309754371643,
      "learning_rate": 8.733353979560237e-06,
      "loss": 0.0318,
      "step": 404460
    },
    {
      "epoch": 0.6619403913251245,
      "grad_norm": 3.0223147869110107,
      "learning_rate": 8.733288087346719e-06,
      "loss": 0.0357,
      "step": 404480
    },
    {
      "epoch": 0.6619731217637779,
      "grad_norm": 1.61497163772583,
      "learning_rate": 8.733222195133203e-06,
      "loss": 0.0301,
      "step": 404500
    },
    {
      "epoch": 0.6620058522024312,
      "grad_norm": 0.451263427734375,
      "learning_rate": 8.733156302919684e-06,
      "loss": 0.0411,
      "step": 404520
    },
    {
      "epoch": 0.6620385826410845,
      "grad_norm": 1.1986029148101807,
      "learning_rate": 8.733090410706168e-06,
      "loss": 0.0226,
      "step": 404540
    },
    {
      "epoch": 0.6620713130797379,
      "grad_norm": 0.7875614762306213,
      "learning_rate": 8.733024518492652e-06,
      "loss": 0.0472,
      "step": 404560
    },
    {
      "epoch": 0.6621040435183912,
      "grad_norm": 0.4258344769477844,
      "learning_rate": 8.732958626279133e-06,
      "loss": 0.0373,
      "step": 404580
    },
    {
      "epoch": 0.6621367739570446,
      "grad_norm": 0.3452529311180115,
      "learning_rate": 8.732892734065617e-06,
      "loss": 0.0338,
      "step": 404600
    },
    {
      "epoch": 0.6621695043956979,
      "grad_norm": 0.45890992879867554,
      "learning_rate": 8.732826841852099e-06,
      "loss": 0.0294,
      "step": 404620
    },
    {
      "epoch": 0.6622022348343513,
      "grad_norm": 1.7984347343444824,
      "learning_rate": 8.732760949638583e-06,
      "loss": 0.036,
      "step": 404640
    },
    {
      "epoch": 0.6622349652730046,
      "grad_norm": 1.5316424369812012,
      "learning_rate": 8.732695057425064e-06,
      "loss": 0.043,
      "step": 404660
    },
    {
      "epoch": 0.6622676957116579,
      "grad_norm": 0.5617177486419678,
      "learning_rate": 8.732629165211548e-06,
      "loss": 0.0283,
      "step": 404680
    },
    {
      "epoch": 0.6623004261503113,
      "grad_norm": 7.2900166511535645,
      "learning_rate": 8.73256327299803e-06,
      "loss": 0.0248,
      "step": 404700
    },
    {
      "epoch": 0.6623331565889646,
      "grad_norm": 0.6341412663459778,
      "learning_rate": 8.732497380784514e-06,
      "loss": 0.0231,
      "step": 404720
    },
    {
      "epoch": 0.6623658870276179,
      "grad_norm": 0.40254321694374084,
      "learning_rate": 8.732431488570995e-06,
      "loss": 0.0438,
      "step": 404740
    },
    {
      "epoch": 0.6623986174662713,
      "grad_norm": 0.8023709058761597,
      "learning_rate": 8.732365596357479e-06,
      "loss": 0.0422,
      "step": 404760
    },
    {
      "epoch": 0.6624313479049246,
      "grad_norm": 0.4273398816585541,
      "learning_rate": 8.732299704143961e-06,
      "loss": 0.0281,
      "step": 404780
    },
    {
      "epoch": 0.662464078343578,
      "grad_norm": 0.7978212833404541,
      "learning_rate": 8.732233811930444e-06,
      "loss": 0.0357,
      "step": 404800
    },
    {
      "epoch": 0.6624968087822313,
      "grad_norm": 1.8570891618728638,
      "learning_rate": 8.732167919716928e-06,
      "loss": 0.0278,
      "step": 404820
    },
    {
      "epoch": 0.6625295392208846,
      "grad_norm": 0.7374565601348877,
      "learning_rate": 8.73210202750341e-06,
      "loss": 0.0318,
      "step": 404840
    },
    {
      "epoch": 0.662562269659538,
      "grad_norm": 0.373171329498291,
      "learning_rate": 8.732036135289894e-06,
      "loss": 0.0343,
      "step": 404860
    },
    {
      "epoch": 0.6625950000981913,
      "grad_norm": 0.4579196572303772,
      "learning_rate": 8.731970243076377e-06,
      "loss": 0.0374,
      "step": 404880
    },
    {
      "epoch": 0.6626277305368447,
      "grad_norm": 0.6972620487213135,
      "learning_rate": 8.731904350862859e-06,
      "loss": 0.0265,
      "step": 404900
    },
    {
      "epoch": 0.662660460975498,
      "grad_norm": 1.6955732107162476,
      "learning_rate": 8.731838458649343e-06,
      "loss": 0.0288,
      "step": 404920
    },
    {
      "epoch": 0.6626931914141513,
      "grad_norm": 0.08933752775192261,
      "learning_rate": 8.731772566435826e-06,
      "loss": 0.0319,
      "step": 404940
    },
    {
      "epoch": 0.6627259218528047,
      "grad_norm": 2.2165021896362305,
      "learning_rate": 8.731706674222308e-06,
      "loss": 0.0323,
      "step": 404960
    },
    {
      "epoch": 0.662758652291458,
      "grad_norm": 1.6104466915130615,
      "learning_rate": 8.731640782008792e-06,
      "loss": 0.0373,
      "step": 404980
    },
    {
      "epoch": 0.6627913827301114,
      "grad_norm": 1.1471918821334839,
      "learning_rate": 8.731574889795274e-06,
      "loss": 0.0358,
      "step": 405000
    },
    {
      "epoch": 0.6628241131687647,
      "grad_norm": 2.0307443141937256,
      "learning_rate": 8.731508997581757e-06,
      "loss": 0.0369,
      "step": 405020
    },
    {
      "epoch": 0.662856843607418,
      "grad_norm": 0.9411450028419495,
      "learning_rate": 8.731443105368239e-06,
      "loss": 0.043,
      "step": 405040
    },
    {
      "epoch": 0.6628895740460714,
      "grad_norm": 2.6988234519958496,
      "learning_rate": 8.731377213154723e-06,
      "loss": 0.0431,
      "step": 405060
    },
    {
      "epoch": 0.6629223044847247,
      "grad_norm": 1.2005053758621216,
      "learning_rate": 8.731311320941205e-06,
      "loss": 0.0347,
      "step": 405080
    },
    {
      "epoch": 0.662955034923378,
      "grad_norm": 0.6891489624977112,
      "learning_rate": 8.731245428727688e-06,
      "loss": 0.0366,
      "step": 405100
    },
    {
      "epoch": 0.6629877653620314,
      "grad_norm": 0.6427727937698364,
      "learning_rate": 8.73117953651417e-06,
      "loss": 0.0326,
      "step": 405120
    },
    {
      "epoch": 0.6630204958006847,
      "grad_norm": 1.504160761833191,
      "learning_rate": 8.731113644300654e-06,
      "loss": 0.0363,
      "step": 405140
    },
    {
      "epoch": 0.663053226239338,
      "grad_norm": 1.7997148036956787,
      "learning_rate": 8.731047752087135e-06,
      "loss": 0.0301,
      "step": 405160
    },
    {
      "epoch": 0.6630859566779914,
      "grad_norm": 0.9419583678245544,
      "learning_rate": 8.730981859873619e-06,
      "loss": 0.0314,
      "step": 405180
    },
    {
      "epoch": 0.6631186871166448,
      "grad_norm": 0.6154763102531433,
      "learning_rate": 8.730915967660103e-06,
      "loss": 0.0296,
      "step": 405200
    },
    {
      "epoch": 0.663151417555298,
      "grad_norm": 1.3625333309173584,
      "learning_rate": 8.730850075446585e-06,
      "loss": 0.034,
      "step": 405220
    },
    {
      "epoch": 0.6631841479939514,
      "grad_norm": 0.826358437538147,
      "learning_rate": 8.730784183233068e-06,
      "loss": 0.0316,
      "step": 405240
    },
    {
      "epoch": 0.6632168784326048,
      "grad_norm": 1.6241955757141113,
      "learning_rate": 8.730718291019552e-06,
      "loss": 0.0297,
      "step": 405260
    },
    {
      "epoch": 0.6632496088712581,
      "grad_norm": 2.3286640644073486,
      "learning_rate": 8.730652398806034e-06,
      "loss": 0.0275,
      "step": 405280
    },
    {
      "epoch": 0.6632823393099114,
      "grad_norm": 1.3995273113250732,
      "learning_rate": 8.730586506592517e-06,
      "loss": 0.0388,
      "step": 405300
    },
    {
      "epoch": 0.6633150697485648,
      "grad_norm": 1.2161364555358887,
      "learning_rate": 8.730520614379e-06,
      "loss": 0.0357,
      "step": 405320
    },
    {
      "epoch": 0.6633478001872181,
      "grad_norm": 0.7154079079627991,
      "learning_rate": 8.730454722165483e-06,
      "loss": 0.0422,
      "step": 405340
    },
    {
      "epoch": 0.6633805306258714,
      "grad_norm": 0.9182748794555664,
      "learning_rate": 8.730388829951966e-06,
      "loss": 0.0325,
      "step": 405360
    },
    {
      "epoch": 0.6634132610645248,
      "grad_norm": 0.3669266402721405,
      "learning_rate": 8.730322937738448e-06,
      "loss": 0.0444,
      "step": 405380
    },
    {
      "epoch": 0.6634459915031782,
      "grad_norm": 1.294292688369751,
      "learning_rate": 8.730257045524932e-06,
      "loss": 0.0281,
      "step": 405400
    },
    {
      "epoch": 0.6634787219418314,
      "grad_norm": 0.7700400948524475,
      "learning_rate": 8.730191153311414e-06,
      "loss": 0.046,
      "step": 405420
    },
    {
      "epoch": 0.6635114523804848,
      "grad_norm": 1.3974323272705078,
      "learning_rate": 8.730125261097897e-06,
      "loss": 0.0375,
      "step": 405440
    },
    {
      "epoch": 0.6635441828191382,
      "grad_norm": 0.9577723145484924,
      "learning_rate": 8.730059368884379e-06,
      "loss": 0.0397,
      "step": 405460
    },
    {
      "epoch": 0.6635769132577914,
      "grad_norm": 2.777048110961914,
      "learning_rate": 8.729993476670863e-06,
      "loss": 0.0313,
      "step": 405480
    },
    {
      "epoch": 0.6636096436964448,
      "grad_norm": 0.6147314310073853,
      "learning_rate": 8.729927584457345e-06,
      "loss": 0.0305,
      "step": 405500
    },
    {
      "epoch": 0.6636423741350982,
      "grad_norm": 3.9587488174438477,
      "learning_rate": 8.729861692243828e-06,
      "loss": 0.0272,
      "step": 405520
    },
    {
      "epoch": 0.6636751045737515,
      "grad_norm": 1.3577053546905518,
      "learning_rate": 8.72979580003031e-06,
      "loss": 0.0316,
      "step": 405540
    },
    {
      "epoch": 0.6637078350124048,
      "grad_norm": 0.765665590763092,
      "learning_rate": 8.729729907816794e-06,
      "loss": 0.0424,
      "step": 405560
    },
    {
      "epoch": 0.6637405654510582,
      "grad_norm": 2.363215208053589,
      "learning_rate": 8.729664015603276e-06,
      "loss": 0.033,
      "step": 405580
    },
    {
      "epoch": 0.6637732958897116,
      "grad_norm": 0.4284762144088745,
      "learning_rate": 8.729598123389759e-06,
      "loss": 0.0305,
      "step": 405600
    },
    {
      "epoch": 0.6638060263283648,
      "grad_norm": 1.9094396829605103,
      "learning_rate": 8.729532231176243e-06,
      "loss": 0.0298,
      "step": 405620
    },
    {
      "epoch": 0.6638387567670182,
      "grad_norm": 0.46656426787376404,
      "learning_rate": 8.729466338962725e-06,
      "loss": 0.042,
      "step": 405640
    },
    {
      "epoch": 0.6638714872056716,
      "grad_norm": 1.3834130764007568,
      "learning_rate": 8.729400446749208e-06,
      "loss": 0.0285,
      "step": 405660
    },
    {
      "epoch": 0.6639042176443248,
      "grad_norm": 1.0554510354995728,
      "learning_rate": 8.729334554535692e-06,
      "loss": 0.0328,
      "step": 405680
    },
    {
      "epoch": 0.6639369480829782,
      "grad_norm": 0.5972477197647095,
      "learning_rate": 8.729268662322174e-06,
      "loss": 0.0288,
      "step": 405700
    },
    {
      "epoch": 0.6639696785216316,
      "grad_norm": 0.3249581456184387,
      "learning_rate": 8.729202770108657e-06,
      "loss": 0.0279,
      "step": 405720
    },
    {
      "epoch": 0.6640024089602848,
      "grad_norm": 1.8972790241241455,
      "learning_rate": 8.729136877895141e-06,
      "loss": 0.0403,
      "step": 405740
    },
    {
      "epoch": 0.6640351393989382,
      "grad_norm": 0.9798453450202942,
      "learning_rate": 8.729070985681623e-06,
      "loss": 0.0302,
      "step": 405760
    },
    {
      "epoch": 0.6640678698375916,
      "grad_norm": 0.24256108701229095,
      "learning_rate": 8.729005093468106e-06,
      "loss": 0.0393,
      "step": 405780
    },
    {
      "epoch": 0.664100600276245,
      "grad_norm": 0.6289349794387817,
      "learning_rate": 8.728939201254588e-06,
      "loss": 0.0286,
      "step": 405800
    },
    {
      "epoch": 0.6641333307148982,
      "grad_norm": 0.3550753593444824,
      "learning_rate": 8.728873309041072e-06,
      "loss": 0.0339,
      "step": 405820
    },
    {
      "epoch": 0.6641660611535516,
      "grad_norm": 2.05338191986084,
      "learning_rate": 8.728807416827554e-06,
      "loss": 0.0312,
      "step": 405840
    },
    {
      "epoch": 0.664198791592205,
      "grad_norm": 0.3905276656150818,
      "learning_rate": 8.728741524614037e-06,
      "loss": 0.0352,
      "step": 405860
    },
    {
      "epoch": 0.6642315220308582,
      "grad_norm": 1.825951337814331,
      "learning_rate": 8.72867563240052e-06,
      "loss": 0.0357,
      "step": 405880
    },
    {
      "epoch": 0.6642642524695116,
      "grad_norm": 2.492492437362671,
      "learning_rate": 8.728609740187003e-06,
      "loss": 0.045,
      "step": 405900
    },
    {
      "epoch": 0.664296982908165,
      "grad_norm": 0.1873830109834671,
      "learning_rate": 8.728543847973485e-06,
      "loss": 0.0297,
      "step": 405920
    },
    {
      "epoch": 0.6643297133468182,
      "grad_norm": 1.1730077266693115,
      "learning_rate": 8.728477955759968e-06,
      "loss": 0.0322,
      "step": 405940
    },
    {
      "epoch": 0.6643624437854716,
      "grad_norm": 0.49906763434410095,
      "learning_rate": 8.72841206354645e-06,
      "loss": 0.0347,
      "step": 405960
    },
    {
      "epoch": 0.664395174224125,
      "grad_norm": 1.7552456855773926,
      "learning_rate": 8.728346171332934e-06,
      "loss": 0.0447,
      "step": 405980
    },
    {
      "epoch": 0.6644279046627783,
      "grad_norm": 1.419052004814148,
      "learning_rate": 8.728280279119417e-06,
      "loss": 0.0297,
      "step": 406000
    },
    {
      "epoch": 0.6644606351014316,
      "grad_norm": 0.555895984172821,
      "learning_rate": 8.7282143869059e-06,
      "loss": 0.0237,
      "step": 406020
    },
    {
      "epoch": 0.664493365540085,
      "grad_norm": 0.828132688999176,
      "learning_rate": 8.728148494692383e-06,
      "loss": 0.0374,
      "step": 406040
    },
    {
      "epoch": 0.6645260959787384,
      "grad_norm": 2.131767749786377,
      "learning_rate": 8.728082602478866e-06,
      "loss": 0.041,
      "step": 406060
    },
    {
      "epoch": 0.6645588264173916,
      "grad_norm": 0.5002058148384094,
      "learning_rate": 8.728016710265348e-06,
      "loss": 0.048,
      "step": 406080
    },
    {
      "epoch": 0.664591556856045,
      "grad_norm": 1.8406001329421997,
      "learning_rate": 8.727950818051832e-06,
      "loss": 0.038,
      "step": 406100
    },
    {
      "epoch": 0.6646242872946984,
      "grad_norm": 1.83445405960083,
      "learning_rate": 8.727884925838315e-06,
      "loss": 0.0359,
      "step": 406120
    },
    {
      "epoch": 0.6646570177333516,
      "grad_norm": 1.2776962518692017,
      "learning_rate": 8.727819033624797e-06,
      "loss": 0.0356,
      "step": 406140
    },
    {
      "epoch": 0.664689748172005,
      "grad_norm": 1.2917855978012085,
      "learning_rate": 8.727753141411281e-06,
      "loss": 0.0291,
      "step": 406160
    },
    {
      "epoch": 0.6647224786106584,
      "grad_norm": 1.3522326946258545,
      "learning_rate": 8.727687249197763e-06,
      "loss": 0.0309,
      "step": 406180
    },
    {
      "epoch": 0.6647552090493116,
      "grad_norm": 0.4899727702140808,
      "learning_rate": 8.727621356984246e-06,
      "loss": 0.0352,
      "step": 406200
    },
    {
      "epoch": 0.664787939487965,
      "grad_norm": 0.7140716910362244,
      "learning_rate": 8.727555464770728e-06,
      "loss": 0.0417,
      "step": 406220
    },
    {
      "epoch": 0.6648206699266184,
      "grad_norm": 1.4529008865356445,
      "learning_rate": 8.727489572557212e-06,
      "loss": 0.0332,
      "step": 406240
    },
    {
      "epoch": 0.6648534003652717,
      "grad_norm": 2.7081100940704346,
      "learning_rate": 8.727423680343694e-06,
      "loss": 0.0347,
      "step": 406260
    },
    {
      "epoch": 0.664886130803925,
      "grad_norm": 1.9374476671218872,
      "learning_rate": 8.727357788130177e-06,
      "loss": 0.0299,
      "step": 406280
    },
    {
      "epoch": 0.6649188612425784,
      "grad_norm": 1.5076135396957397,
      "learning_rate": 8.72729189591666e-06,
      "loss": 0.0279,
      "step": 406300
    },
    {
      "epoch": 0.6649515916812317,
      "grad_norm": 1.5759328603744507,
      "learning_rate": 8.727226003703143e-06,
      "loss": 0.0282,
      "step": 406320
    },
    {
      "epoch": 0.664984322119885,
      "grad_norm": 3.475682258605957,
      "learning_rate": 8.727160111489626e-06,
      "loss": 0.0349,
      "step": 406340
    },
    {
      "epoch": 0.6650170525585384,
      "grad_norm": 0.683432400226593,
      "learning_rate": 8.727094219276108e-06,
      "loss": 0.0319,
      "step": 406360
    },
    {
      "epoch": 0.6650497829971918,
      "grad_norm": 0.9537177085876465,
      "learning_rate": 8.727028327062592e-06,
      "loss": 0.0419,
      "step": 406380
    },
    {
      "epoch": 0.665082513435845,
      "grad_norm": 1.4190274477005005,
      "learning_rate": 8.726962434849074e-06,
      "loss": 0.0298,
      "step": 406400
    },
    {
      "epoch": 0.6651152438744984,
      "grad_norm": 0.22037145495414734,
      "learning_rate": 8.726896542635557e-06,
      "loss": 0.0182,
      "step": 406420
    },
    {
      "epoch": 0.6651479743131518,
      "grad_norm": 1.9244105815887451,
      "learning_rate": 8.726830650422041e-06,
      "loss": 0.0431,
      "step": 406440
    },
    {
      "epoch": 0.6651807047518051,
      "grad_norm": 1.4242364168167114,
      "learning_rate": 8.726764758208523e-06,
      "loss": 0.0294,
      "step": 406460
    },
    {
      "epoch": 0.6652134351904584,
      "grad_norm": 0.853559672832489,
      "learning_rate": 8.726698865995006e-06,
      "loss": 0.0401,
      "step": 406480
    },
    {
      "epoch": 0.6652461656291118,
      "grad_norm": 1.4534423351287842,
      "learning_rate": 8.72663297378149e-06,
      "loss": 0.0459,
      "step": 406500
    },
    {
      "epoch": 0.6652788960677651,
      "grad_norm": 2.336865186691284,
      "learning_rate": 8.726567081567972e-06,
      "loss": 0.0409,
      "step": 406520
    },
    {
      "epoch": 0.6653116265064184,
      "grad_norm": 1.4573923349380493,
      "learning_rate": 8.726501189354456e-06,
      "loss": 0.0326,
      "step": 406540
    },
    {
      "epoch": 0.6653443569450718,
      "grad_norm": 0.47515085339546204,
      "learning_rate": 8.726435297140937e-06,
      "loss": 0.033,
      "step": 406560
    },
    {
      "epoch": 0.6653770873837251,
      "grad_norm": 0.09043888002634048,
      "learning_rate": 8.726369404927421e-06,
      "loss": 0.047,
      "step": 406580
    },
    {
      "epoch": 0.6654098178223784,
      "grad_norm": 0.8216052651405334,
      "learning_rate": 8.726303512713903e-06,
      "loss": 0.033,
      "step": 406600
    },
    {
      "epoch": 0.6654425482610318,
      "grad_norm": 1.0590685606002808,
      "learning_rate": 8.726237620500386e-06,
      "loss": 0.0398,
      "step": 406620
    },
    {
      "epoch": 0.6654752786996851,
      "grad_norm": 0.7253008484840393,
      "learning_rate": 8.726171728286868e-06,
      "loss": 0.023,
      "step": 406640
    },
    {
      "epoch": 0.6655080091383385,
      "grad_norm": 1.7891539335250854,
      "learning_rate": 8.726105836073352e-06,
      "loss": 0.0282,
      "step": 406660
    },
    {
      "epoch": 0.6655407395769918,
      "grad_norm": 0.8222143650054932,
      "learning_rate": 8.726039943859836e-06,
      "loss": 0.0444,
      "step": 406680
    },
    {
      "epoch": 0.6655734700156452,
      "grad_norm": 1.6431301832199097,
      "learning_rate": 8.725974051646317e-06,
      "loss": 0.0322,
      "step": 406700
    },
    {
      "epoch": 0.6656062004542985,
      "grad_norm": 1.0700936317443848,
      "learning_rate": 8.725908159432801e-06,
      "loss": 0.0401,
      "step": 406720
    },
    {
      "epoch": 0.6656389308929518,
      "grad_norm": 0.6335585713386536,
      "learning_rate": 8.725842267219283e-06,
      "loss": 0.0312,
      "step": 406740
    },
    {
      "epoch": 0.6656716613316052,
      "grad_norm": 0.7853001952171326,
      "learning_rate": 8.725776375005767e-06,
      "loss": 0.0208,
      "step": 406760
    },
    {
      "epoch": 0.6657043917702585,
      "grad_norm": 0.8746861815452576,
      "learning_rate": 8.725710482792248e-06,
      "loss": 0.0332,
      "step": 406780
    },
    {
      "epoch": 0.6657371222089118,
      "grad_norm": 0.8838253617286682,
      "learning_rate": 8.725644590578732e-06,
      "loss": 0.0442,
      "step": 406800
    },
    {
      "epoch": 0.6657698526475652,
      "grad_norm": 0.5934379696846008,
      "learning_rate": 8.725578698365214e-06,
      "loss": 0.0411,
      "step": 406820
    },
    {
      "epoch": 0.6658025830862185,
      "grad_norm": 1.2909107208251953,
      "learning_rate": 8.725512806151697e-06,
      "loss": 0.0346,
      "step": 406840
    },
    {
      "epoch": 0.6658353135248719,
      "grad_norm": 1.3157150745391846,
      "learning_rate": 8.725446913938181e-06,
      "loss": 0.0281,
      "step": 406860
    },
    {
      "epoch": 0.6658680439635252,
      "grad_norm": 0.3635656237602234,
      "learning_rate": 8.725381021724663e-06,
      "loss": 0.0322,
      "step": 406880
    },
    {
      "epoch": 0.6659007744021785,
      "grad_norm": 1.0428653955459595,
      "learning_rate": 8.725315129511147e-06,
      "loss": 0.0441,
      "step": 406900
    },
    {
      "epoch": 0.6659335048408319,
      "grad_norm": 1.135439395904541,
      "learning_rate": 8.72524923729763e-06,
      "loss": 0.0348,
      "step": 406920
    },
    {
      "epoch": 0.6659662352794852,
      "grad_norm": 1.2391588687896729,
      "learning_rate": 8.725183345084112e-06,
      "loss": 0.0435,
      "step": 406940
    },
    {
      "epoch": 0.6659989657181385,
      "grad_norm": 0.8089766502380371,
      "learning_rate": 8.725117452870596e-06,
      "loss": 0.017,
      "step": 406960
    },
    {
      "epoch": 0.6660316961567919,
      "grad_norm": 3.495912551879883,
      "learning_rate": 8.725051560657077e-06,
      "loss": 0.0328,
      "step": 406980
    },
    {
      "epoch": 0.6660644265954452,
      "grad_norm": 0.6377608180046082,
      "learning_rate": 8.724985668443561e-06,
      "loss": 0.0358,
      "step": 407000
    },
    {
      "epoch": 0.6660971570340986,
      "grad_norm": 0.8222697973251343,
      "learning_rate": 8.724919776230045e-06,
      "loss": 0.0378,
      "step": 407020
    },
    {
      "epoch": 0.6661298874727519,
      "grad_norm": 1.3843357563018799,
      "learning_rate": 8.724853884016527e-06,
      "loss": 0.0302,
      "step": 407040
    },
    {
      "epoch": 0.6661626179114053,
      "grad_norm": 0.39725226163864136,
      "learning_rate": 8.72478799180301e-06,
      "loss": 0.0372,
      "step": 407060
    },
    {
      "epoch": 0.6661953483500586,
      "grad_norm": 1.874910593032837,
      "learning_rate": 8.724722099589492e-06,
      "loss": 0.0256,
      "step": 407080
    },
    {
      "epoch": 0.6662280787887119,
      "grad_norm": 0.9119200706481934,
      "learning_rate": 8.724656207375976e-06,
      "loss": 0.0333,
      "step": 407100
    },
    {
      "epoch": 0.6662608092273653,
      "grad_norm": 0.4760661721229553,
      "learning_rate": 8.724590315162458e-06,
      "loss": 0.032,
      "step": 407120
    },
    {
      "epoch": 0.6662935396660186,
      "grad_norm": 0.5755648016929626,
      "learning_rate": 8.724524422948941e-06,
      "loss": 0.0388,
      "step": 407140
    },
    {
      "epoch": 0.6663262701046719,
      "grad_norm": 2.8017194271087646,
      "learning_rate": 8.724458530735423e-06,
      "loss": 0.0296,
      "step": 407160
    },
    {
      "epoch": 0.6663590005433253,
      "grad_norm": 1.1175014972686768,
      "learning_rate": 8.724392638521907e-06,
      "loss": 0.035,
      "step": 407180
    },
    {
      "epoch": 0.6663917309819786,
      "grad_norm": 0.3427658975124359,
      "learning_rate": 8.724326746308388e-06,
      "loss": 0.0225,
      "step": 407200
    },
    {
      "epoch": 0.6664244614206319,
      "grad_norm": 0.5767624974250793,
      "learning_rate": 8.724260854094872e-06,
      "loss": 0.0337,
      "step": 407220
    },
    {
      "epoch": 0.6664571918592853,
      "grad_norm": 0.6393152475357056,
      "learning_rate": 8.724194961881356e-06,
      "loss": 0.0324,
      "step": 407240
    },
    {
      "epoch": 0.6664899222979387,
      "grad_norm": 1.6497747898101807,
      "learning_rate": 8.724129069667838e-06,
      "loss": 0.0455,
      "step": 407260
    },
    {
      "epoch": 0.666522652736592,
      "grad_norm": 0.8297612071037292,
      "learning_rate": 8.724063177454321e-06,
      "loss": 0.0324,
      "step": 407280
    },
    {
      "epoch": 0.6665553831752453,
      "grad_norm": 1.4105790853500366,
      "learning_rate": 8.723997285240805e-06,
      "loss": 0.0382,
      "step": 407300
    },
    {
      "epoch": 0.6665881136138987,
      "grad_norm": 2.390244245529175,
      "learning_rate": 8.723931393027287e-06,
      "loss": 0.0335,
      "step": 407320
    },
    {
      "epoch": 0.666620844052552,
      "grad_norm": 1.0494037866592407,
      "learning_rate": 8.72386550081377e-06,
      "loss": 0.0424,
      "step": 407340
    },
    {
      "epoch": 0.6666535744912053,
      "grad_norm": 2.096465587615967,
      "learning_rate": 8.723799608600252e-06,
      "loss": 0.0308,
      "step": 407360
    },
    {
      "epoch": 0.6666863049298587,
      "grad_norm": 1.0599944591522217,
      "learning_rate": 8.723733716386736e-06,
      "loss": 0.0458,
      "step": 407380
    },
    {
      "epoch": 0.666719035368512,
      "grad_norm": 1.1003313064575195,
      "learning_rate": 8.72366782417322e-06,
      "loss": 0.0303,
      "step": 407400
    },
    {
      "epoch": 0.6667517658071653,
      "grad_norm": 2.0052850246429443,
      "learning_rate": 8.723601931959701e-06,
      "loss": 0.0283,
      "step": 407420
    },
    {
      "epoch": 0.6667844962458187,
      "grad_norm": 0.13322308659553528,
      "learning_rate": 8.723536039746185e-06,
      "loss": 0.0353,
      "step": 407440
    },
    {
      "epoch": 0.6668172266844721,
      "grad_norm": 3.4297983646392822,
      "learning_rate": 8.723470147532667e-06,
      "loss": 0.0326,
      "step": 407460
    },
    {
      "epoch": 0.6668499571231253,
      "grad_norm": 0.3470834493637085,
      "learning_rate": 8.72340425531915e-06,
      "loss": 0.0279,
      "step": 407480
    },
    {
      "epoch": 0.6668826875617787,
      "grad_norm": 3.1052162647247314,
      "learning_rate": 8.723338363105632e-06,
      "loss": 0.0285,
      "step": 407500
    },
    {
      "epoch": 0.6669154180004321,
      "grad_norm": 3.050096035003662,
      "learning_rate": 8.723272470892116e-06,
      "loss": 0.0506,
      "step": 407520
    },
    {
      "epoch": 0.6669481484390853,
      "grad_norm": 3.336496591567993,
      "learning_rate": 8.723206578678598e-06,
      "loss": 0.0294,
      "step": 407540
    },
    {
      "epoch": 0.6669808788777387,
      "grad_norm": 2.702247381210327,
      "learning_rate": 8.723140686465081e-06,
      "loss": 0.0352,
      "step": 407560
    },
    {
      "epoch": 0.6670136093163921,
      "grad_norm": 0.3528376519680023,
      "learning_rate": 8.723074794251563e-06,
      "loss": 0.0242,
      "step": 407580
    },
    {
      "epoch": 0.6670463397550453,
      "grad_norm": 1.3759099245071411,
      "learning_rate": 8.723008902038047e-06,
      "loss": 0.0441,
      "step": 407600
    },
    {
      "epoch": 0.6670790701936987,
      "grad_norm": 0.7142327427864075,
      "learning_rate": 8.722943009824529e-06,
      "loss": 0.0317,
      "step": 407620
    },
    {
      "epoch": 0.6671118006323521,
      "grad_norm": 0.41594305634498596,
      "learning_rate": 8.722877117611012e-06,
      "loss": 0.0479,
      "step": 407640
    },
    {
      "epoch": 0.6671445310710055,
      "grad_norm": 1.263962984085083,
      "learning_rate": 8.722811225397496e-06,
      "loss": 0.0456,
      "step": 407660
    },
    {
      "epoch": 0.6671772615096587,
      "grad_norm": 2.8657565116882324,
      "learning_rate": 8.722745333183978e-06,
      "loss": 0.0323,
      "step": 407680
    },
    {
      "epoch": 0.6672099919483121,
      "grad_norm": 2.9515886306762695,
      "learning_rate": 8.722679440970461e-06,
      "loss": 0.0204,
      "step": 407700
    },
    {
      "epoch": 0.6672427223869655,
      "grad_norm": 10.178994178771973,
      "learning_rate": 8.722613548756945e-06,
      "loss": 0.0423,
      "step": 407720
    },
    {
      "epoch": 0.6672754528256187,
      "grad_norm": 1.2040268182754517,
      "learning_rate": 8.722547656543427e-06,
      "loss": 0.0352,
      "step": 407740
    },
    {
      "epoch": 0.6673081832642721,
      "grad_norm": 1.7370519638061523,
      "learning_rate": 8.72248176432991e-06,
      "loss": 0.0358,
      "step": 407760
    },
    {
      "epoch": 0.6673409137029255,
      "grad_norm": 2.1897618770599365,
      "learning_rate": 8.722415872116394e-06,
      "loss": 0.0495,
      "step": 407780
    },
    {
      "epoch": 0.6673736441415787,
      "grad_norm": 1.319831132888794,
      "learning_rate": 8.722349979902876e-06,
      "loss": 0.0351,
      "step": 407800
    },
    {
      "epoch": 0.6674063745802321,
      "grad_norm": 0.458128958940506,
      "learning_rate": 8.72228408768936e-06,
      "loss": 0.0297,
      "step": 407820
    },
    {
      "epoch": 0.6674391050188855,
      "grad_norm": 2.130452871322632,
      "learning_rate": 8.722218195475841e-06,
      "loss": 0.0301,
      "step": 407840
    },
    {
      "epoch": 0.6674718354575389,
      "grad_norm": 0.5561915636062622,
      "learning_rate": 8.722152303262325e-06,
      "loss": 0.0335,
      "step": 407860
    },
    {
      "epoch": 0.6675045658961921,
      "grad_norm": 1.3485076427459717,
      "learning_rate": 8.722086411048807e-06,
      "loss": 0.0417,
      "step": 407880
    },
    {
      "epoch": 0.6675372963348455,
      "grad_norm": 0.670193612575531,
      "learning_rate": 8.72202051883529e-06,
      "loss": 0.0391,
      "step": 407900
    },
    {
      "epoch": 0.6675700267734989,
      "grad_norm": 1.1536455154418945,
      "learning_rate": 8.721954626621772e-06,
      "loss": 0.0371,
      "step": 407920
    },
    {
      "epoch": 0.6676027572121521,
      "grad_norm": 0.7604448795318604,
      "learning_rate": 8.721888734408256e-06,
      "loss": 0.0189,
      "step": 407940
    },
    {
      "epoch": 0.6676354876508055,
      "grad_norm": 1.7741999626159668,
      "learning_rate": 8.721822842194738e-06,
      "loss": 0.0332,
      "step": 407960
    },
    {
      "epoch": 0.6676682180894589,
      "grad_norm": 0.7793399095535278,
      "learning_rate": 8.721756949981221e-06,
      "loss": 0.0299,
      "step": 407980
    },
    {
      "epoch": 0.6677009485281121,
      "grad_norm": 0.43978920578956604,
      "learning_rate": 8.721691057767703e-06,
      "loss": 0.0288,
      "step": 408000
    },
    {
      "epoch": 0.6677336789667655,
      "grad_norm": 0.9799797534942627,
      "learning_rate": 8.721625165554187e-06,
      "loss": 0.0316,
      "step": 408020
    },
    {
      "epoch": 0.6677664094054189,
      "grad_norm": 1.751508116722107,
      "learning_rate": 8.72155927334067e-06,
      "loss": 0.0501,
      "step": 408040
    },
    {
      "epoch": 0.6677991398440722,
      "grad_norm": 0.44037675857543945,
      "learning_rate": 8.721493381127152e-06,
      "loss": 0.0375,
      "step": 408060
    },
    {
      "epoch": 0.6678318702827255,
      "grad_norm": 0.46523821353912354,
      "learning_rate": 8.721427488913636e-06,
      "loss": 0.0502,
      "step": 408080
    },
    {
      "epoch": 0.6678646007213789,
      "grad_norm": 0.6127694845199585,
      "learning_rate": 8.72136159670012e-06,
      "loss": 0.0318,
      "step": 408100
    },
    {
      "epoch": 0.6678973311600322,
      "grad_norm": 0.31751707196235657,
      "learning_rate": 8.721295704486601e-06,
      "loss": 0.0394,
      "step": 408120
    },
    {
      "epoch": 0.6679300615986855,
      "grad_norm": 0.35872170329093933,
      "learning_rate": 8.721229812273085e-06,
      "loss": 0.0288,
      "step": 408140
    },
    {
      "epoch": 0.6679627920373389,
      "grad_norm": 1.7040307521820068,
      "learning_rate": 8.721163920059568e-06,
      "loss": 0.0229,
      "step": 408160
    },
    {
      "epoch": 0.6679955224759923,
      "grad_norm": 0.47452330589294434,
      "learning_rate": 8.72109802784605e-06,
      "loss": 0.0315,
      "step": 408180
    },
    {
      "epoch": 0.6680282529146455,
      "grad_norm": 1.3885829448699951,
      "learning_rate": 8.721032135632534e-06,
      "loss": 0.0325,
      "step": 408200
    },
    {
      "epoch": 0.6680609833532989,
      "grad_norm": 1.4743361473083496,
      "learning_rate": 8.720966243419016e-06,
      "loss": 0.0264,
      "step": 408220
    },
    {
      "epoch": 0.6680937137919523,
      "grad_norm": 1.410340428352356,
      "learning_rate": 8.7209003512055e-06,
      "loss": 0.0317,
      "step": 408240
    },
    {
      "epoch": 0.6681264442306056,
      "grad_norm": 1.4489179849624634,
      "learning_rate": 8.720834458991981e-06,
      "loss": 0.0328,
      "step": 408260
    },
    {
      "epoch": 0.6681591746692589,
      "grad_norm": 0.5300531387329102,
      "learning_rate": 8.720768566778465e-06,
      "loss": 0.0322,
      "step": 408280
    },
    {
      "epoch": 0.6681919051079123,
      "grad_norm": 2.076603412628174,
      "learning_rate": 8.720702674564947e-06,
      "loss": 0.0318,
      "step": 408300
    },
    {
      "epoch": 0.6682246355465656,
      "grad_norm": 1.4578876495361328,
      "learning_rate": 8.72063678235143e-06,
      "loss": 0.0447,
      "step": 408320
    },
    {
      "epoch": 0.6682573659852189,
      "grad_norm": 0.7620788216590881,
      "learning_rate": 8.720570890137912e-06,
      "loss": 0.0388,
      "step": 408340
    },
    {
      "epoch": 0.6682900964238723,
      "grad_norm": 1.1590299606323242,
      "learning_rate": 8.720504997924396e-06,
      "loss": 0.0359,
      "step": 408360
    },
    {
      "epoch": 0.6683228268625256,
      "grad_norm": 1.5235159397125244,
      "learning_rate": 8.720439105710878e-06,
      "loss": 0.044,
      "step": 408380
    },
    {
      "epoch": 0.6683555573011789,
      "grad_norm": 1.04945707321167,
      "learning_rate": 8.720373213497361e-06,
      "loss": 0.0396,
      "step": 408400
    },
    {
      "epoch": 0.6683882877398323,
      "grad_norm": 0.5464446544647217,
      "learning_rate": 8.720307321283843e-06,
      "loss": 0.0316,
      "step": 408420
    },
    {
      "epoch": 0.6684210181784856,
      "grad_norm": 0.4699140787124634,
      "learning_rate": 8.720241429070327e-06,
      "loss": 0.0242,
      "step": 408440
    },
    {
      "epoch": 0.668453748617139,
      "grad_norm": 1.266542911529541,
      "learning_rate": 8.72017553685681e-06,
      "loss": 0.0233,
      "step": 408460
    },
    {
      "epoch": 0.6684864790557923,
      "grad_norm": 7.635674476623535,
      "learning_rate": 8.720109644643292e-06,
      "loss": 0.0458,
      "step": 408480
    },
    {
      "epoch": 0.6685192094944457,
      "grad_norm": 0.6525586247444153,
      "learning_rate": 8.720043752429776e-06,
      "loss": 0.0267,
      "step": 408500
    },
    {
      "epoch": 0.668551939933099,
      "grad_norm": 1.2976964712142944,
      "learning_rate": 8.71997786021626e-06,
      "loss": 0.0348,
      "step": 408520
    },
    {
      "epoch": 0.6685846703717523,
      "grad_norm": 0.9786761999130249,
      "learning_rate": 8.719911968002741e-06,
      "loss": 0.036,
      "step": 408540
    },
    {
      "epoch": 0.6686174008104057,
      "grad_norm": 1.4307401180267334,
      "learning_rate": 8.719846075789225e-06,
      "loss": 0.0381,
      "step": 408560
    },
    {
      "epoch": 0.668650131249059,
      "grad_norm": 1.3039733171463013,
      "learning_rate": 8.719780183575709e-06,
      "loss": 0.0274,
      "step": 408580
    },
    {
      "epoch": 0.6686828616877123,
      "grad_norm": 2.638665199279785,
      "learning_rate": 8.71971429136219e-06,
      "loss": 0.0296,
      "step": 408600
    },
    {
      "epoch": 0.6687155921263657,
      "grad_norm": 1.3281246423721313,
      "learning_rate": 8.719648399148674e-06,
      "loss": 0.0428,
      "step": 408620
    },
    {
      "epoch": 0.668748322565019,
      "grad_norm": 2.73040771484375,
      "learning_rate": 8.719582506935156e-06,
      "loss": 0.0312,
      "step": 408640
    },
    {
      "epoch": 0.6687810530036724,
      "grad_norm": 1.428820013999939,
      "learning_rate": 8.71951661472164e-06,
      "loss": 0.0231,
      "step": 408660
    },
    {
      "epoch": 0.6688137834423257,
      "grad_norm": 1.7156625986099243,
      "learning_rate": 8.719450722508121e-06,
      "loss": 0.0313,
      "step": 408680
    },
    {
      "epoch": 0.668846513880979,
      "grad_norm": 2.0704503059387207,
      "learning_rate": 8.719384830294605e-06,
      "loss": 0.0367,
      "step": 408700
    },
    {
      "epoch": 0.6688792443196324,
      "grad_norm": 0.4051625430583954,
      "learning_rate": 8.719318938081087e-06,
      "loss": 0.0231,
      "step": 408720
    },
    {
      "epoch": 0.6689119747582857,
      "grad_norm": 0.5430090427398682,
      "learning_rate": 8.71925304586757e-06,
      "loss": 0.0216,
      "step": 408740
    },
    {
      "epoch": 0.668944705196939,
      "grad_norm": 0.3654358983039856,
      "learning_rate": 8.719187153654052e-06,
      "loss": 0.0345,
      "step": 408760
    },
    {
      "epoch": 0.6689774356355924,
      "grad_norm": 2.21152663230896,
      "learning_rate": 8.719121261440536e-06,
      "loss": 0.0226,
      "step": 408780
    },
    {
      "epoch": 0.6690101660742457,
      "grad_norm": 1.1088393926620483,
      "learning_rate": 8.71905536922702e-06,
      "loss": 0.0424,
      "step": 408800
    },
    {
      "epoch": 0.669042896512899,
      "grad_norm": 1.1635856628417969,
      "learning_rate": 8.718989477013501e-06,
      "loss": 0.0327,
      "step": 408820
    },
    {
      "epoch": 0.6690756269515524,
      "grad_norm": 10.04858112335205,
      "learning_rate": 8.718923584799985e-06,
      "loss": 0.0265,
      "step": 408840
    },
    {
      "epoch": 0.6691083573902058,
      "grad_norm": 1.8948026895523071,
      "learning_rate": 8.718857692586467e-06,
      "loss": 0.0351,
      "step": 408860
    },
    {
      "epoch": 0.6691410878288591,
      "grad_norm": 2.189208745956421,
      "learning_rate": 8.71879180037295e-06,
      "loss": 0.0426,
      "step": 408880
    },
    {
      "epoch": 0.6691738182675124,
      "grad_norm": 0.380399614572525,
      "learning_rate": 8.718725908159434e-06,
      "loss": 0.0333,
      "step": 408900
    },
    {
      "epoch": 0.6692065487061658,
      "grad_norm": 1.688746690750122,
      "learning_rate": 8.718660015945916e-06,
      "loss": 0.0246,
      "step": 408920
    },
    {
      "epoch": 0.6692392791448191,
      "grad_norm": 1.1467722654342651,
      "learning_rate": 8.7185941237324e-06,
      "loss": 0.0359,
      "step": 408940
    },
    {
      "epoch": 0.6692720095834724,
      "grad_norm": 1.3680415153503418,
      "learning_rate": 8.718528231518883e-06,
      "loss": 0.0335,
      "step": 408960
    },
    {
      "epoch": 0.6693047400221258,
      "grad_norm": 0.22274191677570343,
      "learning_rate": 8.718462339305365e-06,
      "loss": 0.0294,
      "step": 408980
    },
    {
      "epoch": 0.6693374704607791,
      "grad_norm": 0.3254736661911011,
      "learning_rate": 8.718396447091849e-06,
      "loss": 0.0264,
      "step": 409000
    },
    {
      "epoch": 0.6693702008994324,
      "grad_norm": 0.8176150918006897,
      "learning_rate": 8.71833055487833e-06,
      "loss": 0.0396,
      "step": 409020
    },
    {
      "epoch": 0.6694029313380858,
      "grad_norm": 0.32619646191596985,
      "learning_rate": 8.718264662664814e-06,
      "loss": 0.0357,
      "step": 409040
    },
    {
      "epoch": 0.6694356617767391,
      "grad_norm": 1.9764988422393799,
      "learning_rate": 8.718198770451296e-06,
      "loss": 0.033,
      "step": 409060
    },
    {
      "epoch": 0.6694683922153924,
      "grad_norm": 0.9133588671684265,
      "learning_rate": 8.71813287823778e-06,
      "loss": 0.0368,
      "step": 409080
    },
    {
      "epoch": 0.6695011226540458,
      "grad_norm": 0.1441992223262787,
      "learning_rate": 8.718066986024261e-06,
      "loss": 0.0221,
      "step": 409100
    },
    {
      "epoch": 0.6695338530926992,
      "grad_norm": 0.9002895951271057,
      "learning_rate": 8.718001093810745e-06,
      "loss": 0.0317,
      "step": 409120
    },
    {
      "epoch": 0.6695665835313525,
      "grad_norm": 1.1807501316070557,
      "learning_rate": 8.717935201597229e-06,
      "loss": 0.0338,
      "step": 409140
    },
    {
      "epoch": 0.6695993139700058,
      "grad_norm": 1.246376872062683,
      "learning_rate": 8.71786930938371e-06,
      "loss": 0.0323,
      "step": 409160
    },
    {
      "epoch": 0.6696320444086592,
      "grad_norm": 0.7286669611930847,
      "learning_rate": 8.717803417170194e-06,
      "loss": 0.0381,
      "step": 409180
    },
    {
      "epoch": 0.6696647748473125,
      "grad_norm": 1.7449607849121094,
      "learning_rate": 8.717737524956676e-06,
      "loss": 0.0295,
      "step": 409200
    },
    {
      "epoch": 0.6696975052859658,
      "grad_norm": 2.2324910163879395,
      "learning_rate": 8.71767163274316e-06,
      "loss": 0.0283,
      "step": 409220
    },
    {
      "epoch": 0.6697302357246192,
      "grad_norm": 0.8667919635772705,
      "learning_rate": 8.717605740529641e-06,
      "loss": 0.0405,
      "step": 409240
    },
    {
      "epoch": 0.6697629661632725,
      "grad_norm": 0.5750820636749268,
      "learning_rate": 8.717539848316125e-06,
      "loss": 0.0518,
      "step": 409260
    },
    {
      "epoch": 0.6697956966019258,
      "grad_norm": 1.5986859798431396,
      "learning_rate": 8.717473956102609e-06,
      "loss": 0.04,
      "step": 409280
    },
    {
      "epoch": 0.6698284270405792,
      "grad_norm": 1.1866565942764282,
      "learning_rate": 8.71740806388909e-06,
      "loss": 0.0455,
      "step": 409300
    },
    {
      "epoch": 0.6698611574792326,
      "grad_norm": 1.9718754291534424,
      "learning_rate": 8.717342171675574e-06,
      "loss": 0.0222,
      "step": 409320
    },
    {
      "epoch": 0.6698938879178858,
      "grad_norm": 2.022557258605957,
      "learning_rate": 8.717276279462058e-06,
      "loss": 0.0363,
      "step": 409340
    },
    {
      "epoch": 0.6699266183565392,
      "grad_norm": 0.45741283893585205,
      "learning_rate": 8.71721038724854e-06,
      "loss": 0.0395,
      "step": 409360
    },
    {
      "epoch": 0.6699593487951926,
      "grad_norm": 0.3629414737224579,
      "learning_rate": 8.717144495035023e-06,
      "loss": 0.0337,
      "step": 409380
    },
    {
      "epoch": 0.6699920792338458,
      "grad_norm": 0.6262497305870056,
      "learning_rate": 8.717078602821505e-06,
      "loss": 0.0249,
      "step": 409400
    },
    {
      "epoch": 0.6700248096724992,
      "grad_norm": 0.6668084859848022,
      "learning_rate": 8.717012710607989e-06,
      "loss": 0.0377,
      "step": 409420
    },
    {
      "epoch": 0.6700575401111526,
      "grad_norm": 0.9795911312103271,
      "learning_rate": 8.71694681839447e-06,
      "loss": 0.0368,
      "step": 409440
    },
    {
      "epoch": 0.6700902705498059,
      "grad_norm": 3.375561475753784,
      "learning_rate": 8.716880926180954e-06,
      "loss": 0.0328,
      "step": 409460
    },
    {
      "epoch": 0.6701230009884592,
      "grad_norm": 0.33396124839782715,
      "learning_rate": 8.716815033967438e-06,
      "loss": 0.0297,
      "step": 409480
    },
    {
      "epoch": 0.6701557314271126,
      "grad_norm": 0.7059114575386047,
      "learning_rate": 8.71674914175392e-06,
      "loss": 0.0444,
      "step": 409500
    },
    {
      "epoch": 0.670188461865766,
      "grad_norm": 1.2492226362228394,
      "learning_rate": 8.716683249540403e-06,
      "loss": 0.0387,
      "step": 409520
    },
    {
      "epoch": 0.6702211923044192,
      "grad_norm": 1.3401415348052979,
      "learning_rate": 8.716617357326885e-06,
      "loss": 0.0386,
      "step": 409540
    },
    {
      "epoch": 0.6702539227430726,
      "grad_norm": 1.3016088008880615,
      "learning_rate": 8.716551465113369e-06,
      "loss": 0.0346,
      "step": 409560
    },
    {
      "epoch": 0.670286653181726,
      "grad_norm": 0.5745057463645935,
      "learning_rate": 8.71648557289985e-06,
      "loss": 0.0379,
      "step": 409580
    },
    {
      "epoch": 0.6703193836203792,
      "grad_norm": 0.4933115839958191,
      "learning_rate": 8.716419680686334e-06,
      "loss": 0.029,
      "step": 409600
    },
    {
      "epoch": 0.6703521140590326,
      "grad_norm": 1.9040027856826782,
      "learning_rate": 8.716353788472816e-06,
      "loss": 0.0337,
      "step": 409620
    },
    {
      "epoch": 0.670384844497686,
      "grad_norm": 11.298958778381348,
      "learning_rate": 8.7162878962593e-06,
      "loss": 0.0311,
      "step": 409640
    },
    {
      "epoch": 0.6704175749363392,
      "grad_norm": 2.5440423488616943,
      "learning_rate": 8.716222004045782e-06,
      "loss": 0.0352,
      "step": 409660
    },
    {
      "epoch": 0.6704503053749926,
      "grad_norm": 1.3916971683502197,
      "learning_rate": 8.716156111832265e-06,
      "loss": 0.0477,
      "step": 409680
    },
    {
      "epoch": 0.670483035813646,
      "grad_norm": 3.4504005908966064,
      "learning_rate": 8.716090219618749e-06,
      "loss": 0.0275,
      "step": 409700
    },
    {
      "epoch": 0.6705157662522994,
      "grad_norm": 1.3663064241409302,
      "learning_rate": 8.71602432740523e-06,
      "loss": 0.0253,
      "step": 409720
    },
    {
      "epoch": 0.6705484966909526,
      "grad_norm": 1.1573197841644287,
      "learning_rate": 8.715958435191714e-06,
      "loss": 0.0278,
      "step": 409740
    },
    {
      "epoch": 0.670581227129606,
      "grad_norm": 2.252227783203125,
      "learning_rate": 8.715892542978198e-06,
      "loss": 0.0338,
      "step": 409760
    },
    {
      "epoch": 0.6706139575682594,
      "grad_norm": 0.9231746792793274,
      "learning_rate": 8.71582665076468e-06,
      "loss": 0.0373,
      "step": 409780
    },
    {
      "epoch": 0.6706466880069126,
      "grad_norm": 1.0131967067718506,
      "learning_rate": 8.715760758551163e-06,
      "loss": 0.0416,
      "step": 409800
    },
    {
      "epoch": 0.670679418445566,
      "grad_norm": 1.4521468877792358,
      "learning_rate": 8.715694866337645e-06,
      "loss": 0.0471,
      "step": 409820
    },
    {
      "epoch": 0.6707121488842194,
      "grad_norm": 1.5774221420288086,
      "learning_rate": 8.715628974124129e-06,
      "loss": 0.0289,
      "step": 409840
    },
    {
      "epoch": 0.6707448793228726,
      "grad_norm": 1.5279780626296997,
      "learning_rate": 8.715563081910612e-06,
      "loss": 0.0584,
      "step": 409860
    },
    {
      "epoch": 0.670777609761526,
      "grad_norm": 0.2527111768722534,
      "learning_rate": 8.715497189697094e-06,
      "loss": 0.0352,
      "step": 409880
    },
    {
      "epoch": 0.6708103402001794,
      "grad_norm": 1.621863603591919,
      "learning_rate": 8.715431297483578e-06,
      "loss": 0.0317,
      "step": 409900
    },
    {
      "epoch": 0.6708430706388328,
      "grad_norm": 1.323931097984314,
      "learning_rate": 8.71536540527006e-06,
      "loss": 0.0308,
      "step": 409920
    },
    {
      "epoch": 0.670875801077486,
      "grad_norm": 6.091503143310547,
      "learning_rate": 8.715299513056543e-06,
      "loss": 0.0388,
      "step": 409940
    },
    {
      "epoch": 0.6709085315161394,
      "grad_norm": 3.0392696857452393,
      "learning_rate": 8.715233620843025e-06,
      "loss": 0.035,
      "step": 409960
    },
    {
      "epoch": 0.6709412619547928,
      "grad_norm": 1.36285400390625,
      "learning_rate": 8.715167728629509e-06,
      "loss": 0.0317,
      "step": 409980
    },
    {
      "epoch": 0.670973992393446,
      "grad_norm": 0.7219410538673401,
      "learning_rate": 8.71510183641599e-06,
      "loss": 0.0415,
      "step": 410000
    },
    {
      "epoch": 0.6710067228320994,
      "grad_norm": 1.2345246076583862,
      "learning_rate": 8.715035944202474e-06,
      "loss": 0.0216,
      "step": 410020
    },
    {
      "epoch": 0.6710394532707528,
      "grad_norm": 1.5529675483703613,
      "learning_rate": 8.714970051988956e-06,
      "loss": 0.0305,
      "step": 410040
    },
    {
      "epoch": 0.671072183709406,
      "grad_norm": 0.7078106999397278,
      "learning_rate": 8.71490415977544e-06,
      "loss": 0.0309,
      "step": 410060
    },
    {
      "epoch": 0.6711049141480594,
      "grad_norm": 3.6363401412963867,
      "learning_rate": 8.714838267561923e-06,
      "loss": 0.0374,
      "step": 410080
    },
    {
      "epoch": 0.6711376445867128,
      "grad_norm": 2.6746208667755127,
      "learning_rate": 8.714772375348405e-06,
      "loss": 0.0401,
      "step": 410100
    },
    {
      "epoch": 0.6711703750253661,
      "grad_norm": 0.5114328265190125,
      "learning_rate": 8.714706483134889e-06,
      "loss": 0.0467,
      "step": 410120
    },
    {
      "epoch": 0.6712031054640194,
      "grad_norm": 1.8625690937042236,
      "learning_rate": 8.714640590921372e-06,
      "loss": 0.0514,
      "step": 410140
    },
    {
      "epoch": 0.6712358359026728,
      "grad_norm": 3.581162214279175,
      "learning_rate": 8.714574698707854e-06,
      "loss": 0.0513,
      "step": 410160
    },
    {
      "epoch": 0.6712685663413261,
      "grad_norm": 0.5367543697357178,
      "learning_rate": 8.714508806494338e-06,
      "loss": 0.032,
      "step": 410180
    },
    {
      "epoch": 0.6713012967799794,
      "grad_norm": 0.7881338000297546,
      "learning_rate": 8.714442914280821e-06,
      "loss": 0.0325,
      "step": 410200
    },
    {
      "epoch": 0.6713340272186328,
      "grad_norm": 1.3200342655181885,
      "learning_rate": 8.714377022067303e-06,
      "loss": 0.0346,
      "step": 410220
    },
    {
      "epoch": 0.6713667576572862,
      "grad_norm": 1.2049696445465088,
      "learning_rate": 8.714311129853787e-06,
      "loss": 0.0385,
      "step": 410240
    },
    {
      "epoch": 0.6713994880959394,
      "grad_norm": 1.212645411491394,
      "learning_rate": 8.714245237640269e-06,
      "loss": 0.0345,
      "step": 410260
    },
    {
      "epoch": 0.6714322185345928,
      "grad_norm": 2.3605823516845703,
      "learning_rate": 8.714179345426752e-06,
      "loss": 0.0515,
      "step": 410280
    },
    {
      "epoch": 0.6714649489732462,
      "grad_norm": 1.2039361000061035,
      "learning_rate": 8.714113453213234e-06,
      "loss": 0.0429,
      "step": 410300
    },
    {
      "epoch": 0.6714976794118995,
      "grad_norm": 0.9782699942588806,
      "learning_rate": 8.714047560999718e-06,
      "loss": 0.032,
      "step": 410320
    },
    {
      "epoch": 0.6715304098505528,
      "grad_norm": 1.181195855140686,
      "learning_rate": 8.7139816687862e-06,
      "loss": 0.0256,
      "step": 410340
    },
    {
      "epoch": 0.6715631402892062,
      "grad_norm": 0.6815544366836548,
      "learning_rate": 8.713915776572683e-06,
      "loss": 0.0329,
      "step": 410360
    },
    {
      "epoch": 0.6715958707278595,
      "grad_norm": 0.3618762791156769,
      "learning_rate": 8.713849884359165e-06,
      "loss": 0.0306,
      "step": 410380
    },
    {
      "epoch": 0.6716286011665128,
      "grad_norm": 0.5001932382583618,
      "learning_rate": 8.713783992145649e-06,
      "loss": 0.0383,
      "step": 410400
    },
    {
      "epoch": 0.6716613316051662,
      "grad_norm": 1.5662710666656494,
      "learning_rate": 8.71371809993213e-06,
      "loss": 0.0341,
      "step": 410420
    },
    {
      "epoch": 0.6716940620438195,
      "grad_norm": 0.8936066627502441,
      "learning_rate": 8.713652207718614e-06,
      "loss": 0.0321,
      "step": 410440
    },
    {
      "epoch": 0.6717267924824728,
      "grad_norm": 2.0104305744171143,
      "learning_rate": 8.713586315505096e-06,
      "loss": 0.023,
      "step": 410460
    },
    {
      "epoch": 0.6717595229211262,
      "grad_norm": 0.5448406934738159,
      "learning_rate": 8.71352042329158e-06,
      "loss": 0.029,
      "step": 410480
    },
    {
      "epoch": 0.6717922533597795,
      "grad_norm": 0.8149194121360779,
      "learning_rate": 8.713454531078063e-06,
      "loss": 0.0236,
      "step": 410500
    },
    {
      "epoch": 0.6718249837984329,
      "grad_norm": 1.1193373203277588,
      "learning_rate": 8.713388638864545e-06,
      "loss": 0.0352,
      "step": 410520
    },
    {
      "epoch": 0.6718577142370862,
      "grad_norm": 0.3264867067337036,
      "learning_rate": 8.713322746651029e-06,
      "loss": 0.0357,
      "step": 410540
    },
    {
      "epoch": 0.6718904446757396,
      "grad_norm": 1.6405754089355469,
      "learning_rate": 8.713256854437512e-06,
      "loss": 0.0402,
      "step": 410560
    },
    {
      "epoch": 0.6719231751143929,
      "grad_norm": 0.8523550033569336,
      "learning_rate": 8.713190962223994e-06,
      "loss": 0.0376,
      "step": 410580
    },
    {
      "epoch": 0.6719559055530462,
      "grad_norm": 0.8192702531814575,
      "learning_rate": 8.713125070010478e-06,
      "loss": 0.0246,
      "step": 410600
    },
    {
      "epoch": 0.6719886359916996,
      "grad_norm": 1.9990767240524292,
      "learning_rate": 8.713059177796962e-06,
      "loss": 0.0288,
      "step": 410620
    },
    {
      "epoch": 0.6720213664303529,
      "grad_norm": 0.5133143663406372,
      "learning_rate": 8.712993285583443e-06,
      "loss": 0.0353,
      "step": 410640
    },
    {
      "epoch": 0.6720540968690062,
      "grad_norm": 7.083938121795654,
      "learning_rate": 8.712927393369927e-06,
      "loss": 0.0304,
      "step": 410660
    },
    {
      "epoch": 0.6720868273076596,
      "grad_norm": 1.5429133176803589,
      "learning_rate": 8.712861501156409e-06,
      "loss": 0.0219,
      "step": 410680
    },
    {
      "epoch": 0.6721195577463129,
      "grad_norm": 0.4673210382461548,
      "learning_rate": 8.712795608942893e-06,
      "loss": 0.0351,
      "step": 410700
    },
    {
      "epoch": 0.6721522881849663,
      "grad_norm": 3.1915576457977295,
      "learning_rate": 8.712729716729374e-06,
      "loss": 0.0291,
      "step": 410720
    },
    {
      "epoch": 0.6721850186236196,
      "grad_norm": 2.2647716999053955,
      "learning_rate": 8.712663824515858e-06,
      "loss": 0.0295,
      "step": 410740
    },
    {
      "epoch": 0.6722177490622729,
      "grad_norm": 6.800604343414307,
      "learning_rate": 8.71259793230234e-06,
      "loss": 0.0392,
      "step": 410760
    },
    {
      "epoch": 0.6722504795009263,
      "grad_norm": 1.5107473134994507,
      "learning_rate": 8.712532040088823e-06,
      "loss": 0.0396,
      "step": 410780
    },
    {
      "epoch": 0.6722832099395796,
      "grad_norm": 0.10787470638751984,
      "learning_rate": 8.712466147875305e-06,
      "loss": 0.0344,
      "step": 410800
    },
    {
      "epoch": 0.672315940378233,
      "grad_norm": 0.25754374265670776,
      "learning_rate": 8.712400255661789e-06,
      "loss": 0.0305,
      "step": 410820
    },
    {
      "epoch": 0.6723486708168863,
      "grad_norm": 0.30111730098724365,
      "learning_rate": 8.71233436344827e-06,
      "loss": 0.0294,
      "step": 410840
    },
    {
      "epoch": 0.6723814012555396,
      "grad_norm": 2.866588830947876,
      "learning_rate": 8.712268471234754e-06,
      "loss": 0.0462,
      "step": 410860
    },
    {
      "epoch": 0.672414131694193,
      "grad_norm": 1.6491345167160034,
      "learning_rate": 8.712202579021238e-06,
      "loss": 0.0341,
      "step": 410880
    },
    {
      "epoch": 0.6724468621328463,
      "grad_norm": 0.7478783130645752,
      "learning_rate": 8.71213668680772e-06,
      "loss": 0.0442,
      "step": 410900
    },
    {
      "epoch": 0.6724795925714997,
      "grad_norm": 1.1735605001449585,
      "learning_rate": 8.712070794594203e-06,
      "loss": 0.0368,
      "step": 410920
    },
    {
      "epoch": 0.672512323010153,
      "grad_norm": 0.8543133735656738,
      "learning_rate": 8.712004902380687e-06,
      "loss": 0.0349,
      "step": 410940
    },
    {
      "epoch": 0.6725450534488063,
      "grad_norm": 0.626302182674408,
      "learning_rate": 8.711939010167169e-06,
      "loss": 0.0331,
      "step": 410960
    },
    {
      "epoch": 0.6725777838874597,
      "grad_norm": 2.5829060077667236,
      "learning_rate": 8.711873117953653e-06,
      "loss": 0.0341,
      "step": 410980
    },
    {
      "epoch": 0.672610514326113,
      "grad_norm": 0.40430116653442383,
      "learning_rate": 8.711807225740136e-06,
      "loss": 0.0361,
      "step": 411000
    },
    {
      "epoch": 0.6726432447647663,
      "grad_norm": 1.250078797340393,
      "learning_rate": 8.711741333526618e-06,
      "loss": 0.0353,
      "step": 411020
    },
    {
      "epoch": 0.6726759752034197,
      "grad_norm": 0.7504625916481018,
      "learning_rate": 8.711675441313102e-06,
      "loss": 0.0314,
      "step": 411040
    },
    {
      "epoch": 0.672708705642073,
      "grad_norm": 0.5275562405586243,
      "learning_rate": 8.711609549099584e-06,
      "loss": 0.0322,
      "step": 411060
    },
    {
      "epoch": 0.6727414360807263,
      "grad_norm": 1.5686641931533813,
      "learning_rate": 8.711543656886067e-06,
      "loss": 0.0418,
      "step": 411080
    },
    {
      "epoch": 0.6727741665193797,
      "grad_norm": 1.1963281631469727,
      "learning_rate": 8.711477764672549e-06,
      "loss": 0.0332,
      "step": 411100
    },
    {
      "epoch": 0.6728068969580331,
      "grad_norm": 1.323930263519287,
      "learning_rate": 8.711411872459033e-06,
      "loss": 0.0317,
      "step": 411120
    },
    {
      "epoch": 0.6728396273966863,
      "grad_norm": 0.6864950060844421,
      "learning_rate": 8.711345980245514e-06,
      "loss": 0.0264,
      "step": 411140
    },
    {
      "epoch": 0.6728723578353397,
      "grad_norm": 0.6327030062675476,
      "learning_rate": 8.711280088031998e-06,
      "loss": 0.0271,
      "step": 411160
    },
    {
      "epoch": 0.6729050882739931,
      "grad_norm": 2.3636527061462402,
      "learning_rate": 8.71121419581848e-06,
      "loss": 0.0309,
      "step": 411180
    },
    {
      "epoch": 0.6729378187126464,
      "grad_norm": 3.0060439109802246,
      "learning_rate": 8.711148303604964e-06,
      "loss": 0.0516,
      "step": 411200
    },
    {
      "epoch": 0.6729705491512997,
      "grad_norm": 1.7076996564865112,
      "learning_rate": 8.711082411391445e-06,
      "loss": 0.0311,
      "step": 411220
    },
    {
      "epoch": 0.6730032795899531,
      "grad_norm": 0.35668909549713135,
      "learning_rate": 8.711016519177929e-06,
      "loss": 0.0269,
      "step": 411240
    },
    {
      "epoch": 0.6730360100286064,
      "grad_norm": 0.7555742263793945,
      "learning_rate": 8.710950626964413e-06,
      "loss": 0.0332,
      "step": 411260
    },
    {
      "epoch": 0.6730687404672597,
      "grad_norm": 1.8706285953521729,
      "learning_rate": 8.710884734750894e-06,
      "loss": 0.0309,
      "step": 411280
    },
    {
      "epoch": 0.6731014709059131,
      "grad_norm": 1.6888210773468018,
      "learning_rate": 8.710818842537378e-06,
      "loss": 0.0294,
      "step": 411300
    },
    {
      "epoch": 0.6731342013445665,
      "grad_norm": 0.9084784388542175,
      "learning_rate": 8.71075295032386e-06,
      "loss": 0.0272,
      "step": 411320
    },
    {
      "epoch": 0.6731669317832197,
      "grad_norm": 1.7908719778060913,
      "learning_rate": 8.710687058110344e-06,
      "loss": 0.0371,
      "step": 411340
    },
    {
      "epoch": 0.6731996622218731,
      "grad_norm": 0.3010340631008148,
      "learning_rate": 8.710621165896827e-06,
      "loss": 0.0239,
      "step": 411360
    },
    {
      "epoch": 0.6732323926605265,
      "grad_norm": 1.0056992769241333,
      "learning_rate": 8.710555273683309e-06,
      "loss": 0.0329,
      "step": 411380
    },
    {
      "epoch": 0.6732651230991797,
      "grad_norm": 0.6240719556808472,
      "learning_rate": 8.710489381469793e-06,
      "loss": 0.0248,
      "step": 411400
    },
    {
      "epoch": 0.6732978535378331,
      "grad_norm": 1.5280795097351074,
      "learning_rate": 8.710423489256276e-06,
      "loss": 0.0347,
      "step": 411420
    },
    {
      "epoch": 0.6733305839764865,
      "grad_norm": 0.7254886627197266,
      "learning_rate": 8.710357597042758e-06,
      "loss": 0.0299,
      "step": 411440
    },
    {
      "epoch": 0.6733633144151397,
      "grad_norm": 1.1264889240264893,
      "learning_rate": 8.710291704829242e-06,
      "loss": 0.0323,
      "step": 411460
    },
    {
      "epoch": 0.6733960448537931,
      "grad_norm": 0.5159501433372498,
      "learning_rate": 8.710225812615724e-06,
      "loss": 0.0223,
      "step": 411480
    },
    {
      "epoch": 0.6734287752924465,
      "grad_norm": 0.40788570046424866,
      "learning_rate": 8.710159920402207e-06,
      "loss": 0.0241,
      "step": 411500
    },
    {
      "epoch": 0.6734615057310999,
      "grad_norm": 0.4429658353328705,
      "learning_rate": 8.710094028188689e-06,
      "loss": 0.0323,
      "step": 411520
    },
    {
      "epoch": 0.6734942361697531,
      "grad_norm": 1.484178900718689,
      "learning_rate": 8.710028135975173e-06,
      "loss": 0.0259,
      "step": 411540
    },
    {
      "epoch": 0.6735269666084065,
      "grad_norm": 1.0998486280441284,
      "learning_rate": 8.709962243761655e-06,
      "loss": 0.043,
      "step": 411560
    },
    {
      "epoch": 0.6735596970470599,
      "grad_norm": 0.24713121354579926,
      "learning_rate": 8.709896351548138e-06,
      "loss": 0.0334,
      "step": 411580
    },
    {
      "epoch": 0.6735924274857131,
      "grad_norm": 0.8656218647956848,
      "learning_rate": 8.709830459334622e-06,
      "loss": 0.0425,
      "step": 411600
    },
    {
      "epoch": 0.6736251579243665,
      "grad_norm": 0.7906123995780945,
      "learning_rate": 8.709764567121104e-06,
      "loss": 0.0245,
      "step": 411620
    },
    {
      "epoch": 0.6736578883630199,
      "grad_norm": 1.5684754848480225,
      "learning_rate": 8.709698674907587e-06,
      "loss": 0.0312,
      "step": 411640
    },
    {
      "epoch": 0.6736906188016731,
      "grad_norm": 0.8299262523651123,
      "learning_rate": 8.709632782694069e-06,
      "loss": 0.0361,
      "step": 411660
    },
    {
      "epoch": 0.6737233492403265,
      "grad_norm": 0.6228318214416504,
      "learning_rate": 8.709566890480553e-06,
      "loss": 0.0341,
      "step": 411680
    },
    {
      "epoch": 0.6737560796789799,
      "grad_norm": 0.41112130880355835,
      "learning_rate": 8.709500998267035e-06,
      "loss": 0.0326,
      "step": 411700
    },
    {
      "epoch": 0.6737888101176331,
      "grad_norm": 1.3370245695114136,
      "learning_rate": 8.709435106053518e-06,
      "loss": 0.0294,
      "step": 411720
    },
    {
      "epoch": 0.6738215405562865,
      "grad_norm": 3.198974132537842,
      "learning_rate": 8.709369213840002e-06,
      "loss": 0.0345,
      "step": 411740
    },
    {
      "epoch": 0.6738542709949399,
      "grad_norm": 2.3032314777374268,
      "learning_rate": 8.709303321626484e-06,
      "loss": 0.0457,
      "step": 411760
    },
    {
      "epoch": 0.6738870014335933,
      "grad_norm": 1.2958886623382568,
      "learning_rate": 8.709237429412967e-06,
      "loss": 0.0349,
      "step": 411780
    },
    {
      "epoch": 0.6739197318722465,
      "grad_norm": 1.3993871212005615,
      "learning_rate": 8.70917153719945e-06,
      "loss": 0.0379,
      "step": 411800
    },
    {
      "epoch": 0.6739524623108999,
      "grad_norm": 1.1116669178009033,
      "learning_rate": 8.709105644985933e-06,
      "loss": 0.0416,
      "step": 411820
    },
    {
      "epoch": 0.6739851927495533,
      "grad_norm": 1.2268866300582886,
      "learning_rate": 8.709039752772416e-06,
      "loss": 0.0407,
      "step": 411840
    },
    {
      "epoch": 0.6740179231882065,
      "grad_norm": 0.8668238520622253,
      "learning_rate": 8.708973860558898e-06,
      "loss": 0.0323,
      "step": 411860
    },
    {
      "epoch": 0.6740506536268599,
      "grad_norm": 1.1755067110061646,
      "learning_rate": 8.708907968345382e-06,
      "loss": 0.0252,
      "step": 411880
    },
    {
      "epoch": 0.6740833840655133,
      "grad_norm": 0.9302487373352051,
      "learning_rate": 8.708842076131864e-06,
      "loss": 0.0382,
      "step": 411900
    },
    {
      "epoch": 0.6741161145041665,
      "grad_norm": 4.1374030113220215,
      "learning_rate": 8.708776183918347e-06,
      "loss": 0.0453,
      "step": 411920
    },
    {
      "epoch": 0.6741488449428199,
      "grad_norm": 0.2909553349018097,
      "learning_rate": 8.708710291704829e-06,
      "loss": 0.0342,
      "step": 411940
    },
    {
      "epoch": 0.6741815753814733,
      "grad_norm": 2.3986546993255615,
      "learning_rate": 8.708644399491313e-06,
      "loss": 0.034,
      "step": 411960
    },
    {
      "epoch": 0.6742143058201266,
      "grad_norm": 1.6543700695037842,
      "learning_rate": 8.708578507277796e-06,
      "loss": 0.0549,
      "step": 411980
    },
    {
      "epoch": 0.6742470362587799,
      "grad_norm": 1.075921893119812,
      "learning_rate": 8.708512615064278e-06,
      "loss": 0.0421,
      "step": 412000
    },
    {
      "epoch": 0.6742797666974333,
      "grad_norm": 1.4750783443450928,
      "learning_rate": 8.708446722850762e-06,
      "loss": 0.0249,
      "step": 412020
    },
    {
      "epoch": 0.6743124971360867,
      "grad_norm": 1.0210374593734741,
      "learning_rate": 8.708380830637244e-06,
      "loss": 0.0417,
      "step": 412040
    },
    {
      "epoch": 0.6743452275747399,
      "grad_norm": 0.9496697187423706,
      "learning_rate": 8.708314938423727e-06,
      "loss": 0.0278,
      "step": 412060
    },
    {
      "epoch": 0.6743779580133933,
      "grad_norm": 0.4233327805995941,
      "learning_rate": 8.70824904621021e-06,
      "loss": 0.042,
      "step": 412080
    },
    {
      "epoch": 0.6744106884520467,
      "grad_norm": 2.2749693393707275,
      "learning_rate": 8.708183153996693e-06,
      "loss": 0.0397,
      "step": 412100
    },
    {
      "epoch": 0.6744434188906999,
      "grad_norm": 0.8406347036361694,
      "learning_rate": 8.708117261783176e-06,
      "loss": 0.0249,
      "step": 412120
    },
    {
      "epoch": 0.6744761493293533,
      "grad_norm": 0.5897011756896973,
      "learning_rate": 8.708051369569658e-06,
      "loss": 0.0311,
      "step": 412140
    },
    {
      "epoch": 0.6745088797680067,
      "grad_norm": 0.19537542760372162,
      "learning_rate": 8.707985477356142e-06,
      "loss": 0.0345,
      "step": 412160
    },
    {
      "epoch": 0.67454161020666,
      "grad_norm": 0.8415953516960144,
      "learning_rate": 8.707919585142625e-06,
      "loss": 0.0314,
      "step": 412180
    },
    {
      "epoch": 0.6745743406453133,
      "grad_norm": 0.9463672041893005,
      "learning_rate": 8.707853692929107e-06,
      "loss": 0.0416,
      "step": 412200
    },
    {
      "epoch": 0.6746070710839667,
      "grad_norm": 1.0656760931015015,
      "learning_rate": 8.707787800715591e-06,
      "loss": 0.0314,
      "step": 412220
    },
    {
      "epoch": 0.67463980152262,
      "grad_norm": 2.447650909423828,
      "learning_rate": 8.707721908502073e-06,
      "loss": 0.0515,
      "step": 412240
    },
    {
      "epoch": 0.6746725319612733,
      "grad_norm": 0.8373534679412842,
      "learning_rate": 8.707656016288556e-06,
      "loss": 0.0341,
      "step": 412260
    },
    {
      "epoch": 0.6747052623999267,
      "grad_norm": 1.2595595121383667,
      "learning_rate": 8.707590124075038e-06,
      "loss": 0.032,
      "step": 412280
    },
    {
      "epoch": 0.67473799283858,
      "grad_norm": 1.5529189109802246,
      "learning_rate": 8.707524231861522e-06,
      "loss": 0.0271,
      "step": 412300
    },
    {
      "epoch": 0.6747707232772333,
      "grad_norm": 0.4426104426383972,
      "learning_rate": 8.707458339648005e-06,
      "loss": 0.0329,
      "step": 412320
    },
    {
      "epoch": 0.6748034537158867,
      "grad_norm": 1.6868230104446411,
      "learning_rate": 8.707392447434487e-06,
      "loss": 0.0306,
      "step": 412340
    },
    {
      "epoch": 0.67483618415454,
      "grad_norm": 1.2365965843200684,
      "learning_rate": 8.707326555220971e-06,
      "loss": 0.0311,
      "step": 412360
    },
    {
      "epoch": 0.6748689145931934,
      "grad_norm": 0.8109620213508606,
      "learning_rate": 8.707260663007453e-06,
      "loss": 0.0409,
      "step": 412380
    },
    {
      "epoch": 0.6749016450318467,
      "grad_norm": 1.5122582912445068,
      "learning_rate": 8.707194770793936e-06,
      "loss": 0.0489,
      "step": 412400
    },
    {
      "epoch": 0.6749343754705001,
      "grad_norm": 1.5997787714004517,
      "learning_rate": 8.707128878580418e-06,
      "loss": 0.0319,
      "step": 412420
    },
    {
      "epoch": 0.6749671059091534,
      "grad_norm": 0.3309956192970276,
      "learning_rate": 8.707062986366902e-06,
      "loss": 0.0352,
      "step": 412440
    },
    {
      "epoch": 0.6749998363478067,
      "grad_norm": 1.6140053272247314,
      "learning_rate": 8.706997094153384e-06,
      "loss": 0.0366,
      "step": 412460
    },
    {
      "epoch": 0.6750325667864601,
      "grad_norm": 0.7022102475166321,
      "learning_rate": 8.706931201939867e-06,
      "loss": 0.0358,
      "step": 412480
    },
    {
      "epoch": 0.6750652972251134,
      "grad_norm": 0.27220097184181213,
      "learning_rate": 8.70686530972635e-06,
      "loss": 0.0462,
      "step": 412500
    },
    {
      "epoch": 0.6750980276637667,
      "grad_norm": 0.9554508924484253,
      "learning_rate": 8.706799417512833e-06,
      "loss": 0.0414,
      "step": 412520
    },
    {
      "epoch": 0.6751307581024201,
      "grad_norm": 1.2915985584259033,
      "learning_rate": 8.706733525299316e-06,
      "loss": 0.0416,
      "step": 412540
    },
    {
      "epoch": 0.6751634885410734,
      "grad_norm": 1.2071659564971924,
      "learning_rate": 8.706667633085798e-06,
      "loss": 0.0245,
      "step": 412560
    },
    {
      "epoch": 0.6751962189797268,
      "grad_norm": 0.24151399731636047,
      "learning_rate": 8.706601740872282e-06,
      "loss": 0.0264,
      "step": 412580
    },
    {
      "epoch": 0.6752289494183801,
      "grad_norm": 0.7856120467185974,
      "learning_rate": 8.706535848658765e-06,
      "loss": 0.0194,
      "step": 412600
    },
    {
      "epoch": 0.6752616798570334,
      "grad_norm": 1.148620367050171,
      "learning_rate": 8.706469956445247e-06,
      "loss": 0.0377,
      "step": 412620
    },
    {
      "epoch": 0.6752944102956868,
      "grad_norm": 1.4565125703811646,
      "learning_rate": 8.706404064231731e-06,
      "loss": 0.029,
      "step": 412640
    },
    {
      "epoch": 0.6753271407343401,
      "grad_norm": 0.793479323387146,
      "learning_rate": 8.706338172018215e-06,
      "loss": 0.0317,
      "step": 412660
    },
    {
      "epoch": 0.6753598711729935,
      "grad_norm": 1.1150531768798828,
      "learning_rate": 8.706272279804696e-06,
      "loss": 0.0275,
      "step": 412680
    },
    {
      "epoch": 0.6753926016116468,
      "grad_norm": 1.366492748260498,
      "learning_rate": 8.70620638759118e-06,
      "loss": 0.0341,
      "step": 412700
    },
    {
      "epoch": 0.6754253320503001,
      "grad_norm": 3.063736915588379,
      "learning_rate": 8.706140495377662e-06,
      "loss": 0.0314,
      "step": 412720
    },
    {
      "epoch": 0.6754580624889535,
      "grad_norm": 0.8024449944496155,
      "learning_rate": 8.706074603164146e-06,
      "loss": 0.0265,
      "step": 412740
    },
    {
      "epoch": 0.6754907929276068,
      "grad_norm": 1.0360000133514404,
      "learning_rate": 8.706008710950627e-06,
      "loss": 0.0327,
      "step": 412760
    },
    {
      "epoch": 0.6755235233662602,
      "grad_norm": 1.07186758518219,
      "learning_rate": 8.705942818737111e-06,
      "loss": 0.0288,
      "step": 412780
    },
    {
      "epoch": 0.6755562538049135,
      "grad_norm": 1.7199647426605225,
      "learning_rate": 8.705876926523593e-06,
      "loss": 0.0397,
      "step": 412800
    },
    {
      "epoch": 0.6755889842435668,
      "grad_norm": 0.780019998550415,
      "learning_rate": 8.705811034310076e-06,
      "loss": 0.028,
      "step": 412820
    },
    {
      "epoch": 0.6756217146822202,
      "grad_norm": 0.34656015038490295,
      "learning_rate": 8.705745142096558e-06,
      "loss": 0.0244,
      "step": 412840
    },
    {
      "epoch": 0.6756544451208735,
      "grad_norm": 1.4738898277282715,
      "learning_rate": 8.705679249883042e-06,
      "loss": 0.0403,
      "step": 412860
    },
    {
      "epoch": 0.6756871755595268,
      "grad_norm": 5.322604179382324,
      "learning_rate": 8.705613357669524e-06,
      "loss": 0.0252,
      "step": 412880
    },
    {
      "epoch": 0.6757199059981802,
      "grad_norm": 1.0900731086730957,
      "learning_rate": 8.705547465456007e-06,
      "loss": 0.0439,
      "step": 412900
    },
    {
      "epoch": 0.6757526364368335,
      "grad_norm": 0.5858896374702454,
      "learning_rate": 8.705481573242491e-06,
      "loss": 0.0363,
      "step": 412920
    },
    {
      "epoch": 0.6757853668754868,
      "grad_norm": 1.4961233139038086,
      "learning_rate": 8.705415681028973e-06,
      "loss": 0.0395,
      "step": 412940
    },
    {
      "epoch": 0.6758180973141402,
      "grad_norm": 1.0305863618850708,
      "learning_rate": 8.705349788815456e-06,
      "loss": 0.0356,
      "step": 412960
    },
    {
      "epoch": 0.6758508277527936,
      "grad_norm": 0.32232844829559326,
      "learning_rate": 8.70528389660194e-06,
      "loss": 0.0505,
      "step": 412980
    },
    {
      "epoch": 0.6758835581914469,
      "grad_norm": 3.127448320388794,
      "learning_rate": 8.705218004388422e-06,
      "loss": 0.0373,
      "step": 413000
    },
    {
      "epoch": 0.6759162886301002,
      "grad_norm": 8.850111961364746,
      "learning_rate": 8.705152112174906e-06,
      "loss": 0.0321,
      "step": 413020
    },
    {
      "epoch": 0.6759490190687536,
      "grad_norm": 1.6281306743621826,
      "learning_rate": 8.705086219961389e-06,
      "loss": 0.0397,
      "step": 413040
    },
    {
      "epoch": 0.6759817495074069,
      "grad_norm": 0.55939781665802,
      "learning_rate": 8.705020327747871e-06,
      "loss": 0.0292,
      "step": 413060
    },
    {
      "epoch": 0.6760144799460602,
      "grad_norm": 0.19731488823890686,
      "learning_rate": 8.704954435534355e-06,
      "loss": 0.0369,
      "step": 413080
    },
    {
      "epoch": 0.6760472103847136,
      "grad_norm": 1.0196605920791626,
      "learning_rate": 8.704888543320837e-06,
      "loss": 0.0279,
      "step": 413100
    },
    {
      "epoch": 0.6760799408233669,
      "grad_norm": 1.1384950876235962,
      "learning_rate": 8.70482265110732e-06,
      "loss": 0.022,
      "step": 413120
    },
    {
      "epoch": 0.6761126712620202,
      "grad_norm": 2.0522220134735107,
      "learning_rate": 8.704756758893802e-06,
      "loss": 0.0445,
      "step": 413140
    },
    {
      "epoch": 0.6761454017006736,
      "grad_norm": 0.1733229011297226,
      "learning_rate": 8.704690866680286e-06,
      "loss": 0.0237,
      "step": 413160
    },
    {
      "epoch": 0.676178132139327,
      "grad_norm": 1.247949242591858,
      "learning_rate": 8.704624974466767e-06,
      "loss": 0.0253,
      "step": 413180
    },
    {
      "epoch": 0.6762108625779802,
      "grad_norm": 1.2419333457946777,
      "learning_rate": 8.704559082253251e-06,
      "loss": 0.0367,
      "step": 413200
    },
    {
      "epoch": 0.6762435930166336,
      "grad_norm": 1.2972919940948486,
      "learning_rate": 8.704493190039733e-06,
      "loss": 0.038,
      "step": 413220
    },
    {
      "epoch": 0.676276323455287,
      "grad_norm": 1.1178405284881592,
      "learning_rate": 8.704427297826217e-06,
      "loss": 0.0365,
      "step": 413240
    },
    {
      "epoch": 0.6763090538939402,
      "grad_norm": 0.3843371272087097,
      "learning_rate": 8.704361405612698e-06,
      "loss": 0.0316,
      "step": 413260
    },
    {
      "epoch": 0.6763417843325936,
      "grad_norm": 1.9706493616104126,
      "learning_rate": 8.704295513399182e-06,
      "loss": 0.0307,
      "step": 413280
    },
    {
      "epoch": 0.676374514771247,
      "grad_norm": 1.944395661354065,
      "learning_rate": 8.704229621185664e-06,
      "loss": 0.0266,
      "step": 413300
    },
    {
      "epoch": 0.6764072452099003,
      "grad_norm": 0.6308079361915588,
      "learning_rate": 8.704163728972148e-06,
      "loss": 0.0353,
      "step": 413320
    },
    {
      "epoch": 0.6764399756485536,
      "grad_norm": 1.19059419631958,
      "learning_rate": 8.704097836758631e-06,
      "loss": 0.0289,
      "step": 413340
    },
    {
      "epoch": 0.676472706087207,
      "grad_norm": 2.4950897693634033,
      "learning_rate": 8.704031944545113e-06,
      "loss": 0.0324,
      "step": 413360
    },
    {
      "epoch": 0.6765054365258604,
      "grad_norm": 0.9656417965888977,
      "learning_rate": 8.703966052331597e-06,
      "loss": 0.0319,
      "step": 413380
    },
    {
      "epoch": 0.6765381669645136,
      "grad_norm": 0.6060386896133423,
      "learning_rate": 8.70390016011808e-06,
      "loss": 0.0284,
      "step": 413400
    },
    {
      "epoch": 0.676570897403167,
      "grad_norm": 0.6677411198616028,
      "learning_rate": 8.703834267904562e-06,
      "loss": 0.0475,
      "step": 413420
    },
    {
      "epoch": 0.6766036278418204,
      "grad_norm": 0.628150463104248,
      "learning_rate": 8.703768375691046e-06,
      "loss": 0.0358,
      "step": 413440
    },
    {
      "epoch": 0.6766363582804736,
      "grad_norm": 0.6268777251243591,
      "learning_rate": 8.70370248347753e-06,
      "loss": 0.0327,
      "step": 413460
    },
    {
      "epoch": 0.676669088719127,
      "grad_norm": 0.6408373713493347,
      "learning_rate": 8.703636591264011e-06,
      "loss": 0.0229,
      "step": 413480
    },
    {
      "epoch": 0.6767018191577804,
      "grad_norm": 0.7286381125450134,
      "learning_rate": 8.703570699050495e-06,
      "loss": 0.0288,
      "step": 413500
    },
    {
      "epoch": 0.6767345495964336,
      "grad_norm": 0.9633299112319946,
      "learning_rate": 8.703504806836977e-06,
      "loss": 0.0393,
      "step": 413520
    },
    {
      "epoch": 0.676767280035087,
      "grad_norm": 1.459010362625122,
      "learning_rate": 8.70343891462346e-06,
      "loss": 0.032,
      "step": 413540
    },
    {
      "epoch": 0.6768000104737404,
      "grad_norm": 2.7572877407073975,
      "learning_rate": 8.703373022409942e-06,
      "loss": 0.0305,
      "step": 413560
    },
    {
      "epoch": 0.6768327409123938,
      "grad_norm": 0.7310409545898438,
      "learning_rate": 8.703307130196426e-06,
      "loss": 0.0395,
      "step": 413580
    },
    {
      "epoch": 0.676865471351047,
      "grad_norm": 1.8151732683181763,
      "learning_rate": 8.703241237982908e-06,
      "loss": 0.0391,
      "step": 413600
    },
    {
      "epoch": 0.6768982017897004,
      "grad_norm": 10.737051010131836,
      "learning_rate": 8.703175345769391e-06,
      "loss": 0.0294,
      "step": 413620
    },
    {
      "epoch": 0.6769309322283538,
      "grad_norm": 1.8012570142745972,
      "learning_rate": 8.703109453555873e-06,
      "loss": 0.0323,
      "step": 413640
    },
    {
      "epoch": 0.676963662667007,
      "grad_norm": 0.7415834665298462,
      "learning_rate": 8.703043561342357e-06,
      "loss": 0.0405,
      "step": 413660
    },
    {
      "epoch": 0.6769963931056604,
      "grad_norm": 1.5037243366241455,
      "learning_rate": 8.702977669128839e-06,
      "loss": 0.0363,
      "step": 413680
    },
    {
      "epoch": 0.6770291235443138,
      "grad_norm": 1.7790437936782837,
      "learning_rate": 8.702911776915322e-06,
      "loss": 0.0388,
      "step": 413700
    },
    {
      "epoch": 0.677061853982967,
      "grad_norm": 0.6396276354789734,
      "learning_rate": 8.702845884701806e-06,
      "loss": 0.0284,
      "step": 413720
    },
    {
      "epoch": 0.6770945844216204,
      "grad_norm": 0.26595935225486755,
      "learning_rate": 8.702779992488288e-06,
      "loss": 0.0372,
      "step": 413740
    },
    {
      "epoch": 0.6771273148602738,
      "grad_norm": 1.3729310035705566,
      "learning_rate": 8.702714100274771e-06,
      "loss": 0.0345,
      "step": 413760
    },
    {
      "epoch": 0.6771600452989271,
      "grad_norm": 0.9336603283882141,
      "learning_rate": 8.702648208061255e-06,
      "loss": 0.0398,
      "step": 413780
    },
    {
      "epoch": 0.6771927757375804,
      "grad_norm": 0.3298308551311493,
      "learning_rate": 8.702582315847737e-06,
      "loss": 0.032,
      "step": 413800
    },
    {
      "epoch": 0.6772255061762338,
      "grad_norm": 0.5900678634643555,
      "learning_rate": 8.70251642363422e-06,
      "loss": 0.026,
      "step": 413820
    },
    {
      "epoch": 0.6772582366148872,
      "grad_norm": 1.1872658729553223,
      "learning_rate": 8.702450531420704e-06,
      "loss": 0.0303,
      "step": 413840
    },
    {
      "epoch": 0.6772909670535404,
      "grad_norm": 0.9349583387374878,
      "learning_rate": 8.702384639207186e-06,
      "loss": 0.0391,
      "step": 413860
    },
    {
      "epoch": 0.6773236974921938,
      "grad_norm": 1.1333298683166504,
      "learning_rate": 8.70231874699367e-06,
      "loss": 0.0352,
      "step": 413880
    },
    {
      "epoch": 0.6773564279308472,
      "grad_norm": 1.8471044301986694,
      "learning_rate": 8.702252854780151e-06,
      "loss": 0.0279,
      "step": 413900
    },
    {
      "epoch": 0.6773891583695004,
      "grad_norm": 0.24031606316566467,
      "learning_rate": 8.702186962566635e-06,
      "loss": 0.0253,
      "step": 413920
    },
    {
      "epoch": 0.6774218888081538,
      "grad_norm": 0.9131965637207031,
      "learning_rate": 8.702121070353117e-06,
      "loss": 0.037,
      "step": 413940
    },
    {
      "epoch": 0.6774546192468072,
      "grad_norm": 1.3573133945465088,
      "learning_rate": 8.7020551781396e-06,
      "loss": 0.0376,
      "step": 413960
    },
    {
      "epoch": 0.6774873496854605,
      "grad_norm": 1.270135760307312,
      "learning_rate": 8.701989285926082e-06,
      "loss": 0.0394,
      "step": 413980
    },
    {
      "epoch": 0.6775200801241138,
      "grad_norm": 1.6228585243225098,
      "learning_rate": 8.701923393712566e-06,
      "loss": 0.0494,
      "step": 414000
    },
    {
      "epoch": 0.6775528105627672,
      "grad_norm": 1.2247072458267212,
      "learning_rate": 8.701857501499048e-06,
      "loss": 0.0282,
      "step": 414020
    },
    {
      "epoch": 0.6775855410014205,
      "grad_norm": 1.4994142055511475,
      "learning_rate": 8.701791609285531e-06,
      "loss": 0.0306,
      "step": 414040
    },
    {
      "epoch": 0.6776182714400738,
      "grad_norm": 1.4970660209655762,
      "learning_rate": 8.701725717072015e-06,
      "loss": 0.0272,
      "step": 414060
    },
    {
      "epoch": 0.6776510018787272,
      "grad_norm": 0.5501101016998291,
      "learning_rate": 8.701659824858497e-06,
      "loss": 0.0303,
      "step": 414080
    },
    {
      "epoch": 0.6776837323173805,
      "grad_norm": 1.5553475618362427,
      "learning_rate": 8.70159393264498e-06,
      "loss": 0.0307,
      "step": 414100
    },
    {
      "epoch": 0.6777164627560338,
      "grad_norm": 0.9240458607673645,
      "learning_rate": 8.701528040431462e-06,
      "loss": 0.0241,
      "step": 414120
    },
    {
      "epoch": 0.6777491931946872,
      "grad_norm": 0.5661019682884216,
      "learning_rate": 8.701462148217946e-06,
      "loss": 0.0405,
      "step": 414140
    },
    {
      "epoch": 0.6777819236333406,
      "grad_norm": 0.7939713001251221,
      "learning_rate": 8.701396256004428e-06,
      "loss": 0.0379,
      "step": 414160
    },
    {
      "epoch": 0.6778146540719939,
      "grad_norm": 0.3435724377632141,
      "learning_rate": 8.701330363790911e-06,
      "loss": 0.0449,
      "step": 414180
    },
    {
      "epoch": 0.6778473845106472,
      "grad_norm": 0.7882493734359741,
      "learning_rate": 8.701264471577395e-06,
      "loss": 0.0379,
      "step": 414200
    },
    {
      "epoch": 0.6778801149493006,
      "grad_norm": 2.690964460372925,
      "learning_rate": 8.701198579363878e-06,
      "loss": 0.0386,
      "step": 414220
    },
    {
      "epoch": 0.6779128453879539,
      "grad_norm": 1.2827742099761963,
      "learning_rate": 8.70113268715036e-06,
      "loss": 0.0327,
      "step": 414240
    },
    {
      "epoch": 0.6779455758266072,
      "grad_norm": 1.8138773441314697,
      "learning_rate": 8.701066794936844e-06,
      "loss": 0.0294,
      "step": 414260
    },
    {
      "epoch": 0.6779783062652606,
      "grad_norm": 0.4200214147567749,
      "learning_rate": 8.701000902723326e-06,
      "loss": 0.0304,
      "step": 414280
    },
    {
      "epoch": 0.6780110367039139,
      "grad_norm": 2.070685625076294,
      "learning_rate": 8.70093501050981e-06,
      "loss": 0.0362,
      "step": 414300
    },
    {
      "epoch": 0.6780437671425672,
      "grad_norm": 0.15775200724601746,
      "learning_rate": 8.700869118296291e-06,
      "loss": 0.028,
      "step": 414320
    },
    {
      "epoch": 0.6780764975812206,
      "grad_norm": 1.9056962728500366,
      "learning_rate": 8.700803226082775e-06,
      "loss": 0.0274,
      "step": 414340
    },
    {
      "epoch": 0.6781092280198739,
      "grad_norm": 2.5128700733184814,
      "learning_rate": 8.700737333869257e-06,
      "loss": 0.0323,
      "step": 414360
    },
    {
      "epoch": 0.6781419584585273,
      "grad_norm": 0.5540639162063599,
      "learning_rate": 8.70067144165574e-06,
      "loss": 0.0406,
      "step": 414380
    },
    {
      "epoch": 0.6781746888971806,
      "grad_norm": 1.1979804039001465,
      "learning_rate": 8.700605549442222e-06,
      "loss": 0.037,
      "step": 414400
    },
    {
      "epoch": 0.678207419335834,
      "grad_norm": 0.8113876581192017,
      "learning_rate": 8.700539657228706e-06,
      "loss": 0.0335,
      "step": 414420
    },
    {
      "epoch": 0.6782401497744873,
      "grad_norm": 1.4515337944030762,
      "learning_rate": 8.70047376501519e-06,
      "loss": 0.0423,
      "step": 414440
    },
    {
      "epoch": 0.6782728802131406,
      "grad_norm": 0.5219616293907166,
      "learning_rate": 8.700407872801671e-06,
      "loss": 0.0423,
      "step": 414460
    },
    {
      "epoch": 0.678305610651794,
      "grad_norm": 0.4457108974456787,
      "learning_rate": 8.700341980588155e-06,
      "loss": 0.0329,
      "step": 414480
    },
    {
      "epoch": 0.6783383410904473,
      "grad_norm": 0.4766336679458618,
      "learning_rate": 8.700276088374637e-06,
      "loss": 0.0276,
      "step": 414500
    },
    {
      "epoch": 0.6783710715291006,
      "grad_norm": 1.1211307048797607,
      "learning_rate": 8.70021019616112e-06,
      "loss": 0.0435,
      "step": 414520
    },
    {
      "epoch": 0.678403801967754,
      "grad_norm": 0.47439244389533997,
      "learning_rate": 8.700144303947602e-06,
      "loss": 0.0262,
      "step": 414540
    },
    {
      "epoch": 0.6784365324064073,
      "grad_norm": 0.4136475622653961,
      "learning_rate": 8.700078411734086e-06,
      "loss": 0.0316,
      "step": 414560
    },
    {
      "epoch": 0.6784692628450606,
      "grad_norm": 0.5531485676765442,
      "learning_rate": 8.70001251952057e-06,
      "loss": 0.0396,
      "step": 414580
    },
    {
      "epoch": 0.678501993283714,
      "grad_norm": 0.826980710029602,
      "learning_rate": 8.699946627307051e-06,
      "loss": 0.0378,
      "step": 414600
    },
    {
      "epoch": 0.6785347237223673,
      "grad_norm": 1.4237608909606934,
      "learning_rate": 8.699880735093535e-06,
      "loss": 0.0232,
      "step": 414620
    },
    {
      "epoch": 0.6785674541610207,
      "grad_norm": 0.6537901163101196,
      "learning_rate": 8.699814842880018e-06,
      "loss": 0.0266,
      "step": 414640
    },
    {
      "epoch": 0.678600184599674,
      "grad_norm": 1.8105628490447998,
      "learning_rate": 8.6997489506665e-06,
      "loss": 0.0409,
      "step": 414660
    },
    {
      "epoch": 0.6786329150383273,
      "grad_norm": 1.8036538362503052,
      "learning_rate": 8.699683058452984e-06,
      "loss": 0.0331,
      "step": 414680
    },
    {
      "epoch": 0.6786656454769807,
      "grad_norm": 0.331698477268219,
      "learning_rate": 8.699617166239466e-06,
      "loss": 0.0273,
      "step": 414700
    },
    {
      "epoch": 0.678698375915634,
      "grad_norm": 1.6730509996414185,
      "learning_rate": 8.69955127402595e-06,
      "loss": 0.0328,
      "step": 414720
    },
    {
      "epoch": 0.6787311063542874,
      "grad_norm": 0.7605457305908203,
      "learning_rate": 8.699485381812431e-06,
      "loss": 0.035,
      "step": 414740
    },
    {
      "epoch": 0.6787638367929407,
      "grad_norm": 0.26674821972846985,
      "learning_rate": 8.699419489598915e-06,
      "loss": 0.0254,
      "step": 414760
    },
    {
      "epoch": 0.678796567231594,
      "grad_norm": 2.663971424102783,
      "learning_rate": 8.699353597385399e-06,
      "loss": 0.0391,
      "step": 414780
    },
    {
      "epoch": 0.6788292976702474,
      "grad_norm": 1.3280305862426758,
      "learning_rate": 8.69928770517188e-06,
      "loss": 0.0266,
      "step": 414800
    },
    {
      "epoch": 0.6788620281089007,
      "grad_norm": 1.5499941110610962,
      "learning_rate": 8.699221812958364e-06,
      "loss": 0.0327,
      "step": 414820
    },
    {
      "epoch": 0.6788947585475541,
      "grad_norm": 0.43437498807907104,
      "learning_rate": 8.699155920744846e-06,
      "loss": 0.0336,
      "step": 414840
    },
    {
      "epoch": 0.6789274889862074,
      "grad_norm": 1.6548148393630981,
      "learning_rate": 8.69909002853133e-06,
      "loss": 0.0267,
      "step": 414860
    },
    {
      "epoch": 0.6789602194248607,
      "grad_norm": 1.0562090873718262,
      "learning_rate": 8.699024136317811e-06,
      "loss": 0.0349,
      "step": 414880
    },
    {
      "epoch": 0.6789929498635141,
      "grad_norm": 1.2048454284667969,
      "learning_rate": 8.698958244104295e-06,
      "loss": 0.0267,
      "step": 414900
    },
    {
      "epoch": 0.6790256803021674,
      "grad_norm": 0.7164954543113708,
      "learning_rate": 8.698892351890777e-06,
      "loss": 0.0341,
      "step": 414920
    },
    {
      "epoch": 0.6790584107408207,
      "grad_norm": 0.17355641722679138,
      "learning_rate": 8.69882645967726e-06,
      "loss": 0.0523,
      "step": 414940
    },
    {
      "epoch": 0.6790911411794741,
      "grad_norm": 1.7392313480377197,
      "learning_rate": 8.698760567463744e-06,
      "loss": 0.037,
      "step": 414960
    },
    {
      "epoch": 0.6791238716181274,
      "grad_norm": 0.41191476583480835,
      "learning_rate": 8.698694675250226e-06,
      "loss": 0.0361,
      "step": 414980
    },
    {
      "epoch": 0.6791566020567807,
      "grad_norm": 2.446584701538086,
      "learning_rate": 8.69862878303671e-06,
      "loss": 0.0459,
      "step": 415000
    },
    {
      "epoch": 0.6791893324954341,
      "grad_norm": 0.5869547724723816,
      "learning_rate": 8.698562890823193e-06,
      "loss": 0.045,
      "step": 415020
    },
    {
      "epoch": 0.6792220629340875,
      "grad_norm": 0.5781755447387695,
      "learning_rate": 8.698496998609675e-06,
      "loss": 0.0329,
      "step": 415040
    },
    {
      "epoch": 0.6792547933727408,
      "grad_norm": 2.111046552658081,
      "learning_rate": 8.698431106396159e-06,
      "loss": 0.0299,
      "step": 415060
    },
    {
      "epoch": 0.6792875238113941,
      "grad_norm": 0.4089462459087372,
      "learning_rate": 8.69836521418264e-06,
      "loss": 0.0317,
      "step": 415080
    },
    {
      "epoch": 0.6793202542500475,
      "grad_norm": 0.8309762477874756,
      "learning_rate": 8.698299321969124e-06,
      "loss": 0.0253,
      "step": 415100
    },
    {
      "epoch": 0.6793529846887008,
      "grad_norm": 0.5312277674674988,
      "learning_rate": 8.698233429755608e-06,
      "loss": 0.0326,
      "step": 415120
    },
    {
      "epoch": 0.6793857151273541,
      "grad_norm": 2.0255119800567627,
      "learning_rate": 8.69816753754209e-06,
      "loss": 0.0371,
      "step": 415140
    },
    {
      "epoch": 0.6794184455660075,
      "grad_norm": 0.3097350001335144,
      "learning_rate": 8.698101645328573e-06,
      "loss": 0.0492,
      "step": 415160
    },
    {
      "epoch": 0.6794511760046608,
      "grad_norm": 1.1202479600906372,
      "learning_rate": 8.698035753115055e-06,
      "loss": 0.0391,
      "step": 415180
    },
    {
      "epoch": 0.6794839064433141,
      "grad_norm": 0.33244481682777405,
      "learning_rate": 8.697969860901539e-06,
      "loss": 0.023,
      "step": 415200
    },
    {
      "epoch": 0.6795166368819675,
      "grad_norm": 0.6479660272598267,
      "learning_rate": 8.69790396868802e-06,
      "loss": 0.0281,
      "step": 415220
    },
    {
      "epoch": 0.6795493673206209,
      "grad_norm": 0.4021729528903961,
      "learning_rate": 8.697838076474504e-06,
      "loss": 0.0247,
      "step": 415240
    },
    {
      "epoch": 0.6795820977592741,
      "grad_norm": 2.9490346908569336,
      "learning_rate": 8.697772184260986e-06,
      "loss": 0.0317,
      "step": 415260
    },
    {
      "epoch": 0.6796148281979275,
      "grad_norm": 1.2574695348739624,
      "learning_rate": 8.69770629204747e-06,
      "loss": 0.0361,
      "step": 415280
    },
    {
      "epoch": 0.6796475586365809,
      "grad_norm": 1.889551043510437,
      "learning_rate": 8.697640399833951e-06,
      "loss": 0.0442,
      "step": 415300
    },
    {
      "epoch": 0.6796802890752341,
      "grad_norm": 2.1912105083465576,
      "learning_rate": 8.697574507620435e-06,
      "loss": 0.0419,
      "step": 415320
    },
    {
      "epoch": 0.6797130195138875,
      "grad_norm": 0.9809609651565552,
      "learning_rate": 8.697508615406917e-06,
      "loss": 0.0267,
      "step": 415340
    },
    {
      "epoch": 0.6797457499525409,
      "grad_norm": 1.5943437814712524,
      "learning_rate": 8.6974427231934e-06,
      "loss": 0.0362,
      "step": 415360
    },
    {
      "epoch": 0.6797784803911942,
      "grad_norm": 2.4908127784729004,
      "learning_rate": 8.697376830979884e-06,
      "loss": 0.038,
      "step": 415380
    },
    {
      "epoch": 0.6798112108298475,
      "grad_norm": 0.9211692810058594,
      "learning_rate": 8.697310938766366e-06,
      "loss": 0.0232,
      "step": 415400
    },
    {
      "epoch": 0.6798439412685009,
      "grad_norm": 2.257420778274536,
      "learning_rate": 8.69724504655285e-06,
      "loss": 0.0289,
      "step": 415420
    },
    {
      "epoch": 0.6798766717071543,
      "grad_norm": 0.8581026792526245,
      "learning_rate": 8.697179154339333e-06,
      "loss": 0.0231,
      "step": 415440
    },
    {
      "epoch": 0.6799094021458075,
      "grad_norm": 0.8983663320541382,
      "learning_rate": 8.697113262125815e-06,
      "loss": 0.0357,
      "step": 415460
    },
    {
      "epoch": 0.6799421325844609,
      "grad_norm": 0.8406714200973511,
      "learning_rate": 8.697047369912299e-06,
      "loss": 0.0333,
      "step": 415480
    },
    {
      "epoch": 0.6799748630231143,
      "grad_norm": 0.5556949377059937,
      "learning_rate": 8.696981477698782e-06,
      "loss": 0.0356,
      "step": 415500
    },
    {
      "epoch": 0.6800075934617675,
      "grad_norm": 1.259360432624817,
      "learning_rate": 8.696915585485264e-06,
      "loss": 0.0471,
      "step": 415520
    },
    {
      "epoch": 0.6800403239004209,
      "grad_norm": 0.6563867926597595,
      "learning_rate": 8.696849693271748e-06,
      "loss": 0.0413,
      "step": 415540
    },
    {
      "epoch": 0.6800730543390743,
      "grad_norm": 5.659729480743408,
      "learning_rate": 8.69678380105823e-06,
      "loss": 0.027,
      "step": 415560
    },
    {
      "epoch": 0.6801057847777275,
      "grad_norm": 1.698017954826355,
      "learning_rate": 8.696717908844713e-06,
      "loss": 0.0367,
      "step": 415580
    },
    {
      "epoch": 0.6801385152163809,
      "grad_norm": 1.1218657493591309,
      "learning_rate": 8.696652016631195e-06,
      "loss": 0.0362,
      "step": 415600
    },
    {
      "epoch": 0.6801712456550343,
      "grad_norm": 0.52339106798172,
      "learning_rate": 8.696586124417679e-06,
      "loss": 0.0261,
      "step": 415620
    },
    {
      "epoch": 0.6802039760936877,
      "grad_norm": 0.578822910785675,
      "learning_rate": 8.69652023220416e-06,
      "loss": 0.0409,
      "step": 415640
    },
    {
      "epoch": 0.6802367065323409,
      "grad_norm": 2.9312562942504883,
      "learning_rate": 8.696454339990644e-06,
      "loss": 0.0445,
      "step": 415660
    },
    {
      "epoch": 0.6802694369709943,
      "grad_norm": 1.8063385486602783,
      "learning_rate": 8.696388447777126e-06,
      "loss": 0.0443,
      "step": 415680
    },
    {
      "epoch": 0.6803021674096477,
      "grad_norm": 0.2308802753686905,
      "learning_rate": 8.69632255556361e-06,
      "loss": 0.0314,
      "step": 415700
    },
    {
      "epoch": 0.6803348978483009,
      "grad_norm": 1.9638957977294922,
      "learning_rate": 8.696256663350092e-06,
      "loss": 0.0412,
      "step": 415720
    },
    {
      "epoch": 0.6803676282869543,
      "grad_norm": 0.6701370477676392,
      "learning_rate": 8.696190771136575e-06,
      "loss": 0.0295,
      "step": 415740
    },
    {
      "epoch": 0.6804003587256077,
      "grad_norm": 1.466178059577942,
      "learning_rate": 8.696124878923059e-06,
      "loss": 0.0301,
      "step": 415760
    },
    {
      "epoch": 0.6804330891642609,
      "grad_norm": 1.1720317602157593,
      "learning_rate": 8.69605898670954e-06,
      "loss": 0.0381,
      "step": 415780
    },
    {
      "epoch": 0.6804658196029143,
      "grad_norm": 1.3821667432785034,
      "learning_rate": 8.695993094496024e-06,
      "loss": 0.0261,
      "step": 415800
    },
    {
      "epoch": 0.6804985500415677,
      "grad_norm": 1.5704830884933472,
      "learning_rate": 8.695927202282508e-06,
      "loss": 0.0388,
      "step": 415820
    },
    {
      "epoch": 0.680531280480221,
      "grad_norm": 1.585469126701355,
      "learning_rate": 8.69586131006899e-06,
      "loss": 0.0322,
      "step": 415840
    },
    {
      "epoch": 0.6805640109188743,
      "grad_norm": 1.3797242641448975,
      "learning_rate": 8.695795417855473e-06,
      "loss": 0.0343,
      "step": 415860
    },
    {
      "epoch": 0.6805967413575277,
      "grad_norm": 1.1766241788864136,
      "learning_rate": 8.695729525641957e-06,
      "loss": 0.0276,
      "step": 415880
    },
    {
      "epoch": 0.680629471796181,
      "grad_norm": 1.9347255229949951,
      "learning_rate": 8.695663633428439e-06,
      "loss": 0.0377,
      "step": 415900
    },
    {
      "epoch": 0.6806622022348343,
      "grad_norm": 1.0485748052597046,
      "learning_rate": 8.695597741214922e-06,
      "loss": 0.0403,
      "step": 415920
    },
    {
      "epoch": 0.6806949326734877,
      "grad_norm": 0.5848516225814819,
      "learning_rate": 8.695531849001404e-06,
      "loss": 0.0355,
      "step": 415940
    },
    {
      "epoch": 0.6807276631121411,
      "grad_norm": 2.8535549640655518,
      "learning_rate": 8.695465956787888e-06,
      "loss": 0.0408,
      "step": 415960
    },
    {
      "epoch": 0.6807603935507943,
      "grad_norm": 0.2508910000324249,
      "learning_rate": 8.69540006457437e-06,
      "loss": 0.0322,
      "step": 415980
    },
    {
      "epoch": 0.6807931239894477,
      "grad_norm": 0.5321534276008606,
      "learning_rate": 8.695334172360853e-06,
      "loss": 0.0242,
      "step": 416000
    },
    {
      "epoch": 0.6808258544281011,
      "grad_norm": 0.21417754888534546,
      "learning_rate": 8.695268280147335e-06,
      "loss": 0.032,
      "step": 416020
    },
    {
      "epoch": 0.6808585848667544,
      "grad_norm": 1.229617953300476,
      "learning_rate": 8.695202387933819e-06,
      "loss": 0.0391,
      "step": 416040
    },
    {
      "epoch": 0.6808913153054077,
      "grad_norm": 0.792797327041626,
      "learning_rate": 8.6951364957203e-06,
      "loss": 0.0304,
      "step": 416060
    },
    {
      "epoch": 0.6809240457440611,
      "grad_norm": 0.5610911846160889,
      "learning_rate": 8.695070603506784e-06,
      "loss": 0.0279,
      "step": 416080
    },
    {
      "epoch": 0.6809567761827144,
      "grad_norm": 2.287137508392334,
      "learning_rate": 8.695004711293266e-06,
      "loss": 0.0285,
      "step": 416100
    },
    {
      "epoch": 0.6809895066213677,
      "grad_norm": 1.893369197845459,
      "learning_rate": 8.69493881907975e-06,
      "loss": 0.0338,
      "step": 416120
    },
    {
      "epoch": 0.6810222370600211,
      "grad_norm": 3.6417081356048584,
      "learning_rate": 8.694872926866232e-06,
      "loss": 0.0469,
      "step": 416140
    },
    {
      "epoch": 0.6810549674986744,
      "grad_norm": 1.3247863054275513,
      "learning_rate": 8.694807034652715e-06,
      "loss": 0.0378,
      "step": 416160
    },
    {
      "epoch": 0.6810876979373277,
      "grad_norm": 0.571110725402832,
      "learning_rate": 8.694741142439199e-06,
      "loss": 0.0336,
      "step": 416180
    },
    {
      "epoch": 0.6811204283759811,
      "grad_norm": 2.228980779647827,
      "learning_rate": 8.69467525022568e-06,
      "loss": 0.0399,
      "step": 416200
    },
    {
      "epoch": 0.6811531588146345,
      "grad_norm": 1.1251558065414429,
      "learning_rate": 8.694609358012164e-06,
      "loss": 0.0409,
      "step": 416220
    },
    {
      "epoch": 0.6811858892532878,
      "grad_norm": 1.5668174028396606,
      "learning_rate": 8.694543465798648e-06,
      "loss": 0.0432,
      "step": 416240
    },
    {
      "epoch": 0.6812186196919411,
      "grad_norm": 0.9338423013687134,
      "learning_rate": 8.69447757358513e-06,
      "loss": 0.0406,
      "step": 416260
    },
    {
      "epoch": 0.6812513501305945,
      "grad_norm": 4.290981292724609,
      "learning_rate": 8.694411681371613e-06,
      "loss": 0.029,
      "step": 416280
    },
    {
      "epoch": 0.6812840805692478,
      "grad_norm": 9.751373291015625,
      "learning_rate": 8.694345789158097e-06,
      "loss": 0.0292,
      "step": 416300
    },
    {
      "epoch": 0.6813168110079011,
      "grad_norm": 2.5189900398254395,
      "learning_rate": 8.694279896944579e-06,
      "loss": 0.0258,
      "step": 416320
    },
    {
      "epoch": 0.6813495414465545,
      "grad_norm": 0.249447762966156,
      "learning_rate": 8.694214004731062e-06,
      "loss": 0.032,
      "step": 416340
    },
    {
      "epoch": 0.6813822718852078,
      "grad_norm": 0.7382304072380066,
      "learning_rate": 8.694148112517544e-06,
      "loss": 0.0237,
      "step": 416360
    },
    {
      "epoch": 0.6814150023238611,
      "grad_norm": 0.5212152004241943,
      "learning_rate": 8.694082220304028e-06,
      "loss": 0.0422,
      "step": 416380
    },
    {
      "epoch": 0.6814477327625145,
      "grad_norm": 1.6172500848770142,
      "learning_rate": 8.69401632809051e-06,
      "loss": 0.0271,
      "step": 416400
    },
    {
      "epoch": 0.6814804632011678,
      "grad_norm": 1.0618045330047607,
      "learning_rate": 8.693950435876993e-06,
      "loss": 0.0358,
      "step": 416420
    },
    {
      "epoch": 0.6815131936398212,
      "grad_norm": 0.7813439965248108,
      "learning_rate": 8.693884543663475e-06,
      "loss": 0.0286,
      "step": 416440
    },
    {
      "epoch": 0.6815459240784745,
      "grad_norm": 1.605237603187561,
      "learning_rate": 8.693818651449959e-06,
      "loss": 0.0321,
      "step": 416460
    },
    {
      "epoch": 0.6815786545171278,
      "grad_norm": 0.6224121451377869,
      "learning_rate": 8.69375275923644e-06,
      "loss": 0.0355,
      "step": 416480
    },
    {
      "epoch": 0.6816113849557812,
      "grad_norm": 0.5902488827705383,
      "learning_rate": 8.693686867022924e-06,
      "loss": 0.0232,
      "step": 416500
    },
    {
      "epoch": 0.6816441153944345,
      "grad_norm": 0.4184046983718872,
      "learning_rate": 8.693620974809408e-06,
      "loss": 0.0413,
      "step": 416520
    },
    {
      "epoch": 0.6816768458330879,
      "grad_norm": 1.6204432249069214,
      "learning_rate": 8.69355508259589e-06,
      "loss": 0.0332,
      "step": 416540
    },
    {
      "epoch": 0.6817095762717412,
      "grad_norm": 1.958415150642395,
      "learning_rate": 8.693489190382373e-06,
      "loss": 0.0266,
      "step": 416560
    },
    {
      "epoch": 0.6817423067103945,
      "grad_norm": 0.7011559009552002,
      "learning_rate": 8.693423298168855e-06,
      "loss": 0.0223,
      "step": 416580
    },
    {
      "epoch": 0.6817750371490479,
      "grad_norm": 2.276416778564453,
      "learning_rate": 8.693357405955339e-06,
      "loss": 0.0313,
      "step": 416600
    },
    {
      "epoch": 0.6818077675877012,
      "grad_norm": 1.0425796508789062,
      "learning_rate": 8.693291513741822e-06,
      "loss": 0.0375,
      "step": 416620
    },
    {
      "epoch": 0.6818404980263546,
      "grad_norm": 0.995293140411377,
      "learning_rate": 8.693225621528304e-06,
      "loss": 0.0449,
      "step": 416640
    },
    {
      "epoch": 0.6818732284650079,
      "grad_norm": 1.4020906686782837,
      "learning_rate": 8.693159729314788e-06,
      "loss": 0.029,
      "step": 416660
    },
    {
      "epoch": 0.6819059589036612,
      "grad_norm": 0.2703401744365692,
      "learning_rate": 8.693093837101271e-06,
      "loss": 0.0241,
      "step": 416680
    },
    {
      "epoch": 0.6819386893423146,
      "grad_norm": 3.6190383434295654,
      "learning_rate": 8.693027944887753e-06,
      "loss": 0.0244,
      "step": 416700
    },
    {
      "epoch": 0.6819714197809679,
      "grad_norm": 0.8380364775657654,
      "learning_rate": 8.692962052674237e-06,
      "loss": 0.0316,
      "step": 416720
    },
    {
      "epoch": 0.6820041502196212,
      "grad_norm": 0.7682374119758606,
      "learning_rate": 8.692896160460719e-06,
      "loss": 0.0281,
      "step": 416740
    },
    {
      "epoch": 0.6820368806582746,
      "grad_norm": 0.5830630660057068,
      "learning_rate": 8.692830268247202e-06,
      "loss": 0.0254,
      "step": 416760
    },
    {
      "epoch": 0.6820696110969279,
      "grad_norm": 0.5039142966270447,
      "learning_rate": 8.692764376033684e-06,
      "loss": 0.0425,
      "step": 416780
    },
    {
      "epoch": 0.6821023415355812,
      "grad_norm": 0.5795090794563293,
      "learning_rate": 8.692698483820168e-06,
      "loss": 0.0355,
      "step": 416800
    },
    {
      "epoch": 0.6821350719742346,
      "grad_norm": 1.0006780624389648,
      "learning_rate": 8.69263259160665e-06,
      "loss": 0.0339,
      "step": 416820
    },
    {
      "epoch": 0.682167802412888,
      "grad_norm": 3.9703152179718018,
      "learning_rate": 8.692566699393133e-06,
      "loss": 0.0339,
      "step": 416840
    },
    {
      "epoch": 0.6822005328515413,
      "grad_norm": 0.8135573863983154,
      "learning_rate": 8.692500807179615e-06,
      "loss": 0.0386,
      "step": 416860
    },
    {
      "epoch": 0.6822332632901946,
      "grad_norm": 1.1162837743759155,
      "learning_rate": 8.692434914966099e-06,
      "loss": 0.0339,
      "step": 416880
    },
    {
      "epoch": 0.682265993728848,
      "grad_norm": 0.9310556650161743,
      "learning_rate": 8.692369022752582e-06,
      "loss": 0.0328,
      "step": 416900
    },
    {
      "epoch": 0.6822987241675013,
      "grad_norm": 1.0166552066802979,
      "learning_rate": 8.692303130539064e-06,
      "loss": 0.0218,
      "step": 416920
    },
    {
      "epoch": 0.6823314546061546,
      "grad_norm": 2.295074701309204,
      "learning_rate": 8.692237238325548e-06,
      "loss": 0.0355,
      "step": 416940
    },
    {
      "epoch": 0.682364185044808,
      "grad_norm": 1.8224141597747803,
      "learning_rate": 8.69217134611203e-06,
      "loss": 0.0319,
      "step": 416960
    },
    {
      "epoch": 0.6823969154834613,
      "grad_norm": 1.3680193424224854,
      "learning_rate": 8.692105453898513e-06,
      "loss": 0.0325,
      "step": 416980
    },
    {
      "epoch": 0.6824296459221146,
      "grad_norm": 0.9088426232337952,
      "learning_rate": 8.692039561684997e-06,
      "loss": 0.0375,
      "step": 417000
    },
    {
      "epoch": 0.682462376360768,
      "grad_norm": 0.6720699071884155,
      "learning_rate": 8.691973669471479e-06,
      "loss": 0.0377,
      "step": 417020
    },
    {
      "epoch": 0.6824951067994214,
      "grad_norm": 0.4770154654979706,
      "learning_rate": 8.691907777257963e-06,
      "loss": 0.0278,
      "step": 417040
    },
    {
      "epoch": 0.6825278372380746,
      "grad_norm": 1.6399130821228027,
      "learning_rate": 8.691841885044446e-06,
      "loss": 0.0252,
      "step": 417060
    },
    {
      "epoch": 0.682560567676728,
      "grad_norm": 1.124428391456604,
      "learning_rate": 8.691775992830928e-06,
      "loss": 0.0376,
      "step": 417080
    },
    {
      "epoch": 0.6825932981153814,
      "grad_norm": 0.7003185749053955,
      "learning_rate": 8.691710100617412e-06,
      "loss": 0.0339,
      "step": 417100
    },
    {
      "epoch": 0.6826260285540346,
      "grad_norm": 0.9393502473831177,
      "learning_rate": 8.691644208403893e-06,
      "loss": 0.0377,
      "step": 417120
    },
    {
      "epoch": 0.682658758992688,
      "grad_norm": 1.1606247425079346,
      "learning_rate": 8.691578316190377e-06,
      "loss": 0.0306,
      "step": 417140
    },
    {
      "epoch": 0.6826914894313414,
      "grad_norm": 5.635505676269531,
      "learning_rate": 8.691512423976859e-06,
      "loss": 0.0389,
      "step": 417160
    },
    {
      "epoch": 0.6827242198699947,
      "grad_norm": 0.41182413697242737,
      "learning_rate": 8.691446531763343e-06,
      "loss": 0.0277,
      "step": 417180
    },
    {
      "epoch": 0.682756950308648,
      "grad_norm": 1.4353162050247192,
      "learning_rate": 8.691380639549824e-06,
      "loss": 0.0295,
      "step": 417200
    },
    {
      "epoch": 0.6827896807473014,
      "grad_norm": 0.14031344652175903,
      "learning_rate": 8.691314747336308e-06,
      "loss": 0.025,
      "step": 417220
    },
    {
      "epoch": 0.6828224111859548,
      "grad_norm": 0.7038220167160034,
      "learning_rate": 8.691248855122792e-06,
      "loss": 0.0371,
      "step": 417240
    },
    {
      "epoch": 0.682855141624608,
      "grad_norm": 1.5591448545455933,
      "learning_rate": 8.691182962909273e-06,
      "loss": 0.0392,
      "step": 417260
    },
    {
      "epoch": 0.6828878720632614,
      "grad_norm": 2.411334276199341,
      "learning_rate": 8.691117070695757e-06,
      "loss": 0.0402,
      "step": 417280
    },
    {
      "epoch": 0.6829206025019148,
      "grad_norm": 1.375472903251648,
      "learning_rate": 8.691051178482239e-06,
      "loss": 0.0288,
      "step": 417300
    },
    {
      "epoch": 0.682953332940568,
      "grad_norm": 2.5728697776794434,
      "learning_rate": 8.690985286268723e-06,
      "loss": 0.0425,
      "step": 417320
    },
    {
      "epoch": 0.6829860633792214,
      "grad_norm": 0.49919843673706055,
      "learning_rate": 8.690919394055204e-06,
      "loss": 0.0323,
      "step": 417340
    },
    {
      "epoch": 0.6830187938178748,
      "grad_norm": 1.1577732563018799,
      "learning_rate": 8.690853501841688e-06,
      "loss": 0.0353,
      "step": 417360
    },
    {
      "epoch": 0.683051524256528,
      "grad_norm": 1.1757599115371704,
      "learning_rate": 8.69078760962817e-06,
      "loss": 0.0331,
      "step": 417380
    },
    {
      "epoch": 0.6830842546951814,
      "grad_norm": 0.4180883467197418,
      "learning_rate": 8.690721717414654e-06,
      "loss": 0.0329,
      "step": 417400
    },
    {
      "epoch": 0.6831169851338348,
      "grad_norm": 1.3208799362182617,
      "learning_rate": 8.690655825201137e-06,
      "loss": 0.0269,
      "step": 417420
    },
    {
      "epoch": 0.683149715572488,
      "grad_norm": 0.8678979277610779,
      "learning_rate": 8.690589932987619e-06,
      "loss": 0.0304,
      "step": 417440
    },
    {
      "epoch": 0.6831824460111414,
      "grad_norm": 0.41052407026290894,
      "learning_rate": 8.690524040774103e-06,
      "loss": 0.041,
      "step": 417460
    },
    {
      "epoch": 0.6832151764497948,
      "grad_norm": 1.0393997430801392,
      "learning_rate": 8.690458148560586e-06,
      "loss": 0.0488,
      "step": 417480
    },
    {
      "epoch": 0.6832479068884482,
      "grad_norm": 1.3675220012664795,
      "learning_rate": 8.690392256347068e-06,
      "loss": 0.0366,
      "step": 417500
    },
    {
      "epoch": 0.6832806373271014,
      "grad_norm": 0.16525734961032867,
      "learning_rate": 8.690326364133552e-06,
      "loss": 0.0357,
      "step": 417520
    },
    {
      "epoch": 0.6833133677657548,
      "grad_norm": 1.7860400676727295,
      "learning_rate": 8.690260471920034e-06,
      "loss": 0.0457,
      "step": 417540
    },
    {
      "epoch": 0.6833460982044082,
      "grad_norm": 1.2841721773147583,
      "learning_rate": 8.690194579706517e-06,
      "loss": 0.0345,
      "step": 417560
    },
    {
      "epoch": 0.6833788286430614,
      "grad_norm": 0.9693776369094849,
      "learning_rate": 8.690128687493e-06,
      "loss": 0.0226,
      "step": 417580
    },
    {
      "epoch": 0.6834115590817148,
      "grad_norm": 0.8467486500740051,
      "learning_rate": 8.690062795279483e-06,
      "loss": 0.02,
      "step": 417600
    },
    {
      "epoch": 0.6834442895203682,
      "grad_norm": 0.6395931839942932,
      "learning_rate": 8.689996903065966e-06,
      "loss": 0.0287,
      "step": 417620
    },
    {
      "epoch": 0.6834770199590214,
      "grad_norm": 0.5103744864463806,
      "learning_rate": 8.689931010852448e-06,
      "loss": 0.042,
      "step": 417640
    },
    {
      "epoch": 0.6835097503976748,
      "grad_norm": 1.0452181100845337,
      "learning_rate": 8.689865118638932e-06,
      "loss": 0.0294,
      "step": 417660
    },
    {
      "epoch": 0.6835424808363282,
      "grad_norm": 1.751144528388977,
      "learning_rate": 8.689799226425414e-06,
      "loss": 0.0367,
      "step": 417680
    },
    {
      "epoch": 0.6835752112749816,
      "grad_norm": 1.7708022594451904,
      "learning_rate": 8.689733334211897e-06,
      "loss": 0.0394,
      "step": 417700
    },
    {
      "epoch": 0.6836079417136348,
      "grad_norm": 2.5093302726745605,
      "learning_rate": 8.689667441998379e-06,
      "loss": 0.0378,
      "step": 417720
    },
    {
      "epoch": 0.6836406721522882,
      "grad_norm": 1.2196478843688965,
      "learning_rate": 8.689601549784863e-06,
      "loss": 0.0348,
      "step": 417740
    },
    {
      "epoch": 0.6836734025909416,
      "grad_norm": 0.6291741132736206,
      "learning_rate": 8.689535657571345e-06,
      "loss": 0.0358,
      "step": 417760
    },
    {
      "epoch": 0.6837061330295948,
      "grad_norm": 0.4682135283946991,
      "learning_rate": 8.689469765357828e-06,
      "loss": 0.0435,
      "step": 417780
    },
    {
      "epoch": 0.6837388634682482,
      "grad_norm": 1.5750240087509155,
      "learning_rate": 8.689403873144312e-06,
      "loss": 0.0301,
      "step": 417800
    },
    {
      "epoch": 0.6837715939069016,
      "grad_norm": 1.8385517597198486,
      "learning_rate": 8.689337980930794e-06,
      "loss": 0.0454,
      "step": 417820
    },
    {
      "epoch": 0.6838043243455548,
      "grad_norm": 0.6892237663269043,
      "learning_rate": 8.689272088717277e-06,
      "loss": 0.0498,
      "step": 417840
    },
    {
      "epoch": 0.6838370547842082,
      "grad_norm": 0.4432503581047058,
      "learning_rate": 8.68920619650376e-06,
      "loss": 0.029,
      "step": 417860
    },
    {
      "epoch": 0.6838697852228616,
      "grad_norm": 0.5613958239555359,
      "learning_rate": 8.689140304290243e-06,
      "loss": 0.0285,
      "step": 417880
    },
    {
      "epoch": 0.6839025156615149,
      "grad_norm": 2.538811683654785,
      "learning_rate": 8.689074412076726e-06,
      "loss": 0.0406,
      "step": 417900
    },
    {
      "epoch": 0.6839352461001682,
      "grad_norm": 0.3517993986606598,
      "learning_rate": 8.689008519863208e-06,
      "loss": 0.0422,
      "step": 417920
    },
    {
      "epoch": 0.6839679765388216,
      "grad_norm": 1.5428885221481323,
      "learning_rate": 8.688942627649692e-06,
      "loss": 0.038,
      "step": 417940
    },
    {
      "epoch": 0.684000706977475,
      "grad_norm": 0.852145791053772,
      "learning_rate": 8.688876735436175e-06,
      "loss": 0.0341,
      "step": 417960
    },
    {
      "epoch": 0.6840334374161282,
      "grad_norm": 1.290091633796692,
      "learning_rate": 8.688810843222657e-06,
      "loss": 0.0282,
      "step": 417980
    },
    {
      "epoch": 0.6840661678547816,
      "grad_norm": 1.0654011964797974,
      "learning_rate": 8.68874495100914e-06,
      "loss": 0.0369,
      "step": 418000
    },
    {
      "epoch": 0.684098898293435,
      "grad_norm": 1.2695682048797607,
      "learning_rate": 8.688679058795623e-06,
      "loss": 0.0366,
      "step": 418020
    },
    {
      "epoch": 0.6841316287320882,
      "grad_norm": 0.6649206280708313,
      "learning_rate": 8.688613166582106e-06,
      "loss": 0.0399,
      "step": 418040
    },
    {
      "epoch": 0.6841643591707416,
      "grad_norm": 1.0683403015136719,
      "learning_rate": 8.688547274368588e-06,
      "loss": 0.0357,
      "step": 418060
    },
    {
      "epoch": 0.684197089609395,
      "grad_norm": 0.8481233716011047,
      "learning_rate": 8.688481382155072e-06,
      "loss": 0.035,
      "step": 418080
    },
    {
      "epoch": 0.6842298200480483,
      "grad_norm": 1.8308844566345215,
      "learning_rate": 8.688415489941554e-06,
      "loss": 0.0482,
      "step": 418100
    },
    {
      "epoch": 0.6842625504867016,
      "grad_norm": 1.3592978715896606,
      "learning_rate": 8.688349597728037e-06,
      "loss": 0.0336,
      "step": 418120
    },
    {
      "epoch": 0.684295280925355,
      "grad_norm": 0.7028366327285767,
      "learning_rate": 8.688283705514519e-06,
      "loss": 0.0326,
      "step": 418140
    },
    {
      "epoch": 0.6843280113640083,
      "grad_norm": 2.0429084300994873,
      "learning_rate": 8.688217813301003e-06,
      "loss": 0.0354,
      "step": 418160
    },
    {
      "epoch": 0.6843607418026616,
      "grad_norm": 0.5160625576972961,
      "learning_rate": 8.688151921087485e-06,
      "loss": 0.0427,
      "step": 418180
    },
    {
      "epoch": 0.684393472241315,
      "grad_norm": 0.6468233466148376,
      "learning_rate": 8.688086028873968e-06,
      "loss": 0.0397,
      "step": 418200
    },
    {
      "epoch": 0.6844262026799683,
      "grad_norm": 0.47547388076782227,
      "learning_rate": 8.688020136660452e-06,
      "loss": 0.0422,
      "step": 418220
    },
    {
      "epoch": 0.6844589331186216,
      "grad_norm": 1.4329348802566528,
      "learning_rate": 8.687954244446934e-06,
      "loss": 0.0371,
      "step": 418240
    },
    {
      "epoch": 0.684491663557275,
      "grad_norm": 1.3600454330444336,
      "learning_rate": 8.687888352233417e-06,
      "loss": 0.0432,
      "step": 418260
    },
    {
      "epoch": 0.6845243939959283,
      "grad_norm": 0.611561119556427,
      "learning_rate": 8.6878224600199e-06,
      "loss": 0.0342,
      "step": 418280
    },
    {
      "epoch": 0.6845571244345817,
      "grad_norm": 3.3833630084991455,
      "learning_rate": 8.687756567806383e-06,
      "loss": 0.0368,
      "step": 418300
    },
    {
      "epoch": 0.684589854873235,
      "grad_norm": 0.4869181513786316,
      "learning_rate": 8.687690675592866e-06,
      "loss": 0.0348,
      "step": 418320
    },
    {
      "epoch": 0.6846225853118884,
      "grad_norm": 0.7839717864990234,
      "learning_rate": 8.68762478337935e-06,
      "loss": 0.0313,
      "step": 418340
    },
    {
      "epoch": 0.6846553157505417,
      "grad_norm": 0.9869811534881592,
      "learning_rate": 8.687558891165832e-06,
      "loss": 0.0259,
      "step": 418360
    },
    {
      "epoch": 0.684688046189195,
      "grad_norm": 1.8041259050369263,
      "learning_rate": 8.687492998952315e-06,
      "loss": 0.0316,
      "step": 418380
    },
    {
      "epoch": 0.6847207766278484,
      "grad_norm": 0.5892869234085083,
      "learning_rate": 8.687427106738797e-06,
      "loss": 0.0367,
      "step": 418400
    },
    {
      "epoch": 0.6847535070665017,
      "grad_norm": 2.161932945251465,
      "learning_rate": 8.687361214525281e-06,
      "loss": 0.0326,
      "step": 418420
    },
    {
      "epoch": 0.684786237505155,
      "grad_norm": 2.187302350997925,
      "learning_rate": 8.687295322311763e-06,
      "loss": 0.0313,
      "step": 418440
    },
    {
      "epoch": 0.6848189679438084,
      "grad_norm": 1.816206932067871,
      "learning_rate": 8.687229430098246e-06,
      "loss": 0.0366,
      "step": 418460
    },
    {
      "epoch": 0.6848516983824617,
      "grad_norm": 1.3533542156219482,
      "learning_rate": 8.687163537884728e-06,
      "loss": 0.0226,
      "step": 418480
    },
    {
      "epoch": 0.6848844288211151,
      "grad_norm": 1.8186309337615967,
      "learning_rate": 8.687097645671212e-06,
      "loss": 0.0302,
      "step": 418500
    },
    {
      "epoch": 0.6849171592597684,
      "grad_norm": 0.6448522210121155,
      "learning_rate": 8.687031753457694e-06,
      "loss": 0.0373,
      "step": 418520
    },
    {
      "epoch": 0.6849498896984217,
      "grad_norm": 0.47368544340133667,
      "learning_rate": 8.686965861244177e-06,
      "loss": 0.0308,
      "step": 418540
    },
    {
      "epoch": 0.6849826201370751,
      "grad_norm": 1.8916656970977783,
      "learning_rate": 8.68689996903066e-06,
      "loss": 0.0295,
      "step": 418560
    },
    {
      "epoch": 0.6850153505757284,
      "grad_norm": 3.02607798576355,
      "learning_rate": 8.686834076817143e-06,
      "loss": 0.04,
      "step": 418580
    },
    {
      "epoch": 0.6850480810143817,
      "grad_norm": 2.1354920864105225,
      "learning_rate": 8.686768184603626e-06,
      "loss": 0.0208,
      "step": 418600
    },
    {
      "epoch": 0.6850808114530351,
      "grad_norm": 1.4995779991149902,
      "learning_rate": 8.686702292390108e-06,
      "loss": 0.0266,
      "step": 418620
    },
    {
      "epoch": 0.6851135418916884,
      "grad_norm": 2.632485866546631,
      "learning_rate": 8.686636400176592e-06,
      "loss": 0.0287,
      "step": 418640
    },
    {
      "epoch": 0.6851462723303418,
      "grad_norm": 0.6058508157730103,
      "learning_rate": 8.686570507963075e-06,
      "loss": 0.0353,
      "step": 418660
    },
    {
      "epoch": 0.6851790027689951,
      "grad_norm": 0.49929362535476685,
      "learning_rate": 8.686504615749557e-06,
      "loss": 0.0299,
      "step": 418680
    },
    {
      "epoch": 0.6852117332076485,
      "grad_norm": 0.5669854879379272,
      "learning_rate": 8.686438723536041e-06,
      "loss": 0.0292,
      "step": 418700
    },
    {
      "epoch": 0.6852444636463018,
      "grad_norm": 1.1311147212982178,
      "learning_rate": 8.686372831322525e-06,
      "loss": 0.0456,
      "step": 418720
    },
    {
      "epoch": 0.6852771940849551,
      "grad_norm": 0.8901875615119934,
      "learning_rate": 8.686306939109006e-06,
      "loss": 0.0383,
      "step": 418740
    },
    {
      "epoch": 0.6853099245236085,
      "grad_norm": 1.2228107452392578,
      "learning_rate": 8.68624104689549e-06,
      "loss": 0.0247,
      "step": 418760
    },
    {
      "epoch": 0.6853426549622618,
      "grad_norm": 1.601858139038086,
      "learning_rate": 8.686175154681972e-06,
      "loss": 0.0364,
      "step": 418780
    },
    {
      "epoch": 0.6853753854009151,
      "grad_norm": 1.1269259452819824,
      "learning_rate": 8.686109262468455e-06,
      "loss": 0.0302,
      "step": 418800
    },
    {
      "epoch": 0.6854081158395685,
      "grad_norm": 2.935220956802368,
      "learning_rate": 8.686043370254937e-06,
      "loss": 0.0332,
      "step": 418820
    },
    {
      "epoch": 0.6854408462782218,
      "grad_norm": 1.1063635349273682,
      "learning_rate": 8.685977478041421e-06,
      "loss": 0.0378,
      "step": 418840
    },
    {
      "epoch": 0.6854735767168751,
      "grad_norm": 3.38279390335083,
      "learning_rate": 8.685911585827903e-06,
      "loss": 0.0342,
      "step": 418860
    },
    {
      "epoch": 0.6855063071555285,
      "grad_norm": 2.795278787612915,
      "learning_rate": 8.685845693614386e-06,
      "loss": 0.032,
      "step": 418880
    },
    {
      "epoch": 0.6855390375941819,
      "grad_norm": 1.9398751258850098,
      "learning_rate": 8.685779801400868e-06,
      "loss": 0.0328,
      "step": 418900
    },
    {
      "epoch": 0.6855717680328351,
      "grad_norm": 0.5988401174545288,
      "learning_rate": 8.685713909187352e-06,
      "loss": 0.0316,
      "step": 418920
    },
    {
      "epoch": 0.6856044984714885,
      "grad_norm": 0.5769973397254944,
      "learning_rate": 8.685648016973834e-06,
      "loss": 0.0449,
      "step": 418940
    },
    {
      "epoch": 0.6856372289101419,
      "grad_norm": 1.3554261922836304,
      "learning_rate": 8.685582124760317e-06,
      "loss": 0.0275,
      "step": 418960
    },
    {
      "epoch": 0.6856699593487952,
      "grad_norm": 0.8307296633720398,
      "learning_rate": 8.685516232546801e-06,
      "loss": 0.0295,
      "step": 418980
    },
    {
      "epoch": 0.6857026897874485,
      "grad_norm": 2.5126655101776123,
      "learning_rate": 8.685450340333283e-06,
      "loss": 0.0414,
      "step": 419000
    },
    {
      "epoch": 0.6857354202261019,
      "grad_norm": 1.241473913192749,
      "learning_rate": 8.685384448119766e-06,
      "loss": 0.0319,
      "step": 419020
    },
    {
      "epoch": 0.6857681506647552,
      "grad_norm": 0.884097158908844,
      "learning_rate": 8.685318555906248e-06,
      "loss": 0.0214,
      "step": 419040
    },
    {
      "epoch": 0.6858008811034085,
      "grad_norm": 1.3523709774017334,
      "learning_rate": 8.685252663692732e-06,
      "loss": 0.0371,
      "step": 419060
    },
    {
      "epoch": 0.6858336115420619,
      "grad_norm": 0.5958203077316284,
      "learning_rate": 8.685186771479216e-06,
      "loss": 0.0284,
      "step": 419080
    },
    {
      "epoch": 0.6858663419807153,
      "grad_norm": 1.5279457569122314,
      "learning_rate": 8.685120879265697e-06,
      "loss": 0.0231,
      "step": 419100
    },
    {
      "epoch": 0.6858990724193685,
      "grad_norm": 0.5454347133636475,
      "learning_rate": 8.685054987052181e-06,
      "loss": 0.032,
      "step": 419120
    },
    {
      "epoch": 0.6859318028580219,
      "grad_norm": 0.47577622532844543,
      "learning_rate": 8.684989094838665e-06,
      "loss": 0.0232,
      "step": 419140
    },
    {
      "epoch": 0.6859645332966753,
      "grad_norm": 3.9679150581359863,
      "learning_rate": 8.684923202625146e-06,
      "loss": 0.0417,
      "step": 419160
    },
    {
      "epoch": 0.6859972637353285,
      "grad_norm": 0.9727568626403809,
      "learning_rate": 8.68485731041163e-06,
      "loss": 0.0343,
      "step": 419180
    },
    {
      "epoch": 0.6860299941739819,
      "grad_norm": 0.635756254196167,
      "learning_rate": 8.684791418198112e-06,
      "loss": 0.0288,
      "step": 419200
    },
    {
      "epoch": 0.6860627246126353,
      "grad_norm": 1.6631346940994263,
      "learning_rate": 8.684725525984596e-06,
      "loss": 0.0356,
      "step": 419220
    },
    {
      "epoch": 0.6860954550512885,
      "grad_norm": 1.0331939458847046,
      "learning_rate": 8.684659633771077e-06,
      "loss": 0.0371,
      "step": 419240
    },
    {
      "epoch": 0.6861281854899419,
      "grad_norm": 1.5550111532211304,
      "learning_rate": 8.684593741557561e-06,
      "loss": 0.0228,
      "step": 419260
    },
    {
      "epoch": 0.6861609159285953,
      "grad_norm": 0.5312099456787109,
      "learning_rate": 8.684527849344043e-06,
      "loss": 0.0289,
      "step": 419280
    },
    {
      "epoch": 0.6861936463672487,
      "grad_norm": 0.6660676598548889,
      "learning_rate": 8.684461957130526e-06,
      "loss": 0.0374,
      "step": 419300
    },
    {
      "epoch": 0.6862263768059019,
      "grad_norm": 0.7071825861930847,
      "learning_rate": 8.684396064917008e-06,
      "loss": 0.0273,
      "step": 419320
    },
    {
      "epoch": 0.6862591072445553,
      "grad_norm": 0.6067840456962585,
      "learning_rate": 8.684330172703492e-06,
      "loss": 0.0222,
      "step": 419340
    },
    {
      "epoch": 0.6862918376832087,
      "grad_norm": 2.2009472846984863,
      "learning_rate": 8.684264280489976e-06,
      "loss": 0.0299,
      "step": 419360
    },
    {
      "epoch": 0.6863245681218619,
      "grad_norm": 0.2298540621995926,
      "learning_rate": 8.684198388276457e-06,
      "loss": 0.0384,
      "step": 419380
    },
    {
      "epoch": 0.6863572985605153,
      "grad_norm": 2.386035442352295,
      "learning_rate": 8.684132496062941e-06,
      "loss": 0.0409,
      "step": 419400
    },
    {
      "epoch": 0.6863900289991687,
      "grad_norm": 1.8226314783096313,
      "learning_rate": 8.684066603849423e-06,
      "loss": 0.0335,
      "step": 419420
    },
    {
      "epoch": 0.6864227594378219,
      "grad_norm": 1.8434712886810303,
      "learning_rate": 8.684000711635907e-06,
      "loss": 0.0363,
      "step": 419440
    },
    {
      "epoch": 0.6864554898764753,
      "grad_norm": 1.9313322305679321,
      "learning_rate": 8.68393481942239e-06,
      "loss": 0.0363,
      "step": 419460
    },
    {
      "epoch": 0.6864882203151287,
      "grad_norm": 1.0571333169937134,
      "learning_rate": 8.683868927208872e-06,
      "loss": 0.0298,
      "step": 419480
    },
    {
      "epoch": 0.686520950753782,
      "grad_norm": 1.9972903728485107,
      "learning_rate": 8.683803034995356e-06,
      "loss": 0.038,
      "step": 419500
    },
    {
      "epoch": 0.6865536811924353,
      "grad_norm": 0.441193163394928,
      "learning_rate": 8.68373714278184e-06,
      "loss": 0.0222,
      "step": 419520
    },
    {
      "epoch": 0.6865864116310887,
      "grad_norm": 1.3362669944763184,
      "learning_rate": 8.683671250568321e-06,
      "loss": 0.0353,
      "step": 419540
    },
    {
      "epoch": 0.6866191420697421,
      "grad_norm": 1.254183053970337,
      "learning_rate": 8.683605358354805e-06,
      "loss": 0.0325,
      "step": 419560
    },
    {
      "epoch": 0.6866518725083953,
      "grad_norm": 1.345386028289795,
      "learning_rate": 8.683539466141287e-06,
      "loss": 0.0464,
      "step": 419580
    },
    {
      "epoch": 0.6866846029470487,
      "grad_norm": 2.1561238765716553,
      "learning_rate": 8.68347357392777e-06,
      "loss": 0.0324,
      "step": 419600
    },
    {
      "epoch": 0.6867173333857021,
      "grad_norm": 1.4483762979507446,
      "learning_rate": 8.683407681714252e-06,
      "loss": 0.0297,
      "step": 419620
    },
    {
      "epoch": 0.6867500638243553,
      "grad_norm": 1.2150167226791382,
      "learning_rate": 8.683341789500736e-06,
      "loss": 0.0288,
      "step": 419640
    },
    {
      "epoch": 0.6867827942630087,
      "grad_norm": 0.43749186396598816,
      "learning_rate": 8.683275897287218e-06,
      "loss": 0.0356,
      "step": 419660
    },
    {
      "epoch": 0.6868155247016621,
      "grad_norm": 1.4563268423080444,
      "learning_rate": 8.683210005073701e-06,
      "loss": 0.0318,
      "step": 419680
    },
    {
      "epoch": 0.6868482551403154,
      "grad_norm": 0.6334871053695679,
      "learning_rate": 8.683144112860185e-06,
      "loss": 0.0276,
      "step": 419700
    },
    {
      "epoch": 0.6868809855789687,
      "grad_norm": 0.2595824599266052,
      "learning_rate": 8.683078220646667e-06,
      "loss": 0.0332,
      "step": 419720
    },
    {
      "epoch": 0.6869137160176221,
      "grad_norm": 3.4888997077941895,
      "learning_rate": 8.68301232843315e-06,
      "loss": 0.0314,
      "step": 419740
    },
    {
      "epoch": 0.6869464464562755,
      "grad_norm": 4.403398513793945,
      "learning_rate": 8.682946436219632e-06,
      "loss": 0.0326,
      "step": 419760
    },
    {
      "epoch": 0.6869791768949287,
      "grad_norm": 1.6331788301467896,
      "learning_rate": 8.682880544006116e-06,
      "loss": 0.0247,
      "step": 419780
    },
    {
      "epoch": 0.6870119073335821,
      "grad_norm": 1.710569977760315,
      "learning_rate": 8.682814651792598e-06,
      "loss": 0.0392,
      "step": 419800
    },
    {
      "epoch": 0.6870446377722355,
      "grad_norm": 1.2846336364746094,
      "learning_rate": 8.682748759579081e-06,
      "loss": 0.0334,
      "step": 419820
    },
    {
      "epoch": 0.6870773682108887,
      "grad_norm": 1.3961944580078125,
      "learning_rate": 8.682682867365565e-06,
      "loss": 0.0453,
      "step": 419840
    },
    {
      "epoch": 0.6871100986495421,
      "grad_norm": 8.425220489501953,
      "learning_rate": 8.682616975152047e-06,
      "loss": 0.0367,
      "step": 419860
    },
    {
      "epoch": 0.6871428290881955,
      "grad_norm": 1.0784722566604614,
      "learning_rate": 8.68255108293853e-06,
      "loss": 0.0398,
      "step": 419880
    },
    {
      "epoch": 0.6871755595268488,
      "grad_norm": 1.0404250621795654,
      "learning_rate": 8.682485190725014e-06,
      "loss": 0.0288,
      "step": 419900
    },
    {
      "epoch": 0.6872082899655021,
      "grad_norm": 1.0383580923080444,
      "learning_rate": 8.682419298511496e-06,
      "loss": 0.0309,
      "step": 419920
    },
    {
      "epoch": 0.6872410204041555,
      "grad_norm": 0.4557850658893585,
      "learning_rate": 8.68235340629798e-06,
      "loss": 0.0285,
      "step": 419940
    },
    {
      "epoch": 0.6872737508428088,
      "grad_norm": 1.042156457901001,
      "learning_rate": 8.682287514084461e-06,
      "loss": 0.04,
      "step": 419960
    },
    {
      "epoch": 0.6873064812814621,
      "grad_norm": 0.9966914653778076,
      "learning_rate": 8.682221621870945e-06,
      "loss": 0.0304,
      "step": 419980
    },
    {
      "epoch": 0.6873392117201155,
      "grad_norm": 0.5040397644042969,
      "learning_rate": 8.682155729657427e-06,
      "loss": 0.0303,
      "step": 420000
    },
    {
      "epoch": 0.6873719421587688,
      "grad_norm": 2.0970687866210938,
      "learning_rate": 8.68208983744391e-06,
      "loss": 0.0473,
      "step": 420020
    },
    {
      "epoch": 0.6874046725974221,
      "grad_norm": 2.635246992111206,
      "learning_rate": 8.682023945230394e-06,
      "loss": 0.0359,
      "step": 420040
    },
    {
      "epoch": 0.6874374030360755,
      "grad_norm": 0.6300111413002014,
      "learning_rate": 8.681958053016876e-06,
      "loss": 0.0224,
      "step": 420060
    },
    {
      "epoch": 0.6874701334747289,
      "grad_norm": 0.8147052526473999,
      "learning_rate": 8.68189216080336e-06,
      "loss": 0.0253,
      "step": 420080
    },
    {
      "epoch": 0.6875028639133821,
      "grad_norm": 8.504623413085938,
      "learning_rate": 8.681826268589841e-06,
      "loss": 0.0286,
      "step": 420100
    },
    {
      "epoch": 0.6875355943520355,
      "grad_norm": 1.2600352764129639,
      "learning_rate": 8.681760376376325e-06,
      "loss": 0.0285,
      "step": 420120
    },
    {
      "epoch": 0.6875683247906889,
      "grad_norm": 0.4721945822238922,
      "learning_rate": 8.681694484162807e-06,
      "loss": 0.0306,
      "step": 420140
    },
    {
      "epoch": 0.6876010552293422,
      "grad_norm": 0.5574638843536377,
      "learning_rate": 8.68162859194929e-06,
      "loss": 0.0324,
      "step": 420160
    },
    {
      "epoch": 0.6876337856679955,
      "grad_norm": 2.698511838912964,
      "learning_rate": 8.681562699735772e-06,
      "loss": 0.018,
      "step": 420180
    },
    {
      "epoch": 0.6876665161066489,
      "grad_norm": 0.42878052592277527,
      "learning_rate": 8.681496807522256e-06,
      "loss": 0.0392,
      "step": 420200
    },
    {
      "epoch": 0.6876992465453022,
      "grad_norm": 0.7116962671279907,
      "learning_rate": 8.681430915308738e-06,
      "loss": 0.0362,
      "step": 420220
    },
    {
      "epoch": 0.6877319769839555,
      "grad_norm": 1.5336638689041138,
      "learning_rate": 8.681365023095221e-06,
      "loss": 0.0322,
      "step": 420240
    },
    {
      "epoch": 0.6877647074226089,
      "grad_norm": 0.5704357624053955,
      "learning_rate": 8.681299130881705e-06,
      "loss": 0.0259,
      "step": 420260
    },
    {
      "epoch": 0.6877974378612622,
      "grad_norm": 1.466739296913147,
      "learning_rate": 8.681233238668187e-06,
      "loss": 0.0412,
      "step": 420280
    },
    {
      "epoch": 0.6878301682999155,
      "grad_norm": 2.3926949501037598,
      "learning_rate": 8.68116734645467e-06,
      "loss": 0.0275,
      "step": 420300
    },
    {
      "epoch": 0.6878628987385689,
      "grad_norm": 0.35974809527397156,
      "learning_rate": 8.681101454241154e-06,
      "loss": 0.0385,
      "step": 420320
    },
    {
      "epoch": 0.6878956291772222,
      "grad_norm": 1.581795573234558,
      "learning_rate": 8.681035562027636e-06,
      "loss": 0.0353,
      "step": 420340
    },
    {
      "epoch": 0.6879283596158756,
      "grad_norm": 1.6770117282867432,
      "learning_rate": 8.68096966981412e-06,
      "loss": 0.035,
      "step": 420360
    },
    {
      "epoch": 0.6879610900545289,
      "grad_norm": 1.690138816833496,
      "learning_rate": 8.680903777600601e-06,
      "loss": 0.0381,
      "step": 420380
    },
    {
      "epoch": 0.6879938204931823,
      "grad_norm": 1.0692201852798462,
      "learning_rate": 8.680837885387085e-06,
      "loss": 0.0431,
      "step": 420400
    },
    {
      "epoch": 0.6880265509318356,
      "grad_norm": 4.042606353759766,
      "learning_rate": 8.680771993173568e-06,
      "loss": 0.0382,
      "step": 420420
    },
    {
      "epoch": 0.6880592813704889,
      "grad_norm": 0.98402339220047,
      "learning_rate": 8.68070610096005e-06,
      "loss": 0.037,
      "step": 420440
    },
    {
      "epoch": 0.6880920118091423,
      "grad_norm": 0.3402688503265381,
      "learning_rate": 8.680640208746534e-06,
      "loss": 0.0273,
      "step": 420460
    },
    {
      "epoch": 0.6881247422477956,
      "grad_norm": 2.230232000350952,
      "learning_rate": 8.680574316533016e-06,
      "loss": 0.031,
      "step": 420480
    },
    {
      "epoch": 0.6881574726864489,
      "grad_norm": 1.497274398803711,
      "learning_rate": 8.6805084243195e-06,
      "loss": 0.0237,
      "step": 420500
    },
    {
      "epoch": 0.6881902031251023,
      "grad_norm": 1.6143314838409424,
      "learning_rate": 8.680442532105981e-06,
      "loss": 0.0348,
      "step": 420520
    },
    {
      "epoch": 0.6882229335637556,
      "grad_norm": 1.3887410163879395,
      "learning_rate": 8.680376639892465e-06,
      "loss": 0.0278,
      "step": 420540
    },
    {
      "epoch": 0.688255664002409,
      "grad_norm": 1.9095262289047241,
      "learning_rate": 8.680310747678947e-06,
      "loss": 0.0378,
      "step": 420560
    },
    {
      "epoch": 0.6882883944410623,
      "grad_norm": 1.2478843927383423,
      "learning_rate": 8.68024485546543e-06,
      "loss": 0.0366,
      "step": 420580
    },
    {
      "epoch": 0.6883211248797156,
      "grad_norm": 0.2507461607456207,
      "learning_rate": 8.680178963251912e-06,
      "loss": 0.0346,
      "step": 420600
    },
    {
      "epoch": 0.688353855318369,
      "grad_norm": 1.2637196779251099,
      "learning_rate": 8.680113071038396e-06,
      "loss": 0.0279,
      "step": 420620
    },
    {
      "epoch": 0.6883865857570223,
      "grad_norm": 0.11152925342321396,
      "learning_rate": 8.68004717882488e-06,
      "loss": 0.0352,
      "step": 420640
    },
    {
      "epoch": 0.6884193161956756,
      "grad_norm": 2.3423328399658203,
      "learning_rate": 8.679981286611361e-06,
      "loss": 0.0411,
      "step": 420660
    },
    {
      "epoch": 0.688452046634329,
      "grad_norm": 2.0597445964813232,
      "learning_rate": 8.679915394397845e-06,
      "loss": 0.0345,
      "step": 420680
    },
    {
      "epoch": 0.6884847770729823,
      "grad_norm": 1.3547968864440918,
      "learning_rate": 8.679849502184328e-06,
      "loss": 0.031,
      "step": 420700
    },
    {
      "epoch": 0.6885175075116357,
      "grad_norm": 0.9503425359725952,
      "learning_rate": 8.67978360997081e-06,
      "loss": 0.035,
      "step": 420720
    },
    {
      "epoch": 0.688550237950289,
      "grad_norm": 2.1740994453430176,
      "learning_rate": 8.679717717757294e-06,
      "loss": 0.0295,
      "step": 420740
    },
    {
      "epoch": 0.6885829683889424,
      "grad_norm": 0.15404385328292847,
      "learning_rate": 8.679651825543778e-06,
      "loss": 0.0364,
      "step": 420760
    },
    {
      "epoch": 0.6886156988275957,
      "grad_norm": 0.8036995530128479,
      "learning_rate": 8.67958593333026e-06,
      "loss": 0.0234,
      "step": 420780
    },
    {
      "epoch": 0.688648429266249,
      "grad_norm": 1.3303290605545044,
      "learning_rate": 8.679520041116743e-06,
      "loss": 0.0313,
      "step": 420800
    },
    {
      "epoch": 0.6886811597049024,
      "grad_norm": 1.0202982425689697,
      "learning_rate": 8.679454148903225e-06,
      "loss": 0.0463,
      "step": 420820
    },
    {
      "epoch": 0.6887138901435557,
      "grad_norm": 0.5515815019607544,
      "learning_rate": 8.679388256689708e-06,
      "loss": 0.0272,
      "step": 420840
    },
    {
      "epoch": 0.688746620582209,
      "grad_norm": 1.1488451957702637,
      "learning_rate": 8.67932236447619e-06,
      "loss": 0.0331,
      "step": 420860
    },
    {
      "epoch": 0.6887793510208624,
      "grad_norm": 0.7516541481018066,
      "learning_rate": 8.679256472262674e-06,
      "loss": 0.0355,
      "step": 420880
    },
    {
      "epoch": 0.6888120814595157,
      "grad_norm": 0.37349212169647217,
      "learning_rate": 8.679190580049156e-06,
      "loss": 0.0267,
      "step": 420900
    },
    {
      "epoch": 0.688844811898169,
      "grad_norm": 0.11255811154842377,
      "learning_rate": 8.67912468783564e-06,
      "loss": 0.0264,
      "step": 420920
    },
    {
      "epoch": 0.6888775423368224,
      "grad_norm": 0.7687464356422424,
      "learning_rate": 8.679058795622121e-06,
      "loss": 0.0275,
      "step": 420940
    },
    {
      "epoch": 0.6889102727754758,
      "grad_norm": 1.300230860710144,
      "learning_rate": 8.678992903408605e-06,
      "loss": 0.0381,
      "step": 420960
    },
    {
      "epoch": 0.688943003214129,
      "grad_norm": 0.9007318019866943,
      "learning_rate": 8.678927011195087e-06,
      "loss": 0.0346,
      "step": 420980
    },
    {
      "epoch": 0.6889757336527824,
      "grad_norm": 1.1476168632507324,
      "learning_rate": 8.67886111898157e-06,
      "loss": 0.0254,
      "step": 421000
    },
    {
      "epoch": 0.6890084640914358,
      "grad_norm": 1.948818564414978,
      "learning_rate": 8.678795226768052e-06,
      "loss": 0.0299,
      "step": 421020
    },
    {
      "epoch": 0.689041194530089,
      "grad_norm": 1.707312822341919,
      "learning_rate": 8.678729334554536e-06,
      "loss": 0.0235,
      "step": 421040
    },
    {
      "epoch": 0.6890739249687424,
      "grad_norm": 0.42579758167266846,
      "learning_rate": 8.67866344234102e-06,
      "loss": 0.0298,
      "step": 421060
    },
    {
      "epoch": 0.6891066554073958,
      "grad_norm": 1.6493184566497803,
      "learning_rate": 8.678597550127501e-06,
      "loss": 0.032,
      "step": 421080
    },
    {
      "epoch": 0.6891393858460491,
      "grad_norm": 0.4638664126396179,
      "learning_rate": 8.678531657913985e-06,
      "loss": 0.0294,
      "step": 421100
    },
    {
      "epoch": 0.6891721162847024,
      "grad_norm": 1.2681139707565308,
      "learning_rate": 8.678465765700469e-06,
      "loss": 0.0325,
      "step": 421120
    },
    {
      "epoch": 0.6892048467233558,
      "grad_norm": 0.9561192393302917,
      "learning_rate": 8.67839987348695e-06,
      "loss": 0.0282,
      "step": 421140
    },
    {
      "epoch": 0.6892375771620092,
      "grad_norm": 1.1555148363113403,
      "learning_rate": 8.678333981273434e-06,
      "loss": 0.0396,
      "step": 421160
    },
    {
      "epoch": 0.6892703076006624,
      "grad_norm": 1.4600412845611572,
      "learning_rate": 8.678268089059918e-06,
      "loss": 0.0324,
      "step": 421180
    },
    {
      "epoch": 0.6893030380393158,
      "grad_norm": 0.8182737827301025,
      "learning_rate": 8.6782021968464e-06,
      "loss": 0.0184,
      "step": 421200
    },
    {
      "epoch": 0.6893357684779692,
      "grad_norm": 0.3793178200721741,
      "learning_rate": 8.678136304632883e-06,
      "loss": 0.0332,
      "step": 421220
    },
    {
      "epoch": 0.6893684989166224,
      "grad_norm": 0.48322394490242004,
      "learning_rate": 8.678070412419365e-06,
      "loss": 0.025,
      "step": 421240
    },
    {
      "epoch": 0.6894012293552758,
      "grad_norm": 0.5753160715103149,
      "learning_rate": 8.678004520205849e-06,
      "loss": 0.0326,
      "step": 421260
    },
    {
      "epoch": 0.6894339597939292,
      "grad_norm": 1.855965495109558,
      "learning_rate": 8.67793862799233e-06,
      "loss": 0.0343,
      "step": 421280
    },
    {
      "epoch": 0.6894666902325824,
      "grad_norm": 0.5008773803710938,
      "learning_rate": 8.677872735778814e-06,
      "loss": 0.0349,
      "step": 421300
    },
    {
      "epoch": 0.6894994206712358,
      "grad_norm": 8.325177192687988,
      "learning_rate": 8.677806843565296e-06,
      "loss": 0.0325,
      "step": 421320
    },
    {
      "epoch": 0.6895321511098892,
      "grad_norm": 0.9513661861419678,
      "learning_rate": 8.67774095135178e-06,
      "loss": 0.0375,
      "step": 421340
    },
    {
      "epoch": 0.6895648815485426,
      "grad_norm": 0.7504033446311951,
      "learning_rate": 8.677675059138261e-06,
      "loss": 0.0309,
      "step": 421360
    },
    {
      "epoch": 0.6895976119871958,
      "grad_norm": 1.963999629020691,
      "learning_rate": 8.677609166924745e-06,
      "loss": 0.0309,
      "step": 421380
    },
    {
      "epoch": 0.6896303424258492,
      "grad_norm": 1.9874581098556519,
      "learning_rate": 8.677543274711227e-06,
      "loss": 0.0331,
      "step": 421400
    },
    {
      "epoch": 0.6896630728645026,
      "grad_norm": 0.25035837292671204,
      "learning_rate": 8.67747738249771e-06,
      "loss": 0.0345,
      "step": 421420
    },
    {
      "epoch": 0.6896958033031558,
      "grad_norm": 1.9610649347305298,
      "learning_rate": 8.677411490284194e-06,
      "loss": 0.0457,
      "step": 421440
    },
    {
      "epoch": 0.6897285337418092,
      "grad_norm": 2.633268356323242,
      "learning_rate": 8.677345598070676e-06,
      "loss": 0.0408,
      "step": 421460
    },
    {
      "epoch": 0.6897612641804626,
      "grad_norm": 0.978003740310669,
      "learning_rate": 8.67727970585716e-06,
      "loss": 0.0361,
      "step": 421480
    },
    {
      "epoch": 0.6897939946191158,
      "grad_norm": 1.6505218744277954,
      "learning_rate": 8.677213813643643e-06,
      "loss": 0.0338,
      "step": 421500
    },
    {
      "epoch": 0.6898267250577692,
      "grad_norm": 0.40176090598106384,
      "learning_rate": 8.677147921430125e-06,
      "loss": 0.034,
      "step": 421520
    },
    {
      "epoch": 0.6898594554964226,
      "grad_norm": 2.4841806888580322,
      "learning_rate": 8.677082029216609e-06,
      "loss": 0.0364,
      "step": 421540
    },
    {
      "epoch": 0.689892185935076,
      "grad_norm": 0.5807898044586182,
      "learning_rate": 8.677016137003092e-06,
      "loss": 0.0348,
      "step": 421560
    },
    {
      "epoch": 0.6899249163737292,
      "grad_norm": 0.9099138379096985,
      "learning_rate": 8.676950244789574e-06,
      "loss": 0.0271,
      "step": 421580
    },
    {
      "epoch": 0.6899576468123826,
      "grad_norm": 1.151721715927124,
      "learning_rate": 8.676884352576058e-06,
      "loss": 0.0366,
      "step": 421600
    },
    {
      "epoch": 0.689990377251036,
      "grad_norm": 0.44485965371131897,
      "learning_rate": 8.67681846036254e-06,
      "loss": 0.0393,
      "step": 421620
    },
    {
      "epoch": 0.6900231076896892,
      "grad_norm": 2.0910656452178955,
      "learning_rate": 8.676752568149023e-06,
      "loss": 0.0348,
      "step": 421640
    },
    {
      "epoch": 0.6900558381283426,
      "grad_norm": 1.9557216167449951,
      "learning_rate": 8.676686675935505e-06,
      "loss": 0.0291,
      "step": 421660
    },
    {
      "epoch": 0.690088568566996,
      "grad_norm": 0.7563018798828125,
      "learning_rate": 8.676620783721989e-06,
      "loss": 0.0307,
      "step": 421680
    },
    {
      "epoch": 0.6901212990056492,
      "grad_norm": 0.5138590335845947,
      "learning_rate": 8.67655489150847e-06,
      "loss": 0.0333,
      "step": 421700
    },
    {
      "epoch": 0.6901540294443026,
      "grad_norm": 1.7130545377731323,
      "learning_rate": 8.676488999294954e-06,
      "loss": 0.0316,
      "step": 421720
    },
    {
      "epoch": 0.690186759882956,
      "grad_norm": 0.4718543291091919,
      "learning_rate": 8.676423107081436e-06,
      "loss": 0.035,
      "step": 421740
    },
    {
      "epoch": 0.6902194903216093,
      "grad_norm": 1.2549539804458618,
      "learning_rate": 8.67635721486792e-06,
      "loss": 0.0398,
      "step": 421760
    },
    {
      "epoch": 0.6902522207602626,
      "grad_norm": 0.6405599117279053,
      "learning_rate": 8.676291322654401e-06,
      "loss": 0.0226,
      "step": 421780
    },
    {
      "epoch": 0.690284951198916,
      "grad_norm": 0.4287284314632416,
      "learning_rate": 8.676225430440885e-06,
      "loss": 0.0326,
      "step": 421800
    },
    {
      "epoch": 0.6903176816375693,
      "grad_norm": 1.0271788835525513,
      "learning_rate": 8.676159538227369e-06,
      "loss": 0.0449,
      "step": 421820
    },
    {
      "epoch": 0.6903504120762226,
      "grad_norm": 0.9238430261611938,
      "learning_rate": 8.67609364601385e-06,
      "loss": 0.0373,
      "step": 421840
    },
    {
      "epoch": 0.690383142514876,
      "grad_norm": 0.9942391514778137,
      "learning_rate": 8.676027753800334e-06,
      "loss": 0.0275,
      "step": 421860
    },
    {
      "epoch": 0.6904158729535294,
      "grad_norm": 0.948409378528595,
      "learning_rate": 8.675961861586816e-06,
      "loss": 0.0291,
      "step": 421880
    },
    {
      "epoch": 0.6904486033921826,
      "grad_norm": 0.6923201680183411,
      "learning_rate": 8.6758959693733e-06,
      "loss": 0.0402,
      "step": 421900
    },
    {
      "epoch": 0.690481333830836,
      "grad_norm": 1.1045889854431152,
      "learning_rate": 8.675830077159783e-06,
      "loss": 0.0302,
      "step": 421920
    },
    {
      "epoch": 0.6905140642694894,
      "grad_norm": 1.0455068349838257,
      "learning_rate": 8.675764184946267e-06,
      "loss": 0.0448,
      "step": 421940
    },
    {
      "epoch": 0.6905467947081427,
      "grad_norm": 1.2461568117141724,
      "learning_rate": 8.675698292732749e-06,
      "loss": 0.0396,
      "step": 421960
    },
    {
      "epoch": 0.690579525146796,
      "grad_norm": 1.5414276123046875,
      "learning_rate": 8.675632400519232e-06,
      "loss": 0.0264,
      "step": 421980
    },
    {
      "epoch": 0.6906122555854494,
      "grad_norm": 0.3266158401966095,
      "learning_rate": 8.675566508305714e-06,
      "loss": 0.0291,
      "step": 422000
    },
    {
      "epoch": 0.6906449860241027,
      "grad_norm": 1.328359603881836,
      "learning_rate": 8.675500616092198e-06,
      "loss": 0.0265,
      "step": 422020
    },
    {
      "epoch": 0.690677716462756,
      "grad_norm": 0.7220537066459656,
      "learning_rate": 8.67543472387868e-06,
      "loss": 0.0233,
      "step": 422040
    },
    {
      "epoch": 0.6907104469014094,
      "grad_norm": 1.5914268493652344,
      "learning_rate": 8.675368831665163e-06,
      "loss": 0.0307,
      "step": 422060
    },
    {
      "epoch": 0.6907431773400627,
      "grad_norm": 0.5495244264602661,
      "learning_rate": 8.675302939451645e-06,
      "loss": 0.0396,
      "step": 422080
    },
    {
      "epoch": 0.690775907778716,
      "grad_norm": 1.596434235572815,
      "learning_rate": 8.675237047238129e-06,
      "loss": 0.0381,
      "step": 422100
    },
    {
      "epoch": 0.6908086382173694,
      "grad_norm": 0.7381250858306885,
      "learning_rate": 8.67517115502461e-06,
      "loss": 0.0231,
      "step": 422120
    },
    {
      "epoch": 0.6908413686560227,
      "grad_norm": 1.6024162769317627,
      "learning_rate": 8.675105262811094e-06,
      "loss": 0.0331,
      "step": 422140
    },
    {
      "epoch": 0.6908740990946761,
      "grad_norm": 2.2430062294006348,
      "learning_rate": 8.675039370597578e-06,
      "loss": 0.03,
      "step": 422160
    },
    {
      "epoch": 0.6909068295333294,
      "grad_norm": 0.7918822765350342,
      "learning_rate": 8.67497347838406e-06,
      "loss": 0.0299,
      "step": 422180
    },
    {
      "epoch": 0.6909395599719828,
      "grad_norm": 3.064979076385498,
      "learning_rate": 8.674907586170543e-06,
      "loss": 0.0368,
      "step": 422200
    },
    {
      "epoch": 0.6909722904106361,
      "grad_norm": 1.5359821319580078,
      "learning_rate": 8.674841693957025e-06,
      "loss": 0.0323,
      "step": 422220
    },
    {
      "epoch": 0.6910050208492894,
      "grad_norm": 0.6416917443275452,
      "learning_rate": 8.674775801743509e-06,
      "loss": 0.0337,
      "step": 422240
    },
    {
      "epoch": 0.6910377512879428,
      "grad_norm": 0.7116718888282776,
      "learning_rate": 8.67470990952999e-06,
      "loss": 0.0387,
      "step": 422260
    },
    {
      "epoch": 0.6910704817265961,
      "grad_norm": 3.97231388092041,
      "learning_rate": 8.674644017316474e-06,
      "loss": 0.0353,
      "step": 422280
    },
    {
      "epoch": 0.6911032121652494,
      "grad_norm": 2.683284282684326,
      "learning_rate": 8.674578125102958e-06,
      "loss": 0.0273,
      "step": 422300
    },
    {
      "epoch": 0.6911359426039028,
      "grad_norm": 1.7813832759857178,
      "learning_rate": 8.67451223288944e-06,
      "loss": 0.028,
      "step": 422320
    },
    {
      "epoch": 0.6911686730425561,
      "grad_norm": 0.9207803606987,
      "learning_rate": 8.674446340675923e-06,
      "loss": 0.0353,
      "step": 422340
    },
    {
      "epoch": 0.6912014034812095,
      "grad_norm": 1.55842125415802,
      "learning_rate": 8.674380448462407e-06,
      "loss": 0.0373,
      "step": 422360
    },
    {
      "epoch": 0.6912341339198628,
      "grad_norm": 5.181665420532227,
      "learning_rate": 8.674314556248889e-06,
      "loss": 0.0353,
      "step": 422380
    },
    {
      "epoch": 0.6912668643585161,
      "grad_norm": 0.8952465653419495,
      "learning_rate": 8.674248664035372e-06,
      "loss": 0.0242,
      "step": 422400
    },
    {
      "epoch": 0.6912995947971695,
      "grad_norm": 1.2658272981643677,
      "learning_rate": 8.674182771821854e-06,
      "loss": 0.0324,
      "step": 422420
    },
    {
      "epoch": 0.6913323252358228,
      "grad_norm": 4.2442522048950195,
      "learning_rate": 8.674116879608338e-06,
      "loss": 0.0244,
      "step": 422440
    },
    {
      "epoch": 0.6913650556744761,
      "grad_norm": 1.7664399147033691,
      "learning_rate": 8.67405098739482e-06,
      "loss": 0.0368,
      "step": 422460
    },
    {
      "epoch": 0.6913977861131295,
      "grad_norm": 0.8878927826881409,
      "learning_rate": 8.673985095181303e-06,
      "loss": 0.03,
      "step": 422480
    },
    {
      "epoch": 0.6914305165517828,
      "grad_norm": 1.1514067649841309,
      "learning_rate": 8.673919202967787e-06,
      "loss": 0.0526,
      "step": 422500
    },
    {
      "epoch": 0.6914632469904362,
      "grad_norm": 0.4300350546836853,
      "learning_rate": 8.673853310754269e-06,
      "loss": 0.0332,
      "step": 422520
    },
    {
      "epoch": 0.6914959774290895,
      "grad_norm": 2.8949427604675293,
      "learning_rate": 8.673787418540752e-06,
      "loss": 0.0306,
      "step": 422540
    },
    {
      "epoch": 0.6915287078677429,
      "grad_norm": 0.2661014497280121,
      "learning_rate": 8.673721526327234e-06,
      "loss": 0.0269,
      "step": 422560
    },
    {
      "epoch": 0.6915614383063962,
      "grad_norm": 0.3270314633846283,
      "learning_rate": 8.673655634113718e-06,
      "loss": 0.0247,
      "step": 422580
    },
    {
      "epoch": 0.6915941687450495,
      "grad_norm": 0.505285382270813,
      "learning_rate": 8.6735897419002e-06,
      "loss": 0.0292,
      "step": 422600
    },
    {
      "epoch": 0.6916268991837029,
      "grad_norm": 1.346734881401062,
      "learning_rate": 8.673523849686683e-06,
      "loss": 0.0202,
      "step": 422620
    },
    {
      "epoch": 0.6916596296223562,
      "grad_norm": 1.3459482192993164,
      "learning_rate": 8.673457957473165e-06,
      "loss": 0.0377,
      "step": 422640
    },
    {
      "epoch": 0.6916923600610095,
      "grad_norm": 0.24400246143341064,
      "learning_rate": 8.673392065259649e-06,
      "loss": 0.0316,
      "step": 422660
    },
    {
      "epoch": 0.6917250904996629,
      "grad_norm": 0.42536723613739014,
      "learning_rate": 8.673326173046132e-06,
      "loss": 0.0244,
      "step": 422680
    },
    {
      "epoch": 0.6917578209383162,
      "grad_norm": 0.8063603639602661,
      "learning_rate": 8.673260280832614e-06,
      "loss": 0.0204,
      "step": 422700
    },
    {
      "epoch": 0.6917905513769695,
      "grad_norm": 0.7436856031417847,
      "learning_rate": 8.673194388619098e-06,
      "loss": 0.0263,
      "step": 422720
    },
    {
      "epoch": 0.6918232818156229,
      "grad_norm": 0.9979972839355469,
      "learning_rate": 8.673128496405581e-06,
      "loss": 0.0435,
      "step": 422740
    },
    {
      "epoch": 0.6918560122542763,
      "grad_norm": 0.4506305754184723,
      "learning_rate": 8.673062604192063e-06,
      "loss": 0.0264,
      "step": 422760
    },
    {
      "epoch": 0.6918887426929295,
      "grad_norm": 0.41303756833076477,
      "learning_rate": 8.672996711978547e-06,
      "loss": 0.0392,
      "step": 422780
    },
    {
      "epoch": 0.6919214731315829,
      "grad_norm": 0.747027575969696,
      "learning_rate": 8.672930819765029e-06,
      "loss": 0.0276,
      "step": 422800
    },
    {
      "epoch": 0.6919542035702363,
      "grad_norm": 0.4673912227153778,
      "learning_rate": 8.672864927551512e-06,
      "loss": 0.0293,
      "step": 422820
    },
    {
      "epoch": 0.6919869340088896,
      "grad_norm": 2.3113486766815186,
      "learning_rate": 8.672799035337994e-06,
      "loss": 0.0244,
      "step": 422840
    },
    {
      "epoch": 0.6920196644475429,
      "grad_norm": 0.6202114820480347,
      "learning_rate": 8.672733143124478e-06,
      "loss": 0.0334,
      "step": 422860
    },
    {
      "epoch": 0.6920523948861963,
      "grad_norm": 0.5760636329650879,
      "learning_rate": 8.672667250910961e-06,
      "loss": 0.0395,
      "step": 422880
    },
    {
      "epoch": 0.6920851253248496,
      "grad_norm": 2.735464572906494,
      "learning_rate": 8.672601358697443e-06,
      "loss": 0.0309,
      "step": 422900
    },
    {
      "epoch": 0.6921178557635029,
      "grad_norm": 1.5403355360031128,
      "learning_rate": 8.672535466483927e-06,
      "loss": 0.0398,
      "step": 422920
    },
    {
      "epoch": 0.6921505862021563,
      "grad_norm": 0.5287023782730103,
      "learning_rate": 8.672469574270409e-06,
      "loss": 0.0336,
      "step": 422940
    },
    {
      "epoch": 0.6921833166408096,
      "grad_norm": 0.5290122032165527,
      "learning_rate": 8.672403682056892e-06,
      "loss": 0.0407,
      "step": 422960
    },
    {
      "epoch": 0.6922160470794629,
      "grad_norm": 0.7158542275428772,
      "learning_rate": 8.672337789843374e-06,
      "loss": 0.036,
      "step": 422980
    },
    {
      "epoch": 0.6922487775181163,
      "grad_norm": 0.8184232711791992,
      "learning_rate": 8.672271897629858e-06,
      "loss": 0.026,
      "step": 423000
    },
    {
      "epoch": 0.6922815079567697,
      "grad_norm": 1.7208009958267212,
      "learning_rate": 8.67220600541634e-06,
      "loss": 0.0338,
      "step": 423020
    },
    {
      "epoch": 0.6923142383954229,
      "grad_norm": 1.493243932723999,
      "learning_rate": 8.672140113202823e-06,
      "loss": 0.0371,
      "step": 423040
    },
    {
      "epoch": 0.6923469688340763,
      "grad_norm": 0.5192054510116577,
      "learning_rate": 8.672074220989305e-06,
      "loss": 0.0297,
      "step": 423060
    },
    {
      "epoch": 0.6923796992727297,
      "grad_norm": 1.6394850015640259,
      "learning_rate": 8.672008328775789e-06,
      "loss": 0.0268,
      "step": 423080
    },
    {
      "epoch": 0.692412429711383,
      "grad_norm": 0.8886535167694092,
      "learning_rate": 8.671942436562272e-06,
      "loss": 0.0211,
      "step": 423100
    },
    {
      "epoch": 0.6924451601500363,
      "grad_norm": 0.2870837450027466,
      "learning_rate": 8.671876544348754e-06,
      "loss": 0.0338,
      "step": 423120
    },
    {
      "epoch": 0.6924778905886897,
      "grad_norm": 1.7359340190887451,
      "learning_rate": 8.671810652135238e-06,
      "loss": 0.0403,
      "step": 423140
    },
    {
      "epoch": 0.692510621027343,
      "grad_norm": 1.397255301475525,
      "learning_rate": 8.671744759921722e-06,
      "loss": 0.0318,
      "step": 423160
    },
    {
      "epoch": 0.6925433514659963,
      "grad_norm": 0.8218896389007568,
      "learning_rate": 8.671678867708203e-06,
      "loss": 0.0275,
      "step": 423180
    },
    {
      "epoch": 0.6925760819046497,
      "grad_norm": 0.8174623847007751,
      "learning_rate": 8.671612975494687e-06,
      "loss": 0.0327,
      "step": 423200
    },
    {
      "epoch": 0.6926088123433031,
      "grad_norm": 2.310523271560669,
      "learning_rate": 8.67154708328117e-06,
      "loss": 0.0447,
      "step": 423220
    },
    {
      "epoch": 0.6926415427819563,
      "grad_norm": 0.5400621891021729,
      "learning_rate": 8.671481191067652e-06,
      "loss": 0.0304,
      "step": 423240
    },
    {
      "epoch": 0.6926742732206097,
      "grad_norm": 0.7085237503051758,
      "learning_rate": 8.671415298854136e-06,
      "loss": 0.0268,
      "step": 423260
    },
    {
      "epoch": 0.6927070036592631,
      "grad_norm": 1.3377288579940796,
      "learning_rate": 8.671349406640618e-06,
      "loss": 0.0295,
      "step": 423280
    },
    {
      "epoch": 0.6927397340979163,
      "grad_norm": 0.5697312355041504,
      "learning_rate": 8.671283514427102e-06,
      "loss": 0.0306,
      "step": 423300
    },
    {
      "epoch": 0.6927724645365697,
      "grad_norm": 0.5915865898132324,
      "learning_rate": 8.671217622213583e-06,
      "loss": 0.0283,
      "step": 423320
    },
    {
      "epoch": 0.6928051949752231,
      "grad_norm": 3.455430746078491,
      "learning_rate": 8.671151730000067e-06,
      "loss": 0.0353,
      "step": 423340
    },
    {
      "epoch": 0.6928379254138763,
      "grad_norm": 1.2637066841125488,
      "learning_rate": 8.671085837786549e-06,
      "loss": 0.0281,
      "step": 423360
    },
    {
      "epoch": 0.6928706558525297,
      "grad_norm": 2.2028584480285645,
      "learning_rate": 8.671019945573033e-06,
      "loss": 0.042,
      "step": 423380
    },
    {
      "epoch": 0.6929033862911831,
      "grad_norm": 0.7454102039337158,
      "learning_rate": 8.670954053359514e-06,
      "loss": 0.0443,
      "step": 423400
    },
    {
      "epoch": 0.6929361167298365,
      "grad_norm": 0.954805850982666,
      "learning_rate": 8.670888161145998e-06,
      "loss": 0.0403,
      "step": 423420
    },
    {
      "epoch": 0.6929688471684897,
      "grad_norm": 0.9195001125335693,
      "learning_rate": 8.67082226893248e-06,
      "loss": 0.0249,
      "step": 423440
    },
    {
      "epoch": 0.6930015776071431,
      "grad_norm": 0.8498545289039612,
      "learning_rate": 8.670756376718963e-06,
      "loss": 0.0275,
      "step": 423460
    },
    {
      "epoch": 0.6930343080457965,
      "grad_norm": 0.4812757968902588,
      "learning_rate": 8.670690484505447e-06,
      "loss": 0.0402,
      "step": 423480
    },
    {
      "epoch": 0.6930670384844497,
      "grad_norm": 0.34797877073287964,
      "learning_rate": 8.670624592291929e-06,
      "loss": 0.0301,
      "step": 423500
    },
    {
      "epoch": 0.6930997689231031,
      "grad_norm": 1.61684250831604,
      "learning_rate": 8.670558700078413e-06,
      "loss": 0.0219,
      "step": 423520
    },
    {
      "epoch": 0.6931324993617565,
      "grad_norm": 2.1485095024108887,
      "learning_rate": 8.670492807864896e-06,
      "loss": 0.0273,
      "step": 423540
    },
    {
      "epoch": 0.6931652298004097,
      "grad_norm": 0.6607927680015564,
      "learning_rate": 8.670426915651378e-06,
      "loss": 0.0256,
      "step": 423560
    },
    {
      "epoch": 0.6931979602390631,
      "grad_norm": 0.7472392916679382,
      "learning_rate": 8.670361023437862e-06,
      "loss": 0.0313,
      "step": 423580
    },
    {
      "epoch": 0.6932306906777165,
      "grad_norm": 0.9502196907997131,
      "learning_rate": 8.670295131224345e-06,
      "loss": 0.0266,
      "step": 423600
    },
    {
      "epoch": 0.6932634211163698,
      "grad_norm": 3.938218116760254,
      "learning_rate": 8.670229239010827e-06,
      "loss": 0.0273,
      "step": 423620
    },
    {
      "epoch": 0.6932961515550231,
      "grad_norm": 0.8348473310470581,
      "learning_rate": 8.67016334679731e-06,
      "loss": 0.0286,
      "step": 423640
    },
    {
      "epoch": 0.6933288819936765,
      "grad_norm": 0.432628333568573,
      "learning_rate": 8.670097454583793e-06,
      "loss": 0.0294,
      "step": 423660
    },
    {
      "epoch": 0.6933616124323299,
      "grad_norm": 0.8065987825393677,
      "learning_rate": 8.670031562370276e-06,
      "loss": 0.0326,
      "step": 423680
    },
    {
      "epoch": 0.6933943428709831,
      "grad_norm": 1.867506742477417,
      "learning_rate": 8.669965670156758e-06,
      "loss": 0.0517,
      "step": 423700
    },
    {
      "epoch": 0.6934270733096365,
      "grad_norm": 0.9086737632751465,
      "learning_rate": 8.669899777943242e-06,
      "loss": 0.0339,
      "step": 423720
    },
    {
      "epoch": 0.6934598037482899,
      "grad_norm": 0.4346995949745178,
      "learning_rate": 8.669833885729724e-06,
      "loss": 0.04,
      "step": 423740
    },
    {
      "epoch": 0.6934925341869431,
      "grad_norm": 1.7906370162963867,
      "learning_rate": 8.669767993516207e-06,
      "loss": 0.0308,
      "step": 423760
    },
    {
      "epoch": 0.6935252646255965,
      "grad_norm": 1.4867644309997559,
      "learning_rate": 8.669702101302689e-06,
      "loss": 0.0503,
      "step": 423780
    },
    {
      "epoch": 0.6935579950642499,
      "grad_norm": 1.1574678421020508,
      "learning_rate": 8.669636209089173e-06,
      "loss": 0.0291,
      "step": 423800
    },
    {
      "epoch": 0.6935907255029032,
      "grad_norm": 0.8380195498466492,
      "learning_rate": 8.669570316875654e-06,
      "loss": 0.029,
      "step": 423820
    },
    {
      "epoch": 0.6936234559415565,
      "grad_norm": 2.347759962081909,
      "learning_rate": 8.669504424662138e-06,
      "loss": 0.0282,
      "step": 423840
    },
    {
      "epoch": 0.6936561863802099,
      "grad_norm": 1.47175133228302,
      "learning_rate": 8.66943853244862e-06,
      "loss": 0.0333,
      "step": 423860
    },
    {
      "epoch": 0.6936889168188632,
      "grad_norm": 0.5289260149002075,
      "learning_rate": 8.669372640235104e-06,
      "loss": 0.0328,
      "step": 423880
    },
    {
      "epoch": 0.6937216472575165,
      "grad_norm": 0.7863408327102661,
      "learning_rate": 8.669306748021587e-06,
      "loss": 0.0444,
      "step": 423900
    },
    {
      "epoch": 0.6937543776961699,
      "grad_norm": 1.3035686016082764,
      "learning_rate": 8.669240855808069e-06,
      "loss": 0.0349,
      "step": 423920
    },
    {
      "epoch": 0.6937871081348232,
      "grad_norm": 0.7177771925926208,
      "learning_rate": 8.669174963594553e-06,
      "loss": 0.0335,
      "step": 423940
    },
    {
      "epoch": 0.6938198385734765,
      "grad_norm": 2.757472038269043,
      "learning_rate": 8.669109071381036e-06,
      "loss": 0.0396,
      "step": 423960
    },
    {
      "epoch": 0.6938525690121299,
      "grad_norm": 0.9632360339164734,
      "learning_rate": 8.669043179167518e-06,
      "loss": 0.0241,
      "step": 423980
    },
    {
      "epoch": 0.6938852994507833,
      "grad_norm": 1.037366509437561,
      "learning_rate": 8.668977286954002e-06,
      "loss": 0.042,
      "step": 424000
    },
    {
      "epoch": 0.6939180298894366,
      "grad_norm": 1.2059885263442993,
      "learning_rate": 8.668911394740485e-06,
      "loss": 0.0352,
      "step": 424020
    },
    {
      "epoch": 0.6939507603280899,
      "grad_norm": 1.4869139194488525,
      "learning_rate": 8.668845502526967e-06,
      "loss": 0.0311,
      "step": 424040
    },
    {
      "epoch": 0.6939834907667433,
      "grad_norm": 0.7007443904876709,
      "learning_rate": 8.66877961031345e-06,
      "loss": 0.0284,
      "step": 424060
    },
    {
      "epoch": 0.6940162212053966,
      "grad_norm": 0.5174130797386169,
      "learning_rate": 8.668713718099933e-06,
      "loss": 0.0378,
      "step": 424080
    },
    {
      "epoch": 0.6940489516440499,
      "grad_norm": 1.1785943508148193,
      "learning_rate": 8.668647825886416e-06,
      "loss": 0.0325,
      "step": 424100
    },
    {
      "epoch": 0.6940816820827033,
      "grad_norm": 0.6640558242797852,
      "learning_rate": 8.668581933672898e-06,
      "loss": 0.0389,
      "step": 424120
    },
    {
      "epoch": 0.6941144125213566,
      "grad_norm": 0.34384483098983765,
      "learning_rate": 8.668516041459382e-06,
      "loss": 0.0243,
      "step": 424140
    },
    {
      "epoch": 0.6941471429600099,
      "grad_norm": 1.8176101446151733,
      "learning_rate": 8.668450149245864e-06,
      "loss": 0.0278,
      "step": 424160
    },
    {
      "epoch": 0.6941798733986633,
      "grad_norm": 1.68511962890625,
      "learning_rate": 8.668384257032347e-06,
      "loss": 0.034,
      "step": 424180
    },
    {
      "epoch": 0.6942126038373166,
      "grad_norm": 0.5014349818229675,
      "learning_rate": 8.668318364818829e-06,
      "loss": 0.0319,
      "step": 424200
    },
    {
      "epoch": 0.69424533427597,
      "grad_norm": 1.1992923021316528,
      "learning_rate": 8.668252472605313e-06,
      "loss": 0.037,
      "step": 424220
    },
    {
      "epoch": 0.6942780647146233,
      "grad_norm": 2.6218087673187256,
      "learning_rate": 8.668186580391795e-06,
      "loss": 0.0301,
      "step": 424240
    },
    {
      "epoch": 0.6943107951532766,
      "grad_norm": 3.2625014781951904,
      "learning_rate": 8.668120688178278e-06,
      "loss": 0.0342,
      "step": 424260
    },
    {
      "epoch": 0.69434352559193,
      "grad_norm": 0.5369636416435242,
      "learning_rate": 8.668054795964762e-06,
      "loss": 0.0354,
      "step": 424280
    },
    {
      "epoch": 0.6943762560305833,
      "grad_norm": 0.9718867540359497,
      "learning_rate": 8.667988903751244e-06,
      "loss": 0.0376,
      "step": 424300
    },
    {
      "epoch": 0.6944089864692367,
      "grad_norm": 1.7342160940170288,
      "learning_rate": 8.667923011537727e-06,
      "loss": 0.0323,
      "step": 424320
    },
    {
      "epoch": 0.69444171690789,
      "grad_norm": 32.91354751586914,
      "learning_rate": 8.66785711932421e-06,
      "loss": 0.0344,
      "step": 424340
    },
    {
      "epoch": 0.6944744473465433,
      "grad_norm": 0.29861634969711304,
      "learning_rate": 8.667791227110693e-06,
      "loss": 0.0349,
      "step": 424360
    },
    {
      "epoch": 0.6945071777851967,
      "grad_norm": 0.7838751077651978,
      "learning_rate": 8.667725334897176e-06,
      "loss": 0.0326,
      "step": 424380
    },
    {
      "epoch": 0.69453990822385,
      "grad_norm": 1.1825124025344849,
      "learning_rate": 8.66765944268366e-06,
      "loss": 0.0392,
      "step": 424400
    },
    {
      "epoch": 0.6945726386625034,
      "grad_norm": 1.0961281061172485,
      "learning_rate": 8.667593550470142e-06,
      "loss": 0.0325,
      "step": 424420
    },
    {
      "epoch": 0.6946053691011567,
      "grad_norm": 1.6510123014450073,
      "learning_rate": 8.667527658256625e-06,
      "loss": 0.0291,
      "step": 424440
    },
    {
      "epoch": 0.69463809953981,
      "grad_norm": 0.730486273765564,
      "learning_rate": 8.667461766043107e-06,
      "loss": 0.0275,
      "step": 424460
    },
    {
      "epoch": 0.6946708299784634,
      "grad_norm": 0.437002032995224,
      "learning_rate": 8.66739587382959e-06,
      "loss": 0.0281,
      "step": 424480
    },
    {
      "epoch": 0.6947035604171167,
      "grad_norm": 0.7903742790222168,
      "learning_rate": 8.667329981616073e-06,
      "loss": 0.0372,
      "step": 424500
    },
    {
      "epoch": 0.69473629085577,
      "grad_norm": 1.895159125328064,
      "learning_rate": 8.667264089402556e-06,
      "loss": 0.0307,
      "step": 424520
    },
    {
      "epoch": 0.6947690212944234,
      "grad_norm": 1.800773024559021,
      "learning_rate": 8.667198197189038e-06,
      "loss": 0.0362,
      "step": 424540
    },
    {
      "epoch": 0.6948017517330767,
      "grad_norm": 0.9851065278053284,
      "learning_rate": 8.667132304975522e-06,
      "loss": 0.0391,
      "step": 424560
    },
    {
      "epoch": 0.69483448217173,
      "grad_norm": 1.7679108381271362,
      "learning_rate": 8.667066412762004e-06,
      "loss": 0.0247,
      "step": 424580
    },
    {
      "epoch": 0.6948672126103834,
      "grad_norm": 0.6977109313011169,
      "learning_rate": 8.667000520548487e-06,
      "loss": 0.0344,
      "step": 424600
    },
    {
      "epoch": 0.6948999430490368,
      "grad_norm": 1.162540078163147,
      "learning_rate": 8.66693462833497e-06,
      "loss": 0.024,
      "step": 424620
    },
    {
      "epoch": 0.6949326734876901,
      "grad_norm": 1.5251479148864746,
      "learning_rate": 8.666868736121453e-06,
      "loss": 0.0331,
      "step": 424640
    },
    {
      "epoch": 0.6949654039263434,
      "grad_norm": 1.5245676040649414,
      "learning_rate": 8.666802843907936e-06,
      "loss": 0.0472,
      "step": 424660
    },
    {
      "epoch": 0.6949981343649968,
      "grad_norm": 0.8687493205070496,
      "learning_rate": 8.666736951694418e-06,
      "loss": 0.026,
      "step": 424680
    },
    {
      "epoch": 0.6950308648036501,
      "grad_norm": 2.876797914505005,
      "learning_rate": 8.666671059480902e-06,
      "loss": 0.0419,
      "step": 424700
    },
    {
      "epoch": 0.6950635952423034,
      "grad_norm": 0.3804604709148407,
      "learning_rate": 8.666605167267385e-06,
      "loss": 0.0414,
      "step": 424720
    },
    {
      "epoch": 0.6950963256809568,
      "grad_norm": 0.3871135115623474,
      "learning_rate": 8.666539275053867e-06,
      "loss": 0.0285,
      "step": 424740
    },
    {
      "epoch": 0.6951290561196101,
      "grad_norm": 1.188808560371399,
      "learning_rate": 8.666473382840351e-06,
      "loss": 0.0283,
      "step": 424760
    },
    {
      "epoch": 0.6951617865582634,
      "grad_norm": 1.015483021736145,
      "learning_rate": 8.666407490626834e-06,
      "loss": 0.0387,
      "step": 424780
    },
    {
      "epoch": 0.6951945169969168,
      "grad_norm": 1.6997296810150146,
      "learning_rate": 8.666341598413316e-06,
      "loss": 0.0269,
      "step": 424800
    },
    {
      "epoch": 0.6952272474355702,
      "grad_norm": 2.006690502166748,
      "learning_rate": 8.6662757061998e-06,
      "loss": 0.0385,
      "step": 424820
    },
    {
      "epoch": 0.6952599778742234,
      "grad_norm": 1.0294430255889893,
      "learning_rate": 8.666209813986282e-06,
      "loss": 0.0334,
      "step": 424840
    },
    {
      "epoch": 0.6952927083128768,
      "grad_norm": 0.28643518686294556,
      "learning_rate": 8.666143921772765e-06,
      "loss": 0.0284,
      "step": 424860
    },
    {
      "epoch": 0.6953254387515302,
      "grad_norm": 0.8096796274185181,
      "learning_rate": 8.666078029559247e-06,
      "loss": 0.0349,
      "step": 424880
    },
    {
      "epoch": 0.6953581691901835,
      "grad_norm": 0.6795557737350464,
      "learning_rate": 8.666012137345731e-06,
      "loss": 0.0278,
      "step": 424900
    },
    {
      "epoch": 0.6953908996288368,
      "grad_norm": 1.4385154247283936,
      "learning_rate": 8.665946245132213e-06,
      "loss": 0.0293,
      "step": 424920
    },
    {
      "epoch": 0.6954236300674902,
      "grad_norm": 0.6534225344657898,
      "learning_rate": 8.665880352918696e-06,
      "loss": 0.0337,
      "step": 424940
    },
    {
      "epoch": 0.6954563605061435,
      "grad_norm": 0.7862085103988647,
      "learning_rate": 8.66581446070518e-06,
      "loss": 0.032,
      "step": 424960
    },
    {
      "epoch": 0.6954890909447968,
      "grad_norm": 0.9831777215003967,
      "learning_rate": 8.665748568491662e-06,
      "loss": 0.0319,
      "step": 424980
    },
    {
      "epoch": 0.6955218213834502,
      "grad_norm": 1.7039064168930054,
      "learning_rate": 8.665682676278145e-06,
      "loss": 0.031,
      "step": 425000
    },
    {
      "epoch": 0.6955545518221036,
      "grad_norm": 0.9373913407325745,
      "learning_rate": 8.665616784064627e-06,
      "loss": 0.0369,
      "step": 425020
    },
    {
      "epoch": 0.6955872822607568,
      "grad_norm": 0.5824229121208191,
      "learning_rate": 8.665550891851111e-06,
      "loss": 0.0312,
      "step": 425040
    },
    {
      "epoch": 0.6956200126994102,
      "grad_norm": 1.1781344413757324,
      "learning_rate": 8.665484999637593e-06,
      "loss": 0.0337,
      "step": 425060
    },
    {
      "epoch": 0.6956527431380636,
      "grad_norm": 1.0546880960464478,
      "learning_rate": 8.665419107424076e-06,
      "loss": 0.036,
      "step": 425080
    },
    {
      "epoch": 0.6956854735767168,
      "grad_norm": 0.8239922523498535,
      "learning_rate": 8.665353215210558e-06,
      "loss": 0.0395,
      "step": 425100
    },
    {
      "epoch": 0.6957182040153702,
      "grad_norm": 0.8573424220085144,
      "learning_rate": 8.665287322997042e-06,
      "loss": 0.0293,
      "step": 425120
    },
    {
      "epoch": 0.6957509344540236,
      "grad_norm": 0.615445077419281,
      "learning_rate": 8.665221430783525e-06,
      "loss": 0.0386,
      "step": 425140
    },
    {
      "epoch": 0.6957836648926768,
      "grad_norm": 1.371647834777832,
      "learning_rate": 8.665155538570007e-06,
      "loss": 0.0349,
      "step": 425160
    },
    {
      "epoch": 0.6958163953313302,
      "grad_norm": 0.713683009147644,
      "learning_rate": 8.665089646356491e-06,
      "loss": 0.0397,
      "step": 425180
    },
    {
      "epoch": 0.6958491257699836,
      "grad_norm": 1.0697760581970215,
      "learning_rate": 8.665023754142975e-06,
      "loss": 0.0242,
      "step": 425200
    },
    {
      "epoch": 0.695881856208637,
      "grad_norm": 0.7838307619094849,
      "learning_rate": 8.664957861929456e-06,
      "loss": 0.0386,
      "step": 425220
    },
    {
      "epoch": 0.6959145866472902,
      "grad_norm": 0.8169626593589783,
      "learning_rate": 8.66489196971594e-06,
      "loss": 0.0318,
      "step": 425240
    },
    {
      "epoch": 0.6959473170859436,
      "grad_norm": 0.8321458697319031,
      "learning_rate": 8.664826077502422e-06,
      "loss": 0.0416,
      "step": 425260
    },
    {
      "epoch": 0.695980047524597,
      "grad_norm": 0.7546903491020203,
      "learning_rate": 8.664760185288905e-06,
      "loss": 0.0352,
      "step": 425280
    },
    {
      "epoch": 0.6960127779632502,
      "grad_norm": 1.629564881324768,
      "learning_rate": 8.664694293075387e-06,
      "loss": 0.0282,
      "step": 425300
    },
    {
      "epoch": 0.6960455084019036,
      "grad_norm": 0.7186092138290405,
      "learning_rate": 8.664628400861871e-06,
      "loss": 0.034,
      "step": 425320
    },
    {
      "epoch": 0.696078238840557,
      "grad_norm": 0.5167558789253235,
      "learning_rate": 8.664562508648355e-06,
      "loss": 0.0376,
      "step": 425340
    },
    {
      "epoch": 0.6961109692792102,
      "grad_norm": 1.1035975217819214,
      "learning_rate": 8.664496616434836e-06,
      "loss": 0.0257,
      "step": 425360
    },
    {
      "epoch": 0.6961436997178636,
      "grad_norm": 2.6548776626586914,
      "learning_rate": 8.66443072422132e-06,
      "loss": 0.0366,
      "step": 425380
    },
    {
      "epoch": 0.696176430156517,
      "grad_norm": 0.6398153305053711,
      "learning_rate": 8.664364832007802e-06,
      "loss": 0.0218,
      "step": 425400
    },
    {
      "epoch": 0.6962091605951704,
      "grad_norm": 0.5480948686599731,
      "learning_rate": 8.664298939794286e-06,
      "loss": 0.0392,
      "step": 425420
    },
    {
      "epoch": 0.6962418910338236,
      "grad_norm": 0.8390225768089294,
      "learning_rate": 8.664233047580767e-06,
      "loss": 0.0271,
      "step": 425440
    },
    {
      "epoch": 0.696274621472477,
      "grad_norm": 0.790457546710968,
      "learning_rate": 8.664167155367251e-06,
      "loss": 0.0287,
      "step": 425460
    },
    {
      "epoch": 0.6963073519111304,
      "grad_norm": 0.8172304034233093,
      "learning_rate": 8.664101263153733e-06,
      "loss": 0.0447,
      "step": 425480
    },
    {
      "epoch": 0.6963400823497836,
      "grad_norm": 0.731682538986206,
      "learning_rate": 8.664035370940216e-06,
      "loss": 0.044,
      "step": 425500
    },
    {
      "epoch": 0.696372812788437,
      "grad_norm": 0.5706760883331299,
      "learning_rate": 8.6639694787267e-06,
      "loss": 0.0327,
      "step": 425520
    },
    {
      "epoch": 0.6964055432270904,
      "grad_norm": 0.7204551100730896,
      "learning_rate": 8.663903586513182e-06,
      "loss": 0.0421,
      "step": 425540
    },
    {
      "epoch": 0.6964382736657436,
      "grad_norm": 1.87080717086792,
      "learning_rate": 8.663837694299666e-06,
      "loss": 0.0392,
      "step": 425560
    },
    {
      "epoch": 0.696471004104397,
      "grad_norm": 0.987463653087616,
      "learning_rate": 8.663771802086149e-06,
      "loss": 0.0443,
      "step": 425580
    },
    {
      "epoch": 0.6965037345430504,
      "grad_norm": 1.1483656167984009,
      "learning_rate": 8.663705909872631e-06,
      "loss": 0.0264,
      "step": 425600
    },
    {
      "epoch": 0.6965364649817037,
      "grad_norm": 1.193173885345459,
      "learning_rate": 8.663640017659115e-06,
      "loss": 0.0418,
      "step": 425620
    },
    {
      "epoch": 0.696569195420357,
      "grad_norm": 0.18330654501914978,
      "learning_rate": 8.663574125445597e-06,
      "loss": 0.0305,
      "step": 425640
    },
    {
      "epoch": 0.6966019258590104,
      "grad_norm": 1.2474689483642578,
      "learning_rate": 8.66350823323208e-06,
      "loss": 0.0291,
      "step": 425660
    },
    {
      "epoch": 0.6966346562976637,
      "grad_norm": 1.145904302597046,
      "learning_rate": 8.663442341018564e-06,
      "loss": 0.0315,
      "step": 425680
    },
    {
      "epoch": 0.696667386736317,
      "grad_norm": 3.495547294616699,
      "learning_rate": 8.663376448805046e-06,
      "loss": 0.0399,
      "step": 425700
    },
    {
      "epoch": 0.6967001171749704,
      "grad_norm": 0.9453086256980896,
      "learning_rate": 8.66331055659153e-06,
      "loss": 0.0392,
      "step": 425720
    },
    {
      "epoch": 0.6967328476136238,
      "grad_norm": 2.0370757579803467,
      "learning_rate": 8.663244664378011e-06,
      "loss": 0.0374,
      "step": 425740
    },
    {
      "epoch": 0.696765578052277,
      "grad_norm": 2.30788516998291,
      "learning_rate": 8.663178772164495e-06,
      "loss": 0.039,
      "step": 425760
    },
    {
      "epoch": 0.6967983084909304,
      "grad_norm": 1.063378095626831,
      "learning_rate": 8.663112879950977e-06,
      "loss": 0.0364,
      "step": 425780
    },
    {
      "epoch": 0.6968310389295838,
      "grad_norm": 2.1478662490844727,
      "learning_rate": 8.66304698773746e-06,
      "loss": 0.0355,
      "step": 425800
    },
    {
      "epoch": 0.696863769368237,
      "grad_norm": 1.3209797143936157,
      "learning_rate": 8.662981095523942e-06,
      "loss": 0.0397,
      "step": 425820
    },
    {
      "epoch": 0.6968964998068904,
      "grad_norm": 1.4131801128387451,
      "learning_rate": 8.662915203310426e-06,
      "loss": 0.0341,
      "step": 425840
    },
    {
      "epoch": 0.6969292302455438,
      "grad_norm": 0.3911871612071991,
      "learning_rate": 8.662849311096907e-06,
      "loss": 0.0273,
      "step": 425860
    },
    {
      "epoch": 0.6969619606841971,
      "grad_norm": 1.3036235570907593,
      "learning_rate": 8.662783418883391e-06,
      "loss": 0.0261,
      "step": 425880
    },
    {
      "epoch": 0.6969946911228504,
      "grad_norm": 3.057380437850952,
      "learning_rate": 8.662717526669873e-06,
      "loss": 0.0292,
      "step": 425900
    },
    {
      "epoch": 0.6970274215615038,
      "grad_norm": 1.4022465944290161,
      "learning_rate": 8.662651634456357e-06,
      "loss": 0.0433,
      "step": 425920
    },
    {
      "epoch": 0.6970601520001571,
      "grad_norm": 0.25364676117897034,
      "learning_rate": 8.66258574224284e-06,
      "loss": 0.0374,
      "step": 425940
    },
    {
      "epoch": 0.6970928824388104,
      "grad_norm": 0.7686082124710083,
      "learning_rate": 8.662519850029322e-06,
      "loss": 0.033,
      "step": 425960
    },
    {
      "epoch": 0.6971256128774638,
      "grad_norm": 3.1210286617279053,
      "learning_rate": 8.662453957815806e-06,
      "loss": 0.0369,
      "step": 425980
    },
    {
      "epoch": 0.6971583433161171,
      "grad_norm": 1.4185758829116821,
      "learning_rate": 8.66238806560229e-06,
      "loss": 0.0304,
      "step": 426000
    },
    {
      "epoch": 0.6971910737547704,
      "grad_norm": 0.29103344678878784,
      "learning_rate": 8.662322173388771e-06,
      "loss": 0.0404,
      "step": 426020
    },
    {
      "epoch": 0.6972238041934238,
      "grad_norm": 2.6028897762298584,
      "learning_rate": 8.662256281175255e-06,
      "loss": 0.0423,
      "step": 426040
    },
    {
      "epoch": 0.6972565346320772,
      "grad_norm": 0.3416600525379181,
      "learning_rate": 8.662190388961738e-06,
      "loss": 0.0294,
      "step": 426060
    },
    {
      "epoch": 0.6972892650707305,
      "grad_norm": 0.5767586827278137,
      "learning_rate": 8.66212449674822e-06,
      "loss": 0.0317,
      "step": 426080
    },
    {
      "epoch": 0.6973219955093838,
      "grad_norm": 0.9273413419723511,
      "learning_rate": 8.662058604534704e-06,
      "loss": 0.042,
      "step": 426100
    },
    {
      "epoch": 0.6973547259480372,
      "grad_norm": 0.39902520179748535,
      "learning_rate": 8.661992712321186e-06,
      "loss": 0.0502,
      "step": 426120
    },
    {
      "epoch": 0.6973874563866905,
      "grad_norm": 0.36613741517066956,
      "learning_rate": 8.66192682010767e-06,
      "loss": 0.044,
      "step": 426140
    },
    {
      "epoch": 0.6974201868253438,
      "grad_norm": 0.9144970178604126,
      "learning_rate": 8.661860927894151e-06,
      "loss": 0.0356,
      "step": 426160
    },
    {
      "epoch": 0.6974529172639972,
      "grad_norm": 0.9173458218574524,
      "learning_rate": 8.661795035680635e-06,
      "loss": 0.03,
      "step": 426180
    },
    {
      "epoch": 0.6974856477026505,
      "grad_norm": 2.336570978164673,
      "learning_rate": 8.661729143467117e-06,
      "loss": 0.0482,
      "step": 426200
    },
    {
      "epoch": 0.6975183781413038,
      "grad_norm": 1.5944867134094238,
      "learning_rate": 8.6616632512536e-06,
      "loss": 0.022,
      "step": 426220
    },
    {
      "epoch": 0.6975511085799572,
      "grad_norm": 0.28516730666160583,
      "learning_rate": 8.661597359040082e-06,
      "loss": 0.032,
      "step": 426240
    },
    {
      "epoch": 0.6975838390186105,
      "grad_norm": 0.3622439205646515,
      "learning_rate": 8.661531466826566e-06,
      "loss": 0.035,
      "step": 426260
    },
    {
      "epoch": 0.6976165694572639,
      "grad_norm": 0.667999804019928,
      "learning_rate": 8.661465574613048e-06,
      "loss": 0.0393,
      "step": 426280
    },
    {
      "epoch": 0.6976492998959172,
      "grad_norm": 0.7021059393882751,
      "learning_rate": 8.661399682399531e-06,
      "loss": 0.034,
      "step": 426300
    },
    {
      "epoch": 0.6976820303345705,
      "grad_norm": 1.0003970861434937,
      "learning_rate": 8.661333790186015e-06,
      "loss": 0.0363,
      "step": 426320
    },
    {
      "epoch": 0.6977147607732239,
      "grad_norm": 0.7090343236923218,
      "learning_rate": 8.661267897972497e-06,
      "loss": 0.0261,
      "step": 426340
    },
    {
      "epoch": 0.6977474912118772,
      "grad_norm": 2.9523274898529053,
      "learning_rate": 8.66120200575898e-06,
      "loss": 0.0422,
      "step": 426360
    },
    {
      "epoch": 0.6977802216505306,
      "grad_norm": 0.5188284516334534,
      "learning_rate": 8.661136113545464e-06,
      "loss": 0.0267,
      "step": 426380
    },
    {
      "epoch": 0.6978129520891839,
      "grad_norm": 0.4359002411365509,
      "learning_rate": 8.661070221331946e-06,
      "loss": 0.0378,
      "step": 426400
    },
    {
      "epoch": 0.6978456825278372,
      "grad_norm": 0.7729886770248413,
      "learning_rate": 8.66100432911843e-06,
      "loss": 0.0348,
      "step": 426420
    },
    {
      "epoch": 0.6978784129664906,
      "grad_norm": 3.920395851135254,
      "learning_rate": 8.660938436904913e-06,
      "loss": 0.0349,
      "step": 426440
    },
    {
      "epoch": 0.6979111434051439,
      "grad_norm": 2.1261823177337646,
      "learning_rate": 8.660872544691395e-06,
      "loss": 0.0386,
      "step": 426460
    },
    {
      "epoch": 0.6979438738437973,
      "grad_norm": 1.6452891826629639,
      "learning_rate": 8.660806652477878e-06,
      "loss": 0.0299,
      "step": 426480
    },
    {
      "epoch": 0.6979766042824506,
      "grad_norm": 4.808802127838135,
      "learning_rate": 8.66074076026436e-06,
      "loss": 0.028,
      "step": 426500
    },
    {
      "epoch": 0.6980093347211039,
      "grad_norm": 1.57223641872406,
      "learning_rate": 8.660674868050844e-06,
      "loss": 0.0398,
      "step": 426520
    },
    {
      "epoch": 0.6980420651597573,
      "grad_norm": 0.5956488847732544,
      "learning_rate": 8.660608975837326e-06,
      "loss": 0.02,
      "step": 426540
    },
    {
      "epoch": 0.6980747955984106,
      "grad_norm": 0.8194335699081421,
      "learning_rate": 8.66054308362381e-06,
      "loss": 0.031,
      "step": 426560
    },
    {
      "epoch": 0.6981075260370639,
      "grad_norm": 1.996514916419983,
      "learning_rate": 8.660477191410291e-06,
      "loss": 0.0296,
      "step": 426580
    },
    {
      "epoch": 0.6981402564757173,
      "grad_norm": 0.5113399624824524,
      "learning_rate": 8.660411299196775e-06,
      "loss": 0.0305,
      "step": 426600
    },
    {
      "epoch": 0.6981729869143706,
      "grad_norm": 1.3629417419433594,
      "learning_rate": 8.660345406983257e-06,
      "loss": 0.0309,
      "step": 426620
    },
    {
      "epoch": 0.698205717353024,
      "grad_norm": 0.47567498683929443,
      "learning_rate": 8.66027951476974e-06,
      "loss": 0.0288,
      "step": 426640
    },
    {
      "epoch": 0.6982384477916773,
      "grad_norm": 0.3683156967163086,
      "learning_rate": 8.660213622556222e-06,
      "loss": 0.0245,
      "step": 426660
    },
    {
      "epoch": 0.6982711782303307,
      "grad_norm": 1.5425326824188232,
      "learning_rate": 8.660147730342706e-06,
      "loss": 0.0305,
      "step": 426680
    },
    {
      "epoch": 0.698303908668984,
      "grad_norm": 1.0255117416381836,
      "learning_rate": 8.660081838129188e-06,
      "loss": 0.0315,
      "step": 426700
    },
    {
      "epoch": 0.6983366391076373,
      "grad_norm": 0.11833000183105469,
      "learning_rate": 8.660015945915671e-06,
      "loss": 0.0335,
      "step": 426720
    },
    {
      "epoch": 0.6983693695462907,
      "grad_norm": 0.8822448253631592,
      "learning_rate": 8.659950053702155e-06,
      "loss": 0.0404,
      "step": 426740
    },
    {
      "epoch": 0.698402099984944,
      "grad_norm": 0.5925366878509521,
      "learning_rate": 8.659884161488637e-06,
      "loss": 0.0348,
      "step": 426760
    },
    {
      "epoch": 0.6984348304235973,
      "grad_norm": 1.5419596433639526,
      "learning_rate": 8.65981826927512e-06,
      "loss": 0.0414,
      "step": 426780
    },
    {
      "epoch": 0.6984675608622507,
      "grad_norm": 0.16090014576911926,
      "learning_rate": 8.659752377061604e-06,
      "loss": 0.0275,
      "step": 426800
    },
    {
      "epoch": 0.698500291300904,
      "grad_norm": 0.6534785032272339,
      "learning_rate": 8.659686484848086e-06,
      "loss": 0.0359,
      "step": 426820
    },
    {
      "epoch": 0.6985330217395573,
      "grad_norm": 0.18305987119674683,
      "learning_rate": 8.65962059263457e-06,
      "loss": 0.0312,
      "step": 426840
    },
    {
      "epoch": 0.6985657521782107,
      "grad_norm": 0.2236984521150589,
      "learning_rate": 8.659554700421053e-06,
      "loss": 0.0347,
      "step": 426860
    },
    {
      "epoch": 0.6985984826168641,
      "grad_norm": 0.29292377829551697,
      "learning_rate": 8.659488808207535e-06,
      "loss": 0.0345,
      "step": 426880
    },
    {
      "epoch": 0.6986312130555173,
      "grad_norm": 0.8939377069473267,
      "learning_rate": 8.659422915994018e-06,
      "loss": 0.0303,
      "step": 426900
    },
    {
      "epoch": 0.6986639434941707,
      "grad_norm": 0.3701797127723694,
      "learning_rate": 8.6593570237805e-06,
      "loss": 0.0355,
      "step": 426920
    },
    {
      "epoch": 0.6986966739328241,
      "grad_norm": 0.3551252484321594,
      "learning_rate": 8.659291131566984e-06,
      "loss": 0.0319,
      "step": 426940
    },
    {
      "epoch": 0.6987294043714773,
      "grad_norm": 1.3347433805465698,
      "learning_rate": 8.659225239353466e-06,
      "loss": 0.0211,
      "step": 426960
    },
    {
      "epoch": 0.6987621348101307,
      "grad_norm": 0.5584346055984497,
      "learning_rate": 8.65915934713995e-06,
      "loss": 0.0207,
      "step": 426980
    },
    {
      "epoch": 0.6987948652487841,
      "grad_norm": 0.3283061385154724,
      "learning_rate": 8.659093454926431e-06,
      "loss": 0.0266,
      "step": 427000
    },
    {
      "epoch": 0.6988275956874374,
      "grad_norm": 0.9819019436836243,
      "learning_rate": 8.659027562712915e-06,
      "loss": 0.0251,
      "step": 427020
    },
    {
      "epoch": 0.6988603261260907,
      "grad_norm": 0.9216949343681335,
      "learning_rate": 8.658961670499397e-06,
      "loss": 0.0285,
      "step": 427040
    },
    {
      "epoch": 0.6988930565647441,
      "grad_norm": 0.6554191708564758,
      "learning_rate": 8.65889577828588e-06,
      "loss": 0.0258,
      "step": 427060
    },
    {
      "epoch": 0.6989257870033975,
      "grad_norm": 0.4815952181816101,
      "learning_rate": 8.658829886072364e-06,
      "loss": 0.0372,
      "step": 427080
    },
    {
      "epoch": 0.6989585174420507,
      "grad_norm": 1.2606072425842285,
      "learning_rate": 8.658763993858846e-06,
      "loss": 0.0422,
      "step": 427100
    },
    {
      "epoch": 0.6989912478807041,
      "grad_norm": 1.1580406427383423,
      "learning_rate": 8.65869810164533e-06,
      "loss": 0.0233,
      "step": 427120
    },
    {
      "epoch": 0.6990239783193575,
      "grad_norm": 0.8578624725341797,
      "learning_rate": 8.658632209431811e-06,
      "loss": 0.0291,
      "step": 427140
    },
    {
      "epoch": 0.6990567087580107,
      "grad_norm": 0.8063381910324097,
      "learning_rate": 8.658566317218295e-06,
      "loss": 0.0343,
      "step": 427160
    },
    {
      "epoch": 0.6990894391966641,
      "grad_norm": 1.7785582542419434,
      "learning_rate": 8.658500425004778e-06,
      "loss": 0.0324,
      "step": 427180
    },
    {
      "epoch": 0.6991221696353175,
      "grad_norm": 2.124333143234253,
      "learning_rate": 8.65843453279126e-06,
      "loss": 0.0381,
      "step": 427200
    },
    {
      "epoch": 0.6991549000739707,
      "grad_norm": 0.5491271018981934,
      "learning_rate": 8.658368640577744e-06,
      "loss": 0.0262,
      "step": 427220
    },
    {
      "epoch": 0.6991876305126241,
      "grad_norm": 1.133760929107666,
      "learning_rate": 8.658302748364228e-06,
      "loss": 0.0391,
      "step": 427240
    },
    {
      "epoch": 0.6992203609512775,
      "grad_norm": 5.176421642303467,
      "learning_rate": 8.65823685615071e-06,
      "loss": 0.0462,
      "step": 427260
    },
    {
      "epoch": 0.6992530913899309,
      "grad_norm": 1.4247174263000488,
      "learning_rate": 8.658170963937193e-06,
      "loss": 0.0344,
      "step": 427280
    },
    {
      "epoch": 0.6992858218285841,
      "grad_norm": 0.978996753692627,
      "learning_rate": 8.658105071723675e-06,
      "loss": 0.032,
      "step": 427300
    },
    {
      "epoch": 0.6993185522672375,
      "grad_norm": 1.483818531036377,
      "learning_rate": 8.658039179510158e-06,
      "loss": 0.028,
      "step": 427320
    },
    {
      "epoch": 0.6993512827058909,
      "grad_norm": 1.062542200088501,
      "learning_rate": 8.65797328729664e-06,
      "loss": 0.0282,
      "step": 427340
    },
    {
      "epoch": 0.6993840131445441,
      "grad_norm": 1.476481318473816,
      "learning_rate": 8.657907395083124e-06,
      "loss": 0.0298,
      "step": 427360
    },
    {
      "epoch": 0.6994167435831975,
      "grad_norm": 0.6209040880203247,
      "learning_rate": 8.657841502869606e-06,
      "loss": 0.031,
      "step": 427380
    },
    {
      "epoch": 0.6994494740218509,
      "grad_norm": 1.2198312282562256,
      "learning_rate": 8.65777561065609e-06,
      "loss": 0.0502,
      "step": 427400
    },
    {
      "epoch": 0.6994822044605041,
      "grad_norm": 2.370790481567383,
      "learning_rate": 8.657709718442571e-06,
      "loss": 0.0345,
      "step": 427420
    },
    {
      "epoch": 0.6995149348991575,
      "grad_norm": 2.6768345832824707,
      "learning_rate": 8.657643826229055e-06,
      "loss": 0.0281,
      "step": 427440
    },
    {
      "epoch": 0.6995476653378109,
      "grad_norm": 1.7494330406188965,
      "learning_rate": 8.657577934015539e-06,
      "loss": 0.0273,
      "step": 427460
    },
    {
      "epoch": 0.6995803957764642,
      "grad_norm": 0.7512141466140747,
      "learning_rate": 8.65751204180202e-06,
      "loss": 0.0287,
      "step": 427480
    },
    {
      "epoch": 0.6996131262151175,
      "grad_norm": 1.179022192955017,
      "learning_rate": 8.657446149588504e-06,
      "loss": 0.0326,
      "step": 427500
    },
    {
      "epoch": 0.6996458566537709,
      "grad_norm": 0.8279159069061279,
      "learning_rate": 8.657380257374986e-06,
      "loss": 0.0341,
      "step": 427520
    },
    {
      "epoch": 0.6996785870924243,
      "grad_norm": 3.0996768474578857,
      "learning_rate": 8.65731436516147e-06,
      "loss": 0.0249,
      "step": 427540
    },
    {
      "epoch": 0.6997113175310775,
      "grad_norm": 2.383819818496704,
      "learning_rate": 8.657248472947953e-06,
      "loss": 0.0339,
      "step": 427560
    },
    {
      "epoch": 0.6997440479697309,
      "grad_norm": 0.2709692120552063,
      "learning_rate": 8.657182580734435e-06,
      "loss": 0.037,
      "step": 427580
    },
    {
      "epoch": 0.6997767784083843,
      "grad_norm": 1.8558626174926758,
      "learning_rate": 8.657116688520919e-06,
      "loss": 0.0356,
      "step": 427600
    },
    {
      "epoch": 0.6998095088470375,
      "grad_norm": 0.5793340802192688,
      "learning_rate": 8.657050796307402e-06,
      "loss": 0.029,
      "step": 427620
    },
    {
      "epoch": 0.6998422392856909,
      "grad_norm": 0.8686495423316956,
      "learning_rate": 8.656984904093884e-06,
      "loss": 0.025,
      "step": 427640
    },
    {
      "epoch": 0.6998749697243443,
      "grad_norm": 0.6832649111747742,
      "learning_rate": 8.656919011880368e-06,
      "loss": 0.0416,
      "step": 427660
    },
    {
      "epoch": 0.6999077001629976,
      "grad_norm": 0.4474236071109772,
      "learning_rate": 8.65685311966685e-06,
      "loss": 0.0438,
      "step": 427680
    },
    {
      "epoch": 0.6999404306016509,
      "grad_norm": 0.1992066204547882,
      "learning_rate": 8.656787227453333e-06,
      "loss": 0.0255,
      "step": 427700
    },
    {
      "epoch": 0.6999731610403043,
      "grad_norm": 0.686044454574585,
      "learning_rate": 8.656721335239815e-06,
      "loss": 0.0333,
      "step": 427720
    },
    {
      "epoch": 0.7000058914789576,
      "grad_norm": 2.6921024322509766,
      "learning_rate": 8.656655443026299e-06,
      "loss": 0.039,
      "step": 427740
    },
    {
      "epoch": 0.7000386219176109,
      "grad_norm": 2.6845409870147705,
      "learning_rate": 8.65658955081278e-06,
      "loss": 0.0321,
      "step": 427760
    },
    {
      "epoch": 0.7000713523562643,
      "grad_norm": 1.3435068130493164,
      "learning_rate": 8.656523658599264e-06,
      "loss": 0.0321,
      "step": 427780
    },
    {
      "epoch": 0.7001040827949176,
      "grad_norm": 1.1328012943267822,
      "learning_rate": 8.656457766385748e-06,
      "loss": 0.0422,
      "step": 427800
    },
    {
      "epoch": 0.7001368132335709,
      "grad_norm": 2.3409881591796875,
      "learning_rate": 8.65639187417223e-06,
      "loss": 0.0367,
      "step": 427820
    },
    {
      "epoch": 0.7001695436722243,
      "grad_norm": 3.1429057121276855,
      "learning_rate": 8.656325981958713e-06,
      "loss": 0.0228,
      "step": 427840
    },
    {
      "epoch": 0.7002022741108777,
      "grad_norm": 0.3311520218849182,
      "learning_rate": 8.656260089745195e-06,
      "loss": 0.0328,
      "step": 427860
    },
    {
      "epoch": 0.700235004549531,
      "grad_norm": 0.7853983640670776,
      "learning_rate": 8.656194197531679e-06,
      "loss": 0.0414,
      "step": 427880
    },
    {
      "epoch": 0.7002677349881843,
      "grad_norm": 1.3465206623077393,
      "learning_rate": 8.65612830531816e-06,
      "loss": 0.0337,
      "step": 427900
    },
    {
      "epoch": 0.7003004654268377,
      "grad_norm": 1.6101152896881104,
      "learning_rate": 8.656062413104644e-06,
      "loss": 0.021,
      "step": 427920
    },
    {
      "epoch": 0.700333195865491,
      "grad_norm": 1.0217299461364746,
      "learning_rate": 8.655996520891126e-06,
      "loss": 0.0255,
      "step": 427940
    },
    {
      "epoch": 0.7003659263041443,
      "grad_norm": 0.7426519989967346,
      "learning_rate": 8.65593062867761e-06,
      "loss": 0.0304,
      "step": 427960
    },
    {
      "epoch": 0.7003986567427977,
      "grad_norm": 2.3579957485198975,
      "learning_rate": 8.655864736464093e-06,
      "loss": 0.0341,
      "step": 427980
    },
    {
      "epoch": 0.700431387181451,
      "grad_norm": 0.47546783089637756,
      "learning_rate": 8.655798844250575e-06,
      "loss": 0.0411,
      "step": 428000
    },
    {
      "epoch": 0.7004641176201043,
      "grad_norm": 0.355961412191391,
      "learning_rate": 8.655732952037059e-06,
      "loss": 0.0213,
      "step": 428020
    },
    {
      "epoch": 0.7004968480587577,
      "grad_norm": 0.621435284614563,
      "learning_rate": 8.655667059823542e-06,
      "loss": 0.0402,
      "step": 428040
    },
    {
      "epoch": 0.700529578497411,
      "grad_norm": 1.2236233949661255,
      "learning_rate": 8.655601167610024e-06,
      "loss": 0.03,
      "step": 428060
    },
    {
      "epoch": 0.7005623089360644,
      "grad_norm": 1.0925884246826172,
      "learning_rate": 8.655535275396508e-06,
      "loss": 0.0342,
      "step": 428080
    },
    {
      "epoch": 0.7005950393747177,
      "grad_norm": 2.6081666946411133,
      "learning_rate": 8.65546938318299e-06,
      "loss": 0.0279,
      "step": 428100
    },
    {
      "epoch": 0.700627769813371,
      "grad_norm": 0.8118685483932495,
      "learning_rate": 8.655403490969473e-06,
      "loss": 0.0286,
      "step": 428120
    },
    {
      "epoch": 0.7006605002520244,
      "grad_norm": 0.9416946172714233,
      "learning_rate": 8.655337598755957e-06,
      "loss": 0.0364,
      "step": 428140
    },
    {
      "epoch": 0.7006932306906777,
      "grad_norm": 0.6650780439376831,
      "learning_rate": 8.655271706542439e-06,
      "loss": 0.0333,
      "step": 428160
    },
    {
      "epoch": 0.700725961129331,
      "grad_norm": 0.4058435559272766,
      "learning_rate": 8.655205814328922e-06,
      "loss": 0.0272,
      "step": 428180
    },
    {
      "epoch": 0.7007586915679844,
      "grad_norm": 2.6319994926452637,
      "learning_rate": 8.655139922115404e-06,
      "loss": 0.0303,
      "step": 428200
    },
    {
      "epoch": 0.7007914220066377,
      "grad_norm": 1.0486842393875122,
      "learning_rate": 8.655074029901888e-06,
      "loss": 0.0308,
      "step": 428220
    },
    {
      "epoch": 0.7008241524452911,
      "grad_norm": 1.4992620944976807,
      "learning_rate": 8.65500813768837e-06,
      "loss": 0.0325,
      "step": 428240
    },
    {
      "epoch": 0.7008568828839444,
      "grad_norm": 0.4388708770275116,
      "learning_rate": 8.654942245474853e-06,
      "loss": 0.0282,
      "step": 428260
    },
    {
      "epoch": 0.7008896133225978,
      "grad_norm": 2.3142149448394775,
      "learning_rate": 8.654876353261335e-06,
      "loss": 0.0389,
      "step": 428280
    },
    {
      "epoch": 0.7009223437612511,
      "grad_norm": 1.0444105863571167,
      "learning_rate": 8.654810461047819e-06,
      "loss": 0.0329,
      "step": 428300
    },
    {
      "epoch": 0.7009550741999044,
      "grad_norm": 1.823577642440796,
      "learning_rate": 8.6547445688343e-06,
      "loss": 0.0372,
      "step": 428320
    },
    {
      "epoch": 0.7009878046385578,
      "grad_norm": 0.43640556931495667,
      "learning_rate": 8.654678676620784e-06,
      "loss": 0.0313,
      "step": 428340
    },
    {
      "epoch": 0.7010205350772111,
      "grad_norm": 1.2627640962600708,
      "learning_rate": 8.654612784407268e-06,
      "loss": 0.0416,
      "step": 428360
    },
    {
      "epoch": 0.7010532655158644,
      "grad_norm": 0.7988289594650269,
      "learning_rate": 8.65454689219375e-06,
      "loss": 0.0256,
      "step": 428380
    },
    {
      "epoch": 0.7010859959545178,
      "grad_norm": 2.2977194786071777,
      "learning_rate": 8.654480999980233e-06,
      "loss": 0.0266,
      "step": 428400
    },
    {
      "epoch": 0.7011187263931711,
      "grad_norm": 0.9559634327888489,
      "learning_rate": 8.654415107766717e-06,
      "loss": 0.0425,
      "step": 428420
    },
    {
      "epoch": 0.7011514568318244,
      "grad_norm": 0.9613749384880066,
      "learning_rate": 8.654349215553199e-06,
      "loss": 0.0304,
      "step": 428440
    },
    {
      "epoch": 0.7011841872704778,
      "grad_norm": 2.5315663814544678,
      "learning_rate": 8.654283323339682e-06,
      "loss": 0.0345,
      "step": 428460
    },
    {
      "epoch": 0.7012169177091311,
      "grad_norm": 1.950295329093933,
      "learning_rate": 8.654217431126166e-06,
      "loss": 0.0444,
      "step": 428480
    },
    {
      "epoch": 0.7012496481477845,
      "grad_norm": 1.5086873769760132,
      "learning_rate": 8.654151538912648e-06,
      "loss": 0.0339,
      "step": 428500
    },
    {
      "epoch": 0.7012823785864378,
      "grad_norm": 0.511246383190155,
      "learning_rate": 8.654085646699131e-06,
      "loss": 0.027,
      "step": 428520
    },
    {
      "epoch": 0.7013151090250912,
      "grad_norm": 0.35980790853500366,
      "learning_rate": 8.654019754485613e-06,
      "loss": 0.0298,
      "step": 428540
    },
    {
      "epoch": 0.7013478394637445,
      "grad_norm": 0.1450035721063614,
      "learning_rate": 8.653953862272097e-06,
      "loss": 0.0289,
      "step": 428560
    },
    {
      "epoch": 0.7013805699023978,
      "grad_norm": 3.7051806449890137,
      "learning_rate": 8.653887970058579e-06,
      "loss": 0.033,
      "step": 428580
    },
    {
      "epoch": 0.7014133003410512,
      "grad_norm": 0.9399698972702026,
      "learning_rate": 8.653822077845062e-06,
      "loss": 0.0408,
      "step": 428600
    },
    {
      "epoch": 0.7014460307797045,
      "grad_norm": 0.9972538948059082,
      "learning_rate": 8.653756185631544e-06,
      "loss": 0.0307,
      "step": 428620
    },
    {
      "epoch": 0.7014787612183578,
      "grad_norm": 0.42590007185935974,
      "learning_rate": 8.653690293418028e-06,
      "loss": 0.0302,
      "step": 428640
    },
    {
      "epoch": 0.7015114916570112,
      "grad_norm": 2.2015762329101562,
      "learning_rate": 8.65362440120451e-06,
      "loss": 0.0275,
      "step": 428660
    },
    {
      "epoch": 0.7015442220956645,
      "grad_norm": 0.36167386174201965,
      "learning_rate": 8.653558508990993e-06,
      "loss": 0.0434,
      "step": 428680
    },
    {
      "epoch": 0.7015769525343178,
      "grad_norm": 2.026085376739502,
      "learning_rate": 8.653492616777475e-06,
      "loss": 0.0412,
      "step": 428700
    },
    {
      "epoch": 0.7016096829729712,
      "grad_norm": 2.145617723464966,
      "learning_rate": 8.653426724563959e-06,
      "loss": 0.0328,
      "step": 428720
    },
    {
      "epoch": 0.7016424134116246,
      "grad_norm": 0.30487626791000366,
      "learning_rate": 8.65336083235044e-06,
      "loss": 0.0399,
      "step": 428740
    },
    {
      "epoch": 0.7016751438502778,
      "grad_norm": 1.7731834650039673,
      "learning_rate": 8.653294940136924e-06,
      "loss": 0.0445,
      "step": 428760
    },
    {
      "epoch": 0.7017078742889312,
      "grad_norm": 0.34815099835395813,
      "learning_rate": 8.653229047923408e-06,
      "loss": 0.0343,
      "step": 428780
    },
    {
      "epoch": 0.7017406047275846,
      "grad_norm": 1.067047357559204,
      "learning_rate": 8.65316315570989e-06,
      "loss": 0.0408,
      "step": 428800
    },
    {
      "epoch": 0.7017733351662379,
      "grad_norm": 1.361525535583496,
      "learning_rate": 8.653097263496373e-06,
      "loss": 0.0347,
      "step": 428820
    },
    {
      "epoch": 0.7018060656048912,
      "grad_norm": 1.6228468418121338,
      "learning_rate": 8.653031371282857e-06,
      "loss": 0.0351,
      "step": 428840
    },
    {
      "epoch": 0.7018387960435446,
      "grad_norm": 1.9068396091461182,
      "learning_rate": 8.652965479069339e-06,
      "loss": 0.0294,
      "step": 428860
    },
    {
      "epoch": 0.7018715264821979,
      "grad_norm": 0.7374681830406189,
      "learning_rate": 8.652899586855822e-06,
      "loss": 0.0315,
      "step": 428880
    },
    {
      "epoch": 0.7019042569208512,
      "grad_norm": 1.5210121870040894,
      "learning_rate": 8.652833694642306e-06,
      "loss": 0.0357,
      "step": 428900
    },
    {
      "epoch": 0.7019369873595046,
      "grad_norm": 1.3780587911605835,
      "learning_rate": 8.652767802428788e-06,
      "loss": 0.0201,
      "step": 428920
    },
    {
      "epoch": 0.701969717798158,
      "grad_norm": 2.6995601654052734,
      "learning_rate": 8.652701910215271e-06,
      "loss": 0.0287,
      "step": 428940
    },
    {
      "epoch": 0.7020024482368112,
      "grad_norm": 1.1577190160751343,
      "learning_rate": 8.652636018001753e-06,
      "loss": 0.0276,
      "step": 428960
    },
    {
      "epoch": 0.7020351786754646,
      "grad_norm": 0.250541627407074,
      "learning_rate": 8.652570125788237e-06,
      "loss": 0.0408,
      "step": 428980
    },
    {
      "epoch": 0.702067909114118,
      "grad_norm": 0.7147239446640015,
      "learning_rate": 8.652504233574719e-06,
      "loss": 0.0399,
      "step": 429000
    },
    {
      "epoch": 0.7021006395527712,
      "grad_norm": 0.8941729664802551,
      "learning_rate": 8.652438341361202e-06,
      "loss": 0.0321,
      "step": 429020
    },
    {
      "epoch": 0.7021333699914246,
      "grad_norm": 1.4345643520355225,
      "learning_rate": 8.652372449147684e-06,
      "loss": 0.0312,
      "step": 429040
    },
    {
      "epoch": 0.702166100430078,
      "grad_norm": 1.1721898317337036,
      "learning_rate": 8.652306556934168e-06,
      "loss": 0.0269,
      "step": 429060
    },
    {
      "epoch": 0.7021988308687312,
      "grad_norm": 2.3241052627563477,
      "learning_rate": 8.65224066472065e-06,
      "loss": 0.0357,
      "step": 429080
    },
    {
      "epoch": 0.7022315613073846,
      "grad_norm": 0.2362380176782608,
      "learning_rate": 8.652174772507133e-06,
      "loss": 0.0259,
      "step": 429100
    },
    {
      "epoch": 0.702264291746038,
      "grad_norm": 0.9803497195243835,
      "learning_rate": 8.652108880293615e-06,
      "loss": 0.0392,
      "step": 429120
    },
    {
      "epoch": 0.7022970221846914,
      "grad_norm": 0.9418595433235168,
      "learning_rate": 8.652042988080099e-06,
      "loss": 0.0293,
      "step": 429140
    },
    {
      "epoch": 0.7023297526233446,
      "grad_norm": 0.9051106572151184,
      "learning_rate": 8.651977095866582e-06,
      "loss": 0.0289,
      "step": 429160
    },
    {
      "epoch": 0.702362483061998,
      "grad_norm": 0.36394545435905457,
      "learning_rate": 8.651911203653064e-06,
      "loss": 0.035,
      "step": 429180
    },
    {
      "epoch": 0.7023952135006514,
      "grad_norm": 0.27788546681404114,
      "learning_rate": 8.651845311439548e-06,
      "loss": 0.0202,
      "step": 429200
    },
    {
      "epoch": 0.7024279439393046,
      "grad_norm": 1.485804796218872,
      "learning_rate": 8.651779419226031e-06,
      "loss": 0.0197,
      "step": 429220
    },
    {
      "epoch": 0.702460674377958,
      "grad_norm": 0.49458593130111694,
      "learning_rate": 8.651713527012513e-06,
      "loss": 0.0237,
      "step": 429240
    },
    {
      "epoch": 0.7024934048166114,
      "grad_norm": 1.9164659976959229,
      "learning_rate": 8.651647634798997e-06,
      "loss": 0.0285,
      "step": 429260
    },
    {
      "epoch": 0.7025261352552646,
      "grad_norm": 4.24017858505249,
      "learning_rate": 8.65158174258548e-06,
      "loss": 0.03,
      "step": 429280
    },
    {
      "epoch": 0.702558865693918,
      "grad_norm": 1.278505802154541,
      "learning_rate": 8.651515850371962e-06,
      "loss": 0.0425,
      "step": 429300
    },
    {
      "epoch": 0.7025915961325714,
      "grad_norm": 2.6459758281707764,
      "learning_rate": 8.651449958158446e-06,
      "loss": 0.035,
      "step": 429320
    },
    {
      "epoch": 0.7026243265712248,
      "grad_norm": 1.9898796081542969,
      "learning_rate": 8.651384065944928e-06,
      "loss": 0.0265,
      "step": 429340
    },
    {
      "epoch": 0.702657057009878,
      "grad_norm": 1.494530200958252,
      "learning_rate": 8.651318173731412e-06,
      "loss": 0.0399,
      "step": 429360
    },
    {
      "epoch": 0.7026897874485314,
      "grad_norm": 0.7677934169769287,
      "learning_rate": 8.651252281517893e-06,
      "loss": 0.0483,
      "step": 429380
    },
    {
      "epoch": 0.7027225178871848,
      "grad_norm": 3.6576194763183594,
      "learning_rate": 8.651186389304377e-06,
      "loss": 0.0336,
      "step": 429400
    },
    {
      "epoch": 0.702755248325838,
      "grad_norm": 0.5788916349411011,
      "learning_rate": 8.651120497090859e-06,
      "loss": 0.0502,
      "step": 429420
    },
    {
      "epoch": 0.7027879787644914,
      "grad_norm": 0.7817187309265137,
      "learning_rate": 8.651054604877342e-06,
      "loss": 0.0267,
      "step": 429440
    },
    {
      "epoch": 0.7028207092031448,
      "grad_norm": 0.8965907692909241,
      "learning_rate": 8.650988712663824e-06,
      "loss": 0.0326,
      "step": 429460
    },
    {
      "epoch": 0.702853439641798,
      "grad_norm": 0.4184182584285736,
      "learning_rate": 8.650922820450308e-06,
      "loss": 0.0354,
      "step": 429480
    },
    {
      "epoch": 0.7028861700804514,
      "grad_norm": 0.512357771396637,
      "learning_rate": 8.65085692823679e-06,
      "loss": 0.0216,
      "step": 429500
    },
    {
      "epoch": 0.7029189005191048,
      "grad_norm": 0.6943177580833435,
      "learning_rate": 8.650791036023273e-06,
      "loss": 0.0355,
      "step": 429520
    },
    {
      "epoch": 0.7029516309577581,
      "grad_norm": 1.4440028667449951,
      "learning_rate": 8.650725143809757e-06,
      "loss": 0.029,
      "step": 429540
    },
    {
      "epoch": 0.7029843613964114,
      "grad_norm": 1.3134008646011353,
      "learning_rate": 8.650659251596239e-06,
      "loss": 0.0456,
      "step": 429560
    },
    {
      "epoch": 0.7030170918350648,
      "grad_norm": 0.5029453635215759,
      "learning_rate": 8.650593359382722e-06,
      "loss": 0.0259,
      "step": 429580
    },
    {
      "epoch": 0.7030498222737182,
      "grad_norm": 2.3308703899383545,
      "learning_rate": 8.650527467169204e-06,
      "loss": 0.0435,
      "step": 429600
    },
    {
      "epoch": 0.7030825527123714,
      "grad_norm": 0.6729644536972046,
      "learning_rate": 8.650461574955688e-06,
      "loss": 0.0392,
      "step": 429620
    },
    {
      "epoch": 0.7031152831510248,
      "grad_norm": 3.436966896057129,
      "learning_rate": 8.650395682742172e-06,
      "loss": 0.031,
      "step": 429640
    },
    {
      "epoch": 0.7031480135896782,
      "grad_norm": 1.030884861946106,
      "learning_rate": 8.650329790528653e-06,
      "loss": 0.027,
      "step": 429660
    },
    {
      "epoch": 0.7031807440283314,
      "grad_norm": 2.148970365524292,
      "learning_rate": 8.650263898315137e-06,
      "loss": 0.024,
      "step": 429680
    },
    {
      "epoch": 0.7032134744669848,
      "grad_norm": 0.5860225558280945,
      "learning_rate": 8.65019800610162e-06,
      "loss": 0.0324,
      "step": 429700
    },
    {
      "epoch": 0.7032462049056382,
      "grad_norm": 1.1692984104156494,
      "learning_rate": 8.650132113888103e-06,
      "loss": 0.0344,
      "step": 429720
    },
    {
      "epoch": 0.7032789353442915,
      "grad_norm": 1.3185641765594482,
      "learning_rate": 8.650066221674586e-06,
      "loss": 0.0187,
      "step": 429740
    },
    {
      "epoch": 0.7033116657829448,
      "grad_norm": 2.692824363708496,
      "learning_rate": 8.650000329461068e-06,
      "loss": 0.0318,
      "step": 429760
    },
    {
      "epoch": 0.7033443962215982,
      "grad_norm": 3.170565128326416,
      "learning_rate": 8.649934437247552e-06,
      "loss": 0.0391,
      "step": 429780
    },
    {
      "epoch": 0.7033771266602515,
      "grad_norm": 2.5203354358673096,
      "learning_rate": 8.649868545034033e-06,
      "loss": 0.0387,
      "step": 429800
    },
    {
      "epoch": 0.7034098570989048,
      "grad_norm": 1.5986268520355225,
      "learning_rate": 8.649802652820517e-06,
      "loss": 0.0316,
      "step": 429820
    },
    {
      "epoch": 0.7034425875375582,
      "grad_norm": 1.7521891593933105,
      "learning_rate": 8.649736760606999e-06,
      "loss": 0.0324,
      "step": 429840
    },
    {
      "epoch": 0.7034753179762115,
      "grad_norm": 2.53568434715271,
      "learning_rate": 8.649670868393483e-06,
      "loss": 0.0403,
      "step": 429860
    },
    {
      "epoch": 0.7035080484148648,
      "grad_norm": 6.74268913269043,
      "learning_rate": 8.649604976179964e-06,
      "loss": 0.0394,
      "step": 429880
    },
    {
      "epoch": 0.7035407788535182,
      "grad_norm": 0.478575199842453,
      "learning_rate": 8.649539083966448e-06,
      "loss": 0.0415,
      "step": 429900
    },
    {
      "epoch": 0.7035735092921716,
      "grad_norm": 2.25618314743042,
      "learning_rate": 8.649473191752932e-06,
      "loss": 0.0365,
      "step": 429920
    },
    {
      "epoch": 0.7036062397308249,
      "grad_norm": 0.48535433411598206,
      "learning_rate": 8.649407299539413e-06,
      "loss": 0.0349,
      "step": 429940
    },
    {
      "epoch": 0.7036389701694782,
      "grad_norm": 0.11164722591638565,
      "learning_rate": 8.649341407325897e-06,
      "loss": 0.0348,
      "step": 429960
    },
    {
      "epoch": 0.7036717006081316,
      "grad_norm": 4.365797996520996,
      "learning_rate": 8.649275515112379e-06,
      "loss": 0.0241,
      "step": 429980
    },
    {
      "epoch": 0.7037044310467849,
      "grad_norm": 0.6953298449516296,
      "learning_rate": 8.649209622898863e-06,
      "loss": 0.0317,
      "step": 430000
    },
    {
      "epoch": 0.7037371614854382,
      "grad_norm": 1.5521479845046997,
      "learning_rate": 8.649143730685346e-06,
      "loss": 0.032,
      "step": 430020
    },
    {
      "epoch": 0.7037698919240916,
      "grad_norm": 0.8992102742195129,
      "learning_rate": 8.649077838471828e-06,
      "loss": 0.0288,
      "step": 430040
    },
    {
      "epoch": 0.7038026223627449,
      "grad_norm": 1.0033233165740967,
      "learning_rate": 8.649011946258312e-06,
      "loss": 0.0378,
      "step": 430060
    },
    {
      "epoch": 0.7038353528013982,
      "grad_norm": 0.5620426535606384,
      "learning_rate": 8.648946054044795e-06,
      "loss": 0.0307,
      "step": 430080
    },
    {
      "epoch": 0.7038680832400516,
      "grad_norm": 0.1550380140542984,
      "learning_rate": 8.648880161831277e-06,
      "loss": 0.0299,
      "step": 430100
    },
    {
      "epoch": 0.7039008136787049,
      "grad_norm": 0.6146456599235535,
      "learning_rate": 8.64881426961776e-06,
      "loss": 0.0255,
      "step": 430120
    },
    {
      "epoch": 0.7039335441173583,
      "grad_norm": 0.43462976813316345,
      "learning_rate": 8.648748377404243e-06,
      "loss": 0.0306,
      "step": 430140
    },
    {
      "epoch": 0.7039662745560116,
      "grad_norm": 1.802992582321167,
      "learning_rate": 8.648682485190726e-06,
      "loss": 0.0263,
      "step": 430160
    },
    {
      "epoch": 0.703999004994665,
      "grad_norm": 2.00272536277771,
      "learning_rate": 8.648616592977208e-06,
      "loss": 0.0309,
      "step": 430180
    },
    {
      "epoch": 0.7040317354333183,
      "grad_norm": 1.7176343202590942,
      "learning_rate": 8.648550700763692e-06,
      "loss": 0.0333,
      "step": 430200
    },
    {
      "epoch": 0.7040644658719716,
      "grad_norm": 2.521162986755371,
      "learning_rate": 8.648484808550174e-06,
      "loss": 0.0335,
      "step": 430220
    },
    {
      "epoch": 0.704097196310625,
      "grad_norm": 1.1960232257843018,
      "learning_rate": 8.648418916336657e-06,
      "loss": 0.0569,
      "step": 430240
    },
    {
      "epoch": 0.7041299267492783,
      "grad_norm": 0.8319207429885864,
      "learning_rate": 8.64835302412314e-06,
      "loss": 0.0384,
      "step": 430260
    },
    {
      "epoch": 0.7041626571879316,
      "grad_norm": 2.2534284591674805,
      "learning_rate": 8.648287131909623e-06,
      "loss": 0.0415,
      "step": 430280
    },
    {
      "epoch": 0.704195387626585,
      "grad_norm": 0.5147988796234131,
      "learning_rate": 8.648221239696106e-06,
      "loss": 0.0247,
      "step": 430300
    },
    {
      "epoch": 0.7042281180652383,
      "grad_norm": 1.1527284383773804,
      "learning_rate": 8.648155347482588e-06,
      "loss": 0.0364,
      "step": 430320
    },
    {
      "epoch": 0.7042608485038917,
      "grad_norm": 1.1644409894943237,
      "learning_rate": 8.648089455269072e-06,
      "loss": 0.0428,
      "step": 430340
    },
    {
      "epoch": 0.704293578942545,
      "grad_norm": 0.4887821674346924,
      "learning_rate": 8.648023563055554e-06,
      "loss": 0.031,
      "step": 430360
    },
    {
      "epoch": 0.7043263093811983,
      "grad_norm": 0.42177602648735046,
      "learning_rate": 8.647957670842037e-06,
      "loss": 0.0232,
      "step": 430380
    },
    {
      "epoch": 0.7043590398198517,
      "grad_norm": 0.38250765204429626,
      "learning_rate": 8.64789177862852e-06,
      "loss": 0.0205,
      "step": 430400
    },
    {
      "epoch": 0.704391770258505,
      "grad_norm": 0.31777799129486084,
      "learning_rate": 8.647825886415003e-06,
      "loss": 0.0191,
      "step": 430420
    },
    {
      "epoch": 0.7044245006971583,
      "grad_norm": 0.8250542879104614,
      "learning_rate": 8.647759994201486e-06,
      "loss": 0.0259,
      "step": 430440
    },
    {
      "epoch": 0.7044572311358117,
      "grad_norm": 1.147884726524353,
      "learning_rate": 8.64769410198797e-06,
      "loss": 0.0423,
      "step": 430460
    },
    {
      "epoch": 0.704489961574465,
      "grad_norm": 1.665164589881897,
      "learning_rate": 8.647628209774452e-06,
      "loss": 0.0316,
      "step": 430480
    },
    {
      "epoch": 0.7045226920131183,
      "grad_norm": 2.1407556533813477,
      "learning_rate": 8.647562317560935e-06,
      "loss": 0.0447,
      "step": 430500
    },
    {
      "epoch": 0.7045554224517717,
      "grad_norm": 0.5876168012619019,
      "learning_rate": 8.647496425347417e-06,
      "loss": 0.0424,
      "step": 430520
    },
    {
      "epoch": 0.7045881528904251,
      "grad_norm": 1.7392069101333618,
      "learning_rate": 8.6474305331339e-06,
      "loss": 0.0314,
      "step": 430540
    },
    {
      "epoch": 0.7046208833290784,
      "grad_norm": 0.947636604309082,
      "learning_rate": 8.647364640920383e-06,
      "loss": 0.0315,
      "step": 430560
    },
    {
      "epoch": 0.7046536137677317,
      "grad_norm": 0.284612774848938,
      "learning_rate": 8.647298748706866e-06,
      "loss": 0.0261,
      "step": 430580
    },
    {
      "epoch": 0.7046863442063851,
      "grad_norm": 0.42513197660446167,
      "learning_rate": 8.64723285649335e-06,
      "loss": 0.0413,
      "step": 430600
    },
    {
      "epoch": 0.7047190746450384,
      "grad_norm": 1.1818958520889282,
      "learning_rate": 8.647166964279832e-06,
      "loss": 0.0498,
      "step": 430620
    },
    {
      "epoch": 0.7047518050836917,
      "grad_norm": 1.0297141075134277,
      "learning_rate": 8.647101072066315e-06,
      "loss": 0.0273,
      "step": 430640
    },
    {
      "epoch": 0.7047845355223451,
      "grad_norm": 0.847456693649292,
      "learning_rate": 8.647035179852797e-06,
      "loss": 0.026,
      "step": 430660
    },
    {
      "epoch": 0.7048172659609984,
      "grad_norm": 2.045924663543701,
      "learning_rate": 8.64696928763928e-06,
      "loss": 0.0282,
      "step": 430680
    },
    {
      "epoch": 0.7048499963996517,
      "grad_norm": 2.6877501010894775,
      "learning_rate": 8.646903395425763e-06,
      "loss": 0.0448,
      "step": 430700
    },
    {
      "epoch": 0.7048827268383051,
      "grad_norm": 1.024289846420288,
      "learning_rate": 8.646837503212246e-06,
      "loss": 0.0328,
      "step": 430720
    },
    {
      "epoch": 0.7049154572769585,
      "grad_norm": 0.4233803153038025,
      "learning_rate": 8.646771610998728e-06,
      "loss": 0.0387,
      "step": 430740
    },
    {
      "epoch": 0.7049481877156117,
      "grad_norm": 0.2311704307794571,
      "learning_rate": 8.646705718785212e-06,
      "loss": 0.0269,
      "step": 430760
    },
    {
      "epoch": 0.7049809181542651,
      "grad_norm": 3.0896050930023193,
      "learning_rate": 8.646639826571694e-06,
      "loss": 0.0389,
      "step": 430780
    },
    {
      "epoch": 0.7050136485929185,
      "grad_norm": 0.25357624888420105,
      "learning_rate": 8.646573934358177e-06,
      "loss": 0.0309,
      "step": 430800
    },
    {
      "epoch": 0.7050463790315717,
      "grad_norm": 0.8155985474586487,
      "learning_rate": 8.64650804214466e-06,
      "loss": 0.0464,
      "step": 430820
    },
    {
      "epoch": 0.7050791094702251,
      "grad_norm": 0.6379633545875549,
      "learning_rate": 8.646442149931143e-06,
      "loss": 0.0303,
      "step": 430840
    },
    {
      "epoch": 0.7051118399088785,
      "grad_norm": 0.23701664805412292,
      "learning_rate": 8.646376257717626e-06,
      "loss": 0.0455,
      "step": 430860
    },
    {
      "epoch": 0.7051445703475318,
      "grad_norm": 0.7874407172203064,
      "learning_rate": 8.64631036550411e-06,
      "loss": 0.0257,
      "step": 430880
    },
    {
      "epoch": 0.7051773007861851,
      "grad_norm": 0.38856709003448486,
      "learning_rate": 8.646244473290592e-06,
      "loss": 0.0397,
      "step": 430900
    },
    {
      "epoch": 0.7052100312248385,
      "grad_norm": 1.8745357990264893,
      "learning_rate": 8.646178581077075e-06,
      "loss": 0.0534,
      "step": 430920
    },
    {
      "epoch": 0.7052427616634919,
      "grad_norm": 0.6590422987937927,
      "learning_rate": 8.646112688863557e-06,
      "loss": 0.033,
      "step": 430940
    },
    {
      "epoch": 0.7052754921021451,
      "grad_norm": 0.9077172875404358,
      "learning_rate": 8.646046796650041e-06,
      "loss": 0.038,
      "step": 430960
    },
    {
      "epoch": 0.7053082225407985,
      "grad_norm": 0.5290054082870483,
      "learning_rate": 8.645980904436524e-06,
      "loss": 0.0226,
      "step": 430980
    },
    {
      "epoch": 0.7053409529794519,
      "grad_norm": 1.3099164962768555,
      "learning_rate": 8.645915012223006e-06,
      "loss": 0.0297,
      "step": 431000
    },
    {
      "epoch": 0.7053736834181051,
      "grad_norm": 1.2660722732543945,
      "learning_rate": 8.64584912000949e-06,
      "loss": 0.0415,
      "step": 431020
    },
    {
      "epoch": 0.7054064138567585,
      "grad_norm": 5.456864356994629,
      "learning_rate": 8.645783227795972e-06,
      "loss": 0.0385,
      "step": 431040
    },
    {
      "epoch": 0.7054391442954119,
      "grad_norm": 3.0669124126434326,
      "learning_rate": 8.645717335582455e-06,
      "loss": 0.0443,
      "step": 431060
    },
    {
      "epoch": 0.7054718747340651,
      "grad_norm": 0.8453729152679443,
      "learning_rate": 8.645651443368937e-06,
      "loss": 0.0261,
      "step": 431080
    },
    {
      "epoch": 0.7055046051727185,
      "grad_norm": 0.8335683345794678,
      "learning_rate": 8.645585551155421e-06,
      "loss": 0.0363,
      "step": 431100
    },
    {
      "epoch": 0.7055373356113719,
      "grad_norm": 0.6679128408432007,
      "learning_rate": 8.645519658941903e-06,
      "loss": 0.0245,
      "step": 431120
    },
    {
      "epoch": 0.7055700660500253,
      "grad_norm": 0.6744764447212219,
      "learning_rate": 8.645453766728386e-06,
      "loss": 0.0347,
      "step": 431140
    },
    {
      "epoch": 0.7056027964886785,
      "grad_norm": 0.4753393232822418,
      "learning_rate": 8.645387874514868e-06,
      "loss": 0.0301,
      "step": 431160
    },
    {
      "epoch": 0.7056355269273319,
      "grad_norm": 0.25640541315078735,
      "learning_rate": 8.645321982301352e-06,
      "loss": 0.0463,
      "step": 431180
    },
    {
      "epoch": 0.7056682573659853,
      "grad_norm": 1.1087391376495361,
      "learning_rate": 8.645256090087835e-06,
      "loss": 0.0406,
      "step": 431200
    },
    {
      "epoch": 0.7057009878046385,
      "grad_norm": 3.1142983436584473,
      "learning_rate": 8.645190197874317e-06,
      "loss": 0.0273,
      "step": 431220
    },
    {
      "epoch": 0.7057337182432919,
      "grad_norm": 0.5158907771110535,
      "learning_rate": 8.645124305660801e-06,
      "loss": 0.0256,
      "step": 431240
    },
    {
      "epoch": 0.7057664486819453,
      "grad_norm": 2.5398972034454346,
      "learning_rate": 8.645058413447284e-06,
      "loss": 0.041,
      "step": 431260
    },
    {
      "epoch": 0.7057991791205985,
      "grad_norm": 0.6582414507865906,
      "learning_rate": 8.644992521233766e-06,
      "loss": 0.0298,
      "step": 431280
    },
    {
      "epoch": 0.7058319095592519,
      "grad_norm": 1.2491921186447144,
      "learning_rate": 8.64492662902025e-06,
      "loss": 0.0243,
      "step": 431300
    },
    {
      "epoch": 0.7058646399979053,
      "grad_norm": 1.475995421409607,
      "learning_rate": 8.644860736806734e-06,
      "loss": 0.0316,
      "step": 431320
    },
    {
      "epoch": 0.7058973704365585,
      "grad_norm": 1.1420096158981323,
      "learning_rate": 8.644794844593215e-06,
      "loss": 0.0325,
      "step": 431340
    },
    {
      "epoch": 0.7059301008752119,
      "grad_norm": 3.5376787185668945,
      "learning_rate": 8.644728952379699e-06,
      "loss": 0.0379,
      "step": 431360
    },
    {
      "epoch": 0.7059628313138653,
      "grad_norm": 0.8043157458305359,
      "learning_rate": 8.644663060166181e-06,
      "loss": 0.0472,
      "step": 431380
    },
    {
      "epoch": 0.7059955617525187,
      "grad_norm": 0.5861148834228516,
      "learning_rate": 8.644597167952665e-06,
      "loss": 0.0316,
      "step": 431400
    },
    {
      "epoch": 0.7060282921911719,
      "grad_norm": 3.218506336212158,
      "learning_rate": 8.644531275739146e-06,
      "loss": 0.0373,
      "step": 431420
    },
    {
      "epoch": 0.7060610226298253,
      "grad_norm": 0.4539145231246948,
      "learning_rate": 8.64446538352563e-06,
      "loss": 0.025,
      "step": 431440
    },
    {
      "epoch": 0.7060937530684787,
      "grad_norm": 0.963677704334259,
      "learning_rate": 8.644399491312112e-06,
      "loss": 0.0303,
      "step": 431460
    },
    {
      "epoch": 0.7061264835071319,
      "grad_norm": 1.1783918142318726,
      "learning_rate": 8.644333599098595e-06,
      "loss": 0.0395,
      "step": 431480
    },
    {
      "epoch": 0.7061592139457853,
      "grad_norm": 2.2917096614837646,
      "learning_rate": 8.644267706885077e-06,
      "loss": 0.0344,
      "step": 431500
    },
    {
      "epoch": 0.7061919443844387,
      "grad_norm": 0.1828075498342514,
      "learning_rate": 8.644201814671561e-06,
      "loss": 0.0216,
      "step": 431520
    },
    {
      "epoch": 0.7062246748230919,
      "grad_norm": 3.819282054901123,
      "learning_rate": 8.644135922458043e-06,
      "loss": 0.038,
      "step": 431540
    },
    {
      "epoch": 0.7062574052617453,
      "grad_norm": 0.20502716302871704,
      "learning_rate": 8.644070030244526e-06,
      "loss": 0.0303,
      "step": 431560
    },
    {
      "epoch": 0.7062901357003987,
      "grad_norm": 3.7574567794799805,
      "learning_rate": 8.644004138031008e-06,
      "loss": 0.0489,
      "step": 431580
    },
    {
      "epoch": 0.706322866139052,
      "grad_norm": 1.451356053352356,
      "learning_rate": 8.643938245817492e-06,
      "loss": 0.0321,
      "step": 431600
    },
    {
      "epoch": 0.7063555965777053,
      "grad_norm": 0.6570009589195251,
      "learning_rate": 8.643872353603975e-06,
      "loss": 0.0392,
      "step": 431620
    },
    {
      "epoch": 0.7063883270163587,
      "grad_norm": 1.1630162000656128,
      "learning_rate": 8.643806461390457e-06,
      "loss": 0.0343,
      "step": 431640
    },
    {
      "epoch": 0.706421057455012,
      "grad_norm": 0.7775779366493225,
      "learning_rate": 8.643740569176941e-06,
      "loss": 0.0344,
      "step": 431660
    },
    {
      "epoch": 0.7064537878936653,
      "grad_norm": 1.3048114776611328,
      "learning_rate": 8.643674676963425e-06,
      "loss": 0.0214,
      "step": 431680
    },
    {
      "epoch": 0.7064865183323187,
      "grad_norm": 1.1628565788269043,
      "learning_rate": 8.643608784749906e-06,
      "loss": 0.0328,
      "step": 431700
    },
    {
      "epoch": 0.706519248770972,
      "grad_norm": 1.0178155899047852,
      "learning_rate": 8.64354289253639e-06,
      "loss": 0.0296,
      "step": 431720
    },
    {
      "epoch": 0.7065519792096253,
      "grad_norm": 0.4809451699256897,
      "learning_rate": 8.643477000322874e-06,
      "loss": 0.0408,
      "step": 431740
    },
    {
      "epoch": 0.7065847096482787,
      "grad_norm": 0.5904322862625122,
      "learning_rate": 8.643411108109356e-06,
      "loss": 0.0368,
      "step": 431760
    },
    {
      "epoch": 0.7066174400869321,
      "grad_norm": 0.8589072823524475,
      "learning_rate": 8.643345215895839e-06,
      "loss": 0.0264,
      "step": 431780
    },
    {
      "epoch": 0.7066501705255854,
      "grad_norm": 2.932378053665161,
      "learning_rate": 8.643279323682321e-06,
      "loss": 0.031,
      "step": 431800
    },
    {
      "epoch": 0.7066829009642387,
      "grad_norm": 2.610361337661743,
      "learning_rate": 8.643213431468805e-06,
      "loss": 0.0374,
      "step": 431820
    },
    {
      "epoch": 0.7067156314028921,
      "grad_norm": 0.38593998551368713,
      "learning_rate": 8.643147539255286e-06,
      "loss": 0.0349,
      "step": 431840
    },
    {
      "epoch": 0.7067483618415454,
      "grad_norm": 1.5591086149215698,
      "learning_rate": 8.64308164704177e-06,
      "loss": 0.0491,
      "step": 431860
    },
    {
      "epoch": 0.7067810922801987,
      "grad_norm": 3.732067346572876,
      "learning_rate": 8.643015754828252e-06,
      "loss": 0.0273,
      "step": 431880
    },
    {
      "epoch": 0.7068138227188521,
      "grad_norm": 0.18882180750370026,
      "learning_rate": 8.642949862614736e-06,
      "loss": 0.0286,
      "step": 431900
    },
    {
      "epoch": 0.7068465531575054,
      "grad_norm": 0.5754120349884033,
      "learning_rate": 8.642883970401217e-06,
      "loss": 0.0218,
      "step": 431920
    },
    {
      "epoch": 0.7068792835961587,
      "grad_norm": 0.7747515439987183,
      "learning_rate": 8.642818078187701e-06,
      "loss": 0.0283,
      "step": 431940
    },
    {
      "epoch": 0.7069120140348121,
      "grad_norm": 0.7368648052215576,
      "learning_rate": 8.642752185974183e-06,
      "loss": 0.0353,
      "step": 431960
    },
    {
      "epoch": 0.7069447444734654,
      "grad_norm": 0.30484941601753235,
      "learning_rate": 8.642686293760667e-06,
      "loss": 0.0304,
      "step": 431980
    },
    {
      "epoch": 0.7069774749121188,
      "grad_norm": 1.2009048461914062,
      "learning_rate": 8.64262040154715e-06,
      "loss": 0.0303,
      "step": 432000
    },
    {
      "epoch": 0.7070102053507721,
      "grad_norm": 0.5929791331291199,
      "learning_rate": 8.642554509333632e-06,
      "loss": 0.0283,
      "step": 432020
    },
    {
      "epoch": 0.7070429357894255,
      "grad_norm": 1.4125823974609375,
      "learning_rate": 8.642488617120116e-06,
      "loss": 0.0227,
      "step": 432040
    },
    {
      "epoch": 0.7070756662280788,
      "grad_norm": 1.9573907852172852,
      "learning_rate": 8.6424227249066e-06,
      "loss": 0.0227,
      "step": 432060
    },
    {
      "epoch": 0.7071083966667321,
      "grad_norm": 0.8069835305213928,
      "learning_rate": 8.642356832693081e-06,
      "loss": 0.0259,
      "step": 432080
    },
    {
      "epoch": 0.7071411271053855,
      "grad_norm": 0.9950853586196899,
      "learning_rate": 8.642290940479565e-06,
      "loss": 0.0308,
      "step": 432100
    },
    {
      "epoch": 0.7071738575440388,
      "grad_norm": 1.6215041875839233,
      "learning_rate": 8.642225048266048e-06,
      "loss": 0.0329,
      "step": 432120
    },
    {
      "epoch": 0.7072065879826921,
      "grad_norm": 0.7499647736549377,
      "learning_rate": 8.64215915605253e-06,
      "loss": 0.0366,
      "step": 432140
    },
    {
      "epoch": 0.7072393184213455,
      "grad_norm": 0.9224214553833008,
      "learning_rate": 8.642093263839014e-06,
      "loss": 0.0237,
      "step": 432160
    },
    {
      "epoch": 0.7072720488599988,
      "grad_norm": 0.6784276962280273,
      "learning_rate": 8.642027371625496e-06,
      "loss": 0.0328,
      "step": 432180
    },
    {
      "epoch": 0.7073047792986522,
      "grad_norm": 0.829086422920227,
      "learning_rate": 8.64196147941198e-06,
      "loss": 0.0369,
      "step": 432200
    },
    {
      "epoch": 0.7073375097373055,
      "grad_norm": 0.7820826768875122,
      "learning_rate": 8.641895587198461e-06,
      "loss": 0.0411,
      "step": 432220
    },
    {
      "epoch": 0.7073702401759588,
      "grad_norm": 0.7409095764160156,
      "learning_rate": 8.641829694984945e-06,
      "loss": 0.0343,
      "step": 432240
    },
    {
      "epoch": 0.7074029706146122,
      "grad_norm": 0.6332129836082458,
      "learning_rate": 8.641763802771427e-06,
      "loss": 0.0306,
      "step": 432260
    },
    {
      "epoch": 0.7074357010532655,
      "grad_norm": 0.818291187286377,
      "learning_rate": 8.64169791055791e-06,
      "loss": 0.0283,
      "step": 432280
    },
    {
      "epoch": 0.7074684314919188,
      "grad_norm": 1.7850123643875122,
      "learning_rate": 8.641632018344392e-06,
      "loss": 0.0285,
      "step": 432300
    },
    {
      "epoch": 0.7075011619305722,
      "grad_norm": 1.4167649745941162,
      "learning_rate": 8.641566126130876e-06,
      "loss": 0.0374,
      "step": 432320
    },
    {
      "epoch": 0.7075338923692255,
      "grad_norm": 2.270822763442993,
      "learning_rate": 8.641500233917358e-06,
      "loss": 0.0399,
      "step": 432340
    },
    {
      "epoch": 0.7075666228078789,
      "grad_norm": 0.3757902681827545,
      "learning_rate": 8.641434341703841e-06,
      "loss": 0.0304,
      "step": 432360
    },
    {
      "epoch": 0.7075993532465322,
      "grad_norm": 9.253623962402344,
      "learning_rate": 8.641368449490325e-06,
      "loss": 0.0358,
      "step": 432380
    },
    {
      "epoch": 0.7076320836851856,
      "grad_norm": 0.361873596906662,
      "learning_rate": 8.641302557276807e-06,
      "loss": 0.0315,
      "step": 432400
    },
    {
      "epoch": 0.7076648141238389,
      "grad_norm": 0.45179590582847595,
      "learning_rate": 8.64123666506329e-06,
      "loss": 0.0411,
      "step": 432420
    },
    {
      "epoch": 0.7076975445624922,
      "grad_norm": 0.8727725148200989,
      "learning_rate": 8.641170772849772e-06,
      "loss": 0.0314,
      "step": 432440
    },
    {
      "epoch": 0.7077302750011456,
      "grad_norm": 2.3448078632354736,
      "learning_rate": 8.641104880636256e-06,
      "loss": 0.031,
      "step": 432460
    },
    {
      "epoch": 0.7077630054397989,
      "grad_norm": 1.838645100593567,
      "learning_rate": 8.64103898842274e-06,
      "loss": 0.035,
      "step": 432480
    },
    {
      "epoch": 0.7077957358784522,
      "grad_norm": 1.312159776687622,
      "learning_rate": 8.640973096209223e-06,
      "loss": 0.0357,
      "step": 432500
    },
    {
      "epoch": 0.7078284663171056,
      "grad_norm": 0.9074667692184448,
      "learning_rate": 8.640907203995705e-06,
      "loss": 0.0337,
      "step": 432520
    },
    {
      "epoch": 0.7078611967557589,
      "grad_norm": 0.6309069395065308,
      "learning_rate": 8.640841311782188e-06,
      "loss": 0.0398,
      "step": 432540
    },
    {
      "epoch": 0.7078939271944122,
      "grad_norm": 1.392645001411438,
      "learning_rate": 8.64077541956867e-06,
      "loss": 0.0288,
      "step": 432560
    },
    {
      "epoch": 0.7079266576330656,
      "grad_norm": 2.9267776012420654,
      "learning_rate": 8.640709527355154e-06,
      "loss": 0.0281,
      "step": 432580
    },
    {
      "epoch": 0.707959388071719,
      "grad_norm": 0.8094896674156189,
      "learning_rate": 8.640643635141636e-06,
      "loss": 0.0327,
      "step": 432600
    },
    {
      "epoch": 0.7079921185103722,
      "grad_norm": 0.9478891491889954,
      "learning_rate": 8.64057774292812e-06,
      "loss": 0.0291,
      "step": 432620
    },
    {
      "epoch": 0.7080248489490256,
      "grad_norm": 2.3080391883850098,
      "learning_rate": 8.640511850714601e-06,
      "loss": 0.0334,
      "step": 432640
    },
    {
      "epoch": 0.708057579387679,
      "grad_norm": 1.0372577905654907,
      "learning_rate": 8.640445958501085e-06,
      "loss": 0.0374,
      "step": 432660
    },
    {
      "epoch": 0.7080903098263323,
      "grad_norm": 1.3907495737075806,
      "learning_rate": 8.640380066287567e-06,
      "loss": 0.0338,
      "step": 432680
    },
    {
      "epoch": 0.7081230402649856,
      "grad_norm": 0.9592860341072083,
      "learning_rate": 8.64031417407405e-06,
      "loss": 0.0375,
      "step": 432700
    },
    {
      "epoch": 0.708155770703639,
      "grad_norm": 0.48264649510383606,
      "learning_rate": 8.640248281860534e-06,
      "loss": 0.0303,
      "step": 432720
    },
    {
      "epoch": 0.7081885011422923,
      "grad_norm": 0.9657739400863647,
      "learning_rate": 8.640182389647016e-06,
      "loss": 0.0352,
      "step": 432740
    },
    {
      "epoch": 0.7082212315809456,
      "grad_norm": 1.1562769412994385,
      "learning_rate": 8.6401164974335e-06,
      "loss": 0.0334,
      "step": 432760
    },
    {
      "epoch": 0.708253962019599,
      "grad_norm": 1.5280866622924805,
      "learning_rate": 8.640050605219981e-06,
      "loss": 0.0268,
      "step": 432780
    },
    {
      "epoch": 0.7082866924582524,
      "grad_norm": 1.1809337139129639,
      "learning_rate": 8.639984713006465e-06,
      "loss": 0.0372,
      "step": 432800
    },
    {
      "epoch": 0.7083194228969056,
      "grad_norm": 1.6150070428848267,
      "learning_rate": 8.639918820792947e-06,
      "loss": 0.0393,
      "step": 432820
    },
    {
      "epoch": 0.708352153335559,
      "grad_norm": 1.4990588426589966,
      "learning_rate": 8.63985292857943e-06,
      "loss": 0.0282,
      "step": 432840
    },
    {
      "epoch": 0.7083848837742124,
      "grad_norm": 1.7081201076507568,
      "learning_rate": 8.639787036365914e-06,
      "loss": 0.0253,
      "step": 432860
    },
    {
      "epoch": 0.7084176142128656,
      "grad_norm": 3.7100636959075928,
      "learning_rate": 8.639721144152396e-06,
      "loss": 0.0461,
      "step": 432880
    },
    {
      "epoch": 0.708450344651519,
      "grad_norm": 0.5810356140136719,
      "learning_rate": 8.63965525193888e-06,
      "loss": 0.0315,
      "step": 432900
    },
    {
      "epoch": 0.7084830750901724,
      "grad_norm": 1.1419901847839355,
      "learning_rate": 8.639589359725363e-06,
      "loss": 0.043,
      "step": 432920
    },
    {
      "epoch": 0.7085158055288256,
      "grad_norm": 0.8204205632209778,
      "learning_rate": 8.639523467511845e-06,
      "loss": 0.0499,
      "step": 432940
    },
    {
      "epoch": 0.708548535967479,
      "grad_norm": 1.0166664123535156,
      "learning_rate": 8.639457575298328e-06,
      "loss": 0.0352,
      "step": 432960
    },
    {
      "epoch": 0.7085812664061324,
      "grad_norm": 0.9696234464645386,
      "learning_rate": 8.63939168308481e-06,
      "loss": 0.0345,
      "step": 432980
    },
    {
      "epoch": 0.7086139968447858,
      "grad_norm": 0.49149611592292786,
      "learning_rate": 8.639325790871294e-06,
      "loss": 0.0265,
      "step": 433000
    },
    {
      "epoch": 0.708646727283439,
      "grad_norm": 0.8002340793609619,
      "learning_rate": 8.639259898657776e-06,
      "loss": 0.0344,
      "step": 433020
    },
    {
      "epoch": 0.7086794577220924,
      "grad_norm": 1.6891734600067139,
      "learning_rate": 8.63919400644426e-06,
      "loss": 0.0305,
      "step": 433040
    },
    {
      "epoch": 0.7087121881607458,
      "grad_norm": 1.0589354038238525,
      "learning_rate": 8.639128114230743e-06,
      "loss": 0.0451,
      "step": 433060
    },
    {
      "epoch": 0.708744918599399,
      "grad_norm": 2.122715711593628,
      "learning_rate": 8.639062222017225e-06,
      "loss": 0.0465,
      "step": 433080
    },
    {
      "epoch": 0.7087776490380524,
      "grad_norm": 3.6118502616882324,
      "learning_rate": 8.638996329803708e-06,
      "loss": 0.0385,
      "step": 433100
    },
    {
      "epoch": 0.7088103794767058,
      "grad_norm": 2.261845350265503,
      "learning_rate": 8.63893043759019e-06,
      "loss": 0.0198,
      "step": 433120
    },
    {
      "epoch": 0.708843109915359,
      "grad_norm": 0.21814562380313873,
      "learning_rate": 8.638864545376674e-06,
      "loss": 0.0305,
      "step": 433140
    },
    {
      "epoch": 0.7088758403540124,
      "grad_norm": 1.780013918876648,
      "learning_rate": 8.638798653163156e-06,
      "loss": 0.0416,
      "step": 433160
    },
    {
      "epoch": 0.7089085707926658,
      "grad_norm": 3.040178060531616,
      "learning_rate": 8.63873276094964e-06,
      "loss": 0.0381,
      "step": 433180
    },
    {
      "epoch": 0.7089413012313192,
      "grad_norm": 0.9206512570381165,
      "learning_rate": 8.638666868736121e-06,
      "loss": 0.0414,
      "step": 433200
    },
    {
      "epoch": 0.7089740316699724,
      "grad_norm": 1.0602222681045532,
      "learning_rate": 8.638600976522605e-06,
      "loss": 0.0462,
      "step": 433220
    },
    {
      "epoch": 0.7090067621086258,
      "grad_norm": 0.6011135578155518,
      "learning_rate": 8.638535084309088e-06,
      "loss": 0.0272,
      "step": 433240
    },
    {
      "epoch": 0.7090394925472792,
      "grad_norm": 1.4423036575317383,
      "learning_rate": 8.63846919209557e-06,
      "loss": 0.0253,
      "step": 433260
    },
    {
      "epoch": 0.7090722229859324,
      "grad_norm": 0.8461987972259521,
      "learning_rate": 8.638403299882054e-06,
      "loss": 0.0322,
      "step": 433280
    },
    {
      "epoch": 0.7091049534245858,
      "grad_norm": 0.6371451020240784,
      "learning_rate": 8.638337407668537e-06,
      "loss": 0.0353,
      "step": 433300
    },
    {
      "epoch": 0.7091376838632392,
      "grad_norm": 0.32491010427474976,
      "learning_rate": 8.63827151545502e-06,
      "loss": 0.031,
      "step": 433320
    },
    {
      "epoch": 0.7091704143018924,
      "grad_norm": 1.835917353630066,
      "learning_rate": 8.638205623241503e-06,
      "loss": 0.0365,
      "step": 433340
    },
    {
      "epoch": 0.7092031447405458,
      "grad_norm": 0.3350459635257721,
      "learning_rate": 8.638139731027985e-06,
      "loss": 0.0305,
      "step": 433360
    },
    {
      "epoch": 0.7092358751791992,
      "grad_norm": 1.3189045190811157,
      "learning_rate": 8.638073838814468e-06,
      "loss": 0.031,
      "step": 433380
    },
    {
      "epoch": 0.7092686056178525,
      "grad_norm": 1.0624186992645264,
      "learning_rate": 8.63800794660095e-06,
      "loss": 0.0276,
      "step": 433400
    },
    {
      "epoch": 0.7093013360565058,
      "grad_norm": 0.9956899285316467,
      "learning_rate": 8.637942054387434e-06,
      "loss": 0.0359,
      "step": 433420
    },
    {
      "epoch": 0.7093340664951592,
      "grad_norm": 1.5940666198730469,
      "learning_rate": 8.637876162173918e-06,
      "loss": 0.0272,
      "step": 433440
    },
    {
      "epoch": 0.7093667969338125,
      "grad_norm": 0.38309791684150696,
      "learning_rate": 8.6378102699604e-06,
      "loss": 0.0342,
      "step": 433460
    },
    {
      "epoch": 0.7093995273724658,
      "grad_norm": 0.540831446647644,
      "learning_rate": 8.637744377746883e-06,
      "loss": 0.0409,
      "step": 433480
    },
    {
      "epoch": 0.7094322578111192,
      "grad_norm": 0.7381660342216492,
      "learning_rate": 8.637678485533365e-06,
      "loss": 0.0302,
      "step": 433500
    },
    {
      "epoch": 0.7094649882497726,
      "grad_norm": 0.7117054462432861,
      "learning_rate": 8.637612593319848e-06,
      "loss": 0.0184,
      "step": 433520
    },
    {
      "epoch": 0.7094977186884258,
      "grad_norm": 1.2075368165969849,
      "learning_rate": 8.63754670110633e-06,
      "loss": 0.0423,
      "step": 433540
    },
    {
      "epoch": 0.7095304491270792,
      "grad_norm": 0.41399267315864563,
      "learning_rate": 8.637480808892814e-06,
      "loss": 0.0279,
      "step": 433560
    },
    {
      "epoch": 0.7095631795657326,
      "grad_norm": 0.42346933484077454,
      "learning_rate": 8.637414916679296e-06,
      "loss": 0.0357,
      "step": 433580
    },
    {
      "epoch": 0.7095959100043859,
      "grad_norm": 0.19543848931789398,
      "learning_rate": 8.63734902446578e-06,
      "loss": 0.0291,
      "step": 433600
    },
    {
      "epoch": 0.7096286404430392,
      "grad_norm": 0.7611007690429688,
      "learning_rate": 8.637283132252261e-06,
      "loss": 0.031,
      "step": 433620
    },
    {
      "epoch": 0.7096613708816926,
      "grad_norm": 0.20214655995368958,
      "learning_rate": 8.637217240038745e-06,
      "loss": 0.0298,
      "step": 433640
    },
    {
      "epoch": 0.7096941013203459,
      "grad_norm": 1.3547465801239014,
      "learning_rate": 8.637151347825229e-06,
      "loss": 0.034,
      "step": 433660
    },
    {
      "epoch": 0.7097268317589992,
      "grad_norm": 0.5114492177963257,
      "learning_rate": 8.63708545561171e-06,
      "loss": 0.0313,
      "step": 433680
    },
    {
      "epoch": 0.7097595621976526,
      "grad_norm": 0.3785838186740875,
      "learning_rate": 8.637019563398194e-06,
      "loss": 0.025,
      "step": 433700
    },
    {
      "epoch": 0.7097922926363059,
      "grad_norm": 1.842244029045105,
      "learning_rate": 8.636953671184678e-06,
      "loss": 0.0414,
      "step": 433720
    },
    {
      "epoch": 0.7098250230749592,
      "grad_norm": 1.3234713077545166,
      "learning_rate": 8.63688777897116e-06,
      "loss": 0.0366,
      "step": 433740
    },
    {
      "epoch": 0.7098577535136126,
      "grad_norm": 0.4920181632041931,
      "learning_rate": 8.636821886757643e-06,
      "loss": 0.0323,
      "step": 433760
    },
    {
      "epoch": 0.709890483952266,
      "grad_norm": 1.3290975093841553,
      "learning_rate": 8.636755994544127e-06,
      "loss": 0.0392,
      "step": 433780
    },
    {
      "epoch": 0.7099232143909193,
      "grad_norm": 3.3096323013305664,
      "learning_rate": 8.636690102330609e-06,
      "loss": 0.0328,
      "step": 433800
    },
    {
      "epoch": 0.7099559448295726,
      "grad_norm": 1.2057383060455322,
      "learning_rate": 8.636624210117092e-06,
      "loss": 0.028,
      "step": 433820
    },
    {
      "epoch": 0.709988675268226,
      "grad_norm": 1.2598869800567627,
      "learning_rate": 8.636558317903574e-06,
      "loss": 0.0322,
      "step": 433840
    },
    {
      "epoch": 0.7100214057068793,
      "grad_norm": 1.3612736463546753,
      "learning_rate": 8.636492425690058e-06,
      "loss": 0.0418,
      "step": 433860
    },
    {
      "epoch": 0.7100541361455326,
      "grad_norm": 1.762370228767395,
      "learning_rate": 8.63642653347654e-06,
      "loss": 0.0298,
      "step": 433880
    },
    {
      "epoch": 0.710086866584186,
      "grad_norm": 2.520596742630005,
      "learning_rate": 8.636360641263023e-06,
      "loss": 0.0326,
      "step": 433900
    },
    {
      "epoch": 0.7101195970228393,
      "grad_norm": 0.6370563507080078,
      "learning_rate": 8.636294749049505e-06,
      "loss": 0.0478,
      "step": 433920
    },
    {
      "epoch": 0.7101523274614926,
      "grad_norm": 0.6416510343551636,
      "learning_rate": 8.636228856835989e-06,
      "loss": 0.0268,
      "step": 433940
    },
    {
      "epoch": 0.710185057900146,
      "grad_norm": 0.1550411880016327,
      "learning_rate": 8.63616296462247e-06,
      "loss": 0.0251,
      "step": 433960
    },
    {
      "epoch": 0.7102177883387993,
      "grad_norm": 2.0061943531036377,
      "learning_rate": 8.636097072408954e-06,
      "loss": 0.0354,
      "step": 433980
    },
    {
      "epoch": 0.7102505187774527,
      "grad_norm": 0.7600393891334534,
      "learning_rate": 8.636031180195436e-06,
      "loss": 0.0293,
      "step": 434000
    },
    {
      "epoch": 0.710283249216106,
      "grad_norm": 1.047961950302124,
      "learning_rate": 8.63596528798192e-06,
      "loss": 0.0244,
      "step": 434020
    },
    {
      "epoch": 0.7103159796547593,
      "grad_norm": 1.9455639123916626,
      "learning_rate": 8.635899395768403e-06,
      "loss": 0.0323,
      "step": 434040
    },
    {
      "epoch": 0.7103487100934127,
      "grad_norm": 0.8271419405937195,
      "learning_rate": 8.635833503554885e-06,
      "loss": 0.0326,
      "step": 434060
    },
    {
      "epoch": 0.710381440532066,
      "grad_norm": 1.5063148736953735,
      "learning_rate": 8.635767611341369e-06,
      "loss": 0.0262,
      "step": 434080
    },
    {
      "epoch": 0.7104141709707193,
      "grad_norm": 0.28059878945350647,
      "learning_rate": 8.635701719127852e-06,
      "loss": 0.0299,
      "step": 434100
    },
    {
      "epoch": 0.7104469014093727,
      "grad_norm": 0.46539127826690674,
      "learning_rate": 8.635635826914334e-06,
      "loss": 0.0306,
      "step": 434120
    },
    {
      "epoch": 0.710479631848026,
      "grad_norm": 1.3586139678955078,
      "learning_rate": 8.635569934700818e-06,
      "loss": 0.0379,
      "step": 434140
    },
    {
      "epoch": 0.7105123622866794,
      "grad_norm": 0.35606059432029724,
      "learning_rate": 8.635504042487301e-06,
      "loss": 0.0284,
      "step": 434160
    },
    {
      "epoch": 0.7105450927253327,
      "grad_norm": 0.46485674381256104,
      "learning_rate": 8.635438150273783e-06,
      "loss": 0.0407,
      "step": 434180
    },
    {
      "epoch": 0.710577823163986,
      "grad_norm": 1.237014889717102,
      "learning_rate": 8.635372258060267e-06,
      "loss": 0.0258,
      "step": 434200
    },
    {
      "epoch": 0.7106105536026394,
      "grad_norm": 1.3418874740600586,
      "learning_rate": 8.635306365846749e-06,
      "loss": 0.0365,
      "step": 434220
    },
    {
      "epoch": 0.7106432840412927,
      "grad_norm": 0.4465940594673157,
      "learning_rate": 8.635240473633232e-06,
      "loss": 0.025,
      "step": 434240
    },
    {
      "epoch": 0.7106760144799461,
      "grad_norm": 0.6628472208976746,
      "learning_rate": 8.635174581419714e-06,
      "loss": 0.0456,
      "step": 434260
    },
    {
      "epoch": 0.7107087449185994,
      "grad_norm": 0.8977752327919006,
      "learning_rate": 8.635108689206198e-06,
      "loss": 0.0324,
      "step": 434280
    },
    {
      "epoch": 0.7107414753572527,
      "grad_norm": 1.8170397281646729,
      "learning_rate": 8.63504279699268e-06,
      "loss": 0.0468,
      "step": 434300
    },
    {
      "epoch": 0.7107742057959061,
      "grad_norm": 0.3251532316207886,
      "learning_rate": 8.634976904779163e-06,
      "loss": 0.0314,
      "step": 434320
    },
    {
      "epoch": 0.7108069362345594,
      "grad_norm": 0.49300089478492737,
      "learning_rate": 8.634911012565645e-06,
      "loss": 0.0479,
      "step": 434340
    },
    {
      "epoch": 0.7108396666732127,
      "grad_norm": 0.7201105356216431,
      "learning_rate": 8.634845120352129e-06,
      "loss": 0.0349,
      "step": 434360
    },
    {
      "epoch": 0.7108723971118661,
      "grad_norm": 0.43947353959083557,
      "learning_rate": 8.63477922813861e-06,
      "loss": 0.0511,
      "step": 434380
    },
    {
      "epoch": 0.7109051275505194,
      "grad_norm": 1.173927903175354,
      "learning_rate": 8.634713335925094e-06,
      "loss": 0.046,
      "step": 434400
    },
    {
      "epoch": 0.7109378579891728,
      "grad_norm": 1.2900390625,
      "learning_rate": 8.634647443711576e-06,
      "loss": 0.0263,
      "step": 434420
    },
    {
      "epoch": 0.7109705884278261,
      "grad_norm": 2.10174822807312,
      "learning_rate": 8.63458155149806e-06,
      "loss": 0.0346,
      "step": 434440
    },
    {
      "epoch": 0.7110033188664795,
      "grad_norm": 0.4111775755882263,
      "learning_rate": 8.634515659284543e-06,
      "loss": 0.0278,
      "step": 434460
    },
    {
      "epoch": 0.7110360493051328,
      "grad_norm": 1.2333317995071411,
      "learning_rate": 8.634449767071025e-06,
      "loss": 0.0311,
      "step": 434480
    },
    {
      "epoch": 0.7110687797437861,
      "grad_norm": 1.770790696144104,
      "learning_rate": 8.634383874857509e-06,
      "loss": 0.0343,
      "step": 434500
    },
    {
      "epoch": 0.7111015101824395,
      "grad_norm": 0.9705379605293274,
      "learning_rate": 8.634317982643992e-06,
      "loss": 0.0413,
      "step": 434520
    },
    {
      "epoch": 0.7111342406210928,
      "grad_norm": 0.7255191206932068,
      "learning_rate": 8.634252090430474e-06,
      "loss": 0.0393,
      "step": 434540
    },
    {
      "epoch": 0.7111669710597461,
      "grad_norm": 5.579419136047363,
      "learning_rate": 8.634186198216958e-06,
      "loss": 0.0275,
      "step": 434560
    },
    {
      "epoch": 0.7111997014983995,
      "grad_norm": 1.3894381523132324,
      "learning_rate": 8.634120306003441e-06,
      "loss": 0.0383,
      "step": 434580
    },
    {
      "epoch": 0.7112324319370528,
      "grad_norm": 0.37670502066612244,
      "learning_rate": 8.634054413789923e-06,
      "loss": 0.0312,
      "step": 434600
    },
    {
      "epoch": 0.7112651623757061,
      "grad_norm": 0.7848103046417236,
      "learning_rate": 8.633988521576407e-06,
      "loss": 0.0301,
      "step": 434620
    },
    {
      "epoch": 0.7112978928143595,
      "grad_norm": 1.1039973497390747,
      "learning_rate": 8.633922629362889e-06,
      "loss": 0.037,
      "step": 434640
    },
    {
      "epoch": 0.7113306232530129,
      "grad_norm": 6.063847064971924,
      "learning_rate": 8.633856737149372e-06,
      "loss": 0.034,
      "step": 434660
    },
    {
      "epoch": 0.7113633536916661,
      "grad_norm": 0.6692053079605103,
      "learning_rate": 8.633790844935854e-06,
      "loss": 0.0398,
      "step": 434680
    },
    {
      "epoch": 0.7113960841303195,
      "grad_norm": 0.4483826756477356,
      "learning_rate": 8.633724952722338e-06,
      "loss": 0.041,
      "step": 434700
    },
    {
      "epoch": 0.7114288145689729,
      "grad_norm": 0.4724951684474945,
      "learning_rate": 8.63365906050882e-06,
      "loss": 0.0294,
      "step": 434720
    },
    {
      "epoch": 0.7114615450076262,
      "grad_norm": 1.411628007888794,
      "learning_rate": 8.633593168295303e-06,
      "loss": 0.0268,
      "step": 434740
    },
    {
      "epoch": 0.7114942754462795,
      "grad_norm": 0.8220858573913574,
      "learning_rate": 8.633527276081785e-06,
      "loss": 0.0331,
      "step": 434760
    },
    {
      "epoch": 0.7115270058849329,
      "grad_norm": 0.8841573596000671,
      "learning_rate": 8.633461383868269e-06,
      "loss": 0.0273,
      "step": 434780
    },
    {
      "epoch": 0.7115597363235862,
      "grad_norm": 0.952672004699707,
      "learning_rate": 8.63339549165475e-06,
      "loss": 0.0353,
      "step": 434800
    },
    {
      "epoch": 0.7115924667622395,
      "grad_norm": 0.32127267122268677,
      "learning_rate": 8.633329599441234e-06,
      "loss": 0.0315,
      "step": 434820
    },
    {
      "epoch": 0.7116251972008929,
      "grad_norm": 0.7742432355880737,
      "learning_rate": 8.633263707227718e-06,
      "loss": 0.0344,
      "step": 434840
    },
    {
      "epoch": 0.7116579276395463,
      "grad_norm": 0.7028393745422363,
      "learning_rate": 8.6331978150142e-06,
      "loss": 0.0314,
      "step": 434860
    },
    {
      "epoch": 0.7116906580781995,
      "grad_norm": 3.0162010192871094,
      "learning_rate": 8.633131922800683e-06,
      "loss": 0.0492,
      "step": 434880
    },
    {
      "epoch": 0.7117233885168529,
      "grad_norm": 2.2007575035095215,
      "learning_rate": 8.633066030587167e-06,
      "loss": 0.0341,
      "step": 434900
    },
    {
      "epoch": 0.7117561189555063,
      "grad_norm": 2.4695937633514404,
      "learning_rate": 8.633000138373649e-06,
      "loss": 0.0328,
      "step": 434920
    },
    {
      "epoch": 0.7117888493941595,
      "grad_norm": 0.5457942485809326,
      "learning_rate": 8.632934246160132e-06,
      "loss": 0.0365,
      "step": 434940
    },
    {
      "epoch": 0.7118215798328129,
      "grad_norm": 7.1083478927612305,
      "learning_rate": 8.632868353946616e-06,
      "loss": 0.0279,
      "step": 434960
    },
    {
      "epoch": 0.7118543102714663,
      "grad_norm": 0.8894851207733154,
      "learning_rate": 8.632802461733098e-06,
      "loss": 0.0353,
      "step": 434980
    },
    {
      "epoch": 0.7118870407101195,
      "grad_norm": 0.6894447207450867,
      "learning_rate": 8.632736569519581e-06,
      "loss": 0.0247,
      "step": 435000
    },
    {
      "epoch": 0.7119197711487729,
      "grad_norm": 1.0567883253097534,
      "learning_rate": 8.632670677306063e-06,
      "loss": 0.0358,
      "step": 435020
    },
    {
      "epoch": 0.7119525015874263,
      "grad_norm": 0.5643060803413391,
      "learning_rate": 8.632604785092547e-06,
      "loss": 0.0289,
      "step": 435040
    },
    {
      "epoch": 0.7119852320260797,
      "grad_norm": 7.088087558746338,
      "learning_rate": 8.632538892879029e-06,
      "loss": 0.0389,
      "step": 435060
    },
    {
      "epoch": 0.7120179624647329,
      "grad_norm": 0.8087299466133118,
      "learning_rate": 8.632473000665512e-06,
      "loss": 0.0366,
      "step": 435080
    },
    {
      "epoch": 0.7120506929033863,
      "grad_norm": 0.8043521642684937,
      "learning_rate": 8.632407108451994e-06,
      "loss": 0.0357,
      "step": 435100
    },
    {
      "epoch": 0.7120834233420397,
      "grad_norm": 0.5489389300346375,
      "learning_rate": 8.632341216238478e-06,
      "loss": 0.0319,
      "step": 435120
    },
    {
      "epoch": 0.7121161537806929,
      "grad_norm": 1.3145588636398315,
      "learning_rate": 8.63227532402496e-06,
      "loss": 0.0362,
      "step": 435140
    },
    {
      "epoch": 0.7121488842193463,
      "grad_norm": 1.07783842086792,
      "learning_rate": 8.632209431811443e-06,
      "loss": 0.0301,
      "step": 435160
    },
    {
      "epoch": 0.7121816146579997,
      "grad_norm": 0.6329647898674011,
      "learning_rate": 8.632143539597927e-06,
      "loss": 0.0286,
      "step": 435180
    },
    {
      "epoch": 0.7122143450966529,
      "grad_norm": 0.9760320782661438,
      "learning_rate": 8.632077647384409e-06,
      "loss": 0.035,
      "step": 435200
    },
    {
      "epoch": 0.7122470755353063,
      "grad_norm": 0.17298531532287598,
      "learning_rate": 8.632011755170892e-06,
      "loss": 0.035,
      "step": 435220
    },
    {
      "epoch": 0.7122798059739597,
      "grad_norm": 1.073266625404358,
      "learning_rate": 8.631945862957374e-06,
      "loss": 0.0401,
      "step": 435240
    },
    {
      "epoch": 0.712312536412613,
      "grad_norm": 0.6245303750038147,
      "learning_rate": 8.631879970743858e-06,
      "loss": 0.0355,
      "step": 435260
    },
    {
      "epoch": 0.7123452668512663,
      "grad_norm": 0.7490305304527283,
      "learning_rate": 8.631814078530341e-06,
      "loss": 0.0377,
      "step": 435280
    },
    {
      "epoch": 0.7123779972899197,
      "grad_norm": 0.6485514044761658,
      "learning_rate": 8.631748186316823e-06,
      "loss": 0.0357,
      "step": 435300
    },
    {
      "epoch": 0.7124107277285731,
      "grad_norm": 0.4870043098926544,
      "learning_rate": 8.631682294103307e-06,
      "loss": 0.029,
      "step": 435320
    },
    {
      "epoch": 0.7124434581672263,
      "grad_norm": 1.8048819303512573,
      "learning_rate": 8.63161640188979e-06,
      "loss": 0.0376,
      "step": 435340
    },
    {
      "epoch": 0.7124761886058797,
      "grad_norm": 0.5488177537918091,
      "learning_rate": 8.631550509676272e-06,
      "loss": 0.0461,
      "step": 435360
    },
    {
      "epoch": 0.7125089190445331,
      "grad_norm": 0.9105782508850098,
      "learning_rate": 8.631484617462756e-06,
      "loss": 0.0351,
      "step": 435380
    },
    {
      "epoch": 0.7125416494831863,
      "grad_norm": 0.6839506030082703,
      "learning_rate": 8.631418725249238e-06,
      "loss": 0.028,
      "step": 435400
    },
    {
      "epoch": 0.7125743799218397,
      "grad_norm": 0.16788658499717712,
      "learning_rate": 8.631352833035721e-06,
      "loss": 0.0287,
      "step": 435420
    },
    {
      "epoch": 0.7126071103604931,
      "grad_norm": 0.4721514880657196,
      "learning_rate": 8.631286940822203e-06,
      "loss": 0.0332,
      "step": 435440
    },
    {
      "epoch": 0.7126398407991464,
      "grad_norm": 0.6556268334388733,
      "learning_rate": 8.631221048608687e-06,
      "loss": 0.0293,
      "step": 435460
    },
    {
      "epoch": 0.7126725712377997,
      "grad_norm": 0.713911771774292,
      "learning_rate": 8.631155156395169e-06,
      "loss": 0.0323,
      "step": 435480
    },
    {
      "epoch": 0.7127053016764531,
      "grad_norm": 1.030672550201416,
      "learning_rate": 8.631089264181652e-06,
      "loss": 0.0361,
      "step": 435500
    },
    {
      "epoch": 0.7127380321151064,
      "grad_norm": 0.6967881321907043,
      "learning_rate": 8.631023371968136e-06,
      "loss": 0.0356,
      "step": 435520
    },
    {
      "epoch": 0.7127707625537597,
      "grad_norm": 2.187379837036133,
      "learning_rate": 8.630957479754618e-06,
      "loss": 0.0362,
      "step": 435540
    },
    {
      "epoch": 0.7128034929924131,
      "grad_norm": 1.4903422594070435,
      "learning_rate": 8.630891587541101e-06,
      "loss": 0.04,
      "step": 435560
    },
    {
      "epoch": 0.7128362234310665,
      "grad_norm": 2.681253671646118,
      "learning_rate": 8.630825695327583e-06,
      "loss": 0.0443,
      "step": 435580
    },
    {
      "epoch": 0.7128689538697197,
      "grad_norm": 0.3740295469760895,
      "learning_rate": 8.630759803114067e-06,
      "loss": 0.0285,
      "step": 435600
    },
    {
      "epoch": 0.7129016843083731,
      "grad_norm": 0.8094494938850403,
      "learning_rate": 8.630693910900549e-06,
      "loss": 0.0303,
      "step": 435620
    },
    {
      "epoch": 0.7129344147470265,
      "grad_norm": 0.8687707185745239,
      "learning_rate": 8.630628018687032e-06,
      "loss": 0.0379,
      "step": 435640
    },
    {
      "epoch": 0.7129671451856798,
      "grad_norm": 0.8904479742050171,
      "learning_rate": 8.630562126473514e-06,
      "loss": 0.0251,
      "step": 435660
    },
    {
      "epoch": 0.7129998756243331,
      "grad_norm": 2.082761764526367,
      "learning_rate": 8.630496234259998e-06,
      "loss": 0.0295,
      "step": 435680
    },
    {
      "epoch": 0.7130326060629865,
      "grad_norm": 0.6468081474304199,
      "learning_rate": 8.630430342046482e-06,
      "loss": 0.025,
      "step": 435700
    },
    {
      "epoch": 0.7130653365016398,
      "grad_norm": 1.6987897157669067,
      "learning_rate": 8.630364449832963e-06,
      "loss": 0.037,
      "step": 435720
    },
    {
      "epoch": 0.7130980669402931,
      "grad_norm": 1.6706849336624146,
      "learning_rate": 8.630298557619447e-06,
      "loss": 0.028,
      "step": 435740
    },
    {
      "epoch": 0.7131307973789465,
      "grad_norm": 0.49776485562324524,
      "learning_rate": 8.63023266540593e-06,
      "loss": 0.0416,
      "step": 435760
    },
    {
      "epoch": 0.7131635278175998,
      "grad_norm": 1.520764946937561,
      "learning_rate": 8.630166773192412e-06,
      "loss": 0.0198,
      "step": 435780
    },
    {
      "epoch": 0.7131962582562531,
      "grad_norm": 2.7234768867492676,
      "learning_rate": 8.630100880978896e-06,
      "loss": 0.0361,
      "step": 435800
    },
    {
      "epoch": 0.7132289886949065,
      "grad_norm": 1.1852456331253052,
      "learning_rate": 8.630034988765378e-06,
      "loss": 0.0319,
      "step": 435820
    },
    {
      "epoch": 0.7132617191335598,
      "grad_norm": 0.8911092281341553,
      "learning_rate": 8.629969096551862e-06,
      "loss": 0.0352,
      "step": 435840
    },
    {
      "epoch": 0.7132944495722132,
      "grad_norm": 0.29561445116996765,
      "learning_rate": 8.629903204338343e-06,
      "loss": 0.0265,
      "step": 435860
    },
    {
      "epoch": 0.7133271800108665,
      "grad_norm": 0.3697361648082733,
      "learning_rate": 8.629837312124827e-06,
      "loss": 0.0209,
      "step": 435880
    },
    {
      "epoch": 0.7133599104495199,
      "grad_norm": 1.4916019439697266,
      "learning_rate": 8.62977141991131e-06,
      "loss": 0.0386,
      "step": 435900
    },
    {
      "epoch": 0.7133926408881732,
      "grad_norm": 0.7759361863136292,
      "learning_rate": 8.629705527697792e-06,
      "loss": 0.0362,
      "step": 435920
    },
    {
      "epoch": 0.7134253713268265,
      "grad_norm": 0.9121776819229126,
      "learning_rate": 8.629639635484276e-06,
      "loss": 0.0388,
      "step": 435940
    },
    {
      "epoch": 0.7134581017654799,
      "grad_norm": 0.9200454950332642,
      "learning_rate": 8.629573743270758e-06,
      "loss": 0.0444,
      "step": 435960
    },
    {
      "epoch": 0.7134908322041332,
      "grad_norm": 1.0259783267974854,
      "learning_rate": 8.629507851057242e-06,
      "loss": 0.0222,
      "step": 435980
    },
    {
      "epoch": 0.7135235626427865,
      "grad_norm": 1.908597707748413,
      "learning_rate": 8.629441958843723e-06,
      "loss": 0.032,
      "step": 436000
    },
    {
      "epoch": 0.7135562930814399,
      "grad_norm": 1.2112033367156982,
      "learning_rate": 8.629376066630207e-06,
      "loss": 0.038,
      "step": 436020
    },
    {
      "epoch": 0.7135890235200932,
      "grad_norm": 1.1806254386901855,
      "learning_rate": 8.629310174416689e-06,
      "loss": 0.0387,
      "step": 436040
    },
    {
      "epoch": 0.7136217539587466,
      "grad_norm": 1.2249468564987183,
      "learning_rate": 8.629244282203173e-06,
      "loss": 0.0349,
      "step": 436060
    },
    {
      "epoch": 0.7136544843973999,
      "grad_norm": 0.715299129486084,
      "learning_rate": 8.629178389989656e-06,
      "loss": 0.0346,
      "step": 436080
    },
    {
      "epoch": 0.7136872148360532,
      "grad_norm": 2.413008213043213,
      "learning_rate": 8.629112497776138e-06,
      "loss": 0.0393,
      "step": 436100
    },
    {
      "epoch": 0.7137199452747066,
      "grad_norm": 1.5774235725402832,
      "learning_rate": 8.629046605562622e-06,
      "loss": 0.0348,
      "step": 436120
    },
    {
      "epoch": 0.7137526757133599,
      "grad_norm": 0.5645303130149841,
      "learning_rate": 8.628980713349105e-06,
      "loss": 0.0323,
      "step": 436140
    },
    {
      "epoch": 0.7137854061520132,
      "grad_norm": 0.5404351949691772,
      "learning_rate": 8.628914821135587e-06,
      "loss": 0.0364,
      "step": 436160
    },
    {
      "epoch": 0.7138181365906666,
      "grad_norm": 5.462588310241699,
      "learning_rate": 8.62884892892207e-06,
      "loss": 0.0459,
      "step": 436180
    },
    {
      "epoch": 0.7138508670293199,
      "grad_norm": 2.8406789302825928,
      "learning_rate": 8.628783036708553e-06,
      "loss": 0.0275,
      "step": 436200
    },
    {
      "epoch": 0.7138835974679733,
      "grad_norm": 0.660350501537323,
      "learning_rate": 8.628717144495036e-06,
      "loss": 0.0314,
      "step": 436220
    },
    {
      "epoch": 0.7139163279066266,
      "grad_norm": 0.6228073835372925,
      "learning_rate": 8.62865125228152e-06,
      "loss": 0.0259,
      "step": 436240
    },
    {
      "epoch": 0.71394905834528,
      "grad_norm": 0.2855496108531952,
      "learning_rate": 8.628585360068002e-06,
      "loss": 0.034,
      "step": 436260
    },
    {
      "epoch": 0.7139817887839333,
      "grad_norm": 5.637582302093506,
      "learning_rate": 8.628519467854485e-06,
      "loss": 0.0245,
      "step": 436280
    },
    {
      "epoch": 0.7140145192225866,
      "grad_norm": 1.089478850364685,
      "learning_rate": 8.628453575640967e-06,
      "loss": 0.0449,
      "step": 436300
    },
    {
      "epoch": 0.71404724966124,
      "grad_norm": 0.4756245017051697,
      "learning_rate": 8.62838768342745e-06,
      "loss": 0.0308,
      "step": 436320
    },
    {
      "epoch": 0.7140799800998933,
      "grad_norm": 1.8722753524780273,
      "learning_rate": 8.628321791213933e-06,
      "loss": 0.038,
      "step": 436340
    },
    {
      "epoch": 0.7141127105385466,
      "grad_norm": 1.3266369104385376,
      "learning_rate": 8.628255899000416e-06,
      "loss": 0.0283,
      "step": 436360
    },
    {
      "epoch": 0.7141454409772,
      "grad_norm": 0.42956361174583435,
      "learning_rate": 8.628190006786898e-06,
      "loss": 0.037,
      "step": 436380
    },
    {
      "epoch": 0.7141781714158533,
      "grad_norm": 0.5654430985450745,
      "learning_rate": 8.628124114573382e-06,
      "loss": 0.0321,
      "step": 436400
    },
    {
      "epoch": 0.7142109018545066,
      "grad_norm": 2.954782485961914,
      "learning_rate": 8.628058222359864e-06,
      "loss": 0.0371,
      "step": 436420
    },
    {
      "epoch": 0.71424363229316,
      "grad_norm": 4.457596778869629,
      "learning_rate": 8.627992330146347e-06,
      "loss": 0.032,
      "step": 436440
    },
    {
      "epoch": 0.7142763627318134,
      "grad_norm": 0.7788973450660706,
      "learning_rate": 8.627926437932829e-06,
      "loss": 0.025,
      "step": 436460
    },
    {
      "epoch": 0.7143090931704666,
      "grad_norm": 1.187686800956726,
      "learning_rate": 8.627860545719313e-06,
      "loss": 0.0334,
      "step": 436480
    },
    {
      "epoch": 0.71434182360912,
      "grad_norm": 0.49747300148010254,
      "learning_rate": 8.627794653505796e-06,
      "loss": 0.0376,
      "step": 436500
    },
    {
      "epoch": 0.7143745540477734,
      "grad_norm": 1.1199017763137817,
      "learning_rate": 8.627728761292278e-06,
      "loss": 0.0275,
      "step": 436520
    },
    {
      "epoch": 0.7144072844864267,
      "grad_norm": 1.7636252641677856,
      "learning_rate": 8.627662869078762e-06,
      "loss": 0.039,
      "step": 436540
    },
    {
      "epoch": 0.71444001492508,
      "grad_norm": 1.0000063180923462,
      "learning_rate": 8.627596976865245e-06,
      "loss": 0.0376,
      "step": 436560
    },
    {
      "epoch": 0.7144727453637334,
      "grad_norm": 1.7278228998184204,
      "learning_rate": 8.627531084651727e-06,
      "loss": 0.0399,
      "step": 436580
    },
    {
      "epoch": 0.7145054758023867,
      "grad_norm": 0.3570508360862732,
      "learning_rate": 8.62746519243821e-06,
      "loss": 0.0387,
      "step": 436600
    },
    {
      "epoch": 0.71453820624104,
      "grad_norm": 1.5232425928115845,
      "learning_rate": 8.627399300224694e-06,
      "loss": 0.0251,
      "step": 436620
    },
    {
      "epoch": 0.7145709366796934,
      "grad_norm": 0.7754669785499573,
      "learning_rate": 8.627333408011176e-06,
      "loss": 0.023,
      "step": 436640
    },
    {
      "epoch": 0.7146036671183468,
      "grad_norm": 0.6995719075202942,
      "learning_rate": 8.62726751579766e-06,
      "loss": 0.0329,
      "step": 436660
    },
    {
      "epoch": 0.714636397557,
      "grad_norm": 0.5278615951538086,
      "learning_rate": 8.627201623584142e-06,
      "loss": 0.0296,
      "step": 436680
    },
    {
      "epoch": 0.7146691279956534,
      "grad_norm": 1.5027755498886108,
      "learning_rate": 8.627135731370625e-06,
      "loss": 0.0243,
      "step": 436700
    },
    {
      "epoch": 0.7147018584343068,
      "grad_norm": 1.8402334451675415,
      "learning_rate": 8.627069839157107e-06,
      "loss": 0.0292,
      "step": 436720
    },
    {
      "epoch": 0.71473458887296,
      "grad_norm": 1.4866070747375488,
      "learning_rate": 8.62700394694359e-06,
      "loss": 0.0284,
      "step": 436740
    },
    {
      "epoch": 0.7147673193116134,
      "grad_norm": 0.6332353949546814,
      "learning_rate": 8.626938054730073e-06,
      "loss": 0.025,
      "step": 436760
    },
    {
      "epoch": 0.7148000497502668,
      "grad_norm": 0.469125896692276,
      "learning_rate": 8.626872162516556e-06,
      "loss": 0.0322,
      "step": 436780
    },
    {
      "epoch": 0.71483278018892,
      "grad_norm": 0.40873226523399353,
      "learning_rate": 8.626806270303038e-06,
      "loss": 0.0244,
      "step": 436800
    },
    {
      "epoch": 0.7148655106275734,
      "grad_norm": 2.1240041255950928,
      "learning_rate": 8.626740378089522e-06,
      "loss": 0.0304,
      "step": 436820
    },
    {
      "epoch": 0.7148982410662268,
      "grad_norm": 1.3494491577148438,
      "learning_rate": 8.626674485876004e-06,
      "loss": 0.0287,
      "step": 436840
    },
    {
      "epoch": 0.7149309715048802,
      "grad_norm": 1.3908342123031616,
      "learning_rate": 8.626608593662487e-06,
      "loss": 0.0339,
      "step": 436860
    },
    {
      "epoch": 0.7149637019435334,
      "grad_norm": 1.2545346021652222,
      "learning_rate": 8.62654270144897e-06,
      "loss": 0.025,
      "step": 436880
    },
    {
      "epoch": 0.7149964323821868,
      "grad_norm": 1.6010576486587524,
      "learning_rate": 8.626476809235453e-06,
      "loss": 0.0289,
      "step": 436900
    },
    {
      "epoch": 0.7150291628208402,
      "grad_norm": 1.1162422895431519,
      "learning_rate": 8.626410917021936e-06,
      "loss": 0.0259,
      "step": 436920
    },
    {
      "epoch": 0.7150618932594934,
      "grad_norm": 0.909858763217926,
      "learning_rate": 8.62634502480842e-06,
      "loss": 0.0286,
      "step": 436940
    },
    {
      "epoch": 0.7150946236981468,
      "grad_norm": 0.7585780024528503,
      "learning_rate": 8.626279132594902e-06,
      "loss": 0.0324,
      "step": 436960
    },
    {
      "epoch": 0.7151273541368002,
      "grad_norm": 0.7347827553749084,
      "learning_rate": 8.626213240381385e-06,
      "loss": 0.026,
      "step": 436980
    },
    {
      "epoch": 0.7151600845754534,
      "grad_norm": 0.4435669779777527,
      "learning_rate": 8.626147348167869e-06,
      "loss": 0.0301,
      "step": 437000
    },
    {
      "epoch": 0.7151928150141068,
      "grad_norm": 1.3537451028823853,
      "learning_rate": 8.62608145595435e-06,
      "loss": 0.0345,
      "step": 437020
    },
    {
      "epoch": 0.7152255454527602,
      "grad_norm": 1.5074526071548462,
      "learning_rate": 8.626015563740834e-06,
      "loss": 0.0391,
      "step": 437040
    },
    {
      "epoch": 0.7152582758914134,
      "grad_norm": 3.3891243934631348,
      "learning_rate": 8.625949671527316e-06,
      "loss": 0.0322,
      "step": 437060
    },
    {
      "epoch": 0.7152910063300668,
      "grad_norm": 1.3249926567077637,
      "learning_rate": 8.6258837793138e-06,
      "loss": 0.0282,
      "step": 437080
    },
    {
      "epoch": 0.7153237367687202,
      "grad_norm": 1.451098918914795,
      "learning_rate": 8.625817887100282e-06,
      "loss": 0.0393,
      "step": 437100
    },
    {
      "epoch": 0.7153564672073736,
      "grad_norm": 0.9774353504180908,
      "learning_rate": 8.625751994886765e-06,
      "loss": 0.039,
      "step": 437120
    },
    {
      "epoch": 0.7153891976460268,
      "grad_norm": 2.630984306335449,
      "learning_rate": 8.625686102673247e-06,
      "loss": 0.0287,
      "step": 437140
    },
    {
      "epoch": 0.7154219280846802,
      "grad_norm": 0.5126984715461731,
      "learning_rate": 8.62562021045973e-06,
      "loss": 0.0376,
      "step": 437160
    },
    {
      "epoch": 0.7154546585233336,
      "grad_norm": 0.6744905710220337,
      "learning_rate": 8.625554318246213e-06,
      "loss": 0.0295,
      "step": 437180
    },
    {
      "epoch": 0.7154873889619868,
      "grad_norm": 1.0102283954620361,
      "learning_rate": 8.625488426032696e-06,
      "loss": 0.0349,
      "step": 437200
    },
    {
      "epoch": 0.7155201194006402,
      "grad_norm": 2.1273374557495117,
      "learning_rate": 8.625422533819178e-06,
      "loss": 0.0303,
      "step": 437220
    },
    {
      "epoch": 0.7155528498392936,
      "grad_norm": 1.2957900762557983,
      "learning_rate": 8.625356641605662e-06,
      "loss": 0.0351,
      "step": 437240
    },
    {
      "epoch": 0.7155855802779468,
      "grad_norm": 0.7641782164573669,
      "learning_rate": 8.625290749392144e-06,
      "loss": 0.0261,
      "step": 437260
    },
    {
      "epoch": 0.7156183107166002,
      "grad_norm": 0.9651720523834229,
      "learning_rate": 8.625224857178627e-06,
      "loss": 0.0377,
      "step": 437280
    },
    {
      "epoch": 0.7156510411552536,
      "grad_norm": 1.197850227355957,
      "learning_rate": 8.625158964965111e-06,
      "loss": 0.0361,
      "step": 437300
    },
    {
      "epoch": 0.715683771593907,
      "grad_norm": 0.20632952451705933,
      "learning_rate": 8.625093072751593e-06,
      "loss": 0.035,
      "step": 437320
    },
    {
      "epoch": 0.7157165020325602,
      "grad_norm": 1.4995999336242676,
      "learning_rate": 8.625027180538076e-06,
      "loss": 0.0186,
      "step": 437340
    },
    {
      "epoch": 0.7157492324712136,
      "grad_norm": 0.8341747522354126,
      "learning_rate": 8.62496128832456e-06,
      "loss": 0.0219,
      "step": 437360
    },
    {
      "epoch": 0.715781962909867,
      "grad_norm": 0.3116114139556885,
      "learning_rate": 8.624895396111042e-06,
      "loss": 0.0419,
      "step": 437380
    },
    {
      "epoch": 0.7158146933485202,
      "grad_norm": 1.2585498094558716,
      "learning_rate": 8.624829503897525e-06,
      "loss": 0.05,
      "step": 437400
    },
    {
      "epoch": 0.7158474237871736,
      "grad_norm": 1.2994874715805054,
      "learning_rate": 8.624763611684009e-06,
      "loss": 0.021,
      "step": 437420
    },
    {
      "epoch": 0.715880154225827,
      "grad_norm": 1.4367786645889282,
      "learning_rate": 8.624697719470491e-06,
      "loss": 0.0336,
      "step": 437440
    },
    {
      "epoch": 0.7159128846644802,
      "grad_norm": 0.3463195264339447,
      "learning_rate": 8.624631827256974e-06,
      "loss": 0.0189,
      "step": 437460
    },
    {
      "epoch": 0.7159456151031336,
      "grad_norm": 3.5884974002838135,
      "learning_rate": 8.624565935043456e-06,
      "loss": 0.0348,
      "step": 437480
    },
    {
      "epoch": 0.715978345541787,
      "grad_norm": 1.028954267501831,
      "learning_rate": 8.62450004282994e-06,
      "loss": 0.03,
      "step": 437500
    },
    {
      "epoch": 0.7160110759804403,
      "grad_norm": 0.5638461709022522,
      "learning_rate": 8.624434150616422e-06,
      "loss": 0.0357,
      "step": 437520
    },
    {
      "epoch": 0.7160438064190936,
      "grad_norm": 2.0315849781036377,
      "learning_rate": 8.624368258402905e-06,
      "loss": 0.0338,
      "step": 437540
    },
    {
      "epoch": 0.716076536857747,
      "grad_norm": 1.077970027923584,
      "learning_rate": 8.624302366189387e-06,
      "loss": 0.028,
      "step": 437560
    },
    {
      "epoch": 0.7161092672964003,
      "grad_norm": 1.2345046997070312,
      "learning_rate": 8.624236473975871e-06,
      "loss": 0.0303,
      "step": 437580
    },
    {
      "epoch": 0.7161419977350536,
      "grad_norm": 0.7280631065368652,
      "learning_rate": 8.624170581762353e-06,
      "loss": 0.035,
      "step": 437600
    },
    {
      "epoch": 0.716174728173707,
      "grad_norm": 1.1777417659759521,
      "learning_rate": 8.624104689548836e-06,
      "loss": 0.022,
      "step": 437620
    },
    {
      "epoch": 0.7162074586123603,
      "grad_norm": 1.4616645574569702,
      "learning_rate": 8.62403879733532e-06,
      "loss": 0.0375,
      "step": 437640
    },
    {
      "epoch": 0.7162401890510136,
      "grad_norm": 1.0973374843597412,
      "learning_rate": 8.623972905121802e-06,
      "loss": 0.0288,
      "step": 437660
    },
    {
      "epoch": 0.716272919489667,
      "grad_norm": 1.8917526006698608,
      "learning_rate": 8.623907012908285e-06,
      "loss": 0.0285,
      "step": 437680
    },
    {
      "epoch": 0.7163056499283204,
      "grad_norm": 0.5275533199310303,
      "learning_rate": 8.623841120694767e-06,
      "loss": 0.0268,
      "step": 437700
    },
    {
      "epoch": 0.7163383803669737,
      "grad_norm": 0.23045021295547485,
      "learning_rate": 8.623775228481251e-06,
      "loss": 0.0273,
      "step": 437720
    },
    {
      "epoch": 0.716371110805627,
      "grad_norm": 3.179786205291748,
      "learning_rate": 8.623709336267735e-06,
      "loss": 0.038,
      "step": 437740
    },
    {
      "epoch": 0.7164038412442804,
      "grad_norm": 1.4510451555252075,
      "learning_rate": 8.623643444054216e-06,
      "loss": 0.0257,
      "step": 437760
    },
    {
      "epoch": 0.7164365716829337,
      "grad_norm": 1.2004122734069824,
      "learning_rate": 8.6235775518407e-06,
      "loss": 0.0331,
      "step": 437780
    },
    {
      "epoch": 0.716469302121587,
      "grad_norm": 0.3115353584289551,
      "learning_rate": 8.623511659627184e-06,
      "loss": 0.019,
      "step": 437800
    },
    {
      "epoch": 0.7165020325602404,
      "grad_norm": 0.8711920976638794,
      "learning_rate": 8.623445767413665e-06,
      "loss": 0.0319,
      "step": 437820
    },
    {
      "epoch": 0.7165347629988937,
      "grad_norm": 0.45972102880477905,
      "learning_rate": 8.623379875200149e-06,
      "loss": 0.0339,
      "step": 437840
    },
    {
      "epoch": 0.716567493437547,
      "grad_norm": 1.677699327468872,
      "learning_rate": 8.623313982986631e-06,
      "loss": 0.0228,
      "step": 437860
    },
    {
      "epoch": 0.7166002238762004,
      "grad_norm": 0.8948726654052734,
      "learning_rate": 8.623248090773115e-06,
      "loss": 0.0358,
      "step": 437880
    },
    {
      "epoch": 0.7166329543148537,
      "grad_norm": 1.0813356637954712,
      "learning_rate": 8.623182198559596e-06,
      "loss": 0.0316,
      "step": 437900
    },
    {
      "epoch": 0.7166656847535071,
      "grad_norm": 0.27403807640075684,
      "learning_rate": 8.62311630634608e-06,
      "loss": 0.0305,
      "step": 437920
    },
    {
      "epoch": 0.7166984151921604,
      "grad_norm": 1.2383792400360107,
      "learning_rate": 8.623050414132562e-06,
      "loss": 0.0353,
      "step": 437940
    },
    {
      "epoch": 0.7167311456308137,
      "grad_norm": 1.0255227088928223,
      "learning_rate": 8.622984521919045e-06,
      "loss": 0.033,
      "step": 437960
    },
    {
      "epoch": 0.7167638760694671,
      "grad_norm": 1.431395173072815,
      "learning_rate": 8.622918629705529e-06,
      "loss": 0.0268,
      "step": 437980
    },
    {
      "epoch": 0.7167966065081204,
      "grad_norm": 0.3503742218017578,
      "learning_rate": 8.622852737492011e-06,
      "loss": 0.0316,
      "step": 438000
    },
    {
      "epoch": 0.7168293369467738,
      "grad_norm": 1.058976650238037,
      "learning_rate": 8.622786845278495e-06,
      "loss": 0.0361,
      "step": 438020
    },
    {
      "epoch": 0.7168620673854271,
      "grad_norm": 0.5851770639419556,
      "learning_rate": 8.622720953064976e-06,
      "loss": 0.0284,
      "step": 438040
    },
    {
      "epoch": 0.7168947978240804,
      "grad_norm": 0.5802913904190063,
      "learning_rate": 8.62265506085146e-06,
      "loss": 0.0206,
      "step": 438060
    },
    {
      "epoch": 0.7169275282627338,
      "grad_norm": 0.7818290591239929,
      "learning_rate": 8.622589168637942e-06,
      "loss": 0.0228,
      "step": 438080
    },
    {
      "epoch": 0.7169602587013871,
      "grad_norm": 1.560447335243225,
      "learning_rate": 8.622523276424426e-06,
      "loss": 0.0424,
      "step": 438100
    },
    {
      "epoch": 0.7169929891400405,
      "grad_norm": 5.319457054138184,
      "learning_rate": 8.622457384210909e-06,
      "loss": 0.0319,
      "step": 438120
    },
    {
      "epoch": 0.7170257195786938,
      "grad_norm": 0.7233639359474182,
      "learning_rate": 8.622391491997391e-06,
      "loss": 0.0262,
      "step": 438140
    },
    {
      "epoch": 0.7170584500173471,
      "grad_norm": 1.1672204732894897,
      "learning_rate": 8.622325599783875e-06,
      "loss": 0.0326,
      "step": 438160
    },
    {
      "epoch": 0.7170911804560005,
      "grad_norm": 2.1567418575286865,
      "learning_rate": 8.622259707570358e-06,
      "loss": 0.0388,
      "step": 438180
    },
    {
      "epoch": 0.7171239108946538,
      "grad_norm": 0.9875556826591492,
      "learning_rate": 8.62219381535684e-06,
      "loss": 0.0432,
      "step": 438200
    },
    {
      "epoch": 0.7171566413333071,
      "grad_norm": 0.33320531249046326,
      "learning_rate": 8.622127923143324e-06,
      "loss": 0.0282,
      "step": 438220
    },
    {
      "epoch": 0.7171893717719605,
      "grad_norm": 0.8230617642402649,
      "learning_rate": 8.622062030929806e-06,
      "loss": 0.0309,
      "step": 438240
    },
    {
      "epoch": 0.7172221022106138,
      "grad_norm": 1.221281886100769,
      "learning_rate": 8.621996138716289e-06,
      "loss": 0.0252,
      "step": 438260
    },
    {
      "epoch": 0.7172548326492671,
      "grad_norm": 1.2345675230026245,
      "learning_rate": 8.621930246502771e-06,
      "loss": 0.0283,
      "step": 438280
    },
    {
      "epoch": 0.7172875630879205,
      "grad_norm": 3.2214882373809814,
      "learning_rate": 8.621864354289255e-06,
      "loss": 0.0327,
      "step": 438300
    },
    {
      "epoch": 0.7173202935265739,
      "grad_norm": 1.7710858583450317,
      "learning_rate": 8.621798462075737e-06,
      "loss": 0.0405,
      "step": 438320
    },
    {
      "epoch": 0.7173530239652272,
      "grad_norm": 0.1984412670135498,
      "learning_rate": 8.62173256986222e-06,
      "loss": 0.0344,
      "step": 438340
    },
    {
      "epoch": 0.7173857544038805,
      "grad_norm": 0.9198293089866638,
      "learning_rate": 8.621666677648704e-06,
      "loss": 0.0352,
      "step": 438360
    },
    {
      "epoch": 0.7174184848425339,
      "grad_norm": 3.188450813293457,
      "learning_rate": 8.621600785435186e-06,
      "loss": 0.0221,
      "step": 438380
    },
    {
      "epoch": 0.7174512152811872,
      "grad_norm": 1.4640357494354248,
      "learning_rate": 8.62153489322167e-06,
      "loss": 0.0355,
      "step": 438400
    },
    {
      "epoch": 0.7174839457198405,
      "grad_norm": 0.5430946350097656,
      "learning_rate": 8.621469001008151e-06,
      "loss": 0.0211,
      "step": 438420
    },
    {
      "epoch": 0.7175166761584939,
      "grad_norm": 0.668843150138855,
      "learning_rate": 8.621403108794635e-06,
      "loss": 0.0314,
      "step": 438440
    },
    {
      "epoch": 0.7175494065971472,
      "grad_norm": 1.1655769348144531,
      "learning_rate": 8.621337216581117e-06,
      "loss": 0.0271,
      "step": 438460
    },
    {
      "epoch": 0.7175821370358005,
      "grad_norm": 0.2906084358692169,
      "learning_rate": 8.6212713243676e-06,
      "loss": 0.02,
      "step": 438480
    },
    {
      "epoch": 0.7176148674744539,
      "grad_norm": 1.247653603553772,
      "learning_rate": 8.621205432154082e-06,
      "loss": 0.0358,
      "step": 438500
    },
    {
      "epoch": 0.7176475979131073,
      "grad_norm": 3.2467477321624756,
      "learning_rate": 8.621139539940566e-06,
      "loss": 0.0334,
      "step": 438520
    },
    {
      "epoch": 0.7176803283517605,
      "grad_norm": 3.766667366027832,
      "learning_rate": 8.62107364772705e-06,
      "loss": 0.0336,
      "step": 438540
    },
    {
      "epoch": 0.7177130587904139,
      "grad_norm": 0.9740610122680664,
      "learning_rate": 8.621007755513531e-06,
      "loss": 0.0435,
      "step": 438560
    },
    {
      "epoch": 0.7177457892290673,
      "grad_norm": 0.1421431452035904,
      "learning_rate": 8.620941863300015e-06,
      "loss": 0.0259,
      "step": 438580
    },
    {
      "epoch": 0.7177785196677205,
      "grad_norm": 0.6131330728530884,
      "learning_rate": 8.620875971086498e-06,
      "loss": 0.0262,
      "step": 438600
    },
    {
      "epoch": 0.7178112501063739,
      "grad_norm": 0.4643237888813019,
      "learning_rate": 8.62081007887298e-06,
      "loss": 0.0336,
      "step": 438620
    },
    {
      "epoch": 0.7178439805450273,
      "grad_norm": 1.9617445468902588,
      "learning_rate": 8.620744186659464e-06,
      "loss": 0.038,
      "step": 438640
    },
    {
      "epoch": 0.7178767109836806,
      "grad_norm": 2.579965114593506,
      "learning_rate": 8.620678294445946e-06,
      "loss": 0.0254,
      "step": 438660
    },
    {
      "epoch": 0.7179094414223339,
      "grad_norm": 0.9173466563224792,
      "learning_rate": 8.62061240223243e-06,
      "loss": 0.033,
      "step": 438680
    },
    {
      "epoch": 0.7179421718609873,
      "grad_norm": 0.349806547164917,
      "learning_rate": 8.620546510018913e-06,
      "loss": 0.033,
      "step": 438700
    },
    {
      "epoch": 0.7179749022996407,
      "grad_norm": 0.6660414934158325,
      "learning_rate": 8.620480617805395e-06,
      "loss": 0.0353,
      "step": 438720
    },
    {
      "epoch": 0.7180076327382939,
      "grad_norm": 4.023538589477539,
      "learning_rate": 8.620414725591878e-06,
      "loss": 0.034,
      "step": 438740
    },
    {
      "epoch": 0.7180403631769473,
      "grad_norm": 6.589757442474365,
      "learning_rate": 8.62034883337836e-06,
      "loss": 0.0374,
      "step": 438760
    },
    {
      "epoch": 0.7180730936156007,
      "grad_norm": 0.4554455578327179,
      "learning_rate": 8.620282941164844e-06,
      "loss": 0.0401,
      "step": 438780
    },
    {
      "epoch": 0.7181058240542539,
      "grad_norm": 1.8074462413787842,
      "learning_rate": 8.620217048951326e-06,
      "loss": 0.0348,
      "step": 438800
    },
    {
      "epoch": 0.7181385544929073,
      "grad_norm": 0.33920079469680786,
      "learning_rate": 8.62015115673781e-06,
      "loss": 0.037,
      "step": 438820
    },
    {
      "epoch": 0.7181712849315607,
      "grad_norm": 2.2263588905334473,
      "learning_rate": 8.620085264524291e-06,
      "loss": 0.0399,
      "step": 438840
    },
    {
      "epoch": 0.7182040153702139,
      "grad_norm": 1.0982550382614136,
      "learning_rate": 8.620019372310775e-06,
      "loss": 0.0217,
      "step": 438860
    },
    {
      "epoch": 0.7182367458088673,
      "grad_norm": 0.25789597630500793,
      "learning_rate": 8.619953480097257e-06,
      "loss": 0.0278,
      "step": 438880
    },
    {
      "epoch": 0.7182694762475207,
      "grad_norm": 3.4199435710906982,
      "learning_rate": 8.61988758788374e-06,
      "loss": 0.0356,
      "step": 438900
    },
    {
      "epoch": 0.7183022066861741,
      "grad_norm": 0.7331733107566833,
      "learning_rate": 8.619821695670224e-06,
      "loss": 0.028,
      "step": 438920
    },
    {
      "epoch": 0.7183349371248273,
      "grad_norm": 2.0432329177856445,
      "learning_rate": 8.619755803456706e-06,
      "loss": 0.0334,
      "step": 438940
    },
    {
      "epoch": 0.7183676675634807,
      "grad_norm": 1.5554530620574951,
      "learning_rate": 8.61968991124319e-06,
      "loss": 0.0276,
      "step": 438960
    },
    {
      "epoch": 0.7184003980021341,
      "grad_norm": 2.1095492839813232,
      "learning_rate": 8.619624019029673e-06,
      "loss": 0.0343,
      "step": 438980
    },
    {
      "epoch": 0.7184331284407873,
      "grad_norm": 1.491629958152771,
      "learning_rate": 8.619558126816155e-06,
      "loss": 0.033,
      "step": 439000
    },
    {
      "epoch": 0.7184658588794407,
      "grad_norm": 0.2903570830821991,
      "learning_rate": 8.619492234602638e-06,
      "loss": 0.0216,
      "step": 439020
    },
    {
      "epoch": 0.7184985893180941,
      "grad_norm": 2.100114345550537,
      "learning_rate": 8.619426342389122e-06,
      "loss": 0.0413,
      "step": 439040
    },
    {
      "epoch": 0.7185313197567473,
      "grad_norm": 0.38354822993278503,
      "learning_rate": 8.619360450175604e-06,
      "loss": 0.0246,
      "step": 439060
    },
    {
      "epoch": 0.7185640501954007,
      "grad_norm": 2.9765474796295166,
      "learning_rate": 8.619294557962087e-06,
      "loss": 0.0332,
      "step": 439080
    },
    {
      "epoch": 0.7185967806340541,
      "grad_norm": 0.7061443328857422,
      "learning_rate": 8.61922866574857e-06,
      "loss": 0.0311,
      "step": 439100
    },
    {
      "epoch": 0.7186295110727074,
      "grad_norm": 0.7409886717796326,
      "learning_rate": 8.619162773535053e-06,
      "loss": 0.0243,
      "step": 439120
    },
    {
      "epoch": 0.7186622415113607,
      "grad_norm": 0.6471496224403381,
      "learning_rate": 8.619096881321535e-06,
      "loss": 0.0337,
      "step": 439140
    },
    {
      "epoch": 0.7186949719500141,
      "grad_norm": 0.9953489899635315,
      "learning_rate": 8.619030989108018e-06,
      "loss": 0.0331,
      "step": 439160
    },
    {
      "epoch": 0.7187277023886675,
      "grad_norm": 1.0647541284561157,
      "learning_rate": 8.6189650968945e-06,
      "loss": 0.0351,
      "step": 439180
    },
    {
      "epoch": 0.7187604328273207,
      "grad_norm": 1.4536985158920288,
      "learning_rate": 8.618899204680984e-06,
      "loss": 0.0295,
      "step": 439200
    },
    {
      "epoch": 0.7187931632659741,
      "grad_norm": 0.8002926707267761,
      "learning_rate": 8.618833312467466e-06,
      "loss": 0.0364,
      "step": 439220
    },
    {
      "epoch": 0.7188258937046275,
      "grad_norm": 0.5094516277313232,
      "learning_rate": 8.61876742025395e-06,
      "loss": 0.0435,
      "step": 439240
    },
    {
      "epoch": 0.7188586241432807,
      "grad_norm": 1.035802960395813,
      "learning_rate": 8.618701528040431e-06,
      "loss": 0.0292,
      "step": 439260
    },
    {
      "epoch": 0.7188913545819341,
      "grad_norm": 3.9270401000976562,
      "learning_rate": 8.618635635826915e-06,
      "loss": 0.035,
      "step": 439280
    },
    {
      "epoch": 0.7189240850205875,
      "grad_norm": 1.6966359615325928,
      "learning_rate": 8.618569743613397e-06,
      "loss": 0.0337,
      "step": 439300
    },
    {
      "epoch": 0.7189568154592408,
      "grad_norm": 2.2605178356170654,
      "learning_rate": 8.61850385139988e-06,
      "loss": 0.0407,
      "step": 439320
    },
    {
      "epoch": 0.7189895458978941,
      "grad_norm": 0.08728177845478058,
      "learning_rate": 8.618437959186364e-06,
      "loss": 0.0207,
      "step": 439340
    },
    {
      "epoch": 0.7190222763365475,
      "grad_norm": 2.8023087978363037,
      "learning_rate": 8.618372066972846e-06,
      "loss": 0.0406,
      "step": 439360
    },
    {
      "epoch": 0.7190550067752008,
      "grad_norm": 0.8939530849456787,
      "learning_rate": 8.61830617475933e-06,
      "loss": 0.029,
      "step": 439380
    },
    {
      "epoch": 0.7190877372138541,
      "grad_norm": 2.2551777362823486,
      "learning_rate": 8.618240282545813e-06,
      "loss": 0.0315,
      "step": 439400
    },
    {
      "epoch": 0.7191204676525075,
      "grad_norm": 3.5662949085235596,
      "learning_rate": 8.618174390332295e-06,
      "loss": 0.0324,
      "step": 439420
    },
    {
      "epoch": 0.7191531980911609,
      "grad_norm": 2.191948175430298,
      "learning_rate": 8.618108498118778e-06,
      "loss": 0.0451,
      "step": 439440
    },
    {
      "epoch": 0.7191859285298141,
      "grad_norm": 1.299276351928711,
      "learning_rate": 8.618042605905262e-06,
      "loss": 0.0289,
      "step": 439460
    },
    {
      "epoch": 0.7192186589684675,
      "grad_norm": 0.7067614197731018,
      "learning_rate": 8.617976713691744e-06,
      "loss": 0.0303,
      "step": 439480
    },
    {
      "epoch": 0.7192513894071209,
      "grad_norm": 0.43689608573913574,
      "learning_rate": 8.617910821478227e-06,
      "loss": 0.0415,
      "step": 439500
    },
    {
      "epoch": 0.7192841198457742,
      "grad_norm": 1.0848464965820312,
      "learning_rate": 8.61784492926471e-06,
      "loss": 0.038,
      "step": 439520
    },
    {
      "epoch": 0.7193168502844275,
      "grad_norm": 0.5747251510620117,
      "learning_rate": 8.617779037051193e-06,
      "loss": 0.0387,
      "step": 439540
    },
    {
      "epoch": 0.7193495807230809,
      "grad_norm": 1.1446470022201538,
      "learning_rate": 8.617713144837675e-06,
      "loss": 0.042,
      "step": 439560
    },
    {
      "epoch": 0.7193823111617342,
      "grad_norm": 0.38306909799575806,
      "learning_rate": 8.617647252624158e-06,
      "loss": 0.0256,
      "step": 439580
    },
    {
      "epoch": 0.7194150416003875,
      "grad_norm": 0.38568776845932007,
      "learning_rate": 8.61758136041064e-06,
      "loss": 0.0339,
      "step": 439600
    },
    {
      "epoch": 0.7194477720390409,
      "grad_norm": 0.792439877986908,
      "learning_rate": 8.617515468197124e-06,
      "loss": 0.0416,
      "step": 439620
    },
    {
      "epoch": 0.7194805024776942,
      "grad_norm": 1.0260944366455078,
      "learning_rate": 8.617449575983606e-06,
      "loss": 0.0501,
      "step": 439640
    },
    {
      "epoch": 0.7195132329163475,
      "grad_norm": 1.4221503734588623,
      "learning_rate": 8.61738368377009e-06,
      "loss": 0.0307,
      "step": 439660
    },
    {
      "epoch": 0.7195459633550009,
      "grad_norm": 1.3415043354034424,
      "learning_rate": 8.617317791556571e-06,
      "loss": 0.0427,
      "step": 439680
    },
    {
      "epoch": 0.7195786937936542,
      "grad_norm": 0.4608333706855774,
      "learning_rate": 8.617251899343055e-06,
      "loss": 0.0467,
      "step": 439700
    },
    {
      "epoch": 0.7196114242323075,
      "grad_norm": 0.3344630002975464,
      "learning_rate": 8.617186007129538e-06,
      "loss": 0.0285,
      "step": 439720
    },
    {
      "epoch": 0.7196441546709609,
      "grad_norm": 0.7033624053001404,
      "learning_rate": 8.61712011491602e-06,
      "loss": 0.0358,
      "step": 439740
    },
    {
      "epoch": 0.7196768851096143,
      "grad_norm": 0.46666085720062256,
      "learning_rate": 8.617054222702504e-06,
      "loss": 0.037,
      "step": 439760
    },
    {
      "epoch": 0.7197096155482676,
      "grad_norm": 1.6953823566436768,
      "learning_rate": 8.616988330488988e-06,
      "loss": 0.0311,
      "step": 439780
    },
    {
      "epoch": 0.7197423459869209,
      "grad_norm": 1.3931398391723633,
      "learning_rate": 8.61692243827547e-06,
      "loss": 0.0378,
      "step": 439800
    },
    {
      "epoch": 0.7197750764255743,
      "grad_norm": 0.7722033858299255,
      "learning_rate": 8.616856546061953e-06,
      "loss": 0.0321,
      "step": 439820
    },
    {
      "epoch": 0.7198078068642276,
      "grad_norm": 1.55901038646698,
      "learning_rate": 8.616790653848437e-06,
      "loss": 0.0438,
      "step": 439840
    },
    {
      "epoch": 0.7198405373028809,
      "grad_norm": 1.7502140998840332,
      "learning_rate": 8.616724761634918e-06,
      "loss": 0.0207,
      "step": 439860
    },
    {
      "epoch": 0.7198732677415343,
      "grad_norm": 0.4555242955684662,
      "learning_rate": 8.616658869421402e-06,
      "loss": 0.0294,
      "step": 439880
    },
    {
      "epoch": 0.7199059981801876,
      "grad_norm": 0.6919412016868591,
      "learning_rate": 8.616592977207884e-06,
      "loss": 0.0313,
      "step": 439900
    },
    {
      "epoch": 0.7199387286188409,
      "grad_norm": 0.5154972672462463,
      "learning_rate": 8.616527084994368e-06,
      "loss": 0.036,
      "step": 439920
    },
    {
      "epoch": 0.7199714590574943,
      "grad_norm": 1.2391759157180786,
      "learning_rate": 8.61646119278085e-06,
      "loss": 0.0385,
      "step": 439940
    },
    {
      "epoch": 0.7200041894961476,
      "grad_norm": 1.9505841732025146,
      "learning_rate": 8.616395300567333e-06,
      "loss": 0.0304,
      "step": 439960
    },
    {
      "epoch": 0.720036919934801,
      "grad_norm": 1.4934375286102295,
      "learning_rate": 8.616329408353815e-06,
      "loss": 0.0364,
      "step": 439980
    },
    {
      "epoch": 0.7200696503734543,
      "grad_norm": 1.6953517198562622,
      "learning_rate": 8.616263516140299e-06,
      "loss": 0.0355,
      "step": 440000
    },
    {
      "epoch": 0.7201023808121076,
      "grad_norm": 2.3644425868988037,
      "learning_rate": 8.61619762392678e-06,
      "loss": 0.031,
      "step": 440020
    },
    {
      "epoch": 0.720135111250761,
      "grad_norm": 0.7671937346458435,
      "learning_rate": 8.616131731713264e-06,
      "loss": 0.0355,
      "step": 440040
    },
    {
      "epoch": 0.7201678416894143,
      "grad_norm": 2.1318087577819824,
      "learning_rate": 8.616065839499746e-06,
      "loss": 0.0249,
      "step": 440060
    },
    {
      "epoch": 0.7202005721280677,
      "grad_norm": 0.9194343090057373,
      "learning_rate": 8.61599994728623e-06,
      "loss": 0.0422,
      "step": 440080
    },
    {
      "epoch": 0.720233302566721,
      "grad_norm": 0.2444227635860443,
      "learning_rate": 8.615934055072713e-06,
      "loss": 0.0322,
      "step": 440100
    },
    {
      "epoch": 0.7202660330053743,
      "grad_norm": 4.603325843811035,
      "learning_rate": 8.615868162859195e-06,
      "loss": 0.0356,
      "step": 440120
    },
    {
      "epoch": 0.7202987634440277,
      "grad_norm": 2.5162951946258545,
      "learning_rate": 8.615802270645679e-06,
      "loss": 0.0463,
      "step": 440140
    },
    {
      "epoch": 0.720331493882681,
      "grad_norm": 2.0628511905670166,
      "learning_rate": 8.61573637843216e-06,
      "loss": 0.0279,
      "step": 440160
    },
    {
      "epoch": 0.7203642243213344,
      "grad_norm": 0.2721908688545227,
      "learning_rate": 8.615670486218644e-06,
      "loss": 0.0267,
      "step": 440180
    },
    {
      "epoch": 0.7203969547599877,
      "grad_norm": 0.31592634320259094,
      "learning_rate": 8.615604594005128e-06,
      "loss": 0.0356,
      "step": 440200
    },
    {
      "epoch": 0.720429685198641,
      "grad_norm": 0.5826379060745239,
      "learning_rate": 8.615538701791611e-06,
      "loss": 0.0377,
      "step": 440220
    },
    {
      "epoch": 0.7204624156372944,
      "grad_norm": 3.489657163619995,
      "learning_rate": 8.615472809578093e-06,
      "loss": 0.0285,
      "step": 440240
    },
    {
      "epoch": 0.7204951460759477,
      "grad_norm": 0.7123463749885559,
      "learning_rate": 8.615406917364577e-06,
      "loss": 0.0306,
      "step": 440260
    },
    {
      "epoch": 0.720527876514601,
      "grad_norm": 2.387470245361328,
      "learning_rate": 8.615341025151059e-06,
      "loss": 0.0368,
      "step": 440280
    },
    {
      "epoch": 0.7205606069532544,
      "grad_norm": 2.3904964923858643,
      "learning_rate": 8.615275132937542e-06,
      "loss": 0.035,
      "step": 440300
    },
    {
      "epoch": 0.7205933373919077,
      "grad_norm": 1.2210110425949097,
      "learning_rate": 8.615209240724024e-06,
      "loss": 0.0262,
      "step": 440320
    },
    {
      "epoch": 0.720626067830561,
      "grad_norm": 1.258362054824829,
      "learning_rate": 8.615143348510508e-06,
      "loss": 0.046,
      "step": 440340
    },
    {
      "epoch": 0.7206587982692144,
      "grad_norm": 0.6848919987678528,
      "learning_rate": 8.61507745629699e-06,
      "loss": 0.042,
      "step": 440360
    },
    {
      "epoch": 0.7206915287078678,
      "grad_norm": 1.1323444843292236,
      "learning_rate": 8.615011564083473e-06,
      "loss": 0.0344,
      "step": 440380
    },
    {
      "epoch": 0.720724259146521,
      "grad_norm": 0.7162026762962341,
      "learning_rate": 8.614945671869955e-06,
      "loss": 0.0324,
      "step": 440400
    },
    {
      "epoch": 0.7207569895851744,
      "grad_norm": 1.1450940370559692,
      "learning_rate": 8.614879779656439e-06,
      "loss": 0.0318,
      "step": 440420
    },
    {
      "epoch": 0.7207897200238278,
      "grad_norm": 3.5302529335021973,
      "learning_rate": 8.614813887442922e-06,
      "loss": 0.0379,
      "step": 440440
    },
    {
      "epoch": 0.7208224504624811,
      "grad_norm": 0.4976210594177246,
      "learning_rate": 8.614747995229404e-06,
      "loss": 0.0222,
      "step": 440460
    },
    {
      "epoch": 0.7208551809011344,
      "grad_norm": 0.5717897415161133,
      "learning_rate": 8.614682103015888e-06,
      "loss": 0.0266,
      "step": 440480
    },
    {
      "epoch": 0.7208879113397878,
      "grad_norm": 0.9290731549263,
      "learning_rate": 8.61461621080237e-06,
      "loss": 0.0296,
      "step": 440500
    },
    {
      "epoch": 0.7209206417784411,
      "grad_norm": 2.114351987838745,
      "learning_rate": 8.614550318588853e-06,
      "loss": 0.0431,
      "step": 440520
    },
    {
      "epoch": 0.7209533722170944,
      "grad_norm": 2.413731575012207,
      "learning_rate": 8.614484426375335e-06,
      "loss": 0.0255,
      "step": 440540
    },
    {
      "epoch": 0.7209861026557478,
      "grad_norm": 0.2984198331832886,
      "learning_rate": 8.614418534161819e-06,
      "loss": 0.0289,
      "step": 440560
    },
    {
      "epoch": 0.7210188330944012,
      "grad_norm": 1.7853337526321411,
      "learning_rate": 8.614352641948302e-06,
      "loss": 0.0332,
      "step": 440580
    },
    {
      "epoch": 0.7210515635330544,
      "grad_norm": 0.3961656987667084,
      "learning_rate": 8.614286749734784e-06,
      "loss": 0.0336,
      "step": 440600
    },
    {
      "epoch": 0.7210842939717078,
      "grad_norm": 0.4467708468437195,
      "learning_rate": 8.614220857521268e-06,
      "loss": 0.0278,
      "step": 440620
    },
    {
      "epoch": 0.7211170244103612,
      "grad_norm": 0.8816570043563843,
      "learning_rate": 8.614154965307751e-06,
      "loss": 0.0354,
      "step": 440640
    },
    {
      "epoch": 0.7211497548490144,
      "grad_norm": 2.2560532093048096,
      "learning_rate": 8.614089073094233e-06,
      "loss": 0.0291,
      "step": 440660
    },
    {
      "epoch": 0.7211824852876678,
      "grad_norm": 0.6087496876716614,
      "learning_rate": 8.614023180880717e-06,
      "loss": 0.0421,
      "step": 440680
    },
    {
      "epoch": 0.7212152157263212,
      "grad_norm": 0.3809512257575989,
      "learning_rate": 8.613957288667199e-06,
      "loss": 0.0312,
      "step": 440700
    },
    {
      "epoch": 0.7212479461649745,
      "grad_norm": 0.5165929794311523,
      "learning_rate": 8.613891396453682e-06,
      "loss": 0.0361,
      "step": 440720
    },
    {
      "epoch": 0.7212806766036278,
      "grad_norm": 0.6418516635894775,
      "learning_rate": 8.613825504240164e-06,
      "loss": 0.0358,
      "step": 440740
    },
    {
      "epoch": 0.7213134070422812,
      "grad_norm": 0.5599576234817505,
      "learning_rate": 8.613759612026648e-06,
      "loss": 0.0345,
      "step": 440760
    },
    {
      "epoch": 0.7213461374809346,
      "grad_norm": 1.0057328939437866,
      "learning_rate": 8.61369371981313e-06,
      "loss": 0.0267,
      "step": 440780
    },
    {
      "epoch": 0.7213788679195878,
      "grad_norm": 0.8102993965148926,
      "learning_rate": 8.613627827599613e-06,
      "loss": 0.0287,
      "step": 440800
    },
    {
      "epoch": 0.7214115983582412,
      "grad_norm": 7.20024299621582,
      "learning_rate": 8.613561935386097e-06,
      "loss": 0.0244,
      "step": 440820
    },
    {
      "epoch": 0.7214443287968946,
      "grad_norm": 2.511331081390381,
      "learning_rate": 8.613496043172579e-06,
      "loss": 0.0351,
      "step": 440840
    },
    {
      "epoch": 0.7214770592355478,
      "grad_norm": 2.0002899169921875,
      "learning_rate": 8.613430150959062e-06,
      "loss": 0.0285,
      "step": 440860
    },
    {
      "epoch": 0.7215097896742012,
      "grad_norm": 1.226121425628662,
      "learning_rate": 8.613364258745544e-06,
      "loss": 0.0349,
      "step": 440880
    },
    {
      "epoch": 0.7215425201128546,
      "grad_norm": 0.7237820625305176,
      "learning_rate": 8.613298366532028e-06,
      "loss": 0.0402,
      "step": 440900
    },
    {
      "epoch": 0.7215752505515078,
      "grad_norm": 3.334660291671753,
      "learning_rate": 8.61323247431851e-06,
      "loss": 0.027,
      "step": 440920
    },
    {
      "epoch": 0.7216079809901612,
      "grad_norm": 0.6274939179420471,
      "learning_rate": 8.613166582104993e-06,
      "loss": 0.034,
      "step": 440940
    },
    {
      "epoch": 0.7216407114288146,
      "grad_norm": 0.696544885635376,
      "learning_rate": 8.613100689891477e-06,
      "loss": 0.0296,
      "step": 440960
    },
    {
      "epoch": 0.721673441867468,
      "grad_norm": 1.5169893503189087,
      "learning_rate": 8.613034797677959e-06,
      "loss": 0.0361,
      "step": 440980
    },
    {
      "epoch": 0.7217061723061212,
      "grad_norm": 0.6888677477836609,
      "learning_rate": 8.612968905464442e-06,
      "loss": 0.0336,
      "step": 441000
    },
    {
      "epoch": 0.7217389027447746,
      "grad_norm": 0.7409448027610779,
      "learning_rate": 8.612903013250926e-06,
      "loss": 0.0326,
      "step": 441020
    },
    {
      "epoch": 0.721771633183428,
      "grad_norm": 0.9743834137916565,
      "learning_rate": 8.612837121037408e-06,
      "loss": 0.0448,
      "step": 441040
    },
    {
      "epoch": 0.7218043636220812,
      "grad_norm": 0.4587586224079132,
      "learning_rate": 8.612771228823891e-06,
      "loss": 0.0293,
      "step": 441060
    },
    {
      "epoch": 0.7218370940607346,
      "grad_norm": 0.8020811676979065,
      "learning_rate": 8.612705336610373e-06,
      "loss": 0.0272,
      "step": 441080
    },
    {
      "epoch": 0.721869824499388,
      "grad_norm": 1.9830478429794312,
      "learning_rate": 8.612639444396857e-06,
      "loss": 0.0364,
      "step": 441100
    },
    {
      "epoch": 0.7219025549380412,
      "grad_norm": 1.5499414205551147,
      "learning_rate": 8.612573552183339e-06,
      "loss": 0.042,
      "step": 441120
    },
    {
      "epoch": 0.7219352853766946,
      "grad_norm": 0.7902905941009521,
      "learning_rate": 8.612507659969822e-06,
      "loss": 0.0315,
      "step": 441140
    },
    {
      "epoch": 0.721968015815348,
      "grad_norm": 0.6356695890426636,
      "learning_rate": 8.612441767756306e-06,
      "loss": 0.0205,
      "step": 441160
    },
    {
      "epoch": 0.7220007462540013,
      "grad_norm": 1.232926845550537,
      "learning_rate": 8.612375875542788e-06,
      "loss": 0.0394,
      "step": 441180
    },
    {
      "epoch": 0.7220334766926546,
      "grad_norm": 0.8272291421890259,
      "learning_rate": 8.612309983329271e-06,
      "loss": 0.03,
      "step": 441200
    },
    {
      "epoch": 0.722066207131308,
      "grad_norm": 2.951841115951538,
      "learning_rate": 8.612244091115753e-06,
      "loss": 0.0308,
      "step": 441220
    },
    {
      "epoch": 0.7220989375699614,
      "grad_norm": 0.852587878704071,
      "learning_rate": 8.612178198902237e-06,
      "loss": 0.0361,
      "step": 441240
    },
    {
      "epoch": 0.7221316680086146,
      "grad_norm": 2.9268062114715576,
      "learning_rate": 8.612112306688719e-06,
      "loss": 0.0274,
      "step": 441260
    },
    {
      "epoch": 0.722164398447268,
      "grad_norm": 0.6949412226676941,
      "learning_rate": 8.612046414475202e-06,
      "loss": 0.033,
      "step": 441280
    },
    {
      "epoch": 0.7221971288859214,
      "grad_norm": 0.9624302983283997,
      "learning_rate": 8.611980522261684e-06,
      "loss": 0.0393,
      "step": 441300
    },
    {
      "epoch": 0.7222298593245746,
      "grad_norm": 1.3565009832382202,
      "learning_rate": 8.611914630048168e-06,
      "loss": 0.0331,
      "step": 441320
    },
    {
      "epoch": 0.722262589763228,
      "grad_norm": 2.0299880504608154,
      "learning_rate": 8.61184873783465e-06,
      "loss": 0.0435,
      "step": 441340
    },
    {
      "epoch": 0.7222953202018814,
      "grad_norm": 12.777297019958496,
      "learning_rate": 8.611782845621133e-06,
      "loss": 0.0412,
      "step": 441360
    },
    {
      "epoch": 0.7223280506405347,
      "grad_norm": 1.8602311611175537,
      "learning_rate": 8.611716953407617e-06,
      "loss": 0.0382,
      "step": 441380
    },
    {
      "epoch": 0.722360781079188,
      "grad_norm": 0.7863678932189941,
      "learning_rate": 8.611651061194099e-06,
      "loss": 0.0316,
      "step": 441400
    },
    {
      "epoch": 0.7223935115178414,
      "grad_norm": 0.8928143382072449,
      "learning_rate": 8.611585168980582e-06,
      "loss": 0.0287,
      "step": 441420
    },
    {
      "epoch": 0.7224262419564947,
      "grad_norm": 1.647998571395874,
      "learning_rate": 8.611519276767066e-06,
      "loss": 0.0365,
      "step": 441440
    },
    {
      "epoch": 0.722458972395148,
      "grad_norm": 0.90769362449646,
      "learning_rate": 8.611453384553548e-06,
      "loss": 0.0396,
      "step": 441460
    },
    {
      "epoch": 0.7224917028338014,
      "grad_norm": 2.176805257797241,
      "learning_rate": 8.611387492340031e-06,
      "loss": 0.0352,
      "step": 441480
    },
    {
      "epoch": 0.7225244332724547,
      "grad_norm": 1.0870662927627563,
      "learning_rate": 8.611321600126515e-06,
      "loss": 0.0249,
      "step": 441500
    },
    {
      "epoch": 0.722557163711108,
      "grad_norm": 2.355888843536377,
      "learning_rate": 8.611255707912997e-06,
      "loss": 0.0347,
      "step": 441520
    },
    {
      "epoch": 0.7225898941497614,
      "grad_norm": 0.5137903094291687,
      "learning_rate": 8.61118981569948e-06,
      "loss": 0.0426,
      "step": 441540
    },
    {
      "epoch": 0.7226226245884148,
      "grad_norm": 0.5537531971931458,
      "learning_rate": 8.611123923485962e-06,
      "loss": 0.0259,
      "step": 441560
    },
    {
      "epoch": 0.7226553550270681,
      "grad_norm": 0.860934853553772,
      "learning_rate": 8.611058031272446e-06,
      "loss": 0.0221,
      "step": 441580
    },
    {
      "epoch": 0.7226880854657214,
      "grad_norm": 1.1202877759933472,
      "learning_rate": 8.610992139058928e-06,
      "loss": 0.024,
      "step": 441600
    },
    {
      "epoch": 0.7227208159043748,
      "grad_norm": 0.24054425954818726,
      "learning_rate": 8.610926246845411e-06,
      "loss": 0.0346,
      "step": 441620
    },
    {
      "epoch": 0.7227535463430281,
      "grad_norm": 1.4332904815673828,
      "learning_rate": 8.610860354631893e-06,
      "loss": 0.0365,
      "step": 441640
    },
    {
      "epoch": 0.7227862767816814,
      "grad_norm": 0.4493313133716583,
      "learning_rate": 8.610794462418377e-06,
      "loss": 0.0321,
      "step": 441660
    },
    {
      "epoch": 0.7228190072203348,
      "grad_norm": 0.38245004415512085,
      "learning_rate": 8.610728570204859e-06,
      "loss": 0.0309,
      "step": 441680
    },
    {
      "epoch": 0.7228517376589881,
      "grad_norm": 1.9235846996307373,
      "learning_rate": 8.610662677991342e-06,
      "loss": 0.0262,
      "step": 441700
    },
    {
      "epoch": 0.7228844680976414,
      "grad_norm": 1.319704532623291,
      "learning_rate": 8.610596785777824e-06,
      "loss": 0.0323,
      "step": 441720
    },
    {
      "epoch": 0.7229171985362948,
      "grad_norm": 0.8287445306777954,
      "learning_rate": 8.610530893564308e-06,
      "loss": 0.0239,
      "step": 441740
    },
    {
      "epoch": 0.7229499289749481,
      "grad_norm": 1.4557158946990967,
      "learning_rate": 8.610465001350791e-06,
      "loss": 0.0415,
      "step": 441760
    },
    {
      "epoch": 0.7229826594136015,
      "grad_norm": 0.6279813647270203,
      "learning_rate": 8.610399109137273e-06,
      "loss": 0.0371,
      "step": 441780
    },
    {
      "epoch": 0.7230153898522548,
      "grad_norm": 1.2016671895980835,
      "learning_rate": 8.610333216923757e-06,
      "loss": 0.0326,
      "step": 441800
    },
    {
      "epoch": 0.7230481202909081,
      "grad_norm": 0.5497915744781494,
      "learning_rate": 8.61026732471024e-06,
      "loss": 0.0378,
      "step": 441820
    },
    {
      "epoch": 0.7230808507295615,
      "grad_norm": 0.8740939497947693,
      "learning_rate": 8.610201432496722e-06,
      "loss": 0.0389,
      "step": 441840
    },
    {
      "epoch": 0.7231135811682148,
      "grad_norm": 1.4517349004745483,
      "learning_rate": 8.610135540283206e-06,
      "loss": 0.0388,
      "step": 441860
    },
    {
      "epoch": 0.7231463116068682,
      "grad_norm": 0.8355341553688049,
      "learning_rate": 8.61006964806969e-06,
      "loss": 0.0297,
      "step": 441880
    },
    {
      "epoch": 0.7231790420455215,
      "grad_norm": 0.5564439296722412,
      "learning_rate": 8.610003755856171e-06,
      "loss": 0.0427,
      "step": 441900
    },
    {
      "epoch": 0.7232117724841748,
      "grad_norm": 0.5551774501800537,
      "learning_rate": 8.609937863642655e-06,
      "loss": 0.0423,
      "step": 441920
    },
    {
      "epoch": 0.7232445029228282,
      "grad_norm": 2.1964988708496094,
      "learning_rate": 8.609871971429137e-06,
      "loss": 0.0392,
      "step": 441940
    },
    {
      "epoch": 0.7232772333614815,
      "grad_norm": 0.9309965968132019,
      "learning_rate": 8.60980607921562e-06,
      "loss": 0.0217,
      "step": 441960
    },
    {
      "epoch": 0.7233099638001349,
      "grad_norm": 2.8696866035461426,
      "learning_rate": 8.609740187002102e-06,
      "loss": 0.0287,
      "step": 441980
    },
    {
      "epoch": 0.7233426942387882,
      "grad_norm": 0.7433499097824097,
      "learning_rate": 8.609674294788586e-06,
      "loss": 0.0296,
      "step": 442000
    },
    {
      "epoch": 0.7233754246774415,
      "grad_norm": 1.8254475593566895,
      "learning_rate": 8.609608402575068e-06,
      "loss": 0.0322,
      "step": 442020
    },
    {
      "epoch": 0.7234081551160949,
      "grad_norm": 1.0660456418991089,
      "learning_rate": 8.609542510361552e-06,
      "loss": 0.0334,
      "step": 442040
    },
    {
      "epoch": 0.7234408855547482,
      "grad_norm": 0.7282101511955261,
      "learning_rate": 8.609476618148033e-06,
      "loss": 0.0351,
      "step": 442060
    },
    {
      "epoch": 0.7234736159934015,
      "grad_norm": 0.9145803451538086,
      "learning_rate": 8.609410725934517e-06,
      "loss": 0.0316,
      "step": 442080
    },
    {
      "epoch": 0.7235063464320549,
      "grad_norm": 0.4793764054775238,
      "learning_rate": 8.609344833720999e-06,
      "loss": 0.0322,
      "step": 442100
    },
    {
      "epoch": 0.7235390768707082,
      "grad_norm": 1.0696804523468018,
      "learning_rate": 8.609278941507482e-06,
      "loss": 0.0343,
      "step": 442120
    },
    {
      "epoch": 0.7235718073093615,
      "grad_norm": 0.4292663037776947,
      "learning_rate": 8.609213049293964e-06,
      "loss": 0.03,
      "step": 442140
    },
    {
      "epoch": 0.7236045377480149,
      "grad_norm": 1.8557498455047607,
      "learning_rate": 8.609147157080448e-06,
      "loss": 0.03,
      "step": 442160
    },
    {
      "epoch": 0.7236372681866683,
      "grad_norm": 1.133194088935852,
      "learning_rate": 8.609081264866932e-06,
      "loss": 0.025,
      "step": 442180
    },
    {
      "epoch": 0.7236699986253216,
      "grad_norm": 1.5277246236801147,
      "learning_rate": 8.609015372653413e-06,
      "loss": 0.0412,
      "step": 442200
    },
    {
      "epoch": 0.7237027290639749,
      "grad_norm": 2.38851261138916,
      "learning_rate": 8.608949480439897e-06,
      "loss": 0.0315,
      "step": 442220
    },
    {
      "epoch": 0.7237354595026283,
      "grad_norm": 1.7900166511535645,
      "learning_rate": 8.60888358822638e-06,
      "loss": 0.037,
      "step": 442240
    },
    {
      "epoch": 0.7237681899412816,
      "grad_norm": 0.22374221682548523,
      "learning_rate": 8.608817696012862e-06,
      "loss": 0.0399,
      "step": 442260
    },
    {
      "epoch": 0.7238009203799349,
      "grad_norm": 0.9528096318244934,
      "learning_rate": 8.608751803799346e-06,
      "loss": 0.0391,
      "step": 442280
    },
    {
      "epoch": 0.7238336508185883,
      "grad_norm": 0.8989560008049011,
      "learning_rate": 8.60868591158583e-06,
      "loss": 0.0355,
      "step": 442300
    },
    {
      "epoch": 0.7238663812572416,
      "grad_norm": 1.5566601753234863,
      "learning_rate": 8.608620019372312e-06,
      "loss": 0.037,
      "step": 442320
    },
    {
      "epoch": 0.7238991116958949,
      "grad_norm": 0.29249128699302673,
      "learning_rate": 8.608554127158795e-06,
      "loss": 0.0192,
      "step": 442340
    },
    {
      "epoch": 0.7239318421345483,
      "grad_norm": 0.4588729739189148,
      "learning_rate": 8.608488234945277e-06,
      "loss": 0.032,
      "step": 442360
    },
    {
      "epoch": 0.7239645725732017,
      "grad_norm": 1.5328792333602905,
      "learning_rate": 8.60842234273176e-06,
      "loss": 0.0336,
      "step": 442380
    },
    {
      "epoch": 0.7239973030118549,
      "grad_norm": 1.2329432964324951,
      "learning_rate": 8.608356450518243e-06,
      "loss": 0.0322,
      "step": 442400
    },
    {
      "epoch": 0.7240300334505083,
      "grad_norm": 0.5927839279174805,
      "learning_rate": 8.608290558304726e-06,
      "loss": 0.0359,
      "step": 442420
    },
    {
      "epoch": 0.7240627638891617,
      "grad_norm": 0.3415175676345825,
      "learning_rate": 8.608224666091208e-06,
      "loss": 0.0285,
      "step": 442440
    },
    {
      "epoch": 0.724095494327815,
      "grad_norm": 1.1150366067886353,
      "learning_rate": 8.608158773877692e-06,
      "loss": 0.0264,
      "step": 442460
    },
    {
      "epoch": 0.7241282247664683,
      "grad_norm": 0.9488780498504639,
      "learning_rate": 8.608092881664173e-06,
      "loss": 0.0316,
      "step": 442480
    },
    {
      "epoch": 0.7241609552051217,
      "grad_norm": 1.927758812904358,
      "learning_rate": 8.608026989450657e-06,
      "loss": 0.0314,
      "step": 442500
    },
    {
      "epoch": 0.724193685643775,
      "grad_norm": 1.512515664100647,
      "learning_rate": 8.607961097237139e-06,
      "loss": 0.0446,
      "step": 442520
    },
    {
      "epoch": 0.7242264160824283,
      "grad_norm": 0.5864458084106445,
      "learning_rate": 8.607895205023623e-06,
      "loss": 0.0331,
      "step": 442540
    },
    {
      "epoch": 0.7242591465210817,
      "grad_norm": 0.38042545318603516,
      "learning_rate": 8.607829312810106e-06,
      "loss": 0.0249,
      "step": 442560
    },
    {
      "epoch": 0.724291876959735,
      "grad_norm": 3.873380184173584,
      "learning_rate": 8.607763420596588e-06,
      "loss": 0.0306,
      "step": 442580
    },
    {
      "epoch": 0.7243246073983883,
      "grad_norm": 2.8060131072998047,
      "learning_rate": 8.607697528383072e-06,
      "loss": 0.0303,
      "step": 442600
    },
    {
      "epoch": 0.7243573378370417,
      "grad_norm": 0.9108500480651855,
      "learning_rate": 8.607631636169555e-06,
      "loss": 0.0311,
      "step": 442620
    },
    {
      "epoch": 0.7243900682756951,
      "grad_norm": 0.5704349875450134,
      "learning_rate": 8.607565743956037e-06,
      "loss": 0.035,
      "step": 442640
    },
    {
      "epoch": 0.7244227987143483,
      "grad_norm": 2.7879536151885986,
      "learning_rate": 8.60749985174252e-06,
      "loss": 0.0342,
      "step": 442660
    },
    {
      "epoch": 0.7244555291530017,
      "grad_norm": 0.3258804380893707,
      "learning_rate": 8.607433959529004e-06,
      "loss": 0.032,
      "step": 442680
    },
    {
      "epoch": 0.7244882595916551,
      "grad_norm": 2.3881924152374268,
      "learning_rate": 8.607368067315486e-06,
      "loss": 0.0336,
      "step": 442700
    },
    {
      "epoch": 0.7245209900303083,
      "grad_norm": 1.0034905672073364,
      "learning_rate": 8.60730217510197e-06,
      "loss": 0.0352,
      "step": 442720
    },
    {
      "epoch": 0.7245537204689617,
      "grad_norm": 0.4872453510761261,
      "learning_rate": 8.607236282888452e-06,
      "loss": 0.0279,
      "step": 442740
    },
    {
      "epoch": 0.7245864509076151,
      "grad_norm": 0.8633398413658142,
      "learning_rate": 8.607170390674935e-06,
      "loss": 0.0347,
      "step": 442760
    },
    {
      "epoch": 0.7246191813462683,
      "grad_norm": 2.1782028675079346,
      "learning_rate": 8.607104498461417e-06,
      "loss": 0.0303,
      "step": 442780
    },
    {
      "epoch": 0.7246519117849217,
      "grad_norm": 0.545712411403656,
      "learning_rate": 8.6070386062479e-06,
      "loss": 0.025,
      "step": 442800
    },
    {
      "epoch": 0.7246846422235751,
      "grad_norm": 1.2045046091079712,
      "learning_rate": 8.606972714034383e-06,
      "loss": 0.0335,
      "step": 442820
    },
    {
      "epoch": 0.7247173726622285,
      "grad_norm": 0.9153429865837097,
      "learning_rate": 8.606906821820866e-06,
      "loss": 0.0296,
      "step": 442840
    },
    {
      "epoch": 0.7247501031008817,
      "grad_norm": 1.0669575929641724,
      "learning_rate": 8.606840929607348e-06,
      "loss": 0.0355,
      "step": 442860
    },
    {
      "epoch": 0.7247828335395351,
      "grad_norm": 3.8135013580322266,
      "learning_rate": 8.606775037393832e-06,
      "loss": 0.0286,
      "step": 442880
    },
    {
      "epoch": 0.7248155639781885,
      "grad_norm": 1.3893471956253052,
      "learning_rate": 8.606709145180314e-06,
      "loss": 0.0396,
      "step": 442900
    },
    {
      "epoch": 0.7248482944168417,
      "grad_norm": 0.590125560760498,
      "learning_rate": 8.606643252966797e-06,
      "loss": 0.0227,
      "step": 442920
    },
    {
      "epoch": 0.7248810248554951,
      "grad_norm": 0.5149956941604614,
      "learning_rate": 8.60657736075328e-06,
      "loss": 0.0293,
      "step": 442940
    },
    {
      "epoch": 0.7249137552941485,
      "grad_norm": 2.907557725906372,
      "learning_rate": 8.606511468539763e-06,
      "loss": 0.0381,
      "step": 442960
    },
    {
      "epoch": 0.7249464857328017,
      "grad_norm": 1.8154826164245605,
      "learning_rate": 8.606445576326246e-06,
      "loss": 0.0272,
      "step": 442980
    },
    {
      "epoch": 0.7249792161714551,
      "grad_norm": 0.6207911968231201,
      "learning_rate": 8.60637968411273e-06,
      "loss": 0.0455,
      "step": 443000
    },
    {
      "epoch": 0.7250119466101085,
      "grad_norm": 0.3343656659126282,
      "learning_rate": 8.606313791899212e-06,
      "loss": 0.0286,
      "step": 443020
    },
    {
      "epoch": 0.7250446770487619,
      "grad_norm": 0.9074687361717224,
      "learning_rate": 8.606247899685695e-06,
      "loss": 0.0301,
      "step": 443040
    },
    {
      "epoch": 0.7250774074874151,
      "grad_norm": 0.9335570335388184,
      "learning_rate": 8.606182007472179e-06,
      "loss": 0.0308,
      "step": 443060
    },
    {
      "epoch": 0.7251101379260685,
      "grad_norm": 1.6786199808120728,
      "learning_rate": 8.60611611525866e-06,
      "loss": 0.0375,
      "step": 443080
    },
    {
      "epoch": 0.7251428683647219,
      "grad_norm": 0.8241561651229858,
      "learning_rate": 8.606050223045144e-06,
      "loss": 0.031,
      "step": 443100
    },
    {
      "epoch": 0.7251755988033751,
      "grad_norm": 1.157094955444336,
      "learning_rate": 8.605984330831626e-06,
      "loss": 0.0273,
      "step": 443120
    },
    {
      "epoch": 0.7252083292420285,
      "grad_norm": 1.0212920904159546,
      "learning_rate": 8.60591843861811e-06,
      "loss": 0.0295,
      "step": 443140
    },
    {
      "epoch": 0.7252410596806819,
      "grad_norm": 1.6887823343276978,
      "learning_rate": 8.605852546404592e-06,
      "loss": 0.0252,
      "step": 443160
    },
    {
      "epoch": 0.7252737901193351,
      "grad_norm": 0.6741001605987549,
      "learning_rate": 8.605786654191075e-06,
      "loss": 0.0287,
      "step": 443180
    },
    {
      "epoch": 0.7253065205579885,
      "grad_norm": 0.30236196517944336,
      "learning_rate": 8.605720761977557e-06,
      "loss": 0.0387,
      "step": 443200
    },
    {
      "epoch": 0.7253392509966419,
      "grad_norm": 0.34538328647613525,
      "learning_rate": 8.60565486976404e-06,
      "loss": 0.0336,
      "step": 443220
    },
    {
      "epoch": 0.7253719814352952,
      "grad_norm": 3.158628463745117,
      "learning_rate": 8.605588977550523e-06,
      "loss": 0.0374,
      "step": 443240
    },
    {
      "epoch": 0.7254047118739485,
      "grad_norm": 2.030780792236328,
      "learning_rate": 8.605523085337006e-06,
      "loss": 0.027,
      "step": 443260
    },
    {
      "epoch": 0.7254374423126019,
      "grad_norm": 1.586549162864685,
      "learning_rate": 8.60545719312349e-06,
      "loss": 0.0405,
      "step": 443280
    },
    {
      "epoch": 0.7254701727512552,
      "grad_norm": 0.9430453181266785,
      "learning_rate": 8.605391300909972e-06,
      "loss": 0.0326,
      "step": 443300
    },
    {
      "epoch": 0.7255029031899085,
      "grad_norm": 1.8230845928192139,
      "learning_rate": 8.605325408696455e-06,
      "loss": 0.0296,
      "step": 443320
    },
    {
      "epoch": 0.7255356336285619,
      "grad_norm": 1.0315624475479126,
      "learning_rate": 8.605259516482937e-06,
      "loss": 0.0289,
      "step": 443340
    },
    {
      "epoch": 0.7255683640672153,
      "grad_norm": 0.5489148497581482,
      "learning_rate": 8.60519362426942e-06,
      "loss": 0.0486,
      "step": 443360
    },
    {
      "epoch": 0.7256010945058685,
      "grad_norm": 1.4939987659454346,
      "learning_rate": 8.605127732055903e-06,
      "loss": 0.0394,
      "step": 443380
    },
    {
      "epoch": 0.7256338249445219,
      "grad_norm": 0.7628777027130127,
      "learning_rate": 8.605061839842386e-06,
      "loss": 0.0353,
      "step": 443400
    },
    {
      "epoch": 0.7256665553831753,
      "grad_norm": 1.4969537258148193,
      "learning_rate": 8.60499594762887e-06,
      "loss": 0.0422,
      "step": 443420
    },
    {
      "epoch": 0.7256992858218286,
      "grad_norm": 0.944807231426239,
      "learning_rate": 8.604930055415352e-06,
      "loss": 0.0396,
      "step": 443440
    },
    {
      "epoch": 0.7257320162604819,
      "grad_norm": 0.3610246479511261,
      "learning_rate": 8.604864163201835e-06,
      "loss": 0.0232,
      "step": 443460
    },
    {
      "epoch": 0.7257647466991353,
      "grad_norm": 0.5588806867599487,
      "learning_rate": 8.604798270988319e-06,
      "loss": 0.0395,
      "step": 443480
    },
    {
      "epoch": 0.7257974771377886,
      "grad_norm": 3.044414520263672,
      "learning_rate": 8.6047323787748e-06,
      "loss": 0.0328,
      "step": 443500
    },
    {
      "epoch": 0.7258302075764419,
      "grad_norm": 0.3192855715751648,
      "learning_rate": 8.604666486561284e-06,
      "loss": 0.0312,
      "step": 443520
    },
    {
      "epoch": 0.7258629380150953,
      "grad_norm": 0.7495393753051758,
      "learning_rate": 8.604600594347766e-06,
      "loss": 0.0351,
      "step": 443540
    },
    {
      "epoch": 0.7258956684537486,
      "grad_norm": 0.3153938353061676,
      "learning_rate": 8.60453470213425e-06,
      "loss": 0.0426,
      "step": 443560
    },
    {
      "epoch": 0.7259283988924019,
      "grad_norm": 0.9506138563156128,
      "learning_rate": 8.604468809920732e-06,
      "loss": 0.0371,
      "step": 443580
    },
    {
      "epoch": 0.7259611293310553,
      "grad_norm": 0.5146913528442383,
      "learning_rate": 8.604402917707215e-06,
      "loss": 0.0306,
      "step": 443600
    },
    {
      "epoch": 0.7259938597697086,
      "grad_norm": 0.3010185658931732,
      "learning_rate": 8.604337025493699e-06,
      "loss": 0.0303,
      "step": 443620
    },
    {
      "epoch": 0.726026590208362,
      "grad_norm": 1.9099496603012085,
      "learning_rate": 8.604271133280181e-06,
      "loss": 0.0357,
      "step": 443640
    },
    {
      "epoch": 0.7260593206470153,
      "grad_norm": 1.6173895597457886,
      "learning_rate": 8.604205241066664e-06,
      "loss": 0.0314,
      "step": 443660
    },
    {
      "epoch": 0.7260920510856687,
      "grad_norm": 1.7667781114578247,
      "learning_rate": 8.604139348853146e-06,
      "loss": 0.0475,
      "step": 443680
    },
    {
      "epoch": 0.726124781524322,
      "grad_norm": 0.6256402730941772,
      "learning_rate": 8.60407345663963e-06,
      "loss": 0.0345,
      "step": 443700
    },
    {
      "epoch": 0.7261575119629753,
      "grad_norm": 0.44024232029914856,
      "learning_rate": 8.604007564426112e-06,
      "loss": 0.0297,
      "step": 443720
    },
    {
      "epoch": 0.7261902424016287,
      "grad_norm": 0.6748351454734802,
      "learning_rate": 8.603941672212595e-06,
      "loss": 0.0261,
      "step": 443740
    },
    {
      "epoch": 0.726222972840282,
      "grad_norm": 0.4838513731956482,
      "learning_rate": 8.603875779999077e-06,
      "loss": 0.0372,
      "step": 443760
    },
    {
      "epoch": 0.7262557032789353,
      "grad_norm": 0.5819823145866394,
      "learning_rate": 8.603809887785561e-06,
      "loss": 0.0276,
      "step": 443780
    },
    {
      "epoch": 0.7262884337175887,
      "grad_norm": 1.0456089973449707,
      "learning_rate": 8.603743995572044e-06,
      "loss": 0.022,
      "step": 443800
    },
    {
      "epoch": 0.726321164156242,
      "grad_norm": 0.9041092395782471,
      "learning_rate": 8.603678103358526e-06,
      "loss": 0.032,
      "step": 443820
    },
    {
      "epoch": 0.7263538945948954,
      "grad_norm": 1.5707706212997437,
      "learning_rate": 8.60361221114501e-06,
      "loss": 0.0345,
      "step": 443840
    },
    {
      "epoch": 0.7263866250335487,
      "grad_norm": 0.8108687400817871,
      "learning_rate": 8.603546318931494e-06,
      "loss": 0.0389,
      "step": 443860
    },
    {
      "epoch": 0.726419355472202,
      "grad_norm": 1.3234314918518066,
      "learning_rate": 8.603480426717975e-06,
      "loss": 0.0209,
      "step": 443880
    },
    {
      "epoch": 0.7264520859108554,
      "grad_norm": 0.5303032398223877,
      "learning_rate": 8.603414534504459e-06,
      "loss": 0.0375,
      "step": 443900
    },
    {
      "epoch": 0.7264848163495087,
      "grad_norm": 0.5710591673851013,
      "learning_rate": 8.603348642290941e-06,
      "loss": 0.0346,
      "step": 443920
    },
    {
      "epoch": 0.726517546788162,
      "grad_norm": 0.49476170539855957,
      "learning_rate": 8.603282750077424e-06,
      "loss": 0.0309,
      "step": 443940
    },
    {
      "epoch": 0.7265502772268154,
      "grad_norm": 0.7463812828063965,
      "learning_rate": 8.603216857863908e-06,
      "loss": 0.0369,
      "step": 443960
    },
    {
      "epoch": 0.7265830076654687,
      "grad_norm": 0.8907697796821594,
      "learning_rate": 8.60315096565039e-06,
      "loss": 0.0386,
      "step": 443980
    },
    {
      "epoch": 0.726615738104122,
      "grad_norm": 0.6222164630889893,
      "learning_rate": 8.603085073436874e-06,
      "loss": 0.0294,
      "step": 444000
    },
    {
      "epoch": 0.7266484685427754,
      "grad_norm": 0.9997785687446594,
      "learning_rate": 8.603019181223355e-06,
      "loss": 0.0412,
      "step": 444020
    },
    {
      "epoch": 0.7266811989814288,
      "grad_norm": 1.438734531402588,
      "learning_rate": 8.602953289009839e-06,
      "loss": 0.0334,
      "step": 444040
    },
    {
      "epoch": 0.7267139294200821,
      "grad_norm": 1.2853277921676636,
      "learning_rate": 8.602887396796321e-06,
      "loss": 0.0302,
      "step": 444060
    },
    {
      "epoch": 0.7267466598587354,
      "grad_norm": 0.3041970729827881,
      "learning_rate": 8.602821504582805e-06,
      "loss": 0.0242,
      "step": 444080
    },
    {
      "epoch": 0.7267793902973888,
      "grad_norm": 4.6815972328186035,
      "learning_rate": 8.602755612369286e-06,
      "loss": 0.0366,
      "step": 444100
    },
    {
      "epoch": 0.7268121207360421,
      "grad_norm": 3.9497904777526855,
      "learning_rate": 8.60268972015577e-06,
      "loss": 0.0274,
      "step": 444120
    },
    {
      "epoch": 0.7268448511746954,
      "grad_norm": 0.5770672559738159,
      "learning_rate": 8.602623827942252e-06,
      "loss": 0.036,
      "step": 444140
    },
    {
      "epoch": 0.7268775816133488,
      "grad_norm": 1.6886173486709595,
      "learning_rate": 8.602557935728735e-06,
      "loss": 0.0375,
      "step": 444160
    },
    {
      "epoch": 0.7269103120520021,
      "grad_norm": 1.1323472261428833,
      "learning_rate": 8.602492043515217e-06,
      "loss": 0.036,
      "step": 444180
    },
    {
      "epoch": 0.7269430424906554,
      "grad_norm": 2.0374433994293213,
      "learning_rate": 8.602426151301701e-06,
      "loss": 0.0317,
      "step": 444200
    },
    {
      "epoch": 0.7269757729293088,
      "grad_norm": 0.48757466673851013,
      "learning_rate": 8.602360259088185e-06,
      "loss": 0.0411,
      "step": 444220
    },
    {
      "epoch": 0.7270085033679622,
      "grad_norm": 1.4035412073135376,
      "learning_rate": 8.602294366874666e-06,
      "loss": 0.0503,
      "step": 444240
    },
    {
      "epoch": 0.7270412338066155,
      "grad_norm": 0.7067487835884094,
      "learning_rate": 8.60222847466115e-06,
      "loss": 0.0512,
      "step": 444260
    },
    {
      "epoch": 0.7270739642452688,
      "grad_norm": 1.0751588344573975,
      "learning_rate": 8.602162582447634e-06,
      "loss": 0.0322,
      "step": 444280
    },
    {
      "epoch": 0.7271066946839222,
      "grad_norm": 1.6025090217590332,
      "learning_rate": 8.602096690234116e-06,
      "loss": 0.0361,
      "step": 444300
    },
    {
      "epoch": 0.7271394251225755,
      "grad_norm": 1.2981059551239014,
      "learning_rate": 8.602030798020599e-06,
      "loss": 0.0323,
      "step": 444320
    },
    {
      "epoch": 0.7271721555612288,
      "grad_norm": 1.1444681882858276,
      "learning_rate": 8.601964905807083e-06,
      "loss": 0.0323,
      "step": 444340
    },
    {
      "epoch": 0.7272048859998822,
      "grad_norm": 1.095592737197876,
      "learning_rate": 8.601899013593565e-06,
      "loss": 0.0265,
      "step": 444360
    },
    {
      "epoch": 0.7272376164385355,
      "grad_norm": 0.7045431733131409,
      "learning_rate": 8.601833121380048e-06,
      "loss": 0.0237,
      "step": 444380
    },
    {
      "epoch": 0.7272703468771888,
      "grad_norm": 0.8350437879562378,
      "learning_rate": 8.60176722916653e-06,
      "loss": 0.0318,
      "step": 444400
    },
    {
      "epoch": 0.7273030773158422,
      "grad_norm": 0.8914170265197754,
      "learning_rate": 8.601701336953014e-06,
      "loss": 0.0266,
      "step": 444420
    },
    {
      "epoch": 0.7273358077544956,
      "grad_norm": 0.08575494587421417,
      "learning_rate": 8.601635444739496e-06,
      "loss": 0.0321,
      "step": 444440
    },
    {
      "epoch": 0.7273685381931488,
      "grad_norm": 0.6215618252754211,
      "learning_rate": 8.601569552525979e-06,
      "loss": 0.0328,
      "step": 444460
    },
    {
      "epoch": 0.7274012686318022,
      "grad_norm": 1.4106377363204956,
      "learning_rate": 8.601503660312461e-06,
      "loss": 0.0331,
      "step": 444480
    },
    {
      "epoch": 0.7274339990704556,
      "grad_norm": 0.07401853799819946,
      "learning_rate": 8.601437768098945e-06,
      "loss": 0.0316,
      "step": 444500
    },
    {
      "epoch": 0.7274667295091088,
      "grad_norm": 0.4258224368095398,
      "learning_rate": 8.601371875885426e-06,
      "loss": 0.024,
      "step": 444520
    },
    {
      "epoch": 0.7274994599477622,
      "grad_norm": 0.9337429404258728,
      "learning_rate": 8.60130598367191e-06,
      "loss": 0.0287,
      "step": 444540
    },
    {
      "epoch": 0.7275321903864156,
      "grad_norm": 1.2856414318084717,
      "learning_rate": 8.601240091458392e-06,
      "loss": 0.0397,
      "step": 444560
    },
    {
      "epoch": 0.7275649208250689,
      "grad_norm": 1.307167887687683,
      "learning_rate": 8.601174199244876e-06,
      "loss": 0.0398,
      "step": 444580
    },
    {
      "epoch": 0.7275976512637222,
      "grad_norm": 1.3809053897857666,
      "learning_rate": 8.601108307031359e-06,
      "loss": 0.0304,
      "step": 444600
    },
    {
      "epoch": 0.7276303817023756,
      "grad_norm": 0.9760243892669678,
      "learning_rate": 8.601042414817841e-06,
      "loss": 0.0236,
      "step": 444620
    },
    {
      "epoch": 0.727663112141029,
      "grad_norm": 0.3326278030872345,
      "learning_rate": 8.600976522604325e-06,
      "loss": 0.0414,
      "step": 444640
    },
    {
      "epoch": 0.7276958425796822,
      "grad_norm": 1.1334306001663208,
      "learning_rate": 8.600910630390808e-06,
      "loss": 0.033,
      "step": 444660
    },
    {
      "epoch": 0.7277285730183356,
      "grad_norm": 1.7826972007751465,
      "learning_rate": 8.60084473817729e-06,
      "loss": 0.0398,
      "step": 444680
    },
    {
      "epoch": 0.727761303456989,
      "grad_norm": 1.4208436012268066,
      "learning_rate": 8.600778845963774e-06,
      "loss": 0.03,
      "step": 444700
    },
    {
      "epoch": 0.7277940338956422,
      "grad_norm": 1.0113013982772827,
      "learning_rate": 8.600712953750257e-06,
      "loss": 0.0365,
      "step": 444720
    },
    {
      "epoch": 0.7278267643342956,
      "grad_norm": 0.4379430413246155,
      "learning_rate": 8.60064706153674e-06,
      "loss": 0.0419,
      "step": 444740
    },
    {
      "epoch": 0.727859494772949,
      "grad_norm": 1.4676848649978638,
      "learning_rate": 8.600581169323223e-06,
      "loss": 0.0271,
      "step": 444760
    },
    {
      "epoch": 0.7278922252116022,
      "grad_norm": 0.6080301403999329,
      "learning_rate": 8.600515277109705e-06,
      "loss": 0.0257,
      "step": 444780
    },
    {
      "epoch": 0.7279249556502556,
      "grad_norm": 0.8483689427375793,
      "learning_rate": 8.600449384896188e-06,
      "loss": 0.0412,
      "step": 444800
    },
    {
      "epoch": 0.727957686088909,
      "grad_norm": 1.4989452362060547,
      "learning_rate": 8.60038349268267e-06,
      "loss": 0.0324,
      "step": 444820
    },
    {
      "epoch": 0.7279904165275624,
      "grad_norm": 0.580252468585968,
      "learning_rate": 8.600317600469154e-06,
      "loss": 0.0256,
      "step": 444840
    },
    {
      "epoch": 0.7280231469662156,
      "grad_norm": 0.9745757579803467,
      "learning_rate": 8.600251708255636e-06,
      "loss": 0.0371,
      "step": 444860
    },
    {
      "epoch": 0.728055877404869,
      "grad_norm": 1.3243494033813477,
      "learning_rate": 8.60018581604212e-06,
      "loss": 0.0305,
      "step": 444880
    },
    {
      "epoch": 0.7280886078435224,
      "grad_norm": 1.0279916524887085,
      "learning_rate": 8.600119923828601e-06,
      "loss": 0.03,
      "step": 444900
    },
    {
      "epoch": 0.7281213382821756,
      "grad_norm": 1.140859842300415,
      "learning_rate": 8.600054031615085e-06,
      "loss": 0.0371,
      "step": 444920
    },
    {
      "epoch": 0.728154068720829,
      "grad_norm": 0.4420538544654846,
      "learning_rate": 8.599988139401567e-06,
      "loss": 0.0265,
      "step": 444940
    },
    {
      "epoch": 0.7281867991594824,
      "grad_norm": 2.2221992015838623,
      "learning_rate": 8.59992224718805e-06,
      "loss": 0.0368,
      "step": 444960
    },
    {
      "epoch": 0.7282195295981356,
      "grad_norm": 0.21670030057430267,
      "learning_rate": 8.599856354974532e-06,
      "loss": 0.0225,
      "step": 444980
    },
    {
      "epoch": 0.728252260036789,
      "grad_norm": 0.31864839792251587,
      "learning_rate": 8.599790462761016e-06,
      "loss": 0.0323,
      "step": 445000
    },
    {
      "epoch": 0.7282849904754424,
      "grad_norm": 1.9670557975769043,
      "learning_rate": 8.5997245705475e-06,
      "loss": 0.0352,
      "step": 445020
    },
    {
      "epoch": 0.7283177209140957,
      "grad_norm": 1.2029163837432861,
      "learning_rate": 8.599658678333981e-06,
      "loss": 0.0351,
      "step": 445040
    },
    {
      "epoch": 0.728350451352749,
      "grad_norm": 2.097318172454834,
      "learning_rate": 8.599592786120465e-06,
      "loss": 0.0293,
      "step": 445060
    },
    {
      "epoch": 0.7283831817914024,
      "grad_norm": 1.1852818727493286,
      "learning_rate": 8.599526893906948e-06,
      "loss": 0.0352,
      "step": 445080
    },
    {
      "epoch": 0.7284159122300558,
      "grad_norm": 0.4113161563873291,
      "learning_rate": 8.59946100169343e-06,
      "loss": 0.0275,
      "step": 445100
    },
    {
      "epoch": 0.728448642668709,
      "grad_norm": 0.48086586594581604,
      "learning_rate": 8.599395109479914e-06,
      "loss": 0.0307,
      "step": 445120
    },
    {
      "epoch": 0.7284813731073624,
      "grad_norm": 1.4149394035339355,
      "learning_rate": 8.599329217266397e-06,
      "loss": 0.0367,
      "step": 445140
    },
    {
      "epoch": 0.7285141035460158,
      "grad_norm": 0.34063857793807983,
      "learning_rate": 8.59926332505288e-06,
      "loss": 0.0391,
      "step": 445160
    },
    {
      "epoch": 0.728546833984669,
      "grad_norm": 0.8943569660186768,
      "learning_rate": 8.599197432839363e-06,
      "loss": 0.0301,
      "step": 445180
    },
    {
      "epoch": 0.7285795644233224,
      "grad_norm": 0.5658963918685913,
      "learning_rate": 8.599131540625845e-06,
      "loss": 0.0295,
      "step": 445200
    },
    {
      "epoch": 0.7286122948619758,
      "grad_norm": 0.5824753046035767,
      "learning_rate": 8.599065648412328e-06,
      "loss": 0.0292,
      "step": 445220
    },
    {
      "epoch": 0.7286450253006291,
      "grad_norm": 2.502366304397583,
      "learning_rate": 8.59899975619881e-06,
      "loss": 0.0364,
      "step": 445240
    },
    {
      "epoch": 0.7286777557392824,
      "grad_norm": 0.9934699535369873,
      "learning_rate": 8.598933863985294e-06,
      "loss": 0.0301,
      "step": 445260
    },
    {
      "epoch": 0.7287104861779358,
      "grad_norm": 0.9771644473075867,
      "learning_rate": 8.598867971771776e-06,
      "loss": 0.0335,
      "step": 445280
    },
    {
      "epoch": 0.7287432166165891,
      "grad_norm": 0.8294395208358765,
      "learning_rate": 8.59880207955826e-06,
      "loss": 0.0375,
      "step": 445300
    },
    {
      "epoch": 0.7287759470552424,
      "grad_norm": 5.359920978546143,
      "learning_rate": 8.598736187344741e-06,
      "loss": 0.0282,
      "step": 445320
    },
    {
      "epoch": 0.7288086774938958,
      "grad_norm": 0.4003048241138458,
      "learning_rate": 8.598670295131225e-06,
      "loss": 0.0337,
      "step": 445340
    },
    {
      "epoch": 0.7288414079325491,
      "grad_norm": 1.7301136255264282,
      "learning_rate": 8.598604402917707e-06,
      "loss": 0.0351,
      "step": 445360
    },
    {
      "epoch": 0.7288741383712024,
      "grad_norm": 0.8669434189796448,
      "learning_rate": 8.59853851070419e-06,
      "loss": 0.0238,
      "step": 445380
    },
    {
      "epoch": 0.7289068688098558,
      "grad_norm": 0.5436339378356934,
      "learning_rate": 8.598472618490674e-06,
      "loss": 0.0311,
      "step": 445400
    },
    {
      "epoch": 0.7289395992485092,
      "grad_norm": 0.3544310927391052,
      "learning_rate": 8.598406726277156e-06,
      "loss": 0.0265,
      "step": 445420
    },
    {
      "epoch": 0.7289723296871624,
      "grad_norm": 1.1438825130462646,
      "learning_rate": 8.59834083406364e-06,
      "loss": 0.0295,
      "step": 445440
    },
    {
      "epoch": 0.7290050601258158,
      "grad_norm": 0.9073213338851929,
      "learning_rate": 8.598274941850123e-06,
      "loss": 0.0301,
      "step": 445460
    },
    {
      "epoch": 0.7290377905644692,
      "grad_norm": 2.35404896736145,
      "learning_rate": 8.598209049636605e-06,
      "loss": 0.0294,
      "step": 445480
    },
    {
      "epoch": 0.7290705210031225,
      "grad_norm": 0.4481032192707062,
      "learning_rate": 8.598143157423088e-06,
      "loss": 0.0299,
      "step": 445500
    },
    {
      "epoch": 0.7291032514417758,
      "grad_norm": 1.04971182346344,
      "learning_rate": 8.598077265209572e-06,
      "loss": 0.0405,
      "step": 445520
    },
    {
      "epoch": 0.7291359818804292,
      "grad_norm": 0.5561128258705139,
      "learning_rate": 8.598011372996054e-06,
      "loss": 0.0306,
      "step": 445540
    },
    {
      "epoch": 0.7291687123190825,
      "grad_norm": 0.5084058046340942,
      "learning_rate": 8.597945480782537e-06,
      "loss": 0.0323,
      "step": 445560
    },
    {
      "epoch": 0.7292014427577358,
      "grad_norm": 2.2451179027557373,
      "learning_rate": 8.59787958856902e-06,
      "loss": 0.0398,
      "step": 445580
    },
    {
      "epoch": 0.7292341731963892,
      "grad_norm": 1.5689551830291748,
      "learning_rate": 8.597813696355503e-06,
      "loss": 0.0266,
      "step": 445600
    },
    {
      "epoch": 0.7292669036350425,
      "grad_norm": 0.49804556369781494,
      "learning_rate": 8.597747804141985e-06,
      "loss": 0.0432,
      "step": 445620
    },
    {
      "epoch": 0.7292996340736958,
      "grad_norm": 1.811532735824585,
      "learning_rate": 8.597681911928468e-06,
      "loss": 0.0471,
      "step": 445640
    },
    {
      "epoch": 0.7293323645123492,
      "grad_norm": 1.1043623685836792,
      "learning_rate": 8.59761601971495e-06,
      "loss": 0.0273,
      "step": 445660
    },
    {
      "epoch": 0.7293650949510025,
      "grad_norm": 0.3665998876094818,
      "learning_rate": 8.597550127501434e-06,
      "loss": 0.0363,
      "step": 445680
    },
    {
      "epoch": 0.7293978253896559,
      "grad_norm": 1.6449270248413086,
      "learning_rate": 8.597484235287916e-06,
      "loss": 0.0322,
      "step": 445700
    },
    {
      "epoch": 0.7294305558283092,
      "grad_norm": 1.1904700994491577,
      "learning_rate": 8.5974183430744e-06,
      "loss": 0.0311,
      "step": 445720
    },
    {
      "epoch": 0.7294632862669626,
      "grad_norm": 0.5842531323432922,
      "learning_rate": 8.597352450860883e-06,
      "loss": 0.0266,
      "step": 445740
    },
    {
      "epoch": 0.7294960167056159,
      "grad_norm": 0.6028501987457275,
      "learning_rate": 8.597286558647365e-06,
      "loss": 0.0293,
      "step": 445760
    },
    {
      "epoch": 0.7295287471442692,
      "grad_norm": 1.1413909196853638,
      "learning_rate": 8.597220666433848e-06,
      "loss": 0.0336,
      "step": 445780
    },
    {
      "epoch": 0.7295614775829226,
      "grad_norm": 1.2037482261657715,
      "learning_rate": 8.59715477422033e-06,
      "loss": 0.0411,
      "step": 445800
    },
    {
      "epoch": 0.7295942080215759,
      "grad_norm": 0.15937992930412292,
      "learning_rate": 8.597088882006814e-06,
      "loss": 0.0347,
      "step": 445820
    },
    {
      "epoch": 0.7296269384602292,
      "grad_norm": 0.5150044560432434,
      "learning_rate": 8.597022989793297e-06,
      "loss": 0.0334,
      "step": 445840
    },
    {
      "epoch": 0.7296596688988826,
      "grad_norm": 2.6618800163269043,
      "learning_rate": 8.59695709757978e-06,
      "loss": 0.0383,
      "step": 445860
    },
    {
      "epoch": 0.7296923993375359,
      "grad_norm": 6.682435035705566,
      "learning_rate": 8.596891205366263e-06,
      "loss": 0.052,
      "step": 445880
    },
    {
      "epoch": 0.7297251297761893,
      "grad_norm": 0.6364932060241699,
      "learning_rate": 8.596825313152747e-06,
      "loss": 0.0249,
      "step": 445900
    },
    {
      "epoch": 0.7297578602148426,
      "grad_norm": 0.85988849401474,
      "learning_rate": 8.596759420939228e-06,
      "loss": 0.0311,
      "step": 445920
    },
    {
      "epoch": 0.7297905906534959,
      "grad_norm": 3.843946933746338,
      "learning_rate": 8.596693528725712e-06,
      "loss": 0.0336,
      "step": 445940
    },
    {
      "epoch": 0.7298233210921493,
      "grad_norm": 1.013663411140442,
      "learning_rate": 8.596627636512194e-06,
      "loss": 0.0248,
      "step": 445960
    },
    {
      "epoch": 0.7298560515308026,
      "grad_norm": 0.4297637641429901,
      "learning_rate": 8.596561744298678e-06,
      "loss": 0.0293,
      "step": 445980
    },
    {
      "epoch": 0.729888781969456,
      "grad_norm": 2.3883469104766846,
      "learning_rate": 8.59649585208516e-06,
      "loss": 0.0312,
      "step": 446000
    },
    {
      "epoch": 0.7299215124081093,
      "grad_norm": 0.44891148805618286,
      "learning_rate": 8.596429959871643e-06,
      "loss": 0.0445,
      "step": 446020
    },
    {
      "epoch": 0.7299542428467626,
      "grad_norm": 1.581484079360962,
      "learning_rate": 8.596364067658125e-06,
      "loss": 0.0217,
      "step": 446040
    },
    {
      "epoch": 0.729986973285416,
      "grad_norm": 1.6682970523834229,
      "learning_rate": 8.596298175444608e-06,
      "loss": 0.0247,
      "step": 446060
    },
    {
      "epoch": 0.7300197037240693,
      "grad_norm": 1.188435673713684,
      "learning_rate": 8.596232283231092e-06,
      "loss": 0.0227,
      "step": 446080
    },
    {
      "epoch": 0.7300524341627227,
      "grad_norm": 1.4222334623336792,
      "learning_rate": 8.596166391017574e-06,
      "loss": 0.0384,
      "step": 446100
    },
    {
      "epoch": 0.730085164601376,
      "grad_norm": 2.412641763687134,
      "learning_rate": 8.596100498804058e-06,
      "loss": 0.0347,
      "step": 446120
    },
    {
      "epoch": 0.7301178950400293,
      "grad_norm": 1.1761592626571655,
      "learning_rate": 8.59603460659054e-06,
      "loss": 0.0342,
      "step": 446140
    },
    {
      "epoch": 0.7301506254786827,
      "grad_norm": 0.6611016988754272,
      "learning_rate": 8.595968714377023e-06,
      "loss": 0.0347,
      "step": 446160
    },
    {
      "epoch": 0.730183355917336,
      "grad_norm": 1.4465290307998657,
      "learning_rate": 8.595902822163505e-06,
      "loss": 0.0344,
      "step": 446180
    },
    {
      "epoch": 0.7302160863559893,
      "grad_norm": 0.8037384748458862,
      "learning_rate": 8.595836929949988e-06,
      "loss": 0.0268,
      "step": 446200
    },
    {
      "epoch": 0.7302488167946427,
      "grad_norm": 1.3694500923156738,
      "learning_rate": 8.59577103773647e-06,
      "loss": 0.0232,
      "step": 446220
    },
    {
      "epoch": 0.730281547233296,
      "grad_norm": 0.5092949271202087,
      "learning_rate": 8.595705145522954e-06,
      "loss": 0.0335,
      "step": 446240
    },
    {
      "epoch": 0.7303142776719493,
      "grad_norm": 0.9314423203468323,
      "learning_rate": 8.595639253309438e-06,
      "loss": 0.0277,
      "step": 446260
    },
    {
      "epoch": 0.7303470081106027,
      "grad_norm": 1.3048994541168213,
      "learning_rate": 8.59557336109592e-06,
      "loss": 0.0369,
      "step": 446280
    },
    {
      "epoch": 0.7303797385492561,
      "grad_norm": 1.7270872592926025,
      "learning_rate": 8.595507468882403e-06,
      "loss": 0.031,
      "step": 446300
    },
    {
      "epoch": 0.7304124689879093,
      "grad_norm": 0.5712616443634033,
      "learning_rate": 8.595441576668887e-06,
      "loss": 0.034,
      "step": 446320
    },
    {
      "epoch": 0.7304451994265627,
      "grad_norm": 0.8919482231140137,
      "learning_rate": 8.595375684455369e-06,
      "loss": 0.0326,
      "step": 446340
    },
    {
      "epoch": 0.7304779298652161,
      "grad_norm": 1.638336181640625,
      "learning_rate": 8.595309792241852e-06,
      "loss": 0.0297,
      "step": 446360
    },
    {
      "epoch": 0.7305106603038694,
      "grad_norm": 0.31792086362838745,
      "learning_rate": 8.595243900028334e-06,
      "loss": 0.0356,
      "step": 446380
    },
    {
      "epoch": 0.7305433907425227,
      "grad_norm": 1.2295979261398315,
      "learning_rate": 8.595178007814818e-06,
      "loss": 0.0219,
      "step": 446400
    },
    {
      "epoch": 0.7305761211811761,
      "grad_norm": 1.3872456550598145,
      "learning_rate": 8.5951121156013e-06,
      "loss": 0.0424,
      "step": 446420
    },
    {
      "epoch": 0.7306088516198294,
      "grad_norm": 1.6258389949798584,
      "learning_rate": 8.595046223387783e-06,
      "loss": 0.0291,
      "step": 446440
    },
    {
      "epoch": 0.7306415820584827,
      "grad_norm": 5.361813068389893,
      "learning_rate": 8.594980331174267e-06,
      "loss": 0.0436,
      "step": 446460
    },
    {
      "epoch": 0.7306743124971361,
      "grad_norm": 0.48814448714256287,
      "learning_rate": 8.594914438960749e-06,
      "loss": 0.0386,
      "step": 446480
    },
    {
      "epoch": 0.7307070429357895,
      "grad_norm": 1.3381260633468628,
      "learning_rate": 8.594848546747232e-06,
      "loss": 0.0415,
      "step": 446500
    },
    {
      "epoch": 0.7307397733744427,
      "grad_norm": 0.707693874835968,
      "learning_rate": 8.594782654533714e-06,
      "loss": 0.0235,
      "step": 446520
    },
    {
      "epoch": 0.7307725038130961,
      "grad_norm": 2.911958694458008,
      "learning_rate": 8.594716762320198e-06,
      "loss": 0.0267,
      "step": 446540
    },
    {
      "epoch": 0.7308052342517495,
      "grad_norm": 0.6908910274505615,
      "learning_rate": 8.59465087010668e-06,
      "loss": 0.031,
      "step": 446560
    },
    {
      "epoch": 0.7308379646904027,
      "grad_norm": 0.3945053815841675,
      "learning_rate": 8.594584977893163e-06,
      "loss": 0.0269,
      "step": 446580
    },
    {
      "epoch": 0.7308706951290561,
      "grad_norm": 2.3444201946258545,
      "learning_rate": 8.594519085679645e-06,
      "loss": 0.0301,
      "step": 446600
    },
    {
      "epoch": 0.7309034255677095,
      "grad_norm": 0.8316752910614014,
      "learning_rate": 8.594453193466129e-06,
      "loss": 0.021,
      "step": 446620
    },
    {
      "epoch": 0.7309361560063627,
      "grad_norm": 2.9319710731506348,
      "learning_rate": 8.594387301252612e-06,
      "loss": 0.0263,
      "step": 446640
    },
    {
      "epoch": 0.7309688864450161,
      "grad_norm": 1.4390497207641602,
      "learning_rate": 8.594321409039094e-06,
      "loss": 0.0307,
      "step": 446660
    },
    {
      "epoch": 0.7310016168836695,
      "grad_norm": 2.504197359085083,
      "learning_rate": 8.594255516825578e-06,
      "loss": 0.0248,
      "step": 446680
    },
    {
      "epoch": 0.7310343473223229,
      "grad_norm": 0.27734580636024475,
      "learning_rate": 8.594189624612061e-06,
      "loss": 0.0312,
      "step": 446700
    },
    {
      "epoch": 0.7310670777609761,
      "grad_norm": 1.5978786945343018,
      "learning_rate": 8.594123732398543e-06,
      "loss": 0.038,
      "step": 446720
    },
    {
      "epoch": 0.7310998081996295,
      "grad_norm": 0.431867390871048,
      "learning_rate": 8.594057840185027e-06,
      "loss": 0.0269,
      "step": 446740
    },
    {
      "epoch": 0.7311325386382829,
      "grad_norm": 0.558198869228363,
      "learning_rate": 8.593991947971509e-06,
      "loss": 0.0253,
      "step": 446760
    },
    {
      "epoch": 0.7311652690769361,
      "grad_norm": 1.5292423963546753,
      "learning_rate": 8.593926055757992e-06,
      "loss": 0.0296,
      "step": 446780
    },
    {
      "epoch": 0.7311979995155895,
      "grad_norm": 2.1482555866241455,
      "learning_rate": 8.593860163544476e-06,
      "loss": 0.0329,
      "step": 446800
    },
    {
      "epoch": 0.7312307299542429,
      "grad_norm": 0.8419505953788757,
      "learning_rate": 8.593794271330958e-06,
      "loss": 0.0401,
      "step": 446820
    },
    {
      "epoch": 0.7312634603928961,
      "grad_norm": 0.7387462258338928,
      "learning_rate": 8.593728379117441e-06,
      "loss": 0.0384,
      "step": 446840
    },
    {
      "epoch": 0.7312961908315495,
      "grad_norm": 0.5609050989151001,
      "learning_rate": 8.593662486903923e-06,
      "loss": 0.0247,
      "step": 446860
    },
    {
      "epoch": 0.7313289212702029,
      "grad_norm": 0.3644807040691376,
      "learning_rate": 8.593596594690407e-06,
      "loss": 0.0322,
      "step": 446880
    },
    {
      "epoch": 0.7313616517088563,
      "grad_norm": 0.58091801404953,
      "learning_rate": 8.593530702476889e-06,
      "loss": 0.0251,
      "step": 446900
    },
    {
      "epoch": 0.7313943821475095,
      "grad_norm": 2.611804485321045,
      "learning_rate": 8.593464810263372e-06,
      "loss": 0.0414,
      "step": 446920
    },
    {
      "epoch": 0.7314271125861629,
      "grad_norm": 1.488129734992981,
      "learning_rate": 8.593398918049854e-06,
      "loss": 0.031,
      "step": 446940
    },
    {
      "epoch": 0.7314598430248163,
      "grad_norm": 1.064473032951355,
      "learning_rate": 8.593333025836338e-06,
      "loss": 0.0428,
      "step": 446960
    },
    {
      "epoch": 0.7314925734634695,
      "grad_norm": 0.8322101831436157,
      "learning_rate": 8.59326713362282e-06,
      "loss": 0.0376,
      "step": 446980
    },
    {
      "epoch": 0.7315253039021229,
      "grad_norm": 0.2768954038619995,
      "learning_rate": 8.593201241409303e-06,
      "loss": 0.0337,
      "step": 447000
    },
    {
      "epoch": 0.7315580343407763,
      "grad_norm": 1.9880094528198242,
      "learning_rate": 8.593135349195785e-06,
      "loss": 0.039,
      "step": 447020
    },
    {
      "epoch": 0.7315907647794295,
      "grad_norm": 0.6797940731048584,
      "learning_rate": 8.593069456982269e-06,
      "loss": 0.0492,
      "step": 447040
    },
    {
      "epoch": 0.7316234952180829,
      "grad_norm": 1.6061595678329468,
      "learning_rate": 8.593003564768752e-06,
      "loss": 0.0367,
      "step": 447060
    },
    {
      "epoch": 0.7316562256567363,
      "grad_norm": 1.9329012632369995,
      "learning_rate": 8.592937672555234e-06,
      "loss": 0.0384,
      "step": 447080
    },
    {
      "epoch": 0.7316889560953896,
      "grad_norm": 2.2615036964416504,
      "learning_rate": 8.592871780341718e-06,
      "loss": 0.0338,
      "step": 447100
    },
    {
      "epoch": 0.7317216865340429,
      "grad_norm": 0.3138996362686157,
      "learning_rate": 8.592805888128201e-06,
      "loss": 0.0268,
      "step": 447120
    },
    {
      "epoch": 0.7317544169726963,
      "grad_norm": 0.716907799243927,
      "learning_rate": 8.592739995914683e-06,
      "loss": 0.0284,
      "step": 447140
    },
    {
      "epoch": 0.7317871474113496,
      "grad_norm": 0.7161424160003662,
      "learning_rate": 8.592674103701167e-06,
      "loss": 0.027,
      "step": 447160
    },
    {
      "epoch": 0.7318198778500029,
      "grad_norm": 2.181291103363037,
      "learning_rate": 8.59260821148765e-06,
      "loss": 0.0331,
      "step": 447180
    },
    {
      "epoch": 0.7318526082886563,
      "grad_norm": 0.5429598689079285,
      "learning_rate": 8.592542319274132e-06,
      "loss": 0.0372,
      "step": 447200
    },
    {
      "epoch": 0.7318853387273097,
      "grad_norm": 1.2210054397583008,
      "learning_rate": 8.592476427060616e-06,
      "loss": 0.0363,
      "step": 447220
    },
    {
      "epoch": 0.7319180691659629,
      "grad_norm": 0.842515230178833,
      "learning_rate": 8.592410534847098e-06,
      "loss": 0.0282,
      "step": 447240
    },
    {
      "epoch": 0.7319507996046163,
      "grad_norm": 1.442541241645813,
      "learning_rate": 8.592344642633581e-06,
      "loss": 0.0298,
      "step": 447260
    },
    {
      "epoch": 0.7319835300432697,
      "grad_norm": 0.9086191654205322,
      "learning_rate": 8.592278750420063e-06,
      "loss": 0.0438,
      "step": 447280
    },
    {
      "epoch": 0.732016260481923,
      "grad_norm": 0.742537796497345,
      "learning_rate": 8.592212858206547e-06,
      "loss": 0.0331,
      "step": 447300
    },
    {
      "epoch": 0.7320489909205763,
      "grad_norm": 1.3071107864379883,
      "learning_rate": 8.592146965993029e-06,
      "loss": 0.0289,
      "step": 447320
    },
    {
      "epoch": 0.7320817213592297,
      "grad_norm": 0.6610724925994873,
      "learning_rate": 8.592081073779512e-06,
      "loss": 0.0195,
      "step": 447340
    },
    {
      "epoch": 0.732114451797883,
      "grad_norm": 1.1749106645584106,
      "learning_rate": 8.592015181565994e-06,
      "loss": 0.028,
      "step": 447360
    },
    {
      "epoch": 0.7321471822365363,
      "grad_norm": 0.6617105603218079,
      "learning_rate": 8.591949289352478e-06,
      "loss": 0.0359,
      "step": 447380
    },
    {
      "epoch": 0.7321799126751897,
      "grad_norm": 1.2241894006729126,
      "learning_rate": 8.59188339713896e-06,
      "loss": 0.0343,
      "step": 447400
    },
    {
      "epoch": 0.732212643113843,
      "grad_norm": 2.0388948917388916,
      "learning_rate": 8.591817504925443e-06,
      "loss": 0.0347,
      "step": 447420
    },
    {
      "epoch": 0.7322453735524963,
      "grad_norm": 0.8841907382011414,
      "learning_rate": 8.591751612711927e-06,
      "loss": 0.0308,
      "step": 447440
    },
    {
      "epoch": 0.7322781039911497,
      "grad_norm": 5.815988063812256,
      "learning_rate": 8.591685720498409e-06,
      "loss": 0.0283,
      "step": 447460
    },
    {
      "epoch": 0.732310834429803,
      "grad_norm": 0.9541439414024353,
      "learning_rate": 8.591619828284892e-06,
      "loss": 0.0289,
      "step": 447480
    },
    {
      "epoch": 0.7323435648684564,
      "grad_norm": 1.8681000471115112,
      "learning_rate": 8.591553936071376e-06,
      "loss": 0.0264,
      "step": 447500
    },
    {
      "epoch": 0.7323762953071097,
      "grad_norm": 2.044217586517334,
      "learning_rate": 8.591488043857858e-06,
      "loss": 0.0439,
      "step": 447520
    },
    {
      "epoch": 0.732409025745763,
      "grad_norm": 1.7696651220321655,
      "learning_rate": 8.591422151644341e-06,
      "loss": 0.0299,
      "step": 447540
    },
    {
      "epoch": 0.7324417561844164,
      "grad_norm": 0.1492621898651123,
      "learning_rate": 8.591356259430825e-06,
      "loss": 0.0215,
      "step": 447560
    },
    {
      "epoch": 0.7324744866230697,
      "grad_norm": 0.877831757068634,
      "learning_rate": 8.591290367217307e-06,
      "loss": 0.0352,
      "step": 447580
    },
    {
      "epoch": 0.7325072170617231,
      "grad_norm": 2.1868293285369873,
      "learning_rate": 8.59122447500379e-06,
      "loss": 0.037,
      "step": 447600
    },
    {
      "epoch": 0.7325399475003764,
      "grad_norm": 0.9152817726135254,
      "learning_rate": 8.591158582790272e-06,
      "loss": 0.0271,
      "step": 447620
    },
    {
      "epoch": 0.7325726779390297,
      "grad_norm": 0.7837679386138916,
      "learning_rate": 8.591092690576756e-06,
      "loss": 0.0383,
      "step": 447640
    },
    {
      "epoch": 0.7326054083776831,
      "grad_norm": 1.4725663661956787,
      "learning_rate": 8.591026798363238e-06,
      "loss": 0.0319,
      "step": 447660
    },
    {
      "epoch": 0.7326381388163364,
      "grad_norm": 0.8169280886650085,
      "learning_rate": 8.590960906149721e-06,
      "loss": 0.0319,
      "step": 447680
    },
    {
      "epoch": 0.7326708692549898,
      "grad_norm": 0.561004638671875,
      "learning_rate": 8.590895013936203e-06,
      "loss": 0.032,
      "step": 447700
    },
    {
      "epoch": 0.7327035996936431,
      "grad_norm": 0.6480763554573059,
      "learning_rate": 8.590829121722687e-06,
      "loss": 0.0286,
      "step": 447720
    },
    {
      "epoch": 0.7327363301322964,
      "grad_norm": 0.4996092915534973,
      "learning_rate": 8.590763229509169e-06,
      "loss": 0.0358,
      "step": 447740
    },
    {
      "epoch": 0.7327690605709498,
      "grad_norm": 0.7454610466957092,
      "learning_rate": 8.590697337295652e-06,
      "loss": 0.0304,
      "step": 447760
    },
    {
      "epoch": 0.7328017910096031,
      "grad_norm": 0.5596768260002136,
      "learning_rate": 8.590631445082134e-06,
      "loss": 0.0303,
      "step": 447780
    },
    {
      "epoch": 0.7328345214482564,
      "grad_norm": 0.6725850105285645,
      "learning_rate": 8.590565552868618e-06,
      "loss": 0.0323,
      "step": 447800
    },
    {
      "epoch": 0.7328672518869098,
      "grad_norm": 1.0386625528335571,
      "learning_rate": 8.5904996606551e-06,
      "loss": 0.0232,
      "step": 447820
    },
    {
      "epoch": 0.7328999823255631,
      "grad_norm": 0.639481782913208,
      "learning_rate": 8.590433768441583e-06,
      "loss": 0.0236,
      "step": 447840
    },
    {
      "epoch": 0.7329327127642165,
      "grad_norm": 1.825760841369629,
      "learning_rate": 8.590367876228067e-06,
      "loss": 0.0425,
      "step": 447860
    },
    {
      "epoch": 0.7329654432028698,
      "grad_norm": 0.8229641318321228,
      "learning_rate": 8.590301984014549e-06,
      "loss": 0.0293,
      "step": 447880
    },
    {
      "epoch": 0.7329981736415232,
      "grad_norm": 2.0829176902770996,
      "learning_rate": 8.590236091801032e-06,
      "loss": 0.0323,
      "step": 447900
    },
    {
      "epoch": 0.7330309040801765,
      "grad_norm": 1.3306673765182495,
      "learning_rate": 8.590170199587516e-06,
      "loss": 0.0361,
      "step": 447920
    },
    {
      "epoch": 0.7330636345188298,
      "grad_norm": 2.5273327827453613,
      "learning_rate": 8.590104307373998e-06,
      "loss": 0.0278,
      "step": 447940
    },
    {
      "epoch": 0.7330963649574832,
      "grad_norm": 0.550900936126709,
      "learning_rate": 8.590038415160481e-06,
      "loss": 0.0347,
      "step": 447960
    },
    {
      "epoch": 0.7331290953961365,
      "grad_norm": 1.1413438320159912,
      "learning_rate": 8.589972522946965e-06,
      "loss": 0.0291,
      "step": 447980
    },
    {
      "epoch": 0.7331618258347898,
      "grad_norm": 1.2931467294692993,
      "learning_rate": 8.589906630733447e-06,
      "loss": 0.0253,
      "step": 448000
    },
    {
      "epoch": 0.7331945562734432,
      "grad_norm": 1.3123292922973633,
      "learning_rate": 8.58984073851993e-06,
      "loss": 0.0447,
      "step": 448020
    },
    {
      "epoch": 0.7332272867120965,
      "grad_norm": 0.7236095666885376,
      "learning_rate": 8.589774846306412e-06,
      "loss": 0.0277,
      "step": 448040
    },
    {
      "epoch": 0.7332600171507498,
      "grad_norm": 0.21305176615715027,
      "learning_rate": 8.589708954092896e-06,
      "loss": 0.0401,
      "step": 448060
    },
    {
      "epoch": 0.7332927475894032,
      "grad_norm": 0.22673767805099487,
      "learning_rate": 8.589643061879378e-06,
      "loss": 0.0292,
      "step": 448080
    },
    {
      "epoch": 0.7333254780280565,
      "grad_norm": 0.9527704119682312,
      "learning_rate": 8.589577169665861e-06,
      "loss": 0.0309,
      "step": 448100
    },
    {
      "epoch": 0.7333582084667098,
      "grad_norm": 2.155736207962036,
      "learning_rate": 8.589511277452343e-06,
      "loss": 0.0337,
      "step": 448120
    },
    {
      "epoch": 0.7333909389053632,
      "grad_norm": 0.6036369204521179,
      "learning_rate": 8.589445385238827e-06,
      "loss": 0.0418,
      "step": 448140
    },
    {
      "epoch": 0.7334236693440166,
      "grad_norm": 5.576313018798828,
      "learning_rate": 8.589379493025309e-06,
      "loss": 0.038,
      "step": 448160
    },
    {
      "epoch": 0.7334563997826699,
      "grad_norm": 0.6299219131469727,
      "learning_rate": 8.589313600811792e-06,
      "loss": 0.0238,
      "step": 448180
    },
    {
      "epoch": 0.7334891302213232,
      "grad_norm": 0.6327125430107117,
      "learning_rate": 8.589247708598276e-06,
      "loss": 0.026,
      "step": 448200
    },
    {
      "epoch": 0.7335218606599766,
      "grad_norm": 1.122571587562561,
      "learning_rate": 8.589181816384758e-06,
      "loss": 0.0249,
      "step": 448220
    },
    {
      "epoch": 0.7335545910986299,
      "grad_norm": 1.2002947330474854,
      "learning_rate": 8.589115924171241e-06,
      "loss": 0.027,
      "step": 448240
    },
    {
      "epoch": 0.7335873215372832,
      "grad_norm": 0.6465632915496826,
      "learning_rate": 8.589050031957723e-06,
      "loss": 0.0328,
      "step": 448260
    },
    {
      "epoch": 0.7336200519759366,
      "grad_norm": 1.5396658182144165,
      "learning_rate": 8.588984139744207e-06,
      "loss": 0.0347,
      "step": 448280
    },
    {
      "epoch": 0.7336527824145899,
      "grad_norm": 0.6868606805801392,
      "learning_rate": 8.58891824753069e-06,
      "loss": 0.0342,
      "step": 448300
    },
    {
      "epoch": 0.7336855128532432,
      "grad_norm": 1.038163423538208,
      "learning_rate": 8.588852355317172e-06,
      "loss": 0.0362,
      "step": 448320
    },
    {
      "epoch": 0.7337182432918966,
      "grad_norm": 0.2811526656150818,
      "learning_rate": 8.588786463103656e-06,
      "loss": 0.0247,
      "step": 448340
    },
    {
      "epoch": 0.73375097373055,
      "grad_norm": 0.5367552042007446,
      "learning_rate": 8.58872057089014e-06,
      "loss": 0.0261,
      "step": 448360
    },
    {
      "epoch": 0.7337837041692032,
      "grad_norm": 0.1282149702310562,
      "learning_rate": 8.588654678676622e-06,
      "loss": 0.0295,
      "step": 448380
    },
    {
      "epoch": 0.7338164346078566,
      "grad_norm": 3.8770101070404053,
      "learning_rate": 8.588588786463105e-06,
      "loss": 0.0388,
      "step": 448400
    },
    {
      "epoch": 0.73384916504651,
      "grad_norm": 1.094652771949768,
      "learning_rate": 8.588522894249587e-06,
      "loss": 0.0271,
      "step": 448420
    },
    {
      "epoch": 0.7338818954851632,
      "grad_norm": 1.5264966487884521,
      "learning_rate": 8.58845700203607e-06,
      "loss": 0.0396,
      "step": 448440
    },
    {
      "epoch": 0.7339146259238166,
      "grad_norm": 1.6032289266586304,
      "learning_rate": 8.588391109822552e-06,
      "loss": 0.0252,
      "step": 448460
    },
    {
      "epoch": 0.73394735636247,
      "grad_norm": 0.6481648683547974,
      "learning_rate": 8.588325217609036e-06,
      "loss": 0.0197,
      "step": 448480
    },
    {
      "epoch": 0.7339800868011233,
      "grad_norm": 1.4979790449142456,
      "learning_rate": 8.588259325395518e-06,
      "loss": 0.0293,
      "step": 448500
    },
    {
      "epoch": 0.7340128172397766,
      "grad_norm": 1.5226526260375977,
      "learning_rate": 8.588193433182002e-06,
      "loss": 0.043,
      "step": 448520
    },
    {
      "epoch": 0.73404554767843,
      "grad_norm": 1.4711437225341797,
      "learning_rate": 8.588127540968485e-06,
      "loss": 0.0379,
      "step": 448540
    },
    {
      "epoch": 0.7340782781170834,
      "grad_norm": 1.4014132022857666,
      "learning_rate": 8.588061648754967e-06,
      "loss": 0.0255,
      "step": 448560
    },
    {
      "epoch": 0.7341110085557366,
      "grad_norm": 0.7542148232460022,
      "learning_rate": 8.58799575654145e-06,
      "loss": 0.027,
      "step": 448580
    },
    {
      "epoch": 0.73414373899439,
      "grad_norm": 0.9830375909805298,
      "learning_rate": 8.587929864327933e-06,
      "loss": 0.0258,
      "step": 448600
    },
    {
      "epoch": 0.7341764694330434,
      "grad_norm": 1.505650281906128,
      "learning_rate": 8.587863972114416e-06,
      "loss": 0.0341,
      "step": 448620
    },
    {
      "epoch": 0.7342091998716966,
      "grad_norm": 0.5279505252838135,
      "learning_rate": 8.587798079900898e-06,
      "loss": 0.0349,
      "step": 448640
    },
    {
      "epoch": 0.73424193031035,
      "grad_norm": 1.951371669769287,
      "learning_rate": 8.587732187687382e-06,
      "loss": 0.0329,
      "step": 448660
    },
    {
      "epoch": 0.7342746607490034,
      "grad_norm": 0.10321778059005737,
      "learning_rate": 8.587666295473865e-06,
      "loss": 0.0297,
      "step": 448680
    },
    {
      "epoch": 0.7343073911876566,
      "grad_norm": 0.6219319701194763,
      "learning_rate": 8.587600403260347e-06,
      "loss": 0.0392,
      "step": 448700
    },
    {
      "epoch": 0.73434012162631,
      "grad_norm": 0.3285139799118042,
      "learning_rate": 8.58753451104683e-06,
      "loss": 0.0268,
      "step": 448720
    },
    {
      "epoch": 0.7343728520649634,
      "grad_norm": 0.6071654558181763,
      "learning_rate": 8.587468618833314e-06,
      "loss": 0.0331,
      "step": 448740
    },
    {
      "epoch": 0.7344055825036168,
      "grad_norm": 0.4395756125450134,
      "learning_rate": 8.587402726619796e-06,
      "loss": 0.0346,
      "step": 448760
    },
    {
      "epoch": 0.73443831294227,
      "grad_norm": 0.7923609614372253,
      "learning_rate": 8.58733683440628e-06,
      "loss": 0.0363,
      "step": 448780
    },
    {
      "epoch": 0.7344710433809234,
      "grad_norm": 1.0579885244369507,
      "learning_rate": 8.587270942192762e-06,
      "loss": 0.0362,
      "step": 448800
    },
    {
      "epoch": 0.7345037738195768,
      "grad_norm": 0.8078740835189819,
      "learning_rate": 8.587205049979245e-06,
      "loss": 0.028,
      "step": 448820
    },
    {
      "epoch": 0.73453650425823,
      "grad_norm": 0.9125068187713623,
      "learning_rate": 8.587139157765727e-06,
      "loss": 0.0378,
      "step": 448840
    },
    {
      "epoch": 0.7345692346968834,
      "grad_norm": 1.6780602931976318,
      "learning_rate": 8.58707326555221e-06,
      "loss": 0.0256,
      "step": 448860
    },
    {
      "epoch": 0.7346019651355368,
      "grad_norm": 0.25712472200393677,
      "learning_rate": 8.587007373338693e-06,
      "loss": 0.0208,
      "step": 448880
    },
    {
      "epoch": 0.73463469557419,
      "grad_norm": 1.7728937864303589,
      "learning_rate": 8.586941481125176e-06,
      "loss": 0.0309,
      "step": 448900
    },
    {
      "epoch": 0.7346674260128434,
      "grad_norm": 0.37151455879211426,
      "learning_rate": 8.58687558891166e-06,
      "loss": 0.0326,
      "step": 448920
    },
    {
      "epoch": 0.7347001564514968,
      "grad_norm": 2.474806308746338,
      "learning_rate": 8.586809696698142e-06,
      "loss": 0.0342,
      "step": 448940
    },
    {
      "epoch": 0.7347328868901501,
      "grad_norm": 7.757075786590576,
      "learning_rate": 8.586743804484625e-06,
      "loss": 0.0329,
      "step": 448960
    },
    {
      "epoch": 0.7347656173288034,
      "grad_norm": 0.9043855667114258,
      "learning_rate": 8.586677912271107e-06,
      "loss": 0.0272,
      "step": 448980
    },
    {
      "epoch": 0.7347983477674568,
      "grad_norm": 0.9921860098838806,
      "learning_rate": 8.58661202005759e-06,
      "loss": 0.0364,
      "step": 449000
    },
    {
      "epoch": 0.7348310782061102,
      "grad_norm": 0.40637829899787903,
      "learning_rate": 8.586546127844073e-06,
      "loss": 0.0322,
      "step": 449020
    },
    {
      "epoch": 0.7348638086447634,
      "grad_norm": 0.9831677675247192,
      "learning_rate": 8.586480235630556e-06,
      "loss": 0.0321,
      "step": 449040
    },
    {
      "epoch": 0.7348965390834168,
      "grad_norm": 0.46112126111984253,
      "learning_rate": 8.586414343417038e-06,
      "loss": 0.0345,
      "step": 449060
    },
    {
      "epoch": 0.7349292695220702,
      "grad_norm": 0.4912921190261841,
      "learning_rate": 8.586348451203522e-06,
      "loss": 0.0376,
      "step": 449080
    },
    {
      "epoch": 0.7349619999607234,
      "grad_norm": 1.8630995750427246,
      "learning_rate": 8.586282558990005e-06,
      "loss": 0.0305,
      "step": 449100
    },
    {
      "epoch": 0.7349947303993768,
      "grad_norm": 0.6537784934043884,
      "learning_rate": 8.586216666776487e-06,
      "loss": 0.0297,
      "step": 449120
    },
    {
      "epoch": 0.7350274608380302,
      "grad_norm": 0.5533333420753479,
      "learning_rate": 8.58615077456297e-06,
      "loss": 0.0262,
      "step": 449140
    },
    {
      "epoch": 0.7350601912766835,
      "grad_norm": 0.8982693552970886,
      "learning_rate": 8.586084882349454e-06,
      "loss": 0.0316,
      "step": 449160
    },
    {
      "epoch": 0.7350929217153368,
      "grad_norm": 0.24761874973773956,
      "learning_rate": 8.586018990135936e-06,
      "loss": 0.0204,
      "step": 449180
    },
    {
      "epoch": 0.7351256521539902,
      "grad_norm": 0.6915209889411926,
      "learning_rate": 8.58595309792242e-06,
      "loss": 0.0335,
      "step": 449200
    },
    {
      "epoch": 0.7351583825926435,
      "grad_norm": 1.9684851169586182,
      "learning_rate": 8.585887205708902e-06,
      "loss": 0.0485,
      "step": 449220
    },
    {
      "epoch": 0.7351911130312968,
      "grad_norm": 0.3828172981739044,
      "learning_rate": 8.585821313495385e-06,
      "loss": 0.0272,
      "step": 449240
    },
    {
      "epoch": 0.7352238434699502,
      "grad_norm": 1.9391911029815674,
      "learning_rate": 8.585755421281869e-06,
      "loss": 0.0346,
      "step": 449260
    },
    {
      "epoch": 0.7352565739086036,
      "grad_norm": 0.9385367631912231,
      "learning_rate": 8.58568952906835e-06,
      "loss": 0.0406,
      "step": 449280
    },
    {
      "epoch": 0.7352893043472568,
      "grad_norm": 1.625353455543518,
      "learning_rate": 8.585623636854834e-06,
      "loss": 0.0477,
      "step": 449300
    },
    {
      "epoch": 0.7353220347859102,
      "grad_norm": 0.9276955127716064,
      "learning_rate": 8.585557744641316e-06,
      "loss": 0.0309,
      "step": 449320
    },
    {
      "epoch": 0.7353547652245636,
      "grad_norm": 0.7739495635032654,
      "learning_rate": 8.5854918524278e-06,
      "loss": 0.0322,
      "step": 449340
    },
    {
      "epoch": 0.7353874956632169,
      "grad_norm": 0.32723525166511536,
      "learning_rate": 8.585425960214282e-06,
      "loss": 0.0247,
      "step": 449360
    },
    {
      "epoch": 0.7354202261018702,
      "grad_norm": 0.6415255665779114,
      "learning_rate": 8.585360068000765e-06,
      "loss": 0.0319,
      "step": 449380
    },
    {
      "epoch": 0.7354529565405236,
      "grad_norm": 0.3142152428627014,
      "learning_rate": 8.585294175787247e-06,
      "loss": 0.0328,
      "step": 449400
    },
    {
      "epoch": 0.7354856869791769,
      "grad_norm": 1.2834850549697876,
      "learning_rate": 8.58522828357373e-06,
      "loss": 0.0339,
      "step": 449420
    },
    {
      "epoch": 0.7355184174178302,
      "grad_norm": 1.395378828048706,
      "learning_rate": 8.585162391360213e-06,
      "loss": 0.0428,
      "step": 449440
    },
    {
      "epoch": 0.7355511478564836,
      "grad_norm": 0.224868044257164,
      "learning_rate": 8.585096499146696e-06,
      "loss": 0.0256,
      "step": 449460
    },
    {
      "epoch": 0.7355838782951369,
      "grad_norm": 0.339115172624588,
      "learning_rate": 8.58503060693318e-06,
      "loss": 0.0391,
      "step": 449480
    },
    {
      "epoch": 0.7356166087337902,
      "grad_norm": 1.8538975715637207,
      "learning_rate": 8.584964714719662e-06,
      "loss": 0.0265,
      "step": 449500
    },
    {
      "epoch": 0.7356493391724436,
      "grad_norm": 0.4187033176422119,
      "learning_rate": 8.584898822506145e-06,
      "loss": 0.0256,
      "step": 449520
    },
    {
      "epoch": 0.735682069611097,
      "grad_norm": 2.405574083328247,
      "learning_rate": 8.584832930292629e-06,
      "loss": 0.0337,
      "step": 449540
    },
    {
      "epoch": 0.7357148000497503,
      "grad_norm": 1.4764478206634521,
      "learning_rate": 8.58476703807911e-06,
      "loss": 0.0376,
      "step": 449560
    },
    {
      "epoch": 0.7357475304884036,
      "grad_norm": 0.9916076064109802,
      "learning_rate": 8.584701145865594e-06,
      "loss": 0.0294,
      "step": 449580
    },
    {
      "epoch": 0.735780260927057,
      "grad_norm": 1.6143467426300049,
      "learning_rate": 8.584635253652078e-06,
      "loss": 0.0186,
      "step": 449600
    },
    {
      "epoch": 0.7358129913657103,
      "grad_norm": 0.18441234529018402,
      "learning_rate": 8.58456936143856e-06,
      "loss": 0.0364,
      "step": 449620
    },
    {
      "epoch": 0.7358457218043636,
      "grad_norm": 0.3393373191356659,
      "learning_rate": 8.584503469225043e-06,
      "loss": 0.0336,
      "step": 449640
    },
    {
      "epoch": 0.735878452243017,
      "grad_norm": 0.5296157002449036,
      "learning_rate": 8.584437577011525e-06,
      "loss": 0.0241,
      "step": 449660
    },
    {
      "epoch": 0.7359111826816703,
      "grad_norm": 1.3313912153244019,
      "learning_rate": 8.584371684798009e-06,
      "loss": 0.0306,
      "step": 449680
    },
    {
      "epoch": 0.7359439131203236,
      "grad_norm": 0.550416111946106,
      "learning_rate": 8.58430579258449e-06,
      "loss": 0.0363,
      "step": 449700
    },
    {
      "epoch": 0.735976643558977,
      "grad_norm": 0.7985554933547974,
      "learning_rate": 8.584239900370974e-06,
      "loss": 0.0285,
      "step": 449720
    },
    {
      "epoch": 0.7360093739976303,
      "grad_norm": 1.1475228071212769,
      "learning_rate": 8.584174008157456e-06,
      "loss": 0.0289,
      "step": 449740
    },
    {
      "epoch": 0.7360421044362837,
      "grad_norm": 0.7446603178977966,
      "learning_rate": 8.58410811594394e-06,
      "loss": 0.0238,
      "step": 449760
    },
    {
      "epoch": 0.736074834874937,
      "grad_norm": 0.4607698619365692,
      "learning_rate": 8.584042223730422e-06,
      "loss": 0.0253,
      "step": 449780
    },
    {
      "epoch": 0.7361075653135903,
      "grad_norm": 1.2976840734481812,
      "learning_rate": 8.583976331516905e-06,
      "loss": 0.0359,
      "step": 449800
    },
    {
      "epoch": 0.7361402957522437,
      "grad_norm": 0.5415871143341064,
      "learning_rate": 8.583910439303387e-06,
      "loss": 0.04,
      "step": 449820
    },
    {
      "epoch": 0.736173026190897,
      "grad_norm": 2.0180017948150635,
      "learning_rate": 8.58384454708987e-06,
      "loss": 0.0282,
      "step": 449840
    },
    {
      "epoch": 0.7362057566295503,
      "grad_norm": 1.2302340269088745,
      "learning_rate": 8.583778654876353e-06,
      "loss": 0.0405,
      "step": 449860
    },
    {
      "epoch": 0.7362384870682037,
      "grad_norm": 1.6650707721710205,
      "learning_rate": 8.583712762662836e-06,
      "loss": 0.062,
      "step": 449880
    },
    {
      "epoch": 0.736271217506857,
      "grad_norm": 1.4269224405288696,
      "learning_rate": 8.58364687044932e-06,
      "loss": 0.0277,
      "step": 449900
    },
    {
      "epoch": 0.7363039479455104,
      "grad_norm": 1.4058727025985718,
      "learning_rate": 8.583580978235802e-06,
      "loss": 0.0268,
      "step": 449920
    },
    {
      "epoch": 0.7363366783841637,
      "grad_norm": 1.1590216159820557,
      "learning_rate": 8.583515086022285e-06,
      "loss": 0.0413,
      "step": 449940
    },
    {
      "epoch": 0.7363694088228171,
      "grad_norm": 1.0115982294082642,
      "learning_rate": 8.583449193808769e-06,
      "loss": 0.0423,
      "step": 449960
    },
    {
      "epoch": 0.7364021392614704,
      "grad_norm": 1.1598997116088867,
      "learning_rate": 8.583383301595251e-06,
      "loss": 0.0422,
      "step": 449980
    },
    {
      "epoch": 0.7364348697001237,
      "grad_norm": 0.9540298581123352,
      "learning_rate": 8.583317409381734e-06,
      "loss": 0.0324,
      "step": 450000
    },
    {
      "epoch": 0.7364348697001237,
      "eval_loss": 0.016121551394462585,
      "eval_runtime": 6514.1584,
      "eval_samples_per_second": 157.788,
      "eval_steps_per_second": 15.779,
      "eval_sts-dev_pearson_cosine": 0.9619297040457969,
      "eval_sts-dev_spearman_cosine": 0.8814872454439553,
      "step": 450000
    },
    {
      "epoch": 0.7364676001387771,
      "grad_norm": 0.7546043395996094,
      "learning_rate": 8.583251517168218e-06,
      "loss": 0.0363,
      "step": 450020
    },
    {
      "epoch": 0.7365003305774304,
      "grad_norm": 0.7087180018424988,
      "learning_rate": 8.5831856249547e-06,
      "loss": 0.0366,
      "step": 450040
    },
    {
      "epoch": 0.7365330610160837,
      "grad_norm": 1.0338093042373657,
      "learning_rate": 8.583119732741184e-06,
      "loss": 0.0349,
      "step": 450060
    },
    {
      "epoch": 0.7365657914547371,
      "grad_norm": 0.5003573298454285,
      "learning_rate": 8.583053840527665e-06,
      "loss": 0.0336,
      "step": 450080
    },
    {
      "epoch": 0.7365985218933904,
      "grad_norm": 0.19510675966739655,
      "learning_rate": 8.582987948314149e-06,
      "loss": 0.0274,
      "step": 450100
    },
    {
      "epoch": 0.7366312523320437,
      "grad_norm": 0.5239892601966858,
      "learning_rate": 8.582922056100631e-06,
      "loss": 0.0423,
      "step": 450120
    },
    {
      "epoch": 0.7366639827706971,
      "grad_norm": 0.4090120792388916,
      "learning_rate": 8.582856163887114e-06,
      "loss": 0.0268,
      "step": 450140
    },
    {
      "epoch": 0.7366967132093505,
      "grad_norm": 1.3451051712036133,
      "learning_rate": 8.582790271673596e-06,
      "loss": 0.0366,
      "step": 450160
    },
    {
      "epoch": 0.7367294436480037,
      "grad_norm": 1.2124205827713013,
      "learning_rate": 8.58272437946008e-06,
      "loss": 0.0274,
      "step": 450180
    },
    {
      "epoch": 0.7367621740866571,
      "grad_norm": 2.4618148803710938,
      "learning_rate": 8.582658487246562e-06,
      "loss": 0.0285,
      "step": 450200
    },
    {
      "epoch": 0.7367949045253105,
      "grad_norm": 0.6949142217636108,
      "learning_rate": 8.582592595033045e-06,
      "loss": 0.0284,
      "step": 450220
    },
    {
      "epoch": 0.7368276349639638,
      "grad_norm": 0.9241562485694885,
      "learning_rate": 8.582526702819527e-06,
      "loss": 0.03,
      "step": 450240
    },
    {
      "epoch": 0.7368603654026171,
      "grad_norm": 3.562032461166382,
      "learning_rate": 8.582460810606011e-06,
      "loss": 0.034,
      "step": 450260
    },
    {
      "epoch": 0.7368930958412705,
      "grad_norm": 2.195183753967285,
      "learning_rate": 8.582394918392494e-06,
      "loss": 0.0343,
      "step": 450280
    },
    {
      "epoch": 0.7369258262799238,
      "grad_norm": 1.0035187005996704,
      "learning_rate": 8.582329026178976e-06,
      "loss": 0.0314,
      "step": 450300
    },
    {
      "epoch": 0.7369585567185771,
      "grad_norm": 1.636494517326355,
      "learning_rate": 8.58226313396546e-06,
      "loss": 0.0388,
      "step": 450320
    },
    {
      "epoch": 0.7369912871572305,
      "grad_norm": 0.3989250957965851,
      "learning_rate": 8.582197241751944e-06,
      "loss": 0.025,
      "step": 450340
    },
    {
      "epoch": 0.7370240175958839,
      "grad_norm": 1.9371229410171509,
      "learning_rate": 8.582131349538425e-06,
      "loss": 0.0443,
      "step": 450360
    },
    {
      "epoch": 0.7370567480345371,
      "grad_norm": 1.9878215789794922,
      "learning_rate": 8.582065457324909e-06,
      "loss": 0.0318,
      "step": 450380
    },
    {
      "epoch": 0.7370894784731905,
      "grad_norm": 0.5239750146865845,
      "learning_rate": 8.581999565111393e-06,
      "loss": 0.026,
      "step": 450400
    },
    {
      "epoch": 0.7371222089118439,
      "grad_norm": 0.32885095477104187,
      "learning_rate": 8.581933672897875e-06,
      "loss": 0.0213,
      "step": 450420
    },
    {
      "epoch": 0.7371549393504971,
      "grad_norm": 0.4428984224796295,
      "learning_rate": 8.581867780684358e-06,
      "loss": 0.0243,
      "step": 450440
    },
    {
      "epoch": 0.7371876697891505,
      "grad_norm": 0.7642333507537842,
      "learning_rate": 8.58180188847084e-06,
      "loss": 0.0276,
      "step": 450460
    },
    {
      "epoch": 0.7372204002278039,
      "grad_norm": 0.7716410756111145,
      "learning_rate": 8.581735996257324e-06,
      "loss": 0.0351,
      "step": 450480
    },
    {
      "epoch": 0.7372531306664571,
      "grad_norm": 0.9911271929740906,
      "learning_rate": 8.581670104043805e-06,
      "loss": 0.0436,
      "step": 450500
    },
    {
      "epoch": 0.7372858611051105,
      "grad_norm": 0.37385293841362,
      "learning_rate": 8.581604211830289e-06,
      "loss": 0.0246,
      "step": 450520
    },
    {
      "epoch": 0.7373185915437639,
      "grad_norm": 0.40014493465423584,
      "learning_rate": 8.581538319616771e-06,
      "loss": 0.0333,
      "step": 450540
    },
    {
      "epoch": 0.7373513219824173,
      "grad_norm": 3.0916390419006348,
      "learning_rate": 8.581472427403255e-06,
      "loss": 0.034,
      "step": 450560
    },
    {
      "epoch": 0.7373840524210705,
      "grad_norm": 1.5770057439804077,
      "learning_rate": 8.581406535189736e-06,
      "loss": 0.0309,
      "step": 450580
    },
    {
      "epoch": 0.7374167828597239,
      "grad_norm": 0.36591637134552,
      "learning_rate": 8.58134064297622e-06,
      "loss": 0.035,
      "step": 450600
    },
    {
      "epoch": 0.7374495132983773,
      "grad_norm": 0.20797206461429596,
      "learning_rate": 8.581274750762702e-06,
      "loss": 0.0205,
      "step": 450620
    },
    {
      "epoch": 0.7374822437370305,
      "grad_norm": 2.597973346710205,
      "learning_rate": 8.581208858549186e-06,
      "loss": 0.0252,
      "step": 450640
    },
    {
      "epoch": 0.7375149741756839,
      "grad_norm": 1.6767568588256836,
      "learning_rate": 8.581142966335669e-06,
      "loss": 0.0335,
      "step": 450660
    },
    {
      "epoch": 0.7375477046143373,
      "grad_norm": 0.5796799063682556,
      "learning_rate": 8.581077074122151e-06,
      "loss": 0.0366,
      "step": 450680
    },
    {
      "epoch": 0.7375804350529905,
      "grad_norm": 1.664037823677063,
      "learning_rate": 8.581011181908635e-06,
      "loss": 0.0374,
      "step": 450700
    },
    {
      "epoch": 0.7376131654916439,
      "grad_norm": 0.5800141096115112,
      "learning_rate": 8.580945289695116e-06,
      "loss": 0.0406,
      "step": 450720
    },
    {
      "epoch": 0.7376458959302973,
      "grad_norm": 1.349129319190979,
      "learning_rate": 8.5808793974816e-06,
      "loss": 0.0352,
      "step": 450740
    },
    {
      "epoch": 0.7376786263689507,
      "grad_norm": 1.2298659086227417,
      "learning_rate": 8.580813505268084e-06,
      "loss": 0.0251,
      "step": 450760
    },
    {
      "epoch": 0.7377113568076039,
      "grad_norm": 0.7736691236495972,
      "learning_rate": 8.580747613054567e-06,
      "loss": 0.0297,
      "step": 450780
    },
    {
      "epoch": 0.7377440872462573,
      "grad_norm": 1.6326429843902588,
      "learning_rate": 8.580681720841049e-06,
      "loss": 0.0323,
      "step": 450800
    },
    {
      "epoch": 0.7377768176849107,
      "grad_norm": 0.17820362746715546,
      "learning_rate": 8.580615828627533e-06,
      "loss": 0.0387,
      "step": 450820
    },
    {
      "epoch": 0.7378095481235639,
      "grad_norm": 1.2132500410079956,
      "learning_rate": 8.580549936414015e-06,
      "loss": 0.0312,
      "step": 450840
    },
    {
      "epoch": 0.7378422785622173,
      "grad_norm": 0.7372803092002869,
      "learning_rate": 8.580484044200498e-06,
      "loss": 0.0292,
      "step": 450860
    },
    {
      "epoch": 0.7378750090008707,
      "grad_norm": 0.8198135495185852,
      "learning_rate": 8.58041815198698e-06,
      "loss": 0.0242,
      "step": 450880
    },
    {
      "epoch": 0.7379077394395239,
      "grad_norm": 1.329695224761963,
      "learning_rate": 8.580352259773464e-06,
      "loss": 0.0304,
      "step": 450900
    },
    {
      "epoch": 0.7379404698781773,
      "grad_norm": 1.1364413499832153,
      "learning_rate": 8.580286367559946e-06,
      "loss": 0.0342,
      "step": 450920
    },
    {
      "epoch": 0.7379732003168307,
      "grad_norm": 0.26050224900245667,
      "learning_rate": 8.580220475346429e-06,
      "loss": 0.0354,
      "step": 450940
    },
    {
      "epoch": 0.7380059307554839,
      "grad_norm": 1.0538097620010376,
      "learning_rate": 8.580154583132911e-06,
      "loss": 0.0405,
      "step": 450960
    },
    {
      "epoch": 0.7380386611941373,
      "grad_norm": 0.7660263180732727,
      "learning_rate": 8.580088690919395e-06,
      "loss": 0.0275,
      "step": 450980
    },
    {
      "epoch": 0.7380713916327907,
      "grad_norm": 1.792761206626892,
      "learning_rate": 8.580022798705878e-06,
      "loss": 0.0364,
      "step": 451000
    },
    {
      "epoch": 0.738104122071444,
      "grad_norm": 0.7418302297592163,
      "learning_rate": 8.57995690649236e-06,
      "loss": 0.0319,
      "step": 451020
    },
    {
      "epoch": 0.7381368525100973,
      "grad_norm": 2.1491758823394775,
      "learning_rate": 8.579891014278844e-06,
      "loss": 0.0441,
      "step": 451040
    },
    {
      "epoch": 0.7381695829487507,
      "grad_norm": 1.9516518115997314,
      "learning_rate": 8.579825122065326e-06,
      "loss": 0.028,
      "step": 451060
    },
    {
      "epoch": 0.738202313387404,
      "grad_norm": 1.0306504964828491,
      "learning_rate": 8.57975922985181e-06,
      "loss": 0.0311,
      "step": 451080
    },
    {
      "epoch": 0.7382350438260573,
      "grad_norm": 0.6364253759384155,
      "learning_rate": 8.579693337638291e-06,
      "loss": 0.0385,
      "step": 451100
    },
    {
      "epoch": 0.7382677742647107,
      "grad_norm": 0.6116965413093567,
      "learning_rate": 8.579627445424775e-06,
      "loss": 0.0243,
      "step": 451120
    },
    {
      "epoch": 0.7383005047033641,
      "grad_norm": 1.0694693326950073,
      "learning_rate": 8.579561553211258e-06,
      "loss": 0.0244,
      "step": 451140
    },
    {
      "epoch": 0.7383332351420173,
      "grad_norm": 0.8749592900276184,
      "learning_rate": 8.57949566099774e-06,
      "loss": 0.0327,
      "step": 451160
    },
    {
      "epoch": 0.7383659655806707,
      "grad_norm": 0.8747110366821289,
      "learning_rate": 8.579429768784224e-06,
      "loss": 0.0321,
      "step": 451180
    },
    {
      "epoch": 0.7383986960193241,
      "grad_norm": 1.743928074836731,
      "learning_rate": 8.579363876570707e-06,
      "loss": 0.0249,
      "step": 451200
    },
    {
      "epoch": 0.7384314264579774,
      "grad_norm": 1.7131520509719849,
      "learning_rate": 8.57929798435719e-06,
      "loss": 0.0345,
      "step": 451220
    },
    {
      "epoch": 0.7384641568966307,
      "grad_norm": 0.47408124804496765,
      "learning_rate": 8.579232092143673e-06,
      "loss": 0.0353,
      "step": 451240
    },
    {
      "epoch": 0.7384968873352841,
      "grad_norm": 1.72880220413208,
      "learning_rate": 8.579166199930155e-06,
      "loss": 0.0454,
      "step": 451260
    },
    {
      "epoch": 0.7385296177739374,
      "grad_norm": 1.6464720964431763,
      "learning_rate": 8.579100307716638e-06,
      "loss": 0.0386,
      "step": 451280
    },
    {
      "epoch": 0.7385623482125907,
      "grad_norm": 0.7846001386642456,
      "learning_rate": 8.57903441550312e-06,
      "loss": 0.0322,
      "step": 451300
    },
    {
      "epoch": 0.7385950786512441,
      "grad_norm": 0.7962468862533569,
      "learning_rate": 8.578968523289604e-06,
      "loss": 0.036,
      "step": 451320
    },
    {
      "epoch": 0.7386278090898974,
      "grad_norm": 0.7627114057540894,
      "learning_rate": 8.578902631076086e-06,
      "loss": 0.0376,
      "step": 451340
    },
    {
      "epoch": 0.7386605395285507,
      "grad_norm": 0.6962700486183167,
      "learning_rate": 8.57883673886257e-06,
      "loss": 0.0422,
      "step": 451360
    },
    {
      "epoch": 0.7386932699672041,
      "grad_norm": 1.8223308324813843,
      "learning_rate": 8.578770846649053e-06,
      "loss": 0.0267,
      "step": 451380
    },
    {
      "epoch": 0.7387260004058575,
      "grad_norm": 1.1242471933364868,
      "learning_rate": 8.578704954435535e-06,
      "loss": 0.0335,
      "step": 451400
    },
    {
      "epoch": 0.7387587308445108,
      "grad_norm": 1.436697244644165,
      "learning_rate": 8.578639062222018e-06,
      "loss": 0.0326,
      "step": 451420
    },
    {
      "epoch": 0.7387914612831641,
      "grad_norm": 0.8077171444892883,
      "learning_rate": 8.5785731700085e-06,
      "loss": 0.033,
      "step": 451440
    },
    {
      "epoch": 0.7388241917218175,
      "grad_norm": 1.1542396545410156,
      "learning_rate": 8.578507277794984e-06,
      "loss": 0.0315,
      "step": 451460
    },
    {
      "epoch": 0.7388569221604708,
      "grad_norm": 2.2420477867126465,
      "learning_rate": 8.578441385581466e-06,
      "loss": 0.0284,
      "step": 451480
    },
    {
      "epoch": 0.7388896525991241,
      "grad_norm": 0.5235248804092407,
      "learning_rate": 8.57837549336795e-06,
      "loss": 0.0249,
      "step": 451500
    },
    {
      "epoch": 0.7389223830377775,
      "grad_norm": 0.7226470708847046,
      "learning_rate": 8.578309601154433e-06,
      "loss": 0.0297,
      "step": 451520
    },
    {
      "epoch": 0.7389551134764308,
      "grad_norm": 0.5058350563049316,
      "learning_rate": 8.578243708940915e-06,
      "loss": 0.0335,
      "step": 451540
    },
    {
      "epoch": 0.7389878439150841,
      "grad_norm": 1.399153470993042,
      "learning_rate": 8.578177816727398e-06,
      "loss": 0.022,
      "step": 451560
    },
    {
      "epoch": 0.7390205743537375,
      "grad_norm": 0.36750665307044983,
      "learning_rate": 8.578111924513882e-06,
      "loss": 0.0426,
      "step": 451580
    },
    {
      "epoch": 0.7390533047923908,
      "grad_norm": 1.6787346601486206,
      "learning_rate": 8.578046032300364e-06,
      "loss": 0.0378,
      "step": 451600
    },
    {
      "epoch": 0.7390860352310442,
      "grad_norm": 1.2528408765792847,
      "learning_rate": 8.577980140086847e-06,
      "loss": 0.0262,
      "step": 451620
    },
    {
      "epoch": 0.7391187656696975,
      "grad_norm": 1.775902271270752,
      "learning_rate": 8.57791424787333e-06,
      "loss": 0.024,
      "step": 451640
    },
    {
      "epoch": 0.7391514961083508,
      "grad_norm": 0.26313960552215576,
      "learning_rate": 8.577848355659813e-06,
      "loss": 0.0267,
      "step": 451660
    },
    {
      "epoch": 0.7391842265470042,
      "grad_norm": 1.3063499927520752,
      "learning_rate": 8.577782463446295e-06,
      "loss": 0.0378,
      "step": 451680
    },
    {
      "epoch": 0.7392169569856575,
      "grad_norm": 0.9529086351394653,
      "learning_rate": 8.577716571232778e-06,
      "loss": 0.0292,
      "step": 451700
    },
    {
      "epoch": 0.7392496874243109,
      "grad_norm": 0.6520469784736633,
      "learning_rate": 8.577650679019262e-06,
      "loss": 0.0296,
      "step": 451720
    },
    {
      "epoch": 0.7392824178629642,
      "grad_norm": 2.870266914367676,
      "learning_rate": 8.577584786805744e-06,
      "loss": 0.0246,
      "step": 451740
    },
    {
      "epoch": 0.7393151483016175,
      "grad_norm": 0.44860729575157166,
      "learning_rate": 8.577518894592227e-06,
      "loss": 0.0333,
      "step": 451760
    },
    {
      "epoch": 0.7393478787402709,
      "grad_norm": 0.6105886101722717,
      "learning_rate": 8.57745300237871e-06,
      "loss": 0.0257,
      "step": 451780
    },
    {
      "epoch": 0.7393806091789242,
      "grad_norm": 0.1168096736073494,
      "learning_rate": 8.577387110165193e-06,
      "loss": 0.0315,
      "step": 451800
    },
    {
      "epoch": 0.7394133396175776,
      "grad_norm": 1.1904412508010864,
      "learning_rate": 8.577321217951675e-06,
      "loss": 0.0217,
      "step": 451820
    },
    {
      "epoch": 0.7394460700562309,
      "grad_norm": 1.5117859840393066,
      "learning_rate": 8.577255325738158e-06,
      "loss": 0.0295,
      "step": 451840
    },
    {
      "epoch": 0.7394788004948842,
      "grad_norm": 1.3169506788253784,
      "learning_rate": 8.57718943352464e-06,
      "loss": 0.0436,
      "step": 451860
    },
    {
      "epoch": 0.7395115309335376,
      "grad_norm": 0.4606776833534241,
      "learning_rate": 8.577123541311124e-06,
      "loss": 0.034,
      "step": 451880
    },
    {
      "epoch": 0.7395442613721909,
      "grad_norm": 2.011697769165039,
      "learning_rate": 8.577057649097606e-06,
      "loss": 0.0329,
      "step": 451900
    },
    {
      "epoch": 0.7395769918108442,
      "grad_norm": 2.710638999938965,
      "learning_rate": 8.57699175688409e-06,
      "loss": 0.0399,
      "step": 451920
    },
    {
      "epoch": 0.7396097222494976,
      "grad_norm": 1.6331676244735718,
      "learning_rate": 8.576925864670573e-06,
      "loss": 0.0348,
      "step": 451940
    },
    {
      "epoch": 0.7396424526881509,
      "grad_norm": 0.865185022354126,
      "learning_rate": 8.576859972457055e-06,
      "loss": 0.027,
      "step": 451960
    },
    {
      "epoch": 0.7396751831268042,
      "grad_norm": 0.6443539261817932,
      "learning_rate": 8.576794080243538e-06,
      "loss": 0.0336,
      "step": 451980
    },
    {
      "epoch": 0.7397079135654576,
      "grad_norm": 1.5855286121368408,
      "learning_rate": 8.576728188030022e-06,
      "loss": 0.0377,
      "step": 452000
    },
    {
      "epoch": 0.739740644004111,
      "grad_norm": 0.35035020112991333,
      "learning_rate": 8.576662295816504e-06,
      "loss": 0.0245,
      "step": 452020
    },
    {
      "epoch": 0.7397733744427643,
      "grad_norm": 1.3239070177078247,
      "learning_rate": 8.576596403602987e-06,
      "loss": 0.0373,
      "step": 452040
    },
    {
      "epoch": 0.7398061048814176,
      "grad_norm": 0.5653079748153687,
      "learning_rate": 8.576530511389471e-06,
      "loss": 0.0248,
      "step": 452060
    },
    {
      "epoch": 0.739838835320071,
      "grad_norm": 0.8732929229736328,
      "learning_rate": 8.576464619175953e-06,
      "loss": 0.0272,
      "step": 452080
    },
    {
      "epoch": 0.7398715657587243,
      "grad_norm": 1.6631414890289307,
      "learning_rate": 8.576398726962437e-06,
      "loss": 0.0427,
      "step": 452100
    },
    {
      "epoch": 0.7399042961973776,
      "grad_norm": 0.557217001914978,
      "learning_rate": 8.576332834748918e-06,
      "loss": 0.0369,
      "step": 452120
    },
    {
      "epoch": 0.739937026636031,
      "grad_norm": 1.0504053831100464,
      "learning_rate": 8.576266942535402e-06,
      "loss": 0.0335,
      "step": 452140
    },
    {
      "epoch": 0.7399697570746843,
      "grad_norm": 0.361017644405365,
      "learning_rate": 8.576201050321884e-06,
      "loss": 0.0451,
      "step": 452160
    },
    {
      "epoch": 0.7400024875133376,
      "grad_norm": 2.3647923469543457,
      "learning_rate": 8.576135158108367e-06,
      "loss": 0.0279,
      "step": 452180
    },
    {
      "epoch": 0.740035217951991,
      "grad_norm": 1.3957483768463135,
      "learning_rate": 8.57606926589485e-06,
      "loss": 0.0236,
      "step": 452200
    },
    {
      "epoch": 0.7400679483906444,
      "grad_norm": 1.4258068799972534,
      "learning_rate": 8.576003373681333e-06,
      "loss": 0.0457,
      "step": 452220
    },
    {
      "epoch": 0.7401006788292976,
      "grad_norm": 1.4127177000045776,
      "learning_rate": 8.575937481467815e-06,
      "loss": 0.0339,
      "step": 452240
    },
    {
      "epoch": 0.740133409267951,
      "grad_norm": 0.41881632804870605,
      "learning_rate": 8.575871589254298e-06,
      "loss": 0.0307,
      "step": 452260
    },
    {
      "epoch": 0.7401661397066044,
      "grad_norm": 0.6843514442443848,
      "learning_rate": 8.57580569704078e-06,
      "loss": 0.021,
      "step": 452280
    },
    {
      "epoch": 0.7401988701452576,
      "grad_norm": 5.616788387298584,
      "learning_rate": 8.575739804827264e-06,
      "loss": 0.0246,
      "step": 452300
    },
    {
      "epoch": 0.740231600583911,
      "grad_norm": 0.7596463561058044,
      "learning_rate": 8.575673912613748e-06,
      "loss": 0.0242,
      "step": 452320
    },
    {
      "epoch": 0.7402643310225644,
      "grad_norm": 0.8187515735626221,
      "learning_rate": 8.57560802040023e-06,
      "loss": 0.0472,
      "step": 452340
    },
    {
      "epoch": 0.7402970614612177,
      "grad_norm": 0.5947707295417786,
      "learning_rate": 8.575542128186713e-06,
      "loss": 0.0334,
      "step": 452360
    },
    {
      "epoch": 0.740329791899871,
      "grad_norm": 0.577074408531189,
      "learning_rate": 8.575476235973197e-06,
      "loss": 0.0415,
      "step": 452380
    },
    {
      "epoch": 0.7403625223385244,
      "grad_norm": 1.832804560661316,
      "learning_rate": 8.575410343759678e-06,
      "loss": 0.0251,
      "step": 452400
    },
    {
      "epoch": 0.7403952527771778,
      "grad_norm": 0.566143810749054,
      "learning_rate": 8.575344451546162e-06,
      "loss": 0.0254,
      "step": 452420
    },
    {
      "epoch": 0.740427983215831,
      "grad_norm": 1.3651989698410034,
      "learning_rate": 8.575278559332646e-06,
      "loss": 0.0331,
      "step": 452440
    },
    {
      "epoch": 0.7404607136544844,
      "grad_norm": 0.7639572024345398,
      "learning_rate": 8.575212667119128e-06,
      "loss": 0.0358,
      "step": 452460
    },
    {
      "epoch": 0.7404934440931378,
      "grad_norm": 0.2818273901939392,
      "learning_rate": 8.575146774905611e-06,
      "loss": 0.0367,
      "step": 452480
    },
    {
      "epoch": 0.740526174531791,
      "grad_norm": 0.6817172169685364,
      "learning_rate": 8.575080882692093e-06,
      "loss": 0.0281,
      "step": 452500
    },
    {
      "epoch": 0.7405589049704444,
      "grad_norm": 0.6408823132514954,
      "learning_rate": 8.575014990478577e-06,
      "loss": 0.0372,
      "step": 452520
    },
    {
      "epoch": 0.7405916354090978,
      "grad_norm": 0.6711265444755554,
      "learning_rate": 8.574949098265058e-06,
      "loss": 0.0337,
      "step": 452540
    },
    {
      "epoch": 0.740624365847751,
      "grad_norm": 1.274040937423706,
      "learning_rate": 8.574883206051542e-06,
      "loss": 0.0452,
      "step": 452560
    },
    {
      "epoch": 0.7406570962864044,
      "grad_norm": 0.3000003397464752,
      "learning_rate": 8.574817313838024e-06,
      "loss": 0.0359,
      "step": 452580
    },
    {
      "epoch": 0.7406898267250578,
      "grad_norm": 1.1624667644500732,
      "learning_rate": 8.574751421624508e-06,
      "loss": 0.0268,
      "step": 452600
    },
    {
      "epoch": 0.7407225571637112,
      "grad_norm": 0.24955374002456665,
      "learning_rate": 8.57468552941099e-06,
      "loss": 0.0312,
      "step": 452620
    },
    {
      "epoch": 0.7407552876023644,
      "grad_norm": 4.577504634857178,
      "learning_rate": 8.574619637197473e-06,
      "loss": 0.0358,
      "step": 452640
    },
    {
      "epoch": 0.7407880180410178,
      "grad_norm": 0.7368493676185608,
      "learning_rate": 8.574553744983955e-06,
      "loss": 0.0314,
      "step": 452660
    },
    {
      "epoch": 0.7408207484796712,
      "grad_norm": 1.539683222770691,
      "learning_rate": 8.574487852770439e-06,
      "loss": 0.0397,
      "step": 452680
    },
    {
      "epoch": 0.7408534789183244,
      "grad_norm": 0.22834663093090057,
      "learning_rate": 8.57442196055692e-06,
      "loss": 0.0328,
      "step": 452700
    },
    {
      "epoch": 0.7408862093569778,
      "grad_norm": 1.4000369310379028,
      "learning_rate": 8.574356068343404e-06,
      "loss": 0.0278,
      "step": 452720
    },
    {
      "epoch": 0.7409189397956312,
      "grad_norm": 0.4478750228881836,
      "learning_rate": 8.574290176129888e-06,
      "loss": 0.0402,
      "step": 452740
    },
    {
      "epoch": 0.7409516702342844,
      "grad_norm": 1.4775131940841675,
      "learning_rate": 8.57422428391637e-06,
      "loss": 0.0279,
      "step": 452760
    },
    {
      "epoch": 0.7409844006729378,
      "grad_norm": 1.3842016458511353,
      "learning_rate": 8.574158391702853e-06,
      "loss": 0.0349,
      "step": 452780
    },
    {
      "epoch": 0.7410171311115912,
      "grad_norm": 2.195594549179077,
      "learning_rate": 8.574092499489337e-06,
      "loss": 0.0411,
      "step": 452800
    },
    {
      "epoch": 0.7410498615502445,
      "grad_norm": 0.1933186948299408,
      "learning_rate": 8.574026607275819e-06,
      "loss": 0.0436,
      "step": 452820
    },
    {
      "epoch": 0.7410825919888978,
      "grad_norm": 3.0549182891845703,
      "learning_rate": 8.573960715062302e-06,
      "loss": 0.0319,
      "step": 452840
    },
    {
      "epoch": 0.7411153224275512,
      "grad_norm": 1.0559545755386353,
      "learning_rate": 8.573894822848786e-06,
      "loss": 0.0262,
      "step": 452860
    },
    {
      "epoch": 0.7411480528662046,
      "grad_norm": 0.6450008153915405,
      "learning_rate": 8.573828930635268e-06,
      "loss": 0.0478,
      "step": 452880
    },
    {
      "epoch": 0.7411807833048578,
      "grad_norm": 1.2468003034591675,
      "learning_rate": 8.573763038421751e-06,
      "loss": 0.0397,
      "step": 452900
    },
    {
      "epoch": 0.7412135137435112,
      "grad_norm": 0.9251391291618347,
      "learning_rate": 8.573697146208233e-06,
      "loss": 0.0234,
      "step": 452920
    },
    {
      "epoch": 0.7412462441821646,
      "grad_norm": 1.9690452814102173,
      "learning_rate": 8.573631253994717e-06,
      "loss": 0.0312,
      "step": 452940
    },
    {
      "epoch": 0.7412789746208178,
      "grad_norm": 1.8244328498840332,
      "learning_rate": 8.573565361781199e-06,
      "loss": 0.0304,
      "step": 452960
    },
    {
      "epoch": 0.7413117050594712,
      "grad_norm": 0.33647826313972473,
      "learning_rate": 8.573499469567682e-06,
      "loss": 0.0273,
      "step": 452980
    },
    {
      "epoch": 0.7413444354981246,
      "grad_norm": 1.3775007724761963,
      "learning_rate": 8.573433577354164e-06,
      "loss": 0.0342,
      "step": 453000
    },
    {
      "epoch": 0.7413771659367779,
      "grad_norm": 2.504000663757324,
      "learning_rate": 8.573367685140648e-06,
      "loss": 0.0315,
      "step": 453020
    },
    {
      "epoch": 0.7414098963754312,
      "grad_norm": 1.5283533334732056,
      "learning_rate": 8.57330179292713e-06,
      "loss": 0.034,
      "step": 453040
    },
    {
      "epoch": 0.7414426268140846,
      "grad_norm": 2.1051976680755615,
      "learning_rate": 8.573235900713613e-06,
      "loss": 0.0303,
      "step": 453060
    },
    {
      "epoch": 0.7414753572527379,
      "grad_norm": 1.3633228540420532,
      "learning_rate": 8.573170008500095e-06,
      "loss": 0.0308,
      "step": 453080
    },
    {
      "epoch": 0.7415080876913912,
      "grad_norm": 1.1602176427841187,
      "learning_rate": 8.573104116286579e-06,
      "loss": 0.0303,
      "step": 453100
    },
    {
      "epoch": 0.7415408181300446,
      "grad_norm": 0.49822601675987244,
      "learning_rate": 8.573038224073062e-06,
      "loss": 0.0356,
      "step": 453120
    },
    {
      "epoch": 0.741573548568698,
      "grad_norm": 0.6902020573616028,
      "learning_rate": 8.572972331859544e-06,
      "loss": 0.0412,
      "step": 453140
    },
    {
      "epoch": 0.7416062790073512,
      "grad_norm": 0.6440185904502869,
      "learning_rate": 8.572906439646028e-06,
      "loss": 0.0267,
      "step": 453160
    },
    {
      "epoch": 0.7416390094460046,
      "grad_norm": 1.3029662370681763,
      "learning_rate": 8.572840547432511e-06,
      "loss": 0.0373,
      "step": 453180
    },
    {
      "epoch": 0.741671739884658,
      "grad_norm": 1.2022639513015747,
      "learning_rate": 8.572774655218993e-06,
      "loss": 0.0227,
      "step": 453200
    },
    {
      "epoch": 0.7417044703233113,
      "grad_norm": 1.220967411994934,
      "learning_rate": 8.572708763005477e-06,
      "loss": 0.0301,
      "step": 453220
    },
    {
      "epoch": 0.7417372007619646,
      "grad_norm": 0.2152712494134903,
      "learning_rate": 8.57264287079196e-06,
      "loss": 0.0252,
      "step": 453240
    },
    {
      "epoch": 0.741769931200618,
      "grad_norm": 0.7818232774734497,
      "learning_rate": 8.572576978578442e-06,
      "loss": 0.0289,
      "step": 453260
    },
    {
      "epoch": 0.7418026616392713,
      "grad_norm": 0.34001412987709045,
      "learning_rate": 8.572511086364926e-06,
      "loss": 0.025,
      "step": 453280
    },
    {
      "epoch": 0.7418353920779246,
      "grad_norm": 0.9679440855979919,
      "learning_rate": 8.572445194151408e-06,
      "loss": 0.0282,
      "step": 453300
    },
    {
      "epoch": 0.741868122516578,
      "grad_norm": 1.4480286836624146,
      "learning_rate": 8.572379301937891e-06,
      "loss": 0.0342,
      "step": 453320
    },
    {
      "epoch": 0.7419008529552313,
      "grad_norm": 1.1976102590560913,
      "learning_rate": 8.572313409724373e-06,
      "loss": 0.0303,
      "step": 453340
    },
    {
      "epoch": 0.7419335833938846,
      "grad_norm": 2.3156380653381348,
      "learning_rate": 8.572247517510857e-06,
      "loss": 0.0355,
      "step": 453360
    },
    {
      "epoch": 0.741966313832538,
      "grad_norm": 1.5696955919265747,
      "learning_rate": 8.572181625297339e-06,
      "loss": 0.0247,
      "step": 453380
    },
    {
      "epoch": 0.7419990442711913,
      "grad_norm": 0.5300086736679077,
      "learning_rate": 8.572115733083822e-06,
      "loss": 0.0275,
      "step": 453400
    },
    {
      "epoch": 0.7420317747098447,
      "grad_norm": 0.7319291830062866,
      "learning_rate": 8.572049840870304e-06,
      "loss": 0.0399,
      "step": 453420
    },
    {
      "epoch": 0.742064505148498,
      "grad_norm": 0.6584497094154358,
      "learning_rate": 8.571983948656788e-06,
      "loss": 0.025,
      "step": 453440
    },
    {
      "epoch": 0.7420972355871513,
      "grad_norm": 2.1224734783172607,
      "learning_rate": 8.571918056443271e-06,
      "loss": 0.0364,
      "step": 453460
    },
    {
      "epoch": 0.7421299660258047,
      "grad_norm": 1.1155591011047363,
      "learning_rate": 8.571852164229753e-06,
      "loss": 0.0306,
      "step": 453480
    },
    {
      "epoch": 0.742162696464458,
      "grad_norm": 1.473057508468628,
      "learning_rate": 8.571786272016237e-06,
      "loss": 0.027,
      "step": 453500
    },
    {
      "epoch": 0.7421954269031114,
      "grad_norm": 0.6167200207710266,
      "learning_rate": 8.571720379802719e-06,
      "loss": 0.0375,
      "step": 453520
    },
    {
      "epoch": 0.7422281573417647,
      "grad_norm": 0.640285849571228,
      "learning_rate": 8.571654487589202e-06,
      "loss": 0.0337,
      "step": 453540
    },
    {
      "epoch": 0.742260887780418,
      "grad_norm": 2.282017707824707,
      "learning_rate": 8.571588595375686e-06,
      "loss": 0.0302,
      "step": 453560
    },
    {
      "epoch": 0.7422936182190714,
      "grad_norm": 1.0060226917266846,
      "learning_rate": 8.571522703162168e-06,
      "loss": 0.0401,
      "step": 453580
    },
    {
      "epoch": 0.7423263486577247,
      "grad_norm": 2.72011661529541,
      "learning_rate": 8.571456810948651e-06,
      "loss": 0.0356,
      "step": 453600
    },
    {
      "epoch": 0.7423590790963781,
      "grad_norm": 1.046685814857483,
      "learning_rate": 8.571390918735135e-06,
      "loss": 0.0335,
      "step": 453620
    },
    {
      "epoch": 0.7423918095350314,
      "grad_norm": 1.9599323272705078,
      "learning_rate": 8.571325026521617e-06,
      "loss": 0.0351,
      "step": 453640
    },
    {
      "epoch": 0.7424245399736847,
      "grad_norm": 0.8633260130882263,
      "learning_rate": 8.5712591343081e-06,
      "loss": 0.0247,
      "step": 453660
    },
    {
      "epoch": 0.7424572704123381,
      "grad_norm": 1.7970954179763794,
      "learning_rate": 8.571193242094582e-06,
      "loss": 0.0303,
      "step": 453680
    },
    {
      "epoch": 0.7424900008509914,
      "grad_norm": 1.3402868509292603,
      "learning_rate": 8.571127349881066e-06,
      "loss": 0.0258,
      "step": 453700
    },
    {
      "epoch": 0.7425227312896447,
      "grad_norm": 2.7297301292419434,
      "learning_rate": 8.571061457667548e-06,
      "loss": 0.0395,
      "step": 453720
    },
    {
      "epoch": 0.7425554617282981,
      "grad_norm": 1.5344867706298828,
      "learning_rate": 8.570995565454031e-06,
      "loss": 0.0307,
      "step": 453740
    },
    {
      "epoch": 0.7425881921669514,
      "grad_norm": 0.45671412348747253,
      "learning_rate": 8.570929673240513e-06,
      "loss": 0.0316,
      "step": 453760
    },
    {
      "epoch": 0.7426209226056047,
      "grad_norm": 0.9813408255577087,
      "learning_rate": 8.570863781026997e-06,
      "loss": 0.0266,
      "step": 453780
    },
    {
      "epoch": 0.7426536530442581,
      "grad_norm": 0.514143168926239,
      "learning_rate": 8.570797888813479e-06,
      "loss": 0.0195,
      "step": 453800
    },
    {
      "epoch": 0.7426863834829114,
      "grad_norm": 1.2849799394607544,
      "learning_rate": 8.570731996599962e-06,
      "loss": 0.041,
      "step": 453820
    },
    {
      "epoch": 0.7427191139215648,
      "grad_norm": 1.098139762878418,
      "learning_rate": 8.570666104386446e-06,
      "loss": 0.0353,
      "step": 453840
    },
    {
      "epoch": 0.7427518443602181,
      "grad_norm": 0.9311041235923767,
      "learning_rate": 8.570600212172928e-06,
      "loss": 0.0302,
      "step": 453860
    },
    {
      "epoch": 0.7427845747988715,
      "grad_norm": 0.21743632853031158,
      "learning_rate": 8.570534319959411e-06,
      "loss": 0.0259,
      "step": 453880
    },
    {
      "epoch": 0.7428173052375248,
      "grad_norm": 1.1923542022705078,
      "learning_rate": 8.570468427745893e-06,
      "loss": 0.0275,
      "step": 453900
    },
    {
      "epoch": 0.7428500356761781,
      "grad_norm": 1.2191524505615234,
      "learning_rate": 8.570402535532377e-06,
      "loss": 0.0274,
      "step": 453920
    },
    {
      "epoch": 0.7428827661148315,
      "grad_norm": 0.9078943133354187,
      "learning_rate": 8.570336643318859e-06,
      "loss": 0.02,
      "step": 453940
    },
    {
      "epoch": 0.7429154965534848,
      "grad_norm": 1.1510276794433594,
      "learning_rate": 8.570270751105342e-06,
      "loss": 0.0297,
      "step": 453960
    },
    {
      "epoch": 0.7429482269921381,
      "grad_norm": 1.573608160018921,
      "learning_rate": 8.570204858891826e-06,
      "loss": 0.0323,
      "step": 453980
    },
    {
      "epoch": 0.7429809574307915,
      "grad_norm": 1.1312432289123535,
      "learning_rate": 8.570138966678308e-06,
      "loss": 0.0222,
      "step": 454000
    },
    {
      "epoch": 0.7430136878694448,
      "grad_norm": 0.9461610913276672,
      "learning_rate": 8.570073074464791e-06,
      "loss": 0.0303,
      "step": 454020
    },
    {
      "epoch": 0.7430464183080981,
      "grad_norm": 0.5282884836196899,
      "learning_rate": 8.570007182251275e-06,
      "loss": 0.0338,
      "step": 454040
    },
    {
      "epoch": 0.7430791487467515,
      "grad_norm": 0.8632700443267822,
      "learning_rate": 8.569941290037757e-06,
      "loss": 0.0272,
      "step": 454060
    },
    {
      "epoch": 0.7431118791854049,
      "grad_norm": 1.2253601551055908,
      "learning_rate": 8.56987539782424e-06,
      "loss": 0.031,
      "step": 454080
    },
    {
      "epoch": 0.7431446096240582,
      "grad_norm": 2.080411195755005,
      "learning_rate": 8.569809505610722e-06,
      "loss": 0.0345,
      "step": 454100
    },
    {
      "epoch": 0.7431773400627115,
      "grad_norm": 1.9707072973251343,
      "learning_rate": 8.569743613397206e-06,
      "loss": 0.0284,
      "step": 454120
    },
    {
      "epoch": 0.7432100705013649,
      "grad_norm": 1.0105289220809937,
      "learning_rate": 8.569677721183688e-06,
      "loss": 0.0241,
      "step": 454140
    },
    {
      "epoch": 0.7432428009400182,
      "grad_norm": 0.15608938038349152,
      "learning_rate": 8.569611828970171e-06,
      "loss": 0.0301,
      "step": 454160
    },
    {
      "epoch": 0.7432755313786715,
      "grad_norm": 0.4971381723880768,
      "learning_rate": 8.569545936756655e-06,
      "loss": 0.0233,
      "step": 454180
    },
    {
      "epoch": 0.7433082618173249,
      "grad_norm": 1.5073906183242798,
      "learning_rate": 8.569480044543137e-06,
      "loss": 0.0283,
      "step": 454200
    },
    {
      "epoch": 0.7433409922559782,
      "grad_norm": 0.28847941756248474,
      "learning_rate": 8.56941415232962e-06,
      "loss": 0.0313,
      "step": 454220
    },
    {
      "epoch": 0.7433737226946315,
      "grad_norm": 0.47175633907318115,
      "learning_rate": 8.569348260116102e-06,
      "loss": 0.0301,
      "step": 454240
    },
    {
      "epoch": 0.7434064531332849,
      "grad_norm": 2.136620283126831,
      "learning_rate": 8.569282367902586e-06,
      "loss": 0.0295,
      "step": 454260
    },
    {
      "epoch": 0.7434391835719383,
      "grad_norm": 3.614004373550415,
      "learning_rate": 8.569216475689068e-06,
      "loss": 0.0414,
      "step": 454280
    },
    {
      "epoch": 0.7434719140105915,
      "grad_norm": 1.2945319414138794,
      "learning_rate": 8.569150583475551e-06,
      "loss": 0.0357,
      "step": 454300
    },
    {
      "epoch": 0.7435046444492449,
      "grad_norm": 1.5923335552215576,
      "learning_rate": 8.569084691262033e-06,
      "loss": 0.028,
      "step": 454320
    },
    {
      "epoch": 0.7435373748878983,
      "grad_norm": 0.148431196808815,
      "learning_rate": 8.569018799048517e-06,
      "loss": 0.0251,
      "step": 454340
    },
    {
      "epoch": 0.7435701053265515,
      "grad_norm": 4.0929484367370605,
      "learning_rate": 8.568952906835e-06,
      "loss": 0.0268,
      "step": 454360
    },
    {
      "epoch": 0.7436028357652049,
      "grad_norm": 1.05403733253479,
      "learning_rate": 8.568887014621482e-06,
      "loss": 0.0261,
      "step": 454380
    },
    {
      "epoch": 0.7436355662038583,
      "grad_norm": 0.21162240207195282,
      "learning_rate": 8.568821122407966e-06,
      "loss": 0.0428,
      "step": 454400
    },
    {
      "epoch": 0.7436682966425116,
      "grad_norm": 2.2539267539978027,
      "learning_rate": 8.56875523019445e-06,
      "loss": 0.0355,
      "step": 454420
    },
    {
      "epoch": 0.7437010270811649,
      "grad_norm": 0.46533724665641785,
      "learning_rate": 8.568689337980931e-06,
      "loss": 0.0223,
      "step": 454440
    },
    {
      "epoch": 0.7437337575198183,
      "grad_norm": 1.0224215984344482,
      "learning_rate": 8.568623445767415e-06,
      "loss": 0.0319,
      "step": 454460
    },
    {
      "epoch": 0.7437664879584717,
      "grad_norm": 1.1057721376419067,
      "learning_rate": 8.568557553553897e-06,
      "loss": 0.0252,
      "step": 454480
    },
    {
      "epoch": 0.7437992183971249,
      "grad_norm": 0.7870633602142334,
      "learning_rate": 8.56849166134038e-06,
      "loss": 0.0331,
      "step": 454500
    },
    {
      "epoch": 0.7438319488357783,
      "grad_norm": 1.480200171470642,
      "learning_rate": 8.568425769126864e-06,
      "loss": 0.0438,
      "step": 454520
    },
    {
      "epoch": 0.7438646792744317,
      "grad_norm": 2.9535319805145264,
      "learning_rate": 8.568359876913346e-06,
      "loss": 0.0381,
      "step": 454540
    },
    {
      "epoch": 0.7438974097130849,
      "grad_norm": 0.4348149597644806,
      "learning_rate": 8.56829398469983e-06,
      "loss": 0.0302,
      "step": 454560
    },
    {
      "epoch": 0.7439301401517383,
      "grad_norm": 0.24617725610733032,
      "learning_rate": 8.568228092486311e-06,
      "loss": 0.038,
      "step": 454580
    },
    {
      "epoch": 0.7439628705903917,
      "grad_norm": 0.7266936898231506,
      "learning_rate": 8.568162200272795e-06,
      "loss": 0.0295,
      "step": 454600
    },
    {
      "epoch": 0.7439956010290449,
      "grad_norm": 0.6482768654823303,
      "learning_rate": 8.568096308059277e-06,
      "loss": 0.0312,
      "step": 454620
    },
    {
      "epoch": 0.7440283314676983,
      "grad_norm": 2.879218101501465,
      "learning_rate": 8.56803041584576e-06,
      "loss": 0.0372,
      "step": 454640
    },
    {
      "epoch": 0.7440610619063517,
      "grad_norm": 0.5371739268302917,
      "learning_rate": 8.567964523632242e-06,
      "loss": 0.0306,
      "step": 454660
    },
    {
      "epoch": 0.7440937923450051,
      "grad_norm": 1.421954870223999,
      "learning_rate": 8.567898631418726e-06,
      "loss": 0.0288,
      "step": 454680
    },
    {
      "epoch": 0.7441265227836583,
      "grad_norm": 0.7012171149253845,
      "learning_rate": 8.567832739205208e-06,
      "loss": 0.0295,
      "step": 454700
    },
    {
      "epoch": 0.7441592532223117,
      "grad_norm": 2.2781565189361572,
      "learning_rate": 8.567766846991692e-06,
      "loss": 0.0363,
      "step": 454720
    },
    {
      "epoch": 0.7441919836609651,
      "grad_norm": 0.8815277218818665,
      "learning_rate": 8.567700954778173e-06,
      "loss": 0.0281,
      "step": 454740
    },
    {
      "epoch": 0.7442247140996183,
      "grad_norm": 1.023339867591858,
      "learning_rate": 8.567635062564657e-06,
      "loss": 0.0249,
      "step": 454760
    },
    {
      "epoch": 0.7442574445382717,
      "grad_norm": 0.40128132700920105,
      "learning_rate": 8.56756917035114e-06,
      "loss": 0.0208,
      "step": 454780
    },
    {
      "epoch": 0.7442901749769251,
      "grad_norm": 0.6709458231925964,
      "learning_rate": 8.567503278137622e-06,
      "loss": 0.023,
      "step": 454800
    },
    {
      "epoch": 0.7443229054155783,
      "grad_norm": 1.4968012571334839,
      "learning_rate": 8.567437385924106e-06,
      "loss": 0.0284,
      "step": 454820
    },
    {
      "epoch": 0.7443556358542317,
      "grad_norm": 0.8823142647743225,
      "learning_rate": 8.56737149371059e-06,
      "loss": 0.041,
      "step": 454840
    },
    {
      "epoch": 0.7443883662928851,
      "grad_norm": 0.5203601717948914,
      "learning_rate": 8.567305601497072e-06,
      "loss": 0.0348,
      "step": 454860
    },
    {
      "epoch": 0.7444210967315384,
      "grad_norm": 0.4350321590900421,
      "learning_rate": 8.567239709283555e-06,
      "loss": 0.0264,
      "step": 454880
    },
    {
      "epoch": 0.7444538271701917,
      "grad_norm": 0.5951411724090576,
      "learning_rate": 8.567173817070039e-06,
      "loss": 0.0311,
      "step": 454900
    },
    {
      "epoch": 0.7444865576088451,
      "grad_norm": 1.6844907999038696,
      "learning_rate": 8.56710792485652e-06,
      "loss": 0.0361,
      "step": 454920
    },
    {
      "epoch": 0.7445192880474985,
      "grad_norm": 1.0564587116241455,
      "learning_rate": 8.567042032643004e-06,
      "loss": 0.0302,
      "step": 454940
    },
    {
      "epoch": 0.7445520184861517,
      "grad_norm": 0.20283006131649017,
      "learning_rate": 8.566976140429486e-06,
      "loss": 0.0232,
      "step": 454960
    },
    {
      "epoch": 0.7445847489248051,
      "grad_norm": 0.760879397392273,
      "learning_rate": 8.56691024821597e-06,
      "loss": 0.0331,
      "step": 454980
    },
    {
      "epoch": 0.7446174793634585,
      "grad_norm": 0.04395899921655655,
      "learning_rate": 8.566844356002452e-06,
      "loss": 0.0283,
      "step": 455000
    },
    {
      "epoch": 0.7446502098021117,
      "grad_norm": 1.731050729751587,
      "learning_rate": 8.566778463788935e-06,
      "loss": 0.0324,
      "step": 455020
    },
    {
      "epoch": 0.7446829402407651,
      "grad_norm": 0.40695467591285706,
      "learning_rate": 8.566712571575417e-06,
      "loss": 0.0265,
      "step": 455040
    },
    {
      "epoch": 0.7447156706794185,
      "grad_norm": 1.574143648147583,
      "learning_rate": 8.5666466793619e-06,
      "loss": 0.0236,
      "step": 455060
    },
    {
      "epoch": 0.7447484011180718,
      "grad_norm": 0.8051475286483765,
      "learning_rate": 8.566580787148383e-06,
      "loss": 0.0477,
      "step": 455080
    },
    {
      "epoch": 0.7447811315567251,
      "grad_norm": 0.7389669418334961,
      "learning_rate": 8.566514894934866e-06,
      "loss": 0.031,
      "step": 455100
    },
    {
      "epoch": 0.7448138619953785,
      "grad_norm": 1.551634669303894,
      "learning_rate": 8.566449002721348e-06,
      "loss": 0.0431,
      "step": 455120
    },
    {
      "epoch": 0.7448465924340318,
      "grad_norm": 1.1968291997909546,
      "learning_rate": 8.566383110507832e-06,
      "loss": 0.048,
      "step": 455140
    },
    {
      "epoch": 0.7448793228726851,
      "grad_norm": 1.0929666757583618,
      "learning_rate": 8.566317218294315e-06,
      "loss": 0.0324,
      "step": 455160
    },
    {
      "epoch": 0.7449120533113385,
      "grad_norm": 0.9732015132904053,
      "learning_rate": 8.566251326080797e-06,
      "loss": 0.0397,
      "step": 455180
    },
    {
      "epoch": 0.7449447837499918,
      "grad_norm": 0.9966074228286743,
      "learning_rate": 8.56618543386728e-06,
      "loss": 0.0283,
      "step": 455200
    },
    {
      "epoch": 0.7449775141886451,
      "grad_norm": 0.2388293743133545,
      "learning_rate": 8.566119541653764e-06,
      "loss": 0.0169,
      "step": 455220
    },
    {
      "epoch": 0.7450102446272985,
      "grad_norm": 1.1796603202819824,
      "learning_rate": 8.566053649440246e-06,
      "loss": 0.0318,
      "step": 455240
    },
    {
      "epoch": 0.7450429750659519,
      "grad_norm": 1.1614305973052979,
      "learning_rate": 8.56598775722673e-06,
      "loss": 0.0373,
      "step": 455260
    },
    {
      "epoch": 0.7450757055046052,
      "grad_norm": 0.6558474898338318,
      "learning_rate": 8.565921865013213e-06,
      "loss": 0.0297,
      "step": 455280
    },
    {
      "epoch": 0.7451084359432585,
      "grad_norm": 0.3869094252586365,
      "learning_rate": 8.565855972799695e-06,
      "loss": 0.033,
      "step": 455300
    },
    {
      "epoch": 0.7451411663819119,
      "grad_norm": 2.01522159576416,
      "learning_rate": 8.565790080586179e-06,
      "loss": 0.032,
      "step": 455320
    },
    {
      "epoch": 0.7451738968205652,
      "grad_norm": 1.0935930013656616,
      "learning_rate": 8.56572418837266e-06,
      "loss": 0.0285,
      "step": 455340
    },
    {
      "epoch": 0.7452066272592185,
      "grad_norm": 1.880947232246399,
      "learning_rate": 8.565658296159144e-06,
      "loss": 0.0324,
      "step": 455360
    },
    {
      "epoch": 0.7452393576978719,
      "grad_norm": 1.0615190267562866,
      "learning_rate": 8.565592403945626e-06,
      "loss": 0.029,
      "step": 455380
    },
    {
      "epoch": 0.7452720881365252,
      "grad_norm": 0.861181378364563,
      "learning_rate": 8.56552651173211e-06,
      "loss": 0.0265,
      "step": 455400
    },
    {
      "epoch": 0.7453048185751785,
      "grad_norm": 0.8976776599884033,
      "learning_rate": 8.565460619518592e-06,
      "loss": 0.0289,
      "step": 455420
    },
    {
      "epoch": 0.7453375490138319,
      "grad_norm": 0.4157583713531494,
      "learning_rate": 8.565394727305075e-06,
      "loss": 0.024,
      "step": 455440
    },
    {
      "epoch": 0.7453702794524852,
      "grad_norm": 0.6637806296348572,
      "learning_rate": 8.565328835091557e-06,
      "loss": 0.0275,
      "step": 455460
    },
    {
      "epoch": 0.7454030098911386,
      "grad_norm": 1.523627758026123,
      "learning_rate": 8.56526294287804e-06,
      "loss": 0.0304,
      "step": 455480
    },
    {
      "epoch": 0.7454357403297919,
      "grad_norm": 0.6812778115272522,
      "learning_rate": 8.565197050664523e-06,
      "loss": 0.0279,
      "step": 455500
    },
    {
      "epoch": 0.7454684707684452,
      "grad_norm": 0.6548888683319092,
      "learning_rate": 8.565131158451006e-06,
      "loss": 0.0445,
      "step": 455520
    },
    {
      "epoch": 0.7455012012070986,
      "grad_norm": 0.9469074010848999,
      "learning_rate": 8.565065266237488e-06,
      "loss": 0.0411,
      "step": 455540
    },
    {
      "epoch": 0.7455339316457519,
      "grad_norm": 0.24218624830245972,
      "learning_rate": 8.564999374023972e-06,
      "loss": 0.0336,
      "step": 455560
    },
    {
      "epoch": 0.7455666620844053,
      "grad_norm": 2.2740941047668457,
      "learning_rate": 8.564933481810455e-06,
      "loss": 0.0334,
      "step": 455580
    },
    {
      "epoch": 0.7455993925230586,
      "grad_norm": 1.2232846021652222,
      "learning_rate": 8.564867589596937e-06,
      "loss": 0.0325,
      "step": 455600
    },
    {
      "epoch": 0.7456321229617119,
      "grad_norm": 0.6337171196937561,
      "learning_rate": 8.56480169738342e-06,
      "loss": 0.0282,
      "step": 455620
    },
    {
      "epoch": 0.7456648534003653,
      "grad_norm": 0.9135124087333679,
      "learning_rate": 8.564735805169904e-06,
      "loss": 0.0249,
      "step": 455640
    },
    {
      "epoch": 0.7456975838390186,
      "grad_norm": 0.7931122779846191,
      "learning_rate": 8.564669912956386e-06,
      "loss": 0.0265,
      "step": 455660
    },
    {
      "epoch": 0.745730314277672,
      "grad_norm": 1.5531307458877563,
      "learning_rate": 8.56460402074287e-06,
      "loss": 0.0366,
      "step": 455680
    },
    {
      "epoch": 0.7457630447163253,
      "grad_norm": 0.5250711441040039,
      "learning_rate": 8.564538128529353e-06,
      "loss": 0.031,
      "step": 455700
    },
    {
      "epoch": 0.7457957751549786,
      "grad_norm": 1.1284406185150146,
      "learning_rate": 8.564472236315835e-06,
      "loss": 0.033,
      "step": 455720
    },
    {
      "epoch": 0.745828505593632,
      "grad_norm": 2.2092792987823486,
      "learning_rate": 8.564406344102319e-06,
      "loss": 0.0395,
      "step": 455740
    },
    {
      "epoch": 0.7458612360322853,
      "grad_norm": 2.9581780433654785,
      "learning_rate": 8.5643404518888e-06,
      "loss": 0.0315,
      "step": 455760
    },
    {
      "epoch": 0.7458939664709386,
      "grad_norm": 0.9611814618110657,
      "learning_rate": 8.564274559675284e-06,
      "loss": 0.0471,
      "step": 455780
    },
    {
      "epoch": 0.745926696909592,
      "grad_norm": 0.9566500186920166,
      "learning_rate": 8.564208667461766e-06,
      "loss": 0.025,
      "step": 455800
    },
    {
      "epoch": 0.7459594273482453,
      "grad_norm": 1.2531344890594482,
      "learning_rate": 8.56414277524825e-06,
      "loss": 0.0424,
      "step": 455820
    },
    {
      "epoch": 0.7459921577868986,
      "grad_norm": 0.6224141716957092,
      "learning_rate": 8.564076883034732e-06,
      "loss": 0.0296,
      "step": 455840
    },
    {
      "epoch": 0.746024888225552,
      "grad_norm": 5.9443039894104,
      "learning_rate": 8.564010990821215e-06,
      "loss": 0.0336,
      "step": 455860
    },
    {
      "epoch": 0.7460576186642054,
      "grad_norm": 1.7730376720428467,
      "learning_rate": 8.563945098607697e-06,
      "loss": 0.0265,
      "step": 455880
    },
    {
      "epoch": 0.7460903491028587,
      "grad_norm": 1.296477198600769,
      "learning_rate": 8.56387920639418e-06,
      "loss": 0.0322,
      "step": 455900
    },
    {
      "epoch": 0.746123079541512,
      "grad_norm": 0.2683621048927307,
      "learning_rate": 8.563813314180663e-06,
      "loss": 0.0257,
      "step": 455920
    },
    {
      "epoch": 0.7461558099801654,
      "grad_norm": 0.6350030899047852,
      "learning_rate": 8.563747421967146e-06,
      "loss": 0.0319,
      "step": 455940
    },
    {
      "epoch": 0.7461885404188187,
      "grad_norm": 0.5797369480133057,
      "learning_rate": 8.56368152975363e-06,
      "loss": 0.0237,
      "step": 455960
    },
    {
      "epoch": 0.746221270857472,
      "grad_norm": 0.8227390050888062,
      "learning_rate": 8.563615637540112e-06,
      "loss": 0.0304,
      "step": 455980
    },
    {
      "epoch": 0.7462540012961254,
      "grad_norm": 0.9151421189308167,
      "learning_rate": 8.563549745326595e-06,
      "loss": 0.0313,
      "step": 456000
    },
    {
      "epoch": 0.7462867317347787,
      "grad_norm": 3.0140128135681152,
      "learning_rate": 8.563483853113079e-06,
      "loss": 0.0336,
      "step": 456020
    },
    {
      "epoch": 0.746319462173432,
      "grad_norm": 1.0838180780410767,
      "learning_rate": 8.56341796089956e-06,
      "loss": 0.0272,
      "step": 456040
    },
    {
      "epoch": 0.7463521926120854,
      "grad_norm": 2.797056198120117,
      "learning_rate": 8.563352068686044e-06,
      "loss": 0.0402,
      "step": 456060
    },
    {
      "epoch": 0.7463849230507388,
      "grad_norm": 1.9641497135162354,
      "learning_rate": 8.563286176472528e-06,
      "loss": 0.0284,
      "step": 456080
    },
    {
      "epoch": 0.746417653489392,
      "grad_norm": 0.6204356551170349,
      "learning_rate": 8.56322028425901e-06,
      "loss": 0.0365,
      "step": 456100
    },
    {
      "epoch": 0.7464503839280454,
      "grad_norm": 2.1277830600738525,
      "learning_rate": 8.563154392045493e-06,
      "loss": 0.0361,
      "step": 456120
    },
    {
      "epoch": 0.7464831143666988,
      "grad_norm": 0.5560372471809387,
      "learning_rate": 8.563088499831975e-06,
      "loss": 0.0367,
      "step": 456140
    },
    {
      "epoch": 0.746515844805352,
      "grad_norm": 1.813218355178833,
      "learning_rate": 8.563022607618459e-06,
      "loss": 0.0325,
      "step": 456160
    },
    {
      "epoch": 0.7465485752440054,
      "grad_norm": 0.9386047124862671,
      "learning_rate": 8.56295671540494e-06,
      "loss": 0.0347,
      "step": 456180
    },
    {
      "epoch": 0.7465813056826588,
      "grad_norm": 2.264324903488159,
      "learning_rate": 8.562890823191424e-06,
      "loss": 0.0385,
      "step": 456200
    },
    {
      "epoch": 0.746614036121312,
      "grad_norm": 0.2671397030353546,
      "learning_rate": 8.562824930977906e-06,
      "loss": 0.0274,
      "step": 456220
    },
    {
      "epoch": 0.7466467665599654,
      "grad_norm": 1.3552356958389282,
      "learning_rate": 8.56275903876439e-06,
      "loss": 0.0364,
      "step": 456240
    },
    {
      "epoch": 0.7466794969986188,
      "grad_norm": 1.4965219497680664,
      "learning_rate": 8.562693146550872e-06,
      "loss": 0.036,
      "step": 456260
    },
    {
      "epoch": 0.7467122274372722,
      "grad_norm": 1.1047792434692383,
      "learning_rate": 8.562627254337355e-06,
      "loss": 0.0375,
      "step": 456280
    },
    {
      "epoch": 0.7467449578759254,
      "grad_norm": 0.7312521934509277,
      "learning_rate": 8.562561362123839e-06,
      "loss": 0.0332,
      "step": 456300
    },
    {
      "epoch": 0.7467776883145788,
      "grad_norm": 1.1137101650238037,
      "learning_rate": 8.562495469910321e-06,
      "loss": 0.0369,
      "step": 456320
    },
    {
      "epoch": 0.7468104187532322,
      "grad_norm": 0.5804345607757568,
      "learning_rate": 8.562429577696804e-06,
      "loss": 0.0326,
      "step": 456340
    },
    {
      "epoch": 0.7468431491918854,
      "grad_norm": 0.7187511324882507,
      "learning_rate": 8.562363685483286e-06,
      "loss": 0.0303,
      "step": 456360
    },
    {
      "epoch": 0.7468758796305388,
      "grad_norm": 1.1741772890090942,
      "learning_rate": 8.56229779326977e-06,
      "loss": 0.0393,
      "step": 456380
    },
    {
      "epoch": 0.7469086100691922,
      "grad_norm": 0.846930205821991,
      "learning_rate": 8.562231901056254e-06,
      "loss": 0.0334,
      "step": 456400
    },
    {
      "epoch": 0.7469413405078454,
      "grad_norm": 0.9845828413963318,
      "learning_rate": 8.562166008842735e-06,
      "loss": 0.0322,
      "step": 456420
    },
    {
      "epoch": 0.7469740709464988,
      "grad_norm": 1.0218665599822998,
      "learning_rate": 8.562100116629219e-06,
      "loss": 0.0334,
      "step": 456440
    },
    {
      "epoch": 0.7470068013851522,
      "grad_norm": 0.9459458589553833,
      "learning_rate": 8.562034224415703e-06,
      "loss": 0.043,
      "step": 456460
    },
    {
      "epoch": 0.7470395318238054,
      "grad_norm": 2.8520095348358154,
      "learning_rate": 8.561968332202184e-06,
      "loss": 0.0346,
      "step": 456480
    },
    {
      "epoch": 0.7470722622624588,
      "grad_norm": 0.10281761735677719,
      "learning_rate": 8.561902439988668e-06,
      "loss": 0.0292,
      "step": 456500
    },
    {
      "epoch": 0.7471049927011122,
      "grad_norm": 1.1366174221038818,
      "learning_rate": 8.56183654777515e-06,
      "loss": 0.0432,
      "step": 456520
    },
    {
      "epoch": 0.7471377231397656,
      "grad_norm": 1.10999596118927,
      "learning_rate": 8.561770655561634e-06,
      "loss": 0.0347,
      "step": 456540
    },
    {
      "epoch": 0.7471704535784188,
      "grad_norm": 0.5662975907325745,
      "learning_rate": 8.561704763348115e-06,
      "loss": 0.0261,
      "step": 456560
    },
    {
      "epoch": 0.7472031840170722,
      "grad_norm": 1.0624345541000366,
      "learning_rate": 8.561638871134599e-06,
      "loss": 0.0282,
      "step": 456580
    },
    {
      "epoch": 0.7472359144557256,
      "grad_norm": 1.6305526494979858,
      "learning_rate": 8.561572978921081e-06,
      "loss": 0.0345,
      "step": 456600
    },
    {
      "epoch": 0.7472686448943788,
      "grad_norm": 0.3402661085128784,
      "learning_rate": 8.561507086707565e-06,
      "loss": 0.0287,
      "step": 456620
    },
    {
      "epoch": 0.7473013753330322,
      "grad_norm": 0.5607851147651672,
      "learning_rate": 8.561441194494048e-06,
      "loss": 0.0388,
      "step": 456640
    },
    {
      "epoch": 0.7473341057716856,
      "grad_norm": 2.0420475006103516,
      "learning_rate": 8.56137530228053e-06,
      "loss": 0.0334,
      "step": 456660
    },
    {
      "epoch": 0.7473668362103388,
      "grad_norm": 1.4898364543914795,
      "learning_rate": 8.561309410067014e-06,
      "loss": 0.0381,
      "step": 456680
    },
    {
      "epoch": 0.7473995666489922,
      "grad_norm": 0.9624407291412354,
      "learning_rate": 8.561243517853495e-06,
      "loss": 0.0459,
      "step": 456700
    },
    {
      "epoch": 0.7474322970876456,
      "grad_norm": 0.27483659982681274,
      "learning_rate": 8.561177625639979e-06,
      "loss": 0.0365,
      "step": 456720
    },
    {
      "epoch": 0.747465027526299,
      "grad_norm": 0.47955358028411865,
      "learning_rate": 8.561111733426461e-06,
      "loss": 0.0334,
      "step": 456740
    },
    {
      "epoch": 0.7474977579649522,
      "grad_norm": 1.2281898260116577,
      "learning_rate": 8.561045841212945e-06,
      "loss": 0.0381,
      "step": 456760
    },
    {
      "epoch": 0.7475304884036056,
      "grad_norm": 1.7434624433517456,
      "learning_rate": 8.560979948999426e-06,
      "loss": 0.0302,
      "step": 456780
    },
    {
      "epoch": 0.747563218842259,
      "grad_norm": 1.0915683507919312,
      "learning_rate": 8.56091405678591e-06,
      "loss": 0.0276,
      "step": 456800
    },
    {
      "epoch": 0.7475959492809122,
      "grad_norm": 0.4701731204986572,
      "learning_rate": 8.560848164572394e-06,
      "loss": 0.0268,
      "step": 456820
    },
    {
      "epoch": 0.7476286797195656,
      "grad_norm": 0.6014187335968018,
      "learning_rate": 8.560782272358875e-06,
      "loss": 0.0271,
      "step": 456840
    },
    {
      "epoch": 0.747661410158219,
      "grad_norm": 0.4196721017360687,
      "learning_rate": 8.560716380145359e-06,
      "loss": 0.0258,
      "step": 456860
    },
    {
      "epoch": 0.7476941405968722,
      "grad_norm": 0.5188602805137634,
      "learning_rate": 8.560650487931843e-06,
      "loss": 0.0335,
      "step": 456880
    },
    {
      "epoch": 0.7477268710355256,
      "grad_norm": 2.2122557163238525,
      "learning_rate": 8.560584595718325e-06,
      "loss": 0.0208,
      "step": 456900
    },
    {
      "epoch": 0.747759601474179,
      "grad_norm": 0.5557168126106262,
      "learning_rate": 8.560518703504808e-06,
      "loss": 0.0306,
      "step": 456920
    },
    {
      "epoch": 0.7477923319128323,
      "grad_norm": 1.0629204511642456,
      "learning_rate": 8.56045281129129e-06,
      "loss": 0.036,
      "step": 456940
    },
    {
      "epoch": 0.7478250623514856,
      "grad_norm": 0.5158154368400574,
      "learning_rate": 8.560386919077774e-06,
      "loss": 0.0356,
      "step": 456960
    },
    {
      "epoch": 0.747857792790139,
      "grad_norm": 1.4422760009765625,
      "learning_rate": 8.560321026864257e-06,
      "loss": 0.0258,
      "step": 456980
    },
    {
      "epoch": 0.7478905232287923,
      "grad_norm": 0.6794479489326477,
      "learning_rate": 8.560255134650739e-06,
      "loss": 0.0238,
      "step": 457000
    },
    {
      "epoch": 0.7479232536674456,
      "grad_norm": 0.888573169708252,
      "learning_rate": 8.560189242437223e-06,
      "loss": 0.0292,
      "step": 457020
    },
    {
      "epoch": 0.747955984106099,
      "grad_norm": 0.7041221857070923,
      "learning_rate": 8.560123350223705e-06,
      "loss": 0.0257,
      "step": 457040
    },
    {
      "epoch": 0.7479887145447524,
      "grad_norm": 1.8494843244552612,
      "learning_rate": 8.560057458010188e-06,
      "loss": 0.0241,
      "step": 457060
    },
    {
      "epoch": 0.7480214449834056,
      "grad_norm": 0.22377713024616241,
      "learning_rate": 8.55999156579667e-06,
      "loss": 0.0321,
      "step": 457080
    },
    {
      "epoch": 0.748054175422059,
      "grad_norm": 0.5425518155097961,
      "learning_rate": 8.559925673583154e-06,
      "loss": 0.03,
      "step": 457100
    },
    {
      "epoch": 0.7480869058607124,
      "grad_norm": 3.6174685955047607,
      "learning_rate": 8.559859781369636e-06,
      "loss": 0.032,
      "step": 457120
    },
    {
      "epoch": 0.7481196362993657,
      "grad_norm": 0.2378702610731125,
      "learning_rate": 8.559793889156119e-06,
      "loss": 0.0307,
      "step": 457140
    },
    {
      "epoch": 0.748152366738019,
      "grad_norm": 2.4370317459106445,
      "learning_rate": 8.559727996942601e-06,
      "loss": 0.0296,
      "step": 457160
    },
    {
      "epoch": 0.7481850971766724,
      "grad_norm": 1.245345115661621,
      "learning_rate": 8.559662104729085e-06,
      "loss": 0.0286,
      "step": 457180
    },
    {
      "epoch": 0.7482178276153257,
      "grad_norm": 1.0791611671447754,
      "learning_rate": 8.559596212515568e-06,
      "loss": 0.0323,
      "step": 457200
    },
    {
      "epoch": 0.748250558053979,
      "grad_norm": 0.3326728343963623,
      "learning_rate": 8.55953032030205e-06,
      "loss": 0.0263,
      "step": 457220
    },
    {
      "epoch": 0.7482832884926324,
      "grad_norm": 0.6144609451293945,
      "learning_rate": 8.559464428088534e-06,
      "loss": 0.0252,
      "step": 457240
    },
    {
      "epoch": 0.7483160189312857,
      "grad_norm": 0.4986717700958252,
      "learning_rate": 8.559398535875017e-06,
      "loss": 0.0213,
      "step": 457260
    },
    {
      "epoch": 0.748348749369939,
      "grad_norm": 1.896637201309204,
      "learning_rate": 8.559332643661499e-06,
      "loss": 0.0236,
      "step": 457280
    },
    {
      "epoch": 0.7483814798085924,
      "grad_norm": 2.1698431968688965,
      "learning_rate": 8.559266751447983e-06,
      "loss": 0.0316,
      "step": 457300
    },
    {
      "epoch": 0.7484142102472457,
      "grad_norm": 1.3494880199432373,
      "learning_rate": 8.559200859234465e-06,
      "loss": 0.0323,
      "step": 457320
    },
    {
      "epoch": 0.7484469406858991,
      "grad_norm": 1.2695196866989136,
      "learning_rate": 8.559134967020948e-06,
      "loss": 0.0345,
      "step": 457340
    },
    {
      "epoch": 0.7484796711245524,
      "grad_norm": 0.9962133169174194,
      "learning_rate": 8.559069074807432e-06,
      "loss": 0.0233,
      "step": 457360
    },
    {
      "epoch": 0.7485124015632058,
      "grad_norm": 0.5650981664657593,
      "learning_rate": 8.559003182593914e-06,
      "loss": 0.0391,
      "step": 457380
    },
    {
      "epoch": 0.7485451320018591,
      "grad_norm": 1.0895311832427979,
      "learning_rate": 8.558937290380397e-06,
      "loss": 0.0248,
      "step": 457400
    },
    {
      "epoch": 0.7485778624405124,
      "grad_norm": 1.7729748487472534,
      "learning_rate": 8.55887139816688e-06,
      "loss": 0.0221,
      "step": 457420
    },
    {
      "epoch": 0.7486105928791658,
      "grad_norm": 4.581205368041992,
      "learning_rate": 8.558805505953363e-06,
      "loss": 0.0281,
      "step": 457440
    },
    {
      "epoch": 0.7486433233178191,
      "grad_norm": 2.128790855407715,
      "learning_rate": 8.558739613739845e-06,
      "loss": 0.0338,
      "step": 457460
    },
    {
      "epoch": 0.7486760537564724,
      "grad_norm": 1.1103037595748901,
      "learning_rate": 8.558673721526328e-06,
      "loss": 0.0385,
      "step": 457480
    },
    {
      "epoch": 0.7487087841951258,
      "grad_norm": 1.0759539604187012,
      "learning_rate": 8.55860782931281e-06,
      "loss": 0.0304,
      "step": 457500
    },
    {
      "epoch": 0.7487415146337791,
      "grad_norm": 0.4322463274002075,
      "learning_rate": 8.558541937099294e-06,
      "loss": 0.0296,
      "step": 457520
    },
    {
      "epoch": 0.7487742450724325,
      "grad_norm": 1.6363555192947388,
      "learning_rate": 8.558476044885776e-06,
      "loss": 0.0322,
      "step": 457540
    },
    {
      "epoch": 0.7488069755110858,
      "grad_norm": 1.573147177696228,
      "learning_rate": 8.55841015267226e-06,
      "loss": 0.0272,
      "step": 457560
    },
    {
      "epoch": 0.7488397059497391,
      "grad_norm": 1.7455962896347046,
      "learning_rate": 8.558344260458741e-06,
      "loss": 0.0473,
      "step": 457580
    },
    {
      "epoch": 0.7488724363883925,
      "grad_norm": 0.25581634044647217,
      "learning_rate": 8.558278368245225e-06,
      "loss": 0.0404,
      "step": 457600
    },
    {
      "epoch": 0.7489051668270458,
      "grad_norm": 1.2316547632217407,
      "learning_rate": 8.558212476031708e-06,
      "loss": 0.0284,
      "step": 457620
    },
    {
      "epoch": 0.7489378972656991,
      "grad_norm": 1.09583580493927,
      "learning_rate": 8.55814658381819e-06,
      "loss": 0.0327,
      "step": 457640
    },
    {
      "epoch": 0.7489706277043525,
      "grad_norm": 0.9671244025230408,
      "learning_rate": 8.558080691604674e-06,
      "loss": 0.0265,
      "step": 457660
    },
    {
      "epoch": 0.7490033581430058,
      "grad_norm": 1.8310846090316772,
      "learning_rate": 8.558014799391157e-06,
      "loss": 0.0348,
      "step": 457680
    },
    {
      "epoch": 0.7490360885816592,
      "grad_norm": 0.9269673228263855,
      "learning_rate": 8.55794890717764e-06,
      "loss": 0.0242,
      "step": 457700
    },
    {
      "epoch": 0.7490688190203125,
      "grad_norm": 0.5535939931869507,
      "learning_rate": 8.557883014964123e-06,
      "loss": 0.0257,
      "step": 457720
    },
    {
      "epoch": 0.7491015494589659,
      "grad_norm": 0.635613203048706,
      "learning_rate": 8.557817122750606e-06,
      "loss": 0.0213,
      "step": 457740
    },
    {
      "epoch": 0.7491342798976192,
      "grad_norm": 1.97306227684021,
      "learning_rate": 8.557751230537088e-06,
      "loss": 0.0284,
      "step": 457760
    },
    {
      "epoch": 0.7491670103362725,
      "grad_norm": 1.4717822074890137,
      "learning_rate": 8.557685338323572e-06,
      "loss": 0.0231,
      "step": 457780
    },
    {
      "epoch": 0.7491997407749259,
      "grad_norm": 1.93220853805542,
      "learning_rate": 8.557619446110054e-06,
      "loss": 0.0311,
      "step": 457800
    },
    {
      "epoch": 0.7492324712135792,
      "grad_norm": 0.3233214318752289,
      "learning_rate": 8.557553553896537e-06,
      "loss": 0.0243,
      "step": 457820
    },
    {
      "epoch": 0.7492652016522325,
      "grad_norm": 0.18371188640594482,
      "learning_rate": 8.55748766168302e-06,
      "loss": 0.0352,
      "step": 457840
    },
    {
      "epoch": 0.7492979320908859,
      "grad_norm": 0.6685294508934021,
      "learning_rate": 8.557421769469503e-06,
      "loss": 0.0337,
      "step": 457860
    },
    {
      "epoch": 0.7493306625295392,
      "grad_norm": 1.554581880569458,
      "learning_rate": 8.557355877255985e-06,
      "loss": 0.0363,
      "step": 457880
    },
    {
      "epoch": 0.7493633929681925,
      "grad_norm": 3.5252292156219482,
      "learning_rate": 8.557289985042468e-06,
      "loss": 0.0395,
      "step": 457900
    },
    {
      "epoch": 0.7493961234068459,
      "grad_norm": 0.5891909599304199,
      "learning_rate": 8.55722409282895e-06,
      "loss": 0.0338,
      "step": 457920
    },
    {
      "epoch": 0.7494288538454993,
      "grad_norm": 0.30242618918418884,
      "learning_rate": 8.557158200615434e-06,
      "loss": 0.0327,
      "step": 457940
    },
    {
      "epoch": 0.7494615842841525,
      "grad_norm": 0.913571834564209,
      "learning_rate": 8.557092308401916e-06,
      "loss": 0.0314,
      "step": 457960
    },
    {
      "epoch": 0.7494943147228059,
      "grad_norm": 0.8257484436035156,
      "learning_rate": 8.5570264161884e-06,
      "loss": 0.04,
      "step": 457980
    },
    {
      "epoch": 0.7495270451614593,
      "grad_norm": 1.4988921880722046,
      "learning_rate": 8.556960523974883e-06,
      "loss": 0.0377,
      "step": 458000
    },
    {
      "epoch": 0.7495597756001126,
      "grad_norm": 4.456597328186035,
      "learning_rate": 8.556894631761365e-06,
      "loss": 0.0285,
      "step": 458020
    },
    {
      "epoch": 0.7495925060387659,
      "grad_norm": 2.0845048427581787,
      "learning_rate": 8.556828739547848e-06,
      "loss": 0.0336,
      "step": 458040
    },
    {
      "epoch": 0.7496252364774193,
      "grad_norm": 0.4908701479434967,
      "learning_rate": 8.556762847334332e-06,
      "loss": 0.0441,
      "step": 458060
    },
    {
      "epoch": 0.7496579669160726,
      "grad_norm": 0.8712658286094666,
      "learning_rate": 8.556696955120814e-06,
      "loss": 0.0312,
      "step": 458080
    },
    {
      "epoch": 0.7496906973547259,
      "grad_norm": 1.3405508995056152,
      "learning_rate": 8.556631062907297e-06,
      "loss": 0.0277,
      "step": 458100
    },
    {
      "epoch": 0.7497234277933793,
      "grad_norm": 0.7100892663002014,
      "learning_rate": 8.556565170693781e-06,
      "loss": 0.0271,
      "step": 458120
    },
    {
      "epoch": 0.7497561582320327,
      "grad_norm": 1.6293271780014038,
      "learning_rate": 8.556499278480263e-06,
      "loss": 0.0323,
      "step": 458140
    },
    {
      "epoch": 0.7497888886706859,
      "grad_norm": 2.046558141708374,
      "learning_rate": 8.556433386266746e-06,
      "loss": 0.0505,
      "step": 458160
    },
    {
      "epoch": 0.7498216191093393,
      "grad_norm": 0.3395715355873108,
      "learning_rate": 8.556367494053228e-06,
      "loss": 0.0231,
      "step": 458180
    },
    {
      "epoch": 0.7498543495479927,
      "grad_norm": 1.4645545482635498,
      "learning_rate": 8.556301601839712e-06,
      "loss": 0.0382,
      "step": 458200
    },
    {
      "epoch": 0.7498870799866459,
      "grad_norm": 0.2725221514701843,
      "learning_rate": 8.556235709626194e-06,
      "loss": 0.0289,
      "step": 458220
    },
    {
      "epoch": 0.7499198104252993,
      "grad_norm": 1.3615102767944336,
      "learning_rate": 8.556169817412677e-06,
      "loss": 0.0262,
      "step": 458240
    },
    {
      "epoch": 0.7499525408639527,
      "grad_norm": 0.7173298001289368,
      "learning_rate": 8.55610392519916e-06,
      "loss": 0.0344,
      "step": 458260
    },
    {
      "epoch": 0.749985271302606,
      "grad_norm": 0.7055355906486511,
      "learning_rate": 8.556038032985643e-06,
      "loss": 0.0398,
      "step": 458280
    },
    {
      "epoch": 0.7500180017412593,
      "grad_norm": 1.9557865858078003,
      "learning_rate": 8.555972140772125e-06,
      "loss": 0.0244,
      "step": 458300
    },
    {
      "epoch": 0.7500507321799127,
      "grad_norm": 0.44773542881011963,
      "learning_rate": 8.555906248558608e-06,
      "loss": 0.0326,
      "step": 458320
    },
    {
      "epoch": 0.7500834626185661,
      "grad_norm": 1.0669221878051758,
      "learning_rate": 8.55584035634509e-06,
      "loss": 0.0442,
      "step": 458340
    },
    {
      "epoch": 0.7501161930572193,
      "grad_norm": 0.8323507905006409,
      "learning_rate": 8.555774464131574e-06,
      "loss": 0.0302,
      "step": 458360
    },
    {
      "epoch": 0.7501489234958727,
      "grad_norm": 1.3492039442062378,
      "learning_rate": 8.555708571918056e-06,
      "loss": 0.0353,
      "step": 458380
    },
    {
      "epoch": 0.7501816539345261,
      "grad_norm": 1.0288008451461792,
      "learning_rate": 8.55564267970454e-06,
      "loss": 0.0277,
      "step": 458400
    },
    {
      "epoch": 0.7502143843731793,
      "grad_norm": 1.3827296495437622,
      "learning_rate": 8.555576787491023e-06,
      "loss": 0.0322,
      "step": 458420
    },
    {
      "epoch": 0.7502471148118327,
      "grad_norm": 1.328331470489502,
      "learning_rate": 8.555510895277505e-06,
      "loss": 0.0433,
      "step": 458440
    },
    {
      "epoch": 0.7502798452504861,
      "grad_norm": 1.082470178604126,
      "learning_rate": 8.555445003063988e-06,
      "loss": 0.0336,
      "step": 458460
    },
    {
      "epoch": 0.7503125756891393,
      "grad_norm": 0.41485652327537537,
      "learning_rate": 8.555379110850472e-06,
      "loss": 0.0317,
      "step": 458480
    },
    {
      "epoch": 0.7503453061277927,
      "grad_norm": 0.5915943384170532,
      "learning_rate": 8.555313218636954e-06,
      "loss": 0.0341,
      "step": 458500
    },
    {
      "epoch": 0.7503780365664461,
      "grad_norm": 1.0438714027404785,
      "learning_rate": 8.555247326423437e-06,
      "loss": 0.0346,
      "step": 458520
    },
    {
      "epoch": 0.7504107670050995,
      "grad_norm": 0.9969226717948914,
      "learning_rate": 8.555181434209921e-06,
      "loss": 0.0343,
      "step": 458540
    },
    {
      "epoch": 0.7504434974437527,
      "grad_norm": 1.0292664766311646,
      "learning_rate": 8.555115541996403e-06,
      "loss": 0.0273,
      "step": 458560
    },
    {
      "epoch": 0.7504762278824061,
      "grad_norm": 0.3590819239616394,
      "learning_rate": 8.555049649782887e-06,
      "loss": 0.0346,
      "step": 458580
    },
    {
      "epoch": 0.7505089583210595,
      "grad_norm": 0.7575581073760986,
      "learning_rate": 8.554983757569368e-06,
      "loss": 0.0317,
      "step": 458600
    },
    {
      "epoch": 0.7505416887597127,
      "grad_norm": 1.9884685277938843,
      "learning_rate": 8.554917865355852e-06,
      "loss": 0.0301,
      "step": 458620
    },
    {
      "epoch": 0.7505744191983661,
      "grad_norm": 0.7519974112510681,
      "learning_rate": 8.554851973142334e-06,
      "loss": 0.0182,
      "step": 458640
    },
    {
      "epoch": 0.7506071496370195,
      "grad_norm": 2.158766269683838,
      "learning_rate": 8.554786080928818e-06,
      "loss": 0.0398,
      "step": 458660
    },
    {
      "epoch": 0.7506398800756727,
      "grad_norm": 0.8838948011398315,
      "learning_rate": 8.5547201887153e-06,
      "loss": 0.0291,
      "step": 458680
    },
    {
      "epoch": 0.7506726105143261,
      "grad_norm": 1.2455554008483887,
      "learning_rate": 8.554654296501783e-06,
      "loss": 0.0295,
      "step": 458700
    },
    {
      "epoch": 0.7507053409529795,
      "grad_norm": 1.6126518249511719,
      "learning_rate": 8.554588404288265e-06,
      "loss": 0.0293,
      "step": 458720
    },
    {
      "epoch": 0.7507380713916328,
      "grad_norm": 1.0931427478790283,
      "learning_rate": 8.554522512074748e-06,
      "loss": 0.0304,
      "step": 458740
    },
    {
      "epoch": 0.7507708018302861,
      "grad_norm": 2.923251152038574,
      "learning_rate": 8.554456619861232e-06,
      "loss": 0.0362,
      "step": 458760
    },
    {
      "epoch": 0.7508035322689395,
      "grad_norm": 0.19725088775157928,
      "learning_rate": 8.554390727647714e-06,
      "loss": 0.0251,
      "step": 458780
    },
    {
      "epoch": 0.7508362627075928,
      "grad_norm": 2.180361270904541,
      "learning_rate": 8.554324835434198e-06,
      "loss": 0.0315,
      "step": 458800
    },
    {
      "epoch": 0.7508689931462461,
      "grad_norm": 1.127758264541626,
      "learning_rate": 8.55425894322068e-06,
      "loss": 0.0264,
      "step": 458820
    },
    {
      "epoch": 0.7509017235848995,
      "grad_norm": 2.062875986099243,
      "learning_rate": 8.554193051007163e-06,
      "loss": 0.0304,
      "step": 458840
    },
    {
      "epoch": 0.7509344540235529,
      "grad_norm": 1.1334643363952637,
      "learning_rate": 8.554127158793647e-06,
      "loss": 0.0405,
      "step": 458860
    },
    {
      "epoch": 0.7509671844622061,
      "grad_norm": 0.3241545259952545,
      "learning_rate": 8.554061266580128e-06,
      "loss": 0.0234,
      "step": 458880
    },
    {
      "epoch": 0.7509999149008595,
      "grad_norm": 1.3184553384780884,
      "learning_rate": 8.553995374366612e-06,
      "loss": 0.0208,
      "step": 458900
    },
    {
      "epoch": 0.7510326453395129,
      "grad_norm": 0.4390767812728882,
      "learning_rate": 8.553929482153096e-06,
      "loss": 0.0313,
      "step": 458920
    },
    {
      "epoch": 0.7510653757781662,
      "grad_norm": 1.0209927558898926,
      "learning_rate": 8.553863589939578e-06,
      "loss": 0.0383,
      "step": 458940
    },
    {
      "epoch": 0.7510981062168195,
      "grad_norm": 0.8613820672035217,
      "learning_rate": 8.553797697726061e-06,
      "loss": 0.0338,
      "step": 458960
    },
    {
      "epoch": 0.7511308366554729,
      "grad_norm": 1.5965718030929565,
      "learning_rate": 8.553731805512543e-06,
      "loss": 0.0408,
      "step": 458980
    },
    {
      "epoch": 0.7511635670941262,
      "grad_norm": 0.6970524191856384,
      "learning_rate": 8.553665913299027e-06,
      "loss": 0.0345,
      "step": 459000
    },
    {
      "epoch": 0.7511962975327795,
      "grad_norm": 1.185011863708496,
      "learning_rate": 8.553600021085509e-06,
      "loss": 0.0288,
      "step": 459020
    },
    {
      "epoch": 0.7512290279714329,
      "grad_norm": 1.0559489727020264,
      "learning_rate": 8.553534128871992e-06,
      "loss": 0.0329,
      "step": 459040
    },
    {
      "epoch": 0.7512617584100862,
      "grad_norm": 0.4294947683811188,
      "learning_rate": 8.553468236658474e-06,
      "loss": 0.0345,
      "step": 459060
    },
    {
      "epoch": 0.7512944888487395,
      "grad_norm": 2.7959823608398438,
      "learning_rate": 8.553402344444958e-06,
      "loss": 0.0394,
      "step": 459080
    },
    {
      "epoch": 0.7513272192873929,
      "grad_norm": 2.228417158126831,
      "learning_rate": 8.553336452231441e-06,
      "loss": 0.0361,
      "step": 459100
    },
    {
      "epoch": 0.7513599497260463,
      "grad_norm": 0.6531791090965271,
      "learning_rate": 8.553270560017923e-06,
      "loss": 0.0207,
      "step": 459120
    },
    {
      "epoch": 0.7513926801646996,
      "grad_norm": 0.24664001166820526,
      "learning_rate": 8.553204667804407e-06,
      "loss": 0.0293,
      "step": 459140
    },
    {
      "epoch": 0.7514254106033529,
      "grad_norm": 1.3364495038986206,
      "learning_rate": 8.553138775590889e-06,
      "loss": 0.0381,
      "step": 459160
    },
    {
      "epoch": 0.7514581410420063,
      "grad_norm": 2.5420591831207275,
      "learning_rate": 8.553072883377372e-06,
      "loss": 0.0348,
      "step": 459180
    },
    {
      "epoch": 0.7514908714806596,
      "grad_norm": 0.9678736925125122,
      "learning_rate": 8.553006991163854e-06,
      "loss": 0.0344,
      "step": 459200
    },
    {
      "epoch": 0.7515236019193129,
      "grad_norm": 0.6790795922279358,
      "learning_rate": 8.552941098950338e-06,
      "loss": 0.0375,
      "step": 459220
    },
    {
      "epoch": 0.7515563323579663,
      "grad_norm": 1.9766467809677124,
      "learning_rate": 8.552875206736821e-06,
      "loss": 0.0245,
      "step": 459240
    },
    {
      "epoch": 0.7515890627966196,
      "grad_norm": 0.8976479768753052,
      "learning_rate": 8.552809314523303e-06,
      "loss": 0.0391,
      "step": 459260
    },
    {
      "epoch": 0.7516217932352729,
      "grad_norm": 0.16957701742649078,
      "learning_rate": 8.552743422309787e-06,
      "loss": 0.0267,
      "step": 459280
    },
    {
      "epoch": 0.7516545236739263,
      "grad_norm": 0.3262535035610199,
      "learning_rate": 8.55267753009627e-06,
      "loss": 0.0334,
      "step": 459300
    },
    {
      "epoch": 0.7516872541125796,
      "grad_norm": 0.7654885053634644,
      "learning_rate": 8.552611637882752e-06,
      "loss": 0.0267,
      "step": 459320
    },
    {
      "epoch": 0.7517199845512329,
      "grad_norm": 1.2318609952926636,
      "learning_rate": 8.552545745669236e-06,
      "loss": 0.0274,
      "step": 459340
    },
    {
      "epoch": 0.7517527149898863,
      "grad_norm": 1.8796279430389404,
      "learning_rate": 8.552479853455718e-06,
      "loss": 0.034,
      "step": 459360
    },
    {
      "epoch": 0.7517854454285396,
      "grad_norm": 0.6808501482009888,
      "learning_rate": 8.552413961242201e-06,
      "loss": 0.0399,
      "step": 459380
    },
    {
      "epoch": 0.751818175867193,
      "grad_norm": 0.9893763661384583,
      "learning_rate": 8.552348069028683e-06,
      "loss": 0.04,
      "step": 459400
    },
    {
      "epoch": 0.7518509063058463,
      "grad_norm": 1.7181068658828735,
      "learning_rate": 8.552282176815167e-06,
      "loss": 0.0272,
      "step": 459420
    },
    {
      "epoch": 0.7518836367444997,
      "grad_norm": 3.2731645107269287,
      "learning_rate": 8.55221628460165e-06,
      "loss": 0.0275,
      "step": 459440
    },
    {
      "epoch": 0.751916367183153,
      "grad_norm": 1.1237478256225586,
      "learning_rate": 8.552150392388132e-06,
      "loss": 0.046,
      "step": 459460
    },
    {
      "epoch": 0.7519490976218063,
      "grad_norm": 3.8564016819000244,
      "learning_rate": 8.552084500174616e-06,
      "loss": 0.029,
      "step": 459480
    },
    {
      "epoch": 0.7519818280604597,
      "grad_norm": 0.9266703724861145,
      "learning_rate": 8.552018607961098e-06,
      "loss": 0.0279,
      "step": 459500
    },
    {
      "epoch": 0.752014558499113,
      "grad_norm": 1.1650166511535645,
      "learning_rate": 8.551952715747581e-06,
      "loss": 0.0271,
      "step": 459520
    },
    {
      "epoch": 0.7520472889377663,
      "grad_norm": 0.9869665503501892,
      "learning_rate": 8.551886823534063e-06,
      "loss": 0.0421,
      "step": 459540
    },
    {
      "epoch": 0.7520800193764197,
      "grad_norm": 0.8681882619857788,
      "learning_rate": 8.551820931320547e-06,
      "loss": 0.023,
      "step": 459560
    },
    {
      "epoch": 0.752112749815073,
      "grad_norm": 0.28930845856666565,
      "learning_rate": 8.551755039107029e-06,
      "loss": 0.038,
      "step": 459580
    },
    {
      "epoch": 0.7521454802537264,
      "grad_norm": 1.3456703424453735,
      "learning_rate": 8.551689146893512e-06,
      "loss": 0.033,
      "step": 459600
    },
    {
      "epoch": 0.7521782106923797,
      "grad_norm": 0.69728022813797,
      "learning_rate": 8.551623254679994e-06,
      "loss": 0.0307,
      "step": 459620
    },
    {
      "epoch": 0.752210941131033,
      "grad_norm": 1.2502864599227905,
      "learning_rate": 8.551557362466478e-06,
      "loss": 0.0416,
      "step": 459640
    },
    {
      "epoch": 0.7522436715696864,
      "grad_norm": 2.5522124767303467,
      "learning_rate": 8.551491470252961e-06,
      "loss": 0.0333,
      "step": 459660
    },
    {
      "epoch": 0.7522764020083397,
      "grad_norm": 0.7461397647857666,
      "learning_rate": 8.551425578039443e-06,
      "loss": 0.0268,
      "step": 459680
    },
    {
      "epoch": 0.752309132446993,
      "grad_norm": 0.9494073390960693,
      "learning_rate": 8.551359685825927e-06,
      "loss": 0.0343,
      "step": 459700
    },
    {
      "epoch": 0.7523418628856464,
      "grad_norm": 1.0933966636657715,
      "learning_rate": 8.55129379361241e-06,
      "loss": 0.0366,
      "step": 459720
    },
    {
      "epoch": 0.7523745933242997,
      "grad_norm": 0.6273921728134155,
      "learning_rate": 8.551227901398892e-06,
      "loss": 0.0279,
      "step": 459740
    },
    {
      "epoch": 0.752407323762953,
      "grad_norm": 0.7225296497344971,
      "learning_rate": 8.551162009185376e-06,
      "loss": 0.0294,
      "step": 459760
    },
    {
      "epoch": 0.7524400542016064,
      "grad_norm": 0.5501903295516968,
      "learning_rate": 8.551096116971858e-06,
      "loss": 0.0202,
      "step": 459780
    },
    {
      "epoch": 0.7524727846402598,
      "grad_norm": 0.49646463990211487,
      "learning_rate": 8.551030224758341e-06,
      "loss": 0.0284,
      "step": 459800
    },
    {
      "epoch": 0.7525055150789131,
      "grad_norm": 0.49662113189697266,
      "learning_rate": 8.550964332544825e-06,
      "loss": 0.0296,
      "step": 459820
    },
    {
      "epoch": 0.7525382455175664,
      "grad_norm": 1.2787410020828247,
      "learning_rate": 8.550898440331307e-06,
      "loss": 0.0328,
      "step": 459840
    },
    {
      "epoch": 0.7525709759562198,
      "grad_norm": 1.1729792356491089,
      "learning_rate": 8.55083254811779e-06,
      "loss": 0.0244,
      "step": 459860
    },
    {
      "epoch": 0.7526037063948731,
      "grad_norm": 1.1019575595855713,
      "learning_rate": 8.550766655904272e-06,
      "loss": 0.042,
      "step": 459880
    },
    {
      "epoch": 0.7526364368335264,
      "grad_norm": 1.373221516609192,
      "learning_rate": 8.550700763690756e-06,
      "loss": 0.0335,
      "step": 459900
    },
    {
      "epoch": 0.7526691672721798,
      "grad_norm": 0.6447871327400208,
      "learning_rate": 8.550634871477238e-06,
      "loss": 0.0269,
      "step": 459920
    },
    {
      "epoch": 0.7527018977108331,
      "grad_norm": 0.8492496013641357,
      "learning_rate": 8.550568979263721e-06,
      "loss": 0.0317,
      "step": 459940
    },
    {
      "epoch": 0.7527346281494864,
      "grad_norm": 0.24462002515792847,
      "learning_rate": 8.550503087050203e-06,
      "loss": 0.0318,
      "step": 459960
    },
    {
      "epoch": 0.7527673585881398,
      "grad_norm": 2.4911742210388184,
      "learning_rate": 8.550437194836687e-06,
      "loss": 0.0365,
      "step": 459980
    },
    {
      "epoch": 0.7528000890267932,
      "grad_norm": 0.7878594398498535,
      "learning_rate": 8.550371302623169e-06,
      "loss": 0.0213,
      "step": 460000
    },
    {
      "epoch": 0.7528328194654464,
      "grad_norm": 1.5236810445785522,
      "learning_rate": 8.550305410409652e-06,
      "loss": 0.0315,
      "step": 460020
    },
    {
      "epoch": 0.7528655499040998,
      "grad_norm": 1.0217777490615845,
      "learning_rate": 8.550239518196136e-06,
      "loss": 0.0231,
      "step": 460040
    },
    {
      "epoch": 0.7528982803427532,
      "grad_norm": 1.4619946479797363,
      "learning_rate": 8.550173625982618e-06,
      "loss": 0.0403,
      "step": 460060
    },
    {
      "epoch": 0.7529310107814065,
      "grad_norm": 0.2971777021884918,
      "learning_rate": 8.550107733769101e-06,
      "loss": 0.0308,
      "step": 460080
    },
    {
      "epoch": 0.7529637412200598,
      "grad_norm": 1.8544737100601196,
      "learning_rate": 8.550041841555585e-06,
      "loss": 0.0309,
      "step": 460100
    },
    {
      "epoch": 0.7529964716587132,
      "grad_norm": 0.3663899004459381,
      "learning_rate": 8.549975949342067e-06,
      "loss": 0.035,
      "step": 460120
    },
    {
      "epoch": 0.7530292020973665,
      "grad_norm": 0.10159527510404587,
      "learning_rate": 8.54991005712855e-06,
      "loss": 0.033,
      "step": 460140
    },
    {
      "epoch": 0.7530619325360198,
      "grad_norm": 1.2548723220825195,
      "learning_rate": 8.549844164915034e-06,
      "loss": 0.0325,
      "step": 460160
    },
    {
      "epoch": 0.7530946629746732,
      "grad_norm": 5.5060296058654785,
      "learning_rate": 8.549778272701516e-06,
      "loss": 0.0244,
      "step": 460180
    },
    {
      "epoch": 0.7531273934133266,
      "grad_norm": 0.3156837821006775,
      "learning_rate": 8.549712380488e-06,
      "loss": 0.0427,
      "step": 460200
    },
    {
      "epoch": 0.7531601238519798,
      "grad_norm": 0.5646833181381226,
      "learning_rate": 8.549646488274481e-06,
      "loss": 0.0384,
      "step": 460220
    },
    {
      "epoch": 0.7531928542906332,
      "grad_norm": 1.3817002773284912,
      "learning_rate": 8.549580596060965e-06,
      "loss": 0.0275,
      "step": 460240
    },
    {
      "epoch": 0.7532255847292866,
      "grad_norm": 1.1511114835739136,
      "learning_rate": 8.549514703847447e-06,
      "loss": 0.0266,
      "step": 460260
    },
    {
      "epoch": 0.7532583151679398,
      "grad_norm": 0.49399030208587646,
      "learning_rate": 8.54944881163393e-06,
      "loss": 0.0304,
      "step": 460280
    },
    {
      "epoch": 0.7532910456065932,
      "grad_norm": 1.1395823955535889,
      "learning_rate": 8.549382919420412e-06,
      "loss": 0.019,
      "step": 460300
    },
    {
      "epoch": 0.7533237760452466,
      "grad_norm": 0.8284165859222412,
      "learning_rate": 8.549317027206896e-06,
      "loss": 0.0268,
      "step": 460320
    },
    {
      "epoch": 0.7533565064838998,
      "grad_norm": 0.1464909017086029,
      "learning_rate": 8.549251134993378e-06,
      "loss": 0.024,
      "step": 460340
    },
    {
      "epoch": 0.7533892369225532,
      "grad_norm": 0.15226519107818604,
      "learning_rate": 8.549185242779861e-06,
      "loss": 0.0304,
      "step": 460360
    },
    {
      "epoch": 0.7534219673612066,
      "grad_norm": 0.44654354453086853,
      "learning_rate": 8.549119350566343e-06,
      "loss": 0.0388,
      "step": 460380
    },
    {
      "epoch": 0.75345469779986,
      "grad_norm": 0.5890933275222778,
      "learning_rate": 8.549053458352827e-06,
      "loss": 0.0328,
      "step": 460400
    },
    {
      "epoch": 0.7534874282385132,
      "grad_norm": 0.4947245419025421,
      "learning_rate": 8.548987566139309e-06,
      "loss": 0.0333,
      "step": 460420
    },
    {
      "epoch": 0.7535201586771666,
      "grad_norm": 0.7413349151611328,
      "learning_rate": 8.548921673925792e-06,
      "loss": 0.0344,
      "step": 460440
    },
    {
      "epoch": 0.75355288911582,
      "grad_norm": 1.8847416639328003,
      "learning_rate": 8.548855781712276e-06,
      "loss": 0.0366,
      "step": 460460
    },
    {
      "epoch": 0.7535856195544732,
      "grad_norm": 0.685495138168335,
      "learning_rate": 8.548789889498758e-06,
      "loss": 0.0291,
      "step": 460480
    },
    {
      "epoch": 0.7536183499931266,
      "grad_norm": 1.0404828786849976,
      "learning_rate": 8.548723997285241e-06,
      "loss": 0.0255,
      "step": 460500
    },
    {
      "epoch": 0.75365108043178,
      "grad_norm": 1.298469066619873,
      "learning_rate": 8.548658105071725e-06,
      "loss": 0.0363,
      "step": 460520
    },
    {
      "epoch": 0.7536838108704332,
      "grad_norm": 0.3778277039527893,
      "learning_rate": 8.548592212858207e-06,
      "loss": 0.0307,
      "step": 460540
    },
    {
      "epoch": 0.7537165413090866,
      "grad_norm": 0.4876604378223419,
      "learning_rate": 8.54852632064469e-06,
      "loss": 0.0282,
      "step": 460560
    },
    {
      "epoch": 0.75374927174774,
      "grad_norm": 0.8156852722167969,
      "learning_rate": 8.548460428431174e-06,
      "loss": 0.0246,
      "step": 460580
    },
    {
      "epoch": 0.7537820021863934,
      "grad_norm": 0.40744975209236145,
      "learning_rate": 8.548394536217656e-06,
      "loss": 0.0361,
      "step": 460600
    },
    {
      "epoch": 0.7538147326250466,
      "grad_norm": 0.5370362401008606,
      "learning_rate": 8.54832864400414e-06,
      "loss": 0.0386,
      "step": 460620
    },
    {
      "epoch": 0.7538474630637,
      "grad_norm": 1.677660584449768,
      "learning_rate": 8.548262751790621e-06,
      "loss": 0.0304,
      "step": 460640
    },
    {
      "epoch": 0.7538801935023534,
      "grad_norm": 0.9807119369506836,
      "learning_rate": 8.548196859577105e-06,
      "loss": 0.04,
      "step": 460660
    },
    {
      "epoch": 0.7539129239410066,
      "grad_norm": 1.0478054285049438,
      "learning_rate": 8.548130967363587e-06,
      "loss": 0.0347,
      "step": 460680
    },
    {
      "epoch": 0.75394565437966,
      "grad_norm": 0.2189732939004898,
      "learning_rate": 8.54806507515007e-06,
      "loss": 0.0252,
      "step": 460700
    },
    {
      "epoch": 0.7539783848183134,
      "grad_norm": 0.9410767555236816,
      "learning_rate": 8.547999182936552e-06,
      "loss": 0.0245,
      "step": 460720
    },
    {
      "epoch": 0.7540111152569666,
      "grad_norm": 1.4186910390853882,
      "learning_rate": 8.547933290723036e-06,
      "loss": 0.0209,
      "step": 460740
    },
    {
      "epoch": 0.75404384569562,
      "grad_norm": 0.17982754111289978,
      "learning_rate": 8.547867398509518e-06,
      "loss": 0.0313,
      "step": 460760
    },
    {
      "epoch": 0.7540765761342734,
      "grad_norm": 1.3078309297561646,
      "learning_rate": 8.547801506296001e-06,
      "loss": 0.0208,
      "step": 460780
    },
    {
      "epoch": 0.7541093065729267,
      "grad_norm": 0.9418999552726746,
      "learning_rate": 8.547735614082483e-06,
      "loss": 0.0325,
      "step": 460800
    },
    {
      "epoch": 0.75414203701158,
      "grad_norm": 6.671766757965088,
      "learning_rate": 8.547669721868967e-06,
      "loss": 0.0324,
      "step": 460820
    },
    {
      "epoch": 0.7541747674502334,
      "grad_norm": 1.6925524473190308,
      "learning_rate": 8.54760382965545e-06,
      "loss": 0.0292,
      "step": 460840
    },
    {
      "epoch": 0.7542074978888867,
      "grad_norm": 1.2122297286987305,
      "learning_rate": 8.547537937441932e-06,
      "loss": 0.0323,
      "step": 460860
    },
    {
      "epoch": 0.75424022832754,
      "grad_norm": 1.5474729537963867,
      "learning_rate": 8.547472045228416e-06,
      "loss": 0.031,
      "step": 460880
    },
    {
      "epoch": 0.7542729587661934,
      "grad_norm": 0.49866536259651184,
      "learning_rate": 8.5474061530149e-06,
      "loss": 0.0279,
      "step": 460900
    },
    {
      "epoch": 0.7543056892048468,
      "grad_norm": 0.7647902369499207,
      "learning_rate": 8.547340260801381e-06,
      "loss": 0.0406,
      "step": 460920
    },
    {
      "epoch": 0.7543384196435,
      "grad_norm": 0.5420889854431152,
      "learning_rate": 8.547274368587865e-06,
      "loss": 0.0406,
      "step": 460940
    },
    {
      "epoch": 0.7543711500821534,
      "grad_norm": 0.5921815037727356,
      "learning_rate": 8.547208476374349e-06,
      "loss": 0.0296,
      "step": 460960
    },
    {
      "epoch": 0.7544038805208068,
      "grad_norm": 1.098820447921753,
      "learning_rate": 8.54714258416083e-06,
      "loss": 0.0376,
      "step": 460980
    },
    {
      "epoch": 0.7544366109594601,
      "grad_norm": 2.671180486679077,
      "learning_rate": 8.547076691947314e-06,
      "loss": 0.0462,
      "step": 461000
    },
    {
      "epoch": 0.7544693413981134,
      "grad_norm": 0.21243804693222046,
      "learning_rate": 8.547010799733796e-06,
      "loss": 0.0292,
      "step": 461020
    },
    {
      "epoch": 0.7545020718367668,
      "grad_norm": 1.096655011177063,
      "learning_rate": 8.54694490752028e-06,
      "loss": 0.0259,
      "step": 461040
    },
    {
      "epoch": 0.7545348022754201,
      "grad_norm": 1.0984618663787842,
      "learning_rate": 8.546879015306762e-06,
      "loss": 0.0331,
      "step": 461060
    },
    {
      "epoch": 0.7545675327140734,
      "grad_norm": 1.2808233499526978,
      "learning_rate": 8.546813123093245e-06,
      "loss": 0.0508,
      "step": 461080
    },
    {
      "epoch": 0.7546002631527268,
      "grad_norm": 3.3437693119049072,
      "learning_rate": 8.546747230879727e-06,
      "loss": 0.0313,
      "step": 461100
    },
    {
      "epoch": 0.7546329935913801,
      "grad_norm": 2.328591823577881,
      "learning_rate": 8.54668133866621e-06,
      "loss": 0.0368,
      "step": 461120
    },
    {
      "epoch": 0.7546657240300334,
      "grad_norm": 0.5123075246810913,
      "learning_rate": 8.546615446452692e-06,
      "loss": 0.0416,
      "step": 461140
    },
    {
      "epoch": 0.7546984544686868,
      "grad_norm": 0.40654686093330383,
      "learning_rate": 8.546549554239176e-06,
      "loss": 0.0409,
      "step": 461160
    },
    {
      "epoch": 0.7547311849073401,
      "grad_norm": 0.8358212113380432,
      "learning_rate": 8.546483662025658e-06,
      "loss": 0.0288,
      "step": 461180
    },
    {
      "epoch": 0.7547639153459935,
      "grad_norm": 0.6797937154769897,
      "learning_rate": 8.546417769812142e-06,
      "loss": 0.038,
      "step": 461200
    },
    {
      "epoch": 0.7547966457846468,
      "grad_norm": 0.36745038628578186,
      "learning_rate": 8.546351877598625e-06,
      "loss": 0.0282,
      "step": 461220
    },
    {
      "epoch": 0.7548293762233002,
      "grad_norm": 0.8579713702201843,
      "learning_rate": 8.546285985385107e-06,
      "loss": 0.025,
      "step": 461240
    },
    {
      "epoch": 0.7548621066619535,
      "grad_norm": 2.6653432846069336,
      "learning_rate": 8.54622009317159e-06,
      "loss": 0.0346,
      "step": 461260
    },
    {
      "epoch": 0.7548948371006068,
      "grad_norm": 0.6682075262069702,
      "learning_rate": 8.546154200958073e-06,
      "loss": 0.0414,
      "step": 461280
    },
    {
      "epoch": 0.7549275675392602,
      "grad_norm": 1.8370388746261597,
      "learning_rate": 8.546088308744556e-06,
      "loss": 0.0202,
      "step": 461300
    },
    {
      "epoch": 0.7549602979779135,
      "grad_norm": 1.3013747930526733,
      "learning_rate": 8.54602241653104e-06,
      "loss": 0.0289,
      "step": 461320
    },
    {
      "epoch": 0.7549930284165668,
      "grad_norm": 0.7348044514656067,
      "learning_rate": 8.545956524317523e-06,
      "loss": 0.0293,
      "step": 461340
    },
    {
      "epoch": 0.7550257588552202,
      "grad_norm": 0.7295663952827454,
      "learning_rate": 8.545890632104005e-06,
      "loss": 0.0445,
      "step": 461360
    },
    {
      "epoch": 0.7550584892938735,
      "grad_norm": 0.3229473829269409,
      "learning_rate": 8.545824739890489e-06,
      "loss": 0.0315,
      "step": 461380
    },
    {
      "epoch": 0.7550912197325269,
      "grad_norm": 0.9162580370903015,
      "learning_rate": 8.54575884767697e-06,
      "loss": 0.025,
      "step": 461400
    },
    {
      "epoch": 0.7551239501711802,
      "grad_norm": 0.8882035613059998,
      "learning_rate": 8.545692955463454e-06,
      "loss": 0.0265,
      "step": 461420
    },
    {
      "epoch": 0.7551566806098335,
      "grad_norm": 0.47307848930358887,
      "learning_rate": 8.545627063249936e-06,
      "loss": 0.0241,
      "step": 461440
    },
    {
      "epoch": 0.7551894110484869,
      "grad_norm": 1.563083291053772,
      "learning_rate": 8.54556117103642e-06,
      "loss": 0.0343,
      "step": 461460
    },
    {
      "epoch": 0.7552221414871402,
      "grad_norm": 1.8682845830917358,
      "learning_rate": 8.545495278822902e-06,
      "loss": 0.0285,
      "step": 461480
    },
    {
      "epoch": 0.7552548719257935,
      "grad_norm": 1.07451593875885,
      "learning_rate": 8.545429386609385e-06,
      "loss": 0.0324,
      "step": 461500
    },
    {
      "epoch": 0.7552876023644469,
      "grad_norm": 0.4462921917438507,
      "learning_rate": 8.545363494395867e-06,
      "loss": 0.0257,
      "step": 461520
    },
    {
      "epoch": 0.7553203328031002,
      "grad_norm": 1.817611813545227,
      "learning_rate": 8.54529760218235e-06,
      "loss": 0.0275,
      "step": 461540
    },
    {
      "epoch": 0.7553530632417536,
      "grad_norm": 2.1413893699645996,
      "learning_rate": 8.545231709968834e-06,
      "loss": 0.03,
      "step": 461560
    },
    {
      "epoch": 0.7553857936804069,
      "grad_norm": 0.44131436944007874,
      "learning_rate": 8.545165817755316e-06,
      "loss": 0.0363,
      "step": 461580
    },
    {
      "epoch": 0.7554185241190603,
      "grad_norm": 0.732827365398407,
      "learning_rate": 8.5450999255418e-06,
      "loss": 0.0389,
      "step": 461600
    },
    {
      "epoch": 0.7554512545577136,
      "grad_norm": 1.2386184930801392,
      "learning_rate": 8.545034033328282e-06,
      "loss": 0.0313,
      "step": 461620
    },
    {
      "epoch": 0.7554839849963669,
      "grad_norm": 1.3880951404571533,
      "learning_rate": 8.544968141114765e-06,
      "loss": 0.0249,
      "step": 461640
    },
    {
      "epoch": 0.7555167154350203,
      "grad_norm": 1.5171544551849365,
      "learning_rate": 8.544902248901247e-06,
      "loss": 0.0434,
      "step": 461660
    },
    {
      "epoch": 0.7555494458736736,
      "grad_norm": 1.4750043153762817,
      "learning_rate": 8.54483635668773e-06,
      "loss": 0.0299,
      "step": 461680
    },
    {
      "epoch": 0.7555821763123269,
      "grad_norm": 1.0606642961502075,
      "learning_rate": 8.544770464474214e-06,
      "loss": 0.036,
      "step": 461700
    },
    {
      "epoch": 0.7556149067509803,
      "grad_norm": 1.625400185585022,
      "learning_rate": 8.544704572260696e-06,
      "loss": 0.0299,
      "step": 461720
    },
    {
      "epoch": 0.7556476371896336,
      "grad_norm": 0.37807223200798035,
      "learning_rate": 8.54463868004718e-06,
      "loss": 0.0341,
      "step": 461740
    },
    {
      "epoch": 0.7556803676282869,
      "grad_norm": 1.5028388500213623,
      "learning_rate": 8.544572787833663e-06,
      "loss": 0.0247,
      "step": 461760
    },
    {
      "epoch": 0.7557130980669403,
      "grad_norm": 0.5257852077484131,
      "learning_rate": 8.544506895620145e-06,
      "loss": 0.0315,
      "step": 461780
    },
    {
      "epoch": 0.7557458285055937,
      "grad_norm": 0.9690765142440796,
      "learning_rate": 8.544441003406629e-06,
      "loss": 0.0292,
      "step": 461800
    },
    {
      "epoch": 0.755778558944247,
      "grad_norm": 0.4035930335521698,
      "learning_rate": 8.54437511119311e-06,
      "loss": 0.0218,
      "step": 461820
    },
    {
      "epoch": 0.7558112893829003,
      "grad_norm": 1.394050121307373,
      "learning_rate": 8.544309218979594e-06,
      "loss": 0.0482,
      "step": 461840
    },
    {
      "epoch": 0.7558440198215537,
      "grad_norm": 0.27712422609329224,
      "learning_rate": 8.544243326766076e-06,
      "loss": 0.0338,
      "step": 461860
    },
    {
      "epoch": 0.755876750260207,
      "grad_norm": 0.7018871903419495,
      "learning_rate": 8.54417743455256e-06,
      "loss": 0.0272,
      "step": 461880
    },
    {
      "epoch": 0.7559094806988603,
      "grad_norm": 0.4696381390094757,
      "learning_rate": 8.544111542339042e-06,
      "loss": 0.0307,
      "step": 461900
    },
    {
      "epoch": 0.7559422111375137,
      "grad_norm": 1.4690693616867065,
      "learning_rate": 8.544045650125525e-06,
      "loss": 0.0333,
      "step": 461920
    },
    {
      "epoch": 0.755974941576167,
      "grad_norm": 0.9436547160148621,
      "learning_rate": 8.543979757912009e-06,
      "loss": 0.0308,
      "step": 461940
    },
    {
      "epoch": 0.7560076720148203,
      "grad_norm": 1.8139221668243408,
      "learning_rate": 8.54391386569849e-06,
      "loss": 0.0299,
      "step": 461960
    },
    {
      "epoch": 0.7560404024534737,
      "grad_norm": 4.644062519073486,
      "learning_rate": 8.543847973484974e-06,
      "loss": 0.0297,
      "step": 461980
    },
    {
      "epoch": 0.7560731328921271,
      "grad_norm": 0.6507556438446045,
      "learning_rate": 8.543782081271456e-06,
      "loss": 0.0355,
      "step": 462000
    },
    {
      "epoch": 0.7561058633307803,
      "grad_norm": 0.52407306432724,
      "learning_rate": 8.54371618905794e-06,
      "loss": 0.0421,
      "step": 462020
    },
    {
      "epoch": 0.7561385937694337,
      "grad_norm": 0.7150628566741943,
      "learning_rate": 8.543650296844422e-06,
      "loss": 0.0314,
      "step": 462040
    },
    {
      "epoch": 0.7561713242080871,
      "grad_norm": 1.1209802627563477,
      "learning_rate": 8.543584404630905e-06,
      "loss": 0.03,
      "step": 462060
    },
    {
      "epoch": 0.7562040546467403,
      "grad_norm": 0.6815893650054932,
      "learning_rate": 8.543518512417389e-06,
      "loss": 0.0232,
      "step": 462080
    },
    {
      "epoch": 0.7562367850853937,
      "grad_norm": 3.9945790767669678,
      "learning_rate": 8.54345262020387e-06,
      "loss": 0.0416,
      "step": 462100
    },
    {
      "epoch": 0.7562695155240471,
      "grad_norm": 1.45601487159729,
      "learning_rate": 8.543386727990354e-06,
      "loss": 0.0343,
      "step": 462120
    },
    {
      "epoch": 0.7563022459627003,
      "grad_norm": 0.5346137881278992,
      "learning_rate": 8.543320835776838e-06,
      "loss": 0.0282,
      "step": 462140
    },
    {
      "epoch": 0.7563349764013537,
      "grad_norm": 7.001781940460205,
      "learning_rate": 8.54325494356332e-06,
      "loss": 0.0263,
      "step": 462160
    },
    {
      "epoch": 0.7563677068400071,
      "grad_norm": 0.4786991477012634,
      "learning_rate": 8.543189051349803e-06,
      "loss": 0.0289,
      "step": 462180
    },
    {
      "epoch": 0.7564004372786604,
      "grad_norm": 1.2756155729293823,
      "learning_rate": 8.543123159136285e-06,
      "loss": 0.039,
      "step": 462200
    },
    {
      "epoch": 0.7564331677173137,
      "grad_norm": 0.8896101117134094,
      "learning_rate": 8.543057266922769e-06,
      "loss": 0.0374,
      "step": 462220
    },
    {
      "epoch": 0.7564658981559671,
      "grad_norm": 0.43470677733421326,
      "learning_rate": 8.54299137470925e-06,
      "loss": 0.0368,
      "step": 462240
    },
    {
      "epoch": 0.7564986285946205,
      "grad_norm": 0.96279376745224,
      "learning_rate": 8.542925482495734e-06,
      "loss": 0.0407,
      "step": 462260
    },
    {
      "epoch": 0.7565313590332737,
      "grad_norm": 1.180981993675232,
      "learning_rate": 8.542859590282218e-06,
      "loss": 0.0362,
      "step": 462280
    },
    {
      "epoch": 0.7565640894719271,
      "grad_norm": 1.4021183252334595,
      "learning_rate": 8.5427936980687e-06,
      "loss": 0.0328,
      "step": 462300
    },
    {
      "epoch": 0.7565968199105805,
      "grad_norm": 1.8131664991378784,
      "learning_rate": 8.542727805855183e-06,
      "loss": 0.0382,
      "step": 462320
    },
    {
      "epoch": 0.7566295503492337,
      "grad_norm": 1.8578240871429443,
      "learning_rate": 8.542661913641665e-06,
      "loss": 0.032,
      "step": 462340
    },
    {
      "epoch": 0.7566622807878871,
      "grad_norm": 1.2898119688034058,
      "learning_rate": 8.542596021428149e-06,
      "loss": 0.0276,
      "step": 462360
    },
    {
      "epoch": 0.7566950112265405,
      "grad_norm": 0.6068586111068726,
      "learning_rate": 8.54253012921463e-06,
      "loss": 0.0209,
      "step": 462380
    },
    {
      "epoch": 0.7567277416651937,
      "grad_norm": 0.46527963876724243,
      "learning_rate": 8.542464237001114e-06,
      "loss": 0.0293,
      "step": 462400
    },
    {
      "epoch": 0.7567604721038471,
      "grad_norm": 1.7249674797058105,
      "learning_rate": 8.542398344787596e-06,
      "loss": 0.0293,
      "step": 462420
    },
    {
      "epoch": 0.7567932025425005,
      "grad_norm": 1.8739123344421387,
      "learning_rate": 8.54233245257408e-06,
      "loss": 0.0322,
      "step": 462440
    },
    {
      "epoch": 0.7568259329811539,
      "grad_norm": 0.6164631247520447,
      "learning_rate": 8.542266560360562e-06,
      "loss": 0.0281,
      "step": 462460
    },
    {
      "epoch": 0.7568586634198071,
      "grad_norm": 0.11047759652137756,
      "learning_rate": 8.542200668147045e-06,
      "loss": 0.0274,
      "step": 462480
    },
    {
      "epoch": 0.7568913938584605,
      "grad_norm": 1.3906337022781372,
      "learning_rate": 8.542134775933529e-06,
      "loss": 0.0295,
      "step": 462500
    },
    {
      "epoch": 0.7569241242971139,
      "grad_norm": 1.074728012084961,
      "learning_rate": 8.54206888372001e-06,
      "loss": 0.0323,
      "step": 462520
    },
    {
      "epoch": 0.7569568547357671,
      "grad_norm": 1.5394227504730225,
      "learning_rate": 8.542002991506494e-06,
      "loss": 0.0314,
      "step": 462540
    },
    {
      "epoch": 0.7569895851744205,
      "grad_norm": 0.14749547839164734,
      "learning_rate": 8.541937099292978e-06,
      "loss": 0.0421,
      "step": 462560
    },
    {
      "epoch": 0.7570223156130739,
      "grad_norm": 0.9279181361198425,
      "learning_rate": 8.54187120707946e-06,
      "loss": 0.0245,
      "step": 462580
    },
    {
      "epoch": 0.7570550460517271,
      "grad_norm": 2.1570355892181396,
      "learning_rate": 8.541805314865943e-06,
      "loss": 0.0356,
      "step": 462600
    },
    {
      "epoch": 0.7570877764903805,
      "grad_norm": 0.9212780594825745,
      "learning_rate": 8.541739422652427e-06,
      "loss": 0.029,
      "step": 462620
    },
    {
      "epoch": 0.7571205069290339,
      "grad_norm": 0.36627694964408875,
      "learning_rate": 8.541673530438909e-06,
      "loss": 0.0361,
      "step": 462640
    },
    {
      "epoch": 0.7571532373676872,
      "grad_norm": 1.074379563331604,
      "learning_rate": 8.541607638225393e-06,
      "loss": 0.0265,
      "step": 462660
    },
    {
      "epoch": 0.7571859678063405,
      "grad_norm": 2.346205472946167,
      "learning_rate": 8.541541746011874e-06,
      "loss": 0.0244,
      "step": 462680
    },
    {
      "epoch": 0.7572186982449939,
      "grad_norm": 0.6043941378593445,
      "learning_rate": 8.541475853798358e-06,
      "loss": 0.0297,
      "step": 462700
    },
    {
      "epoch": 0.7572514286836473,
      "grad_norm": 1.3936084508895874,
      "learning_rate": 8.54140996158484e-06,
      "loss": 0.0329,
      "step": 462720
    },
    {
      "epoch": 0.7572841591223005,
      "grad_norm": 2.446815252304077,
      "learning_rate": 8.541344069371324e-06,
      "loss": 0.0384,
      "step": 462740
    },
    {
      "epoch": 0.7573168895609539,
      "grad_norm": 1.0050095319747925,
      "learning_rate": 8.541278177157805e-06,
      "loss": 0.0298,
      "step": 462760
    },
    {
      "epoch": 0.7573496199996073,
      "grad_norm": 1.8362783193588257,
      "learning_rate": 8.541212284944289e-06,
      "loss": 0.0385,
      "step": 462780
    },
    {
      "epoch": 0.7573823504382605,
      "grad_norm": 0.6221219897270203,
      "learning_rate": 8.541146392730771e-06,
      "loss": 0.0251,
      "step": 462800
    },
    {
      "epoch": 0.7574150808769139,
      "grad_norm": 0.9891215562820435,
      "learning_rate": 8.541080500517254e-06,
      "loss": 0.0426,
      "step": 462820
    },
    {
      "epoch": 0.7574478113155673,
      "grad_norm": 1.5040913820266724,
      "learning_rate": 8.541014608303736e-06,
      "loss": 0.0282,
      "step": 462840
    },
    {
      "epoch": 0.7574805417542206,
      "grad_norm": 0.9254205822944641,
      "learning_rate": 8.54094871609022e-06,
      "loss": 0.0265,
      "step": 462860
    },
    {
      "epoch": 0.7575132721928739,
      "grad_norm": 1.0905877351760864,
      "learning_rate": 8.540882823876704e-06,
      "loss": 0.0279,
      "step": 462880
    },
    {
      "epoch": 0.7575460026315273,
      "grad_norm": 0.39072632789611816,
      "learning_rate": 8.540816931663185e-06,
      "loss": 0.0395,
      "step": 462900
    },
    {
      "epoch": 0.7575787330701806,
      "grad_norm": 0.2547779977321625,
      "learning_rate": 8.540751039449669e-06,
      "loss": 0.0341,
      "step": 462920
    },
    {
      "epoch": 0.7576114635088339,
      "grad_norm": 0.4271561801433563,
      "learning_rate": 8.540685147236153e-06,
      "loss": 0.0261,
      "step": 462940
    },
    {
      "epoch": 0.7576441939474873,
      "grad_norm": 0.9589815139770508,
      "learning_rate": 8.540619255022635e-06,
      "loss": 0.0317,
      "step": 462960
    },
    {
      "epoch": 0.7576769243861406,
      "grad_norm": 1.5955743789672852,
      "learning_rate": 8.540553362809118e-06,
      "loss": 0.0361,
      "step": 462980
    },
    {
      "epoch": 0.7577096548247939,
      "grad_norm": 1.4422166347503662,
      "learning_rate": 8.540487470595602e-06,
      "loss": 0.04,
      "step": 463000
    },
    {
      "epoch": 0.7577423852634473,
      "grad_norm": 3.370413303375244,
      "learning_rate": 8.540421578382084e-06,
      "loss": 0.0259,
      "step": 463020
    },
    {
      "epoch": 0.7577751157021007,
      "grad_norm": 1.1695932149887085,
      "learning_rate": 8.540355686168567e-06,
      "loss": 0.0364,
      "step": 463040
    },
    {
      "epoch": 0.757807846140754,
      "grad_norm": 0.3603701591491699,
      "learning_rate": 8.540289793955049e-06,
      "loss": 0.0246,
      "step": 463060
    },
    {
      "epoch": 0.7578405765794073,
      "grad_norm": 0.2951796352863312,
      "learning_rate": 8.540223901741533e-06,
      "loss": 0.0266,
      "step": 463080
    },
    {
      "epoch": 0.7578733070180607,
      "grad_norm": 0.5491712093353271,
      "learning_rate": 8.540158009528015e-06,
      "loss": 0.0339,
      "step": 463100
    },
    {
      "epoch": 0.757906037456714,
      "grad_norm": 0.5514310002326965,
      "learning_rate": 8.540092117314498e-06,
      "loss": 0.0339,
      "step": 463120
    },
    {
      "epoch": 0.7579387678953673,
      "grad_norm": 3.4069106578826904,
      "learning_rate": 8.54002622510098e-06,
      "loss": 0.0323,
      "step": 463140
    },
    {
      "epoch": 0.7579714983340207,
      "grad_norm": 0.5978263020515442,
      "learning_rate": 8.539960332887464e-06,
      "loss": 0.034,
      "step": 463160
    },
    {
      "epoch": 0.758004228772674,
      "grad_norm": 0.9294242858886719,
      "learning_rate": 8.539894440673945e-06,
      "loss": 0.0247,
      "step": 463180
    },
    {
      "epoch": 0.7580369592113273,
      "grad_norm": 1.166177749633789,
      "learning_rate": 8.539828548460429e-06,
      "loss": 0.0289,
      "step": 463200
    },
    {
      "epoch": 0.7580696896499807,
      "grad_norm": 1.6662803888320923,
      "learning_rate": 8.539762656246911e-06,
      "loss": 0.0457,
      "step": 463220
    },
    {
      "epoch": 0.758102420088634,
      "grad_norm": 2.751574993133545,
      "learning_rate": 8.539696764033395e-06,
      "loss": 0.0297,
      "step": 463240
    },
    {
      "epoch": 0.7581351505272874,
      "grad_norm": 0.2816751003265381,
      "learning_rate": 8.539630871819876e-06,
      "loss": 0.0336,
      "step": 463260
    },
    {
      "epoch": 0.7581678809659407,
      "grad_norm": 0.2917572259902954,
      "learning_rate": 8.53956497960636e-06,
      "loss": 0.0254,
      "step": 463280
    },
    {
      "epoch": 0.758200611404594,
      "grad_norm": 0.6626287698745728,
      "learning_rate": 8.539499087392844e-06,
      "loss": 0.0323,
      "step": 463300
    },
    {
      "epoch": 0.7582333418432474,
      "grad_norm": 1.021952748298645,
      "learning_rate": 8.539433195179326e-06,
      "loss": 0.0437,
      "step": 463320
    },
    {
      "epoch": 0.7582660722819007,
      "grad_norm": 0.9484269618988037,
      "learning_rate": 8.539367302965809e-06,
      "loss": 0.0387,
      "step": 463340
    },
    {
      "epoch": 0.758298802720554,
      "grad_norm": 0.29277023673057556,
      "learning_rate": 8.539301410752293e-06,
      "loss": 0.0338,
      "step": 463360
    },
    {
      "epoch": 0.7583315331592074,
      "grad_norm": 0.23933100700378418,
      "learning_rate": 8.539235518538775e-06,
      "loss": 0.0307,
      "step": 463380
    },
    {
      "epoch": 0.7583642635978607,
      "grad_norm": 0.44741085171699524,
      "learning_rate": 8.539169626325258e-06,
      "loss": 0.0313,
      "step": 463400
    },
    {
      "epoch": 0.7583969940365141,
      "grad_norm": 0.5097838640213013,
      "learning_rate": 8.539103734111742e-06,
      "loss": 0.0251,
      "step": 463420
    },
    {
      "epoch": 0.7584297244751674,
      "grad_norm": 0.6052950620651245,
      "learning_rate": 8.539037841898224e-06,
      "loss": 0.0347,
      "step": 463440
    },
    {
      "epoch": 0.7584624549138208,
      "grad_norm": 0.7082247138023376,
      "learning_rate": 8.538971949684707e-06,
      "loss": 0.0286,
      "step": 463460
    },
    {
      "epoch": 0.7584951853524741,
      "grad_norm": 1.2224770784378052,
      "learning_rate": 8.538906057471189e-06,
      "loss": 0.042,
      "step": 463480
    },
    {
      "epoch": 0.7585279157911274,
      "grad_norm": 0.8698090314865112,
      "learning_rate": 8.538840165257673e-06,
      "loss": 0.0325,
      "step": 463500
    },
    {
      "epoch": 0.7585606462297808,
      "grad_norm": 1.2929987907409668,
      "learning_rate": 8.538774273044155e-06,
      "loss": 0.0332,
      "step": 463520
    },
    {
      "epoch": 0.7585933766684341,
      "grad_norm": 0.5376071929931641,
      "learning_rate": 8.538708380830638e-06,
      "loss": 0.028,
      "step": 463540
    },
    {
      "epoch": 0.7586261071070874,
      "grad_norm": 1.0780882835388184,
      "learning_rate": 8.53864248861712e-06,
      "loss": 0.0363,
      "step": 463560
    },
    {
      "epoch": 0.7586588375457408,
      "grad_norm": 0.2801775634288788,
      "learning_rate": 8.538576596403604e-06,
      "loss": 0.0281,
      "step": 463580
    },
    {
      "epoch": 0.7586915679843941,
      "grad_norm": 1.0490731000900269,
      "learning_rate": 8.538510704190086e-06,
      "loss": 0.0287,
      "step": 463600
    },
    {
      "epoch": 0.7587242984230474,
      "grad_norm": 1.1118634939193726,
      "learning_rate": 8.538444811976569e-06,
      "loss": 0.0412,
      "step": 463620
    },
    {
      "epoch": 0.7587570288617008,
      "grad_norm": 1.3559215068817139,
      "learning_rate": 8.538378919763051e-06,
      "loss": 0.0231,
      "step": 463640
    },
    {
      "epoch": 0.7587897593003542,
      "grad_norm": 1.3457773923873901,
      "learning_rate": 8.538313027549535e-06,
      "loss": 0.032,
      "step": 463660
    },
    {
      "epoch": 0.7588224897390075,
      "grad_norm": 6.39864444732666,
      "learning_rate": 8.538247135336018e-06,
      "loss": 0.0275,
      "step": 463680
    },
    {
      "epoch": 0.7588552201776608,
      "grad_norm": 0.31200480461120605,
      "learning_rate": 8.5381812431225e-06,
      "loss": 0.0353,
      "step": 463700
    },
    {
      "epoch": 0.7588879506163142,
      "grad_norm": 1.2585537433624268,
      "learning_rate": 8.538115350908984e-06,
      "loss": 0.0322,
      "step": 463720
    },
    {
      "epoch": 0.7589206810549675,
      "grad_norm": 2.008023500442505,
      "learning_rate": 8.538049458695467e-06,
      "loss": 0.0406,
      "step": 463740
    },
    {
      "epoch": 0.7589534114936208,
      "grad_norm": 1.1344201564788818,
      "learning_rate": 8.53798356648195e-06,
      "loss": 0.0246,
      "step": 463760
    },
    {
      "epoch": 0.7589861419322742,
      "grad_norm": 1.280892252922058,
      "learning_rate": 8.537917674268433e-06,
      "loss": 0.0329,
      "step": 463780
    },
    {
      "epoch": 0.7590188723709275,
      "grad_norm": 2.3407886028289795,
      "learning_rate": 8.537851782054916e-06,
      "loss": 0.0365,
      "step": 463800
    },
    {
      "epoch": 0.7590516028095808,
      "grad_norm": 2.5879030227661133,
      "learning_rate": 8.537785889841398e-06,
      "loss": 0.0254,
      "step": 463820
    },
    {
      "epoch": 0.7590843332482342,
      "grad_norm": 2.5248379707336426,
      "learning_rate": 8.537719997627882e-06,
      "loss": 0.0219,
      "step": 463840
    },
    {
      "epoch": 0.7591170636868876,
      "grad_norm": 1.5004485845565796,
      "learning_rate": 8.537654105414364e-06,
      "loss": 0.0262,
      "step": 463860
    },
    {
      "epoch": 0.7591497941255408,
      "grad_norm": 0.5382384061813354,
      "learning_rate": 8.537588213200847e-06,
      "loss": 0.0317,
      "step": 463880
    },
    {
      "epoch": 0.7591825245641942,
      "grad_norm": 0.8390370607376099,
      "learning_rate": 8.53752232098733e-06,
      "loss": 0.0322,
      "step": 463900
    },
    {
      "epoch": 0.7592152550028476,
      "grad_norm": 0.5918781161308289,
      "learning_rate": 8.537456428773813e-06,
      "loss": 0.0212,
      "step": 463920
    },
    {
      "epoch": 0.7592479854415009,
      "grad_norm": 0.4905887544155121,
      "learning_rate": 8.537390536560295e-06,
      "loss": 0.0378,
      "step": 463940
    },
    {
      "epoch": 0.7592807158801542,
      "grad_norm": 0.32611003518104553,
      "learning_rate": 8.537324644346778e-06,
      "loss": 0.0229,
      "step": 463960
    },
    {
      "epoch": 0.7593134463188076,
      "grad_norm": 1.2489550113677979,
      "learning_rate": 8.53725875213326e-06,
      "loss": 0.0249,
      "step": 463980
    },
    {
      "epoch": 0.7593461767574609,
      "grad_norm": 1.436336636543274,
      "learning_rate": 8.537192859919744e-06,
      "loss": 0.0378,
      "step": 464000
    },
    {
      "epoch": 0.7593789071961142,
      "grad_norm": 2.0294737815856934,
      "learning_rate": 8.537126967706227e-06,
      "loss": 0.0266,
      "step": 464020
    },
    {
      "epoch": 0.7594116376347676,
      "grad_norm": 1.8406434059143066,
      "learning_rate": 8.53706107549271e-06,
      "loss": 0.0421,
      "step": 464040
    },
    {
      "epoch": 0.759444368073421,
      "grad_norm": 1.7027872800827026,
      "learning_rate": 8.536995183279193e-06,
      "loss": 0.033,
      "step": 464060
    },
    {
      "epoch": 0.7594770985120742,
      "grad_norm": 1.4964169263839722,
      "learning_rate": 8.536929291065675e-06,
      "loss": 0.0337,
      "step": 464080
    },
    {
      "epoch": 0.7595098289507276,
      "grad_norm": 0.41838738322257996,
      "learning_rate": 8.536863398852158e-06,
      "loss": 0.0272,
      "step": 464100
    },
    {
      "epoch": 0.759542559389381,
      "grad_norm": 1.7500886917114258,
      "learning_rate": 8.536797506638642e-06,
      "loss": 0.0332,
      "step": 464120
    },
    {
      "epoch": 0.7595752898280342,
      "grad_norm": 0.26485708355903625,
      "learning_rate": 8.536731614425124e-06,
      "loss": 0.0307,
      "step": 464140
    },
    {
      "epoch": 0.7596080202666876,
      "grad_norm": 1.9434301853179932,
      "learning_rate": 8.536665722211607e-06,
      "loss": 0.0328,
      "step": 464160
    },
    {
      "epoch": 0.759640750705341,
      "grad_norm": 1.7399710416793823,
      "learning_rate": 8.536599829998091e-06,
      "loss": 0.0282,
      "step": 464180
    },
    {
      "epoch": 0.7596734811439942,
      "grad_norm": 2.509058713912964,
      "learning_rate": 8.536533937784573e-06,
      "loss": 0.0291,
      "step": 464200
    },
    {
      "epoch": 0.7597062115826476,
      "grad_norm": 15.351947784423828,
      "learning_rate": 8.536468045571056e-06,
      "loss": 0.043,
      "step": 464220
    },
    {
      "epoch": 0.759738942021301,
      "grad_norm": 1.0565855503082275,
      "learning_rate": 8.536402153357538e-06,
      "loss": 0.0339,
      "step": 464240
    },
    {
      "epoch": 0.7597716724599544,
      "grad_norm": 1.4699068069458008,
      "learning_rate": 8.536336261144022e-06,
      "loss": 0.0318,
      "step": 464260
    },
    {
      "epoch": 0.7598044028986076,
      "grad_norm": 2.3395330905914307,
      "learning_rate": 8.536270368930504e-06,
      "loss": 0.0354,
      "step": 464280
    },
    {
      "epoch": 0.759837133337261,
      "grad_norm": 0.22829045355319977,
      "learning_rate": 8.536204476716987e-06,
      "loss": 0.0306,
      "step": 464300
    },
    {
      "epoch": 0.7598698637759144,
      "grad_norm": 0.4355300962924957,
      "learning_rate": 8.53613858450347e-06,
      "loss": 0.0263,
      "step": 464320
    },
    {
      "epoch": 0.7599025942145676,
      "grad_norm": 0.7549124956130981,
      "learning_rate": 8.536072692289953e-06,
      "loss": 0.0306,
      "step": 464340
    },
    {
      "epoch": 0.759935324653221,
      "grad_norm": 2.1927924156188965,
      "learning_rate": 8.536006800076435e-06,
      "loss": 0.0284,
      "step": 464360
    },
    {
      "epoch": 0.7599680550918744,
      "grad_norm": 1.2468023300170898,
      "learning_rate": 8.535940907862918e-06,
      "loss": 0.0222,
      "step": 464380
    },
    {
      "epoch": 0.7600007855305276,
      "grad_norm": 1.7192732095718384,
      "learning_rate": 8.535875015649402e-06,
      "loss": 0.0337,
      "step": 464400
    },
    {
      "epoch": 0.760033515969181,
      "grad_norm": 0.4680330157279968,
      "learning_rate": 8.535809123435884e-06,
      "loss": 0.0304,
      "step": 464420
    },
    {
      "epoch": 0.7600662464078344,
      "grad_norm": 0.8086274862289429,
      "learning_rate": 8.535743231222367e-06,
      "loss": 0.0262,
      "step": 464440
    },
    {
      "epoch": 0.7600989768464878,
      "grad_norm": 0.5843347907066345,
      "learning_rate": 8.53567733900885e-06,
      "loss": 0.0279,
      "step": 464460
    },
    {
      "epoch": 0.760131707285141,
      "grad_norm": 2.0423858165740967,
      "learning_rate": 8.535611446795333e-06,
      "loss": 0.0263,
      "step": 464480
    },
    {
      "epoch": 0.7601644377237944,
      "grad_norm": 1.016381859779358,
      "learning_rate": 8.535545554581815e-06,
      "loss": 0.0374,
      "step": 464500
    },
    {
      "epoch": 0.7601971681624478,
      "grad_norm": 0.611481785774231,
      "learning_rate": 8.535479662368298e-06,
      "loss": 0.0263,
      "step": 464520
    },
    {
      "epoch": 0.760229898601101,
      "grad_norm": 0.8322532176971436,
      "learning_rate": 8.535413770154782e-06,
      "loss": 0.0276,
      "step": 464540
    },
    {
      "epoch": 0.7602626290397544,
      "grad_norm": 1.4424233436584473,
      "learning_rate": 8.535347877941264e-06,
      "loss": 0.0188,
      "step": 464560
    },
    {
      "epoch": 0.7602953594784078,
      "grad_norm": 1.530963659286499,
      "learning_rate": 8.535281985727747e-06,
      "loss": 0.0346,
      "step": 464580
    },
    {
      "epoch": 0.760328089917061,
      "grad_norm": 3.407104969024658,
      "learning_rate": 8.535216093514231e-06,
      "loss": 0.041,
      "step": 464600
    },
    {
      "epoch": 0.7603608203557144,
      "grad_norm": 1.504902720451355,
      "learning_rate": 8.535150201300713e-06,
      "loss": 0.0298,
      "step": 464620
    },
    {
      "epoch": 0.7603935507943678,
      "grad_norm": 5.587480068206787,
      "learning_rate": 8.535084309087197e-06,
      "loss": 0.0255,
      "step": 464640
    },
    {
      "epoch": 0.7604262812330211,
      "grad_norm": 0.3547353446483612,
      "learning_rate": 8.535018416873678e-06,
      "loss": 0.0238,
      "step": 464660
    },
    {
      "epoch": 0.7604590116716744,
      "grad_norm": 1.5261131525039673,
      "learning_rate": 8.534952524660162e-06,
      "loss": 0.0393,
      "step": 464680
    },
    {
      "epoch": 0.7604917421103278,
      "grad_norm": 1.7079218626022339,
      "learning_rate": 8.534886632446644e-06,
      "loss": 0.0272,
      "step": 464700
    },
    {
      "epoch": 0.7605244725489811,
      "grad_norm": 0.27058684825897217,
      "learning_rate": 8.534820740233127e-06,
      "loss": 0.0385,
      "step": 464720
    },
    {
      "epoch": 0.7605572029876344,
      "grad_norm": 1.9357856512069702,
      "learning_rate": 8.534754848019611e-06,
      "loss": 0.0344,
      "step": 464740
    },
    {
      "epoch": 0.7605899334262878,
      "grad_norm": 2.330711603164673,
      "learning_rate": 8.534688955806093e-06,
      "loss": 0.0339,
      "step": 464760
    },
    {
      "epoch": 0.7606226638649412,
      "grad_norm": 1.8291714191436768,
      "learning_rate": 8.534623063592577e-06,
      "loss": 0.0301,
      "step": 464780
    },
    {
      "epoch": 0.7606553943035944,
      "grad_norm": 0.7821352481842041,
      "learning_rate": 8.534557171379058e-06,
      "loss": 0.0357,
      "step": 464800
    },
    {
      "epoch": 0.7606881247422478,
      "grad_norm": 0.23667453229427338,
      "learning_rate": 8.534491279165542e-06,
      "loss": 0.0277,
      "step": 464820
    },
    {
      "epoch": 0.7607208551809012,
      "grad_norm": 1.4292235374450684,
      "learning_rate": 8.534425386952024e-06,
      "loss": 0.0343,
      "step": 464840
    },
    {
      "epoch": 0.7607535856195545,
      "grad_norm": 0.6000710129737854,
      "learning_rate": 8.534359494738507e-06,
      "loss": 0.0214,
      "step": 464860
    },
    {
      "epoch": 0.7607863160582078,
      "grad_norm": 0.8018075227737427,
      "learning_rate": 8.53429360252499e-06,
      "loss": 0.0268,
      "step": 464880
    },
    {
      "epoch": 0.7608190464968612,
      "grad_norm": 0.615344762802124,
      "learning_rate": 8.534227710311473e-06,
      "loss": 0.0288,
      "step": 464900
    },
    {
      "epoch": 0.7608517769355145,
      "grad_norm": 0.709245502948761,
      "learning_rate": 8.534161818097957e-06,
      "loss": 0.0276,
      "step": 464920
    },
    {
      "epoch": 0.7608845073741678,
      "grad_norm": 0.44506287574768066,
      "learning_rate": 8.534095925884438e-06,
      "loss": 0.0357,
      "step": 464940
    },
    {
      "epoch": 0.7609172378128212,
      "grad_norm": 0.7302281856536865,
      "learning_rate": 8.534030033670922e-06,
      "loss": 0.0275,
      "step": 464960
    },
    {
      "epoch": 0.7609499682514745,
      "grad_norm": 1.4646472930908203,
      "learning_rate": 8.533964141457406e-06,
      "loss": 0.0331,
      "step": 464980
    },
    {
      "epoch": 0.7609826986901278,
      "grad_norm": 2.460373878479004,
      "learning_rate": 8.533898249243888e-06,
      "loss": 0.0332,
      "step": 465000
    },
    {
      "epoch": 0.7610154291287812,
      "grad_norm": 0.5277031660079956,
      "learning_rate": 8.533832357030371e-06,
      "loss": 0.0291,
      "step": 465020
    },
    {
      "epoch": 0.7610481595674345,
      "grad_norm": 0.13365933299064636,
      "learning_rate": 8.533766464816853e-06,
      "loss": 0.0341,
      "step": 465040
    },
    {
      "epoch": 0.7610808900060878,
      "grad_norm": 3.7057459354400635,
      "learning_rate": 8.533700572603337e-06,
      "loss": 0.021,
      "step": 465060
    },
    {
      "epoch": 0.7611136204447412,
      "grad_norm": 0.44637495279312134,
      "learning_rate": 8.53363468038982e-06,
      "loss": 0.0279,
      "step": 465080
    },
    {
      "epoch": 0.7611463508833946,
      "grad_norm": 0.9197227358818054,
      "learning_rate": 8.533568788176302e-06,
      "loss": 0.0343,
      "step": 465100
    },
    {
      "epoch": 0.7611790813220479,
      "grad_norm": 0.4548499882221222,
      "learning_rate": 8.533502895962786e-06,
      "loss": 0.0265,
      "step": 465120
    },
    {
      "epoch": 0.7612118117607012,
      "grad_norm": 0.6593096256256104,
      "learning_rate": 8.533437003749268e-06,
      "loss": 0.0318,
      "step": 465140
    },
    {
      "epoch": 0.7612445421993546,
      "grad_norm": 0.3944980800151825,
      "learning_rate": 8.533371111535751e-06,
      "loss": 0.0242,
      "step": 465160
    },
    {
      "epoch": 0.7612772726380079,
      "grad_norm": 0.4818074107170105,
      "learning_rate": 8.533305219322233e-06,
      "loss": 0.0245,
      "step": 465180
    },
    {
      "epoch": 0.7613100030766612,
      "grad_norm": 0.6859666705131531,
      "learning_rate": 8.533239327108717e-06,
      "loss": 0.025,
      "step": 465200
    },
    {
      "epoch": 0.7613427335153146,
      "grad_norm": 1.637237787246704,
      "learning_rate": 8.533173434895198e-06,
      "loss": 0.0305,
      "step": 465220
    },
    {
      "epoch": 0.7613754639539679,
      "grad_norm": 1.972835898399353,
      "learning_rate": 8.533107542681682e-06,
      "loss": 0.0396,
      "step": 465240
    },
    {
      "epoch": 0.7614081943926212,
      "grad_norm": 1.1922521591186523,
      "learning_rate": 8.533041650468164e-06,
      "loss": 0.0291,
      "step": 465260
    },
    {
      "epoch": 0.7614409248312746,
      "grad_norm": 1.470075011253357,
      "learning_rate": 8.532975758254648e-06,
      "loss": 0.0295,
      "step": 465280
    },
    {
      "epoch": 0.7614736552699279,
      "grad_norm": 0.7803766131401062,
      "learning_rate": 8.53290986604113e-06,
      "loss": 0.0313,
      "step": 465300
    },
    {
      "epoch": 0.7615063857085813,
      "grad_norm": 0.3852885663509369,
      "learning_rate": 8.532843973827613e-06,
      "loss": 0.0379,
      "step": 465320
    },
    {
      "epoch": 0.7615391161472346,
      "grad_norm": 1.7791333198547363,
      "learning_rate": 8.532778081614097e-06,
      "loss": 0.0319,
      "step": 465340
    },
    {
      "epoch": 0.761571846585888,
      "grad_norm": 0.768618106842041,
      "learning_rate": 8.532712189400579e-06,
      "loss": 0.0397,
      "step": 465360
    },
    {
      "epoch": 0.7616045770245413,
      "grad_norm": 1.2069475650787354,
      "learning_rate": 8.532646297187062e-06,
      "loss": 0.0318,
      "step": 465380
    },
    {
      "epoch": 0.7616373074631946,
      "grad_norm": 1.0692055225372314,
      "learning_rate": 8.532580404973546e-06,
      "loss": 0.0172,
      "step": 465400
    },
    {
      "epoch": 0.761670037901848,
      "grad_norm": 1.332787275314331,
      "learning_rate": 8.532514512760028e-06,
      "loss": 0.0372,
      "step": 465420
    },
    {
      "epoch": 0.7617027683405013,
      "grad_norm": 1.9147562980651855,
      "learning_rate": 8.532448620546511e-06,
      "loss": 0.0274,
      "step": 465440
    },
    {
      "epoch": 0.7617354987791546,
      "grad_norm": 0.4033743739128113,
      "learning_rate": 8.532382728332995e-06,
      "loss": 0.0419,
      "step": 465460
    },
    {
      "epoch": 0.761768229217808,
      "grad_norm": 1.1965923309326172,
      "learning_rate": 8.532316836119477e-06,
      "loss": 0.0342,
      "step": 465480
    },
    {
      "epoch": 0.7618009596564613,
      "grad_norm": 0.6626724600791931,
      "learning_rate": 8.53225094390596e-06,
      "loss": 0.0266,
      "step": 465500
    },
    {
      "epoch": 0.7618336900951147,
      "grad_norm": 0.6201573610305786,
      "learning_rate": 8.532185051692442e-06,
      "loss": 0.0306,
      "step": 465520
    },
    {
      "epoch": 0.761866420533768,
      "grad_norm": 0.5167562961578369,
      "learning_rate": 8.532119159478926e-06,
      "loss": 0.0286,
      "step": 465540
    },
    {
      "epoch": 0.7618991509724213,
      "grad_norm": 0.41624537110328674,
      "learning_rate": 8.532053267265408e-06,
      "loss": 0.0282,
      "step": 465560
    },
    {
      "epoch": 0.7619318814110747,
      "grad_norm": 0.44561144709587097,
      "learning_rate": 8.531987375051891e-06,
      "loss": 0.0227,
      "step": 465580
    },
    {
      "epoch": 0.761964611849728,
      "grad_norm": 1.8154001235961914,
      "learning_rate": 8.531921482838373e-06,
      "loss": 0.035,
      "step": 465600
    },
    {
      "epoch": 0.7619973422883813,
      "grad_norm": 1.8550573587417603,
      "learning_rate": 8.531855590624857e-06,
      "loss": 0.035,
      "step": 465620
    },
    {
      "epoch": 0.7620300727270347,
      "grad_norm": 1.2844383716583252,
      "learning_rate": 8.531789698411339e-06,
      "loss": 0.0228,
      "step": 465640
    },
    {
      "epoch": 0.762062803165688,
      "grad_norm": 1.1720274686813354,
      "learning_rate": 8.531723806197822e-06,
      "loss": 0.0289,
      "step": 465660
    },
    {
      "epoch": 0.7620955336043413,
      "grad_norm": 0.8058829307556152,
      "learning_rate": 8.531657913984304e-06,
      "loss": 0.046,
      "step": 465680
    },
    {
      "epoch": 0.7621282640429947,
      "grad_norm": 1.558860182762146,
      "learning_rate": 8.531592021770788e-06,
      "loss": 0.0382,
      "step": 465700
    },
    {
      "epoch": 0.7621609944816481,
      "grad_norm": 0.43269696831703186,
      "learning_rate": 8.531526129557271e-06,
      "loss": 0.033,
      "step": 465720
    },
    {
      "epoch": 0.7621937249203014,
      "grad_norm": 0.859704852104187,
      "learning_rate": 8.531460237343753e-06,
      "loss": 0.0356,
      "step": 465740
    },
    {
      "epoch": 0.7622264553589547,
      "grad_norm": 1.5967333316802979,
      "learning_rate": 8.531394345130237e-06,
      "loss": 0.0329,
      "step": 465760
    },
    {
      "epoch": 0.7622591857976081,
      "grad_norm": 1.7991571426391602,
      "learning_rate": 8.53132845291672e-06,
      "loss": 0.0241,
      "step": 465780
    },
    {
      "epoch": 0.7622919162362614,
      "grad_norm": 0.3909567892551422,
      "learning_rate": 8.531262560703202e-06,
      "loss": 0.0379,
      "step": 465800
    },
    {
      "epoch": 0.7623246466749147,
      "grad_norm": 0.4577280879020691,
      "learning_rate": 8.531196668489686e-06,
      "loss": 0.028,
      "step": 465820
    },
    {
      "epoch": 0.7623573771135681,
      "grad_norm": 0.5312709808349609,
      "learning_rate": 8.53113077627617e-06,
      "loss": 0.0284,
      "step": 465840
    },
    {
      "epoch": 0.7623901075522214,
      "grad_norm": 2.054064989089966,
      "learning_rate": 8.531064884062651e-06,
      "loss": 0.0302,
      "step": 465860
    },
    {
      "epoch": 0.7624228379908747,
      "grad_norm": 0.8066231608390808,
      "learning_rate": 8.530998991849135e-06,
      "loss": 0.0329,
      "step": 465880
    },
    {
      "epoch": 0.7624555684295281,
      "grad_norm": 1.624433159828186,
      "learning_rate": 8.530933099635617e-06,
      "loss": 0.0305,
      "step": 465900
    },
    {
      "epoch": 0.7624882988681815,
      "grad_norm": 0.645211935043335,
      "learning_rate": 8.5308672074221e-06,
      "loss": 0.0284,
      "step": 465920
    },
    {
      "epoch": 0.7625210293068347,
      "grad_norm": 0.656311571598053,
      "learning_rate": 8.530801315208582e-06,
      "loss": 0.0327,
      "step": 465940
    },
    {
      "epoch": 0.7625537597454881,
      "grad_norm": 0.8626009225845337,
      "learning_rate": 8.530735422995066e-06,
      "loss": 0.0323,
      "step": 465960
    },
    {
      "epoch": 0.7625864901841415,
      "grad_norm": 0.9649755954742432,
      "learning_rate": 8.530669530781548e-06,
      "loss": 0.0267,
      "step": 465980
    },
    {
      "epoch": 0.7626192206227947,
      "grad_norm": 0.7592427730560303,
      "learning_rate": 8.530603638568031e-06,
      "loss": 0.0351,
      "step": 466000
    },
    {
      "epoch": 0.7626519510614481,
      "grad_norm": 0.33341681957244873,
      "learning_rate": 8.530537746354513e-06,
      "loss": 0.0383,
      "step": 466020
    },
    {
      "epoch": 0.7626846815001015,
      "grad_norm": 3.289092540740967,
      "learning_rate": 8.530471854140997e-06,
      "loss": 0.0435,
      "step": 466040
    },
    {
      "epoch": 0.7627174119387548,
      "grad_norm": 4.287530899047852,
      "learning_rate": 8.530405961927479e-06,
      "loss": 0.0308,
      "step": 466060
    },
    {
      "epoch": 0.7627501423774081,
      "grad_norm": 0.2432248443365097,
      "learning_rate": 8.530340069713962e-06,
      "loss": 0.0267,
      "step": 466080
    },
    {
      "epoch": 0.7627828728160615,
      "grad_norm": 0.6227967143058777,
      "learning_rate": 8.530274177500444e-06,
      "loss": 0.0278,
      "step": 466100
    },
    {
      "epoch": 0.7628156032547149,
      "grad_norm": 0.8174148797988892,
      "learning_rate": 8.530208285286928e-06,
      "loss": 0.0225,
      "step": 466120
    },
    {
      "epoch": 0.7628483336933681,
      "grad_norm": 0.4181215465068817,
      "learning_rate": 8.530142393073411e-06,
      "loss": 0.0259,
      "step": 466140
    },
    {
      "epoch": 0.7628810641320215,
      "grad_norm": 1.3339424133300781,
      "learning_rate": 8.530076500859893e-06,
      "loss": 0.0402,
      "step": 466160
    },
    {
      "epoch": 0.7629137945706749,
      "grad_norm": 1.292206883430481,
      "learning_rate": 8.530010608646377e-06,
      "loss": 0.0287,
      "step": 466180
    },
    {
      "epoch": 0.7629465250093281,
      "grad_norm": 0.8661595582962036,
      "learning_rate": 8.52994471643286e-06,
      "loss": 0.0288,
      "step": 466200
    },
    {
      "epoch": 0.7629792554479815,
      "grad_norm": 1.1151612997055054,
      "learning_rate": 8.529878824219342e-06,
      "loss": 0.0241,
      "step": 466220
    },
    {
      "epoch": 0.7630119858866349,
      "grad_norm": 0.8939868807792664,
      "learning_rate": 8.529812932005826e-06,
      "loss": 0.0375,
      "step": 466240
    },
    {
      "epoch": 0.7630447163252881,
      "grad_norm": 0.5692499279975891,
      "learning_rate": 8.52974703979231e-06,
      "loss": 0.0278,
      "step": 466260
    },
    {
      "epoch": 0.7630774467639415,
      "grad_norm": 0.678957462310791,
      "learning_rate": 8.529681147578791e-06,
      "loss": 0.0303,
      "step": 466280
    },
    {
      "epoch": 0.7631101772025949,
      "grad_norm": 1.0277546644210815,
      "learning_rate": 8.529615255365275e-06,
      "loss": 0.0267,
      "step": 466300
    },
    {
      "epoch": 0.7631429076412483,
      "grad_norm": 1.1039032936096191,
      "learning_rate": 8.529549363151757e-06,
      "loss": 0.0375,
      "step": 466320
    },
    {
      "epoch": 0.7631756380799015,
      "grad_norm": 0.5001305937767029,
      "learning_rate": 8.52948347093824e-06,
      "loss": 0.0347,
      "step": 466340
    },
    {
      "epoch": 0.7632083685185549,
      "grad_norm": 0.6714792847633362,
      "learning_rate": 8.529417578724722e-06,
      "loss": 0.0309,
      "step": 466360
    },
    {
      "epoch": 0.7632410989572083,
      "grad_norm": 1.4379879236221313,
      "learning_rate": 8.529351686511206e-06,
      "loss": 0.0462,
      "step": 466380
    },
    {
      "epoch": 0.7632738293958615,
      "grad_norm": 1.1669118404388428,
      "learning_rate": 8.529285794297688e-06,
      "loss": 0.0313,
      "step": 466400
    },
    {
      "epoch": 0.7633065598345149,
      "grad_norm": 1.928510308265686,
      "learning_rate": 8.529219902084171e-06,
      "loss": 0.032,
      "step": 466420
    },
    {
      "epoch": 0.7633392902731683,
      "grad_norm": 2.697666645050049,
      "learning_rate": 8.529154009870653e-06,
      "loss": 0.0276,
      "step": 466440
    },
    {
      "epoch": 0.7633720207118215,
      "grad_norm": 1.5979928970336914,
      "learning_rate": 8.529088117657137e-06,
      "loss": 0.044,
      "step": 466460
    },
    {
      "epoch": 0.7634047511504749,
      "grad_norm": 2.021350383758545,
      "learning_rate": 8.52902222544362e-06,
      "loss": 0.0329,
      "step": 466480
    },
    {
      "epoch": 0.7634374815891283,
      "grad_norm": 1.0940691232681274,
      "learning_rate": 8.528956333230102e-06,
      "loss": 0.0335,
      "step": 466500
    },
    {
      "epoch": 0.7634702120277816,
      "grad_norm": 0.6722792387008667,
      "learning_rate": 8.528890441016586e-06,
      "loss": 0.0335,
      "step": 466520
    },
    {
      "epoch": 0.7635029424664349,
      "grad_norm": 0.3050242066383362,
      "learning_rate": 8.528824548803068e-06,
      "loss": 0.0327,
      "step": 466540
    },
    {
      "epoch": 0.7635356729050883,
      "grad_norm": 1.3730493783950806,
      "learning_rate": 8.528758656589551e-06,
      "loss": 0.0218,
      "step": 466560
    },
    {
      "epoch": 0.7635684033437417,
      "grad_norm": 0.6258889436721802,
      "learning_rate": 8.528692764376035e-06,
      "loss": 0.0374,
      "step": 466580
    },
    {
      "epoch": 0.7636011337823949,
      "grad_norm": 1.6508291959762573,
      "learning_rate": 8.528626872162517e-06,
      "loss": 0.0369,
      "step": 466600
    },
    {
      "epoch": 0.7636338642210483,
      "grad_norm": 1.6663583517074585,
      "learning_rate": 8.528560979949e-06,
      "loss": 0.0378,
      "step": 466620
    },
    {
      "epoch": 0.7636665946597017,
      "grad_norm": 0.8732967972755432,
      "learning_rate": 8.528495087735484e-06,
      "loss": 0.0355,
      "step": 466640
    },
    {
      "epoch": 0.7636993250983549,
      "grad_norm": 0.6074438095092773,
      "learning_rate": 8.528429195521966e-06,
      "loss": 0.0313,
      "step": 466660
    },
    {
      "epoch": 0.7637320555370083,
      "grad_norm": 2.225287914276123,
      "learning_rate": 8.52836330330845e-06,
      "loss": 0.0264,
      "step": 466680
    },
    {
      "epoch": 0.7637647859756617,
      "grad_norm": 0.847572386264801,
      "learning_rate": 8.528297411094931e-06,
      "loss": 0.036,
      "step": 466700
    },
    {
      "epoch": 0.763797516414315,
      "grad_norm": 1.8724147081375122,
      "learning_rate": 8.528231518881415e-06,
      "loss": 0.0385,
      "step": 466720
    },
    {
      "epoch": 0.7638302468529683,
      "grad_norm": 0.6734867691993713,
      "learning_rate": 8.528165626667897e-06,
      "loss": 0.0309,
      "step": 466740
    },
    {
      "epoch": 0.7638629772916217,
      "grad_norm": 0.41674792766571045,
      "learning_rate": 8.52809973445438e-06,
      "loss": 0.0271,
      "step": 466760
    },
    {
      "epoch": 0.763895707730275,
      "grad_norm": 0.4221387207508087,
      "learning_rate": 8.528033842240862e-06,
      "loss": 0.0417,
      "step": 466780
    },
    {
      "epoch": 0.7639284381689283,
      "grad_norm": 1.6098010540008545,
      "learning_rate": 8.527967950027346e-06,
      "loss": 0.0421,
      "step": 466800
    },
    {
      "epoch": 0.7639611686075817,
      "grad_norm": 0.6621679663658142,
      "learning_rate": 8.527902057813828e-06,
      "loss": 0.0324,
      "step": 466820
    },
    {
      "epoch": 0.763993899046235,
      "grad_norm": 0.1945698857307434,
      "learning_rate": 8.527836165600311e-06,
      "loss": 0.0319,
      "step": 466840
    },
    {
      "epoch": 0.7640266294848883,
      "grad_norm": 0.630993127822876,
      "learning_rate": 8.527770273386795e-06,
      "loss": 0.0246,
      "step": 466860
    },
    {
      "epoch": 0.7640593599235417,
      "grad_norm": 1.0260626077651978,
      "learning_rate": 8.527704381173277e-06,
      "loss": 0.0364,
      "step": 466880
    },
    {
      "epoch": 0.764092090362195,
      "grad_norm": 0.25884586572647095,
      "learning_rate": 8.52763848895976e-06,
      "loss": 0.0356,
      "step": 466900
    },
    {
      "epoch": 0.7641248208008484,
      "grad_norm": 0.9056327939033508,
      "learning_rate": 8.527572596746242e-06,
      "loss": 0.025,
      "step": 466920
    },
    {
      "epoch": 0.7641575512395017,
      "grad_norm": 2.3222005367279053,
      "learning_rate": 8.527506704532726e-06,
      "loss": 0.0302,
      "step": 466940
    },
    {
      "epoch": 0.7641902816781551,
      "grad_norm": 0.9550817608833313,
      "learning_rate": 8.52744081231921e-06,
      "loss": 0.0267,
      "step": 466960
    },
    {
      "epoch": 0.7642230121168084,
      "grad_norm": 4.034316062927246,
      "learning_rate": 8.527374920105691e-06,
      "loss": 0.0326,
      "step": 466980
    },
    {
      "epoch": 0.7642557425554617,
      "grad_norm": 0.9208828210830688,
      "learning_rate": 8.527309027892175e-06,
      "loss": 0.0251,
      "step": 467000
    },
    {
      "epoch": 0.7642884729941151,
      "grad_norm": 1.8439887762069702,
      "learning_rate": 8.527243135678659e-06,
      "loss": 0.0331,
      "step": 467020
    },
    {
      "epoch": 0.7643212034327684,
      "grad_norm": 0.6439366936683655,
      "learning_rate": 8.52717724346514e-06,
      "loss": 0.032,
      "step": 467040
    },
    {
      "epoch": 0.7643539338714217,
      "grad_norm": 0.21004550158977509,
      "learning_rate": 8.527111351251624e-06,
      "loss": 0.0271,
      "step": 467060
    },
    {
      "epoch": 0.7643866643100751,
      "grad_norm": 0.7984306812286377,
      "learning_rate": 8.527045459038106e-06,
      "loss": 0.0279,
      "step": 467080
    },
    {
      "epoch": 0.7644193947487284,
      "grad_norm": 3.1493494510650635,
      "learning_rate": 8.52697956682459e-06,
      "loss": 0.0293,
      "step": 467100
    },
    {
      "epoch": 0.7644521251873818,
      "grad_norm": 0.553463339805603,
      "learning_rate": 8.526913674611071e-06,
      "loss": 0.0277,
      "step": 467120
    },
    {
      "epoch": 0.7644848556260351,
      "grad_norm": 0.9404027462005615,
      "learning_rate": 8.526847782397555e-06,
      "loss": 0.036,
      "step": 467140
    },
    {
      "epoch": 0.7645175860646884,
      "grad_norm": 0.3010491132736206,
      "learning_rate": 8.526781890184037e-06,
      "loss": 0.0294,
      "step": 467160
    },
    {
      "epoch": 0.7645503165033418,
      "grad_norm": 0.7538252472877502,
      "learning_rate": 8.52671599797052e-06,
      "loss": 0.0387,
      "step": 467180
    },
    {
      "epoch": 0.7645830469419951,
      "grad_norm": 4.714673042297363,
      "learning_rate": 8.526650105757004e-06,
      "loss": 0.0243,
      "step": 467200
    },
    {
      "epoch": 0.7646157773806485,
      "grad_norm": 1.502851128578186,
      "learning_rate": 8.526584213543486e-06,
      "loss": 0.0258,
      "step": 467220
    },
    {
      "epoch": 0.7646485078193018,
      "grad_norm": 1.1512867212295532,
      "learning_rate": 8.52651832132997e-06,
      "loss": 0.0184,
      "step": 467240
    },
    {
      "epoch": 0.7646812382579551,
      "grad_norm": 1.0366411209106445,
      "learning_rate": 8.526452429116452e-06,
      "loss": 0.0281,
      "step": 467260
    },
    {
      "epoch": 0.7647139686966085,
      "grad_norm": 0.8814746737480164,
      "learning_rate": 8.526386536902935e-06,
      "loss": 0.0402,
      "step": 467280
    },
    {
      "epoch": 0.7647466991352618,
      "grad_norm": 0.7766980528831482,
      "learning_rate": 8.526320644689417e-06,
      "loss": 0.0296,
      "step": 467300
    },
    {
      "epoch": 0.7647794295739152,
      "grad_norm": 0.3566245138645172,
      "learning_rate": 8.5262547524759e-06,
      "loss": 0.0291,
      "step": 467320
    },
    {
      "epoch": 0.7648121600125685,
      "grad_norm": 2.9628348350524902,
      "learning_rate": 8.526188860262382e-06,
      "loss": 0.0291,
      "step": 467340
    },
    {
      "epoch": 0.7648448904512218,
      "grad_norm": 1.671581745147705,
      "learning_rate": 8.526122968048866e-06,
      "loss": 0.0322,
      "step": 467360
    },
    {
      "epoch": 0.7648776208898752,
      "grad_norm": 0.5575971007347107,
      "learning_rate": 8.52605707583535e-06,
      "loss": 0.0336,
      "step": 467380
    },
    {
      "epoch": 0.7649103513285285,
      "grad_norm": 0.4261099100112915,
      "learning_rate": 8.525991183621832e-06,
      "loss": 0.0275,
      "step": 467400
    },
    {
      "epoch": 0.7649430817671818,
      "grad_norm": 1.433733582496643,
      "learning_rate": 8.525925291408315e-06,
      "loss": 0.0466,
      "step": 467420
    },
    {
      "epoch": 0.7649758122058352,
      "grad_norm": 1.5386030673980713,
      "learning_rate": 8.525859399194799e-06,
      "loss": 0.0255,
      "step": 467440
    },
    {
      "epoch": 0.7650085426444885,
      "grad_norm": 1.1368268728256226,
      "learning_rate": 8.52579350698128e-06,
      "loss": 0.0418,
      "step": 467460
    },
    {
      "epoch": 0.7650412730831418,
      "grad_norm": 2.556743621826172,
      "learning_rate": 8.525727614767764e-06,
      "loss": 0.0336,
      "step": 467480
    },
    {
      "epoch": 0.7650740035217952,
      "grad_norm": 1.0041277408599854,
      "learning_rate": 8.525661722554246e-06,
      "loss": 0.0261,
      "step": 467500
    },
    {
      "epoch": 0.7651067339604486,
      "grad_norm": 0.3691032826900482,
      "learning_rate": 8.52559583034073e-06,
      "loss": 0.0166,
      "step": 467520
    },
    {
      "epoch": 0.7651394643991019,
      "grad_norm": 0.6661215424537659,
      "learning_rate": 8.525529938127213e-06,
      "loss": 0.0312,
      "step": 467540
    },
    {
      "epoch": 0.7651721948377552,
      "grad_norm": 0.2550613582134247,
      "learning_rate": 8.525464045913695e-06,
      "loss": 0.0366,
      "step": 467560
    },
    {
      "epoch": 0.7652049252764086,
      "grad_norm": 1.2550841569900513,
      "learning_rate": 8.525398153700179e-06,
      "loss": 0.0318,
      "step": 467580
    },
    {
      "epoch": 0.7652376557150619,
      "grad_norm": 2.683715581893921,
      "learning_rate": 8.52533226148666e-06,
      "loss": 0.0273,
      "step": 467600
    },
    {
      "epoch": 0.7652703861537152,
      "grad_norm": 1.665777564048767,
      "learning_rate": 8.525266369273144e-06,
      "loss": 0.0351,
      "step": 467620
    },
    {
      "epoch": 0.7653031165923686,
      "grad_norm": 0.917948305606842,
      "learning_rate": 8.525200477059626e-06,
      "loss": 0.0342,
      "step": 467640
    },
    {
      "epoch": 0.7653358470310219,
      "grad_norm": 0.4473251700401306,
      "learning_rate": 8.52513458484611e-06,
      "loss": 0.0309,
      "step": 467660
    },
    {
      "epoch": 0.7653685774696752,
      "grad_norm": 2.445472478866577,
      "learning_rate": 8.525068692632592e-06,
      "loss": 0.0332,
      "step": 467680
    },
    {
      "epoch": 0.7654013079083286,
      "grad_norm": 0.4149326980113983,
      "learning_rate": 8.525002800419075e-06,
      "loss": 0.0222,
      "step": 467700
    },
    {
      "epoch": 0.7654340383469819,
      "grad_norm": 1.6101323366165161,
      "learning_rate": 8.524936908205557e-06,
      "loss": 0.0383,
      "step": 467720
    },
    {
      "epoch": 0.7654667687856352,
      "grad_norm": 1.406620979309082,
      "learning_rate": 8.52487101599204e-06,
      "loss": 0.0259,
      "step": 467740
    },
    {
      "epoch": 0.7654994992242886,
      "grad_norm": 0.8232815265655518,
      "learning_rate": 8.524805123778524e-06,
      "loss": 0.0198,
      "step": 467760
    },
    {
      "epoch": 0.765532229662942,
      "grad_norm": 0.952029287815094,
      "learning_rate": 8.524739231565006e-06,
      "loss": 0.0286,
      "step": 467780
    },
    {
      "epoch": 0.7655649601015952,
      "grad_norm": 0.30548378825187683,
      "learning_rate": 8.52467333935149e-06,
      "loss": 0.0396,
      "step": 467800
    },
    {
      "epoch": 0.7655976905402486,
      "grad_norm": 1.6339818239212036,
      "learning_rate": 8.524607447137973e-06,
      "loss": 0.0352,
      "step": 467820
    },
    {
      "epoch": 0.765630420978902,
      "grad_norm": 10.488061904907227,
      "learning_rate": 8.524541554924455e-06,
      "loss": 0.0326,
      "step": 467840
    },
    {
      "epoch": 0.7656631514175553,
      "grad_norm": 5.1363019943237305,
      "learning_rate": 8.524475662710939e-06,
      "loss": 0.036,
      "step": 467860
    },
    {
      "epoch": 0.7656958818562086,
      "grad_norm": 0.5449283719062805,
      "learning_rate": 8.52440977049742e-06,
      "loss": 0.0334,
      "step": 467880
    },
    {
      "epoch": 0.765728612294862,
      "grad_norm": 0.07611118257045746,
      "learning_rate": 8.524343878283904e-06,
      "loss": 0.0264,
      "step": 467900
    },
    {
      "epoch": 0.7657613427335153,
      "grad_norm": 0.2261606901884079,
      "learning_rate": 8.524277986070388e-06,
      "loss": 0.0351,
      "step": 467920
    },
    {
      "epoch": 0.7657940731721686,
      "grad_norm": 0.9595139622688293,
      "learning_rate": 8.52421209385687e-06,
      "loss": 0.0442,
      "step": 467940
    },
    {
      "epoch": 0.765826803610822,
      "grad_norm": 1.5958847999572754,
      "learning_rate": 8.524146201643353e-06,
      "loss": 0.031,
      "step": 467960
    },
    {
      "epoch": 0.7658595340494754,
      "grad_norm": 1.4219129085540771,
      "learning_rate": 8.524080309429835e-06,
      "loss": 0.0311,
      "step": 467980
    },
    {
      "epoch": 0.7658922644881286,
      "grad_norm": 2.0568454265594482,
      "learning_rate": 8.524014417216319e-06,
      "loss": 0.0409,
      "step": 468000
    },
    {
      "epoch": 0.765924994926782,
      "grad_norm": 0.9289897680282593,
      "learning_rate": 8.5239485250028e-06,
      "loss": 0.0232,
      "step": 468020
    },
    {
      "epoch": 0.7659577253654354,
      "grad_norm": 1.8084434270858765,
      "learning_rate": 8.523882632789284e-06,
      "loss": 0.0328,
      "step": 468040
    },
    {
      "epoch": 0.7659904558040886,
      "grad_norm": 0.47376999258995056,
      "learning_rate": 8.523816740575766e-06,
      "loss": 0.0463,
      "step": 468060
    },
    {
      "epoch": 0.766023186242742,
      "grad_norm": 0.34597349166870117,
      "learning_rate": 8.52375084836225e-06,
      "loss": 0.0256,
      "step": 468080
    },
    {
      "epoch": 0.7660559166813954,
      "grad_norm": 1.6820906400680542,
      "learning_rate": 8.523684956148732e-06,
      "loss": 0.0266,
      "step": 468100
    },
    {
      "epoch": 0.7660886471200486,
      "grad_norm": 0.9519110321998596,
      "learning_rate": 8.523619063935215e-06,
      "loss": 0.029,
      "step": 468120
    },
    {
      "epoch": 0.766121377558702,
      "grad_norm": 0.8358259201049805,
      "learning_rate": 8.523553171721697e-06,
      "loss": 0.0314,
      "step": 468140
    },
    {
      "epoch": 0.7661541079973554,
      "grad_norm": 1.0735325813293457,
      "learning_rate": 8.52348727950818e-06,
      "loss": 0.0319,
      "step": 468160
    },
    {
      "epoch": 0.7661868384360088,
      "grad_norm": 1.1718041896820068,
      "learning_rate": 8.523421387294664e-06,
      "loss": 0.0371,
      "step": 468180
    },
    {
      "epoch": 0.766219568874662,
      "grad_norm": 3.6838955879211426,
      "learning_rate": 8.523355495081146e-06,
      "loss": 0.0453,
      "step": 468200
    },
    {
      "epoch": 0.7662522993133154,
      "grad_norm": 0.10583475977182388,
      "learning_rate": 8.52328960286763e-06,
      "loss": 0.0363,
      "step": 468220
    },
    {
      "epoch": 0.7662850297519688,
      "grad_norm": 5.074660778045654,
      "learning_rate": 8.523223710654113e-06,
      "loss": 0.0237,
      "step": 468240
    },
    {
      "epoch": 0.766317760190622,
      "grad_norm": 2.341372489929199,
      "learning_rate": 8.523157818440595e-06,
      "loss": 0.0361,
      "step": 468260
    },
    {
      "epoch": 0.7663504906292754,
      "grad_norm": 0.9184773564338684,
      "learning_rate": 8.523091926227079e-06,
      "loss": 0.0289,
      "step": 468280
    },
    {
      "epoch": 0.7663832210679288,
      "grad_norm": 1.549456000328064,
      "learning_rate": 8.523026034013562e-06,
      "loss": 0.0399,
      "step": 468300
    },
    {
      "epoch": 0.766415951506582,
      "grad_norm": 0.4615119695663452,
      "learning_rate": 8.522960141800044e-06,
      "loss": 0.0325,
      "step": 468320
    },
    {
      "epoch": 0.7664486819452354,
      "grad_norm": 1.5026265382766724,
      "learning_rate": 8.522894249586528e-06,
      "loss": 0.0376,
      "step": 468340
    },
    {
      "epoch": 0.7664814123838888,
      "grad_norm": 0.6982862949371338,
      "learning_rate": 8.52282835737301e-06,
      "loss": 0.0311,
      "step": 468360
    },
    {
      "epoch": 0.7665141428225422,
      "grad_norm": 1.6505770683288574,
      "learning_rate": 8.522762465159493e-06,
      "loss": 0.0416,
      "step": 468380
    },
    {
      "epoch": 0.7665468732611954,
      "grad_norm": 0.8770773410797119,
      "learning_rate": 8.522696572945975e-06,
      "loss": 0.038,
      "step": 468400
    },
    {
      "epoch": 0.7665796036998488,
      "grad_norm": 0.5061419606208801,
      "learning_rate": 8.522630680732459e-06,
      "loss": 0.023,
      "step": 468420
    },
    {
      "epoch": 0.7666123341385022,
      "grad_norm": 1.5376851558685303,
      "learning_rate": 8.52256478851894e-06,
      "loss": 0.0429,
      "step": 468440
    },
    {
      "epoch": 0.7666450645771554,
      "grad_norm": 1.5489567518234253,
      "learning_rate": 8.522498896305424e-06,
      "loss": 0.0372,
      "step": 468460
    },
    {
      "epoch": 0.7666777950158088,
      "grad_norm": 0.6002310514450073,
      "learning_rate": 8.522433004091906e-06,
      "loss": 0.0331,
      "step": 468480
    },
    {
      "epoch": 0.7667105254544622,
      "grad_norm": 1.3797911405563354,
      "learning_rate": 8.52236711187839e-06,
      "loss": 0.0329,
      "step": 468500
    },
    {
      "epoch": 0.7667432558931154,
      "grad_norm": 1.0135072469711304,
      "learning_rate": 8.522301219664872e-06,
      "loss": 0.0308,
      "step": 468520
    },
    {
      "epoch": 0.7667759863317688,
      "grad_norm": 0.7271772027015686,
      "learning_rate": 8.522235327451355e-06,
      "loss": 0.0249,
      "step": 468540
    },
    {
      "epoch": 0.7668087167704222,
      "grad_norm": 0.8767645359039307,
      "learning_rate": 8.522169435237839e-06,
      "loss": 0.0395,
      "step": 468560
    },
    {
      "epoch": 0.7668414472090755,
      "grad_norm": 0.923186182975769,
      "learning_rate": 8.52210354302432e-06,
      "loss": 0.0342,
      "step": 468580
    },
    {
      "epoch": 0.7668741776477288,
      "grad_norm": 0.6949229836463928,
      "learning_rate": 8.522037650810804e-06,
      "loss": 0.0257,
      "step": 468600
    },
    {
      "epoch": 0.7669069080863822,
      "grad_norm": 1.3744235038757324,
      "learning_rate": 8.521971758597288e-06,
      "loss": 0.036,
      "step": 468620
    },
    {
      "epoch": 0.7669396385250355,
      "grad_norm": 2.5864925384521484,
      "learning_rate": 8.52190586638377e-06,
      "loss": 0.0322,
      "step": 468640
    },
    {
      "epoch": 0.7669723689636888,
      "grad_norm": 1.0035734176635742,
      "learning_rate": 8.521839974170253e-06,
      "loss": 0.0293,
      "step": 468660
    },
    {
      "epoch": 0.7670050994023422,
      "grad_norm": 0.9162984490394592,
      "learning_rate": 8.521774081956737e-06,
      "loss": 0.0287,
      "step": 468680
    },
    {
      "epoch": 0.7670378298409956,
      "grad_norm": 0.7467617988586426,
      "learning_rate": 8.521708189743219e-06,
      "loss": 0.03,
      "step": 468700
    },
    {
      "epoch": 0.7670705602796488,
      "grad_norm": 0.6610848307609558,
      "learning_rate": 8.521642297529703e-06,
      "loss": 0.0306,
      "step": 468720
    },
    {
      "epoch": 0.7671032907183022,
      "grad_norm": 1.228739857673645,
      "learning_rate": 8.521576405316184e-06,
      "loss": 0.0368,
      "step": 468740
    },
    {
      "epoch": 0.7671360211569556,
      "grad_norm": 1.2790426015853882,
      "learning_rate": 8.521510513102668e-06,
      "loss": 0.037,
      "step": 468760
    },
    {
      "epoch": 0.7671687515956089,
      "grad_norm": 0.6371971964836121,
      "learning_rate": 8.52144462088915e-06,
      "loss": 0.0304,
      "step": 468780
    },
    {
      "epoch": 0.7672014820342622,
      "grad_norm": 1.7071127891540527,
      "learning_rate": 8.521378728675633e-06,
      "loss": 0.033,
      "step": 468800
    },
    {
      "epoch": 0.7672342124729156,
      "grad_norm": 1.0785127878189087,
      "learning_rate": 8.521312836462115e-06,
      "loss": 0.0429,
      "step": 468820
    },
    {
      "epoch": 0.7672669429115689,
      "grad_norm": 2.811553478240967,
      "learning_rate": 8.521246944248599e-06,
      "loss": 0.0359,
      "step": 468840
    },
    {
      "epoch": 0.7672996733502222,
      "grad_norm": 0.7540846467018127,
      "learning_rate": 8.521181052035081e-06,
      "loss": 0.0337,
      "step": 468860
    },
    {
      "epoch": 0.7673324037888756,
      "grad_norm": 0.7728928327560425,
      "learning_rate": 8.521115159821564e-06,
      "loss": 0.0355,
      "step": 468880
    },
    {
      "epoch": 0.7673651342275289,
      "grad_norm": 0.7462666630744934,
      "learning_rate": 8.521049267608046e-06,
      "loss": 0.0207,
      "step": 468900
    },
    {
      "epoch": 0.7673978646661822,
      "grad_norm": 1.0254616737365723,
      "learning_rate": 8.52098337539453e-06,
      "loss": 0.0452,
      "step": 468920
    },
    {
      "epoch": 0.7674305951048356,
      "grad_norm": 1.0006685256958008,
      "learning_rate": 8.520917483181014e-06,
      "loss": 0.0375,
      "step": 468940
    },
    {
      "epoch": 0.767463325543489,
      "grad_norm": 0.503089427947998,
      "learning_rate": 8.520851590967495e-06,
      "loss": 0.0262,
      "step": 468960
    },
    {
      "epoch": 0.7674960559821423,
      "grad_norm": 0.9672756791114807,
      "learning_rate": 8.520785698753979e-06,
      "loss": 0.0333,
      "step": 468980
    },
    {
      "epoch": 0.7675287864207956,
      "grad_norm": 1.293967366218567,
      "learning_rate": 8.520719806540461e-06,
      "loss": 0.032,
      "step": 469000
    },
    {
      "epoch": 0.767561516859449,
      "grad_norm": 0.8332499265670776,
      "learning_rate": 8.520653914326944e-06,
      "loss": 0.0258,
      "step": 469020
    },
    {
      "epoch": 0.7675942472981023,
      "grad_norm": 1.8018035888671875,
      "learning_rate": 8.520588022113428e-06,
      "loss": 0.0253,
      "step": 469040
    },
    {
      "epoch": 0.7676269777367556,
      "grad_norm": 1.6013672351837158,
      "learning_rate": 8.520522129899912e-06,
      "loss": 0.0302,
      "step": 469060
    },
    {
      "epoch": 0.767659708175409,
      "grad_norm": 0.3366667926311493,
      "learning_rate": 8.520456237686394e-06,
      "loss": 0.0422,
      "step": 469080
    },
    {
      "epoch": 0.7676924386140623,
      "grad_norm": 0.965000569820404,
      "learning_rate": 8.520390345472877e-06,
      "loss": 0.0243,
      "step": 469100
    },
    {
      "epoch": 0.7677251690527156,
      "grad_norm": 0.36421430110931396,
      "learning_rate": 8.520324453259359e-06,
      "loss": 0.0306,
      "step": 469120
    },
    {
      "epoch": 0.767757899491369,
      "grad_norm": 0.5015789866447449,
      "learning_rate": 8.520258561045843e-06,
      "loss": 0.0325,
      "step": 469140
    },
    {
      "epoch": 0.7677906299300223,
      "grad_norm": 1.0761268138885498,
      "learning_rate": 8.520192668832324e-06,
      "loss": 0.0316,
      "step": 469160
    },
    {
      "epoch": 0.7678233603686757,
      "grad_norm": 1.476489543914795,
      "learning_rate": 8.520126776618808e-06,
      "loss": 0.0288,
      "step": 469180
    },
    {
      "epoch": 0.767856090807329,
      "grad_norm": 1.4951145648956299,
      "learning_rate": 8.52006088440529e-06,
      "loss": 0.0303,
      "step": 469200
    },
    {
      "epoch": 0.7678888212459823,
      "grad_norm": 3.457427740097046,
      "learning_rate": 8.519994992191774e-06,
      "loss": 0.0327,
      "step": 469220
    },
    {
      "epoch": 0.7679215516846357,
      "grad_norm": 0.38552072644233704,
      "learning_rate": 8.519929099978255e-06,
      "loss": 0.0273,
      "step": 469240
    },
    {
      "epoch": 0.767954282123289,
      "grad_norm": 0.44011440873146057,
      "learning_rate": 8.519863207764739e-06,
      "loss": 0.0276,
      "step": 469260
    },
    {
      "epoch": 0.7679870125619424,
      "grad_norm": 1.0129239559173584,
      "learning_rate": 8.519797315551221e-06,
      "loss": 0.0247,
      "step": 469280
    },
    {
      "epoch": 0.7680197430005957,
      "grad_norm": 1.764264464378357,
      "learning_rate": 8.519731423337705e-06,
      "loss": 0.0268,
      "step": 469300
    },
    {
      "epoch": 0.768052473439249,
      "grad_norm": 0.6842749714851379,
      "learning_rate": 8.519665531124188e-06,
      "loss": 0.0315,
      "step": 469320
    },
    {
      "epoch": 0.7680852038779024,
      "grad_norm": 0.4399605989456177,
      "learning_rate": 8.51959963891067e-06,
      "loss": 0.0167,
      "step": 469340
    },
    {
      "epoch": 0.7681179343165557,
      "grad_norm": 0.15548448264598846,
      "learning_rate": 8.519533746697154e-06,
      "loss": 0.036,
      "step": 469360
    },
    {
      "epoch": 0.7681506647552091,
      "grad_norm": 1.8548028469085693,
      "learning_rate": 8.519467854483635e-06,
      "loss": 0.0323,
      "step": 469380
    },
    {
      "epoch": 0.7681833951938624,
      "grad_norm": 0.513620138168335,
      "learning_rate": 8.519401962270119e-06,
      "loss": 0.0394,
      "step": 469400
    },
    {
      "epoch": 0.7682161256325157,
      "grad_norm": 1.2881345748901367,
      "learning_rate": 8.519336070056603e-06,
      "loss": 0.0295,
      "step": 469420
    },
    {
      "epoch": 0.7682488560711691,
      "grad_norm": 2.2823967933654785,
      "learning_rate": 8.519270177843085e-06,
      "loss": 0.0286,
      "step": 469440
    },
    {
      "epoch": 0.7682815865098224,
      "grad_norm": 0.5849370956420898,
      "learning_rate": 8.519204285629568e-06,
      "loss": 0.0169,
      "step": 469460
    },
    {
      "epoch": 0.7683143169484757,
      "grad_norm": 1.4060689210891724,
      "learning_rate": 8.519138393416052e-06,
      "loss": 0.0384,
      "step": 469480
    },
    {
      "epoch": 0.7683470473871291,
      "grad_norm": 1.3144383430480957,
      "learning_rate": 8.519072501202534e-06,
      "loss": 0.0331,
      "step": 469500
    },
    {
      "epoch": 0.7683797778257824,
      "grad_norm": 1.011035442352295,
      "learning_rate": 8.519006608989017e-06,
      "loss": 0.0357,
      "step": 469520
    },
    {
      "epoch": 0.7684125082644357,
      "grad_norm": 0.33125871419906616,
      "learning_rate": 8.518940716775499e-06,
      "loss": 0.024,
      "step": 469540
    },
    {
      "epoch": 0.7684452387030891,
      "grad_norm": 0.6231801509857178,
      "learning_rate": 8.518874824561983e-06,
      "loss": 0.0251,
      "step": 469560
    },
    {
      "epoch": 0.7684779691417425,
      "grad_norm": 0.8306993246078491,
      "learning_rate": 8.518808932348465e-06,
      "loss": 0.0299,
      "step": 469580
    },
    {
      "epoch": 0.7685106995803958,
      "grad_norm": 0.45520564913749695,
      "learning_rate": 8.518743040134948e-06,
      "loss": 0.0242,
      "step": 469600
    },
    {
      "epoch": 0.7685434300190491,
      "grad_norm": 0.6276890635490417,
      "learning_rate": 8.51867714792143e-06,
      "loss": 0.0332,
      "step": 469620
    },
    {
      "epoch": 0.7685761604577025,
      "grad_norm": 1.271304965019226,
      "learning_rate": 8.518611255707914e-06,
      "loss": 0.0283,
      "step": 469640
    },
    {
      "epoch": 0.7686088908963558,
      "grad_norm": 0.6897425055503845,
      "learning_rate": 8.518545363494397e-06,
      "loss": 0.032,
      "step": 469660
    },
    {
      "epoch": 0.7686416213350091,
      "grad_norm": 0.8365309238433838,
      "learning_rate": 8.518479471280879e-06,
      "loss": 0.0348,
      "step": 469680
    },
    {
      "epoch": 0.7686743517736625,
      "grad_norm": 1.7870469093322754,
      "learning_rate": 8.518413579067363e-06,
      "loss": 0.0372,
      "step": 469700
    },
    {
      "epoch": 0.7687070822123158,
      "grad_norm": 1.5666106939315796,
      "learning_rate": 8.518347686853845e-06,
      "loss": 0.0365,
      "step": 469720
    },
    {
      "epoch": 0.7687398126509691,
      "grad_norm": 4.249516010284424,
      "learning_rate": 8.518281794640328e-06,
      "loss": 0.0362,
      "step": 469740
    },
    {
      "epoch": 0.7687725430896225,
      "grad_norm": 3.2592146396636963,
      "learning_rate": 8.51821590242681e-06,
      "loss": 0.0343,
      "step": 469760
    },
    {
      "epoch": 0.7688052735282759,
      "grad_norm": 2.485921859741211,
      "learning_rate": 8.518150010213294e-06,
      "loss": 0.0447,
      "step": 469780
    },
    {
      "epoch": 0.7688380039669291,
      "grad_norm": 0.4966985881328583,
      "learning_rate": 8.518084117999777e-06,
      "loss": 0.0366,
      "step": 469800
    },
    {
      "epoch": 0.7688707344055825,
      "grad_norm": 1.2165533304214478,
      "learning_rate": 8.518018225786259e-06,
      "loss": 0.0287,
      "step": 469820
    },
    {
      "epoch": 0.7689034648442359,
      "grad_norm": 1.6556271314620972,
      "learning_rate": 8.517952333572743e-06,
      "loss": 0.0403,
      "step": 469840
    },
    {
      "epoch": 0.7689361952828891,
      "grad_norm": 0.9653660655021667,
      "learning_rate": 8.517886441359226e-06,
      "loss": 0.0432,
      "step": 469860
    },
    {
      "epoch": 0.7689689257215425,
      "grad_norm": 0.6407178044319153,
      "learning_rate": 8.517820549145708e-06,
      "loss": 0.0331,
      "step": 469880
    },
    {
      "epoch": 0.7690016561601959,
      "grad_norm": 0.9093362092971802,
      "learning_rate": 8.517754656932192e-06,
      "loss": 0.0316,
      "step": 469900
    },
    {
      "epoch": 0.7690343865988492,
      "grad_norm": 1.2306280136108398,
      "learning_rate": 8.517688764718674e-06,
      "loss": 0.0376,
      "step": 469920
    },
    {
      "epoch": 0.7690671170375025,
      "grad_norm": 0.9563792943954468,
      "learning_rate": 8.517622872505157e-06,
      "loss": 0.0397,
      "step": 469940
    },
    {
      "epoch": 0.7690998474761559,
      "grad_norm": 1.7787246704101562,
      "learning_rate": 8.51755698029164e-06,
      "loss": 0.0217,
      "step": 469960
    },
    {
      "epoch": 0.7691325779148093,
      "grad_norm": 1.1105095148086548,
      "learning_rate": 8.517491088078123e-06,
      "loss": 0.0303,
      "step": 469980
    },
    {
      "epoch": 0.7691653083534625,
      "grad_norm": 0.5633031725883484,
      "learning_rate": 8.517425195864606e-06,
      "loss": 0.0264,
      "step": 470000
    },
    {
      "epoch": 0.7691980387921159,
      "grad_norm": 1.1084063053131104,
      "learning_rate": 8.517359303651088e-06,
      "loss": 0.0316,
      "step": 470020
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 1.2954217195510864,
      "learning_rate": 8.517293411437572e-06,
      "loss": 0.0386,
      "step": 470040
    },
    {
      "epoch": 0.7692634996694225,
      "grad_norm": 1.4538830518722534,
      "learning_rate": 8.517227519224054e-06,
      "loss": 0.0244,
      "step": 470060
    },
    {
      "epoch": 0.7692962301080759,
      "grad_norm": 0.2982042729854584,
      "learning_rate": 8.517161627010537e-06,
      "loss": 0.0314,
      "step": 470080
    },
    {
      "epoch": 0.7693289605467293,
      "grad_norm": 1.075951337814331,
      "learning_rate": 8.51709573479702e-06,
      "loss": 0.0322,
      "step": 470100
    },
    {
      "epoch": 0.7693616909853825,
      "grad_norm": 1.0588440895080566,
      "learning_rate": 8.517029842583503e-06,
      "loss": 0.0312,
      "step": 470120
    },
    {
      "epoch": 0.7693944214240359,
      "grad_norm": 1.7319841384887695,
      "learning_rate": 8.516963950369985e-06,
      "loss": 0.0353,
      "step": 470140
    },
    {
      "epoch": 0.7694271518626893,
      "grad_norm": 1.22784423828125,
      "learning_rate": 8.516898058156468e-06,
      "loss": 0.0271,
      "step": 470160
    },
    {
      "epoch": 0.7694598823013427,
      "grad_norm": 1.7522636651992798,
      "learning_rate": 8.51683216594295e-06,
      "loss": 0.0329,
      "step": 470180
    },
    {
      "epoch": 0.7694926127399959,
      "grad_norm": 0.9970765709877014,
      "learning_rate": 8.516766273729434e-06,
      "loss": 0.0278,
      "step": 470200
    },
    {
      "epoch": 0.7695253431786493,
      "grad_norm": 0.554948091506958,
      "learning_rate": 8.516700381515917e-06,
      "loss": 0.024,
      "step": 470220
    },
    {
      "epoch": 0.7695580736173027,
      "grad_norm": 1.5640814304351807,
      "learning_rate": 8.5166344893024e-06,
      "loss": 0.0316,
      "step": 470240
    },
    {
      "epoch": 0.7695908040559559,
      "grad_norm": 0.4699479341506958,
      "learning_rate": 8.516568597088883e-06,
      "loss": 0.0323,
      "step": 470260
    },
    {
      "epoch": 0.7696235344946093,
      "grad_norm": 1.1012587547302246,
      "learning_rate": 8.516502704875366e-06,
      "loss": 0.0247,
      "step": 470280
    },
    {
      "epoch": 0.7696562649332627,
      "grad_norm": 1.1958186626434326,
      "learning_rate": 8.516436812661848e-06,
      "loss": 0.0289,
      "step": 470300
    },
    {
      "epoch": 0.7696889953719159,
      "grad_norm": 0.641526997089386,
      "learning_rate": 8.516370920448332e-06,
      "loss": 0.035,
      "step": 470320
    },
    {
      "epoch": 0.7697217258105693,
      "grad_norm": 0.6193796396255493,
      "learning_rate": 8.516305028234814e-06,
      "loss": 0.0217,
      "step": 470340
    },
    {
      "epoch": 0.7697544562492227,
      "grad_norm": 1.5687482357025146,
      "learning_rate": 8.516239136021297e-06,
      "loss": 0.0372,
      "step": 470360
    },
    {
      "epoch": 0.769787186687876,
      "grad_norm": 0.14352798461914062,
      "learning_rate": 8.516173243807781e-06,
      "loss": 0.0372,
      "step": 470380
    },
    {
      "epoch": 0.7698199171265293,
      "grad_norm": 1.2935090065002441,
      "learning_rate": 8.516107351594263e-06,
      "loss": 0.038,
      "step": 470400
    },
    {
      "epoch": 0.7698526475651827,
      "grad_norm": 0.4162358045578003,
      "learning_rate": 8.516041459380746e-06,
      "loss": 0.0262,
      "step": 470420
    },
    {
      "epoch": 0.769885378003836,
      "grad_norm": 2.232067346572876,
      "learning_rate": 8.515975567167228e-06,
      "loss": 0.0448,
      "step": 470440
    },
    {
      "epoch": 0.7699181084424893,
      "grad_norm": 0.5964819192886353,
      "learning_rate": 8.515909674953712e-06,
      "loss": 0.0378,
      "step": 470460
    },
    {
      "epoch": 0.7699508388811427,
      "grad_norm": 0.160154789686203,
      "learning_rate": 8.515843782740194e-06,
      "loss": 0.0371,
      "step": 470480
    },
    {
      "epoch": 0.7699835693197961,
      "grad_norm": 1.4364309310913086,
      "learning_rate": 8.515777890526677e-06,
      "loss": 0.0354,
      "step": 470500
    },
    {
      "epoch": 0.7700162997584493,
      "grad_norm": 0.6082457304000854,
      "learning_rate": 8.51571199831316e-06,
      "loss": 0.0345,
      "step": 470520
    },
    {
      "epoch": 0.7700490301971027,
      "grad_norm": 0.5538260340690613,
      "learning_rate": 8.515646106099643e-06,
      "loss": 0.0241,
      "step": 470540
    },
    {
      "epoch": 0.7700817606357561,
      "grad_norm": 1.5972368717193604,
      "learning_rate": 8.515580213886125e-06,
      "loss": 0.0252,
      "step": 470560
    },
    {
      "epoch": 0.7701144910744093,
      "grad_norm": 0.3689693212509155,
      "learning_rate": 8.515514321672608e-06,
      "loss": 0.0328,
      "step": 470580
    },
    {
      "epoch": 0.7701472215130627,
      "grad_norm": 1.8417913913726807,
      "learning_rate": 8.515448429459092e-06,
      "loss": 0.0336,
      "step": 470600
    },
    {
      "epoch": 0.7701799519517161,
      "grad_norm": 1.3850195407867432,
      "learning_rate": 8.515382537245574e-06,
      "loss": 0.0346,
      "step": 470620
    },
    {
      "epoch": 0.7702126823903694,
      "grad_norm": 0.2292250096797943,
      "learning_rate": 8.515316645032057e-06,
      "loss": 0.028,
      "step": 470640
    },
    {
      "epoch": 0.7702454128290227,
      "grad_norm": 1.2614004611968994,
      "learning_rate": 8.515250752818541e-06,
      "loss": 0.0335,
      "step": 470660
    },
    {
      "epoch": 0.7702781432676761,
      "grad_norm": 0.41707032918930054,
      "learning_rate": 8.515184860605023e-06,
      "loss": 0.0272,
      "step": 470680
    },
    {
      "epoch": 0.7703108737063294,
      "grad_norm": 1.1352566480636597,
      "learning_rate": 8.515118968391506e-06,
      "loss": 0.0362,
      "step": 470700
    },
    {
      "epoch": 0.7703436041449827,
      "grad_norm": 0.407579630613327,
      "learning_rate": 8.51505307617799e-06,
      "loss": 0.0315,
      "step": 470720
    },
    {
      "epoch": 0.7703763345836361,
      "grad_norm": 0.916671872138977,
      "learning_rate": 8.514987183964472e-06,
      "loss": 0.0273,
      "step": 470740
    },
    {
      "epoch": 0.7704090650222895,
      "grad_norm": 1.166006088256836,
      "learning_rate": 8.514921291750956e-06,
      "loss": 0.0227,
      "step": 470760
    },
    {
      "epoch": 0.7704417954609427,
      "grad_norm": 0.7837883234024048,
      "learning_rate": 8.514855399537437e-06,
      "loss": 0.0319,
      "step": 470780
    },
    {
      "epoch": 0.7704745258995961,
      "grad_norm": 1.402552843093872,
      "learning_rate": 8.514789507323921e-06,
      "loss": 0.031,
      "step": 470800
    },
    {
      "epoch": 0.7705072563382495,
      "grad_norm": 1.1714099645614624,
      "learning_rate": 8.514723615110403e-06,
      "loss": 0.0262,
      "step": 470820
    },
    {
      "epoch": 0.7705399867769028,
      "grad_norm": 2.6308555603027344,
      "learning_rate": 8.514657722896886e-06,
      "loss": 0.0311,
      "step": 470840
    },
    {
      "epoch": 0.7705727172155561,
      "grad_norm": 0.8353896737098694,
      "learning_rate": 8.514591830683368e-06,
      "loss": 0.039,
      "step": 470860
    },
    {
      "epoch": 0.7706054476542095,
      "grad_norm": 1.5213783979415894,
      "learning_rate": 8.514525938469852e-06,
      "loss": 0.0247,
      "step": 470880
    },
    {
      "epoch": 0.7706381780928628,
      "grad_norm": 2.1521658897399902,
      "learning_rate": 8.514460046256334e-06,
      "loss": 0.0311,
      "step": 470900
    },
    {
      "epoch": 0.7706709085315161,
      "grad_norm": 0.5592684745788574,
      "learning_rate": 8.514394154042817e-06,
      "loss": 0.0196,
      "step": 470920
    },
    {
      "epoch": 0.7707036389701695,
      "grad_norm": 0.47857069969177246,
      "learning_rate": 8.5143282618293e-06,
      "loss": 0.0342,
      "step": 470940
    },
    {
      "epoch": 0.7707363694088228,
      "grad_norm": 0.7047324776649475,
      "learning_rate": 8.514262369615783e-06,
      "loss": 0.0272,
      "step": 470960
    },
    {
      "epoch": 0.7707690998474761,
      "grad_norm": 1.591879963874817,
      "learning_rate": 8.514196477402265e-06,
      "loss": 0.0275,
      "step": 470980
    },
    {
      "epoch": 0.7708018302861295,
      "grad_norm": 1.1532964706420898,
      "learning_rate": 8.514130585188748e-06,
      "loss": 0.0391,
      "step": 471000
    },
    {
      "epoch": 0.7708345607247828,
      "grad_norm": 0.7928420901298523,
      "learning_rate": 8.514064692975232e-06,
      "loss": 0.0266,
      "step": 471020
    },
    {
      "epoch": 0.7708672911634362,
      "grad_norm": 0.9906963109970093,
      "learning_rate": 8.513998800761714e-06,
      "loss": 0.03,
      "step": 471040
    },
    {
      "epoch": 0.7709000216020895,
      "grad_norm": 3.3886170387268066,
      "learning_rate": 8.513932908548197e-06,
      "loss": 0.0372,
      "step": 471060
    },
    {
      "epoch": 0.7709327520407429,
      "grad_norm": 0.6227877140045166,
      "learning_rate": 8.513867016334681e-06,
      "loss": 0.0246,
      "step": 471080
    },
    {
      "epoch": 0.7709654824793962,
      "grad_norm": 2.0731892585754395,
      "learning_rate": 8.513801124121163e-06,
      "loss": 0.0275,
      "step": 471100
    },
    {
      "epoch": 0.7709982129180495,
      "grad_norm": 1.0326746702194214,
      "learning_rate": 8.513735231907647e-06,
      "loss": 0.0244,
      "step": 471120
    },
    {
      "epoch": 0.7710309433567029,
      "grad_norm": 0.3823970854282379,
      "learning_rate": 8.51366933969413e-06,
      "loss": 0.0259,
      "step": 471140
    },
    {
      "epoch": 0.7710636737953562,
      "grad_norm": 1.5977615118026733,
      "learning_rate": 8.513603447480612e-06,
      "loss": 0.0353,
      "step": 471160
    },
    {
      "epoch": 0.7710964042340095,
      "grad_norm": 0.3696995973587036,
      "learning_rate": 8.513537555267096e-06,
      "loss": 0.0278,
      "step": 471180
    },
    {
      "epoch": 0.7711291346726629,
      "grad_norm": 0.7224963307380676,
      "learning_rate": 8.513471663053577e-06,
      "loss": 0.0299,
      "step": 471200
    },
    {
      "epoch": 0.7711618651113162,
      "grad_norm": 1.9867056608200073,
      "learning_rate": 8.513405770840061e-06,
      "loss": 0.0457,
      "step": 471220
    },
    {
      "epoch": 0.7711945955499696,
      "grad_norm": 1.129719853401184,
      "learning_rate": 8.513339878626543e-06,
      "loss": 0.0347,
      "step": 471240
    },
    {
      "epoch": 0.7712273259886229,
      "grad_norm": 0.9454073309898376,
      "learning_rate": 8.513273986413027e-06,
      "loss": 0.0389,
      "step": 471260
    },
    {
      "epoch": 0.7712600564272762,
      "grad_norm": 0.8447659015655518,
      "learning_rate": 8.513208094199508e-06,
      "loss": 0.0276,
      "step": 471280
    },
    {
      "epoch": 0.7712927868659296,
      "grad_norm": 1.5491539239883423,
      "learning_rate": 8.513142201985992e-06,
      "loss": 0.0273,
      "step": 471300
    },
    {
      "epoch": 0.7713255173045829,
      "grad_norm": 0.22701667249202728,
      "learning_rate": 8.513076309772474e-06,
      "loss": 0.0274,
      "step": 471320
    },
    {
      "epoch": 0.7713582477432362,
      "grad_norm": 0.682755172252655,
      "learning_rate": 8.513010417558958e-06,
      "loss": 0.0443,
      "step": 471340
    },
    {
      "epoch": 0.7713909781818896,
      "grad_norm": 0.38808026909828186,
      "learning_rate": 8.51294452534544e-06,
      "loss": 0.0264,
      "step": 471360
    },
    {
      "epoch": 0.7714237086205429,
      "grad_norm": 1.1028058528900146,
      "learning_rate": 8.512878633131923e-06,
      "loss": 0.0284,
      "step": 471380
    },
    {
      "epoch": 0.7714564390591963,
      "grad_norm": 0.5530949234962463,
      "learning_rate": 8.512812740918407e-06,
      "loss": 0.0414,
      "step": 471400
    },
    {
      "epoch": 0.7714891694978496,
      "grad_norm": 0.5117232799530029,
      "learning_rate": 8.512746848704888e-06,
      "loss": 0.0261,
      "step": 471420
    },
    {
      "epoch": 0.771521899936503,
      "grad_norm": 1.2676348686218262,
      "learning_rate": 8.512680956491372e-06,
      "loss": 0.0269,
      "step": 471440
    },
    {
      "epoch": 0.7715546303751563,
      "grad_norm": 0.7528715133666992,
      "learning_rate": 8.512615064277856e-06,
      "loss": 0.0258,
      "step": 471460
    },
    {
      "epoch": 0.7715873608138096,
      "grad_norm": 0.37598541378974915,
      "learning_rate": 8.512549172064338e-06,
      "loss": 0.0258,
      "step": 471480
    },
    {
      "epoch": 0.771620091252463,
      "grad_norm": 4.515103816986084,
      "learning_rate": 8.512483279850821e-06,
      "loss": 0.0414,
      "step": 471500
    },
    {
      "epoch": 0.7716528216911163,
      "grad_norm": 0.9090427160263062,
      "learning_rate": 8.512417387637305e-06,
      "loss": 0.0326,
      "step": 471520
    },
    {
      "epoch": 0.7716855521297696,
      "grad_norm": 0.9069218635559082,
      "learning_rate": 8.512351495423787e-06,
      "loss": 0.0251,
      "step": 471540
    },
    {
      "epoch": 0.771718282568423,
      "grad_norm": 0.6142666935920715,
      "learning_rate": 8.51228560321027e-06,
      "loss": 0.0244,
      "step": 471560
    },
    {
      "epoch": 0.7717510130070763,
      "grad_norm": 0.572361946105957,
      "learning_rate": 8.512219710996752e-06,
      "loss": 0.0351,
      "step": 471580
    },
    {
      "epoch": 0.7717837434457296,
      "grad_norm": 2.933295249938965,
      "learning_rate": 8.512153818783236e-06,
      "loss": 0.0323,
      "step": 471600
    },
    {
      "epoch": 0.771816473884383,
      "grad_norm": 1.1845965385437012,
      "learning_rate": 8.512087926569718e-06,
      "loss": 0.0229,
      "step": 471620
    },
    {
      "epoch": 0.7718492043230364,
      "grad_norm": 0.6844661235809326,
      "learning_rate": 8.512022034356201e-06,
      "loss": 0.0341,
      "step": 471640
    },
    {
      "epoch": 0.7718819347616896,
      "grad_norm": 1.3360447883605957,
      "learning_rate": 8.511956142142683e-06,
      "loss": 0.0366,
      "step": 471660
    },
    {
      "epoch": 0.771914665200343,
      "grad_norm": 0.9345573782920837,
      "learning_rate": 8.511890249929167e-06,
      "loss": 0.0384,
      "step": 471680
    },
    {
      "epoch": 0.7719473956389964,
      "grad_norm": 1.5477004051208496,
      "learning_rate": 8.511824357715649e-06,
      "loss": 0.0396,
      "step": 471700
    },
    {
      "epoch": 0.7719801260776497,
      "grad_norm": 1.0830738544464111,
      "learning_rate": 8.511758465502132e-06,
      "loss": 0.0279,
      "step": 471720
    },
    {
      "epoch": 0.772012856516303,
      "grad_norm": 0.9893966317176819,
      "learning_rate": 8.511692573288614e-06,
      "loss": 0.026,
      "step": 471740
    },
    {
      "epoch": 0.7720455869549564,
      "grad_norm": 2.0743050575256348,
      "learning_rate": 8.511626681075098e-06,
      "loss": 0.0258,
      "step": 471760
    },
    {
      "epoch": 0.7720783173936097,
      "grad_norm": 0.40861353278160095,
      "learning_rate": 8.511560788861581e-06,
      "loss": 0.0228,
      "step": 471780
    },
    {
      "epoch": 0.772111047832263,
      "grad_norm": 0.7417994141578674,
      "learning_rate": 8.511494896648063e-06,
      "loss": 0.0217,
      "step": 471800
    },
    {
      "epoch": 0.7721437782709164,
      "grad_norm": 2.152362823486328,
      "learning_rate": 8.511429004434547e-06,
      "loss": 0.0309,
      "step": 471820
    },
    {
      "epoch": 0.7721765087095698,
      "grad_norm": 0.7101563811302185,
      "learning_rate": 8.51136311222103e-06,
      "loss": 0.0254,
      "step": 471840
    },
    {
      "epoch": 0.772209239148223,
      "grad_norm": 0.26248353719711304,
      "learning_rate": 8.511297220007512e-06,
      "loss": 0.0333,
      "step": 471860
    },
    {
      "epoch": 0.7722419695868764,
      "grad_norm": 2.5885508060455322,
      "learning_rate": 8.511231327793996e-06,
      "loss": 0.0346,
      "step": 471880
    },
    {
      "epoch": 0.7722747000255298,
      "grad_norm": 0.8622113466262817,
      "learning_rate": 8.51116543558048e-06,
      "loss": 0.0284,
      "step": 471900
    },
    {
      "epoch": 0.772307430464183,
      "grad_norm": 0.9677643775939941,
      "learning_rate": 8.511099543366961e-06,
      "loss": 0.0339,
      "step": 471920
    },
    {
      "epoch": 0.7723401609028364,
      "grad_norm": 1.1309221982955933,
      "learning_rate": 8.511033651153445e-06,
      "loss": 0.0342,
      "step": 471940
    },
    {
      "epoch": 0.7723728913414898,
      "grad_norm": 1.1901320219039917,
      "learning_rate": 8.510967758939927e-06,
      "loss": 0.027,
      "step": 471960
    },
    {
      "epoch": 0.772405621780143,
      "grad_norm": 0.9636662602424622,
      "learning_rate": 8.51090186672641e-06,
      "loss": 0.0353,
      "step": 471980
    },
    {
      "epoch": 0.7724383522187964,
      "grad_norm": 0.7951804995536804,
      "learning_rate": 8.510835974512892e-06,
      "loss": 0.0262,
      "step": 472000
    },
    {
      "epoch": 0.7724710826574498,
      "grad_norm": 0.45602843165397644,
      "learning_rate": 8.510770082299376e-06,
      "loss": 0.0289,
      "step": 472020
    },
    {
      "epoch": 0.7725038130961032,
      "grad_norm": 0.4933856427669525,
      "learning_rate": 8.510704190085858e-06,
      "loss": 0.0306,
      "step": 472040
    },
    {
      "epoch": 0.7725365435347564,
      "grad_norm": 1.619560718536377,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.0384,
      "step": 472060
    },
    {
      "epoch": 0.7725692739734098,
      "grad_norm": 1.6486883163452148,
      "learning_rate": 8.510572405658823e-06,
      "loss": 0.0336,
      "step": 472080
    },
    {
      "epoch": 0.7726020044120632,
      "grad_norm": 1.4392571449279785,
      "learning_rate": 8.510506513445307e-06,
      "loss": 0.0326,
      "step": 472100
    },
    {
      "epoch": 0.7726347348507164,
      "grad_norm": 0.7570080161094666,
      "learning_rate": 8.51044062123179e-06,
      "loss": 0.0229,
      "step": 472120
    },
    {
      "epoch": 0.7726674652893698,
      "grad_norm": 0.9942266941070557,
      "learning_rate": 8.510374729018272e-06,
      "loss": 0.0311,
      "step": 472140
    },
    {
      "epoch": 0.7727001957280232,
      "grad_norm": 0.46740150451660156,
      "learning_rate": 8.510308836804756e-06,
      "loss": 0.0339,
      "step": 472160
    },
    {
      "epoch": 0.7727329261666764,
      "grad_norm": 0.5288239121437073,
      "learning_rate": 8.510242944591238e-06,
      "loss": 0.0334,
      "step": 472180
    },
    {
      "epoch": 0.7727656566053298,
      "grad_norm": 1.1632704734802246,
      "learning_rate": 8.510177052377721e-06,
      "loss": 0.0296,
      "step": 472200
    },
    {
      "epoch": 0.7727983870439832,
      "grad_norm": 3.183699607849121,
      "learning_rate": 8.510111160164203e-06,
      "loss": 0.0345,
      "step": 472220
    },
    {
      "epoch": 0.7728311174826366,
      "grad_norm": 0.37194639444351196,
      "learning_rate": 8.510045267950687e-06,
      "loss": 0.0266,
      "step": 472240
    },
    {
      "epoch": 0.7728638479212898,
      "grad_norm": 0.5886773467063904,
      "learning_rate": 8.50997937573717e-06,
      "loss": 0.0394,
      "step": 472260
    },
    {
      "epoch": 0.7728965783599432,
      "grad_norm": 1.0372000932693481,
      "learning_rate": 8.509913483523652e-06,
      "loss": 0.0267,
      "step": 472280
    },
    {
      "epoch": 0.7729293087985966,
      "grad_norm": 2.0548906326293945,
      "learning_rate": 8.509847591310136e-06,
      "loss": 0.0276,
      "step": 472300
    },
    {
      "epoch": 0.7729620392372498,
      "grad_norm": 1.5904383659362793,
      "learning_rate": 8.50978169909662e-06,
      "loss": 0.022,
      "step": 472320
    },
    {
      "epoch": 0.7729947696759032,
      "grad_norm": 1.3514443635940552,
      "learning_rate": 8.509715806883101e-06,
      "loss": 0.0417,
      "step": 472340
    },
    {
      "epoch": 0.7730275001145566,
      "grad_norm": 0.3859265148639679,
      "learning_rate": 8.509649914669585e-06,
      "loss": 0.0314,
      "step": 472360
    },
    {
      "epoch": 0.7730602305532098,
      "grad_norm": 1.2597529888153076,
      "learning_rate": 8.509584022456067e-06,
      "loss": 0.0329,
      "step": 472380
    },
    {
      "epoch": 0.7730929609918632,
      "grad_norm": 0.9019647240638733,
      "learning_rate": 8.50951813024255e-06,
      "loss": 0.0274,
      "step": 472400
    },
    {
      "epoch": 0.7731256914305166,
      "grad_norm": 0.8921585083007812,
      "learning_rate": 8.509452238029032e-06,
      "loss": 0.0356,
      "step": 472420
    },
    {
      "epoch": 0.7731584218691699,
      "grad_norm": 0.5706669092178345,
      "learning_rate": 8.509386345815516e-06,
      "loss": 0.034,
      "step": 472440
    },
    {
      "epoch": 0.7731911523078232,
      "grad_norm": 1.5428285598754883,
      "learning_rate": 8.509320453602e-06,
      "loss": 0.0285,
      "step": 472460
    },
    {
      "epoch": 0.7732238827464766,
      "grad_norm": 0.2748522162437439,
      "learning_rate": 8.509254561388481e-06,
      "loss": 0.0313,
      "step": 472480
    },
    {
      "epoch": 0.77325661318513,
      "grad_norm": 0.533140242099762,
      "learning_rate": 8.509188669174965e-06,
      "loss": 0.0319,
      "step": 472500
    },
    {
      "epoch": 0.7732893436237832,
      "grad_norm": 0.69322669506073,
      "learning_rate": 8.509122776961447e-06,
      "loss": 0.0321,
      "step": 472520
    },
    {
      "epoch": 0.7733220740624366,
      "grad_norm": 2.6637496948242188,
      "learning_rate": 8.50905688474793e-06,
      "loss": 0.0318,
      "step": 472540
    },
    {
      "epoch": 0.77335480450109,
      "grad_norm": 0.2608785331249237,
      "learning_rate": 8.508990992534412e-06,
      "loss": 0.032,
      "step": 472560
    },
    {
      "epoch": 0.7733875349397432,
      "grad_norm": 0.13110455870628357,
      "learning_rate": 8.508925100320896e-06,
      "loss": 0.0351,
      "step": 472580
    },
    {
      "epoch": 0.7734202653783966,
      "grad_norm": 1.0489134788513184,
      "learning_rate": 8.508859208107378e-06,
      "loss": 0.0287,
      "step": 472600
    },
    {
      "epoch": 0.77345299581705,
      "grad_norm": 0.44763684272766113,
      "learning_rate": 8.508793315893861e-06,
      "loss": 0.0419,
      "step": 472620
    },
    {
      "epoch": 0.7734857262557033,
      "grad_norm": 1.2771761417388916,
      "learning_rate": 8.508727423680345e-06,
      "loss": 0.0328,
      "step": 472640
    },
    {
      "epoch": 0.7735184566943566,
      "grad_norm": 1.5740717649459839,
      "learning_rate": 8.508661531466827e-06,
      "loss": 0.041,
      "step": 472660
    },
    {
      "epoch": 0.77355118713301,
      "grad_norm": 0.3206499218940735,
      "learning_rate": 8.50859563925331e-06,
      "loss": 0.0207,
      "step": 472680
    },
    {
      "epoch": 0.7735839175716633,
      "grad_norm": 1.099807858467102,
      "learning_rate": 8.508529747039794e-06,
      "loss": 0.0291,
      "step": 472700
    },
    {
      "epoch": 0.7736166480103166,
      "grad_norm": 1.4212393760681152,
      "learning_rate": 8.508463854826276e-06,
      "loss": 0.0224,
      "step": 472720
    },
    {
      "epoch": 0.77364937844897,
      "grad_norm": 0.5991222858428955,
      "learning_rate": 8.50839796261276e-06,
      "loss": 0.0293,
      "step": 472740
    },
    {
      "epoch": 0.7736821088876233,
      "grad_norm": 3.709103584289551,
      "learning_rate": 8.508332070399241e-06,
      "loss": 0.0379,
      "step": 472760
    },
    {
      "epoch": 0.7737148393262766,
      "grad_norm": 1.6171824932098389,
      "learning_rate": 8.508266178185725e-06,
      "loss": 0.0268,
      "step": 472780
    },
    {
      "epoch": 0.77374756976493,
      "grad_norm": 0.8051267862319946,
      "learning_rate": 8.508200285972207e-06,
      "loss": 0.0311,
      "step": 472800
    },
    {
      "epoch": 0.7737803002035833,
      "grad_norm": 0.9377464056015015,
      "learning_rate": 8.50813439375869e-06,
      "loss": 0.0361,
      "step": 472820
    },
    {
      "epoch": 0.7738130306422367,
      "grad_norm": 2.353182792663574,
      "learning_rate": 8.508068501545174e-06,
      "loss": 0.0274,
      "step": 472840
    },
    {
      "epoch": 0.77384576108089,
      "grad_norm": 1.0692564249038696,
      "learning_rate": 8.508002609331656e-06,
      "loss": 0.025,
      "step": 472860
    },
    {
      "epoch": 0.7738784915195434,
      "grad_norm": 1.4954617023468018,
      "learning_rate": 8.50793671711814e-06,
      "loss": 0.0342,
      "step": 472880
    },
    {
      "epoch": 0.7739112219581967,
      "grad_norm": 0.7315577864646912,
      "learning_rate": 8.507870824904621e-06,
      "loss": 0.0329,
      "step": 472900
    },
    {
      "epoch": 0.77394395239685,
      "grad_norm": 0.45996373891830444,
      "learning_rate": 8.507804932691105e-06,
      "loss": 0.0359,
      "step": 472920
    },
    {
      "epoch": 0.7739766828355034,
      "grad_norm": 1.9263194799423218,
      "learning_rate": 8.507739040477587e-06,
      "loss": 0.044,
      "step": 472940
    },
    {
      "epoch": 0.7740094132741567,
      "grad_norm": 1.7234231233596802,
      "learning_rate": 8.50767314826407e-06,
      "loss": 0.0209,
      "step": 472960
    },
    {
      "epoch": 0.77404214371281,
      "grad_norm": 2.2559001445770264,
      "learning_rate": 8.507607256050552e-06,
      "loss": 0.0211,
      "step": 472980
    },
    {
      "epoch": 0.7740748741514634,
      "grad_norm": 3.388608455657959,
      "learning_rate": 8.507541363837036e-06,
      "loss": 0.0232,
      "step": 473000
    },
    {
      "epoch": 0.7741076045901167,
      "grad_norm": 1.2676730155944824,
      "learning_rate": 8.507475471623518e-06,
      "loss": 0.0258,
      "step": 473020
    },
    {
      "epoch": 0.7741403350287701,
      "grad_norm": 0.9743878841400146,
      "learning_rate": 8.507409579410001e-06,
      "loss": 0.0311,
      "step": 473040
    },
    {
      "epoch": 0.7741730654674234,
      "grad_norm": 0.24855923652648926,
      "learning_rate": 8.507343687196485e-06,
      "loss": 0.0334,
      "step": 473060
    },
    {
      "epoch": 0.7742057959060767,
      "grad_norm": 0.6252776384353638,
      "learning_rate": 8.507277794982967e-06,
      "loss": 0.0279,
      "step": 473080
    },
    {
      "epoch": 0.7742385263447301,
      "grad_norm": 1.4051681756973267,
      "learning_rate": 8.50721190276945e-06,
      "loss": 0.0422,
      "step": 473100
    },
    {
      "epoch": 0.7742712567833834,
      "grad_norm": 1.471767783164978,
      "learning_rate": 8.507146010555934e-06,
      "loss": 0.0319,
      "step": 473120
    },
    {
      "epoch": 0.7743039872220367,
      "grad_norm": 0.4573329985141754,
      "learning_rate": 8.507080118342416e-06,
      "loss": 0.0331,
      "step": 473140
    },
    {
      "epoch": 0.7743367176606901,
      "grad_norm": 1.130690574645996,
      "learning_rate": 8.5070142261289e-06,
      "loss": 0.028,
      "step": 473160
    },
    {
      "epoch": 0.7743694480993434,
      "grad_norm": 1.3395267724990845,
      "learning_rate": 8.506948333915383e-06,
      "loss": 0.0295,
      "step": 473180
    },
    {
      "epoch": 0.7744021785379968,
      "grad_norm": 1.0092852115631104,
      "learning_rate": 8.506882441701865e-06,
      "loss": 0.0236,
      "step": 473200
    },
    {
      "epoch": 0.7744349089766501,
      "grad_norm": 1.1913185119628906,
      "learning_rate": 8.506816549488349e-06,
      "loss": 0.0239,
      "step": 473220
    },
    {
      "epoch": 0.7744676394153035,
      "grad_norm": 0.17387238144874573,
      "learning_rate": 8.50675065727483e-06,
      "loss": 0.0431,
      "step": 473240
    },
    {
      "epoch": 0.7745003698539568,
      "grad_norm": 0.3358156085014343,
      "learning_rate": 8.506684765061314e-06,
      "loss": 0.0446,
      "step": 473260
    },
    {
      "epoch": 0.7745331002926101,
      "grad_norm": 1.7076278924942017,
      "learning_rate": 8.506618872847796e-06,
      "loss": 0.0335,
      "step": 473280
    },
    {
      "epoch": 0.7745658307312635,
      "grad_norm": 1.3272120952606201,
      "learning_rate": 8.50655298063428e-06,
      "loss": 0.0254,
      "step": 473300
    },
    {
      "epoch": 0.7745985611699168,
      "grad_norm": 1.379944086074829,
      "learning_rate": 8.506487088420761e-06,
      "loss": 0.0317,
      "step": 473320
    },
    {
      "epoch": 0.7746312916085701,
      "grad_norm": 1.2792609930038452,
      "learning_rate": 8.506421196207245e-06,
      "loss": 0.0366,
      "step": 473340
    },
    {
      "epoch": 0.7746640220472235,
      "grad_norm": 0.33630484342575073,
      "learning_rate": 8.506355303993727e-06,
      "loss": 0.0315,
      "step": 473360
    },
    {
      "epoch": 0.7746967524858768,
      "grad_norm": 0.7688592076301575,
      "learning_rate": 8.50628941178021e-06,
      "loss": 0.0254,
      "step": 473380
    },
    {
      "epoch": 0.7747294829245301,
      "grad_norm": 1.0334105491638184,
      "learning_rate": 8.506223519566692e-06,
      "loss": 0.0389,
      "step": 473400
    },
    {
      "epoch": 0.7747622133631835,
      "grad_norm": 1.2782418727874756,
      "learning_rate": 8.506157627353176e-06,
      "loss": 0.0364,
      "step": 473420
    },
    {
      "epoch": 0.7747949438018368,
      "grad_norm": 0.8790085911750793,
      "learning_rate": 8.50609173513966e-06,
      "loss": 0.04,
      "step": 473440
    },
    {
      "epoch": 0.7748276742404901,
      "grad_norm": 0.7915922999382019,
      "learning_rate": 8.506025842926141e-06,
      "loss": 0.0284,
      "step": 473460
    },
    {
      "epoch": 0.7748604046791435,
      "grad_norm": 0.9777892827987671,
      "learning_rate": 8.505959950712625e-06,
      "loss": 0.0312,
      "step": 473480
    },
    {
      "epoch": 0.7748931351177969,
      "grad_norm": 1.9675408601760864,
      "learning_rate": 8.505894058499109e-06,
      "loss": 0.0287,
      "step": 473500
    },
    {
      "epoch": 0.7749258655564502,
      "grad_norm": 0.7982999086380005,
      "learning_rate": 8.50582816628559e-06,
      "loss": 0.025,
      "step": 473520
    },
    {
      "epoch": 0.7749585959951035,
      "grad_norm": 1.714365839958191,
      "learning_rate": 8.505762274072074e-06,
      "loss": 0.0377,
      "step": 473540
    },
    {
      "epoch": 0.7749913264337569,
      "grad_norm": 1.3013635873794556,
      "learning_rate": 8.505696381858558e-06,
      "loss": 0.0284,
      "step": 473560
    },
    {
      "epoch": 0.7750240568724102,
      "grad_norm": 3.2341363430023193,
      "learning_rate": 8.50563048964504e-06,
      "loss": 0.0378,
      "step": 473580
    },
    {
      "epoch": 0.7750567873110635,
      "grad_norm": 0.46609729528427124,
      "learning_rate": 8.505564597431523e-06,
      "loss": 0.0252,
      "step": 473600
    },
    {
      "epoch": 0.7750895177497169,
      "grad_norm": 0.7804872989654541,
      "learning_rate": 8.505498705218005e-06,
      "loss": 0.0486,
      "step": 473620
    },
    {
      "epoch": 0.7751222481883702,
      "grad_norm": 1.6623226404190063,
      "learning_rate": 8.505432813004489e-06,
      "loss": 0.0367,
      "step": 473640
    },
    {
      "epoch": 0.7751549786270235,
      "grad_norm": 0.8248335123062134,
      "learning_rate": 8.50536692079097e-06,
      "loss": 0.0312,
      "step": 473660
    },
    {
      "epoch": 0.7751877090656769,
      "grad_norm": 3.8874778747558594,
      "learning_rate": 8.505301028577454e-06,
      "loss": 0.0414,
      "step": 473680
    },
    {
      "epoch": 0.7752204395043303,
      "grad_norm": 0.5656797289848328,
      "learning_rate": 8.505235136363936e-06,
      "loss": 0.025,
      "step": 473700
    },
    {
      "epoch": 0.7752531699429835,
      "grad_norm": 0.8623604774475098,
      "learning_rate": 8.50516924415042e-06,
      "loss": 0.0317,
      "step": 473720
    },
    {
      "epoch": 0.7752859003816369,
      "grad_norm": 0.954815149307251,
      "learning_rate": 8.505103351936902e-06,
      "loss": 0.0277,
      "step": 473740
    },
    {
      "epoch": 0.7753186308202903,
      "grad_norm": 3.769941568374634,
      "learning_rate": 8.505037459723385e-06,
      "loss": 0.0231,
      "step": 473760
    },
    {
      "epoch": 0.7753513612589435,
      "grad_norm": 1.0405572652816772,
      "learning_rate": 8.504971567509867e-06,
      "loss": 0.0302,
      "step": 473780
    },
    {
      "epoch": 0.7753840916975969,
      "grad_norm": 1.4712210893630981,
      "learning_rate": 8.50490567529635e-06,
      "loss": 0.0304,
      "step": 473800
    },
    {
      "epoch": 0.7754168221362503,
      "grad_norm": 0.30161052942276,
      "learning_rate": 8.504839783082832e-06,
      "loss": 0.0306,
      "step": 473820
    },
    {
      "epoch": 0.7754495525749036,
      "grad_norm": 1.9286704063415527,
      "learning_rate": 8.504773890869316e-06,
      "loss": 0.0341,
      "step": 473840
    },
    {
      "epoch": 0.7754822830135569,
      "grad_norm": 0.6836056709289551,
      "learning_rate": 8.5047079986558e-06,
      "loss": 0.0431,
      "step": 473860
    },
    {
      "epoch": 0.7755150134522103,
      "grad_norm": 0.6154012680053711,
      "learning_rate": 8.504642106442282e-06,
      "loss": 0.023,
      "step": 473880
    },
    {
      "epoch": 0.7755477438908637,
      "grad_norm": 0.15214863419532776,
      "learning_rate": 8.504576214228765e-06,
      "loss": 0.027,
      "step": 473900
    },
    {
      "epoch": 0.7755804743295169,
      "grad_norm": 1.6613882780075073,
      "learning_rate": 8.504510322015249e-06,
      "loss": 0.0293,
      "step": 473920
    },
    {
      "epoch": 0.7756132047681703,
      "grad_norm": 0.7248067855834961,
      "learning_rate": 8.50444442980173e-06,
      "loss": 0.0336,
      "step": 473940
    },
    {
      "epoch": 0.7756459352068237,
      "grad_norm": 0.5842736959457397,
      "learning_rate": 8.504378537588214e-06,
      "loss": 0.0385,
      "step": 473960
    },
    {
      "epoch": 0.7756786656454769,
      "grad_norm": 0.7910052537918091,
      "learning_rate": 8.504312645374698e-06,
      "loss": 0.0312,
      "step": 473980
    },
    {
      "epoch": 0.7757113960841303,
      "grad_norm": 0.4790372848510742,
      "learning_rate": 8.50424675316118e-06,
      "loss": 0.0363,
      "step": 474000
    },
    {
      "epoch": 0.7757441265227837,
      "grad_norm": 0.7127048373222351,
      "learning_rate": 8.504180860947663e-06,
      "loss": 0.037,
      "step": 474020
    },
    {
      "epoch": 0.775776856961437,
      "grad_norm": 0.3607081174850464,
      "learning_rate": 8.504114968734145e-06,
      "loss": 0.0319,
      "step": 474040
    },
    {
      "epoch": 0.7758095874000903,
      "grad_norm": 1.889301061630249,
      "learning_rate": 8.504049076520629e-06,
      "loss": 0.0317,
      "step": 474060
    },
    {
      "epoch": 0.7758423178387437,
      "grad_norm": 0.7076732516288757,
      "learning_rate": 8.50398318430711e-06,
      "loss": 0.0349,
      "step": 474080
    },
    {
      "epoch": 0.7758750482773971,
      "grad_norm": 0.801413357257843,
      "learning_rate": 8.503917292093594e-06,
      "loss": 0.0359,
      "step": 474100
    },
    {
      "epoch": 0.7759077787160503,
      "grad_norm": 0.49957075715065,
      "learning_rate": 8.503851399880076e-06,
      "loss": 0.0337,
      "step": 474120
    },
    {
      "epoch": 0.7759405091547037,
      "grad_norm": 0.3050301671028137,
      "learning_rate": 8.50378550766656e-06,
      "loss": 0.034,
      "step": 474140
    },
    {
      "epoch": 0.7759732395933571,
      "grad_norm": 0.7222556471824646,
      "learning_rate": 8.503719615453042e-06,
      "loss": 0.021,
      "step": 474160
    },
    {
      "epoch": 0.7760059700320103,
      "grad_norm": 0.8757607340812683,
      "learning_rate": 8.503653723239525e-06,
      "loss": 0.0296,
      "step": 474180
    },
    {
      "epoch": 0.7760387004706637,
      "grad_norm": 0.6591227054595947,
      "learning_rate": 8.503587831026007e-06,
      "loss": 0.0313,
      "step": 474200
    },
    {
      "epoch": 0.7760714309093171,
      "grad_norm": 0.6521555781364441,
      "learning_rate": 8.50352193881249e-06,
      "loss": 0.0385,
      "step": 474220
    },
    {
      "epoch": 0.7761041613479703,
      "grad_norm": 1.1577813625335693,
      "learning_rate": 8.503456046598974e-06,
      "loss": 0.0278,
      "step": 474240
    },
    {
      "epoch": 0.7761368917866237,
      "grad_norm": 1.7497167587280273,
      "learning_rate": 8.503390154385456e-06,
      "loss": 0.0383,
      "step": 474260
    },
    {
      "epoch": 0.7761696222252771,
      "grad_norm": 0.6119627356529236,
      "learning_rate": 8.50332426217194e-06,
      "loss": 0.0229,
      "step": 474280
    },
    {
      "epoch": 0.7762023526639305,
      "grad_norm": 0.9760638475418091,
      "learning_rate": 8.503258369958423e-06,
      "loss": 0.0314,
      "step": 474300
    },
    {
      "epoch": 0.7762350831025837,
      "grad_norm": 1.1666604280471802,
      "learning_rate": 8.503192477744905e-06,
      "loss": 0.0297,
      "step": 474320
    },
    {
      "epoch": 0.7762678135412371,
      "grad_norm": 0.2446104735136032,
      "learning_rate": 8.503126585531389e-06,
      "loss": 0.0326,
      "step": 474340
    },
    {
      "epoch": 0.7763005439798905,
      "grad_norm": 1.4189881086349487,
      "learning_rate": 8.503060693317872e-06,
      "loss": 0.0431,
      "step": 474360
    },
    {
      "epoch": 0.7763332744185437,
      "grad_norm": 0.8831128478050232,
      "learning_rate": 8.502994801104354e-06,
      "loss": 0.0235,
      "step": 474380
    },
    {
      "epoch": 0.7763660048571971,
      "grad_norm": 0.828086793422699,
      "learning_rate": 8.502928908890838e-06,
      "loss": 0.0201,
      "step": 474400
    },
    {
      "epoch": 0.7763987352958505,
      "grad_norm": 0.4872289299964905,
      "learning_rate": 8.50286301667732e-06,
      "loss": 0.0323,
      "step": 474420
    },
    {
      "epoch": 0.7764314657345037,
      "grad_norm": 1.0971158742904663,
      "learning_rate": 8.502797124463803e-06,
      "loss": 0.0325,
      "step": 474440
    },
    {
      "epoch": 0.7764641961731571,
      "grad_norm": 1.6761505603790283,
      "learning_rate": 8.502731232250285e-06,
      "loss": 0.0222,
      "step": 474460
    },
    {
      "epoch": 0.7764969266118105,
      "grad_norm": 0.7075915932655334,
      "learning_rate": 8.502665340036769e-06,
      "loss": 0.0278,
      "step": 474480
    },
    {
      "epoch": 0.7765296570504638,
      "grad_norm": 0.5192782282829285,
      "learning_rate": 8.50259944782325e-06,
      "loss": 0.0342,
      "step": 474500
    },
    {
      "epoch": 0.7765623874891171,
      "grad_norm": 0.6940311193466187,
      "learning_rate": 8.502533555609734e-06,
      "loss": 0.0284,
      "step": 474520
    },
    {
      "epoch": 0.7765951179277705,
      "grad_norm": 0.7789899110794067,
      "learning_rate": 8.502467663396216e-06,
      "loss": 0.0244,
      "step": 474540
    },
    {
      "epoch": 0.7766278483664238,
      "grad_norm": 0.3780817985534668,
      "learning_rate": 8.5024017711827e-06,
      "loss": 0.0281,
      "step": 474560
    },
    {
      "epoch": 0.7766605788050771,
      "grad_norm": 0.5193488001823425,
      "learning_rate": 8.502335878969183e-06,
      "loss": 0.0239,
      "step": 474580
    },
    {
      "epoch": 0.7766933092437305,
      "grad_norm": 1.473368763923645,
      "learning_rate": 8.502269986755665e-06,
      "loss": 0.0285,
      "step": 474600
    },
    {
      "epoch": 0.7767260396823839,
      "grad_norm": 1.525169014930725,
      "learning_rate": 8.502204094542149e-06,
      "loss": 0.0368,
      "step": 474620
    },
    {
      "epoch": 0.7767587701210371,
      "grad_norm": 6.756445407867432,
      "learning_rate": 8.50213820232863e-06,
      "loss": 0.0269,
      "step": 474640
    },
    {
      "epoch": 0.7767915005596905,
      "grad_norm": 0.4905151128768921,
      "learning_rate": 8.502072310115114e-06,
      "loss": 0.0266,
      "step": 474660
    },
    {
      "epoch": 0.7768242309983439,
      "grad_norm": 1.5978633165359497,
      "learning_rate": 8.502006417901598e-06,
      "loss": 0.0325,
      "step": 474680
    },
    {
      "epoch": 0.7768569614369972,
      "grad_norm": 1.118996262550354,
      "learning_rate": 8.50194052568808e-06,
      "loss": 0.0369,
      "step": 474700
    },
    {
      "epoch": 0.7768896918756505,
      "grad_norm": 1.2177802324295044,
      "learning_rate": 8.501874633474563e-06,
      "loss": 0.0278,
      "step": 474720
    },
    {
      "epoch": 0.7769224223143039,
      "grad_norm": 13.15568733215332,
      "learning_rate": 8.501808741261047e-06,
      "loss": 0.0275,
      "step": 474740
    },
    {
      "epoch": 0.7769551527529572,
      "grad_norm": 0.2889021039009094,
      "learning_rate": 8.501742849047529e-06,
      "loss": 0.0282,
      "step": 474760
    },
    {
      "epoch": 0.7769878831916105,
      "grad_norm": 0.9901690483093262,
      "learning_rate": 8.501676956834012e-06,
      "loss": 0.0276,
      "step": 474780
    },
    {
      "epoch": 0.7770206136302639,
      "grad_norm": 0.3021135628223419,
      "learning_rate": 8.501611064620494e-06,
      "loss": 0.0257,
      "step": 474800
    },
    {
      "epoch": 0.7770533440689172,
      "grad_norm": 3.502337694168091,
      "learning_rate": 8.501545172406978e-06,
      "loss": 0.0227,
      "step": 474820
    },
    {
      "epoch": 0.7770860745075705,
      "grad_norm": 1.8855934143066406,
      "learning_rate": 8.50147928019346e-06,
      "loss": 0.0358,
      "step": 474840
    },
    {
      "epoch": 0.7771188049462239,
      "grad_norm": 1.951001763343811,
      "learning_rate": 8.501413387979943e-06,
      "loss": 0.0371,
      "step": 474860
    },
    {
      "epoch": 0.7771515353848772,
      "grad_norm": 0.9783212542533875,
      "learning_rate": 8.501347495766425e-06,
      "loss": 0.034,
      "step": 474880
    },
    {
      "epoch": 0.7771842658235306,
      "grad_norm": 1.0492091178894043,
      "learning_rate": 8.501281603552909e-06,
      "loss": 0.0369,
      "step": 474900
    },
    {
      "epoch": 0.7772169962621839,
      "grad_norm": 1.578035831451416,
      "learning_rate": 8.501215711339392e-06,
      "loss": 0.0238,
      "step": 474920
    },
    {
      "epoch": 0.7772497267008373,
      "grad_norm": 0.7328863143920898,
      "learning_rate": 8.501149819125874e-06,
      "loss": 0.0308,
      "step": 474940
    },
    {
      "epoch": 0.7772824571394906,
      "grad_norm": 0.684658944606781,
      "learning_rate": 8.501083926912358e-06,
      "loss": 0.0242,
      "step": 474960
    },
    {
      "epoch": 0.7773151875781439,
      "grad_norm": 0.9895879626274109,
      "learning_rate": 8.50101803469884e-06,
      "loss": 0.0433,
      "step": 474980
    },
    {
      "epoch": 0.7773479180167973,
      "grad_norm": 2.5213863849639893,
      "learning_rate": 8.500952142485323e-06,
      "loss": 0.0344,
      "step": 475000
    },
    {
      "epoch": 0.7773806484554506,
      "grad_norm": 1.8629547357559204,
      "learning_rate": 8.500886250271805e-06,
      "loss": 0.0393,
      "step": 475020
    },
    {
      "epoch": 0.7774133788941039,
      "grad_norm": 0.4921356439590454,
      "learning_rate": 8.500820358058289e-06,
      "loss": 0.0364,
      "step": 475040
    },
    {
      "epoch": 0.7774461093327573,
      "grad_norm": 0.6697034239768982,
      "learning_rate": 8.50075446584477e-06,
      "loss": 0.0397,
      "step": 475060
    },
    {
      "epoch": 0.7774788397714106,
      "grad_norm": 1.5241442918777466,
      "learning_rate": 8.500688573631254e-06,
      "loss": 0.0378,
      "step": 475080
    },
    {
      "epoch": 0.777511570210064,
      "grad_norm": 0.14675851166248322,
      "learning_rate": 8.500622681417738e-06,
      "loss": 0.0177,
      "step": 475100
    },
    {
      "epoch": 0.7775443006487173,
      "grad_norm": 1.2460185289382935,
      "learning_rate": 8.50055678920422e-06,
      "loss": 0.0311,
      "step": 475120
    },
    {
      "epoch": 0.7775770310873706,
      "grad_norm": 1.2011162042617798,
      "learning_rate": 8.500490896990703e-06,
      "loss": 0.0216,
      "step": 475140
    },
    {
      "epoch": 0.777609761526024,
      "grad_norm": 1.16752290725708,
      "learning_rate": 8.500425004777187e-06,
      "loss": 0.029,
      "step": 475160
    },
    {
      "epoch": 0.7776424919646773,
      "grad_norm": 0.45763030648231506,
      "learning_rate": 8.500359112563669e-06,
      "loss": 0.0324,
      "step": 475180
    },
    {
      "epoch": 0.7776752224033306,
      "grad_norm": 0.20586146414279938,
      "learning_rate": 8.500293220350153e-06,
      "loss": 0.0296,
      "step": 475200
    },
    {
      "epoch": 0.777707952841984,
      "grad_norm": 0.6985728740692139,
      "learning_rate": 8.500227328136634e-06,
      "loss": 0.0329,
      "step": 475220
    },
    {
      "epoch": 0.7777406832806373,
      "grad_norm": 1.0294294357299805,
      "learning_rate": 8.500161435923118e-06,
      "loss": 0.0279,
      "step": 475240
    },
    {
      "epoch": 0.7777734137192907,
      "grad_norm": 0.8413320779800415,
      "learning_rate": 8.5000955437096e-06,
      "loss": 0.0333,
      "step": 475260
    },
    {
      "epoch": 0.777806144157944,
      "grad_norm": 1.0944669246673584,
      "learning_rate": 8.500029651496084e-06,
      "loss": 0.0333,
      "step": 475280
    },
    {
      "epoch": 0.7778388745965974,
      "grad_norm": 2.8304691314697266,
      "learning_rate": 8.499963759282567e-06,
      "loss": 0.0314,
      "step": 475300
    },
    {
      "epoch": 0.7778716050352507,
      "grad_norm": 1.3142913579940796,
      "learning_rate": 8.499897867069049e-06,
      "loss": 0.0309,
      "step": 475320
    },
    {
      "epoch": 0.777904335473904,
      "grad_norm": 0.9427985548973083,
      "learning_rate": 8.499831974855533e-06,
      "loss": 0.0455,
      "step": 475340
    },
    {
      "epoch": 0.7779370659125574,
      "grad_norm": 0.28465795516967773,
      "learning_rate": 8.499766082642014e-06,
      "loss": 0.0252,
      "step": 475360
    },
    {
      "epoch": 0.7779697963512107,
      "grad_norm": 1.082094430923462,
      "learning_rate": 8.499700190428498e-06,
      "loss": 0.0352,
      "step": 475380
    },
    {
      "epoch": 0.778002526789864,
      "grad_norm": 1.357835292816162,
      "learning_rate": 8.49963429821498e-06,
      "loss": 0.0258,
      "step": 475400
    },
    {
      "epoch": 0.7780352572285174,
      "grad_norm": 1.0596998929977417,
      "learning_rate": 8.499568406001464e-06,
      "loss": 0.0282,
      "step": 475420
    },
    {
      "epoch": 0.7780679876671707,
      "grad_norm": 0.6941677927970886,
      "learning_rate": 8.499502513787945e-06,
      "loss": 0.027,
      "step": 475440
    },
    {
      "epoch": 0.778100718105824,
      "grad_norm": 0.544813871383667,
      "learning_rate": 8.499436621574429e-06,
      "loss": 0.0372,
      "step": 475460
    },
    {
      "epoch": 0.7781334485444774,
      "grad_norm": 1.3725303411483765,
      "learning_rate": 8.499370729360913e-06,
      "loss": 0.0286,
      "step": 475480
    },
    {
      "epoch": 0.7781661789831308,
      "grad_norm": 1.3626937866210938,
      "learning_rate": 8.499304837147394e-06,
      "loss": 0.0302,
      "step": 475500
    },
    {
      "epoch": 0.778198909421784,
      "grad_norm": 3.366180658340454,
      "learning_rate": 8.499238944933878e-06,
      "loss": 0.04,
      "step": 475520
    },
    {
      "epoch": 0.7782316398604374,
      "grad_norm": 0.6238844394683838,
      "learning_rate": 8.499173052720362e-06,
      "loss": 0.0317,
      "step": 475540
    },
    {
      "epoch": 0.7782643702990908,
      "grad_norm": 1.152086853981018,
      "learning_rate": 8.499107160506844e-06,
      "loss": 0.0207,
      "step": 475560
    },
    {
      "epoch": 0.778297100737744,
      "grad_norm": 1.7468843460083008,
      "learning_rate": 8.499041268293327e-06,
      "loss": 0.027,
      "step": 475580
    },
    {
      "epoch": 0.7783298311763974,
      "grad_norm": 0.9829726815223694,
      "learning_rate": 8.498975376079809e-06,
      "loss": 0.0382,
      "step": 475600
    },
    {
      "epoch": 0.7783625616150508,
      "grad_norm": 2.0448875427246094,
      "learning_rate": 8.498909483866293e-06,
      "loss": 0.0333,
      "step": 475620
    },
    {
      "epoch": 0.7783952920537041,
      "grad_norm": 1.8674687147140503,
      "learning_rate": 8.498843591652776e-06,
      "loss": 0.0333,
      "step": 475640
    },
    {
      "epoch": 0.7784280224923574,
      "grad_norm": 0.2600192129611969,
      "learning_rate": 8.498777699439258e-06,
      "loss": 0.0421,
      "step": 475660
    },
    {
      "epoch": 0.7784607529310108,
      "grad_norm": 0.745269775390625,
      "learning_rate": 8.498711807225742e-06,
      "loss": 0.0352,
      "step": 475680
    },
    {
      "epoch": 0.7784934833696642,
      "grad_norm": 4.183197975158691,
      "learning_rate": 8.498645915012224e-06,
      "loss": 0.0239,
      "step": 475700
    },
    {
      "epoch": 0.7785262138083174,
      "grad_norm": 1.0300288200378418,
      "learning_rate": 8.498580022798707e-06,
      "loss": 0.0273,
      "step": 475720
    },
    {
      "epoch": 0.7785589442469708,
      "grad_norm": 0.3737702965736389,
      "learning_rate": 8.498514130585189e-06,
      "loss": 0.0296,
      "step": 475740
    },
    {
      "epoch": 0.7785916746856242,
      "grad_norm": 2.1930856704711914,
      "learning_rate": 8.498448238371673e-06,
      "loss": 0.021,
      "step": 475760
    },
    {
      "epoch": 0.7786244051242774,
      "grad_norm": 1.3707693815231323,
      "learning_rate": 8.498382346158155e-06,
      "loss": 0.0293,
      "step": 475780
    },
    {
      "epoch": 0.7786571355629308,
      "grad_norm": 0.2795808017253876,
      "learning_rate": 8.498316453944638e-06,
      "loss": 0.0202,
      "step": 475800
    },
    {
      "epoch": 0.7786898660015842,
      "grad_norm": 1.4836585521697998,
      "learning_rate": 8.49825056173112e-06,
      "loss": 0.0286,
      "step": 475820
    },
    {
      "epoch": 0.7787225964402374,
      "grad_norm": 1.5470998287200928,
      "learning_rate": 8.498184669517604e-06,
      "loss": 0.0283,
      "step": 475840
    },
    {
      "epoch": 0.7787553268788908,
      "grad_norm": 0.38913053274154663,
      "learning_rate": 8.498118777304085e-06,
      "loss": 0.0329,
      "step": 475860
    },
    {
      "epoch": 0.7787880573175442,
      "grad_norm": 3.4118287563323975,
      "learning_rate": 8.498052885090569e-06,
      "loss": 0.0366,
      "step": 475880
    },
    {
      "epoch": 0.7788207877561976,
      "grad_norm": 0.19015128910541534,
      "learning_rate": 8.497986992877053e-06,
      "loss": 0.0241,
      "step": 475900
    },
    {
      "epoch": 0.7788535181948508,
      "grad_norm": 0.39678773283958435,
      "learning_rate": 8.497921100663535e-06,
      "loss": 0.0394,
      "step": 475920
    },
    {
      "epoch": 0.7788862486335042,
      "grad_norm": 0.10233144462108612,
      "learning_rate": 8.497855208450018e-06,
      "loss": 0.042,
      "step": 475940
    },
    {
      "epoch": 0.7789189790721576,
      "grad_norm": 1.095123052597046,
      "learning_rate": 8.497789316236502e-06,
      "loss": 0.0365,
      "step": 475960
    },
    {
      "epoch": 0.7789517095108108,
      "grad_norm": 2.7113654613494873,
      "learning_rate": 8.497723424022984e-06,
      "loss": 0.0391,
      "step": 475980
    },
    {
      "epoch": 0.7789844399494642,
      "grad_norm": 1.6996362209320068,
      "learning_rate": 8.497657531809467e-06,
      "loss": 0.0344,
      "step": 476000
    },
    {
      "epoch": 0.7790171703881176,
      "grad_norm": 0.5658930540084839,
      "learning_rate": 8.49759163959595e-06,
      "loss": 0.0342,
      "step": 476020
    },
    {
      "epoch": 0.7790499008267708,
      "grad_norm": 0.22790473699569702,
      "learning_rate": 8.497525747382433e-06,
      "loss": 0.0212,
      "step": 476040
    },
    {
      "epoch": 0.7790826312654242,
      "grad_norm": 2.4362337589263916,
      "learning_rate": 8.497459855168916e-06,
      "loss": 0.0385,
      "step": 476060
    },
    {
      "epoch": 0.7791153617040776,
      "grad_norm": 1.9919090270996094,
      "learning_rate": 8.497393962955398e-06,
      "loss": 0.041,
      "step": 476080
    },
    {
      "epoch": 0.7791480921427308,
      "grad_norm": 0.6599509716033936,
      "learning_rate": 8.497328070741882e-06,
      "loss": 0.0413,
      "step": 476100
    },
    {
      "epoch": 0.7791808225813842,
      "grad_norm": 0.5175321102142334,
      "learning_rate": 8.497262178528364e-06,
      "loss": 0.0353,
      "step": 476120
    },
    {
      "epoch": 0.7792135530200376,
      "grad_norm": 2.1929495334625244,
      "learning_rate": 8.497196286314847e-06,
      "loss": 0.0317,
      "step": 476140
    },
    {
      "epoch": 0.779246283458691,
      "grad_norm": 1.3667192459106445,
      "learning_rate": 8.497130394101329e-06,
      "loss": 0.0322,
      "step": 476160
    },
    {
      "epoch": 0.7792790138973442,
      "grad_norm": 0.47586560249328613,
      "learning_rate": 8.497064501887813e-06,
      "loss": 0.025,
      "step": 476180
    },
    {
      "epoch": 0.7793117443359976,
      "grad_norm": 0.45984604954719543,
      "learning_rate": 8.496998609674295e-06,
      "loss": 0.0317,
      "step": 476200
    },
    {
      "epoch": 0.779344474774651,
      "grad_norm": 1.879482388496399,
      "learning_rate": 8.496932717460778e-06,
      "loss": 0.0219,
      "step": 476220
    },
    {
      "epoch": 0.7793772052133042,
      "grad_norm": 0.5553001165390015,
      "learning_rate": 8.49686682524726e-06,
      "loss": 0.0248,
      "step": 476240
    },
    {
      "epoch": 0.7794099356519576,
      "grad_norm": 3.678088426589966,
      "learning_rate": 8.496800933033744e-06,
      "loss": 0.0305,
      "step": 476260
    },
    {
      "epoch": 0.779442666090611,
      "grad_norm": 0.6967065334320068,
      "learning_rate": 8.496735040820227e-06,
      "loss": 0.0245,
      "step": 476280
    },
    {
      "epoch": 0.7794753965292642,
      "grad_norm": 0.384992390871048,
      "learning_rate": 8.49666914860671e-06,
      "loss": 0.0301,
      "step": 476300
    },
    {
      "epoch": 0.7795081269679176,
      "grad_norm": 0.3411289155483246,
      "learning_rate": 8.496603256393193e-06,
      "loss": 0.023,
      "step": 476320
    },
    {
      "epoch": 0.779540857406571,
      "grad_norm": 1.9937200546264648,
      "learning_rate": 8.496537364179676e-06,
      "loss": 0.027,
      "step": 476340
    },
    {
      "epoch": 0.7795735878452243,
      "grad_norm": 1.9047093391418457,
      "learning_rate": 8.496471471966158e-06,
      "loss": 0.0345,
      "step": 476360
    },
    {
      "epoch": 0.7796063182838776,
      "grad_norm": 0.2758304476737976,
      "learning_rate": 8.496405579752642e-06,
      "loss": 0.0317,
      "step": 476380
    },
    {
      "epoch": 0.779639048722531,
      "grad_norm": 1.069579839706421,
      "learning_rate": 8.496339687539125e-06,
      "loss": 0.0318,
      "step": 476400
    },
    {
      "epoch": 0.7796717791611844,
      "grad_norm": 1.1136589050292969,
      "learning_rate": 8.496273795325607e-06,
      "loss": 0.0296,
      "step": 476420
    },
    {
      "epoch": 0.7797045095998376,
      "grad_norm": 0.9743197560310364,
      "learning_rate": 8.496207903112091e-06,
      "loss": 0.0286,
      "step": 476440
    },
    {
      "epoch": 0.779737240038491,
      "grad_norm": 0.4554678797721863,
      "learning_rate": 8.496142010898573e-06,
      "loss": 0.027,
      "step": 476460
    },
    {
      "epoch": 0.7797699704771444,
      "grad_norm": 0.847005307674408,
      "learning_rate": 8.496076118685056e-06,
      "loss": 0.0246,
      "step": 476480
    },
    {
      "epoch": 0.7798027009157976,
      "grad_norm": 1.407157301902771,
      "learning_rate": 8.496010226471538e-06,
      "loss": 0.0295,
      "step": 476500
    },
    {
      "epoch": 0.779835431354451,
      "grad_norm": 1.5249102115631104,
      "learning_rate": 8.495944334258022e-06,
      "loss": 0.0322,
      "step": 476520
    },
    {
      "epoch": 0.7798681617931044,
      "grad_norm": 0.984569251537323,
      "learning_rate": 8.495878442044504e-06,
      "loss": 0.0327,
      "step": 476540
    },
    {
      "epoch": 0.7799008922317577,
      "grad_norm": 1.6028368473052979,
      "learning_rate": 8.495812549830987e-06,
      "loss": 0.0404,
      "step": 476560
    },
    {
      "epoch": 0.779933622670411,
      "grad_norm": 1.5649205446243286,
      "learning_rate": 8.49574665761747e-06,
      "loss": 0.0324,
      "step": 476580
    },
    {
      "epoch": 0.7799663531090644,
      "grad_norm": 0.4330536127090454,
      "learning_rate": 8.495680765403953e-06,
      "loss": 0.0359,
      "step": 476600
    },
    {
      "epoch": 0.7799990835477177,
      "grad_norm": 0.6986478567123413,
      "learning_rate": 8.495614873190435e-06,
      "loss": 0.0319,
      "step": 476620
    },
    {
      "epoch": 0.780031813986371,
      "grad_norm": 0.7484341263771057,
      "learning_rate": 8.495548980976918e-06,
      "loss": 0.0257,
      "step": 476640
    },
    {
      "epoch": 0.7800645444250244,
      "grad_norm": 1.347325086593628,
      "learning_rate": 8.4954830887634e-06,
      "loss": 0.0303,
      "step": 476660
    },
    {
      "epoch": 0.7800972748636777,
      "grad_norm": 2.7042667865753174,
      "learning_rate": 8.495417196549884e-06,
      "loss": 0.0347,
      "step": 476680
    },
    {
      "epoch": 0.780130005302331,
      "grad_norm": 0.45079758763313293,
      "learning_rate": 8.495351304336367e-06,
      "loss": 0.0248,
      "step": 476700
    },
    {
      "epoch": 0.7801627357409844,
      "grad_norm": 1.3765183687210083,
      "learning_rate": 8.49528541212285e-06,
      "loss": 0.0264,
      "step": 476720
    },
    {
      "epoch": 0.7801954661796378,
      "grad_norm": 1.0257030725479126,
      "learning_rate": 8.495219519909333e-06,
      "loss": 0.0512,
      "step": 476740
    },
    {
      "epoch": 0.7802281966182911,
      "grad_norm": 0.514845073223114,
      "learning_rate": 8.495153627695816e-06,
      "loss": 0.0323,
      "step": 476760
    },
    {
      "epoch": 0.7802609270569444,
      "grad_norm": 0.48992782831192017,
      "learning_rate": 8.495087735482298e-06,
      "loss": 0.0237,
      "step": 476780
    },
    {
      "epoch": 0.7802936574955978,
      "grad_norm": 1.64373779296875,
      "learning_rate": 8.495021843268782e-06,
      "loss": 0.0402,
      "step": 476800
    },
    {
      "epoch": 0.7803263879342511,
      "grad_norm": 0.7125220894813538,
      "learning_rate": 8.494955951055265e-06,
      "loss": 0.0345,
      "step": 476820
    },
    {
      "epoch": 0.7803591183729044,
      "grad_norm": 1.1361207962036133,
      "learning_rate": 8.494890058841747e-06,
      "loss": 0.0317,
      "step": 476840
    },
    {
      "epoch": 0.7803918488115578,
      "grad_norm": 0.3773166835308075,
      "learning_rate": 8.494824166628231e-06,
      "loss": 0.0211,
      "step": 476860
    },
    {
      "epoch": 0.7804245792502111,
      "grad_norm": 2.081341505050659,
      "learning_rate": 8.494758274414713e-06,
      "loss": 0.0308,
      "step": 476880
    },
    {
      "epoch": 0.7804573096888644,
      "grad_norm": 0.431837260723114,
      "learning_rate": 8.494692382201196e-06,
      "loss": 0.0296,
      "step": 476900
    },
    {
      "epoch": 0.7804900401275178,
      "grad_norm": 1.287015676498413,
      "learning_rate": 8.494626489987678e-06,
      "loss": 0.0221,
      "step": 476920
    },
    {
      "epoch": 0.7805227705661711,
      "grad_norm": 0.6370741724967957,
      "learning_rate": 8.494560597774162e-06,
      "loss": 0.0399,
      "step": 476940
    },
    {
      "epoch": 0.7805555010048245,
      "grad_norm": 0.263577938079834,
      "learning_rate": 8.494494705560644e-06,
      "loss": 0.03,
      "step": 476960
    },
    {
      "epoch": 0.7805882314434778,
      "grad_norm": 0.9654865860939026,
      "learning_rate": 8.494428813347127e-06,
      "loss": 0.0201,
      "step": 476980
    },
    {
      "epoch": 0.7806209618821311,
      "grad_norm": 1.0961458683013916,
      "learning_rate": 8.49436292113361e-06,
      "loss": 0.0304,
      "step": 477000
    },
    {
      "epoch": 0.7806536923207845,
      "grad_norm": 0.6540823578834534,
      "learning_rate": 8.494297028920093e-06,
      "loss": 0.0362,
      "step": 477020
    },
    {
      "epoch": 0.7806864227594378,
      "grad_norm": 1.8568463325500488,
      "learning_rate": 8.494231136706576e-06,
      "loss": 0.0321,
      "step": 477040
    },
    {
      "epoch": 0.7807191531980912,
      "grad_norm": 1.1823797225952148,
      "learning_rate": 8.494165244493058e-06,
      "loss": 0.0238,
      "step": 477060
    },
    {
      "epoch": 0.7807518836367445,
      "grad_norm": 1.6640444993972778,
      "learning_rate": 8.494099352279542e-06,
      "loss": 0.0375,
      "step": 477080
    },
    {
      "epoch": 0.7807846140753978,
      "grad_norm": 0.4052181839942932,
      "learning_rate": 8.494033460066024e-06,
      "loss": 0.0291,
      "step": 477100
    },
    {
      "epoch": 0.7808173445140512,
      "grad_norm": 0.9521154165267944,
      "learning_rate": 8.493967567852507e-06,
      "loss": 0.0374,
      "step": 477120
    },
    {
      "epoch": 0.7808500749527045,
      "grad_norm": 2.7940125465393066,
      "learning_rate": 8.493901675638991e-06,
      "loss": 0.0202,
      "step": 477140
    },
    {
      "epoch": 0.7808828053913579,
      "grad_norm": 1.13158118724823,
      "learning_rate": 8.493835783425473e-06,
      "loss": 0.027,
      "step": 477160
    },
    {
      "epoch": 0.7809155358300112,
      "grad_norm": 1.5512887239456177,
      "learning_rate": 8.493769891211956e-06,
      "loss": 0.032,
      "step": 477180
    },
    {
      "epoch": 0.7809482662686645,
      "grad_norm": 0.5181249380111694,
      "learning_rate": 8.49370399899844e-06,
      "loss": 0.0289,
      "step": 477200
    },
    {
      "epoch": 0.7809809967073179,
      "grad_norm": 2.4146194458007812,
      "learning_rate": 8.493638106784922e-06,
      "loss": 0.0385,
      "step": 477220
    },
    {
      "epoch": 0.7810137271459712,
      "grad_norm": 0.9812431335449219,
      "learning_rate": 8.493572214571406e-06,
      "loss": 0.0272,
      "step": 477240
    },
    {
      "epoch": 0.7810464575846245,
      "grad_norm": 0.7858394384384155,
      "learning_rate": 8.493506322357887e-06,
      "loss": 0.0271,
      "step": 477260
    },
    {
      "epoch": 0.7810791880232779,
      "grad_norm": 0.5867286324501038,
      "learning_rate": 8.493440430144371e-06,
      "loss": 0.0364,
      "step": 477280
    },
    {
      "epoch": 0.7811119184619312,
      "grad_norm": 0.17547547817230225,
      "learning_rate": 8.493374537930853e-06,
      "loss": 0.0223,
      "step": 477300
    },
    {
      "epoch": 0.7811446489005845,
      "grad_norm": 0.408306747674942,
      "learning_rate": 8.493308645717337e-06,
      "loss": 0.026,
      "step": 477320
    },
    {
      "epoch": 0.7811773793392379,
      "grad_norm": 0.6413980722427368,
      "learning_rate": 8.493242753503818e-06,
      "loss": 0.0275,
      "step": 477340
    },
    {
      "epoch": 0.7812101097778913,
      "grad_norm": 2.0347421169281006,
      "learning_rate": 8.493176861290302e-06,
      "loss": 0.0337,
      "step": 477360
    },
    {
      "epoch": 0.7812428402165446,
      "grad_norm": 1.257828950881958,
      "learning_rate": 8.493110969076784e-06,
      "loss": 0.0301,
      "step": 477380
    },
    {
      "epoch": 0.7812755706551979,
      "grad_norm": 0.7426308989524841,
      "learning_rate": 8.493045076863267e-06,
      "loss": 0.0251,
      "step": 477400
    },
    {
      "epoch": 0.7813083010938513,
      "grad_norm": 2.483741283416748,
      "learning_rate": 8.492979184649751e-06,
      "loss": 0.0459,
      "step": 477420
    },
    {
      "epoch": 0.7813410315325046,
      "grad_norm": 0.6734176278114319,
      "learning_rate": 8.492913292436233e-06,
      "loss": 0.023,
      "step": 477440
    },
    {
      "epoch": 0.7813737619711579,
      "grad_norm": 1.226338505744934,
      "learning_rate": 8.492847400222717e-06,
      "loss": 0.027,
      "step": 477460
    },
    {
      "epoch": 0.7814064924098113,
      "grad_norm": 0.6639873385429382,
      "learning_rate": 8.492781508009198e-06,
      "loss": 0.0314,
      "step": 477480
    },
    {
      "epoch": 0.7814392228484646,
      "grad_norm": 0.9499150514602661,
      "learning_rate": 8.492715615795682e-06,
      "loss": 0.0328,
      "step": 477500
    },
    {
      "epoch": 0.7814719532871179,
      "grad_norm": 0.7666475772857666,
      "learning_rate": 8.492649723582166e-06,
      "loss": 0.0381,
      "step": 477520
    },
    {
      "epoch": 0.7815046837257713,
      "grad_norm": 0.4225101172924042,
      "learning_rate": 8.492583831368647e-06,
      "loss": 0.0346,
      "step": 477540
    },
    {
      "epoch": 0.7815374141644247,
      "grad_norm": 0.2716755270957947,
      "learning_rate": 8.492517939155131e-06,
      "loss": 0.0258,
      "step": 477560
    },
    {
      "epoch": 0.7815701446030779,
      "grad_norm": 1.6085798740386963,
      "learning_rate": 8.492452046941615e-06,
      "loss": 0.0352,
      "step": 477580
    },
    {
      "epoch": 0.7816028750417313,
      "grad_norm": 0.5735672116279602,
      "learning_rate": 8.492386154728097e-06,
      "loss": 0.0251,
      "step": 477600
    },
    {
      "epoch": 0.7816356054803847,
      "grad_norm": 0.5613640546798706,
      "learning_rate": 8.49232026251458e-06,
      "loss": 0.0294,
      "step": 477620
    },
    {
      "epoch": 0.781668335919038,
      "grad_norm": 2.6699328422546387,
      "learning_rate": 8.492254370301062e-06,
      "loss": 0.0277,
      "step": 477640
    },
    {
      "epoch": 0.7817010663576913,
      "grad_norm": 2.244307041168213,
      "learning_rate": 8.492188478087546e-06,
      "loss": 0.0264,
      "step": 477660
    },
    {
      "epoch": 0.7817337967963447,
      "grad_norm": 1.6946382522583008,
      "learning_rate": 8.492122585874028e-06,
      "loss": 0.0408,
      "step": 477680
    },
    {
      "epoch": 0.781766527234998,
      "grad_norm": 5.219115734100342,
      "learning_rate": 8.492056693660511e-06,
      "loss": 0.0387,
      "step": 477700
    },
    {
      "epoch": 0.7817992576736513,
      "grad_norm": 0.5305967926979065,
      "learning_rate": 8.491990801446993e-06,
      "loss": 0.0249,
      "step": 477720
    },
    {
      "epoch": 0.7818319881123047,
      "grad_norm": 1.7148253917694092,
      "learning_rate": 8.491924909233477e-06,
      "loss": 0.0286,
      "step": 477740
    },
    {
      "epoch": 0.7818647185509581,
      "grad_norm": 0.22419632971286774,
      "learning_rate": 8.49185901701996e-06,
      "loss": 0.0313,
      "step": 477760
    },
    {
      "epoch": 0.7818974489896113,
      "grad_norm": 0.35589438676834106,
      "learning_rate": 8.491793124806442e-06,
      "loss": 0.0257,
      "step": 477780
    },
    {
      "epoch": 0.7819301794282647,
      "grad_norm": 0.8060725927352905,
      "learning_rate": 8.491727232592926e-06,
      "loss": 0.0291,
      "step": 477800
    },
    {
      "epoch": 0.7819629098669181,
      "grad_norm": 0.7649304270744324,
      "learning_rate": 8.491661340379408e-06,
      "loss": 0.0269,
      "step": 477820
    },
    {
      "epoch": 0.7819956403055713,
      "grad_norm": 0.6906429529190063,
      "learning_rate": 8.491595448165891e-06,
      "loss": 0.0248,
      "step": 477840
    },
    {
      "epoch": 0.7820283707442247,
      "grad_norm": 3.1125705242156982,
      "learning_rate": 8.491529555952373e-06,
      "loss": 0.022,
      "step": 477860
    },
    {
      "epoch": 0.7820611011828781,
      "grad_norm": 0.3427909314632416,
      "learning_rate": 8.491463663738857e-06,
      "loss": 0.0307,
      "step": 477880
    },
    {
      "epoch": 0.7820938316215313,
      "grad_norm": 0.8338423371315002,
      "learning_rate": 8.491397771525339e-06,
      "loss": 0.0362,
      "step": 477900
    },
    {
      "epoch": 0.7821265620601847,
      "grad_norm": 2.3176801204681396,
      "learning_rate": 8.491331879311822e-06,
      "loss": 0.0302,
      "step": 477920
    },
    {
      "epoch": 0.7821592924988381,
      "grad_norm": 1.3839013576507568,
      "learning_rate": 8.491265987098306e-06,
      "loss": 0.037,
      "step": 477940
    },
    {
      "epoch": 0.7821920229374915,
      "grad_norm": 0.6586779356002808,
      "learning_rate": 8.491200094884788e-06,
      "loss": 0.0395,
      "step": 477960
    },
    {
      "epoch": 0.7822247533761447,
      "grad_norm": 1.3695992231369019,
      "learning_rate": 8.491134202671271e-06,
      "loss": 0.0336,
      "step": 477980
    },
    {
      "epoch": 0.7822574838147981,
      "grad_norm": 1.501315712928772,
      "learning_rate": 8.491068310457755e-06,
      "loss": 0.0342,
      "step": 478000
    },
    {
      "epoch": 0.7822902142534515,
      "grad_norm": 1.338080644607544,
      "learning_rate": 8.491002418244237e-06,
      "loss": 0.0273,
      "step": 478020
    },
    {
      "epoch": 0.7823229446921047,
      "grad_norm": 0.9874044060707092,
      "learning_rate": 8.49093652603072e-06,
      "loss": 0.0371,
      "step": 478040
    },
    {
      "epoch": 0.7823556751307581,
      "grad_norm": 1.7527191638946533,
      "learning_rate": 8.490870633817202e-06,
      "loss": 0.0301,
      "step": 478060
    },
    {
      "epoch": 0.7823884055694115,
      "grad_norm": 0.5575011968612671,
      "learning_rate": 8.490804741603686e-06,
      "loss": 0.0241,
      "step": 478080
    },
    {
      "epoch": 0.7824211360080647,
      "grad_norm": 1.7207579612731934,
      "learning_rate": 8.49073884939017e-06,
      "loss": 0.0329,
      "step": 478100
    },
    {
      "epoch": 0.7824538664467181,
      "grad_norm": 0.3380054235458374,
      "learning_rate": 8.490672957176651e-06,
      "loss": 0.0255,
      "step": 478120
    },
    {
      "epoch": 0.7824865968853715,
      "grad_norm": 1.096123218536377,
      "learning_rate": 8.490607064963135e-06,
      "loss": 0.0319,
      "step": 478140
    },
    {
      "epoch": 0.7825193273240248,
      "grad_norm": 1.2434930801391602,
      "learning_rate": 8.490541172749617e-06,
      "loss": 0.0327,
      "step": 478160
    },
    {
      "epoch": 0.7825520577626781,
      "grad_norm": 0.3413357734680176,
      "learning_rate": 8.4904752805361e-06,
      "loss": 0.0256,
      "step": 478180
    },
    {
      "epoch": 0.7825847882013315,
      "grad_norm": 2.0441086292266846,
      "learning_rate": 8.490409388322582e-06,
      "loss": 0.0245,
      "step": 478200
    },
    {
      "epoch": 0.7826175186399849,
      "grad_norm": 1.0236071348190308,
      "learning_rate": 8.490343496109066e-06,
      "loss": 0.0324,
      "step": 478220
    },
    {
      "epoch": 0.7826502490786381,
      "grad_norm": 1.167776346206665,
      "learning_rate": 8.490277603895548e-06,
      "loss": 0.0351,
      "step": 478240
    },
    {
      "epoch": 0.7826829795172915,
      "grad_norm": 3.213958740234375,
      "learning_rate": 8.490211711682031e-06,
      "loss": 0.0351,
      "step": 478260
    },
    {
      "epoch": 0.7827157099559449,
      "grad_norm": 1.1386125087738037,
      "learning_rate": 8.490145819468513e-06,
      "loss": 0.0422,
      "step": 478280
    },
    {
      "epoch": 0.7827484403945981,
      "grad_norm": 0.7429571747779846,
      "learning_rate": 8.490079927254997e-06,
      "loss": 0.0477,
      "step": 478300
    },
    {
      "epoch": 0.7827811708332515,
      "grad_norm": 0.29689931869506836,
      "learning_rate": 8.49001403504148e-06,
      "loss": 0.0283,
      "step": 478320
    },
    {
      "epoch": 0.7828139012719049,
      "grad_norm": 2.273702621459961,
      "learning_rate": 8.489948142827962e-06,
      "loss": 0.0176,
      "step": 478340
    },
    {
      "epoch": 0.7828466317105582,
      "grad_norm": 1.8599574565887451,
      "learning_rate": 8.489882250614446e-06,
      "loss": 0.0267,
      "step": 478360
    },
    {
      "epoch": 0.7828793621492115,
      "grad_norm": 0.7760817408561707,
      "learning_rate": 8.48981635840093e-06,
      "loss": 0.0346,
      "step": 478380
    },
    {
      "epoch": 0.7829120925878649,
      "grad_norm": 0.4873083233833313,
      "learning_rate": 8.489750466187411e-06,
      "loss": 0.0307,
      "step": 478400
    },
    {
      "epoch": 0.7829448230265182,
      "grad_norm": 1.5906929969787598,
      "learning_rate": 8.489684573973895e-06,
      "loss": 0.0337,
      "step": 478420
    },
    {
      "epoch": 0.7829775534651715,
      "grad_norm": 0.7132092118263245,
      "learning_rate": 8.489618681760378e-06,
      "loss": 0.0258,
      "step": 478440
    },
    {
      "epoch": 0.7830102839038249,
      "grad_norm": 4.3906331062316895,
      "learning_rate": 8.48955278954686e-06,
      "loss": 0.0345,
      "step": 478460
    },
    {
      "epoch": 0.7830430143424782,
      "grad_norm": 0.38845327496528625,
      "learning_rate": 8.489486897333344e-06,
      "loss": 0.0316,
      "step": 478480
    },
    {
      "epoch": 0.7830757447811315,
      "grad_norm": 1.6590522527694702,
      "learning_rate": 8.489421005119826e-06,
      "loss": 0.025,
      "step": 478500
    },
    {
      "epoch": 0.7831084752197849,
      "grad_norm": 0.26849475502967834,
      "learning_rate": 8.48935511290631e-06,
      "loss": 0.0263,
      "step": 478520
    },
    {
      "epoch": 0.7831412056584383,
      "grad_norm": 0.711097002029419,
      "learning_rate": 8.489289220692791e-06,
      "loss": 0.0324,
      "step": 478540
    },
    {
      "epoch": 0.7831739360970916,
      "grad_norm": 0.3600200116634369,
      "learning_rate": 8.489223328479275e-06,
      "loss": 0.0311,
      "step": 478560
    },
    {
      "epoch": 0.7832066665357449,
      "grad_norm": 1.1256465911865234,
      "learning_rate": 8.489157436265757e-06,
      "loss": 0.0432,
      "step": 478580
    },
    {
      "epoch": 0.7832393969743983,
      "grad_norm": 2.8450677394866943,
      "learning_rate": 8.48909154405224e-06,
      "loss": 0.0358,
      "step": 478600
    },
    {
      "epoch": 0.7832721274130516,
      "grad_norm": 0.5719797015190125,
      "learning_rate": 8.489025651838722e-06,
      "loss": 0.0285,
      "step": 478620
    },
    {
      "epoch": 0.7833048578517049,
      "grad_norm": 2.6802358627319336,
      "learning_rate": 8.488959759625206e-06,
      "loss": 0.0372,
      "step": 478640
    },
    {
      "epoch": 0.7833375882903583,
      "grad_norm": 1.4638657569885254,
      "learning_rate": 8.488893867411688e-06,
      "loss": 0.028,
      "step": 478660
    },
    {
      "epoch": 0.7833703187290116,
      "grad_norm": 0.4387844204902649,
      "learning_rate": 8.488827975198171e-06,
      "loss": 0.0293,
      "step": 478680
    },
    {
      "epoch": 0.7834030491676649,
      "grad_norm": 0.43897250294685364,
      "learning_rate": 8.488762082984653e-06,
      "loss": 0.0445,
      "step": 478700
    },
    {
      "epoch": 0.7834357796063183,
      "grad_norm": 1.271087408065796,
      "learning_rate": 8.488696190771137e-06,
      "loss": 0.0331,
      "step": 478720
    },
    {
      "epoch": 0.7834685100449716,
      "grad_norm": 0.8862602710723877,
      "learning_rate": 8.48863029855762e-06,
      "loss": 0.0308,
      "step": 478740
    },
    {
      "epoch": 0.783501240483625,
      "grad_norm": 0.3833276927471161,
      "learning_rate": 8.488564406344102e-06,
      "loss": 0.0315,
      "step": 478760
    },
    {
      "epoch": 0.7835339709222783,
      "grad_norm": 1.0441100597381592,
      "learning_rate": 8.488498514130586e-06,
      "loss": 0.0336,
      "step": 478780
    },
    {
      "epoch": 0.7835667013609317,
      "grad_norm": 2.1359875202178955,
      "learning_rate": 8.48843262191707e-06,
      "loss": 0.0337,
      "step": 478800
    },
    {
      "epoch": 0.783599431799585,
      "grad_norm": 1.0502440929412842,
      "learning_rate": 8.488366729703551e-06,
      "loss": 0.038,
      "step": 478820
    },
    {
      "epoch": 0.7836321622382383,
      "grad_norm": 6.026014804840088,
      "learning_rate": 8.488300837490035e-06,
      "loss": 0.0281,
      "step": 478840
    },
    {
      "epoch": 0.7836648926768917,
      "grad_norm": 0.8225259780883789,
      "learning_rate": 8.488234945276518e-06,
      "loss": 0.0368,
      "step": 478860
    },
    {
      "epoch": 0.783697623115545,
      "grad_norm": 0.11400753259658813,
      "learning_rate": 8.488169053063e-06,
      "loss": 0.0335,
      "step": 478880
    },
    {
      "epoch": 0.7837303535541983,
      "grad_norm": 0.4543710947036743,
      "learning_rate": 8.488103160849484e-06,
      "loss": 0.0327,
      "step": 478900
    },
    {
      "epoch": 0.7837630839928517,
      "grad_norm": 0.9815514087677002,
      "learning_rate": 8.488037268635966e-06,
      "loss": 0.0322,
      "step": 478920
    },
    {
      "epoch": 0.783795814431505,
      "grad_norm": 1.0881567001342773,
      "learning_rate": 8.48797137642245e-06,
      "loss": 0.028,
      "step": 478940
    },
    {
      "epoch": 0.7838285448701583,
      "grad_norm": 0.650714099407196,
      "learning_rate": 8.487905484208931e-06,
      "loss": 0.0277,
      "step": 478960
    },
    {
      "epoch": 0.7838612753088117,
      "grad_norm": 1.3424078226089478,
      "learning_rate": 8.487839591995415e-06,
      "loss": 0.0401,
      "step": 478980
    },
    {
      "epoch": 0.783894005747465,
      "grad_norm": 2.7902331352233887,
      "learning_rate": 8.487773699781897e-06,
      "loss": 0.0337,
      "step": 479000
    },
    {
      "epoch": 0.7839267361861184,
      "grad_norm": 0.7925520539283752,
      "learning_rate": 8.48770780756838e-06,
      "loss": 0.0364,
      "step": 479020
    },
    {
      "epoch": 0.7839594666247717,
      "grad_norm": 1.3074101209640503,
      "learning_rate": 8.487641915354862e-06,
      "loss": 0.0219,
      "step": 479040
    },
    {
      "epoch": 0.783992197063425,
      "grad_norm": 0.7501703500747681,
      "learning_rate": 8.487576023141346e-06,
      "loss": 0.0302,
      "step": 479060
    },
    {
      "epoch": 0.7840249275020784,
      "grad_norm": 1.4238955974578857,
      "learning_rate": 8.487510130927828e-06,
      "loss": 0.025,
      "step": 479080
    },
    {
      "epoch": 0.7840576579407317,
      "grad_norm": 1.6633955240249634,
      "learning_rate": 8.487444238714311e-06,
      "loss": 0.0266,
      "step": 479100
    },
    {
      "epoch": 0.784090388379385,
      "grad_norm": 0.7331687211990356,
      "learning_rate": 8.487378346500795e-06,
      "loss": 0.025,
      "step": 479120
    },
    {
      "epoch": 0.7841231188180384,
      "grad_norm": 1.4731882810592651,
      "learning_rate": 8.487312454287277e-06,
      "loss": 0.0451,
      "step": 479140
    },
    {
      "epoch": 0.7841558492566917,
      "grad_norm": 0.593437910079956,
      "learning_rate": 8.48724656207376e-06,
      "loss": 0.0278,
      "step": 479160
    },
    {
      "epoch": 0.7841885796953451,
      "grad_norm": 0.8953033685684204,
      "learning_rate": 8.487180669860244e-06,
      "loss": 0.0232,
      "step": 479180
    },
    {
      "epoch": 0.7842213101339984,
      "grad_norm": 1.5024089813232422,
      "learning_rate": 8.487114777646726e-06,
      "loss": 0.0285,
      "step": 479200
    },
    {
      "epoch": 0.7842540405726518,
      "grad_norm": 0.48702529072761536,
      "learning_rate": 8.48704888543321e-06,
      "loss": 0.0281,
      "step": 479220
    },
    {
      "epoch": 0.7842867710113051,
      "grad_norm": 1.1457974910736084,
      "learning_rate": 8.486982993219693e-06,
      "loss": 0.0399,
      "step": 479240
    },
    {
      "epoch": 0.7843195014499584,
      "grad_norm": 0.5653398036956787,
      "learning_rate": 8.486917101006175e-06,
      "loss": 0.0323,
      "step": 479260
    },
    {
      "epoch": 0.7843522318886118,
      "grad_norm": 1.447047472000122,
      "learning_rate": 8.486851208792659e-06,
      "loss": 0.035,
      "step": 479280
    },
    {
      "epoch": 0.7843849623272651,
      "grad_norm": 0.7578893303871155,
      "learning_rate": 8.48678531657914e-06,
      "loss": 0.023,
      "step": 479300
    },
    {
      "epoch": 0.7844176927659184,
      "grad_norm": 1.355916142463684,
      "learning_rate": 8.486719424365624e-06,
      "loss": 0.0346,
      "step": 479320
    },
    {
      "epoch": 0.7844504232045718,
      "grad_norm": 0.5707909464836121,
      "learning_rate": 8.486653532152106e-06,
      "loss": 0.0294,
      "step": 479340
    },
    {
      "epoch": 0.7844831536432251,
      "grad_norm": 0.9564122557640076,
      "learning_rate": 8.48658763993859e-06,
      "loss": 0.0289,
      "step": 479360
    },
    {
      "epoch": 0.7845158840818784,
      "grad_norm": 0.26759883761405945,
      "learning_rate": 8.486521747725071e-06,
      "loss": 0.0264,
      "step": 479380
    },
    {
      "epoch": 0.7845486145205318,
      "grad_norm": 0.7276039123535156,
      "learning_rate": 8.486455855511555e-06,
      "loss": 0.0314,
      "step": 479400
    },
    {
      "epoch": 0.7845813449591852,
      "grad_norm": 1.4732394218444824,
      "learning_rate": 8.486389963298037e-06,
      "loss": 0.0308,
      "step": 479420
    },
    {
      "epoch": 0.7846140753978385,
      "grad_norm": 1.2139571905136108,
      "learning_rate": 8.48632407108452e-06,
      "loss": 0.0238,
      "step": 479440
    },
    {
      "epoch": 0.7846468058364918,
      "grad_norm": 2.4344818592071533,
      "learning_rate": 8.486258178871002e-06,
      "loss": 0.0227,
      "step": 479460
    },
    {
      "epoch": 0.7846795362751452,
      "grad_norm": 0.4690864682197571,
      "learning_rate": 8.486192286657486e-06,
      "loss": 0.0312,
      "step": 479480
    },
    {
      "epoch": 0.7847122667137985,
      "grad_norm": 4.427663326263428,
      "learning_rate": 8.48612639444397e-06,
      "loss": 0.0426,
      "step": 479500
    },
    {
      "epoch": 0.7847449971524518,
      "grad_norm": 1.0332106351852417,
      "learning_rate": 8.486060502230451e-06,
      "loss": 0.0464,
      "step": 479520
    },
    {
      "epoch": 0.7847777275911052,
      "grad_norm": 1.4363269805908203,
      "learning_rate": 8.485994610016935e-06,
      "loss": 0.0265,
      "step": 479540
    },
    {
      "epoch": 0.7848104580297585,
      "grad_norm": 0.3128308951854706,
      "learning_rate": 8.485928717803417e-06,
      "loss": 0.025,
      "step": 479560
    },
    {
      "epoch": 0.7848431884684118,
      "grad_norm": 0.6188441514968872,
      "learning_rate": 8.4858628255899e-06,
      "loss": 0.0208,
      "step": 479580
    },
    {
      "epoch": 0.7848759189070652,
      "grad_norm": 1.152863621711731,
      "learning_rate": 8.485796933376384e-06,
      "loss": 0.0363,
      "step": 479600
    },
    {
      "epoch": 0.7849086493457186,
      "grad_norm": 1.4025388956069946,
      "learning_rate": 8.485731041162868e-06,
      "loss": 0.0322,
      "step": 479620
    },
    {
      "epoch": 0.7849413797843718,
      "grad_norm": 1.0802652835845947,
      "learning_rate": 8.48566514894935e-06,
      "loss": 0.0323,
      "step": 479640
    },
    {
      "epoch": 0.7849741102230252,
      "grad_norm": 0.07438384741544724,
      "learning_rate": 8.485599256735833e-06,
      "loss": 0.0341,
      "step": 479660
    },
    {
      "epoch": 0.7850068406616786,
      "grad_norm": 0.35459211468696594,
      "learning_rate": 8.485533364522315e-06,
      "loss": 0.0281,
      "step": 479680
    },
    {
      "epoch": 0.7850395711003318,
      "grad_norm": 0.1764139086008072,
      "learning_rate": 8.485467472308799e-06,
      "loss": 0.0343,
      "step": 479700
    },
    {
      "epoch": 0.7850723015389852,
      "grad_norm": 2.2478039264678955,
      "learning_rate": 8.48540158009528e-06,
      "loss": 0.0325,
      "step": 479720
    },
    {
      "epoch": 0.7851050319776386,
      "grad_norm": 0.33233320713043213,
      "learning_rate": 8.485335687881764e-06,
      "loss": 0.0282,
      "step": 479740
    },
    {
      "epoch": 0.7851377624162919,
      "grad_norm": 0.8854263424873352,
      "learning_rate": 8.485269795668246e-06,
      "loss": 0.0211,
      "step": 479760
    },
    {
      "epoch": 0.7851704928549452,
      "grad_norm": 1.6798713207244873,
      "learning_rate": 8.48520390345473e-06,
      "loss": 0.0434,
      "step": 479780
    },
    {
      "epoch": 0.7852032232935986,
      "grad_norm": 0.840846598148346,
      "learning_rate": 8.485138011241211e-06,
      "loss": 0.029,
      "step": 479800
    },
    {
      "epoch": 0.785235953732252,
      "grad_norm": 1.8653979301452637,
      "learning_rate": 8.485072119027695e-06,
      "loss": 0.0386,
      "step": 479820
    },
    {
      "epoch": 0.7852686841709052,
      "grad_norm": 0.3336200714111328,
      "learning_rate": 8.485006226814177e-06,
      "loss": 0.0249,
      "step": 479840
    },
    {
      "epoch": 0.7853014146095586,
      "grad_norm": 0.3019874095916748,
      "learning_rate": 8.48494033460066e-06,
      "loss": 0.0348,
      "step": 479860
    },
    {
      "epoch": 0.785334145048212,
      "grad_norm": 2.401737689971924,
      "learning_rate": 8.484874442387144e-06,
      "loss": 0.0287,
      "step": 479880
    },
    {
      "epoch": 0.7853668754868652,
      "grad_norm": 0.6175702214241028,
      "learning_rate": 8.484808550173626e-06,
      "loss": 0.0322,
      "step": 479900
    },
    {
      "epoch": 0.7853996059255186,
      "grad_norm": 17.313751220703125,
      "learning_rate": 8.48474265796011e-06,
      "loss": 0.0212,
      "step": 479920
    },
    {
      "epoch": 0.785432336364172,
      "grad_norm": 0.4460372030735016,
      "learning_rate": 8.484676765746592e-06,
      "loss": 0.0247,
      "step": 479940
    },
    {
      "epoch": 0.7854650668028252,
      "grad_norm": 1.9575458765029907,
      "learning_rate": 8.484610873533075e-06,
      "loss": 0.0409,
      "step": 479960
    },
    {
      "epoch": 0.7854977972414786,
      "grad_norm": 1.3408585786819458,
      "learning_rate": 8.484544981319559e-06,
      "loss": 0.0222,
      "step": 479980
    },
    {
      "epoch": 0.785530527680132,
      "grad_norm": 0.25084784626960754,
      "learning_rate": 8.48447908910604e-06,
      "loss": 0.0333,
      "step": 480000
    },
    {
      "epoch": 0.7855632581187854,
      "grad_norm": 3.2111518383026123,
      "learning_rate": 8.484413196892524e-06,
      "loss": 0.0234,
      "step": 480020
    },
    {
      "epoch": 0.7855959885574386,
      "grad_norm": 1.5084362030029297,
      "learning_rate": 8.484347304679008e-06,
      "loss": 0.028,
      "step": 480040
    },
    {
      "epoch": 0.785628718996092,
      "grad_norm": 0.20669932663440704,
      "learning_rate": 8.48428141246549e-06,
      "loss": 0.0218,
      "step": 480060
    },
    {
      "epoch": 0.7856614494347454,
      "grad_norm": 0.9928686022758484,
      "learning_rate": 8.484215520251973e-06,
      "loss": 0.0269,
      "step": 480080
    },
    {
      "epoch": 0.7856941798733986,
      "grad_norm": 0.28757843375205994,
      "learning_rate": 8.484149628038455e-06,
      "loss": 0.035,
      "step": 480100
    },
    {
      "epoch": 0.785726910312052,
      "grad_norm": 0.2977558374404907,
      "learning_rate": 8.484083735824939e-06,
      "loss": 0.0317,
      "step": 480120
    },
    {
      "epoch": 0.7857596407507054,
      "grad_norm": 0.32156965136528015,
      "learning_rate": 8.48401784361142e-06,
      "loss": 0.0262,
      "step": 480140
    },
    {
      "epoch": 0.7857923711893586,
      "grad_norm": 7.438640594482422,
      "learning_rate": 8.483951951397904e-06,
      "loss": 0.0458,
      "step": 480160
    },
    {
      "epoch": 0.785825101628012,
      "grad_norm": 1.0270363092422485,
      "learning_rate": 8.483886059184386e-06,
      "loss": 0.0408,
      "step": 480180
    },
    {
      "epoch": 0.7858578320666654,
      "grad_norm": 0.573584794998169,
      "learning_rate": 8.48382016697087e-06,
      "loss": 0.0323,
      "step": 480200
    },
    {
      "epoch": 0.7858905625053187,
      "grad_norm": 0.3121260106563568,
      "learning_rate": 8.483754274757353e-06,
      "loss": 0.03,
      "step": 480220
    },
    {
      "epoch": 0.785923292943972,
      "grad_norm": 1.59579598903656,
      "learning_rate": 8.483688382543835e-06,
      "loss": 0.0241,
      "step": 480240
    },
    {
      "epoch": 0.7859560233826254,
      "grad_norm": 2.871354103088379,
      "learning_rate": 8.483622490330319e-06,
      "loss": 0.0312,
      "step": 480260
    },
    {
      "epoch": 0.7859887538212788,
      "grad_norm": 0.7144429087638855,
      "learning_rate": 8.4835565981168e-06,
      "loss": 0.0348,
      "step": 480280
    },
    {
      "epoch": 0.786021484259932,
      "grad_norm": 0.7033624053001404,
      "learning_rate": 8.483490705903284e-06,
      "loss": 0.0339,
      "step": 480300
    },
    {
      "epoch": 0.7860542146985854,
      "grad_norm": 0.6312768459320068,
      "learning_rate": 8.483424813689766e-06,
      "loss": 0.0288,
      "step": 480320
    },
    {
      "epoch": 0.7860869451372388,
      "grad_norm": 0.39147135615348816,
      "learning_rate": 8.48335892147625e-06,
      "loss": 0.0336,
      "step": 480340
    },
    {
      "epoch": 0.786119675575892,
      "grad_norm": 1.7024441957473755,
      "learning_rate": 8.483293029262733e-06,
      "loss": 0.0266,
      "step": 480360
    },
    {
      "epoch": 0.7861524060145454,
      "grad_norm": 0.5348702669143677,
      "learning_rate": 8.483227137049215e-06,
      "loss": 0.0349,
      "step": 480380
    },
    {
      "epoch": 0.7861851364531988,
      "grad_norm": 3.0033035278320312,
      "learning_rate": 8.483161244835699e-06,
      "loss": 0.03,
      "step": 480400
    },
    {
      "epoch": 0.7862178668918521,
      "grad_norm": 1.843409538269043,
      "learning_rate": 8.483095352622182e-06,
      "loss": 0.0316,
      "step": 480420
    },
    {
      "epoch": 0.7862505973305054,
      "grad_norm": 0.3494934141635895,
      "learning_rate": 8.483029460408664e-06,
      "loss": 0.031,
      "step": 480440
    },
    {
      "epoch": 0.7862833277691588,
      "grad_norm": 1.4110591411590576,
      "learning_rate": 8.482963568195148e-06,
      "loss": 0.029,
      "step": 480460
    },
    {
      "epoch": 0.7863160582078121,
      "grad_norm": 0.33515995740890503,
      "learning_rate": 8.48289767598163e-06,
      "loss": 0.0363,
      "step": 480480
    },
    {
      "epoch": 0.7863487886464654,
      "grad_norm": 0.6292723417282104,
      "learning_rate": 8.482831783768113e-06,
      "loss": 0.0328,
      "step": 480500
    },
    {
      "epoch": 0.7863815190851188,
      "grad_norm": 1.0004736185073853,
      "learning_rate": 8.482765891554595e-06,
      "loss": 0.0322,
      "step": 480520
    },
    {
      "epoch": 0.7864142495237721,
      "grad_norm": 1.0067503452301025,
      "learning_rate": 8.482699999341079e-06,
      "loss": 0.0221,
      "step": 480540
    },
    {
      "epoch": 0.7864469799624254,
      "grad_norm": 1.1461200714111328,
      "learning_rate": 8.482634107127562e-06,
      "loss": 0.0326,
      "step": 480560
    },
    {
      "epoch": 0.7864797104010788,
      "grad_norm": 1.001573920249939,
      "learning_rate": 8.482568214914044e-06,
      "loss": 0.0304,
      "step": 480580
    },
    {
      "epoch": 0.7865124408397322,
      "grad_norm": 0.24268263578414917,
      "learning_rate": 8.482502322700528e-06,
      "loss": 0.0299,
      "step": 480600
    },
    {
      "epoch": 0.7865451712783855,
      "grad_norm": 0.7416300177574158,
      "learning_rate": 8.48243643048701e-06,
      "loss": 0.024,
      "step": 480620
    },
    {
      "epoch": 0.7865779017170388,
      "grad_norm": 1.1914570331573486,
      "learning_rate": 8.482370538273493e-06,
      "loss": 0.0299,
      "step": 480640
    },
    {
      "epoch": 0.7866106321556922,
      "grad_norm": 1.3094017505645752,
      "learning_rate": 8.482304646059975e-06,
      "loss": 0.0396,
      "step": 480660
    },
    {
      "epoch": 0.7866433625943455,
      "grad_norm": 1.2126356363296509,
      "learning_rate": 8.482238753846459e-06,
      "loss": 0.0319,
      "step": 480680
    },
    {
      "epoch": 0.7866760930329988,
      "grad_norm": 2.97530198097229,
      "learning_rate": 8.48217286163294e-06,
      "loss": 0.0258,
      "step": 480700
    },
    {
      "epoch": 0.7867088234716522,
      "grad_norm": 1.467819333076477,
      "learning_rate": 8.482106969419424e-06,
      "loss": 0.0406,
      "step": 480720
    },
    {
      "epoch": 0.7867415539103055,
      "grad_norm": 0.32091715931892395,
      "learning_rate": 8.482041077205906e-06,
      "loss": 0.037,
      "step": 480740
    },
    {
      "epoch": 0.7867742843489588,
      "grad_norm": 1.3359030485153198,
      "learning_rate": 8.48197518499239e-06,
      "loss": 0.0319,
      "step": 480760
    },
    {
      "epoch": 0.7868070147876122,
      "grad_norm": 1.6698088645935059,
      "learning_rate": 8.481909292778873e-06,
      "loss": 0.036,
      "step": 480780
    },
    {
      "epoch": 0.7868397452262655,
      "grad_norm": 1.3676807880401611,
      "learning_rate": 8.481843400565355e-06,
      "loss": 0.039,
      "step": 480800
    },
    {
      "epoch": 0.7868724756649189,
      "grad_norm": 0.713185727596283,
      "learning_rate": 8.481777508351839e-06,
      "loss": 0.0327,
      "step": 480820
    },
    {
      "epoch": 0.7869052061035722,
      "grad_norm": 0.97812819480896,
      "learning_rate": 8.481711616138322e-06,
      "loss": 0.0281,
      "step": 480840
    },
    {
      "epoch": 0.7869379365422255,
      "grad_norm": 0.6627920269966125,
      "learning_rate": 8.481645723924804e-06,
      "loss": 0.0325,
      "step": 480860
    },
    {
      "epoch": 0.7869706669808789,
      "grad_norm": 0.6991633772850037,
      "learning_rate": 8.481579831711288e-06,
      "loss": 0.0305,
      "step": 480880
    },
    {
      "epoch": 0.7870033974195322,
      "grad_norm": 0.9106063842773438,
      "learning_rate": 8.48151393949777e-06,
      "loss": 0.029,
      "step": 480900
    },
    {
      "epoch": 0.7870361278581856,
      "grad_norm": 0.4631214737892151,
      "learning_rate": 8.481448047284253e-06,
      "loss": 0.0251,
      "step": 480920
    },
    {
      "epoch": 0.7870688582968389,
      "grad_norm": 0.7042756080627441,
      "learning_rate": 8.481382155070737e-06,
      "loss": 0.036,
      "step": 480940
    },
    {
      "epoch": 0.7871015887354922,
      "grad_norm": 1.4869996309280396,
      "learning_rate": 8.481316262857219e-06,
      "loss": 0.0256,
      "step": 480960
    },
    {
      "epoch": 0.7871343191741456,
      "grad_norm": 0.5813302397727966,
      "learning_rate": 8.481250370643702e-06,
      "loss": 0.0207,
      "step": 480980
    },
    {
      "epoch": 0.7871670496127989,
      "grad_norm": 0.7295902967453003,
      "learning_rate": 8.481184478430184e-06,
      "loss": 0.0353,
      "step": 481000
    },
    {
      "epoch": 0.7871997800514523,
      "grad_norm": 0.17663633823394775,
      "learning_rate": 8.481118586216668e-06,
      "loss": 0.026,
      "step": 481020
    },
    {
      "epoch": 0.7872325104901056,
      "grad_norm": 1.175176978111267,
      "learning_rate": 8.48105269400315e-06,
      "loss": 0.0305,
      "step": 481040
    },
    {
      "epoch": 0.7872652409287589,
      "grad_norm": 3.989250659942627,
      "learning_rate": 8.480986801789633e-06,
      "loss": 0.0403,
      "step": 481060
    },
    {
      "epoch": 0.7872979713674123,
      "grad_norm": 0.6048265099525452,
      "learning_rate": 8.480920909576115e-06,
      "loss": 0.0313,
      "step": 481080
    },
    {
      "epoch": 0.7873307018060656,
      "grad_norm": 1.3727562427520752,
      "learning_rate": 8.480855017362599e-06,
      "loss": 0.0329,
      "step": 481100
    },
    {
      "epoch": 0.7873634322447189,
      "grad_norm": 1.0842206478118896,
      "learning_rate": 8.48078912514908e-06,
      "loss": 0.0312,
      "step": 481120
    },
    {
      "epoch": 0.7873961626833723,
      "grad_norm": 1.1185654401779175,
      "learning_rate": 8.480723232935564e-06,
      "loss": 0.0359,
      "step": 481140
    },
    {
      "epoch": 0.7874288931220256,
      "grad_norm": 1.4524081945419312,
      "learning_rate": 8.480657340722048e-06,
      "loss": 0.0211,
      "step": 481160
    },
    {
      "epoch": 0.787461623560679,
      "grad_norm": 0.7806540727615356,
      "learning_rate": 8.48059144850853e-06,
      "loss": 0.0342,
      "step": 481180
    },
    {
      "epoch": 0.7874943539993323,
      "grad_norm": 0.23454993963241577,
      "learning_rate": 8.480525556295013e-06,
      "loss": 0.025,
      "step": 481200
    },
    {
      "epoch": 0.7875270844379857,
      "grad_norm": 19.52658462524414,
      "learning_rate": 8.480459664081497e-06,
      "loss": 0.0363,
      "step": 481220
    },
    {
      "epoch": 0.787559814876639,
      "grad_norm": 0.6489378213882446,
      "learning_rate": 8.480393771867979e-06,
      "loss": 0.0312,
      "step": 481240
    },
    {
      "epoch": 0.7875925453152923,
      "grad_norm": 1.1154974699020386,
      "learning_rate": 8.480327879654462e-06,
      "loss": 0.0219,
      "step": 481260
    },
    {
      "epoch": 0.7876252757539457,
      "grad_norm": 4.328068733215332,
      "learning_rate": 8.480261987440946e-06,
      "loss": 0.038,
      "step": 481280
    },
    {
      "epoch": 0.787658006192599,
      "grad_norm": 2.253333806991577,
      "learning_rate": 8.480196095227428e-06,
      "loss": 0.0352,
      "step": 481300
    },
    {
      "epoch": 0.7876907366312523,
      "grad_norm": 1.7597588300704956,
      "learning_rate": 8.480130203013912e-06,
      "loss": 0.0237,
      "step": 481320
    },
    {
      "epoch": 0.7877234670699057,
      "grad_norm": 1.2215487957000732,
      "learning_rate": 8.480064310800393e-06,
      "loss": 0.0202,
      "step": 481340
    },
    {
      "epoch": 0.787756197508559,
      "grad_norm": 0.5774582624435425,
      "learning_rate": 8.479998418586877e-06,
      "loss": 0.0374,
      "step": 481360
    },
    {
      "epoch": 0.7877889279472123,
      "grad_norm": 0.4614253044128418,
      "learning_rate": 8.479932526373359e-06,
      "loss": 0.032,
      "step": 481380
    },
    {
      "epoch": 0.7878216583858657,
      "grad_norm": 0.8383868932723999,
      "learning_rate": 8.479866634159843e-06,
      "loss": 0.0235,
      "step": 481400
    },
    {
      "epoch": 0.7878543888245191,
      "grad_norm": 0.4108642339706421,
      "learning_rate": 8.479800741946324e-06,
      "loss": 0.0372,
      "step": 481420
    },
    {
      "epoch": 0.7878871192631723,
      "grad_norm": 0.28426694869995117,
      "learning_rate": 8.479734849732808e-06,
      "loss": 0.0278,
      "step": 481440
    },
    {
      "epoch": 0.7879198497018257,
      "grad_norm": 1.9326584339141846,
      "learning_rate": 8.47966895751929e-06,
      "loss": 0.0283,
      "step": 481460
    },
    {
      "epoch": 0.7879525801404791,
      "grad_norm": 0.25504863262176514,
      "learning_rate": 8.479603065305773e-06,
      "loss": 0.0215,
      "step": 481480
    },
    {
      "epoch": 0.7879853105791323,
      "grad_norm": 1.6847909688949585,
      "learning_rate": 8.479537173092255e-06,
      "loss": 0.0223,
      "step": 481500
    },
    {
      "epoch": 0.7880180410177857,
      "grad_norm": 0.9227349162101746,
      "learning_rate": 8.479471280878739e-06,
      "loss": 0.035,
      "step": 481520
    },
    {
      "epoch": 0.7880507714564391,
      "grad_norm": 1.9064677953720093,
      "learning_rate": 8.479405388665221e-06,
      "loss": 0.0419,
      "step": 481540
    },
    {
      "epoch": 0.7880835018950924,
      "grad_norm": 1.6367669105529785,
      "learning_rate": 8.479339496451704e-06,
      "loss": 0.0358,
      "step": 481560
    },
    {
      "epoch": 0.7881162323337457,
      "grad_norm": 0.9870509505271912,
      "learning_rate": 8.479273604238188e-06,
      "loss": 0.028,
      "step": 481580
    },
    {
      "epoch": 0.7881489627723991,
      "grad_norm": 0.9871479272842407,
      "learning_rate": 8.47920771202467e-06,
      "loss": 0.0314,
      "step": 481600
    },
    {
      "epoch": 0.7881816932110525,
      "grad_norm": 2.1492602825164795,
      "learning_rate": 8.479141819811154e-06,
      "loss": 0.0274,
      "step": 481620
    },
    {
      "epoch": 0.7882144236497057,
      "grad_norm": 0.7820992469787598,
      "learning_rate": 8.479075927597637e-06,
      "loss": 0.0245,
      "step": 481640
    },
    {
      "epoch": 0.7882471540883591,
      "grad_norm": 1.8237004280090332,
      "learning_rate": 8.479010035384119e-06,
      "loss": 0.028,
      "step": 481660
    },
    {
      "epoch": 0.7882798845270125,
      "grad_norm": 0.9470648169517517,
      "learning_rate": 8.478944143170603e-06,
      "loss": 0.0302,
      "step": 481680
    },
    {
      "epoch": 0.7883126149656657,
      "grad_norm": 1.4979935884475708,
      "learning_rate": 8.478878250957086e-06,
      "loss": 0.0376,
      "step": 481700
    },
    {
      "epoch": 0.7883453454043191,
      "grad_norm": 1.6142734289169312,
      "learning_rate": 8.478812358743568e-06,
      "loss": 0.0377,
      "step": 481720
    },
    {
      "epoch": 0.7883780758429725,
      "grad_norm": 0.7073409557342529,
      "learning_rate": 8.478746466530052e-06,
      "loss": 0.0299,
      "step": 481740
    },
    {
      "epoch": 0.7884108062816257,
      "grad_norm": 0.23116840422153473,
      "learning_rate": 8.478680574316534e-06,
      "loss": 0.0305,
      "step": 481760
    },
    {
      "epoch": 0.7884435367202791,
      "grad_norm": 2.9209282398223877,
      "learning_rate": 8.478614682103017e-06,
      "loss": 0.0496,
      "step": 481780
    },
    {
      "epoch": 0.7884762671589325,
      "grad_norm": 1.029518485069275,
      "learning_rate": 8.478548789889499e-06,
      "loss": 0.0271,
      "step": 481800
    },
    {
      "epoch": 0.7885089975975857,
      "grad_norm": 0.7389406561851501,
      "learning_rate": 8.478482897675983e-06,
      "loss": 0.0283,
      "step": 481820
    },
    {
      "epoch": 0.7885417280362391,
      "grad_norm": 0.20275871455669403,
      "learning_rate": 8.478417005462464e-06,
      "loss": 0.0287,
      "step": 481840
    },
    {
      "epoch": 0.7885744584748925,
      "grad_norm": 0.8611778616905212,
      "learning_rate": 8.478351113248948e-06,
      "loss": 0.0306,
      "step": 481860
    },
    {
      "epoch": 0.7886071889135459,
      "grad_norm": 3.1460232734680176,
      "learning_rate": 8.47828522103543e-06,
      "loss": 0.0312,
      "step": 481880
    },
    {
      "epoch": 0.7886399193521991,
      "grad_norm": 1.185226321220398,
      "learning_rate": 8.478219328821914e-06,
      "loss": 0.027,
      "step": 481900
    },
    {
      "epoch": 0.7886726497908525,
      "grad_norm": 0.14395228028297424,
      "learning_rate": 8.478153436608395e-06,
      "loss": 0.02,
      "step": 481920
    },
    {
      "epoch": 0.7887053802295059,
      "grad_norm": 0.3439401090145111,
      "learning_rate": 8.478087544394879e-06,
      "loss": 0.0246,
      "step": 481940
    },
    {
      "epoch": 0.7887381106681591,
      "grad_norm": 1.0964086055755615,
      "learning_rate": 8.478021652181363e-06,
      "loss": 0.0287,
      "step": 481960
    },
    {
      "epoch": 0.7887708411068125,
      "grad_norm": 0.3600621819496155,
      "learning_rate": 8.477955759967845e-06,
      "loss": 0.0353,
      "step": 481980
    },
    {
      "epoch": 0.7888035715454659,
      "grad_norm": 1.8358254432678223,
      "learning_rate": 8.477889867754328e-06,
      "loss": 0.0285,
      "step": 482000
    },
    {
      "epoch": 0.7888363019841191,
      "grad_norm": 3.863454580307007,
      "learning_rate": 8.477823975540812e-06,
      "loss": 0.0342,
      "step": 482020
    },
    {
      "epoch": 0.7888690324227725,
      "grad_norm": 0.6774319410324097,
      "learning_rate": 8.477758083327294e-06,
      "loss": 0.0177,
      "step": 482040
    },
    {
      "epoch": 0.7889017628614259,
      "grad_norm": 0.8323715925216675,
      "learning_rate": 8.477692191113777e-06,
      "loss": 0.0237,
      "step": 482060
    },
    {
      "epoch": 0.7889344933000793,
      "grad_norm": 0.6345995664596558,
      "learning_rate": 8.47762629890026e-06,
      "loss": 0.0291,
      "step": 482080
    },
    {
      "epoch": 0.7889672237387325,
      "grad_norm": 1.0599300861358643,
      "learning_rate": 8.477560406686743e-06,
      "loss": 0.0357,
      "step": 482100
    },
    {
      "epoch": 0.7889999541773859,
      "grad_norm": 1.0260602235794067,
      "learning_rate": 8.477494514473226e-06,
      "loss": 0.0311,
      "step": 482120
    },
    {
      "epoch": 0.7890326846160393,
      "grad_norm": 0.30539000034332275,
      "learning_rate": 8.477428622259708e-06,
      "loss": 0.0338,
      "step": 482140
    },
    {
      "epoch": 0.7890654150546925,
      "grad_norm": 0.9021763801574707,
      "learning_rate": 8.477362730046192e-06,
      "loss": 0.0386,
      "step": 482160
    },
    {
      "epoch": 0.7890981454933459,
      "grad_norm": 0.6361474394798279,
      "learning_rate": 8.477296837832674e-06,
      "loss": 0.0357,
      "step": 482180
    },
    {
      "epoch": 0.7891308759319993,
      "grad_norm": 0.3528798520565033,
      "learning_rate": 8.477230945619157e-06,
      "loss": 0.0295,
      "step": 482200
    },
    {
      "epoch": 0.7891636063706525,
      "grad_norm": 0.9034424424171448,
      "learning_rate": 8.477165053405639e-06,
      "loss": 0.0217,
      "step": 482220
    },
    {
      "epoch": 0.7891963368093059,
      "grad_norm": 0.5430574417114258,
      "learning_rate": 8.477099161192123e-06,
      "loss": 0.025,
      "step": 482240
    },
    {
      "epoch": 0.7892290672479593,
      "grad_norm": 2.291473627090454,
      "learning_rate": 8.477033268978605e-06,
      "loss": 0.0344,
      "step": 482260
    },
    {
      "epoch": 0.7892617976866126,
      "grad_norm": 1.2087777853012085,
      "learning_rate": 8.476967376765088e-06,
      "loss": 0.0257,
      "step": 482280
    },
    {
      "epoch": 0.7892945281252659,
      "grad_norm": 0.7321369051933289,
      "learning_rate": 8.47690148455157e-06,
      "loss": 0.0293,
      "step": 482300
    },
    {
      "epoch": 0.7893272585639193,
      "grad_norm": 1.2010287046432495,
      "learning_rate": 8.476835592338054e-06,
      "loss": 0.0394,
      "step": 482320
    },
    {
      "epoch": 0.7893599890025726,
      "grad_norm": 0.9123740196228027,
      "learning_rate": 8.476769700124537e-06,
      "loss": 0.0195,
      "step": 482340
    },
    {
      "epoch": 0.7893927194412259,
      "grad_norm": 1.649413824081421,
      "learning_rate": 8.476703807911019e-06,
      "loss": 0.0193,
      "step": 482360
    },
    {
      "epoch": 0.7894254498798793,
      "grad_norm": 2.2147209644317627,
      "learning_rate": 8.476637915697503e-06,
      "loss": 0.0345,
      "step": 482380
    },
    {
      "epoch": 0.7894581803185327,
      "grad_norm": 0.5300036072731018,
      "learning_rate": 8.476572023483986e-06,
      "loss": 0.0358,
      "step": 482400
    },
    {
      "epoch": 0.7894909107571859,
      "grad_norm": 1.8950086832046509,
      "learning_rate": 8.476506131270468e-06,
      "loss": 0.0404,
      "step": 482420
    },
    {
      "epoch": 0.7895236411958393,
      "grad_norm": 1.2241497039794922,
      "learning_rate": 8.476440239056952e-06,
      "loss": 0.0408,
      "step": 482440
    },
    {
      "epoch": 0.7895563716344927,
      "grad_norm": 13.620217323303223,
      "learning_rate": 8.476374346843435e-06,
      "loss": 0.0329,
      "step": 482460
    },
    {
      "epoch": 0.789589102073146,
      "grad_norm": 1.9967806339263916,
      "learning_rate": 8.476308454629917e-06,
      "loss": 0.0372,
      "step": 482480
    },
    {
      "epoch": 0.7896218325117993,
      "grad_norm": 0.3517337143421173,
      "learning_rate": 8.4762425624164e-06,
      "loss": 0.0356,
      "step": 482500
    },
    {
      "epoch": 0.7896545629504527,
      "grad_norm": 1.1278706789016724,
      "learning_rate": 8.476176670202883e-06,
      "loss": 0.0244,
      "step": 482520
    },
    {
      "epoch": 0.789687293389106,
      "grad_norm": 1.0523673295974731,
      "learning_rate": 8.476110777989366e-06,
      "loss": 0.0353,
      "step": 482540
    },
    {
      "epoch": 0.7897200238277593,
      "grad_norm": 0.791294515132904,
      "learning_rate": 8.476044885775848e-06,
      "loss": 0.0322,
      "step": 482560
    },
    {
      "epoch": 0.7897527542664127,
      "grad_norm": 1.0660747289657593,
      "learning_rate": 8.475978993562332e-06,
      "loss": 0.0462,
      "step": 482580
    },
    {
      "epoch": 0.789785484705066,
      "grad_norm": 0.5024083852767944,
      "learning_rate": 8.475913101348814e-06,
      "loss": 0.0245,
      "step": 482600
    },
    {
      "epoch": 0.7898182151437193,
      "grad_norm": 0.8789703249931335,
      "learning_rate": 8.475847209135297e-06,
      "loss": 0.0387,
      "step": 482620
    },
    {
      "epoch": 0.7898509455823727,
      "grad_norm": 0.9897534251213074,
      "learning_rate": 8.47578131692178e-06,
      "loss": 0.0378,
      "step": 482640
    },
    {
      "epoch": 0.789883676021026,
      "grad_norm": 0.5138454437255859,
      "learning_rate": 8.475715424708263e-06,
      "loss": 0.0267,
      "step": 482660
    },
    {
      "epoch": 0.7899164064596794,
      "grad_norm": 0.8360322117805481,
      "learning_rate": 8.475649532494746e-06,
      "loss": 0.0197,
      "step": 482680
    },
    {
      "epoch": 0.7899491368983327,
      "grad_norm": 0.4702118933200836,
      "learning_rate": 8.475583640281228e-06,
      "loss": 0.036,
      "step": 482700
    },
    {
      "epoch": 0.789981867336986,
      "grad_norm": 0.5292478203773499,
      "learning_rate": 8.475517748067712e-06,
      "loss": 0.0363,
      "step": 482720
    },
    {
      "epoch": 0.7900145977756394,
      "grad_norm": 1.3853758573532104,
      "learning_rate": 8.475451855854194e-06,
      "loss": 0.0338,
      "step": 482740
    },
    {
      "epoch": 0.7900473282142927,
      "grad_norm": 0.2644982635974884,
      "learning_rate": 8.475385963640677e-06,
      "loss": 0.0315,
      "step": 482760
    },
    {
      "epoch": 0.7900800586529461,
      "grad_norm": 0.3233562707901001,
      "learning_rate": 8.47532007142716e-06,
      "loss": 0.0309,
      "step": 482780
    },
    {
      "epoch": 0.7901127890915994,
      "grad_norm": 0.47927871346473694,
      "learning_rate": 8.475254179213643e-06,
      "loss": 0.0331,
      "step": 482800
    },
    {
      "epoch": 0.7901455195302527,
      "grad_norm": 0.7390674352645874,
      "learning_rate": 8.475188287000126e-06,
      "loss": 0.0333,
      "step": 482820
    },
    {
      "epoch": 0.7901782499689061,
      "grad_norm": 0.45467591285705566,
      "learning_rate": 8.475122394786608e-06,
      "loss": 0.0222,
      "step": 482840
    },
    {
      "epoch": 0.7902109804075594,
      "grad_norm": 0.279074102640152,
      "learning_rate": 8.475056502573092e-06,
      "loss": 0.0306,
      "step": 482860
    },
    {
      "epoch": 0.7902437108462128,
      "grad_norm": 1.0342957973480225,
      "learning_rate": 8.474990610359575e-06,
      "loss": 0.0366,
      "step": 482880
    },
    {
      "epoch": 0.7902764412848661,
      "grad_norm": 1.4034528732299805,
      "learning_rate": 8.474924718146057e-06,
      "loss": 0.0269,
      "step": 482900
    },
    {
      "epoch": 0.7903091717235194,
      "grad_norm": 0.4709821343421936,
      "learning_rate": 8.474858825932541e-06,
      "loss": 0.0306,
      "step": 482920
    },
    {
      "epoch": 0.7903419021621728,
      "grad_norm": 1.1451724767684937,
      "learning_rate": 8.474792933719023e-06,
      "loss": 0.0249,
      "step": 482940
    },
    {
      "epoch": 0.7903746326008261,
      "grad_norm": 1.1263537406921387,
      "learning_rate": 8.474727041505506e-06,
      "loss": 0.0342,
      "step": 482960
    },
    {
      "epoch": 0.7904073630394794,
      "grad_norm": 0.6832547783851624,
      "learning_rate": 8.474661149291988e-06,
      "loss": 0.032,
      "step": 482980
    },
    {
      "epoch": 0.7904400934781328,
      "grad_norm": 2.0435285568237305,
      "learning_rate": 8.474595257078472e-06,
      "loss": 0.0245,
      "step": 483000
    },
    {
      "epoch": 0.7904728239167861,
      "grad_norm": 0.5009154677391052,
      "learning_rate": 8.474529364864955e-06,
      "loss": 0.0258,
      "step": 483020
    },
    {
      "epoch": 0.7905055543554395,
      "grad_norm": 1.325696349143982,
      "learning_rate": 8.474463472651437e-06,
      "loss": 0.0238,
      "step": 483040
    },
    {
      "epoch": 0.7905382847940928,
      "grad_norm": 2.6078927516937256,
      "learning_rate": 8.474397580437921e-06,
      "loss": 0.033,
      "step": 483060
    },
    {
      "epoch": 0.7905710152327462,
      "grad_norm": 1.0401684045791626,
      "learning_rate": 8.474331688224403e-06,
      "loss": 0.0294,
      "step": 483080
    },
    {
      "epoch": 0.7906037456713995,
      "grad_norm": 0.49220675230026245,
      "learning_rate": 8.474265796010886e-06,
      "loss": 0.0361,
      "step": 483100
    },
    {
      "epoch": 0.7906364761100528,
      "grad_norm": 1.123740315437317,
      "learning_rate": 8.474199903797368e-06,
      "loss": 0.03,
      "step": 483120
    },
    {
      "epoch": 0.7906692065487062,
      "grad_norm": 0.21810907125473022,
      "learning_rate": 8.474134011583852e-06,
      "loss": 0.0324,
      "step": 483140
    },
    {
      "epoch": 0.7907019369873595,
      "grad_norm": 0.9072633981704712,
      "learning_rate": 8.474068119370334e-06,
      "loss": 0.0309,
      "step": 483160
    },
    {
      "epoch": 0.7907346674260128,
      "grad_norm": 0.812824010848999,
      "learning_rate": 8.474002227156817e-06,
      "loss": 0.0332,
      "step": 483180
    },
    {
      "epoch": 0.7907673978646662,
      "grad_norm": 0.8895487785339355,
      "learning_rate": 8.473936334943301e-06,
      "loss": 0.0364,
      "step": 483200
    },
    {
      "epoch": 0.7908001283033195,
      "grad_norm": 0.7541599273681641,
      "learning_rate": 8.473870442729783e-06,
      "loss": 0.0235,
      "step": 483220
    },
    {
      "epoch": 0.7908328587419728,
      "grad_norm": 1.982180118560791,
      "learning_rate": 8.473804550516266e-06,
      "loss": 0.0454,
      "step": 483240
    },
    {
      "epoch": 0.7908655891806262,
      "grad_norm": 1.3737294673919678,
      "learning_rate": 8.47373865830275e-06,
      "loss": 0.0235,
      "step": 483260
    },
    {
      "epoch": 0.7908983196192796,
      "grad_norm": 2.1136529445648193,
      "learning_rate": 8.473672766089232e-06,
      "loss": 0.0407,
      "step": 483280
    },
    {
      "epoch": 0.7909310500579328,
      "grad_norm": 1.5423552989959717,
      "learning_rate": 8.473606873875716e-06,
      "loss": 0.0341,
      "step": 483300
    },
    {
      "epoch": 0.7909637804965862,
      "grad_norm": 0.7580139636993408,
      "learning_rate": 8.473540981662197e-06,
      "loss": 0.0283,
      "step": 483320
    },
    {
      "epoch": 0.7909965109352396,
      "grad_norm": 4.35052490234375,
      "learning_rate": 8.473475089448681e-06,
      "loss": 0.0313,
      "step": 483340
    },
    {
      "epoch": 0.7910292413738929,
      "grad_norm": 0.3316996097564697,
      "learning_rate": 8.473409197235163e-06,
      "loss": 0.0234,
      "step": 483360
    },
    {
      "epoch": 0.7910619718125462,
      "grad_norm": 0.4090825617313385,
      "learning_rate": 8.473343305021646e-06,
      "loss": 0.0266,
      "step": 483380
    },
    {
      "epoch": 0.7910947022511996,
      "grad_norm": 1.0977892875671387,
      "learning_rate": 8.47327741280813e-06,
      "loss": 0.0369,
      "step": 483400
    },
    {
      "epoch": 0.7911274326898529,
      "grad_norm": 0.2330797016620636,
      "learning_rate": 8.473211520594612e-06,
      "loss": 0.0443,
      "step": 483420
    },
    {
      "epoch": 0.7911601631285062,
      "grad_norm": 0.7664914727210999,
      "learning_rate": 8.473145628381096e-06,
      "loss": 0.0209,
      "step": 483440
    },
    {
      "epoch": 0.7911928935671596,
      "grad_norm": 1.5551416873931885,
      "learning_rate": 8.473079736167577e-06,
      "loss": 0.0346,
      "step": 483460
    },
    {
      "epoch": 0.791225624005813,
      "grad_norm": 0.6252322196960449,
      "learning_rate": 8.473013843954061e-06,
      "loss": 0.0484,
      "step": 483480
    },
    {
      "epoch": 0.7912583544444662,
      "grad_norm": 0.7290065884590149,
      "learning_rate": 8.472947951740543e-06,
      "loss": 0.0249,
      "step": 483500
    },
    {
      "epoch": 0.7912910848831196,
      "grad_norm": 2.1118485927581787,
      "learning_rate": 8.472882059527026e-06,
      "loss": 0.0371,
      "step": 483520
    },
    {
      "epoch": 0.791323815321773,
      "grad_norm": 1.0130313634872437,
      "learning_rate": 8.472816167313508e-06,
      "loss": 0.0396,
      "step": 483540
    },
    {
      "epoch": 0.7913565457604262,
      "grad_norm": 0.4362015724182129,
      "learning_rate": 8.472750275099992e-06,
      "loss": 0.0273,
      "step": 483560
    },
    {
      "epoch": 0.7913892761990796,
      "grad_norm": 0.09094072878360748,
      "learning_rate": 8.472684382886474e-06,
      "loss": 0.0254,
      "step": 483580
    },
    {
      "epoch": 0.791422006637733,
      "grad_norm": 0.6637513041496277,
      "learning_rate": 8.472618490672957e-06,
      "loss": 0.0318,
      "step": 483600
    },
    {
      "epoch": 0.7914547370763862,
      "grad_norm": 0.7783125638961792,
      "learning_rate": 8.472552598459441e-06,
      "loss": 0.0332,
      "step": 483620
    },
    {
      "epoch": 0.7914874675150396,
      "grad_norm": 0.4838891327381134,
      "learning_rate": 8.472486706245923e-06,
      "loss": 0.0317,
      "step": 483640
    },
    {
      "epoch": 0.791520197953693,
      "grad_norm": 2.0090179443359375,
      "learning_rate": 8.472420814032407e-06,
      "loss": 0.0381,
      "step": 483660
    },
    {
      "epoch": 0.7915529283923464,
      "grad_norm": 0.9061240553855896,
      "learning_rate": 8.47235492181889e-06,
      "loss": 0.0261,
      "step": 483680
    },
    {
      "epoch": 0.7915856588309996,
      "grad_norm": 1.6836529970169067,
      "learning_rate": 8.472289029605372e-06,
      "loss": 0.0326,
      "step": 483700
    },
    {
      "epoch": 0.791618389269653,
      "grad_norm": 1.136047601699829,
      "learning_rate": 8.472223137391856e-06,
      "loss": 0.0253,
      "step": 483720
    },
    {
      "epoch": 0.7916511197083064,
      "grad_norm": 1.1717230081558228,
      "learning_rate": 8.47215724517834e-06,
      "loss": 0.0366,
      "step": 483740
    },
    {
      "epoch": 0.7916838501469596,
      "grad_norm": 1.8045908212661743,
      "learning_rate": 8.472091352964821e-06,
      "loss": 0.0313,
      "step": 483760
    },
    {
      "epoch": 0.791716580585613,
      "grad_norm": 0.4609406590461731,
      "learning_rate": 8.472025460751305e-06,
      "loss": 0.0268,
      "step": 483780
    },
    {
      "epoch": 0.7917493110242664,
      "grad_norm": 0.9914432764053345,
      "learning_rate": 8.471959568537787e-06,
      "loss": 0.0375,
      "step": 483800
    },
    {
      "epoch": 0.7917820414629196,
      "grad_norm": 0.36685383319854736,
      "learning_rate": 8.47189367632427e-06,
      "loss": 0.0343,
      "step": 483820
    },
    {
      "epoch": 0.791814771901573,
      "grad_norm": 1.9503626823425293,
      "learning_rate": 8.471827784110752e-06,
      "loss": 0.0257,
      "step": 483840
    },
    {
      "epoch": 0.7918475023402264,
      "grad_norm": 0.4129420220851898,
      "learning_rate": 8.471761891897236e-06,
      "loss": 0.0282,
      "step": 483860
    },
    {
      "epoch": 0.7918802327788798,
      "grad_norm": 7.410266399383545,
      "learning_rate": 8.471695999683717e-06,
      "loss": 0.0361,
      "step": 483880
    },
    {
      "epoch": 0.791912963217533,
      "grad_norm": 0.5638819932937622,
      "learning_rate": 8.471630107470201e-06,
      "loss": 0.0293,
      "step": 483900
    },
    {
      "epoch": 0.7919456936561864,
      "grad_norm": 2.6214449405670166,
      "learning_rate": 8.471564215256683e-06,
      "loss": 0.0348,
      "step": 483920
    },
    {
      "epoch": 0.7919784240948398,
      "grad_norm": 0.23934245109558105,
      "learning_rate": 8.471498323043167e-06,
      "loss": 0.0223,
      "step": 483940
    },
    {
      "epoch": 0.792011154533493,
      "grad_norm": 0.7702768445014954,
      "learning_rate": 8.471432430829648e-06,
      "loss": 0.0291,
      "step": 483960
    },
    {
      "epoch": 0.7920438849721464,
      "grad_norm": 1.7673296928405762,
      "learning_rate": 8.471366538616132e-06,
      "loss": 0.0253,
      "step": 483980
    },
    {
      "epoch": 0.7920766154107998,
      "grad_norm": 1.5320762395858765,
      "learning_rate": 8.471300646402616e-06,
      "loss": 0.0239,
      "step": 484000
    },
    {
      "epoch": 0.792109345849453,
      "grad_norm": 0.8002318739891052,
      "learning_rate": 8.471234754189098e-06,
      "loss": 0.0283,
      "step": 484020
    },
    {
      "epoch": 0.7921420762881064,
      "grad_norm": 1.610190749168396,
      "learning_rate": 8.471168861975581e-06,
      "loss": 0.0383,
      "step": 484040
    },
    {
      "epoch": 0.7921748067267598,
      "grad_norm": 0.7409687042236328,
      "learning_rate": 8.471102969762065e-06,
      "loss": 0.0268,
      "step": 484060
    },
    {
      "epoch": 0.7922075371654131,
      "grad_norm": 0.32075536251068115,
      "learning_rate": 8.471037077548547e-06,
      "loss": 0.0327,
      "step": 484080
    },
    {
      "epoch": 0.7922402676040664,
      "grad_norm": 1.8191837072372437,
      "learning_rate": 8.47097118533503e-06,
      "loss": 0.0303,
      "step": 484100
    },
    {
      "epoch": 0.7922729980427198,
      "grad_norm": 1.0775771141052246,
      "learning_rate": 8.470905293121514e-06,
      "loss": 0.0295,
      "step": 484120
    },
    {
      "epoch": 0.7923057284813732,
      "grad_norm": 0.26357728242874146,
      "learning_rate": 8.470839400907996e-06,
      "loss": 0.0323,
      "step": 484140
    },
    {
      "epoch": 0.7923384589200264,
      "grad_norm": 0.3756362497806549,
      "learning_rate": 8.47077350869448e-06,
      "loss": 0.0229,
      "step": 484160
    },
    {
      "epoch": 0.7923711893586798,
      "grad_norm": 1.1448562145233154,
      "learning_rate": 8.470707616480961e-06,
      "loss": 0.0446,
      "step": 484180
    },
    {
      "epoch": 0.7924039197973332,
      "grad_norm": 3.038341522216797,
      "learning_rate": 8.470641724267445e-06,
      "loss": 0.0383,
      "step": 484200
    },
    {
      "epoch": 0.7924366502359864,
      "grad_norm": 1.2844276428222656,
      "learning_rate": 8.470575832053927e-06,
      "loss": 0.027,
      "step": 484220
    },
    {
      "epoch": 0.7924693806746398,
      "grad_norm": 0.5932233333587646,
      "learning_rate": 8.47050993984041e-06,
      "loss": 0.0281,
      "step": 484240
    },
    {
      "epoch": 0.7925021111132932,
      "grad_norm": 0.8487491011619568,
      "learning_rate": 8.470444047626892e-06,
      "loss": 0.0238,
      "step": 484260
    },
    {
      "epoch": 0.7925348415519465,
      "grad_norm": 2.1551051139831543,
      "learning_rate": 8.470378155413376e-06,
      "loss": 0.0213,
      "step": 484280
    },
    {
      "epoch": 0.7925675719905998,
      "grad_norm": 0.5247031450271606,
      "learning_rate": 8.470312263199858e-06,
      "loss": 0.0344,
      "step": 484300
    },
    {
      "epoch": 0.7926003024292532,
      "grad_norm": 0.3665711283683777,
      "learning_rate": 8.470246370986341e-06,
      "loss": 0.0245,
      "step": 484320
    },
    {
      "epoch": 0.7926330328679065,
      "grad_norm": 0.8121950030326843,
      "learning_rate": 8.470180478772823e-06,
      "loss": 0.0263,
      "step": 484340
    },
    {
      "epoch": 0.7926657633065598,
      "grad_norm": 0.78549724817276,
      "learning_rate": 8.470114586559307e-06,
      "loss": 0.0292,
      "step": 484360
    },
    {
      "epoch": 0.7926984937452132,
      "grad_norm": 1.2387865781784058,
      "learning_rate": 8.470048694345789e-06,
      "loss": 0.0315,
      "step": 484380
    },
    {
      "epoch": 0.7927312241838665,
      "grad_norm": 0.31441786885261536,
      "learning_rate": 8.469982802132272e-06,
      "loss": 0.0315,
      "step": 484400
    },
    {
      "epoch": 0.7927639546225198,
      "grad_norm": 1.4628572463989258,
      "learning_rate": 8.469916909918756e-06,
      "loss": 0.0283,
      "step": 484420
    },
    {
      "epoch": 0.7927966850611732,
      "grad_norm": 0.417050302028656,
      "learning_rate": 8.469851017705238e-06,
      "loss": 0.0432,
      "step": 484440
    },
    {
      "epoch": 0.7928294154998266,
      "grad_norm": 1.0882129669189453,
      "learning_rate": 8.469785125491721e-06,
      "loss": 0.0243,
      "step": 484460
    },
    {
      "epoch": 0.7928621459384798,
      "grad_norm": 0.2364175021648407,
      "learning_rate": 8.469719233278205e-06,
      "loss": 0.0276,
      "step": 484480
    },
    {
      "epoch": 0.7928948763771332,
      "grad_norm": 1.7262743711471558,
      "learning_rate": 8.469653341064687e-06,
      "loss": 0.0295,
      "step": 484500
    },
    {
      "epoch": 0.7929276068157866,
      "grad_norm": 0.4233609139919281,
      "learning_rate": 8.46958744885117e-06,
      "loss": 0.0354,
      "step": 484520
    },
    {
      "epoch": 0.7929603372544399,
      "grad_norm": 1.1512064933776855,
      "learning_rate": 8.469521556637654e-06,
      "loss": 0.0264,
      "step": 484540
    },
    {
      "epoch": 0.7929930676930932,
      "grad_norm": 0.9805934429168701,
      "learning_rate": 8.469455664424136e-06,
      "loss": 0.0189,
      "step": 484560
    },
    {
      "epoch": 0.7930257981317466,
      "grad_norm": 1.1144810914993286,
      "learning_rate": 8.46938977221062e-06,
      "loss": 0.0233,
      "step": 484580
    },
    {
      "epoch": 0.7930585285703999,
      "grad_norm": 0.9151755571365356,
      "learning_rate": 8.469323879997101e-06,
      "loss": 0.0261,
      "step": 484600
    },
    {
      "epoch": 0.7930912590090532,
      "grad_norm": 0.4917888939380646,
      "learning_rate": 8.469257987783585e-06,
      "loss": 0.0222,
      "step": 484620
    },
    {
      "epoch": 0.7931239894477066,
      "grad_norm": 0.2796992063522339,
      "learning_rate": 8.469192095570067e-06,
      "loss": 0.0302,
      "step": 484640
    },
    {
      "epoch": 0.7931567198863599,
      "grad_norm": 0.5291258692741394,
      "learning_rate": 8.46912620335655e-06,
      "loss": 0.0331,
      "step": 484660
    },
    {
      "epoch": 0.7931894503250132,
      "grad_norm": 1.0831944942474365,
      "learning_rate": 8.469060311143032e-06,
      "loss": 0.0475,
      "step": 484680
    },
    {
      "epoch": 0.7932221807636666,
      "grad_norm": 1.1378878355026245,
      "learning_rate": 8.468994418929516e-06,
      "loss": 0.0281,
      "step": 484700
    },
    {
      "epoch": 0.79325491120232,
      "grad_norm": 0.6012548208236694,
      "learning_rate": 8.468928526715998e-06,
      "loss": 0.0354,
      "step": 484720
    },
    {
      "epoch": 0.7932876416409733,
      "grad_norm": 0.6769123673439026,
      "learning_rate": 8.468862634502481e-06,
      "loss": 0.0257,
      "step": 484740
    },
    {
      "epoch": 0.7933203720796266,
      "grad_norm": 0.9636244177818298,
      "learning_rate": 8.468796742288963e-06,
      "loss": 0.0292,
      "step": 484760
    },
    {
      "epoch": 0.79335310251828,
      "grad_norm": 0.6079344749450684,
      "learning_rate": 8.468730850075447e-06,
      "loss": 0.0321,
      "step": 484780
    },
    {
      "epoch": 0.7933858329569333,
      "grad_norm": 3.5850327014923096,
      "learning_rate": 8.46866495786193e-06,
      "loss": 0.0341,
      "step": 484800
    },
    {
      "epoch": 0.7934185633955866,
      "grad_norm": 1.4957506656646729,
      "learning_rate": 8.468599065648412e-06,
      "loss": 0.0254,
      "step": 484820
    },
    {
      "epoch": 0.79345129383424,
      "grad_norm": 0.7862958312034607,
      "learning_rate": 8.468533173434896e-06,
      "loss": 0.0431,
      "step": 484840
    },
    {
      "epoch": 0.7934840242728933,
      "grad_norm": 1.1961760520935059,
      "learning_rate": 8.46846728122138e-06,
      "loss": 0.0303,
      "step": 484860
    },
    {
      "epoch": 0.7935167547115466,
      "grad_norm": 1.8085453510284424,
      "learning_rate": 8.468401389007861e-06,
      "loss": 0.0321,
      "step": 484880
    },
    {
      "epoch": 0.7935494851502,
      "grad_norm": 0.44533833861351013,
      "learning_rate": 8.468335496794345e-06,
      "loss": 0.0248,
      "step": 484900
    },
    {
      "epoch": 0.7935822155888533,
      "grad_norm": 0.43219637870788574,
      "learning_rate": 8.468269604580828e-06,
      "loss": 0.043,
      "step": 484920
    },
    {
      "epoch": 0.7936149460275067,
      "grad_norm": 0.5360535383224487,
      "learning_rate": 8.46820371236731e-06,
      "loss": 0.0415,
      "step": 484940
    },
    {
      "epoch": 0.79364767646616,
      "grad_norm": 0.9728186726570129,
      "learning_rate": 8.468137820153794e-06,
      "loss": 0.0221,
      "step": 484960
    },
    {
      "epoch": 0.7936804069048133,
      "grad_norm": 0.6757648587226868,
      "learning_rate": 8.468071927940276e-06,
      "loss": 0.0213,
      "step": 484980
    },
    {
      "epoch": 0.7937131373434667,
      "grad_norm": 3.842592477798462,
      "learning_rate": 8.46800603572676e-06,
      "loss": 0.0377,
      "step": 485000
    },
    {
      "epoch": 0.79374586778212,
      "grad_norm": 0.6209459900856018,
      "learning_rate": 8.467940143513241e-06,
      "loss": 0.0328,
      "step": 485020
    },
    {
      "epoch": 0.7937785982207733,
      "grad_norm": 0.5399914383888245,
      "learning_rate": 8.467874251299725e-06,
      "loss": 0.0238,
      "step": 485040
    },
    {
      "epoch": 0.7938113286594267,
      "grad_norm": 0.790988028049469,
      "learning_rate": 8.467808359086207e-06,
      "loss": 0.0376,
      "step": 485060
    },
    {
      "epoch": 0.79384405909808,
      "grad_norm": 1.140534520149231,
      "learning_rate": 8.46774246687269e-06,
      "loss": 0.032,
      "step": 485080
    },
    {
      "epoch": 0.7938767895367334,
      "grad_norm": 0.3519055247306824,
      "learning_rate": 8.467676574659172e-06,
      "loss": 0.0315,
      "step": 485100
    },
    {
      "epoch": 0.7939095199753867,
      "grad_norm": 0.5654239058494568,
      "learning_rate": 8.467610682445656e-06,
      "loss": 0.0403,
      "step": 485120
    },
    {
      "epoch": 0.7939422504140401,
      "grad_norm": 1.4185235500335693,
      "learning_rate": 8.46754479023214e-06,
      "loss": 0.0412,
      "step": 485140
    },
    {
      "epoch": 0.7939749808526934,
      "grad_norm": 2.752533197402954,
      "learning_rate": 8.467478898018621e-06,
      "loss": 0.0311,
      "step": 485160
    },
    {
      "epoch": 0.7940077112913467,
      "grad_norm": 0.3739442527294159,
      "learning_rate": 8.467413005805105e-06,
      "loss": 0.0232,
      "step": 485180
    },
    {
      "epoch": 0.7940404417300001,
      "grad_norm": 1.112335205078125,
      "learning_rate": 8.467347113591587e-06,
      "loss": 0.0243,
      "step": 485200
    },
    {
      "epoch": 0.7940731721686534,
      "grad_norm": 1.4366647005081177,
      "learning_rate": 8.46728122137807e-06,
      "loss": 0.0357,
      "step": 485220
    },
    {
      "epoch": 0.7941059026073067,
      "grad_norm": 1.942845344543457,
      "learning_rate": 8.467215329164554e-06,
      "loss": 0.0389,
      "step": 485240
    },
    {
      "epoch": 0.7941386330459601,
      "grad_norm": 0.9706255197525024,
      "learning_rate": 8.467149436951036e-06,
      "loss": 0.0297,
      "step": 485260
    },
    {
      "epoch": 0.7941713634846134,
      "grad_norm": 0.8322689533233643,
      "learning_rate": 8.46708354473752e-06,
      "loss": 0.0263,
      "step": 485280
    },
    {
      "epoch": 0.7942040939232667,
      "grad_norm": 2.7601356506347656,
      "learning_rate": 8.467017652524003e-06,
      "loss": 0.0405,
      "step": 485300
    },
    {
      "epoch": 0.7942368243619201,
      "grad_norm": 1.398547887802124,
      "learning_rate": 8.466951760310485e-06,
      "loss": 0.0291,
      "step": 485320
    },
    {
      "epoch": 0.7942695548005735,
      "grad_norm": 1.2457075119018555,
      "learning_rate": 8.466885868096969e-06,
      "loss": 0.0337,
      "step": 485340
    },
    {
      "epoch": 0.7943022852392267,
      "grad_norm": 1.211929202079773,
      "learning_rate": 8.46681997588345e-06,
      "loss": 0.0322,
      "step": 485360
    },
    {
      "epoch": 0.7943350156778801,
      "grad_norm": 1.2210700511932373,
      "learning_rate": 8.466754083669934e-06,
      "loss": 0.039,
      "step": 485380
    },
    {
      "epoch": 0.7943677461165335,
      "grad_norm": 0.9092110991477966,
      "learning_rate": 8.466688191456416e-06,
      "loss": 0.0258,
      "step": 485400
    },
    {
      "epoch": 0.7944004765551868,
      "grad_norm": 3.3811495304107666,
      "learning_rate": 8.4666222992429e-06,
      "loss": 0.0338,
      "step": 485420
    },
    {
      "epoch": 0.7944332069938401,
      "grad_norm": 0.38064515590667725,
      "learning_rate": 8.466556407029381e-06,
      "loss": 0.0238,
      "step": 485440
    },
    {
      "epoch": 0.7944659374324935,
      "grad_norm": 0.34510383009910583,
      "learning_rate": 8.466490514815865e-06,
      "loss": 0.0457,
      "step": 485460
    },
    {
      "epoch": 0.7944986678711468,
      "grad_norm": 0.45858994126319885,
      "learning_rate": 8.466424622602349e-06,
      "loss": 0.0364,
      "step": 485480
    },
    {
      "epoch": 0.7945313983098001,
      "grad_norm": 0.5792462825775146,
      "learning_rate": 8.46635873038883e-06,
      "loss": 0.0341,
      "step": 485500
    },
    {
      "epoch": 0.7945641287484535,
      "grad_norm": 1.0815482139587402,
      "learning_rate": 8.466292838175314e-06,
      "loss": 0.0256,
      "step": 485520
    },
    {
      "epoch": 0.7945968591871069,
      "grad_norm": 2.1765832901000977,
      "learning_rate": 8.466226945961796e-06,
      "loss": 0.0317,
      "step": 485540
    },
    {
      "epoch": 0.7946295896257601,
      "grad_norm": 0.7777527570724487,
      "learning_rate": 8.46616105374828e-06,
      "loss": 0.0353,
      "step": 485560
    },
    {
      "epoch": 0.7946623200644135,
      "grad_norm": 0.6826193928718567,
      "learning_rate": 8.466095161534761e-06,
      "loss": 0.031,
      "step": 485580
    },
    {
      "epoch": 0.7946950505030669,
      "grad_norm": 0.3981720209121704,
      "learning_rate": 8.466029269321245e-06,
      "loss": 0.0268,
      "step": 485600
    },
    {
      "epoch": 0.7947277809417201,
      "grad_norm": 0.7809028625488281,
      "learning_rate": 8.465963377107727e-06,
      "loss": 0.0244,
      "step": 485620
    },
    {
      "epoch": 0.7947605113803735,
      "grad_norm": 0.7112197875976562,
      "learning_rate": 8.46589748489421e-06,
      "loss": 0.0234,
      "step": 485640
    },
    {
      "epoch": 0.7947932418190269,
      "grad_norm": 0.9869585037231445,
      "learning_rate": 8.465831592680694e-06,
      "loss": 0.0339,
      "step": 485660
    },
    {
      "epoch": 0.7948259722576801,
      "grad_norm": 0.8721141815185547,
      "learning_rate": 8.465765700467176e-06,
      "loss": 0.0252,
      "step": 485680
    },
    {
      "epoch": 0.7948587026963335,
      "grad_norm": 0.6884100437164307,
      "learning_rate": 8.46569980825366e-06,
      "loss": 0.0365,
      "step": 485700
    },
    {
      "epoch": 0.7948914331349869,
      "grad_norm": 1.4658362865447998,
      "learning_rate": 8.465633916040143e-06,
      "loss": 0.0278,
      "step": 485720
    },
    {
      "epoch": 0.7949241635736403,
      "grad_norm": 0.31751254200935364,
      "learning_rate": 8.465568023826625e-06,
      "loss": 0.0268,
      "step": 485740
    },
    {
      "epoch": 0.7949568940122935,
      "grad_norm": 1.1888831853866577,
      "learning_rate": 8.465502131613109e-06,
      "loss": 0.0319,
      "step": 485760
    },
    {
      "epoch": 0.7949896244509469,
      "grad_norm": 0.22827960550785065,
      "learning_rate": 8.46543623939959e-06,
      "loss": 0.0252,
      "step": 485780
    },
    {
      "epoch": 0.7950223548896003,
      "grad_norm": 0.5617002844810486,
      "learning_rate": 8.465370347186074e-06,
      "loss": 0.0323,
      "step": 485800
    },
    {
      "epoch": 0.7950550853282535,
      "grad_norm": 1.4849324226379395,
      "learning_rate": 8.465304454972556e-06,
      "loss": 0.0324,
      "step": 485820
    },
    {
      "epoch": 0.7950878157669069,
      "grad_norm": 1.6994404792785645,
      "learning_rate": 8.46523856275904e-06,
      "loss": 0.024,
      "step": 485840
    },
    {
      "epoch": 0.7951205462055603,
      "grad_norm": 1.5734131336212158,
      "learning_rate": 8.465172670545523e-06,
      "loss": 0.031,
      "step": 485860
    },
    {
      "epoch": 0.7951532766442135,
      "grad_norm": 2.9241933822631836,
      "learning_rate": 8.465106778332005e-06,
      "loss": 0.0259,
      "step": 485880
    },
    {
      "epoch": 0.7951860070828669,
      "grad_norm": 0.3466941714286804,
      "learning_rate": 8.465040886118489e-06,
      "loss": 0.0371,
      "step": 485900
    },
    {
      "epoch": 0.7952187375215203,
      "grad_norm": 0.6471315622329712,
      "learning_rate": 8.46497499390497e-06,
      "loss": 0.0318,
      "step": 485920
    },
    {
      "epoch": 0.7952514679601737,
      "grad_norm": 0.40667325258255005,
      "learning_rate": 8.464909101691454e-06,
      "loss": 0.0192,
      "step": 485940
    },
    {
      "epoch": 0.7952841983988269,
      "grad_norm": 0.44510239362716675,
      "learning_rate": 8.464843209477936e-06,
      "loss": 0.0305,
      "step": 485960
    },
    {
      "epoch": 0.7953169288374803,
      "grad_norm": 1.4827300310134888,
      "learning_rate": 8.46477731726442e-06,
      "loss": 0.0456,
      "step": 485980
    },
    {
      "epoch": 0.7953496592761337,
      "grad_norm": 0.4097815752029419,
      "learning_rate": 8.464711425050901e-06,
      "loss": 0.0387,
      "step": 486000
    },
    {
      "epoch": 0.7953823897147869,
      "grad_norm": 0.6751894354820251,
      "learning_rate": 8.464645532837385e-06,
      "loss": 0.0302,
      "step": 486020
    },
    {
      "epoch": 0.7954151201534403,
      "grad_norm": 0.6074555516242981,
      "learning_rate": 8.464579640623869e-06,
      "loss": 0.0322,
      "step": 486040
    },
    {
      "epoch": 0.7954478505920937,
      "grad_norm": 1.935401439666748,
      "learning_rate": 8.46451374841035e-06,
      "loss": 0.0386,
      "step": 486060
    },
    {
      "epoch": 0.7954805810307469,
      "grad_norm": 0.9904405474662781,
      "learning_rate": 8.464447856196834e-06,
      "loss": 0.0204,
      "step": 486080
    },
    {
      "epoch": 0.7955133114694003,
      "grad_norm": 0.4438842535018921,
      "learning_rate": 8.464381963983318e-06,
      "loss": 0.019,
      "step": 486100
    },
    {
      "epoch": 0.7955460419080537,
      "grad_norm": 0.5166836380958557,
      "learning_rate": 8.4643160717698e-06,
      "loss": 0.0275,
      "step": 486120
    },
    {
      "epoch": 0.795578772346707,
      "grad_norm": 1.066739797592163,
      "learning_rate": 8.464250179556283e-06,
      "loss": 0.0281,
      "step": 486140
    },
    {
      "epoch": 0.7956115027853603,
      "grad_norm": 0.76971435546875,
      "learning_rate": 8.464184287342765e-06,
      "loss": 0.0271,
      "step": 486160
    },
    {
      "epoch": 0.7956442332240137,
      "grad_norm": 1.3480207920074463,
      "learning_rate": 8.464118395129249e-06,
      "loss": 0.0424,
      "step": 486180
    },
    {
      "epoch": 0.795676963662667,
      "grad_norm": 1.999595284461975,
      "learning_rate": 8.464052502915732e-06,
      "loss": 0.0332,
      "step": 486200
    },
    {
      "epoch": 0.7957096941013203,
      "grad_norm": 0.7018077969551086,
      "learning_rate": 8.463986610702214e-06,
      "loss": 0.0316,
      "step": 486220
    },
    {
      "epoch": 0.7957424245399737,
      "grad_norm": 0.5635188817977905,
      "learning_rate": 8.463920718488698e-06,
      "loss": 0.0244,
      "step": 486240
    },
    {
      "epoch": 0.795775154978627,
      "grad_norm": 0.7832872271537781,
      "learning_rate": 8.46385482627518e-06,
      "loss": 0.0303,
      "step": 486260
    },
    {
      "epoch": 0.7958078854172803,
      "grad_norm": 0.9097339510917664,
      "learning_rate": 8.463788934061663e-06,
      "loss": 0.0304,
      "step": 486280
    },
    {
      "epoch": 0.7958406158559337,
      "grad_norm": 0.6745917201042175,
      "learning_rate": 8.463723041848145e-06,
      "loss": 0.0271,
      "step": 486300
    },
    {
      "epoch": 0.7958733462945871,
      "grad_norm": 1.0909074544906616,
      "learning_rate": 8.463657149634629e-06,
      "loss": 0.0335,
      "step": 486320
    },
    {
      "epoch": 0.7959060767332404,
      "grad_norm": 1.3901602029800415,
      "learning_rate": 8.46359125742111e-06,
      "loss": 0.0312,
      "step": 486340
    },
    {
      "epoch": 0.7959388071718937,
      "grad_norm": 0.4225143492221832,
      "learning_rate": 8.463525365207594e-06,
      "loss": 0.033,
      "step": 486360
    },
    {
      "epoch": 0.7959715376105471,
      "grad_norm": 1.0201777219772339,
      "learning_rate": 8.463459472994076e-06,
      "loss": 0.0395,
      "step": 486380
    },
    {
      "epoch": 0.7960042680492004,
      "grad_norm": 0.1274622529745102,
      "learning_rate": 8.46339358078056e-06,
      "loss": 0.0288,
      "step": 486400
    },
    {
      "epoch": 0.7960369984878537,
      "grad_norm": 1.195981502532959,
      "learning_rate": 8.463327688567042e-06,
      "loss": 0.0378,
      "step": 486420
    },
    {
      "epoch": 0.7960697289265071,
      "grad_norm": 1.901751160621643,
      "learning_rate": 8.463261796353525e-06,
      "loss": 0.0339,
      "step": 486440
    },
    {
      "epoch": 0.7961024593651604,
      "grad_norm": 0.8648034334182739,
      "learning_rate": 8.463195904140009e-06,
      "loss": 0.0267,
      "step": 486460
    },
    {
      "epoch": 0.7961351898038137,
      "grad_norm": 0.4482485353946686,
      "learning_rate": 8.46313001192649e-06,
      "loss": 0.0218,
      "step": 486480
    },
    {
      "epoch": 0.7961679202424671,
      "grad_norm": 0.5335277915000916,
      "learning_rate": 8.463064119712974e-06,
      "loss": 0.0343,
      "step": 486500
    },
    {
      "epoch": 0.7962006506811204,
      "grad_norm": 1.6936290264129639,
      "learning_rate": 8.462998227499458e-06,
      "loss": 0.0317,
      "step": 486520
    },
    {
      "epoch": 0.7962333811197738,
      "grad_norm": 1.0907436609268188,
      "learning_rate": 8.46293233528594e-06,
      "loss": 0.03,
      "step": 486540
    },
    {
      "epoch": 0.7962661115584271,
      "grad_norm": 1.2616889476776123,
      "learning_rate": 8.462866443072423e-06,
      "loss": 0.0253,
      "step": 486560
    },
    {
      "epoch": 0.7962988419970805,
      "grad_norm": 0.5763405561447144,
      "learning_rate": 8.462800550858907e-06,
      "loss": 0.0328,
      "step": 486580
    },
    {
      "epoch": 0.7963315724357338,
      "grad_norm": 0.2831622064113617,
      "learning_rate": 8.462734658645389e-06,
      "loss": 0.0375,
      "step": 486600
    },
    {
      "epoch": 0.7963643028743871,
      "grad_norm": 1.6083029508590698,
      "learning_rate": 8.462668766431872e-06,
      "loss": 0.0312,
      "step": 486620
    },
    {
      "epoch": 0.7963970333130405,
      "grad_norm": 0.4335387647151947,
      "learning_rate": 8.462602874218354e-06,
      "loss": 0.0404,
      "step": 486640
    },
    {
      "epoch": 0.7964297637516938,
      "grad_norm": 0.6967663764953613,
      "learning_rate": 8.462536982004838e-06,
      "loss": 0.0243,
      "step": 486660
    },
    {
      "epoch": 0.7964624941903471,
      "grad_norm": 0.2104754000902176,
      "learning_rate": 8.46247108979132e-06,
      "loss": 0.0237,
      "step": 486680
    },
    {
      "epoch": 0.7964952246290005,
      "grad_norm": 0.1801365613937378,
      "learning_rate": 8.462405197577803e-06,
      "loss": 0.0229,
      "step": 486700
    },
    {
      "epoch": 0.7965279550676538,
      "grad_norm": 2.4446606636047363,
      "learning_rate": 8.462339305364285e-06,
      "loss": 0.0331,
      "step": 486720
    },
    {
      "epoch": 0.7965606855063072,
      "grad_norm": 0.3933383524417877,
      "learning_rate": 8.462273413150769e-06,
      "loss": 0.0383,
      "step": 486740
    },
    {
      "epoch": 0.7965934159449605,
      "grad_norm": 0.2136395424604416,
      "learning_rate": 8.46220752093725e-06,
      "loss": 0.0232,
      "step": 486760
    },
    {
      "epoch": 0.7966261463836138,
      "grad_norm": 0.3297715187072754,
      "learning_rate": 8.462141628723734e-06,
      "loss": 0.0256,
      "step": 486780
    },
    {
      "epoch": 0.7966588768222672,
      "grad_norm": 1.110027551651001,
      "learning_rate": 8.462075736510216e-06,
      "loss": 0.0272,
      "step": 486800
    },
    {
      "epoch": 0.7966916072609205,
      "grad_norm": 0.9832722544670105,
      "learning_rate": 8.4620098442967e-06,
      "loss": 0.0229,
      "step": 486820
    },
    {
      "epoch": 0.7967243376995738,
      "grad_norm": 0.6769592761993408,
      "learning_rate": 8.461943952083183e-06,
      "loss": 0.0336,
      "step": 486840
    },
    {
      "epoch": 0.7967570681382272,
      "grad_norm": 0.4568442404270172,
      "learning_rate": 8.461878059869665e-06,
      "loss": 0.025,
      "step": 486860
    },
    {
      "epoch": 0.7967897985768805,
      "grad_norm": 0.4043098986148834,
      "learning_rate": 8.461812167656149e-06,
      "loss": 0.0202,
      "step": 486880
    },
    {
      "epoch": 0.7968225290155339,
      "grad_norm": 0.33686622977256775,
      "learning_rate": 8.461746275442632e-06,
      "loss": 0.0344,
      "step": 486900
    },
    {
      "epoch": 0.7968552594541872,
      "grad_norm": 2.6184003353118896,
      "learning_rate": 8.461680383229114e-06,
      "loss": 0.0292,
      "step": 486920
    },
    {
      "epoch": 0.7968879898928406,
      "grad_norm": 1.2741684913635254,
      "learning_rate": 8.461614491015598e-06,
      "loss": 0.0344,
      "step": 486940
    },
    {
      "epoch": 0.7969207203314939,
      "grad_norm": 0.3254016637802124,
      "learning_rate": 8.461548598802081e-06,
      "loss": 0.03,
      "step": 486960
    },
    {
      "epoch": 0.7969534507701472,
      "grad_norm": 0.8032581210136414,
      "learning_rate": 8.461482706588563e-06,
      "loss": 0.0244,
      "step": 486980
    },
    {
      "epoch": 0.7969861812088006,
      "grad_norm": 1.1178650856018066,
      "learning_rate": 8.461416814375047e-06,
      "loss": 0.0345,
      "step": 487000
    },
    {
      "epoch": 0.7970189116474539,
      "grad_norm": 0.7817128300666809,
      "learning_rate": 8.461350922161529e-06,
      "loss": 0.0441,
      "step": 487020
    },
    {
      "epoch": 0.7970516420861072,
      "grad_norm": 0.6492223143577576,
      "learning_rate": 8.461285029948012e-06,
      "loss": 0.0219,
      "step": 487040
    },
    {
      "epoch": 0.7970843725247606,
      "grad_norm": 1.4328709840774536,
      "learning_rate": 8.461219137734494e-06,
      "loss": 0.0414,
      "step": 487060
    },
    {
      "epoch": 0.7971171029634139,
      "grad_norm": 2.5439350605010986,
      "learning_rate": 8.461153245520978e-06,
      "loss": 0.038,
      "step": 487080
    },
    {
      "epoch": 0.7971498334020672,
      "grad_norm": 0.5147104859352112,
      "learning_rate": 8.46108735330746e-06,
      "loss": 0.0353,
      "step": 487100
    },
    {
      "epoch": 0.7971825638407206,
      "grad_norm": 0.37876471877098083,
      "learning_rate": 8.461021461093943e-06,
      "loss": 0.0287,
      "step": 487120
    },
    {
      "epoch": 0.797215294279374,
      "grad_norm": 0.12717215716838837,
      "learning_rate": 8.460955568880425e-06,
      "loss": 0.0319,
      "step": 487140
    },
    {
      "epoch": 0.7972480247180272,
      "grad_norm": 3.53822660446167,
      "learning_rate": 8.460889676666909e-06,
      "loss": 0.0244,
      "step": 487160
    },
    {
      "epoch": 0.7972807551566806,
      "grad_norm": 0.7916083335876465,
      "learning_rate": 8.46082378445339e-06,
      "loss": 0.0328,
      "step": 487180
    },
    {
      "epoch": 0.797313485595334,
      "grad_norm": 2.001411199569702,
      "learning_rate": 8.460757892239874e-06,
      "loss": 0.0295,
      "step": 487200
    },
    {
      "epoch": 0.7973462160339873,
      "grad_norm": 0.7520672082901001,
      "learning_rate": 8.460692000026356e-06,
      "loss": 0.0443,
      "step": 487220
    },
    {
      "epoch": 0.7973789464726406,
      "grad_norm": 0.398941308259964,
      "learning_rate": 8.46062610781284e-06,
      "loss": 0.0382,
      "step": 487240
    },
    {
      "epoch": 0.797411676911294,
      "grad_norm": 1.3242955207824707,
      "learning_rate": 8.460560215599323e-06,
      "loss": 0.0291,
      "step": 487260
    },
    {
      "epoch": 0.7974444073499473,
      "grad_norm": 0.44792959094047546,
      "learning_rate": 8.460494323385805e-06,
      "loss": 0.0268,
      "step": 487280
    },
    {
      "epoch": 0.7974771377886006,
      "grad_norm": 1.345952033996582,
      "learning_rate": 8.460428431172289e-06,
      "loss": 0.0314,
      "step": 487300
    },
    {
      "epoch": 0.797509868227254,
      "grad_norm": 0.6847338676452637,
      "learning_rate": 8.460362538958772e-06,
      "loss": 0.0409,
      "step": 487320
    },
    {
      "epoch": 0.7975425986659073,
      "grad_norm": 1.753823161125183,
      "learning_rate": 8.460296646745256e-06,
      "loss": 0.0359,
      "step": 487340
    },
    {
      "epoch": 0.7975753291045606,
      "grad_norm": 4.61871862411499,
      "learning_rate": 8.460230754531738e-06,
      "loss": 0.0393,
      "step": 487360
    },
    {
      "epoch": 0.797608059543214,
      "grad_norm": 0.29384124279022217,
      "learning_rate": 8.460164862318222e-06,
      "loss": 0.0272,
      "step": 487380
    },
    {
      "epoch": 0.7976407899818674,
      "grad_norm": 1.5198053121566772,
      "learning_rate": 8.460098970104703e-06,
      "loss": 0.0304,
      "step": 487400
    },
    {
      "epoch": 0.7976735204205206,
      "grad_norm": 0.8515196442604065,
      "learning_rate": 8.460033077891187e-06,
      "loss": 0.0285,
      "step": 487420
    },
    {
      "epoch": 0.797706250859174,
      "grad_norm": 4.81230354309082,
      "learning_rate": 8.459967185677669e-06,
      "loss": 0.0404,
      "step": 487440
    },
    {
      "epoch": 0.7977389812978274,
      "grad_norm": 1.443705439567566,
      "learning_rate": 8.459901293464152e-06,
      "loss": 0.0349,
      "step": 487460
    },
    {
      "epoch": 0.7977717117364806,
      "grad_norm": 1.922573447227478,
      "learning_rate": 8.459835401250634e-06,
      "loss": 0.0334,
      "step": 487480
    },
    {
      "epoch": 0.797804442175134,
      "grad_norm": 1.0068769454956055,
      "learning_rate": 8.459769509037118e-06,
      "loss": 0.033,
      "step": 487500
    },
    {
      "epoch": 0.7978371726137874,
      "grad_norm": 1.1307591199874878,
      "learning_rate": 8.4597036168236e-06,
      "loss": 0.0372,
      "step": 487520
    },
    {
      "epoch": 0.7978699030524407,
      "grad_norm": 1.258976697921753,
      "learning_rate": 8.459637724610083e-06,
      "loss": 0.038,
      "step": 487540
    },
    {
      "epoch": 0.797902633491094,
      "grad_norm": 0.26365208625793457,
      "learning_rate": 8.459571832396565e-06,
      "loss": 0.0265,
      "step": 487560
    },
    {
      "epoch": 0.7979353639297474,
      "grad_norm": 0.4057086408138275,
      "learning_rate": 8.459505940183049e-06,
      "loss": 0.0307,
      "step": 487580
    },
    {
      "epoch": 0.7979680943684008,
      "grad_norm": 1.3725780248641968,
      "learning_rate": 8.459440047969533e-06,
      "loss": 0.0287,
      "step": 487600
    },
    {
      "epoch": 0.798000824807054,
      "grad_norm": 0.6491238474845886,
      "learning_rate": 8.459374155756014e-06,
      "loss": 0.0306,
      "step": 487620
    },
    {
      "epoch": 0.7980335552457074,
      "grad_norm": 0.27482011914253235,
      "learning_rate": 8.459308263542498e-06,
      "loss": 0.0259,
      "step": 487640
    },
    {
      "epoch": 0.7980662856843608,
      "grad_norm": 2.9182965755462646,
      "learning_rate": 8.45924237132898e-06,
      "loss": 0.0392,
      "step": 487660
    },
    {
      "epoch": 0.798099016123014,
      "grad_norm": 2.144275426864624,
      "learning_rate": 8.459176479115463e-06,
      "loss": 0.0415,
      "step": 487680
    },
    {
      "epoch": 0.7981317465616674,
      "grad_norm": 0.7877383232116699,
      "learning_rate": 8.459110586901947e-06,
      "loss": 0.0342,
      "step": 487700
    },
    {
      "epoch": 0.7981644770003208,
      "grad_norm": 0.9248002171516418,
      "learning_rate": 8.459044694688429e-06,
      "loss": 0.0388,
      "step": 487720
    },
    {
      "epoch": 0.798197207438974,
      "grad_norm": 0.7508999109268188,
      "learning_rate": 8.458978802474913e-06,
      "loss": 0.0316,
      "step": 487740
    },
    {
      "epoch": 0.7982299378776274,
      "grad_norm": 1.078995704650879,
      "learning_rate": 8.458912910261396e-06,
      "loss": 0.0348,
      "step": 487760
    },
    {
      "epoch": 0.7982626683162808,
      "grad_norm": 0.8128015398979187,
      "learning_rate": 8.458847018047878e-06,
      "loss": 0.0289,
      "step": 487780
    },
    {
      "epoch": 0.7982953987549342,
      "grad_norm": 1.190517544746399,
      "learning_rate": 8.458781125834362e-06,
      "loss": 0.0293,
      "step": 487800
    },
    {
      "epoch": 0.7983281291935874,
      "grad_norm": 0.9392013549804688,
      "learning_rate": 8.458715233620843e-06,
      "loss": 0.0354,
      "step": 487820
    },
    {
      "epoch": 0.7983608596322408,
      "grad_norm": 0.45092353224754333,
      "learning_rate": 8.458649341407327e-06,
      "loss": 0.0356,
      "step": 487840
    },
    {
      "epoch": 0.7983935900708942,
      "grad_norm": 1.1717137098312378,
      "learning_rate": 8.458583449193809e-06,
      "loss": 0.0343,
      "step": 487860
    },
    {
      "epoch": 0.7984263205095474,
      "grad_norm": 0.19288425147533417,
      "learning_rate": 8.458517556980293e-06,
      "loss": 0.0417,
      "step": 487880
    },
    {
      "epoch": 0.7984590509482008,
      "grad_norm": 0.5415564179420471,
      "learning_rate": 8.458451664766774e-06,
      "loss": 0.0348,
      "step": 487900
    },
    {
      "epoch": 0.7984917813868542,
      "grad_norm": 0.2668423056602478,
      "learning_rate": 8.458385772553258e-06,
      "loss": 0.04,
      "step": 487920
    },
    {
      "epoch": 0.7985245118255074,
      "grad_norm": 1.2104710340499878,
      "learning_rate": 8.458319880339742e-06,
      "loss": 0.0388,
      "step": 487940
    },
    {
      "epoch": 0.7985572422641608,
      "grad_norm": 0.5454573631286621,
      "learning_rate": 8.458253988126224e-06,
      "loss": 0.0168,
      "step": 487960
    },
    {
      "epoch": 0.7985899727028142,
      "grad_norm": 1.0694674253463745,
      "learning_rate": 8.458188095912707e-06,
      "loss": 0.0332,
      "step": 487980
    },
    {
      "epoch": 0.7986227031414675,
      "grad_norm": 0.5179262161254883,
      "learning_rate": 8.458122203699189e-06,
      "loss": 0.0279,
      "step": 488000
    },
    {
      "epoch": 0.7986554335801208,
      "grad_norm": 5.585838317871094,
      "learning_rate": 8.458056311485673e-06,
      "loss": 0.029,
      "step": 488020
    },
    {
      "epoch": 0.7986881640187742,
      "grad_norm": 0.28741583228111267,
      "learning_rate": 8.457990419272154e-06,
      "loss": 0.0332,
      "step": 488040
    },
    {
      "epoch": 0.7987208944574276,
      "grad_norm": 1.0847058296203613,
      "learning_rate": 8.457924527058638e-06,
      "loss": 0.0309,
      "step": 488060
    },
    {
      "epoch": 0.7987536248960808,
      "grad_norm": 1.5321580171585083,
      "learning_rate": 8.457858634845122e-06,
      "loss": 0.0258,
      "step": 488080
    },
    {
      "epoch": 0.7987863553347342,
      "grad_norm": 0.7359711527824402,
      "learning_rate": 8.457792742631604e-06,
      "loss": 0.0353,
      "step": 488100
    },
    {
      "epoch": 0.7988190857733876,
      "grad_norm": 1.054611086845398,
      "learning_rate": 8.457726850418087e-06,
      "loss": 0.0315,
      "step": 488120
    },
    {
      "epoch": 0.7988518162120408,
      "grad_norm": 0.4929673969745636,
      "learning_rate": 8.45766095820457e-06,
      "loss": 0.0348,
      "step": 488140
    },
    {
      "epoch": 0.7988845466506942,
      "grad_norm": 0.5791289210319519,
      "learning_rate": 8.457595065991053e-06,
      "loss": 0.0296,
      "step": 488160
    },
    {
      "epoch": 0.7989172770893476,
      "grad_norm": 1.6553112268447876,
      "learning_rate": 8.457529173777536e-06,
      "loss": 0.0374,
      "step": 488180
    },
    {
      "epoch": 0.7989500075280009,
      "grad_norm": 1.4359906911849976,
      "learning_rate": 8.457463281564018e-06,
      "loss": 0.0306,
      "step": 488200
    },
    {
      "epoch": 0.7989827379666542,
      "grad_norm": 1.0765756368637085,
      "learning_rate": 8.457397389350502e-06,
      "loss": 0.0248,
      "step": 488220
    },
    {
      "epoch": 0.7990154684053076,
      "grad_norm": 0.6462679505348206,
      "learning_rate": 8.457331497136984e-06,
      "loss": 0.0263,
      "step": 488240
    },
    {
      "epoch": 0.7990481988439609,
      "grad_norm": 0.7860392928123474,
      "learning_rate": 8.457265604923467e-06,
      "loss": 0.0328,
      "step": 488260
    },
    {
      "epoch": 0.7990809292826142,
      "grad_norm": 1.554284691810608,
      "learning_rate": 8.457199712709949e-06,
      "loss": 0.0281,
      "step": 488280
    },
    {
      "epoch": 0.7991136597212676,
      "grad_norm": 0.8134608268737793,
      "learning_rate": 8.457133820496433e-06,
      "loss": 0.0327,
      "step": 488300
    },
    {
      "epoch": 0.799146390159921,
      "grad_norm": 1.0867602825164795,
      "learning_rate": 8.457067928282916e-06,
      "loss": 0.0276,
      "step": 488320
    },
    {
      "epoch": 0.7991791205985742,
      "grad_norm": 0.4550315737724304,
      "learning_rate": 8.457002036069398e-06,
      "loss": 0.0456,
      "step": 488340
    },
    {
      "epoch": 0.7992118510372276,
      "grad_norm": 1.3941227197647095,
      "learning_rate": 8.456936143855882e-06,
      "loss": 0.0408,
      "step": 488360
    },
    {
      "epoch": 0.799244581475881,
      "grad_norm": 0.7370998859405518,
      "learning_rate": 8.456870251642364e-06,
      "loss": 0.0434,
      "step": 488380
    },
    {
      "epoch": 0.7992773119145343,
      "grad_norm": 0.518007218837738,
      "learning_rate": 8.456804359428847e-06,
      "loss": 0.0394,
      "step": 488400
    },
    {
      "epoch": 0.7993100423531876,
      "grad_norm": 0.7208464741706848,
      "learning_rate": 8.456738467215329e-06,
      "loss": 0.0413,
      "step": 488420
    },
    {
      "epoch": 0.799342772791841,
      "grad_norm": 1.2315782308578491,
      "learning_rate": 8.456672575001813e-06,
      "loss": 0.0306,
      "step": 488440
    },
    {
      "epoch": 0.7993755032304943,
      "grad_norm": 0.4035538136959076,
      "learning_rate": 8.456606682788295e-06,
      "loss": 0.0288,
      "step": 488460
    },
    {
      "epoch": 0.7994082336691476,
      "grad_norm": 0.5070109963417053,
      "learning_rate": 8.456540790574778e-06,
      "loss": 0.0342,
      "step": 488480
    },
    {
      "epoch": 0.799440964107801,
      "grad_norm": 1.7293016910552979,
      "learning_rate": 8.456474898361262e-06,
      "loss": 0.0367,
      "step": 488500
    },
    {
      "epoch": 0.7994736945464543,
      "grad_norm": 1.239047646522522,
      "learning_rate": 8.456409006147744e-06,
      "loss": 0.026,
      "step": 488520
    },
    {
      "epoch": 0.7995064249851076,
      "grad_norm": 0.9540824294090271,
      "learning_rate": 8.456343113934227e-06,
      "loss": 0.0328,
      "step": 488540
    },
    {
      "epoch": 0.799539155423761,
      "grad_norm": 1.0065683126449585,
      "learning_rate": 8.45627722172071e-06,
      "loss": 0.0297,
      "step": 488560
    },
    {
      "epoch": 0.7995718858624143,
      "grad_norm": 2.5217201709747314,
      "learning_rate": 8.456211329507193e-06,
      "loss": 0.0323,
      "step": 488580
    },
    {
      "epoch": 0.7996046163010677,
      "grad_norm": 0.6701352596282959,
      "learning_rate": 8.456145437293676e-06,
      "loss": 0.0239,
      "step": 488600
    },
    {
      "epoch": 0.799637346739721,
      "grad_norm": 0.8528918623924255,
      "learning_rate": 8.456079545080158e-06,
      "loss": 0.0437,
      "step": 488620
    },
    {
      "epoch": 0.7996700771783743,
      "grad_norm": 0.3577762246131897,
      "learning_rate": 8.456013652866642e-06,
      "loss": 0.0264,
      "step": 488640
    },
    {
      "epoch": 0.7997028076170277,
      "grad_norm": 0.7080499529838562,
      "learning_rate": 8.455947760653125e-06,
      "loss": 0.0414,
      "step": 488660
    },
    {
      "epoch": 0.799735538055681,
      "grad_norm": 0.5931682586669922,
      "learning_rate": 8.455881868439607e-06,
      "loss": 0.0338,
      "step": 488680
    },
    {
      "epoch": 0.7997682684943344,
      "grad_norm": 0.284438818693161,
      "learning_rate": 8.45581597622609e-06,
      "loss": 0.0205,
      "step": 488700
    },
    {
      "epoch": 0.7998009989329877,
      "grad_norm": 0.674914538860321,
      "learning_rate": 8.455750084012573e-06,
      "loss": 0.0333,
      "step": 488720
    },
    {
      "epoch": 0.799833729371641,
      "grad_norm": 0.8153479695320129,
      "learning_rate": 8.455684191799056e-06,
      "loss": 0.0379,
      "step": 488740
    },
    {
      "epoch": 0.7998664598102944,
      "grad_norm": 1.7616055011749268,
      "learning_rate": 8.455618299585538e-06,
      "loss": 0.0317,
      "step": 488760
    },
    {
      "epoch": 0.7998991902489477,
      "grad_norm": 1.8161487579345703,
      "learning_rate": 8.455552407372022e-06,
      "loss": 0.0352,
      "step": 488780
    },
    {
      "epoch": 0.7999319206876011,
      "grad_norm": 2.4695682525634766,
      "learning_rate": 8.455486515158504e-06,
      "loss": 0.0392,
      "step": 488800
    },
    {
      "epoch": 0.7999646511262544,
      "grad_norm": 1.0766263008117676,
      "learning_rate": 8.455420622944987e-06,
      "loss": 0.0239,
      "step": 488820
    },
    {
      "epoch": 0.7999973815649077,
      "grad_norm": 1.5005124807357788,
      "learning_rate": 8.455354730731469e-06,
      "loss": 0.0218,
      "step": 488840
    },
    {
      "epoch": 0.8000301120035611,
      "grad_norm": 4.041206359863281,
      "learning_rate": 8.455288838517953e-06,
      "loss": 0.0421,
      "step": 488860
    },
    {
      "epoch": 0.8000628424422144,
      "grad_norm": 0.8468318581581116,
      "learning_rate": 8.455222946304436e-06,
      "loss": 0.0342,
      "step": 488880
    },
    {
      "epoch": 0.8000955728808677,
      "grad_norm": 0.9360068440437317,
      "learning_rate": 8.455157054090918e-06,
      "loss": 0.028,
      "step": 488900
    },
    {
      "epoch": 0.8001283033195211,
      "grad_norm": 0.6225190162658691,
      "learning_rate": 8.455091161877402e-06,
      "loss": 0.0301,
      "step": 488920
    },
    {
      "epoch": 0.8001610337581744,
      "grad_norm": 0.3304864466190338,
      "learning_rate": 8.455025269663885e-06,
      "loss": 0.0285,
      "step": 488940
    },
    {
      "epoch": 0.8001937641968278,
      "grad_norm": 1.2490202188491821,
      "learning_rate": 8.454959377450367e-06,
      "loss": 0.0328,
      "step": 488960
    },
    {
      "epoch": 0.8002264946354811,
      "grad_norm": 0.6178063750267029,
      "learning_rate": 8.454893485236851e-06,
      "loss": 0.0245,
      "step": 488980
    },
    {
      "epoch": 0.8002592250741345,
      "grad_norm": 0.8567855954170227,
      "learning_rate": 8.454827593023334e-06,
      "loss": 0.0318,
      "step": 489000
    },
    {
      "epoch": 0.8002919555127878,
      "grad_norm": 1.5838406085968018,
      "learning_rate": 8.454761700809816e-06,
      "loss": 0.0429,
      "step": 489020
    },
    {
      "epoch": 0.8003246859514411,
      "grad_norm": 0.6572651267051697,
      "learning_rate": 8.4546958085963e-06,
      "loss": 0.025,
      "step": 489040
    },
    {
      "epoch": 0.8003574163900945,
      "grad_norm": 1.557863712310791,
      "learning_rate": 8.454629916382782e-06,
      "loss": 0.0307,
      "step": 489060
    },
    {
      "epoch": 0.8003901468287478,
      "grad_norm": 1.3741086721420288,
      "learning_rate": 8.454564024169265e-06,
      "loss": 0.0298,
      "step": 489080
    },
    {
      "epoch": 0.8004228772674011,
      "grad_norm": 0.9696195721626282,
      "learning_rate": 8.454498131955747e-06,
      "loss": 0.0322,
      "step": 489100
    },
    {
      "epoch": 0.8004556077060545,
      "grad_norm": 2.60908842086792,
      "learning_rate": 8.454432239742231e-06,
      "loss": 0.0347,
      "step": 489120
    },
    {
      "epoch": 0.8004883381447078,
      "grad_norm": 1.1053118705749512,
      "learning_rate": 8.454366347528713e-06,
      "loss": 0.0287,
      "step": 489140
    },
    {
      "epoch": 0.8005210685833611,
      "grad_norm": 2.922220230102539,
      "learning_rate": 8.454300455315196e-06,
      "loss": 0.0207,
      "step": 489160
    },
    {
      "epoch": 0.8005537990220145,
      "grad_norm": 1.324135422706604,
      "learning_rate": 8.454234563101678e-06,
      "loss": 0.0358,
      "step": 489180
    },
    {
      "epoch": 0.8005865294606679,
      "grad_norm": 1.6215159893035889,
      "learning_rate": 8.454168670888162e-06,
      "loss": 0.0368,
      "step": 489200
    },
    {
      "epoch": 0.8006192598993211,
      "grad_norm": 3.19016695022583,
      "learning_rate": 8.454102778674644e-06,
      "loss": 0.0327,
      "step": 489220
    },
    {
      "epoch": 0.8006519903379745,
      "grad_norm": 0.4593493342399597,
      "learning_rate": 8.454036886461127e-06,
      "loss": 0.0307,
      "step": 489240
    },
    {
      "epoch": 0.8006847207766279,
      "grad_norm": 1.537672996520996,
      "learning_rate": 8.45397099424761e-06,
      "loss": 0.0345,
      "step": 489260
    },
    {
      "epoch": 0.8007174512152812,
      "grad_norm": 3.4479165077209473,
      "learning_rate": 8.453905102034093e-06,
      "loss": 0.0308,
      "step": 489280
    },
    {
      "epoch": 0.8007501816539345,
      "grad_norm": 1.1180461645126343,
      "learning_rate": 8.453839209820576e-06,
      "loss": 0.0352,
      "step": 489300
    },
    {
      "epoch": 0.8007829120925879,
      "grad_norm": 0.43676289916038513,
      "learning_rate": 8.453773317607058e-06,
      "loss": 0.0322,
      "step": 489320
    },
    {
      "epoch": 0.8008156425312412,
      "grad_norm": 1.5817240476608276,
      "learning_rate": 8.453707425393542e-06,
      "loss": 0.0337,
      "step": 489340
    },
    {
      "epoch": 0.8008483729698945,
      "grad_norm": 2.2864811420440674,
      "learning_rate": 8.453641533180025e-06,
      "loss": 0.0276,
      "step": 489360
    },
    {
      "epoch": 0.8008811034085479,
      "grad_norm": 1.6934155225753784,
      "learning_rate": 8.453575640966507e-06,
      "loss": 0.0457,
      "step": 489380
    },
    {
      "epoch": 0.8009138338472013,
      "grad_norm": 0.8033738732337952,
      "learning_rate": 8.453509748752991e-06,
      "loss": 0.0236,
      "step": 489400
    },
    {
      "epoch": 0.8009465642858545,
      "grad_norm": 1.5371801853179932,
      "learning_rate": 8.453443856539475e-06,
      "loss": 0.0292,
      "step": 489420
    },
    {
      "epoch": 0.8009792947245079,
      "grad_norm": 1.391323208808899,
      "learning_rate": 8.453377964325956e-06,
      "loss": 0.0269,
      "step": 489440
    },
    {
      "epoch": 0.8010120251631613,
      "grad_norm": 0.5510289669036865,
      "learning_rate": 8.45331207211244e-06,
      "loss": 0.034,
      "step": 489460
    },
    {
      "epoch": 0.8010447556018145,
      "grad_norm": 1.008327603340149,
      "learning_rate": 8.453246179898922e-06,
      "loss": 0.044,
      "step": 489480
    },
    {
      "epoch": 0.8010774860404679,
      "grad_norm": 0.6543901562690735,
      "learning_rate": 8.453180287685405e-06,
      "loss": 0.0269,
      "step": 489500
    },
    {
      "epoch": 0.8011102164791213,
      "grad_norm": 0.5905881524085999,
      "learning_rate": 8.453114395471887e-06,
      "loss": 0.0338,
      "step": 489520
    },
    {
      "epoch": 0.8011429469177745,
      "grad_norm": 1.2836986780166626,
      "learning_rate": 8.453048503258371e-06,
      "loss": 0.0347,
      "step": 489540
    },
    {
      "epoch": 0.8011756773564279,
      "grad_norm": 0.7431990504264832,
      "learning_rate": 8.452982611044853e-06,
      "loss": 0.0239,
      "step": 489560
    },
    {
      "epoch": 0.8012084077950813,
      "grad_norm": 0.9935058355331421,
      "learning_rate": 8.452916718831336e-06,
      "loss": 0.0219,
      "step": 489580
    },
    {
      "epoch": 0.8012411382337347,
      "grad_norm": 1.9949488639831543,
      "learning_rate": 8.452850826617818e-06,
      "loss": 0.037,
      "step": 489600
    },
    {
      "epoch": 0.8012738686723879,
      "grad_norm": 1.0611584186553955,
      "learning_rate": 8.452784934404302e-06,
      "loss": 0.0279,
      "step": 489620
    },
    {
      "epoch": 0.8013065991110413,
      "grad_norm": 0.5524787306785583,
      "learning_rate": 8.452719042190784e-06,
      "loss": 0.0245,
      "step": 489640
    },
    {
      "epoch": 0.8013393295496947,
      "grad_norm": 0.3730510175228119,
      "learning_rate": 8.452653149977267e-06,
      "loss": 0.0283,
      "step": 489660
    },
    {
      "epoch": 0.8013720599883479,
      "grad_norm": 1.5164539813995361,
      "learning_rate": 8.452587257763751e-06,
      "loss": 0.0193,
      "step": 489680
    },
    {
      "epoch": 0.8014047904270013,
      "grad_norm": 0.4800712466239929,
      "learning_rate": 8.452521365550233e-06,
      "loss": 0.0266,
      "step": 489700
    },
    {
      "epoch": 0.8014375208656547,
      "grad_norm": 0.39329540729522705,
      "learning_rate": 8.452455473336716e-06,
      "loss": 0.0316,
      "step": 489720
    },
    {
      "epoch": 0.8014702513043079,
      "grad_norm": 0.5593065619468689,
      "learning_rate": 8.4523895811232e-06,
      "loss": 0.0452,
      "step": 489740
    },
    {
      "epoch": 0.8015029817429613,
      "grad_norm": 0.5590646266937256,
      "learning_rate": 8.452323688909682e-06,
      "loss": 0.022,
      "step": 489760
    },
    {
      "epoch": 0.8015357121816147,
      "grad_norm": 1.7534867525100708,
      "learning_rate": 8.452257796696166e-06,
      "loss": 0.0375,
      "step": 489780
    },
    {
      "epoch": 0.801568442620268,
      "grad_norm": 0.6562551259994507,
      "learning_rate": 8.452191904482649e-06,
      "loss": 0.027,
      "step": 489800
    },
    {
      "epoch": 0.8016011730589213,
      "grad_norm": 2.429072856903076,
      "learning_rate": 8.452126012269131e-06,
      "loss": 0.0275,
      "step": 489820
    },
    {
      "epoch": 0.8016339034975747,
      "grad_norm": 1.0608487129211426,
      "learning_rate": 8.452060120055615e-06,
      "loss": 0.0228,
      "step": 489840
    },
    {
      "epoch": 0.8016666339362281,
      "grad_norm": 0.6340838670730591,
      "learning_rate": 8.451994227842096e-06,
      "loss": 0.0301,
      "step": 489860
    },
    {
      "epoch": 0.8016993643748813,
      "grad_norm": 0.32191118597984314,
      "learning_rate": 8.45192833562858e-06,
      "loss": 0.0305,
      "step": 489880
    },
    {
      "epoch": 0.8017320948135347,
      "grad_norm": 0.6719074845314026,
      "learning_rate": 8.451862443415062e-06,
      "loss": 0.0301,
      "step": 489900
    },
    {
      "epoch": 0.8017648252521881,
      "grad_norm": 1.1240622997283936,
      "learning_rate": 8.451796551201546e-06,
      "loss": 0.0402,
      "step": 489920
    },
    {
      "epoch": 0.8017975556908413,
      "grad_norm": 0.5525366067886353,
      "learning_rate": 8.451730658988027e-06,
      "loss": 0.0353,
      "step": 489940
    },
    {
      "epoch": 0.8018302861294947,
      "grad_norm": 0.7608181238174438,
      "learning_rate": 8.451664766774511e-06,
      "loss": 0.0314,
      "step": 489960
    },
    {
      "epoch": 0.8018630165681481,
      "grad_norm": 0.7926127314567566,
      "learning_rate": 8.451598874560993e-06,
      "loss": 0.0222,
      "step": 489980
    },
    {
      "epoch": 0.8018957470068014,
      "grad_norm": 1.6636501550674438,
      "learning_rate": 8.451532982347477e-06,
      "loss": 0.0308,
      "step": 490000
    },
    {
      "epoch": 0.8019284774454547,
      "grad_norm": 1.3472371101379395,
      "learning_rate": 8.451467090133958e-06,
      "loss": 0.0375,
      "step": 490020
    },
    {
      "epoch": 0.8019612078841081,
      "grad_norm": 0.7704695463180542,
      "learning_rate": 8.451401197920442e-06,
      "loss": 0.0322,
      "step": 490040
    },
    {
      "epoch": 0.8019939383227614,
      "grad_norm": 1.461949110031128,
      "learning_rate": 8.451335305706926e-06,
      "loss": 0.0429,
      "step": 490060
    },
    {
      "epoch": 0.8020266687614147,
      "grad_norm": 1.1223609447479248,
      "learning_rate": 8.451269413493407e-06,
      "loss": 0.033,
      "step": 490080
    },
    {
      "epoch": 0.8020593992000681,
      "grad_norm": 1.3320529460906982,
      "learning_rate": 8.451203521279891e-06,
      "loss": 0.0329,
      "step": 490100
    },
    {
      "epoch": 0.8020921296387215,
      "grad_norm": 0.6132679581642151,
      "learning_rate": 8.451137629066373e-06,
      "loss": 0.0252,
      "step": 490120
    },
    {
      "epoch": 0.8021248600773747,
      "grad_norm": 2.832634210586548,
      "learning_rate": 8.451071736852857e-06,
      "loss": 0.027,
      "step": 490140
    },
    {
      "epoch": 0.8021575905160281,
      "grad_norm": 0.8590724468231201,
      "learning_rate": 8.45100584463934e-06,
      "loss": 0.0246,
      "step": 490160
    },
    {
      "epoch": 0.8021903209546815,
      "grad_norm": 0.3114241361618042,
      "learning_rate": 8.450939952425824e-06,
      "loss": 0.0352,
      "step": 490180
    },
    {
      "epoch": 0.8022230513933347,
      "grad_norm": 0.6448600888252258,
      "learning_rate": 8.450874060212306e-06,
      "loss": 0.0362,
      "step": 490200
    },
    {
      "epoch": 0.8022557818319881,
      "grad_norm": 1.6401405334472656,
      "learning_rate": 8.45080816799879e-06,
      "loss": 0.0251,
      "step": 490220
    },
    {
      "epoch": 0.8022885122706415,
      "grad_norm": 0.16738414764404297,
      "learning_rate": 8.450742275785271e-06,
      "loss": 0.0307,
      "step": 490240
    },
    {
      "epoch": 0.8023212427092948,
      "grad_norm": 0.7621893882751465,
      "learning_rate": 8.450676383571755e-06,
      "loss": 0.0306,
      "step": 490260
    },
    {
      "epoch": 0.8023539731479481,
      "grad_norm": 0.5093300938606262,
      "learning_rate": 8.450610491358237e-06,
      "loss": 0.0278,
      "step": 490280
    },
    {
      "epoch": 0.8023867035866015,
      "grad_norm": 1.1026350259780884,
      "learning_rate": 8.45054459914472e-06,
      "loss": 0.0338,
      "step": 490300
    },
    {
      "epoch": 0.8024194340252548,
      "grad_norm": 1.044730544090271,
      "learning_rate": 8.450478706931202e-06,
      "loss": 0.0359,
      "step": 490320
    },
    {
      "epoch": 0.8024521644639081,
      "grad_norm": 1.0867424011230469,
      "learning_rate": 8.450412814717686e-06,
      "loss": 0.0275,
      "step": 490340
    },
    {
      "epoch": 0.8024848949025615,
      "grad_norm": 1.4064009189605713,
      "learning_rate": 8.450346922504168e-06,
      "loss": 0.0265,
      "step": 490360
    },
    {
      "epoch": 0.8025176253412148,
      "grad_norm": 1.1510084867477417,
      "learning_rate": 8.450281030290651e-06,
      "loss": 0.0257,
      "step": 490380
    },
    {
      "epoch": 0.8025503557798681,
      "grad_norm": 2.657280206680298,
      "learning_rate": 8.450215138077133e-06,
      "loss": 0.0298,
      "step": 490400
    },
    {
      "epoch": 0.8025830862185215,
      "grad_norm": 0.6862013936042786,
      "learning_rate": 8.450149245863617e-06,
      "loss": 0.0216,
      "step": 490420
    },
    {
      "epoch": 0.8026158166571749,
      "grad_norm": 1.0439265966415405,
      "learning_rate": 8.4500833536501e-06,
      "loss": 0.0233,
      "step": 490440
    },
    {
      "epoch": 0.8026485470958282,
      "grad_norm": 0.8452709317207336,
      "learning_rate": 8.450017461436582e-06,
      "loss": 0.026,
      "step": 490460
    },
    {
      "epoch": 0.8026812775344815,
      "grad_norm": 0.7936441898345947,
      "learning_rate": 8.449951569223066e-06,
      "loss": 0.0319,
      "step": 490480
    },
    {
      "epoch": 0.8027140079731349,
      "grad_norm": 0.964900016784668,
      "learning_rate": 8.449885677009548e-06,
      "loss": 0.0385,
      "step": 490500
    },
    {
      "epoch": 0.8027467384117882,
      "grad_norm": 0.4321238398551941,
      "learning_rate": 8.449819784796031e-06,
      "loss": 0.0264,
      "step": 490520
    },
    {
      "epoch": 0.8027794688504415,
      "grad_norm": 0.34651103615760803,
      "learning_rate": 8.449753892582515e-06,
      "loss": 0.0243,
      "step": 490540
    },
    {
      "epoch": 0.8028121992890949,
      "grad_norm": 0.21499623358249664,
      "learning_rate": 8.449688000368997e-06,
      "loss": 0.0295,
      "step": 490560
    },
    {
      "epoch": 0.8028449297277482,
      "grad_norm": 0.43461042642593384,
      "learning_rate": 8.44962210815548e-06,
      "loss": 0.0296,
      "step": 490580
    },
    {
      "epoch": 0.8028776601664015,
      "grad_norm": 0.5531631112098694,
      "learning_rate": 8.449556215941964e-06,
      "loss": 0.0253,
      "step": 490600
    },
    {
      "epoch": 0.8029103906050549,
      "grad_norm": 0.7899076342582703,
      "learning_rate": 8.449490323728446e-06,
      "loss": 0.0215,
      "step": 490620
    },
    {
      "epoch": 0.8029431210437082,
      "grad_norm": 1.7990494966506958,
      "learning_rate": 8.44942443151493e-06,
      "loss": 0.0225,
      "step": 490640
    },
    {
      "epoch": 0.8029758514823616,
      "grad_norm": 0.1011488288640976,
      "learning_rate": 8.449358539301411e-06,
      "loss": 0.0219,
      "step": 490660
    },
    {
      "epoch": 0.8030085819210149,
      "grad_norm": 1.3891164064407349,
      "learning_rate": 8.449292647087895e-06,
      "loss": 0.0204,
      "step": 490680
    },
    {
      "epoch": 0.8030413123596682,
      "grad_norm": 1.408186674118042,
      "learning_rate": 8.449226754874377e-06,
      "loss": 0.0261,
      "step": 490700
    },
    {
      "epoch": 0.8030740427983216,
      "grad_norm": 0.7742739915847778,
      "learning_rate": 8.44916086266086e-06,
      "loss": 0.0273,
      "step": 490720
    },
    {
      "epoch": 0.8031067732369749,
      "grad_norm": 0.4413016438484192,
      "learning_rate": 8.449094970447342e-06,
      "loss": 0.0337,
      "step": 490740
    },
    {
      "epoch": 0.8031395036756283,
      "grad_norm": 0.9623443484306335,
      "learning_rate": 8.449029078233826e-06,
      "loss": 0.0233,
      "step": 490760
    },
    {
      "epoch": 0.8031722341142816,
      "grad_norm": 1.3734718561172485,
      "learning_rate": 8.44896318602031e-06,
      "loss": 0.0337,
      "step": 490780
    },
    {
      "epoch": 0.8032049645529349,
      "grad_norm": 0.8947253227233887,
      "learning_rate": 8.448897293806791e-06,
      "loss": 0.0327,
      "step": 490800
    },
    {
      "epoch": 0.8032376949915883,
      "grad_norm": 1.1003862619400024,
      "learning_rate": 8.448831401593275e-06,
      "loss": 0.0255,
      "step": 490820
    },
    {
      "epoch": 0.8032704254302416,
      "grad_norm": 3.738210439682007,
      "learning_rate": 8.448765509379757e-06,
      "loss": 0.0262,
      "step": 490840
    },
    {
      "epoch": 0.803303155868895,
      "grad_norm": 2.401261568069458,
      "learning_rate": 8.44869961716624e-06,
      "loss": 0.0309,
      "step": 490860
    },
    {
      "epoch": 0.8033358863075483,
      "grad_norm": 0.7654679417610168,
      "learning_rate": 8.448633724952722e-06,
      "loss": 0.0256,
      "step": 490880
    },
    {
      "epoch": 0.8033686167462016,
      "grad_norm": 1.9775946140289307,
      "learning_rate": 8.448567832739206e-06,
      "loss": 0.0383,
      "step": 490900
    },
    {
      "epoch": 0.803401347184855,
      "grad_norm": 10.465570449829102,
      "learning_rate": 8.44850194052569e-06,
      "loss": 0.0345,
      "step": 490920
    },
    {
      "epoch": 0.8034340776235083,
      "grad_norm": 0.6071785092353821,
      "learning_rate": 8.448436048312171e-06,
      "loss": 0.0319,
      "step": 490940
    },
    {
      "epoch": 0.8034668080621616,
      "grad_norm": 1.2900956869125366,
      "learning_rate": 8.448370156098655e-06,
      "loss": 0.0308,
      "step": 490960
    },
    {
      "epoch": 0.803499538500815,
      "grad_norm": 2.221139430999756,
      "learning_rate": 8.448304263885138e-06,
      "loss": 0.0276,
      "step": 490980
    },
    {
      "epoch": 0.8035322689394683,
      "grad_norm": 0.28195899724960327,
      "learning_rate": 8.44823837167162e-06,
      "loss": 0.0229,
      "step": 491000
    },
    {
      "epoch": 0.8035649993781216,
      "grad_norm": 0.8357810974121094,
      "learning_rate": 8.448172479458104e-06,
      "loss": 0.036,
      "step": 491020
    },
    {
      "epoch": 0.803597729816775,
      "grad_norm": 1.080003023147583,
      "learning_rate": 8.448106587244586e-06,
      "loss": 0.035,
      "step": 491040
    },
    {
      "epoch": 0.8036304602554284,
      "grad_norm": 1.0790090560913086,
      "learning_rate": 8.44804069503107e-06,
      "loss": 0.0298,
      "step": 491060
    },
    {
      "epoch": 0.8036631906940817,
      "grad_norm": 0.5127272009849548,
      "learning_rate": 8.447974802817551e-06,
      "loss": 0.025,
      "step": 491080
    },
    {
      "epoch": 0.803695921132735,
      "grad_norm": 4.359793663024902,
      "learning_rate": 8.447908910604035e-06,
      "loss": 0.0416,
      "step": 491100
    },
    {
      "epoch": 0.8037286515713884,
      "grad_norm": 0.2797185182571411,
      "learning_rate": 8.447843018390518e-06,
      "loss": 0.0316,
      "step": 491120
    },
    {
      "epoch": 0.8037613820100417,
      "grad_norm": 0.4275369942188263,
      "learning_rate": 8.447777126177e-06,
      "loss": 0.0328,
      "step": 491140
    },
    {
      "epoch": 0.803794112448695,
      "grad_norm": 1.058531641960144,
      "learning_rate": 8.447711233963484e-06,
      "loss": 0.0279,
      "step": 491160
    },
    {
      "epoch": 0.8038268428873484,
      "grad_norm": 2.6679747104644775,
      "learning_rate": 8.447645341749966e-06,
      "loss": 0.0303,
      "step": 491180
    },
    {
      "epoch": 0.8038595733260017,
      "grad_norm": 1.547269344329834,
      "learning_rate": 8.44757944953645e-06,
      "loss": 0.0281,
      "step": 491200
    },
    {
      "epoch": 0.803892303764655,
      "grad_norm": 1.2729227542877197,
      "learning_rate": 8.447513557322931e-06,
      "loss": 0.0345,
      "step": 491220
    },
    {
      "epoch": 0.8039250342033084,
      "grad_norm": 1.354030728340149,
      "learning_rate": 8.447447665109415e-06,
      "loss": 0.0358,
      "step": 491240
    },
    {
      "epoch": 0.8039577646419618,
      "grad_norm": 0.436967134475708,
      "learning_rate": 8.447381772895897e-06,
      "loss": 0.0299,
      "step": 491260
    },
    {
      "epoch": 0.803990495080615,
      "grad_norm": 1.1457648277282715,
      "learning_rate": 8.44731588068238e-06,
      "loss": 0.0261,
      "step": 491280
    },
    {
      "epoch": 0.8040232255192684,
      "grad_norm": 2.170259952545166,
      "learning_rate": 8.447249988468862e-06,
      "loss": 0.03,
      "step": 491300
    },
    {
      "epoch": 0.8040559559579218,
      "grad_norm": 0.4113946855068207,
      "learning_rate": 8.447184096255346e-06,
      "loss": 0.0416,
      "step": 491320
    },
    {
      "epoch": 0.804088686396575,
      "grad_norm": 0.9544928669929504,
      "learning_rate": 8.44711820404183e-06,
      "loss": 0.0228,
      "step": 491340
    },
    {
      "epoch": 0.8041214168352284,
      "grad_norm": 0.8341064453125,
      "learning_rate": 8.447052311828311e-06,
      "loss": 0.0363,
      "step": 491360
    },
    {
      "epoch": 0.8041541472738818,
      "grad_norm": 1.8878613710403442,
      "learning_rate": 8.446986419614795e-06,
      "loss": 0.0386,
      "step": 491380
    },
    {
      "epoch": 0.804186877712535,
      "grad_norm": 0.39762547612190247,
      "learning_rate": 8.446920527401278e-06,
      "loss": 0.0314,
      "step": 491400
    },
    {
      "epoch": 0.8042196081511884,
      "grad_norm": 1.1166574954986572,
      "learning_rate": 8.44685463518776e-06,
      "loss": 0.0287,
      "step": 491420
    },
    {
      "epoch": 0.8042523385898418,
      "grad_norm": 0.7477651238441467,
      "learning_rate": 8.446788742974244e-06,
      "loss": 0.0405,
      "step": 491440
    },
    {
      "epoch": 0.8042850690284952,
      "grad_norm": 0.27696430683135986,
      "learning_rate": 8.446722850760728e-06,
      "loss": 0.026,
      "step": 491460
    },
    {
      "epoch": 0.8043177994671484,
      "grad_norm": 2.7352793216705322,
      "learning_rate": 8.44665695854721e-06,
      "loss": 0.0295,
      "step": 491480
    },
    {
      "epoch": 0.8043505299058018,
      "grad_norm": 0.48315101861953735,
      "learning_rate": 8.446591066333693e-06,
      "loss": 0.0319,
      "step": 491500
    },
    {
      "epoch": 0.8043832603444552,
      "grad_norm": 0.400741845369339,
      "learning_rate": 8.446525174120175e-06,
      "loss": 0.0339,
      "step": 491520
    },
    {
      "epoch": 0.8044159907831084,
      "grad_norm": 0.7151246666908264,
      "learning_rate": 8.446459281906658e-06,
      "loss": 0.0343,
      "step": 491540
    },
    {
      "epoch": 0.8044487212217618,
      "grad_norm": 0.23803484439849854,
      "learning_rate": 8.44639338969314e-06,
      "loss": 0.035,
      "step": 491560
    },
    {
      "epoch": 0.8044814516604152,
      "grad_norm": 1.970608115196228,
      "learning_rate": 8.446327497479624e-06,
      "loss": 0.027,
      "step": 491580
    },
    {
      "epoch": 0.8045141820990684,
      "grad_norm": 1.6082884073257446,
      "learning_rate": 8.446261605266106e-06,
      "loss": 0.0369,
      "step": 491600
    },
    {
      "epoch": 0.8045469125377218,
      "grad_norm": 4.325967788696289,
      "learning_rate": 8.44619571305259e-06,
      "loss": 0.0354,
      "step": 491620
    },
    {
      "epoch": 0.8045796429763752,
      "grad_norm": 2.0717990398406982,
      "learning_rate": 8.446129820839071e-06,
      "loss": 0.0311,
      "step": 491640
    },
    {
      "epoch": 0.8046123734150286,
      "grad_norm": 0.6987829208374023,
      "learning_rate": 8.446063928625555e-06,
      "loss": 0.0263,
      "step": 491660
    },
    {
      "epoch": 0.8046451038536818,
      "grad_norm": 1.1690152883529663,
      "learning_rate": 8.445998036412037e-06,
      "loss": 0.0377,
      "step": 491680
    },
    {
      "epoch": 0.8046778342923352,
      "grad_norm": 1.7404413223266602,
      "learning_rate": 8.44593214419852e-06,
      "loss": 0.0279,
      "step": 491700
    },
    {
      "epoch": 0.8047105647309886,
      "grad_norm": 0.8418764472007751,
      "learning_rate": 8.445866251985004e-06,
      "loss": 0.0348,
      "step": 491720
    },
    {
      "epoch": 0.8047432951696418,
      "grad_norm": 1.372101068496704,
      "learning_rate": 8.445800359771486e-06,
      "loss": 0.0306,
      "step": 491740
    },
    {
      "epoch": 0.8047760256082952,
      "grad_norm": 1.0533355474472046,
      "learning_rate": 8.44573446755797e-06,
      "loss": 0.032,
      "step": 491760
    },
    {
      "epoch": 0.8048087560469486,
      "grad_norm": 1.0263301134109497,
      "learning_rate": 8.445668575344453e-06,
      "loss": 0.0259,
      "step": 491780
    },
    {
      "epoch": 0.8048414864856018,
      "grad_norm": 1.9693483114242554,
      "learning_rate": 8.445602683130935e-06,
      "loss": 0.0256,
      "step": 491800
    },
    {
      "epoch": 0.8048742169242552,
      "grad_norm": 0.7767791152000427,
      "learning_rate": 8.445536790917419e-06,
      "loss": 0.044,
      "step": 491820
    },
    {
      "epoch": 0.8049069473629086,
      "grad_norm": 0.3456188440322876,
      "learning_rate": 8.445470898703902e-06,
      "loss": 0.0342,
      "step": 491840
    },
    {
      "epoch": 0.804939677801562,
      "grad_norm": 0.7865333557128906,
      "learning_rate": 8.445405006490384e-06,
      "loss": 0.0329,
      "step": 491860
    },
    {
      "epoch": 0.8049724082402152,
      "grad_norm": 0.981351912021637,
      "learning_rate": 8.445339114276868e-06,
      "loss": 0.0315,
      "step": 491880
    },
    {
      "epoch": 0.8050051386788686,
      "grad_norm": 0.5528863072395325,
      "learning_rate": 8.44527322206335e-06,
      "loss": 0.0314,
      "step": 491900
    },
    {
      "epoch": 0.805037869117522,
      "grad_norm": 0.309736430644989,
      "learning_rate": 8.445207329849833e-06,
      "loss": 0.024,
      "step": 491920
    },
    {
      "epoch": 0.8050705995561752,
      "grad_norm": 0.6459994912147522,
      "learning_rate": 8.445141437636315e-06,
      "loss": 0.0356,
      "step": 491940
    },
    {
      "epoch": 0.8051033299948286,
      "grad_norm": 1.2828872203826904,
      "learning_rate": 8.445075545422799e-06,
      "loss": 0.0371,
      "step": 491960
    },
    {
      "epoch": 0.805136060433482,
      "grad_norm": 0.6956921219825745,
      "learning_rate": 8.44500965320928e-06,
      "loss": 0.0275,
      "step": 491980
    },
    {
      "epoch": 0.8051687908721352,
      "grad_norm": 0.6845325827598572,
      "learning_rate": 8.444943760995764e-06,
      "loss": 0.0426,
      "step": 492000
    },
    {
      "epoch": 0.8052015213107886,
      "grad_norm": 1.9569026231765747,
      "learning_rate": 8.444877868782246e-06,
      "loss": 0.0381,
      "step": 492020
    },
    {
      "epoch": 0.805234251749442,
      "grad_norm": 1.2743213176727295,
      "learning_rate": 8.44481197656873e-06,
      "loss": 0.0319,
      "step": 492040
    },
    {
      "epoch": 0.8052669821880953,
      "grad_norm": 1.925307273864746,
      "learning_rate": 8.444746084355211e-06,
      "loss": 0.0238,
      "step": 492060
    },
    {
      "epoch": 0.8052997126267486,
      "grad_norm": 0.279744416475296,
      "learning_rate": 8.444680192141695e-06,
      "loss": 0.0221,
      "step": 492080
    },
    {
      "epoch": 0.805332443065402,
      "grad_norm": 0.4165261685848236,
      "learning_rate": 8.444614299928177e-06,
      "loss": 0.0357,
      "step": 492100
    },
    {
      "epoch": 0.8053651735040553,
      "grad_norm": 0.6650952100753784,
      "learning_rate": 8.44454840771466e-06,
      "loss": 0.0357,
      "step": 492120
    },
    {
      "epoch": 0.8053979039427086,
      "grad_norm": 1.6231476068496704,
      "learning_rate": 8.444482515501144e-06,
      "loss": 0.0241,
      "step": 492140
    },
    {
      "epoch": 0.805430634381362,
      "grad_norm": 0.8958361148834229,
      "learning_rate": 8.444416623287626e-06,
      "loss": 0.0323,
      "step": 492160
    },
    {
      "epoch": 0.8054633648200153,
      "grad_norm": 0.7533127665519714,
      "learning_rate": 8.44435073107411e-06,
      "loss": 0.0239,
      "step": 492180
    },
    {
      "epoch": 0.8054960952586686,
      "grad_norm": 1.0992929935455322,
      "learning_rate": 8.444284838860593e-06,
      "loss": 0.0349,
      "step": 492200
    },
    {
      "epoch": 0.805528825697322,
      "grad_norm": 1.509856939315796,
      "learning_rate": 8.444218946647075e-06,
      "loss": 0.0318,
      "step": 492220
    },
    {
      "epoch": 0.8055615561359754,
      "grad_norm": 1.5749675035476685,
      "learning_rate": 8.444153054433559e-06,
      "loss": 0.0223,
      "step": 492240
    },
    {
      "epoch": 0.8055942865746287,
      "grad_norm": 0.5555159449577332,
      "learning_rate": 8.444087162220042e-06,
      "loss": 0.0235,
      "step": 492260
    },
    {
      "epoch": 0.805627017013282,
      "grad_norm": 1.3144645690917969,
      "learning_rate": 8.444021270006524e-06,
      "loss": 0.039,
      "step": 492280
    },
    {
      "epoch": 0.8056597474519354,
      "grad_norm": 0.8360260128974915,
      "learning_rate": 8.443955377793008e-06,
      "loss": 0.0292,
      "step": 492300
    },
    {
      "epoch": 0.8056924778905887,
      "grad_norm": 1.0163705348968506,
      "learning_rate": 8.44388948557949e-06,
      "loss": 0.025,
      "step": 492320
    },
    {
      "epoch": 0.805725208329242,
      "grad_norm": 0.3212011456489563,
      "learning_rate": 8.443823593365973e-06,
      "loss": 0.032,
      "step": 492340
    },
    {
      "epoch": 0.8057579387678954,
      "grad_norm": 0.7251337170600891,
      "learning_rate": 8.443757701152455e-06,
      "loss": 0.0255,
      "step": 492360
    },
    {
      "epoch": 0.8057906692065487,
      "grad_norm": 0.9285417199134827,
      "learning_rate": 8.443691808938939e-06,
      "loss": 0.0405,
      "step": 492380
    },
    {
      "epoch": 0.805823399645202,
      "grad_norm": 4.1700944900512695,
      "learning_rate": 8.44362591672542e-06,
      "loss": 0.029,
      "step": 492400
    },
    {
      "epoch": 0.8058561300838554,
      "grad_norm": 0.5960577130317688,
      "learning_rate": 8.443560024511904e-06,
      "loss": 0.0275,
      "step": 492420
    },
    {
      "epoch": 0.8058888605225087,
      "grad_norm": 2.0522098541259766,
      "learning_rate": 8.443494132298386e-06,
      "loss": 0.0386,
      "step": 492440
    },
    {
      "epoch": 0.8059215909611621,
      "grad_norm": 0.6823962330818176,
      "learning_rate": 8.44342824008487e-06,
      "loss": 0.0243,
      "step": 492460
    },
    {
      "epoch": 0.8059543213998154,
      "grad_norm": 4.055010795593262,
      "learning_rate": 8.443362347871351e-06,
      "loss": 0.0263,
      "step": 492480
    },
    {
      "epoch": 0.8059870518384687,
      "grad_norm": 0.5977574586868286,
      "learning_rate": 8.443296455657835e-06,
      "loss": 0.0265,
      "step": 492500
    },
    {
      "epoch": 0.8060197822771221,
      "grad_norm": 3.516744375228882,
      "learning_rate": 8.443230563444319e-06,
      "loss": 0.0324,
      "step": 492520
    },
    {
      "epoch": 0.8060525127157754,
      "grad_norm": 0.8094164133071899,
      "learning_rate": 8.4431646712308e-06,
      "loss": 0.0242,
      "step": 492540
    },
    {
      "epoch": 0.8060852431544288,
      "grad_norm": 1.2373253107070923,
      "learning_rate": 8.443098779017284e-06,
      "loss": 0.0238,
      "step": 492560
    },
    {
      "epoch": 0.8061179735930821,
      "grad_norm": 0.26094356179237366,
      "learning_rate": 8.443032886803768e-06,
      "loss": 0.0269,
      "step": 492580
    },
    {
      "epoch": 0.8061507040317354,
      "grad_norm": 1.7103869915008545,
      "learning_rate": 8.44296699459025e-06,
      "loss": 0.0348,
      "step": 492600
    },
    {
      "epoch": 0.8061834344703888,
      "grad_norm": 0.338599294424057,
      "learning_rate": 8.442901102376733e-06,
      "loss": 0.0314,
      "step": 492620
    },
    {
      "epoch": 0.8062161649090421,
      "grad_norm": 1.3433682918548584,
      "learning_rate": 8.442835210163217e-06,
      "loss": 0.0362,
      "step": 492640
    },
    {
      "epoch": 0.8062488953476955,
      "grad_norm": 3.054276704788208,
      "learning_rate": 8.442769317949699e-06,
      "loss": 0.0277,
      "step": 492660
    },
    {
      "epoch": 0.8062816257863488,
      "grad_norm": 4.042740345001221,
      "learning_rate": 8.442703425736182e-06,
      "loss": 0.0254,
      "step": 492680
    },
    {
      "epoch": 0.8063143562250021,
      "grad_norm": 1.6278141736984253,
      "learning_rate": 8.442637533522664e-06,
      "loss": 0.0352,
      "step": 492700
    },
    {
      "epoch": 0.8063470866636555,
      "grad_norm": 0.6026356816291809,
      "learning_rate": 8.442571641309148e-06,
      "loss": 0.0232,
      "step": 492720
    },
    {
      "epoch": 0.8063798171023088,
      "grad_norm": 1.5999877452850342,
      "learning_rate": 8.44250574909563e-06,
      "loss": 0.0288,
      "step": 492740
    },
    {
      "epoch": 0.8064125475409621,
      "grad_norm": 0.9733754992485046,
      "learning_rate": 8.442439856882113e-06,
      "loss": 0.032,
      "step": 492760
    },
    {
      "epoch": 0.8064452779796155,
      "grad_norm": 0.38267192244529724,
      "learning_rate": 8.442373964668595e-06,
      "loss": 0.0252,
      "step": 492780
    },
    {
      "epoch": 0.8064780084182688,
      "grad_norm": 1.6562843322753906,
      "learning_rate": 8.442308072455079e-06,
      "loss": 0.0371,
      "step": 492800
    },
    {
      "epoch": 0.8065107388569221,
      "grad_norm": 1.296776294708252,
      "learning_rate": 8.44224218024156e-06,
      "loss": 0.033,
      "step": 492820
    },
    {
      "epoch": 0.8065434692955755,
      "grad_norm": 0.15553411841392517,
      "learning_rate": 8.442176288028044e-06,
      "loss": 0.0245,
      "step": 492840
    },
    {
      "epoch": 0.8065761997342289,
      "grad_norm": 1.126509666442871,
      "learning_rate": 8.442110395814526e-06,
      "loss": 0.0303,
      "step": 492860
    },
    {
      "epoch": 0.8066089301728822,
      "grad_norm": 1.216692328453064,
      "learning_rate": 8.44204450360101e-06,
      "loss": 0.0289,
      "step": 492880
    },
    {
      "epoch": 0.8066416606115355,
      "grad_norm": 1.0371900796890259,
      "learning_rate": 8.441978611387493e-06,
      "loss": 0.0269,
      "step": 492900
    },
    {
      "epoch": 0.8066743910501889,
      "grad_norm": 1.0545845031738281,
      "learning_rate": 8.441912719173975e-06,
      "loss": 0.0383,
      "step": 492920
    },
    {
      "epoch": 0.8067071214888422,
      "grad_norm": 0.32360368967056274,
      "learning_rate": 8.441846826960459e-06,
      "loss": 0.0222,
      "step": 492940
    },
    {
      "epoch": 0.8067398519274955,
      "grad_norm": 1.1071133613586426,
      "learning_rate": 8.441780934746942e-06,
      "loss": 0.0278,
      "step": 492960
    },
    {
      "epoch": 0.8067725823661489,
      "grad_norm": 0.5166375637054443,
      "learning_rate": 8.441715042533424e-06,
      "loss": 0.0252,
      "step": 492980
    },
    {
      "epoch": 0.8068053128048022,
      "grad_norm": 1.258386254310608,
      "learning_rate": 8.441649150319908e-06,
      "loss": 0.0298,
      "step": 493000
    },
    {
      "epoch": 0.8068380432434555,
      "grad_norm": 1.3228975534439087,
      "learning_rate": 8.441583258106391e-06,
      "loss": 0.0341,
      "step": 493020
    },
    {
      "epoch": 0.8068707736821089,
      "grad_norm": 2.0192208290100098,
      "learning_rate": 8.441517365892873e-06,
      "loss": 0.0411,
      "step": 493040
    },
    {
      "epoch": 0.8069035041207622,
      "grad_norm": 0.7193528413772583,
      "learning_rate": 8.441451473679357e-06,
      "loss": 0.0262,
      "step": 493060
    },
    {
      "epoch": 0.8069362345594155,
      "grad_norm": 0.1798824667930603,
      "learning_rate": 8.441385581465839e-06,
      "loss": 0.0271,
      "step": 493080
    },
    {
      "epoch": 0.8069689649980689,
      "grad_norm": 1.0976766347885132,
      "learning_rate": 8.441319689252322e-06,
      "loss": 0.0293,
      "step": 493100
    },
    {
      "epoch": 0.8070016954367223,
      "grad_norm": 3.1810648441314697,
      "learning_rate": 8.441253797038804e-06,
      "loss": 0.0374,
      "step": 493120
    },
    {
      "epoch": 0.8070344258753755,
      "grad_norm": 1.2799291610717773,
      "learning_rate": 8.441187904825288e-06,
      "loss": 0.0284,
      "step": 493140
    },
    {
      "epoch": 0.8070671563140289,
      "grad_norm": 0.9140360951423645,
      "learning_rate": 8.44112201261177e-06,
      "loss": 0.0405,
      "step": 493160
    },
    {
      "epoch": 0.8070998867526823,
      "grad_norm": 0.4646371304988861,
      "learning_rate": 8.441056120398253e-06,
      "loss": 0.027,
      "step": 493180
    },
    {
      "epoch": 0.8071326171913356,
      "grad_norm": 2.041968584060669,
      "learning_rate": 8.440990228184735e-06,
      "loss": 0.0328,
      "step": 493200
    },
    {
      "epoch": 0.8071653476299889,
      "grad_norm": 1.0499651432037354,
      "learning_rate": 8.440924335971219e-06,
      "loss": 0.0239,
      "step": 493220
    },
    {
      "epoch": 0.8071980780686423,
      "grad_norm": 0.7080577611923218,
      "learning_rate": 8.440858443757702e-06,
      "loss": 0.0313,
      "step": 493240
    },
    {
      "epoch": 0.8072308085072956,
      "grad_norm": 1.1101441383361816,
      "learning_rate": 8.440792551544184e-06,
      "loss": 0.0222,
      "step": 493260
    },
    {
      "epoch": 0.8072635389459489,
      "grad_norm": 0.43301019072532654,
      "learning_rate": 8.440726659330668e-06,
      "loss": 0.0304,
      "step": 493280
    },
    {
      "epoch": 0.8072962693846023,
      "grad_norm": 0.8122422695159912,
      "learning_rate": 8.44066076711715e-06,
      "loss": 0.0252,
      "step": 493300
    },
    {
      "epoch": 0.8073289998232557,
      "grad_norm": 2.5775773525238037,
      "learning_rate": 8.440594874903633e-06,
      "loss": 0.0299,
      "step": 493320
    },
    {
      "epoch": 0.8073617302619089,
      "grad_norm": 0.60621577501297,
      "learning_rate": 8.440528982690115e-06,
      "loss": 0.0244,
      "step": 493340
    },
    {
      "epoch": 0.8073944607005623,
      "grad_norm": 0.6628276705741882,
      "learning_rate": 8.440463090476599e-06,
      "loss": 0.0328,
      "step": 493360
    },
    {
      "epoch": 0.8074271911392157,
      "grad_norm": 0.36114302277565,
      "learning_rate": 8.440397198263082e-06,
      "loss": 0.0271,
      "step": 493380
    },
    {
      "epoch": 0.8074599215778689,
      "grad_norm": 1.4464528560638428,
      "learning_rate": 8.440331306049564e-06,
      "loss": 0.0282,
      "step": 493400
    },
    {
      "epoch": 0.8074926520165223,
      "grad_norm": 0.36896535754203796,
      "learning_rate": 8.440265413836048e-06,
      "loss": 0.044,
      "step": 493420
    },
    {
      "epoch": 0.8075253824551757,
      "grad_norm": 1.771056890487671,
      "learning_rate": 8.440199521622531e-06,
      "loss": 0.0332,
      "step": 493440
    },
    {
      "epoch": 0.807558112893829,
      "grad_norm": 0.38481226563453674,
      "learning_rate": 8.440133629409013e-06,
      "loss": 0.0349,
      "step": 493460
    },
    {
      "epoch": 0.8075908433324823,
      "grad_norm": 0.5722929239273071,
      "learning_rate": 8.440067737195497e-06,
      "loss": 0.0363,
      "step": 493480
    },
    {
      "epoch": 0.8076235737711357,
      "grad_norm": 0.46692967414855957,
      "learning_rate": 8.440001844981979e-06,
      "loss": 0.022,
      "step": 493500
    },
    {
      "epoch": 0.8076563042097891,
      "grad_norm": 0.5700827240943909,
      "learning_rate": 8.439935952768462e-06,
      "loss": 0.0269,
      "step": 493520
    },
    {
      "epoch": 0.8076890346484423,
      "grad_norm": 0.994552493095398,
      "learning_rate": 8.439870060554944e-06,
      "loss": 0.0339,
      "step": 493540
    },
    {
      "epoch": 0.8077217650870957,
      "grad_norm": 0.8370088934898376,
      "learning_rate": 8.439804168341428e-06,
      "loss": 0.0327,
      "step": 493560
    },
    {
      "epoch": 0.8077544955257491,
      "grad_norm": 0.7018661499023438,
      "learning_rate": 8.439738276127911e-06,
      "loss": 0.0355,
      "step": 493580
    },
    {
      "epoch": 0.8077872259644023,
      "grad_norm": 0.4244977831840515,
      "learning_rate": 8.439672383914393e-06,
      "loss": 0.0208,
      "step": 493600
    },
    {
      "epoch": 0.8078199564030557,
      "grad_norm": 0.9226893186569214,
      "learning_rate": 8.439606491700877e-06,
      "loss": 0.0302,
      "step": 493620
    },
    {
      "epoch": 0.8078526868417091,
      "grad_norm": 1.069764256477356,
      "learning_rate": 8.439540599487359e-06,
      "loss": 0.0284,
      "step": 493640
    },
    {
      "epoch": 0.8078854172803623,
      "grad_norm": 1.21880304813385,
      "learning_rate": 8.439474707273842e-06,
      "loss": 0.0297,
      "step": 493660
    },
    {
      "epoch": 0.8079181477190157,
      "grad_norm": 5.8611741065979,
      "learning_rate": 8.439408815060324e-06,
      "loss": 0.0281,
      "step": 493680
    },
    {
      "epoch": 0.8079508781576691,
      "grad_norm": 0.6354959607124329,
      "learning_rate": 8.439342922846808e-06,
      "loss": 0.0283,
      "step": 493700
    },
    {
      "epoch": 0.8079836085963225,
      "grad_norm": 1.1485161781311035,
      "learning_rate": 8.43927703063329e-06,
      "loss": 0.0265,
      "step": 493720
    },
    {
      "epoch": 0.8080163390349757,
      "grad_norm": 0.6092550158500671,
      "learning_rate": 8.439211138419773e-06,
      "loss": 0.0316,
      "step": 493740
    },
    {
      "epoch": 0.8080490694736291,
      "grad_norm": 0.6062476634979248,
      "learning_rate": 8.439145246206257e-06,
      "loss": 0.032,
      "step": 493760
    },
    {
      "epoch": 0.8080817999122825,
      "grad_norm": 1.468400001525879,
      "learning_rate": 8.439079353992739e-06,
      "loss": 0.0276,
      "step": 493780
    },
    {
      "epoch": 0.8081145303509357,
      "grad_norm": 0.30959317088127136,
      "learning_rate": 8.439013461779222e-06,
      "loss": 0.0204,
      "step": 493800
    },
    {
      "epoch": 0.8081472607895891,
      "grad_norm": 0.8778554201126099,
      "learning_rate": 8.438947569565706e-06,
      "loss": 0.0345,
      "step": 493820
    },
    {
      "epoch": 0.8081799912282425,
      "grad_norm": 1.449059009552002,
      "learning_rate": 8.438881677352188e-06,
      "loss": 0.0399,
      "step": 493840
    },
    {
      "epoch": 0.8082127216668957,
      "grad_norm": 5.441015243530273,
      "learning_rate": 8.438815785138672e-06,
      "loss": 0.0368,
      "step": 493860
    },
    {
      "epoch": 0.8082454521055491,
      "grad_norm": 1.2960597276687622,
      "learning_rate": 8.438749892925153e-06,
      "loss": 0.03,
      "step": 493880
    },
    {
      "epoch": 0.8082781825442025,
      "grad_norm": 1.9964268207550049,
      "learning_rate": 8.438684000711637e-06,
      "loss": 0.0283,
      "step": 493900
    },
    {
      "epoch": 0.8083109129828558,
      "grad_norm": 0.5352096557617188,
      "learning_rate": 8.43861810849812e-06,
      "loss": 0.0369,
      "step": 493920
    },
    {
      "epoch": 0.8083436434215091,
      "grad_norm": 1.2275290489196777,
      "learning_rate": 8.438552216284603e-06,
      "loss": 0.0249,
      "step": 493940
    },
    {
      "epoch": 0.8083763738601625,
      "grad_norm": 1.2377252578735352,
      "learning_rate": 8.438486324071086e-06,
      "loss": 0.0439,
      "step": 493960
    },
    {
      "epoch": 0.8084091042988159,
      "grad_norm": 0.24512983858585358,
      "learning_rate": 8.438420431857568e-06,
      "loss": 0.0297,
      "step": 493980
    },
    {
      "epoch": 0.8084418347374691,
      "grad_norm": 1.2283719778060913,
      "learning_rate": 8.438354539644052e-06,
      "loss": 0.0328,
      "step": 494000
    },
    {
      "epoch": 0.8084745651761225,
      "grad_norm": 2.7535955905914307,
      "learning_rate": 8.438288647430533e-06,
      "loss": 0.0316,
      "step": 494020
    },
    {
      "epoch": 0.8085072956147759,
      "grad_norm": 3.8938779830932617,
      "learning_rate": 8.438222755217017e-06,
      "loss": 0.0337,
      "step": 494040
    },
    {
      "epoch": 0.8085400260534291,
      "grad_norm": 0.8104354739189148,
      "learning_rate": 8.438156863003499e-06,
      "loss": 0.0356,
      "step": 494060
    },
    {
      "epoch": 0.8085727564920825,
      "grad_norm": 1.5155739784240723,
      "learning_rate": 8.438090970789983e-06,
      "loss": 0.0312,
      "step": 494080
    },
    {
      "epoch": 0.8086054869307359,
      "grad_norm": 0.4724840819835663,
      "learning_rate": 8.438025078576464e-06,
      "loss": 0.0292,
      "step": 494100
    },
    {
      "epoch": 0.8086382173693892,
      "grad_norm": 1.6744108200073242,
      "learning_rate": 8.437959186362948e-06,
      "loss": 0.025,
      "step": 494120
    },
    {
      "epoch": 0.8086709478080425,
      "grad_norm": 1.0255681276321411,
      "learning_rate": 8.43789329414943e-06,
      "loss": 0.0364,
      "step": 494140
    },
    {
      "epoch": 0.8087036782466959,
      "grad_norm": 0.9178604483604431,
      "learning_rate": 8.437827401935913e-06,
      "loss": 0.0259,
      "step": 494160
    },
    {
      "epoch": 0.8087364086853492,
      "grad_norm": 1.3287525177001953,
      "learning_rate": 8.437761509722397e-06,
      "loss": 0.0262,
      "step": 494180
    },
    {
      "epoch": 0.8087691391240025,
      "grad_norm": 2.5369064807891846,
      "learning_rate": 8.437695617508879e-06,
      "loss": 0.0389,
      "step": 494200
    },
    {
      "epoch": 0.8088018695626559,
      "grad_norm": 0.16185273230075836,
      "learning_rate": 8.437629725295363e-06,
      "loss": 0.0271,
      "step": 494220
    },
    {
      "epoch": 0.8088346000013092,
      "grad_norm": 1.0900684595108032,
      "learning_rate": 8.437563833081846e-06,
      "loss": 0.0288,
      "step": 494240
    },
    {
      "epoch": 0.8088673304399625,
      "grad_norm": 1.8169740438461304,
      "learning_rate": 8.437497940868328e-06,
      "loss": 0.0426,
      "step": 494260
    },
    {
      "epoch": 0.8089000608786159,
      "grad_norm": 0.43939435482025146,
      "learning_rate": 8.437432048654812e-06,
      "loss": 0.0307,
      "step": 494280
    },
    {
      "epoch": 0.8089327913172693,
      "grad_norm": 0.199275940656662,
      "learning_rate": 8.437366156441295e-06,
      "loss": 0.0237,
      "step": 494300
    },
    {
      "epoch": 0.8089655217559226,
      "grad_norm": 0.8649216890335083,
      "learning_rate": 8.437300264227777e-06,
      "loss": 0.0231,
      "step": 494320
    },
    {
      "epoch": 0.8089982521945759,
      "grad_norm": 0.6243184804916382,
      "learning_rate": 8.43723437201426e-06,
      "loss": 0.0368,
      "step": 494340
    },
    {
      "epoch": 0.8090309826332293,
      "grad_norm": 0.7088470458984375,
      "learning_rate": 8.437168479800743e-06,
      "loss": 0.0315,
      "step": 494360
    },
    {
      "epoch": 0.8090637130718826,
      "grad_norm": 0.5603579878807068,
      "learning_rate": 8.437102587587226e-06,
      "loss": 0.0289,
      "step": 494380
    },
    {
      "epoch": 0.8090964435105359,
      "grad_norm": 1.1023260354995728,
      "learning_rate": 8.437036695373708e-06,
      "loss": 0.0217,
      "step": 494400
    },
    {
      "epoch": 0.8091291739491893,
      "grad_norm": 1.1319564580917358,
      "learning_rate": 8.436970803160192e-06,
      "loss": 0.0276,
      "step": 494420
    },
    {
      "epoch": 0.8091619043878426,
      "grad_norm": 4.787477493286133,
      "learning_rate": 8.436904910946674e-06,
      "loss": 0.0411,
      "step": 494440
    },
    {
      "epoch": 0.8091946348264959,
      "grad_norm": 1.1478370428085327,
      "learning_rate": 8.436839018733157e-06,
      "loss": 0.0369,
      "step": 494460
    },
    {
      "epoch": 0.8092273652651493,
      "grad_norm": 1.0974452495574951,
      "learning_rate": 8.436773126519639e-06,
      "loss": 0.0336,
      "step": 494480
    },
    {
      "epoch": 0.8092600957038026,
      "grad_norm": 0.14663273096084595,
      "learning_rate": 8.436707234306123e-06,
      "loss": 0.0304,
      "step": 494500
    },
    {
      "epoch": 0.809292826142456,
      "grad_norm": 0.5061530470848083,
      "learning_rate": 8.436641342092605e-06,
      "loss": 0.0349,
      "step": 494520
    },
    {
      "epoch": 0.8093255565811093,
      "grad_norm": 0.2412770688533783,
      "learning_rate": 8.436575449879088e-06,
      "loss": 0.0232,
      "step": 494540
    },
    {
      "epoch": 0.8093582870197626,
      "grad_norm": 2.1933815479278564,
      "learning_rate": 8.436509557665572e-06,
      "loss": 0.0275,
      "step": 494560
    },
    {
      "epoch": 0.809391017458416,
      "grad_norm": 0.31717807054519653,
      "learning_rate": 8.436443665452054e-06,
      "loss": 0.0179,
      "step": 494580
    },
    {
      "epoch": 0.8094237478970693,
      "grad_norm": 0.5411447286605835,
      "learning_rate": 8.436377773238537e-06,
      "loss": 0.0259,
      "step": 494600
    },
    {
      "epoch": 0.8094564783357227,
      "grad_norm": 0.49473103880882263,
      "learning_rate": 8.43631188102502e-06,
      "loss": 0.0284,
      "step": 494620
    },
    {
      "epoch": 0.809489208774376,
      "grad_norm": 0.9763095378875732,
      "learning_rate": 8.436245988811503e-06,
      "loss": 0.0327,
      "step": 494640
    },
    {
      "epoch": 0.8095219392130293,
      "grad_norm": 0.677151620388031,
      "learning_rate": 8.436180096597986e-06,
      "loss": 0.0342,
      "step": 494660
    },
    {
      "epoch": 0.8095546696516827,
      "grad_norm": 2.230369806289673,
      "learning_rate": 8.43611420438447e-06,
      "loss": 0.0264,
      "step": 494680
    },
    {
      "epoch": 0.809587400090336,
      "grad_norm": 1.6958502531051636,
      "learning_rate": 8.436048312170952e-06,
      "loss": 0.0363,
      "step": 494700
    },
    {
      "epoch": 0.8096201305289894,
      "grad_norm": 1.3097972869873047,
      "learning_rate": 8.435982419957435e-06,
      "loss": 0.0333,
      "step": 494720
    },
    {
      "epoch": 0.8096528609676427,
      "grad_norm": 0.5331571698188782,
      "learning_rate": 8.435916527743917e-06,
      "loss": 0.0248,
      "step": 494740
    },
    {
      "epoch": 0.809685591406296,
      "grad_norm": 0.7264554500579834,
      "learning_rate": 8.4358506355304e-06,
      "loss": 0.0302,
      "step": 494760
    },
    {
      "epoch": 0.8097183218449494,
      "grad_norm": 0.23570814728736877,
      "learning_rate": 8.435784743316883e-06,
      "loss": 0.0318,
      "step": 494780
    },
    {
      "epoch": 0.8097510522836027,
      "grad_norm": 0.5332327485084534,
      "learning_rate": 8.435718851103366e-06,
      "loss": 0.0244,
      "step": 494800
    },
    {
      "epoch": 0.809783782722256,
      "grad_norm": 0.8766617774963379,
      "learning_rate": 8.435652958889848e-06,
      "loss": 0.0276,
      "step": 494820
    },
    {
      "epoch": 0.8098165131609094,
      "grad_norm": 0.5025815963745117,
      "learning_rate": 8.435587066676332e-06,
      "loss": 0.0254,
      "step": 494840
    },
    {
      "epoch": 0.8098492435995627,
      "grad_norm": 1.0369484424591064,
      "learning_rate": 8.435521174462814e-06,
      "loss": 0.0298,
      "step": 494860
    },
    {
      "epoch": 0.809881974038216,
      "grad_norm": 1.7828339338302612,
      "learning_rate": 8.435455282249297e-06,
      "loss": 0.0202,
      "step": 494880
    },
    {
      "epoch": 0.8099147044768694,
      "grad_norm": 1.2818200588226318,
      "learning_rate": 8.435389390035779e-06,
      "loss": 0.0273,
      "step": 494900
    },
    {
      "epoch": 0.8099474349155228,
      "grad_norm": 0.7160770893096924,
      "learning_rate": 8.435323497822263e-06,
      "loss": 0.0274,
      "step": 494920
    },
    {
      "epoch": 0.809980165354176,
      "grad_norm": 0.5757249593734741,
      "learning_rate": 8.435257605608745e-06,
      "loss": 0.0222,
      "step": 494940
    },
    {
      "epoch": 0.8100128957928294,
      "grad_norm": 0.7715139389038086,
      "learning_rate": 8.435191713395228e-06,
      "loss": 0.0315,
      "step": 494960
    },
    {
      "epoch": 0.8100456262314828,
      "grad_norm": 0.688583254814148,
      "learning_rate": 8.435125821181712e-06,
      "loss": 0.0521,
      "step": 494980
    },
    {
      "epoch": 0.8100783566701361,
      "grad_norm": 1.6911365985870361,
      "learning_rate": 8.435059928968194e-06,
      "loss": 0.0239,
      "step": 495000
    },
    {
      "epoch": 0.8101110871087894,
      "grad_norm": 1.1165509223937988,
      "learning_rate": 8.434994036754677e-06,
      "loss": 0.0303,
      "step": 495020
    },
    {
      "epoch": 0.8101438175474428,
      "grad_norm": 1.367767572402954,
      "learning_rate": 8.43492814454116e-06,
      "loss": 0.0367,
      "step": 495040
    },
    {
      "epoch": 0.8101765479860961,
      "grad_norm": 0.7071044445037842,
      "learning_rate": 8.434862252327643e-06,
      "loss": 0.0282,
      "step": 495060
    },
    {
      "epoch": 0.8102092784247494,
      "grad_norm": 1.3593339920043945,
      "learning_rate": 8.434796360114126e-06,
      "loss": 0.0255,
      "step": 495080
    },
    {
      "epoch": 0.8102420088634028,
      "grad_norm": 0.20149260759353638,
      "learning_rate": 8.43473046790061e-06,
      "loss": 0.0372,
      "step": 495100
    },
    {
      "epoch": 0.8102747393020562,
      "grad_norm": 1.1458826065063477,
      "learning_rate": 8.434664575687092e-06,
      "loss": 0.0447,
      "step": 495120
    },
    {
      "epoch": 0.8103074697407094,
      "grad_norm": 0.37511929869651794,
      "learning_rate": 8.434598683473575e-06,
      "loss": 0.0308,
      "step": 495140
    },
    {
      "epoch": 0.8103402001793628,
      "grad_norm": 1.0252525806427002,
      "learning_rate": 8.434532791260057e-06,
      "loss": 0.0339,
      "step": 495160
    },
    {
      "epoch": 0.8103729306180162,
      "grad_norm": 0.5556000471115112,
      "learning_rate": 8.43446689904654e-06,
      "loss": 0.0411,
      "step": 495180
    },
    {
      "epoch": 0.8104056610566694,
      "grad_norm": 3.147150993347168,
      "learning_rate": 8.434401006833023e-06,
      "loss": 0.0351,
      "step": 495200
    },
    {
      "epoch": 0.8104383914953228,
      "grad_norm": 0.6326474547386169,
      "learning_rate": 8.434335114619506e-06,
      "loss": 0.0252,
      "step": 495220
    },
    {
      "epoch": 0.8104711219339762,
      "grad_norm": 1.2986661195755005,
      "learning_rate": 8.434269222405988e-06,
      "loss": 0.0263,
      "step": 495240
    },
    {
      "epoch": 0.8105038523726295,
      "grad_norm": 0.33210209012031555,
      "learning_rate": 8.434203330192472e-06,
      "loss": 0.0451,
      "step": 495260
    },
    {
      "epoch": 0.8105365828112828,
      "grad_norm": 1.7261124849319458,
      "learning_rate": 8.434137437978954e-06,
      "loss": 0.0265,
      "step": 495280
    },
    {
      "epoch": 0.8105693132499362,
      "grad_norm": 1.7462939023971558,
      "learning_rate": 8.434071545765437e-06,
      "loss": 0.0265,
      "step": 495300
    },
    {
      "epoch": 0.8106020436885896,
      "grad_norm": 0.34320366382598877,
      "learning_rate": 8.43400565355192e-06,
      "loss": 0.0243,
      "step": 495320
    },
    {
      "epoch": 0.8106347741272428,
      "grad_norm": 1.2026753425598145,
      "learning_rate": 8.433939761338403e-06,
      "loss": 0.0183,
      "step": 495340
    },
    {
      "epoch": 0.8106675045658962,
      "grad_norm": 0.6279362440109253,
      "learning_rate": 8.433873869124886e-06,
      "loss": 0.0285,
      "step": 495360
    },
    {
      "epoch": 0.8107002350045496,
      "grad_norm": 0.2978792190551758,
      "learning_rate": 8.433807976911368e-06,
      "loss": 0.0338,
      "step": 495380
    },
    {
      "epoch": 0.8107329654432028,
      "grad_norm": 0.6955265998840332,
      "learning_rate": 8.433742084697852e-06,
      "loss": 0.0412,
      "step": 495400
    },
    {
      "epoch": 0.8107656958818562,
      "grad_norm": 0.6434338092803955,
      "learning_rate": 8.433676192484335e-06,
      "loss": 0.0376,
      "step": 495420
    },
    {
      "epoch": 0.8107984263205096,
      "grad_norm": 1.5772584676742554,
      "learning_rate": 8.433610300270817e-06,
      "loss": 0.0288,
      "step": 495440
    },
    {
      "epoch": 0.8108311567591628,
      "grad_norm": 0.6729219555854797,
      "learning_rate": 8.433544408057301e-06,
      "loss": 0.0282,
      "step": 495460
    },
    {
      "epoch": 0.8108638871978162,
      "grad_norm": 0.8175913691520691,
      "learning_rate": 8.433478515843784e-06,
      "loss": 0.0278,
      "step": 495480
    },
    {
      "epoch": 0.8108966176364696,
      "grad_norm": 0.2430497109889984,
      "learning_rate": 8.433412623630266e-06,
      "loss": 0.0353,
      "step": 495500
    },
    {
      "epoch": 0.810929348075123,
      "grad_norm": 0.5929717421531677,
      "learning_rate": 8.43334673141675e-06,
      "loss": 0.0312,
      "step": 495520
    },
    {
      "epoch": 0.8109620785137762,
      "grad_norm": 0.6952645182609558,
      "learning_rate": 8.433280839203232e-06,
      "loss": 0.0301,
      "step": 495540
    },
    {
      "epoch": 0.8109948089524296,
      "grad_norm": 1.243848204612732,
      "learning_rate": 8.433214946989715e-06,
      "loss": 0.0342,
      "step": 495560
    },
    {
      "epoch": 0.811027539391083,
      "grad_norm": 0.19081269204616547,
      "learning_rate": 8.433149054776197e-06,
      "loss": 0.0322,
      "step": 495580
    },
    {
      "epoch": 0.8110602698297362,
      "grad_norm": 0.8682796955108643,
      "learning_rate": 8.433083162562681e-06,
      "loss": 0.0439,
      "step": 495600
    },
    {
      "epoch": 0.8110930002683896,
      "grad_norm": 1.2315305471420288,
      "learning_rate": 8.433017270349163e-06,
      "loss": 0.0322,
      "step": 495620
    },
    {
      "epoch": 0.811125730707043,
      "grad_norm": 1.2402300834655762,
      "learning_rate": 8.432951378135646e-06,
      "loss": 0.0288,
      "step": 495640
    },
    {
      "epoch": 0.8111584611456962,
      "grad_norm": 4.645044803619385,
      "learning_rate": 8.432885485922128e-06,
      "loss": 0.0311,
      "step": 495660
    },
    {
      "epoch": 0.8111911915843496,
      "grad_norm": 0.41069328784942627,
      "learning_rate": 8.432819593708612e-06,
      "loss": 0.0286,
      "step": 495680
    },
    {
      "epoch": 0.811223922023003,
      "grad_norm": 0.7828205823898315,
      "learning_rate": 8.432753701495095e-06,
      "loss": 0.0217,
      "step": 495700
    },
    {
      "epoch": 0.8112566524616562,
      "grad_norm": 0.8189760446548462,
      "learning_rate": 8.432687809281577e-06,
      "loss": 0.0221,
      "step": 495720
    },
    {
      "epoch": 0.8112893829003096,
      "grad_norm": 0.9828916788101196,
      "learning_rate": 8.432621917068061e-06,
      "loss": 0.0295,
      "step": 495740
    },
    {
      "epoch": 0.811322113338963,
      "grad_norm": 1.4528322219848633,
      "learning_rate": 8.432556024854543e-06,
      "loss": 0.0292,
      "step": 495760
    },
    {
      "epoch": 0.8113548437776164,
      "grad_norm": 0.2333509474992752,
      "learning_rate": 8.432490132641026e-06,
      "loss": 0.0288,
      "step": 495780
    },
    {
      "epoch": 0.8113875742162696,
      "grad_norm": 1.9916504621505737,
      "learning_rate": 8.43242424042751e-06,
      "loss": 0.0256,
      "step": 495800
    },
    {
      "epoch": 0.811420304654923,
      "grad_norm": 0.5108506083488464,
      "learning_rate": 8.432358348213992e-06,
      "loss": 0.0267,
      "step": 495820
    },
    {
      "epoch": 0.8114530350935764,
      "grad_norm": 0.5022606253623962,
      "learning_rate": 8.432292456000475e-06,
      "loss": 0.0337,
      "step": 495840
    },
    {
      "epoch": 0.8114857655322296,
      "grad_norm": 1.6785802841186523,
      "learning_rate": 8.432226563786959e-06,
      "loss": 0.0415,
      "step": 495860
    },
    {
      "epoch": 0.811518495970883,
      "grad_norm": 3.022707462310791,
      "learning_rate": 8.432160671573441e-06,
      "loss": 0.0372,
      "step": 495880
    },
    {
      "epoch": 0.8115512264095364,
      "grad_norm": 0.7711323499679565,
      "learning_rate": 8.432094779359925e-06,
      "loss": 0.024,
      "step": 495900
    },
    {
      "epoch": 0.8115839568481896,
      "grad_norm": 1.0873310565948486,
      "learning_rate": 8.432028887146406e-06,
      "loss": 0.0212,
      "step": 495920
    },
    {
      "epoch": 0.811616687286843,
      "grad_norm": 0.643245279788971,
      "learning_rate": 8.43196299493289e-06,
      "loss": 0.0289,
      "step": 495940
    },
    {
      "epoch": 0.8116494177254964,
      "grad_norm": 0.5470570921897888,
      "learning_rate": 8.431897102719372e-06,
      "loss": 0.035,
      "step": 495960
    },
    {
      "epoch": 0.8116821481641497,
      "grad_norm": 0.952899694442749,
      "learning_rate": 8.431831210505856e-06,
      "loss": 0.0285,
      "step": 495980
    },
    {
      "epoch": 0.811714878602803,
      "grad_norm": 2.6166486740112305,
      "learning_rate": 8.431765318292337e-06,
      "loss": 0.0328,
      "step": 496000
    },
    {
      "epoch": 0.8117476090414564,
      "grad_norm": 0.4670814871788025,
      "learning_rate": 8.431699426078821e-06,
      "loss": 0.0403,
      "step": 496020
    },
    {
      "epoch": 0.8117803394801097,
      "grad_norm": 0.3651847541332245,
      "learning_rate": 8.431633533865305e-06,
      "loss": 0.0337,
      "step": 496040
    },
    {
      "epoch": 0.811813069918763,
      "grad_norm": 0.6185347437858582,
      "learning_rate": 8.431567641651786e-06,
      "loss": 0.0332,
      "step": 496060
    },
    {
      "epoch": 0.8118458003574164,
      "grad_norm": 0.6354112029075623,
      "learning_rate": 8.43150174943827e-06,
      "loss": 0.0273,
      "step": 496080
    },
    {
      "epoch": 0.8118785307960698,
      "grad_norm": 1.7976852655410767,
      "learning_rate": 8.431435857224752e-06,
      "loss": 0.0291,
      "step": 496100
    },
    {
      "epoch": 0.811911261234723,
      "grad_norm": 0.7869969010353088,
      "learning_rate": 8.431369965011236e-06,
      "loss": 0.0315,
      "step": 496120
    },
    {
      "epoch": 0.8119439916733764,
      "grad_norm": 1.711543321609497,
      "learning_rate": 8.431304072797717e-06,
      "loss": 0.0402,
      "step": 496140
    },
    {
      "epoch": 0.8119767221120298,
      "grad_norm": 0.35760051012039185,
      "learning_rate": 8.431238180584201e-06,
      "loss": 0.0286,
      "step": 496160
    },
    {
      "epoch": 0.8120094525506831,
      "grad_norm": 0.8467236161231995,
      "learning_rate": 8.431172288370683e-06,
      "loss": 0.037,
      "step": 496180
    },
    {
      "epoch": 0.8120421829893364,
      "grad_norm": 1.160183072090149,
      "learning_rate": 8.431106396157166e-06,
      "loss": 0.0359,
      "step": 496200
    },
    {
      "epoch": 0.8120749134279898,
      "grad_norm": 1.6631802320480347,
      "learning_rate": 8.43104050394365e-06,
      "loss": 0.0301,
      "step": 496220
    },
    {
      "epoch": 0.8121076438666431,
      "grad_norm": 0.6994720697402954,
      "learning_rate": 8.430974611730132e-06,
      "loss": 0.0226,
      "step": 496240
    },
    {
      "epoch": 0.8121403743052964,
      "grad_norm": 3.6732726097106934,
      "learning_rate": 8.430908719516616e-06,
      "loss": 0.0273,
      "step": 496260
    },
    {
      "epoch": 0.8121731047439498,
      "grad_norm": 0.8799643516540527,
      "learning_rate": 8.430842827303099e-06,
      "loss": 0.0294,
      "step": 496280
    },
    {
      "epoch": 0.8122058351826031,
      "grad_norm": 4.171731472015381,
      "learning_rate": 8.430776935089581e-06,
      "loss": 0.0317,
      "step": 496300
    },
    {
      "epoch": 0.8122385656212564,
      "grad_norm": 0.6812331676483154,
      "learning_rate": 8.430711042876065e-06,
      "loss": 0.0333,
      "step": 496320
    },
    {
      "epoch": 0.8122712960599098,
      "grad_norm": 0.5016327500343323,
      "learning_rate": 8.430645150662547e-06,
      "loss": 0.0331,
      "step": 496340
    },
    {
      "epoch": 0.8123040264985631,
      "grad_norm": 0.843550980091095,
      "learning_rate": 8.43057925844903e-06,
      "loss": 0.0257,
      "step": 496360
    },
    {
      "epoch": 0.8123367569372165,
      "grad_norm": 0.5119316577911377,
      "learning_rate": 8.430513366235512e-06,
      "loss": 0.0277,
      "step": 496380
    },
    {
      "epoch": 0.8123694873758698,
      "grad_norm": 0.8117472529411316,
      "learning_rate": 8.430447474021996e-06,
      "loss": 0.0189,
      "step": 496400
    },
    {
      "epoch": 0.8124022178145232,
      "grad_norm": 0.6832591891288757,
      "learning_rate": 8.43038158180848e-06,
      "loss": 0.0222,
      "step": 496420
    },
    {
      "epoch": 0.8124349482531765,
      "grad_norm": 1.0552629232406616,
      "learning_rate": 8.430315689594961e-06,
      "loss": 0.0329,
      "step": 496440
    },
    {
      "epoch": 0.8124676786918298,
      "grad_norm": 2.334536075592041,
      "learning_rate": 8.430249797381445e-06,
      "loss": 0.026,
      "step": 496460
    },
    {
      "epoch": 0.8125004091304832,
      "grad_norm": 0.3696064054965973,
      "learning_rate": 8.430183905167927e-06,
      "loss": 0.0344,
      "step": 496480
    },
    {
      "epoch": 0.8125331395691365,
      "grad_norm": 1.1214491128921509,
      "learning_rate": 8.43011801295441e-06,
      "loss": 0.0343,
      "step": 496500
    },
    {
      "epoch": 0.8125658700077898,
      "grad_norm": 1.0210844278335571,
      "learning_rate": 8.430052120740892e-06,
      "loss": 0.0302,
      "step": 496520
    },
    {
      "epoch": 0.8125986004464432,
      "grad_norm": 1.0554739236831665,
      "learning_rate": 8.429986228527376e-06,
      "loss": 0.0415,
      "step": 496540
    },
    {
      "epoch": 0.8126313308850965,
      "grad_norm": 0.3736470341682434,
      "learning_rate": 8.429920336313858e-06,
      "loss": 0.0273,
      "step": 496560
    },
    {
      "epoch": 0.8126640613237499,
      "grad_norm": 0.12577077746391296,
      "learning_rate": 8.429854444100341e-06,
      "loss": 0.0274,
      "step": 496580
    },
    {
      "epoch": 0.8126967917624032,
      "grad_norm": 1.9098855257034302,
      "learning_rate": 8.429788551886825e-06,
      "loss": 0.032,
      "step": 496600
    },
    {
      "epoch": 0.8127295222010565,
      "grad_norm": 0.5141668915748596,
      "learning_rate": 8.429722659673307e-06,
      "loss": 0.0298,
      "step": 496620
    },
    {
      "epoch": 0.8127622526397099,
      "grad_norm": 1.6378228664398193,
      "learning_rate": 8.42965676745979e-06,
      "loss": 0.0305,
      "step": 496640
    },
    {
      "epoch": 0.8127949830783632,
      "grad_norm": 1.3473141193389893,
      "learning_rate": 8.429590875246274e-06,
      "loss": 0.0372,
      "step": 496660
    },
    {
      "epoch": 0.8128277135170165,
      "grad_norm": 0.9845662117004395,
      "learning_rate": 8.429524983032756e-06,
      "loss": 0.0234,
      "step": 496680
    },
    {
      "epoch": 0.8128604439556699,
      "grad_norm": 1.1809443235397339,
      "learning_rate": 8.42945909081924e-06,
      "loss": 0.0372,
      "step": 496700
    },
    {
      "epoch": 0.8128931743943232,
      "grad_norm": 1.27041494846344,
      "learning_rate": 8.429393198605721e-06,
      "loss": 0.026,
      "step": 496720
    },
    {
      "epoch": 0.8129259048329766,
      "grad_norm": 0.5991551280021667,
      "learning_rate": 8.429327306392205e-06,
      "loss": 0.0232,
      "step": 496740
    },
    {
      "epoch": 0.8129586352716299,
      "grad_norm": 0.6189314723014832,
      "learning_rate": 8.429261414178688e-06,
      "loss": 0.0247,
      "step": 496760
    },
    {
      "epoch": 0.8129913657102833,
      "grad_norm": 3.848559856414795,
      "learning_rate": 8.42919552196517e-06,
      "loss": 0.0411,
      "step": 496780
    },
    {
      "epoch": 0.8130240961489366,
      "grad_norm": 1.0697842836380005,
      "learning_rate": 8.429129629751654e-06,
      "loss": 0.0266,
      "step": 496800
    },
    {
      "epoch": 0.8130568265875899,
      "grad_norm": 0.977627158164978,
      "learning_rate": 8.429063737538136e-06,
      "loss": 0.0211,
      "step": 496820
    },
    {
      "epoch": 0.8130895570262433,
      "grad_norm": 0.2504121661186218,
      "learning_rate": 8.42899784532462e-06,
      "loss": 0.0336,
      "step": 496840
    },
    {
      "epoch": 0.8131222874648966,
      "grad_norm": 2.1065993309020996,
      "learning_rate": 8.428931953111101e-06,
      "loss": 0.0291,
      "step": 496860
    },
    {
      "epoch": 0.8131550179035499,
      "grad_norm": 0.8595550060272217,
      "learning_rate": 8.428866060897585e-06,
      "loss": 0.0187,
      "step": 496880
    },
    {
      "epoch": 0.8131877483422033,
      "grad_norm": 2.6572787761688232,
      "learning_rate": 8.428800168684067e-06,
      "loss": 0.0254,
      "step": 496900
    },
    {
      "epoch": 0.8132204787808566,
      "grad_norm": 0.4052947759628296,
      "learning_rate": 8.42873427647055e-06,
      "loss": 0.0208,
      "step": 496920
    },
    {
      "epoch": 0.8132532092195099,
      "grad_norm": 2.6970038414001465,
      "learning_rate": 8.428668384257032e-06,
      "loss": 0.0292,
      "step": 496940
    },
    {
      "epoch": 0.8132859396581633,
      "grad_norm": 0.2519415020942688,
      "learning_rate": 8.428602492043516e-06,
      "loss": 0.0294,
      "step": 496960
    },
    {
      "epoch": 0.8133186700968167,
      "grad_norm": 1.8994674682617188,
      "learning_rate": 8.428536599829998e-06,
      "loss": 0.0314,
      "step": 496980
    },
    {
      "epoch": 0.81335140053547,
      "grad_norm": 1.0490995645523071,
      "learning_rate": 8.428470707616481e-06,
      "loss": 0.0328,
      "step": 497000
    },
    {
      "epoch": 0.8133841309741233,
      "grad_norm": 0.25908222794532776,
      "learning_rate": 8.428404815402965e-06,
      "loss": 0.0347,
      "step": 497020
    },
    {
      "epoch": 0.8134168614127767,
      "grad_norm": 1.6221994161605835,
      "learning_rate": 8.428338923189447e-06,
      "loss": 0.0272,
      "step": 497040
    },
    {
      "epoch": 0.81344959185143,
      "grad_norm": 0.7772150039672852,
      "learning_rate": 8.42827303097593e-06,
      "loss": 0.0263,
      "step": 497060
    },
    {
      "epoch": 0.8134823222900833,
      "grad_norm": 2.0341851711273193,
      "learning_rate": 8.428207138762414e-06,
      "loss": 0.0309,
      "step": 497080
    },
    {
      "epoch": 0.8135150527287367,
      "grad_norm": 1.1957449913024902,
      "learning_rate": 8.428141246548896e-06,
      "loss": 0.036,
      "step": 497100
    },
    {
      "epoch": 0.81354778316739,
      "grad_norm": 0.47466638684272766,
      "learning_rate": 8.42807535433538e-06,
      "loss": 0.0263,
      "step": 497120
    },
    {
      "epoch": 0.8135805136060433,
      "grad_norm": 0.2877042293548584,
      "learning_rate": 8.428009462121863e-06,
      "loss": 0.0287,
      "step": 497140
    },
    {
      "epoch": 0.8136132440446967,
      "grad_norm": 15.410811424255371,
      "learning_rate": 8.427943569908345e-06,
      "loss": 0.0411,
      "step": 497160
    },
    {
      "epoch": 0.8136459744833501,
      "grad_norm": 0.8856396079063416,
      "learning_rate": 8.427877677694828e-06,
      "loss": 0.028,
      "step": 497180
    },
    {
      "epoch": 0.8136787049220033,
      "grad_norm": 1.5050623416900635,
      "learning_rate": 8.42781178548131e-06,
      "loss": 0.0271,
      "step": 497200
    },
    {
      "epoch": 0.8137114353606567,
      "grad_norm": 0.5154449939727783,
      "learning_rate": 8.427745893267794e-06,
      "loss": 0.0269,
      "step": 497220
    },
    {
      "epoch": 0.8137441657993101,
      "grad_norm": 1.8811962604522705,
      "learning_rate": 8.427680001054276e-06,
      "loss": 0.0273,
      "step": 497240
    },
    {
      "epoch": 0.8137768962379633,
      "grad_norm": 6.52006196975708,
      "learning_rate": 8.42761410884076e-06,
      "loss": 0.0347,
      "step": 497260
    },
    {
      "epoch": 0.8138096266766167,
      "grad_norm": 1.6507325172424316,
      "learning_rate": 8.427548216627241e-06,
      "loss": 0.0275,
      "step": 497280
    },
    {
      "epoch": 0.8138423571152701,
      "grad_norm": 0.40409237146377563,
      "learning_rate": 8.427482324413725e-06,
      "loss": 0.0261,
      "step": 497300
    },
    {
      "epoch": 0.8138750875539233,
      "grad_norm": 0.9534865617752075,
      "learning_rate": 8.427416432200207e-06,
      "loss": 0.0337,
      "step": 497320
    },
    {
      "epoch": 0.8139078179925767,
      "grad_norm": 0.5139517784118652,
      "learning_rate": 8.42735053998669e-06,
      "loss": 0.0256,
      "step": 497340
    },
    {
      "epoch": 0.8139405484312301,
      "grad_norm": 1.3992036581039429,
      "learning_rate": 8.427284647773172e-06,
      "loss": 0.028,
      "step": 497360
    },
    {
      "epoch": 0.8139732788698835,
      "grad_norm": 0.8854568600654602,
      "learning_rate": 8.427218755559656e-06,
      "loss": 0.0259,
      "step": 497380
    },
    {
      "epoch": 0.8140060093085367,
      "grad_norm": 2.500601291656494,
      "learning_rate": 8.42715286334614e-06,
      "loss": 0.0267,
      "step": 497400
    },
    {
      "epoch": 0.8140387397471901,
      "grad_norm": 0.8821219801902771,
      "learning_rate": 8.427086971132621e-06,
      "loss": 0.0324,
      "step": 497420
    },
    {
      "epoch": 0.8140714701858435,
      "grad_norm": 1.7161478996276855,
      "learning_rate": 8.427021078919105e-06,
      "loss": 0.0329,
      "step": 497440
    },
    {
      "epoch": 0.8141042006244967,
      "grad_norm": 1.571702003479004,
      "learning_rate": 8.426955186705588e-06,
      "loss": 0.0345,
      "step": 497460
    },
    {
      "epoch": 0.8141369310631501,
      "grad_norm": 0.47443121671676636,
      "learning_rate": 8.42688929449207e-06,
      "loss": 0.0401,
      "step": 497480
    },
    {
      "epoch": 0.8141696615018035,
      "grad_norm": 0.5856200456619263,
      "learning_rate": 8.426823402278554e-06,
      "loss": 0.0381,
      "step": 497500
    },
    {
      "epoch": 0.8142023919404567,
      "grad_norm": 0.1876620501279831,
      "learning_rate": 8.426757510065037e-06,
      "loss": 0.0349,
      "step": 497520
    },
    {
      "epoch": 0.8142351223791101,
      "grad_norm": 1.4350101947784424,
      "learning_rate": 8.42669161785152e-06,
      "loss": 0.0342,
      "step": 497540
    },
    {
      "epoch": 0.8142678528177635,
      "grad_norm": 8.916159629821777,
      "learning_rate": 8.426625725638003e-06,
      "loss": 0.0335,
      "step": 497560
    },
    {
      "epoch": 0.8143005832564169,
      "grad_norm": 0.5162119269371033,
      "learning_rate": 8.426559833424485e-06,
      "loss": 0.0278,
      "step": 497580
    },
    {
      "epoch": 0.8143333136950701,
      "grad_norm": 1.2488577365875244,
      "learning_rate": 8.426493941210968e-06,
      "loss": 0.0259,
      "step": 497600
    },
    {
      "epoch": 0.8143660441337235,
      "grad_norm": 0.39666804671287537,
      "learning_rate": 8.42642804899745e-06,
      "loss": 0.0343,
      "step": 497620
    },
    {
      "epoch": 0.8143987745723769,
      "grad_norm": 3.6575934886932373,
      "learning_rate": 8.426362156783934e-06,
      "loss": 0.0416,
      "step": 497640
    },
    {
      "epoch": 0.8144315050110301,
      "grad_norm": 3.183579444885254,
      "learning_rate": 8.426296264570416e-06,
      "loss": 0.035,
      "step": 497660
    },
    {
      "epoch": 0.8144642354496835,
      "grad_norm": 1.3406819105148315,
      "learning_rate": 8.4262303723569e-06,
      "loss": 0.04,
      "step": 497680
    },
    {
      "epoch": 0.8144969658883369,
      "grad_norm": 2.3677585124969482,
      "learning_rate": 8.426164480143381e-06,
      "loss": 0.0332,
      "step": 497700
    },
    {
      "epoch": 0.8145296963269901,
      "grad_norm": 0.7193405032157898,
      "learning_rate": 8.426098587929865e-06,
      "loss": 0.0219,
      "step": 497720
    },
    {
      "epoch": 0.8145624267656435,
      "grad_norm": 0.49756452441215515,
      "learning_rate": 8.426032695716347e-06,
      "loss": 0.0241,
      "step": 497740
    },
    {
      "epoch": 0.8145951572042969,
      "grad_norm": 0.7771138548851013,
      "learning_rate": 8.42596680350283e-06,
      "loss": 0.0326,
      "step": 497760
    },
    {
      "epoch": 0.8146278876429502,
      "grad_norm": 0.7719391584396362,
      "learning_rate": 8.425900911289312e-06,
      "loss": 0.043,
      "step": 497780
    },
    {
      "epoch": 0.8146606180816035,
      "grad_norm": 0.4268459677696228,
      "learning_rate": 8.425835019075796e-06,
      "loss": 0.0316,
      "step": 497800
    },
    {
      "epoch": 0.8146933485202569,
      "grad_norm": 1.0333839654922485,
      "learning_rate": 8.42576912686228e-06,
      "loss": 0.0332,
      "step": 497820
    },
    {
      "epoch": 0.8147260789589102,
      "grad_norm": 1.080045223236084,
      "learning_rate": 8.425703234648761e-06,
      "loss": 0.0328,
      "step": 497840
    },
    {
      "epoch": 0.8147588093975635,
      "grad_norm": 0.6024815440177917,
      "learning_rate": 8.425637342435245e-06,
      "loss": 0.0279,
      "step": 497860
    },
    {
      "epoch": 0.8147915398362169,
      "grad_norm": 0.6806008219718933,
      "learning_rate": 8.425571450221728e-06,
      "loss": 0.0259,
      "step": 497880
    },
    {
      "epoch": 0.8148242702748703,
      "grad_norm": 3.7275054454803467,
      "learning_rate": 8.425505558008212e-06,
      "loss": 0.0238,
      "step": 497900
    },
    {
      "epoch": 0.8148570007135235,
      "grad_norm": 1.1888903379440308,
      "learning_rate": 8.425439665794694e-06,
      "loss": 0.0381,
      "step": 497920
    },
    {
      "epoch": 0.8148897311521769,
      "grad_norm": 1.6664562225341797,
      "learning_rate": 8.425373773581178e-06,
      "loss": 0.0401,
      "step": 497940
    },
    {
      "epoch": 0.8149224615908303,
      "grad_norm": 1.0512773990631104,
      "learning_rate": 8.42530788136766e-06,
      "loss": 0.031,
      "step": 497960
    },
    {
      "epoch": 0.8149551920294836,
      "grad_norm": 1.3283973932266235,
      "learning_rate": 8.425241989154143e-06,
      "loss": 0.0368,
      "step": 497980
    },
    {
      "epoch": 0.8149879224681369,
      "grad_norm": 1.0017577409744263,
      "learning_rate": 8.425176096940625e-06,
      "loss": 0.0236,
      "step": 498000
    },
    {
      "epoch": 0.8150206529067903,
      "grad_norm": 0.9554875493049622,
      "learning_rate": 8.425110204727109e-06,
      "loss": 0.0331,
      "step": 498020
    },
    {
      "epoch": 0.8150533833454436,
      "grad_norm": 0.4987037777900696,
      "learning_rate": 8.42504431251359e-06,
      "loss": 0.0336,
      "step": 498040
    },
    {
      "epoch": 0.8150861137840969,
      "grad_norm": 1.2838246822357178,
      "learning_rate": 8.424978420300074e-06,
      "loss": 0.0226,
      "step": 498060
    },
    {
      "epoch": 0.8151188442227503,
      "grad_norm": 0.27144134044647217,
      "learning_rate": 8.424912528086556e-06,
      "loss": 0.0275,
      "step": 498080
    },
    {
      "epoch": 0.8151515746614036,
      "grad_norm": 0.1544053703546524,
      "learning_rate": 8.42484663587304e-06,
      "loss": 0.0338,
      "step": 498100
    },
    {
      "epoch": 0.8151843051000569,
      "grad_norm": 0.41917750239372253,
      "learning_rate": 8.424780743659521e-06,
      "loss": 0.0328,
      "step": 498120
    },
    {
      "epoch": 0.8152170355387103,
      "grad_norm": 0.6862380504608154,
      "learning_rate": 8.424714851446005e-06,
      "loss": 0.0311,
      "step": 498140
    },
    {
      "epoch": 0.8152497659773636,
      "grad_norm": 0.734541118144989,
      "learning_rate": 8.424648959232489e-06,
      "loss": 0.0285,
      "step": 498160
    },
    {
      "epoch": 0.815282496416017,
      "grad_norm": 0.292327880859375,
      "learning_rate": 8.42458306701897e-06,
      "loss": 0.0234,
      "step": 498180
    },
    {
      "epoch": 0.8153152268546703,
      "grad_norm": 2.0734479427337646,
      "learning_rate": 8.424517174805454e-06,
      "loss": 0.0465,
      "step": 498200
    },
    {
      "epoch": 0.8153479572933237,
      "grad_norm": 1.7486001253128052,
      "learning_rate": 8.424451282591936e-06,
      "loss": 0.0328,
      "step": 498220
    },
    {
      "epoch": 0.815380687731977,
      "grad_norm": 0.5573487281799316,
      "learning_rate": 8.42438539037842e-06,
      "loss": 0.0333,
      "step": 498240
    },
    {
      "epoch": 0.8154134181706303,
      "grad_norm": 0.6689696311950684,
      "learning_rate": 8.424319498164903e-06,
      "loss": 0.0297,
      "step": 498260
    },
    {
      "epoch": 0.8154461486092837,
      "grad_norm": 2.3214797973632812,
      "learning_rate": 8.424253605951385e-06,
      "loss": 0.0355,
      "step": 498280
    },
    {
      "epoch": 0.815478879047937,
      "grad_norm": 1.0750483274459839,
      "learning_rate": 8.424187713737869e-06,
      "loss": 0.0368,
      "step": 498300
    },
    {
      "epoch": 0.8155116094865903,
      "grad_norm": 2.5313007831573486,
      "learning_rate": 8.424121821524352e-06,
      "loss": 0.0315,
      "step": 498320
    },
    {
      "epoch": 0.8155443399252437,
      "grad_norm": 1.3290424346923828,
      "learning_rate": 8.424055929310834e-06,
      "loss": 0.0214,
      "step": 498340
    },
    {
      "epoch": 0.815577070363897,
      "grad_norm": 2.940699577331543,
      "learning_rate": 8.423990037097318e-06,
      "loss": 0.0357,
      "step": 498360
    },
    {
      "epoch": 0.8156098008025504,
      "grad_norm": 0.0560794398188591,
      "learning_rate": 8.4239241448838e-06,
      "loss": 0.021,
      "step": 498380
    },
    {
      "epoch": 0.8156425312412037,
      "grad_norm": 3.925149917602539,
      "learning_rate": 8.423858252670283e-06,
      "loss": 0.0388,
      "step": 498400
    },
    {
      "epoch": 0.815675261679857,
      "grad_norm": 2.5203604698181152,
      "learning_rate": 8.423792360456765e-06,
      "loss": 0.0345,
      "step": 498420
    },
    {
      "epoch": 0.8157079921185104,
      "grad_norm": 2.1157796382904053,
      "learning_rate": 8.423726468243249e-06,
      "loss": 0.034,
      "step": 498440
    },
    {
      "epoch": 0.8157407225571637,
      "grad_norm": 0.7814799547195435,
      "learning_rate": 8.42366057602973e-06,
      "loss": 0.0219,
      "step": 498460
    },
    {
      "epoch": 0.815773452995817,
      "grad_norm": 0.7301753163337708,
      "learning_rate": 8.423594683816214e-06,
      "loss": 0.0316,
      "step": 498480
    },
    {
      "epoch": 0.8158061834344704,
      "grad_norm": 0.7542704939842224,
      "learning_rate": 8.423528791602698e-06,
      "loss": 0.0246,
      "step": 498500
    },
    {
      "epoch": 0.8158389138731237,
      "grad_norm": 1.0676711797714233,
      "learning_rate": 8.42346289938918e-06,
      "loss": 0.0239,
      "step": 498520
    },
    {
      "epoch": 0.8158716443117771,
      "grad_norm": 0.6189464330673218,
      "learning_rate": 8.423397007175663e-06,
      "loss": 0.0354,
      "step": 498540
    },
    {
      "epoch": 0.8159043747504304,
      "grad_norm": 1.5422873497009277,
      "learning_rate": 8.423331114962145e-06,
      "loss": 0.0261,
      "step": 498560
    },
    {
      "epoch": 0.8159371051890837,
      "grad_norm": 1.6659311056137085,
      "learning_rate": 8.423265222748629e-06,
      "loss": 0.0297,
      "step": 498580
    },
    {
      "epoch": 0.8159698356277371,
      "grad_norm": 0.5971537828445435,
      "learning_rate": 8.42319933053511e-06,
      "loss": 0.0232,
      "step": 498600
    },
    {
      "epoch": 0.8160025660663904,
      "grad_norm": 0.8508515357971191,
      "learning_rate": 8.423133438321594e-06,
      "loss": 0.0364,
      "step": 498620
    },
    {
      "epoch": 0.8160352965050438,
      "grad_norm": 0.6149807572364807,
      "learning_rate": 8.423067546108078e-06,
      "loss": 0.0238,
      "step": 498640
    },
    {
      "epoch": 0.8160680269436971,
      "grad_norm": 2.912626028060913,
      "learning_rate": 8.42300165389456e-06,
      "loss": 0.0266,
      "step": 498660
    },
    {
      "epoch": 0.8161007573823504,
      "grad_norm": 0.30261680483818054,
      "learning_rate": 8.422935761681043e-06,
      "loss": 0.0257,
      "step": 498680
    },
    {
      "epoch": 0.8161334878210038,
      "grad_norm": 1.8529523611068726,
      "learning_rate": 8.422869869467527e-06,
      "loss": 0.0371,
      "step": 498700
    },
    {
      "epoch": 0.8161662182596571,
      "grad_norm": 0.1504291146993637,
      "learning_rate": 8.422803977254009e-06,
      "loss": 0.0176,
      "step": 498720
    },
    {
      "epoch": 0.8161989486983104,
      "grad_norm": 0.9938297867774963,
      "learning_rate": 8.422738085040492e-06,
      "loss": 0.0241,
      "step": 498740
    },
    {
      "epoch": 0.8162316791369638,
      "grad_norm": 3.165343761444092,
      "learning_rate": 8.422672192826974e-06,
      "loss": 0.0244,
      "step": 498760
    },
    {
      "epoch": 0.8162644095756171,
      "grad_norm": 0.864177942276001,
      "learning_rate": 8.422606300613458e-06,
      "loss": 0.0302,
      "step": 498780
    },
    {
      "epoch": 0.8162971400142705,
      "grad_norm": 1.1229445934295654,
      "learning_rate": 8.42254040839994e-06,
      "loss": 0.0365,
      "step": 498800
    },
    {
      "epoch": 0.8163298704529238,
      "grad_norm": 0.912878692150116,
      "learning_rate": 8.422474516186423e-06,
      "loss": 0.0425,
      "step": 498820
    },
    {
      "epoch": 0.8163626008915772,
      "grad_norm": 0.3843032121658325,
      "learning_rate": 8.422408623972905e-06,
      "loss": 0.0419,
      "step": 498840
    },
    {
      "epoch": 0.8163953313302305,
      "grad_norm": 2.7276031970977783,
      "learning_rate": 8.422342731759389e-06,
      "loss": 0.0372,
      "step": 498860
    },
    {
      "epoch": 0.8164280617688838,
      "grad_norm": 0.9936877489089966,
      "learning_rate": 8.422276839545872e-06,
      "loss": 0.0308,
      "step": 498880
    },
    {
      "epoch": 0.8164607922075372,
      "grad_norm": 1.6992923021316528,
      "learning_rate": 8.422210947332354e-06,
      "loss": 0.0344,
      "step": 498900
    },
    {
      "epoch": 0.8164935226461905,
      "grad_norm": 0.9246429204940796,
      "learning_rate": 8.422145055118838e-06,
      "loss": 0.0333,
      "step": 498920
    },
    {
      "epoch": 0.8165262530848438,
      "grad_norm": 0.9504032731056213,
      "learning_rate": 8.42207916290532e-06,
      "loss": 0.0321,
      "step": 498940
    },
    {
      "epoch": 0.8165589835234972,
      "grad_norm": 1.1692887544631958,
      "learning_rate": 8.422013270691803e-06,
      "loss": 0.0358,
      "step": 498960
    },
    {
      "epoch": 0.8165917139621505,
      "grad_norm": 1.794813871383667,
      "learning_rate": 8.421947378478285e-06,
      "loss": 0.0254,
      "step": 498980
    },
    {
      "epoch": 0.8166244444008038,
      "grad_norm": 2.6083781719207764,
      "learning_rate": 8.421881486264769e-06,
      "loss": 0.0344,
      "step": 499000
    },
    {
      "epoch": 0.8166571748394572,
      "grad_norm": 2.2670042514801025,
      "learning_rate": 8.42181559405125e-06,
      "loss": 0.0302,
      "step": 499020
    },
    {
      "epoch": 0.8166899052781106,
      "grad_norm": 0.7101596593856812,
      "learning_rate": 8.421749701837734e-06,
      "loss": 0.0286,
      "step": 499040
    },
    {
      "epoch": 0.8167226357167638,
      "grad_norm": 1.5855748653411865,
      "learning_rate": 8.421683809624218e-06,
      "loss": 0.0271,
      "step": 499060
    },
    {
      "epoch": 0.8167553661554172,
      "grad_norm": 0.35612091422080994,
      "learning_rate": 8.4216179174107e-06,
      "loss": 0.0342,
      "step": 499080
    },
    {
      "epoch": 0.8167880965940706,
      "grad_norm": 0.9312992095947266,
      "learning_rate": 8.421552025197183e-06,
      "loss": 0.0212,
      "step": 499100
    },
    {
      "epoch": 0.8168208270327239,
      "grad_norm": 0.9993603229522705,
      "learning_rate": 8.421486132983667e-06,
      "loss": 0.0373,
      "step": 499120
    },
    {
      "epoch": 0.8168535574713772,
      "grad_norm": 0.2855394184589386,
      "learning_rate": 8.421420240770149e-06,
      "loss": 0.0315,
      "step": 499140
    },
    {
      "epoch": 0.8168862879100306,
      "grad_norm": 0.8991586565971375,
      "learning_rate": 8.421354348556632e-06,
      "loss": 0.0299,
      "step": 499160
    },
    {
      "epoch": 0.8169190183486839,
      "grad_norm": 0.5227197408676147,
      "learning_rate": 8.421288456343114e-06,
      "loss": 0.0293,
      "step": 499180
    },
    {
      "epoch": 0.8169517487873372,
      "grad_norm": 3.1375186443328857,
      "learning_rate": 8.421222564129598e-06,
      "loss": 0.0278,
      "step": 499200
    },
    {
      "epoch": 0.8169844792259906,
      "grad_norm": 1.0571361780166626,
      "learning_rate": 8.421156671916081e-06,
      "loss": 0.0366,
      "step": 499220
    },
    {
      "epoch": 0.817017209664644,
      "grad_norm": 0.6385015845298767,
      "learning_rate": 8.421090779702563e-06,
      "loss": 0.0333,
      "step": 499240
    },
    {
      "epoch": 0.8170499401032972,
      "grad_norm": 0.7179456949234009,
      "learning_rate": 8.421024887489047e-06,
      "loss": 0.023,
      "step": 499260
    },
    {
      "epoch": 0.8170826705419506,
      "grad_norm": 0.5615831613540649,
      "learning_rate": 8.420958995275529e-06,
      "loss": 0.0204,
      "step": 499280
    },
    {
      "epoch": 0.817115400980604,
      "grad_norm": 2.1269099712371826,
      "learning_rate": 8.420893103062012e-06,
      "loss": 0.0365,
      "step": 499300
    },
    {
      "epoch": 0.8171481314192572,
      "grad_norm": 1.34129798412323,
      "learning_rate": 8.420827210848494e-06,
      "loss": 0.0328,
      "step": 499320
    },
    {
      "epoch": 0.8171808618579106,
      "grad_norm": 0.12755292654037476,
      "learning_rate": 8.420761318634978e-06,
      "loss": 0.0449,
      "step": 499340
    },
    {
      "epoch": 0.817213592296564,
      "grad_norm": 1.0080738067626953,
      "learning_rate": 8.42069542642146e-06,
      "loss": 0.0265,
      "step": 499360
    },
    {
      "epoch": 0.8172463227352172,
      "grad_norm": 1.7842804193496704,
      "learning_rate": 8.420629534207943e-06,
      "loss": 0.0372,
      "step": 499380
    },
    {
      "epoch": 0.8172790531738706,
      "grad_norm": 1.5868968963623047,
      "learning_rate": 8.420563641994425e-06,
      "loss": 0.0352,
      "step": 499400
    },
    {
      "epoch": 0.817311783612524,
      "grad_norm": 1.8766897916793823,
      "learning_rate": 8.420497749780909e-06,
      "loss": 0.0289,
      "step": 499420
    },
    {
      "epoch": 0.8173445140511774,
      "grad_norm": 1.8601003885269165,
      "learning_rate": 8.420431857567392e-06,
      "loss": 0.0284,
      "step": 499440
    },
    {
      "epoch": 0.8173772444898306,
      "grad_norm": 1.5307544469833374,
      "learning_rate": 8.420365965353874e-06,
      "loss": 0.0325,
      "step": 499460
    },
    {
      "epoch": 0.817409974928484,
      "grad_norm": 1.8297077417373657,
      "learning_rate": 8.420300073140358e-06,
      "loss": 0.0286,
      "step": 499480
    },
    {
      "epoch": 0.8174427053671374,
      "grad_norm": 0.7782292366027832,
      "learning_rate": 8.420234180926841e-06,
      "loss": 0.0256,
      "step": 499500
    },
    {
      "epoch": 0.8174754358057906,
      "grad_norm": 0.9473521113395691,
      "learning_rate": 8.420168288713323e-06,
      "loss": 0.0238,
      "step": 499520
    },
    {
      "epoch": 0.817508166244444,
      "grad_norm": 2.359283447265625,
      "learning_rate": 8.420102396499807e-06,
      "loss": 0.0278,
      "step": 499540
    },
    {
      "epoch": 0.8175408966830974,
      "grad_norm": 6.822428226470947,
      "learning_rate": 8.42003650428629e-06,
      "loss": 0.0245,
      "step": 499560
    },
    {
      "epoch": 0.8175736271217506,
      "grad_norm": 1.1999125480651855,
      "learning_rate": 8.419970612072772e-06,
      "loss": 0.037,
      "step": 499580
    },
    {
      "epoch": 0.817606357560404,
      "grad_norm": 1.5881470441818237,
      "learning_rate": 8.419904719859256e-06,
      "loss": 0.0269,
      "step": 499600
    },
    {
      "epoch": 0.8176390879990574,
      "grad_norm": 2.941124677658081,
      "learning_rate": 8.419838827645738e-06,
      "loss": 0.0439,
      "step": 499620
    },
    {
      "epoch": 0.8176718184377108,
      "grad_norm": 1.0845906734466553,
      "learning_rate": 8.419772935432221e-06,
      "loss": 0.0264,
      "step": 499640
    },
    {
      "epoch": 0.817704548876364,
      "grad_norm": 0.7438192367553711,
      "learning_rate": 8.419707043218703e-06,
      "loss": 0.026,
      "step": 499660
    },
    {
      "epoch": 0.8177372793150174,
      "grad_norm": 0.9138495326042175,
      "learning_rate": 8.419641151005187e-06,
      "loss": 0.0274,
      "step": 499680
    },
    {
      "epoch": 0.8177700097536708,
      "grad_norm": 2.2178688049316406,
      "learning_rate": 8.419575258791669e-06,
      "loss": 0.036,
      "step": 499700
    },
    {
      "epoch": 0.817802740192324,
      "grad_norm": 0.9675859808921814,
      "learning_rate": 8.419509366578152e-06,
      "loss": 0.0226,
      "step": 499720
    },
    {
      "epoch": 0.8178354706309774,
      "grad_norm": 1.034874677658081,
      "learning_rate": 8.419443474364634e-06,
      "loss": 0.023,
      "step": 499740
    },
    {
      "epoch": 0.8178682010696308,
      "grad_norm": 1.6601566076278687,
      "learning_rate": 8.419377582151118e-06,
      "loss": 0.0356,
      "step": 499760
    },
    {
      "epoch": 0.817900931508284,
      "grad_norm": 1.6574058532714844,
      "learning_rate": 8.4193116899376e-06,
      "loss": 0.0366,
      "step": 499780
    },
    {
      "epoch": 0.8179336619469374,
      "grad_norm": 1.2001670598983765,
      "learning_rate": 8.419245797724083e-06,
      "loss": 0.0229,
      "step": 499800
    },
    {
      "epoch": 0.8179663923855908,
      "grad_norm": 0.522943377494812,
      "learning_rate": 8.419179905510565e-06,
      "loss": 0.0297,
      "step": 499820
    },
    {
      "epoch": 0.8179991228242441,
      "grad_norm": 0.29193514585494995,
      "learning_rate": 8.419114013297049e-06,
      "loss": 0.0412,
      "step": 499840
    },
    {
      "epoch": 0.8180318532628974,
      "grad_norm": 2.002758264541626,
      "learning_rate": 8.419048121083532e-06,
      "loss": 0.0248,
      "step": 499860
    },
    {
      "epoch": 0.8180645837015508,
      "grad_norm": 0.7708319425582886,
      "learning_rate": 8.418982228870014e-06,
      "loss": 0.0272,
      "step": 499880
    },
    {
      "epoch": 0.8180973141402041,
      "grad_norm": 0.6563282608985901,
      "learning_rate": 8.418916336656498e-06,
      "loss": 0.0253,
      "step": 499900
    },
    {
      "epoch": 0.8181300445788574,
      "grad_norm": 0.7694166898727417,
      "learning_rate": 8.418850444442982e-06,
      "loss": 0.0236,
      "step": 499920
    },
    {
      "epoch": 0.8181627750175108,
      "grad_norm": 1.3966203927993774,
      "learning_rate": 8.418784552229463e-06,
      "loss": 0.0345,
      "step": 499940
    },
    {
      "epoch": 0.8181955054561642,
      "grad_norm": 0.1915711909532547,
      "learning_rate": 8.418718660015947e-06,
      "loss": 0.0253,
      "step": 499960
    },
    {
      "epoch": 0.8182282358948174,
      "grad_norm": 1.1356337070465088,
      "learning_rate": 8.41865276780243e-06,
      "loss": 0.0369,
      "step": 499980
    },
    {
      "epoch": 0.8182609663334708,
      "grad_norm": 0.9236891865730286,
      "learning_rate": 8.418586875588912e-06,
      "loss": 0.0275,
      "step": 500000
    },
    {
      "epoch": 0.8182609663334708,
      "eval_loss": 0.015416729263961315,
      "eval_runtime": 6499.3279,
      "eval_samples_per_second": 158.148,
      "eval_steps_per_second": 15.815,
      "eval_sts-dev_pearson_cosine": 0.9634800483221864,
      "eval_sts-dev_spearman_cosine": 0.8823617028508361,
      "step": 500000
    },
    {
      "epoch": 0.8182936967721242,
      "grad_norm": 0.756228506565094,
      "learning_rate": 8.418520983375396e-06,
      "loss": 0.0342,
      "step": 500020
    },
    {
      "epoch": 0.8183264272107775,
      "grad_norm": 0.9944620728492737,
      "learning_rate": 8.418455091161878e-06,
      "loss": 0.0294,
      "step": 500040
    },
    {
      "epoch": 0.8183591576494308,
      "grad_norm": 2.522094249725342,
      "learning_rate": 8.418389198948362e-06,
      "loss": 0.0181,
      "step": 500060
    },
    {
      "epoch": 0.8183918880880842,
      "grad_norm": 1.4863272905349731,
      "learning_rate": 8.418323306734843e-06,
      "loss": 0.031,
      "step": 500080
    },
    {
      "epoch": 0.8184246185267375,
      "grad_norm": 1.1059107780456543,
      "learning_rate": 8.418257414521327e-06,
      "loss": 0.0319,
      "step": 500100
    },
    {
      "epoch": 0.8184573489653908,
      "grad_norm": 1.164212942123413,
      "learning_rate": 8.418191522307809e-06,
      "loss": 0.0312,
      "step": 500120
    },
    {
      "epoch": 0.8184900794040442,
      "grad_norm": 0.6024447679519653,
      "learning_rate": 8.418125630094292e-06,
      "loss": 0.0398,
      "step": 500140
    },
    {
      "epoch": 0.8185228098426975,
      "grad_norm": 0.13824686408042908,
      "learning_rate": 8.418059737880774e-06,
      "loss": 0.0298,
      "step": 500160
    },
    {
      "epoch": 0.8185555402813508,
      "grad_norm": 0.40585270524024963,
      "learning_rate": 8.417993845667258e-06,
      "loss": 0.0259,
      "step": 500180
    },
    {
      "epoch": 0.8185882707200042,
      "grad_norm": 0.45504146814346313,
      "learning_rate": 8.41792795345374e-06,
      "loss": 0.0267,
      "step": 500200
    },
    {
      "epoch": 0.8186210011586575,
      "grad_norm": 1.5784692764282227,
      "learning_rate": 8.417862061240223e-06,
      "loss": 0.0413,
      "step": 500220
    },
    {
      "epoch": 0.8186537315973109,
      "grad_norm": 3.4902215003967285,
      "learning_rate": 8.417796169026707e-06,
      "loss": 0.026,
      "step": 500240
    },
    {
      "epoch": 0.8186864620359642,
      "grad_norm": 0.36037495732307434,
      "learning_rate": 8.417730276813189e-06,
      "loss": 0.0336,
      "step": 500260
    },
    {
      "epoch": 0.8187191924746176,
      "grad_norm": 0.33100706338882446,
      "learning_rate": 8.417664384599673e-06,
      "loss": 0.0374,
      "step": 500280
    },
    {
      "epoch": 0.8187519229132709,
      "grad_norm": 0.2724151015281677,
      "learning_rate": 8.417598492386156e-06,
      "loss": 0.0253,
      "step": 500300
    },
    {
      "epoch": 0.8187846533519242,
      "grad_norm": 0.3511715829372406,
      "learning_rate": 8.417532600172638e-06,
      "loss": 0.0236,
      "step": 500320
    },
    {
      "epoch": 0.8188173837905776,
      "grad_norm": 0.18069463968276978,
      "learning_rate": 8.417466707959122e-06,
      "loss": 0.0273,
      "step": 500340
    },
    {
      "epoch": 0.8188501142292309,
      "grad_norm": 0.2984016239643097,
      "learning_rate": 8.417400815745605e-06,
      "loss": 0.0286,
      "step": 500360
    },
    {
      "epoch": 0.8188828446678842,
      "grad_norm": 1.2363523244857788,
      "learning_rate": 8.417334923532087e-06,
      "loss": 0.0335,
      "step": 500380
    },
    {
      "epoch": 0.8189155751065376,
      "grad_norm": 1.0428284406661987,
      "learning_rate": 8.41726903131857e-06,
      "loss": 0.0391,
      "step": 500400
    },
    {
      "epoch": 0.8189483055451909,
      "grad_norm": 1.2320328950881958,
      "learning_rate": 8.417203139105053e-06,
      "loss": 0.0377,
      "step": 500420
    },
    {
      "epoch": 0.8189810359838443,
      "grad_norm": 0.7212628722190857,
      "learning_rate": 8.417137246891536e-06,
      "loss": 0.0338,
      "step": 500440
    },
    {
      "epoch": 0.8190137664224976,
      "grad_norm": 0.1699356734752655,
      "learning_rate": 8.417071354678018e-06,
      "loss": 0.0279,
      "step": 500460
    },
    {
      "epoch": 0.8190464968611509,
      "grad_norm": 0.0455920547246933,
      "learning_rate": 8.417005462464502e-06,
      "loss": 0.0221,
      "step": 500480
    },
    {
      "epoch": 0.8190792272998043,
      "grad_norm": 0.6030804514884949,
      "learning_rate": 8.416939570250983e-06,
      "loss": 0.0272,
      "step": 500500
    },
    {
      "epoch": 0.8191119577384576,
      "grad_norm": 0.2919389605522156,
      "learning_rate": 8.416873678037467e-06,
      "loss": 0.0379,
      "step": 500520
    },
    {
      "epoch": 0.819144688177111,
      "grad_norm": 1.8620476722717285,
      "learning_rate": 8.416807785823949e-06,
      "loss": 0.032,
      "step": 500540
    },
    {
      "epoch": 0.8191774186157643,
      "grad_norm": 0.47232601046562195,
      "learning_rate": 8.416741893610433e-06,
      "loss": 0.0371,
      "step": 500560
    },
    {
      "epoch": 0.8192101490544176,
      "grad_norm": 1.8872283697128296,
      "learning_rate": 8.416676001396914e-06,
      "loss": 0.0243,
      "step": 500580
    },
    {
      "epoch": 0.819242879493071,
      "grad_norm": 1.191947102546692,
      "learning_rate": 8.416610109183398e-06,
      "loss": 0.03,
      "step": 500600
    },
    {
      "epoch": 0.8192756099317243,
      "grad_norm": 0.6642354130744934,
      "learning_rate": 8.416544216969882e-06,
      "loss": 0.0268,
      "step": 500620
    },
    {
      "epoch": 0.8193083403703777,
      "grad_norm": 1.4993007183074951,
      "learning_rate": 8.416478324756364e-06,
      "loss": 0.0376,
      "step": 500640
    },
    {
      "epoch": 0.819341070809031,
      "grad_norm": 0.541658878326416,
      "learning_rate": 8.416412432542847e-06,
      "loss": 0.0255,
      "step": 500660
    },
    {
      "epoch": 0.8193738012476843,
      "grad_norm": 0.7350987195968628,
      "learning_rate": 8.41634654032933e-06,
      "loss": 0.0354,
      "step": 500680
    },
    {
      "epoch": 0.8194065316863377,
      "grad_norm": 0.43548867106437683,
      "learning_rate": 8.416280648115813e-06,
      "loss": 0.0385,
      "step": 500700
    },
    {
      "epoch": 0.819439262124991,
      "grad_norm": 1.7595919370651245,
      "learning_rate": 8.416214755902296e-06,
      "loss": 0.0317,
      "step": 500720
    },
    {
      "epoch": 0.8194719925636443,
      "grad_norm": 0.532077968120575,
      "learning_rate": 8.41614886368878e-06,
      "loss": 0.0279,
      "step": 500740
    },
    {
      "epoch": 0.8195047230022977,
      "grad_norm": 1.534441351890564,
      "learning_rate": 8.416082971475262e-06,
      "loss": 0.0271,
      "step": 500760
    },
    {
      "epoch": 0.819537453440951,
      "grad_norm": 1.7088818550109863,
      "learning_rate": 8.416017079261745e-06,
      "loss": 0.0307,
      "step": 500780
    },
    {
      "epoch": 0.8195701838796043,
      "grad_norm": 1.3125522136688232,
      "learning_rate": 8.415951187048227e-06,
      "loss": 0.0286,
      "step": 500800
    },
    {
      "epoch": 0.8196029143182577,
      "grad_norm": 0.6396465301513672,
      "learning_rate": 8.41588529483471e-06,
      "loss": 0.025,
      "step": 500820
    },
    {
      "epoch": 0.8196356447569111,
      "grad_norm": 1.0288410186767578,
      "learning_rate": 8.415819402621193e-06,
      "loss": 0.0289,
      "step": 500840
    },
    {
      "epoch": 0.8196683751955643,
      "grad_norm": 0.8956464529037476,
      "learning_rate": 8.415753510407676e-06,
      "loss": 0.0387,
      "step": 500860
    },
    {
      "epoch": 0.8197011056342177,
      "grad_norm": 1.0693879127502441,
      "learning_rate": 8.415687618194158e-06,
      "loss": 0.0264,
      "step": 500880
    },
    {
      "epoch": 0.8197338360728711,
      "grad_norm": 1.7563269138336182,
      "learning_rate": 8.415621725980642e-06,
      "loss": 0.0298,
      "step": 500900
    },
    {
      "epoch": 0.8197665665115244,
      "grad_norm": 1.5983576774597168,
      "learning_rate": 8.415555833767124e-06,
      "loss": 0.0298,
      "step": 500920
    },
    {
      "epoch": 0.8197992969501777,
      "grad_norm": 0.7320714592933655,
      "learning_rate": 8.415489941553607e-06,
      "loss": 0.0362,
      "step": 500940
    },
    {
      "epoch": 0.8198320273888311,
      "grad_norm": 0.35771745443344116,
      "learning_rate": 8.41542404934009e-06,
      "loss": 0.036,
      "step": 500960
    },
    {
      "epoch": 0.8198647578274844,
      "grad_norm": 1.2267241477966309,
      "learning_rate": 8.415358157126573e-06,
      "loss": 0.0259,
      "step": 500980
    },
    {
      "epoch": 0.8198974882661377,
      "grad_norm": 0.8481120467185974,
      "learning_rate": 8.415292264913056e-06,
      "loss": 0.037,
      "step": 501000
    },
    {
      "epoch": 0.8199302187047911,
      "grad_norm": 1.1323562860488892,
      "learning_rate": 8.415226372699538e-06,
      "loss": 0.0294,
      "step": 501020
    },
    {
      "epoch": 0.8199629491434445,
      "grad_norm": 0.4309045672416687,
      "learning_rate": 8.415160480486022e-06,
      "loss": 0.0307,
      "step": 501040
    },
    {
      "epoch": 0.8199956795820977,
      "grad_norm": 1.863239049911499,
      "learning_rate": 8.415094588272504e-06,
      "loss": 0.0375,
      "step": 501060
    },
    {
      "epoch": 0.8200284100207511,
      "grad_norm": 0.22634507715702057,
      "learning_rate": 8.415028696058987e-06,
      "loss": 0.0153,
      "step": 501080
    },
    {
      "epoch": 0.8200611404594045,
      "grad_norm": 0.4970635771751404,
      "learning_rate": 8.41496280384547e-06,
      "loss": 0.0179,
      "step": 501100
    },
    {
      "epoch": 0.8200938708980577,
      "grad_norm": 1.1778048276901245,
      "learning_rate": 8.414896911631953e-06,
      "loss": 0.026,
      "step": 501120
    },
    {
      "epoch": 0.8201266013367111,
      "grad_norm": 0.2096298784017563,
      "learning_rate": 8.414831019418436e-06,
      "loss": 0.0282,
      "step": 501140
    },
    {
      "epoch": 0.8201593317753645,
      "grad_norm": 1.1475260257720947,
      "learning_rate": 8.41476512720492e-06,
      "loss": 0.048,
      "step": 501160
    },
    {
      "epoch": 0.8201920622140177,
      "grad_norm": 3.7464287281036377,
      "learning_rate": 8.414699234991402e-06,
      "loss": 0.0376,
      "step": 501180
    },
    {
      "epoch": 0.8202247926526711,
      "grad_norm": 1.263931393623352,
      "learning_rate": 8.414633342777885e-06,
      "loss": 0.0309,
      "step": 501200
    },
    {
      "epoch": 0.8202575230913245,
      "grad_norm": 0.3022495210170746,
      "learning_rate": 8.414567450564367e-06,
      "loss": 0.0361,
      "step": 501220
    },
    {
      "epoch": 0.8202902535299779,
      "grad_norm": 1.0874351263046265,
      "learning_rate": 8.41450155835085e-06,
      "loss": 0.0317,
      "step": 501240
    },
    {
      "epoch": 0.8203229839686311,
      "grad_norm": 0.5100713968276978,
      "learning_rate": 8.414435666137333e-06,
      "loss": 0.0284,
      "step": 501260
    },
    {
      "epoch": 0.8203557144072845,
      "grad_norm": 2.7901852130889893,
      "learning_rate": 8.414369773923816e-06,
      "loss": 0.0341,
      "step": 501280
    },
    {
      "epoch": 0.8203884448459379,
      "grad_norm": 0.649897575378418,
      "learning_rate": 8.414303881710298e-06,
      "loss": 0.0347,
      "step": 501300
    },
    {
      "epoch": 0.8204211752845911,
      "grad_norm": 2.419822931289673,
      "learning_rate": 8.414237989496782e-06,
      "loss": 0.0347,
      "step": 501320
    },
    {
      "epoch": 0.8204539057232445,
      "grad_norm": 0.9537013173103333,
      "learning_rate": 8.414172097283265e-06,
      "loss": 0.0197,
      "step": 501340
    },
    {
      "epoch": 0.8204866361618979,
      "grad_norm": 0.5105686187744141,
      "learning_rate": 8.414106205069747e-06,
      "loss": 0.0217,
      "step": 501360
    },
    {
      "epoch": 0.8205193666005511,
      "grad_norm": 0.4355297088623047,
      "learning_rate": 8.41404031285623e-06,
      "loss": 0.034,
      "step": 501380
    },
    {
      "epoch": 0.8205520970392045,
      "grad_norm": 0.8785417079925537,
      "learning_rate": 8.413974420642713e-06,
      "loss": 0.0239,
      "step": 501400
    },
    {
      "epoch": 0.8205848274778579,
      "grad_norm": 0.7437629103660583,
      "learning_rate": 8.413908528429196e-06,
      "loss": 0.0316,
      "step": 501420
    },
    {
      "epoch": 0.8206175579165111,
      "grad_norm": 0.5479865074157715,
      "learning_rate": 8.413842636215678e-06,
      "loss": 0.0349,
      "step": 501440
    },
    {
      "epoch": 0.8206502883551645,
      "grad_norm": 0.8640333414077759,
      "learning_rate": 8.413776744002162e-06,
      "loss": 0.0223,
      "step": 501460
    },
    {
      "epoch": 0.8206830187938179,
      "grad_norm": 0.8374588489532471,
      "learning_rate": 8.413710851788645e-06,
      "loss": 0.0346,
      "step": 501480
    },
    {
      "epoch": 0.8207157492324713,
      "grad_norm": 0.9363725781440735,
      "learning_rate": 8.413644959575127e-06,
      "loss": 0.0321,
      "step": 501500
    },
    {
      "epoch": 0.8207484796711245,
      "grad_norm": 0.39810171723365784,
      "learning_rate": 8.41357906736161e-06,
      "loss": 0.0336,
      "step": 501520
    },
    {
      "epoch": 0.8207812101097779,
      "grad_norm": 0.4812295436859131,
      "learning_rate": 8.413513175148094e-06,
      "loss": 0.0279,
      "step": 501540
    },
    {
      "epoch": 0.8208139405484313,
      "grad_norm": 0.6854058504104614,
      "learning_rate": 8.413447282934576e-06,
      "loss": 0.0345,
      "step": 501560
    },
    {
      "epoch": 0.8208466709870845,
      "grad_norm": 0.27395790815353394,
      "learning_rate": 8.41338139072106e-06,
      "loss": 0.0251,
      "step": 501580
    },
    {
      "epoch": 0.8208794014257379,
      "grad_norm": 0.17697225511074066,
      "learning_rate": 8.413315498507542e-06,
      "loss": 0.0375,
      "step": 501600
    },
    {
      "epoch": 0.8209121318643913,
      "grad_norm": 1.5001485347747803,
      "learning_rate": 8.413249606294025e-06,
      "loss": 0.0391,
      "step": 501620
    },
    {
      "epoch": 0.8209448623030445,
      "grad_norm": 1.2401353120803833,
      "learning_rate": 8.413183714080507e-06,
      "loss": 0.034,
      "step": 501640
    },
    {
      "epoch": 0.8209775927416979,
      "grad_norm": 1.2055974006652832,
      "learning_rate": 8.413117821866991e-06,
      "loss": 0.0186,
      "step": 501660
    },
    {
      "epoch": 0.8210103231803513,
      "grad_norm": 0.4442998468875885,
      "learning_rate": 8.413051929653474e-06,
      "loss": 0.0218,
      "step": 501680
    },
    {
      "epoch": 0.8210430536190046,
      "grad_norm": 1.2433778047561646,
      "learning_rate": 8.412986037439956e-06,
      "loss": 0.0273,
      "step": 501700
    },
    {
      "epoch": 0.8210757840576579,
      "grad_norm": 0.7288256287574768,
      "learning_rate": 8.41292014522644e-06,
      "loss": 0.0214,
      "step": 501720
    },
    {
      "epoch": 0.8211085144963113,
      "grad_norm": 1.4329499006271362,
      "learning_rate": 8.412854253012922e-06,
      "loss": 0.0314,
      "step": 501740
    },
    {
      "epoch": 0.8211412449349647,
      "grad_norm": 0.9282984137535095,
      "learning_rate": 8.412788360799405e-06,
      "loss": 0.0338,
      "step": 501760
    },
    {
      "epoch": 0.8211739753736179,
      "grad_norm": 0.3785190284252167,
      "learning_rate": 8.412722468585887e-06,
      "loss": 0.0253,
      "step": 501780
    },
    {
      "epoch": 0.8212067058122713,
      "grad_norm": 1.6005980968475342,
      "learning_rate": 8.412656576372371e-06,
      "loss": 0.0398,
      "step": 501800
    },
    {
      "epoch": 0.8212394362509247,
      "grad_norm": 1.2674858570098877,
      "learning_rate": 8.412590684158853e-06,
      "loss": 0.0366,
      "step": 501820
    },
    {
      "epoch": 0.8212721666895779,
      "grad_norm": 1.3348973989486694,
      "learning_rate": 8.412524791945336e-06,
      "loss": 0.0377,
      "step": 501840
    },
    {
      "epoch": 0.8213048971282313,
      "grad_norm": 0.23091784119606018,
      "learning_rate": 8.412458899731818e-06,
      "loss": 0.0299,
      "step": 501860
    },
    {
      "epoch": 0.8213376275668847,
      "grad_norm": 0.6874868869781494,
      "learning_rate": 8.412393007518302e-06,
      "loss": 0.0276,
      "step": 501880
    },
    {
      "epoch": 0.821370358005538,
      "grad_norm": 1.1201362609863281,
      "learning_rate": 8.412327115304785e-06,
      "loss": 0.0266,
      "step": 501900
    },
    {
      "epoch": 0.8214030884441913,
      "grad_norm": 0.7251022458076477,
      "learning_rate": 8.412261223091267e-06,
      "loss": 0.0304,
      "step": 501920
    },
    {
      "epoch": 0.8214358188828447,
      "grad_norm": 1.7807643413543701,
      "learning_rate": 8.412195330877751e-06,
      "loss": 0.0428,
      "step": 501940
    },
    {
      "epoch": 0.821468549321498,
      "grad_norm": 0.37765035033226013,
      "learning_rate": 8.412129438664235e-06,
      "loss": 0.0344,
      "step": 501960
    },
    {
      "epoch": 0.8215012797601513,
      "grad_norm": 0.11202453076839447,
      "learning_rate": 8.412063546450716e-06,
      "loss": 0.0252,
      "step": 501980
    },
    {
      "epoch": 0.8215340101988047,
      "grad_norm": 0.8730443716049194,
      "learning_rate": 8.4119976542372e-06,
      "loss": 0.0298,
      "step": 502000
    },
    {
      "epoch": 0.821566740637458,
      "grad_norm": 1.099745273590088,
      "learning_rate": 8.411931762023684e-06,
      "loss": 0.0376,
      "step": 502020
    },
    {
      "epoch": 0.8215994710761113,
      "grad_norm": 1.2356654405593872,
      "learning_rate": 8.411865869810165e-06,
      "loss": 0.0376,
      "step": 502040
    },
    {
      "epoch": 0.8216322015147647,
      "grad_norm": 1.3075443506240845,
      "learning_rate": 8.411799977596649e-06,
      "loss": 0.0341,
      "step": 502060
    },
    {
      "epoch": 0.821664931953418,
      "grad_norm": 0.48246827721595764,
      "learning_rate": 8.411734085383131e-06,
      "loss": 0.0291,
      "step": 502080
    },
    {
      "epoch": 0.8216976623920714,
      "grad_norm": 0.8700282573699951,
      "learning_rate": 8.411668193169615e-06,
      "loss": 0.0273,
      "step": 502100
    },
    {
      "epoch": 0.8217303928307247,
      "grad_norm": 1.3061819076538086,
      "learning_rate": 8.411602300956096e-06,
      "loss": 0.0257,
      "step": 502120
    },
    {
      "epoch": 0.8217631232693781,
      "grad_norm": 0.16813045740127563,
      "learning_rate": 8.41153640874258e-06,
      "loss": 0.0258,
      "step": 502140
    },
    {
      "epoch": 0.8217958537080314,
      "grad_norm": 0.5097666382789612,
      "learning_rate": 8.411470516529062e-06,
      "loss": 0.026,
      "step": 502160
    },
    {
      "epoch": 0.8218285841466847,
      "grad_norm": 0.2414342164993286,
      "learning_rate": 8.411404624315545e-06,
      "loss": 0.0236,
      "step": 502180
    },
    {
      "epoch": 0.8218613145853381,
      "grad_norm": 1.4609163999557495,
      "learning_rate": 8.411338732102027e-06,
      "loss": 0.0388,
      "step": 502200
    },
    {
      "epoch": 0.8218940450239914,
      "grad_norm": 1.2452696561813354,
      "learning_rate": 8.411272839888511e-06,
      "loss": 0.0385,
      "step": 502220
    },
    {
      "epoch": 0.8219267754626447,
      "grad_norm": 0.9570465683937073,
      "learning_rate": 8.411206947674993e-06,
      "loss": 0.0325,
      "step": 502240
    },
    {
      "epoch": 0.8219595059012981,
      "grad_norm": 0.30495718121528625,
      "learning_rate": 8.411141055461476e-06,
      "loss": 0.0309,
      "step": 502260
    },
    {
      "epoch": 0.8219922363399514,
      "grad_norm": 1.1036603450775146,
      "learning_rate": 8.41107516324796e-06,
      "loss": 0.0278,
      "step": 502280
    },
    {
      "epoch": 0.8220249667786048,
      "grad_norm": 6.735602855682373,
      "learning_rate": 8.411009271034442e-06,
      "loss": 0.0262,
      "step": 502300
    },
    {
      "epoch": 0.8220576972172581,
      "grad_norm": 0.6080226898193359,
      "learning_rate": 8.410943378820926e-06,
      "loss": 0.0303,
      "step": 502320
    },
    {
      "epoch": 0.8220904276559114,
      "grad_norm": 0.4427050054073334,
      "learning_rate": 8.410877486607409e-06,
      "loss": 0.0301,
      "step": 502340
    },
    {
      "epoch": 0.8221231580945648,
      "grad_norm": 0.7206740975379944,
      "learning_rate": 8.410811594393891e-06,
      "loss": 0.0263,
      "step": 502360
    },
    {
      "epoch": 0.8221558885332181,
      "grad_norm": 3.9174959659576416,
      "learning_rate": 8.410745702180375e-06,
      "loss": 0.0256,
      "step": 502380
    },
    {
      "epoch": 0.8221886189718715,
      "grad_norm": 1.109621524810791,
      "learning_rate": 8.410679809966858e-06,
      "loss": 0.0283,
      "step": 502400
    },
    {
      "epoch": 0.8222213494105248,
      "grad_norm": 0.9360246062278748,
      "learning_rate": 8.41061391775334e-06,
      "loss": 0.0316,
      "step": 502420
    },
    {
      "epoch": 0.8222540798491781,
      "grad_norm": 0.8415657877922058,
      "learning_rate": 8.410548025539824e-06,
      "loss": 0.029,
      "step": 502440
    },
    {
      "epoch": 0.8222868102878315,
      "grad_norm": 0.3901670575141907,
      "learning_rate": 8.410482133326306e-06,
      "loss": 0.0227,
      "step": 502460
    },
    {
      "epoch": 0.8223195407264848,
      "grad_norm": 1.8406058549880981,
      "learning_rate": 8.410416241112789e-06,
      "loss": 0.0453,
      "step": 502480
    },
    {
      "epoch": 0.8223522711651382,
      "grad_norm": 0.6286168098449707,
      "learning_rate": 8.410350348899271e-06,
      "loss": 0.0244,
      "step": 502500
    },
    {
      "epoch": 0.8223850016037915,
      "grad_norm": 1.2225641012191772,
      "learning_rate": 8.410284456685755e-06,
      "loss": 0.0242,
      "step": 502520
    },
    {
      "epoch": 0.8224177320424448,
      "grad_norm": 0.5222322940826416,
      "learning_rate": 8.410218564472237e-06,
      "loss": 0.0285,
      "step": 502540
    },
    {
      "epoch": 0.8224504624810982,
      "grad_norm": 1.4472465515136719,
      "learning_rate": 8.41015267225872e-06,
      "loss": 0.0271,
      "step": 502560
    },
    {
      "epoch": 0.8224831929197515,
      "grad_norm": 1.0640461444854736,
      "learning_rate": 8.410086780045202e-06,
      "loss": 0.0331,
      "step": 502580
    },
    {
      "epoch": 0.8225159233584048,
      "grad_norm": 0.7593437433242798,
      "learning_rate": 8.410020887831686e-06,
      "loss": 0.0368,
      "step": 502600
    },
    {
      "epoch": 0.8225486537970582,
      "grad_norm": 1.3630461692810059,
      "learning_rate": 8.409954995618167e-06,
      "loss": 0.0339,
      "step": 502620
    },
    {
      "epoch": 0.8225813842357115,
      "grad_norm": 0.6968238949775696,
      "learning_rate": 8.409889103404651e-06,
      "loss": 0.0215,
      "step": 502640
    },
    {
      "epoch": 0.8226141146743648,
      "grad_norm": 1.1215885877609253,
      "learning_rate": 8.409823211191133e-06,
      "loss": 0.0394,
      "step": 502660
    },
    {
      "epoch": 0.8226468451130182,
      "grad_norm": 0.7447208762168884,
      "learning_rate": 8.409757318977617e-06,
      "loss": 0.0305,
      "step": 502680
    },
    {
      "epoch": 0.8226795755516716,
      "grad_norm": 0.778483510017395,
      "learning_rate": 8.4096914267641e-06,
      "loss": 0.037,
      "step": 502700
    },
    {
      "epoch": 0.8227123059903249,
      "grad_norm": 1.3657692670822144,
      "learning_rate": 8.409625534550582e-06,
      "loss": 0.0286,
      "step": 502720
    },
    {
      "epoch": 0.8227450364289782,
      "grad_norm": 0.43334928154945374,
      "learning_rate": 8.409559642337066e-06,
      "loss": 0.0288,
      "step": 502740
    },
    {
      "epoch": 0.8227777668676316,
      "grad_norm": 0.4979053735733032,
      "learning_rate": 8.40949375012355e-06,
      "loss": 0.0314,
      "step": 502760
    },
    {
      "epoch": 0.8228104973062849,
      "grad_norm": 0.974627673625946,
      "learning_rate": 8.409427857910031e-06,
      "loss": 0.0302,
      "step": 502780
    },
    {
      "epoch": 0.8228432277449382,
      "grad_norm": 0.7899162173271179,
      "learning_rate": 8.409361965696515e-06,
      "loss": 0.0298,
      "step": 502800
    },
    {
      "epoch": 0.8228759581835916,
      "grad_norm": 0.7088854908943176,
      "learning_rate": 8.409296073482998e-06,
      "loss": 0.0322,
      "step": 502820
    },
    {
      "epoch": 0.8229086886222449,
      "grad_norm": 1.6525144577026367,
      "learning_rate": 8.40923018126948e-06,
      "loss": 0.0459,
      "step": 502840
    },
    {
      "epoch": 0.8229414190608982,
      "grad_norm": 0.8901063203811646,
      "learning_rate": 8.409164289055964e-06,
      "loss": 0.0308,
      "step": 502860
    },
    {
      "epoch": 0.8229741494995516,
      "grad_norm": 0.9211885333061218,
      "learning_rate": 8.409098396842446e-06,
      "loss": 0.03,
      "step": 502880
    },
    {
      "epoch": 0.823006879938205,
      "grad_norm": 0.9557154178619385,
      "learning_rate": 8.40903250462893e-06,
      "loss": 0.0277,
      "step": 502900
    },
    {
      "epoch": 0.8230396103768582,
      "grad_norm": 1.8686856031417847,
      "learning_rate": 8.408966612415411e-06,
      "loss": 0.0299,
      "step": 502920
    },
    {
      "epoch": 0.8230723408155116,
      "grad_norm": 0.9040728807449341,
      "learning_rate": 8.408900720201895e-06,
      "loss": 0.0436,
      "step": 502940
    },
    {
      "epoch": 0.823105071254165,
      "grad_norm": 0.8221967816352844,
      "learning_rate": 8.408834827988377e-06,
      "loss": 0.0227,
      "step": 502960
    },
    {
      "epoch": 0.8231378016928182,
      "grad_norm": 1.0805881023406982,
      "learning_rate": 8.40876893577486e-06,
      "loss": 0.0511,
      "step": 502980
    },
    {
      "epoch": 0.8231705321314716,
      "grad_norm": 0.4202861785888672,
      "learning_rate": 8.408703043561342e-06,
      "loss": 0.0271,
      "step": 503000
    },
    {
      "epoch": 0.823203262570125,
      "grad_norm": 0.6914221048355103,
      "learning_rate": 8.408637151347826e-06,
      "loss": 0.0244,
      "step": 503020
    },
    {
      "epoch": 0.8232359930087783,
      "grad_norm": 4.589884281158447,
      "learning_rate": 8.408571259134308e-06,
      "loss": 0.0311,
      "step": 503040
    },
    {
      "epoch": 0.8232687234474316,
      "grad_norm": 0.2545596659183502,
      "learning_rate": 8.408505366920791e-06,
      "loss": 0.0469,
      "step": 503060
    },
    {
      "epoch": 0.823301453886085,
      "grad_norm": 1.0314452648162842,
      "learning_rate": 8.408439474707275e-06,
      "loss": 0.0277,
      "step": 503080
    },
    {
      "epoch": 0.8233341843247384,
      "grad_norm": 0.9516497850418091,
      "learning_rate": 8.408373582493757e-06,
      "loss": 0.0254,
      "step": 503100
    },
    {
      "epoch": 0.8233669147633916,
      "grad_norm": 1.23844313621521,
      "learning_rate": 8.40830769028024e-06,
      "loss": 0.0369,
      "step": 503120
    },
    {
      "epoch": 0.823399645202045,
      "grad_norm": 0.39819762110710144,
      "learning_rate": 8.408241798066724e-06,
      "loss": 0.0269,
      "step": 503140
    },
    {
      "epoch": 0.8234323756406984,
      "grad_norm": 1.627465009689331,
      "learning_rate": 8.408175905853206e-06,
      "loss": 0.036,
      "step": 503160
    },
    {
      "epoch": 0.8234651060793516,
      "grad_norm": 0.4435368478298187,
      "learning_rate": 8.40811001363969e-06,
      "loss": 0.0318,
      "step": 503180
    },
    {
      "epoch": 0.823497836518005,
      "grad_norm": 0.5853809714317322,
      "learning_rate": 8.408044121426173e-06,
      "loss": 0.0293,
      "step": 503200
    },
    {
      "epoch": 0.8235305669566584,
      "grad_norm": 1.6871778964996338,
      "learning_rate": 8.407978229212655e-06,
      "loss": 0.0381,
      "step": 503220
    },
    {
      "epoch": 0.8235632973953116,
      "grad_norm": 0.36376315355300903,
      "learning_rate": 8.407912336999138e-06,
      "loss": 0.0255,
      "step": 503240
    },
    {
      "epoch": 0.823596027833965,
      "grad_norm": 1.4173208475112915,
      "learning_rate": 8.40784644478562e-06,
      "loss": 0.0393,
      "step": 503260
    },
    {
      "epoch": 0.8236287582726184,
      "grad_norm": 1.558142900466919,
      "learning_rate": 8.407780552572104e-06,
      "loss": 0.0295,
      "step": 503280
    },
    {
      "epoch": 0.8236614887112718,
      "grad_norm": 1.53709876537323,
      "learning_rate": 8.407714660358586e-06,
      "loss": 0.0266,
      "step": 503300
    },
    {
      "epoch": 0.823694219149925,
      "grad_norm": 0.7844643592834473,
      "learning_rate": 8.40764876814507e-06,
      "loss": 0.0231,
      "step": 503320
    },
    {
      "epoch": 0.8237269495885784,
      "grad_norm": 2.450575828552246,
      "learning_rate": 8.407582875931551e-06,
      "loss": 0.0301,
      "step": 503340
    },
    {
      "epoch": 0.8237596800272318,
      "grad_norm": 0.972201943397522,
      "learning_rate": 8.407516983718035e-06,
      "loss": 0.0327,
      "step": 503360
    },
    {
      "epoch": 0.823792410465885,
      "grad_norm": 2.495392084121704,
      "learning_rate": 8.407451091504517e-06,
      "loss": 0.0382,
      "step": 503380
    },
    {
      "epoch": 0.8238251409045384,
      "grad_norm": 3.003394842147827,
      "learning_rate": 8.407385199291e-06,
      "loss": 0.0322,
      "step": 503400
    },
    {
      "epoch": 0.8238578713431918,
      "grad_norm": 0.5891243815422058,
      "learning_rate": 8.407319307077484e-06,
      "loss": 0.0247,
      "step": 503420
    },
    {
      "epoch": 0.823890601781845,
      "grad_norm": 0.38750410079956055,
      "learning_rate": 8.407253414863966e-06,
      "loss": 0.0345,
      "step": 503440
    },
    {
      "epoch": 0.8239233322204984,
      "grad_norm": 2.3114676475524902,
      "learning_rate": 8.40718752265045e-06,
      "loss": 0.0338,
      "step": 503460
    },
    {
      "epoch": 0.8239560626591518,
      "grad_norm": 1.679338812828064,
      "learning_rate": 8.407121630436931e-06,
      "loss": 0.0367,
      "step": 503480
    },
    {
      "epoch": 0.8239887930978052,
      "grad_norm": 1.1424190998077393,
      "learning_rate": 8.407055738223415e-06,
      "loss": 0.0318,
      "step": 503500
    },
    {
      "epoch": 0.8240215235364584,
      "grad_norm": 0.7416382431983948,
      "learning_rate": 8.406989846009898e-06,
      "loss": 0.0344,
      "step": 503520
    },
    {
      "epoch": 0.8240542539751118,
      "grad_norm": 1.0536140203475952,
      "learning_rate": 8.40692395379638e-06,
      "loss": 0.0362,
      "step": 503540
    },
    {
      "epoch": 0.8240869844137652,
      "grad_norm": 1.8148446083068848,
      "learning_rate": 8.406858061582864e-06,
      "loss": 0.0302,
      "step": 503560
    },
    {
      "epoch": 0.8241197148524184,
      "grad_norm": 0.19374768435955048,
      "learning_rate": 8.406792169369347e-06,
      "loss": 0.0258,
      "step": 503580
    },
    {
      "epoch": 0.8241524452910718,
      "grad_norm": 1.4625707864761353,
      "learning_rate": 8.40672627715583e-06,
      "loss": 0.0314,
      "step": 503600
    },
    {
      "epoch": 0.8241851757297252,
      "grad_norm": 1.2039648294448853,
      "learning_rate": 8.406660384942313e-06,
      "loss": 0.0291,
      "step": 503620
    },
    {
      "epoch": 0.8242179061683784,
      "grad_norm": 0.4262750446796417,
      "learning_rate": 8.406594492728795e-06,
      "loss": 0.0297,
      "step": 503640
    },
    {
      "epoch": 0.8242506366070318,
      "grad_norm": 0.44549670815467834,
      "learning_rate": 8.406528600515278e-06,
      "loss": 0.0209,
      "step": 503660
    },
    {
      "epoch": 0.8242833670456852,
      "grad_norm": 1.1179945468902588,
      "learning_rate": 8.40646270830176e-06,
      "loss": 0.0266,
      "step": 503680
    },
    {
      "epoch": 0.8243160974843385,
      "grad_norm": 0.3835625946521759,
      "learning_rate": 8.406396816088244e-06,
      "loss": 0.0302,
      "step": 503700
    },
    {
      "epoch": 0.8243488279229918,
      "grad_norm": 1.0415630340576172,
      "learning_rate": 8.406330923874726e-06,
      "loss": 0.0296,
      "step": 503720
    },
    {
      "epoch": 0.8243815583616452,
      "grad_norm": 1.630933403968811,
      "learning_rate": 8.40626503166121e-06,
      "loss": 0.0314,
      "step": 503740
    },
    {
      "epoch": 0.8244142888002985,
      "grad_norm": 0.9332761764526367,
      "learning_rate": 8.406199139447691e-06,
      "loss": 0.0341,
      "step": 503760
    },
    {
      "epoch": 0.8244470192389518,
      "grad_norm": 0.7813428640365601,
      "learning_rate": 8.406133247234175e-06,
      "loss": 0.0322,
      "step": 503780
    },
    {
      "epoch": 0.8244797496776052,
      "grad_norm": 2.8620572090148926,
      "learning_rate": 8.406067355020658e-06,
      "loss": 0.0372,
      "step": 503800
    },
    {
      "epoch": 0.8245124801162586,
      "grad_norm": 0.5842984914779663,
      "learning_rate": 8.40600146280714e-06,
      "loss": 0.0293,
      "step": 503820
    },
    {
      "epoch": 0.8245452105549118,
      "grad_norm": 0.9209339618682861,
      "learning_rate": 8.405935570593624e-06,
      "loss": 0.02,
      "step": 503840
    },
    {
      "epoch": 0.8245779409935652,
      "grad_norm": 1.087742567062378,
      "learning_rate": 8.405869678380106e-06,
      "loss": 0.0306,
      "step": 503860
    },
    {
      "epoch": 0.8246106714322186,
      "grad_norm": 0.7726776599884033,
      "learning_rate": 8.40580378616659e-06,
      "loss": 0.0302,
      "step": 503880
    },
    {
      "epoch": 0.8246434018708719,
      "grad_norm": 0.6268381476402283,
      "learning_rate": 8.405737893953071e-06,
      "loss": 0.0253,
      "step": 503900
    },
    {
      "epoch": 0.8246761323095252,
      "grad_norm": 1.4468927383422852,
      "learning_rate": 8.405672001739555e-06,
      "loss": 0.0221,
      "step": 503920
    },
    {
      "epoch": 0.8247088627481786,
      "grad_norm": 1.027742862701416,
      "learning_rate": 8.405606109526038e-06,
      "loss": 0.0238,
      "step": 503940
    },
    {
      "epoch": 0.8247415931868319,
      "grad_norm": 0.19242681562900543,
      "learning_rate": 8.40554021731252e-06,
      "loss": 0.0278,
      "step": 503960
    },
    {
      "epoch": 0.8247743236254852,
      "grad_norm": 0.47872230410575867,
      "learning_rate": 8.405474325099004e-06,
      "loss": 0.0285,
      "step": 503980
    },
    {
      "epoch": 0.8248070540641386,
      "grad_norm": 0.6872311234474182,
      "learning_rate": 8.405408432885488e-06,
      "loss": 0.0294,
      "step": 504000
    },
    {
      "epoch": 0.8248397845027919,
      "grad_norm": 0.6744790077209473,
      "learning_rate": 8.40534254067197e-06,
      "loss": 0.0281,
      "step": 504020
    },
    {
      "epoch": 0.8248725149414452,
      "grad_norm": 0.3100600242614746,
      "learning_rate": 8.405276648458453e-06,
      "loss": 0.0266,
      "step": 504040
    },
    {
      "epoch": 0.8249052453800986,
      "grad_norm": 1.4654734134674072,
      "learning_rate": 8.405210756244935e-06,
      "loss": 0.028,
      "step": 504060
    },
    {
      "epoch": 0.824937975818752,
      "grad_norm": 1.9244524240493774,
      "learning_rate": 8.405144864031418e-06,
      "loss": 0.0315,
      "step": 504080
    },
    {
      "epoch": 0.8249707062574052,
      "grad_norm": 0.9562583565711975,
      "learning_rate": 8.4050789718179e-06,
      "loss": 0.036,
      "step": 504100
    },
    {
      "epoch": 0.8250034366960586,
      "grad_norm": 1.2705072164535522,
      "learning_rate": 8.405013079604384e-06,
      "loss": 0.0353,
      "step": 504120
    },
    {
      "epoch": 0.825036167134712,
      "grad_norm": 0.6247863173484802,
      "learning_rate": 8.404947187390868e-06,
      "loss": 0.0372,
      "step": 504140
    },
    {
      "epoch": 0.8250688975733653,
      "grad_norm": 0.626364529132843,
      "learning_rate": 8.40488129517735e-06,
      "loss": 0.0317,
      "step": 504160
    },
    {
      "epoch": 0.8251016280120186,
      "grad_norm": 3.2158827781677246,
      "learning_rate": 8.404815402963833e-06,
      "loss": 0.036,
      "step": 504180
    },
    {
      "epoch": 0.825134358450672,
      "grad_norm": 0.358555406332016,
      "learning_rate": 8.404749510750315e-06,
      "loss": 0.0189,
      "step": 504200
    },
    {
      "epoch": 0.8251670888893253,
      "grad_norm": 0.4833641052246094,
      "learning_rate": 8.404683618536798e-06,
      "loss": 0.0256,
      "step": 504220
    },
    {
      "epoch": 0.8251998193279786,
      "grad_norm": 0.6134554743766785,
      "learning_rate": 8.40461772632328e-06,
      "loss": 0.0309,
      "step": 504240
    },
    {
      "epoch": 0.825232549766632,
      "grad_norm": 0.6033252477645874,
      "learning_rate": 8.404551834109764e-06,
      "loss": 0.0244,
      "step": 504260
    },
    {
      "epoch": 0.8252652802052853,
      "grad_norm": 2.5711145401000977,
      "learning_rate": 8.404485941896246e-06,
      "loss": 0.043,
      "step": 504280
    },
    {
      "epoch": 0.8252980106439386,
      "grad_norm": 1.3099842071533203,
      "learning_rate": 8.40442004968273e-06,
      "loss": 0.0312,
      "step": 504300
    },
    {
      "epoch": 0.825330741082592,
      "grad_norm": 1.3970569372177124,
      "learning_rate": 8.404354157469213e-06,
      "loss": 0.0315,
      "step": 504320
    },
    {
      "epoch": 0.8253634715212453,
      "grad_norm": 0.5927549600601196,
      "learning_rate": 8.404288265255695e-06,
      "loss": 0.0344,
      "step": 504340
    },
    {
      "epoch": 0.8253962019598987,
      "grad_norm": 0.6433324813842773,
      "learning_rate": 8.404222373042179e-06,
      "loss": 0.0269,
      "step": 504360
    },
    {
      "epoch": 0.825428932398552,
      "grad_norm": 1.3713032007217407,
      "learning_rate": 8.404156480828662e-06,
      "loss": 0.0335,
      "step": 504380
    },
    {
      "epoch": 0.8254616628372053,
      "grad_norm": 0.7202673554420471,
      "learning_rate": 8.404090588615144e-06,
      "loss": 0.0352,
      "step": 504400
    },
    {
      "epoch": 0.8254943932758587,
      "grad_norm": 0.6678480505943298,
      "learning_rate": 8.404024696401628e-06,
      "loss": 0.0223,
      "step": 504420
    },
    {
      "epoch": 0.825527123714512,
      "grad_norm": 0.32832619547843933,
      "learning_rate": 8.40395880418811e-06,
      "loss": 0.0295,
      "step": 504440
    },
    {
      "epoch": 0.8255598541531654,
      "grad_norm": 1.0177679061889648,
      "learning_rate": 8.403892911974593e-06,
      "loss": 0.0335,
      "step": 504460
    },
    {
      "epoch": 0.8255925845918187,
      "grad_norm": 0.5876697897911072,
      "learning_rate": 8.403827019761077e-06,
      "loss": 0.0375,
      "step": 504480
    },
    {
      "epoch": 0.825625315030472,
      "grad_norm": 1.9455305337905884,
      "learning_rate": 8.403761127547559e-06,
      "loss": 0.0372,
      "step": 504500
    },
    {
      "epoch": 0.8256580454691254,
      "grad_norm": 2.055053472518921,
      "learning_rate": 8.403695235334042e-06,
      "loss": 0.0312,
      "step": 504520
    },
    {
      "epoch": 0.8256907759077787,
      "grad_norm": 4.1637864112854,
      "learning_rate": 8.403629343120524e-06,
      "loss": 0.0267,
      "step": 504540
    },
    {
      "epoch": 0.8257235063464321,
      "grad_norm": 0.9922204613685608,
      "learning_rate": 8.403563450907008e-06,
      "loss": 0.0395,
      "step": 504560
    },
    {
      "epoch": 0.8257562367850854,
      "grad_norm": 0.4138052761554718,
      "learning_rate": 8.40349755869349e-06,
      "loss": 0.0229,
      "step": 504580
    },
    {
      "epoch": 0.8257889672237387,
      "grad_norm": 3.5700759887695312,
      "learning_rate": 8.403431666479973e-06,
      "loss": 0.0294,
      "step": 504600
    },
    {
      "epoch": 0.8258216976623921,
      "grad_norm": 0.5721673369407654,
      "learning_rate": 8.403365774266455e-06,
      "loss": 0.0215,
      "step": 504620
    },
    {
      "epoch": 0.8258544281010454,
      "grad_norm": 2.3384740352630615,
      "learning_rate": 8.403299882052939e-06,
      "loss": 0.0355,
      "step": 504640
    },
    {
      "epoch": 0.8258871585396987,
      "grad_norm": 0.7186251282691956,
      "learning_rate": 8.40323398983942e-06,
      "loss": 0.021,
      "step": 504660
    },
    {
      "epoch": 0.8259198889783521,
      "grad_norm": 1.140250325202942,
      "learning_rate": 8.403168097625904e-06,
      "loss": 0.0373,
      "step": 504680
    },
    {
      "epoch": 0.8259526194170054,
      "grad_norm": 0.25966721773147583,
      "learning_rate": 8.403102205412386e-06,
      "loss": 0.0257,
      "step": 504700
    },
    {
      "epoch": 0.8259853498556587,
      "grad_norm": 0.6147198677062988,
      "learning_rate": 8.40303631319887e-06,
      "loss": 0.0207,
      "step": 504720
    },
    {
      "epoch": 0.8260180802943121,
      "grad_norm": 0.7102366089820862,
      "learning_rate": 8.402970420985353e-06,
      "loss": 0.0218,
      "step": 504740
    },
    {
      "epoch": 0.8260508107329655,
      "grad_norm": 0.43239903450012207,
      "learning_rate": 8.402904528771835e-06,
      "loss": 0.0315,
      "step": 504760
    },
    {
      "epoch": 0.8260835411716188,
      "grad_norm": 0.4560125768184662,
      "learning_rate": 8.402838636558319e-06,
      "loss": 0.0301,
      "step": 504780
    },
    {
      "epoch": 0.8261162716102721,
      "grad_norm": 2.361175537109375,
      "learning_rate": 8.402772744344802e-06,
      "loss": 0.0321,
      "step": 504800
    },
    {
      "epoch": 0.8261490020489255,
      "grad_norm": 0.07386968284845352,
      "learning_rate": 8.402706852131284e-06,
      "loss": 0.0307,
      "step": 504820
    },
    {
      "epoch": 0.8261817324875788,
      "grad_norm": 0.30871695280075073,
      "learning_rate": 8.402640959917768e-06,
      "loss": 0.029,
      "step": 504840
    },
    {
      "epoch": 0.8262144629262321,
      "grad_norm": 1.9659852981567383,
      "learning_rate": 8.402575067704251e-06,
      "loss": 0.0349,
      "step": 504860
    },
    {
      "epoch": 0.8262471933648855,
      "grad_norm": 1.1332954168319702,
      "learning_rate": 8.402509175490733e-06,
      "loss": 0.0323,
      "step": 504880
    },
    {
      "epoch": 0.8262799238035388,
      "grad_norm": 0.7410255074501038,
      "learning_rate": 8.402443283277217e-06,
      "loss": 0.0253,
      "step": 504900
    },
    {
      "epoch": 0.8263126542421921,
      "grad_norm": 0.5752835869789124,
      "learning_rate": 8.402377391063699e-06,
      "loss": 0.0326,
      "step": 504920
    },
    {
      "epoch": 0.8263453846808455,
      "grad_norm": 7.710131645202637,
      "learning_rate": 8.402311498850182e-06,
      "loss": 0.027,
      "step": 504940
    },
    {
      "epoch": 0.8263781151194989,
      "grad_norm": 1.1707069873809814,
      "learning_rate": 8.402245606636664e-06,
      "loss": 0.0327,
      "step": 504960
    },
    {
      "epoch": 0.8264108455581521,
      "grad_norm": 0.2521016597747803,
      "learning_rate": 8.402179714423148e-06,
      "loss": 0.0431,
      "step": 504980
    },
    {
      "epoch": 0.8264435759968055,
      "grad_norm": 1.0893439054489136,
      "learning_rate": 8.40211382220963e-06,
      "loss": 0.026,
      "step": 505000
    },
    {
      "epoch": 0.8264763064354589,
      "grad_norm": 1.197593092918396,
      "learning_rate": 8.402047929996113e-06,
      "loss": 0.0255,
      "step": 505020
    },
    {
      "epoch": 0.8265090368741121,
      "grad_norm": 0.14958474040031433,
      "learning_rate": 8.401982037782595e-06,
      "loss": 0.0296,
      "step": 505040
    },
    {
      "epoch": 0.8265417673127655,
      "grad_norm": 0.627884030342102,
      "learning_rate": 8.401916145569079e-06,
      "loss": 0.03,
      "step": 505060
    },
    {
      "epoch": 0.8265744977514189,
      "grad_norm": 0.5782220363616943,
      "learning_rate": 8.40185025335556e-06,
      "loss": 0.0344,
      "step": 505080
    },
    {
      "epoch": 0.8266072281900722,
      "grad_norm": 0.5568630695343018,
      "learning_rate": 8.401784361142044e-06,
      "loss": 0.036,
      "step": 505100
    },
    {
      "epoch": 0.8266399586287255,
      "grad_norm": 2.5381784439086914,
      "learning_rate": 8.401718468928528e-06,
      "loss": 0.0294,
      "step": 505120
    },
    {
      "epoch": 0.8266726890673789,
      "grad_norm": 1.5455522537231445,
      "learning_rate": 8.40165257671501e-06,
      "loss": 0.0351,
      "step": 505140
    },
    {
      "epoch": 0.8267054195060323,
      "grad_norm": 1.0609827041625977,
      "learning_rate": 8.401586684501493e-06,
      "loss": 0.0362,
      "step": 505160
    },
    {
      "epoch": 0.8267381499446855,
      "grad_norm": 0.7694171071052551,
      "learning_rate": 8.401520792287977e-06,
      "loss": 0.0314,
      "step": 505180
    },
    {
      "epoch": 0.8267708803833389,
      "grad_norm": 0.8918813467025757,
      "learning_rate": 8.401454900074459e-06,
      "loss": 0.0336,
      "step": 505200
    },
    {
      "epoch": 0.8268036108219923,
      "grad_norm": 0.9245997071266174,
      "learning_rate": 8.401389007860942e-06,
      "loss": 0.0281,
      "step": 505220
    },
    {
      "epoch": 0.8268363412606455,
      "grad_norm": 1.8686856031417847,
      "learning_rate": 8.401323115647426e-06,
      "loss": 0.036,
      "step": 505240
    },
    {
      "epoch": 0.8268690716992989,
      "grad_norm": 1.2352579832077026,
      "learning_rate": 8.401257223433908e-06,
      "loss": 0.0252,
      "step": 505260
    },
    {
      "epoch": 0.8269018021379523,
      "grad_norm": 0.9550825357437134,
      "learning_rate": 8.401191331220391e-06,
      "loss": 0.0296,
      "step": 505280
    },
    {
      "epoch": 0.8269345325766055,
      "grad_norm": 2.922428846359253,
      "learning_rate": 8.401125439006873e-06,
      "loss": 0.0212,
      "step": 505300
    },
    {
      "epoch": 0.8269672630152589,
      "grad_norm": 0.571551501750946,
      "learning_rate": 8.401059546793357e-06,
      "loss": 0.0266,
      "step": 505320
    },
    {
      "epoch": 0.8269999934539123,
      "grad_norm": 1.1412949562072754,
      "learning_rate": 8.400993654579839e-06,
      "loss": 0.0399,
      "step": 505340
    },
    {
      "epoch": 0.8270327238925657,
      "grad_norm": 0.4703531265258789,
      "learning_rate": 8.400927762366322e-06,
      "loss": 0.0315,
      "step": 505360
    },
    {
      "epoch": 0.8270654543312189,
      "grad_norm": 2.0574374198913574,
      "learning_rate": 8.400861870152804e-06,
      "loss": 0.025,
      "step": 505380
    },
    {
      "epoch": 0.8270981847698723,
      "grad_norm": 1.5739439725875854,
      "learning_rate": 8.400795977939288e-06,
      "loss": 0.0424,
      "step": 505400
    },
    {
      "epoch": 0.8271309152085257,
      "grad_norm": 0.61904376745224,
      "learning_rate": 8.40073008572577e-06,
      "loss": 0.0331,
      "step": 505420
    },
    {
      "epoch": 0.8271636456471789,
      "grad_norm": 0.32904598116874695,
      "learning_rate": 8.400664193512253e-06,
      "loss": 0.0317,
      "step": 505440
    },
    {
      "epoch": 0.8271963760858323,
      "grad_norm": 0.7976815104484558,
      "learning_rate": 8.400598301298735e-06,
      "loss": 0.0232,
      "step": 505460
    },
    {
      "epoch": 0.8272291065244857,
      "grad_norm": 1.387901782989502,
      "learning_rate": 8.400532409085219e-06,
      "loss": 0.0339,
      "step": 505480
    },
    {
      "epoch": 0.8272618369631389,
      "grad_norm": 0.6091259717941284,
      "learning_rate": 8.4004665168717e-06,
      "loss": 0.0274,
      "step": 505500
    },
    {
      "epoch": 0.8272945674017923,
      "grad_norm": 0.464186429977417,
      "learning_rate": 8.400400624658184e-06,
      "loss": 0.0325,
      "step": 505520
    },
    {
      "epoch": 0.8273272978404457,
      "grad_norm": 0.553715169429779,
      "learning_rate": 8.400334732444668e-06,
      "loss": 0.037,
      "step": 505540
    },
    {
      "epoch": 0.827360028279099,
      "grad_norm": 0.15687397122383118,
      "learning_rate": 8.40026884023115e-06,
      "loss": 0.0346,
      "step": 505560
    },
    {
      "epoch": 0.8273927587177523,
      "grad_norm": 0.3478928804397583,
      "learning_rate": 8.400202948017633e-06,
      "loss": 0.0373,
      "step": 505580
    },
    {
      "epoch": 0.8274254891564057,
      "grad_norm": 0.6038651466369629,
      "learning_rate": 8.400137055804117e-06,
      "loss": 0.0373,
      "step": 505600
    },
    {
      "epoch": 0.827458219595059,
      "grad_norm": 0.40816283226013184,
      "learning_rate": 8.400071163590599e-06,
      "loss": 0.033,
      "step": 505620
    },
    {
      "epoch": 0.8274909500337123,
      "grad_norm": 0.5044259428977966,
      "learning_rate": 8.400005271377082e-06,
      "loss": 0.0389,
      "step": 505640
    },
    {
      "epoch": 0.8275236804723657,
      "grad_norm": 1.7484561204910278,
      "learning_rate": 8.399939379163566e-06,
      "loss": 0.0247,
      "step": 505660
    },
    {
      "epoch": 0.8275564109110191,
      "grad_norm": 0.824514627456665,
      "learning_rate": 8.399873486950048e-06,
      "loss": 0.0355,
      "step": 505680
    },
    {
      "epoch": 0.8275891413496723,
      "grad_norm": 1.2614307403564453,
      "learning_rate": 8.399807594736531e-06,
      "loss": 0.025,
      "step": 505700
    },
    {
      "epoch": 0.8276218717883257,
      "grad_norm": 1.2903772592544556,
      "learning_rate": 8.399741702523013e-06,
      "loss": 0.0239,
      "step": 505720
    },
    {
      "epoch": 0.8276546022269791,
      "grad_norm": 0.9713814854621887,
      "learning_rate": 8.399675810309497e-06,
      "loss": 0.0198,
      "step": 505740
    },
    {
      "epoch": 0.8276873326656324,
      "grad_norm": 1.1360948085784912,
      "learning_rate": 8.399609918095979e-06,
      "loss": 0.0434,
      "step": 505760
    },
    {
      "epoch": 0.8277200631042857,
      "grad_norm": 0.5501999258995056,
      "learning_rate": 8.399544025882462e-06,
      "loss": 0.0255,
      "step": 505780
    },
    {
      "epoch": 0.8277527935429391,
      "grad_norm": 1.2682702541351318,
      "learning_rate": 8.399478133668944e-06,
      "loss": 0.0336,
      "step": 505800
    },
    {
      "epoch": 0.8277855239815924,
      "grad_norm": 0.6146051287651062,
      "learning_rate": 8.399412241455428e-06,
      "loss": 0.0335,
      "step": 505820
    },
    {
      "epoch": 0.8278182544202457,
      "grad_norm": 0.28928911685943604,
      "learning_rate": 8.39934634924191e-06,
      "loss": 0.0275,
      "step": 505840
    },
    {
      "epoch": 0.8278509848588991,
      "grad_norm": 0.5679100751876831,
      "learning_rate": 8.399280457028393e-06,
      "loss": 0.0236,
      "step": 505860
    },
    {
      "epoch": 0.8278837152975524,
      "grad_norm": 0.7924994230270386,
      "learning_rate": 8.399214564814875e-06,
      "loss": 0.0345,
      "step": 505880
    },
    {
      "epoch": 0.8279164457362057,
      "grad_norm": 3.7843053340911865,
      "learning_rate": 8.399148672601359e-06,
      "loss": 0.0193,
      "step": 505900
    },
    {
      "epoch": 0.8279491761748591,
      "grad_norm": 2.73370099067688,
      "learning_rate": 8.399082780387842e-06,
      "loss": 0.04,
      "step": 505920
    },
    {
      "epoch": 0.8279819066135125,
      "grad_norm": 0.9037415385246277,
      "learning_rate": 8.399016888174324e-06,
      "loss": 0.0265,
      "step": 505940
    },
    {
      "epoch": 0.8280146370521658,
      "grad_norm": 0.7211161255836487,
      "learning_rate": 8.398950995960808e-06,
      "loss": 0.0263,
      "step": 505960
    },
    {
      "epoch": 0.8280473674908191,
      "grad_norm": 1.1735869646072388,
      "learning_rate": 8.398885103747291e-06,
      "loss": 0.0258,
      "step": 505980
    },
    {
      "epoch": 0.8280800979294725,
      "grad_norm": 0.46121418476104736,
      "learning_rate": 8.398819211533773e-06,
      "loss": 0.0261,
      "step": 506000
    },
    {
      "epoch": 0.8281128283681258,
      "grad_norm": 1.750704288482666,
      "learning_rate": 8.398753319320257e-06,
      "loss": 0.0332,
      "step": 506020
    },
    {
      "epoch": 0.8281455588067791,
      "grad_norm": 0.5637767314910889,
      "learning_rate": 8.39868742710674e-06,
      "loss": 0.0315,
      "step": 506040
    },
    {
      "epoch": 0.8281782892454325,
      "grad_norm": 0.5219684839248657,
      "learning_rate": 8.398621534893222e-06,
      "loss": 0.035,
      "step": 506060
    },
    {
      "epoch": 0.8282110196840858,
      "grad_norm": 1.6969987154006958,
      "learning_rate": 8.398555642679706e-06,
      "loss": 0.0389,
      "step": 506080
    },
    {
      "epoch": 0.8282437501227391,
      "grad_norm": 1.5113749504089355,
      "learning_rate": 8.398489750466188e-06,
      "loss": 0.026,
      "step": 506100
    },
    {
      "epoch": 0.8282764805613925,
      "grad_norm": 0.8818380832672119,
      "learning_rate": 8.398423858252671e-06,
      "loss": 0.0405,
      "step": 506120
    },
    {
      "epoch": 0.8283092110000458,
      "grad_norm": 0.5698450207710266,
      "learning_rate": 8.398357966039153e-06,
      "loss": 0.0254,
      "step": 506140
    },
    {
      "epoch": 0.8283419414386992,
      "grad_norm": 0.8048326969146729,
      "learning_rate": 8.398292073825637e-06,
      "loss": 0.0185,
      "step": 506160
    },
    {
      "epoch": 0.8283746718773525,
      "grad_norm": 1.3083181381225586,
      "learning_rate": 8.398226181612119e-06,
      "loss": 0.0317,
      "step": 506180
    },
    {
      "epoch": 0.8284074023160058,
      "grad_norm": 3.4442992210388184,
      "learning_rate": 8.398160289398602e-06,
      "loss": 0.048,
      "step": 506200
    },
    {
      "epoch": 0.8284401327546592,
      "grad_norm": 0.6880082488059998,
      "learning_rate": 8.398094397185084e-06,
      "loss": 0.026,
      "step": 506220
    },
    {
      "epoch": 0.8284728631933125,
      "grad_norm": 1.0061273574829102,
      "learning_rate": 8.398028504971568e-06,
      "loss": 0.0294,
      "step": 506240
    },
    {
      "epoch": 0.8285055936319659,
      "grad_norm": 1.215108871459961,
      "learning_rate": 8.397962612758052e-06,
      "loss": 0.0259,
      "step": 506260
    },
    {
      "epoch": 0.8285383240706192,
      "grad_norm": 1.4619516134262085,
      "learning_rate": 8.397896720544533e-06,
      "loss": 0.0364,
      "step": 506280
    },
    {
      "epoch": 0.8285710545092725,
      "grad_norm": 1.0762048959732056,
      "learning_rate": 8.397830828331017e-06,
      "loss": 0.0415,
      "step": 506300
    },
    {
      "epoch": 0.8286037849479259,
      "grad_norm": 0.5081806182861328,
      "learning_rate": 8.397764936117499e-06,
      "loss": 0.0258,
      "step": 506320
    },
    {
      "epoch": 0.8286365153865792,
      "grad_norm": 1.3702996969223022,
      "learning_rate": 8.397699043903982e-06,
      "loss": 0.0313,
      "step": 506340
    },
    {
      "epoch": 0.8286692458252326,
      "grad_norm": 1.3461847305297852,
      "learning_rate": 8.397633151690466e-06,
      "loss": 0.0255,
      "step": 506360
    },
    {
      "epoch": 0.8287019762638859,
      "grad_norm": 0.7237867712974548,
      "learning_rate": 8.397567259476948e-06,
      "loss": 0.0221,
      "step": 506380
    },
    {
      "epoch": 0.8287347067025392,
      "grad_norm": 1.2886731624603271,
      "learning_rate": 8.397501367263432e-06,
      "loss": 0.0197,
      "step": 506400
    },
    {
      "epoch": 0.8287674371411926,
      "grad_norm": 1.3986958265304565,
      "learning_rate": 8.397435475049915e-06,
      "loss": 0.0346,
      "step": 506420
    },
    {
      "epoch": 0.8288001675798459,
      "grad_norm": 0.7361522316932678,
      "learning_rate": 8.397369582836397e-06,
      "loss": 0.0237,
      "step": 506440
    },
    {
      "epoch": 0.8288328980184992,
      "grad_norm": 0.9081842303276062,
      "learning_rate": 8.39730369062288e-06,
      "loss": 0.0285,
      "step": 506460
    },
    {
      "epoch": 0.8288656284571526,
      "grad_norm": 0.716027557849884,
      "learning_rate": 8.397237798409362e-06,
      "loss": 0.032,
      "step": 506480
    },
    {
      "epoch": 0.8288983588958059,
      "grad_norm": 0.9521201252937317,
      "learning_rate": 8.397171906195846e-06,
      "loss": 0.0206,
      "step": 506500
    },
    {
      "epoch": 0.8289310893344592,
      "grad_norm": 2.2157211303710938,
      "learning_rate": 8.397106013982328e-06,
      "loss": 0.0264,
      "step": 506520
    },
    {
      "epoch": 0.8289638197731126,
      "grad_norm": 1.9092351198196411,
      "learning_rate": 8.397040121768812e-06,
      "loss": 0.0303,
      "step": 506540
    },
    {
      "epoch": 0.828996550211766,
      "grad_norm": 0.6386911869049072,
      "learning_rate": 8.396974229555293e-06,
      "loss": 0.0294,
      "step": 506560
    },
    {
      "epoch": 0.8290292806504193,
      "grad_norm": 1.0507910251617432,
      "learning_rate": 8.396908337341777e-06,
      "loss": 0.0248,
      "step": 506580
    },
    {
      "epoch": 0.8290620110890726,
      "grad_norm": 0.8240792155265808,
      "learning_rate": 8.39684244512826e-06,
      "loss": 0.0231,
      "step": 506600
    },
    {
      "epoch": 0.829094741527726,
      "grad_norm": 1.7882258892059326,
      "learning_rate": 8.396776552914743e-06,
      "loss": 0.0259,
      "step": 506620
    },
    {
      "epoch": 0.8291274719663793,
      "grad_norm": 1.6384036540985107,
      "learning_rate": 8.396710660701226e-06,
      "loss": 0.0332,
      "step": 506640
    },
    {
      "epoch": 0.8291602024050326,
      "grad_norm": 0.9975512623786926,
      "learning_rate": 8.396644768487708e-06,
      "loss": 0.0279,
      "step": 506660
    },
    {
      "epoch": 0.829192932843686,
      "grad_norm": 0.14520396292209625,
      "learning_rate": 8.396578876274192e-06,
      "loss": 0.0269,
      "step": 506680
    },
    {
      "epoch": 0.8292256632823393,
      "grad_norm": 0.4972023069858551,
      "learning_rate": 8.396512984060673e-06,
      "loss": 0.037,
      "step": 506700
    },
    {
      "epoch": 0.8292583937209926,
      "grad_norm": 2.69950270652771,
      "learning_rate": 8.396447091847157e-06,
      "loss": 0.0436,
      "step": 506720
    },
    {
      "epoch": 0.829291124159646,
      "grad_norm": 1.8920562267303467,
      "learning_rate": 8.396381199633639e-06,
      "loss": 0.0248,
      "step": 506740
    },
    {
      "epoch": 0.8293238545982994,
      "grad_norm": 0.686731219291687,
      "learning_rate": 8.396315307420123e-06,
      "loss": 0.0421,
      "step": 506760
    },
    {
      "epoch": 0.8293565850369526,
      "grad_norm": 3.093681812286377,
      "learning_rate": 8.396249415206606e-06,
      "loss": 0.0305,
      "step": 506780
    },
    {
      "epoch": 0.829389315475606,
      "grad_norm": 3.8114097118377686,
      "learning_rate": 8.396183522993088e-06,
      "loss": 0.0299,
      "step": 506800
    },
    {
      "epoch": 0.8294220459142594,
      "grad_norm": 1.2706284523010254,
      "learning_rate": 8.396117630779572e-06,
      "loss": 0.0305,
      "step": 506820
    },
    {
      "epoch": 0.8294547763529126,
      "grad_norm": 0.5564389824867249,
      "learning_rate": 8.396051738566055e-06,
      "loss": 0.0247,
      "step": 506840
    },
    {
      "epoch": 0.829487506791566,
      "grad_norm": 1.0910875797271729,
      "learning_rate": 8.395985846352537e-06,
      "loss": 0.0248,
      "step": 506860
    },
    {
      "epoch": 0.8295202372302194,
      "grad_norm": 1.1780961751937866,
      "learning_rate": 8.39591995413902e-06,
      "loss": 0.0299,
      "step": 506880
    },
    {
      "epoch": 0.8295529676688727,
      "grad_norm": 2.0725255012512207,
      "learning_rate": 8.395854061925503e-06,
      "loss": 0.0287,
      "step": 506900
    },
    {
      "epoch": 0.829585698107526,
      "grad_norm": 1.869429349899292,
      "learning_rate": 8.395788169711986e-06,
      "loss": 0.0304,
      "step": 506920
    },
    {
      "epoch": 0.8296184285461794,
      "grad_norm": 0.7008897662162781,
      "learning_rate": 8.39572227749847e-06,
      "loss": 0.0214,
      "step": 506940
    },
    {
      "epoch": 0.8296511589848327,
      "grad_norm": 0.6568565368652344,
      "learning_rate": 8.395656385284952e-06,
      "loss": 0.0269,
      "step": 506960
    },
    {
      "epoch": 0.829683889423486,
      "grad_norm": 0.5387762188911438,
      "learning_rate": 8.395590493071435e-06,
      "loss": 0.033,
      "step": 506980
    },
    {
      "epoch": 0.8297166198621394,
      "grad_norm": 0.7504109740257263,
      "learning_rate": 8.395524600857917e-06,
      "loss": 0.0313,
      "step": 507000
    },
    {
      "epoch": 0.8297493503007928,
      "grad_norm": 0.721938967704773,
      "learning_rate": 8.3954587086444e-06,
      "loss": 0.0206,
      "step": 507020
    },
    {
      "epoch": 0.829782080739446,
      "grad_norm": 1.2367072105407715,
      "learning_rate": 8.395392816430883e-06,
      "loss": 0.0348,
      "step": 507040
    },
    {
      "epoch": 0.8298148111780994,
      "grad_norm": 0.37696337699890137,
      "learning_rate": 8.395326924217366e-06,
      "loss": 0.0212,
      "step": 507060
    },
    {
      "epoch": 0.8298475416167528,
      "grad_norm": 0.747908353805542,
      "learning_rate": 8.395261032003848e-06,
      "loss": 0.0373,
      "step": 507080
    },
    {
      "epoch": 0.829880272055406,
      "grad_norm": 2.2535059452056885,
      "learning_rate": 8.395195139790332e-06,
      "loss": 0.0272,
      "step": 507100
    },
    {
      "epoch": 0.8299130024940594,
      "grad_norm": 0.5133900046348572,
      "learning_rate": 8.395129247576814e-06,
      "loss": 0.0376,
      "step": 507120
    },
    {
      "epoch": 0.8299457329327128,
      "grad_norm": 1.2237720489501953,
      "learning_rate": 8.395063355363297e-06,
      "loss": 0.031,
      "step": 507140
    },
    {
      "epoch": 0.829978463371366,
      "grad_norm": 1.3757754564285278,
      "learning_rate": 8.39499746314978e-06,
      "loss": 0.0315,
      "step": 507160
    },
    {
      "epoch": 0.8300111938100194,
      "grad_norm": 0.4289184510707855,
      "learning_rate": 8.394931570936263e-06,
      "loss": 0.0244,
      "step": 507180
    },
    {
      "epoch": 0.8300439242486728,
      "grad_norm": 0.9185531139373779,
      "learning_rate": 8.394865678722746e-06,
      "loss": 0.0271,
      "step": 507200
    },
    {
      "epoch": 0.8300766546873262,
      "grad_norm": 0.7496616840362549,
      "learning_rate": 8.39479978650923e-06,
      "loss": 0.0282,
      "step": 507220
    },
    {
      "epoch": 0.8301093851259794,
      "grad_norm": 0.2218731790781021,
      "learning_rate": 8.394733894295712e-06,
      "loss": 0.0283,
      "step": 507240
    },
    {
      "epoch": 0.8301421155646328,
      "grad_norm": 2.7911536693573,
      "learning_rate": 8.394668002082195e-06,
      "loss": 0.0382,
      "step": 507260
    },
    {
      "epoch": 0.8301748460032862,
      "grad_norm": 0.8279279470443726,
      "learning_rate": 8.394602109868677e-06,
      "loss": 0.0285,
      "step": 507280
    },
    {
      "epoch": 0.8302075764419394,
      "grad_norm": 0.6592740416526794,
      "learning_rate": 8.39453621765516e-06,
      "loss": 0.0327,
      "step": 507300
    },
    {
      "epoch": 0.8302403068805928,
      "grad_norm": 0.4885011315345764,
      "learning_rate": 8.394470325441644e-06,
      "loss": 0.0298,
      "step": 507320
    },
    {
      "epoch": 0.8302730373192462,
      "grad_norm": 1.0798232555389404,
      "learning_rate": 8.394404433228126e-06,
      "loss": 0.0241,
      "step": 507340
    },
    {
      "epoch": 0.8303057677578994,
      "grad_norm": 0.5014496445655823,
      "learning_rate": 8.39433854101461e-06,
      "loss": 0.0272,
      "step": 507360
    },
    {
      "epoch": 0.8303384981965528,
      "grad_norm": 0.32855933904647827,
      "learning_rate": 8.394272648801092e-06,
      "loss": 0.0283,
      "step": 507380
    },
    {
      "epoch": 0.8303712286352062,
      "grad_norm": 0.8573944568634033,
      "learning_rate": 8.394206756587575e-06,
      "loss": 0.0334,
      "step": 507400
    },
    {
      "epoch": 0.8304039590738596,
      "grad_norm": 0.8016555309295654,
      "learning_rate": 8.394140864374057e-06,
      "loss": 0.0263,
      "step": 507420
    },
    {
      "epoch": 0.8304366895125128,
      "grad_norm": 3.3638803958892822,
      "learning_rate": 8.39407497216054e-06,
      "loss": 0.0371,
      "step": 507440
    },
    {
      "epoch": 0.8304694199511662,
      "grad_norm": 1.492652177810669,
      "learning_rate": 8.394009079947023e-06,
      "loss": 0.0387,
      "step": 507460
    },
    {
      "epoch": 0.8305021503898196,
      "grad_norm": 1.3164379596710205,
      "learning_rate": 8.393943187733506e-06,
      "loss": 0.0365,
      "step": 507480
    },
    {
      "epoch": 0.8305348808284728,
      "grad_norm": 0.9067519903182983,
      "learning_rate": 8.393877295519988e-06,
      "loss": 0.0333,
      "step": 507500
    },
    {
      "epoch": 0.8305676112671262,
      "grad_norm": 1.0249029397964478,
      "learning_rate": 8.393811403306472e-06,
      "loss": 0.0243,
      "step": 507520
    },
    {
      "epoch": 0.8306003417057796,
      "grad_norm": 1.7245131731033325,
      "learning_rate": 8.393745511092954e-06,
      "loss": 0.0316,
      "step": 507540
    },
    {
      "epoch": 0.8306330721444328,
      "grad_norm": 0.7371415495872498,
      "learning_rate": 8.393679618879437e-06,
      "loss": 0.036,
      "step": 507560
    },
    {
      "epoch": 0.8306658025830862,
      "grad_norm": 0.8323922753334045,
      "learning_rate": 8.39361372666592e-06,
      "loss": 0.0385,
      "step": 507580
    },
    {
      "epoch": 0.8306985330217396,
      "grad_norm": 1.9460656642913818,
      "learning_rate": 8.393547834452403e-06,
      "loss": 0.0401,
      "step": 507600
    },
    {
      "epoch": 0.8307312634603929,
      "grad_norm": 0.7428408265113831,
      "learning_rate": 8.393481942238886e-06,
      "loss": 0.0308,
      "step": 507620
    },
    {
      "epoch": 0.8307639938990462,
      "grad_norm": 0.7286999225616455,
      "learning_rate": 8.39341605002537e-06,
      "loss": 0.0299,
      "step": 507640
    },
    {
      "epoch": 0.8307967243376996,
      "grad_norm": 1.0574058294296265,
      "learning_rate": 8.393350157811852e-06,
      "loss": 0.0246,
      "step": 507660
    },
    {
      "epoch": 0.830829454776353,
      "grad_norm": 1.8192108869552612,
      "learning_rate": 8.393284265598335e-06,
      "loss": 0.0287,
      "step": 507680
    },
    {
      "epoch": 0.8308621852150062,
      "grad_norm": 0.23463232815265656,
      "learning_rate": 8.393218373384819e-06,
      "loss": 0.0273,
      "step": 507700
    },
    {
      "epoch": 0.8308949156536596,
      "grad_norm": 0.6712485551834106,
      "learning_rate": 8.3931524811713e-06,
      "loss": 0.0299,
      "step": 507720
    },
    {
      "epoch": 0.830927646092313,
      "grad_norm": 0.3001472055912018,
      "learning_rate": 8.393086588957784e-06,
      "loss": 0.0283,
      "step": 507740
    },
    {
      "epoch": 0.8309603765309662,
      "grad_norm": 0.9929922819137573,
      "learning_rate": 8.393020696744266e-06,
      "loss": 0.025,
      "step": 507760
    },
    {
      "epoch": 0.8309931069696196,
      "grad_norm": 1.792840600013733,
      "learning_rate": 8.39295480453075e-06,
      "loss": 0.0438,
      "step": 507780
    },
    {
      "epoch": 0.831025837408273,
      "grad_norm": 2.082764148712158,
      "learning_rate": 8.392888912317232e-06,
      "loss": 0.0268,
      "step": 507800
    },
    {
      "epoch": 0.8310585678469263,
      "grad_norm": 0.6997177600860596,
      "learning_rate": 8.392823020103715e-06,
      "loss": 0.0296,
      "step": 507820
    },
    {
      "epoch": 0.8310912982855796,
      "grad_norm": 1.4516772031784058,
      "learning_rate": 8.392757127890197e-06,
      "loss": 0.0226,
      "step": 507840
    },
    {
      "epoch": 0.831124028724233,
      "grad_norm": 1.1215472221374512,
      "learning_rate": 8.392691235676681e-06,
      "loss": 0.0275,
      "step": 507860
    },
    {
      "epoch": 0.8311567591628863,
      "grad_norm": 1.9987683296203613,
      "learning_rate": 8.392625343463163e-06,
      "loss": 0.0291,
      "step": 507880
    },
    {
      "epoch": 0.8311894896015396,
      "grad_norm": 1.0723086595535278,
      "learning_rate": 8.392559451249646e-06,
      "loss": 0.0328,
      "step": 507900
    },
    {
      "epoch": 0.831222220040193,
      "grad_norm": 0.7762824296951294,
      "learning_rate": 8.392493559036128e-06,
      "loss": 0.0266,
      "step": 507920
    },
    {
      "epoch": 0.8312549504788463,
      "grad_norm": 1.4949791431427002,
      "learning_rate": 8.392427666822612e-06,
      "loss": 0.0287,
      "step": 507940
    },
    {
      "epoch": 0.8312876809174996,
      "grad_norm": 0.4293731153011322,
      "learning_rate": 8.392361774609095e-06,
      "loss": 0.0344,
      "step": 507960
    },
    {
      "epoch": 0.831320411356153,
      "grad_norm": 2.2102932929992676,
      "learning_rate": 8.392295882395577e-06,
      "loss": 0.0279,
      "step": 507980
    },
    {
      "epoch": 0.8313531417948063,
      "grad_norm": 1.164939522743225,
      "learning_rate": 8.392229990182061e-06,
      "loss": 0.0289,
      "step": 508000
    },
    {
      "epoch": 0.8313858722334597,
      "grad_norm": 0.4947955012321472,
      "learning_rate": 8.392164097968544e-06,
      "loss": 0.0178,
      "step": 508020
    },
    {
      "epoch": 0.831418602672113,
      "grad_norm": 0.9007487297058105,
      "learning_rate": 8.392098205755026e-06,
      "loss": 0.0325,
      "step": 508040
    },
    {
      "epoch": 0.8314513331107664,
      "grad_norm": 1.0114057064056396,
      "learning_rate": 8.39203231354151e-06,
      "loss": 0.0292,
      "step": 508060
    },
    {
      "epoch": 0.8314840635494197,
      "grad_norm": 1.5530112981796265,
      "learning_rate": 8.391966421327994e-06,
      "loss": 0.0255,
      "step": 508080
    },
    {
      "epoch": 0.831516793988073,
      "grad_norm": 1.0468553304672241,
      "learning_rate": 8.391900529114475e-06,
      "loss": 0.0311,
      "step": 508100
    },
    {
      "epoch": 0.8315495244267264,
      "grad_norm": 1.6572860479354858,
      "learning_rate": 8.391834636900959e-06,
      "loss": 0.0385,
      "step": 508120
    },
    {
      "epoch": 0.8315822548653797,
      "grad_norm": 1.020595669746399,
      "learning_rate": 8.391768744687441e-06,
      "loss": 0.0295,
      "step": 508140
    },
    {
      "epoch": 0.831614985304033,
      "grad_norm": 0.37884268164634705,
      "learning_rate": 8.391702852473924e-06,
      "loss": 0.0299,
      "step": 508160
    },
    {
      "epoch": 0.8316477157426864,
      "grad_norm": 0.8078426122665405,
      "learning_rate": 8.391636960260406e-06,
      "loss": 0.0258,
      "step": 508180
    },
    {
      "epoch": 0.8316804461813397,
      "grad_norm": 0.4649348556995392,
      "learning_rate": 8.39157106804689e-06,
      "loss": 0.025,
      "step": 508200
    },
    {
      "epoch": 0.8317131766199931,
      "grad_norm": 1.9127967357635498,
      "learning_rate": 8.391505175833372e-06,
      "loss": 0.0371,
      "step": 508220
    },
    {
      "epoch": 0.8317459070586464,
      "grad_norm": 0.5211825966835022,
      "learning_rate": 8.391439283619855e-06,
      "loss": 0.0201,
      "step": 508240
    },
    {
      "epoch": 0.8317786374972997,
      "grad_norm": 1.0227679014205933,
      "learning_rate": 8.391373391406337e-06,
      "loss": 0.0417,
      "step": 508260
    },
    {
      "epoch": 0.8318113679359531,
      "grad_norm": 0.25628843903541565,
      "learning_rate": 8.391307499192821e-06,
      "loss": 0.0295,
      "step": 508280
    },
    {
      "epoch": 0.8318440983746064,
      "grad_norm": 1.873412013053894,
      "learning_rate": 8.391241606979303e-06,
      "loss": 0.031,
      "step": 508300
    },
    {
      "epoch": 0.8318768288132597,
      "grad_norm": 1.006738543510437,
      "learning_rate": 8.391175714765786e-06,
      "loss": 0.0311,
      "step": 508320
    },
    {
      "epoch": 0.8319095592519131,
      "grad_norm": 0.8466916084289551,
      "learning_rate": 8.391109822552268e-06,
      "loss": 0.0305,
      "step": 508340
    },
    {
      "epoch": 0.8319422896905664,
      "grad_norm": 1.041965126991272,
      "learning_rate": 8.391043930338752e-06,
      "loss": 0.0393,
      "step": 508360
    },
    {
      "epoch": 0.8319750201292198,
      "grad_norm": 1.7106207609176636,
      "learning_rate": 8.390978038125235e-06,
      "loss": 0.0307,
      "step": 508380
    },
    {
      "epoch": 0.8320077505678731,
      "grad_norm": 0.9795450568199158,
      "learning_rate": 8.390912145911717e-06,
      "loss": 0.0226,
      "step": 508400
    },
    {
      "epoch": 0.8320404810065265,
      "grad_norm": 0.5235000848770142,
      "learning_rate": 8.390846253698201e-06,
      "loss": 0.0394,
      "step": 508420
    },
    {
      "epoch": 0.8320732114451798,
      "grad_norm": 1.2417045831680298,
      "learning_rate": 8.390780361484685e-06,
      "loss": 0.0337,
      "step": 508440
    },
    {
      "epoch": 0.8321059418838331,
      "grad_norm": 0.5063934922218323,
      "learning_rate": 8.390714469271168e-06,
      "loss": 0.0345,
      "step": 508460
    },
    {
      "epoch": 0.8321386723224865,
      "grad_norm": 1.00290048122406,
      "learning_rate": 8.39064857705765e-06,
      "loss": 0.041,
      "step": 508480
    },
    {
      "epoch": 0.8321714027611398,
      "grad_norm": 2.1362528800964355,
      "learning_rate": 8.390582684844134e-06,
      "loss": 0.0312,
      "step": 508500
    },
    {
      "epoch": 0.8322041331997931,
      "grad_norm": 1.0795954465866089,
      "learning_rate": 8.390516792630615e-06,
      "loss": 0.034,
      "step": 508520
    },
    {
      "epoch": 0.8322368636384465,
      "grad_norm": 2.151657819747925,
      "learning_rate": 8.390450900417099e-06,
      "loss": 0.0264,
      "step": 508540
    },
    {
      "epoch": 0.8322695940770998,
      "grad_norm": 0.15549449622631073,
      "learning_rate": 8.390385008203581e-06,
      "loss": 0.0303,
      "step": 508560
    },
    {
      "epoch": 0.8323023245157531,
      "grad_norm": 8.988984107971191,
      "learning_rate": 8.390319115990065e-06,
      "loss": 0.0374,
      "step": 508580
    },
    {
      "epoch": 0.8323350549544065,
      "grad_norm": 0.24643200635910034,
      "learning_rate": 8.390253223776546e-06,
      "loss": 0.032,
      "step": 508600
    },
    {
      "epoch": 0.8323677853930599,
      "grad_norm": 0.3835010826587677,
      "learning_rate": 8.39018733156303e-06,
      "loss": 0.0232,
      "step": 508620
    },
    {
      "epoch": 0.8324005158317132,
      "grad_norm": 0.49720650911331177,
      "learning_rate": 8.390121439349512e-06,
      "loss": 0.0412,
      "step": 508640
    },
    {
      "epoch": 0.8324332462703665,
      "grad_norm": 0.6960524916648865,
      "learning_rate": 8.390055547135996e-06,
      "loss": 0.0269,
      "step": 508660
    },
    {
      "epoch": 0.8324659767090199,
      "grad_norm": 0.882743239402771,
      "learning_rate": 8.389989654922477e-06,
      "loss": 0.0277,
      "step": 508680
    },
    {
      "epoch": 0.8324987071476732,
      "grad_norm": 0.6956207156181335,
      "learning_rate": 8.389923762708961e-06,
      "loss": 0.024,
      "step": 508700
    },
    {
      "epoch": 0.8325314375863265,
      "grad_norm": 1.905676245689392,
      "learning_rate": 8.389857870495445e-06,
      "loss": 0.0312,
      "step": 508720
    },
    {
      "epoch": 0.8325641680249799,
      "grad_norm": 1.4169186353683472,
      "learning_rate": 8.389791978281926e-06,
      "loss": 0.037,
      "step": 508740
    },
    {
      "epoch": 0.8325968984636332,
      "grad_norm": 0.8882024884223938,
      "learning_rate": 8.38972608606841e-06,
      "loss": 0.0326,
      "step": 508760
    },
    {
      "epoch": 0.8326296289022865,
      "grad_norm": 1.927177906036377,
      "learning_rate": 8.389660193854892e-06,
      "loss": 0.0293,
      "step": 508780
    },
    {
      "epoch": 0.8326623593409399,
      "grad_norm": 1.3528109788894653,
      "learning_rate": 8.389594301641376e-06,
      "loss": 0.0304,
      "step": 508800
    },
    {
      "epoch": 0.8326950897795933,
      "grad_norm": 0.7489198446273804,
      "learning_rate": 8.389528409427859e-06,
      "loss": 0.0246,
      "step": 508820
    },
    {
      "epoch": 0.8327278202182465,
      "grad_norm": 1.3015828132629395,
      "learning_rate": 8.389462517214341e-06,
      "loss": 0.0347,
      "step": 508840
    },
    {
      "epoch": 0.8327605506568999,
      "grad_norm": 0.890316367149353,
      "learning_rate": 8.389396625000825e-06,
      "loss": 0.0261,
      "step": 508860
    },
    {
      "epoch": 0.8327932810955533,
      "grad_norm": 0.4004113972187042,
      "learning_rate": 8.389330732787308e-06,
      "loss": 0.0247,
      "step": 508880
    },
    {
      "epoch": 0.8328260115342065,
      "grad_norm": 1.292371392250061,
      "learning_rate": 8.38926484057379e-06,
      "loss": 0.0176,
      "step": 508900
    },
    {
      "epoch": 0.8328587419728599,
      "grad_norm": 0.7386962175369263,
      "learning_rate": 8.389198948360274e-06,
      "loss": 0.0244,
      "step": 508920
    },
    {
      "epoch": 0.8328914724115133,
      "grad_norm": 1.4635593891143799,
      "learning_rate": 8.389133056146756e-06,
      "loss": 0.0222,
      "step": 508940
    },
    {
      "epoch": 0.8329242028501666,
      "grad_norm": 0.46104803681373596,
      "learning_rate": 8.38906716393324e-06,
      "loss": 0.0324,
      "step": 508960
    },
    {
      "epoch": 0.8329569332888199,
      "grad_norm": 2.5753707885742188,
      "learning_rate": 8.389001271719721e-06,
      "loss": 0.034,
      "step": 508980
    },
    {
      "epoch": 0.8329896637274733,
      "grad_norm": 7.801275253295898,
      "learning_rate": 8.388935379506205e-06,
      "loss": 0.0284,
      "step": 509000
    },
    {
      "epoch": 0.8330223941661267,
      "grad_norm": 0.32429298758506775,
      "learning_rate": 8.388869487292687e-06,
      "loss": 0.0302,
      "step": 509020
    },
    {
      "epoch": 0.8330551246047799,
      "grad_norm": 0.8450213074684143,
      "learning_rate": 8.38880359507917e-06,
      "loss": 0.0195,
      "step": 509040
    },
    {
      "epoch": 0.8330878550434333,
      "grad_norm": 1.72183096408844,
      "learning_rate": 8.388737702865654e-06,
      "loss": 0.0314,
      "step": 509060
    },
    {
      "epoch": 0.8331205854820867,
      "grad_norm": 1.6981028318405151,
      "learning_rate": 8.388671810652136e-06,
      "loss": 0.0333,
      "step": 509080
    },
    {
      "epoch": 0.8331533159207399,
      "grad_norm": 0.5779520273208618,
      "learning_rate": 8.38860591843862e-06,
      "loss": 0.0229,
      "step": 509100
    },
    {
      "epoch": 0.8331860463593933,
      "grad_norm": 4.613338470458984,
      "learning_rate": 8.388540026225101e-06,
      "loss": 0.0336,
      "step": 509120
    },
    {
      "epoch": 0.8332187767980467,
      "grad_norm": 0.2803056538105011,
      "learning_rate": 8.388474134011585e-06,
      "loss": 0.0289,
      "step": 509140
    },
    {
      "epoch": 0.8332515072366999,
      "grad_norm": 0.30816027522087097,
      "learning_rate": 8.388408241798067e-06,
      "loss": 0.03,
      "step": 509160
    },
    {
      "epoch": 0.8332842376753533,
      "grad_norm": 3.099130392074585,
      "learning_rate": 8.38834234958455e-06,
      "loss": 0.0248,
      "step": 509180
    },
    {
      "epoch": 0.8333169681140067,
      "grad_norm": 1.9524247646331787,
      "learning_rate": 8.388276457371034e-06,
      "loss": 0.0178,
      "step": 509200
    },
    {
      "epoch": 0.8333496985526601,
      "grad_norm": 0.9216530919075012,
      "learning_rate": 8.388210565157516e-06,
      "loss": 0.0352,
      "step": 509220
    },
    {
      "epoch": 0.8333824289913133,
      "grad_norm": 0.5536396503448486,
      "learning_rate": 8.388144672944e-06,
      "loss": 0.0312,
      "step": 509240
    },
    {
      "epoch": 0.8334151594299667,
      "grad_norm": 1.4574774503707886,
      "learning_rate": 8.388078780730483e-06,
      "loss": 0.0198,
      "step": 509260
    },
    {
      "epoch": 0.8334478898686201,
      "grad_norm": 0.44500622153282166,
      "learning_rate": 8.388012888516965e-06,
      "loss": 0.0314,
      "step": 509280
    },
    {
      "epoch": 0.8334806203072733,
      "grad_norm": 0.8003162741661072,
      "learning_rate": 8.387946996303448e-06,
      "loss": 0.0245,
      "step": 509300
    },
    {
      "epoch": 0.8335133507459267,
      "grad_norm": 0.22514018416404724,
      "learning_rate": 8.38788110408993e-06,
      "loss": 0.0291,
      "step": 509320
    },
    {
      "epoch": 0.8335460811845801,
      "grad_norm": 1.7782403230667114,
      "learning_rate": 8.387815211876414e-06,
      "loss": 0.03,
      "step": 509340
    },
    {
      "epoch": 0.8335788116232333,
      "grad_norm": 0.678241491317749,
      "learning_rate": 8.387749319662896e-06,
      "loss": 0.0291,
      "step": 509360
    },
    {
      "epoch": 0.8336115420618867,
      "grad_norm": 1.1734181642532349,
      "learning_rate": 8.38768342744938e-06,
      "loss": 0.0295,
      "step": 509380
    },
    {
      "epoch": 0.8336442725005401,
      "grad_norm": 1.0831058025360107,
      "learning_rate": 8.387617535235863e-06,
      "loss": 0.029,
      "step": 509400
    },
    {
      "epoch": 0.8336770029391934,
      "grad_norm": 0.9663279056549072,
      "learning_rate": 8.387551643022345e-06,
      "loss": 0.0283,
      "step": 509420
    },
    {
      "epoch": 0.8337097333778467,
      "grad_norm": 3.014052391052246,
      "learning_rate": 8.387485750808828e-06,
      "loss": 0.0289,
      "step": 509440
    },
    {
      "epoch": 0.8337424638165001,
      "grad_norm": 0.8673843741416931,
      "learning_rate": 8.38741985859531e-06,
      "loss": 0.0265,
      "step": 509460
    },
    {
      "epoch": 0.8337751942551535,
      "grad_norm": 0.6889600157737732,
      "learning_rate": 8.387353966381794e-06,
      "loss": 0.0196,
      "step": 509480
    },
    {
      "epoch": 0.8338079246938067,
      "grad_norm": 2.3400611877441406,
      "learning_rate": 8.387288074168276e-06,
      "loss": 0.0274,
      "step": 509500
    },
    {
      "epoch": 0.8338406551324601,
      "grad_norm": 1.0449049472808838,
      "learning_rate": 8.38722218195476e-06,
      "loss": 0.0367,
      "step": 509520
    },
    {
      "epoch": 0.8338733855711135,
      "grad_norm": 0.3042447865009308,
      "learning_rate": 8.387156289741241e-06,
      "loss": 0.0371,
      "step": 509540
    },
    {
      "epoch": 0.8339061160097667,
      "grad_norm": 1.446303367614746,
      "learning_rate": 8.387090397527725e-06,
      "loss": 0.0288,
      "step": 509560
    },
    {
      "epoch": 0.8339388464484201,
      "grad_norm": 0.22196999192237854,
      "learning_rate": 8.387024505314207e-06,
      "loss": 0.0317,
      "step": 509580
    },
    {
      "epoch": 0.8339715768870735,
      "grad_norm": 0.5163787603378296,
      "learning_rate": 8.38695861310069e-06,
      "loss": 0.0359,
      "step": 509600
    },
    {
      "epoch": 0.8340043073257268,
      "grad_norm": 1.0233862400054932,
      "learning_rate": 8.386892720887174e-06,
      "loss": 0.0283,
      "step": 509620
    },
    {
      "epoch": 0.8340370377643801,
      "grad_norm": 1.9892122745513916,
      "learning_rate": 8.386826828673656e-06,
      "loss": 0.0293,
      "step": 509640
    },
    {
      "epoch": 0.8340697682030335,
      "grad_norm": 2.395167112350464,
      "learning_rate": 8.38676093646014e-06,
      "loss": 0.0279,
      "step": 509660
    },
    {
      "epoch": 0.8341024986416868,
      "grad_norm": 2.5455105304718018,
      "learning_rate": 8.386695044246623e-06,
      "loss": 0.03,
      "step": 509680
    },
    {
      "epoch": 0.8341352290803401,
      "grad_norm": 1.562048077583313,
      "learning_rate": 8.386629152033105e-06,
      "loss": 0.0287,
      "step": 509700
    },
    {
      "epoch": 0.8341679595189935,
      "grad_norm": 0.5047103762626648,
      "learning_rate": 8.386563259819588e-06,
      "loss": 0.0234,
      "step": 509720
    },
    {
      "epoch": 0.8342006899576468,
      "grad_norm": 4.519892692565918,
      "learning_rate": 8.38649736760607e-06,
      "loss": 0.0278,
      "step": 509740
    },
    {
      "epoch": 0.8342334203963001,
      "grad_norm": 0.7250687479972839,
      "learning_rate": 8.386431475392554e-06,
      "loss": 0.0244,
      "step": 509760
    },
    {
      "epoch": 0.8342661508349535,
      "grad_norm": 0.28831472992897034,
      "learning_rate": 8.386365583179037e-06,
      "loss": 0.0286,
      "step": 509780
    },
    {
      "epoch": 0.8342988812736069,
      "grad_norm": 4.9802422523498535,
      "learning_rate": 8.38629969096552e-06,
      "loss": 0.043,
      "step": 509800
    },
    {
      "epoch": 0.8343316117122601,
      "grad_norm": 0.17529283463954926,
      "learning_rate": 8.386233798752003e-06,
      "loss": 0.0263,
      "step": 509820
    },
    {
      "epoch": 0.8343643421509135,
      "grad_norm": 1.285636305809021,
      "learning_rate": 8.386167906538485e-06,
      "loss": 0.0274,
      "step": 509840
    },
    {
      "epoch": 0.8343970725895669,
      "grad_norm": 0.7718988656997681,
      "learning_rate": 8.386102014324968e-06,
      "loss": 0.0365,
      "step": 509860
    },
    {
      "epoch": 0.8344298030282202,
      "grad_norm": 1.0049595832824707,
      "learning_rate": 8.38603612211145e-06,
      "loss": 0.037,
      "step": 509880
    },
    {
      "epoch": 0.8344625334668735,
      "grad_norm": 0.49486029148101807,
      "learning_rate": 8.385970229897934e-06,
      "loss": 0.0232,
      "step": 509900
    },
    {
      "epoch": 0.8344952639055269,
      "grad_norm": 2.87076997756958,
      "learning_rate": 8.385904337684416e-06,
      "loss": 0.0285,
      "step": 509920
    },
    {
      "epoch": 0.8345279943441802,
      "grad_norm": 1.458617925643921,
      "learning_rate": 8.3858384454709e-06,
      "loss": 0.0261,
      "step": 509940
    },
    {
      "epoch": 0.8345607247828335,
      "grad_norm": 1.4635305404663086,
      "learning_rate": 8.385772553257381e-06,
      "loss": 0.0314,
      "step": 509960
    },
    {
      "epoch": 0.8345934552214869,
      "grad_norm": 0.9221687912940979,
      "learning_rate": 8.385706661043865e-06,
      "loss": 0.0325,
      "step": 509980
    },
    {
      "epoch": 0.8346261856601402,
      "grad_norm": 1.3876895904541016,
      "learning_rate": 8.385640768830348e-06,
      "loss": 0.028,
      "step": 510000
    },
    {
      "epoch": 0.8346589160987935,
      "grad_norm": 1.2525113821029663,
      "learning_rate": 8.38557487661683e-06,
      "loss": 0.0255,
      "step": 510020
    },
    {
      "epoch": 0.8346916465374469,
      "grad_norm": 1.3898452520370483,
      "learning_rate": 8.385508984403314e-06,
      "loss": 0.0294,
      "step": 510040
    },
    {
      "epoch": 0.8347243769761002,
      "grad_norm": 1.3742152452468872,
      "learning_rate": 8.385443092189797e-06,
      "loss": 0.0454,
      "step": 510060
    },
    {
      "epoch": 0.8347571074147536,
      "grad_norm": 0.775740921497345,
      "learning_rate": 8.38537719997628e-06,
      "loss": 0.0316,
      "step": 510080
    },
    {
      "epoch": 0.8347898378534069,
      "grad_norm": 1.937695860862732,
      "learning_rate": 8.385311307762763e-06,
      "loss": 0.0398,
      "step": 510100
    },
    {
      "epoch": 0.8348225682920603,
      "grad_norm": 0.4330938756465912,
      "learning_rate": 8.385245415549247e-06,
      "loss": 0.0347,
      "step": 510120
    },
    {
      "epoch": 0.8348552987307136,
      "grad_norm": 1.5704052448272705,
      "learning_rate": 8.385179523335728e-06,
      "loss": 0.0231,
      "step": 510140
    },
    {
      "epoch": 0.8348880291693669,
      "grad_norm": 5.4024481773376465,
      "learning_rate": 8.385113631122212e-06,
      "loss": 0.0406,
      "step": 510160
    },
    {
      "epoch": 0.8349207596080203,
      "grad_norm": 1.4902777671813965,
      "learning_rate": 8.385047738908694e-06,
      "loss": 0.0228,
      "step": 510180
    },
    {
      "epoch": 0.8349534900466736,
      "grad_norm": 0.7814164757728577,
      "learning_rate": 8.384981846695177e-06,
      "loss": 0.0265,
      "step": 510200
    },
    {
      "epoch": 0.8349862204853269,
      "grad_norm": 0.47429442405700684,
      "learning_rate": 8.38491595448166e-06,
      "loss": 0.0302,
      "step": 510220
    },
    {
      "epoch": 0.8350189509239803,
      "grad_norm": 0.9331256151199341,
      "learning_rate": 8.384850062268143e-06,
      "loss": 0.0269,
      "step": 510240
    },
    {
      "epoch": 0.8350516813626336,
      "grad_norm": 0.5796529650688171,
      "learning_rate": 8.384784170054625e-06,
      "loss": 0.0337,
      "step": 510260
    },
    {
      "epoch": 0.835084411801287,
      "grad_norm": 0.24133329093456268,
      "learning_rate": 8.384718277841108e-06,
      "loss": 0.0398,
      "step": 510280
    },
    {
      "epoch": 0.8351171422399403,
      "grad_norm": 1.5123730897903442,
      "learning_rate": 8.38465238562759e-06,
      "loss": 0.0314,
      "step": 510300
    },
    {
      "epoch": 0.8351498726785936,
      "grad_norm": 1.1292093992233276,
      "learning_rate": 8.384586493414074e-06,
      "loss": 0.0328,
      "step": 510320
    },
    {
      "epoch": 0.835182603117247,
      "grad_norm": 1.2359188795089722,
      "learning_rate": 8.384520601200556e-06,
      "loss": 0.0351,
      "step": 510340
    },
    {
      "epoch": 0.8352153335559003,
      "grad_norm": 1.962872862815857,
      "learning_rate": 8.38445470898704e-06,
      "loss": 0.0349,
      "step": 510360
    },
    {
      "epoch": 0.8352480639945536,
      "grad_norm": 0.5372455716133118,
      "learning_rate": 8.384388816773521e-06,
      "loss": 0.0192,
      "step": 510380
    },
    {
      "epoch": 0.835280794433207,
      "grad_norm": 0.7754606604576111,
      "learning_rate": 8.384322924560005e-06,
      "loss": 0.0241,
      "step": 510400
    },
    {
      "epoch": 0.8353135248718603,
      "grad_norm": 0.4156379997730255,
      "learning_rate": 8.384257032346488e-06,
      "loss": 0.0259,
      "step": 510420
    },
    {
      "epoch": 0.8353462553105137,
      "grad_norm": 1.3714909553527832,
      "learning_rate": 8.38419114013297e-06,
      "loss": 0.0273,
      "step": 510440
    },
    {
      "epoch": 0.835378985749167,
      "grad_norm": 1.5063358545303345,
      "learning_rate": 8.384125247919454e-06,
      "loss": 0.0381,
      "step": 510460
    },
    {
      "epoch": 0.8354117161878204,
      "grad_norm": 0.24609725177288055,
      "learning_rate": 8.384059355705938e-06,
      "loss": 0.0234,
      "step": 510480
    },
    {
      "epoch": 0.8354444466264737,
      "grad_norm": 1.9557777643203735,
      "learning_rate": 8.38399346349242e-06,
      "loss": 0.0252,
      "step": 510500
    },
    {
      "epoch": 0.835477177065127,
      "grad_norm": 0.8888383507728577,
      "learning_rate": 8.383927571278903e-06,
      "loss": 0.0317,
      "step": 510520
    },
    {
      "epoch": 0.8355099075037804,
      "grad_norm": 1.5529354810714722,
      "learning_rate": 8.383861679065387e-06,
      "loss": 0.0368,
      "step": 510540
    },
    {
      "epoch": 0.8355426379424337,
      "grad_norm": 0.6956251263618469,
      "learning_rate": 8.383795786851869e-06,
      "loss": 0.0315,
      "step": 510560
    },
    {
      "epoch": 0.835575368381087,
      "grad_norm": 0.677039384841919,
      "learning_rate": 8.383729894638352e-06,
      "loss": 0.0244,
      "step": 510580
    },
    {
      "epoch": 0.8356080988197404,
      "grad_norm": 1.3344225883483887,
      "learning_rate": 8.383664002424834e-06,
      "loss": 0.0302,
      "step": 510600
    },
    {
      "epoch": 0.8356408292583937,
      "grad_norm": 0.8719430565834045,
      "learning_rate": 8.383598110211318e-06,
      "loss": 0.0394,
      "step": 510620
    },
    {
      "epoch": 0.835673559697047,
      "grad_norm": 0.8489274382591248,
      "learning_rate": 8.3835322179978e-06,
      "loss": 0.0239,
      "step": 510640
    },
    {
      "epoch": 0.8357062901357004,
      "grad_norm": 1.3218179941177368,
      "learning_rate": 8.383466325784283e-06,
      "loss": 0.0321,
      "step": 510660
    },
    {
      "epoch": 0.8357390205743538,
      "grad_norm": 0.7651053071022034,
      "learning_rate": 8.383400433570765e-06,
      "loss": 0.0286,
      "step": 510680
    },
    {
      "epoch": 0.835771751013007,
      "grad_norm": 1.2437636852264404,
      "learning_rate": 8.383334541357249e-06,
      "loss": 0.0181,
      "step": 510700
    },
    {
      "epoch": 0.8358044814516604,
      "grad_norm": 1.334800362586975,
      "learning_rate": 8.38326864914373e-06,
      "loss": 0.0365,
      "step": 510720
    },
    {
      "epoch": 0.8358372118903138,
      "grad_norm": 1.0724914073944092,
      "learning_rate": 8.383202756930214e-06,
      "loss": 0.0234,
      "step": 510740
    },
    {
      "epoch": 0.835869942328967,
      "grad_norm": 1.0983561277389526,
      "learning_rate": 8.383136864716696e-06,
      "loss": 0.0332,
      "step": 510760
    },
    {
      "epoch": 0.8359026727676204,
      "grad_norm": 1.403283953666687,
      "learning_rate": 8.38307097250318e-06,
      "loss": 0.0188,
      "step": 510780
    },
    {
      "epoch": 0.8359354032062738,
      "grad_norm": 0.6928563117980957,
      "learning_rate": 8.383005080289663e-06,
      "loss": 0.0257,
      "step": 510800
    },
    {
      "epoch": 0.8359681336449271,
      "grad_norm": 1.0318621397018433,
      "learning_rate": 8.382939188076145e-06,
      "loss": 0.0358,
      "step": 510820
    },
    {
      "epoch": 0.8360008640835804,
      "grad_norm": 1.014201045036316,
      "learning_rate": 8.382873295862629e-06,
      "loss": 0.0268,
      "step": 510840
    },
    {
      "epoch": 0.8360335945222338,
      "grad_norm": 0.6618436574935913,
      "learning_rate": 8.382807403649112e-06,
      "loss": 0.0247,
      "step": 510860
    },
    {
      "epoch": 0.8360663249608872,
      "grad_norm": 0.3717607855796814,
      "learning_rate": 8.382741511435594e-06,
      "loss": 0.0174,
      "step": 510880
    },
    {
      "epoch": 0.8360990553995404,
      "grad_norm": 0.6960093379020691,
      "learning_rate": 8.382675619222078e-06,
      "loss": 0.0285,
      "step": 510900
    },
    {
      "epoch": 0.8361317858381938,
      "grad_norm": 0.8259488344192505,
      "learning_rate": 8.382609727008561e-06,
      "loss": 0.0191,
      "step": 510920
    },
    {
      "epoch": 0.8361645162768472,
      "grad_norm": 3.6990020275115967,
      "learning_rate": 8.382543834795043e-06,
      "loss": 0.0242,
      "step": 510940
    },
    {
      "epoch": 0.8361972467155004,
      "grad_norm": 1.8639295101165771,
      "learning_rate": 8.382477942581527e-06,
      "loss": 0.0286,
      "step": 510960
    },
    {
      "epoch": 0.8362299771541538,
      "grad_norm": 0.42303287982940674,
      "learning_rate": 8.382412050368009e-06,
      "loss": 0.0294,
      "step": 510980
    },
    {
      "epoch": 0.8362627075928072,
      "grad_norm": 1.1851387023925781,
      "learning_rate": 8.382346158154492e-06,
      "loss": 0.0281,
      "step": 511000
    },
    {
      "epoch": 0.8362954380314604,
      "grad_norm": 0.5350069999694824,
      "learning_rate": 8.382280265940974e-06,
      "loss": 0.0318,
      "step": 511020
    },
    {
      "epoch": 0.8363281684701138,
      "grad_norm": 0.45421311259269714,
      "learning_rate": 8.382214373727458e-06,
      "loss": 0.0218,
      "step": 511040
    },
    {
      "epoch": 0.8363608989087672,
      "grad_norm": 0.4611136317253113,
      "learning_rate": 8.38214848151394e-06,
      "loss": 0.0286,
      "step": 511060
    },
    {
      "epoch": 0.8363936293474206,
      "grad_norm": 0.508459210395813,
      "learning_rate": 8.382082589300423e-06,
      "loss": 0.0292,
      "step": 511080
    },
    {
      "epoch": 0.8364263597860738,
      "grad_norm": 0.5615231990814209,
      "learning_rate": 8.382016697086905e-06,
      "loss": 0.0315,
      "step": 511100
    },
    {
      "epoch": 0.8364590902247272,
      "grad_norm": 0.5150112509727478,
      "learning_rate": 8.381950804873389e-06,
      "loss": 0.0341,
      "step": 511120
    },
    {
      "epoch": 0.8364918206633806,
      "grad_norm": 0.9458470344543457,
      "learning_rate": 8.38188491265987e-06,
      "loss": 0.0254,
      "step": 511140
    },
    {
      "epoch": 0.8365245511020338,
      "grad_norm": 2.0848276615142822,
      "learning_rate": 8.381819020446354e-06,
      "loss": 0.0344,
      "step": 511160
    },
    {
      "epoch": 0.8365572815406872,
      "grad_norm": 0.7744044065475464,
      "learning_rate": 8.381753128232838e-06,
      "loss": 0.0348,
      "step": 511180
    },
    {
      "epoch": 0.8365900119793406,
      "grad_norm": 1.0742335319519043,
      "learning_rate": 8.38168723601932e-06,
      "loss": 0.0333,
      "step": 511200
    },
    {
      "epoch": 0.8366227424179938,
      "grad_norm": 0.6640097498893738,
      "learning_rate": 8.381621343805803e-06,
      "loss": 0.0246,
      "step": 511220
    },
    {
      "epoch": 0.8366554728566472,
      "grad_norm": 0.677769124507904,
      "learning_rate": 8.381555451592287e-06,
      "loss": 0.0311,
      "step": 511240
    },
    {
      "epoch": 0.8366882032953006,
      "grad_norm": 0.7509567737579346,
      "learning_rate": 8.381489559378769e-06,
      "loss": 0.03,
      "step": 511260
    },
    {
      "epoch": 0.836720933733954,
      "grad_norm": 1.5753934383392334,
      "learning_rate": 8.381423667165252e-06,
      "loss": 0.023,
      "step": 511280
    },
    {
      "epoch": 0.8367536641726072,
      "grad_norm": 1.0220547914505005,
      "learning_rate": 8.381357774951736e-06,
      "loss": 0.0357,
      "step": 511300
    },
    {
      "epoch": 0.8367863946112606,
      "grad_norm": 0.8399180769920349,
      "learning_rate": 8.381291882738218e-06,
      "loss": 0.0312,
      "step": 511320
    },
    {
      "epoch": 0.836819125049914,
      "grad_norm": 0.6070336699485779,
      "learning_rate": 8.381225990524701e-06,
      "loss": 0.0298,
      "step": 511340
    },
    {
      "epoch": 0.8368518554885672,
      "grad_norm": 0.3550108075141907,
      "learning_rate": 8.381160098311183e-06,
      "loss": 0.0339,
      "step": 511360
    },
    {
      "epoch": 0.8368845859272206,
      "grad_norm": 0.4189869165420532,
      "learning_rate": 8.381094206097667e-06,
      "loss": 0.0228,
      "step": 511380
    },
    {
      "epoch": 0.836917316365874,
      "grad_norm": 0.4445803761482239,
      "learning_rate": 8.381028313884149e-06,
      "loss": 0.0396,
      "step": 511400
    },
    {
      "epoch": 0.8369500468045272,
      "grad_norm": 0.7208763957023621,
      "learning_rate": 8.380962421670632e-06,
      "loss": 0.0239,
      "step": 511420
    },
    {
      "epoch": 0.8369827772431806,
      "grad_norm": 0.20283208787441254,
      "learning_rate": 8.380896529457114e-06,
      "loss": 0.022,
      "step": 511440
    },
    {
      "epoch": 0.837015507681834,
      "grad_norm": 1.529430627822876,
      "learning_rate": 8.380830637243598e-06,
      "loss": 0.0321,
      "step": 511460
    },
    {
      "epoch": 0.8370482381204873,
      "grad_norm": 1.257434606552124,
      "learning_rate": 8.38076474503008e-06,
      "loss": 0.0271,
      "step": 511480
    },
    {
      "epoch": 0.8370809685591406,
      "grad_norm": 1.5399996042251587,
      "learning_rate": 8.380698852816563e-06,
      "loss": 0.0292,
      "step": 511500
    },
    {
      "epoch": 0.837113698997794,
      "grad_norm": 2.3578999042510986,
      "learning_rate": 8.380632960603047e-06,
      "loss": 0.0252,
      "step": 511520
    },
    {
      "epoch": 0.8371464294364473,
      "grad_norm": 0.8605734705924988,
      "learning_rate": 8.380567068389529e-06,
      "loss": 0.0267,
      "step": 511540
    },
    {
      "epoch": 0.8371791598751006,
      "grad_norm": 1.3644492626190186,
      "learning_rate": 8.380501176176012e-06,
      "loss": 0.0162,
      "step": 511560
    },
    {
      "epoch": 0.837211890313754,
      "grad_norm": 0.4108603000640869,
      "learning_rate": 8.380435283962494e-06,
      "loss": 0.0222,
      "step": 511580
    },
    {
      "epoch": 0.8372446207524074,
      "grad_norm": 0.8317936062812805,
      "learning_rate": 8.380369391748978e-06,
      "loss": 0.0254,
      "step": 511600
    },
    {
      "epoch": 0.8372773511910606,
      "grad_norm": 1.6560277938842773,
      "learning_rate": 8.38030349953546e-06,
      "loss": 0.0349,
      "step": 511620
    },
    {
      "epoch": 0.837310081629714,
      "grad_norm": 0.5685932040214539,
      "learning_rate": 8.380237607321943e-06,
      "loss": 0.0254,
      "step": 511640
    },
    {
      "epoch": 0.8373428120683674,
      "grad_norm": 1.4799911975860596,
      "learning_rate": 8.380171715108427e-06,
      "loss": 0.0382,
      "step": 511660
    },
    {
      "epoch": 0.8373755425070207,
      "grad_norm": 1.4254494905471802,
      "learning_rate": 8.380105822894909e-06,
      "loss": 0.0287,
      "step": 511680
    },
    {
      "epoch": 0.837408272945674,
      "grad_norm": 1.082349181175232,
      "learning_rate": 8.380039930681392e-06,
      "loss": 0.0309,
      "step": 511700
    },
    {
      "epoch": 0.8374410033843274,
      "grad_norm": 0.6776260137557983,
      "learning_rate": 8.379974038467876e-06,
      "loss": 0.0453,
      "step": 511720
    },
    {
      "epoch": 0.8374737338229807,
      "grad_norm": 1.2735708951950073,
      "learning_rate": 8.379908146254358e-06,
      "loss": 0.0292,
      "step": 511740
    },
    {
      "epoch": 0.837506464261634,
      "grad_norm": 0.796491265296936,
      "learning_rate": 8.379842254040841e-06,
      "loss": 0.0291,
      "step": 511760
    },
    {
      "epoch": 0.8375391947002874,
      "grad_norm": 1.2377516031265259,
      "learning_rate": 8.379776361827323e-06,
      "loss": 0.0354,
      "step": 511780
    },
    {
      "epoch": 0.8375719251389407,
      "grad_norm": 0.8215429186820984,
      "learning_rate": 8.379710469613807e-06,
      "loss": 0.034,
      "step": 511800
    },
    {
      "epoch": 0.837604655577594,
      "grad_norm": 0.425553560256958,
      "learning_rate": 8.379644577400289e-06,
      "loss": 0.0255,
      "step": 511820
    },
    {
      "epoch": 0.8376373860162474,
      "grad_norm": 1.058790683746338,
      "learning_rate": 8.379578685186772e-06,
      "loss": 0.0357,
      "step": 511840
    },
    {
      "epoch": 0.8376701164549007,
      "grad_norm": 0.7520353198051453,
      "learning_rate": 8.379512792973254e-06,
      "loss": 0.0336,
      "step": 511860
    },
    {
      "epoch": 0.8377028468935541,
      "grad_norm": 0.5682500600814819,
      "learning_rate": 8.379446900759738e-06,
      "loss": 0.0357,
      "step": 511880
    },
    {
      "epoch": 0.8377355773322074,
      "grad_norm": 0.5425479412078857,
      "learning_rate": 8.379381008546221e-06,
      "loss": 0.0232,
      "step": 511900
    },
    {
      "epoch": 0.8377683077708608,
      "grad_norm": 1.0394734144210815,
      "learning_rate": 8.379315116332703e-06,
      "loss": 0.0271,
      "step": 511920
    },
    {
      "epoch": 0.8378010382095141,
      "grad_norm": 0.661340594291687,
      "learning_rate": 8.379249224119187e-06,
      "loss": 0.0222,
      "step": 511940
    },
    {
      "epoch": 0.8378337686481674,
      "grad_norm": 0.9793447852134705,
      "learning_rate": 8.379183331905669e-06,
      "loss": 0.025,
      "step": 511960
    },
    {
      "epoch": 0.8378664990868208,
      "grad_norm": 0.41789624094963074,
      "learning_rate": 8.379117439692152e-06,
      "loss": 0.0315,
      "step": 511980
    },
    {
      "epoch": 0.8378992295254741,
      "grad_norm": 0.577975332736969,
      "learning_rate": 8.379051547478634e-06,
      "loss": 0.0354,
      "step": 512000
    },
    {
      "epoch": 0.8379319599641274,
      "grad_norm": 1.1439803838729858,
      "learning_rate": 8.378985655265118e-06,
      "loss": 0.0274,
      "step": 512020
    },
    {
      "epoch": 0.8379646904027808,
      "grad_norm": 1.3749722242355347,
      "learning_rate": 8.378919763051601e-06,
      "loss": 0.0329,
      "step": 512040
    },
    {
      "epoch": 0.8379974208414341,
      "grad_norm": 0.860474705696106,
      "learning_rate": 8.378853870838083e-06,
      "loss": 0.0291,
      "step": 512060
    },
    {
      "epoch": 0.8380301512800875,
      "grad_norm": 0.5701127648353577,
      "learning_rate": 8.378787978624567e-06,
      "loss": 0.0275,
      "step": 512080
    },
    {
      "epoch": 0.8380628817187408,
      "grad_norm": 0.7686430215835571,
      "learning_rate": 8.37872208641105e-06,
      "loss": 0.029,
      "step": 512100
    },
    {
      "epoch": 0.8380956121573941,
      "grad_norm": 0.7758325338363647,
      "learning_rate": 8.378656194197532e-06,
      "loss": 0.0271,
      "step": 512120
    },
    {
      "epoch": 0.8381283425960475,
      "grad_norm": 0.7968153953552246,
      "learning_rate": 8.378590301984016e-06,
      "loss": 0.029,
      "step": 512140
    },
    {
      "epoch": 0.8381610730347008,
      "grad_norm": 1.1317510604858398,
      "learning_rate": 8.378524409770498e-06,
      "loss": 0.0314,
      "step": 512160
    },
    {
      "epoch": 0.8381938034733541,
      "grad_norm": 2.3142518997192383,
      "learning_rate": 8.378458517556981e-06,
      "loss": 0.0328,
      "step": 512180
    },
    {
      "epoch": 0.8382265339120075,
      "grad_norm": 0.6464798450469971,
      "learning_rate": 8.378392625343463e-06,
      "loss": 0.0264,
      "step": 512200
    },
    {
      "epoch": 0.8382592643506608,
      "grad_norm": 0.4492735266685486,
      "learning_rate": 8.378326733129947e-06,
      "loss": 0.0317,
      "step": 512220
    },
    {
      "epoch": 0.8382919947893142,
      "grad_norm": 2.0923101902008057,
      "learning_rate": 8.37826084091643e-06,
      "loss": 0.0329,
      "step": 512240
    },
    {
      "epoch": 0.8383247252279675,
      "grad_norm": 0.3003305196762085,
      "learning_rate": 8.378194948702912e-06,
      "loss": 0.0396,
      "step": 512260
    },
    {
      "epoch": 0.8383574556666209,
      "grad_norm": 0.3388361930847168,
      "learning_rate": 8.378129056489396e-06,
      "loss": 0.0261,
      "step": 512280
    },
    {
      "epoch": 0.8383901861052742,
      "grad_norm": 1.4407509565353394,
      "learning_rate": 8.378063164275878e-06,
      "loss": 0.0327,
      "step": 512300
    },
    {
      "epoch": 0.8384229165439275,
      "grad_norm": 1.4967783689498901,
      "learning_rate": 8.377997272062361e-06,
      "loss": 0.0346,
      "step": 512320
    },
    {
      "epoch": 0.8384556469825809,
      "grad_norm": 1.027443289756775,
      "learning_rate": 8.377931379848843e-06,
      "loss": 0.0477,
      "step": 512340
    },
    {
      "epoch": 0.8384883774212342,
      "grad_norm": 0.7516555190086365,
      "learning_rate": 8.377865487635327e-06,
      "loss": 0.0199,
      "step": 512360
    },
    {
      "epoch": 0.8385211078598875,
      "grad_norm": 0.9874030947685242,
      "learning_rate": 8.377799595421809e-06,
      "loss": 0.0316,
      "step": 512380
    },
    {
      "epoch": 0.8385538382985409,
      "grad_norm": 0.850994884967804,
      "learning_rate": 8.377733703208292e-06,
      "loss": 0.0241,
      "step": 512400
    },
    {
      "epoch": 0.8385865687371942,
      "grad_norm": 0.35356518626213074,
      "learning_rate": 8.377667810994774e-06,
      "loss": 0.0247,
      "step": 512420
    },
    {
      "epoch": 0.8386192991758475,
      "grad_norm": 1.180534839630127,
      "learning_rate": 8.377601918781258e-06,
      "loss": 0.0233,
      "step": 512440
    },
    {
      "epoch": 0.8386520296145009,
      "grad_norm": 27.748374938964844,
      "learning_rate": 8.377536026567741e-06,
      "loss": 0.0405,
      "step": 512460
    },
    {
      "epoch": 0.8386847600531542,
      "grad_norm": 0.5282034873962402,
      "learning_rate": 8.377470134354223e-06,
      "loss": 0.0306,
      "step": 512480
    },
    {
      "epoch": 0.8387174904918075,
      "grad_norm": 1.1991703510284424,
      "learning_rate": 8.377404242140707e-06,
      "loss": 0.0302,
      "step": 512500
    },
    {
      "epoch": 0.8387502209304609,
      "grad_norm": 2.0984818935394287,
      "learning_rate": 8.37733834992719e-06,
      "loss": 0.0289,
      "step": 512520
    },
    {
      "epoch": 0.8387829513691143,
      "grad_norm": 1.5173183679580688,
      "learning_rate": 8.377272457713672e-06,
      "loss": 0.0252,
      "step": 512540
    },
    {
      "epoch": 0.8388156818077676,
      "grad_norm": 0.751848042011261,
      "learning_rate": 8.377206565500156e-06,
      "loss": 0.0332,
      "step": 512560
    },
    {
      "epoch": 0.8388484122464209,
      "grad_norm": 1.6840482950210571,
      "learning_rate": 8.37714067328664e-06,
      "loss": 0.0326,
      "step": 512580
    },
    {
      "epoch": 0.8388811426850743,
      "grad_norm": 0.5974200963973999,
      "learning_rate": 8.377074781073122e-06,
      "loss": 0.0245,
      "step": 512600
    },
    {
      "epoch": 0.8389138731237276,
      "grad_norm": 0.39465150237083435,
      "learning_rate": 8.377008888859605e-06,
      "loss": 0.0298,
      "step": 512620
    },
    {
      "epoch": 0.8389466035623809,
      "grad_norm": 0.29437631368637085,
      "learning_rate": 8.376942996646087e-06,
      "loss": 0.0363,
      "step": 512640
    },
    {
      "epoch": 0.8389793340010343,
      "grad_norm": 2.010658025741577,
      "learning_rate": 8.37687710443257e-06,
      "loss": 0.0305,
      "step": 512660
    },
    {
      "epoch": 0.8390120644396876,
      "grad_norm": 0.2976754605770111,
      "learning_rate": 8.376811212219052e-06,
      "loss": 0.0295,
      "step": 512680
    },
    {
      "epoch": 0.8390447948783409,
      "grad_norm": 0.27869701385498047,
      "learning_rate": 8.376745320005536e-06,
      "loss": 0.029,
      "step": 512700
    },
    {
      "epoch": 0.8390775253169943,
      "grad_norm": 1.4564599990844727,
      "learning_rate": 8.376679427792018e-06,
      "loss": 0.0368,
      "step": 512720
    },
    {
      "epoch": 0.8391102557556477,
      "grad_norm": 0.26856568455696106,
      "learning_rate": 8.376613535578502e-06,
      "loss": 0.026,
      "step": 512740
    },
    {
      "epoch": 0.8391429861943009,
      "grad_norm": 0.5182256102561951,
      "learning_rate": 8.376547643364983e-06,
      "loss": 0.0304,
      "step": 512760
    },
    {
      "epoch": 0.8391757166329543,
      "grad_norm": 0.3703884482383728,
      "learning_rate": 8.376481751151467e-06,
      "loss": 0.0275,
      "step": 512780
    },
    {
      "epoch": 0.8392084470716077,
      "grad_norm": 0.6315441131591797,
      "learning_rate": 8.376415858937949e-06,
      "loss": 0.029,
      "step": 512800
    },
    {
      "epoch": 0.839241177510261,
      "grad_norm": 1.5018067359924316,
      "learning_rate": 8.376349966724432e-06,
      "loss": 0.0292,
      "step": 512820
    },
    {
      "epoch": 0.8392739079489143,
      "grad_norm": 0.7851388454437256,
      "learning_rate": 8.376284074510916e-06,
      "loss": 0.0262,
      "step": 512840
    },
    {
      "epoch": 0.8393066383875677,
      "grad_norm": 0.7448118925094604,
      "learning_rate": 8.376218182297398e-06,
      "loss": 0.0361,
      "step": 512860
    },
    {
      "epoch": 0.839339368826221,
      "grad_norm": 0.4155130386352539,
      "learning_rate": 8.376152290083882e-06,
      "loss": 0.026,
      "step": 512880
    },
    {
      "epoch": 0.8393720992648743,
      "grad_norm": 1.5926498174667358,
      "learning_rate": 8.376086397870365e-06,
      "loss": 0.036,
      "step": 512900
    },
    {
      "epoch": 0.8394048297035277,
      "grad_norm": 1.3001620769500732,
      "learning_rate": 8.376020505656847e-06,
      "loss": 0.0339,
      "step": 512920
    },
    {
      "epoch": 0.8394375601421811,
      "grad_norm": 0.6081722974777222,
      "learning_rate": 8.37595461344333e-06,
      "loss": 0.0283,
      "step": 512940
    },
    {
      "epoch": 0.8394702905808343,
      "grad_norm": 0.9980704188346863,
      "learning_rate": 8.375888721229814e-06,
      "loss": 0.0273,
      "step": 512960
    },
    {
      "epoch": 0.8395030210194877,
      "grad_norm": 1.359440565109253,
      "learning_rate": 8.375822829016296e-06,
      "loss": 0.0275,
      "step": 512980
    },
    {
      "epoch": 0.8395357514581411,
      "grad_norm": 2.5434882640838623,
      "learning_rate": 8.37575693680278e-06,
      "loss": 0.0211,
      "step": 513000
    },
    {
      "epoch": 0.8395684818967943,
      "grad_norm": 0.5639567971229553,
      "learning_rate": 8.375691044589262e-06,
      "loss": 0.0466,
      "step": 513020
    },
    {
      "epoch": 0.8396012123354477,
      "grad_norm": 0.9139577150344849,
      "learning_rate": 8.375625152375745e-06,
      "loss": 0.0349,
      "step": 513040
    },
    {
      "epoch": 0.8396339427741011,
      "grad_norm": 0.4766887426376343,
      "learning_rate": 8.375559260162227e-06,
      "loss": 0.0332,
      "step": 513060
    },
    {
      "epoch": 0.8396666732127543,
      "grad_norm": 0.14841607213020325,
      "learning_rate": 8.37549336794871e-06,
      "loss": 0.0186,
      "step": 513080
    },
    {
      "epoch": 0.8396994036514077,
      "grad_norm": 0.4718279242515564,
      "learning_rate": 8.375427475735193e-06,
      "loss": 0.0243,
      "step": 513100
    },
    {
      "epoch": 0.8397321340900611,
      "grad_norm": 1.581169605255127,
      "learning_rate": 8.375361583521676e-06,
      "loss": 0.0188,
      "step": 513120
    },
    {
      "epoch": 0.8397648645287145,
      "grad_norm": 1.8182191848754883,
      "learning_rate": 8.375295691308158e-06,
      "loss": 0.0259,
      "step": 513140
    },
    {
      "epoch": 0.8397975949673677,
      "grad_norm": 0.32917895913124084,
      "learning_rate": 8.375229799094642e-06,
      "loss": 0.0249,
      "step": 513160
    },
    {
      "epoch": 0.8398303254060211,
      "grad_norm": 0.4849013090133667,
      "learning_rate": 8.375163906881124e-06,
      "loss": 0.0273,
      "step": 513180
    },
    {
      "epoch": 0.8398630558446745,
      "grad_norm": 2.4567856788635254,
      "learning_rate": 8.375098014667607e-06,
      "loss": 0.0341,
      "step": 513200
    },
    {
      "epoch": 0.8398957862833277,
      "grad_norm": 0.308737188577652,
      "learning_rate": 8.375032122454089e-06,
      "loss": 0.0292,
      "step": 513220
    },
    {
      "epoch": 0.8399285167219811,
      "grad_norm": 1.472623586654663,
      "learning_rate": 8.374966230240573e-06,
      "loss": 0.0196,
      "step": 513240
    },
    {
      "epoch": 0.8399612471606345,
      "grad_norm": 0.9709911942481995,
      "learning_rate": 8.374900338027056e-06,
      "loss": 0.0246,
      "step": 513260
    },
    {
      "epoch": 0.8399939775992877,
      "grad_norm": 0.8807615637779236,
      "learning_rate": 8.374834445813538e-06,
      "loss": 0.0309,
      "step": 513280
    },
    {
      "epoch": 0.8400267080379411,
      "grad_norm": 1.9820767641067505,
      "learning_rate": 8.374768553600022e-06,
      "loss": 0.0408,
      "step": 513300
    },
    {
      "epoch": 0.8400594384765945,
      "grad_norm": 0.054531529545784,
      "learning_rate": 8.374702661386505e-06,
      "loss": 0.0266,
      "step": 513320
    },
    {
      "epoch": 0.8400921689152478,
      "grad_norm": 0.38454222679138184,
      "learning_rate": 8.374636769172987e-06,
      "loss": 0.0257,
      "step": 513340
    },
    {
      "epoch": 0.8401248993539011,
      "grad_norm": 0.3748363256454468,
      "learning_rate": 8.37457087695947e-06,
      "loss": 0.0376,
      "step": 513360
    },
    {
      "epoch": 0.8401576297925545,
      "grad_norm": 0.1644987165927887,
      "learning_rate": 8.374504984745954e-06,
      "loss": 0.0244,
      "step": 513380
    },
    {
      "epoch": 0.8401903602312079,
      "grad_norm": 0.3046826124191284,
      "learning_rate": 8.374439092532436e-06,
      "loss": 0.0237,
      "step": 513400
    },
    {
      "epoch": 0.8402230906698611,
      "grad_norm": 0.940976619720459,
      "learning_rate": 8.37437320031892e-06,
      "loss": 0.035,
      "step": 513420
    },
    {
      "epoch": 0.8402558211085145,
      "grad_norm": 0.19564096629619598,
      "learning_rate": 8.374307308105402e-06,
      "loss": 0.0296,
      "step": 513440
    },
    {
      "epoch": 0.8402885515471679,
      "grad_norm": 0.49410349130630493,
      "learning_rate": 8.374241415891885e-06,
      "loss": 0.0232,
      "step": 513460
    },
    {
      "epoch": 0.8403212819858211,
      "grad_norm": 1.0154435634613037,
      "learning_rate": 8.374175523678367e-06,
      "loss": 0.0279,
      "step": 513480
    },
    {
      "epoch": 0.8403540124244745,
      "grad_norm": 0.40520554780960083,
      "learning_rate": 8.37410963146485e-06,
      "loss": 0.0331,
      "step": 513500
    },
    {
      "epoch": 0.8403867428631279,
      "grad_norm": 1.4151779413223267,
      "learning_rate": 8.374043739251333e-06,
      "loss": 0.0348,
      "step": 513520
    },
    {
      "epoch": 0.8404194733017812,
      "grad_norm": 0.7558156251907349,
      "learning_rate": 8.373977847037816e-06,
      "loss": 0.0157,
      "step": 513540
    },
    {
      "epoch": 0.8404522037404345,
      "grad_norm": 0.24080657958984375,
      "learning_rate": 8.373911954824298e-06,
      "loss": 0.0389,
      "step": 513560
    },
    {
      "epoch": 0.8404849341790879,
      "grad_norm": 0.11063136160373688,
      "learning_rate": 8.373846062610782e-06,
      "loss": 0.0361,
      "step": 513580
    },
    {
      "epoch": 0.8405176646177412,
      "grad_norm": 0.5467543005943298,
      "learning_rate": 8.373780170397264e-06,
      "loss": 0.0306,
      "step": 513600
    },
    {
      "epoch": 0.8405503950563945,
      "grad_norm": 1.0513718128204346,
      "learning_rate": 8.373714278183747e-06,
      "loss": 0.029,
      "step": 513620
    },
    {
      "epoch": 0.8405831254950479,
      "grad_norm": 2.7113747596740723,
      "learning_rate": 8.37364838597023e-06,
      "loss": 0.0449,
      "step": 513640
    },
    {
      "epoch": 0.8406158559337013,
      "grad_norm": 1.1517387628555298,
      "learning_rate": 8.373582493756713e-06,
      "loss": 0.0279,
      "step": 513660
    },
    {
      "epoch": 0.8406485863723545,
      "grad_norm": 1.5601892471313477,
      "learning_rate": 8.373516601543196e-06,
      "loss": 0.023,
      "step": 513680
    },
    {
      "epoch": 0.8406813168110079,
      "grad_norm": 1.6635204553604126,
      "learning_rate": 8.37345070932968e-06,
      "loss": 0.0409,
      "step": 513700
    },
    {
      "epoch": 0.8407140472496613,
      "grad_norm": 1.0858826637268066,
      "learning_rate": 8.373384817116162e-06,
      "loss": 0.0231,
      "step": 513720
    },
    {
      "epoch": 0.8407467776883146,
      "grad_norm": 0.557019054889679,
      "learning_rate": 8.373318924902645e-06,
      "loss": 0.0299,
      "step": 513740
    },
    {
      "epoch": 0.8407795081269679,
      "grad_norm": 3.5379927158355713,
      "learning_rate": 8.373253032689129e-06,
      "loss": 0.0356,
      "step": 513760
    },
    {
      "epoch": 0.8408122385656213,
      "grad_norm": 0.8070377707481384,
      "learning_rate": 8.37318714047561e-06,
      "loss": 0.0364,
      "step": 513780
    },
    {
      "epoch": 0.8408449690042746,
      "grad_norm": 1.4815130233764648,
      "learning_rate": 8.373121248262094e-06,
      "loss": 0.0357,
      "step": 513800
    },
    {
      "epoch": 0.8408776994429279,
      "grad_norm": 0.8153737187385559,
      "learning_rate": 8.373055356048576e-06,
      "loss": 0.0277,
      "step": 513820
    },
    {
      "epoch": 0.8409104298815813,
      "grad_norm": 0.9919715523719788,
      "learning_rate": 8.37298946383506e-06,
      "loss": 0.0413,
      "step": 513840
    },
    {
      "epoch": 0.8409431603202346,
      "grad_norm": 1.5349520444869995,
      "learning_rate": 8.372923571621542e-06,
      "loss": 0.0302,
      "step": 513860
    },
    {
      "epoch": 0.8409758907588879,
      "grad_norm": 0.6537732481956482,
      "learning_rate": 8.372857679408025e-06,
      "loss": 0.027,
      "step": 513880
    },
    {
      "epoch": 0.8410086211975413,
      "grad_norm": 0.7381813526153564,
      "learning_rate": 8.372791787194507e-06,
      "loss": 0.0267,
      "step": 513900
    },
    {
      "epoch": 0.8410413516361946,
      "grad_norm": 0.7138646841049194,
      "learning_rate": 8.37272589498099e-06,
      "loss": 0.0265,
      "step": 513920
    },
    {
      "epoch": 0.841074082074848,
      "grad_norm": 0.6583712697029114,
      "learning_rate": 8.372660002767473e-06,
      "loss": 0.0266,
      "step": 513940
    },
    {
      "epoch": 0.8411068125135013,
      "grad_norm": 1.128524899482727,
      "learning_rate": 8.372594110553956e-06,
      "loss": 0.039,
      "step": 513960
    },
    {
      "epoch": 0.8411395429521547,
      "grad_norm": 1.0104303359985352,
      "learning_rate": 8.37252821834044e-06,
      "loss": 0.0293,
      "step": 513980
    },
    {
      "epoch": 0.841172273390808,
      "grad_norm": 0.36239027976989746,
      "learning_rate": 8.372462326126922e-06,
      "loss": 0.0235,
      "step": 514000
    },
    {
      "epoch": 0.8412050038294613,
      "grad_norm": 0.9975362420082092,
      "learning_rate": 8.372396433913405e-06,
      "loss": 0.034,
      "step": 514020
    },
    {
      "epoch": 0.8412377342681147,
      "grad_norm": 1.1405861377716064,
      "learning_rate": 8.372330541699887e-06,
      "loss": 0.0223,
      "step": 514040
    },
    {
      "epoch": 0.841270464706768,
      "grad_norm": 1.419560194015503,
      "learning_rate": 8.37226464948637e-06,
      "loss": 0.032,
      "step": 514060
    },
    {
      "epoch": 0.8413031951454213,
      "grad_norm": 1.3823903799057007,
      "learning_rate": 8.372198757272854e-06,
      "loss": 0.0306,
      "step": 514080
    },
    {
      "epoch": 0.8413359255840747,
      "grad_norm": 0.16199354827404022,
      "learning_rate": 8.372132865059336e-06,
      "loss": 0.0298,
      "step": 514100
    },
    {
      "epoch": 0.841368656022728,
      "grad_norm": 1.003446102142334,
      "learning_rate": 8.37206697284582e-06,
      "loss": 0.0338,
      "step": 514120
    },
    {
      "epoch": 0.8414013864613814,
      "grad_norm": 1.1804956197738647,
      "learning_rate": 8.372001080632303e-06,
      "loss": 0.0307,
      "step": 514140
    },
    {
      "epoch": 0.8414341169000347,
      "grad_norm": 0.9777649641036987,
      "learning_rate": 8.371935188418785e-06,
      "loss": 0.0223,
      "step": 514160
    },
    {
      "epoch": 0.841466847338688,
      "grad_norm": 0.641770601272583,
      "learning_rate": 8.371869296205269e-06,
      "loss": 0.0241,
      "step": 514180
    },
    {
      "epoch": 0.8414995777773414,
      "grad_norm": 1.1014219522476196,
      "learning_rate": 8.371803403991751e-06,
      "loss": 0.0206,
      "step": 514200
    },
    {
      "epoch": 0.8415323082159947,
      "grad_norm": 0.5846141576766968,
      "learning_rate": 8.371737511778234e-06,
      "loss": 0.0233,
      "step": 514220
    },
    {
      "epoch": 0.841565038654648,
      "grad_norm": 1.2883727550506592,
      "learning_rate": 8.371671619564716e-06,
      "loss": 0.026,
      "step": 514240
    },
    {
      "epoch": 0.8415977690933014,
      "grad_norm": 1.2156540155410767,
      "learning_rate": 8.3716057273512e-06,
      "loss": 0.0208,
      "step": 514260
    },
    {
      "epoch": 0.8416304995319547,
      "grad_norm": 1.1702470779418945,
      "learning_rate": 8.371539835137682e-06,
      "loss": 0.0298,
      "step": 514280
    },
    {
      "epoch": 0.841663229970608,
      "grad_norm": 1.9864444732666016,
      "learning_rate": 8.371473942924165e-06,
      "loss": 0.0268,
      "step": 514300
    },
    {
      "epoch": 0.8416959604092614,
      "grad_norm": 0.9519277811050415,
      "learning_rate": 8.371408050710647e-06,
      "loss": 0.0342,
      "step": 514320
    },
    {
      "epoch": 0.8417286908479148,
      "grad_norm": 0.9707338809967041,
      "learning_rate": 8.371342158497131e-06,
      "loss": 0.0358,
      "step": 514340
    },
    {
      "epoch": 0.8417614212865681,
      "grad_norm": 1.792136788368225,
      "learning_rate": 8.371276266283614e-06,
      "loss": 0.03,
      "step": 514360
    },
    {
      "epoch": 0.8417941517252214,
      "grad_norm": 0.8739434480667114,
      "learning_rate": 8.371210374070096e-06,
      "loss": 0.0244,
      "step": 514380
    },
    {
      "epoch": 0.8418268821638748,
      "grad_norm": 0.8828936815261841,
      "learning_rate": 8.37114448185658e-06,
      "loss": 0.0265,
      "step": 514400
    },
    {
      "epoch": 0.8418596126025281,
      "grad_norm": 0.39083045721054077,
      "learning_rate": 8.371078589643062e-06,
      "loss": 0.0178,
      "step": 514420
    },
    {
      "epoch": 0.8418923430411814,
      "grad_norm": 0.8838555812835693,
      "learning_rate": 8.371012697429545e-06,
      "loss": 0.0313,
      "step": 514440
    },
    {
      "epoch": 0.8419250734798348,
      "grad_norm": 1.8653085231781006,
      "learning_rate": 8.370946805216027e-06,
      "loss": 0.0302,
      "step": 514460
    },
    {
      "epoch": 0.8419578039184881,
      "grad_norm": 1.1140310764312744,
      "learning_rate": 8.370880913002511e-06,
      "loss": 0.0244,
      "step": 514480
    },
    {
      "epoch": 0.8419905343571414,
      "grad_norm": 1.0101851224899292,
      "learning_rate": 8.370815020788994e-06,
      "loss": 0.0362,
      "step": 514500
    },
    {
      "epoch": 0.8420232647957948,
      "grad_norm": 4.610245227813721,
      "learning_rate": 8.370749128575476e-06,
      "loss": 0.0421,
      "step": 514520
    },
    {
      "epoch": 0.8420559952344482,
      "grad_norm": 1.1547763347625732,
      "learning_rate": 8.37068323636196e-06,
      "loss": 0.0379,
      "step": 514540
    },
    {
      "epoch": 0.8420887256731014,
      "grad_norm": 2.087322473526001,
      "learning_rate": 8.370617344148444e-06,
      "loss": 0.0295,
      "step": 514560
    },
    {
      "epoch": 0.8421214561117548,
      "grad_norm": 0.2613169550895691,
      "learning_rate": 8.370551451934925e-06,
      "loss": 0.0329,
      "step": 514580
    },
    {
      "epoch": 0.8421541865504082,
      "grad_norm": 2.3185923099517822,
      "learning_rate": 8.370485559721409e-06,
      "loss": 0.0321,
      "step": 514600
    },
    {
      "epoch": 0.8421869169890615,
      "grad_norm": 0.7311400175094604,
      "learning_rate": 8.370419667507891e-06,
      "loss": 0.0196,
      "step": 514620
    },
    {
      "epoch": 0.8422196474277148,
      "grad_norm": 1.2373186349868774,
      "learning_rate": 8.370353775294375e-06,
      "loss": 0.0206,
      "step": 514640
    },
    {
      "epoch": 0.8422523778663682,
      "grad_norm": 1.0957027673721313,
      "learning_rate": 8.370287883080856e-06,
      "loss": 0.0227,
      "step": 514660
    },
    {
      "epoch": 0.8422851083050215,
      "grad_norm": 0.7060939073562622,
      "learning_rate": 8.37022199086734e-06,
      "loss": 0.031,
      "step": 514680
    },
    {
      "epoch": 0.8423178387436748,
      "grad_norm": 0.9672877192497253,
      "learning_rate": 8.370156098653824e-06,
      "loss": 0.0343,
      "step": 514700
    },
    {
      "epoch": 0.8423505691823282,
      "grad_norm": 0.4693247675895691,
      "learning_rate": 8.370090206440305e-06,
      "loss": 0.0277,
      "step": 514720
    },
    {
      "epoch": 0.8423832996209816,
      "grad_norm": 1.9300317764282227,
      "learning_rate": 8.370024314226789e-06,
      "loss": 0.0337,
      "step": 514740
    },
    {
      "epoch": 0.8424160300596348,
      "grad_norm": 0.4434925317764282,
      "learning_rate": 8.369958422013271e-06,
      "loss": 0.032,
      "step": 514760
    },
    {
      "epoch": 0.8424487604982882,
      "grad_norm": 0.8279253244400024,
      "learning_rate": 8.369892529799755e-06,
      "loss": 0.0321,
      "step": 514780
    },
    {
      "epoch": 0.8424814909369416,
      "grad_norm": 0.3964329957962036,
      "learning_rate": 8.369826637586236e-06,
      "loss": 0.0279,
      "step": 514800
    },
    {
      "epoch": 0.8425142213755948,
      "grad_norm": 0.47703877091407776,
      "learning_rate": 8.36976074537272e-06,
      "loss": 0.032,
      "step": 514820
    },
    {
      "epoch": 0.8425469518142482,
      "grad_norm": 1.0111924409866333,
      "learning_rate": 8.369694853159202e-06,
      "loss": 0.0263,
      "step": 514840
    },
    {
      "epoch": 0.8425796822529016,
      "grad_norm": 0.26649996638298035,
      "learning_rate": 8.369628960945686e-06,
      "loss": 0.0276,
      "step": 514860
    },
    {
      "epoch": 0.8426124126915548,
      "grad_norm": 0.5494698286056519,
      "learning_rate": 8.369563068732169e-06,
      "loss": 0.0318,
      "step": 514880
    },
    {
      "epoch": 0.8426451431302082,
      "grad_norm": 0.6563540697097778,
      "learning_rate": 8.369497176518651e-06,
      "loss": 0.0278,
      "step": 514900
    },
    {
      "epoch": 0.8426778735688616,
      "grad_norm": 0.6857914328575134,
      "learning_rate": 8.369431284305135e-06,
      "loss": 0.0283,
      "step": 514920
    },
    {
      "epoch": 0.842710604007515,
      "grad_norm": 2.542834997177124,
      "learning_rate": 8.369365392091618e-06,
      "loss": 0.019,
      "step": 514940
    },
    {
      "epoch": 0.8427433344461682,
      "grad_norm": 1.0946624279022217,
      "learning_rate": 8.3692994998781e-06,
      "loss": 0.0185,
      "step": 514960
    },
    {
      "epoch": 0.8427760648848216,
      "grad_norm": 0.517721951007843,
      "learning_rate": 8.369233607664584e-06,
      "loss": 0.0254,
      "step": 514980
    },
    {
      "epoch": 0.842808795323475,
      "grad_norm": 0.8204488158226013,
      "learning_rate": 8.369167715451066e-06,
      "loss": 0.0392,
      "step": 515000
    },
    {
      "epoch": 0.8428415257621282,
      "grad_norm": 0.4983392059803009,
      "learning_rate": 8.369101823237549e-06,
      "loss": 0.0283,
      "step": 515020
    },
    {
      "epoch": 0.8428742562007816,
      "grad_norm": 0.25334417819976807,
      "learning_rate": 8.369035931024033e-06,
      "loss": 0.0206,
      "step": 515040
    },
    {
      "epoch": 0.842906986639435,
      "grad_norm": 1.3906681537628174,
      "learning_rate": 8.368970038810515e-06,
      "loss": 0.0356,
      "step": 515060
    },
    {
      "epoch": 0.8429397170780882,
      "grad_norm": 0.13807538151741028,
      "learning_rate": 8.368904146596998e-06,
      "loss": 0.0195,
      "step": 515080
    },
    {
      "epoch": 0.8429724475167416,
      "grad_norm": 0.43652430176734924,
      "learning_rate": 8.36883825438348e-06,
      "loss": 0.0303,
      "step": 515100
    },
    {
      "epoch": 0.843005177955395,
      "grad_norm": 0.6172163486480713,
      "learning_rate": 8.368772362169964e-06,
      "loss": 0.0232,
      "step": 515120
    },
    {
      "epoch": 0.8430379083940484,
      "grad_norm": 0.8550609350204468,
      "learning_rate": 8.368706469956446e-06,
      "loss": 0.0278,
      "step": 515140
    },
    {
      "epoch": 0.8430706388327016,
      "grad_norm": 1.9043627977371216,
      "learning_rate": 8.368640577742929e-06,
      "loss": 0.0299,
      "step": 515160
    },
    {
      "epoch": 0.843103369271355,
      "grad_norm": 0.6390014886856079,
      "learning_rate": 8.368574685529411e-06,
      "loss": 0.0182,
      "step": 515180
    },
    {
      "epoch": 0.8431360997100084,
      "grad_norm": 0.9454190731048584,
      "learning_rate": 8.368508793315895e-06,
      "loss": 0.0323,
      "step": 515200
    },
    {
      "epoch": 0.8431688301486616,
      "grad_norm": 0.40926799178123474,
      "learning_rate": 8.368442901102377e-06,
      "loss": 0.0216,
      "step": 515220
    },
    {
      "epoch": 0.843201560587315,
      "grad_norm": 0.332212895154953,
      "learning_rate": 8.36837700888886e-06,
      "loss": 0.0257,
      "step": 515240
    },
    {
      "epoch": 0.8432342910259684,
      "grad_norm": 1.413756012916565,
      "learning_rate": 8.368311116675342e-06,
      "loss": 0.0235,
      "step": 515260
    },
    {
      "epoch": 0.8432670214646216,
      "grad_norm": 0.8599782586097717,
      "learning_rate": 8.368245224461826e-06,
      "loss": 0.0396,
      "step": 515280
    },
    {
      "epoch": 0.843299751903275,
      "grad_norm": 2.0229597091674805,
      "learning_rate": 8.36817933224831e-06,
      "loss": 0.0298,
      "step": 515300
    },
    {
      "epoch": 0.8433324823419284,
      "grad_norm": 0.5144335627555847,
      "learning_rate": 8.368113440034791e-06,
      "loss": 0.03,
      "step": 515320
    },
    {
      "epoch": 0.8433652127805816,
      "grad_norm": 0.6356525421142578,
      "learning_rate": 8.368047547821275e-06,
      "loss": 0.0271,
      "step": 515340
    },
    {
      "epoch": 0.843397943219235,
      "grad_norm": 1.4987530708312988,
      "learning_rate": 8.367981655607758e-06,
      "loss": 0.0297,
      "step": 515360
    },
    {
      "epoch": 0.8434306736578884,
      "grad_norm": 0.33442506194114685,
      "learning_rate": 8.36791576339424e-06,
      "loss": 0.0219,
      "step": 515380
    },
    {
      "epoch": 0.8434634040965417,
      "grad_norm": 0.42805173993110657,
      "learning_rate": 8.367849871180724e-06,
      "loss": 0.025,
      "step": 515400
    },
    {
      "epoch": 0.843496134535195,
      "grad_norm": 0.6556529402732849,
      "learning_rate": 8.367783978967207e-06,
      "loss": 0.0222,
      "step": 515420
    },
    {
      "epoch": 0.8435288649738484,
      "grad_norm": 0.5982301235198975,
      "learning_rate": 8.36771808675369e-06,
      "loss": 0.0332,
      "step": 515440
    },
    {
      "epoch": 0.8435615954125018,
      "grad_norm": 0.576103925704956,
      "learning_rate": 8.367652194540173e-06,
      "loss": 0.0302,
      "step": 515460
    },
    {
      "epoch": 0.843594325851155,
      "grad_norm": 1.210269808769226,
      "learning_rate": 8.367586302326655e-06,
      "loss": 0.0281,
      "step": 515480
    },
    {
      "epoch": 0.8436270562898084,
      "grad_norm": 0.7282025218009949,
      "learning_rate": 8.367520410113138e-06,
      "loss": 0.025,
      "step": 515500
    },
    {
      "epoch": 0.8436597867284618,
      "grad_norm": 0.8162029981613159,
      "learning_rate": 8.36745451789962e-06,
      "loss": 0.0266,
      "step": 515520
    },
    {
      "epoch": 0.843692517167115,
      "grad_norm": 0.23839512467384338,
      "learning_rate": 8.367388625686104e-06,
      "loss": 0.0165,
      "step": 515540
    },
    {
      "epoch": 0.8437252476057684,
      "grad_norm": 0.1275676041841507,
      "learning_rate": 8.367322733472586e-06,
      "loss": 0.0337,
      "step": 515560
    },
    {
      "epoch": 0.8437579780444218,
      "grad_norm": 0.3406863808631897,
      "learning_rate": 8.36725684125907e-06,
      "loss": 0.0275,
      "step": 515580
    },
    {
      "epoch": 0.8437907084830751,
      "grad_norm": 0.5350124835968018,
      "learning_rate": 8.367190949045551e-06,
      "loss": 0.0347,
      "step": 515600
    },
    {
      "epoch": 0.8438234389217284,
      "grad_norm": 0.6379498243331909,
      "learning_rate": 8.367125056832035e-06,
      "loss": 0.0257,
      "step": 515620
    },
    {
      "epoch": 0.8438561693603818,
      "grad_norm": 0.9989690184593201,
      "learning_rate": 8.367059164618517e-06,
      "loss": 0.0271,
      "step": 515640
    },
    {
      "epoch": 0.8438888997990351,
      "grad_norm": 0.16352111101150513,
      "learning_rate": 8.366993272405e-06,
      "loss": 0.0357,
      "step": 515660
    },
    {
      "epoch": 0.8439216302376884,
      "grad_norm": 2.377786636352539,
      "learning_rate": 8.366927380191484e-06,
      "loss": 0.0349,
      "step": 515680
    },
    {
      "epoch": 0.8439543606763418,
      "grad_norm": 0.4517165720462799,
      "learning_rate": 8.366861487977966e-06,
      "loss": 0.036,
      "step": 515700
    },
    {
      "epoch": 0.8439870911149951,
      "grad_norm": 1.2675567865371704,
      "learning_rate": 8.36679559576445e-06,
      "loss": 0.0365,
      "step": 515720
    },
    {
      "epoch": 0.8440198215536484,
      "grad_norm": 0.5385728478431702,
      "learning_rate": 8.366729703550933e-06,
      "loss": 0.0284,
      "step": 515740
    },
    {
      "epoch": 0.8440525519923018,
      "grad_norm": 0.7647696137428284,
      "learning_rate": 8.366663811337415e-06,
      "loss": 0.0262,
      "step": 515760
    },
    {
      "epoch": 0.8440852824309552,
      "grad_norm": 0.9538806080818176,
      "learning_rate": 8.366597919123898e-06,
      "loss": 0.0285,
      "step": 515780
    },
    {
      "epoch": 0.8441180128696085,
      "grad_norm": 1.6290024518966675,
      "learning_rate": 8.366532026910382e-06,
      "loss": 0.0489,
      "step": 515800
    },
    {
      "epoch": 0.8441507433082618,
      "grad_norm": 0.361500084400177,
      "learning_rate": 8.366466134696864e-06,
      "loss": 0.0338,
      "step": 515820
    },
    {
      "epoch": 0.8441834737469152,
      "grad_norm": 2.3386871814727783,
      "learning_rate": 8.366400242483347e-06,
      "loss": 0.0316,
      "step": 515840
    },
    {
      "epoch": 0.8442162041855685,
      "grad_norm": 1.3040058612823486,
      "learning_rate": 8.36633435026983e-06,
      "loss": 0.0344,
      "step": 515860
    },
    {
      "epoch": 0.8442489346242218,
      "grad_norm": 0.7657243609428406,
      "learning_rate": 8.366268458056313e-06,
      "loss": 0.0369,
      "step": 515880
    },
    {
      "epoch": 0.8442816650628752,
      "grad_norm": 1.0326906442642212,
      "learning_rate": 8.366202565842795e-06,
      "loss": 0.0275,
      "step": 515900
    },
    {
      "epoch": 0.8443143955015285,
      "grad_norm": 1.6495100259780884,
      "learning_rate": 8.366136673629278e-06,
      "loss": 0.0361,
      "step": 515920
    },
    {
      "epoch": 0.8443471259401818,
      "grad_norm": 1.7837640047073364,
      "learning_rate": 8.36607078141576e-06,
      "loss": 0.0215,
      "step": 515940
    },
    {
      "epoch": 0.8443798563788352,
      "grad_norm": 2.3334622383117676,
      "learning_rate": 8.366004889202244e-06,
      "loss": 0.0226,
      "step": 515960
    },
    {
      "epoch": 0.8444125868174885,
      "grad_norm": 0.8954557180404663,
      "learning_rate": 8.365938996988726e-06,
      "loss": 0.0357,
      "step": 515980
    },
    {
      "epoch": 0.8444453172561419,
      "grad_norm": 0.8515122532844543,
      "learning_rate": 8.36587310477521e-06,
      "loss": 0.023,
      "step": 516000
    },
    {
      "epoch": 0.8444780476947952,
      "grad_norm": 0.7883631587028503,
      "learning_rate": 8.365807212561691e-06,
      "loss": 0.0221,
      "step": 516020
    },
    {
      "epoch": 0.8445107781334485,
      "grad_norm": 0.3943149149417877,
      "learning_rate": 8.365741320348175e-06,
      "loss": 0.0284,
      "step": 516040
    },
    {
      "epoch": 0.8445435085721019,
      "grad_norm": 1.8027487993240356,
      "learning_rate": 8.365675428134657e-06,
      "loss": 0.0267,
      "step": 516060
    },
    {
      "epoch": 0.8445762390107552,
      "grad_norm": 1.1577894687652588,
      "learning_rate": 8.36560953592114e-06,
      "loss": 0.0277,
      "step": 516080
    },
    {
      "epoch": 0.8446089694494086,
      "grad_norm": 4.2553887367248535,
      "learning_rate": 8.365543643707624e-06,
      "loss": 0.0196,
      "step": 516100
    },
    {
      "epoch": 0.8446416998880619,
      "grad_norm": 0.3510737419128418,
      "learning_rate": 8.365477751494106e-06,
      "loss": 0.0296,
      "step": 516120
    },
    {
      "epoch": 0.8446744303267152,
      "grad_norm": 0.4235400855541229,
      "learning_rate": 8.36541185928059e-06,
      "loss": 0.0299,
      "step": 516140
    },
    {
      "epoch": 0.8447071607653686,
      "grad_norm": 0.836428701877594,
      "learning_rate": 8.365345967067073e-06,
      "loss": 0.023,
      "step": 516160
    },
    {
      "epoch": 0.8447398912040219,
      "grad_norm": 0.8145509362220764,
      "learning_rate": 8.365280074853556e-06,
      "loss": 0.0275,
      "step": 516180
    },
    {
      "epoch": 0.8447726216426753,
      "grad_norm": 0.39439913630485535,
      "learning_rate": 8.365214182640038e-06,
      "loss": 0.0305,
      "step": 516200
    },
    {
      "epoch": 0.8448053520813286,
      "grad_norm": 1.1365865468978882,
      "learning_rate": 8.365148290426522e-06,
      "loss": 0.0446,
      "step": 516220
    },
    {
      "epoch": 0.8448380825199819,
      "grad_norm": 1.1253902912139893,
      "learning_rate": 8.365082398213004e-06,
      "loss": 0.0206,
      "step": 516240
    },
    {
      "epoch": 0.8448708129586353,
      "grad_norm": 0.5932056903839111,
      "learning_rate": 8.365016505999487e-06,
      "loss": 0.0296,
      "step": 516260
    },
    {
      "epoch": 0.8449035433972886,
      "grad_norm": 0.6665441989898682,
      "learning_rate": 8.36495061378597e-06,
      "loss": 0.0382,
      "step": 516280
    },
    {
      "epoch": 0.8449362738359419,
      "grad_norm": 2.0625739097595215,
      "learning_rate": 8.364884721572453e-06,
      "loss": 0.0323,
      "step": 516300
    },
    {
      "epoch": 0.8449690042745953,
      "grad_norm": 0.35001277923583984,
      "learning_rate": 8.364818829358935e-06,
      "loss": 0.0296,
      "step": 516320
    },
    {
      "epoch": 0.8450017347132486,
      "grad_norm": 0.7645969390869141,
      "learning_rate": 8.364752937145418e-06,
      "loss": 0.0342,
      "step": 516340
    },
    {
      "epoch": 0.845034465151902,
      "grad_norm": 1.5106267929077148,
      "learning_rate": 8.3646870449319e-06,
      "loss": 0.0244,
      "step": 516360
    },
    {
      "epoch": 0.8450671955905553,
      "grad_norm": 0.6070137023925781,
      "learning_rate": 8.364621152718384e-06,
      "loss": 0.0157,
      "step": 516380
    },
    {
      "epoch": 0.8450999260292087,
      "grad_norm": 0.8131046891212463,
      "learning_rate": 8.364555260504866e-06,
      "loss": 0.0297,
      "step": 516400
    },
    {
      "epoch": 0.845132656467862,
      "grad_norm": 1.3957570791244507,
      "learning_rate": 8.36448936829135e-06,
      "loss": 0.0282,
      "step": 516420
    },
    {
      "epoch": 0.8451653869065153,
      "grad_norm": 0.6148903965950012,
      "learning_rate": 8.364423476077833e-06,
      "loss": 0.0249,
      "step": 516440
    },
    {
      "epoch": 0.8451981173451687,
      "grad_norm": 1.3736257553100586,
      "learning_rate": 8.364357583864315e-06,
      "loss": 0.0333,
      "step": 516460
    },
    {
      "epoch": 0.845230847783822,
      "grad_norm": 0.7821486592292786,
      "learning_rate": 8.364291691650798e-06,
      "loss": 0.0265,
      "step": 516480
    },
    {
      "epoch": 0.8452635782224753,
      "grad_norm": 1.0496188402175903,
      "learning_rate": 8.36422579943728e-06,
      "loss": 0.0303,
      "step": 516500
    },
    {
      "epoch": 0.8452963086611287,
      "grad_norm": 0.2925710082054138,
      "learning_rate": 8.364159907223764e-06,
      "loss": 0.0257,
      "step": 516520
    },
    {
      "epoch": 0.845329039099782,
      "grad_norm": 0.41033661365509033,
      "learning_rate": 8.364094015010247e-06,
      "loss": 0.0288,
      "step": 516540
    },
    {
      "epoch": 0.8453617695384353,
      "grad_norm": 0.8948795199394226,
      "learning_rate": 8.36402812279673e-06,
      "loss": 0.0255,
      "step": 516560
    },
    {
      "epoch": 0.8453944999770887,
      "grad_norm": 1.2096928358078003,
      "learning_rate": 8.363962230583213e-06,
      "loss": 0.027,
      "step": 516580
    },
    {
      "epoch": 0.8454272304157421,
      "grad_norm": 0.4413527250289917,
      "learning_rate": 8.363896338369697e-06,
      "loss": 0.024,
      "step": 516600
    },
    {
      "epoch": 0.8454599608543953,
      "grad_norm": 1.647599458694458,
      "learning_rate": 8.363830446156178e-06,
      "loss": 0.0352,
      "step": 516620
    },
    {
      "epoch": 0.8454926912930487,
      "grad_norm": 1.2461761236190796,
      "learning_rate": 8.363764553942662e-06,
      "loss": 0.0316,
      "step": 516640
    },
    {
      "epoch": 0.8455254217317021,
      "grad_norm": 1.1960726976394653,
      "learning_rate": 8.363698661729144e-06,
      "loss": 0.0271,
      "step": 516660
    },
    {
      "epoch": 0.8455581521703553,
      "grad_norm": 0.5319535732269287,
      "learning_rate": 8.363632769515628e-06,
      "loss": 0.027,
      "step": 516680
    },
    {
      "epoch": 0.8455908826090087,
      "grad_norm": 1.9646070003509521,
      "learning_rate": 8.36356687730211e-06,
      "loss": 0.0181,
      "step": 516700
    },
    {
      "epoch": 0.8456236130476621,
      "grad_norm": 0.4958297312259674,
      "learning_rate": 8.363500985088593e-06,
      "loss": 0.0265,
      "step": 516720
    },
    {
      "epoch": 0.8456563434863154,
      "grad_norm": 0.7133859395980835,
      "learning_rate": 8.363435092875075e-06,
      "loss": 0.0316,
      "step": 516740
    },
    {
      "epoch": 0.8456890739249687,
      "grad_norm": 1.4192428588867188,
      "learning_rate": 8.363369200661558e-06,
      "loss": 0.0394,
      "step": 516760
    },
    {
      "epoch": 0.8457218043636221,
      "grad_norm": 1.4416683912277222,
      "learning_rate": 8.36330330844804e-06,
      "loss": 0.0384,
      "step": 516780
    },
    {
      "epoch": 0.8457545348022755,
      "grad_norm": 1.0621880292892456,
      "learning_rate": 8.363237416234524e-06,
      "loss": 0.0294,
      "step": 516800
    },
    {
      "epoch": 0.8457872652409287,
      "grad_norm": 0.47699543833732605,
      "learning_rate": 8.363171524021008e-06,
      "loss": 0.03,
      "step": 516820
    },
    {
      "epoch": 0.8458199956795821,
      "grad_norm": 1.573056936264038,
      "learning_rate": 8.36310563180749e-06,
      "loss": 0.0357,
      "step": 516840
    },
    {
      "epoch": 0.8458527261182355,
      "grad_norm": 1.5488455295562744,
      "learning_rate": 8.363039739593973e-06,
      "loss": 0.0257,
      "step": 516860
    },
    {
      "epoch": 0.8458854565568887,
      "grad_norm": 0.24825970828533173,
      "learning_rate": 8.362973847380455e-06,
      "loss": 0.0314,
      "step": 516880
    },
    {
      "epoch": 0.8459181869955421,
      "grad_norm": 0.7841390371322632,
      "learning_rate": 8.362907955166939e-06,
      "loss": 0.0365,
      "step": 516900
    },
    {
      "epoch": 0.8459509174341955,
      "grad_norm": 2.3217527866363525,
      "learning_rate": 8.362842062953422e-06,
      "loss": 0.0339,
      "step": 516920
    },
    {
      "epoch": 0.8459836478728487,
      "grad_norm": 0.38743337988853455,
      "learning_rate": 8.362776170739904e-06,
      "loss": 0.032,
      "step": 516940
    },
    {
      "epoch": 0.8460163783115021,
      "grad_norm": 0.7411764860153198,
      "learning_rate": 8.362710278526388e-06,
      "loss": 0.0364,
      "step": 516960
    },
    {
      "epoch": 0.8460491087501555,
      "grad_norm": 0.7545027136802673,
      "learning_rate": 8.362644386312871e-06,
      "loss": 0.032,
      "step": 516980
    },
    {
      "epoch": 0.8460818391888089,
      "grad_norm": 1.0564193725585938,
      "learning_rate": 8.362578494099353e-06,
      "loss": 0.0248,
      "step": 517000
    },
    {
      "epoch": 0.8461145696274621,
      "grad_norm": 0.7676496505737305,
      "learning_rate": 8.362512601885837e-06,
      "loss": 0.0254,
      "step": 517020
    },
    {
      "epoch": 0.8461473000661155,
      "grad_norm": 4.73315954208374,
      "learning_rate": 8.362446709672319e-06,
      "loss": 0.028,
      "step": 517040
    },
    {
      "epoch": 0.8461800305047689,
      "grad_norm": 1.369072675704956,
      "learning_rate": 8.362380817458802e-06,
      "loss": 0.0387,
      "step": 517060
    },
    {
      "epoch": 0.8462127609434221,
      "grad_norm": 0.2279241979122162,
      "learning_rate": 8.362314925245284e-06,
      "loss": 0.0208,
      "step": 517080
    },
    {
      "epoch": 0.8462454913820755,
      "grad_norm": 0.40846890211105347,
      "learning_rate": 8.362249033031768e-06,
      "loss": 0.0295,
      "step": 517100
    },
    {
      "epoch": 0.8462782218207289,
      "grad_norm": 1.2231910228729248,
      "learning_rate": 8.36218314081825e-06,
      "loss": 0.0354,
      "step": 517120
    },
    {
      "epoch": 0.8463109522593821,
      "grad_norm": 0.22319716215133667,
      "learning_rate": 8.362117248604733e-06,
      "loss": 0.0169,
      "step": 517140
    },
    {
      "epoch": 0.8463436826980355,
      "grad_norm": 2.182267427444458,
      "learning_rate": 8.362051356391217e-06,
      "loss": 0.0318,
      "step": 517160
    },
    {
      "epoch": 0.8463764131366889,
      "grad_norm": 1.908339500427246,
      "learning_rate": 8.361985464177699e-06,
      "loss": 0.0308,
      "step": 517180
    },
    {
      "epoch": 0.8464091435753422,
      "grad_norm": 0.8862096071243286,
      "learning_rate": 8.361919571964182e-06,
      "loss": 0.0327,
      "step": 517200
    },
    {
      "epoch": 0.8464418740139955,
      "grad_norm": 0.3901080787181854,
      "learning_rate": 8.361853679750664e-06,
      "loss": 0.0216,
      "step": 517220
    },
    {
      "epoch": 0.8464746044526489,
      "grad_norm": 0.9132316708564758,
      "learning_rate": 8.361787787537148e-06,
      "loss": 0.0278,
      "step": 517240
    },
    {
      "epoch": 0.8465073348913023,
      "grad_norm": 0.2026689201593399,
      "learning_rate": 8.36172189532363e-06,
      "loss": 0.0283,
      "step": 517260
    },
    {
      "epoch": 0.8465400653299555,
      "grad_norm": 0.9827445149421692,
      "learning_rate": 8.361656003110113e-06,
      "loss": 0.0293,
      "step": 517280
    },
    {
      "epoch": 0.8465727957686089,
      "grad_norm": 0.2707712948322296,
      "learning_rate": 8.361590110896595e-06,
      "loss": 0.0188,
      "step": 517300
    },
    {
      "epoch": 0.8466055262072623,
      "grad_norm": 1.8309205770492554,
      "learning_rate": 8.361524218683079e-06,
      "loss": 0.0323,
      "step": 517320
    },
    {
      "epoch": 0.8466382566459155,
      "grad_norm": 0.24969199299812317,
      "learning_rate": 8.361458326469562e-06,
      "loss": 0.029,
      "step": 517340
    },
    {
      "epoch": 0.8466709870845689,
      "grad_norm": 1.520262598991394,
      "learning_rate": 8.361392434256044e-06,
      "loss": 0.0204,
      "step": 517360
    },
    {
      "epoch": 0.8467037175232223,
      "grad_norm": 0.8925860524177551,
      "learning_rate": 8.361326542042528e-06,
      "loss": 0.0368,
      "step": 517380
    },
    {
      "epoch": 0.8467364479618756,
      "grad_norm": 1.5131915807724,
      "learning_rate": 8.361260649829011e-06,
      "loss": 0.0333,
      "step": 517400
    },
    {
      "epoch": 0.8467691784005289,
      "grad_norm": 1.5844857692718506,
      "learning_rate": 8.361194757615493e-06,
      "loss": 0.0216,
      "step": 517420
    },
    {
      "epoch": 0.8468019088391823,
      "grad_norm": 0.7091113328933716,
      "learning_rate": 8.361128865401977e-06,
      "loss": 0.0322,
      "step": 517440
    },
    {
      "epoch": 0.8468346392778356,
      "grad_norm": 1.029619574546814,
      "learning_rate": 8.361062973188459e-06,
      "loss": 0.0299,
      "step": 517460
    },
    {
      "epoch": 0.8468673697164889,
      "grad_norm": 0.3771277964115143,
      "learning_rate": 8.360997080974942e-06,
      "loss": 0.0319,
      "step": 517480
    },
    {
      "epoch": 0.8469001001551423,
      "grad_norm": 0.6858465671539307,
      "learning_rate": 8.360931188761426e-06,
      "loss": 0.0312,
      "step": 517500
    },
    {
      "epoch": 0.8469328305937956,
      "grad_norm": 0.30244705080986023,
      "learning_rate": 8.360865296547908e-06,
      "loss": 0.0205,
      "step": 517520
    },
    {
      "epoch": 0.8469655610324489,
      "grad_norm": 1.6537401676177979,
      "learning_rate": 8.360799404334391e-06,
      "loss": 0.0423,
      "step": 517540
    },
    {
      "epoch": 0.8469982914711023,
      "grad_norm": 1.1085108518600464,
      "learning_rate": 8.360733512120873e-06,
      "loss": 0.0367,
      "step": 517560
    },
    {
      "epoch": 0.8470310219097557,
      "grad_norm": 2.1224899291992188,
      "learning_rate": 8.360667619907357e-06,
      "loss": 0.0273,
      "step": 517580
    },
    {
      "epoch": 0.847063752348409,
      "grad_norm": 1.2255380153656006,
      "learning_rate": 8.360601727693839e-06,
      "loss": 0.0303,
      "step": 517600
    },
    {
      "epoch": 0.8470964827870623,
      "grad_norm": 1.9103561639785767,
      "learning_rate": 8.360535835480322e-06,
      "loss": 0.0414,
      "step": 517620
    },
    {
      "epoch": 0.8471292132257157,
      "grad_norm": 0.7526509165763855,
      "learning_rate": 8.360469943266804e-06,
      "loss": 0.0302,
      "step": 517640
    },
    {
      "epoch": 0.847161943664369,
      "grad_norm": 1.5140210390090942,
      "learning_rate": 8.360404051053288e-06,
      "loss": 0.0394,
      "step": 517660
    },
    {
      "epoch": 0.8471946741030223,
      "grad_norm": 1.2819828987121582,
      "learning_rate": 8.36033815883977e-06,
      "loss": 0.026,
      "step": 517680
    },
    {
      "epoch": 0.8472274045416757,
      "grad_norm": 0.7333658933639526,
      "learning_rate": 8.360272266626253e-06,
      "loss": 0.0259,
      "step": 517700
    },
    {
      "epoch": 0.847260134980329,
      "grad_norm": 1.202061414718628,
      "learning_rate": 8.360206374412737e-06,
      "loss": 0.0339,
      "step": 517720
    },
    {
      "epoch": 0.8472928654189823,
      "grad_norm": 0.4287813901901245,
      "learning_rate": 8.360140482199219e-06,
      "loss": 0.0225,
      "step": 517740
    },
    {
      "epoch": 0.8473255958576357,
      "grad_norm": 0.10668287426233292,
      "learning_rate": 8.360074589985702e-06,
      "loss": 0.0373,
      "step": 517760
    },
    {
      "epoch": 0.847358326296289,
      "grad_norm": 0.9891777634620667,
      "learning_rate": 8.360008697772186e-06,
      "loss": 0.0329,
      "step": 517780
    },
    {
      "epoch": 0.8473910567349424,
      "grad_norm": 0.23979586362838745,
      "learning_rate": 8.359942805558668e-06,
      "loss": 0.0326,
      "step": 517800
    },
    {
      "epoch": 0.8474237871735957,
      "grad_norm": 1.2445398569107056,
      "learning_rate": 8.359876913345151e-06,
      "loss": 0.0368,
      "step": 517820
    },
    {
      "epoch": 0.847456517612249,
      "grad_norm": 0.2599914073944092,
      "learning_rate": 8.359811021131633e-06,
      "loss": 0.0266,
      "step": 517840
    },
    {
      "epoch": 0.8474892480509024,
      "grad_norm": 1.4753119945526123,
      "learning_rate": 8.359745128918117e-06,
      "loss": 0.0297,
      "step": 517860
    },
    {
      "epoch": 0.8475219784895557,
      "grad_norm": 0.46913909912109375,
      "learning_rate": 8.3596792367046e-06,
      "loss": 0.0325,
      "step": 517880
    },
    {
      "epoch": 0.847554708928209,
      "grad_norm": 0.807718813419342,
      "learning_rate": 8.359613344491082e-06,
      "loss": 0.0301,
      "step": 517900
    },
    {
      "epoch": 0.8475874393668624,
      "grad_norm": 0.6035059690475464,
      "learning_rate": 8.359547452277566e-06,
      "loss": 0.0253,
      "step": 517920
    },
    {
      "epoch": 0.8476201698055157,
      "grad_norm": 0.5177812576293945,
      "learning_rate": 8.359481560064048e-06,
      "loss": 0.0228,
      "step": 517940
    },
    {
      "epoch": 0.8476529002441691,
      "grad_norm": 0.6895460486412048,
      "learning_rate": 8.359415667850531e-06,
      "loss": 0.0283,
      "step": 517960
    },
    {
      "epoch": 0.8476856306828224,
      "grad_norm": 2.0651581287384033,
      "learning_rate": 8.359349775637013e-06,
      "loss": 0.0352,
      "step": 517980
    },
    {
      "epoch": 0.8477183611214758,
      "grad_norm": 0.2629183232784271,
      "learning_rate": 8.359283883423497e-06,
      "loss": 0.0298,
      "step": 518000
    },
    {
      "epoch": 0.8477510915601291,
      "grad_norm": 1.9953925609588623,
      "learning_rate": 8.359217991209979e-06,
      "loss": 0.0418,
      "step": 518020
    },
    {
      "epoch": 0.8477838219987824,
      "grad_norm": 0.8476990461349487,
      "learning_rate": 8.359152098996462e-06,
      "loss": 0.0287,
      "step": 518040
    },
    {
      "epoch": 0.8478165524374358,
      "grad_norm": 0.4197229743003845,
      "learning_rate": 8.359086206782944e-06,
      "loss": 0.0263,
      "step": 518060
    },
    {
      "epoch": 0.8478492828760891,
      "grad_norm": 1.5257242918014526,
      "learning_rate": 8.359020314569428e-06,
      "loss": 0.0405,
      "step": 518080
    },
    {
      "epoch": 0.8478820133147424,
      "grad_norm": 0.3637850880622864,
      "learning_rate": 8.35895442235591e-06,
      "loss": 0.03,
      "step": 518100
    },
    {
      "epoch": 0.8479147437533958,
      "grad_norm": 1.5757709741592407,
      "learning_rate": 8.358888530142393e-06,
      "loss": 0.0265,
      "step": 518120
    },
    {
      "epoch": 0.8479474741920491,
      "grad_norm": 2.3004980087280273,
      "learning_rate": 8.358822637928877e-06,
      "loss": 0.0394,
      "step": 518140
    },
    {
      "epoch": 0.8479802046307024,
      "grad_norm": 2.016921043395996,
      "learning_rate": 8.358756745715359e-06,
      "loss": 0.0319,
      "step": 518160
    },
    {
      "epoch": 0.8480129350693558,
      "grad_norm": 1.680448055267334,
      "learning_rate": 8.358690853501842e-06,
      "loss": 0.0277,
      "step": 518180
    },
    {
      "epoch": 0.8480456655080091,
      "grad_norm": 0.9840179681777954,
      "learning_rate": 8.358624961288326e-06,
      "loss": 0.027,
      "step": 518200
    },
    {
      "epoch": 0.8480783959466625,
      "grad_norm": 0.5547106266021729,
      "learning_rate": 8.358559069074808e-06,
      "loss": 0.0337,
      "step": 518220
    },
    {
      "epoch": 0.8481111263853158,
      "grad_norm": 0.48004627227783203,
      "learning_rate": 8.358493176861291e-06,
      "loss": 0.023,
      "step": 518240
    },
    {
      "epoch": 0.8481438568239692,
      "grad_norm": 2.1412644386291504,
      "learning_rate": 8.358427284647775e-06,
      "loss": 0.0306,
      "step": 518260
    },
    {
      "epoch": 0.8481765872626225,
      "grad_norm": 0.3392670750617981,
      "learning_rate": 8.358361392434257e-06,
      "loss": 0.0307,
      "step": 518280
    },
    {
      "epoch": 0.8482093177012758,
      "grad_norm": 1.7956823110580444,
      "learning_rate": 8.35829550022074e-06,
      "loss": 0.0301,
      "step": 518300
    },
    {
      "epoch": 0.8482420481399292,
      "grad_norm": 0.7888745665550232,
      "learning_rate": 8.358229608007222e-06,
      "loss": 0.0249,
      "step": 518320
    },
    {
      "epoch": 0.8482747785785825,
      "grad_norm": 0.34520432353019714,
      "learning_rate": 8.358163715793706e-06,
      "loss": 0.0315,
      "step": 518340
    },
    {
      "epoch": 0.8483075090172358,
      "grad_norm": 0.4452408254146576,
      "learning_rate": 8.358097823580188e-06,
      "loss": 0.0244,
      "step": 518360
    },
    {
      "epoch": 0.8483402394558892,
      "grad_norm": 0.7116773128509521,
      "learning_rate": 8.358031931366671e-06,
      "loss": 0.0262,
      "step": 518380
    },
    {
      "epoch": 0.8483729698945425,
      "grad_norm": 5.223156929016113,
      "learning_rate": 8.357966039153153e-06,
      "loss": 0.0332,
      "step": 518400
    },
    {
      "epoch": 0.8484057003331958,
      "grad_norm": 0.20904485881328583,
      "learning_rate": 8.357900146939637e-06,
      "loss": 0.0255,
      "step": 518420
    },
    {
      "epoch": 0.8484384307718492,
      "grad_norm": 0.5238271951675415,
      "learning_rate": 8.357834254726119e-06,
      "loss": 0.0245,
      "step": 518440
    },
    {
      "epoch": 0.8484711612105026,
      "grad_norm": 0.5963073968887329,
      "learning_rate": 8.357768362512602e-06,
      "loss": 0.0248,
      "step": 518460
    },
    {
      "epoch": 0.8485038916491559,
      "grad_norm": 0.6302036643028259,
      "learning_rate": 8.357702470299084e-06,
      "loss": 0.0277,
      "step": 518480
    },
    {
      "epoch": 0.8485366220878092,
      "grad_norm": 0.9473075270652771,
      "learning_rate": 8.357636578085568e-06,
      "loss": 0.0302,
      "step": 518500
    },
    {
      "epoch": 0.8485693525264626,
      "grad_norm": 0.646628201007843,
      "learning_rate": 8.357570685872051e-06,
      "loss": 0.0262,
      "step": 518520
    },
    {
      "epoch": 0.8486020829651159,
      "grad_norm": 0.8927170038223267,
      "learning_rate": 8.357504793658533e-06,
      "loss": 0.0375,
      "step": 518540
    },
    {
      "epoch": 0.8486348134037692,
      "grad_norm": 1.0200343132019043,
      "learning_rate": 8.357438901445017e-06,
      "loss": 0.0312,
      "step": 518560
    },
    {
      "epoch": 0.8486675438424226,
      "grad_norm": 1.1172983646392822,
      "learning_rate": 8.3573730092315e-06,
      "loss": 0.0361,
      "step": 518580
    },
    {
      "epoch": 0.8487002742810759,
      "grad_norm": 0.985887348651886,
      "learning_rate": 8.357307117017982e-06,
      "loss": 0.0264,
      "step": 518600
    },
    {
      "epoch": 0.8487330047197292,
      "grad_norm": 1.4391509294509888,
      "learning_rate": 8.357241224804466e-06,
      "loss": 0.035,
      "step": 518620
    },
    {
      "epoch": 0.8487657351583826,
      "grad_norm": 1.2873090505599976,
      "learning_rate": 8.35717533259095e-06,
      "loss": 0.0287,
      "step": 518640
    },
    {
      "epoch": 0.848798465597036,
      "grad_norm": 2.496676206588745,
      "learning_rate": 8.357109440377431e-06,
      "loss": 0.0319,
      "step": 518660
    },
    {
      "epoch": 0.8488311960356892,
      "grad_norm": 0.4832107722759247,
      "learning_rate": 8.357043548163915e-06,
      "loss": 0.0263,
      "step": 518680
    },
    {
      "epoch": 0.8488639264743426,
      "grad_norm": 3.1833221912384033,
      "learning_rate": 8.356977655950397e-06,
      "loss": 0.0333,
      "step": 518700
    },
    {
      "epoch": 0.848896656912996,
      "grad_norm": 0.31136950850486755,
      "learning_rate": 8.35691176373688e-06,
      "loss": 0.0246,
      "step": 518720
    },
    {
      "epoch": 0.8489293873516492,
      "grad_norm": 1.154394268989563,
      "learning_rate": 8.356845871523362e-06,
      "loss": 0.0228,
      "step": 518740
    },
    {
      "epoch": 0.8489621177903026,
      "grad_norm": 1.6325334310531616,
      "learning_rate": 8.356779979309846e-06,
      "loss": 0.0315,
      "step": 518760
    },
    {
      "epoch": 0.848994848228956,
      "grad_norm": 0.6106020212173462,
      "learning_rate": 8.356714087096328e-06,
      "loss": 0.0235,
      "step": 518780
    },
    {
      "epoch": 0.8490275786676093,
      "grad_norm": 1.4094536304473877,
      "learning_rate": 8.356648194882811e-06,
      "loss": 0.0319,
      "step": 518800
    },
    {
      "epoch": 0.8490603091062626,
      "grad_norm": 0.9031468629837036,
      "learning_rate": 8.356582302669293e-06,
      "loss": 0.0206,
      "step": 518820
    },
    {
      "epoch": 0.849093039544916,
      "grad_norm": 1.0738554000854492,
      "learning_rate": 8.356516410455777e-06,
      "loss": 0.0247,
      "step": 518840
    },
    {
      "epoch": 0.8491257699835694,
      "grad_norm": 3.612375497817993,
      "learning_rate": 8.356450518242259e-06,
      "loss": 0.0334,
      "step": 518860
    },
    {
      "epoch": 0.8491585004222226,
      "grad_norm": 0.6798840165138245,
      "learning_rate": 8.356384626028742e-06,
      "loss": 0.04,
      "step": 518880
    },
    {
      "epoch": 0.849191230860876,
      "grad_norm": 1.1200878620147705,
      "learning_rate": 8.356318733815226e-06,
      "loss": 0.0247,
      "step": 518900
    },
    {
      "epoch": 0.8492239612995294,
      "grad_norm": 6.667994976043701,
      "learning_rate": 8.356252841601708e-06,
      "loss": 0.0335,
      "step": 518920
    },
    {
      "epoch": 0.8492566917381826,
      "grad_norm": 1.582553744316101,
      "learning_rate": 8.356186949388192e-06,
      "loss": 0.022,
      "step": 518940
    },
    {
      "epoch": 0.849289422176836,
      "grad_norm": 1.7250930070877075,
      "learning_rate": 8.356121057174675e-06,
      "loss": 0.0331,
      "step": 518960
    },
    {
      "epoch": 0.8493221526154894,
      "grad_norm": 0.9745028018951416,
      "learning_rate": 8.356055164961157e-06,
      "loss": 0.0329,
      "step": 518980
    },
    {
      "epoch": 0.8493548830541426,
      "grad_norm": 1.628752589225769,
      "learning_rate": 8.35598927274764e-06,
      "loss": 0.0304,
      "step": 519000
    },
    {
      "epoch": 0.849387613492796,
      "grad_norm": 1.444212794303894,
      "learning_rate": 8.355923380534124e-06,
      "loss": 0.0278,
      "step": 519020
    },
    {
      "epoch": 0.8494203439314494,
      "grad_norm": 1.4984359741210938,
      "learning_rate": 8.355857488320606e-06,
      "loss": 0.0283,
      "step": 519040
    },
    {
      "epoch": 0.8494530743701028,
      "grad_norm": 0.996197521686554,
      "learning_rate": 8.35579159610709e-06,
      "loss": 0.0266,
      "step": 519060
    },
    {
      "epoch": 0.849485804808756,
      "grad_norm": 0.3058203160762787,
      "learning_rate": 8.355725703893572e-06,
      "loss": 0.0293,
      "step": 519080
    },
    {
      "epoch": 0.8495185352474094,
      "grad_norm": 0.2218216508626938,
      "learning_rate": 8.355659811680055e-06,
      "loss": 0.023,
      "step": 519100
    },
    {
      "epoch": 0.8495512656860628,
      "grad_norm": 0.6237245202064514,
      "learning_rate": 8.355593919466537e-06,
      "loss": 0.0344,
      "step": 519120
    },
    {
      "epoch": 0.849583996124716,
      "grad_norm": 0.9022071957588196,
      "learning_rate": 8.35552802725302e-06,
      "loss": 0.0227,
      "step": 519140
    },
    {
      "epoch": 0.8496167265633694,
      "grad_norm": 1.9195773601531982,
      "learning_rate": 8.355462135039502e-06,
      "loss": 0.022,
      "step": 519160
    },
    {
      "epoch": 0.8496494570020228,
      "grad_norm": 0.47883448004722595,
      "learning_rate": 8.355396242825986e-06,
      "loss": 0.0228,
      "step": 519180
    },
    {
      "epoch": 0.849682187440676,
      "grad_norm": 1.0686638355255127,
      "learning_rate": 8.355330350612468e-06,
      "loss": 0.0308,
      "step": 519200
    },
    {
      "epoch": 0.8497149178793294,
      "grad_norm": 1.6194108724594116,
      "learning_rate": 8.355264458398952e-06,
      "loss": 0.025,
      "step": 519220
    },
    {
      "epoch": 0.8497476483179828,
      "grad_norm": 0.26181694865226746,
      "learning_rate": 8.355198566185433e-06,
      "loss": 0.0262,
      "step": 519240
    },
    {
      "epoch": 0.8497803787566361,
      "grad_norm": 0.7222435474395752,
      "learning_rate": 8.355132673971917e-06,
      "loss": 0.028,
      "step": 519260
    },
    {
      "epoch": 0.8498131091952894,
      "grad_norm": 0.7428967952728271,
      "learning_rate": 8.3550667817584e-06,
      "loss": 0.029,
      "step": 519280
    },
    {
      "epoch": 0.8498458396339428,
      "grad_norm": 0.8755170702934265,
      "learning_rate": 8.355000889544883e-06,
      "loss": 0.0341,
      "step": 519300
    },
    {
      "epoch": 0.8498785700725962,
      "grad_norm": 0.37801593542099,
      "learning_rate": 8.354934997331366e-06,
      "loss": 0.0362,
      "step": 519320
    },
    {
      "epoch": 0.8499113005112494,
      "grad_norm": 0.49070310592651367,
      "learning_rate": 8.354869105117848e-06,
      "loss": 0.0204,
      "step": 519340
    },
    {
      "epoch": 0.8499440309499028,
      "grad_norm": 0.39360225200653076,
      "learning_rate": 8.354803212904332e-06,
      "loss": 0.0297,
      "step": 519360
    },
    {
      "epoch": 0.8499767613885562,
      "grad_norm": 2.8479578495025635,
      "learning_rate": 8.354737320690815e-06,
      "loss": 0.0287,
      "step": 519380
    },
    {
      "epoch": 0.8500094918272094,
      "grad_norm": 0.3155103623867035,
      "learning_rate": 8.354671428477297e-06,
      "loss": 0.0194,
      "step": 519400
    },
    {
      "epoch": 0.8500422222658628,
      "grad_norm": 0.7741295099258423,
      "learning_rate": 8.35460553626378e-06,
      "loss": 0.0281,
      "step": 519420
    },
    {
      "epoch": 0.8500749527045162,
      "grad_norm": 0.19775225222110748,
      "learning_rate": 8.354539644050264e-06,
      "loss": 0.0352,
      "step": 519440
    },
    {
      "epoch": 0.8501076831431695,
      "grad_norm": 0.8315355777740479,
      "learning_rate": 8.354473751836746e-06,
      "loss": 0.0338,
      "step": 519460
    },
    {
      "epoch": 0.8501404135818228,
      "grad_norm": 2.75016713142395,
      "learning_rate": 8.35440785962323e-06,
      "loss": 0.0297,
      "step": 519480
    },
    {
      "epoch": 0.8501731440204762,
      "grad_norm": 1.3251686096191406,
      "learning_rate": 8.354341967409712e-06,
      "loss": 0.0434,
      "step": 519500
    },
    {
      "epoch": 0.8502058744591295,
      "grad_norm": 0.7554593682289124,
      "learning_rate": 8.354276075196195e-06,
      "loss": 0.0216,
      "step": 519520
    },
    {
      "epoch": 0.8502386048977828,
      "grad_norm": 0.17153799533843994,
      "learning_rate": 8.354210182982677e-06,
      "loss": 0.0346,
      "step": 519540
    },
    {
      "epoch": 0.8502713353364362,
      "grad_norm": 0.8399378657341003,
      "learning_rate": 8.35414429076916e-06,
      "loss": 0.0284,
      "step": 519560
    },
    {
      "epoch": 0.8503040657750895,
      "grad_norm": 0.783891499042511,
      "learning_rate": 8.354078398555643e-06,
      "loss": 0.0328,
      "step": 519580
    },
    {
      "epoch": 0.8503367962137428,
      "grad_norm": 2.1697070598602295,
      "learning_rate": 8.354012506342126e-06,
      "loss": 0.0251,
      "step": 519600
    },
    {
      "epoch": 0.8503695266523962,
      "grad_norm": 0.8746739029884338,
      "learning_rate": 8.35394661412861e-06,
      "loss": 0.0299,
      "step": 519620
    },
    {
      "epoch": 0.8504022570910496,
      "grad_norm": 0.5441682934761047,
      "learning_rate": 8.353880721915092e-06,
      "loss": 0.0247,
      "step": 519640
    },
    {
      "epoch": 0.8504349875297029,
      "grad_norm": 1.787545084953308,
      "learning_rate": 8.353814829701575e-06,
      "loss": 0.0398,
      "step": 519660
    },
    {
      "epoch": 0.8504677179683562,
      "grad_norm": 1.6744130849838257,
      "learning_rate": 8.353748937488057e-06,
      "loss": 0.0284,
      "step": 519680
    },
    {
      "epoch": 0.8505004484070096,
      "grad_norm": 6.272958278656006,
      "learning_rate": 8.35368304527454e-06,
      "loss": 0.0237,
      "step": 519700
    },
    {
      "epoch": 0.8505331788456629,
      "grad_norm": 1.097347378730774,
      "learning_rate": 8.353617153061023e-06,
      "loss": 0.0258,
      "step": 519720
    },
    {
      "epoch": 0.8505659092843162,
      "grad_norm": 0.4378737807273865,
      "learning_rate": 8.353551260847506e-06,
      "loss": 0.0358,
      "step": 519740
    },
    {
      "epoch": 0.8505986397229696,
      "grad_norm": 1.2244818210601807,
      "learning_rate": 8.35348536863399e-06,
      "loss": 0.0299,
      "step": 519760
    },
    {
      "epoch": 0.8506313701616229,
      "grad_norm": 0.31827306747436523,
      "learning_rate": 8.353419476420472e-06,
      "loss": 0.0336,
      "step": 519780
    },
    {
      "epoch": 0.8506641006002762,
      "grad_norm": 1.054507851600647,
      "learning_rate": 8.353353584206955e-06,
      "loss": 0.0331,
      "step": 519800
    },
    {
      "epoch": 0.8506968310389296,
      "grad_norm": 1.3440245389938354,
      "learning_rate": 8.353287691993439e-06,
      "loss": 0.0316,
      "step": 519820
    },
    {
      "epoch": 0.8507295614775829,
      "grad_norm": 0.6273605823516846,
      "learning_rate": 8.35322179977992e-06,
      "loss": 0.0358,
      "step": 519840
    },
    {
      "epoch": 0.8507622919162363,
      "grad_norm": 1.6236759424209595,
      "learning_rate": 8.353155907566404e-06,
      "loss": 0.0272,
      "step": 519860
    },
    {
      "epoch": 0.8507950223548896,
      "grad_norm": 2.385495901107788,
      "learning_rate": 8.353090015352886e-06,
      "loss": 0.0377,
      "step": 519880
    },
    {
      "epoch": 0.850827752793543,
      "grad_norm": 0.6499466896057129,
      "learning_rate": 8.35302412313937e-06,
      "loss": 0.0303,
      "step": 519900
    },
    {
      "epoch": 0.8508604832321963,
      "grad_norm": 1.420991063117981,
      "learning_rate": 8.352958230925852e-06,
      "loss": 0.0239,
      "step": 519920
    },
    {
      "epoch": 0.8508932136708496,
      "grad_norm": 4.118956565856934,
      "learning_rate": 8.352892338712335e-06,
      "loss": 0.0303,
      "step": 519940
    },
    {
      "epoch": 0.850925944109503,
      "grad_norm": 3.0486485958099365,
      "learning_rate": 8.352826446498819e-06,
      "loss": 0.032,
      "step": 519960
    },
    {
      "epoch": 0.8509586745481563,
      "grad_norm": 1.6406352519989014,
      "learning_rate": 8.3527605542853e-06,
      "loss": 0.0351,
      "step": 519980
    },
    {
      "epoch": 0.8509914049868096,
      "grad_norm": 1.6295180320739746,
      "learning_rate": 8.352694662071784e-06,
      "loss": 0.0412,
      "step": 520000
    },
    {
      "epoch": 0.851024135425463,
      "grad_norm": 1.807271122932434,
      "learning_rate": 8.352628769858266e-06,
      "loss": 0.032,
      "step": 520020
    },
    {
      "epoch": 0.8510568658641163,
      "grad_norm": 1.557310938835144,
      "learning_rate": 8.35256287764475e-06,
      "loss": 0.0514,
      "step": 520040
    },
    {
      "epoch": 0.8510895963027697,
      "grad_norm": 0.5417903065681458,
      "learning_rate": 8.352496985431232e-06,
      "loss": 0.0225,
      "step": 520060
    },
    {
      "epoch": 0.851122326741423,
      "grad_norm": 0.3043022155761719,
      "learning_rate": 8.352431093217715e-06,
      "loss": 0.025,
      "step": 520080
    },
    {
      "epoch": 0.8511550571800763,
      "grad_norm": 0.2947092652320862,
      "learning_rate": 8.352365201004197e-06,
      "loss": 0.0347,
      "step": 520100
    },
    {
      "epoch": 0.8511877876187297,
      "grad_norm": 0.7859086394309998,
      "learning_rate": 8.35229930879068e-06,
      "loss": 0.0241,
      "step": 520120
    },
    {
      "epoch": 0.851220518057383,
      "grad_norm": 0.5388548970222473,
      "learning_rate": 8.352233416577163e-06,
      "loss": 0.0338,
      "step": 520140
    },
    {
      "epoch": 0.8512532484960363,
      "grad_norm": 0.2870525121688843,
      "learning_rate": 8.352167524363646e-06,
      "loss": 0.0262,
      "step": 520160
    },
    {
      "epoch": 0.8512859789346897,
      "grad_norm": 1.3180100917816162,
      "learning_rate": 8.35210163215013e-06,
      "loss": 0.028,
      "step": 520180
    },
    {
      "epoch": 0.851318709373343,
      "grad_norm": 1.696759819984436,
      "learning_rate": 8.352035739936612e-06,
      "loss": 0.0301,
      "step": 520200
    },
    {
      "epoch": 0.8513514398119963,
      "grad_norm": 0.7150468826293945,
      "learning_rate": 8.351969847723095e-06,
      "loss": 0.0299,
      "step": 520220
    },
    {
      "epoch": 0.8513841702506497,
      "grad_norm": 0.18483474850654602,
      "learning_rate": 8.351903955509579e-06,
      "loss": 0.0336,
      "step": 520240
    },
    {
      "epoch": 0.8514169006893031,
      "grad_norm": 1.2908741235733032,
      "learning_rate": 8.35183806329606e-06,
      "loss": 0.0255,
      "step": 520260
    },
    {
      "epoch": 0.8514496311279564,
      "grad_norm": 0.6046862006187439,
      "learning_rate": 8.351772171082544e-06,
      "loss": 0.0293,
      "step": 520280
    },
    {
      "epoch": 0.8514823615666097,
      "grad_norm": 0.5145148634910583,
      "learning_rate": 8.351706278869026e-06,
      "loss": 0.03,
      "step": 520300
    },
    {
      "epoch": 0.8515150920052631,
      "grad_norm": 0.8156492114067078,
      "learning_rate": 8.35164038665551e-06,
      "loss": 0.0327,
      "step": 520320
    },
    {
      "epoch": 0.8515478224439164,
      "grad_norm": 0.733110785484314,
      "learning_rate": 8.351574494441993e-06,
      "loss": 0.0291,
      "step": 520340
    },
    {
      "epoch": 0.8515805528825697,
      "grad_norm": 1.0204415321350098,
      "learning_rate": 8.351508602228475e-06,
      "loss": 0.0311,
      "step": 520360
    },
    {
      "epoch": 0.8516132833212231,
      "grad_norm": 1.0715714693069458,
      "learning_rate": 8.351442710014959e-06,
      "loss": 0.0343,
      "step": 520380
    },
    {
      "epoch": 0.8516460137598764,
      "grad_norm": 0.6444008946418762,
      "learning_rate": 8.35137681780144e-06,
      "loss": 0.0252,
      "step": 520400
    },
    {
      "epoch": 0.8516787441985297,
      "grad_norm": 0.34768444299697876,
      "learning_rate": 8.351310925587924e-06,
      "loss": 0.0376,
      "step": 520420
    },
    {
      "epoch": 0.8517114746371831,
      "grad_norm": 6.693332195281982,
      "learning_rate": 8.351245033374406e-06,
      "loss": 0.0293,
      "step": 520440
    },
    {
      "epoch": 0.8517442050758365,
      "grad_norm": 0.881140410900116,
      "learning_rate": 8.35117914116089e-06,
      "loss": 0.0154,
      "step": 520460
    },
    {
      "epoch": 0.8517769355144897,
      "grad_norm": 1.394310474395752,
      "learning_rate": 8.351113248947372e-06,
      "loss": 0.0262,
      "step": 520480
    },
    {
      "epoch": 0.8518096659531431,
      "grad_norm": 0.7451164126396179,
      "learning_rate": 8.351047356733855e-06,
      "loss": 0.0277,
      "step": 520500
    },
    {
      "epoch": 0.8518423963917965,
      "grad_norm": 0.7346168160438538,
      "learning_rate": 8.350981464520337e-06,
      "loss": 0.0279,
      "step": 520520
    },
    {
      "epoch": 0.8518751268304497,
      "grad_norm": 0.6296168565750122,
      "learning_rate": 8.350915572306821e-06,
      "loss": 0.0276,
      "step": 520540
    },
    {
      "epoch": 0.8519078572691031,
      "grad_norm": 0.6085690259933472,
      "learning_rate": 8.350849680093304e-06,
      "loss": 0.0343,
      "step": 520560
    },
    {
      "epoch": 0.8519405877077565,
      "grad_norm": 1.7801425457000732,
      "learning_rate": 8.350783787879786e-06,
      "loss": 0.0246,
      "step": 520580
    },
    {
      "epoch": 0.8519733181464098,
      "grad_norm": 2.334533452987671,
      "learning_rate": 8.35071789566627e-06,
      "loss": 0.0237,
      "step": 520600
    },
    {
      "epoch": 0.8520060485850631,
      "grad_norm": 0.6691074371337891,
      "learning_rate": 8.350652003452754e-06,
      "loss": 0.0373,
      "step": 520620
    },
    {
      "epoch": 0.8520387790237165,
      "grad_norm": 3.47991681098938,
      "learning_rate": 8.350586111239235e-06,
      "loss": 0.0314,
      "step": 520640
    },
    {
      "epoch": 0.8520715094623699,
      "grad_norm": 1.198500394821167,
      "learning_rate": 8.350520219025719e-06,
      "loss": 0.0311,
      "step": 520660
    },
    {
      "epoch": 0.8521042399010231,
      "grad_norm": 1.196398377418518,
      "learning_rate": 8.350454326812203e-06,
      "loss": 0.0289,
      "step": 520680
    },
    {
      "epoch": 0.8521369703396765,
      "grad_norm": 0.6615079641342163,
      "learning_rate": 8.350388434598684e-06,
      "loss": 0.0288,
      "step": 520700
    },
    {
      "epoch": 0.8521697007783299,
      "grad_norm": 1.9995442628860474,
      "learning_rate": 8.350322542385168e-06,
      "loss": 0.0329,
      "step": 520720
    },
    {
      "epoch": 0.8522024312169831,
      "grad_norm": 0.8266438841819763,
      "learning_rate": 8.35025665017165e-06,
      "loss": 0.0292,
      "step": 520740
    },
    {
      "epoch": 0.8522351616556365,
      "grad_norm": 0.7172071933746338,
      "learning_rate": 8.350190757958134e-06,
      "loss": 0.0454,
      "step": 520760
    },
    {
      "epoch": 0.8522678920942899,
      "grad_norm": 0.3048848509788513,
      "learning_rate": 8.350124865744615e-06,
      "loss": 0.0294,
      "step": 520780
    },
    {
      "epoch": 0.8523006225329431,
      "grad_norm": 0.5075138807296753,
      "learning_rate": 8.350058973531099e-06,
      "loss": 0.0337,
      "step": 520800
    },
    {
      "epoch": 0.8523333529715965,
      "grad_norm": 0.8913995623588562,
      "learning_rate": 8.349993081317581e-06,
      "loss": 0.036,
      "step": 520820
    },
    {
      "epoch": 0.8523660834102499,
      "grad_norm": 1.4603825807571411,
      "learning_rate": 8.349927189104064e-06,
      "loss": 0.0205,
      "step": 520840
    },
    {
      "epoch": 0.8523988138489033,
      "grad_norm": 0.559736430644989,
      "learning_rate": 8.349861296890546e-06,
      "loss": 0.0285,
      "step": 520860
    },
    {
      "epoch": 0.8524315442875565,
      "grad_norm": 0.4875245988368988,
      "learning_rate": 8.34979540467703e-06,
      "loss": 0.0376,
      "step": 520880
    },
    {
      "epoch": 0.8524642747262099,
      "grad_norm": 1.8067690134048462,
      "learning_rate": 8.349729512463512e-06,
      "loss": 0.0367,
      "step": 520900
    },
    {
      "epoch": 0.8524970051648633,
      "grad_norm": 0.9379950761795044,
      "learning_rate": 8.349663620249995e-06,
      "loss": 0.0242,
      "step": 520920
    },
    {
      "epoch": 0.8525297356035165,
      "grad_norm": 0.37242013216018677,
      "learning_rate": 8.349597728036477e-06,
      "loss": 0.0207,
      "step": 520940
    },
    {
      "epoch": 0.8525624660421699,
      "grad_norm": 0.8412789106369019,
      "learning_rate": 8.349531835822961e-06,
      "loss": 0.0198,
      "step": 520960
    },
    {
      "epoch": 0.8525951964808233,
      "grad_norm": 0.8896047472953796,
      "learning_rate": 8.349465943609445e-06,
      "loss": 0.024,
      "step": 520980
    },
    {
      "epoch": 0.8526279269194765,
      "grad_norm": 0.29131367802619934,
      "learning_rate": 8.349400051395926e-06,
      "loss": 0.0298,
      "step": 521000
    },
    {
      "epoch": 0.8526606573581299,
      "grad_norm": 0.4090788960456848,
      "learning_rate": 8.34933415918241e-06,
      "loss": 0.031,
      "step": 521020
    },
    {
      "epoch": 0.8526933877967833,
      "grad_norm": 1.2437739372253418,
      "learning_rate": 8.349268266968894e-06,
      "loss": 0.0364,
      "step": 521040
    },
    {
      "epoch": 0.8527261182354365,
      "grad_norm": 0.49096816778182983,
      "learning_rate": 8.349202374755375e-06,
      "loss": 0.0243,
      "step": 521060
    },
    {
      "epoch": 0.8527588486740899,
      "grad_norm": 0.8220692873001099,
      "learning_rate": 8.349136482541859e-06,
      "loss": 0.0356,
      "step": 521080
    },
    {
      "epoch": 0.8527915791127433,
      "grad_norm": 0.8119723200798035,
      "learning_rate": 8.349070590328343e-06,
      "loss": 0.024,
      "step": 521100
    },
    {
      "epoch": 0.8528243095513967,
      "grad_norm": 0.4006507396697998,
      "learning_rate": 8.349004698114825e-06,
      "loss": 0.0227,
      "step": 521120
    },
    {
      "epoch": 0.8528570399900499,
      "grad_norm": 0.28652822971343994,
      "learning_rate": 8.348938805901308e-06,
      "loss": 0.0359,
      "step": 521140
    },
    {
      "epoch": 0.8528897704287033,
      "grad_norm": 0.3242397606372833,
      "learning_rate": 8.34887291368779e-06,
      "loss": 0.0289,
      "step": 521160
    },
    {
      "epoch": 0.8529225008673567,
      "grad_norm": 0.7426019906997681,
      "learning_rate": 8.348807021474274e-06,
      "loss": 0.0324,
      "step": 521180
    },
    {
      "epoch": 0.8529552313060099,
      "grad_norm": 1.3950015306472778,
      "learning_rate": 8.348741129260756e-06,
      "loss": 0.0298,
      "step": 521200
    },
    {
      "epoch": 0.8529879617446633,
      "grad_norm": 0.39430752396583557,
      "learning_rate": 8.348675237047239e-06,
      "loss": 0.0267,
      "step": 521220
    },
    {
      "epoch": 0.8530206921833167,
      "grad_norm": 1.0783220529556274,
      "learning_rate": 8.348609344833721e-06,
      "loss": 0.0315,
      "step": 521240
    },
    {
      "epoch": 0.8530534226219699,
      "grad_norm": 0.7052239179611206,
      "learning_rate": 8.348543452620205e-06,
      "loss": 0.0256,
      "step": 521260
    },
    {
      "epoch": 0.8530861530606233,
      "grad_norm": 1.825648307800293,
      "learning_rate": 8.348477560406686e-06,
      "loss": 0.0302,
      "step": 521280
    },
    {
      "epoch": 0.8531188834992767,
      "grad_norm": 1.001341700553894,
      "learning_rate": 8.34841166819317e-06,
      "loss": 0.0304,
      "step": 521300
    },
    {
      "epoch": 0.85315161393793,
      "grad_norm": 0.6808086633682251,
      "learning_rate": 8.348345775979652e-06,
      "loss": 0.0252,
      "step": 521320
    },
    {
      "epoch": 0.8531843443765833,
      "grad_norm": 1.3993710279464722,
      "learning_rate": 8.348279883766136e-06,
      "loss": 0.0288,
      "step": 521340
    },
    {
      "epoch": 0.8532170748152367,
      "grad_norm": 0.20946232974529266,
      "learning_rate": 8.348213991552619e-06,
      "loss": 0.0233,
      "step": 521360
    },
    {
      "epoch": 0.85324980525389,
      "grad_norm": 2.4114811420440674,
      "learning_rate": 8.348148099339101e-06,
      "loss": 0.028,
      "step": 521380
    },
    {
      "epoch": 0.8532825356925433,
      "grad_norm": 0.7472014427185059,
      "learning_rate": 8.348082207125585e-06,
      "loss": 0.0181,
      "step": 521400
    },
    {
      "epoch": 0.8533152661311967,
      "grad_norm": 0.971432626247406,
      "learning_rate": 8.348016314912068e-06,
      "loss": 0.0193,
      "step": 521420
    },
    {
      "epoch": 0.85334799656985,
      "grad_norm": 2.335791826248169,
      "learning_rate": 8.34795042269855e-06,
      "loss": 0.0294,
      "step": 521440
    },
    {
      "epoch": 0.8533807270085033,
      "grad_norm": 0.6597740650177002,
      "learning_rate": 8.347884530485034e-06,
      "loss": 0.0252,
      "step": 521460
    },
    {
      "epoch": 0.8534134574471567,
      "grad_norm": 0.40358179807662964,
      "learning_rate": 8.347818638271517e-06,
      "loss": 0.03,
      "step": 521480
    },
    {
      "epoch": 0.8534461878858101,
      "grad_norm": 0.9474145770072937,
      "learning_rate": 8.347752746057999e-06,
      "loss": 0.0246,
      "step": 521500
    },
    {
      "epoch": 0.8534789183244634,
      "grad_norm": 0.31499814987182617,
      "learning_rate": 8.347686853844483e-06,
      "loss": 0.0216,
      "step": 521520
    },
    {
      "epoch": 0.8535116487631167,
      "grad_norm": 0.48617789149284363,
      "learning_rate": 8.347620961630965e-06,
      "loss": 0.0275,
      "step": 521540
    },
    {
      "epoch": 0.8535443792017701,
      "grad_norm": 0.7302330136299133,
      "learning_rate": 8.347555069417448e-06,
      "loss": 0.0208,
      "step": 521560
    },
    {
      "epoch": 0.8535771096404234,
      "grad_norm": 3.9085240364074707,
      "learning_rate": 8.34748917720393e-06,
      "loss": 0.0318,
      "step": 521580
    },
    {
      "epoch": 0.8536098400790767,
      "grad_norm": 0.9344837069511414,
      "learning_rate": 8.347423284990414e-06,
      "loss": 0.0238,
      "step": 521600
    },
    {
      "epoch": 0.8536425705177301,
      "grad_norm": 0.060581471771001816,
      "learning_rate": 8.347357392776896e-06,
      "loss": 0.0279,
      "step": 521620
    },
    {
      "epoch": 0.8536753009563834,
      "grad_norm": 1.6488111019134521,
      "learning_rate": 8.34729150056338e-06,
      "loss": 0.0247,
      "step": 521640
    },
    {
      "epoch": 0.8537080313950367,
      "grad_norm": 0.71905517578125,
      "learning_rate": 8.347225608349861e-06,
      "loss": 0.0264,
      "step": 521660
    },
    {
      "epoch": 0.8537407618336901,
      "grad_norm": 0.5704670548439026,
      "learning_rate": 8.347159716136345e-06,
      "loss": 0.0338,
      "step": 521680
    },
    {
      "epoch": 0.8537734922723434,
      "grad_norm": 1.4902551174163818,
      "learning_rate": 8.347093823922827e-06,
      "loss": 0.0281,
      "step": 521700
    },
    {
      "epoch": 0.8538062227109968,
      "grad_norm": 1.24405837059021,
      "learning_rate": 8.34702793170931e-06,
      "loss": 0.0326,
      "step": 521720
    },
    {
      "epoch": 0.8538389531496501,
      "grad_norm": 1.6024450063705444,
      "learning_rate": 8.346962039495794e-06,
      "loss": 0.0234,
      "step": 521740
    },
    {
      "epoch": 0.8538716835883035,
      "grad_norm": 1.0490275621414185,
      "learning_rate": 8.346896147282276e-06,
      "loss": 0.0383,
      "step": 521760
    },
    {
      "epoch": 0.8539044140269568,
      "grad_norm": 0.5555176138877869,
      "learning_rate": 8.34683025506876e-06,
      "loss": 0.032,
      "step": 521780
    },
    {
      "epoch": 0.8539371444656101,
      "grad_norm": 0.25623852014541626,
      "learning_rate": 8.346764362855243e-06,
      "loss": 0.0312,
      "step": 521800
    },
    {
      "epoch": 0.8539698749042635,
      "grad_norm": 0.5169591307640076,
      "learning_rate": 8.346698470641725e-06,
      "loss": 0.0339,
      "step": 521820
    },
    {
      "epoch": 0.8540026053429168,
      "grad_norm": 0.9219512939453125,
      "learning_rate": 8.346632578428208e-06,
      "loss": 0.0302,
      "step": 521840
    },
    {
      "epoch": 0.8540353357815701,
      "grad_norm": 0.5423965454101562,
      "learning_rate": 8.346566686214692e-06,
      "loss": 0.0296,
      "step": 521860
    },
    {
      "epoch": 0.8540680662202235,
      "grad_norm": 1.2178248167037964,
      "learning_rate": 8.346500794001174e-06,
      "loss": 0.0333,
      "step": 521880
    },
    {
      "epoch": 0.8541007966588768,
      "grad_norm": 0.3685883581638336,
      "learning_rate": 8.346434901787657e-06,
      "loss": 0.0318,
      "step": 521900
    },
    {
      "epoch": 0.8541335270975302,
      "grad_norm": 1.0177464485168457,
      "learning_rate": 8.34636900957414e-06,
      "loss": 0.0194,
      "step": 521920
    },
    {
      "epoch": 0.8541662575361835,
      "grad_norm": 1.9398921728134155,
      "learning_rate": 8.346303117360623e-06,
      "loss": 0.0276,
      "step": 521940
    },
    {
      "epoch": 0.8541989879748368,
      "grad_norm": 1.5261144638061523,
      "learning_rate": 8.346237225147105e-06,
      "loss": 0.049,
      "step": 521960
    },
    {
      "epoch": 0.8542317184134902,
      "grad_norm": 0.6886342167854309,
      "learning_rate": 8.346171332933588e-06,
      "loss": 0.0324,
      "step": 521980
    },
    {
      "epoch": 0.8542644488521435,
      "grad_norm": 0.8466293215751648,
      "learning_rate": 8.34610544072007e-06,
      "loss": 0.0319,
      "step": 522000
    },
    {
      "epoch": 0.8542971792907968,
      "grad_norm": 1.0634452104568481,
      "learning_rate": 8.346039548506554e-06,
      "loss": 0.0249,
      "step": 522020
    },
    {
      "epoch": 0.8543299097294502,
      "grad_norm": 0.20872527360916138,
      "learning_rate": 8.345973656293036e-06,
      "loss": 0.0348,
      "step": 522040
    },
    {
      "epoch": 0.8543626401681035,
      "grad_norm": 0.23856289684772491,
      "learning_rate": 8.34590776407952e-06,
      "loss": 0.0323,
      "step": 522060
    },
    {
      "epoch": 0.8543953706067569,
      "grad_norm": 0.7804684638977051,
      "learning_rate": 8.345841871866003e-06,
      "loss": 0.0215,
      "step": 522080
    },
    {
      "epoch": 0.8544281010454102,
      "grad_norm": 0.9214285016059875,
      "learning_rate": 8.345775979652485e-06,
      "loss": 0.023,
      "step": 522100
    },
    {
      "epoch": 0.8544608314840636,
      "grad_norm": 0.8788492679595947,
      "learning_rate": 8.345710087438968e-06,
      "loss": 0.0252,
      "step": 522120
    },
    {
      "epoch": 0.8544935619227169,
      "grad_norm": 0.7240620851516724,
      "learning_rate": 8.34564419522545e-06,
      "loss": 0.0319,
      "step": 522140
    },
    {
      "epoch": 0.8545262923613702,
      "grad_norm": 0.9308972954750061,
      "learning_rate": 8.345578303011934e-06,
      "loss": 0.0214,
      "step": 522160
    },
    {
      "epoch": 0.8545590228000236,
      "grad_norm": 0.4585250914096832,
      "learning_rate": 8.345512410798416e-06,
      "loss": 0.0272,
      "step": 522180
    },
    {
      "epoch": 0.8545917532386769,
      "grad_norm": 1.0076974630355835,
      "learning_rate": 8.3454465185849e-06,
      "loss": 0.0299,
      "step": 522200
    },
    {
      "epoch": 0.8546244836773302,
      "grad_norm": 1.6182551383972168,
      "learning_rate": 8.345380626371383e-06,
      "loss": 0.0283,
      "step": 522220
    },
    {
      "epoch": 0.8546572141159836,
      "grad_norm": 0.20348294079303741,
      "learning_rate": 8.345314734157865e-06,
      "loss": 0.0286,
      "step": 522240
    },
    {
      "epoch": 0.8546899445546369,
      "grad_norm": 1.6574790477752686,
      "learning_rate": 8.345248841944348e-06,
      "loss": 0.0316,
      "step": 522260
    },
    {
      "epoch": 0.8547226749932902,
      "grad_norm": 0.6808732748031616,
      "learning_rate": 8.345182949730832e-06,
      "loss": 0.033,
      "step": 522280
    },
    {
      "epoch": 0.8547554054319436,
      "grad_norm": 1.3357691764831543,
      "learning_rate": 8.345117057517314e-06,
      "loss": 0.0301,
      "step": 522300
    },
    {
      "epoch": 0.854788135870597,
      "grad_norm": 0.9967658519744873,
      "learning_rate": 8.345051165303797e-06,
      "loss": 0.0381,
      "step": 522320
    },
    {
      "epoch": 0.8548208663092502,
      "grad_norm": 1.846235990524292,
      "learning_rate": 8.34498527309028e-06,
      "loss": 0.0293,
      "step": 522340
    },
    {
      "epoch": 0.8548535967479036,
      "grad_norm": 3.5829880237579346,
      "learning_rate": 8.344919380876763e-06,
      "loss": 0.0259,
      "step": 522360
    },
    {
      "epoch": 0.854886327186557,
      "grad_norm": 1.1904311180114746,
      "learning_rate": 8.344853488663245e-06,
      "loss": 0.0309,
      "step": 522380
    },
    {
      "epoch": 0.8549190576252103,
      "grad_norm": 1.5762170553207397,
      "learning_rate": 8.344787596449728e-06,
      "loss": 0.0332,
      "step": 522400
    },
    {
      "epoch": 0.8549517880638636,
      "grad_norm": 0.9025228023529053,
      "learning_rate": 8.344721704236212e-06,
      "loss": 0.0324,
      "step": 522420
    },
    {
      "epoch": 0.854984518502517,
      "grad_norm": 2.930678606033325,
      "learning_rate": 8.344655812022694e-06,
      "loss": 0.0257,
      "step": 522440
    },
    {
      "epoch": 0.8550172489411703,
      "grad_norm": 0.9965726137161255,
      "learning_rate": 8.344589919809177e-06,
      "loss": 0.0348,
      "step": 522460
    },
    {
      "epoch": 0.8550499793798236,
      "grad_norm": 0.7630769610404968,
      "learning_rate": 8.34452402759566e-06,
      "loss": 0.0251,
      "step": 522480
    },
    {
      "epoch": 0.855082709818477,
      "grad_norm": 4.81117057800293,
      "learning_rate": 8.344458135382143e-06,
      "loss": 0.0281,
      "step": 522500
    },
    {
      "epoch": 0.8551154402571304,
      "grad_norm": 0.5581778883934021,
      "learning_rate": 8.344392243168625e-06,
      "loss": 0.036,
      "step": 522520
    },
    {
      "epoch": 0.8551481706957836,
      "grad_norm": 0.3930973708629608,
      "learning_rate": 8.344326350955108e-06,
      "loss": 0.0181,
      "step": 522540
    },
    {
      "epoch": 0.855180901134437,
      "grad_norm": 0.5607267022132874,
      "learning_rate": 8.34426045874159e-06,
      "loss": 0.036,
      "step": 522560
    },
    {
      "epoch": 0.8552136315730904,
      "grad_norm": 2.118729591369629,
      "learning_rate": 8.344194566528074e-06,
      "loss": 0.0429,
      "step": 522580
    },
    {
      "epoch": 0.8552463620117436,
      "grad_norm": 1.389222502708435,
      "learning_rate": 8.344128674314557e-06,
      "loss": 0.0293,
      "step": 522600
    },
    {
      "epoch": 0.855279092450397,
      "grad_norm": 0.34619462490081787,
      "learning_rate": 8.34406278210104e-06,
      "loss": 0.0281,
      "step": 522620
    },
    {
      "epoch": 0.8553118228890504,
      "grad_norm": 0.2839195132255554,
      "learning_rate": 8.343996889887523e-06,
      "loss": 0.0221,
      "step": 522640
    },
    {
      "epoch": 0.8553445533277036,
      "grad_norm": 0.5436850190162659,
      "learning_rate": 8.343930997674007e-06,
      "loss": 0.0354,
      "step": 522660
    },
    {
      "epoch": 0.855377283766357,
      "grad_norm": 1.8524863719940186,
      "learning_rate": 8.343865105460488e-06,
      "loss": 0.0377,
      "step": 522680
    },
    {
      "epoch": 0.8554100142050104,
      "grad_norm": 1.4377846717834473,
      "learning_rate": 8.343799213246972e-06,
      "loss": 0.036,
      "step": 522700
    },
    {
      "epoch": 0.8554427446436638,
      "grad_norm": 0.7184144854545593,
      "learning_rate": 8.343733321033454e-06,
      "loss": 0.0253,
      "step": 522720
    },
    {
      "epoch": 0.855475475082317,
      "grad_norm": 0.7204140424728394,
      "learning_rate": 8.343667428819937e-06,
      "loss": 0.0313,
      "step": 522740
    },
    {
      "epoch": 0.8555082055209704,
      "grad_norm": 1.0894348621368408,
      "learning_rate": 8.34360153660642e-06,
      "loss": 0.0259,
      "step": 522760
    },
    {
      "epoch": 0.8555409359596238,
      "grad_norm": 0.5643147230148315,
      "learning_rate": 8.343535644392903e-06,
      "loss": 0.0295,
      "step": 522780
    },
    {
      "epoch": 0.855573666398277,
      "grad_norm": 0.7782388925552368,
      "learning_rate": 8.343469752179387e-06,
      "loss": 0.0295,
      "step": 522800
    },
    {
      "epoch": 0.8556063968369304,
      "grad_norm": 0.8032712936401367,
      "learning_rate": 8.343403859965868e-06,
      "loss": 0.0324,
      "step": 522820
    },
    {
      "epoch": 0.8556391272755838,
      "grad_norm": 2.6746368408203125,
      "learning_rate": 8.343337967752352e-06,
      "loss": 0.0362,
      "step": 522840
    },
    {
      "epoch": 0.855671857714237,
      "grad_norm": 0.6261796951293945,
      "learning_rate": 8.343272075538834e-06,
      "loss": 0.0355,
      "step": 522860
    },
    {
      "epoch": 0.8557045881528904,
      "grad_norm": 1.517557144165039,
      "learning_rate": 8.343206183325318e-06,
      "loss": 0.0242,
      "step": 522880
    },
    {
      "epoch": 0.8557373185915438,
      "grad_norm": 0.9216288924217224,
      "learning_rate": 8.3431402911118e-06,
      "loss": 0.031,
      "step": 522900
    },
    {
      "epoch": 0.8557700490301972,
      "grad_norm": 4.5249457359313965,
      "learning_rate": 8.343074398898283e-06,
      "loss": 0.0273,
      "step": 522920
    },
    {
      "epoch": 0.8558027794688504,
      "grad_norm": 0.5787938833236694,
      "learning_rate": 8.343008506684765e-06,
      "loss": 0.0252,
      "step": 522940
    },
    {
      "epoch": 0.8558355099075038,
      "grad_norm": 2.090376853942871,
      "learning_rate": 8.342942614471248e-06,
      "loss": 0.0353,
      "step": 522960
    },
    {
      "epoch": 0.8558682403461572,
      "grad_norm": 3.1130149364471436,
      "learning_rate": 8.34287672225773e-06,
      "loss": 0.0235,
      "step": 522980
    },
    {
      "epoch": 0.8559009707848104,
      "grad_norm": 0.07184718549251556,
      "learning_rate": 8.342810830044214e-06,
      "loss": 0.0215,
      "step": 523000
    },
    {
      "epoch": 0.8559337012234638,
      "grad_norm": 0.48453328013420105,
      "learning_rate": 8.342744937830698e-06,
      "loss": 0.0261,
      "step": 523020
    },
    {
      "epoch": 0.8559664316621172,
      "grad_norm": 4.183010578155518,
      "learning_rate": 8.34267904561718e-06,
      "loss": 0.0281,
      "step": 523040
    },
    {
      "epoch": 0.8559991621007704,
      "grad_norm": 1.4677685499191284,
      "learning_rate": 8.342613153403663e-06,
      "loss": 0.0198,
      "step": 523060
    },
    {
      "epoch": 0.8560318925394238,
      "grad_norm": 0.968522310256958,
      "learning_rate": 8.342547261190147e-06,
      "loss": 0.0346,
      "step": 523080
    },
    {
      "epoch": 0.8560646229780772,
      "grad_norm": 0.4064463973045349,
      "learning_rate": 8.342481368976628e-06,
      "loss": 0.0257,
      "step": 523100
    },
    {
      "epoch": 0.8560973534167305,
      "grad_norm": 0.29241275787353516,
      "learning_rate": 8.342415476763112e-06,
      "loss": 0.0298,
      "step": 523120
    },
    {
      "epoch": 0.8561300838553838,
      "grad_norm": 1.7788429260253906,
      "learning_rate": 8.342349584549596e-06,
      "loss": 0.0307,
      "step": 523140
    },
    {
      "epoch": 0.8561628142940372,
      "grad_norm": 2.3939895629882812,
      "learning_rate": 8.342283692336078e-06,
      "loss": 0.0348,
      "step": 523160
    },
    {
      "epoch": 0.8561955447326905,
      "grad_norm": 1.0886890888214111,
      "learning_rate": 8.342217800122561e-06,
      "loss": 0.0248,
      "step": 523180
    },
    {
      "epoch": 0.8562282751713438,
      "grad_norm": 0.9520276188850403,
      "learning_rate": 8.342151907909043e-06,
      "loss": 0.0237,
      "step": 523200
    },
    {
      "epoch": 0.8562610056099972,
      "grad_norm": 0.5609878301620483,
      "learning_rate": 8.342086015695527e-06,
      "loss": 0.0353,
      "step": 523220
    },
    {
      "epoch": 0.8562937360486506,
      "grad_norm": 0.5981550216674805,
      "learning_rate": 8.342020123482009e-06,
      "loss": 0.0281,
      "step": 523240
    },
    {
      "epoch": 0.8563264664873038,
      "grad_norm": 0.6004531383514404,
      "learning_rate": 8.341954231268492e-06,
      "loss": 0.0337,
      "step": 523260
    },
    {
      "epoch": 0.8563591969259572,
      "grad_norm": 0.6959572434425354,
      "learning_rate": 8.341888339054974e-06,
      "loss": 0.0292,
      "step": 523280
    },
    {
      "epoch": 0.8563919273646106,
      "grad_norm": 1.7034581899642944,
      "learning_rate": 8.341822446841458e-06,
      "loss": 0.024,
      "step": 523300
    },
    {
      "epoch": 0.8564246578032639,
      "grad_norm": 0.352072536945343,
      "learning_rate": 8.34175655462794e-06,
      "loss": 0.0289,
      "step": 523320
    },
    {
      "epoch": 0.8564573882419172,
      "grad_norm": 1.1752961874008179,
      "learning_rate": 8.341690662414423e-06,
      "loss": 0.039,
      "step": 523340
    },
    {
      "epoch": 0.8564901186805706,
      "grad_norm": 0.38423770666122437,
      "learning_rate": 8.341624770200905e-06,
      "loss": 0.0348,
      "step": 523360
    },
    {
      "epoch": 0.8565228491192239,
      "grad_norm": 0.4942771792411804,
      "learning_rate": 8.341558877987389e-06,
      "loss": 0.0227,
      "step": 523380
    },
    {
      "epoch": 0.8565555795578772,
      "grad_norm": 0.43620172142982483,
      "learning_rate": 8.341492985773872e-06,
      "loss": 0.0313,
      "step": 523400
    },
    {
      "epoch": 0.8565883099965306,
      "grad_norm": 0.8460902571678162,
      "learning_rate": 8.341427093560354e-06,
      "loss": 0.0318,
      "step": 523420
    },
    {
      "epoch": 0.856621040435184,
      "grad_norm": 0.40182292461395264,
      "learning_rate": 8.341361201346838e-06,
      "loss": 0.0287,
      "step": 523440
    },
    {
      "epoch": 0.8566537708738372,
      "grad_norm": 1.6095857620239258,
      "learning_rate": 8.341295309133321e-06,
      "loss": 0.041,
      "step": 523460
    },
    {
      "epoch": 0.8566865013124906,
      "grad_norm": 0.5019539594650269,
      "learning_rate": 8.341229416919803e-06,
      "loss": 0.0273,
      "step": 523480
    },
    {
      "epoch": 0.856719231751144,
      "grad_norm": 0.5170872807502747,
      "learning_rate": 8.341163524706287e-06,
      "loss": 0.0345,
      "step": 523500
    },
    {
      "epoch": 0.8567519621897973,
      "grad_norm": 0.311374306678772,
      "learning_rate": 8.34109763249277e-06,
      "loss": 0.0326,
      "step": 523520
    },
    {
      "epoch": 0.8567846926284506,
      "grad_norm": 1.1331804990768433,
      "learning_rate": 8.341031740279252e-06,
      "loss": 0.0278,
      "step": 523540
    },
    {
      "epoch": 0.856817423067104,
      "grad_norm": 2.323254346847534,
      "learning_rate": 8.340965848065736e-06,
      "loss": 0.0272,
      "step": 523560
    },
    {
      "epoch": 0.8568501535057573,
      "grad_norm": 2.0352914333343506,
      "learning_rate": 8.340899955852218e-06,
      "loss": 0.0426,
      "step": 523580
    },
    {
      "epoch": 0.8568828839444106,
      "grad_norm": 1.818787932395935,
      "learning_rate": 8.340834063638701e-06,
      "loss": 0.0307,
      "step": 523600
    },
    {
      "epoch": 0.856915614383064,
      "grad_norm": 1.676322102546692,
      "learning_rate": 8.340768171425183e-06,
      "loss": 0.0239,
      "step": 523620
    },
    {
      "epoch": 0.8569483448217173,
      "grad_norm": 1.0540366172790527,
      "learning_rate": 8.340702279211667e-06,
      "loss": 0.0332,
      "step": 523640
    },
    {
      "epoch": 0.8569810752603706,
      "grad_norm": 0.7160435914993286,
      "learning_rate": 8.340636386998149e-06,
      "loss": 0.0337,
      "step": 523660
    },
    {
      "epoch": 0.857013805699024,
      "grad_norm": 0.5145549774169922,
      "learning_rate": 8.340570494784632e-06,
      "loss": 0.0274,
      "step": 523680
    },
    {
      "epoch": 0.8570465361376773,
      "grad_norm": 0.3722764849662781,
      "learning_rate": 8.340504602571114e-06,
      "loss": 0.0313,
      "step": 523700
    },
    {
      "epoch": 0.8570792665763306,
      "grad_norm": 0.7150528430938721,
      "learning_rate": 8.340438710357598e-06,
      "loss": 0.0415,
      "step": 523720
    },
    {
      "epoch": 0.857111997014984,
      "grad_norm": 1.2490794658660889,
      "learning_rate": 8.34037281814408e-06,
      "loss": 0.0383,
      "step": 523740
    },
    {
      "epoch": 0.8571447274536373,
      "grad_norm": 0.4427542984485626,
      "learning_rate": 8.340306925930563e-06,
      "loss": 0.0269,
      "step": 523760
    },
    {
      "epoch": 0.8571774578922907,
      "grad_norm": 0.6079940795898438,
      "learning_rate": 8.340241033717045e-06,
      "loss": 0.0275,
      "step": 523780
    },
    {
      "epoch": 0.857210188330944,
      "grad_norm": 0.5627396106719971,
      "learning_rate": 8.340175141503529e-06,
      "loss": 0.0211,
      "step": 523800
    },
    {
      "epoch": 0.8572429187695974,
      "grad_norm": 2.5391063690185547,
      "learning_rate": 8.340109249290012e-06,
      "loss": 0.0251,
      "step": 523820
    },
    {
      "epoch": 0.8572756492082507,
      "grad_norm": 3.854978561401367,
      "learning_rate": 8.340043357076494e-06,
      "loss": 0.029,
      "step": 523840
    },
    {
      "epoch": 0.857308379646904,
      "grad_norm": 1.8563021421432495,
      "learning_rate": 8.339977464862978e-06,
      "loss": 0.0238,
      "step": 523860
    },
    {
      "epoch": 0.8573411100855574,
      "grad_norm": 1.2787188291549683,
      "learning_rate": 8.339911572649461e-06,
      "loss": 0.0358,
      "step": 523880
    },
    {
      "epoch": 0.8573738405242107,
      "grad_norm": 1.0306730270385742,
      "learning_rate": 8.339845680435943e-06,
      "loss": 0.0318,
      "step": 523900
    },
    {
      "epoch": 0.857406570962864,
      "grad_norm": 1.6611073017120361,
      "learning_rate": 8.339779788222427e-06,
      "loss": 0.0318,
      "step": 523920
    },
    {
      "epoch": 0.8574393014015174,
      "grad_norm": 0.9954965710639954,
      "learning_rate": 8.33971389600891e-06,
      "loss": 0.0211,
      "step": 523940
    },
    {
      "epoch": 0.8574720318401707,
      "grad_norm": 0.3827931582927704,
      "learning_rate": 8.339648003795392e-06,
      "loss": 0.0326,
      "step": 523960
    },
    {
      "epoch": 0.8575047622788241,
      "grad_norm": 1.0357894897460938,
      "learning_rate": 8.339582111581876e-06,
      "loss": 0.0333,
      "step": 523980
    },
    {
      "epoch": 0.8575374927174774,
      "grad_norm": 1.7176772356033325,
      "learning_rate": 8.339516219368358e-06,
      "loss": 0.0332,
      "step": 524000
    },
    {
      "epoch": 0.8575702231561307,
      "grad_norm": 1.3256479501724243,
      "learning_rate": 8.339450327154841e-06,
      "loss": 0.0389,
      "step": 524020
    },
    {
      "epoch": 0.8576029535947841,
      "grad_norm": 2.2394957542419434,
      "learning_rate": 8.339384434941323e-06,
      "loss": 0.0332,
      "step": 524040
    },
    {
      "epoch": 0.8576356840334374,
      "grad_norm": 1.6046706438064575,
      "learning_rate": 8.339318542727807e-06,
      "loss": 0.0363,
      "step": 524060
    },
    {
      "epoch": 0.8576684144720907,
      "grad_norm": 1.1888757944107056,
      "learning_rate": 8.339252650514289e-06,
      "loss": 0.0316,
      "step": 524080
    },
    {
      "epoch": 0.8577011449107441,
      "grad_norm": 0.40828225016593933,
      "learning_rate": 8.339186758300772e-06,
      "loss": 0.0256,
      "step": 524100
    },
    {
      "epoch": 0.8577338753493974,
      "grad_norm": 2.1337528228759766,
      "learning_rate": 8.339120866087254e-06,
      "loss": 0.0326,
      "step": 524120
    },
    {
      "epoch": 0.8577666057880508,
      "grad_norm": 0.6043186783790588,
      "learning_rate": 8.339054973873738e-06,
      "loss": 0.0168,
      "step": 524140
    },
    {
      "epoch": 0.8577993362267041,
      "grad_norm": 1.0922499895095825,
      "learning_rate": 8.33898908166022e-06,
      "loss": 0.0303,
      "step": 524160
    },
    {
      "epoch": 0.8578320666653575,
      "grad_norm": 1.404451608657837,
      "learning_rate": 8.338923189446703e-06,
      "loss": 0.0275,
      "step": 524180
    },
    {
      "epoch": 0.8578647971040108,
      "grad_norm": 0.4141198694705963,
      "learning_rate": 8.338857297233187e-06,
      "loss": 0.0296,
      "step": 524200
    },
    {
      "epoch": 0.8578975275426641,
      "grad_norm": 1.2560173273086548,
      "learning_rate": 8.338791405019669e-06,
      "loss": 0.0379,
      "step": 524220
    },
    {
      "epoch": 0.8579302579813175,
      "grad_norm": 3.294482469558716,
      "learning_rate": 8.338725512806152e-06,
      "loss": 0.0291,
      "step": 524240
    },
    {
      "epoch": 0.8579629884199708,
      "grad_norm": 0.5929545760154724,
      "learning_rate": 8.338659620592636e-06,
      "loss": 0.0266,
      "step": 524260
    },
    {
      "epoch": 0.8579957188586241,
      "grad_norm": 0.2357717603445053,
      "learning_rate": 8.338593728379118e-06,
      "loss": 0.0367,
      "step": 524280
    },
    {
      "epoch": 0.8580284492972775,
      "grad_norm": 0.8526532649993896,
      "learning_rate": 8.338527836165601e-06,
      "loss": 0.034,
      "step": 524300
    },
    {
      "epoch": 0.8580611797359308,
      "grad_norm": 0.7467694282531738,
      "learning_rate": 8.338461943952085e-06,
      "loss": 0.0265,
      "step": 524320
    },
    {
      "epoch": 0.8580939101745841,
      "grad_norm": 0.6689186096191406,
      "learning_rate": 8.338396051738567e-06,
      "loss": 0.0398,
      "step": 524340
    },
    {
      "epoch": 0.8581266406132375,
      "grad_norm": 1.5505263805389404,
      "learning_rate": 8.33833015952505e-06,
      "loss": 0.0415,
      "step": 524360
    },
    {
      "epoch": 0.8581593710518909,
      "grad_norm": 2.088951826095581,
      "learning_rate": 8.338264267311532e-06,
      "loss": 0.0331,
      "step": 524380
    },
    {
      "epoch": 0.8581921014905441,
      "grad_norm": 0.4220389425754547,
      "learning_rate": 8.338198375098016e-06,
      "loss": 0.0257,
      "step": 524400
    },
    {
      "epoch": 0.8582248319291975,
      "grad_norm": 0.33967912197113037,
      "learning_rate": 8.338132482884498e-06,
      "loss": 0.0258,
      "step": 524420
    },
    {
      "epoch": 0.8582575623678509,
      "grad_norm": 1.0403296947479248,
      "learning_rate": 8.338066590670981e-06,
      "loss": 0.0331,
      "step": 524440
    },
    {
      "epoch": 0.8582902928065042,
      "grad_norm": 0.3189518451690674,
      "learning_rate": 8.338000698457463e-06,
      "loss": 0.0285,
      "step": 524460
    },
    {
      "epoch": 0.8583230232451575,
      "grad_norm": 1.0687764883041382,
      "learning_rate": 8.337934806243947e-06,
      "loss": 0.0258,
      "step": 524480
    },
    {
      "epoch": 0.8583557536838109,
      "grad_norm": 0.23790785670280457,
      "learning_rate": 8.337868914030429e-06,
      "loss": 0.0271,
      "step": 524500
    },
    {
      "epoch": 0.8583884841224642,
      "grad_norm": 1.5828113555908203,
      "learning_rate": 8.337803021816912e-06,
      "loss": 0.0273,
      "step": 524520
    },
    {
      "epoch": 0.8584212145611175,
      "grad_norm": 0.6646261811256409,
      "learning_rate": 8.337737129603396e-06,
      "loss": 0.0281,
      "step": 524540
    },
    {
      "epoch": 0.8584539449997709,
      "grad_norm": 1.2376664876937866,
      "learning_rate": 8.337671237389878e-06,
      "loss": 0.0287,
      "step": 524560
    },
    {
      "epoch": 0.8584866754384243,
      "grad_norm": 0.278017520904541,
      "learning_rate": 8.337605345176361e-06,
      "loss": 0.0316,
      "step": 524580
    },
    {
      "epoch": 0.8585194058770775,
      "grad_norm": NaN,
      "learning_rate": 8.337539452962843e-06,
      "loss": 0.0323,
      "step": 524600
    },
    {
      "epoch": 0.8585521363157309,
      "grad_norm": 0.7680651545524597,
      "learning_rate": 8.337473560749327e-06,
      "loss": 0.026,
      "step": 524620
    },
    {
      "epoch": 0.8585848667543843,
      "grad_norm": 0.713884711265564,
      "learning_rate": 8.33740766853581e-06,
      "loss": 0.0322,
      "step": 524640
    },
    {
      "epoch": 0.8586175971930375,
      "grad_norm": 1.464001178741455,
      "learning_rate": 8.337341776322292e-06,
      "loss": 0.0245,
      "step": 524660
    },
    {
      "epoch": 0.8586503276316909,
      "grad_norm": 0.16129930317401886,
      "learning_rate": 8.337275884108776e-06,
      "loss": 0.0293,
      "step": 524680
    },
    {
      "epoch": 0.8586830580703443,
      "grad_norm": 0.833046019077301,
      "learning_rate": 8.33720999189526e-06,
      "loss": 0.0314,
      "step": 524700
    },
    {
      "epoch": 0.8587157885089975,
      "grad_norm": 1.098626732826233,
      "learning_rate": 8.337144099681741e-06,
      "loss": 0.0246,
      "step": 524720
    },
    {
      "epoch": 0.8587485189476509,
      "grad_norm": 1.027295708656311,
      "learning_rate": 8.337078207468225e-06,
      "loss": 0.0408,
      "step": 524740
    },
    {
      "epoch": 0.8587812493863043,
      "grad_norm": 1.220577597618103,
      "learning_rate": 8.337012315254707e-06,
      "loss": 0.0319,
      "step": 524760
    },
    {
      "epoch": 0.8588139798249577,
      "grad_norm": 0.8272277116775513,
      "learning_rate": 8.33694642304119e-06,
      "loss": 0.0286,
      "step": 524780
    },
    {
      "epoch": 0.8588467102636109,
      "grad_norm": 0.949062705039978,
      "learning_rate": 8.336880530827672e-06,
      "loss": 0.0418,
      "step": 524800
    },
    {
      "epoch": 0.8588794407022643,
      "grad_norm": 0.609041154384613,
      "learning_rate": 8.336814638614156e-06,
      "loss": 0.0266,
      "step": 524820
    },
    {
      "epoch": 0.8589121711409177,
      "grad_norm": 0.793178141117096,
      "learning_rate": 8.336748746400638e-06,
      "loss": 0.0281,
      "step": 524840
    },
    {
      "epoch": 0.8589449015795709,
      "grad_norm": 0.6110968589782715,
      "learning_rate": 8.336682854187121e-06,
      "loss": 0.0358,
      "step": 524860
    },
    {
      "epoch": 0.8589776320182243,
      "grad_norm": 0.7514333128929138,
      "learning_rate": 8.336616961973605e-06,
      "loss": 0.03,
      "step": 524880
    },
    {
      "epoch": 0.8590103624568777,
      "grad_norm": 0.6112827062606812,
      "learning_rate": 8.336551069760087e-06,
      "loss": 0.0322,
      "step": 524900
    },
    {
      "epoch": 0.8590430928955309,
      "grad_norm": 1.6623550653457642,
      "learning_rate": 8.33648517754657e-06,
      "loss": 0.0361,
      "step": 524920
    },
    {
      "epoch": 0.8590758233341843,
      "grad_norm": 1.2628697156906128,
      "learning_rate": 8.336419285333052e-06,
      "loss": 0.0382,
      "step": 524940
    },
    {
      "epoch": 0.8591085537728377,
      "grad_norm": 0.5457464456558228,
      "learning_rate": 8.336353393119536e-06,
      "loss": 0.0236,
      "step": 524960
    },
    {
      "epoch": 0.859141284211491,
      "grad_norm": 1.4576047658920288,
      "learning_rate": 8.336287500906018e-06,
      "loss": 0.026,
      "step": 524980
    },
    {
      "epoch": 0.8591740146501443,
      "grad_norm": 1.2723438739776611,
      "learning_rate": 8.336221608692501e-06,
      "loss": 0.0284,
      "step": 525000
    },
    {
      "epoch": 0.8592067450887977,
      "grad_norm": 0.7491565942764282,
      "learning_rate": 8.336155716478983e-06,
      "loss": 0.0263,
      "step": 525020
    },
    {
      "epoch": 0.8592394755274511,
      "grad_norm": 1.6975980997085571,
      "learning_rate": 8.336089824265467e-06,
      "loss": 0.0316,
      "step": 525040
    },
    {
      "epoch": 0.8592722059661043,
      "grad_norm": 0.46914589405059814,
      "learning_rate": 8.33602393205195e-06,
      "loss": 0.0243,
      "step": 525060
    },
    {
      "epoch": 0.8593049364047577,
      "grad_norm": 0.24113795161247253,
      "learning_rate": 8.335958039838432e-06,
      "loss": 0.0278,
      "step": 525080
    },
    {
      "epoch": 0.8593376668434111,
      "grad_norm": 0.22625447809696198,
      "learning_rate": 8.335892147624916e-06,
      "loss": 0.0387,
      "step": 525100
    },
    {
      "epoch": 0.8593703972820643,
      "grad_norm": 1.2583695650100708,
      "learning_rate": 8.3358262554114e-06,
      "loss": 0.0386,
      "step": 525120
    },
    {
      "epoch": 0.8594031277207177,
      "grad_norm": 2.8209831714630127,
      "learning_rate": 8.335760363197881e-06,
      "loss": 0.0386,
      "step": 525140
    },
    {
      "epoch": 0.8594358581593711,
      "grad_norm": 1.5299667119979858,
      "learning_rate": 8.335694470984365e-06,
      "loss": 0.0209,
      "step": 525160
    },
    {
      "epoch": 0.8594685885980244,
      "grad_norm": 0.5169700384140015,
      "learning_rate": 8.335628578770847e-06,
      "loss": 0.0253,
      "step": 525180
    },
    {
      "epoch": 0.8595013190366777,
      "grad_norm": 0.4894735813140869,
      "learning_rate": 8.33556268655733e-06,
      "loss": 0.0201,
      "step": 525200
    },
    {
      "epoch": 0.8595340494753311,
      "grad_norm": 2.041800022125244,
      "learning_rate": 8.335496794343812e-06,
      "loss": 0.0314,
      "step": 525220
    },
    {
      "epoch": 0.8595667799139844,
      "grad_norm": 0.36799055337905884,
      "learning_rate": 8.335430902130296e-06,
      "loss": 0.0316,
      "step": 525240
    },
    {
      "epoch": 0.8595995103526377,
      "grad_norm": 0.5611576437950134,
      "learning_rate": 8.33536500991678e-06,
      "loss": 0.0272,
      "step": 525260
    },
    {
      "epoch": 0.8596322407912911,
      "grad_norm": 3.1109023094177246,
      "learning_rate": 8.335299117703262e-06,
      "loss": 0.0288,
      "step": 525280
    },
    {
      "epoch": 0.8596649712299445,
      "grad_norm": 1.0805482864379883,
      "learning_rate": 8.335233225489745e-06,
      "loss": 0.0259,
      "step": 525300
    },
    {
      "epoch": 0.8596977016685977,
      "grad_norm": 3.040442943572998,
      "learning_rate": 8.335167333276227e-06,
      "loss": 0.0207,
      "step": 525320
    },
    {
      "epoch": 0.8597304321072511,
      "grad_norm": 0.6147438883781433,
      "learning_rate": 8.33510144106271e-06,
      "loss": 0.0338,
      "step": 525340
    },
    {
      "epoch": 0.8597631625459045,
      "grad_norm": 0.8314790725708008,
      "learning_rate": 8.335035548849192e-06,
      "loss": 0.033,
      "step": 525360
    },
    {
      "epoch": 0.8597958929845578,
      "grad_norm": 0.3876204788684845,
      "learning_rate": 8.334969656635676e-06,
      "loss": 0.0418,
      "step": 525380
    },
    {
      "epoch": 0.8598286234232111,
      "grad_norm": 0.2748672366142273,
      "learning_rate": 8.334903764422158e-06,
      "loss": 0.0259,
      "step": 525400
    },
    {
      "epoch": 0.8598613538618645,
      "grad_norm": 1.1722873449325562,
      "learning_rate": 8.334837872208642e-06,
      "loss": 0.0243,
      "step": 525420
    },
    {
      "epoch": 0.8598940843005178,
      "grad_norm": 0.9837470054626465,
      "learning_rate": 8.334771979995125e-06,
      "loss": 0.036,
      "step": 525440
    },
    {
      "epoch": 0.8599268147391711,
      "grad_norm": 1.5375910997390747,
      "learning_rate": 8.334706087781607e-06,
      "loss": 0.0245,
      "step": 525460
    },
    {
      "epoch": 0.8599595451778245,
      "grad_norm": 1.1101207733154297,
      "learning_rate": 8.33464019556809e-06,
      "loss": 0.0334,
      "step": 525480
    },
    {
      "epoch": 0.8599922756164778,
      "grad_norm": 1.312499761581421,
      "learning_rate": 8.334574303354574e-06,
      "loss": 0.0396,
      "step": 525500
    },
    {
      "epoch": 0.8600250060551311,
      "grad_norm": 0.5584304928779602,
      "learning_rate": 8.334508411141056e-06,
      "loss": 0.0206,
      "step": 525520
    },
    {
      "epoch": 0.8600577364937845,
      "grad_norm": 0.5186758041381836,
      "learning_rate": 8.33444251892754e-06,
      "loss": 0.0415,
      "step": 525540
    },
    {
      "epoch": 0.8600904669324378,
      "grad_norm": 0.6580076217651367,
      "learning_rate": 8.334376626714022e-06,
      "loss": 0.0309,
      "step": 525560
    },
    {
      "epoch": 0.8601231973710912,
      "grad_norm": 0.12935812771320343,
      "learning_rate": 8.334310734500505e-06,
      "loss": 0.0374,
      "step": 525580
    },
    {
      "epoch": 0.8601559278097445,
      "grad_norm": 0.4370206892490387,
      "learning_rate": 8.334244842286989e-06,
      "loss": 0.0337,
      "step": 525600
    },
    {
      "epoch": 0.8601886582483979,
      "grad_norm": 0.6001372337341309,
      "learning_rate": 8.33417895007347e-06,
      "loss": 0.03,
      "step": 525620
    },
    {
      "epoch": 0.8602213886870512,
      "grad_norm": 1.0466992855072021,
      "learning_rate": 8.334113057859954e-06,
      "loss": 0.0405,
      "step": 525640
    },
    {
      "epoch": 0.8602541191257045,
      "grad_norm": 1.901766300201416,
      "learning_rate": 8.334047165646436e-06,
      "loss": 0.0339,
      "step": 525660
    },
    {
      "epoch": 0.8602868495643579,
      "grad_norm": 0.29330191016197205,
      "learning_rate": 8.33398127343292e-06,
      "loss": 0.0279,
      "step": 525680
    },
    {
      "epoch": 0.8603195800030112,
      "grad_norm": 0.6905872821807861,
      "learning_rate": 8.333915381219402e-06,
      "loss": 0.033,
      "step": 525700
    },
    {
      "epoch": 0.8603523104416645,
      "grad_norm": 1.511339783668518,
      "learning_rate": 8.333849489005885e-06,
      "loss": 0.0358,
      "step": 525720
    },
    {
      "epoch": 0.8603850408803179,
      "grad_norm": 0.6206815838813782,
      "learning_rate": 8.333783596792367e-06,
      "loss": 0.0296,
      "step": 525740
    },
    {
      "epoch": 0.8604177713189712,
      "grad_norm": 1.2483465671539307,
      "learning_rate": 8.33371770457885e-06,
      "loss": 0.0289,
      "step": 525760
    },
    {
      "epoch": 0.8604505017576246,
      "grad_norm": 1.7210416793823242,
      "learning_rate": 8.333651812365333e-06,
      "loss": 0.0338,
      "step": 525780
    },
    {
      "epoch": 0.8604832321962779,
      "grad_norm": 0.3668636083602905,
      "learning_rate": 8.333585920151816e-06,
      "loss": 0.0223,
      "step": 525800
    },
    {
      "epoch": 0.8605159626349312,
      "grad_norm": 1.932292103767395,
      "learning_rate": 8.333520027938298e-06,
      "loss": 0.0259,
      "step": 525820
    },
    {
      "epoch": 0.8605486930735846,
      "grad_norm": 0.5506413578987122,
      "learning_rate": 8.333454135724782e-06,
      "loss": 0.0406,
      "step": 525840
    },
    {
      "epoch": 0.8605814235122379,
      "grad_norm": 1.03274667263031,
      "learning_rate": 8.333388243511265e-06,
      "loss": 0.0343,
      "step": 525860
    },
    {
      "epoch": 0.8606141539508912,
      "grad_norm": 1.813222885131836,
      "learning_rate": 8.333322351297747e-06,
      "loss": 0.0371,
      "step": 525880
    },
    {
      "epoch": 0.8606468843895446,
      "grad_norm": 6.822674751281738,
      "learning_rate": 8.33325645908423e-06,
      "loss": 0.0296,
      "step": 525900
    },
    {
      "epoch": 0.8606796148281979,
      "grad_norm": 1.6888929605484009,
      "learning_rate": 8.333190566870714e-06,
      "loss": 0.0291,
      "step": 525920
    },
    {
      "epoch": 0.8607123452668513,
      "grad_norm": 0.4810435175895691,
      "learning_rate": 8.333124674657196e-06,
      "loss": 0.0268,
      "step": 525940
    },
    {
      "epoch": 0.8607450757055046,
      "grad_norm": 1.7542065382003784,
      "learning_rate": 8.33305878244368e-06,
      "loss": 0.0299,
      "step": 525960
    },
    {
      "epoch": 0.860777806144158,
      "grad_norm": 2.6577744483947754,
      "learning_rate": 8.332992890230163e-06,
      "loss": 0.032,
      "step": 525980
    },
    {
      "epoch": 0.8608105365828113,
      "grad_norm": 0.8627340793609619,
      "learning_rate": 8.332926998016645e-06,
      "loss": 0.0232,
      "step": 526000
    },
    {
      "epoch": 0.8608432670214646,
      "grad_norm": 0.3181978166103363,
      "learning_rate": 8.332861105803129e-06,
      "loss": 0.0363,
      "step": 526020
    },
    {
      "epoch": 0.860875997460118,
      "grad_norm": 1.4330666065216064,
      "learning_rate": 8.33279521358961e-06,
      "loss": 0.0448,
      "step": 526040
    },
    {
      "epoch": 0.8609087278987713,
      "grad_norm": 3.298774480819702,
      "learning_rate": 8.332729321376094e-06,
      "loss": 0.0349,
      "step": 526060
    },
    {
      "epoch": 0.8609414583374246,
      "grad_norm": 0.2609627842903137,
      "learning_rate": 8.332663429162576e-06,
      "loss": 0.0295,
      "step": 526080
    },
    {
      "epoch": 0.860974188776078,
      "grad_norm": 1.7467718124389648,
      "learning_rate": 8.33259753694906e-06,
      "loss": 0.0293,
      "step": 526100
    },
    {
      "epoch": 0.8610069192147313,
      "grad_norm": 0.2586613595485687,
      "learning_rate": 8.332531644735542e-06,
      "loss": 0.0319,
      "step": 526120
    },
    {
      "epoch": 0.8610396496533846,
      "grad_norm": 3.1167984008789062,
      "learning_rate": 8.332465752522025e-06,
      "loss": 0.0214,
      "step": 526140
    },
    {
      "epoch": 0.861072380092038,
      "grad_norm": 1.315398931503296,
      "learning_rate": 8.332399860308507e-06,
      "loss": 0.028,
      "step": 526160
    },
    {
      "epoch": 0.8611051105306914,
      "grad_norm": 0.7049973011016846,
      "learning_rate": 8.33233396809499e-06,
      "loss": 0.0256,
      "step": 526180
    },
    {
      "epoch": 0.8611378409693446,
      "grad_norm": 0.23348169028759003,
      "learning_rate": 8.332268075881473e-06,
      "loss": 0.0297,
      "step": 526200
    },
    {
      "epoch": 0.861170571407998,
      "grad_norm": 0.6301634907722473,
      "learning_rate": 8.332202183667956e-06,
      "loss": 0.0253,
      "step": 526220
    },
    {
      "epoch": 0.8612033018466514,
      "grad_norm": 1.1470733880996704,
      "learning_rate": 8.33213629145444e-06,
      "loss": 0.0298,
      "step": 526240
    },
    {
      "epoch": 0.8612360322853047,
      "grad_norm": 1.3528313636779785,
      "learning_rate": 8.332070399240922e-06,
      "loss": 0.0289,
      "step": 526260
    },
    {
      "epoch": 0.861268762723958,
      "grad_norm": 0.7126771807670593,
      "learning_rate": 8.332004507027405e-06,
      "loss": 0.0324,
      "step": 526280
    },
    {
      "epoch": 0.8613014931626114,
      "grad_norm": 0.3760344684123993,
      "learning_rate": 8.331938614813889e-06,
      "loss": 0.0263,
      "step": 526300
    },
    {
      "epoch": 0.8613342236012647,
      "grad_norm": 0.4140453040599823,
      "learning_rate": 8.33187272260037e-06,
      "loss": 0.0295,
      "step": 526320
    },
    {
      "epoch": 0.861366954039918,
      "grad_norm": 5.722790718078613,
      "learning_rate": 8.331806830386854e-06,
      "loss": 0.0379,
      "step": 526340
    },
    {
      "epoch": 0.8613996844785714,
      "grad_norm": 0.9355124235153198,
      "learning_rate": 8.331740938173338e-06,
      "loss": 0.0309,
      "step": 526360
    },
    {
      "epoch": 0.8614324149172248,
      "grad_norm": 0.39834824204444885,
      "learning_rate": 8.33167504595982e-06,
      "loss": 0.0261,
      "step": 526380
    },
    {
      "epoch": 0.861465145355878,
      "grad_norm": 0.5270295143127441,
      "learning_rate": 8.331609153746303e-06,
      "loss": 0.0163,
      "step": 526400
    },
    {
      "epoch": 0.8614978757945314,
      "grad_norm": 1.4704965353012085,
      "learning_rate": 8.331543261532785e-06,
      "loss": 0.0234,
      "step": 526420
    },
    {
      "epoch": 0.8615306062331848,
      "grad_norm": 0.6886498332023621,
      "learning_rate": 8.331477369319269e-06,
      "loss": 0.0311,
      "step": 526440
    },
    {
      "epoch": 0.861563336671838,
      "grad_norm": 0.7354624271392822,
      "learning_rate": 8.33141147710575e-06,
      "loss": 0.0292,
      "step": 526460
    },
    {
      "epoch": 0.8615960671104914,
      "grad_norm": 1.1469942331314087,
      "learning_rate": 8.331345584892234e-06,
      "loss": 0.0248,
      "step": 526480
    },
    {
      "epoch": 0.8616287975491448,
      "grad_norm": 0.2518848776817322,
      "learning_rate": 8.331279692678716e-06,
      "loss": 0.02,
      "step": 526500
    },
    {
      "epoch": 0.861661527987798,
      "grad_norm": 0.26493075489997864,
      "learning_rate": 8.3312138004652e-06,
      "loss": 0.0228,
      "step": 526520
    },
    {
      "epoch": 0.8616942584264514,
      "grad_norm": 1.3632298707962036,
      "learning_rate": 8.331147908251682e-06,
      "loss": 0.0425,
      "step": 526540
    },
    {
      "epoch": 0.8617269888651048,
      "grad_norm": 0.17832717299461365,
      "learning_rate": 8.331082016038165e-06,
      "loss": 0.0266,
      "step": 526560
    },
    {
      "epoch": 0.861759719303758,
      "grad_norm": 0.36960217356681824,
      "learning_rate": 8.331016123824647e-06,
      "loss": 0.0261,
      "step": 526580
    },
    {
      "epoch": 0.8617924497424114,
      "grad_norm": 0.46425697207450867,
      "learning_rate": 8.33095023161113e-06,
      "loss": 0.0391,
      "step": 526600
    },
    {
      "epoch": 0.8618251801810648,
      "grad_norm": 1.277347445487976,
      "learning_rate": 8.330884339397613e-06,
      "loss": 0.0286,
      "step": 526620
    },
    {
      "epoch": 0.8618579106197182,
      "grad_norm": 0.7769776582717896,
      "learning_rate": 8.330818447184096e-06,
      "loss": 0.0263,
      "step": 526640
    },
    {
      "epoch": 0.8618906410583714,
      "grad_norm": 0.6707631945610046,
      "learning_rate": 8.33075255497058e-06,
      "loss": 0.0171,
      "step": 526660
    },
    {
      "epoch": 0.8619233714970248,
      "grad_norm": 2.4080862998962402,
      "learning_rate": 8.330686662757062e-06,
      "loss": 0.0314,
      "step": 526680
    },
    {
      "epoch": 0.8619561019356782,
      "grad_norm": 1.668528437614441,
      "learning_rate": 8.330620770543545e-06,
      "loss": 0.0308,
      "step": 526700
    },
    {
      "epoch": 0.8619888323743314,
      "grad_norm": 1.4886242151260376,
      "learning_rate": 8.330554878330029e-06,
      "loss": 0.0258,
      "step": 526720
    },
    {
      "epoch": 0.8620215628129848,
      "grad_norm": 3.1133315563201904,
      "learning_rate": 8.330488986116513e-06,
      "loss": 0.0237,
      "step": 526740
    },
    {
      "epoch": 0.8620542932516382,
      "grad_norm": 1.5115588903427124,
      "learning_rate": 8.330423093902994e-06,
      "loss": 0.0374,
      "step": 526760
    },
    {
      "epoch": 0.8620870236902914,
      "grad_norm": 1.5246410369873047,
      "learning_rate": 8.330357201689478e-06,
      "loss": 0.0303,
      "step": 526780
    },
    {
      "epoch": 0.8621197541289448,
      "grad_norm": 2.0318381786346436,
      "learning_rate": 8.33029130947596e-06,
      "loss": 0.0326,
      "step": 526800
    },
    {
      "epoch": 0.8621524845675982,
      "grad_norm": 0.44289395213127136,
      "learning_rate": 8.330225417262443e-06,
      "loss": 0.0256,
      "step": 526820
    },
    {
      "epoch": 0.8621852150062516,
      "grad_norm": 0.38345229625701904,
      "learning_rate": 8.330159525048925e-06,
      "loss": 0.0151,
      "step": 526840
    },
    {
      "epoch": 0.8622179454449048,
      "grad_norm": 0.9302145838737488,
      "learning_rate": 8.330093632835409e-06,
      "loss": 0.0259,
      "step": 526860
    },
    {
      "epoch": 0.8622506758835582,
      "grad_norm": 1.4113943576812744,
      "learning_rate": 8.330027740621891e-06,
      "loss": 0.0378,
      "step": 526880
    },
    {
      "epoch": 0.8622834063222116,
      "grad_norm": 2.581951856613159,
      "learning_rate": 8.329961848408374e-06,
      "loss": 0.0326,
      "step": 526900
    },
    {
      "epoch": 0.8623161367608648,
      "grad_norm": 1.0208793878555298,
      "learning_rate": 8.329895956194856e-06,
      "loss": 0.026,
      "step": 526920
    },
    {
      "epoch": 0.8623488671995182,
      "grad_norm": 2.0500974655151367,
      "learning_rate": 8.32983006398134e-06,
      "loss": 0.0261,
      "step": 526940
    },
    {
      "epoch": 0.8623815976381716,
      "grad_norm": 0.9213200807571411,
      "learning_rate": 8.329764171767822e-06,
      "loss": 0.0226,
      "step": 526960
    },
    {
      "epoch": 0.8624143280768248,
      "grad_norm": 3.8733904361724854,
      "learning_rate": 8.329698279554305e-06,
      "loss": 0.029,
      "step": 526980
    },
    {
      "epoch": 0.8624470585154782,
      "grad_norm": 1.7555277347564697,
      "learning_rate": 8.329632387340789e-06,
      "loss": 0.0283,
      "step": 527000
    },
    {
      "epoch": 0.8624797889541316,
      "grad_norm": 2.4118564128875732,
      "learning_rate": 8.329566495127271e-06,
      "loss": 0.0351,
      "step": 527020
    },
    {
      "epoch": 0.862512519392785,
      "grad_norm": 1.0735872983932495,
      "learning_rate": 8.329500602913754e-06,
      "loss": 0.0359,
      "step": 527040
    },
    {
      "epoch": 0.8625452498314382,
      "grad_norm": 1.7696980237960815,
      "learning_rate": 8.329434710700236e-06,
      "loss": 0.0298,
      "step": 527060
    },
    {
      "epoch": 0.8625779802700916,
      "grad_norm": 0.5932992100715637,
      "learning_rate": 8.32936881848672e-06,
      "loss": 0.0239,
      "step": 527080
    },
    {
      "epoch": 0.862610710708745,
      "grad_norm": 1.4379258155822754,
      "learning_rate": 8.329302926273204e-06,
      "loss": 0.0312,
      "step": 527100
    },
    {
      "epoch": 0.8626434411473982,
      "grad_norm": 2.1365346908569336,
      "learning_rate": 8.329237034059685e-06,
      "loss": 0.0448,
      "step": 527120
    },
    {
      "epoch": 0.8626761715860516,
      "grad_norm": 0.22434063255786896,
      "learning_rate": 8.329171141846169e-06,
      "loss": 0.0365,
      "step": 527140
    },
    {
      "epoch": 0.862708902024705,
      "grad_norm": 0.9817307591438293,
      "learning_rate": 8.329105249632653e-06,
      "loss": 0.0311,
      "step": 527160
    },
    {
      "epoch": 0.8627416324633582,
      "grad_norm": 1.5363069772720337,
      "learning_rate": 8.329039357419134e-06,
      "loss": 0.0252,
      "step": 527180
    },
    {
      "epoch": 0.8627743629020116,
      "grad_norm": 0.2992244362831116,
      "learning_rate": 8.328973465205618e-06,
      "loss": 0.0273,
      "step": 527200
    },
    {
      "epoch": 0.862807093340665,
      "grad_norm": 1.0148569345474243,
      "learning_rate": 8.3289075729921e-06,
      "loss": 0.0262,
      "step": 527220
    },
    {
      "epoch": 0.8628398237793183,
      "grad_norm": 0.37621524930000305,
      "learning_rate": 8.328841680778584e-06,
      "loss": 0.0308,
      "step": 527240
    },
    {
      "epoch": 0.8628725542179716,
      "grad_norm": 0.3548121750354767,
      "learning_rate": 8.328775788565065e-06,
      "loss": 0.0271,
      "step": 527260
    },
    {
      "epoch": 0.862905284656625,
      "grad_norm": 5.254439353942871,
      "learning_rate": 8.328709896351549e-06,
      "loss": 0.0263,
      "step": 527280
    },
    {
      "epoch": 0.8629380150952783,
      "grad_norm": 0.5790983438491821,
      "learning_rate": 8.328644004138031e-06,
      "loss": 0.0254,
      "step": 527300
    },
    {
      "epoch": 0.8629707455339316,
      "grad_norm": 0.5615459680557251,
      "learning_rate": 8.328578111924515e-06,
      "loss": 0.0338,
      "step": 527320
    },
    {
      "epoch": 0.863003475972585,
      "grad_norm": 0.25033482909202576,
      "learning_rate": 8.328512219710996e-06,
      "loss": 0.0235,
      "step": 527340
    },
    {
      "epoch": 0.8630362064112383,
      "grad_norm": 0.16244958341121674,
      "learning_rate": 8.32844632749748e-06,
      "loss": 0.025,
      "step": 527360
    },
    {
      "epoch": 0.8630689368498916,
      "grad_norm": 2.718107223510742,
      "learning_rate": 8.328380435283964e-06,
      "loss": 0.0387,
      "step": 527380
    },
    {
      "epoch": 0.863101667288545,
      "grad_norm": 1.608252763748169,
      "learning_rate": 8.328314543070445e-06,
      "loss": 0.0393,
      "step": 527400
    },
    {
      "epoch": 0.8631343977271984,
      "grad_norm": 0.6923239231109619,
      "learning_rate": 8.328248650856929e-06,
      "loss": 0.0323,
      "step": 527420
    },
    {
      "epoch": 0.8631671281658517,
      "grad_norm": 3.082709789276123,
      "learning_rate": 8.328182758643411e-06,
      "loss": 0.0242,
      "step": 527440
    },
    {
      "epoch": 0.863199858604505,
      "grad_norm": 1.5675957202911377,
      "learning_rate": 8.328116866429895e-06,
      "loss": 0.0375,
      "step": 527460
    },
    {
      "epoch": 0.8632325890431584,
      "grad_norm": 1.0479614734649658,
      "learning_rate": 8.328050974216378e-06,
      "loss": 0.0231,
      "step": 527480
    },
    {
      "epoch": 0.8632653194818117,
      "grad_norm": 0.6044169664382935,
      "learning_rate": 8.32798508200286e-06,
      "loss": 0.0318,
      "step": 527500
    },
    {
      "epoch": 0.863298049920465,
      "grad_norm": 0.46762359142303467,
      "learning_rate": 8.327919189789344e-06,
      "loss": 0.0246,
      "step": 527520
    },
    {
      "epoch": 0.8633307803591184,
      "grad_norm": 0.2160458117723465,
      "learning_rate": 8.327853297575827e-06,
      "loss": 0.0237,
      "step": 527540
    },
    {
      "epoch": 0.8633635107977717,
      "grad_norm": 2.3048360347747803,
      "learning_rate": 8.327787405362309e-06,
      "loss": 0.0441,
      "step": 527560
    },
    {
      "epoch": 0.863396241236425,
      "grad_norm": 0.3722016215324402,
      "learning_rate": 8.327721513148793e-06,
      "loss": 0.0304,
      "step": 527580
    },
    {
      "epoch": 0.8634289716750784,
      "grad_norm": 1.10134756565094,
      "learning_rate": 8.327655620935275e-06,
      "loss": 0.0294,
      "step": 527600
    },
    {
      "epoch": 0.8634617021137317,
      "grad_norm": 1.2667512893676758,
      "learning_rate": 8.327589728721758e-06,
      "loss": 0.0374,
      "step": 527620
    },
    {
      "epoch": 0.8634944325523851,
      "grad_norm": 1.4303981065750122,
      "learning_rate": 8.32752383650824e-06,
      "loss": 0.0312,
      "step": 527640
    },
    {
      "epoch": 0.8635271629910384,
      "grad_norm": 0.892500638961792,
      "learning_rate": 8.327457944294724e-06,
      "loss": 0.0296,
      "step": 527660
    },
    {
      "epoch": 0.8635598934296917,
      "grad_norm": 1.0220481157302856,
      "learning_rate": 8.327392052081206e-06,
      "loss": 0.0247,
      "step": 527680
    },
    {
      "epoch": 0.8635926238683451,
      "grad_norm": 1.1270016431808472,
      "learning_rate": 8.327326159867689e-06,
      "loss": 0.025,
      "step": 527700
    },
    {
      "epoch": 0.8636253543069984,
      "grad_norm": 0.9641004800796509,
      "learning_rate": 8.327260267654173e-06,
      "loss": 0.0163,
      "step": 527720
    },
    {
      "epoch": 0.8636580847456518,
      "grad_norm": 0.34996291995048523,
      "learning_rate": 8.327194375440655e-06,
      "loss": 0.0266,
      "step": 527740
    },
    {
      "epoch": 0.8636908151843051,
      "grad_norm": 1.5212087631225586,
      "learning_rate": 8.327128483227138e-06,
      "loss": 0.0291,
      "step": 527760
    },
    {
      "epoch": 0.8637235456229584,
      "grad_norm": 2.2031409740448,
      "learning_rate": 8.32706259101362e-06,
      "loss": 0.0267,
      "step": 527780
    },
    {
      "epoch": 0.8637562760616118,
      "grad_norm": 1.4161040782928467,
      "learning_rate": 8.326996698800104e-06,
      "loss": 0.0262,
      "step": 527800
    },
    {
      "epoch": 0.8637890065002651,
      "grad_norm": 1.595401406288147,
      "learning_rate": 8.326930806586586e-06,
      "loss": 0.0229,
      "step": 527820
    },
    {
      "epoch": 0.8638217369389185,
      "grad_norm": 0.20813791453838348,
      "learning_rate": 8.326864914373069e-06,
      "loss": 0.0396,
      "step": 527840
    },
    {
      "epoch": 0.8638544673775718,
      "grad_norm": 1.3283021450042725,
      "learning_rate": 8.326799022159551e-06,
      "loss": 0.036,
      "step": 527860
    },
    {
      "epoch": 0.8638871978162251,
      "grad_norm": 1.0074920654296875,
      "learning_rate": 8.326733129946035e-06,
      "loss": 0.0295,
      "step": 527880
    },
    {
      "epoch": 0.8639199282548785,
      "grad_norm": 1.3027667999267578,
      "learning_rate": 8.326667237732518e-06,
      "loss": 0.04,
      "step": 527900
    },
    {
      "epoch": 0.8639526586935318,
      "grad_norm": 0.8622549176216125,
      "learning_rate": 8.326601345519e-06,
      "loss": 0.0357,
      "step": 527920
    },
    {
      "epoch": 0.8639853891321851,
      "grad_norm": 0.4991205930709839,
      "learning_rate": 8.326535453305484e-06,
      "loss": 0.0352,
      "step": 527940
    },
    {
      "epoch": 0.8640181195708385,
      "grad_norm": 0.9028831124305725,
      "learning_rate": 8.326469561091967e-06,
      "loss": 0.0266,
      "step": 527960
    },
    {
      "epoch": 0.8640508500094918,
      "grad_norm": 0.60210120677948,
      "learning_rate": 8.32640366887845e-06,
      "loss": 0.0342,
      "step": 527980
    },
    {
      "epoch": 0.8640835804481451,
      "grad_norm": 2.1578667163848877,
      "learning_rate": 8.326337776664933e-06,
      "loss": 0.026,
      "step": 528000
    },
    {
      "epoch": 0.8641163108867985,
      "grad_norm": 1.2823272943496704,
      "learning_rate": 8.326271884451415e-06,
      "loss": 0.034,
      "step": 528020
    },
    {
      "epoch": 0.8641490413254519,
      "grad_norm": 0.9207686185836792,
      "learning_rate": 8.326205992237898e-06,
      "loss": 0.0295,
      "step": 528040
    },
    {
      "epoch": 0.8641817717641052,
      "grad_norm": 0.7608479857444763,
      "learning_rate": 8.326140100024382e-06,
      "loss": 0.0222,
      "step": 528060
    },
    {
      "epoch": 0.8642145022027585,
      "grad_norm": 1.9077943563461304,
      "learning_rate": 8.326074207810864e-06,
      "loss": 0.0335,
      "step": 528080
    },
    {
      "epoch": 0.8642472326414119,
      "grad_norm": 0.5890103578567505,
      "learning_rate": 8.326008315597347e-06,
      "loss": 0.0319,
      "step": 528100
    },
    {
      "epoch": 0.8642799630800652,
      "grad_norm": 1.4498555660247803,
      "learning_rate": 8.32594242338383e-06,
      "loss": 0.0278,
      "step": 528120
    },
    {
      "epoch": 0.8643126935187185,
      "grad_norm": 2.186298370361328,
      "learning_rate": 8.325876531170313e-06,
      "loss": 0.0388,
      "step": 528140
    },
    {
      "epoch": 0.8643454239573719,
      "grad_norm": 1.7284852266311646,
      "learning_rate": 8.325810638956795e-06,
      "loss": 0.0242,
      "step": 528160
    },
    {
      "epoch": 0.8643781543960252,
      "grad_norm": 0.4573356807231903,
      "learning_rate": 8.325744746743278e-06,
      "loss": 0.0296,
      "step": 528180
    },
    {
      "epoch": 0.8644108848346785,
      "grad_norm": 1.9875388145446777,
      "learning_rate": 8.32567885452976e-06,
      "loss": 0.0354,
      "step": 528200
    },
    {
      "epoch": 0.8644436152733319,
      "grad_norm": 1.2491600513458252,
      "learning_rate": 8.325612962316244e-06,
      "loss": 0.0295,
      "step": 528220
    },
    {
      "epoch": 0.8644763457119853,
      "grad_norm": 0.577143669128418,
      "learning_rate": 8.325547070102726e-06,
      "loss": 0.0238,
      "step": 528240
    },
    {
      "epoch": 0.8645090761506385,
      "grad_norm": 1.4289007186889648,
      "learning_rate": 8.32548117788921e-06,
      "loss": 0.0249,
      "step": 528260
    },
    {
      "epoch": 0.8645418065892919,
      "grad_norm": 0.3013670742511749,
      "learning_rate": 8.325415285675693e-06,
      "loss": 0.0321,
      "step": 528280
    },
    {
      "epoch": 0.8645745370279453,
      "grad_norm": 4.149796485900879,
      "learning_rate": 8.325349393462175e-06,
      "loss": 0.0218,
      "step": 528300
    },
    {
      "epoch": 0.8646072674665986,
      "grad_norm": 0.4012623429298401,
      "learning_rate": 8.325283501248658e-06,
      "loss": 0.0403,
      "step": 528320
    },
    {
      "epoch": 0.8646399979052519,
      "grad_norm": 1.5859204530715942,
      "learning_rate": 8.325217609035142e-06,
      "loss": 0.0316,
      "step": 528340
    },
    {
      "epoch": 0.8646727283439053,
      "grad_norm": 0.11192266643047333,
      "learning_rate": 8.325151716821624e-06,
      "loss": 0.0208,
      "step": 528360
    },
    {
      "epoch": 0.8647054587825586,
      "grad_norm": 1.2881364822387695,
      "learning_rate": 8.325085824608107e-06,
      "loss": 0.0241,
      "step": 528380
    },
    {
      "epoch": 0.8647381892212119,
      "grad_norm": 2.641117811203003,
      "learning_rate": 8.325019932394591e-06,
      "loss": 0.0275,
      "step": 528400
    },
    {
      "epoch": 0.8647709196598653,
      "grad_norm": 0.8277157545089722,
      "learning_rate": 8.324954040181073e-06,
      "loss": 0.0254,
      "step": 528420
    },
    {
      "epoch": 0.8648036500985187,
      "grad_norm": 0.548283040523529,
      "learning_rate": 8.324888147967556e-06,
      "loss": 0.026,
      "step": 528440
    },
    {
      "epoch": 0.8648363805371719,
      "grad_norm": 0.9011216163635254,
      "learning_rate": 8.324822255754038e-06,
      "loss": 0.0284,
      "step": 528460
    },
    {
      "epoch": 0.8648691109758253,
      "grad_norm": 0.6191990375518799,
      "learning_rate": 8.324756363540522e-06,
      "loss": 0.0206,
      "step": 528480
    },
    {
      "epoch": 0.8649018414144787,
      "grad_norm": 0.6525639295578003,
      "learning_rate": 8.324690471327004e-06,
      "loss": 0.0232,
      "step": 528500
    },
    {
      "epoch": 0.8649345718531319,
      "grad_norm": 0.6983832716941833,
      "learning_rate": 8.324624579113487e-06,
      "loss": 0.0332,
      "step": 528520
    },
    {
      "epoch": 0.8649673022917853,
      "grad_norm": 1.5096276998519897,
      "learning_rate": 8.32455868689997e-06,
      "loss": 0.0167,
      "step": 528540
    },
    {
      "epoch": 0.8650000327304387,
      "grad_norm": 0.4159318506717682,
      "learning_rate": 8.324492794686453e-06,
      "loss": 0.0197,
      "step": 528560
    },
    {
      "epoch": 0.865032763169092,
      "grad_norm": 1.001823902130127,
      "learning_rate": 8.324426902472935e-06,
      "loss": 0.0292,
      "step": 528580
    },
    {
      "epoch": 0.8650654936077453,
      "grad_norm": 2.2983005046844482,
      "learning_rate": 8.324361010259418e-06,
      "loss": 0.0226,
      "step": 528600
    },
    {
      "epoch": 0.8650982240463987,
      "grad_norm": 0.7391211986541748,
      "learning_rate": 8.3242951180459e-06,
      "loss": 0.0257,
      "step": 528620
    },
    {
      "epoch": 0.8651309544850521,
      "grad_norm": 1.3083455562591553,
      "learning_rate": 8.324229225832384e-06,
      "loss": 0.0266,
      "step": 528640
    },
    {
      "epoch": 0.8651636849237053,
      "grad_norm": 0.8480280637741089,
      "learning_rate": 8.324163333618866e-06,
      "loss": 0.0255,
      "step": 528660
    },
    {
      "epoch": 0.8651964153623587,
      "grad_norm": 0.29730620980262756,
      "learning_rate": 8.32409744140535e-06,
      "loss": 0.0183,
      "step": 528680
    },
    {
      "epoch": 0.8652291458010121,
      "grad_norm": 1.078392505645752,
      "learning_rate": 8.324031549191833e-06,
      "loss": 0.0318,
      "step": 528700
    },
    {
      "epoch": 0.8652618762396653,
      "grad_norm": 1.8634881973266602,
      "learning_rate": 8.323965656978315e-06,
      "loss": 0.0345,
      "step": 528720
    },
    {
      "epoch": 0.8652946066783187,
      "grad_norm": 0.8425165414810181,
      "learning_rate": 8.323899764764798e-06,
      "loss": 0.0237,
      "step": 528740
    },
    {
      "epoch": 0.8653273371169721,
      "grad_norm": 2.3587253093719482,
      "learning_rate": 8.323833872551282e-06,
      "loss": 0.0311,
      "step": 528760
    },
    {
      "epoch": 0.8653600675556253,
      "grad_norm": 0.7826652526855469,
      "learning_rate": 8.323767980337764e-06,
      "loss": 0.0348,
      "step": 528780
    },
    {
      "epoch": 0.8653927979942787,
      "grad_norm": 0.9064082503318787,
      "learning_rate": 8.323702088124247e-06,
      "loss": 0.0321,
      "step": 528800
    },
    {
      "epoch": 0.8654255284329321,
      "grad_norm": 0.6910713315010071,
      "learning_rate": 8.323636195910731e-06,
      "loss": 0.0346,
      "step": 528820
    },
    {
      "epoch": 0.8654582588715855,
      "grad_norm": 1.3328019380569458,
      "learning_rate": 8.323570303697213e-06,
      "loss": 0.0204,
      "step": 528840
    },
    {
      "epoch": 0.8654909893102387,
      "grad_norm": 1.7594631910324097,
      "learning_rate": 8.323504411483696e-06,
      "loss": 0.033,
      "step": 528860
    },
    {
      "epoch": 0.8655237197488921,
      "grad_norm": 1.250282883644104,
      "learning_rate": 8.323438519270178e-06,
      "loss": 0.0262,
      "step": 528880
    },
    {
      "epoch": 0.8655564501875455,
      "grad_norm": 4.950135231018066,
      "learning_rate": 8.323372627056662e-06,
      "loss": 0.0275,
      "step": 528900
    },
    {
      "epoch": 0.8655891806261987,
      "grad_norm": 1.2562379837036133,
      "learning_rate": 8.323306734843144e-06,
      "loss": 0.0351,
      "step": 528920
    },
    {
      "epoch": 0.8656219110648521,
      "grad_norm": 0.35193273425102234,
      "learning_rate": 8.323240842629627e-06,
      "loss": 0.0383,
      "step": 528940
    },
    {
      "epoch": 0.8656546415035055,
      "grad_norm": 1.5026195049285889,
      "learning_rate": 8.32317495041611e-06,
      "loss": 0.0366,
      "step": 528960
    },
    {
      "epoch": 0.8656873719421587,
      "grad_norm": 1.0010231733322144,
      "learning_rate": 8.323109058202593e-06,
      "loss": 0.0391,
      "step": 528980
    },
    {
      "epoch": 0.8657201023808121,
      "grad_norm": 1.8254835605621338,
      "learning_rate": 8.323043165989075e-06,
      "loss": 0.03,
      "step": 529000
    },
    {
      "epoch": 0.8657528328194655,
      "grad_norm": 1.3347886800765991,
      "learning_rate": 8.322977273775558e-06,
      "loss": 0.0329,
      "step": 529020
    },
    {
      "epoch": 0.8657855632581188,
      "grad_norm": 3.1308529376983643,
      "learning_rate": 8.32291138156204e-06,
      "loss": 0.0291,
      "step": 529040
    },
    {
      "epoch": 0.8658182936967721,
      "grad_norm": 2.3160479068756104,
      "learning_rate": 8.322845489348524e-06,
      "loss": 0.0252,
      "step": 529060
    },
    {
      "epoch": 0.8658510241354255,
      "grad_norm": 0.1380583494901657,
      "learning_rate": 8.322779597135007e-06,
      "loss": 0.0307,
      "step": 529080
    },
    {
      "epoch": 0.8658837545740788,
      "grad_norm": 0.7281832695007324,
      "learning_rate": 8.32271370492149e-06,
      "loss": 0.0352,
      "step": 529100
    },
    {
      "epoch": 0.8659164850127321,
      "grad_norm": 1.242234230041504,
      "learning_rate": 8.322647812707973e-06,
      "loss": 0.0318,
      "step": 529120
    },
    {
      "epoch": 0.8659492154513855,
      "grad_norm": 1.3245151042938232,
      "learning_rate": 8.322581920494457e-06,
      "loss": 0.0323,
      "step": 529140
    },
    {
      "epoch": 0.8659819458900389,
      "grad_norm": 0.371908038854599,
      "learning_rate": 8.322516028280938e-06,
      "loss": 0.0297,
      "step": 529160
    },
    {
      "epoch": 0.8660146763286921,
      "grad_norm": 1.4013923406600952,
      "learning_rate": 8.322450136067422e-06,
      "loss": 0.029,
      "step": 529180
    },
    {
      "epoch": 0.8660474067673455,
      "grad_norm": 0.7557905316352844,
      "learning_rate": 8.322384243853906e-06,
      "loss": 0.0371,
      "step": 529200
    },
    {
      "epoch": 0.8660801372059989,
      "grad_norm": 0.7695618271827698,
      "learning_rate": 8.322318351640388e-06,
      "loss": 0.0235,
      "step": 529220
    },
    {
      "epoch": 0.8661128676446522,
      "grad_norm": 1.3621611595153809,
      "learning_rate": 8.322252459426871e-06,
      "loss": 0.0274,
      "step": 529240
    },
    {
      "epoch": 0.8661455980833055,
      "grad_norm": 1.6350762844085693,
      "learning_rate": 8.322186567213353e-06,
      "loss": 0.0343,
      "step": 529260
    },
    {
      "epoch": 0.8661783285219589,
      "grad_norm": 0.35009634494781494,
      "learning_rate": 8.322120674999837e-06,
      "loss": 0.0282,
      "step": 529280
    },
    {
      "epoch": 0.8662110589606122,
      "grad_norm": 0.11811608076095581,
      "learning_rate": 8.322054782786318e-06,
      "loss": 0.0188,
      "step": 529300
    },
    {
      "epoch": 0.8662437893992655,
      "grad_norm": 1.0241186618804932,
      "learning_rate": 8.321988890572802e-06,
      "loss": 0.0333,
      "step": 529320
    },
    {
      "epoch": 0.8662765198379189,
      "grad_norm": 0.19539709389209747,
      "learning_rate": 8.321922998359284e-06,
      "loss": 0.0297,
      "step": 529340
    },
    {
      "epoch": 0.8663092502765722,
      "grad_norm": 0.723156750202179,
      "learning_rate": 8.321857106145768e-06,
      "loss": 0.0176,
      "step": 529360
    },
    {
      "epoch": 0.8663419807152255,
      "grad_norm": 0.8603516221046448,
      "learning_rate": 8.32179121393225e-06,
      "loss": 0.0254,
      "step": 529380
    },
    {
      "epoch": 0.8663747111538789,
      "grad_norm": 1.0896761417388916,
      "learning_rate": 8.321725321718733e-06,
      "loss": 0.037,
      "step": 529400
    },
    {
      "epoch": 0.8664074415925322,
      "grad_norm": 0.939877986907959,
      "learning_rate": 8.321659429505215e-06,
      "loss": 0.0251,
      "step": 529420
    },
    {
      "epoch": 0.8664401720311855,
      "grad_norm": 0.6386693716049194,
      "learning_rate": 8.321593537291698e-06,
      "loss": 0.0274,
      "step": 529440
    },
    {
      "epoch": 0.8664729024698389,
      "grad_norm": 0.4025266766548157,
      "learning_rate": 8.321527645078182e-06,
      "loss": 0.0243,
      "step": 529460
    },
    {
      "epoch": 0.8665056329084923,
      "grad_norm": 1.2364649772644043,
      "learning_rate": 8.321461752864664e-06,
      "loss": 0.02,
      "step": 529480
    },
    {
      "epoch": 0.8665383633471456,
      "grad_norm": 0.47566160559654236,
      "learning_rate": 8.321395860651148e-06,
      "loss": 0.0213,
      "step": 529500
    },
    {
      "epoch": 0.8665710937857989,
      "grad_norm": 0.37345725297927856,
      "learning_rate": 8.321329968437631e-06,
      "loss": 0.0333,
      "step": 529520
    },
    {
      "epoch": 0.8666038242244523,
      "grad_norm": 0.7550824880599976,
      "learning_rate": 8.321264076224113e-06,
      "loss": 0.0351,
      "step": 529540
    },
    {
      "epoch": 0.8666365546631056,
      "grad_norm": 0.7202475070953369,
      "learning_rate": 8.321198184010597e-06,
      "loss": 0.0376,
      "step": 529560
    },
    {
      "epoch": 0.8666692851017589,
      "grad_norm": 0.8384798765182495,
      "learning_rate": 8.32113229179708e-06,
      "loss": 0.033,
      "step": 529580
    },
    {
      "epoch": 0.8667020155404123,
      "grad_norm": 0.34899985790252686,
      "learning_rate": 8.321066399583562e-06,
      "loss": 0.0288,
      "step": 529600
    },
    {
      "epoch": 0.8667347459790656,
      "grad_norm": 0.6087469458580017,
      "learning_rate": 8.321000507370046e-06,
      "loss": 0.0265,
      "step": 529620
    },
    {
      "epoch": 0.8667674764177189,
      "grad_norm": 0.4293595254421234,
      "learning_rate": 8.320934615156528e-06,
      "loss": 0.0382,
      "step": 529640
    },
    {
      "epoch": 0.8668002068563723,
      "grad_norm": 0.5165281295776367,
      "learning_rate": 8.320868722943011e-06,
      "loss": 0.0221,
      "step": 529660
    },
    {
      "epoch": 0.8668329372950256,
      "grad_norm": 0.6280739903450012,
      "learning_rate": 8.320802830729493e-06,
      "loss": 0.0261,
      "step": 529680
    },
    {
      "epoch": 0.866865667733679,
      "grad_norm": 1.7815114259719849,
      "learning_rate": 8.320736938515977e-06,
      "loss": 0.0186,
      "step": 529700
    },
    {
      "epoch": 0.8668983981723323,
      "grad_norm": 0.5997102856636047,
      "learning_rate": 8.320671046302459e-06,
      "loss": 0.0282,
      "step": 529720
    },
    {
      "epoch": 0.8669311286109856,
      "grad_norm": 0.7636722922325134,
      "learning_rate": 8.320605154088942e-06,
      "loss": 0.0296,
      "step": 529740
    },
    {
      "epoch": 0.866963859049639,
      "grad_norm": 0.48832568526268005,
      "learning_rate": 8.320539261875424e-06,
      "loss": 0.025,
      "step": 529760
    },
    {
      "epoch": 0.8669965894882923,
      "grad_norm": 0.5680773258209229,
      "learning_rate": 8.320473369661908e-06,
      "loss": 0.0296,
      "step": 529780
    },
    {
      "epoch": 0.8670293199269457,
      "grad_norm": 0.8574545979499817,
      "learning_rate": 8.32040747744839e-06,
      "loss": 0.0275,
      "step": 529800
    },
    {
      "epoch": 0.867062050365599,
      "grad_norm": 1.579884648323059,
      "learning_rate": 8.320341585234873e-06,
      "loss": 0.031,
      "step": 529820
    },
    {
      "epoch": 0.8670947808042523,
      "grad_norm": 0.4221930503845215,
      "learning_rate": 8.320275693021357e-06,
      "loss": 0.0275,
      "step": 529840
    },
    {
      "epoch": 0.8671275112429057,
      "grad_norm": 0.6402816772460938,
      "learning_rate": 8.320209800807839e-06,
      "loss": 0.0302,
      "step": 529860
    },
    {
      "epoch": 0.867160241681559,
      "grad_norm": 0.7694893479347229,
      "learning_rate": 8.320143908594322e-06,
      "loss": 0.0211,
      "step": 529880
    },
    {
      "epoch": 0.8671929721202124,
      "grad_norm": 10.222168922424316,
      "learning_rate": 8.320078016380804e-06,
      "loss": 0.0382,
      "step": 529900
    },
    {
      "epoch": 0.8672257025588657,
      "grad_norm": 0.9047702550888062,
      "learning_rate": 8.320012124167288e-06,
      "loss": 0.0265,
      "step": 529920
    },
    {
      "epoch": 0.867258432997519,
      "grad_norm": 0.644302248954773,
      "learning_rate": 8.319946231953771e-06,
      "loss": 0.0283,
      "step": 529940
    },
    {
      "epoch": 0.8672911634361724,
      "grad_norm": 2.2120022773742676,
      "learning_rate": 8.319880339740253e-06,
      "loss": 0.0358,
      "step": 529960
    },
    {
      "epoch": 0.8673238938748257,
      "grad_norm": 2.358736991882324,
      "learning_rate": 8.319814447526737e-06,
      "loss": 0.0349,
      "step": 529980
    },
    {
      "epoch": 0.867356624313479,
      "grad_norm": 0.459715336561203,
      "learning_rate": 8.31974855531322e-06,
      "loss": 0.0185,
      "step": 530000
    },
    {
      "epoch": 0.8673893547521324,
      "grad_norm": 1.3381556272506714,
      "learning_rate": 8.319682663099702e-06,
      "loss": 0.0296,
      "step": 530020
    },
    {
      "epoch": 0.8674220851907857,
      "grad_norm": 0.48584410548210144,
      "learning_rate": 8.319616770886186e-06,
      "loss": 0.0277,
      "step": 530040
    },
    {
      "epoch": 0.867454815629439,
      "grad_norm": 1.969588041305542,
      "learning_rate": 8.319550878672668e-06,
      "loss": 0.0247,
      "step": 530060
    },
    {
      "epoch": 0.8674875460680924,
      "grad_norm": 0.3129746913909912,
      "learning_rate": 8.319484986459151e-06,
      "loss": 0.0304,
      "step": 530080
    },
    {
      "epoch": 0.8675202765067458,
      "grad_norm": 1.2752370834350586,
      "learning_rate": 8.319419094245633e-06,
      "loss": 0.0299,
      "step": 530100
    },
    {
      "epoch": 0.867553006945399,
      "grad_norm": 0.5754455924034119,
      "learning_rate": 8.319353202032117e-06,
      "loss": 0.0386,
      "step": 530120
    },
    {
      "epoch": 0.8675857373840524,
      "grad_norm": 1.1077711582183838,
      "learning_rate": 8.319287309818599e-06,
      "loss": 0.0353,
      "step": 530140
    },
    {
      "epoch": 0.8676184678227058,
      "grad_norm": 3.679356575012207,
      "learning_rate": 8.319221417605082e-06,
      "loss": 0.0495,
      "step": 530160
    },
    {
      "epoch": 0.8676511982613591,
      "grad_norm": 0.9790607690811157,
      "learning_rate": 8.319155525391566e-06,
      "loss": 0.0272,
      "step": 530180
    },
    {
      "epoch": 0.8676839287000124,
      "grad_norm": 0.45547419786453247,
      "learning_rate": 8.319089633178048e-06,
      "loss": 0.036,
      "step": 530200
    },
    {
      "epoch": 0.8677166591386658,
      "grad_norm": 3.9580729007720947,
      "learning_rate": 8.319023740964531e-06,
      "loss": 0.0256,
      "step": 530220
    },
    {
      "epoch": 0.8677493895773191,
      "grad_norm": 0.8403286337852478,
      "learning_rate": 8.318957848751013e-06,
      "loss": 0.0313,
      "step": 530240
    },
    {
      "epoch": 0.8677821200159724,
      "grad_norm": 0.8480373620986938,
      "learning_rate": 8.318891956537497e-06,
      "loss": 0.0272,
      "step": 530260
    },
    {
      "epoch": 0.8678148504546258,
      "grad_norm": 1.1119781732559204,
      "learning_rate": 8.318826064323979e-06,
      "loss": 0.036,
      "step": 530280
    },
    {
      "epoch": 0.8678475808932792,
      "grad_norm": 1.4714564085006714,
      "learning_rate": 8.318760172110462e-06,
      "loss": 0.0246,
      "step": 530300
    },
    {
      "epoch": 0.8678803113319324,
      "grad_norm": 1.7416564226150513,
      "learning_rate": 8.318694279896946e-06,
      "loss": 0.0274,
      "step": 530320
    },
    {
      "epoch": 0.8679130417705858,
      "grad_norm": 1.5405089855194092,
      "learning_rate": 8.318628387683428e-06,
      "loss": 0.0211,
      "step": 530340
    },
    {
      "epoch": 0.8679457722092392,
      "grad_norm": 0.8916006088256836,
      "learning_rate": 8.318562495469911e-06,
      "loss": 0.027,
      "step": 530360
    },
    {
      "epoch": 0.8679785026478924,
      "grad_norm": 0.8483636975288391,
      "learning_rate": 8.318496603256395e-06,
      "loss": 0.0338,
      "step": 530380
    },
    {
      "epoch": 0.8680112330865458,
      "grad_norm": 0.754559338092804,
      "learning_rate": 8.318430711042877e-06,
      "loss": 0.0256,
      "step": 530400
    },
    {
      "epoch": 0.8680439635251992,
      "grad_norm": 0.8979173302650452,
      "learning_rate": 8.31836481882936e-06,
      "loss": 0.0411,
      "step": 530420
    },
    {
      "epoch": 0.8680766939638525,
      "grad_norm": 1.5707565546035767,
      "learning_rate": 8.318298926615842e-06,
      "loss": 0.0372,
      "step": 530440
    },
    {
      "epoch": 0.8681094244025058,
      "grad_norm": 0.40648317337036133,
      "learning_rate": 8.318233034402326e-06,
      "loss": 0.0235,
      "step": 530460
    },
    {
      "epoch": 0.8681421548411592,
      "grad_norm": 1.2686132192611694,
      "learning_rate": 8.318167142188808e-06,
      "loss": 0.0189,
      "step": 530480
    },
    {
      "epoch": 0.8681748852798126,
      "grad_norm": 2.864063262939453,
      "learning_rate": 8.318101249975291e-06,
      "loss": 0.0298,
      "step": 530500
    },
    {
      "epoch": 0.8682076157184658,
      "grad_norm": 6.503546237945557,
      "learning_rate": 8.318035357761775e-06,
      "loss": 0.0318,
      "step": 530520
    },
    {
      "epoch": 0.8682403461571192,
      "grad_norm": 0.664519727230072,
      "learning_rate": 8.317969465548257e-06,
      "loss": 0.029,
      "step": 530540
    },
    {
      "epoch": 0.8682730765957726,
      "grad_norm": 1.2299915552139282,
      "learning_rate": 8.31790357333474e-06,
      "loss": 0.0256,
      "step": 530560
    },
    {
      "epoch": 0.8683058070344258,
      "grad_norm": 1.1882658004760742,
      "learning_rate": 8.317837681121222e-06,
      "loss": 0.0258,
      "step": 530580
    },
    {
      "epoch": 0.8683385374730792,
      "grad_norm": 2.591055393218994,
      "learning_rate": 8.317771788907706e-06,
      "loss": 0.0382,
      "step": 530600
    },
    {
      "epoch": 0.8683712679117326,
      "grad_norm": 0.8047334551811218,
      "learning_rate": 8.317705896694188e-06,
      "loss": 0.0332,
      "step": 530620
    },
    {
      "epoch": 0.8684039983503858,
      "grad_norm": 1.2193570137023926,
      "learning_rate": 8.317640004480671e-06,
      "loss": 0.0367,
      "step": 530640
    },
    {
      "epoch": 0.8684367287890392,
      "grad_norm": 0.7986655235290527,
      "learning_rate": 8.317574112267153e-06,
      "loss": 0.0193,
      "step": 530660
    },
    {
      "epoch": 0.8684694592276926,
      "grad_norm": 1.6129101514816284,
      "learning_rate": 8.317508220053637e-06,
      "loss": 0.0375,
      "step": 530680
    },
    {
      "epoch": 0.868502189666346,
      "grad_norm": 3.1793975830078125,
      "learning_rate": 8.317442327840119e-06,
      "loss": 0.0293,
      "step": 530700
    },
    {
      "epoch": 0.8685349201049992,
      "grad_norm": 0.23287954926490784,
      "learning_rate": 8.317376435626602e-06,
      "loss": 0.0409,
      "step": 530720
    },
    {
      "epoch": 0.8685676505436526,
      "grad_norm": 0.8457019925117493,
      "learning_rate": 8.317310543413086e-06,
      "loss": 0.0289,
      "step": 530740
    },
    {
      "epoch": 0.868600380982306,
      "grad_norm": 1.114308476448059,
      "learning_rate": 8.317244651199568e-06,
      "loss": 0.0365,
      "step": 530760
    },
    {
      "epoch": 0.8686331114209592,
      "grad_norm": 0.5807024240493774,
      "learning_rate": 8.317178758986051e-06,
      "loss": 0.0336,
      "step": 530780
    },
    {
      "epoch": 0.8686658418596126,
      "grad_norm": 1.5792206525802612,
      "learning_rate": 8.317112866772535e-06,
      "loss": 0.0209,
      "step": 530800
    },
    {
      "epoch": 0.868698572298266,
      "grad_norm": 0.4145898222923279,
      "learning_rate": 8.317046974559017e-06,
      "loss": 0.0301,
      "step": 530820
    },
    {
      "epoch": 0.8687313027369192,
      "grad_norm": 5.964541435241699,
      "learning_rate": 8.3169810823455e-06,
      "loss": 0.0281,
      "step": 530840
    },
    {
      "epoch": 0.8687640331755726,
      "grad_norm": 1.4135335683822632,
      "learning_rate": 8.316915190131982e-06,
      "loss": 0.029,
      "step": 530860
    },
    {
      "epoch": 0.868796763614226,
      "grad_norm": 0.17509810626506805,
      "learning_rate": 8.316849297918466e-06,
      "loss": 0.0254,
      "step": 530880
    },
    {
      "epoch": 0.8688294940528793,
      "grad_norm": 0.2942972183227539,
      "learning_rate": 8.31678340570495e-06,
      "loss": 0.0241,
      "step": 530900
    },
    {
      "epoch": 0.8688622244915326,
      "grad_norm": 1.8551406860351562,
      "learning_rate": 8.316717513491431e-06,
      "loss": 0.0277,
      "step": 530920
    },
    {
      "epoch": 0.868894954930186,
      "grad_norm": 0.2747792601585388,
      "learning_rate": 8.316651621277915e-06,
      "loss": 0.0206,
      "step": 530940
    },
    {
      "epoch": 0.8689276853688394,
      "grad_norm": 0.7535421848297119,
      "learning_rate": 8.316585729064397e-06,
      "loss": 0.024,
      "step": 530960
    },
    {
      "epoch": 0.8689604158074926,
      "grad_norm": 0.8624778389930725,
      "learning_rate": 8.31651983685088e-06,
      "loss": 0.0203,
      "step": 530980
    },
    {
      "epoch": 0.868993146246146,
      "grad_norm": 3.0991764068603516,
      "learning_rate": 8.316453944637362e-06,
      "loss": 0.0291,
      "step": 531000
    },
    {
      "epoch": 0.8690258766847994,
      "grad_norm": 0.5413619875907898,
      "learning_rate": 8.316388052423846e-06,
      "loss": 0.0313,
      "step": 531020
    },
    {
      "epoch": 0.8690586071234526,
      "grad_norm": 1.4502573013305664,
      "learning_rate": 8.316322160210328e-06,
      "loss": 0.0319,
      "step": 531040
    },
    {
      "epoch": 0.869091337562106,
      "grad_norm": 2.4391841888427734,
      "learning_rate": 8.316256267996811e-06,
      "loss": 0.032,
      "step": 531060
    },
    {
      "epoch": 0.8691240680007594,
      "grad_norm": 1.9646611213684082,
      "learning_rate": 8.316190375783293e-06,
      "loss": 0.0303,
      "step": 531080
    },
    {
      "epoch": 0.8691567984394127,
      "grad_norm": 0.39825713634490967,
      "learning_rate": 8.316124483569777e-06,
      "loss": 0.0237,
      "step": 531100
    },
    {
      "epoch": 0.869189528878066,
      "grad_norm": 1.0105518102645874,
      "learning_rate": 8.31605859135626e-06,
      "loss": 0.0315,
      "step": 531120
    },
    {
      "epoch": 0.8692222593167194,
      "grad_norm": 0.21588510274887085,
      "learning_rate": 8.315992699142742e-06,
      "loss": 0.0337,
      "step": 531140
    },
    {
      "epoch": 0.8692549897553727,
      "grad_norm": 0.7191116213798523,
      "learning_rate": 8.315926806929226e-06,
      "loss": 0.0287,
      "step": 531160
    },
    {
      "epoch": 0.869287720194026,
      "grad_norm": 0.5029235482215881,
      "learning_rate": 8.31586091471571e-06,
      "loss": 0.0404,
      "step": 531180
    },
    {
      "epoch": 0.8693204506326794,
      "grad_norm": 0.7680485844612122,
      "learning_rate": 8.315795022502191e-06,
      "loss": 0.0294,
      "step": 531200
    },
    {
      "epoch": 0.8693531810713327,
      "grad_norm": 1.1262644529342651,
      "learning_rate": 8.315729130288675e-06,
      "loss": 0.0271,
      "step": 531220
    },
    {
      "epoch": 0.869385911509986,
      "grad_norm": 0.5942925214767456,
      "learning_rate": 8.315663238075159e-06,
      "loss": 0.0301,
      "step": 531240
    },
    {
      "epoch": 0.8694186419486394,
      "grad_norm": 0.8518444895744324,
      "learning_rate": 8.31559734586164e-06,
      "loss": 0.0391,
      "step": 531260
    },
    {
      "epoch": 0.8694513723872928,
      "grad_norm": 1.130781888961792,
      "learning_rate": 8.315531453648124e-06,
      "loss": 0.0372,
      "step": 531280
    },
    {
      "epoch": 0.8694841028259461,
      "grad_norm": 0.4597526490688324,
      "learning_rate": 8.315465561434606e-06,
      "loss": 0.0281,
      "step": 531300
    },
    {
      "epoch": 0.8695168332645994,
      "grad_norm": 0.4635761082172394,
      "learning_rate": 8.31539966922109e-06,
      "loss": 0.0315,
      "step": 531320
    },
    {
      "epoch": 0.8695495637032528,
      "grad_norm": 1.0804617404937744,
      "learning_rate": 8.315333777007571e-06,
      "loss": 0.0313,
      "step": 531340
    },
    {
      "epoch": 0.8695822941419061,
      "grad_norm": 1.2046949863433838,
      "learning_rate": 8.315267884794055e-06,
      "loss": 0.0383,
      "step": 531360
    },
    {
      "epoch": 0.8696150245805594,
      "grad_norm": 1.124364972114563,
      "learning_rate": 8.315201992580537e-06,
      "loss": 0.025,
      "step": 531380
    },
    {
      "epoch": 0.8696477550192128,
      "grad_norm": 0.21648041903972626,
      "learning_rate": 8.31513610036702e-06,
      "loss": 0.0314,
      "step": 531400
    },
    {
      "epoch": 0.8696804854578661,
      "grad_norm": 0.745612621307373,
      "learning_rate": 8.315070208153502e-06,
      "loss": 0.0195,
      "step": 531420
    },
    {
      "epoch": 0.8697132158965194,
      "grad_norm": 0.5233011245727539,
      "learning_rate": 8.315004315939986e-06,
      "loss": 0.0255,
      "step": 531440
    },
    {
      "epoch": 0.8697459463351728,
      "grad_norm": 0.9819817543029785,
      "learning_rate": 8.314938423726468e-06,
      "loss": 0.0341,
      "step": 531460
    },
    {
      "epoch": 0.8697786767738261,
      "grad_norm": 1.3713034391403198,
      "learning_rate": 8.314872531512951e-06,
      "loss": 0.0369,
      "step": 531480
    },
    {
      "epoch": 0.8698114072124795,
      "grad_norm": 0.40760648250579834,
      "learning_rate": 8.314806639299433e-06,
      "loss": 0.0296,
      "step": 531500
    },
    {
      "epoch": 0.8698441376511328,
      "grad_norm": 0.22420741617679596,
      "learning_rate": 8.314740747085917e-06,
      "loss": 0.0342,
      "step": 531520
    },
    {
      "epoch": 0.8698768680897861,
      "grad_norm": 0.6433711647987366,
      "learning_rate": 8.3146748548724e-06,
      "loss": 0.0209,
      "step": 531540
    },
    {
      "epoch": 0.8699095985284395,
      "grad_norm": 0.37123140692710876,
      "learning_rate": 8.314608962658882e-06,
      "loss": 0.0337,
      "step": 531560
    },
    {
      "epoch": 0.8699423289670928,
      "grad_norm": 0.8181301951408386,
      "learning_rate": 8.314543070445366e-06,
      "loss": 0.0345,
      "step": 531580
    },
    {
      "epoch": 0.8699750594057462,
      "grad_norm": 0.3485803008079529,
      "learning_rate": 8.31447717823185e-06,
      "loss": 0.0394,
      "step": 531600
    },
    {
      "epoch": 0.8700077898443995,
      "grad_norm": 4.381295204162598,
      "learning_rate": 8.314411286018332e-06,
      "loss": 0.0313,
      "step": 531620
    },
    {
      "epoch": 0.8700405202830528,
      "grad_norm": 0.8476465940475464,
      "learning_rate": 8.314345393804815e-06,
      "loss": 0.0302,
      "step": 531640
    },
    {
      "epoch": 0.8700732507217062,
      "grad_norm": 0.5367317199707031,
      "learning_rate": 8.314279501591299e-06,
      "loss": 0.0344,
      "step": 531660
    },
    {
      "epoch": 0.8701059811603595,
      "grad_norm": 0.8648319840431213,
      "learning_rate": 8.31421360937778e-06,
      "loss": 0.0341,
      "step": 531680
    },
    {
      "epoch": 0.8701387115990129,
      "grad_norm": 1.7743141651153564,
      "learning_rate": 8.314147717164264e-06,
      "loss": 0.0193,
      "step": 531700
    },
    {
      "epoch": 0.8701714420376662,
      "grad_norm": 0.3194003999233246,
      "learning_rate": 8.314081824950746e-06,
      "loss": 0.0247,
      "step": 531720
    },
    {
      "epoch": 0.8702041724763195,
      "grad_norm": 3.4999401569366455,
      "learning_rate": 8.31401593273723e-06,
      "loss": 0.0421,
      "step": 531740
    },
    {
      "epoch": 0.8702369029149729,
      "grad_norm": 0.9238458871841431,
      "learning_rate": 8.313950040523712e-06,
      "loss": 0.0374,
      "step": 531760
    },
    {
      "epoch": 0.8702696333536262,
      "grad_norm": 1.14907705783844,
      "learning_rate": 8.313884148310195e-06,
      "loss": 0.031,
      "step": 531780
    },
    {
      "epoch": 0.8703023637922795,
      "grad_norm": 0.37809085845947266,
      "learning_rate": 8.313818256096677e-06,
      "loss": 0.0392,
      "step": 531800
    },
    {
      "epoch": 0.8703350942309329,
      "grad_norm": 1.2283345460891724,
      "learning_rate": 8.31375236388316e-06,
      "loss": 0.0305,
      "step": 531820
    },
    {
      "epoch": 0.8703678246695862,
      "grad_norm": 0.6676774621009827,
      "learning_rate": 8.313686471669643e-06,
      "loss": 0.0196,
      "step": 531840
    },
    {
      "epoch": 0.8704005551082395,
      "grad_norm": 0.7553445100784302,
      "learning_rate": 8.313620579456126e-06,
      "loss": 0.018,
      "step": 531860
    },
    {
      "epoch": 0.8704332855468929,
      "grad_norm": 1.1714783906936646,
      "learning_rate": 8.313554687242608e-06,
      "loss": 0.0272,
      "step": 531880
    },
    {
      "epoch": 0.8704660159855463,
      "grad_norm": 0.6825314164161682,
      "learning_rate": 8.313488795029092e-06,
      "loss": 0.0219,
      "step": 531900
    },
    {
      "epoch": 0.8704987464241996,
      "grad_norm": 1.4656089544296265,
      "learning_rate": 8.313422902815575e-06,
      "loss": 0.0234,
      "step": 531920
    },
    {
      "epoch": 0.8705314768628529,
      "grad_norm": 0.41007328033447266,
      "learning_rate": 8.313357010602057e-06,
      "loss": 0.031,
      "step": 531940
    },
    {
      "epoch": 0.8705642073015063,
      "grad_norm": 0.8050817847251892,
      "learning_rate": 8.31329111838854e-06,
      "loss": 0.02,
      "step": 531960
    },
    {
      "epoch": 0.8705969377401596,
      "grad_norm": 1.5003620386123657,
      "learning_rate": 8.313225226175024e-06,
      "loss": 0.0262,
      "step": 531980
    },
    {
      "epoch": 0.8706296681788129,
      "grad_norm": 0.38662636280059814,
      "learning_rate": 8.313159333961506e-06,
      "loss": 0.0243,
      "step": 532000
    },
    {
      "epoch": 0.8706623986174663,
      "grad_norm": 0.20676544308662415,
      "learning_rate": 8.31309344174799e-06,
      "loss": 0.0294,
      "step": 532020
    },
    {
      "epoch": 0.8706951290561196,
      "grad_norm": 3.9000837802886963,
      "learning_rate": 8.313027549534473e-06,
      "loss": 0.0187,
      "step": 532040
    },
    {
      "epoch": 0.8707278594947729,
      "grad_norm": 0.8625229001045227,
      "learning_rate": 8.312961657320955e-06,
      "loss": 0.0285,
      "step": 532060
    },
    {
      "epoch": 0.8707605899334263,
      "grad_norm": 0.7792020440101624,
      "learning_rate": 8.312895765107439e-06,
      "loss": 0.0268,
      "step": 532080
    },
    {
      "epoch": 0.8707933203720796,
      "grad_norm": 0.7257170081138611,
      "learning_rate": 8.31282987289392e-06,
      "loss": 0.0299,
      "step": 532100
    },
    {
      "epoch": 0.8708260508107329,
      "grad_norm": 0.818613588809967,
      "learning_rate": 8.312763980680404e-06,
      "loss": 0.031,
      "step": 532120
    },
    {
      "epoch": 0.8708587812493863,
      "grad_norm": 0.6199223399162292,
      "learning_rate": 8.312698088466886e-06,
      "loss": 0.0292,
      "step": 532140
    },
    {
      "epoch": 0.8708915116880397,
      "grad_norm": 1.5237572193145752,
      "learning_rate": 8.31263219625337e-06,
      "loss": 0.0292,
      "step": 532160
    },
    {
      "epoch": 0.870924242126693,
      "grad_norm": 0.26674675941467285,
      "learning_rate": 8.312566304039852e-06,
      "loss": 0.0225,
      "step": 532180
    },
    {
      "epoch": 0.8709569725653463,
      "grad_norm": 0.676220178604126,
      "learning_rate": 8.312500411826335e-06,
      "loss": 0.0265,
      "step": 532200
    },
    {
      "epoch": 0.8709897030039997,
      "grad_norm": 0.24707956612110138,
      "learning_rate": 8.312434519612817e-06,
      "loss": 0.0353,
      "step": 532220
    },
    {
      "epoch": 0.871022433442653,
      "grad_norm": 1.3903981447219849,
      "learning_rate": 8.3123686273993e-06,
      "loss": 0.0346,
      "step": 532240
    },
    {
      "epoch": 0.8710551638813063,
      "grad_norm": 3.011040210723877,
      "learning_rate": 8.312302735185783e-06,
      "loss": 0.0284,
      "step": 532260
    },
    {
      "epoch": 0.8710878943199597,
      "grad_norm": 0.5516126751899719,
      "learning_rate": 8.312236842972266e-06,
      "loss": 0.026,
      "step": 532280
    },
    {
      "epoch": 0.871120624758613,
      "grad_norm": 0.4747665822505951,
      "learning_rate": 8.31217095075875e-06,
      "loss": 0.0262,
      "step": 532300
    },
    {
      "epoch": 0.8711533551972663,
      "grad_norm": 0.16748744249343872,
      "learning_rate": 8.312105058545232e-06,
      "loss": 0.026,
      "step": 532320
    },
    {
      "epoch": 0.8711860856359197,
      "grad_norm": 1.125693917274475,
      "learning_rate": 8.312039166331715e-06,
      "loss": 0.0312,
      "step": 532340
    },
    {
      "epoch": 0.8712188160745731,
      "grad_norm": 1.3543213605880737,
      "learning_rate": 8.311973274118199e-06,
      "loss": 0.0321,
      "step": 532360
    },
    {
      "epoch": 0.8712515465132263,
      "grad_norm": 0.48993557691574097,
      "learning_rate": 8.31190738190468e-06,
      "loss": 0.0313,
      "step": 532380
    },
    {
      "epoch": 0.8712842769518797,
      "grad_norm": 0.9574422836303711,
      "learning_rate": 8.311841489691164e-06,
      "loss": 0.0247,
      "step": 532400
    },
    {
      "epoch": 0.8713170073905331,
      "grad_norm": 0.7308483719825745,
      "learning_rate": 8.311775597477648e-06,
      "loss": 0.0192,
      "step": 532420
    },
    {
      "epoch": 0.8713497378291863,
      "grad_norm": 0.38769519329071045,
      "learning_rate": 8.31170970526413e-06,
      "loss": 0.0319,
      "step": 532440
    },
    {
      "epoch": 0.8713824682678397,
      "grad_norm": 0.3641234338283539,
      "learning_rate": 8.311643813050613e-06,
      "loss": 0.0262,
      "step": 532460
    },
    {
      "epoch": 0.8714151987064931,
      "grad_norm": 0.7766700983047485,
      "learning_rate": 8.311577920837095e-06,
      "loss": 0.0249,
      "step": 532480
    },
    {
      "epoch": 0.8714479291451463,
      "grad_norm": 0.2874257564544678,
      "learning_rate": 8.311512028623579e-06,
      "loss": 0.0254,
      "step": 532500
    },
    {
      "epoch": 0.8714806595837997,
      "grad_norm": 3.8474769592285156,
      "learning_rate": 8.31144613641006e-06,
      "loss": 0.0487,
      "step": 532520
    },
    {
      "epoch": 0.8715133900224531,
      "grad_norm": 1.541957974433899,
      "learning_rate": 8.311380244196544e-06,
      "loss": 0.0262,
      "step": 532540
    },
    {
      "epoch": 0.8715461204611065,
      "grad_norm": 0.40541523694992065,
      "learning_rate": 8.311314351983026e-06,
      "loss": 0.027,
      "step": 532560
    },
    {
      "epoch": 0.8715788508997597,
      "grad_norm": 1.8040637969970703,
      "learning_rate": 8.31124845976951e-06,
      "loss": 0.0334,
      "step": 532580
    },
    {
      "epoch": 0.8716115813384131,
      "grad_norm": 1.0174916982650757,
      "learning_rate": 8.311182567555992e-06,
      "loss": 0.0338,
      "step": 532600
    },
    {
      "epoch": 0.8716443117770665,
      "grad_norm": 5.306748867034912,
      "learning_rate": 8.311116675342475e-06,
      "loss": 0.0282,
      "step": 532620
    },
    {
      "epoch": 0.8716770422157197,
      "grad_norm": 0.9301267862319946,
      "learning_rate": 8.311050783128959e-06,
      "loss": 0.0202,
      "step": 532640
    },
    {
      "epoch": 0.8717097726543731,
      "grad_norm": 0.6788204312324524,
      "learning_rate": 8.31098489091544e-06,
      "loss": 0.0356,
      "step": 532660
    },
    {
      "epoch": 0.8717425030930265,
      "grad_norm": 1.8655024766921997,
      "learning_rate": 8.310918998701924e-06,
      "loss": 0.032,
      "step": 532680
    },
    {
      "epoch": 0.8717752335316797,
      "grad_norm": 0.3742636740207672,
      "learning_rate": 8.310853106488406e-06,
      "loss": 0.0276,
      "step": 532700
    },
    {
      "epoch": 0.8718079639703331,
      "grad_norm": 1.1970651149749756,
      "learning_rate": 8.31078721427489e-06,
      "loss": 0.0346,
      "step": 532720
    },
    {
      "epoch": 0.8718406944089865,
      "grad_norm": 0.8944700956344604,
      "learning_rate": 8.310721322061372e-06,
      "loss": 0.0254,
      "step": 532740
    },
    {
      "epoch": 0.8718734248476399,
      "grad_norm": 0.7780928611755371,
      "learning_rate": 8.310655429847855e-06,
      "loss": 0.024,
      "step": 532760
    },
    {
      "epoch": 0.8719061552862931,
      "grad_norm": 0.3902156352996826,
      "learning_rate": 8.310589537634339e-06,
      "loss": 0.0339,
      "step": 532780
    },
    {
      "epoch": 0.8719388857249465,
      "grad_norm": 0.19119879603385925,
      "learning_rate": 8.31052364542082e-06,
      "loss": 0.0345,
      "step": 532800
    },
    {
      "epoch": 0.8719716161635999,
      "grad_norm": 0.6307907104492188,
      "learning_rate": 8.310457753207304e-06,
      "loss": 0.0281,
      "step": 532820
    },
    {
      "epoch": 0.8720043466022531,
      "grad_norm": 0.5714352130889893,
      "learning_rate": 8.310391860993788e-06,
      "loss": 0.02,
      "step": 532840
    },
    {
      "epoch": 0.8720370770409065,
      "grad_norm": 3.179605722427368,
      "learning_rate": 8.31032596878027e-06,
      "loss": 0.0319,
      "step": 532860
    },
    {
      "epoch": 0.8720698074795599,
      "grad_norm": 1.52925443649292,
      "learning_rate": 8.310260076566753e-06,
      "loss": 0.0353,
      "step": 532880
    },
    {
      "epoch": 0.8721025379182131,
      "grad_norm": 1.464200496673584,
      "learning_rate": 8.310194184353235e-06,
      "loss": 0.0298,
      "step": 532900
    },
    {
      "epoch": 0.8721352683568665,
      "grad_norm": 0.48259931802749634,
      "learning_rate": 8.310128292139719e-06,
      "loss": 0.0293,
      "step": 532920
    },
    {
      "epoch": 0.8721679987955199,
      "grad_norm": 1.0873723030090332,
      "learning_rate": 8.3100623999262e-06,
      "loss": 0.0301,
      "step": 532940
    },
    {
      "epoch": 0.8722007292341732,
      "grad_norm": 0.8290788531303406,
      "learning_rate": 8.309996507712684e-06,
      "loss": 0.0264,
      "step": 532960
    },
    {
      "epoch": 0.8722334596728265,
      "grad_norm": 1.8692946434020996,
      "learning_rate": 8.309930615499168e-06,
      "loss": 0.0315,
      "step": 532980
    },
    {
      "epoch": 0.8722661901114799,
      "grad_norm": 2.272103786468506,
      "learning_rate": 8.30986472328565e-06,
      "loss": 0.0333,
      "step": 533000
    },
    {
      "epoch": 0.8722989205501332,
      "grad_norm": 0.6570417881011963,
      "learning_rate": 8.309798831072133e-06,
      "loss": 0.0291,
      "step": 533020
    },
    {
      "epoch": 0.8723316509887865,
      "grad_norm": 1.4708635807037354,
      "learning_rate": 8.309732938858615e-06,
      "loss": 0.0238,
      "step": 533040
    },
    {
      "epoch": 0.8723643814274399,
      "grad_norm": 0.34610679745674133,
      "learning_rate": 8.309667046645099e-06,
      "loss": 0.0234,
      "step": 533060
    },
    {
      "epoch": 0.8723971118660933,
      "grad_norm": 2.4178738594055176,
      "learning_rate": 8.30960115443158e-06,
      "loss": 0.0343,
      "step": 533080
    },
    {
      "epoch": 0.8724298423047465,
      "grad_norm": 0.4570692181587219,
      "learning_rate": 8.309535262218064e-06,
      "loss": 0.0278,
      "step": 533100
    },
    {
      "epoch": 0.8724625727433999,
      "grad_norm": 0.7704369425773621,
      "learning_rate": 8.309469370004546e-06,
      "loss": 0.0334,
      "step": 533120
    },
    {
      "epoch": 0.8724953031820533,
      "grad_norm": 0.4832782447338104,
      "learning_rate": 8.30940347779103e-06,
      "loss": 0.0354,
      "step": 533140
    },
    {
      "epoch": 0.8725280336207066,
      "grad_norm": 0.9106490612030029,
      "learning_rate": 8.309337585577513e-06,
      "loss": 0.0224,
      "step": 533160
    },
    {
      "epoch": 0.8725607640593599,
      "grad_norm": 0.6523871421813965,
      "learning_rate": 8.309271693363995e-06,
      "loss": 0.0282,
      "step": 533180
    },
    {
      "epoch": 0.8725934944980133,
      "grad_norm": 0.36834555864334106,
      "learning_rate": 8.309205801150479e-06,
      "loss": 0.0224,
      "step": 533200
    },
    {
      "epoch": 0.8726262249366666,
      "grad_norm": 1.0978437662124634,
      "learning_rate": 8.309139908936963e-06,
      "loss": 0.0371,
      "step": 533220
    },
    {
      "epoch": 0.8726589553753199,
      "grad_norm": 7.055850982666016,
      "learning_rate": 8.309074016723444e-06,
      "loss": 0.0474,
      "step": 533240
    },
    {
      "epoch": 0.8726916858139733,
      "grad_norm": 0.7404686808586121,
      "learning_rate": 8.309008124509928e-06,
      "loss": 0.0336,
      "step": 533260
    },
    {
      "epoch": 0.8727244162526266,
      "grad_norm": 0.5080567598342896,
      "learning_rate": 8.30894223229641e-06,
      "loss": 0.0349,
      "step": 533280
    },
    {
      "epoch": 0.8727571466912799,
      "grad_norm": 0.6657162308692932,
      "learning_rate": 8.308876340082894e-06,
      "loss": 0.027,
      "step": 533300
    },
    {
      "epoch": 0.8727898771299333,
      "grad_norm": 0.6289175748825073,
      "learning_rate": 8.308810447869375e-06,
      "loss": 0.0246,
      "step": 533320
    },
    {
      "epoch": 0.8728226075685867,
      "grad_norm": 0.7764289379119873,
      "learning_rate": 8.308744555655859e-06,
      "loss": 0.0231,
      "step": 533340
    },
    {
      "epoch": 0.87285533800724,
      "grad_norm": 1.036690354347229,
      "learning_rate": 8.308678663442343e-06,
      "loss": 0.0273,
      "step": 533360
    },
    {
      "epoch": 0.8728880684458933,
      "grad_norm": 0.7051299810409546,
      "learning_rate": 8.308612771228824e-06,
      "loss": 0.0243,
      "step": 533380
    },
    {
      "epoch": 0.8729207988845467,
      "grad_norm": 0.8721985220909119,
      "learning_rate": 8.308546879015308e-06,
      "loss": 0.0345,
      "step": 533400
    },
    {
      "epoch": 0.8729535293232,
      "grad_norm": 0.924589216709137,
      "learning_rate": 8.30848098680179e-06,
      "loss": 0.0383,
      "step": 533420
    },
    {
      "epoch": 0.8729862597618533,
      "grad_norm": 1.2172598838806152,
      "learning_rate": 8.308415094588274e-06,
      "loss": 0.0279,
      "step": 533440
    },
    {
      "epoch": 0.8730189902005067,
      "grad_norm": 0.1522035449743271,
      "learning_rate": 8.308349202374755e-06,
      "loss": 0.0263,
      "step": 533460
    },
    {
      "epoch": 0.87305172063916,
      "grad_norm": 1.6711344718933105,
      "learning_rate": 8.308283310161239e-06,
      "loss": 0.0372,
      "step": 533480
    },
    {
      "epoch": 0.8730844510778133,
      "grad_norm": 1.6788948774337769,
      "learning_rate": 8.308217417947721e-06,
      "loss": 0.0329,
      "step": 533500
    },
    {
      "epoch": 0.8731171815164667,
      "grad_norm": 0.2558366060256958,
      "learning_rate": 8.308151525734205e-06,
      "loss": 0.0237,
      "step": 533520
    },
    {
      "epoch": 0.87314991195512,
      "grad_norm": 3.5959413051605225,
      "learning_rate": 8.308085633520686e-06,
      "loss": 0.0332,
      "step": 533540
    },
    {
      "epoch": 0.8731826423937734,
      "grad_norm": 0.6897724270820618,
      "learning_rate": 8.30801974130717e-06,
      "loss": 0.0324,
      "step": 533560
    },
    {
      "epoch": 0.8732153728324267,
      "grad_norm": 1.005313515663147,
      "learning_rate": 8.307953849093654e-06,
      "loss": 0.0258,
      "step": 533580
    },
    {
      "epoch": 0.87324810327108,
      "grad_norm": 0.37161847949028015,
      "learning_rate": 8.307887956880135e-06,
      "loss": 0.0276,
      "step": 533600
    },
    {
      "epoch": 0.8732808337097334,
      "grad_norm": 1.2308423519134521,
      "learning_rate": 8.307822064666619e-06,
      "loss": 0.0269,
      "step": 533620
    },
    {
      "epoch": 0.8733135641483867,
      "grad_norm": 2.6604514122009277,
      "learning_rate": 8.307756172453103e-06,
      "loss": 0.0262,
      "step": 533640
    },
    {
      "epoch": 0.87334629458704,
      "grad_norm": 0.5037601590156555,
      "learning_rate": 8.307690280239585e-06,
      "loss": 0.0284,
      "step": 533660
    },
    {
      "epoch": 0.8733790250256934,
      "grad_norm": 0.8461869955062866,
      "learning_rate": 8.307624388026068e-06,
      "loss": 0.0238,
      "step": 533680
    },
    {
      "epoch": 0.8734117554643467,
      "grad_norm": 1.6035170555114746,
      "learning_rate": 8.307558495812552e-06,
      "loss": 0.0277,
      "step": 533700
    },
    {
      "epoch": 0.8734444859030001,
      "grad_norm": 1.1418890953063965,
      "learning_rate": 8.307492603599034e-06,
      "loss": 0.0323,
      "step": 533720
    },
    {
      "epoch": 0.8734772163416534,
      "grad_norm": 0.8821188807487488,
      "learning_rate": 8.307426711385517e-06,
      "loss": 0.0318,
      "step": 533740
    },
    {
      "epoch": 0.8735099467803068,
      "grad_norm": 0.2806166112422943,
      "learning_rate": 8.307360819171999e-06,
      "loss": 0.0287,
      "step": 533760
    },
    {
      "epoch": 0.8735426772189601,
      "grad_norm": 0.9442863464355469,
      "learning_rate": 8.307294926958483e-06,
      "loss": 0.0312,
      "step": 533780
    },
    {
      "epoch": 0.8735754076576134,
      "grad_norm": 1.970871090888977,
      "learning_rate": 8.307229034744965e-06,
      "loss": 0.0311,
      "step": 533800
    },
    {
      "epoch": 0.8736081380962668,
      "grad_norm": 1.9554141759872437,
      "learning_rate": 8.307163142531448e-06,
      "loss": 0.0265,
      "step": 533820
    },
    {
      "epoch": 0.8736408685349201,
      "grad_norm": 1.3652148246765137,
      "learning_rate": 8.30709725031793e-06,
      "loss": 0.0328,
      "step": 533840
    },
    {
      "epoch": 0.8736735989735734,
      "grad_norm": 8.428858757019043,
      "learning_rate": 8.307031358104414e-06,
      "loss": 0.0297,
      "step": 533860
    },
    {
      "epoch": 0.8737063294122268,
      "grad_norm": 0.8578200936317444,
      "learning_rate": 8.306965465890896e-06,
      "loss": 0.0231,
      "step": 533880
    },
    {
      "epoch": 0.8737390598508801,
      "grad_norm": 0.5933570861816406,
      "learning_rate": 8.306899573677379e-06,
      "loss": 0.0204,
      "step": 533900
    },
    {
      "epoch": 0.8737717902895334,
      "grad_norm": 1.208555817604065,
      "learning_rate": 8.306833681463861e-06,
      "loss": 0.0304,
      "step": 533920
    },
    {
      "epoch": 0.8738045207281868,
      "grad_norm": 1.3434271812438965,
      "learning_rate": 8.306767789250345e-06,
      "loss": 0.0322,
      "step": 533940
    },
    {
      "epoch": 0.8738372511668402,
      "grad_norm": 1.1105314493179321,
      "learning_rate": 8.306701897036828e-06,
      "loss": 0.0254,
      "step": 533960
    },
    {
      "epoch": 0.8738699816054935,
      "grad_norm": 0.7123737931251526,
      "learning_rate": 8.30663600482331e-06,
      "loss": 0.0351,
      "step": 533980
    },
    {
      "epoch": 0.8739027120441468,
      "grad_norm": 1.9660545587539673,
      "learning_rate": 8.306570112609794e-06,
      "loss": 0.0275,
      "step": 534000
    },
    {
      "epoch": 0.8739354424828002,
      "grad_norm": 0.608465313911438,
      "learning_rate": 8.306504220396277e-06,
      "loss": 0.0217,
      "step": 534020
    },
    {
      "epoch": 0.8739681729214535,
      "grad_norm": 0.4935266077518463,
      "learning_rate": 8.306438328182759e-06,
      "loss": 0.0303,
      "step": 534040
    },
    {
      "epoch": 0.8740009033601068,
      "grad_norm": 0.8167724609375,
      "learning_rate": 8.306372435969243e-06,
      "loss": 0.0293,
      "step": 534060
    },
    {
      "epoch": 0.8740336337987602,
      "grad_norm": 1.6402541399002075,
      "learning_rate": 8.306306543755726e-06,
      "loss": 0.0253,
      "step": 534080
    },
    {
      "epoch": 0.8740663642374135,
      "grad_norm": 0.5321956276893616,
      "learning_rate": 8.306240651542208e-06,
      "loss": 0.0295,
      "step": 534100
    },
    {
      "epoch": 0.8740990946760668,
      "grad_norm": 0.6171144247055054,
      "learning_rate": 8.306174759328692e-06,
      "loss": 0.0353,
      "step": 534120
    },
    {
      "epoch": 0.8741318251147202,
      "grad_norm": 0.30962368845939636,
      "learning_rate": 8.306108867115174e-06,
      "loss": 0.0302,
      "step": 534140
    },
    {
      "epoch": 0.8741645555533736,
      "grad_norm": 3.5503852367401123,
      "learning_rate": 8.306042974901657e-06,
      "loss": 0.0275,
      "step": 534160
    },
    {
      "epoch": 0.8741972859920268,
      "grad_norm": 2.0272109508514404,
      "learning_rate": 8.305977082688139e-06,
      "loss": 0.0231,
      "step": 534180
    },
    {
      "epoch": 0.8742300164306802,
      "grad_norm": 0.20125487446784973,
      "learning_rate": 8.305911190474623e-06,
      "loss": 0.0256,
      "step": 534200
    },
    {
      "epoch": 0.8742627468693336,
      "grad_norm": 0.6303601264953613,
      "learning_rate": 8.305845298261105e-06,
      "loss": 0.0231,
      "step": 534220
    },
    {
      "epoch": 0.8742954773079868,
      "grad_norm": 1.541120171546936,
      "learning_rate": 8.305779406047588e-06,
      "loss": 0.0318,
      "step": 534240
    },
    {
      "epoch": 0.8743282077466402,
      "grad_norm": 0.5975573658943176,
      "learning_rate": 8.30571351383407e-06,
      "loss": 0.0224,
      "step": 534260
    },
    {
      "epoch": 0.8743609381852936,
      "grad_norm": 1.1245455741882324,
      "learning_rate": 8.305647621620554e-06,
      "loss": 0.0249,
      "step": 534280
    },
    {
      "epoch": 0.8743936686239469,
      "grad_norm": 0.8506134152412415,
      "learning_rate": 8.305581729407036e-06,
      "loss": 0.0259,
      "step": 534300
    },
    {
      "epoch": 0.8744263990626002,
      "grad_norm": 0.3933793902397156,
      "learning_rate": 8.30551583719352e-06,
      "loss": 0.0319,
      "step": 534320
    },
    {
      "epoch": 0.8744591295012536,
      "grad_norm": 2.002380847930908,
      "learning_rate": 8.305449944980001e-06,
      "loss": 0.0249,
      "step": 534340
    },
    {
      "epoch": 0.874491859939907,
      "grad_norm": 0.7655006051063538,
      "learning_rate": 8.305384052766485e-06,
      "loss": 0.0395,
      "step": 534360
    },
    {
      "epoch": 0.8745245903785602,
      "grad_norm": 0.5390836596488953,
      "learning_rate": 8.305318160552968e-06,
      "loss": 0.0262,
      "step": 534380
    },
    {
      "epoch": 0.8745573208172136,
      "grad_norm": 2.002718925476074,
      "learning_rate": 8.30525226833945e-06,
      "loss": 0.0401,
      "step": 534400
    },
    {
      "epoch": 0.874590051255867,
      "grad_norm": 0.38217687606811523,
      "learning_rate": 8.305186376125934e-06,
      "loss": 0.04,
      "step": 534420
    },
    {
      "epoch": 0.8746227816945202,
      "grad_norm": 0.9781291484832764,
      "learning_rate": 8.305120483912417e-06,
      "loss": 0.0277,
      "step": 534440
    },
    {
      "epoch": 0.8746555121331736,
      "grad_norm": 0.21201910078525543,
      "learning_rate": 8.3050545916989e-06,
      "loss": 0.0272,
      "step": 534460
    },
    {
      "epoch": 0.874688242571827,
      "grad_norm": 0.5980165004730225,
      "learning_rate": 8.304988699485383e-06,
      "loss": 0.0242,
      "step": 534480
    },
    {
      "epoch": 0.8747209730104802,
      "grad_norm": 1.216289758682251,
      "learning_rate": 8.304922807271866e-06,
      "loss": 0.0305,
      "step": 534500
    },
    {
      "epoch": 0.8747537034491336,
      "grad_norm": 1.4003698825836182,
      "learning_rate": 8.304856915058348e-06,
      "loss": 0.0265,
      "step": 534520
    },
    {
      "epoch": 0.874786433887787,
      "grad_norm": 2.2481000423431396,
      "learning_rate": 8.304791022844832e-06,
      "loss": 0.031,
      "step": 534540
    },
    {
      "epoch": 0.8748191643264404,
      "grad_norm": 0.7429332733154297,
      "learning_rate": 8.304725130631314e-06,
      "loss": 0.0337,
      "step": 534560
    },
    {
      "epoch": 0.8748518947650936,
      "grad_norm": 0.6705711483955383,
      "learning_rate": 8.304659238417797e-06,
      "loss": 0.0235,
      "step": 534580
    },
    {
      "epoch": 0.874884625203747,
      "grad_norm": 0.5607765316963196,
      "learning_rate": 8.30459334620428e-06,
      "loss": 0.0333,
      "step": 534600
    },
    {
      "epoch": 0.8749173556424004,
      "grad_norm": 1.5703365802764893,
      "learning_rate": 8.304527453990763e-06,
      "loss": 0.0231,
      "step": 534620
    },
    {
      "epoch": 0.8749500860810536,
      "grad_norm": 0.33685368299484253,
      "learning_rate": 8.304461561777245e-06,
      "loss": 0.0344,
      "step": 534640
    },
    {
      "epoch": 0.874982816519707,
      "grad_norm": 0.5584602355957031,
      "learning_rate": 8.304395669563728e-06,
      "loss": 0.0323,
      "step": 534660
    },
    {
      "epoch": 0.8750155469583604,
      "grad_norm": 1.1189548969268799,
      "learning_rate": 8.30432977735021e-06,
      "loss": 0.0322,
      "step": 534680
    },
    {
      "epoch": 0.8750482773970136,
      "grad_norm": 2.460353374481201,
      "learning_rate": 8.304263885136694e-06,
      "loss": 0.0299,
      "step": 534700
    },
    {
      "epoch": 0.875081007835667,
      "grad_norm": 1.0464388132095337,
      "learning_rate": 8.304197992923176e-06,
      "loss": 0.0324,
      "step": 534720
    },
    {
      "epoch": 0.8751137382743204,
      "grad_norm": 2.0622634887695312,
      "learning_rate": 8.30413210070966e-06,
      "loss": 0.0238,
      "step": 534740
    },
    {
      "epoch": 0.8751464687129737,
      "grad_norm": 0.9838355779647827,
      "learning_rate": 8.304066208496143e-06,
      "loss": 0.0296,
      "step": 534760
    },
    {
      "epoch": 0.875179199151627,
      "grad_norm": 0.49305909872055054,
      "learning_rate": 8.304000316282625e-06,
      "loss": 0.0295,
      "step": 534780
    },
    {
      "epoch": 0.8752119295902804,
      "grad_norm": 0.13904282450675964,
      "learning_rate": 8.303934424069108e-06,
      "loss": 0.024,
      "step": 534800
    },
    {
      "epoch": 0.8752446600289338,
      "grad_norm": 0.5633010864257812,
      "learning_rate": 8.303868531855592e-06,
      "loss": 0.0238,
      "step": 534820
    },
    {
      "epoch": 0.875277390467587,
      "grad_norm": 1.3372788429260254,
      "learning_rate": 8.303802639642074e-06,
      "loss": 0.0363,
      "step": 534840
    },
    {
      "epoch": 0.8753101209062404,
      "grad_norm": 0.6906409859657288,
      "learning_rate": 8.303736747428557e-06,
      "loss": 0.032,
      "step": 534860
    },
    {
      "epoch": 0.8753428513448938,
      "grad_norm": 2.257775068283081,
      "learning_rate": 8.303670855215041e-06,
      "loss": 0.0295,
      "step": 534880
    },
    {
      "epoch": 0.875375581783547,
      "grad_norm": 0.6363586187362671,
      "learning_rate": 8.303604963001523e-06,
      "loss": 0.0262,
      "step": 534900
    },
    {
      "epoch": 0.8754083122222004,
      "grad_norm": 0.6789475679397583,
      "learning_rate": 8.303539070788006e-06,
      "loss": 0.0241,
      "step": 534920
    },
    {
      "epoch": 0.8754410426608538,
      "grad_norm": 0.49342358112335205,
      "learning_rate": 8.303473178574488e-06,
      "loss": 0.0227,
      "step": 534940
    },
    {
      "epoch": 0.875473773099507,
      "grad_norm": 1.7473459243774414,
      "learning_rate": 8.303407286360972e-06,
      "loss": 0.0283,
      "step": 534960
    },
    {
      "epoch": 0.8755065035381604,
      "grad_norm": 0.41451409459114075,
      "learning_rate": 8.303341394147454e-06,
      "loss": 0.0333,
      "step": 534980
    },
    {
      "epoch": 0.8755392339768138,
      "grad_norm": 0.7586088180541992,
      "learning_rate": 8.303275501933937e-06,
      "loss": 0.0347,
      "step": 535000
    },
    {
      "epoch": 0.8755719644154671,
      "grad_norm": 1.1671152114868164,
      "learning_rate": 8.30320960972042e-06,
      "loss": 0.0302,
      "step": 535020
    },
    {
      "epoch": 0.8756046948541204,
      "grad_norm": 3.1422181129455566,
      "learning_rate": 8.303143717506903e-06,
      "loss": 0.025,
      "step": 535040
    },
    {
      "epoch": 0.8756374252927738,
      "grad_norm": 0.45522820949554443,
      "learning_rate": 8.303077825293385e-06,
      "loss": 0.0236,
      "step": 535060
    },
    {
      "epoch": 0.8756701557314271,
      "grad_norm": 0.8331989049911499,
      "learning_rate": 8.303011933079868e-06,
      "loss": 0.031,
      "step": 535080
    },
    {
      "epoch": 0.8757028861700804,
      "grad_norm": 0.37380561232566833,
      "learning_rate": 8.302946040866352e-06,
      "loss": 0.0202,
      "step": 535100
    },
    {
      "epoch": 0.8757356166087338,
      "grad_norm": 0.792121171951294,
      "learning_rate": 8.302880148652834e-06,
      "loss": 0.0269,
      "step": 535120
    },
    {
      "epoch": 0.8757683470473872,
      "grad_norm": 0.34025922417640686,
      "learning_rate": 8.302814256439317e-06,
      "loss": 0.0359,
      "step": 535140
    },
    {
      "epoch": 0.8758010774860404,
      "grad_norm": 1.4496371746063232,
      "learning_rate": 8.3027483642258e-06,
      "loss": 0.0204,
      "step": 535160
    },
    {
      "epoch": 0.8758338079246938,
      "grad_norm": 0.8296231031417847,
      "learning_rate": 8.302682472012283e-06,
      "loss": 0.0307,
      "step": 535180
    },
    {
      "epoch": 0.8758665383633472,
      "grad_norm": 1.1881828308105469,
      "learning_rate": 8.302616579798767e-06,
      "loss": 0.0291,
      "step": 535200
    },
    {
      "epoch": 0.8758992688020005,
      "grad_norm": 1.178450345993042,
      "learning_rate": 8.302550687585248e-06,
      "loss": 0.0374,
      "step": 535220
    },
    {
      "epoch": 0.8759319992406538,
      "grad_norm": 1.6163690090179443,
      "learning_rate": 8.302484795371732e-06,
      "loss": 0.0267,
      "step": 535240
    },
    {
      "epoch": 0.8759647296793072,
      "grad_norm": 1.8009793758392334,
      "learning_rate": 8.302418903158216e-06,
      "loss": 0.0303,
      "step": 535260
    },
    {
      "epoch": 0.8759974601179605,
      "grad_norm": 1.6414227485656738,
      "learning_rate": 8.302353010944697e-06,
      "loss": 0.0269,
      "step": 535280
    },
    {
      "epoch": 0.8760301905566138,
      "grad_norm": 6.814492702484131,
      "learning_rate": 8.302287118731181e-06,
      "loss": 0.0253,
      "step": 535300
    },
    {
      "epoch": 0.8760629209952672,
      "grad_norm": 0.7361463904380798,
      "learning_rate": 8.302221226517663e-06,
      "loss": 0.028,
      "step": 535320
    },
    {
      "epoch": 0.8760956514339205,
      "grad_norm": 1.642210602760315,
      "learning_rate": 8.302155334304147e-06,
      "loss": 0.0358,
      "step": 535340
    },
    {
      "epoch": 0.8761283818725738,
      "grad_norm": 0.47091183066368103,
      "learning_rate": 8.302089442090628e-06,
      "loss": 0.0257,
      "step": 535360
    },
    {
      "epoch": 0.8761611123112272,
      "grad_norm": 0.5760185122489929,
      "learning_rate": 8.302023549877112e-06,
      "loss": 0.0236,
      "step": 535380
    },
    {
      "epoch": 0.8761938427498805,
      "grad_norm": 1.0365761518478394,
      "learning_rate": 8.301957657663594e-06,
      "loss": 0.0256,
      "step": 535400
    },
    {
      "epoch": 0.8762265731885339,
      "grad_norm": 0.8694930076599121,
      "learning_rate": 8.301891765450077e-06,
      "loss": 0.0339,
      "step": 535420
    },
    {
      "epoch": 0.8762593036271872,
      "grad_norm": 0.26367664337158203,
      "learning_rate": 8.301825873236561e-06,
      "loss": 0.0392,
      "step": 535440
    },
    {
      "epoch": 0.8762920340658406,
      "grad_norm": 1.5838700532913208,
      "learning_rate": 8.301759981023043e-06,
      "loss": 0.0252,
      "step": 535460
    },
    {
      "epoch": 0.8763247645044939,
      "grad_norm": 0.3447953164577484,
      "learning_rate": 8.301694088809527e-06,
      "loss": 0.0301,
      "step": 535480
    },
    {
      "epoch": 0.8763574949431472,
      "grad_norm": 0.7892339825630188,
      "learning_rate": 8.301628196596008e-06,
      "loss": 0.044,
      "step": 535500
    },
    {
      "epoch": 0.8763902253818006,
      "grad_norm": 0.9017508625984192,
      "learning_rate": 8.301562304382492e-06,
      "loss": 0.0364,
      "step": 535520
    },
    {
      "epoch": 0.8764229558204539,
      "grad_norm": 0.3445199728012085,
      "learning_rate": 8.301496412168974e-06,
      "loss": 0.0242,
      "step": 535540
    },
    {
      "epoch": 0.8764556862591072,
      "grad_norm": 0.8596570491790771,
      "learning_rate": 8.301430519955458e-06,
      "loss": 0.0255,
      "step": 535560
    },
    {
      "epoch": 0.8764884166977606,
      "grad_norm": 0.3704357445240021,
      "learning_rate": 8.30136462774194e-06,
      "loss": 0.0318,
      "step": 535580
    },
    {
      "epoch": 0.8765211471364139,
      "grad_norm": 0.9240343570709229,
      "learning_rate": 8.301298735528423e-06,
      "loss": 0.0326,
      "step": 535600
    },
    {
      "epoch": 0.8765538775750673,
      "grad_norm": 0.13235339522361755,
      "learning_rate": 8.301232843314907e-06,
      "loss": 0.0311,
      "step": 535620
    },
    {
      "epoch": 0.8765866080137206,
      "grad_norm": 0.27324098348617554,
      "learning_rate": 8.301166951101388e-06,
      "loss": 0.0339,
      "step": 535640
    },
    {
      "epoch": 0.8766193384523739,
      "grad_norm": 0.6652560830116272,
      "learning_rate": 8.301101058887872e-06,
      "loss": 0.0316,
      "step": 535660
    },
    {
      "epoch": 0.8766520688910273,
      "grad_norm": 0.9511334896087646,
      "learning_rate": 8.301035166674356e-06,
      "loss": 0.0269,
      "step": 535680
    },
    {
      "epoch": 0.8766847993296806,
      "grad_norm": 1.0874781608581543,
      "learning_rate": 8.300969274460838e-06,
      "loss": 0.0253,
      "step": 535700
    },
    {
      "epoch": 0.876717529768334,
      "grad_norm": 1.3162899017333984,
      "learning_rate": 8.300903382247321e-06,
      "loss": 0.0259,
      "step": 535720
    },
    {
      "epoch": 0.8767502602069873,
      "grad_norm": 5.519381523132324,
      "learning_rate": 8.300837490033803e-06,
      "loss": 0.0298,
      "step": 535740
    },
    {
      "epoch": 0.8767829906456406,
      "grad_norm": 0.993410050868988,
      "learning_rate": 8.300771597820287e-06,
      "loss": 0.0358,
      "step": 535760
    },
    {
      "epoch": 0.876815721084294,
      "grad_norm": 0.8210410475730896,
      "learning_rate": 8.300705705606768e-06,
      "loss": 0.0387,
      "step": 535780
    },
    {
      "epoch": 0.8768484515229473,
      "grad_norm": 0.5818430185317993,
      "learning_rate": 8.300639813393252e-06,
      "loss": 0.0285,
      "step": 535800
    },
    {
      "epoch": 0.8768811819616007,
      "grad_norm": 0.3663722276687622,
      "learning_rate": 8.300573921179736e-06,
      "loss": 0.0298,
      "step": 535820
    },
    {
      "epoch": 0.876913912400254,
      "grad_norm": 0.6060450077056885,
      "learning_rate": 8.300508028966218e-06,
      "loss": 0.0303,
      "step": 535840
    },
    {
      "epoch": 0.8769466428389073,
      "grad_norm": 3.609016180038452,
      "learning_rate": 8.300442136752701e-06,
      "loss": 0.0319,
      "step": 535860
    },
    {
      "epoch": 0.8769793732775607,
      "grad_norm": 0.43482688069343567,
      "learning_rate": 8.300376244539183e-06,
      "loss": 0.0183,
      "step": 535880
    },
    {
      "epoch": 0.877012103716214,
      "grad_norm": 0.8050198554992676,
      "learning_rate": 8.300310352325667e-06,
      "loss": 0.0367,
      "step": 535900
    },
    {
      "epoch": 0.8770448341548673,
      "grad_norm": 2.3051397800445557,
      "learning_rate": 8.300244460112149e-06,
      "loss": 0.0263,
      "step": 535920
    },
    {
      "epoch": 0.8770775645935207,
      "grad_norm": 0.964407205581665,
      "learning_rate": 8.300178567898632e-06,
      "loss": 0.0244,
      "step": 535940
    },
    {
      "epoch": 0.877110295032174,
      "grad_norm": 1.3940777778625488,
      "learning_rate": 8.300112675685114e-06,
      "loss": 0.0349,
      "step": 535960
    },
    {
      "epoch": 0.8771430254708273,
      "grad_norm": 0.6775522828102112,
      "learning_rate": 8.300046783471598e-06,
      "loss": 0.0247,
      "step": 535980
    },
    {
      "epoch": 0.8771757559094807,
      "grad_norm": 1.5898550748825073,
      "learning_rate": 8.299980891258081e-06,
      "loss": 0.0287,
      "step": 536000
    },
    {
      "epoch": 0.8772084863481341,
      "grad_norm": 0.9303192496299744,
      "learning_rate": 8.299914999044563e-06,
      "loss": 0.0388,
      "step": 536020
    },
    {
      "epoch": 0.8772412167867873,
      "grad_norm": 0.930767297744751,
      "learning_rate": 8.299849106831047e-06,
      "loss": 0.0277,
      "step": 536040
    },
    {
      "epoch": 0.8772739472254407,
      "grad_norm": 0.5551193356513977,
      "learning_rate": 8.29978321461753e-06,
      "loss": 0.0367,
      "step": 536060
    },
    {
      "epoch": 0.8773066776640941,
      "grad_norm": 0.9733368754386902,
      "learning_rate": 8.299717322404012e-06,
      "loss": 0.0287,
      "step": 536080
    },
    {
      "epoch": 0.8773394081027474,
      "grad_norm": 0.9723724722862244,
      "learning_rate": 8.299651430190496e-06,
      "loss": 0.0254,
      "step": 536100
    },
    {
      "epoch": 0.8773721385414007,
      "grad_norm": 0.35933130979537964,
      "learning_rate": 8.299585537976978e-06,
      "loss": 0.0248,
      "step": 536120
    },
    {
      "epoch": 0.8774048689800541,
      "grad_norm": 0.5174173712730408,
      "learning_rate": 8.299519645763461e-06,
      "loss": 0.024,
      "step": 536140
    },
    {
      "epoch": 0.8774375994187074,
      "grad_norm": 0.32987555861473083,
      "learning_rate": 8.299453753549945e-06,
      "loss": 0.0305,
      "step": 536160
    },
    {
      "epoch": 0.8774703298573607,
      "grad_norm": 0.7612488865852356,
      "learning_rate": 8.299387861336427e-06,
      "loss": 0.0266,
      "step": 536180
    },
    {
      "epoch": 0.8775030602960141,
      "grad_norm": 0.32343995571136475,
      "learning_rate": 8.29932196912291e-06,
      "loss": 0.0269,
      "step": 536200
    },
    {
      "epoch": 0.8775357907346675,
      "grad_norm": 0.5729585289955139,
      "learning_rate": 8.299256076909392e-06,
      "loss": 0.0274,
      "step": 536220
    },
    {
      "epoch": 0.8775685211733207,
      "grad_norm": 1.1012440919876099,
      "learning_rate": 8.299190184695876e-06,
      "loss": 0.0246,
      "step": 536240
    },
    {
      "epoch": 0.8776012516119741,
      "grad_norm": 0.3004780411720276,
      "learning_rate": 8.299124292482358e-06,
      "loss": 0.0289,
      "step": 536260
    },
    {
      "epoch": 0.8776339820506275,
      "grad_norm": 0.81999272108078,
      "learning_rate": 8.299058400268841e-06,
      "loss": 0.0207,
      "step": 536280
    },
    {
      "epoch": 0.8776667124892807,
      "grad_norm": 0.8685409426689148,
      "learning_rate": 8.298992508055323e-06,
      "loss": 0.0368,
      "step": 536300
    },
    {
      "epoch": 0.8776994429279341,
      "grad_norm": 0.38831469416618347,
      "learning_rate": 8.298926615841807e-06,
      "loss": 0.0232,
      "step": 536320
    },
    {
      "epoch": 0.8777321733665875,
      "grad_norm": 1.56279718875885,
      "learning_rate": 8.298860723628289e-06,
      "loss": 0.028,
      "step": 536340
    },
    {
      "epoch": 0.8777649038052407,
      "grad_norm": 0.37397265434265137,
      "learning_rate": 8.298794831414772e-06,
      "loss": 0.0227,
      "step": 536360
    },
    {
      "epoch": 0.8777976342438941,
      "grad_norm": 0.8961035013198853,
      "learning_rate": 8.298728939201254e-06,
      "loss": 0.0254,
      "step": 536380
    },
    {
      "epoch": 0.8778303646825475,
      "grad_norm": 0.35526707768440247,
      "learning_rate": 8.298663046987738e-06,
      "loss": 0.0306,
      "step": 536400
    },
    {
      "epoch": 0.8778630951212009,
      "grad_norm": 0.2319166213274002,
      "learning_rate": 8.298597154774221e-06,
      "loss": 0.0313,
      "step": 536420
    },
    {
      "epoch": 0.8778958255598541,
      "grad_norm": 1.0954749584197998,
      "learning_rate": 8.298531262560703e-06,
      "loss": 0.0423,
      "step": 536440
    },
    {
      "epoch": 0.8779285559985075,
      "grad_norm": 2.268993616104126,
      "learning_rate": 8.298465370347187e-06,
      "loss": 0.0428,
      "step": 536460
    },
    {
      "epoch": 0.8779612864371609,
      "grad_norm": 0.6179517507553101,
      "learning_rate": 8.29839947813367e-06,
      "loss": 0.0302,
      "step": 536480
    },
    {
      "epoch": 0.8779940168758141,
      "grad_norm": 1.6686546802520752,
      "learning_rate": 8.298333585920152e-06,
      "loss": 0.0245,
      "step": 536500
    },
    {
      "epoch": 0.8780267473144675,
      "grad_norm": 0.5471028089523315,
      "learning_rate": 8.298267693706636e-06,
      "loss": 0.0225,
      "step": 536520
    },
    {
      "epoch": 0.8780594777531209,
      "grad_norm": 0.5802438855171204,
      "learning_rate": 8.29820180149312e-06,
      "loss": 0.0321,
      "step": 536540
    },
    {
      "epoch": 0.8780922081917741,
      "grad_norm": 0.8469368815422058,
      "learning_rate": 8.298135909279601e-06,
      "loss": 0.0353,
      "step": 536560
    },
    {
      "epoch": 0.8781249386304275,
      "grad_norm": 1.1850650310516357,
      "learning_rate": 8.298070017066085e-06,
      "loss": 0.0474,
      "step": 536580
    },
    {
      "epoch": 0.8781576690690809,
      "grad_norm": 1.0459538698196411,
      "learning_rate": 8.298004124852567e-06,
      "loss": 0.0292,
      "step": 536600
    },
    {
      "epoch": 0.8781903995077343,
      "grad_norm": 2.489851951599121,
      "learning_rate": 8.29793823263905e-06,
      "loss": 0.0472,
      "step": 536620
    },
    {
      "epoch": 0.8782231299463875,
      "grad_norm": 0.3813902735710144,
      "learning_rate": 8.297872340425532e-06,
      "loss": 0.0416,
      "step": 536640
    },
    {
      "epoch": 0.8782558603850409,
      "grad_norm": 1.743148684501648,
      "learning_rate": 8.297806448212016e-06,
      "loss": 0.0262,
      "step": 536660
    },
    {
      "epoch": 0.8782885908236943,
      "grad_norm": 0.7871232032775879,
      "learning_rate": 8.297740555998498e-06,
      "loss": 0.0281,
      "step": 536680
    },
    {
      "epoch": 0.8783213212623475,
      "grad_norm": 0.6974905729293823,
      "learning_rate": 8.297674663784981e-06,
      "loss": 0.0349,
      "step": 536700
    },
    {
      "epoch": 0.8783540517010009,
      "grad_norm": 1.6766502857208252,
      "learning_rate": 8.297608771571463e-06,
      "loss": 0.0296,
      "step": 536720
    },
    {
      "epoch": 0.8783867821396543,
      "grad_norm": 0.7325913906097412,
      "learning_rate": 8.297542879357947e-06,
      "loss": 0.0233,
      "step": 536740
    },
    {
      "epoch": 0.8784195125783075,
      "grad_norm": 0.3008859157562256,
      "learning_rate": 8.297476987144429e-06,
      "loss": 0.0256,
      "step": 536760
    },
    {
      "epoch": 0.8784522430169609,
      "grad_norm": 1.2037420272827148,
      "learning_rate": 8.297411094930912e-06,
      "loss": 0.0345,
      "step": 536780
    },
    {
      "epoch": 0.8784849734556143,
      "grad_norm": 0.3379954695701599,
      "learning_rate": 8.297345202717396e-06,
      "loss": 0.0286,
      "step": 536800
    },
    {
      "epoch": 0.8785177038942676,
      "grad_norm": 0.47098737955093384,
      "learning_rate": 8.297279310503878e-06,
      "loss": 0.0289,
      "step": 536820
    },
    {
      "epoch": 0.8785504343329209,
      "grad_norm": 0.7851662635803223,
      "learning_rate": 8.297213418290361e-06,
      "loss": 0.0268,
      "step": 536840
    },
    {
      "epoch": 0.8785831647715743,
      "grad_norm": 5.5315752029418945,
      "learning_rate": 8.297147526076845e-06,
      "loss": 0.0354,
      "step": 536860
    },
    {
      "epoch": 0.8786158952102276,
      "grad_norm": 0.47889089584350586,
      "learning_rate": 8.297081633863327e-06,
      "loss": 0.0322,
      "step": 536880
    },
    {
      "epoch": 0.8786486256488809,
      "grad_norm": 0.9925861358642578,
      "learning_rate": 8.29701574164981e-06,
      "loss": 0.0289,
      "step": 536900
    },
    {
      "epoch": 0.8786813560875343,
      "grad_norm": 1.3438829183578491,
      "learning_rate": 8.296949849436294e-06,
      "loss": 0.0337,
      "step": 536920
    },
    {
      "epoch": 0.8787140865261877,
      "grad_norm": 1.3189507722854614,
      "learning_rate": 8.296883957222776e-06,
      "loss": 0.0284,
      "step": 536940
    },
    {
      "epoch": 0.8787468169648409,
      "grad_norm": 0.511412501335144,
      "learning_rate": 8.29681806500926e-06,
      "loss": 0.0322,
      "step": 536960
    },
    {
      "epoch": 0.8787795474034943,
      "grad_norm": 0.4559832513332367,
      "learning_rate": 8.296752172795741e-06,
      "loss": 0.0412,
      "step": 536980
    },
    {
      "epoch": 0.8788122778421477,
      "grad_norm": 0.6377436518669128,
      "learning_rate": 8.296686280582225e-06,
      "loss": 0.0268,
      "step": 537000
    },
    {
      "epoch": 0.878845008280801,
      "grad_norm": 0.3452337086200714,
      "learning_rate": 8.296620388368707e-06,
      "loss": 0.0383,
      "step": 537020
    },
    {
      "epoch": 0.8788777387194543,
      "grad_norm": 0.8960562944412231,
      "learning_rate": 8.29655449615519e-06,
      "loss": 0.0275,
      "step": 537040
    },
    {
      "epoch": 0.8789104691581077,
      "grad_norm": 0.37957918643951416,
      "learning_rate": 8.296488603941672e-06,
      "loss": 0.0219,
      "step": 537060
    },
    {
      "epoch": 0.878943199596761,
      "grad_norm": 6.362910270690918,
      "learning_rate": 8.296422711728156e-06,
      "loss": 0.0275,
      "step": 537080
    },
    {
      "epoch": 0.8789759300354143,
      "grad_norm": 0.9922223091125488,
      "learning_rate": 8.296356819514638e-06,
      "loss": 0.0238,
      "step": 537100
    },
    {
      "epoch": 0.8790086604740677,
      "grad_norm": 0.9347914457321167,
      "learning_rate": 8.296290927301121e-06,
      "loss": 0.0246,
      "step": 537120
    },
    {
      "epoch": 0.879041390912721,
      "grad_norm": 0.929553210735321,
      "learning_rate": 8.296225035087603e-06,
      "loss": 0.0331,
      "step": 537140
    },
    {
      "epoch": 0.8790741213513743,
      "grad_norm": 1.083602786064148,
      "learning_rate": 8.296159142874087e-06,
      "loss": 0.0212,
      "step": 537160
    },
    {
      "epoch": 0.8791068517900277,
      "grad_norm": 2.258868455886841,
      "learning_rate": 8.296093250660569e-06,
      "loss": 0.0229,
      "step": 537180
    },
    {
      "epoch": 0.879139582228681,
      "grad_norm": 0.31906619668006897,
      "learning_rate": 8.296027358447052e-06,
      "loss": 0.0325,
      "step": 537200
    },
    {
      "epoch": 0.8791723126673344,
      "grad_norm": 0.5149238705635071,
      "learning_rate": 8.295961466233536e-06,
      "loss": 0.0249,
      "step": 537220
    },
    {
      "epoch": 0.8792050431059877,
      "grad_norm": 1.5391236543655396,
      "learning_rate": 8.295895574020018e-06,
      "loss": 0.0239,
      "step": 537240
    },
    {
      "epoch": 0.879237773544641,
      "grad_norm": 0.7657677531242371,
      "learning_rate": 8.295829681806501e-06,
      "loss": 0.0321,
      "step": 537260
    },
    {
      "epoch": 0.8792705039832944,
      "grad_norm": 1.0034329891204834,
      "learning_rate": 8.295763789592985e-06,
      "loss": 0.0281,
      "step": 537280
    },
    {
      "epoch": 0.8793032344219477,
      "grad_norm": 1.1764843463897705,
      "learning_rate": 8.295697897379469e-06,
      "loss": 0.0369,
      "step": 537300
    },
    {
      "epoch": 0.8793359648606011,
      "grad_norm": 1.7908902168273926,
      "learning_rate": 8.29563200516595e-06,
      "loss": 0.0293,
      "step": 537320
    },
    {
      "epoch": 0.8793686952992544,
      "grad_norm": 0.6344556212425232,
      "learning_rate": 8.295566112952434e-06,
      "loss": 0.0269,
      "step": 537340
    },
    {
      "epoch": 0.8794014257379077,
      "grad_norm": 0.3929785490036011,
      "learning_rate": 8.295500220738916e-06,
      "loss": 0.0306,
      "step": 537360
    },
    {
      "epoch": 0.8794341561765611,
      "grad_norm": 0.9850530624389648,
      "learning_rate": 8.2954343285254e-06,
      "loss": 0.0289,
      "step": 537380
    },
    {
      "epoch": 0.8794668866152144,
      "grad_norm": 2.648074150085449,
      "learning_rate": 8.295368436311881e-06,
      "loss": 0.0282,
      "step": 537400
    },
    {
      "epoch": 0.8794996170538678,
      "grad_norm": 3.139155626296997,
      "learning_rate": 8.295302544098365e-06,
      "loss": 0.0179,
      "step": 537420
    },
    {
      "epoch": 0.8795323474925211,
      "grad_norm": 2.5357859134674072,
      "learning_rate": 8.295236651884847e-06,
      "loss": 0.0247,
      "step": 537440
    },
    {
      "epoch": 0.8795650779311744,
      "grad_norm": 0.845116376876831,
      "learning_rate": 8.29517075967133e-06,
      "loss": 0.0312,
      "step": 537460
    },
    {
      "epoch": 0.8795978083698278,
      "grad_norm": 0.6525449752807617,
      "learning_rate": 8.295104867457812e-06,
      "loss": 0.0188,
      "step": 537480
    },
    {
      "epoch": 0.8796305388084811,
      "grad_norm": 0.6362116932868958,
      "learning_rate": 8.295038975244296e-06,
      "loss": 0.018,
      "step": 537500
    },
    {
      "epoch": 0.8796632692471344,
      "grad_norm": 0.27126002311706543,
      "learning_rate": 8.294973083030778e-06,
      "loss": 0.0248,
      "step": 537520
    },
    {
      "epoch": 0.8796959996857878,
      "grad_norm": 2.0828676223754883,
      "learning_rate": 8.294907190817261e-06,
      "loss": 0.0218,
      "step": 537540
    },
    {
      "epoch": 0.8797287301244411,
      "grad_norm": 0.3298657238483429,
      "learning_rate": 8.294841298603745e-06,
      "loss": 0.0261,
      "step": 537560
    },
    {
      "epoch": 0.8797614605630945,
      "grad_norm": 0.5780231952667236,
      "learning_rate": 8.294775406390227e-06,
      "loss": 0.0236,
      "step": 537580
    },
    {
      "epoch": 0.8797941910017478,
      "grad_norm": 0.47622063755989075,
      "learning_rate": 8.29470951417671e-06,
      "loss": 0.0287,
      "step": 537600
    },
    {
      "epoch": 0.8798269214404012,
      "grad_norm": 2.5669660568237305,
      "learning_rate": 8.294643621963192e-06,
      "loss": 0.0378,
      "step": 537620
    },
    {
      "epoch": 0.8798596518790545,
      "grad_norm": 0.8798154592514038,
      "learning_rate": 8.294577729749676e-06,
      "loss": 0.0309,
      "step": 537640
    },
    {
      "epoch": 0.8798923823177078,
      "grad_norm": 0.5514254570007324,
      "learning_rate": 8.29451183753616e-06,
      "loss": 0.0258,
      "step": 537660
    },
    {
      "epoch": 0.8799251127563612,
      "grad_norm": 0.45944124460220337,
      "learning_rate": 8.294445945322641e-06,
      "loss": 0.0397,
      "step": 537680
    },
    {
      "epoch": 0.8799578431950145,
      "grad_norm": 0.7441509366035461,
      "learning_rate": 8.294380053109125e-06,
      "loss": 0.0288,
      "step": 537700
    },
    {
      "epoch": 0.8799905736336678,
      "grad_norm": 0.6838583946228027,
      "learning_rate": 8.294314160895609e-06,
      "loss": 0.0304,
      "step": 537720
    },
    {
      "epoch": 0.8800233040723212,
      "grad_norm": 1.0983175039291382,
      "learning_rate": 8.29424826868209e-06,
      "loss": 0.0206,
      "step": 537740
    },
    {
      "epoch": 0.8800560345109745,
      "grad_norm": 0.49301162362098694,
      "learning_rate": 8.294182376468574e-06,
      "loss": 0.026,
      "step": 537760
    },
    {
      "epoch": 0.8800887649496278,
      "grad_norm": 0.4575393497943878,
      "learning_rate": 8.294116484255056e-06,
      "loss": 0.0324,
      "step": 537780
    },
    {
      "epoch": 0.8801214953882812,
      "grad_norm": 0.6083167791366577,
      "learning_rate": 8.29405059204154e-06,
      "loss": 0.0248,
      "step": 537800
    },
    {
      "epoch": 0.8801542258269345,
      "grad_norm": 1.4493908882141113,
      "learning_rate": 8.293984699828022e-06,
      "loss": 0.0282,
      "step": 537820
    },
    {
      "epoch": 0.8801869562655878,
      "grad_norm": 0.6806554794311523,
      "learning_rate": 8.293918807614505e-06,
      "loss": 0.0209,
      "step": 537840
    },
    {
      "epoch": 0.8802196867042412,
      "grad_norm": 1.4953978061676025,
      "learning_rate": 8.293852915400987e-06,
      "loss": 0.0307,
      "step": 537860
    },
    {
      "epoch": 0.8802524171428946,
      "grad_norm": 0.5806083083152771,
      "learning_rate": 8.29378702318747e-06,
      "loss": 0.0308,
      "step": 537880
    },
    {
      "epoch": 0.8802851475815479,
      "grad_norm": 0.6321797370910645,
      "learning_rate": 8.293721130973954e-06,
      "loss": 0.0213,
      "step": 537900
    },
    {
      "epoch": 0.8803178780202012,
      "grad_norm": 0.5051185488700867,
      "learning_rate": 8.293655238760436e-06,
      "loss": 0.0174,
      "step": 537920
    },
    {
      "epoch": 0.8803506084588546,
      "grad_norm": 0.39519697427749634,
      "learning_rate": 8.29358934654692e-06,
      "loss": 0.0216,
      "step": 537940
    },
    {
      "epoch": 0.8803833388975079,
      "grad_norm": 0.701417863368988,
      "learning_rate": 8.293523454333402e-06,
      "loss": 0.0259,
      "step": 537960
    },
    {
      "epoch": 0.8804160693361612,
      "grad_norm": 0.9381015300750732,
      "learning_rate": 8.293457562119885e-06,
      "loss": 0.0266,
      "step": 537980
    },
    {
      "epoch": 0.8804487997748146,
      "grad_norm": 3.056990146636963,
      "learning_rate": 8.293391669906367e-06,
      "loss": 0.0361,
      "step": 538000
    },
    {
      "epoch": 0.8804815302134679,
      "grad_norm": 0.8340107202529907,
      "learning_rate": 8.29332577769285e-06,
      "loss": 0.0386,
      "step": 538020
    },
    {
      "epoch": 0.8805142606521212,
      "grad_norm": 0.9821218848228455,
      "learning_rate": 8.293259885479334e-06,
      "loss": 0.0382,
      "step": 538040
    },
    {
      "epoch": 0.8805469910907746,
      "grad_norm": 1.0481826066970825,
      "learning_rate": 8.293193993265816e-06,
      "loss": 0.0417,
      "step": 538060
    },
    {
      "epoch": 0.880579721529428,
      "grad_norm": 0.9611753821372986,
      "learning_rate": 8.2931281010523e-06,
      "loss": 0.0395,
      "step": 538080
    },
    {
      "epoch": 0.8806124519680812,
      "grad_norm": 0.6113828420639038,
      "learning_rate": 8.293062208838783e-06,
      "loss": 0.0324,
      "step": 538100
    },
    {
      "epoch": 0.8806451824067346,
      "grad_norm": 1.1666539907455444,
      "learning_rate": 8.292996316625265e-06,
      "loss": 0.0269,
      "step": 538120
    },
    {
      "epoch": 0.880677912845388,
      "grad_norm": 0.4112299978733063,
      "learning_rate": 8.292930424411749e-06,
      "loss": 0.0243,
      "step": 538140
    },
    {
      "epoch": 0.8807106432840413,
      "grad_norm": 1.3572202920913696,
      "learning_rate": 8.29286453219823e-06,
      "loss": 0.0267,
      "step": 538160
    },
    {
      "epoch": 0.8807433737226946,
      "grad_norm": 0.49009576439857483,
      "learning_rate": 8.292798639984714e-06,
      "loss": 0.0235,
      "step": 538180
    },
    {
      "epoch": 0.880776104161348,
      "grad_norm": 1.3572443723678589,
      "learning_rate": 8.292732747771196e-06,
      "loss": 0.0284,
      "step": 538200
    },
    {
      "epoch": 0.8808088346000013,
      "grad_norm": 1.1087262630462646,
      "learning_rate": 8.29266685555768e-06,
      "loss": 0.035,
      "step": 538220
    },
    {
      "epoch": 0.8808415650386546,
      "grad_norm": 0.8676367402076721,
      "learning_rate": 8.292600963344162e-06,
      "loss": 0.0273,
      "step": 538240
    },
    {
      "epoch": 0.880874295477308,
      "grad_norm": 0.6307790279388428,
      "learning_rate": 8.292535071130645e-06,
      "loss": 0.0253,
      "step": 538260
    },
    {
      "epoch": 0.8809070259159614,
      "grad_norm": 0.6962662935256958,
      "learning_rate": 8.292469178917129e-06,
      "loss": 0.0212,
      "step": 538280
    },
    {
      "epoch": 0.8809397563546146,
      "grad_norm": 0.0833471491932869,
      "learning_rate": 8.29240328670361e-06,
      "loss": 0.0273,
      "step": 538300
    },
    {
      "epoch": 0.880972486793268,
      "grad_norm": 0.6492610573768616,
      "learning_rate": 8.292337394490094e-06,
      "loss": 0.0246,
      "step": 538320
    },
    {
      "epoch": 0.8810052172319214,
      "grad_norm": 0.5635924935340881,
      "learning_rate": 8.292271502276576e-06,
      "loss": 0.0295,
      "step": 538340
    },
    {
      "epoch": 0.8810379476705746,
      "grad_norm": 1.7747536897659302,
      "learning_rate": 8.29220561006306e-06,
      "loss": 0.0291,
      "step": 538360
    },
    {
      "epoch": 0.881070678109228,
      "grad_norm": 0.6798167824745178,
      "learning_rate": 8.292139717849542e-06,
      "loss": 0.0363,
      "step": 538380
    },
    {
      "epoch": 0.8811034085478814,
      "grad_norm": 2.8833603858947754,
      "learning_rate": 8.292073825636025e-06,
      "loss": 0.0287,
      "step": 538400
    },
    {
      "epoch": 0.8811361389865346,
      "grad_norm": 1.262761116027832,
      "learning_rate": 8.292007933422507e-06,
      "loss": 0.0313,
      "step": 538420
    },
    {
      "epoch": 0.881168869425188,
      "grad_norm": 0.9517382383346558,
      "learning_rate": 8.29194204120899e-06,
      "loss": 0.0232,
      "step": 538440
    },
    {
      "epoch": 0.8812015998638414,
      "grad_norm": 1.0239031314849854,
      "learning_rate": 8.291876148995474e-06,
      "loss": 0.0206,
      "step": 538460
    },
    {
      "epoch": 0.8812343303024948,
      "grad_norm": 1.3299442529678345,
      "learning_rate": 8.291810256781956e-06,
      "loss": 0.0264,
      "step": 538480
    },
    {
      "epoch": 0.881267060741148,
      "grad_norm": 0.3798946440219879,
      "learning_rate": 8.29174436456844e-06,
      "loss": 0.0322,
      "step": 538500
    },
    {
      "epoch": 0.8812997911798014,
      "grad_norm": 2.7747230529785156,
      "learning_rate": 8.291678472354923e-06,
      "loss": 0.0295,
      "step": 538520
    },
    {
      "epoch": 0.8813325216184548,
      "grad_norm": 0.7070407867431641,
      "learning_rate": 8.291612580141405e-06,
      "loss": 0.0272,
      "step": 538540
    },
    {
      "epoch": 0.881365252057108,
      "grad_norm": 0.3232225179672241,
      "learning_rate": 8.291546687927889e-06,
      "loss": 0.0195,
      "step": 538560
    },
    {
      "epoch": 0.8813979824957614,
      "grad_norm": 0.4267636239528656,
      "learning_rate": 8.29148079571437e-06,
      "loss": 0.0297,
      "step": 538580
    },
    {
      "epoch": 0.8814307129344148,
      "grad_norm": 0.728112518787384,
      "learning_rate": 8.291414903500854e-06,
      "loss": 0.0289,
      "step": 538600
    },
    {
      "epoch": 0.881463443373068,
      "grad_norm": 1.3847737312316895,
      "learning_rate": 8.291349011287338e-06,
      "loss": 0.0337,
      "step": 538620
    },
    {
      "epoch": 0.8814961738117214,
      "grad_norm": 0.4522049129009247,
      "learning_rate": 8.29128311907382e-06,
      "loss": 0.0275,
      "step": 538640
    },
    {
      "epoch": 0.8815289042503748,
      "grad_norm": 0.6988471150398254,
      "learning_rate": 8.291217226860303e-06,
      "loss": 0.0394,
      "step": 538660
    },
    {
      "epoch": 0.8815616346890282,
      "grad_norm": 1.7500287294387817,
      "learning_rate": 8.291151334646785e-06,
      "loss": 0.0383,
      "step": 538680
    },
    {
      "epoch": 0.8815943651276814,
      "grad_norm": 0.5577055811882019,
      "learning_rate": 8.291085442433269e-06,
      "loss": 0.0415,
      "step": 538700
    },
    {
      "epoch": 0.8816270955663348,
      "grad_norm": 1.2411340475082397,
      "learning_rate": 8.29101955021975e-06,
      "loss": 0.0295,
      "step": 538720
    },
    {
      "epoch": 0.8816598260049882,
      "grad_norm": 1.7146416902542114,
      "learning_rate": 8.290953658006234e-06,
      "loss": 0.0307,
      "step": 538740
    },
    {
      "epoch": 0.8816925564436414,
      "grad_norm": 2.024754047393799,
      "learning_rate": 8.290887765792716e-06,
      "loss": 0.0385,
      "step": 538760
    },
    {
      "epoch": 0.8817252868822948,
      "grad_norm": 0.39405736327171326,
      "learning_rate": 8.2908218735792e-06,
      "loss": 0.0302,
      "step": 538780
    },
    {
      "epoch": 0.8817580173209482,
      "grad_norm": 0.4927791953086853,
      "learning_rate": 8.290755981365682e-06,
      "loss": 0.021,
      "step": 538800
    },
    {
      "epoch": 0.8817907477596014,
      "grad_norm": 0.6467844843864441,
      "learning_rate": 8.290690089152165e-06,
      "loss": 0.0298,
      "step": 538820
    },
    {
      "epoch": 0.8818234781982548,
      "grad_norm": 1.0552934408187866,
      "learning_rate": 8.290624196938649e-06,
      "loss": 0.0341,
      "step": 538840
    },
    {
      "epoch": 0.8818562086369082,
      "grad_norm": 0.9707609415054321,
      "learning_rate": 8.29055830472513e-06,
      "loss": 0.0278,
      "step": 538860
    },
    {
      "epoch": 0.8818889390755615,
      "grad_norm": 0.30310043692588806,
      "learning_rate": 8.290492412511614e-06,
      "loss": 0.0224,
      "step": 538880
    },
    {
      "epoch": 0.8819216695142148,
      "grad_norm": 1.0458229780197144,
      "learning_rate": 8.290426520298098e-06,
      "loss": 0.035,
      "step": 538900
    },
    {
      "epoch": 0.8819543999528682,
      "grad_norm": 4.246644020080566,
      "learning_rate": 8.29036062808458e-06,
      "loss": 0.0231,
      "step": 538920
    },
    {
      "epoch": 0.8819871303915215,
      "grad_norm": 1.3370709419250488,
      "learning_rate": 8.290294735871063e-06,
      "loss": 0.0299,
      "step": 538940
    },
    {
      "epoch": 0.8820198608301748,
      "grad_norm": 0.10825373977422714,
      "learning_rate": 8.290228843657547e-06,
      "loss": 0.0349,
      "step": 538960
    },
    {
      "epoch": 0.8820525912688282,
      "grad_norm": 0.6322203874588013,
      "learning_rate": 8.290162951444029e-06,
      "loss": 0.0203,
      "step": 538980
    },
    {
      "epoch": 0.8820853217074816,
      "grad_norm": 0.8685424327850342,
      "learning_rate": 8.290097059230512e-06,
      "loss": 0.0278,
      "step": 539000
    },
    {
      "epoch": 0.8821180521461348,
      "grad_norm": 2.036693572998047,
      "learning_rate": 8.290031167016994e-06,
      "loss": 0.0301,
      "step": 539020
    },
    {
      "epoch": 0.8821507825847882,
      "grad_norm": 1.1369184255599976,
      "learning_rate": 8.289965274803478e-06,
      "loss": 0.029,
      "step": 539040
    },
    {
      "epoch": 0.8821835130234416,
      "grad_norm": 0.8248852491378784,
      "learning_rate": 8.28989938258996e-06,
      "loss": 0.0289,
      "step": 539060
    },
    {
      "epoch": 0.8822162434620949,
      "grad_norm": 0.6326245665550232,
      "learning_rate": 8.289833490376443e-06,
      "loss": 0.0194,
      "step": 539080
    },
    {
      "epoch": 0.8822489739007482,
      "grad_norm": 1.3046228885650635,
      "learning_rate": 8.289767598162925e-06,
      "loss": 0.0275,
      "step": 539100
    },
    {
      "epoch": 0.8822817043394016,
      "grad_norm": 0.9567867517471313,
      "learning_rate": 8.289701705949409e-06,
      "loss": 0.0221,
      "step": 539120
    },
    {
      "epoch": 0.8823144347780549,
      "grad_norm": 0.9757305383682251,
      "learning_rate": 8.28963581373589e-06,
      "loss": 0.0238,
      "step": 539140
    },
    {
      "epoch": 0.8823471652167082,
      "grad_norm": 1.1218466758728027,
      "learning_rate": 8.289569921522374e-06,
      "loss": 0.0229,
      "step": 539160
    },
    {
      "epoch": 0.8823798956553616,
      "grad_norm": 1.2235296964645386,
      "learning_rate": 8.289504029308856e-06,
      "loss": 0.029,
      "step": 539180
    },
    {
      "epoch": 0.8824126260940149,
      "grad_norm": 0.2533578872680664,
      "learning_rate": 8.28943813709534e-06,
      "loss": 0.0259,
      "step": 539200
    },
    {
      "epoch": 0.8824453565326682,
      "grad_norm": 0.5446888208389282,
      "learning_rate": 8.289372244881822e-06,
      "loss": 0.0392,
      "step": 539220
    },
    {
      "epoch": 0.8824780869713216,
      "grad_norm": 1.7557787895202637,
      "learning_rate": 8.289306352668305e-06,
      "loss": 0.0382,
      "step": 539240
    },
    {
      "epoch": 0.882510817409975,
      "grad_norm": 0.6653765439987183,
      "learning_rate": 8.289240460454789e-06,
      "loss": 0.0234,
      "step": 539260
    },
    {
      "epoch": 0.8825435478486283,
      "grad_norm": 1.0200611352920532,
      "learning_rate": 8.28917456824127e-06,
      "loss": 0.0296,
      "step": 539280
    },
    {
      "epoch": 0.8825762782872816,
      "grad_norm": 0.7271296977996826,
      "learning_rate": 8.289108676027754e-06,
      "loss": 0.035,
      "step": 539300
    },
    {
      "epoch": 0.882609008725935,
      "grad_norm": 0.7166494131088257,
      "learning_rate": 8.289042783814238e-06,
      "loss": 0.0319,
      "step": 539320
    },
    {
      "epoch": 0.8826417391645883,
      "grad_norm": 0.30884435772895813,
      "learning_rate": 8.28897689160072e-06,
      "loss": 0.0398,
      "step": 539340
    },
    {
      "epoch": 0.8826744696032416,
      "grad_norm": 0.6631876230239868,
      "learning_rate": 8.288910999387203e-06,
      "loss": 0.0257,
      "step": 539360
    },
    {
      "epoch": 0.882707200041895,
      "grad_norm": 2.7415075302124023,
      "learning_rate": 8.288845107173687e-06,
      "loss": 0.0387,
      "step": 539380
    },
    {
      "epoch": 0.8827399304805483,
      "grad_norm": 0.42359375953674316,
      "learning_rate": 8.288779214960169e-06,
      "loss": 0.0271,
      "step": 539400
    },
    {
      "epoch": 0.8827726609192016,
      "grad_norm": 1.5516868829727173,
      "learning_rate": 8.288713322746653e-06,
      "loss": 0.0349,
      "step": 539420
    },
    {
      "epoch": 0.882805391357855,
      "grad_norm": 1.0963661670684814,
      "learning_rate": 8.288647430533134e-06,
      "loss": 0.0177,
      "step": 539440
    },
    {
      "epoch": 0.8828381217965083,
      "grad_norm": 1.5415986776351929,
      "learning_rate": 8.288581538319618e-06,
      "loss": 0.0245,
      "step": 539460
    },
    {
      "epoch": 0.8828708522351617,
      "grad_norm": 0.9385822415351868,
      "learning_rate": 8.2885156461061e-06,
      "loss": 0.0259,
      "step": 539480
    },
    {
      "epoch": 0.882903582673815,
      "grad_norm": 0.6182423830032349,
      "learning_rate": 8.288449753892583e-06,
      "loss": 0.0198,
      "step": 539500
    },
    {
      "epoch": 0.8829363131124683,
      "grad_norm": 2.121826648712158,
      "learning_rate": 8.288383861679065e-06,
      "loss": 0.0362,
      "step": 539520
    },
    {
      "epoch": 0.8829690435511217,
      "grad_norm": 0.4660954773426056,
      "learning_rate": 8.288317969465549e-06,
      "loss": 0.0273,
      "step": 539540
    },
    {
      "epoch": 0.883001773989775,
      "grad_norm": 1.6090706586837769,
      "learning_rate": 8.288252077252031e-06,
      "loss": 0.0268,
      "step": 539560
    },
    {
      "epoch": 0.8830345044284283,
      "grad_norm": 0.9643650054931641,
      "learning_rate": 8.288186185038514e-06,
      "loss": 0.0266,
      "step": 539580
    },
    {
      "epoch": 0.8830672348670817,
      "grad_norm": 0.4310435652732849,
      "learning_rate": 8.288120292824996e-06,
      "loss": 0.0233,
      "step": 539600
    },
    {
      "epoch": 0.883099965305735,
      "grad_norm": 3.3041696548461914,
      "learning_rate": 8.28805440061148e-06,
      "loss": 0.0342,
      "step": 539620
    },
    {
      "epoch": 0.8831326957443884,
      "grad_norm": 1.3094271421432495,
      "learning_rate": 8.287988508397964e-06,
      "loss": 0.0315,
      "step": 539640
    },
    {
      "epoch": 0.8831654261830417,
      "grad_norm": 0.6146628260612488,
      "learning_rate": 8.287922616184445e-06,
      "loss": 0.0315,
      "step": 539660
    },
    {
      "epoch": 0.8831981566216951,
      "grad_norm": 1.0609790086746216,
      "learning_rate": 8.287856723970929e-06,
      "loss": 0.0261,
      "step": 539680
    },
    {
      "epoch": 0.8832308870603484,
      "grad_norm": 1.219718337059021,
      "learning_rate": 8.287790831757413e-06,
      "loss": 0.0242,
      "step": 539700
    },
    {
      "epoch": 0.8832636174990017,
      "grad_norm": 0.716396152973175,
      "learning_rate": 8.287724939543894e-06,
      "loss": 0.0301,
      "step": 539720
    },
    {
      "epoch": 0.8832963479376551,
      "grad_norm": 0.3848353922367096,
      "learning_rate": 8.287659047330378e-06,
      "loss": 0.0186,
      "step": 539740
    },
    {
      "epoch": 0.8833290783763084,
      "grad_norm": 1.454846978187561,
      "learning_rate": 8.287593155116862e-06,
      "loss": 0.0263,
      "step": 539760
    },
    {
      "epoch": 0.8833618088149617,
      "grad_norm": 0.7129600048065186,
      "learning_rate": 8.287527262903344e-06,
      "loss": 0.0233,
      "step": 539780
    },
    {
      "epoch": 0.8833945392536151,
      "grad_norm": 1.2606213092803955,
      "learning_rate": 8.287461370689827e-06,
      "loss": 0.0364,
      "step": 539800
    },
    {
      "epoch": 0.8834272696922684,
      "grad_norm": 0.24313417077064514,
      "learning_rate": 8.287395478476309e-06,
      "loss": 0.0327,
      "step": 539820
    },
    {
      "epoch": 0.8834600001309217,
      "grad_norm": 2.5344009399414062,
      "learning_rate": 8.287329586262793e-06,
      "loss": 0.0311,
      "step": 539840
    },
    {
      "epoch": 0.8834927305695751,
      "grad_norm": 0.8372963070869446,
      "learning_rate": 8.287263694049275e-06,
      "loss": 0.0346,
      "step": 539860
    },
    {
      "epoch": 0.8835254610082285,
      "grad_norm": 0.3192761242389679,
      "learning_rate": 8.287197801835758e-06,
      "loss": 0.0228,
      "step": 539880
    },
    {
      "epoch": 0.8835581914468817,
      "grad_norm": 1.9281458854675293,
      "learning_rate": 8.28713190962224e-06,
      "loss": 0.0301,
      "step": 539900
    },
    {
      "epoch": 0.8835909218855351,
      "grad_norm": 0.32999560236930847,
      "learning_rate": 8.287066017408724e-06,
      "loss": 0.0206,
      "step": 539920
    },
    {
      "epoch": 0.8836236523241885,
      "grad_norm": 0.4456034302711487,
      "learning_rate": 8.287000125195205e-06,
      "loss": 0.0281,
      "step": 539940
    },
    {
      "epoch": 0.8836563827628418,
      "grad_norm": 1.4644430875778198,
      "learning_rate": 8.286934232981689e-06,
      "loss": 0.0299,
      "step": 539960
    },
    {
      "epoch": 0.8836891132014951,
      "grad_norm": 1.0742732286453247,
      "learning_rate": 8.286868340768171e-06,
      "loss": 0.0194,
      "step": 539980
    },
    {
      "epoch": 0.8837218436401485,
      "grad_norm": 0.1841525286436081,
      "learning_rate": 8.286802448554655e-06,
      "loss": 0.0255,
      "step": 540000
    },
    {
      "epoch": 0.8837545740788018,
      "grad_norm": 0.766701877117157,
      "learning_rate": 8.286736556341138e-06,
      "loss": 0.0193,
      "step": 540020
    },
    {
      "epoch": 0.8837873045174551,
      "grad_norm": 0.5968343615531921,
      "learning_rate": 8.28667066412762e-06,
      "loss": 0.0369,
      "step": 540040
    },
    {
      "epoch": 0.8838200349561085,
      "grad_norm": 0.32487478852272034,
      "learning_rate": 8.286604771914104e-06,
      "loss": 0.0225,
      "step": 540060
    },
    {
      "epoch": 0.8838527653947619,
      "grad_norm": 0.886654257774353,
      "learning_rate": 8.286538879700587e-06,
      "loss": 0.0257,
      "step": 540080
    },
    {
      "epoch": 0.8838854958334151,
      "grad_norm": 0.7198247909545898,
      "learning_rate": 8.286472987487069e-06,
      "loss": 0.0305,
      "step": 540100
    },
    {
      "epoch": 0.8839182262720685,
      "grad_norm": 0.7303560972213745,
      "learning_rate": 8.286407095273553e-06,
      "loss": 0.0314,
      "step": 540120
    },
    {
      "epoch": 0.8839509567107219,
      "grad_norm": 0.5067564845085144,
      "learning_rate": 8.286341203060036e-06,
      "loss": 0.0342,
      "step": 540140
    },
    {
      "epoch": 0.8839836871493751,
      "grad_norm": 0.5405961275100708,
      "learning_rate": 8.286275310846518e-06,
      "loss": 0.0372,
      "step": 540160
    },
    {
      "epoch": 0.8840164175880285,
      "grad_norm": 0.9755562543869019,
      "learning_rate": 8.286209418633002e-06,
      "loss": 0.029,
      "step": 540180
    },
    {
      "epoch": 0.8840491480266819,
      "grad_norm": 0.8438456058502197,
      "learning_rate": 8.286143526419484e-06,
      "loss": 0.0324,
      "step": 540200
    },
    {
      "epoch": 0.8840818784653351,
      "grad_norm": 0.808769702911377,
      "learning_rate": 8.286077634205967e-06,
      "loss": 0.0256,
      "step": 540220
    },
    {
      "epoch": 0.8841146089039885,
      "grad_norm": 0.40111419558525085,
      "learning_rate": 8.286011741992449e-06,
      "loss": 0.0303,
      "step": 540240
    },
    {
      "epoch": 0.8841473393426419,
      "grad_norm": 0.436900794506073,
      "learning_rate": 8.285945849778933e-06,
      "loss": 0.0263,
      "step": 540260
    },
    {
      "epoch": 0.8841800697812953,
      "grad_norm": 0.6762847900390625,
      "learning_rate": 8.285879957565415e-06,
      "loss": 0.0298,
      "step": 540280
    },
    {
      "epoch": 0.8842128002199485,
      "grad_norm": 1.4342342615127563,
      "learning_rate": 8.285814065351898e-06,
      "loss": 0.0409,
      "step": 540300
    },
    {
      "epoch": 0.8842455306586019,
      "grad_norm": 0.627432107925415,
      "learning_rate": 8.28574817313838e-06,
      "loss": 0.0297,
      "step": 540320
    },
    {
      "epoch": 0.8842782610972553,
      "grad_norm": 0.810319721698761,
      "learning_rate": 8.285682280924864e-06,
      "loss": 0.0444,
      "step": 540340
    },
    {
      "epoch": 0.8843109915359085,
      "grad_norm": 0.4598536789417267,
      "learning_rate": 8.285616388711346e-06,
      "loss": 0.0246,
      "step": 540360
    },
    {
      "epoch": 0.8843437219745619,
      "grad_norm": 0.34835872054100037,
      "learning_rate": 8.285550496497829e-06,
      "loss": 0.0265,
      "step": 540380
    },
    {
      "epoch": 0.8843764524132153,
      "grad_norm": 0.49500611424446106,
      "learning_rate": 8.285484604284313e-06,
      "loss": 0.0314,
      "step": 540400
    },
    {
      "epoch": 0.8844091828518685,
      "grad_norm": 0.7467554807662964,
      "learning_rate": 8.285418712070795e-06,
      "loss": 0.028,
      "step": 540420
    },
    {
      "epoch": 0.8844419132905219,
      "grad_norm": 2.5904407501220703,
      "learning_rate": 8.285352819857278e-06,
      "loss": 0.0328,
      "step": 540440
    },
    {
      "epoch": 0.8844746437291753,
      "grad_norm": 0.4363350570201874,
      "learning_rate": 8.28528692764376e-06,
      "loss": 0.0252,
      "step": 540460
    },
    {
      "epoch": 0.8845073741678287,
      "grad_norm": 0.6461254358291626,
      "learning_rate": 8.285221035430244e-06,
      "loss": 0.0289,
      "step": 540480
    },
    {
      "epoch": 0.8845401046064819,
      "grad_norm": 0.5199773907661438,
      "learning_rate": 8.285155143216727e-06,
      "loss": 0.0302,
      "step": 540500
    },
    {
      "epoch": 0.8845728350451353,
      "grad_norm": 0.9528919458389282,
      "learning_rate": 8.28508925100321e-06,
      "loss": 0.0213,
      "step": 540520
    },
    {
      "epoch": 0.8846055654837887,
      "grad_norm": 0.35736480355262756,
      "learning_rate": 8.285023358789693e-06,
      "loss": 0.0245,
      "step": 540540
    },
    {
      "epoch": 0.8846382959224419,
      "grad_norm": 0.6367403864860535,
      "learning_rate": 8.284957466576176e-06,
      "loss": 0.0206,
      "step": 540560
    },
    {
      "epoch": 0.8846710263610953,
      "grad_norm": 0.6745142340660095,
      "learning_rate": 8.284891574362658e-06,
      "loss": 0.0283,
      "step": 540580
    },
    {
      "epoch": 0.8847037567997487,
      "grad_norm": 1.2438119649887085,
      "learning_rate": 8.284825682149142e-06,
      "loss": 0.029,
      "step": 540600
    },
    {
      "epoch": 0.8847364872384019,
      "grad_norm": 0.45653587579727173,
      "learning_rate": 8.284759789935624e-06,
      "loss": 0.0188,
      "step": 540620
    },
    {
      "epoch": 0.8847692176770553,
      "grad_norm": 1.6470646858215332,
      "learning_rate": 8.284693897722107e-06,
      "loss": 0.0375,
      "step": 540640
    },
    {
      "epoch": 0.8848019481157087,
      "grad_norm": 0.7054855823516846,
      "learning_rate": 8.28462800550859e-06,
      "loss": 0.0266,
      "step": 540660
    },
    {
      "epoch": 0.8848346785543619,
      "grad_norm": 0.661323606967926,
      "learning_rate": 8.284562113295073e-06,
      "loss": 0.0446,
      "step": 540680
    },
    {
      "epoch": 0.8848674089930153,
      "grad_norm": 0.3041393756866455,
      "learning_rate": 8.284496221081555e-06,
      "loss": 0.0322,
      "step": 540700
    },
    {
      "epoch": 0.8849001394316687,
      "grad_norm": 1.3581445217132568,
      "learning_rate": 8.284430328868038e-06,
      "loss": 0.0302,
      "step": 540720
    },
    {
      "epoch": 0.884932869870322,
      "grad_norm": 0.8881554007530212,
      "learning_rate": 8.284364436654522e-06,
      "loss": 0.0277,
      "step": 540740
    },
    {
      "epoch": 0.8849656003089753,
      "grad_norm": 0.7394424080848694,
      "learning_rate": 8.284298544441004e-06,
      "loss": 0.0239,
      "step": 540760
    },
    {
      "epoch": 0.8849983307476287,
      "grad_norm": 0.7175396680831909,
      "learning_rate": 8.284232652227487e-06,
      "loss": 0.0238,
      "step": 540780
    },
    {
      "epoch": 0.885031061186282,
      "grad_norm": 0.35605406761169434,
      "learning_rate": 8.28416676001397e-06,
      "loss": 0.0287,
      "step": 540800
    },
    {
      "epoch": 0.8850637916249353,
      "grad_norm": 0.7078781723976135,
      "learning_rate": 8.284100867800453e-06,
      "loss": 0.0326,
      "step": 540820
    },
    {
      "epoch": 0.8850965220635887,
      "grad_norm": 1.7453231811523438,
      "learning_rate": 8.284034975586935e-06,
      "loss": 0.028,
      "step": 540840
    },
    {
      "epoch": 0.8851292525022421,
      "grad_norm": 0.7037231922149658,
      "learning_rate": 8.283969083373418e-06,
      "loss": 0.0296,
      "step": 540860
    },
    {
      "epoch": 0.8851619829408953,
      "grad_norm": 2.078569173812866,
      "learning_rate": 8.283903191159902e-06,
      "loss": 0.0225,
      "step": 540880
    },
    {
      "epoch": 0.8851947133795487,
      "grad_norm": 0.4810028076171875,
      "learning_rate": 8.283837298946384e-06,
      "loss": 0.0314,
      "step": 540900
    },
    {
      "epoch": 0.8852274438182021,
      "grad_norm": 1.2567026615142822,
      "learning_rate": 8.283771406732867e-06,
      "loss": 0.0268,
      "step": 540920
    },
    {
      "epoch": 0.8852601742568554,
      "grad_norm": 1.452471375465393,
      "learning_rate": 8.283705514519351e-06,
      "loss": 0.0277,
      "step": 540940
    },
    {
      "epoch": 0.8852929046955087,
      "grad_norm": 0.981948971748352,
      "learning_rate": 8.283639622305833e-06,
      "loss": 0.0373,
      "step": 540960
    },
    {
      "epoch": 0.8853256351341621,
      "grad_norm": 2.063321352005005,
      "learning_rate": 8.283573730092316e-06,
      "loss": 0.0242,
      "step": 540980
    },
    {
      "epoch": 0.8853583655728154,
      "grad_norm": 0.6821326017379761,
      "learning_rate": 8.283507837878798e-06,
      "loss": 0.0222,
      "step": 541000
    },
    {
      "epoch": 0.8853910960114687,
      "grad_norm": 0.7390217781066895,
      "learning_rate": 8.283441945665282e-06,
      "loss": 0.025,
      "step": 541020
    },
    {
      "epoch": 0.8854238264501221,
      "grad_norm": 0.7430267930030823,
      "learning_rate": 8.283376053451764e-06,
      "loss": 0.0252,
      "step": 541040
    },
    {
      "epoch": 0.8854565568887754,
      "grad_norm": 0.26417791843414307,
      "learning_rate": 8.283310161238247e-06,
      "loss": 0.0364,
      "step": 541060
    },
    {
      "epoch": 0.8854892873274287,
      "grad_norm": 1.1369847059249878,
      "learning_rate": 8.283244269024731e-06,
      "loss": 0.0377,
      "step": 541080
    },
    {
      "epoch": 0.8855220177660821,
      "grad_norm": 1.031947374343872,
      "learning_rate": 8.283178376811213e-06,
      "loss": 0.0342,
      "step": 541100
    },
    {
      "epoch": 0.8855547482047355,
      "grad_norm": 1.5604674816131592,
      "learning_rate": 8.283112484597696e-06,
      "loss": 0.0276,
      "step": 541120
    },
    {
      "epoch": 0.8855874786433888,
      "grad_norm": 0.5168048143386841,
      "learning_rate": 8.283046592384178e-06,
      "loss": 0.0206,
      "step": 541140
    },
    {
      "epoch": 0.8856202090820421,
      "grad_norm": 1.6844227313995361,
      "learning_rate": 8.282980700170662e-06,
      "loss": 0.034,
      "step": 541160
    },
    {
      "epoch": 0.8856529395206955,
      "grad_norm": 0.2245921790599823,
      "learning_rate": 8.282914807957144e-06,
      "loss": 0.025,
      "step": 541180
    },
    {
      "epoch": 0.8856856699593488,
      "grad_norm": 1.41788911819458,
      "learning_rate": 8.282848915743627e-06,
      "loss": 0.0325,
      "step": 541200
    },
    {
      "epoch": 0.8857184003980021,
      "grad_norm": 0.36947014927864075,
      "learning_rate": 8.28278302353011e-06,
      "loss": 0.0315,
      "step": 541220
    },
    {
      "epoch": 0.8857511308366555,
      "grad_norm": 1.171569585800171,
      "learning_rate": 8.282717131316593e-06,
      "loss": 0.0434,
      "step": 541240
    },
    {
      "epoch": 0.8857838612753088,
      "grad_norm": 0.7453356981277466,
      "learning_rate": 8.282651239103075e-06,
      "loss": 0.031,
      "step": 541260
    },
    {
      "epoch": 0.8858165917139621,
      "grad_norm": 1.5106699466705322,
      "learning_rate": 8.282585346889558e-06,
      "loss": 0.0291,
      "step": 541280
    },
    {
      "epoch": 0.8858493221526155,
      "grad_norm": 1.5326706171035767,
      "learning_rate": 8.282519454676042e-06,
      "loss": 0.0463,
      "step": 541300
    },
    {
      "epoch": 0.8858820525912688,
      "grad_norm": 1.3897053003311157,
      "learning_rate": 8.282453562462524e-06,
      "loss": 0.0272,
      "step": 541320
    },
    {
      "epoch": 0.8859147830299222,
      "grad_norm": 3.1709494590759277,
      "learning_rate": 8.282387670249007e-06,
      "loss": 0.0256,
      "step": 541340
    },
    {
      "epoch": 0.8859475134685755,
      "grad_norm": 1.2213467359542847,
      "learning_rate": 8.282321778035491e-06,
      "loss": 0.0339,
      "step": 541360
    },
    {
      "epoch": 0.8859802439072288,
      "grad_norm": 3.5532243251800537,
      "learning_rate": 8.282255885821973e-06,
      "loss": 0.0299,
      "step": 541380
    },
    {
      "epoch": 0.8860129743458822,
      "grad_norm": 2.128150701522827,
      "learning_rate": 8.282189993608456e-06,
      "loss": 0.0312,
      "step": 541400
    },
    {
      "epoch": 0.8860457047845355,
      "grad_norm": 0.404730886220932,
      "learning_rate": 8.28212410139494e-06,
      "loss": 0.0246,
      "step": 541420
    },
    {
      "epoch": 0.8860784352231889,
      "grad_norm": 1.535872459411621,
      "learning_rate": 8.282058209181422e-06,
      "loss": 0.0288,
      "step": 541440
    },
    {
      "epoch": 0.8861111656618422,
      "grad_norm": 1.3358850479125977,
      "learning_rate": 8.281992316967906e-06,
      "loss": 0.026,
      "step": 541460
    },
    {
      "epoch": 0.8861438961004955,
      "grad_norm": 1.4612919092178345,
      "learning_rate": 8.281926424754387e-06,
      "loss": 0.0198,
      "step": 541480
    },
    {
      "epoch": 0.8861766265391489,
      "grad_norm": 0.7565774917602539,
      "learning_rate": 8.281860532540871e-06,
      "loss": 0.0234,
      "step": 541500
    },
    {
      "epoch": 0.8862093569778022,
      "grad_norm": 1.6297489404678345,
      "learning_rate": 8.281794640327353e-06,
      "loss": 0.0202,
      "step": 541520
    },
    {
      "epoch": 0.8862420874164556,
      "grad_norm": 0.5361827611923218,
      "learning_rate": 8.281728748113837e-06,
      "loss": 0.0336,
      "step": 541540
    },
    {
      "epoch": 0.8862748178551089,
      "grad_norm": 4.527859687805176,
      "learning_rate": 8.281662855900318e-06,
      "loss": 0.0341,
      "step": 541560
    },
    {
      "epoch": 0.8863075482937622,
      "grad_norm": 0.5981490612030029,
      "learning_rate": 8.281596963686802e-06,
      "loss": 0.0328,
      "step": 541580
    },
    {
      "epoch": 0.8863402787324156,
      "grad_norm": 0.49399760365486145,
      "learning_rate": 8.281531071473284e-06,
      "loss": 0.0269,
      "step": 541600
    },
    {
      "epoch": 0.8863730091710689,
      "grad_norm": 0.7427088618278503,
      "learning_rate": 8.281465179259767e-06,
      "loss": 0.0273,
      "step": 541620
    },
    {
      "epoch": 0.8864057396097222,
      "grad_norm": 0.44567134976387024,
      "learning_rate": 8.28139928704625e-06,
      "loss": 0.0292,
      "step": 541640
    },
    {
      "epoch": 0.8864384700483756,
      "grad_norm": 0.1678522527217865,
      "learning_rate": 8.281333394832733e-06,
      "loss": 0.0313,
      "step": 541660
    },
    {
      "epoch": 0.8864712004870289,
      "grad_norm": 0.6192223429679871,
      "learning_rate": 8.281267502619217e-06,
      "loss": 0.0184,
      "step": 541680
    },
    {
      "epoch": 0.8865039309256822,
      "grad_norm": 1.1723634004592896,
      "learning_rate": 8.281201610405698e-06,
      "loss": 0.0254,
      "step": 541700
    },
    {
      "epoch": 0.8865366613643356,
      "grad_norm": 0.9416537880897522,
      "learning_rate": 8.281135718192182e-06,
      "loss": 0.0354,
      "step": 541720
    },
    {
      "epoch": 0.886569391802989,
      "grad_norm": 0.3402463495731354,
      "learning_rate": 8.281069825978666e-06,
      "loss": 0.0279,
      "step": 541740
    },
    {
      "epoch": 0.8866021222416423,
      "grad_norm": 0.19253134727478027,
      "learning_rate": 8.281003933765147e-06,
      "loss": 0.0232,
      "step": 541760
    },
    {
      "epoch": 0.8866348526802956,
      "grad_norm": 1.1475446224212646,
      "learning_rate": 8.280938041551631e-06,
      "loss": 0.0319,
      "step": 541780
    },
    {
      "epoch": 0.886667583118949,
      "grad_norm": 4.250207901000977,
      "learning_rate": 8.280872149338115e-06,
      "loss": 0.0349,
      "step": 541800
    },
    {
      "epoch": 0.8867003135576023,
      "grad_norm": 0.13160066306591034,
      "learning_rate": 8.280806257124597e-06,
      "loss": 0.0393,
      "step": 541820
    },
    {
      "epoch": 0.8867330439962556,
      "grad_norm": 0.4939388334751129,
      "learning_rate": 8.28074036491108e-06,
      "loss": 0.0275,
      "step": 541840
    },
    {
      "epoch": 0.886765774434909,
      "grad_norm": 0.7112283110618591,
      "learning_rate": 8.280674472697562e-06,
      "loss": 0.0328,
      "step": 541860
    },
    {
      "epoch": 0.8867985048735623,
      "grad_norm": 0.4466538727283478,
      "learning_rate": 8.280608580484046e-06,
      "loss": 0.0272,
      "step": 541880
    },
    {
      "epoch": 0.8868312353122156,
      "grad_norm": 1.7787292003631592,
      "learning_rate": 8.280542688270528e-06,
      "loss": 0.0254,
      "step": 541900
    },
    {
      "epoch": 0.886863965750869,
      "grad_norm": 0.7554514408111572,
      "learning_rate": 8.280476796057011e-06,
      "loss": 0.0286,
      "step": 541920
    },
    {
      "epoch": 0.8868966961895224,
      "grad_norm": 1.0727654695510864,
      "learning_rate": 8.280410903843493e-06,
      "loss": 0.0313,
      "step": 541940
    },
    {
      "epoch": 0.8869294266281756,
      "grad_norm": 1.5872918367385864,
      "learning_rate": 8.280345011629977e-06,
      "loss": 0.0282,
      "step": 541960
    },
    {
      "epoch": 0.886962157066829,
      "grad_norm": 1.390565276145935,
      "learning_rate": 8.280279119416458e-06,
      "loss": 0.0302,
      "step": 541980
    },
    {
      "epoch": 0.8869948875054824,
      "grad_norm": 0.710742175579071,
      "learning_rate": 8.280213227202942e-06,
      "loss": 0.0267,
      "step": 542000
    },
    {
      "epoch": 0.8870276179441356,
      "grad_norm": 0.8918198943138123,
      "learning_rate": 8.280147334989424e-06,
      "loss": 0.02,
      "step": 542020
    },
    {
      "epoch": 0.887060348382789,
      "grad_norm": 0.7621379494667053,
      "learning_rate": 8.280081442775908e-06,
      "loss": 0.0273,
      "step": 542040
    },
    {
      "epoch": 0.8870930788214424,
      "grad_norm": 0.4681369960308075,
      "learning_rate": 8.28001555056239e-06,
      "loss": 0.0336,
      "step": 542060
    },
    {
      "epoch": 0.8871258092600957,
      "grad_norm": 0.9032283425331116,
      "learning_rate": 8.279949658348873e-06,
      "loss": 0.0225,
      "step": 542080
    },
    {
      "epoch": 0.887158539698749,
      "grad_norm": 1.6225636005401611,
      "learning_rate": 8.279883766135357e-06,
      "loss": 0.0216,
      "step": 542100
    },
    {
      "epoch": 0.8871912701374024,
      "grad_norm": 1.388129711151123,
      "learning_rate": 8.279817873921838e-06,
      "loss": 0.0354,
      "step": 542120
    },
    {
      "epoch": 0.8872240005760558,
      "grad_norm": 1.461389183998108,
      "learning_rate": 8.279751981708322e-06,
      "loss": 0.0222,
      "step": 542140
    },
    {
      "epoch": 0.887256731014709,
      "grad_norm": 0.44855397939682007,
      "learning_rate": 8.279686089494806e-06,
      "loss": 0.0292,
      "step": 542160
    },
    {
      "epoch": 0.8872894614533624,
      "grad_norm": 0.09270262718200684,
      "learning_rate": 8.279620197281288e-06,
      "loss": 0.0271,
      "step": 542180
    },
    {
      "epoch": 0.8873221918920158,
      "grad_norm": 2.185962200164795,
      "learning_rate": 8.279554305067771e-06,
      "loss": 0.0328,
      "step": 542200
    },
    {
      "epoch": 0.887354922330669,
      "grad_norm": 0.7262395620346069,
      "learning_rate": 8.279488412854255e-06,
      "loss": 0.0271,
      "step": 542220
    },
    {
      "epoch": 0.8873876527693224,
      "grad_norm": 0.2524733543395996,
      "learning_rate": 8.279422520640737e-06,
      "loss": 0.0233,
      "step": 542240
    },
    {
      "epoch": 0.8874203832079758,
      "grad_norm": 0.6602946519851685,
      "learning_rate": 8.27935662842722e-06,
      "loss": 0.0274,
      "step": 542260
    },
    {
      "epoch": 0.887453113646629,
      "grad_norm": 1.192598581314087,
      "learning_rate": 8.279290736213702e-06,
      "loss": 0.0245,
      "step": 542280
    },
    {
      "epoch": 0.8874858440852824,
      "grad_norm": 1.3467366695404053,
      "learning_rate": 8.279224844000186e-06,
      "loss": 0.0216,
      "step": 542300
    },
    {
      "epoch": 0.8875185745239358,
      "grad_norm": 1.2989468574523926,
      "learning_rate": 8.279158951786668e-06,
      "loss": 0.0214,
      "step": 542320
    },
    {
      "epoch": 0.8875513049625892,
      "grad_norm": 0.19412656128406525,
      "learning_rate": 8.279093059573151e-06,
      "loss": 0.0258,
      "step": 542340
    },
    {
      "epoch": 0.8875840354012424,
      "grad_norm": 0.6443975567817688,
      "learning_rate": 8.279027167359633e-06,
      "loss": 0.0289,
      "step": 542360
    },
    {
      "epoch": 0.8876167658398958,
      "grad_norm": 1.7590911388397217,
      "learning_rate": 8.278961275146117e-06,
      "loss": 0.0209,
      "step": 542380
    },
    {
      "epoch": 0.8876494962785492,
      "grad_norm": 0.8235226273536682,
      "learning_rate": 8.278895382932599e-06,
      "loss": 0.0353,
      "step": 542400
    },
    {
      "epoch": 0.8876822267172024,
      "grad_norm": 0.8599516153335571,
      "learning_rate": 8.278829490719082e-06,
      "loss": 0.0212,
      "step": 542420
    },
    {
      "epoch": 0.8877149571558558,
      "grad_norm": 0.7411699295043945,
      "learning_rate": 8.278763598505564e-06,
      "loss": 0.0393,
      "step": 542440
    },
    {
      "epoch": 0.8877476875945092,
      "grad_norm": 1.0334787368774414,
      "learning_rate": 8.278697706292048e-06,
      "loss": 0.0332,
      "step": 542460
    },
    {
      "epoch": 0.8877804180331624,
      "grad_norm": 0.5919075608253479,
      "learning_rate": 8.278631814078531e-06,
      "loss": 0.034,
      "step": 542480
    },
    {
      "epoch": 0.8878131484718158,
      "grad_norm": 2.271052122116089,
      "learning_rate": 8.278565921865013e-06,
      "loss": 0.0414,
      "step": 542500
    },
    {
      "epoch": 0.8878458789104692,
      "grad_norm": 2.499368667602539,
      "learning_rate": 8.278500029651497e-06,
      "loss": 0.0232,
      "step": 542520
    },
    {
      "epoch": 0.8878786093491225,
      "grad_norm": 37.44330596923828,
      "learning_rate": 8.27843413743798e-06,
      "loss": 0.0213,
      "step": 542540
    },
    {
      "epoch": 0.8879113397877758,
      "grad_norm": 0.45844078063964844,
      "learning_rate": 8.278368245224462e-06,
      "loss": 0.0262,
      "step": 542560
    },
    {
      "epoch": 0.8879440702264292,
      "grad_norm": 0.7150553464889526,
      "learning_rate": 8.278302353010946e-06,
      "loss": 0.0311,
      "step": 542580
    },
    {
      "epoch": 0.8879768006650826,
      "grad_norm": 0.6229350566864014,
      "learning_rate": 8.27823646079743e-06,
      "loss": 0.0265,
      "step": 542600
    },
    {
      "epoch": 0.8880095311037358,
      "grad_norm": 0.4218045175075531,
      "learning_rate": 8.278170568583911e-06,
      "loss": 0.0248,
      "step": 542620
    },
    {
      "epoch": 0.8880422615423892,
      "grad_norm": 0.4411175847053528,
      "learning_rate": 8.278104676370395e-06,
      "loss": 0.0273,
      "step": 542640
    },
    {
      "epoch": 0.8880749919810426,
      "grad_norm": 0.9264460802078247,
      "learning_rate": 8.278038784156877e-06,
      "loss": 0.0306,
      "step": 542660
    },
    {
      "epoch": 0.8881077224196958,
      "grad_norm": 0.3168182373046875,
      "learning_rate": 8.27797289194336e-06,
      "loss": 0.0229,
      "step": 542680
    },
    {
      "epoch": 0.8881404528583492,
      "grad_norm": 0.8867613673210144,
      "learning_rate": 8.277906999729842e-06,
      "loss": 0.0221,
      "step": 542700
    },
    {
      "epoch": 0.8881731832970026,
      "grad_norm": 2.6268975734710693,
      "learning_rate": 8.277841107516326e-06,
      "loss": 0.0297,
      "step": 542720
    },
    {
      "epoch": 0.8882059137356559,
      "grad_norm": 4.02764368057251,
      "learning_rate": 8.277775215302808e-06,
      "loss": 0.0349,
      "step": 542740
    },
    {
      "epoch": 0.8882386441743092,
      "grad_norm": 2.1041789054870605,
      "learning_rate": 8.277709323089291e-06,
      "loss": 0.0293,
      "step": 542760
    },
    {
      "epoch": 0.8882713746129626,
      "grad_norm": 0.7851817011833191,
      "learning_rate": 8.277643430875773e-06,
      "loss": 0.0304,
      "step": 542780
    },
    {
      "epoch": 0.8883041050516159,
      "grad_norm": 1.8877815008163452,
      "learning_rate": 8.277577538662257e-06,
      "loss": 0.0281,
      "step": 542800
    },
    {
      "epoch": 0.8883368354902692,
      "grad_norm": 0.6800365447998047,
      "learning_rate": 8.277511646448739e-06,
      "loss": 0.0289,
      "step": 542820
    },
    {
      "epoch": 0.8883695659289226,
      "grad_norm": 2.312917709350586,
      "learning_rate": 8.277445754235222e-06,
      "loss": 0.0305,
      "step": 542840
    },
    {
      "epoch": 0.888402296367576,
      "grad_norm": 0.8184260129928589,
      "learning_rate": 8.277379862021706e-06,
      "loss": 0.0236,
      "step": 542860
    },
    {
      "epoch": 0.8884350268062292,
      "grad_norm": 1.1526507139205933,
      "learning_rate": 8.277313969808188e-06,
      "loss": 0.0315,
      "step": 542880
    },
    {
      "epoch": 0.8884677572448826,
      "grad_norm": 0.6165989637374878,
      "learning_rate": 8.277248077594671e-06,
      "loss": 0.0353,
      "step": 542900
    },
    {
      "epoch": 0.888500487683536,
      "grad_norm": 6.971653938293457,
      "learning_rate": 8.277182185381155e-06,
      "loss": 0.0315,
      "step": 542920
    },
    {
      "epoch": 0.8885332181221893,
      "grad_norm": 0.15983548760414124,
      "learning_rate": 8.277116293167637e-06,
      "loss": 0.0239,
      "step": 542940
    },
    {
      "epoch": 0.8885659485608426,
      "grad_norm": 0.6368796229362488,
      "learning_rate": 8.27705040095412e-06,
      "loss": 0.0215,
      "step": 542960
    },
    {
      "epoch": 0.888598678999496,
      "grad_norm": 0.541260302066803,
      "learning_rate": 8.276984508740604e-06,
      "loss": 0.0284,
      "step": 542980
    },
    {
      "epoch": 0.8886314094381493,
      "grad_norm": 3.9184932708740234,
      "learning_rate": 8.276918616527086e-06,
      "loss": 0.0277,
      "step": 543000
    },
    {
      "epoch": 0.8886641398768026,
      "grad_norm": 2.4401443004608154,
      "learning_rate": 8.27685272431357e-06,
      "loss": 0.0418,
      "step": 543020
    },
    {
      "epoch": 0.888696870315456,
      "grad_norm": 1.2825260162353516,
      "learning_rate": 8.276786832100051e-06,
      "loss": 0.025,
      "step": 543040
    },
    {
      "epoch": 0.8887296007541093,
      "grad_norm": 2.1091551780700684,
      "learning_rate": 8.276720939886535e-06,
      "loss": 0.0273,
      "step": 543060
    },
    {
      "epoch": 0.8887623311927626,
      "grad_norm": 2.8929378986358643,
      "learning_rate": 8.276655047673017e-06,
      "loss": 0.0328,
      "step": 543080
    },
    {
      "epoch": 0.888795061631416,
      "grad_norm": 0.4712539613246918,
      "learning_rate": 8.2765891554595e-06,
      "loss": 0.0358,
      "step": 543100
    },
    {
      "epoch": 0.8888277920700693,
      "grad_norm": 1.2546919584274292,
      "learning_rate": 8.276523263245982e-06,
      "loss": 0.0339,
      "step": 543120
    },
    {
      "epoch": 0.8888605225087227,
      "grad_norm": 1.5946143865585327,
      "learning_rate": 8.276457371032466e-06,
      "loss": 0.0341,
      "step": 543140
    },
    {
      "epoch": 0.888893252947376,
      "grad_norm": 1.607269287109375,
      "learning_rate": 8.276391478818948e-06,
      "loss": 0.0234,
      "step": 543160
    },
    {
      "epoch": 0.8889259833860294,
      "grad_norm": 0.14342953264713287,
      "learning_rate": 8.276325586605431e-06,
      "loss": 0.0265,
      "step": 543180
    },
    {
      "epoch": 0.8889587138246827,
      "grad_norm": 1.309055209159851,
      "learning_rate": 8.276259694391915e-06,
      "loss": 0.039,
      "step": 543200
    },
    {
      "epoch": 0.888991444263336,
      "grad_norm": 1.011907696723938,
      "learning_rate": 8.276193802178397e-06,
      "loss": 0.0344,
      "step": 543220
    },
    {
      "epoch": 0.8890241747019894,
      "grad_norm": 8.438612937927246,
      "learning_rate": 8.27612790996488e-06,
      "loss": 0.0231,
      "step": 543240
    },
    {
      "epoch": 0.8890569051406427,
      "grad_norm": 0.8368544578552246,
      "learning_rate": 8.276062017751362e-06,
      "loss": 0.0338,
      "step": 543260
    },
    {
      "epoch": 0.889089635579296,
      "grad_norm": 1.2460191249847412,
      "learning_rate": 8.275996125537846e-06,
      "loss": 0.0204,
      "step": 543280
    },
    {
      "epoch": 0.8891223660179494,
      "grad_norm": 0.5336815118789673,
      "learning_rate": 8.275930233324328e-06,
      "loss": 0.0345,
      "step": 543300
    },
    {
      "epoch": 0.8891550964566027,
      "grad_norm": 1.266247272491455,
      "learning_rate": 8.275864341110811e-06,
      "loss": 0.0332,
      "step": 543320
    },
    {
      "epoch": 0.889187826895256,
      "grad_norm": 0.5618597269058228,
      "learning_rate": 8.275798448897295e-06,
      "loss": 0.0199,
      "step": 543340
    },
    {
      "epoch": 0.8892205573339094,
      "grad_norm": 0.3719799518585205,
      "learning_rate": 8.275732556683777e-06,
      "loss": 0.0261,
      "step": 543360
    },
    {
      "epoch": 0.8892532877725627,
      "grad_norm": 0.9419049024581909,
      "learning_rate": 8.27566666447026e-06,
      "loss": 0.0184,
      "step": 543380
    },
    {
      "epoch": 0.8892860182112161,
      "grad_norm": 0.2644667625427246,
      "learning_rate": 8.275600772256744e-06,
      "loss": 0.0312,
      "step": 543400
    },
    {
      "epoch": 0.8893187486498694,
      "grad_norm": 1.3787297010421753,
      "learning_rate": 8.275534880043226e-06,
      "loss": 0.0285,
      "step": 543420
    },
    {
      "epoch": 0.8893514790885227,
      "grad_norm": 0.6510403752326965,
      "learning_rate": 8.27546898782971e-06,
      "loss": 0.0247,
      "step": 543440
    },
    {
      "epoch": 0.8893842095271761,
      "grad_norm": 0.43121641874313354,
      "learning_rate": 8.275403095616191e-06,
      "loss": 0.0257,
      "step": 543460
    },
    {
      "epoch": 0.8894169399658294,
      "grad_norm": 0.7488998770713806,
      "learning_rate": 8.275337203402675e-06,
      "loss": 0.0333,
      "step": 543480
    },
    {
      "epoch": 0.8894496704044828,
      "grad_norm": 1.7130155563354492,
      "learning_rate": 8.275271311189157e-06,
      "loss": 0.0413,
      "step": 543500
    },
    {
      "epoch": 0.8894824008431361,
      "grad_norm": 0.6662339568138123,
      "learning_rate": 8.27520541897564e-06,
      "loss": 0.015,
      "step": 543520
    },
    {
      "epoch": 0.8895151312817894,
      "grad_norm": 0.24667078256607056,
      "learning_rate": 8.275139526762124e-06,
      "loss": 0.0261,
      "step": 543540
    },
    {
      "epoch": 0.8895478617204428,
      "grad_norm": 0.6660438179969788,
      "learning_rate": 8.275073634548606e-06,
      "loss": 0.0346,
      "step": 543560
    },
    {
      "epoch": 0.8895805921590961,
      "grad_norm": 1.2182410955429077,
      "learning_rate": 8.27500774233509e-06,
      "loss": 0.0295,
      "step": 543580
    },
    {
      "epoch": 0.8896133225977495,
      "grad_norm": 1.6555967330932617,
      "learning_rate": 8.274941850121571e-06,
      "loss": 0.038,
      "step": 543600
    },
    {
      "epoch": 0.8896460530364028,
      "grad_norm": 2.370288133621216,
      "learning_rate": 8.274875957908055e-06,
      "loss": 0.0288,
      "step": 543620
    },
    {
      "epoch": 0.8896787834750561,
      "grad_norm": 0.806454062461853,
      "learning_rate": 8.274810065694537e-06,
      "loss": 0.0242,
      "step": 543640
    },
    {
      "epoch": 0.8897115139137095,
      "grad_norm": 1.154952049255371,
      "learning_rate": 8.27474417348102e-06,
      "loss": 0.0313,
      "step": 543660
    },
    {
      "epoch": 0.8897442443523628,
      "grad_norm": 0.5335990786552429,
      "learning_rate": 8.274678281267502e-06,
      "loss": 0.0303,
      "step": 543680
    },
    {
      "epoch": 0.8897769747910161,
      "grad_norm": 0.4565604031085968,
      "learning_rate": 8.274612389053986e-06,
      "loss": 0.0365,
      "step": 543700
    },
    {
      "epoch": 0.8898097052296695,
      "grad_norm": 0.8678351640701294,
      "learning_rate": 8.27454649684047e-06,
      "loss": 0.0378,
      "step": 543720
    },
    {
      "epoch": 0.8898424356683228,
      "grad_norm": 0.2950756549835205,
      "learning_rate": 8.274480604626951e-06,
      "loss": 0.0317,
      "step": 543740
    },
    {
      "epoch": 0.8898751661069761,
      "grad_norm": 1.6876094341278076,
      "learning_rate": 8.274414712413435e-06,
      "loss": 0.0324,
      "step": 543760
    },
    {
      "epoch": 0.8899078965456295,
      "grad_norm": 4.149659633636475,
      "learning_rate": 8.274348820199919e-06,
      "loss": 0.0286,
      "step": 543780
    },
    {
      "epoch": 0.8899406269842829,
      "grad_norm": 1.2873440980911255,
      "learning_rate": 8.2742829279864e-06,
      "loss": 0.0346,
      "step": 543800
    },
    {
      "epoch": 0.8899733574229362,
      "grad_norm": 1.5233334302902222,
      "learning_rate": 8.274217035772884e-06,
      "loss": 0.0296,
      "step": 543820
    },
    {
      "epoch": 0.8900060878615895,
      "grad_norm": 1.5786598920822144,
      "learning_rate": 8.274151143559366e-06,
      "loss": 0.0264,
      "step": 543840
    },
    {
      "epoch": 0.8900388183002429,
      "grad_norm": 1.5633879899978638,
      "learning_rate": 8.27408525134585e-06,
      "loss": 0.025,
      "step": 543860
    },
    {
      "epoch": 0.8900715487388962,
      "grad_norm": 1.014552354812622,
      "learning_rate": 8.274019359132333e-06,
      "loss": 0.0276,
      "step": 543880
    },
    {
      "epoch": 0.8901042791775495,
      "grad_norm": 0.6275533437728882,
      "learning_rate": 8.273953466918815e-06,
      "loss": 0.0302,
      "step": 543900
    },
    {
      "epoch": 0.8901370096162029,
      "grad_norm": 0.5025728344917297,
      "learning_rate": 8.273887574705299e-06,
      "loss": 0.0194,
      "step": 543920
    },
    {
      "epoch": 0.8901697400548562,
      "grad_norm": 0.6497689485549927,
      "learning_rate": 8.27382168249178e-06,
      "loss": 0.0208,
      "step": 543940
    },
    {
      "epoch": 0.8902024704935095,
      "grad_norm": 1.438275933265686,
      "learning_rate": 8.273755790278264e-06,
      "loss": 0.0209,
      "step": 543960
    },
    {
      "epoch": 0.8902352009321629,
      "grad_norm": 1.0221261978149414,
      "learning_rate": 8.273689898064746e-06,
      "loss": 0.0288,
      "step": 543980
    },
    {
      "epoch": 0.8902679313708163,
      "grad_norm": 0.5292627811431885,
      "learning_rate": 8.27362400585123e-06,
      "loss": 0.0212,
      "step": 544000
    },
    {
      "epoch": 0.8903006618094695,
      "grad_norm": 1.6361674070358276,
      "learning_rate": 8.273558113637711e-06,
      "loss": 0.0458,
      "step": 544020
    },
    {
      "epoch": 0.8903333922481229,
      "grad_norm": 0.7379261255264282,
      "learning_rate": 8.273492221424195e-06,
      "loss": 0.0311,
      "step": 544040
    },
    {
      "epoch": 0.8903661226867763,
      "grad_norm": 0.3339177668094635,
      "learning_rate": 8.273426329210677e-06,
      "loss": 0.0248,
      "step": 544060
    },
    {
      "epoch": 0.8903988531254295,
      "grad_norm": 1.1413780450820923,
      "learning_rate": 8.27336043699716e-06,
      "loss": 0.0261,
      "step": 544080
    },
    {
      "epoch": 0.8904315835640829,
      "grad_norm": 0.22155161201953888,
      "learning_rate": 8.273294544783642e-06,
      "loss": 0.0238,
      "step": 544100
    },
    {
      "epoch": 0.8904643140027363,
      "grad_norm": 1.1143271923065186,
      "learning_rate": 8.273228652570126e-06,
      "loss": 0.0269,
      "step": 544120
    },
    {
      "epoch": 0.8904970444413896,
      "grad_norm": 0.5613478422164917,
      "learning_rate": 8.27316276035661e-06,
      "loss": 0.0263,
      "step": 544140
    },
    {
      "epoch": 0.8905297748800429,
      "grad_norm": 0.49812063574790955,
      "learning_rate": 8.273096868143092e-06,
      "loss": 0.0232,
      "step": 544160
    },
    {
      "epoch": 0.8905625053186963,
      "grad_norm": 0.22108834981918335,
      "learning_rate": 8.273030975929575e-06,
      "loss": 0.0318,
      "step": 544180
    },
    {
      "epoch": 0.8905952357573497,
      "grad_norm": 1.5498948097229004,
      "learning_rate": 8.272965083716059e-06,
      "loss": 0.031,
      "step": 544200
    },
    {
      "epoch": 0.8906279661960029,
      "grad_norm": 0.4611181914806366,
      "learning_rate": 8.27289919150254e-06,
      "loss": 0.0278,
      "step": 544220
    },
    {
      "epoch": 0.8906606966346563,
      "grad_norm": 0.3529638648033142,
      "learning_rate": 8.272833299289024e-06,
      "loss": 0.0337,
      "step": 544240
    },
    {
      "epoch": 0.8906934270733097,
      "grad_norm": 0.6761221289634705,
      "learning_rate": 8.272767407075508e-06,
      "loss": 0.0326,
      "step": 544260
    },
    {
      "epoch": 0.8907261575119629,
      "grad_norm": 0.6606928706169128,
      "learning_rate": 8.27270151486199e-06,
      "loss": 0.0252,
      "step": 544280
    },
    {
      "epoch": 0.8907588879506163,
      "grad_norm": 0.24360710382461548,
      "learning_rate": 8.272635622648473e-06,
      "loss": 0.0198,
      "step": 544300
    },
    {
      "epoch": 0.8907916183892697,
      "grad_norm": 1.0083242654800415,
      "learning_rate": 8.272569730434955e-06,
      "loss": 0.0355,
      "step": 544320
    },
    {
      "epoch": 0.8908243488279229,
      "grad_norm": 2.5420985221862793,
      "learning_rate": 8.272503838221439e-06,
      "loss": 0.0293,
      "step": 544340
    },
    {
      "epoch": 0.8908570792665763,
      "grad_norm": 1.5422011613845825,
      "learning_rate": 8.27243794600792e-06,
      "loss": 0.0325,
      "step": 544360
    },
    {
      "epoch": 0.8908898097052297,
      "grad_norm": 6.607679843902588,
      "learning_rate": 8.272372053794404e-06,
      "loss": 0.0344,
      "step": 544380
    },
    {
      "epoch": 0.8909225401438831,
      "grad_norm": 0.464203804731369,
      "learning_rate": 8.272306161580886e-06,
      "loss": 0.0312,
      "step": 544400
    },
    {
      "epoch": 0.8909552705825363,
      "grad_norm": 0.787148654460907,
      "learning_rate": 8.27224026936737e-06,
      "loss": 0.0211,
      "step": 544420
    },
    {
      "epoch": 0.8909880010211897,
      "grad_norm": 2.366079807281494,
      "learning_rate": 8.272174377153852e-06,
      "loss": 0.028,
      "step": 544440
    },
    {
      "epoch": 0.8910207314598431,
      "grad_norm": 1.56974458694458,
      "learning_rate": 8.272108484940335e-06,
      "loss": 0.0391,
      "step": 544460
    },
    {
      "epoch": 0.8910534618984963,
      "grad_norm": 0.7692531943321228,
      "learning_rate": 8.272042592726817e-06,
      "loss": 0.0214,
      "step": 544480
    },
    {
      "epoch": 0.8910861923371497,
      "grad_norm": 0.5988761782646179,
      "learning_rate": 8.2719767005133e-06,
      "loss": 0.0317,
      "step": 544500
    },
    {
      "epoch": 0.8911189227758031,
      "grad_norm": 1.0958799123764038,
      "learning_rate": 8.271910808299784e-06,
      "loss": 0.0322,
      "step": 544520
    },
    {
      "epoch": 0.8911516532144563,
      "grad_norm": 1.8652620315551758,
      "learning_rate": 8.271844916086266e-06,
      "loss": 0.0255,
      "step": 544540
    },
    {
      "epoch": 0.8911843836531097,
      "grad_norm": 0.7571821808815002,
      "learning_rate": 8.27177902387275e-06,
      "loss": 0.0233,
      "step": 544560
    },
    {
      "epoch": 0.8912171140917631,
      "grad_norm": 1.1297063827514648,
      "learning_rate": 8.271713131659233e-06,
      "loss": 0.0203,
      "step": 544580
    },
    {
      "epoch": 0.8912498445304164,
      "grad_norm": 0.6600807309150696,
      "learning_rate": 8.271647239445715e-06,
      "loss": 0.0242,
      "step": 544600
    },
    {
      "epoch": 0.8912825749690697,
      "grad_norm": 1.188948631286621,
      "learning_rate": 8.271581347232199e-06,
      "loss": 0.0407,
      "step": 544620
    },
    {
      "epoch": 0.8913153054077231,
      "grad_norm": 1.269942283630371,
      "learning_rate": 8.271515455018682e-06,
      "loss": 0.0256,
      "step": 544640
    },
    {
      "epoch": 0.8913480358463765,
      "grad_norm": 2.06620454788208,
      "learning_rate": 8.271449562805164e-06,
      "loss": 0.0208,
      "step": 544660
    },
    {
      "epoch": 0.8913807662850297,
      "grad_norm": 0.8536990284919739,
      "learning_rate": 8.271383670591648e-06,
      "loss": 0.0239,
      "step": 544680
    },
    {
      "epoch": 0.8914134967236831,
      "grad_norm": 1.5297530889511108,
      "learning_rate": 8.27131777837813e-06,
      "loss": 0.0251,
      "step": 544700
    },
    {
      "epoch": 0.8914462271623365,
      "grad_norm": 0.7011314630508423,
      "learning_rate": 8.271251886164613e-06,
      "loss": 0.0267,
      "step": 544720
    },
    {
      "epoch": 0.8914789576009897,
      "grad_norm": 1.6939051151275635,
      "learning_rate": 8.271185993951095e-06,
      "loss": 0.0345,
      "step": 544740
    },
    {
      "epoch": 0.8915116880396431,
      "grad_norm": 0.8163391947746277,
      "learning_rate": 8.271120101737579e-06,
      "loss": 0.0271,
      "step": 544760
    },
    {
      "epoch": 0.8915444184782965,
      "grad_norm": 0.8319191336631775,
      "learning_rate": 8.27105420952406e-06,
      "loss": 0.0222,
      "step": 544780
    },
    {
      "epoch": 0.8915771489169498,
      "grad_norm": 2.285726547241211,
      "learning_rate": 8.270988317310544e-06,
      "loss": 0.0234,
      "step": 544800
    },
    {
      "epoch": 0.8916098793556031,
      "grad_norm": 0.9766407608985901,
      "learning_rate": 8.270922425097026e-06,
      "loss": 0.0187,
      "step": 544820
    },
    {
      "epoch": 0.8916426097942565,
      "grad_norm": 0.3386170268058777,
      "learning_rate": 8.27085653288351e-06,
      "loss": 0.0327,
      "step": 544840
    },
    {
      "epoch": 0.8916753402329098,
      "grad_norm": 0.8527113795280457,
      "learning_rate": 8.270790640669992e-06,
      "loss": 0.0276,
      "step": 544860
    },
    {
      "epoch": 0.8917080706715631,
      "grad_norm": 0.27668118476867676,
      "learning_rate": 8.270724748456475e-06,
      "loss": 0.0246,
      "step": 544880
    },
    {
      "epoch": 0.8917408011102165,
      "grad_norm": 1.3454314470291138,
      "learning_rate": 8.270658856242957e-06,
      "loss": 0.0243,
      "step": 544900
    },
    {
      "epoch": 0.8917735315488698,
      "grad_norm": 0.4980156421661377,
      "learning_rate": 8.27059296402944e-06,
      "loss": 0.0217,
      "step": 544920
    },
    {
      "epoch": 0.8918062619875231,
      "grad_norm": 1.0233923196792603,
      "learning_rate": 8.270527071815924e-06,
      "loss": 0.0262,
      "step": 544940
    },
    {
      "epoch": 0.8918389924261765,
      "grad_norm": 1.068701148033142,
      "learning_rate": 8.270461179602406e-06,
      "loss": 0.0302,
      "step": 544960
    },
    {
      "epoch": 0.8918717228648299,
      "grad_norm": 0.6563670039176941,
      "learning_rate": 8.27039528738889e-06,
      "loss": 0.0249,
      "step": 544980
    },
    {
      "epoch": 0.8919044533034832,
      "grad_norm": 0.34148555994033813,
      "learning_rate": 8.270329395175373e-06,
      "loss": 0.0284,
      "step": 545000
    },
    {
      "epoch": 0.8919371837421365,
      "grad_norm": 1.2702984809875488,
      "learning_rate": 8.270263502961857e-06,
      "loss": 0.029,
      "step": 545020
    },
    {
      "epoch": 0.8919699141807899,
      "grad_norm": 0.38366279006004333,
      "learning_rate": 8.270197610748339e-06,
      "loss": 0.0365,
      "step": 545040
    },
    {
      "epoch": 0.8920026446194432,
      "grad_norm": 0.6737252473831177,
      "learning_rate": 8.270131718534822e-06,
      "loss": 0.0292,
      "step": 545060
    },
    {
      "epoch": 0.8920353750580965,
      "grad_norm": 0.40792784094810486,
      "learning_rate": 8.270065826321304e-06,
      "loss": 0.0307,
      "step": 545080
    },
    {
      "epoch": 0.8920681054967499,
      "grad_norm": 0.5112905502319336,
      "learning_rate": 8.269999934107788e-06,
      "loss": 0.0288,
      "step": 545100
    },
    {
      "epoch": 0.8921008359354032,
      "grad_norm": 1.9666500091552734,
      "learning_rate": 8.26993404189427e-06,
      "loss": 0.0338,
      "step": 545120
    },
    {
      "epoch": 0.8921335663740565,
      "grad_norm": 0.4406377673149109,
      "learning_rate": 8.269868149680753e-06,
      "loss": 0.0241,
      "step": 545140
    },
    {
      "epoch": 0.8921662968127099,
      "grad_norm": 0.5823836922645569,
      "learning_rate": 8.269802257467235e-06,
      "loss": 0.021,
      "step": 545160
    },
    {
      "epoch": 0.8921990272513632,
      "grad_norm": 1.2027649879455566,
      "learning_rate": 8.269736365253719e-06,
      "loss": 0.0324,
      "step": 545180
    },
    {
      "epoch": 0.8922317576900166,
      "grad_norm": 0.21559284627437592,
      "learning_rate": 8.2696704730402e-06,
      "loss": 0.0224,
      "step": 545200
    },
    {
      "epoch": 0.8922644881286699,
      "grad_norm": 2.131844997406006,
      "learning_rate": 8.269604580826684e-06,
      "loss": 0.0283,
      "step": 545220
    },
    {
      "epoch": 0.8922972185673232,
      "grad_norm": 0.4369182586669922,
      "learning_rate": 8.269538688613166e-06,
      "loss": 0.0251,
      "step": 545240
    },
    {
      "epoch": 0.8923299490059766,
      "grad_norm": 0.9652358889579773,
      "learning_rate": 8.26947279639965e-06,
      "loss": 0.0305,
      "step": 545260
    },
    {
      "epoch": 0.8923626794446299,
      "grad_norm": 1.0426949262619019,
      "learning_rate": 8.269406904186132e-06,
      "loss": 0.027,
      "step": 545280
    },
    {
      "epoch": 0.8923954098832833,
      "grad_norm": 0.2807838022708893,
      "learning_rate": 8.269341011972615e-06,
      "loss": 0.0342,
      "step": 545300
    },
    {
      "epoch": 0.8924281403219366,
      "grad_norm": 2.784144639968872,
      "learning_rate": 8.269275119759099e-06,
      "loss": 0.0378,
      "step": 545320
    },
    {
      "epoch": 0.8924608707605899,
      "grad_norm": 1.1586133241653442,
      "learning_rate": 8.26920922754558e-06,
      "loss": 0.0375,
      "step": 545340
    },
    {
      "epoch": 0.8924936011992433,
      "grad_norm": 0.4055333435535431,
      "learning_rate": 8.269143335332064e-06,
      "loss": 0.0386,
      "step": 545360
    },
    {
      "epoch": 0.8925263316378966,
      "grad_norm": 1.865913987159729,
      "learning_rate": 8.269077443118548e-06,
      "loss": 0.0249,
      "step": 545380
    },
    {
      "epoch": 0.89255906207655,
      "grad_norm": 1.4197790622711182,
      "learning_rate": 8.26901155090503e-06,
      "loss": 0.0449,
      "step": 545400
    },
    {
      "epoch": 0.8925917925152033,
      "grad_norm": 1.0837827920913696,
      "learning_rate": 8.268945658691513e-06,
      "loss": 0.0254,
      "step": 545420
    },
    {
      "epoch": 0.8926245229538566,
      "grad_norm": 0.7457870841026306,
      "learning_rate": 8.268879766477997e-06,
      "loss": 0.031,
      "step": 545440
    },
    {
      "epoch": 0.89265725339251,
      "grad_norm": 1.10141122341156,
      "learning_rate": 8.268813874264479e-06,
      "loss": 0.0348,
      "step": 545460
    },
    {
      "epoch": 0.8926899838311633,
      "grad_norm": 1.1618117094039917,
      "learning_rate": 8.268747982050962e-06,
      "loss": 0.0317,
      "step": 545480
    },
    {
      "epoch": 0.8927227142698166,
      "grad_norm": 1.637047290802002,
      "learning_rate": 8.268682089837444e-06,
      "loss": 0.0308,
      "step": 545500
    },
    {
      "epoch": 0.89275544470847,
      "grad_norm": 0.7568979263305664,
      "learning_rate": 8.268616197623928e-06,
      "loss": 0.0181,
      "step": 545520
    },
    {
      "epoch": 0.8927881751471233,
      "grad_norm": 0.4316524267196655,
      "learning_rate": 8.26855030541041e-06,
      "loss": 0.0338,
      "step": 545540
    },
    {
      "epoch": 0.8928209055857766,
      "grad_norm": 1.8108450174331665,
      "learning_rate": 8.268484413196893e-06,
      "loss": 0.03,
      "step": 545560
    },
    {
      "epoch": 0.89285363602443,
      "grad_norm": 0.9889514446258545,
      "learning_rate": 8.268418520983375e-06,
      "loss": 0.0463,
      "step": 545580
    },
    {
      "epoch": 0.8928863664630834,
      "grad_norm": 0.3411144018173218,
      "learning_rate": 8.268352628769859e-06,
      "loss": 0.0233,
      "step": 545600
    },
    {
      "epoch": 0.8929190969017367,
      "grad_norm": 0.2681260406970978,
      "learning_rate": 8.26828673655634e-06,
      "loss": 0.0275,
      "step": 545620
    },
    {
      "epoch": 0.89295182734039,
      "grad_norm": 0.691831648349762,
      "learning_rate": 8.268220844342824e-06,
      "loss": 0.0171,
      "step": 545640
    },
    {
      "epoch": 0.8929845577790434,
      "grad_norm": 1.1058712005615234,
      "learning_rate": 8.268154952129308e-06,
      "loss": 0.0271,
      "step": 545660
    },
    {
      "epoch": 0.8930172882176967,
      "grad_norm": 0.7800077199935913,
      "learning_rate": 8.26808905991579e-06,
      "loss": 0.021,
      "step": 545680
    },
    {
      "epoch": 0.89305001865635,
      "grad_norm": 2.025139570236206,
      "learning_rate": 8.268023167702273e-06,
      "loss": 0.0301,
      "step": 545700
    },
    {
      "epoch": 0.8930827490950034,
      "grad_norm": 0.6839855313301086,
      "learning_rate": 8.267957275488755e-06,
      "loss": 0.0237,
      "step": 545720
    },
    {
      "epoch": 0.8931154795336567,
      "grad_norm": 1.5891239643096924,
      "learning_rate": 8.267891383275239e-06,
      "loss": 0.0248,
      "step": 545740
    },
    {
      "epoch": 0.89314820997231,
      "grad_norm": 0.6168189644813538,
      "learning_rate": 8.267825491061723e-06,
      "loss": 0.0191,
      "step": 545760
    },
    {
      "epoch": 0.8931809404109634,
      "grad_norm": 0.5725018978118896,
      "learning_rate": 8.267759598848204e-06,
      "loss": 0.0216,
      "step": 545780
    },
    {
      "epoch": 0.8932136708496168,
      "grad_norm": 2.1889448165893555,
      "learning_rate": 8.267693706634688e-06,
      "loss": 0.0307,
      "step": 545800
    },
    {
      "epoch": 0.89324640128827,
      "grad_norm": 0.4861636161804199,
      "learning_rate": 8.267627814421172e-06,
      "loss": 0.0169,
      "step": 545820
    },
    {
      "epoch": 0.8932791317269234,
      "grad_norm": 1.463392734527588,
      "learning_rate": 8.267561922207654e-06,
      "loss": 0.0246,
      "step": 545840
    },
    {
      "epoch": 0.8933118621655768,
      "grad_norm": 0.8416303396224976,
      "learning_rate": 8.267496029994137e-06,
      "loss": 0.0263,
      "step": 545860
    },
    {
      "epoch": 0.89334459260423,
      "grad_norm": 1.2490483522415161,
      "learning_rate": 8.267430137780619e-06,
      "loss": 0.0317,
      "step": 545880
    },
    {
      "epoch": 0.8933773230428834,
      "grad_norm": 0.44597429037094116,
      "learning_rate": 8.267364245567103e-06,
      "loss": 0.0366,
      "step": 545900
    },
    {
      "epoch": 0.8934100534815368,
      "grad_norm": 0.24053362011909485,
      "learning_rate": 8.267298353353584e-06,
      "loss": 0.0285,
      "step": 545920
    },
    {
      "epoch": 0.89344278392019,
      "grad_norm": 0.5685617923736572,
      "learning_rate": 8.267232461140068e-06,
      "loss": 0.0193,
      "step": 545940
    },
    {
      "epoch": 0.8934755143588434,
      "grad_norm": 2.0837223529815674,
      "learning_rate": 8.26716656892655e-06,
      "loss": 0.0364,
      "step": 545960
    },
    {
      "epoch": 0.8935082447974968,
      "grad_norm": 2.115656852722168,
      "learning_rate": 8.267100676713034e-06,
      "loss": 0.0276,
      "step": 545980
    },
    {
      "epoch": 0.8935409752361502,
      "grad_norm": 0.6029351949691772,
      "learning_rate": 8.267034784499517e-06,
      "loss": 0.0323,
      "step": 546000
    },
    {
      "epoch": 0.8935737056748034,
      "grad_norm": 0.1503879725933075,
      "learning_rate": 8.266968892285999e-06,
      "loss": 0.0296,
      "step": 546020
    },
    {
      "epoch": 0.8936064361134568,
      "grad_norm": 0.22136425971984863,
      "learning_rate": 8.266903000072483e-06,
      "loss": 0.0239,
      "step": 546040
    },
    {
      "epoch": 0.8936391665521102,
      "grad_norm": 2.2334885597229004,
      "learning_rate": 8.266837107858964e-06,
      "loss": 0.0419,
      "step": 546060
    },
    {
      "epoch": 0.8936718969907634,
      "grad_norm": 0.7456322908401489,
      "learning_rate": 8.266771215645448e-06,
      "loss": 0.0309,
      "step": 546080
    },
    {
      "epoch": 0.8937046274294168,
      "grad_norm": 0.5866202712059021,
      "learning_rate": 8.26670532343193e-06,
      "loss": 0.0282,
      "step": 546100
    },
    {
      "epoch": 0.8937373578680702,
      "grad_norm": 0.42458921670913696,
      "learning_rate": 8.266639431218414e-06,
      "loss": 0.0157,
      "step": 546120
    },
    {
      "epoch": 0.8937700883067234,
      "grad_norm": 0.8941774368286133,
      "learning_rate": 8.266573539004895e-06,
      "loss": 0.0294,
      "step": 546140
    },
    {
      "epoch": 0.8938028187453768,
      "grad_norm": 1.7784814834594727,
      "learning_rate": 8.266507646791379e-06,
      "loss": 0.022,
      "step": 546160
    },
    {
      "epoch": 0.8938355491840302,
      "grad_norm": 0.8052051067352295,
      "learning_rate": 8.266441754577863e-06,
      "loss": 0.0268,
      "step": 546180
    },
    {
      "epoch": 0.8938682796226834,
      "grad_norm": 0.8415780663490295,
      "learning_rate": 8.266375862364345e-06,
      "loss": 0.0184,
      "step": 546200
    },
    {
      "epoch": 0.8939010100613368,
      "grad_norm": 0.5925573110580444,
      "learning_rate": 8.266309970150828e-06,
      "loss": 0.0285,
      "step": 546220
    },
    {
      "epoch": 0.8939337404999902,
      "grad_norm": 0.4830119013786316,
      "learning_rate": 8.266244077937312e-06,
      "loss": 0.03,
      "step": 546240
    },
    {
      "epoch": 0.8939664709386436,
      "grad_norm": 0.4666842222213745,
      "learning_rate": 8.266178185723794e-06,
      "loss": 0.0167,
      "step": 546260
    },
    {
      "epoch": 0.8939992013772968,
      "grad_norm": 0.6298579573631287,
      "learning_rate": 8.266112293510277e-06,
      "loss": 0.0166,
      "step": 546280
    },
    {
      "epoch": 0.8940319318159502,
      "grad_norm": 1.0321497917175293,
      "learning_rate": 8.266046401296759e-06,
      "loss": 0.0247,
      "step": 546300
    },
    {
      "epoch": 0.8940646622546036,
      "grad_norm": 1.3784589767456055,
      "learning_rate": 8.265980509083243e-06,
      "loss": 0.038,
      "step": 546320
    },
    {
      "epoch": 0.8940973926932568,
      "grad_norm": 1.932081937789917,
      "learning_rate": 8.265914616869725e-06,
      "loss": 0.0273,
      "step": 546340
    },
    {
      "epoch": 0.8941301231319102,
      "grad_norm": 0.9683053493499756,
      "learning_rate": 8.265848724656208e-06,
      "loss": 0.0296,
      "step": 546360
    },
    {
      "epoch": 0.8941628535705636,
      "grad_norm": 0.9621939659118652,
      "learning_rate": 8.265782832442692e-06,
      "loss": 0.0249,
      "step": 546380
    },
    {
      "epoch": 0.8941955840092168,
      "grad_norm": 0.7936326265335083,
      "learning_rate": 8.265716940229174e-06,
      "loss": 0.0304,
      "step": 546400
    },
    {
      "epoch": 0.8942283144478702,
      "grad_norm": 4.965668678283691,
      "learning_rate": 8.265651048015657e-06,
      "loss": 0.0232,
      "step": 546420
    },
    {
      "epoch": 0.8942610448865236,
      "grad_norm": 1.3709020614624023,
      "learning_rate": 8.265585155802139e-06,
      "loss": 0.0293,
      "step": 546440
    },
    {
      "epoch": 0.894293775325177,
      "grad_norm": 0.5060122013092041,
      "learning_rate": 8.265519263588623e-06,
      "loss": 0.0276,
      "step": 546460
    },
    {
      "epoch": 0.8943265057638302,
      "grad_norm": 2.1758759021759033,
      "learning_rate": 8.265453371375105e-06,
      "loss": 0.0325,
      "step": 546480
    },
    {
      "epoch": 0.8943592362024836,
      "grad_norm": 1.328293800354004,
      "learning_rate": 8.265387479161588e-06,
      "loss": 0.0225,
      "step": 546500
    },
    {
      "epoch": 0.894391966641137,
      "grad_norm": 0.544666051864624,
      "learning_rate": 8.26532158694807e-06,
      "loss": 0.0367,
      "step": 546520
    },
    {
      "epoch": 0.8944246970797902,
      "grad_norm": 0.9860081076622009,
      "learning_rate": 8.265255694734554e-06,
      "loss": 0.0319,
      "step": 546540
    },
    {
      "epoch": 0.8944574275184436,
      "grad_norm": 1.60623037815094,
      "learning_rate": 8.265189802521037e-06,
      "loss": 0.033,
      "step": 546560
    },
    {
      "epoch": 0.894490157957097,
      "grad_norm": 0.7003941535949707,
      "learning_rate": 8.265123910307519e-06,
      "loss": 0.019,
      "step": 546580
    },
    {
      "epoch": 0.8945228883957502,
      "grad_norm": 0.9882894158363342,
      "learning_rate": 8.265058018094003e-06,
      "loss": 0.034,
      "step": 546600
    },
    {
      "epoch": 0.8945556188344036,
      "grad_norm": 0.21217308938503265,
      "learning_rate": 8.264992125880486e-06,
      "loss": 0.0283,
      "step": 546620
    },
    {
      "epoch": 0.894588349273057,
      "grad_norm": 1.7826234102249146,
      "learning_rate": 8.264926233666968e-06,
      "loss": 0.0215,
      "step": 546640
    },
    {
      "epoch": 0.8946210797117103,
      "grad_norm": 1.3890087604522705,
      "learning_rate": 8.264860341453452e-06,
      "loss": 0.0201,
      "step": 546660
    },
    {
      "epoch": 0.8946538101503636,
      "grad_norm": 1.1589579582214355,
      "learning_rate": 8.264794449239934e-06,
      "loss": 0.0322,
      "step": 546680
    },
    {
      "epoch": 0.894686540589017,
      "grad_norm": 1.2073003053665161,
      "learning_rate": 8.264728557026417e-06,
      "loss": 0.0236,
      "step": 546700
    },
    {
      "epoch": 0.8947192710276703,
      "grad_norm": 2.848850965499878,
      "learning_rate": 8.2646626648129e-06,
      "loss": 0.0343,
      "step": 546720
    },
    {
      "epoch": 0.8947520014663236,
      "grad_norm": 1.8600715398788452,
      "learning_rate": 8.264596772599383e-06,
      "loss": 0.0419,
      "step": 546740
    },
    {
      "epoch": 0.894784731904977,
      "grad_norm": 1.069145917892456,
      "learning_rate": 8.264530880385866e-06,
      "loss": 0.0298,
      "step": 546760
    },
    {
      "epoch": 0.8948174623436304,
      "grad_norm": 0.18398000299930573,
      "learning_rate": 8.264464988172348e-06,
      "loss": 0.0219,
      "step": 546780
    },
    {
      "epoch": 0.8948501927822836,
      "grad_norm": 0.7453151345252991,
      "learning_rate": 8.264399095958832e-06,
      "loss": 0.0318,
      "step": 546800
    },
    {
      "epoch": 0.894882923220937,
      "grad_norm": 1.2703654766082764,
      "learning_rate": 8.264333203745314e-06,
      "loss": 0.0292,
      "step": 546820
    },
    {
      "epoch": 0.8949156536595904,
      "grad_norm": 0.17373067140579224,
      "learning_rate": 8.264267311531797e-06,
      "loss": 0.0273,
      "step": 546840
    },
    {
      "epoch": 0.8949483840982437,
      "grad_norm": 1.068207025527954,
      "learning_rate": 8.26420141931828e-06,
      "loss": 0.0327,
      "step": 546860
    },
    {
      "epoch": 0.894981114536897,
      "grad_norm": 1.4230424165725708,
      "learning_rate": 8.264135527104763e-06,
      "loss": 0.0264,
      "step": 546880
    },
    {
      "epoch": 0.8950138449755504,
      "grad_norm": 2.870878219604492,
      "learning_rate": 8.264069634891245e-06,
      "loss": 0.0272,
      "step": 546900
    },
    {
      "epoch": 0.8950465754142037,
      "grad_norm": 1.167133092880249,
      "learning_rate": 8.264003742677728e-06,
      "loss": 0.0255,
      "step": 546920
    },
    {
      "epoch": 0.895079305852857,
      "grad_norm": 0.6135163307189941,
      "learning_rate": 8.26393785046421e-06,
      "loss": 0.0289,
      "step": 546940
    },
    {
      "epoch": 0.8951120362915104,
      "grad_norm": 1.2328529357910156,
      "learning_rate": 8.263871958250694e-06,
      "loss": 0.0401,
      "step": 546960
    },
    {
      "epoch": 0.8951447667301637,
      "grad_norm": 0.8297802805900574,
      "learning_rate": 8.263806066037177e-06,
      "loss": 0.0296,
      "step": 546980
    },
    {
      "epoch": 0.895177497168817,
      "grad_norm": 2.4687860012054443,
      "learning_rate": 8.26374017382366e-06,
      "loss": 0.0194,
      "step": 547000
    },
    {
      "epoch": 0.8952102276074704,
      "grad_norm": 0.08746124804019928,
      "learning_rate": 8.263674281610143e-06,
      "loss": 0.0209,
      "step": 547020
    },
    {
      "epoch": 0.8952429580461237,
      "grad_norm": 0.5520233511924744,
      "learning_rate": 8.263608389396626e-06,
      "loss": 0.0251,
      "step": 547040
    },
    {
      "epoch": 0.8952756884847771,
      "grad_norm": 0.5996923446655273,
      "learning_rate": 8.263542497183108e-06,
      "loss": 0.0316,
      "step": 547060
    },
    {
      "epoch": 0.8953084189234304,
      "grad_norm": 0.16877774894237518,
      "learning_rate": 8.263476604969592e-06,
      "loss": 0.0349,
      "step": 547080
    },
    {
      "epoch": 0.8953411493620838,
      "grad_norm": 1.476836085319519,
      "learning_rate": 8.263410712756075e-06,
      "loss": 0.0402,
      "step": 547100
    },
    {
      "epoch": 0.8953738798007371,
      "grad_norm": 1.9529885053634644,
      "learning_rate": 8.263344820542557e-06,
      "loss": 0.0309,
      "step": 547120
    },
    {
      "epoch": 0.8954066102393904,
      "grad_norm": 1.572510004043579,
      "learning_rate": 8.263278928329041e-06,
      "loss": 0.0361,
      "step": 547140
    },
    {
      "epoch": 0.8954393406780438,
      "grad_norm": 0.2661871910095215,
      "learning_rate": 8.263213036115523e-06,
      "loss": 0.0275,
      "step": 547160
    },
    {
      "epoch": 0.8954720711166971,
      "grad_norm": 0.3373045027256012,
      "learning_rate": 8.263147143902006e-06,
      "loss": 0.0234,
      "step": 547180
    },
    {
      "epoch": 0.8955048015553504,
      "grad_norm": 0.7286802530288696,
      "learning_rate": 8.263081251688488e-06,
      "loss": 0.0319,
      "step": 547200
    },
    {
      "epoch": 0.8955375319940038,
      "grad_norm": 1.0257818698883057,
      "learning_rate": 8.263015359474972e-06,
      "loss": 0.0227,
      "step": 547220
    },
    {
      "epoch": 0.8955702624326571,
      "grad_norm": 0.3620224893093109,
      "learning_rate": 8.262949467261454e-06,
      "loss": 0.0208,
      "step": 547240
    },
    {
      "epoch": 0.8956029928713105,
      "grad_norm": 0.8750974535942078,
      "learning_rate": 8.262883575047937e-06,
      "loss": 0.023,
      "step": 547260
    },
    {
      "epoch": 0.8956357233099638,
      "grad_norm": 1.6271754503250122,
      "learning_rate": 8.26281768283442e-06,
      "loss": 0.0228,
      "step": 547280
    },
    {
      "epoch": 0.8956684537486171,
      "grad_norm": 0.9674031138420105,
      "learning_rate": 8.262751790620903e-06,
      "loss": 0.0239,
      "step": 547300
    },
    {
      "epoch": 0.8957011841872705,
      "grad_norm": 1.4067503213882446,
      "learning_rate": 8.262685898407385e-06,
      "loss": 0.0247,
      "step": 547320
    },
    {
      "epoch": 0.8957339146259238,
      "grad_norm": 0.4435911178588867,
      "learning_rate": 8.262620006193868e-06,
      "loss": 0.0292,
      "step": 547340
    },
    {
      "epoch": 0.8957666450645771,
      "grad_norm": 0.41882625222206116,
      "learning_rate": 8.262554113980352e-06,
      "loss": 0.0262,
      "step": 547360
    },
    {
      "epoch": 0.8957993755032305,
      "grad_norm": 1.5797803401947021,
      "learning_rate": 8.262488221766834e-06,
      "loss": 0.0283,
      "step": 547380
    },
    {
      "epoch": 0.8958321059418838,
      "grad_norm": 0.5929246544837952,
      "learning_rate": 8.262422329553317e-06,
      "loss": 0.0422,
      "step": 547400
    },
    {
      "epoch": 0.8958648363805372,
      "grad_norm": 0.5161735415458679,
      "learning_rate": 8.262356437339801e-06,
      "loss": 0.0238,
      "step": 547420
    },
    {
      "epoch": 0.8958975668191905,
      "grad_norm": 0.22347719967365265,
      "learning_rate": 8.262290545126283e-06,
      "loss": 0.0272,
      "step": 547440
    },
    {
      "epoch": 0.8959302972578439,
      "grad_norm": 1.668257236480713,
      "learning_rate": 8.262224652912766e-06,
      "loss": 0.0311,
      "step": 547460
    },
    {
      "epoch": 0.8959630276964972,
      "grad_norm": 0.5090743899345398,
      "learning_rate": 8.26215876069925e-06,
      "loss": 0.0295,
      "step": 547480
    },
    {
      "epoch": 0.8959957581351505,
      "grad_norm": 0.22063496708869934,
      "learning_rate": 8.262092868485732e-06,
      "loss": 0.0294,
      "step": 547500
    },
    {
      "epoch": 0.8960284885738039,
      "grad_norm": 1.7433243989944458,
      "learning_rate": 8.262026976272215e-06,
      "loss": 0.036,
      "step": 547520
    },
    {
      "epoch": 0.8960612190124572,
      "grad_norm": 0.476853609085083,
      "learning_rate": 8.261961084058697e-06,
      "loss": 0.033,
      "step": 547540
    },
    {
      "epoch": 0.8960939494511105,
      "grad_norm": 0.7132785320281982,
      "learning_rate": 8.261895191845181e-06,
      "loss": 0.0223,
      "step": 547560
    },
    {
      "epoch": 0.8961266798897639,
      "grad_norm": 0.5716043710708618,
      "learning_rate": 8.261829299631663e-06,
      "loss": 0.0219,
      "step": 547580
    },
    {
      "epoch": 0.8961594103284172,
      "grad_norm": 1.4450944662094116,
      "learning_rate": 8.261763407418146e-06,
      "loss": 0.0315,
      "step": 547600
    },
    {
      "epoch": 0.8961921407670705,
      "grad_norm": 2.5059654712677,
      "learning_rate": 8.261697515204628e-06,
      "loss": 0.0321,
      "step": 547620
    },
    {
      "epoch": 0.8962248712057239,
      "grad_norm": 2.04992413520813,
      "learning_rate": 8.261631622991112e-06,
      "loss": 0.0293,
      "step": 547640
    },
    {
      "epoch": 0.8962576016443773,
      "grad_norm": 0.5723841190338135,
      "learning_rate": 8.261565730777594e-06,
      "loss": 0.0266,
      "step": 547660
    },
    {
      "epoch": 0.8962903320830305,
      "grad_norm": 0.19446276128292084,
      "learning_rate": 8.261499838564077e-06,
      "loss": 0.0326,
      "step": 547680
    },
    {
      "epoch": 0.8963230625216839,
      "grad_norm": 2.348764657974243,
      "learning_rate": 8.26143394635056e-06,
      "loss": 0.0331,
      "step": 547700
    },
    {
      "epoch": 0.8963557929603373,
      "grad_norm": 0.7788733243942261,
      "learning_rate": 8.261368054137043e-06,
      "loss": 0.022,
      "step": 547720
    },
    {
      "epoch": 0.8963885233989906,
      "grad_norm": 0.6532886028289795,
      "learning_rate": 8.261302161923525e-06,
      "loss": 0.021,
      "step": 547740
    },
    {
      "epoch": 0.8964212538376439,
      "grad_norm": 0.5111812949180603,
      "learning_rate": 8.261236269710008e-06,
      "loss": 0.0223,
      "step": 547760
    },
    {
      "epoch": 0.8964539842762973,
      "grad_norm": 0.333932101726532,
      "learning_rate": 8.261170377496492e-06,
      "loss": 0.0266,
      "step": 547780
    },
    {
      "epoch": 0.8964867147149506,
      "grad_norm": 0.8582072854042053,
      "learning_rate": 8.261104485282976e-06,
      "loss": 0.0241,
      "step": 547800
    },
    {
      "epoch": 0.8965194451536039,
      "grad_norm": 0.08760369569063187,
      "learning_rate": 8.261038593069457e-06,
      "loss": 0.0302,
      "step": 547820
    },
    {
      "epoch": 0.8965521755922573,
      "grad_norm": 0.6900937557220459,
      "learning_rate": 8.260972700855941e-06,
      "loss": 0.0274,
      "step": 547840
    },
    {
      "epoch": 0.8965849060309107,
      "grad_norm": 0.6368681192398071,
      "learning_rate": 8.260906808642425e-06,
      "loss": 0.0288,
      "step": 547860
    },
    {
      "epoch": 0.8966176364695639,
      "grad_norm": 0.1267307996749878,
      "learning_rate": 8.260840916428907e-06,
      "loss": 0.0243,
      "step": 547880
    },
    {
      "epoch": 0.8966503669082173,
      "grad_norm": 0.07635624706745148,
      "learning_rate": 8.26077502421539e-06,
      "loss": 0.0335,
      "step": 547900
    },
    {
      "epoch": 0.8966830973468707,
      "grad_norm": 0.6571504473686218,
      "learning_rate": 8.260709132001872e-06,
      "loss": 0.0269,
      "step": 547920
    },
    {
      "epoch": 0.896715827785524,
      "grad_norm": 0.42245155572891235,
      "learning_rate": 8.260643239788356e-06,
      "loss": 0.0239,
      "step": 547940
    },
    {
      "epoch": 0.8967485582241773,
      "grad_norm": 1.5390387773513794,
      "learning_rate": 8.260577347574837e-06,
      "loss": 0.0286,
      "step": 547960
    },
    {
      "epoch": 0.8967812886628307,
      "grad_norm": 2.5172505378723145,
      "learning_rate": 8.260511455361321e-06,
      "loss": 0.0358,
      "step": 547980
    },
    {
      "epoch": 0.896814019101484,
      "grad_norm": 0.7130230665206909,
      "learning_rate": 8.260445563147803e-06,
      "loss": 0.0224,
      "step": 548000
    },
    {
      "epoch": 0.8968467495401373,
      "grad_norm": 0.9732676148414612,
      "learning_rate": 8.260379670934287e-06,
      "loss": 0.026,
      "step": 548020
    },
    {
      "epoch": 0.8968794799787907,
      "grad_norm": 0.6190518140792847,
      "learning_rate": 8.260313778720768e-06,
      "loss": 0.0214,
      "step": 548040
    },
    {
      "epoch": 0.8969122104174441,
      "grad_norm": 1.4570086002349854,
      "learning_rate": 8.260247886507252e-06,
      "loss": 0.0319,
      "step": 548060
    },
    {
      "epoch": 0.8969449408560973,
      "grad_norm": 2.5105443000793457,
      "learning_rate": 8.260181994293734e-06,
      "loss": 0.0239,
      "step": 548080
    },
    {
      "epoch": 0.8969776712947507,
      "grad_norm": 0.3556373715400696,
      "learning_rate": 8.260116102080217e-06,
      "loss": 0.022,
      "step": 548100
    },
    {
      "epoch": 0.8970104017334041,
      "grad_norm": 0.37133121490478516,
      "learning_rate": 8.260050209866701e-06,
      "loss": 0.0404,
      "step": 548120
    },
    {
      "epoch": 0.8970431321720573,
      "grad_norm": 0.8204532265663147,
      "learning_rate": 8.259984317653183e-06,
      "loss": 0.0278,
      "step": 548140
    },
    {
      "epoch": 0.8970758626107107,
      "grad_norm": 10.881806373596191,
      "learning_rate": 8.259918425439667e-06,
      "loss": 0.0302,
      "step": 548160
    },
    {
      "epoch": 0.8971085930493641,
      "grad_norm": 0.7093566656112671,
      "learning_rate": 8.259852533226148e-06,
      "loss": 0.0263,
      "step": 548180
    },
    {
      "epoch": 0.8971413234880173,
      "grad_norm": 1.35785973072052,
      "learning_rate": 8.259786641012632e-06,
      "loss": 0.0205,
      "step": 548200
    },
    {
      "epoch": 0.8971740539266707,
      "grad_norm": 1.394286870956421,
      "learning_rate": 8.259720748799116e-06,
      "loss": 0.0235,
      "step": 548220
    },
    {
      "epoch": 0.8972067843653241,
      "grad_norm": 0.8966095447540283,
      "learning_rate": 8.259654856585598e-06,
      "loss": 0.0317,
      "step": 548240
    },
    {
      "epoch": 0.8972395148039775,
      "grad_norm": 0.672584593296051,
      "learning_rate": 8.259588964372081e-06,
      "loss": 0.0211,
      "step": 548260
    },
    {
      "epoch": 0.8972722452426307,
      "grad_norm": 0.993823766708374,
      "learning_rate": 8.259523072158565e-06,
      "loss": 0.0254,
      "step": 548280
    },
    {
      "epoch": 0.8973049756812841,
      "grad_norm": 0.2993749976158142,
      "learning_rate": 8.259457179945047e-06,
      "loss": 0.0269,
      "step": 548300
    },
    {
      "epoch": 0.8973377061199375,
      "grad_norm": 1.1511471271514893,
      "learning_rate": 8.25939128773153e-06,
      "loss": 0.0495,
      "step": 548320
    },
    {
      "epoch": 0.8973704365585907,
      "grad_norm": 2.4131968021392822,
      "learning_rate": 8.259325395518012e-06,
      "loss": 0.0413,
      "step": 548340
    },
    {
      "epoch": 0.8974031669972441,
      "grad_norm": 0.5800583362579346,
      "learning_rate": 8.259259503304496e-06,
      "loss": 0.0313,
      "step": 548360
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 0.6224412322044373,
      "learning_rate": 8.259193611090978e-06,
      "loss": 0.0259,
      "step": 548380
    },
    {
      "epoch": 0.8974686278745507,
      "grad_norm": 2.51646089553833,
      "learning_rate": 8.259127718877461e-06,
      "loss": 0.0433,
      "step": 548400
    },
    {
      "epoch": 0.8975013583132041,
      "grad_norm": 0.9798787236213684,
      "learning_rate": 8.259061826663943e-06,
      "loss": 0.024,
      "step": 548420
    },
    {
      "epoch": 0.8975340887518575,
      "grad_norm": 2.196084976196289,
      "learning_rate": 8.258995934450427e-06,
      "loss": 0.0193,
      "step": 548440
    },
    {
      "epoch": 0.8975668191905108,
      "grad_norm": 0.8233957290649414,
      "learning_rate": 8.25893004223691e-06,
      "loss": 0.0266,
      "step": 548460
    },
    {
      "epoch": 0.8975995496291641,
      "grad_norm": 0.24351611733436584,
      "learning_rate": 8.258864150023392e-06,
      "loss": 0.0222,
      "step": 548480
    },
    {
      "epoch": 0.8976322800678175,
      "grad_norm": 2.7189409732818604,
      "learning_rate": 8.258798257809876e-06,
      "loss": 0.0283,
      "step": 548500
    },
    {
      "epoch": 0.8976650105064709,
      "grad_norm": 0.09948374330997467,
      "learning_rate": 8.258732365596358e-06,
      "loss": 0.0196,
      "step": 548520
    },
    {
      "epoch": 0.8976977409451241,
      "grad_norm": 0.17942266166210175,
      "learning_rate": 8.258666473382841e-06,
      "loss": 0.0261,
      "step": 548540
    },
    {
      "epoch": 0.8977304713837775,
      "grad_norm": 1.6485410928726196,
      "learning_rate": 8.258600581169323e-06,
      "loss": 0.018,
      "step": 548560
    },
    {
      "epoch": 0.8977632018224309,
      "grad_norm": 0.45106691122055054,
      "learning_rate": 8.258534688955807e-06,
      "loss": 0.0234,
      "step": 548580
    },
    {
      "epoch": 0.8977959322610841,
      "grad_norm": 0.5790374875068665,
      "learning_rate": 8.25846879674229e-06,
      "loss": 0.0275,
      "step": 548600
    },
    {
      "epoch": 0.8978286626997375,
      "grad_norm": 0.614236056804657,
      "learning_rate": 8.258402904528772e-06,
      "loss": 0.0258,
      "step": 548620
    },
    {
      "epoch": 0.8978613931383909,
      "grad_norm": 1.2047510147094727,
      "learning_rate": 8.258337012315256e-06,
      "loss": 0.0299,
      "step": 548640
    },
    {
      "epoch": 0.8978941235770442,
      "grad_norm": 1.4824007749557495,
      "learning_rate": 8.25827112010174e-06,
      "loss": 0.0292,
      "step": 548660
    },
    {
      "epoch": 0.8979268540156975,
      "grad_norm": 1.0043208599090576,
      "learning_rate": 8.258205227888221e-06,
      "loss": 0.0266,
      "step": 548680
    },
    {
      "epoch": 0.8979595844543509,
      "grad_norm": 0.23970866203308105,
      "learning_rate": 8.258139335674705e-06,
      "loss": 0.0254,
      "step": 548700
    },
    {
      "epoch": 0.8979923148930042,
      "grad_norm": 0.7952190637588501,
      "learning_rate": 8.258073443461187e-06,
      "loss": 0.0224,
      "step": 548720
    },
    {
      "epoch": 0.8980250453316575,
      "grad_norm": 0.38597819209098816,
      "learning_rate": 8.25800755124767e-06,
      "loss": 0.0339,
      "step": 548740
    },
    {
      "epoch": 0.8980577757703109,
      "grad_norm": 0.7147435545921326,
      "learning_rate": 8.257941659034152e-06,
      "loss": 0.0281,
      "step": 548760
    },
    {
      "epoch": 0.8980905062089642,
      "grad_norm": 2.246286630630493,
      "learning_rate": 8.257875766820636e-06,
      "loss": 0.0322,
      "step": 548780
    },
    {
      "epoch": 0.8981232366476175,
      "grad_norm": 1.2014163732528687,
      "learning_rate": 8.257809874607118e-06,
      "loss": 0.0255,
      "step": 548800
    },
    {
      "epoch": 0.8981559670862709,
      "grad_norm": 0.5081220865249634,
      "learning_rate": 8.257743982393601e-06,
      "loss": 0.0415,
      "step": 548820
    },
    {
      "epoch": 0.8981886975249243,
      "grad_norm": 0.7345765233039856,
      "learning_rate": 8.257678090180085e-06,
      "loss": 0.0336,
      "step": 548840
    },
    {
      "epoch": 0.8982214279635776,
      "grad_norm": 1.0063655376434326,
      "learning_rate": 8.257612197966567e-06,
      "loss": 0.0272,
      "step": 548860
    },
    {
      "epoch": 0.8982541584022309,
      "grad_norm": 0.25313228368759155,
      "learning_rate": 8.25754630575305e-06,
      "loss": 0.0246,
      "step": 548880
    },
    {
      "epoch": 0.8982868888408843,
      "grad_norm": 0.6216698288917542,
      "learning_rate": 8.257480413539532e-06,
      "loss": 0.0333,
      "step": 548900
    },
    {
      "epoch": 0.8983196192795376,
      "grad_norm": 0.679456353187561,
      "learning_rate": 8.257414521326016e-06,
      "loss": 0.0246,
      "step": 548920
    },
    {
      "epoch": 0.8983523497181909,
      "grad_norm": 0.8830099105834961,
      "learning_rate": 8.257348629112498e-06,
      "loss": 0.0246,
      "step": 548940
    },
    {
      "epoch": 0.8983850801568443,
      "grad_norm": 0.11067884415388107,
      "learning_rate": 8.257282736898981e-06,
      "loss": 0.0301,
      "step": 548960
    },
    {
      "epoch": 0.8984178105954976,
      "grad_norm": 1.2472481727600098,
      "learning_rate": 8.257216844685463e-06,
      "loss": 0.0274,
      "step": 548980
    },
    {
      "epoch": 0.8984505410341509,
      "grad_norm": 0.4344797134399414,
      "learning_rate": 8.257150952471947e-06,
      "loss": 0.0207,
      "step": 549000
    },
    {
      "epoch": 0.8984832714728043,
      "grad_norm": 1.2973463535308838,
      "learning_rate": 8.25708506025843e-06,
      "loss": 0.0252,
      "step": 549020
    },
    {
      "epoch": 0.8985160019114576,
      "grad_norm": 0.14848394691944122,
      "learning_rate": 8.257019168044912e-06,
      "loss": 0.0224,
      "step": 549040
    },
    {
      "epoch": 0.8985487323501109,
      "grad_norm": 0.29014822840690613,
      "learning_rate": 8.256953275831396e-06,
      "loss": 0.0222,
      "step": 549060
    },
    {
      "epoch": 0.8985814627887643,
      "grad_norm": 0.8393876552581787,
      "learning_rate": 8.25688738361788e-06,
      "loss": 0.0339,
      "step": 549080
    },
    {
      "epoch": 0.8986141932274176,
      "grad_norm": 1.3461987972259521,
      "learning_rate": 8.256821491404361e-06,
      "loss": 0.0341,
      "step": 549100
    },
    {
      "epoch": 0.898646923666071,
      "grad_norm": 0.9653303027153015,
      "learning_rate": 8.256755599190845e-06,
      "loss": 0.034,
      "step": 549120
    },
    {
      "epoch": 0.8986796541047243,
      "grad_norm": 1.3175307512283325,
      "learning_rate": 8.256689706977327e-06,
      "loss": 0.0237,
      "step": 549140
    },
    {
      "epoch": 0.8987123845433777,
      "grad_norm": 0.7568111419677734,
      "learning_rate": 8.25662381476381e-06,
      "loss": 0.0171,
      "step": 549160
    },
    {
      "epoch": 0.898745114982031,
      "grad_norm": 0.17501084506511688,
      "learning_rate": 8.256557922550294e-06,
      "loss": 0.0184,
      "step": 549180
    },
    {
      "epoch": 0.8987778454206843,
      "grad_norm": 2.276233434677124,
      "learning_rate": 8.256492030336776e-06,
      "loss": 0.0308,
      "step": 549200
    },
    {
      "epoch": 0.8988105758593377,
      "grad_norm": 2.1385080814361572,
      "learning_rate": 8.25642613812326e-06,
      "loss": 0.029,
      "step": 549220
    },
    {
      "epoch": 0.898843306297991,
      "grad_norm": 0.4249245524406433,
      "learning_rate": 8.256360245909741e-06,
      "loss": 0.0273,
      "step": 549240
    },
    {
      "epoch": 0.8988760367366443,
      "grad_norm": 1.5189039707183838,
      "learning_rate": 8.256294353696225e-06,
      "loss": 0.0219,
      "step": 549260
    },
    {
      "epoch": 0.8989087671752977,
      "grad_norm": 0.2745778560638428,
      "learning_rate": 8.256228461482707e-06,
      "loss": 0.0235,
      "step": 549280
    },
    {
      "epoch": 0.898941497613951,
      "grad_norm": 0.43576690554618835,
      "learning_rate": 8.25616256926919e-06,
      "loss": 0.0291,
      "step": 549300
    },
    {
      "epoch": 0.8989742280526044,
      "grad_norm": 0.901409924030304,
      "learning_rate": 8.256096677055672e-06,
      "loss": 0.0343,
      "step": 549320
    },
    {
      "epoch": 0.8990069584912577,
      "grad_norm": 0.7091100215911865,
      "learning_rate": 8.256030784842156e-06,
      "loss": 0.0327,
      "step": 549340
    },
    {
      "epoch": 0.899039688929911,
      "grad_norm": 0.47798487544059753,
      "learning_rate": 8.255964892628638e-06,
      "loss": 0.0336,
      "step": 549360
    },
    {
      "epoch": 0.8990724193685644,
      "grad_norm": 0.8016530275344849,
      "learning_rate": 8.255899000415121e-06,
      "loss": 0.035,
      "step": 549380
    },
    {
      "epoch": 0.8991051498072177,
      "grad_norm": 0.6343863010406494,
      "learning_rate": 8.255833108201605e-06,
      "loss": 0.0249,
      "step": 549400
    },
    {
      "epoch": 0.899137880245871,
      "grad_norm": 1.159541368484497,
      "learning_rate": 8.255767215988087e-06,
      "loss": 0.0269,
      "step": 549420
    },
    {
      "epoch": 0.8991706106845244,
      "grad_norm": 2.1396169662475586,
      "learning_rate": 8.25570132377457e-06,
      "loss": 0.0244,
      "step": 549440
    },
    {
      "epoch": 0.8992033411231777,
      "grad_norm": 0.3104341924190521,
      "learning_rate": 8.255635431561054e-06,
      "loss": 0.0168,
      "step": 549460
    },
    {
      "epoch": 0.899236071561831,
      "grad_norm": 1.382080316543579,
      "learning_rate": 8.255569539347536e-06,
      "loss": 0.0383,
      "step": 549480
    },
    {
      "epoch": 0.8992688020004844,
      "grad_norm": 0.6081656217575073,
      "learning_rate": 8.25550364713402e-06,
      "loss": 0.0335,
      "step": 549500
    },
    {
      "epoch": 0.8993015324391378,
      "grad_norm": 0.5160934329032898,
      "learning_rate": 8.255437754920503e-06,
      "loss": 0.0237,
      "step": 549520
    },
    {
      "epoch": 0.8993342628777911,
      "grad_norm": 0.5615549683570862,
      "learning_rate": 8.255371862706985e-06,
      "loss": 0.0291,
      "step": 549540
    },
    {
      "epoch": 0.8993669933164444,
      "grad_norm": 0.9438377022743225,
      "learning_rate": 8.255305970493469e-06,
      "loss": 0.0245,
      "step": 549560
    },
    {
      "epoch": 0.8993997237550978,
      "grad_norm": 0.42800018191337585,
      "learning_rate": 8.25524007827995e-06,
      "loss": 0.0354,
      "step": 549580
    },
    {
      "epoch": 0.8994324541937511,
      "grad_norm": 0.594998836517334,
      "learning_rate": 8.255174186066434e-06,
      "loss": 0.0181,
      "step": 549600
    },
    {
      "epoch": 0.8994651846324044,
      "grad_norm": 1.4848486185073853,
      "learning_rate": 8.255108293852916e-06,
      "loss": 0.0289,
      "step": 549620
    },
    {
      "epoch": 0.8994979150710578,
      "grad_norm": 0.7790893912315369,
      "learning_rate": 8.2550424016394e-06,
      "loss": 0.0271,
      "step": 549640
    },
    {
      "epoch": 0.8995306455097111,
      "grad_norm": 1.4716931581497192,
      "learning_rate": 8.254976509425881e-06,
      "loss": 0.0353,
      "step": 549660
    },
    {
      "epoch": 0.8995633759483644,
      "grad_norm": 1.1608930826187134,
      "learning_rate": 8.254910617212365e-06,
      "loss": 0.0251,
      "step": 549680
    },
    {
      "epoch": 0.8995961063870178,
      "grad_norm": 1.1439921855926514,
      "learning_rate": 8.254844724998847e-06,
      "loss": 0.0236,
      "step": 549700
    },
    {
      "epoch": 0.8996288368256712,
      "grad_norm": 0.7528538703918457,
      "learning_rate": 8.25477883278533e-06,
      "loss": 0.0392,
      "step": 549720
    },
    {
      "epoch": 0.8996615672643244,
      "grad_norm": 0.6668612957000732,
      "learning_rate": 8.254712940571812e-06,
      "loss": 0.0274,
      "step": 549740
    },
    {
      "epoch": 0.8996942977029778,
      "grad_norm": 0.4783798158168793,
      "learning_rate": 8.254647048358296e-06,
      "loss": 0.0235,
      "step": 549760
    },
    {
      "epoch": 0.8997270281416312,
      "grad_norm": 1.6427284479141235,
      "learning_rate": 8.254581156144778e-06,
      "loss": 0.0296,
      "step": 549780
    },
    {
      "epoch": 0.8997597585802845,
      "grad_norm": 2.4225001335144043,
      "learning_rate": 8.254515263931261e-06,
      "loss": 0.0341,
      "step": 549800
    },
    {
      "epoch": 0.8997924890189378,
      "grad_norm": 2.887982130050659,
      "learning_rate": 8.254449371717745e-06,
      "loss": 0.0344,
      "step": 549820
    },
    {
      "epoch": 0.8998252194575912,
      "grad_norm": 1.136066198348999,
      "learning_rate": 8.254383479504227e-06,
      "loss": 0.0224,
      "step": 549840
    },
    {
      "epoch": 0.8998579498962445,
      "grad_norm": 0.5421342253684998,
      "learning_rate": 8.25431758729071e-06,
      "loss": 0.023,
      "step": 549860
    },
    {
      "epoch": 0.8998906803348978,
      "grad_norm": 2.2450499534606934,
      "learning_rate": 8.254251695077194e-06,
      "loss": 0.039,
      "step": 549880
    },
    {
      "epoch": 0.8999234107735512,
      "grad_norm": 1.3175318241119385,
      "learning_rate": 8.254185802863676e-06,
      "loss": 0.0336,
      "step": 549900
    },
    {
      "epoch": 0.8999561412122046,
      "grad_norm": 1.0173147916793823,
      "learning_rate": 8.25411991065016e-06,
      "loss": 0.0336,
      "step": 549920
    },
    {
      "epoch": 0.8999888716508578,
      "grad_norm": 1.4899306297302246,
      "learning_rate": 8.254054018436643e-06,
      "loss": 0.0293,
      "step": 549940
    },
    {
      "epoch": 0.9000216020895112,
      "grad_norm": 0.21859318017959595,
      "learning_rate": 8.253988126223125e-06,
      "loss": 0.0266,
      "step": 549960
    },
    {
      "epoch": 0.9000543325281646,
      "grad_norm": 1.0415749549865723,
      "learning_rate": 8.253922234009609e-06,
      "loss": 0.0222,
      "step": 549980
    },
    {
      "epoch": 0.9000870629668178,
      "grad_norm": 1.123157262802124,
      "learning_rate": 8.25385634179609e-06,
      "loss": 0.0222,
      "step": 550000
    },
    {
      "epoch": 0.9000870629668178,
      "eval_loss": 0.014608035795390606,
      "eval_runtime": 6501.8264,
      "eval_samples_per_second": 158.088,
      "eval_steps_per_second": 15.809,
      "eval_sts-dev_pearson_cosine": 0.9653939487511833,
      "eval_sts-dev_spearman_cosine": 0.8832580914526206,
      "step": 550000
    },
    {
      "epoch": 0.9001197934054712,
      "grad_norm": 1.0802170038223267,
      "learning_rate": 8.253790449582574e-06,
      "loss": 0.031,
      "step": 550020
    },
    {
      "epoch": 0.9001525238441246,
      "grad_norm": 0.2486262321472168,
      "learning_rate": 8.253724557369056e-06,
      "loss": 0.0403,
      "step": 550040
    },
    {
      "epoch": 0.9001852542827778,
      "grad_norm": 0.5896999835968018,
      "learning_rate": 8.25365866515554e-06,
      "loss": 0.0286,
      "step": 550060
    },
    {
      "epoch": 0.9002179847214312,
      "grad_norm": 2.346525192260742,
      "learning_rate": 8.253592772942021e-06,
      "loss": 0.0339,
      "step": 550080
    },
    {
      "epoch": 0.9002507151600846,
      "grad_norm": 1.6303666830062866,
      "learning_rate": 8.253526880728505e-06,
      "loss": 0.0293,
      "step": 550100
    },
    {
      "epoch": 0.900283445598738,
      "grad_norm": 0.34596624970436096,
      "learning_rate": 8.253460988514987e-06,
      "loss": 0.0246,
      "step": 550120
    },
    {
      "epoch": 0.9003161760373912,
      "grad_norm": 1.6285384893417358,
      "learning_rate": 8.25339509630147e-06,
      "loss": 0.0344,
      "step": 550140
    },
    {
      "epoch": 0.9003489064760446,
      "grad_norm": 1.8588367700576782,
      "learning_rate": 8.253329204087952e-06,
      "loss": 0.0284,
      "step": 550160
    },
    {
      "epoch": 0.900381636914698,
      "grad_norm": 0.7750234603881836,
      "learning_rate": 8.253263311874436e-06,
      "loss": 0.0198,
      "step": 550180
    },
    {
      "epoch": 0.9004143673533512,
      "grad_norm": 0.7216748595237732,
      "learning_rate": 8.25319741966092e-06,
      "loss": 0.0192,
      "step": 550200
    },
    {
      "epoch": 0.9004470977920046,
      "grad_norm": 0.13812404870986938,
      "learning_rate": 8.253131527447401e-06,
      "loss": 0.02,
      "step": 550220
    },
    {
      "epoch": 0.900479828230658,
      "grad_norm": 0.30351874232292175,
      "learning_rate": 8.253065635233885e-06,
      "loss": 0.0292,
      "step": 550240
    },
    {
      "epoch": 0.9005125586693112,
      "grad_norm": 1.0863884687423706,
      "learning_rate": 8.252999743020369e-06,
      "loss": 0.0344,
      "step": 550260
    },
    {
      "epoch": 0.9005452891079646,
      "grad_norm": 1.1138640642166138,
      "learning_rate": 8.25293385080685e-06,
      "loss": 0.0319,
      "step": 550280
    },
    {
      "epoch": 0.900578019546618,
      "grad_norm": 0.7279845476150513,
      "learning_rate": 8.252867958593334e-06,
      "loss": 0.0345,
      "step": 550300
    },
    {
      "epoch": 0.9006107499852714,
      "grad_norm": 0.28245148062705994,
      "learning_rate": 8.252802066379818e-06,
      "loss": 0.0227,
      "step": 550320
    },
    {
      "epoch": 0.9006434804239246,
      "grad_norm": 0.9223577380180359,
      "learning_rate": 8.2527361741663e-06,
      "loss": 0.0348,
      "step": 550340
    },
    {
      "epoch": 0.900676210862578,
      "grad_norm": 0.5958155393600464,
      "learning_rate": 8.252670281952783e-06,
      "loss": 0.029,
      "step": 550360
    },
    {
      "epoch": 0.9007089413012314,
      "grad_norm": 0.7668684124946594,
      "learning_rate": 8.252604389739265e-06,
      "loss": 0.0202,
      "step": 550380
    },
    {
      "epoch": 0.9007416717398846,
      "grad_norm": 1.149767518043518,
      "learning_rate": 8.252538497525749e-06,
      "loss": 0.0378,
      "step": 550400
    },
    {
      "epoch": 0.900774402178538,
      "grad_norm": 0.4974653720855713,
      "learning_rate": 8.25247260531223e-06,
      "loss": 0.0278,
      "step": 550420
    },
    {
      "epoch": 0.9008071326171914,
      "grad_norm": 0.5095345377922058,
      "learning_rate": 8.252406713098714e-06,
      "loss": 0.0325,
      "step": 550440
    },
    {
      "epoch": 0.9008398630558446,
      "grad_norm": 1.2801403999328613,
      "learning_rate": 8.252340820885196e-06,
      "loss": 0.0345,
      "step": 550460
    },
    {
      "epoch": 0.900872593494498,
      "grad_norm": 3.1176674365997314,
      "learning_rate": 8.25227492867168e-06,
      "loss": 0.0373,
      "step": 550480
    },
    {
      "epoch": 0.9009053239331514,
      "grad_norm": 2.1690309047698975,
      "learning_rate": 8.252209036458162e-06,
      "loss": 0.0331,
      "step": 550500
    },
    {
      "epoch": 0.9009380543718047,
      "grad_norm": 0.8663259148597717,
      "learning_rate": 8.252143144244645e-06,
      "loss": 0.033,
      "step": 550520
    },
    {
      "epoch": 0.900970784810458,
      "grad_norm": 1.591011881828308,
      "learning_rate": 8.252077252031127e-06,
      "loss": 0.0238,
      "step": 550540
    },
    {
      "epoch": 0.9010035152491114,
      "grad_norm": 0.5012558698654175,
      "learning_rate": 8.25201135981761e-06,
      "loss": 0.0268,
      "step": 550560
    },
    {
      "epoch": 0.9010362456877647,
      "grad_norm": 1.11801016330719,
      "learning_rate": 8.251945467604094e-06,
      "loss": 0.0254,
      "step": 550580
    },
    {
      "epoch": 0.901068976126418,
      "grad_norm": 0.5811746120452881,
      "learning_rate": 8.251879575390576e-06,
      "loss": 0.0315,
      "step": 550600
    },
    {
      "epoch": 0.9011017065650714,
      "grad_norm": 0.1523088961839676,
      "learning_rate": 8.25181368317706e-06,
      "loss": 0.023,
      "step": 550620
    },
    {
      "epoch": 0.9011344370037248,
      "grad_norm": 1.427830696105957,
      "learning_rate": 8.251747790963543e-06,
      "loss": 0.0378,
      "step": 550640
    },
    {
      "epoch": 0.901167167442378,
      "grad_norm": 0.7622789144515991,
      "learning_rate": 8.251681898750025e-06,
      "loss": 0.0241,
      "step": 550660
    },
    {
      "epoch": 0.9011998978810314,
      "grad_norm": 0.6034154295921326,
      "learning_rate": 8.251616006536509e-06,
      "loss": 0.0203,
      "step": 550680
    },
    {
      "epoch": 0.9012326283196848,
      "grad_norm": 2.1405880451202393,
      "learning_rate": 8.251550114322992e-06,
      "loss": 0.0381,
      "step": 550700
    },
    {
      "epoch": 0.9012653587583381,
      "grad_norm": 1.097402572631836,
      "learning_rate": 8.251484222109474e-06,
      "loss": 0.0249,
      "step": 550720
    },
    {
      "epoch": 0.9012980891969914,
      "grad_norm": 1.7901726961135864,
      "learning_rate": 8.251418329895958e-06,
      "loss": 0.029,
      "step": 550740
    },
    {
      "epoch": 0.9013308196356448,
      "grad_norm": 0.5841827392578125,
      "learning_rate": 8.25135243768244e-06,
      "loss": 0.0318,
      "step": 550760
    },
    {
      "epoch": 0.9013635500742981,
      "grad_norm": 1.1036159992218018,
      "learning_rate": 8.251286545468923e-06,
      "loss": 0.0296,
      "step": 550780
    },
    {
      "epoch": 0.9013962805129514,
      "grad_norm": 0.6215741038322449,
      "learning_rate": 8.251220653255405e-06,
      "loss": 0.0244,
      "step": 550800
    },
    {
      "epoch": 0.9014290109516048,
      "grad_norm": 1.5128707885742188,
      "learning_rate": 8.251154761041889e-06,
      "loss": 0.022,
      "step": 550820
    },
    {
      "epoch": 0.9014617413902581,
      "grad_norm": 1.0542253255844116,
      "learning_rate": 8.25108886882837e-06,
      "loss": 0.024,
      "step": 550840
    },
    {
      "epoch": 0.9014944718289114,
      "grad_norm": 0.9347643256187439,
      "learning_rate": 8.251022976614854e-06,
      "loss": 0.031,
      "step": 550860
    },
    {
      "epoch": 0.9015272022675648,
      "grad_norm": 0.7546252608299255,
      "learning_rate": 8.250957084401336e-06,
      "loss": 0.0231,
      "step": 550880
    },
    {
      "epoch": 0.9015599327062181,
      "grad_norm": 1.028233528137207,
      "learning_rate": 8.25089119218782e-06,
      "loss": 0.0226,
      "step": 550900
    },
    {
      "epoch": 0.9015926631448715,
      "grad_norm": 0.5200424194335938,
      "learning_rate": 8.250825299974303e-06,
      "loss": 0.0293,
      "step": 550920
    },
    {
      "epoch": 0.9016253935835248,
      "grad_norm": 1.6863081455230713,
      "learning_rate": 8.250759407760785e-06,
      "loss": 0.0301,
      "step": 550940
    },
    {
      "epoch": 0.9016581240221782,
      "grad_norm": 0.09435533732175827,
      "learning_rate": 8.250693515547269e-06,
      "loss": 0.0199,
      "step": 550960
    },
    {
      "epoch": 0.9016908544608315,
      "grad_norm": 1.4241842031478882,
      "learning_rate": 8.25062762333375e-06,
      "loss": 0.0356,
      "step": 550980
    },
    {
      "epoch": 0.9017235848994848,
      "grad_norm": 3.4889304637908936,
      "learning_rate": 8.250561731120234e-06,
      "loss": 0.029,
      "step": 551000
    },
    {
      "epoch": 0.9017563153381382,
      "grad_norm": 1.1880064010620117,
      "learning_rate": 8.250495838906716e-06,
      "loss": 0.0216,
      "step": 551020
    },
    {
      "epoch": 0.9017890457767915,
      "grad_norm": 0.7852798700332642,
      "learning_rate": 8.2504299466932e-06,
      "loss": 0.0337,
      "step": 551040
    },
    {
      "epoch": 0.9018217762154448,
      "grad_norm": 1.3582626581192017,
      "learning_rate": 8.250364054479683e-06,
      "loss": 0.0314,
      "step": 551060
    },
    {
      "epoch": 0.9018545066540982,
      "grad_norm": 0.29384997487068176,
      "learning_rate": 8.250298162266165e-06,
      "loss": 0.0248,
      "step": 551080
    },
    {
      "epoch": 0.9018872370927515,
      "grad_norm": 0.1770666241645813,
      "learning_rate": 8.250232270052649e-06,
      "loss": 0.0266,
      "step": 551100
    },
    {
      "epoch": 0.9019199675314049,
      "grad_norm": 1.2189104557037354,
      "learning_rate": 8.250166377839132e-06,
      "loss": 0.0302,
      "step": 551120
    },
    {
      "epoch": 0.9019526979700582,
      "grad_norm": 0.08269309997558594,
      "learning_rate": 8.250100485625614e-06,
      "loss": 0.0285,
      "step": 551140
    },
    {
      "epoch": 0.9019854284087115,
      "grad_norm": 0.4105132818222046,
      "learning_rate": 8.250034593412098e-06,
      "loss": 0.0332,
      "step": 551160
    },
    {
      "epoch": 0.9020181588473649,
      "grad_norm": 1.3529969453811646,
      "learning_rate": 8.24996870119858e-06,
      "loss": 0.0327,
      "step": 551180
    },
    {
      "epoch": 0.9020508892860182,
      "grad_norm": 1.6309436559677124,
      "learning_rate": 8.249902808985063e-06,
      "loss": 0.0367,
      "step": 551200
    },
    {
      "epoch": 0.9020836197246715,
      "grad_norm": 1.0376873016357422,
      "learning_rate": 8.249836916771545e-06,
      "loss": 0.0286,
      "step": 551220
    },
    {
      "epoch": 0.9021163501633249,
      "grad_norm": 1.1527811288833618,
      "learning_rate": 8.249771024558029e-06,
      "loss": 0.0259,
      "step": 551240
    },
    {
      "epoch": 0.9021490806019782,
      "grad_norm": 0.7772173285484314,
      "learning_rate": 8.24970513234451e-06,
      "loss": 0.0255,
      "step": 551260
    },
    {
      "epoch": 0.9021818110406316,
      "grad_norm": 0.14856426417827606,
      "learning_rate": 8.249639240130994e-06,
      "loss": 0.0278,
      "step": 551280
    },
    {
      "epoch": 0.9022145414792849,
      "grad_norm": 0.6641857028007507,
      "learning_rate": 8.249573347917478e-06,
      "loss": 0.0236,
      "step": 551300
    },
    {
      "epoch": 0.9022472719179383,
      "grad_norm": 0.9396137595176697,
      "learning_rate": 8.24950745570396e-06,
      "loss": 0.0294,
      "step": 551320
    },
    {
      "epoch": 0.9022800023565916,
      "grad_norm": 0.33334407210350037,
      "learning_rate": 8.249441563490443e-06,
      "loss": 0.0268,
      "step": 551340
    },
    {
      "epoch": 0.9023127327952449,
      "grad_norm": 0.4097505509853363,
      "learning_rate": 8.249375671276925e-06,
      "loss": 0.026,
      "step": 551360
    },
    {
      "epoch": 0.9023454632338983,
      "grad_norm": 2.460695505142212,
      "learning_rate": 8.249309779063409e-06,
      "loss": 0.0283,
      "step": 551380
    },
    {
      "epoch": 0.9023781936725516,
      "grad_norm": 0.3341403305530548,
      "learning_rate": 8.24924388684989e-06,
      "loss": 0.0322,
      "step": 551400
    },
    {
      "epoch": 0.9024109241112049,
      "grad_norm": 1.1016122102737427,
      "learning_rate": 8.249177994636374e-06,
      "loss": 0.0351,
      "step": 551420
    },
    {
      "epoch": 0.9024436545498583,
      "grad_norm": 0.6831485629081726,
      "learning_rate": 8.249112102422858e-06,
      "loss": 0.0235,
      "step": 551440
    },
    {
      "epoch": 0.9024763849885116,
      "grad_norm": 0.7003922462463379,
      "learning_rate": 8.24904621020934e-06,
      "loss": 0.032,
      "step": 551460
    },
    {
      "epoch": 0.9025091154271649,
      "grad_norm": 10.709710121154785,
      "learning_rate": 8.248980317995823e-06,
      "loss": 0.0403,
      "step": 551480
    },
    {
      "epoch": 0.9025418458658183,
      "grad_norm": 1.0608998537063599,
      "learning_rate": 8.248914425782307e-06,
      "loss": 0.0313,
      "step": 551500
    },
    {
      "epoch": 0.9025745763044717,
      "grad_norm": 0.6838059425354004,
      "learning_rate": 8.248848533568789e-06,
      "loss": 0.04,
      "step": 551520
    },
    {
      "epoch": 0.902607306743125,
      "grad_norm": 0.3015550971031189,
      "learning_rate": 8.248782641355272e-06,
      "loss": 0.0346,
      "step": 551540
    },
    {
      "epoch": 0.9026400371817783,
      "grad_norm": 2.629566192626953,
      "learning_rate": 8.248716749141754e-06,
      "loss": 0.0286,
      "step": 551560
    },
    {
      "epoch": 0.9026727676204317,
      "grad_norm": 1.3546080589294434,
      "learning_rate": 8.248650856928238e-06,
      "loss": 0.0273,
      "step": 551580
    },
    {
      "epoch": 0.902705498059085,
      "grad_norm": 1.6517412662506104,
      "learning_rate": 8.24858496471472e-06,
      "loss": 0.0334,
      "step": 551600
    },
    {
      "epoch": 0.9027382284977383,
      "grad_norm": 1.2542788982391357,
      "learning_rate": 8.248519072501203e-06,
      "loss": 0.0241,
      "step": 551620
    },
    {
      "epoch": 0.9027709589363917,
      "grad_norm": 1.2271956205368042,
      "learning_rate": 8.248453180287687e-06,
      "loss": 0.0225,
      "step": 551640
    },
    {
      "epoch": 0.902803689375045,
      "grad_norm": 0.48393312096595764,
      "learning_rate": 8.248387288074169e-06,
      "loss": 0.0241,
      "step": 551660
    },
    {
      "epoch": 0.9028364198136983,
      "grad_norm": 0.3292083442211151,
      "learning_rate": 8.248321395860652e-06,
      "loss": 0.0274,
      "step": 551680
    },
    {
      "epoch": 0.9028691502523517,
      "grad_norm": 1.742337703704834,
      "learning_rate": 8.248255503647134e-06,
      "loss": 0.0237,
      "step": 551700
    },
    {
      "epoch": 0.902901880691005,
      "grad_norm": 0.8689019680023193,
      "learning_rate": 8.248189611433618e-06,
      "loss": 0.0265,
      "step": 551720
    },
    {
      "epoch": 0.9029346111296583,
      "grad_norm": 0.5895596742630005,
      "learning_rate": 8.2481237192201e-06,
      "loss": 0.0294,
      "step": 551740
    },
    {
      "epoch": 0.9029673415683117,
      "grad_norm": 0.9266477823257446,
      "learning_rate": 8.248057827006583e-06,
      "loss": 0.0275,
      "step": 551760
    },
    {
      "epoch": 0.9030000720069651,
      "grad_norm": 0.39880457520484924,
      "learning_rate": 8.247991934793065e-06,
      "loss": 0.0258,
      "step": 551780
    },
    {
      "epoch": 0.9030328024456183,
      "grad_norm": 0.3269397020339966,
      "learning_rate": 8.247926042579549e-06,
      "loss": 0.0257,
      "step": 551800
    },
    {
      "epoch": 0.9030655328842717,
      "grad_norm": 1.2957007884979248,
      "learning_rate": 8.24786015036603e-06,
      "loss": 0.0315,
      "step": 551820
    },
    {
      "epoch": 0.9030982633229251,
      "grad_norm": 0.9417505264282227,
      "learning_rate": 8.247794258152514e-06,
      "loss": 0.0253,
      "step": 551840
    },
    {
      "epoch": 0.9031309937615783,
      "grad_norm": 0.13794174790382385,
      "learning_rate": 8.247728365938998e-06,
      "loss": 0.0288,
      "step": 551860
    },
    {
      "epoch": 0.9031637242002317,
      "grad_norm": 4.412942886352539,
      "learning_rate": 8.24766247372548e-06,
      "loss": 0.0309,
      "step": 551880
    },
    {
      "epoch": 0.9031964546388851,
      "grad_norm": 5.9430460929870605,
      "learning_rate": 8.247596581511963e-06,
      "loss": 0.0263,
      "step": 551900
    },
    {
      "epoch": 0.9032291850775384,
      "grad_norm": 0.46061909198760986,
      "learning_rate": 8.247530689298447e-06,
      "loss": 0.025,
      "step": 551920
    },
    {
      "epoch": 0.9032619155161917,
      "grad_norm": 0.27717483043670654,
      "learning_rate": 8.247464797084929e-06,
      "loss": 0.0379,
      "step": 551940
    },
    {
      "epoch": 0.9032946459548451,
      "grad_norm": 0.2542276680469513,
      "learning_rate": 8.247398904871413e-06,
      "loss": 0.0312,
      "step": 551960
    },
    {
      "epoch": 0.9033273763934985,
      "grad_norm": 0.40400928258895874,
      "learning_rate": 8.247333012657896e-06,
      "loss": 0.0244,
      "step": 551980
    },
    {
      "epoch": 0.9033601068321517,
      "grad_norm": 3.8683323860168457,
      "learning_rate": 8.247267120444378e-06,
      "loss": 0.0405,
      "step": 552000
    },
    {
      "epoch": 0.9033928372708051,
      "grad_norm": 1.1016610860824585,
      "learning_rate": 8.247201228230862e-06,
      "loss": 0.0341,
      "step": 552020
    },
    {
      "epoch": 0.9034255677094585,
      "grad_norm": 2.082542896270752,
      "learning_rate": 8.247135336017343e-06,
      "loss": 0.0272,
      "step": 552040
    },
    {
      "epoch": 0.9034582981481117,
      "grad_norm": 2.2806618213653564,
      "learning_rate": 8.247069443803827e-06,
      "loss": 0.0268,
      "step": 552060
    },
    {
      "epoch": 0.9034910285867651,
      "grad_norm": 0.2777177095413208,
      "learning_rate": 8.247003551590309e-06,
      "loss": 0.0291,
      "step": 552080
    },
    {
      "epoch": 0.9035237590254185,
      "grad_norm": 0.4001029431819916,
      "learning_rate": 8.246937659376793e-06,
      "loss": 0.0327,
      "step": 552100
    },
    {
      "epoch": 0.9035564894640717,
      "grad_norm": 0.45917651057243347,
      "learning_rate": 8.246871767163274e-06,
      "loss": 0.0353,
      "step": 552120
    },
    {
      "epoch": 0.9035892199027251,
      "grad_norm": 1.4256435632705688,
      "learning_rate": 8.246805874949758e-06,
      "loss": 0.0283,
      "step": 552140
    },
    {
      "epoch": 0.9036219503413785,
      "grad_norm": 0.639808714389801,
      "learning_rate": 8.24673998273624e-06,
      "loss": 0.0235,
      "step": 552160
    },
    {
      "epoch": 0.9036546807800319,
      "grad_norm": 0.8425635099411011,
      "learning_rate": 8.246674090522724e-06,
      "loss": 0.0247,
      "step": 552180
    },
    {
      "epoch": 0.9036874112186851,
      "grad_norm": 0.3258146643638611,
      "learning_rate": 8.246608198309205e-06,
      "loss": 0.0206,
      "step": 552200
    },
    {
      "epoch": 0.9037201416573385,
      "grad_norm": 2.027296304702759,
      "learning_rate": 8.246542306095689e-06,
      "loss": 0.032,
      "step": 552220
    },
    {
      "epoch": 0.9037528720959919,
      "grad_norm": 0.9078141450881958,
      "learning_rate": 8.246476413882173e-06,
      "loss": 0.0194,
      "step": 552240
    },
    {
      "epoch": 0.9037856025346451,
      "grad_norm": 1.6035910844802856,
      "learning_rate": 8.246410521668654e-06,
      "loss": 0.0259,
      "step": 552260
    },
    {
      "epoch": 0.9038183329732985,
      "grad_norm": 0.9646168947219849,
      "learning_rate": 8.246344629455138e-06,
      "loss": 0.0283,
      "step": 552280
    },
    {
      "epoch": 0.9038510634119519,
      "grad_norm": 0.9414987564086914,
      "learning_rate": 8.246278737241622e-06,
      "loss": 0.0326,
      "step": 552300
    },
    {
      "epoch": 0.9038837938506051,
      "grad_norm": 1.0667994022369385,
      "learning_rate": 8.246212845028104e-06,
      "loss": 0.0241,
      "step": 552320
    },
    {
      "epoch": 0.9039165242892585,
      "grad_norm": 0.6775602102279663,
      "learning_rate": 8.246146952814587e-06,
      "loss": 0.0287,
      "step": 552340
    },
    {
      "epoch": 0.9039492547279119,
      "grad_norm": 0.5240116119384766,
      "learning_rate": 8.24608106060107e-06,
      "loss": 0.0322,
      "step": 552360
    },
    {
      "epoch": 0.9039819851665652,
      "grad_norm": 0.5989300012588501,
      "learning_rate": 8.246015168387553e-06,
      "loss": 0.0219,
      "step": 552380
    },
    {
      "epoch": 0.9040147156052185,
      "grad_norm": 0.24052681028842926,
      "learning_rate": 8.245949276174036e-06,
      "loss": 0.0352,
      "step": 552400
    },
    {
      "epoch": 0.9040474460438719,
      "grad_norm": 0.4943685233592987,
      "learning_rate": 8.245883383960518e-06,
      "loss": 0.0302,
      "step": 552420
    },
    {
      "epoch": 0.9040801764825253,
      "grad_norm": 0.45891880989074707,
      "learning_rate": 8.245817491747002e-06,
      "loss": 0.0417,
      "step": 552440
    },
    {
      "epoch": 0.9041129069211785,
      "grad_norm": 0.6923113465309143,
      "learning_rate": 8.245751599533484e-06,
      "loss": 0.0219,
      "step": 552460
    },
    {
      "epoch": 0.9041456373598319,
      "grad_norm": 1.3493951559066772,
      "learning_rate": 8.245685707319967e-06,
      "loss": 0.0335,
      "step": 552480
    },
    {
      "epoch": 0.9041783677984853,
      "grad_norm": 1.216424584388733,
      "learning_rate": 8.245619815106449e-06,
      "loss": 0.0282,
      "step": 552500
    },
    {
      "epoch": 0.9042110982371385,
      "grad_norm": 0.8325062990188599,
      "learning_rate": 8.245553922892933e-06,
      "loss": 0.0248,
      "step": 552520
    },
    {
      "epoch": 0.9042438286757919,
      "grad_norm": 0.896580159664154,
      "learning_rate": 8.245488030679415e-06,
      "loss": 0.0287,
      "step": 552540
    },
    {
      "epoch": 0.9042765591144453,
      "grad_norm": 0.996102511882782,
      "learning_rate": 8.245422138465898e-06,
      "loss": 0.0415,
      "step": 552560
    },
    {
      "epoch": 0.9043092895530986,
      "grad_norm": 2.062122344970703,
      "learning_rate": 8.24535624625238e-06,
      "loss": 0.0233,
      "step": 552580
    },
    {
      "epoch": 0.9043420199917519,
      "grad_norm": 1.082656741142273,
      "learning_rate": 8.245290354038864e-06,
      "loss": 0.0315,
      "step": 552600
    },
    {
      "epoch": 0.9043747504304053,
      "grad_norm": 1.6867525577545166,
      "learning_rate": 8.245224461825345e-06,
      "loss": 0.0296,
      "step": 552620
    },
    {
      "epoch": 0.9044074808690586,
      "grad_norm": 0.32774844765663147,
      "learning_rate": 8.245158569611829e-06,
      "loss": 0.0355,
      "step": 552640
    },
    {
      "epoch": 0.9044402113077119,
      "grad_norm": 5.5929341316223145,
      "learning_rate": 8.245092677398313e-06,
      "loss": 0.0261,
      "step": 552660
    },
    {
      "epoch": 0.9044729417463653,
      "grad_norm": 0.38782820105552673,
      "learning_rate": 8.245026785184795e-06,
      "loss": 0.0333,
      "step": 552680
    },
    {
      "epoch": 0.9045056721850186,
      "grad_norm": 1.3307795524597168,
      "learning_rate": 8.244960892971278e-06,
      "loss": 0.0273,
      "step": 552700
    },
    {
      "epoch": 0.9045384026236719,
      "grad_norm": 0.9208396673202515,
      "learning_rate": 8.244895000757762e-06,
      "loss": 0.0322,
      "step": 552720
    },
    {
      "epoch": 0.9045711330623253,
      "grad_norm": 0.22142280638217926,
      "learning_rate": 8.244829108544244e-06,
      "loss": 0.0366,
      "step": 552740
    },
    {
      "epoch": 0.9046038635009787,
      "grad_norm": 0.5367141962051392,
      "learning_rate": 8.244763216330727e-06,
      "loss": 0.0276,
      "step": 552760
    },
    {
      "epoch": 0.904636593939632,
      "grad_norm": 1.199613094329834,
      "learning_rate": 8.24469732411721e-06,
      "loss": 0.0265,
      "step": 552780
    },
    {
      "epoch": 0.9046693243782853,
      "grad_norm": 0.8747606873512268,
      "learning_rate": 8.244631431903693e-06,
      "loss": 0.0419,
      "step": 552800
    },
    {
      "epoch": 0.9047020548169387,
      "grad_norm": 0.62901371717453,
      "learning_rate": 8.244565539690176e-06,
      "loss": 0.0336,
      "step": 552820
    },
    {
      "epoch": 0.904734785255592,
      "grad_norm": 1.276455044746399,
      "learning_rate": 8.244499647476658e-06,
      "loss": 0.0351,
      "step": 552840
    },
    {
      "epoch": 0.9047675156942453,
      "grad_norm": 0.6331718564033508,
      "learning_rate": 8.244433755263142e-06,
      "loss": 0.0282,
      "step": 552860
    },
    {
      "epoch": 0.9048002461328987,
      "grad_norm": 0.48148608207702637,
      "learning_rate": 8.244367863049624e-06,
      "loss": 0.0315,
      "step": 552880
    },
    {
      "epoch": 0.904832976571552,
      "grad_norm": 0.5400993824005127,
      "learning_rate": 8.244301970836107e-06,
      "loss": 0.0345,
      "step": 552900
    },
    {
      "epoch": 0.9048657070102053,
      "grad_norm": 0.8111248016357422,
      "learning_rate": 8.244236078622589e-06,
      "loss": 0.0392,
      "step": 552920
    },
    {
      "epoch": 0.9048984374488587,
      "grad_norm": 1.8523964881896973,
      "learning_rate": 8.244170186409073e-06,
      "loss": 0.0324,
      "step": 552940
    },
    {
      "epoch": 0.904931167887512,
      "grad_norm": 0.5642110109329224,
      "learning_rate": 8.244104294195555e-06,
      "loss": 0.0334,
      "step": 552960
    },
    {
      "epoch": 0.9049638983261654,
      "grad_norm": 0.5967097878456116,
      "learning_rate": 8.244038401982038e-06,
      "loss": 0.0278,
      "step": 552980
    },
    {
      "epoch": 0.9049966287648187,
      "grad_norm": 2.883734941482544,
      "learning_rate": 8.24397250976852e-06,
      "loss": 0.0306,
      "step": 553000
    },
    {
      "epoch": 0.905029359203472,
      "grad_norm": 0.14180085062980652,
      "learning_rate": 8.243906617555004e-06,
      "loss": 0.0309,
      "step": 553020
    },
    {
      "epoch": 0.9050620896421254,
      "grad_norm": 0.40149030089378357,
      "learning_rate": 8.243840725341487e-06,
      "loss": 0.0301,
      "step": 553040
    },
    {
      "epoch": 0.9050948200807787,
      "grad_norm": 0.23331588506698608,
      "learning_rate": 8.243774833127969e-06,
      "loss": 0.0261,
      "step": 553060
    },
    {
      "epoch": 0.9051275505194321,
      "grad_norm": 1.0892841815948486,
      "learning_rate": 8.243708940914453e-06,
      "loss": 0.0242,
      "step": 553080
    },
    {
      "epoch": 0.9051602809580854,
      "grad_norm": 1.0942414999008179,
      "learning_rate": 8.243643048700936e-06,
      "loss": 0.0375,
      "step": 553100
    },
    {
      "epoch": 0.9051930113967387,
      "grad_norm": 0.9404061436653137,
      "learning_rate": 8.243577156487418e-06,
      "loss": 0.0285,
      "step": 553120
    },
    {
      "epoch": 0.9052257418353921,
      "grad_norm": 1.4531910419464111,
      "learning_rate": 8.243511264273902e-06,
      "loss": 0.0275,
      "step": 553140
    },
    {
      "epoch": 0.9052584722740454,
      "grad_norm": 0.5197938084602356,
      "learning_rate": 8.243445372060385e-06,
      "loss": 0.0314,
      "step": 553160
    },
    {
      "epoch": 0.9052912027126988,
      "grad_norm": 0.06576835364103317,
      "learning_rate": 8.243379479846867e-06,
      "loss": 0.0228,
      "step": 553180
    },
    {
      "epoch": 0.9053239331513521,
      "grad_norm": 1.0885323286056519,
      "learning_rate": 8.243313587633351e-06,
      "loss": 0.0316,
      "step": 553200
    },
    {
      "epoch": 0.9053566635900054,
      "grad_norm": 0.9062804579734802,
      "learning_rate": 8.243247695419833e-06,
      "loss": 0.0214,
      "step": 553220
    },
    {
      "epoch": 0.9053893940286588,
      "grad_norm": 0.32051989436149597,
      "learning_rate": 8.243181803206316e-06,
      "loss": 0.0215,
      "step": 553240
    },
    {
      "epoch": 0.9054221244673121,
      "grad_norm": 0.3094969689846039,
      "learning_rate": 8.243115910992798e-06,
      "loss": 0.0219,
      "step": 553260
    },
    {
      "epoch": 0.9054548549059654,
      "grad_norm": 1.4573419094085693,
      "learning_rate": 8.243050018779282e-06,
      "loss": 0.0267,
      "step": 553280
    },
    {
      "epoch": 0.9054875853446188,
      "grad_norm": 1.232087254524231,
      "learning_rate": 8.242984126565764e-06,
      "loss": 0.0298,
      "step": 553300
    },
    {
      "epoch": 0.9055203157832721,
      "grad_norm": 0.692294180393219,
      "learning_rate": 8.242918234352247e-06,
      "loss": 0.0312,
      "step": 553320
    },
    {
      "epoch": 0.9055530462219255,
      "grad_norm": 1.9474642276763916,
      "learning_rate": 8.24285234213873e-06,
      "loss": 0.0295,
      "step": 553340
    },
    {
      "epoch": 0.9055857766605788,
      "grad_norm": 1.1010363101959229,
      "learning_rate": 8.242786449925213e-06,
      "loss": 0.0282,
      "step": 553360
    },
    {
      "epoch": 0.9056185070992322,
      "grad_norm": 1.855315089225769,
      "learning_rate": 8.242720557711696e-06,
      "loss": 0.0337,
      "step": 553380
    },
    {
      "epoch": 0.9056512375378855,
      "grad_norm": 0.690209150314331,
      "learning_rate": 8.242654665498178e-06,
      "loss": 0.0354,
      "step": 553400
    },
    {
      "epoch": 0.9056839679765388,
      "grad_norm": 0.6466368436813354,
      "learning_rate": 8.242588773284662e-06,
      "loss": 0.0178,
      "step": 553420
    },
    {
      "epoch": 0.9057166984151922,
      "grad_norm": 0.6820545792579651,
      "learning_rate": 8.242522881071144e-06,
      "loss": 0.0238,
      "step": 553440
    },
    {
      "epoch": 0.9057494288538455,
      "grad_norm": 1.1578701734542847,
      "learning_rate": 8.242456988857627e-06,
      "loss": 0.031,
      "step": 553460
    },
    {
      "epoch": 0.9057821592924988,
      "grad_norm": 0.16369889676570892,
      "learning_rate": 8.242391096644111e-06,
      "loss": 0.0289,
      "step": 553480
    },
    {
      "epoch": 0.9058148897311522,
      "grad_norm": 0.7625933885574341,
      "learning_rate": 8.242325204430593e-06,
      "loss": 0.0204,
      "step": 553500
    },
    {
      "epoch": 0.9058476201698055,
      "grad_norm": 0.6603335738182068,
      "learning_rate": 8.242259312217076e-06,
      "loss": 0.0186,
      "step": 553520
    },
    {
      "epoch": 0.9058803506084588,
      "grad_norm": 0.2596680223941803,
      "learning_rate": 8.24219342000356e-06,
      "loss": 0.0297,
      "step": 553540
    },
    {
      "epoch": 0.9059130810471122,
      "grad_norm": 2.1859970092773438,
      "learning_rate": 8.242127527790042e-06,
      "loss": 0.0394,
      "step": 553560
    },
    {
      "epoch": 0.9059458114857656,
      "grad_norm": 0.884549081325531,
      "learning_rate": 8.242061635576525e-06,
      "loss": 0.0193,
      "step": 553580
    },
    {
      "epoch": 0.9059785419244188,
      "grad_norm": 0.18293657898902893,
      "learning_rate": 8.241995743363007e-06,
      "loss": 0.0352,
      "step": 553600
    },
    {
      "epoch": 0.9060112723630722,
      "grad_norm": 2.005084991455078,
      "learning_rate": 8.241929851149491e-06,
      "loss": 0.0264,
      "step": 553620
    },
    {
      "epoch": 0.9060440028017256,
      "grad_norm": 1.6128019094467163,
      "learning_rate": 8.241863958935973e-06,
      "loss": 0.0322,
      "step": 553640
    },
    {
      "epoch": 0.9060767332403789,
      "grad_norm": 0.541814386844635,
      "learning_rate": 8.241798066722456e-06,
      "loss": 0.0399,
      "step": 553660
    },
    {
      "epoch": 0.9061094636790322,
      "grad_norm": 0.6738142371177673,
      "learning_rate": 8.241732174508938e-06,
      "loss": 0.0353,
      "step": 553680
    },
    {
      "epoch": 0.9061421941176856,
      "grad_norm": 1.226399302482605,
      "learning_rate": 8.241666282295422e-06,
      "loss": 0.0371,
      "step": 553700
    },
    {
      "epoch": 0.9061749245563389,
      "grad_norm": 0.7047261595726013,
      "learning_rate": 8.241600390081904e-06,
      "loss": 0.0267,
      "step": 553720
    },
    {
      "epoch": 0.9062076549949922,
      "grad_norm": 0.7585188746452332,
      "learning_rate": 8.241534497868387e-06,
      "loss": 0.0339,
      "step": 553740
    },
    {
      "epoch": 0.9062403854336456,
      "grad_norm": 0.2693391740322113,
      "learning_rate": 8.241468605654871e-06,
      "loss": 0.0291,
      "step": 553760
    },
    {
      "epoch": 0.906273115872299,
      "grad_norm": 0.2065201997756958,
      "learning_rate": 8.241402713441353e-06,
      "loss": 0.0208,
      "step": 553780
    },
    {
      "epoch": 0.9063058463109522,
      "grad_norm": 0.9388627409934998,
      "learning_rate": 8.241336821227836e-06,
      "loss": 0.0211,
      "step": 553800
    },
    {
      "epoch": 0.9063385767496056,
      "grad_norm": 2.5327186584472656,
      "learning_rate": 8.241270929014318e-06,
      "loss": 0.0321,
      "step": 553820
    },
    {
      "epoch": 0.906371307188259,
      "grad_norm": 1.5390325784683228,
      "learning_rate": 8.241205036800802e-06,
      "loss": 0.0258,
      "step": 553840
    },
    {
      "epoch": 0.9064040376269122,
      "grad_norm": 3.0975232124328613,
      "learning_rate": 8.241139144587284e-06,
      "loss": 0.0263,
      "step": 553860
    },
    {
      "epoch": 0.9064367680655656,
      "grad_norm": 1.2900322675704956,
      "learning_rate": 8.241073252373767e-06,
      "loss": 0.0327,
      "step": 553880
    },
    {
      "epoch": 0.906469498504219,
      "grad_norm": 0.7896534204483032,
      "learning_rate": 8.241007360160251e-06,
      "loss": 0.0297,
      "step": 553900
    },
    {
      "epoch": 0.9065022289428722,
      "grad_norm": 1.699607014656067,
      "learning_rate": 8.240941467946733e-06,
      "loss": 0.0259,
      "step": 553920
    },
    {
      "epoch": 0.9065349593815256,
      "grad_norm": 0.9382084012031555,
      "learning_rate": 8.240875575733216e-06,
      "loss": 0.0435,
      "step": 553940
    },
    {
      "epoch": 0.906567689820179,
      "grad_norm": 0.6955414414405823,
      "learning_rate": 8.2408096835197e-06,
      "loss": 0.0283,
      "step": 553960
    },
    {
      "epoch": 0.9066004202588324,
      "grad_norm": 1.3021214008331299,
      "learning_rate": 8.240743791306182e-06,
      "loss": 0.0372,
      "step": 553980
    },
    {
      "epoch": 0.9066331506974856,
      "grad_norm": 0.4879361689090729,
      "learning_rate": 8.240677899092666e-06,
      "loss": 0.03,
      "step": 554000
    },
    {
      "epoch": 0.906665881136139,
      "grad_norm": 1.257690191268921,
      "learning_rate": 8.240612006879147e-06,
      "loss": 0.0296,
      "step": 554020
    },
    {
      "epoch": 0.9066986115747924,
      "grad_norm": 0.7203492522239685,
      "learning_rate": 8.240546114665631e-06,
      "loss": 0.0321,
      "step": 554040
    },
    {
      "epoch": 0.9067313420134456,
      "grad_norm": 1.5977002382278442,
      "learning_rate": 8.240480222452113e-06,
      "loss": 0.0352,
      "step": 554060
    },
    {
      "epoch": 0.906764072452099,
      "grad_norm": 1.038833498954773,
      "learning_rate": 8.240414330238596e-06,
      "loss": 0.0244,
      "step": 554080
    },
    {
      "epoch": 0.9067968028907524,
      "grad_norm": 0.2960529327392578,
      "learning_rate": 8.24034843802508e-06,
      "loss": 0.0287,
      "step": 554100
    },
    {
      "epoch": 0.9068295333294056,
      "grad_norm": 1.09328293800354,
      "learning_rate": 8.240282545811562e-06,
      "loss": 0.0227,
      "step": 554120
    },
    {
      "epoch": 0.906862263768059,
      "grad_norm": 0.6672782301902771,
      "learning_rate": 8.240216653598046e-06,
      "loss": 0.0239,
      "step": 554140
    },
    {
      "epoch": 0.9068949942067124,
      "grad_norm": 0.3095718324184418,
      "learning_rate": 8.240150761384527e-06,
      "loss": 0.0429,
      "step": 554160
    },
    {
      "epoch": 0.9069277246453658,
      "grad_norm": 0.8343818187713623,
      "learning_rate": 8.240084869171011e-06,
      "loss": 0.0418,
      "step": 554180
    },
    {
      "epoch": 0.906960455084019,
      "grad_norm": 0.8339144587516785,
      "learning_rate": 8.240018976957493e-06,
      "loss": 0.0256,
      "step": 554200
    },
    {
      "epoch": 0.9069931855226724,
      "grad_norm": 0.6408332586288452,
      "learning_rate": 8.239953084743977e-06,
      "loss": 0.0335,
      "step": 554220
    },
    {
      "epoch": 0.9070259159613258,
      "grad_norm": 2.4707276821136475,
      "learning_rate": 8.239887192530458e-06,
      "loss": 0.034,
      "step": 554240
    },
    {
      "epoch": 0.907058646399979,
      "grad_norm": 0.3577839434146881,
      "learning_rate": 8.239821300316942e-06,
      "loss": 0.0249,
      "step": 554260
    },
    {
      "epoch": 0.9070913768386324,
      "grad_norm": 1.1892650127410889,
      "learning_rate": 8.239755408103426e-06,
      "loss": 0.0258,
      "step": 554280
    },
    {
      "epoch": 0.9071241072772858,
      "grad_norm": 0.28779998421669006,
      "learning_rate": 8.239689515889907e-06,
      "loss": 0.0335,
      "step": 554300
    },
    {
      "epoch": 0.907156837715939,
      "grad_norm": 0.31932154297828674,
      "learning_rate": 8.239623623676391e-06,
      "loss": 0.0231,
      "step": 554320
    },
    {
      "epoch": 0.9071895681545924,
      "grad_norm": 1.0902109146118164,
      "learning_rate": 8.239557731462875e-06,
      "loss": 0.0354,
      "step": 554340
    },
    {
      "epoch": 0.9072222985932458,
      "grad_norm": 0.881921648979187,
      "learning_rate": 8.239491839249357e-06,
      "loss": 0.0316,
      "step": 554360
    },
    {
      "epoch": 0.9072550290318991,
      "grad_norm": 0.31525298953056335,
      "learning_rate": 8.23942594703584e-06,
      "loss": 0.0267,
      "step": 554380
    },
    {
      "epoch": 0.9072877594705524,
      "grad_norm": 0.9434558153152466,
      "learning_rate": 8.239360054822322e-06,
      "loss": 0.033,
      "step": 554400
    },
    {
      "epoch": 0.9073204899092058,
      "grad_norm": 0.963737964630127,
      "learning_rate": 8.239294162608806e-06,
      "loss": 0.0238,
      "step": 554420
    },
    {
      "epoch": 0.9073532203478591,
      "grad_norm": 0.7116775512695312,
      "learning_rate": 8.23922827039529e-06,
      "loss": 0.0253,
      "step": 554440
    },
    {
      "epoch": 0.9073859507865124,
      "grad_norm": 0.3745262920856476,
      "learning_rate": 8.239162378181771e-06,
      "loss": 0.0346,
      "step": 554460
    },
    {
      "epoch": 0.9074186812251658,
      "grad_norm": 0.6514769196510315,
      "learning_rate": 8.239096485968255e-06,
      "loss": 0.0232,
      "step": 554480
    },
    {
      "epoch": 0.9074514116638192,
      "grad_norm": 1.4128268957138062,
      "learning_rate": 8.239030593754737e-06,
      "loss": 0.0228,
      "step": 554500
    },
    {
      "epoch": 0.9074841421024724,
      "grad_norm": 2.8686838150024414,
      "learning_rate": 8.23896470154122e-06,
      "loss": 0.026,
      "step": 554520
    },
    {
      "epoch": 0.9075168725411258,
      "grad_norm": 0.5143681764602661,
      "learning_rate": 8.238898809327702e-06,
      "loss": 0.0252,
      "step": 554540
    },
    {
      "epoch": 0.9075496029797792,
      "grad_norm": 1.9953186511993408,
      "learning_rate": 8.238832917114186e-06,
      "loss": 0.0302,
      "step": 554560
    },
    {
      "epoch": 0.9075823334184324,
      "grad_norm": 0.9602795839309692,
      "learning_rate": 8.238767024900668e-06,
      "loss": 0.0336,
      "step": 554580
    },
    {
      "epoch": 0.9076150638570858,
      "grad_norm": 0.7980068922042847,
      "learning_rate": 8.238701132687151e-06,
      "loss": 0.0245,
      "step": 554600
    },
    {
      "epoch": 0.9076477942957392,
      "grad_norm": 0.3432011604309082,
      "learning_rate": 8.238635240473633e-06,
      "loss": 0.0356,
      "step": 554620
    },
    {
      "epoch": 0.9076805247343925,
      "grad_norm": 1.7284821271896362,
      "learning_rate": 8.238569348260117e-06,
      "loss": 0.0271,
      "step": 554640
    },
    {
      "epoch": 0.9077132551730458,
      "grad_norm": 2.936344861984253,
      "learning_rate": 8.238503456046598e-06,
      "loss": 0.0306,
      "step": 554660
    },
    {
      "epoch": 0.9077459856116992,
      "grad_norm": 2.4380595684051514,
      "learning_rate": 8.238437563833082e-06,
      "loss": 0.039,
      "step": 554680
    },
    {
      "epoch": 0.9077787160503525,
      "grad_norm": 0.7138835787773132,
      "learning_rate": 8.238371671619566e-06,
      "loss": 0.0267,
      "step": 554700
    },
    {
      "epoch": 0.9078114464890058,
      "grad_norm": 0.5586949586868286,
      "learning_rate": 8.238305779406048e-06,
      "loss": 0.017,
      "step": 554720
    },
    {
      "epoch": 0.9078441769276592,
      "grad_norm": 0.4794216752052307,
      "learning_rate": 8.238239887192531e-06,
      "loss": 0.036,
      "step": 554740
    },
    {
      "epoch": 0.9078769073663125,
      "grad_norm": 1.6191349029541016,
      "learning_rate": 8.238173994979015e-06,
      "loss": 0.0414,
      "step": 554760
    },
    {
      "epoch": 0.9079096378049658,
      "grad_norm": 0.5723696351051331,
      "learning_rate": 8.238108102765497e-06,
      "loss": 0.0324,
      "step": 554780
    },
    {
      "epoch": 0.9079423682436192,
      "grad_norm": 0.5420365929603577,
      "learning_rate": 8.23804221055198e-06,
      "loss": 0.0323,
      "step": 554800
    },
    {
      "epoch": 0.9079750986822726,
      "grad_norm": 0.9671574234962463,
      "learning_rate": 8.237976318338464e-06,
      "loss": 0.0416,
      "step": 554820
    },
    {
      "epoch": 0.9080078291209259,
      "grad_norm": 4.877580642700195,
      "learning_rate": 8.237910426124946e-06,
      "loss": 0.0245,
      "step": 554840
    },
    {
      "epoch": 0.9080405595595792,
      "grad_norm": 0.4496277868747711,
      "learning_rate": 8.23784453391143e-06,
      "loss": 0.0371,
      "step": 554860
    },
    {
      "epoch": 0.9080732899982326,
      "grad_norm": 0.6441579461097717,
      "learning_rate": 8.237778641697911e-06,
      "loss": 0.0235,
      "step": 554880
    },
    {
      "epoch": 0.9081060204368859,
      "grad_norm": 0.6599580645561218,
      "learning_rate": 8.237712749484395e-06,
      "loss": 0.0326,
      "step": 554900
    },
    {
      "epoch": 0.9081387508755392,
      "grad_norm": 1.5676084756851196,
      "learning_rate": 8.237646857270877e-06,
      "loss": 0.0256,
      "step": 554920
    },
    {
      "epoch": 0.9081714813141926,
      "grad_norm": 2.645019054412842,
      "learning_rate": 8.23758096505736e-06,
      "loss": 0.0301,
      "step": 554940
    },
    {
      "epoch": 0.9082042117528459,
      "grad_norm": 0.533531665802002,
      "learning_rate": 8.237515072843842e-06,
      "loss": 0.0332,
      "step": 554960
    },
    {
      "epoch": 0.9082369421914992,
      "grad_norm": 0.7318982481956482,
      "learning_rate": 8.237449180630326e-06,
      "loss": 0.0329,
      "step": 554980
    },
    {
      "epoch": 0.9082696726301526,
      "grad_norm": 0.7298321723937988,
      "learning_rate": 8.237383288416808e-06,
      "loss": 0.02,
      "step": 555000
    },
    {
      "epoch": 0.9083024030688059,
      "grad_norm": 1.1206231117248535,
      "learning_rate": 8.237317396203291e-06,
      "loss": 0.0309,
      "step": 555020
    },
    {
      "epoch": 0.9083351335074593,
      "grad_norm": 0.9169818162918091,
      "learning_rate": 8.237251503989773e-06,
      "loss": 0.034,
      "step": 555040
    },
    {
      "epoch": 0.9083678639461126,
      "grad_norm": 0.9851921796798706,
      "learning_rate": 8.237185611776257e-06,
      "loss": 0.0295,
      "step": 555060
    },
    {
      "epoch": 0.908400594384766,
      "grad_norm": 0.8376902341842651,
      "learning_rate": 8.23711971956274e-06,
      "loss": 0.0218,
      "step": 555080
    },
    {
      "epoch": 0.9084333248234193,
      "grad_norm": 0.3373756408691406,
      "learning_rate": 8.237053827349222e-06,
      "loss": 0.026,
      "step": 555100
    },
    {
      "epoch": 0.9084660552620726,
      "grad_norm": 0.3001887798309326,
      "learning_rate": 8.236987935135706e-06,
      "loss": 0.0207,
      "step": 555120
    },
    {
      "epoch": 0.908498785700726,
      "grad_norm": 0.40351033210754395,
      "learning_rate": 8.23692204292219e-06,
      "loss": 0.0371,
      "step": 555140
    },
    {
      "epoch": 0.9085315161393793,
      "grad_norm": 0.5239161849021912,
      "learning_rate": 8.236856150708671e-06,
      "loss": 0.0194,
      "step": 555160
    },
    {
      "epoch": 0.9085642465780326,
      "grad_norm": 0.3431221842765808,
      "learning_rate": 8.236790258495155e-06,
      "loss": 0.0212,
      "step": 555180
    },
    {
      "epoch": 0.908596977016686,
      "grad_norm": 1.0722171068191528,
      "learning_rate": 8.236724366281638e-06,
      "loss": 0.0311,
      "step": 555200
    },
    {
      "epoch": 0.9086297074553393,
      "grad_norm": 3.251038074493408,
      "learning_rate": 8.23665847406812e-06,
      "loss": 0.0319,
      "step": 555220
    },
    {
      "epoch": 0.9086624378939927,
      "grad_norm": 0.3898749351501465,
      "learning_rate": 8.236592581854604e-06,
      "loss": 0.0246,
      "step": 555240
    },
    {
      "epoch": 0.908695168332646,
      "grad_norm": 0.9567729234695435,
      "learning_rate": 8.236526689641086e-06,
      "loss": 0.0298,
      "step": 555260
    },
    {
      "epoch": 0.9087278987712993,
      "grad_norm": 2.4228460788726807,
      "learning_rate": 8.23646079742757e-06,
      "loss": 0.0289,
      "step": 555280
    },
    {
      "epoch": 0.9087606292099527,
      "grad_norm": 0.5214365720748901,
      "learning_rate": 8.236394905214051e-06,
      "loss": 0.0214,
      "step": 555300
    },
    {
      "epoch": 0.908793359648606,
      "grad_norm": 0.9547585844993591,
      "learning_rate": 8.236329013000535e-06,
      "loss": 0.0224,
      "step": 555320
    },
    {
      "epoch": 0.9088260900872593,
      "grad_norm": 1.089624285697937,
      "learning_rate": 8.236263120787017e-06,
      "loss": 0.034,
      "step": 555340
    },
    {
      "epoch": 0.9088588205259127,
      "grad_norm": 2.238487482070923,
      "learning_rate": 8.2361972285735e-06,
      "loss": 0.0344,
      "step": 555360
    },
    {
      "epoch": 0.908891550964566,
      "grad_norm": 0.41854631900787354,
      "learning_rate": 8.236131336359982e-06,
      "loss": 0.028,
      "step": 555380
    },
    {
      "epoch": 0.9089242814032193,
      "grad_norm": 0.9939761757850647,
      "learning_rate": 8.236065444146466e-06,
      "loss": 0.0289,
      "step": 555400
    },
    {
      "epoch": 0.9089570118418727,
      "grad_norm": 0.8183557391166687,
      "learning_rate": 8.235999551932948e-06,
      "loss": 0.0257,
      "step": 555420
    },
    {
      "epoch": 0.9089897422805261,
      "grad_norm": 1.4583923816680908,
      "learning_rate": 8.235933659719431e-06,
      "loss": 0.0289,
      "step": 555440
    },
    {
      "epoch": 0.9090224727191794,
      "grad_norm": 3.2640180587768555,
      "learning_rate": 8.235867767505913e-06,
      "loss": 0.0338,
      "step": 555460
    },
    {
      "epoch": 0.9090552031578327,
      "grad_norm": 1.2107295989990234,
      "learning_rate": 8.235801875292397e-06,
      "loss": 0.0213,
      "step": 555480
    },
    {
      "epoch": 0.9090879335964861,
      "grad_norm": 1.1844531297683716,
      "learning_rate": 8.23573598307888e-06,
      "loss": 0.0229,
      "step": 555500
    },
    {
      "epoch": 0.9091206640351394,
      "grad_norm": 0.09822473675012589,
      "learning_rate": 8.235670090865362e-06,
      "loss": 0.0173,
      "step": 555520
    },
    {
      "epoch": 0.9091533944737927,
      "grad_norm": 1.726266622543335,
      "learning_rate": 8.235604198651846e-06,
      "loss": 0.0272,
      "step": 555540
    },
    {
      "epoch": 0.9091861249124461,
      "grad_norm": 0.4686988890171051,
      "learning_rate": 8.23553830643833e-06,
      "loss": 0.0269,
      "step": 555560
    },
    {
      "epoch": 0.9092188553510994,
      "grad_norm": 2.0887579917907715,
      "learning_rate": 8.235472414224813e-06,
      "loss": 0.036,
      "step": 555580
    },
    {
      "epoch": 0.9092515857897527,
      "grad_norm": 0.7439789175987244,
      "learning_rate": 8.235406522011295e-06,
      "loss": 0.0365,
      "step": 555600
    },
    {
      "epoch": 0.9092843162284061,
      "grad_norm": 0.6393487453460693,
      "learning_rate": 8.235340629797778e-06,
      "loss": 0.0286,
      "step": 555620
    },
    {
      "epoch": 0.9093170466670595,
      "grad_norm": 0.8939751386642456,
      "learning_rate": 8.23527473758426e-06,
      "loss": 0.0365,
      "step": 555640
    },
    {
      "epoch": 0.9093497771057127,
      "grad_norm": 0.6620960235595703,
      "learning_rate": 8.235208845370744e-06,
      "loss": 0.0303,
      "step": 555660
    },
    {
      "epoch": 0.9093825075443661,
      "grad_norm": 1.0302495956420898,
      "learning_rate": 8.235142953157226e-06,
      "loss": 0.0378,
      "step": 555680
    },
    {
      "epoch": 0.9094152379830195,
      "grad_norm": 0.4106653928756714,
      "learning_rate": 8.23507706094371e-06,
      "loss": 0.0306,
      "step": 555700
    },
    {
      "epoch": 0.9094479684216727,
      "grad_norm": 0.6692940592765808,
      "learning_rate": 8.235011168730191e-06,
      "loss": 0.0314,
      "step": 555720
    },
    {
      "epoch": 0.9094806988603261,
      "grad_norm": 1.586325764656067,
      "learning_rate": 8.234945276516675e-06,
      "loss": 0.031,
      "step": 555740
    },
    {
      "epoch": 0.9095134292989795,
      "grad_norm": 0.7936014533042908,
      "learning_rate": 8.234879384303157e-06,
      "loss": 0.0225,
      "step": 555760
    },
    {
      "epoch": 0.9095461597376328,
      "grad_norm": 1.4096513986587524,
      "learning_rate": 8.23481349208964e-06,
      "loss": 0.0288,
      "step": 555780
    },
    {
      "epoch": 0.9095788901762861,
      "grad_norm": 2.0511226654052734,
      "learning_rate": 8.234747599876122e-06,
      "loss": 0.0259,
      "step": 555800
    },
    {
      "epoch": 0.9096116206149395,
      "grad_norm": 0.41009950637817383,
      "learning_rate": 8.234681707662606e-06,
      "loss": 0.0365,
      "step": 555820
    },
    {
      "epoch": 0.9096443510535929,
      "grad_norm": 0.6444949507713318,
      "learning_rate": 8.234615815449088e-06,
      "loss": 0.0218,
      "step": 555840
    },
    {
      "epoch": 0.9096770814922461,
      "grad_norm": 1.1242586374282837,
      "learning_rate": 8.234549923235571e-06,
      "loss": 0.0342,
      "step": 555860
    },
    {
      "epoch": 0.9097098119308995,
      "grad_norm": 4.644964694976807,
      "learning_rate": 8.234484031022055e-06,
      "loss": 0.0284,
      "step": 555880
    },
    {
      "epoch": 0.9097425423695529,
      "grad_norm": 0.4298100173473358,
      "learning_rate": 8.234418138808537e-06,
      "loss": 0.029,
      "step": 555900
    },
    {
      "epoch": 0.9097752728082061,
      "grad_norm": 0.27486252784729004,
      "learning_rate": 8.23435224659502e-06,
      "loss": 0.0286,
      "step": 555920
    },
    {
      "epoch": 0.9098080032468595,
      "grad_norm": 1.992591381072998,
      "learning_rate": 8.234286354381504e-06,
      "loss": 0.0327,
      "step": 555940
    },
    {
      "epoch": 0.9098407336855129,
      "grad_norm": 0.6389259099960327,
      "learning_rate": 8.234220462167986e-06,
      "loss": 0.0202,
      "step": 555960
    },
    {
      "epoch": 0.9098734641241661,
      "grad_norm": 0.6718639731407166,
      "learning_rate": 8.23415456995447e-06,
      "loss": 0.0218,
      "step": 555980
    },
    {
      "epoch": 0.9099061945628195,
      "grad_norm": 0.4631768763065338,
      "learning_rate": 8.234088677740953e-06,
      "loss": 0.0247,
      "step": 556000
    },
    {
      "epoch": 0.9099389250014729,
      "grad_norm": 0.91615891456604,
      "learning_rate": 8.234022785527435e-06,
      "loss": 0.0303,
      "step": 556020
    },
    {
      "epoch": 0.9099716554401263,
      "grad_norm": 0.853884220123291,
      "learning_rate": 8.233956893313919e-06,
      "loss": 0.0322,
      "step": 556040
    },
    {
      "epoch": 0.9100043858787795,
      "grad_norm": 1.6856297254562378,
      "learning_rate": 8.2338910011004e-06,
      "loss": 0.0226,
      "step": 556060
    },
    {
      "epoch": 0.9100371163174329,
      "grad_norm": 0.6490638852119446,
      "learning_rate": 8.233825108886884e-06,
      "loss": 0.0245,
      "step": 556080
    },
    {
      "epoch": 0.9100698467560863,
      "grad_norm": 0.7325061559677124,
      "learning_rate": 8.233759216673366e-06,
      "loss": 0.0211,
      "step": 556100
    },
    {
      "epoch": 0.9101025771947395,
      "grad_norm": 0.43277400732040405,
      "learning_rate": 8.23369332445985e-06,
      "loss": 0.0287,
      "step": 556120
    },
    {
      "epoch": 0.9101353076333929,
      "grad_norm": 0.3530571460723877,
      "learning_rate": 8.233627432246331e-06,
      "loss": 0.0265,
      "step": 556140
    },
    {
      "epoch": 0.9101680380720463,
      "grad_norm": 1.112059473991394,
      "learning_rate": 8.233561540032815e-06,
      "loss": 0.0188,
      "step": 556160
    },
    {
      "epoch": 0.9102007685106995,
      "grad_norm": 1.1346231698989868,
      "learning_rate": 8.233495647819297e-06,
      "loss": 0.0269,
      "step": 556180
    },
    {
      "epoch": 0.9102334989493529,
      "grad_norm": 0.6187973022460938,
      "learning_rate": 8.23342975560578e-06,
      "loss": 0.0229,
      "step": 556200
    },
    {
      "epoch": 0.9102662293880063,
      "grad_norm": 1.1469175815582275,
      "learning_rate": 8.233363863392264e-06,
      "loss": 0.0224,
      "step": 556220
    },
    {
      "epoch": 0.9102989598266596,
      "grad_norm": 0.7032986283302307,
      "learning_rate": 8.233297971178746e-06,
      "loss": 0.0348,
      "step": 556240
    },
    {
      "epoch": 0.9103316902653129,
      "grad_norm": 0.42113009095191956,
      "learning_rate": 8.23323207896523e-06,
      "loss": 0.0251,
      "step": 556260
    },
    {
      "epoch": 0.9103644207039663,
      "grad_norm": 0.8493440747261047,
      "learning_rate": 8.233166186751711e-06,
      "loss": 0.0232,
      "step": 556280
    },
    {
      "epoch": 0.9103971511426197,
      "grad_norm": 0.16079778969287872,
      "learning_rate": 8.233100294538195e-06,
      "loss": 0.0262,
      "step": 556300
    },
    {
      "epoch": 0.9104298815812729,
      "grad_norm": 0.6700881719589233,
      "learning_rate": 8.233034402324679e-06,
      "loss": 0.0259,
      "step": 556320
    },
    {
      "epoch": 0.9104626120199263,
      "grad_norm": 0.8798713088035583,
      "learning_rate": 8.23296851011116e-06,
      "loss": 0.0217,
      "step": 556340
    },
    {
      "epoch": 0.9104953424585797,
      "grad_norm": 0.40451672673225403,
      "learning_rate": 8.232902617897644e-06,
      "loss": 0.0244,
      "step": 556360
    },
    {
      "epoch": 0.9105280728972329,
      "grad_norm": 1.6282051801681519,
      "learning_rate": 8.232836725684128e-06,
      "loss": 0.0239,
      "step": 556380
    },
    {
      "epoch": 0.9105608033358863,
      "grad_norm": 0.8334681391716003,
      "learning_rate": 8.23277083347061e-06,
      "loss": 0.0201,
      "step": 556400
    },
    {
      "epoch": 0.9105935337745397,
      "grad_norm": 0.7657527923583984,
      "learning_rate": 8.232704941257093e-06,
      "loss": 0.0272,
      "step": 556420
    },
    {
      "epoch": 0.910626264213193,
      "grad_norm": 0.9687127470970154,
      "learning_rate": 8.232639049043575e-06,
      "loss": 0.0245,
      "step": 556440
    },
    {
      "epoch": 0.9106589946518463,
      "grad_norm": 1.247573971748352,
      "learning_rate": 8.232573156830059e-06,
      "loss": 0.0341,
      "step": 556460
    },
    {
      "epoch": 0.9106917250904997,
      "grad_norm": 0.3669157326221466,
      "learning_rate": 8.23250726461654e-06,
      "loss": 0.0253,
      "step": 556480
    },
    {
      "epoch": 0.910724455529153,
      "grad_norm": 0.9790381193161011,
      "learning_rate": 8.232441372403024e-06,
      "loss": 0.0212,
      "step": 556500
    },
    {
      "epoch": 0.9107571859678063,
      "grad_norm": 5.8532490730285645,
      "learning_rate": 8.232375480189506e-06,
      "loss": 0.0355,
      "step": 556520
    },
    {
      "epoch": 0.9107899164064597,
      "grad_norm": 1.1934239864349365,
      "learning_rate": 8.23230958797599e-06,
      "loss": 0.0295,
      "step": 556540
    },
    {
      "epoch": 0.910822646845113,
      "grad_norm": 0.5834069848060608,
      "learning_rate": 8.232243695762473e-06,
      "loss": 0.0275,
      "step": 556560
    },
    {
      "epoch": 0.9108553772837663,
      "grad_norm": 0.2523190975189209,
      "learning_rate": 8.232177803548955e-06,
      "loss": 0.0292,
      "step": 556580
    },
    {
      "epoch": 0.9108881077224197,
      "grad_norm": 0.5839489102363586,
      "learning_rate": 8.232111911335439e-06,
      "loss": 0.0332,
      "step": 556600
    },
    {
      "epoch": 0.910920838161073,
      "grad_norm": 0.4871921241283417,
      "learning_rate": 8.23204601912192e-06,
      "loss": 0.0254,
      "step": 556620
    },
    {
      "epoch": 0.9109535685997264,
      "grad_norm": 0.7022918462753296,
      "learning_rate": 8.231980126908404e-06,
      "loss": 0.03,
      "step": 556640
    },
    {
      "epoch": 0.9109862990383797,
      "grad_norm": 0.8484358787536621,
      "learning_rate": 8.231914234694886e-06,
      "loss": 0.0276,
      "step": 556660
    },
    {
      "epoch": 0.9110190294770331,
      "grad_norm": 0.43711480498313904,
      "learning_rate": 8.23184834248137e-06,
      "loss": 0.0379,
      "step": 556680
    },
    {
      "epoch": 0.9110517599156864,
      "grad_norm": 0.6252097487449646,
      "learning_rate": 8.231782450267851e-06,
      "loss": 0.0221,
      "step": 556700
    },
    {
      "epoch": 0.9110844903543397,
      "grad_norm": 2.8407483100891113,
      "learning_rate": 8.231716558054335e-06,
      "loss": 0.0299,
      "step": 556720
    },
    {
      "epoch": 0.9111172207929931,
      "grad_norm": 0.4680934548377991,
      "learning_rate": 8.231650665840819e-06,
      "loss": 0.0293,
      "step": 556740
    },
    {
      "epoch": 0.9111499512316464,
      "grad_norm": 0.9363439679145813,
      "learning_rate": 8.2315847736273e-06,
      "loss": 0.0201,
      "step": 556760
    },
    {
      "epoch": 0.9111826816702997,
      "grad_norm": 1.2692033052444458,
      "learning_rate": 8.231518881413784e-06,
      "loss": 0.0289,
      "step": 556780
    },
    {
      "epoch": 0.9112154121089531,
      "grad_norm": 1.7598737478256226,
      "learning_rate": 8.231452989200268e-06,
      "loss": 0.029,
      "step": 556800
    },
    {
      "epoch": 0.9112481425476064,
      "grad_norm": 1.5231029987335205,
      "learning_rate": 8.23138709698675e-06,
      "loss": 0.0294,
      "step": 556820
    },
    {
      "epoch": 0.9112808729862598,
      "grad_norm": 0.805332362651825,
      "learning_rate": 8.231321204773233e-06,
      "loss": 0.0216,
      "step": 556840
    },
    {
      "epoch": 0.9113136034249131,
      "grad_norm": 1.4198349714279175,
      "learning_rate": 8.231255312559715e-06,
      "loss": 0.0294,
      "step": 556860
    },
    {
      "epoch": 0.9113463338635664,
      "grad_norm": 0.32677412033081055,
      "learning_rate": 8.231189420346199e-06,
      "loss": 0.0262,
      "step": 556880
    },
    {
      "epoch": 0.9113790643022198,
      "grad_norm": 0.24989621341228485,
      "learning_rate": 8.231123528132682e-06,
      "loss": 0.0263,
      "step": 556900
    },
    {
      "epoch": 0.9114117947408731,
      "grad_norm": 1.530605435371399,
      "learning_rate": 8.231057635919164e-06,
      "loss": 0.026,
      "step": 556920
    },
    {
      "epoch": 0.9114445251795265,
      "grad_norm": 0.5424894094467163,
      "learning_rate": 8.230991743705648e-06,
      "loss": 0.0317,
      "step": 556940
    },
    {
      "epoch": 0.9114772556181798,
      "grad_norm": 0.692846417427063,
      "learning_rate": 8.23092585149213e-06,
      "loss": 0.0345,
      "step": 556960
    },
    {
      "epoch": 0.9115099860568331,
      "grad_norm": 0.6252384781837463,
      "learning_rate": 8.230859959278613e-06,
      "loss": 0.0329,
      "step": 556980
    },
    {
      "epoch": 0.9115427164954865,
      "grad_norm": 0.907465398311615,
      "learning_rate": 8.230794067065095e-06,
      "loss": 0.027,
      "step": 557000
    },
    {
      "epoch": 0.9115754469341398,
      "grad_norm": 0.7435397505760193,
      "learning_rate": 8.230728174851579e-06,
      "loss": 0.0257,
      "step": 557020
    },
    {
      "epoch": 0.9116081773727932,
      "grad_norm": 0.5886251926422119,
      "learning_rate": 8.23066228263806e-06,
      "loss": 0.03,
      "step": 557040
    },
    {
      "epoch": 0.9116409078114465,
      "grad_norm": 0.10309240221977234,
      "learning_rate": 8.230596390424544e-06,
      "loss": 0.0301,
      "step": 557060
    },
    {
      "epoch": 0.9116736382500998,
      "grad_norm": 0.7466391324996948,
      "learning_rate": 8.230530498211026e-06,
      "loss": 0.0361,
      "step": 557080
    },
    {
      "epoch": 0.9117063686887532,
      "grad_norm": 1.211567997932434,
      "learning_rate": 8.23046460599751e-06,
      "loss": 0.029,
      "step": 557100
    },
    {
      "epoch": 0.9117390991274065,
      "grad_norm": 0.7943692803382874,
      "learning_rate": 8.230398713783993e-06,
      "loss": 0.0281,
      "step": 557120
    },
    {
      "epoch": 0.9117718295660598,
      "grad_norm": 0.22765964269638062,
      "learning_rate": 8.230332821570475e-06,
      "loss": 0.0225,
      "step": 557140
    },
    {
      "epoch": 0.9118045600047132,
      "grad_norm": 1.238078236579895,
      "learning_rate": 8.230266929356959e-06,
      "loss": 0.0277,
      "step": 557160
    },
    {
      "epoch": 0.9118372904433665,
      "grad_norm": 0.571852445602417,
      "learning_rate": 8.230201037143442e-06,
      "loss": 0.0345,
      "step": 557180
    },
    {
      "epoch": 0.9118700208820198,
      "grad_norm": 0.7508916258811951,
      "learning_rate": 8.230135144929924e-06,
      "loss": 0.0317,
      "step": 557200
    },
    {
      "epoch": 0.9119027513206732,
      "grad_norm": 0.3918705880641937,
      "learning_rate": 8.230069252716408e-06,
      "loss": 0.0235,
      "step": 557220
    },
    {
      "epoch": 0.9119354817593266,
      "grad_norm": 0.2412092238664627,
      "learning_rate": 8.23000336050289e-06,
      "loss": 0.0282,
      "step": 557240
    },
    {
      "epoch": 0.9119682121979799,
      "grad_norm": 0.4782468378543854,
      "learning_rate": 8.229937468289373e-06,
      "loss": 0.0288,
      "step": 557260
    },
    {
      "epoch": 0.9120009426366332,
      "grad_norm": 0.7491797208786011,
      "learning_rate": 8.229871576075857e-06,
      "loss": 0.0306,
      "step": 557280
    },
    {
      "epoch": 0.9120336730752866,
      "grad_norm": 0.2216835767030716,
      "learning_rate": 8.229805683862339e-06,
      "loss": 0.0304,
      "step": 557300
    },
    {
      "epoch": 0.9120664035139399,
      "grad_norm": 0.6357563138008118,
      "learning_rate": 8.229739791648822e-06,
      "loss": 0.0243,
      "step": 557320
    },
    {
      "epoch": 0.9120991339525932,
      "grad_norm": 1.2342233657836914,
      "learning_rate": 8.229673899435304e-06,
      "loss": 0.0248,
      "step": 557340
    },
    {
      "epoch": 0.9121318643912466,
      "grad_norm": 1.9676520824432373,
      "learning_rate": 8.229608007221788e-06,
      "loss": 0.0354,
      "step": 557360
    },
    {
      "epoch": 0.9121645948298999,
      "grad_norm": 0.781868040561676,
      "learning_rate": 8.22954211500827e-06,
      "loss": 0.0285,
      "step": 557380
    },
    {
      "epoch": 0.9121973252685532,
      "grad_norm": 0.6998133063316345,
      "learning_rate": 8.229476222794753e-06,
      "loss": 0.0278,
      "step": 557400
    },
    {
      "epoch": 0.9122300557072066,
      "grad_norm": 0.412490576505661,
      "learning_rate": 8.229410330581235e-06,
      "loss": 0.0275,
      "step": 557420
    },
    {
      "epoch": 0.9122627861458599,
      "grad_norm": 1.084915280342102,
      "learning_rate": 8.229344438367719e-06,
      "loss": 0.0266,
      "step": 557440
    },
    {
      "epoch": 0.9122955165845132,
      "grad_norm": 1.369922399520874,
      "learning_rate": 8.2292785461542e-06,
      "loss": 0.0344,
      "step": 557460
    },
    {
      "epoch": 0.9123282470231666,
      "grad_norm": 0.5841761231422424,
      "learning_rate": 8.229212653940684e-06,
      "loss": 0.0346,
      "step": 557480
    },
    {
      "epoch": 0.91236097746182,
      "grad_norm": 1.1192548274993896,
      "learning_rate": 8.229146761727166e-06,
      "loss": 0.0381,
      "step": 557500
    },
    {
      "epoch": 0.9123937079004732,
      "grad_norm": 1.9545197486877441,
      "learning_rate": 8.22908086951365e-06,
      "loss": 0.0341,
      "step": 557520
    },
    {
      "epoch": 0.9124264383391266,
      "grad_norm": 1.2089009284973145,
      "learning_rate": 8.229014977300133e-06,
      "loss": 0.0268,
      "step": 557540
    },
    {
      "epoch": 0.91245916877778,
      "grad_norm": 0.7808383107185364,
      "learning_rate": 8.228949085086615e-06,
      "loss": 0.0236,
      "step": 557560
    },
    {
      "epoch": 0.9124918992164333,
      "grad_norm": 2.6911139488220215,
      "learning_rate": 8.228883192873099e-06,
      "loss": 0.0275,
      "step": 557580
    },
    {
      "epoch": 0.9125246296550866,
      "grad_norm": 1.618853211402893,
      "learning_rate": 8.228817300659582e-06,
      "loss": 0.026,
      "step": 557600
    },
    {
      "epoch": 0.91255736009374,
      "grad_norm": 0.6470089554786682,
      "learning_rate": 8.228751408446064e-06,
      "loss": 0.0342,
      "step": 557620
    },
    {
      "epoch": 0.9125900905323933,
      "grad_norm": 0.6481377482414246,
      "learning_rate": 8.228685516232548e-06,
      "loss": 0.024,
      "step": 557640
    },
    {
      "epoch": 0.9126228209710466,
      "grad_norm": 0.7956836819648743,
      "learning_rate": 8.228619624019031e-06,
      "loss": 0.0295,
      "step": 557660
    },
    {
      "epoch": 0.9126555514097,
      "grad_norm": 1.0848894119262695,
      "learning_rate": 8.228553731805513e-06,
      "loss": 0.0295,
      "step": 557680
    },
    {
      "epoch": 0.9126882818483534,
      "grad_norm": 0.5930840373039246,
      "learning_rate": 8.228487839591997e-06,
      "loss": 0.0293,
      "step": 557700
    },
    {
      "epoch": 0.9127210122870066,
      "grad_norm": 0.46852198243141174,
      "learning_rate": 8.228421947378479e-06,
      "loss": 0.0332,
      "step": 557720
    },
    {
      "epoch": 0.91275374272566,
      "grad_norm": 1.2859224081039429,
      "learning_rate": 8.228356055164962e-06,
      "loss": 0.037,
      "step": 557740
    },
    {
      "epoch": 0.9127864731643134,
      "grad_norm": 0.7897937297821045,
      "learning_rate": 8.228290162951444e-06,
      "loss": 0.025,
      "step": 557760
    },
    {
      "epoch": 0.9128192036029666,
      "grad_norm": 1.4512107372283936,
      "learning_rate": 8.228224270737928e-06,
      "loss": 0.0387,
      "step": 557780
    },
    {
      "epoch": 0.91285193404162,
      "grad_norm": 0.5638776421546936,
      "learning_rate": 8.22815837852441e-06,
      "loss": 0.0251,
      "step": 557800
    },
    {
      "epoch": 0.9128846644802734,
      "grad_norm": 4.077264308929443,
      "learning_rate": 8.228092486310893e-06,
      "loss": 0.0301,
      "step": 557820
    },
    {
      "epoch": 0.9129173949189267,
      "grad_norm": 0.5978660583496094,
      "learning_rate": 8.228026594097375e-06,
      "loss": 0.0403,
      "step": 557840
    },
    {
      "epoch": 0.91295012535758,
      "grad_norm": 1.8828800916671753,
      "learning_rate": 8.227960701883859e-06,
      "loss": 0.0351,
      "step": 557860
    },
    {
      "epoch": 0.9129828557962334,
      "grad_norm": 0.9116337299346924,
      "learning_rate": 8.22789480967034e-06,
      "loss": 0.0284,
      "step": 557880
    },
    {
      "epoch": 0.9130155862348868,
      "grad_norm": 0.48257535696029663,
      "learning_rate": 8.227828917456824e-06,
      "loss": 0.029,
      "step": 557900
    },
    {
      "epoch": 0.91304831667354,
      "grad_norm": 0.3321538269519806,
      "learning_rate": 8.227763025243308e-06,
      "loss": 0.0304,
      "step": 557920
    },
    {
      "epoch": 0.9130810471121934,
      "grad_norm": 0.547141432762146,
      "learning_rate": 8.22769713302979e-06,
      "loss": 0.0225,
      "step": 557940
    },
    {
      "epoch": 0.9131137775508468,
      "grad_norm": 0.40871626138687134,
      "learning_rate": 8.227631240816273e-06,
      "loss": 0.0253,
      "step": 557960
    },
    {
      "epoch": 0.9131465079895,
      "grad_norm": 3.0106611251831055,
      "learning_rate": 8.227565348602757e-06,
      "loss": 0.0295,
      "step": 557980
    },
    {
      "epoch": 0.9131792384281534,
      "grad_norm": 0.5810761451721191,
      "learning_rate": 8.227499456389239e-06,
      "loss": 0.0369,
      "step": 558000
    },
    {
      "epoch": 0.9132119688668068,
      "grad_norm": 0.47284936904907227,
      "learning_rate": 8.227433564175722e-06,
      "loss": 0.035,
      "step": 558020
    },
    {
      "epoch": 0.91324469930546,
      "grad_norm": 0.6429791450500488,
      "learning_rate": 8.227367671962206e-06,
      "loss": 0.0235,
      "step": 558040
    },
    {
      "epoch": 0.9132774297441134,
      "grad_norm": 1.197764277458191,
      "learning_rate": 8.227301779748688e-06,
      "loss": 0.021,
      "step": 558060
    },
    {
      "epoch": 0.9133101601827668,
      "grad_norm": 0.5057766437530518,
      "learning_rate": 8.227235887535172e-06,
      "loss": 0.0239,
      "step": 558080
    },
    {
      "epoch": 0.9133428906214202,
      "grad_norm": 1.5168156623840332,
      "learning_rate": 8.227169995321653e-06,
      "loss": 0.0293,
      "step": 558100
    },
    {
      "epoch": 0.9133756210600734,
      "grad_norm": 1.1320414543151855,
      "learning_rate": 8.227104103108137e-06,
      "loss": 0.0247,
      "step": 558120
    },
    {
      "epoch": 0.9134083514987268,
      "grad_norm": 0.7639913558959961,
      "learning_rate": 8.227038210894619e-06,
      "loss": 0.0299,
      "step": 558140
    },
    {
      "epoch": 0.9134410819373802,
      "grad_norm": 1.4370611906051636,
      "learning_rate": 8.226972318681103e-06,
      "loss": 0.0296,
      "step": 558160
    },
    {
      "epoch": 0.9134738123760334,
      "grad_norm": 4.141943454742432,
      "learning_rate": 8.226906426467584e-06,
      "loss": 0.034,
      "step": 558180
    },
    {
      "epoch": 0.9135065428146868,
      "grad_norm": 1.1317874193191528,
      "learning_rate": 8.226840534254068e-06,
      "loss": 0.0216,
      "step": 558200
    },
    {
      "epoch": 0.9135392732533402,
      "grad_norm": 1.1446552276611328,
      "learning_rate": 8.22677464204055e-06,
      "loss": 0.0333,
      "step": 558220
    },
    {
      "epoch": 0.9135720036919934,
      "grad_norm": 0.3414538502693176,
      "learning_rate": 8.226708749827033e-06,
      "loss": 0.0236,
      "step": 558240
    },
    {
      "epoch": 0.9136047341306468,
      "grad_norm": 1.1505131721496582,
      "learning_rate": 8.226642857613515e-06,
      "loss": 0.0293,
      "step": 558260
    },
    {
      "epoch": 0.9136374645693002,
      "grad_norm": 0.9986685514450073,
      "learning_rate": 8.226576965399999e-06,
      "loss": 0.0286,
      "step": 558280
    },
    {
      "epoch": 0.9136701950079535,
      "grad_norm": 0.923816442489624,
      "learning_rate": 8.22651107318648e-06,
      "loss": 0.0332,
      "step": 558300
    },
    {
      "epoch": 0.9137029254466068,
      "grad_norm": 0.17017041146755219,
      "learning_rate": 8.226445180972964e-06,
      "loss": 0.0332,
      "step": 558320
    },
    {
      "epoch": 0.9137356558852602,
      "grad_norm": 1.2931190729141235,
      "learning_rate": 8.226379288759448e-06,
      "loss": 0.033,
      "step": 558340
    },
    {
      "epoch": 0.9137683863239136,
      "grad_norm": 1.485662579536438,
      "learning_rate": 8.226313396545932e-06,
      "loss": 0.0301,
      "step": 558360
    },
    {
      "epoch": 0.9138011167625668,
      "grad_norm": 0.594680905342102,
      "learning_rate": 8.226247504332413e-06,
      "loss": 0.0297,
      "step": 558380
    },
    {
      "epoch": 0.9138338472012202,
      "grad_norm": 0.9284268617630005,
      "learning_rate": 8.226181612118897e-06,
      "loss": 0.0307,
      "step": 558400
    },
    {
      "epoch": 0.9138665776398736,
      "grad_norm": 0.41888388991355896,
      "learning_rate": 8.22611571990538e-06,
      "loss": 0.0218,
      "step": 558420
    },
    {
      "epoch": 0.9138993080785268,
      "grad_norm": 0.23354290425777435,
      "learning_rate": 8.226049827691863e-06,
      "loss": 0.0257,
      "step": 558440
    },
    {
      "epoch": 0.9139320385171802,
      "grad_norm": 1.499815583229065,
      "learning_rate": 8.225983935478346e-06,
      "loss": 0.0377,
      "step": 558460
    },
    {
      "epoch": 0.9139647689558336,
      "grad_norm": 0.8397736549377441,
      "learning_rate": 8.225918043264828e-06,
      "loss": 0.0411,
      "step": 558480
    },
    {
      "epoch": 0.9139974993944869,
      "grad_norm": 4.022144794464111,
      "learning_rate": 8.225852151051312e-06,
      "loss": 0.0356,
      "step": 558500
    },
    {
      "epoch": 0.9140302298331402,
      "grad_norm": 0.7043977975845337,
      "learning_rate": 8.225786258837794e-06,
      "loss": 0.0291,
      "step": 558520
    },
    {
      "epoch": 0.9140629602717936,
      "grad_norm": 1.0850576162338257,
      "learning_rate": 8.225720366624277e-06,
      "loss": 0.0228,
      "step": 558540
    },
    {
      "epoch": 0.9140956907104469,
      "grad_norm": 0.7776739001274109,
      "learning_rate": 8.225654474410759e-06,
      "loss": 0.0217,
      "step": 558560
    },
    {
      "epoch": 0.9141284211491002,
      "grad_norm": 1.4468908309936523,
      "learning_rate": 8.225588582197243e-06,
      "loss": 0.0328,
      "step": 558580
    },
    {
      "epoch": 0.9141611515877536,
      "grad_norm": 1.3155087232589722,
      "learning_rate": 8.225522689983724e-06,
      "loss": 0.0211,
      "step": 558600
    },
    {
      "epoch": 0.914193882026407,
      "grad_norm": 1.1256952285766602,
      "learning_rate": 8.225456797770208e-06,
      "loss": 0.0305,
      "step": 558620
    },
    {
      "epoch": 0.9142266124650602,
      "grad_norm": 0.5008402466773987,
      "learning_rate": 8.22539090555669e-06,
      "loss": 0.0222,
      "step": 558640
    },
    {
      "epoch": 0.9142593429037136,
      "grad_norm": 0.3199000656604767,
      "learning_rate": 8.225325013343174e-06,
      "loss": 0.0212,
      "step": 558660
    },
    {
      "epoch": 0.914292073342367,
      "grad_norm": 1.6089133024215698,
      "learning_rate": 8.225259121129657e-06,
      "loss": 0.0299,
      "step": 558680
    },
    {
      "epoch": 0.9143248037810203,
      "grad_norm": 1.2669483423233032,
      "learning_rate": 8.225193228916139e-06,
      "loss": 0.0268,
      "step": 558700
    },
    {
      "epoch": 0.9143575342196736,
      "grad_norm": 0.536466121673584,
      "learning_rate": 8.225127336702623e-06,
      "loss": 0.0239,
      "step": 558720
    },
    {
      "epoch": 0.914390264658327,
      "grad_norm": 0.7973830103874207,
      "learning_rate": 8.225061444489104e-06,
      "loss": 0.0304,
      "step": 558740
    },
    {
      "epoch": 0.9144229950969803,
      "grad_norm": 1.1067092418670654,
      "learning_rate": 8.224995552275588e-06,
      "loss": 0.0339,
      "step": 558760
    },
    {
      "epoch": 0.9144557255356336,
      "grad_norm": 0.8875207901000977,
      "learning_rate": 8.224929660062072e-06,
      "loss": 0.0319,
      "step": 558780
    },
    {
      "epoch": 0.914488455974287,
      "grad_norm": 0.3959920108318329,
      "learning_rate": 8.224863767848554e-06,
      "loss": 0.0208,
      "step": 558800
    },
    {
      "epoch": 0.9145211864129403,
      "grad_norm": 0.7699127793312073,
      "learning_rate": 8.224797875635037e-06,
      "loss": 0.0323,
      "step": 558820
    },
    {
      "epoch": 0.9145539168515936,
      "grad_norm": 1.02838134765625,
      "learning_rate": 8.22473198342152e-06,
      "loss": 0.0192,
      "step": 558840
    },
    {
      "epoch": 0.914586647290247,
      "grad_norm": 0.40923213958740234,
      "learning_rate": 8.224666091208003e-06,
      "loss": 0.0206,
      "step": 558860
    },
    {
      "epoch": 0.9146193777289003,
      "grad_norm": 0.1989988088607788,
      "learning_rate": 8.224600198994486e-06,
      "loss": 0.0253,
      "step": 558880
    },
    {
      "epoch": 0.9146521081675537,
      "grad_norm": 0.46441417932510376,
      "learning_rate": 8.224534306780968e-06,
      "loss": 0.0343,
      "step": 558900
    },
    {
      "epoch": 0.914684838606207,
      "grad_norm": 0.3126736879348755,
      "learning_rate": 8.224468414567452e-06,
      "loss": 0.0297,
      "step": 558920
    },
    {
      "epoch": 0.9147175690448603,
      "grad_norm": 1.060922622680664,
      "learning_rate": 8.224402522353934e-06,
      "loss": 0.028,
      "step": 558940
    },
    {
      "epoch": 0.9147502994835137,
      "grad_norm": 1.182712197303772,
      "learning_rate": 8.224336630140417e-06,
      "loss": 0.028,
      "step": 558960
    },
    {
      "epoch": 0.914783029922167,
      "grad_norm": 0.6304833889007568,
      "learning_rate": 8.224270737926899e-06,
      "loss": 0.0189,
      "step": 558980
    },
    {
      "epoch": 0.9148157603608204,
      "grad_norm": 0.18893690407276154,
      "learning_rate": 8.224204845713383e-06,
      "loss": 0.0233,
      "step": 559000
    },
    {
      "epoch": 0.9148484907994737,
      "grad_norm": 0.717688798904419,
      "learning_rate": 8.224138953499866e-06,
      "loss": 0.0296,
      "step": 559020
    },
    {
      "epoch": 0.914881221238127,
      "grad_norm": 1.3592703342437744,
      "learning_rate": 8.224073061286348e-06,
      "loss": 0.0217,
      "step": 559040
    },
    {
      "epoch": 0.9149139516767804,
      "grad_norm": 1.5856941938400269,
      "learning_rate": 8.224007169072832e-06,
      "loss": 0.0362,
      "step": 559060
    },
    {
      "epoch": 0.9149466821154337,
      "grad_norm": 0.893864095211029,
      "learning_rate": 8.223941276859314e-06,
      "loss": 0.0305,
      "step": 559080
    },
    {
      "epoch": 0.9149794125540871,
      "grad_norm": 0.9781793355941772,
      "learning_rate": 8.223875384645797e-06,
      "loss": 0.0305,
      "step": 559100
    },
    {
      "epoch": 0.9150121429927404,
      "grad_norm": 0.3232499361038208,
      "learning_rate": 8.223809492432279e-06,
      "loss": 0.0343,
      "step": 559120
    },
    {
      "epoch": 0.9150448734313937,
      "grad_norm": 10.316184997558594,
      "learning_rate": 8.223743600218763e-06,
      "loss": 0.0275,
      "step": 559140
    },
    {
      "epoch": 0.9150776038700471,
      "grad_norm": 1.0485919713974,
      "learning_rate": 8.223677708005246e-06,
      "loss": 0.0284,
      "step": 559160
    },
    {
      "epoch": 0.9151103343087004,
      "grad_norm": 0.6848430633544922,
      "learning_rate": 8.223611815791728e-06,
      "loss": 0.0402,
      "step": 559180
    },
    {
      "epoch": 0.9151430647473537,
      "grad_norm": 2.4859254360198975,
      "learning_rate": 8.223545923578212e-06,
      "loss": 0.0323,
      "step": 559200
    },
    {
      "epoch": 0.9151757951860071,
      "grad_norm": 1.5215049982070923,
      "learning_rate": 8.223480031364695e-06,
      "loss": 0.0364,
      "step": 559220
    },
    {
      "epoch": 0.9152085256246604,
      "grad_norm": 0.2705143094062805,
      "learning_rate": 8.223414139151177e-06,
      "loss": 0.0272,
      "step": 559240
    },
    {
      "epoch": 0.9152412560633137,
      "grad_norm": 0.586871862411499,
      "learning_rate": 8.22334824693766e-06,
      "loss": 0.034,
      "step": 559260
    },
    {
      "epoch": 0.9152739865019671,
      "grad_norm": 0.7555010318756104,
      "learning_rate": 8.223282354724143e-06,
      "loss": 0.0314,
      "step": 559280
    },
    {
      "epoch": 0.9153067169406205,
      "grad_norm": 1.6970027685165405,
      "learning_rate": 8.223216462510626e-06,
      "loss": 0.0401,
      "step": 559300
    },
    {
      "epoch": 0.9153394473792738,
      "grad_norm": 0.7711119055747986,
      "learning_rate": 8.223150570297108e-06,
      "loss": 0.0261,
      "step": 559320
    },
    {
      "epoch": 0.9153721778179271,
      "grad_norm": 0.3299562931060791,
      "learning_rate": 8.223084678083592e-06,
      "loss": 0.0196,
      "step": 559340
    },
    {
      "epoch": 0.9154049082565805,
      "grad_norm": 0.2929597795009613,
      "learning_rate": 8.223018785870075e-06,
      "loss": 0.0184,
      "step": 559360
    },
    {
      "epoch": 0.9154376386952338,
      "grad_norm": 0.3490411043167114,
      "learning_rate": 8.222952893656557e-06,
      "loss": 0.0308,
      "step": 559380
    },
    {
      "epoch": 0.9154703691338871,
      "grad_norm": 0.4287997782230377,
      "learning_rate": 8.22288700144304e-06,
      "loss": 0.0381,
      "step": 559400
    },
    {
      "epoch": 0.9155030995725405,
      "grad_norm": 1.338797688484192,
      "learning_rate": 8.222821109229523e-06,
      "loss": 0.0323,
      "step": 559420
    },
    {
      "epoch": 0.9155358300111938,
      "grad_norm": 0.8210628032684326,
      "learning_rate": 8.222755217016006e-06,
      "loss": 0.0297,
      "step": 559440
    },
    {
      "epoch": 0.9155685604498471,
      "grad_norm": 0.5746875405311584,
      "learning_rate": 8.222689324802488e-06,
      "loss": 0.0215,
      "step": 559460
    },
    {
      "epoch": 0.9156012908885005,
      "grad_norm": 1.0787957906723022,
      "learning_rate": 8.222623432588972e-06,
      "loss": 0.0274,
      "step": 559480
    },
    {
      "epoch": 0.9156340213271539,
      "grad_norm": 0.47465234994888306,
      "learning_rate": 8.222557540375454e-06,
      "loss": 0.0275,
      "step": 559500
    },
    {
      "epoch": 0.9156667517658071,
      "grad_norm": 0.3452729284763336,
      "learning_rate": 8.222491648161937e-06,
      "loss": 0.0189,
      "step": 559520
    },
    {
      "epoch": 0.9156994822044605,
      "grad_norm": 0.45270994305610657,
      "learning_rate": 8.22242575594842e-06,
      "loss": 0.0319,
      "step": 559540
    },
    {
      "epoch": 0.9157322126431139,
      "grad_norm": 1.033486008644104,
      "learning_rate": 8.222359863734903e-06,
      "loss": 0.0297,
      "step": 559560
    },
    {
      "epoch": 0.9157649430817671,
      "grad_norm": 0.5545026063919067,
      "learning_rate": 8.222293971521386e-06,
      "loss": 0.0152,
      "step": 559580
    },
    {
      "epoch": 0.9157976735204205,
      "grad_norm": 1.609550952911377,
      "learning_rate": 8.222228079307868e-06,
      "loss": 0.0292,
      "step": 559600
    },
    {
      "epoch": 0.9158304039590739,
      "grad_norm": 1.3614217042922974,
      "learning_rate": 8.222162187094352e-06,
      "loss": 0.0253,
      "step": 559620
    },
    {
      "epoch": 0.9158631343977272,
      "grad_norm": 0.569722592830658,
      "learning_rate": 8.222096294880835e-06,
      "loss": 0.0324,
      "step": 559640
    },
    {
      "epoch": 0.9158958648363805,
      "grad_norm": 0.32443732023239136,
      "learning_rate": 8.222030402667317e-06,
      "loss": 0.0199,
      "step": 559660
    },
    {
      "epoch": 0.9159285952750339,
      "grad_norm": 0.5675288438796997,
      "learning_rate": 8.221964510453801e-06,
      "loss": 0.034,
      "step": 559680
    },
    {
      "epoch": 0.9159613257136873,
      "grad_norm": 0.3569909632205963,
      "learning_rate": 8.221898618240283e-06,
      "loss": 0.0264,
      "step": 559700
    },
    {
      "epoch": 0.9159940561523405,
      "grad_norm": 0.5299392342567444,
      "learning_rate": 8.221832726026766e-06,
      "loss": 0.028,
      "step": 559720
    },
    {
      "epoch": 0.9160267865909939,
      "grad_norm": 1.0971487760543823,
      "learning_rate": 8.22176683381325e-06,
      "loss": 0.036,
      "step": 559740
    },
    {
      "epoch": 0.9160595170296473,
      "grad_norm": 3.1799254417419434,
      "learning_rate": 8.221700941599732e-06,
      "loss": 0.0296,
      "step": 559760
    },
    {
      "epoch": 0.9160922474683005,
      "grad_norm": 1.4779304265975952,
      "learning_rate": 8.221635049386215e-06,
      "loss": 0.0244,
      "step": 559780
    },
    {
      "epoch": 0.9161249779069539,
      "grad_norm": 0.6557939648628235,
      "learning_rate": 8.221569157172697e-06,
      "loss": 0.0294,
      "step": 559800
    },
    {
      "epoch": 0.9161577083456073,
      "grad_norm": 1.4663406610488892,
      "learning_rate": 8.221503264959181e-06,
      "loss": 0.025,
      "step": 559820
    },
    {
      "epoch": 0.9161904387842605,
      "grad_norm": 1.757954478263855,
      "learning_rate": 8.221437372745663e-06,
      "loss": 0.0299,
      "step": 559840
    },
    {
      "epoch": 0.9162231692229139,
      "grad_norm": 0.6906337141990662,
      "learning_rate": 8.221371480532146e-06,
      "loss": 0.0233,
      "step": 559860
    },
    {
      "epoch": 0.9162558996615673,
      "grad_norm": 0.22532612085342407,
      "learning_rate": 8.221305588318628e-06,
      "loss": 0.0216,
      "step": 559880
    },
    {
      "epoch": 0.9162886301002207,
      "grad_norm": 0.9311667084693909,
      "learning_rate": 8.221239696105112e-06,
      "loss": 0.0328,
      "step": 559900
    },
    {
      "epoch": 0.9163213605388739,
      "grad_norm": 0.4476010203361511,
      "learning_rate": 8.221173803891594e-06,
      "loss": 0.032,
      "step": 559920
    },
    {
      "epoch": 0.9163540909775273,
      "grad_norm": 1.153306007385254,
      "learning_rate": 8.221107911678077e-06,
      "loss": 0.0278,
      "step": 559940
    },
    {
      "epoch": 0.9163868214161807,
      "grad_norm": 0.7199722528457642,
      "learning_rate": 8.221042019464561e-06,
      "loss": 0.0397,
      "step": 559960
    },
    {
      "epoch": 0.9164195518548339,
      "grad_norm": 0.4974721372127533,
      "learning_rate": 8.220976127251043e-06,
      "loss": 0.0254,
      "step": 559980
    },
    {
      "epoch": 0.9164522822934873,
      "grad_norm": 1.177160620689392,
      "learning_rate": 8.220910235037526e-06,
      "loss": 0.0303,
      "step": 560000
    },
    {
      "epoch": 0.9164850127321407,
      "grad_norm": 0.47442182898521423,
      "learning_rate": 8.22084434282401e-06,
      "loss": 0.034,
      "step": 560020
    },
    {
      "epoch": 0.9165177431707939,
      "grad_norm": 0.2967958152294159,
      "learning_rate": 8.220778450610492e-06,
      "loss": 0.0321,
      "step": 560040
    },
    {
      "epoch": 0.9165504736094473,
      "grad_norm": 0.5480490326881409,
      "learning_rate": 8.220712558396975e-06,
      "loss": 0.0268,
      "step": 560060
    },
    {
      "epoch": 0.9165832040481007,
      "grad_norm": 0.7105197310447693,
      "learning_rate": 8.220646666183459e-06,
      "loss": 0.0183,
      "step": 560080
    },
    {
      "epoch": 0.9166159344867539,
      "grad_norm": 1.0208959579467773,
      "learning_rate": 8.220580773969941e-06,
      "loss": 0.0326,
      "step": 560100
    },
    {
      "epoch": 0.9166486649254073,
      "grad_norm": 0.9823235869407654,
      "learning_rate": 8.220514881756425e-06,
      "loss": 0.0206,
      "step": 560120
    },
    {
      "epoch": 0.9166813953640607,
      "grad_norm": 0.33558785915374756,
      "learning_rate": 8.220448989542906e-06,
      "loss": 0.0239,
      "step": 560140
    },
    {
      "epoch": 0.916714125802714,
      "grad_norm": 1.1703479290008545,
      "learning_rate": 8.22038309732939e-06,
      "loss": 0.0187,
      "step": 560160
    },
    {
      "epoch": 0.9167468562413673,
      "grad_norm": 1.5170989036560059,
      "learning_rate": 8.220317205115872e-06,
      "loss": 0.0203,
      "step": 560180
    },
    {
      "epoch": 0.9167795866800207,
      "grad_norm": 0.1644229292869568,
      "learning_rate": 8.220251312902356e-06,
      "loss": 0.0372,
      "step": 560200
    },
    {
      "epoch": 0.9168123171186741,
      "grad_norm": 1.2693722248077393,
      "learning_rate": 8.220185420688837e-06,
      "loss": 0.0282,
      "step": 560220
    },
    {
      "epoch": 0.9168450475573273,
      "grad_norm": 1.474850058555603,
      "learning_rate": 8.220119528475321e-06,
      "loss": 0.0303,
      "step": 560240
    },
    {
      "epoch": 0.9168777779959807,
      "grad_norm": 8.386054039001465,
      "learning_rate": 8.220053636261803e-06,
      "loss": 0.0318,
      "step": 560260
    },
    {
      "epoch": 0.9169105084346341,
      "grad_norm": 1.5564093589782715,
      "learning_rate": 8.219987744048286e-06,
      "loss": 0.0258,
      "step": 560280
    },
    {
      "epoch": 0.9169432388732873,
      "grad_norm": 2.2106423377990723,
      "learning_rate": 8.219921851834768e-06,
      "loss": 0.0296,
      "step": 560300
    },
    {
      "epoch": 0.9169759693119407,
      "grad_norm": 0.5123931169509888,
      "learning_rate": 8.219855959621252e-06,
      "loss": 0.0241,
      "step": 560320
    },
    {
      "epoch": 0.9170086997505941,
      "grad_norm": 0.42578503489494324,
      "learning_rate": 8.219790067407734e-06,
      "loss": 0.0337,
      "step": 560340
    },
    {
      "epoch": 0.9170414301892474,
      "grad_norm": 0.5591793060302734,
      "learning_rate": 8.219724175194217e-06,
      "loss": 0.0233,
      "step": 560360
    },
    {
      "epoch": 0.9170741606279007,
      "grad_norm": 1.2247614860534668,
      "learning_rate": 8.219658282980701e-06,
      "loss": 0.0314,
      "step": 560380
    },
    {
      "epoch": 0.9171068910665541,
      "grad_norm": 0.39026886224746704,
      "learning_rate": 8.219592390767183e-06,
      "loss": 0.0269,
      "step": 560400
    },
    {
      "epoch": 0.9171396215052074,
      "grad_norm": 1.552894115447998,
      "learning_rate": 8.219526498553666e-06,
      "loss": 0.0239,
      "step": 560420
    },
    {
      "epoch": 0.9171723519438607,
      "grad_norm": 1.3686882257461548,
      "learning_rate": 8.21946060634015e-06,
      "loss": 0.0257,
      "step": 560440
    },
    {
      "epoch": 0.9172050823825141,
      "grad_norm": 0.6066478490829468,
      "learning_rate": 8.219394714126632e-06,
      "loss": 0.0386,
      "step": 560460
    },
    {
      "epoch": 0.9172378128211675,
      "grad_norm": 0.33826765418052673,
      "learning_rate": 8.219328821913116e-06,
      "loss": 0.0259,
      "step": 560480
    },
    {
      "epoch": 0.9172705432598207,
      "grad_norm": 0.46427974104881287,
      "learning_rate": 8.219262929699599e-06,
      "loss": 0.0192,
      "step": 560500
    },
    {
      "epoch": 0.9173032736984741,
      "grad_norm": 0.43299058079719543,
      "learning_rate": 8.219197037486081e-06,
      "loss": 0.0347,
      "step": 560520
    },
    {
      "epoch": 0.9173360041371275,
      "grad_norm": 0.5626626014709473,
      "learning_rate": 8.219131145272565e-06,
      "loss": 0.0273,
      "step": 560540
    },
    {
      "epoch": 0.9173687345757808,
      "grad_norm": 1.7391082048416138,
      "learning_rate": 8.219065253059047e-06,
      "loss": 0.027,
      "step": 560560
    },
    {
      "epoch": 0.9174014650144341,
      "grad_norm": 2.7304348945617676,
      "learning_rate": 8.21899936084553e-06,
      "loss": 0.0414,
      "step": 560580
    },
    {
      "epoch": 0.9174341954530875,
      "grad_norm": 0.35526522994041443,
      "learning_rate": 8.218933468632012e-06,
      "loss": 0.0426,
      "step": 560600
    },
    {
      "epoch": 0.9174669258917408,
      "grad_norm": 0.6052660346031189,
      "learning_rate": 8.218867576418496e-06,
      "loss": 0.0295,
      "step": 560620
    },
    {
      "epoch": 0.9174996563303941,
      "grad_norm": 0.9229189157485962,
      "learning_rate": 8.218801684204977e-06,
      "loss": 0.0232,
      "step": 560640
    },
    {
      "epoch": 0.9175323867690475,
      "grad_norm": 2.3031578063964844,
      "learning_rate": 8.218735791991461e-06,
      "loss": 0.0261,
      "step": 560660
    },
    {
      "epoch": 0.9175651172077008,
      "grad_norm": 0.6312037110328674,
      "learning_rate": 8.218669899777943e-06,
      "loss": 0.0228,
      "step": 560680
    },
    {
      "epoch": 0.9175978476463541,
      "grad_norm": 0.3398519456386566,
      "learning_rate": 8.218604007564427e-06,
      "loss": 0.0323,
      "step": 560700
    },
    {
      "epoch": 0.9176305780850075,
      "grad_norm": 1.9132486581802368,
      "learning_rate": 8.218538115350908e-06,
      "loss": 0.0319,
      "step": 560720
    },
    {
      "epoch": 0.9176633085236608,
      "grad_norm": 0.4100879430770874,
      "learning_rate": 8.218472223137392e-06,
      "loss": 0.0274,
      "step": 560740
    },
    {
      "epoch": 0.9176960389623142,
      "grad_norm": 0.9032325148582458,
      "learning_rate": 8.218406330923876e-06,
      "loss": 0.0266,
      "step": 560760
    },
    {
      "epoch": 0.9177287694009675,
      "grad_norm": 0.6026409268379211,
      "learning_rate": 8.218340438710358e-06,
      "loss": 0.0225,
      "step": 560780
    },
    {
      "epoch": 0.9177614998396209,
      "grad_norm": 0.9527063369750977,
      "learning_rate": 8.218274546496841e-06,
      "loss": 0.0217,
      "step": 560800
    },
    {
      "epoch": 0.9177942302782742,
      "grad_norm": 1.036311149597168,
      "learning_rate": 8.218208654283325e-06,
      "loss": 0.0361,
      "step": 560820
    },
    {
      "epoch": 0.9178269607169275,
      "grad_norm": 1.0895761251449585,
      "learning_rate": 8.218142762069807e-06,
      "loss": 0.0439,
      "step": 560840
    },
    {
      "epoch": 0.9178596911555809,
      "grad_norm": 1.4248631000518799,
      "learning_rate": 8.21807686985629e-06,
      "loss": 0.0182,
      "step": 560860
    },
    {
      "epoch": 0.9178924215942342,
      "grad_norm": 0.8897237777709961,
      "learning_rate": 8.218010977642774e-06,
      "loss": 0.0278,
      "step": 560880
    },
    {
      "epoch": 0.9179251520328875,
      "grad_norm": 0.7363410592079163,
      "learning_rate": 8.217945085429256e-06,
      "loss": 0.0303,
      "step": 560900
    },
    {
      "epoch": 0.9179578824715409,
      "grad_norm": 0.9087110757827759,
      "learning_rate": 8.21787919321574e-06,
      "loss": 0.0323,
      "step": 560920
    },
    {
      "epoch": 0.9179906129101942,
      "grad_norm": 0.5640463829040527,
      "learning_rate": 8.217813301002221e-06,
      "loss": 0.0392,
      "step": 560940
    },
    {
      "epoch": 0.9180233433488476,
      "grad_norm": 0.5544646382331848,
      "learning_rate": 8.217747408788705e-06,
      "loss": 0.0275,
      "step": 560960
    },
    {
      "epoch": 0.9180560737875009,
      "grad_norm": 0.7245122194290161,
      "learning_rate": 8.217681516575187e-06,
      "loss": 0.0387,
      "step": 560980
    },
    {
      "epoch": 0.9180888042261542,
      "grad_norm": 0.6017231941223145,
      "learning_rate": 8.21761562436167e-06,
      "loss": 0.0306,
      "step": 561000
    },
    {
      "epoch": 0.9181215346648076,
      "grad_norm": 1.0251046419143677,
      "learning_rate": 8.217549732148152e-06,
      "loss": 0.0312,
      "step": 561020
    },
    {
      "epoch": 0.9181542651034609,
      "grad_norm": 0.7742783427238464,
      "learning_rate": 8.217483839934636e-06,
      "loss": 0.0233,
      "step": 561040
    },
    {
      "epoch": 0.9181869955421142,
      "grad_norm": 1.4293200969696045,
      "learning_rate": 8.217417947721118e-06,
      "loss": 0.0297,
      "step": 561060
    },
    {
      "epoch": 0.9182197259807676,
      "grad_norm": 0.5931880474090576,
      "learning_rate": 8.217352055507601e-06,
      "loss": 0.0335,
      "step": 561080
    },
    {
      "epoch": 0.9182524564194209,
      "grad_norm": 0.16959379613399506,
      "learning_rate": 8.217286163294083e-06,
      "loss": 0.0282,
      "step": 561100
    },
    {
      "epoch": 0.9182851868580743,
      "grad_norm": 0.7086927890777588,
      "learning_rate": 8.217220271080567e-06,
      "loss": 0.0445,
      "step": 561120
    },
    {
      "epoch": 0.9183179172967276,
      "grad_norm": 0.38179662823677063,
      "learning_rate": 8.21715437886705e-06,
      "loss": 0.0233,
      "step": 561140
    },
    {
      "epoch": 0.918350647735381,
      "grad_norm": 1.912876844406128,
      "learning_rate": 8.217088486653532e-06,
      "loss": 0.0189,
      "step": 561160
    },
    {
      "epoch": 0.9183833781740343,
      "grad_norm": 0.30417659878730774,
      "learning_rate": 8.217022594440016e-06,
      "loss": 0.0326,
      "step": 561180
    },
    {
      "epoch": 0.9184161086126876,
      "grad_norm": 0.5297319293022156,
      "learning_rate": 8.2169567022265e-06,
      "loss": 0.0385,
      "step": 561200
    },
    {
      "epoch": 0.918448839051341,
      "grad_norm": 1.128371238708496,
      "learning_rate": 8.216890810012981e-06,
      "loss": 0.0243,
      "step": 561220
    },
    {
      "epoch": 0.9184815694899943,
      "grad_norm": 7.54627799987793,
      "learning_rate": 8.216824917799465e-06,
      "loss": 0.0285,
      "step": 561240
    },
    {
      "epoch": 0.9185142999286476,
      "grad_norm": 0.48161548376083374,
      "learning_rate": 8.216759025585948e-06,
      "loss": 0.027,
      "step": 561260
    },
    {
      "epoch": 0.918547030367301,
      "grad_norm": 0.5066095590591431,
      "learning_rate": 8.21669313337243e-06,
      "loss": 0.0332,
      "step": 561280
    },
    {
      "epoch": 0.9185797608059543,
      "grad_norm": 0.6556078791618347,
      "learning_rate": 8.216627241158914e-06,
      "loss": 0.0293,
      "step": 561300
    },
    {
      "epoch": 0.9186124912446076,
      "grad_norm": 0.43320518732070923,
      "learning_rate": 8.216561348945396e-06,
      "loss": 0.0203,
      "step": 561320
    },
    {
      "epoch": 0.918645221683261,
      "grad_norm": 4.301849842071533,
      "learning_rate": 8.21649545673188e-06,
      "loss": 0.0327,
      "step": 561340
    },
    {
      "epoch": 0.9186779521219144,
      "grad_norm": 0.7231854200363159,
      "learning_rate": 8.216429564518361e-06,
      "loss": 0.0264,
      "step": 561360
    },
    {
      "epoch": 0.9187106825605676,
      "grad_norm": 0.806861400604248,
      "learning_rate": 8.216363672304845e-06,
      "loss": 0.0368,
      "step": 561380
    },
    {
      "epoch": 0.918743412999221,
      "grad_norm": 0.563024640083313,
      "learning_rate": 8.216297780091327e-06,
      "loss": 0.0207,
      "step": 561400
    },
    {
      "epoch": 0.9187761434378744,
      "grad_norm": 0.9135335087776184,
      "learning_rate": 8.21623188787781e-06,
      "loss": 0.0312,
      "step": 561420
    },
    {
      "epoch": 0.9188088738765277,
      "grad_norm": 0.5777857899665833,
      "learning_rate": 8.216165995664292e-06,
      "loss": 0.0332,
      "step": 561440
    },
    {
      "epoch": 0.918841604315181,
      "grad_norm": 0.5959970951080322,
      "learning_rate": 8.216100103450776e-06,
      "loss": 0.0445,
      "step": 561460
    },
    {
      "epoch": 0.9188743347538344,
      "grad_norm": 0.5698937773704529,
      "learning_rate": 8.21603421123726e-06,
      "loss": 0.0211,
      "step": 561480
    },
    {
      "epoch": 0.9189070651924877,
      "grad_norm": 3.6715166568756104,
      "learning_rate": 8.215968319023741e-06,
      "loss": 0.0208,
      "step": 561500
    },
    {
      "epoch": 0.918939795631141,
      "grad_norm": 0.6893078088760376,
      "learning_rate": 8.215902426810225e-06,
      "loss": 0.0236,
      "step": 561520
    },
    {
      "epoch": 0.9189725260697944,
      "grad_norm": 0.8212481141090393,
      "learning_rate": 8.215836534596707e-06,
      "loss": 0.0331,
      "step": 561540
    },
    {
      "epoch": 0.9190052565084478,
      "grad_norm": 1.0821455717086792,
      "learning_rate": 8.21577064238319e-06,
      "loss": 0.0188,
      "step": 561560
    },
    {
      "epoch": 0.919037986947101,
      "grad_norm": 2.8452539443969727,
      "learning_rate": 8.215704750169672e-06,
      "loss": 0.0366,
      "step": 561580
    },
    {
      "epoch": 0.9190707173857544,
      "grad_norm": 0.678216278553009,
      "learning_rate": 8.215638857956156e-06,
      "loss": 0.0243,
      "step": 561600
    },
    {
      "epoch": 0.9191034478244078,
      "grad_norm": 2.26745343208313,
      "learning_rate": 8.21557296574264e-06,
      "loss": 0.0216,
      "step": 561620
    },
    {
      "epoch": 0.919136178263061,
      "grad_norm": 2.8211653232574463,
      "learning_rate": 8.215507073529121e-06,
      "loss": 0.0289,
      "step": 561640
    },
    {
      "epoch": 0.9191689087017144,
      "grad_norm": 1.1581029891967773,
      "learning_rate": 8.215441181315605e-06,
      "loss": 0.0302,
      "step": 561660
    },
    {
      "epoch": 0.9192016391403678,
      "grad_norm": 2.014404296875,
      "learning_rate": 8.215375289102088e-06,
      "loss": 0.0363,
      "step": 561680
    },
    {
      "epoch": 0.919234369579021,
      "grad_norm": 0.857499361038208,
      "learning_rate": 8.21530939688857e-06,
      "loss": 0.0243,
      "step": 561700
    },
    {
      "epoch": 0.9192671000176744,
      "grad_norm": 0.6496660709381104,
      "learning_rate": 8.215243504675054e-06,
      "loss": 0.0329,
      "step": 561720
    },
    {
      "epoch": 0.9192998304563278,
      "grad_norm": 0.6377732157707214,
      "learning_rate": 8.215177612461536e-06,
      "loss": 0.0254,
      "step": 561740
    },
    {
      "epoch": 0.9193325608949812,
      "grad_norm": 1.7411301136016846,
      "learning_rate": 8.21511172024802e-06,
      "loss": 0.0263,
      "step": 561760
    },
    {
      "epoch": 0.9193652913336344,
      "grad_norm": 1.3298662900924683,
      "learning_rate": 8.215045828034501e-06,
      "loss": 0.0222,
      "step": 561780
    },
    {
      "epoch": 0.9193980217722878,
      "grad_norm": 0.5340608954429626,
      "learning_rate": 8.214979935820985e-06,
      "loss": 0.0244,
      "step": 561800
    },
    {
      "epoch": 0.9194307522109412,
      "grad_norm": 1.2248791456222534,
      "learning_rate": 8.214914043607467e-06,
      "loss": 0.0315,
      "step": 561820
    },
    {
      "epoch": 0.9194634826495944,
      "grad_norm": 0.461229145526886,
      "learning_rate": 8.21484815139395e-06,
      "loss": 0.027,
      "step": 561840
    },
    {
      "epoch": 0.9194962130882478,
      "grad_norm": 0.5386750102043152,
      "learning_rate": 8.214782259180434e-06,
      "loss": 0.0192,
      "step": 561860
    },
    {
      "epoch": 0.9195289435269012,
      "grad_norm": 0.9387890696525574,
      "learning_rate": 8.214716366966916e-06,
      "loss": 0.0201,
      "step": 561880
    },
    {
      "epoch": 0.9195616739655544,
      "grad_norm": 0.4243740141391754,
      "learning_rate": 8.2146504747534e-06,
      "loss": 0.0288,
      "step": 561900
    },
    {
      "epoch": 0.9195944044042078,
      "grad_norm": 0.6201169490814209,
      "learning_rate": 8.214584582539881e-06,
      "loss": 0.0258,
      "step": 561920
    },
    {
      "epoch": 0.9196271348428612,
      "grad_norm": 1.2552746534347534,
      "learning_rate": 8.214518690326365e-06,
      "loss": 0.0286,
      "step": 561940
    },
    {
      "epoch": 0.9196598652815146,
      "grad_norm": 1.2530375719070435,
      "learning_rate": 8.214452798112847e-06,
      "loss": 0.0241,
      "step": 561960
    },
    {
      "epoch": 0.9196925957201678,
      "grad_norm": 1.6208940744400024,
      "learning_rate": 8.21438690589933e-06,
      "loss": 0.0407,
      "step": 561980
    },
    {
      "epoch": 0.9197253261588212,
      "grad_norm": 0.39431217312812805,
      "learning_rate": 8.214321013685814e-06,
      "loss": 0.0206,
      "step": 562000
    },
    {
      "epoch": 0.9197580565974746,
      "grad_norm": 0.1783248782157898,
      "learning_rate": 8.214255121472296e-06,
      "loss": 0.0276,
      "step": 562020
    },
    {
      "epoch": 0.9197907870361278,
      "grad_norm": 0.49972018599510193,
      "learning_rate": 8.21418922925878e-06,
      "loss": 0.0246,
      "step": 562040
    },
    {
      "epoch": 0.9198235174747812,
      "grad_norm": 1.6029539108276367,
      "learning_rate": 8.214123337045263e-06,
      "loss": 0.021,
      "step": 562060
    },
    {
      "epoch": 0.9198562479134346,
      "grad_norm": 0.7550228834152222,
      "learning_rate": 8.214057444831745e-06,
      "loss": 0.0249,
      "step": 562080
    },
    {
      "epoch": 0.9198889783520878,
      "grad_norm": 0.46701058745384216,
      "learning_rate": 8.213991552618228e-06,
      "loss": 0.0234,
      "step": 562100
    },
    {
      "epoch": 0.9199217087907412,
      "grad_norm": 1.7637742757797241,
      "learning_rate": 8.21392566040471e-06,
      "loss": 0.0232,
      "step": 562120
    },
    {
      "epoch": 0.9199544392293946,
      "grad_norm": 0.4650477170944214,
      "learning_rate": 8.213859768191194e-06,
      "loss": 0.0228,
      "step": 562140
    },
    {
      "epoch": 0.9199871696680479,
      "grad_norm": 1.0215784311294556,
      "learning_rate": 8.213793875977676e-06,
      "loss": 0.0265,
      "step": 562160
    },
    {
      "epoch": 0.9200199001067012,
      "grad_norm": 2.3628432750701904,
      "learning_rate": 8.21372798376416e-06,
      "loss": 0.0263,
      "step": 562180
    },
    {
      "epoch": 0.9200526305453546,
      "grad_norm": 0.6732039451599121,
      "learning_rate": 8.213662091550643e-06,
      "loss": 0.0268,
      "step": 562200
    },
    {
      "epoch": 0.920085360984008,
      "grad_norm": 0.45539894700050354,
      "learning_rate": 8.213596199337125e-06,
      "loss": 0.0235,
      "step": 562220
    },
    {
      "epoch": 0.9201180914226612,
      "grad_norm": 0.25165343284606934,
      "learning_rate": 8.213530307123609e-06,
      "loss": 0.0281,
      "step": 562240
    },
    {
      "epoch": 0.9201508218613146,
      "grad_norm": 0.5881196856498718,
      "learning_rate": 8.21346441491009e-06,
      "loss": 0.0236,
      "step": 562260
    },
    {
      "epoch": 0.920183552299968,
      "grad_norm": 2.5165960788726807,
      "learning_rate": 8.213398522696574e-06,
      "loss": 0.0335,
      "step": 562280
    },
    {
      "epoch": 0.9202162827386212,
      "grad_norm": 1.5378116369247437,
      "learning_rate": 8.213332630483056e-06,
      "loss": 0.0254,
      "step": 562300
    },
    {
      "epoch": 0.9202490131772746,
      "grad_norm": 5.566072940826416,
      "learning_rate": 8.21326673826954e-06,
      "loss": 0.0338,
      "step": 562320
    },
    {
      "epoch": 0.920281743615928,
      "grad_norm": 2.536428928375244,
      "learning_rate": 8.213200846056021e-06,
      "loss": 0.0232,
      "step": 562340
    },
    {
      "epoch": 0.9203144740545813,
      "grad_norm": 0.16495391726493835,
      "learning_rate": 8.213134953842505e-06,
      "loss": 0.0255,
      "step": 562360
    },
    {
      "epoch": 0.9203472044932346,
      "grad_norm": 0.5751962661743164,
      "learning_rate": 8.213069061628987e-06,
      "loss": 0.0228,
      "step": 562380
    },
    {
      "epoch": 0.920379934931888,
      "grad_norm": 0.43024465441703796,
      "learning_rate": 8.21300316941547e-06,
      "loss": 0.0246,
      "step": 562400
    },
    {
      "epoch": 0.9204126653705413,
      "grad_norm": 0.842841625213623,
      "learning_rate": 8.212937277201954e-06,
      "loss": 0.0384,
      "step": 562420
    },
    {
      "epoch": 0.9204453958091946,
      "grad_norm": 1.0155471563339233,
      "learning_rate": 8.212871384988436e-06,
      "loss": 0.0206,
      "step": 562440
    },
    {
      "epoch": 0.920478126247848,
      "grad_norm": 1.4012919664382935,
      "learning_rate": 8.21280549277492e-06,
      "loss": 0.0294,
      "step": 562460
    },
    {
      "epoch": 0.9205108566865013,
      "grad_norm": 0.3201346695423126,
      "learning_rate": 8.212739600561403e-06,
      "loss": 0.0216,
      "step": 562480
    },
    {
      "epoch": 0.9205435871251546,
      "grad_norm": 0.4694725573062897,
      "learning_rate": 8.212673708347885e-06,
      "loss": 0.0165,
      "step": 562500
    },
    {
      "epoch": 0.920576317563808,
      "grad_norm": 1.1972423791885376,
      "learning_rate": 8.212607816134369e-06,
      "loss": 0.0293,
      "step": 562520
    },
    {
      "epoch": 0.9206090480024613,
      "grad_norm": 0.7590920329093933,
      "learning_rate": 8.212541923920852e-06,
      "loss": 0.0318,
      "step": 562540
    },
    {
      "epoch": 0.9206417784411147,
      "grad_norm": 0.4393557608127594,
      "learning_rate": 8.212476031707334e-06,
      "loss": 0.028,
      "step": 562560
    },
    {
      "epoch": 0.920674508879768,
      "grad_norm": 2.242853879928589,
      "learning_rate": 8.212410139493818e-06,
      "loss": 0.0386,
      "step": 562580
    },
    {
      "epoch": 0.9207072393184214,
      "grad_norm": 10.21412181854248,
      "learning_rate": 8.2123442472803e-06,
      "loss": 0.0326,
      "step": 562600
    },
    {
      "epoch": 0.9207399697570747,
      "grad_norm": 1.33782958984375,
      "learning_rate": 8.212278355066783e-06,
      "loss": 0.0317,
      "step": 562620
    },
    {
      "epoch": 0.920772700195728,
      "grad_norm": 1.8322415351867676,
      "learning_rate": 8.212212462853265e-06,
      "loss": 0.0204,
      "step": 562640
    },
    {
      "epoch": 0.9208054306343814,
      "grad_norm": 0.7685093283653259,
      "learning_rate": 8.212146570639749e-06,
      "loss": 0.0324,
      "step": 562660
    },
    {
      "epoch": 0.9208381610730347,
      "grad_norm": 0.9092811942100525,
      "learning_rate": 8.21208067842623e-06,
      "loss": 0.0237,
      "step": 562680
    },
    {
      "epoch": 0.920870891511688,
      "grad_norm": 0.15304480493068695,
      "learning_rate": 8.212014786212714e-06,
      "loss": 0.0263,
      "step": 562700
    },
    {
      "epoch": 0.9209036219503414,
      "grad_norm": 1.7476319074630737,
      "learning_rate": 8.211948893999196e-06,
      "loss": 0.0372,
      "step": 562720
    },
    {
      "epoch": 0.9209363523889947,
      "grad_norm": 4.170165538787842,
      "learning_rate": 8.21188300178568e-06,
      "loss": 0.026,
      "step": 562740
    },
    {
      "epoch": 0.9209690828276481,
      "grad_norm": 0.6580075621604919,
      "learning_rate": 8.211817109572161e-06,
      "loss": 0.0284,
      "step": 562760
    },
    {
      "epoch": 0.9210018132663014,
      "grad_norm": 1.4048898220062256,
      "learning_rate": 8.211751217358645e-06,
      "loss": 0.0259,
      "step": 562780
    },
    {
      "epoch": 0.9210345437049547,
      "grad_norm": 1.6759181022644043,
      "learning_rate": 8.211685325145129e-06,
      "loss": 0.038,
      "step": 562800
    },
    {
      "epoch": 0.9210672741436081,
      "grad_norm": 0.6797237396240234,
      "learning_rate": 8.21161943293161e-06,
      "loss": 0.0307,
      "step": 562820
    },
    {
      "epoch": 0.9211000045822614,
      "grad_norm": 0.19548995792865753,
      "learning_rate": 8.211553540718094e-06,
      "loss": 0.028,
      "step": 562840
    },
    {
      "epoch": 0.9211327350209148,
      "grad_norm": 1.1944248676300049,
      "learning_rate": 8.211487648504578e-06,
      "loss": 0.0266,
      "step": 562860
    },
    {
      "epoch": 0.9211654654595681,
      "grad_norm": 1.3941750526428223,
      "learning_rate": 8.21142175629106e-06,
      "loss": 0.0382,
      "step": 562880
    },
    {
      "epoch": 0.9211981958982214,
      "grad_norm": 1.038508415222168,
      "learning_rate": 8.211355864077543e-06,
      "loss": 0.0283,
      "step": 562900
    },
    {
      "epoch": 0.9212309263368748,
      "grad_norm": 0.3155822157859802,
      "learning_rate": 8.211289971864027e-06,
      "loss": 0.0343,
      "step": 562920
    },
    {
      "epoch": 0.9212636567755281,
      "grad_norm": 1.4302706718444824,
      "learning_rate": 8.211224079650509e-06,
      "loss": 0.0244,
      "step": 562940
    },
    {
      "epoch": 0.9212963872141814,
      "grad_norm": 0.5653213858604431,
      "learning_rate": 8.211158187436992e-06,
      "loss": 0.0382,
      "step": 562960
    },
    {
      "epoch": 0.9213291176528348,
      "grad_norm": 0.8553973436355591,
      "learning_rate": 8.211092295223474e-06,
      "loss": 0.0337,
      "step": 562980
    },
    {
      "epoch": 0.9213618480914881,
      "grad_norm": 0.3017207086086273,
      "learning_rate": 8.211026403009958e-06,
      "loss": 0.0309,
      "step": 563000
    },
    {
      "epoch": 0.9213945785301415,
      "grad_norm": 1.2836607694625854,
      "learning_rate": 8.21096051079644e-06,
      "loss": 0.037,
      "step": 563020
    },
    {
      "epoch": 0.9214273089687948,
      "grad_norm": 0.21290944516658783,
      "learning_rate": 8.210894618582923e-06,
      "loss": 0.0297,
      "step": 563040
    },
    {
      "epoch": 0.9214600394074481,
      "grad_norm": 0.2795339524745941,
      "learning_rate": 8.210828726369405e-06,
      "loss": 0.0239,
      "step": 563060
    },
    {
      "epoch": 0.9214927698461015,
      "grad_norm": 1.4361299276351929,
      "learning_rate": 8.210762834155889e-06,
      "loss": 0.0272,
      "step": 563080
    },
    {
      "epoch": 0.9215255002847548,
      "grad_norm": 1.1478368043899536,
      "learning_rate": 8.21069694194237e-06,
      "loss": 0.0214,
      "step": 563100
    },
    {
      "epoch": 0.9215582307234081,
      "grad_norm": 1.457595705986023,
      "learning_rate": 8.210631049728854e-06,
      "loss": 0.0344,
      "step": 563120
    },
    {
      "epoch": 0.9215909611620615,
      "grad_norm": 1.9166160821914673,
      "learning_rate": 8.210565157515336e-06,
      "loss": 0.027,
      "step": 563140
    },
    {
      "epoch": 0.9216236916007148,
      "grad_norm": 0.1332743912935257,
      "learning_rate": 8.21049926530182e-06,
      "loss": 0.0244,
      "step": 563160
    },
    {
      "epoch": 0.9216564220393682,
      "grad_norm": 0.44823893904685974,
      "learning_rate": 8.210433373088302e-06,
      "loss": 0.0176,
      "step": 563180
    },
    {
      "epoch": 0.9216891524780215,
      "grad_norm": 1.5725191831588745,
      "learning_rate": 8.210367480874785e-06,
      "loss": 0.0324,
      "step": 563200
    },
    {
      "epoch": 0.9217218829166749,
      "grad_norm": 0.5681530237197876,
      "learning_rate": 8.210301588661269e-06,
      "loss": 0.03,
      "step": 563220
    },
    {
      "epoch": 0.9217546133553282,
      "grad_norm": 0.9988436698913574,
      "learning_rate": 8.21023569644775e-06,
      "loss": 0.0282,
      "step": 563240
    },
    {
      "epoch": 0.9217873437939815,
      "grad_norm": 1.163382887840271,
      "learning_rate": 8.210169804234234e-06,
      "loss": 0.0352,
      "step": 563260
    },
    {
      "epoch": 0.9218200742326349,
      "grad_norm": 1.5132672786712646,
      "learning_rate": 8.210103912020718e-06,
      "loss": 0.0383,
      "step": 563280
    },
    {
      "epoch": 0.9218528046712882,
      "grad_norm": 0.7822005152702332,
      "learning_rate": 8.210038019807201e-06,
      "loss": 0.0333,
      "step": 563300
    },
    {
      "epoch": 0.9218855351099415,
      "grad_norm": 1.1070966720581055,
      "learning_rate": 8.209972127593683e-06,
      "loss": 0.025,
      "step": 563320
    },
    {
      "epoch": 0.9219182655485949,
      "grad_norm": 0.489111065864563,
      "learning_rate": 8.209906235380167e-06,
      "loss": 0.0332,
      "step": 563340
    },
    {
      "epoch": 0.9219509959872482,
      "grad_norm": 0.7893881797790527,
      "learning_rate": 8.209840343166649e-06,
      "loss": 0.0224,
      "step": 563360
    },
    {
      "epoch": 0.9219837264259015,
      "grad_norm": 0.520380437374115,
      "learning_rate": 8.209774450953132e-06,
      "loss": 0.0205,
      "step": 563380
    },
    {
      "epoch": 0.9220164568645549,
      "grad_norm": 2.2284328937530518,
      "learning_rate": 8.209708558739614e-06,
      "loss": 0.0355,
      "step": 563400
    },
    {
      "epoch": 0.9220491873032083,
      "grad_norm": 1.7004332542419434,
      "learning_rate": 8.209642666526098e-06,
      "loss": 0.0376,
      "step": 563420
    },
    {
      "epoch": 0.9220819177418615,
      "grad_norm": 1.0979983806610107,
      "learning_rate": 8.20957677431258e-06,
      "loss": 0.0399,
      "step": 563440
    },
    {
      "epoch": 0.9221146481805149,
      "grad_norm": 1.2933822870254517,
      "learning_rate": 8.209510882099063e-06,
      "loss": 0.047,
      "step": 563460
    },
    {
      "epoch": 0.9221473786191683,
      "grad_norm": 1.2942289113998413,
      "learning_rate": 8.209444989885545e-06,
      "loss": 0.03,
      "step": 563480
    },
    {
      "epoch": 0.9221801090578216,
      "grad_norm": 1.7499293088912964,
      "learning_rate": 8.209379097672029e-06,
      "loss": 0.0258,
      "step": 563500
    },
    {
      "epoch": 0.9222128394964749,
      "grad_norm": 0.3939698338508606,
      "learning_rate": 8.20931320545851e-06,
      "loss": 0.0273,
      "step": 563520
    },
    {
      "epoch": 0.9222455699351283,
      "grad_norm": 0.9540333151817322,
      "learning_rate": 8.209247313244994e-06,
      "loss": 0.0256,
      "step": 563540
    },
    {
      "epoch": 0.9222783003737816,
      "grad_norm": 0.2552799880504608,
      "learning_rate": 8.209181421031476e-06,
      "loss": 0.0267,
      "step": 563560
    },
    {
      "epoch": 0.9223110308124349,
      "grad_norm": 0.3073217272758484,
      "learning_rate": 8.20911552881796e-06,
      "loss": 0.03,
      "step": 563580
    },
    {
      "epoch": 0.9223437612510883,
      "grad_norm": 3.178797960281372,
      "learning_rate": 8.209049636604443e-06,
      "loss": 0.0337,
      "step": 563600
    },
    {
      "epoch": 0.9223764916897417,
      "grad_norm": 2.4559829235076904,
      "learning_rate": 8.208983744390925e-06,
      "loss": 0.03,
      "step": 563620
    },
    {
      "epoch": 0.9224092221283949,
      "grad_norm": 1.4445675611495972,
      "learning_rate": 8.208917852177409e-06,
      "loss": 0.037,
      "step": 563640
    },
    {
      "epoch": 0.9224419525670483,
      "grad_norm": 1.4337928295135498,
      "learning_rate": 8.208851959963892e-06,
      "loss": 0.0236,
      "step": 563660
    },
    {
      "epoch": 0.9224746830057017,
      "grad_norm": 1.2875993251800537,
      "learning_rate": 8.208786067750374e-06,
      "loss": 0.0253,
      "step": 563680
    },
    {
      "epoch": 0.9225074134443549,
      "grad_norm": 1.2538875341415405,
      "learning_rate": 8.208720175536858e-06,
      "loss": 0.0285,
      "step": 563700
    },
    {
      "epoch": 0.9225401438830083,
      "grad_norm": 0.3947708010673523,
      "learning_rate": 8.208654283323341e-06,
      "loss": 0.0294,
      "step": 563720
    },
    {
      "epoch": 0.9225728743216617,
      "grad_norm": 1.6185561418533325,
      "learning_rate": 8.208588391109823e-06,
      "loss": 0.0241,
      "step": 563740
    },
    {
      "epoch": 0.922605604760315,
      "grad_norm": 0.5572506785392761,
      "learning_rate": 8.208522498896307e-06,
      "loss": 0.0272,
      "step": 563760
    },
    {
      "epoch": 0.9226383351989683,
      "grad_norm": 0.5733077526092529,
      "learning_rate": 8.208456606682789e-06,
      "loss": 0.0347,
      "step": 563780
    },
    {
      "epoch": 0.9226710656376217,
      "grad_norm": 0.3760681748390198,
      "learning_rate": 8.208390714469272e-06,
      "loss": 0.0249,
      "step": 563800
    },
    {
      "epoch": 0.9227037960762751,
      "grad_norm": 0.8651511669158936,
      "learning_rate": 8.208324822255754e-06,
      "loss": 0.0212,
      "step": 563820
    },
    {
      "epoch": 0.9227365265149283,
      "grad_norm": 0.5845814347267151,
      "learning_rate": 8.208258930042238e-06,
      "loss": 0.0236,
      "step": 563840
    },
    {
      "epoch": 0.9227692569535817,
      "grad_norm": 1.0001970529556274,
      "learning_rate": 8.20819303782872e-06,
      "loss": 0.0294,
      "step": 563860
    },
    {
      "epoch": 0.9228019873922351,
      "grad_norm": 2.207267999649048,
      "learning_rate": 8.208127145615203e-06,
      "loss": 0.0345,
      "step": 563880
    },
    {
      "epoch": 0.9228347178308883,
      "grad_norm": 0.800439178943634,
      "learning_rate": 8.208061253401685e-06,
      "loss": 0.027,
      "step": 563900
    },
    {
      "epoch": 0.9228674482695417,
      "grad_norm": 2.9069056510925293,
      "learning_rate": 8.207995361188169e-06,
      "loss": 0.0265,
      "step": 563920
    },
    {
      "epoch": 0.9229001787081951,
      "grad_norm": 0.5132282972335815,
      "learning_rate": 8.207929468974652e-06,
      "loss": 0.0284,
      "step": 563940
    },
    {
      "epoch": 0.9229329091468483,
      "grad_norm": 0.7740107774734497,
      "learning_rate": 8.207863576761134e-06,
      "loss": 0.0314,
      "step": 563960
    },
    {
      "epoch": 0.9229656395855017,
      "grad_norm": 1.13473641872406,
      "learning_rate": 8.207797684547618e-06,
      "loss": 0.0294,
      "step": 563980
    },
    {
      "epoch": 0.9229983700241551,
      "grad_norm": 0.6855960488319397,
      "learning_rate": 8.2077317923341e-06,
      "loss": 0.0317,
      "step": 564000
    },
    {
      "epoch": 0.9230311004628085,
      "grad_norm": 0.19822004437446594,
      "learning_rate": 8.207665900120583e-06,
      "loss": 0.0336,
      "step": 564020
    },
    {
      "epoch": 0.9230638309014617,
      "grad_norm": 0.24210022389888763,
      "learning_rate": 8.207600007907067e-06,
      "loss": 0.0236,
      "step": 564040
    },
    {
      "epoch": 0.9230965613401151,
      "grad_norm": 0.8693530559539795,
      "learning_rate": 8.207534115693549e-06,
      "loss": 0.0267,
      "step": 564060
    },
    {
      "epoch": 0.9231292917787685,
      "grad_norm": 1.4513529539108276,
      "learning_rate": 8.207468223480032e-06,
      "loss": 0.0297,
      "step": 564080
    },
    {
      "epoch": 0.9231620222174217,
      "grad_norm": 0.5666004419326782,
      "learning_rate": 8.207402331266516e-06,
      "loss": 0.0223,
      "step": 564100
    },
    {
      "epoch": 0.9231947526560751,
      "grad_norm": 0.2605530023574829,
      "learning_rate": 8.207336439052998e-06,
      "loss": 0.0299,
      "step": 564120
    },
    {
      "epoch": 0.9232274830947285,
      "grad_norm": 1.5890591144561768,
      "learning_rate": 8.207270546839481e-06,
      "loss": 0.0267,
      "step": 564140
    },
    {
      "epoch": 0.9232602135333817,
      "grad_norm": 2.5473594665527344,
      "learning_rate": 8.207204654625963e-06,
      "loss": 0.0293,
      "step": 564160
    },
    {
      "epoch": 0.9232929439720351,
      "grad_norm": 1.442905068397522,
      "learning_rate": 8.207138762412447e-06,
      "loss": 0.0292,
      "step": 564180
    },
    {
      "epoch": 0.9233256744106885,
      "grad_norm": 0.9903773665428162,
      "learning_rate": 8.207072870198929e-06,
      "loss": 0.026,
      "step": 564200
    },
    {
      "epoch": 0.9233584048493418,
      "grad_norm": 0.8046498894691467,
      "learning_rate": 8.207006977985412e-06,
      "loss": 0.0415,
      "step": 564220
    },
    {
      "epoch": 0.9233911352879951,
      "grad_norm": 0.6912850737571716,
      "learning_rate": 8.206941085771894e-06,
      "loss": 0.0206,
      "step": 564240
    },
    {
      "epoch": 0.9234238657266485,
      "grad_norm": 1.5409189462661743,
      "learning_rate": 8.206875193558378e-06,
      "loss": 0.023,
      "step": 564260
    },
    {
      "epoch": 0.9234565961653018,
      "grad_norm": 0.8973252773284912,
      "learning_rate": 8.20680930134486e-06,
      "loss": 0.0268,
      "step": 564280
    },
    {
      "epoch": 0.9234893266039551,
      "grad_norm": 0.24051529169082642,
      "learning_rate": 8.206743409131343e-06,
      "loss": 0.0302,
      "step": 564300
    },
    {
      "epoch": 0.9235220570426085,
      "grad_norm": 0.4292464256286621,
      "learning_rate": 8.206677516917827e-06,
      "loss": 0.0229,
      "step": 564320
    },
    {
      "epoch": 0.9235547874812619,
      "grad_norm": 0.9517582058906555,
      "learning_rate": 8.206611624704309e-06,
      "loss": 0.0224,
      "step": 564340
    },
    {
      "epoch": 0.9235875179199151,
      "grad_norm": 0.5720941424369812,
      "learning_rate": 8.206545732490792e-06,
      "loss": 0.0213,
      "step": 564360
    },
    {
      "epoch": 0.9236202483585685,
      "grad_norm": 0.483419805765152,
      "learning_rate": 8.206479840277274e-06,
      "loss": 0.0231,
      "step": 564380
    },
    {
      "epoch": 0.9236529787972219,
      "grad_norm": 1.464349627494812,
      "learning_rate": 8.206413948063758e-06,
      "loss": 0.0309,
      "step": 564400
    },
    {
      "epoch": 0.9236857092358752,
      "grad_norm": 1.013956069946289,
      "learning_rate": 8.20634805585024e-06,
      "loss": 0.0311,
      "step": 564420
    },
    {
      "epoch": 0.9237184396745285,
      "grad_norm": 0.6608287692070007,
      "learning_rate": 8.206282163636723e-06,
      "loss": 0.0218,
      "step": 564440
    },
    {
      "epoch": 0.9237511701131819,
      "grad_norm": 0.6096571683883667,
      "learning_rate": 8.206216271423207e-06,
      "loss": 0.0427,
      "step": 564460
    },
    {
      "epoch": 0.9237839005518352,
      "grad_norm": 0.5147418975830078,
      "learning_rate": 8.206150379209689e-06,
      "loss": 0.0217,
      "step": 564480
    },
    {
      "epoch": 0.9238166309904885,
      "grad_norm": 0.26172080636024475,
      "learning_rate": 8.206084486996173e-06,
      "loss": 0.0278,
      "step": 564500
    },
    {
      "epoch": 0.9238493614291419,
      "grad_norm": 0.22978976368904114,
      "learning_rate": 8.206018594782656e-06,
      "loss": 0.0266,
      "step": 564520
    },
    {
      "epoch": 0.9238820918677952,
      "grad_norm": 1.0085593461990356,
      "learning_rate": 8.205952702569138e-06,
      "loss": 0.0249,
      "step": 564540
    },
    {
      "epoch": 0.9239148223064485,
      "grad_norm": 0.15949708223342896,
      "learning_rate": 8.205886810355622e-06,
      "loss": 0.0311,
      "step": 564560
    },
    {
      "epoch": 0.9239475527451019,
      "grad_norm": 0.400213360786438,
      "learning_rate": 8.205820918142103e-06,
      "loss": 0.0264,
      "step": 564580
    },
    {
      "epoch": 0.9239802831837552,
      "grad_norm": 19.07670783996582,
      "learning_rate": 8.205755025928587e-06,
      "loss": 0.0306,
      "step": 564600
    },
    {
      "epoch": 0.9240130136224086,
      "grad_norm": 0.9643270373344421,
      "learning_rate": 8.205689133715069e-06,
      "loss": 0.0374,
      "step": 564620
    },
    {
      "epoch": 0.9240457440610619,
      "grad_norm": 0.7105797529220581,
      "learning_rate": 8.205623241501553e-06,
      "loss": 0.0289,
      "step": 564640
    },
    {
      "epoch": 0.9240784744997153,
      "grad_norm": 0.4152413308620453,
      "learning_rate": 8.205557349288036e-06,
      "loss": 0.0276,
      "step": 564660
    },
    {
      "epoch": 0.9241112049383686,
      "grad_norm": 0.3111608326435089,
      "learning_rate": 8.205491457074518e-06,
      "loss": 0.0245,
      "step": 564680
    },
    {
      "epoch": 0.9241439353770219,
      "grad_norm": 0.669001042842865,
      "learning_rate": 8.205425564861002e-06,
      "loss": 0.0313,
      "step": 564700
    },
    {
      "epoch": 0.9241766658156753,
      "grad_norm": 0.4192931652069092,
      "learning_rate": 8.205359672647483e-06,
      "loss": 0.0234,
      "step": 564720
    },
    {
      "epoch": 0.9242093962543286,
      "grad_norm": 0.17310737073421478,
      "learning_rate": 8.205293780433967e-06,
      "loss": 0.0303,
      "step": 564740
    },
    {
      "epoch": 0.9242421266929819,
      "grad_norm": 2.866022825241089,
      "learning_rate": 8.205227888220449e-06,
      "loss": 0.034,
      "step": 564760
    },
    {
      "epoch": 0.9242748571316353,
      "grad_norm": 0.3047448992729187,
      "learning_rate": 8.205161996006933e-06,
      "loss": 0.0332,
      "step": 564780
    },
    {
      "epoch": 0.9243075875702886,
      "grad_norm": 0.7855876088142395,
      "learning_rate": 8.205096103793414e-06,
      "loss": 0.0255,
      "step": 564800
    },
    {
      "epoch": 0.924340318008942,
      "grad_norm": 0.4491584599018097,
      "learning_rate": 8.205030211579898e-06,
      "loss": 0.028,
      "step": 564820
    },
    {
      "epoch": 0.9243730484475953,
      "grad_norm": 1.2417752742767334,
      "learning_rate": 8.204964319366382e-06,
      "loss": 0.0329,
      "step": 564840
    },
    {
      "epoch": 0.9244057788862486,
      "grad_norm": 1.828262448310852,
      "learning_rate": 8.204898427152864e-06,
      "loss": 0.0309,
      "step": 564860
    },
    {
      "epoch": 0.924438509324902,
      "grad_norm": 1.0221911668777466,
      "learning_rate": 8.204832534939347e-06,
      "loss": 0.0285,
      "step": 564880
    },
    {
      "epoch": 0.9244712397635553,
      "grad_norm": 0.5885097980499268,
      "learning_rate": 8.20476664272583e-06,
      "loss": 0.0327,
      "step": 564900
    },
    {
      "epoch": 0.9245039702022086,
      "grad_norm": 3.2831766605377197,
      "learning_rate": 8.204700750512313e-06,
      "loss": 0.0374,
      "step": 564920
    },
    {
      "epoch": 0.924536700640862,
      "grad_norm": 0.40203213691711426,
      "learning_rate": 8.204634858298796e-06,
      "loss": 0.0345,
      "step": 564940
    },
    {
      "epoch": 0.9245694310795153,
      "grad_norm": 0.5552248954772949,
      "learning_rate": 8.204568966085278e-06,
      "loss": 0.0224,
      "step": 564960
    },
    {
      "epoch": 0.9246021615181687,
      "grad_norm": 0.7705329656600952,
      "learning_rate": 8.204503073871762e-06,
      "loss": 0.0257,
      "step": 564980
    },
    {
      "epoch": 0.924634891956822,
      "grad_norm": 1.227203369140625,
      "learning_rate": 8.204437181658245e-06,
      "loss": 0.0324,
      "step": 565000
    },
    {
      "epoch": 0.9246676223954754,
      "grad_norm": 1.061838984489441,
      "learning_rate": 8.204371289444727e-06,
      "loss": 0.028,
      "step": 565020
    },
    {
      "epoch": 0.9247003528341287,
      "grad_norm": 1.3685287237167358,
      "learning_rate": 8.20430539723121e-06,
      "loss": 0.0337,
      "step": 565040
    },
    {
      "epoch": 0.924733083272782,
      "grad_norm": 0.282112181186676,
      "learning_rate": 8.204239505017693e-06,
      "loss": 0.0239,
      "step": 565060
    },
    {
      "epoch": 0.9247658137114354,
      "grad_norm": 0.5840075016021729,
      "learning_rate": 8.204173612804176e-06,
      "loss": 0.0354,
      "step": 565080
    },
    {
      "epoch": 0.9247985441500887,
      "grad_norm": 0.17405381798744202,
      "learning_rate": 8.204107720590658e-06,
      "loss": 0.0325,
      "step": 565100
    },
    {
      "epoch": 0.924831274588742,
      "grad_norm": 0.88495272397995,
      "learning_rate": 8.204041828377142e-06,
      "loss": 0.0368,
      "step": 565120
    },
    {
      "epoch": 0.9248640050273954,
      "grad_norm": 1.7741190195083618,
      "learning_rate": 8.203975936163624e-06,
      "loss": 0.034,
      "step": 565140
    },
    {
      "epoch": 0.9248967354660487,
      "grad_norm": 0.2914626896381378,
      "learning_rate": 8.203910043950107e-06,
      "loss": 0.0312,
      "step": 565160
    },
    {
      "epoch": 0.924929465904702,
      "grad_norm": 1.1682171821594238,
      "learning_rate": 8.203844151736589e-06,
      "loss": 0.0306,
      "step": 565180
    },
    {
      "epoch": 0.9249621963433554,
      "grad_norm": 0.5425290465354919,
      "learning_rate": 8.203778259523073e-06,
      "loss": 0.0214,
      "step": 565200
    },
    {
      "epoch": 0.9249949267820088,
      "grad_norm": 0.22731173038482666,
      "learning_rate": 8.203712367309555e-06,
      "loss": 0.0252,
      "step": 565220
    },
    {
      "epoch": 0.925027657220662,
      "grad_norm": 0.8761880993843079,
      "learning_rate": 8.203646475096038e-06,
      "loss": 0.0315,
      "step": 565240
    },
    {
      "epoch": 0.9250603876593154,
      "grad_norm": 1.391910433769226,
      "learning_rate": 8.203580582882522e-06,
      "loss": 0.0273,
      "step": 565260
    },
    {
      "epoch": 0.9250931180979688,
      "grad_norm": 0.5751426815986633,
      "learning_rate": 8.203514690669004e-06,
      "loss": 0.0241,
      "step": 565280
    },
    {
      "epoch": 0.925125848536622,
      "grad_norm": 1.1698461771011353,
      "learning_rate": 8.203448798455487e-06,
      "loss": 0.0297,
      "step": 565300
    },
    {
      "epoch": 0.9251585789752754,
      "grad_norm": 0.4105185866355896,
      "learning_rate": 8.20338290624197e-06,
      "loss": 0.0354,
      "step": 565320
    },
    {
      "epoch": 0.9251913094139288,
      "grad_norm": 1.1928478479385376,
      "learning_rate": 8.203317014028453e-06,
      "loss": 0.0346,
      "step": 565340
    },
    {
      "epoch": 0.9252240398525821,
      "grad_norm": 1.1316171884536743,
      "learning_rate": 8.203251121814936e-06,
      "loss": 0.0287,
      "step": 565360
    },
    {
      "epoch": 0.9252567702912354,
      "grad_norm": 1.1527469158172607,
      "learning_rate": 8.20318522960142e-06,
      "loss": 0.0297,
      "step": 565380
    },
    {
      "epoch": 0.9252895007298888,
      "grad_norm": 1.1538196802139282,
      "learning_rate": 8.203119337387902e-06,
      "loss": 0.0332,
      "step": 565400
    },
    {
      "epoch": 0.9253222311685422,
      "grad_norm": 0.7770662307739258,
      "learning_rate": 8.203053445174385e-06,
      "loss": 0.0309,
      "step": 565420
    },
    {
      "epoch": 0.9253549616071954,
      "grad_norm": 1.3339284658432007,
      "learning_rate": 8.202987552960867e-06,
      "loss": 0.0347,
      "step": 565440
    },
    {
      "epoch": 0.9253876920458488,
      "grad_norm": 0.20095942914485931,
      "learning_rate": 8.20292166074735e-06,
      "loss": 0.0195,
      "step": 565460
    },
    {
      "epoch": 0.9254204224845022,
      "grad_norm": 3.4721322059631348,
      "learning_rate": 8.202855768533833e-06,
      "loss": 0.0247,
      "step": 565480
    },
    {
      "epoch": 0.9254531529231554,
      "grad_norm": 0.6898736953735352,
      "learning_rate": 8.202789876320316e-06,
      "loss": 0.0236,
      "step": 565500
    },
    {
      "epoch": 0.9254858833618088,
      "grad_norm": 2.884223461151123,
      "learning_rate": 8.202723984106798e-06,
      "loss": 0.0231,
      "step": 565520
    },
    {
      "epoch": 0.9255186138004622,
      "grad_norm": 0.3275693356990814,
      "learning_rate": 8.202658091893282e-06,
      "loss": 0.0226,
      "step": 565540
    },
    {
      "epoch": 0.9255513442391154,
      "grad_norm": 0.65440434217453,
      "learning_rate": 8.202592199679764e-06,
      "loss": 0.0326,
      "step": 565560
    },
    {
      "epoch": 0.9255840746777688,
      "grad_norm": 0.5679174065589905,
      "learning_rate": 8.202526307466247e-06,
      "loss": 0.0322,
      "step": 565580
    },
    {
      "epoch": 0.9256168051164222,
      "grad_norm": 1.604383111000061,
      "learning_rate": 8.202460415252729e-06,
      "loss": 0.018,
      "step": 565600
    },
    {
      "epoch": 0.9256495355550756,
      "grad_norm": 2.2359204292297363,
      "learning_rate": 8.202394523039213e-06,
      "loss": 0.0271,
      "step": 565620
    },
    {
      "epoch": 0.9256822659937288,
      "grad_norm": 0.11043231934309006,
      "learning_rate": 8.202328630825696e-06,
      "loss": 0.0211,
      "step": 565640
    },
    {
      "epoch": 0.9257149964323822,
      "grad_norm": 2.724168062210083,
      "learning_rate": 8.202262738612178e-06,
      "loss": 0.0306,
      "step": 565660
    },
    {
      "epoch": 0.9257477268710356,
      "grad_norm": 1.5519883632659912,
      "learning_rate": 8.202196846398662e-06,
      "loss": 0.0395,
      "step": 565680
    },
    {
      "epoch": 0.9257804573096888,
      "grad_norm": 1.9893074035644531,
      "learning_rate": 8.202130954185145e-06,
      "loss": 0.0306,
      "step": 565700
    },
    {
      "epoch": 0.9258131877483422,
      "grad_norm": 0.46746551990509033,
      "learning_rate": 8.202065061971627e-06,
      "loss": 0.0336,
      "step": 565720
    },
    {
      "epoch": 0.9258459181869956,
      "grad_norm": 0.7337107062339783,
      "learning_rate": 8.20199916975811e-06,
      "loss": 0.0272,
      "step": 565740
    },
    {
      "epoch": 0.9258786486256488,
      "grad_norm": 1.332737922668457,
      "learning_rate": 8.201933277544594e-06,
      "loss": 0.0281,
      "step": 565760
    },
    {
      "epoch": 0.9259113790643022,
      "grad_norm": 0.32907938957214355,
      "learning_rate": 8.201867385331076e-06,
      "loss": 0.0323,
      "step": 565780
    },
    {
      "epoch": 0.9259441095029556,
      "grad_norm": 0.7754407525062561,
      "learning_rate": 8.20180149311756e-06,
      "loss": 0.0226,
      "step": 565800
    },
    {
      "epoch": 0.9259768399416088,
      "grad_norm": 1.188580870628357,
      "learning_rate": 8.201735600904042e-06,
      "loss": 0.0163,
      "step": 565820
    },
    {
      "epoch": 0.9260095703802622,
      "grad_norm": 1.7106462717056274,
      "learning_rate": 8.201669708690525e-06,
      "loss": 0.0272,
      "step": 565840
    },
    {
      "epoch": 0.9260423008189156,
      "grad_norm": 0.9815688133239746,
      "learning_rate": 8.201603816477007e-06,
      "loss": 0.0273,
      "step": 565860
    },
    {
      "epoch": 0.926075031257569,
      "grad_norm": 0.4137115776538849,
      "learning_rate": 8.201537924263491e-06,
      "loss": 0.0339,
      "step": 565880
    },
    {
      "epoch": 0.9261077616962222,
      "grad_norm": 0.8213108777999878,
      "learning_rate": 8.201472032049973e-06,
      "loss": 0.0274,
      "step": 565900
    },
    {
      "epoch": 0.9261404921348756,
      "grad_norm": 1.3802891969680786,
      "learning_rate": 8.201406139836456e-06,
      "loss": 0.0282,
      "step": 565920
    },
    {
      "epoch": 0.926173222573529,
      "grad_norm": 1.7107760906219482,
      "learning_rate": 8.201340247622938e-06,
      "loss": 0.0412,
      "step": 565940
    },
    {
      "epoch": 0.9262059530121822,
      "grad_norm": 0.3558506667613983,
      "learning_rate": 8.201274355409422e-06,
      "loss": 0.0339,
      "step": 565960
    },
    {
      "epoch": 0.9262386834508356,
      "grad_norm": 0.7194328308105469,
      "learning_rate": 8.201208463195904e-06,
      "loss": 0.0275,
      "step": 565980
    },
    {
      "epoch": 0.926271413889489,
      "grad_norm": 0.6788367033004761,
      "learning_rate": 8.201142570982387e-06,
      "loss": 0.029,
      "step": 566000
    },
    {
      "epoch": 0.9263041443281422,
      "grad_norm": 1.1580007076263428,
      "learning_rate": 8.20107667876887e-06,
      "loss": 0.0245,
      "step": 566020
    },
    {
      "epoch": 0.9263368747667956,
      "grad_norm": 0.8137844800949097,
      "learning_rate": 8.201010786555353e-06,
      "loss": 0.0294,
      "step": 566040
    },
    {
      "epoch": 0.926369605205449,
      "grad_norm": 1.204506516456604,
      "learning_rate": 8.200944894341836e-06,
      "loss": 0.0318,
      "step": 566060
    },
    {
      "epoch": 0.9264023356441023,
      "grad_norm": 0.6538791060447693,
      "learning_rate": 8.20087900212832e-06,
      "loss": 0.0437,
      "step": 566080
    },
    {
      "epoch": 0.9264350660827556,
      "grad_norm": 1.6798219680786133,
      "learning_rate": 8.200813109914802e-06,
      "loss": 0.0323,
      "step": 566100
    },
    {
      "epoch": 0.926467796521409,
      "grad_norm": 0.1723124086856842,
      "learning_rate": 8.200747217701285e-06,
      "loss": 0.0347,
      "step": 566120
    },
    {
      "epoch": 0.9265005269600624,
      "grad_norm": 1.310183048248291,
      "learning_rate": 8.200681325487769e-06,
      "loss": 0.0242,
      "step": 566140
    },
    {
      "epoch": 0.9265332573987156,
      "grad_norm": 1.316111445426941,
      "learning_rate": 8.200615433274251e-06,
      "loss": 0.0223,
      "step": 566160
    },
    {
      "epoch": 0.926565987837369,
      "grad_norm": 0.761748194694519,
      "learning_rate": 8.200549541060735e-06,
      "loss": 0.0336,
      "step": 566180
    },
    {
      "epoch": 0.9265987182760224,
      "grad_norm": 0.2380804568529129,
      "learning_rate": 8.200483648847216e-06,
      "loss": 0.0296,
      "step": 566200
    },
    {
      "epoch": 0.9266314487146756,
      "grad_norm": 0.5098458528518677,
      "learning_rate": 8.2004177566337e-06,
      "loss": 0.0196,
      "step": 566220
    },
    {
      "epoch": 0.926664179153329,
      "grad_norm": 0.46148407459259033,
      "learning_rate": 8.200351864420182e-06,
      "loss": 0.0263,
      "step": 566240
    },
    {
      "epoch": 0.9266969095919824,
      "grad_norm": 0.4172861874103546,
      "learning_rate": 8.200285972206665e-06,
      "loss": 0.0164,
      "step": 566260
    },
    {
      "epoch": 0.9267296400306357,
      "grad_norm": 4.133512020111084,
      "learning_rate": 8.200220079993147e-06,
      "loss": 0.03,
      "step": 566280
    },
    {
      "epoch": 0.926762370469289,
      "grad_norm": 0.277261346578598,
      "learning_rate": 8.200154187779631e-06,
      "loss": 0.0265,
      "step": 566300
    },
    {
      "epoch": 0.9267951009079424,
      "grad_norm": 1.3858604431152344,
      "learning_rate": 8.200088295566113e-06,
      "loss": 0.0337,
      "step": 566320
    },
    {
      "epoch": 0.9268278313465957,
      "grad_norm": 0.3510752022266388,
      "learning_rate": 8.200022403352596e-06,
      "loss": 0.0393,
      "step": 566340
    },
    {
      "epoch": 0.926860561785249,
      "grad_norm": 1.0215425491333008,
      "learning_rate": 8.199956511139078e-06,
      "loss": 0.0335,
      "step": 566360
    },
    {
      "epoch": 0.9268932922239024,
      "grad_norm": 0.15020957589149475,
      "learning_rate": 8.199890618925562e-06,
      "loss": 0.041,
      "step": 566380
    },
    {
      "epoch": 0.9269260226625557,
      "grad_norm": 1.1617916822433472,
      "learning_rate": 8.199824726712045e-06,
      "loss": 0.0252,
      "step": 566400
    },
    {
      "epoch": 0.926958753101209,
      "grad_norm": 0.6247428059577942,
      "learning_rate": 8.199758834498527e-06,
      "loss": 0.0205,
      "step": 566420
    },
    {
      "epoch": 0.9269914835398624,
      "grad_norm": 1.2835421562194824,
      "learning_rate": 8.199692942285011e-06,
      "loss": 0.0359,
      "step": 566440
    },
    {
      "epoch": 0.9270242139785158,
      "grad_norm": 1.0316846370697021,
      "learning_rate": 8.199627050071493e-06,
      "loss": 0.0272,
      "step": 566460
    },
    {
      "epoch": 0.9270569444171691,
      "grad_norm": 1.821895956993103,
      "learning_rate": 8.199561157857976e-06,
      "loss": 0.0331,
      "step": 566480
    },
    {
      "epoch": 0.9270896748558224,
      "grad_norm": 0.5731073617935181,
      "learning_rate": 8.19949526564446e-06,
      "loss": 0.0251,
      "step": 566500
    },
    {
      "epoch": 0.9271224052944758,
      "grad_norm": 2.29508376121521,
      "learning_rate": 8.199429373430942e-06,
      "loss": 0.027,
      "step": 566520
    },
    {
      "epoch": 0.9271551357331291,
      "grad_norm": 0.7022382616996765,
      "learning_rate": 8.199363481217426e-06,
      "loss": 0.0196,
      "step": 566540
    },
    {
      "epoch": 0.9271878661717824,
      "grad_norm": 0.21219560503959656,
      "learning_rate": 8.199297589003909e-06,
      "loss": 0.0195,
      "step": 566560
    },
    {
      "epoch": 0.9272205966104358,
      "grad_norm": 2.9087555408477783,
      "learning_rate": 8.199231696790391e-06,
      "loss": 0.0346,
      "step": 566580
    },
    {
      "epoch": 0.9272533270490891,
      "grad_norm": 2.749866485595703,
      "learning_rate": 8.199165804576875e-06,
      "loss": 0.0392,
      "step": 566600
    },
    {
      "epoch": 0.9272860574877424,
      "grad_norm": 1.4301238059997559,
      "learning_rate": 8.199099912363356e-06,
      "loss": 0.0288,
      "step": 566620
    },
    {
      "epoch": 0.9273187879263958,
      "grad_norm": 0.6186710000038147,
      "learning_rate": 8.19903402014984e-06,
      "loss": 0.0299,
      "step": 566640
    },
    {
      "epoch": 0.9273515183650491,
      "grad_norm": 1.38136625289917,
      "learning_rate": 8.198968127936322e-06,
      "loss": 0.0221,
      "step": 566660
    },
    {
      "epoch": 0.9273842488037025,
      "grad_norm": 0.9001797437667847,
      "learning_rate": 8.198902235722806e-06,
      "loss": 0.0308,
      "step": 566680
    },
    {
      "epoch": 0.9274169792423558,
      "grad_norm": 1.2171281576156616,
      "learning_rate": 8.198836343509287e-06,
      "loss": 0.0243,
      "step": 566700
    },
    {
      "epoch": 0.9274497096810091,
      "grad_norm": 0.7278490662574768,
      "learning_rate": 8.198770451295771e-06,
      "loss": 0.0308,
      "step": 566720
    },
    {
      "epoch": 0.9274824401196625,
      "grad_norm": 0.8706595301628113,
      "learning_rate": 8.198704559082253e-06,
      "loss": 0.0347,
      "step": 566740
    },
    {
      "epoch": 0.9275151705583158,
      "grad_norm": 0.3293488025665283,
      "learning_rate": 8.198638666868736e-06,
      "loss": 0.0186,
      "step": 566760
    },
    {
      "epoch": 0.9275479009969692,
      "grad_norm": 1.5332118272781372,
      "learning_rate": 8.19857277465522e-06,
      "loss": 0.0292,
      "step": 566780
    },
    {
      "epoch": 0.9275806314356225,
      "grad_norm": 0.13801193237304688,
      "learning_rate": 8.198506882441702e-06,
      "loss": 0.0341,
      "step": 566800
    },
    {
      "epoch": 0.9276133618742758,
      "grad_norm": 0.7108616828918457,
      "learning_rate": 8.198440990228186e-06,
      "loss": 0.0249,
      "step": 566820
    },
    {
      "epoch": 0.9276460923129292,
      "grad_norm": 2.8200740814208984,
      "learning_rate": 8.198375098014667e-06,
      "loss": 0.0346,
      "step": 566840
    },
    {
      "epoch": 0.9276788227515825,
      "grad_norm": 0.8243271112442017,
      "learning_rate": 8.198309205801151e-06,
      "loss": 0.0311,
      "step": 566860
    },
    {
      "epoch": 0.9277115531902359,
      "grad_norm": 1.5162452459335327,
      "learning_rate": 8.198243313587635e-06,
      "loss": 0.0268,
      "step": 566880
    },
    {
      "epoch": 0.9277442836288892,
      "grad_norm": 0.651970386505127,
      "learning_rate": 8.198177421374117e-06,
      "loss": 0.0249,
      "step": 566900
    },
    {
      "epoch": 0.9277770140675425,
      "grad_norm": 1.527321696281433,
      "learning_rate": 8.1981115291606e-06,
      "loss": 0.0248,
      "step": 566920
    },
    {
      "epoch": 0.9278097445061959,
      "grad_norm": 2.20603084564209,
      "learning_rate": 8.198045636947084e-06,
      "loss": 0.0278,
      "step": 566940
    },
    {
      "epoch": 0.9278424749448492,
      "grad_norm": 0.5555782914161682,
      "learning_rate": 8.197979744733566e-06,
      "loss": 0.0284,
      "step": 566960
    },
    {
      "epoch": 0.9278752053835025,
      "grad_norm": 0.24484147131443024,
      "learning_rate": 8.19791385252005e-06,
      "loss": 0.0284,
      "step": 566980
    },
    {
      "epoch": 0.9279079358221559,
      "grad_norm": 0.9747109413146973,
      "learning_rate": 8.197847960306531e-06,
      "loss": 0.0444,
      "step": 567000
    },
    {
      "epoch": 0.9279406662608092,
      "grad_norm": 0.6821991801261902,
      "learning_rate": 8.197782068093015e-06,
      "loss": 0.031,
      "step": 567020
    },
    {
      "epoch": 0.9279733966994625,
      "grad_norm": 0.46751701831817627,
      "learning_rate": 8.197716175879497e-06,
      "loss": 0.0303,
      "step": 567040
    },
    {
      "epoch": 0.9280061271381159,
      "grad_norm": 1.005030870437622,
      "learning_rate": 8.19765028366598e-06,
      "loss": 0.0327,
      "step": 567060
    },
    {
      "epoch": 0.9280388575767693,
      "grad_norm": 3.5238513946533203,
      "learning_rate": 8.197584391452462e-06,
      "loss": 0.033,
      "step": 567080
    },
    {
      "epoch": 0.9280715880154226,
      "grad_norm": 1.0861490964889526,
      "learning_rate": 8.197518499238946e-06,
      "loss": 0.0216,
      "step": 567100
    },
    {
      "epoch": 0.9281043184540759,
      "grad_norm": 1.1591132879257202,
      "learning_rate": 8.19745260702543e-06,
      "loss": 0.0253,
      "step": 567120
    },
    {
      "epoch": 0.9281370488927293,
      "grad_norm": 0.7768520712852478,
      "learning_rate": 8.197386714811911e-06,
      "loss": 0.027,
      "step": 567140
    },
    {
      "epoch": 0.9281697793313826,
      "grad_norm": 0.731214702129364,
      "learning_rate": 8.197320822598395e-06,
      "loss": 0.0277,
      "step": 567160
    },
    {
      "epoch": 0.9282025097700359,
      "grad_norm": 0.8921361565589905,
      "learning_rate": 8.197254930384877e-06,
      "loss": 0.0226,
      "step": 567180
    },
    {
      "epoch": 0.9282352402086893,
      "grad_norm": 0.5383836627006531,
      "learning_rate": 8.19718903817136e-06,
      "loss": 0.0242,
      "step": 567200
    },
    {
      "epoch": 0.9282679706473426,
      "grad_norm": 0.9650996327400208,
      "learning_rate": 8.197123145957842e-06,
      "loss": 0.0216,
      "step": 567220
    },
    {
      "epoch": 0.9283007010859959,
      "grad_norm": 1.231337070465088,
      "learning_rate": 8.197057253744326e-06,
      "loss": 0.0249,
      "step": 567240
    },
    {
      "epoch": 0.9283334315246493,
      "grad_norm": 0.7178112864494324,
      "learning_rate": 8.196991361530808e-06,
      "loss": 0.0194,
      "step": 567260
    },
    {
      "epoch": 0.9283661619633027,
      "grad_norm": 0.17939314246177673,
      "learning_rate": 8.196925469317291e-06,
      "loss": 0.038,
      "step": 567280
    },
    {
      "epoch": 0.9283988924019559,
      "grad_norm": 1.6327013969421387,
      "learning_rate": 8.196859577103775e-06,
      "loss": 0.0271,
      "step": 567300
    },
    {
      "epoch": 0.9284316228406093,
      "grad_norm": 1.0472074747085571,
      "learning_rate": 8.196793684890257e-06,
      "loss": 0.0277,
      "step": 567320
    },
    {
      "epoch": 0.9284643532792627,
      "grad_norm": 0.972420334815979,
      "learning_rate": 8.19672779267674e-06,
      "loss": 0.0267,
      "step": 567340
    },
    {
      "epoch": 0.928497083717916,
      "grad_norm": 0.8984687924385071,
      "learning_rate": 8.196661900463224e-06,
      "loss": 0.0193,
      "step": 567360
    },
    {
      "epoch": 0.9285298141565693,
      "grad_norm": 2.132798194885254,
      "learning_rate": 8.196596008249706e-06,
      "loss": 0.0313,
      "step": 567380
    },
    {
      "epoch": 0.9285625445952227,
      "grad_norm": 1.170644998550415,
      "learning_rate": 8.19653011603619e-06,
      "loss": 0.0335,
      "step": 567400
    },
    {
      "epoch": 0.928595275033876,
      "grad_norm": 0.7115474939346313,
      "learning_rate": 8.196464223822671e-06,
      "loss": 0.0195,
      "step": 567420
    },
    {
      "epoch": 0.9286280054725293,
      "grad_norm": 1.0037717819213867,
      "learning_rate": 8.196398331609155e-06,
      "loss": 0.0251,
      "step": 567440
    },
    {
      "epoch": 0.9286607359111827,
      "grad_norm": 0.5694165825843811,
      "learning_rate": 8.196332439395638e-06,
      "loss": 0.0198,
      "step": 567460
    },
    {
      "epoch": 0.9286934663498361,
      "grad_norm": 0.40183600783348083,
      "learning_rate": 8.19626654718212e-06,
      "loss": 0.0391,
      "step": 567480
    },
    {
      "epoch": 0.9287261967884893,
      "grad_norm": 1.2231701612472534,
      "learning_rate": 8.196200654968604e-06,
      "loss": 0.0353,
      "step": 567500
    },
    {
      "epoch": 0.9287589272271427,
      "grad_norm": 0.5169795751571655,
      "learning_rate": 8.196134762755086e-06,
      "loss": 0.026,
      "step": 567520
    },
    {
      "epoch": 0.9287916576657961,
      "grad_norm": 0.2536160349845886,
      "learning_rate": 8.19606887054157e-06,
      "loss": 0.0355,
      "step": 567540
    },
    {
      "epoch": 0.9288243881044493,
      "grad_norm": 0.8065741658210754,
      "learning_rate": 8.196002978328051e-06,
      "loss": 0.0273,
      "step": 567560
    },
    {
      "epoch": 0.9288571185431027,
      "grad_norm": 1.1092036962509155,
      "learning_rate": 8.195937086114535e-06,
      "loss": 0.0215,
      "step": 567580
    },
    {
      "epoch": 0.9288898489817561,
      "grad_norm": 0.5171429514884949,
      "learning_rate": 8.195871193901017e-06,
      "loss": 0.0299,
      "step": 567600
    },
    {
      "epoch": 0.9289225794204093,
      "grad_norm": 1.4497805833816528,
      "learning_rate": 8.1958053016875e-06,
      "loss": 0.028,
      "step": 567620
    },
    {
      "epoch": 0.9289553098590627,
      "grad_norm": 1.2720086574554443,
      "learning_rate": 8.195739409473982e-06,
      "loss": 0.0244,
      "step": 567640
    },
    {
      "epoch": 0.9289880402977161,
      "grad_norm": 0.611589252948761,
      "learning_rate": 8.195673517260466e-06,
      "loss": 0.0247,
      "step": 567660
    },
    {
      "epoch": 0.9290207707363695,
      "grad_norm": 0.9745793342590332,
      "learning_rate": 8.19560762504695e-06,
      "loss": 0.0357,
      "step": 567680
    },
    {
      "epoch": 0.9290535011750227,
      "grad_norm": 1.1833362579345703,
      "learning_rate": 8.195541732833431e-06,
      "loss": 0.0225,
      "step": 567700
    },
    {
      "epoch": 0.9290862316136761,
      "grad_norm": 0.18043667078018188,
      "learning_rate": 8.195475840619915e-06,
      "loss": 0.0218,
      "step": 567720
    },
    {
      "epoch": 0.9291189620523295,
      "grad_norm": 1.149268388748169,
      "learning_rate": 8.195409948406398e-06,
      "loss": 0.0387,
      "step": 567740
    },
    {
      "epoch": 0.9291516924909827,
      "grad_norm": 1.9290212392807007,
      "learning_rate": 8.19534405619288e-06,
      "loss": 0.0309,
      "step": 567760
    },
    {
      "epoch": 0.9291844229296361,
      "grad_norm": 1.8807032108306885,
      "learning_rate": 8.195278163979364e-06,
      "loss": 0.0338,
      "step": 567780
    },
    {
      "epoch": 0.9292171533682895,
      "grad_norm": 0.48650816082954407,
      "learning_rate": 8.195212271765846e-06,
      "loss": 0.0214,
      "step": 567800
    },
    {
      "epoch": 0.9292498838069427,
      "grad_norm": 1.0908236503601074,
      "learning_rate": 8.19514637955233e-06,
      "loss": 0.0255,
      "step": 567820
    },
    {
      "epoch": 0.9292826142455961,
      "grad_norm": 0.9500150084495544,
      "learning_rate": 8.195080487338813e-06,
      "loss": 0.0234,
      "step": 567840
    },
    {
      "epoch": 0.9293153446842495,
      "grad_norm": 1.22171950340271,
      "learning_rate": 8.195014595125295e-06,
      "loss": 0.0246,
      "step": 567860
    },
    {
      "epoch": 0.9293480751229029,
      "grad_norm": 0.1588493436574936,
      "learning_rate": 8.194948702911778e-06,
      "loss": 0.0326,
      "step": 567880
    },
    {
      "epoch": 0.9293808055615561,
      "grad_norm": 1.8190897703170776,
      "learning_rate": 8.19488281069826e-06,
      "loss": 0.0268,
      "step": 567900
    },
    {
      "epoch": 0.9294135360002095,
      "grad_norm": 0.9040861129760742,
      "learning_rate": 8.194816918484744e-06,
      "loss": 0.0225,
      "step": 567920
    },
    {
      "epoch": 0.9294462664388629,
      "grad_norm": 0.8681333065032959,
      "learning_rate": 8.194751026271226e-06,
      "loss": 0.0276,
      "step": 567940
    },
    {
      "epoch": 0.9294789968775161,
      "grad_norm": 0.19670915603637695,
      "learning_rate": 8.19468513405771e-06,
      "loss": 0.0218,
      "step": 567960
    },
    {
      "epoch": 0.9295117273161695,
      "grad_norm": 1.2536624670028687,
      "learning_rate": 8.194619241844191e-06,
      "loss": 0.0363,
      "step": 567980
    },
    {
      "epoch": 0.9295444577548229,
      "grad_norm": 1.2539206743240356,
      "learning_rate": 8.194553349630675e-06,
      "loss": 0.0309,
      "step": 568000
    },
    {
      "epoch": 0.9295771881934761,
      "grad_norm": 0.6278238296508789,
      "learning_rate": 8.194487457417157e-06,
      "loss": 0.0347,
      "step": 568020
    },
    {
      "epoch": 0.9296099186321295,
      "grad_norm": 1.1760340929031372,
      "learning_rate": 8.19442156520364e-06,
      "loss": 0.0257,
      "step": 568040
    },
    {
      "epoch": 0.9296426490707829,
      "grad_norm": 0.30739858746528625,
      "learning_rate": 8.194355672990122e-06,
      "loss": 0.0221,
      "step": 568060
    },
    {
      "epoch": 0.9296753795094362,
      "grad_norm": 0.4144796133041382,
      "learning_rate": 8.194289780776606e-06,
      "loss": 0.0189,
      "step": 568080
    },
    {
      "epoch": 0.9297081099480895,
      "grad_norm": 1.2445852756500244,
      "learning_rate": 8.19422388856309e-06,
      "loss": 0.0245,
      "step": 568100
    },
    {
      "epoch": 0.9297408403867429,
      "grad_norm": 0.9041357040405273,
      "learning_rate": 8.194157996349571e-06,
      "loss": 0.0301,
      "step": 568120
    },
    {
      "epoch": 0.9297735708253962,
      "grad_norm": 0.571181058883667,
      "learning_rate": 8.194092104136055e-06,
      "loss": 0.02,
      "step": 568140
    },
    {
      "epoch": 0.9298063012640495,
      "grad_norm": 1.350382924079895,
      "learning_rate": 8.194026211922538e-06,
      "loss": 0.0257,
      "step": 568160
    },
    {
      "epoch": 0.9298390317027029,
      "grad_norm": 0.18855243921279907,
      "learning_rate": 8.19396031970902e-06,
      "loss": 0.0312,
      "step": 568180
    },
    {
      "epoch": 0.9298717621413563,
      "grad_norm": 0.21334220468997955,
      "learning_rate": 8.193894427495504e-06,
      "loss": 0.025,
      "step": 568200
    },
    {
      "epoch": 0.9299044925800095,
      "grad_norm": 0.8312978744506836,
      "learning_rate": 8.193828535281988e-06,
      "loss": 0.0307,
      "step": 568220
    },
    {
      "epoch": 0.9299372230186629,
      "grad_norm": 1.1707348823547363,
      "learning_rate": 8.19376264306847e-06,
      "loss": 0.0304,
      "step": 568240
    },
    {
      "epoch": 0.9299699534573163,
      "grad_norm": 1.787077784538269,
      "learning_rate": 8.193696750854953e-06,
      "loss": 0.0259,
      "step": 568260
    },
    {
      "epoch": 0.9300026838959696,
      "grad_norm": 0.7309574484825134,
      "learning_rate": 8.193630858641435e-06,
      "loss": 0.0324,
      "step": 568280
    },
    {
      "epoch": 0.9300354143346229,
      "grad_norm": 1.089989423751831,
      "learning_rate": 8.193564966427918e-06,
      "loss": 0.0282,
      "step": 568300
    },
    {
      "epoch": 0.9300681447732763,
      "grad_norm": 1.0108610391616821,
      "learning_rate": 8.1934990742144e-06,
      "loss": 0.0227,
      "step": 568320
    },
    {
      "epoch": 0.9301008752119296,
      "grad_norm": 6.317454814910889,
      "learning_rate": 8.193433182000884e-06,
      "loss": 0.0338,
      "step": 568340
    },
    {
      "epoch": 0.9301336056505829,
      "grad_norm": 0.4872981309890747,
      "learning_rate": 8.193367289787366e-06,
      "loss": 0.0335,
      "step": 568360
    },
    {
      "epoch": 0.9301663360892363,
      "grad_norm": 0.3894246816635132,
      "learning_rate": 8.19330139757385e-06,
      "loss": 0.0324,
      "step": 568380
    },
    {
      "epoch": 0.9301990665278896,
      "grad_norm": 0.3700191378593445,
      "learning_rate": 8.193235505360331e-06,
      "loss": 0.0172,
      "step": 568400
    },
    {
      "epoch": 0.9302317969665429,
      "grad_norm": 0.5369532704353333,
      "learning_rate": 8.193169613146815e-06,
      "loss": 0.0251,
      "step": 568420
    },
    {
      "epoch": 0.9302645274051963,
      "grad_norm": 1.0022456645965576,
      "learning_rate": 8.193103720933297e-06,
      "loss": 0.0253,
      "step": 568440
    },
    {
      "epoch": 0.9302972578438496,
      "grad_norm": 0.23865166306495667,
      "learning_rate": 8.19303782871978e-06,
      "loss": 0.0276,
      "step": 568460
    },
    {
      "epoch": 0.930329988282503,
      "grad_norm": 2.2465293407440186,
      "learning_rate": 8.192971936506264e-06,
      "loss": 0.0327,
      "step": 568480
    },
    {
      "epoch": 0.9303627187211563,
      "grad_norm": 1.1835627555847168,
      "learning_rate": 8.192906044292746e-06,
      "loss": 0.0329,
      "step": 568500
    },
    {
      "epoch": 0.9303954491598097,
      "grad_norm": 2.6313812732696533,
      "learning_rate": 8.19284015207923e-06,
      "loss": 0.0222,
      "step": 568520
    },
    {
      "epoch": 0.930428179598463,
      "grad_norm": 0.32319796085357666,
      "learning_rate": 8.192774259865713e-06,
      "loss": 0.0348,
      "step": 568540
    },
    {
      "epoch": 0.9304609100371163,
      "grad_norm": 1.3516250848770142,
      "learning_rate": 8.192708367652195e-06,
      "loss": 0.0288,
      "step": 568560
    },
    {
      "epoch": 0.9304936404757697,
      "grad_norm": 0.3450528383255005,
      "learning_rate": 8.192642475438679e-06,
      "loss": 0.0298,
      "step": 568580
    },
    {
      "epoch": 0.930526370914423,
      "grad_norm": 0.7937999367713928,
      "learning_rate": 8.192576583225162e-06,
      "loss": 0.0158,
      "step": 568600
    },
    {
      "epoch": 0.9305591013530763,
      "grad_norm": 0.5902780890464783,
      "learning_rate": 8.192510691011644e-06,
      "loss": 0.034,
      "step": 568620
    },
    {
      "epoch": 0.9305918317917297,
      "grad_norm": 0.670451819896698,
      "learning_rate": 8.192444798798128e-06,
      "loss": 0.0211,
      "step": 568640
    },
    {
      "epoch": 0.930624562230383,
      "grad_norm": 1.5217435359954834,
      "learning_rate": 8.19237890658461e-06,
      "loss": 0.0219,
      "step": 568660
    },
    {
      "epoch": 0.9306572926690363,
      "grad_norm": 0.4639054536819458,
      "learning_rate": 8.192313014371093e-06,
      "loss": 0.0326,
      "step": 568680
    },
    {
      "epoch": 0.9306900231076897,
      "grad_norm": 0.46319374442100525,
      "learning_rate": 8.192247122157575e-06,
      "loss": 0.0296,
      "step": 568700
    },
    {
      "epoch": 0.930722753546343,
      "grad_norm": 1.1650614738464355,
      "learning_rate": 8.192181229944059e-06,
      "loss": 0.0155,
      "step": 568720
    },
    {
      "epoch": 0.9307554839849964,
      "grad_norm": 0.7304859161376953,
      "learning_rate": 8.19211533773054e-06,
      "loss": 0.0243,
      "step": 568740
    },
    {
      "epoch": 0.9307882144236497,
      "grad_norm": 0.7943270802497864,
      "learning_rate": 8.192049445517024e-06,
      "loss": 0.0223,
      "step": 568760
    },
    {
      "epoch": 0.930820944862303,
      "grad_norm": 0.8221593499183655,
      "learning_rate": 8.191983553303506e-06,
      "loss": 0.0244,
      "step": 568780
    },
    {
      "epoch": 0.9308536753009564,
      "grad_norm": 1.0215470790863037,
      "learning_rate": 8.19191766108999e-06,
      "loss": 0.0214,
      "step": 568800
    },
    {
      "epoch": 0.9308864057396097,
      "grad_norm": 0.2904086709022522,
      "learning_rate": 8.191851768876471e-06,
      "loss": 0.0288,
      "step": 568820
    },
    {
      "epoch": 0.930919136178263,
      "grad_norm": 0.24132175743579865,
      "learning_rate": 8.191785876662955e-06,
      "loss": 0.0288,
      "step": 568840
    },
    {
      "epoch": 0.9309518666169164,
      "grad_norm": 0.6270257830619812,
      "learning_rate": 8.191719984449439e-06,
      "loss": 0.023,
      "step": 568860
    },
    {
      "epoch": 0.9309845970555697,
      "grad_norm": 0.5774478912353516,
      "learning_rate": 8.19165409223592e-06,
      "loss": 0.0221,
      "step": 568880
    },
    {
      "epoch": 0.9310173274942231,
      "grad_norm": 0.5909693837165833,
      "learning_rate": 8.191588200022404e-06,
      "loss": 0.0241,
      "step": 568900
    },
    {
      "epoch": 0.9310500579328764,
      "grad_norm": 1.2073074579238892,
      "learning_rate": 8.191522307808888e-06,
      "loss": 0.0292,
      "step": 568920
    },
    {
      "epoch": 0.9310827883715298,
      "grad_norm": 0.3599855303764343,
      "learning_rate": 8.19145641559537e-06,
      "loss": 0.0296,
      "step": 568940
    },
    {
      "epoch": 0.9311155188101831,
      "grad_norm": 0.5327100157737732,
      "learning_rate": 8.191390523381853e-06,
      "loss": 0.0325,
      "step": 568960
    },
    {
      "epoch": 0.9311482492488364,
      "grad_norm": 1.086004376411438,
      "learning_rate": 8.191324631168337e-06,
      "loss": 0.0344,
      "step": 568980
    },
    {
      "epoch": 0.9311809796874898,
      "grad_norm": 1.0173046588897705,
      "learning_rate": 8.191258738954819e-06,
      "loss": 0.0253,
      "step": 569000
    },
    {
      "epoch": 0.9312137101261431,
      "grad_norm": 4.308585166931152,
      "learning_rate": 8.191192846741302e-06,
      "loss": 0.0241,
      "step": 569020
    },
    {
      "epoch": 0.9312464405647964,
      "grad_norm": 0.7800381779670715,
      "learning_rate": 8.191126954527784e-06,
      "loss": 0.0316,
      "step": 569040
    },
    {
      "epoch": 0.9312791710034498,
      "grad_norm": 0.5255568027496338,
      "learning_rate": 8.191061062314268e-06,
      "loss": 0.0362,
      "step": 569060
    },
    {
      "epoch": 0.9313119014421031,
      "grad_norm": 0.37419968843460083,
      "learning_rate": 8.19099517010075e-06,
      "loss": 0.0236,
      "step": 569080
    },
    {
      "epoch": 0.9313446318807564,
      "grad_norm": 0.23318588733673096,
      "learning_rate": 8.190929277887233e-06,
      "loss": 0.0285,
      "step": 569100
    },
    {
      "epoch": 0.9313773623194098,
      "grad_norm": 0.9996266961097717,
      "learning_rate": 8.190863385673715e-06,
      "loss": 0.0338,
      "step": 569120
    },
    {
      "epoch": 0.9314100927580632,
      "grad_norm": 1.4911925792694092,
      "learning_rate": 8.190797493460199e-06,
      "loss": 0.0233,
      "step": 569140
    },
    {
      "epoch": 0.9314428231967165,
      "grad_norm": 1.3027986288070679,
      "learning_rate": 8.19073160124668e-06,
      "loss": 0.0281,
      "step": 569160
    },
    {
      "epoch": 0.9314755536353698,
      "grad_norm": 0.5847910046577454,
      "learning_rate": 8.190665709033164e-06,
      "loss": 0.0308,
      "step": 569180
    },
    {
      "epoch": 0.9315082840740232,
      "grad_norm": 0.2505892813205719,
      "learning_rate": 8.190599816819646e-06,
      "loss": 0.0185,
      "step": 569200
    },
    {
      "epoch": 0.9315410145126765,
      "grad_norm": 1.0317012071609497,
      "learning_rate": 8.19053392460613e-06,
      "loss": 0.025,
      "step": 569220
    },
    {
      "epoch": 0.9315737449513298,
      "grad_norm": 0.617363452911377,
      "learning_rate": 8.190468032392613e-06,
      "loss": 0.0269,
      "step": 569240
    },
    {
      "epoch": 0.9316064753899832,
      "grad_norm": 0.6572140455245972,
      "learning_rate": 8.190402140179095e-06,
      "loss": 0.0367,
      "step": 569260
    },
    {
      "epoch": 0.9316392058286365,
      "grad_norm": 1.3003616333007812,
      "learning_rate": 8.190336247965579e-06,
      "loss": 0.0322,
      "step": 569280
    },
    {
      "epoch": 0.9316719362672898,
      "grad_norm": 0.7806118726730347,
      "learning_rate": 8.19027035575206e-06,
      "loss": 0.0234,
      "step": 569300
    },
    {
      "epoch": 0.9317046667059432,
      "grad_norm": 0.21005567908287048,
      "learning_rate": 8.190204463538544e-06,
      "loss": 0.028,
      "step": 569320
    },
    {
      "epoch": 0.9317373971445966,
      "grad_norm": 0.5204228162765503,
      "learning_rate": 8.190138571325028e-06,
      "loss": 0.0312,
      "step": 569340
    },
    {
      "epoch": 0.9317701275832498,
      "grad_norm": 0.7481722235679626,
      "learning_rate": 8.19007267911151e-06,
      "loss": 0.0163,
      "step": 569360
    },
    {
      "epoch": 0.9318028580219032,
      "grad_norm": 0.7423667907714844,
      "learning_rate": 8.190006786897993e-06,
      "loss": 0.0299,
      "step": 569380
    },
    {
      "epoch": 0.9318355884605566,
      "grad_norm": 0.9916850328445435,
      "learning_rate": 8.189940894684477e-06,
      "loss": 0.0346,
      "step": 569400
    },
    {
      "epoch": 0.9318683188992098,
      "grad_norm": 0.6303216814994812,
      "learning_rate": 8.189875002470959e-06,
      "loss": 0.0424,
      "step": 569420
    },
    {
      "epoch": 0.9319010493378632,
      "grad_norm": 2.9935033321380615,
      "learning_rate": 8.189809110257442e-06,
      "loss": 0.0394,
      "step": 569440
    },
    {
      "epoch": 0.9319337797765166,
      "grad_norm": 0.2457238733768463,
      "learning_rate": 8.189743218043924e-06,
      "loss": 0.0274,
      "step": 569460
    },
    {
      "epoch": 0.9319665102151699,
      "grad_norm": 0.44852229952812195,
      "learning_rate": 8.189677325830408e-06,
      "loss": 0.0283,
      "step": 569480
    },
    {
      "epoch": 0.9319992406538232,
      "grad_norm": 0.8338049054145813,
      "learning_rate": 8.18961143361689e-06,
      "loss": 0.028,
      "step": 569500
    },
    {
      "epoch": 0.9320319710924766,
      "grad_norm": 0.2666231393814087,
      "learning_rate": 8.189545541403373e-06,
      "loss": 0.0249,
      "step": 569520
    },
    {
      "epoch": 0.93206470153113,
      "grad_norm": 1.5685192346572876,
      "learning_rate": 8.189479649189855e-06,
      "loss": 0.0243,
      "step": 569540
    },
    {
      "epoch": 0.9320974319697832,
      "grad_norm": 0.08717455714941025,
      "learning_rate": 8.189413756976339e-06,
      "loss": 0.0248,
      "step": 569560
    },
    {
      "epoch": 0.9321301624084366,
      "grad_norm": 1.344721794128418,
      "learning_rate": 8.189347864762822e-06,
      "loss": 0.0338,
      "step": 569580
    },
    {
      "epoch": 0.93216289284709,
      "grad_norm": 0.826682448387146,
      "learning_rate": 8.189281972549304e-06,
      "loss": 0.0322,
      "step": 569600
    },
    {
      "epoch": 0.9321956232857432,
      "grad_norm": 1.1068532466888428,
      "learning_rate": 8.189216080335788e-06,
      "loss": 0.0327,
      "step": 569620
    },
    {
      "epoch": 0.9322283537243966,
      "grad_norm": 1.654894232749939,
      "learning_rate": 8.18915018812227e-06,
      "loss": 0.0309,
      "step": 569640
    },
    {
      "epoch": 0.93226108416305,
      "grad_norm": 0.2511451542377472,
      "learning_rate": 8.189084295908753e-06,
      "loss": 0.0325,
      "step": 569660
    },
    {
      "epoch": 0.9322938146017032,
      "grad_norm": 0.4330693185329437,
      "learning_rate": 8.189018403695235e-06,
      "loss": 0.0229,
      "step": 569680
    },
    {
      "epoch": 0.9323265450403566,
      "grad_norm": 0.4146186411380768,
      "learning_rate": 8.188952511481719e-06,
      "loss": 0.0215,
      "step": 569700
    },
    {
      "epoch": 0.93235927547901,
      "grad_norm": 1.2414616346359253,
      "learning_rate": 8.188886619268202e-06,
      "loss": 0.025,
      "step": 569720
    },
    {
      "epoch": 0.9323920059176634,
      "grad_norm": 0.44186320900917053,
      "learning_rate": 8.188820727054684e-06,
      "loss": 0.0265,
      "step": 569740
    },
    {
      "epoch": 0.9324247363563166,
      "grad_norm": 0.49768760800361633,
      "learning_rate": 8.188754834841168e-06,
      "loss": 0.0266,
      "step": 569760
    },
    {
      "epoch": 0.93245746679497,
      "grad_norm": 0.865134596824646,
      "learning_rate": 8.188688942627651e-06,
      "loss": 0.0248,
      "step": 569780
    },
    {
      "epoch": 0.9324901972336234,
      "grad_norm": 0.5527679324150085,
      "learning_rate": 8.188623050414133e-06,
      "loss": 0.0255,
      "step": 569800
    },
    {
      "epoch": 0.9325229276722766,
      "grad_norm": 1.1228532791137695,
      "learning_rate": 8.188557158200617e-06,
      "loss": 0.033,
      "step": 569820
    },
    {
      "epoch": 0.93255565811093,
      "grad_norm": 0.2501923739910126,
      "learning_rate": 8.188491265987099e-06,
      "loss": 0.0239,
      "step": 569840
    },
    {
      "epoch": 0.9325883885495834,
      "grad_norm": 0.31944411993026733,
      "learning_rate": 8.188425373773582e-06,
      "loss": 0.0185,
      "step": 569860
    },
    {
      "epoch": 0.9326211189882366,
      "grad_norm": 0.8250948786735535,
      "learning_rate": 8.188359481560064e-06,
      "loss": 0.0265,
      "step": 569880
    },
    {
      "epoch": 0.93265384942689,
      "grad_norm": 11.618866920471191,
      "learning_rate": 8.188293589346548e-06,
      "loss": 0.0204,
      "step": 569900
    },
    {
      "epoch": 0.9326865798655434,
      "grad_norm": 0.4468975365161896,
      "learning_rate": 8.188227697133031e-06,
      "loss": 0.0306,
      "step": 569920
    },
    {
      "epoch": 0.9327193103041967,
      "grad_norm": 1.773675560951233,
      "learning_rate": 8.188161804919513e-06,
      "loss": 0.0494,
      "step": 569940
    },
    {
      "epoch": 0.93275204074285,
      "grad_norm": 0.5304828882217407,
      "learning_rate": 8.188095912705997e-06,
      "loss": 0.0265,
      "step": 569960
    },
    {
      "epoch": 0.9327847711815034,
      "grad_norm": 0.20368307828903198,
      "learning_rate": 8.188030020492479e-06,
      "loss": 0.0387,
      "step": 569980
    },
    {
      "epoch": 0.9328175016201568,
      "grad_norm": 1.682845115661621,
      "learning_rate": 8.187964128278962e-06,
      "loss": 0.0429,
      "step": 570000
    },
    {
      "epoch": 0.93285023205881,
      "grad_norm": 0.6183034777641296,
      "learning_rate": 8.187898236065444e-06,
      "loss": 0.0327,
      "step": 570020
    },
    {
      "epoch": 0.9328829624974634,
      "grad_norm": 0.23769278824329376,
      "learning_rate": 8.187832343851928e-06,
      "loss": 0.0328,
      "step": 570040
    },
    {
      "epoch": 0.9329156929361168,
      "grad_norm": 0.2538851201534271,
      "learning_rate": 8.18776645163841e-06,
      "loss": 0.0209,
      "step": 570060
    },
    {
      "epoch": 0.93294842337477,
      "grad_norm": 0.8478911519050598,
      "learning_rate": 8.187700559424893e-06,
      "loss": 0.0269,
      "step": 570080
    },
    {
      "epoch": 0.9329811538134234,
      "grad_norm": 0.6144652366638184,
      "learning_rate": 8.187634667211375e-06,
      "loss": 0.0175,
      "step": 570100
    },
    {
      "epoch": 0.9330138842520768,
      "grad_norm": 0.5192290544509888,
      "learning_rate": 8.187568774997859e-06,
      "loss": 0.0212,
      "step": 570120
    },
    {
      "epoch": 0.9330466146907301,
      "grad_norm": 0.17537476122379303,
      "learning_rate": 8.187502882784342e-06,
      "loss": 0.0163,
      "step": 570140
    },
    {
      "epoch": 0.9330793451293834,
      "grad_norm": 0.4555198848247528,
      "learning_rate": 8.187436990570824e-06,
      "loss": 0.0243,
      "step": 570160
    },
    {
      "epoch": 0.9331120755680368,
      "grad_norm": 1.6936599016189575,
      "learning_rate": 8.187371098357308e-06,
      "loss": 0.0439,
      "step": 570180
    },
    {
      "epoch": 0.9331448060066901,
      "grad_norm": 0.3880150318145752,
      "learning_rate": 8.187305206143791e-06,
      "loss": 0.0271,
      "step": 570200
    },
    {
      "epoch": 0.9331775364453434,
      "grad_norm": 1.0847972631454468,
      "learning_rate": 8.187239313930273e-06,
      "loss": 0.0295,
      "step": 570220
    },
    {
      "epoch": 0.9332102668839968,
      "grad_norm": 0.6511276960372925,
      "learning_rate": 8.187173421716757e-06,
      "loss": 0.0303,
      "step": 570240
    },
    {
      "epoch": 0.9332429973226501,
      "grad_norm": 1.1436078548431396,
      "learning_rate": 8.187107529503239e-06,
      "loss": 0.0337,
      "step": 570260
    },
    {
      "epoch": 0.9332757277613034,
      "grad_norm": 1.3754202127456665,
      "learning_rate": 8.187041637289722e-06,
      "loss": 0.0293,
      "step": 570280
    },
    {
      "epoch": 0.9333084581999568,
      "grad_norm": 2.3547005653381348,
      "learning_rate": 8.186975745076206e-06,
      "loss": 0.0215,
      "step": 570300
    },
    {
      "epoch": 0.9333411886386102,
      "grad_norm": 0.28490468859672546,
      "learning_rate": 8.186909852862688e-06,
      "loss": 0.0328,
      "step": 570320
    },
    {
      "epoch": 0.9333739190772635,
      "grad_norm": 0.5733619332313538,
      "learning_rate": 8.186843960649171e-06,
      "loss": 0.0247,
      "step": 570340
    },
    {
      "epoch": 0.9334066495159168,
      "grad_norm": 2.38307523727417,
      "learning_rate": 8.186778068435653e-06,
      "loss": 0.0347,
      "step": 570360
    },
    {
      "epoch": 0.9334393799545702,
      "grad_norm": 0.9394821524620056,
      "learning_rate": 8.186712176222137e-06,
      "loss": 0.0482,
      "step": 570380
    },
    {
      "epoch": 0.9334721103932235,
      "grad_norm": 0.5231879949569702,
      "learning_rate": 8.186646284008619e-06,
      "loss": 0.0245,
      "step": 570400
    },
    {
      "epoch": 0.9335048408318768,
      "grad_norm": 1.532005786895752,
      "learning_rate": 8.186580391795102e-06,
      "loss": 0.0267,
      "step": 570420
    },
    {
      "epoch": 0.9335375712705302,
      "grad_norm": 0.7971453666687012,
      "learning_rate": 8.186514499581584e-06,
      "loss": 0.0245,
      "step": 570440
    },
    {
      "epoch": 0.9335703017091835,
      "grad_norm": 0.6760197281837463,
      "learning_rate": 8.186448607368068e-06,
      "loss": 0.0241,
      "step": 570460
    },
    {
      "epoch": 0.9336030321478368,
      "grad_norm": 3.0417182445526123,
      "learning_rate": 8.18638271515455e-06,
      "loss": 0.024,
      "step": 570480
    },
    {
      "epoch": 0.9336357625864902,
      "grad_norm": 1.330509066581726,
      "learning_rate": 8.186316822941033e-06,
      "loss": 0.0237,
      "step": 570500
    },
    {
      "epoch": 0.9336684930251435,
      "grad_norm": 0.2856959104537964,
      "learning_rate": 8.186250930727517e-06,
      "loss": 0.0182,
      "step": 570520
    },
    {
      "epoch": 0.9337012234637969,
      "grad_norm": 0.351778507232666,
      "learning_rate": 8.186185038513999e-06,
      "loss": 0.0331,
      "step": 570540
    },
    {
      "epoch": 0.9337339539024502,
      "grad_norm": 0.1648728996515274,
      "learning_rate": 8.186119146300482e-06,
      "loss": 0.0306,
      "step": 570560
    },
    {
      "epoch": 0.9337666843411035,
      "grad_norm": 0.5452023148536682,
      "learning_rate": 8.186053254086966e-06,
      "loss": 0.0263,
      "step": 570580
    },
    {
      "epoch": 0.9337994147797569,
      "grad_norm": 2.4538331031799316,
      "learning_rate": 8.185987361873448e-06,
      "loss": 0.0249,
      "step": 570600
    },
    {
      "epoch": 0.9338321452184102,
      "grad_norm": 0.2805023193359375,
      "learning_rate": 8.185921469659932e-06,
      "loss": 0.0257,
      "step": 570620
    },
    {
      "epoch": 0.9338648756570636,
      "grad_norm": 0.2803742587566376,
      "learning_rate": 8.185855577446415e-06,
      "loss": 0.0304,
      "step": 570640
    },
    {
      "epoch": 0.9338976060957169,
      "grad_norm": 0.659726083278656,
      "learning_rate": 8.185789685232897e-06,
      "loss": 0.0351,
      "step": 570660
    },
    {
      "epoch": 0.9339303365343702,
      "grad_norm": 0.7367510795593262,
      "learning_rate": 8.18572379301938e-06,
      "loss": 0.0261,
      "step": 570680
    },
    {
      "epoch": 0.9339630669730236,
      "grad_norm": 2.043060302734375,
      "learning_rate": 8.185657900805862e-06,
      "loss": 0.0214,
      "step": 570700
    },
    {
      "epoch": 0.9339957974116769,
      "grad_norm": 1.9582692384719849,
      "learning_rate": 8.185592008592346e-06,
      "loss": 0.0481,
      "step": 570720
    },
    {
      "epoch": 0.9340285278503303,
      "grad_norm": 2.519274950027466,
      "learning_rate": 8.185526116378828e-06,
      "loss": 0.023,
      "step": 570740
    },
    {
      "epoch": 0.9340612582889836,
      "grad_norm": 0.7136896252632141,
      "learning_rate": 8.185460224165312e-06,
      "loss": 0.0255,
      "step": 570760
    },
    {
      "epoch": 0.9340939887276369,
      "grad_norm": 1.0411432981491089,
      "learning_rate": 8.185394331951793e-06,
      "loss": 0.0349,
      "step": 570780
    },
    {
      "epoch": 0.9341267191662903,
      "grad_norm": 0.6897560358047485,
      "learning_rate": 8.185328439738277e-06,
      "loss": 0.0276,
      "step": 570800
    },
    {
      "epoch": 0.9341594496049436,
      "grad_norm": 1.0639890432357788,
      "learning_rate": 8.185262547524759e-06,
      "loss": 0.0253,
      "step": 570820
    },
    {
      "epoch": 0.9341921800435969,
      "grad_norm": 1.6399097442626953,
      "learning_rate": 8.185196655311243e-06,
      "loss": 0.0281,
      "step": 570840
    },
    {
      "epoch": 0.9342249104822503,
      "grad_norm": 0.5553501844406128,
      "learning_rate": 8.185130763097724e-06,
      "loss": 0.0339,
      "step": 570860
    },
    {
      "epoch": 0.9342576409209036,
      "grad_norm": 0.2719303071498871,
      "learning_rate": 8.185064870884208e-06,
      "loss": 0.0227,
      "step": 570880
    },
    {
      "epoch": 0.934290371359557,
      "grad_norm": 2.2934622764587402,
      "learning_rate": 8.18499897867069e-06,
      "loss": 0.0359,
      "step": 570900
    },
    {
      "epoch": 0.9343231017982103,
      "grad_norm": 0.6235265731811523,
      "learning_rate": 8.184933086457173e-06,
      "loss": 0.022,
      "step": 570920
    },
    {
      "epoch": 0.9343558322368637,
      "grad_norm": 0.10723894834518433,
      "learning_rate": 8.184867194243657e-06,
      "loss": 0.0239,
      "step": 570940
    },
    {
      "epoch": 0.934388562675517,
      "grad_norm": 0.33254683017730713,
      "learning_rate": 8.184801302030139e-06,
      "loss": 0.0219,
      "step": 570960
    },
    {
      "epoch": 0.9344212931141703,
      "grad_norm": 0.6838940382003784,
      "learning_rate": 8.184735409816623e-06,
      "loss": 0.0258,
      "step": 570980
    },
    {
      "epoch": 0.9344540235528237,
      "grad_norm": 0.22254958748817444,
      "learning_rate": 8.184669517603106e-06,
      "loss": 0.0273,
      "step": 571000
    },
    {
      "epoch": 0.934486753991477,
      "grad_norm": 0.23248372972011566,
      "learning_rate": 8.184603625389588e-06,
      "loss": 0.0272,
      "step": 571020
    },
    {
      "epoch": 0.9345194844301303,
      "grad_norm": 3.8945670127868652,
      "learning_rate": 8.184537733176072e-06,
      "loss": 0.0276,
      "step": 571040
    },
    {
      "epoch": 0.9345522148687837,
      "grad_norm": 0.44521939754486084,
      "learning_rate": 8.184471840962555e-06,
      "loss": 0.0264,
      "step": 571060
    },
    {
      "epoch": 0.934584945307437,
      "grad_norm": 0.8379467725753784,
      "learning_rate": 8.184405948749037e-06,
      "loss": 0.0249,
      "step": 571080
    },
    {
      "epoch": 0.9346176757460903,
      "grad_norm": 1.2205153703689575,
      "learning_rate": 8.18434005653552e-06,
      "loss": 0.0302,
      "step": 571100
    },
    {
      "epoch": 0.9346504061847437,
      "grad_norm": 0.1957186758518219,
      "learning_rate": 8.184274164322003e-06,
      "loss": 0.0272,
      "step": 571120
    },
    {
      "epoch": 0.9346831366233971,
      "grad_norm": 0.6650633215904236,
      "learning_rate": 8.184208272108486e-06,
      "loss": 0.0246,
      "step": 571140
    },
    {
      "epoch": 0.9347158670620503,
      "grad_norm": 0.3106420338153839,
      "learning_rate": 8.184142379894968e-06,
      "loss": 0.0242,
      "step": 571160
    },
    {
      "epoch": 0.9347485975007037,
      "grad_norm": 0.550942063331604,
      "learning_rate": 8.184076487681452e-06,
      "loss": 0.029,
      "step": 571180
    },
    {
      "epoch": 0.9347813279393571,
      "grad_norm": 0.17962907254695892,
      "learning_rate": 8.184010595467934e-06,
      "loss": 0.0313,
      "step": 571200
    },
    {
      "epoch": 0.9348140583780103,
      "grad_norm": 0.39673614501953125,
      "learning_rate": 8.183944703254417e-06,
      "loss": 0.0302,
      "step": 571220
    },
    {
      "epoch": 0.9348467888166637,
      "grad_norm": 0.42756742238998413,
      "learning_rate": 8.183878811040899e-06,
      "loss": 0.0242,
      "step": 571240
    },
    {
      "epoch": 0.9348795192553171,
      "grad_norm": 0.38176414370536804,
      "learning_rate": 8.183812918827383e-06,
      "loss": 0.0223,
      "step": 571260
    },
    {
      "epoch": 0.9349122496939704,
      "grad_norm": 1.2253572940826416,
      "learning_rate": 8.183747026613864e-06,
      "loss": 0.044,
      "step": 571280
    },
    {
      "epoch": 0.9349449801326237,
      "grad_norm": 0.35549575090408325,
      "learning_rate": 8.183681134400348e-06,
      "loss": 0.0436,
      "step": 571300
    },
    {
      "epoch": 0.9349777105712771,
      "grad_norm": 2.8811094760894775,
      "learning_rate": 8.183615242186832e-06,
      "loss": 0.0302,
      "step": 571320
    },
    {
      "epoch": 0.9350104410099304,
      "grad_norm": 1.1093167066574097,
      "learning_rate": 8.183549349973314e-06,
      "loss": 0.0316,
      "step": 571340
    },
    {
      "epoch": 0.9350431714485837,
      "grad_norm": 0.6956616640090942,
      "learning_rate": 8.183483457759797e-06,
      "loss": 0.0298,
      "step": 571360
    },
    {
      "epoch": 0.9350759018872371,
      "grad_norm": 0.554139256477356,
      "learning_rate": 8.18341756554628e-06,
      "loss": 0.0348,
      "step": 571380
    },
    {
      "epoch": 0.9351086323258905,
      "grad_norm": 0.35975393652915955,
      "learning_rate": 8.183351673332763e-06,
      "loss": 0.019,
      "step": 571400
    },
    {
      "epoch": 0.9351413627645437,
      "grad_norm": 0.48467227816581726,
      "learning_rate": 8.183285781119246e-06,
      "loss": 0.041,
      "step": 571420
    },
    {
      "epoch": 0.9351740932031971,
      "grad_norm": 1.3860681056976318,
      "learning_rate": 8.18321988890573e-06,
      "loss": 0.0274,
      "step": 571440
    },
    {
      "epoch": 0.9352068236418505,
      "grad_norm": 0.2660005986690521,
      "learning_rate": 8.183153996692212e-06,
      "loss": 0.0269,
      "step": 571460
    },
    {
      "epoch": 0.9352395540805037,
      "grad_norm": 3.49824857711792,
      "learning_rate": 8.183088104478695e-06,
      "loss": 0.0305,
      "step": 571480
    },
    {
      "epoch": 0.9352722845191571,
      "grad_norm": 0.2624242305755615,
      "learning_rate": 8.183022212265177e-06,
      "loss": 0.0222,
      "step": 571500
    },
    {
      "epoch": 0.9353050149578105,
      "grad_norm": 1.2100653648376465,
      "learning_rate": 8.18295632005166e-06,
      "loss": 0.0325,
      "step": 571520
    },
    {
      "epoch": 0.9353377453964637,
      "grad_norm": 0.32313311100006104,
      "learning_rate": 8.182890427838143e-06,
      "loss": 0.0284,
      "step": 571540
    },
    {
      "epoch": 0.9353704758351171,
      "grad_norm": 0.7651243209838867,
      "learning_rate": 8.182824535624626e-06,
      "loss": 0.0243,
      "step": 571560
    },
    {
      "epoch": 0.9354032062737705,
      "grad_norm": 0.533550500869751,
      "learning_rate": 8.182758643411108e-06,
      "loss": 0.026,
      "step": 571580
    },
    {
      "epoch": 0.9354359367124239,
      "grad_norm": 1.7752312421798706,
      "learning_rate": 8.182692751197592e-06,
      "loss": 0.0236,
      "step": 571600
    },
    {
      "epoch": 0.9354686671510771,
      "grad_norm": 5.451225757598877,
      "learning_rate": 8.182626858984074e-06,
      "loss": 0.0342,
      "step": 571620
    },
    {
      "epoch": 0.9355013975897305,
      "grad_norm": 0.6473493576049805,
      "learning_rate": 8.182560966770557e-06,
      "loss": 0.0194,
      "step": 571640
    },
    {
      "epoch": 0.9355341280283839,
      "grad_norm": 0.5359275937080383,
      "learning_rate": 8.182495074557039e-06,
      "loss": 0.0224,
      "step": 571660
    },
    {
      "epoch": 0.9355668584670371,
      "grad_norm": 0.7687920331954956,
      "learning_rate": 8.182429182343523e-06,
      "loss": 0.0271,
      "step": 571680
    },
    {
      "epoch": 0.9355995889056905,
      "grad_norm": 0.9605244994163513,
      "learning_rate": 8.182363290130006e-06,
      "loss": 0.0204,
      "step": 571700
    },
    {
      "epoch": 0.9356323193443439,
      "grad_norm": 1.42997407913208,
      "learning_rate": 8.182297397916488e-06,
      "loss": 0.0353,
      "step": 571720
    },
    {
      "epoch": 0.9356650497829971,
      "grad_norm": 1.6831320524215698,
      "learning_rate": 8.182231505702972e-06,
      "loss": 0.027,
      "step": 571740
    },
    {
      "epoch": 0.9356977802216505,
      "grad_norm": 0.44336119294166565,
      "learning_rate": 8.182165613489455e-06,
      "loss": 0.0252,
      "step": 571760
    },
    {
      "epoch": 0.9357305106603039,
      "grad_norm": 1.1631358861923218,
      "learning_rate": 8.182099721275937e-06,
      "loss": 0.0198,
      "step": 571780
    },
    {
      "epoch": 0.9357632410989573,
      "grad_norm": 0.5297731757164001,
      "learning_rate": 8.18203382906242e-06,
      "loss": 0.0364,
      "step": 571800
    },
    {
      "epoch": 0.9357959715376105,
      "grad_norm": 0.39668145775794983,
      "learning_rate": 8.181967936848904e-06,
      "loss": 0.0252,
      "step": 571820
    },
    {
      "epoch": 0.9358287019762639,
      "grad_norm": 0.960633397102356,
      "learning_rate": 8.181902044635386e-06,
      "loss": 0.0195,
      "step": 571840
    },
    {
      "epoch": 0.9358614324149173,
      "grad_norm": 0.4355238080024719,
      "learning_rate": 8.18183615242187e-06,
      "loss": 0.0206,
      "step": 571860
    },
    {
      "epoch": 0.9358941628535705,
      "grad_norm": 0.5610692501068115,
      "learning_rate": 8.181770260208352e-06,
      "loss": 0.0289,
      "step": 571880
    },
    {
      "epoch": 0.9359268932922239,
      "grad_norm": 0.7546525597572327,
      "learning_rate": 8.181704367994835e-06,
      "loss": 0.0177,
      "step": 571900
    },
    {
      "epoch": 0.9359596237308773,
      "grad_norm": 0.7841636538505554,
      "learning_rate": 8.181638475781317e-06,
      "loss": 0.0373,
      "step": 571920
    },
    {
      "epoch": 0.9359923541695305,
      "grad_norm": 1.0752124786376953,
      "learning_rate": 8.1815725835678e-06,
      "loss": 0.0288,
      "step": 571940
    },
    {
      "epoch": 0.9360250846081839,
      "grad_norm": 0.4013417661190033,
      "learning_rate": 8.181506691354283e-06,
      "loss": 0.029,
      "step": 571960
    },
    {
      "epoch": 0.9360578150468373,
      "grad_norm": 2.163273572921753,
      "learning_rate": 8.181440799140766e-06,
      "loss": 0.043,
      "step": 571980
    },
    {
      "epoch": 0.9360905454854906,
      "grad_norm": 3.0656795501708984,
      "learning_rate": 8.181374906927248e-06,
      "loss": 0.027,
      "step": 572000
    },
    {
      "epoch": 0.9361232759241439,
      "grad_norm": 1.280733585357666,
      "learning_rate": 8.181309014713732e-06,
      "loss": 0.0223,
      "step": 572020
    },
    {
      "epoch": 0.9361560063627973,
      "grad_norm": 2.9242429733276367,
      "learning_rate": 8.181243122500215e-06,
      "loss": 0.0349,
      "step": 572040
    },
    {
      "epoch": 0.9361887368014506,
      "grad_norm": 0.8027554750442505,
      "learning_rate": 8.181177230286697e-06,
      "loss": 0.0194,
      "step": 572060
    },
    {
      "epoch": 0.9362214672401039,
      "grad_norm": 0.8724696040153503,
      "learning_rate": 8.18111133807318e-06,
      "loss": 0.0225,
      "step": 572080
    },
    {
      "epoch": 0.9362541976787573,
      "grad_norm": 0.15678079426288605,
      "learning_rate": 8.181045445859663e-06,
      "loss": 0.0313,
      "step": 572100
    },
    {
      "epoch": 0.9362869281174107,
      "grad_norm": 1.5491514205932617,
      "learning_rate": 8.180979553646146e-06,
      "loss": 0.0288,
      "step": 572120
    },
    {
      "epoch": 0.9363196585560639,
      "grad_norm": 0.3692173659801483,
      "learning_rate": 8.180913661432628e-06,
      "loss": 0.0282,
      "step": 572140
    },
    {
      "epoch": 0.9363523889947173,
      "grad_norm": 0.8474368453025818,
      "learning_rate": 8.180847769219112e-06,
      "loss": 0.0446,
      "step": 572160
    },
    {
      "epoch": 0.9363851194333707,
      "grad_norm": 0.5373128056526184,
      "learning_rate": 8.180781877005595e-06,
      "loss": 0.0207,
      "step": 572180
    },
    {
      "epoch": 0.936417849872024,
      "grad_norm": 0.7565587759017944,
      "learning_rate": 8.180715984792077e-06,
      "loss": 0.0362,
      "step": 572200
    },
    {
      "epoch": 0.9364505803106773,
      "grad_norm": 0.48420456051826477,
      "learning_rate": 8.180650092578561e-06,
      "loss": 0.026,
      "step": 572220
    },
    {
      "epoch": 0.9364833107493307,
      "grad_norm": 0.5141149163246155,
      "learning_rate": 8.180584200365044e-06,
      "loss": 0.0203,
      "step": 572240
    },
    {
      "epoch": 0.936516041187984,
      "grad_norm": 1.1786670684814453,
      "learning_rate": 8.180518308151526e-06,
      "loss": 0.0323,
      "step": 572260
    },
    {
      "epoch": 0.9365487716266373,
      "grad_norm": 0.8309484124183655,
      "learning_rate": 8.18045241593801e-06,
      "loss": 0.0255,
      "step": 572280
    },
    {
      "epoch": 0.9365815020652907,
      "grad_norm": 0.8324892520904541,
      "learning_rate": 8.180386523724492e-06,
      "loss": 0.0385,
      "step": 572300
    },
    {
      "epoch": 0.936614232503944,
      "grad_norm": 1.722583532333374,
      "learning_rate": 8.180320631510975e-06,
      "loss": 0.027,
      "step": 572320
    },
    {
      "epoch": 0.9366469629425973,
      "grad_norm": 0.45313286781311035,
      "learning_rate": 8.180254739297457e-06,
      "loss": 0.0305,
      "step": 572340
    },
    {
      "epoch": 0.9366796933812507,
      "grad_norm": 0.9981666803359985,
      "learning_rate": 8.180188847083941e-06,
      "loss": 0.0293,
      "step": 572360
    },
    {
      "epoch": 0.936712423819904,
      "grad_norm": 1.3856452703475952,
      "learning_rate": 8.180122954870424e-06,
      "loss": 0.0392,
      "step": 572380
    },
    {
      "epoch": 0.9367451542585574,
      "grad_norm": 2.0209221839904785,
      "learning_rate": 8.180057062656906e-06,
      "loss": 0.0299,
      "step": 572400
    },
    {
      "epoch": 0.9367778846972107,
      "grad_norm": 0.6517683863639832,
      "learning_rate": 8.17999117044339e-06,
      "loss": 0.0264,
      "step": 572420
    },
    {
      "epoch": 0.936810615135864,
      "grad_norm": 0.46221113204956055,
      "learning_rate": 8.179925278229872e-06,
      "loss": 0.0344,
      "step": 572440
    },
    {
      "epoch": 0.9368433455745174,
      "grad_norm": 1.3936896324157715,
      "learning_rate": 8.179859386016355e-06,
      "loss": 0.0402,
      "step": 572460
    },
    {
      "epoch": 0.9368760760131707,
      "grad_norm": 1.3092617988586426,
      "learning_rate": 8.179793493802837e-06,
      "loss": 0.0357,
      "step": 572480
    },
    {
      "epoch": 0.9369088064518241,
      "grad_norm": 1.0900113582611084,
      "learning_rate": 8.179727601589321e-06,
      "loss": 0.0333,
      "step": 572500
    },
    {
      "epoch": 0.9369415368904774,
      "grad_norm": 1.205610752105713,
      "learning_rate": 8.179661709375803e-06,
      "loss": 0.0211,
      "step": 572520
    },
    {
      "epoch": 0.9369742673291307,
      "grad_norm": 0.2939329445362091,
      "learning_rate": 8.179595817162286e-06,
      "loss": 0.0221,
      "step": 572540
    },
    {
      "epoch": 0.9370069977677841,
      "grad_norm": 0.7698156833648682,
      "learning_rate": 8.17952992494877e-06,
      "loss": 0.0221,
      "step": 572560
    },
    {
      "epoch": 0.9370397282064374,
      "grad_norm": 1.2312180995941162,
      "learning_rate": 8.179464032735252e-06,
      "loss": 0.0182,
      "step": 572580
    },
    {
      "epoch": 0.9370724586450908,
      "grad_norm": 0.9315323829650879,
      "learning_rate": 8.179398140521735e-06,
      "loss": 0.0215,
      "step": 572600
    },
    {
      "epoch": 0.9371051890837441,
      "grad_norm": 1.0866378545761108,
      "learning_rate": 8.179332248308219e-06,
      "loss": 0.017,
      "step": 572620
    },
    {
      "epoch": 0.9371379195223974,
      "grad_norm": 0.8513332605361938,
      "learning_rate": 8.179266356094701e-06,
      "loss": 0.0234,
      "step": 572640
    },
    {
      "epoch": 0.9371706499610508,
      "grad_norm": 0.6103399395942688,
      "learning_rate": 8.179200463881185e-06,
      "loss": 0.0303,
      "step": 572660
    },
    {
      "epoch": 0.9372033803997041,
      "grad_norm": 0.531003475189209,
      "learning_rate": 8.179134571667666e-06,
      "loss": 0.0238,
      "step": 572680
    },
    {
      "epoch": 0.9372361108383575,
      "grad_norm": 0.2922702729701996,
      "learning_rate": 8.17906867945415e-06,
      "loss": 0.0327,
      "step": 572700
    },
    {
      "epoch": 0.9372688412770108,
      "grad_norm": 0.5231965780258179,
      "learning_rate": 8.179002787240632e-06,
      "loss": 0.0251,
      "step": 572720
    },
    {
      "epoch": 0.9373015717156641,
      "grad_norm": 0.6187924742698669,
      "learning_rate": 8.178936895027115e-06,
      "loss": 0.0352,
      "step": 572740
    },
    {
      "epoch": 0.9373343021543175,
      "grad_norm": 1.9737356901168823,
      "learning_rate": 8.178871002813599e-06,
      "loss": 0.0265,
      "step": 572760
    },
    {
      "epoch": 0.9373670325929708,
      "grad_norm": 0.6211634874343872,
      "learning_rate": 8.178805110600081e-06,
      "loss": 0.0231,
      "step": 572780
    },
    {
      "epoch": 0.9373997630316242,
      "grad_norm": 0.19706836342811584,
      "learning_rate": 8.178739218386565e-06,
      "loss": 0.0351,
      "step": 572800
    },
    {
      "epoch": 0.9374324934702775,
      "grad_norm": 1.113659143447876,
      "learning_rate": 8.178673326173046e-06,
      "loss": 0.0296,
      "step": 572820
    },
    {
      "epoch": 0.9374652239089308,
      "grad_norm": 0.767763078212738,
      "learning_rate": 8.17860743395953e-06,
      "loss": 0.0318,
      "step": 572840
    },
    {
      "epoch": 0.9374979543475842,
      "grad_norm": 0.3359476327896118,
      "learning_rate": 8.178541541746012e-06,
      "loss": 0.0137,
      "step": 572860
    },
    {
      "epoch": 0.9375306847862375,
      "grad_norm": 0.2789701819419861,
      "learning_rate": 8.178475649532496e-06,
      "loss": 0.0403,
      "step": 572880
    },
    {
      "epoch": 0.9375634152248908,
      "grad_norm": 5.948967933654785,
      "learning_rate": 8.178409757318977e-06,
      "loss": 0.0272,
      "step": 572900
    },
    {
      "epoch": 0.9375961456635442,
      "grad_norm": 0.6236950755119324,
      "learning_rate": 8.178343865105461e-06,
      "loss": 0.0308,
      "step": 572920
    },
    {
      "epoch": 0.9376288761021975,
      "grad_norm": 1.1776739358901978,
      "learning_rate": 8.178277972891943e-06,
      "loss": 0.0228,
      "step": 572940
    },
    {
      "epoch": 0.9376616065408508,
      "grad_norm": 0.27424147725105286,
      "learning_rate": 8.178212080678426e-06,
      "loss": 0.0217,
      "step": 572960
    },
    {
      "epoch": 0.9376943369795042,
      "grad_norm": 4.139364719390869,
      "learning_rate": 8.17814618846491e-06,
      "loss": 0.036,
      "step": 572980
    },
    {
      "epoch": 0.9377270674181576,
      "grad_norm": 0.9960700869560242,
      "learning_rate": 8.178080296251392e-06,
      "loss": 0.0257,
      "step": 573000
    },
    {
      "epoch": 0.9377597978568109,
      "grad_norm": 0.4075394570827484,
      "learning_rate": 8.178014404037876e-06,
      "loss": 0.0286,
      "step": 573020
    },
    {
      "epoch": 0.9377925282954642,
      "grad_norm": 0.8881039619445801,
      "learning_rate": 8.177948511824359e-06,
      "loss": 0.0372,
      "step": 573040
    },
    {
      "epoch": 0.9378252587341176,
      "grad_norm": 1.0472447872161865,
      "learning_rate": 8.177882619610841e-06,
      "loss": 0.0208,
      "step": 573060
    },
    {
      "epoch": 0.9378579891727709,
      "grad_norm": 0.9560068845748901,
      "learning_rate": 8.177816727397325e-06,
      "loss": 0.0262,
      "step": 573080
    },
    {
      "epoch": 0.9378907196114242,
      "grad_norm": 1.4628931283950806,
      "learning_rate": 8.177750835183808e-06,
      "loss": 0.0327,
      "step": 573100
    },
    {
      "epoch": 0.9379234500500776,
      "grad_norm": 2.0118443965911865,
      "learning_rate": 8.17768494297029e-06,
      "loss": 0.0319,
      "step": 573120
    },
    {
      "epoch": 0.9379561804887309,
      "grad_norm": 0.49241864681243896,
      "learning_rate": 8.177619050756774e-06,
      "loss": 0.0228,
      "step": 573140
    },
    {
      "epoch": 0.9379889109273842,
      "grad_norm": 0.5703253149986267,
      "learning_rate": 8.177553158543256e-06,
      "loss": 0.0283,
      "step": 573160
    },
    {
      "epoch": 0.9380216413660376,
      "grad_norm": 1.477813959121704,
      "learning_rate": 8.17748726632974e-06,
      "loss": 0.0312,
      "step": 573180
    },
    {
      "epoch": 0.938054371804691,
      "grad_norm": 1.404767632484436,
      "learning_rate": 8.177421374116221e-06,
      "loss": 0.0336,
      "step": 573200
    },
    {
      "epoch": 0.9380871022433442,
      "grad_norm": 0.8363160490989685,
      "learning_rate": 8.177355481902705e-06,
      "loss": 0.024,
      "step": 573220
    },
    {
      "epoch": 0.9381198326819976,
      "grad_norm": 1.6348490715026855,
      "learning_rate": 8.177289589689187e-06,
      "loss": 0.0313,
      "step": 573240
    },
    {
      "epoch": 0.938152563120651,
      "grad_norm": 0.17588303983211517,
      "learning_rate": 8.17722369747567e-06,
      "loss": 0.0296,
      "step": 573260
    },
    {
      "epoch": 0.9381852935593042,
      "grad_norm": 1.619365930557251,
      "learning_rate": 8.177157805262152e-06,
      "loss": 0.0306,
      "step": 573280
    },
    {
      "epoch": 0.9382180239979576,
      "grad_norm": 1.4038212299346924,
      "learning_rate": 8.177091913048636e-06,
      "loss": 0.0238,
      "step": 573300
    },
    {
      "epoch": 0.938250754436611,
      "grad_norm": 0.5166808366775513,
      "learning_rate": 8.177026020835117e-06,
      "loss": 0.024,
      "step": 573320
    },
    {
      "epoch": 0.9382834848752643,
      "grad_norm": 0.18347065150737762,
      "learning_rate": 8.176960128621601e-06,
      "loss": 0.0288,
      "step": 573340
    },
    {
      "epoch": 0.9383162153139176,
      "grad_norm": 1.1971379518508911,
      "learning_rate": 8.176894236408085e-06,
      "loss": 0.0254,
      "step": 573360
    },
    {
      "epoch": 0.938348945752571,
      "grad_norm": 1.0574465990066528,
      "learning_rate": 8.176828344194567e-06,
      "loss": 0.0266,
      "step": 573380
    },
    {
      "epoch": 0.9383816761912244,
      "grad_norm": 0.32294225692749023,
      "learning_rate": 8.17676245198105e-06,
      "loss": 0.0198,
      "step": 573400
    },
    {
      "epoch": 0.9384144066298776,
      "grad_norm": 3.9365899562835693,
      "learning_rate": 8.176696559767534e-06,
      "loss": 0.0343,
      "step": 573420
    },
    {
      "epoch": 0.938447137068531,
      "grad_norm": 0.36761409044265747,
      "learning_rate": 8.176630667554016e-06,
      "loss": 0.0296,
      "step": 573440
    },
    {
      "epoch": 0.9384798675071844,
      "grad_norm": 1.1811870336532593,
      "learning_rate": 8.1765647753405e-06,
      "loss": 0.0239,
      "step": 573460
    },
    {
      "epoch": 0.9385125979458376,
      "grad_norm": 0.5811513066291809,
      "learning_rate": 8.176498883126983e-06,
      "loss": 0.0294,
      "step": 573480
    },
    {
      "epoch": 0.938545328384491,
      "grad_norm": 0.6283374428749084,
      "learning_rate": 8.176432990913465e-06,
      "loss": 0.0308,
      "step": 573500
    },
    {
      "epoch": 0.9385780588231444,
      "grad_norm": 0.4826717972755432,
      "learning_rate": 8.176367098699948e-06,
      "loss": 0.0304,
      "step": 573520
    },
    {
      "epoch": 0.9386107892617976,
      "grad_norm": 1.3846640586853027,
      "learning_rate": 8.17630120648643e-06,
      "loss": 0.0235,
      "step": 573540
    },
    {
      "epoch": 0.938643519700451,
      "grad_norm": 0.9933379292488098,
      "learning_rate": 8.176235314272914e-06,
      "loss": 0.0251,
      "step": 573560
    },
    {
      "epoch": 0.9386762501391044,
      "grad_norm": 0.49511709809303284,
      "learning_rate": 8.176169422059396e-06,
      "loss": 0.038,
      "step": 573580
    },
    {
      "epoch": 0.9387089805777578,
      "grad_norm": 1.4412119388580322,
      "learning_rate": 8.17610352984588e-06,
      "loss": 0.0242,
      "step": 573600
    },
    {
      "epoch": 0.938741711016411,
      "grad_norm": 0.655999481678009,
      "learning_rate": 8.176037637632361e-06,
      "loss": 0.0324,
      "step": 573620
    },
    {
      "epoch": 0.9387744414550644,
      "grad_norm": 0.6697399020195007,
      "learning_rate": 8.175971745418845e-06,
      "loss": 0.0265,
      "step": 573640
    },
    {
      "epoch": 0.9388071718937178,
      "grad_norm": 2.0802321434020996,
      "learning_rate": 8.175905853205327e-06,
      "loss": 0.0408,
      "step": 573660
    },
    {
      "epoch": 0.938839902332371,
      "grad_norm": 0.7595812678337097,
      "learning_rate": 8.17583996099181e-06,
      "loss": 0.0354,
      "step": 573680
    },
    {
      "epoch": 0.9388726327710244,
      "grad_norm": 1.3645844459533691,
      "learning_rate": 8.175774068778292e-06,
      "loss": 0.0241,
      "step": 573700
    },
    {
      "epoch": 0.9389053632096778,
      "grad_norm": 1.5153168439865112,
      "learning_rate": 8.175708176564776e-06,
      "loss": 0.0433,
      "step": 573720
    },
    {
      "epoch": 0.938938093648331,
      "grad_norm": 0.9310182929039001,
      "learning_rate": 8.175642284351258e-06,
      "loss": 0.0273,
      "step": 573740
    },
    {
      "epoch": 0.9389708240869844,
      "grad_norm": 0.8641079664230347,
      "learning_rate": 8.175576392137741e-06,
      "loss": 0.027,
      "step": 573760
    },
    {
      "epoch": 0.9390035545256378,
      "grad_norm": 1.2677727937698364,
      "learning_rate": 8.175510499924225e-06,
      "loss": 0.0341,
      "step": 573780
    },
    {
      "epoch": 0.9390362849642911,
      "grad_norm": 1.0567797422409058,
      "learning_rate": 8.175444607710707e-06,
      "loss": 0.0277,
      "step": 573800
    },
    {
      "epoch": 0.9390690154029444,
      "grad_norm": 0.8569226264953613,
      "learning_rate": 8.17537871549719e-06,
      "loss": 0.0287,
      "step": 573820
    },
    {
      "epoch": 0.9391017458415978,
      "grad_norm": 1.3784700632095337,
      "learning_rate": 8.175312823283674e-06,
      "loss": 0.0316,
      "step": 573840
    },
    {
      "epoch": 0.9391344762802512,
      "grad_norm": 0.6088352799415588,
      "learning_rate": 8.175246931070157e-06,
      "loss": 0.0255,
      "step": 573860
    },
    {
      "epoch": 0.9391672067189044,
      "grad_norm": 0.6510419249534607,
      "learning_rate": 8.17518103885664e-06,
      "loss": 0.018,
      "step": 573880
    },
    {
      "epoch": 0.9391999371575578,
      "grad_norm": 0.18689514696598053,
      "learning_rate": 8.175115146643123e-06,
      "loss": 0.0273,
      "step": 573900
    },
    {
      "epoch": 0.9392326675962112,
      "grad_norm": 0.6257746815681458,
      "learning_rate": 8.175049254429605e-06,
      "loss": 0.0267,
      "step": 573920
    },
    {
      "epoch": 0.9392653980348644,
      "grad_norm": 0.5615755319595337,
      "learning_rate": 8.174983362216088e-06,
      "loss": 0.0343,
      "step": 573940
    },
    {
      "epoch": 0.9392981284735178,
      "grad_norm": 0.6217858791351318,
      "learning_rate": 8.17491747000257e-06,
      "loss": 0.0274,
      "step": 573960
    },
    {
      "epoch": 0.9393308589121712,
      "grad_norm": 0.7642466425895691,
      "learning_rate": 8.174851577789054e-06,
      "loss": 0.0312,
      "step": 573980
    },
    {
      "epoch": 0.9393635893508245,
      "grad_norm": 0.41475921869277954,
      "learning_rate": 8.174785685575536e-06,
      "loss": 0.0228,
      "step": 574000
    },
    {
      "epoch": 0.9393963197894778,
      "grad_norm": 0.7445623278617859,
      "learning_rate": 8.17471979336202e-06,
      "loss": 0.0254,
      "step": 574020
    },
    {
      "epoch": 0.9394290502281312,
      "grad_norm": 2.2262845039367676,
      "learning_rate": 8.174653901148501e-06,
      "loss": 0.0205,
      "step": 574040
    },
    {
      "epoch": 0.9394617806667845,
      "grad_norm": 0.468827486038208,
      "learning_rate": 8.174588008934985e-06,
      "loss": 0.0199,
      "step": 574060
    },
    {
      "epoch": 0.9394945111054378,
      "grad_norm": 1.1376395225524902,
      "learning_rate": 8.174522116721467e-06,
      "loss": 0.0298,
      "step": 574080
    },
    {
      "epoch": 0.9395272415440912,
      "grad_norm": 0.7291345596313477,
      "learning_rate": 8.17445622450795e-06,
      "loss": 0.0266,
      "step": 574100
    },
    {
      "epoch": 0.9395599719827445,
      "grad_norm": 1.2379165887832642,
      "learning_rate": 8.174390332294432e-06,
      "loss": 0.0281,
      "step": 574120
    },
    {
      "epoch": 0.9395927024213978,
      "grad_norm": 0.3815965950489044,
      "learning_rate": 8.174324440080916e-06,
      "loss": 0.0331,
      "step": 574140
    },
    {
      "epoch": 0.9396254328600512,
      "grad_norm": 0.7466104030609131,
      "learning_rate": 8.1742585478674e-06,
      "loss": 0.0335,
      "step": 574160
    },
    {
      "epoch": 0.9396581632987046,
      "grad_norm": 1.0319180488586426,
      "learning_rate": 8.174192655653881e-06,
      "loss": 0.0267,
      "step": 574180
    },
    {
      "epoch": 0.9396908937373578,
      "grad_norm": 4.681359767913818,
      "learning_rate": 8.174126763440365e-06,
      "loss": 0.0307,
      "step": 574200
    },
    {
      "epoch": 0.9397236241760112,
      "grad_norm": 0.44053325057029724,
      "learning_rate": 8.174060871226848e-06,
      "loss": 0.0207,
      "step": 574220
    },
    {
      "epoch": 0.9397563546146646,
      "grad_norm": 0.7524619102478027,
      "learning_rate": 8.17399497901333e-06,
      "loss": 0.0287,
      "step": 574240
    },
    {
      "epoch": 0.9397890850533179,
      "grad_norm": 0.8365668058395386,
      "learning_rate": 8.173929086799814e-06,
      "loss": 0.023,
      "step": 574260
    },
    {
      "epoch": 0.9398218154919712,
      "grad_norm": 1.007257342338562,
      "learning_rate": 8.173863194586297e-06,
      "loss": 0.0282,
      "step": 574280
    },
    {
      "epoch": 0.9398545459306246,
      "grad_norm": 0.6374409794807434,
      "learning_rate": 8.17379730237278e-06,
      "loss": 0.0275,
      "step": 574300
    },
    {
      "epoch": 0.9398872763692779,
      "grad_norm": 1.1326595544815063,
      "learning_rate": 8.173731410159263e-06,
      "loss": 0.031,
      "step": 574320
    },
    {
      "epoch": 0.9399200068079312,
      "grad_norm": 0.46453139185905457,
      "learning_rate": 8.173665517945745e-06,
      "loss": 0.032,
      "step": 574340
    },
    {
      "epoch": 0.9399527372465846,
      "grad_norm": 1.0762032270431519,
      "learning_rate": 8.173599625732228e-06,
      "loss": 0.0376,
      "step": 574360
    },
    {
      "epoch": 0.9399854676852379,
      "grad_norm": 0.5898824334144592,
      "learning_rate": 8.17353373351871e-06,
      "loss": 0.0258,
      "step": 574380
    },
    {
      "epoch": 0.9400181981238912,
      "grad_norm": 0.8589808344841003,
      "learning_rate": 8.173467841305194e-06,
      "loss": 0.0237,
      "step": 574400
    },
    {
      "epoch": 0.9400509285625446,
      "grad_norm": 0.8050659894943237,
      "learning_rate": 8.173401949091676e-06,
      "loss": 0.0307,
      "step": 574420
    },
    {
      "epoch": 0.940083659001198,
      "grad_norm": 1.377704381942749,
      "learning_rate": 8.17333605687816e-06,
      "loss": 0.023,
      "step": 574440
    },
    {
      "epoch": 0.9401163894398513,
      "grad_norm": 0.7602092027664185,
      "learning_rate": 8.173270164664641e-06,
      "loss": 0.0278,
      "step": 574460
    },
    {
      "epoch": 0.9401491198785046,
      "grad_norm": 0.3802644908428192,
      "learning_rate": 8.173204272451125e-06,
      "loss": 0.0236,
      "step": 574480
    },
    {
      "epoch": 0.940181850317158,
      "grad_norm": 1.9484425783157349,
      "learning_rate": 8.173138380237608e-06,
      "loss": 0.0267,
      "step": 574500
    },
    {
      "epoch": 0.9402145807558113,
      "grad_norm": 0.8499966263771057,
      "learning_rate": 8.17307248802409e-06,
      "loss": 0.0386,
      "step": 574520
    },
    {
      "epoch": 0.9402473111944646,
      "grad_norm": 0.39661383628845215,
      "learning_rate": 8.173006595810574e-06,
      "loss": 0.0236,
      "step": 574540
    },
    {
      "epoch": 0.940280041633118,
      "grad_norm": 0.9855844378471375,
      "learning_rate": 8.172940703597056e-06,
      "loss": 0.04,
      "step": 574560
    },
    {
      "epoch": 0.9403127720717713,
      "grad_norm": 0.6209911108016968,
      "learning_rate": 8.17287481138354e-06,
      "loss": 0.0227,
      "step": 574580
    },
    {
      "epoch": 0.9403455025104246,
      "grad_norm": 1.3098093271255493,
      "learning_rate": 8.172808919170023e-06,
      "loss": 0.0309,
      "step": 574600
    },
    {
      "epoch": 0.940378232949078,
      "grad_norm": 0.8745725750923157,
      "learning_rate": 8.172743026956505e-06,
      "loss": 0.0298,
      "step": 574620
    },
    {
      "epoch": 0.9404109633877313,
      "grad_norm": 1.4424041509628296,
      "learning_rate": 8.172677134742988e-06,
      "loss": 0.0258,
      "step": 574640
    },
    {
      "epoch": 0.9404436938263847,
      "grad_norm": 0.7695899605751038,
      "learning_rate": 8.172611242529472e-06,
      "loss": 0.0321,
      "step": 574660
    },
    {
      "epoch": 0.940476424265038,
      "grad_norm": 0.31440311670303345,
      "learning_rate": 8.172545350315954e-06,
      "loss": 0.0236,
      "step": 574680
    },
    {
      "epoch": 0.9405091547036913,
      "grad_norm": 0.9794439673423767,
      "learning_rate": 8.172479458102438e-06,
      "loss": 0.0371,
      "step": 574700
    },
    {
      "epoch": 0.9405418851423447,
      "grad_norm": 1.1472920179367065,
      "learning_rate": 8.17241356588892e-06,
      "loss": 0.0206,
      "step": 574720
    },
    {
      "epoch": 0.940574615580998,
      "grad_norm": 0.26814478635787964,
      "learning_rate": 8.172347673675403e-06,
      "loss": 0.0253,
      "step": 574740
    },
    {
      "epoch": 0.9406073460196513,
      "grad_norm": 1.070746898651123,
      "learning_rate": 8.172281781461885e-06,
      "loss": 0.0237,
      "step": 574760
    },
    {
      "epoch": 0.9406400764583047,
      "grad_norm": 0.537138819694519,
      "learning_rate": 8.172215889248368e-06,
      "loss": 0.0279,
      "step": 574780
    },
    {
      "epoch": 0.940672806896958,
      "grad_norm": 0.39807769656181335,
      "learning_rate": 8.17214999703485e-06,
      "loss": 0.0263,
      "step": 574800
    },
    {
      "epoch": 0.9407055373356114,
      "grad_norm": 1.689964771270752,
      "learning_rate": 8.172084104821334e-06,
      "loss": 0.0299,
      "step": 574820
    },
    {
      "epoch": 0.9407382677742647,
      "grad_norm": 0.9058238863945007,
      "learning_rate": 8.172018212607816e-06,
      "loss": 0.017,
      "step": 574840
    },
    {
      "epoch": 0.9407709982129181,
      "grad_norm": 0.7077751755714417,
      "learning_rate": 8.1719523203943e-06,
      "loss": 0.0366,
      "step": 574860
    },
    {
      "epoch": 0.9408037286515714,
      "grad_norm": 3.825171947479248,
      "learning_rate": 8.171886428180783e-06,
      "loss": 0.0369,
      "step": 574880
    },
    {
      "epoch": 0.9408364590902247,
      "grad_norm": 0.9198197722434998,
      "learning_rate": 8.171820535967265e-06,
      "loss": 0.0338,
      "step": 574900
    },
    {
      "epoch": 0.9408691895288781,
      "grad_norm": 0.6037735342979431,
      "learning_rate": 8.171754643753749e-06,
      "loss": 0.0194,
      "step": 574920
    },
    {
      "epoch": 0.9409019199675314,
      "grad_norm": 0.8696983456611633,
      "learning_rate": 8.17168875154023e-06,
      "loss": 0.026,
      "step": 574940
    },
    {
      "epoch": 0.9409346504061847,
      "grad_norm": 0.9056373238563538,
      "learning_rate": 8.171622859326714e-06,
      "loss": 0.0235,
      "step": 574960
    },
    {
      "epoch": 0.9409673808448381,
      "grad_norm": 1.2521451711654663,
      "learning_rate": 8.171556967113196e-06,
      "loss": 0.0236,
      "step": 574980
    },
    {
      "epoch": 0.9410001112834914,
      "grad_norm": 1.1001838445663452,
      "learning_rate": 8.17149107489968e-06,
      "loss": 0.0333,
      "step": 575000
    },
    {
      "epoch": 0.9410328417221447,
      "grad_norm": 0.9448057413101196,
      "learning_rate": 8.171425182686163e-06,
      "loss": 0.0349,
      "step": 575020
    },
    {
      "epoch": 0.9410655721607981,
      "grad_norm": 0.5697293281555176,
      "learning_rate": 8.171359290472645e-06,
      "loss": 0.0325,
      "step": 575040
    },
    {
      "epoch": 0.9410983025994515,
      "grad_norm": 0.718334436416626,
      "learning_rate": 8.171293398259129e-06,
      "loss": 0.0269,
      "step": 575060
    },
    {
      "epoch": 0.9411310330381047,
      "grad_norm": 0.4062264561653137,
      "learning_rate": 8.171227506045612e-06,
      "loss": 0.0255,
      "step": 575080
    },
    {
      "epoch": 0.9411637634767581,
      "grad_norm": 1.7845776081085205,
      "learning_rate": 8.171161613832094e-06,
      "loss": 0.0341,
      "step": 575100
    },
    {
      "epoch": 0.9411964939154115,
      "grad_norm": 0.5896055698394775,
      "learning_rate": 8.171095721618578e-06,
      "loss": 0.0237,
      "step": 575120
    },
    {
      "epoch": 0.9412292243540648,
      "grad_norm": 0.5842779278755188,
      "learning_rate": 8.17102982940506e-06,
      "loss": 0.0157,
      "step": 575140
    },
    {
      "epoch": 0.9412619547927181,
      "grad_norm": 0.7016470432281494,
      "learning_rate": 8.170963937191543e-06,
      "loss": 0.034,
      "step": 575160
    },
    {
      "epoch": 0.9412946852313715,
      "grad_norm": 0.5013595819473267,
      "learning_rate": 8.170898044978025e-06,
      "loss": 0.0276,
      "step": 575180
    },
    {
      "epoch": 0.9413274156700248,
      "grad_norm": 1.3510006666183472,
      "learning_rate": 8.170832152764509e-06,
      "loss": 0.0287,
      "step": 575200
    },
    {
      "epoch": 0.9413601461086781,
      "grad_norm": 0.5476391911506653,
      "learning_rate": 8.170766260550992e-06,
      "loss": 0.0224,
      "step": 575220
    },
    {
      "epoch": 0.9413928765473315,
      "grad_norm": 0.4179339110851288,
      "learning_rate": 8.170700368337474e-06,
      "loss": 0.0272,
      "step": 575240
    },
    {
      "epoch": 0.9414256069859849,
      "grad_norm": 1.0460633039474487,
      "learning_rate": 8.170634476123958e-06,
      "loss": 0.0282,
      "step": 575260
    },
    {
      "epoch": 0.9414583374246381,
      "grad_norm": 0.7497777938842773,
      "learning_rate": 8.17056858391044e-06,
      "loss": 0.0403,
      "step": 575280
    },
    {
      "epoch": 0.9414910678632915,
      "grad_norm": 0.864055335521698,
      "learning_rate": 8.170502691696923e-06,
      "loss": 0.0268,
      "step": 575300
    },
    {
      "epoch": 0.9415237983019449,
      "grad_norm": 0.480647474527359,
      "learning_rate": 8.170436799483405e-06,
      "loss": 0.02,
      "step": 575320
    },
    {
      "epoch": 0.9415565287405981,
      "grad_norm": 0.8368321657180786,
      "learning_rate": 8.170370907269889e-06,
      "loss": 0.0329,
      "step": 575340
    },
    {
      "epoch": 0.9415892591792515,
      "grad_norm": 0.7327882647514343,
      "learning_rate": 8.17030501505637e-06,
      "loss": 0.0276,
      "step": 575360
    },
    {
      "epoch": 0.9416219896179049,
      "grad_norm": 0.26827389001846313,
      "learning_rate": 8.170239122842854e-06,
      "loss": 0.0254,
      "step": 575380
    },
    {
      "epoch": 0.9416547200565581,
      "grad_norm": 2.2656352519989014,
      "learning_rate": 8.170173230629338e-06,
      "loss": 0.0246,
      "step": 575400
    },
    {
      "epoch": 0.9416874504952115,
      "grad_norm": 1.8327206373214722,
      "learning_rate": 8.17010733841582e-06,
      "loss": 0.0303,
      "step": 575420
    },
    {
      "epoch": 0.9417201809338649,
      "grad_norm": 1.3804539442062378,
      "learning_rate": 8.170041446202303e-06,
      "loss": 0.0285,
      "step": 575440
    },
    {
      "epoch": 0.9417529113725183,
      "grad_norm": 0.19817198812961578,
      "learning_rate": 8.169975553988787e-06,
      "loss": 0.0269,
      "step": 575460
    },
    {
      "epoch": 0.9417856418111715,
      "grad_norm": 0.7116624712944031,
      "learning_rate": 8.169909661775269e-06,
      "loss": 0.0346,
      "step": 575480
    },
    {
      "epoch": 0.9418183722498249,
      "grad_norm": 0.2347058802843094,
      "learning_rate": 8.169843769561752e-06,
      "loss": 0.0279,
      "step": 575500
    },
    {
      "epoch": 0.9418511026884783,
      "grad_norm": 3.125136613845825,
      "learning_rate": 8.169777877348234e-06,
      "loss": 0.0293,
      "step": 575520
    },
    {
      "epoch": 0.9418838331271315,
      "grad_norm": 0.3142378330230713,
      "learning_rate": 8.169711985134718e-06,
      "loss": 0.0228,
      "step": 575540
    },
    {
      "epoch": 0.9419165635657849,
      "grad_norm": 1.3139443397521973,
      "learning_rate": 8.169646092921201e-06,
      "loss": 0.0253,
      "step": 575560
    },
    {
      "epoch": 0.9419492940044383,
      "grad_norm": 1.221746802330017,
      "learning_rate": 8.169580200707683e-06,
      "loss": 0.0272,
      "step": 575580
    },
    {
      "epoch": 0.9419820244430915,
      "grad_norm": 0.820327639579773,
      "learning_rate": 8.169514308494167e-06,
      "loss": 0.0296,
      "step": 575600
    },
    {
      "epoch": 0.9420147548817449,
      "grad_norm": 1.464638113975525,
      "learning_rate": 8.169448416280649e-06,
      "loss": 0.0348,
      "step": 575620
    },
    {
      "epoch": 0.9420474853203983,
      "grad_norm": 0.6000735759735107,
      "learning_rate": 8.169382524067132e-06,
      "loss": 0.0179,
      "step": 575640
    },
    {
      "epoch": 0.9420802157590517,
      "grad_norm": 1.4586727619171143,
      "learning_rate": 8.169316631853614e-06,
      "loss": 0.0219,
      "step": 575660
    },
    {
      "epoch": 0.9421129461977049,
      "grad_norm": 0.9421263933181763,
      "learning_rate": 8.169250739640098e-06,
      "loss": 0.0301,
      "step": 575680
    },
    {
      "epoch": 0.9421456766363583,
      "grad_norm": 0.3549332320690155,
      "learning_rate": 8.16918484742658e-06,
      "loss": 0.0242,
      "step": 575700
    },
    {
      "epoch": 0.9421784070750117,
      "grad_norm": 0.6092018485069275,
      "learning_rate": 8.169118955213063e-06,
      "loss": 0.025,
      "step": 575720
    },
    {
      "epoch": 0.9422111375136649,
      "grad_norm": 0.6691534519195557,
      "learning_rate": 8.169053062999545e-06,
      "loss": 0.0272,
      "step": 575740
    },
    {
      "epoch": 0.9422438679523183,
      "grad_norm": 1.6004778146743774,
      "learning_rate": 8.168987170786029e-06,
      "loss": 0.0253,
      "step": 575760
    },
    {
      "epoch": 0.9422765983909717,
      "grad_norm": 1.237555980682373,
      "learning_rate": 8.16892127857251e-06,
      "loss": 0.0317,
      "step": 575780
    },
    {
      "epoch": 0.9423093288296249,
      "grad_norm": 0.3414287865161896,
      "learning_rate": 8.168855386358994e-06,
      "loss": 0.0248,
      "step": 575800
    },
    {
      "epoch": 0.9423420592682783,
      "grad_norm": 1.7137364149093628,
      "learning_rate": 8.168789494145478e-06,
      "loss": 0.0251,
      "step": 575820
    },
    {
      "epoch": 0.9423747897069317,
      "grad_norm": 0.244102343916893,
      "learning_rate": 8.16872360193196e-06,
      "loss": 0.0198,
      "step": 575840
    },
    {
      "epoch": 0.942407520145585,
      "grad_norm": 0.6308451890945435,
      "learning_rate": 8.168657709718443e-06,
      "loss": 0.0316,
      "step": 575860
    },
    {
      "epoch": 0.9424402505842383,
      "grad_norm": 0.13940274715423584,
      "learning_rate": 8.168591817504927e-06,
      "loss": 0.0182,
      "step": 575880
    },
    {
      "epoch": 0.9424729810228917,
      "grad_norm": 2.117046356201172,
      "learning_rate": 8.168525925291409e-06,
      "loss": 0.0275,
      "step": 575900
    },
    {
      "epoch": 0.942505711461545,
      "grad_norm": 0.7501347064971924,
      "learning_rate": 8.168460033077892e-06,
      "loss": 0.0238,
      "step": 575920
    },
    {
      "epoch": 0.9425384419001983,
      "grad_norm": 0.393216073513031,
      "learning_rate": 8.168394140864376e-06,
      "loss": 0.0259,
      "step": 575940
    },
    {
      "epoch": 0.9425711723388517,
      "grad_norm": 0.2623993456363678,
      "learning_rate": 8.168328248650858e-06,
      "loss": 0.022,
      "step": 575960
    },
    {
      "epoch": 0.942603902777505,
      "grad_norm": 1.1261979341506958,
      "learning_rate": 8.168262356437341e-06,
      "loss": 0.0279,
      "step": 575980
    },
    {
      "epoch": 0.9426366332161583,
      "grad_norm": 0.722690999507904,
      "learning_rate": 8.168196464223823e-06,
      "loss": 0.0269,
      "step": 576000
    },
    {
      "epoch": 0.9426693636548117,
      "grad_norm": 0.8250777721405029,
      "learning_rate": 8.168130572010307e-06,
      "loss": 0.0256,
      "step": 576020
    },
    {
      "epoch": 0.9427020940934651,
      "grad_norm": 0.5016655325889587,
      "learning_rate": 8.168064679796789e-06,
      "loss": 0.0282,
      "step": 576040
    },
    {
      "epoch": 0.9427348245321184,
      "grad_norm": 0.528261125087738,
      "learning_rate": 8.167998787583272e-06,
      "loss": 0.0319,
      "step": 576060
    },
    {
      "epoch": 0.9427675549707717,
      "grad_norm": 0.26088786125183105,
      "learning_rate": 8.167932895369754e-06,
      "loss": 0.0272,
      "step": 576080
    },
    {
      "epoch": 0.9428002854094251,
      "grad_norm": 0.25941523909568787,
      "learning_rate": 8.167867003156238e-06,
      "loss": 0.0282,
      "step": 576100
    },
    {
      "epoch": 0.9428330158480784,
      "grad_norm": 0.30339279770851135,
      "learning_rate": 8.16780111094272e-06,
      "loss": 0.0231,
      "step": 576120
    },
    {
      "epoch": 0.9428657462867317,
      "grad_norm": 0.15812993049621582,
      "learning_rate": 8.167735218729203e-06,
      "loss": 0.0294,
      "step": 576140
    },
    {
      "epoch": 0.9428984767253851,
      "grad_norm": 2.2276275157928467,
      "learning_rate": 8.167669326515685e-06,
      "loss": 0.0297,
      "step": 576160
    },
    {
      "epoch": 0.9429312071640384,
      "grad_norm": 1.725728154182434,
      "learning_rate": 8.167603434302169e-06,
      "loss": 0.0421,
      "step": 576180
    },
    {
      "epoch": 0.9429639376026917,
      "grad_norm": 1.5263646841049194,
      "learning_rate": 8.167537542088652e-06,
      "loss": 0.0306,
      "step": 576200
    },
    {
      "epoch": 0.9429966680413451,
      "grad_norm": 0.6336717009544373,
      "learning_rate": 8.167471649875134e-06,
      "loss": 0.0378,
      "step": 576220
    },
    {
      "epoch": 0.9430293984799984,
      "grad_norm": 0.17040197551250458,
      "learning_rate": 8.167405757661618e-06,
      "loss": 0.0327,
      "step": 576240
    },
    {
      "epoch": 0.9430621289186518,
      "grad_norm": 0.5694825649261475,
      "learning_rate": 8.167339865448101e-06,
      "loss": 0.0258,
      "step": 576260
    },
    {
      "epoch": 0.9430948593573051,
      "grad_norm": 1.0333259105682373,
      "learning_rate": 8.167273973234583e-06,
      "loss": 0.0301,
      "step": 576280
    },
    {
      "epoch": 0.9431275897959585,
      "grad_norm": 0.6301881670951843,
      "learning_rate": 8.167208081021067e-06,
      "loss": 0.0319,
      "step": 576300
    },
    {
      "epoch": 0.9431603202346118,
      "grad_norm": 1.271268367767334,
      "learning_rate": 8.16714218880755e-06,
      "loss": 0.0357,
      "step": 576320
    },
    {
      "epoch": 0.9431930506732651,
      "grad_norm": 2.0598790645599365,
      "learning_rate": 8.167076296594032e-06,
      "loss": 0.0324,
      "step": 576340
    },
    {
      "epoch": 0.9432257811119185,
      "grad_norm": 0.423684298992157,
      "learning_rate": 8.167010404380516e-06,
      "loss": 0.0249,
      "step": 576360
    },
    {
      "epoch": 0.9432585115505718,
      "grad_norm": 0.7189200520515442,
      "learning_rate": 8.166944512166998e-06,
      "loss": 0.0332,
      "step": 576380
    },
    {
      "epoch": 0.9432912419892251,
      "grad_norm": 0.24466301500797272,
      "learning_rate": 8.166878619953481e-06,
      "loss": 0.0268,
      "step": 576400
    },
    {
      "epoch": 0.9433239724278785,
      "grad_norm": 0.8579014539718628,
      "learning_rate": 8.166812727739963e-06,
      "loss": 0.0215,
      "step": 576420
    },
    {
      "epoch": 0.9433567028665318,
      "grad_norm": 1.740946888923645,
      "learning_rate": 8.166746835526447e-06,
      "loss": 0.0291,
      "step": 576440
    },
    {
      "epoch": 0.9433894333051852,
      "grad_norm": 0.4062740206718445,
      "learning_rate": 8.166680943312929e-06,
      "loss": 0.0228,
      "step": 576460
    },
    {
      "epoch": 0.9434221637438385,
      "grad_norm": 0.6737253069877625,
      "learning_rate": 8.166615051099412e-06,
      "loss": 0.0291,
      "step": 576480
    },
    {
      "epoch": 0.9434548941824918,
      "grad_norm": 0.9896820783615112,
      "learning_rate": 8.166549158885894e-06,
      "loss": 0.022,
      "step": 576500
    },
    {
      "epoch": 0.9434876246211452,
      "grad_norm": 1.1980446577072144,
      "learning_rate": 8.166483266672378e-06,
      "loss": 0.022,
      "step": 576520
    },
    {
      "epoch": 0.9435203550597985,
      "grad_norm": 0.4961891174316406,
      "learning_rate": 8.16641737445886e-06,
      "loss": 0.0279,
      "step": 576540
    },
    {
      "epoch": 0.9435530854984518,
      "grad_norm": 0.18909673392772675,
      "learning_rate": 8.166351482245343e-06,
      "loss": 0.0405,
      "step": 576560
    },
    {
      "epoch": 0.9435858159371052,
      "grad_norm": 0.8356769680976868,
      "learning_rate": 8.166285590031825e-06,
      "loss": 0.0386,
      "step": 576580
    },
    {
      "epoch": 0.9436185463757585,
      "grad_norm": 1.5907540321350098,
      "learning_rate": 8.166219697818309e-06,
      "loss": 0.0237,
      "step": 576600
    },
    {
      "epoch": 0.9436512768144119,
      "grad_norm": 0.2051156908273697,
      "learning_rate": 8.166153805604792e-06,
      "loss": 0.0336,
      "step": 576620
    },
    {
      "epoch": 0.9436840072530652,
      "grad_norm": 0.7339624166488647,
      "learning_rate": 8.166087913391276e-06,
      "loss": 0.0369,
      "step": 576640
    },
    {
      "epoch": 0.9437167376917186,
      "grad_norm": 1.0092356204986572,
      "learning_rate": 8.166022021177758e-06,
      "loss": 0.0211,
      "step": 576660
    },
    {
      "epoch": 0.9437494681303719,
      "grad_norm": 1.942582607269287,
      "learning_rate": 8.165956128964241e-06,
      "loss": 0.0218,
      "step": 576680
    },
    {
      "epoch": 0.9437821985690252,
      "grad_norm": 0.19216232001781464,
      "learning_rate": 8.165890236750725e-06,
      "loss": 0.0274,
      "step": 576700
    },
    {
      "epoch": 0.9438149290076786,
      "grad_norm": 1.375124216079712,
      "learning_rate": 8.165824344537207e-06,
      "loss": 0.0373,
      "step": 576720
    },
    {
      "epoch": 0.9438476594463319,
      "grad_norm": 0.9796796441078186,
      "learning_rate": 8.16575845232369e-06,
      "loss": 0.0255,
      "step": 576740
    },
    {
      "epoch": 0.9438803898849852,
      "grad_norm": 0.9953264594078064,
      "learning_rate": 8.165692560110172e-06,
      "loss": 0.0189,
      "step": 576760
    },
    {
      "epoch": 0.9439131203236386,
      "grad_norm": 0.9789867401123047,
      "learning_rate": 8.165626667896656e-06,
      "loss": 0.0293,
      "step": 576780
    },
    {
      "epoch": 0.9439458507622919,
      "grad_norm": 0.3125617504119873,
      "learning_rate": 8.165560775683138e-06,
      "loss": 0.0404,
      "step": 576800
    },
    {
      "epoch": 0.9439785812009452,
      "grad_norm": 0.34294769167900085,
      "learning_rate": 8.165494883469622e-06,
      "loss": 0.0335,
      "step": 576820
    },
    {
      "epoch": 0.9440113116395986,
      "grad_norm": 0.7501233220100403,
      "learning_rate": 8.165428991256103e-06,
      "loss": 0.0278,
      "step": 576840
    },
    {
      "epoch": 0.944044042078252,
      "grad_norm": 0.334783136844635,
      "learning_rate": 8.165363099042587e-06,
      "loss": 0.0348,
      "step": 576860
    },
    {
      "epoch": 0.9440767725169052,
      "grad_norm": 2.4606845378875732,
      "learning_rate": 8.165297206829069e-06,
      "loss": 0.0374,
      "step": 576880
    },
    {
      "epoch": 0.9441095029555586,
      "grad_norm": 0.48594632744789124,
      "learning_rate": 8.165231314615552e-06,
      "loss": 0.028,
      "step": 576900
    },
    {
      "epoch": 0.944142233394212,
      "grad_norm": 0.2547251284122467,
      "learning_rate": 8.165165422402034e-06,
      "loss": 0.032,
      "step": 576920
    },
    {
      "epoch": 0.9441749638328653,
      "grad_norm": 0.5252871513366699,
      "learning_rate": 8.165099530188518e-06,
      "loss": 0.021,
      "step": 576940
    },
    {
      "epoch": 0.9442076942715186,
      "grad_norm": 0.43148502707481384,
      "learning_rate": 8.165033637975002e-06,
      "loss": 0.0308,
      "step": 576960
    },
    {
      "epoch": 0.944240424710172,
      "grad_norm": 0.09176282584667206,
      "learning_rate": 8.164967745761483e-06,
      "loss": 0.0294,
      "step": 576980
    },
    {
      "epoch": 0.9442731551488253,
      "grad_norm": 1.9528697729110718,
      "learning_rate": 8.164901853547967e-06,
      "loss": 0.0336,
      "step": 577000
    },
    {
      "epoch": 0.9443058855874786,
      "grad_norm": 0.9918391108512878,
      "learning_rate": 8.164835961334449e-06,
      "loss": 0.0273,
      "step": 577020
    },
    {
      "epoch": 0.944338616026132,
      "grad_norm": 0.8779186010360718,
      "learning_rate": 8.164770069120932e-06,
      "loss": 0.0264,
      "step": 577040
    },
    {
      "epoch": 0.9443713464647853,
      "grad_norm": 1.0484251976013184,
      "learning_rate": 8.164704176907416e-06,
      "loss": 0.0247,
      "step": 577060
    },
    {
      "epoch": 0.9444040769034386,
      "grad_norm": 1.7776615619659424,
      "learning_rate": 8.164638284693898e-06,
      "loss": 0.039,
      "step": 577080
    },
    {
      "epoch": 0.944436807342092,
      "grad_norm": 0.5995484590530396,
      "learning_rate": 8.164572392480382e-06,
      "loss": 0.0321,
      "step": 577100
    },
    {
      "epoch": 0.9444695377807454,
      "grad_norm": 0.41927286982536316,
      "learning_rate": 8.164506500266865e-06,
      "loss": 0.0206,
      "step": 577120
    },
    {
      "epoch": 0.9445022682193986,
      "grad_norm": 0.39384111762046814,
      "learning_rate": 8.164440608053347e-06,
      "loss": 0.024,
      "step": 577140
    },
    {
      "epoch": 0.944534998658052,
      "grad_norm": 0.21257157623767853,
      "learning_rate": 8.16437471583983e-06,
      "loss": 0.0264,
      "step": 577160
    },
    {
      "epoch": 0.9445677290967054,
      "grad_norm": 0.08793334662914276,
      "learning_rate": 8.164308823626313e-06,
      "loss": 0.0273,
      "step": 577180
    },
    {
      "epoch": 0.9446004595353586,
      "grad_norm": 0.6799682974815369,
      "learning_rate": 8.164242931412796e-06,
      "loss": 0.0296,
      "step": 577200
    },
    {
      "epoch": 0.944633189974012,
      "grad_norm": 1.7549189329147339,
      "learning_rate": 8.164177039199278e-06,
      "loss": 0.0331,
      "step": 577220
    },
    {
      "epoch": 0.9446659204126654,
      "grad_norm": 0.8575615882873535,
      "learning_rate": 8.164111146985762e-06,
      "loss": 0.0264,
      "step": 577240
    },
    {
      "epoch": 0.9446986508513187,
      "grad_norm": 1.5182790756225586,
      "learning_rate": 8.164045254772243e-06,
      "loss": 0.0338,
      "step": 577260
    },
    {
      "epoch": 0.944731381289972,
      "grad_norm": 0.7874020934104919,
      "learning_rate": 8.163979362558727e-06,
      "loss": 0.0289,
      "step": 577280
    },
    {
      "epoch": 0.9447641117286254,
      "grad_norm": 0.7385541796684265,
      "learning_rate": 8.163913470345209e-06,
      "loss": 0.0306,
      "step": 577300
    },
    {
      "epoch": 0.9447968421672788,
      "grad_norm": 0.5027074217796326,
      "learning_rate": 8.163847578131693e-06,
      "loss": 0.0256,
      "step": 577320
    },
    {
      "epoch": 0.944829572605932,
      "grad_norm": 2.443692445755005,
      "learning_rate": 8.163781685918176e-06,
      "loss": 0.0204,
      "step": 577340
    },
    {
      "epoch": 0.9448623030445854,
      "grad_norm": 0.5872433185577393,
      "learning_rate": 8.163715793704658e-06,
      "loss": 0.0249,
      "step": 577360
    },
    {
      "epoch": 0.9448950334832388,
      "grad_norm": 1.1405372619628906,
      "learning_rate": 8.163649901491142e-06,
      "loss": 0.0318,
      "step": 577380
    },
    {
      "epoch": 0.944927763921892,
      "grad_norm": 0.7140499949455261,
      "learning_rate": 8.163584009277623e-06,
      "loss": 0.0252,
      "step": 577400
    },
    {
      "epoch": 0.9449604943605454,
      "grad_norm": 0.946392834186554,
      "learning_rate": 8.163518117064107e-06,
      "loss": 0.0277,
      "step": 577420
    },
    {
      "epoch": 0.9449932247991988,
      "grad_norm": 1.629537582397461,
      "learning_rate": 8.16345222485059e-06,
      "loss": 0.0326,
      "step": 577440
    },
    {
      "epoch": 0.945025955237852,
      "grad_norm": 0.25794318318367004,
      "learning_rate": 8.163386332637073e-06,
      "loss": 0.0292,
      "step": 577460
    },
    {
      "epoch": 0.9450586856765054,
      "grad_norm": 1.517440676689148,
      "learning_rate": 8.163320440423556e-06,
      "loss": 0.0284,
      "step": 577480
    },
    {
      "epoch": 0.9450914161151588,
      "grad_norm": 1.5258642435073853,
      "learning_rate": 8.16325454821004e-06,
      "loss": 0.0269,
      "step": 577500
    },
    {
      "epoch": 0.9451241465538122,
      "grad_norm": 0.5431860089302063,
      "learning_rate": 8.163188655996522e-06,
      "loss": 0.0225,
      "step": 577520
    },
    {
      "epoch": 0.9451568769924654,
      "grad_norm": 0.31745457649230957,
      "learning_rate": 8.163122763783005e-06,
      "loss": 0.023,
      "step": 577540
    },
    {
      "epoch": 0.9451896074311188,
      "grad_norm": 0.28763043880462646,
      "learning_rate": 8.163056871569487e-06,
      "loss": 0.0225,
      "step": 577560
    },
    {
      "epoch": 0.9452223378697722,
      "grad_norm": 1.3765459060668945,
      "learning_rate": 8.16299097935597e-06,
      "loss": 0.0167,
      "step": 577580
    },
    {
      "epoch": 0.9452550683084254,
      "grad_norm": 0.39954254031181335,
      "learning_rate": 8.162925087142453e-06,
      "loss": 0.0286,
      "step": 577600
    },
    {
      "epoch": 0.9452877987470788,
      "grad_norm": 2.031453847885132,
      "learning_rate": 8.162859194928936e-06,
      "loss": 0.0259,
      "step": 577620
    },
    {
      "epoch": 0.9453205291857322,
      "grad_norm": 0.2325514703989029,
      "learning_rate": 8.162793302715418e-06,
      "loss": 0.0279,
      "step": 577640
    },
    {
      "epoch": 0.9453532596243854,
      "grad_norm": 1.2159738540649414,
      "learning_rate": 8.162727410501902e-06,
      "loss": 0.0309,
      "step": 577660
    },
    {
      "epoch": 0.9453859900630388,
      "grad_norm": 0.49801015853881836,
      "learning_rate": 8.162661518288385e-06,
      "loss": 0.0291,
      "step": 577680
    },
    {
      "epoch": 0.9454187205016922,
      "grad_norm": 0.6972010731697083,
      "learning_rate": 8.162595626074867e-06,
      "loss": 0.0326,
      "step": 577700
    },
    {
      "epoch": 0.9454514509403456,
      "grad_norm": 0.798450767993927,
      "learning_rate": 8.16252973386135e-06,
      "loss": 0.0269,
      "step": 577720
    },
    {
      "epoch": 0.9454841813789988,
      "grad_norm": 1.4470322132110596,
      "learning_rate": 8.162463841647833e-06,
      "loss": 0.0299,
      "step": 577740
    },
    {
      "epoch": 0.9455169118176522,
      "grad_norm": 0.9463197588920593,
      "learning_rate": 8.162397949434316e-06,
      "loss": 0.0255,
      "step": 577760
    },
    {
      "epoch": 0.9455496422563056,
      "grad_norm": 2.5779645442962646,
      "learning_rate": 8.162332057220798e-06,
      "loss": 0.0484,
      "step": 577780
    },
    {
      "epoch": 0.9455823726949588,
      "grad_norm": 0.40103527903556824,
      "learning_rate": 8.162266165007282e-06,
      "loss": 0.0322,
      "step": 577800
    },
    {
      "epoch": 0.9456151031336122,
      "grad_norm": 0.43833428621292114,
      "learning_rate": 8.162200272793764e-06,
      "loss": 0.0403,
      "step": 577820
    },
    {
      "epoch": 0.9456478335722656,
      "grad_norm": 1.5028258562088013,
      "learning_rate": 8.162134380580247e-06,
      "loss": 0.0367,
      "step": 577840
    },
    {
      "epoch": 0.9456805640109188,
      "grad_norm": 0.6051438450813293,
      "learning_rate": 8.16206848836673e-06,
      "loss": 0.0294,
      "step": 577860
    },
    {
      "epoch": 0.9457132944495722,
      "grad_norm": 0.760688304901123,
      "learning_rate": 8.162002596153213e-06,
      "loss": 0.0313,
      "step": 577880
    },
    {
      "epoch": 0.9457460248882256,
      "grad_norm": 0.9977420568466187,
      "learning_rate": 8.161936703939696e-06,
      "loss": 0.0248,
      "step": 577900
    },
    {
      "epoch": 0.9457787553268789,
      "grad_norm": 0.39334428310394287,
      "learning_rate": 8.16187081172618e-06,
      "loss": 0.0348,
      "step": 577920
    },
    {
      "epoch": 0.9458114857655322,
      "grad_norm": 0.2310582399368286,
      "learning_rate": 8.161804919512662e-06,
      "loss": 0.021,
      "step": 577940
    },
    {
      "epoch": 0.9458442162041856,
      "grad_norm": 0.6411159038543701,
      "learning_rate": 8.161739027299145e-06,
      "loss": 0.0339,
      "step": 577960
    },
    {
      "epoch": 0.945876946642839,
      "grad_norm": 1.6015245914459229,
      "learning_rate": 8.161673135085627e-06,
      "loss": 0.0331,
      "step": 577980
    },
    {
      "epoch": 0.9459096770814922,
      "grad_norm": 1.65251624584198,
      "learning_rate": 8.16160724287211e-06,
      "loss": 0.0228,
      "step": 578000
    },
    {
      "epoch": 0.9459424075201456,
      "grad_norm": 0.8053021430969238,
      "learning_rate": 8.161541350658594e-06,
      "loss": 0.0279,
      "step": 578020
    },
    {
      "epoch": 0.945975137958799,
      "grad_norm": 0.6079897880554199,
      "learning_rate": 8.161475458445076e-06,
      "loss": 0.0383,
      "step": 578040
    },
    {
      "epoch": 0.9460078683974522,
      "grad_norm": 2.349902391433716,
      "learning_rate": 8.16140956623156e-06,
      "loss": 0.0251,
      "step": 578060
    },
    {
      "epoch": 0.9460405988361056,
      "grad_norm": 0.15689149498939514,
      "learning_rate": 8.161343674018042e-06,
      "loss": 0.037,
      "step": 578080
    },
    {
      "epoch": 0.946073329274759,
      "grad_norm": 0.6451950073242188,
      "learning_rate": 8.161277781804525e-06,
      "loss": 0.0249,
      "step": 578100
    },
    {
      "epoch": 0.9461060597134123,
      "grad_norm": 0.8452852964401245,
      "learning_rate": 8.161211889591007e-06,
      "loss": 0.0316,
      "step": 578120
    },
    {
      "epoch": 0.9461387901520656,
      "grad_norm": 2.9086802005767822,
      "learning_rate": 8.16114599737749e-06,
      "loss": 0.0467,
      "step": 578140
    },
    {
      "epoch": 0.946171520590719,
      "grad_norm": 0.593664288520813,
      "learning_rate": 8.161080105163973e-06,
      "loss": 0.0193,
      "step": 578160
    },
    {
      "epoch": 0.9462042510293723,
      "grad_norm": 1.7039995193481445,
      "learning_rate": 8.161014212950456e-06,
      "loss": 0.0358,
      "step": 578180
    },
    {
      "epoch": 0.9462369814680256,
      "grad_norm": 0.40804722905158997,
      "learning_rate": 8.160948320736938e-06,
      "loss": 0.0263,
      "step": 578200
    },
    {
      "epoch": 0.946269711906679,
      "grad_norm": 0.7840493321418762,
      "learning_rate": 8.160882428523422e-06,
      "loss": 0.027,
      "step": 578220
    },
    {
      "epoch": 0.9463024423453323,
      "grad_norm": 2.4336798191070557,
      "learning_rate": 8.160816536309905e-06,
      "loss": 0.0368,
      "step": 578240
    },
    {
      "epoch": 0.9463351727839856,
      "grad_norm": 0.4538840353488922,
      "learning_rate": 8.160750644096387e-06,
      "loss": 0.0241,
      "step": 578260
    },
    {
      "epoch": 0.946367903222639,
      "grad_norm": 0.9147711396217346,
      "learning_rate": 8.16068475188287e-06,
      "loss": 0.0285,
      "step": 578280
    },
    {
      "epoch": 0.9464006336612923,
      "grad_norm": 1.074924349784851,
      "learning_rate": 8.160618859669354e-06,
      "loss": 0.0227,
      "step": 578300
    },
    {
      "epoch": 0.9464333640999457,
      "grad_norm": 0.7654970288276672,
      "learning_rate": 8.160552967455836e-06,
      "loss": 0.0304,
      "step": 578320
    },
    {
      "epoch": 0.946466094538599,
      "grad_norm": 1.4693115949630737,
      "learning_rate": 8.16048707524232e-06,
      "loss": 0.026,
      "step": 578340
    },
    {
      "epoch": 0.9464988249772524,
      "grad_norm": 0.9224578142166138,
      "learning_rate": 8.160421183028803e-06,
      "loss": 0.0321,
      "step": 578360
    },
    {
      "epoch": 0.9465315554159057,
      "grad_norm": 0.06630507856607437,
      "learning_rate": 8.160355290815285e-06,
      "loss": 0.0315,
      "step": 578380
    },
    {
      "epoch": 0.946564285854559,
      "grad_norm": 1.3734099864959717,
      "learning_rate": 8.160289398601769e-06,
      "loss": 0.0274,
      "step": 578400
    },
    {
      "epoch": 0.9465970162932124,
      "grad_norm": 3.5684330463409424,
      "learning_rate": 8.160223506388251e-06,
      "loss": 0.0224,
      "step": 578420
    },
    {
      "epoch": 0.9466297467318657,
      "grad_norm": 1.13821280002594,
      "learning_rate": 8.160157614174734e-06,
      "loss": 0.0223,
      "step": 578440
    },
    {
      "epoch": 0.946662477170519,
      "grad_norm": 1.0249738693237305,
      "learning_rate": 8.160091721961216e-06,
      "loss": 0.0262,
      "step": 578460
    },
    {
      "epoch": 0.9466952076091724,
      "grad_norm": 0.5189863443374634,
      "learning_rate": 8.1600258297477e-06,
      "loss": 0.0344,
      "step": 578480
    },
    {
      "epoch": 0.9467279380478257,
      "grad_norm": 5.93515157699585,
      "learning_rate": 8.159959937534182e-06,
      "loss": 0.0342,
      "step": 578500
    },
    {
      "epoch": 0.9467606684864791,
      "grad_norm": 0.9180163741111755,
      "learning_rate": 8.159894045320665e-06,
      "loss": 0.024,
      "step": 578520
    },
    {
      "epoch": 0.9467933989251324,
      "grad_norm": 0.4031853675842285,
      "learning_rate": 8.159828153107147e-06,
      "loss": 0.0249,
      "step": 578540
    },
    {
      "epoch": 0.9468261293637857,
      "grad_norm": 0.5366024374961853,
      "learning_rate": 8.159762260893631e-06,
      "loss": 0.0318,
      "step": 578560
    },
    {
      "epoch": 0.9468588598024391,
      "grad_norm": 0.8672323226928711,
      "learning_rate": 8.159696368680113e-06,
      "loss": 0.0321,
      "step": 578580
    },
    {
      "epoch": 0.9468915902410924,
      "grad_norm": 1.5013179779052734,
      "learning_rate": 8.159630476466596e-06,
      "loss": 0.023,
      "step": 578600
    },
    {
      "epoch": 0.9469243206797457,
      "grad_norm": 0.7497053146362305,
      "learning_rate": 8.159564584253078e-06,
      "loss": 0.0197,
      "step": 578620
    },
    {
      "epoch": 0.9469570511183991,
      "grad_norm": 0.999165952205658,
      "learning_rate": 8.159498692039562e-06,
      "loss": 0.0339,
      "step": 578640
    },
    {
      "epoch": 0.9469897815570524,
      "grad_norm": 0.7436931133270264,
      "learning_rate": 8.159432799826045e-06,
      "loss": 0.0306,
      "step": 578660
    },
    {
      "epoch": 0.9470225119957058,
      "grad_norm": 2.7159039974212646,
      "learning_rate": 8.159366907612527e-06,
      "loss": 0.0241,
      "step": 578680
    },
    {
      "epoch": 0.9470552424343591,
      "grad_norm": 0.9790294766426086,
      "learning_rate": 8.159301015399011e-06,
      "loss": 0.0248,
      "step": 578700
    },
    {
      "epoch": 0.9470879728730125,
      "grad_norm": 1.1778266429901123,
      "learning_rate": 8.159235123185494e-06,
      "loss": 0.0384,
      "step": 578720
    },
    {
      "epoch": 0.9471207033116658,
      "grad_norm": 2.5630435943603516,
      "learning_rate": 8.159169230971976e-06,
      "loss": 0.0294,
      "step": 578740
    },
    {
      "epoch": 0.9471534337503191,
      "grad_norm": 1.7463371753692627,
      "learning_rate": 8.15910333875846e-06,
      "loss": 0.0339,
      "step": 578760
    },
    {
      "epoch": 0.9471861641889725,
      "grad_norm": 0.5888422727584839,
      "learning_rate": 8.159037446544944e-06,
      "loss": 0.0262,
      "step": 578780
    },
    {
      "epoch": 0.9472188946276258,
      "grad_norm": 1.224616289138794,
      "learning_rate": 8.158971554331425e-06,
      "loss": 0.0196,
      "step": 578800
    },
    {
      "epoch": 0.9472516250662791,
      "grad_norm": 0.8069507479667664,
      "learning_rate": 8.158905662117909e-06,
      "loss": 0.024,
      "step": 578820
    },
    {
      "epoch": 0.9472843555049325,
      "grad_norm": 0.5553566813468933,
      "learning_rate": 8.158839769904391e-06,
      "loss": 0.0222,
      "step": 578840
    },
    {
      "epoch": 0.9473170859435858,
      "grad_norm": 1.6188054084777832,
      "learning_rate": 8.158773877690875e-06,
      "loss": 0.0253,
      "step": 578860
    },
    {
      "epoch": 0.9473498163822391,
      "grad_norm": 1.5373539924621582,
      "learning_rate": 8.158707985477356e-06,
      "loss": 0.0293,
      "step": 578880
    },
    {
      "epoch": 0.9473825468208925,
      "grad_norm": 0.418410062789917,
      "learning_rate": 8.15864209326384e-06,
      "loss": 0.026,
      "step": 578900
    },
    {
      "epoch": 0.9474152772595459,
      "grad_norm": 1.087123155593872,
      "learning_rate": 8.158576201050322e-06,
      "loss": 0.0213,
      "step": 578920
    },
    {
      "epoch": 0.9474480076981991,
      "grad_norm": 0.40011921525001526,
      "learning_rate": 8.158510308836805e-06,
      "loss": 0.037,
      "step": 578940
    },
    {
      "epoch": 0.9474807381368525,
      "grad_norm": 1.157828688621521,
      "learning_rate": 8.158444416623287e-06,
      "loss": 0.0259,
      "step": 578960
    },
    {
      "epoch": 0.9475134685755059,
      "grad_norm": 0.25168514251708984,
      "learning_rate": 8.158378524409771e-06,
      "loss": 0.0217,
      "step": 578980
    },
    {
      "epoch": 0.9475461990141592,
      "grad_norm": 0.15465499460697174,
      "learning_rate": 8.158312632196253e-06,
      "loss": 0.0271,
      "step": 579000
    },
    {
      "epoch": 0.9475789294528125,
      "grad_norm": 1.2632536888122559,
      "learning_rate": 8.158246739982736e-06,
      "loss": 0.0293,
      "step": 579020
    },
    {
      "epoch": 0.9476116598914659,
      "grad_norm": 0.6852628588676453,
      "learning_rate": 8.15818084776922e-06,
      "loss": 0.0358,
      "step": 579040
    },
    {
      "epoch": 0.9476443903301192,
      "grad_norm": 2.076524019241333,
      "learning_rate": 8.158114955555702e-06,
      "loss": 0.0224,
      "step": 579060
    },
    {
      "epoch": 0.9476771207687725,
      "grad_norm": 0.4422486126422882,
      "learning_rate": 8.158049063342185e-06,
      "loss": 0.0253,
      "step": 579080
    },
    {
      "epoch": 0.9477098512074259,
      "grad_norm": 1.2144510746002197,
      "learning_rate": 8.157983171128669e-06,
      "loss": 0.0178,
      "step": 579100
    },
    {
      "epoch": 0.9477425816460793,
      "grad_norm": 0.6354657411575317,
      "learning_rate": 8.157917278915151e-06,
      "loss": 0.0171,
      "step": 579120
    },
    {
      "epoch": 0.9477753120847325,
      "grad_norm": 1.9341132640838623,
      "learning_rate": 8.157851386701635e-06,
      "loss": 0.0185,
      "step": 579140
    },
    {
      "epoch": 0.9478080425233859,
      "grad_norm": 0.8540303111076355,
      "learning_rate": 8.157785494488118e-06,
      "loss": 0.0338,
      "step": 579160
    },
    {
      "epoch": 0.9478407729620393,
      "grad_norm": 1.199869155883789,
      "learning_rate": 8.1577196022746e-06,
      "loss": 0.0215,
      "step": 579180
    },
    {
      "epoch": 0.9478735034006925,
      "grad_norm": 0.7816100120544434,
      "learning_rate": 8.157653710061084e-06,
      "loss": 0.0326,
      "step": 579200
    },
    {
      "epoch": 0.9479062338393459,
      "grad_norm": 0.6703775525093079,
      "learning_rate": 8.157587817847566e-06,
      "loss": 0.0218,
      "step": 579220
    },
    {
      "epoch": 0.9479389642779993,
      "grad_norm": 2.2550408840179443,
      "learning_rate": 8.157521925634049e-06,
      "loss": 0.0238,
      "step": 579240
    },
    {
      "epoch": 0.9479716947166525,
      "grad_norm": 0.6144381165504456,
      "learning_rate": 8.157456033420531e-06,
      "loss": 0.023,
      "step": 579260
    },
    {
      "epoch": 0.9480044251553059,
      "grad_norm": 0.2823203206062317,
      "learning_rate": 8.157390141207015e-06,
      "loss": 0.027,
      "step": 579280
    },
    {
      "epoch": 0.9480371555939593,
      "grad_norm": 0.35382482409477234,
      "learning_rate": 8.157324248993496e-06,
      "loss": 0.0341,
      "step": 579300
    },
    {
      "epoch": 0.9480698860326127,
      "grad_norm": 1.1994872093200684,
      "learning_rate": 8.15725835677998e-06,
      "loss": 0.0198,
      "step": 579320
    },
    {
      "epoch": 0.9481026164712659,
      "grad_norm": 0.7902274131774902,
      "learning_rate": 8.157192464566462e-06,
      "loss": 0.0227,
      "step": 579340
    },
    {
      "epoch": 0.9481353469099193,
      "grad_norm": 1.075270414352417,
      "learning_rate": 8.157126572352946e-06,
      "loss": 0.0311,
      "step": 579360
    },
    {
      "epoch": 0.9481680773485727,
      "grad_norm": 0.35092097520828247,
      "learning_rate": 8.157060680139427e-06,
      "loss": 0.0216,
      "step": 579380
    },
    {
      "epoch": 0.9482008077872259,
      "grad_norm": 0.31881582736968994,
      "learning_rate": 8.156994787925911e-06,
      "loss": 0.0195,
      "step": 579400
    },
    {
      "epoch": 0.9482335382258793,
      "grad_norm": 2.4125256538391113,
      "learning_rate": 8.156928895712395e-06,
      "loss": 0.0296,
      "step": 579420
    },
    {
      "epoch": 0.9482662686645327,
      "grad_norm": 1.167722463607788,
      "learning_rate": 8.156863003498877e-06,
      "loss": 0.0292,
      "step": 579440
    },
    {
      "epoch": 0.9482989991031859,
      "grad_norm": 1.4635353088378906,
      "learning_rate": 8.15679711128536e-06,
      "loss": 0.0266,
      "step": 579460
    },
    {
      "epoch": 0.9483317295418393,
      "grad_norm": 11.58254337310791,
      "learning_rate": 8.156731219071844e-06,
      "loss": 0.0299,
      "step": 579480
    },
    {
      "epoch": 0.9483644599804927,
      "grad_norm": 0.32289525866508484,
      "learning_rate": 8.156665326858326e-06,
      "loss": 0.0288,
      "step": 579500
    },
    {
      "epoch": 0.948397190419146,
      "grad_norm": 0.7438805103302002,
      "learning_rate": 8.15659943464481e-06,
      "loss": 0.0285,
      "step": 579520
    },
    {
      "epoch": 0.9484299208577993,
      "grad_norm": 0.5186460018157959,
      "learning_rate": 8.156533542431293e-06,
      "loss": 0.0291,
      "step": 579540
    },
    {
      "epoch": 0.9484626512964527,
      "grad_norm": 1.3011306524276733,
      "learning_rate": 8.156467650217775e-06,
      "loss": 0.032,
      "step": 579560
    },
    {
      "epoch": 0.9484953817351061,
      "grad_norm": 1.5991284847259521,
      "learning_rate": 8.156401758004258e-06,
      "loss": 0.0288,
      "step": 579580
    },
    {
      "epoch": 0.9485281121737593,
      "grad_norm": 0.9473578333854675,
      "learning_rate": 8.15633586579074e-06,
      "loss": 0.0252,
      "step": 579600
    },
    {
      "epoch": 0.9485608426124127,
      "grad_norm": 1.3399877548217773,
      "learning_rate": 8.156269973577224e-06,
      "loss": 0.0499,
      "step": 579620
    },
    {
      "epoch": 0.9485935730510661,
      "grad_norm": 1.1375168561935425,
      "learning_rate": 8.156204081363706e-06,
      "loss": 0.0258,
      "step": 579640
    },
    {
      "epoch": 0.9486263034897193,
      "grad_norm": 0.6140706539154053,
      "learning_rate": 8.15613818915019e-06,
      "loss": 0.0296,
      "step": 579660
    },
    {
      "epoch": 0.9486590339283727,
      "grad_norm": 1.1043932437896729,
      "learning_rate": 8.156072296936671e-06,
      "loss": 0.0238,
      "step": 579680
    },
    {
      "epoch": 0.9486917643670261,
      "grad_norm": 1.0451053380966187,
      "learning_rate": 8.156006404723155e-06,
      "loss": 0.0314,
      "step": 579700
    },
    {
      "epoch": 0.9487244948056793,
      "grad_norm": 0.7428964376449585,
      "learning_rate": 8.155940512509637e-06,
      "loss": 0.0394,
      "step": 579720
    },
    {
      "epoch": 0.9487572252443327,
      "grad_norm": 1.3226362466812134,
      "learning_rate": 8.15587462029612e-06,
      "loss": 0.0341,
      "step": 579740
    },
    {
      "epoch": 0.9487899556829861,
      "grad_norm": 2.450953483581543,
      "learning_rate": 8.155808728082602e-06,
      "loss": 0.0302,
      "step": 579760
    },
    {
      "epoch": 0.9488226861216394,
      "grad_norm": 0.3550550639629364,
      "learning_rate": 8.155742835869086e-06,
      "loss": 0.0347,
      "step": 579780
    },
    {
      "epoch": 0.9488554165602927,
      "grad_norm": 0.4540172517299652,
      "learning_rate": 8.15567694365557e-06,
      "loss": 0.0292,
      "step": 579800
    },
    {
      "epoch": 0.9488881469989461,
      "grad_norm": 0.5330780148506165,
      "learning_rate": 8.155611051442051e-06,
      "loss": 0.0182,
      "step": 579820
    },
    {
      "epoch": 0.9489208774375995,
      "grad_norm": 0.6943979859352112,
      "learning_rate": 8.155545159228535e-06,
      "loss": 0.0206,
      "step": 579840
    },
    {
      "epoch": 0.9489536078762527,
      "grad_norm": 0.6967758536338806,
      "learning_rate": 8.155479267015017e-06,
      "loss": 0.0275,
      "step": 579860
    },
    {
      "epoch": 0.9489863383149061,
      "grad_norm": 2.7882769107818604,
      "learning_rate": 8.1554133748015e-06,
      "loss": 0.0337,
      "step": 579880
    },
    {
      "epoch": 0.9490190687535595,
      "grad_norm": 0.24074941873550415,
      "learning_rate": 8.155347482587984e-06,
      "loss": 0.0291,
      "step": 579900
    },
    {
      "epoch": 0.9490517991922127,
      "grad_norm": 0.4821411669254303,
      "learning_rate": 8.155281590374466e-06,
      "loss": 0.0178,
      "step": 579920
    },
    {
      "epoch": 0.9490845296308661,
      "grad_norm": 0.4939671456813812,
      "learning_rate": 8.15521569816095e-06,
      "loss": 0.0316,
      "step": 579940
    },
    {
      "epoch": 0.9491172600695195,
      "grad_norm": 0.3045426309108734,
      "learning_rate": 8.155149805947433e-06,
      "loss": 0.0263,
      "step": 579960
    },
    {
      "epoch": 0.9491499905081728,
      "grad_norm": 0.6481198072433472,
      "learning_rate": 8.155083913733915e-06,
      "loss": 0.0357,
      "step": 579980
    },
    {
      "epoch": 0.9491827209468261,
      "grad_norm": 1.0418907403945923,
      "learning_rate": 8.155018021520398e-06,
      "loss": 0.0242,
      "step": 580000
    },
    {
      "epoch": 0.9492154513854795,
      "grad_norm": 0.6319218277931213,
      "learning_rate": 8.15495212930688e-06,
      "loss": 0.0343,
      "step": 580020
    },
    {
      "epoch": 0.9492481818241328,
      "grad_norm": 0.5478922128677368,
      "learning_rate": 8.154886237093364e-06,
      "loss": 0.0327,
      "step": 580040
    },
    {
      "epoch": 0.9492809122627861,
      "grad_norm": 0.20566566288471222,
      "learning_rate": 8.154820344879846e-06,
      "loss": 0.0353,
      "step": 580060
    },
    {
      "epoch": 0.9493136427014395,
      "grad_norm": 1.1959766149520874,
      "learning_rate": 8.15475445266633e-06,
      "loss": 0.0329,
      "step": 580080
    },
    {
      "epoch": 0.9493463731400928,
      "grad_norm": 0.6436387896537781,
      "learning_rate": 8.154688560452811e-06,
      "loss": 0.0218,
      "step": 580100
    },
    {
      "epoch": 0.9493791035787461,
      "grad_norm": 1.1787445545196533,
      "learning_rate": 8.154622668239295e-06,
      "loss": 0.0402,
      "step": 580120
    },
    {
      "epoch": 0.9494118340173995,
      "grad_norm": 0.14665617048740387,
      "learning_rate": 8.154556776025778e-06,
      "loss": 0.0303,
      "step": 580140
    },
    {
      "epoch": 0.9494445644560529,
      "grad_norm": 0.9978302717208862,
      "learning_rate": 8.15449088381226e-06,
      "loss": 0.0282,
      "step": 580160
    },
    {
      "epoch": 0.9494772948947062,
      "grad_norm": 0.5028682947158813,
      "learning_rate": 8.154424991598744e-06,
      "loss": 0.0288,
      "step": 580180
    },
    {
      "epoch": 0.9495100253333595,
      "grad_norm": 0.5173102021217346,
      "learning_rate": 8.154359099385226e-06,
      "loss": 0.0213,
      "step": 580200
    },
    {
      "epoch": 0.9495427557720129,
      "grad_norm": 2.291292428970337,
      "learning_rate": 8.15429320717171e-06,
      "loss": 0.0193,
      "step": 580220
    },
    {
      "epoch": 0.9495754862106662,
      "grad_norm": 0.7560003399848938,
      "learning_rate": 8.154227314958191e-06,
      "loss": 0.0257,
      "step": 580240
    },
    {
      "epoch": 0.9496082166493195,
      "grad_norm": 0.8657375574111938,
      "learning_rate": 8.154161422744675e-06,
      "loss": 0.04,
      "step": 580260
    },
    {
      "epoch": 0.9496409470879729,
      "grad_norm": 0.5911156535148621,
      "learning_rate": 8.154095530531158e-06,
      "loss": 0.029,
      "step": 580280
    },
    {
      "epoch": 0.9496736775266262,
      "grad_norm": 0.9031849503517151,
      "learning_rate": 8.15402963831764e-06,
      "loss": 0.0315,
      "step": 580300
    },
    {
      "epoch": 0.9497064079652795,
      "grad_norm": 0.9077373743057251,
      "learning_rate": 8.153963746104124e-06,
      "loss": 0.0245,
      "step": 580320
    },
    {
      "epoch": 0.9497391384039329,
      "grad_norm": 0.7670289278030396,
      "learning_rate": 8.153897853890607e-06,
      "loss": 0.0347,
      "step": 580340
    },
    {
      "epoch": 0.9497718688425862,
      "grad_norm": 0.9259672164916992,
      "learning_rate": 8.15383196167709e-06,
      "loss": 0.0389,
      "step": 580360
    },
    {
      "epoch": 0.9498045992812396,
      "grad_norm": 0.42850637435913086,
      "learning_rate": 8.153766069463573e-06,
      "loss": 0.0301,
      "step": 580380
    },
    {
      "epoch": 0.9498373297198929,
      "grad_norm": 1.5447543859481812,
      "learning_rate": 8.153700177250055e-06,
      "loss": 0.0306,
      "step": 580400
    },
    {
      "epoch": 0.9498700601585462,
      "grad_norm": 2.0203521251678467,
      "learning_rate": 8.153634285036538e-06,
      "loss": 0.025,
      "step": 580420
    },
    {
      "epoch": 0.9499027905971996,
      "grad_norm": 0.19661350548267365,
      "learning_rate": 8.15356839282302e-06,
      "loss": 0.0194,
      "step": 580440
    },
    {
      "epoch": 0.9499355210358529,
      "grad_norm": 0.3787577748298645,
      "learning_rate": 8.153502500609504e-06,
      "loss": 0.0239,
      "step": 580460
    },
    {
      "epoch": 0.9499682514745063,
      "grad_norm": 1.4344732761383057,
      "learning_rate": 8.153436608395987e-06,
      "loss": 0.0321,
      "step": 580480
    },
    {
      "epoch": 0.9500009819131596,
      "grad_norm": 0.21856637299060822,
      "learning_rate": 8.15337071618247e-06,
      "loss": 0.02,
      "step": 580500
    },
    {
      "epoch": 0.9500337123518129,
      "grad_norm": 0.6292027831077576,
      "learning_rate": 8.153304823968953e-06,
      "loss": 0.0312,
      "step": 580520
    },
    {
      "epoch": 0.9500664427904663,
      "grad_norm": 1.433383822441101,
      "learning_rate": 8.153238931755435e-06,
      "loss": 0.0286,
      "step": 580540
    },
    {
      "epoch": 0.9500991732291196,
      "grad_norm": 0.36194124817848206,
      "learning_rate": 8.153173039541918e-06,
      "loss": 0.0214,
      "step": 580560
    },
    {
      "epoch": 0.950131903667773,
      "grad_norm": 1.3074893951416016,
      "learning_rate": 8.1531071473284e-06,
      "loss": 0.0299,
      "step": 580580
    },
    {
      "epoch": 0.9501646341064263,
      "grad_norm": 0.23633891344070435,
      "learning_rate": 8.153041255114884e-06,
      "loss": 0.0235,
      "step": 580600
    },
    {
      "epoch": 0.9501973645450796,
      "grad_norm": 0.6859444379806519,
      "learning_rate": 8.152975362901366e-06,
      "loss": 0.0328,
      "step": 580620
    },
    {
      "epoch": 0.950230094983733,
      "grad_norm": 0.6905183792114258,
      "learning_rate": 8.15290947068785e-06,
      "loss": 0.0297,
      "step": 580640
    },
    {
      "epoch": 0.9502628254223863,
      "grad_norm": 0.27747178077697754,
      "learning_rate": 8.152843578474331e-06,
      "loss": 0.0183,
      "step": 580660
    },
    {
      "epoch": 0.9502955558610396,
      "grad_norm": 1.3103227615356445,
      "learning_rate": 8.152777686260815e-06,
      "loss": 0.0344,
      "step": 580680
    },
    {
      "epoch": 0.950328286299693,
      "grad_norm": 3.86796236038208,
      "learning_rate": 8.152711794047298e-06,
      "loss": 0.026,
      "step": 580700
    },
    {
      "epoch": 0.9503610167383463,
      "grad_norm": 0.2513647973537445,
      "learning_rate": 8.15264590183378e-06,
      "loss": 0.0248,
      "step": 580720
    },
    {
      "epoch": 0.9503937471769996,
      "grad_norm": 0.6982454061508179,
      "learning_rate": 8.152580009620264e-06,
      "loss": 0.025,
      "step": 580740
    },
    {
      "epoch": 0.950426477615653,
      "grad_norm": 1.5832880735397339,
      "learning_rate": 8.152514117406747e-06,
      "loss": 0.0314,
      "step": 580760
    },
    {
      "epoch": 0.9504592080543064,
      "grad_norm": 0.9985358715057373,
      "learning_rate": 8.15244822519323e-06,
      "loss": 0.0261,
      "step": 580780
    },
    {
      "epoch": 0.9504919384929597,
      "grad_norm": 1.4868451356887817,
      "learning_rate": 8.152382332979713e-06,
      "loss": 0.0281,
      "step": 580800
    },
    {
      "epoch": 0.950524668931613,
      "grad_norm": 1.466354250907898,
      "learning_rate": 8.152316440766195e-06,
      "loss": 0.0303,
      "step": 580820
    },
    {
      "epoch": 0.9505573993702664,
      "grad_norm": 0.9842845797538757,
      "learning_rate": 8.152250548552678e-06,
      "loss": 0.0243,
      "step": 580840
    },
    {
      "epoch": 0.9505901298089197,
      "grad_norm": 0.4745856523513794,
      "learning_rate": 8.152184656339162e-06,
      "loss": 0.0333,
      "step": 580860
    },
    {
      "epoch": 0.950622860247573,
      "grad_norm": 0.23279161751270294,
      "learning_rate": 8.152118764125644e-06,
      "loss": 0.0217,
      "step": 580880
    },
    {
      "epoch": 0.9506555906862264,
      "grad_norm": 1.1399903297424316,
      "learning_rate": 8.152052871912128e-06,
      "loss": 0.0378,
      "step": 580900
    },
    {
      "epoch": 0.9506883211248797,
      "grad_norm": 1.958376169204712,
      "learning_rate": 8.15198697969861e-06,
      "loss": 0.0269,
      "step": 580920
    },
    {
      "epoch": 0.950721051563533,
      "grad_norm": 0.8352304100990295,
      "learning_rate": 8.151921087485093e-06,
      "loss": 0.0297,
      "step": 580940
    },
    {
      "epoch": 0.9507537820021864,
      "grad_norm": 1.6519591808319092,
      "learning_rate": 8.151855195271575e-06,
      "loss": 0.0284,
      "step": 580960
    },
    {
      "epoch": 0.9507865124408398,
      "grad_norm": 2.012587070465088,
      "learning_rate": 8.151789303058058e-06,
      "loss": 0.0341,
      "step": 580980
    },
    {
      "epoch": 0.950819242879493,
      "grad_norm": 4.191926956176758,
      "learning_rate": 8.15172341084454e-06,
      "loss": 0.0268,
      "step": 581000
    },
    {
      "epoch": 0.9508519733181464,
      "grad_norm": 1.5675965547561646,
      "learning_rate": 8.151657518631024e-06,
      "loss": 0.0273,
      "step": 581020
    },
    {
      "epoch": 0.9508847037567998,
      "grad_norm": 0.9067162871360779,
      "learning_rate": 8.151591626417506e-06,
      "loss": 0.0335,
      "step": 581040
    },
    {
      "epoch": 0.950917434195453,
      "grad_norm": 0.8642017245292664,
      "learning_rate": 8.15152573420399e-06,
      "loss": 0.0276,
      "step": 581060
    },
    {
      "epoch": 0.9509501646341064,
      "grad_norm": 3.4543049335479736,
      "learning_rate": 8.151459841990473e-06,
      "loss": 0.0297,
      "step": 581080
    },
    {
      "epoch": 0.9509828950727598,
      "grad_norm": 0.3722038269042969,
      "learning_rate": 8.151393949776955e-06,
      "loss": 0.0329,
      "step": 581100
    },
    {
      "epoch": 0.951015625511413,
      "grad_norm": 1.1184016466140747,
      "learning_rate": 8.151328057563439e-06,
      "loss": 0.0199,
      "step": 581120
    },
    {
      "epoch": 0.9510483559500664,
      "grad_norm": 0.20114047825336456,
      "learning_rate": 8.151262165349922e-06,
      "loss": 0.0226,
      "step": 581140
    },
    {
      "epoch": 0.9510810863887198,
      "grad_norm": 2.8550915718078613,
      "learning_rate": 8.151196273136404e-06,
      "loss": 0.0308,
      "step": 581160
    },
    {
      "epoch": 0.9511138168273732,
      "grad_norm": 0.8346529006958008,
      "learning_rate": 8.151130380922888e-06,
      "loss": 0.0204,
      "step": 581180
    },
    {
      "epoch": 0.9511465472660264,
      "grad_norm": 1.2128429412841797,
      "learning_rate": 8.151064488709371e-06,
      "loss": 0.0292,
      "step": 581200
    },
    {
      "epoch": 0.9511792777046798,
      "grad_norm": 1.5226223468780518,
      "learning_rate": 8.150998596495853e-06,
      "loss": 0.0243,
      "step": 581220
    },
    {
      "epoch": 0.9512120081433332,
      "grad_norm": 0.9009204506874084,
      "learning_rate": 8.150932704282337e-06,
      "loss": 0.0267,
      "step": 581240
    },
    {
      "epoch": 0.9512447385819864,
      "grad_norm": 3.862708330154419,
      "learning_rate": 8.150866812068819e-06,
      "loss": 0.0327,
      "step": 581260
    },
    {
      "epoch": 0.9512774690206398,
      "grad_norm": 0.36969324946403503,
      "learning_rate": 8.150800919855302e-06,
      "loss": 0.0229,
      "step": 581280
    },
    {
      "epoch": 0.9513101994592932,
      "grad_norm": 0.5672000646591187,
      "learning_rate": 8.150735027641784e-06,
      "loss": 0.0234,
      "step": 581300
    },
    {
      "epoch": 0.9513429298979464,
      "grad_norm": 0.19836485385894775,
      "learning_rate": 8.150669135428268e-06,
      "loss": 0.0281,
      "step": 581320
    },
    {
      "epoch": 0.9513756603365998,
      "grad_norm": 0.8676614761352539,
      "learning_rate": 8.15060324321475e-06,
      "loss": 0.0373,
      "step": 581340
    },
    {
      "epoch": 0.9514083907752532,
      "grad_norm": 1.1776206493377686,
      "learning_rate": 8.150537351001233e-06,
      "loss": 0.0361,
      "step": 581360
    },
    {
      "epoch": 0.9514411212139066,
      "grad_norm": 0.34566056728363037,
      "learning_rate": 8.150471458787715e-06,
      "loss": 0.0249,
      "step": 581380
    },
    {
      "epoch": 0.9514738516525598,
      "grad_norm": 0.38825520873069763,
      "learning_rate": 8.150405566574199e-06,
      "loss": 0.0251,
      "step": 581400
    },
    {
      "epoch": 0.9515065820912132,
      "grad_norm": 0.19957563281059265,
      "learning_rate": 8.15033967436068e-06,
      "loss": 0.0363,
      "step": 581420
    },
    {
      "epoch": 0.9515393125298666,
      "grad_norm": 1.1183054447174072,
      "learning_rate": 8.150273782147164e-06,
      "loss": 0.0155,
      "step": 581440
    },
    {
      "epoch": 0.9515720429685198,
      "grad_norm": 0.2843243181705475,
      "learning_rate": 8.150207889933646e-06,
      "loss": 0.0274,
      "step": 581460
    },
    {
      "epoch": 0.9516047734071732,
      "grad_norm": 0.8779716491699219,
      "learning_rate": 8.15014199772013e-06,
      "loss": 0.0341,
      "step": 581480
    },
    {
      "epoch": 0.9516375038458266,
      "grad_norm": 0.5866679549217224,
      "learning_rate": 8.150076105506613e-06,
      "loss": 0.0278,
      "step": 581500
    },
    {
      "epoch": 0.9516702342844798,
      "grad_norm": 3.125246524810791,
      "learning_rate": 8.150010213293095e-06,
      "loss": 0.0351,
      "step": 581520
    },
    {
      "epoch": 0.9517029647231332,
      "grad_norm": 0.8904987573623657,
      "learning_rate": 8.149944321079579e-06,
      "loss": 0.0314,
      "step": 581540
    },
    {
      "epoch": 0.9517356951617866,
      "grad_norm": 1.5210598707199097,
      "learning_rate": 8.149878428866062e-06,
      "loss": 0.0331,
      "step": 581560
    },
    {
      "epoch": 0.95176842560044,
      "grad_norm": 0.19959653913974762,
      "learning_rate": 8.149812536652544e-06,
      "loss": 0.0229,
      "step": 581580
    },
    {
      "epoch": 0.9518011560390932,
      "grad_norm": 0.38750243186950684,
      "learning_rate": 8.149746644439028e-06,
      "loss": 0.0275,
      "step": 581600
    },
    {
      "epoch": 0.9518338864777466,
      "grad_norm": 0.27824270725250244,
      "learning_rate": 8.149680752225511e-06,
      "loss": 0.0266,
      "step": 581620
    },
    {
      "epoch": 0.9518666169164,
      "grad_norm": 0.6259394288063049,
      "learning_rate": 8.149614860011993e-06,
      "loss": 0.0264,
      "step": 581640
    },
    {
      "epoch": 0.9518993473550532,
      "grad_norm": 0.22335998713970184,
      "learning_rate": 8.149548967798477e-06,
      "loss": 0.0239,
      "step": 581660
    },
    {
      "epoch": 0.9519320777937066,
      "grad_norm": 1.1993849277496338,
      "learning_rate": 8.149483075584959e-06,
      "loss": 0.0226,
      "step": 581680
    },
    {
      "epoch": 0.95196480823236,
      "grad_norm": 1.915524959564209,
      "learning_rate": 8.149417183371442e-06,
      "loss": 0.027,
      "step": 581700
    },
    {
      "epoch": 0.9519975386710132,
      "grad_norm": 1.7344685792922974,
      "learning_rate": 8.149351291157924e-06,
      "loss": 0.0258,
      "step": 581720
    },
    {
      "epoch": 0.9520302691096666,
      "grad_norm": 0.6642435193061829,
      "learning_rate": 8.149285398944408e-06,
      "loss": 0.0244,
      "step": 581740
    },
    {
      "epoch": 0.95206299954832,
      "grad_norm": 1.2359974384307861,
      "learning_rate": 8.14921950673089e-06,
      "loss": 0.0346,
      "step": 581760
    },
    {
      "epoch": 0.9520957299869733,
      "grad_norm": 0.7736499309539795,
      "learning_rate": 8.149153614517373e-06,
      "loss": 0.0236,
      "step": 581780
    },
    {
      "epoch": 0.9521284604256266,
      "grad_norm": 0.7523752450942993,
      "learning_rate": 8.149087722303855e-06,
      "loss": 0.0204,
      "step": 581800
    },
    {
      "epoch": 0.95216119086428,
      "grad_norm": 0.5461419820785522,
      "learning_rate": 8.149021830090339e-06,
      "loss": 0.0265,
      "step": 581820
    },
    {
      "epoch": 0.9521939213029333,
      "grad_norm": 0.6467350721359253,
      "learning_rate": 8.14895593787682e-06,
      "loss": 0.0317,
      "step": 581840
    },
    {
      "epoch": 0.9522266517415866,
      "grad_norm": 1.1581937074661255,
      "learning_rate": 8.148890045663304e-06,
      "loss": 0.035,
      "step": 581860
    },
    {
      "epoch": 0.95225938218024,
      "grad_norm": 0.5806291103363037,
      "learning_rate": 8.148824153449788e-06,
      "loss": 0.0156,
      "step": 581880
    },
    {
      "epoch": 0.9522921126188933,
      "grad_norm": 1.1807293891906738,
      "learning_rate": 8.14875826123627e-06,
      "loss": 0.0241,
      "step": 581900
    },
    {
      "epoch": 0.9523248430575466,
      "grad_norm": 0.6986583471298218,
      "learning_rate": 8.148692369022753e-06,
      "loss": 0.027,
      "step": 581920
    },
    {
      "epoch": 0.9523575734962,
      "grad_norm": null,
      "learning_rate": 8.148626476809237e-06,
      "loss": 0.0253,
      "step": 581940
    },
    {
      "epoch": 0.9523903039348534,
      "grad_norm": 0.9240137338638306,
      "learning_rate": 8.148560584595719e-06,
      "loss": 0.022,
      "step": 581960
    },
    {
      "epoch": 0.9524230343735067,
      "grad_norm": 0.6515289545059204,
      "learning_rate": 8.148494692382202e-06,
      "loss": 0.0349,
      "step": 581980
    },
    {
      "epoch": 0.95245576481216,
      "grad_norm": 0.8906413912773132,
      "learning_rate": 8.148428800168686e-06,
      "loss": 0.0246,
      "step": 582000
    },
    {
      "epoch": 0.9524884952508134,
      "grad_norm": 0.29596781730651855,
      "learning_rate": 8.148362907955168e-06,
      "loss": 0.0239,
      "step": 582020
    },
    {
      "epoch": 0.9525212256894667,
      "grad_norm": 0.30337026715278625,
      "learning_rate": 8.148297015741651e-06,
      "loss": 0.0291,
      "step": 582040
    },
    {
      "epoch": 0.95255395612812,
      "grad_norm": 0.29842859506607056,
      "learning_rate": 8.148231123528133e-06,
      "loss": 0.029,
      "step": 582060
    },
    {
      "epoch": 0.9525866865667734,
      "grad_norm": 1.018418788909912,
      "learning_rate": 8.148165231314617e-06,
      "loss": 0.0325,
      "step": 582080
    },
    {
      "epoch": 0.9526194170054267,
      "grad_norm": 0.6105679273605347,
      "learning_rate": 8.148099339101099e-06,
      "loss": 0.0209,
      "step": 582100
    },
    {
      "epoch": 0.95265214744408,
      "grad_norm": 1.1369032859802246,
      "learning_rate": 8.148033446887582e-06,
      "loss": 0.0344,
      "step": 582120
    },
    {
      "epoch": 0.9526848778827334,
      "grad_norm": 1.5235874652862549,
      "learning_rate": 8.147967554674064e-06,
      "loss": 0.0241,
      "step": 582140
    },
    {
      "epoch": 0.9527176083213867,
      "grad_norm": 0.6889663338661194,
      "learning_rate": 8.147901662460548e-06,
      "loss": 0.0215,
      "step": 582160
    },
    {
      "epoch": 0.9527503387600401,
      "grad_norm": 1.0883270502090454,
      "learning_rate": 8.14783577024703e-06,
      "loss": 0.0379,
      "step": 582180
    },
    {
      "epoch": 0.9527830691986934,
      "grad_norm": 0.39036354422569275,
      "learning_rate": 8.147769878033513e-06,
      "loss": 0.0236,
      "step": 582200
    },
    {
      "epoch": 0.9528157996373467,
      "grad_norm": 0.27012255787849426,
      "learning_rate": 8.147703985819995e-06,
      "loss": 0.0255,
      "step": 582220
    },
    {
      "epoch": 0.9528485300760001,
      "grad_norm": 0.266296923160553,
      "learning_rate": 8.147638093606479e-06,
      "loss": 0.0251,
      "step": 582240
    },
    {
      "epoch": 0.9528812605146534,
      "grad_norm": 0.21433892846107483,
      "learning_rate": 8.147572201392962e-06,
      "loss": 0.02,
      "step": 582260
    },
    {
      "epoch": 0.9529139909533068,
      "grad_norm": 0.7121821641921997,
      "learning_rate": 8.147506309179444e-06,
      "loss": 0.0319,
      "step": 582280
    },
    {
      "epoch": 0.9529467213919601,
      "grad_norm": 0.572354257106781,
      "learning_rate": 8.147440416965928e-06,
      "loss": 0.0235,
      "step": 582300
    },
    {
      "epoch": 0.9529794518306134,
      "grad_norm": 0.7580428123474121,
      "learning_rate": 8.147374524752411e-06,
      "loss": 0.0381,
      "step": 582320
    },
    {
      "epoch": 0.9530121822692668,
      "grad_norm": 2.009247064590454,
      "learning_rate": 8.147308632538893e-06,
      "loss": 0.0293,
      "step": 582340
    },
    {
      "epoch": 0.9530449127079201,
      "grad_norm": 0.5688244700431824,
      "learning_rate": 8.147242740325377e-06,
      "loss": 0.0297,
      "step": 582360
    },
    {
      "epoch": 0.9530776431465735,
      "grad_norm": 0.29211676120758057,
      "learning_rate": 8.14717684811186e-06,
      "loss": 0.02,
      "step": 582380
    },
    {
      "epoch": 0.9531103735852268,
      "grad_norm": 0.5303824543952942,
      "learning_rate": 8.147110955898342e-06,
      "loss": 0.0306,
      "step": 582400
    },
    {
      "epoch": 0.9531431040238801,
      "grad_norm": 0.7306591868400574,
      "learning_rate": 8.147045063684826e-06,
      "loss": 0.0366,
      "step": 582420
    },
    {
      "epoch": 0.9531758344625335,
      "grad_norm": 1.1251791715621948,
      "learning_rate": 8.146979171471308e-06,
      "loss": 0.0357,
      "step": 582440
    },
    {
      "epoch": 0.9532085649011868,
      "grad_norm": 3.9446678161621094,
      "learning_rate": 8.146913279257791e-06,
      "loss": 0.0323,
      "step": 582460
    },
    {
      "epoch": 0.9532412953398401,
      "grad_norm": 0.2031874656677246,
      "learning_rate": 8.146847387044273e-06,
      "loss": 0.0234,
      "step": 582480
    },
    {
      "epoch": 0.9532740257784935,
      "grad_norm": 1.163699984550476,
      "learning_rate": 8.146781494830757e-06,
      "loss": 0.0235,
      "step": 582500
    },
    {
      "epoch": 0.9533067562171468,
      "grad_norm": 0.5506287217140198,
      "learning_rate": 8.146715602617239e-06,
      "loss": 0.0313,
      "step": 582520
    },
    {
      "epoch": 0.9533394866558002,
      "grad_norm": 0.3192112445831299,
      "learning_rate": 8.146649710403722e-06,
      "loss": 0.0315,
      "step": 582540
    },
    {
      "epoch": 0.9533722170944535,
      "grad_norm": 0.3906911313533783,
      "learning_rate": 8.146583818190204e-06,
      "loss": 0.0264,
      "step": 582560
    },
    {
      "epoch": 0.9534049475331068,
      "grad_norm": 2.6584391593933105,
      "learning_rate": 8.146517925976688e-06,
      "loss": 0.0427,
      "step": 582580
    },
    {
      "epoch": 0.9534376779717602,
      "grad_norm": 0.4915010333061218,
      "learning_rate": 8.146452033763171e-06,
      "loss": 0.0157,
      "step": 582600
    },
    {
      "epoch": 0.9534704084104135,
      "grad_norm": 0.785760760307312,
      "learning_rate": 8.146386141549653e-06,
      "loss": 0.029,
      "step": 582620
    },
    {
      "epoch": 0.9535031388490669,
      "grad_norm": 0.16285529732704163,
      "learning_rate": 8.146320249336137e-06,
      "loss": 0.0233,
      "step": 582640
    },
    {
      "epoch": 0.9535358692877202,
      "grad_norm": 1.1200371980667114,
      "learning_rate": 8.146254357122619e-06,
      "loss": 0.0343,
      "step": 582660
    },
    {
      "epoch": 0.9535685997263735,
      "grad_norm": 1.1096091270446777,
      "learning_rate": 8.146188464909102e-06,
      "loss": 0.0272,
      "step": 582680
    },
    {
      "epoch": 0.9536013301650269,
      "grad_norm": 1.3220789432525635,
      "learning_rate": 8.146122572695584e-06,
      "loss": 0.0325,
      "step": 582700
    },
    {
      "epoch": 0.9536340606036802,
      "grad_norm": 0.5865054726600647,
      "learning_rate": 8.146056680482068e-06,
      "loss": 0.0363,
      "step": 582720
    },
    {
      "epoch": 0.9536667910423335,
      "grad_norm": 1.1469539403915405,
      "learning_rate": 8.145990788268551e-06,
      "loss": 0.0241,
      "step": 582740
    },
    {
      "epoch": 0.9536995214809869,
      "grad_norm": 0.66670823097229,
      "learning_rate": 8.145924896055033e-06,
      "loss": 0.0225,
      "step": 582760
    },
    {
      "epoch": 0.9537322519196402,
      "grad_norm": 1.0948293209075928,
      "learning_rate": 8.145859003841517e-06,
      "loss": 0.027,
      "step": 582780
    },
    {
      "epoch": 0.9537649823582935,
      "grad_norm": 0.44578659534454346,
      "learning_rate": 8.145793111628e-06,
      "loss": 0.0327,
      "step": 582800
    },
    {
      "epoch": 0.9537977127969469,
      "grad_norm": 1.3571909666061401,
      "learning_rate": 8.145727219414482e-06,
      "loss": 0.0244,
      "step": 582820
    },
    {
      "epoch": 0.9538304432356003,
      "grad_norm": 1.4029735326766968,
      "learning_rate": 8.145661327200966e-06,
      "loss": 0.0343,
      "step": 582840
    },
    {
      "epoch": 0.9538631736742536,
      "grad_norm": 0.5218768119812012,
      "learning_rate": 8.145595434987448e-06,
      "loss": 0.0274,
      "step": 582860
    },
    {
      "epoch": 0.9538959041129069,
      "grad_norm": 0.4040225148200989,
      "learning_rate": 8.145529542773931e-06,
      "loss": 0.0212,
      "step": 582880
    },
    {
      "epoch": 0.9539286345515603,
      "grad_norm": 0.6979998350143433,
      "learning_rate": 8.145463650560413e-06,
      "loss": 0.0305,
      "step": 582900
    },
    {
      "epoch": 0.9539613649902136,
      "grad_norm": 0.6462616324424744,
      "learning_rate": 8.145397758346897e-06,
      "loss": 0.0192,
      "step": 582920
    },
    {
      "epoch": 0.9539940954288669,
      "grad_norm": 0.7317920327186584,
      "learning_rate": 8.14533186613338e-06,
      "loss": 0.0288,
      "step": 582940
    },
    {
      "epoch": 0.9540268258675203,
      "grad_norm": 0.6958191394805908,
      "learning_rate": 8.145265973919862e-06,
      "loss": 0.0304,
      "step": 582960
    },
    {
      "epoch": 0.9540595563061736,
      "grad_norm": 1.8788007497787476,
      "learning_rate": 8.145200081706346e-06,
      "loss": 0.0343,
      "step": 582980
    },
    {
      "epoch": 0.9540922867448269,
      "grad_norm": 0.8439168334007263,
      "learning_rate": 8.145134189492828e-06,
      "loss": 0.0193,
      "step": 583000
    },
    {
      "epoch": 0.9541250171834803,
      "grad_norm": 3.070139169692993,
      "learning_rate": 8.145068297279311e-06,
      "loss": 0.0346,
      "step": 583020
    },
    {
      "epoch": 0.9541577476221337,
      "grad_norm": 1.7281410694122314,
      "learning_rate": 8.145002405065793e-06,
      "loss": 0.033,
      "step": 583040
    },
    {
      "epoch": 0.9541904780607869,
      "grad_norm": 0.9912238717079163,
      "learning_rate": 8.144936512852277e-06,
      "loss": 0.027,
      "step": 583060
    },
    {
      "epoch": 0.9542232084994403,
      "grad_norm": 0.9596736431121826,
      "learning_rate": 8.144870620638759e-06,
      "loss": 0.0228,
      "step": 583080
    },
    {
      "epoch": 0.9542559389380937,
      "grad_norm": 0.253118634223938,
      "learning_rate": 8.144804728425242e-06,
      "loss": 0.0246,
      "step": 583100
    },
    {
      "epoch": 0.954288669376747,
      "grad_norm": 0.6445939540863037,
      "learning_rate": 8.144738836211726e-06,
      "loss": 0.0237,
      "step": 583120
    },
    {
      "epoch": 0.9543213998154003,
      "grad_norm": 0.9662173390388489,
      "learning_rate": 8.144672943998208e-06,
      "loss": 0.0381,
      "step": 583140
    },
    {
      "epoch": 0.9543541302540537,
      "grad_norm": 0.7211106419563293,
      "learning_rate": 8.144607051784692e-06,
      "loss": 0.0271,
      "step": 583160
    },
    {
      "epoch": 0.954386860692707,
      "grad_norm": 1.4663350582122803,
      "learning_rate": 8.144541159571175e-06,
      "loss": 0.0245,
      "step": 583180
    },
    {
      "epoch": 0.9544195911313603,
      "grad_norm": 1.2264357805252075,
      "learning_rate": 8.144475267357657e-06,
      "loss": 0.0271,
      "step": 583200
    },
    {
      "epoch": 0.9544523215700137,
      "grad_norm": 0.6524218916893005,
      "learning_rate": 8.14440937514414e-06,
      "loss": 0.036,
      "step": 583220
    },
    {
      "epoch": 0.9544850520086671,
      "grad_norm": 1.1603903770446777,
      "learning_rate": 8.144343482930622e-06,
      "loss": 0.0378,
      "step": 583240
    },
    {
      "epoch": 0.9545177824473203,
      "grad_norm": 1.606754183769226,
      "learning_rate": 8.144277590717106e-06,
      "loss": 0.0289,
      "step": 583260
    },
    {
      "epoch": 0.9545505128859737,
      "grad_norm": 0.6851134896278381,
      "learning_rate": 8.144211698503588e-06,
      "loss": 0.0245,
      "step": 583280
    },
    {
      "epoch": 0.9545832433246271,
      "grad_norm": 2.236797332763672,
      "learning_rate": 8.144145806290072e-06,
      "loss": 0.0339,
      "step": 583300
    },
    {
      "epoch": 0.9546159737632803,
      "grad_norm": 0.27794018387794495,
      "learning_rate": 8.144079914076555e-06,
      "loss": 0.0257,
      "step": 583320
    },
    {
      "epoch": 0.9546487042019337,
      "grad_norm": 0.6774106621742249,
      "learning_rate": 8.144014021863037e-06,
      "loss": 0.0302,
      "step": 583340
    },
    {
      "epoch": 0.9546814346405871,
      "grad_norm": 1.0560448169708252,
      "learning_rate": 8.14394812964952e-06,
      "loss": 0.0234,
      "step": 583360
    },
    {
      "epoch": 0.9547141650792403,
      "grad_norm": 0.7844693064689636,
      "learning_rate": 8.143882237436002e-06,
      "loss": 0.0239,
      "step": 583380
    },
    {
      "epoch": 0.9547468955178937,
      "grad_norm": 0.38634026050567627,
      "learning_rate": 8.143816345222486e-06,
      "loss": 0.0271,
      "step": 583400
    },
    {
      "epoch": 0.9547796259565471,
      "grad_norm": 1.2302030324935913,
      "learning_rate": 8.143750453008968e-06,
      "loss": 0.0372,
      "step": 583420
    },
    {
      "epoch": 0.9548123563952005,
      "grad_norm": 6.183662414550781,
      "learning_rate": 8.143684560795452e-06,
      "loss": 0.0217,
      "step": 583440
    },
    {
      "epoch": 0.9548450868338537,
      "grad_norm": 3.4650368690490723,
      "learning_rate": 8.143618668581933e-06,
      "loss": 0.0257,
      "step": 583460
    },
    {
      "epoch": 0.9548778172725071,
      "grad_norm": 0.5662123560905457,
      "learning_rate": 8.143552776368417e-06,
      "loss": 0.0353,
      "step": 583480
    },
    {
      "epoch": 0.9549105477111605,
      "grad_norm": 0.3207554221153259,
      "learning_rate": 8.143486884154899e-06,
      "loss": 0.0205,
      "step": 583500
    },
    {
      "epoch": 0.9549432781498137,
      "grad_norm": 0.2675761878490448,
      "learning_rate": 8.143420991941383e-06,
      "loss": 0.0319,
      "step": 583520
    },
    {
      "epoch": 0.9549760085884671,
      "grad_norm": 0.8379805684089661,
      "learning_rate": 8.143355099727866e-06,
      "loss": 0.0231,
      "step": 583540
    },
    {
      "epoch": 0.9550087390271205,
      "grad_norm": 0.4883914589881897,
      "learning_rate": 8.143289207514348e-06,
      "loss": 0.0246,
      "step": 583560
    },
    {
      "epoch": 0.9550414694657737,
      "grad_norm": 1.0837424993515015,
      "learning_rate": 8.143223315300832e-06,
      "loss": 0.026,
      "step": 583580
    },
    {
      "epoch": 0.9550741999044271,
      "grad_norm": 0.5519116520881653,
      "learning_rate": 8.143157423087315e-06,
      "loss": 0.0233,
      "step": 583600
    },
    {
      "epoch": 0.9551069303430805,
      "grad_norm": 0.30225691199302673,
      "learning_rate": 8.143091530873797e-06,
      "loss": 0.0272,
      "step": 583620
    },
    {
      "epoch": 0.9551396607817338,
      "grad_norm": 0.7559595704078674,
      "learning_rate": 8.14302563866028e-06,
      "loss": 0.0281,
      "step": 583640
    },
    {
      "epoch": 0.9551723912203871,
      "grad_norm": 0.21725435554981232,
      "learning_rate": 8.142959746446764e-06,
      "loss": 0.0236,
      "step": 583660
    },
    {
      "epoch": 0.9552051216590405,
      "grad_norm": 0.5045779347419739,
      "learning_rate": 8.142893854233246e-06,
      "loss": 0.0248,
      "step": 583680
    },
    {
      "epoch": 0.9552378520976939,
      "grad_norm": 1.2564541101455688,
      "learning_rate": 8.14282796201973e-06,
      "loss": 0.0319,
      "step": 583700
    },
    {
      "epoch": 0.9552705825363471,
      "grad_norm": 0.344882607460022,
      "learning_rate": 8.142762069806212e-06,
      "loss": 0.0362,
      "step": 583720
    },
    {
      "epoch": 0.9553033129750005,
      "grad_norm": 1.1397385597229004,
      "learning_rate": 8.142696177592695e-06,
      "loss": 0.0229,
      "step": 583740
    },
    {
      "epoch": 0.9553360434136539,
      "grad_norm": 1.3213847875595093,
      "learning_rate": 8.142630285379177e-06,
      "loss": 0.0271,
      "step": 583760
    },
    {
      "epoch": 0.9553687738523071,
      "grad_norm": 0.8312130570411682,
      "learning_rate": 8.14256439316566e-06,
      "loss": 0.0381,
      "step": 583780
    },
    {
      "epoch": 0.9554015042909605,
      "grad_norm": 0.07623475044965744,
      "learning_rate": 8.142498500952143e-06,
      "loss": 0.0249,
      "step": 583800
    },
    {
      "epoch": 0.9554342347296139,
      "grad_norm": 0.5025660991668701,
      "learning_rate": 8.142432608738626e-06,
      "loss": 0.0246,
      "step": 583820
    },
    {
      "epoch": 0.9554669651682672,
      "grad_norm": 2.819934129714966,
      "learning_rate": 8.142366716525108e-06,
      "loss": 0.0281,
      "step": 583840
    },
    {
      "epoch": 0.9554996956069205,
      "grad_norm": 0.973739743232727,
      "learning_rate": 8.142300824311592e-06,
      "loss": 0.0306,
      "step": 583860
    },
    {
      "epoch": 0.9555324260455739,
      "grad_norm": 0.7956907153129578,
      "learning_rate": 8.142234932098074e-06,
      "loss": 0.0208,
      "step": 583880
    },
    {
      "epoch": 0.9555651564842272,
      "grad_norm": 0.5182942748069763,
      "learning_rate": 8.142169039884557e-06,
      "loss": 0.0331,
      "step": 583900
    },
    {
      "epoch": 0.9555978869228805,
      "grad_norm": 0.43596383929252625,
      "learning_rate": 8.14210314767104e-06,
      "loss": 0.0217,
      "step": 583920
    },
    {
      "epoch": 0.9556306173615339,
      "grad_norm": 0.37063631415367126,
      "learning_rate": 8.142037255457523e-06,
      "loss": 0.0298,
      "step": 583940
    },
    {
      "epoch": 0.9556633478001872,
      "grad_norm": 0.7788253426551819,
      "learning_rate": 8.141971363244006e-06,
      "loss": 0.0321,
      "step": 583960
    },
    {
      "epoch": 0.9556960782388405,
      "grad_norm": 0.9833594560623169,
      "learning_rate": 8.14190547103049e-06,
      "loss": 0.0387,
      "step": 583980
    },
    {
      "epoch": 0.9557288086774939,
      "grad_norm": 0.9509099125862122,
      "learning_rate": 8.141839578816972e-06,
      "loss": 0.0236,
      "step": 584000
    },
    {
      "epoch": 0.9557615391161473,
      "grad_norm": 0.4551282823085785,
      "learning_rate": 8.141773686603455e-06,
      "loss": 0.0241,
      "step": 584020
    },
    {
      "epoch": 0.9557942695548006,
      "grad_norm": 0.3516935110092163,
      "learning_rate": 8.141707794389939e-06,
      "loss": 0.025,
      "step": 584040
    },
    {
      "epoch": 0.9558269999934539,
      "grad_norm": 1.0952435731887817,
      "learning_rate": 8.14164190217642e-06,
      "loss": 0.0163,
      "step": 584060
    },
    {
      "epoch": 0.9558597304321073,
      "grad_norm": 1.383307933807373,
      "learning_rate": 8.141576009962904e-06,
      "loss": 0.0268,
      "step": 584080
    },
    {
      "epoch": 0.9558924608707606,
      "grad_norm": 0.6534109711647034,
      "learning_rate": 8.141510117749386e-06,
      "loss": 0.0236,
      "step": 584100
    },
    {
      "epoch": 0.9559251913094139,
      "grad_norm": 0.5947386026382446,
      "learning_rate": 8.14144422553587e-06,
      "loss": 0.0268,
      "step": 584120
    },
    {
      "epoch": 0.9559579217480673,
      "grad_norm": 1.0874972343444824,
      "learning_rate": 8.141378333322352e-06,
      "loss": 0.0265,
      "step": 584140
    },
    {
      "epoch": 0.9559906521867206,
      "grad_norm": 0.34210070967674255,
      "learning_rate": 8.141312441108835e-06,
      "loss": 0.0187,
      "step": 584160
    },
    {
      "epoch": 0.9560233826253739,
      "grad_norm": 1.6212927103042603,
      "learning_rate": 8.141246548895317e-06,
      "loss": 0.0274,
      "step": 584180
    },
    {
      "epoch": 0.9560561130640273,
      "grad_norm": 1.4189600944519043,
      "learning_rate": 8.1411806566818e-06,
      "loss": 0.0189,
      "step": 584200
    },
    {
      "epoch": 0.9560888435026806,
      "grad_norm": 0.33857810497283936,
      "learning_rate": 8.141114764468283e-06,
      "loss": 0.0289,
      "step": 584220
    },
    {
      "epoch": 0.956121573941334,
      "grad_norm": 0.7196222543716431,
      "learning_rate": 8.141048872254766e-06,
      "loss": 0.0218,
      "step": 584240
    },
    {
      "epoch": 0.9561543043799873,
      "grad_norm": 0.7396405339241028,
      "learning_rate": 8.140982980041248e-06,
      "loss": 0.0282,
      "step": 584260
    },
    {
      "epoch": 0.9561870348186406,
      "grad_norm": 1.181164026260376,
      "learning_rate": 8.140917087827732e-06,
      "loss": 0.0325,
      "step": 584280
    },
    {
      "epoch": 0.956219765257294,
      "grad_norm": 0.815510630607605,
      "learning_rate": 8.140851195614214e-06,
      "loss": 0.0272,
      "step": 584300
    },
    {
      "epoch": 0.9562524956959473,
      "grad_norm": 0.28283607959747314,
      "learning_rate": 8.140785303400697e-06,
      "loss": 0.0267,
      "step": 584320
    },
    {
      "epoch": 0.9562852261346007,
      "grad_norm": 0.26698610186576843,
      "learning_rate": 8.14071941118718e-06,
      "loss": 0.0322,
      "step": 584340
    },
    {
      "epoch": 0.956317956573254,
      "grad_norm": 1.1830672025680542,
      "learning_rate": 8.140653518973663e-06,
      "loss": 0.0365,
      "step": 584360
    },
    {
      "epoch": 0.9563506870119073,
      "grad_norm": 0.33402982354164124,
      "learning_rate": 8.140587626760146e-06,
      "loss": 0.0302,
      "step": 584380
    },
    {
      "epoch": 0.9563834174505607,
      "grad_norm": 1.2923177480697632,
      "learning_rate": 8.14052173454663e-06,
      "loss": 0.0275,
      "step": 584400
    },
    {
      "epoch": 0.956416147889214,
      "grad_norm": 0.5119714140892029,
      "learning_rate": 8.140455842333113e-06,
      "loss": 0.0361,
      "step": 584420
    },
    {
      "epoch": 0.9564488783278674,
      "grad_norm": 3.552997589111328,
      "learning_rate": 8.140389950119595e-06,
      "loss": 0.0346,
      "step": 584440
    },
    {
      "epoch": 0.9564816087665207,
      "grad_norm": 0.6607533693313599,
      "learning_rate": 8.140324057906079e-06,
      "loss": 0.0319,
      "step": 584460
    },
    {
      "epoch": 0.956514339205174,
      "grad_norm": 0.17075608670711517,
      "learning_rate": 8.14025816569256e-06,
      "loss": 0.0299,
      "step": 584480
    },
    {
      "epoch": 0.9565470696438274,
      "grad_norm": 0.6642263531684875,
      "learning_rate": 8.140192273479044e-06,
      "loss": 0.0325,
      "step": 584500
    },
    {
      "epoch": 0.9565798000824807,
      "grad_norm": 0.38493505120277405,
      "learning_rate": 8.140126381265526e-06,
      "loss": 0.0363,
      "step": 584520
    },
    {
      "epoch": 0.956612530521134,
      "grad_norm": 1.3195606470108032,
      "learning_rate": 8.14006048905201e-06,
      "loss": 0.0257,
      "step": 584540
    },
    {
      "epoch": 0.9566452609597874,
      "grad_norm": 0.9527838826179504,
      "learning_rate": 8.139994596838492e-06,
      "loss": 0.0303,
      "step": 584560
    },
    {
      "epoch": 0.9566779913984407,
      "grad_norm": 1.4767853021621704,
      "learning_rate": 8.139928704624975e-06,
      "loss": 0.0257,
      "step": 584580
    },
    {
      "epoch": 0.956710721837094,
      "grad_norm": 0.15531378984451294,
      "learning_rate": 8.139862812411457e-06,
      "loss": 0.0266,
      "step": 584600
    },
    {
      "epoch": 0.9567434522757474,
      "grad_norm": 2.295686721801758,
      "learning_rate": 8.13979692019794e-06,
      "loss": 0.0371,
      "step": 584620
    },
    {
      "epoch": 0.9567761827144008,
      "grad_norm": 0.5089336037635803,
      "learning_rate": 8.139731027984423e-06,
      "loss": 0.0176,
      "step": 584640
    },
    {
      "epoch": 0.956808913153054,
      "grad_norm": 0.6736505031585693,
      "learning_rate": 8.139665135770906e-06,
      "loss": 0.0312,
      "step": 584660
    },
    {
      "epoch": 0.9568416435917074,
      "grad_norm": 2.0008440017700195,
      "learning_rate": 8.139599243557388e-06,
      "loss": 0.0246,
      "step": 584680
    },
    {
      "epoch": 0.9568743740303608,
      "grad_norm": 0.16552917659282684,
      "learning_rate": 8.139533351343872e-06,
      "loss": 0.0296,
      "step": 584700
    },
    {
      "epoch": 0.9569071044690141,
      "grad_norm": 4.279906272888184,
      "learning_rate": 8.139467459130355e-06,
      "loss": 0.0319,
      "step": 584720
    },
    {
      "epoch": 0.9569398349076674,
      "grad_norm": 0.7031219601631165,
      "learning_rate": 8.139401566916837e-06,
      "loss": 0.0294,
      "step": 584740
    },
    {
      "epoch": 0.9569725653463208,
      "grad_norm": 0.3894021213054657,
      "learning_rate": 8.139335674703321e-06,
      "loss": 0.0273,
      "step": 584760
    },
    {
      "epoch": 0.9570052957849741,
      "grad_norm": 0.7098264694213867,
      "learning_rate": 8.139269782489804e-06,
      "loss": 0.0376,
      "step": 584780
    },
    {
      "epoch": 0.9570380262236274,
      "grad_norm": 1.5656359195709229,
      "learning_rate": 8.139203890276286e-06,
      "loss": 0.023,
      "step": 584800
    },
    {
      "epoch": 0.9570707566622808,
      "grad_norm": 0.5064818263053894,
      "learning_rate": 8.13913799806277e-06,
      "loss": 0.0278,
      "step": 584820
    },
    {
      "epoch": 0.9571034871009342,
      "grad_norm": 0.2324463427066803,
      "learning_rate": 8.139072105849254e-06,
      "loss": 0.0223,
      "step": 584840
    },
    {
      "epoch": 0.9571362175395874,
      "grad_norm": 0.33814582228660583,
      "learning_rate": 8.139006213635735e-06,
      "loss": 0.0222,
      "step": 584860
    },
    {
      "epoch": 0.9571689479782408,
      "grad_norm": 1.400822639465332,
      "learning_rate": 8.138940321422219e-06,
      "loss": 0.0341,
      "step": 584880
    },
    {
      "epoch": 0.9572016784168942,
      "grad_norm": 0.7130916118621826,
      "learning_rate": 8.138874429208701e-06,
      "loss": 0.0266,
      "step": 584900
    },
    {
      "epoch": 0.9572344088555474,
      "grad_norm": 0.645945131778717,
      "learning_rate": 8.138808536995184e-06,
      "loss": 0.0151,
      "step": 584920
    },
    {
      "epoch": 0.9572671392942008,
      "grad_norm": 0.7771583795547485,
      "learning_rate": 8.138742644781666e-06,
      "loss": 0.0264,
      "step": 584940
    },
    {
      "epoch": 0.9572998697328542,
      "grad_norm": 0.2898525893688202,
      "learning_rate": 8.13867675256815e-06,
      "loss": 0.0293,
      "step": 584960
    },
    {
      "epoch": 0.9573326001715075,
      "grad_norm": 0.9125385880470276,
      "learning_rate": 8.138610860354632e-06,
      "loss": 0.0306,
      "step": 584980
    },
    {
      "epoch": 0.9573653306101608,
      "grad_norm": 0.46778780221939087,
      "learning_rate": 8.138544968141115e-06,
      "loss": 0.0444,
      "step": 585000
    },
    {
      "epoch": 0.9573980610488142,
      "grad_norm": 1.522627353668213,
      "learning_rate": 8.138479075927597e-06,
      "loss": 0.0381,
      "step": 585020
    },
    {
      "epoch": 0.9574307914874676,
      "grad_norm": 1.3295021057128906,
      "learning_rate": 8.138413183714081e-06,
      "loss": 0.0391,
      "step": 585040
    },
    {
      "epoch": 0.9574635219261208,
      "grad_norm": 1.3366385698318481,
      "learning_rate": 8.138347291500564e-06,
      "loss": 0.0252,
      "step": 585060
    },
    {
      "epoch": 0.9574962523647742,
      "grad_norm": 1.4577113389968872,
      "learning_rate": 8.138281399287046e-06,
      "loss": 0.0274,
      "step": 585080
    },
    {
      "epoch": 0.9575289828034276,
      "grad_norm": 0.9047006964683533,
      "learning_rate": 8.13821550707353e-06,
      "loss": 0.0239,
      "step": 585100
    },
    {
      "epoch": 0.9575617132420808,
      "grad_norm": 0.6691707372665405,
      "learning_rate": 8.138149614860012e-06,
      "loss": 0.0227,
      "step": 585120
    },
    {
      "epoch": 0.9575944436807342,
      "grad_norm": 0.5672834515571594,
      "learning_rate": 8.138083722646495e-06,
      "loss": 0.0194,
      "step": 585140
    },
    {
      "epoch": 0.9576271741193876,
      "grad_norm": 0.5120121240615845,
      "learning_rate": 8.138017830432979e-06,
      "loss": 0.0366,
      "step": 585160
    },
    {
      "epoch": 0.9576599045580408,
      "grad_norm": 1.3480602502822876,
      "learning_rate": 8.137951938219461e-06,
      "loss": 0.0251,
      "step": 585180
    },
    {
      "epoch": 0.9576926349966942,
      "grad_norm": 2.0321309566497803,
      "learning_rate": 8.137886046005945e-06,
      "loss": 0.0274,
      "step": 585200
    },
    {
      "epoch": 0.9577253654353476,
      "grad_norm": 1.0391254425048828,
      "learning_rate": 8.137820153792428e-06,
      "loss": 0.0251,
      "step": 585220
    },
    {
      "epoch": 0.957758095874001,
      "grad_norm": 0.6997674107551575,
      "learning_rate": 8.13775426157891e-06,
      "loss": 0.0321,
      "step": 585240
    },
    {
      "epoch": 0.9577908263126542,
      "grad_norm": 0.9498279690742493,
      "learning_rate": 8.137688369365394e-06,
      "loss": 0.0181,
      "step": 585260
    },
    {
      "epoch": 0.9578235567513076,
      "grad_norm": 0.4891347289085388,
      "learning_rate": 8.137622477151875e-06,
      "loss": 0.0328,
      "step": 585280
    },
    {
      "epoch": 0.957856287189961,
      "grad_norm": 0.541074275970459,
      "learning_rate": 8.137556584938359e-06,
      "loss": 0.023,
      "step": 585300
    },
    {
      "epoch": 0.9578890176286142,
      "grad_norm": 0.6886613368988037,
      "learning_rate": 8.137490692724841e-06,
      "loss": 0.0319,
      "step": 585320
    },
    {
      "epoch": 0.9579217480672676,
      "grad_norm": 1.7547786235809326,
      "learning_rate": 8.137424800511325e-06,
      "loss": 0.025,
      "step": 585340
    },
    {
      "epoch": 0.957954478505921,
      "grad_norm": 0.2719634771347046,
      "learning_rate": 8.137358908297806e-06,
      "loss": 0.0207,
      "step": 585360
    },
    {
      "epoch": 0.9579872089445742,
      "grad_norm": 0.633111834526062,
      "learning_rate": 8.13729301608429e-06,
      "loss": 0.0273,
      "step": 585380
    },
    {
      "epoch": 0.9580199393832276,
      "grad_norm": 7.348052024841309,
      "learning_rate": 8.137227123870774e-06,
      "loss": 0.0297,
      "step": 585400
    },
    {
      "epoch": 0.958052669821881,
      "grad_norm": 1.0158281326293945,
      "learning_rate": 8.137161231657255e-06,
      "loss": 0.0258,
      "step": 585420
    },
    {
      "epoch": 0.9580854002605342,
      "grad_norm": 0.32343629002571106,
      "learning_rate": 8.137095339443739e-06,
      "loss": 0.0276,
      "step": 585440
    },
    {
      "epoch": 0.9581181306991876,
      "grad_norm": 1.8614253997802734,
      "learning_rate": 8.137029447230221e-06,
      "loss": 0.0203,
      "step": 585460
    },
    {
      "epoch": 0.958150861137841,
      "grad_norm": 0.6993054747581482,
      "learning_rate": 8.136963555016705e-06,
      "loss": 0.0276,
      "step": 585480
    },
    {
      "epoch": 0.9581835915764944,
      "grad_norm": 0.5282862782478333,
      "learning_rate": 8.136897662803186e-06,
      "loss": 0.0242,
      "step": 585500
    },
    {
      "epoch": 0.9582163220151476,
      "grad_norm": 0.9102561473846436,
      "learning_rate": 8.13683177058967e-06,
      "loss": 0.0382,
      "step": 585520
    },
    {
      "epoch": 0.958249052453801,
      "grad_norm": 2.743732213973999,
      "learning_rate": 8.136765878376152e-06,
      "loss": 0.0296,
      "step": 585540
    },
    {
      "epoch": 0.9582817828924544,
      "grad_norm": 0.4755643904209137,
      "learning_rate": 8.136699986162636e-06,
      "loss": 0.0211,
      "step": 585560
    },
    {
      "epoch": 0.9583145133311076,
      "grad_norm": 0.4039231836795807,
      "learning_rate": 8.136634093949119e-06,
      "loss": 0.0281,
      "step": 585580
    },
    {
      "epoch": 0.958347243769761,
      "grad_norm": 0.5065407752990723,
      "learning_rate": 8.136568201735601e-06,
      "loss": 0.0257,
      "step": 585600
    },
    {
      "epoch": 0.9583799742084144,
      "grad_norm": 3.667628765106201,
      "learning_rate": 8.136502309522085e-06,
      "loss": 0.0319,
      "step": 585620
    },
    {
      "epoch": 0.9584127046470676,
      "grad_norm": 1.3088929653167725,
      "learning_rate": 8.136436417308568e-06,
      "loss": 0.0281,
      "step": 585640
    },
    {
      "epoch": 0.958445435085721,
      "grad_norm": 1.3537561893463135,
      "learning_rate": 8.13637052509505e-06,
      "loss": 0.0264,
      "step": 585660
    },
    {
      "epoch": 0.9584781655243744,
      "grad_norm": 1.178423523902893,
      "learning_rate": 8.136304632881534e-06,
      "loss": 0.0323,
      "step": 585680
    },
    {
      "epoch": 0.9585108959630277,
      "grad_norm": 1.430471420288086,
      "learning_rate": 8.136238740668016e-06,
      "loss": 0.0305,
      "step": 585700
    },
    {
      "epoch": 0.958543626401681,
      "grad_norm": 0.5230157375335693,
      "learning_rate": 8.136172848454499e-06,
      "loss": 0.0339,
      "step": 585720
    },
    {
      "epoch": 0.9585763568403344,
      "grad_norm": 1.0001184940338135,
      "learning_rate": 8.136106956240981e-06,
      "loss": 0.0234,
      "step": 585740
    },
    {
      "epoch": 0.9586090872789877,
      "grad_norm": 0.3846847414970398,
      "learning_rate": 8.136041064027465e-06,
      "loss": 0.0355,
      "step": 585760
    },
    {
      "epoch": 0.958641817717641,
      "grad_norm": 0.8860448598861694,
      "learning_rate": 8.135975171813948e-06,
      "loss": 0.0288,
      "step": 585780
    },
    {
      "epoch": 0.9586745481562944,
      "grad_norm": 0.5683642625808716,
      "learning_rate": 8.13590927960043e-06,
      "loss": 0.0268,
      "step": 585800
    },
    {
      "epoch": 0.9587072785949478,
      "grad_norm": 4.603201866149902,
      "learning_rate": 8.135843387386914e-06,
      "loss": 0.0345,
      "step": 585820
    },
    {
      "epoch": 0.958740009033601,
      "grad_norm": 0.5292919278144836,
      "learning_rate": 8.135777495173396e-06,
      "loss": 0.0332,
      "step": 585840
    },
    {
      "epoch": 0.9587727394722544,
      "grad_norm": 0.5582185387611389,
      "learning_rate": 8.13571160295988e-06,
      "loss": 0.0321,
      "step": 585860
    },
    {
      "epoch": 0.9588054699109078,
      "grad_norm": 0.681499719619751,
      "learning_rate": 8.135645710746361e-06,
      "loss": 0.0192,
      "step": 585880
    },
    {
      "epoch": 0.9588382003495611,
      "grad_norm": 0.44777578115463257,
      "learning_rate": 8.135579818532845e-06,
      "loss": 0.0206,
      "step": 585900
    },
    {
      "epoch": 0.9588709307882144,
      "grad_norm": 0.7019476890563965,
      "learning_rate": 8.135513926319327e-06,
      "loss": 0.0364,
      "step": 585920
    },
    {
      "epoch": 0.9589036612268678,
      "grad_norm": 0.4505389630794525,
      "learning_rate": 8.13544803410581e-06,
      "loss": 0.0237,
      "step": 585940
    },
    {
      "epoch": 0.9589363916655211,
      "grad_norm": 2.423799514770508,
      "learning_rate": 8.135382141892294e-06,
      "loss": 0.0332,
      "step": 585960
    },
    {
      "epoch": 0.9589691221041744,
      "grad_norm": 0.6486983299255371,
      "learning_rate": 8.135316249678776e-06,
      "loss": 0.0212,
      "step": 585980
    },
    {
      "epoch": 0.9590018525428278,
      "grad_norm": 0.38276252150535583,
      "learning_rate": 8.13525035746526e-06,
      "loss": 0.0278,
      "step": 586000
    },
    {
      "epoch": 0.9590345829814811,
      "grad_norm": 0.9872370958328247,
      "learning_rate": 8.135184465251743e-06,
      "loss": 0.0298,
      "step": 586020
    },
    {
      "epoch": 0.9590673134201344,
      "grad_norm": 0.9567499756813049,
      "learning_rate": 8.135118573038225e-06,
      "loss": 0.0373,
      "step": 586040
    },
    {
      "epoch": 0.9591000438587878,
      "grad_norm": 0.31928977370262146,
      "learning_rate": 8.135052680824708e-06,
      "loss": 0.0406,
      "step": 586060
    },
    {
      "epoch": 0.9591327742974411,
      "grad_norm": 0.2147705852985382,
      "learning_rate": 8.13498678861119e-06,
      "loss": 0.0276,
      "step": 586080
    },
    {
      "epoch": 0.9591655047360945,
      "grad_norm": 0.8174648284912109,
      "learning_rate": 8.134920896397674e-06,
      "loss": 0.0295,
      "step": 586100
    },
    {
      "epoch": 0.9591982351747478,
      "grad_norm": 0.47204214334487915,
      "learning_rate": 8.134855004184157e-06,
      "loss": 0.0278,
      "step": 586120
    },
    {
      "epoch": 0.9592309656134012,
      "grad_norm": 0.18412679433822632,
      "learning_rate": 8.13478911197064e-06,
      "loss": 0.0253,
      "step": 586140
    },
    {
      "epoch": 0.9592636960520545,
      "grad_norm": 0.6267938017845154,
      "learning_rate": 8.134723219757123e-06,
      "loss": 0.0288,
      "step": 586160
    },
    {
      "epoch": 0.9592964264907078,
      "grad_norm": 0.9305625557899475,
      "learning_rate": 8.134657327543605e-06,
      "loss": 0.0276,
      "step": 586180
    },
    {
      "epoch": 0.9593291569293612,
      "grad_norm": 1.374882459640503,
      "learning_rate": 8.134591435330088e-06,
      "loss": 0.0255,
      "step": 586200
    },
    {
      "epoch": 0.9593618873680145,
      "grad_norm": 0.313266783952713,
      "learning_rate": 8.13452554311657e-06,
      "loss": 0.0222,
      "step": 586220
    },
    {
      "epoch": 0.9593946178066678,
      "grad_norm": 0.6748858094215393,
      "learning_rate": 8.134459650903054e-06,
      "loss": 0.0342,
      "step": 586240
    },
    {
      "epoch": 0.9594273482453212,
      "grad_norm": 3.042907238006592,
      "learning_rate": 8.134393758689536e-06,
      "loss": 0.0362,
      "step": 586260
    },
    {
      "epoch": 0.9594600786839745,
      "grad_norm": 0.9771036505699158,
      "learning_rate": 8.13432786647602e-06,
      "loss": 0.015,
      "step": 586280
    },
    {
      "epoch": 0.9594928091226279,
      "grad_norm": 1.0858893394470215,
      "learning_rate": 8.134261974262501e-06,
      "loss": 0.0271,
      "step": 586300
    },
    {
      "epoch": 0.9595255395612812,
      "grad_norm": 2.372077226638794,
      "learning_rate": 8.134196082048985e-06,
      "loss": 0.0305,
      "step": 586320
    },
    {
      "epoch": 0.9595582699999345,
      "grad_norm": 3.782621383666992,
      "learning_rate": 8.134130189835467e-06,
      "loss": 0.0245,
      "step": 586340
    },
    {
      "epoch": 0.9595910004385879,
      "grad_norm": 0.1744956225156784,
      "learning_rate": 8.13406429762195e-06,
      "loss": 0.0302,
      "step": 586360
    },
    {
      "epoch": 0.9596237308772412,
      "grad_norm": 0.24723945558071136,
      "learning_rate": 8.133998405408434e-06,
      "loss": 0.0191,
      "step": 586380
    },
    {
      "epoch": 0.9596564613158945,
      "grad_norm": 0.781593382358551,
      "learning_rate": 8.133932513194916e-06,
      "loss": 0.0254,
      "step": 586400
    },
    {
      "epoch": 0.9596891917545479,
      "grad_norm": 1.5088422298431396,
      "learning_rate": 8.1338666209814e-06,
      "loss": 0.0282,
      "step": 586420
    },
    {
      "epoch": 0.9597219221932012,
      "grad_norm": 0.4104056656360626,
      "learning_rate": 8.133800728767883e-06,
      "loss": 0.027,
      "step": 586440
    },
    {
      "epoch": 0.9597546526318546,
      "grad_norm": 1.3862653970718384,
      "learning_rate": 8.133734836554365e-06,
      "loss": 0.0368,
      "step": 586460
    },
    {
      "epoch": 0.9597873830705079,
      "grad_norm": 1.362921118736267,
      "learning_rate": 8.133668944340848e-06,
      "loss": 0.025,
      "step": 586480
    },
    {
      "epoch": 0.9598201135091613,
      "grad_norm": 1.4499449729919434,
      "learning_rate": 8.133603052127332e-06,
      "loss": 0.025,
      "step": 586500
    },
    {
      "epoch": 0.9598528439478146,
      "grad_norm": 0.9217769503593445,
      "learning_rate": 8.133537159913814e-06,
      "loss": 0.0364,
      "step": 586520
    },
    {
      "epoch": 0.9598855743864679,
      "grad_norm": 0.9754868149757385,
      "learning_rate": 8.133471267700297e-06,
      "loss": 0.0194,
      "step": 586540
    },
    {
      "epoch": 0.9599183048251213,
      "grad_norm": 0.5312763452529907,
      "learning_rate": 8.13340537548678e-06,
      "loss": 0.0227,
      "step": 586560
    },
    {
      "epoch": 0.9599510352637746,
      "grad_norm": 0.7370017766952515,
      "learning_rate": 8.133339483273263e-06,
      "loss": 0.0189,
      "step": 586580
    },
    {
      "epoch": 0.9599837657024279,
      "grad_norm": 0.7278198003768921,
      "learning_rate": 8.133273591059745e-06,
      "loss": 0.0267,
      "step": 586600
    },
    {
      "epoch": 0.9600164961410813,
      "grad_norm": 1.20412015914917,
      "learning_rate": 8.133207698846228e-06,
      "loss": 0.025,
      "step": 586620
    },
    {
      "epoch": 0.9600492265797346,
      "grad_norm": 1.6129648685455322,
      "learning_rate": 8.13314180663271e-06,
      "loss": 0.0281,
      "step": 586640
    },
    {
      "epoch": 0.9600819570183879,
      "grad_norm": 0.441304475069046,
      "learning_rate": 8.133075914419194e-06,
      "loss": 0.0182,
      "step": 586660
    },
    {
      "epoch": 0.9601146874570413,
      "grad_norm": 0.8255406618118286,
      "learning_rate": 8.133010022205676e-06,
      "loss": 0.0281,
      "step": 586680
    },
    {
      "epoch": 0.9601474178956947,
      "grad_norm": 0.6523085832595825,
      "learning_rate": 8.13294412999216e-06,
      "loss": 0.018,
      "step": 586700
    },
    {
      "epoch": 0.960180148334348,
      "grad_norm": 0.4541500210762024,
      "learning_rate": 8.132878237778641e-06,
      "loss": 0.0267,
      "step": 586720
    },
    {
      "epoch": 0.9602128787730013,
      "grad_norm": 0.24064163863658905,
      "learning_rate": 8.132812345565125e-06,
      "loss": 0.0322,
      "step": 586740
    },
    {
      "epoch": 0.9602456092116547,
      "grad_norm": 0.98354172706604,
      "learning_rate": 8.132746453351608e-06,
      "loss": 0.0366,
      "step": 586760
    },
    {
      "epoch": 0.960278339650308,
      "grad_norm": 1.3503025770187378,
      "learning_rate": 8.13268056113809e-06,
      "loss": 0.0284,
      "step": 586780
    },
    {
      "epoch": 0.9603110700889613,
      "grad_norm": 0.2491651326417923,
      "learning_rate": 8.132614668924574e-06,
      "loss": 0.0282,
      "step": 586800
    },
    {
      "epoch": 0.9603438005276147,
      "grad_norm": 0.5559443235397339,
      "learning_rate": 8.132548776711057e-06,
      "loss": 0.0202,
      "step": 586820
    },
    {
      "epoch": 0.960376530966268,
      "grad_norm": 1.3455561399459839,
      "learning_rate": 8.13248288449754e-06,
      "loss": 0.0251,
      "step": 586840
    },
    {
      "epoch": 0.9604092614049213,
      "grad_norm": 1.5226585865020752,
      "learning_rate": 8.132416992284023e-06,
      "loss": 0.0213,
      "step": 586860
    },
    {
      "epoch": 0.9604419918435747,
      "grad_norm": 0.7031468749046326,
      "learning_rate": 8.132351100070507e-06,
      "loss": 0.0326,
      "step": 586880
    },
    {
      "epoch": 0.9604747222822281,
      "grad_norm": 0.992672860622406,
      "learning_rate": 8.132285207856988e-06,
      "loss": 0.0238,
      "step": 586900
    },
    {
      "epoch": 0.9605074527208813,
      "grad_norm": 0.326462984085083,
      "learning_rate": 8.132219315643472e-06,
      "loss": 0.0317,
      "step": 586920
    },
    {
      "epoch": 0.9605401831595347,
      "grad_norm": 0.25689247250556946,
      "learning_rate": 8.132153423429954e-06,
      "loss": 0.0244,
      "step": 586940
    },
    {
      "epoch": 0.9605729135981881,
      "grad_norm": 1.394849181175232,
      "learning_rate": 8.132087531216437e-06,
      "loss": 0.0232,
      "step": 586960
    },
    {
      "epoch": 0.9606056440368413,
      "grad_norm": 1.89951491355896,
      "learning_rate": 8.13202163900292e-06,
      "loss": 0.0191,
      "step": 586980
    },
    {
      "epoch": 0.9606383744754947,
      "grad_norm": 0.9970517158508301,
      "learning_rate": 8.131955746789403e-06,
      "loss": 0.0149,
      "step": 587000
    },
    {
      "epoch": 0.9606711049141481,
      "grad_norm": 0.7438925504684448,
      "learning_rate": 8.131889854575885e-06,
      "loss": 0.0175,
      "step": 587020
    },
    {
      "epoch": 0.9607038353528013,
      "grad_norm": 3.522901773452759,
      "learning_rate": 8.131823962362368e-06,
      "loss": 0.0289,
      "step": 587040
    },
    {
      "epoch": 0.9607365657914547,
      "grad_norm": 0.3320866525173187,
      "learning_rate": 8.13175807014885e-06,
      "loss": 0.0294,
      "step": 587060
    },
    {
      "epoch": 0.9607692962301081,
      "grad_norm": 1.9497839212417603,
      "learning_rate": 8.131692177935334e-06,
      "loss": 0.0236,
      "step": 587080
    },
    {
      "epoch": 0.9608020266687615,
      "grad_norm": 1.4221440553665161,
      "learning_rate": 8.131626285721816e-06,
      "loss": 0.0288,
      "step": 587100
    },
    {
      "epoch": 0.9608347571074147,
      "grad_norm": 0.4085850417613983,
      "learning_rate": 8.1315603935083e-06,
      "loss": 0.0283,
      "step": 587120
    },
    {
      "epoch": 0.9608674875460681,
      "grad_norm": 1.0145777463912964,
      "learning_rate": 8.131494501294781e-06,
      "loss": 0.0321,
      "step": 587140
    },
    {
      "epoch": 0.9609002179847215,
      "grad_norm": 1.3039791584014893,
      "learning_rate": 8.131428609081265e-06,
      "loss": 0.0277,
      "step": 587160
    },
    {
      "epoch": 0.9609329484233747,
      "grad_norm": 0.6700736284255981,
      "learning_rate": 8.131362716867748e-06,
      "loss": 0.0268,
      "step": 587180
    },
    {
      "epoch": 0.9609656788620281,
      "grad_norm": 0.5931949615478516,
      "learning_rate": 8.131296824654232e-06,
      "loss": 0.0211,
      "step": 587200
    },
    {
      "epoch": 0.9609984093006815,
      "grad_norm": 0.7085243463516235,
      "learning_rate": 8.131230932440714e-06,
      "loss": 0.0327,
      "step": 587220
    },
    {
      "epoch": 0.9610311397393347,
      "grad_norm": 0.7435715198516846,
      "learning_rate": 8.131165040227198e-06,
      "loss": 0.0281,
      "step": 587240
    },
    {
      "epoch": 0.9610638701779881,
      "grad_norm": 0.8251907825469971,
      "learning_rate": 8.131099148013681e-06,
      "loss": 0.029,
      "step": 587260
    },
    {
      "epoch": 0.9610966006166415,
      "grad_norm": 1.0118274688720703,
      "learning_rate": 8.131033255800163e-06,
      "loss": 0.0252,
      "step": 587280
    },
    {
      "epoch": 0.9611293310552949,
      "grad_norm": 0.42582038044929504,
      "learning_rate": 8.130967363586647e-06,
      "loss": 0.0343,
      "step": 587300
    },
    {
      "epoch": 0.9611620614939481,
      "grad_norm": 0.22519604861736298,
      "learning_rate": 8.130901471373128e-06,
      "loss": 0.0247,
      "step": 587320
    },
    {
      "epoch": 0.9611947919326015,
      "grad_norm": 1.2131491899490356,
      "learning_rate": 8.130835579159612e-06,
      "loss": 0.0369,
      "step": 587340
    },
    {
      "epoch": 0.9612275223712549,
      "grad_norm": 0.9914535284042358,
      "learning_rate": 8.130769686946094e-06,
      "loss": 0.0268,
      "step": 587360
    },
    {
      "epoch": 0.9612602528099081,
      "grad_norm": 1.0971765518188477,
      "learning_rate": 8.130703794732578e-06,
      "loss": 0.0322,
      "step": 587380
    },
    {
      "epoch": 0.9612929832485615,
      "grad_norm": 0.5739445090293884,
      "learning_rate": 8.13063790251906e-06,
      "loss": 0.0276,
      "step": 587400
    },
    {
      "epoch": 0.9613257136872149,
      "grad_norm": 0.4287845194339752,
      "learning_rate": 8.130572010305543e-06,
      "loss": 0.0297,
      "step": 587420
    },
    {
      "epoch": 0.9613584441258681,
      "grad_norm": 0.8778263926506042,
      "learning_rate": 8.130506118092025e-06,
      "loss": 0.0226,
      "step": 587440
    },
    {
      "epoch": 0.9613911745645215,
      "grad_norm": 0.5287996530532837,
      "learning_rate": 8.130440225878509e-06,
      "loss": 0.027,
      "step": 587460
    },
    {
      "epoch": 0.9614239050031749,
      "grad_norm": 2.032642364501953,
      "learning_rate": 8.13037433366499e-06,
      "loss": 0.0286,
      "step": 587480
    },
    {
      "epoch": 0.9614566354418282,
      "grad_norm": 0.7499158382415771,
      "learning_rate": 8.130308441451474e-06,
      "loss": 0.0202,
      "step": 587500
    },
    {
      "epoch": 0.9614893658804815,
      "grad_norm": 0.39117515087127686,
      "learning_rate": 8.130242549237958e-06,
      "loss": 0.0208,
      "step": 587520
    },
    {
      "epoch": 0.9615220963191349,
      "grad_norm": 0.9562186002731323,
      "learning_rate": 8.13017665702444e-06,
      "loss": 0.0249,
      "step": 587540
    },
    {
      "epoch": 0.9615548267577883,
      "grad_norm": 1.3786520957946777,
      "learning_rate": 8.130110764810923e-06,
      "loss": 0.029,
      "step": 587560
    },
    {
      "epoch": 0.9615875571964415,
      "grad_norm": 1.2749783992767334,
      "learning_rate": 8.130044872597405e-06,
      "loss": 0.0312,
      "step": 587580
    },
    {
      "epoch": 0.9616202876350949,
      "grad_norm": 0.6985549330711365,
      "learning_rate": 8.129978980383889e-06,
      "loss": 0.0329,
      "step": 587600
    },
    {
      "epoch": 0.9616530180737483,
      "grad_norm": 3.824904441833496,
      "learning_rate": 8.129913088170372e-06,
      "loss": 0.028,
      "step": 587620
    },
    {
      "epoch": 0.9616857485124015,
      "grad_norm": 0.7903786301612854,
      "learning_rate": 8.129847195956854e-06,
      "loss": 0.0334,
      "step": 587640
    },
    {
      "epoch": 0.9617184789510549,
      "grad_norm": 1.0439616441726685,
      "learning_rate": 8.129781303743338e-06,
      "loss": 0.0239,
      "step": 587660
    },
    {
      "epoch": 0.9617512093897083,
      "grad_norm": 1.73347008228302,
      "learning_rate": 8.129715411529821e-06,
      "loss": 0.0334,
      "step": 587680
    },
    {
      "epoch": 0.9617839398283616,
      "grad_norm": 0.9534507393836975,
      "learning_rate": 8.129649519316303e-06,
      "loss": 0.0195,
      "step": 587700
    },
    {
      "epoch": 0.9618166702670149,
      "grad_norm": 0.4390757381916046,
      "learning_rate": 8.129583627102787e-06,
      "loss": 0.0264,
      "step": 587720
    },
    {
      "epoch": 0.9618494007056683,
      "grad_norm": 0.5486384630203247,
      "learning_rate": 8.129517734889269e-06,
      "loss": 0.0284,
      "step": 587740
    },
    {
      "epoch": 0.9618821311443216,
      "grad_norm": 0.5877203345298767,
      "learning_rate": 8.129451842675752e-06,
      "loss": 0.0335,
      "step": 587760
    },
    {
      "epoch": 0.9619148615829749,
      "grad_norm": 0.9488940834999084,
      "learning_rate": 8.129385950462234e-06,
      "loss": 0.0368,
      "step": 587780
    },
    {
      "epoch": 0.9619475920216283,
      "grad_norm": 0.8505862355232239,
      "learning_rate": 8.129320058248718e-06,
      "loss": 0.0291,
      "step": 587800
    },
    {
      "epoch": 0.9619803224602816,
      "grad_norm": 0.23422342538833618,
      "learning_rate": 8.1292541660352e-06,
      "loss": 0.0214,
      "step": 587820
    },
    {
      "epoch": 0.9620130528989349,
      "grad_norm": 0.8947767615318298,
      "learning_rate": 8.129188273821683e-06,
      "loss": 0.0264,
      "step": 587840
    },
    {
      "epoch": 0.9620457833375883,
      "grad_norm": 0.8289175629615784,
      "learning_rate": 8.129122381608167e-06,
      "loss": 0.0302,
      "step": 587860
    },
    {
      "epoch": 0.9620785137762417,
      "grad_norm": 0.3100802004337311,
      "learning_rate": 8.129056489394649e-06,
      "loss": 0.0246,
      "step": 587880
    },
    {
      "epoch": 0.962111244214895,
      "grad_norm": 1.1863949298858643,
      "learning_rate": 8.128990597181132e-06,
      "loss": 0.0288,
      "step": 587900
    },
    {
      "epoch": 0.9621439746535483,
      "grad_norm": 1.1104137897491455,
      "learning_rate": 8.128924704967614e-06,
      "loss": 0.0193,
      "step": 587920
    },
    {
      "epoch": 0.9621767050922017,
      "grad_norm": 0.6804127097129822,
      "learning_rate": 8.128858812754098e-06,
      "loss": 0.024,
      "step": 587940
    },
    {
      "epoch": 0.962209435530855,
      "grad_norm": 0.9364744424819946,
      "learning_rate": 8.12879292054058e-06,
      "loss": 0.0304,
      "step": 587960
    },
    {
      "epoch": 0.9622421659695083,
      "grad_norm": 0.5051937103271484,
      "learning_rate": 8.128727028327063e-06,
      "loss": 0.025,
      "step": 587980
    },
    {
      "epoch": 0.9622748964081617,
      "grad_norm": 0.4487352669239044,
      "learning_rate": 8.128661136113547e-06,
      "loss": 0.0381,
      "step": 588000
    },
    {
      "epoch": 0.962307626846815,
      "grad_norm": 0.3837146461009979,
      "learning_rate": 8.128595243900029e-06,
      "loss": 0.0315,
      "step": 588020
    },
    {
      "epoch": 0.9623403572854683,
      "grad_norm": 0.8217980265617371,
      "learning_rate": 8.128529351686512e-06,
      "loss": 0.0212,
      "step": 588040
    },
    {
      "epoch": 0.9623730877241217,
      "grad_norm": 0.20740674436092377,
      "learning_rate": 8.128463459472996e-06,
      "loss": 0.0208,
      "step": 588060
    },
    {
      "epoch": 0.962405818162775,
      "grad_norm": 0.5883300304412842,
      "learning_rate": 8.128397567259478e-06,
      "loss": 0.0283,
      "step": 588080
    },
    {
      "epoch": 0.9624385486014283,
      "grad_norm": 0.5416842103004456,
      "learning_rate": 8.128331675045961e-06,
      "loss": 0.0277,
      "step": 588100
    },
    {
      "epoch": 0.9624712790400817,
      "grad_norm": 0.6220200061798096,
      "learning_rate": 8.128265782832443e-06,
      "loss": 0.025,
      "step": 588120
    },
    {
      "epoch": 0.962504009478735,
      "grad_norm": 0.8172366619110107,
      "learning_rate": 8.128199890618927e-06,
      "loss": 0.0391,
      "step": 588140
    },
    {
      "epoch": 0.9625367399173884,
      "grad_norm": 0.9567011594772339,
      "learning_rate": 8.128133998405409e-06,
      "loss": 0.0195,
      "step": 588160
    },
    {
      "epoch": 0.9625694703560417,
      "grad_norm": 0.7938311100006104,
      "learning_rate": 8.128068106191892e-06,
      "loss": 0.0257,
      "step": 588180
    },
    {
      "epoch": 0.962602200794695,
      "grad_norm": 0.8874706029891968,
      "learning_rate": 8.128002213978374e-06,
      "loss": 0.0362,
      "step": 588200
    },
    {
      "epoch": 0.9626349312333484,
      "grad_norm": 0.2931434214115143,
      "learning_rate": 8.127936321764858e-06,
      "loss": 0.021,
      "step": 588220
    },
    {
      "epoch": 0.9626676616720017,
      "grad_norm": 0.6927542090415955,
      "learning_rate": 8.127870429551341e-06,
      "loss": 0.0287,
      "step": 588240
    },
    {
      "epoch": 0.9627003921106551,
      "grad_norm": 1.4310474395751953,
      "learning_rate": 8.127804537337823e-06,
      "loss": 0.0332,
      "step": 588260
    },
    {
      "epoch": 0.9627331225493084,
      "grad_norm": 1.2902345657348633,
      "learning_rate": 8.127738645124307e-06,
      "loss": 0.0198,
      "step": 588280
    },
    {
      "epoch": 0.9627658529879617,
      "grad_norm": 1.1108120679855347,
      "learning_rate": 8.127672752910789e-06,
      "loss": 0.0322,
      "step": 588300
    },
    {
      "epoch": 0.9627985834266151,
      "grad_norm": 0.7882229089736938,
      "learning_rate": 8.127606860697272e-06,
      "loss": 0.0305,
      "step": 588320
    },
    {
      "epoch": 0.9628313138652684,
      "grad_norm": 0.4791402220726013,
      "learning_rate": 8.127540968483754e-06,
      "loss": 0.0249,
      "step": 588340
    },
    {
      "epoch": 0.9628640443039218,
      "grad_norm": 1.6585257053375244,
      "learning_rate": 8.127475076270238e-06,
      "loss": 0.0274,
      "step": 588360
    },
    {
      "epoch": 0.9628967747425751,
      "grad_norm": 0.5605455040931702,
      "learning_rate": 8.12740918405672e-06,
      "loss": 0.0277,
      "step": 588380
    },
    {
      "epoch": 0.9629295051812284,
      "grad_norm": 0.6549240946769714,
      "learning_rate": 8.127343291843203e-06,
      "loss": 0.0294,
      "step": 588400
    },
    {
      "epoch": 0.9629622356198818,
      "grad_norm": 0.5800123810768127,
      "learning_rate": 8.127277399629687e-06,
      "loss": 0.0298,
      "step": 588420
    },
    {
      "epoch": 0.9629949660585351,
      "grad_norm": 0.19366849958896637,
      "learning_rate": 8.127211507416169e-06,
      "loss": 0.0306,
      "step": 588440
    },
    {
      "epoch": 0.9630276964971884,
      "grad_norm": 0.8638285398483276,
      "learning_rate": 8.127145615202652e-06,
      "loss": 0.0329,
      "step": 588460
    },
    {
      "epoch": 0.9630604269358418,
      "grad_norm": 0.8931167125701904,
      "learning_rate": 8.127079722989136e-06,
      "loss": 0.023,
      "step": 588480
    },
    {
      "epoch": 0.9630931573744951,
      "grad_norm": 0.44144535064697266,
      "learning_rate": 8.127013830775618e-06,
      "loss": 0.026,
      "step": 588500
    },
    {
      "epoch": 0.9631258878131485,
      "grad_norm": 3.7145023345947266,
      "learning_rate": 8.126947938562101e-06,
      "loss": 0.0302,
      "step": 588520
    },
    {
      "epoch": 0.9631586182518018,
      "grad_norm": 1.3287593126296997,
      "learning_rate": 8.126882046348583e-06,
      "loss": 0.0378,
      "step": 588540
    },
    {
      "epoch": 0.9631913486904552,
      "grad_norm": 0.27351856231689453,
      "learning_rate": 8.126816154135067e-06,
      "loss": 0.0276,
      "step": 588560
    },
    {
      "epoch": 0.9632240791291085,
      "grad_norm": 0.5676729679107666,
      "learning_rate": 8.12675026192155e-06,
      "loss": 0.0253,
      "step": 588580
    },
    {
      "epoch": 0.9632568095677618,
      "grad_norm": 0.39448311924934387,
      "learning_rate": 8.126684369708032e-06,
      "loss": 0.0265,
      "step": 588600
    },
    {
      "epoch": 0.9632895400064152,
      "grad_norm": 0.7606938481330872,
      "learning_rate": 8.126618477494516e-06,
      "loss": 0.025,
      "step": 588620
    },
    {
      "epoch": 0.9633222704450685,
      "grad_norm": 4.261073112487793,
      "learning_rate": 8.126552585280998e-06,
      "loss": 0.0282,
      "step": 588640
    },
    {
      "epoch": 0.9633550008837218,
      "grad_norm": 0.6784936189651489,
      "learning_rate": 8.126486693067481e-06,
      "loss": 0.0247,
      "step": 588660
    },
    {
      "epoch": 0.9633877313223752,
      "grad_norm": 0.6120280027389526,
      "learning_rate": 8.126420800853963e-06,
      "loss": 0.032,
      "step": 588680
    },
    {
      "epoch": 0.9634204617610285,
      "grad_norm": 2.0919477939605713,
      "learning_rate": 8.126354908640447e-06,
      "loss": 0.0307,
      "step": 588700
    },
    {
      "epoch": 0.9634531921996818,
      "grad_norm": 0.9188101887702942,
      "learning_rate": 8.126289016426929e-06,
      "loss": 0.0349,
      "step": 588720
    },
    {
      "epoch": 0.9634859226383352,
      "grad_norm": 0.803062915802002,
      "learning_rate": 8.126223124213412e-06,
      "loss": 0.0442,
      "step": 588740
    },
    {
      "epoch": 0.9635186530769886,
      "grad_norm": 0.09832776337862015,
      "learning_rate": 8.126157231999894e-06,
      "loss": 0.0236,
      "step": 588760
    },
    {
      "epoch": 0.9635513835156418,
      "grad_norm": 0.9181336164474487,
      "learning_rate": 8.126091339786378e-06,
      "loss": 0.0203,
      "step": 588780
    },
    {
      "epoch": 0.9635841139542952,
      "grad_norm": 0.7723488807678223,
      "learning_rate": 8.126025447572861e-06,
      "loss": 0.0293,
      "step": 588800
    },
    {
      "epoch": 0.9636168443929486,
      "grad_norm": 0.9259589910507202,
      "learning_rate": 8.125959555359343e-06,
      "loss": 0.0365,
      "step": 588820
    },
    {
      "epoch": 0.9636495748316019,
      "grad_norm": 0.11834291368722916,
      "learning_rate": 8.125893663145827e-06,
      "loss": 0.0225,
      "step": 588840
    },
    {
      "epoch": 0.9636823052702552,
      "grad_norm": 1.127281904220581,
      "learning_rate": 8.12582777093231e-06,
      "loss": 0.022,
      "step": 588860
    },
    {
      "epoch": 0.9637150357089086,
      "grad_norm": 0.7702616453170776,
      "learning_rate": 8.125761878718792e-06,
      "loss": 0.0289,
      "step": 588880
    },
    {
      "epoch": 0.9637477661475619,
      "grad_norm": 0.924264669418335,
      "learning_rate": 8.125695986505276e-06,
      "loss": 0.02,
      "step": 588900
    },
    {
      "epoch": 0.9637804965862152,
      "grad_norm": 0.10770613700151443,
      "learning_rate": 8.12563009429176e-06,
      "loss": 0.0178,
      "step": 588920
    },
    {
      "epoch": 0.9638132270248686,
      "grad_norm": 0.9709678888320923,
      "learning_rate": 8.125564202078241e-06,
      "loss": 0.028,
      "step": 588940
    },
    {
      "epoch": 0.963845957463522,
      "grad_norm": 0.6854819059371948,
      "learning_rate": 8.125498309864725e-06,
      "loss": 0.0178,
      "step": 588960
    },
    {
      "epoch": 0.9638786879021752,
      "grad_norm": 0.5840540528297424,
      "learning_rate": 8.125432417651207e-06,
      "loss": 0.0355,
      "step": 588980
    },
    {
      "epoch": 0.9639114183408286,
      "grad_norm": 1.6498554944992065,
      "learning_rate": 8.12536652543769e-06,
      "loss": 0.0285,
      "step": 589000
    },
    {
      "epoch": 0.963944148779482,
      "grad_norm": 7.842516899108887,
      "learning_rate": 8.125300633224172e-06,
      "loss": 0.0183,
      "step": 589020
    },
    {
      "epoch": 0.9639768792181352,
      "grad_norm": 0.20842808485031128,
      "learning_rate": 8.125234741010656e-06,
      "loss": 0.0309,
      "step": 589040
    },
    {
      "epoch": 0.9640096096567886,
      "grad_norm": 0.42778992652893066,
      "learning_rate": 8.125168848797138e-06,
      "loss": 0.0343,
      "step": 589060
    },
    {
      "epoch": 0.964042340095442,
      "grad_norm": 0.12339115887880325,
      "learning_rate": 8.125102956583621e-06,
      "loss": 0.0261,
      "step": 589080
    },
    {
      "epoch": 0.9640750705340952,
      "grad_norm": 1.304508090019226,
      "learning_rate": 8.125037064370103e-06,
      "loss": 0.0351,
      "step": 589100
    },
    {
      "epoch": 0.9641078009727486,
      "grad_norm": 0.7691141963005066,
      "learning_rate": 8.124971172156587e-06,
      "loss": 0.0237,
      "step": 589120
    },
    {
      "epoch": 0.964140531411402,
      "grad_norm": 3.7101504802703857,
      "learning_rate": 8.124905279943069e-06,
      "loss": 0.0292,
      "step": 589140
    },
    {
      "epoch": 0.9641732618500554,
      "grad_norm": 0.8537887334823608,
      "learning_rate": 8.124839387729552e-06,
      "loss": 0.0212,
      "step": 589160
    },
    {
      "epoch": 0.9642059922887086,
      "grad_norm": 0.2953914701938629,
      "learning_rate": 8.124773495516034e-06,
      "loss": 0.0238,
      "step": 589180
    },
    {
      "epoch": 0.964238722727362,
      "grad_norm": 0.5899668335914612,
      "learning_rate": 8.124707603302518e-06,
      "loss": 0.0264,
      "step": 589200
    },
    {
      "epoch": 0.9642714531660154,
      "grad_norm": 2.1972875595092773,
      "learning_rate": 8.124641711089001e-06,
      "loss": 0.0279,
      "step": 589220
    },
    {
      "epoch": 0.9643041836046686,
      "grad_norm": 0.09652022272348404,
      "learning_rate": 8.124575818875483e-06,
      "loss": 0.0244,
      "step": 589240
    },
    {
      "epoch": 0.964336914043322,
      "grad_norm": 1.1200662851333618,
      "learning_rate": 8.124509926661967e-06,
      "loss": 0.0259,
      "step": 589260
    },
    {
      "epoch": 0.9643696444819754,
      "grad_norm": 1.1262274980545044,
      "learning_rate": 8.12444403444845e-06,
      "loss": 0.0268,
      "step": 589280
    },
    {
      "epoch": 0.9644023749206286,
      "grad_norm": 0.5748096704483032,
      "learning_rate": 8.124378142234932e-06,
      "loss": 0.0298,
      "step": 589300
    },
    {
      "epoch": 0.964435105359282,
      "grad_norm": 0.3773728907108307,
      "learning_rate": 8.124312250021416e-06,
      "loss": 0.0269,
      "step": 589320
    },
    {
      "epoch": 0.9644678357979354,
      "grad_norm": 0.4747249186038971,
      "learning_rate": 8.1242463578079e-06,
      "loss": 0.0236,
      "step": 589340
    },
    {
      "epoch": 0.9645005662365888,
      "grad_norm": 0.29527345299720764,
      "learning_rate": 8.124180465594381e-06,
      "loss": 0.0248,
      "step": 589360
    },
    {
      "epoch": 0.964533296675242,
      "grad_norm": 1.1488269567489624,
      "learning_rate": 8.124114573380865e-06,
      "loss": 0.0291,
      "step": 589380
    },
    {
      "epoch": 0.9645660271138954,
      "grad_norm": 1.1529868841171265,
      "learning_rate": 8.124048681167347e-06,
      "loss": 0.0249,
      "step": 589400
    },
    {
      "epoch": 0.9645987575525488,
      "grad_norm": 0.5742107629776001,
      "learning_rate": 8.12398278895383e-06,
      "loss": 0.0355,
      "step": 589420
    },
    {
      "epoch": 0.964631487991202,
      "grad_norm": 1.6627742052078247,
      "learning_rate": 8.123916896740312e-06,
      "loss": 0.0263,
      "step": 589440
    },
    {
      "epoch": 0.9646642184298554,
      "grad_norm": 0.3700195550918579,
      "learning_rate": 8.123851004526796e-06,
      "loss": 0.0194,
      "step": 589460
    },
    {
      "epoch": 0.9646969488685088,
      "grad_norm": 1.1758173704147339,
      "learning_rate": 8.123785112313278e-06,
      "loss": 0.0297,
      "step": 589480
    },
    {
      "epoch": 0.964729679307162,
      "grad_norm": 0.20196183025836945,
      "learning_rate": 8.123719220099762e-06,
      "loss": 0.023,
      "step": 589500
    },
    {
      "epoch": 0.9647624097458154,
      "grad_norm": 1.6770151853561401,
      "learning_rate": 8.123653327886243e-06,
      "loss": 0.0216,
      "step": 589520
    },
    {
      "epoch": 0.9647951401844688,
      "grad_norm": 0.6154816746711731,
      "learning_rate": 8.123587435672727e-06,
      "loss": 0.0349,
      "step": 589540
    },
    {
      "epoch": 0.9648278706231221,
      "grad_norm": 0.17491282522678375,
      "learning_rate": 8.123521543459209e-06,
      "loss": 0.0267,
      "step": 589560
    },
    {
      "epoch": 0.9648606010617754,
      "grad_norm": 5.240107536315918,
      "learning_rate": 8.123455651245692e-06,
      "loss": 0.0384,
      "step": 589580
    },
    {
      "epoch": 0.9648933315004288,
      "grad_norm": 0.8632492423057556,
      "learning_rate": 8.123389759032176e-06,
      "loss": 0.0218,
      "step": 589600
    },
    {
      "epoch": 0.9649260619390821,
      "grad_norm": 0.9513200521469116,
      "learning_rate": 8.123323866818658e-06,
      "loss": 0.0285,
      "step": 589620
    },
    {
      "epoch": 0.9649587923777354,
      "grad_norm": 0.9900256395339966,
      "learning_rate": 8.123257974605142e-06,
      "loss": 0.0272,
      "step": 589640
    },
    {
      "epoch": 0.9649915228163888,
      "grad_norm": 2.5484113693237305,
      "learning_rate": 8.123192082391625e-06,
      "loss": 0.0328,
      "step": 589660
    },
    {
      "epoch": 0.9650242532550422,
      "grad_norm": 0.17137520015239716,
      "learning_rate": 8.123126190178107e-06,
      "loss": 0.0363,
      "step": 589680
    },
    {
      "epoch": 0.9650569836936954,
      "grad_norm": 1.8853099346160889,
      "learning_rate": 8.12306029796459e-06,
      "loss": 0.0275,
      "step": 589700
    },
    {
      "epoch": 0.9650897141323488,
      "grad_norm": 0.3294713795185089,
      "learning_rate": 8.122994405751074e-06,
      "loss": 0.0241,
      "step": 589720
    },
    {
      "epoch": 0.9651224445710022,
      "grad_norm": 1.8040536642074585,
      "learning_rate": 8.122928513537556e-06,
      "loss": 0.026,
      "step": 589740
    },
    {
      "epoch": 0.9651551750096555,
      "grad_norm": 0.803375244140625,
      "learning_rate": 8.12286262132404e-06,
      "loss": 0.0242,
      "step": 589760
    },
    {
      "epoch": 0.9651879054483088,
      "grad_norm": 1.9775038957595825,
      "learning_rate": 8.122796729110522e-06,
      "loss": 0.028,
      "step": 589780
    },
    {
      "epoch": 0.9652206358869622,
      "grad_norm": 0.33649054169654846,
      "learning_rate": 8.122730836897005e-06,
      "loss": 0.0202,
      "step": 589800
    },
    {
      "epoch": 0.9652533663256155,
      "grad_norm": 0.8787589073181152,
      "learning_rate": 8.122664944683487e-06,
      "loss": 0.0259,
      "step": 589820
    },
    {
      "epoch": 0.9652860967642688,
      "grad_norm": 0.7624295353889465,
      "learning_rate": 8.12259905246997e-06,
      "loss": 0.0166,
      "step": 589840
    },
    {
      "epoch": 0.9653188272029222,
      "grad_norm": 1.1304059028625488,
      "learning_rate": 8.122533160256453e-06,
      "loss": 0.0289,
      "step": 589860
    },
    {
      "epoch": 0.9653515576415755,
      "grad_norm": 0.36593589186668396,
      "learning_rate": 8.122467268042936e-06,
      "loss": 0.0243,
      "step": 589880
    },
    {
      "epoch": 0.9653842880802288,
      "grad_norm": 1.8861185312271118,
      "learning_rate": 8.122401375829418e-06,
      "loss": 0.0242,
      "step": 589900
    },
    {
      "epoch": 0.9654170185188822,
      "grad_norm": 1.1074942350387573,
      "learning_rate": 8.122335483615902e-06,
      "loss": 0.0343,
      "step": 589920
    },
    {
      "epoch": 0.9654497489575355,
      "grad_norm": 0.09574983268976212,
      "learning_rate": 8.122269591402383e-06,
      "loss": 0.0286,
      "step": 589940
    },
    {
      "epoch": 0.9654824793961889,
      "grad_norm": 0.7772297859191895,
      "learning_rate": 8.122203699188867e-06,
      "loss": 0.0255,
      "step": 589960
    },
    {
      "epoch": 0.9655152098348422,
      "grad_norm": 0.3865524232387543,
      "learning_rate": 8.12213780697535e-06,
      "loss": 0.0301,
      "step": 589980
    },
    {
      "epoch": 0.9655479402734956,
      "grad_norm": 0.9008937478065491,
      "learning_rate": 8.122071914761833e-06,
      "loss": 0.0271,
      "step": 590000
    },
    {
      "epoch": 0.9655806707121489,
      "grad_norm": 0.3472987711429596,
      "learning_rate": 8.122006022548316e-06,
      "loss": 0.0308,
      "step": 590020
    },
    {
      "epoch": 0.9656134011508022,
      "grad_norm": 1.3683627843856812,
      "learning_rate": 8.1219401303348e-06,
      "loss": 0.0301,
      "step": 590040
    },
    {
      "epoch": 0.9656461315894556,
      "grad_norm": 0.7995193004608154,
      "learning_rate": 8.121874238121282e-06,
      "loss": 0.036,
      "step": 590060
    },
    {
      "epoch": 0.9656788620281089,
      "grad_norm": 0.20305530726909637,
      "learning_rate": 8.121808345907765e-06,
      "loss": 0.0341,
      "step": 590080
    },
    {
      "epoch": 0.9657115924667622,
      "grad_norm": 0.31629717350006104,
      "learning_rate": 8.121742453694249e-06,
      "loss": 0.026,
      "step": 590100
    },
    {
      "epoch": 0.9657443229054156,
      "grad_norm": 1.4978375434875488,
      "learning_rate": 8.12167656148073e-06,
      "loss": 0.0366,
      "step": 590120
    },
    {
      "epoch": 0.9657770533440689,
      "grad_norm": 1.6629889011383057,
      "learning_rate": 8.121610669267214e-06,
      "loss": 0.0315,
      "step": 590140
    },
    {
      "epoch": 0.9658097837827223,
      "grad_norm": 0.5395646095275879,
      "learning_rate": 8.121544777053696e-06,
      "loss": 0.0275,
      "step": 590160
    },
    {
      "epoch": 0.9658425142213756,
      "grad_norm": 0.3948020339012146,
      "learning_rate": 8.12147888484018e-06,
      "loss": 0.0206,
      "step": 590180
    },
    {
      "epoch": 0.9658752446600289,
      "grad_norm": 0.9982050061225891,
      "learning_rate": 8.121412992626662e-06,
      "loss": 0.0305,
      "step": 590200
    },
    {
      "epoch": 0.9659079750986823,
      "grad_norm": 0.5258867144584656,
      "learning_rate": 8.121347100413145e-06,
      "loss": 0.0293,
      "step": 590220
    },
    {
      "epoch": 0.9659407055373356,
      "grad_norm": 2.0416104793548584,
      "learning_rate": 8.121281208199627e-06,
      "loss": 0.0226,
      "step": 590240
    },
    {
      "epoch": 0.965973435975989,
      "grad_norm": 0.8194543719291687,
      "learning_rate": 8.12121531598611e-06,
      "loss": 0.0221,
      "step": 590260
    },
    {
      "epoch": 0.9660061664146423,
      "grad_norm": 1.2781420946121216,
      "learning_rate": 8.121149423772593e-06,
      "loss": 0.0346,
      "step": 590280
    },
    {
      "epoch": 0.9660388968532956,
      "grad_norm": 0.28070810437202454,
      "learning_rate": 8.121083531559076e-06,
      "loss": 0.032,
      "step": 590300
    },
    {
      "epoch": 0.966071627291949,
      "grad_norm": 0.11972001940011978,
      "learning_rate": 8.121017639345558e-06,
      "loss": 0.0232,
      "step": 590320
    },
    {
      "epoch": 0.9661043577306023,
      "grad_norm": 0.5793254971504211,
      "learning_rate": 8.120951747132042e-06,
      "loss": 0.0309,
      "step": 590340
    },
    {
      "epoch": 0.9661370881692557,
      "grad_norm": 0.38721367716789246,
      "learning_rate": 8.120885854918525e-06,
      "loss": 0.023,
      "step": 590360
    },
    {
      "epoch": 0.966169818607909,
      "grad_norm": 0.07116484642028809,
      "learning_rate": 8.120819962705007e-06,
      "loss": 0.0271,
      "step": 590380
    },
    {
      "epoch": 0.9662025490465623,
      "grad_norm": 1.8905582427978516,
      "learning_rate": 8.12075407049149e-06,
      "loss": 0.0274,
      "step": 590400
    },
    {
      "epoch": 0.9662352794852157,
      "grad_norm": 1.232913613319397,
      "learning_rate": 8.120688178277973e-06,
      "loss": 0.0369,
      "step": 590420
    },
    {
      "epoch": 0.966268009923869,
      "grad_norm": 1.4389636516571045,
      "learning_rate": 8.120622286064456e-06,
      "loss": 0.0286,
      "step": 590440
    },
    {
      "epoch": 0.9663007403625223,
      "grad_norm": 0.5201030373573303,
      "learning_rate": 8.12055639385094e-06,
      "loss": 0.0275,
      "step": 590460
    },
    {
      "epoch": 0.9663334708011757,
      "grad_norm": 0.3180224299430847,
      "learning_rate": 8.120490501637422e-06,
      "loss": 0.0178,
      "step": 590480
    },
    {
      "epoch": 0.966366201239829,
      "grad_norm": 0.36072078347206116,
      "learning_rate": 8.120424609423905e-06,
      "loss": 0.0227,
      "step": 590500
    },
    {
      "epoch": 0.9663989316784823,
      "grad_norm": 0.49169209599494934,
      "learning_rate": 8.120358717210389e-06,
      "loss": 0.0265,
      "step": 590520
    },
    {
      "epoch": 0.9664316621171357,
      "grad_norm": 0.05485743656754494,
      "learning_rate": 8.12029282499687e-06,
      "loss": 0.0251,
      "step": 590540
    },
    {
      "epoch": 0.9664643925557891,
      "grad_norm": 0.9143571257591248,
      "learning_rate": 8.120226932783354e-06,
      "loss": 0.032,
      "step": 590560
    },
    {
      "epoch": 0.9664971229944423,
      "grad_norm": 1.2462600469589233,
      "learning_rate": 8.120161040569836e-06,
      "loss": 0.0327,
      "step": 590580
    },
    {
      "epoch": 0.9665298534330957,
      "grad_norm": 0.253949910402298,
      "learning_rate": 8.12009514835632e-06,
      "loss": 0.031,
      "step": 590600
    },
    {
      "epoch": 0.9665625838717491,
      "grad_norm": 0.8240879774093628,
      "learning_rate": 8.120029256142802e-06,
      "loss": 0.0276,
      "step": 590620
    },
    {
      "epoch": 0.9665953143104024,
      "grad_norm": 1.2288893461227417,
      "learning_rate": 8.119963363929285e-06,
      "loss": 0.031,
      "step": 590640
    },
    {
      "epoch": 0.9666280447490557,
      "grad_norm": 0.4803002178668976,
      "learning_rate": 8.119897471715767e-06,
      "loss": 0.0278,
      "step": 590660
    },
    {
      "epoch": 0.9666607751877091,
      "grad_norm": 1.2931544780731201,
      "learning_rate": 8.11983157950225e-06,
      "loss": 0.0256,
      "step": 590680
    },
    {
      "epoch": 0.9666935056263624,
      "grad_norm": 0.581993818283081,
      "learning_rate": 8.119765687288734e-06,
      "loss": 0.0236,
      "step": 590700
    },
    {
      "epoch": 0.9667262360650157,
      "grad_norm": 1.076889991760254,
      "learning_rate": 8.119699795075216e-06,
      "loss": 0.0201,
      "step": 590720
    },
    {
      "epoch": 0.9667589665036691,
      "grad_norm": 3.6625916957855225,
      "learning_rate": 8.1196339028617e-06,
      "loss": 0.0279,
      "step": 590740
    },
    {
      "epoch": 0.9667916969423225,
      "grad_norm": 0.8566787242889404,
      "learning_rate": 8.119568010648182e-06,
      "loss": 0.0252,
      "step": 590760
    },
    {
      "epoch": 0.9668244273809757,
      "grad_norm": 1.672115683555603,
      "learning_rate": 8.119502118434665e-06,
      "loss": 0.0264,
      "step": 590780
    },
    {
      "epoch": 0.9668571578196291,
      "grad_norm": 1.0027464628219604,
      "learning_rate": 8.119436226221147e-06,
      "loss": 0.0253,
      "step": 590800
    },
    {
      "epoch": 0.9668898882582825,
      "grad_norm": 0.8296735882759094,
      "learning_rate": 8.11937033400763e-06,
      "loss": 0.0264,
      "step": 590820
    },
    {
      "epoch": 0.9669226186969357,
      "grad_norm": 0.3807251453399658,
      "learning_rate": 8.119304441794114e-06,
      "loss": 0.0301,
      "step": 590840
    },
    {
      "epoch": 0.9669553491355891,
      "grad_norm": 1.9726097583770752,
      "learning_rate": 8.119238549580596e-06,
      "loss": 0.0167,
      "step": 590860
    },
    {
      "epoch": 0.9669880795742425,
      "grad_norm": 0.2226504385471344,
      "learning_rate": 8.11917265736708e-06,
      "loss": 0.0245,
      "step": 590880
    },
    {
      "epoch": 0.9670208100128957,
      "grad_norm": 0.6273804306983948,
      "learning_rate": 8.119106765153563e-06,
      "loss": 0.0252,
      "step": 590900
    },
    {
      "epoch": 0.9670535404515491,
      "grad_norm": 0.2761267423629761,
      "learning_rate": 8.119040872940045e-06,
      "loss": 0.0213,
      "step": 590920
    },
    {
      "epoch": 0.9670862708902025,
      "grad_norm": 0.3173444867134094,
      "learning_rate": 8.118974980726529e-06,
      "loss": 0.0233,
      "step": 590940
    },
    {
      "epoch": 0.9671190013288558,
      "grad_norm": 1.8558818101882935,
      "learning_rate": 8.11890908851301e-06,
      "loss": 0.0221,
      "step": 590960
    },
    {
      "epoch": 0.9671517317675091,
      "grad_norm": 1.2161637544631958,
      "learning_rate": 8.118843196299494e-06,
      "loss": 0.0242,
      "step": 590980
    },
    {
      "epoch": 0.9671844622061625,
      "grad_norm": 0.48497575521469116,
      "learning_rate": 8.118777304085976e-06,
      "loss": 0.0271,
      "step": 591000
    },
    {
      "epoch": 0.9672171926448159,
      "grad_norm": 0.4879342019557953,
      "learning_rate": 8.11871141187246e-06,
      "loss": 0.0357,
      "step": 591020
    },
    {
      "epoch": 0.9672499230834691,
      "grad_norm": 0.23910488188266754,
      "learning_rate": 8.118645519658943e-06,
      "loss": 0.0265,
      "step": 591040
    },
    {
      "epoch": 0.9672826535221225,
      "grad_norm": 0.8275331854820251,
      "learning_rate": 8.118579627445425e-06,
      "loss": 0.0321,
      "step": 591060
    },
    {
      "epoch": 0.9673153839607759,
      "grad_norm": 0.3487432599067688,
      "learning_rate": 8.118513735231909e-06,
      "loss": 0.0341,
      "step": 591080
    },
    {
      "epoch": 0.9673481143994291,
      "grad_norm": 0.6831853985786438,
      "learning_rate": 8.118447843018391e-06,
      "loss": 0.0273,
      "step": 591100
    },
    {
      "epoch": 0.9673808448380825,
      "grad_norm": 1.0114998817443848,
      "learning_rate": 8.118381950804874e-06,
      "loss": 0.0294,
      "step": 591120
    },
    {
      "epoch": 0.9674135752767359,
      "grad_norm": 1.2759860754013062,
      "learning_rate": 8.118316058591356e-06,
      "loss": 0.0302,
      "step": 591140
    },
    {
      "epoch": 0.9674463057153891,
      "grad_norm": 0.4485667645931244,
      "learning_rate": 8.11825016637784e-06,
      "loss": 0.0199,
      "step": 591160
    },
    {
      "epoch": 0.9674790361540425,
      "grad_norm": 2.132524013519287,
      "learning_rate": 8.118184274164322e-06,
      "loss": 0.0325,
      "step": 591180
    },
    {
      "epoch": 0.9675117665926959,
      "grad_norm": 0.3886720538139343,
      "learning_rate": 8.118118381950805e-06,
      "loss": 0.0211,
      "step": 591200
    },
    {
      "epoch": 0.9675444970313493,
      "grad_norm": 0.5354305505752563,
      "learning_rate": 8.118052489737287e-06,
      "loss": 0.0233,
      "step": 591220
    },
    {
      "epoch": 0.9675772274700025,
      "grad_norm": 0.45106953382492065,
      "learning_rate": 8.117986597523771e-06,
      "loss": 0.0266,
      "step": 591240
    },
    {
      "epoch": 0.9676099579086559,
      "grad_norm": 0.7179602384567261,
      "learning_rate": 8.117920705310254e-06,
      "loss": 0.0273,
      "step": 591260
    },
    {
      "epoch": 0.9676426883473093,
      "grad_norm": 2.172041416168213,
      "learning_rate": 8.117854813096736e-06,
      "loss": 0.0439,
      "step": 591280
    },
    {
      "epoch": 0.9676754187859625,
      "grad_norm": 0.3564489483833313,
      "learning_rate": 8.11778892088322e-06,
      "loss": 0.0186,
      "step": 591300
    },
    {
      "epoch": 0.9677081492246159,
      "grad_norm": 1.0338083505630493,
      "learning_rate": 8.117723028669704e-06,
      "loss": 0.0215,
      "step": 591320
    },
    {
      "epoch": 0.9677408796632693,
      "grad_norm": 0.8658697009086609,
      "learning_rate": 8.117657136456185e-06,
      "loss": 0.0242,
      "step": 591340
    },
    {
      "epoch": 0.9677736101019225,
      "grad_norm": 0.8957462906837463,
      "learning_rate": 8.117591244242669e-06,
      "loss": 0.0329,
      "step": 591360
    },
    {
      "epoch": 0.9678063405405759,
      "grad_norm": 2.2491953372955322,
      "learning_rate": 8.117525352029153e-06,
      "loss": 0.04,
      "step": 591380
    },
    {
      "epoch": 0.9678390709792293,
      "grad_norm": 0.2735038697719574,
      "learning_rate": 8.117459459815634e-06,
      "loss": 0.0277,
      "step": 591400
    },
    {
      "epoch": 0.9678718014178826,
      "grad_norm": 0.6054747700691223,
      "learning_rate": 8.117393567602118e-06,
      "loss": 0.0313,
      "step": 591420
    },
    {
      "epoch": 0.9679045318565359,
      "grad_norm": 2.6137478351593018,
      "learning_rate": 8.1173276753886e-06,
      "loss": 0.0267,
      "step": 591440
    },
    {
      "epoch": 0.9679372622951893,
      "grad_norm": 0.45608580112457275,
      "learning_rate": 8.117261783175084e-06,
      "loss": 0.0293,
      "step": 591460
    },
    {
      "epoch": 0.9679699927338427,
      "grad_norm": 1.5526798963546753,
      "learning_rate": 8.117195890961565e-06,
      "loss": 0.0291,
      "step": 591480
    },
    {
      "epoch": 0.9680027231724959,
      "grad_norm": 0.9837782979011536,
      "learning_rate": 8.117129998748049e-06,
      "loss": 0.0313,
      "step": 591500
    },
    {
      "epoch": 0.9680354536111493,
      "grad_norm": 0.8369980454444885,
      "learning_rate": 8.117064106534531e-06,
      "loss": 0.0321,
      "step": 591520
    },
    {
      "epoch": 0.9680681840498027,
      "grad_norm": 1.3735754489898682,
      "learning_rate": 8.116998214321015e-06,
      "loss": 0.0271,
      "step": 591540
    },
    {
      "epoch": 0.9681009144884559,
      "grad_norm": 0.2932445704936981,
      "learning_rate": 8.116932322107496e-06,
      "loss": 0.0293,
      "step": 591560
    },
    {
      "epoch": 0.9681336449271093,
      "grad_norm": 1.7779614925384521,
      "learning_rate": 8.11686642989398e-06,
      "loss": 0.0254,
      "step": 591580
    },
    {
      "epoch": 0.9681663753657627,
      "grad_norm": 1.1661134958267212,
      "learning_rate": 8.116800537680462e-06,
      "loss": 0.0266,
      "step": 591600
    },
    {
      "epoch": 0.968199105804416,
      "grad_norm": 1.1572527885437012,
      "learning_rate": 8.116734645466945e-06,
      "loss": 0.0372,
      "step": 591620
    },
    {
      "epoch": 0.9682318362430693,
      "grad_norm": 1.684159278869629,
      "learning_rate": 8.116668753253429e-06,
      "loss": 0.0294,
      "step": 591640
    },
    {
      "epoch": 0.9682645666817227,
      "grad_norm": 0.7355841994285583,
      "learning_rate": 8.116602861039911e-06,
      "loss": 0.0234,
      "step": 591660
    },
    {
      "epoch": 0.968297297120376,
      "grad_norm": 1.2701603174209595,
      "learning_rate": 8.116536968826395e-06,
      "loss": 0.0249,
      "step": 591680
    },
    {
      "epoch": 0.9683300275590293,
      "grad_norm": 1.7168245315551758,
      "learning_rate": 8.116471076612878e-06,
      "loss": 0.0363,
      "step": 591700
    },
    {
      "epoch": 0.9683627579976827,
      "grad_norm": 0.32400640845298767,
      "learning_rate": 8.11640518439936e-06,
      "loss": 0.0315,
      "step": 591720
    },
    {
      "epoch": 0.968395488436336,
      "grad_norm": 0.5030375123023987,
      "learning_rate": 8.116339292185844e-06,
      "loss": 0.0307,
      "step": 591740
    },
    {
      "epoch": 0.9684282188749893,
      "grad_norm": 1.9181358814239502,
      "learning_rate": 8.116273399972327e-06,
      "loss": 0.0336,
      "step": 591760
    },
    {
      "epoch": 0.9684609493136427,
      "grad_norm": 0.49411532282829285,
      "learning_rate": 8.116207507758809e-06,
      "loss": 0.0291,
      "step": 591780
    },
    {
      "epoch": 0.968493679752296,
      "grad_norm": 0.7195260524749756,
      "learning_rate": 8.116141615545293e-06,
      "loss": 0.0255,
      "step": 591800
    },
    {
      "epoch": 0.9685264101909494,
      "grad_norm": 1.0463836193084717,
      "learning_rate": 8.116075723331775e-06,
      "loss": 0.0299,
      "step": 591820
    },
    {
      "epoch": 0.9685591406296027,
      "grad_norm": 2.1444098949432373,
      "learning_rate": 8.116009831118258e-06,
      "loss": 0.022,
      "step": 591840
    },
    {
      "epoch": 0.9685918710682561,
      "grad_norm": 0.5474197268486023,
      "learning_rate": 8.11594393890474e-06,
      "loss": 0.0328,
      "step": 591860
    },
    {
      "epoch": 0.9686246015069094,
      "grad_norm": 1.4168109893798828,
      "learning_rate": 8.115878046691224e-06,
      "loss": 0.032,
      "step": 591880
    },
    {
      "epoch": 0.9686573319455627,
      "grad_norm": 0.31702667474746704,
      "learning_rate": 8.115812154477706e-06,
      "loss": 0.0235,
      "step": 591900
    },
    {
      "epoch": 0.9686900623842161,
      "grad_norm": 3.557765245437622,
      "learning_rate": 8.115746262264189e-06,
      "loss": 0.0281,
      "step": 591920
    },
    {
      "epoch": 0.9687227928228694,
      "grad_norm": 0.6598094701766968,
      "learning_rate": 8.115680370050671e-06,
      "loss": 0.0369,
      "step": 591940
    },
    {
      "epoch": 0.9687555232615227,
      "grad_norm": 2.2918665409088135,
      "learning_rate": 8.115614477837155e-06,
      "loss": 0.0242,
      "step": 591960
    },
    {
      "epoch": 0.9687882537001761,
      "grad_norm": 1.4787054061889648,
      "learning_rate": 8.115548585623636e-06,
      "loss": 0.0236,
      "step": 591980
    },
    {
      "epoch": 0.9688209841388294,
      "grad_norm": 0.495939165353775,
      "learning_rate": 8.11548269341012e-06,
      "loss": 0.0252,
      "step": 592000
    },
    {
      "epoch": 0.9688537145774828,
      "grad_norm": 0.5593169331550598,
      "learning_rate": 8.115416801196602e-06,
      "loss": 0.0282,
      "step": 592020
    },
    {
      "epoch": 0.9688864450161361,
      "grad_norm": 0.78855961561203,
      "learning_rate": 8.115350908983086e-06,
      "loss": 0.0255,
      "step": 592040
    },
    {
      "epoch": 0.9689191754547894,
      "grad_norm": 1.093864917755127,
      "learning_rate": 8.115285016769569e-06,
      "loss": 0.0276,
      "step": 592060
    },
    {
      "epoch": 0.9689519058934428,
      "grad_norm": 0.45540332794189453,
      "learning_rate": 8.115219124556051e-06,
      "loss": 0.03,
      "step": 592080
    },
    {
      "epoch": 0.9689846363320961,
      "grad_norm": 1.504164695739746,
      "learning_rate": 8.115153232342535e-06,
      "loss": 0.0277,
      "step": 592100
    },
    {
      "epoch": 0.9690173667707495,
      "grad_norm": 0.9333575963973999,
      "learning_rate": 8.115087340129018e-06,
      "loss": 0.0356,
      "step": 592120
    },
    {
      "epoch": 0.9690500972094028,
      "grad_norm": 0.8894002437591553,
      "learning_rate": 8.115021447915502e-06,
      "loss": 0.0215,
      "step": 592140
    },
    {
      "epoch": 0.9690828276480561,
      "grad_norm": 0.5721922516822815,
      "learning_rate": 8.114955555701984e-06,
      "loss": 0.0281,
      "step": 592160
    },
    {
      "epoch": 0.9691155580867095,
      "grad_norm": 0.583844780921936,
      "learning_rate": 8.114889663488467e-06,
      "loss": 0.0407,
      "step": 592180
    },
    {
      "epoch": 0.9691482885253628,
      "grad_norm": 0.8906933665275574,
      "learning_rate": 8.11482377127495e-06,
      "loss": 0.0333,
      "step": 592200
    },
    {
      "epoch": 0.9691810189640162,
      "grad_norm": 1.0366442203521729,
      "learning_rate": 8.114757879061433e-06,
      "loss": 0.0299,
      "step": 592220
    },
    {
      "epoch": 0.9692137494026695,
      "grad_norm": 0.6397222280502319,
      "learning_rate": 8.114691986847915e-06,
      "loss": 0.023,
      "step": 592240
    },
    {
      "epoch": 0.9692464798413228,
      "grad_norm": 2.793743371963501,
      "learning_rate": 8.114626094634398e-06,
      "loss": 0.0309,
      "step": 592260
    },
    {
      "epoch": 0.9692792102799762,
      "grad_norm": 0.3680596649646759,
      "learning_rate": 8.11456020242088e-06,
      "loss": 0.0287,
      "step": 592280
    },
    {
      "epoch": 0.9693119407186295,
      "grad_norm": 0.631503164768219,
      "learning_rate": 8.114494310207364e-06,
      "loss": 0.0222,
      "step": 592300
    },
    {
      "epoch": 0.9693446711572828,
      "grad_norm": 0.4517033100128174,
      "learning_rate": 8.114428417993846e-06,
      "loss": 0.0244,
      "step": 592320
    },
    {
      "epoch": 0.9693774015959362,
      "grad_norm": 0.3214428424835205,
      "learning_rate": 8.11436252578033e-06,
      "loss": 0.0364,
      "step": 592340
    },
    {
      "epoch": 0.9694101320345895,
      "grad_norm": 0.4343135356903076,
      "learning_rate": 8.114296633566811e-06,
      "loss": 0.0339,
      "step": 592360
    },
    {
      "epoch": 0.9694428624732429,
      "grad_norm": 1.1484092473983765,
      "learning_rate": 8.114230741353295e-06,
      "loss": 0.0277,
      "step": 592380
    },
    {
      "epoch": 0.9694755929118962,
      "grad_norm": 0.3674507141113281,
      "learning_rate": 8.114164849139777e-06,
      "loss": 0.0325,
      "step": 592400
    },
    {
      "epoch": 0.9695083233505496,
      "grad_norm": 2.4404218196868896,
      "learning_rate": 8.11409895692626e-06,
      "loss": 0.0306,
      "step": 592420
    },
    {
      "epoch": 0.9695410537892029,
      "grad_norm": 2.1535446643829346,
      "learning_rate": 8.114033064712744e-06,
      "loss": 0.0262,
      "step": 592440
    },
    {
      "epoch": 0.9695737842278562,
      "grad_norm": 1.3496934175491333,
      "learning_rate": 8.113967172499226e-06,
      "loss": 0.0357,
      "step": 592460
    },
    {
      "epoch": 0.9696065146665096,
      "grad_norm": 0.3420432507991791,
      "learning_rate": 8.11390128028571e-06,
      "loss": 0.0312,
      "step": 592480
    },
    {
      "epoch": 0.9696392451051629,
      "grad_norm": 1.3695926666259766,
      "learning_rate": 8.113835388072193e-06,
      "loss": 0.0321,
      "step": 592500
    },
    {
      "epoch": 0.9696719755438162,
      "grad_norm": 0.6692858338356018,
      "learning_rate": 8.113769495858675e-06,
      "loss": 0.022,
      "step": 592520
    },
    {
      "epoch": 0.9697047059824696,
      "grad_norm": 0.4772091805934906,
      "learning_rate": 8.113703603645158e-06,
      "loss": 0.0216,
      "step": 592540
    },
    {
      "epoch": 0.9697374364211229,
      "grad_norm": 1.3798930644989014,
      "learning_rate": 8.113637711431642e-06,
      "loss": 0.0272,
      "step": 592560
    },
    {
      "epoch": 0.9697701668597762,
      "grad_norm": 0.9512750506401062,
      "learning_rate": 8.113571819218124e-06,
      "loss": 0.0325,
      "step": 592580
    },
    {
      "epoch": 0.9698028972984296,
      "grad_norm": 7.538036346435547,
      "learning_rate": 8.113505927004607e-06,
      "loss": 0.024,
      "step": 592600
    },
    {
      "epoch": 0.969835627737083,
      "grad_norm": 1.9432049989700317,
      "learning_rate": 8.11344003479109e-06,
      "loss": 0.0403,
      "step": 592620
    },
    {
      "epoch": 0.9698683581757362,
      "grad_norm": 2.6303272247314453,
      "learning_rate": 8.113374142577573e-06,
      "loss": 0.021,
      "step": 592640
    },
    {
      "epoch": 0.9699010886143896,
      "grad_norm": 0.597761869430542,
      "learning_rate": 8.113308250364055e-06,
      "loss": 0.0258,
      "step": 592660
    },
    {
      "epoch": 0.969933819053043,
      "grad_norm": 0.7301064729690552,
      "learning_rate": 8.113242358150538e-06,
      "loss": 0.0371,
      "step": 592680
    },
    {
      "epoch": 0.9699665494916963,
      "grad_norm": 1.4376587867736816,
      "learning_rate": 8.11317646593702e-06,
      "loss": 0.0362,
      "step": 592700
    },
    {
      "epoch": 0.9699992799303496,
      "grad_norm": 1.6049000024795532,
      "learning_rate": 8.113110573723504e-06,
      "loss": 0.0232,
      "step": 592720
    },
    {
      "epoch": 0.970032010369003,
      "grad_norm": 1.0432454347610474,
      "learning_rate": 8.113044681509986e-06,
      "loss": 0.0306,
      "step": 592740
    },
    {
      "epoch": 0.9700647408076563,
      "grad_norm": 0.4712539315223694,
      "learning_rate": 8.11297878929647e-06,
      "loss": 0.0348,
      "step": 592760
    },
    {
      "epoch": 0.9700974712463096,
      "grad_norm": 0.47787556052207947,
      "learning_rate": 8.112912897082951e-06,
      "loss": 0.0264,
      "step": 592780
    },
    {
      "epoch": 0.970130201684963,
      "grad_norm": 8.457056045532227,
      "learning_rate": 8.112847004869435e-06,
      "loss": 0.0239,
      "step": 592800
    },
    {
      "epoch": 0.9701629321236164,
      "grad_norm": 0.3873533308506012,
      "learning_rate": 8.112781112655918e-06,
      "loss": 0.0165,
      "step": 592820
    },
    {
      "epoch": 0.9701956625622696,
      "grad_norm": 0.4110300838947296,
      "learning_rate": 8.1127152204424e-06,
      "loss": 0.0304,
      "step": 592840
    },
    {
      "epoch": 0.970228393000923,
      "grad_norm": 1.9926332235336304,
      "learning_rate": 8.112649328228884e-06,
      "loss": 0.0342,
      "step": 592860
    },
    {
      "epoch": 0.9702611234395764,
      "grad_norm": 0.6709316372871399,
      "learning_rate": 8.112583436015367e-06,
      "loss": 0.0303,
      "step": 592880
    },
    {
      "epoch": 0.9702938538782296,
      "grad_norm": 0.6977631449699402,
      "learning_rate": 8.11251754380185e-06,
      "loss": 0.029,
      "step": 592900
    },
    {
      "epoch": 0.970326584316883,
      "grad_norm": 0.5485327243804932,
      "learning_rate": 8.112451651588333e-06,
      "loss": 0.0365,
      "step": 592920
    },
    {
      "epoch": 0.9703593147555364,
      "grad_norm": 2.597034454345703,
      "learning_rate": 8.112385759374816e-06,
      "loss": 0.0366,
      "step": 592940
    },
    {
      "epoch": 0.9703920451941896,
      "grad_norm": 1.3293848037719727,
      "learning_rate": 8.112319867161298e-06,
      "loss": 0.0361,
      "step": 592960
    },
    {
      "epoch": 0.970424775632843,
      "grad_norm": 1.4530742168426514,
      "learning_rate": 8.112253974947782e-06,
      "loss": 0.0307,
      "step": 592980
    },
    {
      "epoch": 0.9704575060714964,
      "grad_norm": 0.8221903443336487,
      "learning_rate": 8.112188082734264e-06,
      "loss": 0.0291,
      "step": 593000
    },
    {
      "epoch": 0.9704902365101498,
      "grad_norm": 0.651584267616272,
      "learning_rate": 8.112122190520747e-06,
      "loss": 0.0231,
      "step": 593020
    },
    {
      "epoch": 0.970522966948803,
      "grad_norm": 0.6891920566558838,
      "learning_rate": 8.11205629830723e-06,
      "loss": 0.0282,
      "step": 593040
    },
    {
      "epoch": 0.9705556973874564,
      "grad_norm": 0.46166741847991943,
      "learning_rate": 8.111990406093713e-06,
      "loss": 0.0269,
      "step": 593060
    },
    {
      "epoch": 0.9705884278261098,
      "grad_norm": 0.3969000279903412,
      "learning_rate": 8.111924513880195e-06,
      "loss": 0.0324,
      "step": 593080
    },
    {
      "epoch": 0.970621158264763,
      "grad_norm": 0.5968967080116272,
      "learning_rate": 8.111858621666678e-06,
      "loss": 0.0261,
      "step": 593100
    },
    {
      "epoch": 0.9706538887034164,
      "grad_norm": 1.0520761013031006,
      "learning_rate": 8.11179272945316e-06,
      "loss": 0.0315,
      "step": 593120
    },
    {
      "epoch": 0.9706866191420698,
      "grad_norm": 2.5054187774658203,
      "learning_rate": 8.111726837239644e-06,
      "loss": 0.0257,
      "step": 593140
    },
    {
      "epoch": 0.970719349580723,
      "grad_norm": 0.8509408831596375,
      "learning_rate": 8.111660945026127e-06,
      "loss": 0.0247,
      "step": 593160
    },
    {
      "epoch": 0.9707520800193764,
      "grad_norm": 0.40604159235954285,
      "learning_rate": 8.11159505281261e-06,
      "loss": 0.0237,
      "step": 593180
    },
    {
      "epoch": 0.9707848104580298,
      "grad_norm": 1.6505056619644165,
      "learning_rate": 8.111529160599093e-06,
      "loss": 0.0262,
      "step": 593200
    },
    {
      "epoch": 0.9708175408966832,
      "grad_norm": 1.225877285003662,
      "learning_rate": 8.111463268385575e-06,
      "loss": 0.0168,
      "step": 593220
    },
    {
      "epoch": 0.9708502713353364,
      "grad_norm": 0.5419827699661255,
      "learning_rate": 8.111397376172058e-06,
      "loss": 0.0198,
      "step": 593240
    },
    {
      "epoch": 0.9708830017739898,
      "grad_norm": 0.9608742594718933,
      "learning_rate": 8.11133148395854e-06,
      "loss": 0.0273,
      "step": 593260
    },
    {
      "epoch": 0.9709157322126432,
      "grad_norm": 0.8303335905075073,
      "learning_rate": 8.111265591745024e-06,
      "loss": 0.0278,
      "step": 593280
    },
    {
      "epoch": 0.9709484626512964,
      "grad_norm": 1.779160976409912,
      "learning_rate": 8.111199699531507e-06,
      "loss": 0.0311,
      "step": 593300
    },
    {
      "epoch": 0.9709811930899498,
      "grad_norm": 0.7624121308326721,
      "learning_rate": 8.11113380731799e-06,
      "loss": 0.0257,
      "step": 593320
    },
    {
      "epoch": 0.9710139235286032,
      "grad_norm": 0.7886026501655579,
      "learning_rate": 8.111067915104473e-06,
      "loss": 0.032,
      "step": 593340
    },
    {
      "epoch": 0.9710466539672564,
      "grad_norm": 0.4452826976776123,
      "learning_rate": 8.111002022890957e-06,
      "loss": 0.0277,
      "step": 593360
    },
    {
      "epoch": 0.9710793844059098,
      "grad_norm": 0.6021853685379028,
      "learning_rate": 8.110936130677438e-06,
      "loss": 0.0232,
      "step": 593380
    },
    {
      "epoch": 0.9711121148445632,
      "grad_norm": 0.7658814787864685,
      "learning_rate": 8.110870238463922e-06,
      "loss": 0.025,
      "step": 593400
    },
    {
      "epoch": 0.9711448452832165,
      "grad_norm": 1.3733810186386108,
      "learning_rate": 8.110804346250404e-06,
      "loss": 0.0244,
      "step": 593420
    },
    {
      "epoch": 0.9711775757218698,
      "grad_norm": 0.5524302124977112,
      "learning_rate": 8.110738454036888e-06,
      "loss": 0.0292,
      "step": 593440
    },
    {
      "epoch": 0.9712103061605232,
      "grad_norm": 2.1602962017059326,
      "learning_rate": 8.11067256182337e-06,
      "loss": 0.0283,
      "step": 593460
    },
    {
      "epoch": 0.9712430365991765,
      "grad_norm": 0.7755782604217529,
      "learning_rate": 8.110606669609853e-06,
      "loss": 0.0315,
      "step": 593480
    },
    {
      "epoch": 0.9712757670378298,
      "grad_norm": 1.4909381866455078,
      "learning_rate": 8.110540777396337e-06,
      "loss": 0.0352,
      "step": 593500
    },
    {
      "epoch": 0.9713084974764832,
      "grad_norm": 0.6860302686691284,
      "learning_rate": 8.110474885182818e-06,
      "loss": 0.032,
      "step": 593520
    },
    {
      "epoch": 0.9713412279151366,
      "grad_norm": 0.5451131463050842,
      "learning_rate": 8.110408992969302e-06,
      "loss": 0.0296,
      "step": 593540
    },
    {
      "epoch": 0.9713739583537898,
      "grad_norm": 0.7700971961021423,
      "learning_rate": 8.110343100755784e-06,
      "loss": 0.0282,
      "step": 593560
    },
    {
      "epoch": 0.9714066887924432,
      "grad_norm": 0.9488266110420227,
      "learning_rate": 8.110277208542268e-06,
      "loss": 0.0234,
      "step": 593580
    },
    {
      "epoch": 0.9714394192310966,
      "grad_norm": 1.1377497911453247,
      "learning_rate": 8.11021131632875e-06,
      "loss": 0.0263,
      "step": 593600
    },
    {
      "epoch": 0.9714721496697499,
      "grad_norm": 0.4565676152706146,
      "learning_rate": 8.110145424115233e-06,
      "loss": 0.0248,
      "step": 593620
    },
    {
      "epoch": 0.9715048801084032,
      "grad_norm": 1.039952278137207,
      "learning_rate": 8.110079531901715e-06,
      "loss": 0.0391,
      "step": 593640
    },
    {
      "epoch": 0.9715376105470566,
      "grad_norm": 0.8154538869857788,
      "learning_rate": 8.110013639688198e-06,
      "loss": 0.0285,
      "step": 593660
    },
    {
      "epoch": 0.9715703409857099,
      "grad_norm": 1.298926591873169,
      "learning_rate": 8.109947747474682e-06,
      "loss": 0.0332,
      "step": 593680
    },
    {
      "epoch": 0.9716030714243632,
      "grad_norm": 0.8606240153312683,
      "learning_rate": 8.109881855261164e-06,
      "loss": 0.0347,
      "step": 593700
    },
    {
      "epoch": 0.9716358018630166,
      "grad_norm": 0.5631433725357056,
      "learning_rate": 8.109815963047648e-06,
      "loss": 0.027,
      "step": 593720
    },
    {
      "epoch": 0.9716685323016699,
      "grad_norm": 0.6893395185470581,
      "learning_rate": 8.109750070834131e-06,
      "loss": 0.0244,
      "step": 593740
    },
    {
      "epoch": 0.9717012627403232,
      "grad_norm": 0.732815146446228,
      "learning_rate": 8.109684178620613e-06,
      "loss": 0.0229,
      "step": 593760
    },
    {
      "epoch": 0.9717339931789766,
      "grad_norm": 0.8771488666534424,
      "learning_rate": 8.109618286407097e-06,
      "loss": 0.0347,
      "step": 593780
    },
    {
      "epoch": 0.97176672361763,
      "grad_norm": 0.4761851727962494,
      "learning_rate": 8.109552394193579e-06,
      "loss": 0.0215,
      "step": 593800
    },
    {
      "epoch": 0.9717994540562832,
      "grad_norm": 0.6227900981903076,
      "learning_rate": 8.109486501980062e-06,
      "loss": 0.0243,
      "step": 593820
    },
    {
      "epoch": 0.9718321844949366,
      "grad_norm": 0.3836141526699066,
      "learning_rate": 8.109420609766546e-06,
      "loss": 0.0201,
      "step": 593840
    },
    {
      "epoch": 0.97186491493359,
      "grad_norm": 0.4163009524345398,
      "learning_rate": 8.109354717553028e-06,
      "loss": 0.0228,
      "step": 593860
    },
    {
      "epoch": 0.9718976453722433,
      "grad_norm": 0.7689991593360901,
      "learning_rate": 8.109288825339511e-06,
      "loss": 0.0204,
      "step": 593880
    },
    {
      "epoch": 0.9719303758108966,
      "grad_norm": 0.3693946599960327,
      "learning_rate": 8.109222933125993e-06,
      "loss": 0.0224,
      "step": 593900
    },
    {
      "epoch": 0.97196310624955,
      "grad_norm": 2.0614378452301025,
      "learning_rate": 8.109157040912477e-06,
      "loss": 0.0385,
      "step": 593920
    },
    {
      "epoch": 0.9719958366882033,
      "grad_norm": 0.823710024356842,
      "learning_rate": 8.109091148698959e-06,
      "loss": 0.0266,
      "step": 593940
    },
    {
      "epoch": 0.9720285671268566,
      "grad_norm": 0.651252269744873,
      "learning_rate": 8.109025256485442e-06,
      "loss": 0.0383,
      "step": 593960
    },
    {
      "epoch": 0.97206129756551,
      "grad_norm": 0.8959705233573914,
      "learning_rate": 8.108959364271924e-06,
      "loss": 0.0298,
      "step": 593980
    },
    {
      "epoch": 0.9720940280041633,
      "grad_norm": 0.7253846526145935,
      "learning_rate": 8.108893472058408e-06,
      "loss": 0.0208,
      "step": 594000
    },
    {
      "epoch": 0.9721267584428166,
      "grad_norm": 0.41667723655700684,
      "learning_rate": 8.10882757984489e-06,
      "loss": 0.0245,
      "step": 594020
    },
    {
      "epoch": 0.97215948888147,
      "grad_norm": 2.1581475734710693,
      "learning_rate": 8.108761687631373e-06,
      "loss": 0.0285,
      "step": 594040
    },
    {
      "epoch": 0.9721922193201233,
      "grad_norm": 2.119926929473877,
      "learning_rate": 8.108695795417855e-06,
      "loss": 0.0425,
      "step": 594060
    },
    {
      "epoch": 0.9722249497587767,
      "grad_norm": 1.0845304727554321,
      "learning_rate": 8.108629903204339e-06,
      "loss": 0.0274,
      "step": 594080
    },
    {
      "epoch": 0.97225768019743,
      "grad_norm": 0.5103897452354431,
      "learning_rate": 8.108564010990822e-06,
      "loss": 0.0341,
      "step": 594100
    },
    {
      "epoch": 0.9722904106360833,
      "grad_norm": 1.303938865661621,
      "learning_rate": 8.108498118777304e-06,
      "loss": 0.0234,
      "step": 594120
    },
    {
      "epoch": 0.9723231410747367,
      "grad_norm": 1.1967579126358032,
      "learning_rate": 8.108432226563788e-06,
      "loss": 0.0246,
      "step": 594140
    },
    {
      "epoch": 0.97235587151339,
      "grad_norm": 0.11842340975999832,
      "learning_rate": 8.108366334350271e-06,
      "loss": 0.0255,
      "step": 594160
    },
    {
      "epoch": 0.9723886019520434,
      "grad_norm": 1.0452321767807007,
      "learning_rate": 8.108300442136753e-06,
      "loss": 0.0249,
      "step": 594180
    },
    {
      "epoch": 0.9724213323906967,
      "grad_norm": 1.708146572113037,
      "learning_rate": 8.108234549923237e-06,
      "loss": 0.0437,
      "step": 594200
    },
    {
      "epoch": 0.97245406282935,
      "grad_norm": 0.16845184564590454,
      "learning_rate": 8.10816865770972e-06,
      "loss": 0.03,
      "step": 594220
    },
    {
      "epoch": 0.9724867932680034,
      "grad_norm": 0.5704830884933472,
      "learning_rate": 8.108102765496202e-06,
      "loss": 0.026,
      "step": 594240
    },
    {
      "epoch": 0.9725195237066567,
      "grad_norm": 1.4506622552871704,
      "learning_rate": 8.108036873282686e-06,
      "loss": 0.0373,
      "step": 594260
    },
    {
      "epoch": 0.9725522541453101,
      "grad_norm": 0.39037859439849854,
      "learning_rate": 8.107970981069168e-06,
      "loss": 0.0285,
      "step": 594280
    },
    {
      "epoch": 0.9725849845839634,
      "grad_norm": 0.8528926968574524,
      "learning_rate": 8.107905088855651e-06,
      "loss": 0.026,
      "step": 594300
    },
    {
      "epoch": 0.9726177150226167,
      "grad_norm": 1.0549207925796509,
      "learning_rate": 8.107839196642133e-06,
      "loss": 0.0241,
      "step": 594320
    },
    {
      "epoch": 0.9726504454612701,
      "grad_norm": 0.1415410041809082,
      "learning_rate": 8.107773304428617e-06,
      "loss": 0.0269,
      "step": 594340
    },
    {
      "epoch": 0.9726831758999234,
      "grad_norm": 0.45341965556144714,
      "learning_rate": 8.107707412215099e-06,
      "loss": 0.0303,
      "step": 594360
    },
    {
      "epoch": 0.9727159063385767,
      "grad_norm": 0.7070332169532776,
      "learning_rate": 8.107641520001582e-06,
      "loss": 0.025,
      "step": 594380
    },
    {
      "epoch": 0.9727486367772301,
      "grad_norm": 0.3760767877101898,
      "learning_rate": 8.107575627788064e-06,
      "loss": 0.0343,
      "step": 594400
    },
    {
      "epoch": 0.9727813672158834,
      "grad_norm": 0.7642124891281128,
      "learning_rate": 8.107509735574548e-06,
      "loss": 0.028,
      "step": 594420
    },
    {
      "epoch": 0.9728140976545367,
      "grad_norm": 0.6674824357032776,
      "learning_rate": 8.10744384336103e-06,
      "loss": 0.0268,
      "step": 594440
    },
    {
      "epoch": 0.9728468280931901,
      "grad_norm": 0.4400009512901306,
      "learning_rate": 8.107377951147513e-06,
      "loss": 0.0252,
      "step": 594460
    },
    {
      "epoch": 0.9728795585318435,
      "grad_norm": 1.2645891904830933,
      "learning_rate": 8.107312058933997e-06,
      "loss": 0.0315,
      "step": 594480
    },
    {
      "epoch": 0.9729122889704968,
      "grad_norm": 0.38922765851020813,
      "learning_rate": 8.107246166720479e-06,
      "loss": 0.0263,
      "step": 594500
    },
    {
      "epoch": 0.9729450194091501,
      "grad_norm": 2.214728593826294,
      "learning_rate": 8.107180274506962e-06,
      "loss": 0.0273,
      "step": 594520
    },
    {
      "epoch": 0.9729777498478035,
      "grad_norm": 1.2856868505477905,
      "learning_rate": 8.107114382293446e-06,
      "loss": 0.0259,
      "step": 594540
    },
    {
      "epoch": 0.9730104802864568,
      "grad_norm": 0.3399985134601593,
      "learning_rate": 8.107048490079928e-06,
      "loss": 0.0338,
      "step": 594560
    },
    {
      "epoch": 0.9730432107251101,
      "grad_norm": 1.0058990716934204,
      "learning_rate": 8.106982597866411e-06,
      "loss": 0.0316,
      "step": 594580
    },
    {
      "epoch": 0.9730759411637635,
      "grad_norm": 0.2060670107603073,
      "learning_rate": 8.106916705652895e-06,
      "loss": 0.0221,
      "step": 594600
    },
    {
      "epoch": 0.9731086716024168,
      "grad_norm": 0.5099118947982788,
      "learning_rate": 8.106850813439377e-06,
      "loss": 0.025,
      "step": 594620
    },
    {
      "epoch": 0.9731414020410701,
      "grad_norm": 1.6683989763259888,
      "learning_rate": 8.10678492122586e-06,
      "loss": 0.0326,
      "step": 594640
    },
    {
      "epoch": 0.9731741324797235,
      "grad_norm": 0.5809614062309265,
      "learning_rate": 8.106719029012342e-06,
      "loss": 0.0235,
      "step": 594660
    },
    {
      "epoch": 0.9732068629183769,
      "grad_norm": 0.5092218518257141,
      "learning_rate": 8.106653136798826e-06,
      "loss": 0.028,
      "step": 594680
    },
    {
      "epoch": 0.9732395933570301,
      "grad_norm": 1.8410981893539429,
      "learning_rate": 8.106587244585308e-06,
      "loss": 0.0239,
      "step": 594700
    },
    {
      "epoch": 0.9732723237956835,
      "grad_norm": 0.4157778322696686,
      "learning_rate": 8.106521352371791e-06,
      "loss": 0.0349,
      "step": 594720
    },
    {
      "epoch": 0.9733050542343369,
      "grad_norm": 0.5846260190010071,
      "learning_rate": 8.106455460158273e-06,
      "loss": 0.0214,
      "step": 594740
    },
    {
      "epoch": 0.9733377846729901,
      "grad_norm": 1.944629192352295,
      "learning_rate": 8.106389567944757e-06,
      "loss": 0.0165,
      "step": 594760
    },
    {
      "epoch": 0.9733705151116435,
      "grad_norm": 1.3792150020599365,
      "learning_rate": 8.106323675731239e-06,
      "loss": 0.0278,
      "step": 594780
    },
    {
      "epoch": 0.9734032455502969,
      "grad_norm": 0.5205491185188293,
      "learning_rate": 8.106257783517722e-06,
      "loss": 0.033,
      "step": 594800
    },
    {
      "epoch": 0.9734359759889502,
      "grad_norm": 0.6112712621688843,
      "learning_rate": 8.106191891304204e-06,
      "loss": 0.0258,
      "step": 594820
    },
    {
      "epoch": 0.9734687064276035,
      "grad_norm": 1.0943493843078613,
      "learning_rate": 8.106125999090688e-06,
      "loss": 0.0247,
      "step": 594840
    },
    {
      "epoch": 0.9735014368662569,
      "grad_norm": 0.305097758769989,
      "learning_rate": 8.10606010687717e-06,
      "loss": 0.0239,
      "step": 594860
    },
    {
      "epoch": 0.9735341673049103,
      "grad_norm": 1.3782583475112915,
      "learning_rate": 8.105994214663653e-06,
      "loss": 0.0254,
      "step": 594880
    },
    {
      "epoch": 0.9735668977435635,
      "grad_norm": 0.8214775323867798,
      "learning_rate": 8.105928322450137e-06,
      "loss": 0.0204,
      "step": 594900
    },
    {
      "epoch": 0.9735996281822169,
      "grad_norm": 1.3989386558532715,
      "learning_rate": 8.10586243023662e-06,
      "loss": 0.0348,
      "step": 594920
    },
    {
      "epoch": 0.9736323586208703,
      "grad_norm": 0.7601151466369629,
      "learning_rate": 8.105796538023102e-06,
      "loss": 0.0283,
      "step": 594940
    },
    {
      "epoch": 0.9736650890595235,
      "grad_norm": 1.40720534324646,
      "learning_rate": 8.105730645809586e-06,
      "loss": 0.0397,
      "step": 594960
    },
    {
      "epoch": 0.9736978194981769,
      "grad_norm": 8.24647045135498,
      "learning_rate": 8.10566475359607e-06,
      "loss": 0.0322,
      "step": 594980
    },
    {
      "epoch": 0.9737305499368303,
      "grad_norm": 0.2600216865539551,
      "learning_rate": 8.105598861382551e-06,
      "loss": 0.0208,
      "step": 595000
    },
    {
      "epoch": 0.9737632803754835,
      "grad_norm": 0.5130589008331299,
      "learning_rate": 8.105532969169035e-06,
      "loss": 0.0235,
      "step": 595020
    },
    {
      "epoch": 0.9737960108141369,
      "grad_norm": 0.9553026556968689,
      "learning_rate": 8.105467076955517e-06,
      "loss": 0.0263,
      "step": 595040
    },
    {
      "epoch": 0.9738287412527903,
      "grad_norm": 2.5974442958831787,
      "learning_rate": 8.105401184742e-06,
      "loss": 0.0284,
      "step": 595060
    },
    {
      "epoch": 0.9738614716914437,
      "grad_norm": 1.4248524904251099,
      "learning_rate": 8.105335292528482e-06,
      "loss": 0.0246,
      "step": 595080
    },
    {
      "epoch": 0.9738942021300969,
      "grad_norm": 0.24691292643547058,
      "learning_rate": 8.105269400314966e-06,
      "loss": 0.0237,
      "step": 595100
    },
    {
      "epoch": 0.9739269325687503,
      "grad_norm": 1.628208875656128,
      "learning_rate": 8.105203508101448e-06,
      "loss": 0.0198,
      "step": 595120
    },
    {
      "epoch": 0.9739596630074037,
      "grad_norm": 0.5247026681900024,
      "learning_rate": 8.105137615887931e-06,
      "loss": 0.0237,
      "step": 595140
    },
    {
      "epoch": 0.9739923934460569,
      "grad_norm": 0.8987652659416199,
      "learning_rate": 8.105071723674413e-06,
      "loss": 0.0291,
      "step": 595160
    },
    {
      "epoch": 0.9740251238847103,
      "grad_norm": 0.6422662138938904,
      "learning_rate": 8.105005831460897e-06,
      "loss": 0.0284,
      "step": 595180
    },
    {
      "epoch": 0.9740578543233637,
      "grad_norm": 0.12434326857328415,
      "learning_rate": 8.104939939247379e-06,
      "loss": 0.0257,
      "step": 595200
    },
    {
      "epoch": 0.9740905847620169,
      "grad_norm": 0.2991962432861328,
      "learning_rate": 8.104874047033862e-06,
      "loss": 0.0326,
      "step": 595220
    },
    {
      "epoch": 0.9741233152006703,
      "grad_norm": 1.0518428087234497,
      "learning_rate": 8.104808154820344e-06,
      "loss": 0.0267,
      "step": 595240
    },
    {
      "epoch": 0.9741560456393237,
      "grad_norm": 1.0319141149520874,
      "learning_rate": 8.104742262606828e-06,
      "loss": 0.0327,
      "step": 595260
    },
    {
      "epoch": 0.974188776077977,
      "grad_norm": 0.8414089679718018,
      "learning_rate": 8.104676370393311e-06,
      "loss": 0.0259,
      "step": 595280
    },
    {
      "epoch": 0.9742215065166303,
      "grad_norm": 0.8521518707275391,
      "learning_rate": 8.104610478179793e-06,
      "loss": 0.0238,
      "step": 595300
    },
    {
      "epoch": 0.9742542369552837,
      "grad_norm": 1.8438998460769653,
      "learning_rate": 8.104544585966277e-06,
      "loss": 0.028,
      "step": 595320
    },
    {
      "epoch": 0.974286967393937,
      "grad_norm": 0.46864932775497437,
      "learning_rate": 8.10447869375276e-06,
      "loss": 0.0197,
      "step": 595340
    },
    {
      "epoch": 0.9743196978325903,
      "grad_norm": 0.7196453809738159,
      "learning_rate": 8.104412801539242e-06,
      "loss": 0.0321,
      "step": 595360
    },
    {
      "epoch": 0.9743524282712437,
      "grad_norm": 2.467099189758301,
      "learning_rate": 8.104346909325726e-06,
      "loss": 0.0278,
      "step": 595380
    },
    {
      "epoch": 0.9743851587098971,
      "grad_norm": 0.7948815822601318,
      "learning_rate": 8.10428101711221e-06,
      "loss": 0.0288,
      "step": 595400
    },
    {
      "epoch": 0.9744178891485503,
      "grad_norm": 1.3457492589950562,
      "learning_rate": 8.104215124898691e-06,
      "loss": 0.0226,
      "step": 595420
    },
    {
      "epoch": 0.9744506195872037,
      "grad_norm": 1.8727669715881348,
      "learning_rate": 8.104149232685175e-06,
      "loss": 0.026,
      "step": 595440
    },
    {
      "epoch": 0.9744833500258571,
      "grad_norm": 0.9434325098991394,
      "learning_rate": 8.104083340471657e-06,
      "loss": 0.0278,
      "step": 595460
    },
    {
      "epoch": 0.9745160804645104,
      "grad_norm": 0.34126460552215576,
      "learning_rate": 8.10401744825814e-06,
      "loss": 0.0218,
      "step": 595480
    },
    {
      "epoch": 0.9745488109031637,
      "grad_norm": 1.2997318506240845,
      "learning_rate": 8.103951556044622e-06,
      "loss": 0.0248,
      "step": 595500
    },
    {
      "epoch": 0.9745815413418171,
      "grad_norm": 1.3841466903686523,
      "learning_rate": 8.103885663831106e-06,
      "loss": 0.0396,
      "step": 595520
    },
    {
      "epoch": 0.9746142717804704,
      "grad_norm": 0.29123130440711975,
      "learning_rate": 8.103819771617588e-06,
      "loss": 0.0338,
      "step": 595540
    },
    {
      "epoch": 0.9746470022191237,
      "grad_norm": 0.7624942064285278,
      "learning_rate": 8.103753879404071e-06,
      "loss": 0.0314,
      "step": 595560
    },
    {
      "epoch": 0.9746797326577771,
      "grad_norm": 0.24715903401374817,
      "learning_rate": 8.103687987190553e-06,
      "loss": 0.0263,
      "step": 595580
    },
    {
      "epoch": 0.9747124630964304,
      "grad_norm": 0.42106255888938904,
      "learning_rate": 8.103622094977037e-06,
      "loss": 0.0301,
      "step": 595600
    },
    {
      "epoch": 0.9747451935350837,
      "grad_norm": 0.8388290405273438,
      "learning_rate": 8.10355620276352e-06,
      "loss": 0.0269,
      "step": 595620
    },
    {
      "epoch": 0.9747779239737371,
      "grad_norm": 1.446484923362732,
      "learning_rate": 8.103490310550002e-06,
      "loss": 0.0314,
      "step": 595640
    },
    {
      "epoch": 0.9748106544123905,
      "grad_norm": 1.055936336517334,
      "learning_rate": 8.103424418336486e-06,
      "loss": 0.0424,
      "step": 595660
    },
    {
      "epoch": 0.9748433848510438,
      "grad_norm": 0.7455595135688782,
      "learning_rate": 8.103358526122968e-06,
      "loss": 0.0225,
      "step": 595680
    },
    {
      "epoch": 0.9748761152896971,
      "grad_norm": 0.9990159273147583,
      "learning_rate": 8.103292633909451e-06,
      "loss": 0.0346,
      "step": 595700
    },
    {
      "epoch": 0.9749088457283505,
      "grad_norm": 1.328074336051941,
      "learning_rate": 8.103226741695935e-06,
      "loss": 0.0383,
      "step": 595720
    },
    {
      "epoch": 0.9749415761670038,
      "grad_norm": 4.001338005065918,
      "learning_rate": 8.103160849482417e-06,
      "loss": 0.0294,
      "step": 595740
    },
    {
      "epoch": 0.9749743066056571,
      "grad_norm": 1.04349684715271,
      "learning_rate": 8.1030949572689e-06,
      "loss": 0.0247,
      "step": 595760
    },
    {
      "epoch": 0.9750070370443105,
      "grad_norm": 2.0351576805114746,
      "learning_rate": 8.103029065055384e-06,
      "loss": 0.0316,
      "step": 595780
    },
    {
      "epoch": 0.9750397674829638,
      "grad_norm": 1.0279065370559692,
      "learning_rate": 8.102963172841866e-06,
      "loss": 0.028,
      "step": 595800
    },
    {
      "epoch": 0.9750724979216171,
      "grad_norm": 1.3899341821670532,
      "learning_rate": 8.10289728062835e-06,
      "loss": 0.0205,
      "step": 595820
    },
    {
      "epoch": 0.9751052283602705,
      "grad_norm": 0.6461832523345947,
      "learning_rate": 8.102831388414832e-06,
      "loss": 0.0255,
      "step": 595840
    },
    {
      "epoch": 0.9751379587989238,
      "grad_norm": 0.4321225583553314,
      "learning_rate": 8.102765496201315e-06,
      "loss": 0.0172,
      "step": 595860
    },
    {
      "epoch": 0.9751706892375772,
      "grad_norm": 0.9602458477020264,
      "learning_rate": 8.102699603987797e-06,
      "loss": 0.0326,
      "step": 595880
    },
    {
      "epoch": 0.9752034196762305,
      "grad_norm": 1.0182549953460693,
      "learning_rate": 8.10263371177428e-06,
      "loss": 0.0207,
      "step": 595900
    },
    {
      "epoch": 0.9752361501148838,
      "grad_norm": 0.9232617616653442,
      "learning_rate": 8.102567819560762e-06,
      "loss": 0.0301,
      "step": 595920
    },
    {
      "epoch": 0.9752688805535372,
      "grad_norm": 1.358214020729065,
      "learning_rate": 8.102501927347246e-06,
      "loss": 0.0235,
      "step": 595940
    },
    {
      "epoch": 0.9753016109921905,
      "grad_norm": 1.1450620889663696,
      "learning_rate": 8.10243603513373e-06,
      "loss": 0.0382,
      "step": 595960
    },
    {
      "epoch": 0.9753343414308439,
      "grad_norm": 0.6034504175186157,
      "learning_rate": 8.102370142920212e-06,
      "loss": 0.0204,
      "step": 595980
    },
    {
      "epoch": 0.9753670718694972,
      "grad_norm": 0.7468099594116211,
      "learning_rate": 8.102304250706695e-06,
      "loss": 0.0314,
      "step": 596000
    },
    {
      "epoch": 0.9753998023081505,
      "grad_norm": 0.6014844179153442,
      "learning_rate": 8.102238358493177e-06,
      "loss": 0.0302,
      "step": 596020
    },
    {
      "epoch": 0.9754325327468039,
      "grad_norm": 0.5201706290245056,
      "learning_rate": 8.10217246627966e-06,
      "loss": 0.0351,
      "step": 596040
    },
    {
      "epoch": 0.9754652631854572,
      "grad_norm": 1.574593424797058,
      "learning_rate": 8.102106574066143e-06,
      "loss": 0.0293,
      "step": 596060
    },
    {
      "epoch": 0.9754979936241106,
      "grad_norm": 1.0264133214950562,
      "learning_rate": 8.102040681852626e-06,
      "loss": 0.0347,
      "step": 596080
    },
    {
      "epoch": 0.9755307240627639,
      "grad_norm": 1.067238450050354,
      "learning_rate": 8.101974789639108e-06,
      "loss": 0.0296,
      "step": 596100
    },
    {
      "epoch": 0.9755634545014172,
      "grad_norm": 0.5597115755081177,
      "learning_rate": 8.101908897425592e-06,
      "loss": 0.0245,
      "step": 596120
    },
    {
      "epoch": 0.9755961849400706,
      "grad_norm": 0.7841558456420898,
      "learning_rate": 8.101843005212075e-06,
      "loss": 0.0222,
      "step": 596140
    },
    {
      "epoch": 0.9756289153787239,
      "grad_norm": 0.6477239727973938,
      "learning_rate": 8.101777112998557e-06,
      "loss": 0.0191,
      "step": 596160
    },
    {
      "epoch": 0.9756616458173772,
      "grad_norm": 1.2935055494308472,
      "learning_rate": 8.10171122078504e-06,
      "loss": 0.0328,
      "step": 596180
    },
    {
      "epoch": 0.9756943762560306,
      "grad_norm": 1.2183911800384521,
      "learning_rate": 8.101645328571524e-06,
      "loss": 0.0214,
      "step": 596200
    },
    {
      "epoch": 0.9757271066946839,
      "grad_norm": 0.4681936502456665,
      "learning_rate": 8.101579436358006e-06,
      "loss": 0.0204,
      "step": 596220
    },
    {
      "epoch": 0.9757598371333372,
      "grad_norm": 0.2745567262172699,
      "learning_rate": 8.10151354414449e-06,
      "loss": 0.0191,
      "step": 596240
    },
    {
      "epoch": 0.9757925675719906,
      "grad_norm": 0.5620705485343933,
      "learning_rate": 8.101447651930972e-06,
      "loss": 0.0224,
      "step": 596260
    },
    {
      "epoch": 0.975825298010644,
      "grad_norm": 0.2280867099761963,
      "learning_rate": 8.101381759717455e-06,
      "loss": 0.0241,
      "step": 596280
    },
    {
      "epoch": 0.9758580284492973,
      "grad_norm": 0.4580997824668884,
      "learning_rate": 8.101315867503937e-06,
      "loss": 0.0307,
      "step": 596300
    },
    {
      "epoch": 0.9758907588879506,
      "grad_norm": 0.9990136623382568,
      "learning_rate": 8.10124997529042e-06,
      "loss": 0.0259,
      "step": 596320
    },
    {
      "epoch": 0.975923489326604,
      "grad_norm": 0.7691787481307983,
      "learning_rate": 8.101184083076904e-06,
      "loss": 0.0325,
      "step": 596340
    },
    {
      "epoch": 0.9759562197652573,
      "grad_norm": 1.7504684925079346,
      "learning_rate": 8.101118190863386e-06,
      "loss": 0.0198,
      "step": 596360
    },
    {
      "epoch": 0.9759889502039106,
      "grad_norm": 0.7619416117668152,
      "learning_rate": 8.10105229864987e-06,
      "loss": 0.021,
      "step": 596380
    },
    {
      "epoch": 0.976021680642564,
      "grad_norm": 1.5595765113830566,
      "learning_rate": 8.100986406436352e-06,
      "loss": 0.0304,
      "step": 596400
    },
    {
      "epoch": 0.9760544110812173,
      "grad_norm": 3.129178047180176,
      "learning_rate": 8.100920514222835e-06,
      "loss": 0.027,
      "step": 596420
    },
    {
      "epoch": 0.9760871415198706,
      "grad_norm": 1.08138108253479,
      "learning_rate": 8.100854622009317e-06,
      "loss": 0.0256,
      "step": 596440
    },
    {
      "epoch": 0.976119871958524,
      "grad_norm": 0.8474756479263306,
      "learning_rate": 8.1007887297958e-06,
      "loss": 0.0205,
      "step": 596460
    },
    {
      "epoch": 0.9761526023971774,
      "grad_norm": 0.37759682536125183,
      "learning_rate": 8.100722837582283e-06,
      "loss": 0.0257,
      "step": 596480
    },
    {
      "epoch": 0.9761853328358306,
      "grad_norm": 1.853650689125061,
      "learning_rate": 8.100656945368766e-06,
      "loss": 0.0362,
      "step": 596500
    },
    {
      "epoch": 0.976218063274484,
      "grad_norm": 0.3434738516807556,
      "learning_rate": 8.10059105315525e-06,
      "loss": 0.0246,
      "step": 596520
    },
    {
      "epoch": 0.9762507937131374,
      "grad_norm": 0.09900765120983124,
      "learning_rate": 8.100525160941732e-06,
      "loss": 0.022,
      "step": 596540
    },
    {
      "epoch": 0.9762835241517906,
      "grad_norm": 0.7041327357292175,
      "learning_rate": 8.100459268728215e-06,
      "loss": 0.0264,
      "step": 596560
    },
    {
      "epoch": 0.976316254590444,
      "grad_norm": 1.3558205366134644,
      "learning_rate": 8.100393376514699e-06,
      "loss": 0.0395,
      "step": 596580
    },
    {
      "epoch": 0.9763489850290974,
      "grad_norm": 1.9289814233779907,
      "learning_rate": 8.10032748430118e-06,
      "loss": 0.0283,
      "step": 596600
    },
    {
      "epoch": 0.9763817154677507,
      "grad_norm": 1.7768882513046265,
      "learning_rate": 8.100261592087664e-06,
      "loss": 0.0317,
      "step": 596620
    },
    {
      "epoch": 0.976414445906404,
      "grad_norm": 0.7345539927482605,
      "learning_rate": 8.100195699874146e-06,
      "loss": 0.032,
      "step": 596640
    },
    {
      "epoch": 0.9764471763450574,
      "grad_norm": 0.3949887752532959,
      "learning_rate": 8.10012980766063e-06,
      "loss": 0.0277,
      "step": 596660
    },
    {
      "epoch": 0.9764799067837107,
      "grad_norm": 1.4577040672302246,
      "learning_rate": 8.100063915447113e-06,
      "loss": 0.0238,
      "step": 596680
    },
    {
      "epoch": 0.976512637222364,
      "grad_norm": 0.09214033931493759,
      "learning_rate": 8.099998023233595e-06,
      "loss": 0.0201,
      "step": 596700
    },
    {
      "epoch": 0.9765453676610174,
      "grad_norm": 1.124902606010437,
      "learning_rate": 8.099932131020079e-06,
      "loss": 0.025,
      "step": 596720
    },
    {
      "epoch": 0.9765780980996708,
      "grad_norm": 0.2236122190952301,
      "learning_rate": 8.09986623880656e-06,
      "loss": 0.026,
      "step": 596740
    },
    {
      "epoch": 0.976610828538324,
      "grad_norm": 1.1487833261489868,
      "learning_rate": 8.099800346593044e-06,
      "loss": 0.0238,
      "step": 596760
    },
    {
      "epoch": 0.9766435589769774,
      "grad_norm": 2.636711597442627,
      "learning_rate": 8.099734454379526e-06,
      "loss": 0.0237,
      "step": 596780
    },
    {
      "epoch": 0.9766762894156308,
      "grad_norm": 0.7860778570175171,
      "learning_rate": 8.09966856216601e-06,
      "loss": 0.033,
      "step": 596800
    },
    {
      "epoch": 0.976709019854284,
      "grad_norm": 0.8613466024398804,
      "learning_rate": 8.099602669952492e-06,
      "loss": 0.0251,
      "step": 596820
    },
    {
      "epoch": 0.9767417502929374,
      "grad_norm": 0.9427706599235535,
      "learning_rate": 8.099536777738975e-06,
      "loss": 0.0371,
      "step": 596840
    },
    {
      "epoch": 0.9767744807315908,
      "grad_norm": 0.43237146735191345,
      "learning_rate": 8.099470885525457e-06,
      "loss": 0.0197,
      "step": 596860
    },
    {
      "epoch": 0.976807211170244,
      "grad_norm": 0.6705803871154785,
      "learning_rate": 8.09940499331194e-06,
      "loss": 0.0212,
      "step": 596880
    },
    {
      "epoch": 0.9768399416088974,
      "grad_norm": 0.8764396905899048,
      "learning_rate": 8.099339101098423e-06,
      "loss": 0.0169,
      "step": 596900
    },
    {
      "epoch": 0.9768726720475508,
      "grad_norm": 0.3278566896915436,
      "learning_rate": 8.099273208884906e-06,
      "loss": 0.0318,
      "step": 596920
    },
    {
      "epoch": 0.9769054024862042,
      "grad_norm": 0.4955238699913025,
      "learning_rate": 8.09920731667139e-06,
      "loss": 0.0195,
      "step": 596940
    },
    {
      "epoch": 0.9769381329248574,
      "grad_norm": 0.7272984981536865,
      "learning_rate": 8.099141424457872e-06,
      "loss": 0.0217,
      "step": 596960
    },
    {
      "epoch": 0.9769708633635108,
      "grad_norm": 0.49757033586502075,
      "learning_rate": 8.099075532244355e-06,
      "loss": 0.0208,
      "step": 596980
    },
    {
      "epoch": 0.9770035938021642,
      "grad_norm": 1.631013035774231,
      "learning_rate": 8.099009640030839e-06,
      "loss": 0.0202,
      "step": 597000
    },
    {
      "epoch": 0.9770363242408174,
      "grad_norm": 1.1664540767669678,
      "learning_rate": 8.09894374781732e-06,
      "loss": 0.0326,
      "step": 597020
    },
    {
      "epoch": 0.9770690546794708,
      "grad_norm": 0.5775046348571777,
      "learning_rate": 8.098877855603804e-06,
      "loss": 0.0365,
      "step": 597040
    },
    {
      "epoch": 0.9771017851181242,
      "grad_norm": 0.870503306388855,
      "learning_rate": 8.098811963390288e-06,
      "loss": 0.0372,
      "step": 597060
    },
    {
      "epoch": 0.9771345155567774,
      "grad_norm": 0.1835622787475586,
      "learning_rate": 8.09874607117677e-06,
      "loss": 0.0301,
      "step": 597080
    },
    {
      "epoch": 0.9771672459954308,
      "grad_norm": 0.5347439050674438,
      "learning_rate": 8.098680178963253e-06,
      "loss": 0.0202,
      "step": 597100
    },
    {
      "epoch": 0.9771999764340842,
      "grad_norm": 1.5345128774642944,
      "learning_rate": 8.098614286749735e-06,
      "loss": 0.0327,
      "step": 597120
    },
    {
      "epoch": 0.9772327068727376,
      "grad_norm": 1.095062017440796,
      "learning_rate": 8.098548394536219e-06,
      "loss": 0.0337,
      "step": 597140
    },
    {
      "epoch": 0.9772654373113908,
      "grad_norm": 0.4474034607410431,
      "learning_rate": 8.0984825023227e-06,
      "loss": 0.0242,
      "step": 597160
    },
    {
      "epoch": 0.9772981677500442,
      "grad_norm": 0.532600998878479,
      "learning_rate": 8.098416610109184e-06,
      "loss": 0.0276,
      "step": 597180
    },
    {
      "epoch": 0.9773308981886976,
      "grad_norm": 0.5063484311103821,
      "learning_rate": 8.098350717895666e-06,
      "loss": 0.024,
      "step": 597200
    },
    {
      "epoch": 0.9773636286273508,
      "grad_norm": 2.06008243560791,
      "learning_rate": 8.09828482568215e-06,
      "loss": 0.0223,
      "step": 597220
    },
    {
      "epoch": 0.9773963590660042,
      "grad_norm": 1.2994312047958374,
      "learning_rate": 8.098218933468632e-06,
      "loss": 0.0344,
      "step": 597240
    },
    {
      "epoch": 0.9774290895046576,
      "grad_norm": 0.2346024364233017,
      "learning_rate": 8.098153041255115e-06,
      "loss": 0.0248,
      "step": 597260
    },
    {
      "epoch": 0.9774618199433108,
      "grad_norm": 1.1573187112808228,
      "learning_rate": 8.098087149041597e-06,
      "loss": 0.0295,
      "step": 597280
    },
    {
      "epoch": 0.9774945503819642,
      "grad_norm": 0.5697221159934998,
      "learning_rate": 8.09802125682808e-06,
      "loss": 0.0165,
      "step": 597300
    },
    {
      "epoch": 0.9775272808206176,
      "grad_norm": 1.1733176708221436,
      "learning_rate": 8.097955364614564e-06,
      "loss": 0.0295,
      "step": 597320
    },
    {
      "epoch": 0.977560011259271,
      "grad_norm": 0.9363771080970764,
      "learning_rate": 8.097889472401046e-06,
      "loss": 0.0189,
      "step": 597340
    },
    {
      "epoch": 0.9775927416979242,
      "grad_norm": 1.209478497505188,
      "learning_rate": 8.09782358018753e-06,
      "loss": 0.0294,
      "step": 597360
    },
    {
      "epoch": 0.9776254721365776,
      "grad_norm": 0.7151874899864197,
      "learning_rate": 8.097757687974013e-06,
      "loss": 0.0428,
      "step": 597380
    },
    {
      "epoch": 0.977658202575231,
      "grad_norm": 1.4369703531265259,
      "learning_rate": 8.097691795760495e-06,
      "loss": 0.0297,
      "step": 597400
    },
    {
      "epoch": 0.9776909330138842,
      "grad_norm": 1.2225580215454102,
      "learning_rate": 8.097625903546979e-06,
      "loss": 0.0304,
      "step": 597420
    },
    {
      "epoch": 0.9777236634525376,
      "grad_norm": 1.1710219383239746,
      "learning_rate": 8.097560011333463e-06,
      "loss": 0.0284,
      "step": 597440
    },
    {
      "epoch": 0.977756393891191,
      "grad_norm": 0.38417771458625793,
      "learning_rate": 8.097494119119944e-06,
      "loss": 0.0205,
      "step": 597460
    },
    {
      "epoch": 0.9777891243298442,
      "grad_norm": 1.0921580791473389,
      "learning_rate": 8.097428226906428e-06,
      "loss": 0.0272,
      "step": 597480
    },
    {
      "epoch": 0.9778218547684976,
      "grad_norm": 0.8205825090408325,
      "learning_rate": 8.09736233469291e-06,
      "loss": 0.0249,
      "step": 597500
    },
    {
      "epoch": 0.977854585207151,
      "grad_norm": 1.0977922677993774,
      "learning_rate": 8.097296442479394e-06,
      "loss": 0.0267,
      "step": 597520
    },
    {
      "epoch": 0.9778873156458043,
      "grad_norm": 1.4711675643920898,
      "learning_rate": 8.097230550265875e-06,
      "loss": 0.0301,
      "step": 597540
    },
    {
      "epoch": 0.9779200460844576,
      "grad_norm": 0.7501368522644043,
      "learning_rate": 8.097164658052359e-06,
      "loss": 0.0306,
      "step": 597560
    },
    {
      "epoch": 0.977952776523111,
      "grad_norm": 1.2015860080718994,
      "learning_rate": 8.097098765838841e-06,
      "loss": 0.0279,
      "step": 597580
    },
    {
      "epoch": 0.9779855069617643,
      "grad_norm": 0.9924634099006653,
      "learning_rate": 8.097032873625324e-06,
      "loss": 0.0261,
      "step": 597600
    },
    {
      "epoch": 0.9780182374004176,
      "grad_norm": 1.909675121307373,
      "learning_rate": 8.096966981411806e-06,
      "loss": 0.0333,
      "step": 597620
    },
    {
      "epoch": 0.978050967839071,
      "grad_norm": 0.8220877051353455,
      "learning_rate": 8.09690108919829e-06,
      "loss": 0.0244,
      "step": 597640
    },
    {
      "epoch": 0.9780836982777243,
      "grad_norm": 0.5618101358413696,
      "learning_rate": 8.096835196984772e-06,
      "loss": 0.0243,
      "step": 597660
    },
    {
      "epoch": 0.9781164287163776,
      "grad_norm": 0.3798353970050812,
      "learning_rate": 8.096769304771255e-06,
      "loss": 0.0236,
      "step": 597680
    },
    {
      "epoch": 0.978149159155031,
      "grad_norm": 0.27618905901908875,
      "learning_rate": 8.096703412557739e-06,
      "loss": 0.0271,
      "step": 597700
    },
    {
      "epoch": 0.9781818895936844,
      "grad_norm": 2.1611175537109375,
      "learning_rate": 8.096637520344221e-06,
      "loss": 0.0305,
      "step": 597720
    },
    {
      "epoch": 0.9782146200323377,
      "grad_norm": 0.16097941994667053,
      "learning_rate": 8.096571628130704e-06,
      "loss": 0.0183,
      "step": 597740
    },
    {
      "epoch": 0.978247350470991,
      "grad_norm": 0.9088170528411865,
      "learning_rate": 8.096505735917188e-06,
      "loss": 0.0259,
      "step": 597760
    },
    {
      "epoch": 0.9782800809096444,
      "grad_norm": 0.8072405457496643,
      "learning_rate": 8.09643984370367e-06,
      "loss": 0.0347,
      "step": 597780
    },
    {
      "epoch": 0.9783128113482977,
      "grad_norm": 0.3458361327648163,
      "learning_rate": 8.096373951490154e-06,
      "loss": 0.0327,
      "step": 597800
    },
    {
      "epoch": 0.978345541786951,
      "grad_norm": 1.9064691066741943,
      "learning_rate": 8.096308059276637e-06,
      "loss": 0.0344,
      "step": 597820
    },
    {
      "epoch": 0.9783782722256044,
      "grad_norm": 0.19753792881965637,
      "learning_rate": 8.096242167063119e-06,
      "loss": 0.0197,
      "step": 597840
    },
    {
      "epoch": 0.9784110026642577,
      "grad_norm": 0.9899634718894958,
      "learning_rate": 8.096176274849603e-06,
      "loss": 0.0216,
      "step": 597860
    },
    {
      "epoch": 0.978443733102911,
      "grad_norm": 1.4015331268310547,
      "learning_rate": 8.096110382636085e-06,
      "loss": 0.0294,
      "step": 597880
    },
    {
      "epoch": 0.9784764635415644,
      "grad_norm": 0.24184364080429077,
      "learning_rate": 8.096044490422568e-06,
      "loss": 0.0276,
      "step": 597900
    },
    {
      "epoch": 0.9785091939802177,
      "grad_norm": 1.310836672782898,
      "learning_rate": 8.09597859820905e-06,
      "loss": 0.0277,
      "step": 597920
    },
    {
      "epoch": 0.9785419244188711,
      "grad_norm": 0.9371964335441589,
      "learning_rate": 8.095912705995534e-06,
      "loss": 0.0301,
      "step": 597940
    },
    {
      "epoch": 0.9785746548575244,
      "grad_norm": 1.3387848138809204,
      "learning_rate": 8.095846813782015e-06,
      "loss": 0.0279,
      "step": 597960
    },
    {
      "epoch": 0.9786073852961777,
      "grad_norm": 0.4994441270828247,
      "learning_rate": 8.095780921568499e-06,
      "loss": 0.0203,
      "step": 597980
    },
    {
      "epoch": 0.9786401157348311,
      "grad_norm": 1.038642168045044,
      "learning_rate": 8.095715029354981e-06,
      "loss": 0.023,
      "step": 598000
    },
    {
      "epoch": 0.9786728461734844,
      "grad_norm": 0.8179655075073242,
      "learning_rate": 8.095649137141465e-06,
      "loss": 0.0371,
      "step": 598020
    },
    {
      "epoch": 0.9787055766121378,
      "grad_norm": 1.2357770204544067,
      "learning_rate": 8.095583244927946e-06,
      "loss": 0.0315,
      "step": 598040
    },
    {
      "epoch": 0.9787383070507911,
      "grad_norm": 0.4842040538787842,
      "learning_rate": 8.09551735271443e-06,
      "loss": 0.0281,
      "step": 598060
    },
    {
      "epoch": 0.9787710374894444,
      "grad_norm": 0.25302064418792725,
      "learning_rate": 8.095451460500914e-06,
      "loss": 0.0205,
      "step": 598080
    },
    {
      "epoch": 0.9788037679280978,
      "grad_norm": 0.19348376989364624,
      "learning_rate": 8.095385568287396e-06,
      "loss": 0.0375,
      "step": 598100
    },
    {
      "epoch": 0.9788364983667511,
      "grad_norm": 1.4619629383087158,
      "learning_rate": 8.095319676073879e-06,
      "loss": 0.0166,
      "step": 598120
    },
    {
      "epoch": 0.9788692288054045,
      "grad_norm": 0.5099601149559021,
      "learning_rate": 8.095253783860361e-06,
      "loss": 0.0327,
      "step": 598140
    },
    {
      "epoch": 0.9789019592440578,
      "grad_norm": 0.4261626601219177,
      "learning_rate": 8.095187891646845e-06,
      "loss": 0.026,
      "step": 598160
    },
    {
      "epoch": 0.9789346896827111,
      "grad_norm": 1.8515819311141968,
      "learning_rate": 8.095121999433328e-06,
      "loss": 0.0317,
      "step": 598180
    },
    {
      "epoch": 0.9789674201213645,
      "grad_norm": 1.3441637754440308,
      "learning_rate": 8.09505610721981e-06,
      "loss": 0.0303,
      "step": 598200
    },
    {
      "epoch": 0.9790001505600178,
      "grad_norm": 0.9790938496589661,
      "learning_rate": 8.094990215006294e-06,
      "loss": 0.0309,
      "step": 598220
    },
    {
      "epoch": 0.9790328809986711,
      "grad_norm": 2.38374924659729,
      "learning_rate": 8.094924322792777e-06,
      "loss": 0.0347,
      "step": 598240
    },
    {
      "epoch": 0.9790656114373245,
      "grad_norm": 0.3383873999118805,
      "learning_rate": 8.094858430579259e-06,
      "loss": 0.0214,
      "step": 598260
    },
    {
      "epoch": 0.9790983418759778,
      "grad_norm": 0.3780117630958557,
      "learning_rate": 8.094792538365743e-06,
      "loss": 0.0192,
      "step": 598280
    },
    {
      "epoch": 0.9791310723146311,
      "grad_norm": 0.4491097033023834,
      "learning_rate": 8.094726646152225e-06,
      "loss": 0.0255,
      "step": 598300
    },
    {
      "epoch": 0.9791638027532845,
      "grad_norm": 0.2725990116596222,
      "learning_rate": 8.094660753938708e-06,
      "loss": 0.0318,
      "step": 598320
    },
    {
      "epoch": 0.9791965331919379,
      "grad_norm": 1.2270737886428833,
      "learning_rate": 8.09459486172519e-06,
      "loss": 0.0356,
      "step": 598340
    },
    {
      "epoch": 0.9792292636305912,
      "grad_norm": 0.8901687860488892,
      "learning_rate": 8.094528969511674e-06,
      "loss": 0.0268,
      "step": 598360
    },
    {
      "epoch": 0.9792619940692445,
      "grad_norm": 0.6557226181030273,
      "learning_rate": 8.094463077298156e-06,
      "loss": 0.0389,
      "step": 598380
    },
    {
      "epoch": 0.9792947245078979,
      "grad_norm": 2.3519511222839355,
      "learning_rate": 8.094397185084639e-06,
      "loss": 0.0183,
      "step": 598400
    },
    {
      "epoch": 0.9793274549465512,
      "grad_norm": 0.6433534026145935,
      "learning_rate": 8.094331292871123e-06,
      "loss": 0.0284,
      "step": 598420
    },
    {
      "epoch": 0.9793601853852045,
      "grad_norm": 3.427110433578491,
      "learning_rate": 8.094265400657605e-06,
      "loss": 0.0194,
      "step": 598440
    },
    {
      "epoch": 0.9793929158238579,
      "grad_norm": 0.6986231803894043,
      "learning_rate": 8.094199508444088e-06,
      "loss": 0.0235,
      "step": 598460
    },
    {
      "epoch": 0.9794256462625112,
      "grad_norm": 0.7942545413970947,
      "learning_rate": 8.09413361623057e-06,
      "loss": 0.0265,
      "step": 598480
    },
    {
      "epoch": 0.9794583767011645,
      "grad_norm": 0.5769575834274292,
      "learning_rate": 8.094067724017054e-06,
      "loss": 0.0299,
      "step": 598500
    },
    {
      "epoch": 0.9794911071398179,
      "grad_norm": 0.3757363557815552,
      "learning_rate": 8.094001831803536e-06,
      "loss": 0.0282,
      "step": 598520
    },
    {
      "epoch": 0.9795238375784713,
      "grad_norm": 0.3548974394798279,
      "learning_rate": 8.09393593959002e-06,
      "loss": 0.0287,
      "step": 598540
    },
    {
      "epoch": 0.9795565680171245,
      "grad_norm": 1.0380183458328247,
      "learning_rate": 8.093870047376503e-06,
      "loss": 0.0357,
      "step": 598560
    },
    {
      "epoch": 0.9795892984557779,
      "grad_norm": 1.079972267150879,
      "learning_rate": 8.093804155162985e-06,
      "loss": 0.023,
      "step": 598580
    },
    {
      "epoch": 0.9796220288944313,
      "grad_norm": 0.09183119982481003,
      "learning_rate": 8.093738262949468e-06,
      "loss": 0.0296,
      "step": 598600
    },
    {
      "epoch": 0.9796547593330845,
      "grad_norm": 1.0494506359100342,
      "learning_rate": 8.093672370735952e-06,
      "loss": 0.0238,
      "step": 598620
    },
    {
      "epoch": 0.9796874897717379,
      "grad_norm": 0.5503877997398376,
      "learning_rate": 8.093606478522434e-06,
      "loss": 0.0196,
      "step": 598640
    },
    {
      "epoch": 0.9797202202103913,
      "grad_norm": 0.30059266090393066,
      "learning_rate": 8.093540586308917e-06,
      "loss": 0.0342,
      "step": 598660
    },
    {
      "epoch": 0.9797529506490446,
      "grad_norm": 0.3717862069606781,
      "learning_rate": 8.0934746940954e-06,
      "loss": 0.0394,
      "step": 598680
    },
    {
      "epoch": 0.9797856810876979,
      "grad_norm": 2.2529571056365967,
      "learning_rate": 8.093408801881883e-06,
      "loss": 0.0329,
      "step": 598700
    },
    {
      "epoch": 0.9798184115263513,
      "grad_norm": 1.0316531658172607,
      "learning_rate": 8.093342909668365e-06,
      "loss": 0.0205,
      "step": 598720
    },
    {
      "epoch": 0.9798511419650047,
      "grad_norm": 0.46016496419906616,
      "learning_rate": 8.093277017454848e-06,
      "loss": 0.0106,
      "step": 598740
    },
    {
      "epoch": 0.9798838724036579,
      "grad_norm": 1.4324702024459839,
      "learning_rate": 8.09321112524133e-06,
      "loss": 0.0291,
      "step": 598760
    },
    {
      "epoch": 0.9799166028423113,
      "grad_norm": 1.9970614910125732,
      "learning_rate": 8.093145233027814e-06,
      "loss": 0.0247,
      "step": 598780
    },
    {
      "epoch": 0.9799493332809647,
      "grad_norm": 0.30975058674812317,
      "learning_rate": 8.093079340814297e-06,
      "loss": 0.029,
      "step": 598800
    },
    {
      "epoch": 0.9799820637196179,
      "grad_norm": 0.4827158451080322,
      "learning_rate": 8.09301344860078e-06,
      "loss": 0.0243,
      "step": 598820
    },
    {
      "epoch": 0.9800147941582713,
      "grad_norm": 4.2802605628967285,
      "learning_rate": 8.092947556387263e-06,
      "loss": 0.0309,
      "step": 598840
    },
    {
      "epoch": 0.9800475245969247,
      "grad_norm": 0.26574328541755676,
      "learning_rate": 8.092881664173745e-06,
      "loss": 0.0192,
      "step": 598860
    },
    {
      "epoch": 0.9800802550355779,
      "grad_norm": 1.0113626718521118,
      "learning_rate": 8.092815771960228e-06,
      "loss": 0.0212,
      "step": 598880
    },
    {
      "epoch": 0.9801129854742313,
      "grad_norm": 0.777635395526886,
      "learning_rate": 8.09274987974671e-06,
      "loss": 0.0307,
      "step": 598900
    },
    {
      "epoch": 0.9801457159128847,
      "grad_norm": 0.7771590948104858,
      "learning_rate": 8.092683987533194e-06,
      "loss": 0.0285,
      "step": 598920
    },
    {
      "epoch": 0.9801784463515381,
      "grad_norm": 2.559797525405884,
      "learning_rate": 8.092618095319676e-06,
      "loss": 0.03,
      "step": 598940
    },
    {
      "epoch": 0.9802111767901913,
      "grad_norm": 0.8634541034698486,
      "learning_rate": 8.09255220310616e-06,
      "loss": 0.0258,
      "step": 598960
    },
    {
      "epoch": 0.9802439072288447,
      "grad_norm": 2.046642541885376,
      "learning_rate": 8.092486310892643e-06,
      "loss": 0.0339,
      "step": 598980
    },
    {
      "epoch": 0.9802766376674981,
      "grad_norm": 1.3609756231307983,
      "learning_rate": 8.092420418679125e-06,
      "loss": 0.0257,
      "step": 599000
    },
    {
      "epoch": 0.9803093681061513,
      "grad_norm": 2.9141957759857178,
      "learning_rate": 8.092354526465608e-06,
      "loss": 0.0254,
      "step": 599020
    },
    {
      "epoch": 0.9803420985448047,
      "grad_norm": 3.1121909618377686,
      "learning_rate": 8.092288634252092e-06,
      "loss": 0.0248,
      "step": 599040
    },
    {
      "epoch": 0.9803748289834581,
      "grad_norm": 0.4284226894378662,
      "learning_rate": 8.092222742038574e-06,
      "loss": 0.0318,
      "step": 599060
    },
    {
      "epoch": 0.9804075594221113,
      "grad_norm": 0.26177072525024414,
      "learning_rate": 8.092156849825057e-06,
      "loss": 0.0256,
      "step": 599080
    },
    {
      "epoch": 0.9804402898607647,
      "grad_norm": 0.8092448711395264,
      "learning_rate": 8.09209095761154e-06,
      "loss": 0.0264,
      "step": 599100
    },
    {
      "epoch": 0.9804730202994181,
      "grad_norm": 1.709818959236145,
      "learning_rate": 8.092025065398023e-06,
      "loss": 0.0279,
      "step": 599120
    },
    {
      "epoch": 0.9805057507380714,
      "grad_norm": 0.3851005434989929,
      "learning_rate": 8.091959173184506e-06,
      "loss": 0.0254,
      "step": 599140
    },
    {
      "epoch": 0.9805384811767247,
      "grad_norm": 0.6766605973243713,
      "learning_rate": 8.091893280970988e-06,
      "loss": 0.0146,
      "step": 599160
    },
    {
      "epoch": 0.9805712116153781,
      "grad_norm": 0.8517314791679382,
      "learning_rate": 8.091827388757472e-06,
      "loss": 0.0249,
      "step": 599180
    },
    {
      "epoch": 0.9806039420540315,
      "grad_norm": 0.7479945421218872,
      "learning_rate": 8.091761496543954e-06,
      "loss": 0.024,
      "step": 599200
    },
    {
      "epoch": 0.9806366724926847,
      "grad_norm": 2.584528684616089,
      "learning_rate": 8.091695604330437e-06,
      "loss": 0.0409,
      "step": 599220
    },
    {
      "epoch": 0.9806694029313381,
      "grad_norm": 0.7081330418586731,
      "learning_rate": 8.09162971211692e-06,
      "loss": 0.0322,
      "step": 599240
    },
    {
      "epoch": 0.9807021333699915,
      "grad_norm": 0.6978440284729004,
      "learning_rate": 8.091563819903403e-06,
      "loss": 0.0365,
      "step": 599260
    },
    {
      "epoch": 0.9807348638086447,
      "grad_norm": 1.6096619367599487,
      "learning_rate": 8.091497927689885e-06,
      "loss": 0.0296,
      "step": 599280
    },
    {
      "epoch": 0.9807675942472981,
      "grad_norm": 0.582886278629303,
      "learning_rate": 8.091432035476368e-06,
      "loss": 0.0344,
      "step": 599300
    },
    {
      "epoch": 0.9808003246859515,
      "grad_norm": 1.278816819190979,
      "learning_rate": 8.09136614326285e-06,
      "loss": 0.0299,
      "step": 599320
    },
    {
      "epoch": 0.9808330551246047,
      "grad_norm": 1.0088634490966797,
      "learning_rate": 8.091300251049334e-06,
      "loss": 0.0359,
      "step": 599340
    },
    {
      "epoch": 0.9808657855632581,
      "grad_norm": 0.7485562562942505,
      "learning_rate": 8.091234358835817e-06,
      "loss": 0.0234,
      "step": 599360
    },
    {
      "epoch": 0.9808985160019115,
      "grad_norm": 0.29957520961761475,
      "learning_rate": 8.0911684666223e-06,
      "loss": 0.0203,
      "step": 599380
    },
    {
      "epoch": 0.9809312464405648,
      "grad_norm": 1.3949639797210693,
      "learning_rate": 8.091102574408783e-06,
      "loss": 0.0309,
      "step": 599400
    },
    {
      "epoch": 0.9809639768792181,
      "grad_norm": 0.9395632147789001,
      "learning_rate": 8.091036682195266e-06,
      "loss": 0.0321,
      "step": 599420
    },
    {
      "epoch": 0.9809967073178715,
      "grad_norm": 0.940696656703949,
      "learning_rate": 8.090970789981748e-06,
      "loss": 0.0303,
      "step": 599440
    },
    {
      "epoch": 0.9810294377565248,
      "grad_norm": 0.24971486628055573,
      "learning_rate": 8.090904897768232e-06,
      "loss": 0.0194,
      "step": 599460
    },
    {
      "epoch": 0.9810621681951781,
      "grad_norm": 0.26938408613204956,
      "learning_rate": 8.090839005554716e-06,
      "loss": 0.0209,
      "step": 599480
    },
    {
      "epoch": 0.9810948986338315,
      "grad_norm": 0.05178932100534439,
      "learning_rate": 8.090773113341197e-06,
      "loss": 0.0338,
      "step": 599500
    },
    {
      "epoch": 0.9811276290724849,
      "grad_norm": 0.15792037546634674,
      "learning_rate": 8.090707221127681e-06,
      "loss": 0.0282,
      "step": 599520
    },
    {
      "epoch": 0.9811603595111381,
      "grad_norm": 1.0138593912124634,
      "learning_rate": 8.090641328914163e-06,
      "loss": 0.0329,
      "step": 599540
    },
    {
      "epoch": 0.9811930899497915,
      "grad_norm": 0.9059893488883972,
      "learning_rate": 8.090575436700647e-06,
      "loss": 0.0362,
      "step": 599560
    },
    {
      "epoch": 0.9812258203884449,
      "grad_norm": 0.42679473757743835,
      "learning_rate": 8.090509544487128e-06,
      "loss": 0.0279,
      "step": 599580
    },
    {
      "epoch": 0.9812585508270982,
      "grad_norm": 1.67708420753479,
      "learning_rate": 8.090443652273612e-06,
      "loss": 0.0271,
      "step": 599600
    },
    {
      "epoch": 0.9812912812657515,
      "grad_norm": 1.0771993398666382,
      "learning_rate": 8.090377760060094e-06,
      "loss": 0.0334,
      "step": 599620
    },
    {
      "epoch": 0.9813240117044049,
      "grad_norm": 0.26274555921554565,
      "learning_rate": 8.090311867846577e-06,
      "loss": 0.0192,
      "step": 599640
    },
    {
      "epoch": 0.9813567421430582,
      "grad_norm": 1.6809383630752563,
      "learning_rate": 8.09024597563306e-06,
      "loss": 0.0273,
      "step": 599660
    },
    {
      "epoch": 0.9813894725817115,
      "grad_norm": 0.9791613817214966,
      "learning_rate": 8.090180083419543e-06,
      "loss": 0.019,
      "step": 599680
    },
    {
      "epoch": 0.9814222030203649,
      "grad_norm": 0.8374671936035156,
      "learning_rate": 8.090114191206025e-06,
      "loss": 0.02,
      "step": 599700
    },
    {
      "epoch": 0.9814549334590182,
      "grad_norm": 1.7527673244476318,
      "learning_rate": 8.090048298992508e-06,
      "loss": 0.0177,
      "step": 599720
    },
    {
      "epoch": 0.9814876638976715,
      "grad_norm": 0.39178913831710815,
      "learning_rate": 8.08998240677899e-06,
      "loss": 0.0307,
      "step": 599740
    },
    {
      "epoch": 0.9815203943363249,
      "grad_norm": 1.3158667087554932,
      "learning_rate": 8.089916514565474e-06,
      "loss": 0.0306,
      "step": 599760
    },
    {
      "epoch": 0.9815531247749782,
      "grad_norm": 0.4110231399536133,
      "learning_rate": 8.089850622351958e-06,
      "loss": 0.0351,
      "step": 599780
    },
    {
      "epoch": 0.9815858552136316,
      "grad_norm": 1.8708513975143433,
      "learning_rate": 8.08978473013844e-06,
      "loss": 0.0198,
      "step": 599800
    },
    {
      "epoch": 0.9816185856522849,
      "grad_norm": 0.9355229735374451,
      "learning_rate": 8.089718837924923e-06,
      "loss": 0.0265,
      "step": 599820
    },
    {
      "epoch": 0.9816513160909383,
      "grad_norm": 1.037864327430725,
      "learning_rate": 8.089652945711407e-06,
      "loss": 0.03,
      "step": 599840
    },
    {
      "epoch": 0.9816840465295916,
      "grad_norm": 0.7177804112434387,
      "learning_rate": 8.089587053497888e-06,
      "loss": 0.0194,
      "step": 599860
    },
    {
      "epoch": 0.9817167769682449,
      "grad_norm": 1.471734642982483,
      "learning_rate": 8.089521161284372e-06,
      "loss": 0.025,
      "step": 599880
    },
    {
      "epoch": 0.9817495074068983,
      "grad_norm": 3.7043352127075195,
      "learning_rate": 8.089455269070856e-06,
      "loss": 0.0373,
      "step": 599900
    },
    {
      "epoch": 0.9817822378455516,
      "grad_norm": 0.8009750247001648,
      "learning_rate": 8.089389376857338e-06,
      "loss": 0.0218,
      "step": 599920
    },
    {
      "epoch": 0.9818149682842049,
      "grad_norm": 0.7090319395065308,
      "learning_rate": 8.089323484643821e-06,
      "loss": 0.0267,
      "step": 599940
    },
    {
      "epoch": 0.9818476987228583,
      "grad_norm": 1.9818401336669922,
      "learning_rate": 8.089257592430303e-06,
      "loss": 0.0243,
      "step": 599960
    },
    {
      "epoch": 0.9818804291615116,
      "grad_norm": 1.3436249494552612,
      "learning_rate": 8.089191700216787e-06,
      "loss": 0.0326,
      "step": 599980
    },
    {
      "epoch": 0.981913159600165,
      "grad_norm": 0.8280138373374939,
      "learning_rate": 8.089125808003268e-06,
      "loss": 0.0267,
      "step": 600000
    },
    {
      "epoch": 0.981913159600165,
      "eval_loss": 0.013753429055213928,
      "eval_runtime": 6511.2547,
      "eval_samples_per_second": 157.859,
      "eval_steps_per_second": 15.786,
      "eval_sts-dev_pearson_cosine": 0.9676305953402954,
      "eval_sts-dev_spearman_cosine": 0.8848807207216448,
      "step": 600000
    },
    {
      "epoch": 0.9819458900388183,
      "grad_norm": 0.1397695690393448,
      "learning_rate": 8.089059915789752e-06,
      "loss": 0.0334,
      "step": 600020
    },
    {
      "epoch": 0.9819786204774716,
      "grad_norm": 1.4379937648773193,
      "learning_rate": 8.088994023576234e-06,
      "loss": 0.0269,
      "step": 600040
    },
    {
      "epoch": 0.982011350916125,
      "grad_norm": 1.98301100730896,
      "learning_rate": 8.088928131362718e-06,
      "loss": 0.0343,
      "step": 600060
    },
    {
      "epoch": 0.9820440813547783,
      "grad_norm": 0.7015259861946106,
      "learning_rate": 8.0888622391492e-06,
      "loss": 0.0287,
      "step": 600080
    },
    {
      "epoch": 0.9820768117934316,
      "grad_norm": 1.238803505897522,
      "learning_rate": 8.088796346935683e-06,
      "loss": 0.03,
      "step": 600100
    },
    {
      "epoch": 0.982109542232085,
      "grad_norm": 2.0664796829223633,
      "learning_rate": 8.088730454722165e-06,
      "loss": 0.0298,
      "step": 600120
    },
    {
      "epoch": 0.9821422726707383,
      "grad_norm": 1.7804142236709595,
      "learning_rate": 8.088664562508649e-06,
      "loss": 0.0324,
      "step": 600140
    },
    {
      "epoch": 0.9821750031093917,
      "grad_norm": 0.9388571381568909,
      "learning_rate": 8.088598670295132e-06,
      "loss": 0.0273,
      "step": 600160
    },
    {
      "epoch": 0.982207733548045,
      "grad_norm": 1.084277629852295,
      "learning_rate": 8.088532778081614e-06,
      "loss": 0.0287,
      "step": 600180
    },
    {
      "epoch": 0.9822404639866984,
      "grad_norm": 2.194735288619995,
      "learning_rate": 8.088466885868098e-06,
      "loss": 0.0321,
      "step": 600200
    },
    {
      "epoch": 0.9822731944253517,
      "grad_norm": 0.6901341676712036,
      "learning_rate": 8.088400993654581e-06,
      "loss": 0.0218,
      "step": 600220
    },
    {
      "epoch": 0.982305924864005,
      "grad_norm": 1.301964282989502,
      "learning_rate": 8.088335101441063e-06,
      "loss": 0.035,
      "step": 600240
    },
    {
      "epoch": 0.9823386553026584,
      "grad_norm": 0.861004114151001,
      "learning_rate": 8.088269209227547e-06,
      "loss": 0.0274,
      "step": 600260
    },
    {
      "epoch": 0.9823713857413117,
      "grad_norm": 0.3995685577392578,
      "learning_rate": 8.08820331701403e-06,
      "loss": 0.021,
      "step": 600280
    },
    {
      "epoch": 0.982404116179965,
      "grad_norm": 0.2847009301185608,
      "learning_rate": 8.088137424800512e-06,
      "loss": 0.0331,
      "step": 600300
    },
    {
      "epoch": 0.9824368466186184,
      "grad_norm": 0.5450962781906128,
      "learning_rate": 8.088071532586996e-06,
      "loss": 0.0203,
      "step": 600320
    },
    {
      "epoch": 0.9824695770572717,
      "grad_norm": 2.0278048515319824,
      "learning_rate": 8.088005640373478e-06,
      "loss": 0.0321,
      "step": 600340
    },
    {
      "epoch": 0.982502307495925,
      "grad_norm": 0.37244927883148193,
      "learning_rate": 8.087939748159961e-06,
      "loss": 0.0351,
      "step": 600360
    },
    {
      "epoch": 0.9825350379345784,
      "grad_norm": 1.3282403945922852,
      "learning_rate": 8.087873855946443e-06,
      "loss": 0.0273,
      "step": 600380
    },
    {
      "epoch": 0.9825677683732318,
      "grad_norm": 1.8536854982376099,
      "learning_rate": 8.087807963732927e-06,
      "loss": 0.0186,
      "step": 600400
    },
    {
      "epoch": 0.982600498811885,
      "grad_norm": 0.33786317706108093,
      "learning_rate": 8.087742071519409e-06,
      "loss": 0.0266,
      "step": 600420
    },
    {
      "epoch": 0.9826332292505384,
      "grad_norm": 0.6336368322372437,
      "learning_rate": 8.087676179305892e-06,
      "loss": 0.0316,
      "step": 600440
    },
    {
      "epoch": 0.9826659596891918,
      "grad_norm": 0.534278154373169,
      "learning_rate": 8.087610287092374e-06,
      "loss": 0.0289,
      "step": 600460
    },
    {
      "epoch": 0.982698690127845,
      "grad_norm": 0.5596733093261719,
      "learning_rate": 8.087544394878858e-06,
      "loss": 0.0318,
      "step": 600480
    },
    {
      "epoch": 0.9827314205664984,
      "grad_norm": 0.9196040034294128,
      "learning_rate": 8.08747850266534e-06,
      "loss": 0.022,
      "step": 600500
    },
    {
      "epoch": 0.9827641510051518,
      "grad_norm": 1.6734181642532349,
      "learning_rate": 8.087412610451823e-06,
      "loss": 0.026,
      "step": 600520
    },
    {
      "epoch": 0.9827968814438051,
      "grad_norm": 0.26633796095848083,
      "learning_rate": 8.087346718238307e-06,
      "loss": 0.0221,
      "step": 600540
    },
    {
      "epoch": 0.9828296118824584,
      "grad_norm": 0.4804593026638031,
      "learning_rate": 8.087280826024789e-06,
      "loss": 0.0282,
      "step": 600560
    },
    {
      "epoch": 0.9828623423211118,
      "grad_norm": 0.37064751982688904,
      "learning_rate": 8.087214933811272e-06,
      "loss": 0.0242,
      "step": 600580
    },
    {
      "epoch": 0.9828950727597652,
      "grad_norm": 0.9027448892593384,
      "learning_rate": 8.087149041597756e-06,
      "loss": 0.0245,
      "step": 600600
    },
    {
      "epoch": 0.9829278031984184,
      "grad_norm": 0.5614308714866638,
      "learning_rate": 8.087083149384238e-06,
      "loss": 0.0339,
      "step": 600620
    },
    {
      "epoch": 0.9829605336370718,
      "grad_norm": 0.2517964839935303,
      "learning_rate": 8.087017257170721e-06,
      "loss": 0.0315,
      "step": 600640
    },
    {
      "epoch": 0.9829932640757252,
      "grad_norm": 3.365827798843384,
      "learning_rate": 8.086951364957205e-06,
      "loss": 0.0312,
      "step": 600660
    },
    {
      "epoch": 0.9830259945143784,
      "grad_norm": 1.9045708179473877,
      "learning_rate": 8.086885472743687e-06,
      "loss": 0.0308,
      "step": 600680
    },
    {
      "epoch": 0.9830587249530318,
      "grad_norm": 0.9802931547164917,
      "learning_rate": 8.08681958053017e-06,
      "loss": 0.0288,
      "step": 600700
    },
    {
      "epoch": 0.9830914553916852,
      "grad_norm": 0.3236531913280487,
      "learning_rate": 8.086753688316652e-06,
      "loss": 0.0266,
      "step": 600720
    },
    {
      "epoch": 0.9831241858303384,
      "grad_norm": 0.8865551352500916,
      "learning_rate": 8.086687796103136e-06,
      "loss": 0.0177,
      "step": 600740
    },
    {
      "epoch": 0.9831569162689918,
      "grad_norm": 4.034033298492432,
      "learning_rate": 8.086621903889618e-06,
      "loss": 0.0318,
      "step": 600760
    },
    {
      "epoch": 0.9831896467076452,
      "grad_norm": 0.2568628489971161,
      "learning_rate": 8.086556011676101e-06,
      "loss": 0.0181,
      "step": 600780
    },
    {
      "epoch": 0.9832223771462986,
      "grad_norm": 1.4970920085906982,
      "learning_rate": 8.086490119462583e-06,
      "loss": 0.0238,
      "step": 600800
    },
    {
      "epoch": 0.9832551075849518,
      "grad_norm": 2.625617742538452,
      "learning_rate": 8.086424227249067e-06,
      "loss": 0.0219,
      "step": 600820
    },
    {
      "epoch": 0.9832878380236052,
      "grad_norm": 1.0900764465332031,
      "learning_rate": 8.086358335035549e-06,
      "loss": 0.0307,
      "step": 600840
    },
    {
      "epoch": 0.9833205684622586,
      "grad_norm": 1.3601783514022827,
      "learning_rate": 8.086292442822032e-06,
      "loss": 0.0245,
      "step": 600860
    },
    {
      "epoch": 0.9833532989009118,
      "grad_norm": 0.38862481713294983,
      "learning_rate": 8.086226550608516e-06,
      "loss": 0.0268,
      "step": 600880
    },
    {
      "epoch": 0.9833860293395652,
      "grad_norm": 0.9438635110855103,
      "learning_rate": 8.086160658394998e-06,
      "loss": 0.0253,
      "step": 600900
    },
    {
      "epoch": 0.9834187597782186,
      "grad_norm": 0.40574419498443604,
      "learning_rate": 8.086094766181481e-06,
      "loss": 0.0313,
      "step": 600920
    },
    {
      "epoch": 0.9834514902168718,
      "grad_norm": 1.125439167022705,
      "learning_rate": 8.086028873967963e-06,
      "loss": 0.0314,
      "step": 600940
    },
    {
      "epoch": 0.9834842206555252,
      "grad_norm": 1.79803466796875,
      "learning_rate": 8.085962981754447e-06,
      "loss": 0.0258,
      "step": 600960
    },
    {
      "epoch": 0.9835169510941786,
      "grad_norm": 1.0897231101989746,
      "learning_rate": 8.085897089540929e-06,
      "loss": 0.0341,
      "step": 600980
    },
    {
      "epoch": 0.983549681532832,
      "grad_norm": 1.3355212211608887,
      "learning_rate": 8.085831197327412e-06,
      "loss": 0.0208,
      "step": 601000
    },
    {
      "epoch": 0.9835824119714852,
      "grad_norm": 2.5813543796539307,
      "learning_rate": 8.085765305113896e-06,
      "loss": 0.0293,
      "step": 601020
    },
    {
      "epoch": 0.9836151424101386,
      "grad_norm": 0.6141006350517273,
      "learning_rate": 8.085699412900378e-06,
      "loss": 0.0287,
      "step": 601040
    },
    {
      "epoch": 0.983647872848792,
      "grad_norm": 1.4679895639419556,
      "learning_rate": 8.085633520686861e-06,
      "loss": 0.0307,
      "step": 601060
    },
    {
      "epoch": 0.9836806032874452,
      "grad_norm": 0.777114987373352,
      "learning_rate": 8.085567628473345e-06,
      "loss": 0.026,
      "step": 601080
    },
    {
      "epoch": 0.9837133337260986,
      "grad_norm": 0.378774493932724,
      "learning_rate": 8.085501736259827e-06,
      "loss": 0.032,
      "step": 601100
    },
    {
      "epoch": 0.983746064164752,
      "grad_norm": 1.9357075691223145,
      "learning_rate": 8.08543584404631e-06,
      "loss": 0.0311,
      "step": 601120
    },
    {
      "epoch": 0.9837787946034052,
      "grad_norm": 4.806457042694092,
      "learning_rate": 8.085369951832792e-06,
      "loss": 0.0259,
      "step": 601140
    },
    {
      "epoch": 0.9838115250420586,
      "grad_norm": 0.6531996726989746,
      "learning_rate": 8.085304059619276e-06,
      "loss": 0.0292,
      "step": 601160
    },
    {
      "epoch": 0.983844255480712,
      "grad_norm": 0.9795743227005005,
      "learning_rate": 8.085238167405758e-06,
      "loss": 0.0332,
      "step": 601180
    },
    {
      "epoch": 0.9838769859193653,
      "grad_norm": 0.5332682728767395,
      "learning_rate": 8.085172275192241e-06,
      "loss": 0.0257,
      "step": 601200
    },
    {
      "epoch": 0.9839097163580186,
      "grad_norm": 0.2773374021053314,
      "learning_rate": 8.085106382978723e-06,
      "loss": 0.0311,
      "step": 601220
    },
    {
      "epoch": 0.983942446796672,
      "grad_norm": 0.4217444956302643,
      "learning_rate": 8.085040490765207e-06,
      "loss": 0.0252,
      "step": 601240
    },
    {
      "epoch": 0.9839751772353253,
      "grad_norm": 3.406881809234619,
      "learning_rate": 8.08497459855169e-06,
      "loss": 0.0269,
      "step": 601260
    },
    {
      "epoch": 0.9840079076739786,
      "grad_norm": 1.0842410326004028,
      "learning_rate": 8.084908706338172e-06,
      "loss": 0.0233,
      "step": 601280
    },
    {
      "epoch": 0.984040638112632,
      "grad_norm": 0.29625701904296875,
      "learning_rate": 8.084842814124656e-06,
      "loss": 0.0247,
      "step": 601300
    },
    {
      "epoch": 0.9840733685512854,
      "grad_norm": 0.5924898386001587,
      "learning_rate": 8.084776921911138e-06,
      "loss": 0.0207,
      "step": 601320
    },
    {
      "epoch": 0.9841060989899386,
      "grad_norm": 1.2070379257202148,
      "learning_rate": 8.084711029697621e-06,
      "loss": 0.0309,
      "step": 601340
    },
    {
      "epoch": 0.984138829428592,
      "grad_norm": 0.8243126273155212,
      "learning_rate": 8.084645137484103e-06,
      "loss": 0.031,
      "step": 601360
    },
    {
      "epoch": 0.9841715598672454,
      "grad_norm": 1.0803813934326172,
      "learning_rate": 8.084579245270587e-06,
      "loss": 0.0278,
      "step": 601380
    },
    {
      "epoch": 0.9842042903058987,
      "grad_norm": 0.2872942090034485,
      "learning_rate": 8.08451335305707e-06,
      "loss": 0.0242,
      "step": 601400
    },
    {
      "epoch": 0.984237020744552,
      "grad_norm": 0.9772357940673828,
      "learning_rate": 8.084447460843552e-06,
      "loss": 0.0264,
      "step": 601420
    },
    {
      "epoch": 0.9842697511832054,
      "grad_norm": 0.9509061574935913,
      "learning_rate": 8.084381568630036e-06,
      "loss": 0.0287,
      "step": 601440
    },
    {
      "epoch": 0.9843024816218587,
      "grad_norm": 0.32448509335517883,
      "learning_rate": 8.08431567641652e-06,
      "loss": 0.0229,
      "step": 601460
    },
    {
      "epoch": 0.984335212060512,
      "grad_norm": 0.38148626685142517,
      "learning_rate": 8.084249784203001e-06,
      "loss": 0.0235,
      "step": 601480
    },
    {
      "epoch": 0.9843679424991654,
      "grad_norm": 2.044816732406616,
      "learning_rate": 8.084183891989485e-06,
      "loss": 0.0247,
      "step": 601500
    },
    {
      "epoch": 0.9844006729378187,
      "grad_norm": 0.8389599323272705,
      "learning_rate": 8.084117999775967e-06,
      "loss": 0.0279,
      "step": 601520
    },
    {
      "epoch": 0.984433403376472,
      "grad_norm": 1.0994703769683838,
      "learning_rate": 8.08405210756245e-06,
      "loss": 0.022,
      "step": 601540
    },
    {
      "epoch": 0.9844661338151254,
      "grad_norm": 1.6051942110061646,
      "learning_rate": 8.083986215348932e-06,
      "loss": 0.0261,
      "step": 601560
    },
    {
      "epoch": 0.9844988642537787,
      "grad_norm": 0.8931193351745605,
      "learning_rate": 8.083920323135416e-06,
      "loss": 0.0291,
      "step": 601580
    },
    {
      "epoch": 0.9845315946924321,
      "grad_norm": 5.116707801818848,
      "learning_rate": 8.0838544309219e-06,
      "loss": 0.0314,
      "step": 601600
    },
    {
      "epoch": 0.9845643251310854,
      "grad_norm": 0.55808424949646,
      "learning_rate": 8.083788538708381e-06,
      "loss": 0.0244,
      "step": 601620
    },
    {
      "epoch": 0.9845970555697388,
      "grad_norm": 2.728818655014038,
      "learning_rate": 8.083722646494865e-06,
      "loss": 0.0293,
      "step": 601640
    },
    {
      "epoch": 0.9846297860083921,
      "grad_norm": 1.8829245567321777,
      "learning_rate": 8.083656754281347e-06,
      "loss": 0.0213,
      "step": 601660
    },
    {
      "epoch": 0.9846625164470454,
      "grad_norm": 0.19220426678657532,
      "learning_rate": 8.08359086206783e-06,
      "loss": 0.0311,
      "step": 601680
    },
    {
      "epoch": 0.9846952468856988,
      "grad_norm": 0.1900331974029541,
      "learning_rate": 8.083524969854312e-06,
      "loss": 0.0331,
      "step": 601700
    },
    {
      "epoch": 0.9847279773243521,
      "grad_norm": 2.3165535926818848,
      "learning_rate": 8.083459077640796e-06,
      "loss": 0.0266,
      "step": 601720
    },
    {
      "epoch": 0.9847607077630054,
      "grad_norm": 0.47540709376335144,
      "learning_rate": 8.083393185427278e-06,
      "loss": 0.0254,
      "step": 601740
    },
    {
      "epoch": 0.9847934382016588,
      "grad_norm": 1.87606680393219,
      "learning_rate": 8.083327293213761e-06,
      "loss": 0.0323,
      "step": 601760
    },
    {
      "epoch": 0.9848261686403121,
      "grad_norm": 1.5008772611618042,
      "learning_rate": 8.083261401000243e-06,
      "loss": 0.0266,
      "step": 601780
    },
    {
      "epoch": 0.9848588990789655,
      "grad_norm": 1.9789167642593384,
      "learning_rate": 8.083195508786727e-06,
      "loss": 0.0305,
      "step": 601800
    },
    {
      "epoch": 0.9848916295176188,
      "grad_norm": 1.0655068159103394,
      "learning_rate": 8.08312961657321e-06,
      "loss": 0.0228,
      "step": 601820
    },
    {
      "epoch": 0.9849243599562721,
      "grad_norm": 0.46924322843551636,
      "learning_rate": 8.083063724359692e-06,
      "loss": 0.0243,
      "step": 601840
    },
    {
      "epoch": 0.9849570903949255,
      "grad_norm": 1.4162451028823853,
      "learning_rate": 8.082997832146176e-06,
      "loss": 0.0319,
      "step": 601860
    },
    {
      "epoch": 0.9849898208335788,
      "grad_norm": 1.0597323179244995,
      "learning_rate": 8.08293193993266e-06,
      "loss": 0.0329,
      "step": 601880
    },
    {
      "epoch": 0.9850225512722321,
      "grad_norm": 1.579918622970581,
      "learning_rate": 8.082866047719141e-06,
      "loss": 0.0309,
      "step": 601900
    },
    {
      "epoch": 0.9850552817108855,
      "grad_norm": 2.9764225482940674,
      "learning_rate": 8.082800155505625e-06,
      "loss": 0.0231,
      "step": 601920
    },
    {
      "epoch": 0.9850880121495388,
      "grad_norm": 2.575784683227539,
      "learning_rate": 8.082734263292109e-06,
      "loss": 0.0505,
      "step": 601940
    },
    {
      "epoch": 0.9851207425881922,
      "grad_norm": 0.23652289807796478,
      "learning_rate": 8.08266837107859e-06,
      "loss": 0.0283,
      "step": 601960
    },
    {
      "epoch": 0.9851534730268455,
      "grad_norm": 1.2863744497299194,
      "learning_rate": 8.082602478865074e-06,
      "loss": 0.0358,
      "step": 601980
    },
    {
      "epoch": 0.9851862034654989,
      "grad_norm": 0.6065480709075928,
      "learning_rate": 8.082536586651556e-06,
      "loss": 0.0245,
      "step": 602000
    },
    {
      "epoch": 0.9852189339041522,
      "grad_norm": 0.7107303738594055,
      "learning_rate": 8.08247069443804e-06,
      "loss": 0.0329,
      "step": 602020
    },
    {
      "epoch": 0.9852516643428055,
      "grad_norm": 1.1035785675048828,
      "learning_rate": 8.082404802224521e-06,
      "loss": 0.0224,
      "step": 602040
    },
    {
      "epoch": 0.9852843947814589,
      "grad_norm": 1.8797768354415894,
      "learning_rate": 8.082338910011005e-06,
      "loss": 0.0247,
      "step": 602060
    },
    {
      "epoch": 0.9853171252201122,
      "grad_norm": 0.7086325287818909,
      "learning_rate": 8.082273017797487e-06,
      "loss": 0.0267,
      "step": 602080
    },
    {
      "epoch": 0.9853498556587655,
      "grad_norm": 2.0546255111694336,
      "learning_rate": 8.08220712558397e-06,
      "loss": 0.0235,
      "step": 602100
    },
    {
      "epoch": 0.9853825860974189,
      "grad_norm": 0.45807477831840515,
      "learning_rate": 8.082141233370452e-06,
      "loss": 0.025,
      "step": 602120
    },
    {
      "epoch": 0.9854153165360722,
      "grad_norm": 4.220221996307373,
      "learning_rate": 8.082075341156936e-06,
      "loss": 0.0213,
      "step": 602140
    },
    {
      "epoch": 0.9854480469747255,
      "grad_norm": 0.6626695990562439,
      "learning_rate": 8.082009448943418e-06,
      "loss": 0.0196,
      "step": 602160
    },
    {
      "epoch": 0.9854807774133789,
      "grad_norm": 0.5559340119361877,
      "learning_rate": 8.081943556729902e-06,
      "loss": 0.0281,
      "step": 602180
    },
    {
      "epoch": 0.9855135078520322,
      "grad_norm": 0.7016650438308716,
      "learning_rate": 8.081877664516385e-06,
      "loss": 0.029,
      "step": 602200
    },
    {
      "epoch": 0.9855462382906856,
      "grad_norm": 0.48416242003440857,
      "learning_rate": 8.081811772302867e-06,
      "loss": 0.0264,
      "step": 602220
    },
    {
      "epoch": 0.9855789687293389,
      "grad_norm": 2.7200629711151123,
      "learning_rate": 8.08174588008935e-06,
      "loss": 0.0274,
      "step": 602240
    },
    {
      "epoch": 0.9856116991679923,
      "grad_norm": 0.4980579614639282,
      "learning_rate": 8.081679987875834e-06,
      "loss": 0.0278,
      "step": 602260
    },
    {
      "epoch": 0.9856444296066456,
      "grad_norm": 0.7029128074645996,
      "learning_rate": 8.081614095662316e-06,
      "loss": 0.0211,
      "step": 602280
    },
    {
      "epoch": 0.9856771600452989,
      "grad_norm": 2.3863320350646973,
      "learning_rate": 8.0815482034488e-06,
      "loss": 0.0374,
      "step": 602300
    },
    {
      "epoch": 0.9857098904839523,
      "grad_norm": 3.5360963344573975,
      "learning_rate": 8.081482311235283e-06,
      "loss": 0.0312,
      "step": 602320
    },
    {
      "epoch": 0.9857426209226056,
      "grad_norm": 0.3288269340991974,
      "learning_rate": 8.081416419021765e-06,
      "loss": 0.0317,
      "step": 602340
    },
    {
      "epoch": 0.9857753513612589,
      "grad_norm": 1.0273070335388184,
      "learning_rate": 8.081350526808249e-06,
      "loss": 0.031,
      "step": 602360
    },
    {
      "epoch": 0.9858080817999123,
      "grad_norm": 0.5423256754875183,
      "learning_rate": 8.08128463459473e-06,
      "loss": 0.0249,
      "step": 602380
    },
    {
      "epoch": 0.9858408122385656,
      "grad_norm": 2.084705114364624,
      "learning_rate": 8.081218742381214e-06,
      "loss": 0.0225,
      "step": 602400
    },
    {
      "epoch": 0.9858735426772189,
      "grad_norm": 0.650714099407196,
      "learning_rate": 8.081152850167696e-06,
      "loss": 0.0304,
      "step": 602420
    },
    {
      "epoch": 0.9859062731158723,
      "grad_norm": 0.26154831051826477,
      "learning_rate": 8.08108695795418e-06,
      "loss": 0.0252,
      "step": 602440
    },
    {
      "epoch": 0.9859390035545257,
      "grad_norm": 0.4820469617843628,
      "learning_rate": 8.081021065740662e-06,
      "loss": 0.0196,
      "step": 602460
    },
    {
      "epoch": 0.985971733993179,
      "grad_norm": 1.5750830173492432,
      "learning_rate": 8.080955173527145e-06,
      "loss": 0.033,
      "step": 602480
    },
    {
      "epoch": 0.9860044644318323,
      "grad_norm": 0.12502078711986542,
      "learning_rate": 8.080889281313627e-06,
      "loss": 0.0206,
      "step": 602500
    },
    {
      "epoch": 0.9860371948704857,
      "grad_norm": 1.7099130153656006,
      "learning_rate": 8.08082338910011e-06,
      "loss": 0.0329,
      "step": 602520
    },
    {
      "epoch": 0.986069925309139,
      "grad_norm": 0.46989044547080994,
      "learning_rate": 8.080757496886593e-06,
      "loss": 0.0295,
      "step": 602540
    },
    {
      "epoch": 0.9861026557477923,
      "grad_norm": 0.9715318083763123,
      "learning_rate": 8.080691604673076e-06,
      "loss": 0.0282,
      "step": 602560
    },
    {
      "epoch": 0.9861353861864457,
      "grad_norm": 1.0200984477996826,
      "learning_rate": 8.080625712459558e-06,
      "loss": 0.0262,
      "step": 602580
    },
    {
      "epoch": 0.986168116625099,
      "grad_norm": 2.4820921421051025,
      "learning_rate": 8.080559820246042e-06,
      "loss": 0.0302,
      "step": 602600
    },
    {
      "epoch": 0.9862008470637523,
      "grad_norm": 0.4490968883037567,
      "learning_rate": 8.080493928032525e-06,
      "loss": 0.022,
      "step": 602620
    },
    {
      "epoch": 0.9862335775024057,
      "grad_norm": 2.3836567401885986,
      "learning_rate": 8.080428035819007e-06,
      "loss": 0.0325,
      "step": 602640
    },
    {
      "epoch": 0.9862663079410591,
      "grad_norm": 0.23118376731872559,
      "learning_rate": 8.08036214360549e-06,
      "loss": 0.0215,
      "step": 602660
    },
    {
      "epoch": 0.9862990383797123,
      "grad_norm": 2.1321053504943848,
      "learning_rate": 8.080296251391974e-06,
      "loss": 0.0244,
      "step": 602680
    },
    {
      "epoch": 0.9863317688183657,
      "grad_norm": 0.9347167611122131,
      "learning_rate": 8.080230359178458e-06,
      "loss": 0.0223,
      "step": 602700
    },
    {
      "epoch": 0.9863644992570191,
      "grad_norm": 0.9350869655609131,
      "learning_rate": 8.08016446696494e-06,
      "loss": 0.0355,
      "step": 602720
    },
    {
      "epoch": 0.9863972296956723,
      "grad_norm": 0.2747020423412323,
      "learning_rate": 8.080098574751423e-06,
      "loss": 0.0343,
      "step": 602740
    },
    {
      "epoch": 0.9864299601343257,
      "grad_norm": 0.3497268557548523,
      "learning_rate": 8.080032682537905e-06,
      "loss": 0.0156,
      "step": 602760
    },
    {
      "epoch": 0.9864626905729791,
      "grad_norm": 1.6425683498382568,
      "learning_rate": 8.079966790324389e-06,
      "loss": 0.0372,
      "step": 602780
    },
    {
      "epoch": 0.9864954210116323,
      "grad_norm": 0.44927141070365906,
      "learning_rate": 8.07990089811087e-06,
      "loss": 0.0256,
      "step": 602800
    },
    {
      "epoch": 0.9865281514502857,
      "grad_norm": 0.502081573009491,
      "learning_rate": 8.079835005897354e-06,
      "loss": 0.0277,
      "step": 602820
    },
    {
      "epoch": 0.9865608818889391,
      "grad_norm": 1.1794428825378418,
      "learning_rate": 8.079769113683836e-06,
      "loss": 0.0321,
      "step": 602840
    },
    {
      "epoch": 0.9865936123275925,
      "grad_norm": 0.9246889352798462,
      "learning_rate": 8.07970322147032e-06,
      "loss": 0.0243,
      "step": 602860
    },
    {
      "epoch": 0.9866263427662457,
      "grad_norm": 0.8785728216171265,
      "learning_rate": 8.079637329256802e-06,
      "loss": 0.0247,
      "step": 602880
    },
    {
      "epoch": 0.9866590732048991,
      "grad_norm": 1.0059295892715454,
      "learning_rate": 8.079571437043285e-06,
      "loss": 0.0267,
      "step": 602900
    },
    {
      "epoch": 0.9866918036435525,
      "grad_norm": 1.9180363416671753,
      "learning_rate": 8.079505544829767e-06,
      "loss": 0.0257,
      "step": 602920
    },
    {
      "epoch": 0.9867245340822057,
      "grad_norm": 1.1145650148391724,
      "learning_rate": 8.07943965261625e-06,
      "loss": 0.0289,
      "step": 602940
    },
    {
      "epoch": 0.9867572645208591,
      "grad_norm": 2.7507855892181396,
      "learning_rate": 8.079373760402733e-06,
      "loss": 0.0366,
      "step": 602960
    },
    {
      "epoch": 0.9867899949595125,
      "grad_norm": 1.6392899751663208,
      "learning_rate": 8.079307868189216e-06,
      "loss": 0.0244,
      "step": 602980
    },
    {
      "epoch": 0.9868227253981657,
      "grad_norm": 0.7108741402626038,
      "learning_rate": 8.0792419759757e-06,
      "loss": 0.032,
      "step": 603000
    },
    {
      "epoch": 0.9868554558368191,
      "grad_norm": 4.957164764404297,
      "learning_rate": 8.079176083762182e-06,
      "loss": 0.0307,
      "step": 603020
    },
    {
      "epoch": 0.9868881862754725,
      "grad_norm": 0.2395278364419937,
      "learning_rate": 8.079110191548665e-06,
      "loss": 0.0417,
      "step": 603040
    },
    {
      "epoch": 0.9869209167141259,
      "grad_norm": 0.997836172580719,
      "learning_rate": 8.079044299335149e-06,
      "loss": 0.0269,
      "step": 603060
    },
    {
      "epoch": 0.9869536471527791,
      "grad_norm": 0.8608161211013794,
      "learning_rate": 8.07897840712163e-06,
      "loss": 0.0311,
      "step": 603080
    },
    {
      "epoch": 0.9869863775914325,
      "grad_norm": 0.9623925685882568,
      "learning_rate": 8.078912514908114e-06,
      "loss": 0.024,
      "step": 603100
    },
    {
      "epoch": 0.9870191080300859,
      "grad_norm": 1.6805403232574463,
      "learning_rate": 8.078846622694598e-06,
      "loss": 0.0336,
      "step": 603120
    },
    {
      "epoch": 0.9870518384687391,
      "grad_norm": 0.784695565700531,
      "learning_rate": 8.07878073048108e-06,
      "loss": 0.0234,
      "step": 603140
    },
    {
      "epoch": 0.9870845689073925,
      "grad_norm": 3.3895764350891113,
      "learning_rate": 8.078714838267563e-06,
      "loss": 0.0308,
      "step": 603160
    },
    {
      "epoch": 0.9871172993460459,
      "grad_norm": 2.0561084747314453,
      "learning_rate": 8.078648946054045e-06,
      "loss": 0.0218,
      "step": 603180
    },
    {
      "epoch": 0.9871500297846991,
      "grad_norm": 1.8494206666946411,
      "learning_rate": 8.078583053840529e-06,
      "loss": 0.0289,
      "step": 603200
    },
    {
      "epoch": 0.9871827602233525,
      "grad_norm": 0.8608791828155518,
      "learning_rate": 8.07851716162701e-06,
      "loss": 0.0254,
      "step": 603220
    },
    {
      "epoch": 0.9872154906620059,
      "grad_norm": 0.6775484085083008,
      "learning_rate": 8.078451269413494e-06,
      "loss": 0.0253,
      "step": 603240
    },
    {
      "epoch": 0.9872482211006592,
      "grad_norm": 1.1268435716629028,
      "learning_rate": 8.078385377199976e-06,
      "loss": 0.0205,
      "step": 603260
    },
    {
      "epoch": 0.9872809515393125,
      "grad_norm": 0.5919904708862305,
      "learning_rate": 8.07831948498646e-06,
      "loss": 0.0276,
      "step": 603280
    },
    {
      "epoch": 0.9873136819779659,
      "grad_norm": 0.30594828724861145,
      "learning_rate": 8.078253592772942e-06,
      "loss": 0.0307,
      "step": 603300
    },
    {
      "epoch": 0.9873464124166192,
      "grad_norm": 1.098429560661316,
      "learning_rate": 8.078187700559425e-06,
      "loss": 0.0261,
      "step": 603320
    },
    {
      "epoch": 0.9873791428552725,
      "grad_norm": 2.086273431777954,
      "learning_rate": 8.078121808345909e-06,
      "loss": 0.0222,
      "step": 603340
    },
    {
      "epoch": 0.9874118732939259,
      "grad_norm": 0.45114049315452576,
      "learning_rate": 8.07805591613239e-06,
      "loss": 0.0194,
      "step": 603360
    },
    {
      "epoch": 0.9874446037325793,
      "grad_norm": 0.23870012164115906,
      "learning_rate": 8.077990023918874e-06,
      "loss": 0.0307,
      "step": 603380
    },
    {
      "epoch": 0.9874773341712325,
      "grad_norm": 0.4001120626926422,
      "learning_rate": 8.077924131705356e-06,
      "loss": 0.0278,
      "step": 603400
    },
    {
      "epoch": 0.9875100646098859,
      "grad_norm": 0.2522692382335663,
      "learning_rate": 8.07785823949184e-06,
      "loss": 0.0179,
      "step": 603420
    },
    {
      "epoch": 0.9875427950485393,
      "grad_norm": 0.7937575578689575,
      "learning_rate": 8.077792347278323e-06,
      "loss": 0.0257,
      "step": 603440
    },
    {
      "epoch": 0.9875755254871926,
      "grad_norm": 1.8390964269638062,
      "learning_rate": 8.077726455064805e-06,
      "loss": 0.0258,
      "step": 603460
    },
    {
      "epoch": 0.9876082559258459,
      "grad_norm": 0.6506341099739075,
      "learning_rate": 8.077660562851289e-06,
      "loss": 0.0287,
      "step": 603480
    },
    {
      "epoch": 0.9876409863644993,
      "grad_norm": 3.654844284057617,
      "learning_rate": 8.077594670637773e-06,
      "loss": 0.0279,
      "step": 603500
    },
    {
      "epoch": 0.9876737168031526,
      "grad_norm": 1.5558180809020996,
      "learning_rate": 8.077528778424254e-06,
      "loss": 0.0251,
      "step": 603520
    },
    {
      "epoch": 0.9877064472418059,
      "grad_norm": 1.355033278465271,
      "learning_rate": 8.077462886210738e-06,
      "loss": 0.0197,
      "step": 603540
    },
    {
      "epoch": 0.9877391776804593,
      "grad_norm": 0.3888961672782898,
      "learning_rate": 8.07739699399722e-06,
      "loss": 0.0204,
      "step": 603560
    },
    {
      "epoch": 0.9877719081191126,
      "grad_norm": 2.3939549922943115,
      "learning_rate": 8.077331101783703e-06,
      "loss": 0.0297,
      "step": 603580
    },
    {
      "epoch": 0.9878046385577659,
      "grad_norm": 0.9100880026817322,
      "learning_rate": 8.077265209570185e-06,
      "loss": 0.0324,
      "step": 603600
    },
    {
      "epoch": 0.9878373689964193,
      "grad_norm": 0.9558305144309998,
      "learning_rate": 8.077199317356669e-06,
      "loss": 0.0274,
      "step": 603620
    },
    {
      "epoch": 0.9878700994350726,
      "grad_norm": 0.9485105872154236,
      "learning_rate": 8.07713342514315e-06,
      "loss": 0.0281,
      "step": 603640
    },
    {
      "epoch": 0.987902829873726,
      "grad_norm": 0.7361582517623901,
      "learning_rate": 8.077067532929634e-06,
      "loss": 0.0287,
      "step": 603660
    },
    {
      "epoch": 0.9879355603123793,
      "grad_norm": 0.33520063757896423,
      "learning_rate": 8.077001640716116e-06,
      "loss": 0.0313,
      "step": 603680
    },
    {
      "epoch": 0.9879682907510327,
      "grad_norm": 0.07263349741697311,
      "learning_rate": 8.0769357485026e-06,
      "loss": 0.0241,
      "step": 603700
    },
    {
      "epoch": 0.988001021189686,
      "grad_norm": 1.1365712881088257,
      "learning_rate": 8.076869856289083e-06,
      "loss": 0.0423,
      "step": 603720
    },
    {
      "epoch": 0.9880337516283393,
      "grad_norm": 0.1247234046459198,
      "learning_rate": 8.076803964075565e-06,
      "loss": 0.0366,
      "step": 603740
    },
    {
      "epoch": 0.9880664820669927,
      "grad_norm": 0.9021105766296387,
      "learning_rate": 8.076738071862049e-06,
      "loss": 0.0264,
      "step": 603760
    },
    {
      "epoch": 0.988099212505646,
      "grad_norm": 0.7482732534408569,
      "learning_rate": 8.076672179648531e-06,
      "loss": 0.0329,
      "step": 603780
    },
    {
      "epoch": 0.9881319429442993,
      "grad_norm": 2.533141851425171,
      "learning_rate": 8.076606287435014e-06,
      "loss": 0.0237,
      "step": 603800
    },
    {
      "epoch": 0.9881646733829527,
      "grad_norm": 0.20709791779518127,
      "learning_rate": 8.076540395221496e-06,
      "loss": 0.0192,
      "step": 603820
    },
    {
      "epoch": 0.988197403821606,
      "grad_norm": 0.8603616952896118,
      "learning_rate": 8.07647450300798e-06,
      "loss": 0.0326,
      "step": 603840
    },
    {
      "epoch": 0.9882301342602594,
      "grad_norm": 0.5559505820274353,
      "learning_rate": 8.076408610794464e-06,
      "loss": 0.0298,
      "step": 603860
    },
    {
      "epoch": 0.9882628646989127,
      "grad_norm": 1.1234930753707886,
      "learning_rate": 8.076342718580945e-06,
      "loss": 0.0287,
      "step": 603880
    },
    {
      "epoch": 0.988295595137566,
      "grad_norm": 3.8069398403167725,
      "learning_rate": 8.076276826367429e-06,
      "loss": 0.0163,
      "step": 603900
    },
    {
      "epoch": 0.9883283255762194,
      "grad_norm": 0.5208881497383118,
      "learning_rate": 8.076210934153913e-06,
      "loss": 0.0235,
      "step": 603920
    },
    {
      "epoch": 0.9883610560148727,
      "grad_norm": 1.4368191957473755,
      "learning_rate": 8.076145041940394e-06,
      "loss": 0.02,
      "step": 603940
    },
    {
      "epoch": 0.988393786453526,
      "grad_norm": 1.1996374130249023,
      "learning_rate": 8.076079149726878e-06,
      "loss": 0.0301,
      "step": 603960
    },
    {
      "epoch": 0.9884265168921794,
      "grad_norm": 0.9895201325416565,
      "learning_rate": 8.07601325751336e-06,
      "loss": 0.0233,
      "step": 603980
    },
    {
      "epoch": 0.9884592473308327,
      "grad_norm": 0.5257106423377991,
      "learning_rate": 8.075947365299844e-06,
      "loss": 0.017,
      "step": 604000
    },
    {
      "epoch": 0.988491977769486,
      "grad_norm": 0.26757627725601196,
      "learning_rate": 8.075881473086325e-06,
      "loss": 0.0238,
      "step": 604020
    },
    {
      "epoch": 0.9885247082081394,
      "grad_norm": 1.0187702178955078,
      "learning_rate": 8.075815580872809e-06,
      "loss": 0.0219,
      "step": 604040
    },
    {
      "epoch": 0.9885574386467928,
      "grad_norm": 1.0099869966506958,
      "learning_rate": 8.075749688659293e-06,
      "loss": 0.0213,
      "step": 604060
    },
    {
      "epoch": 0.9885901690854461,
      "grad_norm": 0.2662406861782074,
      "learning_rate": 8.075683796445775e-06,
      "loss": 0.019,
      "step": 604080
    },
    {
      "epoch": 0.9886228995240994,
      "grad_norm": 0.3461116850376129,
      "learning_rate": 8.075617904232258e-06,
      "loss": 0.0255,
      "step": 604100
    },
    {
      "epoch": 0.9886556299627528,
      "grad_norm": 0.5189487934112549,
      "learning_rate": 8.07555201201874e-06,
      "loss": 0.0282,
      "step": 604120
    },
    {
      "epoch": 0.9886883604014061,
      "grad_norm": 0.5691332221031189,
      "learning_rate": 8.075486119805224e-06,
      "loss": 0.0203,
      "step": 604140
    },
    {
      "epoch": 0.9887210908400594,
      "grad_norm": 0.5991542935371399,
      "learning_rate": 8.075420227591705e-06,
      "loss": 0.0323,
      "step": 604160
    },
    {
      "epoch": 0.9887538212787128,
      "grad_norm": 2.487396240234375,
      "learning_rate": 8.075354335378189e-06,
      "loss": 0.0369,
      "step": 604180
    },
    {
      "epoch": 0.9887865517173661,
      "grad_norm": 1.1598440408706665,
      "learning_rate": 8.075288443164671e-06,
      "loss": 0.0207,
      "step": 604200
    },
    {
      "epoch": 0.9888192821560194,
      "grad_norm": 4.927185535430908,
      "learning_rate": 8.075222550951155e-06,
      "loss": 0.0317,
      "step": 604220
    },
    {
      "epoch": 0.9888520125946728,
      "grad_norm": 1.241983413696289,
      "learning_rate": 8.075156658737638e-06,
      "loss": 0.0214,
      "step": 604240
    },
    {
      "epoch": 0.9888847430333262,
      "grad_norm": 1.989405632019043,
      "learning_rate": 8.07509076652412e-06,
      "loss": 0.023,
      "step": 604260
    },
    {
      "epoch": 0.9889174734719794,
      "grad_norm": 0.6552406549453735,
      "learning_rate": 8.075024874310604e-06,
      "loss": 0.0436,
      "step": 604280
    },
    {
      "epoch": 0.9889502039106328,
      "grad_norm": 1.2500295639038086,
      "learning_rate": 8.074958982097087e-06,
      "loss": 0.0329,
      "step": 604300
    },
    {
      "epoch": 0.9889829343492862,
      "grad_norm": 0.3518747389316559,
      "learning_rate": 8.074893089883569e-06,
      "loss": 0.0252,
      "step": 604320
    },
    {
      "epoch": 0.9890156647879395,
      "grad_norm": 1.1957762241363525,
      "learning_rate": 8.074827197670053e-06,
      "loss": 0.0247,
      "step": 604340
    },
    {
      "epoch": 0.9890483952265928,
      "grad_norm": 0.5844027996063232,
      "learning_rate": 8.074761305456535e-06,
      "loss": 0.0282,
      "step": 604360
    },
    {
      "epoch": 0.9890811256652462,
      "grad_norm": 0.47774258255958557,
      "learning_rate": 8.074695413243018e-06,
      "loss": 0.0398,
      "step": 604380
    },
    {
      "epoch": 0.9891138561038995,
      "grad_norm": 0.8734597563743591,
      "learning_rate": 8.074629521029502e-06,
      "loss": 0.0326,
      "step": 604400
    },
    {
      "epoch": 0.9891465865425528,
      "grad_norm": 0.8744085431098938,
      "learning_rate": 8.074563628815984e-06,
      "loss": 0.028,
      "step": 604420
    },
    {
      "epoch": 0.9891793169812062,
      "grad_norm": 0.9669787883758545,
      "learning_rate": 8.074497736602467e-06,
      "loss": 0.0245,
      "step": 604440
    },
    {
      "epoch": 0.9892120474198596,
      "grad_norm": 1.3020325899124146,
      "learning_rate": 8.074431844388949e-06,
      "loss": 0.0234,
      "step": 604460
    },
    {
      "epoch": 0.9892447778585128,
      "grad_norm": 0.6442553997039795,
      "learning_rate": 8.074365952175433e-06,
      "loss": 0.0217,
      "step": 604480
    },
    {
      "epoch": 0.9892775082971662,
      "grad_norm": 0.38682734966278076,
      "learning_rate": 8.074300059961915e-06,
      "loss": 0.0248,
      "step": 604500
    },
    {
      "epoch": 0.9893102387358196,
      "grad_norm": 0.24893780052661896,
      "learning_rate": 8.074234167748398e-06,
      "loss": 0.0206,
      "step": 604520
    },
    {
      "epoch": 0.9893429691744728,
      "grad_norm": 0.8580633997917175,
      "learning_rate": 8.07416827553488e-06,
      "loss": 0.032,
      "step": 604540
    },
    {
      "epoch": 0.9893756996131262,
      "grad_norm": 0.6398657560348511,
      "learning_rate": 8.074102383321364e-06,
      "loss": 0.0193,
      "step": 604560
    },
    {
      "epoch": 0.9894084300517796,
      "grad_norm": 2.6799728870391846,
      "learning_rate": 8.074036491107846e-06,
      "loss": 0.0362,
      "step": 604580
    },
    {
      "epoch": 0.9894411604904328,
      "grad_norm": 2.0361196994781494,
      "learning_rate": 8.073970598894329e-06,
      "loss": 0.026,
      "step": 604600
    },
    {
      "epoch": 0.9894738909290862,
      "grad_norm": 0.2541336119174957,
      "learning_rate": 8.073904706680811e-06,
      "loss": 0.0297,
      "step": 604620
    },
    {
      "epoch": 0.9895066213677396,
      "grad_norm": 0.37351128458976746,
      "learning_rate": 8.073838814467295e-06,
      "loss": 0.0359,
      "step": 604640
    },
    {
      "epoch": 0.989539351806393,
      "grad_norm": 0.6050093173980713,
      "learning_rate": 8.073772922253778e-06,
      "loss": 0.0242,
      "step": 604660
    },
    {
      "epoch": 0.9895720822450462,
      "grad_norm": 1.3901054859161377,
      "learning_rate": 8.07370703004026e-06,
      "loss": 0.0247,
      "step": 604680
    },
    {
      "epoch": 0.9896048126836996,
      "grad_norm": 0.4573255479335785,
      "learning_rate": 8.073641137826744e-06,
      "loss": 0.0233,
      "step": 604700
    },
    {
      "epoch": 0.989637543122353,
      "grad_norm": 1.7135114669799805,
      "learning_rate": 8.073575245613227e-06,
      "loss": 0.0238,
      "step": 604720
    },
    {
      "epoch": 0.9896702735610062,
      "grad_norm": 1.1676054000854492,
      "learning_rate": 8.073509353399709e-06,
      "loss": 0.0262,
      "step": 604740
    },
    {
      "epoch": 0.9897030039996596,
      "grad_norm": 0.2738632559776306,
      "learning_rate": 8.073443461186193e-06,
      "loss": 0.0156,
      "step": 604760
    },
    {
      "epoch": 0.989735734438313,
      "grad_norm": 0.4957289695739746,
      "learning_rate": 8.073377568972676e-06,
      "loss": 0.0249,
      "step": 604780
    },
    {
      "epoch": 0.9897684648769662,
      "grad_norm": 0.1719970554113388,
      "learning_rate": 8.073311676759158e-06,
      "loss": 0.029,
      "step": 604800
    },
    {
      "epoch": 0.9898011953156196,
      "grad_norm": 0.5742407441139221,
      "learning_rate": 8.073245784545642e-06,
      "loss": 0.023,
      "step": 604820
    },
    {
      "epoch": 0.989833925754273,
      "grad_norm": 0.6611279249191284,
      "learning_rate": 8.073179892332124e-06,
      "loss": 0.0148,
      "step": 604840
    },
    {
      "epoch": 0.9898666561929264,
      "grad_norm": 0.20293810963630676,
      "learning_rate": 8.073114000118607e-06,
      "loss": 0.0278,
      "step": 604860
    },
    {
      "epoch": 0.9898993866315796,
      "grad_norm": 0.5245400667190552,
      "learning_rate": 8.07304810790509e-06,
      "loss": 0.0301,
      "step": 604880
    },
    {
      "epoch": 0.989932117070233,
      "grad_norm": 0.3602302372455597,
      "learning_rate": 8.072982215691573e-06,
      "loss": 0.0198,
      "step": 604900
    },
    {
      "epoch": 0.9899648475088864,
      "grad_norm": 1.399489402770996,
      "learning_rate": 8.072916323478055e-06,
      "loss": 0.0333,
      "step": 604920
    },
    {
      "epoch": 0.9899975779475396,
      "grad_norm": 0.39145806431770325,
      "learning_rate": 8.072850431264538e-06,
      "loss": 0.0233,
      "step": 604940
    },
    {
      "epoch": 0.990030308386193,
      "grad_norm": 0.44831958413124084,
      "learning_rate": 8.07278453905102e-06,
      "loss": 0.0238,
      "step": 604960
    },
    {
      "epoch": 0.9900630388248464,
      "grad_norm": 1.0434224605560303,
      "learning_rate": 8.072718646837504e-06,
      "loss": 0.0339,
      "step": 604980
    },
    {
      "epoch": 0.9900957692634996,
      "grad_norm": 0.455571711063385,
      "learning_rate": 8.072652754623986e-06,
      "loss": 0.0317,
      "step": 605000
    },
    {
      "epoch": 0.990128499702153,
      "grad_norm": 2.608083963394165,
      "learning_rate": 8.07258686241047e-06,
      "loss": 0.0364,
      "step": 605020
    },
    {
      "epoch": 0.9901612301408064,
      "grad_norm": 0.7517668008804321,
      "learning_rate": 8.072520970196953e-06,
      "loss": 0.0322,
      "step": 605040
    },
    {
      "epoch": 0.9901939605794596,
      "grad_norm": 0.7444202899932861,
      "learning_rate": 8.072455077983435e-06,
      "loss": 0.0273,
      "step": 605060
    },
    {
      "epoch": 0.990226691018113,
      "grad_norm": 0.5074459910392761,
      "learning_rate": 8.072389185769918e-06,
      "loss": 0.0273,
      "step": 605080
    },
    {
      "epoch": 0.9902594214567664,
      "grad_norm": 0.8683751821517944,
      "learning_rate": 8.072323293556402e-06,
      "loss": 0.0317,
      "step": 605100
    },
    {
      "epoch": 0.9902921518954197,
      "grad_norm": 1.121355414390564,
      "learning_rate": 8.072257401342884e-06,
      "loss": 0.0311,
      "step": 605120
    },
    {
      "epoch": 0.990324882334073,
      "grad_norm": 0.3313802480697632,
      "learning_rate": 8.072191509129367e-06,
      "loss": 0.0217,
      "step": 605140
    },
    {
      "epoch": 0.9903576127727264,
      "grad_norm": 1.0353049039840698,
      "learning_rate": 8.072125616915851e-06,
      "loss": 0.0338,
      "step": 605160
    },
    {
      "epoch": 0.9903903432113798,
      "grad_norm": 1.052624225616455,
      "learning_rate": 8.072059724702333e-06,
      "loss": 0.0297,
      "step": 605180
    },
    {
      "epoch": 0.990423073650033,
      "grad_norm": 0.8072554469108582,
      "learning_rate": 8.071993832488816e-06,
      "loss": 0.0308,
      "step": 605200
    },
    {
      "epoch": 0.9904558040886864,
      "grad_norm": 0.37814125418663025,
      "learning_rate": 8.071927940275298e-06,
      "loss": 0.0344,
      "step": 605220
    },
    {
      "epoch": 0.9904885345273398,
      "grad_norm": 0.6305178999900818,
      "learning_rate": 8.071862048061782e-06,
      "loss": 0.0138,
      "step": 605240
    },
    {
      "epoch": 0.990521264965993,
      "grad_norm": 2.71743106842041,
      "learning_rate": 8.071796155848264e-06,
      "loss": 0.0344,
      "step": 605260
    },
    {
      "epoch": 0.9905539954046464,
      "grad_norm": 0.5167441368103027,
      "learning_rate": 8.071730263634747e-06,
      "loss": 0.0287,
      "step": 605280
    },
    {
      "epoch": 0.9905867258432998,
      "grad_norm": 1.0544852018356323,
      "learning_rate": 8.07166437142123e-06,
      "loss": 0.0281,
      "step": 605300
    },
    {
      "epoch": 0.9906194562819531,
      "grad_norm": 0.5117526650428772,
      "learning_rate": 8.071598479207713e-06,
      "loss": 0.0268,
      "step": 605320
    },
    {
      "epoch": 0.9906521867206064,
      "grad_norm": 0.55596923828125,
      "learning_rate": 8.071532586994195e-06,
      "loss": 0.0218,
      "step": 605340
    },
    {
      "epoch": 0.9906849171592598,
      "grad_norm": 0.15559282898902893,
      "learning_rate": 8.071466694780678e-06,
      "loss": 0.0234,
      "step": 605360
    },
    {
      "epoch": 0.9907176475979131,
      "grad_norm": 0.2391434907913208,
      "learning_rate": 8.07140080256716e-06,
      "loss": 0.0224,
      "step": 605380
    },
    {
      "epoch": 0.9907503780365664,
      "grad_norm": 0.5673677921295166,
      "learning_rate": 8.071334910353644e-06,
      "loss": 0.0278,
      "step": 605400
    },
    {
      "epoch": 0.9907831084752198,
      "grad_norm": 0.24029062688350677,
      "learning_rate": 8.071269018140126e-06,
      "loss": 0.0266,
      "step": 605420
    },
    {
      "epoch": 0.9908158389138731,
      "grad_norm": 1.0206838846206665,
      "learning_rate": 8.07120312592661e-06,
      "loss": 0.0234,
      "step": 605440
    },
    {
      "epoch": 0.9908485693525264,
      "grad_norm": 1.8583409786224365,
      "learning_rate": 8.071137233713093e-06,
      "loss": 0.0337,
      "step": 605460
    },
    {
      "epoch": 0.9908812997911798,
      "grad_norm": 0.1331576555967331,
      "learning_rate": 8.071071341499576e-06,
      "loss": 0.0259,
      "step": 605480
    },
    {
      "epoch": 0.9909140302298332,
      "grad_norm": 0.4689229130744934,
      "learning_rate": 8.071005449286058e-06,
      "loss": 0.0267,
      "step": 605500
    },
    {
      "epoch": 0.9909467606684865,
      "grad_norm": 0.5847957134246826,
      "learning_rate": 8.070939557072542e-06,
      "loss": 0.0282,
      "step": 605520
    },
    {
      "epoch": 0.9909794911071398,
      "grad_norm": 0.48382848501205444,
      "learning_rate": 8.070873664859026e-06,
      "loss": 0.0276,
      "step": 605540
    },
    {
      "epoch": 0.9910122215457932,
      "grad_norm": 0.866437554359436,
      "learning_rate": 8.070807772645507e-06,
      "loss": 0.0259,
      "step": 605560
    },
    {
      "epoch": 0.9910449519844465,
      "grad_norm": 9.026890754699707,
      "learning_rate": 8.070741880431991e-06,
      "loss": 0.0289,
      "step": 605580
    },
    {
      "epoch": 0.9910776824230998,
      "grad_norm": 0.6850886344909668,
      "learning_rate": 8.070675988218473e-06,
      "loss": 0.0221,
      "step": 605600
    },
    {
      "epoch": 0.9911104128617532,
      "grad_norm": 1.3753743171691895,
      "learning_rate": 8.070610096004956e-06,
      "loss": 0.0277,
      "step": 605620
    },
    {
      "epoch": 0.9911431433004065,
      "grad_norm": 0.1451910436153412,
      "learning_rate": 8.070544203791438e-06,
      "loss": 0.0179,
      "step": 605640
    },
    {
      "epoch": 0.9911758737390598,
      "grad_norm": 1.0931166410446167,
      "learning_rate": 8.070478311577922e-06,
      "loss": 0.0297,
      "step": 605660
    },
    {
      "epoch": 0.9912086041777132,
      "grad_norm": 0.7560516595840454,
      "learning_rate": 8.070412419364404e-06,
      "loss": 0.0403,
      "step": 605680
    },
    {
      "epoch": 0.9912413346163665,
      "grad_norm": 0.10251373052597046,
      "learning_rate": 8.070346527150887e-06,
      "loss": 0.0238,
      "step": 605700
    },
    {
      "epoch": 0.9912740650550199,
      "grad_norm": 0.08844749629497528,
      "learning_rate": 8.07028063493737e-06,
      "loss": 0.0296,
      "step": 605720
    },
    {
      "epoch": 0.9913067954936732,
      "grad_norm": 0.9603040814399719,
      "learning_rate": 8.070214742723853e-06,
      "loss": 0.0223,
      "step": 605740
    },
    {
      "epoch": 0.9913395259323265,
      "grad_norm": 2.028700351715088,
      "learning_rate": 8.070148850510335e-06,
      "loss": 0.0276,
      "step": 605760
    },
    {
      "epoch": 0.9913722563709799,
      "grad_norm": 3.3129961490631104,
      "learning_rate": 8.070082958296818e-06,
      "loss": 0.0274,
      "step": 605780
    },
    {
      "epoch": 0.9914049868096332,
      "grad_norm": 0.7824543118476868,
      "learning_rate": 8.0700170660833e-06,
      "loss": 0.0233,
      "step": 605800
    },
    {
      "epoch": 0.9914377172482866,
      "grad_norm": 0.7486721873283386,
      "learning_rate": 8.069951173869784e-06,
      "loss": 0.0259,
      "step": 605820
    },
    {
      "epoch": 0.9914704476869399,
      "grad_norm": 0.21604023873806,
      "learning_rate": 8.069885281656267e-06,
      "loss": 0.0322,
      "step": 605840
    },
    {
      "epoch": 0.9915031781255932,
      "grad_norm": 0.68186354637146,
      "learning_rate": 8.06981938944275e-06,
      "loss": 0.0288,
      "step": 605860
    },
    {
      "epoch": 0.9915359085642466,
      "grad_norm": 0.4049902856349945,
      "learning_rate": 8.069753497229233e-06,
      "loss": 0.0245,
      "step": 605880
    },
    {
      "epoch": 0.9915686390028999,
      "grad_norm": 0.7706247568130493,
      "learning_rate": 8.069687605015717e-06,
      "loss": 0.0283,
      "step": 605900
    },
    {
      "epoch": 0.9916013694415533,
      "grad_norm": 0.6922720074653625,
      "learning_rate": 8.069621712802198e-06,
      "loss": 0.0332,
      "step": 605920
    },
    {
      "epoch": 0.9916340998802066,
      "grad_norm": 1.072981357574463,
      "learning_rate": 8.069555820588682e-06,
      "loss": 0.0347,
      "step": 605940
    },
    {
      "epoch": 0.9916668303188599,
      "grad_norm": 0.9481187462806702,
      "learning_rate": 8.069489928375166e-06,
      "loss": 0.0248,
      "step": 605960
    },
    {
      "epoch": 0.9916995607575133,
      "grad_norm": 0.25361156463623047,
      "learning_rate": 8.069424036161647e-06,
      "loss": 0.0306,
      "step": 605980
    },
    {
      "epoch": 0.9917322911961666,
      "grad_norm": 1.708698034286499,
      "learning_rate": 8.069358143948131e-06,
      "loss": 0.0234,
      "step": 606000
    },
    {
      "epoch": 0.9917650216348199,
      "grad_norm": 0.2992591857910156,
      "learning_rate": 8.069292251734613e-06,
      "loss": 0.0224,
      "step": 606020
    },
    {
      "epoch": 0.9917977520734733,
      "grad_norm": 1.1693171262741089,
      "learning_rate": 8.069226359521097e-06,
      "loss": 0.0211,
      "step": 606040
    },
    {
      "epoch": 0.9918304825121266,
      "grad_norm": 0.7502577304840088,
      "learning_rate": 8.069160467307578e-06,
      "loss": 0.0308,
      "step": 606060
    },
    {
      "epoch": 0.99186321295078,
      "grad_norm": 1.185540795326233,
      "learning_rate": 8.069094575094062e-06,
      "loss": 0.0223,
      "step": 606080
    },
    {
      "epoch": 0.9918959433894333,
      "grad_norm": 0.9510523080825806,
      "learning_rate": 8.069028682880544e-06,
      "loss": 0.0299,
      "step": 606100
    },
    {
      "epoch": 0.9919286738280867,
      "grad_norm": 0.35065901279449463,
      "learning_rate": 8.068962790667028e-06,
      "loss": 0.0265,
      "step": 606120
    },
    {
      "epoch": 0.99196140426674,
      "grad_norm": 0.9734389185905457,
      "learning_rate": 8.06889689845351e-06,
      "loss": 0.0251,
      "step": 606140
    },
    {
      "epoch": 0.9919941347053933,
      "grad_norm": 0.26104438304901123,
      "learning_rate": 8.068831006239993e-06,
      "loss": 0.0189,
      "step": 606160
    },
    {
      "epoch": 0.9920268651440467,
      "grad_norm": 1.609896183013916,
      "learning_rate": 8.068765114026477e-06,
      "loss": 0.021,
      "step": 606180
    },
    {
      "epoch": 0.9920595955827,
      "grad_norm": 1.187135934829712,
      "learning_rate": 8.068699221812958e-06,
      "loss": 0.0288,
      "step": 606200
    },
    {
      "epoch": 0.9920923260213533,
      "grad_norm": 0.5031182169914246,
      "learning_rate": 8.068633329599442e-06,
      "loss": 0.0243,
      "step": 606220
    },
    {
      "epoch": 0.9921250564600067,
      "grad_norm": 0.608471155166626,
      "learning_rate": 8.068567437385924e-06,
      "loss": 0.0299,
      "step": 606240
    },
    {
      "epoch": 0.99215778689866,
      "grad_norm": 2.361427068710327,
      "learning_rate": 8.068501545172408e-06,
      "loss": 0.026,
      "step": 606260
    },
    {
      "epoch": 0.9921905173373133,
      "grad_norm": 0.5558742880821228,
      "learning_rate": 8.068435652958891e-06,
      "loss": 0.0244,
      "step": 606280
    },
    {
      "epoch": 0.9922232477759667,
      "grad_norm": 0.3850342631340027,
      "learning_rate": 8.068369760745373e-06,
      "loss": 0.0248,
      "step": 606300
    },
    {
      "epoch": 0.9922559782146201,
      "grad_norm": 0.7205862402915955,
      "learning_rate": 8.068303868531857e-06,
      "loss": 0.0242,
      "step": 606320
    },
    {
      "epoch": 0.9922887086532733,
      "grad_norm": 1.3607769012451172,
      "learning_rate": 8.06823797631834e-06,
      "loss": 0.0194,
      "step": 606340
    },
    {
      "epoch": 0.9923214390919267,
      "grad_norm": 0.46154889464378357,
      "learning_rate": 8.068172084104822e-06,
      "loss": 0.0281,
      "step": 606360
    },
    {
      "epoch": 0.9923541695305801,
      "grad_norm": 3.7816545963287354,
      "learning_rate": 8.068106191891306e-06,
      "loss": 0.0392,
      "step": 606380
    },
    {
      "epoch": 0.9923868999692333,
      "grad_norm": 4.663812160491943,
      "learning_rate": 8.068040299677788e-06,
      "loss": 0.0269,
      "step": 606400
    },
    {
      "epoch": 0.9924196304078867,
      "grad_norm": 0.17375347018241882,
      "learning_rate": 8.067974407464271e-06,
      "loss": 0.0375,
      "step": 606420
    },
    {
      "epoch": 0.9924523608465401,
      "grad_norm": 0.5620856285095215,
      "learning_rate": 8.067908515250753e-06,
      "loss": 0.0265,
      "step": 606440
    },
    {
      "epoch": 0.9924850912851934,
      "grad_norm": 1.2883481979370117,
      "learning_rate": 8.067842623037237e-06,
      "loss": 0.0235,
      "step": 606460
    },
    {
      "epoch": 0.9925178217238467,
      "grad_norm": 0.4484613835811615,
      "learning_rate": 8.067776730823719e-06,
      "loss": 0.0214,
      "step": 606480
    },
    {
      "epoch": 0.9925505521625001,
      "grad_norm": 1.0251320600509644,
      "learning_rate": 8.067710838610202e-06,
      "loss": 0.0242,
      "step": 606500
    },
    {
      "epoch": 0.9925832826011535,
      "grad_norm": 0.4347858428955078,
      "learning_rate": 8.067644946396686e-06,
      "loss": 0.0279,
      "step": 606520
    },
    {
      "epoch": 0.9926160130398067,
      "grad_norm": 0.3184017539024353,
      "learning_rate": 8.067579054183168e-06,
      "loss": 0.0237,
      "step": 606540
    },
    {
      "epoch": 0.9926487434784601,
      "grad_norm": 1.1720319986343384,
      "learning_rate": 8.067513161969651e-06,
      "loss": 0.019,
      "step": 606560
    },
    {
      "epoch": 0.9926814739171135,
      "grad_norm": 0.614104688167572,
      "learning_rate": 8.067447269756133e-06,
      "loss": 0.032,
      "step": 606580
    },
    {
      "epoch": 0.9927142043557667,
      "grad_norm": 1.0038634538650513,
      "learning_rate": 8.067381377542617e-06,
      "loss": 0.0199,
      "step": 606600
    },
    {
      "epoch": 0.9927469347944201,
      "grad_norm": 0.6017940640449524,
      "learning_rate": 8.067315485329099e-06,
      "loss": 0.0315,
      "step": 606620
    },
    {
      "epoch": 0.9927796652330735,
      "grad_norm": 0.7800701260566711,
      "learning_rate": 8.067249593115582e-06,
      "loss": 0.0253,
      "step": 606640
    },
    {
      "epoch": 0.9928123956717267,
      "grad_norm": 1.3088792562484741,
      "learning_rate": 8.067183700902064e-06,
      "loss": 0.0271,
      "step": 606660
    },
    {
      "epoch": 0.9928451261103801,
      "grad_norm": 0.11023068428039551,
      "learning_rate": 8.067117808688548e-06,
      "loss": 0.0268,
      "step": 606680
    },
    {
      "epoch": 0.9928778565490335,
      "grad_norm": 0.9973756670951843,
      "learning_rate": 8.067051916475031e-06,
      "loss": 0.0224,
      "step": 606700
    },
    {
      "epoch": 0.9929105869876869,
      "grad_norm": 1.1318453550338745,
      "learning_rate": 8.066986024261513e-06,
      "loss": 0.0315,
      "step": 606720
    },
    {
      "epoch": 0.9929433174263401,
      "grad_norm": 0.8642081022262573,
      "learning_rate": 8.066920132047997e-06,
      "loss": 0.0256,
      "step": 606740
    },
    {
      "epoch": 0.9929760478649935,
      "grad_norm": 0.3353133201599121,
      "learning_rate": 8.06685423983448e-06,
      "loss": 0.0429,
      "step": 606760
    },
    {
      "epoch": 0.9930087783036469,
      "grad_norm": 1.2907392978668213,
      "learning_rate": 8.066788347620962e-06,
      "loss": 0.0289,
      "step": 606780
    },
    {
      "epoch": 0.9930415087423001,
      "grad_norm": 1.3907581567764282,
      "learning_rate": 8.066722455407446e-06,
      "loss": 0.0307,
      "step": 606800
    },
    {
      "epoch": 0.9930742391809535,
      "grad_norm": 1.1475718021392822,
      "learning_rate": 8.066656563193928e-06,
      "loss": 0.0233,
      "step": 606820
    },
    {
      "epoch": 0.9931069696196069,
      "grad_norm": 0.6283518671989441,
      "learning_rate": 8.066590670980411e-06,
      "loss": 0.033,
      "step": 606840
    },
    {
      "epoch": 0.9931397000582601,
      "grad_norm": 0.2875696122646332,
      "learning_rate": 8.066524778766895e-06,
      "loss": 0.0294,
      "step": 606860
    },
    {
      "epoch": 0.9931724304969135,
      "grad_norm": 1.4150004386901855,
      "learning_rate": 8.066458886553377e-06,
      "loss": 0.0255,
      "step": 606880
    },
    {
      "epoch": 0.9932051609355669,
      "grad_norm": 0.47730761766433716,
      "learning_rate": 8.06639299433986e-06,
      "loss": 0.0274,
      "step": 606900
    },
    {
      "epoch": 0.9932378913742202,
      "grad_norm": 0.4565196931362152,
      "learning_rate": 8.066327102126342e-06,
      "loss": 0.024,
      "step": 606920
    },
    {
      "epoch": 0.9932706218128735,
      "grad_norm": 1.6649775505065918,
      "learning_rate": 8.066261209912826e-06,
      "loss": 0.0229,
      "step": 606940
    },
    {
      "epoch": 0.9933033522515269,
      "grad_norm": 0.3681179881095886,
      "learning_rate": 8.066195317699308e-06,
      "loss": 0.032,
      "step": 606960
    },
    {
      "epoch": 0.9933360826901803,
      "grad_norm": 0.7929077744483948,
      "learning_rate": 8.066129425485791e-06,
      "loss": 0.0214,
      "step": 606980
    },
    {
      "epoch": 0.9933688131288335,
      "grad_norm": 1.0711913108825684,
      "learning_rate": 8.066063533272273e-06,
      "loss": 0.026,
      "step": 607000
    },
    {
      "epoch": 0.9934015435674869,
      "grad_norm": 0.37190622091293335,
      "learning_rate": 8.065997641058757e-06,
      "loss": 0.0225,
      "step": 607020
    },
    {
      "epoch": 0.9934342740061403,
      "grad_norm": 1.3433377742767334,
      "learning_rate": 8.065931748845239e-06,
      "loss": 0.0253,
      "step": 607040
    },
    {
      "epoch": 0.9934670044447935,
      "grad_norm": 0.3379421532154083,
      "learning_rate": 8.065865856631722e-06,
      "loss": 0.0236,
      "step": 607060
    },
    {
      "epoch": 0.9934997348834469,
      "grad_norm": 0.390848845243454,
      "learning_rate": 8.065799964418206e-06,
      "loss": 0.029,
      "step": 607080
    },
    {
      "epoch": 0.9935324653221003,
      "grad_norm": 0.7871549129486084,
      "learning_rate": 8.065734072204688e-06,
      "loss": 0.0226,
      "step": 607100
    },
    {
      "epoch": 0.9935651957607536,
      "grad_norm": 0.405150830745697,
      "learning_rate": 8.065668179991171e-06,
      "loss": 0.0201,
      "step": 607120
    },
    {
      "epoch": 0.9935979261994069,
      "grad_norm": 2.082280397415161,
      "learning_rate": 8.065602287777655e-06,
      "loss": 0.0298,
      "step": 607140
    },
    {
      "epoch": 0.9936306566380603,
      "grad_norm": 0.9286414980888367,
      "learning_rate": 8.065536395564137e-06,
      "loss": 0.0341,
      "step": 607160
    },
    {
      "epoch": 0.9936633870767136,
      "grad_norm": 1.5625145435333252,
      "learning_rate": 8.06547050335062e-06,
      "loss": 0.0309,
      "step": 607180
    },
    {
      "epoch": 0.9936961175153669,
      "grad_norm": 0.41729864478111267,
      "learning_rate": 8.065404611137102e-06,
      "loss": 0.0226,
      "step": 607200
    },
    {
      "epoch": 0.9937288479540203,
      "grad_norm": 1.242246389389038,
      "learning_rate": 8.065338718923586e-06,
      "loss": 0.0312,
      "step": 607220
    },
    {
      "epoch": 0.9937615783926737,
      "grad_norm": 1.4289848804473877,
      "learning_rate": 8.06527282671007e-06,
      "loss": 0.03,
      "step": 607240
    },
    {
      "epoch": 0.9937943088313269,
      "grad_norm": 0.8117544651031494,
      "learning_rate": 8.065206934496551e-06,
      "loss": 0.0219,
      "step": 607260
    },
    {
      "epoch": 0.9938270392699803,
      "grad_norm": 1.1163207292556763,
      "learning_rate": 8.065141042283035e-06,
      "loss": 0.0305,
      "step": 607280
    },
    {
      "epoch": 0.9938597697086337,
      "grad_norm": 1.5233243703842163,
      "learning_rate": 8.065075150069517e-06,
      "loss": 0.0219,
      "step": 607300
    },
    {
      "epoch": 0.993892500147287,
      "grad_norm": 0.34989699721336365,
      "learning_rate": 8.065009257856e-06,
      "loss": 0.0248,
      "step": 607320
    },
    {
      "epoch": 0.9939252305859403,
      "grad_norm": 0.6046262383460999,
      "learning_rate": 8.064943365642482e-06,
      "loss": 0.0208,
      "step": 607340
    },
    {
      "epoch": 0.9939579610245937,
      "grad_norm": 0.7803442478179932,
      "learning_rate": 8.064877473428966e-06,
      "loss": 0.0274,
      "step": 607360
    },
    {
      "epoch": 0.993990691463247,
      "grad_norm": 0.9147172570228577,
      "learning_rate": 8.064811581215448e-06,
      "loss": 0.0316,
      "step": 607380
    },
    {
      "epoch": 0.9940234219019003,
      "grad_norm": 0.6932003498077393,
      "learning_rate": 8.064745689001931e-06,
      "loss": 0.0266,
      "step": 607400
    },
    {
      "epoch": 0.9940561523405537,
      "grad_norm": 2.2314348220825195,
      "learning_rate": 8.064679796788413e-06,
      "loss": 0.0256,
      "step": 607420
    },
    {
      "epoch": 0.994088882779207,
      "grad_norm": 0.3246833384037018,
      "learning_rate": 8.064613904574897e-06,
      "loss": 0.0277,
      "step": 607440
    },
    {
      "epoch": 0.9941216132178603,
      "grad_norm": 0.908197820186615,
      "learning_rate": 8.064548012361379e-06,
      "loss": 0.0245,
      "step": 607460
    },
    {
      "epoch": 0.9941543436565137,
      "grad_norm": 0.7757177352905273,
      "learning_rate": 8.064482120147862e-06,
      "loss": 0.022,
      "step": 607480
    },
    {
      "epoch": 0.994187074095167,
      "grad_norm": 0.3854769170284271,
      "learning_rate": 8.064416227934346e-06,
      "loss": 0.0309,
      "step": 607500
    },
    {
      "epoch": 0.9942198045338204,
      "grad_norm": 0.48449456691741943,
      "learning_rate": 8.064350335720828e-06,
      "loss": 0.0205,
      "step": 607520
    },
    {
      "epoch": 0.9942525349724737,
      "grad_norm": 0.44135886430740356,
      "learning_rate": 8.064284443507311e-06,
      "loss": 0.0223,
      "step": 607540
    },
    {
      "epoch": 0.994285265411127,
      "grad_norm": 1.0252032279968262,
      "learning_rate": 8.064218551293795e-06,
      "loss": 0.0188,
      "step": 607560
    },
    {
      "epoch": 0.9943179958497804,
      "grad_norm": 0.5266233682632446,
      "learning_rate": 8.064152659080277e-06,
      "loss": 0.0245,
      "step": 607580
    },
    {
      "epoch": 0.9943507262884337,
      "grad_norm": 1.2393165826797485,
      "learning_rate": 8.06408676686676e-06,
      "loss": 0.0331,
      "step": 607600
    },
    {
      "epoch": 0.9943834567270871,
      "grad_norm": 2.9785265922546387,
      "learning_rate": 8.064020874653244e-06,
      "loss": 0.0329,
      "step": 607620
    },
    {
      "epoch": 0.9944161871657404,
      "grad_norm": 0.8133003115653992,
      "learning_rate": 8.063954982439726e-06,
      "loss": 0.021,
      "step": 607640
    },
    {
      "epoch": 0.9944489176043937,
      "grad_norm": 1.0246769189834595,
      "learning_rate": 8.06388909022621e-06,
      "loss": 0.0329,
      "step": 607660
    },
    {
      "epoch": 0.9944816480430471,
      "grad_norm": 0.20034615695476532,
      "learning_rate": 8.063823198012691e-06,
      "loss": 0.032,
      "step": 607680
    },
    {
      "epoch": 0.9945143784817004,
      "grad_norm": 1.1166832447052002,
      "learning_rate": 8.063757305799175e-06,
      "loss": 0.0282,
      "step": 607700
    },
    {
      "epoch": 0.9945471089203537,
      "grad_norm": 0.6227537989616394,
      "learning_rate": 8.063691413585657e-06,
      "loss": 0.0192,
      "step": 607720
    },
    {
      "epoch": 0.9945798393590071,
      "grad_norm": 0.5264438986778259,
      "learning_rate": 8.06362552137214e-06,
      "loss": 0.0303,
      "step": 607740
    },
    {
      "epoch": 0.9946125697976604,
      "grad_norm": 0.3459028899669647,
      "learning_rate": 8.063559629158622e-06,
      "loss": 0.0312,
      "step": 607760
    },
    {
      "epoch": 0.9946453002363138,
      "grad_norm": 1.670217514038086,
      "learning_rate": 8.063493736945106e-06,
      "loss": 0.0323,
      "step": 607780
    },
    {
      "epoch": 0.9946780306749671,
      "grad_norm": 0.6293854117393494,
      "learning_rate": 8.063427844731588e-06,
      "loss": 0.0269,
      "step": 607800
    },
    {
      "epoch": 0.9947107611136204,
      "grad_norm": 0.565148651599884,
      "learning_rate": 8.063361952518071e-06,
      "loss": 0.0184,
      "step": 607820
    },
    {
      "epoch": 0.9947434915522738,
      "grad_norm": 0.09470181167125702,
      "learning_rate": 8.063296060304553e-06,
      "loss": 0.0235,
      "step": 607840
    },
    {
      "epoch": 0.9947762219909271,
      "grad_norm": 1.079236626625061,
      "learning_rate": 8.063230168091037e-06,
      "loss": 0.0301,
      "step": 607860
    },
    {
      "epoch": 0.9948089524295805,
      "grad_norm": 0.2771570682525635,
      "learning_rate": 8.06316427587752e-06,
      "loss": 0.0303,
      "step": 607880
    },
    {
      "epoch": 0.9948416828682338,
      "grad_norm": 0.4887157678604126,
      "learning_rate": 8.063098383664002e-06,
      "loss": 0.027,
      "step": 607900
    },
    {
      "epoch": 0.9948744133068871,
      "grad_norm": 0.22446347773075104,
      "learning_rate": 8.063032491450486e-06,
      "loss": 0.0237,
      "step": 607920
    },
    {
      "epoch": 0.9949071437455405,
      "grad_norm": 1.0626825094223022,
      "learning_rate": 8.06296659923697e-06,
      "loss": 0.0212,
      "step": 607940
    },
    {
      "epoch": 0.9949398741841938,
      "grad_norm": 0.379512220621109,
      "learning_rate": 8.062900707023451e-06,
      "loss": 0.0228,
      "step": 607960
    },
    {
      "epoch": 0.9949726046228472,
      "grad_norm": 4.509729385375977,
      "learning_rate": 8.062834814809935e-06,
      "loss": 0.0276,
      "step": 607980
    },
    {
      "epoch": 0.9950053350615005,
      "grad_norm": 1.6054904460906982,
      "learning_rate": 8.062768922596419e-06,
      "loss": 0.0262,
      "step": 608000
    },
    {
      "epoch": 0.9950380655001538,
      "grad_norm": 1.5798554420471191,
      "learning_rate": 8.0627030303829e-06,
      "loss": 0.0175,
      "step": 608020
    },
    {
      "epoch": 0.9950707959388072,
      "grad_norm": 1.4103690385818481,
      "learning_rate": 8.062637138169384e-06,
      "loss": 0.014,
      "step": 608040
    },
    {
      "epoch": 0.9951035263774605,
      "grad_norm": 0.3470999300479889,
      "learning_rate": 8.062571245955866e-06,
      "loss": 0.0317,
      "step": 608060
    },
    {
      "epoch": 0.9951362568161138,
      "grad_norm": 0.6952672600746155,
      "learning_rate": 8.06250535374235e-06,
      "loss": 0.03,
      "step": 608080
    },
    {
      "epoch": 0.9951689872547672,
      "grad_norm": 0.6813602447509766,
      "learning_rate": 8.062439461528831e-06,
      "loss": 0.0218,
      "step": 608100
    },
    {
      "epoch": 0.9952017176934205,
      "grad_norm": 1.5460774898529053,
      "learning_rate": 8.062373569315315e-06,
      "loss": 0.0262,
      "step": 608120
    },
    {
      "epoch": 0.9952344481320738,
      "grad_norm": 0.4654696583747864,
      "learning_rate": 8.062307677101797e-06,
      "loss": 0.0253,
      "step": 608140
    },
    {
      "epoch": 0.9952671785707272,
      "grad_norm": 1.3234368562698364,
      "learning_rate": 8.06224178488828e-06,
      "loss": 0.0256,
      "step": 608160
    },
    {
      "epoch": 0.9952999090093806,
      "grad_norm": 1.7293881177902222,
      "learning_rate": 8.062175892674762e-06,
      "loss": 0.0295,
      "step": 608180
    },
    {
      "epoch": 0.9953326394480339,
      "grad_norm": 0.8415185809135437,
      "learning_rate": 8.062110000461246e-06,
      "loss": 0.0245,
      "step": 608200
    },
    {
      "epoch": 0.9953653698866872,
      "grad_norm": 0.8993229866027832,
      "learning_rate": 8.062044108247728e-06,
      "loss": 0.0287,
      "step": 608220
    },
    {
      "epoch": 0.9953981003253406,
      "grad_norm": 1.5287230014801025,
      "learning_rate": 8.061978216034211e-06,
      "loss": 0.0187,
      "step": 608240
    },
    {
      "epoch": 0.9954308307639939,
      "grad_norm": 1.032782793045044,
      "learning_rate": 8.061912323820695e-06,
      "loss": 0.0262,
      "step": 608260
    },
    {
      "epoch": 0.9954635612026472,
      "grad_norm": 0.6193127632141113,
      "learning_rate": 8.061846431607177e-06,
      "loss": 0.0228,
      "step": 608280
    },
    {
      "epoch": 0.9954962916413006,
      "grad_norm": 0.7731715440750122,
      "learning_rate": 8.06178053939366e-06,
      "loss": 0.0317,
      "step": 608300
    },
    {
      "epoch": 0.9955290220799539,
      "grad_norm": 2.0250747203826904,
      "learning_rate": 8.061714647180144e-06,
      "loss": 0.0255,
      "step": 608320
    },
    {
      "epoch": 0.9955617525186072,
      "grad_norm": 0.5902040004730225,
      "learning_rate": 8.061648754966626e-06,
      "loss": 0.0234,
      "step": 608340
    },
    {
      "epoch": 0.9955944829572606,
      "grad_norm": 0.8987645506858826,
      "learning_rate": 8.06158286275311e-06,
      "loss": 0.0186,
      "step": 608360
    },
    {
      "epoch": 0.995627213395914,
      "grad_norm": 0.4924452304840088,
      "learning_rate": 8.061516970539593e-06,
      "loss": 0.0365,
      "step": 608380
    },
    {
      "epoch": 0.9956599438345672,
      "grad_norm": 1.6404119729995728,
      "learning_rate": 8.061451078326075e-06,
      "loss": 0.026,
      "step": 608400
    },
    {
      "epoch": 0.9956926742732206,
      "grad_norm": 1.0581142902374268,
      "learning_rate": 8.061385186112559e-06,
      "loss": 0.0329,
      "step": 608420
    },
    {
      "epoch": 0.995725404711874,
      "grad_norm": 0.8043068051338196,
      "learning_rate": 8.06131929389904e-06,
      "loss": 0.0175,
      "step": 608440
    },
    {
      "epoch": 0.9957581351505272,
      "grad_norm": 1.0151095390319824,
      "learning_rate": 8.061253401685524e-06,
      "loss": 0.0315,
      "step": 608460
    },
    {
      "epoch": 0.9957908655891806,
      "grad_norm": 1.5552314519882202,
      "learning_rate": 8.061187509472006e-06,
      "loss": 0.0234,
      "step": 608480
    },
    {
      "epoch": 0.995823596027834,
      "grad_norm": 0.7593640089035034,
      "learning_rate": 8.06112161725849e-06,
      "loss": 0.0265,
      "step": 608500
    },
    {
      "epoch": 0.9958563264664873,
      "grad_norm": 1.7763267755508423,
      "learning_rate": 8.061055725044972e-06,
      "loss": 0.0236,
      "step": 608520
    },
    {
      "epoch": 0.9958890569051406,
      "grad_norm": 1.065043330192566,
      "learning_rate": 8.060989832831455e-06,
      "loss": 0.0376,
      "step": 608540
    },
    {
      "epoch": 0.995921787343794,
      "grad_norm": 1.891223430633545,
      "learning_rate": 8.060923940617937e-06,
      "loss": 0.0291,
      "step": 608560
    },
    {
      "epoch": 0.9959545177824474,
      "grad_norm": 0.5224106311798096,
      "learning_rate": 8.06085804840442e-06,
      "loss": 0.0234,
      "step": 608580
    },
    {
      "epoch": 0.9959872482211006,
      "grad_norm": 1.2715773582458496,
      "learning_rate": 8.060792156190902e-06,
      "loss": 0.0261,
      "step": 608600
    },
    {
      "epoch": 0.996019978659754,
      "grad_norm": 0.47938072681427,
      "learning_rate": 8.060726263977386e-06,
      "loss": 0.0219,
      "step": 608620
    },
    {
      "epoch": 0.9960527090984074,
      "grad_norm": 0.5080609321594238,
      "learning_rate": 8.06066037176387e-06,
      "loss": 0.0202,
      "step": 608640
    },
    {
      "epoch": 0.9960854395370606,
      "grad_norm": 1.050761103630066,
      "learning_rate": 8.060594479550352e-06,
      "loss": 0.0234,
      "step": 608660
    },
    {
      "epoch": 0.996118169975714,
      "grad_norm": 0.9494441151618958,
      "learning_rate": 8.060528587336835e-06,
      "loss": 0.0336,
      "step": 608680
    },
    {
      "epoch": 0.9961509004143674,
      "grad_norm": 1.319980502128601,
      "learning_rate": 8.060462695123317e-06,
      "loss": 0.0297,
      "step": 608700
    },
    {
      "epoch": 0.9961836308530206,
      "grad_norm": 0.7032473087310791,
      "learning_rate": 8.0603968029098e-06,
      "loss": 0.0297,
      "step": 608720
    },
    {
      "epoch": 0.996216361291674,
      "grad_norm": 0.5513163208961487,
      "learning_rate": 8.060330910696284e-06,
      "loss": 0.0236,
      "step": 608740
    },
    {
      "epoch": 0.9962490917303274,
      "grad_norm": 2.275109052658081,
      "learning_rate": 8.060265018482766e-06,
      "loss": 0.0446,
      "step": 608760
    },
    {
      "epoch": 0.9962818221689808,
      "grad_norm": 0.22084295749664307,
      "learning_rate": 8.06019912626925e-06,
      "loss": 0.0255,
      "step": 608780
    },
    {
      "epoch": 0.996314552607634,
      "grad_norm": 2.0786867141723633,
      "learning_rate": 8.060133234055733e-06,
      "loss": 0.0257,
      "step": 608800
    },
    {
      "epoch": 0.9963472830462874,
      "grad_norm": 0.9669113755226135,
      "learning_rate": 8.060067341842215e-06,
      "loss": 0.0248,
      "step": 608820
    },
    {
      "epoch": 0.9963800134849408,
      "grad_norm": 0.4448190927505493,
      "learning_rate": 8.060001449628699e-06,
      "loss": 0.0269,
      "step": 608840
    },
    {
      "epoch": 0.996412743923594,
      "grad_norm": 1.5605570077896118,
      "learning_rate": 8.05993555741518e-06,
      "loss": 0.0227,
      "step": 608860
    },
    {
      "epoch": 0.9964454743622474,
      "grad_norm": 1.087622880935669,
      "learning_rate": 8.059869665201664e-06,
      "loss": 0.0276,
      "step": 608880
    },
    {
      "epoch": 0.9964782048009008,
      "grad_norm": 0.41539984941482544,
      "learning_rate": 8.059803772988146e-06,
      "loss": 0.0257,
      "step": 608900
    },
    {
      "epoch": 0.996510935239554,
      "grad_norm": 0.36466214060783386,
      "learning_rate": 8.05973788077463e-06,
      "loss": 0.0266,
      "step": 608920
    },
    {
      "epoch": 0.9965436656782074,
      "grad_norm": 1.229049801826477,
      "learning_rate": 8.059671988561112e-06,
      "loss": 0.0205,
      "step": 608940
    },
    {
      "epoch": 0.9965763961168608,
      "grad_norm": 0.3913511633872986,
      "learning_rate": 8.059606096347595e-06,
      "loss": 0.0275,
      "step": 608960
    },
    {
      "epoch": 0.9966091265555141,
      "grad_norm": 0.6265491843223572,
      "learning_rate": 8.059540204134079e-06,
      "loss": 0.0352,
      "step": 608980
    },
    {
      "epoch": 0.9966418569941674,
      "grad_norm": 0.8534446358680725,
      "learning_rate": 8.05947431192056e-06,
      "loss": 0.0245,
      "step": 609000
    },
    {
      "epoch": 0.9966745874328208,
      "grad_norm": 0.18249249458312988,
      "learning_rate": 8.059408419707044e-06,
      "loss": 0.0145,
      "step": 609020
    },
    {
      "epoch": 0.9967073178714742,
      "grad_norm": 1.512600064277649,
      "learning_rate": 8.059342527493526e-06,
      "loss": 0.0272,
      "step": 609040
    },
    {
      "epoch": 0.9967400483101274,
      "grad_norm": 1.3907203674316406,
      "learning_rate": 8.05927663528001e-06,
      "loss": 0.0232,
      "step": 609060
    },
    {
      "epoch": 0.9967727787487808,
      "grad_norm": 0.3198358714580536,
      "learning_rate": 8.059210743066492e-06,
      "loss": 0.0223,
      "step": 609080
    },
    {
      "epoch": 0.9968055091874342,
      "grad_norm": 0.10122410953044891,
      "learning_rate": 8.059144850852975e-06,
      "loss": 0.0145,
      "step": 609100
    },
    {
      "epoch": 0.9968382396260874,
      "grad_norm": 0.22587884962558746,
      "learning_rate": 8.059078958639459e-06,
      "loss": 0.0194,
      "step": 609120
    },
    {
      "epoch": 0.9968709700647408,
      "grad_norm": 1.1790711879730225,
      "learning_rate": 8.05901306642594e-06,
      "loss": 0.0271,
      "step": 609140
    },
    {
      "epoch": 0.9969037005033942,
      "grad_norm": 1.4916670322418213,
      "learning_rate": 8.058947174212424e-06,
      "loss": 0.0168,
      "step": 609160
    },
    {
      "epoch": 0.9969364309420475,
      "grad_norm": 0.4468793570995331,
      "learning_rate": 8.058881281998908e-06,
      "loss": 0.029,
      "step": 609180
    },
    {
      "epoch": 0.9969691613807008,
      "grad_norm": 1.371008276939392,
      "learning_rate": 8.05881538978539e-06,
      "loss": 0.0323,
      "step": 609200
    },
    {
      "epoch": 0.9970018918193542,
      "grad_norm": 0.23892197012901306,
      "learning_rate": 8.058749497571873e-06,
      "loss": 0.0345,
      "step": 609220
    },
    {
      "epoch": 0.9970346222580075,
      "grad_norm": 0.10669007152318954,
      "learning_rate": 8.058683605358355e-06,
      "loss": 0.0254,
      "step": 609240
    },
    {
      "epoch": 0.9970673526966608,
      "grad_norm": 0.4455398917198181,
      "learning_rate": 8.058617713144839e-06,
      "loss": 0.0348,
      "step": 609260
    },
    {
      "epoch": 0.9971000831353142,
      "grad_norm": 0.8302577137947083,
      "learning_rate": 8.05855182093132e-06,
      "loss": 0.0268,
      "step": 609280
    },
    {
      "epoch": 0.9971328135739675,
      "grad_norm": 0.3729323744773865,
      "learning_rate": 8.058485928717804e-06,
      "loss": 0.03,
      "step": 609300
    },
    {
      "epoch": 0.9971655440126208,
      "grad_norm": 0.6274260878562927,
      "learning_rate": 8.058420036504288e-06,
      "loss": 0.0196,
      "step": 609320
    },
    {
      "epoch": 0.9971982744512742,
      "grad_norm": 0.3157738149166107,
      "learning_rate": 8.05835414429077e-06,
      "loss": 0.03,
      "step": 609340
    },
    {
      "epoch": 0.9972310048899276,
      "grad_norm": 1.461518406867981,
      "learning_rate": 8.058288252077253e-06,
      "loss": 0.0264,
      "step": 609360
    },
    {
      "epoch": 0.9972637353285809,
      "grad_norm": 2.321272611618042,
      "learning_rate": 8.058222359863735e-06,
      "loss": 0.0308,
      "step": 609380
    },
    {
      "epoch": 0.9972964657672342,
      "grad_norm": 1.8643759489059448,
      "learning_rate": 8.058156467650219e-06,
      "loss": 0.0232,
      "step": 609400
    },
    {
      "epoch": 0.9973291962058876,
      "grad_norm": 1.0243124961853027,
      "learning_rate": 8.0580905754367e-06,
      "loss": 0.0223,
      "step": 609420
    },
    {
      "epoch": 0.9973619266445409,
      "grad_norm": 0.4832715392112732,
      "learning_rate": 8.058024683223184e-06,
      "loss": 0.0329,
      "step": 609440
    },
    {
      "epoch": 0.9973946570831942,
      "grad_norm": 0.3060508072376251,
      "learning_rate": 8.057958791009666e-06,
      "loss": 0.0272,
      "step": 609460
    },
    {
      "epoch": 0.9974273875218476,
      "grad_norm": 0.90623539686203,
      "learning_rate": 8.05789289879615e-06,
      "loss": 0.0232,
      "step": 609480
    },
    {
      "epoch": 0.9974601179605009,
      "grad_norm": 3.444385290145874,
      "learning_rate": 8.057827006582632e-06,
      "loss": 0.022,
      "step": 609500
    },
    {
      "epoch": 0.9974928483991542,
      "grad_norm": 2.026333808898926,
      "learning_rate": 8.057761114369115e-06,
      "loss": 0.0292,
      "step": 609520
    },
    {
      "epoch": 0.9975255788378076,
      "grad_norm": 0.916042685508728,
      "learning_rate": 8.057695222155599e-06,
      "loss": 0.0247,
      "step": 609540
    },
    {
      "epoch": 0.9975583092764609,
      "grad_norm": 0.2529047727584839,
      "learning_rate": 8.05762932994208e-06,
      "loss": 0.0269,
      "step": 609560
    },
    {
      "epoch": 0.9975910397151143,
      "grad_norm": 0.8669314980506897,
      "learning_rate": 8.057563437728564e-06,
      "loss": 0.0289,
      "step": 609580
    },
    {
      "epoch": 0.9976237701537676,
      "grad_norm": 0.5857068300247192,
      "learning_rate": 8.057497545515048e-06,
      "loss": 0.0234,
      "step": 609600
    },
    {
      "epoch": 0.997656500592421,
      "grad_norm": 1.2238774299621582,
      "learning_rate": 8.05743165330153e-06,
      "loss": 0.0322,
      "step": 609620
    },
    {
      "epoch": 0.9976892310310743,
      "grad_norm": 0.6445315480232239,
      "learning_rate": 8.057365761088013e-06,
      "loss": 0.0295,
      "step": 609640
    },
    {
      "epoch": 0.9977219614697276,
      "grad_norm": 1.4006062746047974,
      "learning_rate": 8.057299868874495e-06,
      "loss": 0.0225,
      "step": 609660
    },
    {
      "epoch": 0.997754691908381,
      "grad_norm": 0.4469965100288391,
      "learning_rate": 8.057233976660979e-06,
      "loss": 0.0285,
      "step": 609680
    },
    {
      "epoch": 0.9977874223470343,
      "grad_norm": 0.789824366569519,
      "learning_rate": 8.057168084447462e-06,
      "loss": 0.0243,
      "step": 609700
    },
    {
      "epoch": 0.9978201527856876,
      "grad_norm": 4.577872276306152,
      "learning_rate": 8.057102192233944e-06,
      "loss": 0.0336,
      "step": 609720
    },
    {
      "epoch": 0.997852883224341,
      "grad_norm": 0.21928253769874573,
      "learning_rate": 8.057036300020428e-06,
      "loss": 0.0374,
      "step": 609740
    },
    {
      "epoch": 0.9978856136629943,
      "grad_norm": 1.0597634315490723,
      "learning_rate": 8.05697040780691e-06,
      "loss": 0.0304,
      "step": 609760
    },
    {
      "epoch": 0.9979183441016477,
      "grad_norm": 0.6624665260314941,
      "learning_rate": 8.056904515593393e-06,
      "loss": 0.0351,
      "step": 609780
    },
    {
      "epoch": 0.997951074540301,
      "grad_norm": 0.13517019152641296,
      "learning_rate": 8.056838623379875e-06,
      "loss": 0.0198,
      "step": 609800
    },
    {
      "epoch": 0.9979838049789543,
      "grad_norm": 0.4880954325199127,
      "learning_rate": 8.056772731166359e-06,
      "loss": 0.0266,
      "step": 609820
    },
    {
      "epoch": 0.9980165354176077,
      "grad_norm": 0.9028589129447937,
      "learning_rate": 8.05670683895284e-06,
      "loss": 0.0252,
      "step": 609840
    },
    {
      "epoch": 0.998049265856261,
      "grad_norm": 1.3247829675674438,
      "learning_rate": 8.056640946739324e-06,
      "loss": 0.0234,
      "step": 609860
    },
    {
      "epoch": 0.9980819962949143,
      "grad_norm": 0.8500698804855347,
      "learning_rate": 8.056575054525806e-06,
      "loss": 0.0391,
      "step": 609880
    },
    {
      "epoch": 0.9981147267335677,
      "grad_norm": 1.247732162475586,
      "learning_rate": 8.05650916231229e-06,
      "loss": 0.0363,
      "step": 609900
    },
    {
      "epoch": 0.998147457172221,
      "grad_norm": 1.049010157585144,
      "learning_rate": 8.056443270098773e-06,
      "loss": 0.0285,
      "step": 609920
    },
    {
      "epoch": 0.9981801876108743,
      "grad_norm": 0.8458340167999268,
      "learning_rate": 8.056377377885255e-06,
      "loss": 0.0372,
      "step": 609940
    },
    {
      "epoch": 0.9982129180495277,
      "grad_norm": 0.42279958724975586,
      "learning_rate": 8.056311485671739e-06,
      "loss": 0.0207,
      "step": 609960
    },
    {
      "epoch": 0.9982456484881811,
      "grad_norm": 1.6009228229522705,
      "learning_rate": 8.056245593458223e-06,
      "loss": 0.0184,
      "step": 609980
    },
    {
      "epoch": 0.9982783789268344,
      "grad_norm": 1.1209635734558105,
      "learning_rate": 8.056179701244704e-06,
      "loss": 0.0317,
      "step": 610000
    },
    {
      "epoch": 0.9983111093654877,
      "grad_norm": 0.7974423766136169,
      "learning_rate": 8.056113809031188e-06,
      "loss": 0.0226,
      "step": 610020
    },
    {
      "epoch": 0.9983438398041411,
      "grad_norm": 0.8643284440040588,
      "learning_rate": 8.056047916817672e-06,
      "loss": 0.0407,
      "step": 610040
    },
    {
      "epoch": 0.9983765702427944,
      "grad_norm": 0.8365999460220337,
      "learning_rate": 8.055982024604153e-06,
      "loss": 0.0276,
      "step": 610060
    },
    {
      "epoch": 0.9984093006814477,
      "grad_norm": 0.8259379267692566,
      "learning_rate": 8.055916132390637e-06,
      "loss": 0.0272,
      "step": 610080
    },
    {
      "epoch": 0.9984420311201011,
      "grad_norm": 0.6267878413200378,
      "learning_rate": 8.055850240177119e-06,
      "loss": 0.0314,
      "step": 610100
    },
    {
      "epoch": 0.9984747615587544,
      "grad_norm": 0.8389390110969543,
      "learning_rate": 8.055784347963603e-06,
      "loss": 0.028,
      "step": 610120
    },
    {
      "epoch": 0.9985074919974077,
      "grad_norm": 0.5938450694084167,
      "learning_rate": 8.055718455750084e-06,
      "loss": 0.0231,
      "step": 610140
    },
    {
      "epoch": 0.9985402224360611,
      "grad_norm": 0.6682674288749695,
      "learning_rate": 8.055652563536568e-06,
      "loss": 0.0166,
      "step": 610160
    },
    {
      "epoch": 0.9985729528747145,
      "grad_norm": 1.1538684368133545,
      "learning_rate": 8.05558667132305e-06,
      "loss": 0.0155,
      "step": 610180
    },
    {
      "epoch": 0.9986056833133677,
      "grad_norm": 0.6526269316673279,
      "learning_rate": 8.055520779109534e-06,
      "loss": 0.0324,
      "step": 610200
    },
    {
      "epoch": 0.9986384137520211,
      "grad_norm": 0.448464959859848,
      "learning_rate": 8.055454886896015e-06,
      "loss": 0.0247,
      "step": 610220
    },
    {
      "epoch": 0.9986711441906745,
      "grad_norm": 0.23962877690792084,
      "learning_rate": 8.055388994682499e-06,
      "loss": 0.0291,
      "step": 610240
    },
    {
      "epoch": 0.9987038746293277,
      "grad_norm": 1.6971312761306763,
      "learning_rate": 8.055323102468981e-06,
      "loss": 0.0292,
      "step": 610260
    },
    {
      "epoch": 0.9987366050679811,
      "grad_norm": 2.2089343070983887,
      "learning_rate": 8.055257210255464e-06,
      "loss": 0.032,
      "step": 610280
    },
    {
      "epoch": 0.9987693355066345,
      "grad_norm": 1.5971896648406982,
      "learning_rate": 8.055191318041946e-06,
      "loss": 0.0162,
      "step": 610300
    },
    {
      "epoch": 0.9988020659452878,
      "grad_norm": 1.2974907159805298,
      "learning_rate": 8.05512542582843e-06,
      "loss": 0.0302,
      "step": 610320
    },
    {
      "epoch": 0.9988347963839411,
      "grad_norm": 0.3922971189022064,
      "learning_rate": 8.055059533614914e-06,
      "loss": 0.0241,
      "step": 610340
    },
    {
      "epoch": 0.9988675268225945,
      "grad_norm": 0.4517768621444702,
      "learning_rate": 8.054993641401395e-06,
      "loss": 0.0257,
      "step": 610360
    },
    {
      "epoch": 0.9989002572612479,
      "grad_norm": 1.6910912990570068,
      "learning_rate": 8.054927749187879e-06,
      "loss": 0.0301,
      "step": 610380
    },
    {
      "epoch": 0.9989329876999011,
      "grad_norm": 0.5467309951782227,
      "learning_rate": 8.054861856974363e-06,
      "loss": 0.023,
      "step": 610400
    },
    {
      "epoch": 0.9989657181385545,
      "grad_norm": 2.042813777923584,
      "learning_rate": 8.054795964760846e-06,
      "loss": 0.0353,
      "step": 610420
    },
    {
      "epoch": 0.9989984485772079,
      "grad_norm": 2.144059181213379,
      "learning_rate": 8.054730072547328e-06,
      "loss": 0.0212,
      "step": 610440
    },
    {
      "epoch": 0.9990311790158611,
      "grad_norm": 0.7129248380661011,
      "learning_rate": 8.054664180333812e-06,
      "loss": 0.0236,
      "step": 610460
    },
    {
      "epoch": 0.9990639094545145,
      "grad_norm": 3.4457366466522217,
      "learning_rate": 8.054598288120294e-06,
      "loss": 0.0368,
      "step": 610480
    },
    {
      "epoch": 0.9990966398931679,
      "grad_norm": 0.44381067156791687,
      "learning_rate": 8.054532395906777e-06,
      "loss": 0.0324,
      "step": 610500
    },
    {
      "epoch": 0.9991293703318211,
      "grad_norm": 1.2202900648117065,
      "learning_rate": 8.054466503693259e-06,
      "loss": 0.0363,
      "step": 610520
    },
    {
      "epoch": 0.9991621007704745,
      "grad_norm": 0.3375411331653595,
      "learning_rate": 8.054400611479743e-06,
      "loss": 0.0257,
      "step": 610540
    },
    {
      "epoch": 0.9991948312091279,
      "grad_norm": 0.6881308555603027,
      "learning_rate": 8.054334719266225e-06,
      "loss": 0.0245,
      "step": 610560
    },
    {
      "epoch": 0.9992275616477811,
      "grad_norm": 0.8021858930587769,
      "learning_rate": 8.054268827052708e-06,
      "loss": 0.0237,
      "step": 610580
    },
    {
      "epoch": 0.9992602920864345,
      "grad_norm": 0.45153412222862244,
      "learning_rate": 8.05420293483919e-06,
      "loss": 0.0261,
      "step": 610600
    },
    {
      "epoch": 0.9992930225250879,
      "grad_norm": 0.4223047196865082,
      "learning_rate": 8.054137042625674e-06,
      "loss": 0.0309,
      "step": 610620
    },
    {
      "epoch": 0.9993257529637413,
      "grad_norm": 4.439981937408447,
      "learning_rate": 8.054071150412155e-06,
      "loss": 0.0176,
      "step": 610640
    },
    {
      "epoch": 0.9993584834023945,
      "grad_norm": 1.1478434801101685,
      "learning_rate": 8.054005258198639e-06,
      "loss": 0.0229,
      "step": 610660
    },
    {
      "epoch": 0.9993912138410479,
      "grad_norm": 0.49439477920532227,
      "learning_rate": 8.053939365985121e-06,
      "loss": 0.0275,
      "step": 610680
    },
    {
      "epoch": 0.9994239442797013,
      "grad_norm": 1.1560293436050415,
      "learning_rate": 8.053873473771605e-06,
      "loss": 0.0291,
      "step": 610700
    },
    {
      "epoch": 0.9994566747183545,
      "grad_norm": 1.0712857246398926,
      "learning_rate": 8.053807581558088e-06,
      "loss": 0.0226,
      "step": 610720
    },
    {
      "epoch": 0.9994894051570079,
      "grad_norm": 0.20533022284507751,
      "learning_rate": 8.05374168934457e-06,
      "loss": 0.0254,
      "step": 610740
    },
    {
      "epoch": 0.9995221355956613,
      "grad_norm": 1.3106857538223267,
      "learning_rate": 8.053675797131054e-06,
      "loss": 0.0246,
      "step": 610760
    },
    {
      "epoch": 0.9995548660343145,
      "grad_norm": 0.3535586893558502,
      "learning_rate": 8.053609904917537e-06,
      "loss": 0.0248,
      "step": 610780
    },
    {
      "epoch": 0.9995875964729679,
      "grad_norm": 0.3732336163520813,
      "learning_rate": 8.053544012704019e-06,
      "loss": 0.0213,
      "step": 610800
    },
    {
      "epoch": 0.9996203269116213,
      "grad_norm": 1.150493860244751,
      "learning_rate": 8.053478120490503e-06,
      "loss": 0.0275,
      "step": 610820
    },
    {
      "epoch": 0.9996530573502747,
      "grad_norm": 0.9825506806373596,
      "learning_rate": 8.053412228276986e-06,
      "loss": 0.0177,
      "step": 610840
    },
    {
      "epoch": 0.9996857877889279,
      "grad_norm": 0.6164593696594238,
      "learning_rate": 8.053346336063468e-06,
      "loss": 0.0204,
      "step": 610860
    },
    {
      "epoch": 0.9997185182275813,
      "grad_norm": 0.8798714876174927,
      "learning_rate": 8.053280443849952e-06,
      "loss": 0.029,
      "step": 610880
    },
    {
      "epoch": 0.9997512486662347,
      "grad_norm": 0.03726286441087723,
      "learning_rate": 8.053214551636434e-06,
      "loss": 0.0226,
      "step": 610900
    },
    {
      "epoch": 0.9997839791048879,
      "grad_norm": 0.15296369791030884,
      "learning_rate": 8.053148659422917e-06,
      "loss": 0.0223,
      "step": 610920
    },
    {
      "epoch": 0.9998167095435413,
      "grad_norm": 0.9175844788551331,
      "learning_rate": 8.053082767209399e-06,
      "loss": 0.0208,
      "step": 610940
    },
    {
      "epoch": 0.9998494399821947,
      "grad_norm": 0.2283260077238083,
      "learning_rate": 8.053016874995883e-06,
      "loss": 0.021,
      "step": 610960
    },
    {
      "epoch": 0.9998821704208479,
      "grad_norm": 2.178366184234619,
      "learning_rate": 8.052950982782365e-06,
      "loss": 0.0331,
      "step": 610980
    },
    {
      "epoch": 0.9999149008595013,
      "grad_norm": 0.5524130463600159,
      "learning_rate": 8.052885090568848e-06,
      "loss": 0.025,
      "step": 611000
    },
    {
      "epoch": 0.9999476312981547,
      "grad_norm": 0.4245898425579071,
      "learning_rate": 8.05281919835533e-06,
      "loss": 0.0275,
      "step": 611020
    },
    {
      "epoch": 0.999980361736808,
      "grad_norm": 1.7761001586914062,
      "learning_rate": 8.052753306141814e-06,
      "loss": 0.0379,
      "step": 611040
    },
    {
      "epoch": 1.0000130921754613,
      "grad_norm": 0.9240568280220032,
      "learning_rate": 8.052687413928296e-06,
      "loss": 0.0255,
      "step": 611060
    },
    {
      "epoch": 1.0000458226141147,
      "grad_norm": 0.7852517366409302,
      "learning_rate": 8.052621521714779e-06,
      "loss": 0.0225,
      "step": 611080
    },
    {
      "epoch": 1.000078553052768,
      "grad_norm": 2.0391461849212646,
      "learning_rate": 8.052555629501263e-06,
      "loss": 0.0305,
      "step": 611100
    },
    {
      "epoch": 1.0001112834914214,
      "grad_norm": 1.4872798919677734,
      "learning_rate": 8.052489737287745e-06,
      "loss": 0.0165,
      "step": 611120
    },
    {
      "epoch": 1.0001440139300748,
      "grad_norm": 0.7298617362976074,
      "learning_rate": 8.052423845074228e-06,
      "loss": 0.0217,
      "step": 611140
    },
    {
      "epoch": 1.000176744368728,
      "grad_norm": 0.6519386768341064,
      "learning_rate": 8.052357952860712e-06,
      "loss": 0.0283,
      "step": 611160
    },
    {
      "epoch": 1.0002094748073813,
      "grad_norm": 0.40763717889785767,
      "learning_rate": 8.052292060647194e-06,
      "loss": 0.0168,
      "step": 611180
    },
    {
      "epoch": 1.0002422052460347,
      "grad_norm": 0.5209892988204956,
      "learning_rate": 8.052226168433677e-06,
      "loss": 0.0218,
      "step": 611200
    },
    {
      "epoch": 1.000274935684688,
      "grad_norm": 0.3192004859447479,
      "learning_rate": 8.052160276220161e-06,
      "loss": 0.0274,
      "step": 611220
    },
    {
      "epoch": 1.0003076661233414,
      "grad_norm": 0.189956933259964,
      "learning_rate": 8.052094384006643e-06,
      "loss": 0.0253,
      "step": 611240
    },
    {
      "epoch": 1.0003403965619948,
      "grad_norm": 1.2921150922775269,
      "learning_rate": 8.052028491793126e-06,
      "loss": 0.0248,
      "step": 611260
    },
    {
      "epoch": 1.000373127000648,
      "grad_norm": 0.19471876323223114,
      "learning_rate": 8.051962599579608e-06,
      "loss": 0.0283,
      "step": 611280
    },
    {
      "epoch": 1.0004058574393013,
      "grad_norm": 1.1226849555969238,
      "learning_rate": 8.051896707366092e-06,
      "loss": 0.0236,
      "step": 611300
    },
    {
      "epoch": 1.0004385878779547,
      "grad_norm": 0.7119356393814087,
      "learning_rate": 8.051830815152574e-06,
      "loss": 0.0202,
      "step": 611320
    },
    {
      "epoch": 1.000471318316608,
      "grad_norm": 0.9172895550727844,
      "learning_rate": 8.051764922939057e-06,
      "loss": 0.0257,
      "step": 611340
    },
    {
      "epoch": 1.0005040487552614,
      "grad_norm": 0.7441724538803101,
      "learning_rate": 8.05169903072554e-06,
      "loss": 0.022,
      "step": 611360
    },
    {
      "epoch": 1.0005367791939148,
      "grad_norm": 1.1724722385406494,
      "learning_rate": 8.051633138512023e-06,
      "loss": 0.0197,
      "step": 611380
    },
    {
      "epoch": 1.0005695096325682,
      "grad_norm": 3.3116486072540283,
      "learning_rate": 8.051567246298505e-06,
      "loss": 0.0163,
      "step": 611400
    },
    {
      "epoch": 1.0006022400712213,
      "grad_norm": 0.3155095875263214,
      "learning_rate": 8.051501354084988e-06,
      "loss": 0.0178,
      "step": 611420
    },
    {
      "epoch": 1.0006349705098747,
      "grad_norm": 0.40106263756752014,
      "learning_rate": 8.051435461871472e-06,
      "loss": 0.0168,
      "step": 611440
    },
    {
      "epoch": 1.000667700948528,
      "grad_norm": 0.5923861861228943,
      "learning_rate": 8.051369569657954e-06,
      "loss": 0.0302,
      "step": 611460
    },
    {
      "epoch": 1.0007004313871815,
      "grad_norm": 1.3923757076263428,
      "learning_rate": 8.051303677444437e-06,
      "loss": 0.0249,
      "step": 611480
    },
    {
      "epoch": 1.0007331618258348,
      "grad_norm": 1.4008541107177734,
      "learning_rate": 8.05123778523092e-06,
      "loss": 0.024,
      "step": 611500
    },
    {
      "epoch": 1.0007658922644882,
      "grad_norm": 0.39483511447906494,
      "learning_rate": 8.051171893017403e-06,
      "loss": 0.024,
      "step": 611520
    },
    {
      "epoch": 1.0007986227031416,
      "grad_norm": 2.063692331314087,
      "learning_rate": 8.051106000803885e-06,
      "loss": 0.0394,
      "step": 611540
    },
    {
      "epoch": 1.0008313531417947,
      "grad_norm": 2.810065269470215,
      "learning_rate": 8.051040108590368e-06,
      "loss": 0.0348,
      "step": 611560
    },
    {
      "epoch": 1.000864083580448,
      "grad_norm": 0.4262116849422455,
      "learning_rate": 8.050974216376852e-06,
      "loss": 0.0221,
      "step": 611580
    },
    {
      "epoch": 1.0008968140191015,
      "grad_norm": 0.6543436050415039,
      "learning_rate": 8.050908324163334e-06,
      "loss": 0.0298,
      "step": 611600
    },
    {
      "epoch": 1.0009295444577548,
      "grad_norm": 1.3067106008529663,
      "learning_rate": 8.050842431949817e-06,
      "loss": 0.0188,
      "step": 611620
    },
    {
      "epoch": 1.0009622748964082,
      "grad_norm": 0.7073385119438171,
      "learning_rate": 8.050776539736301e-06,
      "loss": 0.0195,
      "step": 611640
    },
    {
      "epoch": 1.0009950053350616,
      "grad_norm": 0.5339452028274536,
      "learning_rate": 8.050710647522783e-06,
      "loss": 0.0242,
      "step": 611660
    },
    {
      "epoch": 1.0010277357737147,
      "grad_norm": 0.5052847266197205,
      "learning_rate": 8.050644755309266e-06,
      "loss": 0.0186,
      "step": 611680
    },
    {
      "epoch": 1.001060466212368,
      "grad_norm": 0.6109067797660828,
      "learning_rate": 8.050578863095748e-06,
      "loss": 0.0224,
      "step": 611700
    },
    {
      "epoch": 1.0010931966510215,
      "grad_norm": 0.630236029624939,
      "learning_rate": 8.050512970882232e-06,
      "loss": 0.0337,
      "step": 611720
    },
    {
      "epoch": 1.0011259270896748,
      "grad_norm": 1.0055928230285645,
      "learning_rate": 8.050447078668714e-06,
      "loss": 0.0152,
      "step": 611740
    },
    {
      "epoch": 1.0011586575283282,
      "grad_norm": 0.5472261905670166,
      "learning_rate": 8.050381186455197e-06,
      "loss": 0.0269,
      "step": 611760
    },
    {
      "epoch": 1.0011913879669816,
      "grad_norm": 0.46241429448127747,
      "learning_rate": 8.05031529424168e-06,
      "loss": 0.0187,
      "step": 611780
    },
    {
      "epoch": 1.001224118405635,
      "grad_norm": 7.101539611816406,
      "learning_rate": 8.050249402028163e-06,
      "loss": 0.0367,
      "step": 611800
    },
    {
      "epoch": 1.0012568488442881,
      "grad_norm": 0.2979724407196045,
      "learning_rate": 8.050183509814646e-06,
      "loss": 0.0215,
      "step": 611820
    },
    {
      "epoch": 1.0012895792829415,
      "grad_norm": 1.2464056015014648,
      "learning_rate": 8.050117617601128e-06,
      "loss": 0.0223,
      "step": 611840
    },
    {
      "epoch": 1.0013223097215949,
      "grad_norm": 4.955302715301514,
      "learning_rate": 8.050051725387612e-06,
      "loss": 0.0243,
      "step": 611860
    },
    {
      "epoch": 1.0013550401602482,
      "grad_norm": 0.7553991079330444,
      "learning_rate": 8.049985833174094e-06,
      "loss": 0.0221,
      "step": 611880
    },
    {
      "epoch": 1.0013877705989016,
      "grad_norm": 1.1145031452178955,
      "learning_rate": 8.049919940960577e-06,
      "loss": 0.0218,
      "step": 611900
    },
    {
      "epoch": 1.001420501037555,
      "grad_norm": 9.1748046875,
      "learning_rate": 8.04985404874706e-06,
      "loss": 0.0249,
      "step": 611920
    },
    {
      "epoch": 1.0014532314762083,
      "grad_norm": 1.1059508323669434,
      "learning_rate": 8.049788156533543e-06,
      "loss": 0.0248,
      "step": 611940
    },
    {
      "epoch": 1.0014859619148615,
      "grad_norm": 0.9661370515823364,
      "learning_rate": 8.049722264320026e-06,
      "loss": 0.0257,
      "step": 611960
    },
    {
      "epoch": 1.0015186923535149,
      "grad_norm": 1.1343120336532593,
      "learning_rate": 8.049656372106508e-06,
      "loss": 0.0227,
      "step": 611980
    },
    {
      "epoch": 1.0015514227921682,
      "grad_norm": 1.633935809135437,
      "learning_rate": 8.049590479892992e-06,
      "loss": 0.018,
      "step": 612000
    },
    {
      "epoch": 1.0015841532308216,
      "grad_norm": 0.43959712982177734,
      "learning_rate": 8.049524587679476e-06,
      "loss": 0.0242,
      "step": 612020
    },
    {
      "epoch": 1.001616883669475,
      "grad_norm": 1.149544358253479,
      "learning_rate": 8.049458695465957e-06,
      "loss": 0.0299,
      "step": 612040
    },
    {
      "epoch": 1.0016496141081284,
      "grad_norm": 0.3235681653022766,
      "learning_rate": 8.049392803252441e-06,
      "loss": 0.025,
      "step": 612060
    },
    {
      "epoch": 1.0016823445467815,
      "grad_norm": 0.43708646297454834,
      "learning_rate": 8.049326911038923e-06,
      "loss": 0.0285,
      "step": 612080
    },
    {
      "epoch": 1.0017150749854349,
      "grad_norm": 0.18081755936145782,
      "learning_rate": 8.049261018825407e-06,
      "loss": 0.0269,
      "step": 612100
    },
    {
      "epoch": 1.0017478054240883,
      "grad_norm": 0.5437123775482178,
      "learning_rate": 8.049195126611888e-06,
      "loss": 0.023,
      "step": 612120
    },
    {
      "epoch": 1.0017805358627416,
      "grad_norm": 0.4291220009326935,
      "learning_rate": 8.049129234398372e-06,
      "loss": 0.0282,
      "step": 612140
    },
    {
      "epoch": 1.001813266301395,
      "grad_norm": 0.13301301002502441,
      "learning_rate": 8.049063342184856e-06,
      "loss": 0.0147,
      "step": 612160
    },
    {
      "epoch": 1.0018459967400484,
      "grad_norm": 0.48379382491111755,
      "learning_rate": 8.048997449971337e-06,
      "loss": 0.0184,
      "step": 612180
    },
    {
      "epoch": 1.0018787271787017,
      "grad_norm": 0.8801393508911133,
      "learning_rate": 8.048931557757821e-06,
      "loss": 0.0278,
      "step": 612200
    },
    {
      "epoch": 1.001911457617355,
      "grad_norm": 0.5979307889938354,
      "learning_rate": 8.048865665544303e-06,
      "loss": 0.0242,
      "step": 612220
    },
    {
      "epoch": 1.0019441880560083,
      "grad_norm": 1.742864727973938,
      "learning_rate": 8.048799773330787e-06,
      "loss": 0.0313,
      "step": 612240
    },
    {
      "epoch": 1.0019769184946616,
      "grad_norm": 2.296133041381836,
      "learning_rate": 8.048733881117268e-06,
      "loss": 0.0286,
      "step": 612260
    },
    {
      "epoch": 1.002009648933315,
      "grad_norm": 0.6819851398468018,
      "learning_rate": 8.048667988903752e-06,
      "loss": 0.021,
      "step": 612280
    },
    {
      "epoch": 1.0020423793719684,
      "grad_norm": 1.5749531984329224,
      "learning_rate": 8.048602096690234e-06,
      "loss": 0.0208,
      "step": 612300
    },
    {
      "epoch": 1.0020751098106218,
      "grad_norm": 0.2018677294254303,
      "learning_rate": 8.048536204476717e-06,
      "loss": 0.0205,
      "step": 612320
    },
    {
      "epoch": 1.0021078402492751,
      "grad_norm": 0.6576026678085327,
      "learning_rate": 8.0484703122632e-06,
      "loss": 0.0231,
      "step": 612340
    },
    {
      "epoch": 1.0021405706879283,
      "grad_norm": 0.39960548281669617,
      "learning_rate": 8.048404420049683e-06,
      "loss": 0.0237,
      "step": 612360
    },
    {
      "epoch": 1.0021733011265817,
      "grad_norm": 0.7667418718338013,
      "learning_rate": 8.048338527836167e-06,
      "loss": 0.0195,
      "step": 612380
    },
    {
      "epoch": 1.002206031565235,
      "grad_norm": 0.25288859009742737,
      "learning_rate": 8.048272635622648e-06,
      "loss": 0.0209,
      "step": 612400
    },
    {
      "epoch": 1.0022387620038884,
      "grad_norm": 1.6239742040634155,
      "learning_rate": 8.048206743409132e-06,
      "loss": 0.0269,
      "step": 612420
    },
    {
      "epoch": 1.0022714924425418,
      "grad_norm": 1.5494604110717773,
      "learning_rate": 8.048140851195616e-06,
      "loss": 0.0172,
      "step": 612440
    },
    {
      "epoch": 1.0023042228811951,
      "grad_norm": 0.4858291745185852,
      "learning_rate": 8.048074958982098e-06,
      "loss": 0.0265,
      "step": 612460
    },
    {
      "epoch": 1.0023369533198483,
      "grad_norm": 0.644912838935852,
      "learning_rate": 8.048009066768581e-06,
      "loss": 0.0207,
      "step": 612480
    },
    {
      "epoch": 1.0023696837585017,
      "grad_norm": 1.2014150619506836,
      "learning_rate": 8.047943174555065e-06,
      "loss": 0.0245,
      "step": 612500
    },
    {
      "epoch": 1.002402414197155,
      "grad_norm": 0.45362764596939087,
      "learning_rate": 8.047877282341547e-06,
      "loss": 0.0215,
      "step": 612520
    },
    {
      "epoch": 1.0024351446358084,
      "grad_norm": 0.9324662685394287,
      "learning_rate": 8.04781139012803e-06,
      "loss": 0.0273,
      "step": 612540
    },
    {
      "epoch": 1.0024678750744618,
      "grad_norm": 0.4435003697872162,
      "learning_rate": 8.047745497914512e-06,
      "loss": 0.0306,
      "step": 612560
    },
    {
      "epoch": 1.0025006055131152,
      "grad_norm": 0.5036036372184753,
      "learning_rate": 8.047679605700996e-06,
      "loss": 0.027,
      "step": 612580
    },
    {
      "epoch": 1.0025333359517685,
      "grad_norm": 1.605676293373108,
      "learning_rate": 8.047613713487478e-06,
      "loss": 0.0243,
      "step": 612600
    },
    {
      "epoch": 1.0025660663904217,
      "grad_norm": 1.0109727382659912,
      "learning_rate": 8.047547821273961e-06,
      "loss": 0.0255,
      "step": 612620
    },
    {
      "epoch": 1.002598796829075,
      "grad_norm": 0.29057547450065613,
      "learning_rate": 8.047481929060443e-06,
      "loss": 0.0203,
      "step": 612640
    },
    {
      "epoch": 1.0026315272677284,
      "grad_norm": 0.7390971183776855,
      "learning_rate": 8.047416036846927e-06,
      "loss": 0.0266,
      "step": 612660
    },
    {
      "epoch": 1.0026642577063818,
      "grad_norm": 0.3818065822124481,
      "learning_rate": 8.047350144633408e-06,
      "loss": 0.0175,
      "step": 612680
    },
    {
      "epoch": 1.0026969881450352,
      "grad_norm": 0.3765552043914795,
      "learning_rate": 8.047284252419892e-06,
      "loss": 0.0308,
      "step": 612700
    },
    {
      "epoch": 1.0027297185836885,
      "grad_norm": 1.2789356708526611,
      "learning_rate": 8.047218360206374e-06,
      "loss": 0.0254,
      "step": 612720
    },
    {
      "epoch": 1.002762449022342,
      "grad_norm": 0.49736058712005615,
      "learning_rate": 8.047152467992858e-06,
      "loss": 0.0203,
      "step": 612740
    },
    {
      "epoch": 1.002795179460995,
      "grad_norm": 0.4835604727268219,
      "learning_rate": 8.047086575779341e-06,
      "loss": 0.0237,
      "step": 612760
    },
    {
      "epoch": 1.0028279098996484,
      "grad_norm": 0.5849551558494568,
      "learning_rate": 8.047020683565823e-06,
      "loss": 0.0299,
      "step": 612780
    },
    {
      "epoch": 1.0028606403383018,
      "grad_norm": 1.321415901184082,
      "learning_rate": 8.046954791352307e-06,
      "loss": 0.0291,
      "step": 612800
    },
    {
      "epoch": 1.0028933707769552,
      "grad_norm": 0.2780528962612152,
      "learning_rate": 8.04688889913879e-06,
      "loss": 0.0226,
      "step": 612820
    },
    {
      "epoch": 1.0029261012156085,
      "grad_norm": 0.4650755524635315,
      "learning_rate": 8.046823006925272e-06,
      "loss": 0.0185,
      "step": 612840
    },
    {
      "epoch": 1.002958831654262,
      "grad_norm": 0.6389074921607971,
      "learning_rate": 8.046757114711756e-06,
      "loss": 0.0184,
      "step": 612860
    },
    {
      "epoch": 1.002991562092915,
      "grad_norm": 0.9917858839035034,
      "learning_rate": 8.04669122249824e-06,
      "loss": 0.0228,
      "step": 612880
    },
    {
      "epoch": 1.0030242925315684,
      "grad_norm": 0.6070234179496765,
      "learning_rate": 8.046625330284721e-06,
      "loss": 0.0232,
      "step": 612900
    },
    {
      "epoch": 1.0030570229702218,
      "grad_norm": 0.9089797735214233,
      "learning_rate": 8.046559438071205e-06,
      "loss": 0.0174,
      "step": 612920
    },
    {
      "epoch": 1.0030897534088752,
      "grad_norm": 0.9251002073287964,
      "learning_rate": 8.046493545857687e-06,
      "loss": 0.0343,
      "step": 612940
    },
    {
      "epoch": 1.0031224838475286,
      "grad_norm": 0.20450523495674133,
      "learning_rate": 8.04642765364417e-06,
      "loss": 0.0253,
      "step": 612960
    },
    {
      "epoch": 1.003155214286182,
      "grad_norm": 0.36471080780029297,
      "learning_rate": 8.046361761430652e-06,
      "loss": 0.023,
      "step": 612980
    },
    {
      "epoch": 1.0031879447248353,
      "grad_norm": 0.665977954864502,
      "learning_rate": 8.046295869217136e-06,
      "loss": 0.024,
      "step": 613000
    },
    {
      "epoch": 1.0032206751634885,
      "grad_norm": 0.2719896733760834,
      "learning_rate": 8.046229977003618e-06,
      "loss": 0.0198,
      "step": 613020
    },
    {
      "epoch": 1.0032534056021418,
      "grad_norm": 0.10373970866203308,
      "learning_rate": 8.046164084790101e-06,
      "loss": 0.0196,
      "step": 613040
    },
    {
      "epoch": 1.0032861360407952,
      "grad_norm": 0.8805432915687561,
      "learning_rate": 8.046098192576583e-06,
      "loss": 0.0277,
      "step": 613060
    },
    {
      "epoch": 1.0033188664794486,
      "grad_norm": 1.3283483982086182,
      "learning_rate": 8.046032300363067e-06,
      "loss": 0.0239,
      "step": 613080
    },
    {
      "epoch": 1.003351596918102,
      "grad_norm": 0.8840759992599487,
      "learning_rate": 8.045966408149549e-06,
      "loss": 0.0203,
      "step": 613100
    },
    {
      "epoch": 1.0033843273567553,
      "grad_norm": 0.24194669723510742,
      "learning_rate": 8.045900515936032e-06,
      "loss": 0.0241,
      "step": 613120
    },
    {
      "epoch": 1.0034170577954087,
      "grad_norm": 0.904923141002655,
      "learning_rate": 8.045834623722514e-06,
      "loss": 0.0212,
      "step": 613140
    },
    {
      "epoch": 1.0034497882340618,
      "grad_norm": 0.7881981134414673,
      "learning_rate": 8.045768731508998e-06,
      "loss": 0.0232,
      "step": 613160
    },
    {
      "epoch": 1.0034825186727152,
      "grad_norm": 0.37115153670310974,
      "learning_rate": 8.045702839295481e-06,
      "loss": 0.026,
      "step": 613180
    },
    {
      "epoch": 1.0035152491113686,
      "grad_norm": 0.8964555263519287,
      "learning_rate": 8.045636947081965e-06,
      "loss": 0.0248,
      "step": 613200
    },
    {
      "epoch": 1.003547979550022,
      "grad_norm": 0.1752842217683792,
      "learning_rate": 8.045571054868447e-06,
      "loss": 0.0203,
      "step": 613220
    },
    {
      "epoch": 1.0035807099886753,
      "grad_norm": 0.6572861671447754,
      "learning_rate": 8.04550516265493e-06,
      "loss": 0.0334,
      "step": 613240
    },
    {
      "epoch": 1.0036134404273287,
      "grad_norm": 0.6012293100357056,
      "learning_rate": 8.045439270441414e-06,
      "loss": 0.0237,
      "step": 613260
    },
    {
      "epoch": 1.0036461708659818,
      "grad_norm": 0.3030131757259369,
      "learning_rate": 8.045373378227896e-06,
      "loss": 0.0147,
      "step": 613280
    },
    {
      "epoch": 1.0036789013046352,
      "grad_norm": 0.46922773122787476,
      "learning_rate": 8.04530748601438e-06,
      "loss": 0.027,
      "step": 613300
    },
    {
      "epoch": 1.0037116317432886,
      "grad_norm": 0.8448807001113892,
      "learning_rate": 8.045241593800861e-06,
      "loss": 0.0318,
      "step": 613320
    },
    {
      "epoch": 1.003744362181942,
      "grad_norm": 0.6004451513290405,
      "learning_rate": 8.045175701587345e-06,
      "loss": 0.022,
      "step": 613340
    },
    {
      "epoch": 1.0037770926205953,
      "grad_norm": 1.2104283571243286,
      "learning_rate": 8.045109809373827e-06,
      "loss": 0.0233,
      "step": 613360
    },
    {
      "epoch": 1.0038098230592487,
      "grad_norm": 1.1923775672912598,
      "learning_rate": 8.04504391716031e-06,
      "loss": 0.0155,
      "step": 613380
    },
    {
      "epoch": 1.003842553497902,
      "grad_norm": 0.4220934510231018,
      "learning_rate": 8.044978024946792e-06,
      "loss": 0.0217,
      "step": 613400
    },
    {
      "epoch": 1.0038752839365552,
      "grad_norm": 1.126142144203186,
      "learning_rate": 8.044912132733276e-06,
      "loss": 0.0244,
      "step": 613420
    },
    {
      "epoch": 1.0039080143752086,
      "grad_norm": 0.6064876914024353,
      "learning_rate": 8.044846240519758e-06,
      "loss": 0.0229,
      "step": 613440
    },
    {
      "epoch": 1.003940744813862,
      "grad_norm": 0.6026327610015869,
      "learning_rate": 8.044780348306241e-06,
      "loss": 0.0289,
      "step": 613460
    },
    {
      "epoch": 1.0039734752525153,
      "grad_norm": 0.7014438509941101,
      "learning_rate": 8.044714456092723e-06,
      "loss": 0.0193,
      "step": 613480
    },
    {
      "epoch": 1.0040062056911687,
      "grad_norm": 0.4766727685928345,
      "learning_rate": 8.044648563879207e-06,
      "loss": 0.015,
      "step": 613500
    },
    {
      "epoch": 1.004038936129822,
      "grad_norm": 1.2113909721374512,
      "learning_rate": 8.044582671665689e-06,
      "loss": 0.0267,
      "step": 613520
    },
    {
      "epoch": 1.0040716665684752,
      "grad_norm": 0.2725053131580353,
      "learning_rate": 8.044516779452172e-06,
      "loss": 0.0262,
      "step": 613540
    },
    {
      "epoch": 1.0041043970071286,
      "grad_norm": 0.37239962816238403,
      "learning_rate": 8.044450887238656e-06,
      "loss": 0.0148,
      "step": 613560
    },
    {
      "epoch": 1.004137127445782,
      "grad_norm": 0.7459746599197388,
      "learning_rate": 8.044384995025138e-06,
      "loss": 0.0208,
      "step": 613580
    },
    {
      "epoch": 1.0041698578844354,
      "grad_norm": 0.8634003400802612,
      "learning_rate": 8.044319102811621e-06,
      "loss": 0.027,
      "step": 613600
    },
    {
      "epoch": 1.0042025883230887,
      "grad_norm": 0.5747470259666443,
      "learning_rate": 8.044253210598105e-06,
      "loss": 0.0226,
      "step": 613620
    },
    {
      "epoch": 1.004235318761742,
      "grad_norm": 0.8588039875030518,
      "learning_rate": 8.044187318384587e-06,
      "loss": 0.0262,
      "step": 613640
    },
    {
      "epoch": 1.0042680492003955,
      "grad_norm": 0.33810147643089294,
      "learning_rate": 8.04412142617107e-06,
      "loss": 0.0227,
      "step": 613660
    },
    {
      "epoch": 1.0043007796390486,
      "grad_norm": 0.23745490610599518,
      "learning_rate": 8.044055533957554e-06,
      "loss": 0.0243,
      "step": 613680
    },
    {
      "epoch": 1.004333510077702,
      "grad_norm": 0.9688998460769653,
      "learning_rate": 8.043989641744036e-06,
      "loss": 0.0239,
      "step": 613700
    },
    {
      "epoch": 1.0043662405163554,
      "grad_norm": 0.128213033080101,
      "learning_rate": 8.04392374953052e-06,
      "loss": 0.0203,
      "step": 613720
    },
    {
      "epoch": 1.0043989709550087,
      "grad_norm": 0.773291826248169,
      "learning_rate": 8.043857857317001e-06,
      "loss": 0.0243,
      "step": 613740
    },
    {
      "epoch": 1.0044317013936621,
      "grad_norm": 0.3302006721496582,
      "learning_rate": 8.043791965103485e-06,
      "loss": 0.0161,
      "step": 613760
    },
    {
      "epoch": 1.0044644318323155,
      "grad_norm": 0.8572946786880493,
      "learning_rate": 8.043726072889967e-06,
      "loss": 0.0231,
      "step": 613780
    },
    {
      "epoch": 1.0044971622709689,
      "grad_norm": 0.214640274643898,
      "learning_rate": 8.04366018067645e-06,
      "loss": 0.0266,
      "step": 613800
    },
    {
      "epoch": 1.004529892709622,
      "grad_norm": 0.5103539824485779,
      "learning_rate": 8.043594288462932e-06,
      "loss": 0.0247,
      "step": 613820
    },
    {
      "epoch": 1.0045626231482754,
      "grad_norm": 0.6517921090126038,
      "learning_rate": 8.043528396249416e-06,
      "loss": 0.0251,
      "step": 613840
    },
    {
      "epoch": 1.0045953535869288,
      "grad_norm": 0.28421029448509216,
      "learning_rate": 8.043462504035898e-06,
      "loss": 0.0155,
      "step": 613860
    },
    {
      "epoch": 1.0046280840255821,
      "grad_norm": 0.6664313077926636,
      "learning_rate": 8.043396611822381e-06,
      "loss": 0.0234,
      "step": 613880
    },
    {
      "epoch": 1.0046608144642355,
      "grad_norm": 0.4565615952014923,
      "learning_rate": 8.043330719608865e-06,
      "loss": 0.0256,
      "step": 613900
    },
    {
      "epoch": 1.0046935449028889,
      "grad_norm": 0.49557003378868103,
      "learning_rate": 8.043264827395347e-06,
      "loss": 0.0229,
      "step": 613920
    },
    {
      "epoch": 1.004726275341542,
      "grad_norm": 0.5496581792831421,
      "learning_rate": 8.04319893518183e-06,
      "loss": 0.0175,
      "step": 613940
    },
    {
      "epoch": 1.0047590057801954,
      "grad_norm": 1.4881176948547363,
      "learning_rate": 8.043133042968312e-06,
      "loss": 0.0252,
      "step": 613960
    },
    {
      "epoch": 1.0047917362188488,
      "grad_norm": 1.6976335048675537,
      "learning_rate": 8.043067150754796e-06,
      "loss": 0.0287,
      "step": 613980
    },
    {
      "epoch": 1.0048244666575021,
      "grad_norm": 0.7198834419250488,
      "learning_rate": 8.04300125854128e-06,
      "loss": 0.0272,
      "step": 614000
    },
    {
      "epoch": 1.0048571970961555,
      "grad_norm": 0.727559506893158,
      "learning_rate": 8.042935366327761e-06,
      "loss": 0.0213,
      "step": 614020
    },
    {
      "epoch": 1.0048899275348089,
      "grad_norm": 0.3874017894268036,
      "learning_rate": 8.042869474114245e-06,
      "loss": 0.027,
      "step": 614040
    },
    {
      "epoch": 1.0049226579734623,
      "grad_norm": 0.1819138079881668,
      "learning_rate": 8.042803581900729e-06,
      "loss": 0.027,
      "step": 614060
    },
    {
      "epoch": 1.0049553884121154,
      "grad_norm": 0.18866975605487823,
      "learning_rate": 8.04273768968721e-06,
      "loss": 0.0302,
      "step": 614080
    },
    {
      "epoch": 1.0049881188507688,
      "grad_norm": 1.5825008153915405,
      "learning_rate": 8.042671797473694e-06,
      "loss": 0.0248,
      "step": 614100
    },
    {
      "epoch": 1.0050208492894221,
      "grad_norm": 1.274532675743103,
      "learning_rate": 8.042605905260176e-06,
      "loss": 0.0288,
      "step": 614120
    },
    {
      "epoch": 1.0050535797280755,
      "grad_norm": 1.7961612939834595,
      "learning_rate": 8.04254001304666e-06,
      "loss": 0.0234,
      "step": 614140
    },
    {
      "epoch": 1.005086310166729,
      "grad_norm": 0.3283461034297943,
      "learning_rate": 8.042474120833141e-06,
      "loss": 0.0219,
      "step": 614160
    },
    {
      "epoch": 1.0051190406053823,
      "grad_norm": 1.1363192796707153,
      "learning_rate": 8.042408228619625e-06,
      "loss": 0.0187,
      "step": 614180
    },
    {
      "epoch": 1.0051517710440356,
      "grad_norm": 1.2280784845352173,
      "learning_rate": 8.042342336406107e-06,
      "loss": 0.0273,
      "step": 614200
    },
    {
      "epoch": 1.0051845014826888,
      "grad_norm": 0.6705624461174011,
      "learning_rate": 8.04227644419259e-06,
      "loss": 0.0322,
      "step": 614220
    },
    {
      "epoch": 1.0052172319213422,
      "grad_norm": 0.4338054656982422,
      "learning_rate": 8.042210551979072e-06,
      "loss": 0.0228,
      "step": 614240
    },
    {
      "epoch": 1.0052499623599955,
      "grad_norm": 0.3609202802181244,
      "learning_rate": 8.042144659765556e-06,
      "loss": 0.0226,
      "step": 614260
    },
    {
      "epoch": 1.005282692798649,
      "grad_norm": 0.649734377861023,
      "learning_rate": 8.04207876755204e-06,
      "loss": 0.0327,
      "step": 614280
    },
    {
      "epoch": 1.0053154232373023,
      "grad_norm": 0.9237532019615173,
      "learning_rate": 8.042012875338521e-06,
      "loss": 0.0191,
      "step": 614300
    },
    {
      "epoch": 1.0053481536759556,
      "grad_norm": 0.5441027283668518,
      "learning_rate": 8.041946983125005e-06,
      "loss": 0.0236,
      "step": 614320
    },
    {
      "epoch": 1.0053808841146088,
      "grad_norm": 0.6520411372184753,
      "learning_rate": 8.041881090911487e-06,
      "loss": 0.0283,
      "step": 614340
    },
    {
      "epoch": 1.0054136145532622,
      "grad_norm": 0.43902501463890076,
      "learning_rate": 8.04181519869797e-06,
      "loss": 0.0223,
      "step": 614360
    },
    {
      "epoch": 1.0054463449919155,
      "grad_norm": 1.1431975364685059,
      "learning_rate": 8.041749306484452e-06,
      "loss": 0.0246,
      "step": 614380
    },
    {
      "epoch": 1.005479075430569,
      "grad_norm": 3.0923147201538086,
      "learning_rate": 8.041683414270936e-06,
      "loss": 0.019,
      "step": 614400
    },
    {
      "epoch": 1.0055118058692223,
      "grad_norm": 0.8211331367492676,
      "learning_rate": 8.04161752205742e-06,
      "loss": 0.023,
      "step": 614420
    },
    {
      "epoch": 1.0055445363078757,
      "grad_norm": 1.6025553941726685,
      "learning_rate": 8.041551629843901e-06,
      "loss": 0.0293,
      "step": 614440
    },
    {
      "epoch": 1.005577266746529,
      "grad_norm": 2.147448778152466,
      "learning_rate": 8.041485737630385e-06,
      "loss": 0.0208,
      "step": 614460
    },
    {
      "epoch": 1.0056099971851822,
      "grad_norm": 0.6393560171127319,
      "learning_rate": 8.041419845416869e-06,
      "loss": 0.0178,
      "step": 614480
    },
    {
      "epoch": 1.0056427276238356,
      "grad_norm": 1.1595224142074585,
      "learning_rate": 8.04135395320335e-06,
      "loss": 0.0307,
      "step": 614500
    },
    {
      "epoch": 1.005675458062489,
      "grad_norm": 1.2963861227035522,
      "learning_rate": 8.041288060989834e-06,
      "loss": 0.0289,
      "step": 614520
    },
    {
      "epoch": 1.0057081885011423,
      "grad_norm": 0.444418340921402,
      "learning_rate": 8.041222168776316e-06,
      "loss": 0.0316,
      "step": 614540
    },
    {
      "epoch": 1.0057409189397957,
      "grad_norm": 0.418495774269104,
      "learning_rate": 8.0411562765628e-06,
      "loss": 0.0285,
      "step": 614560
    },
    {
      "epoch": 1.005773649378449,
      "grad_norm": 3.6206600666046143,
      "learning_rate": 8.041090384349281e-06,
      "loss": 0.0234,
      "step": 614580
    },
    {
      "epoch": 1.0058063798171024,
      "grad_norm": 0.5024816989898682,
      "learning_rate": 8.041024492135765e-06,
      "loss": 0.0263,
      "step": 614600
    },
    {
      "epoch": 1.0058391102557556,
      "grad_norm": 0.7603015899658203,
      "learning_rate": 8.040958599922249e-06,
      "loss": 0.0259,
      "step": 614620
    },
    {
      "epoch": 1.005871840694409,
      "grad_norm": 0.8121538162231445,
      "learning_rate": 8.04089270770873e-06,
      "loss": 0.0242,
      "step": 614640
    },
    {
      "epoch": 1.0059045711330623,
      "grad_norm": 1.1620259284973145,
      "learning_rate": 8.040826815495214e-06,
      "loss": 0.023,
      "step": 614660
    },
    {
      "epoch": 1.0059373015717157,
      "grad_norm": 0.27360326051712036,
      "learning_rate": 8.040760923281696e-06,
      "loss": 0.0274,
      "step": 614680
    },
    {
      "epoch": 1.005970032010369,
      "grad_norm": 0.3719613254070282,
      "learning_rate": 8.04069503106818e-06,
      "loss": 0.0221,
      "step": 614700
    },
    {
      "epoch": 1.0060027624490224,
      "grad_norm": 0.6642730832099915,
      "learning_rate": 8.040629138854662e-06,
      "loss": 0.0363,
      "step": 614720
    },
    {
      "epoch": 1.0060354928876756,
      "grad_norm": 1.4836485385894775,
      "learning_rate": 8.040563246641145e-06,
      "loss": 0.0245,
      "step": 614740
    },
    {
      "epoch": 1.006068223326329,
      "grad_norm": 1.481269359588623,
      "learning_rate": 8.040497354427627e-06,
      "loss": 0.0222,
      "step": 614760
    },
    {
      "epoch": 1.0061009537649823,
      "grad_norm": 0.14151808619499207,
      "learning_rate": 8.04043146221411e-06,
      "loss": 0.0185,
      "step": 614780
    },
    {
      "epoch": 1.0061336842036357,
      "grad_norm": 0.9920742511749268,
      "learning_rate": 8.040365570000594e-06,
      "loss": 0.0214,
      "step": 614800
    },
    {
      "epoch": 1.006166414642289,
      "grad_norm": 1.2797960042953491,
      "learning_rate": 8.040299677787076e-06,
      "loss": 0.0234,
      "step": 614820
    },
    {
      "epoch": 1.0061991450809424,
      "grad_norm": 0.40544039011001587,
      "learning_rate": 8.04023378557356e-06,
      "loss": 0.0176,
      "step": 614840
    },
    {
      "epoch": 1.0062318755195958,
      "grad_norm": 1.3672341108322144,
      "learning_rate": 8.040167893360043e-06,
      "loss": 0.0293,
      "step": 614860
    },
    {
      "epoch": 1.006264605958249,
      "grad_norm": 0.30193695425987244,
      "learning_rate": 8.040102001146525e-06,
      "loss": 0.0216,
      "step": 614880
    },
    {
      "epoch": 1.0062973363969023,
      "grad_norm": 1.5013062953948975,
      "learning_rate": 8.040036108933009e-06,
      "loss": 0.0236,
      "step": 614900
    },
    {
      "epoch": 1.0063300668355557,
      "grad_norm": 0.08616805821657181,
      "learning_rate": 8.03997021671949e-06,
      "loss": 0.0277,
      "step": 614920
    },
    {
      "epoch": 1.006362797274209,
      "grad_norm": 0.6016422510147095,
      "learning_rate": 8.039904324505974e-06,
      "loss": 0.0188,
      "step": 614940
    },
    {
      "epoch": 1.0063955277128624,
      "grad_norm": 0.7220706343650818,
      "learning_rate": 8.039838432292458e-06,
      "loss": 0.0227,
      "step": 614960
    },
    {
      "epoch": 1.0064282581515158,
      "grad_norm": 0.48799940943717957,
      "learning_rate": 8.03977254007894e-06,
      "loss": 0.0269,
      "step": 614980
    },
    {
      "epoch": 1.0064609885901692,
      "grad_norm": 0.4532610774040222,
      "learning_rate": 8.039706647865423e-06,
      "loss": 0.0284,
      "step": 615000
    },
    {
      "epoch": 1.0064937190288223,
      "grad_norm": 1.0931007862091064,
      "learning_rate": 8.039640755651905e-06,
      "loss": 0.0308,
      "step": 615020
    },
    {
      "epoch": 1.0065264494674757,
      "grad_norm": 0.8231542110443115,
      "learning_rate": 8.039574863438389e-06,
      "loss": 0.0275,
      "step": 615040
    },
    {
      "epoch": 1.006559179906129,
      "grad_norm": 0.1932741105556488,
      "learning_rate": 8.03950897122487e-06,
      "loss": 0.0214,
      "step": 615060
    },
    {
      "epoch": 1.0065919103447825,
      "grad_norm": 0.2727898061275482,
      "learning_rate": 8.039443079011354e-06,
      "loss": 0.0323,
      "step": 615080
    },
    {
      "epoch": 1.0066246407834358,
      "grad_norm": 0.350790411233902,
      "learning_rate": 8.039377186797836e-06,
      "loss": 0.0335,
      "step": 615100
    },
    {
      "epoch": 1.0066573712220892,
      "grad_norm": 2.0130631923675537,
      "learning_rate": 8.03931129458432e-06,
      "loss": 0.0218,
      "step": 615120
    },
    {
      "epoch": 1.0066901016607424,
      "grad_norm": 1.1581425666809082,
      "learning_rate": 8.039245402370802e-06,
      "loss": 0.0229,
      "step": 615140
    },
    {
      "epoch": 1.0067228320993957,
      "grad_norm": 0.14662224054336548,
      "learning_rate": 8.039179510157285e-06,
      "loss": 0.022,
      "step": 615160
    },
    {
      "epoch": 1.006755562538049,
      "grad_norm": 1.3050886392593384,
      "learning_rate": 8.039113617943767e-06,
      "loss": 0.0388,
      "step": 615180
    },
    {
      "epoch": 1.0067882929767025,
      "grad_norm": 0.33171793818473816,
      "learning_rate": 8.03904772573025e-06,
      "loss": 0.0176,
      "step": 615200
    },
    {
      "epoch": 1.0068210234153558,
      "grad_norm": 0.849527895450592,
      "learning_rate": 8.038981833516734e-06,
      "loss": 0.0252,
      "step": 615220
    },
    {
      "epoch": 1.0068537538540092,
      "grad_norm": 0.7682775259017944,
      "learning_rate": 8.038915941303216e-06,
      "loss": 0.0223,
      "step": 615240
    },
    {
      "epoch": 1.0068864842926626,
      "grad_norm": 0.7326157093048096,
      "learning_rate": 8.0388500490897e-06,
      "loss": 0.0281,
      "step": 615260
    },
    {
      "epoch": 1.0069192147313157,
      "grad_norm": 0.19522733986377716,
      "learning_rate": 8.038784156876183e-06,
      "loss": 0.021,
      "step": 615280
    },
    {
      "epoch": 1.006951945169969,
      "grad_norm": 0.2242424488067627,
      "learning_rate": 8.038718264662665e-06,
      "loss": 0.0205,
      "step": 615300
    },
    {
      "epoch": 1.0069846756086225,
      "grad_norm": 0.9630858302116394,
      "learning_rate": 8.038652372449149e-06,
      "loss": 0.0289,
      "step": 615320
    },
    {
      "epoch": 1.0070174060472759,
      "grad_norm": 0.49208900332450867,
      "learning_rate": 8.038586480235632e-06,
      "loss": 0.0174,
      "step": 615340
    },
    {
      "epoch": 1.0070501364859292,
      "grad_norm": 0.8432939052581787,
      "learning_rate": 8.038520588022114e-06,
      "loss": 0.0353,
      "step": 615360
    },
    {
      "epoch": 1.0070828669245826,
      "grad_norm": 5.319905757904053,
      "learning_rate": 8.038454695808598e-06,
      "loss": 0.0339,
      "step": 615380
    },
    {
      "epoch": 1.007115597363236,
      "grad_norm": 2.0852372646331787,
      "learning_rate": 8.03838880359508e-06,
      "loss": 0.0199,
      "step": 615400
    },
    {
      "epoch": 1.0071483278018891,
      "grad_norm": 0.9406936764717102,
      "learning_rate": 8.038322911381563e-06,
      "loss": 0.0283,
      "step": 615420
    },
    {
      "epoch": 1.0071810582405425,
      "grad_norm": 1.021587610244751,
      "learning_rate": 8.038257019168045e-06,
      "loss": 0.0223,
      "step": 615440
    },
    {
      "epoch": 1.0072137886791959,
      "grad_norm": 2.3948652744293213,
      "learning_rate": 8.038191126954529e-06,
      "loss": 0.029,
      "step": 615460
    },
    {
      "epoch": 1.0072465191178492,
      "grad_norm": 0.5382170677185059,
      "learning_rate": 8.03812523474101e-06,
      "loss": 0.0267,
      "step": 615480
    },
    {
      "epoch": 1.0072792495565026,
      "grad_norm": 1.4141006469726562,
      "learning_rate": 8.038059342527494e-06,
      "loss": 0.0235,
      "step": 615500
    },
    {
      "epoch": 1.007311979995156,
      "grad_norm": 0.27556735277175903,
      "learning_rate": 8.037993450313976e-06,
      "loss": 0.0231,
      "step": 615520
    },
    {
      "epoch": 1.0073447104338091,
      "grad_norm": 1.098380208015442,
      "learning_rate": 8.03792755810046e-06,
      "loss": 0.0276,
      "step": 615540
    },
    {
      "epoch": 1.0073774408724625,
      "grad_norm": 1.1243836879730225,
      "learning_rate": 8.037861665886942e-06,
      "loss": 0.0304,
      "step": 615560
    },
    {
      "epoch": 1.0074101713111159,
      "grad_norm": 1.364117980003357,
      "learning_rate": 8.037795773673425e-06,
      "loss": 0.0212,
      "step": 615580
    },
    {
      "epoch": 1.0074429017497692,
      "grad_norm": 1.0351961851119995,
      "learning_rate": 8.037729881459909e-06,
      "loss": 0.0231,
      "step": 615600
    },
    {
      "epoch": 1.0074756321884226,
      "grad_norm": 0.6945729851722717,
      "learning_rate": 8.03766398924639e-06,
      "loss": 0.0273,
      "step": 615620
    },
    {
      "epoch": 1.007508362627076,
      "grad_norm": 1.1782922744750977,
      "learning_rate": 8.037598097032874e-06,
      "loss": 0.0251,
      "step": 615640
    },
    {
      "epoch": 1.0075410930657294,
      "grad_norm": 0.4583098292350769,
      "learning_rate": 8.037532204819358e-06,
      "loss": 0.0292,
      "step": 615660
    },
    {
      "epoch": 1.0075738235043825,
      "grad_norm": 0.5991308093070984,
      "learning_rate": 8.03746631260584e-06,
      "loss": 0.0198,
      "step": 615680
    },
    {
      "epoch": 1.0076065539430359,
      "grad_norm": 1.2252044677734375,
      "learning_rate": 8.037400420392323e-06,
      "loss": 0.0295,
      "step": 615700
    },
    {
      "epoch": 1.0076392843816893,
      "grad_norm": 1.5109503269195557,
      "learning_rate": 8.037334528178807e-06,
      "loss": 0.0204,
      "step": 615720
    },
    {
      "epoch": 1.0076720148203426,
      "grad_norm": 0.622398853302002,
      "learning_rate": 8.037268635965289e-06,
      "loss": 0.0162,
      "step": 615740
    },
    {
      "epoch": 1.007704745258996,
      "grad_norm": 0.9615182280540466,
      "learning_rate": 8.037202743751772e-06,
      "loss": 0.0249,
      "step": 615760
    },
    {
      "epoch": 1.0077374756976494,
      "grad_norm": 2.0063979625701904,
      "learning_rate": 8.037136851538254e-06,
      "loss": 0.0212,
      "step": 615780
    },
    {
      "epoch": 1.0077702061363027,
      "grad_norm": 1.023341178894043,
      "learning_rate": 8.037070959324738e-06,
      "loss": 0.0248,
      "step": 615800
    },
    {
      "epoch": 1.007802936574956,
      "grad_norm": 0.5298010110855103,
      "learning_rate": 8.03700506711122e-06,
      "loss": 0.022,
      "step": 615820
    },
    {
      "epoch": 1.0078356670136093,
      "grad_norm": 1.0337064266204834,
      "learning_rate": 8.036939174897703e-06,
      "loss": 0.0219,
      "step": 615840
    },
    {
      "epoch": 1.0078683974522626,
      "grad_norm": 0.5215879678726196,
      "learning_rate": 8.036873282684185e-06,
      "loss": 0.0241,
      "step": 615860
    },
    {
      "epoch": 1.007901127890916,
      "grad_norm": 0.8055711984634399,
      "learning_rate": 8.036807390470669e-06,
      "loss": 0.0277,
      "step": 615880
    },
    {
      "epoch": 1.0079338583295694,
      "grad_norm": 0.9062952995300293,
      "learning_rate": 8.03674149825715e-06,
      "loss": 0.0312,
      "step": 615900
    },
    {
      "epoch": 1.0079665887682228,
      "grad_norm": 3.3933379650115967,
      "learning_rate": 8.036675606043634e-06,
      "loss": 0.0277,
      "step": 615920
    },
    {
      "epoch": 1.007999319206876,
      "grad_norm": 0.44865453243255615,
      "learning_rate": 8.036609713830116e-06,
      "loss": 0.0303,
      "step": 615940
    },
    {
      "epoch": 1.0080320496455293,
      "grad_norm": 0.6161346435546875,
      "learning_rate": 8.0365438216166e-06,
      "loss": 0.0186,
      "step": 615960
    },
    {
      "epoch": 1.0080647800841827,
      "grad_norm": 0.5523677468299866,
      "learning_rate": 8.036477929403083e-06,
      "loss": 0.0312,
      "step": 615980
    },
    {
      "epoch": 1.008097510522836,
      "grad_norm": 0.6472889184951782,
      "learning_rate": 8.036412037189565e-06,
      "loss": 0.0294,
      "step": 616000
    },
    {
      "epoch": 1.0081302409614894,
      "grad_norm": 1.2881464958190918,
      "learning_rate": 8.036346144976049e-06,
      "loss": 0.025,
      "step": 616020
    },
    {
      "epoch": 1.0081629714001428,
      "grad_norm": 0.3892882466316223,
      "learning_rate": 8.036280252762532e-06,
      "loss": 0.0247,
      "step": 616040
    },
    {
      "epoch": 1.0081957018387961,
      "grad_norm": 2.007031202316284,
      "learning_rate": 8.036214360549014e-06,
      "loss": 0.0265,
      "step": 616060
    },
    {
      "epoch": 1.0082284322774493,
      "grad_norm": 0.6603956818580627,
      "learning_rate": 8.036148468335498e-06,
      "loss": 0.0216,
      "step": 616080
    },
    {
      "epoch": 1.0082611627161027,
      "grad_norm": 2.501709461212158,
      "learning_rate": 8.036082576121982e-06,
      "loss": 0.0307,
      "step": 616100
    },
    {
      "epoch": 1.008293893154756,
      "grad_norm": 1.3845196962356567,
      "learning_rate": 8.036016683908463e-06,
      "loss": 0.0246,
      "step": 616120
    },
    {
      "epoch": 1.0083266235934094,
      "grad_norm": 0.3885076344013214,
      "learning_rate": 8.035950791694947e-06,
      "loss": 0.0271,
      "step": 616140
    },
    {
      "epoch": 1.0083593540320628,
      "grad_norm": 1.2002496719360352,
      "learning_rate": 8.035884899481429e-06,
      "loss": 0.0336,
      "step": 616160
    },
    {
      "epoch": 1.0083920844707162,
      "grad_norm": 0.6721939444541931,
      "learning_rate": 8.035819007267913e-06,
      "loss": 0.0275,
      "step": 616180
    },
    {
      "epoch": 1.0084248149093695,
      "grad_norm": 0.996796190738678,
      "learning_rate": 8.035753115054394e-06,
      "loss": 0.0253,
      "step": 616200
    },
    {
      "epoch": 1.0084575453480227,
      "grad_norm": 0.8539770841598511,
      "learning_rate": 8.035687222840878e-06,
      "loss": 0.0173,
      "step": 616220
    },
    {
      "epoch": 1.008490275786676,
      "grad_norm": 0.6350055932998657,
      "learning_rate": 8.03562133062736e-06,
      "loss": 0.0315,
      "step": 616240
    },
    {
      "epoch": 1.0085230062253294,
      "grad_norm": 0.6845566630363464,
      "learning_rate": 8.035555438413843e-06,
      "loss": 0.0188,
      "step": 616260
    },
    {
      "epoch": 1.0085557366639828,
      "grad_norm": 0.30462464690208435,
      "learning_rate": 8.035489546200325e-06,
      "loss": 0.0313,
      "step": 616280
    },
    {
      "epoch": 1.0085884671026362,
      "grad_norm": 0.4071652889251709,
      "learning_rate": 8.035423653986809e-06,
      "loss": 0.0181,
      "step": 616300
    },
    {
      "epoch": 1.0086211975412895,
      "grad_norm": 1.909176230430603,
      "learning_rate": 8.035357761773291e-06,
      "loss": 0.0292,
      "step": 616320
    },
    {
      "epoch": 1.0086539279799427,
      "grad_norm": 1.5710078477859497,
      "learning_rate": 8.035291869559774e-06,
      "loss": 0.0224,
      "step": 616340
    },
    {
      "epoch": 1.008686658418596,
      "grad_norm": 1.277864694595337,
      "learning_rate": 8.035225977346258e-06,
      "loss": 0.0191,
      "step": 616360
    },
    {
      "epoch": 1.0087193888572494,
      "grad_norm": 0.8287782073020935,
      "learning_rate": 8.03516008513274e-06,
      "loss": 0.0225,
      "step": 616380
    },
    {
      "epoch": 1.0087521192959028,
      "grad_norm": 0.5373364686965942,
      "learning_rate": 8.035094192919224e-06,
      "loss": 0.026,
      "step": 616400
    },
    {
      "epoch": 1.0087848497345562,
      "grad_norm": 0.7581901550292969,
      "learning_rate": 8.035028300705705e-06,
      "loss": 0.0196,
      "step": 616420
    },
    {
      "epoch": 1.0088175801732095,
      "grad_norm": 1.1840593814849854,
      "learning_rate": 8.034962408492189e-06,
      "loss": 0.0297,
      "step": 616440
    },
    {
      "epoch": 1.008850310611863,
      "grad_norm": 1.5090478658676147,
      "learning_rate": 8.034896516278673e-06,
      "loss": 0.0261,
      "step": 616460
    },
    {
      "epoch": 1.008883041050516,
      "grad_norm": 4.499931335449219,
      "learning_rate": 8.034830624065154e-06,
      "loss": 0.0236,
      "step": 616480
    },
    {
      "epoch": 1.0089157714891694,
      "grad_norm": 0.6923920512199402,
      "learning_rate": 8.034764731851638e-06,
      "loss": 0.0228,
      "step": 616500
    },
    {
      "epoch": 1.0089485019278228,
      "grad_norm": 0.7540190815925598,
      "learning_rate": 8.034698839638122e-06,
      "loss": 0.0267,
      "step": 616520
    },
    {
      "epoch": 1.0089812323664762,
      "grad_norm": 0.223423033952713,
      "learning_rate": 8.034632947424604e-06,
      "loss": 0.0164,
      "step": 616540
    },
    {
      "epoch": 1.0090139628051296,
      "grad_norm": 0.6558837294578552,
      "learning_rate": 8.034567055211087e-06,
      "loss": 0.0228,
      "step": 616560
    },
    {
      "epoch": 1.009046693243783,
      "grad_norm": 0.37609440088272095,
      "learning_rate": 8.034501162997569e-06,
      "loss": 0.0207,
      "step": 616580
    },
    {
      "epoch": 1.009079423682436,
      "grad_norm": 0.36678346991539,
      "learning_rate": 8.034435270784053e-06,
      "loss": 0.0186,
      "step": 616600
    },
    {
      "epoch": 1.0091121541210895,
      "grad_norm": 0.22026540338993073,
      "learning_rate": 8.034369378570534e-06,
      "loss": 0.0233,
      "step": 616620
    },
    {
      "epoch": 1.0091448845597428,
      "grad_norm": 0.5328134298324585,
      "learning_rate": 8.034303486357018e-06,
      "loss": 0.0226,
      "step": 616640
    },
    {
      "epoch": 1.0091776149983962,
      "grad_norm": 0.4155433773994446,
      "learning_rate": 8.0342375941435e-06,
      "loss": 0.0218,
      "step": 616660
    },
    {
      "epoch": 1.0092103454370496,
      "grad_norm": 2.4025163650512695,
      "learning_rate": 8.034171701929984e-06,
      "loss": 0.0264,
      "step": 616680
    },
    {
      "epoch": 1.009243075875703,
      "grad_norm": 2.4485819339752197,
      "learning_rate": 8.034105809716465e-06,
      "loss": 0.0357,
      "step": 616700
    },
    {
      "epoch": 1.0092758063143563,
      "grad_norm": 0.49089014530181885,
      "learning_rate": 8.034039917502949e-06,
      "loss": 0.0269,
      "step": 616720
    },
    {
      "epoch": 1.0093085367530095,
      "grad_norm": 1.3520550727844238,
      "learning_rate": 8.033974025289433e-06,
      "loss": 0.0249,
      "step": 616740
    },
    {
      "epoch": 1.0093412671916628,
      "grad_norm": 1.629130482673645,
      "learning_rate": 8.033908133075915e-06,
      "loss": 0.0272,
      "step": 616760
    },
    {
      "epoch": 1.0093739976303162,
      "grad_norm": 1.7129696607589722,
      "learning_rate": 8.033842240862398e-06,
      "loss": 0.0178,
      "step": 616780
    },
    {
      "epoch": 1.0094067280689696,
      "grad_norm": 0.7464966177940369,
      "learning_rate": 8.03377634864888e-06,
      "loss": 0.0177,
      "step": 616800
    },
    {
      "epoch": 1.009439458507623,
      "grad_norm": 0.455683171749115,
      "learning_rate": 8.033710456435364e-06,
      "loss": 0.0236,
      "step": 616820
    },
    {
      "epoch": 1.0094721889462763,
      "grad_norm": 0.5380548238754272,
      "learning_rate": 8.033644564221847e-06,
      "loss": 0.0329,
      "step": 616840
    },
    {
      "epoch": 1.0095049193849297,
      "grad_norm": 0.36275142431259155,
      "learning_rate": 8.033578672008329e-06,
      "loss": 0.0162,
      "step": 616860
    },
    {
      "epoch": 1.0095376498235828,
      "grad_norm": 0.823347806930542,
      "learning_rate": 8.033512779794813e-06,
      "loss": 0.0301,
      "step": 616880
    },
    {
      "epoch": 1.0095703802622362,
      "grad_norm": 0.7000023126602173,
      "learning_rate": 8.033446887581296e-06,
      "loss": 0.0293,
      "step": 616900
    },
    {
      "epoch": 1.0096031107008896,
      "grad_norm": 0.3138120174407959,
      "learning_rate": 8.033380995367778e-06,
      "loss": 0.0249,
      "step": 616920
    },
    {
      "epoch": 1.009635841139543,
      "grad_norm": 1.143599033355713,
      "learning_rate": 8.033315103154262e-06,
      "loss": 0.0259,
      "step": 616940
    },
    {
      "epoch": 1.0096685715781963,
      "grad_norm": 0.4983730912208557,
      "learning_rate": 8.033249210940744e-06,
      "loss": 0.0146,
      "step": 616960
    },
    {
      "epoch": 1.0097013020168497,
      "grad_norm": 1.5431859493255615,
      "learning_rate": 8.033183318727227e-06,
      "loss": 0.0262,
      "step": 616980
    },
    {
      "epoch": 1.0097340324555029,
      "grad_norm": 0.3388306200504303,
      "learning_rate": 8.033117426513709e-06,
      "loss": 0.0286,
      "step": 617000
    },
    {
      "epoch": 1.0097667628941562,
      "grad_norm": 0.8754456639289856,
      "learning_rate": 8.033051534300193e-06,
      "loss": 0.0182,
      "step": 617020
    },
    {
      "epoch": 1.0097994933328096,
      "grad_norm": 1.7918061017990112,
      "learning_rate": 8.032985642086675e-06,
      "loss": 0.0215,
      "step": 617040
    },
    {
      "epoch": 1.009832223771463,
      "grad_norm": 1.1005502939224243,
      "learning_rate": 8.032919749873158e-06,
      "loss": 0.0197,
      "step": 617060
    },
    {
      "epoch": 1.0098649542101164,
      "grad_norm": 0.8175278306007385,
      "learning_rate": 8.032853857659642e-06,
      "loss": 0.022,
      "step": 617080
    },
    {
      "epoch": 1.0098976846487697,
      "grad_norm": 0.5828675627708435,
      "learning_rate": 8.032787965446124e-06,
      "loss": 0.0258,
      "step": 617100
    },
    {
      "epoch": 1.009930415087423,
      "grad_norm": 0.23707528412342072,
      "learning_rate": 8.032722073232607e-06,
      "loss": 0.0296,
      "step": 617120
    },
    {
      "epoch": 1.0099631455260762,
      "grad_norm": 0.819974422454834,
      "learning_rate": 8.032656181019089e-06,
      "loss": 0.0231,
      "step": 617140
    },
    {
      "epoch": 1.0099958759647296,
      "grad_norm": 0.8182808756828308,
      "learning_rate": 8.032590288805573e-06,
      "loss": 0.0241,
      "step": 617160
    },
    {
      "epoch": 1.010028606403383,
      "grad_norm": 0.3803601562976837,
      "learning_rate": 8.032524396592055e-06,
      "loss": 0.0196,
      "step": 617180
    },
    {
      "epoch": 1.0100613368420364,
      "grad_norm": 0.3894236981868744,
      "learning_rate": 8.032458504378538e-06,
      "loss": 0.0313,
      "step": 617200
    },
    {
      "epoch": 1.0100940672806897,
      "grad_norm": 0.6411937475204468,
      "learning_rate": 8.03239261216502e-06,
      "loss": 0.0228,
      "step": 617220
    },
    {
      "epoch": 1.010126797719343,
      "grad_norm": 4.334962368011475,
      "learning_rate": 8.032326719951504e-06,
      "loss": 0.0244,
      "step": 617240
    },
    {
      "epoch": 1.0101595281579965,
      "grad_norm": 0.7852419018745422,
      "learning_rate": 8.032260827737987e-06,
      "loss": 0.0212,
      "step": 617260
    },
    {
      "epoch": 1.0101922585966496,
      "grad_norm": 0.22031216323375702,
      "learning_rate": 8.032194935524469e-06,
      "loss": 0.0253,
      "step": 617280
    },
    {
      "epoch": 1.010224989035303,
      "grad_norm": 0.6898993253707886,
      "learning_rate": 8.032129043310953e-06,
      "loss": 0.0257,
      "step": 617300
    },
    {
      "epoch": 1.0102577194739564,
      "grad_norm": 0.9585237503051758,
      "learning_rate": 8.032063151097436e-06,
      "loss": 0.032,
      "step": 617320
    },
    {
      "epoch": 1.0102904499126097,
      "grad_norm": 1.5106531381607056,
      "learning_rate": 8.031997258883918e-06,
      "loss": 0.0239,
      "step": 617340
    },
    {
      "epoch": 1.0103231803512631,
      "grad_norm": 0.3203570544719696,
      "learning_rate": 8.031931366670402e-06,
      "loss": 0.0195,
      "step": 617360
    },
    {
      "epoch": 1.0103559107899165,
      "grad_norm": 0.5487086176872253,
      "learning_rate": 8.031865474456884e-06,
      "loss": 0.0252,
      "step": 617380
    },
    {
      "epoch": 1.0103886412285696,
      "grad_norm": 4.375487327575684,
      "learning_rate": 8.031799582243367e-06,
      "loss": 0.0214,
      "step": 617400
    },
    {
      "epoch": 1.010421371667223,
      "grad_norm": 0.4459480345249176,
      "learning_rate": 8.031733690029851e-06,
      "loss": 0.0205,
      "step": 617420
    },
    {
      "epoch": 1.0104541021058764,
      "grad_norm": 0.4890625476837158,
      "learning_rate": 8.031667797816333e-06,
      "loss": 0.0234,
      "step": 617440
    },
    {
      "epoch": 1.0104868325445298,
      "grad_norm": 0.522263765335083,
      "learning_rate": 8.031601905602816e-06,
      "loss": 0.0223,
      "step": 617460
    },
    {
      "epoch": 1.0105195629831831,
      "grad_norm": 2.5292255878448486,
      "learning_rate": 8.031536013389298e-06,
      "loss": 0.0309,
      "step": 617480
    },
    {
      "epoch": 1.0105522934218365,
      "grad_norm": 0.8458725810050964,
      "learning_rate": 8.031470121175782e-06,
      "loss": 0.0338,
      "step": 617500
    },
    {
      "epoch": 1.0105850238604899,
      "grad_norm": 0.9165865778923035,
      "learning_rate": 8.031404228962264e-06,
      "loss": 0.0253,
      "step": 617520
    },
    {
      "epoch": 1.010617754299143,
      "grad_norm": 0.5787189602851868,
      "learning_rate": 8.031338336748747e-06,
      "loss": 0.0211,
      "step": 617540
    },
    {
      "epoch": 1.0106504847377964,
      "grad_norm": 0.13029316067695618,
      "learning_rate": 8.03127244453523e-06,
      "loss": 0.0283,
      "step": 617560
    },
    {
      "epoch": 1.0106832151764498,
      "grad_norm": 0.11714360862970352,
      "learning_rate": 8.031206552321713e-06,
      "loss": 0.0197,
      "step": 617580
    },
    {
      "epoch": 1.0107159456151031,
      "grad_norm": 1.3490662574768066,
      "learning_rate": 8.031140660108195e-06,
      "loss": 0.0278,
      "step": 617600
    },
    {
      "epoch": 1.0107486760537565,
      "grad_norm": 0.7101351022720337,
      "learning_rate": 8.031074767894678e-06,
      "loss": 0.0251,
      "step": 617620
    },
    {
      "epoch": 1.0107814064924099,
      "grad_norm": 1.1171320676803589,
      "learning_rate": 8.031008875681162e-06,
      "loss": 0.0275,
      "step": 617640
    },
    {
      "epoch": 1.0108141369310633,
      "grad_norm": 2.032367467880249,
      "learning_rate": 8.030942983467644e-06,
      "loss": 0.0173,
      "step": 617660
    },
    {
      "epoch": 1.0108468673697164,
      "grad_norm": 0.3243446350097656,
      "learning_rate": 8.030877091254127e-06,
      "loss": 0.0213,
      "step": 617680
    },
    {
      "epoch": 1.0108795978083698,
      "grad_norm": 1.0514987707138062,
      "learning_rate": 8.030811199040611e-06,
      "loss": 0.0186,
      "step": 617700
    },
    {
      "epoch": 1.0109123282470232,
      "grad_norm": 0.3018683195114136,
      "learning_rate": 8.030745306827093e-06,
      "loss": 0.0196,
      "step": 617720
    },
    {
      "epoch": 1.0109450586856765,
      "grad_norm": 0.4810647964477539,
      "learning_rate": 8.030679414613576e-06,
      "loss": 0.0228,
      "step": 617740
    },
    {
      "epoch": 1.01097778912433,
      "grad_norm": 0.6104165315628052,
      "learning_rate": 8.030613522400058e-06,
      "loss": 0.024,
      "step": 617760
    },
    {
      "epoch": 1.0110105195629833,
      "grad_norm": 0.6468852758407593,
      "learning_rate": 8.030547630186542e-06,
      "loss": 0.0252,
      "step": 617780
    },
    {
      "epoch": 1.0110432500016364,
      "grad_norm": 1.5627120733261108,
      "learning_rate": 8.030481737973025e-06,
      "loss": 0.0202,
      "step": 617800
    },
    {
      "epoch": 1.0110759804402898,
      "grad_norm": 0.29138511419296265,
      "learning_rate": 8.030415845759507e-06,
      "loss": 0.0184,
      "step": 617820
    },
    {
      "epoch": 1.0111087108789432,
      "grad_norm": 1.1749578714370728,
      "learning_rate": 8.030349953545991e-06,
      "loss": 0.0349,
      "step": 617840
    },
    {
      "epoch": 1.0111414413175965,
      "grad_norm": 0.36631032824516296,
      "learning_rate": 8.030284061332473e-06,
      "loss": 0.031,
      "step": 617860
    },
    {
      "epoch": 1.01117417175625,
      "grad_norm": 0.5960159301757812,
      "learning_rate": 8.030218169118956e-06,
      "loss": 0.0221,
      "step": 617880
    },
    {
      "epoch": 1.0112069021949033,
      "grad_norm": 0.7167179584503174,
      "learning_rate": 8.030152276905438e-06,
      "loss": 0.0296,
      "step": 617900
    },
    {
      "epoch": 1.0112396326335567,
      "grad_norm": 0.26980382204055786,
      "learning_rate": 8.030086384691922e-06,
      "loss": 0.0327,
      "step": 617920
    },
    {
      "epoch": 1.0112723630722098,
      "grad_norm": 0.7051603198051453,
      "learning_rate": 8.030020492478404e-06,
      "loss": 0.0267,
      "step": 617940
    },
    {
      "epoch": 1.0113050935108632,
      "grad_norm": 3.1562678813934326,
      "learning_rate": 8.029954600264887e-06,
      "loss": 0.0242,
      "step": 617960
    },
    {
      "epoch": 1.0113378239495165,
      "grad_norm": 0.729628324508667,
      "learning_rate": 8.02988870805137e-06,
      "loss": 0.024,
      "step": 617980
    },
    {
      "epoch": 1.01137055438817,
      "grad_norm": 0.3706815838813782,
      "learning_rate": 8.029822815837853e-06,
      "loss": 0.0224,
      "step": 618000
    },
    {
      "epoch": 1.0114032848268233,
      "grad_norm": 0.4553952217102051,
      "learning_rate": 8.029756923624335e-06,
      "loss": 0.029,
      "step": 618020
    },
    {
      "epoch": 1.0114360152654767,
      "grad_norm": 0.7622265219688416,
      "learning_rate": 8.029691031410818e-06,
      "loss": 0.0257,
      "step": 618040
    },
    {
      "epoch": 1.01146874570413,
      "grad_norm": 0.45495471358299255,
      "learning_rate": 8.029625139197302e-06,
      "loss": 0.017,
      "step": 618060
    },
    {
      "epoch": 1.0115014761427832,
      "grad_norm": 0.1910443902015686,
      "learning_rate": 8.029559246983784e-06,
      "loss": 0.0262,
      "step": 618080
    },
    {
      "epoch": 1.0115342065814366,
      "grad_norm": 0.38347944617271423,
      "learning_rate": 8.029493354770267e-06,
      "loss": 0.0221,
      "step": 618100
    },
    {
      "epoch": 1.01156693702009,
      "grad_norm": 1.575173020362854,
      "learning_rate": 8.029427462556751e-06,
      "loss": 0.021,
      "step": 618120
    },
    {
      "epoch": 1.0115996674587433,
      "grad_norm": 0.9897276759147644,
      "learning_rate": 8.029361570343233e-06,
      "loss": 0.025,
      "step": 618140
    },
    {
      "epoch": 1.0116323978973967,
      "grad_norm": 1.7127891778945923,
      "learning_rate": 8.029295678129716e-06,
      "loss": 0.0297,
      "step": 618160
    },
    {
      "epoch": 1.01166512833605,
      "grad_norm": 0.8223360776901245,
      "learning_rate": 8.0292297859162e-06,
      "loss": 0.0403,
      "step": 618180
    },
    {
      "epoch": 1.0116978587747032,
      "grad_norm": 0.6259421110153198,
      "learning_rate": 8.029163893702682e-06,
      "loss": 0.024,
      "step": 618200
    },
    {
      "epoch": 1.0117305892133566,
      "grad_norm": 0.6964263916015625,
      "learning_rate": 8.029098001489166e-06,
      "loss": 0.0201,
      "step": 618220
    },
    {
      "epoch": 1.01176331965201,
      "grad_norm": 0.6335857510566711,
      "learning_rate": 8.029032109275647e-06,
      "loss": 0.0286,
      "step": 618240
    },
    {
      "epoch": 1.0117960500906633,
      "grad_norm": 1.1828465461730957,
      "learning_rate": 8.028966217062131e-06,
      "loss": 0.0334,
      "step": 618260
    },
    {
      "epoch": 1.0118287805293167,
      "grad_norm": 0.3362232446670532,
      "learning_rate": 8.028900324848613e-06,
      "loss": 0.0255,
      "step": 618280
    },
    {
      "epoch": 1.01186151096797,
      "grad_norm": 3.125666856765747,
      "learning_rate": 8.028834432635096e-06,
      "loss": 0.0293,
      "step": 618300
    },
    {
      "epoch": 1.0118942414066234,
      "grad_norm": 0.6557448506355286,
      "learning_rate": 8.028768540421578e-06,
      "loss": 0.0199,
      "step": 618320
    },
    {
      "epoch": 1.0119269718452766,
      "grad_norm": 1.2961184978485107,
      "learning_rate": 8.028702648208062e-06,
      "loss": 0.0256,
      "step": 618340
    },
    {
      "epoch": 1.01195970228393,
      "grad_norm": 0.756041407585144,
      "learning_rate": 8.028636755994544e-06,
      "loss": 0.0265,
      "step": 618360
    },
    {
      "epoch": 1.0119924327225833,
      "grad_norm": 1.436479926109314,
      "learning_rate": 8.028570863781027e-06,
      "loss": 0.0245,
      "step": 618380
    },
    {
      "epoch": 1.0120251631612367,
      "grad_norm": 0.567021906375885,
      "learning_rate": 8.02850497156751e-06,
      "loss": 0.0209,
      "step": 618400
    },
    {
      "epoch": 1.01205789359989,
      "grad_norm": 1.1488879919052124,
      "learning_rate": 8.028439079353993e-06,
      "loss": 0.0236,
      "step": 618420
    },
    {
      "epoch": 1.0120906240385434,
      "grad_norm": 0.2414541244506836,
      "learning_rate": 8.028373187140477e-06,
      "loss": 0.0167,
      "step": 618440
    },
    {
      "epoch": 1.0121233544771968,
      "grad_norm": 0.5484617352485657,
      "learning_rate": 8.028307294926958e-06,
      "loss": 0.0268,
      "step": 618460
    },
    {
      "epoch": 1.01215608491585,
      "grad_norm": 1.4405776262283325,
      "learning_rate": 8.028241402713442e-06,
      "loss": 0.0229,
      "step": 618480
    },
    {
      "epoch": 1.0121888153545033,
      "grad_norm": 1.46001136302948,
      "learning_rate": 8.028175510499926e-06,
      "loss": 0.0319,
      "step": 618500
    },
    {
      "epoch": 1.0122215457931567,
      "grad_norm": 1.4424216747283936,
      "learning_rate": 8.028109618286407e-06,
      "loss": 0.021,
      "step": 618520
    },
    {
      "epoch": 1.01225427623181,
      "grad_norm": 0.2571144998073578,
      "learning_rate": 8.028043726072891e-06,
      "loss": 0.0247,
      "step": 618540
    },
    {
      "epoch": 1.0122870066704635,
      "grad_norm": 0.4389902651309967,
      "learning_rate": 8.027977833859375e-06,
      "loss": 0.0311,
      "step": 618560
    },
    {
      "epoch": 1.0123197371091168,
      "grad_norm": 0.46898555755615234,
      "learning_rate": 8.027911941645857e-06,
      "loss": 0.0199,
      "step": 618580
    },
    {
      "epoch": 1.01235246754777,
      "grad_norm": 0.6285470724105835,
      "learning_rate": 8.02784604943234e-06,
      "loss": 0.0183,
      "step": 618600
    },
    {
      "epoch": 1.0123851979864233,
      "grad_norm": 0.28519999980926514,
      "learning_rate": 8.027780157218822e-06,
      "loss": 0.0208,
      "step": 618620
    },
    {
      "epoch": 1.0124179284250767,
      "grad_norm": 0.964541494846344,
      "learning_rate": 8.027714265005306e-06,
      "loss": 0.0235,
      "step": 618640
    },
    {
      "epoch": 1.01245065886373,
      "grad_norm": 0.6660223603248596,
      "learning_rate": 8.027648372791787e-06,
      "loss": 0.026,
      "step": 618660
    },
    {
      "epoch": 1.0124833893023835,
      "grad_norm": 1.7545549869537354,
      "learning_rate": 8.027582480578271e-06,
      "loss": 0.0188,
      "step": 618680
    },
    {
      "epoch": 1.0125161197410368,
      "grad_norm": 4.2572126388549805,
      "learning_rate": 8.027516588364753e-06,
      "loss": 0.0314,
      "step": 618700
    },
    {
      "epoch": 1.0125488501796902,
      "grad_norm": 1.7617688179016113,
      "learning_rate": 8.027450696151237e-06,
      "loss": 0.0185,
      "step": 618720
    },
    {
      "epoch": 1.0125815806183434,
      "grad_norm": 0.44384467601776123,
      "learning_rate": 8.027384803937718e-06,
      "loss": 0.024,
      "step": 618740
    },
    {
      "epoch": 1.0126143110569967,
      "grad_norm": 0.4065228998661041,
      "learning_rate": 8.027318911724202e-06,
      "loss": 0.0343,
      "step": 618760
    },
    {
      "epoch": 1.01264704149565,
      "grad_norm": 0.9641813039779663,
      "learning_rate": 8.027253019510684e-06,
      "loss": 0.0208,
      "step": 618780
    },
    {
      "epoch": 1.0126797719343035,
      "grad_norm": 0.7079492211341858,
      "learning_rate": 8.027187127297168e-06,
      "loss": 0.0242,
      "step": 618800
    },
    {
      "epoch": 1.0127125023729568,
      "grad_norm": 0.9492757320404053,
      "learning_rate": 8.027121235083651e-06,
      "loss": 0.0243,
      "step": 618820
    },
    {
      "epoch": 1.0127452328116102,
      "grad_norm": 0.41678136587142944,
      "learning_rate": 8.027055342870133e-06,
      "loss": 0.028,
      "step": 618840
    },
    {
      "epoch": 1.0127779632502634,
      "grad_norm": 0.6566224694252014,
      "learning_rate": 8.026989450656617e-06,
      "loss": 0.0256,
      "step": 618860
    },
    {
      "epoch": 1.0128106936889167,
      "grad_norm": 1.5097171068191528,
      "learning_rate": 8.0269235584431e-06,
      "loss": 0.0307,
      "step": 618880
    },
    {
      "epoch": 1.0128434241275701,
      "grad_norm": 1.3280731439590454,
      "learning_rate": 8.026857666229582e-06,
      "loss": 0.0298,
      "step": 618900
    },
    {
      "epoch": 1.0128761545662235,
      "grad_norm": 0.06957191228866577,
      "learning_rate": 8.026791774016066e-06,
      "loss": 0.0209,
      "step": 618920
    },
    {
      "epoch": 1.0129088850048769,
      "grad_norm": 1.298585295677185,
      "learning_rate": 8.02672588180255e-06,
      "loss": 0.0299,
      "step": 618940
    },
    {
      "epoch": 1.0129416154435302,
      "grad_norm": 1.1076726913452148,
      "learning_rate": 8.026659989589031e-06,
      "loss": 0.0205,
      "step": 618960
    },
    {
      "epoch": 1.0129743458821836,
      "grad_norm": 1.4849904775619507,
      "learning_rate": 8.026594097375515e-06,
      "loss": 0.0264,
      "step": 618980
    },
    {
      "epoch": 1.0130070763208368,
      "grad_norm": 0.7467060685157776,
      "learning_rate": 8.026528205161997e-06,
      "loss": 0.0283,
      "step": 619000
    },
    {
      "epoch": 1.0130398067594901,
      "grad_norm": 1.1419696807861328,
      "learning_rate": 8.02646231294848e-06,
      "loss": 0.0248,
      "step": 619020
    },
    {
      "epoch": 1.0130725371981435,
      "grad_norm": 1.5278403759002686,
      "learning_rate": 8.026396420734962e-06,
      "loss": 0.0282,
      "step": 619040
    },
    {
      "epoch": 1.0131052676367969,
      "grad_norm": 1.1638771295547485,
      "learning_rate": 8.026330528521446e-06,
      "loss": 0.0247,
      "step": 619060
    },
    {
      "epoch": 1.0131379980754502,
      "grad_norm": 0.19573165476322174,
      "learning_rate": 8.026264636307928e-06,
      "loss": 0.032,
      "step": 619080
    },
    {
      "epoch": 1.0131707285141036,
      "grad_norm": 0.3805246353149414,
      "learning_rate": 8.026198744094411e-06,
      "loss": 0.0293,
      "step": 619100
    },
    {
      "epoch": 1.013203458952757,
      "grad_norm": 0.3751690983772278,
      "learning_rate": 8.026132851880893e-06,
      "loss": 0.0246,
      "step": 619120
    },
    {
      "epoch": 1.0132361893914101,
      "grad_norm": 3.6312146186828613,
      "learning_rate": 8.026066959667377e-06,
      "loss": 0.0217,
      "step": 619140
    },
    {
      "epoch": 1.0132689198300635,
      "grad_norm": 0.24801303446292877,
      "learning_rate": 8.026001067453859e-06,
      "loss": 0.0213,
      "step": 619160
    },
    {
      "epoch": 1.0133016502687169,
      "grad_norm": 0.6621652245521545,
      "learning_rate": 8.025935175240342e-06,
      "loss": 0.023,
      "step": 619180
    },
    {
      "epoch": 1.0133343807073703,
      "grad_norm": 1.760900855064392,
      "learning_rate": 8.025869283026826e-06,
      "loss": 0.0317,
      "step": 619200
    },
    {
      "epoch": 1.0133671111460236,
      "grad_norm": 0.1615467220544815,
      "learning_rate": 8.025803390813308e-06,
      "loss": 0.0227,
      "step": 619220
    },
    {
      "epoch": 1.013399841584677,
      "grad_norm": 1.3435527086257935,
      "learning_rate": 8.025737498599791e-06,
      "loss": 0.0222,
      "step": 619240
    },
    {
      "epoch": 1.0134325720233301,
      "grad_norm": 1.6660280227661133,
      "learning_rate": 8.025671606386273e-06,
      "loss": 0.0229,
      "step": 619260
    },
    {
      "epoch": 1.0134653024619835,
      "grad_norm": 0.22570733726024628,
      "learning_rate": 8.025605714172757e-06,
      "loss": 0.0261,
      "step": 619280
    },
    {
      "epoch": 1.013498032900637,
      "grad_norm": 1.2912323474884033,
      "learning_rate": 8.02553982195924e-06,
      "loss": 0.0291,
      "step": 619300
    },
    {
      "epoch": 1.0135307633392903,
      "grad_norm": 0.5737009644508362,
      "learning_rate": 8.025473929745722e-06,
      "loss": 0.0203,
      "step": 619320
    },
    {
      "epoch": 1.0135634937779436,
      "grad_norm": 0.7364547252655029,
      "learning_rate": 8.025408037532206e-06,
      "loss": 0.0237,
      "step": 619340
    },
    {
      "epoch": 1.013596224216597,
      "grad_norm": 0.8415517210960388,
      "learning_rate": 8.02534214531869e-06,
      "loss": 0.0267,
      "step": 619360
    },
    {
      "epoch": 1.0136289546552504,
      "grad_norm": 0.7002882957458496,
      "learning_rate": 8.025276253105171e-06,
      "loss": 0.0246,
      "step": 619380
    },
    {
      "epoch": 1.0136616850939035,
      "grad_norm": 0.713813066482544,
      "learning_rate": 8.025210360891655e-06,
      "loss": 0.0241,
      "step": 619400
    },
    {
      "epoch": 1.013694415532557,
      "grad_norm": 0.8791009187698364,
      "learning_rate": 8.025144468678137e-06,
      "loss": 0.0276,
      "step": 619420
    },
    {
      "epoch": 1.0137271459712103,
      "grad_norm": 0.5932020545005798,
      "learning_rate": 8.02507857646462e-06,
      "loss": 0.025,
      "step": 619440
    },
    {
      "epoch": 1.0137598764098636,
      "grad_norm": 0.47536706924438477,
      "learning_rate": 8.025012684251102e-06,
      "loss": 0.0218,
      "step": 619460
    },
    {
      "epoch": 1.013792606848517,
      "grad_norm": 0.563014566898346,
      "learning_rate": 8.024946792037586e-06,
      "loss": 0.0244,
      "step": 619480
    },
    {
      "epoch": 1.0138253372871704,
      "grad_norm": 0.5978670716285706,
      "learning_rate": 8.024880899824068e-06,
      "loss": 0.0164,
      "step": 619500
    },
    {
      "epoch": 1.0138580677258238,
      "grad_norm": 0.5026735067367554,
      "learning_rate": 8.024815007610551e-06,
      "loss": 0.0172,
      "step": 619520
    },
    {
      "epoch": 1.013890798164477,
      "grad_norm": 0.3033941984176636,
      "learning_rate": 8.024749115397035e-06,
      "loss": 0.0159,
      "step": 619540
    },
    {
      "epoch": 1.0139235286031303,
      "grad_norm": 0.4115770161151886,
      "learning_rate": 8.024683223183517e-06,
      "loss": 0.0353,
      "step": 619560
    },
    {
      "epoch": 1.0139562590417837,
      "grad_norm": 0.9578311443328857,
      "learning_rate": 8.02461733097e-06,
      "loss": 0.0192,
      "step": 619580
    },
    {
      "epoch": 1.013988989480437,
      "grad_norm": 0.45537716150283813,
      "learning_rate": 8.024551438756482e-06,
      "loss": 0.0222,
      "step": 619600
    },
    {
      "epoch": 1.0140217199190904,
      "grad_norm": 0.26164910197257996,
      "learning_rate": 8.024485546542966e-06,
      "loss": 0.0184,
      "step": 619620
    },
    {
      "epoch": 1.0140544503577438,
      "grad_norm": 0.37384340167045593,
      "learning_rate": 8.024419654329448e-06,
      "loss": 0.0179,
      "step": 619640
    },
    {
      "epoch": 1.014087180796397,
      "grad_norm": 0.9334162473678589,
      "learning_rate": 8.024353762115931e-06,
      "loss": 0.0241,
      "step": 619660
    },
    {
      "epoch": 1.0141199112350503,
      "grad_norm": 1.1376806497573853,
      "learning_rate": 8.024287869902415e-06,
      "loss": 0.0241,
      "step": 619680
    },
    {
      "epoch": 1.0141526416737037,
      "grad_norm": 1.101074457168579,
      "learning_rate": 8.024221977688897e-06,
      "loss": 0.0269,
      "step": 619700
    },
    {
      "epoch": 1.014185372112357,
      "grad_norm": 0.6292484998703003,
      "learning_rate": 8.02415608547538e-06,
      "loss": 0.0221,
      "step": 619720
    },
    {
      "epoch": 1.0142181025510104,
      "grad_norm": 0.4466230869293213,
      "learning_rate": 8.024090193261864e-06,
      "loss": 0.0179,
      "step": 619740
    },
    {
      "epoch": 1.0142508329896638,
      "grad_norm": 1.4228541851043701,
      "learning_rate": 8.024024301048346e-06,
      "loss": 0.025,
      "step": 619760
    },
    {
      "epoch": 1.0142835634283172,
      "grad_norm": 0.9176668524742126,
      "learning_rate": 8.02395840883483e-06,
      "loss": 0.0279,
      "step": 619780
    },
    {
      "epoch": 1.0143162938669703,
      "grad_norm": 0.6849477291107178,
      "learning_rate": 8.023892516621311e-06,
      "loss": 0.03,
      "step": 619800
    },
    {
      "epoch": 1.0143490243056237,
      "grad_norm": 0.5042140483856201,
      "learning_rate": 8.023826624407795e-06,
      "loss": 0.0241,
      "step": 619820
    },
    {
      "epoch": 1.014381754744277,
      "grad_norm": 0.5234638452529907,
      "learning_rate": 8.023760732194277e-06,
      "loss": 0.0195,
      "step": 619840
    },
    {
      "epoch": 1.0144144851829304,
      "grad_norm": 2.1238906383514404,
      "learning_rate": 8.02369483998076e-06,
      "loss": 0.0213,
      "step": 619860
    },
    {
      "epoch": 1.0144472156215838,
      "grad_norm": 1.2724283933639526,
      "learning_rate": 8.023628947767244e-06,
      "loss": 0.0249,
      "step": 619880
    },
    {
      "epoch": 1.0144799460602372,
      "grad_norm": 3.0405988693237305,
      "learning_rate": 8.023563055553726e-06,
      "loss": 0.0237,
      "step": 619900
    },
    {
      "epoch": 1.0145126764988905,
      "grad_norm": 1.439446210861206,
      "learning_rate": 8.02349716334021e-06,
      "loss": 0.0325,
      "step": 619920
    },
    {
      "epoch": 1.0145454069375437,
      "grad_norm": 0.3805128335952759,
      "learning_rate": 8.023431271126691e-06,
      "loss": 0.015,
      "step": 619940
    },
    {
      "epoch": 1.014578137376197,
      "grad_norm": 0.524655282497406,
      "learning_rate": 8.023365378913175e-06,
      "loss": 0.0221,
      "step": 619960
    },
    {
      "epoch": 1.0146108678148504,
      "grad_norm": 0.9010697603225708,
      "learning_rate": 8.023299486699657e-06,
      "loss": 0.026,
      "step": 619980
    },
    {
      "epoch": 1.0146435982535038,
      "grad_norm": 1.421976089477539,
      "learning_rate": 8.02323359448614e-06,
      "loss": 0.0136,
      "step": 620000
    },
    {
      "epoch": 1.0146763286921572,
      "grad_norm": 0.6878594160079956,
      "learning_rate": 8.023167702272622e-06,
      "loss": 0.0274,
      "step": 620020
    },
    {
      "epoch": 1.0147090591308106,
      "grad_norm": 0.7920581698417664,
      "learning_rate": 8.023101810059106e-06,
      "loss": 0.0184,
      "step": 620040
    },
    {
      "epoch": 1.0147417895694637,
      "grad_norm": 0.3766975998878479,
      "learning_rate": 8.023035917845588e-06,
      "loss": 0.0253,
      "step": 620060
    },
    {
      "epoch": 1.014774520008117,
      "grad_norm": 0.42272570729255676,
      "learning_rate": 8.022970025632071e-06,
      "loss": 0.0174,
      "step": 620080
    },
    {
      "epoch": 1.0148072504467704,
      "grad_norm": 0.7047095894813538,
      "learning_rate": 8.022904133418555e-06,
      "loss": 0.017,
      "step": 620100
    },
    {
      "epoch": 1.0148399808854238,
      "grad_norm": 0.1189398393034935,
      "learning_rate": 8.022838241205037e-06,
      "loss": 0.0379,
      "step": 620120
    },
    {
      "epoch": 1.0148727113240772,
      "grad_norm": 0.479995995759964,
      "learning_rate": 8.02277234899152e-06,
      "loss": 0.0228,
      "step": 620140
    },
    {
      "epoch": 1.0149054417627306,
      "grad_norm": 0.2629096806049347,
      "learning_rate": 8.022706456778004e-06,
      "loss": 0.0163,
      "step": 620160
    },
    {
      "epoch": 1.014938172201384,
      "grad_norm": 2.1786367893218994,
      "learning_rate": 8.022640564564486e-06,
      "loss": 0.0294,
      "step": 620180
    },
    {
      "epoch": 1.014970902640037,
      "grad_norm": 0.40331903100013733,
      "learning_rate": 8.02257467235097e-06,
      "loss": 0.0211,
      "step": 620200
    },
    {
      "epoch": 1.0150036330786905,
      "grad_norm": 1.4898686408996582,
      "learning_rate": 8.022508780137451e-06,
      "loss": 0.0357,
      "step": 620220
    },
    {
      "epoch": 1.0150363635173438,
      "grad_norm": 0.49891147017478943,
      "learning_rate": 8.022442887923935e-06,
      "loss": 0.0266,
      "step": 620240
    },
    {
      "epoch": 1.0150690939559972,
      "grad_norm": 1.5224699974060059,
      "learning_rate": 8.022376995710419e-06,
      "loss": 0.0258,
      "step": 620260
    },
    {
      "epoch": 1.0151018243946506,
      "grad_norm": 0.13873741030693054,
      "learning_rate": 8.0223111034969e-06,
      "loss": 0.0243,
      "step": 620280
    },
    {
      "epoch": 1.015134554833304,
      "grad_norm": 0.8402119874954224,
      "learning_rate": 8.022245211283384e-06,
      "loss": 0.0166,
      "step": 620300
    },
    {
      "epoch": 1.0151672852719573,
      "grad_norm": 0.2650063633918762,
      "learning_rate": 8.022179319069866e-06,
      "loss": 0.0175,
      "step": 620320
    },
    {
      "epoch": 1.0152000157106105,
      "grad_norm": 0.4234641492366791,
      "learning_rate": 8.02211342685635e-06,
      "loss": 0.0317,
      "step": 620340
    },
    {
      "epoch": 1.0152327461492638,
      "grad_norm": 0.3675720691680908,
      "learning_rate": 8.022047534642831e-06,
      "loss": 0.0251,
      "step": 620360
    },
    {
      "epoch": 1.0152654765879172,
      "grad_norm": 0.49400076270103455,
      "learning_rate": 8.021981642429315e-06,
      "loss": 0.0243,
      "step": 620380
    },
    {
      "epoch": 1.0152982070265706,
      "grad_norm": 0.35956695675849915,
      "learning_rate": 8.021915750215797e-06,
      "loss": 0.0192,
      "step": 620400
    },
    {
      "epoch": 1.015330937465224,
      "grad_norm": 0.5157426595687866,
      "learning_rate": 8.02184985800228e-06,
      "loss": 0.0328,
      "step": 620420
    },
    {
      "epoch": 1.0153636679038773,
      "grad_norm": 0.7034105658531189,
      "learning_rate": 8.021783965788762e-06,
      "loss": 0.0212,
      "step": 620440
    },
    {
      "epoch": 1.0153963983425305,
      "grad_norm": 0.6338512897491455,
      "learning_rate": 8.021718073575246e-06,
      "loss": 0.0316,
      "step": 620460
    },
    {
      "epoch": 1.0154291287811839,
      "grad_norm": 2.275087594985962,
      "learning_rate": 8.02165218136173e-06,
      "loss": 0.0246,
      "step": 620480
    },
    {
      "epoch": 1.0154618592198372,
      "grad_norm": 0.43123874068260193,
      "learning_rate": 8.021586289148211e-06,
      "loss": 0.0239,
      "step": 620500
    },
    {
      "epoch": 1.0154945896584906,
      "grad_norm": 0.5886520147323608,
      "learning_rate": 8.021520396934695e-06,
      "loss": 0.0229,
      "step": 620520
    },
    {
      "epoch": 1.015527320097144,
      "grad_norm": 2.5911693572998047,
      "learning_rate": 8.021454504721179e-06,
      "loss": 0.0203,
      "step": 620540
    },
    {
      "epoch": 1.0155600505357973,
      "grad_norm": 0.3242604434490204,
      "learning_rate": 8.02138861250766e-06,
      "loss": 0.0191,
      "step": 620560
    },
    {
      "epoch": 1.0155927809744507,
      "grad_norm": 0.8152474164962769,
      "learning_rate": 8.021322720294144e-06,
      "loss": 0.0342,
      "step": 620580
    },
    {
      "epoch": 1.0156255114131039,
      "grad_norm": 1.725714921951294,
      "learning_rate": 8.021256828080628e-06,
      "loss": 0.0261,
      "step": 620600
    },
    {
      "epoch": 1.0156582418517572,
      "grad_norm": 0.34224316477775574,
      "learning_rate": 8.02119093586711e-06,
      "loss": 0.0161,
      "step": 620620
    },
    {
      "epoch": 1.0156909722904106,
      "grad_norm": 0.7292910814285278,
      "learning_rate": 8.021125043653593e-06,
      "loss": 0.0253,
      "step": 620640
    },
    {
      "epoch": 1.015723702729064,
      "grad_norm": 0.37268561124801636,
      "learning_rate": 8.021059151440075e-06,
      "loss": 0.0276,
      "step": 620660
    },
    {
      "epoch": 1.0157564331677174,
      "grad_norm": 1.489809274673462,
      "learning_rate": 8.020993259226559e-06,
      "loss": 0.0195,
      "step": 620680
    },
    {
      "epoch": 1.0157891636063707,
      "grad_norm": 0.6032610535621643,
      "learning_rate": 8.02092736701304e-06,
      "loss": 0.0264,
      "step": 620700
    },
    {
      "epoch": 1.015821894045024,
      "grad_norm": 0.44134584069252014,
      "learning_rate": 8.020861474799524e-06,
      "loss": 0.0224,
      "step": 620720
    },
    {
      "epoch": 1.0158546244836772,
      "grad_norm": 0.9218798279762268,
      "learning_rate": 8.020795582586006e-06,
      "loss": 0.0244,
      "step": 620740
    },
    {
      "epoch": 1.0158873549223306,
      "grad_norm": 0.49135202169418335,
      "learning_rate": 8.02072969037249e-06,
      "loss": 0.018,
      "step": 620760
    },
    {
      "epoch": 1.015920085360984,
      "grad_norm": 0.1767725944519043,
      "learning_rate": 8.020663798158971e-06,
      "loss": 0.034,
      "step": 620780
    },
    {
      "epoch": 1.0159528157996374,
      "grad_norm": 1.8196276426315308,
      "learning_rate": 8.020597905945455e-06,
      "loss": 0.0258,
      "step": 620800
    },
    {
      "epoch": 1.0159855462382907,
      "grad_norm": 0.28601548075675964,
      "learning_rate": 8.020532013731937e-06,
      "loss": 0.026,
      "step": 620820
    },
    {
      "epoch": 1.016018276676944,
      "grad_norm": 0.6431251168251038,
      "learning_rate": 8.02046612151842e-06,
      "loss": 0.0282,
      "step": 620840
    },
    {
      "epoch": 1.0160510071155973,
      "grad_norm": 0.9709038138389587,
      "learning_rate": 8.020400229304902e-06,
      "loss": 0.0309,
      "step": 620860
    },
    {
      "epoch": 1.0160837375542506,
      "grad_norm": 0.3414960205554962,
      "learning_rate": 8.020334337091386e-06,
      "loss": 0.0203,
      "step": 620880
    },
    {
      "epoch": 1.016116467992904,
      "grad_norm": 1.3103041648864746,
      "learning_rate": 8.02026844487787e-06,
      "loss": 0.0204,
      "step": 620900
    },
    {
      "epoch": 1.0161491984315574,
      "grad_norm": 0.9648706912994385,
      "learning_rate": 8.020202552664351e-06,
      "loss": 0.0248,
      "step": 620920
    },
    {
      "epoch": 1.0161819288702107,
      "grad_norm": 0.22134721279144287,
      "learning_rate": 8.020136660450835e-06,
      "loss": 0.0176,
      "step": 620940
    },
    {
      "epoch": 1.0162146593088641,
      "grad_norm": 0.7241575717926025,
      "learning_rate": 8.020070768237319e-06,
      "loss": 0.0208,
      "step": 620960
    },
    {
      "epoch": 1.0162473897475175,
      "grad_norm": 0.7450786828994751,
      "learning_rate": 8.020004876023802e-06,
      "loss": 0.0255,
      "step": 620980
    },
    {
      "epoch": 1.0162801201861706,
      "grad_norm": 0.8002600073814392,
      "learning_rate": 8.019938983810284e-06,
      "loss": 0.0339,
      "step": 621000
    },
    {
      "epoch": 1.016312850624824,
      "grad_norm": 0.7797948718070984,
      "learning_rate": 8.019873091596768e-06,
      "loss": 0.027,
      "step": 621020
    },
    {
      "epoch": 1.0163455810634774,
      "grad_norm": 1.7486844062805176,
      "learning_rate": 8.01980719938325e-06,
      "loss": 0.0151,
      "step": 621040
    },
    {
      "epoch": 1.0163783115021308,
      "grad_norm": 1.7284951210021973,
      "learning_rate": 8.019741307169733e-06,
      "loss": 0.0226,
      "step": 621060
    },
    {
      "epoch": 1.0164110419407841,
      "grad_norm": 0.7915496826171875,
      "learning_rate": 8.019675414956215e-06,
      "loss": 0.0192,
      "step": 621080
    },
    {
      "epoch": 1.0164437723794375,
      "grad_norm": 0.9816346764564514,
      "learning_rate": 8.019609522742699e-06,
      "loss": 0.0199,
      "step": 621100
    },
    {
      "epoch": 1.0164765028180909,
      "grad_norm": 0.7789867520332336,
      "learning_rate": 8.01954363052918e-06,
      "loss": 0.0172,
      "step": 621120
    },
    {
      "epoch": 1.016509233256744,
      "grad_norm": 0.737770140171051,
      "learning_rate": 8.019477738315664e-06,
      "loss": 0.0283,
      "step": 621140
    },
    {
      "epoch": 1.0165419636953974,
      "grad_norm": 0.36501240730285645,
      "learning_rate": 8.019411846102146e-06,
      "loss": 0.029,
      "step": 621160
    },
    {
      "epoch": 1.0165746941340508,
      "grad_norm": 0.40657925605773926,
      "learning_rate": 8.01934595388863e-06,
      "loss": 0.0291,
      "step": 621180
    },
    {
      "epoch": 1.0166074245727041,
      "grad_norm": 0.5455703139305115,
      "learning_rate": 8.019280061675112e-06,
      "loss": 0.0227,
      "step": 621200
    },
    {
      "epoch": 1.0166401550113575,
      "grad_norm": 0.6879454851150513,
      "learning_rate": 8.019214169461595e-06,
      "loss": 0.0179,
      "step": 621220
    },
    {
      "epoch": 1.0166728854500109,
      "grad_norm": 0.2667091190814972,
      "learning_rate": 8.019148277248077e-06,
      "loss": 0.0279,
      "step": 621240
    },
    {
      "epoch": 1.016705615888664,
      "grad_norm": 0.4227379262447357,
      "learning_rate": 8.01908238503456e-06,
      "loss": 0.0173,
      "step": 621260
    },
    {
      "epoch": 1.0167383463273174,
      "grad_norm": 0.7199681997299194,
      "learning_rate": 8.019016492821044e-06,
      "loss": 0.0183,
      "step": 621280
    },
    {
      "epoch": 1.0167710767659708,
      "grad_norm": 1.8797520399093628,
      "learning_rate": 8.018950600607526e-06,
      "loss": 0.0309,
      "step": 621300
    },
    {
      "epoch": 1.0168038072046242,
      "grad_norm": 0.646630048751831,
      "learning_rate": 8.01888470839401e-06,
      "loss": 0.0167,
      "step": 621320
    },
    {
      "epoch": 1.0168365376432775,
      "grad_norm": 0.33363983035087585,
      "learning_rate": 8.018818816180493e-06,
      "loss": 0.0268,
      "step": 621340
    },
    {
      "epoch": 1.016869268081931,
      "grad_norm": 1.2278335094451904,
      "learning_rate": 8.018752923966975e-06,
      "loss": 0.0267,
      "step": 621360
    },
    {
      "epoch": 1.0169019985205843,
      "grad_norm": 1.2231069803237915,
      "learning_rate": 8.018687031753459e-06,
      "loss": 0.0225,
      "step": 621380
    },
    {
      "epoch": 1.0169347289592374,
      "grad_norm": 0.8514462113380432,
      "learning_rate": 8.018621139539942e-06,
      "loss": 0.0328,
      "step": 621400
    },
    {
      "epoch": 1.0169674593978908,
      "grad_norm": 0.6215091347694397,
      "learning_rate": 8.018555247326424e-06,
      "loss": 0.0166,
      "step": 621420
    },
    {
      "epoch": 1.0170001898365442,
      "grad_norm": 0.7973727583885193,
      "learning_rate": 8.018489355112908e-06,
      "loss": 0.0269,
      "step": 621440
    },
    {
      "epoch": 1.0170329202751975,
      "grad_norm": 0.5625269412994385,
      "learning_rate": 8.01842346289939e-06,
      "loss": 0.0236,
      "step": 621460
    },
    {
      "epoch": 1.017065650713851,
      "grad_norm": 1.830324411392212,
      "learning_rate": 8.018357570685873e-06,
      "loss": 0.0348,
      "step": 621480
    },
    {
      "epoch": 1.0170983811525043,
      "grad_norm": 0.7847980856895447,
      "learning_rate": 8.018291678472355e-06,
      "loss": 0.0226,
      "step": 621500
    },
    {
      "epoch": 1.0171311115911577,
      "grad_norm": 0.404100239276886,
      "learning_rate": 8.018225786258839e-06,
      "loss": 0.0361,
      "step": 621520
    },
    {
      "epoch": 1.0171638420298108,
      "grad_norm": 0.8498667478561401,
      "learning_rate": 8.01815989404532e-06,
      "loss": 0.0194,
      "step": 621540
    },
    {
      "epoch": 1.0171965724684642,
      "grad_norm": 1.1127575635910034,
      "learning_rate": 8.018094001831804e-06,
      "loss": 0.023,
      "step": 621560
    },
    {
      "epoch": 1.0172293029071175,
      "grad_norm": 2.1208913326263428,
      "learning_rate": 8.018028109618286e-06,
      "loss": 0.021,
      "step": 621580
    },
    {
      "epoch": 1.017262033345771,
      "grad_norm": 0.6237638592720032,
      "learning_rate": 8.01796221740477e-06,
      "loss": 0.0189,
      "step": 621600
    },
    {
      "epoch": 1.0172947637844243,
      "grad_norm": 1.8753196001052856,
      "learning_rate": 8.017896325191252e-06,
      "loss": 0.027,
      "step": 621620
    },
    {
      "epoch": 1.0173274942230777,
      "grad_norm": 0.9897651076316833,
      "learning_rate": 8.017830432977735e-06,
      "loss": 0.0195,
      "step": 621640
    },
    {
      "epoch": 1.0173602246617308,
      "grad_norm": 0.36412134766578674,
      "learning_rate": 8.017764540764219e-06,
      "loss": 0.0303,
      "step": 621660
    },
    {
      "epoch": 1.0173929551003842,
      "grad_norm": 0.3696024417877197,
      "learning_rate": 8.0176986485507e-06,
      "loss": 0.0202,
      "step": 621680
    },
    {
      "epoch": 1.0174256855390376,
      "grad_norm": 1.1478707790374756,
      "learning_rate": 8.017632756337184e-06,
      "loss": 0.0256,
      "step": 621700
    },
    {
      "epoch": 1.017458415977691,
      "grad_norm": 1.7389404773712158,
      "learning_rate": 8.017566864123668e-06,
      "loss": 0.0229,
      "step": 621720
    },
    {
      "epoch": 1.0174911464163443,
      "grad_norm": 0.8459414839744568,
      "learning_rate": 8.01750097191015e-06,
      "loss": 0.0305,
      "step": 621740
    },
    {
      "epoch": 1.0175238768549977,
      "grad_norm": 1.3796383142471313,
      "learning_rate": 8.017435079696633e-06,
      "loss": 0.0283,
      "step": 621760
    },
    {
      "epoch": 1.017556607293651,
      "grad_norm": 0.7646963000297546,
      "learning_rate": 8.017369187483117e-06,
      "loss": 0.0153,
      "step": 621780
    },
    {
      "epoch": 1.0175893377323042,
      "grad_norm": 4.09385347366333,
      "learning_rate": 8.017303295269599e-06,
      "loss": 0.0184,
      "step": 621800
    },
    {
      "epoch": 1.0176220681709576,
      "grad_norm": 0.28173956274986267,
      "learning_rate": 8.017237403056082e-06,
      "loss": 0.0213,
      "step": 621820
    },
    {
      "epoch": 1.017654798609611,
      "grad_norm": 15.262070655822754,
      "learning_rate": 8.017171510842564e-06,
      "loss": 0.0265,
      "step": 621840
    },
    {
      "epoch": 1.0176875290482643,
      "grad_norm": 0.24058109521865845,
      "learning_rate": 8.017105618629048e-06,
      "loss": 0.0242,
      "step": 621860
    },
    {
      "epoch": 1.0177202594869177,
      "grad_norm": 0.41773688793182373,
      "learning_rate": 8.01703972641553e-06,
      "loss": 0.0263,
      "step": 621880
    },
    {
      "epoch": 1.017752989925571,
      "grad_norm": 0.3351733386516571,
      "learning_rate": 8.016973834202013e-06,
      "loss": 0.0216,
      "step": 621900
    },
    {
      "epoch": 1.0177857203642242,
      "grad_norm": 1.1799523830413818,
      "learning_rate": 8.016907941988495e-06,
      "loss": 0.0217,
      "step": 621920
    },
    {
      "epoch": 1.0178184508028776,
      "grad_norm": 0.5093415975570679,
      "learning_rate": 8.016842049774979e-06,
      "loss": 0.0284,
      "step": 621940
    },
    {
      "epoch": 1.017851181241531,
      "grad_norm": 0.7611352205276489,
      "learning_rate": 8.01677615756146e-06,
      "loss": 0.023,
      "step": 621960
    },
    {
      "epoch": 1.0178839116801843,
      "grad_norm": 0.513385534286499,
      "learning_rate": 8.016710265347944e-06,
      "loss": 0.0212,
      "step": 621980
    },
    {
      "epoch": 1.0179166421188377,
      "grad_norm": 0.5539776682853699,
      "learning_rate": 8.016644373134428e-06,
      "loss": 0.0189,
      "step": 622000
    },
    {
      "epoch": 1.017949372557491,
      "grad_norm": 1.5845539569854736,
      "learning_rate": 8.01657848092091e-06,
      "loss": 0.0275,
      "step": 622020
    },
    {
      "epoch": 1.0179821029961444,
      "grad_norm": 0.27191969752311707,
      "learning_rate": 8.016512588707393e-06,
      "loss": 0.0341,
      "step": 622040
    },
    {
      "epoch": 1.0180148334347976,
      "grad_norm": 1.5056421756744385,
      "learning_rate": 8.016446696493875e-06,
      "loss": 0.0224,
      "step": 622060
    },
    {
      "epoch": 1.018047563873451,
      "grad_norm": 0.682551920413971,
      "learning_rate": 8.016380804280359e-06,
      "loss": 0.0313,
      "step": 622080
    },
    {
      "epoch": 1.0180802943121043,
      "grad_norm": 0.7659569978713989,
      "learning_rate": 8.01631491206684e-06,
      "loss": 0.02,
      "step": 622100
    },
    {
      "epoch": 1.0181130247507577,
      "grad_norm": 3.1923248767852783,
      "learning_rate": 8.016249019853324e-06,
      "loss": 0.0302,
      "step": 622120
    },
    {
      "epoch": 1.018145755189411,
      "grad_norm": 0.31666460633277893,
      "learning_rate": 8.016183127639808e-06,
      "loss": 0.0356,
      "step": 622140
    },
    {
      "epoch": 1.0181784856280645,
      "grad_norm": 0.19610655307769775,
      "learning_rate": 8.01611723542629e-06,
      "loss": 0.0245,
      "step": 622160
    },
    {
      "epoch": 1.0182112160667178,
      "grad_norm": 0.2785114049911499,
      "learning_rate": 8.016051343212773e-06,
      "loss": 0.0225,
      "step": 622180
    },
    {
      "epoch": 1.018243946505371,
      "grad_norm": 0.46637237071990967,
      "learning_rate": 8.015985450999257e-06,
      "loss": 0.0213,
      "step": 622200
    },
    {
      "epoch": 1.0182766769440244,
      "grad_norm": 1.0096049308776855,
      "learning_rate": 8.015919558785739e-06,
      "loss": 0.0232,
      "step": 622220
    },
    {
      "epoch": 1.0183094073826777,
      "grad_norm": 0.9845226407051086,
      "learning_rate": 8.015853666572222e-06,
      "loss": 0.0159,
      "step": 622240
    },
    {
      "epoch": 1.018342137821331,
      "grad_norm": 0.23032094538211823,
      "learning_rate": 8.015787774358704e-06,
      "loss": 0.03,
      "step": 622260
    },
    {
      "epoch": 1.0183748682599845,
      "grad_norm": 0.6682580709457397,
      "learning_rate": 8.015721882145188e-06,
      "loss": 0.0226,
      "step": 622280
    },
    {
      "epoch": 1.0184075986986378,
      "grad_norm": 1.563982605934143,
      "learning_rate": 8.01565598993167e-06,
      "loss": 0.0213,
      "step": 622300
    },
    {
      "epoch": 1.018440329137291,
      "grad_norm": 0.4032876193523407,
      "learning_rate": 8.015590097718153e-06,
      "loss": 0.0268,
      "step": 622320
    },
    {
      "epoch": 1.0184730595759444,
      "grad_norm": 0.6769538521766663,
      "learning_rate": 8.015524205504637e-06,
      "loss": 0.0277,
      "step": 622340
    },
    {
      "epoch": 1.0185057900145977,
      "grad_norm": 1.420379400253296,
      "learning_rate": 8.015458313291119e-06,
      "loss": 0.0242,
      "step": 622360
    },
    {
      "epoch": 1.018538520453251,
      "grad_norm": 0.504649817943573,
      "learning_rate": 8.015392421077602e-06,
      "loss": 0.029,
      "step": 622380
    },
    {
      "epoch": 1.0185712508919045,
      "grad_norm": 0.7667542099952698,
      "learning_rate": 8.015326528864084e-06,
      "loss": 0.0138,
      "step": 622400
    },
    {
      "epoch": 1.0186039813305579,
      "grad_norm": 0.7255716323852539,
      "learning_rate": 8.015260636650568e-06,
      "loss": 0.0228,
      "step": 622420
    },
    {
      "epoch": 1.0186367117692112,
      "grad_norm": 1.7420096397399902,
      "learning_rate": 8.01519474443705e-06,
      "loss": 0.0205,
      "step": 622440
    },
    {
      "epoch": 1.0186694422078644,
      "grad_norm": 0.8006117939949036,
      "learning_rate": 8.015128852223533e-06,
      "loss": 0.0264,
      "step": 622460
    },
    {
      "epoch": 1.0187021726465177,
      "grad_norm": 0.1763789802789688,
      "learning_rate": 8.015062960010015e-06,
      "loss": 0.0237,
      "step": 622480
    },
    {
      "epoch": 1.0187349030851711,
      "grad_norm": 0.7112062573432922,
      "learning_rate": 8.014997067796499e-06,
      "loss": 0.0243,
      "step": 622500
    },
    {
      "epoch": 1.0187676335238245,
      "grad_norm": 0.5962949395179749,
      "learning_rate": 8.014931175582983e-06,
      "loss": 0.0258,
      "step": 622520
    },
    {
      "epoch": 1.0188003639624779,
      "grad_norm": 0.34974345564842224,
      "learning_rate": 8.014865283369464e-06,
      "loss": 0.0256,
      "step": 622540
    },
    {
      "epoch": 1.0188330944011312,
      "grad_norm": 1.2675358057022095,
      "learning_rate": 8.014799391155948e-06,
      "loss": 0.0172,
      "step": 622560
    },
    {
      "epoch": 1.0188658248397846,
      "grad_norm": 0.25922858715057373,
      "learning_rate": 8.014733498942432e-06,
      "loss": 0.0272,
      "step": 622580
    },
    {
      "epoch": 1.0188985552784378,
      "grad_norm": 0.36068207025527954,
      "learning_rate": 8.014667606728913e-06,
      "loss": 0.0272,
      "step": 622600
    },
    {
      "epoch": 1.0189312857170911,
      "grad_norm": 0.28718623518943787,
      "learning_rate": 8.014601714515397e-06,
      "loss": 0.0261,
      "step": 622620
    },
    {
      "epoch": 1.0189640161557445,
      "grad_norm": 0.8965145349502563,
      "learning_rate": 8.014535822301879e-06,
      "loss": 0.0268,
      "step": 622640
    },
    {
      "epoch": 1.0189967465943979,
      "grad_norm": 1.712953805923462,
      "learning_rate": 8.014469930088363e-06,
      "loss": 0.0276,
      "step": 622660
    },
    {
      "epoch": 1.0190294770330512,
      "grad_norm": 0.7491627931594849,
      "learning_rate": 8.014404037874844e-06,
      "loss": 0.0282,
      "step": 622680
    },
    {
      "epoch": 1.0190622074717046,
      "grad_norm": 0.8021244406700134,
      "learning_rate": 8.014338145661328e-06,
      "loss": 0.0277,
      "step": 622700
    },
    {
      "epoch": 1.0190949379103578,
      "grad_norm": 0.780178427696228,
      "learning_rate": 8.014272253447812e-06,
      "loss": 0.0284,
      "step": 622720
    },
    {
      "epoch": 1.0191276683490111,
      "grad_norm": 0.15820689499378204,
      "learning_rate": 8.014206361234294e-06,
      "loss": 0.0316,
      "step": 622740
    },
    {
      "epoch": 1.0191603987876645,
      "grad_norm": 1.0884144306182861,
      "learning_rate": 8.014140469020777e-06,
      "loss": 0.0333,
      "step": 622760
    },
    {
      "epoch": 1.0191931292263179,
      "grad_norm": 0.29177024960517883,
      "learning_rate": 8.014074576807259e-06,
      "loss": 0.0235,
      "step": 622780
    },
    {
      "epoch": 1.0192258596649713,
      "grad_norm": 0.5183728337287903,
      "learning_rate": 8.014008684593743e-06,
      "loss": 0.0279,
      "step": 622800
    },
    {
      "epoch": 1.0192585901036246,
      "grad_norm": 1.5338820219039917,
      "learning_rate": 8.013942792380224e-06,
      "loss": 0.0314,
      "step": 622820
    },
    {
      "epoch": 1.019291320542278,
      "grad_norm": 1.3744467496871948,
      "learning_rate": 8.013876900166708e-06,
      "loss": 0.0373,
      "step": 622840
    },
    {
      "epoch": 1.0193240509809312,
      "grad_norm": 0.5083406567573547,
      "learning_rate": 8.01381100795319e-06,
      "loss": 0.0215,
      "step": 622860
    },
    {
      "epoch": 1.0193567814195845,
      "grad_norm": 0.7968457341194153,
      "learning_rate": 8.013745115739674e-06,
      "loss": 0.0278,
      "step": 622880
    },
    {
      "epoch": 1.019389511858238,
      "grad_norm": 0.5432807207107544,
      "learning_rate": 8.013679223526155e-06,
      "loss": 0.0168,
      "step": 622900
    },
    {
      "epoch": 1.0194222422968913,
      "grad_norm": 0.7114837169647217,
      "learning_rate": 8.013613331312639e-06,
      "loss": 0.0274,
      "step": 622920
    },
    {
      "epoch": 1.0194549727355446,
      "grad_norm": 0.558726966381073,
      "learning_rate": 8.013547439099123e-06,
      "loss": 0.0215,
      "step": 622940
    },
    {
      "epoch": 1.019487703174198,
      "grad_norm": 1.5875385999679565,
      "learning_rate": 8.013481546885604e-06,
      "loss": 0.0219,
      "step": 622960
    },
    {
      "epoch": 1.0195204336128514,
      "grad_norm": 0.18453501164913177,
      "learning_rate": 8.013415654672088e-06,
      "loss": 0.0189,
      "step": 622980
    },
    {
      "epoch": 1.0195531640515045,
      "grad_norm": 0.2273588925600052,
      "learning_rate": 8.013349762458572e-06,
      "loss": 0.0145,
      "step": 623000
    },
    {
      "epoch": 1.019585894490158,
      "grad_norm": 0.4149863123893738,
      "learning_rate": 8.013283870245054e-06,
      "loss": 0.0271,
      "step": 623020
    },
    {
      "epoch": 1.0196186249288113,
      "grad_norm": 0.5566956400871277,
      "learning_rate": 8.013217978031537e-06,
      "loss": 0.0264,
      "step": 623040
    },
    {
      "epoch": 1.0196513553674647,
      "grad_norm": 0.6043300628662109,
      "learning_rate": 8.01315208581802e-06,
      "loss": 0.031,
      "step": 623060
    },
    {
      "epoch": 1.019684085806118,
      "grad_norm": 0.25547122955322266,
      "learning_rate": 8.013086193604503e-06,
      "loss": 0.0211,
      "step": 623080
    },
    {
      "epoch": 1.0197168162447714,
      "grad_norm": 0.5960817933082581,
      "learning_rate": 8.013020301390986e-06,
      "loss": 0.0302,
      "step": 623100
    },
    {
      "epoch": 1.0197495466834245,
      "grad_norm": 0.8552328944206238,
      "learning_rate": 8.012954409177468e-06,
      "loss": 0.0261,
      "step": 623120
    },
    {
      "epoch": 1.019782277122078,
      "grad_norm": 1.131960153579712,
      "learning_rate": 8.012888516963952e-06,
      "loss": 0.0268,
      "step": 623140
    },
    {
      "epoch": 1.0198150075607313,
      "grad_norm": 0.963532030582428,
      "learning_rate": 8.012822624750434e-06,
      "loss": 0.0273,
      "step": 623160
    },
    {
      "epoch": 1.0198477379993847,
      "grad_norm": 0.8030621409416199,
      "learning_rate": 8.012756732536917e-06,
      "loss": 0.0324,
      "step": 623180
    },
    {
      "epoch": 1.019880468438038,
      "grad_norm": 0.47327694296836853,
      "learning_rate": 8.012690840323399e-06,
      "loss": 0.0155,
      "step": 623200
    },
    {
      "epoch": 1.0199131988766914,
      "grad_norm": 1.2096103429794312,
      "learning_rate": 8.012624948109883e-06,
      "loss": 0.0258,
      "step": 623220
    },
    {
      "epoch": 1.0199459293153448,
      "grad_norm": 0.47379887104034424,
      "learning_rate": 8.012559055896365e-06,
      "loss": 0.0247,
      "step": 623240
    },
    {
      "epoch": 1.019978659753998,
      "grad_norm": 0.8493135571479797,
      "learning_rate": 8.012493163682848e-06,
      "loss": 0.023,
      "step": 623260
    },
    {
      "epoch": 1.0200113901926513,
      "grad_norm": 0.4754682183265686,
      "learning_rate": 8.01242727146933e-06,
      "loss": 0.0126,
      "step": 623280
    },
    {
      "epoch": 1.0200441206313047,
      "grad_norm": 0.8034705519676208,
      "learning_rate": 8.012361379255814e-06,
      "loss": 0.0228,
      "step": 623300
    },
    {
      "epoch": 1.020076851069958,
      "grad_norm": 0.7019229531288147,
      "learning_rate": 8.012295487042297e-06,
      "loss": 0.0223,
      "step": 623320
    },
    {
      "epoch": 1.0201095815086114,
      "grad_norm": 0.29854145646095276,
      "learning_rate": 8.012229594828779e-06,
      "loss": 0.0325,
      "step": 623340
    },
    {
      "epoch": 1.0201423119472648,
      "grad_norm": 10.402128219604492,
      "learning_rate": 8.012163702615263e-06,
      "loss": 0.0302,
      "step": 623360
    },
    {
      "epoch": 1.0201750423859182,
      "grad_norm": 0.3212128281593323,
      "learning_rate": 8.012097810401746e-06,
      "loss": 0.0198,
      "step": 623380
    },
    {
      "epoch": 1.0202077728245713,
      "grad_norm": 0.5370184183120728,
      "learning_rate": 8.012031918188228e-06,
      "loss": 0.037,
      "step": 623400
    },
    {
      "epoch": 1.0202405032632247,
      "grad_norm": 2.527594804763794,
      "learning_rate": 8.011966025974712e-06,
      "loss": 0.034,
      "step": 623420
    },
    {
      "epoch": 1.020273233701878,
      "grad_norm": 1.4742251634597778,
      "learning_rate": 8.011900133761195e-06,
      "loss": 0.0326,
      "step": 623440
    },
    {
      "epoch": 1.0203059641405314,
      "grad_norm": 3.8263347148895264,
      "learning_rate": 8.011834241547677e-06,
      "loss": 0.0264,
      "step": 623460
    },
    {
      "epoch": 1.0203386945791848,
      "grad_norm": 0.5788700580596924,
      "learning_rate": 8.01176834933416e-06,
      "loss": 0.0314,
      "step": 623480
    },
    {
      "epoch": 1.0203714250178382,
      "grad_norm": 0.6329806447029114,
      "learning_rate": 8.011702457120643e-06,
      "loss": 0.0213,
      "step": 623500
    },
    {
      "epoch": 1.0204041554564913,
      "grad_norm": 0.3127620220184326,
      "learning_rate": 8.011636564907126e-06,
      "loss": 0.0249,
      "step": 623520
    },
    {
      "epoch": 1.0204368858951447,
      "grad_norm": 0.46969127655029297,
      "learning_rate": 8.011570672693608e-06,
      "loss": 0.0291,
      "step": 623540
    },
    {
      "epoch": 1.020469616333798,
      "grad_norm": 0.6723307967185974,
      "learning_rate": 8.011504780480092e-06,
      "loss": 0.0179,
      "step": 623560
    },
    {
      "epoch": 1.0205023467724514,
      "grad_norm": 1.57688307762146,
      "learning_rate": 8.011438888266574e-06,
      "loss": 0.0143,
      "step": 623580
    },
    {
      "epoch": 1.0205350772111048,
      "grad_norm": 0.4467007517814636,
      "learning_rate": 8.011372996053057e-06,
      "loss": 0.0251,
      "step": 623600
    },
    {
      "epoch": 1.0205678076497582,
      "grad_norm": 1.4285954236984253,
      "learning_rate": 8.011307103839539e-06,
      "loss": 0.0206,
      "step": 623620
    },
    {
      "epoch": 1.0206005380884116,
      "grad_norm": 1.2790602445602417,
      "learning_rate": 8.011241211626023e-06,
      "loss": 0.0228,
      "step": 623640
    },
    {
      "epoch": 1.0206332685270647,
      "grad_norm": 0.34902796149253845,
      "learning_rate": 8.011175319412505e-06,
      "loss": 0.0252,
      "step": 623660
    },
    {
      "epoch": 1.020665998965718,
      "grad_norm": 1.7234805822372437,
      "learning_rate": 8.011109427198988e-06,
      "loss": 0.0259,
      "step": 623680
    },
    {
      "epoch": 1.0206987294043715,
      "grad_norm": 0.17479868233203888,
      "learning_rate": 8.01104353498547e-06,
      "loss": 0.0239,
      "step": 623700
    },
    {
      "epoch": 1.0207314598430248,
      "grad_norm": 8.773740768432617,
      "learning_rate": 8.010977642771954e-06,
      "loss": 0.0241,
      "step": 623720
    },
    {
      "epoch": 1.0207641902816782,
      "grad_norm": 0.5532703399658203,
      "learning_rate": 8.010911750558437e-06,
      "loss": 0.023,
      "step": 623740
    },
    {
      "epoch": 1.0207969207203316,
      "grad_norm": 6.945309162139893,
      "learning_rate": 8.010845858344921e-06,
      "loss": 0.0222,
      "step": 623760
    },
    {
      "epoch": 1.020829651158985,
      "grad_norm": 4.062658786773682,
      "learning_rate": 8.010779966131403e-06,
      "loss": 0.0324,
      "step": 623780
    },
    {
      "epoch": 1.020862381597638,
      "grad_norm": 1.2143492698669434,
      "learning_rate": 8.010714073917886e-06,
      "loss": 0.021,
      "step": 623800
    },
    {
      "epoch": 1.0208951120362915,
      "grad_norm": 0.4739822447299957,
      "learning_rate": 8.01064818170437e-06,
      "loss": 0.032,
      "step": 623820
    },
    {
      "epoch": 1.0209278424749448,
      "grad_norm": 0.7372342348098755,
      "learning_rate": 8.010582289490852e-06,
      "loss": 0.0354,
      "step": 623840
    },
    {
      "epoch": 1.0209605729135982,
      "grad_norm": 0.3025246858596802,
      "learning_rate": 8.010516397277335e-06,
      "loss": 0.0142,
      "step": 623860
    },
    {
      "epoch": 1.0209933033522516,
      "grad_norm": 0.3487544059753418,
      "learning_rate": 8.010450505063817e-06,
      "loss": 0.0258,
      "step": 623880
    },
    {
      "epoch": 1.021026033790905,
      "grad_norm": 0.35577040910720825,
      "learning_rate": 8.010384612850301e-06,
      "loss": 0.014,
      "step": 623900
    },
    {
      "epoch": 1.021058764229558,
      "grad_norm": 0.4398156702518463,
      "learning_rate": 8.010318720636783e-06,
      "loss": 0.0255,
      "step": 623920
    },
    {
      "epoch": 1.0210914946682115,
      "grad_norm": 0.5412168502807617,
      "learning_rate": 8.010252828423266e-06,
      "loss": 0.0235,
      "step": 623940
    },
    {
      "epoch": 1.0211242251068648,
      "grad_norm": 0.46389028429985046,
      "learning_rate": 8.010186936209748e-06,
      "loss": 0.0321,
      "step": 623960
    },
    {
      "epoch": 1.0211569555455182,
      "grad_norm": 0.8370296955108643,
      "learning_rate": 8.010121043996232e-06,
      "loss": 0.0283,
      "step": 623980
    },
    {
      "epoch": 1.0211896859841716,
      "grad_norm": 1.1073795557022095,
      "learning_rate": 8.010055151782714e-06,
      "loss": 0.0179,
      "step": 624000
    },
    {
      "epoch": 1.021222416422825,
      "grad_norm": 0.2559504210948944,
      "learning_rate": 8.009989259569197e-06,
      "loss": 0.023,
      "step": 624020
    },
    {
      "epoch": 1.0212551468614783,
      "grad_norm": 1.6343722343444824,
      "learning_rate": 8.00992336735568e-06,
      "loss": 0.0307,
      "step": 624040
    },
    {
      "epoch": 1.0212878773001315,
      "grad_norm": 0.6261459589004517,
      "learning_rate": 8.009857475142163e-06,
      "loss": 0.0232,
      "step": 624060
    },
    {
      "epoch": 1.0213206077387849,
      "grad_norm": 0.32138094305992126,
      "learning_rate": 8.009791582928645e-06,
      "loss": 0.0271,
      "step": 624080
    },
    {
      "epoch": 1.0213533381774382,
      "grad_norm": 1.1130880117416382,
      "learning_rate": 8.009725690715128e-06,
      "loss": 0.0353,
      "step": 624100
    },
    {
      "epoch": 1.0213860686160916,
      "grad_norm": 0.42020079493522644,
      "learning_rate": 8.009659798501612e-06,
      "loss": 0.0215,
      "step": 624120
    },
    {
      "epoch": 1.021418799054745,
      "grad_norm": 1.0956783294677734,
      "learning_rate": 8.009593906288094e-06,
      "loss": 0.0248,
      "step": 624140
    },
    {
      "epoch": 1.0214515294933983,
      "grad_norm": 0.6540212035179138,
      "learning_rate": 8.009528014074577e-06,
      "loss": 0.0293,
      "step": 624160
    },
    {
      "epoch": 1.0214842599320515,
      "grad_norm": 0.8068288564682007,
      "learning_rate": 8.009462121861061e-06,
      "loss": 0.0161,
      "step": 624180
    },
    {
      "epoch": 1.0215169903707049,
      "grad_norm": 0.2943994104862213,
      "learning_rate": 8.009396229647543e-06,
      "loss": 0.0213,
      "step": 624200
    },
    {
      "epoch": 1.0215497208093582,
      "grad_norm": 0.9681114554405212,
      "learning_rate": 8.009330337434026e-06,
      "loss": 0.0203,
      "step": 624220
    },
    {
      "epoch": 1.0215824512480116,
      "grad_norm": 0.5562782883644104,
      "learning_rate": 8.00926444522051e-06,
      "loss": 0.0271,
      "step": 624240
    },
    {
      "epoch": 1.021615181686665,
      "grad_norm": 1.3548099994659424,
      "learning_rate": 8.009198553006992e-06,
      "loss": 0.0212,
      "step": 624260
    },
    {
      "epoch": 1.0216479121253184,
      "grad_norm": 0.49579960107803345,
      "learning_rate": 8.009132660793475e-06,
      "loss": 0.0279,
      "step": 624280
    },
    {
      "epoch": 1.0216806425639717,
      "grad_norm": 0.5667285919189453,
      "learning_rate": 8.009066768579957e-06,
      "loss": 0.0227,
      "step": 624300
    },
    {
      "epoch": 1.0217133730026249,
      "grad_norm": 0.6467990875244141,
      "learning_rate": 8.009000876366441e-06,
      "loss": 0.0334,
      "step": 624320
    },
    {
      "epoch": 1.0217461034412783,
      "grad_norm": 1.6446794271469116,
      "learning_rate": 8.008934984152923e-06,
      "loss": 0.0282,
      "step": 624340
    },
    {
      "epoch": 1.0217788338799316,
      "grad_norm": 1.4803403615951538,
      "learning_rate": 8.008869091939406e-06,
      "loss": 0.0308,
      "step": 624360
    },
    {
      "epoch": 1.021811564318585,
      "grad_norm": 0.4632743299007416,
      "learning_rate": 8.008803199725888e-06,
      "loss": 0.0188,
      "step": 624380
    },
    {
      "epoch": 1.0218442947572384,
      "grad_norm": 3.869387626647949,
      "learning_rate": 8.008737307512372e-06,
      "loss": 0.024,
      "step": 624400
    },
    {
      "epoch": 1.0218770251958917,
      "grad_norm": 0.9946185946464539,
      "learning_rate": 8.008671415298854e-06,
      "loss": 0.0287,
      "step": 624420
    },
    {
      "epoch": 1.0219097556345451,
      "grad_norm": 0.5960222482681274,
      "learning_rate": 8.008605523085337e-06,
      "loss": 0.0243,
      "step": 624440
    },
    {
      "epoch": 1.0219424860731983,
      "grad_norm": 2.349235773086548,
      "learning_rate": 8.008539630871821e-06,
      "loss": 0.023,
      "step": 624460
    },
    {
      "epoch": 1.0219752165118516,
      "grad_norm": 1.9214016199111938,
      "learning_rate": 8.008473738658303e-06,
      "loss": 0.0266,
      "step": 624480
    },
    {
      "epoch": 1.022007946950505,
      "grad_norm": 0.7520540356636047,
      "learning_rate": 8.008407846444786e-06,
      "loss": 0.0219,
      "step": 624500
    },
    {
      "epoch": 1.0220406773891584,
      "grad_norm": 0.2779979109764099,
      "learning_rate": 8.008341954231268e-06,
      "loss": 0.0171,
      "step": 624520
    },
    {
      "epoch": 1.0220734078278118,
      "grad_norm": 0.3658640682697296,
      "learning_rate": 8.008276062017752e-06,
      "loss": 0.0207,
      "step": 624540
    },
    {
      "epoch": 1.0221061382664651,
      "grad_norm": 2.2606847286224365,
      "learning_rate": 8.008210169804236e-06,
      "loss": 0.0275,
      "step": 624560
    },
    {
      "epoch": 1.0221388687051185,
      "grad_norm": 0.30672356486320496,
      "learning_rate": 8.008144277590717e-06,
      "loss": 0.0269,
      "step": 624580
    },
    {
      "epoch": 1.0221715991437716,
      "grad_norm": 0.19676315784454346,
      "learning_rate": 8.008078385377201e-06,
      "loss": 0.0165,
      "step": 624600
    },
    {
      "epoch": 1.022204329582425,
      "grad_norm": 1.447985291481018,
      "learning_rate": 8.008012493163685e-06,
      "loss": 0.0347,
      "step": 624620
    },
    {
      "epoch": 1.0222370600210784,
      "grad_norm": 0.3829492926597595,
      "learning_rate": 8.007946600950166e-06,
      "loss": 0.0247,
      "step": 624640
    },
    {
      "epoch": 1.0222697904597318,
      "grad_norm": 0.5345079898834229,
      "learning_rate": 8.00788070873665e-06,
      "loss": 0.0269,
      "step": 624660
    },
    {
      "epoch": 1.0223025208983851,
      "grad_norm": 1.7719238996505737,
      "learning_rate": 8.007814816523132e-06,
      "loss": 0.0301,
      "step": 624680
    },
    {
      "epoch": 1.0223352513370385,
      "grad_norm": 0.2576887607574463,
      "learning_rate": 8.007748924309616e-06,
      "loss": 0.0154,
      "step": 624700
    },
    {
      "epoch": 1.0223679817756917,
      "grad_norm": 1.2453980445861816,
      "learning_rate": 8.007683032096097e-06,
      "loss": 0.0267,
      "step": 624720
    },
    {
      "epoch": 1.022400712214345,
      "grad_norm": 0.545067548751831,
      "learning_rate": 8.007617139882581e-06,
      "loss": 0.0217,
      "step": 624740
    },
    {
      "epoch": 1.0224334426529984,
      "grad_norm": 1.2468328475952148,
      "learning_rate": 8.007551247669063e-06,
      "loss": 0.0305,
      "step": 624760
    },
    {
      "epoch": 1.0224661730916518,
      "grad_norm": 1.2919942140579224,
      "learning_rate": 8.007485355455547e-06,
      "loss": 0.0252,
      "step": 624780
    },
    {
      "epoch": 1.0224989035303051,
      "grad_norm": 1.1106622219085693,
      "learning_rate": 8.007419463242028e-06,
      "loss": 0.0211,
      "step": 624800
    },
    {
      "epoch": 1.0225316339689585,
      "grad_norm": 1.8630458116531372,
      "learning_rate": 8.007353571028512e-06,
      "loss": 0.0322,
      "step": 624820
    },
    {
      "epoch": 1.022564364407612,
      "grad_norm": 1.5436160564422607,
      "learning_rate": 8.007287678814996e-06,
      "loss": 0.0339,
      "step": 624840
    },
    {
      "epoch": 1.022597094846265,
      "grad_norm": 0.27568209171295166,
      "learning_rate": 8.007221786601477e-06,
      "loss": 0.0251,
      "step": 624860
    },
    {
      "epoch": 1.0226298252849184,
      "grad_norm": 1.2731767892837524,
      "learning_rate": 8.007155894387961e-06,
      "loss": 0.0247,
      "step": 624880
    },
    {
      "epoch": 1.0226625557235718,
      "grad_norm": 0.6303461194038391,
      "learning_rate": 8.007090002174443e-06,
      "loss": 0.0243,
      "step": 624900
    },
    {
      "epoch": 1.0226952861622252,
      "grad_norm": 0.35794222354888916,
      "learning_rate": 8.007024109960927e-06,
      "loss": 0.0181,
      "step": 624920
    },
    {
      "epoch": 1.0227280166008785,
      "grad_norm": 1.1430737972259521,
      "learning_rate": 8.006958217747408e-06,
      "loss": 0.0287,
      "step": 624940
    },
    {
      "epoch": 1.022760747039532,
      "grad_norm": 0.6617687940597534,
      "learning_rate": 8.006892325533892e-06,
      "loss": 0.0223,
      "step": 624960
    },
    {
      "epoch": 1.022793477478185,
      "grad_norm": 0.4756304919719696,
      "learning_rate": 8.006826433320376e-06,
      "loss": 0.0201,
      "step": 624980
    },
    {
      "epoch": 1.0228262079168384,
      "grad_norm": 0.2445840686559677,
      "learning_rate": 8.006760541106857e-06,
      "loss": 0.0248,
      "step": 625000
    },
    {
      "epoch": 1.0228589383554918,
      "grad_norm": 0.4491327702999115,
      "learning_rate": 8.006694648893341e-06,
      "loss": 0.0182,
      "step": 625020
    },
    {
      "epoch": 1.0228916687941452,
      "grad_norm": 1.4698349237442017,
      "learning_rate": 8.006628756679825e-06,
      "loss": 0.0216,
      "step": 625040
    },
    {
      "epoch": 1.0229243992327985,
      "grad_norm": 0.7124069929122925,
      "learning_rate": 8.006562864466307e-06,
      "loss": 0.023,
      "step": 625060
    },
    {
      "epoch": 1.022957129671452,
      "grad_norm": 0.1765119731426239,
      "learning_rate": 8.00649697225279e-06,
      "loss": 0.0236,
      "step": 625080
    },
    {
      "epoch": 1.0229898601101053,
      "grad_norm": 0.27182555198669434,
      "learning_rate": 8.006431080039272e-06,
      "loss": 0.0203,
      "step": 625100
    },
    {
      "epoch": 1.0230225905487584,
      "grad_norm": 0.9321435689926147,
      "learning_rate": 8.006365187825756e-06,
      "loss": 0.0252,
      "step": 625120
    },
    {
      "epoch": 1.0230553209874118,
      "grad_norm": 0.26774922013282776,
      "learning_rate": 8.006299295612238e-06,
      "loss": 0.0207,
      "step": 625140
    },
    {
      "epoch": 1.0230880514260652,
      "grad_norm": 0.7407320737838745,
      "learning_rate": 8.006233403398721e-06,
      "loss": 0.0245,
      "step": 625160
    },
    {
      "epoch": 1.0231207818647186,
      "grad_norm": 0.5725851655006409,
      "learning_rate": 8.006167511185205e-06,
      "loss": 0.0362,
      "step": 625180
    },
    {
      "epoch": 1.023153512303372,
      "grad_norm": 0.4213940501213074,
      "learning_rate": 8.006101618971687e-06,
      "loss": 0.0203,
      "step": 625200
    },
    {
      "epoch": 1.0231862427420253,
      "grad_norm": 0.5801243782043457,
      "learning_rate": 8.00603572675817e-06,
      "loss": 0.0258,
      "step": 625220
    },
    {
      "epoch": 1.0232189731806787,
      "grad_norm": 1.0636910200119019,
      "learning_rate": 8.005969834544652e-06,
      "loss": 0.0293,
      "step": 625240
    },
    {
      "epoch": 1.0232517036193318,
      "grad_norm": 0.8052767515182495,
      "learning_rate": 8.005903942331136e-06,
      "loss": 0.0284,
      "step": 625260
    },
    {
      "epoch": 1.0232844340579852,
      "grad_norm": 0.31299540400505066,
      "learning_rate": 8.005838050117618e-06,
      "loss": 0.0156,
      "step": 625280
    },
    {
      "epoch": 1.0233171644966386,
      "grad_norm": 0.378627747297287,
      "learning_rate": 8.005772157904101e-06,
      "loss": 0.015,
      "step": 625300
    },
    {
      "epoch": 1.023349894935292,
      "grad_norm": 0.5495824813842773,
      "learning_rate": 8.005706265690583e-06,
      "loss": 0.0204,
      "step": 625320
    },
    {
      "epoch": 1.0233826253739453,
      "grad_norm": 0.4047989845275879,
      "learning_rate": 8.005640373477067e-06,
      "loss": 0.0263,
      "step": 625340
    },
    {
      "epoch": 1.0234153558125987,
      "grad_norm": 0.23967432975769043,
      "learning_rate": 8.00557448126355e-06,
      "loss": 0.0167,
      "step": 625360
    },
    {
      "epoch": 1.0234480862512518,
      "grad_norm": 0.9853530526161194,
      "learning_rate": 8.005508589050032e-06,
      "loss": 0.0287,
      "step": 625380
    },
    {
      "epoch": 1.0234808166899052,
      "grad_norm": 0.8168850541114807,
      "learning_rate": 8.005442696836516e-06,
      "loss": 0.0233,
      "step": 625400
    },
    {
      "epoch": 1.0235135471285586,
      "grad_norm": 0.49292799830436707,
      "learning_rate": 8.005376804623e-06,
      "loss": 0.0192,
      "step": 625420
    },
    {
      "epoch": 1.023546277567212,
      "grad_norm": 0.49122780561447144,
      "learning_rate": 8.005310912409481e-06,
      "loss": 0.0261,
      "step": 625440
    },
    {
      "epoch": 1.0235790080058653,
      "grad_norm": 2.2500717639923096,
      "learning_rate": 8.005245020195965e-06,
      "loss": 0.0284,
      "step": 625460
    },
    {
      "epoch": 1.0236117384445187,
      "grad_norm": 0.8318456411361694,
      "learning_rate": 8.005179127982447e-06,
      "loss": 0.0272,
      "step": 625480
    },
    {
      "epoch": 1.023644468883172,
      "grad_norm": 0.3812616765499115,
      "learning_rate": 8.00511323576893e-06,
      "loss": 0.0266,
      "step": 625500
    },
    {
      "epoch": 1.0236771993218252,
      "grad_norm": 0.6844154000282288,
      "learning_rate": 8.005047343555414e-06,
      "loss": 0.021,
      "step": 625520
    },
    {
      "epoch": 1.0237099297604786,
      "grad_norm": 2.2058377265930176,
      "learning_rate": 8.004981451341896e-06,
      "loss": 0.0258,
      "step": 625540
    },
    {
      "epoch": 1.023742660199132,
      "grad_norm": 0.9997652173042297,
      "learning_rate": 8.00491555912838e-06,
      "loss": 0.0333,
      "step": 625560
    },
    {
      "epoch": 1.0237753906377853,
      "grad_norm": 1.5111497640609741,
      "learning_rate": 8.004849666914861e-06,
      "loss": 0.0301,
      "step": 625580
    },
    {
      "epoch": 1.0238081210764387,
      "grad_norm": 2.152299404144287,
      "learning_rate": 8.004783774701345e-06,
      "loss": 0.0237,
      "step": 625600
    },
    {
      "epoch": 1.023840851515092,
      "grad_norm": 0.3282630145549774,
      "learning_rate": 8.004717882487827e-06,
      "loss": 0.0215,
      "step": 625620
    },
    {
      "epoch": 1.0238735819537454,
      "grad_norm": 1.317292332649231,
      "learning_rate": 8.00465199027431e-06,
      "loss": 0.0162,
      "step": 625640
    },
    {
      "epoch": 1.0239063123923986,
      "grad_norm": 0.49629467725753784,
      "learning_rate": 8.004586098060792e-06,
      "loss": 0.0308,
      "step": 625660
    },
    {
      "epoch": 1.023939042831052,
      "grad_norm": 1.2486995458602905,
      "learning_rate": 8.004520205847276e-06,
      "loss": 0.0237,
      "step": 625680
    },
    {
      "epoch": 1.0239717732697053,
      "grad_norm": 0.4567444324493408,
      "learning_rate": 8.004454313633758e-06,
      "loss": 0.0253,
      "step": 625700
    },
    {
      "epoch": 1.0240045037083587,
      "grad_norm": 0.3226228356361389,
      "learning_rate": 8.004388421420241e-06,
      "loss": 0.0272,
      "step": 625720
    },
    {
      "epoch": 1.024037234147012,
      "grad_norm": 0.6898571252822876,
      "learning_rate": 8.004322529206723e-06,
      "loss": 0.027,
      "step": 625740
    },
    {
      "epoch": 1.0240699645856655,
      "grad_norm": 1.0321526527404785,
      "learning_rate": 8.004256636993207e-06,
      "loss": 0.0197,
      "step": 625760
    },
    {
      "epoch": 1.0241026950243186,
      "grad_norm": 0.6429665684700012,
      "learning_rate": 8.00419074477969e-06,
      "loss": 0.0257,
      "step": 625780
    },
    {
      "epoch": 1.024135425462972,
      "grad_norm": 1.1924420595169067,
      "learning_rate": 8.004124852566172e-06,
      "loss": 0.0262,
      "step": 625800
    },
    {
      "epoch": 1.0241681559016254,
      "grad_norm": 0.7304521203041077,
      "learning_rate": 8.004058960352656e-06,
      "loss": 0.0231,
      "step": 625820
    },
    {
      "epoch": 1.0242008863402787,
      "grad_norm": 0.6402755975723267,
      "learning_rate": 8.00399306813914e-06,
      "loss": 0.0254,
      "step": 625840
    },
    {
      "epoch": 1.024233616778932,
      "grad_norm": 0.8198480010032654,
      "learning_rate": 8.003927175925621e-06,
      "loss": 0.0297,
      "step": 625860
    },
    {
      "epoch": 1.0242663472175855,
      "grad_norm": 0.8764432072639465,
      "learning_rate": 8.003861283712105e-06,
      "loss": 0.0236,
      "step": 625880
    },
    {
      "epoch": 1.0242990776562388,
      "grad_norm": 0.5653712749481201,
      "learning_rate": 8.003795391498588e-06,
      "loss": 0.0158,
      "step": 625900
    },
    {
      "epoch": 1.024331808094892,
      "grad_norm": 2.032837152481079,
      "learning_rate": 8.00372949928507e-06,
      "loss": 0.0247,
      "step": 625920
    },
    {
      "epoch": 1.0243645385335454,
      "grad_norm": 0.9675953984260559,
      "learning_rate": 8.003663607071554e-06,
      "loss": 0.0205,
      "step": 625940
    },
    {
      "epoch": 1.0243972689721987,
      "grad_norm": 0.6937581300735474,
      "learning_rate": 8.003597714858036e-06,
      "loss": 0.0166,
      "step": 625960
    },
    {
      "epoch": 1.024429999410852,
      "grad_norm": 0.7473134398460388,
      "learning_rate": 8.00353182264452e-06,
      "loss": 0.0242,
      "step": 625980
    },
    {
      "epoch": 1.0244627298495055,
      "grad_norm": 1.2628090381622314,
      "learning_rate": 8.003465930431001e-06,
      "loss": 0.0279,
      "step": 626000
    },
    {
      "epoch": 1.0244954602881589,
      "grad_norm": 0.8684787154197693,
      "learning_rate": 8.003400038217485e-06,
      "loss": 0.0265,
      "step": 626020
    },
    {
      "epoch": 1.0245281907268122,
      "grad_norm": 0.3088054060935974,
      "learning_rate": 8.003334146003967e-06,
      "loss": 0.0245,
      "step": 626040
    },
    {
      "epoch": 1.0245609211654654,
      "grad_norm": 0.7244008183479309,
      "learning_rate": 8.00326825379045e-06,
      "loss": 0.0224,
      "step": 626060
    },
    {
      "epoch": 1.0245936516041187,
      "grad_norm": 2.5866589546203613,
      "learning_rate": 8.003202361576932e-06,
      "loss": 0.0267,
      "step": 626080
    },
    {
      "epoch": 1.0246263820427721,
      "grad_norm": 0.4870997369289398,
      "learning_rate": 8.003136469363416e-06,
      "loss": 0.0178,
      "step": 626100
    },
    {
      "epoch": 1.0246591124814255,
      "grad_norm": 0.32104694843292236,
      "learning_rate": 8.003070577149898e-06,
      "loss": 0.031,
      "step": 626120
    },
    {
      "epoch": 1.0246918429200789,
      "grad_norm": 1.0401520729064941,
      "learning_rate": 8.003004684936381e-06,
      "loss": 0.0317,
      "step": 626140
    },
    {
      "epoch": 1.0247245733587322,
      "grad_norm": 1.0376348495483398,
      "learning_rate": 8.002938792722865e-06,
      "loss": 0.0328,
      "step": 626160
    },
    {
      "epoch": 1.0247573037973854,
      "grad_norm": 0.4270121157169342,
      "learning_rate": 8.002872900509347e-06,
      "loss": 0.0195,
      "step": 626180
    },
    {
      "epoch": 1.0247900342360388,
      "grad_norm": 0.39313793182373047,
      "learning_rate": 8.00280700829583e-06,
      "loss": 0.0297,
      "step": 626200
    },
    {
      "epoch": 1.0248227646746921,
      "grad_norm": 0.4666244089603424,
      "learning_rate": 8.002741116082314e-06,
      "loss": 0.0225,
      "step": 626220
    },
    {
      "epoch": 1.0248554951133455,
      "grad_norm": 0.5633506178855896,
      "learning_rate": 8.002675223868796e-06,
      "loss": 0.021,
      "step": 626240
    },
    {
      "epoch": 1.0248882255519989,
      "grad_norm": 2.135178565979004,
      "learning_rate": 8.00260933165528e-06,
      "loss": 0.0266,
      "step": 626260
    },
    {
      "epoch": 1.0249209559906522,
      "grad_norm": 1.0624808073043823,
      "learning_rate": 8.002543439441763e-06,
      "loss": 0.0251,
      "step": 626280
    },
    {
      "epoch": 1.0249536864293056,
      "grad_norm": 4.995696067810059,
      "learning_rate": 8.002477547228245e-06,
      "loss": 0.0271,
      "step": 626300
    },
    {
      "epoch": 1.0249864168679588,
      "grad_norm": 0.5613435506820679,
      "learning_rate": 8.002411655014728e-06,
      "loss": 0.0292,
      "step": 626320
    },
    {
      "epoch": 1.0250191473066121,
      "grad_norm": 1.122131586074829,
      "learning_rate": 8.00234576280121e-06,
      "loss": 0.0183,
      "step": 626340
    },
    {
      "epoch": 1.0250518777452655,
      "grad_norm": 1.3636815547943115,
      "learning_rate": 8.002279870587694e-06,
      "loss": 0.0272,
      "step": 626360
    },
    {
      "epoch": 1.0250846081839189,
      "grad_norm": 0.1227453276515007,
      "learning_rate": 8.002213978374176e-06,
      "loss": 0.0268,
      "step": 626380
    },
    {
      "epoch": 1.0251173386225723,
      "grad_norm": 1.2922821044921875,
      "learning_rate": 8.00214808616066e-06,
      "loss": 0.0237,
      "step": 626400
    },
    {
      "epoch": 1.0251500690612256,
      "grad_norm": 0.8676868677139282,
      "learning_rate": 8.002082193947141e-06,
      "loss": 0.0243,
      "step": 626420
    },
    {
      "epoch": 1.025182799499879,
      "grad_norm": 0.6525790691375732,
      "learning_rate": 8.002016301733625e-06,
      "loss": 0.0228,
      "step": 626440
    },
    {
      "epoch": 1.0252155299385322,
      "grad_norm": 0.7660719156265259,
      "learning_rate": 8.001950409520107e-06,
      "loss": 0.0379,
      "step": 626460
    },
    {
      "epoch": 1.0252482603771855,
      "grad_norm": 2.126469373703003,
      "learning_rate": 8.00188451730659e-06,
      "loss": 0.0254,
      "step": 626480
    },
    {
      "epoch": 1.025280990815839,
      "grad_norm": 1.2140288352966309,
      "learning_rate": 8.001818625093072e-06,
      "loss": 0.0289,
      "step": 626500
    },
    {
      "epoch": 1.0253137212544923,
      "grad_norm": 0.45724940299987793,
      "learning_rate": 8.001752732879556e-06,
      "loss": 0.0222,
      "step": 626520
    },
    {
      "epoch": 1.0253464516931456,
      "grad_norm": 0.6262626051902771,
      "learning_rate": 8.00168684066604e-06,
      "loss": 0.0238,
      "step": 626540
    },
    {
      "epoch": 1.025379182131799,
      "grad_norm": 0.3919842839241028,
      "learning_rate": 8.001620948452521e-06,
      "loss": 0.0274,
      "step": 626560
    },
    {
      "epoch": 1.0254119125704522,
      "grad_norm": 0.6703901290893555,
      "learning_rate": 8.001555056239005e-06,
      "loss": 0.0306,
      "step": 626580
    },
    {
      "epoch": 1.0254446430091055,
      "grad_norm": 0.4450572729110718,
      "learning_rate": 8.001489164025489e-06,
      "loss": 0.0202,
      "step": 626600
    },
    {
      "epoch": 1.025477373447759,
      "grad_norm": 0.266408234834671,
      "learning_rate": 8.00142327181197e-06,
      "loss": 0.021,
      "step": 626620
    },
    {
      "epoch": 1.0255101038864123,
      "grad_norm": 0.5350415110588074,
      "learning_rate": 8.001357379598454e-06,
      "loss": 0.0215,
      "step": 626640
    },
    {
      "epoch": 1.0255428343250657,
      "grad_norm": 0.5887371301651001,
      "learning_rate": 8.001291487384938e-06,
      "loss": 0.0257,
      "step": 626660
    },
    {
      "epoch": 1.025575564763719,
      "grad_norm": 0.6548394560813904,
      "learning_rate": 8.00122559517142e-06,
      "loss": 0.0261,
      "step": 626680
    },
    {
      "epoch": 1.0256082952023724,
      "grad_norm": 0.19694462418556213,
      "learning_rate": 8.001159702957903e-06,
      "loss": 0.0316,
      "step": 626700
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 0.3072262406349182,
      "learning_rate": 8.001093810744385e-06,
      "loss": 0.0311,
      "step": 626720
    },
    {
      "epoch": 1.025673756079679,
      "grad_norm": 0.3369370698928833,
      "learning_rate": 8.001027918530869e-06,
      "loss": 0.0312,
      "step": 626740
    },
    {
      "epoch": 1.0257064865183323,
      "grad_norm": 0.3238648772239685,
      "learning_rate": 8.00096202631735e-06,
      "loss": 0.0235,
      "step": 626760
    },
    {
      "epoch": 1.0257392169569857,
      "grad_norm": 0.39696210622787476,
      "learning_rate": 8.000896134103834e-06,
      "loss": 0.0383,
      "step": 626780
    },
    {
      "epoch": 1.025771947395639,
      "grad_norm": 0.42713287472724915,
      "learning_rate": 8.000830241890316e-06,
      "loss": 0.0151,
      "step": 626800
    },
    {
      "epoch": 1.0258046778342924,
      "grad_norm": 0.08062797784805298,
      "learning_rate": 8.0007643496768e-06,
      "loss": 0.0326,
      "step": 626820
    },
    {
      "epoch": 1.0258374082729458,
      "grad_norm": 1.2629338502883911,
      "learning_rate": 8.000698457463281e-06,
      "loss": 0.03,
      "step": 626840
    },
    {
      "epoch": 1.025870138711599,
      "grad_norm": 0.9028367400169373,
      "learning_rate": 8.000632565249765e-06,
      "loss": 0.0179,
      "step": 626860
    },
    {
      "epoch": 1.0259028691502523,
      "grad_norm": 0.6115388870239258,
      "learning_rate": 8.000566673036247e-06,
      "loss": 0.0244,
      "step": 626880
    },
    {
      "epoch": 1.0259355995889057,
      "grad_norm": 0.20627783238887787,
      "learning_rate": 8.00050078082273e-06,
      "loss": 0.0295,
      "step": 626900
    },
    {
      "epoch": 1.025968330027559,
      "grad_norm": 0.3547878563404083,
      "learning_rate": 8.000434888609214e-06,
      "loss": 0.0204,
      "step": 626920
    },
    {
      "epoch": 1.0260010604662124,
      "grad_norm": 0.2968188524246216,
      "learning_rate": 8.000368996395696e-06,
      "loss": 0.0277,
      "step": 626940
    },
    {
      "epoch": 1.0260337909048658,
      "grad_norm": 0.07640032470226288,
      "learning_rate": 8.00030310418218e-06,
      "loss": 0.0232,
      "step": 626960
    },
    {
      "epoch": 1.026066521343519,
      "grad_norm": 0.7175748348236084,
      "learning_rate": 8.000237211968661e-06,
      "loss": 0.0269,
      "step": 626980
    },
    {
      "epoch": 1.0260992517821723,
      "grad_norm": 2.0522210597991943,
      "learning_rate": 8.000171319755145e-06,
      "loss": 0.0255,
      "step": 627000
    },
    {
      "epoch": 1.0261319822208257,
      "grad_norm": 2.363400459289551,
      "learning_rate": 8.000105427541629e-06,
      "loss": 0.0312,
      "step": 627020
    },
    {
      "epoch": 1.026164712659479,
      "grad_norm": 0.5819412469863892,
      "learning_rate": 8.00003953532811e-06,
      "loss": 0.026,
      "step": 627040
    },
    {
      "epoch": 1.0261974430981324,
      "grad_norm": 0.5184900760650635,
      "learning_rate": 7.999973643114594e-06,
      "loss": 0.0154,
      "step": 627060
    },
    {
      "epoch": 1.0262301735367858,
      "grad_norm": 0.6334632039070129,
      "learning_rate": 7.999907750901078e-06,
      "loss": 0.0265,
      "step": 627080
    },
    {
      "epoch": 1.0262629039754392,
      "grad_norm": 0.6535117626190186,
      "learning_rate": 7.99984185868756e-06,
      "loss": 0.0232,
      "step": 627100
    },
    {
      "epoch": 1.0262956344140923,
      "grad_norm": 0.38951730728149414,
      "learning_rate": 7.999775966474043e-06,
      "loss": 0.0203,
      "step": 627120
    },
    {
      "epoch": 1.0263283648527457,
      "grad_norm": 1.6585333347320557,
      "learning_rate": 7.999710074260525e-06,
      "loss": 0.0313,
      "step": 627140
    },
    {
      "epoch": 1.026361095291399,
      "grad_norm": 2.1659531593322754,
      "learning_rate": 7.999644182047009e-06,
      "loss": 0.024,
      "step": 627160
    },
    {
      "epoch": 1.0263938257300524,
      "grad_norm": 0.7455751299858093,
      "learning_rate": 7.99957828983349e-06,
      "loss": 0.0178,
      "step": 627180
    },
    {
      "epoch": 1.0264265561687058,
      "grad_norm": 1.91641104221344,
      "learning_rate": 7.999512397619974e-06,
      "loss": 0.016,
      "step": 627200
    },
    {
      "epoch": 1.0264592866073592,
      "grad_norm": 0.7081521153450012,
      "learning_rate": 7.999446505406456e-06,
      "loss": 0.0286,
      "step": 627220
    },
    {
      "epoch": 1.0264920170460123,
      "grad_norm": 1.1633877754211426,
      "learning_rate": 7.99938061319294e-06,
      "loss": 0.0235,
      "step": 627240
    },
    {
      "epoch": 1.0265247474846657,
      "grad_norm": 0.8938809633255005,
      "learning_rate": 7.999314720979421e-06,
      "loss": 0.0255,
      "step": 627260
    },
    {
      "epoch": 1.026557477923319,
      "grad_norm": 0.9459730982780457,
      "learning_rate": 7.999248828765905e-06,
      "loss": 0.0305,
      "step": 627280
    },
    {
      "epoch": 1.0265902083619725,
      "grad_norm": 0.6738806366920471,
      "learning_rate": 7.999182936552389e-06,
      "loss": 0.0195,
      "step": 627300
    },
    {
      "epoch": 1.0266229388006258,
      "grad_norm": 0.28850236535072327,
      "learning_rate": 7.99911704433887e-06,
      "loss": 0.0198,
      "step": 627320
    },
    {
      "epoch": 1.0266556692392792,
      "grad_norm": 0.5298376083374023,
      "learning_rate": 7.999051152125354e-06,
      "loss": 0.033,
      "step": 627340
    },
    {
      "epoch": 1.0266883996779326,
      "grad_norm": 1.2973427772521973,
      "learning_rate": 7.998985259911836e-06,
      "loss": 0.0313,
      "step": 627360
    },
    {
      "epoch": 1.0267211301165857,
      "grad_norm": 1.3065565824508667,
      "learning_rate": 7.99891936769832e-06,
      "loss": 0.0258,
      "step": 627380
    },
    {
      "epoch": 1.026753860555239,
      "grad_norm": 0.26612070202827454,
      "learning_rate": 7.998853475484803e-06,
      "loss": 0.0241,
      "step": 627400
    },
    {
      "epoch": 1.0267865909938925,
      "grad_norm": 0.9662342667579651,
      "learning_rate": 7.998787583271285e-06,
      "loss": 0.0159,
      "step": 627420
    },
    {
      "epoch": 1.0268193214325458,
      "grad_norm": 0.2816471457481384,
      "learning_rate": 7.998721691057769e-06,
      "loss": 0.0107,
      "step": 627440
    },
    {
      "epoch": 1.0268520518711992,
      "grad_norm": 0.4227156639099121,
      "learning_rate": 7.998655798844252e-06,
      "loss": 0.0193,
      "step": 627460
    },
    {
      "epoch": 1.0268847823098526,
      "grad_norm": 1.473987102508545,
      "learning_rate": 7.998589906630734e-06,
      "loss": 0.0332,
      "step": 627480
    },
    {
      "epoch": 1.026917512748506,
      "grad_norm": 0.2511398494243622,
      "learning_rate": 7.998524014417218e-06,
      "loss": 0.0269,
      "step": 627500
    },
    {
      "epoch": 1.026950243187159,
      "grad_norm": 0.6688551306724548,
      "learning_rate": 7.9984581222037e-06,
      "loss": 0.0225,
      "step": 627520
    },
    {
      "epoch": 1.0269829736258125,
      "grad_norm": 0.6412619948387146,
      "learning_rate": 7.998392229990183e-06,
      "loss": 0.0239,
      "step": 627540
    },
    {
      "epoch": 1.0270157040644659,
      "grad_norm": 0.25647884607315063,
      "learning_rate": 7.998326337776665e-06,
      "loss": 0.0253,
      "step": 627560
    },
    {
      "epoch": 1.0270484345031192,
      "grad_norm": 0.6337825059890747,
      "learning_rate": 7.998260445563149e-06,
      "loss": 0.0161,
      "step": 627580
    },
    {
      "epoch": 1.0270811649417726,
      "grad_norm": 0.788362443447113,
      "learning_rate": 7.99819455334963e-06,
      "loss": 0.0159,
      "step": 627600
    },
    {
      "epoch": 1.027113895380426,
      "grad_norm": 0.9362658262252808,
      "learning_rate": 7.998128661136114e-06,
      "loss": 0.0291,
      "step": 627620
    },
    {
      "epoch": 1.0271466258190793,
      "grad_norm": 0.7333886623382568,
      "learning_rate": 7.998062768922598e-06,
      "loss": 0.0292,
      "step": 627640
    },
    {
      "epoch": 1.0271793562577325,
      "grad_norm": 0.5264026522636414,
      "learning_rate": 7.99799687670908e-06,
      "loss": 0.0261,
      "step": 627660
    },
    {
      "epoch": 1.0272120866963859,
      "grad_norm": 0.775676965713501,
      "learning_rate": 7.997930984495563e-06,
      "loss": 0.0276,
      "step": 627680
    },
    {
      "epoch": 1.0272448171350392,
      "grad_norm": 0.24349874258041382,
      "learning_rate": 7.997865092282045e-06,
      "loss": 0.0179,
      "step": 627700
    },
    {
      "epoch": 1.0272775475736926,
      "grad_norm": 0.5664140582084656,
      "learning_rate": 7.997799200068529e-06,
      "loss": 0.0235,
      "step": 627720
    },
    {
      "epoch": 1.027310278012346,
      "grad_norm": 1.0207244157791138,
      "learning_rate": 7.99773330785501e-06,
      "loss": 0.0238,
      "step": 627740
    },
    {
      "epoch": 1.0273430084509994,
      "grad_norm": 3.683393955230713,
      "learning_rate": 7.997667415641494e-06,
      "loss": 0.023,
      "step": 627760
    },
    {
      "epoch": 1.0273757388896525,
      "grad_norm": 0.3581734001636505,
      "learning_rate": 7.997601523427976e-06,
      "loss": 0.0148,
      "step": 627780
    },
    {
      "epoch": 1.0274084693283059,
      "grad_norm": 0.7455745935440063,
      "learning_rate": 7.99753563121446e-06,
      "loss": 0.0275,
      "step": 627800
    },
    {
      "epoch": 1.0274411997669592,
      "grad_norm": 0.6096510291099548,
      "learning_rate": 7.997469739000943e-06,
      "loss": 0.0291,
      "step": 627820
    },
    {
      "epoch": 1.0274739302056126,
      "grad_norm": 0.11703919619321823,
      "learning_rate": 7.997403846787425e-06,
      "loss": 0.0186,
      "step": 627840
    },
    {
      "epoch": 1.027506660644266,
      "grad_norm": 0.3664426803588867,
      "learning_rate": 7.997337954573909e-06,
      "loss": 0.0317,
      "step": 627860
    },
    {
      "epoch": 1.0275393910829194,
      "grad_norm": 1.5452386140823364,
      "learning_rate": 7.997272062360392e-06,
      "loss": 0.0237,
      "step": 627880
    },
    {
      "epoch": 1.0275721215215727,
      "grad_norm": 0.732933759689331,
      "learning_rate": 7.997206170146874e-06,
      "loss": 0.0287,
      "step": 627900
    },
    {
      "epoch": 1.0276048519602259,
      "grad_norm": 0.7361260652542114,
      "learning_rate": 7.997140277933358e-06,
      "loss": 0.0316,
      "step": 627920
    },
    {
      "epoch": 1.0276375823988793,
      "grad_norm": 0.19478482007980347,
      "learning_rate": 7.99707438571984e-06,
      "loss": 0.0203,
      "step": 627940
    },
    {
      "epoch": 1.0276703128375326,
      "grad_norm": 1.2900054454803467,
      "learning_rate": 7.997008493506323e-06,
      "loss": 0.022,
      "step": 627960
    },
    {
      "epoch": 1.027703043276186,
      "grad_norm": 1.176424503326416,
      "learning_rate": 7.996942601292807e-06,
      "loss": 0.0178,
      "step": 627980
    },
    {
      "epoch": 1.0277357737148394,
      "grad_norm": 1.0900322198867798,
      "learning_rate": 7.996876709079289e-06,
      "loss": 0.0267,
      "step": 628000
    },
    {
      "epoch": 1.0277685041534927,
      "grad_norm": 0.9498129487037659,
      "learning_rate": 7.996810816865772e-06,
      "loss": 0.0294,
      "step": 628020
    },
    {
      "epoch": 1.027801234592146,
      "grad_norm": 0.783626914024353,
      "learning_rate": 7.996744924652254e-06,
      "loss": 0.0248,
      "step": 628040
    },
    {
      "epoch": 1.0278339650307993,
      "grad_norm": 1.7746573686599731,
      "learning_rate": 7.996679032438738e-06,
      "loss": 0.0225,
      "step": 628060
    },
    {
      "epoch": 1.0278666954694526,
      "grad_norm": 0.21539723873138428,
      "learning_rate": 7.99661314022522e-06,
      "loss": 0.0257,
      "step": 628080
    },
    {
      "epoch": 1.027899425908106,
      "grad_norm": 1.4957083463668823,
      "learning_rate": 7.996547248011703e-06,
      "loss": 0.0324,
      "step": 628100
    },
    {
      "epoch": 1.0279321563467594,
      "grad_norm": 0.4037673771381378,
      "learning_rate": 7.996481355798185e-06,
      "loss": 0.0201,
      "step": 628120
    },
    {
      "epoch": 1.0279648867854128,
      "grad_norm": 0.3644205629825592,
      "learning_rate": 7.996415463584669e-06,
      "loss": 0.0255,
      "step": 628140
    },
    {
      "epoch": 1.0279976172240661,
      "grad_norm": 0.8408507108688354,
      "learning_rate": 7.99634957137115e-06,
      "loss": 0.0258,
      "step": 628160
    },
    {
      "epoch": 1.0280303476627193,
      "grad_norm": 1.0471220016479492,
      "learning_rate": 7.996283679157634e-06,
      "loss": 0.0175,
      "step": 628180
    },
    {
      "epoch": 1.0280630781013727,
      "grad_norm": 2.288356304168701,
      "learning_rate": 7.996217786944118e-06,
      "loss": 0.0236,
      "step": 628200
    },
    {
      "epoch": 1.028095808540026,
      "grad_norm": 1.4467042684555054,
      "learning_rate": 7.9961518947306e-06,
      "loss": 0.023,
      "step": 628220
    },
    {
      "epoch": 1.0281285389786794,
      "grad_norm": 0.5187520980834961,
      "learning_rate": 7.996086002517083e-06,
      "loss": 0.0306,
      "step": 628240
    },
    {
      "epoch": 1.0281612694173328,
      "grad_norm": 0.4265299141407013,
      "learning_rate": 7.996020110303567e-06,
      "loss": 0.0237,
      "step": 628260
    },
    {
      "epoch": 1.0281939998559861,
      "grad_norm": 0.4299844205379486,
      "learning_rate": 7.995954218090049e-06,
      "loss": 0.0227,
      "step": 628280
    },
    {
      "epoch": 1.0282267302946395,
      "grad_norm": 0.5983567237854004,
      "learning_rate": 7.995888325876532e-06,
      "loss": 0.0306,
      "step": 628300
    },
    {
      "epoch": 1.0282594607332927,
      "grad_norm": 0.32476526498794556,
      "learning_rate": 7.995822433663016e-06,
      "loss": 0.0192,
      "step": 628320
    },
    {
      "epoch": 1.028292191171946,
      "grad_norm": 0.4090033769607544,
      "learning_rate": 7.995756541449498e-06,
      "loss": 0.0188,
      "step": 628340
    },
    {
      "epoch": 1.0283249216105994,
      "grad_norm": 0.8493261337280273,
      "learning_rate": 7.995690649235981e-06,
      "loss": 0.0259,
      "step": 628360
    },
    {
      "epoch": 1.0283576520492528,
      "grad_norm": 1.0304639339447021,
      "learning_rate": 7.995624757022463e-06,
      "loss": 0.0236,
      "step": 628380
    },
    {
      "epoch": 1.0283903824879062,
      "grad_norm": 0.22817906737327576,
      "learning_rate": 7.995558864808947e-06,
      "loss": 0.0294,
      "step": 628400
    },
    {
      "epoch": 1.0284231129265595,
      "grad_norm": 0.43922990560531616,
      "learning_rate": 7.995492972595429e-06,
      "loss": 0.0214,
      "step": 628420
    },
    {
      "epoch": 1.0284558433652127,
      "grad_norm": 0.3049697279930115,
      "learning_rate": 7.995427080381912e-06,
      "loss": 0.0205,
      "step": 628440
    },
    {
      "epoch": 1.028488573803866,
      "grad_norm": 0.8689291477203369,
      "learning_rate": 7.995361188168394e-06,
      "loss": 0.0215,
      "step": 628460
    },
    {
      "epoch": 1.0285213042425194,
      "grad_norm": 0.5578646063804626,
      "learning_rate": 7.995295295954878e-06,
      "loss": 0.0217,
      "step": 628480
    },
    {
      "epoch": 1.0285540346811728,
      "grad_norm": 0.1725436896085739,
      "learning_rate": 7.99522940374136e-06,
      "loss": 0.0254,
      "step": 628500
    },
    {
      "epoch": 1.0285867651198262,
      "grad_norm": 0.9920802116394043,
      "learning_rate": 7.995163511527843e-06,
      "loss": 0.0242,
      "step": 628520
    },
    {
      "epoch": 1.0286194955584795,
      "grad_norm": 1.5440247058868408,
      "learning_rate": 7.995097619314325e-06,
      "loss": 0.0261,
      "step": 628540
    },
    {
      "epoch": 1.028652225997133,
      "grad_norm": 0.0883168950676918,
      "learning_rate": 7.995031727100809e-06,
      "loss": 0.0273,
      "step": 628560
    },
    {
      "epoch": 1.028684956435786,
      "grad_norm": 1.2707215547561646,
      "learning_rate": 7.99496583488729e-06,
      "loss": 0.0202,
      "step": 628580
    },
    {
      "epoch": 1.0287176868744394,
      "grad_norm": 0.3411855399608612,
      "learning_rate": 7.994899942673774e-06,
      "loss": 0.0225,
      "step": 628600
    },
    {
      "epoch": 1.0287504173130928,
      "grad_norm": 0.485256165266037,
      "learning_rate": 7.994834050460258e-06,
      "loss": 0.0204,
      "step": 628620
    },
    {
      "epoch": 1.0287831477517462,
      "grad_norm": 0.7554578185081482,
      "learning_rate": 7.99476815824674e-06,
      "loss": 0.0276,
      "step": 628640
    },
    {
      "epoch": 1.0288158781903995,
      "grad_norm": 1.6745052337646484,
      "learning_rate": 7.994702266033223e-06,
      "loss": 0.0215,
      "step": 628660
    },
    {
      "epoch": 1.028848608629053,
      "grad_norm": 0.3793298304080963,
      "learning_rate": 7.994636373819707e-06,
      "loss": 0.0264,
      "step": 628680
    },
    {
      "epoch": 1.0288813390677063,
      "grad_norm": 2.1159777641296387,
      "learning_rate": 7.994570481606189e-06,
      "loss": 0.0221,
      "step": 628700
    },
    {
      "epoch": 1.0289140695063594,
      "grad_norm": 0.6935148239135742,
      "learning_rate": 7.994504589392672e-06,
      "loss": 0.0258,
      "step": 628720
    },
    {
      "epoch": 1.0289467999450128,
      "grad_norm": 0.3583630919456482,
      "learning_rate": 7.994438697179156e-06,
      "loss": 0.0298,
      "step": 628740
    },
    {
      "epoch": 1.0289795303836662,
      "grad_norm": 0.3605462610721588,
      "learning_rate": 7.994372804965638e-06,
      "loss": 0.0242,
      "step": 628760
    },
    {
      "epoch": 1.0290122608223196,
      "grad_norm": 0.4851061999797821,
      "learning_rate": 7.994306912752122e-06,
      "loss": 0.0236,
      "step": 628780
    },
    {
      "epoch": 1.029044991260973,
      "grad_norm": 0.769138514995575,
      "learning_rate": 7.994241020538603e-06,
      "loss": 0.0296,
      "step": 628800
    },
    {
      "epoch": 1.0290777216996263,
      "grad_norm": 1.4254025220870972,
      "learning_rate": 7.994175128325087e-06,
      "loss": 0.0397,
      "step": 628820
    },
    {
      "epoch": 1.0291104521382795,
      "grad_norm": 1.092426061630249,
      "learning_rate": 7.994109236111569e-06,
      "loss": 0.0269,
      "step": 628840
    },
    {
      "epoch": 1.0291431825769328,
      "grad_norm": 0.903052031993866,
      "learning_rate": 7.994043343898053e-06,
      "loss": 0.0246,
      "step": 628860
    },
    {
      "epoch": 1.0291759130155862,
      "grad_norm": 0.7946969866752625,
      "learning_rate": 7.993977451684534e-06,
      "loss": 0.0213,
      "step": 628880
    },
    {
      "epoch": 1.0292086434542396,
      "grad_norm": 1.0529171228408813,
      "learning_rate": 7.993911559471018e-06,
      "loss": 0.0339,
      "step": 628900
    },
    {
      "epoch": 1.029241373892893,
      "grad_norm": 0.4867667555809021,
      "learning_rate": 7.9938456672575e-06,
      "loss": 0.0251,
      "step": 628920
    },
    {
      "epoch": 1.0292741043315463,
      "grad_norm": 0.3311236798763275,
      "learning_rate": 7.993779775043983e-06,
      "loss": 0.0261,
      "step": 628940
    },
    {
      "epoch": 1.0293068347701997,
      "grad_norm": 1.3837674856185913,
      "learning_rate": 7.993713882830465e-06,
      "loss": 0.0195,
      "step": 628960
    },
    {
      "epoch": 1.0293395652088528,
      "grad_norm": 0.3572867810726166,
      "learning_rate": 7.993647990616949e-06,
      "loss": 0.0313,
      "step": 628980
    },
    {
      "epoch": 1.0293722956475062,
      "grad_norm": 1.04619562625885,
      "learning_rate": 7.993582098403433e-06,
      "loss": 0.0271,
      "step": 629000
    },
    {
      "epoch": 1.0294050260861596,
      "grad_norm": 0.7594192624092102,
      "learning_rate": 7.993516206189914e-06,
      "loss": 0.0303,
      "step": 629020
    },
    {
      "epoch": 1.029437756524813,
      "grad_norm": 0.4696304500102997,
      "learning_rate": 7.993450313976398e-06,
      "loss": 0.0179,
      "step": 629040
    },
    {
      "epoch": 1.0294704869634663,
      "grad_norm": 0.6014072895050049,
      "learning_rate": 7.993384421762882e-06,
      "loss": 0.0336,
      "step": 629060
    },
    {
      "epoch": 1.0295032174021197,
      "grad_norm": 0.6489918231964111,
      "learning_rate": 7.993318529549364e-06,
      "loss": 0.0239,
      "step": 629080
    },
    {
      "epoch": 1.029535947840773,
      "grad_norm": 0.70937180519104,
      "learning_rate": 7.993252637335847e-06,
      "loss": 0.0265,
      "step": 629100
    },
    {
      "epoch": 1.0295686782794262,
      "grad_norm": 0.44511160254478455,
      "learning_rate": 7.99318674512233e-06,
      "loss": 0.0225,
      "step": 629120
    },
    {
      "epoch": 1.0296014087180796,
      "grad_norm": 0.669162392616272,
      "learning_rate": 7.993120852908813e-06,
      "loss": 0.0284,
      "step": 629140
    },
    {
      "epoch": 1.029634139156733,
      "grad_norm": 0.6920326352119446,
      "learning_rate": 7.993054960695296e-06,
      "loss": 0.0189,
      "step": 629160
    },
    {
      "epoch": 1.0296668695953863,
      "grad_norm": 1.0091261863708496,
      "learning_rate": 7.992989068481778e-06,
      "loss": 0.0247,
      "step": 629180
    },
    {
      "epoch": 1.0296996000340397,
      "grad_norm": 2.479264497756958,
      "learning_rate": 7.992923176268262e-06,
      "loss": 0.023,
      "step": 629200
    },
    {
      "epoch": 1.029732330472693,
      "grad_norm": 2.5445544719696045,
      "learning_rate": 7.992857284054744e-06,
      "loss": 0.0311,
      "step": 629220
    },
    {
      "epoch": 1.0297650609113462,
      "grad_norm": 0.5980892181396484,
      "learning_rate": 7.992791391841227e-06,
      "loss": 0.0199,
      "step": 629240
    },
    {
      "epoch": 1.0297977913499996,
      "grad_norm": 0.5538871884346008,
      "learning_rate": 7.992725499627709e-06,
      "loss": 0.023,
      "step": 629260
    },
    {
      "epoch": 1.029830521788653,
      "grad_norm": 1.2467790842056274,
      "learning_rate": 7.992659607414193e-06,
      "loss": 0.0245,
      "step": 629280
    },
    {
      "epoch": 1.0298632522273063,
      "grad_norm": 0.8537368178367615,
      "learning_rate": 7.992593715200674e-06,
      "loss": 0.0265,
      "step": 629300
    },
    {
      "epoch": 1.0298959826659597,
      "grad_norm": 0.7896560430526733,
      "learning_rate": 7.992527822987158e-06,
      "loss": 0.0213,
      "step": 629320
    },
    {
      "epoch": 1.029928713104613,
      "grad_norm": 1.2257575988769531,
      "learning_rate": 7.99246193077364e-06,
      "loss": 0.0254,
      "step": 629340
    },
    {
      "epoch": 1.0299614435432665,
      "grad_norm": 0.348247230052948,
      "learning_rate": 7.992396038560124e-06,
      "loss": 0.0264,
      "step": 629360
    },
    {
      "epoch": 1.0299941739819196,
      "grad_norm": 2.103875160217285,
      "learning_rate": 7.992330146346607e-06,
      "loss": 0.0347,
      "step": 629380
    },
    {
      "epoch": 1.030026904420573,
      "grad_norm": 0.30246490240097046,
      "learning_rate": 7.992264254133089e-06,
      "loss": 0.0189,
      "step": 629400
    },
    {
      "epoch": 1.0300596348592264,
      "grad_norm": 1.2277538776397705,
      "learning_rate": 7.992198361919573e-06,
      "loss": 0.0207,
      "step": 629420
    },
    {
      "epoch": 1.0300923652978797,
      "grad_norm": 2.4915246963500977,
      "learning_rate": 7.992132469706056e-06,
      "loss": 0.022,
      "step": 629440
    },
    {
      "epoch": 1.030125095736533,
      "grad_norm": 2.371838092803955,
      "learning_rate": 7.992066577492538e-06,
      "loss": 0.0254,
      "step": 629460
    },
    {
      "epoch": 1.0301578261751865,
      "grad_norm": 0.7417274117469788,
      "learning_rate": 7.992000685279022e-06,
      "loss": 0.0191,
      "step": 629480
    },
    {
      "epoch": 1.0301905566138398,
      "grad_norm": 0.3726671636104584,
      "learning_rate": 7.991934793065505e-06,
      "loss": 0.0193,
      "step": 629500
    },
    {
      "epoch": 1.030223287052493,
      "grad_norm": 0.7659677863121033,
      "learning_rate": 7.991868900851987e-06,
      "loss": 0.0245,
      "step": 629520
    },
    {
      "epoch": 1.0302560174911464,
      "grad_norm": 0.5928779244422913,
      "learning_rate": 7.99180300863847e-06,
      "loss": 0.0235,
      "step": 629540
    },
    {
      "epoch": 1.0302887479297997,
      "grad_norm": 0.8055369853973389,
      "learning_rate": 7.991737116424953e-06,
      "loss": 0.0209,
      "step": 629560
    },
    {
      "epoch": 1.0303214783684531,
      "grad_norm": 1.1689449548721313,
      "learning_rate": 7.991671224211436e-06,
      "loss": 0.0265,
      "step": 629580
    },
    {
      "epoch": 1.0303542088071065,
      "grad_norm": 1.4826397895812988,
      "learning_rate": 7.991605331997918e-06,
      "loss": 0.0225,
      "step": 629600
    },
    {
      "epoch": 1.0303869392457599,
      "grad_norm": 0.5803695917129517,
      "learning_rate": 7.991539439784402e-06,
      "loss": 0.0241,
      "step": 629620
    },
    {
      "epoch": 1.030419669684413,
      "grad_norm": 0.1642185002565384,
      "learning_rate": 7.991473547570884e-06,
      "loss": 0.0177,
      "step": 629640
    },
    {
      "epoch": 1.0304524001230664,
      "grad_norm": 1.2297031879425049,
      "learning_rate": 7.991407655357367e-06,
      "loss": 0.0236,
      "step": 629660
    },
    {
      "epoch": 1.0304851305617198,
      "grad_norm": 1.101507306098938,
      "learning_rate": 7.991341763143849e-06,
      "loss": 0.0262,
      "step": 629680
    },
    {
      "epoch": 1.0305178610003731,
      "grad_norm": 0.6184694766998291,
      "learning_rate": 7.991275870930333e-06,
      "loss": 0.0262,
      "step": 629700
    },
    {
      "epoch": 1.0305505914390265,
      "grad_norm": 1.0322909355163574,
      "learning_rate": 7.991209978716815e-06,
      "loss": 0.0186,
      "step": 629720
    },
    {
      "epoch": 1.0305833218776799,
      "grad_norm": 0.46538046002388,
      "learning_rate": 7.991144086503298e-06,
      "loss": 0.0229,
      "step": 629740
    },
    {
      "epoch": 1.0306160523163332,
      "grad_norm": 2.1792969703674316,
      "learning_rate": 7.991078194289782e-06,
      "loss": 0.0217,
      "step": 629760
    },
    {
      "epoch": 1.0306487827549864,
      "grad_norm": 0.7455559372901917,
      "learning_rate": 7.991012302076264e-06,
      "loss": 0.0171,
      "step": 629780
    },
    {
      "epoch": 1.0306815131936398,
      "grad_norm": 5.073545932769775,
      "learning_rate": 7.990946409862747e-06,
      "loss": 0.0206,
      "step": 629800
    },
    {
      "epoch": 1.0307142436322931,
      "grad_norm": 1.273877501487732,
      "learning_rate": 7.990880517649229e-06,
      "loss": 0.0241,
      "step": 629820
    },
    {
      "epoch": 1.0307469740709465,
      "grad_norm": 0.7296600937843323,
      "learning_rate": 7.990814625435713e-06,
      "loss": 0.0227,
      "step": 629840
    },
    {
      "epoch": 1.0307797045095999,
      "grad_norm": 1.715715765953064,
      "learning_rate": 7.990748733222196e-06,
      "loss": 0.031,
      "step": 629860
    },
    {
      "epoch": 1.0308124349482533,
      "grad_norm": 0.7161368131637573,
      "learning_rate": 7.990682841008678e-06,
      "loss": 0.0192,
      "step": 629880
    },
    {
      "epoch": 1.0308451653869066,
      "grad_norm": 1.3190349340438843,
      "learning_rate": 7.990616948795162e-06,
      "loss": 0.0259,
      "step": 629900
    },
    {
      "epoch": 1.0308778958255598,
      "grad_norm": 2.5176188945770264,
      "learning_rate": 7.990551056581645e-06,
      "loss": 0.0216,
      "step": 629920
    },
    {
      "epoch": 1.0309106262642131,
      "grad_norm": 0.6189456582069397,
      "learning_rate": 7.990485164368127e-06,
      "loss": 0.0226,
      "step": 629940
    },
    {
      "epoch": 1.0309433567028665,
      "grad_norm": 0.3219544291496277,
      "learning_rate": 7.99041927215461e-06,
      "loss": 0.0259,
      "step": 629960
    },
    {
      "epoch": 1.03097608714152,
      "grad_norm": 1.1454120874404907,
      "learning_rate": 7.990353379941093e-06,
      "loss": 0.0373,
      "step": 629980
    },
    {
      "epoch": 1.0310088175801733,
      "grad_norm": 0.26415976881980896,
      "learning_rate": 7.990287487727576e-06,
      "loss": 0.0266,
      "step": 630000
    },
    {
      "epoch": 1.0310415480188266,
      "grad_norm": 0.52193683385849,
      "learning_rate": 7.990221595514058e-06,
      "loss": 0.0261,
      "step": 630020
    },
    {
      "epoch": 1.0310742784574798,
      "grad_norm": 0.7955138087272644,
      "learning_rate": 7.990155703300542e-06,
      "loss": 0.0233,
      "step": 630040
    },
    {
      "epoch": 1.0311070088961332,
      "grad_norm": 0.6899061799049377,
      "learning_rate": 7.990089811087024e-06,
      "loss": 0.0274,
      "step": 630060
    },
    {
      "epoch": 1.0311397393347865,
      "grad_norm": 0.9207672476768494,
      "learning_rate": 7.990023918873507e-06,
      "loss": 0.0239,
      "step": 630080
    },
    {
      "epoch": 1.03117246977344,
      "grad_norm": 0.7098013162612915,
      "learning_rate": 7.989958026659991e-06,
      "loss": 0.0208,
      "step": 630100
    },
    {
      "epoch": 1.0312052002120933,
      "grad_norm": 0.550613284111023,
      "learning_rate": 7.989892134446473e-06,
      "loss": 0.0233,
      "step": 630120
    },
    {
      "epoch": 1.0312379306507466,
      "grad_norm": 0.8020250201225281,
      "learning_rate": 7.989826242232956e-06,
      "loss": 0.0266,
      "step": 630140
    },
    {
      "epoch": 1.0312706610894,
      "grad_norm": 0.7302792072296143,
      "learning_rate": 7.989760350019438e-06,
      "loss": 0.0251,
      "step": 630160
    },
    {
      "epoch": 1.0313033915280532,
      "grad_norm": 0.34844091534614563,
      "learning_rate": 7.989694457805922e-06,
      "loss": 0.0249,
      "step": 630180
    },
    {
      "epoch": 1.0313361219667065,
      "grad_norm": 1.7577710151672363,
      "learning_rate": 7.989628565592404e-06,
      "loss": 0.0259,
      "step": 630200
    },
    {
      "epoch": 1.03136885240536,
      "grad_norm": 1.358721375465393,
      "learning_rate": 7.989562673378887e-06,
      "loss": 0.028,
      "step": 630220
    },
    {
      "epoch": 1.0314015828440133,
      "grad_norm": 1.3268996477127075,
      "learning_rate": 7.989496781165371e-06,
      "loss": 0.0264,
      "step": 630240
    },
    {
      "epoch": 1.0314343132826667,
      "grad_norm": 0.24952736496925354,
      "learning_rate": 7.989430888951853e-06,
      "loss": 0.0195,
      "step": 630260
    },
    {
      "epoch": 1.03146704372132,
      "grad_norm": 2.1352128982543945,
      "learning_rate": 7.989364996738336e-06,
      "loss": 0.0331,
      "step": 630280
    },
    {
      "epoch": 1.0314997741599732,
      "grad_norm": 0.9178041815757751,
      "learning_rate": 7.98929910452482e-06,
      "loss": 0.0175,
      "step": 630300
    },
    {
      "epoch": 1.0315325045986266,
      "grad_norm": 1.554560661315918,
      "learning_rate": 7.989233212311302e-06,
      "loss": 0.0264,
      "step": 630320
    },
    {
      "epoch": 1.03156523503728,
      "grad_norm": 0.8473882675170898,
      "learning_rate": 7.989167320097785e-06,
      "loss": 0.0172,
      "step": 630340
    },
    {
      "epoch": 1.0315979654759333,
      "grad_norm": 1.4784730672836304,
      "learning_rate": 7.989101427884267e-06,
      "loss": 0.0267,
      "step": 630360
    },
    {
      "epoch": 1.0316306959145867,
      "grad_norm": 1.3854131698608398,
      "learning_rate": 7.989035535670751e-06,
      "loss": 0.0156,
      "step": 630380
    },
    {
      "epoch": 1.03166342635324,
      "grad_norm": 0.7924357652664185,
      "learning_rate": 7.988969643457233e-06,
      "loss": 0.0299,
      "step": 630400
    },
    {
      "epoch": 1.0316961567918934,
      "grad_norm": 1.6594390869140625,
      "learning_rate": 7.988903751243716e-06,
      "loss": 0.0288,
      "step": 630420
    },
    {
      "epoch": 1.0317288872305466,
      "grad_norm": 1.1698317527770996,
      "learning_rate": 7.9888378590302e-06,
      "loss": 0.0258,
      "step": 630440
    },
    {
      "epoch": 1.0317616176692,
      "grad_norm": 0.7464151382446289,
      "learning_rate": 7.988771966816682e-06,
      "loss": 0.0239,
      "step": 630460
    },
    {
      "epoch": 1.0317943481078533,
      "grad_norm": 0.7048298120498657,
      "learning_rate": 7.988706074603165e-06,
      "loss": 0.0195,
      "step": 630480
    },
    {
      "epoch": 1.0318270785465067,
      "grad_norm": 0.6151677370071411,
      "learning_rate": 7.988640182389647e-06,
      "loss": 0.0251,
      "step": 630500
    },
    {
      "epoch": 1.03185980898516,
      "grad_norm": 1.0186747312545776,
      "learning_rate": 7.988574290176131e-06,
      "loss": 0.0161,
      "step": 630520
    },
    {
      "epoch": 1.0318925394238134,
      "grad_norm": 0.5475761890411377,
      "learning_rate": 7.988508397962613e-06,
      "loss": 0.0225,
      "step": 630540
    },
    {
      "epoch": 1.0319252698624668,
      "grad_norm": 0.6270574927330017,
      "learning_rate": 7.988442505749096e-06,
      "loss": 0.027,
      "step": 630560
    },
    {
      "epoch": 1.03195800030112,
      "grad_norm": 0.6647083163261414,
      "learning_rate": 7.988376613535578e-06,
      "loss": 0.0281,
      "step": 630580
    },
    {
      "epoch": 1.0319907307397733,
      "grad_norm": 1.2902939319610596,
      "learning_rate": 7.988310721322062e-06,
      "loss": 0.0231,
      "step": 630600
    },
    {
      "epoch": 1.0320234611784267,
      "grad_norm": 1.0685588121414185,
      "learning_rate": 7.988244829108544e-06,
      "loss": 0.0185,
      "step": 630620
    },
    {
      "epoch": 1.03205619161708,
      "grad_norm": 0.3791767656803131,
      "learning_rate": 7.988178936895027e-06,
      "loss": 0.0241,
      "step": 630640
    },
    {
      "epoch": 1.0320889220557334,
      "grad_norm": 0.5625857710838318,
      "learning_rate": 7.988113044681511e-06,
      "loss": 0.0269,
      "step": 630660
    },
    {
      "epoch": 1.0321216524943868,
      "grad_norm": 2.0896811485290527,
      "learning_rate": 7.988047152467993e-06,
      "loss": 0.0255,
      "step": 630680
    },
    {
      "epoch": 1.0321543829330402,
      "grad_norm": 1.8260488510131836,
      "learning_rate": 7.987981260254476e-06,
      "loss": 0.0261,
      "step": 630700
    },
    {
      "epoch": 1.0321871133716933,
      "grad_norm": 0.48663607239723206,
      "learning_rate": 7.98791536804096e-06,
      "loss": 0.0272,
      "step": 630720
    },
    {
      "epoch": 1.0322198438103467,
      "grad_norm": 1.4444130659103394,
      "learning_rate": 7.987849475827442e-06,
      "loss": 0.0265,
      "step": 630740
    },
    {
      "epoch": 1.032252574249,
      "grad_norm": 1.123557686805725,
      "learning_rate": 7.987783583613926e-06,
      "loss": 0.0287,
      "step": 630760
    },
    {
      "epoch": 1.0322853046876534,
      "grad_norm": 0.6022923588752747,
      "learning_rate": 7.987717691400407e-06,
      "loss": 0.0184,
      "step": 630780
    },
    {
      "epoch": 1.0323180351263068,
      "grad_norm": 0.6354218125343323,
      "learning_rate": 7.987651799186891e-06,
      "loss": 0.0266,
      "step": 630800
    },
    {
      "epoch": 1.0323507655649602,
      "grad_norm": 0.35973814129829407,
      "learning_rate": 7.987585906973375e-06,
      "loss": 0.0201,
      "step": 630820
    },
    {
      "epoch": 1.0323834960036133,
      "grad_norm": 1.1045632362365723,
      "learning_rate": 7.987520014759856e-06,
      "loss": 0.0234,
      "step": 630840
    },
    {
      "epoch": 1.0324162264422667,
      "grad_norm": 0.8894467949867249,
      "learning_rate": 7.98745412254634e-06,
      "loss": 0.0247,
      "step": 630860
    },
    {
      "epoch": 1.03244895688092,
      "grad_norm": 0.5829687714576721,
      "learning_rate": 7.987388230332822e-06,
      "loss": 0.0233,
      "step": 630880
    },
    {
      "epoch": 1.0324816873195735,
      "grad_norm": 0.2184882014989853,
      "learning_rate": 7.987322338119306e-06,
      "loss": 0.0313,
      "step": 630900
    },
    {
      "epoch": 1.0325144177582268,
      "grad_norm": 0.22378666698932648,
      "learning_rate": 7.987256445905787e-06,
      "loss": 0.0228,
      "step": 630920
    },
    {
      "epoch": 1.0325471481968802,
      "grad_norm": 0.3858475983142853,
      "learning_rate": 7.987190553692271e-06,
      "loss": 0.0225,
      "step": 630940
    },
    {
      "epoch": 1.0325798786355336,
      "grad_norm": 3.5487728118896484,
      "learning_rate": 7.987124661478753e-06,
      "loss": 0.0273,
      "step": 630960
    },
    {
      "epoch": 1.0326126090741867,
      "grad_norm": 0.4249304234981537,
      "learning_rate": 7.987058769265236e-06,
      "loss": 0.0227,
      "step": 630980
    },
    {
      "epoch": 1.03264533951284,
      "grad_norm": 0.835258424282074,
      "learning_rate": 7.986992877051718e-06,
      "loss": 0.0253,
      "step": 631000
    },
    {
      "epoch": 1.0326780699514935,
      "grad_norm": 1.2955905199050903,
      "learning_rate": 7.986926984838202e-06,
      "loss": 0.0172,
      "step": 631020
    },
    {
      "epoch": 1.0327108003901468,
      "grad_norm": 1.7613836526870728,
      "learning_rate": 7.986861092624686e-06,
      "loss": 0.02,
      "step": 631040
    },
    {
      "epoch": 1.0327435308288002,
      "grad_norm": 0.8811008930206299,
      "learning_rate": 7.986795200411167e-06,
      "loss": 0.0217,
      "step": 631060
    },
    {
      "epoch": 1.0327762612674536,
      "grad_norm": 1.3588494062423706,
      "learning_rate": 7.986729308197651e-06,
      "loss": 0.0219,
      "step": 631080
    },
    {
      "epoch": 1.0328089917061067,
      "grad_norm": 0.8970102071762085,
      "learning_rate": 7.986663415984135e-06,
      "loss": 0.0245,
      "step": 631100
    },
    {
      "epoch": 1.03284172214476,
      "grad_norm": 0.9271741509437561,
      "learning_rate": 7.986597523770617e-06,
      "loss": 0.0271,
      "step": 631120
    },
    {
      "epoch": 1.0328744525834135,
      "grad_norm": 0.5865534543991089,
      "learning_rate": 7.9865316315571e-06,
      "loss": 0.0272,
      "step": 631140
    },
    {
      "epoch": 1.0329071830220669,
      "grad_norm": 0.5085709691047668,
      "learning_rate": 7.986465739343584e-06,
      "loss": 0.0303,
      "step": 631160
    },
    {
      "epoch": 1.0329399134607202,
      "grad_norm": 1.1351011991500854,
      "learning_rate": 7.986399847130066e-06,
      "loss": 0.0269,
      "step": 631180
    },
    {
      "epoch": 1.0329726438993736,
      "grad_norm": 0.5754473805427551,
      "learning_rate": 7.98633395491655e-06,
      "loss": 0.02,
      "step": 631200
    },
    {
      "epoch": 1.033005374338027,
      "grad_norm": 0.8025310635566711,
      "learning_rate": 7.986268062703031e-06,
      "loss": 0.0237,
      "step": 631220
    },
    {
      "epoch": 1.0330381047766801,
      "grad_norm": 3.9057555198669434,
      "learning_rate": 7.986202170489515e-06,
      "loss": 0.0149,
      "step": 631240
    },
    {
      "epoch": 1.0330708352153335,
      "grad_norm": 0.48828646540641785,
      "learning_rate": 7.986136278275997e-06,
      "loss": 0.024,
      "step": 631260
    },
    {
      "epoch": 1.0331035656539869,
      "grad_norm": 0.6725735068321228,
      "learning_rate": 7.98607038606248e-06,
      "loss": 0.0239,
      "step": 631280
    },
    {
      "epoch": 1.0331362960926402,
      "grad_norm": 0.8744640350341797,
      "learning_rate": 7.986004493848962e-06,
      "loss": 0.0152,
      "step": 631300
    },
    {
      "epoch": 1.0331690265312936,
      "grad_norm": 1.1391633749008179,
      "learning_rate": 7.985938601635446e-06,
      "loss": 0.0214,
      "step": 631320
    },
    {
      "epoch": 1.033201756969947,
      "grad_norm": 2.2233059406280518,
      "learning_rate": 7.985872709421927e-06,
      "loss": 0.0273,
      "step": 631340
    },
    {
      "epoch": 1.0332344874086004,
      "grad_norm": 0.3486408591270447,
      "learning_rate": 7.985806817208411e-06,
      "loss": 0.0299,
      "step": 631360
    },
    {
      "epoch": 1.0332672178472535,
      "grad_norm": 0.9356427192687988,
      "learning_rate": 7.985740924994893e-06,
      "loss": 0.0207,
      "step": 631380
    },
    {
      "epoch": 1.0332999482859069,
      "grad_norm": 0.26455262303352356,
      "learning_rate": 7.985675032781377e-06,
      "loss": 0.0217,
      "step": 631400
    },
    {
      "epoch": 1.0333326787245602,
      "grad_norm": 0.21601273119449615,
      "learning_rate": 7.985609140567858e-06,
      "loss": 0.0147,
      "step": 631420
    },
    {
      "epoch": 1.0333654091632136,
      "grad_norm": 1.0472732782363892,
      "learning_rate": 7.985543248354342e-06,
      "loss": 0.0285,
      "step": 631440
    },
    {
      "epoch": 1.033398139601867,
      "grad_norm": 0.3373474180698395,
      "learning_rate": 7.985477356140826e-06,
      "loss": 0.0309,
      "step": 631460
    },
    {
      "epoch": 1.0334308700405204,
      "grad_norm": 0.41119688749313354,
      "learning_rate": 7.985411463927308e-06,
      "loss": 0.0194,
      "step": 631480
    },
    {
      "epoch": 1.0334636004791735,
      "grad_norm": 0.29831013083457947,
      "learning_rate": 7.985345571713791e-06,
      "loss": 0.0232,
      "step": 631500
    },
    {
      "epoch": 1.0334963309178269,
      "grad_norm": 2.15731143951416,
      "learning_rate": 7.985279679500275e-06,
      "loss": 0.0181,
      "step": 631520
    },
    {
      "epoch": 1.0335290613564803,
      "grad_norm": 0.8564084768295288,
      "learning_rate": 7.985213787286758e-06,
      "loss": 0.0191,
      "step": 631540
    },
    {
      "epoch": 1.0335617917951336,
      "grad_norm": 0.40434566140174866,
      "learning_rate": 7.98514789507324e-06,
      "loss": 0.0236,
      "step": 631560
    },
    {
      "epoch": 1.033594522233787,
      "grad_norm": 0.8722676038742065,
      "learning_rate": 7.985082002859724e-06,
      "loss": 0.0309,
      "step": 631580
    },
    {
      "epoch": 1.0336272526724404,
      "grad_norm": 0.6266977190971375,
      "learning_rate": 7.985016110646206e-06,
      "loss": 0.0195,
      "step": 631600
    },
    {
      "epoch": 1.0336599831110937,
      "grad_norm": 0.3697849214076996,
      "learning_rate": 7.98495021843269e-06,
      "loss": 0.0273,
      "step": 631620
    },
    {
      "epoch": 1.033692713549747,
      "grad_norm": 0.3229845464229584,
      "learning_rate": 7.984884326219171e-06,
      "loss": 0.0242,
      "step": 631640
    },
    {
      "epoch": 1.0337254439884003,
      "grad_norm": 0.5902789831161499,
      "learning_rate": 7.984818434005655e-06,
      "loss": 0.0273,
      "step": 631660
    },
    {
      "epoch": 1.0337581744270536,
      "grad_norm": 0.3800279498100281,
      "learning_rate": 7.984752541792137e-06,
      "loss": 0.0298,
      "step": 631680
    },
    {
      "epoch": 1.033790904865707,
      "grad_norm": 0.2940707802772522,
      "learning_rate": 7.98468664957862e-06,
      "loss": 0.023,
      "step": 631700
    },
    {
      "epoch": 1.0338236353043604,
      "grad_norm": 0.7641664147377014,
      "learning_rate": 7.984620757365102e-06,
      "loss": 0.0317,
      "step": 631720
    },
    {
      "epoch": 1.0338563657430138,
      "grad_norm": 2.724040985107422,
      "learning_rate": 7.984554865151586e-06,
      "loss": 0.0267,
      "step": 631740
    },
    {
      "epoch": 1.0338890961816671,
      "grad_norm": 1.0793190002441406,
      "learning_rate": 7.984488972938068e-06,
      "loss": 0.0209,
      "step": 631760
    },
    {
      "epoch": 1.0339218266203203,
      "grad_norm": 1.1451905965805054,
      "learning_rate": 7.984423080724551e-06,
      "loss": 0.0264,
      "step": 631780
    },
    {
      "epoch": 1.0339545570589737,
      "grad_norm": 1.3138102293014526,
      "learning_rate": 7.984357188511033e-06,
      "loss": 0.0317,
      "step": 631800
    },
    {
      "epoch": 1.033987287497627,
      "grad_norm": 0.336605042219162,
      "learning_rate": 7.984291296297517e-06,
      "loss": 0.0273,
      "step": 631820
    },
    {
      "epoch": 1.0340200179362804,
      "grad_norm": 0.6896088719367981,
      "learning_rate": 7.984225404084e-06,
      "loss": 0.0245,
      "step": 631840
    },
    {
      "epoch": 1.0340527483749338,
      "grad_norm": 1.712099313735962,
      "learning_rate": 7.984159511870482e-06,
      "loss": 0.0211,
      "step": 631860
    },
    {
      "epoch": 1.0340854788135871,
      "grad_norm": 0.37223851680755615,
      "learning_rate": 7.984093619656966e-06,
      "loss": 0.0157,
      "step": 631880
    },
    {
      "epoch": 1.0341182092522403,
      "grad_norm": 1.3678677082061768,
      "learning_rate": 7.98402772744345e-06,
      "loss": 0.0306,
      "step": 631900
    },
    {
      "epoch": 1.0341509396908937,
      "grad_norm": 0.5293753147125244,
      "learning_rate": 7.983961835229931e-06,
      "loss": 0.0232,
      "step": 631920
    },
    {
      "epoch": 1.034183670129547,
      "grad_norm": 1.743417501449585,
      "learning_rate": 7.983895943016415e-06,
      "loss": 0.0225,
      "step": 631940
    },
    {
      "epoch": 1.0342164005682004,
      "grad_norm": 0.8728868365287781,
      "learning_rate": 7.983830050802898e-06,
      "loss": 0.0343,
      "step": 631960
    },
    {
      "epoch": 1.0342491310068538,
      "grad_norm": 1.4183639287948608,
      "learning_rate": 7.98376415858938e-06,
      "loss": 0.0223,
      "step": 631980
    },
    {
      "epoch": 1.0342818614455072,
      "grad_norm": 0.9934796690940857,
      "learning_rate": 7.983698266375864e-06,
      "loss": 0.0336,
      "step": 632000
    },
    {
      "epoch": 1.0343145918841605,
      "grad_norm": 0.3816174864768982,
      "learning_rate": 7.983632374162346e-06,
      "loss": 0.0194,
      "step": 632020
    },
    {
      "epoch": 1.0343473223228137,
      "grad_norm": 0.8317691087722778,
      "learning_rate": 7.98356648194883e-06,
      "loss": 0.0248,
      "step": 632040
    },
    {
      "epoch": 1.034380052761467,
      "grad_norm": 1.7056804895401,
      "learning_rate": 7.983500589735311e-06,
      "loss": 0.0209,
      "step": 632060
    },
    {
      "epoch": 1.0344127832001204,
      "grad_norm": 0.299850732088089,
      "learning_rate": 7.983434697521795e-06,
      "loss": 0.0296,
      "step": 632080
    },
    {
      "epoch": 1.0344455136387738,
      "grad_norm": 0.5834935903549194,
      "learning_rate": 7.983368805308277e-06,
      "loss": 0.0215,
      "step": 632100
    },
    {
      "epoch": 1.0344782440774272,
      "grad_norm": 0.24780604243278503,
      "learning_rate": 7.98330291309476e-06,
      "loss": 0.0231,
      "step": 632120
    },
    {
      "epoch": 1.0345109745160805,
      "grad_norm": 0.38457348942756653,
      "learning_rate": 7.983237020881242e-06,
      "loss": 0.0228,
      "step": 632140
    },
    {
      "epoch": 1.034543704954734,
      "grad_norm": 0.2802947163581848,
      "learning_rate": 7.983171128667726e-06,
      "loss": 0.0195,
      "step": 632160
    },
    {
      "epoch": 1.034576435393387,
      "grad_norm": 0.2860170304775238,
      "learning_rate": 7.983105236454208e-06,
      "loss": 0.0202,
      "step": 632180
    },
    {
      "epoch": 1.0346091658320404,
      "grad_norm": 0.7369224429130554,
      "learning_rate": 7.983039344240691e-06,
      "loss": 0.0164,
      "step": 632200
    },
    {
      "epoch": 1.0346418962706938,
      "grad_norm": 0.3256954550743103,
      "learning_rate": 7.982973452027175e-06,
      "loss": 0.0257,
      "step": 632220
    },
    {
      "epoch": 1.0346746267093472,
      "grad_norm": 0.8355650901794434,
      "learning_rate": 7.982907559813657e-06,
      "loss": 0.0182,
      "step": 632240
    },
    {
      "epoch": 1.0347073571480006,
      "grad_norm": 0.46065446734428406,
      "learning_rate": 7.98284166760014e-06,
      "loss": 0.0251,
      "step": 632260
    },
    {
      "epoch": 1.034740087586654,
      "grad_norm": 1.1348729133605957,
      "learning_rate": 7.982775775386624e-06,
      "loss": 0.0286,
      "step": 632280
    },
    {
      "epoch": 1.034772818025307,
      "grad_norm": 0.5663670897483826,
      "learning_rate": 7.982709883173106e-06,
      "loss": 0.0199,
      "step": 632300
    },
    {
      "epoch": 1.0348055484639604,
      "grad_norm": 0.4133518934249878,
      "learning_rate": 7.98264399095959e-06,
      "loss": 0.0244,
      "step": 632320
    },
    {
      "epoch": 1.0348382789026138,
      "grad_norm": 3.393404006958008,
      "learning_rate": 7.982578098746073e-06,
      "loss": 0.0316,
      "step": 632340
    },
    {
      "epoch": 1.0348710093412672,
      "grad_norm": 0.5329160094261169,
      "learning_rate": 7.982512206532555e-06,
      "loss": 0.0179,
      "step": 632360
    },
    {
      "epoch": 1.0349037397799206,
      "grad_norm": 0.8133667707443237,
      "learning_rate": 7.982446314319038e-06,
      "loss": 0.0268,
      "step": 632380
    },
    {
      "epoch": 1.034936470218574,
      "grad_norm": 0.3438583016395569,
      "learning_rate": 7.98238042210552e-06,
      "loss": 0.0169,
      "step": 632400
    },
    {
      "epoch": 1.0349692006572273,
      "grad_norm": 0.44908735156059265,
      "learning_rate": 7.982314529892004e-06,
      "loss": 0.0263,
      "step": 632420
    },
    {
      "epoch": 1.0350019310958805,
      "grad_norm": 2.488158941268921,
      "learning_rate": 7.982248637678486e-06,
      "loss": 0.0319,
      "step": 632440
    },
    {
      "epoch": 1.0350346615345338,
      "grad_norm": 0.26910004019737244,
      "learning_rate": 7.98218274546497e-06,
      "loss": 0.0248,
      "step": 632460
    },
    {
      "epoch": 1.0350673919731872,
      "grad_norm": 0.4880842864513397,
      "learning_rate": 7.982116853251451e-06,
      "loss": 0.0221,
      "step": 632480
    },
    {
      "epoch": 1.0351001224118406,
      "grad_norm": 1.0843064785003662,
      "learning_rate": 7.982050961037935e-06,
      "loss": 0.0269,
      "step": 632500
    },
    {
      "epoch": 1.035132852850494,
      "grad_norm": 0.9786447882652283,
      "learning_rate": 7.981985068824417e-06,
      "loss": 0.0221,
      "step": 632520
    },
    {
      "epoch": 1.0351655832891473,
      "grad_norm": 0.22159124910831451,
      "learning_rate": 7.9819191766109e-06,
      "loss": 0.0209,
      "step": 632540
    },
    {
      "epoch": 1.0351983137278005,
      "grad_norm": 0.34099307656288147,
      "learning_rate": 7.981853284397384e-06,
      "loss": 0.028,
      "step": 632560
    },
    {
      "epoch": 1.0352310441664538,
      "grad_norm": 0.961857259273529,
      "learning_rate": 7.981787392183866e-06,
      "loss": 0.0211,
      "step": 632580
    },
    {
      "epoch": 1.0352637746051072,
      "grad_norm": 1.003448486328125,
      "learning_rate": 7.98172149997035e-06,
      "loss": 0.0245,
      "step": 632600
    },
    {
      "epoch": 1.0352965050437606,
      "grad_norm": 1.5327210426330566,
      "learning_rate": 7.981655607756831e-06,
      "loss": 0.0179,
      "step": 632620
    },
    {
      "epoch": 1.035329235482414,
      "grad_norm": 0.30293402075767517,
      "learning_rate": 7.981589715543315e-06,
      "loss": 0.0178,
      "step": 632640
    },
    {
      "epoch": 1.0353619659210673,
      "grad_norm": 0.8824048638343811,
      "learning_rate": 7.981523823329797e-06,
      "loss": 0.0285,
      "step": 632660
    },
    {
      "epoch": 1.0353946963597207,
      "grad_norm": 0.20538602769374847,
      "learning_rate": 7.98145793111628e-06,
      "loss": 0.0194,
      "step": 632680
    },
    {
      "epoch": 1.0354274267983739,
      "grad_norm": 1.9018064737319946,
      "learning_rate": 7.981392038902764e-06,
      "loss": 0.0303,
      "step": 632700
    },
    {
      "epoch": 1.0354601572370272,
      "grad_norm": 1.4782495498657227,
      "learning_rate": 7.981326146689246e-06,
      "loss": 0.0246,
      "step": 632720
    },
    {
      "epoch": 1.0354928876756806,
      "grad_norm": 1.5998671054840088,
      "learning_rate": 7.98126025447573e-06,
      "loss": 0.0217,
      "step": 632740
    },
    {
      "epoch": 1.035525618114334,
      "grad_norm": 0.3358275890350342,
      "learning_rate": 7.981194362262213e-06,
      "loss": 0.018,
      "step": 632760
    },
    {
      "epoch": 1.0355583485529873,
      "grad_norm": 0.35964247584342957,
      "learning_rate": 7.981128470048695e-06,
      "loss": 0.0225,
      "step": 632780
    },
    {
      "epoch": 1.0355910789916407,
      "grad_norm": 0.608662486076355,
      "learning_rate": 7.981062577835179e-06,
      "loss": 0.0156,
      "step": 632800
    },
    {
      "epoch": 1.035623809430294,
      "grad_norm": 0.3269917666912079,
      "learning_rate": 7.98099668562166e-06,
      "loss": 0.0273,
      "step": 632820
    },
    {
      "epoch": 1.0356565398689472,
      "grad_norm": 0.39655908942222595,
      "learning_rate": 7.980930793408144e-06,
      "loss": 0.021,
      "step": 632840
    },
    {
      "epoch": 1.0356892703076006,
      "grad_norm": 0.7650326490402222,
      "learning_rate": 7.980864901194626e-06,
      "loss": 0.0183,
      "step": 632860
    },
    {
      "epoch": 1.035722000746254,
      "grad_norm": 0.23526796698570251,
      "learning_rate": 7.98079900898111e-06,
      "loss": 0.0173,
      "step": 632880
    },
    {
      "epoch": 1.0357547311849074,
      "grad_norm": 0.33956190943717957,
      "learning_rate": 7.980733116767593e-06,
      "loss": 0.0226,
      "step": 632900
    },
    {
      "epoch": 1.0357874616235607,
      "grad_norm": 0.9286637306213379,
      "learning_rate": 7.980667224554075e-06,
      "loss": 0.025,
      "step": 632920
    },
    {
      "epoch": 1.035820192062214,
      "grad_norm": 0.30383700132369995,
      "learning_rate": 7.980601332340559e-06,
      "loss": 0.0273,
      "step": 632940
    },
    {
      "epoch": 1.0358529225008675,
      "grad_norm": 0.6916303634643555,
      "learning_rate": 7.98053544012704e-06,
      "loss": 0.0199,
      "step": 632960
    },
    {
      "epoch": 1.0358856529395206,
      "grad_norm": 0.20986588299274445,
      "learning_rate": 7.980469547913524e-06,
      "loss": 0.0223,
      "step": 632980
    },
    {
      "epoch": 1.035918383378174,
      "grad_norm": 0.8830609917640686,
      "learning_rate": 7.980403655700006e-06,
      "loss": 0.0283,
      "step": 633000
    },
    {
      "epoch": 1.0359511138168274,
      "grad_norm": 1.0172802209854126,
      "learning_rate": 7.98033776348649e-06,
      "loss": 0.0302,
      "step": 633020
    },
    {
      "epoch": 1.0359838442554807,
      "grad_norm": 1.2559819221496582,
      "learning_rate": 7.980271871272971e-06,
      "loss": 0.0252,
      "step": 633040
    },
    {
      "epoch": 1.036016574694134,
      "grad_norm": 0.5781093239784241,
      "learning_rate": 7.980205979059455e-06,
      "loss": 0.0226,
      "step": 633060
    },
    {
      "epoch": 1.0360493051327875,
      "grad_norm": 0.4288812577724457,
      "learning_rate": 7.980140086845939e-06,
      "loss": 0.0206,
      "step": 633080
    },
    {
      "epoch": 1.0360820355714406,
      "grad_norm": 0.3509998023509979,
      "learning_rate": 7.98007419463242e-06,
      "loss": 0.0135,
      "step": 633100
    },
    {
      "epoch": 1.036114766010094,
      "grad_norm": 0.938312292098999,
      "learning_rate": 7.980008302418904e-06,
      "loss": 0.0206,
      "step": 633120
    },
    {
      "epoch": 1.0361474964487474,
      "grad_norm": 0.6145842671394348,
      "learning_rate": 7.979942410205388e-06,
      "loss": 0.0255,
      "step": 633140
    },
    {
      "epoch": 1.0361802268874007,
      "grad_norm": 0.26114022731781006,
      "learning_rate": 7.97987651799187e-06,
      "loss": 0.029,
      "step": 633160
    },
    {
      "epoch": 1.0362129573260541,
      "grad_norm": 0.3496348261833191,
      "learning_rate": 7.979810625778353e-06,
      "loss": 0.0268,
      "step": 633180
    },
    {
      "epoch": 1.0362456877647075,
      "grad_norm": 0.4489469528198242,
      "learning_rate": 7.979744733564835e-06,
      "loss": 0.0225,
      "step": 633200
    },
    {
      "epoch": 1.0362784182033609,
      "grad_norm": 0.3950493633747101,
      "learning_rate": 7.979678841351319e-06,
      "loss": 0.0278,
      "step": 633220
    },
    {
      "epoch": 1.036311148642014,
      "grad_norm": 1.4070364236831665,
      "learning_rate": 7.9796129491378e-06,
      "loss": 0.027,
      "step": 633240
    },
    {
      "epoch": 1.0363438790806674,
      "grad_norm": 0.7089062929153442,
      "learning_rate": 7.979547056924284e-06,
      "loss": 0.0186,
      "step": 633260
    },
    {
      "epoch": 1.0363766095193208,
      "grad_norm": 0.4549848437309265,
      "learning_rate": 7.979481164710768e-06,
      "loss": 0.0223,
      "step": 633280
    },
    {
      "epoch": 1.0364093399579741,
      "grad_norm": 0.9309259057044983,
      "learning_rate": 7.97941527249725e-06,
      "loss": 0.0196,
      "step": 633300
    },
    {
      "epoch": 1.0364420703966275,
      "grad_norm": 0.49751558899879456,
      "learning_rate": 7.979349380283733e-06,
      "loss": 0.0242,
      "step": 633320
    },
    {
      "epoch": 1.0364748008352809,
      "grad_norm": 0.3945072889328003,
      "learning_rate": 7.979283488070215e-06,
      "loss": 0.0272,
      "step": 633340
    },
    {
      "epoch": 1.036507531273934,
      "grad_norm": 1.4630380868911743,
      "learning_rate": 7.979217595856699e-06,
      "loss": 0.0276,
      "step": 633360
    },
    {
      "epoch": 1.0365402617125874,
      "grad_norm": 1.4900131225585938,
      "learning_rate": 7.97915170364318e-06,
      "loss": 0.0321,
      "step": 633380
    },
    {
      "epoch": 1.0365729921512408,
      "grad_norm": 1.104240894317627,
      "learning_rate": 7.979085811429664e-06,
      "loss": 0.0219,
      "step": 633400
    },
    {
      "epoch": 1.0366057225898941,
      "grad_norm": 0.13348916172981262,
      "learning_rate": 7.979019919216146e-06,
      "loss": 0.02,
      "step": 633420
    },
    {
      "epoch": 1.0366384530285475,
      "grad_norm": 0.425429105758667,
      "learning_rate": 7.97895402700263e-06,
      "loss": 0.0222,
      "step": 633440
    },
    {
      "epoch": 1.0366711834672009,
      "grad_norm": 1.6808645725250244,
      "learning_rate": 7.978888134789111e-06,
      "loss": 0.0263,
      "step": 633460
    },
    {
      "epoch": 1.0367039139058543,
      "grad_norm": 1.992138385772705,
      "learning_rate": 7.978822242575595e-06,
      "loss": 0.0298,
      "step": 633480
    },
    {
      "epoch": 1.0367366443445074,
      "grad_norm": 0.16237321496009827,
      "learning_rate": 7.978756350362079e-06,
      "loss": 0.0227,
      "step": 633500
    },
    {
      "epoch": 1.0367693747831608,
      "grad_norm": 0.408159464597702,
      "learning_rate": 7.97869045814856e-06,
      "loss": 0.033,
      "step": 633520
    },
    {
      "epoch": 1.0368021052218142,
      "grad_norm": 0.2941608130931854,
      "learning_rate": 7.978624565935044e-06,
      "loss": 0.021,
      "step": 633540
    },
    {
      "epoch": 1.0368348356604675,
      "grad_norm": 0.7822009921073914,
      "learning_rate": 7.978558673721528e-06,
      "loss": 0.0308,
      "step": 633560
    },
    {
      "epoch": 1.036867566099121,
      "grad_norm": 1.6418243646621704,
      "learning_rate": 7.97849278150801e-06,
      "loss": 0.0361,
      "step": 633580
    },
    {
      "epoch": 1.0369002965377743,
      "grad_norm": 0.3430505394935608,
      "learning_rate": 7.978426889294493e-06,
      "loss": 0.0205,
      "step": 633600
    },
    {
      "epoch": 1.0369330269764276,
      "grad_norm": 0.7839996218681335,
      "learning_rate": 7.978360997080977e-06,
      "loss": 0.0226,
      "step": 633620
    },
    {
      "epoch": 1.0369657574150808,
      "grad_norm": 1.6590861082077026,
      "learning_rate": 7.978295104867459e-06,
      "loss": 0.0288,
      "step": 633640
    },
    {
      "epoch": 1.0369984878537342,
      "grad_norm": 0.5589578151702881,
      "learning_rate": 7.978229212653942e-06,
      "loss": 0.0269,
      "step": 633660
    },
    {
      "epoch": 1.0370312182923875,
      "grad_norm": 2.034726619720459,
      "learning_rate": 7.978163320440424e-06,
      "loss": 0.0254,
      "step": 633680
    },
    {
      "epoch": 1.037063948731041,
      "grad_norm": 0.7236777544021606,
      "learning_rate": 7.978097428226908e-06,
      "loss": 0.0162,
      "step": 633700
    },
    {
      "epoch": 1.0370966791696943,
      "grad_norm": 0.2756901681423187,
      "learning_rate": 7.97803153601339e-06,
      "loss": 0.0206,
      "step": 633720
    },
    {
      "epoch": 1.0371294096083477,
      "grad_norm": 0.4348454773426056,
      "learning_rate": 7.977965643799873e-06,
      "loss": 0.0262,
      "step": 633740
    },
    {
      "epoch": 1.037162140047001,
      "grad_norm": 0.2252575010061264,
      "learning_rate": 7.977899751586355e-06,
      "loss": 0.0182,
      "step": 633760
    },
    {
      "epoch": 1.0371948704856542,
      "grad_norm": 1.0045101642608643,
      "learning_rate": 7.977833859372839e-06,
      "loss": 0.0259,
      "step": 633780
    },
    {
      "epoch": 1.0372276009243075,
      "grad_norm": 0.8932802081108093,
      "learning_rate": 7.97776796715932e-06,
      "loss": 0.0247,
      "step": 633800
    },
    {
      "epoch": 1.037260331362961,
      "grad_norm": 0.18015609681606293,
      "learning_rate": 7.977702074945804e-06,
      "loss": 0.0374,
      "step": 633820
    },
    {
      "epoch": 1.0372930618016143,
      "grad_norm": 1.000493049621582,
      "learning_rate": 7.977636182732286e-06,
      "loss": 0.0308,
      "step": 633840
    },
    {
      "epoch": 1.0373257922402677,
      "grad_norm": 1.8208004236221313,
      "learning_rate": 7.97757029051877e-06,
      "loss": 0.0331,
      "step": 633860
    },
    {
      "epoch": 1.037358522678921,
      "grad_norm": 0.47187700867652893,
      "learning_rate": 7.977504398305253e-06,
      "loss": 0.029,
      "step": 633880
    },
    {
      "epoch": 1.0373912531175742,
      "grad_norm": 1.5132577419281006,
      "learning_rate": 7.977438506091735e-06,
      "loss": 0.0218,
      "step": 633900
    },
    {
      "epoch": 1.0374239835562276,
      "grad_norm": 2.2528624534606934,
      "learning_rate": 7.977372613878219e-06,
      "loss": 0.0202,
      "step": 633920
    },
    {
      "epoch": 1.037456713994881,
      "grad_norm": 0.5416388511657715,
      "learning_rate": 7.977306721664702e-06,
      "loss": 0.0257,
      "step": 633940
    },
    {
      "epoch": 1.0374894444335343,
      "grad_norm": 0.5382673740386963,
      "learning_rate": 7.977240829451184e-06,
      "loss": 0.0177,
      "step": 633960
    },
    {
      "epoch": 1.0375221748721877,
      "grad_norm": 0.4310952126979828,
      "learning_rate": 7.977174937237668e-06,
      "loss": 0.0215,
      "step": 633980
    },
    {
      "epoch": 1.037554905310841,
      "grad_norm": 0.11047891527414322,
      "learning_rate": 7.977109045024151e-06,
      "loss": 0.0174,
      "step": 634000
    },
    {
      "epoch": 1.0375876357494944,
      "grad_norm": 0.41516128182411194,
      "learning_rate": 7.977043152810633e-06,
      "loss": 0.019,
      "step": 634020
    },
    {
      "epoch": 1.0376203661881476,
      "grad_norm": 0.5038798451423645,
      "learning_rate": 7.976977260597117e-06,
      "loss": 0.0218,
      "step": 634040
    },
    {
      "epoch": 1.037653096626801,
      "grad_norm": 0.4798121452331543,
      "learning_rate": 7.976911368383599e-06,
      "loss": 0.0109,
      "step": 634060
    },
    {
      "epoch": 1.0376858270654543,
      "grad_norm": 0.5970739722251892,
      "learning_rate": 7.976845476170082e-06,
      "loss": 0.0152,
      "step": 634080
    },
    {
      "epoch": 1.0377185575041077,
      "grad_norm": 0.8731133341789246,
      "learning_rate": 7.976779583956564e-06,
      "loss": 0.0237,
      "step": 634100
    },
    {
      "epoch": 1.037751287942761,
      "grad_norm": 2.042830228805542,
      "learning_rate": 7.976713691743048e-06,
      "loss": 0.0285,
      "step": 634120
    },
    {
      "epoch": 1.0377840183814144,
      "grad_norm": 0.4937235713005066,
      "learning_rate": 7.97664779952953e-06,
      "loss": 0.0242,
      "step": 634140
    },
    {
      "epoch": 1.0378167488200676,
      "grad_norm": 0.556005597114563,
      "learning_rate": 7.976581907316013e-06,
      "loss": 0.0237,
      "step": 634160
    },
    {
      "epoch": 1.037849479258721,
      "grad_norm": 0.5414639711380005,
      "learning_rate": 7.976516015102495e-06,
      "loss": 0.0223,
      "step": 634180
    },
    {
      "epoch": 1.0378822096973743,
      "grad_norm": 1.0617257356643677,
      "learning_rate": 7.976450122888979e-06,
      "loss": 0.0193,
      "step": 634200
    },
    {
      "epoch": 1.0379149401360277,
      "grad_norm": 0.2028605043888092,
      "learning_rate": 7.97638423067546e-06,
      "loss": 0.0221,
      "step": 634220
    },
    {
      "epoch": 1.037947670574681,
      "grad_norm": 0.16802053153514862,
      "learning_rate": 7.976318338461944e-06,
      "loss": 0.0259,
      "step": 634240
    },
    {
      "epoch": 1.0379804010133344,
      "grad_norm": 0.6017270088195801,
      "learning_rate": 7.976252446248426e-06,
      "loss": 0.0356,
      "step": 634260
    },
    {
      "epoch": 1.0380131314519878,
      "grad_norm": 1.0693930387496948,
      "learning_rate": 7.97618655403491e-06,
      "loss": 0.0267,
      "step": 634280
    },
    {
      "epoch": 1.038045861890641,
      "grad_norm": 0.4705166518688202,
      "learning_rate": 7.976120661821393e-06,
      "loss": 0.0322,
      "step": 634300
    },
    {
      "epoch": 1.0380785923292943,
      "grad_norm": 0.9951871633529663,
      "learning_rate": 7.976054769607877e-06,
      "loss": 0.0234,
      "step": 634320
    },
    {
      "epoch": 1.0381113227679477,
      "grad_norm": 1.6747840642929077,
      "learning_rate": 7.975988877394359e-06,
      "loss": 0.0276,
      "step": 634340
    },
    {
      "epoch": 1.038144053206601,
      "grad_norm": 0.16944502294063568,
      "learning_rate": 7.975922985180842e-06,
      "loss": 0.0176,
      "step": 634360
    },
    {
      "epoch": 1.0381767836452545,
      "grad_norm": 1.264631986618042,
      "learning_rate": 7.975857092967326e-06,
      "loss": 0.0244,
      "step": 634380
    },
    {
      "epoch": 1.0382095140839078,
      "grad_norm": 0.6574393510818481,
      "learning_rate": 7.975791200753808e-06,
      "loss": 0.0261,
      "step": 634400
    },
    {
      "epoch": 1.0382422445225612,
      "grad_norm": 0.2851324677467346,
      "learning_rate": 7.975725308540291e-06,
      "loss": 0.0216,
      "step": 634420
    },
    {
      "epoch": 1.0382749749612143,
      "grad_norm": 0.8394522070884705,
      "learning_rate": 7.975659416326773e-06,
      "loss": 0.0208,
      "step": 634440
    },
    {
      "epoch": 1.0383077053998677,
      "grad_norm": 0.5286898612976074,
      "learning_rate": 7.975593524113257e-06,
      "loss": 0.0258,
      "step": 634460
    },
    {
      "epoch": 1.038340435838521,
      "grad_norm": 0.7531529664993286,
      "learning_rate": 7.975527631899739e-06,
      "loss": 0.0279,
      "step": 634480
    },
    {
      "epoch": 1.0383731662771745,
      "grad_norm": 0.7290992140769958,
      "learning_rate": 7.975461739686222e-06,
      "loss": 0.0298,
      "step": 634500
    },
    {
      "epoch": 1.0384058967158278,
      "grad_norm": 0.8313829898834229,
      "learning_rate": 7.975395847472704e-06,
      "loss": 0.0275,
      "step": 634520
    },
    {
      "epoch": 1.0384386271544812,
      "grad_norm": 0.5280731916427612,
      "learning_rate": 7.975329955259188e-06,
      "loss": 0.0181,
      "step": 634540
    },
    {
      "epoch": 1.0384713575931344,
      "grad_norm": 3.0497872829437256,
      "learning_rate": 7.97526406304567e-06,
      "loss": 0.025,
      "step": 634560
    },
    {
      "epoch": 1.0385040880317877,
      "grad_norm": 1.6423346996307373,
      "learning_rate": 7.975198170832153e-06,
      "loss": 0.0242,
      "step": 634580
    },
    {
      "epoch": 1.038536818470441,
      "grad_norm": 0.1698981374502182,
      "learning_rate": 7.975132278618635e-06,
      "loss": 0.0298,
      "step": 634600
    },
    {
      "epoch": 1.0385695489090945,
      "grad_norm": 0.41141045093536377,
      "learning_rate": 7.975066386405119e-06,
      "loss": 0.0175,
      "step": 634620
    },
    {
      "epoch": 1.0386022793477478,
      "grad_norm": 1.0496128797531128,
      "learning_rate": 7.9750004941916e-06,
      "loss": 0.0189,
      "step": 634640
    },
    {
      "epoch": 1.0386350097864012,
      "grad_norm": 0.1567399948835373,
      "learning_rate": 7.974934601978084e-06,
      "loss": 0.0194,
      "step": 634660
    },
    {
      "epoch": 1.0386677402250546,
      "grad_norm": 0.16243721544742584,
      "learning_rate": 7.974868709764568e-06,
      "loss": 0.0303,
      "step": 634680
    },
    {
      "epoch": 1.0387004706637077,
      "grad_norm": 2.081197738647461,
      "learning_rate": 7.97480281755105e-06,
      "loss": 0.0247,
      "step": 634700
    },
    {
      "epoch": 1.0387332011023611,
      "grad_norm": 0.8668597936630249,
      "learning_rate": 7.974736925337533e-06,
      "loss": 0.0191,
      "step": 634720
    },
    {
      "epoch": 1.0387659315410145,
      "grad_norm": 0.3162187933921814,
      "learning_rate": 7.974671033124017e-06,
      "loss": 0.0343,
      "step": 634740
    },
    {
      "epoch": 1.0387986619796679,
      "grad_norm": 0.5884535908699036,
      "learning_rate": 7.974605140910499e-06,
      "loss": 0.0318,
      "step": 634760
    },
    {
      "epoch": 1.0388313924183212,
      "grad_norm": 0.31755897402763367,
      "learning_rate": 7.974539248696982e-06,
      "loss": 0.0235,
      "step": 634780
    },
    {
      "epoch": 1.0388641228569746,
      "grad_norm": 1.2473607063293457,
      "learning_rate": 7.974473356483466e-06,
      "loss": 0.021,
      "step": 634800
    },
    {
      "epoch": 1.038896853295628,
      "grad_norm": 0.35708165168762207,
      "learning_rate": 7.974407464269948e-06,
      "loss": 0.0312,
      "step": 634820
    },
    {
      "epoch": 1.0389295837342811,
      "grad_norm": 0.22765713930130005,
      "learning_rate": 7.974341572056432e-06,
      "loss": 0.0248,
      "step": 634840
    },
    {
      "epoch": 1.0389623141729345,
      "grad_norm": 0.25725147128105164,
      "learning_rate": 7.974275679842913e-06,
      "loss": 0.0239,
      "step": 634860
    },
    {
      "epoch": 1.0389950446115879,
      "grad_norm": 5.752366065979004,
      "learning_rate": 7.974209787629397e-06,
      "loss": 0.0212,
      "step": 634880
    },
    {
      "epoch": 1.0390277750502412,
      "grad_norm": 0.8838821649551392,
      "learning_rate": 7.974143895415879e-06,
      "loss": 0.0175,
      "step": 634900
    },
    {
      "epoch": 1.0390605054888946,
      "grad_norm": 0.24619488418102264,
      "learning_rate": 7.974078003202362e-06,
      "loss": 0.03,
      "step": 634920
    },
    {
      "epoch": 1.039093235927548,
      "grad_norm": 0.3137626647949219,
      "learning_rate": 7.974012110988844e-06,
      "loss": 0.0271,
      "step": 634940
    },
    {
      "epoch": 1.0391259663662011,
      "grad_norm": 1.3279305696487427,
      "learning_rate": 7.973946218775328e-06,
      "loss": 0.0289,
      "step": 634960
    },
    {
      "epoch": 1.0391586968048545,
      "grad_norm": 0.8560894131660461,
      "learning_rate": 7.97388032656181e-06,
      "loss": 0.0212,
      "step": 634980
    },
    {
      "epoch": 1.0391914272435079,
      "grad_norm": 0.4130186140537262,
      "learning_rate": 7.973814434348293e-06,
      "loss": 0.0172,
      "step": 635000
    },
    {
      "epoch": 1.0392241576821613,
      "grad_norm": 3.786371946334839,
      "learning_rate": 7.973748542134777e-06,
      "loss": 0.0241,
      "step": 635020
    },
    {
      "epoch": 1.0392568881208146,
      "grad_norm": 0.7504748702049255,
      "learning_rate": 7.973682649921259e-06,
      "loss": 0.0294,
      "step": 635040
    },
    {
      "epoch": 1.039289618559468,
      "grad_norm": 0.8787486553192139,
      "learning_rate": 7.973616757707743e-06,
      "loss": 0.0228,
      "step": 635060
    },
    {
      "epoch": 1.0393223489981214,
      "grad_norm": 1.314583420753479,
      "learning_rate": 7.973550865494224e-06,
      "loss": 0.0206,
      "step": 635080
    },
    {
      "epoch": 1.0393550794367745,
      "grad_norm": 0.6278107762336731,
      "learning_rate": 7.973484973280708e-06,
      "loss": 0.0219,
      "step": 635100
    },
    {
      "epoch": 1.039387809875428,
      "grad_norm": 1.0166163444519043,
      "learning_rate": 7.973419081067192e-06,
      "loss": 0.0313,
      "step": 635120
    },
    {
      "epoch": 1.0394205403140813,
      "grad_norm": 0.22553040087223053,
      "learning_rate": 7.973353188853673e-06,
      "loss": 0.0299,
      "step": 635140
    },
    {
      "epoch": 1.0394532707527346,
      "grad_norm": 0.49666324257850647,
      "learning_rate": 7.973287296640157e-06,
      "loss": 0.026,
      "step": 635160
    },
    {
      "epoch": 1.039486001191388,
      "grad_norm": 0.8000449538230896,
      "learning_rate": 7.97322140442664e-06,
      "loss": 0.0187,
      "step": 635180
    },
    {
      "epoch": 1.0395187316300414,
      "grad_norm": 0.8459562063217163,
      "learning_rate": 7.973155512213123e-06,
      "loss": 0.03,
      "step": 635200
    },
    {
      "epoch": 1.0395514620686948,
      "grad_norm": 0.5724855065345764,
      "learning_rate": 7.973089619999606e-06,
      "loss": 0.0223,
      "step": 635220
    },
    {
      "epoch": 1.039584192507348,
      "grad_norm": 0.432272732257843,
      "learning_rate": 7.973023727786088e-06,
      "loss": 0.0237,
      "step": 635240
    },
    {
      "epoch": 1.0396169229460013,
      "grad_norm": 1.0683948993682861,
      "learning_rate": 7.972957835572572e-06,
      "loss": 0.0258,
      "step": 635260
    },
    {
      "epoch": 1.0396496533846546,
      "grad_norm": 1.7196369171142578,
      "learning_rate": 7.972891943359053e-06,
      "loss": 0.0183,
      "step": 635280
    },
    {
      "epoch": 1.039682383823308,
      "grad_norm": 0.9638985991477966,
      "learning_rate": 7.972826051145537e-06,
      "loss": 0.0181,
      "step": 635300
    },
    {
      "epoch": 1.0397151142619614,
      "grad_norm": 0.8872466087341309,
      "learning_rate": 7.972760158932019e-06,
      "loss": 0.0207,
      "step": 635320
    },
    {
      "epoch": 1.0397478447006148,
      "grad_norm": 0.06238707900047302,
      "learning_rate": 7.972694266718503e-06,
      "loss": 0.0298,
      "step": 635340
    },
    {
      "epoch": 1.039780575139268,
      "grad_norm": 0.21566908061504364,
      "learning_rate": 7.972628374504986e-06,
      "loss": 0.0286,
      "step": 635360
    },
    {
      "epoch": 1.0398133055779213,
      "grad_norm": 0.4765593111515045,
      "learning_rate": 7.972562482291468e-06,
      "loss": 0.0184,
      "step": 635380
    },
    {
      "epoch": 1.0398460360165747,
      "grad_norm": 1.8865500688552856,
      "learning_rate": 7.972496590077952e-06,
      "loss": 0.0174,
      "step": 635400
    },
    {
      "epoch": 1.039878766455228,
      "grad_norm": 1.2332168817520142,
      "learning_rate": 7.972430697864434e-06,
      "loss": 0.0256,
      "step": 635420
    },
    {
      "epoch": 1.0399114968938814,
      "grad_norm": 0.32623228430747986,
      "learning_rate": 7.972364805650917e-06,
      "loss": 0.0239,
      "step": 635440
    },
    {
      "epoch": 1.0399442273325348,
      "grad_norm": 0.43234238028526306,
      "learning_rate": 7.972298913437399e-06,
      "loss": 0.0266,
      "step": 635460
    },
    {
      "epoch": 1.0399769577711881,
      "grad_norm": 0.13265585899353027,
      "learning_rate": 7.972233021223883e-06,
      "loss": 0.0141,
      "step": 635480
    },
    {
      "epoch": 1.0400096882098413,
      "grad_norm": 0.4871480464935303,
      "learning_rate": 7.972167129010364e-06,
      "loss": 0.0243,
      "step": 635500
    },
    {
      "epoch": 1.0400424186484947,
      "grad_norm": 0.2123507857322693,
      "learning_rate": 7.972101236796848e-06,
      "loss": 0.0234,
      "step": 635520
    },
    {
      "epoch": 1.040075149087148,
      "grad_norm": 0.9487898349761963,
      "learning_rate": 7.972035344583332e-06,
      "loss": 0.0165,
      "step": 635540
    },
    {
      "epoch": 1.0401078795258014,
      "grad_norm": 0.8136958479881287,
      "learning_rate": 7.971969452369814e-06,
      "loss": 0.018,
      "step": 635560
    },
    {
      "epoch": 1.0401406099644548,
      "grad_norm": 1.1845977306365967,
      "learning_rate": 7.971903560156297e-06,
      "loss": 0.0259,
      "step": 635580
    },
    {
      "epoch": 1.0401733404031082,
      "grad_norm": 1.5628727674484253,
      "learning_rate": 7.97183766794278e-06,
      "loss": 0.0237,
      "step": 635600
    },
    {
      "epoch": 1.0402060708417613,
      "grad_norm": 1.2713086605072021,
      "learning_rate": 7.971771775729263e-06,
      "loss": 0.022,
      "step": 635620
    },
    {
      "epoch": 1.0402388012804147,
      "grad_norm": 0.8902881741523743,
      "learning_rate": 7.971705883515746e-06,
      "loss": 0.0264,
      "step": 635640
    },
    {
      "epoch": 1.040271531719068,
      "grad_norm": 1.2476553916931152,
      "learning_rate": 7.971639991302228e-06,
      "loss": 0.0205,
      "step": 635660
    },
    {
      "epoch": 1.0403042621577214,
      "grad_norm": 0.3342527151107788,
      "learning_rate": 7.971574099088712e-06,
      "loss": 0.0162,
      "step": 635680
    },
    {
      "epoch": 1.0403369925963748,
      "grad_norm": 0.34468889236450195,
      "learning_rate": 7.971508206875194e-06,
      "loss": 0.0188,
      "step": 635700
    },
    {
      "epoch": 1.0403697230350282,
      "grad_norm": 0.32634681463241577,
      "learning_rate": 7.971442314661677e-06,
      "loss": 0.0204,
      "step": 635720
    },
    {
      "epoch": 1.0404024534736815,
      "grad_norm": 0.9753401279449463,
      "learning_rate": 7.97137642244816e-06,
      "loss": 0.0188,
      "step": 635740
    },
    {
      "epoch": 1.0404351839123347,
      "grad_norm": 0.8176445364952087,
      "learning_rate": 7.971310530234643e-06,
      "loss": 0.0205,
      "step": 635760
    },
    {
      "epoch": 1.040467914350988,
      "grad_norm": 0.255595862865448,
      "learning_rate": 7.971244638021126e-06,
      "loss": 0.0193,
      "step": 635780
    },
    {
      "epoch": 1.0405006447896414,
      "grad_norm": 1.7369544506072998,
      "learning_rate": 7.971178745807608e-06,
      "loss": 0.0228,
      "step": 635800
    },
    {
      "epoch": 1.0405333752282948,
      "grad_norm": 0.22835971415042877,
      "learning_rate": 7.971112853594092e-06,
      "loss": 0.033,
      "step": 635820
    },
    {
      "epoch": 1.0405661056669482,
      "grad_norm": 0.26944106817245483,
      "learning_rate": 7.971046961380574e-06,
      "loss": 0.0216,
      "step": 635840
    },
    {
      "epoch": 1.0405988361056016,
      "grad_norm": 0.7735096216201782,
      "learning_rate": 7.970981069167057e-06,
      "loss": 0.0293,
      "step": 635860
    },
    {
      "epoch": 1.040631566544255,
      "grad_norm": 0.6767929196357727,
      "learning_rate": 7.970915176953539e-06,
      "loss": 0.023,
      "step": 635880
    },
    {
      "epoch": 1.040664296982908,
      "grad_norm": 0.8068480491638184,
      "learning_rate": 7.970849284740023e-06,
      "loss": 0.0207,
      "step": 635900
    },
    {
      "epoch": 1.0406970274215614,
      "grad_norm": 0.2888418138027191,
      "learning_rate": 7.970783392526506e-06,
      "loss": 0.0253,
      "step": 635920
    },
    {
      "epoch": 1.0407297578602148,
      "grad_norm": 0.2986043691635132,
      "learning_rate": 7.970717500312988e-06,
      "loss": 0.0169,
      "step": 635940
    },
    {
      "epoch": 1.0407624882988682,
      "grad_norm": 0.5273104906082153,
      "learning_rate": 7.970651608099472e-06,
      "loss": 0.0181,
      "step": 635960
    },
    {
      "epoch": 1.0407952187375216,
      "grad_norm": 0.6738103032112122,
      "learning_rate": 7.970585715885955e-06,
      "loss": 0.0215,
      "step": 635980
    },
    {
      "epoch": 1.040827949176175,
      "grad_norm": 0.7537998557090759,
      "learning_rate": 7.970519823672437e-06,
      "loss": 0.0275,
      "step": 636000
    },
    {
      "epoch": 1.0408606796148283,
      "grad_norm": 0.25048550963401794,
      "learning_rate": 7.97045393145892e-06,
      "loss": 0.0221,
      "step": 636020
    },
    {
      "epoch": 1.0408934100534815,
      "grad_norm": 2.013282299041748,
      "learning_rate": 7.970388039245403e-06,
      "loss": 0.0191,
      "step": 636040
    },
    {
      "epoch": 1.0409261404921348,
      "grad_norm": 0.7303508520126343,
      "learning_rate": 7.970322147031886e-06,
      "loss": 0.025,
      "step": 636060
    },
    {
      "epoch": 1.0409588709307882,
      "grad_norm": 0.7816318869590759,
      "learning_rate": 7.97025625481837e-06,
      "loss": 0.0248,
      "step": 636080
    },
    {
      "epoch": 1.0409916013694416,
      "grad_norm": 0.4688681960105896,
      "learning_rate": 7.970190362604852e-06,
      "loss": 0.0207,
      "step": 636100
    },
    {
      "epoch": 1.041024331808095,
      "grad_norm": 0.5295764207839966,
      "learning_rate": 7.970124470391335e-06,
      "loss": 0.0211,
      "step": 636120
    },
    {
      "epoch": 1.0410570622467483,
      "grad_norm": 2.2646241188049316,
      "learning_rate": 7.970058578177817e-06,
      "loss": 0.0351,
      "step": 636140
    },
    {
      "epoch": 1.0410897926854015,
      "grad_norm": 2.315432071685791,
      "learning_rate": 7.9699926859643e-06,
      "loss": 0.0263,
      "step": 636160
    },
    {
      "epoch": 1.0411225231240548,
      "grad_norm": 0.6087450385093689,
      "learning_rate": 7.969926793750783e-06,
      "loss": 0.0222,
      "step": 636180
    },
    {
      "epoch": 1.0411552535627082,
      "grad_norm": 1.1656185388565063,
      "learning_rate": 7.969860901537266e-06,
      "loss": 0.0176,
      "step": 636200
    },
    {
      "epoch": 1.0411879840013616,
      "grad_norm": 0.689594566822052,
      "learning_rate": 7.969795009323748e-06,
      "loss": 0.0173,
      "step": 636220
    },
    {
      "epoch": 1.041220714440015,
      "grad_norm": 0.4735412895679474,
      "learning_rate": 7.969729117110232e-06,
      "loss": 0.0216,
      "step": 636240
    },
    {
      "epoch": 1.0412534448786683,
      "grad_norm": 0.21766163408756256,
      "learning_rate": 7.969663224896714e-06,
      "loss": 0.0254,
      "step": 636260
    },
    {
      "epoch": 1.0412861753173217,
      "grad_norm": 0.3273330628871918,
      "learning_rate": 7.969597332683197e-06,
      "loss": 0.0316,
      "step": 636280
    },
    {
      "epoch": 1.0413189057559749,
      "grad_norm": 0.35118064284324646,
      "learning_rate": 7.969531440469679e-06,
      "loss": 0.0188,
      "step": 636300
    },
    {
      "epoch": 1.0413516361946282,
      "grad_norm": 0.5637944936752319,
      "learning_rate": 7.969465548256163e-06,
      "loss": 0.0229,
      "step": 636320
    },
    {
      "epoch": 1.0413843666332816,
      "grad_norm": 1.152515172958374,
      "learning_rate": 7.969399656042646e-06,
      "loss": 0.0264,
      "step": 636340
    },
    {
      "epoch": 1.041417097071935,
      "grad_norm": 1.1162683963775635,
      "learning_rate": 7.969333763829128e-06,
      "loss": 0.0343,
      "step": 636360
    },
    {
      "epoch": 1.0414498275105883,
      "grad_norm": 0.8584222793579102,
      "learning_rate": 7.969267871615612e-06,
      "loss": 0.0176,
      "step": 636380
    },
    {
      "epoch": 1.0414825579492417,
      "grad_norm": 0.1371384710073471,
      "learning_rate": 7.969201979402095e-06,
      "loss": 0.0258,
      "step": 636400
    },
    {
      "epoch": 1.0415152883878949,
      "grad_norm": 0.6565108299255371,
      "learning_rate": 7.969136087188577e-06,
      "loss": 0.0219,
      "step": 636420
    },
    {
      "epoch": 1.0415480188265482,
      "grad_norm": 0.44112494587898254,
      "learning_rate": 7.969070194975061e-06,
      "loss": 0.0243,
      "step": 636440
    },
    {
      "epoch": 1.0415807492652016,
      "grad_norm": 0.331971675157547,
      "learning_rate": 7.969004302761544e-06,
      "loss": 0.0176,
      "step": 636460
    },
    {
      "epoch": 1.041613479703855,
      "grad_norm": 0.4290383458137512,
      "learning_rate": 7.968938410548026e-06,
      "loss": 0.0266,
      "step": 636480
    },
    {
      "epoch": 1.0416462101425084,
      "grad_norm": 1.1361786127090454,
      "learning_rate": 7.96887251833451e-06,
      "loss": 0.0281,
      "step": 636500
    },
    {
      "epoch": 1.0416789405811617,
      "grad_norm": 1.0491435527801514,
      "learning_rate": 7.968806626120992e-06,
      "loss": 0.0271,
      "step": 636520
    },
    {
      "epoch": 1.041711671019815,
      "grad_norm": 1.7094731330871582,
      "learning_rate": 7.968740733907475e-06,
      "loss": 0.028,
      "step": 636540
    },
    {
      "epoch": 1.0417444014584682,
      "grad_norm": 1.1980377435684204,
      "learning_rate": 7.968674841693957e-06,
      "loss": 0.0217,
      "step": 636560
    },
    {
      "epoch": 1.0417771318971216,
      "grad_norm": 0.6211863160133362,
      "learning_rate": 7.968608949480441e-06,
      "loss": 0.0249,
      "step": 636580
    },
    {
      "epoch": 1.041809862335775,
      "grad_norm": 0.08887798339128494,
      "learning_rate": 7.968543057266923e-06,
      "loss": 0.0306,
      "step": 636600
    },
    {
      "epoch": 1.0418425927744284,
      "grad_norm": 0.1360340416431427,
      "learning_rate": 7.968477165053406e-06,
      "loss": 0.027,
      "step": 636620
    },
    {
      "epoch": 1.0418753232130817,
      "grad_norm": 2.405101776123047,
      "learning_rate": 7.968411272839888e-06,
      "loss": 0.0297,
      "step": 636640
    },
    {
      "epoch": 1.041908053651735,
      "grad_norm": 0.7058767676353455,
      "learning_rate": 7.968345380626372e-06,
      "loss": 0.0291,
      "step": 636660
    },
    {
      "epoch": 1.0419407840903885,
      "grad_norm": 1.0919016599655151,
      "learning_rate": 7.968279488412854e-06,
      "loss": 0.0231,
      "step": 636680
    },
    {
      "epoch": 1.0419735145290416,
      "grad_norm": 0.6932663917541504,
      "learning_rate": 7.968213596199337e-06,
      "loss": 0.0358,
      "step": 636700
    },
    {
      "epoch": 1.042006244967695,
      "grad_norm": 0.20878665149211884,
      "learning_rate": 7.968147703985821e-06,
      "loss": 0.0231,
      "step": 636720
    },
    {
      "epoch": 1.0420389754063484,
      "grad_norm": 1.0245598554611206,
      "learning_rate": 7.968081811772303e-06,
      "loss": 0.0214,
      "step": 636740
    },
    {
      "epoch": 1.0420717058450018,
      "grad_norm": 0.8352989554405212,
      "learning_rate": 7.968015919558786e-06,
      "loss": 0.0267,
      "step": 636760
    },
    {
      "epoch": 1.0421044362836551,
      "grad_norm": 0.39848601818084717,
      "learning_rate": 7.96795002734527e-06,
      "loss": 0.0248,
      "step": 636780
    },
    {
      "epoch": 1.0421371667223085,
      "grad_norm": 1.2622560262680054,
      "learning_rate": 7.967884135131752e-06,
      "loss": 0.0253,
      "step": 636800
    },
    {
      "epoch": 1.0421698971609616,
      "grad_norm": 0.4440380334854126,
      "learning_rate": 7.967818242918235e-06,
      "loss": 0.023,
      "step": 636820
    },
    {
      "epoch": 1.042202627599615,
      "grad_norm": 0.38444653153419495,
      "learning_rate": 7.967752350704719e-06,
      "loss": 0.0245,
      "step": 636840
    },
    {
      "epoch": 1.0422353580382684,
      "grad_norm": 1.4801726341247559,
      "learning_rate": 7.967686458491201e-06,
      "loss": 0.0274,
      "step": 636860
    },
    {
      "epoch": 1.0422680884769218,
      "grad_norm": 0.8046409487724304,
      "learning_rate": 7.967620566277685e-06,
      "loss": 0.0227,
      "step": 636880
    },
    {
      "epoch": 1.0423008189155751,
      "grad_norm": 0.7811294198036194,
      "learning_rate": 7.967554674064166e-06,
      "loss": 0.0218,
      "step": 636900
    },
    {
      "epoch": 1.0423335493542285,
      "grad_norm": 0.6879966855049133,
      "learning_rate": 7.96748878185065e-06,
      "loss": 0.0296,
      "step": 636920
    },
    {
      "epoch": 1.0423662797928819,
      "grad_norm": 0.4651423990726471,
      "learning_rate": 7.967422889637132e-06,
      "loss": 0.0282,
      "step": 636940
    },
    {
      "epoch": 1.042399010231535,
      "grad_norm": 0.5310773849487305,
      "learning_rate": 7.967356997423615e-06,
      "loss": 0.0297,
      "step": 636960
    },
    {
      "epoch": 1.0424317406701884,
      "grad_norm": 0.35190850496292114,
      "learning_rate": 7.967291105210097e-06,
      "loss": 0.0202,
      "step": 636980
    },
    {
      "epoch": 1.0424644711088418,
      "grad_norm": 1.01516854763031,
      "learning_rate": 7.967225212996581e-06,
      "loss": 0.0273,
      "step": 637000
    },
    {
      "epoch": 1.0424972015474951,
      "grad_norm": 0.8814393877983093,
      "learning_rate": 7.967159320783063e-06,
      "loss": 0.027,
      "step": 637020
    },
    {
      "epoch": 1.0425299319861485,
      "grad_norm": 0.5221932530403137,
      "learning_rate": 7.967093428569546e-06,
      "loss": 0.02,
      "step": 637040
    },
    {
      "epoch": 1.042562662424802,
      "grad_norm": 0.6607252359390259,
      "learning_rate": 7.967027536356028e-06,
      "loss": 0.0275,
      "step": 637060
    },
    {
      "epoch": 1.0425953928634553,
      "grad_norm": 0.25444740056991577,
      "learning_rate": 7.966961644142512e-06,
      "loss": 0.0184,
      "step": 637080
    },
    {
      "epoch": 1.0426281233021084,
      "grad_norm": 1.2553950548171997,
      "learning_rate": 7.966895751928996e-06,
      "loss": 0.0332,
      "step": 637100
    },
    {
      "epoch": 1.0426608537407618,
      "grad_norm": 0.7621043920516968,
      "learning_rate": 7.966829859715477e-06,
      "loss": 0.0299,
      "step": 637120
    },
    {
      "epoch": 1.0426935841794152,
      "grad_norm": 1.200143575668335,
      "learning_rate": 7.966763967501961e-06,
      "loss": 0.0318,
      "step": 637140
    },
    {
      "epoch": 1.0427263146180685,
      "grad_norm": 0.6424283385276794,
      "learning_rate": 7.966698075288445e-06,
      "loss": 0.0367,
      "step": 637160
    },
    {
      "epoch": 1.042759045056722,
      "grad_norm": 0.5798333287239075,
      "learning_rate": 7.966632183074926e-06,
      "loss": 0.0195,
      "step": 637180
    },
    {
      "epoch": 1.0427917754953753,
      "grad_norm": 0.28927430510520935,
      "learning_rate": 7.96656629086141e-06,
      "loss": 0.026,
      "step": 637200
    },
    {
      "epoch": 1.0428245059340284,
      "grad_norm": 0.6294942498207092,
      "learning_rate": 7.966500398647894e-06,
      "loss": 0.0281,
      "step": 637220
    },
    {
      "epoch": 1.0428572363726818,
      "grad_norm": 0.8891978859901428,
      "learning_rate": 7.966434506434376e-06,
      "loss": 0.0208,
      "step": 637240
    },
    {
      "epoch": 1.0428899668113352,
      "grad_norm": 0.6683357357978821,
      "learning_rate": 7.966368614220859e-06,
      "loss": 0.0215,
      "step": 637260
    },
    {
      "epoch": 1.0429226972499885,
      "grad_norm": 0.5863068103790283,
      "learning_rate": 7.966302722007341e-06,
      "loss": 0.0282,
      "step": 637280
    },
    {
      "epoch": 1.042955427688642,
      "grad_norm": 0.44077691435813904,
      "learning_rate": 7.966236829793825e-06,
      "loss": 0.0293,
      "step": 637300
    },
    {
      "epoch": 1.0429881581272953,
      "grad_norm": 0.9113254547119141,
      "learning_rate": 7.966170937580306e-06,
      "loss": 0.0315,
      "step": 637320
    },
    {
      "epoch": 1.0430208885659487,
      "grad_norm": 0.613450825214386,
      "learning_rate": 7.96610504536679e-06,
      "loss": 0.0198,
      "step": 637340
    },
    {
      "epoch": 1.0430536190046018,
      "grad_norm": 0.9468817710876465,
      "learning_rate": 7.966039153153272e-06,
      "loss": 0.0279,
      "step": 637360
    },
    {
      "epoch": 1.0430863494432552,
      "grad_norm": 0.20612943172454834,
      "learning_rate": 7.965973260939756e-06,
      "loss": 0.0216,
      "step": 637380
    },
    {
      "epoch": 1.0431190798819086,
      "grad_norm": 1.5209351778030396,
      "learning_rate": 7.965907368726237e-06,
      "loss": 0.025,
      "step": 637400
    },
    {
      "epoch": 1.043151810320562,
      "grad_norm": 0.521867573261261,
      "learning_rate": 7.965841476512721e-06,
      "loss": 0.0202,
      "step": 637420
    },
    {
      "epoch": 1.0431845407592153,
      "grad_norm": 0.1701885610818863,
      "learning_rate": 7.965775584299203e-06,
      "loss": 0.03,
      "step": 637440
    },
    {
      "epoch": 1.0432172711978687,
      "grad_norm": 1.1009728908538818,
      "learning_rate": 7.965709692085687e-06,
      "loss": 0.0278,
      "step": 637460
    },
    {
      "epoch": 1.043250001636522,
      "grad_norm": 0.23737846314907074,
      "learning_rate": 7.96564379987217e-06,
      "loss": 0.0173,
      "step": 637480
    },
    {
      "epoch": 1.0432827320751752,
      "grad_norm": 0.3831292688846588,
      "learning_rate": 7.965577907658652e-06,
      "loss": 0.0267,
      "step": 637500
    },
    {
      "epoch": 1.0433154625138286,
      "grad_norm": 1.2167232036590576,
      "learning_rate": 7.965512015445136e-06,
      "loss": 0.0187,
      "step": 637520
    },
    {
      "epoch": 1.043348192952482,
      "grad_norm": 0.15747426450252533,
      "learning_rate": 7.965446123231617e-06,
      "loss": 0.0284,
      "step": 637540
    },
    {
      "epoch": 1.0433809233911353,
      "grad_norm": 0.6588906049728394,
      "learning_rate": 7.965380231018101e-06,
      "loss": 0.0269,
      "step": 637560
    },
    {
      "epoch": 1.0434136538297887,
      "grad_norm": 2.6259219646453857,
      "learning_rate": 7.965314338804585e-06,
      "loss": 0.0298,
      "step": 637580
    },
    {
      "epoch": 1.043446384268442,
      "grad_norm": 0.34277796745300293,
      "learning_rate": 7.965248446591067e-06,
      "loss": 0.0212,
      "step": 637600
    },
    {
      "epoch": 1.0434791147070952,
      "grad_norm": 2.565676212310791,
      "learning_rate": 7.96518255437755e-06,
      "loss": 0.0224,
      "step": 637620
    },
    {
      "epoch": 1.0435118451457486,
      "grad_norm": 0.6118470430374146,
      "learning_rate": 7.965116662164034e-06,
      "loss": 0.0229,
      "step": 637640
    },
    {
      "epoch": 1.043544575584402,
      "grad_norm": 0.8551741242408752,
      "learning_rate": 7.965050769950516e-06,
      "loss": 0.0175,
      "step": 637660
    },
    {
      "epoch": 1.0435773060230553,
      "grad_norm": 0.3102310001850128,
      "learning_rate": 7.964984877737e-06,
      "loss": 0.0254,
      "step": 637680
    },
    {
      "epoch": 1.0436100364617087,
      "grad_norm": 1.1821956634521484,
      "learning_rate": 7.964918985523481e-06,
      "loss": 0.0205,
      "step": 637700
    },
    {
      "epoch": 1.043642766900362,
      "grad_norm": 0.685796856880188,
      "learning_rate": 7.964853093309965e-06,
      "loss": 0.0173,
      "step": 637720
    },
    {
      "epoch": 1.0436754973390154,
      "grad_norm": 1.131998062133789,
      "learning_rate": 7.964787201096447e-06,
      "loss": 0.027,
      "step": 637740
    },
    {
      "epoch": 1.0437082277776686,
      "grad_norm": 0.37733641266822815,
      "learning_rate": 7.96472130888293e-06,
      "loss": 0.0135,
      "step": 637760
    },
    {
      "epoch": 1.043740958216322,
      "grad_norm": 0.40139690041542053,
      "learning_rate": 7.964655416669412e-06,
      "loss": 0.0208,
      "step": 637780
    },
    {
      "epoch": 1.0437736886549753,
      "grad_norm": 0.48805731534957886,
      "learning_rate": 7.964589524455896e-06,
      "loss": 0.0218,
      "step": 637800
    },
    {
      "epoch": 1.0438064190936287,
      "grad_norm": 0.8168584704399109,
      "learning_rate": 7.96452363224238e-06,
      "loss": 0.0293,
      "step": 637820
    },
    {
      "epoch": 1.043839149532282,
      "grad_norm": 0.27444973587989807,
      "learning_rate": 7.964457740028861e-06,
      "loss": 0.025,
      "step": 637840
    },
    {
      "epoch": 1.0438718799709354,
      "grad_norm": 2.871030569076538,
      "learning_rate": 7.964391847815345e-06,
      "loss": 0.017,
      "step": 637860
    },
    {
      "epoch": 1.0439046104095886,
      "grad_norm": 0.31296560168266296,
      "learning_rate": 7.964325955601827e-06,
      "loss": 0.0224,
      "step": 637880
    },
    {
      "epoch": 1.043937340848242,
      "grad_norm": 1.2446905374526978,
      "learning_rate": 7.96426006338831e-06,
      "loss": 0.0312,
      "step": 637900
    },
    {
      "epoch": 1.0439700712868953,
      "grad_norm": 1.3241053819656372,
      "learning_rate": 7.964194171174792e-06,
      "loss": 0.0259,
      "step": 637920
    },
    {
      "epoch": 1.0440028017255487,
      "grad_norm": 0.5551263689994812,
      "learning_rate": 7.964128278961276e-06,
      "loss": 0.0235,
      "step": 637940
    },
    {
      "epoch": 1.044035532164202,
      "grad_norm": 0.7808509469032288,
      "learning_rate": 7.96406238674776e-06,
      "loss": 0.0238,
      "step": 637960
    },
    {
      "epoch": 1.0440682626028555,
      "grad_norm": 0.23906131088733673,
      "learning_rate": 7.963996494534241e-06,
      "loss": 0.0282,
      "step": 637980
    },
    {
      "epoch": 1.0441009930415088,
      "grad_norm": 0.6402125954627991,
      "learning_rate": 7.963930602320725e-06,
      "loss": 0.0188,
      "step": 638000
    },
    {
      "epoch": 1.044133723480162,
      "grad_norm": 0.5371330380439758,
      "learning_rate": 7.963864710107208e-06,
      "loss": 0.0259,
      "step": 638020
    },
    {
      "epoch": 1.0441664539188154,
      "grad_norm": 0.193306103348732,
      "learning_rate": 7.96379881789369e-06,
      "loss": 0.03,
      "step": 638040
    },
    {
      "epoch": 1.0441991843574687,
      "grad_norm": 0.23743145167827606,
      "learning_rate": 7.963732925680174e-06,
      "loss": 0.0157,
      "step": 638060
    },
    {
      "epoch": 1.044231914796122,
      "grad_norm": 0.39081552624702454,
      "learning_rate": 7.963667033466656e-06,
      "loss": 0.0162,
      "step": 638080
    },
    {
      "epoch": 1.0442646452347755,
      "grad_norm": 0.3922674357891083,
      "learning_rate": 7.96360114125314e-06,
      "loss": 0.0201,
      "step": 638100
    },
    {
      "epoch": 1.0442973756734288,
      "grad_norm": 0.31707239151000977,
      "learning_rate": 7.963535249039621e-06,
      "loss": 0.0277,
      "step": 638120
    },
    {
      "epoch": 1.0443301061120822,
      "grad_norm": 0.7114633917808533,
      "learning_rate": 7.963469356826105e-06,
      "loss": 0.0225,
      "step": 638140
    },
    {
      "epoch": 1.0443628365507354,
      "grad_norm": 0.8368386030197144,
      "learning_rate": 7.963403464612587e-06,
      "loss": 0.0268,
      "step": 638160
    },
    {
      "epoch": 1.0443955669893887,
      "grad_norm": 0.9054651856422424,
      "learning_rate": 7.96333757239907e-06,
      "loss": 0.0364,
      "step": 638180
    },
    {
      "epoch": 1.044428297428042,
      "grad_norm": 0.535192608833313,
      "learning_rate": 7.963271680185554e-06,
      "loss": 0.0204,
      "step": 638200
    },
    {
      "epoch": 1.0444610278666955,
      "grad_norm": 1.276458740234375,
      "learning_rate": 7.963205787972036e-06,
      "loss": 0.0271,
      "step": 638220
    },
    {
      "epoch": 1.0444937583053489,
      "grad_norm": 0.8115432858467102,
      "learning_rate": 7.96313989575852e-06,
      "loss": 0.0216,
      "step": 638240
    },
    {
      "epoch": 1.0445264887440022,
      "grad_norm": 0.9845937490463257,
      "learning_rate": 7.963074003545001e-06,
      "loss": 0.0238,
      "step": 638260
    },
    {
      "epoch": 1.0445592191826556,
      "grad_norm": 0.282906711101532,
      "learning_rate": 7.963008111331485e-06,
      "loss": 0.0204,
      "step": 638280
    },
    {
      "epoch": 1.0445919496213087,
      "grad_norm": 0.9679481387138367,
      "learning_rate": 7.962942219117967e-06,
      "loss": 0.0254,
      "step": 638300
    },
    {
      "epoch": 1.0446246800599621,
      "grad_norm": 0.9012042880058289,
      "learning_rate": 7.96287632690445e-06,
      "loss": 0.0213,
      "step": 638320
    },
    {
      "epoch": 1.0446574104986155,
      "grad_norm": 0.8970032930374146,
      "learning_rate": 7.962810434690932e-06,
      "loss": 0.0221,
      "step": 638340
    },
    {
      "epoch": 1.0446901409372689,
      "grad_norm": 0.39173048734664917,
      "learning_rate": 7.962744542477416e-06,
      "loss": 0.0271,
      "step": 638360
    },
    {
      "epoch": 1.0447228713759222,
      "grad_norm": 0.3725907802581787,
      "learning_rate": 7.9626786502639e-06,
      "loss": 0.0267,
      "step": 638380
    },
    {
      "epoch": 1.0447556018145756,
      "grad_norm": 0.522826075553894,
      "learning_rate": 7.962612758050381e-06,
      "loss": 0.0195,
      "step": 638400
    },
    {
      "epoch": 1.0447883322532288,
      "grad_norm": 0.3852488696575165,
      "learning_rate": 7.962546865836865e-06,
      "loss": 0.0316,
      "step": 638420
    },
    {
      "epoch": 1.0448210626918821,
      "grad_norm": 0.7480323910713196,
      "learning_rate": 7.962480973623348e-06,
      "loss": 0.0244,
      "step": 638440
    },
    {
      "epoch": 1.0448537931305355,
      "grad_norm": 1.503772258758545,
      "learning_rate": 7.96241508140983e-06,
      "loss": 0.0382,
      "step": 638460
    },
    {
      "epoch": 1.0448865235691889,
      "grad_norm": 0.31582966446876526,
      "learning_rate": 7.962349189196314e-06,
      "loss": 0.0247,
      "step": 638480
    },
    {
      "epoch": 1.0449192540078422,
      "grad_norm": 1.1401311159133911,
      "learning_rate": 7.962283296982796e-06,
      "loss": 0.0243,
      "step": 638500
    },
    {
      "epoch": 1.0449519844464956,
      "grad_norm": 1.0542824268341064,
      "learning_rate": 7.96221740476928e-06,
      "loss": 0.0259,
      "step": 638520
    },
    {
      "epoch": 1.044984714885149,
      "grad_norm": 1.448162317276001,
      "learning_rate": 7.962151512555763e-06,
      "loss": 0.0298,
      "step": 638540
    },
    {
      "epoch": 1.0450174453238021,
      "grad_norm": 1.591630220413208,
      "learning_rate": 7.962085620342245e-06,
      "loss": 0.0214,
      "step": 638560
    },
    {
      "epoch": 1.0450501757624555,
      "grad_norm": 0.2928105890750885,
      "learning_rate": 7.962019728128728e-06,
      "loss": 0.0198,
      "step": 638580
    },
    {
      "epoch": 1.0450829062011089,
      "grad_norm": 0.25684407353401184,
      "learning_rate": 7.96195383591521e-06,
      "loss": 0.0251,
      "step": 638600
    },
    {
      "epoch": 1.0451156366397623,
      "grad_norm": 0.9898353219032288,
      "learning_rate": 7.961887943701694e-06,
      "loss": 0.0387,
      "step": 638620
    },
    {
      "epoch": 1.0451483670784156,
      "grad_norm": 1.1795048713684082,
      "learning_rate": 7.961822051488176e-06,
      "loss": 0.0303,
      "step": 638640
    },
    {
      "epoch": 1.045181097517069,
      "grad_norm": 1.020631194114685,
      "learning_rate": 7.96175615927466e-06,
      "loss": 0.0282,
      "step": 638660
    },
    {
      "epoch": 1.0452138279557222,
      "grad_norm": 0.1747753769159317,
      "learning_rate": 7.961690267061141e-06,
      "loss": 0.018,
      "step": 638680
    },
    {
      "epoch": 1.0452465583943755,
      "grad_norm": 1.3679077625274658,
      "learning_rate": 7.961624374847625e-06,
      "loss": 0.0198,
      "step": 638700
    },
    {
      "epoch": 1.045279288833029,
      "grad_norm": 1.0304083824157715,
      "learning_rate": 7.961558482634107e-06,
      "loss": 0.0238,
      "step": 638720
    },
    {
      "epoch": 1.0453120192716823,
      "grad_norm": 0.47706711292266846,
      "learning_rate": 7.96149259042059e-06,
      "loss": 0.0295,
      "step": 638740
    },
    {
      "epoch": 1.0453447497103356,
      "grad_norm": 0.13630720973014832,
      "learning_rate": 7.961426698207074e-06,
      "loss": 0.0262,
      "step": 638760
    },
    {
      "epoch": 1.045377480148989,
      "grad_norm": 0.25032034516334534,
      "learning_rate": 7.961360805993556e-06,
      "loss": 0.0269,
      "step": 638780
    },
    {
      "epoch": 1.0454102105876424,
      "grad_norm": 0.7729740738868713,
      "learning_rate": 7.96129491378004e-06,
      "loss": 0.0285,
      "step": 638800
    },
    {
      "epoch": 1.0454429410262955,
      "grad_norm": 0.3175477385520935,
      "learning_rate": 7.961229021566523e-06,
      "loss": 0.0231,
      "step": 638820
    },
    {
      "epoch": 1.045475671464949,
      "grad_norm": 0.9129820466041565,
      "learning_rate": 7.961163129353005e-06,
      "loss": 0.0225,
      "step": 638840
    },
    {
      "epoch": 1.0455084019036023,
      "grad_norm": 1.070387363433838,
      "learning_rate": 7.961097237139488e-06,
      "loss": 0.0235,
      "step": 638860
    },
    {
      "epoch": 1.0455411323422557,
      "grad_norm": 2.172879219055176,
      "learning_rate": 7.961031344925972e-06,
      "loss": 0.033,
      "step": 638880
    },
    {
      "epoch": 1.045573862780909,
      "grad_norm": 1.4354760646820068,
      "learning_rate": 7.960965452712454e-06,
      "loss": 0.0277,
      "step": 638900
    },
    {
      "epoch": 1.0456065932195624,
      "grad_norm": 0.46141988039016724,
      "learning_rate": 7.960899560498938e-06,
      "loss": 0.0302,
      "step": 638920
    },
    {
      "epoch": 1.0456393236582158,
      "grad_norm": 0.5427768230438232,
      "learning_rate": 7.96083366828542e-06,
      "loss": 0.0225,
      "step": 638940
    },
    {
      "epoch": 1.045672054096869,
      "grad_norm": 0.38301026821136475,
      "learning_rate": 7.960767776071903e-06,
      "loss": 0.0161,
      "step": 638960
    },
    {
      "epoch": 1.0457047845355223,
      "grad_norm": 2.0408642292022705,
      "learning_rate": 7.960701883858385e-06,
      "loss": 0.0232,
      "step": 638980
    },
    {
      "epoch": 1.0457375149741757,
      "grad_norm": 0.6980719566345215,
      "learning_rate": 7.960635991644868e-06,
      "loss": 0.0241,
      "step": 639000
    },
    {
      "epoch": 1.045770245412829,
      "grad_norm": 0.5681838393211365,
      "learning_rate": 7.96057009943135e-06,
      "loss": 0.0337,
      "step": 639020
    },
    {
      "epoch": 1.0458029758514824,
      "grad_norm": 0.744211733341217,
      "learning_rate": 7.960504207217834e-06,
      "loss": 0.0218,
      "step": 639040
    },
    {
      "epoch": 1.0458357062901358,
      "grad_norm": 0.8105484843254089,
      "learning_rate": 7.960438315004316e-06,
      "loss": 0.0174,
      "step": 639060
    },
    {
      "epoch": 1.0458684367287892,
      "grad_norm": 0.7040067911148071,
      "learning_rate": 7.9603724227908e-06,
      "loss": 0.0198,
      "step": 639080
    },
    {
      "epoch": 1.0459011671674423,
      "grad_norm": 0.7363675832748413,
      "learning_rate": 7.960306530577281e-06,
      "loss": 0.0312,
      "step": 639100
    },
    {
      "epoch": 1.0459338976060957,
      "grad_norm": 1.3020251989364624,
      "learning_rate": 7.960240638363765e-06,
      "loss": 0.0298,
      "step": 639120
    },
    {
      "epoch": 1.045966628044749,
      "grad_norm": 0.6387957334518433,
      "learning_rate": 7.960174746150247e-06,
      "loss": 0.0203,
      "step": 639140
    },
    {
      "epoch": 1.0459993584834024,
      "grad_norm": 2.038114070892334,
      "learning_rate": 7.96010885393673e-06,
      "loss": 0.0177,
      "step": 639160
    },
    {
      "epoch": 1.0460320889220558,
      "grad_norm": 1.864748477935791,
      "learning_rate": 7.960042961723214e-06,
      "loss": 0.0304,
      "step": 639180
    },
    {
      "epoch": 1.0460648193607092,
      "grad_norm": 0.45116400718688965,
      "learning_rate": 7.959977069509696e-06,
      "loss": 0.0228,
      "step": 639200
    },
    {
      "epoch": 1.0460975497993623,
      "grad_norm": 0.786657989025116,
      "learning_rate": 7.95991117729618e-06,
      "loss": 0.0224,
      "step": 639220
    },
    {
      "epoch": 1.0461302802380157,
      "grad_norm": 1.8749827146530151,
      "learning_rate": 7.959845285082663e-06,
      "loss": 0.0314,
      "step": 639240
    },
    {
      "epoch": 1.046163010676669,
      "grad_norm": 0.5617944002151489,
      "learning_rate": 7.959779392869147e-06,
      "loss": 0.0249,
      "step": 639260
    },
    {
      "epoch": 1.0461957411153224,
      "grad_norm": 1.546508550643921,
      "learning_rate": 7.959713500655629e-06,
      "loss": 0.0209,
      "step": 639280
    },
    {
      "epoch": 1.0462284715539758,
      "grad_norm": 1.454899549484253,
      "learning_rate": 7.959647608442112e-06,
      "loss": 0.0378,
      "step": 639300
    },
    {
      "epoch": 1.0462612019926292,
      "grad_norm": 0.6443853378295898,
      "learning_rate": 7.959581716228594e-06,
      "loss": 0.0193,
      "step": 639320
    },
    {
      "epoch": 1.0462939324312825,
      "grad_norm": 0.487668514251709,
      "learning_rate": 7.959515824015078e-06,
      "loss": 0.0204,
      "step": 639340
    },
    {
      "epoch": 1.0463266628699357,
      "grad_norm": 0.8072832822799683,
      "learning_rate": 7.95944993180156e-06,
      "loss": 0.0217,
      "step": 639360
    },
    {
      "epoch": 1.046359393308589,
      "grad_norm": 0.5180121064186096,
      "learning_rate": 7.959384039588043e-06,
      "loss": 0.0285,
      "step": 639380
    },
    {
      "epoch": 1.0463921237472424,
      "grad_norm": 0.6174861192703247,
      "learning_rate": 7.959318147374525e-06,
      "loss": 0.0218,
      "step": 639400
    },
    {
      "epoch": 1.0464248541858958,
      "grad_norm": 0.9037305116653442,
      "learning_rate": 7.959252255161009e-06,
      "loss": 0.0231,
      "step": 639420
    },
    {
      "epoch": 1.0464575846245492,
      "grad_norm": 0.7713587880134583,
      "learning_rate": 7.95918636294749e-06,
      "loss": 0.0203,
      "step": 639440
    },
    {
      "epoch": 1.0464903150632026,
      "grad_norm": 0.3455013334751129,
      "learning_rate": 7.959120470733974e-06,
      "loss": 0.0176,
      "step": 639460
    },
    {
      "epoch": 1.0465230455018557,
      "grad_norm": 0.28968533873558044,
      "learning_rate": 7.959054578520456e-06,
      "loss": 0.0182,
      "step": 639480
    },
    {
      "epoch": 1.046555775940509,
      "grad_norm": 0.801321804523468,
      "learning_rate": 7.95898868630694e-06,
      "loss": 0.0293,
      "step": 639500
    },
    {
      "epoch": 1.0465885063791625,
      "grad_norm": 0.5355615019798279,
      "learning_rate": 7.958922794093421e-06,
      "loss": 0.0204,
      "step": 639520
    },
    {
      "epoch": 1.0466212368178158,
      "grad_norm": 0.46618369221687317,
      "learning_rate": 7.958856901879905e-06,
      "loss": 0.0166,
      "step": 639540
    },
    {
      "epoch": 1.0466539672564692,
      "grad_norm": 1.0231823921203613,
      "learning_rate": 7.958791009666389e-06,
      "loss": 0.0274,
      "step": 639560
    },
    {
      "epoch": 1.0466866976951226,
      "grad_norm": 0.439449280500412,
      "learning_rate": 7.95872511745287e-06,
      "loss": 0.0247,
      "step": 639580
    },
    {
      "epoch": 1.046719428133776,
      "grad_norm": 1.2144020795822144,
      "learning_rate": 7.958659225239354e-06,
      "loss": 0.0268,
      "step": 639600
    },
    {
      "epoch": 1.046752158572429,
      "grad_norm": 1.0771437883377075,
      "learning_rate": 7.958593333025838e-06,
      "loss": 0.0275,
      "step": 639620
    },
    {
      "epoch": 1.0467848890110825,
      "grad_norm": 0.5042208433151245,
      "learning_rate": 7.95852744081232e-06,
      "loss": 0.0197,
      "step": 639640
    },
    {
      "epoch": 1.0468176194497358,
      "grad_norm": 2.5361154079437256,
      "learning_rate": 7.958461548598803e-06,
      "loss": 0.0243,
      "step": 639660
    },
    {
      "epoch": 1.0468503498883892,
      "grad_norm": 1.2626197338104248,
      "learning_rate": 7.958395656385287e-06,
      "loss": 0.0277,
      "step": 639680
    },
    {
      "epoch": 1.0468830803270426,
      "grad_norm": 0.46627724170684814,
      "learning_rate": 7.958329764171769e-06,
      "loss": 0.0204,
      "step": 639700
    },
    {
      "epoch": 1.046915810765696,
      "grad_norm": 0.5668379068374634,
      "learning_rate": 7.958263871958252e-06,
      "loss": 0.0274,
      "step": 639720
    },
    {
      "epoch": 1.0469485412043493,
      "grad_norm": 0.8723594546318054,
      "learning_rate": 7.958197979744734e-06,
      "loss": 0.0257,
      "step": 639740
    },
    {
      "epoch": 1.0469812716430025,
      "grad_norm": 2.084894895553589,
      "learning_rate": 7.958132087531218e-06,
      "loss": 0.0196,
      "step": 639760
    },
    {
      "epoch": 1.0470140020816558,
      "grad_norm": 0.6737750172615051,
      "learning_rate": 7.9580661953177e-06,
      "loss": 0.0221,
      "step": 639780
    },
    {
      "epoch": 1.0470467325203092,
      "grad_norm": 0.5417337417602539,
      "learning_rate": 7.958000303104183e-06,
      "loss": 0.0316,
      "step": 639800
    },
    {
      "epoch": 1.0470794629589626,
      "grad_norm": 0.36352911591529846,
      "learning_rate": 7.957934410890665e-06,
      "loss": 0.0222,
      "step": 639820
    },
    {
      "epoch": 1.047112193397616,
      "grad_norm": 0.8066486716270447,
      "learning_rate": 7.957868518677149e-06,
      "loss": 0.0205,
      "step": 639840
    },
    {
      "epoch": 1.0471449238362693,
      "grad_norm": 0.819900393486023,
      "learning_rate": 7.95780262646363e-06,
      "loss": 0.0245,
      "step": 639860
    },
    {
      "epoch": 1.0471776542749225,
      "grad_norm": 0.7221766114234924,
      "learning_rate": 7.957736734250114e-06,
      "loss": 0.0232,
      "step": 639880
    },
    {
      "epoch": 1.0472103847135759,
      "grad_norm": 0.6747269630432129,
      "learning_rate": 7.957670842036596e-06,
      "loss": 0.016,
      "step": 639900
    },
    {
      "epoch": 1.0472431151522292,
      "grad_norm": 0.8931744694709778,
      "learning_rate": 7.95760494982308e-06,
      "loss": 0.0238,
      "step": 639920
    },
    {
      "epoch": 1.0472758455908826,
      "grad_norm": 1.5416066646575928,
      "learning_rate": 7.957539057609563e-06,
      "loss": 0.0234,
      "step": 639940
    },
    {
      "epoch": 1.047308576029536,
      "grad_norm": 0.866240918636322,
      "learning_rate": 7.957473165396045e-06,
      "loss": 0.0309,
      "step": 639960
    },
    {
      "epoch": 1.0473413064681893,
      "grad_norm": 0.7851696610450745,
      "learning_rate": 7.957407273182529e-06,
      "loss": 0.0368,
      "step": 639980
    },
    {
      "epoch": 1.0473740369068427,
      "grad_norm": 0.9655498266220093,
      "learning_rate": 7.957341380969012e-06,
      "loss": 0.0218,
      "step": 640000
    },
    {
      "epoch": 1.0474067673454959,
      "grad_norm": 0.2103736847639084,
      "learning_rate": 7.957275488755494e-06,
      "loss": 0.0123,
      "step": 640020
    },
    {
      "epoch": 1.0474394977841492,
      "grad_norm": 1.3972902297973633,
      "learning_rate": 7.957209596541978e-06,
      "loss": 0.0237,
      "step": 640040
    },
    {
      "epoch": 1.0474722282228026,
      "grad_norm": 0.4008624255657196,
      "learning_rate": 7.957143704328461e-06,
      "loss": 0.0275,
      "step": 640060
    },
    {
      "epoch": 1.047504958661456,
      "grad_norm": 0.9341450333595276,
      "learning_rate": 7.957077812114943e-06,
      "loss": 0.0183,
      "step": 640080
    },
    {
      "epoch": 1.0475376891001094,
      "grad_norm": 1.196965217590332,
      "learning_rate": 7.957011919901427e-06,
      "loss": 0.0253,
      "step": 640100
    },
    {
      "epoch": 1.0475704195387627,
      "grad_norm": 0.26360344886779785,
      "learning_rate": 7.956946027687909e-06,
      "loss": 0.0286,
      "step": 640120
    },
    {
      "epoch": 1.047603149977416,
      "grad_norm": 0.4671342670917511,
      "learning_rate": 7.956880135474392e-06,
      "loss": 0.0212,
      "step": 640140
    },
    {
      "epoch": 1.0476358804160693,
      "grad_norm": 0.7127898931503296,
      "learning_rate": 7.956814243260874e-06,
      "loss": 0.0153,
      "step": 640160
    },
    {
      "epoch": 1.0476686108547226,
      "grad_norm": 0.5830172300338745,
      "learning_rate": 7.956748351047358e-06,
      "loss": 0.0199,
      "step": 640180
    },
    {
      "epoch": 1.047701341293376,
      "grad_norm": 1.0781188011169434,
      "learning_rate": 7.95668245883384e-06,
      "loss": 0.0172,
      "step": 640200
    },
    {
      "epoch": 1.0477340717320294,
      "grad_norm": 2.216310501098633,
      "learning_rate": 7.956616566620323e-06,
      "loss": 0.0271,
      "step": 640220
    },
    {
      "epoch": 1.0477668021706827,
      "grad_norm": 0.3894141614437103,
      "learning_rate": 7.956550674406805e-06,
      "loss": 0.0216,
      "step": 640240
    },
    {
      "epoch": 1.0477995326093361,
      "grad_norm": 0.7498672008514404,
      "learning_rate": 7.956484782193289e-06,
      "loss": 0.0198,
      "step": 640260
    },
    {
      "epoch": 1.0478322630479893,
      "grad_norm": 0.3113398849964142,
      "learning_rate": 7.95641888997977e-06,
      "loss": 0.0279,
      "step": 640280
    },
    {
      "epoch": 1.0478649934866426,
      "grad_norm": 0.5055916905403137,
      "learning_rate": 7.956352997766254e-06,
      "loss": 0.0158,
      "step": 640300
    },
    {
      "epoch": 1.047897723925296,
      "grad_norm": 0.889961302280426,
      "learning_rate": 7.956287105552738e-06,
      "loss": 0.0262,
      "step": 640320
    },
    {
      "epoch": 1.0479304543639494,
      "grad_norm": 2.5111076831817627,
      "learning_rate": 7.95622121333922e-06,
      "loss": 0.0242,
      "step": 640340
    },
    {
      "epoch": 1.0479631848026028,
      "grad_norm": 0.7359116673469543,
      "learning_rate": 7.956155321125703e-06,
      "loss": 0.0258,
      "step": 640360
    },
    {
      "epoch": 1.0479959152412561,
      "grad_norm": 1.2507165670394897,
      "learning_rate": 7.956089428912185e-06,
      "loss": 0.0257,
      "step": 640380
    },
    {
      "epoch": 1.0480286456799095,
      "grad_norm": 1.9889367818832397,
      "learning_rate": 7.956023536698669e-06,
      "loss": 0.025,
      "step": 640400
    },
    {
      "epoch": 1.0480613761185626,
      "grad_norm": 0.386340856552124,
      "learning_rate": 7.955957644485152e-06,
      "loss": 0.0229,
      "step": 640420
    },
    {
      "epoch": 1.048094106557216,
      "grad_norm": 2.9356369972229004,
      "learning_rate": 7.955891752271634e-06,
      "loss": 0.0249,
      "step": 640440
    },
    {
      "epoch": 1.0481268369958694,
      "grad_norm": 0.645854651927948,
      "learning_rate": 7.955825860058118e-06,
      "loss": 0.0261,
      "step": 640460
    },
    {
      "epoch": 1.0481595674345228,
      "grad_norm": 0.6437215209007263,
      "learning_rate": 7.955759967844601e-06,
      "loss": 0.0291,
      "step": 640480
    },
    {
      "epoch": 1.0481922978731761,
      "grad_norm": 0.469230592250824,
      "learning_rate": 7.955694075631083e-06,
      "loss": 0.0223,
      "step": 640500
    },
    {
      "epoch": 1.0482250283118295,
      "grad_norm": 0.6643033027648926,
      "learning_rate": 7.955628183417567e-06,
      "loss": 0.0252,
      "step": 640520
    },
    {
      "epoch": 1.0482577587504829,
      "grad_norm": 0.5154387950897217,
      "learning_rate": 7.955562291204049e-06,
      "loss": 0.0293,
      "step": 640540
    },
    {
      "epoch": 1.048290489189136,
      "grad_norm": 1.7876994609832764,
      "learning_rate": 7.955496398990532e-06,
      "loss": 0.0254,
      "step": 640560
    },
    {
      "epoch": 1.0483232196277894,
      "grad_norm": 0.6052606701850891,
      "learning_rate": 7.955430506777014e-06,
      "loss": 0.029,
      "step": 640580
    },
    {
      "epoch": 1.0483559500664428,
      "grad_norm": 1.1881219148635864,
      "learning_rate": 7.955364614563498e-06,
      "loss": 0.0219,
      "step": 640600
    },
    {
      "epoch": 1.0483886805050961,
      "grad_norm": 1.4150878190994263,
      "learning_rate": 7.95529872234998e-06,
      "loss": 0.0266,
      "step": 640620
    },
    {
      "epoch": 1.0484214109437495,
      "grad_norm": 1.4824118614196777,
      "learning_rate": 7.955232830136463e-06,
      "loss": 0.0257,
      "step": 640640
    },
    {
      "epoch": 1.048454141382403,
      "grad_norm": 0.6751510500907898,
      "learning_rate": 7.955166937922947e-06,
      "loss": 0.0285,
      "step": 640660
    },
    {
      "epoch": 1.048486871821056,
      "grad_norm": 0.35101598501205444,
      "learning_rate": 7.955101045709429e-06,
      "loss": 0.022,
      "step": 640680
    },
    {
      "epoch": 1.0485196022597094,
      "grad_norm": 1.015417456626892,
      "learning_rate": 7.955035153495912e-06,
      "loss": 0.0307,
      "step": 640700
    },
    {
      "epoch": 1.0485523326983628,
      "grad_norm": 1.2078356742858887,
      "learning_rate": 7.954969261282394e-06,
      "loss": 0.0288,
      "step": 640720
    },
    {
      "epoch": 1.0485850631370162,
      "grad_norm": 1.520672082901001,
      "learning_rate": 7.954903369068878e-06,
      "loss": 0.0375,
      "step": 640740
    },
    {
      "epoch": 1.0486177935756695,
      "grad_norm": 2.2891838550567627,
      "learning_rate": 7.95483747685536e-06,
      "loss": 0.0327,
      "step": 640760
    },
    {
      "epoch": 1.048650524014323,
      "grad_norm": 1.8472952842712402,
      "learning_rate": 7.954771584641843e-06,
      "loss": 0.0224,
      "step": 640780
    },
    {
      "epoch": 1.0486832544529763,
      "grad_norm": 0.15746338665485382,
      "learning_rate": 7.954705692428327e-06,
      "loss": 0.027,
      "step": 640800
    },
    {
      "epoch": 1.0487159848916294,
      "grad_norm": 0.3869546055793762,
      "learning_rate": 7.954639800214809e-06,
      "loss": 0.0322,
      "step": 640820
    },
    {
      "epoch": 1.0487487153302828,
      "grad_norm": 1.9216973781585693,
      "learning_rate": 7.954573908001292e-06,
      "loss": 0.0287,
      "step": 640840
    },
    {
      "epoch": 1.0487814457689362,
      "grad_norm": 0.8570582866668701,
      "learning_rate": 7.954508015787776e-06,
      "loss": 0.0319,
      "step": 640860
    },
    {
      "epoch": 1.0488141762075895,
      "grad_norm": 1.4957219362258911,
      "learning_rate": 7.954442123574258e-06,
      "loss": 0.0207,
      "step": 640880
    },
    {
      "epoch": 1.048846906646243,
      "grad_norm": 0.813130795955658,
      "learning_rate": 7.954376231360741e-06,
      "loss": 0.0345,
      "step": 640900
    },
    {
      "epoch": 1.0488796370848963,
      "grad_norm": 0.719556987285614,
      "learning_rate": 7.954310339147223e-06,
      "loss": 0.0235,
      "step": 640920
    },
    {
      "epoch": 1.0489123675235494,
      "grad_norm": 0.43362554907798767,
      "learning_rate": 7.954244446933707e-06,
      "loss": 0.0216,
      "step": 640940
    },
    {
      "epoch": 1.0489450979622028,
      "grad_norm": 0.32041794061660767,
      "learning_rate": 7.954178554720189e-06,
      "loss": 0.0149,
      "step": 640960
    },
    {
      "epoch": 1.0489778284008562,
      "grad_norm": 1.0416606664657593,
      "learning_rate": 7.954112662506672e-06,
      "loss": 0.0239,
      "step": 640980
    },
    {
      "epoch": 1.0490105588395096,
      "grad_norm": 0.5017803907394409,
      "learning_rate": 7.954046770293156e-06,
      "loss": 0.0202,
      "step": 641000
    },
    {
      "epoch": 1.049043289278163,
      "grad_norm": 0.6452468633651733,
      "learning_rate": 7.953980878079638e-06,
      "loss": 0.0368,
      "step": 641020
    },
    {
      "epoch": 1.0490760197168163,
      "grad_norm": 0.7947470545768738,
      "learning_rate": 7.953914985866121e-06,
      "loss": 0.0302,
      "step": 641040
    },
    {
      "epoch": 1.0491087501554697,
      "grad_norm": 1.2769250869750977,
      "learning_rate": 7.953849093652603e-06,
      "loss": 0.0234,
      "step": 641060
    },
    {
      "epoch": 1.0491414805941228,
      "grad_norm": 0.21436519920825958,
      "learning_rate": 7.953783201439087e-06,
      "loss": 0.0188,
      "step": 641080
    },
    {
      "epoch": 1.0491742110327762,
      "grad_norm": 0.5956847667694092,
      "learning_rate": 7.953717309225569e-06,
      "loss": 0.0224,
      "step": 641100
    },
    {
      "epoch": 1.0492069414714296,
      "grad_norm": 0.2637826204299927,
      "learning_rate": 7.953651417012052e-06,
      "loss": 0.0247,
      "step": 641120
    },
    {
      "epoch": 1.049239671910083,
      "grad_norm": 0.6013228297233582,
      "learning_rate": 7.953585524798534e-06,
      "loss": 0.0182,
      "step": 641140
    },
    {
      "epoch": 1.0492724023487363,
      "grad_norm": 0.9125030636787415,
      "learning_rate": 7.953519632585018e-06,
      "loss": 0.0228,
      "step": 641160
    },
    {
      "epoch": 1.0493051327873897,
      "grad_norm": 0.7289138436317444,
      "learning_rate": 7.9534537403715e-06,
      "loss": 0.0161,
      "step": 641180
    },
    {
      "epoch": 1.049337863226043,
      "grad_norm": 0.34768277406692505,
      "learning_rate": 7.953387848157983e-06,
      "loss": 0.0228,
      "step": 641200
    },
    {
      "epoch": 1.0493705936646962,
      "grad_norm": 0.8363901972770691,
      "learning_rate": 7.953321955944467e-06,
      "loss": 0.018,
      "step": 641220
    },
    {
      "epoch": 1.0494033241033496,
      "grad_norm": 0.626285970211029,
      "learning_rate": 7.953256063730949e-06,
      "loss": 0.0209,
      "step": 641240
    },
    {
      "epoch": 1.049436054542003,
      "grad_norm": 0.22601546347141266,
      "learning_rate": 7.953190171517432e-06,
      "loss": 0.0279,
      "step": 641260
    },
    {
      "epoch": 1.0494687849806563,
      "grad_norm": 1.006406545639038,
      "learning_rate": 7.953124279303916e-06,
      "loss": 0.0221,
      "step": 641280
    },
    {
      "epoch": 1.0495015154193097,
      "grad_norm": 0.7039635181427002,
      "learning_rate": 7.953058387090398e-06,
      "loss": 0.0187,
      "step": 641300
    },
    {
      "epoch": 1.049534245857963,
      "grad_norm": 0.8058484196662903,
      "learning_rate": 7.952992494876882e-06,
      "loss": 0.0149,
      "step": 641320
    },
    {
      "epoch": 1.0495669762966164,
      "grad_norm": 0.45426395535469055,
      "learning_rate": 7.952926602663365e-06,
      "loss": 0.0208,
      "step": 641340
    },
    {
      "epoch": 1.0495997067352696,
      "grad_norm": 0.20429742336273193,
      "learning_rate": 7.952860710449847e-06,
      "loss": 0.0253,
      "step": 641360
    },
    {
      "epoch": 1.049632437173923,
      "grad_norm": 0.2591499984264374,
      "learning_rate": 7.95279481823633e-06,
      "loss": 0.0213,
      "step": 641380
    },
    {
      "epoch": 1.0496651676125763,
      "grad_norm": 0.6175360083580017,
      "learning_rate": 7.952728926022813e-06,
      "loss": 0.0221,
      "step": 641400
    },
    {
      "epoch": 1.0496978980512297,
      "grad_norm": 0.8821512460708618,
      "learning_rate": 7.952663033809296e-06,
      "loss": 0.0199,
      "step": 641420
    },
    {
      "epoch": 1.049730628489883,
      "grad_norm": 1.6982909440994263,
      "learning_rate": 7.952597141595778e-06,
      "loss": 0.0257,
      "step": 641440
    },
    {
      "epoch": 1.0497633589285364,
      "grad_norm": 0.38845953345298767,
      "learning_rate": 7.952531249382262e-06,
      "loss": 0.0312,
      "step": 641460
    },
    {
      "epoch": 1.0497960893671896,
      "grad_norm": 1.2983198165893555,
      "learning_rate": 7.952465357168743e-06,
      "loss": 0.0315,
      "step": 641480
    },
    {
      "epoch": 1.049828819805843,
      "grad_norm": 1.5164507627487183,
      "learning_rate": 7.952399464955227e-06,
      "loss": 0.0258,
      "step": 641500
    },
    {
      "epoch": 1.0498615502444963,
      "grad_norm": 0.7198176980018616,
      "learning_rate": 7.952333572741709e-06,
      "loss": 0.0236,
      "step": 641520
    },
    {
      "epoch": 1.0498942806831497,
      "grad_norm": 0.5115192532539368,
      "learning_rate": 7.952267680528193e-06,
      "loss": 0.0201,
      "step": 641540
    },
    {
      "epoch": 1.049927011121803,
      "grad_norm": 0.5808590650558472,
      "learning_rate": 7.952201788314674e-06,
      "loss": 0.0225,
      "step": 641560
    },
    {
      "epoch": 1.0499597415604565,
      "grad_norm": 0.8627385497093201,
      "learning_rate": 7.952135896101158e-06,
      "loss": 0.0252,
      "step": 641580
    },
    {
      "epoch": 1.0499924719991098,
      "grad_norm": 0.47453299164772034,
      "learning_rate": 7.952070003887642e-06,
      "loss": 0.0208,
      "step": 641600
    },
    {
      "epoch": 1.050025202437763,
      "grad_norm": 0.9121682047843933,
      "learning_rate": 7.952004111674123e-06,
      "loss": 0.0316,
      "step": 641620
    },
    {
      "epoch": 1.0500579328764164,
      "grad_norm": 0.3833578824996948,
      "learning_rate": 7.951938219460607e-06,
      "loss": 0.0243,
      "step": 641640
    },
    {
      "epoch": 1.0500906633150697,
      "grad_norm": 1.5889476537704468,
      "learning_rate": 7.95187232724709e-06,
      "loss": 0.0226,
      "step": 641660
    },
    {
      "epoch": 1.050123393753723,
      "grad_norm": 0.9763363003730774,
      "learning_rate": 7.951806435033573e-06,
      "loss": 0.0269,
      "step": 641680
    },
    {
      "epoch": 1.0501561241923765,
      "grad_norm": 0.9500167965888977,
      "learning_rate": 7.951740542820056e-06,
      "loss": 0.0204,
      "step": 641700
    },
    {
      "epoch": 1.0501888546310298,
      "grad_norm": 2.213005542755127,
      "learning_rate": 7.95167465060654e-06,
      "loss": 0.0224,
      "step": 641720
    },
    {
      "epoch": 1.050221585069683,
      "grad_norm": 0.8336559534072876,
      "learning_rate": 7.951608758393022e-06,
      "loss": 0.0236,
      "step": 641740
    },
    {
      "epoch": 1.0502543155083364,
      "grad_norm": 0.8295921683311462,
      "learning_rate": 7.951542866179505e-06,
      "loss": 0.0282,
      "step": 641760
    },
    {
      "epoch": 1.0502870459469897,
      "grad_norm": 0.49630382657051086,
      "learning_rate": 7.951476973965987e-06,
      "loss": 0.0248,
      "step": 641780
    },
    {
      "epoch": 1.0503197763856431,
      "grad_norm": 3.142932891845703,
      "learning_rate": 7.95141108175247e-06,
      "loss": 0.023,
      "step": 641800
    },
    {
      "epoch": 1.0503525068242965,
      "grad_norm": 0.5213487148284912,
      "learning_rate": 7.951345189538953e-06,
      "loss": 0.0321,
      "step": 641820
    },
    {
      "epoch": 1.0503852372629499,
      "grad_norm": 0.6524885296821594,
      "learning_rate": 7.951279297325436e-06,
      "loss": 0.0217,
      "step": 641840
    },
    {
      "epoch": 1.0504179677016032,
      "grad_norm": 0.4155158996582031,
      "learning_rate": 7.951213405111918e-06,
      "loss": 0.0155,
      "step": 641860
    },
    {
      "epoch": 1.0504506981402564,
      "grad_norm": 1.45876944065094,
      "learning_rate": 7.951147512898402e-06,
      "loss": 0.0268,
      "step": 641880
    },
    {
      "epoch": 1.0504834285789098,
      "grad_norm": 0.5222590565681458,
      "learning_rate": 7.951081620684884e-06,
      "loss": 0.0238,
      "step": 641900
    },
    {
      "epoch": 1.0505161590175631,
      "grad_norm": 0.30003073811531067,
      "learning_rate": 7.951015728471367e-06,
      "loss": 0.0219,
      "step": 641920
    },
    {
      "epoch": 1.0505488894562165,
      "grad_norm": 0.7442569732666016,
      "learning_rate": 7.950949836257849e-06,
      "loss": 0.0277,
      "step": 641940
    },
    {
      "epoch": 1.0505816198948699,
      "grad_norm": 0.32095879316329956,
      "learning_rate": 7.950883944044333e-06,
      "loss": 0.0176,
      "step": 641960
    },
    {
      "epoch": 1.0506143503335232,
      "grad_norm": 0.6077907085418701,
      "learning_rate": 7.950818051830815e-06,
      "loss": 0.0305,
      "step": 641980
    },
    {
      "epoch": 1.0506470807721766,
      "grad_norm": 0.7364236116409302,
      "learning_rate": 7.950752159617298e-06,
      "loss": 0.023,
      "step": 642000
    },
    {
      "epoch": 1.0506798112108298,
      "grad_norm": 0.6193969249725342,
      "learning_rate": 7.950686267403782e-06,
      "loss": 0.027,
      "step": 642020
    },
    {
      "epoch": 1.0507125416494831,
      "grad_norm": 0.5374811887741089,
      "learning_rate": 7.950620375190265e-06,
      "loss": 0.0245,
      "step": 642040
    },
    {
      "epoch": 1.0507452720881365,
      "grad_norm": 0.299063116312027,
      "learning_rate": 7.950554482976747e-06,
      "loss": 0.0319,
      "step": 642060
    },
    {
      "epoch": 1.0507780025267899,
      "grad_norm": 1.2424572706222534,
      "learning_rate": 7.95048859076323e-06,
      "loss": 0.0325,
      "step": 642080
    },
    {
      "epoch": 1.0508107329654433,
      "grad_norm": 1.2938075065612793,
      "learning_rate": 7.950422698549714e-06,
      "loss": 0.0281,
      "step": 642100
    },
    {
      "epoch": 1.0508434634040966,
      "grad_norm": 0.7017377018928528,
      "learning_rate": 7.950356806336196e-06,
      "loss": 0.0217,
      "step": 642120
    },
    {
      "epoch": 1.05087619384275,
      "grad_norm": 0.226319819688797,
      "learning_rate": 7.95029091412268e-06,
      "loss": 0.0276,
      "step": 642140
    },
    {
      "epoch": 1.0509089242814031,
      "grad_norm": 0.22216640412807465,
      "learning_rate": 7.950225021909162e-06,
      "loss": 0.0278,
      "step": 642160
    },
    {
      "epoch": 1.0509416547200565,
      "grad_norm": 1.122563123703003,
      "learning_rate": 7.950159129695645e-06,
      "loss": 0.0238,
      "step": 642180
    },
    {
      "epoch": 1.05097438515871,
      "grad_norm": 1.1562458276748657,
      "learning_rate": 7.950093237482127e-06,
      "loss": 0.0313,
      "step": 642200
    },
    {
      "epoch": 1.0510071155973633,
      "grad_norm": 0.19081777334213257,
      "learning_rate": 7.95002734526861e-06,
      "loss": 0.0199,
      "step": 642220
    },
    {
      "epoch": 1.0510398460360166,
      "grad_norm": 0.30505046248435974,
      "learning_rate": 7.949961453055093e-06,
      "loss": 0.026,
      "step": 642240
    },
    {
      "epoch": 1.05107257647467,
      "grad_norm": 2.1642720699310303,
      "learning_rate": 7.949895560841576e-06,
      "loss": 0.0256,
      "step": 642260
    },
    {
      "epoch": 1.0511053069133232,
      "grad_norm": 0.7778229713439941,
      "learning_rate": 7.949829668628058e-06,
      "loss": 0.0203,
      "step": 642280
    },
    {
      "epoch": 1.0511380373519765,
      "grad_norm": 1.1974321603775024,
      "learning_rate": 7.949763776414542e-06,
      "loss": 0.0251,
      "step": 642300
    },
    {
      "epoch": 1.05117076779063,
      "grad_norm": 1.207188606262207,
      "learning_rate": 7.949697884201024e-06,
      "loss": 0.0244,
      "step": 642320
    },
    {
      "epoch": 1.0512034982292833,
      "grad_norm": 0.9394305944442749,
      "learning_rate": 7.949631991987507e-06,
      "loss": 0.033,
      "step": 642340
    },
    {
      "epoch": 1.0512362286679366,
      "grad_norm": 0.45011022686958313,
      "learning_rate": 7.949566099773989e-06,
      "loss": 0.0234,
      "step": 642360
    },
    {
      "epoch": 1.05126895910659,
      "grad_norm": 1.2017678022384644,
      "learning_rate": 7.949500207560473e-06,
      "loss": 0.0286,
      "step": 642380
    },
    {
      "epoch": 1.0513016895452434,
      "grad_norm": 0.3903176486492157,
      "learning_rate": 7.949434315346956e-06,
      "loss": 0.0233,
      "step": 642400
    },
    {
      "epoch": 1.0513344199838965,
      "grad_norm": 0.9887271523475647,
      "learning_rate": 7.949368423133438e-06,
      "loss": 0.0214,
      "step": 642420
    },
    {
      "epoch": 1.05136715042255,
      "grad_norm": 0.8470895290374756,
      "learning_rate": 7.949302530919922e-06,
      "loss": 0.023,
      "step": 642440
    },
    {
      "epoch": 1.0513998808612033,
      "grad_norm": 13.422719955444336,
      "learning_rate": 7.949236638706405e-06,
      "loss": 0.0318,
      "step": 642460
    },
    {
      "epoch": 1.0514326112998567,
      "grad_norm": 0.7345203757286072,
      "learning_rate": 7.949170746492887e-06,
      "loss": 0.0226,
      "step": 642480
    },
    {
      "epoch": 1.05146534173851,
      "grad_norm": 0.4086010754108429,
      "learning_rate": 7.94910485427937e-06,
      "loss": 0.0201,
      "step": 642500
    },
    {
      "epoch": 1.0514980721771634,
      "grad_norm": 1.6133943796157837,
      "learning_rate": 7.949038962065854e-06,
      "loss": 0.0236,
      "step": 642520
    },
    {
      "epoch": 1.0515308026158166,
      "grad_norm": 0.5970011353492737,
      "learning_rate": 7.948973069852336e-06,
      "loss": 0.0232,
      "step": 642540
    },
    {
      "epoch": 1.05156353305447,
      "grad_norm": 1.0456315279006958,
      "learning_rate": 7.94890717763882e-06,
      "loss": 0.0139,
      "step": 642560
    },
    {
      "epoch": 1.0515962634931233,
      "grad_norm": 0.3249795138835907,
      "learning_rate": 7.948841285425302e-06,
      "loss": 0.0242,
      "step": 642580
    },
    {
      "epoch": 1.0516289939317767,
      "grad_norm": 3.2345786094665527,
      "learning_rate": 7.948775393211785e-06,
      "loss": 0.0301,
      "step": 642600
    },
    {
      "epoch": 1.05166172437043,
      "grad_norm": 0.37058186531066895,
      "learning_rate": 7.948709500998267e-06,
      "loss": 0.0281,
      "step": 642620
    },
    {
      "epoch": 1.0516944548090834,
      "grad_norm": 4.985763072967529,
      "learning_rate": 7.94864360878475e-06,
      "loss": 0.0273,
      "step": 642640
    },
    {
      "epoch": 1.0517271852477368,
      "grad_norm": 0.5703983306884766,
      "learning_rate": 7.948577716571233e-06,
      "loss": 0.0304,
      "step": 642660
    },
    {
      "epoch": 1.05175991568639,
      "grad_norm": 0.6649165749549866,
      "learning_rate": 7.948511824357716e-06,
      "loss": 0.028,
      "step": 642680
    },
    {
      "epoch": 1.0517926461250433,
      "grad_norm": 1.569889783859253,
      "learning_rate": 7.948445932144198e-06,
      "loss": 0.0162,
      "step": 642700
    },
    {
      "epoch": 1.0518253765636967,
      "grad_norm": 1.320889949798584,
      "learning_rate": 7.948380039930682e-06,
      "loss": 0.0308,
      "step": 642720
    },
    {
      "epoch": 1.05185810700235,
      "grad_norm": 0.9726975560188293,
      "learning_rate": 7.948314147717164e-06,
      "loss": 0.0301,
      "step": 642740
    },
    {
      "epoch": 1.0518908374410034,
      "grad_norm": 0.6559373140335083,
      "learning_rate": 7.948248255503647e-06,
      "loss": 0.0223,
      "step": 642760
    },
    {
      "epoch": 1.0519235678796568,
      "grad_norm": 0.6490591168403625,
      "learning_rate": 7.948182363290131e-06,
      "loss": 0.0218,
      "step": 642780
    },
    {
      "epoch": 1.0519562983183102,
      "grad_norm": 1.1990034580230713,
      "learning_rate": 7.948116471076613e-06,
      "loss": 0.0209,
      "step": 642800
    },
    {
      "epoch": 1.0519890287569633,
      "grad_norm": 8.14806842803955,
      "learning_rate": 7.948050578863096e-06,
      "loss": 0.0204,
      "step": 642820
    },
    {
      "epoch": 1.0520217591956167,
      "grad_norm": 0.3052324950695038,
      "learning_rate": 7.94798468664958e-06,
      "loss": 0.0142,
      "step": 642840
    },
    {
      "epoch": 1.05205448963427,
      "grad_norm": 1.2136045694351196,
      "learning_rate": 7.947918794436062e-06,
      "loss": 0.0291,
      "step": 642860
    },
    {
      "epoch": 1.0520872200729234,
      "grad_norm": 0.4370061159133911,
      "learning_rate": 7.947852902222545e-06,
      "loss": 0.0203,
      "step": 642880
    },
    {
      "epoch": 1.0521199505115768,
      "grad_norm": 0.43364688754081726,
      "learning_rate": 7.947787010009029e-06,
      "loss": 0.0214,
      "step": 642900
    },
    {
      "epoch": 1.0521526809502302,
      "grad_norm": 0.8074666261672974,
      "learning_rate": 7.947721117795511e-06,
      "loss": 0.0159,
      "step": 642920
    },
    {
      "epoch": 1.0521854113888833,
      "grad_norm": 1.8623961210250854,
      "learning_rate": 7.947655225581994e-06,
      "loss": 0.0216,
      "step": 642940
    },
    {
      "epoch": 1.0522181418275367,
      "grad_norm": 0.3808854818344116,
      "learning_rate": 7.947589333368476e-06,
      "loss": 0.0277,
      "step": 642960
    },
    {
      "epoch": 1.05225087226619,
      "grad_norm": 0.6480966806411743,
      "learning_rate": 7.94752344115496e-06,
      "loss": 0.0278,
      "step": 642980
    },
    {
      "epoch": 1.0522836027048434,
      "grad_norm": 0.3743284344673157,
      "learning_rate": 7.947457548941442e-06,
      "loss": 0.0242,
      "step": 643000
    },
    {
      "epoch": 1.0523163331434968,
      "grad_norm": 1.1382534503936768,
      "learning_rate": 7.947391656727925e-06,
      "loss": 0.0334,
      "step": 643020
    },
    {
      "epoch": 1.0523490635821502,
      "grad_norm": 0.9097972512245178,
      "learning_rate": 7.947325764514407e-06,
      "loss": 0.0222,
      "step": 643040
    },
    {
      "epoch": 1.0523817940208036,
      "grad_norm": 0.5869849920272827,
      "learning_rate": 7.947259872300891e-06,
      "loss": 0.0307,
      "step": 643060
    },
    {
      "epoch": 1.0524145244594567,
      "grad_norm": 0.7291895747184753,
      "learning_rate": 7.947193980087373e-06,
      "loss": 0.0188,
      "step": 643080
    },
    {
      "epoch": 1.05244725489811,
      "grad_norm": 0.46454015374183655,
      "learning_rate": 7.947128087873856e-06,
      "loss": 0.0245,
      "step": 643100
    },
    {
      "epoch": 1.0524799853367635,
      "grad_norm": 0.41508495807647705,
      "learning_rate": 7.94706219566034e-06,
      "loss": 0.0277,
      "step": 643120
    },
    {
      "epoch": 1.0525127157754168,
      "grad_norm": 0.8290354013442993,
      "learning_rate": 7.946996303446822e-06,
      "loss": 0.0133,
      "step": 643140
    },
    {
      "epoch": 1.0525454462140702,
      "grad_norm": 2.333848714828491,
      "learning_rate": 7.946930411233305e-06,
      "loss": 0.019,
      "step": 643160
    },
    {
      "epoch": 1.0525781766527236,
      "grad_norm": 0.2070131152868271,
      "learning_rate": 7.946864519019787e-06,
      "loss": 0.0179,
      "step": 643180
    },
    {
      "epoch": 1.052610907091377,
      "grad_norm": 0.20510609447956085,
      "learning_rate": 7.946798626806271e-06,
      "loss": 0.0243,
      "step": 643200
    },
    {
      "epoch": 1.05264363753003,
      "grad_norm": 0.15328866243362427,
      "learning_rate": 7.946732734592753e-06,
      "loss": 0.0192,
      "step": 643220
    },
    {
      "epoch": 1.0526763679686835,
      "grad_norm": 0.06572393327951431,
      "learning_rate": 7.946666842379236e-06,
      "loss": 0.0271,
      "step": 643240
    },
    {
      "epoch": 1.0527090984073368,
      "grad_norm": 1.0659812688827515,
      "learning_rate": 7.94660095016572e-06,
      "loss": 0.0262,
      "step": 643260
    },
    {
      "epoch": 1.0527418288459902,
      "grad_norm": 1.3279285430908203,
      "learning_rate": 7.946535057952202e-06,
      "loss": 0.0229,
      "step": 643280
    },
    {
      "epoch": 1.0527745592846436,
      "grad_norm": 1.4305697679519653,
      "learning_rate": 7.946469165738685e-06,
      "loss": 0.0265,
      "step": 643300
    },
    {
      "epoch": 1.052807289723297,
      "grad_norm": 0.41341498494148254,
      "learning_rate": 7.946403273525169e-06,
      "loss": 0.0272,
      "step": 643320
    },
    {
      "epoch": 1.05284002016195,
      "grad_norm": 1.01863431930542,
      "learning_rate": 7.946337381311651e-06,
      "loss": 0.0232,
      "step": 643340
    },
    {
      "epoch": 1.0528727506006035,
      "grad_norm": 1.0129164457321167,
      "learning_rate": 7.946271489098135e-06,
      "loss": 0.0276,
      "step": 643360
    },
    {
      "epoch": 1.0529054810392569,
      "grad_norm": 0.7255627512931824,
      "learning_rate": 7.946205596884616e-06,
      "loss": 0.0212,
      "step": 643380
    },
    {
      "epoch": 1.0529382114779102,
      "grad_norm": 0.6791185736656189,
      "learning_rate": 7.9461397046711e-06,
      "loss": 0.0211,
      "step": 643400
    },
    {
      "epoch": 1.0529709419165636,
      "grad_norm": 0.15130075812339783,
      "learning_rate": 7.946073812457582e-06,
      "loss": 0.0243,
      "step": 643420
    },
    {
      "epoch": 1.053003672355217,
      "grad_norm": 1.1144990921020508,
      "learning_rate": 7.946007920244066e-06,
      "loss": 0.0264,
      "step": 643440
    },
    {
      "epoch": 1.0530364027938703,
      "grad_norm": 0.7200478315353394,
      "learning_rate": 7.945942028030549e-06,
      "loss": 0.0221,
      "step": 643460
    },
    {
      "epoch": 1.0530691332325235,
      "grad_norm": 0.3831694424152374,
      "learning_rate": 7.945876135817031e-06,
      "loss": 0.0253,
      "step": 643480
    },
    {
      "epoch": 1.0531018636711769,
      "grad_norm": 0.3017975687980652,
      "learning_rate": 7.945810243603515e-06,
      "loss": 0.0245,
      "step": 643500
    },
    {
      "epoch": 1.0531345941098302,
      "grad_norm": 1.203028917312622,
      "learning_rate": 7.945744351389996e-06,
      "loss": 0.0283,
      "step": 643520
    },
    {
      "epoch": 1.0531673245484836,
      "grad_norm": 0.6871848106384277,
      "learning_rate": 7.94567845917648e-06,
      "loss": 0.0233,
      "step": 643540
    },
    {
      "epoch": 1.053200054987137,
      "grad_norm": 1.0631972551345825,
      "learning_rate": 7.945612566962962e-06,
      "loss": 0.0231,
      "step": 643560
    },
    {
      "epoch": 1.0532327854257904,
      "grad_norm": 0.6994937658309937,
      "learning_rate": 7.945546674749446e-06,
      "loss": 0.0367,
      "step": 643580
    },
    {
      "epoch": 1.0532655158644437,
      "grad_norm": 0.964267909526825,
      "learning_rate": 7.945480782535927e-06,
      "loss": 0.0224,
      "step": 643600
    },
    {
      "epoch": 1.0532982463030969,
      "grad_norm": 1.030067801475525,
      "learning_rate": 7.945414890322411e-06,
      "loss": 0.0242,
      "step": 643620
    },
    {
      "epoch": 1.0533309767417502,
      "grad_norm": 0.27244678139686584,
      "learning_rate": 7.945348998108895e-06,
      "loss": 0.0285,
      "step": 643640
    },
    {
      "epoch": 1.0533637071804036,
      "grad_norm": 1.1757524013519287,
      "learning_rate": 7.945283105895376e-06,
      "loss": 0.0186,
      "step": 643660
    },
    {
      "epoch": 1.053396437619057,
      "grad_norm": 1.0325027704238892,
      "learning_rate": 7.94521721368186e-06,
      "loss": 0.0234,
      "step": 643680
    },
    {
      "epoch": 1.0534291680577104,
      "grad_norm": 0.5844066739082336,
      "learning_rate": 7.945151321468344e-06,
      "loss": 0.0239,
      "step": 643700
    },
    {
      "epoch": 1.0534618984963637,
      "grad_norm": 0.6688163876533508,
      "learning_rate": 7.945085429254826e-06,
      "loss": 0.0238,
      "step": 643720
    },
    {
      "epoch": 1.0534946289350169,
      "grad_norm": 0.14380142092704773,
      "learning_rate": 7.945019537041309e-06,
      "loss": 0.0247,
      "step": 643740
    },
    {
      "epoch": 1.0535273593736703,
      "grad_norm": 0.6278215646743774,
      "learning_rate": 7.944953644827791e-06,
      "loss": 0.0276,
      "step": 643760
    },
    {
      "epoch": 1.0535600898123236,
      "grad_norm": 0.5173618197441101,
      "learning_rate": 7.944887752614275e-06,
      "loss": 0.0257,
      "step": 643780
    },
    {
      "epoch": 1.053592820250977,
      "grad_norm": 0.18804773688316345,
      "learning_rate": 7.944821860400758e-06,
      "loss": 0.0254,
      "step": 643800
    },
    {
      "epoch": 1.0536255506896304,
      "grad_norm": 2.3786461353302,
      "learning_rate": 7.94475596818724e-06,
      "loss": 0.0292,
      "step": 643820
    },
    {
      "epoch": 1.0536582811282837,
      "grad_norm": 0.2604256570339203,
      "learning_rate": 7.944690075973724e-06,
      "loss": 0.0181,
      "step": 643840
    },
    {
      "epoch": 1.0536910115669371,
      "grad_norm": 0.45829135179519653,
      "learning_rate": 7.944624183760206e-06,
      "loss": 0.0321,
      "step": 643860
    },
    {
      "epoch": 1.0537237420055903,
      "grad_norm": 1.1554793119430542,
      "learning_rate": 7.94455829154669e-06,
      "loss": 0.023,
      "step": 643880
    },
    {
      "epoch": 1.0537564724442436,
      "grad_norm": 0.4245213270187378,
      "learning_rate": 7.944492399333171e-06,
      "loss": 0.0208,
      "step": 643900
    },
    {
      "epoch": 1.053789202882897,
      "grad_norm": 0.6757422089576721,
      "learning_rate": 7.944426507119655e-06,
      "loss": 0.0325,
      "step": 643920
    },
    {
      "epoch": 1.0538219333215504,
      "grad_norm": 1.7089803218841553,
      "learning_rate": 7.944360614906137e-06,
      "loss": 0.027,
      "step": 643940
    },
    {
      "epoch": 1.0538546637602038,
      "grad_norm": 0.8053734302520752,
      "learning_rate": 7.94429472269262e-06,
      "loss": 0.0187,
      "step": 643960
    },
    {
      "epoch": 1.0538873941988571,
      "grad_norm": 1.2158915996551514,
      "learning_rate": 7.944228830479102e-06,
      "loss": 0.0238,
      "step": 643980
    },
    {
      "epoch": 1.0539201246375103,
      "grad_norm": 0.2450798749923706,
      "learning_rate": 7.944162938265586e-06,
      "loss": 0.0322,
      "step": 644000
    },
    {
      "epoch": 1.0539528550761637,
      "grad_norm": 0.7158199548721313,
      "learning_rate": 7.944097046052068e-06,
      "loss": 0.0387,
      "step": 644020
    },
    {
      "epoch": 1.053985585514817,
      "grad_norm": 0.853987455368042,
      "learning_rate": 7.944031153838551e-06,
      "loss": 0.0279,
      "step": 644040
    },
    {
      "epoch": 1.0540183159534704,
      "grad_norm": 0.3466523587703705,
      "learning_rate": 7.943965261625035e-06,
      "loss": 0.0163,
      "step": 644060
    },
    {
      "epoch": 1.0540510463921238,
      "grad_norm": 0.5027747750282288,
      "learning_rate": 7.943899369411517e-06,
      "loss": 0.0248,
      "step": 644080
    },
    {
      "epoch": 1.0540837768307771,
      "grad_norm": 0.3089873492717743,
      "learning_rate": 7.943833477198e-06,
      "loss": 0.0247,
      "step": 644100
    },
    {
      "epoch": 1.0541165072694305,
      "grad_norm": 0.3370104730129242,
      "learning_rate": 7.943767584984484e-06,
      "loss": 0.0146,
      "step": 644120
    },
    {
      "epoch": 1.0541492377080837,
      "grad_norm": 0.3002457022666931,
      "learning_rate": 7.943701692770966e-06,
      "loss": 0.0207,
      "step": 644140
    },
    {
      "epoch": 1.054181968146737,
      "grad_norm": 1.0834460258483887,
      "learning_rate": 7.94363580055745e-06,
      "loss": 0.0276,
      "step": 644160
    },
    {
      "epoch": 1.0542146985853904,
      "grad_norm": 1.1480534076690674,
      "learning_rate": 7.943569908343933e-06,
      "loss": 0.029,
      "step": 644180
    },
    {
      "epoch": 1.0542474290240438,
      "grad_norm": 0.991206169128418,
      "learning_rate": 7.943504016130415e-06,
      "loss": 0.0171,
      "step": 644200
    },
    {
      "epoch": 1.0542801594626972,
      "grad_norm": 1.229933738708496,
      "learning_rate": 7.943438123916898e-06,
      "loss": 0.0297,
      "step": 644220
    },
    {
      "epoch": 1.0543128899013505,
      "grad_norm": 0.11343541741371155,
      "learning_rate": 7.94337223170338e-06,
      "loss": 0.029,
      "step": 644240
    },
    {
      "epoch": 1.054345620340004,
      "grad_norm": 0.589989423751831,
      "learning_rate": 7.943306339489864e-06,
      "loss": 0.0285,
      "step": 644260
    },
    {
      "epoch": 1.054378350778657,
      "grad_norm": 0.5940768718719482,
      "learning_rate": 7.943240447276346e-06,
      "loss": 0.0305,
      "step": 644280
    },
    {
      "epoch": 1.0544110812173104,
      "grad_norm": 1.4465723037719727,
      "learning_rate": 7.94317455506283e-06,
      "loss": 0.0233,
      "step": 644300
    },
    {
      "epoch": 1.0544438116559638,
      "grad_norm": 0.07910693436861038,
      "learning_rate": 7.943108662849311e-06,
      "loss": 0.0244,
      "step": 644320
    },
    {
      "epoch": 1.0544765420946172,
      "grad_norm": 0.8454017043113708,
      "learning_rate": 7.943042770635795e-06,
      "loss": 0.0229,
      "step": 644340
    },
    {
      "epoch": 1.0545092725332705,
      "grad_norm": 1.4041526317596436,
      "learning_rate": 7.942976878422277e-06,
      "loss": 0.0216,
      "step": 644360
    },
    {
      "epoch": 1.054542002971924,
      "grad_norm": 0.6761237382888794,
      "learning_rate": 7.94291098620876e-06,
      "loss": 0.0221,
      "step": 644380
    },
    {
      "epoch": 1.0545747334105773,
      "grad_norm": 0.9489110112190247,
      "learning_rate": 7.942845093995242e-06,
      "loss": 0.0233,
      "step": 644400
    },
    {
      "epoch": 1.0546074638492304,
      "grad_norm": 0.22001959383487701,
      "learning_rate": 7.942779201781726e-06,
      "loss": 0.022,
      "step": 644420
    },
    {
      "epoch": 1.0546401942878838,
      "grad_norm": 0.7295860648155212,
      "learning_rate": 7.94271330956821e-06,
      "loss": 0.0222,
      "step": 644440
    },
    {
      "epoch": 1.0546729247265372,
      "grad_norm": 0.8402767777442932,
      "learning_rate": 7.942647417354691e-06,
      "loss": 0.0258,
      "step": 644460
    },
    {
      "epoch": 1.0547056551651905,
      "grad_norm": 1.1623271703720093,
      "learning_rate": 7.942581525141175e-06,
      "loss": 0.0298,
      "step": 644480
    },
    {
      "epoch": 1.054738385603844,
      "grad_norm": 1.6150840520858765,
      "learning_rate": 7.942515632927658e-06,
      "loss": 0.0214,
      "step": 644500
    },
    {
      "epoch": 1.0547711160424973,
      "grad_norm": 0.44084131717681885,
      "learning_rate": 7.94244974071414e-06,
      "loss": 0.0327,
      "step": 644520
    },
    {
      "epoch": 1.0548038464811504,
      "grad_norm": 1.1214417219161987,
      "learning_rate": 7.942383848500624e-06,
      "loss": 0.0157,
      "step": 644540
    },
    {
      "epoch": 1.0548365769198038,
      "grad_norm": 0.7872112393379211,
      "learning_rate": 7.942317956287107e-06,
      "loss": 0.0301,
      "step": 644560
    },
    {
      "epoch": 1.0548693073584572,
      "grad_norm": 0.8906666040420532,
      "learning_rate": 7.94225206407359e-06,
      "loss": 0.0231,
      "step": 644580
    },
    {
      "epoch": 1.0549020377971106,
      "grad_norm": 0.2770046889781952,
      "learning_rate": 7.942186171860073e-06,
      "loss": 0.0274,
      "step": 644600
    },
    {
      "epoch": 1.054934768235764,
      "grad_norm": 0.9464901685714722,
      "learning_rate": 7.942120279646555e-06,
      "loss": 0.032,
      "step": 644620
    },
    {
      "epoch": 1.0549674986744173,
      "grad_norm": 0.15378573536872864,
      "learning_rate": 7.942054387433038e-06,
      "loss": 0.0196,
      "step": 644640
    },
    {
      "epoch": 1.0550002291130707,
      "grad_norm": 1.1247351169586182,
      "learning_rate": 7.94198849521952e-06,
      "loss": 0.0216,
      "step": 644660
    },
    {
      "epoch": 1.0550329595517238,
      "grad_norm": 0.8825138807296753,
      "learning_rate": 7.941922603006004e-06,
      "loss": 0.0295,
      "step": 644680
    },
    {
      "epoch": 1.0550656899903772,
      "grad_norm": 0.4253391921520233,
      "learning_rate": 7.941856710792486e-06,
      "loss": 0.0233,
      "step": 644700
    },
    {
      "epoch": 1.0550984204290306,
      "grad_norm": 0.09818951785564423,
      "learning_rate": 7.94179081857897e-06,
      "loss": 0.0265,
      "step": 644720
    },
    {
      "epoch": 1.055131150867684,
      "grad_norm": 0.7243660688400269,
      "learning_rate": 7.941724926365451e-06,
      "loss": 0.0331,
      "step": 644740
    },
    {
      "epoch": 1.0551638813063373,
      "grad_norm": 1.0836797952651978,
      "learning_rate": 7.941659034151935e-06,
      "loss": 0.0312,
      "step": 644760
    },
    {
      "epoch": 1.0551966117449907,
      "grad_norm": 0.46007484197616577,
      "learning_rate": 7.941593141938417e-06,
      "loss": 0.0214,
      "step": 644780
    },
    {
      "epoch": 1.0552293421836438,
      "grad_norm": 0.43087705969810486,
      "learning_rate": 7.9415272497249e-06,
      "loss": 0.0335,
      "step": 644800
    },
    {
      "epoch": 1.0552620726222972,
      "grad_norm": 0.6982930302619934,
      "learning_rate": 7.941461357511384e-06,
      "loss": 0.0277,
      "step": 644820
    },
    {
      "epoch": 1.0552948030609506,
      "grad_norm": 0.9483588933944702,
      "learning_rate": 7.941395465297866e-06,
      "loss": 0.0222,
      "step": 644840
    },
    {
      "epoch": 1.055327533499604,
      "grad_norm": 0.9346505999565125,
      "learning_rate": 7.94132957308435e-06,
      "loss": 0.0174,
      "step": 644860
    },
    {
      "epoch": 1.0553602639382573,
      "grad_norm": 2.572291612625122,
      "learning_rate": 7.941263680870833e-06,
      "loss": 0.0325,
      "step": 644880
    },
    {
      "epoch": 1.0553929943769107,
      "grad_norm": 2.217297077178955,
      "learning_rate": 7.941197788657315e-06,
      "loss": 0.0329,
      "step": 644900
    },
    {
      "epoch": 1.055425724815564,
      "grad_norm": 1.3275700807571411,
      "learning_rate": 7.941131896443798e-06,
      "loss": 0.0307,
      "step": 644920
    },
    {
      "epoch": 1.0554584552542172,
      "grad_norm": 0.6093493103981018,
      "learning_rate": 7.941066004230282e-06,
      "loss": 0.0271,
      "step": 644940
    },
    {
      "epoch": 1.0554911856928706,
      "grad_norm": 1.3753750324249268,
      "learning_rate": 7.941000112016764e-06,
      "loss": 0.0276,
      "step": 644960
    },
    {
      "epoch": 1.055523916131524,
      "grad_norm": 0.4838959276676178,
      "learning_rate": 7.940934219803247e-06,
      "loss": 0.0209,
      "step": 644980
    },
    {
      "epoch": 1.0555566465701773,
      "grad_norm": 1.1913480758666992,
      "learning_rate": 7.94086832758973e-06,
      "loss": 0.0279,
      "step": 645000
    },
    {
      "epoch": 1.0555893770088307,
      "grad_norm": 0.29331159591674805,
      "learning_rate": 7.940802435376213e-06,
      "loss": 0.0255,
      "step": 645020
    },
    {
      "epoch": 1.055622107447484,
      "grad_norm": 0.7412802577018738,
      "learning_rate": 7.940736543162695e-06,
      "loss": 0.0222,
      "step": 645040
    },
    {
      "epoch": 1.0556548378861375,
      "grad_norm": 0.26645198464393616,
      "learning_rate": 7.940670650949178e-06,
      "loss": 0.0146,
      "step": 645060
    },
    {
      "epoch": 1.0556875683247906,
      "grad_norm": 2.4566383361816406,
      "learning_rate": 7.94060475873566e-06,
      "loss": 0.0307,
      "step": 645080
    },
    {
      "epoch": 1.055720298763444,
      "grad_norm": 0.6607604026794434,
      "learning_rate": 7.940538866522144e-06,
      "loss": 0.0196,
      "step": 645100
    },
    {
      "epoch": 1.0557530292020973,
      "grad_norm": 0.476082980632782,
      "learning_rate": 7.940472974308626e-06,
      "loss": 0.029,
      "step": 645120
    },
    {
      "epoch": 1.0557857596407507,
      "grad_norm": 0.46512705087661743,
      "learning_rate": 7.94040708209511e-06,
      "loss": 0.0295,
      "step": 645140
    },
    {
      "epoch": 1.055818490079404,
      "grad_norm": 0.7450844645500183,
      "learning_rate": 7.940341189881591e-06,
      "loss": 0.0295,
      "step": 645160
    },
    {
      "epoch": 1.0558512205180575,
      "grad_norm": 0.37658634781837463,
      "learning_rate": 7.940275297668075e-06,
      "loss": 0.0197,
      "step": 645180
    },
    {
      "epoch": 1.0558839509567106,
      "grad_norm": 0.3446309566497803,
      "learning_rate": 7.940209405454557e-06,
      "loss": 0.0197,
      "step": 645200
    },
    {
      "epoch": 1.055916681395364,
      "grad_norm": 0.7043595314025879,
      "learning_rate": 7.94014351324104e-06,
      "loss": 0.0285,
      "step": 645220
    },
    {
      "epoch": 1.0559494118340174,
      "grad_norm": 0.36962053179740906,
      "learning_rate": 7.940077621027524e-06,
      "loss": 0.0224,
      "step": 645240
    },
    {
      "epoch": 1.0559821422726707,
      "grad_norm": 0.6116007566452026,
      "learning_rate": 7.940011728814006e-06,
      "loss": 0.0229,
      "step": 645260
    },
    {
      "epoch": 1.056014872711324,
      "grad_norm": 0.5191944241523743,
      "learning_rate": 7.93994583660049e-06,
      "loss": 0.0324,
      "step": 645280
    },
    {
      "epoch": 1.0560476031499775,
      "grad_norm": 0.1467069536447525,
      "learning_rate": 7.939879944386973e-06,
      "loss": 0.0159,
      "step": 645300
    },
    {
      "epoch": 1.0560803335886308,
      "grad_norm": 0.7330906391143799,
      "learning_rate": 7.939814052173455e-06,
      "loss": 0.0204,
      "step": 645320
    },
    {
      "epoch": 1.056113064027284,
      "grad_norm": 0.29835906624794006,
      "learning_rate": 7.939748159959938e-06,
      "loss": 0.0209,
      "step": 645340
    },
    {
      "epoch": 1.0561457944659374,
      "grad_norm": 0.4692497253417969,
      "learning_rate": 7.939682267746422e-06,
      "loss": 0.0163,
      "step": 645360
    },
    {
      "epoch": 1.0561785249045907,
      "grad_norm": 1.9586020708084106,
      "learning_rate": 7.939616375532904e-06,
      "loss": 0.0239,
      "step": 645380
    },
    {
      "epoch": 1.0562112553432441,
      "grad_norm": 0.4240594804286957,
      "learning_rate": 7.939550483319388e-06,
      "loss": 0.0204,
      "step": 645400
    },
    {
      "epoch": 1.0562439857818975,
      "grad_norm": 0.712841808795929,
      "learning_rate": 7.93948459110587e-06,
      "loss": 0.0277,
      "step": 645420
    },
    {
      "epoch": 1.0562767162205509,
      "grad_norm": 0.9602397680282593,
      "learning_rate": 7.939418698892353e-06,
      "loss": 0.0181,
      "step": 645440
    },
    {
      "epoch": 1.0563094466592042,
      "grad_norm": 1.3468096256256104,
      "learning_rate": 7.939352806678835e-06,
      "loss": 0.0183,
      "step": 645460
    },
    {
      "epoch": 1.0563421770978574,
      "grad_norm": 1.8900350332260132,
      "learning_rate": 7.939286914465319e-06,
      "loss": 0.0322,
      "step": 645480
    },
    {
      "epoch": 1.0563749075365108,
      "grad_norm": 0.3776545226573944,
      "learning_rate": 7.9392210222518e-06,
      "loss": 0.0192,
      "step": 645500
    },
    {
      "epoch": 1.0564076379751641,
      "grad_norm": 0.3355334401130676,
      "learning_rate": 7.939155130038284e-06,
      "loss": 0.0264,
      "step": 645520
    },
    {
      "epoch": 1.0564403684138175,
      "grad_norm": 0.28856801986694336,
      "learning_rate": 7.939089237824766e-06,
      "loss": 0.0168,
      "step": 645540
    },
    {
      "epoch": 1.0564730988524709,
      "grad_norm": 0.3539825975894928,
      "learning_rate": 7.93902334561125e-06,
      "loss": 0.0313,
      "step": 645560
    },
    {
      "epoch": 1.0565058292911242,
      "grad_norm": 1.8081467151641846,
      "learning_rate": 7.938957453397733e-06,
      "loss": 0.0229,
      "step": 645580
    },
    {
      "epoch": 1.0565385597297774,
      "grad_norm": 1.7310796976089478,
      "learning_rate": 7.938891561184215e-06,
      "loss": 0.0229,
      "step": 645600
    },
    {
      "epoch": 1.0565712901684308,
      "grad_norm": 0.48149898648262024,
      "learning_rate": 7.938825668970699e-06,
      "loss": 0.0191,
      "step": 645620
    },
    {
      "epoch": 1.0566040206070841,
      "grad_norm": 0.746838390827179,
      "learning_rate": 7.93875977675718e-06,
      "loss": 0.0248,
      "step": 645640
    },
    {
      "epoch": 1.0566367510457375,
      "grad_norm": 0.6188190579414368,
      "learning_rate": 7.938693884543664e-06,
      "loss": 0.0262,
      "step": 645660
    },
    {
      "epoch": 1.0566694814843909,
      "grad_norm": 0.3314574360847473,
      "learning_rate": 7.938627992330148e-06,
      "loss": 0.0172,
      "step": 645680
    },
    {
      "epoch": 1.0567022119230443,
      "grad_norm": 1.0134645700454712,
      "learning_rate": 7.93856210011663e-06,
      "loss": 0.0217,
      "step": 645700
    },
    {
      "epoch": 1.0567349423616976,
      "grad_norm": 0.1965438425540924,
      "learning_rate": 7.938496207903113e-06,
      "loss": 0.0209,
      "step": 645720
    },
    {
      "epoch": 1.0567676728003508,
      "grad_norm": 0.6170918345451355,
      "learning_rate": 7.938430315689597e-06,
      "loss": 0.0327,
      "step": 645740
    },
    {
      "epoch": 1.0568004032390041,
      "grad_norm": 0.7222482562065125,
      "learning_rate": 7.938364423476079e-06,
      "loss": 0.0214,
      "step": 645760
    },
    {
      "epoch": 1.0568331336776575,
      "grad_norm": 0.6850947141647339,
      "learning_rate": 7.938298531262562e-06,
      "loss": 0.025,
      "step": 645780
    },
    {
      "epoch": 1.056865864116311,
      "grad_norm": 1.1683639287948608,
      "learning_rate": 7.938232639049044e-06,
      "loss": 0.0338,
      "step": 645800
    },
    {
      "epoch": 1.0568985945549643,
      "grad_norm": 1.510485053062439,
      "learning_rate": 7.938166746835528e-06,
      "loss": 0.0273,
      "step": 645820
    },
    {
      "epoch": 1.0569313249936176,
      "grad_norm": 2.3386847972869873,
      "learning_rate": 7.93810085462201e-06,
      "loss": 0.0301,
      "step": 645840
    },
    {
      "epoch": 1.056964055432271,
      "grad_norm": 0.9181365370750427,
      "learning_rate": 7.938034962408493e-06,
      "loss": 0.0177,
      "step": 645860
    },
    {
      "epoch": 1.0569967858709242,
      "grad_norm": 0.6510272026062012,
      "learning_rate": 7.937969070194975e-06,
      "loss": 0.0251,
      "step": 645880
    },
    {
      "epoch": 1.0570295163095775,
      "grad_norm": 1.1253955364227295,
      "learning_rate": 7.937903177981459e-06,
      "loss": 0.0368,
      "step": 645900
    },
    {
      "epoch": 1.057062246748231,
      "grad_norm": 1.9592443704605103,
      "learning_rate": 7.937837285767942e-06,
      "loss": 0.0387,
      "step": 645920
    },
    {
      "epoch": 1.0570949771868843,
      "grad_norm": 0.35014769434928894,
      "learning_rate": 7.937771393554424e-06,
      "loss": 0.0238,
      "step": 645940
    },
    {
      "epoch": 1.0571277076255376,
      "grad_norm": 1.5718326568603516,
      "learning_rate": 7.937705501340908e-06,
      "loss": 0.0279,
      "step": 645960
    },
    {
      "epoch": 1.057160438064191,
      "grad_norm": 2.164700746536255,
      "learning_rate": 7.93763960912739e-06,
      "loss": 0.0299,
      "step": 645980
    },
    {
      "epoch": 1.0571931685028442,
      "grad_norm": 0.3008027672767639,
      "learning_rate": 7.937573716913873e-06,
      "loss": 0.0217,
      "step": 646000
    },
    {
      "epoch": 1.0572258989414975,
      "grad_norm": 1.4032208919525146,
      "learning_rate": 7.937507824700355e-06,
      "loss": 0.0142,
      "step": 646020
    },
    {
      "epoch": 1.057258629380151,
      "grad_norm": 0.6555770039558411,
      "learning_rate": 7.937441932486839e-06,
      "loss": 0.0183,
      "step": 646040
    },
    {
      "epoch": 1.0572913598188043,
      "grad_norm": 1.1832177639007568,
      "learning_rate": 7.93737604027332e-06,
      "loss": 0.0229,
      "step": 646060
    },
    {
      "epoch": 1.0573240902574577,
      "grad_norm": 0.25456079840660095,
      "learning_rate": 7.937310148059804e-06,
      "loss": 0.0269,
      "step": 646080
    },
    {
      "epoch": 1.057356820696111,
      "grad_norm": 0.5913582444190979,
      "learning_rate": 7.937244255846288e-06,
      "loss": 0.0323,
      "step": 646100
    },
    {
      "epoch": 1.0573895511347644,
      "grad_norm": 1.0063302516937256,
      "learning_rate": 7.93717836363277e-06,
      "loss": 0.0188,
      "step": 646120
    },
    {
      "epoch": 1.0574222815734176,
      "grad_norm": 1.3652770519256592,
      "learning_rate": 7.937112471419253e-06,
      "loss": 0.0226,
      "step": 646140
    },
    {
      "epoch": 1.057455012012071,
      "grad_norm": 0.68257075548172,
      "learning_rate": 7.937046579205737e-06,
      "loss": 0.017,
      "step": 646160
    },
    {
      "epoch": 1.0574877424507243,
      "grad_norm": 0.5253919959068298,
      "learning_rate": 7.936980686992219e-06,
      "loss": 0.0214,
      "step": 646180
    },
    {
      "epoch": 1.0575204728893777,
      "grad_norm": 0.6841594576835632,
      "learning_rate": 7.936914794778702e-06,
      "loss": 0.0259,
      "step": 646200
    },
    {
      "epoch": 1.057553203328031,
      "grad_norm": 0.09969926625490189,
      "learning_rate": 7.936848902565184e-06,
      "loss": 0.0271,
      "step": 646220
    },
    {
      "epoch": 1.0575859337666844,
      "grad_norm": 0.21962936222553253,
      "learning_rate": 7.936783010351668e-06,
      "loss": 0.0168,
      "step": 646240
    },
    {
      "epoch": 1.0576186642053376,
      "grad_norm": 1.0248719453811646,
      "learning_rate": 7.93671711813815e-06,
      "loss": 0.0183,
      "step": 646260
    },
    {
      "epoch": 1.057651394643991,
      "grad_norm": 1.2420105934143066,
      "learning_rate": 7.936651225924633e-06,
      "loss": 0.0274,
      "step": 646280
    },
    {
      "epoch": 1.0576841250826443,
      "grad_norm": 0.35773929953575134,
      "learning_rate": 7.936585333711117e-06,
      "loss": 0.0242,
      "step": 646300
    },
    {
      "epoch": 1.0577168555212977,
      "grad_norm": 0.2706080675125122,
      "learning_rate": 7.936519441497599e-06,
      "loss": 0.0186,
      "step": 646320
    },
    {
      "epoch": 1.057749585959951,
      "grad_norm": 0.49719253182411194,
      "learning_rate": 7.936453549284082e-06,
      "loss": 0.0224,
      "step": 646340
    },
    {
      "epoch": 1.0577823163986044,
      "grad_norm": 0.34818825125694275,
      "learning_rate": 7.936387657070564e-06,
      "loss": 0.0212,
      "step": 646360
    },
    {
      "epoch": 1.0578150468372578,
      "grad_norm": 0.624721109867096,
      "learning_rate": 7.936321764857048e-06,
      "loss": 0.0277,
      "step": 646380
    },
    {
      "epoch": 1.057847777275911,
      "grad_norm": 0.27499398589134216,
      "learning_rate": 7.93625587264353e-06,
      "loss": 0.035,
      "step": 646400
    },
    {
      "epoch": 1.0578805077145643,
      "grad_norm": 0.5702966451644897,
      "learning_rate": 7.936189980430013e-06,
      "loss": 0.0364,
      "step": 646420
    },
    {
      "epoch": 1.0579132381532177,
      "grad_norm": 0.6952499747276306,
      "learning_rate": 7.936124088216495e-06,
      "loss": 0.0305,
      "step": 646440
    },
    {
      "epoch": 1.057945968591871,
      "grad_norm": 1.0577117204666138,
      "learning_rate": 7.936058196002979e-06,
      "loss": 0.0275,
      "step": 646460
    },
    {
      "epoch": 1.0579786990305244,
      "grad_norm": 0.2606375515460968,
      "learning_rate": 7.935992303789462e-06,
      "loss": 0.021,
      "step": 646480
    },
    {
      "epoch": 1.0580114294691778,
      "grad_norm": 1.2856111526489258,
      "learning_rate": 7.935926411575944e-06,
      "loss": 0.0292,
      "step": 646500
    },
    {
      "epoch": 1.0580441599078312,
      "grad_norm": 0.947490394115448,
      "learning_rate": 7.935860519362428e-06,
      "loss": 0.0278,
      "step": 646520
    },
    {
      "epoch": 1.0580768903464843,
      "grad_norm": 0.35996901988983154,
      "learning_rate": 7.935794627148911e-06,
      "loss": 0.0241,
      "step": 646540
    },
    {
      "epoch": 1.0581096207851377,
      "grad_norm": 0.3928663730621338,
      "learning_rate": 7.935728734935393e-06,
      "loss": 0.0223,
      "step": 646560
    },
    {
      "epoch": 1.058142351223791,
      "grad_norm": 0.4057442843914032,
      "learning_rate": 7.935662842721877e-06,
      "loss": 0.0256,
      "step": 646580
    },
    {
      "epoch": 1.0581750816624445,
      "grad_norm": 2.0969948768615723,
      "learning_rate": 7.935596950508359e-06,
      "loss": 0.0287,
      "step": 646600
    },
    {
      "epoch": 1.0582078121010978,
      "grad_norm": 0.7123247385025024,
      "learning_rate": 7.935531058294842e-06,
      "loss": 0.0282,
      "step": 646620
    },
    {
      "epoch": 1.0582405425397512,
      "grad_norm": 0.994947612285614,
      "learning_rate": 7.935465166081326e-06,
      "loss": 0.0204,
      "step": 646640
    },
    {
      "epoch": 1.0582732729784046,
      "grad_norm": 0.3589521050453186,
      "learning_rate": 7.935399273867808e-06,
      "loss": 0.0289,
      "step": 646660
    },
    {
      "epoch": 1.0583060034170577,
      "grad_norm": 0.5113286972045898,
      "learning_rate": 7.935333381654291e-06,
      "loss": 0.0286,
      "step": 646680
    },
    {
      "epoch": 1.058338733855711,
      "grad_norm": 1.243902564048767,
      "learning_rate": 7.935267489440773e-06,
      "loss": 0.0325,
      "step": 646700
    },
    {
      "epoch": 1.0583714642943645,
      "grad_norm": 1.269891619682312,
      "learning_rate": 7.935201597227257e-06,
      "loss": 0.0316,
      "step": 646720
    },
    {
      "epoch": 1.0584041947330178,
      "grad_norm": 1.3435455560684204,
      "learning_rate": 7.935135705013739e-06,
      "loss": 0.0311,
      "step": 646740
    },
    {
      "epoch": 1.0584369251716712,
      "grad_norm": 0.4217357933521271,
      "learning_rate": 7.935069812800222e-06,
      "loss": 0.0232,
      "step": 646760
    },
    {
      "epoch": 1.0584696556103246,
      "grad_norm": 0.45826494693756104,
      "learning_rate": 7.935003920586704e-06,
      "loss": 0.0245,
      "step": 646780
    },
    {
      "epoch": 1.0585023860489777,
      "grad_norm": 0.7677686810493469,
      "learning_rate": 7.934938028373188e-06,
      "loss": 0.0266,
      "step": 646800
    },
    {
      "epoch": 1.058535116487631,
      "grad_norm": 1.1169630289077759,
      "learning_rate": 7.93487213615967e-06,
      "loss": 0.0228,
      "step": 646820
    },
    {
      "epoch": 1.0585678469262845,
      "grad_norm": 1.9897005558013916,
      "learning_rate": 7.934806243946153e-06,
      "loss": 0.0241,
      "step": 646840
    },
    {
      "epoch": 1.0586005773649378,
      "grad_norm": 1.3439468145370483,
      "learning_rate": 7.934740351732635e-06,
      "loss": 0.025,
      "step": 646860
    },
    {
      "epoch": 1.0586333078035912,
      "grad_norm": 0.810137152671814,
      "learning_rate": 7.934674459519119e-06,
      "loss": 0.0232,
      "step": 646880
    },
    {
      "epoch": 1.0586660382422446,
      "grad_norm": 0.37463030219078064,
      "learning_rate": 7.934608567305602e-06,
      "loss": 0.0266,
      "step": 646900
    },
    {
      "epoch": 1.058698768680898,
      "grad_norm": 0.38583001494407654,
      "learning_rate": 7.934542675092084e-06,
      "loss": 0.0284,
      "step": 646920
    },
    {
      "epoch": 1.0587314991195511,
      "grad_norm": 1.2684682607650757,
      "learning_rate": 7.934476782878568e-06,
      "loss": 0.0276,
      "step": 646940
    },
    {
      "epoch": 1.0587642295582045,
      "grad_norm": 0.2850494086742401,
      "learning_rate": 7.934410890665051e-06,
      "loss": 0.0249,
      "step": 646960
    },
    {
      "epoch": 1.0587969599968579,
      "grad_norm": 0.22309716045856476,
      "learning_rate": 7.934344998451533e-06,
      "loss": 0.0266,
      "step": 646980
    },
    {
      "epoch": 1.0588296904355112,
      "grad_norm": 0.39269161224365234,
      "learning_rate": 7.934279106238017e-06,
      "loss": 0.0167,
      "step": 647000
    },
    {
      "epoch": 1.0588624208741646,
      "grad_norm": 0.4225712716579437,
      "learning_rate": 7.9342132140245e-06,
      "loss": 0.021,
      "step": 647020
    },
    {
      "epoch": 1.058895151312818,
      "grad_norm": 0.5622628331184387,
      "learning_rate": 7.934147321810982e-06,
      "loss": 0.0242,
      "step": 647040
    },
    {
      "epoch": 1.0589278817514711,
      "grad_norm": 0.32055941224098206,
      "learning_rate": 7.934081429597466e-06,
      "loss": 0.0227,
      "step": 647060
    },
    {
      "epoch": 1.0589606121901245,
      "grad_norm": 0.6748629808425903,
      "learning_rate": 7.934015537383948e-06,
      "loss": 0.0146,
      "step": 647080
    },
    {
      "epoch": 1.0589933426287779,
      "grad_norm": 1.5999785661697388,
      "learning_rate": 7.933949645170431e-06,
      "loss": 0.0193,
      "step": 647100
    },
    {
      "epoch": 1.0590260730674312,
      "grad_norm": 0.3490390479564667,
      "learning_rate": 7.933883752956913e-06,
      "loss": 0.0207,
      "step": 647120
    },
    {
      "epoch": 1.0590588035060846,
      "grad_norm": 1.1599174737930298,
      "learning_rate": 7.933817860743397e-06,
      "loss": 0.0302,
      "step": 647140
    },
    {
      "epoch": 1.059091533944738,
      "grad_norm": 0.9413576126098633,
      "learning_rate": 7.933751968529879e-06,
      "loss": 0.0236,
      "step": 647160
    },
    {
      "epoch": 1.0591242643833914,
      "grad_norm": 1.3279036283493042,
      "learning_rate": 7.933686076316362e-06,
      "loss": 0.0285,
      "step": 647180
    },
    {
      "epoch": 1.0591569948220445,
      "grad_norm": 0.5606353282928467,
      "learning_rate": 7.933620184102844e-06,
      "loss": 0.0192,
      "step": 647200
    },
    {
      "epoch": 1.0591897252606979,
      "grad_norm": 1.800668478012085,
      "learning_rate": 7.933554291889328e-06,
      "loss": 0.0256,
      "step": 647220
    },
    {
      "epoch": 1.0592224556993513,
      "grad_norm": 0.3549938201904297,
      "learning_rate": 7.93348839967581e-06,
      "loss": 0.028,
      "step": 647240
    },
    {
      "epoch": 1.0592551861380046,
      "grad_norm": 0.45378607511520386,
      "learning_rate": 7.933422507462293e-06,
      "loss": 0.022,
      "step": 647260
    },
    {
      "epoch": 1.059287916576658,
      "grad_norm": 1.2162138223648071,
      "learning_rate": 7.933356615248777e-06,
      "loss": 0.022,
      "step": 647280
    },
    {
      "epoch": 1.0593206470153114,
      "grad_norm": 1.547171711921692,
      "learning_rate": 7.933290723035259e-06,
      "loss": 0.0216,
      "step": 647300
    },
    {
      "epoch": 1.0593533774539647,
      "grad_norm": 0.4258944094181061,
      "learning_rate": 7.933224830821742e-06,
      "loss": 0.0226,
      "step": 647320
    },
    {
      "epoch": 1.059386107892618,
      "grad_norm": 0.7153087854385376,
      "learning_rate": 7.933158938608226e-06,
      "loss": 0.0156,
      "step": 647340
    },
    {
      "epoch": 1.0594188383312713,
      "grad_norm": 0.2830379903316498,
      "learning_rate": 7.933093046394708e-06,
      "loss": 0.0227,
      "step": 647360
    },
    {
      "epoch": 1.0594515687699246,
      "grad_norm": 0.40028050541877747,
      "learning_rate": 7.933027154181192e-06,
      "loss": 0.0243,
      "step": 647380
    },
    {
      "epoch": 1.059484299208578,
      "grad_norm": 0.9924363493919373,
      "learning_rate": 7.932961261967675e-06,
      "loss": 0.0221,
      "step": 647400
    },
    {
      "epoch": 1.0595170296472314,
      "grad_norm": 0.5532442927360535,
      "learning_rate": 7.932895369754157e-06,
      "loss": 0.0259,
      "step": 647420
    },
    {
      "epoch": 1.0595497600858848,
      "grad_norm": 1.0941600799560547,
      "learning_rate": 7.93282947754064e-06,
      "loss": 0.0381,
      "step": 647440
    },
    {
      "epoch": 1.0595824905245381,
      "grad_norm": 0.3091575801372528,
      "learning_rate": 7.932763585327122e-06,
      "loss": 0.0253,
      "step": 647460
    },
    {
      "epoch": 1.0596152209631913,
      "grad_norm": 1.570370078086853,
      "learning_rate": 7.932697693113606e-06,
      "loss": 0.0284,
      "step": 647480
    },
    {
      "epoch": 1.0596479514018446,
      "grad_norm": 0.7608465552330017,
      "learning_rate": 7.932631800900088e-06,
      "loss": 0.0274,
      "step": 647500
    },
    {
      "epoch": 1.059680681840498,
      "grad_norm": 0.46777310967445374,
      "learning_rate": 7.932565908686572e-06,
      "loss": 0.0208,
      "step": 647520
    },
    {
      "epoch": 1.0597134122791514,
      "grad_norm": 0.7452170848846436,
      "learning_rate": 7.932500016473053e-06,
      "loss": 0.032,
      "step": 647540
    },
    {
      "epoch": 1.0597461427178048,
      "grad_norm": 1.9806602001190186,
      "learning_rate": 7.932434124259537e-06,
      "loss": 0.0269,
      "step": 647560
    },
    {
      "epoch": 1.0597788731564581,
      "grad_norm": 1.8562476634979248,
      "learning_rate": 7.932368232046019e-06,
      "loss": 0.031,
      "step": 647580
    },
    {
      "epoch": 1.0598116035951113,
      "grad_norm": 1.553464412689209,
      "learning_rate": 7.932302339832502e-06,
      "loss": 0.0245,
      "step": 647600
    },
    {
      "epoch": 1.0598443340337647,
      "grad_norm": Infinity,
      "learning_rate": 7.932236447618984e-06,
      "loss": 0.0307,
      "step": 647620
    },
    {
      "epoch": 1.059877064472418,
      "grad_norm": 0.4544002115726471,
      "learning_rate": 7.932170555405468e-06,
      "loss": 0.0142,
      "step": 647640
    },
    {
      "epoch": 1.0599097949110714,
      "grad_norm": 1.6302775144577026,
      "learning_rate": 7.932104663191952e-06,
      "loss": 0.0267,
      "step": 647660
    },
    {
      "epoch": 1.0599425253497248,
      "grad_norm": 0.363148033618927,
      "learning_rate": 7.932038770978433e-06,
      "loss": 0.0263,
      "step": 647680
    },
    {
      "epoch": 1.0599752557883781,
      "grad_norm": 0.5894848108291626,
      "learning_rate": 7.931972878764917e-06,
      "loss": 0.0199,
      "step": 647700
    },
    {
      "epoch": 1.0600079862270315,
      "grad_norm": 0.7453020811080933,
      "learning_rate": 7.9319069865514e-06,
      "loss": 0.0205,
      "step": 647720
    },
    {
      "epoch": 1.0600407166656847,
      "grad_norm": 0.4164314866065979,
      "learning_rate": 7.931841094337883e-06,
      "loss": 0.0307,
      "step": 647740
    },
    {
      "epoch": 1.060073447104338,
      "grad_norm": 0.30912289023399353,
      "learning_rate": 7.931775202124366e-06,
      "loss": 0.0308,
      "step": 647760
    },
    {
      "epoch": 1.0601061775429914,
      "grad_norm": 0.8914699554443359,
      "learning_rate": 7.93170930991085e-06,
      "loss": 0.018,
      "step": 647780
    },
    {
      "epoch": 1.0601389079816448,
      "grad_norm": 0.8357082009315491,
      "learning_rate": 7.931643417697332e-06,
      "loss": 0.0224,
      "step": 647800
    },
    {
      "epoch": 1.0601716384202982,
      "grad_norm": 0.3936495780944824,
      "learning_rate": 7.931577525483815e-06,
      "loss": 0.0303,
      "step": 647820
    },
    {
      "epoch": 1.0602043688589515,
      "grad_norm": 1.031976580619812,
      "learning_rate": 7.931511633270297e-06,
      "loss": 0.0286,
      "step": 647840
    },
    {
      "epoch": 1.0602370992976047,
      "grad_norm": 0.38593971729278564,
      "learning_rate": 7.93144574105678e-06,
      "loss": 0.0222,
      "step": 647860
    },
    {
      "epoch": 1.060269829736258,
      "grad_norm": 0.20895808935165405,
      "learning_rate": 7.931379848843263e-06,
      "loss": 0.0196,
      "step": 647880
    },
    {
      "epoch": 1.0603025601749114,
      "grad_norm": 0.9417349696159363,
      "learning_rate": 7.931313956629746e-06,
      "loss": 0.03,
      "step": 647900
    },
    {
      "epoch": 1.0603352906135648,
      "grad_norm": 0.49546316266059875,
      "learning_rate": 7.931248064416228e-06,
      "loss": 0.0164,
      "step": 647920
    },
    {
      "epoch": 1.0603680210522182,
      "grad_norm": 1.493430495262146,
      "learning_rate": 7.931182172202712e-06,
      "loss": 0.0281,
      "step": 647940
    },
    {
      "epoch": 1.0604007514908715,
      "grad_norm": 0.4398152232170105,
      "learning_rate": 7.931116279989193e-06,
      "loss": 0.0168,
      "step": 647960
    },
    {
      "epoch": 1.060433481929525,
      "grad_norm": 0.4413822293281555,
      "learning_rate": 7.931050387775677e-06,
      "loss": 0.0231,
      "step": 647980
    },
    {
      "epoch": 1.060466212368178,
      "grad_norm": 0.8874789476394653,
      "learning_rate": 7.930984495562159e-06,
      "loss": 0.0204,
      "step": 648000
    },
    {
      "epoch": 1.0604989428068314,
      "grad_norm": 0.35440683364868164,
      "learning_rate": 7.930918603348643e-06,
      "loss": 0.0254,
      "step": 648020
    },
    {
      "epoch": 1.0605316732454848,
      "grad_norm": 0.5352810025215149,
      "learning_rate": 7.930852711135126e-06,
      "loss": 0.0272,
      "step": 648040
    },
    {
      "epoch": 1.0605644036841382,
      "grad_norm": 0.5596171617507935,
      "learning_rate": 7.930786818921608e-06,
      "loss": 0.0237,
      "step": 648060
    },
    {
      "epoch": 1.0605971341227916,
      "grad_norm": 0.19424910843372345,
      "learning_rate": 7.930720926708092e-06,
      "loss": 0.0175,
      "step": 648080
    },
    {
      "epoch": 1.060629864561445,
      "grad_norm": 0.31750982999801636,
      "learning_rate": 7.930655034494574e-06,
      "loss": 0.0191,
      "step": 648100
    },
    {
      "epoch": 1.0606625950000983,
      "grad_norm": 0.6015394926071167,
      "learning_rate": 7.930589142281057e-06,
      "loss": 0.017,
      "step": 648120
    },
    {
      "epoch": 1.0606953254387514,
      "grad_norm": 0.44275861978530884,
      "learning_rate": 7.93052325006754e-06,
      "loss": 0.0284,
      "step": 648140
    },
    {
      "epoch": 1.0607280558774048,
      "grad_norm": 4.805600166320801,
      "learning_rate": 7.930457357854023e-06,
      "loss": 0.0287,
      "step": 648160
    },
    {
      "epoch": 1.0607607863160582,
      "grad_norm": 0.41895315051078796,
      "learning_rate": 7.930391465640506e-06,
      "loss": 0.0294,
      "step": 648180
    },
    {
      "epoch": 1.0607935167547116,
      "grad_norm": 0.5336277484893799,
      "learning_rate": 7.93032557342699e-06,
      "loss": 0.0293,
      "step": 648200
    },
    {
      "epoch": 1.060826247193365,
      "grad_norm": 0.32162222266197205,
      "learning_rate": 7.930259681213472e-06,
      "loss": 0.04,
      "step": 648220
    },
    {
      "epoch": 1.0608589776320183,
      "grad_norm": 0.6723785996437073,
      "learning_rate": 7.930193788999955e-06,
      "loss": 0.0336,
      "step": 648240
    },
    {
      "epoch": 1.0608917080706715,
      "grad_norm": 0.18004581332206726,
      "learning_rate": 7.930127896786437e-06,
      "loss": 0.027,
      "step": 648260
    },
    {
      "epoch": 1.0609244385093248,
      "grad_norm": 0.6840214729309082,
      "learning_rate": 7.93006200457292e-06,
      "loss": 0.0246,
      "step": 648280
    },
    {
      "epoch": 1.0609571689479782,
      "grad_norm": 0.5684649348258972,
      "learning_rate": 7.929996112359403e-06,
      "loss": 0.0192,
      "step": 648300
    },
    {
      "epoch": 1.0609898993866316,
      "grad_norm": 0.9993144273757935,
      "learning_rate": 7.929930220145886e-06,
      "loss": 0.0292,
      "step": 648320
    },
    {
      "epoch": 1.061022629825285,
      "grad_norm": 0.37210285663604736,
      "learning_rate": 7.929864327932368e-06,
      "loss": 0.0231,
      "step": 648340
    },
    {
      "epoch": 1.0610553602639383,
      "grad_norm": 0.5926851630210876,
      "learning_rate": 7.929798435718852e-06,
      "loss": 0.0218,
      "step": 648360
    },
    {
      "epoch": 1.0610880907025917,
      "grad_norm": 0.4576297700405121,
      "learning_rate": 7.929732543505335e-06,
      "loss": 0.0274,
      "step": 648380
    },
    {
      "epoch": 1.0611208211412448,
      "grad_norm": 1.1355727910995483,
      "learning_rate": 7.929666651291817e-06,
      "loss": 0.025,
      "step": 648400
    },
    {
      "epoch": 1.0611535515798982,
      "grad_norm": 1.2446869611740112,
      "learning_rate": 7.9296007590783e-06,
      "loss": 0.0243,
      "step": 648420
    },
    {
      "epoch": 1.0611862820185516,
      "grad_norm": 1.0442153215408325,
      "learning_rate": 7.929534866864783e-06,
      "loss": 0.025,
      "step": 648440
    },
    {
      "epoch": 1.061219012457205,
      "grad_norm": 0.6487406492233276,
      "learning_rate": 7.929468974651266e-06,
      "loss": 0.0309,
      "step": 648460
    },
    {
      "epoch": 1.0612517428958583,
      "grad_norm": 0.809923529624939,
      "learning_rate": 7.929403082437748e-06,
      "loss": 0.0286,
      "step": 648480
    },
    {
      "epoch": 1.0612844733345117,
      "grad_norm": 0.551502525806427,
      "learning_rate": 7.929337190224232e-06,
      "loss": 0.0273,
      "step": 648500
    },
    {
      "epoch": 1.061317203773165,
      "grad_norm": 0.5080583691596985,
      "learning_rate": 7.929271298010715e-06,
      "loss": 0.026,
      "step": 648520
    },
    {
      "epoch": 1.0613499342118182,
      "grad_norm": 0.19163553416728973,
      "learning_rate": 7.929205405797197e-06,
      "loss": 0.0308,
      "step": 648540
    },
    {
      "epoch": 1.0613826646504716,
      "grad_norm": 0.7680811285972595,
      "learning_rate": 7.92913951358368e-06,
      "loss": 0.0226,
      "step": 648560
    },
    {
      "epoch": 1.061415395089125,
      "grad_norm": 0.3392524719238281,
      "learning_rate": 7.929073621370164e-06,
      "loss": 0.0163,
      "step": 648580
    },
    {
      "epoch": 1.0614481255277783,
      "grad_norm": 3.929246664047241,
      "learning_rate": 7.929007729156646e-06,
      "loss": 0.0201,
      "step": 648600
    },
    {
      "epoch": 1.0614808559664317,
      "grad_norm": 0.41892582178115845,
      "learning_rate": 7.92894183694313e-06,
      "loss": 0.0304,
      "step": 648620
    },
    {
      "epoch": 1.061513586405085,
      "grad_norm": 0.45112183690071106,
      "learning_rate": 7.928875944729612e-06,
      "loss": 0.0194,
      "step": 648640
    },
    {
      "epoch": 1.0615463168437382,
      "grad_norm": 1.506592035293579,
      "learning_rate": 7.928810052516095e-06,
      "loss": 0.0331,
      "step": 648660
    },
    {
      "epoch": 1.0615790472823916,
      "grad_norm": 0.3760337829589844,
      "learning_rate": 7.928744160302577e-06,
      "loss": 0.0207,
      "step": 648680
    },
    {
      "epoch": 1.061611777721045,
      "grad_norm": 0.4947398602962494,
      "learning_rate": 7.92867826808906e-06,
      "loss": 0.0261,
      "step": 648700
    },
    {
      "epoch": 1.0616445081596984,
      "grad_norm": 1.1686512231826782,
      "learning_rate": 7.928612375875543e-06,
      "loss": 0.0252,
      "step": 648720
    },
    {
      "epoch": 1.0616772385983517,
      "grad_norm": 1.7771304845809937,
      "learning_rate": 7.928546483662026e-06,
      "loss": 0.0262,
      "step": 648740
    },
    {
      "epoch": 1.061709969037005,
      "grad_norm": 0.30050864815711975,
      "learning_rate": 7.92848059144851e-06,
      "loss": 0.0237,
      "step": 648760
    },
    {
      "epoch": 1.0617426994756585,
      "grad_norm": 0.7148914933204651,
      "learning_rate": 7.928414699234992e-06,
      "loss": 0.0216,
      "step": 648780
    },
    {
      "epoch": 1.0617754299143116,
      "grad_norm": 0.4517405927181244,
      "learning_rate": 7.928348807021475e-06,
      "loss": 0.0222,
      "step": 648800
    },
    {
      "epoch": 1.061808160352965,
      "grad_norm": 0.2640455365180969,
      "learning_rate": 7.928282914807957e-06,
      "loss": 0.0269,
      "step": 648820
    },
    {
      "epoch": 1.0618408907916184,
      "grad_norm": 0.2581682503223419,
      "learning_rate": 7.92821702259444e-06,
      "loss": 0.0295,
      "step": 648840
    },
    {
      "epoch": 1.0618736212302717,
      "grad_norm": 1.5250680446624756,
      "learning_rate": 7.928151130380923e-06,
      "loss": 0.025,
      "step": 648860
    },
    {
      "epoch": 1.061906351668925,
      "grad_norm": 0.3176679313182831,
      "learning_rate": 7.928085238167406e-06,
      "loss": 0.0259,
      "step": 648880
    },
    {
      "epoch": 1.0619390821075785,
      "grad_norm": 0.37007999420166016,
      "learning_rate": 7.928019345953888e-06,
      "loss": 0.0369,
      "step": 648900
    },
    {
      "epoch": 1.0619718125462319,
      "grad_norm": 0.46273234486579895,
      "learning_rate": 7.927953453740372e-06,
      "loss": 0.0201,
      "step": 648920
    },
    {
      "epoch": 1.062004542984885,
      "grad_norm": 0.151239812374115,
      "learning_rate": 7.927887561526855e-06,
      "loss": 0.0227,
      "step": 648940
    },
    {
      "epoch": 1.0620372734235384,
      "grad_norm": 0.7049453854560852,
      "learning_rate": 7.927821669313337e-06,
      "loss": 0.022,
      "step": 648960
    },
    {
      "epoch": 1.0620700038621917,
      "grad_norm": 0.46760961413383484,
      "learning_rate": 7.92775577709982e-06,
      "loss": 0.0214,
      "step": 648980
    },
    {
      "epoch": 1.0621027343008451,
      "grad_norm": 1.9983303546905518,
      "learning_rate": 7.927689884886304e-06,
      "loss": 0.0252,
      "step": 649000
    },
    {
      "epoch": 1.0621354647394985,
      "grad_norm": 1.1620593070983887,
      "learning_rate": 7.927623992672786e-06,
      "loss": 0.029,
      "step": 649020
    },
    {
      "epoch": 1.0621681951781519,
      "grad_norm": 0.45828208327293396,
      "learning_rate": 7.92755810045927e-06,
      "loss": 0.0183,
      "step": 649040
    },
    {
      "epoch": 1.062200925616805,
      "grad_norm": 0.1978803277015686,
      "learning_rate": 7.927492208245752e-06,
      "loss": 0.034,
      "step": 649060
    },
    {
      "epoch": 1.0622336560554584,
      "grad_norm": 1.7660249471664429,
      "learning_rate": 7.927426316032235e-06,
      "loss": 0.0363,
      "step": 649080
    },
    {
      "epoch": 1.0622663864941118,
      "grad_norm": 0.7853191494941711,
      "learning_rate": 7.927360423818719e-06,
      "loss": 0.0224,
      "step": 649100
    },
    {
      "epoch": 1.0622991169327651,
      "grad_norm": 7.701600074768066,
      "learning_rate": 7.927294531605201e-06,
      "loss": 0.0201,
      "step": 649120
    },
    {
      "epoch": 1.0623318473714185,
      "grad_norm": 0.5025959014892578,
      "learning_rate": 7.927228639391684e-06,
      "loss": 0.0134,
      "step": 649140
    },
    {
      "epoch": 1.0623645778100719,
      "grad_norm": 0.9024820923805237,
      "learning_rate": 7.927162747178166e-06,
      "loss": 0.0306,
      "step": 649160
    },
    {
      "epoch": 1.0623973082487252,
      "grad_norm": 1.833688497543335,
      "learning_rate": 7.92709685496465e-06,
      "loss": 0.0183,
      "step": 649180
    },
    {
      "epoch": 1.0624300386873784,
      "grad_norm": 0.5365063548088074,
      "learning_rate": 7.927030962751132e-06,
      "loss": 0.0223,
      "step": 649200
    },
    {
      "epoch": 1.0624627691260318,
      "grad_norm": 0.4681585729122162,
      "learning_rate": 7.926965070537615e-06,
      "loss": 0.023,
      "step": 649220
    },
    {
      "epoch": 1.0624954995646851,
      "grad_norm": 2.7043750286102295,
      "learning_rate": 7.926899178324097e-06,
      "loss": 0.0279,
      "step": 649240
    },
    {
      "epoch": 1.0625282300033385,
      "grad_norm": 0.732403576374054,
      "learning_rate": 7.926833286110581e-06,
      "loss": 0.0271,
      "step": 649260
    },
    {
      "epoch": 1.0625609604419919,
      "grad_norm": 1.6237517595291138,
      "learning_rate": 7.926767393897063e-06,
      "loss": 0.026,
      "step": 649280
    },
    {
      "epoch": 1.0625936908806453,
      "grad_norm": 0.8688993453979492,
      "learning_rate": 7.926701501683546e-06,
      "loss": 0.0361,
      "step": 649300
    },
    {
      "epoch": 1.0626264213192984,
      "grad_norm": 2.0233025550842285,
      "learning_rate": 7.92663560947003e-06,
      "loss": 0.0223,
      "step": 649320
    },
    {
      "epoch": 1.0626591517579518,
      "grad_norm": 0.3098513185977936,
      "learning_rate": 7.926569717256512e-06,
      "loss": 0.02,
      "step": 649340
    },
    {
      "epoch": 1.0626918821966052,
      "grad_norm": 1.2897348403930664,
      "learning_rate": 7.926503825042995e-06,
      "loss": 0.0269,
      "step": 649360
    },
    {
      "epoch": 1.0627246126352585,
      "grad_norm": 0.3856617510318756,
      "learning_rate": 7.926437932829479e-06,
      "loss": 0.0282,
      "step": 649380
    },
    {
      "epoch": 1.062757343073912,
      "grad_norm": 0.4859670400619507,
      "learning_rate": 7.926372040615961e-06,
      "loss": 0.0234,
      "step": 649400
    },
    {
      "epoch": 1.0627900735125653,
      "grad_norm": 1.9807307720184326,
      "learning_rate": 7.926306148402445e-06,
      "loss": 0.0287,
      "step": 649420
    },
    {
      "epoch": 1.0628228039512186,
      "grad_norm": 0.413913756608963,
      "learning_rate": 7.926240256188928e-06,
      "loss": 0.018,
      "step": 649440
    },
    {
      "epoch": 1.0628555343898718,
      "grad_norm": 0.9140040874481201,
      "learning_rate": 7.92617436397541e-06,
      "loss": 0.0301,
      "step": 649460
    },
    {
      "epoch": 1.0628882648285252,
      "grad_norm": 0.9795663952827454,
      "learning_rate": 7.926108471761894e-06,
      "loss": 0.0241,
      "step": 649480
    },
    {
      "epoch": 1.0629209952671785,
      "grad_norm": 0.8768467903137207,
      "learning_rate": 7.926042579548375e-06,
      "loss": 0.0293,
      "step": 649500
    },
    {
      "epoch": 1.062953725705832,
      "grad_norm": 0.2880995571613312,
      "learning_rate": 7.925976687334859e-06,
      "loss": 0.0228,
      "step": 649520
    },
    {
      "epoch": 1.0629864561444853,
      "grad_norm": 0.6223722696304321,
      "learning_rate": 7.925910795121341e-06,
      "loss": 0.0197,
      "step": 649540
    },
    {
      "epoch": 1.0630191865831387,
      "grad_norm": 0.5343726873397827,
      "learning_rate": 7.925844902907825e-06,
      "loss": 0.0242,
      "step": 649560
    },
    {
      "epoch": 1.063051917021792,
      "grad_norm": 1.0738799571990967,
      "learning_rate": 7.925779010694306e-06,
      "loss": 0.0185,
      "step": 649580
    },
    {
      "epoch": 1.0630846474604452,
      "grad_norm": 0.9731211066246033,
      "learning_rate": 7.92571311848079e-06,
      "loss": 0.019,
      "step": 649600
    },
    {
      "epoch": 1.0631173778990985,
      "grad_norm": 0.4537755250930786,
      "learning_rate": 7.925647226267272e-06,
      "loss": 0.0267,
      "step": 649620
    },
    {
      "epoch": 1.063150108337752,
      "grad_norm": 0.4339697062969208,
      "learning_rate": 7.925581334053755e-06,
      "loss": 0.0168,
      "step": 649640
    },
    {
      "epoch": 1.0631828387764053,
      "grad_norm": 1.2759729623794556,
      "learning_rate": 7.925515441840237e-06,
      "loss": 0.0271,
      "step": 649660
    },
    {
      "epoch": 1.0632155692150587,
      "grad_norm": 0.12813064455986023,
      "learning_rate": 7.925449549626721e-06,
      "loss": 0.0172,
      "step": 649680
    },
    {
      "epoch": 1.063248299653712,
      "grad_norm": 1.5689623355865479,
      "learning_rate": 7.925383657413203e-06,
      "loss": 0.0268,
      "step": 649700
    },
    {
      "epoch": 1.0632810300923654,
      "grad_norm": 0.3525228500366211,
      "learning_rate": 7.925317765199686e-06,
      "loss": 0.0321,
      "step": 649720
    },
    {
      "epoch": 1.0633137605310186,
      "grad_norm": 0.2877659201622009,
      "learning_rate": 7.92525187298617e-06,
      "loss": 0.0263,
      "step": 649740
    },
    {
      "epoch": 1.063346490969672,
      "grad_norm": 0.255476713180542,
      "learning_rate": 7.925185980772652e-06,
      "loss": 0.0231,
      "step": 649760
    },
    {
      "epoch": 1.0633792214083253,
      "grad_norm": 1.1158428192138672,
      "learning_rate": 7.925120088559136e-06,
      "loss": 0.0287,
      "step": 649780
    },
    {
      "epoch": 1.0634119518469787,
      "grad_norm": 0.46482589840888977,
      "learning_rate": 7.925054196345619e-06,
      "loss": 0.0266,
      "step": 649800
    },
    {
      "epoch": 1.063444682285632,
      "grad_norm": 0.2588188946247101,
      "learning_rate": 7.924988304132103e-06,
      "loss": 0.0277,
      "step": 649820
    },
    {
      "epoch": 1.0634774127242854,
      "grad_norm": 0.4783764183521271,
      "learning_rate": 7.924922411918585e-06,
      "loss": 0.023,
      "step": 649840
    },
    {
      "epoch": 1.0635101431629386,
      "grad_norm": 0.6258028149604797,
      "learning_rate": 7.924856519705068e-06,
      "loss": 0.0195,
      "step": 649860
    },
    {
      "epoch": 1.063542873601592,
      "grad_norm": 0.5561577677726746,
      "learning_rate": 7.92479062749155e-06,
      "loss": 0.0214,
      "step": 649880
    },
    {
      "epoch": 1.0635756040402453,
      "grad_norm": 0.28482282161712646,
      "learning_rate": 7.924724735278034e-06,
      "loss": 0.0214,
      "step": 649900
    },
    {
      "epoch": 1.0636083344788987,
      "grad_norm": 1.1547768115997314,
      "learning_rate": 7.924658843064516e-06,
      "loss": 0.0199,
      "step": 649920
    },
    {
      "epoch": 1.063641064917552,
      "grad_norm": 0.41585907340049744,
      "learning_rate": 7.924592950850999e-06,
      "loss": 0.0159,
      "step": 649940
    },
    {
      "epoch": 1.0636737953562054,
      "grad_norm": 0.5007607340812683,
      "learning_rate": 7.924527058637481e-06,
      "loss": 0.0199,
      "step": 649960
    },
    {
      "epoch": 1.0637065257948588,
      "grad_norm": 1.0991876125335693,
      "learning_rate": 7.924461166423965e-06,
      "loss": 0.0222,
      "step": 649980
    },
    {
      "epoch": 1.063739256233512,
      "grad_norm": 0.6434125900268555,
      "learning_rate": 7.924395274210447e-06,
      "loss": 0.0264,
      "step": 650000
    },
    {
      "epoch": 1.063739256233512,
      "eval_loss": 0.01306293997913599,
      "eval_runtime": 6481.6263,
      "eval_samples_per_second": 158.58,
      "eval_steps_per_second": 15.858,
      "eval_sts-dev_pearson_cosine": 0.9691642638466671,
      "eval_sts-dev_spearman_cosine": 0.8856231195241104,
      "step": 650000
    },
    {
      "epoch": 1.0637719866721653,
      "grad_norm": 1.7104865312576294,
      "learning_rate": 7.92432938199693e-06,
      "loss": 0.0242,
      "step": 650020
    },
    {
      "epoch": 1.0638047171108187,
      "grad_norm": 2.4946303367614746,
      "learning_rate": 7.924263489783412e-06,
      "loss": 0.0207,
      "step": 650040
    },
    {
      "epoch": 1.063837447549472,
      "grad_norm": 1.0025198459625244,
      "learning_rate": 7.924197597569896e-06,
      "loss": 0.0226,
      "step": 650060
    },
    {
      "epoch": 1.0638701779881254,
      "grad_norm": 0.24544177949428558,
      "learning_rate": 7.924131705356377e-06,
      "loss": 0.0339,
      "step": 650080
    },
    {
      "epoch": 1.0639029084267788,
      "grad_norm": 0.47326624393463135,
      "learning_rate": 7.924065813142861e-06,
      "loss": 0.0171,
      "step": 650100
    },
    {
      "epoch": 1.063935638865432,
      "grad_norm": 0.9929604530334473,
      "learning_rate": 7.923999920929345e-06,
      "loss": 0.0356,
      "step": 650120
    },
    {
      "epoch": 1.0639683693040853,
      "grad_norm": 1.327032446861267,
      "learning_rate": 7.923934028715827e-06,
      "loss": 0.0192,
      "step": 650140
    },
    {
      "epoch": 1.0640010997427387,
      "grad_norm": 1.8326839208602905,
      "learning_rate": 7.92386813650231e-06,
      "loss": 0.0256,
      "step": 650160
    },
    {
      "epoch": 1.064033830181392,
      "grad_norm": 0.48582983016967773,
      "learning_rate": 7.923802244288794e-06,
      "loss": 0.0284,
      "step": 650180
    },
    {
      "epoch": 1.0640665606200455,
      "grad_norm": 0.7660951018333435,
      "learning_rate": 7.923736352075276e-06,
      "loss": 0.0161,
      "step": 650200
    },
    {
      "epoch": 1.0640992910586988,
      "grad_norm": 0.2671462297439575,
      "learning_rate": 7.92367045986176e-06,
      "loss": 0.0255,
      "step": 650220
    },
    {
      "epoch": 1.0641320214973522,
      "grad_norm": 0.8044507503509521,
      "learning_rate": 7.923604567648243e-06,
      "loss": 0.0215,
      "step": 650240
    },
    {
      "epoch": 1.0641647519360053,
      "grad_norm": 0.3530714213848114,
      "learning_rate": 7.923538675434725e-06,
      "loss": 0.0159,
      "step": 650260
    },
    {
      "epoch": 1.0641974823746587,
      "grad_norm": 0.5352956056594849,
      "learning_rate": 7.923472783221208e-06,
      "loss": 0.0251,
      "step": 650280
    },
    {
      "epoch": 1.064230212813312,
      "grad_norm": 1.4578913450241089,
      "learning_rate": 7.92340689100769e-06,
      "loss": 0.0254,
      "step": 650300
    },
    {
      "epoch": 1.0642629432519655,
      "grad_norm": 0.7576612234115601,
      "learning_rate": 7.923340998794174e-06,
      "loss": 0.0183,
      "step": 650320
    },
    {
      "epoch": 1.0642956736906188,
      "grad_norm": 0.18189731240272522,
      "learning_rate": 7.923275106580656e-06,
      "loss": 0.0161,
      "step": 650340
    },
    {
      "epoch": 1.0643284041292722,
      "grad_norm": 1.3568872213363647,
      "learning_rate": 7.92320921436714e-06,
      "loss": 0.0229,
      "step": 650360
    },
    {
      "epoch": 1.0643611345679256,
      "grad_norm": 0.20552650094032288,
      "learning_rate": 7.923143322153621e-06,
      "loss": 0.0229,
      "step": 650380
    },
    {
      "epoch": 1.0643938650065787,
      "grad_norm": 0.7107157707214355,
      "learning_rate": 7.923077429940105e-06,
      "loss": 0.0243,
      "step": 650400
    },
    {
      "epoch": 1.064426595445232,
      "grad_norm": 0.5001697540283203,
      "learning_rate": 7.923011537726587e-06,
      "loss": 0.0299,
      "step": 650420
    },
    {
      "epoch": 1.0644593258838855,
      "grad_norm": 0.1383160650730133,
      "learning_rate": 7.92294564551307e-06,
      "loss": 0.0374,
      "step": 650440
    },
    {
      "epoch": 1.0644920563225388,
      "grad_norm": 0.523414134979248,
      "learning_rate": 7.922879753299552e-06,
      "loss": 0.0302,
      "step": 650460
    },
    {
      "epoch": 1.0645247867611922,
      "grad_norm": 0.5163348317146301,
      "learning_rate": 7.922813861086036e-06,
      "loss": 0.0178,
      "step": 650480
    },
    {
      "epoch": 1.0645575171998456,
      "grad_norm": 0.5276331901550293,
      "learning_rate": 7.92274796887252e-06,
      "loss": 0.0212,
      "step": 650500
    },
    {
      "epoch": 1.064590247638499,
      "grad_norm": 0.2779848873615265,
      "learning_rate": 7.922682076659001e-06,
      "loss": 0.0188,
      "step": 650520
    },
    {
      "epoch": 1.0646229780771521,
      "grad_norm": 0.6391245722770691,
      "learning_rate": 7.922616184445485e-06,
      "loss": 0.0164,
      "step": 650540
    },
    {
      "epoch": 1.0646557085158055,
      "grad_norm": 1.2421294450759888,
      "learning_rate": 7.922550292231968e-06,
      "loss": 0.02,
      "step": 650560
    },
    {
      "epoch": 1.0646884389544589,
      "grad_norm": 0.45927393436431885,
      "learning_rate": 7.92248440001845e-06,
      "loss": 0.023,
      "step": 650580
    },
    {
      "epoch": 1.0647211693931122,
      "grad_norm": 0.6426553726196289,
      "learning_rate": 7.922418507804934e-06,
      "loss": 0.0395,
      "step": 650600
    },
    {
      "epoch": 1.0647538998317656,
      "grad_norm": 1.3281559944152832,
      "learning_rate": 7.922352615591417e-06,
      "loss": 0.0234,
      "step": 650620
    },
    {
      "epoch": 1.064786630270419,
      "grad_norm": 0.4841422736644745,
      "learning_rate": 7.9222867233779e-06,
      "loss": 0.027,
      "step": 650640
    },
    {
      "epoch": 1.0648193607090721,
      "grad_norm": 0.7197810411453247,
      "learning_rate": 7.922220831164383e-06,
      "loss": 0.022,
      "step": 650660
    },
    {
      "epoch": 1.0648520911477255,
      "grad_norm": 1.0527310371398926,
      "learning_rate": 7.922154938950865e-06,
      "loss": 0.024,
      "step": 650680
    },
    {
      "epoch": 1.0648848215863789,
      "grad_norm": 1.6770288944244385,
      "learning_rate": 7.922089046737348e-06,
      "loss": 0.0267,
      "step": 650700
    },
    {
      "epoch": 1.0649175520250322,
      "grad_norm": 0.5249788165092468,
      "learning_rate": 7.92202315452383e-06,
      "loss": 0.0214,
      "step": 650720
    },
    {
      "epoch": 1.0649502824636856,
      "grad_norm": 0.4340120553970337,
      "learning_rate": 7.921957262310314e-06,
      "loss": 0.0219,
      "step": 650740
    },
    {
      "epoch": 1.064983012902339,
      "grad_norm": 1.1779698133468628,
      "learning_rate": 7.921891370096796e-06,
      "loss": 0.0297,
      "step": 650760
    },
    {
      "epoch": 1.0650157433409921,
      "grad_norm": 0.8055514097213745,
      "learning_rate": 7.92182547788328e-06,
      "loss": 0.0175,
      "step": 650780
    },
    {
      "epoch": 1.0650484737796455,
      "grad_norm": 0.831557035446167,
      "learning_rate": 7.921759585669761e-06,
      "loss": 0.0206,
      "step": 650800
    },
    {
      "epoch": 1.0650812042182989,
      "grad_norm": 1.128798007965088,
      "learning_rate": 7.921693693456245e-06,
      "loss": 0.0248,
      "step": 650820
    },
    {
      "epoch": 1.0651139346569523,
      "grad_norm": 0.7175170183181763,
      "learning_rate": 7.921627801242728e-06,
      "loss": 0.018,
      "step": 650840
    },
    {
      "epoch": 1.0651466650956056,
      "grad_norm": 0.7299911379814148,
      "learning_rate": 7.92156190902921e-06,
      "loss": 0.0171,
      "step": 650860
    },
    {
      "epoch": 1.065179395534259,
      "grad_norm": 0.9421060085296631,
      "learning_rate": 7.921496016815694e-06,
      "loss": 0.0261,
      "step": 650880
    },
    {
      "epoch": 1.0652121259729124,
      "grad_norm": 0.2586749196052551,
      "learning_rate": 7.921430124602176e-06,
      "loss": 0.0246,
      "step": 650900
    },
    {
      "epoch": 1.0652448564115655,
      "grad_norm": 0.9691991806030273,
      "learning_rate": 7.92136423238866e-06,
      "loss": 0.0311,
      "step": 650920
    },
    {
      "epoch": 1.065277586850219,
      "grad_norm": 1.3062242269515991,
      "learning_rate": 7.921298340175141e-06,
      "loss": 0.0241,
      "step": 650940
    },
    {
      "epoch": 1.0653103172888723,
      "grad_norm": 4.174008369445801,
      "learning_rate": 7.921232447961625e-06,
      "loss": 0.0241,
      "step": 650960
    },
    {
      "epoch": 1.0653430477275256,
      "grad_norm": 1.1718394756317139,
      "learning_rate": 7.921166555748108e-06,
      "loss": 0.0239,
      "step": 650980
    },
    {
      "epoch": 1.065375778166179,
      "grad_norm": 0.2869929373264313,
      "learning_rate": 7.92110066353459e-06,
      "loss": 0.0187,
      "step": 651000
    },
    {
      "epoch": 1.0654085086048324,
      "grad_norm": 2.737424612045288,
      "learning_rate": 7.921034771321074e-06,
      "loss": 0.022,
      "step": 651020
    },
    {
      "epoch": 1.0654412390434858,
      "grad_norm": 1.256657600402832,
      "learning_rate": 7.920968879107557e-06,
      "loss": 0.0221,
      "step": 651040
    },
    {
      "epoch": 1.065473969482139,
      "grad_norm": 1.5383914709091187,
      "learning_rate": 7.92090298689404e-06,
      "loss": 0.022,
      "step": 651060
    },
    {
      "epoch": 1.0655066999207923,
      "grad_norm": 0.23894409835338593,
      "learning_rate": 7.920837094680523e-06,
      "loss": 0.0253,
      "step": 651080
    },
    {
      "epoch": 1.0655394303594456,
      "grad_norm": 0.3075353503227234,
      "learning_rate": 7.920771202467005e-06,
      "loss": 0.021,
      "step": 651100
    },
    {
      "epoch": 1.065572160798099,
      "grad_norm": 1.034029483795166,
      "learning_rate": 7.920705310253488e-06,
      "loss": 0.02,
      "step": 651120
    },
    {
      "epoch": 1.0656048912367524,
      "grad_norm": 0.8899789452552795,
      "learning_rate": 7.92063941803997e-06,
      "loss": 0.0236,
      "step": 651140
    },
    {
      "epoch": 1.0656376216754058,
      "grad_norm": 1.6073299646377563,
      "learning_rate": 7.920573525826454e-06,
      "loss": 0.024,
      "step": 651160
    },
    {
      "epoch": 1.0656703521140591,
      "grad_norm": 5.936316013336182,
      "learning_rate": 7.920507633612936e-06,
      "loss": 0.0218,
      "step": 651180
    },
    {
      "epoch": 1.0657030825527123,
      "grad_norm": 1.8113845586776733,
      "learning_rate": 7.92044174139942e-06,
      "loss": 0.0241,
      "step": 651200
    },
    {
      "epoch": 1.0657358129913657,
      "grad_norm": 0.5791921615600586,
      "learning_rate": 7.920375849185903e-06,
      "loss": 0.0189,
      "step": 651220
    },
    {
      "epoch": 1.065768543430019,
      "grad_norm": 0.11353205889463425,
      "learning_rate": 7.920309956972385e-06,
      "loss": 0.0212,
      "step": 651240
    },
    {
      "epoch": 1.0658012738686724,
      "grad_norm": 0.9931691288948059,
      "learning_rate": 7.920244064758868e-06,
      "loss": 0.0239,
      "step": 651260
    },
    {
      "epoch": 1.0658340043073258,
      "grad_norm": 0.461080938577652,
      "learning_rate": 7.92017817254535e-06,
      "loss": 0.0289,
      "step": 651280
    },
    {
      "epoch": 1.0658667347459791,
      "grad_norm": 2.757534980773926,
      "learning_rate": 7.920112280331834e-06,
      "loss": 0.0317,
      "step": 651300
    },
    {
      "epoch": 1.0658994651846325,
      "grad_norm": 0.9853894114494324,
      "learning_rate": 7.920046388118316e-06,
      "loss": 0.0218,
      "step": 651320
    },
    {
      "epoch": 1.0659321956232857,
      "grad_norm": 1.1915943622589111,
      "learning_rate": 7.9199804959048e-06,
      "loss": 0.0187,
      "step": 651340
    },
    {
      "epoch": 1.065964926061939,
      "grad_norm": 0.8265385031700134,
      "learning_rate": 7.919914603691283e-06,
      "loss": 0.0273,
      "step": 651360
    },
    {
      "epoch": 1.0659976565005924,
      "grad_norm": 0.8244757056236267,
      "learning_rate": 7.919848711477765e-06,
      "loss": 0.0225,
      "step": 651380
    },
    {
      "epoch": 1.0660303869392458,
      "grad_norm": 0.8411524891853333,
      "learning_rate": 7.919782819264248e-06,
      "loss": 0.0258,
      "step": 651400
    },
    {
      "epoch": 1.0660631173778992,
      "grad_norm": 0.28543204069137573,
      "learning_rate": 7.919716927050732e-06,
      "loss": 0.0243,
      "step": 651420
    },
    {
      "epoch": 1.0660958478165525,
      "grad_norm": 0.40562522411346436,
      "learning_rate": 7.919651034837214e-06,
      "loss": 0.0301,
      "step": 651440
    },
    {
      "epoch": 1.0661285782552057,
      "grad_norm": 0.3718757927417755,
      "learning_rate": 7.919585142623698e-06,
      "loss": 0.0274,
      "step": 651460
    },
    {
      "epoch": 1.066161308693859,
      "grad_norm": 0.622418224811554,
      "learning_rate": 7.91951925041018e-06,
      "loss": 0.0224,
      "step": 651480
    },
    {
      "epoch": 1.0661940391325124,
      "grad_norm": 0.6116427183151245,
      "learning_rate": 7.919453358196663e-06,
      "loss": 0.0218,
      "step": 651500
    },
    {
      "epoch": 1.0662267695711658,
      "grad_norm": 0.6900954246520996,
      "learning_rate": 7.919387465983145e-06,
      "loss": 0.0238,
      "step": 651520
    },
    {
      "epoch": 1.0662595000098192,
      "grad_norm": 0.8892566561698914,
      "learning_rate": 7.919321573769628e-06,
      "loss": 0.0368,
      "step": 651540
    },
    {
      "epoch": 1.0662922304484725,
      "grad_norm": 0.5421627759933472,
      "learning_rate": 7.919255681556112e-06,
      "loss": 0.0187,
      "step": 651560
    },
    {
      "epoch": 1.0663249608871257,
      "grad_norm": 1.2031886577606201,
      "learning_rate": 7.919189789342594e-06,
      "loss": 0.0199,
      "step": 651580
    },
    {
      "epoch": 1.066357691325779,
      "grad_norm": 0.1828671097755432,
      "learning_rate": 7.919123897129078e-06,
      "loss": 0.0236,
      "step": 651600
    },
    {
      "epoch": 1.0663904217644324,
      "grad_norm": 1.5276710987091064,
      "learning_rate": 7.91905800491556e-06,
      "loss": 0.0196,
      "step": 651620
    },
    {
      "epoch": 1.0664231522030858,
      "grad_norm": 0.8838852047920227,
      "learning_rate": 7.918992112702043e-06,
      "loss": 0.0188,
      "step": 651640
    },
    {
      "epoch": 1.0664558826417392,
      "grad_norm": 2.2631442546844482,
      "learning_rate": 7.918926220488525e-06,
      "loss": 0.021,
      "step": 651660
    },
    {
      "epoch": 1.0664886130803926,
      "grad_norm": 0.2189907431602478,
      "learning_rate": 7.918860328275008e-06,
      "loss": 0.016,
      "step": 651680
    },
    {
      "epoch": 1.066521343519046,
      "grad_norm": 0.5167951583862305,
      "learning_rate": 7.91879443606149e-06,
      "loss": 0.0201,
      "step": 651700
    },
    {
      "epoch": 1.066554073957699,
      "grad_norm": 0.9768440127372742,
      "learning_rate": 7.918728543847974e-06,
      "loss": 0.0317,
      "step": 651720
    },
    {
      "epoch": 1.0665868043963525,
      "grad_norm": 1.381028175354004,
      "learning_rate": 7.918662651634456e-06,
      "loss": 0.0419,
      "step": 651740
    },
    {
      "epoch": 1.0666195348350058,
      "grad_norm": 0.7347990870475769,
      "learning_rate": 7.91859675942094e-06,
      "loss": 0.025,
      "step": 651760
    },
    {
      "epoch": 1.0666522652736592,
      "grad_norm": 1.1213092803955078,
      "learning_rate": 7.918530867207423e-06,
      "loss": 0.0229,
      "step": 651780
    },
    {
      "epoch": 1.0666849957123126,
      "grad_norm": 0.18446172773838043,
      "learning_rate": 7.918464974993905e-06,
      "loss": 0.0264,
      "step": 651800
    },
    {
      "epoch": 1.066717726150966,
      "grad_norm": 0.2334848791360855,
      "learning_rate": 7.918399082780389e-06,
      "loss": 0.0209,
      "step": 651820
    },
    {
      "epoch": 1.0667504565896193,
      "grad_norm": 0.653003454208374,
      "learning_rate": 7.918333190566872e-06,
      "loss": 0.0176,
      "step": 651840
    },
    {
      "epoch": 1.0667831870282725,
      "grad_norm": 0.3431972861289978,
      "learning_rate": 7.918267298353354e-06,
      "loss": 0.0229,
      "step": 651860
    },
    {
      "epoch": 1.0668159174669258,
      "grad_norm": 1.152829647064209,
      "learning_rate": 7.918201406139838e-06,
      "loss": 0.025,
      "step": 651880
    },
    {
      "epoch": 1.0668486479055792,
      "grad_norm": 0.5278438329696655,
      "learning_rate": 7.918135513926321e-06,
      "loss": 0.0252,
      "step": 651900
    },
    {
      "epoch": 1.0668813783442326,
      "grad_norm": 0.36607620120048523,
      "learning_rate": 7.918069621712803e-06,
      "loss": 0.0294,
      "step": 651920
    },
    {
      "epoch": 1.066914108782886,
      "grad_norm": 0.4336429834365845,
      "learning_rate": 7.918003729499287e-06,
      "loss": 0.0353,
      "step": 651940
    },
    {
      "epoch": 1.0669468392215393,
      "grad_norm": 0.42977645993232727,
      "learning_rate": 7.917937837285769e-06,
      "loss": 0.0249,
      "step": 651960
    },
    {
      "epoch": 1.0669795696601927,
      "grad_norm": 0.21861471235752106,
      "learning_rate": 7.917871945072252e-06,
      "loss": 0.0278,
      "step": 651980
    },
    {
      "epoch": 1.0670123000988458,
      "grad_norm": 0.47614091634750366,
      "learning_rate": 7.917806052858734e-06,
      "loss": 0.021,
      "step": 652000
    },
    {
      "epoch": 1.0670450305374992,
      "grad_norm": 0.4435645341873169,
      "learning_rate": 7.917740160645218e-06,
      "loss": 0.0225,
      "step": 652020
    },
    {
      "epoch": 1.0670777609761526,
      "grad_norm": 1.5075494050979614,
      "learning_rate": 7.9176742684317e-06,
      "loss": 0.0255,
      "step": 652040
    },
    {
      "epoch": 1.067110491414806,
      "grad_norm": 0.3943704664707184,
      "learning_rate": 7.917608376218183e-06,
      "loss": 0.0222,
      "step": 652060
    },
    {
      "epoch": 1.0671432218534593,
      "grad_norm": 0.3843073844909668,
      "learning_rate": 7.917542484004665e-06,
      "loss": 0.0173,
      "step": 652080
    },
    {
      "epoch": 1.0671759522921127,
      "grad_norm": 0.7588334679603577,
      "learning_rate": 7.917476591791149e-06,
      "loss": 0.0185,
      "step": 652100
    },
    {
      "epoch": 1.0672086827307659,
      "grad_norm": 1.3608759641647339,
      "learning_rate": 7.91741069957763e-06,
      "loss": 0.0282,
      "step": 652120
    },
    {
      "epoch": 1.0672414131694192,
      "grad_norm": 1.2739899158477783,
      "learning_rate": 7.917344807364114e-06,
      "loss": 0.0184,
      "step": 652140
    },
    {
      "epoch": 1.0672741436080726,
      "grad_norm": 0.7751599550247192,
      "learning_rate": 7.917278915150598e-06,
      "loss": 0.0219,
      "step": 652160
    },
    {
      "epoch": 1.067306874046726,
      "grad_norm": 0.5812476873397827,
      "learning_rate": 7.91721302293708e-06,
      "loss": 0.0272,
      "step": 652180
    },
    {
      "epoch": 1.0673396044853793,
      "grad_norm": 0.8137497305870056,
      "learning_rate": 7.917147130723563e-06,
      "loss": 0.0206,
      "step": 652200
    },
    {
      "epoch": 1.0673723349240327,
      "grad_norm": 0.7682297825813293,
      "learning_rate": 7.917081238510047e-06,
      "loss": 0.0151,
      "step": 652220
    },
    {
      "epoch": 1.067405065362686,
      "grad_norm": 1.5992735624313354,
      "learning_rate": 7.917015346296529e-06,
      "loss": 0.0237,
      "step": 652240
    },
    {
      "epoch": 1.0674377958013392,
      "grad_norm": 1.828796148300171,
      "learning_rate": 7.916949454083012e-06,
      "loss": 0.0225,
      "step": 652260
    },
    {
      "epoch": 1.0674705262399926,
      "grad_norm": 0.18958155810832977,
      "learning_rate": 7.916883561869496e-06,
      "loss": 0.029,
      "step": 652280
    },
    {
      "epoch": 1.067503256678646,
      "grad_norm": 0.2658369839191437,
      "learning_rate": 7.916817669655978e-06,
      "loss": 0.0175,
      "step": 652300
    },
    {
      "epoch": 1.0675359871172994,
      "grad_norm": 1.3376243114471436,
      "learning_rate": 7.916751777442461e-06,
      "loss": 0.0218,
      "step": 652320
    },
    {
      "epoch": 1.0675687175559527,
      "grad_norm": 0.9000442624092102,
      "learning_rate": 7.916685885228943e-06,
      "loss": 0.0241,
      "step": 652340
    },
    {
      "epoch": 1.067601447994606,
      "grad_norm": 0.42115548253059387,
      "learning_rate": 7.916619993015427e-06,
      "loss": 0.0314,
      "step": 652360
    },
    {
      "epoch": 1.0676341784332593,
      "grad_norm": 0.6164881587028503,
      "learning_rate": 7.916554100801909e-06,
      "loss": 0.0236,
      "step": 652380
    },
    {
      "epoch": 1.0676669088719126,
      "grad_norm": 1.0159019231796265,
      "learning_rate": 7.916488208588392e-06,
      "loss": 0.0259,
      "step": 652400
    },
    {
      "epoch": 1.067699639310566,
      "grad_norm": 1.259569525718689,
      "learning_rate": 7.916422316374874e-06,
      "loss": 0.0308,
      "step": 652420
    },
    {
      "epoch": 1.0677323697492194,
      "grad_norm": 0.8942140340805054,
      "learning_rate": 7.916356424161358e-06,
      "loss": 0.0189,
      "step": 652440
    },
    {
      "epoch": 1.0677651001878727,
      "grad_norm": 0.20665518939495087,
      "learning_rate": 7.91629053194784e-06,
      "loss": 0.0264,
      "step": 652460
    },
    {
      "epoch": 1.0677978306265261,
      "grad_norm": 0.5067479610443115,
      "learning_rate": 7.916224639734323e-06,
      "loss": 0.0349,
      "step": 652480
    },
    {
      "epoch": 1.0678305610651795,
      "grad_norm": 0.6390218138694763,
      "learning_rate": 7.916158747520805e-06,
      "loss": 0.0313,
      "step": 652500
    },
    {
      "epoch": 1.0678632915038326,
      "grad_norm": 0.3276021182537079,
      "learning_rate": 7.916092855307289e-06,
      "loss": 0.0311,
      "step": 652520
    },
    {
      "epoch": 1.067896021942486,
      "grad_norm": 0.21537302434444427,
      "learning_rate": 7.91602696309377e-06,
      "loss": 0.019,
      "step": 652540
    },
    {
      "epoch": 1.0679287523811394,
      "grad_norm": 2.4991567134857178,
      "learning_rate": 7.915961070880254e-06,
      "loss": 0.0195,
      "step": 652560
    },
    {
      "epoch": 1.0679614828197928,
      "grad_norm": 1.276335597038269,
      "learning_rate": 7.915895178666738e-06,
      "loss": 0.0204,
      "step": 652580
    },
    {
      "epoch": 1.0679942132584461,
      "grad_norm": 1.5928975343704224,
      "learning_rate": 7.915829286453221e-06,
      "loss": 0.0277,
      "step": 652600
    },
    {
      "epoch": 1.0680269436970995,
      "grad_norm": 2.1315510272979736,
      "learning_rate": 7.915763394239703e-06,
      "loss": 0.0257,
      "step": 652620
    },
    {
      "epoch": 1.0680596741357529,
      "grad_norm": 0.2604296803474426,
      "learning_rate": 7.915697502026187e-06,
      "loss": 0.0218,
      "step": 652640
    },
    {
      "epoch": 1.068092404574406,
      "grad_norm": 1.7172963619232178,
      "learning_rate": 7.91563160981267e-06,
      "loss": 0.0264,
      "step": 652660
    },
    {
      "epoch": 1.0681251350130594,
      "grad_norm": 0.3754274547100067,
      "learning_rate": 7.915565717599152e-06,
      "loss": 0.0226,
      "step": 652680
    },
    {
      "epoch": 1.0681578654517128,
      "grad_norm": 0.6757619976997375,
      "learning_rate": 7.915499825385636e-06,
      "loss": 0.0232,
      "step": 652700
    },
    {
      "epoch": 1.0681905958903661,
      "grad_norm": 1.3869343996047974,
      "learning_rate": 7.915433933172118e-06,
      "loss": 0.0343,
      "step": 652720
    },
    {
      "epoch": 1.0682233263290195,
      "grad_norm": 0.49100667238235474,
      "learning_rate": 7.915368040958601e-06,
      "loss": 0.027,
      "step": 652740
    },
    {
      "epoch": 1.0682560567676729,
      "grad_norm": 0.4787665903568268,
      "learning_rate": 7.915302148745083e-06,
      "loss": 0.0309,
      "step": 652760
    },
    {
      "epoch": 1.0682887872063263,
      "grad_norm": 0.3996010720729828,
      "learning_rate": 7.915236256531567e-06,
      "loss": 0.0283,
      "step": 652780
    },
    {
      "epoch": 1.0683215176449794,
      "grad_norm": 1.0515022277832031,
      "learning_rate": 7.915170364318049e-06,
      "loss": 0.022,
      "step": 652800
    },
    {
      "epoch": 1.0683542480836328,
      "grad_norm": 1.7341073751449585,
      "learning_rate": 7.915104472104532e-06,
      "loss": 0.018,
      "step": 652820
    },
    {
      "epoch": 1.0683869785222861,
      "grad_norm": 0.9163087606430054,
      "learning_rate": 7.915038579891014e-06,
      "loss": 0.0146,
      "step": 652840
    },
    {
      "epoch": 1.0684197089609395,
      "grad_norm": 0.4611717760562897,
      "learning_rate": 7.914972687677498e-06,
      "loss": 0.0222,
      "step": 652860
    },
    {
      "epoch": 1.068452439399593,
      "grad_norm": 0.35890862345695496,
      "learning_rate": 7.91490679546398e-06,
      "loss": 0.018,
      "step": 652880
    },
    {
      "epoch": 1.0684851698382463,
      "grad_norm": 1.52415132522583,
      "learning_rate": 7.914840903250463e-06,
      "loss": 0.0242,
      "step": 652900
    },
    {
      "epoch": 1.0685179002768994,
      "grad_norm": 0.10125725716352463,
      "learning_rate": 7.914775011036945e-06,
      "loss": 0.0228,
      "step": 652920
    },
    {
      "epoch": 1.0685506307155528,
      "grad_norm": 2.142951250076294,
      "learning_rate": 7.914709118823429e-06,
      "loss": 0.0224,
      "step": 652940
    },
    {
      "epoch": 1.0685833611542062,
      "grad_norm": 0.5706617832183838,
      "learning_rate": 7.914643226609912e-06,
      "loss": 0.0264,
      "step": 652960
    },
    {
      "epoch": 1.0686160915928595,
      "grad_norm": 0.14030766487121582,
      "learning_rate": 7.914577334396394e-06,
      "loss": 0.0193,
      "step": 652980
    },
    {
      "epoch": 1.068648822031513,
      "grad_norm": 1.3673163652420044,
      "learning_rate": 7.914511442182878e-06,
      "loss": 0.0332,
      "step": 653000
    },
    {
      "epoch": 1.0686815524701663,
      "grad_norm": 2.57454514503479,
      "learning_rate": 7.914445549969361e-06,
      "loss": 0.0218,
      "step": 653020
    },
    {
      "epoch": 1.0687142829088196,
      "grad_norm": 1.2377111911773682,
      "learning_rate": 7.914379657755843e-06,
      "loss": 0.0283,
      "step": 653040
    },
    {
      "epoch": 1.0687470133474728,
      "grad_norm": 0.5450302958488464,
      "learning_rate": 7.914313765542327e-06,
      "loss": 0.0231,
      "step": 653060
    },
    {
      "epoch": 1.0687797437861262,
      "grad_norm": 3.342230796813965,
      "learning_rate": 7.91424787332881e-06,
      "loss": 0.0232,
      "step": 653080
    },
    {
      "epoch": 1.0688124742247795,
      "grad_norm": 1.278822660446167,
      "learning_rate": 7.914181981115292e-06,
      "loss": 0.0283,
      "step": 653100
    },
    {
      "epoch": 1.068845204663433,
      "grad_norm": 0.4036928415298462,
      "learning_rate": 7.914116088901776e-06,
      "loss": 0.0246,
      "step": 653120
    },
    {
      "epoch": 1.0688779351020863,
      "grad_norm": 0.49221083521842957,
      "learning_rate": 7.914050196688258e-06,
      "loss": 0.0238,
      "step": 653140
    },
    {
      "epoch": 1.0689106655407397,
      "grad_norm": 1.0062997341156006,
      "learning_rate": 7.913984304474741e-06,
      "loss": 0.0204,
      "step": 653160
    },
    {
      "epoch": 1.0689433959793928,
      "grad_norm": 0.4224241375923157,
      "learning_rate": 7.913918412261223e-06,
      "loss": 0.0284,
      "step": 653180
    },
    {
      "epoch": 1.0689761264180462,
      "grad_norm": 1.7639433145523071,
      "learning_rate": 7.913852520047707e-06,
      "loss": 0.0246,
      "step": 653200
    },
    {
      "epoch": 1.0690088568566996,
      "grad_norm": 0.2199956476688385,
      "learning_rate": 7.913786627834189e-06,
      "loss": 0.0235,
      "step": 653220
    },
    {
      "epoch": 1.069041587295353,
      "grad_norm": 0.6559032797813416,
      "learning_rate": 7.913720735620672e-06,
      "loss": 0.0213,
      "step": 653240
    },
    {
      "epoch": 1.0690743177340063,
      "grad_norm": 1.0081731081008911,
      "learning_rate": 7.913654843407154e-06,
      "loss": 0.0227,
      "step": 653260
    },
    {
      "epoch": 1.0691070481726597,
      "grad_norm": 0.6316351294517517,
      "learning_rate": 7.913588951193638e-06,
      "loss": 0.022,
      "step": 653280
    },
    {
      "epoch": 1.069139778611313,
      "grad_norm": 0.7319139838218689,
      "learning_rate": 7.913523058980121e-06,
      "loss": 0.0251,
      "step": 653300
    },
    {
      "epoch": 1.0691725090499662,
      "grad_norm": 0.366789311170578,
      "learning_rate": 7.913457166766603e-06,
      "loss": 0.0232,
      "step": 653320
    },
    {
      "epoch": 1.0692052394886196,
      "grad_norm": 0.339852511882782,
      "learning_rate": 7.913391274553087e-06,
      "loss": 0.0217,
      "step": 653340
    },
    {
      "epoch": 1.069237969927273,
      "grad_norm": 1.635534644126892,
      "learning_rate": 7.913325382339569e-06,
      "loss": 0.0259,
      "step": 653360
    },
    {
      "epoch": 1.0692707003659263,
      "grad_norm": 1.4327032566070557,
      "learning_rate": 7.913259490126052e-06,
      "loss": 0.0291,
      "step": 653380
    },
    {
      "epoch": 1.0693034308045797,
      "grad_norm": 0.3998834192752838,
      "learning_rate": 7.913193597912536e-06,
      "loss": 0.0265,
      "step": 653400
    },
    {
      "epoch": 1.069336161243233,
      "grad_norm": 0.5574631690979004,
      "learning_rate": 7.913127705699018e-06,
      "loss": 0.0187,
      "step": 653420
    },
    {
      "epoch": 1.0693688916818864,
      "grad_norm": 0.13189969956874847,
      "learning_rate": 7.913061813485501e-06,
      "loss": 0.0217,
      "step": 653440
    },
    {
      "epoch": 1.0694016221205396,
      "grad_norm": 1.3058865070343018,
      "learning_rate": 7.912995921271985e-06,
      "loss": 0.0176,
      "step": 653460
    },
    {
      "epoch": 1.069434352559193,
      "grad_norm": 0.8263798952102661,
      "learning_rate": 7.912930029058467e-06,
      "loss": 0.0267,
      "step": 653480
    },
    {
      "epoch": 1.0694670829978463,
      "grad_norm": 0.8542945384979248,
      "learning_rate": 7.91286413684495e-06,
      "loss": 0.021,
      "step": 653500
    },
    {
      "epoch": 1.0694998134364997,
      "grad_norm": 1.242599606513977,
      "learning_rate": 7.912798244631432e-06,
      "loss": 0.0308,
      "step": 653520
    },
    {
      "epoch": 1.069532543875153,
      "grad_norm": 0.7727818489074707,
      "learning_rate": 7.912732352417916e-06,
      "loss": 0.0216,
      "step": 653540
    },
    {
      "epoch": 1.0695652743138064,
      "grad_norm": 0.12509594857692719,
      "learning_rate": 7.912666460204398e-06,
      "loss": 0.0235,
      "step": 653560
    },
    {
      "epoch": 1.0695980047524598,
      "grad_norm": 1.2240601778030396,
      "learning_rate": 7.912600567990881e-06,
      "loss": 0.0229,
      "step": 653580
    },
    {
      "epoch": 1.069630735191113,
      "grad_norm": 0.46188849210739136,
      "learning_rate": 7.912534675777363e-06,
      "loss": 0.0276,
      "step": 653600
    },
    {
      "epoch": 1.0696634656297663,
      "grad_norm": 0.8272823095321655,
      "learning_rate": 7.912468783563847e-06,
      "loss": 0.0229,
      "step": 653620
    },
    {
      "epoch": 1.0696961960684197,
      "grad_norm": 0.47614729404449463,
      "learning_rate": 7.912402891350329e-06,
      "loss": 0.0233,
      "step": 653640
    },
    {
      "epoch": 1.069728926507073,
      "grad_norm": 0.9831909537315369,
      "learning_rate": 7.912336999136812e-06,
      "loss": 0.0183,
      "step": 653660
    },
    {
      "epoch": 1.0697616569457264,
      "grad_norm": 1.444334626197815,
      "learning_rate": 7.912271106923296e-06,
      "loss": 0.0219,
      "step": 653680
    },
    {
      "epoch": 1.0697943873843798,
      "grad_norm": 0.6995537877082825,
      "learning_rate": 7.912205214709778e-06,
      "loss": 0.0285,
      "step": 653700
    },
    {
      "epoch": 1.069827117823033,
      "grad_norm": 1.7253679037094116,
      "learning_rate": 7.912139322496262e-06,
      "loss": 0.0261,
      "step": 653720
    },
    {
      "epoch": 1.0698598482616863,
      "grad_norm": 1.571761965751648,
      "learning_rate": 7.912073430282743e-06,
      "loss": 0.025,
      "step": 653740
    },
    {
      "epoch": 1.0698925787003397,
      "grad_norm": 0.5980275869369507,
      "learning_rate": 7.912007538069227e-06,
      "loss": 0.0209,
      "step": 653760
    },
    {
      "epoch": 1.069925309138993,
      "grad_norm": 1.4121366739273071,
      "learning_rate": 7.911941645855709e-06,
      "loss": 0.0277,
      "step": 653780
    },
    {
      "epoch": 1.0699580395776465,
      "grad_norm": 2.1116604804992676,
      "learning_rate": 7.911875753642192e-06,
      "loss": 0.0372,
      "step": 653800
    },
    {
      "epoch": 1.0699907700162998,
      "grad_norm": 0.5199604034423828,
      "learning_rate": 7.911809861428676e-06,
      "loss": 0.0241,
      "step": 653820
    },
    {
      "epoch": 1.070023500454953,
      "grad_norm": 0.6751585006713867,
      "learning_rate": 7.911743969215158e-06,
      "loss": 0.0201,
      "step": 653840
    },
    {
      "epoch": 1.0700562308936064,
      "grad_norm": 0.24147436022758484,
      "learning_rate": 7.911678077001642e-06,
      "loss": 0.0222,
      "step": 653860
    },
    {
      "epoch": 1.0700889613322597,
      "grad_norm": 0.8897836804389954,
      "learning_rate": 7.911612184788125e-06,
      "loss": 0.0149,
      "step": 653880
    },
    {
      "epoch": 1.070121691770913,
      "grad_norm": 0.6704609990119934,
      "learning_rate": 7.911546292574607e-06,
      "loss": 0.0272,
      "step": 653900
    },
    {
      "epoch": 1.0701544222095665,
      "grad_norm": 0.23374079167842865,
      "learning_rate": 7.91148040036109e-06,
      "loss": 0.0148,
      "step": 653920
    },
    {
      "epoch": 1.0701871526482198,
      "grad_norm": 0.8880358338356018,
      "learning_rate": 7.911414508147572e-06,
      "loss": 0.0343,
      "step": 653940
    },
    {
      "epoch": 1.0702198830868732,
      "grad_norm": 0.5896901488304138,
      "learning_rate": 7.911348615934056e-06,
      "loss": 0.0202,
      "step": 653960
    },
    {
      "epoch": 1.0702526135255264,
      "grad_norm": 0.4167066514492035,
      "learning_rate": 7.911282723720538e-06,
      "loss": 0.022,
      "step": 653980
    },
    {
      "epoch": 1.0702853439641797,
      "grad_norm": 2.3401496410369873,
      "learning_rate": 7.911216831507022e-06,
      "loss": 0.0301,
      "step": 654000
    },
    {
      "epoch": 1.070318074402833,
      "grad_norm": 0.6833425760269165,
      "learning_rate": 7.911150939293505e-06,
      "loss": 0.0324,
      "step": 654020
    },
    {
      "epoch": 1.0703508048414865,
      "grad_norm": 0.9178898334503174,
      "learning_rate": 7.911085047079987e-06,
      "loss": 0.0222,
      "step": 654040
    },
    {
      "epoch": 1.0703835352801399,
      "grad_norm": 0.4491638243198395,
      "learning_rate": 7.91101915486647e-06,
      "loss": 0.0194,
      "step": 654060
    },
    {
      "epoch": 1.0704162657187932,
      "grad_norm": 0.5415781736373901,
      "learning_rate": 7.910953262652953e-06,
      "loss": 0.0243,
      "step": 654080
    },
    {
      "epoch": 1.0704489961574466,
      "grad_norm": 0.4020959138870239,
      "learning_rate": 7.910887370439436e-06,
      "loss": 0.0151,
      "step": 654100
    },
    {
      "epoch": 1.0704817265960997,
      "grad_norm": 2.1412575244903564,
      "learning_rate": 7.910821478225918e-06,
      "loss": 0.0326,
      "step": 654120
    },
    {
      "epoch": 1.0705144570347531,
      "grad_norm": 0.3211022913455963,
      "learning_rate": 7.910755586012402e-06,
      "loss": 0.0245,
      "step": 654140
    },
    {
      "epoch": 1.0705471874734065,
      "grad_norm": 1.1191056966781616,
      "learning_rate": 7.910689693798883e-06,
      "loss": 0.0286,
      "step": 654160
    },
    {
      "epoch": 1.0705799179120599,
      "grad_norm": 0.46346479654312134,
      "learning_rate": 7.910623801585367e-06,
      "loss": 0.0222,
      "step": 654180
    },
    {
      "epoch": 1.0706126483507132,
      "grad_norm": 1.0764259099960327,
      "learning_rate": 7.91055790937185e-06,
      "loss": 0.018,
      "step": 654200
    },
    {
      "epoch": 1.0706453787893666,
      "grad_norm": 0.24502764642238617,
      "learning_rate": 7.910492017158333e-06,
      "loss": 0.0265,
      "step": 654220
    },
    {
      "epoch": 1.07067810922802,
      "grad_norm": 0.9717719554901123,
      "learning_rate": 7.910426124944816e-06,
      "loss": 0.0196,
      "step": 654240
    },
    {
      "epoch": 1.0707108396666731,
      "grad_norm": 0.36736929416656494,
      "learning_rate": 7.9103602327313e-06,
      "loss": 0.0267,
      "step": 654260
    },
    {
      "epoch": 1.0707435701053265,
      "grad_norm": 0.7707251906394958,
      "learning_rate": 7.910294340517782e-06,
      "loss": 0.0249,
      "step": 654280
    },
    {
      "epoch": 1.0707763005439799,
      "grad_norm": 0.8391420841217041,
      "learning_rate": 7.910228448304265e-06,
      "loss": 0.0237,
      "step": 654300
    },
    {
      "epoch": 1.0708090309826332,
      "grad_norm": 1.1629465818405151,
      "learning_rate": 7.910162556090747e-06,
      "loss": 0.0192,
      "step": 654320
    },
    {
      "epoch": 1.0708417614212866,
      "grad_norm": 1.9215834140777588,
      "learning_rate": 7.91009666387723e-06,
      "loss": 0.0197,
      "step": 654340
    },
    {
      "epoch": 1.07087449185994,
      "grad_norm": 0.1742231547832489,
      "learning_rate": 7.910030771663714e-06,
      "loss": 0.0159,
      "step": 654360
    },
    {
      "epoch": 1.0709072222985931,
      "grad_norm": 0.4553651809692383,
      "learning_rate": 7.909964879450196e-06,
      "loss": 0.0212,
      "step": 654380
    },
    {
      "epoch": 1.0709399527372465,
      "grad_norm": 0.14682450890541077,
      "learning_rate": 7.90989898723668e-06,
      "loss": 0.0427,
      "step": 654400
    },
    {
      "epoch": 1.0709726831758999,
      "grad_norm": 0.8718203902244568,
      "learning_rate": 7.909833095023162e-06,
      "loss": 0.0215,
      "step": 654420
    },
    {
      "epoch": 1.0710054136145533,
      "grad_norm": 0.4418359100818634,
      "learning_rate": 7.909767202809645e-06,
      "loss": 0.0234,
      "step": 654440
    },
    {
      "epoch": 1.0710381440532066,
      "grad_norm": 0.45565876364707947,
      "learning_rate": 7.909701310596127e-06,
      "loss": 0.0231,
      "step": 654460
    },
    {
      "epoch": 1.07107087449186,
      "grad_norm": 6.2334089279174805,
      "learning_rate": 7.90963541838261e-06,
      "loss": 0.0223,
      "step": 654480
    },
    {
      "epoch": 1.0711036049305134,
      "grad_norm": 1.0860133171081543,
      "learning_rate": 7.909569526169093e-06,
      "loss": 0.0226,
      "step": 654500
    },
    {
      "epoch": 1.0711363353691665,
      "grad_norm": 0.2815987765789032,
      "learning_rate": 7.909503633955576e-06,
      "loss": 0.0164,
      "step": 654520
    },
    {
      "epoch": 1.07116906580782,
      "grad_norm": 0.3784709572792053,
      "learning_rate": 7.909437741742058e-06,
      "loss": 0.0237,
      "step": 654540
    },
    {
      "epoch": 1.0712017962464733,
      "grad_norm": 0.24487581849098206,
      "learning_rate": 7.909371849528542e-06,
      "loss": 0.0172,
      "step": 654560
    },
    {
      "epoch": 1.0712345266851266,
      "grad_norm": 1.1805453300476074,
      "learning_rate": 7.909305957315024e-06,
      "loss": 0.0265,
      "step": 654580
    },
    {
      "epoch": 1.07126725712378,
      "grad_norm": 0.2526625990867615,
      "learning_rate": 7.909240065101507e-06,
      "loss": 0.0223,
      "step": 654600
    },
    {
      "epoch": 1.0712999875624334,
      "grad_norm": 0.4259144365787506,
      "learning_rate": 7.90917417288799e-06,
      "loss": 0.0256,
      "step": 654620
    },
    {
      "epoch": 1.0713327180010865,
      "grad_norm": 0.2796272933483124,
      "learning_rate": 7.909108280674473e-06,
      "loss": 0.0228,
      "step": 654640
    },
    {
      "epoch": 1.07136544843974,
      "grad_norm": 0.6134371757507324,
      "learning_rate": 7.909042388460956e-06,
      "loss": 0.0238,
      "step": 654660
    },
    {
      "epoch": 1.0713981788783933,
      "grad_norm": 0.5226145386695862,
      "learning_rate": 7.90897649624744e-06,
      "loss": 0.0255,
      "step": 654680
    },
    {
      "epoch": 1.0714309093170467,
      "grad_norm": 0.7601592540740967,
      "learning_rate": 7.908910604033922e-06,
      "loss": 0.0209,
      "step": 654700
    },
    {
      "epoch": 1.0714636397557,
      "grad_norm": 0.6710999011993408,
      "learning_rate": 7.908844711820405e-06,
      "loss": 0.0245,
      "step": 654720
    },
    {
      "epoch": 1.0714963701943534,
      "grad_norm": 0.7243115305900574,
      "learning_rate": 7.908778819606889e-06,
      "loss": 0.0215,
      "step": 654740
    },
    {
      "epoch": 1.0715291006330068,
      "grad_norm": 1.9575988054275513,
      "learning_rate": 7.90871292739337e-06,
      "loss": 0.0222,
      "step": 654760
    },
    {
      "epoch": 1.07156183107166,
      "grad_norm": 0.2786235809326172,
      "learning_rate": 7.908647035179854e-06,
      "loss": 0.018,
      "step": 654780
    },
    {
      "epoch": 1.0715945615103133,
      "grad_norm": 0.7286313772201538,
      "learning_rate": 7.908581142966336e-06,
      "loss": 0.0153,
      "step": 654800
    },
    {
      "epoch": 1.0716272919489667,
      "grad_norm": 0.6081525683403015,
      "learning_rate": 7.90851525075282e-06,
      "loss": 0.0202,
      "step": 654820
    },
    {
      "epoch": 1.07166002238762,
      "grad_norm": 0.9766256809234619,
      "learning_rate": 7.908449358539302e-06,
      "loss": 0.0183,
      "step": 654840
    },
    {
      "epoch": 1.0716927528262734,
      "grad_norm": 1.3598909378051758,
      "learning_rate": 7.908383466325785e-06,
      "loss": 0.0284,
      "step": 654860
    },
    {
      "epoch": 1.0717254832649268,
      "grad_norm": 0.2131877988576889,
      "learning_rate": 7.908317574112267e-06,
      "loss": 0.0256,
      "step": 654880
    },
    {
      "epoch": 1.0717582137035802,
      "grad_norm": 0.6278018355369568,
      "learning_rate": 7.90825168189875e-06,
      "loss": 0.0207,
      "step": 654900
    },
    {
      "epoch": 1.0717909441422333,
      "grad_norm": 0.5252301692962646,
      "learning_rate": 7.908185789685233e-06,
      "loss": 0.0292,
      "step": 654920
    },
    {
      "epoch": 1.0718236745808867,
      "grad_norm": 0.5279003977775574,
      "learning_rate": 7.908119897471716e-06,
      "loss": 0.0199,
      "step": 654940
    },
    {
      "epoch": 1.07185640501954,
      "grad_norm": 0.16740496456623077,
      "learning_rate": 7.908054005258198e-06,
      "loss": 0.02,
      "step": 654960
    },
    {
      "epoch": 1.0718891354581934,
      "grad_norm": 1.4223284721374512,
      "learning_rate": 7.907988113044682e-06,
      "loss": 0.0238,
      "step": 654980
    },
    {
      "epoch": 1.0719218658968468,
      "grad_norm": 1.5616809129714966,
      "learning_rate": 7.907922220831165e-06,
      "loss": 0.0181,
      "step": 655000
    },
    {
      "epoch": 1.0719545963355002,
      "grad_norm": 0.7291759252548218,
      "learning_rate": 7.907856328617647e-06,
      "loss": 0.0269,
      "step": 655020
    },
    {
      "epoch": 1.0719873267741535,
      "grad_norm": 0.8316710591316223,
      "learning_rate": 7.90779043640413e-06,
      "loss": 0.0336,
      "step": 655040
    },
    {
      "epoch": 1.0720200572128067,
      "grad_norm": 3.485344886779785,
      "learning_rate": 7.907724544190614e-06,
      "loss": 0.0231,
      "step": 655060
    },
    {
      "epoch": 1.07205278765146,
      "grad_norm": 1.024084210395813,
      "learning_rate": 7.907658651977096e-06,
      "loss": 0.0262,
      "step": 655080
    },
    {
      "epoch": 1.0720855180901134,
      "grad_norm": 1.7798738479614258,
      "learning_rate": 7.90759275976358e-06,
      "loss": 0.022,
      "step": 655100
    },
    {
      "epoch": 1.0721182485287668,
      "grad_norm": 0.5314630270004272,
      "learning_rate": 7.907526867550063e-06,
      "loss": 0.0191,
      "step": 655120
    },
    {
      "epoch": 1.0721509789674202,
      "grad_norm": 1.9089579582214355,
      "learning_rate": 7.907460975336545e-06,
      "loss": 0.0303,
      "step": 655140
    },
    {
      "epoch": 1.0721837094060735,
      "grad_norm": 4.4597954750061035,
      "learning_rate": 7.907395083123029e-06,
      "loss": 0.0216,
      "step": 655160
    },
    {
      "epoch": 1.0722164398447267,
      "grad_norm": 1.1317518949508667,
      "learning_rate": 7.90732919090951e-06,
      "loss": 0.0193,
      "step": 655180
    },
    {
      "epoch": 1.07224917028338,
      "grad_norm": 1.3686763048171997,
      "learning_rate": 7.907263298695994e-06,
      "loss": 0.0192,
      "step": 655200
    },
    {
      "epoch": 1.0722819007220334,
      "grad_norm": 1.4500805139541626,
      "learning_rate": 7.907197406482476e-06,
      "loss": 0.0276,
      "step": 655220
    },
    {
      "epoch": 1.0723146311606868,
      "grad_norm": 0.7958391904830933,
      "learning_rate": 7.90713151426896e-06,
      "loss": 0.0177,
      "step": 655240
    },
    {
      "epoch": 1.0723473615993402,
      "grad_norm": 0.38764122128486633,
      "learning_rate": 7.907065622055442e-06,
      "loss": 0.0206,
      "step": 655260
    },
    {
      "epoch": 1.0723800920379936,
      "grad_norm": 0.30848875641822815,
      "learning_rate": 7.906999729841925e-06,
      "loss": 0.0257,
      "step": 655280
    },
    {
      "epoch": 1.072412822476647,
      "grad_norm": 0.25323206186294556,
      "learning_rate": 7.906933837628407e-06,
      "loss": 0.0246,
      "step": 655300
    },
    {
      "epoch": 1.0724455529153,
      "grad_norm": 0.2609350085258484,
      "learning_rate": 7.906867945414891e-06,
      "loss": 0.0282,
      "step": 655320
    },
    {
      "epoch": 1.0724782833539535,
      "grad_norm": 0.8366500735282898,
      "learning_rate": 7.906802053201373e-06,
      "loss": 0.0319,
      "step": 655340
    },
    {
      "epoch": 1.0725110137926068,
      "grad_norm": 0.6989918351173401,
      "learning_rate": 7.906736160987856e-06,
      "loss": 0.0261,
      "step": 655360
    },
    {
      "epoch": 1.0725437442312602,
      "grad_norm": 0.8929657936096191,
      "learning_rate": 7.90667026877434e-06,
      "loss": 0.0201,
      "step": 655380
    },
    {
      "epoch": 1.0725764746699136,
      "grad_norm": 0.6396389603614807,
      "learning_rate": 7.906604376560822e-06,
      "loss": 0.0211,
      "step": 655400
    },
    {
      "epoch": 1.072609205108567,
      "grad_norm": 1.136405348777771,
      "learning_rate": 7.906538484347305e-06,
      "loss": 0.0266,
      "step": 655420
    },
    {
      "epoch": 1.07264193554722,
      "grad_norm": 0.7956412434577942,
      "learning_rate": 7.906472592133789e-06,
      "loss": 0.0234,
      "step": 655440
    },
    {
      "epoch": 1.0726746659858735,
      "grad_norm": 1.982763648033142,
      "learning_rate": 7.906406699920271e-06,
      "loss": 0.0271,
      "step": 655460
    },
    {
      "epoch": 1.0727073964245268,
      "grad_norm": 0.5922468304634094,
      "learning_rate": 7.906340807706754e-06,
      "loss": 0.0257,
      "step": 655480
    },
    {
      "epoch": 1.0727401268631802,
      "grad_norm": 0.20262213051319122,
      "learning_rate": 7.906274915493238e-06,
      "loss": 0.0276,
      "step": 655500
    },
    {
      "epoch": 1.0727728573018336,
      "grad_norm": 1.2593414783477783,
      "learning_rate": 7.90620902327972e-06,
      "loss": 0.0195,
      "step": 655520
    },
    {
      "epoch": 1.072805587740487,
      "grad_norm": 3.1642343997955322,
      "learning_rate": 7.906143131066204e-06,
      "loss": 0.0175,
      "step": 655540
    },
    {
      "epoch": 1.0728383181791403,
      "grad_norm": 0.9858105182647705,
      "learning_rate": 7.906077238852685e-06,
      "loss": 0.0239,
      "step": 655560
    },
    {
      "epoch": 1.0728710486177935,
      "grad_norm": 0.40354904532432556,
      "learning_rate": 7.906011346639169e-06,
      "loss": 0.0225,
      "step": 655580
    },
    {
      "epoch": 1.0729037790564468,
      "grad_norm": 1.2893775701522827,
      "learning_rate": 7.905945454425651e-06,
      "loss": 0.0221,
      "step": 655600
    },
    {
      "epoch": 1.0729365094951002,
      "grad_norm": 1.7083568572998047,
      "learning_rate": 7.905879562212134e-06,
      "loss": 0.0269,
      "step": 655620
    },
    {
      "epoch": 1.0729692399337536,
      "grad_norm": 0.7418255805969238,
      "learning_rate": 7.905813669998616e-06,
      "loss": 0.0198,
      "step": 655640
    },
    {
      "epoch": 1.073001970372407,
      "grad_norm": 0.6197325587272644,
      "learning_rate": 7.9057477777851e-06,
      "loss": 0.039,
      "step": 655660
    },
    {
      "epoch": 1.0730347008110603,
      "grad_norm": 0.5539736151695251,
      "learning_rate": 7.905681885571582e-06,
      "loss": 0.0234,
      "step": 655680
    },
    {
      "epoch": 1.0730674312497137,
      "grad_norm": 0.2871845066547394,
      "learning_rate": 7.905615993358065e-06,
      "loss": 0.0245,
      "step": 655700
    },
    {
      "epoch": 1.0731001616883669,
      "grad_norm": 0.1768256425857544,
      "learning_rate": 7.905550101144547e-06,
      "loss": 0.0267,
      "step": 655720
    },
    {
      "epoch": 1.0731328921270202,
      "grad_norm": 0.32824018597602844,
      "learning_rate": 7.905484208931031e-06,
      "loss": 0.0287,
      "step": 655740
    },
    {
      "epoch": 1.0731656225656736,
      "grad_norm": 0.506479024887085,
      "learning_rate": 7.905418316717513e-06,
      "loss": 0.0254,
      "step": 655760
    },
    {
      "epoch": 1.073198353004327,
      "grad_norm": 4.869390964508057,
      "learning_rate": 7.905352424503996e-06,
      "loss": 0.0153,
      "step": 655780
    },
    {
      "epoch": 1.0732310834429803,
      "grad_norm": 0.9298227429389954,
      "learning_rate": 7.90528653229048e-06,
      "loss": 0.0183,
      "step": 655800
    },
    {
      "epoch": 1.0732638138816337,
      "grad_norm": 0.5341383218765259,
      "learning_rate": 7.905220640076962e-06,
      "loss": 0.0181,
      "step": 655820
    },
    {
      "epoch": 1.073296544320287,
      "grad_norm": 0.996220588684082,
      "learning_rate": 7.905154747863445e-06,
      "loss": 0.0252,
      "step": 655840
    },
    {
      "epoch": 1.0733292747589402,
      "grad_norm": 0.35367777943611145,
      "learning_rate": 7.905088855649929e-06,
      "loss": 0.0227,
      "step": 655860
    },
    {
      "epoch": 1.0733620051975936,
      "grad_norm": 0.5970979332923889,
      "learning_rate": 7.905022963436411e-06,
      "loss": 0.0273,
      "step": 655880
    },
    {
      "epoch": 1.073394735636247,
      "grad_norm": 0.9479021430015564,
      "learning_rate": 7.904957071222895e-06,
      "loss": 0.0223,
      "step": 655900
    },
    {
      "epoch": 1.0734274660749004,
      "grad_norm": 0.9280235171318054,
      "learning_rate": 7.904891179009378e-06,
      "loss": 0.0238,
      "step": 655920
    },
    {
      "epoch": 1.0734601965135537,
      "grad_norm": 0.5343939661979675,
      "learning_rate": 7.90482528679586e-06,
      "loss": 0.0184,
      "step": 655940
    },
    {
      "epoch": 1.073492926952207,
      "grad_norm": 0.4807089865207672,
      "learning_rate": 7.904759394582344e-06,
      "loss": 0.0308,
      "step": 655960
    },
    {
      "epoch": 1.0735256573908603,
      "grad_norm": 0.5667049288749695,
      "learning_rate": 7.904693502368825e-06,
      "loss": 0.0181,
      "step": 655980
    },
    {
      "epoch": 1.0735583878295136,
      "grad_norm": 0.49603357911109924,
      "learning_rate": 7.904627610155309e-06,
      "loss": 0.0217,
      "step": 656000
    },
    {
      "epoch": 1.073591118268167,
      "grad_norm": 0.1321619749069214,
      "learning_rate": 7.904561717941791e-06,
      "loss": 0.0321,
      "step": 656020
    },
    {
      "epoch": 1.0736238487068204,
      "grad_norm": 0.19658228754997253,
      "learning_rate": 7.904495825728275e-06,
      "loss": 0.0226,
      "step": 656040
    },
    {
      "epoch": 1.0736565791454737,
      "grad_norm": 0.6441705226898193,
      "learning_rate": 7.904429933514756e-06,
      "loss": 0.0279,
      "step": 656060
    },
    {
      "epoch": 1.0736893095841271,
      "grad_norm": 0.4763157069683075,
      "learning_rate": 7.90436404130124e-06,
      "loss": 0.0135,
      "step": 656080
    },
    {
      "epoch": 1.0737220400227805,
      "grad_norm": 1.1600853204727173,
      "learning_rate": 7.904298149087722e-06,
      "loss": 0.0223,
      "step": 656100
    },
    {
      "epoch": 1.0737547704614336,
      "grad_norm": 1.3955230712890625,
      "learning_rate": 7.904232256874206e-06,
      "loss": 0.0327,
      "step": 656120
    },
    {
      "epoch": 1.073787500900087,
      "grad_norm": 1.6009474992752075,
      "learning_rate": 7.904166364660689e-06,
      "loss": 0.0276,
      "step": 656140
    },
    {
      "epoch": 1.0738202313387404,
      "grad_norm": 0.4603976011276245,
      "learning_rate": 7.904100472447171e-06,
      "loss": 0.0156,
      "step": 656160
    },
    {
      "epoch": 1.0738529617773938,
      "grad_norm": 3.2205140590667725,
      "learning_rate": 7.904034580233655e-06,
      "loss": 0.031,
      "step": 656180
    },
    {
      "epoch": 1.0738856922160471,
      "grad_norm": 0.6456261873245239,
      "learning_rate": 7.903968688020136e-06,
      "loss": 0.0227,
      "step": 656200
    },
    {
      "epoch": 1.0739184226547005,
      "grad_norm": 0.37509942054748535,
      "learning_rate": 7.90390279580662e-06,
      "loss": 0.0268,
      "step": 656220
    },
    {
      "epoch": 1.0739511530933536,
      "grad_norm": 0.38939741253852844,
      "learning_rate": 7.903836903593104e-06,
      "loss": 0.0211,
      "step": 656240
    },
    {
      "epoch": 1.073983883532007,
      "grad_norm": 0.7743372917175293,
      "learning_rate": 7.903771011379586e-06,
      "loss": 0.0239,
      "step": 656260
    },
    {
      "epoch": 1.0740166139706604,
      "grad_norm": 0.536541223526001,
      "learning_rate": 7.903705119166069e-06,
      "loss": 0.0216,
      "step": 656280
    },
    {
      "epoch": 1.0740493444093138,
      "grad_norm": Infinity,
      "learning_rate": 7.903639226952553e-06,
      "loss": 0.0206,
      "step": 656300
    },
    {
      "epoch": 1.0740820748479671,
      "grad_norm": 0.2760058343410492,
      "learning_rate": 7.903573334739035e-06,
      "loss": 0.0157,
      "step": 656320
    },
    {
      "epoch": 1.0741148052866205,
      "grad_norm": 0.4353947341442108,
      "learning_rate": 7.903507442525518e-06,
      "loss": 0.0237,
      "step": 656340
    },
    {
      "epoch": 1.0741475357252739,
      "grad_norm": 0.6296048164367676,
      "learning_rate": 7.903441550312e-06,
      "loss": 0.0197,
      "step": 656360
    },
    {
      "epoch": 1.074180266163927,
      "grad_norm": 0.5540710687637329,
      "learning_rate": 7.903375658098484e-06,
      "loss": 0.0244,
      "step": 656380
    },
    {
      "epoch": 1.0742129966025804,
      "grad_norm": 1.7826225757598877,
      "learning_rate": 7.903309765884966e-06,
      "loss": 0.0265,
      "step": 656400
    },
    {
      "epoch": 1.0742457270412338,
      "grad_norm": 0.5583434104919434,
      "learning_rate": 7.90324387367145e-06,
      "loss": 0.0229,
      "step": 656420
    },
    {
      "epoch": 1.0742784574798871,
      "grad_norm": 0.2501540780067444,
      "learning_rate": 7.903177981457931e-06,
      "loss": 0.0305,
      "step": 656440
    },
    {
      "epoch": 1.0743111879185405,
      "grad_norm": 2.464792490005493,
      "learning_rate": 7.903112089244415e-06,
      "loss": 0.0242,
      "step": 656460
    },
    {
      "epoch": 1.074343918357194,
      "grad_norm": 0.7969581484794617,
      "learning_rate": 7.903046197030898e-06,
      "loss": 0.0284,
      "step": 656480
    },
    {
      "epoch": 1.0743766487958473,
      "grad_norm": 0.8977438807487488,
      "learning_rate": 7.90298030481738e-06,
      "loss": 0.0241,
      "step": 656500
    },
    {
      "epoch": 1.0744093792345004,
      "grad_norm": 1.2243232727050781,
      "learning_rate": 7.902914412603864e-06,
      "loss": 0.0305,
      "step": 656520
    },
    {
      "epoch": 1.0744421096731538,
      "grad_norm": 0.6679356694221497,
      "learning_rate": 7.902848520390346e-06,
      "loss": 0.03,
      "step": 656540
    },
    {
      "epoch": 1.0744748401118072,
      "grad_norm": 0.38415369391441345,
      "learning_rate": 7.90278262817683e-06,
      "loss": 0.0239,
      "step": 656560
    },
    {
      "epoch": 1.0745075705504605,
      "grad_norm": 0.32626986503601074,
      "learning_rate": 7.902716735963311e-06,
      "loss": 0.0217,
      "step": 656580
    },
    {
      "epoch": 1.074540300989114,
      "grad_norm": 1.956200361251831,
      "learning_rate": 7.902650843749795e-06,
      "loss": 0.0264,
      "step": 656600
    },
    {
      "epoch": 1.0745730314277673,
      "grad_norm": 0.6246150732040405,
      "learning_rate": 7.902584951536277e-06,
      "loss": 0.0187,
      "step": 656620
    },
    {
      "epoch": 1.0746057618664207,
      "grad_norm": 0.3035743236541748,
      "learning_rate": 7.90251905932276e-06,
      "loss": 0.0338,
      "step": 656640
    },
    {
      "epoch": 1.0746384923050738,
      "grad_norm": 0.25021201372146606,
      "learning_rate": 7.902453167109244e-06,
      "loss": 0.0251,
      "step": 656660
    },
    {
      "epoch": 1.0746712227437272,
      "grad_norm": 0.9752406477928162,
      "learning_rate": 7.902387274895726e-06,
      "loss": 0.022,
      "step": 656680
    },
    {
      "epoch": 1.0747039531823805,
      "grad_norm": 1.4706957340240479,
      "learning_rate": 7.90232138268221e-06,
      "loss": 0.0257,
      "step": 656700
    },
    {
      "epoch": 1.074736683621034,
      "grad_norm": 1.2529929876327515,
      "learning_rate": 7.902255490468693e-06,
      "loss": 0.0257,
      "step": 656720
    },
    {
      "epoch": 1.0747694140596873,
      "grad_norm": 0.6428996324539185,
      "learning_rate": 7.902189598255175e-06,
      "loss": 0.0173,
      "step": 656740
    },
    {
      "epoch": 1.0748021444983407,
      "grad_norm": 1.1667426824569702,
      "learning_rate": 7.902123706041658e-06,
      "loss": 0.0251,
      "step": 656760
    },
    {
      "epoch": 1.0748348749369938,
      "grad_norm": 0.2545933723449707,
      "learning_rate": 7.90205781382814e-06,
      "loss": 0.0177,
      "step": 656780
    },
    {
      "epoch": 1.0748676053756472,
      "grad_norm": 1.0618627071380615,
      "learning_rate": 7.901991921614624e-06,
      "loss": 0.0305,
      "step": 656800
    },
    {
      "epoch": 1.0749003358143006,
      "grad_norm": 0.2579784393310547,
      "learning_rate": 7.901926029401107e-06,
      "loss": 0.0232,
      "step": 656820
    },
    {
      "epoch": 1.074933066252954,
      "grad_norm": 0.37952858209609985,
      "learning_rate": 7.90186013718759e-06,
      "loss": 0.0267,
      "step": 656840
    },
    {
      "epoch": 1.0749657966916073,
      "grad_norm": 0.41066470742225647,
      "learning_rate": 7.901794244974073e-06,
      "loss": 0.0243,
      "step": 656860
    },
    {
      "epoch": 1.0749985271302607,
      "grad_norm": 0.3757918179035187,
      "learning_rate": 7.901728352760555e-06,
      "loss": 0.0183,
      "step": 656880
    },
    {
      "epoch": 1.0750312575689138,
      "grad_norm": 0.6634865999221802,
      "learning_rate": 7.901662460547038e-06,
      "loss": 0.0275,
      "step": 656900
    },
    {
      "epoch": 1.0750639880075672,
      "grad_norm": 0.15914113819599152,
      "learning_rate": 7.90159656833352e-06,
      "loss": 0.0346,
      "step": 656920
    },
    {
      "epoch": 1.0750967184462206,
      "grad_norm": 1.2122864723205566,
      "learning_rate": 7.901530676120004e-06,
      "loss": 0.0244,
      "step": 656940
    },
    {
      "epoch": 1.075129448884874,
      "grad_norm": 0.8680856823921204,
      "learning_rate": 7.901464783906486e-06,
      "loss": 0.0231,
      "step": 656960
    },
    {
      "epoch": 1.0751621793235273,
      "grad_norm": 0.8182904720306396,
      "learning_rate": 7.90139889169297e-06,
      "loss": 0.0254,
      "step": 656980
    },
    {
      "epoch": 1.0751949097621807,
      "grad_norm": 0.11345162987709045,
      "learning_rate": 7.901332999479451e-06,
      "loss": 0.0261,
      "step": 657000
    },
    {
      "epoch": 1.075227640200834,
      "grad_norm": 1.0722521543502808,
      "learning_rate": 7.901267107265935e-06,
      "loss": 0.0214,
      "step": 657020
    },
    {
      "epoch": 1.0752603706394872,
      "grad_norm": 1.551281213760376,
      "learning_rate": 7.901201215052418e-06,
      "loss": 0.0288,
      "step": 657040
    },
    {
      "epoch": 1.0752931010781406,
      "grad_norm": 0.16300013661384583,
      "learning_rate": 7.9011353228389e-06,
      "loss": 0.0237,
      "step": 657060
    },
    {
      "epoch": 1.075325831516794,
      "grad_norm": 1.0941152572631836,
      "learning_rate": 7.901069430625384e-06,
      "loss": 0.0269,
      "step": 657080
    },
    {
      "epoch": 1.0753585619554473,
      "grad_norm": 0.6128321886062622,
      "learning_rate": 7.901003538411867e-06,
      "loss": 0.0228,
      "step": 657100
    },
    {
      "epoch": 1.0753912923941007,
      "grad_norm": 0.7672540545463562,
      "learning_rate": 7.90093764619835e-06,
      "loss": 0.0215,
      "step": 657120
    },
    {
      "epoch": 1.075424022832754,
      "grad_norm": 1.2984565496444702,
      "learning_rate": 7.900871753984833e-06,
      "loss": 0.0209,
      "step": 657140
    },
    {
      "epoch": 1.0754567532714074,
      "grad_norm": 0.6810391545295715,
      "learning_rate": 7.900805861771315e-06,
      "loss": 0.0237,
      "step": 657160
    },
    {
      "epoch": 1.0754894837100606,
      "grad_norm": 0.46808525919914246,
      "learning_rate": 7.900739969557798e-06,
      "loss": 0.0193,
      "step": 657180
    },
    {
      "epoch": 1.075522214148714,
      "grad_norm": 0.25022366642951965,
      "learning_rate": 7.900674077344282e-06,
      "loss": 0.0222,
      "step": 657200
    },
    {
      "epoch": 1.0755549445873673,
      "grad_norm": 0.8576799035072327,
      "learning_rate": 7.900608185130764e-06,
      "loss": 0.0186,
      "step": 657220
    },
    {
      "epoch": 1.0755876750260207,
      "grad_norm": 2.6116487979888916,
      "learning_rate": 7.900542292917247e-06,
      "loss": 0.0248,
      "step": 657240
    },
    {
      "epoch": 1.075620405464674,
      "grad_norm": 0.673231840133667,
      "learning_rate": 7.90047640070373e-06,
      "loss": 0.024,
      "step": 657260
    },
    {
      "epoch": 1.0756531359033275,
      "grad_norm": 0.3462322950363159,
      "learning_rate": 7.900410508490213e-06,
      "loss": 0.0149,
      "step": 657280
    },
    {
      "epoch": 1.0756858663419808,
      "grad_norm": 0.36199745535850525,
      "learning_rate": 7.900344616276695e-06,
      "loss": 0.0189,
      "step": 657300
    },
    {
      "epoch": 1.075718596780634,
      "grad_norm": 1.075298547744751,
      "learning_rate": 7.900278724063178e-06,
      "loss": 0.0225,
      "step": 657320
    },
    {
      "epoch": 1.0757513272192873,
      "grad_norm": 0.5214331746101379,
      "learning_rate": 7.90021283184966e-06,
      "loss": 0.0253,
      "step": 657340
    },
    {
      "epoch": 1.0757840576579407,
      "grad_norm": 0.7352671027183533,
      "learning_rate": 7.900146939636144e-06,
      "loss": 0.0262,
      "step": 657360
    },
    {
      "epoch": 1.075816788096594,
      "grad_norm": 0.8161875605583191,
      "learning_rate": 7.900081047422626e-06,
      "loss": 0.0204,
      "step": 657380
    },
    {
      "epoch": 1.0758495185352475,
      "grad_norm": 0.2937985360622406,
      "learning_rate": 7.90001515520911e-06,
      "loss": 0.0242,
      "step": 657400
    },
    {
      "epoch": 1.0758822489739008,
      "grad_norm": 0.5350486040115356,
      "learning_rate": 7.899949262995591e-06,
      "loss": 0.0188,
      "step": 657420
    },
    {
      "epoch": 1.075914979412554,
      "grad_norm": 0.4694434106349945,
      "learning_rate": 7.899883370782075e-06,
      "loss": 0.0136,
      "step": 657440
    },
    {
      "epoch": 1.0759477098512074,
      "grad_norm": 1.2732102870941162,
      "learning_rate": 7.899817478568558e-06,
      "loss": 0.0343,
      "step": 657460
    },
    {
      "epoch": 1.0759804402898607,
      "grad_norm": 0.8013247847557068,
      "learning_rate": 7.89975158635504e-06,
      "loss": 0.0246,
      "step": 657480
    },
    {
      "epoch": 1.076013170728514,
      "grad_norm": 1.7385494709014893,
      "learning_rate": 7.899685694141524e-06,
      "loss": 0.0237,
      "step": 657500
    },
    {
      "epoch": 1.0760459011671675,
      "grad_norm": 0.49840566515922546,
      "learning_rate": 7.899619801928007e-06,
      "loss": 0.0239,
      "step": 657520
    },
    {
      "epoch": 1.0760786316058208,
      "grad_norm": 0.749215304851532,
      "learning_rate": 7.899553909714491e-06,
      "loss": 0.022,
      "step": 657540
    },
    {
      "epoch": 1.0761113620444742,
      "grad_norm": 1.3032382726669312,
      "learning_rate": 7.899488017500973e-06,
      "loss": 0.0246,
      "step": 657560
    },
    {
      "epoch": 1.0761440924831274,
      "grad_norm": 0.9192091226577759,
      "learning_rate": 7.899422125287457e-06,
      "loss": 0.0201,
      "step": 657580
    },
    {
      "epoch": 1.0761768229217807,
      "grad_norm": 0.6702952980995178,
      "learning_rate": 7.899356233073938e-06,
      "loss": 0.0243,
      "step": 657600
    },
    {
      "epoch": 1.0762095533604341,
      "grad_norm": 1.606579303741455,
      "learning_rate": 7.899290340860422e-06,
      "loss": 0.0279,
      "step": 657620
    },
    {
      "epoch": 1.0762422837990875,
      "grad_norm": 1.0099884271621704,
      "learning_rate": 7.899224448646904e-06,
      "loss": 0.031,
      "step": 657640
    },
    {
      "epoch": 1.0762750142377409,
      "grad_norm": 0.23867438733577728,
      "learning_rate": 7.899158556433387e-06,
      "loss": 0.0174,
      "step": 657660
    },
    {
      "epoch": 1.0763077446763942,
      "grad_norm": 0.6386571526527405,
      "learning_rate": 7.89909266421987e-06,
      "loss": 0.023,
      "step": 657680
    },
    {
      "epoch": 1.0763404751150474,
      "grad_norm": 0.2452048808336258,
      "learning_rate": 7.899026772006353e-06,
      "loss": 0.0208,
      "step": 657700
    },
    {
      "epoch": 1.0763732055537008,
      "grad_norm": 1.1265017986297607,
      "learning_rate": 7.898960879792835e-06,
      "loss": 0.0279,
      "step": 657720
    },
    {
      "epoch": 1.0764059359923541,
      "grad_norm": 3.2998998165130615,
      "learning_rate": 7.898894987579318e-06,
      "loss": 0.0267,
      "step": 657740
    },
    {
      "epoch": 1.0764386664310075,
      "grad_norm": 0.6080543994903564,
      "learning_rate": 7.8988290953658e-06,
      "loss": 0.0197,
      "step": 657760
    },
    {
      "epoch": 1.0764713968696609,
      "grad_norm": 1.1507081985473633,
      "learning_rate": 7.898763203152284e-06,
      "loss": 0.0201,
      "step": 657780
    },
    {
      "epoch": 1.0765041273083142,
      "grad_norm": 0.5004706382751465,
      "learning_rate": 7.898697310938766e-06,
      "loss": 0.0227,
      "step": 657800
    },
    {
      "epoch": 1.0765368577469676,
      "grad_norm": 0.49478113651275635,
      "learning_rate": 7.89863141872525e-06,
      "loss": 0.0261,
      "step": 657820
    },
    {
      "epoch": 1.0765695881856208,
      "grad_norm": 0.8518936634063721,
      "learning_rate": 7.898565526511733e-06,
      "loss": 0.0257,
      "step": 657840
    },
    {
      "epoch": 1.0766023186242741,
      "grad_norm": 0.5286685228347778,
      "learning_rate": 7.898499634298215e-06,
      "loss": 0.0317,
      "step": 657860
    },
    {
      "epoch": 1.0766350490629275,
      "grad_norm": 0.6363043785095215,
      "learning_rate": 7.898433742084698e-06,
      "loss": 0.0247,
      "step": 657880
    },
    {
      "epoch": 1.0766677795015809,
      "grad_norm": 6.936660289764404,
      "learning_rate": 7.898367849871182e-06,
      "loss": 0.0262,
      "step": 657900
    },
    {
      "epoch": 1.0767005099402343,
      "grad_norm": 0.6405016183853149,
      "learning_rate": 7.898301957657664e-06,
      "loss": 0.0179,
      "step": 657920
    },
    {
      "epoch": 1.0767332403788876,
      "grad_norm": 2.0155189037323,
      "learning_rate": 7.898236065444148e-06,
      "loss": 0.0158,
      "step": 657940
    },
    {
      "epoch": 1.076765970817541,
      "grad_norm": 1.1558037996292114,
      "learning_rate": 7.898170173230631e-06,
      "loss": 0.0239,
      "step": 657960
    },
    {
      "epoch": 1.0767987012561941,
      "grad_norm": 0.4766821265220642,
      "learning_rate": 7.898104281017113e-06,
      "loss": 0.0291,
      "step": 657980
    },
    {
      "epoch": 1.0768314316948475,
      "grad_norm": 0.9827272295951843,
      "learning_rate": 7.898038388803597e-06,
      "loss": 0.0186,
      "step": 658000
    },
    {
      "epoch": 1.076864162133501,
      "grad_norm": 0.8145485520362854,
      "learning_rate": 7.897972496590079e-06,
      "loss": 0.0294,
      "step": 658020
    },
    {
      "epoch": 1.0768968925721543,
      "grad_norm": 1.879576563835144,
      "learning_rate": 7.897906604376562e-06,
      "loss": 0.023,
      "step": 658040
    },
    {
      "epoch": 1.0769296230108076,
      "grad_norm": 2.0686771869659424,
      "learning_rate": 7.897840712163044e-06,
      "loss": 0.0239,
      "step": 658060
    },
    {
      "epoch": 1.076962353449461,
      "grad_norm": 1.13209867477417,
      "learning_rate": 7.897774819949528e-06,
      "loss": 0.0284,
      "step": 658080
    },
    {
      "epoch": 1.0769950838881144,
      "grad_norm": 0.593666136264801,
      "learning_rate": 7.89770892773601e-06,
      "loss": 0.0362,
      "step": 658100
    },
    {
      "epoch": 1.0770278143267675,
      "grad_norm": 0.508054256439209,
      "learning_rate": 7.897643035522493e-06,
      "loss": 0.0222,
      "step": 658120
    },
    {
      "epoch": 1.077060544765421,
      "grad_norm": 0.6575401425361633,
      "learning_rate": 7.897577143308975e-06,
      "loss": 0.0272,
      "step": 658140
    },
    {
      "epoch": 1.0770932752040743,
      "grad_norm": 0.6911783814430237,
      "learning_rate": 7.897511251095459e-06,
      "loss": 0.0238,
      "step": 658160
    },
    {
      "epoch": 1.0771260056427276,
      "grad_norm": 2.839103937149048,
      "learning_rate": 7.89744535888194e-06,
      "loss": 0.027,
      "step": 658180
    },
    {
      "epoch": 1.077158736081381,
      "grad_norm": 0.3082330524921417,
      "learning_rate": 7.897379466668424e-06,
      "loss": 0.0285,
      "step": 658200
    },
    {
      "epoch": 1.0771914665200344,
      "grad_norm": 1.1444939374923706,
      "learning_rate": 7.897313574454908e-06,
      "loss": 0.0227,
      "step": 658220
    },
    {
      "epoch": 1.0772241969586875,
      "grad_norm": 0.5106544494628906,
      "learning_rate": 7.89724768224139e-06,
      "loss": 0.0222,
      "step": 658240
    },
    {
      "epoch": 1.077256927397341,
      "grad_norm": 0.6709571480751038,
      "learning_rate": 7.897181790027873e-06,
      "loss": 0.0208,
      "step": 658260
    },
    {
      "epoch": 1.0772896578359943,
      "grad_norm": 1.3315526247024536,
      "learning_rate": 7.897115897814357e-06,
      "loss": 0.0359,
      "step": 658280
    },
    {
      "epoch": 1.0773223882746477,
      "grad_norm": 0.6578410267829895,
      "learning_rate": 7.897050005600839e-06,
      "loss": 0.0223,
      "step": 658300
    },
    {
      "epoch": 1.077355118713301,
      "grad_norm": 0.6161916255950928,
      "learning_rate": 7.896984113387322e-06,
      "loss": 0.0229,
      "step": 658320
    },
    {
      "epoch": 1.0773878491519544,
      "grad_norm": 1.5852510929107666,
      "learning_rate": 7.896918221173806e-06,
      "loss": 0.0164,
      "step": 658340
    },
    {
      "epoch": 1.0774205795906078,
      "grad_norm": 0.2605167031288147,
      "learning_rate": 7.896852328960288e-06,
      "loss": 0.0207,
      "step": 658360
    },
    {
      "epoch": 1.077453310029261,
      "grad_norm": 0.6526498794555664,
      "learning_rate": 7.896786436746771e-06,
      "loss": 0.0316,
      "step": 658380
    },
    {
      "epoch": 1.0774860404679143,
      "grad_norm": 1.3249878883361816,
      "learning_rate": 7.896720544533253e-06,
      "loss": 0.0217,
      "step": 658400
    },
    {
      "epoch": 1.0775187709065677,
      "grad_norm": 1.137807846069336,
      "learning_rate": 7.896654652319737e-06,
      "loss": 0.0287,
      "step": 658420
    },
    {
      "epoch": 1.077551501345221,
      "grad_norm": 0.18266843259334564,
      "learning_rate": 7.896588760106219e-06,
      "loss": 0.0238,
      "step": 658440
    },
    {
      "epoch": 1.0775842317838744,
      "grad_norm": 0.7549615502357483,
      "learning_rate": 7.896522867892702e-06,
      "loss": 0.0227,
      "step": 658460
    },
    {
      "epoch": 1.0776169622225278,
      "grad_norm": 1.0089783668518066,
      "learning_rate": 7.896456975679184e-06,
      "loss": 0.0259,
      "step": 658480
    },
    {
      "epoch": 1.077649692661181,
      "grad_norm": 0.8916723132133484,
      "learning_rate": 7.896391083465668e-06,
      "loss": 0.0262,
      "step": 658500
    },
    {
      "epoch": 1.0776824230998343,
      "grad_norm": 0.7014074325561523,
      "learning_rate": 7.89632519125215e-06,
      "loss": 0.0198,
      "step": 658520
    },
    {
      "epoch": 1.0777151535384877,
      "grad_norm": 0.6280635595321655,
      "learning_rate": 7.896259299038633e-06,
      "loss": 0.0303,
      "step": 658540
    },
    {
      "epoch": 1.077747883977141,
      "grad_norm": 0.14322535693645477,
      "learning_rate": 7.896193406825115e-06,
      "loss": 0.0265,
      "step": 658560
    },
    {
      "epoch": 1.0777806144157944,
      "grad_norm": 0.829847514629364,
      "learning_rate": 7.896127514611599e-06,
      "loss": 0.032,
      "step": 658580
    },
    {
      "epoch": 1.0778133448544478,
      "grad_norm": 0.7022738456726074,
      "learning_rate": 7.896061622398082e-06,
      "loss": 0.0228,
      "step": 658600
    },
    {
      "epoch": 1.0778460752931012,
      "grad_norm": 0.6560009121894836,
      "learning_rate": 7.895995730184564e-06,
      "loss": 0.0185,
      "step": 658620
    },
    {
      "epoch": 1.0778788057317543,
      "grad_norm": 0.610673189163208,
      "learning_rate": 7.895929837971048e-06,
      "loss": 0.0235,
      "step": 658640
    },
    {
      "epoch": 1.0779115361704077,
      "grad_norm": 1.5892912149429321,
      "learning_rate": 7.89586394575753e-06,
      "loss": 0.0261,
      "step": 658660
    },
    {
      "epoch": 1.077944266609061,
      "grad_norm": 0.7340046167373657,
      "learning_rate": 7.895798053544013e-06,
      "loss": 0.024,
      "step": 658680
    },
    {
      "epoch": 1.0779769970477144,
      "grad_norm": 1.7715493440628052,
      "learning_rate": 7.895732161330497e-06,
      "loss": 0.0294,
      "step": 658700
    },
    {
      "epoch": 1.0780097274863678,
      "grad_norm": 0.6240569353103638,
      "learning_rate": 7.895666269116979e-06,
      "loss": 0.0249,
      "step": 658720
    },
    {
      "epoch": 1.0780424579250212,
      "grad_norm": 0.32147493958473206,
      "learning_rate": 7.895600376903462e-06,
      "loss": 0.0287,
      "step": 658740
    },
    {
      "epoch": 1.0780751883636746,
      "grad_norm": 1.8045313358306885,
      "learning_rate": 7.895534484689946e-06,
      "loss": 0.036,
      "step": 658760
    },
    {
      "epoch": 1.0781079188023277,
      "grad_norm": 0.3587184250354767,
      "learning_rate": 7.895468592476428e-06,
      "loss": 0.0189,
      "step": 658780
    },
    {
      "epoch": 1.078140649240981,
      "grad_norm": 0.4371359944343567,
      "learning_rate": 7.895402700262911e-06,
      "loss": 0.0191,
      "step": 658800
    },
    {
      "epoch": 1.0781733796796344,
      "grad_norm": 0.15115156769752502,
      "learning_rate": 7.895336808049393e-06,
      "loss": 0.0206,
      "step": 658820
    },
    {
      "epoch": 1.0782061101182878,
      "grad_norm": 0.5358571410179138,
      "learning_rate": 7.895270915835877e-06,
      "loss": 0.0221,
      "step": 658840
    },
    {
      "epoch": 1.0782388405569412,
      "grad_norm": 1.1854019165039062,
      "learning_rate": 7.895205023622359e-06,
      "loss": 0.0264,
      "step": 658860
    },
    {
      "epoch": 1.0782715709955946,
      "grad_norm": 2.1325879096984863,
      "learning_rate": 7.895139131408842e-06,
      "loss": 0.0257,
      "step": 658880
    },
    {
      "epoch": 1.078304301434248,
      "grad_norm": 0.4621795117855072,
      "learning_rate": 7.895073239195324e-06,
      "loss": 0.0198,
      "step": 658900
    },
    {
      "epoch": 1.078337031872901,
      "grad_norm": 0.406222403049469,
      "learning_rate": 7.895007346981808e-06,
      "loss": 0.025,
      "step": 658920
    },
    {
      "epoch": 1.0783697623115545,
      "grad_norm": 0.954066812992096,
      "learning_rate": 7.894941454768291e-06,
      "loss": 0.0183,
      "step": 658940
    },
    {
      "epoch": 1.0784024927502078,
      "grad_norm": 0.7852377891540527,
      "learning_rate": 7.894875562554773e-06,
      "loss": 0.026,
      "step": 658960
    },
    {
      "epoch": 1.0784352231888612,
      "grad_norm": 0.9643440246582031,
      "learning_rate": 7.894809670341257e-06,
      "loss": 0.0248,
      "step": 658980
    },
    {
      "epoch": 1.0784679536275146,
      "grad_norm": 0.411394864320755,
      "learning_rate": 7.894743778127739e-06,
      "loss": 0.0224,
      "step": 659000
    },
    {
      "epoch": 1.078500684066168,
      "grad_norm": 0.3948048949241638,
      "learning_rate": 7.894677885914222e-06,
      "loss": 0.0323,
      "step": 659020
    },
    {
      "epoch": 1.078533414504821,
      "grad_norm": 2.7529892921447754,
      "learning_rate": 7.894611993700704e-06,
      "loss": 0.0285,
      "step": 659040
    },
    {
      "epoch": 1.0785661449434745,
      "grad_norm": 2.076843023300171,
      "learning_rate": 7.894546101487188e-06,
      "loss": 0.0211,
      "step": 659060
    },
    {
      "epoch": 1.0785988753821278,
      "grad_norm": 0.3800261616706848,
      "learning_rate": 7.894480209273671e-06,
      "loss": 0.0217,
      "step": 659080
    },
    {
      "epoch": 1.0786316058207812,
      "grad_norm": 0.5581226348876953,
      "learning_rate": 7.894414317060153e-06,
      "loss": 0.0255,
      "step": 659100
    },
    {
      "epoch": 1.0786643362594346,
      "grad_norm": 0.43515369296073914,
      "learning_rate": 7.894348424846637e-06,
      "loss": 0.0238,
      "step": 659120
    },
    {
      "epoch": 1.078697066698088,
      "grad_norm": 0.415693461894989,
      "learning_rate": 7.89428253263312e-06,
      "loss": 0.0285,
      "step": 659140
    },
    {
      "epoch": 1.078729797136741,
      "grad_norm": 1.5302722454071045,
      "learning_rate": 7.894216640419602e-06,
      "loss": 0.02,
      "step": 659160
    },
    {
      "epoch": 1.0787625275753945,
      "grad_norm": 0.9148215055465698,
      "learning_rate": 7.894150748206086e-06,
      "loss": 0.0195,
      "step": 659180
    },
    {
      "epoch": 1.0787952580140479,
      "grad_norm": 0.3442766070365906,
      "learning_rate": 7.894084855992568e-06,
      "loss": 0.0285,
      "step": 659200
    },
    {
      "epoch": 1.0788279884527012,
      "grad_norm": 2.182227849960327,
      "learning_rate": 7.894018963779051e-06,
      "loss": 0.0252,
      "step": 659220
    },
    {
      "epoch": 1.0788607188913546,
      "grad_norm": 1.140216588973999,
      "learning_rate": 7.893953071565533e-06,
      "loss": 0.031,
      "step": 659240
    },
    {
      "epoch": 1.078893449330008,
      "grad_norm": 0.4891023337841034,
      "learning_rate": 7.893887179352017e-06,
      "loss": 0.0201,
      "step": 659260
    },
    {
      "epoch": 1.0789261797686613,
      "grad_norm": 0.9980984926223755,
      "learning_rate": 7.8938212871385e-06,
      "loss": 0.0213,
      "step": 659280
    },
    {
      "epoch": 1.0789589102073145,
      "grad_norm": 0.6466627717018127,
      "learning_rate": 7.893755394924982e-06,
      "loss": 0.0285,
      "step": 659300
    },
    {
      "epoch": 1.0789916406459679,
      "grad_norm": 0.23072880506515503,
      "learning_rate": 7.893689502711466e-06,
      "loss": 0.0312,
      "step": 659320
    },
    {
      "epoch": 1.0790243710846212,
      "grad_norm": 0.6417171359062195,
      "learning_rate": 7.893623610497948e-06,
      "loss": 0.0314,
      "step": 659340
    },
    {
      "epoch": 1.0790571015232746,
      "grad_norm": 1.8943746089935303,
      "learning_rate": 7.893557718284431e-06,
      "loss": 0.0279,
      "step": 659360
    },
    {
      "epoch": 1.079089831961928,
      "grad_norm": 0.8368555903434753,
      "learning_rate": 7.893491826070913e-06,
      "loss": 0.0195,
      "step": 659380
    },
    {
      "epoch": 1.0791225624005814,
      "grad_norm": 0.31716203689575195,
      "learning_rate": 7.893425933857397e-06,
      "loss": 0.0203,
      "step": 659400
    },
    {
      "epoch": 1.0791552928392347,
      "grad_norm": 0.3839097321033478,
      "learning_rate": 7.893360041643879e-06,
      "loss": 0.032,
      "step": 659420
    },
    {
      "epoch": 1.0791880232778879,
      "grad_norm": 0.4529027044773102,
      "learning_rate": 7.893294149430362e-06,
      "loss": 0.0324,
      "step": 659440
    },
    {
      "epoch": 1.0792207537165412,
      "grad_norm": 0.6707873344421387,
      "learning_rate": 7.893228257216844e-06,
      "loss": 0.0235,
      "step": 659460
    },
    {
      "epoch": 1.0792534841551946,
      "grad_norm": 0.16880761086940765,
      "learning_rate": 7.893162365003328e-06,
      "loss": 0.0248,
      "step": 659480
    },
    {
      "epoch": 1.079286214593848,
      "grad_norm": 1.9134740829467773,
      "learning_rate": 7.893096472789811e-06,
      "loss": 0.0205,
      "step": 659500
    },
    {
      "epoch": 1.0793189450325014,
      "grad_norm": 0.4009578824043274,
      "learning_rate": 7.893030580576293e-06,
      "loss": 0.0222,
      "step": 659520
    },
    {
      "epoch": 1.0793516754711547,
      "grad_norm": 0.8801689147949219,
      "learning_rate": 7.892964688362777e-06,
      "loss": 0.0259,
      "step": 659540
    },
    {
      "epoch": 1.079384405909808,
      "grad_norm": 0.419840008020401,
      "learning_rate": 7.89289879614926e-06,
      "loss": 0.0252,
      "step": 659560
    },
    {
      "epoch": 1.0794171363484613,
      "grad_norm": 1.7963149547576904,
      "learning_rate": 7.892832903935742e-06,
      "loss": 0.022,
      "step": 659580
    },
    {
      "epoch": 1.0794498667871146,
      "grad_norm": 0.7643618583679199,
      "learning_rate": 7.892767011722226e-06,
      "loss": 0.0218,
      "step": 659600
    },
    {
      "epoch": 1.079482597225768,
      "grad_norm": 1.0095860958099365,
      "learning_rate": 7.892701119508708e-06,
      "loss": 0.0241,
      "step": 659620
    },
    {
      "epoch": 1.0795153276644214,
      "grad_norm": 0.5526250600814819,
      "learning_rate": 7.892635227295191e-06,
      "loss": 0.0241,
      "step": 659640
    },
    {
      "epoch": 1.0795480581030747,
      "grad_norm": 0.3917510509490967,
      "learning_rate": 7.892569335081675e-06,
      "loss": 0.0303,
      "step": 659660
    },
    {
      "epoch": 1.0795807885417281,
      "grad_norm": 1.0109790563583374,
      "learning_rate": 7.892503442868157e-06,
      "loss": 0.0157,
      "step": 659680
    },
    {
      "epoch": 1.0796135189803815,
      "grad_norm": 0.3566420078277588,
      "learning_rate": 7.89243755065464e-06,
      "loss": 0.0321,
      "step": 659700
    },
    {
      "epoch": 1.0796462494190346,
      "grad_norm": 0.6910991072654724,
      "learning_rate": 7.892371658441122e-06,
      "loss": 0.0289,
      "step": 659720
    },
    {
      "epoch": 1.079678979857688,
      "grad_norm": 1.0142408609390259,
      "learning_rate": 7.892305766227606e-06,
      "loss": 0.0198,
      "step": 659740
    },
    {
      "epoch": 1.0797117102963414,
      "grad_norm": 0.9738188982009888,
      "learning_rate": 7.892239874014088e-06,
      "loss": 0.0242,
      "step": 659760
    },
    {
      "epoch": 1.0797444407349948,
      "grad_norm": 1.2850866317749023,
      "learning_rate": 7.892173981800571e-06,
      "loss": 0.0363,
      "step": 659780
    },
    {
      "epoch": 1.0797771711736481,
      "grad_norm": 0.5487673878669739,
      "learning_rate": 7.892108089587053e-06,
      "loss": 0.0208,
      "step": 659800
    },
    {
      "epoch": 1.0798099016123015,
      "grad_norm": 2.412522077560425,
      "learning_rate": 7.892042197373537e-06,
      "loss": 0.0313,
      "step": 659820
    },
    {
      "epoch": 1.0798426320509547,
      "grad_norm": 0.725200355052948,
      "learning_rate": 7.891976305160019e-06,
      "loss": 0.0274,
      "step": 659840
    },
    {
      "epoch": 1.079875362489608,
      "grad_norm": 0.7760375142097473,
      "learning_rate": 7.891910412946502e-06,
      "loss": 0.0163,
      "step": 659860
    },
    {
      "epoch": 1.0799080929282614,
      "grad_norm": 0.9200528264045715,
      "learning_rate": 7.891844520732986e-06,
      "loss": 0.0222,
      "step": 659880
    },
    {
      "epoch": 1.0799408233669148,
      "grad_norm": 2.661334991455078,
      "learning_rate": 7.891778628519468e-06,
      "loss": 0.0286,
      "step": 659900
    },
    {
      "epoch": 1.0799735538055681,
      "grad_norm": 0.38285455107688904,
      "learning_rate": 7.891712736305951e-06,
      "loss": 0.0223,
      "step": 659920
    },
    {
      "epoch": 1.0800062842442215,
      "grad_norm": 0.6534863114356995,
      "learning_rate": 7.891646844092435e-06,
      "loss": 0.0231,
      "step": 659940
    },
    {
      "epoch": 1.0800390146828747,
      "grad_norm": 5.778620719909668,
      "learning_rate": 7.891580951878917e-06,
      "loss": 0.0224,
      "step": 659960
    },
    {
      "epoch": 1.080071745121528,
      "grad_norm": 0.46504464745521545,
      "learning_rate": 7.8915150596654e-06,
      "loss": 0.0267,
      "step": 659980
    },
    {
      "epoch": 1.0801044755601814,
      "grad_norm": 0.3662244379520416,
      "learning_rate": 7.891449167451884e-06,
      "loss": 0.0281,
      "step": 660000
    },
    {
      "epoch": 1.0801372059988348,
      "grad_norm": 0.658029317855835,
      "learning_rate": 7.891383275238366e-06,
      "loss": 0.0224,
      "step": 660020
    },
    {
      "epoch": 1.0801699364374882,
      "grad_norm": 0.6431416869163513,
      "learning_rate": 7.89131738302485e-06,
      "loss": 0.0155,
      "step": 660040
    },
    {
      "epoch": 1.0802026668761415,
      "grad_norm": 0.9586928486824036,
      "learning_rate": 7.891251490811332e-06,
      "loss": 0.0213,
      "step": 660060
    },
    {
      "epoch": 1.080235397314795,
      "grad_norm": 1.0530152320861816,
      "learning_rate": 7.891185598597815e-06,
      "loss": 0.0308,
      "step": 660080
    },
    {
      "epoch": 1.080268127753448,
      "grad_norm": 0.9703549742698669,
      "learning_rate": 7.891119706384297e-06,
      "loss": 0.0171,
      "step": 660100
    },
    {
      "epoch": 1.0803008581921014,
      "grad_norm": 0.3548850119113922,
      "learning_rate": 7.89105381417078e-06,
      "loss": 0.0187,
      "step": 660120
    },
    {
      "epoch": 1.0803335886307548,
      "grad_norm": 0.34421682357788086,
      "learning_rate": 7.890987921957262e-06,
      "loss": 0.0177,
      "step": 660140
    },
    {
      "epoch": 1.0803663190694082,
      "grad_norm": 0.508213460445404,
      "learning_rate": 7.890922029743746e-06,
      "loss": 0.025,
      "step": 660160
    },
    {
      "epoch": 1.0803990495080615,
      "grad_norm": 0.823769748210907,
      "learning_rate": 7.890856137530228e-06,
      "loss": 0.0145,
      "step": 660180
    },
    {
      "epoch": 1.080431779946715,
      "grad_norm": 0.4726972281932831,
      "learning_rate": 7.890790245316712e-06,
      "loss": 0.0131,
      "step": 660200
    },
    {
      "epoch": 1.0804645103853683,
      "grad_norm": 1.7350538969039917,
      "learning_rate": 7.890724353103193e-06,
      "loss": 0.0235,
      "step": 660220
    },
    {
      "epoch": 1.0804972408240214,
      "grad_norm": 0.5450601577758789,
      "learning_rate": 7.890658460889677e-06,
      "loss": 0.0271,
      "step": 660240
    },
    {
      "epoch": 1.0805299712626748,
      "grad_norm": 2.1936135292053223,
      "learning_rate": 7.890592568676159e-06,
      "loss": 0.0225,
      "step": 660260
    },
    {
      "epoch": 1.0805627017013282,
      "grad_norm": 1.9205925464630127,
      "learning_rate": 7.890526676462642e-06,
      "loss": 0.0244,
      "step": 660280
    },
    {
      "epoch": 1.0805954321399815,
      "grad_norm": 0.8916354179382324,
      "learning_rate": 7.890460784249126e-06,
      "loss": 0.0346,
      "step": 660300
    },
    {
      "epoch": 1.080628162578635,
      "grad_norm": 0.47612714767456055,
      "learning_rate": 7.89039489203561e-06,
      "loss": 0.034,
      "step": 660320
    },
    {
      "epoch": 1.0806608930172883,
      "grad_norm": 0.5914343595504761,
      "learning_rate": 7.890328999822092e-06,
      "loss": 0.0172,
      "step": 660340
    },
    {
      "epoch": 1.0806936234559417,
      "grad_norm": 2.2972347736358643,
      "learning_rate": 7.890263107608575e-06,
      "loss": 0.0316,
      "step": 660360
    },
    {
      "epoch": 1.0807263538945948,
      "grad_norm": 1.0437443256378174,
      "learning_rate": 7.890197215395059e-06,
      "loss": 0.0215,
      "step": 660380
    },
    {
      "epoch": 1.0807590843332482,
      "grad_norm": 0.9689217805862427,
      "learning_rate": 7.89013132318154e-06,
      "loss": 0.021,
      "step": 660400
    },
    {
      "epoch": 1.0807918147719016,
      "grad_norm": 0.6542649865150452,
      "learning_rate": 7.890065430968024e-06,
      "loss": 0.0215,
      "step": 660420
    },
    {
      "epoch": 1.080824545210555,
      "grad_norm": 3.0317721366882324,
      "learning_rate": 7.889999538754506e-06,
      "loss": 0.0257,
      "step": 660440
    },
    {
      "epoch": 1.0808572756492083,
      "grad_norm": 0.6538045406341553,
      "learning_rate": 7.88993364654099e-06,
      "loss": 0.0199,
      "step": 660460
    },
    {
      "epoch": 1.0808900060878617,
      "grad_norm": 0.3433781564235687,
      "learning_rate": 7.889867754327472e-06,
      "loss": 0.0244,
      "step": 660480
    },
    {
      "epoch": 1.0809227365265148,
      "grad_norm": 0.39610984921455383,
      "learning_rate": 7.889801862113955e-06,
      "loss": 0.0246,
      "step": 660500
    },
    {
      "epoch": 1.0809554669651682,
      "grad_norm": 1.2419860363006592,
      "learning_rate": 7.889735969900437e-06,
      "loss": 0.0267,
      "step": 660520
    },
    {
      "epoch": 1.0809881974038216,
      "grad_norm": 0.6316777467727661,
      "learning_rate": 7.88967007768692e-06,
      "loss": 0.0235,
      "step": 660540
    },
    {
      "epoch": 1.081020927842475,
      "grad_norm": 0.4857955276966095,
      "learning_rate": 7.889604185473403e-06,
      "loss": 0.0298,
      "step": 660560
    },
    {
      "epoch": 1.0810536582811283,
      "grad_norm": 0.540959358215332,
      "learning_rate": 7.889538293259886e-06,
      "loss": 0.0243,
      "step": 660580
    },
    {
      "epoch": 1.0810863887197817,
      "grad_norm": 0.45071250200271606,
      "learning_rate": 7.889472401046368e-06,
      "loss": 0.0165,
      "step": 660600
    },
    {
      "epoch": 1.081119119158435,
      "grad_norm": 0.5147384405136108,
      "learning_rate": 7.889406508832852e-06,
      "loss": 0.0292,
      "step": 660620
    },
    {
      "epoch": 1.0811518495970882,
      "grad_norm": 0.3355274796485901,
      "learning_rate": 7.889340616619334e-06,
      "loss": 0.0174,
      "step": 660640
    },
    {
      "epoch": 1.0811845800357416,
      "grad_norm": 1.0144026279449463,
      "learning_rate": 7.889274724405817e-06,
      "loss": 0.0234,
      "step": 660660
    },
    {
      "epoch": 1.081217310474395,
      "grad_norm": 7.759420394897461,
      "learning_rate": 7.8892088321923e-06,
      "loss": 0.034,
      "step": 660680
    },
    {
      "epoch": 1.0812500409130483,
      "grad_norm": 1.081237554550171,
      "learning_rate": 7.889142939978783e-06,
      "loss": 0.0306,
      "step": 660700
    },
    {
      "epoch": 1.0812827713517017,
      "grad_norm": 0.13382358849048615,
      "learning_rate": 7.889077047765266e-06,
      "loss": 0.0232,
      "step": 660720
    },
    {
      "epoch": 1.081315501790355,
      "grad_norm": 0.48688313364982605,
      "learning_rate": 7.88901115555175e-06,
      "loss": 0.0256,
      "step": 660740
    },
    {
      "epoch": 1.0813482322290082,
      "grad_norm": 0.1754315197467804,
      "learning_rate": 7.888945263338232e-06,
      "loss": 0.0206,
      "step": 660760
    },
    {
      "epoch": 1.0813809626676616,
      "grad_norm": 0.5990800857543945,
      "learning_rate": 7.888879371124715e-06,
      "loss": 0.0209,
      "step": 660780
    },
    {
      "epoch": 1.081413693106315,
      "grad_norm": 0.6907856464385986,
      "learning_rate": 7.888813478911199e-06,
      "loss": 0.0171,
      "step": 660800
    },
    {
      "epoch": 1.0814464235449683,
      "grad_norm": 0.6277003288269043,
      "learning_rate": 7.88874758669768e-06,
      "loss": 0.021,
      "step": 660820
    },
    {
      "epoch": 1.0814791539836217,
      "grad_norm": 0.5554999709129333,
      "learning_rate": 7.888681694484164e-06,
      "loss": 0.0274,
      "step": 660840
    },
    {
      "epoch": 1.081511884422275,
      "grad_norm": 0.6731926798820496,
      "learning_rate": 7.888615802270646e-06,
      "loss": 0.0238,
      "step": 660860
    },
    {
      "epoch": 1.0815446148609285,
      "grad_norm": 3.196645498275757,
      "learning_rate": 7.88854991005713e-06,
      "loss": 0.0194,
      "step": 660880
    },
    {
      "epoch": 1.0815773452995816,
      "grad_norm": 0.7220696806907654,
      "learning_rate": 7.888484017843612e-06,
      "loss": 0.0173,
      "step": 660900
    },
    {
      "epoch": 1.081610075738235,
      "grad_norm": 1.6183695793151855,
      "learning_rate": 7.888418125630095e-06,
      "loss": 0.0189,
      "step": 660920
    },
    {
      "epoch": 1.0816428061768883,
      "grad_norm": 0.8381935954093933,
      "learning_rate": 7.888352233416577e-06,
      "loss": 0.0369,
      "step": 660940
    },
    {
      "epoch": 1.0816755366155417,
      "grad_norm": 1.5708675384521484,
      "learning_rate": 7.88828634120306e-06,
      "loss": 0.0263,
      "step": 660960
    },
    {
      "epoch": 1.081708267054195,
      "grad_norm": 1.0457384586334229,
      "learning_rate": 7.888220448989543e-06,
      "loss": 0.018,
      "step": 660980
    },
    {
      "epoch": 1.0817409974928485,
      "grad_norm": 0.5740604400634766,
      "learning_rate": 7.888154556776026e-06,
      "loss": 0.0172,
      "step": 661000
    },
    {
      "epoch": 1.0817737279315018,
      "grad_norm": 0.864061713218689,
      "learning_rate": 7.888088664562508e-06,
      "loss": 0.0287,
      "step": 661020
    },
    {
      "epoch": 1.081806458370155,
      "grad_norm": 0.30079352855682373,
      "learning_rate": 7.888022772348992e-06,
      "loss": 0.027,
      "step": 661040
    },
    {
      "epoch": 1.0818391888088084,
      "grad_norm": 0.7550031542778015,
      "learning_rate": 7.887956880135475e-06,
      "loss": 0.0253,
      "step": 661060
    },
    {
      "epoch": 1.0818719192474617,
      "grad_norm": 0.6665630340576172,
      "learning_rate": 7.887890987921957e-06,
      "loss": 0.0268,
      "step": 661080
    },
    {
      "epoch": 1.081904649686115,
      "grad_norm": 0.3239164650440216,
      "learning_rate": 7.88782509570844e-06,
      "loss": 0.0344,
      "step": 661100
    },
    {
      "epoch": 1.0819373801247685,
      "grad_norm": 2.3009302616119385,
      "learning_rate": 7.887759203494924e-06,
      "loss": 0.0261,
      "step": 661120
    },
    {
      "epoch": 1.0819701105634218,
      "grad_norm": 0.5995582342147827,
      "learning_rate": 7.887693311281406e-06,
      "loss": 0.0291,
      "step": 661140
    },
    {
      "epoch": 1.0820028410020752,
      "grad_norm": 0.4366985261440277,
      "learning_rate": 7.88762741906789e-06,
      "loss": 0.0276,
      "step": 661160
    },
    {
      "epoch": 1.0820355714407284,
      "grad_norm": 0.604071855545044,
      "learning_rate": 7.887561526854373e-06,
      "loss": 0.0304,
      "step": 661180
    },
    {
      "epoch": 1.0820683018793817,
      "grad_norm": 1.2972079515457153,
      "learning_rate": 7.887495634640855e-06,
      "loss": 0.0227,
      "step": 661200
    },
    {
      "epoch": 1.0821010323180351,
      "grad_norm": 1.5196369886398315,
      "learning_rate": 7.887429742427339e-06,
      "loss": 0.0195,
      "step": 661220
    },
    {
      "epoch": 1.0821337627566885,
      "grad_norm": 0.28407642245292664,
      "learning_rate": 7.88736385021382e-06,
      "loss": 0.0178,
      "step": 661240
    },
    {
      "epoch": 1.0821664931953419,
      "grad_norm": 1.7101216316223145,
      "learning_rate": 7.887297958000304e-06,
      "loss": 0.0241,
      "step": 661260
    },
    {
      "epoch": 1.0821992236339952,
      "grad_norm": 0.49928614497184753,
      "learning_rate": 7.887232065786786e-06,
      "loss": 0.0255,
      "step": 661280
    },
    {
      "epoch": 1.0822319540726484,
      "grad_norm": 1.006291151046753,
      "learning_rate": 7.88716617357327e-06,
      "loss": 0.0253,
      "step": 661300
    },
    {
      "epoch": 1.0822646845113018,
      "grad_norm": 0.40575867891311646,
      "learning_rate": 7.887100281359752e-06,
      "loss": 0.0297,
      "step": 661320
    },
    {
      "epoch": 1.0822974149499551,
      "grad_norm": 0.5887712240219116,
      "learning_rate": 7.887034389146235e-06,
      "loss": 0.0203,
      "step": 661340
    },
    {
      "epoch": 1.0823301453886085,
      "grad_norm": 0.5605661869049072,
      "learning_rate": 7.886968496932717e-06,
      "loss": 0.0141,
      "step": 661360
    },
    {
      "epoch": 1.0823628758272619,
      "grad_norm": 0.6958882212638855,
      "learning_rate": 7.8869026047192e-06,
      "loss": 0.027,
      "step": 661380
    },
    {
      "epoch": 1.0823956062659152,
      "grad_norm": 0.9372677803039551,
      "learning_rate": 7.886836712505684e-06,
      "loss": 0.0161,
      "step": 661400
    },
    {
      "epoch": 1.0824283367045686,
      "grad_norm": 0.21248513460159302,
      "learning_rate": 7.886770820292166e-06,
      "loss": 0.0227,
      "step": 661420
    },
    {
      "epoch": 1.0824610671432218,
      "grad_norm": 0.1535102277994156,
      "learning_rate": 7.88670492807865e-06,
      "loss": 0.0248,
      "step": 661440
    },
    {
      "epoch": 1.0824937975818751,
      "grad_norm": 0.7238273024559021,
      "learning_rate": 7.886639035865132e-06,
      "loss": 0.025,
      "step": 661460
    },
    {
      "epoch": 1.0825265280205285,
      "grad_norm": 0.3598030209541321,
      "learning_rate": 7.886573143651615e-06,
      "loss": 0.0198,
      "step": 661480
    },
    {
      "epoch": 1.0825592584591819,
      "grad_norm": 0.6877809762954712,
      "learning_rate": 7.886507251438097e-06,
      "loss": 0.0219,
      "step": 661500
    },
    {
      "epoch": 1.0825919888978353,
      "grad_norm": 0.35534578561782837,
      "learning_rate": 7.88644135922458e-06,
      "loss": 0.0198,
      "step": 661520
    },
    {
      "epoch": 1.0826247193364886,
      "grad_norm": 0.8863707184791565,
      "learning_rate": 7.886375467011064e-06,
      "loss": 0.0258,
      "step": 661540
    },
    {
      "epoch": 1.0826574497751418,
      "grad_norm": 4.130549430847168,
      "learning_rate": 7.886309574797546e-06,
      "loss": 0.0248,
      "step": 661560
    },
    {
      "epoch": 1.0826901802137952,
      "grad_norm": 0.2896912097930908,
      "learning_rate": 7.88624368258403e-06,
      "loss": 0.0249,
      "step": 661580
    },
    {
      "epoch": 1.0827229106524485,
      "grad_norm": 0.8823726177215576,
      "learning_rate": 7.886177790370513e-06,
      "loss": 0.0169,
      "step": 661600
    },
    {
      "epoch": 1.082755641091102,
      "grad_norm": 1.5188727378845215,
      "learning_rate": 7.886111898156995e-06,
      "loss": 0.0264,
      "step": 661620
    },
    {
      "epoch": 1.0827883715297553,
      "grad_norm": 0.4575629234313965,
      "learning_rate": 7.886046005943479e-06,
      "loss": 0.0282,
      "step": 661640
    },
    {
      "epoch": 1.0828211019684086,
      "grad_norm": 0.41690707206726074,
      "learning_rate": 7.885980113729961e-06,
      "loss": 0.0255,
      "step": 661660
    },
    {
      "epoch": 1.082853832407062,
      "grad_norm": 0.433840274810791,
      "learning_rate": 7.885914221516444e-06,
      "loss": 0.0301,
      "step": 661680
    },
    {
      "epoch": 1.0828865628457152,
      "grad_norm": 2.6452274322509766,
      "learning_rate": 7.885848329302926e-06,
      "loss": 0.0261,
      "step": 661700
    },
    {
      "epoch": 1.0829192932843685,
      "grad_norm": 0.6120951771736145,
      "learning_rate": 7.88578243708941e-06,
      "loss": 0.0164,
      "step": 661720
    },
    {
      "epoch": 1.082952023723022,
      "grad_norm": 0.4199884235858917,
      "learning_rate": 7.885716544875892e-06,
      "loss": 0.0231,
      "step": 661740
    },
    {
      "epoch": 1.0829847541616753,
      "grad_norm": 0.4220961034297943,
      "learning_rate": 7.885650652662375e-06,
      "loss": 0.0267,
      "step": 661760
    },
    {
      "epoch": 1.0830174846003287,
      "grad_norm": 0.32794710993766785,
      "learning_rate": 7.885584760448859e-06,
      "loss": 0.0197,
      "step": 661780
    },
    {
      "epoch": 1.083050215038982,
      "grad_norm": 0.764471173286438,
      "learning_rate": 7.885518868235341e-06,
      "loss": 0.0319,
      "step": 661800
    },
    {
      "epoch": 1.0830829454776354,
      "grad_norm": 0.593926191329956,
      "learning_rate": 7.885452976021824e-06,
      "loss": 0.0209,
      "step": 661820
    },
    {
      "epoch": 1.0831156759162885,
      "grad_norm": 0.9888096451759338,
      "learning_rate": 7.885387083808306e-06,
      "loss": 0.0326,
      "step": 661840
    },
    {
      "epoch": 1.083148406354942,
      "grad_norm": 0.6572792530059814,
      "learning_rate": 7.88532119159479e-06,
      "loss": 0.0191,
      "step": 661860
    },
    {
      "epoch": 1.0831811367935953,
      "grad_norm": 1.47744882106781,
      "learning_rate": 7.885255299381272e-06,
      "loss": 0.0316,
      "step": 661880
    },
    {
      "epoch": 1.0832138672322487,
      "grad_norm": 0.7717614769935608,
      "learning_rate": 7.885189407167755e-06,
      "loss": 0.0196,
      "step": 661900
    },
    {
      "epoch": 1.083246597670902,
      "grad_norm": 0.3509848117828369,
      "learning_rate": 7.885123514954239e-06,
      "loss": 0.0133,
      "step": 661920
    },
    {
      "epoch": 1.0832793281095554,
      "grad_norm": 0.4895153343677521,
      "learning_rate": 7.885057622740721e-06,
      "loss": 0.017,
      "step": 661940
    },
    {
      "epoch": 1.0833120585482088,
      "grad_norm": 0.6989626288414001,
      "learning_rate": 7.884991730527204e-06,
      "loss": 0.0178,
      "step": 661960
    },
    {
      "epoch": 1.083344788986862,
      "grad_norm": 0.572350025177002,
      "learning_rate": 7.884925838313688e-06,
      "loss": 0.0169,
      "step": 661980
    },
    {
      "epoch": 1.0833775194255153,
      "grad_norm": 1.054388403892517,
      "learning_rate": 7.88485994610017e-06,
      "loss": 0.0253,
      "step": 662000
    },
    {
      "epoch": 1.0834102498641687,
      "grad_norm": 0.7306538820266724,
      "learning_rate": 7.884794053886654e-06,
      "loss": 0.0278,
      "step": 662020
    },
    {
      "epoch": 1.083442980302822,
      "grad_norm": 0.7504350543022156,
      "learning_rate": 7.884728161673135e-06,
      "loss": 0.031,
      "step": 662040
    },
    {
      "epoch": 1.0834757107414754,
      "grad_norm": 0.7549413442611694,
      "learning_rate": 7.884662269459619e-06,
      "loss": 0.0191,
      "step": 662060
    },
    {
      "epoch": 1.0835084411801288,
      "grad_norm": 1.1949349641799927,
      "learning_rate": 7.884596377246101e-06,
      "loss": 0.0194,
      "step": 662080
    },
    {
      "epoch": 1.083541171618782,
      "grad_norm": 0.3636343777179718,
      "learning_rate": 7.884530485032585e-06,
      "loss": 0.0201,
      "step": 662100
    },
    {
      "epoch": 1.0835739020574353,
      "grad_norm": 1.6157076358795166,
      "learning_rate": 7.884464592819068e-06,
      "loss": 0.025,
      "step": 662120
    },
    {
      "epoch": 1.0836066324960887,
      "grad_norm": 0.2812996506690979,
      "learning_rate": 7.88439870060555e-06,
      "loss": 0.0165,
      "step": 662140
    },
    {
      "epoch": 1.083639362934742,
      "grad_norm": 2.6176421642303467,
      "learning_rate": 7.884332808392034e-06,
      "loss": 0.0298,
      "step": 662160
    },
    {
      "epoch": 1.0836720933733954,
      "grad_norm": 0.17293860018253326,
      "learning_rate": 7.884266916178515e-06,
      "loss": 0.0189,
      "step": 662180
    },
    {
      "epoch": 1.0837048238120488,
      "grad_norm": 2.7199864387512207,
      "learning_rate": 7.884201023964999e-06,
      "loss": 0.0216,
      "step": 662200
    },
    {
      "epoch": 1.083737554250702,
      "grad_norm": 0.9334787726402283,
      "learning_rate": 7.884135131751481e-06,
      "loss": 0.0263,
      "step": 662220
    },
    {
      "epoch": 1.0837702846893553,
      "grad_norm": 0.47044113278388977,
      "learning_rate": 7.884069239537965e-06,
      "loss": 0.0245,
      "step": 662240
    },
    {
      "epoch": 1.0838030151280087,
      "grad_norm": 1.2434264421463013,
      "learning_rate": 7.884003347324446e-06,
      "loss": 0.0146,
      "step": 662260
    },
    {
      "epoch": 1.083835745566662,
      "grad_norm": 1.4518266916275024,
      "learning_rate": 7.88393745511093e-06,
      "loss": 0.0371,
      "step": 662280
    },
    {
      "epoch": 1.0838684760053154,
      "grad_norm": 0.2827836573123932,
      "learning_rate": 7.883871562897412e-06,
      "loss": 0.0207,
      "step": 662300
    },
    {
      "epoch": 1.0839012064439688,
      "grad_norm": 0.6186084747314453,
      "learning_rate": 7.883805670683896e-06,
      "loss": 0.024,
      "step": 662320
    },
    {
      "epoch": 1.0839339368826222,
      "grad_norm": 0.7371366620063782,
      "learning_rate": 7.883739778470379e-06,
      "loss": 0.0273,
      "step": 662340
    },
    {
      "epoch": 1.0839666673212753,
      "grad_norm": 0.6100530028343201,
      "learning_rate": 7.883673886256861e-06,
      "loss": 0.0214,
      "step": 662360
    },
    {
      "epoch": 1.0839993977599287,
      "grad_norm": 2.5782432556152344,
      "learning_rate": 7.883607994043345e-06,
      "loss": 0.0345,
      "step": 662380
    },
    {
      "epoch": 1.084032128198582,
      "grad_norm": 0.5052331686019897,
      "learning_rate": 7.883542101829828e-06,
      "loss": 0.0197,
      "step": 662400
    },
    {
      "epoch": 1.0840648586372355,
      "grad_norm": 1.2995885610580444,
      "learning_rate": 7.88347620961631e-06,
      "loss": 0.0271,
      "step": 662420
    },
    {
      "epoch": 1.0840975890758888,
      "grad_norm": 0.2929258942604065,
      "learning_rate": 7.883410317402794e-06,
      "loss": 0.0292,
      "step": 662440
    },
    {
      "epoch": 1.0841303195145422,
      "grad_norm": 0.6512433290481567,
      "learning_rate": 7.883344425189277e-06,
      "loss": 0.0157,
      "step": 662460
    },
    {
      "epoch": 1.0841630499531956,
      "grad_norm": 0.16335740685462952,
      "learning_rate": 7.883278532975759e-06,
      "loss": 0.0247,
      "step": 662480
    },
    {
      "epoch": 1.0841957803918487,
      "grad_norm": 0.46294426918029785,
      "learning_rate": 7.883212640762243e-06,
      "loss": 0.0257,
      "step": 662500
    },
    {
      "epoch": 1.084228510830502,
      "grad_norm": 1.728702187538147,
      "learning_rate": 7.883146748548725e-06,
      "loss": 0.028,
      "step": 662520
    },
    {
      "epoch": 1.0842612412691555,
      "grad_norm": 0.36817997694015503,
      "learning_rate": 7.883080856335208e-06,
      "loss": 0.0187,
      "step": 662540
    },
    {
      "epoch": 1.0842939717078088,
      "grad_norm": 0.21661528944969177,
      "learning_rate": 7.88301496412169e-06,
      "loss": 0.0261,
      "step": 662560
    },
    {
      "epoch": 1.0843267021464622,
      "grad_norm": 0.6327351927757263,
      "learning_rate": 7.882949071908174e-06,
      "loss": 0.0237,
      "step": 662580
    },
    {
      "epoch": 1.0843594325851156,
      "grad_norm": 0.3773200213909149,
      "learning_rate": 7.882883179694656e-06,
      "loss": 0.0235,
      "step": 662600
    },
    {
      "epoch": 1.084392163023769,
      "grad_norm": 0.5896365642547607,
      "learning_rate": 7.882817287481139e-06,
      "loss": 0.0207,
      "step": 662620
    },
    {
      "epoch": 1.084424893462422,
      "grad_norm": 0.7341533899307251,
      "learning_rate": 7.882751395267621e-06,
      "loss": 0.0207,
      "step": 662640
    },
    {
      "epoch": 1.0844576239010755,
      "grad_norm": 0.5939819812774658,
      "learning_rate": 7.882685503054105e-06,
      "loss": 0.0224,
      "step": 662660
    },
    {
      "epoch": 1.0844903543397288,
      "grad_norm": 0.4421709179878235,
      "learning_rate": 7.882619610840587e-06,
      "loss": 0.0319,
      "step": 662680
    },
    {
      "epoch": 1.0845230847783822,
      "grad_norm": 0.31519946455955505,
      "learning_rate": 7.88255371862707e-06,
      "loss": 0.0228,
      "step": 662700
    },
    {
      "epoch": 1.0845558152170356,
      "grad_norm": 0.39120054244995117,
      "learning_rate": 7.882487826413554e-06,
      "loss": 0.0268,
      "step": 662720
    },
    {
      "epoch": 1.084588545655689,
      "grad_norm": 0.9170738458633423,
      "learning_rate": 7.882421934200036e-06,
      "loss": 0.0192,
      "step": 662740
    },
    {
      "epoch": 1.0846212760943421,
      "grad_norm": 0.2505381107330322,
      "learning_rate": 7.88235604198652e-06,
      "loss": 0.0203,
      "step": 662760
    },
    {
      "epoch": 1.0846540065329955,
      "grad_norm": 0.5769161581993103,
      "learning_rate": 7.882290149773003e-06,
      "loss": 0.023,
      "step": 662780
    },
    {
      "epoch": 1.0846867369716489,
      "grad_norm": 0.31970757246017456,
      "learning_rate": 7.882224257559485e-06,
      "loss": 0.0223,
      "step": 662800
    },
    {
      "epoch": 1.0847194674103022,
      "grad_norm": 1.1985573768615723,
      "learning_rate": 7.882158365345968e-06,
      "loss": 0.033,
      "step": 662820
    },
    {
      "epoch": 1.0847521978489556,
      "grad_norm": 2.9523279666900635,
      "learning_rate": 7.882092473132452e-06,
      "loss": 0.0219,
      "step": 662840
    },
    {
      "epoch": 1.084784928287609,
      "grad_norm": 0.27622270584106445,
      "learning_rate": 7.882026580918934e-06,
      "loss": 0.0187,
      "step": 662860
    },
    {
      "epoch": 1.0848176587262623,
      "grad_norm": 0.6013702750205994,
      "learning_rate": 7.881960688705417e-06,
      "loss": 0.0296,
      "step": 662880
    },
    {
      "epoch": 1.0848503891649155,
      "grad_norm": 2.019798755645752,
      "learning_rate": 7.8818947964919e-06,
      "loss": 0.0253,
      "step": 662900
    },
    {
      "epoch": 1.0848831196035689,
      "grad_norm": 0.5586739182472229,
      "learning_rate": 7.881828904278383e-06,
      "loss": 0.022,
      "step": 662920
    },
    {
      "epoch": 1.0849158500422222,
      "grad_norm": 0.7303073406219482,
      "learning_rate": 7.881763012064865e-06,
      "loss": 0.0229,
      "step": 662940
    },
    {
      "epoch": 1.0849485804808756,
      "grad_norm": 0.5762059092521667,
      "learning_rate": 7.881697119851348e-06,
      "loss": 0.0184,
      "step": 662960
    },
    {
      "epoch": 1.084981310919529,
      "grad_norm": 1.0241776704788208,
      "learning_rate": 7.88163122763783e-06,
      "loss": 0.0261,
      "step": 662980
    },
    {
      "epoch": 1.0850140413581824,
      "grad_norm": 1.7699503898620605,
      "learning_rate": 7.881565335424314e-06,
      "loss": 0.0201,
      "step": 663000
    },
    {
      "epoch": 1.0850467717968355,
      "grad_norm": 0.786742091178894,
      "learning_rate": 7.881499443210796e-06,
      "loss": 0.0324,
      "step": 663020
    },
    {
      "epoch": 1.0850795022354889,
      "grad_norm": 0.6078321933746338,
      "learning_rate": 7.88143355099728e-06,
      "loss": 0.0202,
      "step": 663040
    },
    {
      "epoch": 1.0851122326741423,
      "grad_norm": 1.0106695890426636,
      "learning_rate": 7.881367658783761e-06,
      "loss": 0.0233,
      "step": 663060
    },
    {
      "epoch": 1.0851449631127956,
      "grad_norm": 0.5526100397109985,
      "learning_rate": 7.881301766570245e-06,
      "loss": 0.0256,
      "step": 663080
    },
    {
      "epoch": 1.085177693551449,
      "grad_norm": 1.2877782583236694,
      "learning_rate": 7.881235874356728e-06,
      "loss": 0.0264,
      "step": 663100
    },
    {
      "epoch": 1.0852104239901024,
      "grad_norm": 1.0467475652694702,
      "learning_rate": 7.88116998214321e-06,
      "loss": 0.0269,
      "step": 663120
    },
    {
      "epoch": 1.0852431544287557,
      "grad_norm": 0.5032714009284973,
      "learning_rate": 7.881104089929694e-06,
      "loss": 0.0296,
      "step": 663140
    },
    {
      "epoch": 1.085275884867409,
      "grad_norm": 0.0849444717168808,
      "learning_rate": 7.881038197716177e-06,
      "loss": 0.0199,
      "step": 663160
    },
    {
      "epoch": 1.0853086153060623,
      "grad_norm": 1.7914754152297974,
      "learning_rate": 7.88097230550266e-06,
      "loss": 0.0226,
      "step": 663180
    },
    {
      "epoch": 1.0853413457447156,
      "grad_norm": 0.7080343961715698,
      "learning_rate": 7.880906413289143e-06,
      "loss": 0.0207,
      "step": 663200
    },
    {
      "epoch": 1.085374076183369,
      "grad_norm": 1.6306549310684204,
      "learning_rate": 7.880840521075626e-06,
      "loss": 0.0281,
      "step": 663220
    },
    {
      "epoch": 1.0854068066220224,
      "grad_norm": 0.7537111639976501,
      "learning_rate": 7.880774628862108e-06,
      "loss": 0.0288,
      "step": 663240
    },
    {
      "epoch": 1.0854395370606758,
      "grad_norm": 1.1245254278182983,
      "learning_rate": 7.880708736648592e-06,
      "loss": 0.0264,
      "step": 663260
    },
    {
      "epoch": 1.0854722674993291,
      "grad_norm": 2.2731804847717285,
      "learning_rate": 7.880642844435074e-06,
      "loss": 0.0264,
      "step": 663280
    },
    {
      "epoch": 1.0855049979379823,
      "grad_norm": 1.0270822048187256,
      "learning_rate": 7.880576952221557e-06,
      "loss": 0.0245,
      "step": 663300
    },
    {
      "epoch": 1.0855377283766356,
      "grad_norm": 0.4790610074996948,
      "learning_rate": 7.88051106000804e-06,
      "loss": 0.0222,
      "step": 663320
    },
    {
      "epoch": 1.085570458815289,
      "grad_norm": 0.5523772239685059,
      "learning_rate": 7.880445167794523e-06,
      "loss": 0.0239,
      "step": 663340
    },
    {
      "epoch": 1.0856031892539424,
      "grad_norm": 2.018336772918701,
      "learning_rate": 7.880379275581005e-06,
      "loss": 0.0261,
      "step": 663360
    },
    {
      "epoch": 1.0856359196925958,
      "grad_norm": 0.9507251977920532,
      "learning_rate": 7.880313383367488e-06,
      "loss": 0.0197,
      "step": 663380
    },
    {
      "epoch": 1.0856686501312491,
      "grad_norm": 1.408483862876892,
      "learning_rate": 7.88024749115397e-06,
      "loss": 0.0325,
      "step": 663400
    },
    {
      "epoch": 1.0857013805699025,
      "grad_norm": 0.43706631660461426,
      "learning_rate": 7.880181598940454e-06,
      "loss": 0.0189,
      "step": 663420
    },
    {
      "epoch": 1.0857341110085557,
      "grad_norm": 0.312572717666626,
      "learning_rate": 7.880115706726936e-06,
      "loss": 0.0355,
      "step": 663440
    },
    {
      "epoch": 1.085766841447209,
      "grad_norm": 1.0466140508651733,
      "learning_rate": 7.88004981451342e-06,
      "loss": 0.0265,
      "step": 663460
    },
    {
      "epoch": 1.0857995718858624,
      "grad_norm": 0.6215255856513977,
      "learning_rate": 7.879983922299901e-06,
      "loss": 0.0235,
      "step": 663480
    },
    {
      "epoch": 1.0858323023245158,
      "grad_norm": 0.444397896528244,
      "learning_rate": 7.879918030086385e-06,
      "loss": 0.0203,
      "step": 663500
    },
    {
      "epoch": 1.0858650327631691,
      "grad_norm": 0.2576088309288025,
      "learning_rate": 7.879852137872868e-06,
      "loss": 0.0283,
      "step": 663520
    },
    {
      "epoch": 1.0858977632018225,
      "grad_norm": 0.46522682905197144,
      "learning_rate": 7.87978624565935e-06,
      "loss": 0.0216,
      "step": 663540
    },
    {
      "epoch": 1.0859304936404757,
      "grad_norm": 1.5006436109542847,
      "learning_rate": 7.879720353445834e-06,
      "loss": 0.0392,
      "step": 663560
    },
    {
      "epoch": 1.085963224079129,
      "grad_norm": 0.5756006836891174,
      "learning_rate": 7.879654461232317e-06,
      "loss": 0.0194,
      "step": 663580
    },
    {
      "epoch": 1.0859959545177824,
      "grad_norm": 0.44097626209259033,
      "learning_rate": 7.8795885690188e-06,
      "loss": 0.0142,
      "step": 663600
    },
    {
      "epoch": 1.0860286849564358,
      "grad_norm": 0.3588055670261383,
      "learning_rate": 7.879522676805283e-06,
      "loss": 0.0217,
      "step": 663620
    },
    {
      "epoch": 1.0860614153950892,
      "grad_norm": 0.12376260757446289,
      "learning_rate": 7.879456784591766e-06,
      "loss": 0.032,
      "step": 663640
    },
    {
      "epoch": 1.0860941458337425,
      "grad_norm": 0.8434327840805054,
      "learning_rate": 7.879390892378248e-06,
      "loss": 0.0243,
      "step": 663660
    },
    {
      "epoch": 1.086126876272396,
      "grad_norm": 0.39162617921829224,
      "learning_rate": 7.879325000164732e-06,
      "loss": 0.0183,
      "step": 663680
    },
    {
      "epoch": 1.086159606711049,
      "grad_norm": 0.9825251698493958,
      "learning_rate": 7.879259107951214e-06,
      "loss": 0.0291,
      "step": 663700
    },
    {
      "epoch": 1.0861923371497024,
      "grad_norm": 0.36803388595581055,
      "learning_rate": 7.879193215737697e-06,
      "loss": 0.0213,
      "step": 663720
    },
    {
      "epoch": 1.0862250675883558,
      "grad_norm": 0.6633781790733337,
      "learning_rate": 7.87912732352418e-06,
      "loss": 0.0281,
      "step": 663740
    },
    {
      "epoch": 1.0862577980270092,
      "grad_norm": 0.8137134313583374,
      "learning_rate": 7.879061431310663e-06,
      "loss": 0.0289,
      "step": 663760
    },
    {
      "epoch": 1.0862905284656625,
      "grad_norm": 1.6650923490524292,
      "learning_rate": 7.878995539097145e-06,
      "loss": 0.0319,
      "step": 663780
    },
    {
      "epoch": 1.086323258904316,
      "grad_norm": 2.2778961658477783,
      "learning_rate": 7.878929646883628e-06,
      "loss": 0.0257,
      "step": 663800
    },
    {
      "epoch": 1.086355989342969,
      "grad_norm": 0.7029139399528503,
      "learning_rate": 7.87886375467011e-06,
      "loss": 0.0295,
      "step": 663820
    },
    {
      "epoch": 1.0863887197816224,
      "grad_norm": 0.6839896440505981,
      "learning_rate": 7.878797862456594e-06,
      "loss": 0.0324,
      "step": 663840
    },
    {
      "epoch": 1.0864214502202758,
      "grad_norm": 0.6503448486328125,
      "learning_rate": 7.878731970243077e-06,
      "loss": 0.0354,
      "step": 663860
    },
    {
      "epoch": 1.0864541806589292,
      "grad_norm": 0.3044240176677704,
      "learning_rate": 7.87866607802956e-06,
      "loss": 0.0219,
      "step": 663880
    },
    {
      "epoch": 1.0864869110975826,
      "grad_norm": 1.0713720321655273,
      "learning_rate": 7.878600185816043e-06,
      "loss": 0.0258,
      "step": 663900
    },
    {
      "epoch": 1.086519641536236,
      "grad_norm": 1.875227928161621,
      "learning_rate": 7.878534293602525e-06,
      "loss": 0.0211,
      "step": 663920
    },
    {
      "epoch": 1.0865523719748893,
      "grad_norm": 0.6206853985786438,
      "learning_rate": 7.878468401389008e-06,
      "loss": 0.0298,
      "step": 663940
    },
    {
      "epoch": 1.0865851024135424,
      "grad_norm": 0.09121265262365341,
      "learning_rate": 7.878402509175492e-06,
      "loss": 0.0292,
      "step": 663960
    },
    {
      "epoch": 1.0866178328521958,
      "grad_norm": 0.27778372168540955,
      "learning_rate": 7.878336616961974e-06,
      "loss": 0.0248,
      "step": 663980
    },
    {
      "epoch": 1.0866505632908492,
      "grad_norm": 2.1197025775909424,
      "learning_rate": 7.878270724748457e-06,
      "loss": 0.0337,
      "step": 664000
    },
    {
      "epoch": 1.0866832937295026,
      "grad_norm": 0.697426974773407,
      "learning_rate": 7.878204832534941e-06,
      "loss": 0.0203,
      "step": 664020
    },
    {
      "epoch": 1.086716024168156,
      "grad_norm": 0.666901707649231,
      "learning_rate": 7.878138940321423e-06,
      "loss": 0.0284,
      "step": 664040
    },
    {
      "epoch": 1.0867487546068093,
      "grad_norm": 0.44328445196151733,
      "learning_rate": 7.878073048107907e-06,
      "loss": 0.023,
      "step": 664060
    },
    {
      "epoch": 1.0867814850454627,
      "grad_norm": 1.0149033069610596,
      "learning_rate": 7.878007155894388e-06,
      "loss": 0.0163,
      "step": 664080
    },
    {
      "epoch": 1.0868142154841158,
      "grad_norm": 0.575492262840271,
      "learning_rate": 7.877941263680872e-06,
      "loss": 0.0248,
      "step": 664100
    },
    {
      "epoch": 1.0868469459227692,
      "grad_norm": 0.7961621284484863,
      "learning_rate": 7.877875371467354e-06,
      "loss": 0.0258,
      "step": 664120
    },
    {
      "epoch": 1.0868796763614226,
      "grad_norm": 0.8585341572761536,
      "learning_rate": 7.877809479253838e-06,
      "loss": 0.0212,
      "step": 664140
    },
    {
      "epoch": 1.086912406800076,
      "grad_norm": 0.899961531162262,
      "learning_rate": 7.87774358704032e-06,
      "loss": 0.0366,
      "step": 664160
    },
    {
      "epoch": 1.0869451372387293,
      "grad_norm": 1.1759268045425415,
      "learning_rate": 7.877677694826803e-06,
      "loss": 0.026,
      "step": 664180
    },
    {
      "epoch": 1.0869778676773827,
      "grad_norm": 1.101469874382019,
      "learning_rate": 7.877611802613285e-06,
      "loss": 0.0258,
      "step": 664200
    },
    {
      "epoch": 1.087010598116036,
      "grad_norm": 0.7494204044342041,
      "learning_rate": 7.877545910399768e-06,
      "loss": 0.0185,
      "step": 664220
    },
    {
      "epoch": 1.0870433285546892,
      "grad_norm": 0.25680994987487793,
      "learning_rate": 7.877480018186252e-06,
      "loss": 0.0243,
      "step": 664240
    },
    {
      "epoch": 1.0870760589933426,
      "grad_norm": 0.15152154862880707,
      "learning_rate": 7.877414125972734e-06,
      "loss": 0.0347,
      "step": 664260
    },
    {
      "epoch": 1.087108789431996,
      "grad_norm": 0.6338739991188049,
      "learning_rate": 7.877348233759218e-06,
      "loss": 0.0228,
      "step": 664280
    },
    {
      "epoch": 1.0871415198706493,
      "grad_norm": 0.9098516702651978,
      "learning_rate": 7.8772823415457e-06,
      "loss": 0.0244,
      "step": 664300
    },
    {
      "epoch": 1.0871742503093027,
      "grad_norm": 3.6598496437072754,
      "learning_rate": 7.877216449332183e-06,
      "loss": 0.0317,
      "step": 664320
    },
    {
      "epoch": 1.087206980747956,
      "grad_norm": 1.3336353302001953,
      "learning_rate": 7.877150557118665e-06,
      "loss": 0.0343,
      "step": 664340
    },
    {
      "epoch": 1.0872397111866092,
      "grad_norm": 0.2928559184074402,
      "learning_rate": 7.877084664905149e-06,
      "loss": 0.0186,
      "step": 664360
    },
    {
      "epoch": 1.0872724416252626,
      "grad_norm": 0.9007921814918518,
      "learning_rate": 7.877018772691632e-06,
      "loss": 0.0253,
      "step": 664380
    },
    {
      "epoch": 1.087305172063916,
      "grad_norm": 1.4146521091461182,
      "learning_rate": 7.876952880478114e-06,
      "loss": 0.0223,
      "step": 664400
    },
    {
      "epoch": 1.0873379025025693,
      "grad_norm": 1.149586796760559,
      "learning_rate": 7.876886988264598e-06,
      "loss": 0.0351,
      "step": 664420
    },
    {
      "epoch": 1.0873706329412227,
      "grad_norm": 0.7499766945838928,
      "learning_rate": 7.876821096051081e-06,
      "loss": 0.0276,
      "step": 664440
    },
    {
      "epoch": 1.087403363379876,
      "grad_norm": 0.35224294662475586,
      "learning_rate": 7.876755203837563e-06,
      "loss": 0.029,
      "step": 664460
    },
    {
      "epoch": 1.0874360938185295,
      "grad_norm": 1.3470498323440552,
      "learning_rate": 7.876689311624047e-06,
      "loss": 0.0225,
      "step": 664480
    },
    {
      "epoch": 1.0874688242571826,
      "grad_norm": 0.4254554808139801,
      "learning_rate": 7.876623419410529e-06,
      "loss": 0.0156,
      "step": 664500
    },
    {
      "epoch": 1.087501554695836,
      "grad_norm": 1.320594072341919,
      "learning_rate": 7.876557527197012e-06,
      "loss": 0.026,
      "step": 664520
    },
    {
      "epoch": 1.0875342851344894,
      "grad_norm": 0.5392807126045227,
      "learning_rate": 7.876491634983494e-06,
      "loss": 0.0285,
      "step": 664540
    },
    {
      "epoch": 1.0875670155731427,
      "grad_norm": 0.5124953985214233,
      "learning_rate": 7.876425742769978e-06,
      "loss": 0.0284,
      "step": 664560
    },
    {
      "epoch": 1.087599746011796,
      "grad_norm": 0.4466669261455536,
      "learning_rate": 7.876359850556461e-06,
      "loss": 0.0246,
      "step": 664580
    },
    {
      "epoch": 1.0876324764504495,
      "grad_norm": 0.9677754044532776,
      "learning_rate": 7.876293958342943e-06,
      "loss": 0.0297,
      "step": 664600
    },
    {
      "epoch": 1.0876652068891026,
      "grad_norm": 0.4252443313598633,
      "learning_rate": 7.876228066129427e-06,
      "loss": 0.0145,
      "step": 664620
    },
    {
      "epoch": 1.087697937327756,
      "grad_norm": 1.025201439857483,
      "learning_rate": 7.876162173915909e-06,
      "loss": 0.0298,
      "step": 664640
    },
    {
      "epoch": 1.0877306677664094,
      "grad_norm": 1.1249513626098633,
      "learning_rate": 7.876096281702392e-06,
      "loss": 0.0227,
      "step": 664660
    },
    {
      "epoch": 1.0877633982050627,
      "grad_norm": 1.3083314895629883,
      "learning_rate": 7.876030389488874e-06,
      "loss": 0.0226,
      "step": 664680
    },
    {
      "epoch": 1.087796128643716,
      "grad_norm": 0.44574421644210815,
      "learning_rate": 7.875964497275358e-06,
      "loss": 0.0245,
      "step": 664700
    },
    {
      "epoch": 1.0878288590823695,
      "grad_norm": 1.1625049114227295,
      "learning_rate": 7.87589860506184e-06,
      "loss": 0.0316,
      "step": 664720
    },
    {
      "epoch": 1.0878615895210229,
      "grad_norm": 0.8538994789123535,
      "learning_rate": 7.875832712848323e-06,
      "loss": 0.0332,
      "step": 664740
    },
    {
      "epoch": 1.087894319959676,
      "grad_norm": 0.23217736184597015,
      "learning_rate": 7.875766820634807e-06,
      "loss": 0.0276,
      "step": 664760
    },
    {
      "epoch": 1.0879270503983294,
      "grad_norm": 0.5995610356330872,
      "learning_rate": 7.875700928421289e-06,
      "loss": 0.0236,
      "step": 664780
    },
    {
      "epoch": 1.0879597808369827,
      "grad_norm": 0.12132658064365387,
      "learning_rate": 7.875635036207772e-06,
      "loss": 0.0162,
      "step": 664800
    },
    {
      "epoch": 1.0879925112756361,
      "grad_norm": 0.586388885974884,
      "learning_rate": 7.875569143994256e-06,
      "loss": 0.024,
      "step": 664820
    },
    {
      "epoch": 1.0880252417142895,
      "grad_norm": 0.9250997304916382,
      "learning_rate": 7.875503251780738e-06,
      "loss": 0.02,
      "step": 664840
    },
    {
      "epoch": 1.0880579721529429,
      "grad_norm": 0.5744947791099548,
      "learning_rate": 7.875437359567221e-06,
      "loss": 0.0302,
      "step": 664860
    },
    {
      "epoch": 1.0880907025915962,
      "grad_norm": 1.0669142007827759,
      "learning_rate": 7.875371467353703e-06,
      "loss": 0.0266,
      "step": 664880
    },
    {
      "epoch": 1.0881234330302494,
      "grad_norm": 0.7047650218009949,
      "learning_rate": 7.875305575140187e-06,
      "loss": 0.03,
      "step": 664900
    },
    {
      "epoch": 1.0881561634689028,
      "grad_norm": 3.13942289352417,
      "learning_rate": 7.87523968292667e-06,
      "loss": 0.0159,
      "step": 664920
    },
    {
      "epoch": 1.0881888939075561,
      "grad_norm": 0.4863353371620178,
      "learning_rate": 7.875173790713152e-06,
      "loss": 0.0295,
      "step": 664940
    },
    {
      "epoch": 1.0882216243462095,
      "grad_norm": 0.22375474870204926,
      "learning_rate": 7.875107898499636e-06,
      "loss": 0.018,
      "step": 664960
    },
    {
      "epoch": 1.0882543547848629,
      "grad_norm": 0.7015113234519958,
      "learning_rate": 7.875042006286118e-06,
      "loss": 0.0223,
      "step": 664980
    },
    {
      "epoch": 1.0882870852235162,
      "grad_norm": 0.5940388441085815,
      "learning_rate": 7.874976114072601e-06,
      "loss": 0.0284,
      "step": 665000
    },
    {
      "epoch": 1.0883198156621696,
      "grad_norm": 0.47166433930397034,
      "learning_rate": 7.874910221859083e-06,
      "loss": 0.0232,
      "step": 665020
    },
    {
      "epoch": 1.0883525461008228,
      "grad_norm": 1.1489149332046509,
      "learning_rate": 7.874844329645567e-06,
      "loss": 0.0341,
      "step": 665040
    },
    {
      "epoch": 1.0883852765394761,
      "grad_norm": 1.5405137538909912,
      "learning_rate": 7.874778437432049e-06,
      "loss": 0.0206,
      "step": 665060
    },
    {
      "epoch": 1.0884180069781295,
      "grad_norm": 1.0299665927886963,
      "learning_rate": 7.874712545218532e-06,
      "loss": 0.0264,
      "step": 665080
    },
    {
      "epoch": 1.0884507374167829,
      "grad_norm": 0.7675088047981262,
      "learning_rate": 7.874646653005014e-06,
      "loss": 0.0315,
      "step": 665100
    },
    {
      "epoch": 1.0884834678554363,
      "grad_norm": 0.4729084372520447,
      "learning_rate": 7.874580760791498e-06,
      "loss": 0.0265,
      "step": 665120
    },
    {
      "epoch": 1.0885161982940896,
      "grad_norm": 1.0310560464859009,
      "learning_rate": 7.87451486857798e-06,
      "loss": 0.0226,
      "step": 665140
    },
    {
      "epoch": 1.0885489287327428,
      "grad_norm": 1.2735066413879395,
      "learning_rate": 7.874448976364463e-06,
      "loss": 0.0231,
      "step": 665160
    },
    {
      "epoch": 1.0885816591713962,
      "grad_norm": 1.2066529989242554,
      "learning_rate": 7.874383084150947e-06,
      "loss": 0.0241,
      "step": 665180
    },
    {
      "epoch": 1.0886143896100495,
      "grad_norm": 1.4246846437454224,
      "learning_rate": 7.874317191937429e-06,
      "loss": 0.0268,
      "step": 665200
    },
    {
      "epoch": 1.088647120048703,
      "grad_norm": 0.3260381817817688,
      "learning_rate": 7.874251299723912e-06,
      "loss": 0.026,
      "step": 665220
    },
    {
      "epoch": 1.0886798504873563,
      "grad_norm": 0.4889833629131317,
      "learning_rate": 7.874185407510396e-06,
      "loss": 0.0289,
      "step": 665240
    },
    {
      "epoch": 1.0887125809260096,
      "grad_norm": 0.7632893919944763,
      "learning_rate": 7.874119515296878e-06,
      "loss": 0.0294,
      "step": 665260
    },
    {
      "epoch": 1.0887453113646628,
      "grad_norm": 2.7855072021484375,
      "learning_rate": 7.874053623083361e-06,
      "loss": 0.0266,
      "step": 665280
    },
    {
      "epoch": 1.0887780418033162,
      "grad_norm": 1.259796142578125,
      "learning_rate": 7.873987730869845e-06,
      "loss": 0.0288,
      "step": 665300
    },
    {
      "epoch": 1.0888107722419695,
      "grad_norm": 0.101984903216362,
      "learning_rate": 7.873921838656327e-06,
      "loss": 0.0217,
      "step": 665320
    },
    {
      "epoch": 1.088843502680623,
      "grad_norm": 0.667660117149353,
      "learning_rate": 7.87385594644281e-06,
      "loss": 0.0177,
      "step": 665340
    },
    {
      "epoch": 1.0888762331192763,
      "grad_norm": 0.23357626795768738,
      "learning_rate": 7.873790054229292e-06,
      "loss": 0.0224,
      "step": 665360
    },
    {
      "epoch": 1.0889089635579297,
      "grad_norm": 0.5235695242881775,
      "learning_rate": 7.873724162015776e-06,
      "loss": 0.0192,
      "step": 665380
    },
    {
      "epoch": 1.088941693996583,
      "grad_norm": 0.7325863838195801,
      "learning_rate": 7.873658269802258e-06,
      "loss": 0.0194,
      "step": 665400
    },
    {
      "epoch": 1.0889744244352362,
      "grad_norm": 0.9864263534545898,
      "learning_rate": 7.873592377588741e-06,
      "loss": 0.026,
      "step": 665420
    },
    {
      "epoch": 1.0890071548738895,
      "grad_norm": 0.36098307371139526,
      "learning_rate": 7.873526485375223e-06,
      "loss": 0.0204,
      "step": 665440
    },
    {
      "epoch": 1.089039885312543,
      "grad_norm": 1.0157880783081055,
      "learning_rate": 7.873460593161707e-06,
      "loss": 0.0262,
      "step": 665460
    },
    {
      "epoch": 1.0890726157511963,
      "grad_norm": 0.6371979117393494,
      "learning_rate": 7.873394700948189e-06,
      "loss": 0.02,
      "step": 665480
    },
    {
      "epoch": 1.0891053461898497,
      "grad_norm": 0.8824167251586914,
      "learning_rate": 7.873328808734672e-06,
      "loss": 0.0243,
      "step": 665500
    },
    {
      "epoch": 1.089138076628503,
      "grad_norm": 0.36013439297676086,
      "learning_rate": 7.873262916521154e-06,
      "loss": 0.0262,
      "step": 665520
    },
    {
      "epoch": 1.0891708070671564,
      "grad_norm": 1.2526136636734009,
      "learning_rate": 7.873197024307638e-06,
      "loss": 0.0242,
      "step": 665540
    },
    {
      "epoch": 1.0892035375058096,
      "grad_norm": 1.1641803979873657,
      "learning_rate": 7.873131132094121e-06,
      "loss": 0.027,
      "step": 665560
    },
    {
      "epoch": 1.089236267944463,
      "grad_norm": 0.4365041255950928,
      "learning_rate": 7.873065239880603e-06,
      "loss": 0.0337,
      "step": 665580
    },
    {
      "epoch": 1.0892689983831163,
      "grad_norm": 1.891615629196167,
      "learning_rate": 7.872999347667087e-06,
      "loss": 0.0254,
      "step": 665600
    },
    {
      "epoch": 1.0893017288217697,
      "grad_norm": 0.13896577060222626,
      "learning_rate": 7.87293345545357e-06,
      "loss": 0.0178,
      "step": 665620
    },
    {
      "epoch": 1.089334459260423,
      "grad_norm": 0.4890301823616028,
      "learning_rate": 7.872867563240052e-06,
      "loss": 0.0206,
      "step": 665640
    },
    {
      "epoch": 1.0893671896990764,
      "grad_norm": 0.5143265128135681,
      "learning_rate": 7.872801671026536e-06,
      "loss": 0.0253,
      "step": 665660
    },
    {
      "epoch": 1.0893999201377298,
      "grad_norm": 1.7657674551010132,
      "learning_rate": 7.87273577881302e-06,
      "loss": 0.0354,
      "step": 665680
    },
    {
      "epoch": 1.089432650576383,
      "grad_norm": 0.48148101568222046,
      "learning_rate": 7.872669886599501e-06,
      "loss": 0.0267,
      "step": 665700
    },
    {
      "epoch": 1.0894653810150363,
      "grad_norm": 0.23150376975536346,
      "learning_rate": 7.872603994385985e-06,
      "loss": 0.0262,
      "step": 665720
    },
    {
      "epoch": 1.0894981114536897,
      "grad_norm": 0.1334417313337326,
      "learning_rate": 7.872538102172467e-06,
      "loss": 0.0205,
      "step": 665740
    },
    {
      "epoch": 1.089530841892343,
      "grad_norm": 0.6897827982902527,
      "learning_rate": 7.87247220995895e-06,
      "loss": 0.0243,
      "step": 665760
    },
    {
      "epoch": 1.0895635723309964,
      "grad_norm": 1.5686739683151245,
      "learning_rate": 7.872406317745432e-06,
      "loss": 0.0247,
      "step": 665780
    },
    {
      "epoch": 1.0895963027696498,
      "grad_norm": 0.8122127056121826,
      "learning_rate": 7.872340425531916e-06,
      "loss": 0.0242,
      "step": 665800
    },
    {
      "epoch": 1.089629033208303,
      "grad_norm": 0.2262975126504898,
      "learning_rate": 7.872274533318398e-06,
      "loss": 0.0176,
      "step": 665820
    },
    {
      "epoch": 1.0896617636469563,
      "grad_norm": 0.7501965761184692,
      "learning_rate": 7.872208641104881e-06,
      "loss": 0.0135,
      "step": 665840
    },
    {
      "epoch": 1.0896944940856097,
      "grad_norm": 0.8074499368667603,
      "learning_rate": 7.872142748891363e-06,
      "loss": 0.0212,
      "step": 665860
    },
    {
      "epoch": 1.089727224524263,
      "grad_norm": 0.13849157094955444,
      "learning_rate": 7.872076856677847e-06,
      "loss": 0.0231,
      "step": 665880
    },
    {
      "epoch": 1.0897599549629164,
      "grad_norm": 1.0105549097061157,
      "learning_rate": 7.872010964464329e-06,
      "loss": 0.0168,
      "step": 665900
    },
    {
      "epoch": 1.0897926854015698,
      "grad_norm": 1.6284265518188477,
      "learning_rate": 7.871945072250812e-06,
      "loss": 0.0299,
      "step": 665920
    },
    {
      "epoch": 1.0898254158402232,
      "grad_norm": 0.6864308714866638,
      "learning_rate": 7.871879180037296e-06,
      "loss": 0.02,
      "step": 665940
    },
    {
      "epoch": 1.0898581462788763,
      "grad_norm": 0.6020494103431702,
      "learning_rate": 7.871813287823778e-06,
      "loss": 0.0285,
      "step": 665960
    },
    {
      "epoch": 1.0898908767175297,
      "grad_norm": 1.091848611831665,
      "learning_rate": 7.871747395610261e-06,
      "loss": 0.0222,
      "step": 665980
    },
    {
      "epoch": 1.089923607156183,
      "grad_norm": 0.4586474597454071,
      "learning_rate": 7.871681503396745e-06,
      "loss": 0.0228,
      "step": 666000
    },
    {
      "epoch": 1.0899563375948365,
      "grad_norm": 1.7690287828445435,
      "learning_rate": 7.871615611183227e-06,
      "loss": 0.02,
      "step": 666020
    },
    {
      "epoch": 1.0899890680334898,
      "grad_norm": 0.34844523668289185,
      "learning_rate": 7.87154971896971e-06,
      "loss": 0.0289,
      "step": 666040
    },
    {
      "epoch": 1.0900217984721432,
      "grad_norm": 1.0013099908828735,
      "learning_rate": 7.871483826756194e-06,
      "loss": 0.0317,
      "step": 666060
    },
    {
      "epoch": 1.0900545289107963,
      "grad_norm": 1.167472243309021,
      "learning_rate": 7.871417934542676e-06,
      "loss": 0.027,
      "step": 666080
    },
    {
      "epoch": 1.0900872593494497,
      "grad_norm": 7.583971977233887,
      "learning_rate": 7.87135204232916e-06,
      "loss": 0.0204,
      "step": 666100
    },
    {
      "epoch": 1.090119989788103,
      "grad_norm": 0.665432333946228,
      "learning_rate": 7.871286150115641e-06,
      "loss": 0.0217,
      "step": 666120
    },
    {
      "epoch": 1.0901527202267565,
      "grad_norm": 0.17353613674640656,
      "learning_rate": 7.871220257902125e-06,
      "loss": 0.0203,
      "step": 666140
    },
    {
      "epoch": 1.0901854506654098,
      "grad_norm": 0.21211285889148712,
      "learning_rate": 7.871154365688607e-06,
      "loss": 0.0305,
      "step": 666160
    },
    {
      "epoch": 1.0902181811040632,
      "grad_norm": 0.4043388068675995,
      "learning_rate": 7.87108847347509e-06,
      "loss": 0.0257,
      "step": 666180
    },
    {
      "epoch": 1.0902509115427166,
      "grad_norm": 0.9131854176521301,
      "learning_rate": 7.871022581261572e-06,
      "loss": 0.029,
      "step": 666200
    },
    {
      "epoch": 1.0902836419813697,
      "grad_norm": 1.4676042795181274,
      "learning_rate": 7.870956689048056e-06,
      "loss": 0.0297,
      "step": 666220
    },
    {
      "epoch": 1.090316372420023,
      "grad_norm": 0.4594283103942871,
      "learning_rate": 7.870890796834538e-06,
      "loss": 0.0252,
      "step": 666240
    },
    {
      "epoch": 1.0903491028586765,
      "grad_norm": 0.644938051700592,
      "learning_rate": 7.870824904621021e-06,
      "loss": 0.0244,
      "step": 666260
    },
    {
      "epoch": 1.0903818332973298,
      "grad_norm": 0.13657870888710022,
      "learning_rate": 7.870759012407503e-06,
      "loss": 0.0289,
      "step": 666280
    },
    {
      "epoch": 1.0904145637359832,
      "grad_norm": 0.6480711698532104,
      "learning_rate": 7.870693120193987e-06,
      "loss": 0.0179,
      "step": 666300
    },
    {
      "epoch": 1.0904472941746366,
      "grad_norm": 0.3231307864189148,
      "learning_rate": 7.87062722798047e-06,
      "loss": 0.0218,
      "step": 666320
    },
    {
      "epoch": 1.09048002461329,
      "grad_norm": 1.100583791732788,
      "learning_rate": 7.870561335766952e-06,
      "loss": 0.0228,
      "step": 666340
    },
    {
      "epoch": 1.0905127550519431,
      "grad_norm": 1.160447359085083,
      "learning_rate": 7.870495443553436e-06,
      "loss": 0.0317,
      "step": 666360
    },
    {
      "epoch": 1.0905454854905965,
      "grad_norm": 0.7439320683479309,
      "learning_rate": 7.870429551339918e-06,
      "loss": 0.0233,
      "step": 666380
    },
    {
      "epoch": 1.0905782159292499,
      "grad_norm": 0.1890057623386383,
      "learning_rate": 7.870363659126402e-06,
      "loss": 0.0255,
      "step": 666400
    },
    {
      "epoch": 1.0906109463679032,
      "grad_norm": 2.033353328704834,
      "learning_rate": 7.870297766912885e-06,
      "loss": 0.0191,
      "step": 666420
    },
    {
      "epoch": 1.0906436768065566,
      "grad_norm": 0.600286602973938,
      "learning_rate": 7.870231874699367e-06,
      "loss": 0.0158,
      "step": 666440
    },
    {
      "epoch": 1.09067640724521,
      "grad_norm": 0.8823256492614746,
      "learning_rate": 7.87016598248585e-06,
      "loss": 0.0275,
      "step": 666460
    },
    {
      "epoch": 1.0907091376838634,
      "grad_norm": 1.9167498350143433,
      "learning_rate": 7.870100090272334e-06,
      "loss": 0.023,
      "step": 666480
    },
    {
      "epoch": 1.0907418681225165,
      "grad_norm": 0.39722421765327454,
      "learning_rate": 7.870034198058816e-06,
      "loss": 0.0237,
      "step": 666500
    },
    {
      "epoch": 1.0907745985611699,
      "grad_norm": 0.5596247911453247,
      "learning_rate": 7.8699683058453e-06,
      "loss": 0.0275,
      "step": 666520
    },
    {
      "epoch": 1.0908073289998232,
      "grad_norm": 0.9208390712738037,
      "learning_rate": 7.869902413631782e-06,
      "loss": 0.0356,
      "step": 666540
    },
    {
      "epoch": 1.0908400594384766,
      "grad_norm": 0.511738121509552,
      "learning_rate": 7.869836521418265e-06,
      "loss": 0.0261,
      "step": 666560
    },
    {
      "epoch": 1.09087278987713,
      "grad_norm": 0.42534139752388,
      "learning_rate": 7.869770629204747e-06,
      "loss": 0.0235,
      "step": 666580
    },
    {
      "epoch": 1.0909055203157834,
      "grad_norm": 0.4803147315979004,
      "learning_rate": 7.86970473699123e-06,
      "loss": 0.0174,
      "step": 666600
    },
    {
      "epoch": 1.0909382507544365,
      "grad_norm": 0.4954873323440552,
      "learning_rate": 7.869638844777712e-06,
      "loss": 0.0274,
      "step": 666620
    },
    {
      "epoch": 1.0909709811930899,
      "grad_norm": 0.4926309585571289,
      "learning_rate": 7.869572952564196e-06,
      "loss": 0.0221,
      "step": 666640
    },
    {
      "epoch": 1.0910037116317433,
      "grad_norm": 0.871823787689209,
      "learning_rate": 7.869507060350678e-06,
      "loss": 0.0288,
      "step": 666660
    },
    {
      "epoch": 1.0910364420703966,
      "grad_norm": 1.0717124938964844,
      "learning_rate": 7.869441168137162e-06,
      "loss": 0.0277,
      "step": 666680
    },
    {
      "epoch": 1.09106917250905,
      "grad_norm": 0.2109176367521286,
      "learning_rate": 7.869375275923645e-06,
      "loss": 0.0176,
      "step": 666700
    },
    {
      "epoch": 1.0911019029477034,
      "grad_norm": 0.3665626347064972,
      "learning_rate": 7.869309383710127e-06,
      "loss": 0.0163,
      "step": 666720
    },
    {
      "epoch": 1.0911346333863567,
      "grad_norm": 1.7833380699157715,
      "learning_rate": 7.86924349149661e-06,
      "loss": 0.0269,
      "step": 666740
    },
    {
      "epoch": 1.09116736382501,
      "grad_norm": 0.7530086040496826,
      "learning_rate": 7.869177599283093e-06,
      "loss": 0.0185,
      "step": 666760
    },
    {
      "epoch": 1.0912000942636633,
      "grad_norm": 0.38366207480430603,
      "learning_rate": 7.869111707069576e-06,
      "loss": 0.0218,
      "step": 666780
    },
    {
      "epoch": 1.0912328247023166,
      "grad_norm": 0.16566108167171478,
      "learning_rate": 7.86904581485606e-06,
      "loss": 0.028,
      "step": 666800
    },
    {
      "epoch": 1.09126555514097,
      "grad_norm": 1.1099857091903687,
      "learning_rate": 7.868979922642542e-06,
      "loss": 0.0234,
      "step": 666820
    },
    {
      "epoch": 1.0912982855796234,
      "grad_norm": 1.1739318370819092,
      "learning_rate": 7.868914030429025e-06,
      "loss": 0.0177,
      "step": 666840
    },
    {
      "epoch": 1.0913310160182768,
      "grad_norm": 0.8372554779052734,
      "learning_rate": 7.868848138215509e-06,
      "loss": 0.0235,
      "step": 666860
    },
    {
      "epoch": 1.09136374645693,
      "grad_norm": 0.08668923377990723,
      "learning_rate": 7.86878224600199e-06,
      "loss": 0.0277,
      "step": 666880
    },
    {
      "epoch": 1.0913964768955833,
      "grad_norm": 0.15631791949272156,
      "learning_rate": 7.868716353788474e-06,
      "loss": 0.0233,
      "step": 666900
    },
    {
      "epoch": 1.0914292073342367,
      "grad_norm": 1.4083762168884277,
      "learning_rate": 7.868650461574956e-06,
      "loss": 0.0292,
      "step": 666920
    },
    {
      "epoch": 1.09146193777289,
      "grad_norm": 0.3540961742401123,
      "learning_rate": 7.86858456936144e-06,
      "loss": 0.0255,
      "step": 666940
    },
    {
      "epoch": 1.0914946682115434,
      "grad_norm": 8.174355506896973,
      "learning_rate": 7.868518677147922e-06,
      "loss": 0.0302,
      "step": 666960
    },
    {
      "epoch": 1.0915273986501968,
      "grad_norm": 0.5154324769973755,
      "learning_rate": 7.868452784934405e-06,
      "loss": 0.03,
      "step": 666980
    },
    {
      "epoch": 1.0915601290888501,
      "grad_norm": 0.2876652479171753,
      "learning_rate": 7.868386892720887e-06,
      "loss": 0.028,
      "step": 667000
    },
    {
      "epoch": 1.0915928595275033,
      "grad_norm": 1.0303579568862915,
      "learning_rate": 7.86832100050737e-06,
      "loss": 0.02,
      "step": 667020
    },
    {
      "epoch": 1.0916255899661567,
      "grad_norm": 3.050034999847412,
      "learning_rate": 7.868255108293854e-06,
      "loss": 0.0215,
      "step": 667040
    },
    {
      "epoch": 1.09165832040481,
      "grad_norm": 0.25270357728004456,
      "learning_rate": 7.868189216080336e-06,
      "loss": 0.0254,
      "step": 667060
    },
    {
      "epoch": 1.0916910508434634,
      "grad_norm": 0.43083882331848145,
      "learning_rate": 7.86812332386682e-06,
      "loss": 0.0285,
      "step": 667080
    },
    {
      "epoch": 1.0917237812821168,
      "grad_norm": 0.29015353322029114,
      "learning_rate": 7.868057431653302e-06,
      "loss": 0.0239,
      "step": 667100
    },
    {
      "epoch": 1.0917565117207702,
      "grad_norm": 0.7991739511489868,
      "learning_rate": 7.867991539439785e-06,
      "loss": 0.0242,
      "step": 667120
    },
    {
      "epoch": 1.0917892421594235,
      "grad_norm": 1.0902442932128906,
      "learning_rate": 7.867925647226267e-06,
      "loss": 0.0258,
      "step": 667140
    },
    {
      "epoch": 1.0918219725980767,
      "grad_norm": 1.2627222537994385,
      "learning_rate": 7.86785975501275e-06,
      "loss": 0.0224,
      "step": 667160
    },
    {
      "epoch": 1.09185470303673,
      "grad_norm": 0.13741979002952576,
      "learning_rate": 7.867793862799233e-06,
      "loss": 0.0314,
      "step": 667180
    },
    {
      "epoch": 1.0918874334753834,
      "grad_norm": 0.3863392472267151,
      "learning_rate": 7.867727970585716e-06,
      "loss": 0.0267,
      "step": 667200
    },
    {
      "epoch": 1.0919201639140368,
      "grad_norm": 0.31392621994018555,
      "learning_rate": 7.8676620783722e-06,
      "loss": 0.0183,
      "step": 667220
    },
    {
      "epoch": 1.0919528943526902,
      "grad_norm": 0.7859983444213867,
      "learning_rate": 7.867596186158682e-06,
      "loss": 0.0239,
      "step": 667240
    },
    {
      "epoch": 1.0919856247913435,
      "grad_norm": 0.32748445868492126,
      "learning_rate": 7.867530293945165e-06,
      "loss": 0.0182,
      "step": 667260
    },
    {
      "epoch": 1.092018355229997,
      "grad_norm": 0.8836029171943665,
      "learning_rate": 7.867464401731649e-06,
      "loss": 0.0248,
      "step": 667280
    },
    {
      "epoch": 1.09205108566865,
      "grad_norm": 0.6851186156272888,
      "learning_rate": 7.86739850951813e-06,
      "loss": 0.0259,
      "step": 667300
    },
    {
      "epoch": 1.0920838161073034,
      "grad_norm": 0.33780747652053833,
      "learning_rate": 7.867332617304614e-06,
      "loss": 0.0226,
      "step": 667320
    },
    {
      "epoch": 1.0921165465459568,
      "grad_norm": 3.253115177154541,
      "learning_rate": 7.867266725091096e-06,
      "loss": 0.024,
      "step": 667340
    },
    {
      "epoch": 1.0921492769846102,
      "grad_norm": 0.33825281262397766,
      "learning_rate": 7.86720083287758e-06,
      "loss": 0.0157,
      "step": 667360
    },
    {
      "epoch": 1.0921820074232635,
      "grad_norm": 0.40287187695503235,
      "learning_rate": 7.867134940664063e-06,
      "loss": 0.0276,
      "step": 667380
    },
    {
      "epoch": 1.092214737861917,
      "grad_norm": 2.848726987838745,
      "learning_rate": 7.867069048450545e-06,
      "loss": 0.0254,
      "step": 667400
    },
    {
      "epoch": 1.09224746830057,
      "grad_norm": 0.49104446172714233,
      "learning_rate": 7.867003156237029e-06,
      "loss": 0.0286,
      "step": 667420
    },
    {
      "epoch": 1.0922801987392234,
      "grad_norm": 0.4088194668292999,
      "learning_rate": 7.86693726402351e-06,
      "loss": 0.0234,
      "step": 667440
    },
    {
      "epoch": 1.0923129291778768,
      "grad_norm": 0.2921028435230255,
      "learning_rate": 7.866871371809994e-06,
      "loss": 0.0291,
      "step": 667460
    },
    {
      "epoch": 1.0923456596165302,
      "grad_norm": 0.6126825213432312,
      "learning_rate": 7.866805479596476e-06,
      "loss": 0.0299,
      "step": 667480
    },
    {
      "epoch": 1.0923783900551836,
      "grad_norm": 1.080722689628601,
      "learning_rate": 7.86673958738296e-06,
      "loss": 0.0265,
      "step": 667500
    },
    {
      "epoch": 1.092411120493837,
      "grad_norm": 0.510310709476471,
      "learning_rate": 7.866673695169442e-06,
      "loss": 0.0162,
      "step": 667520
    },
    {
      "epoch": 1.0924438509324903,
      "grad_norm": 0.23057353496551514,
      "learning_rate": 7.866607802955925e-06,
      "loss": 0.0264,
      "step": 667540
    },
    {
      "epoch": 1.0924765813711435,
      "grad_norm": 0.32888782024383545,
      "learning_rate": 7.866541910742407e-06,
      "loss": 0.0219,
      "step": 667560
    },
    {
      "epoch": 1.0925093118097968,
      "grad_norm": 0.30509963631629944,
      "learning_rate": 7.86647601852889e-06,
      "loss": 0.0206,
      "step": 667580
    },
    {
      "epoch": 1.0925420422484502,
      "grad_norm": 0.1510985940694809,
      "learning_rate": 7.866410126315374e-06,
      "loss": 0.0341,
      "step": 667600
    },
    {
      "epoch": 1.0925747726871036,
      "grad_norm": 0.7688679695129395,
      "learning_rate": 7.866344234101856e-06,
      "loss": 0.0231,
      "step": 667620
    },
    {
      "epoch": 1.092607503125757,
      "grad_norm": 1.7464039325714111,
      "learning_rate": 7.86627834188834e-06,
      "loss": 0.0273,
      "step": 667640
    },
    {
      "epoch": 1.0926402335644103,
      "grad_norm": 0.6586374044418335,
      "learning_rate": 7.866212449674823e-06,
      "loss": 0.0228,
      "step": 667660
    },
    {
      "epoch": 1.0926729640030635,
      "grad_norm": 0.42665550112724304,
      "learning_rate": 7.866146557461305e-06,
      "loss": 0.0232,
      "step": 667680
    },
    {
      "epoch": 1.0927056944417168,
      "grad_norm": 0.28216373920440674,
      "learning_rate": 7.866080665247789e-06,
      "loss": 0.0266,
      "step": 667700
    },
    {
      "epoch": 1.0927384248803702,
      "grad_norm": 0.609024703502655,
      "learning_rate": 7.86601477303427e-06,
      "loss": 0.032,
      "step": 667720
    },
    {
      "epoch": 1.0927711553190236,
      "grad_norm": 0.22952371835708618,
      "learning_rate": 7.865948880820754e-06,
      "loss": 0.0275,
      "step": 667740
    },
    {
      "epoch": 1.092803885757677,
      "grad_norm": 0.298884779214859,
      "learning_rate": 7.865882988607238e-06,
      "loss": 0.0207,
      "step": 667760
    },
    {
      "epoch": 1.0928366161963303,
      "grad_norm": 1.1701724529266357,
      "learning_rate": 7.86581709639372e-06,
      "loss": 0.0239,
      "step": 667780
    },
    {
      "epoch": 1.0928693466349837,
      "grad_norm": 0.5773794651031494,
      "learning_rate": 7.865751204180203e-06,
      "loss": 0.0196,
      "step": 667800
    },
    {
      "epoch": 1.0929020770736368,
      "grad_norm": 0.34738314151763916,
      "learning_rate": 7.865685311966685e-06,
      "loss": 0.0221,
      "step": 667820
    },
    {
      "epoch": 1.0929348075122902,
      "grad_norm": 1.409277319908142,
      "learning_rate": 7.865619419753169e-06,
      "loss": 0.024,
      "step": 667840
    },
    {
      "epoch": 1.0929675379509436,
      "grad_norm": 0.2699022591114044,
      "learning_rate": 7.86555352753965e-06,
      "loss": 0.0234,
      "step": 667860
    },
    {
      "epoch": 1.093000268389597,
      "grad_norm": 0.5559816360473633,
      "learning_rate": 7.865487635326134e-06,
      "loss": 0.0137,
      "step": 667880
    },
    {
      "epoch": 1.0930329988282503,
      "grad_norm": 1.2676723003387451,
      "learning_rate": 7.865421743112616e-06,
      "loss": 0.0255,
      "step": 667900
    },
    {
      "epoch": 1.0930657292669037,
      "grad_norm": 0.5517494082450867,
      "learning_rate": 7.8653558508991e-06,
      "loss": 0.024,
      "step": 667920
    },
    {
      "epoch": 1.093098459705557,
      "grad_norm": 0.3867451548576355,
      "learning_rate": 7.865289958685582e-06,
      "loss": 0.026,
      "step": 667940
    },
    {
      "epoch": 1.0931311901442102,
      "grad_norm": 0.8388553857803345,
      "learning_rate": 7.865224066472065e-06,
      "loss": 0.0276,
      "step": 667960
    },
    {
      "epoch": 1.0931639205828636,
      "grad_norm": 1.6474355459213257,
      "learning_rate": 7.865158174258547e-06,
      "loss": 0.0252,
      "step": 667980
    },
    {
      "epoch": 1.093196651021517,
      "grad_norm": 0.293697327375412,
      "learning_rate": 7.865092282045031e-06,
      "loss": 0.0175,
      "step": 668000
    },
    {
      "epoch": 1.0932293814601703,
      "grad_norm": 1.1825541257858276,
      "learning_rate": 7.865026389831514e-06,
      "loss": 0.0246,
      "step": 668020
    },
    {
      "epoch": 1.0932621118988237,
      "grad_norm": 0.4565131962299347,
      "learning_rate": 7.864960497617996e-06,
      "loss": 0.0229,
      "step": 668040
    },
    {
      "epoch": 1.093294842337477,
      "grad_norm": 0.8292099833488464,
      "learning_rate": 7.86489460540448e-06,
      "loss": 0.0275,
      "step": 668060
    },
    {
      "epoch": 1.0933275727761305,
      "grad_norm": 0.6464255452156067,
      "learning_rate": 7.864828713190964e-06,
      "loss": 0.0233,
      "step": 668080
    },
    {
      "epoch": 1.0933603032147836,
      "grad_norm": 3.083137035369873,
      "learning_rate": 7.864762820977447e-06,
      "loss": 0.0283,
      "step": 668100
    },
    {
      "epoch": 1.093393033653437,
      "grad_norm": 1.2672983407974243,
      "learning_rate": 7.864696928763929e-06,
      "loss": 0.0253,
      "step": 668120
    },
    {
      "epoch": 1.0934257640920904,
      "grad_norm": 0.5404371023178101,
      "learning_rate": 7.864631036550413e-06,
      "loss": 0.0277,
      "step": 668140
    },
    {
      "epoch": 1.0934584945307437,
      "grad_norm": 0.5796146392822266,
      "learning_rate": 7.864565144336894e-06,
      "loss": 0.0225,
      "step": 668160
    },
    {
      "epoch": 1.093491224969397,
      "grad_norm": 0.7878219485282898,
      "learning_rate": 7.864499252123378e-06,
      "loss": 0.0175,
      "step": 668180
    },
    {
      "epoch": 1.0935239554080505,
      "grad_norm": 0.6082894802093506,
      "learning_rate": 7.86443335990986e-06,
      "loss": 0.0385,
      "step": 668200
    },
    {
      "epoch": 1.0935566858467036,
      "grad_norm": 0.8771662712097168,
      "learning_rate": 7.864367467696344e-06,
      "loss": 0.0286,
      "step": 668220
    },
    {
      "epoch": 1.093589416285357,
      "grad_norm": 1.0983973741531372,
      "learning_rate": 7.864301575482825e-06,
      "loss": 0.0182,
      "step": 668240
    },
    {
      "epoch": 1.0936221467240104,
      "grad_norm": 0.600534975528717,
      "learning_rate": 7.864235683269309e-06,
      "loss": 0.0222,
      "step": 668260
    },
    {
      "epoch": 1.0936548771626637,
      "grad_norm": 0.7397936582565308,
      "learning_rate": 7.864169791055791e-06,
      "loss": 0.022,
      "step": 668280
    },
    {
      "epoch": 1.0936876076013171,
      "grad_norm": 0.7726671099662781,
      "learning_rate": 7.864103898842274e-06,
      "loss": 0.0183,
      "step": 668300
    },
    {
      "epoch": 1.0937203380399705,
      "grad_norm": 0.9040736556053162,
      "learning_rate": 7.864038006628756e-06,
      "loss": 0.0243,
      "step": 668320
    },
    {
      "epoch": 1.0937530684786236,
      "grad_norm": 1.9599955081939697,
      "learning_rate": 7.86397211441524e-06,
      "loss": 0.0187,
      "step": 668340
    },
    {
      "epoch": 1.093785798917277,
      "grad_norm": 0.7864817380905151,
      "learning_rate": 7.863906222201722e-06,
      "loss": 0.0155,
      "step": 668360
    },
    {
      "epoch": 1.0938185293559304,
      "grad_norm": 0.7386378645896912,
      "learning_rate": 7.863840329988205e-06,
      "loss": 0.0231,
      "step": 668380
    },
    {
      "epoch": 1.0938512597945838,
      "grad_norm": 0.758854866027832,
      "learning_rate": 7.863774437774689e-06,
      "loss": 0.0276,
      "step": 668400
    },
    {
      "epoch": 1.0938839902332371,
      "grad_norm": 0.18669863045215607,
      "learning_rate": 7.863708545561171e-06,
      "loss": 0.0187,
      "step": 668420
    },
    {
      "epoch": 1.0939167206718905,
      "grad_norm": 1.304397702217102,
      "learning_rate": 7.863642653347655e-06,
      "loss": 0.0271,
      "step": 668440
    },
    {
      "epoch": 1.0939494511105439,
      "grad_norm": 0.5286189317703247,
      "learning_rate": 7.863576761134138e-06,
      "loss": 0.023,
      "step": 668460
    },
    {
      "epoch": 1.093982181549197,
      "grad_norm": 0.9733034372329712,
      "learning_rate": 7.86351086892062e-06,
      "loss": 0.0293,
      "step": 668480
    },
    {
      "epoch": 1.0940149119878504,
      "grad_norm": 0.25166603922843933,
      "learning_rate": 7.863444976707104e-06,
      "loss": 0.0225,
      "step": 668500
    },
    {
      "epoch": 1.0940476424265038,
      "grad_norm": 1.452803373336792,
      "learning_rate": 7.863379084493587e-06,
      "loss": 0.0281,
      "step": 668520
    },
    {
      "epoch": 1.0940803728651571,
      "grad_norm": 0.7142247557640076,
      "learning_rate": 7.863313192280069e-06,
      "loss": 0.0222,
      "step": 668540
    },
    {
      "epoch": 1.0941131033038105,
      "grad_norm": 0.28981226682662964,
      "learning_rate": 7.863247300066553e-06,
      "loss": 0.0291,
      "step": 668560
    },
    {
      "epoch": 1.0941458337424639,
      "grad_norm": 0.6940012574195862,
      "learning_rate": 7.863181407853035e-06,
      "loss": 0.0185,
      "step": 668580
    },
    {
      "epoch": 1.0941785641811173,
      "grad_norm": 0.7498998045921326,
      "learning_rate": 7.863115515639518e-06,
      "loss": 0.0251,
      "step": 668600
    },
    {
      "epoch": 1.0942112946197704,
      "grad_norm": 0.3266538083553314,
      "learning_rate": 7.863049623426e-06,
      "loss": 0.0175,
      "step": 668620
    },
    {
      "epoch": 1.0942440250584238,
      "grad_norm": 0.7887493968009949,
      "learning_rate": 7.862983731212484e-06,
      "loss": 0.0176,
      "step": 668640
    },
    {
      "epoch": 1.0942767554970771,
      "grad_norm": 0.9953542351722717,
      "learning_rate": 7.862917838998966e-06,
      "loss": 0.0253,
      "step": 668660
    },
    {
      "epoch": 1.0943094859357305,
      "grad_norm": 4.037563323974609,
      "learning_rate": 7.862851946785449e-06,
      "loss": 0.0234,
      "step": 668680
    },
    {
      "epoch": 1.094342216374384,
      "grad_norm": 0.9553055167198181,
      "learning_rate": 7.862786054571931e-06,
      "loss": 0.0214,
      "step": 668700
    },
    {
      "epoch": 1.0943749468130373,
      "grad_norm": 0.7940744757652283,
      "learning_rate": 7.862720162358415e-06,
      "loss": 0.0435,
      "step": 668720
    },
    {
      "epoch": 1.0944076772516906,
      "grad_norm": 1.866108775138855,
      "learning_rate": 7.862654270144896e-06,
      "loss": 0.027,
      "step": 668740
    },
    {
      "epoch": 1.0944404076903438,
      "grad_norm": 0.5077369809150696,
      "learning_rate": 7.86258837793138e-06,
      "loss": 0.0196,
      "step": 668760
    },
    {
      "epoch": 1.0944731381289972,
      "grad_norm": 1.1940419673919678,
      "learning_rate": 7.862522485717864e-06,
      "loss": 0.0345,
      "step": 668780
    },
    {
      "epoch": 1.0945058685676505,
      "grad_norm": 0.5109777450561523,
      "learning_rate": 7.862456593504346e-06,
      "loss": 0.023,
      "step": 668800
    },
    {
      "epoch": 1.094538599006304,
      "grad_norm": 0.8212777972221375,
      "learning_rate": 7.862390701290829e-06,
      "loss": 0.0209,
      "step": 668820
    },
    {
      "epoch": 1.0945713294449573,
      "grad_norm": 0.4076746106147766,
      "learning_rate": 7.862324809077313e-06,
      "loss": 0.0283,
      "step": 668840
    },
    {
      "epoch": 1.0946040598836106,
      "grad_norm": 2.3914177417755127,
      "learning_rate": 7.862258916863795e-06,
      "loss": 0.0253,
      "step": 668860
    },
    {
      "epoch": 1.0946367903222638,
      "grad_norm": 15.871647834777832,
      "learning_rate": 7.862193024650278e-06,
      "loss": 0.0288,
      "step": 668880
    },
    {
      "epoch": 1.0946695207609172,
      "grad_norm": 1.153860092163086,
      "learning_rate": 7.862127132436762e-06,
      "loss": 0.022,
      "step": 668900
    },
    {
      "epoch": 1.0947022511995705,
      "grad_norm": 0.7916848659515381,
      "learning_rate": 7.862061240223244e-06,
      "loss": 0.0238,
      "step": 668920
    },
    {
      "epoch": 1.094734981638224,
      "grad_norm": 1.6513721942901611,
      "learning_rate": 7.861995348009727e-06,
      "loss": 0.0344,
      "step": 668940
    },
    {
      "epoch": 1.0947677120768773,
      "grad_norm": 0.3576073944568634,
      "learning_rate": 7.861929455796209e-06,
      "loss": 0.0276,
      "step": 668960
    },
    {
      "epoch": 1.0948004425155307,
      "grad_norm": 0.23301123082637787,
      "learning_rate": 7.861863563582693e-06,
      "loss": 0.0221,
      "step": 668980
    },
    {
      "epoch": 1.094833172954184,
      "grad_norm": 1.174032211303711,
      "learning_rate": 7.861797671369175e-06,
      "loss": 0.0245,
      "step": 669000
    },
    {
      "epoch": 1.0948659033928372,
      "grad_norm": 0.9188876748085022,
      "learning_rate": 7.861731779155658e-06,
      "loss": 0.0206,
      "step": 669020
    },
    {
      "epoch": 1.0948986338314906,
      "grad_norm": 1.156968593597412,
      "learning_rate": 7.86166588694214e-06,
      "loss": 0.0315,
      "step": 669040
    },
    {
      "epoch": 1.094931364270144,
      "grad_norm": 0.4428967535495758,
      "learning_rate": 7.861599994728624e-06,
      "loss": 0.0219,
      "step": 669060
    },
    {
      "epoch": 1.0949640947087973,
      "grad_norm": 0.341749906539917,
      "learning_rate": 7.861534102515106e-06,
      "loss": 0.0152,
      "step": 669080
    },
    {
      "epoch": 1.0949968251474507,
      "grad_norm": 0.3452618420124054,
      "learning_rate": 7.86146821030159e-06,
      "loss": 0.0338,
      "step": 669100
    },
    {
      "epoch": 1.095029555586104,
      "grad_norm": 1.64077889919281,
      "learning_rate": 7.861402318088071e-06,
      "loss": 0.0302,
      "step": 669120
    },
    {
      "epoch": 1.0950622860247572,
      "grad_norm": 0.3382766544818878,
      "learning_rate": 7.861336425874555e-06,
      "loss": 0.0238,
      "step": 669140
    },
    {
      "epoch": 1.0950950164634106,
      "grad_norm": 0.3559466004371643,
      "learning_rate": 7.861270533661038e-06,
      "loss": 0.0178,
      "step": 669160
    },
    {
      "epoch": 1.095127746902064,
      "grad_norm": 0.4722950756549835,
      "learning_rate": 7.86120464144752e-06,
      "loss": 0.0285,
      "step": 669180
    },
    {
      "epoch": 1.0951604773407173,
      "grad_norm": 0.16688944399356842,
      "learning_rate": 7.861138749234004e-06,
      "loss": 0.0276,
      "step": 669200
    },
    {
      "epoch": 1.0951932077793707,
      "grad_norm": 0.40800848603248596,
      "learning_rate": 7.861072857020486e-06,
      "loss": 0.0286,
      "step": 669220
    },
    {
      "epoch": 1.095225938218024,
      "grad_norm": 0.44429871439933777,
      "learning_rate": 7.86100696480697e-06,
      "loss": 0.0282,
      "step": 669240
    },
    {
      "epoch": 1.0952586686566774,
      "grad_norm": 0.31953737139701843,
      "learning_rate": 7.860941072593453e-06,
      "loss": 0.0246,
      "step": 669260
    },
    {
      "epoch": 1.0952913990953306,
      "grad_norm": 1.1518138647079468,
      "learning_rate": 7.860875180379935e-06,
      "loss": 0.0165,
      "step": 669280
    },
    {
      "epoch": 1.095324129533984,
      "grad_norm": 1.0123863220214844,
      "learning_rate": 7.860809288166418e-06,
      "loss": 0.0242,
      "step": 669300
    },
    {
      "epoch": 1.0953568599726373,
      "grad_norm": 1.1836261749267578,
      "learning_rate": 7.860743395952902e-06,
      "loss": 0.0212,
      "step": 669320
    },
    {
      "epoch": 1.0953895904112907,
      "grad_norm": 1.787551760673523,
      "learning_rate": 7.860677503739384e-06,
      "loss": 0.0199,
      "step": 669340
    },
    {
      "epoch": 1.095422320849944,
      "grad_norm": 0.2550792992115021,
      "learning_rate": 7.860611611525867e-06,
      "loss": 0.029,
      "step": 669360
    },
    {
      "epoch": 1.0954550512885974,
      "grad_norm": 1.1943904161453247,
      "learning_rate": 7.86054571931235e-06,
      "loss": 0.0235,
      "step": 669380
    },
    {
      "epoch": 1.0954877817272508,
      "grad_norm": 1.904375672340393,
      "learning_rate": 7.860479827098833e-06,
      "loss": 0.0345,
      "step": 669400
    },
    {
      "epoch": 1.095520512165904,
      "grad_norm": 0.3247968852519989,
      "learning_rate": 7.860413934885315e-06,
      "loss": 0.0248,
      "step": 669420
    },
    {
      "epoch": 1.0955532426045573,
      "grad_norm": 1.4380507469177246,
      "learning_rate": 7.860348042671798e-06,
      "loss": 0.0194,
      "step": 669440
    },
    {
      "epoch": 1.0955859730432107,
      "grad_norm": 1.5437102317810059,
      "learning_rate": 7.86028215045828e-06,
      "loss": 0.026,
      "step": 669460
    },
    {
      "epoch": 1.095618703481864,
      "grad_norm": 1.351983666419983,
      "learning_rate": 7.860216258244764e-06,
      "loss": 0.0359,
      "step": 669480
    },
    {
      "epoch": 1.0956514339205174,
      "grad_norm": 0.8473688960075378,
      "learning_rate": 7.860150366031247e-06,
      "loss": 0.0208,
      "step": 669500
    },
    {
      "epoch": 1.0956841643591708,
      "grad_norm": 0.593553900718689,
      "learning_rate": 7.86008447381773e-06,
      "loss": 0.0182,
      "step": 669520
    },
    {
      "epoch": 1.0957168947978242,
      "grad_norm": 0.5187551379203796,
      "learning_rate": 7.860018581604213e-06,
      "loss": 0.0228,
      "step": 669540
    },
    {
      "epoch": 1.0957496252364773,
      "grad_norm": 0.9324290156364441,
      "learning_rate": 7.859952689390695e-06,
      "loss": 0.022,
      "step": 669560
    },
    {
      "epoch": 1.0957823556751307,
      "grad_norm": 0.378623366355896,
      "learning_rate": 7.859886797177178e-06,
      "loss": 0.0255,
      "step": 669580
    },
    {
      "epoch": 1.095815086113784,
      "grad_norm": 0.6566714644432068,
      "learning_rate": 7.85982090496366e-06,
      "loss": 0.0322,
      "step": 669600
    },
    {
      "epoch": 1.0958478165524375,
      "grad_norm": 0.6208752989768982,
      "learning_rate": 7.859755012750144e-06,
      "loss": 0.0211,
      "step": 669620
    },
    {
      "epoch": 1.0958805469910908,
      "grad_norm": 1.3488290309906006,
      "learning_rate": 7.859689120536627e-06,
      "loss": 0.0201,
      "step": 669640
    },
    {
      "epoch": 1.0959132774297442,
      "grad_norm": 1.2763783931732178,
      "learning_rate": 7.85962322832311e-06,
      "loss": 0.0148,
      "step": 669660
    },
    {
      "epoch": 1.0959460078683974,
      "grad_norm": 1.7220218181610107,
      "learning_rate": 7.859557336109593e-06,
      "loss": 0.0214,
      "step": 669680
    },
    {
      "epoch": 1.0959787383070507,
      "grad_norm": 0.24921418726444244,
      "learning_rate": 7.859491443896076e-06,
      "loss": 0.0237,
      "step": 669700
    },
    {
      "epoch": 1.096011468745704,
      "grad_norm": 0.7110156416893005,
      "learning_rate": 7.859425551682558e-06,
      "loss": 0.0285,
      "step": 669720
    },
    {
      "epoch": 1.0960441991843575,
      "grad_norm": 0.8472084999084473,
      "learning_rate": 7.859359659469042e-06,
      "loss": 0.0274,
      "step": 669740
    },
    {
      "epoch": 1.0960769296230108,
      "grad_norm": 0.7238067984580994,
      "learning_rate": 7.859293767255524e-06,
      "loss": 0.0229,
      "step": 669760
    },
    {
      "epoch": 1.0961096600616642,
      "grad_norm": 0.34154757857322693,
      "learning_rate": 7.859227875042007e-06,
      "loss": 0.0247,
      "step": 669780
    },
    {
      "epoch": 1.0961423905003176,
      "grad_norm": 1.4342325925827026,
      "learning_rate": 7.85916198282849e-06,
      "loss": 0.02,
      "step": 669800
    },
    {
      "epoch": 1.0961751209389707,
      "grad_norm": 0.5210800766944885,
      "learning_rate": 7.859096090614973e-06,
      "loss": 0.016,
      "step": 669820
    },
    {
      "epoch": 1.096207851377624,
      "grad_norm": 2.063946008682251,
      "learning_rate": 7.859030198401456e-06,
      "loss": 0.0231,
      "step": 669840
    },
    {
      "epoch": 1.0962405818162775,
      "grad_norm": 1.0887936353683472,
      "learning_rate": 7.858964306187938e-06,
      "loss": 0.0217,
      "step": 669860
    },
    {
      "epoch": 1.0962733122549309,
      "grad_norm": 1.199471354484558,
      "learning_rate": 7.858898413974422e-06,
      "loss": 0.0293,
      "step": 669880
    },
    {
      "epoch": 1.0963060426935842,
      "grad_norm": 0.4303697943687439,
      "learning_rate": 7.858832521760904e-06,
      "loss": 0.025,
      "step": 669900
    },
    {
      "epoch": 1.0963387731322376,
      "grad_norm": 0.5677801966667175,
      "learning_rate": 7.858766629547387e-06,
      "loss": 0.0281,
      "step": 669920
    },
    {
      "epoch": 1.0963715035708907,
      "grad_norm": 0.2909950017929077,
      "learning_rate": 7.85870073733387e-06,
      "loss": 0.0194,
      "step": 669940
    },
    {
      "epoch": 1.0964042340095441,
      "grad_norm": 1.743822455406189,
      "learning_rate": 7.858634845120353e-06,
      "loss": 0.0228,
      "step": 669960
    },
    {
      "epoch": 1.0964369644481975,
      "grad_norm": 1.2471400499343872,
      "learning_rate": 7.858568952906835e-06,
      "loss": 0.0223,
      "step": 669980
    },
    {
      "epoch": 1.0964696948868509,
      "grad_norm": 0.3071563243865967,
      "learning_rate": 7.858503060693318e-06,
      "loss": 0.0306,
      "step": 670000
    },
    {
      "epoch": 1.0965024253255042,
      "grad_norm": 0.3691457211971283,
      "learning_rate": 7.8584371684798e-06,
      "loss": 0.0279,
      "step": 670020
    },
    {
      "epoch": 1.0965351557641576,
      "grad_norm": 1.2937490940093994,
      "learning_rate": 7.858371276266284e-06,
      "loss": 0.0344,
      "step": 670040
    },
    {
      "epoch": 1.096567886202811,
      "grad_norm": 0.5633962750434875,
      "learning_rate": 7.858305384052767e-06,
      "loss": 0.0247,
      "step": 670060
    },
    {
      "epoch": 1.0966006166414641,
      "grad_norm": 1.1341700553894043,
      "learning_rate": 7.85823949183925e-06,
      "loss": 0.0224,
      "step": 670080
    },
    {
      "epoch": 1.0966333470801175,
      "grad_norm": 1.2217909097671509,
      "learning_rate": 7.858173599625733e-06,
      "loss": 0.0221,
      "step": 670100
    },
    {
      "epoch": 1.0966660775187709,
      "grad_norm": 2.0467121601104736,
      "learning_rate": 7.858107707412217e-06,
      "loss": 0.0275,
      "step": 670120
    },
    {
      "epoch": 1.0966988079574242,
      "grad_norm": 0.9561670422554016,
      "learning_rate": 7.858041815198698e-06,
      "loss": 0.0229,
      "step": 670140
    },
    {
      "epoch": 1.0967315383960776,
      "grad_norm": 0.743476390838623,
      "learning_rate": 7.857975922985182e-06,
      "loss": 0.0208,
      "step": 670160
    },
    {
      "epoch": 1.096764268834731,
      "grad_norm": 0.099170982837677,
      "learning_rate": 7.857910030771664e-06,
      "loss": 0.0222,
      "step": 670180
    },
    {
      "epoch": 1.0967969992733844,
      "grad_norm": 1.2759907245635986,
      "learning_rate": 7.857844138558147e-06,
      "loss": 0.031,
      "step": 670200
    },
    {
      "epoch": 1.0968297297120375,
      "grad_norm": 0.6658006310462952,
      "learning_rate": 7.857778246344631e-06,
      "loss": 0.024,
      "step": 670220
    },
    {
      "epoch": 1.0968624601506909,
      "grad_norm": 0.35570284724235535,
      "learning_rate": 7.857712354131113e-06,
      "loss": 0.0202,
      "step": 670240
    },
    {
      "epoch": 1.0968951905893443,
      "grad_norm": 2.455786943435669,
      "learning_rate": 7.857646461917597e-06,
      "loss": 0.0305,
      "step": 670260
    },
    {
      "epoch": 1.0969279210279976,
      "grad_norm": 1.3309175968170166,
      "learning_rate": 7.857580569704078e-06,
      "loss": 0.0295,
      "step": 670280
    },
    {
      "epoch": 1.096960651466651,
      "grad_norm": 0.2755778729915619,
      "learning_rate": 7.857514677490562e-06,
      "loss": 0.0226,
      "step": 670300
    },
    {
      "epoch": 1.0969933819053044,
      "grad_norm": 0.32646751403808594,
      "learning_rate": 7.857448785277044e-06,
      "loss": 0.0255,
      "step": 670320
    },
    {
      "epoch": 1.0970261123439577,
      "grad_norm": 1.7233535051345825,
      "learning_rate": 7.857382893063528e-06,
      "loss": 0.0271,
      "step": 670340
    },
    {
      "epoch": 1.097058842782611,
      "grad_norm": 0.1606118083000183,
      "learning_rate": 7.85731700085001e-06,
      "loss": 0.0169,
      "step": 670360
    },
    {
      "epoch": 1.0970915732212643,
      "grad_norm": 0.43563076853752136,
      "learning_rate": 7.857251108636493e-06,
      "loss": 0.0172,
      "step": 670380
    },
    {
      "epoch": 1.0971243036599176,
      "grad_norm": 0.09819573909044266,
      "learning_rate": 7.857185216422975e-06,
      "loss": 0.0324,
      "step": 670400
    },
    {
      "epoch": 1.097157034098571,
      "grad_norm": 0.6588040590286255,
      "learning_rate": 7.857119324209458e-06,
      "loss": 0.0251,
      "step": 670420
    },
    {
      "epoch": 1.0971897645372244,
      "grad_norm": 0.09786872565746307,
      "learning_rate": 7.857053431995942e-06,
      "loss": 0.0187,
      "step": 670440
    },
    {
      "epoch": 1.0972224949758778,
      "grad_norm": 0.9206312298774719,
      "learning_rate": 7.856987539782424e-06,
      "loss": 0.0198,
      "step": 670460
    },
    {
      "epoch": 1.097255225414531,
      "grad_norm": 0.9756855964660645,
      "learning_rate": 7.856921647568908e-06,
      "loss": 0.0272,
      "step": 670480
    },
    {
      "epoch": 1.0972879558531843,
      "grad_norm": 1.753879427909851,
      "learning_rate": 7.856855755355391e-06,
      "loss": 0.0219,
      "step": 670500
    },
    {
      "epoch": 1.0973206862918377,
      "grad_norm": 2.1468799114227295,
      "learning_rate": 7.856789863141873e-06,
      "loss": 0.0246,
      "step": 670520
    },
    {
      "epoch": 1.097353416730491,
      "grad_norm": 0.382992684841156,
      "learning_rate": 7.856723970928357e-06,
      "loss": 0.0214,
      "step": 670540
    },
    {
      "epoch": 1.0973861471691444,
      "grad_norm": 1.0790935754776,
      "learning_rate": 7.85665807871484e-06,
      "loss": 0.0248,
      "step": 670560
    },
    {
      "epoch": 1.0974188776077978,
      "grad_norm": 0.7941721677780151,
      "learning_rate": 7.856592186501322e-06,
      "loss": 0.0188,
      "step": 670580
    },
    {
      "epoch": 1.097451608046451,
      "grad_norm": 1.6173913478851318,
      "learning_rate": 7.856526294287806e-06,
      "loss": 0.0245,
      "step": 670600
    },
    {
      "epoch": 1.0974843384851043,
      "grad_norm": 0.878555417060852,
      "learning_rate": 7.856460402074288e-06,
      "loss": 0.0309,
      "step": 670620
    },
    {
      "epoch": 1.0975170689237577,
      "grad_norm": 1.6129164695739746,
      "learning_rate": 7.856394509860771e-06,
      "loss": 0.0184,
      "step": 670640
    },
    {
      "epoch": 1.097549799362411,
      "grad_norm": 1.0555505752563477,
      "learning_rate": 7.856328617647253e-06,
      "loss": 0.0313,
      "step": 670660
    },
    {
      "epoch": 1.0975825298010644,
      "grad_norm": 0.5313693881034851,
      "learning_rate": 7.856262725433737e-06,
      "loss": 0.0195,
      "step": 670680
    },
    {
      "epoch": 1.0976152602397178,
      "grad_norm": 0.6892683506011963,
      "learning_rate": 7.856196833220219e-06,
      "loss": 0.0219,
      "step": 670700
    },
    {
      "epoch": 1.0976479906783712,
      "grad_norm": 0.4187042713165283,
      "learning_rate": 7.856130941006702e-06,
      "loss": 0.025,
      "step": 670720
    },
    {
      "epoch": 1.0976807211170243,
      "grad_norm": 0.5800399780273438,
      "learning_rate": 7.856065048793184e-06,
      "loss": 0.0331,
      "step": 670740
    },
    {
      "epoch": 1.0977134515556777,
      "grad_norm": 1.0918749570846558,
      "learning_rate": 7.855999156579668e-06,
      "loss": 0.0195,
      "step": 670760
    },
    {
      "epoch": 1.097746181994331,
      "grad_norm": 1.0729851722717285,
      "learning_rate": 7.85593326436615e-06,
      "loss": 0.026,
      "step": 670780
    },
    {
      "epoch": 1.0977789124329844,
      "grad_norm": 0.527655839920044,
      "learning_rate": 7.855867372152633e-06,
      "loss": 0.0203,
      "step": 670800
    },
    {
      "epoch": 1.0978116428716378,
      "grad_norm": 0.9882529377937317,
      "learning_rate": 7.855801479939115e-06,
      "loss": 0.0236,
      "step": 670820
    },
    {
      "epoch": 1.0978443733102912,
      "grad_norm": 1.6878249645233154,
      "learning_rate": 7.855735587725599e-06,
      "loss": 0.0207,
      "step": 670840
    },
    {
      "epoch": 1.0978771037489445,
      "grad_norm": 1.3851935863494873,
      "learning_rate": 7.855669695512082e-06,
      "loss": 0.0189,
      "step": 670860
    },
    {
      "epoch": 1.0979098341875977,
      "grad_norm": 1.2015031576156616,
      "learning_rate": 7.855603803298566e-06,
      "loss": 0.027,
      "step": 670880
    },
    {
      "epoch": 1.097942564626251,
      "grad_norm": 0.454608678817749,
      "learning_rate": 7.855537911085048e-06,
      "loss": 0.0308,
      "step": 670900
    },
    {
      "epoch": 1.0979752950649044,
      "grad_norm": 1.778220772743225,
      "learning_rate": 7.855472018871531e-06,
      "loss": 0.0246,
      "step": 670920
    },
    {
      "epoch": 1.0980080255035578,
      "grad_norm": 0.8323788046836853,
      "learning_rate": 7.855406126658015e-06,
      "loss": 0.0362,
      "step": 670940
    },
    {
      "epoch": 1.0980407559422112,
      "grad_norm": 0.3759029507637024,
      "learning_rate": 7.855340234444497e-06,
      "loss": 0.0221,
      "step": 670960
    },
    {
      "epoch": 1.0980734863808645,
      "grad_norm": 0.3478921949863434,
      "learning_rate": 7.85527434223098e-06,
      "loss": 0.0234,
      "step": 670980
    },
    {
      "epoch": 1.098106216819518,
      "grad_norm": 1.209399938583374,
      "learning_rate": 7.855208450017462e-06,
      "loss": 0.0217,
      "step": 671000
    },
    {
      "epoch": 1.098138947258171,
      "grad_norm": 0.9037955403327942,
      "learning_rate": 7.855142557803946e-06,
      "loss": 0.0306,
      "step": 671020
    },
    {
      "epoch": 1.0981716776968244,
      "grad_norm": 2.1797471046447754,
      "learning_rate": 7.855076665590428e-06,
      "loss": 0.0263,
      "step": 671040
    },
    {
      "epoch": 1.0982044081354778,
      "grad_norm": 2.0903139114379883,
      "learning_rate": 7.855010773376911e-06,
      "loss": 0.0224,
      "step": 671060
    },
    {
      "epoch": 1.0982371385741312,
      "grad_norm": 0.16731484234333038,
      "learning_rate": 7.854944881163393e-06,
      "loss": 0.0154,
      "step": 671080
    },
    {
      "epoch": 1.0982698690127846,
      "grad_norm": 0.7048617005348206,
      "learning_rate": 7.854878988949877e-06,
      "loss": 0.0269,
      "step": 671100
    },
    {
      "epoch": 1.098302599451438,
      "grad_norm": 0.3747410774230957,
      "learning_rate": 7.854813096736359e-06,
      "loss": 0.0264,
      "step": 671120
    },
    {
      "epoch": 1.098335329890091,
      "grad_norm": 0.6865626573562622,
      "learning_rate": 7.854747204522842e-06,
      "loss": 0.0186,
      "step": 671140
    },
    {
      "epoch": 1.0983680603287445,
      "grad_norm": 0.46686872839927673,
      "learning_rate": 7.854681312309324e-06,
      "loss": 0.0225,
      "step": 671160
    },
    {
      "epoch": 1.0984007907673978,
      "grad_norm": 0.39469054341316223,
      "learning_rate": 7.854615420095808e-06,
      "loss": 0.0235,
      "step": 671180
    },
    {
      "epoch": 1.0984335212060512,
      "grad_norm": 0.6889777183532715,
      "learning_rate": 7.85454952788229e-06,
      "loss": 0.0346,
      "step": 671200
    },
    {
      "epoch": 1.0984662516447046,
      "grad_norm": 0.5385560393333435,
      "learning_rate": 7.854483635668773e-06,
      "loss": 0.0279,
      "step": 671220
    },
    {
      "epoch": 1.098498982083358,
      "grad_norm": 1.2053653001785278,
      "learning_rate": 7.854417743455257e-06,
      "loss": 0.0233,
      "step": 671240
    },
    {
      "epoch": 1.0985317125220113,
      "grad_norm": 0.24866947531700134,
      "learning_rate": 7.854351851241739e-06,
      "loss": 0.0219,
      "step": 671260
    },
    {
      "epoch": 1.0985644429606645,
      "grad_norm": 0.1776140034198761,
      "learning_rate": 7.854285959028222e-06,
      "loss": 0.0168,
      "step": 671280
    },
    {
      "epoch": 1.0985971733993178,
      "grad_norm": 0.10582628101110458,
      "learning_rate": 7.854220066814706e-06,
      "loss": 0.0275,
      "step": 671300
    },
    {
      "epoch": 1.0986299038379712,
      "grad_norm": 1.7549692392349243,
      "learning_rate": 7.854154174601188e-06,
      "loss": 0.0229,
      "step": 671320
    },
    {
      "epoch": 1.0986626342766246,
      "grad_norm": 1.7764419317245483,
      "learning_rate": 7.854088282387671e-06,
      "loss": 0.0364,
      "step": 671340
    },
    {
      "epoch": 1.098695364715278,
      "grad_norm": 1.4844791889190674,
      "learning_rate": 7.854022390174155e-06,
      "loss": 0.0319,
      "step": 671360
    },
    {
      "epoch": 1.0987280951539313,
      "grad_norm": 0.5962216258049011,
      "learning_rate": 7.853956497960637e-06,
      "loss": 0.0226,
      "step": 671380
    },
    {
      "epoch": 1.0987608255925845,
      "grad_norm": 0.6427443623542786,
      "learning_rate": 7.85389060574712e-06,
      "loss": 0.0303,
      "step": 671400
    },
    {
      "epoch": 1.0987935560312379,
      "grad_norm": 0.8735435009002686,
      "learning_rate": 7.853824713533602e-06,
      "loss": 0.0258,
      "step": 671420
    },
    {
      "epoch": 1.0988262864698912,
      "grad_norm": 1.4918270111083984,
      "learning_rate": 7.853758821320086e-06,
      "loss": 0.0273,
      "step": 671440
    },
    {
      "epoch": 1.0988590169085446,
      "grad_norm": 0.7418545484542847,
      "learning_rate": 7.853692929106568e-06,
      "loss": 0.0325,
      "step": 671460
    },
    {
      "epoch": 1.098891747347198,
      "grad_norm": 0.7835334539413452,
      "learning_rate": 7.853627036893051e-06,
      "loss": 0.0211,
      "step": 671480
    },
    {
      "epoch": 1.0989244777858513,
      "grad_norm": 1.401861310005188,
      "learning_rate": 7.853561144679533e-06,
      "loss": 0.0218,
      "step": 671500
    },
    {
      "epoch": 1.0989572082245047,
      "grad_norm": 0.34079423546791077,
      "learning_rate": 7.853495252466017e-06,
      "loss": 0.0242,
      "step": 671520
    },
    {
      "epoch": 1.0989899386631579,
      "grad_norm": 1.6729551553726196,
      "learning_rate": 7.853429360252499e-06,
      "loss": 0.0231,
      "step": 671540
    },
    {
      "epoch": 1.0990226691018112,
      "grad_norm": 5.212184429168701,
      "learning_rate": 7.853363468038982e-06,
      "loss": 0.0282,
      "step": 671560
    },
    {
      "epoch": 1.0990553995404646,
      "grad_norm": 3.355442523956299,
      "learning_rate": 7.853297575825464e-06,
      "loss": 0.0281,
      "step": 671580
    },
    {
      "epoch": 1.099088129979118,
      "grad_norm": 0.5893292427062988,
      "learning_rate": 7.853231683611948e-06,
      "loss": 0.0197,
      "step": 671600
    },
    {
      "epoch": 1.0991208604177714,
      "grad_norm": 0.29308629035949707,
      "learning_rate": 7.853165791398431e-06,
      "loss": 0.0191,
      "step": 671620
    },
    {
      "epoch": 1.0991535908564247,
      "grad_norm": 0.6296286582946777,
      "learning_rate": 7.853099899184913e-06,
      "loss": 0.0173,
      "step": 671640
    },
    {
      "epoch": 1.099186321295078,
      "grad_norm": 0.759742021560669,
      "learning_rate": 7.853034006971397e-06,
      "loss": 0.03,
      "step": 671660
    },
    {
      "epoch": 1.0992190517337312,
      "grad_norm": 0.5926364660263062,
      "learning_rate": 7.85296811475788e-06,
      "loss": 0.0206,
      "step": 671680
    },
    {
      "epoch": 1.0992517821723846,
      "grad_norm": 0.645280659198761,
      "learning_rate": 7.852902222544362e-06,
      "loss": 0.0227,
      "step": 671700
    },
    {
      "epoch": 1.099284512611038,
      "grad_norm": 0.31277790665626526,
      "learning_rate": 7.852836330330846e-06,
      "loss": 0.0195,
      "step": 671720
    },
    {
      "epoch": 1.0993172430496914,
      "grad_norm": 0.6590268611907959,
      "learning_rate": 7.85277043811733e-06,
      "loss": 0.0154,
      "step": 671740
    },
    {
      "epoch": 1.0993499734883447,
      "grad_norm": 1.1696627140045166,
      "learning_rate": 7.852704545903811e-06,
      "loss": 0.0227,
      "step": 671760
    },
    {
      "epoch": 1.099382703926998,
      "grad_norm": 1.585119605064392,
      "learning_rate": 7.852638653690295e-06,
      "loss": 0.0243,
      "step": 671780
    },
    {
      "epoch": 1.0994154343656515,
      "grad_norm": 0.9820014238357544,
      "learning_rate": 7.852572761476777e-06,
      "loss": 0.0221,
      "step": 671800
    },
    {
      "epoch": 1.0994481648043046,
      "grad_norm": 0.8319839835166931,
      "learning_rate": 7.85250686926326e-06,
      "loss": 0.0267,
      "step": 671820
    },
    {
      "epoch": 1.099480895242958,
      "grad_norm": 1.6009331941604614,
      "learning_rate": 7.852440977049742e-06,
      "loss": 0.0179,
      "step": 671840
    },
    {
      "epoch": 1.0995136256816114,
      "grad_norm": 0.675488293170929,
      "learning_rate": 7.852375084836226e-06,
      "loss": 0.0345,
      "step": 671860
    },
    {
      "epoch": 1.0995463561202647,
      "grad_norm": 0.1799343377351761,
      "learning_rate": 7.852309192622708e-06,
      "loss": 0.0208,
      "step": 671880
    },
    {
      "epoch": 1.0995790865589181,
      "grad_norm": 0.7339953780174255,
      "learning_rate": 7.852243300409191e-06,
      "loss": 0.0296,
      "step": 671900
    },
    {
      "epoch": 1.0996118169975715,
      "grad_norm": 0.4476081430912018,
      "learning_rate": 7.852177408195673e-06,
      "loss": 0.02,
      "step": 671920
    },
    {
      "epoch": 1.0996445474362246,
      "grad_norm": 1.8601462841033936,
      "learning_rate": 7.852111515982157e-06,
      "loss": 0.025,
      "step": 671940
    },
    {
      "epoch": 1.099677277874878,
      "grad_norm": 0.3445282280445099,
      "learning_rate": 7.85204562376864e-06,
      "loss": 0.0199,
      "step": 671960
    },
    {
      "epoch": 1.0997100083135314,
      "grad_norm": 0.9458188414573669,
      "learning_rate": 7.851979731555122e-06,
      "loss": 0.0249,
      "step": 671980
    },
    {
      "epoch": 1.0997427387521848,
      "grad_norm": 0.2374555915594101,
      "learning_rate": 7.851913839341606e-06,
      "loss": 0.0248,
      "step": 672000
    },
    {
      "epoch": 1.0997754691908381,
      "grad_norm": 9.103839874267578,
      "learning_rate": 7.851847947128088e-06,
      "loss": 0.0262,
      "step": 672020
    },
    {
      "epoch": 1.0998081996294915,
      "grad_norm": 0.3329021632671356,
      "learning_rate": 7.851782054914571e-06,
      "loss": 0.0275,
      "step": 672040
    },
    {
      "epoch": 1.0998409300681449,
      "grad_norm": 0.14241303503513336,
      "learning_rate": 7.851716162701053e-06,
      "loss": 0.0329,
      "step": 672060
    },
    {
      "epoch": 1.099873660506798,
      "grad_norm": 1.8530464172363281,
      "learning_rate": 7.851650270487537e-06,
      "loss": 0.0161,
      "step": 672080
    },
    {
      "epoch": 1.0999063909454514,
      "grad_norm": 0.17240801453590393,
      "learning_rate": 7.85158437827402e-06,
      "loss": 0.0179,
      "step": 672100
    },
    {
      "epoch": 1.0999391213841048,
      "grad_norm": 0.14848501980304718,
      "learning_rate": 7.851518486060502e-06,
      "loss": 0.0199,
      "step": 672120
    },
    {
      "epoch": 1.0999718518227581,
      "grad_norm": 0.7069686055183411,
      "learning_rate": 7.851452593846986e-06,
      "loss": 0.0354,
      "step": 672140
    },
    {
      "epoch": 1.1000045822614115,
      "grad_norm": 4.344241619110107,
      "learning_rate": 7.85138670163347e-06,
      "loss": 0.0207,
      "step": 672160
    },
    {
      "epoch": 1.1000373127000649,
      "grad_norm": 0.13972808420658112,
      "learning_rate": 7.851320809419951e-06,
      "loss": 0.0188,
      "step": 672180
    },
    {
      "epoch": 1.100070043138718,
      "grad_norm": 0.404278427362442,
      "learning_rate": 7.851254917206435e-06,
      "loss": 0.0263,
      "step": 672200
    },
    {
      "epoch": 1.1001027735773714,
      "grad_norm": 0.8197022080421448,
      "learning_rate": 7.851189024992917e-06,
      "loss": 0.0392,
      "step": 672220
    },
    {
      "epoch": 1.1001355040160248,
      "grad_norm": 0.26604652404785156,
      "learning_rate": 7.8511231327794e-06,
      "loss": 0.024,
      "step": 672240
    },
    {
      "epoch": 1.1001682344546782,
      "grad_norm": 0.5772949457168579,
      "learning_rate": 7.851057240565882e-06,
      "loss": 0.0263,
      "step": 672260
    },
    {
      "epoch": 1.1002009648933315,
      "grad_norm": 0.4342467486858368,
      "learning_rate": 7.850991348352366e-06,
      "loss": 0.0183,
      "step": 672280
    },
    {
      "epoch": 1.100233695331985,
      "grad_norm": 0.4633905589580536,
      "learning_rate": 7.85092545613885e-06,
      "loss": 0.0261,
      "step": 672300
    },
    {
      "epoch": 1.1002664257706383,
      "grad_norm": 0.8792504668235779,
      "learning_rate": 7.850859563925331e-06,
      "loss": 0.0283,
      "step": 672320
    },
    {
      "epoch": 1.1002991562092914,
      "grad_norm": 0.2889252007007599,
      "learning_rate": 7.850793671711815e-06,
      "loss": 0.017,
      "step": 672340
    },
    {
      "epoch": 1.1003318866479448,
      "grad_norm": 0.10798560082912445,
      "learning_rate": 7.850727779498297e-06,
      "loss": 0.0184,
      "step": 672360
    },
    {
      "epoch": 1.1003646170865982,
      "grad_norm": 0.05372506380081177,
      "learning_rate": 7.85066188728478e-06,
      "loss": 0.0271,
      "step": 672380
    },
    {
      "epoch": 1.1003973475252515,
      "grad_norm": 0.8354758024215698,
      "learning_rate": 7.850595995071262e-06,
      "loss": 0.0283,
      "step": 672400
    },
    {
      "epoch": 1.100430077963905,
      "grad_norm": 2.202075242996216,
      "learning_rate": 7.850530102857746e-06,
      "loss": 0.0229,
      "step": 672420
    },
    {
      "epoch": 1.1004628084025583,
      "grad_norm": 1.8880321979522705,
      "learning_rate": 7.850464210644228e-06,
      "loss": 0.0263,
      "step": 672440
    },
    {
      "epoch": 1.1004955388412117,
      "grad_norm": 0.5391651391983032,
      "learning_rate": 7.850398318430711e-06,
      "loss": 0.0274,
      "step": 672460
    },
    {
      "epoch": 1.1005282692798648,
      "grad_norm": 1.484848141670227,
      "learning_rate": 7.850332426217195e-06,
      "loss": 0.0238,
      "step": 672480
    },
    {
      "epoch": 1.1005609997185182,
      "grad_norm": 1.0331617593765259,
      "learning_rate": 7.850266534003677e-06,
      "loss": 0.0277,
      "step": 672500
    },
    {
      "epoch": 1.1005937301571715,
      "grad_norm": 1.348724126815796,
      "learning_rate": 7.85020064179016e-06,
      "loss": 0.0211,
      "step": 672520
    },
    {
      "epoch": 1.100626460595825,
      "grad_norm": 2.8818600177764893,
      "learning_rate": 7.850134749576644e-06,
      "loss": 0.0228,
      "step": 672540
    },
    {
      "epoch": 1.1006591910344783,
      "grad_norm": 0.3076792061328888,
      "learning_rate": 7.850068857363126e-06,
      "loss": 0.0274,
      "step": 672560
    },
    {
      "epoch": 1.1006919214731317,
      "grad_norm": 0.4040547311306,
      "learning_rate": 7.85000296514961e-06,
      "loss": 0.0308,
      "step": 672580
    },
    {
      "epoch": 1.100724651911785,
      "grad_norm": 0.3048582673072815,
      "learning_rate": 7.849937072936091e-06,
      "loss": 0.0189,
      "step": 672600
    },
    {
      "epoch": 1.1007573823504382,
      "grad_norm": 0.7139429450035095,
      "learning_rate": 7.849871180722575e-06,
      "loss": 0.0318,
      "step": 672620
    },
    {
      "epoch": 1.1007901127890916,
      "grad_norm": 0.6571171283721924,
      "learning_rate": 7.849805288509057e-06,
      "loss": 0.0275,
      "step": 672640
    },
    {
      "epoch": 1.100822843227745,
      "grad_norm": 1.1643556356430054,
      "learning_rate": 7.84973939629554e-06,
      "loss": 0.0343,
      "step": 672660
    },
    {
      "epoch": 1.1008555736663983,
      "grad_norm": 0.8040511012077332,
      "learning_rate": 7.849673504082024e-06,
      "loss": 0.0232,
      "step": 672680
    },
    {
      "epoch": 1.1008883041050517,
      "grad_norm": 0.24246332049369812,
      "learning_rate": 7.849607611868506e-06,
      "loss": 0.0239,
      "step": 672700
    },
    {
      "epoch": 1.100921034543705,
      "grad_norm": 0.5356824994087219,
      "learning_rate": 7.84954171965499e-06,
      "loss": 0.029,
      "step": 672720
    },
    {
      "epoch": 1.1009537649823582,
      "grad_norm": 0.1861349642276764,
      "learning_rate": 7.849475827441472e-06,
      "loss": 0.0181,
      "step": 672740
    },
    {
      "epoch": 1.1009864954210116,
      "grad_norm": 0.2873058021068573,
      "learning_rate": 7.849409935227955e-06,
      "loss": 0.0193,
      "step": 672760
    },
    {
      "epoch": 1.101019225859665,
      "grad_norm": 0.9513870477676392,
      "learning_rate": 7.849344043014437e-06,
      "loss": 0.0313,
      "step": 672780
    },
    {
      "epoch": 1.1010519562983183,
      "grad_norm": 5.97027587890625,
      "learning_rate": 7.84927815080092e-06,
      "loss": 0.027,
      "step": 672800
    },
    {
      "epoch": 1.1010846867369717,
      "grad_norm": 0.4794459342956543,
      "learning_rate": 7.849212258587402e-06,
      "loss": 0.0262,
      "step": 672820
    },
    {
      "epoch": 1.101117417175625,
      "grad_norm": 0.4961636960506439,
      "learning_rate": 7.849146366373886e-06,
      "loss": 0.0329,
      "step": 672840
    },
    {
      "epoch": 1.1011501476142784,
      "grad_norm": 1.07424795627594,
      "learning_rate": 7.849080474160368e-06,
      "loss": 0.027,
      "step": 672860
    },
    {
      "epoch": 1.1011828780529316,
      "grad_norm": 0.6298335194587708,
      "learning_rate": 7.849014581946852e-06,
      "loss": 0.0226,
      "step": 672880
    },
    {
      "epoch": 1.101215608491585,
      "grad_norm": 0.22416411340236664,
      "learning_rate": 7.848948689733335e-06,
      "loss": 0.0159,
      "step": 672900
    },
    {
      "epoch": 1.1012483389302383,
      "grad_norm": 0.886222779750824,
      "learning_rate": 7.848882797519817e-06,
      "loss": 0.0209,
      "step": 672920
    },
    {
      "epoch": 1.1012810693688917,
      "grad_norm": 0.632312536239624,
      "learning_rate": 7.8488169053063e-06,
      "loss": 0.026,
      "step": 672940
    },
    {
      "epoch": 1.101313799807545,
      "grad_norm": 0.34407225251197815,
      "learning_rate": 7.848751013092784e-06,
      "loss": 0.0211,
      "step": 672960
    },
    {
      "epoch": 1.1013465302461984,
      "grad_norm": 1.4554988145828247,
      "learning_rate": 7.848685120879266e-06,
      "loss": 0.0207,
      "step": 672980
    },
    {
      "epoch": 1.1013792606848516,
      "grad_norm": 0.568345844745636,
      "learning_rate": 7.84861922866575e-06,
      "loss": 0.0204,
      "step": 673000
    },
    {
      "epoch": 1.101411991123505,
      "grad_norm": 1.6846040487289429,
      "learning_rate": 7.848553336452233e-06,
      "loss": 0.034,
      "step": 673020
    },
    {
      "epoch": 1.1014447215621583,
      "grad_norm": 1.041201114654541,
      "learning_rate": 7.848487444238715e-06,
      "loss": 0.0234,
      "step": 673040
    },
    {
      "epoch": 1.1014774520008117,
      "grad_norm": 0.1993565410375595,
      "learning_rate": 7.848421552025199e-06,
      "loss": 0.0176,
      "step": 673060
    },
    {
      "epoch": 1.101510182439465,
      "grad_norm": 0.8765127062797546,
      "learning_rate": 7.84835565981168e-06,
      "loss": 0.0357,
      "step": 673080
    },
    {
      "epoch": 1.1015429128781185,
      "grad_norm": 0.5711743831634521,
      "learning_rate": 7.848289767598164e-06,
      "loss": 0.0232,
      "step": 673100
    },
    {
      "epoch": 1.1015756433167718,
      "grad_norm": 0.06358494609594345,
      "learning_rate": 7.848223875384646e-06,
      "loss": 0.0234,
      "step": 673120
    },
    {
      "epoch": 1.101608373755425,
      "grad_norm": 1.1836943626403809,
      "learning_rate": 7.84815798317113e-06,
      "loss": 0.0259,
      "step": 673140
    },
    {
      "epoch": 1.1016411041940783,
      "grad_norm": 0.08825930207967758,
      "learning_rate": 7.848092090957612e-06,
      "loss": 0.0273,
      "step": 673160
    },
    {
      "epoch": 1.1016738346327317,
      "grad_norm": 0.22343330085277557,
      "learning_rate": 7.848026198744095e-06,
      "loss": 0.02,
      "step": 673180
    },
    {
      "epoch": 1.101706565071385,
      "grad_norm": 0.9470551609992981,
      "learning_rate": 7.847960306530577e-06,
      "loss": 0.0294,
      "step": 673200
    },
    {
      "epoch": 1.1017392955100385,
      "grad_norm": 1.491231083869934,
      "learning_rate": 7.84789441431706e-06,
      "loss": 0.0277,
      "step": 673220
    },
    {
      "epoch": 1.1017720259486918,
      "grad_norm": 0.20084837079048157,
      "learning_rate": 7.847828522103543e-06,
      "loss": 0.0217,
      "step": 673240
    },
    {
      "epoch": 1.1018047563873452,
      "grad_norm": 0.7060676217079163,
      "learning_rate": 7.847762629890026e-06,
      "loss": 0.0329,
      "step": 673260
    },
    {
      "epoch": 1.1018374868259984,
      "grad_norm": 0.7904878854751587,
      "learning_rate": 7.84769673767651e-06,
      "loss": 0.0248,
      "step": 673280
    },
    {
      "epoch": 1.1018702172646517,
      "grad_norm": 1.0941433906555176,
      "learning_rate": 7.847630845462992e-06,
      "loss": 0.0228,
      "step": 673300
    },
    {
      "epoch": 1.101902947703305,
      "grad_norm": 0.9373356103897095,
      "learning_rate": 7.847564953249475e-06,
      "loss": 0.0356,
      "step": 673320
    },
    {
      "epoch": 1.1019356781419585,
      "grad_norm": 0.1434982568025589,
      "learning_rate": 7.847499061035959e-06,
      "loss": 0.0145,
      "step": 673340
    },
    {
      "epoch": 1.1019684085806118,
      "grad_norm": 0.9937132000923157,
      "learning_rate": 7.84743316882244e-06,
      "loss": 0.0267,
      "step": 673360
    },
    {
      "epoch": 1.1020011390192652,
      "grad_norm": 0.17147885262966156,
      "learning_rate": 7.847367276608924e-06,
      "loss": 0.0139,
      "step": 673380
    },
    {
      "epoch": 1.1020338694579186,
      "grad_norm": 0.5620505213737488,
      "learning_rate": 7.847301384395408e-06,
      "loss": 0.0162,
      "step": 673400
    },
    {
      "epoch": 1.1020665998965717,
      "grad_norm": 0.9326147437095642,
      "learning_rate": 7.84723549218189e-06,
      "loss": 0.0326,
      "step": 673420
    },
    {
      "epoch": 1.1020993303352251,
      "grad_norm": 0.20528633892536163,
      "learning_rate": 7.847169599968373e-06,
      "loss": 0.0155,
      "step": 673440
    },
    {
      "epoch": 1.1021320607738785,
      "grad_norm": 0.7924374341964722,
      "learning_rate": 7.847103707754855e-06,
      "loss": 0.0319,
      "step": 673460
    },
    {
      "epoch": 1.1021647912125319,
      "grad_norm": 1.217808485031128,
      "learning_rate": 7.847037815541339e-06,
      "loss": 0.034,
      "step": 673480
    },
    {
      "epoch": 1.1021975216511852,
      "grad_norm": 0.9235361218452454,
      "learning_rate": 7.84697192332782e-06,
      "loss": 0.0232,
      "step": 673500
    },
    {
      "epoch": 1.1022302520898386,
      "grad_norm": 0.7682918906211853,
      "learning_rate": 7.846906031114304e-06,
      "loss": 0.022,
      "step": 673520
    },
    {
      "epoch": 1.1022629825284918,
      "grad_norm": 0.6598342657089233,
      "learning_rate": 7.846840138900786e-06,
      "loss": 0.0294,
      "step": 673540
    },
    {
      "epoch": 1.1022957129671451,
      "grad_norm": 0.3977031707763672,
      "learning_rate": 7.84677424668727e-06,
      "loss": 0.0263,
      "step": 673560
    },
    {
      "epoch": 1.1023284434057985,
      "grad_norm": 0.5294076800346375,
      "learning_rate": 7.846708354473752e-06,
      "loss": 0.0251,
      "step": 673580
    },
    {
      "epoch": 1.1023611738444519,
      "grad_norm": 1.0513423681259155,
      "learning_rate": 7.846642462260235e-06,
      "loss": 0.0181,
      "step": 673600
    },
    {
      "epoch": 1.1023939042831052,
      "grad_norm": 1.8899338245391846,
      "learning_rate": 7.846576570046717e-06,
      "loss": 0.0237,
      "step": 673620
    },
    {
      "epoch": 1.1024266347217586,
      "grad_norm": 0.8649437427520752,
      "learning_rate": 7.8465106778332e-06,
      "loss": 0.0334,
      "step": 673640
    },
    {
      "epoch": 1.1024593651604118,
      "grad_norm": 0.592218279838562,
      "learning_rate": 7.846444785619684e-06,
      "loss": 0.0189,
      "step": 673660
    },
    {
      "epoch": 1.1024920955990651,
      "grad_norm": 0.7018470764160156,
      "learning_rate": 7.846378893406166e-06,
      "loss": 0.0184,
      "step": 673680
    },
    {
      "epoch": 1.1025248260377185,
      "grad_norm": 0.8709399700164795,
      "learning_rate": 7.84631300119265e-06,
      "loss": 0.0191,
      "step": 673700
    },
    {
      "epoch": 1.1025575564763719,
      "grad_norm": 0.5558804869651794,
      "learning_rate": 7.846247108979133e-06,
      "loss": 0.0229,
      "step": 673720
    },
    {
      "epoch": 1.1025902869150253,
      "grad_norm": 0.6524510979652405,
      "learning_rate": 7.846181216765615e-06,
      "loss": 0.0223,
      "step": 673740
    },
    {
      "epoch": 1.1026230173536786,
      "grad_norm": 0.45504820346832275,
      "learning_rate": 7.846115324552099e-06,
      "loss": 0.0251,
      "step": 673760
    },
    {
      "epoch": 1.102655747792332,
      "grad_norm": 0.609352707862854,
      "learning_rate": 7.846049432338582e-06,
      "loss": 0.0213,
      "step": 673780
    },
    {
      "epoch": 1.1026884782309851,
      "grad_norm": 1.7839000225067139,
      "learning_rate": 7.845983540125064e-06,
      "loss": 0.0251,
      "step": 673800
    },
    {
      "epoch": 1.1027212086696385,
      "grad_norm": 4.15513801574707,
      "learning_rate": 7.845917647911548e-06,
      "loss": 0.027,
      "step": 673820
    },
    {
      "epoch": 1.102753939108292,
      "grad_norm": 0.2569243013858795,
      "learning_rate": 7.84585175569803e-06,
      "loss": 0.0239,
      "step": 673840
    },
    {
      "epoch": 1.1027866695469453,
      "grad_norm": 0.315463125705719,
      "learning_rate": 7.845785863484513e-06,
      "loss": 0.0244,
      "step": 673860
    },
    {
      "epoch": 1.1028193999855986,
      "grad_norm": 0.3070513904094696,
      "learning_rate": 7.845719971270995e-06,
      "loss": 0.0224,
      "step": 673880
    },
    {
      "epoch": 1.102852130424252,
      "grad_norm": 0.213282972574234,
      "learning_rate": 7.845654079057479e-06,
      "loss": 0.0286,
      "step": 673900
    },
    {
      "epoch": 1.1028848608629054,
      "grad_norm": 1.543907880783081,
      "learning_rate": 7.84558818684396e-06,
      "loss": 0.022,
      "step": 673920
    },
    {
      "epoch": 1.1029175913015585,
      "grad_norm": 2.4622128009796143,
      "learning_rate": 7.845522294630444e-06,
      "loss": 0.0237,
      "step": 673940
    },
    {
      "epoch": 1.102950321740212,
      "grad_norm": 0.8594193458557129,
      "learning_rate": 7.845456402416926e-06,
      "loss": 0.0305,
      "step": 673960
    },
    {
      "epoch": 1.1029830521788653,
      "grad_norm": 3.087704658508301,
      "learning_rate": 7.84539051020341e-06,
      "loss": 0.0294,
      "step": 673980
    },
    {
      "epoch": 1.1030157826175186,
      "grad_norm": 0.5777751803398132,
      "learning_rate": 7.845324617989892e-06,
      "loss": 0.0225,
      "step": 674000
    },
    {
      "epoch": 1.103048513056172,
      "grad_norm": 0.8606870174407959,
      "learning_rate": 7.845258725776375e-06,
      "loss": 0.0211,
      "step": 674020
    },
    {
      "epoch": 1.1030812434948254,
      "grad_norm": 0.5703844428062439,
      "learning_rate": 7.845192833562857e-06,
      "loss": 0.0247,
      "step": 674040
    },
    {
      "epoch": 1.1031139739334788,
      "grad_norm": 0.4542463421821594,
      "learning_rate": 7.84512694134934e-06,
      "loss": 0.023,
      "step": 674060
    },
    {
      "epoch": 1.103146704372132,
      "grad_norm": 0.39054107666015625,
      "learning_rate": 7.845061049135824e-06,
      "loss": 0.0168,
      "step": 674080
    },
    {
      "epoch": 1.1031794348107853,
      "grad_norm": 0.4745028614997864,
      "learning_rate": 7.844995156922306e-06,
      "loss": 0.0146,
      "step": 674100
    },
    {
      "epoch": 1.1032121652494387,
      "grad_norm": 0.6624806523323059,
      "learning_rate": 7.84492926470879e-06,
      "loss": 0.026,
      "step": 674120
    },
    {
      "epoch": 1.103244895688092,
      "grad_norm": 14.280261039733887,
      "learning_rate": 7.844863372495273e-06,
      "loss": 0.0223,
      "step": 674140
    },
    {
      "epoch": 1.1032776261267454,
      "grad_norm": 0.6607383489608765,
      "learning_rate": 7.844797480281755e-06,
      "loss": 0.0282,
      "step": 674160
    },
    {
      "epoch": 1.1033103565653988,
      "grad_norm": 0.3767136037349701,
      "learning_rate": 7.844731588068239e-06,
      "loss": 0.0251,
      "step": 674180
    },
    {
      "epoch": 1.103343087004052,
      "grad_norm": 0.27692800760269165,
      "learning_rate": 7.844665695854723e-06,
      "loss": 0.0263,
      "step": 674200
    },
    {
      "epoch": 1.1033758174427053,
      "grad_norm": 0.6016146540641785,
      "learning_rate": 7.844599803641204e-06,
      "loss": 0.0323,
      "step": 674220
    },
    {
      "epoch": 1.1034085478813587,
      "grad_norm": 0.10775302350521088,
      "learning_rate": 7.844533911427688e-06,
      "loss": 0.0271,
      "step": 674240
    },
    {
      "epoch": 1.103441278320012,
      "grad_norm": 0.8043062686920166,
      "learning_rate": 7.84446801921417e-06,
      "loss": 0.015,
      "step": 674260
    },
    {
      "epoch": 1.1034740087586654,
      "grad_norm": 0.5472372770309448,
      "learning_rate": 7.844402127000653e-06,
      "loss": 0.0207,
      "step": 674280
    },
    {
      "epoch": 1.1035067391973188,
      "grad_norm": 0.1897684782743454,
      "learning_rate": 7.844336234787135e-06,
      "loss": 0.0343,
      "step": 674300
    },
    {
      "epoch": 1.1035394696359722,
      "grad_norm": 2.0303144454956055,
      "learning_rate": 7.844270342573619e-06,
      "loss": 0.0252,
      "step": 674320
    },
    {
      "epoch": 1.1035722000746253,
      "grad_norm": 0.6362292170524597,
      "learning_rate": 7.844204450360101e-06,
      "loss": 0.0212,
      "step": 674340
    },
    {
      "epoch": 1.1036049305132787,
      "grad_norm": 0.8050095438957214,
      "learning_rate": 7.844138558146584e-06,
      "loss": 0.0284,
      "step": 674360
    },
    {
      "epoch": 1.103637660951932,
      "grad_norm": 2.9470925331115723,
      "learning_rate": 7.844072665933066e-06,
      "loss": 0.0248,
      "step": 674380
    },
    {
      "epoch": 1.1036703913905854,
      "grad_norm": 0.9687724113464355,
      "learning_rate": 7.84400677371955e-06,
      "loss": 0.0202,
      "step": 674400
    },
    {
      "epoch": 1.1037031218292388,
      "grad_norm": 0.5863841772079468,
      "learning_rate": 7.843940881506034e-06,
      "loss": 0.0164,
      "step": 674420
    },
    {
      "epoch": 1.1037358522678922,
      "grad_norm": 0.19153623282909393,
      "learning_rate": 7.843874989292515e-06,
      "loss": 0.0315,
      "step": 674440
    },
    {
      "epoch": 1.1037685827065453,
      "grad_norm": 0.1141192764043808,
      "learning_rate": 7.843809097078999e-06,
      "loss": 0.0245,
      "step": 674460
    },
    {
      "epoch": 1.1038013131451987,
      "grad_norm": 0.3285374641418457,
      "learning_rate": 7.843743204865481e-06,
      "loss": 0.0246,
      "step": 674480
    },
    {
      "epoch": 1.103834043583852,
      "grad_norm": 0.506076455116272,
      "learning_rate": 7.843677312651964e-06,
      "loss": 0.0196,
      "step": 674500
    },
    {
      "epoch": 1.1038667740225054,
      "grad_norm": 1.0837513208389282,
      "learning_rate": 7.843611420438448e-06,
      "loss": 0.0289,
      "step": 674520
    },
    {
      "epoch": 1.1038995044611588,
      "grad_norm": 0.19168046116828918,
      "learning_rate": 7.84354552822493e-06,
      "loss": 0.0177,
      "step": 674540
    },
    {
      "epoch": 1.1039322348998122,
      "grad_norm": 1.4831494092941284,
      "learning_rate": 7.843479636011414e-06,
      "loss": 0.023,
      "step": 674560
    },
    {
      "epoch": 1.1039649653384656,
      "grad_norm": 0.413101464509964,
      "learning_rate": 7.843413743797897e-06,
      "loss": 0.0269,
      "step": 674580
    },
    {
      "epoch": 1.1039976957771187,
      "grad_norm": 0.6015661954879761,
      "learning_rate": 7.843347851584379e-06,
      "loss": 0.0193,
      "step": 674600
    },
    {
      "epoch": 1.104030426215772,
      "grad_norm": 0.3892160952091217,
      "learning_rate": 7.843281959370863e-06,
      "loss": 0.021,
      "step": 674620
    },
    {
      "epoch": 1.1040631566544254,
      "grad_norm": 0.9295243620872498,
      "learning_rate": 7.843216067157345e-06,
      "loss": 0.0191,
      "step": 674640
    },
    {
      "epoch": 1.1040958870930788,
      "grad_norm": 1.1090623140335083,
      "learning_rate": 7.843150174943828e-06,
      "loss": 0.0301,
      "step": 674660
    },
    {
      "epoch": 1.1041286175317322,
      "grad_norm": 0.8185315132141113,
      "learning_rate": 7.84308428273031e-06,
      "loss": 0.0242,
      "step": 674680
    },
    {
      "epoch": 1.1041613479703856,
      "grad_norm": 0.8236052393913269,
      "learning_rate": 7.843018390516794e-06,
      "loss": 0.0216,
      "step": 674700
    },
    {
      "epoch": 1.104194078409039,
      "grad_norm": 0.18236832320690155,
      "learning_rate": 7.842952498303275e-06,
      "loss": 0.0152,
      "step": 674720
    },
    {
      "epoch": 1.104226808847692,
      "grad_norm": 4.0095953941345215,
      "learning_rate": 7.842886606089759e-06,
      "loss": 0.0271,
      "step": 674740
    },
    {
      "epoch": 1.1042595392863455,
      "grad_norm": 1.3609859943389893,
      "learning_rate": 7.842820713876241e-06,
      "loss": 0.0287,
      "step": 674760
    },
    {
      "epoch": 1.1042922697249988,
      "grad_norm": 0.412433385848999,
      "learning_rate": 7.842754821662725e-06,
      "loss": 0.0142,
      "step": 674780
    },
    {
      "epoch": 1.1043250001636522,
      "grad_norm": 0.4016312062740326,
      "learning_rate": 7.842688929449208e-06,
      "loss": 0.0223,
      "step": 674800
    },
    {
      "epoch": 1.1043577306023056,
      "grad_norm": 0.8339020609855652,
      "learning_rate": 7.84262303723569e-06,
      "loss": 0.0418,
      "step": 674820
    },
    {
      "epoch": 1.104390461040959,
      "grad_norm": 1.4707319736480713,
      "learning_rate": 7.842557145022174e-06,
      "loss": 0.0284,
      "step": 674840
    },
    {
      "epoch": 1.1044231914796123,
      "grad_norm": 0.2045060396194458,
      "learning_rate": 7.842491252808655e-06,
      "loss": 0.0208,
      "step": 674860
    },
    {
      "epoch": 1.1044559219182655,
      "grad_norm": 0.7259544134140015,
      "learning_rate": 7.842425360595139e-06,
      "loss": 0.026,
      "step": 674880
    },
    {
      "epoch": 1.1044886523569188,
      "grad_norm": 0.9255145788192749,
      "learning_rate": 7.842359468381621e-06,
      "loss": 0.0283,
      "step": 674900
    },
    {
      "epoch": 1.1045213827955722,
      "grad_norm": 0.32661372423171997,
      "learning_rate": 7.842293576168105e-06,
      "loss": 0.02,
      "step": 674920
    },
    {
      "epoch": 1.1045541132342256,
      "grad_norm": 0.13892275094985962,
      "learning_rate": 7.842227683954588e-06,
      "loss": 0.0294,
      "step": 674940
    },
    {
      "epoch": 1.104586843672879,
      "grad_norm": 1.1742229461669922,
      "learning_rate": 7.84216179174107e-06,
      "loss": 0.0247,
      "step": 674960
    },
    {
      "epoch": 1.1046195741115323,
      "grad_norm": 0.5070897936820984,
      "learning_rate": 7.842095899527554e-06,
      "loss": 0.0307,
      "step": 674980
    },
    {
      "epoch": 1.1046523045501855,
      "grad_norm": 1.1663864850997925,
      "learning_rate": 7.842030007314037e-06,
      "loss": 0.0328,
      "step": 675000
    },
    {
      "epoch": 1.1046850349888389,
      "grad_norm": 0.4280051589012146,
      "learning_rate": 7.841964115100519e-06,
      "loss": 0.0245,
      "step": 675020
    },
    {
      "epoch": 1.1047177654274922,
      "grad_norm": 0.17363035678863525,
      "learning_rate": 7.841898222887003e-06,
      "loss": 0.0217,
      "step": 675040
    },
    {
      "epoch": 1.1047504958661456,
      "grad_norm": 1.1316426992416382,
      "learning_rate": 7.841832330673485e-06,
      "loss": 0.0168,
      "step": 675060
    },
    {
      "epoch": 1.104783226304799,
      "grad_norm": 0.619873046875,
      "learning_rate": 7.841766438459968e-06,
      "loss": 0.0295,
      "step": 675080
    },
    {
      "epoch": 1.1048159567434523,
      "grad_norm": 3.949835777282715,
      "learning_rate": 7.84170054624645e-06,
      "loss": 0.035,
      "step": 675100
    },
    {
      "epoch": 1.1048486871821057,
      "grad_norm": 1.9464129209518433,
      "learning_rate": 7.841634654032934e-06,
      "loss": 0.0343,
      "step": 675120
    },
    {
      "epoch": 1.1048814176207589,
      "grad_norm": 0.6906768679618835,
      "learning_rate": 7.841568761819417e-06,
      "loss": 0.022,
      "step": 675140
    },
    {
      "epoch": 1.1049141480594122,
      "grad_norm": 1.025375247001648,
      "learning_rate": 7.841502869605899e-06,
      "loss": 0.0211,
      "step": 675160
    },
    {
      "epoch": 1.1049468784980656,
      "grad_norm": 0.13904474675655365,
      "learning_rate": 7.841436977392383e-06,
      "loss": 0.0203,
      "step": 675180
    },
    {
      "epoch": 1.104979608936719,
      "grad_norm": 0.45417627692222595,
      "learning_rate": 7.841371085178865e-06,
      "loss": 0.0278,
      "step": 675200
    },
    {
      "epoch": 1.1050123393753724,
      "grad_norm": 0.5646209716796875,
      "learning_rate": 7.841305192965348e-06,
      "loss": 0.026,
      "step": 675220
    },
    {
      "epoch": 1.1050450698140257,
      "grad_norm": 0.8346030116081238,
      "learning_rate": 7.84123930075183e-06,
      "loss": 0.0219,
      "step": 675240
    },
    {
      "epoch": 1.1050778002526789,
      "grad_norm": 0.5732812285423279,
      "learning_rate": 7.841173408538314e-06,
      "loss": 0.0204,
      "step": 675260
    },
    {
      "epoch": 1.1051105306913322,
      "grad_norm": 1.5965744256973267,
      "learning_rate": 7.841107516324796e-06,
      "loss": 0.0284,
      "step": 675280
    },
    {
      "epoch": 1.1051432611299856,
      "grad_norm": 1.9569216966629028,
      "learning_rate": 7.841041624111279e-06,
      "loss": 0.0243,
      "step": 675300
    },
    {
      "epoch": 1.105175991568639,
      "grad_norm": 1.6614910364151,
      "learning_rate": 7.840975731897763e-06,
      "loss": 0.0258,
      "step": 675320
    },
    {
      "epoch": 1.1052087220072924,
      "grad_norm": 1.3238391876220703,
      "learning_rate": 7.840909839684245e-06,
      "loss": 0.0187,
      "step": 675340
    },
    {
      "epoch": 1.1052414524459457,
      "grad_norm": 1.1629472970962524,
      "learning_rate": 7.840843947470728e-06,
      "loss": 0.0253,
      "step": 675360
    },
    {
      "epoch": 1.105274182884599,
      "grad_norm": 0.1777714639902115,
      "learning_rate": 7.840778055257212e-06,
      "loss": 0.0312,
      "step": 675380
    },
    {
      "epoch": 1.1053069133232523,
      "grad_norm": 0.2707918584346771,
      "learning_rate": 7.840712163043694e-06,
      "loss": 0.0178,
      "step": 675400
    },
    {
      "epoch": 1.1053396437619056,
      "grad_norm": 0.6373666524887085,
      "learning_rate": 7.840646270830177e-06,
      "loss": 0.0232,
      "step": 675420
    },
    {
      "epoch": 1.105372374200559,
      "grad_norm": 0.3525272607803345,
      "learning_rate": 7.84058037861666e-06,
      "loss": 0.021,
      "step": 675440
    },
    {
      "epoch": 1.1054051046392124,
      "grad_norm": 0.9106016755104065,
      "learning_rate": 7.840514486403143e-06,
      "loss": 0.023,
      "step": 675460
    },
    {
      "epoch": 1.1054378350778657,
      "grad_norm": 0.48047423362731934,
      "learning_rate": 7.840448594189626e-06,
      "loss": 0.0251,
      "step": 675480
    },
    {
      "epoch": 1.1054705655165191,
      "grad_norm": 0.4477434456348419,
      "learning_rate": 7.840382701976108e-06,
      "loss": 0.0298,
      "step": 675500
    },
    {
      "epoch": 1.1055032959551725,
      "grad_norm": 0.5849564671516418,
      "learning_rate": 7.840316809762592e-06,
      "loss": 0.0225,
      "step": 675520
    },
    {
      "epoch": 1.1055360263938256,
      "grad_norm": 0.7632012367248535,
      "learning_rate": 7.840250917549074e-06,
      "loss": 0.0238,
      "step": 675540
    },
    {
      "epoch": 1.105568756832479,
      "grad_norm": 1.1616626977920532,
      "learning_rate": 7.840185025335557e-06,
      "loss": 0.027,
      "step": 675560
    },
    {
      "epoch": 1.1056014872711324,
      "grad_norm": 0.4190431535243988,
      "learning_rate": 7.84011913312204e-06,
      "loss": 0.0256,
      "step": 675580
    },
    {
      "epoch": 1.1056342177097858,
      "grad_norm": 1.1449289321899414,
      "learning_rate": 7.840053240908523e-06,
      "loss": 0.0227,
      "step": 675600
    },
    {
      "epoch": 1.1056669481484391,
      "grad_norm": 0.6248376369476318,
      "learning_rate": 7.839987348695005e-06,
      "loss": 0.0194,
      "step": 675620
    },
    {
      "epoch": 1.1056996785870925,
      "grad_norm": 0.413039892911911,
      "learning_rate": 7.839921456481488e-06,
      "loss": 0.0268,
      "step": 675640
    },
    {
      "epoch": 1.1057324090257459,
      "grad_norm": 1.532626748085022,
      "learning_rate": 7.83985556426797e-06,
      "loss": 0.0291,
      "step": 675660
    },
    {
      "epoch": 1.105765139464399,
      "grad_norm": 1.7170804738998413,
      "learning_rate": 7.839789672054454e-06,
      "loss": 0.0381,
      "step": 675680
    },
    {
      "epoch": 1.1057978699030524,
      "grad_norm": 1.774079442024231,
      "learning_rate": 7.839723779840936e-06,
      "loss": 0.0211,
      "step": 675700
    },
    {
      "epoch": 1.1058306003417058,
      "grad_norm": 3.193301200866699,
      "learning_rate": 7.83965788762742e-06,
      "loss": 0.0221,
      "step": 675720
    },
    {
      "epoch": 1.1058633307803591,
      "grad_norm": 1.4414798021316528,
      "learning_rate": 7.839591995413903e-06,
      "loss": 0.022,
      "step": 675740
    },
    {
      "epoch": 1.1058960612190125,
      "grad_norm": 1.054148554801941,
      "learning_rate": 7.839526103200385e-06,
      "loss": 0.0301,
      "step": 675760
    },
    {
      "epoch": 1.1059287916576659,
      "grad_norm": 0.34244316816329956,
      "learning_rate": 7.839460210986868e-06,
      "loss": 0.021,
      "step": 675780
    },
    {
      "epoch": 1.105961522096319,
      "grad_norm": 1.2828459739685059,
      "learning_rate": 7.839394318773352e-06,
      "loss": 0.0236,
      "step": 675800
    },
    {
      "epoch": 1.1059942525349724,
      "grad_norm": 0.522968590259552,
      "learning_rate": 7.839328426559834e-06,
      "loss": 0.0293,
      "step": 675820
    },
    {
      "epoch": 1.1060269829736258,
      "grad_norm": 0.9083936810493469,
      "learning_rate": 7.839262534346317e-06,
      "loss": 0.0263,
      "step": 675840
    },
    {
      "epoch": 1.1060597134122792,
      "grad_norm": 0.3468899726867676,
      "learning_rate": 7.839196642132801e-06,
      "loss": 0.021,
      "step": 675860
    },
    {
      "epoch": 1.1060924438509325,
      "grad_norm": 0.6326341032981873,
      "learning_rate": 7.839130749919283e-06,
      "loss": 0.0201,
      "step": 675880
    },
    {
      "epoch": 1.106125174289586,
      "grad_norm": 0.1707308143377304,
      "learning_rate": 7.839064857705766e-06,
      "loss": 0.0358,
      "step": 675900
    },
    {
      "epoch": 1.1061579047282393,
      "grad_norm": 0.5282720923423767,
      "learning_rate": 7.838998965492248e-06,
      "loss": 0.022,
      "step": 675920
    },
    {
      "epoch": 1.1061906351668924,
      "grad_norm": 0.522348165512085,
      "learning_rate": 7.838933073278732e-06,
      "loss": 0.0241,
      "step": 675940
    },
    {
      "epoch": 1.1062233656055458,
      "grad_norm": 1.5415964126586914,
      "learning_rate": 7.838867181065214e-06,
      "loss": 0.0276,
      "step": 675960
    },
    {
      "epoch": 1.1062560960441992,
      "grad_norm": 0.5461105108261108,
      "learning_rate": 7.838801288851697e-06,
      "loss": 0.0222,
      "step": 675980
    },
    {
      "epoch": 1.1062888264828525,
      "grad_norm": 1.6127749681472778,
      "learning_rate": 7.83873539663818e-06,
      "loss": 0.029,
      "step": 676000
    },
    {
      "epoch": 1.106321556921506,
      "grad_norm": 0.28291672468185425,
      "learning_rate": 7.838669504424663e-06,
      "loss": 0.0279,
      "step": 676020
    },
    {
      "epoch": 1.1063542873601593,
      "grad_norm": 0.8464787602424622,
      "learning_rate": 7.838603612211145e-06,
      "loss": 0.0301,
      "step": 676040
    },
    {
      "epoch": 1.1063870177988124,
      "grad_norm": 0.5944467782974243,
      "learning_rate": 7.838537719997628e-06,
      "loss": 0.0243,
      "step": 676060
    },
    {
      "epoch": 1.1064197482374658,
      "grad_norm": 0.9216604232788086,
      "learning_rate": 7.83847182778411e-06,
      "loss": 0.0197,
      "step": 676080
    },
    {
      "epoch": 1.1064524786761192,
      "grad_norm": 0.23197448253631592,
      "learning_rate": 7.838405935570594e-06,
      "loss": 0.0206,
      "step": 676100
    },
    {
      "epoch": 1.1064852091147725,
      "grad_norm": 0.7775657176971436,
      "learning_rate": 7.838340043357077e-06,
      "loss": 0.0201,
      "step": 676120
    },
    {
      "epoch": 1.106517939553426,
      "grad_norm": 1.4760212898254395,
      "learning_rate": 7.83827415114356e-06,
      "loss": 0.0231,
      "step": 676140
    },
    {
      "epoch": 1.1065506699920793,
      "grad_norm": 0.9542734026908875,
      "learning_rate": 7.838208258930043e-06,
      "loss": 0.0232,
      "step": 676160
    },
    {
      "epoch": 1.1065834004307327,
      "grad_norm": 0.26433324813842773,
      "learning_rate": 7.838142366716526e-06,
      "loss": 0.0216,
      "step": 676180
    },
    {
      "epoch": 1.1066161308693858,
      "grad_norm": 0.9181274175643921,
      "learning_rate": 7.838076474503008e-06,
      "loss": 0.0165,
      "step": 676200
    },
    {
      "epoch": 1.1066488613080392,
      "grad_norm": 0.2608713209629059,
      "learning_rate": 7.838010582289492e-06,
      "loss": 0.0246,
      "step": 676220
    },
    {
      "epoch": 1.1066815917466926,
      "grad_norm": 0.8890466094017029,
      "learning_rate": 7.837944690075976e-06,
      "loss": 0.0171,
      "step": 676240
    },
    {
      "epoch": 1.106714322185346,
      "grad_norm": 0.28806567192077637,
      "learning_rate": 7.837878797862457e-06,
      "loss": 0.0212,
      "step": 676260
    },
    {
      "epoch": 1.1067470526239993,
      "grad_norm": 0.47345224022865295,
      "learning_rate": 7.837812905648941e-06,
      "loss": 0.0265,
      "step": 676280
    },
    {
      "epoch": 1.1067797830626527,
      "grad_norm": 0.8250174522399902,
      "learning_rate": 7.837747013435423e-06,
      "loss": 0.0282,
      "step": 676300
    },
    {
      "epoch": 1.106812513501306,
      "grad_norm": 0.47290775179862976,
      "learning_rate": 7.837681121221906e-06,
      "loss": 0.0279,
      "step": 676320
    },
    {
      "epoch": 1.1068452439399592,
      "grad_norm": 0.5034715533256531,
      "learning_rate": 7.837615229008388e-06,
      "loss": 0.0203,
      "step": 676340
    },
    {
      "epoch": 1.1068779743786126,
      "grad_norm": 0.2686207592487335,
      "learning_rate": 7.837549336794872e-06,
      "loss": 0.0256,
      "step": 676360
    },
    {
      "epoch": 1.106910704817266,
      "grad_norm": 0.45601361989974976,
      "learning_rate": 7.837483444581354e-06,
      "loss": 0.015,
      "step": 676380
    },
    {
      "epoch": 1.1069434352559193,
      "grad_norm": 0.35403281450271606,
      "learning_rate": 7.837417552367837e-06,
      "loss": 0.0238,
      "step": 676400
    },
    {
      "epoch": 1.1069761656945727,
      "grad_norm": 0.17376099526882172,
      "learning_rate": 7.83735166015432e-06,
      "loss": 0.0199,
      "step": 676420
    },
    {
      "epoch": 1.107008896133226,
      "grad_norm": 0.7952184081077576,
      "learning_rate": 7.837285767940803e-06,
      "loss": 0.0314,
      "step": 676440
    },
    {
      "epoch": 1.1070416265718794,
      "grad_norm": 0.1861702799797058,
      "learning_rate": 7.837219875727285e-06,
      "loss": 0.0285,
      "step": 676460
    },
    {
      "epoch": 1.1070743570105326,
      "grad_norm": 0.4241127371788025,
      "learning_rate": 7.837153983513768e-06,
      "loss": 0.0171,
      "step": 676480
    },
    {
      "epoch": 1.107107087449186,
      "grad_norm": 0.42310044169425964,
      "learning_rate": 7.837088091300252e-06,
      "loss": 0.0212,
      "step": 676500
    },
    {
      "epoch": 1.1071398178878393,
      "grad_norm": 0.299501895904541,
      "learning_rate": 7.837022199086734e-06,
      "loss": 0.0154,
      "step": 676520
    },
    {
      "epoch": 1.1071725483264927,
      "grad_norm": 0.17716282606124878,
      "learning_rate": 7.836956306873217e-06,
      "loss": 0.0186,
      "step": 676540
    },
    {
      "epoch": 1.107205278765146,
      "grad_norm": 0.4519351124763489,
      "learning_rate": 7.836890414659701e-06,
      "loss": 0.0176,
      "step": 676560
    },
    {
      "epoch": 1.1072380092037994,
      "grad_norm": 0.23950842022895813,
      "learning_rate": 7.836824522446183e-06,
      "loss": 0.0168,
      "step": 676580
    },
    {
      "epoch": 1.1072707396424526,
      "grad_norm": 0.49341729283332825,
      "learning_rate": 7.836758630232667e-06,
      "loss": 0.016,
      "step": 676600
    },
    {
      "epoch": 1.107303470081106,
      "grad_norm": 0.3768417537212372,
      "learning_rate": 7.83669273801915e-06,
      "loss": 0.0217,
      "step": 676620
    },
    {
      "epoch": 1.1073362005197593,
      "grad_norm": 0.5245391130447388,
      "learning_rate": 7.836626845805632e-06,
      "loss": 0.0187,
      "step": 676640
    },
    {
      "epoch": 1.1073689309584127,
      "grad_norm": 0.7452609539031982,
      "learning_rate": 7.836560953592116e-06,
      "loss": 0.0263,
      "step": 676660
    },
    {
      "epoch": 1.107401661397066,
      "grad_norm": 0.7297750115394592,
      "learning_rate": 7.836495061378598e-06,
      "loss": 0.0249,
      "step": 676680
    },
    {
      "epoch": 1.1074343918357195,
      "grad_norm": 0.3205423355102539,
      "learning_rate": 7.836429169165081e-06,
      "loss": 0.0242,
      "step": 676700
    },
    {
      "epoch": 1.1074671222743726,
      "grad_norm": 1.3969299793243408,
      "learning_rate": 7.836363276951563e-06,
      "loss": 0.0224,
      "step": 676720
    },
    {
      "epoch": 1.107499852713026,
      "grad_norm": 1.659847617149353,
      "learning_rate": 7.836297384738047e-06,
      "loss": 0.0338,
      "step": 676740
    },
    {
      "epoch": 1.1075325831516794,
      "grad_norm": 0.37696459889411926,
      "learning_rate": 7.836231492524528e-06,
      "loss": 0.0148,
      "step": 676760
    },
    {
      "epoch": 1.1075653135903327,
      "grad_norm": 0.8175234794616699,
      "learning_rate": 7.836165600311012e-06,
      "loss": 0.0156,
      "step": 676780
    },
    {
      "epoch": 1.107598044028986,
      "grad_norm": 1.6806906461715698,
      "learning_rate": 7.836099708097494e-06,
      "loss": 0.0257,
      "step": 676800
    },
    {
      "epoch": 1.1076307744676395,
      "grad_norm": 2.027956008911133,
      "learning_rate": 7.836033815883978e-06,
      "loss": 0.0421,
      "step": 676820
    },
    {
      "epoch": 1.1076635049062928,
      "grad_norm": 0.7455496788024902,
      "learning_rate": 7.83596792367046e-06,
      "loss": 0.0354,
      "step": 676840
    },
    {
      "epoch": 1.107696235344946,
      "grad_norm": 1.1298811435699463,
      "learning_rate": 7.835902031456943e-06,
      "loss": 0.0267,
      "step": 676860
    },
    {
      "epoch": 1.1077289657835994,
      "grad_norm": 0.6548751592636108,
      "learning_rate": 7.835836139243427e-06,
      "loss": 0.0295,
      "step": 676880
    },
    {
      "epoch": 1.1077616962222527,
      "grad_norm": 0.41836264729499817,
      "learning_rate": 7.835770247029908e-06,
      "loss": 0.0283,
      "step": 676900
    },
    {
      "epoch": 1.107794426660906,
      "grad_norm": 0.6129733324050903,
      "learning_rate": 7.835704354816392e-06,
      "loss": 0.0296,
      "step": 676920
    },
    {
      "epoch": 1.1078271570995595,
      "grad_norm": 0.39161956310272217,
      "learning_rate": 7.835638462602874e-06,
      "loss": 0.0189,
      "step": 676940
    },
    {
      "epoch": 1.1078598875382129,
      "grad_norm": 0.681270182132721,
      "learning_rate": 7.835572570389358e-06,
      "loss": 0.0154,
      "step": 676960
    },
    {
      "epoch": 1.1078926179768662,
      "grad_norm": 0.8450703620910645,
      "learning_rate": 7.835506678175841e-06,
      "loss": 0.0298,
      "step": 676980
    },
    {
      "epoch": 1.1079253484155194,
      "grad_norm": 0.5250294804573059,
      "learning_rate": 7.835440785962323e-06,
      "loss": 0.0221,
      "step": 677000
    },
    {
      "epoch": 1.1079580788541727,
      "grad_norm": 0.5820662379264832,
      "learning_rate": 7.835374893748807e-06,
      "loss": 0.0206,
      "step": 677020
    },
    {
      "epoch": 1.1079908092928261,
      "grad_norm": 1.0112744569778442,
      "learning_rate": 7.83530900153529e-06,
      "loss": 0.0197,
      "step": 677040
    },
    {
      "epoch": 1.1080235397314795,
      "grad_norm": 1.0650811195373535,
      "learning_rate": 7.835243109321772e-06,
      "loss": 0.0275,
      "step": 677060
    },
    {
      "epoch": 1.1080562701701329,
      "grad_norm": 1.1831653118133545,
      "learning_rate": 7.835177217108256e-06,
      "loss": 0.0206,
      "step": 677080
    },
    {
      "epoch": 1.1080890006087862,
      "grad_norm": 0.7766791582107544,
      "learning_rate": 7.835111324894738e-06,
      "loss": 0.031,
      "step": 677100
    },
    {
      "epoch": 1.1081217310474396,
      "grad_norm": 0.9418577551841736,
      "learning_rate": 7.835045432681221e-06,
      "loss": 0.0284,
      "step": 677120
    },
    {
      "epoch": 1.1081544614860928,
      "grad_norm": 0.822429358959198,
      "learning_rate": 7.834979540467703e-06,
      "loss": 0.0226,
      "step": 677140
    },
    {
      "epoch": 1.1081871919247461,
      "grad_norm": 0.4726558029651642,
      "learning_rate": 7.834913648254187e-06,
      "loss": 0.0208,
      "step": 677160
    },
    {
      "epoch": 1.1082199223633995,
      "grad_norm": 0.7438315749168396,
      "learning_rate": 7.834847756040669e-06,
      "loss": 0.021,
      "step": 677180
    },
    {
      "epoch": 1.1082526528020529,
      "grad_norm": 1.5250637531280518,
      "learning_rate": 7.834781863827152e-06,
      "loss": 0.0228,
      "step": 677200
    },
    {
      "epoch": 1.1082853832407062,
      "grad_norm": 0.1869610697031021,
      "learning_rate": 7.834715971613634e-06,
      "loss": 0.0244,
      "step": 677220
    },
    {
      "epoch": 1.1083181136793596,
      "grad_norm": 0.28707411885261536,
      "learning_rate": 7.834650079400118e-06,
      "loss": 0.0229,
      "step": 677240
    },
    {
      "epoch": 1.1083508441180128,
      "grad_norm": 0.5937755107879639,
      "learning_rate": 7.834584187186601e-06,
      "loss": 0.0275,
      "step": 677260
    },
    {
      "epoch": 1.1083835745566661,
      "grad_norm": 0.172256201505661,
      "learning_rate": 7.834518294973083e-06,
      "loss": 0.0239,
      "step": 677280
    },
    {
      "epoch": 1.1084163049953195,
      "grad_norm": 0.45266300439834595,
      "learning_rate": 7.834452402759567e-06,
      "loss": 0.0279,
      "step": 677300
    },
    {
      "epoch": 1.1084490354339729,
      "grad_norm": 1.0078325271606445,
      "learning_rate": 7.834386510546049e-06,
      "loss": 0.03,
      "step": 677320
    },
    {
      "epoch": 1.1084817658726263,
      "grad_norm": 1.3240255117416382,
      "learning_rate": 7.834320618332532e-06,
      "loss": 0.0328,
      "step": 677340
    },
    {
      "epoch": 1.1085144963112796,
      "grad_norm": 0.8609910011291504,
      "learning_rate": 7.834254726119016e-06,
      "loss": 0.0296,
      "step": 677360
    },
    {
      "epoch": 1.108547226749933,
      "grad_norm": 0.6673175692558289,
      "learning_rate": 7.834188833905498e-06,
      "loss": 0.0186,
      "step": 677380
    },
    {
      "epoch": 1.1085799571885862,
      "grad_norm": 0.5675081014633179,
      "learning_rate": 7.834122941691981e-06,
      "loss": 0.0324,
      "step": 677400
    },
    {
      "epoch": 1.1086126876272395,
      "grad_norm": 0.5824994444847107,
      "learning_rate": 7.834057049478465e-06,
      "loss": 0.0382,
      "step": 677420
    },
    {
      "epoch": 1.108645418065893,
      "grad_norm": 0.327985942363739,
      "learning_rate": 7.833991157264947e-06,
      "loss": 0.0228,
      "step": 677440
    },
    {
      "epoch": 1.1086781485045463,
      "grad_norm": 0.39305374026298523,
      "learning_rate": 7.83392526505143e-06,
      "loss": 0.0178,
      "step": 677460
    },
    {
      "epoch": 1.1087108789431996,
      "grad_norm": 1.6773884296417236,
      "learning_rate": 7.833859372837912e-06,
      "loss": 0.0172,
      "step": 677480
    },
    {
      "epoch": 1.108743609381853,
      "grad_norm": 1.1330796480178833,
      "learning_rate": 7.833793480624396e-06,
      "loss": 0.0278,
      "step": 677500
    },
    {
      "epoch": 1.1087763398205062,
      "grad_norm": 1.078251600265503,
      "learning_rate": 7.833727588410878e-06,
      "loss": 0.026,
      "step": 677520
    },
    {
      "epoch": 1.1088090702591595,
      "grad_norm": 0.5402584075927734,
      "learning_rate": 7.833661696197361e-06,
      "loss": 0.0271,
      "step": 677540
    },
    {
      "epoch": 1.108841800697813,
      "grad_norm": 2.0644068717956543,
      "learning_rate": 7.833595803983843e-06,
      "loss": 0.022,
      "step": 677560
    },
    {
      "epoch": 1.1088745311364663,
      "grad_norm": 0.6217226982116699,
      "learning_rate": 7.833529911770327e-06,
      "loss": 0.0271,
      "step": 677580
    },
    {
      "epoch": 1.1089072615751197,
      "grad_norm": 1.135208010673523,
      "learning_rate": 7.83346401955681e-06,
      "loss": 0.0291,
      "step": 677600
    },
    {
      "epoch": 1.108939992013773,
      "grad_norm": 0.3242829442024231,
      "learning_rate": 7.833398127343292e-06,
      "loss": 0.0289,
      "step": 677620
    },
    {
      "epoch": 1.1089727224524264,
      "grad_norm": 0.58479905128479,
      "learning_rate": 7.833332235129776e-06,
      "loss": 0.0309,
      "step": 677640
    },
    {
      "epoch": 1.1090054528910795,
      "grad_norm": 1.7276625633239746,
      "learning_rate": 7.833266342916258e-06,
      "loss": 0.0313,
      "step": 677660
    },
    {
      "epoch": 1.109038183329733,
      "grad_norm": 0.6657914519309998,
      "learning_rate": 7.833200450702741e-06,
      "loss": 0.0248,
      "step": 677680
    },
    {
      "epoch": 1.1090709137683863,
      "grad_norm": 0.6858900189399719,
      "learning_rate": 7.833134558489223e-06,
      "loss": 0.0222,
      "step": 677700
    },
    {
      "epoch": 1.1091036442070397,
      "grad_norm": 1.3855416774749756,
      "learning_rate": 7.833068666275707e-06,
      "loss": 0.0366,
      "step": 677720
    },
    {
      "epoch": 1.109136374645693,
      "grad_norm": 0.34467625617980957,
      "learning_rate": 7.833002774062189e-06,
      "loss": 0.0269,
      "step": 677740
    },
    {
      "epoch": 1.1091691050843464,
      "grad_norm": 1.087921142578125,
      "learning_rate": 7.832936881848672e-06,
      "loss": 0.026,
      "step": 677760
    },
    {
      "epoch": 1.1092018355229998,
      "grad_norm": 1.6107439994812012,
      "learning_rate": 7.832870989635156e-06,
      "loss": 0.0209,
      "step": 677780
    },
    {
      "epoch": 1.109234565961653,
      "grad_norm": 0.6938638687133789,
      "learning_rate": 7.832805097421638e-06,
      "loss": 0.0277,
      "step": 677800
    },
    {
      "epoch": 1.1092672964003063,
      "grad_norm": 0.816869854927063,
      "learning_rate": 7.832739205208121e-06,
      "loss": 0.0281,
      "step": 677820
    },
    {
      "epoch": 1.1093000268389597,
      "grad_norm": 0.37166401743888855,
      "learning_rate": 7.832673312994605e-06,
      "loss": 0.027,
      "step": 677840
    },
    {
      "epoch": 1.109332757277613,
      "grad_norm": 0.3622754216194153,
      "learning_rate": 7.832607420781087e-06,
      "loss": 0.0186,
      "step": 677860
    },
    {
      "epoch": 1.1093654877162664,
      "grad_norm": 0.33983588218688965,
      "learning_rate": 7.83254152856757e-06,
      "loss": 0.0213,
      "step": 677880
    },
    {
      "epoch": 1.1093982181549198,
      "grad_norm": 0.44687286019325256,
      "learning_rate": 7.832475636354052e-06,
      "loss": 0.0241,
      "step": 677900
    },
    {
      "epoch": 1.1094309485935732,
      "grad_norm": 0.6054878234863281,
      "learning_rate": 7.832409744140536e-06,
      "loss": 0.0199,
      "step": 677920
    },
    {
      "epoch": 1.1094636790322263,
      "grad_norm": 0.3400178551673889,
      "learning_rate": 7.83234385192702e-06,
      "loss": 0.0178,
      "step": 677940
    },
    {
      "epoch": 1.1094964094708797,
      "grad_norm": 0.405246376991272,
      "learning_rate": 7.832277959713501e-06,
      "loss": 0.0267,
      "step": 677960
    },
    {
      "epoch": 1.109529139909533,
      "grad_norm": 1.0965389013290405,
      "learning_rate": 7.832212067499985e-06,
      "loss": 0.019,
      "step": 677980
    },
    {
      "epoch": 1.1095618703481864,
      "grad_norm": 1.1116595268249512,
      "learning_rate": 7.832146175286467e-06,
      "loss": 0.0335,
      "step": 678000
    },
    {
      "epoch": 1.1095946007868398,
      "grad_norm": 0.16469214856624603,
      "learning_rate": 7.83208028307295e-06,
      "loss": 0.0158,
      "step": 678020
    },
    {
      "epoch": 1.1096273312254932,
      "grad_norm": 0.39862528443336487,
      "learning_rate": 7.832014390859432e-06,
      "loss": 0.0291,
      "step": 678040
    },
    {
      "epoch": 1.1096600616641463,
      "grad_norm": 1.028518557548523,
      "learning_rate": 7.831948498645916e-06,
      "loss": 0.0216,
      "step": 678060
    },
    {
      "epoch": 1.1096927921027997,
      "grad_norm": 1.905548334121704,
      "learning_rate": 7.831882606432398e-06,
      "loss": 0.0358,
      "step": 678080
    },
    {
      "epoch": 1.109725522541453,
      "grad_norm": 1.4502496719360352,
      "learning_rate": 7.831816714218881e-06,
      "loss": 0.0333,
      "step": 678100
    },
    {
      "epoch": 1.1097582529801064,
      "grad_norm": 0.11468484252691269,
      "learning_rate": 7.831750822005363e-06,
      "loss": 0.02,
      "step": 678120
    },
    {
      "epoch": 1.1097909834187598,
      "grad_norm": 0.8220130801200867,
      "learning_rate": 7.831684929791847e-06,
      "loss": 0.0221,
      "step": 678140
    },
    {
      "epoch": 1.1098237138574132,
      "grad_norm": 0.31963416934013367,
      "learning_rate": 7.83161903757833e-06,
      "loss": 0.0259,
      "step": 678160
    },
    {
      "epoch": 1.1098564442960666,
      "grad_norm": 0.5478314161300659,
      "learning_rate": 7.831553145364812e-06,
      "loss": 0.0384,
      "step": 678180
    },
    {
      "epoch": 1.1098891747347197,
      "grad_norm": 0.7794979214668274,
      "learning_rate": 7.831487253151296e-06,
      "loss": 0.0325,
      "step": 678200
    },
    {
      "epoch": 1.109921905173373,
      "grad_norm": 0.12439355254173279,
      "learning_rate": 7.83142136093778e-06,
      "loss": 0.0224,
      "step": 678220
    },
    {
      "epoch": 1.1099546356120265,
      "grad_norm": 0.4283047616481781,
      "learning_rate": 7.831355468724261e-06,
      "loss": 0.0181,
      "step": 678240
    },
    {
      "epoch": 1.1099873660506798,
      "grad_norm": 0.9443725347518921,
      "learning_rate": 7.831289576510745e-06,
      "loss": 0.0317,
      "step": 678260
    },
    {
      "epoch": 1.1100200964893332,
      "grad_norm": 0.351642370223999,
      "learning_rate": 7.831223684297229e-06,
      "loss": 0.0272,
      "step": 678280
    },
    {
      "epoch": 1.1100528269279866,
      "grad_norm": 1.3659987449645996,
      "learning_rate": 7.83115779208371e-06,
      "loss": 0.0192,
      "step": 678300
    },
    {
      "epoch": 1.1100855573666397,
      "grad_norm": 1.106689214706421,
      "learning_rate": 7.831091899870194e-06,
      "loss": 0.021,
      "step": 678320
    },
    {
      "epoch": 1.110118287805293,
      "grad_norm": 3.134598731994629,
      "learning_rate": 7.831026007656676e-06,
      "loss": 0.0179,
      "step": 678340
    },
    {
      "epoch": 1.1101510182439465,
      "grad_norm": 0.41718795895576477,
      "learning_rate": 7.83096011544316e-06,
      "loss": 0.0206,
      "step": 678360
    },
    {
      "epoch": 1.1101837486825998,
      "grad_norm": 0.44386565685272217,
      "learning_rate": 7.830894223229641e-06,
      "loss": 0.0261,
      "step": 678380
    },
    {
      "epoch": 1.1102164791212532,
      "grad_norm": 0.2651355266571045,
      "learning_rate": 7.830828331016125e-06,
      "loss": 0.0252,
      "step": 678400
    },
    {
      "epoch": 1.1102492095599066,
      "grad_norm": 1.5299466848373413,
      "learning_rate": 7.830762438802607e-06,
      "loss": 0.022,
      "step": 678420
    },
    {
      "epoch": 1.11028193999856,
      "grad_norm": 0.326623797416687,
      "learning_rate": 7.83069654658909e-06,
      "loss": 0.0191,
      "step": 678440
    },
    {
      "epoch": 1.110314670437213,
      "grad_norm": 0.4667140543460846,
      "learning_rate": 7.830630654375572e-06,
      "loss": 0.0219,
      "step": 678460
    },
    {
      "epoch": 1.1103474008758665,
      "grad_norm": 2.941497564315796,
      "learning_rate": 7.830564762162056e-06,
      "loss": 0.0273,
      "step": 678480
    },
    {
      "epoch": 1.1103801313145198,
      "grad_norm": 1.009126901626587,
      "learning_rate": 7.830498869948538e-06,
      "loss": 0.0194,
      "step": 678500
    },
    {
      "epoch": 1.1104128617531732,
      "grad_norm": 0.5766136646270752,
      "learning_rate": 7.830432977735021e-06,
      "loss": 0.0265,
      "step": 678520
    },
    {
      "epoch": 1.1104455921918266,
      "grad_norm": 0.3999454975128174,
      "learning_rate": 7.830367085521503e-06,
      "loss": 0.0212,
      "step": 678540
    },
    {
      "epoch": 1.11047832263048,
      "grad_norm": 1.2240108251571655,
      "learning_rate": 7.830301193307987e-06,
      "loss": 0.0242,
      "step": 678560
    },
    {
      "epoch": 1.1105110530691333,
      "grad_norm": 1.0048483610153198,
      "learning_rate": 7.83023530109447e-06,
      "loss": 0.0234,
      "step": 678580
    },
    {
      "epoch": 1.1105437835077865,
      "grad_norm": 0.346370130777359,
      "learning_rate": 7.830169408880952e-06,
      "loss": 0.0195,
      "step": 678600
    },
    {
      "epoch": 1.1105765139464399,
      "grad_norm": 0.17935051023960114,
      "learning_rate": 7.830103516667436e-06,
      "loss": 0.0211,
      "step": 678620
    },
    {
      "epoch": 1.1106092443850932,
      "grad_norm": 0.782496452331543,
      "learning_rate": 7.83003762445392e-06,
      "loss": 0.0226,
      "step": 678640
    },
    {
      "epoch": 1.1106419748237466,
      "grad_norm": 0.18803603947162628,
      "learning_rate": 7.829971732240403e-06,
      "loss": 0.0221,
      "step": 678660
    },
    {
      "epoch": 1.1106747052624,
      "grad_norm": 0.5341567993164062,
      "learning_rate": 7.829905840026885e-06,
      "loss": 0.0263,
      "step": 678680
    },
    {
      "epoch": 1.1107074357010533,
      "grad_norm": 1.1244205236434937,
      "learning_rate": 7.829839947813369e-06,
      "loss": 0.0258,
      "step": 678700
    },
    {
      "epoch": 1.1107401661397067,
      "grad_norm": 0.3364003598690033,
      "learning_rate": 7.82977405559985e-06,
      "loss": 0.0263,
      "step": 678720
    },
    {
      "epoch": 1.1107728965783599,
      "grad_norm": 1.2185218334197998,
      "learning_rate": 7.829708163386334e-06,
      "loss": 0.029,
      "step": 678740
    },
    {
      "epoch": 1.1108056270170132,
      "grad_norm": 0.34738394618034363,
      "learning_rate": 7.829642271172816e-06,
      "loss": 0.0189,
      "step": 678760
    },
    {
      "epoch": 1.1108383574556666,
      "grad_norm": 1.4081714153289795,
      "learning_rate": 7.8295763789593e-06,
      "loss": 0.0349,
      "step": 678780
    },
    {
      "epoch": 1.11087108789432,
      "grad_norm": 0.24562202394008636,
      "learning_rate": 7.829510486745781e-06,
      "loss": 0.0244,
      "step": 678800
    },
    {
      "epoch": 1.1109038183329734,
      "grad_norm": 3.937742233276367,
      "learning_rate": 7.829444594532265e-06,
      "loss": 0.0286,
      "step": 678820
    },
    {
      "epoch": 1.1109365487716267,
      "grad_norm": 1.3174059391021729,
      "learning_rate": 7.829378702318747e-06,
      "loss": 0.0233,
      "step": 678840
    },
    {
      "epoch": 1.1109692792102799,
      "grad_norm": 1.4865511655807495,
      "learning_rate": 7.82931281010523e-06,
      "loss": 0.0254,
      "step": 678860
    },
    {
      "epoch": 1.1110020096489333,
      "grad_norm": 0.4483985900878906,
      "learning_rate": 7.829246917891712e-06,
      "loss": 0.0226,
      "step": 678880
    },
    {
      "epoch": 1.1110347400875866,
      "grad_norm": 0.2437288761138916,
      "learning_rate": 7.829181025678196e-06,
      "loss": 0.0204,
      "step": 678900
    },
    {
      "epoch": 1.11106747052624,
      "grad_norm": 0.3371520936489105,
      "learning_rate": 7.829115133464678e-06,
      "loss": 0.025,
      "step": 678920
    },
    {
      "epoch": 1.1111002009648934,
      "grad_norm": 0.822652280330658,
      "learning_rate": 7.829049241251161e-06,
      "loss": 0.0338,
      "step": 678940
    },
    {
      "epoch": 1.1111329314035467,
      "grad_norm": 0.31584998965263367,
      "learning_rate": 7.828983349037645e-06,
      "loss": 0.0136,
      "step": 678960
    },
    {
      "epoch": 1.1111656618422,
      "grad_norm": 1.0124449729919434,
      "learning_rate": 7.828917456824127e-06,
      "loss": 0.026,
      "step": 678980
    },
    {
      "epoch": 1.1111983922808533,
      "grad_norm": 1.259102463722229,
      "learning_rate": 7.82885156461061e-06,
      "loss": 0.0251,
      "step": 679000
    },
    {
      "epoch": 1.1112311227195066,
      "grad_norm": 1.5213290452957153,
      "learning_rate": 7.828785672397094e-06,
      "loss": 0.0293,
      "step": 679020
    },
    {
      "epoch": 1.11126385315816,
      "grad_norm": 0.20405088365077972,
      "learning_rate": 7.828719780183576e-06,
      "loss": 0.0215,
      "step": 679040
    },
    {
      "epoch": 1.1112965835968134,
      "grad_norm": 0.5989996790885925,
      "learning_rate": 7.82865388797006e-06,
      "loss": 0.0233,
      "step": 679060
    },
    {
      "epoch": 1.1113293140354668,
      "grad_norm": 0.7850049734115601,
      "learning_rate": 7.828587995756543e-06,
      "loss": 0.0182,
      "step": 679080
    },
    {
      "epoch": 1.1113620444741201,
      "grad_norm": 0.9433354735374451,
      "learning_rate": 7.828522103543025e-06,
      "loss": 0.0286,
      "step": 679100
    },
    {
      "epoch": 1.1113947749127733,
      "grad_norm": 0.7002344727516174,
      "learning_rate": 7.828456211329509e-06,
      "loss": 0.0182,
      "step": 679120
    },
    {
      "epoch": 1.1114275053514266,
      "grad_norm": 1.0683170557022095,
      "learning_rate": 7.82839031911599e-06,
      "loss": 0.0176,
      "step": 679140
    },
    {
      "epoch": 1.11146023579008,
      "grad_norm": 1.280234932899475,
      "learning_rate": 7.828324426902474e-06,
      "loss": 0.0339,
      "step": 679160
    },
    {
      "epoch": 1.1114929662287334,
      "grad_norm": 0.5722676515579224,
      "learning_rate": 7.828258534688956e-06,
      "loss": 0.0333,
      "step": 679180
    },
    {
      "epoch": 1.1115256966673868,
      "grad_norm": 0.5987957119941711,
      "learning_rate": 7.82819264247544e-06,
      "loss": 0.0232,
      "step": 679200
    },
    {
      "epoch": 1.1115584271060401,
      "grad_norm": 0.4515687823295593,
      "learning_rate": 7.828126750261922e-06,
      "loss": 0.0131,
      "step": 679220
    },
    {
      "epoch": 1.1115911575446935,
      "grad_norm": 0.8922997713088989,
      "learning_rate": 7.828060858048405e-06,
      "loss": 0.0315,
      "step": 679240
    },
    {
      "epoch": 1.1116238879833467,
      "grad_norm": 1.934185266494751,
      "learning_rate": 7.827994965834887e-06,
      "loss": 0.0286,
      "step": 679260
    },
    {
      "epoch": 1.111656618422,
      "grad_norm": 0.9168797731399536,
      "learning_rate": 7.82792907362137e-06,
      "loss": 0.034,
      "step": 679280
    },
    {
      "epoch": 1.1116893488606534,
      "grad_norm": 0.8686814308166504,
      "learning_rate": 7.827863181407853e-06,
      "loss": 0.0254,
      "step": 679300
    },
    {
      "epoch": 1.1117220792993068,
      "grad_norm": 0.8953896164894104,
      "learning_rate": 7.827797289194336e-06,
      "loss": 0.0274,
      "step": 679320
    },
    {
      "epoch": 1.1117548097379601,
      "grad_norm": 0.49985870718955994,
      "learning_rate": 7.82773139698082e-06,
      "loss": 0.0214,
      "step": 679340
    },
    {
      "epoch": 1.1117875401766135,
      "grad_norm": 0.29925021529197693,
      "learning_rate": 7.827665504767302e-06,
      "loss": 0.0177,
      "step": 679360
    },
    {
      "epoch": 1.111820270615267,
      "grad_norm": 0.449394166469574,
      "learning_rate": 7.827599612553785e-06,
      "loss": 0.0239,
      "step": 679380
    },
    {
      "epoch": 1.11185300105392,
      "grad_norm": 0.475161075592041,
      "learning_rate": 7.827533720340269e-06,
      "loss": 0.0293,
      "step": 679400
    },
    {
      "epoch": 1.1118857314925734,
      "grad_norm": 0.3739050328731537,
      "learning_rate": 7.82746782812675e-06,
      "loss": 0.0165,
      "step": 679420
    },
    {
      "epoch": 1.1119184619312268,
      "grad_norm": 0.5358794331550598,
      "learning_rate": 7.827401935913234e-06,
      "loss": 0.0215,
      "step": 679440
    },
    {
      "epoch": 1.1119511923698802,
      "grad_norm": 0.15295937657356262,
      "learning_rate": 7.827336043699718e-06,
      "loss": 0.0284,
      "step": 679460
    },
    {
      "epoch": 1.1119839228085335,
      "grad_norm": 0.7149851322174072,
      "learning_rate": 7.8272701514862e-06,
      "loss": 0.0183,
      "step": 679480
    },
    {
      "epoch": 1.112016653247187,
      "grad_norm": 0.7997203469276428,
      "learning_rate": 7.827204259272683e-06,
      "loss": 0.0173,
      "step": 679500
    },
    {
      "epoch": 1.11204938368584,
      "grad_norm": 0.5562313795089722,
      "learning_rate": 7.827138367059165e-06,
      "loss": 0.0192,
      "step": 679520
    },
    {
      "epoch": 1.1120821141244934,
      "grad_norm": 0.4760251045227051,
      "learning_rate": 7.827072474845649e-06,
      "loss": 0.02,
      "step": 679540
    },
    {
      "epoch": 1.1121148445631468,
      "grad_norm": 0.23397491872310638,
      "learning_rate": 7.82700658263213e-06,
      "loss": 0.0286,
      "step": 679560
    },
    {
      "epoch": 1.1121475750018002,
      "grad_norm": 0.7810624241828918,
      "learning_rate": 7.826940690418614e-06,
      "loss": 0.0184,
      "step": 679580
    },
    {
      "epoch": 1.1121803054404535,
      "grad_norm": 0.8877107501029968,
      "learning_rate": 7.826874798205096e-06,
      "loss": 0.0399,
      "step": 679600
    },
    {
      "epoch": 1.112213035879107,
      "grad_norm": 2.197700262069702,
      "learning_rate": 7.82680890599158e-06,
      "loss": 0.0226,
      "step": 679620
    },
    {
      "epoch": 1.1122457663177603,
      "grad_norm": 0.3732422888278961,
      "learning_rate": 7.826743013778062e-06,
      "loss": 0.028,
      "step": 679640
    },
    {
      "epoch": 1.1122784967564134,
      "grad_norm": 0.31651318073272705,
      "learning_rate": 7.826677121564545e-06,
      "loss": 0.0262,
      "step": 679660
    },
    {
      "epoch": 1.1123112271950668,
      "grad_norm": 0.5028822422027588,
      "learning_rate": 7.826611229351027e-06,
      "loss": 0.026,
      "step": 679680
    },
    {
      "epoch": 1.1123439576337202,
      "grad_norm": 0.7441503405570984,
      "learning_rate": 7.82654533713751e-06,
      "loss": 0.0225,
      "step": 679700
    },
    {
      "epoch": 1.1123766880723736,
      "grad_norm": 1.0712913274765015,
      "learning_rate": 7.826479444923994e-06,
      "loss": 0.0295,
      "step": 679720
    },
    {
      "epoch": 1.112409418511027,
      "grad_norm": 0.4080624580383301,
      "learning_rate": 7.826413552710476e-06,
      "loss": 0.021,
      "step": 679740
    },
    {
      "epoch": 1.1124421489496803,
      "grad_norm": 0.43998414278030396,
      "learning_rate": 7.82634766049696e-06,
      "loss": 0.0224,
      "step": 679760
    },
    {
      "epoch": 1.1124748793883334,
      "grad_norm": 0.2109437733888626,
      "learning_rate": 7.826281768283442e-06,
      "loss": 0.0274,
      "step": 679780
    },
    {
      "epoch": 1.1125076098269868,
      "grad_norm": 0.920212984085083,
      "learning_rate": 7.826215876069925e-06,
      "loss": 0.0239,
      "step": 679800
    },
    {
      "epoch": 1.1125403402656402,
      "grad_norm": 2.1694464683532715,
      "learning_rate": 7.826149983856409e-06,
      "loss": 0.0256,
      "step": 679820
    },
    {
      "epoch": 1.1125730707042936,
      "grad_norm": 1.8356854915618896,
      "learning_rate": 7.82608409164289e-06,
      "loss": 0.0298,
      "step": 679840
    },
    {
      "epoch": 1.112605801142947,
      "grad_norm": 0.31828200817108154,
      "learning_rate": 7.826018199429374e-06,
      "loss": 0.0325,
      "step": 679860
    },
    {
      "epoch": 1.1126385315816003,
      "grad_norm": 2.802205801010132,
      "learning_rate": 7.825952307215858e-06,
      "loss": 0.0234,
      "step": 679880
    },
    {
      "epoch": 1.1126712620202537,
      "grad_norm": 1.0406306982040405,
      "learning_rate": 7.82588641500234e-06,
      "loss": 0.0253,
      "step": 679900
    },
    {
      "epoch": 1.1127039924589068,
      "grad_norm": 0.5786811113357544,
      "learning_rate": 7.825820522788823e-06,
      "loss": 0.0323,
      "step": 679920
    },
    {
      "epoch": 1.1127367228975602,
      "grad_norm": 0.5295073986053467,
      "learning_rate": 7.825754630575305e-06,
      "loss": 0.0264,
      "step": 679940
    },
    {
      "epoch": 1.1127694533362136,
      "grad_norm": 2.952324867248535,
      "learning_rate": 7.825688738361789e-06,
      "loss": 0.0256,
      "step": 679960
    },
    {
      "epoch": 1.112802183774867,
      "grad_norm": 0.5193170309066772,
      "learning_rate": 7.82562284614827e-06,
      "loss": 0.0321,
      "step": 679980
    },
    {
      "epoch": 1.1128349142135203,
      "grad_norm": 1.0447945594787598,
      "learning_rate": 7.825556953934754e-06,
      "loss": 0.0226,
      "step": 680000
    },
    {
      "epoch": 1.1128676446521737,
      "grad_norm": 0.1832449585199356,
      "learning_rate": 7.825491061721236e-06,
      "loss": 0.0195,
      "step": 680020
    },
    {
      "epoch": 1.112900375090827,
      "grad_norm": 0.12966766953468323,
      "learning_rate": 7.82542516950772e-06,
      "loss": 0.0249,
      "step": 680040
    },
    {
      "epoch": 1.1129331055294802,
      "grad_norm": 0.6677041053771973,
      "learning_rate": 7.825359277294203e-06,
      "loss": 0.0194,
      "step": 680060
    },
    {
      "epoch": 1.1129658359681336,
      "grad_norm": 0.7454090118408203,
      "learning_rate": 7.825293385080685e-06,
      "loss": 0.0212,
      "step": 680080
    },
    {
      "epoch": 1.112998566406787,
      "grad_norm": 0.4799041748046875,
      "learning_rate": 7.825227492867169e-06,
      "loss": 0.0227,
      "step": 680100
    },
    {
      "epoch": 1.1130312968454403,
      "grad_norm": 1.7214303016662598,
      "learning_rate": 7.82516160065365e-06,
      "loss": 0.0275,
      "step": 680120
    },
    {
      "epoch": 1.1130640272840937,
      "grad_norm": 1.9943633079528809,
      "learning_rate": 7.825095708440134e-06,
      "loss": 0.0159,
      "step": 680140
    },
    {
      "epoch": 1.113096757722747,
      "grad_norm": 0.27583497762680054,
      "learning_rate": 7.825029816226616e-06,
      "loss": 0.0228,
      "step": 680160
    },
    {
      "epoch": 1.1131294881614004,
      "grad_norm": 2.550893783569336,
      "learning_rate": 7.8249639240131e-06,
      "loss": 0.0187,
      "step": 680180
    },
    {
      "epoch": 1.1131622186000536,
      "grad_norm": 0.7116872072219849,
      "learning_rate": 7.824898031799583e-06,
      "loss": 0.0273,
      "step": 680200
    },
    {
      "epoch": 1.113194949038707,
      "grad_norm": 0.4754025638103485,
      "learning_rate": 7.824832139586065e-06,
      "loss": 0.0272,
      "step": 680220
    },
    {
      "epoch": 1.1132276794773603,
      "grad_norm": 0.16295823454856873,
      "learning_rate": 7.824766247372549e-06,
      "loss": 0.0129,
      "step": 680240
    },
    {
      "epoch": 1.1132604099160137,
      "grad_norm": 0.31233155727386475,
      "learning_rate": 7.824700355159032e-06,
      "loss": 0.0265,
      "step": 680260
    },
    {
      "epoch": 1.113293140354667,
      "grad_norm": 0.5685805082321167,
      "learning_rate": 7.824634462945514e-06,
      "loss": 0.0238,
      "step": 680280
    },
    {
      "epoch": 1.1133258707933205,
      "grad_norm": 0.12553241848945618,
      "learning_rate": 7.824568570731998e-06,
      "loss": 0.0212,
      "step": 680300
    },
    {
      "epoch": 1.1133586012319736,
      "grad_norm": 0.9484691023826599,
      "learning_rate": 7.82450267851848e-06,
      "loss": 0.0306,
      "step": 680320
    },
    {
      "epoch": 1.113391331670627,
      "grad_norm": 0.8552008271217346,
      "learning_rate": 7.824436786304963e-06,
      "loss": 0.0198,
      "step": 680340
    },
    {
      "epoch": 1.1134240621092804,
      "grad_norm": 0.14857301115989685,
      "learning_rate": 7.824370894091445e-06,
      "loss": 0.0233,
      "step": 680360
    },
    {
      "epoch": 1.1134567925479337,
      "grad_norm": 0.29317596554756165,
      "learning_rate": 7.824305001877929e-06,
      "loss": 0.0228,
      "step": 680380
    },
    {
      "epoch": 1.113489522986587,
      "grad_norm": 0.9845991730690002,
      "learning_rate": 7.824239109664413e-06,
      "loss": 0.0178,
      "step": 680400
    },
    {
      "epoch": 1.1135222534252405,
      "grad_norm": 0.5815459489822388,
      "learning_rate": 7.824173217450894e-06,
      "loss": 0.0248,
      "step": 680420
    },
    {
      "epoch": 1.1135549838638938,
      "grad_norm": 0.6396305561065674,
      "learning_rate": 7.824107325237378e-06,
      "loss": 0.0217,
      "step": 680440
    },
    {
      "epoch": 1.113587714302547,
      "grad_norm": 2.077338457107544,
      "learning_rate": 7.82404143302386e-06,
      "loss": 0.0183,
      "step": 680460
    },
    {
      "epoch": 1.1136204447412004,
      "grad_norm": 0.6313677430152893,
      "learning_rate": 7.823975540810343e-06,
      "loss": 0.0238,
      "step": 680480
    },
    {
      "epoch": 1.1136531751798537,
      "grad_norm": 1.0642801523208618,
      "learning_rate": 7.823909648596825e-06,
      "loss": 0.0348,
      "step": 680500
    },
    {
      "epoch": 1.113685905618507,
      "grad_norm": 0.31489792466163635,
      "learning_rate": 7.823843756383309e-06,
      "loss": 0.0212,
      "step": 680520
    },
    {
      "epoch": 1.1137186360571605,
      "grad_norm": 0.6888576149940491,
      "learning_rate": 7.82377786416979e-06,
      "loss": 0.0229,
      "step": 680540
    },
    {
      "epoch": 1.1137513664958139,
      "grad_norm": 0.3631685972213745,
      "learning_rate": 7.823711971956274e-06,
      "loss": 0.027,
      "step": 680560
    },
    {
      "epoch": 1.113784096934467,
      "grad_norm": 0.18135422468185425,
      "learning_rate": 7.823646079742756e-06,
      "loss": 0.0208,
      "step": 680580
    },
    {
      "epoch": 1.1138168273731204,
      "grad_norm": 0.5383993983268738,
      "learning_rate": 7.82358018752924e-06,
      "loss": 0.0274,
      "step": 680600
    },
    {
      "epoch": 1.1138495578117737,
      "grad_norm": 1.7274606227874756,
      "learning_rate": 7.823514295315723e-06,
      "loss": 0.0264,
      "step": 680620
    },
    {
      "epoch": 1.1138822882504271,
      "grad_norm": 0.5302239060401917,
      "learning_rate": 7.823448403102205e-06,
      "loss": 0.0192,
      "step": 680640
    },
    {
      "epoch": 1.1139150186890805,
      "grad_norm": 0.19292956590652466,
      "learning_rate": 7.823382510888689e-06,
      "loss": 0.027,
      "step": 680660
    },
    {
      "epoch": 1.1139477491277339,
      "grad_norm": 0.4586353898048401,
      "learning_rate": 7.823316618675173e-06,
      "loss": 0.0218,
      "step": 680680
    },
    {
      "epoch": 1.1139804795663872,
      "grad_norm": 0.10415150225162506,
      "learning_rate": 7.823250726461654e-06,
      "loss": 0.0249,
      "step": 680700
    },
    {
      "epoch": 1.1140132100050404,
      "grad_norm": 0.6873055696487427,
      "learning_rate": 7.823184834248138e-06,
      "loss": 0.02,
      "step": 680720
    },
    {
      "epoch": 1.1140459404436938,
      "grad_norm": 0.11195347458124161,
      "learning_rate": 7.82311894203462e-06,
      "loss": 0.024,
      "step": 680740
    },
    {
      "epoch": 1.1140786708823471,
      "grad_norm": 0.44540247321128845,
      "learning_rate": 7.823053049821104e-06,
      "loss": 0.0192,
      "step": 680760
    },
    {
      "epoch": 1.1141114013210005,
      "grad_norm": 0.6972781419754028,
      "learning_rate": 7.822987157607587e-06,
      "loss": 0.025,
      "step": 680780
    },
    {
      "epoch": 1.1141441317596539,
      "grad_norm": 0.35113659501075745,
      "learning_rate": 7.822921265394069e-06,
      "loss": 0.0239,
      "step": 680800
    },
    {
      "epoch": 1.1141768621983072,
      "grad_norm": 0.6316776871681213,
      "learning_rate": 7.822855373180553e-06,
      "loss": 0.0356,
      "step": 680820
    },
    {
      "epoch": 1.1142095926369606,
      "grad_norm": 0.6374561190605164,
      "learning_rate": 7.822789480967034e-06,
      "loss": 0.0286,
      "step": 680840
    },
    {
      "epoch": 1.1142423230756138,
      "grad_norm": 0.496287077665329,
      "learning_rate": 7.822723588753518e-06,
      "loss": 0.0189,
      "step": 680860
    },
    {
      "epoch": 1.1142750535142671,
      "grad_norm": 1.3283883333206177,
      "learning_rate": 7.82265769654e-06,
      "loss": 0.0206,
      "step": 680880
    },
    {
      "epoch": 1.1143077839529205,
      "grad_norm": 0.9037824869155884,
      "learning_rate": 7.822591804326484e-06,
      "loss": 0.0209,
      "step": 680900
    },
    {
      "epoch": 1.1143405143915739,
      "grad_norm": 1.6558442115783691,
      "learning_rate": 7.822525912112965e-06,
      "loss": 0.0286,
      "step": 680920
    },
    {
      "epoch": 1.1143732448302273,
      "grad_norm": 0.9729203581809998,
      "learning_rate": 7.822460019899449e-06,
      "loss": 0.0201,
      "step": 680940
    },
    {
      "epoch": 1.1144059752688806,
      "grad_norm": 0.26916825771331787,
      "learning_rate": 7.822394127685931e-06,
      "loss": 0.021,
      "step": 680960
    },
    {
      "epoch": 1.114438705707534,
      "grad_norm": 1.1789249181747437,
      "learning_rate": 7.822328235472415e-06,
      "loss": 0.0169,
      "step": 680980
    },
    {
      "epoch": 1.1144714361461872,
      "grad_norm": 2.3035728931427,
      "learning_rate": 7.822262343258898e-06,
      "loss": 0.0258,
      "step": 681000
    },
    {
      "epoch": 1.1145041665848405,
      "grad_norm": 3.0860161781311035,
      "learning_rate": 7.82219645104538e-06,
      "loss": 0.0228,
      "step": 681020
    },
    {
      "epoch": 1.114536897023494,
      "grad_norm": 0.4204166829586029,
      "learning_rate": 7.822130558831864e-06,
      "loss": 0.0259,
      "step": 681040
    },
    {
      "epoch": 1.1145696274621473,
      "grad_norm": 0.3983944058418274,
      "learning_rate": 7.822064666618347e-06,
      "loss": 0.0258,
      "step": 681060
    },
    {
      "epoch": 1.1146023579008006,
      "grad_norm": 0.15842120349407196,
      "learning_rate": 7.821998774404829e-06,
      "loss": 0.0316,
      "step": 681080
    },
    {
      "epoch": 1.114635088339454,
      "grad_norm": 0.2503872513771057,
      "learning_rate": 7.821932882191313e-06,
      "loss": 0.0184,
      "step": 681100
    },
    {
      "epoch": 1.1146678187781072,
      "grad_norm": 0.6074234843254089,
      "learning_rate": 7.821866989977796e-06,
      "loss": 0.0233,
      "step": 681120
    },
    {
      "epoch": 1.1147005492167605,
      "grad_norm": 1.0263919830322266,
      "learning_rate": 7.821801097764278e-06,
      "loss": 0.0205,
      "step": 681140
    },
    {
      "epoch": 1.114733279655414,
      "grad_norm": 0.4734939634799957,
      "learning_rate": 7.821735205550762e-06,
      "loss": 0.0205,
      "step": 681160
    },
    {
      "epoch": 1.1147660100940673,
      "grad_norm": 1.2454612255096436,
      "learning_rate": 7.821669313337244e-06,
      "loss": 0.0279,
      "step": 681180
    },
    {
      "epoch": 1.1147987405327207,
      "grad_norm": 0.4295322000980377,
      "learning_rate": 7.821603421123727e-06,
      "loss": 0.0369,
      "step": 681200
    },
    {
      "epoch": 1.114831470971374,
      "grad_norm": 0.9255752563476562,
      "learning_rate": 7.821537528910209e-06,
      "loss": 0.0317,
      "step": 681220
    },
    {
      "epoch": 1.1148642014100274,
      "grad_norm": 1.8906856775283813,
      "learning_rate": 7.821471636696693e-06,
      "loss": 0.0291,
      "step": 681240
    },
    {
      "epoch": 1.1148969318486806,
      "grad_norm": 0.21533989906311035,
      "learning_rate": 7.821405744483175e-06,
      "loss": 0.0276,
      "step": 681260
    },
    {
      "epoch": 1.114929662287334,
      "grad_norm": 0.3739969730377197,
      "learning_rate": 7.821339852269658e-06,
      "loss": 0.0249,
      "step": 681280
    },
    {
      "epoch": 1.1149623927259873,
      "grad_norm": 0.7131355404853821,
      "learning_rate": 7.82127396005614e-06,
      "loss": 0.0259,
      "step": 681300
    },
    {
      "epoch": 1.1149951231646407,
      "grad_norm": 0.823646068572998,
      "learning_rate": 7.821208067842624e-06,
      "loss": 0.0338,
      "step": 681320
    },
    {
      "epoch": 1.115027853603294,
      "grad_norm": 1.2595055103302002,
      "learning_rate": 7.821142175629106e-06,
      "loss": 0.0347,
      "step": 681340
    },
    {
      "epoch": 1.1150605840419474,
      "grad_norm": 0.12236269563436508,
      "learning_rate": 7.821076283415589e-06,
      "loss": 0.023,
      "step": 681360
    },
    {
      "epoch": 1.1150933144806006,
      "grad_norm": 1.3917574882507324,
      "learning_rate": 7.821010391202071e-06,
      "loss": 0.0259,
      "step": 681380
    },
    {
      "epoch": 1.115126044919254,
      "grad_norm": 2.788362741470337,
      "learning_rate": 7.820944498988555e-06,
      "loss": 0.0208,
      "step": 681400
    },
    {
      "epoch": 1.1151587753579073,
      "grad_norm": 0.4852258563041687,
      "learning_rate": 7.820878606775038e-06,
      "loss": 0.0234,
      "step": 681420
    },
    {
      "epoch": 1.1151915057965607,
      "grad_norm": 0.6628134846687317,
      "learning_rate": 7.820812714561522e-06,
      "loss": 0.026,
      "step": 681440
    },
    {
      "epoch": 1.115224236235214,
      "grad_norm": 0.21520811319351196,
      "learning_rate": 7.820746822348004e-06,
      "loss": 0.0322,
      "step": 681460
    },
    {
      "epoch": 1.1152569666738674,
      "grad_norm": 1.5326234102249146,
      "learning_rate": 7.820680930134487e-06,
      "loss": 0.0229,
      "step": 681480
    },
    {
      "epoch": 1.1152896971125208,
      "grad_norm": 0.8327189087867737,
      "learning_rate": 7.82061503792097e-06,
      "loss": 0.032,
      "step": 681500
    },
    {
      "epoch": 1.115322427551174,
      "grad_norm": 1.2705483436584473,
      "learning_rate": 7.820549145707453e-06,
      "loss": 0.0233,
      "step": 681520
    },
    {
      "epoch": 1.1153551579898273,
      "grad_norm": 0.6183027625083923,
      "learning_rate": 7.820483253493936e-06,
      "loss": 0.0201,
      "step": 681540
    },
    {
      "epoch": 1.1153878884284807,
      "grad_norm": 1.69913649559021,
      "learning_rate": 7.820417361280418e-06,
      "loss": 0.0326,
      "step": 681560
    },
    {
      "epoch": 1.115420618867134,
      "grad_norm": 0.6709386706352234,
      "learning_rate": 7.820351469066902e-06,
      "loss": 0.029,
      "step": 681580
    },
    {
      "epoch": 1.1154533493057874,
      "grad_norm": 0.1521570086479187,
      "learning_rate": 7.820285576853384e-06,
      "loss": 0.0208,
      "step": 681600
    },
    {
      "epoch": 1.1154860797444408,
      "grad_norm": 0.531806230545044,
      "learning_rate": 7.820219684639867e-06,
      "loss": 0.0219,
      "step": 681620
    },
    {
      "epoch": 1.1155188101830942,
      "grad_norm": 0.39448466897010803,
      "learning_rate": 7.820153792426349e-06,
      "loss": 0.0288,
      "step": 681640
    },
    {
      "epoch": 1.1155515406217473,
      "grad_norm": 5.37410306930542,
      "learning_rate": 7.820087900212833e-06,
      "loss": 0.0289,
      "step": 681660
    },
    {
      "epoch": 1.1155842710604007,
      "grad_norm": 0.659443736076355,
      "learning_rate": 7.820022007999315e-06,
      "loss": 0.0278,
      "step": 681680
    },
    {
      "epoch": 1.115617001499054,
      "grad_norm": 0.8444905281066895,
      "learning_rate": 7.819956115785798e-06,
      "loss": 0.018,
      "step": 681700
    },
    {
      "epoch": 1.1156497319377074,
      "grad_norm": 0.3656814694404602,
      "learning_rate": 7.81989022357228e-06,
      "loss": 0.0205,
      "step": 681720
    },
    {
      "epoch": 1.1156824623763608,
      "grad_norm": 0.36365777254104614,
      "learning_rate": 7.819824331358764e-06,
      "loss": 0.023,
      "step": 681740
    },
    {
      "epoch": 1.1157151928150142,
      "grad_norm": 0.3357868194580078,
      "learning_rate": 7.819758439145246e-06,
      "loss": 0.0363,
      "step": 681760
    },
    {
      "epoch": 1.1157479232536676,
      "grad_norm": 0.27895963191986084,
      "learning_rate": 7.81969254693173e-06,
      "loss": 0.02,
      "step": 681780
    },
    {
      "epoch": 1.1157806536923207,
      "grad_norm": 1.0740844011306763,
      "learning_rate": 7.819626654718213e-06,
      "loss": 0.034,
      "step": 681800
    },
    {
      "epoch": 1.115813384130974,
      "grad_norm": 0.5449360609054565,
      "learning_rate": 7.819560762504695e-06,
      "loss": 0.0196,
      "step": 681820
    },
    {
      "epoch": 1.1158461145696275,
      "grad_norm": 0.6931880712509155,
      "learning_rate": 7.819494870291178e-06,
      "loss": 0.0285,
      "step": 681840
    },
    {
      "epoch": 1.1158788450082808,
      "grad_norm": 0.3643409013748169,
      "learning_rate": 7.819428978077662e-06,
      "loss": 0.0194,
      "step": 681860
    },
    {
      "epoch": 1.1159115754469342,
      "grad_norm": 0.9184896349906921,
      "learning_rate": 7.819363085864144e-06,
      "loss": 0.0252,
      "step": 681880
    },
    {
      "epoch": 1.1159443058855876,
      "grad_norm": 0.5895598530769348,
      "learning_rate": 7.819297193650627e-06,
      "loss": 0.0254,
      "step": 681900
    },
    {
      "epoch": 1.1159770363242407,
      "grad_norm": 1.103611707687378,
      "learning_rate": 7.819231301437111e-06,
      "loss": 0.0218,
      "step": 681920
    },
    {
      "epoch": 1.116009766762894,
      "grad_norm": 0.8349395990371704,
      "learning_rate": 7.819165409223593e-06,
      "loss": 0.0222,
      "step": 681940
    },
    {
      "epoch": 1.1160424972015475,
      "grad_norm": 0.5106335878372192,
      "learning_rate": 7.819099517010076e-06,
      "loss": 0.0249,
      "step": 681960
    },
    {
      "epoch": 1.1160752276402008,
      "grad_norm": 0.672380268573761,
      "learning_rate": 7.819033624796558e-06,
      "loss": 0.0279,
      "step": 681980
    },
    {
      "epoch": 1.1161079580788542,
      "grad_norm": 0.9551168084144592,
      "learning_rate": 7.818967732583042e-06,
      "loss": 0.0243,
      "step": 682000
    },
    {
      "epoch": 1.1161406885175076,
      "grad_norm": 0.2790210247039795,
      "learning_rate": 7.818901840369524e-06,
      "loss": 0.0152,
      "step": 682020
    },
    {
      "epoch": 1.1161734189561607,
      "grad_norm": 0.6969540119171143,
      "learning_rate": 7.818835948156007e-06,
      "loss": 0.0173,
      "step": 682040
    },
    {
      "epoch": 1.116206149394814,
      "grad_norm": 0.38507258892059326,
      "learning_rate": 7.81877005594249e-06,
      "loss": 0.0223,
      "step": 682060
    },
    {
      "epoch": 1.1162388798334675,
      "grad_norm": 1.226876974105835,
      "learning_rate": 7.818704163728973e-06,
      "loss": 0.0266,
      "step": 682080
    },
    {
      "epoch": 1.1162716102721209,
      "grad_norm": 0.369710236787796,
      "learning_rate": 7.818638271515455e-06,
      "loss": 0.0239,
      "step": 682100
    },
    {
      "epoch": 1.1163043407107742,
      "grad_norm": 0.19599948823451996,
      "learning_rate": 7.818572379301938e-06,
      "loss": 0.0232,
      "step": 682120
    },
    {
      "epoch": 1.1163370711494276,
      "grad_norm": 0.23050618171691895,
      "learning_rate": 7.81850648708842e-06,
      "loss": 0.0161,
      "step": 682140
    },
    {
      "epoch": 1.116369801588081,
      "grad_norm": 0.5137590169906616,
      "learning_rate": 7.818440594874904e-06,
      "loss": 0.0246,
      "step": 682160
    },
    {
      "epoch": 1.1164025320267341,
      "grad_norm": 0.3696204125881195,
      "learning_rate": 7.818374702661387e-06,
      "loss": 0.0231,
      "step": 682180
    },
    {
      "epoch": 1.1164352624653875,
      "grad_norm": 0.7916606664657593,
      "learning_rate": 7.81830881044787e-06,
      "loss": 0.0314,
      "step": 682200
    },
    {
      "epoch": 1.1164679929040409,
      "grad_norm": 0.7898049354553223,
      "learning_rate": 7.818242918234353e-06,
      "loss": 0.027,
      "step": 682220
    },
    {
      "epoch": 1.1165007233426942,
      "grad_norm": 1.629378080368042,
      "learning_rate": 7.818177026020836e-06,
      "loss": 0.027,
      "step": 682240
    },
    {
      "epoch": 1.1165334537813476,
      "grad_norm": 4.645406723022461,
      "learning_rate": 7.818111133807318e-06,
      "loss": 0.0298,
      "step": 682260
    },
    {
      "epoch": 1.116566184220001,
      "grad_norm": 0.28603649139404297,
      "learning_rate": 7.818045241593802e-06,
      "loss": 0.018,
      "step": 682280
    },
    {
      "epoch": 1.1165989146586544,
      "grad_norm": 1.014577031135559,
      "learning_rate": 7.817979349380285e-06,
      "loss": 0.0164,
      "step": 682300
    },
    {
      "epoch": 1.1166316450973075,
      "grad_norm": 1.2202495336532593,
      "learning_rate": 7.817913457166767e-06,
      "loss": 0.0231,
      "step": 682320
    },
    {
      "epoch": 1.1166643755359609,
      "grad_norm": 2.331975221633911,
      "learning_rate": 7.817847564953251e-06,
      "loss": 0.032,
      "step": 682340
    },
    {
      "epoch": 1.1166971059746142,
      "grad_norm": 0.4940858483314514,
      "learning_rate": 7.817781672739733e-06,
      "loss": 0.0219,
      "step": 682360
    },
    {
      "epoch": 1.1167298364132676,
      "grad_norm": 0.6918200254440308,
      "learning_rate": 7.817715780526216e-06,
      "loss": 0.0238,
      "step": 682380
    },
    {
      "epoch": 1.116762566851921,
      "grad_norm": 0.2779732644557953,
      "learning_rate": 7.817649888312698e-06,
      "loss": 0.0369,
      "step": 682400
    },
    {
      "epoch": 1.1167952972905744,
      "grad_norm": 0.7202025651931763,
      "learning_rate": 7.817583996099182e-06,
      "loss": 0.032,
      "step": 682420
    },
    {
      "epoch": 1.1168280277292277,
      "grad_norm": 1.1001044511795044,
      "learning_rate": 7.817518103885664e-06,
      "loss": 0.0284,
      "step": 682440
    },
    {
      "epoch": 1.1168607581678809,
      "grad_norm": 0.9965360760688782,
      "learning_rate": 7.817452211672147e-06,
      "loss": 0.0247,
      "step": 682460
    },
    {
      "epoch": 1.1168934886065343,
      "grad_norm": 0.36154428124427795,
      "learning_rate": 7.81738631945863e-06,
      "loss": 0.028,
      "step": 682480
    },
    {
      "epoch": 1.1169262190451876,
      "grad_norm": 2.165283441543579,
      "learning_rate": 7.817320427245113e-06,
      "loss": 0.021,
      "step": 682500
    },
    {
      "epoch": 1.116958949483841,
      "grad_norm": 0.6483558416366577,
      "learning_rate": 7.817254535031596e-06,
      "loss": 0.024,
      "step": 682520
    },
    {
      "epoch": 1.1169916799224944,
      "grad_norm": 0.7511760592460632,
      "learning_rate": 7.817188642818078e-06,
      "loss": 0.0126,
      "step": 682540
    },
    {
      "epoch": 1.1170244103611477,
      "grad_norm": 0.2508573532104492,
      "learning_rate": 7.817122750604562e-06,
      "loss": 0.0254,
      "step": 682560
    },
    {
      "epoch": 1.117057140799801,
      "grad_norm": 0.1678561270236969,
      "learning_rate": 7.817056858391044e-06,
      "loss": 0.0229,
      "step": 682580
    },
    {
      "epoch": 1.1170898712384543,
      "grad_norm": 3.329149007797241,
      "learning_rate": 7.816990966177527e-06,
      "loss": 0.0233,
      "step": 682600
    },
    {
      "epoch": 1.1171226016771076,
      "grad_norm": 1.6412835121154785,
      "learning_rate": 7.81692507396401e-06,
      "loss": 0.0349,
      "step": 682620
    },
    {
      "epoch": 1.117155332115761,
      "grad_norm": 0.8649759888648987,
      "learning_rate": 7.816859181750493e-06,
      "loss": 0.021,
      "step": 682640
    },
    {
      "epoch": 1.1171880625544144,
      "grad_norm": 0.23488374054431915,
      "learning_rate": 7.816793289536977e-06,
      "loss": 0.0175,
      "step": 682660
    },
    {
      "epoch": 1.1172207929930678,
      "grad_norm": 0.6844776272773743,
      "learning_rate": 7.816727397323458e-06,
      "loss": 0.0358,
      "step": 682680
    },
    {
      "epoch": 1.1172535234317211,
      "grad_norm": 0.09056510031223297,
      "learning_rate": 7.816661505109942e-06,
      "loss": 0.0288,
      "step": 682700
    },
    {
      "epoch": 1.1172862538703743,
      "grad_norm": 1.0394666194915771,
      "learning_rate": 7.816595612896426e-06,
      "loss": 0.0271,
      "step": 682720
    },
    {
      "epoch": 1.1173189843090277,
      "grad_norm": 0.3159711956977844,
      "learning_rate": 7.816529720682907e-06,
      "loss": 0.0131,
      "step": 682740
    },
    {
      "epoch": 1.117351714747681,
      "grad_norm": 0.3492373824119568,
      "learning_rate": 7.816463828469391e-06,
      "loss": 0.0191,
      "step": 682760
    },
    {
      "epoch": 1.1173844451863344,
      "grad_norm": 0.3709442913532257,
      "learning_rate": 7.816397936255873e-06,
      "loss": 0.0228,
      "step": 682780
    },
    {
      "epoch": 1.1174171756249878,
      "grad_norm": 0.7628312110900879,
      "learning_rate": 7.816332044042357e-06,
      "loss": 0.0308,
      "step": 682800
    },
    {
      "epoch": 1.1174499060636411,
      "grad_norm": 0.1783372312784195,
      "learning_rate": 7.816266151828838e-06,
      "loss": 0.0294,
      "step": 682820
    },
    {
      "epoch": 1.1174826365022943,
      "grad_norm": 0.31869471073150635,
      "learning_rate": 7.816200259615322e-06,
      "loss": 0.0222,
      "step": 682840
    },
    {
      "epoch": 1.1175153669409477,
      "grad_norm": 0.8310292363166809,
      "learning_rate": 7.816134367401806e-06,
      "loss": 0.029,
      "step": 682860
    },
    {
      "epoch": 1.117548097379601,
      "grad_norm": 0.3718850016593933,
      "learning_rate": 7.816068475188287e-06,
      "loss": 0.0234,
      "step": 682880
    },
    {
      "epoch": 1.1175808278182544,
      "grad_norm": 1.2700121402740479,
      "learning_rate": 7.816002582974771e-06,
      "loss": 0.023,
      "step": 682900
    },
    {
      "epoch": 1.1176135582569078,
      "grad_norm": 1.5605127811431885,
      "learning_rate": 7.815936690761253e-06,
      "loss": 0.0284,
      "step": 682920
    },
    {
      "epoch": 1.1176462886955612,
      "grad_norm": 1.7090502977371216,
      "learning_rate": 7.815870798547737e-06,
      "loss": 0.0232,
      "step": 682940
    },
    {
      "epoch": 1.1176790191342145,
      "grad_norm": 0.7770713567733765,
      "learning_rate": 7.815804906334218e-06,
      "loss": 0.0291,
      "step": 682960
    },
    {
      "epoch": 1.1177117495728677,
      "grad_norm": 0.770603597164154,
      "learning_rate": 7.815739014120702e-06,
      "loss": 0.0334,
      "step": 682980
    },
    {
      "epoch": 1.117744480011521,
      "grad_norm": 1.270520567893982,
      "learning_rate": 7.815673121907184e-06,
      "loss": 0.0222,
      "step": 683000
    },
    {
      "epoch": 1.1177772104501744,
      "grad_norm": 0.8430042266845703,
      "learning_rate": 7.815607229693668e-06,
      "loss": 0.0205,
      "step": 683020
    },
    {
      "epoch": 1.1178099408888278,
      "grad_norm": 0.45904213190078735,
      "learning_rate": 7.815541337480151e-06,
      "loss": 0.015,
      "step": 683040
    },
    {
      "epoch": 1.1178426713274812,
      "grad_norm": 1.3265832662582397,
      "learning_rate": 7.815475445266633e-06,
      "loss": 0.0258,
      "step": 683060
    },
    {
      "epoch": 1.1178754017661345,
      "grad_norm": 1.2277014255523682,
      "learning_rate": 7.815409553053117e-06,
      "loss": 0.0301,
      "step": 683080
    },
    {
      "epoch": 1.117908132204788,
      "grad_norm": 1.1471025943756104,
      "learning_rate": 7.8153436608396e-06,
      "loss": 0.0244,
      "step": 683100
    },
    {
      "epoch": 1.117940862643441,
      "grad_norm": 0.8292515277862549,
      "learning_rate": 7.815277768626082e-06,
      "loss": 0.0243,
      "step": 683120
    },
    {
      "epoch": 1.1179735930820944,
      "grad_norm": 1.0054954290390015,
      "learning_rate": 7.815211876412566e-06,
      "loss": 0.0174,
      "step": 683140
    },
    {
      "epoch": 1.1180063235207478,
      "grad_norm": 0.7387123107910156,
      "learning_rate": 7.815145984199048e-06,
      "loss": 0.0263,
      "step": 683160
    },
    {
      "epoch": 1.1180390539594012,
      "grad_norm": 0.24004079401493073,
      "learning_rate": 7.815080091985531e-06,
      "loss": 0.0174,
      "step": 683180
    },
    {
      "epoch": 1.1180717843980545,
      "grad_norm": 0.5380339026451111,
      "learning_rate": 7.815014199772013e-06,
      "loss": 0.0203,
      "step": 683200
    },
    {
      "epoch": 1.118104514836708,
      "grad_norm": 0.6513876914978027,
      "learning_rate": 7.814948307558497e-06,
      "loss": 0.0214,
      "step": 683220
    },
    {
      "epoch": 1.1181372452753613,
      "grad_norm": 0.2428230196237564,
      "learning_rate": 7.81488241534498e-06,
      "loss": 0.0275,
      "step": 683240
    },
    {
      "epoch": 1.1181699757140144,
      "grad_norm": 0.5064507126808167,
      "learning_rate": 7.814816523131462e-06,
      "loss": 0.0185,
      "step": 683260
    },
    {
      "epoch": 1.1182027061526678,
      "grad_norm": 2.2204935550689697,
      "learning_rate": 7.814750630917946e-06,
      "loss": 0.0198,
      "step": 683280
    },
    {
      "epoch": 1.1182354365913212,
      "grad_norm": 0.2619836628437042,
      "learning_rate": 7.814684738704428e-06,
      "loss": 0.0263,
      "step": 683300
    },
    {
      "epoch": 1.1182681670299746,
      "grad_norm": 0.93983393907547,
      "learning_rate": 7.814618846490911e-06,
      "loss": 0.0257,
      "step": 683320
    },
    {
      "epoch": 1.118300897468628,
      "grad_norm": 0.5779929161071777,
      "learning_rate": 7.814552954277393e-06,
      "loss": 0.0244,
      "step": 683340
    },
    {
      "epoch": 1.1183336279072813,
      "grad_norm": 0.18740087747573853,
      "learning_rate": 7.814487062063877e-06,
      "loss": 0.0184,
      "step": 683360
    },
    {
      "epoch": 1.1183663583459345,
      "grad_norm": 1.0581879615783691,
      "learning_rate": 7.814421169850359e-06,
      "loss": 0.0195,
      "step": 683380
    },
    {
      "epoch": 1.1183990887845878,
      "grad_norm": 1.8905131816864014,
      "learning_rate": 7.814355277636842e-06,
      "loss": 0.0243,
      "step": 683400
    },
    {
      "epoch": 1.1184318192232412,
      "grad_norm": 1.6720548868179321,
      "learning_rate": 7.814289385423324e-06,
      "loss": 0.032,
      "step": 683420
    },
    {
      "epoch": 1.1184645496618946,
      "grad_norm": 0.4646589756011963,
      "learning_rate": 7.814223493209808e-06,
      "loss": 0.0223,
      "step": 683440
    },
    {
      "epoch": 1.118497280100548,
      "grad_norm": 0.25536367297172546,
      "learning_rate": 7.814157600996291e-06,
      "loss": 0.029,
      "step": 683460
    },
    {
      "epoch": 1.1185300105392013,
      "grad_norm": 4.883112907409668,
      "learning_rate": 7.814091708782773e-06,
      "loss": 0.0242,
      "step": 683480
    },
    {
      "epoch": 1.1185627409778547,
      "grad_norm": 1.2968605756759644,
      "learning_rate": 7.814025816569257e-06,
      "loss": 0.0238,
      "step": 683500
    },
    {
      "epoch": 1.1185954714165078,
      "grad_norm": 0.7648619413375854,
      "learning_rate": 7.81395992435574e-06,
      "loss": 0.0264,
      "step": 683520
    },
    {
      "epoch": 1.1186282018551612,
      "grad_norm": 1.1860063076019287,
      "learning_rate": 7.813894032142222e-06,
      "loss": 0.0193,
      "step": 683540
    },
    {
      "epoch": 1.1186609322938146,
      "grad_norm": 0.3440920114517212,
      "learning_rate": 7.813828139928706e-06,
      "loss": 0.0244,
      "step": 683560
    },
    {
      "epoch": 1.118693662732468,
      "grad_norm": 0.2446565330028534,
      "learning_rate": 7.81376224771519e-06,
      "loss": 0.0192,
      "step": 683580
    },
    {
      "epoch": 1.1187263931711213,
      "grad_norm": 1.2654366493225098,
      "learning_rate": 7.813696355501671e-06,
      "loss": 0.0249,
      "step": 683600
    },
    {
      "epoch": 1.1187591236097747,
      "grad_norm": 1.3630675077438354,
      "learning_rate": 7.813630463288155e-06,
      "loss": 0.0298,
      "step": 683620
    },
    {
      "epoch": 1.1187918540484278,
      "grad_norm": 1.142259955406189,
      "learning_rate": 7.813564571074637e-06,
      "loss": 0.0289,
      "step": 683640
    },
    {
      "epoch": 1.1188245844870812,
      "grad_norm": 1.5805563926696777,
      "learning_rate": 7.81349867886112e-06,
      "loss": 0.0188,
      "step": 683660
    },
    {
      "epoch": 1.1188573149257346,
      "grad_norm": 0.138004869222641,
      "learning_rate": 7.813432786647602e-06,
      "loss": 0.0266,
      "step": 683680
    },
    {
      "epoch": 1.118890045364388,
      "grad_norm": 0.6406195163726807,
      "learning_rate": 7.813366894434086e-06,
      "loss": 0.0266,
      "step": 683700
    },
    {
      "epoch": 1.1189227758030413,
      "grad_norm": 0.4769938886165619,
      "learning_rate": 7.813301002220568e-06,
      "loss": 0.0213,
      "step": 683720
    },
    {
      "epoch": 1.1189555062416947,
      "grad_norm": 1.1484380960464478,
      "learning_rate": 7.813235110007051e-06,
      "loss": 0.0261,
      "step": 683740
    },
    {
      "epoch": 1.118988236680348,
      "grad_norm": 0.2776530385017395,
      "learning_rate": 7.813169217793533e-06,
      "loss": 0.0225,
      "step": 683760
    },
    {
      "epoch": 1.1190209671190012,
      "grad_norm": 2.573476791381836,
      "learning_rate": 7.813103325580017e-06,
      "loss": 0.0283,
      "step": 683780
    },
    {
      "epoch": 1.1190536975576546,
      "grad_norm": 1.6055099964141846,
      "learning_rate": 7.813037433366499e-06,
      "loss": 0.0283,
      "step": 683800
    },
    {
      "epoch": 1.119086427996308,
      "grad_norm": 0.5132359862327576,
      "learning_rate": 7.812971541152982e-06,
      "loss": 0.0307,
      "step": 683820
    },
    {
      "epoch": 1.1191191584349613,
      "grad_norm": 0.8284164667129517,
      "learning_rate": 7.812905648939466e-06,
      "loss": 0.0274,
      "step": 683840
    },
    {
      "epoch": 1.1191518888736147,
      "grad_norm": 0.7853633761405945,
      "learning_rate": 7.812839756725948e-06,
      "loss": 0.0299,
      "step": 683860
    },
    {
      "epoch": 1.119184619312268,
      "grad_norm": 1.7406924962997437,
      "learning_rate": 7.812773864512431e-06,
      "loss": 0.02,
      "step": 683880
    },
    {
      "epoch": 1.1192173497509215,
      "grad_norm": 0.18360257148742676,
      "learning_rate": 7.812707972298915e-06,
      "loss": 0.0246,
      "step": 683900
    },
    {
      "epoch": 1.1192500801895746,
      "grad_norm": 0.244940847158432,
      "learning_rate": 7.812642080085397e-06,
      "loss": 0.0172,
      "step": 683920
    },
    {
      "epoch": 1.119282810628228,
      "grad_norm": 1.622109055519104,
      "learning_rate": 7.81257618787188e-06,
      "loss": 0.0208,
      "step": 683940
    },
    {
      "epoch": 1.1193155410668814,
      "grad_norm": 0.8774846792221069,
      "learning_rate": 7.812510295658364e-06,
      "loss": 0.0178,
      "step": 683960
    },
    {
      "epoch": 1.1193482715055347,
      "grad_norm": 0.44339099526405334,
      "learning_rate": 7.812444403444846e-06,
      "loss": 0.0197,
      "step": 683980
    },
    {
      "epoch": 1.119381001944188,
      "grad_norm": 1.3163719177246094,
      "learning_rate": 7.81237851123133e-06,
      "loss": 0.017,
      "step": 684000
    },
    {
      "epoch": 1.1194137323828415,
      "grad_norm": 0.962374746799469,
      "learning_rate": 7.812312619017811e-06,
      "loss": 0.0264,
      "step": 684020
    },
    {
      "epoch": 1.1194464628214948,
      "grad_norm": 0.3833020329475403,
      "learning_rate": 7.812246726804295e-06,
      "loss": 0.0199,
      "step": 684040
    },
    {
      "epoch": 1.119479193260148,
      "grad_norm": 0.4612223207950592,
      "learning_rate": 7.812180834590777e-06,
      "loss": 0.024,
      "step": 684060
    },
    {
      "epoch": 1.1195119236988014,
      "grad_norm": 0.5516939759254456,
      "learning_rate": 7.81211494237726e-06,
      "loss": 0.0166,
      "step": 684080
    },
    {
      "epoch": 1.1195446541374547,
      "grad_norm": 0.2059371918439865,
      "learning_rate": 7.812049050163742e-06,
      "loss": 0.0234,
      "step": 684100
    },
    {
      "epoch": 1.1195773845761081,
      "grad_norm": 0.30102619528770447,
      "learning_rate": 7.811983157950226e-06,
      "loss": 0.018,
      "step": 684120
    },
    {
      "epoch": 1.1196101150147615,
      "grad_norm": 0.8680033683776855,
      "learning_rate": 7.811917265736708e-06,
      "loss": 0.028,
      "step": 684140
    },
    {
      "epoch": 1.1196428454534149,
      "grad_norm": 0.20715533196926117,
      "learning_rate": 7.811851373523191e-06,
      "loss": 0.027,
      "step": 684160
    },
    {
      "epoch": 1.119675575892068,
      "grad_norm": 1.1915544271469116,
      "learning_rate": 7.811785481309673e-06,
      "loss": 0.0267,
      "step": 684180
    },
    {
      "epoch": 1.1197083063307214,
      "grad_norm": 0.6391242742538452,
      "learning_rate": 7.811719589096157e-06,
      "loss": 0.023,
      "step": 684200
    },
    {
      "epoch": 1.1197410367693748,
      "grad_norm": 0.3473969101905823,
      "learning_rate": 7.81165369688264e-06,
      "loss": 0.0224,
      "step": 684220
    },
    {
      "epoch": 1.1197737672080281,
      "grad_norm": 0.529105544090271,
      "learning_rate": 7.811587804669122e-06,
      "loss": 0.0178,
      "step": 684240
    },
    {
      "epoch": 1.1198064976466815,
      "grad_norm": 0.582897961139679,
      "learning_rate": 7.811521912455606e-06,
      "loss": 0.0288,
      "step": 684260
    },
    {
      "epoch": 1.1198392280853349,
      "grad_norm": 0.18280917406082153,
      "learning_rate": 7.81145602024209e-06,
      "loss": 0.0312,
      "step": 684280
    },
    {
      "epoch": 1.1198719585239882,
      "grad_norm": 0.48638948798179626,
      "learning_rate": 7.811390128028571e-06,
      "loss": 0.026,
      "step": 684300
    },
    {
      "epoch": 1.1199046889626414,
      "grad_norm": 1.4240113496780396,
      "learning_rate": 7.811324235815055e-06,
      "loss": 0.0247,
      "step": 684320
    },
    {
      "epoch": 1.1199374194012948,
      "grad_norm": 1.216193675994873,
      "learning_rate": 7.811258343601538e-06,
      "loss": 0.0385,
      "step": 684340
    },
    {
      "epoch": 1.1199701498399481,
      "grad_norm": 0.060242608189582825,
      "learning_rate": 7.81119245138802e-06,
      "loss": 0.0175,
      "step": 684360
    },
    {
      "epoch": 1.1200028802786015,
      "grad_norm": 0.32128503918647766,
      "learning_rate": 7.811126559174504e-06,
      "loss": 0.0227,
      "step": 684380
    },
    {
      "epoch": 1.1200356107172549,
      "grad_norm": 0.35513579845428467,
      "learning_rate": 7.811060666960986e-06,
      "loss": 0.0316,
      "step": 684400
    },
    {
      "epoch": 1.1200683411559083,
      "grad_norm": 0.9205408692359924,
      "learning_rate": 7.81099477474747e-06,
      "loss": 0.0205,
      "step": 684420
    },
    {
      "epoch": 1.1201010715945614,
      "grad_norm": 0.16283805668354034,
      "learning_rate": 7.810928882533951e-06,
      "loss": 0.0184,
      "step": 684440
    },
    {
      "epoch": 1.1201338020332148,
      "grad_norm": 0.5748070478439331,
      "learning_rate": 7.810862990320435e-06,
      "loss": 0.0242,
      "step": 684460
    },
    {
      "epoch": 1.1201665324718681,
      "grad_norm": 1.5504522323608398,
      "learning_rate": 7.810797098106917e-06,
      "loss": 0.0197,
      "step": 684480
    },
    {
      "epoch": 1.1201992629105215,
      "grad_norm": 2.1172091960906982,
      "learning_rate": 7.8107312058934e-06,
      "loss": 0.0254,
      "step": 684500
    },
    {
      "epoch": 1.120231993349175,
      "grad_norm": 0.9565088152885437,
      "learning_rate": 7.810665313679882e-06,
      "loss": 0.0212,
      "step": 684520
    },
    {
      "epoch": 1.1202647237878283,
      "grad_norm": 0.5303025841712952,
      "learning_rate": 7.810599421466366e-06,
      "loss": 0.029,
      "step": 684540
    },
    {
      "epoch": 1.1202974542264816,
      "grad_norm": 0.396977961063385,
      "learning_rate": 7.810533529252848e-06,
      "loss": 0.0405,
      "step": 684560
    },
    {
      "epoch": 1.1203301846651348,
      "grad_norm": 0.5718618035316467,
      "learning_rate": 7.810467637039331e-06,
      "loss": 0.0211,
      "step": 684580
    },
    {
      "epoch": 1.1203629151037882,
      "grad_norm": 1.8856866359710693,
      "learning_rate": 7.810401744825813e-06,
      "loss": 0.0216,
      "step": 684600
    },
    {
      "epoch": 1.1203956455424415,
      "grad_norm": 2.8889174461364746,
      "learning_rate": 7.810335852612297e-06,
      "loss": 0.032,
      "step": 684620
    },
    {
      "epoch": 1.120428375981095,
      "grad_norm": 0.43808379769325256,
      "learning_rate": 7.81026996039878e-06,
      "loss": 0.0208,
      "step": 684640
    },
    {
      "epoch": 1.1204611064197483,
      "grad_norm": 1.6311960220336914,
      "learning_rate": 7.810204068185262e-06,
      "loss": 0.0173,
      "step": 684660
    },
    {
      "epoch": 1.1204938368584016,
      "grad_norm": 0.29240882396698,
      "learning_rate": 7.810138175971746e-06,
      "loss": 0.0285,
      "step": 684680
    },
    {
      "epoch": 1.120526567297055,
      "grad_norm": 1.5342788696289062,
      "learning_rate": 7.81007228375823e-06,
      "loss": 0.0213,
      "step": 684700
    },
    {
      "epoch": 1.1205592977357082,
      "grad_norm": 2.2976901531219482,
      "learning_rate": 7.810006391544711e-06,
      "loss": 0.0174,
      "step": 684720
    },
    {
      "epoch": 1.1205920281743615,
      "grad_norm": 1.9118695259094238,
      "learning_rate": 7.809940499331195e-06,
      "loss": 0.0202,
      "step": 684740
    },
    {
      "epoch": 1.120624758613015,
      "grad_norm": 0.37437859177589417,
      "learning_rate": 7.809874607117679e-06,
      "loss": 0.018,
      "step": 684760
    },
    {
      "epoch": 1.1206574890516683,
      "grad_norm": 0.6107545495033264,
      "learning_rate": 7.80980871490416e-06,
      "loss": 0.0249,
      "step": 684780
    },
    {
      "epoch": 1.1206902194903217,
      "grad_norm": 0.49626949429512024,
      "learning_rate": 7.809742822690644e-06,
      "loss": 0.0184,
      "step": 684800
    },
    {
      "epoch": 1.120722949928975,
      "grad_norm": 0.5047959685325623,
      "learning_rate": 7.809676930477126e-06,
      "loss": 0.0148,
      "step": 684820
    },
    {
      "epoch": 1.1207556803676284,
      "grad_norm": 0.3152494728565216,
      "learning_rate": 7.80961103826361e-06,
      "loss": 0.0217,
      "step": 684840
    },
    {
      "epoch": 1.1207884108062816,
      "grad_norm": 0.34703952074050903,
      "learning_rate": 7.809545146050091e-06,
      "loss": 0.017,
      "step": 684860
    },
    {
      "epoch": 1.120821141244935,
      "grad_norm": 0.5253257155418396,
      "learning_rate": 7.809479253836575e-06,
      "loss": 0.0161,
      "step": 684880
    },
    {
      "epoch": 1.1208538716835883,
      "grad_norm": 1.130936861038208,
      "learning_rate": 7.809413361623057e-06,
      "loss": 0.0333,
      "step": 684900
    },
    {
      "epoch": 1.1208866021222417,
      "grad_norm": 0.24504727125167847,
      "learning_rate": 7.80934746940954e-06,
      "loss": 0.0222,
      "step": 684920
    },
    {
      "epoch": 1.120919332560895,
      "grad_norm": 3.646570920944214,
      "learning_rate": 7.809281577196022e-06,
      "loss": 0.0234,
      "step": 684940
    },
    {
      "epoch": 1.1209520629995484,
      "grad_norm": 1.0921157598495483,
      "learning_rate": 7.809215684982506e-06,
      "loss": 0.0217,
      "step": 684960
    },
    {
      "epoch": 1.1209847934382016,
      "grad_norm": 1.6110271215438843,
      "learning_rate": 7.80914979276899e-06,
      "loss": 0.0186,
      "step": 684980
    },
    {
      "epoch": 1.121017523876855,
      "grad_norm": 0.9928508400917053,
      "learning_rate": 7.809083900555471e-06,
      "loss": 0.0228,
      "step": 685000
    },
    {
      "epoch": 1.1210502543155083,
      "grad_norm": 0.4962500333786011,
      "learning_rate": 7.809018008341955e-06,
      "loss": 0.0219,
      "step": 685020
    },
    {
      "epoch": 1.1210829847541617,
      "grad_norm": 0.8660885691642761,
      "learning_rate": 7.808952116128437e-06,
      "loss": 0.02,
      "step": 685040
    },
    {
      "epoch": 1.121115715192815,
      "grad_norm": 0.720177173614502,
      "learning_rate": 7.80888622391492e-06,
      "loss": 0.0278,
      "step": 685060
    },
    {
      "epoch": 1.1211484456314684,
      "grad_norm": 1.3925678730010986,
      "learning_rate": 7.808820331701404e-06,
      "loss": 0.023,
      "step": 685080
    },
    {
      "epoch": 1.1211811760701216,
      "grad_norm": 0.14323736727237701,
      "learning_rate": 7.808754439487886e-06,
      "loss": 0.0139,
      "step": 685100
    },
    {
      "epoch": 1.121213906508775,
      "grad_norm": 0.9016082286834717,
      "learning_rate": 7.80868854727437e-06,
      "loss": 0.0171,
      "step": 685120
    },
    {
      "epoch": 1.1212466369474283,
      "grad_norm": 0.5266148447990417,
      "learning_rate": 7.808622655060853e-06,
      "loss": 0.0282,
      "step": 685140
    },
    {
      "epoch": 1.1212793673860817,
      "grad_norm": 0.7453421950340271,
      "learning_rate": 7.808556762847335e-06,
      "loss": 0.0255,
      "step": 685160
    },
    {
      "epoch": 1.121312097824735,
      "grad_norm": 0.3412873148918152,
      "learning_rate": 7.808490870633819e-06,
      "loss": 0.0248,
      "step": 685180
    },
    {
      "epoch": 1.1213448282633884,
      "grad_norm": 1.0997469425201416,
      "learning_rate": 7.8084249784203e-06,
      "loss": 0.0301,
      "step": 685200
    },
    {
      "epoch": 1.1213775587020418,
      "grad_norm": 0.46376705169677734,
      "learning_rate": 7.808359086206784e-06,
      "loss": 0.0293,
      "step": 685220
    },
    {
      "epoch": 1.121410289140695,
      "grad_norm": 0.3578883707523346,
      "learning_rate": 7.808293193993266e-06,
      "loss": 0.0195,
      "step": 685240
    },
    {
      "epoch": 1.1214430195793483,
      "grad_norm": 0.8129233121871948,
      "learning_rate": 7.80822730177975e-06,
      "loss": 0.0174,
      "step": 685260
    },
    {
      "epoch": 1.1214757500180017,
      "grad_norm": 1.2012382745742798,
      "learning_rate": 7.808161409566232e-06,
      "loss": 0.0273,
      "step": 685280
    },
    {
      "epoch": 1.121508480456655,
      "grad_norm": 1.063156247138977,
      "learning_rate": 7.808095517352715e-06,
      "loss": 0.0204,
      "step": 685300
    },
    {
      "epoch": 1.1215412108953084,
      "grad_norm": 1.1148462295532227,
      "learning_rate": 7.808029625139199e-06,
      "loss": 0.0272,
      "step": 685320
    },
    {
      "epoch": 1.1215739413339618,
      "grad_norm": 0.6592114567756653,
      "learning_rate": 7.80796373292568e-06,
      "loss": 0.0208,
      "step": 685340
    },
    {
      "epoch": 1.1216066717726152,
      "grad_norm": 0.33446717262268066,
      "learning_rate": 7.807897840712164e-06,
      "loss": 0.0303,
      "step": 685360
    },
    {
      "epoch": 1.1216394022112683,
      "grad_norm": 0.38018885254859924,
      "learning_rate": 7.807831948498646e-06,
      "loss": 0.0251,
      "step": 685380
    },
    {
      "epoch": 1.1216721326499217,
      "grad_norm": 1.0510600805282593,
      "learning_rate": 7.80776605628513e-06,
      "loss": 0.0303,
      "step": 685400
    },
    {
      "epoch": 1.121704863088575,
      "grad_norm": 0.5605539083480835,
      "learning_rate": 7.807700164071612e-06,
      "loss": 0.034,
      "step": 685420
    },
    {
      "epoch": 1.1217375935272285,
      "grad_norm": 0.1437339335680008,
      "learning_rate": 7.807634271858095e-06,
      "loss": 0.0241,
      "step": 685440
    },
    {
      "epoch": 1.1217703239658818,
      "grad_norm": 0.8099547624588013,
      "learning_rate": 7.807568379644577e-06,
      "loss": 0.0204,
      "step": 685460
    },
    {
      "epoch": 1.1218030544045352,
      "grad_norm": 0.7501665949821472,
      "learning_rate": 7.80750248743106e-06,
      "loss": 0.0217,
      "step": 685480
    },
    {
      "epoch": 1.1218357848431886,
      "grad_norm": 1.1033923625946045,
      "learning_rate": 7.807436595217544e-06,
      "loss": 0.0223,
      "step": 685500
    },
    {
      "epoch": 1.1218685152818417,
      "grad_norm": 0.5120633840560913,
      "learning_rate": 7.807370703004026e-06,
      "loss": 0.0209,
      "step": 685520
    },
    {
      "epoch": 1.121901245720495,
      "grad_norm": 0.9216811656951904,
      "learning_rate": 7.80730481079051e-06,
      "loss": 0.0247,
      "step": 685540
    },
    {
      "epoch": 1.1219339761591485,
      "grad_norm": 0.2038802057504654,
      "learning_rate": 7.807238918576993e-06,
      "loss": 0.0185,
      "step": 685560
    },
    {
      "epoch": 1.1219667065978018,
      "grad_norm": 1.1602165699005127,
      "learning_rate": 7.807173026363475e-06,
      "loss": 0.025,
      "step": 685580
    },
    {
      "epoch": 1.1219994370364552,
      "grad_norm": 0.3224811851978302,
      "learning_rate": 7.807107134149959e-06,
      "loss": 0.0253,
      "step": 685600
    },
    {
      "epoch": 1.1220321674751086,
      "grad_norm": 0.45432907342910767,
      "learning_rate": 7.80704124193644e-06,
      "loss": 0.0325,
      "step": 685620
    },
    {
      "epoch": 1.1220648979137617,
      "grad_norm": 0.33258113265037537,
      "learning_rate": 7.806975349722924e-06,
      "loss": 0.0308,
      "step": 685640
    },
    {
      "epoch": 1.122097628352415,
      "grad_norm": 1.0402379035949707,
      "learning_rate": 7.806909457509406e-06,
      "loss": 0.0238,
      "step": 685660
    },
    {
      "epoch": 1.1221303587910685,
      "grad_norm": 0.8581900596618652,
      "learning_rate": 7.80684356529589e-06,
      "loss": 0.0316,
      "step": 685680
    },
    {
      "epoch": 1.1221630892297219,
      "grad_norm": 0.5436372756958008,
      "learning_rate": 7.806777673082373e-06,
      "loss": 0.0251,
      "step": 685700
    },
    {
      "epoch": 1.1221958196683752,
      "grad_norm": 0.4149947464466095,
      "learning_rate": 7.806711780868855e-06,
      "loss": 0.0206,
      "step": 685720
    },
    {
      "epoch": 1.1222285501070286,
      "grad_norm": 2.0023293495178223,
      "learning_rate": 7.806645888655339e-06,
      "loss": 0.0329,
      "step": 685740
    },
    {
      "epoch": 1.122261280545682,
      "grad_norm": 1.0576626062393188,
      "learning_rate": 7.80657999644182e-06,
      "loss": 0.0299,
      "step": 685760
    },
    {
      "epoch": 1.1222940109843351,
      "grad_norm": 1.0875226259231567,
      "learning_rate": 7.806514104228304e-06,
      "loss": 0.0201,
      "step": 685780
    },
    {
      "epoch": 1.1223267414229885,
      "grad_norm": 0.28512895107269287,
      "learning_rate": 7.806448212014786e-06,
      "loss": 0.0243,
      "step": 685800
    },
    {
      "epoch": 1.1223594718616419,
      "grad_norm": 0.916542112827301,
      "learning_rate": 7.80638231980127e-06,
      "loss": 0.0184,
      "step": 685820
    },
    {
      "epoch": 1.1223922023002952,
      "grad_norm": 0.9258002042770386,
      "learning_rate": 7.806316427587752e-06,
      "loss": 0.0247,
      "step": 685840
    },
    {
      "epoch": 1.1224249327389486,
      "grad_norm": 0.5097398161888123,
      "learning_rate": 7.806250535374235e-06,
      "loss": 0.0216,
      "step": 685860
    },
    {
      "epoch": 1.122457663177602,
      "grad_norm": 0.6482608914375305,
      "learning_rate": 7.806184643160719e-06,
      "loss": 0.0169,
      "step": 685880
    },
    {
      "epoch": 1.1224903936162551,
      "grad_norm": 1.445448637008667,
      "learning_rate": 7.8061187509472e-06,
      "loss": 0.0325,
      "step": 685900
    },
    {
      "epoch": 1.1225231240549085,
      "grad_norm": 0.34136712551116943,
      "learning_rate": 7.806052858733684e-06,
      "loss": 0.0253,
      "step": 685920
    },
    {
      "epoch": 1.1225558544935619,
      "grad_norm": 0.6852012276649475,
      "learning_rate": 7.805986966520168e-06,
      "loss": 0.0186,
      "step": 685940
    },
    {
      "epoch": 1.1225885849322152,
      "grad_norm": 1.21329927444458,
      "learning_rate": 7.80592107430665e-06,
      "loss": 0.0303,
      "step": 685960
    },
    {
      "epoch": 1.1226213153708686,
      "grad_norm": 1.149416446685791,
      "learning_rate": 7.805855182093133e-06,
      "loss": 0.021,
      "step": 685980
    },
    {
      "epoch": 1.122654045809522,
      "grad_norm": 0.6638669967651367,
      "learning_rate": 7.805789289879615e-06,
      "loss": 0.0204,
      "step": 686000
    },
    {
      "epoch": 1.1226867762481754,
      "grad_norm": 3.235295057296753,
      "learning_rate": 7.805723397666099e-06,
      "loss": 0.0237,
      "step": 686020
    },
    {
      "epoch": 1.1227195066868285,
      "grad_norm": 1.0663056373596191,
      "learning_rate": 7.805657505452582e-06,
      "loss": 0.0211,
      "step": 686040
    },
    {
      "epoch": 1.1227522371254819,
      "grad_norm": 1.8994945287704468,
      "learning_rate": 7.805591613239064e-06,
      "loss": 0.0236,
      "step": 686060
    },
    {
      "epoch": 1.1227849675641353,
      "grad_norm": 0.5996913313865662,
      "learning_rate": 7.805525721025548e-06,
      "loss": 0.0377,
      "step": 686080
    },
    {
      "epoch": 1.1228176980027886,
      "grad_norm": 2.4527387619018555,
      "learning_rate": 7.80545982881203e-06,
      "loss": 0.0212,
      "step": 686100
    },
    {
      "epoch": 1.122850428441442,
      "grad_norm": 0.9723509550094604,
      "learning_rate": 7.805393936598513e-06,
      "loss": 0.0275,
      "step": 686120
    },
    {
      "epoch": 1.1228831588800954,
      "grad_norm": 1.5107859373092651,
      "learning_rate": 7.805328044384995e-06,
      "loss": 0.0277,
      "step": 686140
    },
    {
      "epoch": 1.1229158893187488,
      "grad_norm": 0.5200390219688416,
      "learning_rate": 7.805262152171479e-06,
      "loss": 0.0212,
      "step": 686160
    },
    {
      "epoch": 1.122948619757402,
      "grad_norm": 0.5513454675674438,
      "learning_rate": 7.80519625995796e-06,
      "loss": 0.0289,
      "step": 686180
    },
    {
      "epoch": 1.1229813501960553,
      "grad_norm": 1.4457086324691772,
      "learning_rate": 7.805130367744444e-06,
      "loss": 0.0239,
      "step": 686200
    },
    {
      "epoch": 1.1230140806347086,
      "grad_norm": 2.359494924545288,
      "learning_rate": 7.805064475530926e-06,
      "loss": 0.0274,
      "step": 686220
    },
    {
      "epoch": 1.123046811073362,
      "grad_norm": 0.20543931424617767,
      "learning_rate": 7.80499858331741e-06,
      "loss": 0.0267,
      "step": 686240
    },
    {
      "epoch": 1.1230795415120154,
      "grad_norm": 0.8216257095336914,
      "learning_rate": 7.804932691103892e-06,
      "loss": 0.024,
      "step": 686260
    },
    {
      "epoch": 1.1231122719506688,
      "grad_norm": 0.4316873550415039,
      "learning_rate": 7.804866798890375e-06,
      "loss": 0.0234,
      "step": 686280
    },
    {
      "epoch": 1.1231450023893221,
      "grad_norm": 0.4399178922176361,
      "learning_rate": 7.804800906676859e-06,
      "loss": 0.0305,
      "step": 686300
    },
    {
      "epoch": 1.1231777328279753,
      "grad_norm": 0.6055417656898499,
      "learning_rate": 7.80473501446334e-06,
      "loss": 0.0171,
      "step": 686320
    },
    {
      "epoch": 1.1232104632666287,
      "grad_norm": 0.857421338558197,
      "learning_rate": 7.804669122249824e-06,
      "loss": 0.0202,
      "step": 686340
    },
    {
      "epoch": 1.123243193705282,
      "grad_norm": 0.25888800621032715,
      "learning_rate": 7.804603230036308e-06,
      "loss": 0.0224,
      "step": 686360
    },
    {
      "epoch": 1.1232759241439354,
      "grad_norm": 0.9130468368530273,
      "learning_rate": 7.804537337822792e-06,
      "loss": 0.0225,
      "step": 686380
    },
    {
      "epoch": 1.1233086545825888,
      "grad_norm": 0.26799964904785156,
      "learning_rate": 7.804471445609273e-06,
      "loss": 0.0195,
      "step": 686400
    },
    {
      "epoch": 1.1233413850212421,
      "grad_norm": 1.9591878652572632,
      "learning_rate": 7.804405553395757e-06,
      "loss": 0.0289,
      "step": 686420
    },
    {
      "epoch": 1.1233741154598953,
      "grad_norm": 0.41548579931259155,
      "learning_rate": 7.804339661182239e-06,
      "loss": 0.0244,
      "step": 686440
    },
    {
      "epoch": 1.1234068458985487,
      "grad_norm": 0.5191317200660706,
      "learning_rate": 7.804273768968722e-06,
      "loss": 0.0304,
      "step": 686460
    },
    {
      "epoch": 1.123439576337202,
      "grad_norm": 0.35621756315231323,
      "learning_rate": 7.804207876755204e-06,
      "loss": 0.0298,
      "step": 686480
    },
    {
      "epoch": 1.1234723067758554,
      "grad_norm": 0.31498169898986816,
      "learning_rate": 7.804141984541688e-06,
      "loss": 0.0241,
      "step": 686500
    },
    {
      "epoch": 1.1235050372145088,
      "grad_norm": 0.5640109181404114,
      "learning_rate": 7.80407609232817e-06,
      "loss": 0.0248,
      "step": 686520
    },
    {
      "epoch": 1.1235377676531622,
      "grad_norm": 0.4992698132991791,
      "learning_rate": 7.804010200114653e-06,
      "loss": 0.0206,
      "step": 686540
    },
    {
      "epoch": 1.1235704980918155,
      "grad_norm": 0.25026753544807434,
      "learning_rate": 7.803944307901135e-06,
      "loss": 0.0238,
      "step": 686560
    },
    {
      "epoch": 1.1236032285304687,
      "grad_norm": 0.2177726775407791,
      "learning_rate": 7.803878415687619e-06,
      "loss": 0.034,
      "step": 686580
    },
    {
      "epoch": 1.123635958969122,
      "grad_norm": 0.41001349687576294,
      "learning_rate": 7.8038125234741e-06,
      "loss": 0.0245,
      "step": 686600
    },
    {
      "epoch": 1.1236686894077754,
      "grad_norm": 0.372872918844223,
      "learning_rate": 7.803746631260584e-06,
      "loss": 0.0227,
      "step": 686620
    },
    {
      "epoch": 1.1237014198464288,
      "grad_norm": 0.40601062774658203,
      "learning_rate": 7.803680739047066e-06,
      "loss": 0.0332,
      "step": 686640
    },
    {
      "epoch": 1.1237341502850822,
      "grad_norm": 0.5571008920669556,
      "learning_rate": 7.80361484683355e-06,
      "loss": 0.0326,
      "step": 686660
    },
    {
      "epoch": 1.1237668807237355,
      "grad_norm": 1.0830767154693604,
      "learning_rate": 7.803548954620033e-06,
      "loss": 0.0179,
      "step": 686680
    },
    {
      "epoch": 1.1237996111623887,
      "grad_norm": 1.8580095767974854,
      "learning_rate": 7.803483062406515e-06,
      "loss": 0.0228,
      "step": 686700
    },
    {
      "epoch": 1.123832341601042,
      "grad_norm": 0.5451787114143372,
      "learning_rate": 7.803417170192999e-06,
      "loss": 0.0371,
      "step": 686720
    },
    {
      "epoch": 1.1238650720396954,
      "grad_norm": 0.5719330310821533,
      "learning_rate": 7.803351277979483e-06,
      "loss": 0.0152,
      "step": 686740
    },
    {
      "epoch": 1.1238978024783488,
      "grad_norm": 0.34532827138900757,
      "learning_rate": 7.803285385765964e-06,
      "loss": 0.0188,
      "step": 686760
    },
    {
      "epoch": 1.1239305329170022,
      "grad_norm": 0.25511524081230164,
      "learning_rate": 7.803219493552448e-06,
      "loss": 0.0288,
      "step": 686780
    },
    {
      "epoch": 1.1239632633556556,
      "grad_norm": 0.6652470231056213,
      "learning_rate": 7.803153601338932e-06,
      "loss": 0.0217,
      "step": 686800
    },
    {
      "epoch": 1.123995993794309,
      "grad_norm": 0.17223292589187622,
      "learning_rate": 7.803087709125413e-06,
      "loss": 0.0317,
      "step": 686820
    },
    {
      "epoch": 1.124028724232962,
      "grad_norm": 1.3216476440429688,
      "learning_rate": 7.803021816911897e-06,
      "loss": 0.0246,
      "step": 686840
    },
    {
      "epoch": 1.1240614546716154,
      "grad_norm": 2.908057928085327,
      "learning_rate": 7.802955924698379e-06,
      "loss": 0.0156,
      "step": 686860
    },
    {
      "epoch": 1.1240941851102688,
      "grad_norm": 0.7780337333679199,
      "learning_rate": 7.802890032484863e-06,
      "loss": 0.0261,
      "step": 686880
    },
    {
      "epoch": 1.1241269155489222,
      "grad_norm": 0.547862708568573,
      "learning_rate": 7.802824140271344e-06,
      "loss": 0.0236,
      "step": 686900
    },
    {
      "epoch": 1.1241596459875756,
      "grad_norm": 1.0842092037200928,
      "learning_rate": 7.802758248057828e-06,
      "loss": 0.0233,
      "step": 686920
    },
    {
      "epoch": 1.124192376426229,
      "grad_norm": 1.505028486251831,
      "learning_rate": 7.80269235584431e-06,
      "loss": 0.0221,
      "step": 686940
    },
    {
      "epoch": 1.1242251068648823,
      "grad_norm": 0.1908688247203827,
      "learning_rate": 7.802626463630793e-06,
      "loss": 0.0089,
      "step": 686960
    },
    {
      "epoch": 1.1242578373035355,
      "grad_norm": 0.7032974362373352,
      "learning_rate": 7.802560571417275e-06,
      "loss": 0.0289,
      "step": 686980
    },
    {
      "epoch": 1.1242905677421888,
      "grad_norm": 1.5213333368301392,
      "learning_rate": 7.802494679203759e-06,
      "loss": 0.0231,
      "step": 687000
    },
    {
      "epoch": 1.1243232981808422,
      "grad_norm": 1.9975898265838623,
      "learning_rate": 7.802428786990241e-06,
      "loss": 0.0185,
      "step": 687020
    },
    {
      "epoch": 1.1243560286194956,
      "grad_norm": 1.580449104309082,
      "learning_rate": 7.802362894776724e-06,
      "loss": 0.031,
      "step": 687040
    },
    {
      "epoch": 1.124388759058149,
      "grad_norm": 0.611331045627594,
      "learning_rate": 7.802297002563208e-06,
      "loss": 0.0227,
      "step": 687060
    },
    {
      "epoch": 1.1244214894968023,
      "grad_norm": 1.369917631149292,
      "learning_rate": 7.80223111034969e-06,
      "loss": 0.0287,
      "step": 687080
    },
    {
      "epoch": 1.1244542199354557,
      "grad_norm": 1.012441635131836,
      "learning_rate": 7.802165218136174e-06,
      "loss": 0.0223,
      "step": 687100
    },
    {
      "epoch": 1.1244869503741088,
      "grad_norm": 1.3663650751113892,
      "learning_rate": 7.802099325922657e-06,
      "loss": 0.0344,
      "step": 687120
    },
    {
      "epoch": 1.1245196808127622,
      "grad_norm": 1.2517735958099365,
      "learning_rate": 7.802033433709139e-06,
      "loss": 0.0236,
      "step": 687140
    },
    {
      "epoch": 1.1245524112514156,
      "grad_norm": 0.35882583260536194,
      "learning_rate": 7.801967541495623e-06,
      "loss": 0.0215,
      "step": 687160
    },
    {
      "epoch": 1.124585141690069,
      "grad_norm": 0.8826804757118225,
      "learning_rate": 7.801901649282106e-06,
      "loss": 0.0257,
      "step": 687180
    },
    {
      "epoch": 1.1246178721287223,
      "grad_norm": 0.16660313308238983,
      "learning_rate": 7.801835757068588e-06,
      "loss": 0.0295,
      "step": 687200
    },
    {
      "epoch": 1.1246506025673757,
      "grad_norm": 0.5222079157829285,
      "learning_rate": 7.801769864855072e-06,
      "loss": 0.0278,
      "step": 687220
    },
    {
      "epoch": 1.1246833330060289,
      "grad_norm": 0.18017472326755524,
      "learning_rate": 7.801703972641554e-06,
      "loss": 0.02,
      "step": 687240
    },
    {
      "epoch": 1.1247160634446822,
      "grad_norm": 0.3000606894493103,
      "learning_rate": 7.801638080428037e-06,
      "loss": 0.0206,
      "step": 687260
    },
    {
      "epoch": 1.1247487938833356,
      "grad_norm": 0.4245818555355072,
      "learning_rate": 7.801572188214519e-06,
      "loss": 0.031,
      "step": 687280
    },
    {
      "epoch": 1.124781524321989,
      "grad_norm": 0.6135110259056091,
      "learning_rate": 7.801506296001003e-06,
      "loss": 0.0239,
      "step": 687300
    },
    {
      "epoch": 1.1248142547606423,
      "grad_norm": 0.7305173277854919,
      "learning_rate": 7.801440403787485e-06,
      "loss": 0.0229,
      "step": 687320
    },
    {
      "epoch": 1.1248469851992957,
      "grad_norm": 0.7206571102142334,
      "learning_rate": 7.801374511573968e-06,
      "loss": 0.0258,
      "step": 687340
    },
    {
      "epoch": 1.1248797156379489,
      "grad_norm": 0.5132707953453064,
      "learning_rate": 7.80130861936045e-06,
      "loss": 0.0172,
      "step": 687360
    },
    {
      "epoch": 1.1249124460766022,
      "grad_norm": 1.1296203136444092,
      "learning_rate": 7.801242727146934e-06,
      "loss": 0.0307,
      "step": 687380
    },
    {
      "epoch": 1.1249451765152556,
      "grad_norm": 2.6193408966064453,
      "learning_rate": 7.801176834933415e-06,
      "loss": 0.0275,
      "step": 687400
    },
    {
      "epoch": 1.124977906953909,
      "grad_norm": 0.42649346590042114,
      "learning_rate": 7.801110942719899e-06,
      "loss": 0.0304,
      "step": 687420
    },
    {
      "epoch": 1.1250106373925624,
      "grad_norm": 0.3553432822227478,
      "learning_rate": 7.801045050506383e-06,
      "loss": 0.0224,
      "step": 687440
    },
    {
      "epoch": 1.1250433678312157,
      "grad_norm": 0.3890661895275116,
      "learning_rate": 7.800979158292865e-06,
      "loss": 0.0285,
      "step": 687460
    },
    {
      "epoch": 1.125076098269869,
      "grad_norm": 0.9342216849327087,
      "learning_rate": 7.800913266079348e-06,
      "loss": 0.0316,
      "step": 687480
    },
    {
      "epoch": 1.1251088287085222,
      "grad_norm": 1.0107877254486084,
      "learning_rate": 7.80084737386583e-06,
      "loss": 0.0263,
      "step": 687500
    },
    {
      "epoch": 1.1251415591471756,
      "grad_norm": 2.6259846687316895,
      "learning_rate": 7.800781481652314e-06,
      "loss": 0.0221,
      "step": 687520
    },
    {
      "epoch": 1.125174289585829,
      "grad_norm": 0.5084173679351807,
      "learning_rate": 7.800715589438797e-06,
      "loss": 0.0332,
      "step": 687540
    },
    {
      "epoch": 1.1252070200244824,
      "grad_norm": 1.2814538478851318,
      "learning_rate": 7.800649697225279e-06,
      "loss": 0.019,
      "step": 687560
    },
    {
      "epoch": 1.1252397504631357,
      "grad_norm": 0.251770943403244,
      "learning_rate": 7.800583805011763e-06,
      "loss": 0.0227,
      "step": 687580
    },
    {
      "epoch": 1.125272480901789,
      "grad_norm": 0.34619900584220886,
      "learning_rate": 7.800517912798246e-06,
      "loss": 0.0248,
      "step": 687600
    },
    {
      "epoch": 1.1253052113404425,
      "grad_norm": 0.33148127794265747,
      "learning_rate": 7.800452020584728e-06,
      "loss": 0.0321,
      "step": 687620
    },
    {
      "epoch": 1.1253379417790956,
      "grad_norm": 0.2606469988822937,
      "learning_rate": 7.800386128371212e-06,
      "loss": 0.0136,
      "step": 687640
    },
    {
      "epoch": 1.125370672217749,
      "grad_norm": 0.7314561605453491,
      "learning_rate": 7.800320236157694e-06,
      "loss": 0.0235,
      "step": 687660
    },
    {
      "epoch": 1.1254034026564024,
      "grad_norm": 0.546440839767456,
      "learning_rate": 7.800254343944177e-06,
      "loss": 0.0162,
      "step": 687680
    },
    {
      "epoch": 1.1254361330950557,
      "grad_norm": 0.34397971630096436,
      "learning_rate": 7.800188451730659e-06,
      "loss": 0.028,
      "step": 687700
    },
    {
      "epoch": 1.1254688635337091,
      "grad_norm": 0.7475454211235046,
      "learning_rate": 7.800122559517143e-06,
      "loss": 0.0246,
      "step": 687720
    },
    {
      "epoch": 1.1255015939723625,
      "grad_norm": 0.779208779335022,
      "learning_rate": 7.800056667303625e-06,
      "loss": 0.0222,
      "step": 687740
    },
    {
      "epoch": 1.1255343244110159,
      "grad_norm": 0.32134902477264404,
      "learning_rate": 7.799990775090108e-06,
      "loss": 0.0183,
      "step": 687760
    },
    {
      "epoch": 1.125567054849669,
      "grad_norm": 3.9140377044677734,
      "learning_rate": 7.799924882876592e-06,
      "loss": 0.0296,
      "step": 687780
    },
    {
      "epoch": 1.1255997852883224,
      "grad_norm": 0.5903511643409729,
      "learning_rate": 7.799858990663074e-06,
      "loss": 0.0285,
      "step": 687800
    },
    {
      "epoch": 1.1256325157269758,
      "grad_norm": 0.41658204793930054,
      "learning_rate": 7.799793098449557e-06,
      "loss": 0.0283,
      "step": 687820
    },
    {
      "epoch": 1.1256652461656291,
      "grad_norm": 0.5063434839248657,
      "learning_rate": 7.799727206236039e-06,
      "loss": 0.024,
      "step": 687840
    },
    {
      "epoch": 1.1256979766042825,
      "grad_norm": 0.6106404662132263,
      "learning_rate": 7.799661314022523e-06,
      "loss": 0.0206,
      "step": 687860
    },
    {
      "epoch": 1.1257307070429359,
      "grad_norm": 1.5038448572158813,
      "learning_rate": 7.799595421809005e-06,
      "loss": 0.0232,
      "step": 687880
    },
    {
      "epoch": 1.1257634374815892,
      "grad_norm": 0.2369428277015686,
      "learning_rate": 7.799529529595488e-06,
      "loss": 0.024,
      "step": 687900
    },
    {
      "epoch": 1.1257961679202424,
      "grad_norm": 0.3536360561847687,
      "learning_rate": 7.799463637381972e-06,
      "loss": 0.0299,
      "step": 687920
    },
    {
      "epoch": 1.1258288983588958,
      "grad_norm": 0.684783399105072,
      "learning_rate": 7.799397745168454e-06,
      "loss": 0.0174,
      "step": 687940
    },
    {
      "epoch": 1.1258616287975491,
      "grad_norm": 2.7808661460876465,
      "learning_rate": 7.799331852954937e-06,
      "loss": 0.0244,
      "step": 687960
    },
    {
      "epoch": 1.1258943592362025,
      "grad_norm": 0.3359425365924835,
      "learning_rate": 7.799265960741421e-06,
      "loss": 0.021,
      "step": 687980
    },
    {
      "epoch": 1.1259270896748559,
      "grad_norm": 0.43937042355537415,
      "learning_rate": 7.799200068527903e-06,
      "loss": 0.0206,
      "step": 688000
    },
    {
      "epoch": 1.1259598201135093,
      "grad_norm": 0.23142962157726288,
      "learning_rate": 7.799134176314386e-06,
      "loss": 0.0232,
      "step": 688020
    },
    {
      "epoch": 1.1259925505521624,
      "grad_norm": 1.2052119970321655,
      "learning_rate": 7.799068284100868e-06,
      "loss": 0.0322,
      "step": 688040
    },
    {
      "epoch": 1.1260252809908158,
      "grad_norm": 0.5608871579170227,
      "learning_rate": 7.799002391887352e-06,
      "loss": 0.0262,
      "step": 688060
    },
    {
      "epoch": 1.1260580114294692,
      "grad_norm": 0.6931716203689575,
      "learning_rate": 7.798936499673834e-06,
      "loss": 0.021,
      "step": 688080
    },
    {
      "epoch": 1.1260907418681225,
      "grad_norm": 0.7652865648269653,
      "learning_rate": 7.798870607460317e-06,
      "loss": 0.0238,
      "step": 688100
    },
    {
      "epoch": 1.126123472306776,
      "grad_norm": 0.243360236287117,
      "learning_rate": 7.7988047152468e-06,
      "loss": 0.024,
      "step": 688120
    },
    {
      "epoch": 1.1261562027454293,
      "grad_norm": 0.3932309150695801,
      "learning_rate": 7.798738823033283e-06,
      "loss": 0.0298,
      "step": 688140
    },
    {
      "epoch": 1.1261889331840824,
      "grad_norm": 1.4156652688980103,
      "learning_rate": 7.798672930819766e-06,
      "loss": 0.0203,
      "step": 688160
    },
    {
      "epoch": 1.1262216636227358,
      "grad_norm": 0.15184839069843292,
      "learning_rate": 7.798607038606248e-06,
      "loss": 0.0159,
      "step": 688180
    },
    {
      "epoch": 1.1262543940613892,
      "grad_norm": 0.9824777841567993,
      "learning_rate": 7.798541146392732e-06,
      "loss": 0.0253,
      "step": 688200
    },
    {
      "epoch": 1.1262871245000425,
      "grad_norm": 1.9503822326660156,
      "learning_rate": 7.798475254179214e-06,
      "loss": 0.0341,
      "step": 688220
    },
    {
      "epoch": 1.126319854938696,
      "grad_norm": 0.4217306971549988,
      "learning_rate": 7.798409361965697e-06,
      "loss": 0.026,
      "step": 688240
    },
    {
      "epoch": 1.1263525853773493,
      "grad_norm": 0.8909155130386353,
      "learning_rate": 7.79834346975218e-06,
      "loss": 0.0178,
      "step": 688260
    },
    {
      "epoch": 1.1263853158160027,
      "grad_norm": 1.3846441507339478,
      "learning_rate": 7.798277577538663e-06,
      "loss": 0.017,
      "step": 688280
    },
    {
      "epoch": 1.1264180462546558,
      "grad_norm": 1.3068718910217285,
      "learning_rate": 7.798211685325145e-06,
      "loss": 0.0268,
      "step": 688300
    },
    {
      "epoch": 1.1264507766933092,
      "grad_norm": 0.5248100757598877,
      "learning_rate": 7.798145793111628e-06,
      "loss": 0.0166,
      "step": 688320
    },
    {
      "epoch": 1.1264835071319625,
      "grad_norm": 0.45695218443870544,
      "learning_rate": 7.798079900898112e-06,
      "loss": 0.0264,
      "step": 688340
    },
    {
      "epoch": 1.126516237570616,
      "grad_norm": 0.8858462572097778,
      "learning_rate": 7.798014008684594e-06,
      "loss": 0.0259,
      "step": 688360
    },
    {
      "epoch": 1.1265489680092693,
      "grad_norm": 0.20518366992473602,
      "learning_rate": 7.797948116471077e-06,
      "loss": 0.0189,
      "step": 688380
    },
    {
      "epoch": 1.1265816984479227,
      "grad_norm": 2.0324602127075195,
      "learning_rate": 7.797882224257561e-06,
      "loss": 0.0222,
      "step": 688400
    },
    {
      "epoch": 1.126614428886576,
      "grad_norm": 0.5583863258361816,
      "learning_rate": 7.797816332044043e-06,
      "loss": 0.0215,
      "step": 688420
    },
    {
      "epoch": 1.1266471593252292,
      "grad_norm": 0.7485069632530212,
      "learning_rate": 7.797750439830526e-06,
      "loss": 0.0135,
      "step": 688440
    },
    {
      "epoch": 1.1266798897638826,
      "grad_norm": 1.2554422616958618,
      "learning_rate": 7.797684547617008e-06,
      "loss": 0.0257,
      "step": 688460
    },
    {
      "epoch": 1.126712620202536,
      "grad_norm": 1.7087881565093994,
      "learning_rate": 7.797618655403492e-06,
      "loss": 0.0228,
      "step": 688480
    },
    {
      "epoch": 1.1267453506411893,
      "grad_norm": 0.2758272588253021,
      "learning_rate": 7.797552763189975e-06,
      "loss": 0.0149,
      "step": 688500
    },
    {
      "epoch": 1.1267780810798427,
      "grad_norm": 1.5432018041610718,
      "learning_rate": 7.797486870976457e-06,
      "loss": 0.022,
      "step": 688520
    },
    {
      "epoch": 1.126810811518496,
      "grad_norm": 1.1792868375778198,
      "learning_rate": 7.797420978762941e-06,
      "loss": 0.0254,
      "step": 688540
    },
    {
      "epoch": 1.1268435419571494,
      "grad_norm": 0.34201931953430176,
      "learning_rate": 7.797355086549423e-06,
      "loss": 0.0213,
      "step": 688560
    },
    {
      "epoch": 1.1268762723958026,
      "grad_norm": 0.3474758267402649,
      "learning_rate": 7.797289194335906e-06,
      "loss": 0.0249,
      "step": 688580
    },
    {
      "epoch": 1.126909002834456,
      "grad_norm": 0.4132840037345886,
      "learning_rate": 7.797223302122388e-06,
      "loss": 0.0219,
      "step": 688600
    },
    {
      "epoch": 1.1269417332731093,
      "grad_norm": 0.9215345978736877,
      "learning_rate": 7.797157409908872e-06,
      "loss": 0.0221,
      "step": 688620
    },
    {
      "epoch": 1.1269744637117627,
      "grad_norm": 0.9485844373703003,
      "learning_rate": 7.797091517695354e-06,
      "loss": 0.0237,
      "step": 688640
    },
    {
      "epoch": 1.127007194150416,
      "grad_norm": 2.1272642612457275,
      "learning_rate": 7.797025625481837e-06,
      "loss": 0.0267,
      "step": 688660
    },
    {
      "epoch": 1.1270399245890694,
      "grad_norm": 0.08339332044124603,
      "learning_rate": 7.79695973326832e-06,
      "loss": 0.027,
      "step": 688680
    },
    {
      "epoch": 1.1270726550277228,
      "grad_norm": 0.4472474455833435,
      "learning_rate": 7.796893841054803e-06,
      "loss": 0.0192,
      "step": 688700
    },
    {
      "epoch": 1.127105385466376,
      "grad_norm": 0.22505336999893188,
      "learning_rate": 7.796827948841286e-06,
      "loss": 0.0241,
      "step": 688720
    },
    {
      "epoch": 1.1271381159050293,
      "grad_norm": 0.3629700243473053,
      "learning_rate": 7.796762056627768e-06,
      "loss": 0.0261,
      "step": 688740
    },
    {
      "epoch": 1.1271708463436827,
      "grad_norm": 0.6128489375114441,
      "learning_rate": 7.796696164414252e-06,
      "loss": 0.0147,
      "step": 688760
    },
    {
      "epoch": 1.127203576782336,
      "grad_norm": 1.0246590375900269,
      "learning_rate": 7.796630272200736e-06,
      "loss": 0.025,
      "step": 688780
    },
    {
      "epoch": 1.1272363072209894,
      "grad_norm": 1.80301034450531,
      "learning_rate": 7.796564379987217e-06,
      "loss": 0.0285,
      "step": 688800
    },
    {
      "epoch": 1.1272690376596426,
      "grad_norm": 0.17206063866615295,
      "learning_rate": 7.796498487773701e-06,
      "loss": 0.0218,
      "step": 688820
    },
    {
      "epoch": 1.127301768098296,
      "grad_norm": 4.050211429595947,
      "learning_rate": 7.796432595560185e-06,
      "loss": 0.0276,
      "step": 688840
    },
    {
      "epoch": 1.1273344985369493,
      "grad_norm": 0.40746790170669556,
      "learning_rate": 7.796366703346666e-06,
      "loss": 0.0263,
      "step": 688860
    },
    {
      "epoch": 1.1273672289756027,
      "grad_norm": 0.671747624874115,
      "learning_rate": 7.79630081113315e-06,
      "loss": 0.0153,
      "step": 688880
    },
    {
      "epoch": 1.127399959414256,
      "grad_norm": 0.7183243036270142,
      "learning_rate": 7.796234918919632e-06,
      "loss": 0.0239,
      "step": 688900
    },
    {
      "epoch": 1.1274326898529095,
      "grad_norm": 0.6185590028762817,
      "learning_rate": 7.796169026706116e-06,
      "loss": 0.0266,
      "step": 688920
    },
    {
      "epoch": 1.1274654202915628,
      "grad_norm": 0.9962683320045471,
      "learning_rate": 7.796103134492597e-06,
      "loss": 0.0237,
      "step": 688940
    },
    {
      "epoch": 1.127498150730216,
      "grad_norm": 0.45252540707588196,
      "learning_rate": 7.796037242279081e-06,
      "loss": 0.025,
      "step": 688960
    },
    {
      "epoch": 1.1275308811688693,
      "grad_norm": 1.452615737915039,
      "learning_rate": 7.795971350065563e-06,
      "loss": 0.0368,
      "step": 688980
    },
    {
      "epoch": 1.1275636116075227,
      "grad_norm": 0.958297610282898,
      "learning_rate": 7.795905457852047e-06,
      "loss": 0.0262,
      "step": 689000
    },
    {
      "epoch": 1.127596342046176,
      "grad_norm": 0.9122267961502075,
      "learning_rate": 7.795839565638528e-06,
      "loss": 0.0236,
      "step": 689020
    },
    {
      "epoch": 1.1276290724848295,
      "grad_norm": 0.7428534030914307,
      "learning_rate": 7.795773673425012e-06,
      "loss": 0.0228,
      "step": 689040
    },
    {
      "epoch": 1.1276618029234828,
      "grad_norm": 0.6755375862121582,
      "learning_rate": 7.795707781211494e-06,
      "loss": 0.0208,
      "step": 689060
    },
    {
      "epoch": 1.1276945333621362,
      "grad_norm": 0.5704086422920227,
      "learning_rate": 7.795641888997977e-06,
      "loss": 0.0212,
      "step": 689080
    },
    {
      "epoch": 1.1277272638007894,
      "grad_norm": 0.5846194624900818,
      "learning_rate": 7.79557599678446e-06,
      "loss": 0.0179,
      "step": 689100
    },
    {
      "epoch": 1.1277599942394427,
      "grad_norm": 0.16891290247440338,
      "learning_rate": 7.795510104570943e-06,
      "loss": 0.0193,
      "step": 689120
    },
    {
      "epoch": 1.127792724678096,
      "grad_norm": 0.5035494565963745,
      "learning_rate": 7.795444212357427e-06,
      "loss": 0.0253,
      "step": 689140
    },
    {
      "epoch": 1.1278254551167495,
      "grad_norm": 0.28444337844848633,
      "learning_rate": 7.79537832014391e-06,
      "loss": 0.0211,
      "step": 689160
    },
    {
      "epoch": 1.1278581855554028,
      "grad_norm": 1.6058381795883179,
      "learning_rate": 7.795312427930392e-06,
      "loss": 0.0316,
      "step": 689180
    },
    {
      "epoch": 1.1278909159940562,
      "grad_norm": 2.5172343254089355,
      "learning_rate": 7.795246535716876e-06,
      "loss": 0.0213,
      "step": 689200
    },
    {
      "epoch": 1.1279236464327096,
      "grad_norm": 0.5325257182121277,
      "learning_rate": 7.79518064350336e-06,
      "loss": 0.0238,
      "step": 689220
    },
    {
      "epoch": 1.1279563768713627,
      "grad_norm": 0.5775818824768066,
      "learning_rate": 7.795114751289841e-06,
      "loss": 0.0258,
      "step": 689240
    },
    {
      "epoch": 1.1279891073100161,
      "grad_norm": 2.174889087677002,
      "learning_rate": 7.795048859076325e-06,
      "loss": 0.017,
      "step": 689260
    },
    {
      "epoch": 1.1280218377486695,
      "grad_norm": 0.33933225274086,
      "learning_rate": 7.794982966862807e-06,
      "loss": 0.0361,
      "step": 689280
    },
    {
      "epoch": 1.1280545681873229,
      "grad_norm": 0.6387242674827576,
      "learning_rate": 7.79491707464929e-06,
      "loss": 0.0295,
      "step": 689300
    },
    {
      "epoch": 1.1280872986259762,
      "grad_norm": 0.48102137446403503,
      "learning_rate": 7.794851182435772e-06,
      "loss": 0.0202,
      "step": 689320
    },
    {
      "epoch": 1.1281200290646296,
      "grad_norm": 0.6594982147216797,
      "learning_rate": 7.794785290222256e-06,
      "loss": 0.0261,
      "step": 689340
    },
    {
      "epoch": 1.128152759503283,
      "grad_norm": 0.8619217276573181,
      "learning_rate": 7.794719398008738e-06,
      "loss": 0.0186,
      "step": 689360
    },
    {
      "epoch": 1.1281854899419361,
      "grad_norm": 0.47358882427215576,
      "learning_rate": 7.794653505795221e-06,
      "loss": 0.0214,
      "step": 689380
    },
    {
      "epoch": 1.1282182203805895,
      "grad_norm": 0.8558459281921387,
      "learning_rate": 7.794587613581703e-06,
      "loss": 0.0223,
      "step": 689400
    },
    {
      "epoch": 1.1282509508192429,
      "grad_norm": 0.15480957925319672,
      "learning_rate": 7.794521721368187e-06,
      "loss": 0.0267,
      "step": 689420
    },
    {
      "epoch": 1.1282836812578962,
      "grad_norm": 0.3873116075992584,
      "learning_rate": 7.794455829154668e-06,
      "loss": 0.0224,
      "step": 689440
    },
    {
      "epoch": 1.1283164116965496,
      "grad_norm": 0.20818635821342468,
      "learning_rate": 7.794389936941152e-06,
      "loss": 0.0253,
      "step": 689460
    },
    {
      "epoch": 1.128349142135203,
      "grad_norm": 1.2114497423171997,
      "learning_rate": 7.794324044727634e-06,
      "loss": 0.0251,
      "step": 689480
    },
    {
      "epoch": 1.1283818725738564,
      "grad_norm": 0.3765445351600647,
      "learning_rate": 7.794258152514118e-06,
      "loss": 0.0239,
      "step": 689500
    },
    {
      "epoch": 1.1284146030125095,
      "grad_norm": 0.745654284954071,
      "learning_rate": 7.794192260300601e-06,
      "loss": 0.0244,
      "step": 689520
    },
    {
      "epoch": 1.1284473334511629,
      "grad_norm": 0.5967365503311157,
      "learning_rate": 7.794126368087083e-06,
      "loss": 0.0297,
      "step": 689540
    },
    {
      "epoch": 1.1284800638898163,
      "grad_norm": 0.42509159445762634,
      "learning_rate": 7.794060475873567e-06,
      "loss": 0.0266,
      "step": 689560
    },
    {
      "epoch": 1.1285127943284696,
      "grad_norm": 0.8560808300971985,
      "learning_rate": 7.79399458366005e-06,
      "loss": 0.0214,
      "step": 689580
    },
    {
      "epoch": 1.128545524767123,
      "grad_norm": 0.9898073077201843,
      "learning_rate": 7.793928691446532e-06,
      "loss": 0.0194,
      "step": 689600
    },
    {
      "epoch": 1.1285782552057761,
      "grad_norm": 0.5948056578636169,
      "learning_rate": 7.793862799233016e-06,
      "loss": 0.0186,
      "step": 689620
    },
    {
      "epoch": 1.1286109856444295,
      "grad_norm": 1.1810394525527954,
      "learning_rate": 7.7937969070195e-06,
      "loss": 0.0329,
      "step": 689640
    },
    {
      "epoch": 1.128643716083083,
      "grad_norm": 1.3176348209381104,
      "learning_rate": 7.793731014805981e-06,
      "loss": 0.0256,
      "step": 689660
    },
    {
      "epoch": 1.1286764465217363,
      "grad_norm": 2.6072614192962646,
      "learning_rate": 7.793665122592465e-06,
      "loss": 0.0317,
      "step": 689680
    },
    {
      "epoch": 1.1287091769603896,
      "grad_norm": 1.216493844985962,
      "learning_rate": 7.793599230378947e-06,
      "loss": 0.022,
      "step": 689700
    },
    {
      "epoch": 1.128741907399043,
      "grad_norm": 0.5674274563789368,
      "learning_rate": 7.79353333816543e-06,
      "loss": 0.0238,
      "step": 689720
    },
    {
      "epoch": 1.1287746378376964,
      "grad_norm": 0.3668343424797058,
      "learning_rate": 7.793467445951912e-06,
      "loss": 0.0256,
      "step": 689740
    },
    {
      "epoch": 1.1288073682763495,
      "grad_norm": 0.20174933969974518,
      "learning_rate": 7.793401553738396e-06,
      "loss": 0.0238,
      "step": 689760
    },
    {
      "epoch": 1.128840098715003,
      "grad_norm": 1.2787305116653442,
      "learning_rate": 7.793335661524878e-06,
      "loss": 0.0238,
      "step": 689780
    },
    {
      "epoch": 1.1288728291536563,
      "grad_norm": 1.9569867849349976,
      "learning_rate": 7.793269769311361e-06,
      "loss": 0.0224,
      "step": 689800
    },
    {
      "epoch": 1.1289055595923096,
      "grad_norm": 1.0351598262786865,
      "learning_rate": 7.793203877097843e-06,
      "loss": 0.0227,
      "step": 689820
    },
    {
      "epoch": 1.128938290030963,
      "grad_norm": 0.5265442728996277,
      "learning_rate": 7.793137984884327e-06,
      "loss": 0.0195,
      "step": 689840
    },
    {
      "epoch": 1.1289710204696164,
      "grad_norm": 0.4321780502796173,
      "learning_rate": 7.793072092670809e-06,
      "loss": 0.0305,
      "step": 689860
    },
    {
      "epoch": 1.1290037509082698,
      "grad_norm": 0.4258037209510803,
      "learning_rate": 7.793006200457292e-06,
      "loss": 0.0198,
      "step": 689880
    },
    {
      "epoch": 1.129036481346923,
      "grad_norm": 0.1518341302871704,
      "learning_rate": 7.792940308243776e-06,
      "loss": 0.0253,
      "step": 689900
    },
    {
      "epoch": 1.1290692117855763,
      "grad_norm": 0.27257096767425537,
      "learning_rate": 7.792874416030258e-06,
      "loss": 0.0276,
      "step": 689920
    },
    {
      "epoch": 1.1291019422242297,
      "grad_norm": 1.8636860847473145,
      "learning_rate": 7.792808523816741e-06,
      "loss": 0.0197,
      "step": 689940
    },
    {
      "epoch": 1.129134672662883,
      "grad_norm": 0.6622253656387329,
      "learning_rate": 7.792742631603225e-06,
      "loss": 0.0348,
      "step": 689960
    },
    {
      "epoch": 1.1291674031015364,
      "grad_norm": 1.0501174926757812,
      "learning_rate": 7.792676739389707e-06,
      "loss": 0.0275,
      "step": 689980
    },
    {
      "epoch": 1.1292001335401898,
      "grad_norm": 0.4345541000366211,
      "learning_rate": 7.79261084717619e-06,
      "loss": 0.0207,
      "step": 690000
    },
    {
      "epoch": 1.1292328639788431,
      "grad_norm": 0.19970478117465973,
      "learning_rate": 7.792544954962674e-06,
      "loss": 0.0312,
      "step": 690020
    },
    {
      "epoch": 1.1292655944174963,
      "grad_norm": 0.8698412775993347,
      "learning_rate": 7.792479062749156e-06,
      "loss": 0.025,
      "step": 690040
    },
    {
      "epoch": 1.1292983248561497,
      "grad_norm": 0.9980952143669128,
      "learning_rate": 7.79241317053564e-06,
      "loss": 0.0324,
      "step": 690060
    },
    {
      "epoch": 1.129331055294803,
      "grad_norm": 0.7460716366767883,
      "learning_rate": 7.792347278322121e-06,
      "loss": 0.0304,
      "step": 690080
    },
    {
      "epoch": 1.1293637857334564,
      "grad_norm": 1.5331847667694092,
      "learning_rate": 7.792281386108605e-06,
      "loss": 0.0194,
      "step": 690100
    },
    {
      "epoch": 1.1293965161721098,
      "grad_norm": 0.18214547634124756,
      "learning_rate": 7.792215493895087e-06,
      "loss": 0.0239,
      "step": 690120
    },
    {
      "epoch": 1.1294292466107632,
      "grad_norm": 1.6675610542297363,
      "learning_rate": 7.79214960168157e-06,
      "loss": 0.0131,
      "step": 690140
    },
    {
      "epoch": 1.1294619770494165,
      "grad_norm": 0.40448373556137085,
      "learning_rate": 7.792083709468052e-06,
      "loss": 0.0274,
      "step": 690160
    },
    {
      "epoch": 1.1294947074880697,
      "grad_norm": 0.20289863646030426,
      "learning_rate": 7.792017817254536e-06,
      "loss": 0.0165,
      "step": 690180
    },
    {
      "epoch": 1.129527437926723,
      "grad_norm": 0.33708107471466064,
      "learning_rate": 7.791951925041018e-06,
      "loss": 0.0242,
      "step": 690200
    },
    {
      "epoch": 1.1295601683653764,
      "grad_norm": 0.1838042438030243,
      "learning_rate": 7.791886032827501e-06,
      "loss": 0.0237,
      "step": 690220
    },
    {
      "epoch": 1.1295928988040298,
      "grad_norm": 0.6629534959793091,
      "learning_rate": 7.791820140613983e-06,
      "loss": 0.0325,
      "step": 690240
    },
    {
      "epoch": 1.1296256292426832,
      "grad_norm": 0.4627264440059662,
      "learning_rate": 7.791754248400467e-06,
      "loss": 0.0211,
      "step": 690260
    },
    {
      "epoch": 1.1296583596813365,
      "grad_norm": 0.7327919006347656,
      "learning_rate": 7.79168835618695e-06,
      "loss": 0.0304,
      "step": 690280
    },
    {
      "epoch": 1.1296910901199897,
      "grad_norm": 0.5677543878555298,
      "learning_rate": 7.791622463973432e-06,
      "loss": 0.0248,
      "step": 690300
    },
    {
      "epoch": 1.129723820558643,
      "grad_norm": 0.11174824088811874,
      "learning_rate": 7.791556571759916e-06,
      "loss": 0.0237,
      "step": 690320
    },
    {
      "epoch": 1.1297565509972964,
      "grad_norm": 0.26171910762786865,
      "learning_rate": 7.791490679546398e-06,
      "loss": 0.0271,
      "step": 690340
    },
    {
      "epoch": 1.1297892814359498,
      "grad_norm": 0.2594071626663208,
      "learning_rate": 7.791424787332881e-06,
      "loss": 0.0281,
      "step": 690360
    },
    {
      "epoch": 1.1298220118746032,
      "grad_norm": 0.8584977984428406,
      "learning_rate": 7.791358895119365e-06,
      "loss": 0.0335,
      "step": 690380
    },
    {
      "epoch": 1.1298547423132566,
      "grad_norm": 1.1621649265289307,
      "learning_rate": 7.791293002905847e-06,
      "loss": 0.0231,
      "step": 690400
    },
    {
      "epoch": 1.1298874727519097,
      "grad_norm": 0.7607744336128235,
      "learning_rate": 7.79122711069233e-06,
      "loss": 0.028,
      "step": 690420
    },
    {
      "epoch": 1.129920203190563,
      "grad_norm": 0.4230928421020508,
      "learning_rate": 7.791161218478814e-06,
      "loss": 0.0225,
      "step": 690440
    },
    {
      "epoch": 1.1299529336292164,
      "grad_norm": 0.4975079596042633,
      "learning_rate": 7.791095326265296e-06,
      "loss": 0.0231,
      "step": 690460
    },
    {
      "epoch": 1.1299856640678698,
      "grad_norm": 1.4594700336456299,
      "learning_rate": 7.79102943405178e-06,
      "loss": 0.0257,
      "step": 690480
    },
    {
      "epoch": 1.1300183945065232,
      "grad_norm": 0.720345675945282,
      "learning_rate": 7.790963541838261e-06,
      "loss": 0.0274,
      "step": 690500
    },
    {
      "epoch": 1.1300511249451766,
      "grad_norm": 1.1671142578125,
      "learning_rate": 7.790897649624745e-06,
      "loss": 0.0207,
      "step": 690520
    },
    {
      "epoch": 1.13008385538383,
      "grad_norm": 1.3864315748214722,
      "learning_rate": 7.790831757411227e-06,
      "loss": 0.0288,
      "step": 690540
    },
    {
      "epoch": 1.130116585822483,
      "grad_norm": 0.4087075889110565,
      "learning_rate": 7.79076586519771e-06,
      "loss": 0.0228,
      "step": 690560
    },
    {
      "epoch": 1.1301493162611365,
      "grad_norm": 0.05345505475997925,
      "learning_rate": 7.790699972984192e-06,
      "loss": 0.0236,
      "step": 690580
    },
    {
      "epoch": 1.1301820466997898,
      "grad_norm": 0.27478131651878357,
      "learning_rate": 7.790634080770676e-06,
      "loss": 0.017,
      "step": 690600
    },
    {
      "epoch": 1.1302147771384432,
      "grad_norm": 2.604928731918335,
      "learning_rate": 7.79056818855716e-06,
      "loss": 0.0176,
      "step": 690620
    },
    {
      "epoch": 1.1302475075770966,
      "grad_norm": 0.7032086253166199,
      "learning_rate": 7.790502296343641e-06,
      "loss": 0.0396,
      "step": 690640
    },
    {
      "epoch": 1.13028023801575,
      "grad_norm": 0.3368225395679474,
      "learning_rate": 7.790436404130125e-06,
      "loss": 0.028,
      "step": 690660
    },
    {
      "epoch": 1.1303129684544033,
      "grad_norm": 0.585792064666748,
      "learning_rate": 7.790370511916607e-06,
      "loss": 0.0268,
      "step": 690680
    },
    {
      "epoch": 1.1303456988930565,
      "grad_norm": 5.169150352478027,
      "learning_rate": 7.79030461970309e-06,
      "loss": 0.0269,
      "step": 690700
    },
    {
      "epoch": 1.1303784293317098,
      "grad_norm": 1.2788302898406982,
      "learning_rate": 7.790238727489572e-06,
      "loss": 0.025,
      "step": 690720
    },
    {
      "epoch": 1.1304111597703632,
      "grad_norm": 0.7644973397254944,
      "learning_rate": 7.790172835276056e-06,
      "loss": 0.0285,
      "step": 690740
    },
    {
      "epoch": 1.1304438902090166,
      "grad_norm": 0.7809526920318604,
      "learning_rate": 7.79010694306254e-06,
      "loss": 0.0277,
      "step": 690760
    },
    {
      "epoch": 1.13047662064767,
      "grad_norm": 0.8036953806877136,
      "learning_rate": 7.790041050849021e-06,
      "loss": 0.0328,
      "step": 690780
    },
    {
      "epoch": 1.1305093510863233,
      "grad_norm": 0.8114644289016724,
      "learning_rate": 7.789975158635505e-06,
      "loss": 0.03,
      "step": 690800
    },
    {
      "epoch": 1.1305420815249767,
      "grad_norm": 0.4403551518917084,
      "learning_rate": 7.789909266421989e-06,
      "loss": 0.0194,
      "step": 690820
    },
    {
      "epoch": 1.1305748119636299,
      "grad_norm": 0.5142149329185486,
      "learning_rate": 7.78984337420847e-06,
      "loss": 0.0221,
      "step": 690840
    },
    {
      "epoch": 1.1306075424022832,
      "grad_norm": 1.2177966833114624,
      "learning_rate": 7.789777481994954e-06,
      "loss": 0.0321,
      "step": 690860
    },
    {
      "epoch": 1.1306402728409366,
      "grad_norm": 0.21662373840808868,
      "learning_rate": 7.789711589781436e-06,
      "loss": 0.0168,
      "step": 690880
    },
    {
      "epoch": 1.13067300327959,
      "grad_norm": 0.08564766496419907,
      "learning_rate": 7.78964569756792e-06,
      "loss": 0.0165,
      "step": 690900
    },
    {
      "epoch": 1.1307057337182433,
      "grad_norm": 1.1712586879730225,
      "learning_rate": 7.789579805354401e-06,
      "loss": 0.0198,
      "step": 690920
    },
    {
      "epoch": 1.1307384641568967,
      "grad_norm": 0.6897634267807007,
      "learning_rate": 7.789513913140885e-06,
      "loss": 0.0325,
      "step": 690940
    },
    {
      "epoch": 1.13077119459555,
      "grad_norm": 1.5274814367294312,
      "learning_rate": 7.789448020927369e-06,
      "loss": 0.0261,
      "step": 690960
    },
    {
      "epoch": 1.1308039250342032,
      "grad_norm": 0.5107043981552124,
      "learning_rate": 7.78938212871385e-06,
      "loss": 0.0303,
      "step": 690980
    },
    {
      "epoch": 1.1308366554728566,
      "grad_norm": 3.3598415851593018,
      "learning_rate": 7.789316236500334e-06,
      "loss": 0.0259,
      "step": 691000
    },
    {
      "epoch": 1.13086938591151,
      "grad_norm": 0.38985270261764526,
      "learning_rate": 7.789250344286816e-06,
      "loss": 0.0233,
      "step": 691020
    },
    {
      "epoch": 1.1309021163501634,
      "grad_norm": 1.0718141794204712,
      "learning_rate": 7.7891844520733e-06,
      "loss": 0.0246,
      "step": 691040
    },
    {
      "epoch": 1.1309348467888167,
      "grad_norm": 1.0594712495803833,
      "learning_rate": 7.789118559859781e-06,
      "loss": 0.0227,
      "step": 691060
    },
    {
      "epoch": 1.1309675772274699,
      "grad_norm": 0.17569506168365479,
      "learning_rate": 7.789052667646265e-06,
      "loss": 0.0182,
      "step": 691080
    },
    {
      "epoch": 1.1310003076661233,
      "grad_norm": 0.3006229102611542,
      "learning_rate": 7.788986775432747e-06,
      "loss": 0.0333,
      "step": 691100
    },
    {
      "epoch": 1.1310330381047766,
      "grad_norm": 0.8611471056938171,
      "learning_rate": 7.78892088321923e-06,
      "loss": 0.0183,
      "step": 691120
    },
    {
      "epoch": 1.13106576854343,
      "grad_norm": 0.45194336771965027,
      "learning_rate": 7.788854991005712e-06,
      "loss": 0.0254,
      "step": 691140
    },
    {
      "epoch": 1.1310984989820834,
      "grad_norm": 0.15708956122398376,
      "learning_rate": 7.788789098792196e-06,
      "loss": 0.0314,
      "step": 691160
    },
    {
      "epoch": 1.1311312294207367,
      "grad_norm": 0.19518627226352692,
      "learning_rate": 7.78872320657868e-06,
      "loss": 0.0258,
      "step": 691180
    },
    {
      "epoch": 1.1311639598593901,
      "grad_norm": 0.5367303490638733,
      "learning_rate": 7.788657314365161e-06,
      "loss": 0.0219,
      "step": 691200
    },
    {
      "epoch": 1.1311966902980433,
      "grad_norm": 0.3387903571128845,
      "learning_rate": 7.788591422151645e-06,
      "loss": 0.0192,
      "step": 691220
    },
    {
      "epoch": 1.1312294207366966,
      "grad_norm": 1.0285840034484863,
      "learning_rate": 7.788525529938129e-06,
      "loss": 0.023,
      "step": 691240
    },
    {
      "epoch": 1.13126215117535,
      "grad_norm": 1.4466981887817383,
      "learning_rate": 7.78845963772461e-06,
      "loss": 0.0202,
      "step": 691260
    },
    {
      "epoch": 1.1312948816140034,
      "grad_norm": 0.502699077129364,
      "learning_rate": 7.788393745511094e-06,
      "loss": 0.026,
      "step": 691280
    },
    {
      "epoch": 1.1313276120526568,
      "grad_norm": 0.844532310962677,
      "learning_rate": 7.788327853297578e-06,
      "loss": 0.0343,
      "step": 691300
    },
    {
      "epoch": 1.1313603424913101,
      "grad_norm": 0.6048117280006409,
      "learning_rate": 7.78826196108406e-06,
      "loss": 0.0321,
      "step": 691320
    },
    {
      "epoch": 1.1313930729299635,
      "grad_norm": 0.7539482116699219,
      "learning_rate": 7.788196068870543e-06,
      "loss": 0.0238,
      "step": 691340
    },
    {
      "epoch": 1.1314258033686166,
      "grad_norm": 0.1302809715270996,
      "learning_rate": 7.788130176657025e-06,
      "loss": 0.0166,
      "step": 691360
    },
    {
      "epoch": 1.13145853380727,
      "grad_norm": 0.6157724857330322,
      "learning_rate": 7.788064284443509e-06,
      "loss": 0.0318,
      "step": 691380
    },
    {
      "epoch": 1.1314912642459234,
      "grad_norm": 0.5722796320915222,
      "learning_rate": 7.78799839222999e-06,
      "loss": 0.028,
      "step": 691400
    },
    {
      "epoch": 1.1315239946845768,
      "grad_norm": 0.8843710422515869,
      "learning_rate": 7.787932500016474e-06,
      "loss": 0.0311,
      "step": 691420
    },
    {
      "epoch": 1.1315567251232301,
      "grad_norm": 0.6603702306747437,
      "learning_rate": 7.787866607802956e-06,
      "loss": 0.0235,
      "step": 691440
    },
    {
      "epoch": 1.1315894555618835,
      "grad_norm": 0.8570836782455444,
      "learning_rate": 7.78780071558944e-06,
      "loss": 0.0389,
      "step": 691460
    },
    {
      "epoch": 1.1316221860005369,
      "grad_norm": 0.21031469106674194,
      "learning_rate": 7.787734823375921e-06,
      "loss": 0.0211,
      "step": 691480
    },
    {
      "epoch": 1.13165491643919,
      "grad_norm": 0.23420268297195435,
      "learning_rate": 7.787668931162405e-06,
      "loss": 0.0274,
      "step": 691500
    },
    {
      "epoch": 1.1316876468778434,
      "grad_norm": 0.5956542491912842,
      "learning_rate": 7.787603038948887e-06,
      "loss": 0.0226,
      "step": 691520
    },
    {
      "epoch": 1.1317203773164968,
      "grad_norm": 0.2718825936317444,
      "learning_rate": 7.78753714673537e-06,
      "loss": 0.0226,
      "step": 691540
    },
    {
      "epoch": 1.1317531077551501,
      "grad_norm": 1.863285779953003,
      "learning_rate": 7.787471254521854e-06,
      "loss": 0.0348,
      "step": 691560
    },
    {
      "epoch": 1.1317858381938035,
      "grad_norm": 5.350112438201904,
      "learning_rate": 7.787405362308336e-06,
      "loss": 0.0175,
      "step": 691580
    },
    {
      "epoch": 1.131818568632457,
      "grad_norm": 1.2261877059936523,
      "learning_rate": 7.78733947009482e-06,
      "loss": 0.036,
      "step": 691600
    },
    {
      "epoch": 1.1318512990711103,
      "grad_norm": 2.2529351711273193,
      "learning_rate": 7.787273577881303e-06,
      "loss": 0.0179,
      "step": 691620
    },
    {
      "epoch": 1.1318840295097634,
      "grad_norm": 2.3169424533843994,
      "learning_rate": 7.787207685667785e-06,
      "loss": 0.0389,
      "step": 691640
    },
    {
      "epoch": 1.1319167599484168,
      "grad_norm": 1.1860841512680054,
      "learning_rate": 7.787141793454269e-06,
      "loss": 0.0227,
      "step": 691660
    },
    {
      "epoch": 1.1319494903870702,
      "grad_norm": 1.127515196800232,
      "learning_rate": 7.787075901240752e-06,
      "loss": 0.0203,
      "step": 691680
    },
    {
      "epoch": 1.1319822208257235,
      "grad_norm": 1.5540822744369507,
      "learning_rate": 7.787010009027234e-06,
      "loss": 0.0337,
      "step": 691700
    },
    {
      "epoch": 1.132014951264377,
      "grad_norm": 1.0738780498504639,
      "learning_rate": 7.786944116813718e-06,
      "loss": 0.0308,
      "step": 691720
    },
    {
      "epoch": 1.1320476817030303,
      "grad_norm": 1.1832021474838257,
      "learning_rate": 7.7868782246002e-06,
      "loss": 0.0259,
      "step": 691740
    },
    {
      "epoch": 1.1320804121416836,
      "grad_norm": 0.9860530495643616,
      "learning_rate": 7.786812332386683e-06,
      "loss": 0.0181,
      "step": 691760
    },
    {
      "epoch": 1.1321131425803368,
      "grad_norm": 0.4459589123725891,
      "learning_rate": 7.786746440173165e-06,
      "loss": 0.0207,
      "step": 691780
    },
    {
      "epoch": 1.1321458730189902,
      "grad_norm": 0.23575395345687866,
      "learning_rate": 7.786680547959649e-06,
      "loss": 0.0255,
      "step": 691800
    },
    {
      "epoch": 1.1321786034576435,
      "grad_norm": 1.4718031883239746,
      "learning_rate": 7.78661465574613e-06,
      "loss": 0.0259,
      "step": 691820
    },
    {
      "epoch": 1.132211333896297,
      "grad_norm": 0.21712279319763184,
      "learning_rate": 7.786548763532614e-06,
      "loss": 0.0256,
      "step": 691840
    },
    {
      "epoch": 1.1322440643349503,
      "grad_norm": 0.6294076442718506,
      "learning_rate": 7.786482871319096e-06,
      "loss": 0.0197,
      "step": 691860
    },
    {
      "epoch": 1.1322767947736034,
      "grad_norm": 0.7999088168144226,
      "learning_rate": 7.78641697910558e-06,
      "loss": 0.0305,
      "step": 691880
    },
    {
      "epoch": 1.1323095252122568,
      "grad_norm": 0.5188127756118774,
      "learning_rate": 7.786351086892062e-06,
      "loss": 0.0217,
      "step": 691900
    },
    {
      "epoch": 1.1323422556509102,
      "grad_norm": 0.9602598547935486,
      "learning_rate": 7.786285194678545e-06,
      "loss": 0.0294,
      "step": 691920
    },
    {
      "epoch": 1.1323749860895636,
      "grad_norm": 1.5147825479507446,
      "learning_rate": 7.786219302465029e-06,
      "loss": 0.0235,
      "step": 691940
    },
    {
      "epoch": 1.132407716528217,
      "grad_norm": 0.535331130027771,
      "learning_rate": 7.78615341025151e-06,
      "loss": 0.0242,
      "step": 691960
    },
    {
      "epoch": 1.1324404469668703,
      "grad_norm": 0.5838499665260315,
      "learning_rate": 7.786087518037994e-06,
      "loss": 0.0235,
      "step": 691980
    },
    {
      "epoch": 1.1324731774055237,
      "grad_norm": 1.6352936029434204,
      "learning_rate": 7.786021625824478e-06,
      "loss": 0.0215,
      "step": 692000
    },
    {
      "epoch": 1.1325059078441768,
      "grad_norm": 1.0486624240875244,
      "learning_rate": 7.78595573361096e-06,
      "loss": 0.0209,
      "step": 692020
    },
    {
      "epoch": 1.1325386382828302,
      "grad_norm": 0.6673727035522461,
      "learning_rate": 7.785889841397443e-06,
      "loss": 0.0194,
      "step": 692040
    },
    {
      "epoch": 1.1325713687214836,
      "grad_norm": 1.0672564506530762,
      "learning_rate": 7.785823949183927e-06,
      "loss": 0.0248,
      "step": 692060
    },
    {
      "epoch": 1.132604099160137,
      "grad_norm": 0.8631083965301514,
      "learning_rate": 7.785758056970409e-06,
      "loss": 0.0185,
      "step": 692080
    },
    {
      "epoch": 1.1326368295987903,
      "grad_norm": 0.5539960265159607,
      "learning_rate": 7.785692164756892e-06,
      "loss": 0.0166,
      "step": 692100
    },
    {
      "epoch": 1.1326695600374437,
      "grad_norm": 0.45577472448349,
      "learning_rate": 7.785626272543374e-06,
      "loss": 0.0172,
      "step": 692120
    },
    {
      "epoch": 1.132702290476097,
      "grad_norm": 0.19194228947162628,
      "learning_rate": 7.785560380329858e-06,
      "loss": 0.0223,
      "step": 692140
    },
    {
      "epoch": 1.1327350209147502,
      "grad_norm": 0.745024561882019,
      "learning_rate": 7.78549448811634e-06,
      "loss": 0.0246,
      "step": 692160
    },
    {
      "epoch": 1.1327677513534036,
      "grad_norm": 0.7798776626586914,
      "learning_rate": 7.785428595902823e-06,
      "loss": 0.0354,
      "step": 692180
    },
    {
      "epoch": 1.132800481792057,
      "grad_norm": 0.39740023016929626,
      "learning_rate": 7.785362703689305e-06,
      "loss": 0.0181,
      "step": 692200
    },
    {
      "epoch": 1.1328332122307103,
      "grad_norm": 0.2734125256538391,
      "learning_rate": 7.785296811475789e-06,
      "loss": 0.0243,
      "step": 692220
    },
    {
      "epoch": 1.1328659426693637,
      "grad_norm": 1.15607750415802,
      "learning_rate": 7.78523091926227e-06,
      "loss": 0.0263,
      "step": 692240
    },
    {
      "epoch": 1.132898673108017,
      "grad_norm": 0.36471351981163025,
      "learning_rate": 7.785165027048754e-06,
      "loss": 0.0238,
      "step": 692260
    },
    {
      "epoch": 1.1329314035466704,
      "grad_norm": 0.7226423025131226,
      "learning_rate": 7.785099134835236e-06,
      "loss": 0.0424,
      "step": 692280
    },
    {
      "epoch": 1.1329641339853236,
      "grad_norm": 0.1252952665090561,
      "learning_rate": 7.78503324262172e-06,
      "loss": 0.0247,
      "step": 692300
    },
    {
      "epoch": 1.132996864423977,
      "grad_norm": 1.0482078790664673,
      "learning_rate": 7.784967350408202e-06,
      "loss": 0.0265,
      "step": 692320
    },
    {
      "epoch": 1.1330295948626303,
      "grad_norm": 0.4144911468029022,
      "learning_rate": 7.784901458194685e-06,
      "loss": 0.0238,
      "step": 692340
    },
    {
      "epoch": 1.1330623253012837,
      "grad_norm": 0.3091638684272766,
      "learning_rate": 7.784835565981169e-06,
      "loss": 0.0173,
      "step": 692360
    },
    {
      "epoch": 1.133095055739937,
      "grad_norm": 0.6474681496620178,
      "learning_rate": 7.78476967376765e-06,
      "loss": 0.0174,
      "step": 692380
    },
    {
      "epoch": 1.1331277861785904,
      "grad_norm": 0.4647411108016968,
      "learning_rate": 7.784703781554134e-06,
      "loss": 0.0252,
      "step": 692400
    },
    {
      "epoch": 1.1331605166172438,
      "grad_norm": 0.4091298282146454,
      "learning_rate": 7.784637889340618e-06,
      "loss": 0.0172,
      "step": 692420
    },
    {
      "epoch": 1.133193247055897,
      "grad_norm": 0.46502411365509033,
      "learning_rate": 7.7845719971271e-06,
      "loss": 0.0308,
      "step": 692440
    },
    {
      "epoch": 1.1332259774945503,
      "grad_norm": 0.9551170468330383,
      "learning_rate": 7.784506104913583e-06,
      "loss": 0.0142,
      "step": 692460
    },
    {
      "epoch": 1.1332587079332037,
      "grad_norm": 0.4661473035812378,
      "learning_rate": 7.784440212700067e-06,
      "loss": 0.036,
      "step": 692480
    },
    {
      "epoch": 1.133291438371857,
      "grad_norm": 1.8705099821090698,
      "learning_rate": 7.784374320486549e-06,
      "loss": 0.0207,
      "step": 692500
    },
    {
      "epoch": 1.1333241688105105,
      "grad_norm": 0.9218350648880005,
      "learning_rate": 7.784308428273032e-06,
      "loss": 0.0259,
      "step": 692520
    },
    {
      "epoch": 1.1333568992491638,
      "grad_norm": 0.5182821750640869,
      "learning_rate": 7.784242536059514e-06,
      "loss": 0.027,
      "step": 692540
    },
    {
      "epoch": 1.133389629687817,
      "grad_norm": 0.44202712178230286,
      "learning_rate": 7.784176643845998e-06,
      "loss": 0.021,
      "step": 692560
    },
    {
      "epoch": 1.1334223601264704,
      "grad_norm": 1.444995641708374,
      "learning_rate": 7.78411075163248e-06,
      "loss": 0.0323,
      "step": 692580
    },
    {
      "epoch": 1.1334550905651237,
      "grad_norm": 0.4790346026420593,
      "learning_rate": 7.784044859418963e-06,
      "loss": 0.0232,
      "step": 692600
    },
    {
      "epoch": 1.133487821003777,
      "grad_norm": 2.4277985095977783,
      "learning_rate": 7.783978967205445e-06,
      "loss": 0.023,
      "step": 692620
    },
    {
      "epoch": 1.1335205514424305,
      "grad_norm": 1.4170840978622437,
      "learning_rate": 7.783913074991929e-06,
      "loss": 0.0257,
      "step": 692640
    },
    {
      "epoch": 1.1335532818810838,
      "grad_norm": 1.4842405319213867,
      "learning_rate": 7.78384718277841e-06,
      "loss": 0.0158,
      "step": 692660
    },
    {
      "epoch": 1.133586012319737,
      "grad_norm": 0.34559834003448486,
      "learning_rate": 7.783781290564894e-06,
      "loss": 0.0244,
      "step": 692680
    },
    {
      "epoch": 1.1336187427583904,
      "grad_norm": 0.3784558176994324,
      "learning_rate": 7.783715398351376e-06,
      "loss": 0.0198,
      "step": 692700
    },
    {
      "epoch": 1.1336514731970437,
      "grad_norm": 0.7797497510910034,
      "learning_rate": 7.78364950613786e-06,
      "loss": 0.0262,
      "step": 692720
    },
    {
      "epoch": 1.133684203635697,
      "grad_norm": 0.08658910542726517,
      "learning_rate": 7.783583613924343e-06,
      "loss": 0.0229,
      "step": 692740
    },
    {
      "epoch": 1.1337169340743505,
      "grad_norm": 1.0418932437896729,
      "learning_rate": 7.783517721710825e-06,
      "loss": 0.0221,
      "step": 692760
    },
    {
      "epoch": 1.1337496645130039,
      "grad_norm": 0.9825254082679749,
      "learning_rate": 7.783451829497309e-06,
      "loss": 0.0243,
      "step": 692780
    },
    {
      "epoch": 1.1337823949516572,
      "grad_norm": 0.7449606657028198,
      "learning_rate": 7.783385937283792e-06,
      "loss": 0.0265,
      "step": 692800
    },
    {
      "epoch": 1.1338151253903104,
      "grad_norm": 0.6740395426750183,
      "learning_rate": 7.783320045070274e-06,
      "loss": 0.0255,
      "step": 692820
    },
    {
      "epoch": 1.1338478558289637,
      "grad_norm": 3.6652045249938965,
      "learning_rate": 7.783254152856758e-06,
      "loss": 0.0223,
      "step": 692840
    },
    {
      "epoch": 1.1338805862676171,
      "grad_norm": 0.5677798986434937,
      "learning_rate": 7.783188260643242e-06,
      "loss": 0.0187,
      "step": 692860
    },
    {
      "epoch": 1.1339133167062705,
      "grad_norm": 0.9098156690597534,
      "learning_rate": 7.783122368429723e-06,
      "loss": 0.0299,
      "step": 692880
    },
    {
      "epoch": 1.1339460471449239,
      "grad_norm": 0.09754163771867752,
      "learning_rate": 7.783056476216207e-06,
      "loss": 0.0182,
      "step": 692900
    },
    {
      "epoch": 1.1339787775835772,
      "grad_norm": 1.5552847385406494,
      "learning_rate": 7.782990584002689e-06,
      "loss": 0.0235,
      "step": 692920
    },
    {
      "epoch": 1.1340115080222306,
      "grad_norm": 0.637622594833374,
      "learning_rate": 7.782924691789172e-06,
      "loss": 0.0292,
      "step": 692940
    },
    {
      "epoch": 1.1340442384608838,
      "grad_norm": 0.6622380614280701,
      "learning_rate": 7.782858799575654e-06,
      "loss": 0.0318,
      "step": 692960
    },
    {
      "epoch": 1.1340769688995371,
      "grad_norm": 0.4634447395801544,
      "learning_rate": 7.782792907362138e-06,
      "loss": 0.0163,
      "step": 692980
    },
    {
      "epoch": 1.1341096993381905,
      "grad_norm": 1.6877943277359009,
      "learning_rate": 7.78272701514862e-06,
      "loss": 0.019,
      "step": 693000
    },
    {
      "epoch": 1.1341424297768439,
      "grad_norm": 2.474681854248047,
      "learning_rate": 7.782661122935103e-06,
      "loss": 0.0212,
      "step": 693020
    },
    {
      "epoch": 1.1341751602154972,
      "grad_norm": 0.39104294776916504,
      "learning_rate": 7.782595230721585e-06,
      "loss": 0.0231,
      "step": 693040
    },
    {
      "epoch": 1.1342078906541506,
      "grad_norm": 0.9250587821006775,
      "learning_rate": 7.782529338508069e-06,
      "loss": 0.0213,
      "step": 693060
    },
    {
      "epoch": 1.134240621092804,
      "grad_norm": 0.43401843309402466,
      "learning_rate": 7.782463446294553e-06,
      "loss": 0.0255,
      "step": 693080
    },
    {
      "epoch": 1.1342733515314571,
      "grad_norm": 0.4162486791610718,
      "learning_rate": 7.782397554081034e-06,
      "loss": 0.0251,
      "step": 693100
    },
    {
      "epoch": 1.1343060819701105,
      "grad_norm": 1.2622004747390747,
      "learning_rate": 7.782331661867518e-06,
      "loss": 0.0236,
      "step": 693120
    },
    {
      "epoch": 1.1343388124087639,
      "grad_norm": 0.45401209592819214,
      "learning_rate": 7.782265769654e-06,
      "loss": 0.0222,
      "step": 693140
    },
    {
      "epoch": 1.1343715428474173,
      "grad_norm": 1.9498255252838135,
      "learning_rate": 7.782199877440483e-06,
      "loss": 0.0195,
      "step": 693160
    },
    {
      "epoch": 1.1344042732860706,
      "grad_norm": 1.276021122932434,
      "learning_rate": 7.782133985226965e-06,
      "loss": 0.0243,
      "step": 693180
    },
    {
      "epoch": 1.134437003724724,
      "grad_norm": 0.6271718144416809,
      "learning_rate": 7.782068093013449e-06,
      "loss": 0.0356,
      "step": 693200
    },
    {
      "epoch": 1.1344697341633774,
      "grad_norm": 0.35169416666030884,
      "learning_rate": 7.782002200799933e-06,
      "loss": 0.0191,
      "step": 693220
    },
    {
      "epoch": 1.1345024646020305,
      "grad_norm": 1.8576343059539795,
      "learning_rate": 7.781936308586414e-06,
      "loss": 0.0217,
      "step": 693240
    },
    {
      "epoch": 1.134535195040684,
      "grad_norm": 0.4461938440799713,
      "learning_rate": 7.781870416372898e-06,
      "loss": 0.0236,
      "step": 693260
    },
    {
      "epoch": 1.1345679254793373,
      "grad_norm": 0.44789162278175354,
      "learning_rate": 7.781804524159382e-06,
      "loss": 0.0242,
      "step": 693280
    },
    {
      "epoch": 1.1346006559179906,
      "grad_norm": 0.19690053164958954,
      "learning_rate": 7.781738631945864e-06,
      "loss": 0.021,
      "step": 693300
    },
    {
      "epoch": 1.134633386356644,
      "grad_norm": 6.431909561157227,
      "learning_rate": 7.781672739732347e-06,
      "loss": 0.0366,
      "step": 693320
    },
    {
      "epoch": 1.1346661167952974,
      "grad_norm": 0.2963044345378876,
      "learning_rate": 7.781606847518829e-06,
      "loss": 0.0269,
      "step": 693340
    },
    {
      "epoch": 1.1346988472339505,
      "grad_norm": 0.9711751341819763,
      "learning_rate": 7.781540955305313e-06,
      "loss": 0.0229,
      "step": 693360
    },
    {
      "epoch": 1.134731577672604,
      "grad_norm": 0.3006758987903595,
      "learning_rate": 7.781475063091794e-06,
      "loss": 0.0283,
      "step": 693380
    },
    {
      "epoch": 1.1347643081112573,
      "grad_norm": 0.992976188659668,
      "learning_rate": 7.781409170878278e-06,
      "loss": 0.0195,
      "step": 693400
    },
    {
      "epoch": 1.1347970385499107,
      "grad_norm": 0.22160735726356506,
      "learning_rate": 7.781343278664762e-06,
      "loss": 0.0255,
      "step": 693420
    },
    {
      "epoch": 1.134829768988564,
      "grad_norm": 7.1996941566467285,
      "learning_rate": 7.781277386451244e-06,
      "loss": 0.0248,
      "step": 693440
    },
    {
      "epoch": 1.1348624994272174,
      "grad_norm": 1.974610447883606,
      "learning_rate": 7.781211494237727e-06,
      "loss": 0.015,
      "step": 693460
    },
    {
      "epoch": 1.1348952298658705,
      "grad_norm": 2.0445716381073,
      "learning_rate": 7.781145602024209e-06,
      "loss": 0.0241,
      "step": 693480
    },
    {
      "epoch": 1.134927960304524,
      "grad_norm": 0.2646627128124237,
      "learning_rate": 7.781079709810693e-06,
      "loss": 0.0174,
      "step": 693500
    },
    {
      "epoch": 1.1349606907431773,
      "grad_norm": 0.373543918132782,
      "learning_rate": 7.781013817597174e-06,
      "loss": 0.0203,
      "step": 693520
    },
    {
      "epoch": 1.1349934211818307,
      "grad_norm": 1.0119554996490479,
      "learning_rate": 7.780947925383658e-06,
      "loss": 0.0229,
      "step": 693540
    },
    {
      "epoch": 1.135026151620484,
      "grad_norm": 0.8143146634101868,
      "learning_rate": 7.78088203317014e-06,
      "loss": 0.0244,
      "step": 693560
    },
    {
      "epoch": 1.1350588820591374,
      "grad_norm": 1.1230802536010742,
      "learning_rate": 7.780816140956624e-06,
      "loss": 0.0332,
      "step": 693580
    },
    {
      "epoch": 1.1350916124977908,
      "grad_norm": 0.5861796736717224,
      "learning_rate": 7.780750248743107e-06,
      "loss": 0.0272,
      "step": 693600
    },
    {
      "epoch": 1.135124342936444,
      "grad_norm": 1.4284051656723022,
      "learning_rate": 7.780684356529589e-06,
      "loss": 0.0349,
      "step": 693620
    },
    {
      "epoch": 1.1351570733750973,
      "grad_norm": 0.9915955066680908,
      "learning_rate": 7.780618464316073e-06,
      "loss": 0.0252,
      "step": 693640
    },
    {
      "epoch": 1.1351898038137507,
      "grad_norm": 0.24509014189243317,
      "learning_rate": 7.780552572102556e-06,
      "loss": 0.0289,
      "step": 693660
    },
    {
      "epoch": 1.135222534252404,
      "grad_norm": 0.7375462055206299,
      "learning_rate": 7.780486679889038e-06,
      "loss": 0.0298,
      "step": 693680
    },
    {
      "epoch": 1.1352552646910574,
      "grad_norm": 0.7669941782951355,
      "learning_rate": 7.780420787675522e-06,
      "loss": 0.0201,
      "step": 693700
    },
    {
      "epoch": 1.1352879951297108,
      "grad_norm": 0.45100441575050354,
      "learning_rate": 7.780354895462004e-06,
      "loss": 0.0221,
      "step": 693720
    },
    {
      "epoch": 1.1353207255683642,
      "grad_norm": 0.2568055987358093,
      "learning_rate": 7.780289003248487e-06,
      "loss": 0.0212,
      "step": 693740
    },
    {
      "epoch": 1.1353534560070173,
      "grad_norm": 0.2960892617702484,
      "learning_rate": 7.78022311103497e-06,
      "loss": 0.0197,
      "step": 693760
    },
    {
      "epoch": 1.1353861864456707,
      "grad_norm": 0.7908515930175781,
      "learning_rate": 7.780157218821453e-06,
      "loss": 0.0209,
      "step": 693780
    },
    {
      "epoch": 1.135418916884324,
      "grad_norm": 0.6519663333892822,
      "learning_rate": 7.780091326607936e-06,
      "loss": 0.0263,
      "step": 693800
    },
    {
      "epoch": 1.1354516473229774,
      "grad_norm": 0.2940586805343628,
      "learning_rate": 7.780025434394418e-06,
      "loss": 0.0192,
      "step": 693820
    },
    {
      "epoch": 1.1354843777616308,
      "grad_norm": 0.33104658126831055,
      "learning_rate": 7.779959542180902e-06,
      "loss": 0.0242,
      "step": 693840
    },
    {
      "epoch": 1.1355171082002842,
      "grad_norm": 1.0822595357894897,
      "learning_rate": 7.779893649967384e-06,
      "loss": 0.0387,
      "step": 693860
    },
    {
      "epoch": 1.1355498386389375,
      "grad_norm": 0.9877492189407349,
      "learning_rate": 7.779827757753867e-06,
      "loss": 0.0273,
      "step": 693880
    },
    {
      "epoch": 1.1355825690775907,
      "grad_norm": 0.4474756121635437,
      "learning_rate": 7.779761865540349e-06,
      "loss": 0.0215,
      "step": 693900
    },
    {
      "epoch": 1.135615299516244,
      "grad_norm": 0.3178412914276123,
      "learning_rate": 7.779695973326833e-06,
      "loss": 0.0225,
      "step": 693920
    },
    {
      "epoch": 1.1356480299548974,
      "grad_norm": 1.1193490028381348,
      "learning_rate": 7.779630081113315e-06,
      "loss": 0.0237,
      "step": 693940
    },
    {
      "epoch": 1.1356807603935508,
      "grad_norm": 0.4028724133968353,
      "learning_rate": 7.779564188899798e-06,
      "loss": 0.0195,
      "step": 693960
    },
    {
      "epoch": 1.1357134908322042,
      "grad_norm": 0.5521103143692017,
      "learning_rate": 7.77949829668628e-06,
      "loss": 0.027,
      "step": 693980
    },
    {
      "epoch": 1.1357462212708576,
      "grad_norm": 0.511195957660675,
      "learning_rate": 7.779432404472764e-06,
      "loss": 0.0235,
      "step": 694000
    },
    {
      "epoch": 1.135778951709511,
      "grad_norm": 0.4483940303325653,
      "learning_rate": 7.779366512259247e-06,
      "loss": 0.0183,
      "step": 694020
    },
    {
      "epoch": 1.135811682148164,
      "grad_norm": 0.7139689326286316,
      "learning_rate": 7.779300620045729e-06,
      "loss": 0.0354,
      "step": 694040
    },
    {
      "epoch": 1.1358444125868175,
      "grad_norm": 0.13970302045345306,
      "learning_rate": 7.779234727832213e-06,
      "loss": 0.0254,
      "step": 694060
    },
    {
      "epoch": 1.1358771430254708,
      "grad_norm": 0.3680480122566223,
      "learning_rate": 7.779168835618696e-06,
      "loss": 0.027,
      "step": 694080
    },
    {
      "epoch": 1.1359098734641242,
      "grad_norm": 2.256429433822632,
      "learning_rate": 7.779102943405178e-06,
      "loss": 0.0241,
      "step": 694100
    },
    {
      "epoch": 1.1359426039027776,
      "grad_norm": 0.32499533891677856,
      "learning_rate": 7.779037051191662e-06,
      "loss": 0.0193,
      "step": 694120
    },
    {
      "epoch": 1.1359753343414307,
      "grad_norm": 0.06726345419883728,
      "learning_rate": 7.778971158978145e-06,
      "loss": 0.021,
      "step": 694140
    },
    {
      "epoch": 1.136008064780084,
      "grad_norm": 0.23102900385856628,
      "learning_rate": 7.778905266764627e-06,
      "loss": 0.0191,
      "step": 694160
    },
    {
      "epoch": 1.1360407952187375,
      "grad_norm": 0.30550745129585266,
      "learning_rate": 7.77883937455111e-06,
      "loss": 0.0166,
      "step": 694180
    },
    {
      "epoch": 1.1360735256573908,
      "grad_norm": 0.2540837824344635,
      "learning_rate": 7.778773482337593e-06,
      "loss": 0.0228,
      "step": 694200
    },
    {
      "epoch": 1.1361062560960442,
      "grad_norm": 0.7130521535873413,
      "learning_rate": 7.778707590124076e-06,
      "loss": 0.0179,
      "step": 694220
    },
    {
      "epoch": 1.1361389865346976,
      "grad_norm": 2.1182453632354736,
      "learning_rate": 7.778641697910558e-06,
      "loss": 0.0208,
      "step": 694240
    },
    {
      "epoch": 1.136171716973351,
      "grad_norm": 0.4595251679420471,
      "learning_rate": 7.778575805697042e-06,
      "loss": 0.0289,
      "step": 694260
    },
    {
      "epoch": 1.136204447412004,
      "grad_norm": 1.5507817268371582,
      "learning_rate": 7.778509913483524e-06,
      "loss": 0.0307,
      "step": 694280
    },
    {
      "epoch": 1.1362371778506575,
      "grad_norm": 0.724241316318512,
      "learning_rate": 7.778444021270007e-06,
      "loss": 0.0193,
      "step": 694300
    },
    {
      "epoch": 1.1362699082893108,
      "grad_norm": 0.5160343050956726,
      "learning_rate": 7.77837812905649e-06,
      "loss": 0.0222,
      "step": 694320
    },
    {
      "epoch": 1.1363026387279642,
      "grad_norm": 0.3775683343410492,
      "learning_rate": 7.778312236842973e-06,
      "loss": 0.0181,
      "step": 694340
    },
    {
      "epoch": 1.1363353691666176,
      "grad_norm": 0.4315921664237976,
      "learning_rate": 7.778246344629455e-06,
      "loss": 0.022,
      "step": 694360
    },
    {
      "epoch": 1.136368099605271,
      "grad_norm": 1.0152170658111572,
      "learning_rate": 7.778180452415938e-06,
      "loss": 0.0199,
      "step": 694380
    },
    {
      "epoch": 1.1364008300439243,
      "grad_norm": 0.8047383427619934,
      "learning_rate": 7.778114560202422e-06,
      "loss": 0.0184,
      "step": 694400
    },
    {
      "epoch": 1.1364335604825775,
      "grad_norm": 0.3414844274520874,
      "learning_rate": 7.778048667988904e-06,
      "loss": 0.0205,
      "step": 694420
    },
    {
      "epoch": 1.1364662909212309,
      "grad_norm": 0.9676450490951538,
      "learning_rate": 7.777982775775387e-06,
      "loss": 0.0245,
      "step": 694440
    },
    {
      "epoch": 1.1364990213598842,
      "grad_norm": 0.5585052967071533,
      "learning_rate": 7.777916883561871e-06,
      "loss": 0.0221,
      "step": 694460
    },
    {
      "epoch": 1.1365317517985376,
      "grad_norm": 0.7702759504318237,
      "learning_rate": 7.777850991348353e-06,
      "loss": 0.0146,
      "step": 694480
    },
    {
      "epoch": 1.136564482237191,
      "grad_norm": 0.11700642108917236,
      "learning_rate": 7.777785099134836e-06,
      "loss": 0.0194,
      "step": 694500
    },
    {
      "epoch": 1.1365972126758443,
      "grad_norm": 0.6300763487815857,
      "learning_rate": 7.77771920692132e-06,
      "loss": 0.0214,
      "step": 694520
    },
    {
      "epoch": 1.1366299431144977,
      "grad_norm": 0.7178372144699097,
      "learning_rate": 7.777653314707802e-06,
      "loss": 0.0297,
      "step": 694540
    },
    {
      "epoch": 1.1366626735531509,
      "grad_norm": 1.4783298969268799,
      "learning_rate": 7.777587422494285e-06,
      "loss": 0.0293,
      "step": 694560
    },
    {
      "epoch": 1.1366954039918042,
      "grad_norm": 1.0794572830200195,
      "learning_rate": 7.777521530280767e-06,
      "loss": 0.0288,
      "step": 694580
    },
    {
      "epoch": 1.1367281344304576,
      "grad_norm": 0.25694403052330017,
      "learning_rate": 7.777455638067251e-06,
      "loss": 0.0186,
      "step": 694600
    },
    {
      "epoch": 1.136760864869111,
      "grad_norm": 0.415821373462677,
      "learning_rate": 7.777389745853733e-06,
      "loss": 0.0278,
      "step": 694620
    },
    {
      "epoch": 1.1367935953077644,
      "grad_norm": 1.0930557250976562,
      "learning_rate": 7.777323853640216e-06,
      "loss": 0.0196,
      "step": 694640
    },
    {
      "epoch": 1.1368263257464177,
      "grad_norm": 0.5375450849533081,
      "learning_rate": 7.777257961426698e-06,
      "loss": 0.0204,
      "step": 694660
    },
    {
      "epoch": 1.136859056185071,
      "grad_norm": 0.2491825670003891,
      "learning_rate": 7.777192069213182e-06,
      "loss": 0.0211,
      "step": 694680
    },
    {
      "epoch": 1.1368917866237243,
      "grad_norm": 1.6516847610473633,
      "learning_rate": 7.777126176999664e-06,
      "loss": 0.0224,
      "step": 694700
    },
    {
      "epoch": 1.1369245170623776,
      "grad_norm": 0.381071537733078,
      "learning_rate": 7.777060284786147e-06,
      "loss": 0.0188,
      "step": 694720
    },
    {
      "epoch": 1.136957247501031,
      "grad_norm": 1.3734228610992432,
      "learning_rate": 7.77699439257263e-06,
      "loss": 0.0292,
      "step": 694740
    },
    {
      "epoch": 1.1369899779396844,
      "grad_norm": 0.9531340003013611,
      "learning_rate": 7.776928500359113e-06,
      "loss": 0.0247,
      "step": 694760
    },
    {
      "epoch": 1.1370227083783377,
      "grad_norm": 0.46936461329460144,
      "learning_rate": 7.776862608145596e-06,
      "loss": 0.0244,
      "step": 694780
    },
    {
      "epoch": 1.1370554388169911,
      "grad_norm": 0.5869274735450745,
      "learning_rate": 7.776796715932078e-06,
      "loss": 0.023,
      "step": 694800
    },
    {
      "epoch": 1.1370881692556445,
      "grad_norm": 0.15941517055034637,
      "learning_rate": 7.776730823718562e-06,
      "loss": 0.0166,
      "step": 694820
    },
    {
      "epoch": 1.1371208996942976,
      "grad_norm": 1.417378306388855,
      "learning_rate": 7.776664931505045e-06,
      "loss": 0.0253,
      "step": 694840
    },
    {
      "epoch": 1.137153630132951,
      "grad_norm": 0.44349879026412964,
      "learning_rate": 7.776599039291527e-06,
      "loss": 0.0184,
      "step": 694860
    },
    {
      "epoch": 1.1371863605716044,
      "grad_norm": 0.3001776337623596,
      "learning_rate": 7.776533147078011e-06,
      "loss": 0.0242,
      "step": 694880
    },
    {
      "epoch": 1.1372190910102578,
      "grad_norm": 1.0750248432159424,
      "learning_rate": 7.776467254864495e-06,
      "loss": 0.0255,
      "step": 694900
    },
    {
      "epoch": 1.1372518214489111,
      "grad_norm": 2.535641670227051,
      "learning_rate": 7.776401362650976e-06,
      "loss": 0.0251,
      "step": 694920
    },
    {
      "epoch": 1.1372845518875643,
      "grad_norm": 0.3034467101097107,
      "learning_rate": 7.77633547043746e-06,
      "loss": 0.0213,
      "step": 694940
    },
    {
      "epoch": 1.1373172823262176,
      "grad_norm": 0.33989056944847107,
      "learning_rate": 7.776269578223942e-06,
      "loss": 0.0192,
      "step": 694960
    },
    {
      "epoch": 1.137350012764871,
      "grad_norm": 0.667218804359436,
      "learning_rate": 7.776203686010425e-06,
      "loss": 0.0259,
      "step": 694980
    },
    {
      "epoch": 1.1373827432035244,
      "grad_norm": 0.34728115797042847,
      "learning_rate": 7.776137793796907e-06,
      "loss": 0.0173,
      "step": 695000
    },
    {
      "epoch": 1.1374154736421778,
      "grad_norm": 0.9807868003845215,
      "learning_rate": 7.776071901583391e-06,
      "loss": 0.024,
      "step": 695020
    },
    {
      "epoch": 1.1374482040808311,
      "grad_norm": 0.3219339847564697,
      "learning_rate": 7.776006009369873e-06,
      "loss": 0.0177,
      "step": 695040
    },
    {
      "epoch": 1.1374809345194845,
      "grad_norm": 0.6116460561752319,
      "learning_rate": 7.775940117156356e-06,
      "loss": 0.0236,
      "step": 695060
    },
    {
      "epoch": 1.1375136649581377,
      "grad_norm": 1.0746629238128662,
      "learning_rate": 7.775874224942838e-06,
      "loss": 0.018,
      "step": 695080
    },
    {
      "epoch": 1.137546395396791,
      "grad_norm": 0.43347448110580444,
      "learning_rate": 7.775808332729322e-06,
      "loss": 0.0225,
      "step": 695100
    },
    {
      "epoch": 1.1375791258354444,
      "grad_norm": 1.2694045305252075,
      "learning_rate": 7.775742440515804e-06,
      "loss": 0.0185,
      "step": 695120
    },
    {
      "epoch": 1.1376118562740978,
      "grad_norm": 1.757498025894165,
      "learning_rate": 7.775676548302287e-06,
      "loss": 0.0215,
      "step": 695140
    },
    {
      "epoch": 1.1376445867127511,
      "grad_norm": 0.4850389361381531,
      "learning_rate": 7.77561065608877e-06,
      "loss": 0.0296,
      "step": 695160
    },
    {
      "epoch": 1.1376773171514045,
      "grad_norm": 0.36759063601493835,
      "learning_rate": 7.775544763875253e-06,
      "loss": 0.0276,
      "step": 695180
    },
    {
      "epoch": 1.137710047590058,
      "grad_norm": 0.9315057396888733,
      "learning_rate": 7.775478871661736e-06,
      "loss": 0.0204,
      "step": 695200
    },
    {
      "epoch": 1.137742778028711,
      "grad_norm": 0.7518630623817444,
      "learning_rate": 7.775412979448218e-06,
      "loss": 0.0213,
      "step": 695220
    },
    {
      "epoch": 1.1377755084673644,
      "grad_norm": 0.6198894381523132,
      "learning_rate": 7.775347087234702e-06,
      "loss": 0.0178,
      "step": 695240
    },
    {
      "epoch": 1.1378082389060178,
      "grad_norm": 0.8173847198486328,
      "learning_rate": 7.775281195021186e-06,
      "loss": 0.0284,
      "step": 695260
    },
    {
      "epoch": 1.1378409693446712,
      "grad_norm": 1.2852058410644531,
      "learning_rate": 7.775215302807667e-06,
      "loss": 0.0171,
      "step": 695280
    },
    {
      "epoch": 1.1378736997833245,
      "grad_norm": 0.5542579293251038,
      "learning_rate": 7.775149410594151e-06,
      "loss": 0.0224,
      "step": 695300
    },
    {
      "epoch": 1.137906430221978,
      "grad_norm": 0.766640305519104,
      "learning_rate": 7.775083518380635e-06,
      "loss": 0.0222,
      "step": 695320
    },
    {
      "epoch": 1.1379391606606313,
      "grad_norm": 0.3682163953781128,
      "learning_rate": 7.775017626167117e-06,
      "loss": 0.0231,
      "step": 695340
    },
    {
      "epoch": 1.1379718910992844,
      "grad_norm": 0.8595259189605713,
      "learning_rate": 7.7749517339536e-06,
      "loss": 0.0217,
      "step": 695360
    },
    {
      "epoch": 1.1380046215379378,
      "grad_norm": 0.5061327219009399,
      "learning_rate": 7.774885841740082e-06,
      "loss": 0.0224,
      "step": 695380
    },
    {
      "epoch": 1.1380373519765912,
      "grad_norm": 1.2731399536132812,
      "learning_rate": 7.774819949526566e-06,
      "loss": 0.028,
      "step": 695400
    },
    {
      "epoch": 1.1380700824152445,
      "grad_norm": 1.8562397956848145,
      "learning_rate": 7.774754057313047e-06,
      "loss": 0.0293,
      "step": 695420
    },
    {
      "epoch": 1.138102812853898,
      "grad_norm": 1.3650195598602295,
      "learning_rate": 7.774688165099531e-06,
      "loss": 0.0313,
      "step": 695440
    },
    {
      "epoch": 1.1381355432925513,
      "grad_norm": 0.9670961499214172,
      "learning_rate": 7.774622272886013e-06,
      "loss": 0.0259,
      "step": 695460
    },
    {
      "epoch": 1.1381682737312047,
      "grad_norm": 0.743922233581543,
      "learning_rate": 7.774556380672497e-06,
      "loss": 0.0182,
      "step": 695480
    },
    {
      "epoch": 1.1382010041698578,
      "grad_norm": 1.281172513961792,
      "learning_rate": 7.774490488458978e-06,
      "loss": 0.0262,
      "step": 695500
    },
    {
      "epoch": 1.1382337346085112,
      "grad_norm": 0.9070181846618652,
      "learning_rate": 7.774424596245462e-06,
      "loss": 0.0294,
      "step": 695520
    },
    {
      "epoch": 1.1382664650471646,
      "grad_norm": 0.9249593615531921,
      "learning_rate": 7.774358704031946e-06,
      "loss": 0.0188,
      "step": 695540
    },
    {
      "epoch": 1.138299195485818,
      "grad_norm": 0.621431291103363,
      "learning_rate": 7.774292811818427e-06,
      "loss": 0.0233,
      "step": 695560
    },
    {
      "epoch": 1.1383319259244713,
      "grad_norm": 0.6793684363365173,
      "learning_rate": 7.774226919604911e-06,
      "loss": 0.0313,
      "step": 695580
    },
    {
      "epoch": 1.1383646563631247,
      "grad_norm": 0.6190235018730164,
      "learning_rate": 7.774161027391393e-06,
      "loss": 0.0298,
      "step": 695600
    },
    {
      "epoch": 1.1383973868017778,
      "grad_norm": 0.860844075679779,
      "learning_rate": 7.774095135177877e-06,
      "loss": 0.0163,
      "step": 695620
    },
    {
      "epoch": 1.1384301172404312,
      "grad_norm": 1.5014399290084839,
      "learning_rate": 7.77402924296436e-06,
      "loss": 0.0302,
      "step": 695640
    },
    {
      "epoch": 1.1384628476790846,
      "grad_norm": 0.9354954957962036,
      "learning_rate": 7.773963350750842e-06,
      "loss": 0.0269,
      "step": 695660
    },
    {
      "epoch": 1.138495578117738,
      "grad_norm": 0.48913857340812683,
      "learning_rate": 7.773897458537326e-06,
      "loss": 0.021,
      "step": 695680
    },
    {
      "epoch": 1.1385283085563913,
      "grad_norm": 0.6470220685005188,
      "learning_rate": 7.77383156632381e-06,
      "loss": 0.0204,
      "step": 695700
    },
    {
      "epoch": 1.1385610389950447,
      "grad_norm": 0.2786191999912262,
      "learning_rate": 7.773765674110291e-06,
      "loss": 0.0291,
      "step": 695720
    },
    {
      "epoch": 1.1385937694336978,
      "grad_norm": 0.387496680021286,
      "learning_rate": 7.773699781896775e-06,
      "loss": 0.0233,
      "step": 695740
    },
    {
      "epoch": 1.1386264998723512,
      "grad_norm": 0.7630954384803772,
      "learning_rate": 7.773633889683257e-06,
      "loss": 0.0263,
      "step": 695760
    },
    {
      "epoch": 1.1386592303110046,
      "grad_norm": 0.5355023741722107,
      "learning_rate": 7.77356799746974e-06,
      "loss": 0.0243,
      "step": 695780
    },
    {
      "epoch": 1.138691960749658,
      "grad_norm": 0.2575281858444214,
      "learning_rate": 7.773502105256222e-06,
      "loss": 0.0208,
      "step": 695800
    },
    {
      "epoch": 1.1387246911883113,
      "grad_norm": 0.3810108006000519,
      "learning_rate": 7.773436213042706e-06,
      "loss": 0.0211,
      "step": 695820
    },
    {
      "epoch": 1.1387574216269647,
      "grad_norm": 1.498684048652649,
      "learning_rate": 7.773370320829188e-06,
      "loss": 0.0212,
      "step": 695840
    },
    {
      "epoch": 1.138790152065618,
      "grad_norm": 0.3011878728866577,
      "learning_rate": 7.773304428615671e-06,
      "loss": 0.0215,
      "step": 695860
    },
    {
      "epoch": 1.1388228825042712,
      "grad_norm": 0.13945770263671875,
      "learning_rate": 7.773238536402155e-06,
      "loss": 0.0175,
      "step": 695880
    },
    {
      "epoch": 1.1388556129429246,
      "grad_norm": 2.011812448501587,
      "learning_rate": 7.773172644188637e-06,
      "loss": 0.0183,
      "step": 695900
    },
    {
      "epoch": 1.138888343381578,
      "grad_norm": 0.2216329127550125,
      "learning_rate": 7.77310675197512e-06,
      "loss": 0.0149,
      "step": 695920
    },
    {
      "epoch": 1.1389210738202313,
      "grad_norm": 1.1852471828460693,
      "learning_rate": 7.773040859761602e-06,
      "loss": 0.0212,
      "step": 695940
    },
    {
      "epoch": 1.1389538042588847,
      "grad_norm": 0.273506224155426,
      "learning_rate": 7.772974967548086e-06,
      "loss": 0.0171,
      "step": 695960
    },
    {
      "epoch": 1.138986534697538,
      "grad_norm": 0.5157045125961304,
      "learning_rate": 7.772909075334568e-06,
      "loss": 0.0169,
      "step": 695980
    },
    {
      "epoch": 1.1390192651361914,
      "grad_norm": 1.2388774156570435,
      "learning_rate": 7.772843183121051e-06,
      "loss": 0.0242,
      "step": 696000
    },
    {
      "epoch": 1.1390519955748446,
      "grad_norm": 0.9244533777236938,
      "learning_rate": 7.772777290907533e-06,
      "loss": 0.0257,
      "step": 696020
    },
    {
      "epoch": 1.139084726013498,
      "grad_norm": 0.13896092772483826,
      "learning_rate": 7.772711398694017e-06,
      "loss": 0.0193,
      "step": 696040
    },
    {
      "epoch": 1.1391174564521513,
      "grad_norm": 0.1257075071334839,
      "learning_rate": 7.7726455064805e-06,
      "loss": 0.0122,
      "step": 696060
    },
    {
      "epoch": 1.1391501868908047,
      "grad_norm": 1.5759202241897583,
      "learning_rate": 7.772579614266982e-06,
      "loss": 0.0256,
      "step": 696080
    },
    {
      "epoch": 1.139182917329458,
      "grad_norm": 0.588050127029419,
      "learning_rate": 7.772513722053466e-06,
      "loss": 0.0218,
      "step": 696100
    },
    {
      "epoch": 1.1392156477681115,
      "grad_norm": 0.9624955058097839,
      "learning_rate": 7.77244782983995e-06,
      "loss": 0.0247,
      "step": 696120
    },
    {
      "epoch": 1.1392483782067648,
      "grad_norm": 1.5973789691925049,
      "learning_rate": 7.772381937626431e-06,
      "loss": 0.0267,
      "step": 696140
    },
    {
      "epoch": 1.139281108645418,
      "grad_norm": 0.2667156159877777,
      "learning_rate": 7.772316045412915e-06,
      "loss": 0.021,
      "step": 696160
    },
    {
      "epoch": 1.1393138390840714,
      "grad_norm": 0.5712644457817078,
      "learning_rate": 7.772250153199397e-06,
      "loss": 0.022,
      "step": 696180
    },
    {
      "epoch": 1.1393465695227247,
      "grad_norm": 0.5730403065681458,
      "learning_rate": 7.77218426098588e-06,
      "loss": 0.0239,
      "step": 696200
    },
    {
      "epoch": 1.139379299961378,
      "grad_norm": 0.6549984812736511,
      "learning_rate": 7.772118368772362e-06,
      "loss": 0.0207,
      "step": 696220
    },
    {
      "epoch": 1.1394120304000315,
      "grad_norm": 0.9366998672485352,
      "learning_rate": 7.772052476558846e-06,
      "loss": 0.0314,
      "step": 696240
    },
    {
      "epoch": 1.1394447608386848,
      "grad_norm": 0.46750661730766296,
      "learning_rate": 7.77198658434533e-06,
      "loss": 0.0277,
      "step": 696260
    },
    {
      "epoch": 1.1394774912773382,
      "grad_norm": 0.29366448521614075,
      "learning_rate": 7.771920692131811e-06,
      "loss": 0.0262,
      "step": 696280
    },
    {
      "epoch": 1.1395102217159914,
      "grad_norm": 0.2983919084072113,
      "learning_rate": 7.771854799918295e-06,
      "loss": 0.0235,
      "step": 696300
    },
    {
      "epoch": 1.1395429521546447,
      "grad_norm": 0.6135240793228149,
      "learning_rate": 7.771788907704777e-06,
      "loss": 0.0212,
      "step": 696320
    },
    {
      "epoch": 1.1395756825932981,
      "grad_norm": 0.5273166298866272,
      "learning_rate": 7.77172301549126e-06,
      "loss": 0.0275,
      "step": 696340
    },
    {
      "epoch": 1.1396084130319515,
      "grad_norm": 0.12986770272254944,
      "learning_rate": 7.771657123277742e-06,
      "loss": 0.0258,
      "step": 696360
    },
    {
      "epoch": 1.1396411434706049,
      "grad_norm": 0.13969431817531586,
      "learning_rate": 7.771591231064226e-06,
      "loss": 0.0219,
      "step": 696380
    },
    {
      "epoch": 1.1396738739092582,
      "grad_norm": 0.474365234375,
      "learning_rate": 7.771525338850708e-06,
      "loss": 0.0264,
      "step": 696400
    },
    {
      "epoch": 1.1397066043479114,
      "grad_norm": 0.4864562451839447,
      "learning_rate": 7.771459446637191e-06,
      "loss": 0.0241,
      "step": 696420
    },
    {
      "epoch": 1.1397393347865648,
      "grad_norm": 1.348817229270935,
      "learning_rate": 7.771393554423675e-06,
      "loss": 0.0255,
      "step": 696440
    },
    {
      "epoch": 1.1397720652252181,
      "grad_norm": 0.5508064031600952,
      "learning_rate": 7.771327662210157e-06,
      "loss": 0.0202,
      "step": 696460
    },
    {
      "epoch": 1.1398047956638715,
      "grad_norm": 0.35273268818855286,
      "learning_rate": 7.77126176999664e-06,
      "loss": 0.0191,
      "step": 696480
    },
    {
      "epoch": 1.1398375261025249,
      "grad_norm": 0.21679937839508057,
      "learning_rate": 7.771195877783124e-06,
      "loss": 0.0195,
      "step": 696500
    },
    {
      "epoch": 1.1398702565411782,
      "grad_norm": 1.9975491762161255,
      "learning_rate": 7.771129985569606e-06,
      "loss": 0.0272,
      "step": 696520
    },
    {
      "epoch": 1.1399029869798314,
      "grad_norm": 0.8452656269073486,
      "learning_rate": 7.77106409335609e-06,
      "loss": 0.0187,
      "step": 696540
    },
    {
      "epoch": 1.1399357174184848,
      "grad_norm": 0.7141573429107666,
      "learning_rate": 7.770998201142571e-06,
      "loss": 0.019,
      "step": 696560
    },
    {
      "epoch": 1.1399684478571381,
      "grad_norm": 1.4670042991638184,
      "learning_rate": 7.770932308929055e-06,
      "loss": 0.0241,
      "step": 696580
    },
    {
      "epoch": 1.1400011782957915,
      "grad_norm": 1.8958185911178589,
      "learning_rate": 7.770866416715538e-06,
      "loss": 0.0213,
      "step": 696600
    },
    {
      "epoch": 1.1400339087344449,
      "grad_norm": 0.721409797668457,
      "learning_rate": 7.77080052450202e-06,
      "loss": 0.0233,
      "step": 696620
    },
    {
      "epoch": 1.1400666391730983,
      "grad_norm": 0.19153672456741333,
      "learning_rate": 7.770734632288504e-06,
      "loss": 0.0256,
      "step": 696640
    },
    {
      "epoch": 1.1400993696117516,
      "grad_norm": 0.3483942747116089,
      "learning_rate": 7.770668740074986e-06,
      "loss": 0.0241,
      "step": 696660
    },
    {
      "epoch": 1.1401321000504048,
      "grad_norm": 0.3598012328147888,
      "learning_rate": 7.77060284786147e-06,
      "loss": 0.0227,
      "step": 696680
    },
    {
      "epoch": 1.1401648304890581,
      "grad_norm": 0.361086368560791,
      "learning_rate": 7.770536955647951e-06,
      "loss": 0.0246,
      "step": 696700
    },
    {
      "epoch": 1.1401975609277115,
      "grad_norm": 0.6457740068435669,
      "learning_rate": 7.770471063434435e-06,
      "loss": 0.0232,
      "step": 696720
    },
    {
      "epoch": 1.140230291366365,
      "grad_norm": 0.3550935983657837,
      "learning_rate": 7.770405171220917e-06,
      "loss": 0.0238,
      "step": 696740
    },
    {
      "epoch": 1.1402630218050183,
      "grad_norm": 0.449308305978775,
      "learning_rate": 7.7703392790074e-06,
      "loss": 0.0257,
      "step": 696760
    },
    {
      "epoch": 1.1402957522436716,
      "grad_norm": 0.6854379773139954,
      "learning_rate": 7.770273386793882e-06,
      "loss": 0.0144,
      "step": 696780
    },
    {
      "epoch": 1.140328482682325,
      "grad_norm": 0.26228174567222595,
      "learning_rate": 7.770207494580366e-06,
      "loss": 0.0185,
      "step": 696800
    },
    {
      "epoch": 1.1403612131209782,
      "grad_norm": 0.962059736251831,
      "learning_rate": 7.770141602366848e-06,
      "loss": 0.024,
      "step": 696820
    },
    {
      "epoch": 1.1403939435596315,
      "grad_norm": 0.32555311918258667,
      "learning_rate": 7.770075710153331e-06,
      "loss": 0.0212,
      "step": 696840
    },
    {
      "epoch": 1.140426673998285,
      "grad_norm": 1.1209614276885986,
      "learning_rate": 7.770009817939815e-06,
      "loss": 0.0314,
      "step": 696860
    },
    {
      "epoch": 1.1404594044369383,
      "grad_norm": 0.7734893560409546,
      "learning_rate": 7.769943925726297e-06,
      "loss": 0.0182,
      "step": 696880
    },
    {
      "epoch": 1.1404921348755916,
      "grad_norm": 1.0276764631271362,
      "learning_rate": 7.76987803351278e-06,
      "loss": 0.0279,
      "step": 696900
    },
    {
      "epoch": 1.140524865314245,
      "grad_norm": 1.2347297668457031,
      "learning_rate": 7.769812141299264e-06,
      "loss": 0.0207,
      "step": 696920
    },
    {
      "epoch": 1.1405575957528984,
      "grad_norm": 1.0245519876480103,
      "learning_rate": 7.769746249085748e-06,
      "loss": 0.0215,
      "step": 696940
    },
    {
      "epoch": 1.1405903261915515,
      "grad_norm": 0.46587124466896057,
      "learning_rate": 7.76968035687223e-06,
      "loss": 0.0211,
      "step": 696960
    },
    {
      "epoch": 1.140623056630205,
      "grad_norm": 0.5772899389266968,
      "learning_rate": 7.769614464658713e-06,
      "loss": 0.032,
      "step": 696980
    },
    {
      "epoch": 1.1406557870688583,
      "grad_norm": 1.1198358535766602,
      "learning_rate": 7.769548572445195e-06,
      "loss": 0.0236,
      "step": 697000
    },
    {
      "epoch": 1.1406885175075117,
      "grad_norm": 0.8410360813140869,
      "learning_rate": 7.769482680231679e-06,
      "loss": 0.0198,
      "step": 697020
    },
    {
      "epoch": 1.140721247946165,
      "grad_norm": 0.13202892243862152,
      "learning_rate": 7.76941678801816e-06,
      "loss": 0.0254,
      "step": 697040
    },
    {
      "epoch": 1.1407539783848184,
      "grad_norm": 0.6362167000770569,
      "learning_rate": 7.769350895804644e-06,
      "loss": 0.0238,
      "step": 697060
    },
    {
      "epoch": 1.1407867088234718,
      "grad_norm": 0.19195209443569183,
      "learning_rate": 7.769285003591126e-06,
      "loss": 0.0187,
      "step": 697080
    },
    {
      "epoch": 1.140819439262125,
      "grad_norm": 0.5023092031478882,
      "learning_rate": 7.76921911137761e-06,
      "loss": 0.0218,
      "step": 697100
    },
    {
      "epoch": 1.1408521697007783,
      "grad_norm": 1.2599053382873535,
      "learning_rate": 7.769153219164091e-06,
      "loss": 0.0332,
      "step": 697120
    },
    {
      "epoch": 1.1408849001394317,
      "grad_norm": 0.15251146256923676,
      "learning_rate": 7.769087326950575e-06,
      "loss": 0.0258,
      "step": 697140
    },
    {
      "epoch": 1.140917630578085,
      "grad_norm": 0.46261078119277954,
      "learning_rate": 7.769021434737057e-06,
      "loss": 0.0173,
      "step": 697160
    },
    {
      "epoch": 1.1409503610167384,
      "grad_norm": 1.2204138040542603,
      "learning_rate": 7.76895554252354e-06,
      "loss": 0.0291,
      "step": 697180
    },
    {
      "epoch": 1.1409830914553916,
      "grad_norm": 2.1064274311065674,
      "learning_rate": 7.768889650310022e-06,
      "loss": 0.0272,
      "step": 697200
    },
    {
      "epoch": 1.141015821894045,
      "grad_norm": 0.35358425974845886,
      "learning_rate": 7.768823758096506e-06,
      "loss": 0.0202,
      "step": 697220
    },
    {
      "epoch": 1.1410485523326983,
      "grad_norm": 0.256722092628479,
      "learning_rate": 7.76875786588299e-06,
      "loss": 0.0244,
      "step": 697240
    },
    {
      "epoch": 1.1410812827713517,
      "grad_norm": 0.8785296082496643,
      "learning_rate": 7.768691973669471e-06,
      "loss": 0.0279,
      "step": 697260
    },
    {
      "epoch": 1.141114013210005,
      "grad_norm": 1.7387369871139526,
      "learning_rate": 7.768626081455955e-06,
      "loss": 0.0162,
      "step": 697280
    },
    {
      "epoch": 1.1411467436486584,
      "grad_norm": 0.2042631357908249,
      "learning_rate": 7.768560189242439e-06,
      "loss": 0.022,
      "step": 697300
    },
    {
      "epoch": 1.1411794740873118,
      "grad_norm": 0.7437792420387268,
      "learning_rate": 7.76849429702892e-06,
      "loss": 0.0157,
      "step": 697320
    },
    {
      "epoch": 1.141212204525965,
      "grad_norm": 2.289552927017212,
      "learning_rate": 7.768428404815404e-06,
      "loss": 0.0238,
      "step": 697340
    },
    {
      "epoch": 1.1412449349646183,
      "grad_norm": 0.2832321226596832,
      "learning_rate": 7.768362512601888e-06,
      "loss": 0.016,
      "step": 697360
    },
    {
      "epoch": 1.1412776654032717,
      "grad_norm": 0.629067599773407,
      "learning_rate": 7.76829662038837e-06,
      "loss": 0.0243,
      "step": 697380
    },
    {
      "epoch": 1.141310395841925,
      "grad_norm": 0.8749783635139465,
      "learning_rate": 7.768230728174853e-06,
      "loss": 0.019,
      "step": 697400
    },
    {
      "epoch": 1.1413431262805784,
      "grad_norm": 0.5813165903091431,
      "learning_rate": 7.768164835961335e-06,
      "loss": 0.0249,
      "step": 697420
    },
    {
      "epoch": 1.1413758567192318,
      "grad_norm": 0.4301048219203949,
      "learning_rate": 7.768098943747819e-06,
      "loss": 0.02,
      "step": 697440
    },
    {
      "epoch": 1.1414085871578852,
      "grad_norm": 0.9600458145141602,
      "learning_rate": 7.7680330515343e-06,
      "loss": 0.0199,
      "step": 697460
    },
    {
      "epoch": 1.1414413175965383,
      "grad_norm": 0.3827414810657501,
      "learning_rate": 7.767967159320784e-06,
      "loss": 0.0235,
      "step": 697480
    },
    {
      "epoch": 1.1414740480351917,
      "grad_norm": 0.829822838306427,
      "learning_rate": 7.767901267107266e-06,
      "loss": 0.0202,
      "step": 697500
    },
    {
      "epoch": 1.141506778473845,
      "grad_norm": 0.22874078154563904,
      "learning_rate": 7.76783537489375e-06,
      "loss": 0.0205,
      "step": 697520
    },
    {
      "epoch": 1.1415395089124984,
      "grad_norm": 1.0042728185653687,
      "learning_rate": 7.767769482680231e-06,
      "loss": 0.0229,
      "step": 697540
    },
    {
      "epoch": 1.1415722393511518,
      "grad_norm": 0.8035153746604919,
      "learning_rate": 7.767703590466715e-06,
      "loss": 0.0218,
      "step": 697560
    },
    {
      "epoch": 1.1416049697898052,
      "grad_norm": 0.7604526281356812,
      "learning_rate": 7.767637698253197e-06,
      "loss": 0.0245,
      "step": 697580
    },
    {
      "epoch": 1.1416377002284586,
      "grad_norm": 0.6926331520080566,
      "learning_rate": 7.76757180603968e-06,
      "loss": 0.0215,
      "step": 697600
    },
    {
      "epoch": 1.1416704306671117,
      "grad_norm": 0.8293985724449158,
      "learning_rate": 7.767505913826164e-06,
      "loss": 0.0183,
      "step": 697620
    },
    {
      "epoch": 1.141703161105765,
      "grad_norm": 1.5371503829956055,
      "learning_rate": 7.767440021612646e-06,
      "loss": 0.0209,
      "step": 697640
    },
    {
      "epoch": 1.1417358915444185,
      "grad_norm": 1.8400241136550903,
      "learning_rate": 7.76737412939913e-06,
      "loss": 0.0209,
      "step": 697660
    },
    {
      "epoch": 1.1417686219830718,
      "grad_norm": 1.0611443519592285,
      "learning_rate": 7.767308237185613e-06,
      "loss": 0.0248,
      "step": 697680
    },
    {
      "epoch": 1.1418013524217252,
      "grad_norm": 0.6652420163154602,
      "learning_rate": 7.767242344972095e-06,
      "loss": 0.0212,
      "step": 697700
    },
    {
      "epoch": 1.1418340828603786,
      "grad_norm": 0.15850098431110382,
      "learning_rate": 7.767176452758579e-06,
      "loss": 0.0239,
      "step": 697720
    },
    {
      "epoch": 1.141866813299032,
      "grad_norm": 0.46175652742385864,
      "learning_rate": 7.767110560545062e-06,
      "loss": 0.0191,
      "step": 697740
    },
    {
      "epoch": 1.141899543737685,
      "grad_norm": 1.0455304384231567,
      "learning_rate": 7.767044668331544e-06,
      "loss": 0.0217,
      "step": 697760
    },
    {
      "epoch": 1.1419322741763385,
      "grad_norm": 2.1563470363616943,
      "learning_rate": 7.766978776118028e-06,
      "loss": 0.0299,
      "step": 697780
    },
    {
      "epoch": 1.1419650046149918,
      "grad_norm": 0.7083471417427063,
      "learning_rate": 7.76691288390451e-06,
      "loss": 0.0271,
      "step": 697800
    },
    {
      "epoch": 1.1419977350536452,
      "grad_norm": 0.06483308970928192,
      "learning_rate": 7.766846991690993e-06,
      "loss": 0.0261,
      "step": 697820
    },
    {
      "epoch": 1.1420304654922986,
      "grad_norm": 1.4874173402786255,
      "learning_rate": 7.766781099477475e-06,
      "loss": 0.0194,
      "step": 697840
    },
    {
      "epoch": 1.142063195930952,
      "grad_norm": 0.6520317792892456,
      "learning_rate": 7.766715207263959e-06,
      "loss": 0.0256,
      "step": 697860
    },
    {
      "epoch": 1.1420959263696053,
      "grad_norm": 2.2616677284240723,
      "learning_rate": 7.76664931505044e-06,
      "loss": 0.0262,
      "step": 697880
    },
    {
      "epoch": 1.1421286568082585,
      "grad_norm": 2.854081153869629,
      "learning_rate": 7.766583422836924e-06,
      "loss": 0.0284,
      "step": 697900
    },
    {
      "epoch": 1.1421613872469119,
      "grad_norm": 0.5912018418312073,
      "learning_rate": 7.766517530623406e-06,
      "loss": 0.0233,
      "step": 697920
    },
    {
      "epoch": 1.1421941176855652,
      "grad_norm": 0.5055189728736877,
      "learning_rate": 7.76645163840989e-06,
      "loss": 0.0234,
      "step": 697940
    },
    {
      "epoch": 1.1422268481242186,
      "grad_norm": 0.6081721186637878,
      "learning_rate": 7.766385746196372e-06,
      "loss": 0.0225,
      "step": 697960
    },
    {
      "epoch": 1.142259578562872,
      "grad_norm": 0.884158194065094,
      "learning_rate": 7.766319853982855e-06,
      "loss": 0.0211,
      "step": 697980
    },
    {
      "epoch": 1.1422923090015251,
      "grad_norm": 0.8940014243125916,
      "learning_rate": 7.766253961769339e-06,
      "loss": 0.0445,
      "step": 698000
    },
    {
      "epoch": 1.1423250394401785,
      "grad_norm": 3.042846441268921,
      "learning_rate": 7.76618806955582e-06,
      "loss": 0.029,
      "step": 698020
    },
    {
      "epoch": 1.1423577698788319,
      "grad_norm": 0.5221796631813049,
      "learning_rate": 7.766122177342304e-06,
      "loss": 0.0207,
      "step": 698040
    },
    {
      "epoch": 1.1423905003174852,
      "grad_norm": 2.207855224609375,
      "learning_rate": 7.766056285128786e-06,
      "loss": 0.022,
      "step": 698060
    },
    {
      "epoch": 1.1424232307561386,
      "grad_norm": 0.7529494762420654,
      "learning_rate": 7.76599039291527e-06,
      "loss": 0.0255,
      "step": 698080
    },
    {
      "epoch": 1.142455961194792,
      "grad_norm": 0.9720702171325684,
      "learning_rate": 7.765924500701753e-06,
      "loss": 0.0192,
      "step": 698100
    },
    {
      "epoch": 1.1424886916334454,
      "grad_norm": 0.9733291268348694,
      "learning_rate": 7.765858608488235e-06,
      "loss": 0.0334,
      "step": 698120
    },
    {
      "epoch": 1.1425214220720985,
      "grad_norm": 0.4141034185886383,
      "learning_rate": 7.765792716274719e-06,
      "loss": 0.0233,
      "step": 698140
    },
    {
      "epoch": 1.1425541525107519,
      "grad_norm": 0.682274580001831,
      "learning_rate": 7.765726824061202e-06,
      "loss": 0.018,
      "step": 698160
    },
    {
      "epoch": 1.1425868829494052,
      "grad_norm": 0.8216463327407837,
      "learning_rate": 7.765660931847684e-06,
      "loss": 0.0195,
      "step": 698180
    },
    {
      "epoch": 1.1426196133880586,
      "grad_norm": 5.39329719543457,
      "learning_rate": 7.765595039634168e-06,
      "loss": 0.0333,
      "step": 698200
    },
    {
      "epoch": 1.142652343826712,
      "grad_norm": 0.5146352052688599,
      "learning_rate": 7.76552914742065e-06,
      "loss": 0.0212,
      "step": 698220
    },
    {
      "epoch": 1.1426850742653654,
      "grad_norm": 1.1435109376907349,
      "learning_rate": 7.765463255207133e-06,
      "loss": 0.0327,
      "step": 698240
    },
    {
      "epoch": 1.1427178047040187,
      "grad_norm": 0.3060338497161865,
      "learning_rate": 7.765397362993615e-06,
      "loss": 0.0192,
      "step": 698260
    },
    {
      "epoch": 1.1427505351426719,
      "grad_norm": 0.4858624339103699,
      "learning_rate": 7.765331470780099e-06,
      "loss": 0.0238,
      "step": 698280
    },
    {
      "epoch": 1.1427832655813253,
      "grad_norm": 0.8201554417610168,
      "learning_rate": 7.76526557856658e-06,
      "loss": 0.0261,
      "step": 698300
    },
    {
      "epoch": 1.1428159960199786,
      "grad_norm": 0.5823765397071838,
      "learning_rate": 7.765199686353064e-06,
      "loss": 0.0246,
      "step": 698320
    },
    {
      "epoch": 1.142848726458632,
      "grad_norm": 0.9475518465042114,
      "learning_rate": 7.765133794139548e-06,
      "loss": 0.0206,
      "step": 698340
    },
    {
      "epoch": 1.1428814568972854,
      "grad_norm": 0.40567702054977417,
      "learning_rate": 7.76506790192603e-06,
      "loss": 0.0281,
      "step": 698360
    },
    {
      "epoch": 1.1429141873359387,
      "grad_norm": 1.6215153932571411,
      "learning_rate": 7.765002009712513e-06,
      "loss": 0.0191,
      "step": 698380
    },
    {
      "epoch": 1.1429469177745921,
      "grad_norm": 3.4259634017944336,
      "learning_rate": 7.764936117498995e-06,
      "loss": 0.0216,
      "step": 698400
    },
    {
      "epoch": 1.1429796482132453,
      "grad_norm": 0.7706189155578613,
      "learning_rate": 7.764870225285479e-06,
      "loss": 0.0228,
      "step": 698420
    },
    {
      "epoch": 1.1430123786518986,
      "grad_norm": 1.4448975324630737,
      "learning_rate": 7.76480433307196e-06,
      "loss": 0.0183,
      "step": 698440
    },
    {
      "epoch": 1.143045109090552,
      "grad_norm": 0.615233302116394,
      "learning_rate": 7.764738440858444e-06,
      "loss": 0.0189,
      "step": 698460
    },
    {
      "epoch": 1.1430778395292054,
      "grad_norm": 3.19692325592041,
      "learning_rate": 7.764672548644928e-06,
      "loss": 0.0223,
      "step": 698480
    },
    {
      "epoch": 1.1431105699678588,
      "grad_norm": 0.5879327058792114,
      "learning_rate": 7.76460665643141e-06,
      "loss": 0.0288,
      "step": 698500
    },
    {
      "epoch": 1.1431433004065121,
      "grad_norm": 0.47944876551628113,
      "learning_rate": 7.764540764217893e-06,
      "loss": 0.028,
      "step": 698520
    },
    {
      "epoch": 1.1431760308451655,
      "grad_norm": 0.5268436670303345,
      "learning_rate": 7.764474872004377e-06,
      "loss": 0.0192,
      "step": 698540
    },
    {
      "epoch": 1.1432087612838187,
      "grad_norm": 0.48702573776245117,
      "learning_rate": 7.764408979790859e-06,
      "loss": 0.0134,
      "step": 698560
    },
    {
      "epoch": 1.143241491722472,
      "grad_norm": 3.696847915649414,
      "learning_rate": 7.764343087577342e-06,
      "loss": 0.0263,
      "step": 698580
    },
    {
      "epoch": 1.1432742221611254,
      "grad_norm": 0.5955663919448853,
      "learning_rate": 7.764277195363824e-06,
      "loss": 0.0275,
      "step": 698600
    },
    {
      "epoch": 1.1433069525997788,
      "grad_norm": 0.43470579385757446,
      "learning_rate": 7.764211303150308e-06,
      "loss": 0.0153,
      "step": 698620
    },
    {
      "epoch": 1.1433396830384321,
      "grad_norm": 1.4637631177902222,
      "learning_rate": 7.76414541093679e-06,
      "loss": 0.0227,
      "step": 698640
    },
    {
      "epoch": 1.1433724134770855,
      "grad_norm": 1.0639561414718628,
      "learning_rate": 7.764079518723273e-06,
      "loss": 0.022,
      "step": 698660
    },
    {
      "epoch": 1.1434051439157387,
      "grad_norm": 0.46191227436065674,
      "learning_rate": 7.764013626509755e-06,
      "loss": 0.0145,
      "step": 698680
    },
    {
      "epoch": 1.143437874354392,
      "grad_norm": 0.30881431698799133,
      "learning_rate": 7.763947734296239e-06,
      "loss": 0.0298,
      "step": 698700
    },
    {
      "epoch": 1.1434706047930454,
      "grad_norm": 0.11556430906057358,
      "learning_rate": 7.763881842082722e-06,
      "loss": 0.0204,
      "step": 698720
    },
    {
      "epoch": 1.1435033352316988,
      "grad_norm": 0.5938035845756531,
      "learning_rate": 7.763815949869204e-06,
      "loss": 0.0264,
      "step": 698740
    },
    {
      "epoch": 1.1435360656703522,
      "grad_norm": 0.7804344892501831,
      "learning_rate": 7.763750057655688e-06,
      "loss": 0.0176,
      "step": 698760
    },
    {
      "epoch": 1.1435687961090055,
      "grad_norm": 0.6216878294944763,
      "learning_rate": 7.76368416544217e-06,
      "loss": 0.0289,
      "step": 698780
    },
    {
      "epoch": 1.1436015265476587,
      "grad_norm": 1.3755666017532349,
      "learning_rate": 7.763618273228653e-06,
      "loss": 0.0258,
      "step": 698800
    },
    {
      "epoch": 1.143634256986312,
      "grad_norm": 0.3851032555103302,
      "learning_rate": 7.763552381015135e-06,
      "loss": 0.026,
      "step": 698820
    },
    {
      "epoch": 1.1436669874249654,
      "grad_norm": 1.1627804040908813,
      "learning_rate": 7.763486488801619e-06,
      "loss": 0.0253,
      "step": 698840
    },
    {
      "epoch": 1.1436997178636188,
      "grad_norm": 1.3537143468856812,
      "learning_rate": 7.7634205965881e-06,
      "loss": 0.0215,
      "step": 698860
    },
    {
      "epoch": 1.1437324483022722,
      "grad_norm": 0.558759331703186,
      "learning_rate": 7.763354704374584e-06,
      "loss": 0.0223,
      "step": 698880
    },
    {
      "epoch": 1.1437651787409255,
      "grad_norm": 1.294679045677185,
      "learning_rate": 7.763288812161068e-06,
      "loss": 0.0226,
      "step": 698900
    },
    {
      "epoch": 1.143797909179579,
      "grad_norm": 1.1757880449295044,
      "learning_rate": 7.76322291994755e-06,
      "loss": 0.029,
      "step": 698920
    },
    {
      "epoch": 1.143830639618232,
      "grad_norm": 0.6907636523246765,
      "learning_rate": 7.763157027734033e-06,
      "loss": 0.0126,
      "step": 698940
    },
    {
      "epoch": 1.1438633700568854,
      "grad_norm": 1.3403652906417847,
      "learning_rate": 7.763091135520517e-06,
      "loss": 0.0148,
      "step": 698960
    },
    {
      "epoch": 1.1438961004955388,
      "grad_norm": 0.5017057657241821,
      "learning_rate": 7.763025243306999e-06,
      "loss": 0.0242,
      "step": 698980
    },
    {
      "epoch": 1.1439288309341922,
      "grad_norm": 0.827659547328949,
      "learning_rate": 7.762959351093482e-06,
      "loss": 0.0255,
      "step": 699000
    },
    {
      "epoch": 1.1439615613728455,
      "grad_norm": 0.5450732111930847,
      "learning_rate": 7.762893458879964e-06,
      "loss": 0.0201,
      "step": 699020
    },
    {
      "epoch": 1.143994291811499,
      "grad_norm": 0.6178181767463684,
      "learning_rate": 7.762827566666448e-06,
      "loss": 0.0158,
      "step": 699040
    },
    {
      "epoch": 1.1440270222501523,
      "grad_norm": 0.29805028438568115,
      "learning_rate": 7.762761674452932e-06,
      "loss": 0.013,
      "step": 699060
    },
    {
      "epoch": 1.1440597526888054,
      "grad_norm": 0.6936431527137756,
      "learning_rate": 7.762695782239413e-06,
      "loss": 0.0209,
      "step": 699080
    },
    {
      "epoch": 1.1440924831274588,
      "grad_norm": 0.3962602913379669,
      "learning_rate": 7.762629890025897e-06,
      "loss": 0.0199,
      "step": 699100
    },
    {
      "epoch": 1.1441252135661122,
      "grad_norm": 0.3658461570739746,
      "learning_rate": 7.762563997812379e-06,
      "loss": 0.0286,
      "step": 699120
    },
    {
      "epoch": 1.1441579440047656,
      "grad_norm": 0.7223841547966003,
      "learning_rate": 7.762498105598862e-06,
      "loss": 0.0206,
      "step": 699140
    },
    {
      "epoch": 1.144190674443419,
      "grad_norm": 0.2712351381778717,
      "learning_rate": 7.762432213385344e-06,
      "loss": 0.0322,
      "step": 699160
    },
    {
      "epoch": 1.1442234048820723,
      "grad_norm": 1.396051049232483,
      "learning_rate": 7.762366321171828e-06,
      "loss": 0.0224,
      "step": 699180
    },
    {
      "epoch": 1.1442561353207257,
      "grad_norm": 2.952030658721924,
      "learning_rate": 7.76230042895831e-06,
      "loss": 0.0314,
      "step": 699200
    },
    {
      "epoch": 1.1442888657593788,
      "grad_norm": 2.672422170639038,
      "learning_rate": 7.762234536744793e-06,
      "loss": 0.0293,
      "step": 699220
    },
    {
      "epoch": 1.1443215961980322,
      "grad_norm": 0.36508631706237793,
      "learning_rate": 7.762168644531275e-06,
      "loss": 0.0219,
      "step": 699240
    },
    {
      "epoch": 1.1443543266366856,
      "grad_norm": 1.3031593561172485,
      "learning_rate": 7.762102752317759e-06,
      "loss": 0.0308,
      "step": 699260
    },
    {
      "epoch": 1.144387057075339,
      "grad_norm": 0.14313499629497528,
      "learning_rate": 7.762036860104242e-06,
      "loss": 0.02,
      "step": 699280
    },
    {
      "epoch": 1.1444197875139923,
      "grad_norm": 0.4219146966934204,
      "learning_rate": 7.761970967890724e-06,
      "loss": 0.0253,
      "step": 699300
    },
    {
      "epoch": 1.1444525179526457,
      "grad_norm": 0.40665286779403687,
      "learning_rate": 7.761905075677208e-06,
      "loss": 0.0264,
      "step": 699320
    },
    {
      "epoch": 1.144485248391299,
      "grad_norm": 1.2613258361816406,
      "learning_rate": 7.761839183463692e-06,
      "loss": 0.021,
      "step": 699340
    },
    {
      "epoch": 1.1445179788299522,
      "grad_norm": 1.0529917478561401,
      "learning_rate": 7.761773291250173e-06,
      "loss": 0.0174,
      "step": 699360
    },
    {
      "epoch": 1.1445507092686056,
      "grad_norm": 0.6980689764022827,
      "learning_rate": 7.761707399036657e-06,
      "loss": 0.032,
      "step": 699380
    },
    {
      "epoch": 1.144583439707259,
      "grad_norm": 0.37120121717453003,
      "learning_rate": 7.76164150682314e-06,
      "loss": 0.0222,
      "step": 699400
    },
    {
      "epoch": 1.1446161701459123,
      "grad_norm": 0.4127078652381897,
      "learning_rate": 7.761575614609623e-06,
      "loss": 0.0245,
      "step": 699420
    },
    {
      "epoch": 1.1446489005845657,
      "grad_norm": 0.38219141960144043,
      "learning_rate": 7.761509722396106e-06,
      "loss": 0.0232,
      "step": 699440
    },
    {
      "epoch": 1.1446816310232188,
      "grad_norm": 0.22345589101314545,
      "learning_rate": 7.761443830182588e-06,
      "loss": 0.0269,
      "step": 699460
    },
    {
      "epoch": 1.1447143614618722,
      "grad_norm": 0.6104500889778137,
      "learning_rate": 7.761377937969072e-06,
      "loss": 0.0191,
      "step": 699480
    },
    {
      "epoch": 1.1447470919005256,
      "grad_norm": 0.24167296290397644,
      "learning_rate": 7.761312045755553e-06,
      "loss": 0.0235,
      "step": 699500
    },
    {
      "epoch": 1.144779822339179,
      "grad_norm": 0.28042465448379517,
      "learning_rate": 7.761246153542037e-06,
      "loss": 0.0188,
      "step": 699520
    },
    {
      "epoch": 1.1448125527778323,
      "grad_norm": 0.6389742493629456,
      "learning_rate": 7.761180261328519e-06,
      "loss": 0.0168,
      "step": 699540
    },
    {
      "epoch": 1.1448452832164857,
      "grad_norm": 0.4727236032485962,
      "learning_rate": 7.761114369115003e-06,
      "loss": 0.0179,
      "step": 699560
    },
    {
      "epoch": 1.144878013655139,
      "grad_norm": 0.8538969159126282,
      "learning_rate": 7.761048476901484e-06,
      "loss": 0.0251,
      "step": 699580
    },
    {
      "epoch": 1.1449107440937922,
      "grad_norm": 0.2862895131111145,
      "learning_rate": 7.760982584687968e-06,
      "loss": 0.0199,
      "step": 699600
    },
    {
      "epoch": 1.1449434745324456,
      "grad_norm": 0.8094216585159302,
      "learning_rate": 7.76091669247445e-06,
      "loss": 0.0358,
      "step": 699620
    },
    {
      "epoch": 1.144976204971099,
      "grad_norm": 0.9237639904022217,
      "learning_rate": 7.760850800260934e-06,
      "loss": 0.0262,
      "step": 699640
    },
    {
      "epoch": 1.1450089354097523,
      "grad_norm": 1.4189475774765015,
      "learning_rate": 7.760784908047415e-06,
      "loss": 0.0297,
      "step": 699660
    },
    {
      "epoch": 1.1450416658484057,
      "grad_norm": 0.637051522731781,
      "learning_rate": 7.760719015833899e-06,
      "loss": 0.0122,
      "step": 699680
    },
    {
      "epoch": 1.145074396287059,
      "grad_norm": 0.3125385642051697,
      "learning_rate": 7.760653123620383e-06,
      "loss": 0.0253,
      "step": 699700
    },
    {
      "epoch": 1.1451071267257125,
      "grad_norm": 4.930344104766846,
      "learning_rate": 7.760587231406866e-06,
      "loss": 0.0204,
      "step": 699720
    },
    {
      "epoch": 1.1451398571643656,
      "grad_norm": 1.289591908454895,
      "learning_rate": 7.760521339193348e-06,
      "loss": 0.0286,
      "step": 699740
    },
    {
      "epoch": 1.145172587603019,
      "grad_norm": 0.37954285740852356,
      "learning_rate": 7.760455446979832e-06,
      "loss": 0.0232,
      "step": 699760
    },
    {
      "epoch": 1.1452053180416724,
      "grad_norm": 1.0621896982192993,
      "learning_rate": 7.760389554766315e-06,
      "loss": 0.0287,
      "step": 699780
    },
    {
      "epoch": 1.1452380484803257,
      "grad_norm": 0.621985912322998,
      "learning_rate": 7.760323662552797e-06,
      "loss": 0.0234,
      "step": 699800
    },
    {
      "epoch": 1.145270778918979,
      "grad_norm": 0.552253246307373,
      "learning_rate": 7.76025777033928e-06,
      "loss": 0.0146,
      "step": 699820
    },
    {
      "epoch": 1.1453035093576325,
      "grad_norm": 0.6135605573654175,
      "learning_rate": 7.760191878125763e-06,
      "loss": 0.0183,
      "step": 699840
    },
    {
      "epoch": 1.1453362397962858,
      "grad_norm": 1.3840886354446411,
      "learning_rate": 7.760125985912246e-06,
      "loss": 0.0237,
      "step": 699860
    },
    {
      "epoch": 1.145368970234939,
      "grad_norm": 0.7301352024078369,
      "learning_rate": 7.760060093698728e-06,
      "loss": 0.0283,
      "step": 699880
    },
    {
      "epoch": 1.1454017006735924,
      "grad_norm": 2.241058588027954,
      "learning_rate": 7.759994201485212e-06,
      "loss": 0.0226,
      "step": 699900
    },
    {
      "epoch": 1.1454344311122457,
      "grad_norm": 2.3497025966644287,
      "learning_rate": 7.759928309271694e-06,
      "loss": 0.0263,
      "step": 699920
    },
    {
      "epoch": 1.1454671615508991,
      "grad_norm": 1.1681307554244995,
      "learning_rate": 7.759862417058177e-06,
      "loss": 0.0213,
      "step": 699940
    },
    {
      "epoch": 1.1454998919895525,
      "grad_norm": 4.735158920288086,
      "learning_rate": 7.759796524844659e-06,
      "loss": 0.0238,
      "step": 699960
    },
    {
      "epoch": 1.1455326224282059,
      "grad_norm": 0.7747368216514587,
      "learning_rate": 7.759730632631143e-06,
      "loss": 0.0115,
      "step": 699980
    },
    {
      "epoch": 1.1455653528668592,
      "grad_norm": 2.5256850719451904,
      "learning_rate": 7.759664740417625e-06,
      "loss": 0.019,
      "step": 700000
    },
    {
      "epoch": 1.1455653528668592,
      "eval_loss": 0.012549696490168571,
      "eval_runtime": 6510.0221,
      "eval_samples_per_second": 157.889,
      "eval_steps_per_second": 15.789,
      "eval_sts-dev_pearson_cosine": 0.9703742273156336,
      "eval_sts-dev_spearman_cosine": 0.8861010596838592,
      "step": 700000
    },
    {
      "epoch": 1.1455980833055124,
      "grad_norm": 1.849449634552002,
      "learning_rate": 7.759598848204108e-06,
      "loss": 0.0318,
      "step": 700020
    },
    {
      "epoch": 1.1456308137441658,
      "grad_norm": 0.603400707244873,
      "learning_rate": 7.75953295599059e-06,
      "loss": 0.0186,
      "step": 700040
    },
    {
      "epoch": 1.1456635441828191,
      "grad_norm": 0.7659100294113159,
      "learning_rate": 7.759467063777074e-06,
      "loss": 0.0196,
      "step": 700060
    },
    {
      "epoch": 1.1456962746214725,
      "grad_norm": 0.9003440737724304,
      "learning_rate": 7.759401171563557e-06,
      "loss": 0.0328,
      "step": 700080
    },
    {
      "epoch": 1.1457290050601259,
      "grad_norm": 0.7367684841156006,
      "learning_rate": 7.759335279350039e-06,
      "loss": 0.0245,
      "step": 700100
    },
    {
      "epoch": 1.1457617354987792,
      "grad_norm": 1.6960395574569702,
      "learning_rate": 7.759269387136523e-06,
      "loss": 0.023,
      "step": 700120
    },
    {
      "epoch": 1.1457944659374326,
      "grad_norm": 0.8521727323532104,
      "learning_rate": 7.759203494923006e-06,
      "loss": 0.0254,
      "step": 700140
    },
    {
      "epoch": 1.1458271963760858,
      "grad_norm": 0.5286858677864075,
      "learning_rate": 7.759137602709488e-06,
      "loss": 0.0253,
      "step": 700160
    },
    {
      "epoch": 1.1458599268147391,
      "grad_norm": 0.13016770780086517,
      "learning_rate": 7.759071710495972e-06,
      "loss": 0.0231,
      "step": 700180
    },
    {
      "epoch": 1.1458926572533925,
      "grad_norm": 0.8527444005012512,
      "learning_rate": 7.759005818282455e-06,
      "loss": 0.019,
      "step": 700200
    },
    {
      "epoch": 1.1459253876920459,
      "grad_norm": 2.125999689102173,
      "learning_rate": 7.758939926068937e-06,
      "loss": 0.0247,
      "step": 700220
    },
    {
      "epoch": 1.1459581181306993,
      "grad_norm": 1.056409478187561,
      "learning_rate": 7.75887403385542e-06,
      "loss": 0.0217,
      "step": 700240
    },
    {
      "epoch": 1.1459908485693524,
      "grad_norm": 0.9240439534187317,
      "learning_rate": 7.758808141641903e-06,
      "loss": 0.0371,
      "step": 700260
    },
    {
      "epoch": 1.1460235790080058,
      "grad_norm": 0.35247254371643066,
      "learning_rate": 7.758742249428386e-06,
      "loss": 0.0234,
      "step": 700280
    },
    {
      "epoch": 1.1460563094466591,
      "grad_norm": 0.2355625182390213,
      "learning_rate": 7.758676357214868e-06,
      "loss": 0.025,
      "step": 700300
    },
    {
      "epoch": 1.1460890398853125,
      "grad_norm": 1.7720831632614136,
      "learning_rate": 7.758610465001352e-06,
      "loss": 0.0219,
      "step": 700320
    },
    {
      "epoch": 1.146121770323966,
      "grad_norm": 0.46430259943008423,
      "learning_rate": 7.758544572787834e-06,
      "loss": 0.0243,
      "step": 700340
    },
    {
      "epoch": 1.1461545007626193,
      "grad_norm": 1.1331658363342285,
      "learning_rate": 7.758478680574317e-06,
      "loss": 0.0365,
      "step": 700360
    },
    {
      "epoch": 1.1461872312012726,
      "grad_norm": 1.8113951683044434,
      "learning_rate": 7.758412788360799e-06,
      "loss": 0.0285,
      "step": 700380
    },
    {
      "epoch": 1.1462199616399258,
      "grad_norm": 0.389449805021286,
      "learning_rate": 7.758346896147283e-06,
      "loss": 0.0355,
      "step": 700400
    },
    {
      "epoch": 1.1462526920785792,
      "grad_norm": 0.20637910068035126,
      "learning_rate": 7.758281003933765e-06,
      "loss": 0.0158,
      "step": 700420
    },
    {
      "epoch": 1.1462854225172325,
      "grad_norm": 0.6402718424797058,
      "learning_rate": 7.758215111720248e-06,
      "loss": 0.025,
      "step": 700440
    },
    {
      "epoch": 1.146318152955886,
      "grad_norm": 0.7863544225692749,
      "learning_rate": 7.758149219506732e-06,
      "loss": 0.0246,
      "step": 700460
    },
    {
      "epoch": 1.1463508833945393,
      "grad_norm": 0.40543654561042786,
      "learning_rate": 7.758083327293214e-06,
      "loss": 0.0175,
      "step": 700480
    },
    {
      "epoch": 1.1463836138331926,
      "grad_norm": 1.2358551025390625,
      "learning_rate": 7.758017435079697e-06,
      "loss": 0.0308,
      "step": 700500
    },
    {
      "epoch": 1.146416344271846,
      "grad_norm": 0.40267467498779297,
      "learning_rate": 7.75795154286618e-06,
      "loss": 0.0165,
      "step": 700520
    },
    {
      "epoch": 1.1464490747104992,
      "grad_norm": 0.22234207391738892,
      "learning_rate": 7.757885650652663e-06,
      "loss": 0.0174,
      "step": 700540
    },
    {
      "epoch": 1.1464818051491525,
      "grad_norm": 1.774750828742981,
      "learning_rate": 7.757819758439146e-06,
      "loss": 0.0207,
      "step": 700560
    },
    {
      "epoch": 1.146514535587806,
      "grad_norm": 0.5064075589179993,
      "learning_rate": 7.75775386622563e-06,
      "loss": 0.0195,
      "step": 700580
    },
    {
      "epoch": 1.1465472660264593,
      "grad_norm": 1.140978455543518,
      "learning_rate": 7.757687974012112e-06,
      "loss": 0.0288,
      "step": 700600
    },
    {
      "epoch": 1.1465799964651127,
      "grad_norm": 1.51082444190979,
      "learning_rate": 7.757622081798595e-06,
      "loss": 0.0261,
      "step": 700620
    },
    {
      "epoch": 1.146612726903766,
      "grad_norm": 0.4543544054031372,
      "learning_rate": 7.757556189585077e-06,
      "loss": 0.0255,
      "step": 700640
    },
    {
      "epoch": 1.1466454573424194,
      "grad_norm": 0.16993744671344757,
      "learning_rate": 7.757490297371561e-06,
      "loss": 0.0229,
      "step": 700660
    },
    {
      "epoch": 1.1466781877810726,
      "grad_norm": 2.18449068069458,
      "learning_rate": 7.757424405158043e-06,
      "loss": 0.0293,
      "step": 700680
    },
    {
      "epoch": 1.146710918219726,
      "grad_norm": 0.6889892220497131,
      "learning_rate": 7.757358512944526e-06,
      "loss": 0.0162,
      "step": 700700
    },
    {
      "epoch": 1.1467436486583793,
      "grad_norm": 0.4661780297756195,
      "learning_rate": 7.757292620731008e-06,
      "loss": 0.0157,
      "step": 700720
    },
    {
      "epoch": 1.1467763790970327,
      "grad_norm": 0.9038440585136414,
      "learning_rate": 7.757226728517492e-06,
      "loss": 0.0205,
      "step": 700740
    },
    {
      "epoch": 1.146809109535686,
      "grad_norm": 1.9831548929214478,
      "learning_rate": 7.757160836303974e-06,
      "loss": 0.0247,
      "step": 700760
    },
    {
      "epoch": 1.1468418399743394,
      "grad_norm": 1.6687304973602295,
      "learning_rate": 7.757094944090457e-06,
      "loss": 0.0244,
      "step": 700780
    },
    {
      "epoch": 1.1468745704129928,
      "grad_norm": 0.6411551237106323,
      "learning_rate": 7.757029051876941e-06,
      "loss": 0.0285,
      "step": 700800
    },
    {
      "epoch": 1.146907300851646,
      "grad_norm": 10.318987846374512,
      "learning_rate": 7.756963159663423e-06,
      "loss": 0.0255,
      "step": 700820
    },
    {
      "epoch": 1.1469400312902993,
      "grad_norm": 1.3947654962539673,
      "learning_rate": 7.756897267449906e-06,
      "loss": 0.0273,
      "step": 700840
    },
    {
      "epoch": 1.1469727617289527,
      "grad_norm": 0.7387974858283997,
      "learning_rate": 7.756831375236388e-06,
      "loss": 0.0184,
      "step": 700860
    },
    {
      "epoch": 1.147005492167606,
      "grad_norm": 0.37783533334732056,
      "learning_rate": 7.756765483022872e-06,
      "loss": 0.0173,
      "step": 700880
    },
    {
      "epoch": 1.1470382226062594,
      "grad_norm": 0.6465355157852173,
      "learning_rate": 7.756699590809354e-06,
      "loss": 0.0203,
      "step": 700900
    },
    {
      "epoch": 1.1470709530449128,
      "grad_norm": 1.6678990125656128,
      "learning_rate": 7.756633698595837e-06,
      "loss": 0.0228,
      "step": 700920
    },
    {
      "epoch": 1.147103683483566,
      "grad_norm": 0.6472501754760742,
      "learning_rate": 7.756567806382321e-06,
      "loss": 0.026,
      "step": 700940
    },
    {
      "epoch": 1.1471364139222193,
      "grad_norm": 0.2486119121313095,
      "learning_rate": 7.756501914168803e-06,
      "loss": 0.0208,
      "step": 700960
    },
    {
      "epoch": 1.1471691443608727,
      "grad_norm": 0.18101860582828522,
      "learning_rate": 7.756436021955286e-06,
      "loss": 0.0221,
      "step": 700980
    },
    {
      "epoch": 1.147201874799526,
      "grad_norm": 1.1211566925048828,
      "learning_rate": 7.75637012974177e-06,
      "loss": 0.0323,
      "step": 701000
    },
    {
      "epoch": 1.1472346052381794,
      "grad_norm": 0.6760101318359375,
      "learning_rate": 7.756304237528252e-06,
      "loss": 0.03,
      "step": 701020
    },
    {
      "epoch": 1.1472673356768328,
      "grad_norm": 0.766030490398407,
      "learning_rate": 7.756238345314735e-06,
      "loss": 0.0193,
      "step": 701040
    },
    {
      "epoch": 1.147300066115486,
      "grad_norm": 0.8416153192520142,
      "learning_rate": 7.756172453101217e-06,
      "loss": 0.0188,
      "step": 701060
    },
    {
      "epoch": 1.1473327965541393,
      "grad_norm": 3.0481791496276855,
      "learning_rate": 7.756106560887701e-06,
      "loss": 0.0325,
      "step": 701080
    },
    {
      "epoch": 1.1473655269927927,
      "grad_norm": 1.6668031215667725,
      "learning_rate": 7.756040668674183e-06,
      "loss": 0.0302,
      "step": 701100
    },
    {
      "epoch": 1.147398257431446,
      "grad_norm": 0.4244028329849243,
      "learning_rate": 7.755974776460666e-06,
      "loss": 0.0184,
      "step": 701120
    },
    {
      "epoch": 1.1474309878700995,
      "grad_norm": 1.1179383993148804,
      "learning_rate": 7.755908884247148e-06,
      "loss": 0.0267,
      "step": 701140
    },
    {
      "epoch": 1.1474637183087528,
      "grad_norm": 0.9017623662948608,
      "learning_rate": 7.755842992033632e-06,
      "loss": 0.0231,
      "step": 701160
    },
    {
      "epoch": 1.1474964487474062,
      "grad_norm": 0.5052200555801392,
      "learning_rate": 7.755777099820115e-06,
      "loss": 0.0181,
      "step": 701180
    },
    {
      "epoch": 1.1475291791860593,
      "grad_norm": 0.41286200284957886,
      "learning_rate": 7.755711207606597e-06,
      "loss": 0.0358,
      "step": 701200
    },
    {
      "epoch": 1.1475619096247127,
      "grad_norm": 1.1321759223937988,
      "learning_rate": 7.755645315393081e-06,
      "loss": 0.0293,
      "step": 701220
    },
    {
      "epoch": 1.147594640063366,
      "grad_norm": 0.13741986453533173,
      "learning_rate": 7.755579423179563e-06,
      "loss": 0.0231,
      "step": 701240
    },
    {
      "epoch": 1.1476273705020195,
      "grad_norm": 1.048186182975769,
      "learning_rate": 7.755513530966046e-06,
      "loss": 0.0272,
      "step": 701260
    },
    {
      "epoch": 1.1476601009406728,
      "grad_norm": 0.40814605355262756,
      "learning_rate": 7.755447638752528e-06,
      "loss": 0.0361,
      "step": 701280
    },
    {
      "epoch": 1.1476928313793262,
      "grad_norm": 0.12348438054323196,
      "learning_rate": 7.755381746539012e-06,
      "loss": 0.03,
      "step": 701300
    },
    {
      "epoch": 1.1477255618179796,
      "grad_norm": 0.3770028352737427,
      "learning_rate": 7.755315854325496e-06,
      "loss": 0.0239,
      "step": 701320
    },
    {
      "epoch": 1.1477582922566327,
      "grad_norm": 0.5168208479881287,
      "learning_rate": 7.755249962111977e-06,
      "loss": 0.0178,
      "step": 701340
    },
    {
      "epoch": 1.147791022695286,
      "grad_norm": 0.43414655327796936,
      "learning_rate": 7.755184069898461e-06,
      "loss": 0.0232,
      "step": 701360
    },
    {
      "epoch": 1.1478237531339395,
      "grad_norm": 0.9658650755882263,
      "learning_rate": 7.755118177684945e-06,
      "loss": 0.0294,
      "step": 701380
    },
    {
      "epoch": 1.1478564835725928,
      "grad_norm": 0.7773562669754028,
      "learning_rate": 7.755052285471426e-06,
      "loss": 0.0195,
      "step": 701400
    },
    {
      "epoch": 1.1478892140112462,
      "grad_norm": 0.39748722314834595,
      "learning_rate": 7.75498639325791e-06,
      "loss": 0.0189,
      "step": 701420
    },
    {
      "epoch": 1.1479219444498996,
      "grad_norm": 1.6708568334579468,
      "learning_rate": 7.754920501044392e-06,
      "loss": 0.0202,
      "step": 701440
    },
    {
      "epoch": 1.147954674888553,
      "grad_norm": 0.13004013895988464,
      "learning_rate": 7.754854608830876e-06,
      "loss": 0.021,
      "step": 701460
    },
    {
      "epoch": 1.1479874053272061,
      "grad_norm": 1.1632442474365234,
      "learning_rate": 7.754788716617357e-06,
      "loss": 0.0304,
      "step": 701480
    },
    {
      "epoch": 1.1480201357658595,
      "grad_norm": 0.4534865915775299,
      "learning_rate": 7.754722824403841e-06,
      "loss": 0.0188,
      "step": 701500
    },
    {
      "epoch": 1.1480528662045129,
      "grad_norm": 1.405764102935791,
      "learning_rate": 7.754656932190325e-06,
      "loss": 0.0337,
      "step": 701520
    },
    {
      "epoch": 1.1480855966431662,
      "grad_norm": 0.4806123971939087,
      "learning_rate": 7.754591039976806e-06,
      "loss": 0.0281,
      "step": 701540
    },
    {
      "epoch": 1.1481183270818196,
      "grad_norm": 1.4690932035446167,
      "learning_rate": 7.75452514776329e-06,
      "loss": 0.0205,
      "step": 701560
    },
    {
      "epoch": 1.148151057520473,
      "grad_norm": 0.5913514494895935,
      "learning_rate": 7.754459255549772e-06,
      "loss": 0.0176,
      "step": 701580
    },
    {
      "epoch": 1.1481837879591263,
      "grad_norm": 0.5235013365745544,
      "learning_rate": 7.754393363336256e-06,
      "loss": 0.0283,
      "step": 701600
    },
    {
      "epoch": 1.1482165183977795,
      "grad_norm": 0.6654804348945618,
      "learning_rate": 7.754327471122737e-06,
      "loss": 0.0192,
      "step": 701620
    },
    {
      "epoch": 1.1482492488364329,
      "grad_norm": 0.2749810218811035,
      "learning_rate": 7.754261578909221e-06,
      "loss": 0.0171,
      "step": 701640
    },
    {
      "epoch": 1.1482819792750862,
      "grad_norm": 0.7006434798240662,
      "learning_rate": 7.754195686695703e-06,
      "loss": 0.0229,
      "step": 701660
    },
    {
      "epoch": 1.1483147097137396,
      "grad_norm": 0.5164580941200256,
      "learning_rate": 7.754129794482187e-06,
      "loss": 0.0222,
      "step": 701680
    },
    {
      "epoch": 1.148347440152393,
      "grad_norm": 0.5170549750328064,
      "learning_rate": 7.754063902268668e-06,
      "loss": 0.0224,
      "step": 701700
    },
    {
      "epoch": 1.1483801705910464,
      "grad_norm": 1.3625590801239014,
      "learning_rate": 7.753998010055152e-06,
      "loss": 0.0242,
      "step": 701720
    },
    {
      "epoch": 1.1484129010296995,
      "grad_norm": 0.6347381472587585,
      "learning_rate": 7.753932117841636e-06,
      "loss": 0.0201,
      "step": 701740
    },
    {
      "epoch": 1.1484456314683529,
      "grad_norm": 1.0345439910888672,
      "learning_rate": 7.753866225628117e-06,
      "loss": 0.0162,
      "step": 701760
    },
    {
      "epoch": 1.1484783619070063,
      "grad_norm": 0.4336559772491455,
      "learning_rate": 7.753800333414601e-06,
      "loss": 0.0224,
      "step": 701780
    },
    {
      "epoch": 1.1485110923456596,
      "grad_norm": 0.5822147130966187,
      "learning_rate": 7.753734441201085e-06,
      "loss": 0.0173,
      "step": 701800
    },
    {
      "epoch": 1.148543822784313,
      "grad_norm": 0.2630215883255005,
      "learning_rate": 7.753668548987567e-06,
      "loss": 0.03,
      "step": 701820
    },
    {
      "epoch": 1.1485765532229664,
      "grad_norm": 2.655045747756958,
      "learning_rate": 7.75360265677405e-06,
      "loss": 0.0213,
      "step": 701840
    },
    {
      "epoch": 1.1486092836616195,
      "grad_norm": 0.7503455281257629,
      "learning_rate": 7.753536764560534e-06,
      "loss": 0.0273,
      "step": 701860
    },
    {
      "epoch": 1.148642014100273,
      "grad_norm": 0.7051267623901367,
      "learning_rate": 7.753470872347016e-06,
      "loss": 0.0245,
      "step": 701880
    },
    {
      "epoch": 1.1486747445389263,
      "grad_norm": 1.2005950212478638,
      "learning_rate": 7.7534049801335e-06,
      "loss": 0.0217,
      "step": 701900
    },
    {
      "epoch": 1.1487074749775796,
      "grad_norm": 1.208816647529602,
      "learning_rate": 7.753339087919981e-06,
      "loss": 0.0358,
      "step": 701920
    },
    {
      "epoch": 1.148740205416233,
      "grad_norm": 0.7951341867446899,
      "learning_rate": 7.753273195706465e-06,
      "loss": 0.026,
      "step": 701940
    },
    {
      "epoch": 1.1487729358548864,
      "grad_norm": 2.1989665031433105,
      "learning_rate": 7.753207303492947e-06,
      "loss": 0.0211,
      "step": 701960
    },
    {
      "epoch": 1.1488056662935398,
      "grad_norm": 0.6706501841545105,
      "learning_rate": 7.75314141127943e-06,
      "loss": 0.0261,
      "step": 701980
    },
    {
      "epoch": 1.148838396732193,
      "grad_norm": 0.8901916146278381,
      "learning_rate": 7.753075519065912e-06,
      "loss": 0.0336,
      "step": 702000
    },
    {
      "epoch": 1.1488711271708463,
      "grad_norm": 1.1256464719772339,
      "learning_rate": 7.753009626852396e-06,
      "loss": 0.0269,
      "step": 702020
    },
    {
      "epoch": 1.1489038576094996,
      "grad_norm": 0.7218053936958313,
      "learning_rate": 7.752943734638878e-06,
      "loss": 0.0267,
      "step": 702040
    },
    {
      "epoch": 1.148936588048153,
      "grad_norm": 0.3084278106689453,
      "learning_rate": 7.752877842425361e-06,
      "loss": 0.0263,
      "step": 702060
    },
    {
      "epoch": 1.1489693184868064,
      "grad_norm": 0.915526270866394,
      "learning_rate": 7.752811950211843e-06,
      "loss": 0.0249,
      "step": 702080
    },
    {
      "epoch": 1.1490020489254598,
      "grad_norm": 1.4045013189315796,
      "learning_rate": 7.752746057998327e-06,
      "loss": 0.022,
      "step": 702100
    },
    {
      "epoch": 1.1490347793641131,
      "grad_norm": 0.7039209604263306,
      "learning_rate": 7.75268016578481e-06,
      "loss": 0.0255,
      "step": 702120
    },
    {
      "epoch": 1.1490675098027663,
      "grad_norm": 0.5229068994522095,
      "learning_rate": 7.752614273571292e-06,
      "loss": 0.0303,
      "step": 702140
    },
    {
      "epoch": 1.1491002402414197,
      "grad_norm": 2.0012567043304443,
      "learning_rate": 7.752548381357776e-06,
      "loss": 0.028,
      "step": 702160
    },
    {
      "epoch": 1.149132970680073,
      "grad_norm": 0.43023720383644104,
      "learning_rate": 7.75248248914426e-06,
      "loss": 0.0224,
      "step": 702180
    },
    {
      "epoch": 1.1491657011187264,
      "grad_norm": 1.8043383359909058,
      "learning_rate": 7.752416596930741e-06,
      "loss": 0.0314,
      "step": 702200
    },
    {
      "epoch": 1.1491984315573798,
      "grad_norm": 1.1974167823791504,
      "learning_rate": 7.752350704717225e-06,
      "loss": 0.0254,
      "step": 702220
    },
    {
      "epoch": 1.1492311619960331,
      "grad_norm": 0.4365820288658142,
      "learning_rate": 7.752284812503708e-06,
      "loss": 0.0226,
      "step": 702240
    },
    {
      "epoch": 1.1492638924346865,
      "grad_norm": 0.16732050478458405,
      "learning_rate": 7.75221892029019e-06,
      "loss": 0.0218,
      "step": 702260
    },
    {
      "epoch": 1.1492966228733397,
      "grad_norm": 1.635115146636963,
      "learning_rate": 7.752153028076674e-06,
      "loss": 0.0256,
      "step": 702280
    },
    {
      "epoch": 1.149329353311993,
      "grad_norm": 2.6648740768432617,
      "learning_rate": 7.752087135863156e-06,
      "loss": 0.0263,
      "step": 702300
    },
    {
      "epoch": 1.1493620837506464,
      "grad_norm": 0.49269676208496094,
      "learning_rate": 7.75202124364964e-06,
      "loss": 0.0175,
      "step": 702320
    },
    {
      "epoch": 1.1493948141892998,
      "grad_norm": 0.2411048412322998,
      "learning_rate": 7.751955351436121e-06,
      "loss": 0.0207,
      "step": 702340
    },
    {
      "epoch": 1.1494275446279532,
      "grad_norm": 0.7919905781745911,
      "learning_rate": 7.751889459222605e-06,
      "loss": 0.0257,
      "step": 702360
    },
    {
      "epoch": 1.1494602750666065,
      "grad_norm": 0.7343970537185669,
      "learning_rate": 7.751823567009087e-06,
      "loss": 0.023,
      "step": 702380
    },
    {
      "epoch": 1.14949300550526,
      "grad_norm": 0.8393365144729614,
      "learning_rate": 7.75175767479557e-06,
      "loss": 0.0227,
      "step": 702400
    },
    {
      "epoch": 1.149525735943913,
      "grad_norm": 0.5472320914268494,
      "learning_rate": 7.751691782582052e-06,
      "loss": 0.0225,
      "step": 702420
    },
    {
      "epoch": 1.1495584663825664,
      "grad_norm": 0.4165259003639221,
      "learning_rate": 7.751625890368536e-06,
      "loss": 0.0301,
      "step": 702440
    },
    {
      "epoch": 1.1495911968212198,
      "grad_norm": 0.195564866065979,
      "learning_rate": 7.751559998155018e-06,
      "loss": 0.0269,
      "step": 702460
    },
    {
      "epoch": 1.1496239272598732,
      "grad_norm": 0.3239382803440094,
      "learning_rate": 7.751494105941501e-06,
      "loss": 0.0123,
      "step": 702480
    },
    {
      "epoch": 1.1496566576985265,
      "grad_norm": 1.4885473251342773,
      "learning_rate": 7.751428213727985e-06,
      "loss": 0.0211,
      "step": 702500
    },
    {
      "epoch": 1.1496893881371797,
      "grad_norm": 0.21369783580303192,
      "learning_rate": 7.751362321514467e-06,
      "loss": 0.0207,
      "step": 702520
    },
    {
      "epoch": 1.149722118575833,
      "grad_norm": 0.7406474351882935,
      "learning_rate": 7.75129642930095e-06,
      "loss": 0.0198,
      "step": 702540
    },
    {
      "epoch": 1.1497548490144864,
      "grad_norm": 0.28971362113952637,
      "learning_rate": 7.751230537087434e-06,
      "loss": 0.0155,
      "step": 702560
    },
    {
      "epoch": 1.1497875794531398,
      "grad_norm": 0.7581794261932373,
      "learning_rate": 7.751164644873916e-06,
      "loss": 0.0384,
      "step": 702580
    },
    {
      "epoch": 1.1498203098917932,
      "grad_norm": 0.3907998502254486,
      "learning_rate": 7.7510987526604e-06,
      "loss": 0.0348,
      "step": 702600
    },
    {
      "epoch": 1.1498530403304466,
      "grad_norm": 0.4661490321159363,
      "learning_rate": 7.751032860446883e-06,
      "loss": 0.0235,
      "step": 702620
    },
    {
      "epoch": 1.1498857707691,
      "grad_norm": 0.8882778286933899,
      "learning_rate": 7.750966968233365e-06,
      "loss": 0.0167,
      "step": 702640
    },
    {
      "epoch": 1.149918501207753,
      "grad_norm": 0.8331751227378845,
      "learning_rate": 7.750901076019848e-06,
      "loss": 0.0279,
      "step": 702660
    },
    {
      "epoch": 1.1499512316464064,
      "grad_norm": 0.37353959679603577,
      "learning_rate": 7.75083518380633e-06,
      "loss": 0.0239,
      "step": 702680
    },
    {
      "epoch": 1.1499839620850598,
      "grad_norm": 2.463592767715454,
      "learning_rate": 7.750769291592814e-06,
      "loss": 0.021,
      "step": 702700
    },
    {
      "epoch": 1.1500166925237132,
      "grad_norm": 0.3133566975593567,
      "learning_rate": 7.750703399379296e-06,
      "loss": 0.0236,
      "step": 702720
    },
    {
      "epoch": 1.1500494229623666,
      "grad_norm": 0.1191490963101387,
      "learning_rate": 7.75063750716578e-06,
      "loss": 0.0201,
      "step": 702740
    },
    {
      "epoch": 1.15008215340102,
      "grad_norm": 1.9311656951904297,
      "learning_rate": 7.750571614952261e-06,
      "loss": 0.0283,
      "step": 702760
    },
    {
      "epoch": 1.1501148838396733,
      "grad_norm": 0.4670604169368744,
      "learning_rate": 7.750505722738745e-06,
      "loss": 0.0245,
      "step": 702780
    },
    {
      "epoch": 1.1501476142783265,
      "grad_norm": 1.9036974906921387,
      "learning_rate": 7.750439830525227e-06,
      "loss": 0.0233,
      "step": 702800
    },
    {
      "epoch": 1.1501803447169798,
      "grad_norm": 0.9368300437927246,
      "learning_rate": 7.75037393831171e-06,
      "loss": 0.0213,
      "step": 702820
    },
    {
      "epoch": 1.1502130751556332,
      "grad_norm": 1.2351592779159546,
      "learning_rate": 7.750308046098192e-06,
      "loss": 0.0213,
      "step": 702840
    },
    {
      "epoch": 1.1502458055942866,
      "grad_norm": 0.5676710605621338,
      "learning_rate": 7.750242153884676e-06,
      "loss": 0.0269,
      "step": 702860
    },
    {
      "epoch": 1.15027853603294,
      "grad_norm": 0.23220041394233704,
      "learning_rate": 7.750176261671158e-06,
      "loss": 0.02,
      "step": 702880
    },
    {
      "epoch": 1.1503112664715933,
      "grad_norm": 0.34489706158638,
      "learning_rate": 7.750110369457641e-06,
      "loss": 0.0273,
      "step": 702900
    },
    {
      "epoch": 1.1503439969102467,
      "grad_norm": 0.7678821086883545,
      "learning_rate": 7.750044477244125e-06,
      "loss": 0.0247,
      "step": 702920
    },
    {
      "epoch": 1.1503767273488998,
      "grad_norm": 0.503462553024292,
      "learning_rate": 7.749978585030607e-06,
      "loss": 0.0254,
      "step": 702940
    },
    {
      "epoch": 1.1504094577875532,
      "grad_norm": 0.5386432409286499,
      "learning_rate": 7.74991269281709e-06,
      "loss": 0.0252,
      "step": 702960
    },
    {
      "epoch": 1.1504421882262066,
      "grad_norm": 2.817089319229126,
      "learning_rate": 7.749846800603574e-06,
      "loss": 0.025,
      "step": 702980
    },
    {
      "epoch": 1.15047491866486,
      "grad_norm": 0.5026518702507019,
      "learning_rate": 7.749780908390056e-06,
      "loss": 0.0181,
      "step": 703000
    },
    {
      "epoch": 1.1505076491035133,
      "grad_norm": 0.5567616820335388,
      "learning_rate": 7.74971501617654e-06,
      "loss": 0.0264,
      "step": 703020
    },
    {
      "epoch": 1.1505403795421667,
      "grad_norm": 0.3932848274707794,
      "learning_rate": 7.749649123963023e-06,
      "loss": 0.0289,
      "step": 703040
    },
    {
      "epoch": 1.15057310998082,
      "grad_norm": 1.2359912395477295,
      "learning_rate": 7.749583231749505e-06,
      "loss": 0.0246,
      "step": 703060
    },
    {
      "epoch": 1.1506058404194732,
      "grad_norm": 0.20136229693889618,
      "learning_rate": 7.749517339535988e-06,
      "loss": 0.0267,
      "step": 703080
    },
    {
      "epoch": 1.1506385708581266,
      "grad_norm": 0.3591366708278656,
      "learning_rate": 7.74945144732247e-06,
      "loss": 0.0249,
      "step": 703100
    },
    {
      "epoch": 1.15067130129678,
      "grad_norm": 2.8183186054229736,
      "learning_rate": 7.749385555108954e-06,
      "loss": 0.0215,
      "step": 703120
    },
    {
      "epoch": 1.1507040317354333,
      "grad_norm": 0.40227967500686646,
      "learning_rate": 7.749319662895436e-06,
      "loss": 0.0156,
      "step": 703140
    },
    {
      "epoch": 1.1507367621740867,
      "grad_norm": 0.30030345916748047,
      "learning_rate": 7.74925377068192e-06,
      "loss": 0.0235,
      "step": 703160
    },
    {
      "epoch": 1.15076949261274,
      "grad_norm": 0.35848915576934814,
      "learning_rate": 7.749187878468401e-06,
      "loss": 0.0327,
      "step": 703180
    },
    {
      "epoch": 1.1508022230513935,
      "grad_norm": 1.1630409955978394,
      "learning_rate": 7.749121986254885e-06,
      "loss": 0.024,
      "step": 703200
    },
    {
      "epoch": 1.1508349534900466,
      "grad_norm": 1.1665087938308716,
      "learning_rate": 7.749056094041367e-06,
      "loss": 0.0178,
      "step": 703220
    },
    {
      "epoch": 1.1508676839287,
      "grad_norm": 0.9454942941665649,
      "learning_rate": 7.74899020182785e-06,
      "loss": 0.0245,
      "step": 703240
    },
    {
      "epoch": 1.1509004143673534,
      "grad_norm": 0.9168803095817566,
      "learning_rate": 7.748924309614334e-06,
      "loss": 0.0215,
      "step": 703260
    },
    {
      "epoch": 1.1509331448060067,
      "grad_norm": 1.5562710762023926,
      "learning_rate": 7.748858417400816e-06,
      "loss": 0.024,
      "step": 703280
    },
    {
      "epoch": 1.15096587524466,
      "grad_norm": 0.5306536555290222,
      "learning_rate": 7.7487925251873e-06,
      "loss": 0.0322,
      "step": 703300
    },
    {
      "epoch": 1.1509986056833132,
      "grad_norm": 1.0758056640625,
      "learning_rate": 7.748726632973781e-06,
      "loss": 0.0356,
      "step": 703320
    },
    {
      "epoch": 1.1510313361219666,
      "grad_norm": 1.1326876878738403,
      "learning_rate": 7.748660740760265e-06,
      "loss": 0.031,
      "step": 703340
    },
    {
      "epoch": 1.15106406656062,
      "grad_norm": 0.7417502999305725,
      "learning_rate": 7.748594848546749e-06,
      "loss": 0.0342,
      "step": 703360
    },
    {
      "epoch": 1.1510967969992734,
      "grad_norm": 0.7187573313713074,
      "learning_rate": 7.74852895633323e-06,
      "loss": 0.0389,
      "step": 703380
    },
    {
      "epoch": 1.1511295274379267,
      "grad_norm": 0.811871349811554,
      "learning_rate": 7.748463064119714e-06,
      "loss": 0.0152,
      "step": 703400
    },
    {
      "epoch": 1.15116225787658,
      "grad_norm": 0.38002297282218933,
      "learning_rate": 7.748397171906198e-06,
      "loss": 0.0323,
      "step": 703420
    },
    {
      "epoch": 1.1511949883152335,
      "grad_norm": 0.6737880110740662,
      "learning_rate": 7.74833127969268e-06,
      "loss": 0.0161,
      "step": 703440
    },
    {
      "epoch": 1.1512277187538866,
      "grad_norm": 1.7418174743652344,
      "learning_rate": 7.748265387479163e-06,
      "loss": 0.0153,
      "step": 703460
    },
    {
      "epoch": 1.15126044919254,
      "grad_norm": 1.882807731628418,
      "learning_rate": 7.748199495265645e-06,
      "loss": 0.0199,
      "step": 703480
    },
    {
      "epoch": 1.1512931796311934,
      "grad_norm": 0.25986596941947937,
      "learning_rate": 7.748133603052129e-06,
      "loss": 0.0265,
      "step": 703500
    },
    {
      "epoch": 1.1513259100698467,
      "grad_norm": 1.3910659551620483,
      "learning_rate": 7.74806771083861e-06,
      "loss": 0.0359,
      "step": 703520
    },
    {
      "epoch": 1.1513586405085001,
      "grad_norm": 0.13713300228118896,
      "learning_rate": 7.748001818625094e-06,
      "loss": 0.0207,
      "step": 703540
    },
    {
      "epoch": 1.1513913709471535,
      "grad_norm": 0.08641305565834045,
      "learning_rate": 7.747935926411576e-06,
      "loss": 0.0256,
      "step": 703560
    },
    {
      "epoch": 1.1514241013858069,
      "grad_norm": 0.5058706998825073,
      "learning_rate": 7.74787003419806e-06,
      "loss": 0.0188,
      "step": 703580
    },
    {
      "epoch": 1.15145683182446,
      "grad_norm": 1.0970486402511597,
      "learning_rate": 7.747804141984541e-06,
      "loss": 0.0164,
      "step": 703600
    },
    {
      "epoch": 1.1514895622631134,
      "grad_norm": 2.2079415321350098,
      "learning_rate": 7.747738249771025e-06,
      "loss": 0.0243,
      "step": 703620
    },
    {
      "epoch": 1.1515222927017668,
      "grad_norm": 0.39014068245887756,
      "learning_rate": 7.747672357557509e-06,
      "loss": 0.0182,
      "step": 703640
    },
    {
      "epoch": 1.1515550231404201,
      "grad_norm": 1.0152196884155273,
      "learning_rate": 7.74760646534399e-06,
      "loss": 0.0213,
      "step": 703660
    },
    {
      "epoch": 1.1515877535790735,
      "grad_norm": 1.0293947458267212,
      "learning_rate": 7.747540573130474e-06,
      "loss": 0.0328,
      "step": 703680
    },
    {
      "epoch": 1.1516204840177269,
      "grad_norm": 0.9390228986740112,
      "learning_rate": 7.747474680916956e-06,
      "loss": 0.0195,
      "step": 703700
    },
    {
      "epoch": 1.1516532144563802,
      "grad_norm": 1.456900715827942,
      "learning_rate": 7.74740878870344e-06,
      "loss": 0.0261,
      "step": 703720
    },
    {
      "epoch": 1.1516859448950334,
      "grad_norm": 0.7061499357223511,
      "learning_rate": 7.747342896489921e-06,
      "loss": 0.0226,
      "step": 703740
    },
    {
      "epoch": 1.1517186753336868,
      "grad_norm": 0.6795970797538757,
      "learning_rate": 7.747277004276405e-06,
      "loss": 0.0221,
      "step": 703760
    },
    {
      "epoch": 1.1517514057723401,
      "grad_norm": 0.6312270164489746,
      "learning_rate": 7.747211112062889e-06,
      "loss": 0.0307,
      "step": 703780
    },
    {
      "epoch": 1.1517841362109935,
      "grad_norm": 0.5714516043663025,
      "learning_rate": 7.74714521984937e-06,
      "loss": 0.0171,
      "step": 703800
    },
    {
      "epoch": 1.1518168666496469,
      "grad_norm": 1.6287102699279785,
      "learning_rate": 7.747079327635854e-06,
      "loss": 0.0284,
      "step": 703820
    },
    {
      "epoch": 1.1518495970883003,
      "grad_norm": 1.7371087074279785,
      "learning_rate": 7.747013435422338e-06,
      "loss": 0.0205,
      "step": 703840
    },
    {
      "epoch": 1.1518823275269536,
      "grad_norm": 0.9601833820343018,
      "learning_rate": 7.74694754320882e-06,
      "loss": 0.0308,
      "step": 703860
    },
    {
      "epoch": 1.1519150579656068,
      "grad_norm": 0.8712958097457886,
      "learning_rate": 7.746881650995303e-06,
      "loss": 0.0176,
      "step": 703880
    },
    {
      "epoch": 1.1519477884042602,
      "grad_norm": 0.35495296120643616,
      "learning_rate": 7.746815758781785e-06,
      "loss": 0.0152,
      "step": 703900
    },
    {
      "epoch": 1.1519805188429135,
      "grad_norm": 1.009541630744934,
      "learning_rate": 7.746749866568269e-06,
      "loss": 0.0208,
      "step": 703920
    },
    {
      "epoch": 1.152013249281567,
      "grad_norm": 0.9834396839141846,
      "learning_rate": 7.74668397435475e-06,
      "loss": 0.0194,
      "step": 703940
    },
    {
      "epoch": 1.1520459797202203,
      "grad_norm": 1.54762864112854,
      "learning_rate": 7.746618082141234e-06,
      "loss": 0.0205,
      "step": 703960
    },
    {
      "epoch": 1.1520787101588736,
      "grad_norm": 1.6766486167907715,
      "learning_rate": 7.746552189927718e-06,
      "loss": 0.0213,
      "step": 703980
    },
    {
      "epoch": 1.1521114405975268,
      "grad_norm": 0.8324148654937744,
      "learning_rate": 7.7464862977142e-06,
      "loss": 0.0338,
      "step": 704000
    },
    {
      "epoch": 1.1521441710361802,
      "grad_norm": 0.5277755260467529,
      "learning_rate": 7.746420405500683e-06,
      "loss": 0.0219,
      "step": 704020
    },
    {
      "epoch": 1.1521769014748335,
      "grad_norm": 0.08486244082450867,
      "learning_rate": 7.746354513287165e-06,
      "loss": 0.018,
      "step": 704040
    },
    {
      "epoch": 1.152209631913487,
      "grad_norm": 1.0080722570419312,
      "learning_rate": 7.746288621073649e-06,
      "loss": 0.0229,
      "step": 704060
    },
    {
      "epoch": 1.1522423623521403,
      "grad_norm": 5.541345596313477,
      "learning_rate": 7.74622272886013e-06,
      "loss": 0.0241,
      "step": 704080
    },
    {
      "epoch": 1.1522750927907937,
      "grad_norm": 1.0092651844024658,
      "learning_rate": 7.746156836646614e-06,
      "loss": 0.0251,
      "step": 704100
    },
    {
      "epoch": 1.1523078232294468,
      "grad_norm": 0.05107792839407921,
      "learning_rate": 7.746090944433096e-06,
      "loss": 0.0364,
      "step": 704120
    },
    {
      "epoch": 1.1523405536681002,
      "grad_norm": 0.6254233121871948,
      "learning_rate": 7.74602505221958e-06,
      "loss": 0.0213,
      "step": 704140
    },
    {
      "epoch": 1.1523732841067535,
      "grad_norm": 0.830186128616333,
      "learning_rate": 7.745959160006063e-06,
      "loss": 0.0169,
      "step": 704160
    },
    {
      "epoch": 1.152406014545407,
      "grad_norm": 0.22948674857616425,
      "learning_rate": 7.745893267792545e-06,
      "loss": 0.0259,
      "step": 704180
    },
    {
      "epoch": 1.1524387449840603,
      "grad_norm": 0.4248617887496948,
      "learning_rate": 7.745827375579029e-06,
      "loss": 0.0295,
      "step": 704200
    },
    {
      "epoch": 1.1524714754227137,
      "grad_norm": 0.43845435976982117,
      "learning_rate": 7.745761483365512e-06,
      "loss": 0.0216,
      "step": 704220
    },
    {
      "epoch": 1.152504205861367,
      "grad_norm": 0.813356339931488,
      "learning_rate": 7.745695591151994e-06,
      "loss": 0.0321,
      "step": 704240
    },
    {
      "epoch": 1.1525369363000202,
      "grad_norm": 0.22476419806480408,
      "learning_rate": 7.745629698938478e-06,
      "loss": 0.0225,
      "step": 704260
    },
    {
      "epoch": 1.1525696667386736,
      "grad_norm": 0.5378438234329224,
      "learning_rate": 7.74556380672496e-06,
      "loss": 0.0224,
      "step": 704280
    },
    {
      "epoch": 1.152602397177327,
      "grad_norm": 1.116909146308899,
      "learning_rate": 7.745497914511443e-06,
      "loss": 0.0245,
      "step": 704300
    },
    {
      "epoch": 1.1526351276159803,
      "grad_norm": 0.8733444809913635,
      "learning_rate": 7.745432022297927e-06,
      "loss": 0.0201,
      "step": 704320
    },
    {
      "epoch": 1.1526678580546337,
      "grad_norm": 0.19838771224021912,
      "learning_rate": 7.745366130084409e-06,
      "loss": 0.0214,
      "step": 704340
    },
    {
      "epoch": 1.152700588493287,
      "grad_norm": 0.6429911851882935,
      "learning_rate": 7.745300237870892e-06,
      "loss": 0.0274,
      "step": 704360
    },
    {
      "epoch": 1.1527333189319404,
      "grad_norm": 0.6751170754432678,
      "learning_rate": 7.745234345657374e-06,
      "loss": 0.0188,
      "step": 704380
    },
    {
      "epoch": 1.1527660493705936,
      "grad_norm": 0.5381221771240234,
      "learning_rate": 7.745168453443858e-06,
      "loss": 0.0189,
      "step": 704400
    },
    {
      "epoch": 1.152798779809247,
      "grad_norm": 1.8891040086746216,
      "learning_rate": 7.74510256123034e-06,
      "loss": 0.0363,
      "step": 704420
    },
    {
      "epoch": 1.1528315102479003,
      "grad_norm": 0.30394378304481506,
      "learning_rate": 7.745036669016823e-06,
      "loss": 0.0292,
      "step": 704440
    },
    {
      "epoch": 1.1528642406865537,
      "grad_norm": 0.8650848269462585,
      "learning_rate": 7.744970776803305e-06,
      "loss": 0.0217,
      "step": 704460
    },
    {
      "epoch": 1.152896971125207,
      "grad_norm": 0.7582108378410339,
      "learning_rate": 7.744904884589789e-06,
      "loss": 0.0217,
      "step": 704480
    },
    {
      "epoch": 1.1529297015638604,
      "grad_norm": 0.7069346308708191,
      "learning_rate": 7.74483899237627e-06,
      "loss": 0.0202,
      "step": 704500
    },
    {
      "epoch": 1.1529624320025138,
      "grad_norm": 0.3925473690032959,
      "learning_rate": 7.744773100162754e-06,
      "loss": 0.0167,
      "step": 704520
    },
    {
      "epoch": 1.152995162441167,
      "grad_norm": 0.43999406695365906,
      "learning_rate": 7.744707207949236e-06,
      "loss": 0.0301,
      "step": 704540
    },
    {
      "epoch": 1.1530278928798203,
      "grad_norm": 1.1801660060882568,
      "learning_rate": 7.74464131573572e-06,
      "loss": 0.023,
      "step": 704560
    },
    {
      "epoch": 1.1530606233184737,
      "grad_norm": 0.867300808429718,
      "learning_rate": 7.744575423522203e-06,
      "loss": 0.019,
      "step": 704580
    },
    {
      "epoch": 1.153093353757127,
      "grad_norm": 0.25004100799560547,
      "learning_rate": 7.744509531308685e-06,
      "loss": 0.0231,
      "step": 704600
    },
    {
      "epoch": 1.1531260841957804,
      "grad_norm": 0.8355657458305359,
      "learning_rate": 7.744443639095169e-06,
      "loss": 0.026,
      "step": 704620
    },
    {
      "epoch": 1.1531588146344338,
      "grad_norm": 0.21541063487529755,
      "learning_rate": 7.744377746881652e-06,
      "loss": 0.0198,
      "step": 704640
    },
    {
      "epoch": 1.1531915450730872,
      "grad_norm": 1.127854824066162,
      "learning_rate": 7.744311854668134e-06,
      "loss": 0.0159,
      "step": 704660
    },
    {
      "epoch": 1.1532242755117403,
      "grad_norm": 0.4607522487640381,
      "learning_rate": 7.744245962454618e-06,
      "loss": 0.0299,
      "step": 704680
    },
    {
      "epoch": 1.1532570059503937,
      "grad_norm": 1.2292611598968506,
      "learning_rate": 7.744180070241101e-06,
      "loss": 0.018,
      "step": 704700
    },
    {
      "epoch": 1.153289736389047,
      "grad_norm": 0.8671301603317261,
      "learning_rate": 7.744114178027583e-06,
      "loss": 0.019,
      "step": 704720
    },
    {
      "epoch": 1.1533224668277005,
      "grad_norm": 1.3572927713394165,
      "learning_rate": 7.744048285814067e-06,
      "loss": 0.0285,
      "step": 704740
    },
    {
      "epoch": 1.1533551972663538,
      "grad_norm": 0.42001187801361084,
      "learning_rate": 7.743982393600549e-06,
      "loss": 0.0212,
      "step": 704760
    },
    {
      "epoch": 1.1533879277050072,
      "grad_norm": 0.5296907424926758,
      "learning_rate": 7.743916501387032e-06,
      "loss": 0.0256,
      "step": 704780
    },
    {
      "epoch": 1.1534206581436603,
      "grad_norm": 2.4430344104766846,
      "learning_rate": 7.743850609173514e-06,
      "loss": 0.0219,
      "step": 704800
    },
    {
      "epoch": 1.1534533885823137,
      "grad_norm": 0.11535676568746567,
      "learning_rate": 7.743784716959998e-06,
      "loss": 0.0123,
      "step": 704820
    },
    {
      "epoch": 1.153486119020967,
      "grad_norm": 0.8684215545654297,
      "learning_rate": 7.74371882474648e-06,
      "loss": 0.0209,
      "step": 704840
    },
    {
      "epoch": 1.1535188494596205,
      "grad_norm": 1.5861845016479492,
      "learning_rate": 7.743652932532963e-06,
      "loss": 0.0295,
      "step": 704860
    },
    {
      "epoch": 1.1535515798982738,
      "grad_norm": 0.4318496882915497,
      "learning_rate": 7.743587040319445e-06,
      "loss": 0.021,
      "step": 704880
    },
    {
      "epoch": 1.1535843103369272,
      "grad_norm": 0.5112953186035156,
      "learning_rate": 7.743521148105929e-06,
      "loss": 0.0295,
      "step": 704900
    },
    {
      "epoch": 1.1536170407755804,
      "grad_norm": 0.4966457784175873,
      "learning_rate": 7.74345525589241e-06,
      "loss": 0.0203,
      "step": 704920
    },
    {
      "epoch": 1.1536497712142337,
      "grad_norm": 0.6716552972793579,
      "learning_rate": 7.743389363678894e-06,
      "loss": 0.0157,
      "step": 704940
    },
    {
      "epoch": 1.153682501652887,
      "grad_norm": 0.5825624465942383,
      "learning_rate": 7.743323471465378e-06,
      "loss": 0.023,
      "step": 704960
    },
    {
      "epoch": 1.1537152320915405,
      "grad_norm": 1.1184508800506592,
      "learning_rate": 7.74325757925186e-06,
      "loss": 0.0245,
      "step": 704980
    },
    {
      "epoch": 1.1537479625301938,
      "grad_norm": 0.6325873732566833,
      "learning_rate": 7.743191687038343e-06,
      "loss": 0.0207,
      "step": 705000
    },
    {
      "epoch": 1.1537806929688472,
      "grad_norm": 1.1129854917526245,
      "learning_rate": 7.743125794824827e-06,
      "loss": 0.0242,
      "step": 705020
    },
    {
      "epoch": 1.1538134234075006,
      "grad_norm": 0.7172091007232666,
      "learning_rate": 7.743059902611309e-06,
      "loss": 0.0229,
      "step": 705040
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.5680999159812927,
      "learning_rate": 7.742994010397792e-06,
      "loss": 0.0344,
      "step": 705060
    },
    {
      "epoch": 1.1538788842848071,
      "grad_norm": 0.5562769770622253,
      "learning_rate": 7.742928118184276e-06,
      "loss": 0.0277,
      "step": 705080
    },
    {
      "epoch": 1.1539116147234605,
      "grad_norm": 0.6592569947242737,
      "learning_rate": 7.742862225970758e-06,
      "loss": 0.0285,
      "step": 705100
    },
    {
      "epoch": 1.1539443451621139,
      "grad_norm": 0.9006229043006897,
      "learning_rate": 7.742796333757241e-06,
      "loss": 0.0236,
      "step": 705120
    },
    {
      "epoch": 1.1539770756007672,
      "grad_norm": 2.8677985668182373,
      "learning_rate": 7.742730441543723e-06,
      "loss": 0.0256,
      "step": 705140
    },
    {
      "epoch": 1.1540098060394206,
      "grad_norm": 0.4199291169643402,
      "learning_rate": 7.742664549330207e-06,
      "loss": 0.0222,
      "step": 705160
    },
    {
      "epoch": 1.154042536478074,
      "grad_norm": 0.7710492610931396,
      "learning_rate": 7.742598657116689e-06,
      "loss": 0.024,
      "step": 705180
    },
    {
      "epoch": 1.1540752669167271,
      "grad_norm": 0.30568480491638184,
      "learning_rate": 7.742532764903172e-06,
      "loss": 0.028,
      "step": 705200
    },
    {
      "epoch": 1.1541079973553805,
      "grad_norm": 0.5485115051269531,
      "learning_rate": 7.742466872689654e-06,
      "loss": 0.024,
      "step": 705220
    },
    {
      "epoch": 1.1541407277940339,
      "grad_norm": 1.043545126914978,
      "learning_rate": 7.742400980476138e-06,
      "loss": 0.0205,
      "step": 705240
    },
    {
      "epoch": 1.1541734582326872,
      "grad_norm": 0.3888247311115265,
      "learning_rate": 7.74233508826262e-06,
      "loss": 0.0209,
      "step": 705260
    },
    {
      "epoch": 1.1542061886713406,
      "grad_norm": 0.9863126277923584,
      "learning_rate": 7.742269196049103e-06,
      "loss": 0.0183,
      "step": 705280
    },
    {
      "epoch": 1.154238919109994,
      "grad_norm": 0.473373144865036,
      "learning_rate": 7.742203303835585e-06,
      "loss": 0.0203,
      "step": 705300
    },
    {
      "epoch": 1.1542716495486474,
      "grad_norm": 0.6463199853897095,
      "learning_rate": 7.742137411622069e-06,
      "loss": 0.0209,
      "step": 705320
    },
    {
      "epoch": 1.1543043799873005,
      "grad_norm": 0.162577822804451,
      "learning_rate": 7.742071519408552e-06,
      "loss": 0.0151,
      "step": 705340
    },
    {
      "epoch": 1.1543371104259539,
      "grad_norm": 0.24485483765602112,
      "learning_rate": 7.742005627195034e-06,
      "loss": 0.0192,
      "step": 705360
    },
    {
      "epoch": 1.1543698408646073,
      "grad_norm": 1.3093781471252441,
      "learning_rate": 7.741939734981518e-06,
      "loss": 0.0194,
      "step": 705380
    },
    {
      "epoch": 1.1544025713032606,
      "grad_norm": 0.5250375866889954,
      "learning_rate": 7.741873842768002e-06,
      "loss": 0.0314,
      "step": 705400
    },
    {
      "epoch": 1.154435301741914,
      "grad_norm": 0.6946353912353516,
      "learning_rate": 7.741807950554483e-06,
      "loss": 0.0287,
      "step": 705420
    },
    {
      "epoch": 1.1544680321805674,
      "grad_norm": 0.48791685700416565,
      "learning_rate": 7.741742058340967e-06,
      "loss": 0.0209,
      "step": 705440
    },
    {
      "epoch": 1.1545007626192207,
      "grad_norm": 0.27053335309028625,
      "learning_rate": 7.74167616612745e-06,
      "loss": 0.0206,
      "step": 705460
    },
    {
      "epoch": 1.154533493057874,
      "grad_norm": 1.9967868328094482,
      "learning_rate": 7.741610273913932e-06,
      "loss": 0.0296,
      "step": 705480
    },
    {
      "epoch": 1.1545662234965273,
      "grad_norm": 0.16742530465126038,
      "learning_rate": 7.741544381700416e-06,
      "loss": 0.0201,
      "step": 705500
    },
    {
      "epoch": 1.1545989539351806,
      "grad_norm": 1.2246205806732178,
      "learning_rate": 7.741478489486898e-06,
      "loss": 0.0215,
      "step": 705520
    },
    {
      "epoch": 1.154631684373834,
      "grad_norm": 0.4013155996799469,
      "learning_rate": 7.741412597273382e-06,
      "loss": 0.0207,
      "step": 705540
    },
    {
      "epoch": 1.1546644148124874,
      "grad_norm": 0.8532845973968506,
      "learning_rate": 7.741346705059863e-06,
      "loss": 0.0186,
      "step": 705560
    },
    {
      "epoch": 1.1546971452511405,
      "grad_norm": 1.5059809684753418,
      "learning_rate": 7.741280812846347e-06,
      "loss": 0.0221,
      "step": 705580
    },
    {
      "epoch": 1.154729875689794,
      "grad_norm": 0.1348564773797989,
      "learning_rate": 7.741214920632829e-06,
      "loss": 0.0224,
      "step": 705600
    },
    {
      "epoch": 1.1547626061284473,
      "grad_norm": 0.9087123274803162,
      "learning_rate": 7.741149028419313e-06,
      "loss": 0.02,
      "step": 705620
    },
    {
      "epoch": 1.1547953365671006,
      "grad_norm": 1.108498454093933,
      "learning_rate": 7.741083136205794e-06,
      "loss": 0.0245,
      "step": 705640
    },
    {
      "epoch": 1.154828067005754,
      "grad_norm": 1.5270733833312988,
      "learning_rate": 7.741017243992278e-06,
      "loss": 0.0221,
      "step": 705660
    },
    {
      "epoch": 1.1548607974444074,
      "grad_norm": 0.21349069476127625,
      "learning_rate": 7.74095135177876e-06,
      "loss": 0.0244,
      "step": 705680
    },
    {
      "epoch": 1.1548935278830608,
      "grad_norm": 0.6063013076782227,
      "learning_rate": 7.740885459565243e-06,
      "loss": 0.0195,
      "step": 705700
    },
    {
      "epoch": 1.154926258321714,
      "grad_norm": 0.6540190577507019,
      "learning_rate": 7.740819567351725e-06,
      "loss": 0.0225,
      "step": 705720
    },
    {
      "epoch": 1.1549589887603673,
      "grad_norm": 3.0958821773529053,
      "learning_rate": 7.740753675138209e-06,
      "loss": 0.0245,
      "step": 705740
    },
    {
      "epoch": 1.1549917191990207,
      "grad_norm": 0.32545140385627747,
      "learning_rate": 7.740687782924693e-06,
      "loss": 0.0198,
      "step": 705760
    },
    {
      "epoch": 1.155024449637674,
      "grad_norm": 0.3765709102153778,
      "learning_rate": 7.740621890711174e-06,
      "loss": 0.0205,
      "step": 705780
    },
    {
      "epoch": 1.1550571800763274,
      "grad_norm": 0.5556948184967041,
      "learning_rate": 7.740555998497658e-06,
      "loss": 0.02,
      "step": 705800
    },
    {
      "epoch": 1.1550899105149808,
      "grad_norm": 3.3442940711975098,
      "learning_rate": 7.740490106284142e-06,
      "loss": 0.0228,
      "step": 705820
    },
    {
      "epoch": 1.1551226409536341,
      "grad_norm": 0.7045186161994934,
      "learning_rate": 7.740424214070623e-06,
      "loss": 0.0296,
      "step": 705840
    },
    {
      "epoch": 1.1551553713922873,
      "grad_norm": 0.03718835115432739,
      "learning_rate": 7.740358321857107e-06,
      "loss": 0.0196,
      "step": 705860
    },
    {
      "epoch": 1.1551881018309407,
      "grad_norm": 0.8667373061180115,
      "learning_rate": 7.74029242964359e-06,
      "loss": 0.0256,
      "step": 705880
    },
    {
      "epoch": 1.155220832269594,
      "grad_norm": 1.226969838142395,
      "learning_rate": 7.740226537430073e-06,
      "loss": 0.0271,
      "step": 705900
    },
    {
      "epoch": 1.1552535627082474,
      "grad_norm": 0.6039047241210938,
      "learning_rate": 7.740160645216556e-06,
      "loss": 0.0234,
      "step": 705920
    },
    {
      "epoch": 1.1552862931469008,
      "grad_norm": 1.0658835172653198,
      "learning_rate": 7.740094753003038e-06,
      "loss": 0.0356,
      "step": 705940
    },
    {
      "epoch": 1.1553190235855542,
      "grad_norm": 0.195645272731781,
      "learning_rate": 7.740028860789522e-06,
      "loss": 0.027,
      "step": 705960
    },
    {
      "epoch": 1.1553517540242075,
      "grad_norm": 0.5232712626457214,
      "learning_rate": 7.739962968576004e-06,
      "loss": 0.0261,
      "step": 705980
    },
    {
      "epoch": 1.1553844844628607,
      "grad_norm": 0.5195175409317017,
      "learning_rate": 7.739897076362487e-06,
      "loss": 0.0219,
      "step": 706000
    },
    {
      "epoch": 1.155417214901514,
      "grad_norm": 0.21661867201328278,
      "learning_rate": 7.739831184148969e-06,
      "loss": 0.0231,
      "step": 706020
    },
    {
      "epoch": 1.1554499453401674,
      "grad_norm": 0.2836569547653198,
      "learning_rate": 7.739765291935453e-06,
      "loss": 0.0246,
      "step": 706040
    },
    {
      "epoch": 1.1554826757788208,
      "grad_norm": 0.7440897226333618,
      "learning_rate": 7.739699399721934e-06,
      "loss": 0.013,
      "step": 706060
    },
    {
      "epoch": 1.1555154062174742,
      "grad_norm": 0.8532028794288635,
      "learning_rate": 7.739633507508418e-06,
      "loss": 0.0279,
      "step": 706080
    },
    {
      "epoch": 1.1555481366561275,
      "grad_norm": 0.5683332085609436,
      "learning_rate": 7.739567615294902e-06,
      "loss": 0.0226,
      "step": 706100
    },
    {
      "epoch": 1.155580867094781,
      "grad_norm": 0.14028219878673553,
      "learning_rate": 7.739501723081384e-06,
      "loss": 0.0272,
      "step": 706120
    },
    {
      "epoch": 1.155613597533434,
      "grad_norm": 0.25338563323020935,
      "learning_rate": 7.739435830867867e-06,
      "loss": 0.0209,
      "step": 706140
    },
    {
      "epoch": 1.1556463279720874,
      "grad_norm": 1.0198783874511719,
      "learning_rate": 7.739369938654349e-06,
      "loss": 0.0259,
      "step": 706160
    },
    {
      "epoch": 1.1556790584107408,
      "grad_norm": 0.15037858486175537,
      "learning_rate": 7.739304046440833e-06,
      "loss": 0.0149,
      "step": 706180
    },
    {
      "epoch": 1.1557117888493942,
      "grad_norm": 0.6745322346687317,
      "learning_rate": 7.739238154227316e-06,
      "loss": 0.0201,
      "step": 706200
    },
    {
      "epoch": 1.1557445192880476,
      "grad_norm": 2.9831154346466064,
      "learning_rate": 7.739172262013798e-06,
      "loss": 0.0265,
      "step": 706220
    },
    {
      "epoch": 1.155777249726701,
      "grad_norm": 0.9823232889175415,
      "learning_rate": 7.739106369800282e-06,
      "loss": 0.0168,
      "step": 706240
    },
    {
      "epoch": 1.1558099801653543,
      "grad_norm": 1.6158034801483154,
      "learning_rate": 7.739040477586765e-06,
      "loss": 0.0255,
      "step": 706260
    },
    {
      "epoch": 1.1558427106040075,
      "grad_norm": 0.41091227531433105,
      "learning_rate": 7.738974585373247e-06,
      "loss": 0.0202,
      "step": 706280
    },
    {
      "epoch": 1.1558754410426608,
      "grad_norm": 1.2804418802261353,
      "learning_rate": 7.73890869315973e-06,
      "loss": 0.0318,
      "step": 706300
    },
    {
      "epoch": 1.1559081714813142,
      "grad_norm": 0.5632696151733398,
      "learning_rate": 7.738842800946213e-06,
      "loss": 0.028,
      "step": 706320
    },
    {
      "epoch": 1.1559409019199676,
      "grad_norm": 0.7202029824256897,
      "learning_rate": 7.738776908732696e-06,
      "loss": 0.021,
      "step": 706340
    },
    {
      "epoch": 1.155973632358621,
      "grad_norm": 3.6227056980133057,
      "learning_rate": 7.738711016519178e-06,
      "loss": 0.0282,
      "step": 706360
    },
    {
      "epoch": 1.156006362797274,
      "grad_norm": 0.6192127466201782,
      "learning_rate": 7.738645124305662e-06,
      "loss": 0.0266,
      "step": 706380
    },
    {
      "epoch": 1.1560390932359275,
      "grad_norm": 0.7907826900482178,
      "learning_rate": 7.738579232092144e-06,
      "loss": 0.0284,
      "step": 706400
    },
    {
      "epoch": 1.1560718236745808,
      "grad_norm": 0.20124419033527374,
      "learning_rate": 7.738513339878627e-06,
      "loss": 0.0215,
      "step": 706420
    },
    {
      "epoch": 1.1561045541132342,
      "grad_norm": 0.5437428951263428,
      "learning_rate": 7.73844744766511e-06,
      "loss": 0.0202,
      "step": 706440
    },
    {
      "epoch": 1.1561372845518876,
      "grad_norm": 0.48327967524528503,
      "learning_rate": 7.738381555451593e-06,
      "loss": 0.0242,
      "step": 706460
    },
    {
      "epoch": 1.156170014990541,
      "grad_norm": 1.0248616933822632,
      "learning_rate": 7.738315663238076e-06,
      "loss": 0.0239,
      "step": 706480
    },
    {
      "epoch": 1.1562027454291943,
      "grad_norm": 0.43529921770095825,
      "learning_rate": 7.738249771024558e-06,
      "loss": 0.0218,
      "step": 706500
    },
    {
      "epoch": 1.1562354758678475,
      "grad_norm": 0.2659355103969574,
      "learning_rate": 7.738183878811042e-06,
      "loss": 0.028,
      "step": 706520
    },
    {
      "epoch": 1.1562682063065008,
      "grad_norm": 0.3047657608985901,
      "learning_rate": 7.738117986597524e-06,
      "loss": 0.0171,
      "step": 706540
    },
    {
      "epoch": 1.1563009367451542,
      "grad_norm": 0.39824220538139343,
      "learning_rate": 7.738052094384007e-06,
      "loss": 0.0171,
      "step": 706560
    },
    {
      "epoch": 1.1563336671838076,
      "grad_norm": 0.16512523591518402,
      "learning_rate": 7.737986202170489e-06,
      "loss": 0.0313,
      "step": 706580
    },
    {
      "epoch": 1.156366397622461,
      "grad_norm": 1.1622240543365479,
      "learning_rate": 7.737920309956973e-06,
      "loss": 0.0157,
      "step": 706600
    },
    {
      "epoch": 1.1563991280611143,
      "grad_norm": 0.8079947829246521,
      "learning_rate": 7.737854417743456e-06,
      "loss": 0.0207,
      "step": 706620
    },
    {
      "epoch": 1.1564318584997677,
      "grad_norm": 1.4284987449645996,
      "learning_rate": 7.737788525529938e-06,
      "loss": 0.0262,
      "step": 706640
    },
    {
      "epoch": 1.1564645889384209,
      "grad_norm": 0.7694624662399292,
      "learning_rate": 7.737722633316422e-06,
      "loss": 0.0293,
      "step": 706660
    },
    {
      "epoch": 1.1564973193770742,
      "grad_norm": 1.1085988283157349,
      "learning_rate": 7.737656741102905e-06,
      "loss": 0.0206,
      "step": 706680
    },
    {
      "epoch": 1.1565300498157276,
      "grad_norm": 1.9449377059936523,
      "learning_rate": 7.737590848889387e-06,
      "loss": 0.0278,
      "step": 706700
    },
    {
      "epoch": 1.156562780254381,
      "grad_norm": 0.9064176678657532,
      "learning_rate": 7.73752495667587e-06,
      "loss": 0.0177,
      "step": 706720
    },
    {
      "epoch": 1.1565955106930343,
      "grad_norm": 0.22733370959758759,
      "learning_rate": 7.737459064462353e-06,
      "loss": 0.0139,
      "step": 706740
    },
    {
      "epoch": 1.1566282411316877,
      "grad_norm": 0.5647625923156738,
      "learning_rate": 7.737393172248836e-06,
      "loss": 0.0291,
      "step": 706760
    },
    {
      "epoch": 1.156660971570341,
      "grad_norm": 0.7487410306930542,
      "learning_rate": 7.73732728003532e-06,
      "loss": 0.0244,
      "step": 706780
    },
    {
      "epoch": 1.1566937020089942,
      "grad_norm": 0.16530704498291016,
      "learning_rate": 7.737261387821802e-06,
      "loss": 0.0171,
      "step": 706800
    },
    {
      "epoch": 1.1567264324476476,
      "grad_norm": 0.5359938144683838,
      "learning_rate": 7.737195495608285e-06,
      "loss": 0.018,
      "step": 706820
    },
    {
      "epoch": 1.156759162886301,
      "grad_norm": 0.9929589629173279,
      "learning_rate": 7.737129603394767e-06,
      "loss": 0.0164,
      "step": 706840
    },
    {
      "epoch": 1.1567918933249544,
      "grad_norm": 1.337769865989685,
      "learning_rate": 7.73706371118125e-06,
      "loss": 0.0322,
      "step": 706860
    },
    {
      "epoch": 1.1568246237636077,
      "grad_norm": 0.5475215911865234,
      "learning_rate": 7.736997818967733e-06,
      "loss": 0.0254,
      "step": 706880
    },
    {
      "epoch": 1.156857354202261,
      "grad_norm": 0.520457923412323,
      "learning_rate": 7.736931926754216e-06,
      "loss": 0.0257,
      "step": 706900
    },
    {
      "epoch": 1.1568900846409145,
      "grad_norm": 0.3770484924316406,
      "learning_rate": 7.736866034540698e-06,
      "loss": 0.0272,
      "step": 706920
    },
    {
      "epoch": 1.1569228150795676,
      "grad_norm": 0.8818995952606201,
      "learning_rate": 7.736800142327182e-06,
      "loss": 0.0269,
      "step": 706940
    },
    {
      "epoch": 1.156955545518221,
      "grad_norm": 0.44211313128471375,
      "learning_rate": 7.736734250113664e-06,
      "loss": 0.027,
      "step": 706960
    },
    {
      "epoch": 1.1569882759568744,
      "grad_norm": 0.3731028437614441,
      "learning_rate": 7.736668357900147e-06,
      "loss": 0.0207,
      "step": 706980
    },
    {
      "epoch": 1.1570210063955277,
      "grad_norm": 0.4342561364173889,
      "learning_rate": 7.736602465686631e-06,
      "loss": 0.019,
      "step": 707000
    },
    {
      "epoch": 1.1570537368341811,
      "grad_norm": 0.6984848976135254,
      "learning_rate": 7.736536573473113e-06,
      "loss": 0.0266,
      "step": 707020
    },
    {
      "epoch": 1.1570864672728345,
      "grad_norm": 0.28560885787010193,
      "learning_rate": 7.736470681259596e-06,
      "loss": 0.0223,
      "step": 707040
    },
    {
      "epoch": 1.1571191977114876,
      "grad_norm": 0.24742597341537476,
      "learning_rate": 7.73640478904608e-06,
      "loss": 0.0233,
      "step": 707060
    },
    {
      "epoch": 1.157151928150141,
      "grad_norm": 0.6523686647415161,
      "learning_rate": 7.736338896832562e-06,
      "loss": 0.0201,
      "step": 707080
    },
    {
      "epoch": 1.1571846585887944,
      "grad_norm": 1.6668773889541626,
      "learning_rate": 7.736273004619045e-06,
      "loss": 0.0355,
      "step": 707100
    },
    {
      "epoch": 1.1572173890274478,
      "grad_norm": 0.5024721622467041,
      "learning_rate": 7.736207112405527e-06,
      "loss": 0.0297,
      "step": 707120
    },
    {
      "epoch": 1.1572501194661011,
      "grad_norm": 0.34020277857780457,
      "learning_rate": 7.736141220192011e-06,
      "loss": 0.0228,
      "step": 707140
    },
    {
      "epoch": 1.1572828499047545,
      "grad_norm": 0.3063001036643982,
      "learning_rate": 7.736075327978494e-06,
      "loss": 0.0219,
      "step": 707160
    },
    {
      "epoch": 1.1573155803434076,
      "grad_norm": 0.31974175572395325,
      "learning_rate": 7.736009435764976e-06,
      "loss": 0.0218,
      "step": 707180
    },
    {
      "epoch": 1.157348310782061,
      "grad_norm": 0.505059003829956,
      "learning_rate": 7.73594354355146e-06,
      "loss": 0.0257,
      "step": 707200
    },
    {
      "epoch": 1.1573810412207144,
      "grad_norm": 0.8533353209495544,
      "learning_rate": 7.735877651337942e-06,
      "loss": 0.0239,
      "step": 707220
    },
    {
      "epoch": 1.1574137716593678,
      "grad_norm": 2.1053435802459717,
      "learning_rate": 7.735811759124425e-06,
      "loss": 0.029,
      "step": 707240
    },
    {
      "epoch": 1.1574465020980211,
      "grad_norm": 0.7178959250450134,
      "learning_rate": 7.735745866910907e-06,
      "loss": 0.03,
      "step": 707260
    },
    {
      "epoch": 1.1574792325366745,
      "grad_norm": 0.4537127614021301,
      "learning_rate": 7.735679974697391e-06,
      "loss": 0.0208,
      "step": 707280
    },
    {
      "epoch": 1.1575119629753279,
      "grad_norm": 0.6896468997001648,
      "learning_rate": 7.735614082483873e-06,
      "loss": 0.0231,
      "step": 707300
    },
    {
      "epoch": 1.157544693413981,
      "grad_norm": 0.8228941559791565,
      "learning_rate": 7.735548190270356e-06,
      "loss": 0.0254,
      "step": 707320
    },
    {
      "epoch": 1.1575774238526344,
      "grad_norm": 0.710759162902832,
      "learning_rate": 7.735482298056838e-06,
      "loss": 0.029,
      "step": 707340
    },
    {
      "epoch": 1.1576101542912878,
      "grad_norm": 0.45238080620765686,
      "learning_rate": 7.735416405843322e-06,
      "loss": 0.0237,
      "step": 707360
    },
    {
      "epoch": 1.1576428847299411,
      "grad_norm": 0.6153894662857056,
      "learning_rate": 7.735350513629804e-06,
      "loss": 0.0268,
      "step": 707380
    },
    {
      "epoch": 1.1576756151685945,
      "grad_norm": 1.329556941986084,
      "learning_rate": 7.735284621416287e-06,
      "loss": 0.027,
      "step": 707400
    },
    {
      "epoch": 1.157708345607248,
      "grad_norm": 0.43266624212265015,
      "learning_rate": 7.735218729202771e-06,
      "loss": 0.025,
      "step": 707420
    },
    {
      "epoch": 1.1577410760459013,
      "grad_norm": 1.3186382055282593,
      "learning_rate": 7.735152836989255e-06,
      "loss": 0.02,
      "step": 707440
    },
    {
      "epoch": 1.1577738064845544,
      "grad_norm": 0.7809138298034668,
      "learning_rate": 7.735086944775736e-06,
      "loss": 0.0292,
      "step": 707460
    },
    {
      "epoch": 1.1578065369232078,
      "grad_norm": 0.9868241548538208,
      "learning_rate": 7.73502105256222e-06,
      "loss": 0.0274,
      "step": 707480
    },
    {
      "epoch": 1.1578392673618612,
      "grad_norm": 0.4138650894165039,
      "learning_rate": 7.734955160348704e-06,
      "loss": 0.0266,
      "step": 707500
    },
    {
      "epoch": 1.1578719978005145,
      "grad_norm": 0.18907122313976288,
      "learning_rate": 7.734889268135185e-06,
      "loss": 0.0374,
      "step": 707520
    },
    {
      "epoch": 1.157904728239168,
      "grad_norm": 0.2935134768486023,
      "learning_rate": 7.734823375921669e-06,
      "loss": 0.0252,
      "step": 707540
    },
    {
      "epoch": 1.1579374586778213,
      "grad_norm": 0.5371162295341492,
      "learning_rate": 7.734757483708151e-06,
      "loss": 0.0266,
      "step": 707560
    },
    {
      "epoch": 1.1579701891164746,
      "grad_norm": 1.1731752157211304,
      "learning_rate": 7.734691591494635e-06,
      "loss": 0.0224,
      "step": 707580
    },
    {
      "epoch": 1.1580029195551278,
      "grad_norm": 1.0234813690185547,
      "learning_rate": 7.734625699281116e-06,
      "loss": 0.0257,
      "step": 707600
    },
    {
      "epoch": 1.1580356499937812,
      "grad_norm": 2.007068634033203,
      "learning_rate": 7.7345598070676e-06,
      "loss": 0.0298,
      "step": 707620
    },
    {
      "epoch": 1.1580683804324345,
      "grad_norm": 0.44528284668922424,
      "learning_rate": 7.734493914854082e-06,
      "loss": 0.0239,
      "step": 707640
    },
    {
      "epoch": 1.158101110871088,
      "grad_norm": 0.35953715443611145,
      "learning_rate": 7.734428022640566e-06,
      "loss": 0.019,
      "step": 707660
    },
    {
      "epoch": 1.1581338413097413,
      "grad_norm": 1.775087833404541,
      "learning_rate": 7.734362130427047e-06,
      "loss": 0.0315,
      "step": 707680
    },
    {
      "epoch": 1.1581665717483947,
      "grad_norm": 0.5334793925285339,
      "learning_rate": 7.734296238213531e-06,
      "loss": 0.0249,
      "step": 707700
    },
    {
      "epoch": 1.158199302187048,
      "grad_norm": 1.8433820009231567,
      "learning_rate": 7.734230346000013e-06,
      "loss": 0.0278,
      "step": 707720
    },
    {
      "epoch": 1.1582320326257012,
      "grad_norm": 0.8890863656997681,
      "learning_rate": 7.734164453786496e-06,
      "loss": 0.0218,
      "step": 707740
    },
    {
      "epoch": 1.1582647630643546,
      "grad_norm": 1.448843002319336,
      "learning_rate": 7.734098561572978e-06,
      "loss": 0.0308,
      "step": 707760
    },
    {
      "epoch": 1.158297493503008,
      "grad_norm": 1.0246226787567139,
      "learning_rate": 7.734032669359462e-06,
      "loss": 0.0273,
      "step": 707780
    },
    {
      "epoch": 1.1583302239416613,
      "grad_norm": 0.5472254157066345,
      "learning_rate": 7.733966777145946e-06,
      "loss": 0.0251,
      "step": 707800
    },
    {
      "epoch": 1.1583629543803147,
      "grad_norm": 1.1821695566177368,
      "learning_rate": 7.733900884932427e-06,
      "loss": 0.019,
      "step": 707820
    },
    {
      "epoch": 1.1583956848189678,
      "grad_norm": 1.5737717151641846,
      "learning_rate": 7.733834992718911e-06,
      "loss": 0.0276,
      "step": 707840
    },
    {
      "epoch": 1.1584284152576212,
      "grad_norm": 0.9094067811965942,
      "learning_rate": 7.733769100505395e-06,
      "loss": 0.032,
      "step": 707860
    },
    {
      "epoch": 1.1584611456962746,
      "grad_norm": 0.979681670665741,
      "learning_rate": 7.733703208291876e-06,
      "loss": 0.0284,
      "step": 707880
    },
    {
      "epoch": 1.158493876134928,
      "grad_norm": 1.5219765901565552,
      "learning_rate": 7.73363731607836e-06,
      "loss": 0.0257,
      "step": 707900
    },
    {
      "epoch": 1.1585266065735813,
      "grad_norm": 1.0929368734359741,
      "learning_rate": 7.733571423864844e-06,
      "loss": 0.03,
      "step": 707920
    },
    {
      "epoch": 1.1585593370122347,
      "grad_norm": 0.6163389682769775,
      "learning_rate": 7.733505531651326e-06,
      "loss": 0.0247,
      "step": 707940
    },
    {
      "epoch": 1.158592067450888,
      "grad_norm": 0.6150147318840027,
      "learning_rate": 7.733439639437809e-06,
      "loss": 0.0267,
      "step": 707960
    },
    {
      "epoch": 1.1586247978895412,
      "grad_norm": 0.8885523676872253,
      "learning_rate": 7.733373747224291e-06,
      "loss": 0.029,
      "step": 707980
    },
    {
      "epoch": 1.1586575283281946,
      "grad_norm": 1.142970085144043,
      "learning_rate": 7.733307855010775e-06,
      "loss": 0.0282,
      "step": 708000
    },
    {
      "epoch": 1.158690258766848,
      "grad_norm": 0.991470217704773,
      "learning_rate": 7.733241962797257e-06,
      "loss": 0.0241,
      "step": 708020
    },
    {
      "epoch": 1.1587229892055013,
      "grad_norm": 0.22452986240386963,
      "learning_rate": 7.73317607058374e-06,
      "loss": 0.0235,
      "step": 708040
    },
    {
      "epoch": 1.1587557196441547,
      "grad_norm": 0.21778543293476105,
      "learning_rate": 7.733110178370222e-06,
      "loss": 0.0241,
      "step": 708060
    },
    {
      "epoch": 1.158788450082808,
      "grad_norm": 1.611314058303833,
      "learning_rate": 7.733044286156706e-06,
      "loss": 0.0223,
      "step": 708080
    },
    {
      "epoch": 1.1588211805214614,
      "grad_norm": 2.16990327835083,
      "learning_rate": 7.732978393943187e-06,
      "loss": 0.0149,
      "step": 708100
    },
    {
      "epoch": 1.1588539109601146,
      "grad_norm": 0.32441648840904236,
      "learning_rate": 7.732912501729671e-06,
      "loss": 0.0278,
      "step": 708120
    },
    {
      "epoch": 1.158886641398768,
      "grad_norm": 0.2556666433811188,
      "learning_rate": 7.732846609516153e-06,
      "loss": 0.0246,
      "step": 708140
    },
    {
      "epoch": 1.1589193718374213,
      "grad_norm": 0.8112775683403015,
      "learning_rate": 7.732780717302637e-06,
      "loss": 0.0341,
      "step": 708160
    },
    {
      "epoch": 1.1589521022760747,
      "grad_norm": 1.675764799118042,
      "learning_rate": 7.73271482508912e-06,
      "loss": 0.0265,
      "step": 708180
    },
    {
      "epoch": 1.158984832714728,
      "grad_norm": 0.6597199440002441,
      "learning_rate": 7.732648932875602e-06,
      "loss": 0.0232,
      "step": 708200
    },
    {
      "epoch": 1.1590175631533814,
      "grad_norm": 0.4774182140827179,
      "learning_rate": 7.732583040662086e-06,
      "loss": 0.0199,
      "step": 708220
    },
    {
      "epoch": 1.1590502935920348,
      "grad_norm": 0.38956567645072937,
      "learning_rate": 7.73251714844857e-06,
      "loss": 0.0252,
      "step": 708240
    },
    {
      "epoch": 1.159083024030688,
      "grad_norm": 0.5585517883300781,
      "learning_rate": 7.732451256235051e-06,
      "loss": 0.0246,
      "step": 708260
    },
    {
      "epoch": 1.1591157544693413,
      "grad_norm": 0.9625203609466553,
      "learning_rate": 7.732385364021535e-06,
      "loss": 0.0238,
      "step": 708280
    },
    {
      "epoch": 1.1591484849079947,
      "grad_norm": 0.5929361581802368,
      "learning_rate": 7.732319471808018e-06,
      "loss": 0.0233,
      "step": 708300
    },
    {
      "epoch": 1.159181215346648,
      "grad_norm": 0.5079782009124756,
      "learning_rate": 7.7322535795945e-06,
      "loss": 0.0188,
      "step": 708320
    },
    {
      "epoch": 1.1592139457853015,
      "grad_norm": 1.1711387634277344,
      "learning_rate": 7.732187687380984e-06,
      "loss": 0.0298,
      "step": 708340
    },
    {
      "epoch": 1.1592466762239548,
      "grad_norm": 0.30982524156570435,
      "learning_rate": 7.732121795167466e-06,
      "loss": 0.0265,
      "step": 708360
    },
    {
      "epoch": 1.1592794066626082,
      "grad_norm": 0.32678723335266113,
      "learning_rate": 7.73205590295395e-06,
      "loss": 0.0178,
      "step": 708380
    },
    {
      "epoch": 1.1593121371012614,
      "grad_norm": 0.391411155462265,
      "learning_rate": 7.731990010740431e-06,
      "loss": 0.027,
      "step": 708400
    },
    {
      "epoch": 1.1593448675399147,
      "grad_norm": 1.171725869178772,
      "learning_rate": 7.731924118526915e-06,
      "loss": 0.0207,
      "step": 708420
    },
    {
      "epoch": 1.159377597978568,
      "grad_norm": 1.4343348741531372,
      "learning_rate": 7.731858226313397e-06,
      "loss": 0.0201,
      "step": 708440
    },
    {
      "epoch": 1.1594103284172215,
      "grad_norm": 0.296489953994751,
      "learning_rate": 7.73179233409988e-06,
      "loss": 0.0191,
      "step": 708460
    },
    {
      "epoch": 1.1594430588558748,
      "grad_norm": 0.880752444267273,
      "learning_rate": 7.731726441886362e-06,
      "loss": 0.0267,
      "step": 708480
    },
    {
      "epoch": 1.1594757892945282,
      "grad_norm": 0.8552099466323853,
      "learning_rate": 7.731660549672846e-06,
      "loss": 0.0347,
      "step": 708500
    },
    {
      "epoch": 1.1595085197331816,
      "grad_norm": 0.32854026556015015,
      "learning_rate": 7.731594657459328e-06,
      "loss": 0.0239,
      "step": 708520
    },
    {
      "epoch": 1.1595412501718347,
      "grad_norm": 0.48306912183761597,
      "learning_rate": 7.731528765245811e-06,
      "loss": 0.0151,
      "step": 708540
    },
    {
      "epoch": 1.159573980610488,
      "grad_norm": 1.2079960107803345,
      "learning_rate": 7.731462873032295e-06,
      "loss": 0.0272,
      "step": 708560
    },
    {
      "epoch": 1.1596067110491415,
      "grad_norm": 1.36453115940094,
      "learning_rate": 7.731396980818777e-06,
      "loss": 0.0217,
      "step": 708580
    },
    {
      "epoch": 1.1596394414877949,
      "grad_norm": 0.5367059707641602,
      "learning_rate": 7.73133108860526e-06,
      "loss": 0.0282,
      "step": 708600
    },
    {
      "epoch": 1.1596721719264482,
      "grad_norm": 1.0283857583999634,
      "learning_rate": 7.731265196391742e-06,
      "loss": 0.0302,
      "step": 708620
    },
    {
      "epoch": 1.1597049023651014,
      "grad_norm": 0.43534719944000244,
      "learning_rate": 7.731199304178226e-06,
      "loss": 0.0219,
      "step": 708640
    },
    {
      "epoch": 1.1597376328037547,
      "grad_norm": 0.4388798773288727,
      "learning_rate": 7.73113341196471e-06,
      "loss": 0.0236,
      "step": 708660
    },
    {
      "epoch": 1.1597703632424081,
      "grad_norm": 0.376787930727005,
      "learning_rate": 7.731067519751191e-06,
      "loss": 0.0227,
      "step": 708680
    },
    {
      "epoch": 1.1598030936810615,
      "grad_norm": 0.4783218502998352,
      "learning_rate": 7.731001627537675e-06,
      "loss": 0.0284,
      "step": 708700
    },
    {
      "epoch": 1.1598358241197149,
      "grad_norm": 0.8109282851219177,
      "learning_rate": 7.730935735324158e-06,
      "loss": 0.0298,
      "step": 708720
    },
    {
      "epoch": 1.1598685545583682,
      "grad_norm": 0.6427232623100281,
      "learning_rate": 7.73086984311064e-06,
      "loss": 0.0252,
      "step": 708740
    },
    {
      "epoch": 1.1599012849970216,
      "grad_norm": 0.30999448895454407,
      "learning_rate": 7.730803950897124e-06,
      "loss": 0.0381,
      "step": 708760
    },
    {
      "epoch": 1.1599340154356748,
      "grad_norm": 0.8998530507087708,
      "learning_rate": 7.730738058683606e-06,
      "loss": 0.0223,
      "step": 708780
    },
    {
      "epoch": 1.1599667458743281,
      "grad_norm": 0.9928139448165894,
      "learning_rate": 7.73067216647009e-06,
      "loss": 0.0214,
      "step": 708800
    },
    {
      "epoch": 1.1599994763129815,
      "grad_norm": 0.49531853199005127,
      "learning_rate": 7.730606274256571e-06,
      "loss": 0.0199,
      "step": 708820
    },
    {
      "epoch": 1.1600322067516349,
      "grad_norm": 0.3235539495944977,
      "learning_rate": 7.730540382043055e-06,
      "loss": 0.0212,
      "step": 708840
    },
    {
      "epoch": 1.1600649371902882,
      "grad_norm": 0.7882853150367737,
      "learning_rate": 7.730474489829537e-06,
      "loss": 0.0265,
      "step": 708860
    },
    {
      "epoch": 1.1600976676289416,
      "grad_norm": 0.8580099940299988,
      "learning_rate": 7.73040859761602e-06,
      "loss": 0.0133,
      "step": 708880
    },
    {
      "epoch": 1.160130398067595,
      "grad_norm": 1.5004276037216187,
      "learning_rate": 7.730342705402504e-06,
      "loss": 0.0257,
      "step": 708900
    },
    {
      "epoch": 1.1601631285062481,
      "grad_norm": 0.40172815322875977,
      "learning_rate": 7.730276813188986e-06,
      "loss": 0.0171,
      "step": 708920
    },
    {
      "epoch": 1.1601958589449015,
      "grad_norm": 0.5497339963912964,
      "learning_rate": 7.73021092097547e-06,
      "loss": 0.0228,
      "step": 708940
    },
    {
      "epoch": 1.1602285893835549,
      "grad_norm": 1.29806387424469,
      "learning_rate": 7.730145028761951e-06,
      "loss": 0.034,
      "step": 708960
    },
    {
      "epoch": 1.1602613198222083,
      "grad_norm": 1.908564567565918,
      "learning_rate": 7.730079136548435e-06,
      "loss": 0.0261,
      "step": 708980
    },
    {
      "epoch": 1.1602940502608616,
      "grad_norm": 4.946745872497559,
      "learning_rate": 7.730013244334917e-06,
      "loss": 0.0253,
      "step": 709000
    },
    {
      "epoch": 1.160326780699515,
      "grad_norm": 0.9100732803344727,
      "learning_rate": 7.7299473521214e-06,
      "loss": 0.0179,
      "step": 709020
    },
    {
      "epoch": 1.1603595111381684,
      "grad_norm": 0.46573951840400696,
      "learning_rate": 7.729881459907884e-06,
      "loss": 0.033,
      "step": 709040
    },
    {
      "epoch": 1.1603922415768215,
      "grad_norm": 0.902148425579071,
      "learning_rate": 7.729815567694366e-06,
      "loss": 0.0228,
      "step": 709060
    },
    {
      "epoch": 1.160424972015475,
      "grad_norm": 0.9311059713363647,
      "learning_rate": 7.72974967548085e-06,
      "loss": 0.0261,
      "step": 709080
    },
    {
      "epoch": 1.1604577024541283,
      "grad_norm": 0.7632900476455688,
      "learning_rate": 7.729683783267333e-06,
      "loss": 0.0202,
      "step": 709100
    },
    {
      "epoch": 1.1604904328927816,
      "grad_norm": 0.7974615097045898,
      "learning_rate": 7.729617891053815e-06,
      "loss": 0.0302,
      "step": 709120
    },
    {
      "epoch": 1.160523163331435,
      "grad_norm": 0.5920674204826355,
      "learning_rate": 7.729551998840298e-06,
      "loss": 0.0191,
      "step": 709140
    },
    {
      "epoch": 1.1605558937700884,
      "grad_norm": 0.5931897759437561,
      "learning_rate": 7.72948610662678e-06,
      "loss": 0.0167,
      "step": 709160
    },
    {
      "epoch": 1.1605886242087418,
      "grad_norm": 0.586293637752533,
      "learning_rate": 7.729420214413264e-06,
      "loss": 0.0257,
      "step": 709180
    },
    {
      "epoch": 1.160621354647395,
      "grad_norm": 0.4340839684009552,
      "learning_rate": 7.729354322199746e-06,
      "loss": 0.0186,
      "step": 709200
    },
    {
      "epoch": 1.1606540850860483,
      "grad_norm": 0.4296128749847412,
      "learning_rate": 7.72928842998623e-06,
      "loss": 0.0151,
      "step": 709220
    },
    {
      "epoch": 1.1606868155247017,
      "grad_norm": 0.9753588438034058,
      "learning_rate": 7.729222537772711e-06,
      "loss": 0.0181,
      "step": 709240
    },
    {
      "epoch": 1.160719545963355,
      "grad_norm": 0.528491199016571,
      "learning_rate": 7.729156645559195e-06,
      "loss": 0.0186,
      "step": 709260
    },
    {
      "epoch": 1.1607522764020084,
      "grad_norm": 0.7632255554199219,
      "learning_rate": 7.729090753345678e-06,
      "loss": 0.0195,
      "step": 709280
    },
    {
      "epoch": 1.1607850068406618,
      "grad_norm": 0.5708584189414978,
      "learning_rate": 7.72902486113216e-06,
      "loss": 0.0303,
      "step": 709300
    },
    {
      "epoch": 1.160817737279315,
      "grad_norm": 0.7760518789291382,
      "learning_rate": 7.728958968918644e-06,
      "loss": 0.0249,
      "step": 709320
    },
    {
      "epoch": 1.1608504677179683,
      "grad_norm": 0.1767335683107376,
      "learning_rate": 7.728893076705126e-06,
      "loss": 0.0145,
      "step": 709340
    },
    {
      "epoch": 1.1608831981566217,
      "grad_norm": 0.49098217487335205,
      "learning_rate": 7.72882718449161e-06,
      "loss": 0.0211,
      "step": 709360
    },
    {
      "epoch": 1.160915928595275,
      "grad_norm": 0.5512309670448303,
      "learning_rate": 7.728761292278091e-06,
      "loss": 0.0151,
      "step": 709380
    },
    {
      "epoch": 1.1609486590339284,
      "grad_norm": 0.2669498026371002,
      "learning_rate": 7.728695400064575e-06,
      "loss": 0.0257,
      "step": 709400
    },
    {
      "epoch": 1.1609813894725818,
      "grad_norm": 0.6674008369445801,
      "learning_rate": 7.728629507851057e-06,
      "loss": 0.0202,
      "step": 709420
    },
    {
      "epoch": 1.161014119911235,
      "grad_norm": 1.4003722667694092,
      "learning_rate": 7.72856361563754e-06,
      "loss": 0.0165,
      "step": 709440
    },
    {
      "epoch": 1.1610468503498883,
      "grad_norm": 1.1338348388671875,
      "learning_rate": 7.728497723424024e-06,
      "loss": 0.0293,
      "step": 709460
    },
    {
      "epoch": 1.1610795807885417,
      "grad_norm": 0.771217942237854,
      "learning_rate": 7.728431831210506e-06,
      "loss": 0.0214,
      "step": 709480
    },
    {
      "epoch": 1.161112311227195,
      "grad_norm": 0.3745051622390747,
      "learning_rate": 7.72836593899699e-06,
      "loss": 0.0247,
      "step": 709500
    },
    {
      "epoch": 1.1611450416658484,
      "grad_norm": 0.5892938375473022,
      "learning_rate": 7.728300046783473e-06,
      "loss": 0.0232,
      "step": 709520
    },
    {
      "epoch": 1.1611777721045018,
      "grad_norm": 1.2059495449066162,
      "learning_rate": 7.728234154569955e-06,
      "loss": 0.0357,
      "step": 709540
    },
    {
      "epoch": 1.1612105025431552,
      "grad_norm": 0.6043010354042053,
      "learning_rate": 7.728168262356438e-06,
      "loss": 0.0223,
      "step": 709560
    },
    {
      "epoch": 1.1612432329818083,
      "grad_norm": 0.39753732085227966,
      "learning_rate": 7.72810237014292e-06,
      "loss": 0.0172,
      "step": 709580
    },
    {
      "epoch": 1.1612759634204617,
      "grad_norm": 0.7858834266662598,
      "learning_rate": 7.728036477929404e-06,
      "loss": 0.0275,
      "step": 709600
    },
    {
      "epoch": 1.161308693859115,
      "grad_norm": 0.2689960300922394,
      "learning_rate": 7.727970585715888e-06,
      "loss": 0.0219,
      "step": 709620
    },
    {
      "epoch": 1.1613414242977684,
      "grad_norm": 0.7607207298278809,
      "learning_rate": 7.72790469350237e-06,
      "loss": 0.0249,
      "step": 709640
    },
    {
      "epoch": 1.1613741547364218,
      "grad_norm": 1.2194325923919678,
      "learning_rate": 7.727838801288853e-06,
      "loss": 0.0274,
      "step": 709660
    },
    {
      "epoch": 1.1614068851750752,
      "grad_norm": 1.003696084022522,
      "learning_rate": 7.727772909075335e-06,
      "loss": 0.0207,
      "step": 709680
    },
    {
      "epoch": 1.1614396156137285,
      "grad_norm": 0.4321335256099701,
      "learning_rate": 7.727707016861819e-06,
      "loss": 0.021,
      "step": 709700
    },
    {
      "epoch": 1.1614723460523817,
      "grad_norm": 0.5549188852310181,
      "learning_rate": 7.7276411246483e-06,
      "loss": 0.0247,
      "step": 709720
    },
    {
      "epoch": 1.161505076491035,
      "grad_norm": 0.9016006588935852,
      "learning_rate": 7.727575232434784e-06,
      "loss": 0.0277,
      "step": 709740
    },
    {
      "epoch": 1.1615378069296884,
      "grad_norm": 0.4398176372051239,
      "learning_rate": 7.727509340221266e-06,
      "loss": 0.0205,
      "step": 709760
    },
    {
      "epoch": 1.1615705373683418,
      "grad_norm": 1.0492045879364014,
      "learning_rate": 7.72744344800775e-06,
      "loss": 0.0241,
      "step": 709780
    },
    {
      "epoch": 1.1616032678069952,
      "grad_norm": 0.9208419322967529,
      "learning_rate": 7.727377555794231e-06,
      "loss": 0.0217,
      "step": 709800
    },
    {
      "epoch": 1.1616359982456486,
      "grad_norm": 0.07117117941379547,
      "learning_rate": 7.727311663580715e-06,
      "loss": 0.025,
      "step": 709820
    },
    {
      "epoch": 1.161668728684302,
      "grad_norm": 0.5593960285186768,
      "learning_rate": 7.727245771367199e-06,
      "loss": 0.022,
      "step": 709840
    },
    {
      "epoch": 1.161701459122955,
      "grad_norm": 1.2692482471466064,
      "learning_rate": 7.72717987915368e-06,
      "loss": 0.0222,
      "step": 709860
    },
    {
      "epoch": 1.1617341895616085,
      "grad_norm": 1.29915189743042,
      "learning_rate": 7.727113986940164e-06,
      "loss": 0.0204,
      "step": 709880
    },
    {
      "epoch": 1.1617669200002618,
      "grad_norm": 0.6602796316146851,
      "learning_rate": 7.727048094726648e-06,
      "loss": 0.0218,
      "step": 709900
    },
    {
      "epoch": 1.1617996504389152,
      "grad_norm": 0.778348445892334,
      "learning_rate": 7.72698220251313e-06,
      "loss": 0.019,
      "step": 709920
    },
    {
      "epoch": 1.1618323808775686,
      "grad_norm": 0.29218271374702454,
      "learning_rate": 7.726916310299613e-06,
      "loss": 0.0218,
      "step": 709940
    },
    {
      "epoch": 1.161865111316222,
      "grad_norm": 1.153846263885498,
      "learning_rate": 7.726850418086097e-06,
      "loss": 0.0215,
      "step": 709960
    },
    {
      "epoch": 1.1618978417548753,
      "grad_norm": 0.7772168517112732,
      "learning_rate": 7.726784525872579e-06,
      "loss": 0.024,
      "step": 709980
    },
    {
      "epoch": 1.1619305721935285,
      "grad_norm": 0.5770871639251709,
      "learning_rate": 7.726718633659062e-06,
      "loss": 0.023,
      "step": 710000
    },
    {
      "epoch": 1.1619633026321818,
      "grad_norm": 0.36200976371765137,
      "learning_rate": 7.726652741445544e-06,
      "loss": 0.0251,
      "step": 710020
    },
    {
      "epoch": 1.1619960330708352,
      "grad_norm": 0.18501213192939758,
      "learning_rate": 7.726586849232028e-06,
      "loss": 0.023,
      "step": 710040
    },
    {
      "epoch": 1.1620287635094886,
      "grad_norm": 0.5335051417350769,
      "learning_rate": 7.72652095701851e-06,
      "loss": 0.0283,
      "step": 710060
    },
    {
      "epoch": 1.162061493948142,
      "grad_norm": 1.4806824922561646,
      "learning_rate": 7.726455064804993e-06,
      "loss": 0.0317,
      "step": 710080
    },
    {
      "epoch": 1.1620942243867953,
      "grad_norm": 1.4047106504440308,
      "learning_rate": 7.726389172591475e-06,
      "loss": 0.0261,
      "step": 710100
    },
    {
      "epoch": 1.1621269548254485,
      "grad_norm": 0.7850108742713928,
      "learning_rate": 7.726323280377959e-06,
      "loss": 0.0183,
      "step": 710120
    },
    {
      "epoch": 1.1621596852641018,
      "grad_norm": 1.2388447523117065,
      "learning_rate": 7.72625738816444e-06,
      "loss": 0.0161,
      "step": 710140
    },
    {
      "epoch": 1.1621924157027552,
      "grad_norm": 0.6459718346595764,
      "learning_rate": 7.726191495950924e-06,
      "loss": 0.0247,
      "step": 710160
    },
    {
      "epoch": 1.1622251461414086,
      "grad_norm": 0.2550918161869049,
      "learning_rate": 7.726125603737406e-06,
      "loss": 0.0199,
      "step": 710180
    },
    {
      "epoch": 1.162257876580062,
      "grad_norm": 0.5338243246078491,
      "learning_rate": 7.72605971152389e-06,
      "loss": 0.0198,
      "step": 710200
    },
    {
      "epoch": 1.1622906070187153,
      "grad_norm": 0.39000844955444336,
      "learning_rate": 7.725993819310373e-06,
      "loss": 0.0232,
      "step": 710220
    },
    {
      "epoch": 1.1623233374573685,
      "grad_norm": 4.164742469787598,
      "learning_rate": 7.725927927096855e-06,
      "loss": 0.0256,
      "step": 710240
    },
    {
      "epoch": 1.1623560678960219,
      "grad_norm": 0.8443832397460938,
      "learning_rate": 7.725862034883339e-06,
      "loss": 0.025,
      "step": 710260
    },
    {
      "epoch": 1.1623887983346752,
      "grad_norm": 0.16205836832523346,
      "learning_rate": 7.725796142669822e-06,
      "loss": 0.0184,
      "step": 710280
    },
    {
      "epoch": 1.1624215287733286,
      "grad_norm": 1.095844030380249,
      "learning_rate": 7.725730250456304e-06,
      "loss": 0.0204,
      "step": 710300
    },
    {
      "epoch": 1.162454259211982,
      "grad_norm": 1.0827794075012207,
      "learning_rate": 7.725664358242788e-06,
      "loss": 0.023,
      "step": 710320
    },
    {
      "epoch": 1.1624869896506353,
      "grad_norm": 1.0355836153030396,
      "learning_rate": 7.725598466029271e-06,
      "loss": 0.0257,
      "step": 710340
    },
    {
      "epoch": 1.1625197200892887,
      "grad_norm": 0.2978232204914093,
      "learning_rate": 7.725532573815753e-06,
      "loss": 0.0152,
      "step": 710360
    },
    {
      "epoch": 1.1625524505279419,
      "grad_norm": 0.7909281253814697,
      "learning_rate": 7.725466681602237e-06,
      "loss": 0.0252,
      "step": 710380
    },
    {
      "epoch": 1.1625851809665952,
      "grad_norm": 0.26033520698547363,
      "learning_rate": 7.725400789388719e-06,
      "loss": 0.015,
      "step": 710400
    },
    {
      "epoch": 1.1626179114052486,
      "grad_norm": 1.3200576305389404,
      "learning_rate": 7.725334897175202e-06,
      "loss": 0.015,
      "step": 710420
    },
    {
      "epoch": 1.162650641843902,
      "grad_norm": 0.4176471531391144,
      "learning_rate": 7.725269004961684e-06,
      "loss": 0.0208,
      "step": 710440
    },
    {
      "epoch": 1.1626833722825554,
      "grad_norm": 1.2959458827972412,
      "learning_rate": 7.725203112748168e-06,
      "loss": 0.027,
      "step": 710460
    },
    {
      "epoch": 1.1627161027212087,
      "grad_norm": 0.26493480801582336,
      "learning_rate": 7.72513722053465e-06,
      "loss": 0.0165,
      "step": 710480
    },
    {
      "epoch": 1.162748833159862,
      "grad_norm": 0.31682997941970825,
      "learning_rate": 7.725071328321133e-06,
      "loss": 0.017,
      "step": 710500
    },
    {
      "epoch": 1.1627815635985153,
      "grad_norm": 2.2338876724243164,
      "learning_rate": 7.725005436107615e-06,
      "loss": 0.0274,
      "step": 710520
    },
    {
      "epoch": 1.1628142940371686,
      "grad_norm": 0.45780321955680847,
      "learning_rate": 7.724939543894099e-06,
      "loss": 0.0231,
      "step": 710540
    },
    {
      "epoch": 1.162847024475822,
      "grad_norm": 0.49133211374282837,
      "learning_rate": 7.72487365168058e-06,
      "loss": 0.0201,
      "step": 710560
    },
    {
      "epoch": 1.1628797549144754,
      "grad_norm": 0.4570068120956421,
      "learning_rate": 7.724807759467064e-06,
      "loss": 0.024,
      "step": 710580
    },
    {
      "epoch": 1.1629124853531287,
      "grad_norm": 1.6403018236160278,
      "learning_rate": 7.724741867253546e-06,
      "loss": 0.0287,
      "step": 710600
    },
    {
      "epoch": 1.1629452157917821,
      "grad_norm": 0.550140380859375,
      "learning_rate": 7.72467597504003e-06,
      "loss": 0.0324,
      "step": 710620
    },
    {
      "epoch": 1.1629779462304355,
      "grad_norm": 0.7279794216156006,
      "learning_rate": 7.724610082826513e-06,
      "loss": 0.0281,
      "step": 710640
    },
    {
      "epoch": 1.1630106766690886,
      "grad_norm": 3.1897404193878174,
      "learning_rate": 7.724544190612995e-06,
      "loss": 0.0268,
      "step": 710660
    },
    {
      "epoch": 1.163043407107742,
      "grad_norm": 0.9278490543365479,
      "learning_rate": 7.724478298399479e-06,
      "loss": 0.0242,
      "step": 710680
    },
    {
      "epoch": 1.1630761375463954,
      "grad_norm": 0.4598548710346222,
      "learning_rate": 7.724412406185962e-06,
      "loss": 0.0148,
      "step": 710700
    },
    {
      "epoch": 1.1631088679850488,
      "grad_norm": 1.7571401596069336,
      "learning_rate": 7.724346513972444e-06,
      "loss": 0.0292,
      "step": 710720
    },
    {
      "epoch": 1.1631415984237021,
      "grad_norm": 0.6399800181388855,
      "learning_rate": 7.724280621758928e-06,
      "loss": 0.0255,
      "step": 710740
    },
    {
      "epoch": 1.1631743288623555,
      "grad_norm": 0.58604896068573,
      "learning_rate": 7.724214729545411e-06,
      "loss": 0.0184,
      "step": 710760
    },
    {
      "epoch": 1.1632070593010089,
      "grad_norm": 0.13504314422607422,
      "learning_rate": 7.724148837331893e-06,
      "loss": 0.021,
      "step": 710780
    },
    {
      "epoch": 1.163239789739662,
      "grad_norm": 0.3575553297996521,
      "learning_rate": 7.724082945118377e-06,
      "loss": 0.0231,
      "step": 710800
    },
    {
      "epoch": 1.1632725201783154,
      "grad_norm": 0.7628127336502075,
      "learning_rate": 7.724017052904859e-06,
      "loss": 0.0213,
      "step": 710820
    },
    {
      "epoch": 1.1633052506169688,
      "grad_norm": 0.28546836972236633,
      "learning_rate": 7.723951160691342e-06,
      "loss": 0.0167,
      "step": 710840
    },
    {
      "epoch": 1.1633379810556221,
      "grad_norm": 0.47951942682266235,
      "learning_rate": 7.723885268477824e-06,
      "loss": 0.0249,
      "step": 710860
    },
    {
      "epoch": 1.1633707114942755,
      "grad_norm": 1.0531535148620605,
      "learning_rate": 7.723819376264308e-06,
      "loss": 0.0194,
      "step": 710880
    },
    {
      "epoch": 1.1634034419329287,
      "grad_norm": 0.5459798574447632,
      "learning_rate": 7.72375348405079e-06,
      "loss": 0.0174,
      "step": 710900
    },
    {
      "epoch": 1.163436172371582,
      "grad_norm": 0.550518810749054,
      "learning_rate": 7.723687591837273e-06,
      "loss": 0.0265,
      "step": 710920
    },
    {
      "epoch": 1.1634689028102354,
      "grad_norm": 0.33831632137298584,
      "learning_rate": 7.723621699623755e-06,
      "loss": 0.0211,
      "step": 710940
    },
    {
      "epoch": 1.1635016332488888,
      "grad_norm": 0.39533257484436035,
      "learning_rate": 7.723555807410239e-06,
      "loss": 0.0253,
      "step": 710960
    },
    {
      "epoch": 1.1635343636875422,
      "grad_norm": 3.229292154312134,
      "learning_rate": 7.72348991519672e-06,
      "loss": 0.039,
      "step": 710980
    },
    {
      "epoch": 1.1635670941261955,
      "grad_norm": 0.3692358136177063,
      "learning_rate": 7.723424022983204e-06,
      "loss": 0.0157,
      "step": 711000
    },
    {
      "epoch": 1.163599824564849,
      "grad_norm": 0.9491785764694214,
      "learning_rate": 7.723358130769688e-06,
      "loss": 0.0242,
      "step": 711020
    },
    {
      "epoch": 1.163632555003502,
      "grad_norm": 0.2353086769580841,
      "learning_rate": 7.72329223855617e-06,
      "loss": 0.018,
      "step": 711040
    },
    {
      "epoch": 1.1636652854421554,
      "grad_norm": 3.9568557739257812,
      "learning_rate": 7.723226346342653e-06,
      "loss": 0.0293,
      "step": 711060
    },
    {
      "epoch": 1.1636980158808088,
      "grad_norm": 0.5221233367919922,
      "learning_rate": 7.723160454129137e-06,
      "loss": 0.0187,
      "step": 711080
    },
    {
      "epoch": 1.1637307463194622,
      "grad_norm": 0.765390157699585,
      "learning_rate": 7.723094561915619e-06,
      "loss": 0.0264,
      "step": 711100
    },
    {
      "epoch": 1.1637634767581155,
      "grad_norm": 0.9132262468338013,
      "learning_rate": 7.723028669702102e-06,
      "loss": 0.0269,
      "step": 711120
    },
    {
      "epoch": 1.163796207196769,
      "grad_norm": 0.777142345905304,
      "learning_rate": 7.722962777488586e-06,
      "loss": 0.0209,
      "step": 711140
    },
    {
      "epoch": 1.1638289376354223,
      "grad_norm": 0.8537003397941589,
      "learning_rate": 7.722896885275068e-06,
      "loss": 0.0251,
      "step": 711160
    },
    {
      "epoch": 1.1638616680740754,
      "grad_norm": 1.0076508522033691,
      "learning_rate": 7.722830993061551e-06,
      "loss": 0.0187,
      "step": 711180
    },
    {
      "epoch": 1.1638943985127288,
      "grad_norm": 0.8790176510810852,
      "learning_rate": 7.722765100848033e-06,
      "loss": 0.018,
      "step": 711200
    },
    {
      "epoch": 1.1639271289513822,
      "grad_norm": 0.08062911033630371,
      "learning_rate": 7.722699208634517e-06,
      "loss": 0.0266,
      "step": 711220
    },
    {
      "epoch": 1.1639598593900355,
      "grad_norm": 0.5948271751403809,
      "learning_rate": 7.722633316420999e-06,
      "loss": 0.0263,
      "step": 711240
    },
    {
      "epoch": 1.163992589828689,
      "grad_norm": 0.48168256878852844,
      "learning_rate": 7.722567424207482e-06,
      "loss": 0.0178,
      "step": 711260
    },
    {
      "epoch": 1.1640253202673423,
      "grad_norm": 1.037172555923462,
      "learning_rate": 7.722501531993964e-06,
      "loss": 0.029,
      "step": 711280
    },
    {
      "epoch": 1.1640580507059957,
      "grad_norm": 0.6073778867721558,
      "learning_rate": 7.722435639780448e-06,
      "loss": 0.0213,
      "step": 711300
    },
    {
      "epoch": 1.1640907811446488,
      "grad_norm": 0.6487191915512085,
      "learning_rate": 7.72236974756693e-06,
      "loss": 0.021,
      "step": 711320
    },
    {
      "epoch": 1.1641235115833022,
      "grad_norm": 0.30903831124305725,
      "learning_rate": 7.722303855353413e-06,
      "loss": 0.0252,
      "step": 711340
    },
    {
      "epoch": 1.1641562420219556,
      "grad_norm": 0.567751944065094,
      "learning_rate": 7.722237963139897e-06,
      "loss": 0.0191,
      "step": 711360
    },
    {
      "epoch": 1.164188972460609,
      "grad_norm": 0.728206992149353,
      "learning_rate": 7.722172070926379e-06,
      "loss": 0.0227,
      "step": 711380
    },
    {
      "epoch": 1.1642217028992623,
      "grad_norm": 1.1004987955093384,
      "learning_rate": 7.722106178712862e-06,
      "loss": 0.0329,
      "step": 711400
    },
    {
      "epoch": 1.1642544333379157,
      "grad_norm": 0.4941520392894745,
      "learning_rate": 7.722040286499344e-06,
      "loss": 0.0255,
      "step": 711420
    },
    {
      "epoch": 1.164287163776569,
      "grad_norm": 1.0050792694091797,
      "learning_rate": 7.721974394285828e-06,
      "loss": 0.0346,
      "step": 711440
    },
    {
      "epoch": 1.1643198942152222,
      "grad_norm": 0.28546464443206787,
      "learning_rate": 7.72190850207231e-06,
      "loss": 0.0202,
      "step": 711460
    },
    {
      "epoch": 1.1643526246538756,
      "grad_norm": 1.061566710472107,
      "learning_rate": 7.721842609858793e-06,
      "loss": 0.0253,
      "step": 711480
    },
    {
      "epoch": 1.164385355092529,
      "grad_norm": 0.1599661409854889,
      "learning_rate": 7.721776717645277e-06,
      "loss": 0.0263,
      "step": 711500
    },
    {
      "epoch": 1.1644180855311823,
      "grad_norm": 0.4611656367778778,
      "learning_rate": 7.721710825431759e-06,
      "loss": 0.022,
      "step": 711520
    },
    {
      "epoch": 1.1644508159698357,
      "grad_norm": 0.5697876214981079,
      "learning_rate": 7.721644933218242e-06,
      "loss": 0.0166,
      "step": 711540
    },
    {
      "epoch": 1.164483546408489,
      "grad_norm": 1.5466334819793701,
      "learning_rate": 7.721579041004726e-06,
      "loss": 0.0353,
      "step": 711560
    },
    {
      "epoch": 1.1645162768471424,
      "grad_norm": 0.45426440238952637,
      "learning_rate": 7.721513148791208e-06,
      "loss": 0.0328,
      "step": 711580
    },
    {
      "epoch": 1.1645490072857956,
      "grad_norm": 0.22600603103637695,
      "learning_rate": 7.721447256577691e-06,
      "loss": 0.0194,
      "step": 711600
    },
    {
      "epoch": 1.164581737724449,
      "grad_norm": 0.156277596950531,
      "learning_rate": 7.721381364364173e-06,
      "loss": 0.0129,
      "step": 711620
    },
    {
      "epoch": 1.1646144681631023,
      "grad_norm": 0.9232532978057861,
      "learning_rate": 7.721315472150657e-06,
      "loss": 0.0263,
      "step": 711640
    },
    {
      "epoch": 1.1646471986017557,
      "grad_norm": 0.1824232041835785,
      "learning_rate": 7.721249579937139e-06,
      "loss": 0.0262,
      "step": 711660
    },
    {
      "epoch": 1.164679929040409,
      "grad_norm": 0.7982068657875061,
      "learning_rate": 7.721183687723622e-06,
      "loss": 0.0218,
      "step": 711680
    },
    {
      "epoch": 1.1647126594790622,
      "grad_norm": 1.2622922658920288,
      "learning_rate": 7.721117795510104e-06,
      "loss": 0.0256,
      "step": 711700
    },
    {
      "epoch": 1.1647453899177156,
      "grad_norm": 0.9403480887413025,
      "learning_rate": 7.721051903296588e-06,
      "loss": 0.0217,
      "step": 711720
    },
    {
      "epoch": 1.164778120356369,
      "grad_norm": 0.9781168103218079,
      "learning_rate": 7.720986011083072e-06,
      "loss": 0.0243,
      "step": 711740
    },
    {
      "epoch": 1.1648108507950223,
      "grad_norm": 0.34452134370803833,
      "learning_rate": 7.720920118869553e-06,
      "loss": 0.0194,
      "step": 711760
    },
    {
      "epoch": 1.1648435812336757,
      "grad_norm": 0.5306445360183716,
      "learning_rate": 7.720854226656037e-06,
      "loss": 0.0196,
      "step": 711780
    },
    {
      "epoch": 1.164876311672329,
      "grad_norm": 0.3549770414829254,
      "learning_rate": 7.720788334442519e-06,
      "loss": 0.0284,
      "step": 711800
    },
    {
      "epoch": 1.1649090421109825,
      "grad_norm": 1.741573691368103,
      "learning_rate": 7.720722442229002e-06,
      "loss": 0.0252,
      "step": 711820
    },
    {
      "epoch": 1.1649417725496356,
      "grad_norm": 3.0508882999420166,
      "learning_rate": 7.720656550015484e-06,
      "loss": 0.0209,
      "step": 711840
    },
    {
      "epoch": 1.164974502988289,
      "grad_norm": 0.6934717893600464,
      "learning_rate": 7.720590657801968e-06,
      "loss": 0.0385,
      "step": 711860
    },
    {
      "epoch": 1.1650072334269423,
      "grad_norm": 0.42688989639282227,
      "learning_rate": 7.720524765588452e-06,
      "loss": 0.0262,
      "step": 711880
    },
    {
      "epoch": 1.1650399638655957,
      "grad_norm": 0.4636066257953644,
      "learning_rate": 7.720458873374933e-06,
      "loss": 0.0141,
      "step": 711900
    },
    {
      "epoch": 1.165072694304249,
      "grad_norm": 1.2814736366271973,
      "learning_rate": 7.720392981161417e-06,
      "loss": 0.0244,
      "step": 711920
    },
    {
      "epoch": 1.1651054247429025,
      "grad_norm": 0.481243371963501,
      "learning_rate": 7.7203270889479e-06,
      "loss": 0.0268,
      "step": 711940
    },
    {
      "epoch": 1.1651381551815558,
      "grad_norm": 0.5923153162002563,
      "learning_rate": 7.720261196734383e-06,
      "loss": 0.0317,
      "step": 711960
    },
    {
      "epoch": 1.165170885620209,
      "grad_norm": 1.0553284883499146,
      "learning_rate": 7.720195304520866e-06,
      "loss": 0.0201,
      "step": 711980
    },
    {
      "epoch": 1.1652036160588624,
      "grad_norm": 1.214921236038208,
      "learning_rate": 7.720129412307348e-06,
      "loss": 0.0211,
      "step": 712000
    },
    {
      "epoch": 1.1652363464975157,
      "grad_norm": 0.14874647557735443,
      "learning_rate": 7.720063520093832e-06,
      "loss": 0.0233,
      "step": 712020
    },
    {
      "epoch": 1.165269076936169,
      "grad_norm": 1.88267183303833,
      "learning_rate": 7.719997627880313e-06,
      "loss": 0.034,
      "step": 712040
    },
    {
      "epoch": 1.1653018073748225,
      "grad_norm": 0.69930100440979,
      "learning_rate": 7.719931735666797e-06,
      "loss": 0.03,
      "step": 712060
    },
    {
      "epoch": 1.1653345378134758,
      "grad_norm": 0.9257971048355103,
      "learning_rate": 7.71986584345328e-06,
      "loss": 0.0246,
      "step": 712080
    },
    {
      "epoch": 1.1653672682521292,
      "grad_norm": 0.2597815692424774,
      "learning_rate": 7.719799951239763e-06,
      "loss": 0.0204,
      "step": 712100
    },
    {
      "epoch": 1.1653999986907824,
      "grad_norm": 0.45524293184280396,
      "learning_rate": 7.719734059026246e-06,
      "loss": 0.0208,
      "step": 712120
    },
    {
      "epoch": 1.1654327291294357,
      "grad_norm": 0.27351850271224976,
      "learning_rate": 7.719668166812728e-06,
      "loss": 0.0271,
      "step": 712140
    },
    {
      "epoch": 1.1654654595680891,
      "grad_norm": 0.572357177734375,
      "learning_rate": 7.719602274599212e-06,
      "loss": 0.0193,
      "step": 712160
    },
    {
      "epoch": 1.1654981900067425,
      "grad_norm": 1.3152085542678833,
      "learning_rate": 7.719536382385693e-06,
      "loss": 0.0215,
      "step": 712180
    },
    {
      "epoch": 1.1655309204453959,
      "grad_norm": 0.7862200140953064,
      "learning_rate": 7.719470490172177e-06,
      "loss": 0.0235,
      "step": 712200
    },
    {
      "epoch": 1.1655636508840492,
      "grad_norm": 1.1240098476409912,
      "learning_rate": 7.719404597958659e-06,
      "loss": 0.0214,
      "step": 712220
    },
    {
      "epoch": 1.1655963813227026,
      "grad_norm": 0.5109371542930603,
      "learning_rate": 7.719338705745143e-06,
      "loss": 0.0299,
      "step": 712240
    },
    {
      "epoch": 1.1656291117613558,
      "grad_norm": 1.7543836832046509,
      "learning_rate": 7.719272813531624e-06,
      "loss": 0.021,
      "step": 712260
    },
    {
      "epoch": 1.1656618422000091,
      "grad_norm": 0.7253441214561462,
      "learning_rate": 7.719206921318108e-06,
      "loss": 0.0168,
      "step": 712280
    },
    {
      "epoch": 1.1656945726386625,
      "grad_norm": 1.3134187459945679,
      "learning_rate": 7.719141029104592e-06,
      "loss": 0.0301,
      "step": 712300
    },
    {
      "epoch": 1.1657273030773159,
      "grad_norm": 0.4932686984539032,
      "learning_rate": 7.719075136891074e-06,
      "loss": 0.026,
      "step": 712320
    },
    {
      "epoch": 1.1657600335159692,
      "grad_norm": 0.8053867816925049,
      "learning_rate": 7.719009244677557e-06,
      "loss": 0.0183,
      "step": 712340
    },
    {
      "epoch": 1.1657927639546226,
      "grad_norm": 0.4637591242790222,
      "learning_rate": 7.71894335246404e-06,
      "loss": 0.0203,
      "step": 712360
    },
    {
      "epoch": 1.1658254943932758,
      "grad_norm": 0.7399134039878845,
      "learning_rate": 7.718877460250523e-06,
      "loss": 0.0295,
      "step": 712380
    },
    {
      "epoch": 1.1658582248319291,
      "grad_norm": 1.2598755359649658,
      "learning_rate": 7.718811568037006e-06,
      "loss": 0.0221,
      "step": 712400
    },
    {
      "epoch": 1.1658909552705825,
      "grad_norm": 1.345707893371582,
      "learning_rate": 7.71874567582349e-06,
      "loss": 0.023,
      "step": 712420
    },
    {
      "epoch": 1.1659236857092359,
      "grad_norm": 0.5302316546440125,
      "learning_rate": 7.718679783609972e-06,
      "loss": 0.0253,
      "step": 712440
    },
    {
      "epoch": 1.1659564161478893,
      "grad_norm": 0.7798215746879578,
      "learning_rate": 7.718613891396455e-06,
      "loss": 0.0281,
      "step": 712460
    },
    {
      "epoch": 1.1659891465865426,
      "grad_norm": 0.5355183482170105,
      "learning_rate": 7.718547999182937e-06,
      "loss": 0.016,
      "step": 712480
    },
    {
      "epoch": 1.1660218770251958,
      "grad_norm": 0.6433253884315491,
      "learning_rate": 7.71848210696942e-06,
      "loss": 0.0191,
      "step": 712500
    },
    {
      "epoch": 1.1660546074638491,
      "grad_norm": 0.5378304719924927,
      "learning_rate": 7.718416214755903e-06,
      "loss": 0.0179,
      "step": 712520
    },
    {
      "epoch": 1.1660873379025025,
      "grad_norm": 0.25981268286705017,
      "learning_rate": 7.718350322542386e-06,
      "loss": 0.0132,
      "step": 712540
    },
    {
      "epoch": 1.166120068341156,
      "grad_norm": 0.9919136762619019,
      "learning_rate": 7.718284430328868e-06,
      "loss": 0.035,
      "step": 712560
    },
    {
      "epoch": 1.1661527987798093,
      "grad_norm": 0.6543741822242737,
      "learning_rate": 7.718218538115352e-06,
      "loss": 0.0252,
      "step": 712580
    },
    {
      "epoch": 1.1661855292184626,
      "grad_norm": 0.37828370928764343,
      "learning_rate": 7.718152645901834e-06,
      "loss": 0.0137,
      "step": 712600
    },
    {
      "epoch": 1.166218259657116,
      "grad_norm": 1.5858368873596191,
      "learning_rate": 7.718086753688317e-06,
      "loss": 0.0227,
      "step": 712620
    },
    {
      "epoch": 1.1662509900957692,
      "grad_norm": 0.6162863969802856,
      "learning_rate": 7.718020861474799e-06,
      "loss": 0.0156,
      "step": 712640
    },
    {
      "epoch": 1.1662837205344225,
      "grad_norm": 0.35372963547706604,
      "learning_rate": 7.717954969261283e-06,
      "loss": 0.0223,
      "step": 712660
    },
    {
      "epoch": 1.166316450973076,
      "grad_norm": 0.3253036439418793,
      "learning_rate": 7.717889077047766e-06,
      "loss": 0.0216,
      "step": 712680
    },
    {
      "epoch": 1.1663491814117293,
      "grad_norm": 0.32207390666007996,
      "learning_rate": 7.717823184834248e-06,
      "loss": 0.0212,
      "step": 712700
    },
    {
      "epoch": 1.1663819118503826,
      "grad_norm": 1.3062337636947632,
      "learning_rate": 7.717757292620732e-06,
      "loss": 0.0218,
      "step": 712720
    },
    {
      "epoch": 1.166414642289036,
      "grad_norm": 0.31162598729133606,
      "learning_rate": 7.717691400407215e-06,
      "loss": 0.0254,
      "step": 712740
    },
    {
      "epoch": 1.1664473727276894,
      "grad_norm": 1.8768439292907715,
      "learning_rate": 7.717625508193697e-06,
      "loss": 0.033,
      "step": 712760
    },
    {
      "epoch": 1.1664801031663425,
      "grad_norm": 0.6267707943916321,
      "learning_rate": 7.71755961598018e-06,
      "loss": 0.0218,
      "step": 712780
    },
    {
      "epoch": 1.166512833604996,
      "grad_norm": 0.596235990524292,
      "learning_rate": 7.717493723766664e-06,
      "loss": 0.0235,
      "step": 712800
    },
    {
      "epoch": 1.1665455640436493,
      "grad_norm": 0.6162627339363098,
      "learning_rate": 7.717427831553146e-06,
      "loss": 0.0383,
      "step": 712820
    },
    {
      "epoch": 1.1665782944823027,
      "grad_norm": 0.9173003435134888,
      "learning_rate": 7.71736193933963e-06,
      "loss": 0.0253,
      "step": 712840
    },
    {
      "epoch": 1.166611024920956,
      "grad_norm": 0.25546520948410034,
      "learning_rate": 7.717296047126112e-06,
      "loss": 0.0217,
      "step": 712860
    },
    {
      "epoch": 1.1666437553596094,
      "grad_norm": 0.27757954597473145,
      "learning_rate": 7.717230154912595e-06,
      "loss": 0.0256,
      "step": 712880
    },
    {
      "epoch": 1.1666764857982628,
      "grad_norm": 1.7445428371429443,
      "learning_rate": 7.717164262699077e-06,
      "loss": 0.0152,
      "step": 712900
    },
    {
      "epoch": 1.166709216236916,
      "grad_norm": 1.2813400030136108,
      "learning_rate": 7.71709837048556e-06,
      "loss": 0.0208,
      "step": 712920
    },
    {
      "epoch": 1.1667419466755693,
      "grad_norm": 0.45806851983070374,
      "learning_rate": 7.717032478272043e-06,
      "loss": 0.024,
      "step": 712940
    },
    {
      "epoch": 1.1667746771142227,
      "grad_norm": 0.757136881351471,
      "learning_rate": 7.716966586058526e-06,
      "loss": 0.0202,
      "step": 712960
    },
    {
      "epoch": 1.166807407552876,
      "grad_norm": 0.6059021949768066,
      "learning_rate": 7.716900693845008e-06,
      "loss": 0.0164,
      "step": 712980
    },
    {
      "epoch": 1.1668401379915294,
      "grad_norm": 0.35257333517074585,
      "learning_rate": 7.716834801631492e-06,
      "loss": 0.029,
      "step": 713000
    },
    {
      "epoch": 1.1668728684301828,
      "grad_norm": 1.351835012435913,
      "learning_rate": 7.716768909417974e-06,
      "loss": 0.0246,
      "step": 713020
    },
    {
      "epoch": 1.1669055988688362,
      "grad_norm": 0.8522233963012695,
      "learning_rate": 7.716703017204457e-06,
      "loss": 0.0245,
      "step": 713040
    },
    {
      "epoch": 1.1669383293074893,
      "grad_norm": 2.7393245697021484,
      "learning_rate": 7.71663712499094e-06,
      "loss": 0.0245,
      "step": 713060
    },
    {
      "epoch": 1.1669710597461427,
      "grad_norm": 0.6075542569160461,
      "learning_rate": 7.716571232777423e-06,
      "loss": 0.0263,
      "step": 713080
    },
    {
      "epoch": 1.167003790184796,
      "grad_norm": 1.781307339668274,
      "learning_rate": 7.716505340563906e-06,
      "loss": 0.0153,
      "step": 713100
    },
    {
      "epoch": 1.1670365206234494,
      "grad_norm": 1.8758511543273926,
      "learning_rate": 7.71643944835039e-06,
      "loss": 0.0266,
      "step": 713120
    },
    {
      "epoch": 1.1670692510621028,
      "grad_norm": 0.36039721965789795,
      "learning_rate": 7.716373556136872e-06,
      "loss": 0.0159,
      "step": 713140
    },
    {
      "epoch": 1.1671019815007562,
      "grad_norm": 3.4509475231170654,
      "learning_rate": 7.716307663923355e-06,
      "loss": 0.0154,
      "step": 713160
    },
    {
      "epoch": 1.1671347119394093,
      "grad_norm": 0.3197975158691406,
      "learning_rate": 7.716241771709839e-06,
      "loss": 0.0183,
      "step": 713180
    },
    {
      "epoch": 1.1671674423780627,
      "grad_norm": 0.4074263274669647,
      "learning_rate": 7.71617587949632e-06,
      "loss": 0.0265,
      "step": 713200
    },
    {
      "epoch": 1.167200172816716,
      "grad_norm": 0.9349097013473511,
      "learning_rate": 7.716109987282804e-06,
      "loss": 0.0209,
      "step": 713220
    },
    {
      "epoch": 1.1672329032553694,
      "grad_norm": 0.4355997145175934,
      "learning_rate": 7.716044095069286e-06,
      "loss": 0.0165,
      "step": 713240
    },
    {
      "epoch": 1.1672656336940228,
      "grad_norm": 0.30695641040802,
      "learning_rate": 7.71597820285577e-06,
      "loss": 0.0252,
      "step": 713260
    },
    {
      "epoch": 1.1672983641326762,
      "grad_norm": 0.9969660043716431,
      "learning_rate": 7.715912310642252e-06,
      "loss": 0.0288,
      "step": 713280
    },
    {
      "epoch": 1.1673310945713293,
      "grad_norm": 1.306958794593811,
      "learning_rate": 7.715846418428735e-06,
      "loss": 0.0248,
      "step": 713300
    },
    {
      "epoch": 1.1673638250099827,
      "grad_norm": 1.5810130834579468,
      "learning_rate": 7.715780526215217e-06,
      "loss": 0.0297,
      "step": 713320
    },
    {
      "epoch": 1.167396555448636,
      "grad_norm": 0.1622304916381836,
      "learning_rate": 7.715714634001701e-06,
      "loss": 0.0234,
      "step": 713340
    },
    {
      "epoch": 1.1674292858872894,
      "grad_norm": 0.5056861042976379,
      "learning_rate": 7.715648741788183e-06,
      "loss": 0.0269,
      "step": 713360
    },
    {
      "epoch": 1.1674620163259428,
      "grad_norm": 0.8226240277290344,
      "learning_rate": 7.715582849574666e-06,
      "loss": 0.0168,
      "step": 713380
    },
    {
      "epoch": 1.1674947467645962,
      "grad_norm": 0.30238208174705505,
      "learning_rate": 7.715516957361148e-06,
      "loss": 0.0234,
      "step": 713400
    },
    {
      "epoch": 1.1675274772032496,
      "grad_norm": 1.042283535003662,
      "learning_rate": 7.715451065147632e-06,
      "loss": 0.0203,
      "step": 713420
    },
    {
      "epoch": 1.1675602076419027,
      "grad_norm": 0.4197697937488556,
      "learning_rate": 7.715385172934114e-06,
      "loss": 0.0191,
      "step": 713440
    },
    {
      "epoch": 1.167592938080556,
      "grad_norm": 1.052290678024292,
      "learning_rate": 7.715319280720597e-06,
      "loss": 0.026,
      "step": 713460
    },
    {
      "epoch": 1.1676256685192095,
      "grad_norm": 0.8780739903450012,
      "learning_rate": 7.715253388507081e-06,
      "loss": 0.0178,
      "step": 713480
    },
    {
      "epoch": 1.1676583989578628,
      "grad_norm": 0.7000868320465088,
      "learning_rate": 7.715187496293563e-06,
      "loss": 0.0296,
      "step": 713500
    },
    {
      "epoch": 1.1676911293965162,
      "grad_norm": 1.0072453022003174,
      "learning_rate": 7.715121604080046e-06,
      "loss": 0.0293,
      "step": 713520
    },
    {
      "epoch": 1.1677238598351696,
      "grad_norm": 0.16034774482250214,
      "learning_rate": 7.71505571186653e-06,
      "loss": 0.0194,
      "step": 713540
    },
    {
      "epoch": 1.167756590273823,
      "grad_norm": 0.2908591032028198,
      "learning_rate": 7.714989819653012e-06,
      "loss": 0.0228,
      "step": 713560
    },
    {
      "epoch": 1.167789320712476,
      "grad_norm": 1.6866346597671509,
      "learning_rate": 7.714923927439495e-06,
      "loss": 0.0239,
      "step": 713580
    },
    {
      "epoch": 1.1678220511511295,
      "grad_norm": 0.6667397618293762,
      "learning_rate": 7.714858035225979e-06,
      "loss": 0.0198,
      "step": 713600
    },
    {
      "epoch": 1.1678547815897828,
      "grad_norm": 0.6029572486877441,
      "learning_rate": 7.714792143012461e-06,
      "loss": 0.0254,
      "step": 713620
    },
    {
      "epoch": 1.1678875120284362,
      "grad_norm": 0.9320123195648193,
      "learning_rate": 7.714726250798945e-06,
      "loss": 0.0311,
      "step": 713640
    },
    {
      "epoch": 1.1679202424670896,
      "grad_norm": 1.9121922254562378,
      "learning_rate": 7.714660358585426e-06,
      "loss": 0.0282,
      "step": 713660
    },
    {
      "epoch": 1.167952972905743,
      "grad_norm": 1.4558849334716797,
      "learning_rate": 7.71459446637191e-06,
      "loss": 0.0191,
      "step": 713680
    },
    {
      "epoch": 1.1679857033443963,
      "grad_norm": 0.3487951159477234,
      "learning_rate": 7.714528574158392e-06,
      "loss": 0.02,
      "step": 713700
    },
    {
      "epoch": 1.1680184337830495,
      "grad_norm": 2.400068998336792,
      "learning_rate": 7.714462681944875e-06,
      "loss": 0.0217,
      "step": 713720
    },
    {
      "epoch": 1.1680511642217029,
      "grad_norm": 0.7366692423820496,
      "learning_rate": 7.714396789731357e-06,
      "loss": 0.02,
      "step": 713740
    },
    {
      "epoch": 1.1680838946603562,
      "grad_norm": 0.35932639241218567,
      "learning_rate": 7.714330897517841e-06,
      "loss": 0.0253,
      "step": 713760
    },
    {
      "epoch": 1.1681166250990096,
      "grad_norm": 5.487405300140381,
      "learning_rate": 7.714265005304323e-06,
      "loss": 0.0202,
      "step": 713780
    },
    {
      "epoch": 1.168149355537663,
      "grad_norm": 0.412350594997406,
      "learning_rate": 7.714199113090806e-06,
      "loss": 0.0211,
      "step": 713800
    },
    {
      "epoch": 1.1681820859763163,
      "grad_norm": 0.3813442289829254,
      "learning_rate": 7.71413322087729e-06,
      "loss": 0.0195,
      "step": 713820
    },
    {
      "epoch": 1.1682148164149697,
      "grad_norm": 0.4277920722961426,
      "learning_rate": 7.714067328663772e-06,
      "loss": 0.0213,
      "step": 713840
    },
    {
      "epoch": 1.1682475468536229,
      "grad_norm": 0.3104099631309509,
      "learning_rate": 7.714001436450255e-06,
      "loss": 0.0216,
      "step": 713860
    },
    {
      "epoch": 1.1682802772922762,
      "grad_norm": 1.4317525625228882,
      "learning_rate": 7.713935544236737e-06,
      "loss": 0.0337,
      "step": 713880
    },
    {
      "epoch": 1.1683130077309296,
      "grad_norm": 0.5507516860961914,
      "learning_rate": 7.713869652023221e-06,
      "loss": 0.0223,
      "step": 713900
    },
    {
      "epoch": 1.168345738169583,
      "grad_norm": 0.6594699025154114,
      "learning_rate": 7.713803759809705e-06,
      "loss": 0.0221,
      "step": 713920
    },
    {
      "epoch": 1.1683784686082364,
      "grad_norm": 0.2763138711452484,
      "learning_rate": 7.713737867596186e-06,
      "loss": 0.0243,
      "step": 713940
    },
    {
      "epoch": 1.1684111990468895,
      "grad_norm": 0.1996842920780182,
      "learning_rate": 7.71367197538267e-06,
      "loss": 0.016,
      "step": 713960
    },
    {
      "epoch": 1.1684439294855429,
      "grad_norm": 0.5591856837272644,
      "learning_rate": 7.713606083169154e-06,
      "loss": 0.0262,
      "step": 713980
    },
    {
      "epoch": 1.1684766599241962,
      "grad_norm": 0.5231356024742126,
      "learning_rate": 7.713540190955636e-06,
      "loss": 0.021,
      "step": 714000
    },
    {
      "epoch": 1.1685093903628496,
      "grad_norm": 0.2664071321487427,
      "learning_rate": 7.713474298742119e-06,
      "loss": 0.0193,
      "step": 714020
    },
    {
      "epoch": 1.168542120801503,
      "grad_norm": 0.1642104685306549,
      "learning_rate": 7.713408406528601e-06,
      "loss": 0.0285,
      "step": 714040
    },
    {
      "epoch": 1.1685748512401564,
      "grad_norm": 1.1884291172027588,
      "learning_rate": 7.713342514315085e-06,
      "loss": 0.0338,
      "step": 714060
    },
    {
      "epoch": 1.1686075816788097,
      "grad_norm": 1.3832190036773682,
      "learning_rate": 7.713276622101566e-06,
      "loss": 0.0255,
      "step": 714080
    },
    {
      "epoch": 1.1686403121174629,
      "grad_norm": 0.6993764638900757,
      "learning_rate": 7.71321072988805e-06,
      "loss": 0.0272,
      "step": 714100
    },
    {
      "epoch": 1.1686730425561163,
      "grad_norm": 0.28876355290412903,
      "learning_rate": 7.713144837674532e-06,
      "loss": 0.0272,
      "step": 714120
    },
    {
      "epoch": 1.1687057729947696,
      "grad_norm": 0.281841516494751,
      "learning_rate": 7.713078945461016e-06,
      "loss": 0.0251,
      "step": 714140
    },
    {
      "epoch": 1.168738503433423,
      "grad_norm": 0.8839415311813354,
      "learning_rate": 7.713013053247497e-06,
      "loss": 0.0289,
      "step": 714160
    },
    {
      "epoch": 1.1687712338720764,
      "grad_norm": 0.8352187275886536,
      "learning_rate": 7.712947161033981e-06,
      "loss": 0.0256,
      "step": 714180
    },
    {
      "epoch": 1.1688039643107297,
      "grad_norm": 0.5792238116264343,
      "learning_rate": 7.712881268820465e-06,
      "loss": 0.0342,
      "step": 714200
    },
    {
      "epoch": 1.1688366947493831,
      "grad_norm": 0.8086210489273071,
      "learning_rate": 7.712815376606946e-06,
      "loss": 0.0217,
      "step": 714220
    },
    {
      "epoch": 1.1688694251880363,
      "grad_norm": 1.0920356512069702,
      "learning_rate": 7.71274948439343e-06,
      "loss": 0.0285,
      "step": 714240
    },
    {
      "epoch": 1.1689021556266896,
      "grad_norm": 0.41898593306541443,
      "learning_rate": 7.712683592179912e-06,
      "loss": 0.0228,
      "step": 714260
    },
    {
      "epoch": 1.168934886065343,
      "grad_norm": 0.3283229470252991,
      "learning_rate": 7.712617699966396e-06,
      "loss": 0.0229,
      "step": 714280
    },
    {
      "epoch": 1.1689676165039964,
      "grad_norm": 0.3931717276573181,
      "learning_rate": 7.712551807752877e-06,
      "loss": 0.0176,
      "step": 714300
    },
    {
      "epoch": 1.1690003469426498,
      "grad_norm": 1.6693742275238037,
      "learning_rate": 7.712485915539361e-06,
      "loss": 0.0266,
      "step": 714320
    },
    {
      "epoch": 1.1690330773813031,
      "grad_norm": 0.3002382814884186,
      "learning_rate": 7.712420023325845e-06,
      "loss": 0.0153,
      "step": 714340
    },
    {
      "epoch": 1.1690658078199565,
      "grad_norm": 0.582970917224884,
      "learning_rate": 7.712354131112327e-06,
      "loss": 0.0203,
      "step": 714360
    },
    {
      "epoch": 1.1690985382586097,
      "grad_norm": 0.5478776097297668,
      "learning_rate": 7.71228823889881e-06,
      "loss": 0.02,
      "step": 714380
    },
    {
      "epoch": 1.169131268697263,
      "grad_norm": 0.4793977439403534,
      "learning_rate": 7.712222346685294e-06,
      "loss": 0.0214,
      "step": 714400
    },
    {
      "epoch": 1.1691639991359164,
      "grad_norm": 0.4407995641231537,
      "learning_rate": 7.712156454471776e-06,
      "loss": 0.0276,
      "step": 714420
    },
    {
      "epoch": 1.1691967295745698,
      "grad_norm": 0.4877135455608368,
      "learning_rate": 7.71209056225826e-06,
      "loss": 0.0271,
      "step": 714440
    },
    {
      "epoch": 1.1692294600132231,
      "grad_norm": 0.9723089933395386,
      "learning_rate": 7.712024670044741e-06,
      "loss": 0.0231,
      "step": 714460
    },
    {
      "epoch": 1.1692621904518765,
      "grad_norm": 0.7327685356140137,
      "learning_rate": 7.711958777831225e-06,
      "loss": 0.0205,
      "step": 714480
    },
    {
      "epoch": 1.1692949208905299,
      "grad_norm": 0.9608625173568726,
      "learning_rate": 7.711892885617707e-06,
      "loss": 0.0223,
      "step": 714500
    },
    {
      "epoch": 1.169327651329183,
      "grad_norm": 0.46662893891334534,
      "learning_rate": 7.71182699340419e-06,
      "loss": 0.022,
      "step": 714520
    },
    {
      "epoch": 1.1693603817678364,
      "grad_norm": 3.0634117126464844,
      "learning_rate": 7.711761101190674e-06,
      "loss": 0.0272,
      "step": 714540
    },
    {
      "epoch": 1.1693931122064898,
      "grad_norm": 0.848629355430603,
      "learning_rate": 7.711695208977156e-06,
      "loss": 0.0294,
      "step": 714560
    },
    {
      "epoch": 1.1694258426451432,
      "grad_norm": 0.43000736832618713,
      "learning_rate": 7.71162931676364e-06,
      "loss": 0.0179,
      "step": 714580
    },
    {
      "epoch": 1.1694585730837965,
      "grad_norm": 0.6610259413719177,
      "learning_rate": 7.711563424550121e-06,
      "loss": 0.0232,
      "step": 714600
    },
    {
      "epoch": 1.16949130352245,
      "grad_norm": 1.572940468788147,
      "learning_rate": 7.711497532336605e-06,
      "loss": 0.0241,
      "step": 714620
    },
    {
      "epoch": 1.1695240339611033,
      "grad_norm": 0.797389805316925,
      "learning_rate": 7.711431640123087e-06,
      "loss": 0.029,
      "step": 714640
    },
    {
      "epoch": 1.1695567643997564,
      "grad_norm": 2.1897528171539307,
      "learning_rate": 7.71136574790957e-06,
      "loss": 0.028,
      "step": 714660
    },
    {
      "epoch": 1.1695894948384098,
      "grad_norm": 0.4403764009475708,
      "learning_rate": 7.711299855696052e-06,
      "loss": 0.0226,
      "step": 714680
    },
    {
      "epoch": 1.1696222252770632,
      "grad_norm": 0.4463407099246979,
      "learning_rate": 7.711233963482536e-06,
      "loss": 0.0222,
      "step": 714700
    },
    {
      "epoch": 1.1696549557157165,
      "grad_norm": 1.3707469701766968,
      "learning_rate": 7.71116807126902e-06,
      "loss": 0.0273,
      "step": 714720
    },
    {
      "epoch": 1.16968768615437,
      "grad_norm": 0.8856798410415649,
      "learning_rate": 7.711102179055501e-06,
      "loss": 0.0344,
      "step": 714740
    },
    {
      "epoch": 1.169720416593023,
      "grad_norm": 0.5655953288078308,
      "learning_rate": 7.711036286841985e-06,
      "loss": 0.0184,
      "step": 714760
    },
    {
      "epoch": 1.1697531470316764,
      "grad_norm": 0.14376163482666016,
      "learning_rate": 7.710970394628468e-06,
      "loss": 0.0221,
      "step": 714780
    },
    {
      "epoch": 1.1697858774703298,
      "grad_norm": 1.9266281127929688,
      "learning_rate": 7.71090450241495e-06,
      "loss": 0.0222,
      "step": 714800
    },
    {
      "epoch": 1.1698186079089832,
      "grad_norm": 0.5127578973770142,
      "learning_rate": 7.710838610201434e-06,
      "loss": 0.0242,
      "step": 714820
    },
    {
      "epoch": 1.1698513383476365,
      "grad_norm": 0.3435073792934418,
      "learning_rate": 7.710772717987916e-06,
      "loss": 0.0236,
      "step": 714840
    },
    {
      "epoch": 1.16988406878629,
      "grad_norm": 1.1992775201797485,
      "learning_rate": 7.7107068257744e-06,
      "loss": 0.0176,
      "step": 714860
    },
    {
      "epoch": 1.1699167992249433,
      "grad_norm": 1.0597929954528809,
      "learning_rate": 7.710640933560883e-06,
      "loss": 0.0254,
      "step": 714880
    },
    {
      "epoch": 1.1699495296635964,
      "grad_norm": 0.7392747402191162,
      "learning_rate": 7.710575041347365e-06,
      "loss": 0.0266,
      "step": 714900
    },
    {
      "epoch": 1.1699822601022498,
      "grad_norm": 3.669609546661377,
      "learning_rate": 7.710509149133848e-06,
      "loss": 0.0222,
      "step": 714920
    },
    {
      "epoch": 1.1700149905409032,
      "grad_norm": 0.6162684559822083,
      "learning_rate": 7.71044325692033e-06,
      "loss": 0.0247,
      "step": 714940
    },
    {
      "epoch": 1.1700477209795566,
      "grad_norm": 0.4774629473686218,
      "learning_rate": 7.710377364706814e-06,
      "loss": 0.0169,
      "step": 714960
    },
    {
      "epoch": 1.17008045141821,
      "grad_norm": 0.4014133810997009,
      "learning_rate": 7.710311472493296e-06,
      "loss": 0.0235,
      "step": 714980
    },
    {
      "epoch": 1.1701131818568633,
      "grad_norm": 0.3186056613922119,
      "learning_rate": 7.71024558027978e-06,
      "loss": 0.0256,
      "step": 715000
    },
    {
      "epoch": 1.1701459122955167,
      "grad_norm": 0.42170751094818115,
      "learning_rate": 7.710179688066261e-06,
      "loss": 0.0225,
      "step": 715020
    },
    {
      "epoch": 1.1701786427341698,
      "grad_norm": 1.0004620552062988,
      "learning_rate": 7.710113795852745e-06,
      "loss": 0.0246,
      "step": 715040
    },
    {
      "epoch": 1.1702113731728232,
      "grad_norm": 0.4512723684310913,
      "learning_rate": 7.710047903639227e-06,
      "loss": 0.0286,
      "step": 715060
    },
    {
      "epoch": 1.1702441036114766,
      "grad_norm": 0.5450312495231628,
      "learning_rate": 7.70998201142571e-06,
      "loss": 0.0216,
      "step": 715080
    },
    {
      "epoch": 1.17027683405013,
      "grad_norm": 0.2729106545448303,
      "learning_rate": 7.709916119212192e-06,
      "loss": 0.0331,
      "step": 715100
    },
    {
      "epoch": 1.1703095644887833,
      "grad_norm": 0.26995527744293213,
      "learning_rate": 7.709850226998676e-06,
      "loss": 0.0153,
      "step": 715120
    },
    {
      "epoch": 1.1703422949274367,
      "grad_norm": 0.4073399007320404,
      "learning_rate": 7.70978433478516e-06,
      "loss": 0.0187,
      "step": 715140
    },
    {
      "epoch": 1.17037502536609,
      "grad_norm": 0.3805945813655853,
      "learning_rate": 7.709718442571641e-06,
      "loss": 0.0221,
      "step": 715160
    },
    {
      "epoch": 1.1704077558047432,
      "grad_norm": 0.6947305798530579,
      "learning_rate": 7.709652550358125e-06,
      "loss": 0.0264,
      "step": 715180
    },
    {
      "epoch": 1.1704404862433966,
      "grad_norm": 0.7006098031997681,
      "learning_rate": 7.709586658144608e-06,
      "loss": 0.0174,
      "step": 715200
    },
    {
      "epoch": 1.17047321668205,
      "grad_norm": 1.0935914516448975,
      "learning_rate": 7.70952076593109e-06,
      "loss": 0.0189,
      "step": 715220
    },
    {
      "epoch": 1.1705059471207033,
      "grad_norm": 0.8461291193962097,
      "learning_rate": 7.709454873717574e-06,
      "loss": 0.0207,
      "step": 715240
    },
    {
      "epoch": 1.1705386775593567,
      "grad_norm": 0.732994556427002,
      "learning_rate": 7.709388981504057e-06,
      "loss": 0.0206,
      "step": 715260
    },
    {
      "epoch": 1.17057140799801,
      "grad_norm": 0.8344951868057251,
      "learning_rate": 7.70932308929054e-06,
      "loss": 0.0246,
      "step": 715280
    },
    {
      "epoch": 1.1706041384366634,
      "grad_norm": 0.7645049691200256,
      "learning_rate": 7.709257197077023e-06,
      "loss": 0.0157,
      "step": 715300
    },
    {
      "epoch": 1.1706368688753166,
      "grad_norm": 0.8459045886993408,
      "learning_rate": 7.709191304863505e-06,
      "loss": 0.0246,
      "step": 715320
    },
    {
      "epoch": 1.17066959931397,
      "grad_norm": 0.8757716417312622,
      "learning_rate": 7.709125412649988e-06,
      "loss": 0.0238,
      "step": 715340
    },
    {
      "epoch": 1.1707023297526233,
      "grad_norm": 1.047497034072876,
      "learning_rate": 7.70905952043647e-06,
      "loss": 0.0302,
      "step": 715360
    },
    {
      "epoch": 1.1707350601912767,
      "grad_norm": 0.41535162925720215,
      "learning_rate": 7.708993628222954e-06,
      "loss": 0.0249,
      "step": 715380
    },
    {
      "epoch": 1.17076779062993,
      "grad_norm": 0.5631643533706665,
      "learning_rate": 7.708927736009436e-06,
      "loss": 0.0338,
      "step": 715400
    },
    {
      "epoch": 1.1708005210685835,
      "grad_norm": 0.18042638897895813,
      "learning_rate": 7.70886184379592e-06,
      "loss": 0.0167,
      "step": 715420
    },
    {
      "epoch": 1.1708332515072366,
      "grad_norm": 1.079756498336792,
      "learning_rate": 7.708795951582401e-06,
      "loss": 0.0302,
      "step": 715440
    },
    {
      "epoch": 1.17086598194589,
      "grad_norm": 0.4328119158744812,
      "learning_rate": 7.708730059368885e-06,
      "loss": 0.0245,
      "step": 715460
    },
    {
      "epoch": 1.1708987123845433,
      "grad_norm": 0.4047289490699768,
      "learning_rate": 7.708664167155367e-06,
      "loss": 0.0245,
      "step": 715480
    },
    {
      "epoch": 1.1709314428231967,
      "grad_norm": 0.29513755440711975,
      "learning_rate": 7.70859827494185e-06,
      "loss": 0.0287,
      "step": 715500
    },
    {
      "epoch": 1.17096417326185,
      "grad_norm": 0.8289433717727661,
      "learning_rate": 7.708532382728334e-06,
      "loss": 0.0192,
      "step": 715520
    },
    {
      "epoch": 1.1709969037005035,
      "grad_norm": 0.6776436567306519,
      "learning_rate": 7.708466490514816e-06,
      "loss": 0.0258,
      "step": 715540
    },
    {
      "epoch": 1.1710296341391566,
      "grad_norm": 0.7637993693351746,
      "learning_rate": 7.7084005983013e-06,
      "loss": 0.0225,
      "step": 715560
    },
    {
      "epoch": 1.17106236457781,
      "grad_norm": 0.5794792771339417,
      "learning_rate": 7.708334706087783e-06,
      "loss": 0.0285,
      "step": 715580
    },
    {
      "epoch": 1.1710950950164634,
      "grad_norm": 0.16664980351924896,
      "learning_rate": 7.708268813874265e-06,
      "loss": 0.0278,
      "step": 715600
    },
    {
      "epoch": 1.1711278254551167,
      "grad_norm": 1.0587639808654785,
      "learning_rate": 7.708202921660748e-06,
      "loss": 0.0225,
      "step": 715620
    },
    {
      "epoch": 1.17116055589377,
      "grad_norm": 0.6678498387336731,
      "learning_rate": 7.708137029447232e-06,
      "loss": 0.0238,
      "step": 715640
    },
    {
      "epoch": 1.1711932863324235,
      "grad_norm": 0.28003033995628357,
      "learning_rate": 7.708071137233714e-06,
      "loss": 0.0252,
      "step": 715660
    },
    {
      "epoch": 1.1712260167710768,
      "grad_norm": 0.6995194554328918,
      "learning_rate": 7.708005245020198e-06,
      "loss": 0.018,
      "step": 715680
    },
    {
      "epoch": 1.17125874720973,
      "grad_norm": 0.7909995913505554,
      "learning_rate": 7.70793935280668e-06,
      "loss": 0.0241,
      "step": 715700
    },
    {
      "epoch": 1.1712914776483834,
      "grad_norm": 1.0325573682785034,
      "learning_rate": 7.707873460593163e-06,
      "loss": 0.0225,
      "step": 715720
    },
    {
      "epoch": 1.1713242080870367,
      "grad_norm": 1.8544083833694458,
      "learning_rate": 7.707807568379645e-06,
      "loss": 0.0265,
      "step": 715740
    },
    {
      "epoch": 1.1713569385256901,
      "grad_norm": 0.5672832131385803,
      "learning_rate": 7.707741676166128e-06,
      "loss": 0.0231,
      "step": 715760
    },
    {
      "epoch": 1.1713896689643435,
      "grad_norm": 1.1015316247940063,
      "learning_rate": 7.70767578395261e-06,
      "loss": 0.0241,
      "step": 715780
    },
    {
      "epoch": 1.1714223994029969,
      "grad_norm": 1.0440218448638916,
      "learning_rate": 7.707609891739094e-06,
      "loss": 0.0188,
      "step": 715800
    },
    {
      "epoch": 1.1714551298416502,
      "grad_norm": 2.2032854557037354,
      "learning_rate": 7.707543999525576e-06,
      "loss": 0.0187,
      "step": 715820
    },
    {
      "epoch": 1.1714878602803034,
      "grad_norm": 0.3590499758720398,
      "learning_rate": 7.70747810731206e-06,
      "loss": 0.0234,
      "step": 715840
    },
    {
      "epoch": 1.1715205907189568,
      "grad_norm": 0.19925612211227417,
      "learning_rate": 7.707412215098541e-06,
      "loss": 0.0218,
      "step": 715860
    },
    {
      "epoch": 1.1715533211576101,
      "grad_norm": 1.4109013080596924,
      "learning_rate": 7.707346322885025e-06,
      "loss": 0.0239,
      "step": 715880
    },
    {
      "epoch": 1.1715860515962635,
      "grad_norm": 0.9774149060249329,
      "learning_rate": 7.707280430671508e-06,
      "loss": 0.0243,
      "step": 715900
    },
    {
      "epoch": 1.1716187820349169,
      "grad_norm": 0.6758440732955933,
      "learning_rate": 7.70721453845799e-06,
      "loss": 0.0249,
      "step": 715920
    },
    {
      "epoch": 1.1716515124735702,
      "grad_norm": 0.6842091679573059,
      "learning_rate": 7.707148646244474e-06,
      "loss": 0.0254,
      "step": 715940
    },
    {
      "epoch": 1.1716842429122236,
      "grad_norm": 1.0597445964813232,
      "learning_rate": 7.707082754030958e-06,
      "loss": 0.0224,
      "step": 715960
    },
    {
      "epoch": 1.1717169733508768,
      "grad_norm": 0.846463143825531,
      "learning_rate": 7.70701686181744e-06,
      "loss": 0.019,
      "step": 715980
    },
    {
      "epoch": 1.1717497037895301,
      "grad_norm": 0.32836785912513733,
      "learning_rate": 7.706950969603923e-06,
      "loss": 0.0229,
      "step": 716000
    },
    {
      "epoch": 1.1717824342281835,
      "grad_norm": 0.9993860125541687,
      "learning_rate": 7.706885077390407e-06,
      "loss": 0.0237,
      "step": 716020
    },
    {
      "epoch": 1.1718151646668369,
      "grad_norm": 0.29729291796684265,
      "learning_rate": 7.706819185176889e-06,
      "loss": 0.0254,
      "step": 716040
    },
    {
      "epoch": 1.1718478951054903,
      "grad_norm": 0.49946504831314087,
      "learning_rate": 7.706753292963372e-06,
      "loss": 0.0198,
      "step": 716060
    },
    {
      "epoch": 1.1718806255441436,
      "grad_norm": 1.2261439561843872,
      "learning_rate": 7.706687400749854e-06,
      "loss": 0.0316,
      "step": 716080
    },
    {
      "epoch": 1.171913355982797,
      "grad_norm": 0.3294699192047119,
      "learning_rate": 7.706621508536338e-06,
      "loss": 0.0176,
      "step": 716100
    },
    {
      "epoch": 1.1719460864214502,
      "grad_norm": 0.0812833234667778,
      "learning_rate": 7.70655561632282e-06,
      "loss": 0.032,
      "step": 716120
    },
    {
      "epoch": 1.1719788168601035,
      "grad_norm": 0.6078410744667053,
      "learning_rate": 7.706489724109303e-06,
      "loss": 0.0172,
      "step": 716140
    },
    {
      "epoch": 1.172011547298757,
      "grad_norm": 0.5747072696685791,
      "learning_rate": 7.706423831895785e-06,
      "loss": 0.0198,
      "step": 716160
    },
    {
      "epoch": 1.1720442777374103,
      "grad_norm": 0.9846962690353394,
      "learning_rate": 7.706357939682269e-06,
      "loss": 0.0268,
      "step": 716180
    },
    {
      "epoch": 1.1720770081760636,
      "grad_norm": 0.8380007147789001,
      "learning_rate": 7.70629204746875e-06,
      "loss": 0.0214,
      "step": 716200
    },
    {
      "epoch": 1.1721097386147168,
      "grad_norm": 0.3154052793979645,
      "learning_rate": 7.706226155255234e-06,
      "loss": 0.0193,
      "step": 716220
    },
    {
      "epoch": 1.1721424690533702,
      "grad_norm": 0.3622119724750519,
      "learning_rate": 7.706160263041716e-06,
      "loss": 0.0288,
      "step": 716240
    },
    {
      "epoch": 1.1721751994920235,
      "grad_norm": 1.9966418743133545,
      "learning_rate": 7.7060943708282e-06,
      "loss": 0.0192,
      "step": 716260
    },
    {
      "epoch": 1.172207929930677,
      "grad_norm": 0.4482947289943695,
      "learning_rate": 7.706028478614683e-06,
      "loss": 0.0122,
      "step": 716280
    },
    {
      "epoch": 1.1722406603693303,
      "grad_norm": 0.5305224657058716,
      "learning_rate": 7.705962586401165e-06,
      "loss": 0.0182,
      "step": 716300
    },
    {
      "epoch": 1.1722733908079837,
      "grad_norm": 0.36744678020477295,
      "learning_rate": 7.705896694187649e-06,
      "loss": 0.0275,
      "step": 716320
    },
    {
      "epoch": 1.172306121246637,
      "grad_norm": 1.3312195539474487,
      "learning_rate": 7.70583080197413e-06,
      "loss": 0.0198,
      "step": 716340
    },
    {
      "epoch": 1.1723388516852902,
      "grad_norm": 0.6138071417808533,
      "learning_rate": 7.705764909760614e-06,
      "loss": 0.0195,
      "step": 716360
    },
    {
      "epoch": 1.1723715821239435,
      "grad_norm": 0.7499628663063049,
      "learning_rate": 7.705699017547098e-06,
      "loss": 0.0218,
      "step": 716380
    },
    {
      "epoch": 1.172404312562597,
      "grad_norm": 0.3065527677536011,
      "learning_rate": 7.70563312533358e-06,
      "loss": 0.0227,
      "step": 716400
    },
    {
      "epoch": 1.1724370430012503,
      "grad_norm": 0.3506752550601959,
      "learning_rate": 7.705567233120063e-06,
      "loss": 0.0276,
      "step": 716420
    },
    {
      "epoch": 1.1724697734399037,
      "grad_norm": 0.4292983412742615,
      "learning_rate": 7.705501340906547e-06,
      "loss": 0.017,
      "step": 716440
    },
    {
      "epoch": 1.172502503878557,
      "grad_norm": 0.7390351891517639,
      "learning_rate": 7.705435448693029e-06,
      "loss": 0.0218,
      "step": 716460
    },
    {
      "epoch": 1.1725352343172104,
      "grad_norm": 3.2869913578033447,
      "learning_rate": 7.705369556479512e-06,
      "loss": 0.021,
      "step": 716480
    },
    {
      "epoch": 1.1725679647558636,
      "grad_norm": 0.6631184816360474,
      "learning_rate": 7.705303664265994e-06,
      "loss": 0.0261,
      "step": 716500
    },
    {
      "epoch": 1.172600695194517,
      "grad_norm": 0.35245281457901,
      "learning_rate": 7.705237772052478e-06,
      "loss": 0.0222,
      "step": 716520
    },
    {
      "epoch": 1.1726334256331703,
      "grad_norm": 0.4632487893104553,
      "learning_rate": 7.70517187983896e-06,
      "loss": 0.0245,
      "step": 716540
    },
    {
      "epoch": 1.1726661560718237,
      "grad_norm": 0.6374412775039673,
      "learning_rate": 7.705105987625443e-06,
      "loss": 0.0191,
      "step": 716560
    },
    {
      "epoch": 1.172698886510477,
      "grad_norm": 0.5107530355453491,
      "learning_rate": 7.705040095411925e-06,
      "loss": 0.0214,
      "step": 716580
    },
    {
      "epoch": 1.1727316169491304,
      "grad_norm": 0.9087719917297363,
      "learning_rate": 7.704974203198409e-06,
      "loss": 0.0233,
      "step": 716600
    },
    {
      "epoch": 1.1727643473877838,
      "grad_norm": 0.812991201877594,
      "learning_rate": 7.70490831098489e-06,
      "loss": 0.0251,
      "step": 716620
    },
    {
      "epoch": 1.172797077826437,
      "grad_norm": 0.1553483009338379,
      "learning_rate": 7.704842418771374e-06,
      "loss": 0.0219,
      "step": 716640
    },
    {
      "epoch": 1.1728298082650903,
      "grad_norm": 1.1168501377105713,
      "learning_rate": 7.704776526557858e-06,
      "loss": 0.0197,
      "step": 716660
    },
    {
      "epoch": 1.1728625387037437,
      "grad_norm": 0.2564312517642975,
      "learning_rate": 7.70471063434434e-06,
      "loss": 0.0307,
      "step": 716680
    },
    {
      "epoch": 1.172895269142397,
      "grad_norm": 0.7485361695289612,
      "learning_rate": 7.704644742130823e-06,
      "loss": 0.0259,
      "step": 716700
    },
    {
      "epoch": 1.1729279995810504,
      "grad_norm": 0.4495773911476135,
      "learning_rate": 7.704578849917305e-06,
      "loss": 0.0207,
      "step": 716720
    },
    {
      "epoch": 1.1729607300197038,
      "grad_norm": 1.1107823848724365,
      "learning_rate": 7.704512957703789e-06,
      "loss": 0.0242,
      "step": 716740
    },
    {
      "epoch": 1.1729934604583572,
      "grad_norm": 0.2633206248283386,
      "learning_rate": 7.704447065490272e-06,
      "loss": 0.0242,
      "step": 716760
    },
    {
      "epoch": 1.1730261908970103,
      "grad_norm": 0.35179442167282104,
      "learning_rate": 7.704381173276754e-06,
      "loss": 0.0216,
      "step": 716780
    },
    {
      "epoch": 1.1730589213356637,
      "grad_norm": 0.7500116229057312,
      "learning_rate": 7.704315281063238e-06,
      "loss": 0.0275,
      "step": 716800
    },
    {
      "epoch": 1.173091651774317,
      "grad_norm": 0.9344504475593567,
      "learning_rate": 7.704249388849721e-06,
      "loss": 0.0219,
      "step": 716820
    },
    {
      "epoch": 1.1731243822129704,
      "grad_norm": 1.1706279516220093,
      "learning_rate": 7.704183496636203e-06,
      "loss": 0.0194,
      "step": 716840
    },
    {
      "epoch": 1.1731571126516238,
      "grad_norm": 1.7349255084991455,
      "learning_rate": 7.704117604422687e-06,
      "loss": 0.0296,
      "step": 716860
    },
    {
      "epoch": 1.1731898430902772,
      "grad_norm": 2.988640785217285,
      "learning_rate": 7.704051712209169e-06,
      "loss": 0.0225,
      "step": 716880
    },
    {
      "epoch": 1.1732225735289306,
      "grad_norm": 2.0412285327911377,
      "learning_rate": 7.703985819995652e-06,
      "loss": 0.0235,
      "step": 716900
    },
    {
      "epoch": 1.1732553039675837,
      "grad_norm": 0.6533858180046082,
      "learning_rate": 7.703919927782134e-06,
      "loss": 0.0176,
      "step": 716920
    },
    {
      "epoch": 1.173288034406237,
      "grad_norm": 0.4951827824115753,
      "learning_rate": 7.703854035568618e-06,
      "loss": 0.0164,
      "step": 716940
    },
    {
      "epoch": 1.1733207648448905,
      "grad_norm": 0.841891348361969,
      "learning_rate": 7.7037881433551e-06,
      "loss": 0.0233,
      "step": 716960
    },
    {
      "epoch": 1.1733534952835438,
      "grad_norm": 0.24929851293563843,
      "learning_rate": 7.703722251141583e-06,
      "loss": 0.0194,
      "step": 716980
    },
    {
      "epoch": 1.1733862257221972,
      "grad_norm": 0.2503773272037506,
      "learning_rate": 7.703656358928067e-06,
      "loss": 0.0278,
      "step": 717000
    },
    {
      "epoch": 1.1734189561608503,
      "grad_norm": 1.3366608619689941,
      "learning_rate": 7.703590466714549e-06,
      "loss": 0.0173,
      "step": 717020
    },
    {
      "epoch": 1.1734516865995037,
      "grad_norm": 0.2828707695007324,
      "learning_rate": 7.703524574501032e-06,
      "loss": 0.0214,
      "step": 717040
    },
    {
      "epoch": 1.173484417038157,
      "grad_norm": 0.8055598735809326,
      "learning_rate": 7.703458682287514e-06,
      "loss": 0.0207,
      "step": 717060
    },
    {
      "epoch": 1.1735171474768105,
      "grad_norm": 0.6488022804260254,
      "learning_rate": 7.703392790073998e-06,
      "loss": 0.0263,
      "step": 717080
    },
    {
      "epoch": 1.1735498779154638,
      "grad_norm": 0.9086616635322571,
      "learning_rate": 7.70332689786048e-06,
      "loss": 0.0238,
      "step": 717100
    },
    {
      "epoch": 1.1735826083541172,
      "grad_norm": 0.6305555105209351,
      "learning_rate": 7.703261005646963e-06,
      "loss": 0.0266,
      "step": 717120
    },
    {
      "epoch": 1.1736153387927706,
      "grad_norm": 1.808262586593628,
      "learning_rate": 7.703195113433445e-06,
      "loss": 0.0321,
      "step": 717140
    },
    {
      "epoch": 1.1736480692314237,
      "grad_norm": 0.7711925506591797,
      "learning_rate": 7.703129221219929e-06,
      "loss": 0.0265,
      "step": 717160
    },
    {
      "epoch": 1.173680799670077,
      "grad_norm": 0.8458371162414551,
      "learning_rate": 7.703063329006412e-06,
      "loss": 0.0254,
      "step": 717180
    },
    {
      "epoch": 1.1737135301087305,
      "grad_norm": 1.100319266319275,
      "learning_rate": 7.702997436792894e-06,
      "loss": 0.0221,
      "step": 717200
    },
    {
      "epoch": 1.1737462605473838,
      "grad_norm": 1.0123289823532104,
      "learning_rate": 7.702931544579378e-06,
      "loss": 0.0184,
      "step": 717220
    },
    {
      "epoch": 1.1737789909860372,
      "grad_norm": 1.3189512491226196,
      "learning_rate": 7.702865652365861e-06,
      "loss": 0.0259,
      "step": 717240
    },
    {
      "epoch": 1.1738117214246906,
      "grad_norm": 0.40828120708465576,
      "learning_rate": 7.702799760152343e-06,
      "loss": 0.0204,
      "step": 717260
    },
    {
      "epoch": 1.173844451863344,
      "grad_norm": 0.9138067960739136,
      "learning_rate": 7.702733867938827e-06,
      "loss": 0.0297,
      "step": 717280
    },
    {
      "epoch": 1.1738771823019971,
      "grad_norm": 0.5776317715644836,
      "learning_rate": 7.702667975725309e-06,
      "loss": 0.017,
      "step": 717300
    },
    {
      "epoch": 1.1739099127406505,
      "grad_norm": 0.21733693778514862,
      "learning_rate": 7.702602083511792e-06,
      "loss": 0.0177,
      "step": 717320
    },
    {
      "epoch": 1.1739426431793039,
      "grad_norm": 0.3929729163646698,
      "learning_rate": 7.702536191298276e-06,
      "loss": 0.0213,
      "step": 717340
    },
    {
      "epoch": 1.1739753736179572,
      "grad_norm": 1.2580262422561646,
      "learning_rate": 7.702470299084758e-06,
      "loss": 0.027,
      "step": 717360
    },
    {
      "epoch": 1.1740081040566106,
      "grad_norm": 0.77027827501297,
      "learning_rate": 7.702404406871241e-06,
      "loss": 0.0277,
      "step": 717380
    },
    {
      "epoch": 1.174040834495264,
      "grad_norm": 1.1534820795059204,
      "learning_rate": 7.702338514657723e-06,
      "loss": 0.018,
      "step": 717400
    },
    {
      "epoch": 1.1740735649339173,
      "grad_norm": 1.1233116388320923,
      "learning_rate": 7.702272622444207e-06,
      "loss": 0.0271,
      "step": 717420
    },
    {
      "epoch": 1.1741062953725705,
      "grad_norm": 0.26837095618247986,
      "learning_rate": 7.702206730230689e-06,
      "loss": 0.0223,
      "step": 717440
    },
    {
      "epoch": 1.1741390258112239,
      "grad_norm": 0.16246779263019562,
      "learning_rate": 7.702140838017172e-06,
      "loss": 0.028,
      "step": 717460
    },
    {
      "epoch": 1.1741717562498772,
      "grad_norm": 0.17845341563224792,
      "learning_rate": 7.702074945803654e-06,
      "loss": 0.0276,
      "step": 717480
    },
    {
      "epoch": 1.1742044866885306,
      "grad_norm": 0.33899423480033875,
      "learning_rate": 7.702009053590138e-06,
      "loss": 0.0218,
      "step": 717500
    },
    {
      "epoch": 1.174237217127184,
      "grad_norm": 1.291326880455017,
      "learning_rate": 7.70194316137662e-06,
      "loss": 0.0361,
      "step": 717520
    },
    {
      "epoch": 1.1742699475658374,
      "grad_norm": 0.3107295036315918,
      "learning_rate": 7.701877269163103e-06,
      "loss": 0.0158,
      "step": 717540
    },
    {
      "epoch": 1.1743026780044907,
      "grad_norm": 3.2949295043945312,
      "learning_rate": 7.701811376949587e-06,
      "loss": 0.0248,
      "step": 717560
    },
    {
      "epoch": 1.1743354084431439,
      "grad_norm": 0.4635572135448456,
      "learning_rate": 7.701745484736069e-06,
      "loss": 0.0179,
      "step": 717580
    },
    {
      "epoch": 1.1743681388817973,
      "grad_norm": 0.5197659730911255,
      "learning_rate": 7.701679592522552e-06,
      "loss": 0.0247,
      "step": 717600
    },
    {
      "epoch": 1.1744008693204506,
      "grad_norm": 0.33346423506736755,
      "learning_rate": 7.701613700309036e-06,
      "loss": 0.0236,
      "step": 717620
    },
    {
      "epoch": 1.174433599759104,
      "grad_norm": 0.4497780501842499,
      "learning_rate": 7.701547808095518e-06,
      "loss": 0.0166,
      "step": 717640
    },
    {
      "epoch": 1.1744663301977574,
      "grad_norm": 0.4143434464931488,
      "learning_rate": 7.701481915882001e-06,
      "loss": 0.0233,
      "step": 717660
    },
    {
      "epoch": 1.1744990606364107,
      "grad_norm": 0.37934496998786926,
      "learning_rate": 7.701416023668483e-06,
      "loss": 0.0279,
      "step": 717680
    },
    {
      "epoch": 1.174531791075064,
      "grad_norm": 0.10142798721790314,
      "learning_rate": 7.701350131454967e-06,
      "loss": 0.022,
      "step": 717700
    },
    {
      "epoch": 1.1745645215137173,
      "grad_norm": 1.5376445055007935,
      "learning_rate": 7.70128423924145e-06,
      "loss": 0.0291,
      "step": 717720
    },
    {
      "epoch": 1.1745972519523706,
      "grad_norm": 0.46620920300483704,
      "learning_rate": 7.701218347027932e-06,
      "loss": 0.0234,
      "step": 717740
    },
    {
      "epoch": 1.174629982391024,
      "grad_norm": 1.4458431005477905,
      "learning_rate": 7.701152454814416e-06,
      "loss": 0.0252,
      "step": 717760
    },
    {
      "epoch": 1.1746627128296774,
      "grad_norm": 0.5586864948272705,
      "learning_rate": 7.701086562600898e-06,
      "loss": 0.0221,
      "step": 717780
    },
    {
      "epoch": 1.1746954432683308,
      "grad_norm": 1.1148004531860352,
      "learning_rate": 7.701020670387381e-06,
      "loss": 0.0202,
      "step": 717800
    },
    {
      "epoch": 1.174728173706984,
      "grad_norm": 0.24806754291057587,
      "learning_rate": 7.700954778173863e-06,
      "loss": 0.0253,
      "step": 717820
    },
    {
      "epoch": 1.1747609041456373,
      "grad_norm": 1.1951931715011597,
      "learning_rate": 7.700888885960347e-06,
      "loss": 0.0234,
      "step": 717840
    },
    {
      "epoch": 1.1747936345842906,
      "grad_norm": 0.1531127244234085,
      "learning_rate": 7.700822993746829e-06,
      "loss": 0.0267,
      "step": 717860
    },
    {
      "epoch": 1.174826365022944,
      "grad_norm": 0.4065397083759308,
      "learning_rate": 7.700757101533312e-06,
      "loss": 0.0167,
      "step": 717880
    },
    {
      "epoch": 1.1748590954615974,
      "grad_norm": 1.651802659034729,
      "learning_rate": 7.700691209319794e-06,
      "loss": 0.0242,
      "step": 717900
    },
    {
      "epoch": 1.1748918259002508,
      "grad_norm": 1.3196038007736206,
      "learning_rate": 7.700625317106278e-06,
      "loss": 0.0202,
      "step": 717920
    },
    {
      "epoch": 1.1749245563389041,
      "grad_norm": 1.1532180309295654,
      "learning_rate": 7.70055942489276e-06,
      "loss": 0.0227,
      "step": 717940
    },
    {
      "epoch": 1.1749572867775573,
      "grad_norm": 0.2716297209262848,
      "learning_rate": 7.700493532679243e-06,
      "loss": 0.0238,
      "step": 717960
    },
    {
      "epoch": 1.1749900172162107,
      "grad_norm": 1.7723852396011353,
      "learning_rate": 7.700427640465727e-06,
      "loss": 0.0208,
      "step": 717980
    },
    {
      "epoch": 1.175022747654864,
      "grad_norm": 0.5505149960517883,
      "learning_rate": 7.70036174825221e-06,
      "loss": 0.0154,
      "step": 718000
    },
    {
      "epoch": 1.1750554780935174,
      "grad_norm": 0.22953671216964722,
      "learning_rate": 7.700295856038692e-06,
      "loss": 0.0174,
      "step": 718020
    },
    {
      "epoch": 1.1750882085321708,
      "grad_norm": 0.2882114052772522,
      "learning_rate": 7.700229963825176e-06,
      "loss": 0.0252,
      "step": 718040
    },
    {
      "epoch": 1.1751209389708241,
      "grad_norm": 0.5990177392959595,
      "learning_rate": 7.70016407161166e-06,
      "loss": 0.0275,
      "step": 718060
    },
    {
      "epoch": 1.1751536694094775,
      "grad_norm": 2.0049211978912354,
      "learning_rate": 7.700098179398142e-06,
      "loss": 0.0238,
      "step": 718080
    },
    {
      "epoch": 1.1751863998481307,
      "grad_norm": 1.0729657411575317,
      "learning_rate": 7.700032287184625e-06,
      "loss": 0.027,
      "step": 718100
    },
    {
      "epoch": 1.175219130286784,
      "grad_norm": 1.1728967428207397,
      "learning_rate": 7.699966394971107e-06,
      "loss": 0.0255,
      "step": 718120
    },
    {
      "epoch": 1.1752518607254374,
      "grad_norm": 0.390323668718338,
      "learning_rate": 7.69990050275759e-06,
      "loss": 0.0311,
      "step": 718140
    },
    {
      "epoch": 1.1752845911640908,
      "grad_norm": 1.443287968635559,
      "learning_rate": 7.699834610544072e-06,
      "loss": 0.0254,
      "step": 718160
    },
    {
      "epoch": 1.1753173216027442,
      "grad_norm": 1.4865176677703857,
      "learning_rate": 7.699768718330556e-06,
      "loss": 0.0189,
      "step": 718180
    },
    {
      "epoch": 1.1753500520413975,
      "grad_norm": 0.5645602941513062,
      "learning_rate": 7.699702826117038e-06,
      "loss": 0.0313,
      "step": 718200
    },
    {
      "epoch": 1.175382782480051,
      "grad_norm": 1.242910385131836,
      "learning_rate": 7.699636933903522e-06,
      "loss": 0.0272,
      "step": 718220
    },
    {
      "epoch": 1.175415512918704,
      "grad_norm": 0.3973585069179535,
      "learning_rate": 7.699571041690003e-06,
      "loss": 0.0237,
      "step": 718240
    },
    {
      "epoch": 1.1754482433573574,
      "grad_norm": 1.3847501277923584,
      "learning_rate": 7.699505149476487e-06,
      "loss": 0.0392,
      "step": 718260
    },
    {
      "epoch": 1.1754809737960108,
      "grad_norm": 0.5267353653907776,
      "learning_rate": 7.699439257262969e-06,
      "loss": 0.0153,
      "step": 718280
    },
    {
      "epoch": 1.1755137042346642,
      "grad_norm": 1.4748106002807617,
      "learning_rate": 7.699373365049453e-06,
      "loss": 0.0278,
      "step": 718300
    },
    {
      "epoch": 1.1755464346733175,
      "grad_norm": 0.9559392929077148,
      "learning_rate": 7.699307472835934e-06,
      "loss": 0.0219,
      "step": 718320
    },
    {
      "epoch": 1.175579165111971,
      "grad_norm": 1.8120918273925781,
      "learning_rate": 7.699241580622418e-06,
      "loss": 0.0227,
      "step": 718340
    },
    {
      "epoch": 1.1756118955506243,
      "grad_norm": 0.16427166759967804,
      "learning_rate": 7.699175688408902e-06,
      "loss": 0.0209,
      "step": 718360
    },
    {
      "epoch": 1.1756446259892774,
      "grad_norm": 0.46138060092926025,
      "learning_rate": 7.699109796195383e-06,
      "loss": 0.0146,
      "step": 718380
    },
    {
      "epoch": 1.1756773564279308,
      "grad_norm": 0.4571914076805115,
      "learning_rate": 7.699043903981867e-06,
      "loss": 0.0246,
      "step": 718400
    },
    {
      "epoch": 1.1757100868665842,
      "grad_norm": 0.48766329884529114,
      "learning_rate": 7.69897801176835e-06,
      "loss": 0.0251,
      "step": 718420
    },
    {
      "epoch": 1.1757428173052376,
      "grad_norm": 0.3579985201358795,
      "learning_rate": 7.698912119554833e-06,
      "loss": 0.0184,
      "step": 718440
    },
    {
      "epoch": 1.175775547743891,
      "grad_norm": 1.6689252853393555,
      "learning_rate": 7.698846227341316e-06,
      "loss": 0.0222,
      "step": 718460
    },
    {
      "epoch": 1.1758082781825443,
      "grad_norm": 1.2880195379257202,
      "learning_rate": 7.6987803351278e-06,
      "loss": 0.0306,
      "step": 718480
    },
    {
      "epoch": 1.1758410086211974,
      "grad_norm": 0.43366652727127075,
      "learning_rate": 7.698714442914282e-06,
      "loss": 0.0327,
      "step": 718500
    },
    {
      "epoch": 1.1758737390598508,
      "grad_norm": 0.31652456521987915,
      "learning_rate": 7.698648550700765e-06,
      "loss": 0.0359,
      "step": 718520
    },
    {
      "epoch": 1.1759064694985042,
      "grad_norm": 0.9427721500396729,
      "learning_rate": 7.698582658487247e-06,
      "loss": 0.024,
      "step": 718540
    },
    {
      "epoch": 1.1759391999371576,
      "grad_norm": 0.37095901370048523,
      "learning_rate": 7.69851676627373e-06,
      "loss": 0.0208,
      "step": 718560
    },
    {
      "epoch": 1.175971930375811,
      "grad_norm": 0.4556845426559448,
      "learning_rate": 7.698450874060213e-06,
      "loss": 0.0198,
      "step": 718580
    },
    {
      "epoch": 1.1760046608144643,
      "grad_norm": 0.2933291494846344,
      "learning_rate": 7.698384981846696e-06,
      "loss": 0.0208,
      "step": 718600
    },
    {
      "epoch": 1.1760373912531175,
      "grad_norm": 0.47659730911254883,
      "learning_rate": 7.698319089633178e-06,
      "loss": 0.0191,
      "step": 718620
    },
    {
      "epoch": 1.1760701216917708,
      "grad_norm": 0.7833274602890015,
      "learning_rate": 7.698253197419662e-06,
      "loss": 0.0324,
      "step": 718640
    },
    {
      "epoch": 1.1761028521304242,
      "grad_norm": 0.7935640811920166,
      "learning_rate": 7.698187305206144e-06,
      "loss": 0.0227,
      "step": 718660
    },
    {
      "epoch": 1.1761355825690776,
      "grad_norm": 0.5737209320068359,
      "learning_rate": 7.698121412992627e-06,
      "loss": 0.026,
      "step": 718680
    },
    {
      "epoch": 1.176168313007731,
      "grad_norm": 0.24455378949642181,
      "learning_rate": 7.698055520779109e-06,
      "loss": 0.0207,
      "step": 718700
    },
    {
      "epoch": 1.1762010434463843,
      "grad_norm": 1.4815913438796997,
      "learning_rate": 7.697989628565593e-06,
      "loss": 0.0196,
      "step": 718720
    },
    {
      "epoch": 1.1762337738850377,
      "grad_norm": 2.6349449157714844,
      "learning_rate": 7.697923736352076e-06,
      "loss": 0.0199,
      "step": 718740
    },
    {
      "epoch": 1.1762665043236908,
      "grad_norm": 1.4808154106140137,
      "learning_rate": 7.697857844138558e-06,
      "loss": 0.0245,
      "step": 718760
    },
    {
      "epoch": 1.1762992347623442,
      "grad_norm": 1.9646869897842407,
      "learning_rate": 7.697791951925042e-06,
      "loss": 0.0266,
      "step": 718780
    },
    {
      "epoch": 1.1763319652009976,
      "grad_norm": 0.9873366951942444,
      "learning_rate": 7.697726059711525e-06,
      "loss": 0.0208,
      "step": 718800
    },
    {
      "epoch": 1.176364695639651,
      "grad_norm": 0.2884761691093445,
      "learning_rate": 7.697660167498007e-06,
      "loss": 0.0255,
      "step": 718820
    },
    {
      "epoch": 1.1763974260783043,
      "grad_norm": 0.3933120667934418,
      "learning_rate": 7.69759427528449e-06,
      "loss": 0.0231,
      "step": 718840
    },
    {
      "epoch": 1.1764301565169577,
      "grad_norm": 0.9717994928359985,
      "learning_rate": 7.697528383070974e-06,
      "loss": 0.0207,
      "step": 718860
    },
    {
      "epoch": 1.176462886955611,
      "grad_norm": 1.000736117362976,
      "learning_rate": 7.697462490857456e-06,
      "loss": 0.0279,
      "step": 718880
    },
    {
      "epoch": 1.1764956173942642,
      "grad_norm": 0.42333564162254333,
      "learning_rate": 7.69739659864394e-06,
      "loss": 0.0153,
      "step": 718900
    },
    {
      "epoch": 1.1765283478329176,
      "grad_norm": 1.0634225606918335,
      "learning_rate": 7.697330706430422e-06,
      "loss": 0.0252,
      "step": 718920
    },
    {
      "epoch": 1.176561078271571,
      "grad_norm": 2.231879472732544,
      "learning_rate": 7.697264814216905e-06,
      "loss": 0.0225,
      "step": 718940
    },
    {
      "epoch": 1.1765938087102243,
      "grad_norm": 0.5985754728317261,
      "learning_rate": 7.697198922003387e-06,
      "loss": 0.0262,
      "step": 718960
    },
    {
      "epoch": 1.1766265391488777,
      "grad_norm": 0.08687381446361542,
      "learning_rate": 7.69713302978987e-06,
      "loss": 0.0197,
      "step": 718980
    },
    {
      "epoch": 1.176659269587531,
      "grad_norm": 0.4150569438934326,
      "learning_rate": 7.697067137576353e-06,
      "loss": 0.0168,
      "step": 719000
    },
    {
      "epoch": 1.1766920000261845,
      "grad_norm": 1.180232286453247,
      "learning_rate": 7.697001245362836e-06,
      "loss": 0.0259,
      "step": 719020
    },
    {
      "epoch": 1.1767247304648376,
      "grad_norm": 1.0127016305923462,
      "learning_rate": 7.696935353149318e-06,
      "loss": 0.0254,
      "step": 719040
    },
    {
      "epoch": 1.176757460903491,
      "grad_norm": 2.1485352516174316,
      "learning_rate": 7.696869460935802e-06,
      "loss": 0.0198,
      "step": 719060
    },
    {
      "epoch": 1.1767901913421444,
      "grad_norm": 0.2695479989051819,
      "learning_rate": 7.696803568722284e-06,
      "loss": 0.0204,
      "step": 719080
    },
    {
      "epoch": 1.1768229217807977,
      "grad_norm": 0.14408302307128906,
      "learning_rate": 7.696737676508767e-06,
      "loss": 0.0318,
      "step": 719100
    },
    {
      "epoch": 1.176855652219451,
      "grad_norm": 1.6075186729431152,
      "learning_rate": 7.69667178429525e-06,
      "loss": 0.0271,
      "step": 719120
    },
    {
      "epoch": 1.1768883826581045,
      "grad_norm": 1.3153183460235596,
      "learning_rate": 7.696605892081733e-06,
      "loss": 0.0284,
      "step": 719140
    },
    {
      "epoch": 1.1769211130967578,
      "grad_norm": 0.1762131303548813,
      "learning_rate": 7.696539999868216e-06,
      "loss": 0.0229,
      "step": 719160
    },
    {
      "epoch": 1.176953843535411,
      "grad_norm": 0.6628540754318237,
      "learning_rate": 7.696474107654698e-06,
      "loss": 0.0192,
      "step": 719180
    },
    {
      "epoch": 1.1769865739740644,
      "grad_norm": 1.1714613437652588,
      "learning_rate": 7.696408215441182e-06,
      "loss": 0.0186,
      "step": 719200
    },
    {
      "epoch": 1.1770193044127177,
      "grad_norm": 0.5259607434272766,
      "learning_rate": 7.696342323227665e-06,
      "loss": 0.0203,
      "step": 719220
    },
    {
      "epoch": 1.177052034851371,
      "grad_norm": 1.5378260612487793,
      "learning_rate": 7.696276431014147e-06,
      "loss": 0.0286,
      "step": 719240
    },
    {
      "epoch": 1.1770847652900245,
      "grad_norm": 1.0475744009017944,
      "learning_rate": 7.69621053880063e-06,
      "loss": 0.0196,
      "step": 719260
    },
    {
      "epoch": 1.1771174957286776,
      "grad_norm": 0.1716947853565216,
      "learning_rate": 7.696144646587114e-06,
      "loss": 0.0231,
      "step": 719280
    },
    {
      "epoch": 1.177150226167331,
      "grad_norm": 0.6938701272010803,
      "learning_rate": 7.696078754373596e-06,
      "loss": 0.031,
      "step": 719300
    },
    {
      "epoch": 1.1771829566059844,
      "grad_norm": 0.520257830619812,
      "learning_rate": 7.69601286216008e-06,
      "loss": 0.0217,
      "step": 719320
    },
    {
      "epoch": 1.1772156870446377,
      "grad_norm": 0.8554405570030212,
      "learning_rate": 7.695946969946562e-06,
      "loss": 0.0292,
      "step": 719340
    },
    {
      "epoch": 1.1772484174832911,
      "grad_norm": 0.12038008123636246,
      "learning_rate": 7.695881077733045e-06,
      "loss": 0.0196,
      "step": 719360
    },
    {
      "epoch": 1.1772811479219445,
      "grad_norm": 0.3350849449634552,
      "learning_rate": 7.695815185519527e-06,
      "loss": 0.0176,
      "step": 719380
    },
    {
      "epoch": 1.1773138783605979,
      "grad_norm": 0.5548071265220642,
      "learning_rate": 7.69574929330601e-06,
      "loss": 0.0196,
      "step": 719400
    },
    {
      "epoch": 1.177346608799251,
      "grad_norm": 0.7767579555511475,
      "learning_rate": 7.695683401092493e-06,
      "loss": 0.0195,
      "step": 719420
    },
    {
      "epoch": 1.1773793392379044,
      "grad_norm": 0.4599839448928833,
      "learning_rate": 7.695617508878976e-06,
      "loss": 0.0255,
      "step": 719440
    },
    {
      "epoch": 1.1774120696765578,
      "grad_norm": 0.28484615683555603,
      "learning_rate": 7.69555161666546e-06,
      "loss": 0.0237,
      "step": 719460
    },
    {
      "epoch": 1.1774448001152111,
      "grad_norm": 0.5308572053909302,
      "learning_rate": 7.695485724451942e-06,
      "loss": 0.0225,
      "step": 719480
    },
    {
      "epoch": 1.1774775305538645,
      "grad_norm": 0.912179172039032,
      "learning_rate": 7.695419832238425e-06,
      "loss": 0.0209,
      "step": 719500
    },
    {
      "epoch": 1.1775102609925179,
      "grad_norm": 1.0917963981628418,
      "learning_rate": 7.695353940024907e-06,
      "loss": 0.0167,
      "step": 719520
    },
    {
      "epoch": 1.1775429914311712,
      "grad_norm": 0.09889008104801178,
      "learning_rate": 7.69528804781139e-06,
      "loss": 0.0226,
      "step": 719540
    },
    {
      "epoch": 1.1775757218698244,
      "grad_norm": 0.83761066198349,
      "learning_rate": 7.695222155597873e-06,
      "loss": 0.0291,
      "step": 719560
    },
    {
      "epoch": 1.1776084523084778,
      "grad_norm": 1.0915825366973877,
      "learning_rate": 7.695156263384356e-06,
      "loss": 0.0212,
      "step": 719580
    },
    {
      "epoch": 1.1776411827471311,
      "grad_norm": 0.31930699944496155,
      "learning_rate": 7.69509037117084e-06,
      "loss": 0.0282,
      "step": 719600
    },
    {
      "epoch": 1.1776739131857845,
      "grad_norm": 0.5109634399414062,
      "learning_rate": 7.695024478957322e-06,
      "loss": 0.0198,
      "step": 719620
    },
    {
      "epoch": 1.1777066436244379,
      "grad_norm": 0.2014976143836975,
      "learning_rate": 7.694958586743805e-06,
      "loss": 0.0193,
      "step": 719640
    },
    {
      "epoch": 1.1777393740630913,
      "grad_norm": 1.6826592683792114,
      "learning_rate": 7.694892694530289e-06,
      "loss": 0.0277,
      "step": 719660
    },
    {
      "epoch": 1.1777721045017446,
      "grad_norm": 1.0399363040924072,
      "learning_rate": 7.694826802316771e-06,
      "loss": 0.0196,
      "step": 719680
    },
    {
      "epoch": 1.1778048349403978,
      "grad_norm": 1.8252527713775635,
      "learning_rate": 7.694760910103254e-06,
      "loss": 0.0178,
      "step": 719700
    },
    {
      "epoch": 1.1778375653790512,
      "grad_norm": 1.434524416923523,
      "learning_rate": 7.694695017889736e-06,
      "loss": 0.0225,
      "step": 719720
    },
    {
      "epoch": 1.1778702958177045,
      "grad_norm": 0.8220949172973633,
      "learning_rate": 7.69462912567622e-06,
      "loss": 0.0258,
      "step": 719740
    },
    {
      "epoch": 1.177903026256358,
      "grad_norm": 0.4548911154270172,
      "learning_rate": 7.694563233462702e-06,
      "loss": 0.0205,
      "step": 719760
    },
    {
      "epoch": 1.1779357566950113,
      "grad_norm": 0.4857795834541321,
      "learning_rate": 7.694497341249185e-06,
      "loss": 0.0381,
      "step": 719780
    },
    {
      "epoch": 1.1779684871336646,
      "grad_norm": 0.56026291847229,
      "learning_rate": 7.694431449035669e-06,
      "loss": 0.0169,
      "step": 719800
    },
    {
      "epoch": 1.178001217572318,
      "grad_norm": 1.074569821357727,
      "learning_rate": 7.694365556822151e-06,
      "loss": 0.0222,
      "step": 719820
    },
    {
      "epoch": 1.1780339480109712,
      "grad_norm": 0.17590856552124023,
      "learning_rate": 7.694299664608634e-06,
      "loss": 0.0173,
      "step": 719840
    },
    {
      "epoch": 1.1780666784496245,
      "grad_norm": 0.9478009343147278,
      "learning_rate": 7.694233772395116e-06,
      "loss": 0.0216,
      "step": 719860
    },
    {
      "epoch": 1.178099408888278,
      "grad_norm": 0.9194664359092712,
      "learning_rate": 7.6941678801816e-06,
      "loss": 0.0229,
      "step": 719880
    },
    {
      "epoch": 1.1781321393269313,
      "grad_norm": 0.9424769282341003,
      "learning_rate": 7.694101987968082e-06,
      "loss": 0.0261,
      "step": 719900
    },
    {
      "epoch": 1.1781648697655847,
      "grad_norm": 0.6194592118263245,
      "learning_rate": 7.694036095754565e-06,
      "loss": 0.0216,
      "step": 719920
    },
    {
      "epoch": 1.178197600204238,
      "grad_norm": 0.35949984192848206,
      "learning_rate": 7.693970203541047e-06,
      "loss": 0.0277,
      "step": 719940
    },
    {
      "epoch": 1.1782303306428914,
      "grad_norm": 1.1676785945892334,
      "learning_rate": 7.693904311327531e-06,
      "loss": 0.0203,
      "step": 719960
    },
    {
      "epoch": 1.1782630610815445,
      "grad_norm": 2.07817006111145,
      "learning_rate": 7.693838419114013e-06,
      "loss": 0.0207,
      "step": 719980
    },
    {
      "epoch": 1.178295791520198,
      "grad_norm": 0.5899547338485718,
      "learning_rate": 7.693772526900496e-06,
      "loss": 0.029,
      "step": 720000
    },
    {
      "epoch": 1.1783285219588513,
      "grad_norm": 0.36070120334625244,
      "learning_rate": 7.69370663468698e-06,
      "loss": 0.0236,
      "step": 720020
    },
    {
      "epoch": 1.1783612523975047,
      "grad_norm": 0.9101104140281677,
      "learning_rate": 7.693640742473462e-06,
      "loss": 0.0295,
      "step": 720040
    },
    {
      "epoch": 1.178393982836158,
      "grad_norm": 0.41516703367233276,
      "learning_rate": 7.693574850259945e-06,
      "loss": 0.0279,
      "step": 720060
    },
    {
      "epoch": 1.1784267132748112,
      "grad_norm": 1.001311182975769,
      "learning_rate": 7.693508958046429e-06,
      "loss": 0.0252,
      "step": 720080
    },
    {
      "epoch": 1.1784594437134646,
      "grad_norm": 1.0651015043258667,
      "learning_rate": 7.693443065832911e-06,
      "loss": 0.0374,
      "step": 720100
    },
    {
      "epoch": 1.178492174152118,
      "grad_norm": 0.8418042659759521,
      "learning_rate": 7.693377173619395e-06,
      "loss": 0.0208,
      "step": 720120
    },
    {
      "epoch": 1.1785249045907713,
      "grad_norm": 1.1205693483352661,
      "learning_rate": 7.693311281405876e-06,
      "loss": 0.0178,
      "step": 720140
    },
    {
      "epoch": 1.1785576350294247,
      "grad_norm": 0.4373806416988373,
      "learning_rate": 7.69324538919236e-06,
      "loss": 0.0259,
      "step": 720160
    },
    {
      "epoch": 1.178590365468078,
      "grad_norm": 0.8684219717979431,
      "learning_rate": 7.693179496978844e-06,
      "loss": 0.0161,
      "step": 720180
    },
    {
      "epoch": 1.1786230959067314,
      "grad_norm": 0.9367899894714355,
      "learning_rate": 7.693113604765325e-06,
      "loss": 0.0267,
      "step": 720200
    },
    {
      "epoch": 1.1786558263453846,
      "grad_norm": 0.9103214144706726,
      "learning_rate": 7.693047712551809e-06,
      "loss": 0.0175,
      "step": 720220
    },
    {
      "epoch": 1.178688556784038,
      "grad_norm": 0.5651502013206482,
      "learning_rate": 7.692981820338291e-06,
      "loss": 0.0281,
      "step": 720240
    },
    {
      "epoch": 1.1787212872226913,
      "grad_norm": 0.7557440996170044,
      "learning_rate": 7.692915928124775e-06,
      "loss": 0.0244,
      "step": 720260
    },
    {
      "epoch": 1.1787540176613447,
      "grad_norm": 0.9306458234786987,
      "learning_rate": 7.692850035911256e-06,
      "loss": 0.0284,
      "step": 720280
    },
    {
      "epoch": 1.178786748099998,
      "grad_norm": 1.3395591974258423,
      "learning_rate": 7.69278414369774e-06,
      "loss": 0.0227,
      "step": 720300
    },
    {
      "epoch": 1.1788194785386514,
      "grad_norm": 0.8096777200698853,
      "learning_rate": 7.692718251484222e-06,
      "loss": 0.0201,
      "step": 720320
    },
    {
      "epoch": 1.1788522089773048,
      "grad_norm": 0.6455390453338623,
      "learning_rate": 7.692652359270706e-06,
      "loss": 0.0201,
      "step": 720340
    },
    {
      "epoch": 1.178884939415958,
      "grad_norm": 0.26652827858924866,
      "learning_rate": 7.692586467057187e-06,
      "loss": 0.0255,
      "step": 720360
    },
    {
      "epoch": 1.1789176698546113,
      "grad_norm": 1.656373381614685,
      "learning_rate": 7.692520574843671e-06,
      "loss": 0.0281,
      "step": 720380
    },
    {
      "epoch": 1.1789504002932647,
      "grad_norm": 0.8256241679191589,
      "learning_rate": 7.692454682630155e-06,
      "loss": 0.0236,
      "step": 720400
    },
    {
      "epoch": 1.178983130731918,
      "grad_norm": 1.4022290706634521,
      "learning_rate": 7.692388790416636e-06,
      "loss": 0.0211,
      "step": 720420
    },
    {
      "epoch": 1.1790158611705714,
      "grad_norm": 0.6760566830635071,
      "learning_rate": 7.69232289820312e-06,
      "loss": 0.0211,
      "step": 720440
    },
    {
      "epoch": 1.1790485916092248,
      "grad_norm": 1.6000982522964478,
      "learning_rate": 7.692257005989604e-06,
      "loss": 0.0266,
      "step": 720460
    },
    {
      "epoch": 1.1790813220478782,
      "grad_norm": 1.8169946670532227,
      "learning_rate": 7.692191113776086e-06,
      "loss": 0.0164,
      "step": 720480
    },
    {
      "epoch": 1.1791140524865313,
      "grad_norm": 1.1532739400863647,
      "learning_rate": 7.692125221562569e-06,
      "loss": 0.0168,
      "step": 720500
    },
    {
      "epoch": 1.1791467829251847,
      "grad_norm": 0.14573626220226288,
      "learning_rate": 7.692059329349053e-06,
      "loss": 0.0217,
      "step": 720520
    },
    {
      "epoch": 1.179179513363838,
      "grad_norm": 0.46056312322616577,
      "learning_rate": 7.691993437135535e-06,
      "loss": 0.0244,
      "step": 720540
    },
    {
      "epoch": 1.1792122438024915,
      "grad_norm": 0.6215689182281494,
      "learning_rate": 7.691927544922018e-06,
      "loss": 0.0209,
      "step": 720560
    },
    {
      "epoch": 1.1792449742411448,
      "grad_norm": 0.5463998913764954,
      "learning_rate": 7.6918616527085e-06,
      "loss": 0.016,
      "step": 720580
    },
    {
      "epoch": 1.1792777046797982,
      "grad_norm": 1.3660786151885986,
      "learning_rate": 7.691795760494984e-06,
      "loss": 0.0221,
      "step": 720600
    },
    {
      "epoch": 1.1793104351184516,
      "grad_norm": 0.7297171950340271,
      "learning_rate": 7.691729868281466e-06,
      "loss": 0.027,
      "step": 720620
    },
    {
      "epoch": 1.1793431655571047,
      "grad_norm": 1.0483860969543457,
      "learning_rate": 7.69166397606795e-06,
      "loss": 0.0216,
      "step": 720640
    },
    {
      "epoch": 1.179375895995758,
      "grad_norm": 0.34302255511283875,
      "learning_rate": 7.691598083854431e-06,
      "loss": 0.0274,
      "step": 720660
    },
    {
      "epoch": 1.1794086264344115,
      "grad_norm": 0.5135213136672974,
      "learning_rate": 7.691532191640915e-06,
      "loss": 0.0242,
      "step": 720680
    },
    {
      "epoch": 1.1794413568730648,
      "grad_norm": 0.7442319989204407,
      "learning_rate": 7.691466299427397e-06,
      "loss": 0.0241,
      "step": 720700
    },
    {
      "epoch": 1.1794740873117182,
      "grad_norm": 0.45409348607063293,
      "learning_rate": 7.69140040721388e-06,
      "loss": 0.0259,
      "step": 720720
    },
    {
      "epoch": 1.1795068177503716,
      "grad_norm": 1.4975354671478271,
      "learning_rate": 7.691334515000362e-06,
      "loss": 0.0266,
      "step": 720740
    },
    {
      "epoch": 1.1795395481890247,
      "grad_norm": 1.0838401317596436,
      "learning_rate": 7.691268622786846e-06,
      "loss": 0.0307,
      "step": 720760
    },
    {
      "epoch": 1.179572278627678,
      "grad_norm": 0.2990490198135376,
      "learning_rate": 7.69120273057333e-06,
      "loss": 0.027,
      "step": 720780
    },
    {
      "epoch": 1.1796050090663315,
      "grad_norm": 0.16480067372322083,
      "learning_rate": 7.691136838359811e-06,
      "loss": 0.0186,
      "step": 720800
    },
    {
      "epoch": 1.1796377395049849,
      "grad_norm": 0.6399205327033997,
      "learning_rate": 7.691070946146295e-06,
      "loss": 0.0285,
      "step": 720820
    },
    {
      "epoch": 1.1796704699436382,
      "grad_norm": 0.43773844838142395,
      "learning_rate": 7.691005053932778e-06,
      "loss": 0.0235,
      "step": 720840
    },
    {
      "epoch": 1.1797032003822916,
      "grad_norm": 0.410714715719223,
      "learning_rate": 7.69093916171926e-06,
      "loss": 0.0256,
      "step": 720860
    },
    {
      "epoch": 1.1797359308209447,
      "grad_norm": 1.2633864879608154,
      "learning_rate": 7.690873269505744e-06,
      "loss": 0.0304,
      "step": 720880
    },
    {
      "epoch": 1.1797686612595981,
      "grad_norm": 0.9606873393058777,
      "learning_rate": 7.690807377292227e-06,
      "loss": 0.0246,
      "step": 720900
    },
    {
      "epoch": 1.1798013916982515,
      "grad_norm": 0.315472275018692,
      "learning_rate": 7.69074148507871e-06,
      "loss": 0.017,
      "step": 720920
    },
    {
      "epoch": 1.1798341221369049,
      "grad_norm": 0.7315228581428528,
      "learning_rate": 7.690675592865193e-06,
      "loss": 0.0263,
      "step": 720940
    },
    {
      "epoch": 1.1798668525755582,
      "grad_norm": 0.18119487166404724,
      "learning_rate": 7.690609700651675e-06,
      "loss": 0.0317,
      "step": 720960
    },
    {
      "epoch": 1.1798995830142116,
      "grad_norm": 0.3174954950809479,
      "learning_rate": 7.690543808438158e-06,
      "loss": 0.0371,
      "step": 720980
    },
    {
      "epoch": 1.179932313452865,
      "grad_norm": 0.23175394535064697,
      "learning_rate": 7.69047791622464e-06,
      "loss": 0.0231,
      "step": 721000
    },
    {
      "epoch": 1.1799650438915181,
      "grad_norm": 2.2340221405029297,
      "learning_rate": 7.690412024011124e-06,
      "loss": 0.0245,
      "step": 721020
    },
    {
      "epoch": 1.1799977743301715,
      "grad_norm": 0.48511984944343567,
      "learning_rate": 7.690346131797606e-06,
      "loss": 0.0274,
      "step": 721040
    },
    {
      "epoch": 1.1800305047688249,
      "grad_norm": 0.6993802189826965,
      "learning_rate": 7.69028023958409e-06,
      "loss": 0.0263,
      "step": 721060
    },
    {
      "epoch": 1.1800632352074782,
      "grad_norm": 1.9133435487747192,
      "learning_rate": 7.690214347370571e-06,
      "loss": 0.0306,
      "step": 721080
    },
    {
      "epoch": 1.1800959656461316,
      "grad_norm": 0.9790943264961243,
      "learning_rate": 7.690148455157055e-06,
      "loss": 0.0247,
      "step": 721100
    },
    {
      "epoch": 1.180128696084785,
      "grad_norm": 0.457341730594635,
      "learning_rate": 7.690082562943537e-06,
      "loss": 0.0178,
      "step": 721120
    },
    {
      "epoch": 1.1801614265234384,
      "grad_norm": 0.8469269275665283,
      "learning_rate": 7.69001667073002e-06,
      "loss": 0.0222,
      "step": 721140
    },
    {
      "epoch": 1.1801941569620915,
      "grad_norm": 0.7284954786300659,
      "learning_rate": 7.689950778516502e-06,
      "loss": 0.0247,
      "step": 721160
    },
    {
      "epoch": 1.1802268874007449,
      "grad_norm": 0.6310842037200928,
      "learning_rate": 7.689884886302986e-06,
      "loss": 0.0232,
      "step": 721180
    },
    {
      "epoch": 1.1802596178393983,
      "grad_norm": 1.1089613437652588,
      "learning_rate": 7.68981899408947e-06,
      "loss": 0.0242,
      "step": 721200
    },
    {
      "epoch": 1.1802923482780516,
      "grad_norm": 0.7969473004341125,
      "learning_rate": 7.689753101875951e-06,
      "loss": 0.0194,
      "step": 721220
    },
    {
      "epoch": 1.180325078716705,
      "grad_norm": 2.1865341663360596,
      "learning_rate": 7.689687209662435e-06,
      "loss": 0.0248,
      "step": 721240
    },
    {
      "epoch": 1.1803578091553584,
      "grad_norm": 0.11495237052440643,
      "learning_rate": 7.689621317448918e-06,
      "loss": 0.0184,
      "step": 721260
    },
    {
      "epoch": 1.1803905395940117,
      "grad_norm": 0.282490074634552,
      "learning_rate": 7.6895554252354e-06,
      "loss": 0.0268,
      "step": 721280
    },
    {
      "epoch": 1.180423270032665,
      "grad_norm": 1.4163920879364014,
      "learning_rate": 7.689489533021884e-06,
      "loss": 0.0119,
      "step": 721300
    },
    {
      "epoch": 1.1804560004713183,
      "grad_norm": 0.11107970774173737,
      "learning_rate": 7.689423640808367e-06,
      "loss": 0.0221,
      "step": 721320
    },
    {
      "epoch": 1.1804887309099716,
      "grad_norm": 0.47716742753982544,
      "learning_rate": 7.68935774859485e-06,
      "loss": 0.0229,
      "step": 721340
    },
    {
      "epoch": 1.180521461348625,
      "grad_norm": 0.4742110073566437,
      "learning_rate": 7.689291856381333e-06,
      "loss": 0.0236,
      "step": 721360
    },
    {
      "epoch": 1.1805541917872784,
      "grad_norm": 0.898738443851471,
      "learning_rate": 7.689225964167815e-06,
      "loss": 0.0244,
      "step": 721380
    },
    {
      "epoch": 1.1805869222259318,
      "grad_norm": 0.8558034896850586,
      "learning_rate": 7.689160071954298e-06,
      "loss": 0.0234,
      "step": 721400
    },
    {
      "epoch": 1.1806196526645851,
      "grad_norm": 0.7659757733345032,
      "learning_rate": 7.68909417974078e-06,
      "loss": 0.0284,
      "step": 721420
    },
    {
      "epoch": 1.1806523831032383,
      "grad_norm": 0.3422531485557556,
      "learning_rate": 7.689028287527264e-06,
      "loss": 0.0271,
      "step": 721440
    },
    {
      "epoch": 1.1806851135418917,
      "grad_norm": 0.7575191259384155,
      "learning_rate": 7.688962395313746e-06,
      "loss": 0.0292,
      "step": 721460
    },
    {
      "epoch": 1.180717843980545,
      "grad_norm": 0.7039276361465454,
      "learning_rate": 7.68889650310023e-06,
      "loss": 0.027,
      "step": 721480
    },
    {
      "epoch": 1.1807505744191984,
      "grad_norm": 0.9530307650566101,
      "learning_rate": 7.688830610886711e-06,
      "loss": 0.0283,
      "step": 721500
    },
    {
      "epoch": 1.1807833048578518,
      "grad_norm": 1.5127187967300415,
      "learning_rate": 7.688764718673195e-06,
      "loss": 0.0358,
      "step": 721520
    },
    {
      "epoch": 1.1808160352965051,
      "grad_norm": 1.1470881700515747,
      "learning_rate": 7.688698826459677e-06,
      "loss": 0.0254,
      "step": 721540
    },
    {
      "epoch": 1.1808487657351583,
      "grad_norm": 1.1079967021942139,
      "learning_rate": 7.68863293424616e-06,
      "loss": 0.021,
      "step": 721560
    },
    {
      "epoch": 1.1808814961738117,
      "grad_norm": 0.24641206860542297,
      "learning_rate": 7.688567042032644e-06,
      "loss": 0.0222,
      "step": 721580
    },
    {
      "epoch": 1.180914226612465,
      "grad_norm": 0.24579612910747528,
      "learning_rate": 7.688501149819126e-06,
      "loss": 0.0295,
      "step": 721600
    },
    {
      "epoch": 1.1809469570511184,
      "grad_norm": 1.2482545375823975,
      "learning_rate": 7.68843525760561e-06,
      "loss": 0.0201,
      "step": 721620
    },
    {
      "epoch": 1.1809796874897718,
      "grad_norm": 0.6101527214050293,
      "learning_rate": 7.688369365392093e-06,
      "loss": 0.0237,
      "step": 721640
    },
    {
      "epoch": 1.1810124179284252,
      "grad_norm": 0.8064874410629272,
      "learning_rate": 7.688303473178575e-06,
      "loss": 0.0245,
      "step": 721660
    },
    {
      "epoch": 1.1810451483670783,
      "grad_norm": 0.378703773021698,
      "learning_rate": 7.688237580965058e-06,
      "loss": 0.0145,
      "step": 721680
    },
    {
      "epoch": 1.1810778788057317,
      "grad_norm": 1.1848679780960083,
      "learning_rate": 7.688171688751542e-06,
      "loss": 0.0213,
      "step": 721700
    },
    {
      "epoch": 1.181110609244385,
      "grad_norm": 3.6705269813537598,
      "learning_rate": 7.688105796538024e-06,
      "loss": 0.0244,
      "step": 721720
    },
    {
      "epoch": 1.1811433396830384,
      "grad_norm": 1.7713689804077148,
      "learning_rate": 7.688039904324507e-06,
      "loss": 0.0227,
      "step": 721740
    },
    {
      "epoch": 1.1811760701216918,
      "grad_norm": 0.9529922008514404,
      "learning_rate": 7.68797401211099e-06,
      "loss": 0.0233,
      "step": 721760
    },
    {
      "epoch": 1.1812088005603452,
      "grad_norm": 0.33094605803489685,
      "learning_rate": 7.687908119897473e-06,
      "loss": 0.0235,
      "step": 721780
    },
    {
      "epoch": 1.1812415309989985,
      "grad_norm": 0.3583596348762512,
      "learning_rate": 7.687842227683955e-06,
      "loss": 0.016,
      "step": 721800
    },
    {
      "epoch": 1.1812742614376517,
      "grad_norm": 1.6139994859695435,
      "learning_rate": 7.687776335470438e-06,
      "loss": 0.0197,
      "step": 721820
    },
    {
      "epoch": 1.181306991876305,
      "grad_norm": 1.3753185272216797,
      "learning_rate": 7.68771044325692e-06,
      "loss": 0.0233,
      "step": 721840
    },
    {
      "epoch": 1.1813397223149584,
      "grad_norm": 0.1283891350030899,
      "learning_rate": 7.687644551043404e-06,
      "loss": 0.0279,
      "step": 721860
    },
    {
      "epoch": 1.1813724527536118,
      "grad_norm": 1.3740583658218384,
      "learning_rate": 7.687578658829886e-06,
      "loss": 0.0219,
      "step": 721880
    },
    {
      "epoch": 1.1814051831922652,
      "grad_norm": 0.9694105982780457,
      "learning_rate": 7.68751276661637e-06,
      "loss": 0.0265,
      "step": 721900
    },
    {
      "epoch": 1.1814379136309185,
      "grad_norm": 0.3389929533004761,
      "learning_rate": 7.687446874402853e-06,
      "loss": 0.0165,
      "step": 721920
    },
    {
      "epoch": 1.181470644069572,
      "grad_norm": 0.4737027883529663,
      "learning_rate": 7.687380982189335e-06,
      "loss": 0.0183,
      "step": 721940
    },
    {
      "epoch": 1.181503374508225,
      "grad_norm": 1.2797106504440308,
      "learning_rate": 7.687315089975818e-06,
      "loss": 0.0315,
      "step": 721960
    },
    {
      "epoch": 1.1815361049468784,
      "grad_norm": 0.9220454096794128,
      "learning_rate": 7.6872491977623e-06,
      "loss": 0.0252,
      "step": 721980
    },
    {
      "epoch": 1.1815688353855318,
      "grad_norm": 0.287463903427124,
      "learning_rate": 7.687183305548784e-06,
      "loss": 0.0226,
      "step": 722000
    },
    {
      "epoch": 1.1816015658241852,
      "grad_norm": 0.12407758831977844,
      "learning_rate": 7.687117413335266e-06,
      "loss": 0.0226,
      "step": 722020
    },
    {
      "epoch": 1.1816342962628386,
      "grad_norm": 0.8401311635971069,
      "learning_rate": 7.68705152112175e-06,
      "loss": 0.0264,
      "step": 722040
    },
    {
      "epoch": 1.181667026701492,
      "grad_norm": 1.0705976486206055,
      "learning_rate": 7.686985628908233e-06,
      "loss": 0.024,
      "step": 722060
    },
    {
      "epoch": 1.1816997571401453,
      "grad_norm": 0.6135551929473877,
      "learning_rate": 7.686919736694715e-06,
      "loss": 0.0239,
      "step": 722080
    },
    {
      "epoch": 1.1817324875787985,
      "grad_norm": 0.5602878928184509,
      "learning_rate": 7.686853844481198e-06,
      "loss": 0.027,
      "step": 722100
    },
    {
      "epoch": 1.1817652180174518,
      "grad_norm": 0.8125150203704834,
      "learning_rate": 7.686787952267682e-06,
      "loss": 0.0212,
      "step": 722120
    },
    {
      "epoch": 1.1817979484561052,
      "grad_norm": 0.6241068840026855,
      "learning_rate": 7.686722060054164e-06,
      "loss": 0.0196,
      "step": 722140
    },
    {
      "epoch": 1.1818306788947586,
      "grad_norm": 2.3563759326934814,
      "learning_rate": 7.686656167840648e-06,
      "loss": 0.0214,
      "step": 722160
    },
    {
      "epoch": 1.181863409333412,
      "grad_norm": 0.7513741850852966,
      "learning_rate": 7.68659027562713e-06,
      "loss": 0.0381,
      "step": 722180
    },
    {
      "epoch": 1.1818961397720653,
      "grad_norm": 0.637646496295929,
      "learning_rate": 7.686524383413613e-06,
      "loss": 0.0173,
      "step": 722200
    },
    {
      "epoch": 1.1819288702107187,
      "grad_norm": 0.30984410643577576,
      "learning_rate": 7.686458491200095e-06,
      "loss": 0.0233,
      "step": 722220
    },
    {
      "epoch": 1.1819616006493718,
      "grad_norm": 1.182730793952942,
      "learning_rate": 7.686392598986578e-06,
      "loss": 0.0219,
      "step": 722240
    },
    {
      "epoch": 1.1819943310880252,
      "grad_norm": 10.192355155944824,
      "learning_rate": 7.686326706773062e-06,
      "loss": 0.0199,
      "step": 722260
    },
    {
      "epoch": 1.1820270615266786,
      "grad_norm": 0.8317234516143799,
      "learning_rate": 7.686260814559544e-06,
      "loss": 0.027,
      "step": 722280
    },
    {
      "epoch": 1.182059791965332,
      "grad_norm": 0.5123934149742126,
      "learning_rate": 7.686194922346028e-06,
      "loss": 0.0179,
      "step": 722300
    },
    {
      "epoch": 1.1820925224039853,
      "grad_norm": 0.8897660374641418,
      "learning_rate": 7.68612903013251e-06,
      "loss": 0.0221,
      "step": 722320
    },
    {
      "epoch": 1.1821252528426385,
      "grad_norm": 0.8070691227912903,
      "learning_rate": 7.686063137918993e-06,
      "loss": 0.0254,
      "step": 722340
    },
    {
      "epoch": 1.1821579832812918,
      "grad_norm": 0.3376893699169159,
      "learning_rate": 7.685997245705475e-06,
      "loss": 0.0259,
      "step": 722360
    },
    {
      "epoch": 1.1821907137199452,
      "grad_norm": 0.9202173352241516,
      "learning_rate": 7.685931353491959e-06,
      "loss": 0.02,
      "step": 722380
    },
    {
      "epoch": 1.1822234441585986,
      "grad_norm": 0.3013628125190735,
      "learning_rate": 7.68586546127844e-06,
      "loss": 0.0342,
      "step": 722400
    },
    {
      "epoch": 1.182256174597252,
      "grad_norm": 1.785537600517273,
      "learning_rate": 7.685799569064924e-06,
      "loss": 0.0211,
      "step": 722420
    },
    {
      "epoch": 1.1822889050359053,
      "grad_norm": 1.8414462804794312,
      "learning_rate": 7.685733676851408e-06,
      "loss": 0.0213,
      "step": 722440
    },
    {
      "epoch": 1.1823216354745587,
      "grad_norm": 0.4316176474094391,
      "learning_rate": 7.68566778463789e-06,
      "loss": 0.0195,
      "step": 722460
    },
    {
      "epoch": 1.1823543659132119,
      "grad_norm": 1.3975794315338135,
      "learning_rate": 7.685601892424373e-06,
      "loss": 0.0292,
      "step": 722480
    },
    {
      "epoch": 1.1823870963518652,
      "grad_norm": 1.3481887578964233,
      "learning_rate": 7.685536000210857e-06,
      "loss": 0.0223,
      "step": 722500
    },
    {
      "epoch": 1.1824198267905186,
      "grad_norm": 0.2813720107078552,
      "learning_rate": 7.685470107997339e-06,
      "loss": 0.024,
      "step": 722520
    },
    {
      "epoch": 1.182452557229172,
      "grad_norm": 0.6967991590499878,
      "learning_rate": 7.685404215783822e-06,
      "loss": 0.0235,
      "step": 722540
    },
    {
      "epoch": 1.1824852876678253,
      "grad_norm": 0.2545795440673828,
      "learning_rate": 7.685338323570304e-06,
      "loss": 0.0244,
      "step": 722560
    },
    {
      "epoch": 1.1825180181064787,
      "grad_norm": 0.4377715289592743,
      "learning_rate": 7.685272431356788e-06,
      "loss": 0.0276,
      "step": 722580
    },
    {
      "epoch": 1.182550748545132,
      "grad_norm": 0.6193354725837708,
      "learning_rate": 7.68520653914327e-06,
      "loss": 0.0218,
      "step": 722600
    },
    {
      "epoch": 1.1825834789837852,
      "grad_norm": 1.1615252494812012,
      "learning_rate": 7.685140646929753e-06,
      "loss": 0.0298,
      "step": 722620
    },
    {
      "epoch": 1.1826162094224386,
      "grad_norm": 0.5164874792098999,
      "learning_rate": 7.685074754716237e-06,
      "loss": 0.0233,
      "step": 722640
    },
    {
      "epoch": 1.182648939861092,
      "grad_norm": 0.718711256980896,
      "learning_rate": 7.685008862502719e-06,
      "loss": 0.0367,
      "step": 722660
    },
    {
      "epoch": 1.1826816702997454,
      "grad_norm": 0.7071280479431152,
      "learning_rate": 7.684942970289202e-06,
      "loss": 0.0294,
      "step": 722680
    },
    {
      "epoch": 1.1827144007383987,
      "grad_norm": 0.4327504634857178,
      "learning_rate": 7.684877078075684e-06,
      "loss": 0.0278,
      "step": 722700
    },
    {
      "epoch": 1.182747131177052,
      "grad_norm": 1.6387051343917847,
      "learning_rate": 7.684811185862168e-06,
      "loss": 0.0212,
      "step": 722720
    },
    {
      "epoch": 1.1827798616157055,
      "grad_norm": 0.4368327856063843,
      "learning_rate": 7.68474529364865e-06,
      "loss": 0.0251,
      "step": 722740
    },
    {
      "epoch": 1.1828125920543586,
      "grad_norm": 0.2951125502586365,
      "learning_rate": 7.684679401435133e-06,
      "loss": 0.0335,
      "step": 722760
    },
    {
      "epoch": 1.182845322493012,
      "grad_norm": 0.3693864047527313,
      "learning_rate": 7.684613509221615e-06,
      "loss": 0.02,
      "step": 722780
    },
    {
      "epoch": 1.1828780529316654,
      "grad_norm": 0.35963085293769836,
      "learning_rate": 7.684547617008099e-06,
      "loss": 0.0198,
      "step": 722800
    },
    {
      "epoch": 1.1829107833703187,
      "grad_norm": 1.5442017316818237,
      "learning_rate": 7.68448172479458e-06,
      "loss": 0.0347,
      "step": 722820
    },
    {
      "epoch": 1.1829435138089721,
      "grad_norm": 0.4625847041606903,
      "learning_rate": 7.684415832581064e-06,
      "loss": 0.0124,
      "step": 722840
    },
    {
      "epoch": 1.1829762442476255,
      "grad_norm": 0.6432443857192993,
      "learning_rate": 7.684349940367548e-06,
      "loss": 0.0189,
      "step": 722860
    },
    {
      "epoch": 1.1830089746862789,
      "grad_norm": 0.5344425439834595,
      "learning_rate": 7.68428404815403e-06,
      "loss": 0.0232,
      "step": 722880
    },
    {
      "epoch": 1.183041705124932,
      "grad_norm": 0.7971049547195435,
      "learning_rate": 7.684218155940513e-06,
      "loss": 0.0175,
      "step": 722900
    },
    {
      "epoch": 1.1830744355635854,
      "grad_norm": 0.1867392212152481,
      "learning_rate": 7.684152263726997e-06,
      "loss": 0.0247,
      "step": 722920
    },
    {
      "epoch": 1.1831071660022388,
      "grad_norm": 0.2257213443517685,
      "learning_rate": 7.684086371513479e-06,
      "loss": 0.0207,
      "step": 722940
    },
    {
      "epoch": 1.1831398964408921,
      "grad_norm": 0.5321345925331116,
      "learning_rate": 7.684020479299962e-06,
      "loss": 0.0208,
      "step": 722960
    },
    {
      "epoch": 1.1831726268795455,
      "grad_norm": 0.31131067872047424,
      "learning_rate": 7.683954587086446e-06,
      "loss": 0.0285,
      "step": 722980
    },
    {
      "epoch": 1.1832053573181989,
      "grad_norm": 0.48897597193717957,
      "learning_rate": 7.683888694872928e-06,
      "loss": 0.0244,
      "step": 723000
    },
    {
      "epoch": 1.1832380877568522,
      "grad_norm": 0.546184778213501,
      "learning_rate": 7.683822802659411e-06,
      "loss": 0.0201,
      "step": 723020
    },
    {
      "epoch": 1.1832708181955054,
      "grad_norm": 0.9678604602813721,
      "learning_rate": 7.683756910445893e-06,
      "loss": 0.0315,
      "step": 723040
    },
    {
      "epoch": 1.1833035486341588,
      "grad_norm": 1.4984831809997559,
      "learning_rate": 7.683691018232377e-06,
      "loss": 0.0234,
      "step": 723060
    },
    {
      "epoch": 1.1833362790728121,
      "grad_norm": 0.2820506691932678,
      "learning_rate": 7.683625126018859e-06,
      "loss": 0.0252,
      "step": 723080
    },
    {
      "epoch": 1.1833690095114655,
      "grad_norm": 0.9845142960548401,
      "learning_rate": 7.683559233805342e-06,
      "loss": 0.0239,
      "step": 723100
    },
    {
      "epoch": 1.1834017399501189,
      "grad_norm": 0.5686284899711609,
      "learning_rate": 7.683493341591824e-06,
      "loss": 0.0199,
      "step": 723120
    },
    {
      "epoch": 1.183434470388772,
      "grad_norm": 1.0212160348892212,
      "learning_rate": 7.683427449378308e-06,
      "loss": 0.0251,
      "step": 723140
    },
    {
      "epoch": 1.1834672008274254,
      "grad_norm": 0.5212745070457458,
      "learning_rate": 7.68336155716479e-06,
      "loss": 0.0209,
      "step": 723160
    },
    {
      "epoch": 1.1834999312660788,
      "grad_norm": 0.7244816422462463,
      "learning_rate": 7.683295664951273e-06,
      "loss": 0.0279,
      "step": 723180
    },
    {
      "epoch": 1.1835326617047321,
      "grad_norm": 0.36363622546195984,
      "learning_rate": 7.683229772737755e-06,
      "loss": 0.0183,
      "step": 723200
    },
    {
      "epoch": 1.1835653921433855,
      "grad_norm": 0.762367844581604,
      "learning_rate": 7.683163880524239e-06,
      "loss": 0.0213,
      "step": 723220
    },
    {
      "epoch": 1.183598122582039,
      "grad_norm": 0.2123848795890808,
      "learning_rate": 7.683097988310722e-06,
      "loss": 0.0223,
      "step": 723240
    },
    {
      "epoch": 1.1836308530206923,
      "grad_norm": 0.18326537311077118,
      "learning_rate": 7.683032096097204e-06,
      "loss": 0.0205,
      "step": 723260
    },
    {
      "epoch": 1.1836635834593454,
      "grad_norm": 0.06870253384113312,
      "learning_rate": 7.682966203883688e-06,
      "loss": 0.0223,
      "step": 723280
    },
    {
      "epoch": 1.1836963138979988,
      "grad_norm": 0.9098057150840759,
      "learning_rate": 7.682900311670171e-06,
      "loss": 0.0215,
      "step": 723300
    },
    {
      "epoch": 1.1837290443366522,
      "grad_norm": 0.4654516279697418,
      "learning_rate": 7.682834419456653e-06,
      "loss": 0.0209,
      "step": 723320
    },
    {
      "epoch": 1.1837617747753055,
      "grad_norm": 0.25632673501968384,
      "learning_rate": 7.682768527243137e-06,
      "loss": 0.0335,
      "step": 723340
    },
    {
      "epoch": 1.183794505213959,
      "grad_norm": 0.8418292999267578,
      "learning_rate": 7.68270263502962e-06,
      "loss": 0.021,
      "step": 723360
    },
    {
      "epoch": 1.1838272356526123,
      "grad_norm": 0.8281657099723816,
      "learning_rate": 7.682636742816102e-06,
      "loss": 0.0245,
      "step": 723380
    },
    {
      "epoch": 1.1838599660912656,
      "grad_norm": 0.360185444355011,
      "learning_rate": 7.682570850602586e-06,
      "loss": 0.0188,
      "step": 723400
    },
    {
      "epoch": 1.1838926965299188,
      "grad_norm": 0.5484922528266907,
      "learning_rate": 7.682504958389068e-06,
      "loss": 0.0224,
      "step": 723420
    },
    {
      "epoch": 1.1839254269685722,
      "grad_norm": 1.343085765838623,
      "learning_rate": 7.682439066175551e-06,
      "loss": 0.0218,
      "step": 723440
    },
    {
      "epoch": 1.1839581574072255,
      "grad_norm": 0.31097933650016785,
      "learning_rate": 7.682373173962033e-06,
      "loss": 0.0225,
      "step": 723460
    },
    {
      "epoch": 1.183990887845879,
      "grad_norm": 0.803314208984375,
      "learning_rate": 7.682307281748517e-06,
      "loss": 0.0282,
      "step": 723480
    },
    {
      "epoch": 1.1840236182845323,
      "grad_norm": 0.6966809034347534,
      "learning_rate": 7.682241389534999e-06,
      "loss": 0.0262,
      "step": 723500
    },
    {
      "epoch": 1.1840563487231857,
      "grad_norm": 0.9361075758934021,
      "learning_rate": 7.682175497321482e-06,
      "loss": 0.0223,
      "step": 723520
    },
    {
      "epoch": 1.184089079161839,
      "grad_norm": 1.15101158618927,
      "learning_rate": 7.682109605107964e-06,
      "loss": 0.0163,
      "step": 723540
    },
    {
      "epoch": 1.1841218096004922,
      "grad_norm": 1.4579771757125854,
      "learning_rate": 7.682043712894448e-06,
      "loss": 0.0291,
      "step": 723560
    },
    {
      "epoch": 1.1841545400391456,
      "grad_norm": 1.0844486951828003,
      "learning_rate": 7.68197782068093e-06,
      "loss": 0.0159,
      "step": 723580
    },
    {
      "epoch": 1.184187270477799,
      "grad_norm": 1.4342057704925537,
      "learning_rate": 7.681911928467413e-06,
      "loss": 0.0221,
      "step": 723600
    },
    {
      "epoch": 1.1842200009164523,
      "grad_norm": 0.22198298573493958,
      "learning_rate": 7.681846036253897e-06,
      "loss": 0.0265,
      "step": 723620
    },
    {
      "epoch": 1.1842527313551057,
      "grad_norm": 0.39678558707237244,
      "learning_rate": 7.681780144040379e-06,
      "loss": 0.0258,
      "step": 723640
    },
    {
      "epoch": 1.184285461793759,
      "grad_norm": 0.5496209859848022,
      "learning_rate": 7.681714251826862e-06,
      "loss": 0.0319,
      "step": 723660
    },
    {
      "epoch": 1.1843181922324124,
      "grad_norm": 0.8601419925689697,
      "learning_rate": 7.681648359613346e-06,
      "loss": 0.0262,
      "step": 723680
    },
    {
      "epoch": 1.1843509226710656,
      "grad_norm": 0.6758353114128113,
      "learning_rate": 7.681582467399828e-06,
      "loss": 0.0266,
      "step": 723700
    },
    {
      "epoch": 1.184383653109719,
      "grad_norm": 0.48988252878189087,
      "learning_rate": 7.681516575186311e-06,
      "loss": 0.0263,
      "step": 723720
    },
    {
      "epoch": 1.1844163835483723,
      "grad_norm": 1.198630452156067,
      "learning_rate": 7.681450682972795e-06,
      "loss": 0.0252,
      "step": 723740
    },
    {
      "epoch": 1.1844491139870257,
      "grad_norm": 0.5573373436927795,
      "learning_rate": 7.681384790759277e-06,
      "loss": 0.0201,
      "step": 723760
    },
    {
      "epoch": 1.184481844425679,
      "grad_norm": 0.30705296993255615,
      "learning_rate": 7.68131889854576e-06,
      "loss": 0.0196,
      "step": 723780
    },
    {
      "epoch": 1.1845145748643324,
      "grad_norm": 1.8598660230636597,
      "learning_rate": 7.681253006332242e-06,
      "loss": 0.0261,
      "step": 723800
    },
    {
      "epoch": 1.1845473053029856,
      "grad_norm": 1.5075477361679077,
      "learning_rate": 7.681187114118726e-06,
      "loss": 0.0258,
      "step": 723820
    },
    {
      "epoch": 1.184580035741639,
      "grad_norm": 0.9663697481155396,
      "learning_rate": 7.681121221905208e-06,
      "loss": 0.0272,
      "step": 723840
    },
    {
      "epoch": 1.1846127661802923,
      "grad_norm": 0.10978340357542038,
      "learning_rate": 7.681055329691691e-06,
      "loss": 0.0236,
      "step": 723860
    },
    {
      "epoch": 1.1846454966189457,
      "grad_norm": 0.9337660670280457,
      "learning_rate": 7.680989437478173e-06,
      "loss": 0.0195,
      "step": 723880
    },
    {
      "epoch": 1.184678227057599,
      "grad_norm": 0.7526118755340576,
      "learning_rate": 7.680923545264657e-06,
      "loss": 0.0194,
      "step": 723900
    },
    {
      "epoch": 1.1847109574962524,
      "grad_norm": 0.5828720331192017,
      "learning_rate": 7.680857653051139e-06,
      "loss": 0.0212,
      "step": 723920
    },
    {
      "epoch": 1.1847436879349056,
      "grad_norm": 0.22319237887859344,
      "learning_rate": 7.680791760837622e-06,
      "loss": 0.0219,
      "step": 723940
    },
    {
      "epoch": 1.184776418373559,
      "grad_norm": 0.5981528759002686,
      "learning_rate": 7.680725868624104e-06,
      "loss": 0.0193,
      "step": 723960
    },
    {
      "epoch": 1.1848091488122123,
      "grad_norm": 1.2614140510559082,
      "learning_rate": 7.680659976410588e-06,
      "loss": 0.0296,
      "step": 723980
    },
    {
      "epoch": 1.1848418792508657,
      "grad_norm": 1.9917638301849365,
      "learning_rate": 7.68059408419707e-06,
      "loss": 0.018,
      "step": 724000
    },
    {
      "epoch": 1.184874609689519,
      "grad_norm": 0.20693591237068176,
      "learning_rate": 7.680528191983553e-06,
      "loss": 0.0183,
      "step": 724020
    },
    {
      "epoch": 1.1849073401281724,
      "grad_norm": 0.6994737982749939,
      "learning_rate": 7.680462299770037e-06,
      "loss": 0.0165,
      "step": 724040
    },
    {
      "epoch": 1.1849400705668258,
      "grad_norm": 0.9656614661216736,
      "learning_rate": 7.680396407556519e-06,
      "loss": 0.0172,
      "step": 724060
    },
    {
      "epoch": 1.184972801005479,
      "grad_norm": 0.44116368889808655,
      "learning_rate": 7.680330515343002e-06,
      "loss": 0.014,
      "step": 724080
    },
    {
      "epoch": 1.1850055314441323,
      "grad_norm": 0.5805188417434692,
      "learning_rate": 7.680264623129486e-06,
      "loss": 0.036,
      "step": 724100
    },
    {
      "epoch": 1.1850382618827857,
      "grad_norm": 0.8131384253501892,
      "learning_rate": 7.680198730915968e-06,
      "loss": 0.0203,
      "step": 724120
    },
    {
      "epoch": 1.185070992321439,
      "grad_norm": 0.648313581943512,
      "learning_rate": 7.680132838702451e-06,
      "loss": 0.0248,
      "step": 724140
    },
    {
      "epoch": 1.1851037227600925,
      "grad_norm": 0.7714506387710571,
      "learning_rate": 7.680066946488935e-06,
      "loss": 0.0219,
      "step": 724160
    },
    {
      "epoch": 1.1851364531987458,
      "grad_norm": 0.2247529774904251,
      "learning_rate": 7.680001054275417e-06,
      "loss": 0.0161,
      "step": 724180
    },
    {
      "epoch": 1.1851691836373992,
      "grad_norm": 0.2930862009525299,
      "learning_rate": 7.6799351620619e-06,
      "loss": 0.0245,
      "step": 724200
    },
    {
      "epoch": 1.1852019140760524,
      "grad_norm": 0.44954758882522583,
      "learning_rate": 7.679869269848382e-06,
      "loss": 0.0424,
      "step": 724220
    },
    {
      "epoch": 1.1852346445147057,
      "grad_norm": 0.5277650356292725,
      "learning_rate": 7.679803377634866e-06,
      "loss": 0.0244,
      "step": 724240
    },
    {
      "epoch": 1.185267374953359,
      "grad_norm": 1.3917206525802612,
      "learning_rate": 7.679737485421348e-06,
      "loss": 0.0199,
      "step": 724260
    },
    {
      "epoch": 1.1853001053920125,
      "grad_norm": 0.21364697813987732,
      "learning_rate": 7.679671593207832e-06,
      "loss": 0.0178,
      "step": 724280
    },
    {
      "epoch": 1.1853328358306658,
      "grad_norm": 0.8224890828132629,
      "learning_rate": 7.679605700994313e-06,
      "loss": 0.0183,
      "step": 724300
    },
    {
      "epoch": 1.1853655662693192,
      "grad_norm": 0.9782693982124329,
      "learning_rate": 7.679539808780797e-06,
      "loss": 0.029,
      "step": 724320
    },
    {
      "epoch": 1.1853982967079726,
      "grad_norm": 1.0206248760223389,
      "learning_rate": 7.679473916567279e-06,
      "loss": 0.0209,
      "step": 724340
    },
    {
      "epoch": 1.1854310271466257,
      "grad_norm": 0.32824596762657166,
      "learning_rate": 7.679408024353762e-06,
      "loss": 0.0178,
      "step": 724360
    },
    {
      "epoch": 1.185463757585279,
      "grad_norm": 0.17570151388645172,
      "learning_rate": 7.679342132140246e-06,
      "loss": 0.0273,
      "step": 724380
    },
    {
      "epoch": 1.1854964880239325,
      "grad_norm": 0.2377428412437439,
      "learning_rate": 7.679276239926728e-06,
      "loss": 0.0231,
      "step": 724400
    },
    {
      "epoch": 1.1855292184625859,
      "grad_norm": 0.557415783405304,
      "learning_rate": 7.679210347713212e-06,
      "loss": 0.0208,
      "step": 724420
    },
    {
      "epoch": 1.1855619489012392,
      "grad_norm": 0.6956958770751953,
      "learning_rate": 7.679144455499693e-06,
      "loss": 0.0208,
      "step": 724440
    },
    {
      "epoch": 1.1855946793398926,
      "grad_norm": 2.414149045944214,
      "learning_rate": 7.679078563286177e-06,
      "loss": 0.0283,
      "step": 724460
    },
    {
      "epoch": 1.185627409778546,
      "grad_norm": 1.9203230142593384,
      "learning_rate": 7.67901267107266e-06,
      "loss": 0.0196,
      "step": 724480
    },
    {
      "epoch": 1.1856601402171991,
      "grad_norm": 0.9781638383865356,
      "learning_rate": 7.678946778859142e-06,
      "loss": 0.0235,
      "step": 724500
    },
    {
      "epoch": 1.1856928706558525,
      "grad_norm": 0.5579439997673035,
      "learning_rate": 7.678880886645626e-06,
      "loss": 0.0271,
      "step": 724520
    },
    {
      "epoch": 1.1857256010945059,
      "grad_norm": 0.8075323700904846,
      "learning_rate": 7.67881499443211e-06,
      "loss": 0.0253,
      "step": 724540
    },
    {
      "epoch": 1.1857583315331592,
      "grad_norm": 0.6410664319992065,
      "learning_rate": 7.678749102218592e-06,
      "loss": 0.0223,
      "step": 724560
    },
    {
      "epoch": 1.1857910619718126,
      "grad_norm": 0.6782819628715515,
      "learning_rate": 7.678683210005075e-06,
      "loss": 0.0205,
      "step": 724580
    },
    {
      "epoch": 1.185823792410466,
      "grad_norm": 1.2574670314788818,
      "learning_rate": 7.678617317791557e-06,
      "loss": 0.0295,
      "step": 724600
    },
    {
      "epoch": 1.1858565228491191,
      "grad_norm": 1.1922725439071655,
      "learning_rate": 7.67855142557804e-06,
      "loss": 0.0241,
      "step": 724620
    },
    {
      "epoch": 1.1858892532877725,
      "grad_norm": 1.3979135751724243,
      "learning_rate": 7.678485533364523e-06,
      "loss": 0.0205,
      "step": 724640
    },
    {
      "epoch": 1.1859219837264259,
      "grad_norm": 0.6763128042221069,
      "learning_rate": 7.678419641151006e-06,
      "loss": 0.0239,
      "step": 724660
    },
    {
      "epoch": 1.1859547141650792,
      "grad_norm": 0.1750374734401703,
      "learning_rate": 7.678353748937488e-06,
      "loss": 0.026,
      "step": 724680
    },
    {
      "epoch": 1.1859874446037326,
      "grad_norm": 0.369482159614563,
      "learning_rate": 7.678287856723972e-06,
      "loss": 0.0228,
      "step": 724700
    },
    {
      "epoch": 1.186020175042386,
      "grad_norm": 0.8289158344268799,
      "learning_rate": 7.678221964510453e-06,
      "loss": 0.0244,
      "step": 724720
    },
    {
      "epoch": 1.1860529054810391,
      "grad_norm": 0.06569693237543106,
      "learning_rate": 7.678156072296937e-06,
      "loss": 0.0217,
      "step": 724740
    },
    {
      "epoch": 1.1860856359196925,
      "grad_norm": 0.6913745999336243,
      "learning_rate": 7.67809018008342e-06,
      "loss": 0.0292,
      "step": 724760
    },
    {
      "epoch": 1.1861183663583459,
      "grad_norm": 0.2627621591091156,
      "learning_rate": 7.678024287869903e-06,
      "loss": 0.0168,
      "step": 724780
    },
    {
      "epoch": 1.1861510967969993,
      "grad_norm": 0.8060242533683777,
      "learning_rate": 7.677958395656386e-06,
      "loss": 0.023,
      "step": 724800
    },
    {
      "epoch": 1.1861838272356526,
      "grad_norm": 1.266918659210205,
      "learning_rate": 7.677892503442868e-06,
      "loss": 0.032,
      "step": 724820
    },
    {
      "epoch": 1.186216557674306,
      "grad_norm": 1.3711618185043335,
      "learning_rate": 7.677826611229352e-06,
      "loss": 0.028,
      "step": 724840
    },
    {
      "epoch": 1.1862492881129594,
      "grad_norm": 0.3460678160190582,
      "learning_rate": 7.677760719015833e-06,
      "loss": 0.0212,
      "step": 724860
    },
    {
      "epoch": 1.1862820185516125,
      "grad_norm": 0.12403330206871033,
      "learning_rate": 7.677694826802317e-06,
      "loss": 0.0224,
      "step": 724880
    },
    {
      "epoch": 1.186314748990266,
      "grad_norm": 1.5205281972885132,
      "learning_rate": 7.6776289345888e-06,
      "loss": 0.0208,
      "step": 724900
    },
    {
      "epoch": 1.1863474794289193,
      "grad_norm": 0.27898475527763367,
      "learning_rate": 7.677563042375283e-06,
      "loss": 0.0154,
      "step": 724920
    },
    {
      "epoch": 1.1863802098675726,
      "grad_norm": 0.28960132598876953,
      "learning_rate": 7.677497150161766e-06,
      "loss": 0.0271,
      "step": 724940
    },
    {
      "epoch": 1.186412940306226,
      "grad_norm": 0.5079097151756287,
      "learning_rate": 7.67743125794825e-06,
      "loss": 0.0209,
      "step": 724960
    },
    {
      "epoch": 1.1864456707448794,
      "grad_norm": 0.9372872114181519,
      "learning_rate": 7.677365365734732e-06,
      "loss": 0.0293,
      "step": 724980
    },
    {
      "epoch": 1.1864784011835328,
      "grad_norm": 1.0928056240081787,
      "learning_rate": 7.677299473521215e-06,
      "loss": 0.0182,
      "step": 725000
    },
    {
      "epoch": 1.186511131622186,
      "grad_norm": 0.15106500685214996,
      "learning_rate": 7.677233581307697e-06,
      "loss": 0.0288,
      "step": 725020
    },
    {
      "epoch": 1.1865438620608393,
      "grad_norm": 0.3850364685058594,
      "learning_rate": 7.67716768909418e-06,
      "loss": 0.0206,
      "step": 725040
    },
    {
      "epoch": 1.1865765924994927,
      "grad_norm": null,
      "learning_rate": 7.677101796880663e-06,
      "loss": 0.0228,
      "step": 725060
    },
    {
      "epoch": 1.186609322938146,
      "grad_norm": 1.1290661096572876,
      "learning_rate": 7.677035904667146e-06,
      "loss": 0.0265,
      "step": 725080
    },
    {
      "epoch": 1.1866420533767994,
      "grad_norm": 0.9531981348991394,
      "learning_rate": 7.67697001245363e-06,
      "loss": 0.0237,
      "step": 725100
    },
    {
      "epoch": 1.1866747838154528,
      "grad_norm": 0.3513624966144562,
      "learning_rate": 7.676904120240112e-06,
      "loss": 0.0166,
      "step": 725120
    },
    {
      "epoch": 1.1867075142541061,
      "grad_norm": 0.2978079319000244,
      "learning_rate": 7.676838228026595e-06,
      "loss": 0.0282,
      "step": 725140
    },
    {
      "epoch": 1.1867402446927593,
      "grad_norm": 0.20418202877044678,
      "learning_rate": 7.676772335813077e-06,
      "loss": 0.0229,
      "step": 725160
    },
    {
      "epoch": 1.1867729751314127,
      "grad_norm": 0.32858654856681824,
      "learning_rate": 7.67670644359956e-06,
      "loss": 0.0176,
      "step": 725180
    },
    {
      "epoch": 1.186805705570066,
      "grad_norm": 0.42958396673202515,
      "learning_rate": 7.676640551386043e-06,
      "loss": 0.0259,
      "step": 725200
    },
    {
      "epoch": 1.1868384360087194,
      "grad_norm": 0.9470741152763367,
      "learning_rate": 7.676574659172526e-06,
      "loss": 0.023,
      "step": 725220
    },
    {
      "epoch": 1.1868711664473728,
      "grad_norm": 0.9942703247070312,
      "learning_rate": 7.676508766959008e-06,
      "loss": 0.0235,
      "step": 725240
    },
    {
      "epoch": 1.1869038968860262,
      "grad_norm": 0.28011223673820496,
      "learning_rate": 7.676442874745492e-06,
      "loss": 0.0187,
      "step": 725260
    },
    {
      "epoch": 1.1869366273246795,
      "grad_norm": 0.5704440474510193,
      "learning_rate": 7.676376982531975e-06,
      "loss": 0.024,
      "step": 725280
    },
    {
      "epoch": 1.1869693577633327,
      "grad_norm": 1.5551073551177979,
      "learning_rate": 7.676311090318457e-06,
      "loss": 0.0223,
      "step": 725300
    },
    {
      "epoch": 1.187002088201986,
      "grad_norm": 0.20003893971443176,
      "learning_rate": 7.67624519810494e-06,
      "loss": 0.0154,
      "step": 725320
    },
    {
      "epoch": 1.1870348186406394,
      "grad_norm": 0.32860004901885986,
      "learning_rate": 7.676179305891424e-06,
      "loss": 0.0215,
      "step": 725340
    },
    {
      "epoch": 1.1870675490792928,
      "grad_norm": 3.4572510719299316,
      "learning_rate": 7.676113413677906e-06,
      "loss": 0.0287,
      "step": 725360
    },
    {
      "epoch": 1.1871002795179462,
      "grad_norm": 0.8455739617347717,
      "learning_rate": 7.67604752146439e-06,
      "loss": 0.0225,
      "step": 725380
    },
    {
      "epoch": 1.1871330099565993,
      "grad_norm": 0.35251930356025696,
      "learning_rate": 7.675981629250872e-06,
      "loss": 0.0306,
      "step": 725400
    },
    {
      "epoch": 1.1871657403952527,
      "grad_norm": 0.4442601203918457,
      "learning_rate": 7.675915737037355e-06,
      "loss": 0.0192,
      "step": 725420
    },
    {
      "epoch": 1.187198470833906,
      "grad_norm": 0.25952383875846863,
      "learning_rate": 7.675849844823839e-06,
      "loss": 0.0226,
      "step": 725440
    },
    {
      "epoch": 1.1872312012725594,
      "grad_norm": 0.12351230531930923,
      "learning_rate": 7.67578395261032e-06,
      "loss": 0.0224,
      "step": 725460
    },
    {
      "epoch": 1.1872639317112128,
      "grad_norm": 1.5079998970031738,
      "learning_rate": 7.675718060396804e-06,
      "loss": 0.0222,
      "step": 725480
    },
    {
      "epoch": 1.1872966621498662,
      "grad_norm": 0.43693631887435913,
      "learning_rate": 7.675652168183286e-06,
      "loss": 0.0253,
      "step": 725500
    },
    {
      "epoch": 1.1873293925885195,
      "grad_norm": 0.3132671117782593,
      "learning_rate": 7.67558627596977e-06,
      "loss": 0.0263,
      "step": 725520
    },
    {
      "epoch": 1.1873621230271727,
      "grad_norm": 0.11898139864206314,
      "learning_rate": 7.675520383756252e-06,
      "loss": 0.0248,
      "step": 725540
    },
    {
      "epoch": 1.187394853465826,
      "grad_norm": 0.6763741374015808,
      "learning_rate": 7.675454491542735e-06,
      "loss": 0.0226,
      "step": 725560
    },
    {
      "epoch": 1.1874275839044794,
      "grad_norm": 0.6517254710197449,
      "learning_rate": 7.675388599329217e-06,
      "loss": 0.0239,
      "step": 725580
    },
    {
      "epoch": 1.1874603143431328,
      "grad_norm": 0.5952088832855225,
      "learning_rate": 7.6753227071157e-06,
      "loss": 0.0307,
      "step": 725600
    },
    {
      "epoch": 1.1874930447817862,
      "grad_norm": 2.45159912109375,
      "learning_rate": 7.675256814902183e-06,
      "loss": 0.0225,
      "step": 725620
    },
    {
      "epoch": 1.1875257752204396,
      "grad_norm": 0.3936674892902374,
      "learning_rate": 7.675190922688666e-06,
      "loss": 0.0161,
      "step": 725640
    },
    {
      "epoch": 1.187558505659093,
      "grad_norm": 2.20125412940979,
      "learning_rate": 7.675125030475148e-06,
      "loss": 0.0355,
      "step": 725660
    },
    {
      "epoch": 1.187591236097746,
      "grad_norm": 0.6830502152442932,
      "learning_rate": 7.675059138261632e-06,
      "loss": 0.0243,
      "step": 725680
    },
    {
      "epoch": 1.1876239665363995,
      "grad_norm": 0.20018674433231354,
      "learning_rate": 7.674993246048115e-06,
      "loss": 0.0275,
      "step": 725700
    },
    {
      "epoch": 1.1876566969750528,
      "grad_norm": 1.4093927145004272,
      "learning_rate": 7.674927353834597e-06,
      "loss": 0.0326,
      "step": 725720
    },
    {
      "epoch": 1.1876894274137062,
      "grad_norm": 1.791524887084961,
      "learning_rate": 7.67486146162108e-06,
      "loss": 0.0206,
      "step": 725740
    },
    {
      "epoch": 1.1877221578523596,
      "grad_norm": 0.6850175857543945,
      "learning_rate": 7.674795569407564e-06,
      "loss": 0.0146,
      "step": 725760
    },
    {
      "epoch": 1.187754888291013,
      "grad_norm": 0.77367103099823,
      "learning_rate": 7.674729677194048e-06,
      "loss": 0.0199,
      "step": 725780
    },
    {
      "epoch": 1.1877876187296663,
      "grad_norm": 0.3758775591850281,
      "learning_rate": 7.67466378498053e-06,
      "loss": 0.0237,
      "step": 725800
    },
    {
      "epoch": 1.1878203491683195,
      "grad_norm": 0.5596728920936584,
      "learning_rate": 7.674597892767013e-06,
      "loss": 0.0179,
      "step": 725820
    },
    {
      "epoch": 1.1878530796069728,
      "grad_norm": 0.3729841113090515,
      "learning_rate": 7.674532000553495e-06,
      "loss": 0.0186,
      "step": 725840
    },
    {
      "epoch": 1.1878858100456262,
      "grad_norm": 0.33208537101745605,
      "learning_rate": 7.674466108339979e-06,
      "loss": 0.0212,
      "step": 725860
    },
    {
      "epoch": 1.1879185404842796,
      "grad_norm": 0.570141077041626,
      "learning_rate": 7.674400216126461e-06,
      "loss": 0.0253,
      "step": 725880
    },
    {
      "epoch": 1.187951270922933,
      "grad_norm": 0.8204743266105652,
      "learning_rate": 7.674334323912944e-06,
      "loss": 0.0181,
      "step": 725900
    },
    {
      "epoch": 1.1879840013615863,
      "grad_norm": 0.5402143001556396,
      "learning_rate": 7.674268431699426e-06,
      "loss": 0.0257,
      "step": 725920
    },
    {
      "epoch": 1.1880167318002397,
      "grad_norm": 0.37698453664779663,
      "learning_rate": 7.67420253948591e-06,
      "loss": 0.015,
      "step": 725940
    },
    {
      "epoch": 1.1880494622388929,
      "grad_norm": 0.33731162548065186,
      "learning_rate": 7.674136647272392e-06,
      "loss": 0.0191,
      "step": 725960
    },
    {
      "epoch": 1.1880821926775462,
      "grad_norm": 1.4628046751022339,
      "learning_rate": 7.674070755058875e-06,
      "loss": 0.0235,
      "step": 725980
    },
    {
      "epoch": 1.1881149231161996,
      "grad_norm": 0.9180117249488831,
      "learning_rate": 7.674004862845357e-06,
      "loss": 0.026,
      "step": 726000
    },
    {
      "epoch": 1.188147653554853,
      "grad_norm": 1.2355952262878418,
      "learning_rate": 7.673938970631841e-06,
      "loss": 0.0263,
      "step": 726020
    },
    {
      "epoch": 1.1881803839935063,
      "grad_norm": 1.0118813514709473,
      "learning_rate": 7.673873078418323e-06,
      "loss": 0.0248,
      "step": 726040
    },
    {
      "epoch": 1.1882131144321597,
      "grad_norm": 1.831039547920227,
      "learning_rate": 7.673807186204806e-06,
      "loss": 0.0189,
      "step": 726060
    },
    {
      "epoch": 1.1882458448708129,
      "grad_norm": 0.35401472449302673,
      "learning_rate": 7.67374129399129e-06,
      "loss": 0.0124,
      "step": 726080
    },
    {
      "epoch": 1.1882785753094662,
      "grad_norm": 0.7463759779930115,
      "learning_rate": 7.673675401777772e-06,
      "loss": 0.0321,
      "step": 726100
    },
    {
      "epoch": 1.1883113057481196,
      "grad_norm": 0.7363031506538391,
      "learning_rate": 7.673609509564255e-06,
      "loss": 0.0176,
      "step": 726120
    },
    {
      "epoch": 1.188344036186773,
      "grad_norm": 0.9671540260314941,
      "learning_rate": 7.673543617350739e-06,
      "loss": 0.0202,
      "step": 726140
    },
    {
      "epoch": 1.1883767666254264,
      "grad_norm": 0.1315026432275772,
      "learning_rate": 7.673477725137221e-06,
      "loss": 0.0227,
      "step": 726160
    },
    {
      "epoch": 1.1884094970640797,
      "grad_norm": 1.1601632833480835,
      "learning_rate": 7.673411832923704e-06,
      "loss": 0.0242,
      "step": 726180
    },
    {
      "epoch": 1.1884422275027329,
      "grad_norm": 0.2453242838382721,
      "learning_rate": 7.673345940710188e-06,
      "loss": 0.0159,
      "step": 726200
    },
    {
      "epoch": 1.1884749579413862,
      "grad_norm": 0.49355053901672363,
      "learning_rate": 7.67328004849667e-06,
      "loss": 0.0236,
      "step": 726220
    },
    {
      "epoch": 1.1885076883800396,
      "grad_norm": 0.09296446293592453,
      "learning_rate": 7.673214156283154e-06,
      "loss": 0.0258,
      "step": 726240
    },
    {
      "epoch": 1.188540418818693,
      "grad_norm": 0.5642977952957153,
      "learning_rate": 7.673148264069635e-06,
      "loss": 0.0217,
      "step": 726260
    },
    {
      "epoch": 1.1885731492573464,
      "grad_norm": 0.25209686160087585,
      "learning_rate": 7.673082371856119e-06,
      "loss": 0.0336,
      "step": 726280
    },
    {
      "epoch": 1.1886058796959997,
      "grad_norm": 1.0604037046432495,
      "learning_rate": 7.673016479642601e-06,
      "loss": 0.0303,
      "step": 726300
    },
    {
      "epoch": 1.188638610134653,
      "grad_norm": 0.8685342669487,
      "learning_rate": 7.672950587429085e-06,
      "loss": 0.0281,
      "step": 726320
    },
    {
      "epoch": 1.1886713405733063,
      "grad_norm": 0.6145044565200806,
      "learning_rate": 7.672884695215566e-06,
      "loss": 0.0364,
      "step": 726340
    },
    {
      "epoch": 1.1887040710119596,
      "grad_norm": 0.857485294342041,
      "learning_rate": 7.67281880300205e-06,
      "loss": 0.0231,
      "step": 726360
    },
    {
      "epoch": 1.188736801450613,
      "grad_norm": 0.4865693151950836,
      "learning_rate": 7.672752910788532e-06,
      "loss": 0.0204,
      "step": 726380
    },
    {
      "epoch": 1.1887695318892664,
      "grad_norm": 0.47399479150772095,
      "learning_rate": 7.672687018575015e-06,
      "loss": 0.0184,
      "step": 726400
    },
    {
      "epoch": 1.1888022623279197,
      "grad_norm": 0.9275935292243958,
      "learning_rate": 7.672621126361497e-06,
      "loss": 0.0198,
      "step": 726420
    },
    {
      "epoch": 1.1888349927665731,
      "grad_norm": 0.585830569267273,
      "learning_rate": 7.672555234147981e-06,
      "loss": 0.0297,
      "step": 726440
    },
    {
      "epoch": 1.1888677232052265,
      "grad_norm": 1.7552226781845093,
      "learning_rate": 7.672489341934465e-06,
      "loss": 0.0299,
      "step": 726460
    },
    {
      "epoch": 1.1889004536438796,
      "grad_norm": 0.17359881103038788,
      "learning_rate": 7.672423449720946e-06,
      "loss": 0.0177,
      "step": 726480
    },
    {
      "epoch": 1.188933184082533,
      "grad_norm": 0.4661118686199188,
      "learning_rate": 7.67235755750743e-06,
      "loss": 0.0193,
      "step": 726500
    },
    {
      "epoch": 1.1889659145211864,
      "grad_norm": 0.2625254690647125,
      "learning_rate": 7.672291665293914e-06,
      "loss": 0.0262,
      "step": 726520
    },
    {
      "epoch": 1.1889986449598398,
      "grad_norm": 0.1678822934627533,
      "learning_rate": 7.672225773080395e-06,
      "loss": 0.0212,
      "step": 726540
    },
    {
      "epoch": 1.1890313753984931,
      "grad_norm": 1.1177802085876465,
      "learning_rate": 7.672159880866879e-06,
      "loss": 0.0203,
      "step": 726560
    },
    {
      "epoch": 1.1890641058371465,
      "grad_norm": 0.8426651358604431,
      "learning_rate": 7.672093988653363e-06,
      "loss": 0.0255,
      "step": 726580
    },
    {
      "epoch": 1.1890968362757999,
      "grad_norm": 0.6218115091323853,
      "learning_rate": 7.672028096439845e-06,
      "loss": 0.0209,
      "step": 726600
    },
    {
      "epoch": 1.189129566714453,
      "grad_norm": 0.7729788422584534,
      "learning_rate": 7.671962204226328e-06,
      "loss": 0.0181,
      "step": 726620
    },
    {
      "epoch": 1.1891622971531064,
      "grad_norm": 0.40186187624931335,
      "learning_rate": 7.67189631201281e-06,
      "loss": 0.028,
      "step": 726640
    },
    {
      "epoch": 1.1891950275917598,
      "grad_norm": 0.5975208282470703,
      "learning_rate": 7.671830419799294e-06,
      "loss": 0.0252,
      "step": 726660
    },
    {
      "epoch": 1.1892277580304131,
      "grad_norm": 0.6388882398605347,
      "learning_rate": 7.671764527585776e-06,
      "loss": 0.0176,
      "step": 726680
    },
    {
      "epoch": 1.1892604884690665,
      "grad_norm": 1.1588345766067505,
      "learning_rate": 7.671698635372259e-06,
      "loss": 0.0169,
      "step": 726700
    },
    {
      "epoch": 1.1892932189077199,
      "grad_norm": 0.7054035663604736,
      "learning_rate": 7.671632743158741e-06,
      "loss": 0.0213,
      "step": 726720
    },
    {
      "epoch": 1.1893259493463733,
      "grad_norm": 0.10456293821334839,
      "learning_rate": 7.671566850945225e-06,
      "loss": 0.0235,
      "step": 726740
    },
    {
      "epoch": 1.1893586797850264,
      "grad_norm": 0.2081032395362854,
      "learning_rate": 7.671500958731706e-06,
      "loss": 0.0251,
      "step": 726760
    },
    {
      "epoch": 1.1893914102236798,
      "grad_norm": 0.5541225671768188,
      "learning_rate": 7.67143506651819e-06,
      "loss": 0.0212,
      "step": 726780
    },
    {
      "epoch": 1.1894241406623332,
      "grad_norm": 0.27815908193588257,
      "learning_rate": 7.671369174304672e-06,
      "loss": 0.0198,
      "step": 726800
    },
    {
      "epoch": 1.1894568711009865,
      "grad_norm": 1.2086952924728394,
      "learning_rate": 7.671303282091156e-06,
      "loss": 0.022,
      "step": 726820
    },
    {
      "epoch": 1.18948960153964,
      "grad_norm": 0.8898748159408569,
      "learning_rate": 7.671237389877639e-06,
      "loss": 0.0226,
      "step": 726840
    },
    {
      "epoch": 1.1895223319782933,
      "grad_norm": 0.3816719651222229,
      "learning_rate": 7.671171497664121e-06,
      "loss": 0.0195,
      "step": 726860
    },
    {
      "epoch": 1.1895550624169464,
      "grad_norm": 1.1556999683380127,
      "learning_rate": 7.671105605450605e-06,
      "loss": 0.0246,
      "step": 726880
    },
    {
      "epoch": 1.1895877928555998,
      "grad_norm": 0.23241806030273438,
      "learning_rate": 7.671039713237087e-06,
      "loss": 0.0314,
      "step": 726900
    },
    {
      "epoch": 1.1896205232942532,
      "grad_norm": 1.4234237670898438,
      "learning_rate": 7.67097382102357e-06,
      "loss": 0.0225,
      "step": 726920
    },
    {
      "epoch": 1.1896532537329065,
      "grad_norm": 1.0368218421936035,
      "learning_rate": 7.670907928810054e-06,
      "loss": 0.0211,
      "step": 726940
    },
    {
      "epoch": 1.18968598417156,
      "grad_norm": 0.7084257006645203,
      "learning_rate": 7.670842036596536e-06,
      "loss": 0.0181,
      "step": 726960
    },
    {
      "epoch": 1.1897187146102133,
      "grad_norm": 0.3339642286300659,
      "learning_rate": 7.67077614438302e-06,
      "loss": 0.0187,
      "step": 726980
    },
    {
      "epoch": 1.1897514450488664,
      "grad_norm": 0.46751827001571655,
      "learning_rate": 7.670710252169503e-06,
      "loss": 0.0224,
      "step": 727000
    },
    {
      "epoch": 1.1897841754875198,
      "grad_norm": 2.421757936477661,
      "learning_rate": 7.670644359955985e-06,
      "loss": 0.0234,
      "step": 727020
    },
    {
      "epoch": 1.1898169059261732,
      "grad_norm": 0.34132814407348633,
      "learning_rate": 7.670578467742468e-06,
      "loss": 0.0182,
      "step": 727040
    },
    {
      "epoch": 1.1898496363648265,
      "grad_norm": 0.727984607219696,
      "learning_rate": 7.67051257552895e-06,
      "loss": 0.0245,
      "step": 727060
    },
    {
      "epoch": 1.18988236680348,
      "grad_norm": 0.7187796235084534,
      "learning_rate": 7.670446683315434e-06,
      "loss": 0.0239,
      "step": 727080
    },
    {
      "epoch": 1.1899150972421333,
      "grad_norm": 0.5940971970558167,
      "learning_rate": 7.670380791101916e-06,
      "loss": 0.0206,
      "step": 727100
    },
    {
      "epoch": 1.1899478276807867,
      "grad_norm": 0.9209521412849426,
      "learning_rate": 7.6703148988884e-06,
      "loss": 0.0249,
      "step": 727120
    },
    {
      "epoch": 1.1899805581194398,
      "grad_norm": 0.13474056124687195,
      "learning_rate": 7.670249006674881e-06,
      "loss": 0.0191,
      "step": 727140
    },
    {
      "epoch": 1.1900132885580932,
      "grad_norm": 0.2977883517742157,
      "learning_rate": 7.670183114461365e-06,
      "loss": 0.0274,
      "step": 727160
    },
    {
      "epoch": 1.1900460189967466,
      "grad_norm": 0.5267558693885803,
      "learning_rate": 7.670117222247847e-06,
      "loss": 0.0336,
      "step": 727180
    },
    {
      "epoch": 1.1900787494354,
      "grad_norm": 0.9151070713996887,
      "learning_rate": 7.67005133003433e-06,
      "loss": 0.0202,
      "step": 727200
    },
    {
      "epoch": 1.1901114798740533,
      "grad_norm": 1.3985439538955688,
      "learning_rate": 7.669985437820814e-06,
      "loss": 0.0214,
      "step": 727220
    },
    {
      "epoch": 1.1901442103127067,
      "grad_norm": 0.30318230390548706,
      "learning_rate": 7.669919545607296e-06,
      "loss": 0.0208,
      "step": 727240
    },
    {
      "epoch": 1.19017694075136,
      "grad_norm": 2.249547243118286,
      "learning_rate": 7.66985365339378e-06,
      "loss": 0.0214,
      "step": 727260
    },
    {
      "epoch": 1.1902096711900132,
      "grad_norm": 1.255053162574768,
      "learning_rate": 7.669787761180261e-06,
      "loss": 0.0282,
      "step": 727280
    },
    {
      "epoch": 1.1902424016286666,
      "grad_norm": 1.63741135597229,
      "learning_rate": 7.669721868966745e-06,
      "loss": 0.0312,
      "step": 727300
    },
    {
      "epoch": 1.19027513206732,
      "grad_norm": 0.5735366344451904,
      "learning_rate": 7.669655976753228e-06,
      "loss": 0.0239,
      "step": 727320
    },
    {
      "epoch": 1.1903078625059733,
      "grad_norm": 1.4518476724624634,
      "learning_rate": 7.66959008453971e-06,
      "loss": 0.0252,
      "step": 727340
    },
    {
      "epoch": 1.1903405929446267,
      "grad_norm": 0.7275509238243103,
      "learning_rate": 7.669524192326194e-06,
      "loss": 0.0183,
      "step": 727360
    },
    {
      "epoch": 1.19037332338328,
      "grad_norm": 0.29691240191459656,
      "learning_rate": 7.669458300112677e-06,
      "loss": 0.0205,
      "step": 727380
    },
    {
      "epoch": 1.1904060538219334,
      "grad_norm": 0.7673304677009583,
      "learning_rate": 7.66939240789916e-06,
      "loss": 0.0232,
      "step": 727400
    },
    {
      "epoch": 1.1904387842605866,
      "grad_norm": 0.7003679871559143,
      "learning_rate": 7.669326515685643e-06,
      "loss": 0.0191,
      "step": 727420
    },
    {
      "epoch": 1.19047151469924,
      "grad_norm": 0.44974374771118164,
      "learning_rate": 7.669260623472125e-06,
      "loss": 0.0264,
      "step": 727440
    },
    {
      "epoch": 1.1905042451378933,
      "grad_norm": 4.7494893074035645,
      "learning_rate": 7.669194731258608e-06,
      "loss": 0.0197,
      "step": 727460
    },
    {
      "epoch": 1.1905369755765467,
      "grad_norm": 0.3323580026626587,
      "learning_rate": 7.66912883904509e-06,
      "loss": 0.0233,
      "step": 727480
    },
    {
      "epoch": 1.1905697060152,
      "grad_norm": 0.2652094066143036,
      "learning_rate": 7.669062946831574e-06,
      "loss": 0.0222,
      "step": 727500
    },
    {
      "epoch": 1.1906024364538534,
      "grad_norm": 0.8568418025970459,
      "learning_rate": 7.668997054618056e-06,
      "loss": 0.0193,
      "step": 727520
    },
    {
      "epoch": 1.1906351668925068,
      "grad_norm": 0.3502872586250305,
      "learning_rate": 7.66893116240454e-06,
      "loss": 0.0172,
      "step": 727540
    },
    {
      "epoch": 1.19066789733116,
      "grad_norm": 0.19433686137199402,
      "learning_rate": 7.668865270191023e-06,
      "loss": 0.0177,
      "step": 727560
    },
    {
      "epoch": 1.1907006277698133,
      "grad_norm": 0.8768503665924072,
      "learning_rate": 7.668799377977505e-06,
      "loss": 0.0243,
      "step": 727580
    },
    {
      "epoch": 1.1907333582084667,
      "grad_norm": 1.8089414834976196,
      "learning_rate": 7.668733485763988e-06,
      "loss": 0.0247,
      "step": 727600
    },
    {
      "epoch": 1.19076608864712,
      "grad_norm": 1.642773985862732,
      "learning_rate": 7.66866759355047e-06,
      "loss": 0.0181,
      "step": 727620
    },
    {
      "epoch": 1.1907988190857735,
      "grad_norm": 0.45400378108024597,
      "learning_rate": 7.668601701336954e-06,
      "loss": 0.023,
      "step": 727640
    },
    {
      "epoch": 1.1908315495244266,
      "grad_norm": 0.26250433921813965,
      "learning_rate": 7.668535809123436e-06,
      "loss": 0.017,
      "step": 727660
    },
    {
      "epoch": 1.19086427996308,
      "grad_norm": 0.5684080123901367,
      "learning_rate": 7.66846991690992e-06,
      "loss": 0.0151,
      "step": 727680
    },
    {
      "epoch": 1.1908970104017333,
      "grad_norm": 0.9903355240821838,
      "learning_rate": 7.668404024696401e-06,
      "loss": 0.0244,
      "step": 727700
    },
    {
      "epoch": 1.1909297408403867,
      "grad_norm": 0.9399787187576294,
      "learning_rate": 7.668338132482885e-06,
      "loss": 0.0194,
      "step": 727720
    },
    {
      "epoch": 1.19096247127904,
      "grad_norm": 1.3454482555389404,
      "learning_rate": 7.668272240269368e-06,
      "loss": 0.0234,
      "step": 727740
    },
    {
      "epoch": 1.1909952017176935,
      "grad_norm": 0.6434584856033325,
      "learning_rate": 7.66820634805585e-06,
      "loss": 0.0251,
      "step": 727760
    },
    {
      "epoch": 1.1910279321563468,
      "grad_norm": 0.25601956248283386,
      "learning_rate": 7.668140455842334e-06,
      "loss": 0.0221,
      "step": 727780
    },
    {
      "epoch": 1.191060662595,
      "grad_norm": 0.7926092743873596,
      "learning_rate": 7.668074563628817e-06,
      "loss": 0.0252,
      "step": 727800
    },
    {
      "epoch": 1.1910933930336534,
      "grad_norm": 0.30734097957611084,
      "learning_rate": 7.6680086714153e-06,
      "loss": 0.0286,
      "step": 727820
    },
    {
      "epoch": 1.1911261234723067,
      "grad_norm": 0.47887516021728516,
      "learning_rate": 7.667942779201783e-06,
      "loss": 0.0283,
      "step": 727840
    },
    {
      "epoch": 1.19115885391096,
      "grad_norm": 1.2881579399108887,
      "learning_rate": 7.667876886988265e-06,
      "loss": 0.0258,
      "step": 727860
    },
    {
      "epoch": 1.1911915843496135,
      "grad_norm": 0.6758875846862793,
      "learning_rate": 7.667810994774748e-06,
      "loss": 0.0219,
      "step": 727880
    },
    {
      "epoch": 1.1912243147882668,
      "grad_norm": 0.5969932675361633,
      "learning_rate": 7.667745102561232e-06,
      "loss": 0.0196,
      "step": 727900
    },
    {
      "epoch": 1.1912570452269202,
      "grad_norm": 6.549699306488037,
      "learning_rate": 7.667679210347714e-06,
      "loss": 0.0215,
      "step": 727920
    },
    {
      "epoch": 1.1912897756655734,
      "grad_norm": 0.2920222282409668,
      "learning_rate": 7.667613318134197e-06,
      "loss": 0.0195,
      "step": 727940
    },
    {
      "epoch": 1.1913225061042267,
      "grad_norm": 0.10276121646165848,
      "learning_rate": 7.66754742592068e-06,
      "loss": 0.0186,
      "step": 727960
    },
    {
      "epoch": 1.1913552365428801,
      "grad_norm": 0.581994354724884,
      "learning_rate": 7.667481533707163e-06,
      "loss": 0.0343,
      "step": 727980
    },
    {
      "epoch": 1.1913879669815335,
      "grad_norm": 0.2863190770149231,
      "learning_rate": 7.667415641493645e-06,
      "loss": 0.0198,
      "step": 728000
    },
    {
      "epoch": 1.1914206974201869,
      "grad_norm": 1.225091814994812,
      "learning_rate": 7.667349749280128e-06,
      "loss": 0.0339,
      "step": 728020
    },
    {
      "epoch": 1.1914534278588402,
      "grad_norm": 0.5391585230827332,
      "learning_rate": 7.66728385706661e-06,
      "loss": 0.0157,
      "step": 728040
    },
    {
      "epoch": 1.1914861582974936,
      "grad_norm": 0.4262717366218567,
      "learning_rate": 7.667217964853094e-06,
      "loss": 0.0268,
      "step": 728060
    },
    {
      "epoch": 1.1915188887361468,
      "grad_norm": 0.4903961420059204,
      "learning_rate": 7.667152072639576e-06,
      "loss": 0.018,
      "step": 728080
    },
    {
      "epoch": 1.1915516191748001,
      "grad_norm": 2.794124126434326,
      "learning_rate": 7.66708618042606e-06,
      "loss": 0.0302,
      "step": 728100
    },
    {
      "epoch": 1.1915843496134535,
      "grad_norm": 0.38335558772087097,
      "learning_rate": 7.667020288212543e-06,
      "loss": 0.0231,
      "step": 728120
    },
    {
      "epoch": 1.1916170800521069,
      "grad_norm": 0.903782308101654,
      "learning_rate": 7.666954395999025e-06,
      "loss": 0.0209,
      "step": 728140
    },
    {
      "epoch": 1.1916498104907602,
      "grad_norm": 0.6489594578742981,
      "learning_rate": 7.666888503785508e-06,
      "loss": 0.0238,
      "step": 728160
    },
    {
      "epoch": 1.1916825409294136,
      "grad_norm": 0.6000085473060608,
      "learning_rate": 7.666822611571992e-06,
      "loss": 0.0238,
      "step": 728180
    },
    {
      "epoch": 1.191715271368067,
      "grad_norm": 0.6817215085029602,
      "learning_rate": 7.666756719358474e-06,
      "loss": 0.0213,
      "step": 728200
    },
    {
      "epoch": 1.1917480018067201,
      "grad_norm": 0.3463202118873596,
      "learning_rate": 7.666690827144957e-06,
      "loss": 0.0242,
      "step": 728220
    },
    {
      "epoch": 1.1917807322453735,
      "grad_norm": 0.5004386901855469,
      "learning_rate": 7.666624934931441e-06,
      "loss": 0.0261,
      "step": 728240
    },
    {
      "epoch": 1.1918134626840269,
      "grad_norm": 0.4459195137023926,
      "learning_rate": 7.666559042717923e-06,
      "loss": 0.0185,
      "step": 728260
    },
    {
      "epoch": 1.1918461931226803,
      "grad_norm": 0.3456698954105377,
      "learning_rate": 7.666493150504407e-06,
      "loss": 0.0224,
      "step": 728280
    },
    {
      "epoch": 1.1918789235613336,
      "grad_norm": 0.12209057807922363,
      "learning_rate": 7.666427258290888e-06,
      "loss": 0.0181,
      "step": 728300
    },
    {
      "epoch": 1.191911653999987,
      "grad_norm": 0.328220933675766,
      "learning_rate": 7.666361366077372e-06,
      "loss": 0.0192,
      "step": 728320
    },
    {
      "epoch": 1.1919443844386404,
      "grad_norm": 0.9969894289970398,
      "learning_rate": 7.666295473863854e-06,
      "loss": 0.0152,
      "step": 728340
    },
    {
      "epoch": 1.1919771148772935,
      "grad_norm": 0.24447789788246155,
      "learning_rate": 7.666229581650338e-06,
      "loss": 0.0312,
      "step": 728360
    },
    {
      "epoch": 1.192009845315947,
      "grad_norm": 0.20131415128707886,
      "learning_rate": 7.66616368943682e-06,
      "loss": 0.0219,
      "step": 728380
    },
    {
      "epoch": 1.1920425757546003,
      "grad_norm": 1.1077626943588257,
      "learning_rate": 7.666097797223303e-06,
      "loss": 0.026,
      "step": 728400
    },
    {
      "epoch": 1.1920753061932536,
      "grad_norm": 1.1059728860855103,
      "learning_rate": 7.666031905009785e-06,
      "loss": 0.0264,
      "step": 728420
    },
    {
      "epoch": 1.192108036631907,
      "grad_norm": 0.7024582028388977,
      "learning_rate": 7.665966012796268e-06,
      "loss": 0.0252,
      "step": 728440
    },
    {
      "epoch": 1.1921407670705602,
      "grad_norm": 0.5671362280845642,
      "learning_rate": 7.66590012058275e-06,
      "loss": 0.0186,
      "step": 728460
    },
    {
      "epoch": 1.1921734975092135,
      "grad_norm": 0.8385332226753235,
      "learning_rate": 7.665834228369234e-06,
      "loss": 0.0317,
      "step": 728480
    },
    {
      "epoch": 1.192206227947867,
      "grad_norm": 0.4622727632522583,
      "learning_rate": 7.665768336155716e-06,
      "loss": 0.022,
      "step": 728500
    },
    {
      "epoch": 1.1922389583865203,
      "grad_norm": 0.7182961702346802,
      "learning_rate": 7.6657024439422e-06,
      "loss": 0.0278,
      "step": 728520
    },
    {
      "epoch": 1.1922716888251736,
      "grad_norm": 0.8332575559616089,
      "learning_rate": 7.665636551728683e-06,
      "loss": 0.0254,
      "step": 728540
    },
    {
      "epoch": 1.192304419263827,
      "grad_norm": 1.7061928510665894,
      "learning_rate": 7.665570659515167e-06,
      "loss": 0.0171,
      "step": 728560
    },
    {
      "epoch": 1.1923371497024804,
      "grad_norm": 0.19878916442394257,
      "learning_rate": 7.665504767301649e-06,
      "loss": 0.0262,
      "step": 728580
    },
    {
      "epoch": 1.1923698801411335,
      "grad_norm": 1.6402121782302856,
      "learning_rate": 7.665438875088132e-06,
      "loss": 0.0175,
      "step": 728600
    },
    {
      "epoch": 1.192402610579787,
      "grad_norm": 0.5384221076965332,
      "learning_rate": 7.665372982874616e-06,
      "loss": 0.0244,
      "step": 728620
    },
    {
      "epoch": 1.1924353410184403,
      "grad_norm": 0.6555342674255371,
      "learning_rate": 7.665307090661098e-06,
      "loss": 0.0292,
      "step": 728640
    },
    {
      "epoch": 1.1924680714570937,
      "grad_norm": 1.7712254524230957,
      "learning_rate": 7.665241198447581e-06,
      "loss": 0.0264,
      "step": 728660
    },
    {
      "epoch": 1.192500801895747,
      "grad_norm": 0.42792192101478577,
      "learning_rate": 7.665175306234063e-06,
      "loss": 0.0206,
      "step": 728680
    },
    {
      "epoch": 1.1925335323344004,
      "grad_norm": 1.2150259017944336,
      "learning_rate": 7.665109414020547e-06,
      "loss": 0.0184,
      "step": 728700
    },
    {
      "epoch": 1.1925662627730538,
      "grad_norm": 0.6738357543945312,
      "learning_rate": 7.665043521807029e-06,
      "loss": 0.017,
      "step": 728720
    },
    {
      "epoch": 1.192598993211707,
      "grad_norm": 0.4129979610443115,
      "learning_rate": 7.664977629593512e-06,
      "loss": 0.0186,
      "step": 728740
    },
    {
      "epoch": 1.1926317236503603,
      "grad_norm": 0.545975387096405,
      "learning_rate": 7.664911737379994e-06,
      "loss": 0.0229,
      "step": 728760
    },
    {
      "epoch": 1.1926644540890137,
      "grad_norm": 0.269523024559021,
      "learning_rate": 7.664845845166478e-06,
      "loss": 0.0164,
      "step": 728780
    },
    {
      "epoch": 1.192697184527667,
      "grad_norm": 0.8749433159828186,
      "learning_rate": 7.66477995295296e-06,
      "loss": 0.0257,
      "step": 728800
    },
    {
      "epoch": 1.1927299149663204,
      "grad_norm": 0.17443887889385223,
      "learning_rate": 7.664714060739443e-06,
      "loss": 0.0276,
      "step": 728820
    },
    {
      "epoch": 1.1927626454049738,
      "grad_norm": 0.781934916973114,
      "learning_rate": 7.664648168525925e-06,
      "loss": 0.0111,
      "step": 728840
    },
    {
      "epoch": 1.1927953758436272,
      "grad_norm": 0.851777195930481,
      "learning_rate": 7.664582276312409e-06,
      "loss": 0.0229,
      "step": 728860
    },
    {
      "epoch": 1.1928281062822803,
      "grad_norm": 0.2128124088048935,
      "learning_rate": 7.66451638409889e-06,
      "loss": 0.0256,
      "step": 728880
    },
    {
      "epoch": 1.1928608367209337,
      "grad_norm": 0.10647791624069214,
      "learning_rate": 7.664450491885374e-06,
      "loss": 0.019,
      "step": 728900
    },
    {
      "epoch": 1.192893567159587,
      "grad_norm": 0.20556029677391052,
      "learning_rate": 7.664384599671858e-06,
      "loss": 0.016,
      "step": 728920
    },
    {
      "epoch": 1.1929262975982404,
      "grad_norm": 0.3540596663951874,
      "learning_rate": 7.66431870745834e-06,
      "loss": 0.0221,
      "step": 728940
    },
    {
      "epoch": 1.1929590280368938,
      "grad_norm": 0.9427005648612976,
      "learning_rate": 7.664252815244823e-06,
      "loss": 0.023,
      "step": 728960
    },
    {
      "epoch": 1.1929917584755472,
      "grad_norm": 0.17083483934402466,
      "learning_rate": 7.664186923031307e-06,
      "loss": 0.0181,
      "step": 728980
    },
    {
      "epoch": 1.1930244889142005,
      "grad_norm": 0.2679765522480011,
      "learning_rate": 7.664121030817789e-06,
      "loss": 0.0215,
      "step": 729000
    },
    {
      "epoch": 1.1930572193528537,
      "grad_norm": 2.005552291870117,
      "learning_rate": 7.664055138604272e-06,
      "loss": 0.0344,
      "step": 729020
    },
    {
      "epoch": 1.193089949791507,
      "grad_norm": 0.40153032541275024,
      "learning_rate": 7.663989246390756e-06,
      "loss": 0.0217,
      "step": 729040
    },
    {
      "epoch": 1.1931226802301604,
      "grad_norm": 0.522961437702179,
      "learning_rate": 7.663923354177238e-06,
      "loss": 0.0223,
      "step": 729060
    },
    {
      "epoch": 1.1931554106688138,
      "grad_norm": 0.2431139200925827,
      "learning_rate": 7.663857461963721e-06,
      "loss": 0.0193,
      "step": 729080
    },
    {
      "epoch": 1.1931881411074672,
      "grad_norm": 0.1779630333185196,
      "learning_rate": 7.663791569750203e-06,
      "loss": 0.0246,
      "step": 729100
    },
    {
      "epoch": 1.1932208715461206,
      "grad_norm": 0.8881561160087585,
      "learning_rate": 7.663725677536687e-06,
      "loss": 0.0276,
      "step": 729120
    },
    {
      "epoch": 1.1932536019847737,
      "grad_norm": 0.26931777596473694,
      "learning_rate": 7.663659785323169e-06,
      "loss": 0.0177,
      "step": 729140
    },
    {
      "epoch": 1.193286332423427,
      "grad_norm": 0.4650789499282837,
      "learning_rate": 7.663593893109652e-06,
      "loss": 0.0176,
      "step": 729160
    },
    {
      "epoch": 1.1933190628620804,
      "grad_norm": 0.49904075264930725,
      "learning_rate": 7.663528000896134e-06,
      "loss": 0.0347,
      "step": 729180
    },
    {
      "epoch": 1.1933517933007338,
      "grad_norm": 1.0913758277893066,
      "learning_rate": 7.663462108682618e-06,
      "loss": 0.0274,
      "step": 729200
    },
    {
      "epoch": 1.1933845237393872,
      "grad_norm": 0.6682473421096802,
      "learning_rate": 7.6633962164691e-06,
      "loss": 0.0277,
      "step": 729220
    },
    {
      "epoch": 1.1934172541780406,
      "grad_norm": 0.9147220849990845,
      "learning_rate": 7.663330324255583e-06,
      "loss": 0.0149,
      "step": 729240
    },
    {
      "epoch": 1.1934499846166937,
      "grad_norm": 1.1160390377044678,
      "learning_rate": 7.663264432042065e-06,
      "loss": 0.0314,
      "step": 729260
    },
    {
      "epoch": 1.193482715055347,
      "grad_norm": 0.35476359724998474,
      "learning_rate": 7.663198539828549e-06,
      "loss": 0.0231,
      "step": 729280
    },
    {
      "epoch": 1.1935154454940005,
      "grad_norm": 0.7434825301170349,
      "learning_rate": 7.663132647615032e-06,
      "loss": 0.0211,
      "step": 729300
    },
    {
      "epoch": 1.1935481759326538,
      "grad_norm": null,
      "learning_rate": 7.663066755401514e-06,
      "loss": 0.0294,
      "step": 729320
    },
    {
      "epoch": 1.1935809063713072,
      "grad_norm": 1.493952751159668,
      "learning_rate": 7.663000863187998e-06,
      "loss": 0.0227,
      "step": 729340
    },
    {
      "epoch": 1.1936136368099606,
      "grad_norm": 1.2097786664962769,
      "learning_rate": 7.662934970974481e-06,
      "loss": 0.0261,
      "step": 729360
    },
    {
      "epoch": 1.193646367248614,
      "grad_norm": 0.8026698231697083,
      "learning_rate": 7.662869078760963e-06,
      "loss": 0.0201,
      "step": 729380
    },
    {
      "epoch": 1.193679097687267,
      "grad_norm": 1.175178050994873,
      "learning_rate": 7.662803186547447e-06,
      "loss": 0.0226,
      "step": 729400
    },
    {
      "epoch": 1.1937118281259205,
      "grad_norm": 0.5682284832000732,
      "learning_rate": 7.66273729433393e-06,
      "loss": 0.0216,
      "step": 729420
    },
    {
      "epoch": 1.1937445585645738,
      "grad_norm": 0.867167592048645,
      "learning_rate": 7.662671402120412e-06,
      "loss": 0.026,
      "step": 729440
    },
    {
      "epoch": 1.1937772890032272,
      "grad_norm": 2.2103664875030518,
      "learning_rate": 7.662605509906896e-06,
      "loss": 0.0229,
      "step": 729460
    },
    {
      "epoch": 1.1938100194418806,
      "grad_norm": 0.7831118106842041,
      "learning_rate": 7.662539617693378e-06,
      "loss": 0.0196,
      "step": 729480
    },
    {
      "epoch": 1.193842749880534,
      "grad_norm": 1.566787838935852,
      "learning_rate": 7.662473725479861e-06,
      "loss": 0.0206,
      "step": 729500
    },
    {
      "epoch": 1.1938754803191873,
      "grad_norm": 0.4737285077571869,
      "learning_rate": 7.662407833266343e-06,
      "loss": 0.017,
      "step": 729520
    },
    {
      "epoch": 1.1939082107578405,
      "grad_norm": 0.09379356354475021,
      "learning_rate": 7.662341941052827e-06,
      "loss": 0.0265,
      "step": 729540
    },
    {
      "epoch": 1.1939409411964939,
      "grad_norm": 0.34375229477882385,
      "learning_rate": 7.662276048839309e-06,
      "loss": 0.0186,
      "step": 729560
    },
    {
      "epoch": 1.1939736716351472,
      "grad_norm": 0.2785479426383972,
      "learning_rate": 7.662210156625792e-06,
      "loss": 0.0193,
      "step": 729580
    },
    {
      "epoch": 1.1940064020738006,
      "grad_norm": 0.7217077016830444,
      "learning_rate": 7.662144264412274e-06,
      "loss": 0.0153,
      "step": 729600
    },
    {
      "epoch": 1.194039132512454,
      "grad_norm": 0.2846435308456421,
      "learning_rate": 7.662078372198758e-06,
      "loss": 0.0218,
      "step": 729620
    },
    {
      "epoch": 1.1940718629511073,
      "grad_norm": 1.447960376739502,
      "learning_rate": 7.66201247998524e-06,
      "loss": 0.0259,
      "step": 729640
    },
    {
      "epoch": 1.1941045933897607,
      "grad_norm": 0.45598483085632324,
      "learning_rate": 7.661946587771723e-06,
      "loss": 0.0169,
      "step": 729660
    },
    {
      "epoch": 1.1941373238284139,
      "grad_norm": 1.1426362991333008,
      "learning_rate": 7.661880695558207e-06,
      "loss": 0.0158,
      "step": 729680
    },
    {
      "epoch": 1.1941700542670672,
      "grad_norm": 0.5429489612579346,
      "learning_rate": 7.661814803344689e-06,
      "loss": 0.0222,
      "step": 729700
    },
    {
      "epoch": 1.1942027847057206,
      "grad_norm": 1.2607040405273438,
      "learning_rate": 7.661748911131172e-06,
      "loss": 0.027,
      "step": 729720
    },
    {
      "epoch": 1.194235515144374,
      "grad_norm": 0.638590931892395,
      "learning_rate": 7.661683018917654e-06,
      "loss": 0.0264,
      "step": 729740
    },
    {
      "epoch": 1.1942682455830274,
      "grad_norm": 0.7417761087417603,
      "learning_rate": 7.661617126704138e-06,
      "loss": 0.0249,
      "step": 729760
    },
    {
      "epoch": 1.1943009760216807,
      "grad_norm": 0.7997309565544128,
      "learning_rate": 7.661551234490621e-06,
      "loss": 0.0179,
      "step": 729780
    },
    {
      "epoch": 1.194333706460334,
      "grad_norm": 2.072211265563965,
      "learning_rate": 7.661485342277103e-06,
      "loss": 0.0236,
      "step": 729800
    },
    {
      "epoch": 1.1943664368989872,
      "grad_norm": 3.253929615020752,
      "learning_rate": 7.661419450063587e-06,
      "loss": 0.0243,
      "step": 729820
    },
    {
      "epoch": 1.1943991673376406,
      "grad_norm": 0.32519567012786865,
      "learning_rate": 7.66135355785007e-06,
      "loss": 0.0145,
      "step": 729840
    },
    {
      "epoch": 1.194431897776294,
      "grad_norm": 0.5290818214416504,
      "learning_rate": 7.661287665636552e-06,
      "loss": 0.0165,
      "step": 729860
    },
    {
      "epoch": 1.1944646282149474,
      "grad_norm": 0.40274447202682495,
      "learning_rate": 7.661221773423036e-06,
      "loss": 0.016,
      "step": 729880
    },
    {
      "epoch": 1.1944973586536007,
      "grad_norm": 0.8209146857261658,
      "learning_rate": 7.661155881209518e-06,
      "loss": 0.0137,
      "step": 729900
    },
    {
      "epoch": 1.194530089092254,
      "grad_norm": 2.144724130630493,
      "learning_rate": 7.661089988996001e-06,
      "loss": 0.0253,
      "step": 729920
    },
    {
      "epoch": 1.1945628195309073,
      "grad_norm": 0.08155176043510437,
      "learning_rate": 7.661024096782483e-06,
      "loss": 0.0179,
      "step": 729940
    },
    {
      "epoch": 1.1945955499695606,
      "grad_norm": 1.191607117652893,
      "learning_rate": 7.660958204568967e-06,
      "loss": 0.0301,
      "step": 729960
    },
    {
      "epoch": 1.194628280408214,
      "grad_norm": 0.6096575260162354,
      "learning_rate": 7.660892312355449e-06,
      "loss": 0.0282,
      "step": 729980
    },
    {
      "epoch": 1.1946610108468674,
      "grad_norm": 1.0999319553375244,
      "learning_rate": 7.660826420141932e-06,
      "loss": 0.0213,
      "step": 730000
    },
    {
      "epoch": 1.1946937412855207,
      "grad_norm": 0.21224161982536316,
      "learning_rate": 7.660760527928416e-06,
      "loss": 0.0138,
      "step": 730020
    },
    {
      "epoch": 1.1947264717241741,
      "grad_norm": 0.341487318277359,
      "learning_rate": 7.660694635714898e-06,
      "loss": 0.0202,
      "step": 730040
    },
    {
      "epoch": 1.1947592021628273,
      "grad_norm": 0.179085373878479,
      "learning_rate": 7.660628743501381e-06,
      "loss": 0.0242,
      "step": 730060
    },
    {
      "epoch": 1.1947919326014806,
      "grad_norm": 0.6337752342224121,
      "learning_rate": 7.660562851287863e-06,
      "loss": 0.0245,
      "step": 730080
    },
    {
      "epoch": 1.194824663040134,
      "grad_norm": 1.5120563507080078,
      "learning_rate": 7.660496959074347e-06,
      "loss": 0.0268,
      "step": 730100
    },
    {
      "epoch": 1.1948573934787874,
      "grad_norm": 0.3866117596626282,
      "learning_rate": 7.660431066860829e-06,
      "loss": 0.0226,
      "step": 730120
    },
    {
      "epoch": 1.1948901239174408,
      "grad_norm": 0.7148272395133972,
      "learning_rate": 7.660365174647312e-06,
      "loss": 0.0254,
      "step": 730140
    },
    {
      "epoch": 1.1949228543560941,
      "grad_norm": 0.7128013968467712,
      "learning_rate": 7.660299282433796e-06,
      "loss": 0.0193,
      "step": 730160
    },
    {
      "epoch": 1.1949555847947475,
      "grad_norm": 0.7663171887397766,
      "learning_rate": 7.660233390220278e-06,
      "loss": 0.0225,
      "step": 730180
    },
    {
      "epoch": 1.1949883152334007,
      "grad_norm": 1.423540472984314,
      "learning_rate": 7.660167498006761e-06,
      "loss": 0.016,
      "step": 730200
    },
    {
      "epoch": 1.195021045672054,
      "grad_norm": 1.314229965209961,
      "learning_rate": 7.660101605793245e-06,
      "loss": 0.0269,
      "step": 730220
    },
    {
      "epoch": 1.1950537761107074,
      "grad_norm": 3.130929470062256,
      "learning_rate": 7.660035713579727e-06,
      "loss": 0.0237,
      "step": 730240
    },
    {
      "epoch": 1.1950865065493608,
      "grad_norm": 0.32238173484802246,
      "learning_rate": 7.65996982136621e-06,
      "loss": 0.0323,
      "step": 730260
    },
    {
      "epoch": 1.1951192369880141,
      "grad_norm": 0.20233187079429626,
      "learning_rate": 7.659903929152692e-06,
      "loss": 0.0158,
      "step": 730280
    },
    {
      "epoch": 1.1951519674266675,
      "grad_norm": 0.37195369601249695,
      "learning_rate": 7.659838036939176e-06,
      "loss": 0.016,
      "step": 730300
    },
    {
      "epoch": 1.1951846978653209,
      "grad_norm": 2.413379430770874,
      "learning_rate": 7.659772144725658e-06,
      "loss": 0.0168,
      "step": 730320
    },
    {
      "epoch": 1.195217428303974,
      "grad_norm": 0.3753622770309448,
      "learning_rate": 7.659706252512141e-06,
      "loss": 0.0212,
      "step": 730340
    },
    {
      "epoch": 1.1952501587426274,
      "grad_norm": 0.2687033712863922,
      "learning_rate": 7.659640360298625e-06,
      "loss": 0.011,
      "step": 730360
    },
    {
      "epoch": 1.1952828891812808,
      "grad_norm": 0.3402523100376129,
      "learning_rate": 7.659574468085107e-06,
      "loss": 0.0163,
      "step": 730380
    },
    {
      "epoch": 1.1953156196199342,
      "grad_norm": 1.9129632711410522,
      "learning_rate": 7.65950857587159e-06,
      "loss": 0.0211,
      "step": 730400
    },
    {
      "epoch": 1.1953483500585875,
      "grad_norm": 0.9616839289665222,
      "learning_rate": 7.659442683658072e-06,
      "loss": 0.0237,
      "step": 730420
    },
    {
      "epoch": 1.195381080497241,
      "grad_norm": 0.5431692004203796,
      "learning_rate": 7.659376791444556e-06,
      "loss": 0.0157,
      "step": 730440
    },
    {
      "epoch": 1.1954138109358943,
      "grad_norm": 1.866458535194397,
      "learning_rate": 7.659310899231038e-06,
      "loss": 0.0219,
      "step": 730460
    },
    {
      "epoch": 1.1954465413745474,
      "grad_norm": 1.0612726211547852,
      "learning_rate": 7.659245007017521e-06,
      "loss": 0.0306,
      "step": 730480
    },
    {
      "epoch": 1.1954792718132008,
      "grad_norm": 0.7204881310462952,
      "learning_rate": 7.659179114804003e-06,
      "loss": 0.027,
      "step": 730500
    },
    {
      "epoch": 1.1955120022518542,
      "grad_norm": 0.36887145042419434,
      "learning_rate": 7.659113222590487e-06,
      "loss": 0.0245,
      "step": 730520
    },
    {
      "epoch": 1.1955447326905075,
      "grad_norm": 0.3199428915977478,
      "learning_rate": 7.659047330376969e-06,
      "loss": 0.0235,
      "step": 730540
    },
    {
      "epoch": 1.195577463129161,
      "grad_norm": 1.2918484210968018,
      "learning_rate": 7.658981438163452e-06,
      "loss": 0.0253,
      "step": 730560
    },
    {
      "epoch": 1.1956101935678143,
      "grad_norm": 1.4936811923980713,
      "learning_rate": 7.658915545949936e-06,
      "loss": 0.0198,
      "step": 730580
    },
    {
      "epoch": 1.1956429240064677,
      "grad_norm": 0.305594265460968,
      "learning_rate": 7.658849653736418e-06,
      "loss": 0.0258,
      "step": 730600
    },
    {
      "epoch": 1.1956756544451208,
      "grad_norm": 0.3480389416217804,
      "learning_rate": 7.658783761522902e-06,
      "loss": 0.0183,
      "step": 730620
    },
    {
      "epoch": 1.1957083848837742,
      "grad_norm": 2.164519786834717,
      "learning_rate": 7.658717869309385e-06,
      "loss": 0.0303,
      "step": 730640
    },
    {
      "epoch": 1.1957411153224276,
      "grad_norm": 0.2787054479122162,
      "learning_rate": 7.658651977095867e-06,
      "loss": 0.0196,
      "step": 730660
    },
    {
      "epoch": 1.195773845761081,
      "grad_norm": 0.13489684462547302,
      "learning_rate": 7.65858608488235e-06,
      "loss": 0.0206,
      "step": 730680
    },
    {
      "epoch": 1.1958065761997343,
      "grad_norm": 0.15496079623699188,
      "learning_rate": 7.658520192668832e-06,
      "loss": 0.0208,
      "step": 730700
    },
    {
      "epoch": 1.1958393066383874,
      "grad_norm": 0.3286730647087097,
      "learning_rate": 7.658454300455316e-06,
      "loss": 0.0251,
      "step": 730720
    },
    {
      "epoch": 1.1958720370770408,
      "grad_norm": 1.068389654159546,
      "learning_rate": 7.6583884082418e-06,
      "loss": 0.0212,
      "step": 730740
    },
    {
      "epoch": 1.1959047675156942,
      "grad_norm": 1.0724130868911743,
      "learning_rate": 7.658322516028282e-06,
      "loss": 0.0193,
      "step": 730760
    },
    {
      "epoch": 1.1959374979543476,
      "grad_norm": 1.2877029180526733,
      "learning_rate": 7.658256623814765e-06,
      "loss": 0.0274,
      "step": 730780
    },
    {
      "epoch": 1.195970228393001,
      "grad_norm": 0.5416300892829895,
      "learning_rate": 7.658190731601247e-06,
      "loss": 0.0199,
      "step": 730800
    },
    {
      "epoch": 1.1960029588316543,
      "grad_norm": 1.508095145225525,
      "learning_rate": 7.65812483938773e-06,
      "loss": 0.0301,
      "step": 730820
    },
    {
      "epoch": 1.1960356892703077,
      "grad_norm": 0.6025826334953308,
      "learning_rate": 7.658058947174212e-06,
      "loss": 0.0171,
      "step": 730840
    },
    {
      "epoch": 1.1960684197089608,
      "grad_norm": 0.7919091582298279,
      "learning_rate": 7.657993054960696e-06,
      "loss": 0.0297,
      "step": 730860
    },
    {
      "epoch": 1.1961011501476142,
      "grad_norm": 0.7404493093490601,
      "learning_rate": 7.657927162747178e-06,
      "loss": 0.025,
      "step": 730880
    },
    {
      "epoch": 1.1961338805862676,
      "grad_norm": 0.7253410220146179,
      "learning_rate": 7.657861270533662e-06,
      "loss": 0.0205,
      "step": 730900
    },
    {
      "epoch": 1.196166611024921,
      "grad_norm": 0.3408670723438263,
      "learning_rate": 7.657795378320143e-06,
      "loss": 0.0171,
      "step": 730920
    },
    {
      "epoch": 1.1961993414635743,
      "grad_norm": 0.4229743778705597,
      "learning_rate": 7.657729486106627e-06,
      "loss": 0.016,
      "step": 730940
    },
    {
      "epoch": 1.1962320719022277,
      "grad_norm": 0.24056878685951233,
      "learning_rate": 7.65766359389311e-06,
      "loss": 0.0235,
      "step": 730960
    },
    {
      "epoch": 1.196264802340881,
      "grad_norm": 1.2636522054672241,
      "learning_rate": 7.657597701679593e-06,
      "loss": 0.031,
      "step": 730980
    },
    {
      "epoch": 1.1962975327795342,
      "grad_norm": 0.34770724177360535,
      "learning_rate": 7.657531809466076e-06,
      "loss": 0.0223,
      "step": 731000
    },
    {
      "epoch": 1.1963302632181876,
      "grad_norm": 0.7054165005683899,
      "learning_rate": 7.65746591725256e-06,
      "loss": 0.0157,
      "step": 731020
    },
    {
      "epoch": 1.196362993656841,
      "grad_norm": 0.7476716041564941,
      "learning_rate": 7.657400025039042e-06,
      "loss": 0.0227,
      "step": 731040
    },
    {
      "epoch": 1.1963957240954943,
      "grad_norm": 0.21711507439613342,
      "learning_rate": 7.657334132825525e-06,
      "loss": 0.0202,
      "step": 731060
    },
    {
      "epoch": 1.1964284545341477,
      "grad_norm": 0.6795044541358948,
      "learning_rate": 7.657268240612009e-06,
      "loss": 0.0179,
      "step": 731080
    },
    {
      "epoch": 1.196461184972801,
      "grad_norm": 0.2720761001110077,
      "learning_rate": 7.65720234839849e-06,
      "loss": 0.0221,
      "step": 731100
    },
    {
      "epoch": 1.1964939154114544,
      "grad_norm": 0.6709686517715454,
      "learning_rate": 7.657136456184974e-06,
      "loss": 0.0277,
      "step": 731120
    },
    {
      "epoch": 1.1965266458501076,
      "grad_norm": 0.8424228429794312,
      "learning_rate": 7.657070563971456e-06,
      "loss": 0.0241,
      "step": 731140
    },
    {
      "epoch": 1.196559376288761,
      "grad_norm": 0.6449299454689026,
      "learning_rate": 7.65700467175794e-06,
      "loss": 0.0223,
      "step": 731160
    },
    {
      "epoch": 1.1965921067274143,
      "grad_norm": 0.4428919553756714,
      "learning_rate": 7.656938779544422e-06,
      "loss": 0.0283,
      "step": 731180
    },
    {
      "epoch": 1.1966248371660677,
      "grad_norm": 0.5129695534706116,
      "learning_rate": 7.656872887330905e-06,
      "loss": 0.0179,
      "step": 731200
    },
    {
      "epoch": 1.196657567604721,
      "grad_norm": 0.23900273442268372,
      "learning_rate": 7.656806995117387e-06,
      "loss": 0.0194,
      "step": 731220
    },
    {
      "epoch": 1.1966902980433745,
      "grad_norm": 0.8648819327354431,
      "learning_rate": 7.65674110290387e-06,
      "loss": 0.0273,
      "step": 731240
    },
    {
      "epoch": 1.1967230284820278,
      "grad_norm": 0.5687884092330933,
      "learning_rate": 7.656675210690353e-06,
      "loss": 0.0203,
      "step": 731260
    },
    {
      "epoch": 1.196755758920681,
      "grad_norm": 0.7851527333259583,
      "learning_rate": 7.656609318476836e-06,
      "loss": 0.0153,
      "step": 731280
    },
    {
      "epoch": 1.1967884893593344,
      "grad_norm": 0.9920935034751892,
      "learning_rate": 7.656543426263318e-06,
      "loss": 0.0203,
      "step": 731300
    },
    {
      "epoch": 1.1968212197979877,
      "grad_norm": 0.7055017948150635,
      "learning_rate": 7.656477534049802e-06,
      "loss": 0.0345,
      "step": 731320
    },
    {
      "epoch": 1.196853950236641,
      "grad_norm": 2.0632762908935547,
      "learning_rate": 7.656411641836285e-06,
      "loss": 0.019,
      "step": 731340
    },
    {
      "epoch": 1.1968866806752945,
      "grad_norm": 0.4109977185726166,
      "learning_rate": 7.656345749622767e-06,
      "loss": 0.0201,
      "step": 731360
    },
    {
      "epoch": 1.1969194111139478,
      "grad_norm": 1.8880772590637207,
      "learning_rate": 7.65627985740925e-06,
      "loss": 0.0305,
      "step": 731380
    },
    {
      "epoch": 1.1969521415526012,
      "grad_norm": 0.45206648111343384,
      "learning_rate": 7.656213965195734e-06,
      "loss": 0.0263,
      "step": 731400
    },
    {
      "epoch": 1.1969848719912544,
      "grad_norm": 0.313060998916626,
      "learning_rate": 7.656148072982216e-06,
      "loss": 0.0241,
      "step": 731420
    },
    {
      "epoch": 1.1970176024299077,
      "grad_norm": 0.8905161023139954,
      "learning_rate": 7.6560821807687e-06,
      "loss": 0.0317,
      "step": 731440
    },
    {
      "epoch": 1.197050332868561,
      "grad_norm": 0.9094557166099548,
      "learning_rate": 7.656016288555183e-06,
      "loss": 0.0137,
      "step": 731460
    },
    {
      "epoch": 1.1970830633072145,
      "grad_norm": 0.8294441103935242,
      "learning_rate": 7.655950396341665e-06,
      "loss": 0.024,
      "step": 731480
    },
    {
      "epoch": 1.1971157937458679,
      "grad_norm": 0.2926628589630127,
      "learning_rate": 7.655884504128149e-06,
      "loss": 0.0256,
      "step": 731500
    },
    {
      "epoch": 1.197148524184521,
      "grad_norm": 0.5707224607467651,
      "learning_rate": 7.65581861191463e-06,
      "loss": 0.0391,
      "step": 731520
    },
    {
      "epoch": 1.1971812546231744,
      "grad_norm": 2.433547258377075,
      "learning_rate": 7.655752719701114e-06,
      "loss": 0.0256,
      "step": 731540
    },
    {
      "epoch": 1.1972139850618277,
      "grad_norm": 0.8424291610717773,
      "learning_rate": 7.655686827487596e-06,
      "loss": 0.0233,
      "step": 731560
    },
    {
      "epoch": 1.1972467155004811,
      "grad_norm": 0.5276050567626953,
      "learning_rate": 7.65562093527408e-06,
      "loss": 0.0219,
      "step": 731580
    },
    {
      "epoch": 1.1972794459391345,
      "grad_norm": 0.658160924911499,
      "learning_rate": 7.655555043060562e-06,
      "loss": 0.0258,
      "step": 731600
    },
    {
      "epoch": 1.1973121763777879,
      "grad_norm": 0.5532320737838745,
      "learning_rate": 7.655489150847045e-06,
      "loss": 0.0197,
      "step": 731620
    },
    {
      "epoch": 1.1973449068164412,
      "grad_norm": 0.4387299716472626,
      "learning_rate": 7.655423258633527e-06,
      "loss": 0.0214,
      "step": 731640
    },
    {
      "epoch": 1.1973776372550944,
      "grad_norm": 0.2972674071788788,
      "learning_rate": 7.65535736642001e-06,
      "loss": 0.0173,
      "step": 731660
    },
    {
      "epoch": 1.1974103676937478,
      "grad_norm": 0.40411096811294556,
      "learning_rate": 7.655291474206493e-06,
      "loss": 0.0195,
      "step": 731680
    },
    {
      "epoch": 1.1974430981324011,
      "grad_norm": 0.35374322533607483,
      "learning_rate": 7.655225581992976e-06,
      "loss": 0.0232,
      "step": 731700
    },
    {
      "epoch": 1.1974758285710545,
      "grad_norm": 0.3055551052093506,
      "learning_rate": 7.655159689779458e-06,
      "loss": 0.019,
      "step": 731720
    },
    {
      "epoch": 1.1975085590097079,
      "grad_norm": 0.4287179708480835,
      "learning_rate": 7.655093797565942e-06,
      "loss": 0.0181,
      "step": 731740
    },
    {
      "epoch": 1.1975412894483612,
      "grad_norm": 1.2959650754928589,
      "learning_rate": 7.655027905352425e-06,
      "loss": 0.0188,
      "step": 731760
    },
    {
      "epoch": 1.1975740198870146,
      "grad_norm": 1.244136929512024,
      "learning_rate": 7.654962013138907e-06,
      "loss": 0.0202,
      "step": 731780
    },
    {
      "epoch": 1.1976067503256678,
      "grad_norm": 0.6819888949394226,
      "learning_rate": 7.65489612092539e-06,
      "loss": 0.0243,
      "step": 731800
    },
    {
      "epoch": 1.1976394807643211,
      "grad_norm": 0.23078668117523193,
      "learning_rate": 7.654830228711874e-06,
      "loss": 0.0245,
      "step": 731820
    },
    {
      "epoch": 1.1976722112029745,
      "grad_norm": 0.8810296058654785,
      "learning_rate": 7.654764336498356e-06,
      "loss": 0.0225,
      "step": 731840
    },
    {
      "epoch": 1.1977049416416279,
      "grad_norm": 0.39513280987739563,
      "learning_rate": 7.65469844428484e-06,
      "loss": 0.0197,
      "step": 731860
    },
    {
      "epoch": 1.1977376720802813,
      "grad_norm": 0.3116680979728699,
      "learning_rate": 7.654632552071323e-06,
      "loss": 0.023,
      "step": 731880
    },
    {
      "epoch": 1.1977704025189346,
      "grad_norm": 0.22821436822414398,
      "learning_rate": 7.654566659857805e-06,
      "loss": 0.012,
      "step": 731900
    },
    {
      "epoch": 1.197803132957588,
      "grad_norm": 0.5951622724533081,
      "learning_rate": 7.654500767644289e-06,
      "loss": 0.0273,
      "step": 731920
    },
    {
      "epoch": 1.1978358633962412,
      "grad_norm": 1.2541714906692505,
      "learning_rate": 7.65443487543077e-06,
      "loss": 0.0347,
      "step": 731940
    },
    {
      "epoch": 1.1978685938348945,
      "grad_norm": 1.450567364692688,
      "learning_rate": 7.654368983217254e-06,
      "loss": 0.0236,
      "step": 731960
    },
    {
      "epoch": 1.197901324273548,
      "grad_norm": 1.043689489364624,
      "learning_rate": 7.654303091003736e-06,
      "loss": 0.0217,
      "step": 731980
    },
    {
      "epoch": 1.1979340547122013,
      "grad_norm": 0.43848106265068054,
      "learning_rate": 7.65423719879022e-06,
      "loss": 0.029,
      "step": 732000
    },
    {
      "epoch": 1.1979667851508546,
      "grad_norm": 1.1679853200912476,
      "learning_rate": 7.654171306576702e-06,
      "loss": 0.0111,
      "step": 732020
    },
    {
      "epoch": 1.197999515589508,
      "grad_norm": 0.7931845784187317,
      "learning_rate": 7.654105414363185e-06,
      "loss": 0.025,
      "step": 732040
    },
    {
      "epoch": 1.1980322460281614,
      "grad_norm": 1.0868382453918457,
      "learning_rate": 7.654039522149667e-06,
      "loss": 0.0215,
      "step": 732060
    },
    {
      "epoch": 1.1980649764668145,
      "grad_norm": 0.6947498321533203,
      "learning_rate": 7.65397362993615e-06,
      "loss": 0.0178,
      "step": 732080
    },
    {
      "epoch": 1.198097706905468,
      "grad_norm": 0.5296369791030884,
      "learning_rate": 7.653907737722633e-06,
      "loss": 0.0252,
      "step": 732100
    },
    {
      "epoch": 1.1981304373441213,
      "grad_norm": 0.5911446213722229,
      "learning_rate": 7.653841845509116e-06,
      "loss": 0.0201,
      "step": 732120
    },
    {
      "epoch": 1.1981631677827747,
      "grad_norm": 0.43287941813468933,
      "learning_rate": 7.6537759532956e-06,
      "loss": 0.0223,
      "step": 732140
    },
    {
      "epoch": 1.198195898221428,
      "grad_norm": 0.26798295974731445,
      "learning_rate": 7.653710061082082e-06,
      "loss": 0.0302,
      "step": 732160
    },
    {
      "epoch": 1.1982286286600814,
      "grad_norm": 1.2026902437210083,
      "learning_rate": 7.653644168868565e-06,
      "loss": 0.0211,
      "step": 732180
    },
    {
      "epoch": 1.1982613590987345,
      "grad_norm": 1.4236259460449219,
      "learning_rate": 7.653578276655049e-06,
      "loss": 0.0325,
      "step": 732200
    },
    {
      "epoch": 1.198294089537388,
      "grad_norm": 0.6481184363365173,
      "learning_rate": 7.653512384441531e-06,
      "loss": 0.029,
      "step": 732220
    },
    {
      "epoch": 1.1983268199760413,
      "grad_norm": 0.14547909796237946,
      "learning_rate": 7.653446492228014e-06,
      "loss": 0.0213,
      "step": 732240
    },
    {
      "epoch": 1.1983595504146947,
      "grad_norm": 0.39838284254074097,
      "learning_rate": 7.653380600014498e-06,
      "loss": 0.014,
      "step": 732260
    },
    {
      "epoch": 1.198392280853348,
      "grad_norm": 0.5022181868553162,
      "learning_rate": 7.65331470780098e-06,
      "loss": 0.0277,
      "step": 732280
    },
    {
      "epoch": 1.1984250112920014,
      "grad_norm": 0.7369717955589294,
      "learning_rate": 7.653248815587464e-06,
      "loss": 0.0208,
      "step": 732300
    },
    {
      "epoch": 1.1984577417306546,
      "grad_norm": 0.8303192257881165,
      "learning_rate": 7.653182923373945e-06,
      "loss": 0.0239,
      "step": 732320
    },
    {
      "epoch": 1.198490472169308,
      "grad_norm": 0.22130247950553894,
      "learning_rate": 7.653117031160429e-06,
      "loss": 0.0215,
      "step": 732340
    },
    {
      "epoch": 1.1985232026079613,
      "grad_norm": 1.8172351121902466,
      "learning_rate": 7.653051138946911e-06,
      "loss": 0.0349,
      "step": 732360
    },
    {
      "epoch": 1.1985559330466147,
      "grad_norm": 0.6628891229629517,
      "learning_rate": 7.652985246733394e-06,
      "loss": 0.0252,
      "step": 732380
    },
    {
      "epoch": 1.198588663485268,
      "grad_norm": 0.6646010279655457,
      "learning_rate": 7.652919354519876e-06,
      "loss": 0.0234,
      "step": 732400
    },
    {
      "epoch": 1.1986213939239214,
      "grad_norm": 0.6893020272254944,
      "learning_rate": 7.65285346230636e-06,
      "loss": 0.032,
      "step": 732420
    },
    {
      "epoch": 1.1986541243625748,
      "grad_norm": 0.6315818428993225,
      "learning_rate": 7.652787570092842e-06,
      "loss": 0.0293,
      "step": 732440
    },
    {
      "epoch": 1.198686854801228,
      "grad_norm": 0.5533312559127808,
      "learning_rate": 7.652721677879325e-06,
      "loss": 0.0347,
      "step": 732460
    },
    {
      "epoch": 1.1987195852398813,
      "grad_norm": 1.3265029191970825,
      "learning_rate": 7.652655785665809e-06,
      "loss": 0.0226,
      "step": 732480
    },
    {
      "epoch": 1.1987523156785347,
      "grad_norm": 0.5412501096725464,
      "learning_rate": 7.652589893452291e-06,
      "loss": 0.0318,
      "step": 732500
    },
    {
      "epoch": 1.198785046117188,
      "grad_norm": 0.9155100584030151,
      "learning_rate": 7.652524001238774e-06,
      "loss": 0.0175,
      "step": 732520
    },
    {
      "epoch": 1.1988177765558414,
      "grad_norm": 1.10561203956604,
      "learning_rate": 7.652458109025256e-06,
      "loss": 0.0303,
      "step": 732540
    },
    {
      "epoch": 1.1988505069944948,
      "grad_norm": 0.7485396265983582,
      "learning_rate": 7.65239221681174e-06,
      "loss": 0.0249,
      "step": 732560
    },
    {
      "epoch": 1.1988832374331482,
      "grad_norm": 0.8010561466217041,
      "learning_rate": 7.652326324598222e-06,
      "loss": 0.0284,
      "step": 732580
    },
    {
      "epoch": 1.1989159678718013,
      "grad_norm": 0.5949956774711609,
      "learning_rate": 7.652260432384705e-06,
      "loss": 0.0206,
      "step": 732600
    },
    {
      "epoch": 1.1989486983104547,
      "grad_norm": 1.4099191427230835,
      "learning_rate": 7.652194540171189e-06,
      "loss": 0.0276,
      "step": 732620
    },
    {
      "epoch": 1.198981428749108,
      "grad_norm": 0.4736878275871277,
      "learning_rate": 7.652128647957671e-06,
      "loss": 0.0268,
      "step": 732640
    },
    {
      "epoch": 1.1990141591877614,
      "grad_norm": 1.089995265007019,
      "learning_rate": 7.652062755744155e-06,
      "loss": 0.025,
      "step": 732660
    },
    {
      "epoch": 1.1990468896264148,
      "grad_norm": 1.406695008277893,
      "learning_rate": 7.651996863530638e-06,
      "loss": 0.0241,
      "step": 732680
    },
    {
      "epoch": 1.1990796200650682,
      "grad_norm": 1.1241059303283691,
      "learning_rate": 7.65193097131712e-06,
      "loss": 0.017,
      "step": 732700
    },
    {
      "epoch": 1.1991123505037216,
      "grad_norm": 0.2789216637611389,
      "learning_rate": 7.651865079103604e-06,
      "loss": 0.0208,
      "step": 732720
    },
    {
      "epoch": 1.1991450809423747,
      "grad_norm": 2.163599967956543,
      "learning_rate": 7.651799186890085e-06,
      "loss": 0.0199,
      "step": 732740
    },
    {
      "epoch": 1.199177811381028,
      "grad_norm": 0.45988836884498596,
      "learning_rate": 7.651733294676569e-06,
      "loss": 0.0184,
      "step": 732760
    },
    {
      "epoch": 1.1992105418196815,
      "grad_norm": 0.4903505742549896,
      "learning_rate": 7.651667402463051e-06,
      "loss": 0.0191,
      "step": 732780
    },
    {
      "epoch": 1.1992432722583348,
      "grad_norm": 0.27089184522628784,
      "learning_rate": 7.651601510249535e-06,
      "loss": 0.0266,
      "step": 732800
    },
    {
      "epoch": 1.1992760026969882,
      "grad_norm": 1.951345682144165,
      "learning_rate": 7.651535618036018e-06,
      "loss": 0.0215,
      "step": 732820
    },
    {
      "epoch": 1.1993087331356416,
      "grad_norm": 0.8648226857185364,
      "learning_rate": 7.6514697258225e-06,
      "loss": 0.0289,
      "step": 732840
    },
    {
      "epoch": 1.199341463574295,
      "grad_norm": 1.4514479637145996,
      "learning_rate": 7.651403833608984e-06,
      "loss": 0.02,
      "step": 732860
    },
    {
      "epoch": 1.199374194012948,
      "grad_norm": 0.4781687557697296,
      "learning_rate": 7.651337941395465e-06,
      "loss": 0.029,
      "step": 732880
    },
    {
      "epoch": 1.1994069244516015,
      "grad_norm": 0.23833265900611877,
      "learning_rate": 7.651272049181949e-06,
      "loss": 0.0205,
      "step": 732900
    },
    {
      "epoch": 1.1994396548902548,
      "grad_norm": 1.236860990524292,
      "learning_rate": 7.651206156968431e-06,
      "loss": 0.0275,
      "step": 732920
    },
    {
      "epoch": 1.1994723853289082,
      "grad_norm": 0.41825634241104126,
      "learning_rate": 7.651140264754915e-06,
      "loss": 0.028,
      "step": 732940
    },
    {
      "epoch": 1.1995051157675616,
      "grad_norm": 0.6751468777656555,
      "learning_rate": 7.651074372541396e-06,
      "loss": 0.0196,
      "step": 732960
    },
    {
      "epoch": 1.199537846206215,
      "grad_norm": 0.26937705278396606,
      "learning_rate": 7.65100848032788e-06,
      "loss": 0.0231,
      "step": 732980
    },
    {
      "epoch": 1.199570576644868,
      "grad_norm": 1.1584594249725342,
      "learning_rate": 7.650942588114364e-06,
      "loss": 0.023,
      "step": 733000
    },
    {
      "epoch": 1.1996033070835215,
      "grad_norm": 0.7835336923599243,
      "learning_rate": 7.650876695900846e-06,
      "loss": 0.0292,
      "step": 733020
    },
    {
      "epoch": 1.1996360375221748,
      "grad_norm": 1.8803870677947998,
      "learning_rate": 7.650810803687329e-06,
      "loss": 0.0268,
      "step": 733040
    },
    {
      "epoch": 1.1996687679608282,
      "grad_norm": 0.5305082201957703,
      "learning_rate": 7.650744911473813e-06,
      "loss": 0.0268,
      "step": 733060
    },
    {
      "epoch": 1.1997014983994816,
      "grad_norm": 1.30904221534729,
      "learning_rate": 7.650679019260295e-06,
      "loss": 0.0247,
      "step": 733080
    },
    {
      "epoch": 1.199734228838135,
      "grad_norm": 1.5373547077178955,
      "learning_rate": 7.650613127046778e-06,
      "loss": 0.0316,
      "step": 733100
    },
    {
      "epoch": 1.1997669592767881,
      "grad_norm": 0.9816308617591858,
      "learning_rate": 7.65054723483326e-06,
      "loss": 0.0288,
      "step": 733120
    },
    {
      "epoch": 1.1997996897154415,
      "grad_norm": 1.2464728355407715,
      "learning_rate": 7.650481342619744e-06,
      "loss": 0.0318,
      "step": 733140
    },
    {
      "epoch": 1.1998324201540949,
      "grad_norm": 0.8715898394584656,
      "learning_rate": 7.650415450406226e-06,
      "loss": 0.023,
      "step": 733160
    },
    {
      "epoch": 1.1998651505927482,
      "grad_norm": 0.6806899309158325,
      "learning_rate": 7.650349558192709e-06,
      "loss": 0.0198,
      "step": 733180
    },
    {
      "epoch": 1.1998978810314016,
      "grad_norm": 1.2674075365066528,
      "learning_rate": 7.650283665979193e-06,
      "loss": 0.0182,
      "step": 733200
    },
    {
      "epoch": 1.199930611470055,
      "grad_norm": 0.3098433017730713,
      "learning_rate": 7.650217773765675e-06,
      "loss": 0.0171,
      "step": 733220
    },
    {
      "epoch": 1.1999633419087083,
      "grad_norm": 0.4009225368499756,
      "learning_rate": 7.650151881552158e-06,
      "loss": 0.0228,
      "step": 733240
    },
    {
      "epoch": 1.1999960723473615,
      "grad_norm": 0.434244841337204,
      "learning_rate": 7.65008598933864e-06,
      "loss": 0.0235,
      "step": 733260
    },
    {
      "epoch": 1.2000288027860149,
      "grad_norm": 0.43469345569610596,
      "learning_rate": 7.650020097125124e-06,
      "loss": 0.0288,
      "step": 733280
    },
    {
      "epoch": 1.2000615332246682,
      "grad_norm": 0.7049425840377808,
      "learning_rate": 7.649954204911606e-06,
      "loss": 0.0263,
      "step": 733300
    },
    {
      "epoch": 1.2000942636633216,
      "grad_norm": 0.6517130136489868,
      "learning_rate": 7.64988831269809e-06,
      "loss": 0.0301,
      "step": 733320
    },
    {
      "epoch": 1.200126994101975,
      "grad_norm": 1.2270219326019287,
      "learning_rate": 7.649822420484571e-06,
      "loss": 0.0239,
      "step": 733340
    },
    {
      "epoch": 1.2001597245406284,
      "grad_norm": 0.578947126865387,
      "learning_rate": 7.649756528271055e-06,
      "loss": 0.0293,
      "step": 733360
    },
    {
      "epoch": 1.2001924549792817,
      "grad_norm": 0.2098415493965149,
      "learning_rate": 7.649690636057537e-06,
      "loss": 0.0131,
      "step": 733380
    },
    {
      "epoch": 1.2002251854179349,
      "grad_norm": 0.34717297554016113,
      "learning_rate": 7.64962474384402e-06,
      "loss": 0.025,
      "step": 733400
    },
    {
      "epoch": 1.2002579158565883,
      "grad_norm": 0.4832718074321747,
      "learning_rate": 7.649558851630504e-06,
      "loss": 0.0211,
      "step": 733420
    },
    {
      "epoch": 1.2002906462952416,
      "grad_norm": 0.39308494329452515,
      "learning_rate": 7.649492959416986e-06,
      "loss": 0.0201,
      "step": 733440
    },
    {
      "epoch": 1.200323376733895,
      "grad_norm": 0.2378753125667572,
      "learning_rate": 7.64942706720347e-06,
      "loss": 0.0194,
      "step": 733460
    },
    {
      "epoch": 1.2003561071725484,
      "grad_norm": 1.0461119413375854,
      "learning_rate": 7.649361174989953e-06,
      "loss": 0.0303,
      "step": 733480
    },
    {
      "epoch": 1.2003888376112017,
      "grad_norm": 2.3206088542938232,
      "learning_rate": 7.649295282776435e-06,
      "loss": 0.0159,
      "step": 733500
    },
    {
      "epoch": 1.2004215680498551,
      "grad_norm": 0.7959760427474976,
      "learning_rate": 7.649229390562918e-06,
      "loss": 0.02,
      "step": 733520
    },
    {
      "epoch": 1.2004542984885083,
      "grad_norm": 0.5965409278869629,
      "learning_rate": 7.649163498349402e-06,
      "loss": 0.018,
      "step": 733540
    },
    {
      "epoch": 1.2004870289271616,
      "grad_norm": 0.7620947957038879,
      "learning_rate": 7.649097606135884e-06,
      "loss": 0.0287,
      "step": 733560
    },
    {
      "epoch": 1.200519759365815,
      "grad_norm": 0.4061293303966522,
      "learning_rate": 7.649031713922367e-06,
      "loss": 0.0214,
      "step": 733580
    },
    {
      "epoch": 1.2005524898044684,
      "grad_norm": 1.246302843093872,
      "learning_rate": 7.64896582170885e-06,
      "loss": 0.0232,
      "step": 733600
    },
    {
      "epoch": 1.2005852202431218,
      "grad_norm": 1.9047702550888062,
      "learning_rate": 7.648899929495333e-06,
      "loss": 0.0206,
      "step": 733620
    },
    {
      "epoch": 1.2006179506817751,
      "grad_norm": 0.7136574983596802,
      "learning_rate": 7.648834037281815e-06,
      "loss": 0.0209,
      "step": 733640
    },
    {
      "epoch": 1.2006506811204285,
      "grad_norm": 0.7392202615737915,
      "learning_rate": 7.648768145068298e-06,
      "loss": 0.0138,
      "step": 733660
    },
    {
      "epoch": 1.2006834115590816,
      "grad_norm": 1.3273897171020508,
      "learning_rate": 7.64870225285478e-06,
      "loss": 0.0206,
      "step": 733680
    },
    {
      "epoch": 1.200716141997735,
      "grad_norm": 0.7035463452339172,
      "learning_rate": 7.648636360641264e-06,
      "loss": 0.0219,
      "step": 733700
    },
    {
      "epoch": 1.2007488724363884,
      "grad_norm": 0.32953763008117676,
      "learning_rate": 7.648570468427746e-06,
      "loss": 0.0238,
      "step": 733720
    },
    {
      "epoch": 1.2007816028750418,
      "grad_norm": 0.20525744557380676,
      "learning_rate": 7.64850457621423e-06,
      "loss": 0.0173,
      "step": 733740
    },
    {
      "epoch": 1.2008143333136951,
      "grad_norm": 0.701424241065979,
      "learning_rate": 7.648438684000711e-06,
      "loss": 0.0239,
      "step": 733760
    },
    {
      "epoch": 1.2008470637523483,
      "grad_norm": 0.6728383302688599,
      "learning_rate": 7.648372791787195e-06,
      "loss": 0.0212,
      "step": 733780
    },
    {
      "epoch": 1.2008797941910017,
      "grad_norm": 0.8748465180397034,
      "learning_rate": 7.648306899573678e-06,
      "loss": 0.0189,
      "step": 733800
    },
    {
      "epoch": 1.200912524629655,
      "grad_norm": 0.5563300251960754,
      "learning_rate": 7.64824100736016e-06,
      "loss": 0.0335,
      "step": 733820
    },
    {
      "epoch": 1.2009452550683084,
      "grad_norm": 0.5784665942192078,
      "learning_rate": 7.648175115146644e-06,
      "loss": 0.024,
      "step": 733840
    },
    {
      "epoch": 1.2009779855069618,
      "grad_norm": 1.213064432144165,
      "learning_rate": 7.648109222933127e-06,
      "loss": 0.0231,
      "step": 733860
    },
    {
      "epoch": 1.2010107159456151,
      "grad_norm": 0.10301609337329865,
      "learning_rate": 7.64804333071961e-06,
      "loss": 0.0178,
      "step": 733880
    },
    {
      "epoch": 1.2010434463842685,
      "grad_norm": 0.31148284673690796,
      "learning_rate": 7.647977438506093e-06,
      "loss": 0.017,
      "step": 733900
    },
    {
      "epoch": 1.2010761768229217,
      "grad_norm": 1.1367110013961792,
      "learning_rate": 7.647911546292576e-06,
      "loss": 0.0278,
      "step": 733920
    },
    {
      "epoch": 1.201108907261575,
      "grad_norm": 0.7615448832511902,
      "learning_rate": 7.647845654079058e-06,
      "loss": 0.0302,
      "step": 733940
    },
    {
      "epoch": 1.2011416377002284,
      "grad_norm": 0.6477952599525452,
      "learning_rate": 7.647779761865542e-06,
      "loss": 0.0256,
      "step": 733960
    },
    {
      "epoch": 1.2011743681388818,
      "grad_norm": 0.3595239818096161,
      "learning_rate": 7.647713869652024e-06,
      "loss": 0.0227,
      "step": 733980
    },
    {
      "epoch": 1.2012070985775352,
      "grad_norm": 1.3189538717269897,
      "learning_rate": 7.647647977438507e-06,
      "loss": 0.0244,
      "step": 734000
    },
    {
      "epoch": 1.2012398290161885,
      "grad_norm": 0.4602496027946472,
      "learning_rate": 7.64758208522499e-06,
      "loss": 0.0274,
      "step": 734020
    },
    {
      "epoch": 1.201272559454842,
      "grad_norm": 1.1277642250061035,
      "learning_rate": 7.647516193011473e-06,
      "loss": 0.0209,
      "step": 734040
    },
    {
      "epoch": 1.201305289893495,
      "grad_norm": 0.8980180621147156,
      "learning_rate": 7.647450300797955e-06,
      "loss": 0.0293,
      "step": 734060
    },
    {
      "epoch": 1.2013380203321484,
      "grad_norm": 0.28543931245803833,
      "learning_rate": 7.647384408584438e-06,
      "loss": 0.0199,
      "step": 734080
    },
    {
      "epoch": 1.2013707507708018,
      "grad_norm": 1.0711718797683716,
      "learning_rate": 7.64731851637092e-06,
      "loss": 0.0203,
      "step": 734100
    },
    {
      "epoch": 1.2014034812094552,
      "grad_norm": 0.7225550413131714,
      "learning_rate": 7.647252624157404e-06,
      "loss": 0.0212,
      "step": 734120
    },
    {
      "epoch": 1.2014362116481085,
      "grad_norm": 0.3502761721611023,
      "learning_rate": 7.647186731943886e-06,
      "loss": 0.0257,
      "step": 734140
    },
    {
      "epoch": 1.201468942086762,
      "grad_norm": 0.606984555721283,
      "learning_rate": 7.64712083973037e-06,
      "loss": 0.0211,
      "step": 734160
    },
    {
      "epoch": 1.2015016725254153,
      "grad_norm": 0.5393297076225281,
      "learning_rate": 7.647054947516853e-06,
      "loss": 0.0174,
      "step": 734180
    },
    {
      "epoch": 1.2015344029640684,
      "grad_norm": 0.9053966999053955,
      "learning_rate": 7.646989055303335e-06,
      "loss": 0.0171,
      "step": 734200
    },
    {
      "epoch": 1.2015671334027218,
      "grad_norm": 0.7658939957618713,
      "learning_rate": 7.646923163089818e-06,
      "loss": 0.0232,
      "step": 734220
    },
    {
      "epoch": 1.2015998638413752,
      "grad_norm": 0.9155289530754089,
      "learning_rate": 7.646857270876302e-06,
      "loss": 0.0244,
      "step": 734240
    },
    {
      "epoch": 1.2016325942800286,
      "grad_norm": 0.23805879056453705,
      "learning_rate": 7.646791378662784e-06,
      "loss": 0.0228,
      "step": 734260
    },
    {
      "epoch": 1.201665324718682,
      "grad_norm": 0.1657164841890335,
      "learning_rate": 7.646725486449267e-06,
      "loss": 0.0268,
      "step": 734280
    },
    {
      "epoch": 1.2016980551573353,
      "grad_norm": 0.399922251701355,
      "learning_rate": 7.646659594235751e-06,
      "loss": 0.0201,
      "step": 734300
    },
    {
      "epoch": 1.2017307855959887,
      "grad_norm": 1.6727657318115234,
      "learning_rate": 7.646593702022233e-06,
      "loss": 0.0173,
      "step": 734320
    },
    {
      "epoch": 1.2017635160346418,
      "grad_norm": 2.2505736351013184,
      "learning_rate": 7.646527809808717e-06,
      "loss": 0.033,
      "step": 734340
    },
    {
      "epoch": 1.2017962464732952,
      "grad_norm": 0.26012858748435974,
      "learning_rate": 7.646461917595198e-06,
      "loss": 0.0193,
      "step": 734360
    },
    {
      "epoch": 1.2018289769119486,
      "grad_norm": 1.7801495790481567,
      "learning_rate": 7.646396025381682e-06,
      "loss": 0.0339,
      "step": 734380
    },
    {
      "epoch": 1.201861707350602,
      "grad_norm": 0.4111805558204651,
      "learning_rate": 7.646330133168164e-06,
      "loss": 0.0164,
      "step": 734400
    },
    {
      "epoch": 1.2018944377892553,
      "grad_norm": 0.700685441493988,
      "learning_rate": 7.646264240954647e-06,
      "loss": 0.0203,
      "step": 734420
    },
    {
      "epoch": 1.2019271682279087,
      "grad_norm": 0.19233280420303345,
      "learning_rate": 7.64619834874113e-06,
      "loss": 0.02,
      "step": 734440
    },
    {
      "epoch": 1.2019598986665618,
      "grad_norm": 1.008251428604126,
      "learning_rate": 7.646132456527613e-06,
      "loss": 0.0317,
      "step": 734460
    },
    {
      "epoch": 1.2019926291052152,
      "grad_norm": 0.1473681926727295,
      "learning_rate": 7.646066564314095e-06,
      "loss": 0.0235,
      "step": 734480
    },
    {
      "epoch": 1.2020253595438686,
      "grad_norm": 1.6629619598388672,
      "learning_rate": 7.646000672100578e-06,
      "loss": 0.0275,
      "step": 734500
    },
    {
      "epoch": 1.202058089982522,
      "grad_norm": 0.7951762676239014,
      "learning_rate": 7.64593477988706e-06,
      "loss": 0.0199,
      "step": 734520
    },
    {
      "epoch": 1.2020908204211753,
      "grad_norm": 1.5785523653030396,
      "learning_rate": 7.645868887673544e-06,
      "loss": 0.0261,
      "step": 734540
    },
    {
      "epoch": 1.2021235508598287,
      "grad_norm": 0.9900219440460205,
      "learning_rate": 7.645802995460026e-06,
      "loss": 0.0194,
      "step": 734560
    },
    {
      "epoch": 1.2021562812984818,
      "grad_norm": 1.531823754310608,
      "learning_rate": 7.64573710324651e-06,
      "loss": 0.0203,
      "step": 734580
    },
    {
      "epoch": 1.2021890117371352,
      "grad_norm": 0.6497739553451538,
      "learning_rate": 7.645671211032993e-06,
      "loss": 0.0222,
      "step": 734600
    },
    {
      "epoch": 1.2022217421757886,
      "grad_norm": 0.9747826457023621,
      "learning_rate": 7.645605318819475e-06,
      "loss": 0.0223,
      "step": 734620
    },
    {
      "epoch": 1.202254472614442,
      "grad_norm": 1.5935943126678467,
      "learning_rate": 7.645539426605958e-06,
      "loss": 0.0199,
      "step": 734640
    },
    {
      "epoch": 1.2022872030530953,
      "grad_norm": 0.4776782989501953,
      "learning_rate": 7.645473534392442e-06,
      "loss": 0.0272,
      "step": 734660
    },
    {
      "epoch": 1.2023199334917487,
      "grad_norm": 0.14011217653751373,
      "learning_rate": 7.645407642178924e-06,
      "loss": 0.0168,
      "step": 734680
    },
    {
      "epoch": 1.202352663930402,
      "grad_norm": 0.20568925142288208,
      "learning_rate": 7.645341749965408e-06,
      "loss": 0.0176,
      "step": 734700
    },
    {
      "epoch": 1.2023853943690552,
      "grad_norm": 1.6459306478500366,
      "learning_rate": 7.645275857751891e-06,
      "loss": 0.0177,
      "step": 734720
    },
    {
      "epoch": 1.2024181248077086,
      "grad_norm": 0.5014036893844604,
      "learning_rate": 7.645209965538373e-06,
      "loss": 0.0215,
      "step": 734740
    },
    {
      "epoch": 1.202450855246362,
      "grad_norm": 1.4537875652313232,
      "learning_rate": 7.645144073324857e-06,
      "loss": 0.0294,
      "step": 734760
    },
    {
      "epoch": 1.2024835856850153,
      "grad_norm": 0.7303686738014221,
      "learning_rate": 7.645078181111338e-06,
      "loss": 0.0225,
      "step": 734780
    },
    {
      "epoch": 1.2025163161236687,
      "grad_norm": 0.7572269439697266,
      "learning_rate": 7.645012288897822e-06,
      "loss": 0.0225,
      "step": 734800
    },
    {
      "epoch": 1.202549046562322,
      "grad_norm": 1.514267086982727,
      "learning_rate": 7.644946396684304e-06,
      "loss": 0.0198,
      "step": 734820
    },
    {
      "epoch": 1.2025817770009755,
      "grad_norm": 0.708459734916687,
      "learning_rate": 7.644880504470788e-06,
      "loss": 0.0219,
      "step": 734840
    },
    {
      "epoch": 1.2026145074396286,
      "grad_norm": 0.22728654742240906,
      "learning_rate": 7.64481461225727e-06,
      "loss": 0.0137,
      "step": 734860
    },
    {
      "epoch": 1.202647237878282,
      "grad_norm": 0.43545985221862793,
      "learning_rate": 7.644748720043753e-06,
      "loss": 0.0232,
      "step": 734880
    },
    {
      "epoch": 1.2026799683169354,
      "grad_norm": 0.8306313753128052,
      "learning_rate": 7.644682827830235e-06,
      "loss": 0.025,
      "step": 734900
    },
    {
      "epoch": 1.2027126987555887,
      "grad_norm": 0.21482114493846893,
      "learning_rate": 7.644616935616719e-06,
      "loss": 0.0237,
      "step": 734920
    },
    {
      "epoch": 1.202745429194242,
      "grad_norm": 0.884225606918335,
      "learning_rate": 7.644551043403202e-06,
      "loss": 0.0224,
      "step": 734940
    },
    {
      "epoch": 1.2027781596328955,
      "grad_norm": 1.1311719417572021,
      "learning_rate": 7.644485151189684e-06,
      "loss": 0.0246,
      "step": 734960
    },
    {
      "epoch": 1.2028108900715488,
      "grad_norm": 0.536471962928772,
      "learning_rate": 7.644419258976168e-06,
      "loss": 0.0242,
      "step": 734980
    },
    {
      "epoch": 1.202843620510202,
      "grad_norm": 0.7900627255439758,
      "learning_rate": 7.64435336676265e-06,
      "loss": 0.0163,
      "step": 735000
    },
    {
      "epoch": 1.2028763509488554,
      "grad_norm": 0.597032904624939,
      "learning_rate": 7.644287474549133e-06,
      "loss": 0.0176,
      "step": 735020
    },
    {
      "epoch": 1.2029090813875087,
      "grad_norm": 0.16663524508476257,
      "learning_rate": 7.644221582335617e-06,
      "loss": 0.021,
      "step": 735040
    },
    {
      "epoch": 1.202941811826162,
      "grad_norm": 0.9766438603401184,
      "learning_rate": 7.644155690122099e-06,
      "loss": 0.0188,
      "step": 735060
    },
    {
      "epoch": 1.2029745422648155,
      "grad_norm": 1.0315359830856323,
      "learning_rate": 7.644089797908582e-06,
      "loss": 0.0216,
      "step": 735080
    },
    {
      "epoch": 1.2030072727034689,
      "grad_norm": 0.4073960483074188,
      "learning_rate": 7.644023905695066e-06,
      "loss": 0.0233,
      "step": 735100
    },
    {
      "epoch": 1.2030400031421222,
      "grad_norm": 3.131753444671631,
      "learning_rate": 7.643958013481548e-06,
      "loss": 0.0185,
      "step": 735120
    },
    {
      "epoch": 1.2030727335807754,
      "grad_norm": 0.3580968677997589,
      "learning_rate": 7.643892121268031e-06,
      "loss": 0.019,
      "step": 735140
    },
    {
      "epoch": 1.2031054640194287,
      "grad_norm": 1.0510742664337158,
      "learning_rate": 7.643826229054513e-06,
      "loss": 0.0345,
      "step": 735160
    },
    {
      "epoch": 1.2031381944580821,
      "grad_norm": 0.7031534314155579,
      "learning_rate": 7.643760336840997e-06,
      "loss": 0.0321,
      "step": 735180
    },
    {
      "epoch": 1.2031709248967355,
      "grad_norm": 1.3277517557144165,
      "learning_rate": 7.643694444627479e-06,
      "loss": 0.015,
      "step": 735200
    },
    {
      "epoch": 1.2032036553353889,
      "grad_norm": 0.7077917456626892,
      "learning_rate": 7.643628552413962e-06,
      "loss": 0.0232,
      "step": 735220
    },
    {
      "epoch": 1.2032363857740422,
      "grad_norm": 0.2992688715457916,
      "learning_rate": 7.643562660200444e-06,
      "loss": 0.0184,
      "step": 735240
    },
    {
      "epoch": 1.2032691162126954,
      "grad_norm": 0.954109251499176,
      "learning_rate": 7.643496767986928e-06,
      "loss": 0.0184,
      "step": 735260
    },
    {
      "epoch": 1.2033018466513488,
      "grad_norm": 0.3066962659358978,
      "learning_rate": 7.643430875773411e-06,
      "loss": 0.0167,
      "step": 735280
    },
    {
      "epoch": 1.2033345770900021,
      "grad_norm": 0.6087267398834229,
      "learning_rate": 7.643364983559893e-06,
      "loss": 0.0165,
      "step": 735300
    },
    {
      "epoch": 1.2033673075286555,
      "grad_norm": 1.2158517837524414,
      "learning_rate": 7.643299091346377e-06,
      "loss": 0.0132,
      "step": 735320
    },
    {
      "epoch": 1.2034000379673089,
      "grad_norm": 1.0826126337051392,
      "learning_rate": 7.643233199132859e-06,
      "loss": 0.0243,
      "step": 735340
    },
    {
      "epoch": 1.2034327684059622,
      "grad_norm": 0.3146493434906006,
      "learning_rate": 7.643167306919342e-06,
      "loss": 0.026,
      "step": 735360
    },
    {
      "epoch": 1.2034654988446154,
      "grad_norm": 0.44137996435165405,
      "learning_rate": 7.643101414705824e-06,
      "loss": 0.0252,
      "step": 735380
    },
    {
      "epoch": 1.2034982292832688,
      "grad_norm": 0.693449854850769,
      "learning_rate": 7.643035522492308e-06,
      "loss": 0.0242,
      "step": 735400
    },
    {
      "epoch": 1.2035309597219221,
      "grad_norm": 0.8085103631019592,
      "learning_rate": 7.64296963027879e-06,
      "loss": 0.0237,
      "step": 735420
    },
    {
      "epoch": 1.2035636901605755,
      "grad_norm": 0.9232801198959351,
      "learning_rate": 7.642903738065273e-06,
      "loss": 0.0232,
      "step": 735440
    },
    {
      "epoch": 1.2035964205992289,
      "grad_norm": 0.5806283354759216,
      "learning_rate": 7.642837845851757e-06,
      "loss": 0.0206,
      "step": 735460
    },
    {
      "epoch": 1.2036291510378823,
      "grad_norm": 0.29022103548049927,
      "learning_rate": 7.642771953638239e-06,
      "loss": 0.0127,
      "step": 735480
    },
    {
      "epoch": 1.2036618814765356,
      "grad_norm": 0.9753599762916565,
      "learning_rate": 7.642706061424722e-06,
      "loss": 0.0161,
      "step": 735500
    },
    {
      "epoch": 1.2036946119151888,
      "grad_norm": 0.46444565057754517,
      "learning_rate": 7.642640169211206e-06,
      "loss": 0.0198,
      "step": 735520
    },
    {
      "epoch": 1.2037273423538422,
      "grad_norm": 0.30867382884025574,
      "learning_rate": 7.642574276997688e-06,
      "loss": 0.0179,
      "step": 735540
    },
    {
      "epoch": 1.2037600727924955,
      "grad_norm": 0.7170644998550415,
      "learning_rate": 7.642508384784171e-06,
      "loss": 0.0276,
      "step": 735560
    },
    {
      "epoch": 1.203792803231149,
      "grad_norm": 0.16654528677463531,
      "learning_rate": 7.642442492570653e-06,
      "loss": 0.0209,
      "step": 735580
    },
    {
      "epoch": 1.2038255336698023,
      "grad_norm": 0.4219166934490204,
      "learning_rate": 7.642376600357137e-06,
      "loss": 0.0251,
      "step": 735600
    },
    {
      "epoch": 1.2038582641084556,
      "grad_norm": 0.8210845589637756,
      "learning_rate": 7.642310708143619e-06,
      "loss": 0.0185,
      "step": 735620
    },
    {
      "epoch": 1.203890994547109,
      "grad_norm": 0.30665451288223267,
      "learning_rate": 7.642244815930102e-06,
      "loss": 0.0232,
      "step": 735640
    },
    {
      "epoch": 1.2039237249857622,
      "grad_norm": 0.6655210256576538,
      "learning_rate": 7.642178923716586e-06,
      "loss": 0.0198,
      "step": 735660
    },
    {
      "epoch": 1.2039564554244155,
      "grad_norm": 0.48614683747291565,
      "learning_rate": 7.642113031503068e-06,
      "loss": 0.0229,
      "step": 735680
    },
    {
      "epoch": 1.203989185863069,
      "grad_norm": 0.8254237174987793,
      "learning_rate": 7.642047139289551e-06,
      "loss": 0.0181,
      "step": 735700
    },
    {
      "epoch": 1.2040219163017223,
      "grad_norm": 0.7197211980819702,
      "learning_rate": 7.641981247076033e-06,
      "loss": 0.0208,
      "step": 735720
    },
    {
      "epoch": 1.2040546467403757,
      "grad_norm": 0.23503313958644867,
      "learning_rate": 7.641915354862517e-06,
      "loss": 0.023,
      "step": 735740
    },
    {
      "epoch": 1.204087377179029,
      "grad_norm": 0.27463921904563904,
      "learning_rate": 7.641849462648999e-06,
      "loss": 0.0202,
      "step": 735760
    },
    {
      "epoch": 1.2041201076176824,
      "grad_norm": 0.4340750575065613,
      "learning_rate": 7.641783570435482e-06,
      "loss": 0.0164,
      "step": 735780
    },
    {
      "epoch": 1.2041528380563356,
      "grad_norm": 0.787217378616333,
      "learning_rate": 7.641717678221964e-06,
      "loss": 0.0242,
      "step": 735800
    },
    {
      "epoch": 1.204185568494989,
      "grad_norm": 0.8113427758216858,
      "learning_rate": 7.641651786008448e-06,
      "loss": 0.0161,
      "step": 735820
    },
    {
      "epoch": 1.2042182989336423,
      "grad_norm": 0.8709946870803833,
      "learning_rate": 7.641585893794931e-06,
      "loss": 0.0184,
      "step": 735840
    },
    {
      "epoch": 1.2042510293722957,
      "grad_norm": 0.5022592544555664,
      "learning_rate": 7.641520001581413e-06,
      "loss": 0.0245,
      "step": 735860
    },
    {
      "epoch": 1.204283759810949,
      "grad_norm": 0.10339470207691193,
      "learning_rate": 7.641454109367897e-06,
      "loss": 0.0153,
      "step": 735880
    },
    {
      "epoch": 1.2043164902496024,
      "grad_norm": 2.5133492946624756,
      "learning_rate": 7.64138821715438e-06,
      "loss": 0.0276,
      "step": 735900
    },
    {
      "epoch": 1.2043492206882558,
      "grad_norm": 0.8760737776756287,
      "learning_rate": 7.641322324940862e-06,
      "loss": 0.019,
      "step": 735920
    },
    {
      "epoch": 1.204381951126909,
      "grad_norm": 1.0422844886779785,
      "learning_rate": 7.641256432727346e-06,
      "loss": 0.0206,
      "step": 735940
    },
    {
      "epoch": 1.2044146815655623,
      "grad_norm": 0.17906473577022552,
      "learning_rate": 7.641190540513828e-06,
      "loss": 0.0188,
      "step": 735960
    },
    {
      "epoch": 1.2044474120042157,
      "grad_norm": 1.068228840827942,
      "learning_rate": 7.641124648300311e-06,
      "loss": 0.0267,
      "step": 735980
    },
    {
      "epoch": 1.204480142442869,
      "grad_norm": 0.13605444133281708,
      "learning_rate": 7.641058756086795e-06,
      "loss": 0.0189,
      "step": 736000
    },
    {
      "epoch": 1.2045128728815224,
      "grad_norm": 0.5864752531051636,
      "learning_rate": 7.640992863873277e-06,
      "loss": 0.0235,
      "step": 736020
    },
    {
      "epoch": 1.2045456033201756,
      "grad_norm": 1.5009338855743408,
      "learning_rate": 7.64092697165976e-06,
      "loss": 0.0251,
      "step": 736040
    },
    {
      "epoch": 1.204578333758829,
      "grad_norm": 0.06690195947885513,
      "learning_rate": 7.640861079446242e-06,
      "loss": 0.0196,
      "step": 736060
    },
    {
      "epoch": 1.2046110641974823,
      "grad_norm": 1.9761989116668701,
      "learning_rate": 7.640795187232726e-06,
      "loss": 0.0312,
      "step": 736080
    },
    {
      "epoch": 1.2046437946361357,
      "grad_norm": 0.27209001779556274,
      "learning_rate": 7.640729295019208e-06,
      "loss": 0.0217,
      "step": 736100
    },
    {
      "epoch": 1.204676525074789,
      "grad_norm": 0.3426330089569092,
      "learning_rate": 7.640663402805691e-06,
      "loss": 0.0211,
      "step": 736120
    },
    {
      "epoch": 1.2047092555134424,
      "grad_norm": 1.1646608114242554,
      "learning_rate": 7.640597510592173e-06,
      "loss": 0.0313,
      "step": 736140
    },
    {
      "epoch": 1.2047419859520958,
      "grad_norm": 0.8468908071517944,
      "learning_rate": 7.640531618378657e-06,
      "loss": 0.0306,
      "step": 736160
    },
    {
      "epoch": 1.204774716390749,
      "grad_norm": 0.6994355916976929,
      "learning_rate": 7.640465726165139e-06,
      "loss": 0.0176,
      "step": 736180
    },
    {
      "epoch": 1.2048074468294023,
      "grad_norm": 0.31740301847457886,
      "learning_rate": 7.640399833951622e-06,
      "loss": 0.0234,
      "step": 736200
    },
    {
      "epoch": 1.2048401772680557,
      "grad_norm": 1.3685730695724487,
      "learning_rate": 7.640333941738104e-06,
      "loss": 0.0249,
      "step": 736220
    },
    {
      "epoch": 1.204872907706709,
      "grad_norm": 1.3063554763793945,
      "learning_rate": 7.640268049524588e-06,
      "loss": 0.028,
      "step": 736240
    },
    {
      "epoch": 1.2049056381453624,
      "grad_norm": 1.1736725568771362,
      "learning_rate": 7.640202157311071e-06,
      "loss": 0.0191,
      "step": 736260
    },
    {
      "epoch": 1.2049383685840158,
      "grad_norm": 0.7706776857376099,
      "learning_rate": 7.640136265097555e-06,
      "loss": 0.0306,
      "step": 736280
    },
    {
      "epoch": 1.2049710990226692,
      "grad_norm": 1.2308586835861206,
      "learning_rate": 7.640070372884037e-06,
      "loss": 0.0225,
      "step": 736300
    },
    {
      "epoch": 1.2050038294613223,
      "grad_norm": 0.850590169429779,
      "learning_rate": 7.64000448067052e-06,
      "loss": 0.0206,
      "step": 736320
    },
    {
      "epoch": 1.2050365598999757,
      "grad_norm": 1.7526251077651978,
      "learning_rate": 7.639938588457004e-06,
      "loss": 0.0269,
      "step": 736340
    },
    {
      "epoch": 1.205069290338629,
      "grad_norm": 2.0730748176574707,
      "learning_rate": 7.639872696243486e-06,
      "loss": 0.0178,
      "step": 736360
    },
    {
      "epoch": 1.2051020207772825,
      "grad_norm": 1.2676838636398315,
      "learning_rate": 7.63980680402997e-06,
      "loss": 0.0327,
      "step": 736380
    },
    {
      "epoch": 1.2051347512159358,
      "grad_norm": 0.4118461608886719,
      "learning_rate": 7.639740911816451e-06,
      "loss": 0.022,
      "step": 736400
    },
    {
      "epoch": 1.2051674816545892,
      "grad_norm": 0.5575013160705566,
      "learning_rate": 7.639675019602935e-06,
      "loss": 0.02,
      "step": 736420
    },
    {
      "epoch": 1.2052002120932426,
      "grad_norm": 1.1188268661499023,
      "learning_rate": 7.639609127389417e-06,
      "loss": 0.0196,
      "step": 736440
    },
    {
      "epoch": 1.2052329425318957,
      "grad_norm": 0.1096532940864563,
      "learning_rate": 7.6395432351759e-06,
      "loss": 0.0251,
      "step": 736460
    },
    {
      "epoch": 1.205265672970549,
      "grad_norm": 0.292657732963562,
      "learning_rate": 7.639477342962382e-06,
      "loss": 0.0161,
      "step": 736480
    },
    {
      "epoch": 1.2052984034092025,
      "grad_norm": 0.4872240126132965,
      "learning_rate": 7.639411450748866e-06,
      "loss": 0.025,
      "step": 736500
    },
    {
      "epoch": 1.2053311338478558,
      "grad_norm": 3.560098648071289,
      "learning_rate": 7.639345558535348e-06,
      "loss": 0.0281,
      "step": 736520
    },
    {
      "epoch": 1.2053638642865092,
      "grad_norm": 0.38945186138153076,
      "learning_rate": 7.639279666321831e-06,
      "loss": 0.029,
      "step": 736540
    },
    {
      "epoch": 1.2053965947251626,
      "grad_norm": 0.6224726438522339,
      "learning_rate": 7.639213774108313e-06,
      "loss": 0.0295,
      "step": 736560
    },
    {
      "epoch": 1.205429325163816,
      "grad_norm": 0.2154453992843628,
      "learning_rate": 7.639147881894797e-06,
      "loss": 0.023,
      "step": 736580
    },
    {
      "epoch": 1.205462055602469,
      "grad_norm": 1.1614271402359009,
      "learning_rate": 7.639081989681279e-06,
      "loss": 0.0274,
      "step": 736600
    },
    {
      "epoch": 1.2054947860411225,
      "grad_norm": 0.25910142064094543,
      "learning_rate": 7.639016097467762e-06,
      "loss": 0.0207,
      "step": 736620
    },
    {
      "epoch": 1.2055275164797759,
      "grad_norm": 0.9811831116676331,
      "learning_rate": 7.638950205254246e-06,
      "loss": 0.0238,
      "step": 736640
    },
    {
      "epoch": 1.2055602469184292,
      "grad_norm": 1.8324801921844482,
      "learning_rate": 7.638884313040728e-06,
      "loss": 0.0309,
      "step": 736660
    },
    {
      "epoch": 1.2055929773570826,
      "grad_norm": 1.2081413269042969,
      "learning_rate": 7.638818420827211e-06,
      "loss": 0.0278,
      "step": 736680
    },
    {
      "epoch": 1.205625707795736,
      "grad_norm": 0.1070217490196228,
      "learning_rate": 7.638752528613695e-06,
      "loss": 0.0181,
      "step": 736700
    },
    {
      "epoch": 1.2056584382343893,
      "grad_norm": 0.4715871214866638,
      "learning_rate": 7.638686636400177e-06,
      "loss": 0.0254,
      "step": 736720
    },
    {
      "epoch": 1.2056911686730425,
      "grad_norm": 1.1315209865570068,
      "learning_rate": 7.63862074418666e-06,
      "loss": 0.0202,
      "step": 736740
    },
    {
      "epoch": 1.2057238991116959,
      "grad_norm": 1.4071320295333862,
      "learning_rate": 7.638554851973144e-06,
      "loss": 0.0238,
      "step": 736760
    },
    {
      "epoch": 1.2057566295503492,
      "grad_norm": 0.9957436323165894,
      "learning_rate": 7.638488959759626e-06,
      "loss": 0.0137,
      "step": 736780
    },
    {
      "epoch": 1.2057893599890026,
      "grad_norm": 1.2883028984069824,
      "learning_rate": 7.63842306754611e-06,
      "loss": 0.0234,
      "step": 736800
    },
    {
      "epoch": 1.205822090427656,
      "grad_norm": 0.6076322197914124,
      "learning_rate": 7.638357175332591e-06,
      "loss": 0.0167,
      "step": 736820
    },
    {
      "epoch": 1.2058548208663091,
      "grad_norm": 0.23869964480400085,
      "learning_rate": 7.638291283119075e-06,
      "loss": 0.016,
      "step": 736840
    },
    {
      "epoch": 1.2058875513049625,
      "grad_norm": 0.6704898476600647,
      "learning_rate": 7.638225390905557e-06,
      "loss": 0.0249,
      "step": 736860
    },
    {
      "epoch": 1.2059202817436159,
      "grad_norm": 0.5227131843566895,
      "learning_rate": 7.63815949869204e-06,
      "loss": 0.0251,
      "step": 736880
    },
    {
      "epoch": 1.2059530121822692,
      "grad_norm": 0.5951371192932129,
      "learning_rate": 7.638093606478522e-06,
      "loss": 0.0271,
      "step": 736900
    },
    {
      "epoch": 1.2059857426209226,
      "grad_norm": 0.8746193051338196,
      "learning_rate": 7.638027714265006e-06,
      "loss": 0.0207,
      "step": 736920
    },
    {
      "epoch": 1.206018473059576,
      "grad_norm": 1.7152738571166992,
      "learning_rate": 7.637961822051488e-06,
      "loss": 0.0248,
      "step": 736940
    },
    {
      "epoch": 1.2060512034982294,
      "grad_norm": 1.3112202882766724,
      "learning_rate": 7.637895929837972e-06,
      "loss": 0.0259,
      "step": 736960
    },
    {
      "epoch": 1.2060839339368825,
      "grad_norm": 0.9452034831047058,
      "learning_rate": 7.637830037624453e-06,
      "loss": 0.0312,
      "step": 736980
    },
    {
      "epoch": 1.2061166643755359,
      "grad_norm": 1.3895176649093628,
      "learning_rate": 7.637764145410937e-06,
      "loss": 0.0209,
      "step": 737000
    },
    {
      "epoch": 1.2061493948141893,
      "grad_norm": 2.22904896736145,
      "learning_rate": 7.63769825319742e-06,
      "loss": 0.021,
      "step": 737020
    },
    {
      "epoch": 1.2061821252528426,
      "grad_norm": 0.35060805082321167,
      "learning_rate": 7.637632360983902e-06,
      "loss": 0.0219,
      "step": 737040
    },
    {
      "epoch": 1.206214855691496,
      "grad_norm": 0.29077795147895813,
      "learning_rate": 7.637566468770386e-06,
      "loss": 0.0249,
      "step": 737060
    },
    {
      "epoch": 1.2062475861301494,
      "grad_norm": 0.7137943506240845,
      "learning_rate": 7.63750057655687e-06,
      "loss": 0.0243,
      "step": 737080
    },
    {
      "epoch": 1.2062803165688027,
      "grad_norm": 1.0955147743225098,
      "learning_rate": 7.637434684343352e-06,
      "loss": 0.0227,
      "step": 737100
    },
    {
      "epoch": 1.206313047007456,
      "grad_norm": 0.3823913335800171,
      "learning_rate": 7.637368792129835e-06,
      "loss": 0.0163,
      "step": 737120
    },
    {
      "epoch": 1.2063457774461093,
      "grad_norm": 0.9106932878494263,
      "learning_rate": 7.637302899916319e-06,
      "loss": 0.025,
      "step": 737140
    },
    {
      "epoch": 1.2063785078847626,
      "grad_norm": 0.16974450647830963,
      "learning_rate": 7.6372370077028e-06,
      "loss": 0.0237,
      "step": 737160
    },
    {
      "epoch": 1.206411238323416,
      "grad_norm": 1.7993643283843994,
      "learning_rate": 7.637171115489284e-06,
      "loss": 0.0227,
      "step": 737180
    },
    {
      "epoch": 1.2064439687620694,
      "grad_norm": 0.9134219884872437,
      "learning_rate": 7.637105223275766e-06,
      "loss": 0.0306,
      "step": 737200
    },
    {
      "epoch": 1.2064766992007228,
      "grad_norm": 0.7676258683204651,
      "learning_rate": 7.63703933106225e-06,
      "loss": 0.018,
      "step": 737220
    },
    {
      "epoch": 1.2065094296393761,
      "grad_norm": 0.3167991042137146,
      "learning_rate": 7.636973438848732e-06,
      "loss": 0.0208,
      "step": 737240
    },
    {
      "epoch": 1.2065421600780293,
      "grad_norm": 1.3837429285049438,
      "learning_rate": 7.636907546635215e-06,
      "loss": 0.0251,
      "step": 737260
    },
    {
      "epoch": 1.2065748905166827,
      "grad_norm": 0.11949842423200607,
      "learning_rate": 7.636841654421697e-06,
      "loss": 0.0282,
      "step": 737280
    },
    {
      "epoch": 1.206607620955336,
      "grad_norm": 0.46588510274887085,
      "learning_rate": 7.63677576220818e-06,
      "loss": 0.0252,
      "step": 737300
    },
    {
      "epoch": 1.2066403513939894,
      "grad_norm": 0.5590792298316956,
      "learning_rate": 7.636709869994663e-06,
      "loss": 0.0243,
      "step": 737320
    },
    {
      "epoch": 1.2066730818326428,
      "grad_norm": 0.9786274433135986,
      "learning_rate": 7.636643977781146e-06,
      "loss": 0.0181,
      "step": 737340
    },
    {
      "epoch": 1.2067058122712961,
      "grad_norm": 0.3424617052078247,
      "learning_rate": 7.636578085567628e-06,
      "loss": 0.0249,
      "step": 737360
    },
    {
      "epoch": 1.2067385427099495,
      "grad_norm": 0.4706198275089264,
      "learning_rate": 7.636512193354112e-06,
      "loss": 0.0178,
      "step": 737380
    },
    {
      "epoch": 1.2067712731486027,
      "grad_norm": 0.7521322965621948,
      "learning_rate": 7.636446301140595e-06,
      "loss": 0.0246,
      "step": 737400
    },
    {
      "epoch": 1.206804003587256,
      "grad_norm": 0.6245146989822388,
      "learning_rate": 7.636380408927077e-06,
      "loss": 0.0209,
      "step": 737420
    },
    {
      "epoch": 1.2068367340259094,
      "grad_norm": 1.1538029909133911,
      "learning_rate": 7.63631451671356e-06,
      "loss": 0.0198,
      "step": 737440
    },
    {
      "epoch": 1.2068694644645628,
      "grad_norm": 0.8338328003883362,
      "learning_rate": 7.636248624500043e-06,
      "loss": 0.0196,
      "step": 737460
    },
    {
      "epoch": 1.2069021949032162,
      "grad_norm": 1.2479854822158813,
      "learning_rate": 7.636182732286526e-06,
      "loss": 0.0224,
      "step": 737480
    },
    {
      "epoch": 1.2069349253418695,
      "grad_norm": 0.4217051863670349,
      "learning_rate": 7.63611684007301e-06,
      "loss": 0.0201,
      "step": 737500
    },
    {
      "epoch": 1.2069676557805227,
      "grad_norm": 0.5361831188201904,
      "learning_rate": 7.636050947859492e-06,
      "loss": 0.0286,
      "step": 737520
    },
    {
      "epoch": 1.207000386219176,
      "grad_norm": 1.0909429788589478,
      "learning_rate": 7.635985055645975e-06,
      "loss": 0.022,
      "step": 737540
    },
    {
      "epoch": 1.2070331166578294,
      "grad_norm": 0.3419361710548401,
      "learning_rate": 7.635919163432459e-06,
      "loss": 0.0258,
      "step": 737560
    },
    {
      "epoch": 1.2070658470964828,
      "grad_norm": 0.4902222752571106,
      "learning_rate": 7.63585327121894e-06,
      "loss": 0.0187,
      "step": 737580
    },
    {
      "epoch": 1.2070985775351362,
      "grad_norm": 0.7807337641716003,
      "learning_rate": 7.635787379005424e-06,
      "loss": 0.0258,
      "step": 737600
    },
    {
      "epoch": 1.2071313079737895,
      "grad_norm": 0.8978015184402466,
      "learning_rate": 7.635721486791906e-06,
      "loss": 0.0275,
      "step": 737620
    },
    {
      "epoch": 1.2071640384124427,
      "grad_norm": 0.5939459800720215,
      "learning_rate": 7.63565559457839e-06,
      "loss": 0.0192,
      "step": 737640
    },
    {
      "epoch": 1.207196768851096,
      "grad_norm": 0.2716280221939087,
      "learning_rate": 7.635589702364872e-06,
      "loss": 0.0227,
      "step": 737660
    },
    {
      "epoch": 1.2072294992897494,
      "grad_norm": 0.29901933670043945,
      "learning_rate": 7.635523810151355e-06,
      "loss": 0.0302,
      "step": 737680
    },
    {
      "epoch": 1.2072622297284028,
      "grad_norm": 0.6564534902572632,
      "learning_rate": 7.635457917937837e-06,
      "loss": 0.0189,
      "step": 737700
    },
    {
      "epoch": 1.2072949601670562,
      "grad_norm": 0.7518009543418884,
      "learning_rate": 7.63539202572432e-06,
      "loss": 0.0331,
      "step": 737720
    },
    {
      "epoch": 1.2073276906057095,
      "grad_norm": 0.4342588782310486,
      "learning_rate": 7.635326133510804e-06,
      "loss": 0.0262,
      "step": 737740
    },
    {
      "epoch": 1.207360421044363,
      "grad_norm": 0.2558172345161438,
      "learning_rate": 7.635260241297286e-06,
      "loss": 0.023,
      "step": 737760
    },
    {
      "epoch": 1.207393151483016,
      "grad_norm": 0.2594524919986725,
      "learning_rate": 7.63519434908377e-06,
      "loss": 0.0332,
      "step": 737780
    },
    {
      "epoch": 1.2074258819216694,
      "grad_norm": 1.370698094367981,
      "learning_rate": 7.635128456870252e-06,
      "loss": 0.0263,
      "step": 737800
    },
    {
      "epoch": 1.2074586123603228,
      "grad_norm": 1.073773741722107,
      "learning_rate": 7.635062564656735e-06,
      "loss": 0.0321,
      "step": 737820
    },
    {
      "epoch": 1.2074913427989762,
      "grad_norm": 0.4047479033470154,
      "learning_rate": 7.634996672443217e-06,
      "loss": 0.02,
      "step": 737840
    },
    {
      "epoch": 1.2075240732376296,
      "grad_norm": 1.6420111656188965,
      "learning_rate": 7.6349307802297e-06,
      "loss": 0.0186,
      "step": 737860
    },
    {
      "epoch": 1.207556803676283,
      "grad_norm": 1.2888379096984863,
      "learning_rate": 7.634864888016184e-06,
      "loss": 0.0174,
      "step": 737880
    },
    {
      "epoch": 1.2075895341149363,
      "grad_norm": 0.19618098437786102,
      "learning_rate": 7.634798995802666e-06,
      "loss": 0.0163,
      "step": 737900
    },
    {
      "epoch": 1.2076222645535895,
      "grad_norm": 0.692365825176239,
      "learning_rate": 7.63473310358915e-06,
      "loss": 0.0203,
      "step": 737920
    },
    {
      "epoch": 1.2076549949922428,
      "grad_norm": 0.6334462761878967,
      "learning_rate": 7.634667211375633e-06,
      "loss": 0.0151,
      "step": 737940
    },
    {
      "epoch": 1.2076877254308962,
      "grad_norm": 1.7823503017425537,
      "learning_rate": 7.634601319162115e-06,
      "loss": 0.034,
      "step": 737960
    },
    {
      "epoch": 1.2077204558695496,
      "grad_norm": 0.7196875810623169,
      "learning_rate": 7.634535426948599e-06,
      "loss": 0.029,
      "step": 737980
    },
    {
      "epoch": 1.207753186308203,
      "grad_norm": 1.6003830432891846,
      "learning_rate": 7.63446953473508e-06,
      "loss": 0.0287,
      "step": 738000
    },
    {
      "epoch": 1.2077859167468563,
      "grad_norm": 0.5489977598190308,
      "learning_rate": 7.634403642521564e-06,
      "loss": 0.023,
      "step": 738020
    },
    {
      "epoch": 1.2078186471855097,
      "grad_norm": 0.1641411930322647,
      "learning_rate": 7.634337750308046e-06,
      "loss": 0.0119,
      "step": 738040
    },
    {
      "epoch": 1.2078513776241628,
      "grad_norm": 0.6294191479682922,
      "learning_rate": 7.63427185809453e-06,
      "loss": 0.0144,
      "step": 738060
    },
    {
      "epoch": 1.2078841080628162,
      "grad_norm": 2.642449378967285,
      "learning_rate": 7.634205965881012e-06,
      "loss": 0.034,
      "step": 738080
    },
    {
      "epoch": 1.2079168385014696,
      "grad_norm": 1.5995508432388306,
      "learning_rate": 7.634140073667495e-06,
      "loss": 0.0195,
      "step": 738100
    },
    {
      "epoch": 1.207949568940123,
      "grad_norm": 0.6323938965797424,
      "learning_rate": 7.634074181453979e-06,
      "loss": 0.02,
      "step": 738120
    },
    {
      "epoch": 1.2079822993787763,
      "grad_norm": 0.8045361042022705,
      "learning_rate": 7.63400828924046e-06,
      "loss": 0.023,
      "step": 738140
    },
    {
      "epoch": 1.2080150298174297,
      "grad_norm": 0.2346612960100174,
      "learning_rate": 7.633942397026944e-06,
      "loss": 0.0303,
      "step": 738160
    },
    {
      "epoch": 1.208047760256083,
      "grad_norm": 0.6614767909049988,
      "learning_rate": 7.633876504813426e-06,
      "loss": 0.0244,
      "step": 738180
    },
    {
      "epoch": 1.2080804906947362,
      "grad_norm": 1.954768419265747,
      "learning_rate": 7.63381061259991e-06,
      "loss": 0.0305,
      "step": 738200
    },
    {
      "epoch": 1.2081132211333896,
      "grad_norm": 0.2397894710302353,
      "learning_rate": 7.633744720386392e-06,
      "loss": 0.0218,
      "step": 738220
    },
    {
      "epoch": 1.208145951572043,
      "grad_norm": 0.7156565189361572,
      "learning_rate": 7.633678828172875e-06,
      "loss": 0.0159,
      "step": 738240
    },
    {
      "epoch": 1.2081786820106963,
      "grad_norm": 0.8502345085144043,
      "learning_rate": 7.633612935959357e-06,
      "loss": 0.0339,
      "step": 738260
    },
    {
      "epoch": 1.2082114124493497,
      "grad_norm": 0.9579153656959534,
      "learning_rate": 7.63354704374584e-06,
      "loss": 0.0217,
      "step": 738280
    },
    {
      "epoch": 1.208244142888003,
      "grad_norm": 1.1388282775878906,
      "learning_rate": 7.633481151532324e-06,
      "loss": 0.0199,
      "step": 738300
    },
    {
      "epoch": 1.2082768733266562,
      "grad_norm": 0.6729111075401306,
      "learning_rate": 7.633415259318806e-06,
      "loss": 0.0293,
      "step": 738320
    },
    {
      "epoch": 1.2083096037653096,
      "grad_norm": 0.6625391840934753,
      "learning_rate": 7.63334936710529e-06,
      "loss": 0.0168,
      "step": 738340
    },
    {
      "epoch": 1.208342334203963,
      "grad_norm": 1.43962824344635,
      "learning_rate": 7.633283474891773e-06,
      "loss": 0.0145,
      "step": 738360
    },
    {
      "epoch": 1.2083750646426163,
      "grad_norm": 1.813232421875,
      "learning_rate": 7.633217582678255e-06,
      "loss": 0.026,
      "step": 738380
    },
    {
      "epoch": 1.2084077950812697,
      "grad_norm": 0.7720286846160889,
      "learning_rate": 7.633151690464739e-06,
      "loss": 0.0222,
      "step": 738400
    },
    {
      "epoch": 1.208440525519923,
      "grad_norm": 0.3766004145145416,
      "learning_rate": 7.63308579825122e-06,
      "loss": 0.0189,
      "step": 738420
    },
    {
      "epoch": 1.2084732559585762,
      "grad_norm": 0.4236370623111725,
      "learning_rate": 7.633019906037704e-06,
      "loss": 0.0176,
      "step": 738440
    },
    {
      "epoch": 1.2085059863972296,
      "grad_norm": 0.14992238581180573,
      "learning_rate": 7.632954013824188e-06,
      "loss": 0.0147,
      "step": 738460
    },
    {
      "epoch": 1.208538716835883,
      "grad_norm": 0.14112377166748047,
      "learning_rate": 7.63288812161067e-06,
      "loss": 0.0166,
      "step": 738480
    },
    {
      "epoch": 1.2085714472745364,
      "grad_norm": 0.39473479986190796,
      "learning_rate": 7.632822229397153e-06,
      "loss": 0.0174,
      "step": 738500
    },
    {
      "epoch": 1.2086041777131897,
      "grad_norm": 1.0938713550567627,
      "learning_rate": 7.632756337183635e-06,
      "loss": 0.0194,
      "step": 738520
    },
    {
      "epoch": 1.208636908151843,
      "grad_norm": 0.26070544123649597,
      "learning_rate": 7.632690444970119e-06,
      "loss": 0.0264,
      "step": 738540
    },
    {
      "epoch": 1.2086696385904965,
      "grad_norm": 1.0379084348678589,
      "learning_rate": 7.632624552756601e-06,
      "loss": 0.0284,
      "step": 738560
    },
    {
      "epoch": 1.2087023690291496,
      "grad_norm": 1.1366467475891113,
      "learning_rate": 7.632558660543084e-06,
      "loss": 0.0282,
      "step": 738580
    },
    {
      "epoch": 1.208735099467803,
      "grad_norm": 3.301111936569214,
      "learning_rate": 7.632492768329566e-06,
      "loss": 0.0293,
      "step": 738600
    },
    {
      "epoch": 1.2087678299064564,
      "grad_norm": 0.8075236082077026,
      "learning_rate": 7.63242687611605e-06,
      "loss": 0.0209,
      "step": 738620
    },
    {
      "epoch": 1.2088005603451097,
      "grad_norm": 0.3638513386249542,
      "learning_rate": 7.632360983902532e-06,
      "loss": 0.0158,
      "step": 738640
    },
    {
      "epoch": 1.2088332907837631,
      "grad_norm": 0.3245485723018646,
      "learning_rate": 7.632295091689015e-06,
      "loss": 0.0195,
      "step": 738660
    },
    {
      "epoch": 1.2088660212224165,
      "grad_norm": 0.22171084582805634,
      "learning_rate": 7.632229199475499e-06,
      "loss": 0.0253,
      "step": 738680
    },
    {
      "epoch": 1.2088987516610699,
      "grad_norm": 0.8205947875976562,
      "learning_rate": 7.632163307261981e-06,
      "loss": 0.0199,
      "step": 738700
    },
    {
      "epoch": 1.208931482099723,
      "grad_norm": 1.324125051498413,
      "learning_rate": 7.632097415048464e-06,
      "loss": 0.0193,
      "step": 738720
    },
    {
      "epoch": 1.2089642125383764,
      "grad_norm": 0.17779508233070374,
      "learning_rate": 7.632031522834948e-06,
      "loss": 0.0257,
      "step": 738740
    },
    {
      "epoch": 1.2089969429770298,
      "grad_norm": 0.7196083068847656,
      "learning_rate": 7.63196563062143e-06,
      "loss": 0.0207,
      "step": 738760
    },
    {
      "epoch": 1.2090296734156831,
      "grad_norm": 0.9798340201377869,
      "learning_rate": 7.631899738407914e-06,
      "loss": 0.0199,
      "step": 738780
    },
    {
      "epoch": 1.2090624038543365,
      "grad_norm": 0.7714635729789734,
      "learning_rate": 7.631833846194397e-06,
      "loss": 0.0271,
      "step": 738800
    },
    {
      "epoch": 1.2090951342929899,
      "grad_norm": 0.6993668675422668,
      "learning_rate": 7.631767953980879e-06,
      "loss": 0.0258,
      "step": 738820
    },
    {
      "epoch": 1.2091278647316432,
      "grad_norm": 6.86199951171875,
      "learning_rate": 7.631702061767363e-06,
      "loss": 0.0249,
      "step": 738840
    },
    {
      "epoch": 1.2091605951702964,
      "grad_norm": 0.14154678583145142,
      "learning_rate": 7.631636169553844e-06,
      "loss": 0.0229,
      "step": 738860
    },
    {
      "epoch": 1.2091933256089498,
      "grad_norm": 0.6579415798187256,
      "learning_rate": 7.631570277340328e-06,
      "loss": 0.0245,
      "step": 738880
    },
    {
      "epoch": 1.2092260560476031,
      "grad_norm": 0.17949457466602325,
      "learning_rate": 7.63150438512681e-06,
      "loss": 0.0294,
      "step": 738900
    },
    {
      "epoch": 1.2092587864862565,
      "grad_norm": 0.6981064081192017,
      "learning_rate": 7.631438492913294e-06,
      "loss": 0.0195,
      "step": 738920
    },
    {
      "epoch": 1.2092915169249099,
      "grad_norm": 0.5511342287063599,
      "learning_rate": 7.631372600699775e-06,
      "loss": 0.0258,
      "step": 738940
    },
    {
      "epoch": 1.2093242473635633,
      "grad_norm": 0.6270087361335754,
      "learning_rate": 7.631306708486259e-06,
      "loss": 0.0238,
      "step": 738960
    },
    {
      "epoch": 1.2093569778022166,
      "grad_norm": 0.5938320755958557,
      "learning_rate": 7.631240816272741e-06,
      "loss": 0.0265,
      "step": 738980
    },
    {
      "epoch": 1.2093897082408698,
      "grad_norm": 1.044899821281433,
      "learning_rate": 7.631174924059225e-06,
      "loss": 0.0281,
      "step": 739000
    },
    {
      "epoch": 1.2094224386795231,
      "grad_norm": 0.7143088579177856,
      "learning_rate": 7.631109031845706e-06,
      "loss": 0.0299,
      "step": 739020
    },
    {
      "epoch": 1.2094551691181765,
      "grad_norm": 1.5568372011184692,
      "learning_rate": 7.63104313963219e-06,
      "loss": 0.023,
      "step": 739040
    },
    {
      "epoch": 1.20948789955683,
      "grad_norm": 0.8325899243354797,
      "learning_rate": 7.630977247418674e-06,
      "loss": 0.0247,
      "step": 739060
    },
    {
      "epoch": 1.2095206299954833,
      "grad_norm": 1.2495062351226807,
      "learning_rate": 7.630911355205155e-06,
      "loss": 0.0266,
      "step": 739080
    },
    {
      "epoch": 1.2095533604341364,
      "grad_norm": 1.0680596828460693,
      "learning_rate": 7.630845462991639e-06,
      "loss": 0.0249,
      "step": 739100
    },
    {
      "epoch": 1.2095860908727898,
      "grad_norm": 0.7111552953720093,
      "learning_rate": 7.630779570778123e-06,
      "loss": 0.0322,
      "step": 739120
    },
    {
      "epoch": 1.2096188213114432,
      "grad_norm": 0.6501309275627136,
      "learning_rate": 7.630713678564605e-06,
      "loss": 0.0277,
      "step": 739140
    },
    {
      "epoch": 1.2096515517500965,
      "grad_norm": 0.2723461389541626,
      "learning_rate": 7.630647786351088e-06,
      "loss": 0.0193,
      "step": 739160
    },
    {
      "epoch": 1.20968428218875,
      "grad_norm": 0.2839517891407013,
      "learning_rate": 7.630581894137572e-06,
      "loss": 0.0246,
      "step": 739180
    },
    {
      "epoch": 1.2097170126274033,
      "grad_norm": 0.2548787295818329,
      "learning_rate": 7.630516001924054e-06,
      "loss": 0.0161,
      "step": 739200
    },
    {
      "epoch": 1.2097497430660566,
      "grad_norm": 1.5334935188293457,
      "learning_rate": 7.630450109710537e-06,
      "loss": 0.0214,
      "step": 739220
    },
    {
      "epoch": 1.2097824735047098,
      "grad_norm": 0.4446793496608734,
      "learning_rate": 7.630384217497019e-06,
      "loss": 0.0164,
      "step": 739240
    },
    {
      "epoch": 1.2098152039433632,
      "grad_norm": 0.16264352202415466,
      "learning_rate": 7.630318325283503e-06,
      "loss": 0.0178,
      "step": 739260
    },
    {
      "epoch": 1.2098479343820165,
      "grad_norm": 0.2164740115404129,
      "learning_rate": 7.630252433069985e-06,
      "loss": 0.0151,
      "step": 739280
    },
    {
      "epoch": 1.20988066482067,
      "grad_norm": 0.6573557257652283,
      "learning_rate": 7.630186540856468e-06,
      "loss": 0.029,
      "step": 739300
    },
    {
      "epoch": 1.2099133952593233,
      "grad_norm": 0.7016825079917908,
      "learning_rate": 7.63012064864295e-06,
      "loss": 0.018,
      "step": 739320
    },
    {
      "epoch": 1.2099461256979767,
      "grad_norm": 0.5361149907112122,
      "learning_rate": 7.630054756429434e-06,
      "loss": 0.0227,
      "step": 739340
    },
    {
      "epoch": 1.20997885613663,
      "grad_norm": 0.5067222118377686,
      "learning_rate": 7.629988864215916e-06,
      "loss": 0.0284,
      "step": 739360
    },
    {
      "epoch": 1.2100115865752832,
      "grad_norm": 0.3643532395362854,
      "learning_rate": 7.629922972002399e-06,
      "loss": 0.0228,
      "step": 739380
    },
    {
      "epoch": 1.2100443170139366,
      "grad_norm": 0.5227733850479126,
      "learning_rate": 7.629857079788881e-06,
      "loss": 0.0203,
      "step": 739400
    },
    {
      "epoch": 1.21007704745259,
      "grad_norm": 1.6947423219680786,
      "learning_rate": 7.629791187575365e-06,
      "loss": 0.0209,
      "step": 739420
    },
    {
      "epoch": 1.2101097778912433,
      "grad_norm": 1.5469632148742676,
      "learning_rate": 7.629725295361846e-06,
      "loss": 0.0295,
      "step": 739440
    },
    {
      "epoch": 1.2101425083298967,
      "grad_norm": 0.4671403169631958,
      "learning_rate": 7.62965940314833e-06,
      "loss": 0.0142,
      "step": 739460
    },
    {
      "epoch": 1.21017523876855,
      "grad_norm": 0.9829900860786438,
      "learning_rate": 7.629593510934814e-06,
      "loss": 0.0307,
      "step": 739480
    },
    {
      "epoch": 1.2102079692072034,
      "grad_norm": 0.19781085848808289,
      "learning_rate": 7.629527618721296e-06,
      "loss": 0.0207,
      "step": 739500
    },
    {
      "epoch": 1.2102406996458566,
      "grad_norm": 0.6497361660003662,
      "learning_rate": 7.629461726507779e-06,
      "loss": 0.0208,
      "step": 739520
    },
    {
      "epoch": 1.21027343008451,
      "grad_norm": 0.6812572479248047,
      "learning_rate": 7.629395834294263e-06,
      "loss": 0.0291,
      "step": 739540
    },
    {
      "epoch": 1.2103061605231633,
      "grad_norm": 0.31337299942970276,
      "learning_rate": 7.6293299420807455e-06,
      "loss": 0.018,
      "step": 739560
    },
    {
      "epoch": 1.2103388909618167,
      "grad_norm": 0.2464531660079956,
      "learning_rate": 7.629264049867227e-06,
      "loss": 0.0276,
      "step": 739580
    },
    {
      "epoch": 1.21037162140047,
      "grad_norm": 1.007065773010254,
      "learning_rate": 7.629198157653711e-06,
      "loss": 0.026,
      "step": 739600
    },
    {
      "epoch": 1.2104043518391234,
      "grad_norm": 0.7068825364112854,
      "learning_rate": 7.629132265440194e-06,
      "loss": 0.0269,
      "step": 739620
    },
    {
      "epoch": 1.2104370822777768,
      "grad_norm": 1.3464967012405396,
      "learning_rate": 7.6290663732266764e-06,
      "loss": 0.0241,
      "step": 739640
    },
    {
      "epoch": 1.21046981271643,
      "grad_norm": 0.26738932728767395,
      "learning_rate": 7.629000481013159e-06,
      "loss": 0.0218,
      "step": 739660
    },
    {
      "epoch": 1.2105025431550833,
      "grad_norm": 1.056000828742981,
      "learning_rate": 7.628934588799643e-06,
      "loss": 0.0226,
      "step": 739680
    },
    {
      "epoch": 1.2105352735937367,
      "grad_norm": 0.6096124053001404,
      "learning_rate": 7.628868696586125e-06,
      "loss": 0.024,
      "step": 739700
    },
    {
      "epoch": 1.21056800403239,
      "grad_norm": 0.22041574120521545,
      "learning_rate": 7.628802804372608e-06,
      "loss": 0.0309,
      "step": 739720
    },
    {
      "epoch": 1.2106007344710434,
      "grad_norm": 0.3765658140182495,
      "learning_rate": 7.62873691215909e-06,
      "loss": 0.03,
      "step": 739740
    },
    {
      "epoch": 1.2106334649096968,
      "grad_norm": 0.5710854530334473,
      "learning_rate": 7.628671019945574e-06,
      "loss": 0.0217,
      "step": 739760
    },
    {
      "epoch": 1.2106661953483502,
      "grad_norm": 0.1970667690038681,
      "learning_rate": 7.6286051277320565e-06,
      "loss": 0.0164,
      "step": 739780
    },
    {
      "epoch": 1.2106989257870033,
      "grad_norm": 0.32119977474212646,
      "learning_rate": 7.628539235518539e-06,
      "loss": 0.0303,
      "step": 739800
    },
    {
      "epoch": 1.2107316562256567,
      "grad_norm": 0.972160279750824,
      "learning_rate": 7.628473343305022e-06,
      "loss": 0.024,
      "step": 739820
    },
    {
      "epoch": 1.21076438666431,
      "grad_norm": 1.224299430847168,
      "learning_rate": 7.6284074510915055e-06,
      "loss": 0.022,
      "step": 739840
    },
    {
      "epoch": 1.2107971171029634,
      "grad_norm": 0.5424677133560181,
      "learning_rate": 7.628341558877988e-06,
      "loss": 0.0281,
      "step": 739860
    },
    {
      "epoch": 1.2108298475416168,
      "grad_norm": 0.08463001251220703,
      "learning_rate": 7.628275666664471e-06,
      "loss": 0.0254,
      "step": 739880
    },
    {
      "epoch": 1.21086257798027,
      "grad_norm": 0.3932042121887207,
      "learning_rate": 7.628209774450955e-06,
      "loss": 0.0208,
      "step": 739900
    },
    {
      "epoch": 1.2108953084189233,
      "grad_norm": 0.6114266514778137,
      "learning_rate": 7.6281438822374365e-06,
      "loss": 0.0261,
      "step": 739920
    },
    {
      "epoch": 1.2109280388575767,
      "grad_norm": 0.2165881246328354,
      "learning_rate": 7.62807799002392e-06,
      "loss": 0.0205,
      "step": 739940
    },
    {
      "epoch": 1.21096076929623,
      "grad_norm": 0.10165659338235855,
      "learning_rate": 7.628012097810402e-06,
      "loss": 0.0214,
      "step": 739960
    },
    {
      "epoch": 1.2109934997348835,
      "grad_norm": 0.7431460618972778,
      "learning_rate": 7.6279462055968855e-06,
      "loss": 0.0209,
      "step": 739980
    },
    {
      "epoch": 1.2110262301735368,
      "grad_norm": 0.8057875633239746,
      "learning_rate": 7.627880313383368e-06,
      "loss": 0.0129,
      "step": 740000
    },
    {
      "epoch": 1.2110589606121902,
      "grad_norm": 0.44863593578338623,
      "learning_rate": 7.627814421169851e-06,
      "loss": 0.0244,
      "step": 740020
    },
    {
      "epoch": 1.2110916910508434,
      "grad_norm": 0.3387015461921692,
      "learning_rate": 7.627748528956334e-06,
      "loss": 0.0186,
      "step": 740040
    },
    {
      "epoch": 1.2111244214894967,
      "grad_norm": 0.9230816960334778,
      "learning_rate": 7.627682636742817e-06,
      "loss": 0.0229,
      "step": 740060
    },
    {
      "epoch": 1.21115715192815,
      "grad_norm": 0.27367860078811646,
      "learning_rate": 7.627616744529299e-06,
      "loss": 0.0245,
      "step": 740080
    },
    {
      "epoch": 1.2111898823668035,
      "grad_norm": 1.2173774242401123,
      "learning_rate": 7.627550852315783e-06,
      "loss": 0.0229,
      "step": 740100
    },
    {
      "epoch": 1.2112226128054568,
      "grad_norm": 0.5406513810157776,
      "learning_rate": 7.627484960102265e-06,
      "loss": 0.0191,
      "step": 740120
    },
    {
      "epoch": 1.2112553432441102,
      "grad_norm": 0.5701717138290405,
      "learning_rate": 7.627419067888748e-06,
      "loss": 0.02,
      "step": 740140
    },
    {
      "epoch": 1.2112880736827636,
      "grad_norm": 0.6498425602912903,
      "learning_rate": 7.62735317567523e-06,
      "loss": 0.0204,
      "step": 740160
    },
    {
      "epoch": 1.2113208041214167,
      "grad_norm": 0.5899577140808105,
      "learning_rate": 7.627287283461714e-06,
      "loss": 0.0224,
      "step": 740180
    },
    {
      "epoch": 1.21135353456007,
      "grad_norm": 0.23718790709972382,
      "learning_rate": 7.6272213912481965e-06,
      "loss": 0.0235,
      "step": 740200
    },
    {
      "epoch": 1.2113862649987235,
      "grad_norm": 0.7347152233123779,
      "learning_rate": 7.627155499034679e-06,
      "loss": 0.0206,
      "step": 740220
    },
    {
      "epoch": 1.2114189954373769,
      "grad_norm": 0.266421377658844,
      "learning_rate": 7.627089606821163e-06,
      "loss": 0.0357,
      "step": 740240
    },
    {
      "epoch": 1.2114517258760302,
      "grad_norm": 0.26438838243484497,
      "learning_rate": 7.627023714607646e-06,
      "loss": 0.0139,
      "step": 740260
    },
    {
      "epoch": 1.2114844563146836,
      "grad_norm": 1.458693504333496,
      "learning_rate": 7.626957822394128e-06,
      "loss": 0.0159,
      "step": 740280
    },
    {
      "epoch": 1.211517186753337,
      "grad_norm": 1.4435136318206787,
      "learning_rate": 7.626891930180611e-06,
      "loss": 0.0263,
      "step": 740300
    },
    {
      "epoch": 1.2115499171919901,
      "grad_norm": 0.7429193258285522,
      "learning_rate": 7.626826037967095e-06,
      "loss": 0.0244,
      "step": 740320
    },
    {
      "epoch": 1.2115826476306435,
      "grad_norm": 3.2054202556610107,
      "learning_rate": 7.6267601457535766e-06,
      "loss": 0.0241,
      "step": 740340
    },
    {
      "epoch": 1.2116153780692969,
      "grad_norm": 0.47042298316955566,
      "learning_rate": 7.62669425354006e-06,
      "loss": 0.0229,
      "step": 740360
    },
    {
      "epoch": 1.2116481085079502,
      "grad_norm": 1.6631582975387573,
      "learning_rate": 7.626628361326542e-06,
      "loss": 0.0219,
      "step": 740380
    },
    {
      "epoch": 1.2116808389466036,
      "grad_norm": 0.767626941204071,
      "learning_rate": 7.626562469113026e-06,
      "loss": 0.0254,
      "step": 740400
    },
    {
      "epoch": 1.211713569385257,
      "grad_norm": 0.6942151784896851,
      "learning_rate": 7.626496576899508e-06,
      "loss": 0.0248,
      "step": 740420
    },
    {
      "epoch": 1.2117462998239104,
      "grad_norm": 0.37248972058296204,
      "learning_rate": 7.626430684685991e-06,
      "loss": 0.0162,
      "step": 740440
    },
    {
      "epoch": 1.2117790302625635,
      "grad_norm": 0.34668681025505066,
      "learning_rate": 7.626364792472474e-06,
      "loss": 0.0234,
      "step": 740460
    },
    {
      "epoch": 1.2118117607012169,
      "grad_norm": 0.6702529788017273,
      "learning_rate": 7.6262989002589574e-06,
      "loss": 0.0171,
      "step": 740480
    },
    {
      "epoch": 1.2118444911398703,
      "grad_norm": 0.6085187792778015,
      "learning_rate": 7.626233008045439e-06,
      "loss": 0.0287,
      "step": 740500
    },
    {
      "epoch": 1.2118772215785236,
      "grad_norm": 0.678303062915802,
      "learning_rate": 7.626167115831923e-06,
      "loss": 0.0263,
      "step": 740520
    },
    {
      "epoch": 1.211909952017177,
      "grad_norm": 0.9753071069717407,
      "learning_rate": 7.626101223618405e-06,
      "loss": 0.0274,
      "step": 740540
    },
    {
      "epoch": 1.2119426824558304,
      "grad_norm": 1.8081316947937012,
      "learning_rate": 7.626035331404888e-06,
      "loss": 0.0301,
      "step": 740560
    },
    {
      "epoch": 1.2119754128944835,
      "grad_norm": 0.5502423048019409,
      "learning_rate": 7.625969439191372e-06,
      "loss": 0.0218,
      "step": 740580
    },
    {
      "epoch": 1.212008143333137,
      "grad_norm": 0.548379123210907,
      "learning_rate": 7.625903546977854e-06,
      "loss": 0.0185,
      "step": 740600
    },
    {
      "epoch": 1.2120408737717903,
      "grad_norm": 0.3411314785480499,
      "learning_rate": 7.6258376547643375e-06,
      "loss": 0.0239,
      "step": 740620
    },
    {
      "epoch": 1.2120736042104436,
      "grad_norm": 1.045155644416809,
      "learning_rate": 7.62577176255082e-06,
      "loss": 0.0222,
      "step": 740640
    },
    {
      "epoch": 1.212106334649097,
      "grad_norm": 2.1978156566619873,
      "learning_rate": 7.625705870337303e-06,
      "loss": 0.0314,
      "step": 740660
    },
    {
      "epoch": 1.2121390650877504,
      "grad_norm": 1.2510902881622314,
      "learning_rate": 7.625639978123786e-06,
      "loss": 0.021,
      "step": 740680
    },
    {
      "epoch": 1.2121717955264035,
      "grad_norm": 0.48432451486587524,
      "learning_rate": 7.625574085910269e-06,
      "loss": 0.0315,
      "step": 740700
    },
    {
      "epoch": 1.212204525965057,
      "grad_norm": 0.4559575021266937,
      "learning_rate": 7.625508193696751e-06,
      "loss": 0.0227,
      "step": 740720
    },
    {
      "epoch": 1.2122372564037103,
      "grad_norm": 0.047444526106119156,
      "learning_rate": 7.625442301483235e-06,
      "loss": 0.0205,
      "step": 740740
    },
    {
      "epoch": 1.2122699868423636,
      "grad_norm": 0.33849892020225525,
      "learning_rate": 7.625376409269717e-06,
      "loss": 0.025,
      "step": 740760
    },
    {
      "epoch": 1.212302717281017,
      "grad_norm": 0.23181544244289398,
      "learning_rate": 7.6253105170562e-06,
      "loss": 0.0223,
      "step": 740780
    },
    {
      "epoch": 1.2123354477196704,
      "grad_norm": 0.3304087817668915,
      "learning_rate": 7.625244624842683e-06,
      "loss": 0.0243,
      "step": 740800
    },
    {
      "epoch": 1.2123681781583238,
      "grad_norm": 0.2242894172668457,
      "learning_rate": 7.625178732629166e-06,
      "loss": 0.0163,
      "step": 740820
    },
    {
      "epoch": 1.212400908596977,
      "grad_norm": 0.39618608355522156,
      "learning_rate": 7.6251128404156484e-06,
      "loss": 0.0235,
      "step": 740840
    },
    {
      "epoch": 1.2124336390356303,
      "grad_norm": 1.0343858003616333,
      "learning_rate": 7.625046948202132e-06,
      "loss": 0.0242,
      "step": 740860
    },
    {
      "epoch": 1.2124663694742837,
      "grad_norm": 1.4545797109603882,
      "learning_rate": 7.624981055988614e-06,
      "loss": 0.0238,
      "step": 740880
    },
    {
      "epoch": 1.212499099912937,
      "grad_norm": 0.5135471224784851,
      "learning_rate": 7.6249151637750975e-06,
      "loss": 0.0254,
      "step": 740900
    },
    {
      "epoch": 1.2125318303515904,
      "grad_norm": 0.6165875196456909,
      "learning_rate": 7.624849271561581e-06,
      "loss": 0.0191,
      "step": 740920
    },
    {
      "epoch": 1.2125645607902438,
      "grad_norm": 0.6699402928352356,
      "learning_rate": 7.624783379348063e-06,
      "loss": 0.0264,
      "step": 740940
    },
    {
      "epoch": 1.2125972912288971,
      "grad_norm": 0.598873496055603,
      "learning_rate": 7.6247174871345466e-06,
      "loss": 0.0216,
      "step": 740960
    },
    {
      "epoch": 1.2126300216675503,
      "grad_norm": 0.20531219244003296,
      "learning_rate": 7.6246515949210285e-06,
      "loss": 0.024,
      "step": 740980
    },
    {
      "epoch": 1.2126627521062037,
      "grad_norm": 0.5756280422210693,
      "learning_rate": 7.624585702707512e-06,
      "loss": 0.0216,
      "step": 741000
    },
    {
      "epoch": 1.212695482544857,
      "grad_norm": 0.7120456695556641,
      "learning_rate": 7.624519810493995e-06,
      "loss": 0.0289,
      "step": 741020
    },
    {
      "epoch": 1.2127282129835104,
      "grad_norm": 1.1888014078140259,
      "learning_rate": 7.6244539182804775e-06,
      "loss": 0.0266,
      "step": 741040
    },
    {
      "epoch": 1.2127609434221638,
      "grad_norm": 1.6376185417175293,
      "learning_rate": 7.62438802606696e-06,
      "loss": 0.0319,
      "step": 741060
    },
    {
      "epoch": 1.2127936738608172,
      "grad_norm": 0.41298648715019226,
      "learning_rate": 7.624322133853444e-06,
      "loss": 0.0179,
      "step": 741080
    },
    {
      "epoch": 1.2128264042994705,
      "grad_norm": 0.7262731790542603,
      "learning_rate": 7.624256241639926e-06,
      "loss": 0.0198,
      "step": 741100
    },
    {
      "epoch": 1.2128591347381237,
      "grad_norm": 0.921190083026886,
      "learning_rate": 7.624190349426409e-06,
      "loss": 0.0369,
      "step": 741120
    },
    {
      "epoch": 1.212891865176777,
      "grad_norm": 0.5723784565925598,
      "learning_rate": 7.624124457212891e-06,
      "loss": 0.0186,
      "step": 741140
    },
    {
      "epoch": 1.2129245956154304,
      "grad_norm": 0.5143327713012695,
      "learning_rate": 7.624058564999375e-06,
      "loss": 0.0167,
      "step": 741160
    },
    {
      "epoch": 1.2129573260540838,
      "grad_norm": 1.6984565258026123,
      "learning_rate": 7.623992672785857e-06,
      "loss": 0.0228,
      "step": 741180
    },
    {
      "epoch": 1.2129900564927372,
      "grad_norm": 1.487621784210205,
      "learning_rate": 7.62392678057234e-06,
      "loss": 0.0254,
      "step": 741200
    },
    {
      "epoch": 1.2130227869313905,
      "grad_norm": 0.6982818245887756,
      "learning_rate": 7.623860888358823e-06,
      "loss": 0.0256,
      "step": 741220
    },
    {
      "epoch": 1.213055517370044,
      "grad_norm": 0.6906751394271851,
      "learning_rate": 7.623794996145306e-06,
      "loss": 0.0228,
      "step": 741240
    },
    {
      "epoch": 1.213088247808697,
      "grad_norm": 0.17861098051071167,
      "learning_rate": 7.623729103931789e-06,
      "loss": 0.0224,
      "step": 741260
    },
    {
      "epoch": 1.2131209782473504,
      "grad_norm": 1.0387212038040161,
      "learning_rate": 7.623663211718272e-06,
      "loss": 0.0291,
      "step": 741280
    },
    {
      "epoch": 1.2131537086860038,
      "grad_norm": 0.44644615054130554,
      "learning_rate": 7.623597319504755e-06,
      "loss": 0.021,
      "step": 741300
    },
    {
      "epoch": 1.2131864391246572,
      "grad_norm": 0.4733949601650238,
      "learning_rate": 7.6235314272912376e-06,
      "loss": 0.0332,
      "step": 741320
    },
    {
      "epoch": 1.2132191695633106,
      "grad_norm": 0.4846639335155487,
      "learning_rate": 7.623465535077721e-06,
      "loss": 0.019,
      "step": 741340
    },
    {
      "epoch": 1.213251900001964,
      "grad_norm": 0.31631168723106384,
      "learning_rate": 7.623399642864203e-06,
      "loss": 0.0218,
      "step": 741360
    },
    {
      "epoch": 1.213284630440617,
      "grad_norm": 0.2748373746871948,
      "learning_rate": 7.623333750650687e-06,
      "loss": 0.0203,
      "step": 741380
    },
    {
      "epoch": 1.2133173608792704,
      "grad_norm": 0.5174952149391174,
      "learning_rate": 7.6232678584371685e-06,
      "loss": 0.0249,
      "step": 741400
    },
    {
      "epoch": 1.2133500913179238,
      "grad_norm": 0.37700390815734863,
      "learning_rate": 7.623201966223652e-06,
      "loss": 0.0139,
      "step": 741420
    },
    {
      "epoch": 1.2133828217565772,
      "grad_norm": 0.5124727487564087,
      "learning_rate": 7.623136074010135e-06,
      "loss": 0.0228,
      "step": 741440
    },
    {
      "epoch": 1.2134155521952306,
      "grad_norm": 0.36944299936294556,
      "learning_rate": 7.623070181796618e-06,
      "loss": 0.0212,
      "step": 741460
    },
    {
      "epoch": 1.213448282633884,
      "grad_norm": 0.2898242771625519,
      "learning_rate": 7.6230042895831e-06,
      "loss": 0.0255,
      "step": 741480
    },
    {
      "epoch": 1.213481013072537,
      "grad_norm": 1.1456189155578613,
      "learning_rate": 7.622938397369584e-06,
      "loss": 0.0309,
      "step": 741500
    },
    {
      "epoch": 1.2135137435111905,
      "grad_norm": 0.23888634145259857,
      "learning_rate": 7.622872505156066e-06,
      "loss": 0.0152,
      "step": 741520
    },
    {
      "epoch": 1.2135464739498438,
      "grad_norm": 0.1767825335264206,
      "learning_rate": 7.622806612942549e-06,
      "loss": 0.0187,
      "step": 741540
    },
    {
      "epoch": 1.2135792043884972,
      "grad_norm": 0.7096070051193237,
      "learning_rate": 7.622740720729031e-06,
      "loss": 0.0345,
      "step": 741560
    },
    {
      "epoch": 1.2136119348271506,
      "grad_norm": 0.2648938000202179,
      "learning_rate": 7.622674828515515e-06,
      "loss": 0.021,
      "step": 741580
    },
    {
      "epoch": 1.213644665265804,
      "grad_norm": 0.6027886867523193,
      "learning_rate": 7.622608936301998e-06,
      "loss": 0.0212,
      "step": 741600
    },
    {
      "epoch": 1.2136773957044573,
      "grad_norm": 0.43890124559402466,
      "learning_rate": 7.62254304408848e-06,
      "loss": 0.0277,
      "step": 741620
    },
    {
      "epoch": 1.2137101261431105,
      "grad_norm": 0.886885941028595,
      "learning_rate": 7.622477151874964e-06,
      "loss": 0.0237,
      "step": 741640
    },
    {
      "epoch": 1.2137428565817638,
      "grad_norm": 0.09615324437618256,
      "learning_rate": 7.622411259661447e-06,
      "loss": 0.0233,
      "step": 741660
    },
    {
      "epoch": 1.2137755870204172,
      "grad_norm": 0.22048571705818176,
      "learning_rate": 7.6223453674479294e-06,
      "loss": 0.0198,
      "step": 741680
    },
    {
      "epoch": 1.2138083174590706,
      "grad_norm": 0.348127156496048,
      "learning_rate": 7.622279475234412e-06,
      "loss": 0.0254,
      "step": 741700
    },
    {
      "epoch": 1.213841047897724,
      "grad_norm": 0.3406662046909332,
      "learning_rate": 7.622213583020896e-06,
      "loss": 0.0196,
      "step": 741720
    },
    {
      "epoch": 1.2138737783363773,
      "grad_norm": 0.3072676360607147,
      "learning_rate": 7.622147690807378e-06,
      "loss": 0.0189,
      "step": 741740
    },
    {
      "epoch": 1.2139065087750307,
      "grad_norm": 1.7028567790985107,
      "learning_rate": 7.622081798593861e-06,
      "loss": 0.0201,
      "step": 741760
    },
    {
      "epoch": 1.2139392392136839,
      "grad_norm": 1.1165854930877686,
      "learning_rate": 7.622015906380343e-06,
      "loss": 0.0261,
      "step": 741780
    },
    {
      "epoch": 1.2139719696523372,
      "grad_norm": 0.4324621856212616,
      "learning_rate": 7.621950014166827e-06,
      "loss": 0.0245,
      "step": 741800
    },
    {
      "epoch": 1.2140047000909906,
      "grad_norm": 0.9120320677757263,
      "learning_rate": 7.6218841219533095e-06,
      "loss": 0.0292,
      "step": 741820
    },
    {
      "epoch": 1.214037430529644,
      "grad_norm": 0.6634246110916138,
      "learning_rate": 7.621818229739792e-06,
      "loss": 0.0213,
      "step": 741840
    },
    {
      "epoch": 1.2140701609682973,
      "grad_norm": 0.8091892600059509,
      "learning_rate": 7.621752337526275e-06,
      "loss": 0.0236,
      "step": 741860
    },
    {
      "epoch": 1.2141028914069507,
      "grad_norm": 0.48435574769973755,
      "learning_rate": 7.6216864453127585e-06,
      "loss": 0.0217,
      "step": 741880
    },
    {
      "epoch": 1.214135621845604,
      "grad_norm": 0.7537267208099365,
      "learning_rate": 7.62162055309924e-06,
      "loss": 0.0279,
      "step": 741900
    },
    {
      "epoch": 1.2141683522842572,
      "grad_norm": 0.53401118516922,
      "learning_rate": 7.621554660885724e-06,
      "loss": 0.0266,
      "step": 741920
    },
    {
      "epoch": 1.2142010827229106,
      "grad_norm": 0.7057257890701294,
      "learning_rate": 7.621488768672206e-06,
      "loss": 0.0193,
      "step": 741940
    },
    {
      "epoch": 1.214233813161564,
      "grad_norm": 0.29714107513427734,
      "learning_rate": 7.6214228764586895e-06,
      "loss": 0.0245,
      "step": 741960
    },
    {
      "epoch": 1.2142665436002174,
      "grad_norm": 0.2658345401287079,
      "learning_rate": 7.621356984245173e-06,
      "loss": 0.022,
      "step": 741980
    },
    {
      "epoch": 1.2142992740388707,
      "grad_norm": 1.8097527027130127,
      "learning_rate": 7.621291092031655e-06,
      "loss": 0.023,
      "step": 742000
    },
    {
      "epoch": 1.214332004477524,
      "grad_norm": 0.2670014798641205,
      "learning_rate": 7.6212251998181385e-06,
      "loss": 0.0271,
      "step": 742020
    },
    {
      "epoch": 1.2143647349161775,
      "grad_norm": 1.7404959201812744,
      "learning_rate": 7.6211593076046204e-06,
      "loss": 0.0269,
      "step": 742040
    },
    {
      "epoch": 1.2143974653548306,
      "grad_norm": 0.5780378580093384,
      "learning_rate": 7.621093415391104e-06,
      "loss": 0.0221,
      "step": 742060
    },
    {
      "epoch": 1.214430195793484,
      "grad_norm": 0.5239108204841614,
      "learning_rate": 7.621027523177587e-06,
      "loss": 0.0247,
      "step": 742080
    },
    {
      "epoch": 1.2144629262321374,
      "grad_norm": 0.8638837933540344,
      "learning_rate": 7.62096163096407e-06,
      "loss": 0.0275,
      "step": 742100
    },
    {
      "epoch": 1.2144956566707907,
      "grad_norm": 0.6758864521980286,
      "learning_rate": 7.620895738750552e-06,
      "loss": 0.0307,
      "step": 742120
    },
    {
      "epoch": 1.214528387109444,
      "grad_norm": 1.291224718093872,
      "learning_rate": 7.620829846537036e-06,
      "loss": 0.0283,
      "step": 742140
    },
    {
      "epoch": 1.2145611175480973,
      "grad_norm": 0.3234364092350006,
      "learning_rate": 7.620763954323518e-06,
      "loss": 0.0223,
      "step": 742160
    },
    {
      "epoch": 1.2145938479867506,
      "grad_norm": 0.1272212117910385,
      "learning_rate": 7.620698062110001e-06,
      "loss": 0.0266,
      "step": 742180
    },
    {
      "epoch": 1.214626578425404,
      "grad_norm": 0.6019822955131531,
      "learning_rate": 7.620632169896483e-06,
      "loss": 0.023,
      "step": 742200
    },
    {
      "epoch": 1.2146593088640574,
      "grad_norm": 0.9422764182090759,
      "learning_rate": 7.620566277682967e-06,
      "loss": 0.0195,
      "step": 742220
    },
    {
      "epoch": 1.2146920393027107,
      "grad_norm": 0.3329228162765503,
      "learning_rate": 7.6205003854694495e-06,
      "loss": 0.0206,
      "step": 742240
    },
    {
      "epoch": 1.2147247697413641,
      "grad_norm": 0.6608591079711914,
      "learning_rate": 7.620434493255932e-06,
      "loss": 0.0252,
      "step": 742260
    },
    {
      "epoch": 1.2147575001800175,
      "grad_norm": 0.5552695989608765,
      "learning_rate": 7.620368601042415e-06,
      "loss": 0.0176,
      "step": 742280
    },
    {
      "epoch": 1.2147902306186706,
      "grad_norm": 0.5856403708457947,
      "learning_rate": 7.620302708828899e-06,
      "loss": 0.0238,
      "step": 742300
    },
    {
      "epoch": 1.214822961057324,
      "grad_norm": 0.7957179546356201,
      "learning_rate": 7.620236816615381e-06,
      "loss": 0.0207,
      "step": 742320
    },
    {
      "epoch": 1.2148556914959774,
      "grad_norm": 1.5114405155181885,
      "learning_rate": 7.620170924401864e-06,
      "loss": 0.0232,
      "step": 742340
    },
    {
      "epoch": 1.2148884219346308,
      "grad_norm": 0.655448317527771,
      "learning_rate": 7.620105032188348e-06,
      "loss": 0.019,
      "step": 742360
    },
    {
      "epoch": 1.2149211523732841,
      "grad_norm": 1.207574486732483,
      "learning_rate": 7.6200391399748296e-06,
      "loss": 0.0241,
      "step": 742380
    },
    {
      "epoch": 1.2149538828119375,
      "grad_norm": 0.9357373118400574,
      "learning_rate": 7.619973247761313e-06,
      "loss": 0.0281,
      "step": 742400
    },
    {
      "epoch": 1.2149866132505909,
      "grad_norm": 0.6126355528831482,
      "learning_rate": 7.619907355547795e-06,
      "loss": 0.0232,
      "step": 742420
    },
    {
      "epoch": 1.215019343689244,
      "grad_norm": 0.36841264367103577,
      "learning_rate": 7.619841463334279e-06,
      "loss": 0.0178,
      "step": 742440
    },
    {
      "epoch": 1.2150520741278974,
      "grad_norm": 1.60928213596344,
      "learning_rate": 7.619775571120761e-06,
      "loss": 0.0218,
      "step": 742460
    },
    {
      "epoch": 1.2150848045665508,
      "grad_norm": 0.5397390723228455,
      "learning_rate": 7.619709678907244e-06,
      "loss": 0.016,
      "step": 742480
    },
    {
      "epoch": 1.2151175350052041,
      "grad_norm": 0.38114872574806213,
      "learning_rate": 7.619643786693727e-06,
      "loss": 0.0212,
      "step": 742500
    },
    {
      "epoch": 1.2151502654438575,
      "grad_norm": 0.11656417697668076,
      "learning_rate": 7.6195778944802104e-06,
      "loss": 0.0211,
      "step": 742520
    },
    {
      "epoch": 1.2151829958825109,
      "grad_norm": 0.6576372385025024,
      "learning_rate": 7.619512002266692e-06,
      "loss": 0.0147,
      "step": 742540
    },
    {
      "epoch": 1.2152157263211643,
      "grad_norm": 18.835813522338867,
      "learning_rate": 7.619446110053176e-06,
      "loss": 0.0187,
      "step": 742560
    },
    {
      "epoch": 1.2152484567598174,
      "grad_norm": 0.5544798374176025,
      "learning_rate": 7.619380217839658e-06,
      "loss": 0.0213,
      "step": 742580
    },
    {
      "epoch": 1.2152811871984708,
      "grad_norm": 1.666565179824829,
      "learning_rate": 7.619314325626141e-06,
      "loss": 0.0305,
      "step": 742600
    },
    {
      "epoch": 1.2153139176371242,
      "grad_norm": 1.052851915359497,
      "learning_rate": 7.619248433412624e-06,
      "loss": 0.0223,
      "step": 742620
    },
    {
      "epoch": 1.2153466480757775,
      "grad_norm": 0.3870907425880432,
      "learning_rate": 7.619182541199107e-06,
      "loss": 0.0196,
      "step": 742640
    },
    {
      "epoch": 1.215379378514431,
      "grad_norm": 0.83418208360672,
      "learning_rate": 7.61911664898559e-06,
      "loss": 0.0183,
      "step": 742660
    },
    {
      "epoch": 1.2154121089530843,
      "grad_norm": 0.5531913042068481,
      "learning_rate": 7.619050756772073e-06,
      "loss": 0.0194,
      "step": 742680
    },
    {
      "epoch": 1.2154448393917376,
      "grad_norm": 0.540037214756012,
      "learning_rate": 7.618984864558556e-06,
      "loss": 0.0218,
      "step": 742700
    },
    {
      "epoch": 1.2154775698303908,
      "grad_norm": 1.9903545379638672,
      "learning_rate": 7.618918972345039e-06,
      "loss": 0.0183,
      "step": 742720
    },
    {
      "epoch": 1.2155103002690442,
      "grad_norm": 0.7514920830726624,
      "learning_rate": 7.618853080131522e-06,
      "loss": 0.02,
      "step": 742740
    },
    {
      "epoch": 1.2155430307076975,
      "grad_norm": 0.5979670286178589,
      "learning_rate": 7.618787187918004e-06,
      "loss": 0.0225,
      "step": 742760
    },
    {
      "epoch": 1.215575761146351,
      "grad_norm": 0.8257737159729004,
      "learning_rate": 7.618721295704488e-06,
      "loss": 0.0276,
      "step": 742780
    },
    {
      "epoch": 1.2156084915850043,
      "grad_norm": 0.4715922474861145,
      "learning_rate": 7.61865540349097e-06,
      "loss": 0.0262,
      "step": 742800
    },
    {
      "epoch": 1.2156412220236577,
      "grad_norm": 0.5245605111122131,
      "learning_rate": 7.618589511277453e-06,
      "loss": 0.0284,
      "step": 742820
    },
    {
      "epoch": 1.2156739524623108,
      "grad_norm": 0.6987601518630981,
      "learning_rate": 7.618523619063936e-06,
      "loss": 0.0213,
      "step": 742840
    },
    {
      "epoch": 1.2157066829009642,
      "grad_norm": 0.53973788022995,
      "learning_rate": 7.618457726850419e-06,
      "loss": 0.0237,
      "step": 742860
    },
    {
      "epoch": 1.2157394133396175,
      "grad_norm": 0.5998886227607727,
      "learning_rate": 7.6183918346369014e-06,
      "loss": 0.0267,
      "step": 742880
    },
    {
      "epoch": 1.215772143778271,
      "grad_norm": 1.1786361932754517,
      "learning_rate": 7.618325942423385e-06,
      "loss": 0.0209,
      "step": 742900
    },
    {
      "epoch": 1.2158048742169243,
      "grad_norm": 0.29619300365448,
      "learning_rate": 7.618260050209867e-06,
      "loss": 0.0181,
      "step": 742920
    },
    {
      "epoch": 1.2158376046555777,
      "grad_norm": 0.9503452181816101,
      "learning_rate": 7.6181941579963505e-06,
      "loss": 0.0239,
      "step": 742940
    },
    {
      "epoch": 1.2158703350942308,
      "grad_norm": 0.296278178691864,
      "learning_rate": 7.618128265782832e-06,
      "loss": 0.0204,
      "step": 742960
    },
    {
      "epoch": 1.2159030655328842,
      "grad_norm": 0.5782947540283203,
      "learning_rate": 7.618062373569316e-06,
      "loss": 0.0219,
      "step": 742980
    },
    {
      "epoch": 1.2159357959715376,
      "grad_norm": 0.39632490277290344,
      "learning_rate": 7.617996481355798e-06,
      "loss": 0.0212,
      "step": 743000
    },
    {
      "epoch": 1.215968526410191,
      "grad_norm": 0.16872546076774597,
      "learning_rate": 7.6179305891422815e-06,
      "loss": 0.0188,
      "step": 743020
    },
    {
      "epoch": 1.2160012568488443,
      "grad_norm": 1.3482372760772705,
      "learning_rate": 7.617864696928765e-06,
      "loss": 0.0238,
      "step": 743040
    },
    {
      "epoch": 1.2160339872874977,
      "grad_norm": 1.2605103254318237,
      "learning_rate": 7.617798804715247e-06,
      "loss": 0.0282,
      "step": 743060
    },
    {
      "epoch": 1.216066717726151,
      "grad_norm": 0.455746591091156,
      "learning_rate": 7.6177329125017305e-06,
      "loss": 0.0159,
      "step": 743080
    },
    {
      "epoch": 1.2160994481648042,
      "grad_norm": 0.9916481375694275,
      "learning_rate": 7.617667020288213e-06,
      "loss": 0.0322,
      "step": 743100
    },
    {
      "epoch": 1.2161321786034576,
      "grad_norm": 0.544128954410553,
      "learning_rate": 7.617601128074696e-06,
      "loss": 0.0215,
      "step": 743120
    },
    {
      "epoch": 1.216164909042111,
      "grad_norm": 0.5939510464668274,
      "learning_rate": 7.617535235861179e-06,
      "loss": 0.0194,
      "step": 743140
    },
    {
      "epoch": 1.2161976394807643,
      "grad_norm": 1.0232396125793457,
      "learning_rate": 7.617469343647662e-06,
      "loss": 0.0322,
      "step": 743160
    },
    {
      "epoch": 1.2162303699194177,
      "grad_norm": 0.1675989031791687,
      "learning_rate": 7.617403451434144e-06,
      "loss": 0.0197,
      "step": 743180
    },
    {
      "epoch": 1.216263100358071,
      "grad_norm": 1.1748312711715698,
      "learning_rate": 7.617337559220628e-06,
      "loss": 0.0211,
      "step": 743200
    },
    {
      "epoch": 1.2162958307967244,
      "grad_norm": 2.070300579071045,
      "learning_rate": 7.61727166700711e-06,
      "loss": 0.0227,
      "step": 743220
    },
    {
      "epoch": 1.2163285612353776,
      "grad_norm": 0.9803335666656494,
      "learning_rate": 7.617205774793593e-06,
      "loss": 0.0316,
      "step": 743240
    },
    {
      "epoch": 1.216361291674031,
      "grad_norm": 0.3678721785545349,
      "learning_rate": 7.617139882580076e-06,
      "loss": 0.0231,
      "step": 743260
    },
    {
      "epoch": 1.2163940221126843,
      "grad_norm": 0.22606545686721802,
      "learning_rate": 7.617073990366559e-06,
      "loss": 0.025,
      "step": 743280
    },
    {
      "epoch": 1.2164267525513377,
      "grad_norm": 0.5093849301338196,
      "learning_rate": 7.6170080981530415e-06,
      "loss": 0.0205,
      "step": 743300
    },
    {
      "epoch": 1.216459482989991,
      "grad_norm": 0.291659951210022,
      "learning_rate": 7.616942205939525e-06,
      "loss": 0.0278,
      "step": 743320
    },
    {
      "epoch": 1.2164922134286444,
      "grad_norm": 1.076598048210144,
      "learning_rate": 7.616876313726007e-06,
      "loss": 0.0265,
      "step": 743340
    },
    {
      "epoch": 1.2165249438672978,
      "grad_norm": 0.17865918576717377,
      "learning_rate": 7.616810421512491e-06,
      "loss": 0.0218,
      "step": 743360
    },
    {
      "epoch": 1.216557674305951,
      "grad_norm": 0.6727559566497803,
      "learning_rate": 7.616744529298974e-06,
      "loss": 0.0158,
      "step": 743380
    },
    {
      "epoch": 1.2165904047446043,
      "grad_norm": 0.12382268905639648,
      "learning_rate": 7.616678637085456e-06,
      "loss": 0.0203,
      "step": 743400
    },
    {
      "epoch": 1.2166231351832577,
      "grad_norm": 1.2655805349349976,
      "learning_rate": 7.61661274487194e-06,
      "loss": 0.0215,
      "step": 743420
    },
    {
      "epoch": 1.216655865621911,
      "grad_norm": 0.5359184145927429,
      "learning_rate": 7.6165468526584215e-06,
      "loss": 0.029,
      "step": 743440
    },
    {
      "epoch": 1.2166885960605645,
      "grad_norm": 0.2333003133535385,
      "learning_rate": 7.616480960444905e-06,
      "loss": 0.025,
      "step": 743460
    },
    {
      "epoch": 1.2167213264992178,
      "grad_norm": 1.3438420295715332,
      "learning_rate": 7.616415068231388e-06,
      "loss": 0.0231,
      "step": 743480
    },
    {
      "epoch": 1.2167540569378712,
      "grad_norm": 0.5552283525466919,
      "learning_rate": 7.616349176017871e-06,
      "loss": 0.0175,
      "step": 743500
    },
    {
      "epoch": 1.2167867873765243,
      "grad_norm": 0.493442177772522,
      "learning_rate": 7.616283283804353e-06,
      "loss": 0.0254,
      "step": 743520
    },
    {
      "epoch": 1.2168195178151777,
      "grad_norm": 0.5121836066246033,
      "learning_rate": 7.616217391590837e-06,
      "loss": 0.0212,
      "step": 743540
    },
    {
      "epoch": 1.216852248253831,
      "grad_norm": 1.240515112876892,
      "learning_rate": 7.616151499377319e-06,
      "loss": 0.0222,
      "step": 743560
    },
    {
      "epoch": 1.2168849786924845,
      "grad_norm": 0.32375282049179077,
      "learning_rate": 7.616085607163802e-06,
      "loss": 0.019,
      "step": 743580
    },
    {
      "epoch": 1.2169177091311378,
      "grad_norm": 0.19867397844791412,
      "learning_rate": 7.616019714950284e-06,
      "loss": 0.0207,
      "step": 743600
    },
    {
      "epoch": 1.2169504395697912,
      "grad_norm": 0.6364567279815674,
      "learning_rate": 7.615953822736768e-06,
      "loss": 0.034,
      "step": 743620
    },
    {
      "epoch": 1.2169831700084444,
      "grad_norm": 1.1318645477294922,
      "learning_rate": 7.615887930523251e-06,
      "loss": 0.0222,
      "step": 743640
    },
    {
      "epoch": 1.2170159004470977,
      "grad_norm": 0.4224409759044647,
      "learning_rate": 7.615822038309733e-06,
      "loss": 0.0201,
      "step": 743660
    },
    {
      "epoch": 1.217048630885751,
      "grad_norm": 1.236400842666626,
      "learning_rate": 7.615756146096216e-06,
      "loss": 0.0296,
      "step": 743680
    },
    {
      "epoch": 1.2170813613244045,
      "grad_norm": 0.7493568062782288,
      "learning_rate": 7.6156902538827e-06,
      "loss": 0.0255,
      "step": 743700
    },
    {
      "epoch": 1.2171140917630578,
      "grad_norm": 0.5386133790016174,
      "learning_rate": 7.6156243616691824e-06,
      "loss": 0.0185,
      "step": 743720
    },
    {
      "epoch": 1.2171468222017112,
      "grad_norm": 0.7843613028526306,
      "learning_rate": 7.615558469455665e-06,
      "loss": 0.0205,
      "step": 743740
    },
    {
      "epoch": 1.2171795526403644,
      "grad_norm": 0.4269907772541046,
      "learning_rate": 7.615492577242149e-06,
      "loss": 0.025,
      "step": 743760
    },
    {
      "epoch": 1.2172122830790177,
      "grad_norm": 0.9989655613899231,
      "learning_rate": 7.615426685028631e-06,
      "loss": 0.0286,
      "step": 743780
    },
    {
      "epoch": 1.2172450135176711,
      "grad_norm": 0.0930076539516449,
      "learning_rate": 7.615360792815114e-06,
      "loss": 0.0186,
      "step": 743800
    },
    {
      "epoch": 1.2172777439563245,
      "grad_norm": 0.28138431906700134,
      "learning_rate": 7.615294900601596e-06,
      "loss": 0.0255,
      "step": 743820
    },
    {
      "epoch": 1.2173104743949779,
      "grad_norm": 1.1075509786605835,
      "learning_rate": 7.61522900838808e-06,
      "loss": 0.0182,
      "step": 743840
    },
    {
      "epoch": 1.2173432048336312,
      "grad_norm": 0.27297696471214294,
      "learning_rate": 7.6151631161745625e-06,
      "loss": 0.0152,
      "step": 743860
    },
    {
      "epoch": 1.2173759352722846,
      "grad_norm": 0.9652689099311829,
      "learning_rate": 7.615097223961045e-06,
      "loss": 0.0265,
      "step": 743880
    },
    {
      "epoch": 1.2174086657109378,
      "grad_norm": 0.41698476672172546,
      "learning_rate": 7.615031331747528e-06,
      "loss": 0.0181,
      "step": 743900
    },
    {
      "epoch": 1.2174413961495911,
      "grad_norm": 0.5320033431053162,
      "learning_rate": 7.6149654395340115e-06,
      "loss": 0.0277,
      "step": 743920
    },
    {
      "epoch": 1.2174741265882445,
      "grad_norm": 0.5870020389556885,
      "learning_rate": 7.6148995473204934e-06,
      "loss": 0.0253,
      "step": 743940
    },
    {
      "epoch": 1.2175068570268979,
      "grad_norm": 0.4318370223045349,
      "learning_rate": 7.614833655106977e-06,
      "loss": 0.0187,
      "step": 743960
    },
    {
      "epoch": 1.2175395874655512,
      "grad_norm": 0.42742979526519775,
      "learning_rate": 7.614767762893459e-06,
      "loss": 0.0326,
      "step": 743980
    },
    {
      "epoch": 1.2175723179042046,
      "grad_norm": 0.5435107350349426,
      "learning_rate": 7.6147018706799425e-06,
      "loss": 0.0233,
      "step": 744000
    },
    {
      "epoch": 1.217605048342858,
      "grad_norm": 1.5597904920578003,
      "learning_rate": 7.614635978466424e-06,
      "loss": 0.0213,
      "step": 744020
    },
    {
      "epoch": 1.2176377787815111,
      "grad_norm": 0.5387693643569946,
      "learning_rate": 7.614570086252908e-06,
      "loss": 0.0392,
      "step": 744040
    },
    {
      "epoch": 1.2176705092201645,
      "grad_norm": 0.3971322774887085,
      "learning_rate": 7.614504194039391e-06,
      "loss": 0.0237,
      "step": 744060
    },
    {
      "epoch": 1.2177032396588179,
      "grad_norm": 0.9285318851470947,
      "learning_rate": 7.6144383018258734e-06,
      "loss": 0.0247,
      "step": 744080
    },
    {
      "epoch": 1.2177359700974713,
      "grad_norm": 0.24909387528896332,
      "learning_rate": 7.614372409612357e-06,
      "loss": 0.0181,
      "step": 744100
    },
    {
      "epoch": 1.2177687005361246,
      "grad_norm": 0.2020719349384308,
      "learning_rate": 7.61430651739884e-06,
      "loss": 0.0183,
      "step": 744120
    },
    {
      "epoch": 1.217801430974778,
      "grad_norm": 0.4945096969604492,
      "learning_rate": 7.6142406251853225e-06,
      "loss": 0.0151,
      "step": 744140
    },
    {
      "epoch": 1.2178341614134314,
      "grad_norm": 0.647734522819519,
      "learning_rate": 7.614174732971805e-06,
      "loss": 0.0202,
      "step": 744160
    },
    {
      "epoch": 1.2178668918520845,
      "grad_norm": 1.007306694984436,
      "learning_rate": 7.614108840758289e-06,
      "loss": 0.0261,
      "step": 744180
    },
    {
      "epoch": 1.217899622290738,
      "grad_norm": 0.5215222239494324,
      "learning_rate": 7.614042948544771e-06,
      "loss": 0.0152,
      "step": 744200
    },
    {
      "epoch": 1.2179323527293913,
      "grad_norm": 0.1631106734275818,
      "learning_rate": 7.613977056331254e-06,
      "loss": 0.0221,
      "step": 744220
    },
    {
      "epoch": 1.2179650831680446,
      "grad_norm": 0.20799094438552856,
      "learning_rate": 7.613911164117736e-06,
      "loss": 0.0154,
      "step": 744240
    },
    {
      "epoch": 1.217997813606698,
      "grad_norm": 0.5334733128547668,
      "learning_rate": 7.61384527190422e-06,
      "loss": 0.0374,
      "step": 744260
    },
    {
      "epoch": 1.2180305440453514,
      "grad_norm": 1.4548184871673584,
      "learning_rate": 7.6137793796907025e-06,
      "loss": 0.028,
      "step": 744280
    },
    {
      "epoch": 1.2180632744840048,
      "grad_norm": 0.7506095170974731,
      "learning_rate": 7.613713487477185e-06,
      "loss": 0.0229,
      "step": 744300
    },
    {
      "epoch": 1.218096004922658,
      "grad_norm": 0.5547360181808472,
      "learning_rate": 7.613647595263668e-06,
      "loss": 0.0241,
      "step": 744320
    },
    {
      "epoch": 1.2181287353613113,
      "grad_norm": 0.3796024024486542,
      "learning_rate": 7.613581703050152e-06,
      "loss": 0.0234,
      "step": 744340
    },
    {
      "epoch": 1.2181614657999646,
      "grad_norm": 0.6039430499076843,
      "learning_rate": 7.6135158108366335e-06,
      "loss": 0.0225,
      "step": 744360
    },
    {
      "epoch": 1.218194196238618,
      "grad_norm": 2.0267040729522705,
      "learning_rate": 7.613449918623117e-06,
      "loss": 0.0233,
      "step": 744380
    },
    {
      "epoch": 1.2182269266772714,
      "grad_norm": 0.22179481387138367,
      "learning_rate": 7.613384026409599e-06,
      "loss": 0.018,
      "step": 744400
    },
    {
      "epoch": 1.2182596571159245,
      "grad_norm": 0.7784480452537537,
      "learning_rate": 7.6133181341960826e-06,
      "loss": 0.0208,
      "step": 744420
    },
    {
      "epoch": 1.218292387554578,
      "grad_norm": 0.07679769396781921,
      "learning_rate": 7.613252241982566e-06,
      "loss": 0.0149,
      "step": 744440
    },
    {
      "epoch": 1.2183251179932313,
      "grad_norm": 0.31681028008461,
      "learning_rate": 7.613186349769048e-06,
      "loss": 0.0146,
      "step": 744460
    },
    {
      "epoch": 1.2183578484318847,
      "grad_norm": 0.2653275728225708,
      "learning_rate": 7.613120457555532e-06,
      "loss": 0.0266,
      "step": 744480
    },
    {
      "epoch": 1.218390578870538,
      "grad_norm": 1.1046854257583618,
      "learning_rate": 7.613054565342014e-06,
      "loss": 0.0215,
      "step": 744500
    },
    {
      "epoch": 1.2184233093091914,
      "grad_norm": 1.1305994987487793,
      "learning_rate": 7.612988673128497e-06,
      "loss": 0.0222,
      "step": 744520
    },
    {
      "epoch": 1.2184560397478448,
      "grad_norm": 0.4913134276866913,
      "learning_rate": 7.61292278091498e-06,
      "loss": 0.0221,
      "step": 744540
    },
    {
      "epoch": 1.218488770186498,
      "grad_norm": 0.588165819644928,
      "learning_rate": 7.6128568887014634e-06,
      "loss": 0.022,
      "step": 744560
    },
    {
      "epoch": 1.2185215006251513,
      "grad_norm": 0.5620480179786682,
      "learning_rate": 7.612790996487945e-06,
      "loss": 0.0207,
      "step": 744580
    },
    {
      "epoch": 1.2185542310638047,
      "grad_norm": 0.2946660816669464,
      "learning_rate": 7.612725104274429e-06,
      "loss": 0.0292,
      "step": 744600
    },
    {
      "epoch": 1.218586961502458,
      "grad_norm": 0.6853589415550232,
      "learning_rate": 7.612659212060911e-06,
      "loss": 0.0273,
      "step": 744620
    },
    {
      "epoch": 1.2186196919411114,
      "grad_norm": 0.5782220363616943,
      "learning_rate": 7.612593319847394e-06,
      "loss": 0.0201,
      "step": 744640
    },
    {
      "epoch": 1.2186524223797648,
      "grad_norm": 1.2061717510223389,
      "learning_rate": 7.612527427633877e-06,
      "loss": 0.0269,
      "step": 744660
    },
    {
      "epoch": 1.2186851528184182,
      "grad_norm": 0.36912739276885986,
      "learning_rate": 7.61246153542036e-06,
      "loss": 0.0204,
      "step": 744680
    },
    {
      "epoch": 1.2187178832570713,
      "grad_norm": 0.42193499207496643,
      "learning_rate": 7.612395643206843e-06,
      "loss": 0.0206,
      "step": 744700
    },
    {
      "epoch": 1.2187506136957247,
      "grad_norm": 2.5627403259277344,
      "learning_rate": 7.612329750993326e-06,
      "loss": 0.0232,
      "step": 744720
    },
    {
      "epoch": 1.218783344134378,
      "grad_norm": 0.27406150102615356,
      "learning_rate": 7.612263858779808e-06,
      "loss": 0.0215,
      "step": 744740
    },
    {
      "epoch": 1.2188160745730314,
      "grad_norm": 0.4369010031223297,
      "learning_rate": 7.612197966566292e-06,
      "loss": 0.0162,
      "step": 744760
    },
    {
      "epoch": 1.2188488050116848,
      "grad_norm": 0.6392947435379028,
      "learning_rate": 7.612132074352775e-06,
      "loss": 0.0195,
      "step": 744780
    },
    {
      "epoch": 1.2188815354503382,
      "grad_norm": 0.3741818368434906,
      "learning_rate": 7.612066182139257e-06,
      "loss": 0.0286,
      "step": 744800
    },
    {
      "epoch": 1.2189142658889915,
      "grad_norm": 0.8136464357376099,
      "learning_rate": 7.612000289925741e-06,
      "loss": 0.0219,
      "step": 744820
    },
    {
      "epoch": 1.2189469963276447,
      "grad_norm": 0.881409227848053,
      "learning_rate": 7.611934397712223e-06,
      "loss": 0.0287,
      "step": 744840
    },
    {
      "epoch": 1.218979726766298,
      "grad_norm": 0.5743138194084167,
      "learning_rate": 7.611868505498706e-06,
      "loss": 0.0176,
      "step": 744860
    },
    {
      "epoch": 1.2190124572049514,
      "grad_norm": 0.45746251940727234,
      "learning_rate": 7.611802613285189e-06,
      "loss": 0.018,
      "step": 744880
    },
    {
      "epoch": 1.2190451876436048,
      "grad_norm": 0.9152499437332153,
      "learning_rate": 7.611736721071672e-06,
      "loss": 0.021,
      "step": 744900
    },
    {
      "epoch": 1.2190779180822582,
      "grad_norm": 0.3969852030277252,
      "learning_rate": 7.6116708288581544e-06,
      "loss": 0.0151,
      "step": 744920
    },
    {
      "epoch": 1.2191106485209116,
      "grad_norm": 0.08389586955308914,
      "learning_rate": 7.611604936644638e-06,
      "loss": 0.0221,
      "step": 744940
    },
    {
      "epoch": 1.219143378959565,
      "grad_norm": 0.9271623492240906,
      "learning_rate": 7.61153904443112e-06,
      "loss": 0.0186,
      "step": 744960
    },
    {
      "epoch": 1.219176109398218,
      "grad_norm": 0.62253338098526,
      "learning_rate": 7.6114731522176035e-06,
      "loss": 0.0156,
      "step": 744980
    },
    {
      "epoch": 1.2192088398368714,
      "grad_norm": 0.48415669798851013,
      "learning_rate": 7.611407260004085e-06,
      "loss": 0.0194,
      "step": 745000
    },
    {
      "epoch": 1.2192415702755248,
      "grad_norm": 0.8731017708778381,
      "learning_rate": 7.611341367790569e-06,
      "loss": 0.0201,
      "step": 745020
    },
    {
      "epoch": 1.2192743007141782,
      "grad_norm": 1.0627862215042114,
      "learning_rate": 7.611275475577051e-06,
      "loss": 0.0269,
      "step": 745040
    },
    {
      "epoch": 1.2193070311528316,
      "grad_norm": 0.28341561555862427,
      "learning_rate": 7.6112095833635345e-06,
      "loss": 0.0212,
      "step": 745060
    },
    {
      "epoch": 1.219339761591485,
      "grad_norm": 0.44761309027671814,
      "learning_rate": 7.611143691150017e-06,
      "loss": 0.0212,
      "step": 745080
    },
    {
      "epoch": 1.2193724920301383,
      "grad_norm": 0.6631624698638916,
      "learning_rate": 7.6110777989365e-06,
      "loss": 0.0253,
      "step": 745100
    },
    {
      "epoch": 1.2194052224687915,
      "grad_norm": 0.2528966963291168,
      "learning_rate": 7.611011906722983e-06,
      "loss": 0.0221,
      "step": 745120
    },
    {
      "epoch": 1.2194379529074448,
      "grad_norm": 0.15515364706516266,
      "learning_rate": 7.610946014509466e-06,
      "loss": 0.023,
      "step": 745140
    },
    {
      "epoch": 1.2194706833460982,
      "grad_norm": 0.3779759109020233,
      "learning_rate": 7.610880122295949e-06,
      "loss": 0.0221,
      "step": 745160
    },
    {
      "epoch": 1.2195034137847516,
      "grad_norm": 0.5964851379394531,
      "learning_rate": 7.610814230082432e-06,
      "loss": 0.0192,
      "step": 745180
    },
    {
      "epoch": 1.219536144223405,
      "grad_norm": 0.4350168704986572,
      "learning_rate": 7.610748337868915e-06,
      "loss": 0.0235,
      "step": 745200
    },
    {
      "epoch": 1.219568874662058,
      "grad_norm": 0.31328731775283813,
      "learning_rate": 7.610682445655397e-06,
      "loss": 0.0289,
      "step": 745220
    },
    {
      "epoch": 1.2196016051007115,
      "grad_norm": 0.5395581126213074,
      "learning_rate": 7.610616553441881e-06,
      "loss": 0.0205,
      "step": 745240
    },
    {
      "epoch": 1.2196343355393648,
      "grad_norm": 0.5344977974891663,
      "learning_rate": 7.610550661228363e-06,
      "loss": 0.0232,
      "step": 745260
    },
    {
      "epoch": 1.2196670659780182,
      "grad_norm": 0.5419031381607056,
      "learning_rate": 7.610484769014846e-06,
      "loss": 0.0215,
      "step": 745280
    },
    {
      "epoch": 1.2196997964166716,
      "grad_norm": 0.35793793201446533,
      "learning_rate": 7.610418876801329e-06,
      "loss": 0.0246,
      "step": 745300
    },
    {
      "epoch": 1.219732526855325,
      "grad_norm": 1.057300090789795,
      "learning_rate": 7.610352984587812e-06,
      "loss": 0.0204,
      "step": 745320
    },
    {
      "epoch": 1.2197652572939783,
      "grad_norm": 0.6737678050994873,
      "learning_rate": 7.6102870923742945e-06,
      "loss": 0.0115,
      "step": 745340
    },
    {
      "epoch": 1.2197979877326315,
      "grad_norm": 0.779901921749115,
      "learning_rate": 7.610221200160778e-06,
      "loss": 0.0174,
      "step": 745360
    },
    {
      "epoch": 1.2198307181712849,
      "grad_norm": 0.8692992925643921,
      "learning_rate": 7.61015530794726e-06,
      "loss": 0.0208,
      "step": 745380
    },
    {
      "epoch": 1.2198634486099382,
      "grad_norm": 0.7416946291923523,
      "learning_rate": 7.610089415733744e-06,
      "loss": 0.0193,
      "step": 745400
    },
    {
      "epoch": 1.2198961790485916,
      "grad_norm": 1.288360595703125,
      "learning_rate": 7.6100235235202255e-06,
      "loss": 0.0152,
      "step": 745420
    },
    {
      "epoch": 1.219928909487245,
      "grad_norm": 0.31461843848228455,
      "learning_rate": 7.609957631306709e-06,
      "loss": 0.0216,
      "step": 745440
    },
    {
      "epoch": 1.2199616399258983,
      "grad_norm": 1.764029860496521,
      "learning_rate": 7.609891739093192e-06,
      "loss": 0.0248,
      "step": 745460
    },
    {
      "epoch": 1.2199943703645517,
      "grad_norm": 1.1390780210494995,
      "learning_rate": 7.6098258468796745e-06,
      "loss": 0.0253,
      "step": 745480
    },
    {
      "epoch": 1.2200271008032049,
      "grad_norm": 0.5824940800666809,
      "learning_rate": 7.609759954666158e-06,
      "loss": 0.0184,
      "step": 745500
    },
    {
      "epoch": 1.2200598312418582,
      "grad_norm": 0.18082717061042786,
      "learning_rate": 7.609694062452641e-06,
      "loss": 0.0257,
      "step": 745520
    },
    {
      "epoch": 1.2200925616805116,
      "grad_norm": 0.10110118985176086,
      "learning_rate": 7.609628170239124e-06,
      "loss": 0.0183,
      "step": 745540
    },
    {
      "epoch": 1.220125292119165,
      "grad_norm": 0.5240595936775208,
      "learning_rate": 7.609562278025606e-06,
      "loss": 0.0233,
      "step": 745560
    },
    {
      "epoch": 1.2201580225578184,
      "grad_norm": 0.6111846566200256,
      "learning_rate": 7.60949638581209e-06,
      "loss": 0.0314,
      "step": 745580
    },
    {
      "epoch": 1.2201907529964717,
      "grad_norm": 1.6237040758132935,
      "learning_rate": 7.609430493598572e-06,
      "loss": 0.0184,
      "step": 745600
    },
    {
      "epoch": 1.220223483435125,
      "grad_norm": 0.5973697304725647,
      "learning_rate": 7.609364601385055e-06,
      "loss": 0.0255,
      "step": 745620
    },
    {
      "epoch": 1.2202562138737783,
      "grad_norm": 0.6760272979736328,
      "learning_rate": 7.609298709171537e-06,
      "loss": 0.0236,
      "step": 745640
    },
    {
      "epoch": 1.2202889443124316,
      "grad_norm": 0.6453173160552979,
      "learning_rate": 7.609232816958021e-06,
      "loss": 0.0227,
      "step": 745660
    },
    {
      "epoch": 1.220321674751085,
      "grad_norm": 0.42070329189300537,
      "learning_rate": 7.609166924744504e-06,
      "loss": 0.0248,
      "step": 745680
    },
    {
      "epoch": 1.2203544051897384,
      "grad_norm": 0.8415704965591431,
      "learning_rate": 7.609101032530986e-06,
      "loss": 0.0258,
      "step": 745700
    },
    {
      "epoch": 1.2203871356283917,
      "grad_norm": 0.5193411111831665,
      "learning_rate": 7.609035140317469e-06,
      "loss": 0.0202,
      "step": 745720
    },
    {
      "epoch": 1.2204198660670451,
      "grad_norm": 0.92844158411026,
      "learning_rate": 7.608969248103953e-06,
      "loss": 0.0214,
      "step": 745740
    },
    {
      "epoch": 1.2204525965056985,
      "grad_norm": 0.5884507298469543,
      "learning_rate": 7.608903355890435e-06,
      "loss": 0.0288,
      "step": 745760
    },
    {
      "epoch": 1.2204853269443516,
      "grad_norm": 1.1162596940994263,
      "learning_rate": 7.608837463676918e-06,
      "loss": 0.0238,
      "step": 745780
    },
    {
      "epoch": 1.220518057383005,
      "grad_norm": 1.0328564643859863,
      "learning_rate": 7.6087715714634e-06,
      "loss": 0.0201,
      "step": 745800
    },
    {
      "epoch": 1.2205507878216584,
      "grad_norm": 1.0282121896743774,
      "learning_rate": 7.608705679249884e-06,
      "loss": 0.0209,
      "step": 745820
    },
    {
      "epoch": 1.2205835182603118,
      "grad_norm": 0.9048112630844116,
      "learning_rate": 7.608639787036367e-06,
      "loss": 0.0268,
      "step": 745840
    },
    {
      "epoch": 1.2206162486989651,
      "grad_norm": 2.234522581100464,
      "learning_rate": 7.608573894822849e-06,
      "loss": 0.0243,
      "step": 745860
    },
    {
      "epoch": 1.2206489791376185,
      "grad_norm": 1.13972806930542,
      "learning_rate": 7.608508002609333e-06,
      "loss": 0.0206,
      "step": 745880
    },
    {
      "epoch": 1.2206817095762716,
      "grad_norm": 1.7236433029174805,
      "learning_rate": 7.608442110395815e-06,
      "loss": 0.0255,
      "step": 745900
    },
    {
      "epoch": 1.220714440014925,
      "grad_norm": 0.5706793665885925,
      "learning_rate": 7.608376218182298e-06,
      "loss": 0.0222,
      "step": 745920
    },
    {
      "epoch": 1.2207471704535784,
      "grad_norm": 1.307201623916626,
      "learning_rate": 7.608310325968781e-06,
      "loss": 0.029,
      "step": 745940
    },
    {
      "epoch": 1.2207799008922318,
      "grad_norm": 0.5696625113487244,
      "learning_rate": 7.608244433755264e-06,
      "loss": 0.0131,
      "step": 745960
    },
    {
      "epoch": 1.2208126313308851,
      "grad_norm": 0.7327516078948975,
      "learning_rate": 7.6081785415417464e-06,
      "loss": 0.0195,
      "step": 745980
    },
    {
      "epoch": 1.2208453617695385,
      "grad_norm": 0.3626879155635834,
      "learning_rate": 7.60811264932823e-06,
      "loss": 0.0204,
      "step": 746000
    },
    {
      "epoch": 1.2208780922081917,
      "grad_norm": 1.9102122783660889,
      "learning_rate": 7.608046757114712e-06,
      "loss": 0.0305,
      "step": 746020
    },
    {
      "epoch": 1.220910822646845,
      "grad_norm": 1.6358286142349243,
      "learning_rate": 7.6079808649011955e-06,
      "loss": 0.0241,
      "step": 746040
    },
    {
      "epoch": 1.2209435530854984,
      "grad_norm": 0.5223932266235352,
      "learning_rate": 7.607914972687677e-06,
      "loss": 0.0173,
      "step": 746060
    },
    {
      "epoch": 1.2209762835241518,
      "grad_norm": 1.1955939531326294,
      "learning_rate": 7.607849080474161e-06,
      "loss": 0.0264,
      "step": 746080
    },
    {
      "epoch": 1.2210090139628051,
      "grad_norm": 0.30876243114471436,
      "learning_rate": 7.607783188260644e-06,
      "loss": 0.0204,
      "step": 746100
    },
    {
      "epoch": 1.2210417444014585,
      "grad_norm": 0.5143480896949768,
      "learning_rate": 7.6077172960471265e-06,
      "loss": 0.0213,
      "step": 746120
    },
    {
      "epoch": 1.221074474840112,
      "grad_norm": 0.1724640280008316,
      "learning_rate": 7.607651403833609e-06,
      "loss": 0.0261,
      "step": 746140
    },
    {
      "epoch": 1.221107205278765,
      "grad_norm": 0.17841345071792603,
      "learning_rate": 7.607585511620093e-06,
      "loss": 0.023,
      "step": 746160
    },
    {
      "epoch": 1.2211399357174184,
      "grad_norm": 0.3772389590740204,
      "learning_rate": 7.607519619406575e-06,
      "loss": 0.0174,
      "step": 746180
    },
    {
      "epoch": 1.2211726661560718,
      "grad_norm": 2.924541473388672,
      "learning_rate": 7.607453727193058e-06,
      "loss": 0.0341,
      "step": 746200
    },
    {
      "epoch": 1.2212053965947252,
      "grad_norm": 0.7629700899124146,
      "learning_rate": 7.607387834979542e-06,
      "loss": 0.0309,
      "step": 746220
    },
    {
      "epoch": 1.2212381270333785,
      "grad_norm": 1.793771743774414,
      "learning_rate": 7.607321942766024e-06,
      "loss": 0.0265,
      "step": 746240
    },
    {
      "epoch": 1.221270857472032,
      "grad_norm": 1.0655957460403442,
      "learning_rate": 7.607256050552507e-06,
      "loss": 0.0229,
      "step": 746260
    },
    {
      "epoch": 1.2213035879106853,
      "grad_norm": 0.2136191874742508,
      "learning_rate": 7.607190158338989e-06,
      "loss": 0.0162,
      "step": 746280
    },
    {
      "epoch": 1.2213363183493384,
      "grad_norm": 0.21984796226024628,
      "learning_rate": 7.607124266125473e-06,
      "loss": 0.0183,
      "step": 746300
    },
    {
      "epoch": 1.2213690487879918,
      "grad_norm": 0.39465656876564026,
      "learning_rate": 7.6070583739119555e-06,
      "loss": 0.0179,
      "step": 746320
    },
    {
      "epoch": 1.2214017792266452,
      "grad_norm": 0.6675893664360046,
      "learning_rate": 7.606992481698438e-06,
      "loss": 0.0194,
      "step": 746340
    },
    {
      "epoch": 1.2214345096652985,
      "grad_norm": 0.767991840839386,
      "learning_rate": 7.606926589484921e-06,
      "loss": 0.0307,
      "step": 746360
    },
    {
      "epoch": 1.221467240103952,
      "grad_norm": 0.38762518763542175,
      "learning_rate": 7.606860697271405e-06,
      "loss": 0.0164,
      "step": 746380
    },
    {
      "epoch": 1.2214999705426053,
      "grad_norm": 0.8503715991973877,
      "learning_rate": 7.6067948050578865e-06,
      "loss": 0.0237,
      "step": 746400
    },
    {
      "epoch": 1.2215327009812587,
      "grad_norm": 0.29344597458839417,
      "learning_rate": 7.60672891284437e-06,
      "loss": 0.028,
      "step": 746420
    },
    {
      "epoch": 1.2215654314199118,
      "grad_norm": 0.641589879989624,
      "learning_rate": 7.606663020630852e-06,
      "loss": 0.018,
      "step": 746440
    },
    {
      "epoch": 1.2215981618585652,
      "grad_norm": 0.2620176672935486,
      "learning_rate": 7.6065971284173356e-06,
      "loss": 0.0285,
      "step": 746460
    },
    {
      "epoch": 1.2216308922972186,
      "grad_norm": 1.036178469657898,
      "learning_rate": 7.606531236203818e-06,
      "loss": 0.0221,
      "step": 746480
    },
    {
      "epoch": 1.221663622735872,
      "grad_norm": 0.20882926881313324,
      "learning_rate": 7.606465343990301e-06,
      "loss": 0.0155,
      "step": 746500
    },
    {
      "epoch": 1.2216963531745253,
      "grad_norm": 1.248953104019165,
      "learning_rate": 7.606399451776784e-06,
      "loss": 0.0198,
      "step": 746520
    },
    {
      "epoch": 1.2217290836131787,
      "grad_norm": 0.42572206258773804,
      "learning_rate": 7.606333559563267e-06,
      "loss": 0.0269,
      "step": 746540
    },
    {
      "epoch": 1.221761814051832,
      "grad_norm": 0.7457029819488525,
      "learning_rate": 7.60626766734975e-06,
      "loss": 0.0212,
      "step": 746560
    },
    {
      "epoch": 1.2217945444904852,
      "grad_norm": 1.891893744468689,
      "learning_rate": 7.606201775136233e-06,
      "loss": 0.0225,
      "step": 746580
    },
    {
      "epoch": 1.2218272749291386,
      "grad_norm": 0.14670541882514954,
      "learning_rate": 7.6061358829227164e-06,
      "loss": 0.0154,
      "step": 746600
    },
    {
      "epoch": 1.221860005367792,
      "grad_norm": 0.47345060110092163,
      "learning_rate": 7.606069990709198e-06,
      "loss": 0.0218,
      "step": 746620
    },
    {
      "epoch": 1.2218927358064453,
      "grad_norm": 0.663638710975647,
      "learning_rate": 7.606004098495682e-06,
      "loss": 0.0167,
      "step": 746640
    },
    {
      "epoch": 1.2219254662450987,
      "grad_norm": 1.0971940755844116,
      "learning_rate": 7.605938206282164e-06,
      "loss": 0.022,
      "step": 746660
    },
    {
      "epoch": 1.221958196683752,
      "grad_norm": 0.39337030053138733,
      "learning_rate": 7.605872314068647e-06,
      "loss": 0.0254,
      "step": 746680
    },
    {
      "epoch": 1.2219909271224052,
      "grad_norm": 0.5089328289031982,
      "learning_rate": 7.60580642185513e-06,
      "loss": 0.0213,
      "step": 746700
    },
    {
      "epoch": 1.2220236575610586,
      "grad_norm": 0.7471198439598083,
      "learning_rate": 7.605740529641613e-06,
      "loss": 0.0197,
      "step": 746720
    },
    {
      "epoch": 1.222056387999712,
      "grad_norm": 0.37488844990730286,
      "learning_rate": 7.605674637428096e-06,
      "loss": 0.0314,
      "step": 746740
    },
    {
      "epoch": 1.2220891184383653,
      "grad_norm": 1.0524370670318604,
      "learning_rate": 7.605608745214579e-06,
      "loss": 0.0218,
      "step": 746760
    },
    {
      "epoch": 1.2221218488770187,
      "grad_norm": 0.8270670175552368,
      "learning_rate": 7.605542853001061e-06,
      "loss": 0.0253,
      "step": 746780
    },
    {
      "epoch": 1.222154579315672,
      "grad_norm": 0.615350604057312,
      "learning_rate": 7.605476960787545e-06,
      "loss": 0.0262,
      "step": 746800
    },
    {
      "epoch": 1.2221873097543252,
      "grad_norm": 0.6215196847915649,
      "learning_rate": 7.605411068574027e-06,
      "loss": 0.0233,
      "step": 746820
    },
    {
      "epoch": 1.2222200401929786,
      "grad_norm": 0.3712833523750305,
      "learning_rate": 7.60534517636051e-06,
      "loss": 0.022,
      "step": 746840
    },
    {
      "epoch": 1.222252770631632,
      "grad_norm": 0.4985560476779938,
      "learning_rate": 7.605279284146992e-06,
      "loss": 0.0323,
      "step": 746860
    },
    {
      "epoch": 1.2222855010702853,
      "grad_norm": 1.137491226196289,
      "learning_rate": 7.605213391933476e-06,
      "loss": 0.018,
      "step": 746880
    },
    {
      "epoch": 1.2223182315089387,
      "grad_norm": 1.1328887939453125,
      "learning_rate": 7.605147499719959e-06,
      "loss": 0.0316,
      "step": 746900
    },
    {
      "epoch": 1.222350961947592,
      "grad_norm": 0.28166499733924866,
      "learning_rate": 7.605081607506441e-06,
      "loss": 0.0196,
      "step": 746920
    },
    {
      "epoch": 1.2223836923862454,
      "grad_norm": 0.2888198494911194,
      "learning_rate": 7.605015715292925e-06,
      "loss": 0.0255,
      "step": 746940
    },
    {
      "epoch": 1.2224164228248986,
      "grad_norm": 0.4800111949443817,
      "learning_rate": 7.6049498230794075e-06,
      "loss": 0.0216,
      "step": 746960
    },
    {
      "epoch": 1.222449153263552,
      "grad_norm": 0.8298851847648621,
      "learning_rate": 7.60488393086589e-06,
      "loss": 0.0242,
      "step": 746980
    },
    {
      "epoch": 1.2224818837022053,
      "grad_norm": 1.2419497966766357,
      "learning_rate": 7.604818038652373e-06,
      "loss": 0.0285,
      "step": 747000
    },
    {
      "epoch": 1.2225146141408587,
      "grad_norm": 0.43665575981140137,
      "learning_rate": 7.6047521464388565e-06,
      "loss": 0.0173,
      "step": 747020
    },
    {
      "epoch": 1.222547344579512,
      "grad_norm": 0.6045626401901245,
      "learning_rate": 7.604686254225338e-06,
      "loss": 0.0239,
      "step": 747040
    },
    {
      "epoch": 1.2225800750181655,
      "grad_norm": 0.7549538612365723,
      "learning_rate": 7.604620362011822e-06,
      "loss": 0.0208,
      "step": 747060
    },
    {
      "epoch": 1.2226128054568188,
      "grad_norm": 0.9637979865074158,
      "learning_rate": 7.604554469798304e-06,
      "loss": 0.0166,
      "step": 747080
    },
    {
      "epoch": 1.222645535895472,
      "grad_norm": 0.23881882429122925,
      "learning_rate": 7.6044885775847875e-06,
      "loss": 0.021,
      "step": 747100
    },
    {
      "epoch": 1.2226782663341254,
      "grad_norm": 0.14319875836372375,
      "learning_rate": 7.60442268537127e-06,
      "loss": 0.0262,
      "step": 747120
    },
    {
      "epoch": 1.2227109967727787,
      "grad_norm": 0.3844813108444214,
      "learning_rate": 7.604356793157753e-06,
      "loss": 0.0215,
      "step": 747140
    },
    {
      "epoch": 1.222743727211432,
      "grad_norm": 0.6014012694358826,
      "learning_rate": 7.604290900944236e-06,
      "loss": 0.0176,
      "step": 747160
    },
    {
      "epoch": 1.2227764576500855,
      "grad_norm": 0.7857732772827148,
      "learning_rate": 7.604225008730719e-06,
      "loss": 0.025,
      "step": 747180
    },
    {
      "epoch": 1.2228091880887388,
      "grad_norm": 0.8934775590896606,
      "learning_rate": 7.604159116517201e-06,
      "loss": 0.0272,
      "step": 747200
    },
    {
      "epoch": 1.2228419185273922,
      "grad_norm": 0.8785191774368286,
      "learning_rate": 7.604093224303685e-06,
      "loss": 0.0274,
      "step": 747220
    },
    {
      "epoch": 1.2228746489660454,
      "grad_norm": 1.4956363439559937,
      "learning_rate": 7.604027332090168e-06,
      "loss": 0.0215,
      "step": 747240
    },
    {
      "epoch": 1.2229073794046987,
      "grad_norm": 0.3748806416988373,
      "learning_rate": 7.60396143987665e-06,
      "loss": 0.0149,
      "step": 747260
    },
    {
      "epoch": 1.222940109843352,
      "grad_norm": 0.319182425737381,
      "learning_rate": 7.603895547663134e-06,
      "loss": 0.0274,
      "step": 747280
    },
    {
      "epoch": 1.2229728402820055,
      "grad_norm": 2.1012606620788574,
      "learning_rate": 7.603829655449616e-06,
      "loss": 0.0218,
      "step": 747300
    },
    {
      "epoch": 1.2230055707206589,
      "grad_norm": 0.855010449886322,
      "learning_rate": 7.603763763236099e-06,
      "loss": 0.0215,
      "step": 747320
    },
    {
      "epoch": 1.2230383011593122,
      "grad_norm": 1.1702282428741455,
      "learning_rate": 7.603697871022582e-06,
      "loss": 0.0306,
      "step": 747340
    },
    {
      "epoch": 1.2230710315979656,
      "grad_norm": 1.9186606407165527,
      "learning_rate": 7.603631978809065e-06,
      "loss": 0.0241,
      "step": 747360
    },
    {
      "epoch": 1.2231037620366187,
      "grad_norm": 0.2762635052204132,
      "learning_rate": 7.6035660865955475e-06,
      "loss": 0.0159,
      "step": 747380
    },
    {
      "epoch": 1.2231364924752721,
      "grad_norm": 0.17354890704154968,
      "learning_rate": 7.603500194382031e-06,
      "loss": 0.0142,
      "step": 747400
    },
    {
      "epoch": 1.2231692229139255,
      "grad_norm": 1.5079470872879028,
      "learning_rate": 7.603434302168513e-06,
      "loss": 0.0175,
      "step": 747420
    },
    {
      "epoch": 1.2232019533525789,
      "grad_norm": 0.16585904359817505,
      "learning_rate": 7.603368409954997e-06,
      "loss": 0.0304,
      "step": 747440
    },
    {
      "epoch": 1.2232346837912322,
      "grad_norm": 0.9598339200019836,
      "learning_rate": 7.6033025177414785e-06,
      "loss": 0.0302,
      "step": 747460
    },
    {
      "epoch": 1.2232674142298854,
      "grad_norm": 0.24782784283161163,
      "learning_rate": 7.603236625527962e-06,
      "loss": 0.0294,
      "step": 747480
    },
    {
      "epoch": 1.2233001446685388,
      "grad_norm": 0.3110112249851227,
      "learning_rate": 7.603170733314445e-06,
      "loss": 0.0184,
      "step": 747500
    },
    {
      "epoch": 1.2233328751071921,
      "grad_norm": 0.6034086346626282,
      "learning_rate": 7.6031048411009276e-06,
      "loss": 0.0218,
      "step": 747520
    },
    {
      "epoch": 1.2233656055458455,
      "grad_norm": 0.5894471406936646,
      "learning_rate": 7.60303894888741e-06,
      "loss": 0.0224,
      "step": 747540
    },
    {
      "epoch": 1.2233983359844989,
      "grad_norm": 0.2660946547985077,
      "learning_rate": 7.602973056673894e-06,
      "loss": 0.014,
      "step": 747560
    },
    {
      "epoch": 1.2234310664231522,
      "grad_norm": 1.1272207498550415,
      "learning_rate": 7.602907164460376e-06,
      "loss": 0.0166,
      "step": 747580
    },
    {
      "epoch": 1.2234637968618056,
      "grad_norm": 1.157528281211853,
      "learning_rate": 7.602841272246859e-06,
      "loss": 0.024,
      "step": 747600
    },
    {
      "epoch": 1.2234965273004588,
      "grad_norm": 0.611988365650177,
      "learning_rate": 7.602775380033343e-06,
      "loss": 0.0253,
      "step": 747620
    },
    {
      "epoch": 1.2235292577391121,
      "grad_norm": 0.21689081192016602,
      "learning_rate": 7.602709487819825e-06,
      "loss": 0.0241,
      "step": 747640
    },
    {
      "epoch": 1.2235619881777655,
      "grad_norm": 1.719251275062561,
      "learning_rate": 7.6026435956063084e-06,
      "loss": 0.0281,
      "step": 747660
    },
    {
      "epoch": 1.2235947186164189,
      "grad_norm": 0.4880582392215729,
      "learning_rate": 7.60257770339279e-06,
      "loss": 0.0269,
      "step": 747680
    },
    {
      "epoch": 1.2236274490550723,
      "grad_norm": 0.8256381154060364,
      "learning_rate": 7.602511811179274e-06,
      "loss": 0.0357,
      "step": 747700
    },
    {
      "epoch": 1.2236601794937256,
      "grad_norm": 0.3496822714805603,
      "learning_rate": 7.602445918965757e-06,
      "loss": 0.0182,
      "step": 747720
    },
    {
      "epoch": 1.223692909932379,
      "grad_norm": 0.686122715473175,
      "learning_rate": 7.602380026752239e-06,
      "loss": 0.0321,
      "step": 747740
    },
    {
      "epoch": 1.2237256403710322,
      "grad_norm": 1.4516047239303589,
      "learning_rate": 7.602314134538722e-06,
      "loss": 0.0215,
      "step": 747760
    },
    {
      "epoch": 1.2237583708096855,
      "grad_norm": 0.46940866112709045,
      "learning_rate": 7.602248242325206e-06,
      "loss": 0.0344,
      "step": 747780
    },
    {
      "epoch": 1.223791101248339,
      "grad_norm": 0.5376430153846741,
      "learning_rate": 7.602182350111688e-06,
      "loss": 0.0182,
      "step": 747800
    },
    {
      "epoch": 1.2238238316869923,
      "grad_norm": 0.5618821382522583,
      "learning_rate": 7.602116457898171e-06,
      "loss": 0.023,
      "step": 747820
    },
    {
      "epoch": 1.2238565621256456,
      "grad_norm": 1.6134604215621948,
      "learning_rate": 7.602050565684653e-06,
      "loss": 0.0281,
      "step": 747840
    },
    {
      "epoch": 1.223889292564299,
      "grad_norm": 0.4456731379032135,
      "learning_rate": 7.601984673471137e-06,
      "loss": 0.0253,
      "step": 747860
    },
    {
      "epoch": 1.2239220230029524,
      "grad_norm": 0.4539532959461212,
      "learning_rate": 7.6019187812576186e-06,
      "loss": 0.0162,
      "step": 747880
    },
    {
      "epoch": 1.2239547534416055,
      "grad_norm": 1.1872516870498657,
      "learning_rate": 7.601852889044102e-06,
      "loss": 0.0229,
      "step": 747900
    },
    {
      "epoch": 1.223987483880259,
      "grad_norm": 1.7213929891586304,
      "learning_rate": 7.601786996830585e-06,
      "loss": 0.0297,
      "step": 747920
    },
    {
      "epoch": 1.2240202143189123,
      "grad_norm": 0.5062416791915894,
      "learning_rate": 7.601721104617068e-06,
      "loss": 0.0224,
      "step": 747940
    },
    {
      "epoch": 1.2240529447575657,
      "grad_norm": 0.2312597930431366,
      "learning_rate": 7.601655212403551e-06,
      "loss": 0.0221,
      "step": 747960
    },
    {
      "epoch": 1.224085675196219,
      "grad_norm": 1.3085312843322754,
      "learning_rate": 7.601589320190034e-06,
      "loss": 0.0233,
      "step": 747980
    },
    {
      "epoch": 1.2241184056348724,
      "grad_norm": 0.5099705457687378,
      "learning_rate": 7.601523427976517e-06,
      "loss": 0.0236,
      "step": 748000
    },
    {
      "epoch": 1.2241511360735258,
      "grad_norm": 1.263461947441101,
      "learning_rate": 7.6014575357629994e-06,
      "loss": 0.0182,
      "step": 748020
    },
    {
      "epoch": 1.224183866512179,
      "grad_norm": 0.5778378844261169,
      "learning_rate": 7.601391643549483e-06,
      "loss": 0.0256,
      "step": 748040
    },
    {
      "epoch": 1.2242165969508323,
      "grad_norm": 0.34371131658554077,
      "learning_rate": 7.601325751335965e-06,
      "loss": 0.0291,
      "step": 748060
    },
    {
      "epoch": 1.2242493273894857,
      "grad_norm": 0.2618287205696106,
      "learning_rate": 7.6012598591224485e-06,
      "loss": 0.0217,
      "step": 748080
    },
    {
      "epoch": 1.224282057828139,
      "grad_norm": 0.39208316802978516,
      "learning_rate": 7.60119396690893e-06,
      "loss": 0.0228,
      "step": 748100
    },
    {
      "epoch": 1.2243147882667924,
      "grad_norm": 0.8280256986618042,
      "learning_rate": 7.601128074695414e-06,
      "loss": 0.0233,
      "step": 748120
    },
    {
      "epoch": 1.2243475187054458,
      "grad_norm": 0.13920633494853973,
      "learning_rate": 7.601062182481897e-06,
      "loss": 0.0257,
      "step": 748140
    },
    {
      "epoch": 1.2243802491440992,
      "grad_norm": 0.21846775710582733,
      "learning_rate": 7.6009962902683795e-06,
      "loss": 0.037,
      "step": 748160
    },
    {
      "epoch": 1.2244129795827523,
      "grad_norm": 1.0538463592529297,
      "learning_rate": 7.600930398054862e-06,
      "loss": 0.0235,
      "step": 748180
    },
    {
      "epoch": 1.2244457100214057,
      "grad_norm": 0.406107097864151,
      "learning_rate": 7.600864505841346e-06,
      "loss": 0.0209,
      "step": 748200
    },
    {
      "epoch": 1.224478440460059,
      "grad_norm": 0.9486795663833618,
      "learning_rate": 7.600798613627828e-06,
      "loss": 0.023,
      "step": 748220
    },
    {
      "epoch": 1.2245111708987124,
      "grad_norm": 0.2812109589576721,
      "learning_rate": 7.600732721414311e-06,
      "loss": 0.0178,
      "step": 748240
    },
    {
      "epoch": 1.2245439013373658,
      "grad_norm": 0.9462286829948425,
      "learning_rate": 7.600666829200793e-06,
      "loss": 0.0196,
      "step": 748260
    },
    {
      "epoch": 1.224576631776019,
      "grad_norm": 0.28880736231803894,
      "learning_rate": 7.600600936987277e-06,
      "loss": 0.0249,
      "step": 748280
    },
    {
      "epoch": 1.2246093622146723,
      "grad_norm": 1.9708665609359741,
      "learning_rate": 7.60053504477376e-06,
      "loss": 0.0194,
      "step": 748300
    },
    {
      "epoch": 1.2246420926533257,
      "grad_norm": 0.9856908917427063,
      "learning_rate": 7.600469152560242e-06,
      "loss": 0.0221,
      "step": 748320
    },
    {
      "epoch": 1.224674823091979,
      "grad_norm": 0.24138617515563965,
      "learning_rate": 7.600403260346726e-06,
      "loss": 0.0254,
      "step": 748340
    },
    {
      "epoch": 1.2247075535306324,
      "grad_norm": 0.3563101589679718,
      "learning_rate": 7.6003373681332086e-06,
      "loss": 0.0243,
      "step": 748360
    },
    {
      "epoch": 1.2247402839692858,
      "grad_norm": 0.3114442229270935,
      "learning_rate": 7.600271475919691e-06,
      "loss": 0.0181,
      "step": 748380
    },
    {
      "epoch": 1.2247730144079392,
      "grad_norm": 1.3667560815811157,
      "learning_rate": 7.600205583706174e-06,
      "loss": 0.0307,
      "step": 748400
    },
    {
      "epoch": 1.2248057448465923,
      "grad_norm": 0.3532816469669342,
      "learning_rate": 7.600139691492658e-06,
      "loss": 0.0245,
      "step": 748420
    },
    {
      "epoch": 1.2248384752852457,
      "grad_norm": 0.3789465129375458,
      "learning_rate": 7.6000737992791395e-06,
      "loss": 0.0238,
      "step": 748440
    },
    {
      "epoch": 1.224871205723899,
      "grad_norm": 0.2288414090871811,
      "learning_rate": 7.600007907065623e-06,
      "loss": 0.0333,
      "step": 748460
    },
    {
      "epoch": 1.2249039361625524,
      "grad_norm": 0.22849063575267792,
      "learning_rate": 7.599942014852105e-06,
      "loss": 0.0201,
      "step": 748480
    },
    {
      "epoch": 1.2249366666012058,
      "grad_norm": 0.7335839867591858,
      "learning_rate": 7.599876122638589e-06,
      "loss": 0.0211,
      "step": 748500
    },
    {
      "epoch": 1.2249693970398592,
      "grad_norm": 1.4240578413009644,
      "learning_rate": 7.599810230425071e-06,
      "loss": 0.0266,
      "step": 748520
    },
    {
      "epoch": 1.2250021274785126,
      "grad_norm": 0.18395470082759857,
      "learning_rate": 7.599744338211554e-06,
      "loss": 0.0154,
      "step": 748540
    },
    {
      "epoch": 1.2250348579171657,
      "grad_norm": 0.2326430082321167,
      "learning_rate": 7.599678445998037e-06,
      "loss": 0.0181,
      "step": 748560
    },
    {
      "epoch": 1.225067588355819,
      "grad_norm": 0.2541615664958954,
      "learning_rate": 7.59961255378452e-06,
      "loss": 0.0304,
      "step": 748580
    },
    {
      "epoch": 1.2251003187944725,
      "grad_norm": 0.30792471766471863,
      "learning_rate": 7.599546661571002e-06,
      "loss": 0.0279,
      "step": 748600
    },
    {
      "epoch": 1.2251330492331258,
      "grad_norm": 1.5358678102493286,
      "learning_rate": 7.599480769357486e-06,
      "loss": 0.0205,
      "step": 748620
    },
    {
      "epoch": 1.2251657796717792,
      "grad_norm": 0.42879146337509155,
      "learning_rate": 7.599414877143968e-06,
      "loss": 0.0257,
      "step": 748640
    },
    {
      "epoch": 1.2251985101104326,
      "grad_norm": 0.5350621342658997,
      "learning_rate": 7.599348984930451e-06,
      "loss": 0.0223,
      "step": 748660
    },
    {
      "epoch": 1.225231240549086,
      "grad_norm": 0.26624733209609985,
      "learning_rate": 7.599283092716935e-06,
      "loss": 0.0172,
      "step": 748680
    },
    {
      "epoch": 1.225263970987739,
      "grad_norm": 0.7944059371948242,
      "learning_rate": 7.599217200503417e-06,
      "loss": 0.0257,
      "step": 748700
    },
    {
      "epoch": 1.2252967014263925,
      "grad_norm": 0.8041167855262756,
      "learning_rate": 7.5991513082899e-06,
      "loss": 0.0254,
      "step": 748720
    },
    {
      "epoch": 1.2253294318650458,
      "grad_norm": 0.5668633580207825,
      "learning_rate": 7.599085416076382e-06,
      "loss": 0.0256,
      "step": 748740
    },
    {
      "epoch": 1.2253621623036992,
      "grad_norm": 0.37297263741493225,
      "learning_rate": 7.599019523862866e-06,
      "loss": 0.0148,
      "step": 748760
    },
    {
      "epoch": 1.2253948927423526,
      "grad_norm": 0.22464324533939362,
      "learning_rate": 7.598953631649349e-06,
      "loss": 0.021,
      "step": 748780
    },
    {
      "epoch": 1.225427623181006,
      "grad_norm": 0.315227746963501,
      "learning_rate": 7.598887739435832e-06,
      "loss": 0.0183,
      "step": 748800
    },
    {
      "epoch": 1.2254603536196593,
      "grad_norm": 0.21510657668113708,
      "learning_rate": 7.598821847222314e-06,
      "loss": 0.0211,
      "step": 748820
    },
    {
      "epoch": 1.2254930840583125,
      "grad_norm": 0.1499159187078476,
      "learning_rate": 7.598755955008798e-06,
      "loss": 0.0232,
      "step": 748840
    },
    {
      "epoch": 1.2255258144969658,
      "grad_norm": 0.431365042924881,
      "learning_rate": 7.59869006279528e-06,
      "loss": 0.038,
      "step": 748860
    },
    {
      "epoch": 1.2255585449356192,
      "grad_norm": 0.1909918636083603,
      "learning_rate": 7.598624170581763e-06,
      "loss": 0.0234,
      "step": 748880
    },
    {
      "epoch": 1.2255912753742726,
      "grad_norm": 1.167483925819397,
      "learning_rate": 7.598558278368245e-06,
      "loss": 0.0186,
      "step": 748900
    },
    {
      "epoch": 1.225624005812926,
      "grad_norm": 0.3439267575740814,
      "learning_rate": 7.598492386154729e-06,
      "loss": 0.0264,
      "step": 748920
    },
    {
      "epoch": 1.2256567362515793,
      "grad_norm": 1.1629081964492798,
      "learning_rate": 7.598426493941211e-06,
      "loss": 0.0265,
      "step": 748940
    },
    {
      "epoch": 1.2256894666902325,
      "grad_norm": 1.3298251628875732,
      "learning_rate": 7.598360601727694e-06,
      "loss": 0.0298,
      "step": 748960
    },
    {
      "epoch": 1.2257221971288859,
      "grad_norm": 1.2174466848373413,
      "learning_rate": 7.598294709514177e-06,
      "loss": 0.0247,
      "step": 748980
    },
    {
      "epoch": 1.2257549275675392,
      "grad_norm": 1.3359564542770386,
      "learning_rate": 7.5982288173006605e-06,
      "loss": 0.0205,
      "step": 749000
    },
    {
      "epoch": 1.2257876580061926,
      "grad_norm": 0.43917450308799744,
      "learning_rate": 7.598162925087143e-06,
      "loss": 0.0217,
      "step": 749020
    },
    {
      "epoch": 1.225820388444846,
      "grad_norm": 0.37229055166244507,
      "learning_rate": 7.598097032873626e-06,
      "loss": 0.0145,
      "step": 749040
    },
    {
      "epoch": 1.2258531188834993,
      "grad_norm": 0.14705488085746765,
      "learning_rate": 7.5980311406601095e-06,
      "loss": 0.016,
      "step": 749060
    },
    {
      "epoch": 1.2258858493221525,
      "grad_norm": 0.21370258927345276,
      "learning_rate": 7.597965248446591e-06,
      "loss": 0.024,
      "step": 749080
    },
    {
      "epoch": 1.2259185797608059,
      "grad_norm": 0.4814576804637909,
      "learning_rate": 7.597899356233075e-06,
      "loss": 0.0193,
      "step": 749100
    },
    {
      "epoch": 1.2259513101994592,
      "grad_norm": 0.3268755078315735,
      "learning_rate": 7.597833464019557e-06,
      "loss": 0.0251,
      "step": 749120
    },
    {
      "epoch": 1.2259840406381126,
      "grad_norm": 0.12788957357406616,
      "learning_rate": 7.5977675718060405e-06,
      "loss": 0.0132,
      "step": 749140
    },
    {
      "epoch": 1.226016771076766,
      "grad_norm": 0.4970373809337616,
      "learning_rate": 7.597701679592523e-06,
      "loss": 0.0279,
      "step": 749160
    },
    {
      "epoch": 1.2260495015154194,
      "grad_norm": 2.055837631225586,
      "learning_rate": 7.597635787379006e-06,
      "loss": 0.022,
      "step": 749180
    },
    {
      "epoch": 1.2260822319540727,
      "grad_norm": 0.5636739730834961,
      "learning_rate": 7.597569895165489e-06,
      "loss": 0.0221,
      "step": 749200
    },
    {
      "epoch": 1.2261149623927259,
      "grad_norm": 0.6718045473098755,
      "learning_rate": 7.597504002951972e-06,
      "loss": 0.0364,
      "step": 749220
    },
    {
      "epoch": 1.2261476928313793,
      "grad_norm": 0.9924404621124268,
      "learning_rate": 7.597438110738454e-06,
      "loss": 0.0324,
      "step": 749240
    },
    {
      "epoch": 1.2261804232700326,
      "grad_norm": 1.7607355117797852,
      "learning_rate": 7.597372218524938e-06,
      "loss": 0.0294,
      "step": 749260
    },
    {
      "epoch": 1.226213153708686,
      "grad_norm": 1.20474374294281,
      "learning_rate": 7.59730632631142e-06,
      "loss": 0.0203,
      "step": 749280
    },
    {
      "epoch": 1.2262458841473394,
      "grad_norm": 0.9264850616455078,
      "learning_rate": 7.597240434097903e-06,
      "loss": 0.0216,
      "step": 749300
    },
    {
      "epoch": 1.2262786145859927,
      "grad_norm": 0.20611383020877838,
      "learning_rate": 7.597174541884386e-06,
      "loss": 0.0201,
      "step": 749320
    },
    {
      "epoch": 1.2263113450246461,
      "grad_norm": 0.6552594304084778,
      "learning_rate": 7.597108649670869e-06,
      "loss": 0.0309,
      "step": 749340
    },
    {
      "epoch": 1.2263440754632993,
      "grad_norm": 0.5357458591461182,
      "learning_rate": 7.597042757457352e-06,
      "loss": 0.0253,
      "step": 749360
    },
    {
      "epoch": 1.2263768059019526,
      "grad_norm": 0.5527868866920471,
      "learning_rate": 7.596976865243835e-06,
      "loss": 0.035,
      "step": 749380
    },
    {
      "epoch": 1.226409536340606,
      "grad_norm": 0.31420913338661194,
      "learning_rate": 7.596910973030318e-06,
      "loss": 0.0168,
      "step": 749400
    },
    {
      "epoch": 1.2264422667792594,
      "grad_norm": 0.3940586745738983,
      "learning_rate": 7.5968450808168005e-06,
      "loss": 0.0242,
      "step": 749420
    },
    {
      "epoch": 1.2264749972179128,
      "grad_norm": 1.3082942962646484,
      "learning_rate": 7.596779188603284e-06,
      "loss": 0.0241,
      "step": 749440
    },
    {
      "epoch": 1.2265077276565661,
      "grad_norm": 0.5151641368865967,
      "learning_rate": 7.596713296389766e-06,
      "loss": 0.0284,
      "step": 749460
    },
    {
      "epoch": 1.2265404580952195,
      "grad_norm": 0.4157349169254303,
      "learning_rate": 7.59664740417625e-06,
      "loss": 0.0265,
      "step": 749480
    },
    {
      "epoch": 1.2265731885338726,
      "grad_norm": 0.4531128406524658,
      "learning_rate": 7.5965815119627315e-06,
      "loss": 0.0175,
      "step": 749500
    },
    {
      "epoch": 1.226605918972526,
      "grad_norm": 0.23550258576869965,
      "learning_rate": 7.596515619749215e-06,
      "loss": 0.0235,
      "step": 749520
    },
    {
      "epoch": 1.2266386494111794,
      "grad_norm": 0.4291463792324066,
      "learning_rate": 7.596449727535698e-06,
      "loss": 0.0197,
      "step": 749540
    },
    {
      "epoch": 1.2266713798498328,
      "grad_norm": 1.4551734924316406,
      "learning_rate": 7.5963838353221806e-06,
      "loss": 0.0328,
      "step": 749560
    },
    {
      "epoch": 1.2267041102884861,
      "grad_norm": 1.0686113834381104,
      "learning_rate": 7.596317943108663e-06,
      "loss": 0.0278,
      "step": 749580
    },
    {
      "epoch": 1.2267368407271395,
      "grad_norm": 0.6754915118217468,
      "learning_rate": 7.596252050895147e-06,
      "loss": 0.0226,
      "step": 749600
    },
    {
      "epoch": 1.2267695711657929,
      "grad_norm": 0.6011990904808044,
      "learning_rate": 7.596186158681629e-06,
      "loss": 0.0268,
      "step": 749620
    },
    {
      "epoch": 1.226802301604446,
      "grad_norm": 0.4396694600582123,
      "learning_rate": 7.596120266468112e-06,
      "loss": 0.0166,
      "step": 749640
    },
    {
      "epoch": 1.2268350320430994,
      "grad_norm": 0.6612592339515686,
      "learning_rate": 7.596054374254594e-06,
      "loss": 0.019,
      "step": 749660
    },
    {
      "epoch": 1.2268677624817528,
      "grad_norm": 0.49281150102615356,
      "learning_rate": 7.595988482041078e-06,
      "loss": 0.0182,
      "step": 749680
    },
    {
      "epoch": 1.2269004929204061,
      "grad_norm": 0.4968383312225342,
      "learning_rate": 7.59592258982756e-06,
      "loss": 0.0179,
      "step": 749700
    },
    {
      "epoch": 1.2269332233590595,
      "grad_norm": 1.1166352033615112,
      "learning_rate": 7.595856697614043e-06,
      "loss": 0.0274,
      "step": 749720
    },
    {
      "epoch": 1.226965953797713,
      "grad_norm": 2.250067710876465,
      "learning_rate": 7.595790805400527e-06,
      "loss": 0.0261,
      "step": 749740
    },
    {
      "epoch": 1.226998684236366,
      "grad_norm": 0.11027191579341888,
      "learning_rate": 7.595724913187009e-06,
      "loss": 0.0161,
      "step": 749760
    },
    {
      "epoch": 1.2270314146750194,
      "grad_norm": 0.5845118761062622,
      "learning_rate": 7.595659020973492e-06,
      "loss": 0.0215,
      "step": 749780
    },
    {
      "epoch": 1.2270641451136728,
      "grad_norm": 0.6623257398605347,
      "learning_rate": 7.595593128759975e-06,
      "loss": 0.0254,
      "step": 749800
    },
    {
      "epoch": 1.2270968755523262,
      "grad_norm": 0.896480917930603,
      "learning_rate": 7.595527236546458e-06,
      "loss": 0.0242,
      "step": 749820
    },
    {
      "epoch": 1.2271296059909795,
      "grad_norm": 1.108909010887146,
      "learning_rate": 7.595461344332941e-06,
      "loss": 0.026,
      "step": 749840
    },
    {
      "epoch": 1.227162336429633,
      "grad_norm": 0.3054170310497284,
      "learning_rate": 7.595395452119424e-06,
      "loss": 0.0143,
      "step": 749860
    },
    {
      "epoch": 1.227195066868286,
      "grad_norm": 1.20365309715271,
      "learning_rate": 7.595329559905906e-06,
      "loss": 0.0281,
      "step": 749880
    },
    {
      "epoch": 1.2272277973069394,
      "grad_norm": 0.2981105148792267,
      "learning_rate": 7.59526366769239e-06,
      "loss": 0.0264,
      "step": 749900
    },
    {
      "epoch": 1.2272605277455928,
      "grad_norm": 0.131166473031044,
      "learning_rate": 7.5951977754788716e-06,
      "loss": 0.0205,
      "step": 749920
    },
    {
      "epoch": 1.2272932581842462,
      "grad_norm": 0.6131069660186768,
      "learning_rate": 7.595131883265355e-06,
      "loss": 0.0225,
      "step": 749940
    },
    {
      "epoch": 1.2273259886228995,
      "grad_norm": 4.150231838226318,
      "learning_rate": 7.595065991051838e-06,
      "loss": 0.0166,
      "step": 749960
    },
    {
      "epoch": 1.227358719061553,
      "grad_norm": 0.5754925012588501,
      "learning_rate": 7.595000098838321e-06,
      "loss": 0.0267,
      "step": 749980
    },
    {
      "epoch": 1.2273914495002063,
      "grad_norm": 0.8389425277709961,
      "learning_rate": 7.594934206624803e-06,
      "loss": 0.0213,
      "step": 750000
    },
    {
      "epoch": 1.2273914495002063,
      "eval_loss": 0.01215579453855753,
      "eval_runtime": 6507.1841,
      "eval_samples_per_second": 157.957,
      "eval_steps_per_second": 15.796,
      "eval_sts-dev_pearson_cosine": 0.9713204622092374,
      "eval_sts-dev_spearman_cosine": 0.8866737661549909,
      "step": 750000
    },
    {
      "epoch": 1.2274241799388594,
      "grad_norm": 0.42047086358070374,
      "learning_rate": 7.594868314411287e-06,
      "loss": 0.0219,
      "step": 750020
    },
    {
      "epoch": 1.2274569103775128,
      "grad_norm": 0.5391493439674377,
      "learning_rate": 7.594802422197769e-06,
      "loss": 0.0247,
      "step": 750040
    },
    {
      "epoch": 1.2274896408161662,
      "grad_norm": 0.7234576940536499,
      "learning_rate": 7.5947365299842524e-06,
      "loss": 0.0269,
      "step": 750060
    },
    {
      "epoch": 1.2275223712548196,
      "grad_norm": 0.7692119479179382,
      "learning_rate": 7.594670637770736e-06,
      "loss": 0.0175,
      "step": 750080
    },
    {
      "epoch": 1.227555101693473,
      "grad_norm": 0.33376798033714294,
      "learning_rate": 7.594604745557218e-06,
      "loss": 0.0203,
      "step": 750100
    },
    {
      "epoch": 1.2275878321321263,
      "grad_norm": 0.7971535325050354,
      "learning_rate": 7.5945388533437015e-06,
      "loss": 0.0296,
      "step": 750120
    },
    {
      "epoch": 1.2276205625707797,
      "grad_norm": 1.408168911933899,
      "learning_rate": 7.594472961130183e-06,
      "loss": 0.0244,
      "step": 750140
    },
    {
      "epoch": 1.2276532930094328,
      "grad_norm": 0.8122789859771729,
      "learning_rate": 7.594407068916667e-06,
      "loss": 0.0213,
      "step": 750160
    },
    {
      "epoch": 1.2276860234480862,
      "grad_norm": 0.36911046504974365,
      "learning_rate": 7.59434117670315e-06,
      "loss": 0.0217,
      "step": 750180
    },
    {
      "epoch": 1.2277187538867396,
      "grad_norm": 1.4563146829605103,
      "learning_rate": 7.5942752844896325e-06,
      "loss": 0.0266,
      "step": 750200
    },
    {
      "epoch": 1.227751484325393,
      "grad_norm": 0.4130323529243469,
      "learning_rate": 7.594209392276115e-06,
      "loss": 0.0189,
      "step": 750220
    },
    {
      "epoch": 1.2277842147640463,
      "grad_norm": 0.3187934160232544,
      "learning_rate": 7.594143500062599e-06,
      "loss": 0.0188,
      "step": 750240
    },
    {
      "epoch": 1.2278169452026997,
      "grad_norm": 0.9865436553955078,
      "learning_rate": 7.594077607849081e-06,
      "loss": 0.0205,
      "step": 750260
    },
    {
      "epoch": 1.227849675641353,
      "grad_norm": 1.739800214767456,
      "learning_rate": 7.594011715635564e-06,
      "loss": 0.0211,
      "step": 750280
    },
    {
      "epoch": 1.2278824060800062,
      "grad_norm": 0.4795893132686615,
      "learning_rate": 7.593945823422046e-06,
      "loss": 0.0206,
      "step": 750300
    },
    {
      "epoch": 1.2279151365186596,
      "grad_norm": 1.0341416597366333,
      "learning_rate": 7.59387993120853e-06,
      "loss": 0.0172,
      "step": 750320
    },
    {
      "epoch": 1.227947866957313,
      "grad_norm": 0.44419360160827637,
      "learning_rate": 7.5938140389950125e-06,
      "loss": 0.0208,
      "step": 750340
    },
    {
      "epoch": 1.2279805973959663,
      "grad_norm": 1.2538011074066162,
      "learning_rate": 7.593748146781495e-06,
      "loss": 0.0188,
      "step": 750360
    },
    {
      "epoch": 1.2280133278346197,
      "grad_norm": 0.38227608799934387,
      "learning_rate": 7.593682254567978e-06,
      "loss": 0.019,
      "step": 750380
    },
    {
      "epoch": 1.228046058273273,
      "grad_norm": 0.715295672416687,
      "learning_rate": 7.5936163623544616e-06,
      "loss": 0.0343,
      "step": 750400
    },
    {
      "epoch": 1.2280787887119264,
      "grad_norm": 0.1970239132642746,
      "learning_rate": 7.593550470140944e-06,
      "loss": 0.0273,
      "step": 750420
    },
    {
      "epoch": 1.2281115191505796,
      "grad_norm": 0.6377118825912476,
      "learning_rate": 7.593484577927427e-06,
      "loss": 0.0159,
      "step": 750440
    },
    {
      "epoch": 1.228144249589233,
      "grad_norm": 0.12366100400686264,
      "learning_rate": 7.593418685713911e-06,
      "loss": 0.0211,
      "step": 750460
    },
    {
      "epoch": 1.2281769800278863,
      "grad_norm": 0.6018319725990295,
      "learning_rate": 7.5933527935003925e-06,
      "loss": 0.0194,
      "step": 750480
    },
    {
      "epoch": 1.2282097104665397,
      "grad_norm": 0.5216540694236755,
      "learning_rate": 7.593286901286876e-06,
      "loss": 0.0241,
      "step": 750500
    },
    {
      "epoch": 1.228242440905193,
      "grad_norm": 0.9383665919303894,
      "learning_rate": 7.593221009073358e-06,
      "loss": 0.021,
      "step": 750520
    },
    {
      "epoch": 1.2282751713438462,
      "grad_norm": 0.5167996287345886,
      "learning_rate": 7.593155116859842e-06,
      "loss": 0.0157,
      "step": 750540
    },
    {
      "epoch": 1.2283079017824996,
      "grad_norm": 1.6899865865707397,
      "learning_rate": 7.593089224646324e-06,
      "loss": 0.0234,
      "step": 750560
    },
    {
      "epoch": 1.228340632221153,
      "grad_norm": 0.9568362832069397,
      "learning_rate": 7.593023332432807e-06,
      "loss": 0.0214,
      "step": 750580
    },
    {
      "epoch": 1.2283733626598063,
      "grad_norm": 0.45687294006347656,
      "learning_rate": 7.59295744021929e-06,
      "loss": 0.0207,
      "step": 750600
    },
    {
      "epoch": 1.2284060930984597,
      "grad_norm": 0.1744351089000702,
      "learning_rate": 7.592891548005773e-06,
      "loss": 0.0189,
      "step": 750620
    },
    {
      "epoch": 1.228438823537113,
      "grad_norm": 0.8212885856628418,
      "learning_rate": 7.592825655792255e-06,
      "loss": 0.0324,
      "step": 750640
    },
    {
      "epoch": 1.2284715539757665,
      "grad_norm": 0.4271555542945862,
      "learning_rate": 7.592759763578739e-06,
      "loss": 0.0231,
      "step": 750660
    },
    {
      "epoch": 1.2285042844144196,
      "grad_norm": 0.809528648853302,
      "learning_rate": 7.592693871365221e-06,
      "loss": 0.0224,
      "step": 750680
    },
    {
      "epoch": 1.228537014853073,
      "grad_norm": 1.6293936967849731,
      "learning_rate": 7.592627979151704e-06,
      "loss": 0.0166,
      "step": 750700
    },
    {
      "epoch": 1.2285697452917264,
      "grad_norm": 1.4544014930725098,
      "learning_rate": 7.592562086938186e-06,
      "loss": 0.0244,
      "step": 750720
    },
    {
      "epoch": 1.2286024757303797,
      "grad_norm": 1.0769760608673096,
      "learning_rate": 7.59249619472467e-06,
      "loss": 0.0318,
      "step": 750740
    },
    {
      "epoch": 1.228635206169033,
      "grad_norm": 0.4212959408760071,
      "learning_rate": 7.592430302511153e-06,
      "loss": 0.0264,
      "step": 750760
    },
    {
      "epoch": 1.2286679366076865,
      "grad_norm": 0.5167982578277588,
      "learning_rate": 7.592364410297635e-06,
      "loss": 0.0224,
      "step": 750780
    },
    {
      "epoch": 1.2287006670463398,
      "grad_norm": 0.5190603137016296,
      "learning_rate": 7.592298518084119e-06,
      "loss": 0.0211,
      "step": 750800
    },
    {
      "epoch": 1.228733397484993,
      "grad_norm": 0.9444627165794373,
      "learning_rate": 7.592232625870602e-06,
      "loss": 0.0184,
      "step": 750820
    },
    {
      "epoch": 1.2287661279236464,
      "grad_norm": 0.912061870098114,
      "learning_rate": 7.592166733657084e-06,
      "loss": 0.0169,
      "step": 750840
    },
    {
      "epoch": 1.2287988583622997,
      "grad_norm": 0.27872827649116516,
      "learning_rate": 7.592100841443567e-06,
      "loss": 0.0231,
      "step": 750860
    },
    {
      "epoch": 1.2288315888009531,
      "grad_norm": 0.7039021253585815,
      "learning_rate": 7.592034949230051e-06,
      "loss": 0.0213,
      "step": 750880
    },
    {
      "epoch": 1.2288643192396065,
      "grad_norm": 0.2983584702014923,
      "learning_rate": 7.591969057016533e-06,
      "loss": 0.0209,
      "step": 750900
    },
    {
      "epoch": 1.2288970496782599,
      "grad_norm": 0.20655062794685364,
      "learning_rate": 7.591903164803016e-06,
      "loss": 0.0142,
      "step": 750920
    },
    {
      "epoch": 1.2289297801169132,
      "grad_norm": 0.7217496633529663,
      "learning_rate": 7.591837272589498e-06,
      "loss": 0.0298,
      "step": 750940
    },
    {
      "epoch": 1.2289625105555664,
      "grad_norm": 0.24570854008197784,
      "learning_rate": 7.591771380375982e-06,
      "loss": 0.0192,
      "step": 750960
    },
    {
      "epoch": 1.2289952409942198,
      "grad_norm": 0.4206700026988983,
      "learning_rate": 7.591705488162464e-06,
      "loss": 0.0239,
      "step": 750980
    },
    {
      "epoch": 1.2290279714328731,
      "grad_norm": 0.06373157352209091,
      "learning_rate": 7.591639595948947e-06,
      "loss": 0.0236,
      "step": 751000
    },
    {
      "epoch": 1.2290607018715265,
      "grad_norm": 0.5981274247169495,
      "learning_rate": 7.59157370373543e-06,
      "loss": 0.0198,
      "step": 751020
    },
    {
      "epoch": 1.2290934323101799,
      "grad_norm": 0.6159588098526001,
      "learning_rate": 7.5915078115219135e-06,
      "loss": 0.0281,
      "step": 751040
    },
    {
      "epoch": 1.2291261627488332,
      "grad_norm": 0.908431351184845,
      "learning_rate": 7.591441919308395e-06,
      "loss": 0.023,
      "step": 751060
    },
    {
      "epoch": 1.2291588931874866,
      "grad_norm": 0.1361117660999298,
      "learning_rate": 7.591376027094879e-06,
      "loss": 0.015,
      "step": 751080
    },
    {
      "epoch": 1.2291916236261398,
      "grad_norm": 1.1546776294708252,
      "learning_rate": 7.591310134881361e-06,
      "loss": 0.019,
      "step": 751100
    },
    {
      "epoch": 1.2292243540647931,
      "grad_norm": 0.3414674699306488,
      "learning_rate": 7.5912442426678444e-06,
      "loss": 0.0203,
      "step": 751120
    },
    {
      "epoch": 1.2292570845034465,
      "grad_norm": 1.247523546218872,
      "learning_rate": 7.591178350454328e-06,
      "loss": 0.0316,
      "step": 751140
    },
    {
      "epoch": 1.2292898149420999,
      "grad_norm": 0.6715111136436462,
      "learning_rate": 7.59111245824081e-06,
      "loss": 0.0218,
      "step": 751160
    },
    {
      "epoch": 1.2293225453807533,
      "grad_norm": 0.4615135192871094,
      "learning_rate": 7.5910465660272935e-06,
      "loss": 0.0214,
      "step": 751180
    },
    {
      "epoch": 1.2293552758194066,
      "grad_norm": 1.2865936756134033,
      "learning_rate": 7.590980673813776e-06,
      "loss": 0.0327,
      "step": 751200
    },
    {
      "epoch": 1.2293880062580598,
      "grad_norm": 1.4709057807922363,
      "learning_rate": 7.590914781600259e-06,
      "loss": 0.0196,
      "step": 751220
    },
    {
      "epoch": 1.2294207366967131,
      "grad_norm": 0.45456773042678833,
      "learning_rate": 7.590848889386742e-06,
      "loss": 0.0185,
      "step": 751240
    },
    {
      "epoch": 1.2294534671353665,
      "grad_norm": 0.7398538589477539,
      "learning_rate": 7.590782997173225e-06,
      "loss": 0.0176,
      "step": 751260
    },
    {
      "epoch": 1.22948619757402,
      "grad_norm": 0.23282122611999512,
      "learning_rate": 7.590717104959707e-06,
      "loss": 0.0138,
      "step": 751280
    },
    {
      "epoch": 1.2295189280126733,
      "grad_norm": 0.776011049747467,
      "learning_rate": 7.590651212746191e-06,
      "loss": 0.0283,
      "step": 751300
    },
    {
      "epoch": 1.2295516584513266,
      "grad_norm": 0.9763237833976746,
      "learning_rate": 7.590585320532673e-06,
      "loss": 0.0316,
      "step": 751320
    },
    {
      "epoch": 1.2295843888899798,
      "grad_norm": 1.8559468984603882,
      "learning_rate": 7.590519428319156e-06,
      "loss": 0.021,
      "step": 751340
    },
    {
      "epoch": 1.2296171193286332,
      "grad_norm": 0.13082346320152283,
      "learning_rate": 7.590453536105639e-06,
      "loss": 0.0205,
      "step": 751360
    },
    {
      "epoch": 1.2296498497672865,
      "grad_norm": 1.837893009185791,
      "learning_rate": 7.590387643892122e-06,
      "loss": 0.0194,
      "step": 751380
    },
    {
      "epoch": 1.22968258020594,
      "grad_norm": 1.1123560667037964,
      "learning_rate": 7.5903217516786045e-06,
      "loss": 0.0228,
      "step": 751400
    },
    {
      "epoch": 1.2297153106445933,
      "grad_norm": 0.8018797039985657,
      "learning_rate": 7.590255859465088e-06,
      "loss": 0.0271,
      "step": 751420
    },
    {
      "epoch": 1.2297480410832466,
      "grad_norm": 0.3283553421497345,
      "learning_rate": 7.59018996725157e-06,
      "loss": 0.0217,
      "step": 751440
    },
    {
      "epoch": 1.2297807715219,
      "grad_norm": 0.6749007701873779,
      "learning_rate": 7.5901240750380535e-06,
      "loss": 0.0198,
      "step": 751460
    },
    {
      "epoch": 1.2298135019605532,
      "grad_norm": 0.4538275897502899,
      "learning_rate": 7.590058182824537e-06,
      "loss": 0.0323,
      "step": 751480
    },
    {
      "epoch": 1.2298462323992065,
      "grad_norm": 0.41546812653541565,
      "learning_rate": 7.589992290611019e-06,
      "loss": 0.0259,
      "step": 751500
    },
    {
      "epoch": 1.22987896283786,
      "grad_norm": 0.36632421612739563,
      "learning_rate": 7.589926398397503e-06,
      "loss": 0.0273,
      "step": 751520
    },
    {
      "epoch": 1.2299116932765133,
      "grad_norm": 0.2615610957145691,
      "learning_rate": 7.5898605061839845e-06,
      "loss": 0.0342,
      "step": 751540
    },
    {
      "epoch": 1.2299444237151667,
      "grad_norm": 0.8537346720695496,
      "learning_rate": 7.589794613970468e-06,
      "loss": 0.0201,
      "step": 751560
    },
    {
      "epoch": 1.22997715415382,
      "grad_norm": 0.5027235150337219,
      "learning_rate": 7.589728721756951e-06,
      "loss": 0.0277,
      "step": 751580
    },
    {
      "epoch": 1.2300098845924734,
      "grad_norm": 0.4631403088569641,
      "learning_rate": 7.5896628295434336e-06,
      "loss": 0.0251,
      "step": 751600
    },
    {
      "epoch": 1.2300426150311266,
      "grad_norm": 1.8053685426712036,
      "learning_rate": 7.589596937329916e-06,
      "loss": 0.0181,
      "step": 751620
    },
    {
      "epoch": 1.23007534546978,
      "grad_norm": 0.6692058444023132,
      "learning_rate": 7.5895310451164e-06,
      "loss": 0.0176,
      "step": 751640
    },
    {
      "epoch": 1.2301080759084333,
      "grad_norm": 0.5997835397720337,
      "learning_rate": 7.589465152902882e-06,
      "loss": 0.0215,
      "step": 751660
    },
    {
      "epoch": 1.2301408063470867,
      "grad_norm": 0.28316670656204224,
      "learning_rate": 7.589399260689365e-06,
      "loss": 0.0209,
      "step": 751680
    },
    {
      "epoch": 1.23017353678574,
      "grad_norm": 0.8583765029907227,
      "learning_rate": 7.589333368475847e-06,
      "loss": 0.0261,
      "step": 751700
    },
    {
      "epoch": 1.2302062672243934,
      "grad_norm": 0.9174042344093323,
      "learning_rate": 7.589267476262331e-06,
      "loss": 0.0179,
      "step": 751720
    },
    {
      "epoch": 1.2302389976630468,
      "grad_norm": 0.7353243827819824,
      "learning_rate": 7.589201584048813e-06,
      "loss": 0.0155,
      "step": 751740
    },
    {
      "epoch": 1.2302717281017,
      "grad_norm": 0.3415641486644745,
      "learning_rate": 7.589135691835296e-06,
      "loss": 0.0122,
      "step": 751760
    },
    {
      "epoch": 1.2303044585403533,
      "grad_norm": 1.3692775964736938,
      "learning_rate": 7.589069799621779e-06,
      "loss": 0.0202,
      "step": 751780
    },
    {
      "epoch": 1.2303371889790067,
      "grad_norm": 0.12240348011255264,
      "learning_rate": 7.589003907408262e-06,
      "loss": 0.0199,
      "step": 751800
    },
    {
      "epoch": 1.23036991941766,
      "grad_norm": 1.2498475313186646,
      "learning_rate": 7.588938015194745e-06,
      "loss": 0.0192,
      "step": 751820
    },
    {
      "epoch": 1.2304026498563134,
      "grad_norm": 0.4481479227542877,
      "learning_rate": 7.588872122981228e-06,
      "loss": 0.0219,
      "step": 751840
    },
    {
      "epoch": 1.2304353802949668,
      "grad_norm": 0.6900119185447693,
      "learning_rate": 7.588806230767711e-06,
      "loss": 0.0198,
      "step": 751860
    },
    {
      "epoch": 1.2304681107336202,
      "grad_norm": 0.7529211044311523,
      "learning_rate": 7.588740338554194e-06,
      "loss": 0.0171,
      "step": 751880
    },
    {
      "epoch": 1.2305008411722733,
      "grad_norm": 0.30812379717826843,
      "learning_rate": 7.588674446340677e-06,
      "loss": 0.0329,
      "step": 751900
    },
    {
      "epoch": 1.2305335716109267,
      "grad_norm": 0.9639568328857422,
      "learning_rate": 7.588608554127159e-06,
      "loss": 0.023,
      "step": 751920
    },
    {
      "epoch": 1.23056630204958,
      "grad_norm": 0.27233511209487915,
      "learning_rate": 7.588542661913643e-06,
      "loss": 0.0276,
      "step": 751940
    },
    {
      "epoch": 1.2305990324882334,
      "grad_norm": 0.35400480031967163,
      "learning_rate": 7.588476769700125e-06,
      "loss": 0.014,
      "step": 751960
    },
    {
      "epoch": 1.2306317629268868,
      "grad_norm": 1.3031952381134033,
      "learning_rate": 7.588410877486608e-06,
      "loss": 0.0144,
      "step": 751980
    },
    {
      "epoch": 1.2306644933655402,
      "grad_norm": 0.527177095413208,
      "learning_rate": 7.588344985273091e-06,
      "loss": 0.023,
      "step": 752000
    },
    {
      "epoch": 1.2306972238041933,
      "grad_norm": 0.3339928090572357,
      "learning_rate": 7.588279093059574e-06,
      "loss": 0.0286,
      "step": 752020
    },
    {
      "epoch": 1.2307299542428467,
      "grad_norm": 0.308628648519516,
      "learning_rate": 7.588213200846056e-06,
      "loss": 0.0217,
      "step": 752040
    },
    {
      "epoch": 1.2307626846815,
      "grad_norm": 0.7287843823432922,
      "learning_rate": 7.58814730863254e-06,
      "loss": 0.0257,
      "step": 752060
    },
    {
      "epoch": 1.2307954151201534,
      "grad_norm": 0.5879844427108765,
      "learning_rate": 7.588081416419022e-06,
      "loss": 0.0255,
      "step": 752080
    },
    {
      "epoch": 1.2308281455588068,
      "grad_norm": 4.906696319580078,
      "learning_rate": 7.5880155242055055e-06,
      "loss": 0.0283,
      "step": 752100
    },
    {
      "epoch": 1.2308608759974602,
      "grad_norm": 0.5447006821632385,
      "learning_rate": 7.587949631991987e-06,
      "loss": 0.0262,
      "step": 752120
    },
    {
      "epoch": 1.2308936064361133,
      "grad_norm": 0.21664959192276,
      "learning_rate": 7.587883739778471e-06,
      "loss": 0.0248,
      "step": 752140
    },
    {
      "epoch": 1.2309263368747667,
      "grad_norm": 0.41290533542633057,
      "learning_rate": 7.587817847564954e-06,
      "loss": 0.0238,
      "step": 752160
    },
    {
      "epoch": 1.23095906731342,
      "grad_norm": 0.7162365913391113,
      "learning_rate": 7.587751955351436e-06,
      "loss": 0.025,
      "step": 752180
    },
    {
      "epoch": 1.2309917977520735,
      "grad_norm": 1.2359553575515747,
      "learning_rate": 7.58768606313792e-06,
      "loss": 0.0204,
      "step": 752200
    },
    {
      "epoch": 1.2310245281907268,
      "grad_norm": 0.5190155506134033,
      "learning_rate": 7.587620170924403e-06,
      "loss": 0.0255,
      "step": 752220
    },
    {
      "epoch": 1.2310572586293802,
      "grad_norm": 0.6725844144821167,
      "learning_rate": 7.5875542787108855e-06,
      "loss": 0.0268,
      "step": 752240
    },
    {
      "epoch": 1.2310899890680336,
      "grad_norm": 0.6135819554328918,
      "learning_rate": 7.587488386497368e-06,
      "loss": 0.02,
      "step": 752260
    },
    {
      "epoch": 1.2311227195066867,
      "grad_norm": 1.349267601966858,
      "learning_rate": 7.587422494283852e-06,
      "loss": 0.0228,
      "step": 752280
    },
    {
      "epoch": 1.23115544994534,
      "grad_norm": 0.31130486726760864,
      "learning_rate": 7.587356602070334e-06,
      "loss": 0.0278,
      "step": 752300
    },
    {
      "epoch": 1.2311881803839935,
      "grad_norm": 0.2791360318660736,
      "learning_rate": 7.587290709856817e-06,
      "loss": 0.0171,
      "step": 752320
    },
    {
      "epoch": 1.2312209108226468,
      "grad_norm": 0.2874099612236023,
      "learning_rate": 7.587224817643299e-06,
      "loss": 0.0152,
      "step": 752340
    },
    {
      "epoch": 1.2312536412613002,
      "grad_norm": 0.6717564463615417,
      "learning_rate": 7.587158925429783e-06,
      "loss": 0.0217,
      "step": 752360
    },
    {
      "epoch": 1.2312863716999536,
      "grad_norm": 0.49223873019218445,
      "learning_rate": 7.5870930332162655e-06,
      "loss": 0.0316,
      "step": 752380
    },
    {
      "epoch": 1.231319102138607,
      "grad_norm": 0.6036707758903503,
      "learning_rate": 7.587027141002748e-06,
      "loss": 0.0303,
      "step": 752400
    },
    {
      "epoch": 1.23135183257726,
      "grad_norm": 0.9525645971298218,
      "learning_rate": 7.586961248789231e-06,
      "loss": 0.0224,
      "step": 752420
    },
    {
      "epoch": 1.2313845630159135,
      "grad_norm": 0.25355264544487,
      "learning_rate": 7.5868953565757146e-06,
      "loss": 0.0193,
      "step": 752440
    },
    {
      "epoch": 1.2314172934545669,
      "grad_norm": 0.49752748012542725,
      "learning_rate": 7.5868294643621965e-06,
      "loss": 0.0189,
      "step": 752460
    },
    {
      "epoch": 1.2314500238932202,
      "grad_norm": 0.7159359455108643,
      "learning_rate": 7.58676357214868e-06,
      "loss": 0.0242,
      "step": 752480
    },
    {
      "epoch": 1.2314827543318736,
      "grad_norm": 0.20589572191238403,
      "learning_rate": 7.586697679935162e-06,
      "loss": 0.0172,
      "step": 752500
    },
    {
      "epoch": 1.231515484770527,
      "grad_norm": 0.24844922125339508,
      "learning_rate": 7.5866317877216455e-06,
      "loss": 0.025,
      "step": 752520
    },
    {
      "epoch": 1.2315482152091803,
      "grad_norm": 0.26125413179397583,
      "learning_rate": 7.586565895508129e-06,
      "loss": 0.0196,
      "step": 752540
    },
    {
      "epoch": 1.2315809456478335,
      "grad_norm": 0.7128375172615051,
      "learning_rate": 7.586500003294611e-06,
      "loss": 0.0181,
      "step": 752560
    },
    {
      "epoch": 1.2316136760864869,
      "grad_norm": 0.1891164928674698,
      "learning_rate": 7.586434111081095e-06,
      "loss": 0.0174,
      "step": 752580
    },
    {
      "epoch": 1.2316464065251402,
      "grad_norm": 0.3474995791912079,
      "learning_rate": 7.5863682188675765e-06,
      "loss": 0.0231,
      "step": 752600
    },
    {
      "epoch": 1.2316791369637936,
      "grad_norm": 0.23946943879127502,
      "learning_rate": 7.58630232665406e-06,
      "loss": 0.0239,
      "step": 752620
    },
    {
      "epoch": 1.231711867402447,
      "grad_norm": 0.6376901268959045,
      "learning_rate": 7.586236434440543e-06,
      "loss": 0.0191,
      "step": 752640
    },
    {
      "epoch": 1.2317445978411004,
      "grad_norm": 0.5756300091743469,
      "learning_rate": 7.586170542227026e-06,
      "loss": 0.023,
      "step": 752660
    },
    {
      "epoch": 1.2317773282797537,
      "grad_norm": 0.5618694424629211,
      "learning_rate": 7.586104650013508e-06,
      "loss": 0.0283,
      "step": 752680
    },
    {
      "epoch": 1.2318100587184069,
      "grad_norm": 0.19763946533203125,
      "learning_rate": 7.586038757799992e-06,
      "loss": 0.0226,
      "step": 752700
    },
    {
      "epoch": 1.2318427891570602,
      "grad_norm": 0.5370811223983765,
      "learning_rate": 7.585972865586474e-06,
      "loss": 0.0213,
      "step": 752720
    },
    {
      "epoch": 1.2318755195957136,
      "grad_norm": 0.13843096792697906,
      "learning_rate": 7.585906973372957e-06,
      "loss": 0.023,
      "step": 752740
    },
    {
      "epoch": 1.231908250034367,
      "grad_norm": 0.8545281291007996,
      "learning_rate": 7.585841081159439e-06,
      "loss": 0.0177,
      "step": 752760
    },
    {
      "epoch": 1.2319409804730204,
      "grad_norm": 0.22004492580890656,
      "learning_rate": 7.585775188945923e-06,
      "loss": 0.0255,
      "step": 752780
    },
    {
      "epoch": 1.2319737109116735,
      "grad_norm": 0.5047600865364075,
      "learning_rate": 7.5857092967324056e-06,
      "loss": 0.0187,
      "step": 752800
    },
    {
      "epoch": 1.2320064413503269,
      "grad_norm": 0.15797150135040283,
      "learning_rate": 7.585643404518888e-06,
      "loss": 0.0257,
      "step": 752820
    },
    {
      "epoch": 1.2320391717889803,
      "grad_norm": 0.5473819375038147,
      "learning_rate": 7.585577512305371e-06,
      "loss": 0.0188,
      "step": 752840
    },
    {
      "epoch": 1.2320719022276336,
      "grad_norm": 1.470700740814209,
      "learning_rate": 7.585511620091855e-06,
      "loss": 0.0244,
      "step": 752860
    },
    {
      "epoch": 1.232104632666287,
      "grad_norm": 2.5138132572174072,
      "learning_rate": 7.585445727878337e-06,
      "loss": 0.0206,
      "step": 752880
    },
    {
      "epoch": 1.2321373631049404,
      "grad_norm": 1.8342936038970947,
      "learning_rate": 7.58537983566482e-06,
      "loss": 0.0166,
      "step": 752900
    },
    {
      "epoch": 1.2321700935435937,
      "grad_norm": 1.0059034824371338,
      "learning_rate": 7.585313943451304e-06,
      "loss": 0.0267,
      "step": 752920
    },
    {
      "epoch": 1.232202823982247,
      "grad_norm": 0.47725218534469604,
      "learning_rate": 7.585248051237786e-06,
      "loss": 0.0158,
      "step": 752940
    },
    {
      "epoch": 1.2322355544209003,
      "grad_norm": 1.0054008960723877,
      "learning_rate": 7.585182159024269e-06,
      "loss": 0.0268,
      "step": 752960
    },
    {
      "epoch": 1.2322682848595536,
      "grad_norm": 2.899939775466919,
      "learning_rate": 7.585116266810751e-06,
      "loss": 0.0274,
      "step": 752980
    },
    {
      "epoch": 1.232301015298207,
      "grad_norm": 0.9899060726165771,
      "learning_rate": 7.585050374597235e-06,
      "loss": 0.0191,
      "step": 753000
    },
    {
      "epoch": 1.2323337457368604,
      "grad_norm": 0.4292212426662445,
      "learning_rate": 7.584984482383717e-06,
      "loss": 0.0245,
      "step": 753020
    },
    {
      "epoch": 1.2323664761755138,
      "grad_norm": 1.6288716793060303,
      "learning_rate": 7.5849185901702e-06,
      "loss": 0.0249,
      "step": 753040
    },
    {
      "epoch": 1.2323992066141671,
      "grad_norm": 0.7776726484298706,
      "learning_rate": 7.584852697956683e-06,
      "loss": 0.0272,
      "step": 753060
    },
    {
      "epoch": 1.2324319370528203,
      "grad_norm": 1.3905401229858398,
      "learning_rate": 7.5847868057431665e-06,
      "loss": 0.0219,
      "step": 753080
    },
    {
      "epoch": 1.2324646674914737,
      "grad_norm": 0.3878193199634552,
      "learning_rate": 7.584720913529648e-06,
      "loss": 0.0167,
      "step": 753100
    },
    {
      "epoch": 1.232497397930127,
      "grad_norm": 0.6526047587394714,
      "learning_rate": 7.584655021316132e-06,
      "loss": 0.0269,
      "step": 753120
    },
    {
      "epoch": 1.2325301283687804,
      "grad_norm": 0.748196542263031,
      "learning_rate": 7.584589129102614e-06,
      "loss": 0.0252,
      "step": 753140
    },
    {
      "epoch": 1.2325628588074338,
      "grad_norm": 1.908278226852417,
      "learning_rate": 7.5845232368890974e-06,
      "loss": 0.0296,
      "step": 753160
    },
    {
      "epoch": 1.2325955892460871,
      "grad_norm": 1.2961838245391846,
      "learning_rate": 7.58445734467558e-06,
      "loss": 0.0189,
      "step": 753180
    },
    {
      "epoch": 1.2326283196847405,
      "grad_norm": 0.6334939002990723,
      "learning_rate": 7.584391452462063e-06,
      "loss": 0.0232,
      "step": 753200
    },
    {
      "epoch": 1.2326610501233937,
      "grad_norm": 0.4626557230949402,
      "learning_rate": 7.5843255602485465e-06,
      "loss": 0.0232,
      "step": 753220
    },
    {
      "epoch": 1.232693780562047,
      "grad_norm": 0.4183100759983063,
      "learning_rate": 7.584259668035029e-06,
      "loss": 0.0142,
      "step": 753240
    },
    {
      "epoch": 1.2327265110007004,
      "grad_norm": 0.20215412974357605,
      "learning_rate": 7.584193775821512e-06,
      "loss": 0.034,
      "step": 753260
    },
    {
      "epoch": 1.2327592414393538,
      "grad_norm": 1.194572925567627,
      "learning_rate": 7.584127883607995e-06,
      "loss": 0.0284,
      "step": 753280
    },
    {
      "epoch": 1.2327919718780072,
      "grad_norm": 0.8208810687065125,
      "learning_rate": 7.584061991394478e-06,
      "loss": 0.0202,
      "step": 753300
    },
    {
      "epoch": 1.2328247023166605,
      "grad_norm": 1.2553728818893433,
      "learning_rate": 7.58399609918096e-06,
      "loss": 0.0254,
      "step": 753320
    },
    {
      "epoch": 1.232857432755314,
      "grad_norm": 0.3878454566001892,
      "learning_rate": 7.583930206967444e-06,
      "loss": 0.0161,
      "step": 753340
    },
    {
      "epoch": 1.232890163193967,
      "grad_norm": 1.2037826776504517,
      "learning_rate": 7.583864314753926e-06,
      "loss": 0.0254,
      "step": 753360
    },
    {
      "epoch": 1.2329228936326204,
      "grad_norm": 0.7436327934265137,
      "learning_rate": 7.583798422540409e-06,
      "loss": 0.0367,
      "step": 753380
    },
    {
      "epoch": 1.2329556240712738,
      "grad_norm": 0.8481712341308594,
      "learning_rate": 7.583732530326892e-06,
      "loss": 0.0178,
      "step": 753400
    },
    {
      "epoch": 1.2329883545099272,
      "grad_norm": 0.9231106042861938,
      "learning_rate": 7.583666638113375e-06,
      "loss": 0.0236,
      "step": 753420
    },
    {
      "epoch": 1.2330210849485805,
      "grad_norm": 0.7649127840995789,
      "learning_rate": 7.5836007458998575e-06,
      "loss": 0.0141,
      "step": 753440
    },
    {
      "epoch": 1.233053815387234,
      "grad_norm": 0.40825355052948,
      "learning_rate": 7.583534853686341e-06,
      "loss": 0.0197,
      "step": 753460
    },
    {
      "epoch": 1.2330865458258873,
      "grad_norm": 0.2252030223608017,
      "learning_rate": 7.583468961472823e-06,
      "loss": 0.0252,
      "step": 753480
    },
    {
      "epoch": 1.2331192762645404,
      "grad_norm": 0.7595392465591431,
      "learning_rate": 7.5834030692593065e-06,
      "loss": 0.0259,
      "step": 753500
    },
    {
      "epoch": 1.2331520067031938,
      "grad_norm": 0.4532583951950073,
      "learning_rate": 7.5833371770457884e-06,
      "loss": 0.0133,
      "step": 753520
    },
    {
      "epoch": 1.2331847371418472,
      "grad_norm": 0.30776840448379517,
      "learning_rate": 7.583271284832272e-06,
      "loss": 0.0178,
      "step": 753540
    },
    {
      "epoch": 1.2332174675805005,
      "grad_norm": 1.7350571155548096,
      "learning_rate": 7.583205392618754e-06,
      "loss": 0.0256,
      "step": 753560
    },
    {
      "epoch": 1.233250198019154,
      "grad_norm": 0.5012343525886536,
      "learning_rate": 7.5831395004052375e-06,
      "loss": 0.0252,
      "step": 753580
    },
    {
      "epoch": 1.233282928457807,
      "grad_norm": 0.4940588176250458,
      "learning_rate": 7.583073608191721e-06,
      "loss": 0.0212,
      "step": 753600
    },
    {
      "epoch": 1.2333156588964604,
      "grad_norm": 0.6957495808601379,
      "learning_rate": 7.583007715978203e-06,
      "loss": 0.0263,
      "step": 753620
    },
    {
      "epoch": 1.2333483893351138,
      "grad_norm": 0.7841159105300903,
      "learning_rate": 7.5829418237646866e-06,
      "loss": 0.0233,
      "step": 753640
    },
    {
      "epoch": 1.2333811197737672,
      "grad_norm": 0.6545689702033997,
      "learning_rate": 7.582875931551169e-06,
      "loss": 0.0259,
      "step": 753660
    },
    {
      "epoch": 1.2334138502124206,
      "grad_norm": 0.2070629596710205,
      "learning_rate": 7.582810039337652e-06,
      "loss": 0.0316,
      "step": 753680
    },
    {
      "epoch": 1.233446580651074,
      "grad_norm": 0.3652457594871521,
      "learning_rate": 7.582744147124135e-06,
      "loss": 0.0204,
      "step": 753700
    },
    {
      "epoch": 1.2334793110897273,
      "grad_norm": 0.1947432905435562,
      "learning_rate": 7.582678254910618e-06,
      "loss": 0.025,
      "step": 753720
    },
    {
      "epoch": 1.2335120415283805,
      "grad_norm": 0.3561743497848511,
      "learning_rate": 7.5826123626971e-06,
      "loss": 0.017,
      "step": 753740
    },
    {
      "epoch": 1.2335447719670338,
      "grad_norm": 0.24745844304561615,
      "learning_rate": 7.582546470483584e-06,
      "loss": 0.0286,
      "step": 753760
    },
    {
      "epoch": 1.2335775024056872,
      "grad_norm": 4.748570442199707,
      "learning_rate": 7.582480578270066e-06,
      "loss": 0.0315,
      "step": 753780
    },
    {
      "epoch": 1.2336102328443406,
      "grad_norm": 1.4021131992340088,
      "learning_rate": 7.582414686056549e-06,
      "loss": 0.0212,
      "step": 753800
    },
    {
      "epoch": 1.233642963282994,
      "grad_norm": 0.8462445735931396,
      "learning_rate": 7.582348793843032e-06,
      "loss": 0.0246,
      "step": 753820
    },
    {
      "epoch": 1.2336756937216473,
      "grad_norm": 0.2661016583442688,
      "learning_rate": 7.582282901629515e-06,
      "loss": 0.0135,
      "step": 753840
    },
    {
      "epoch": 1.2337084241603007,
      "grad_norm": 0.8490831255912781,
      "learning_rate": 7.5822170094159976e-06,
      "loss": 0.0262,
      "step": 753860
    },
    {
      "epoch": 1.2337411545989538,
      "grad_norm": 0.5250819325447083,
      "learning_rate": 7.582151117202481e-06,
      "loss": 0.0244,
      "step": 753880
    },
    {
      "epoch": 1.2337738850376072,
      "grad_norm": 2.5682239532470703,
      "learning_rate": 7.582085224988963e-06,
      "loss": 0.0241,
      "step": 753900
    },
    {
      "epoch": 1.2338066154762606,
      "grad_norm": 0.23142442107200623,
      "learning_rate": 7.582019332775447e-06,
      "loss": 0.0269,
      "step": 753920
    },
    {
      "epoch": 1.233839345914914,
      "grad_norm": 0.7166620492935181,
      "learning_rate": 7.58195344056193e-06,
      "loss": 0.0208,
      "step": 753940
    },
    {
      "epoch": 1.2338720763535673,
      "grad_norm": 0.3215869963169098,
      "learning_rate": 7.581887548348412e-06,
      "loss": 0.0242,
      "step": 753960
    },
    {
      "epoch": 1.2339048067922207,
      "grad_norm": 0.6748185157775879,
      "learning_rate": 7.581821656134896e-06,
      "loss": 0.0203,
      "step": 753980
    },
    {
      "epoch": 1.233937537230874,
      "grad_norm": 1.3424125909805298,
      "learning_rate": 7.581755763921378e-06,
      "loss": 0.0263,
      "step": 754000
    },
    {
      "epoch": 1.2339702676695272,
      "grad_norm": 1.0784525871276855,
      "learning_rate": 7.581689871707861e-06,
      "loss": 0.0279,
      "step": 754020
    },
    {
      "epoch": 1.2340029981081806,
      "grad_norm": 1.3501996994018555,
      "learning_rate": 7.581623979494344e-06,
      "loss": 0.0163,
      "step": 754040
    },
    {
      "epoch": 1.234035728546834,
      "grad_norm": 0.9142423272132874,
      "learning_rate": 7.581558087280827e-06,
      "loss": 0.0227,
      "step": 754060
    },
    {
      "epoch": 1.2340684589854873,
      "grad_norm": 0.16935883462429047,
      "learning_rate": 7.581492195067309e-06,
      "loss": 0.0235,
      "step": 754080
    },
    {
      "epoch": 1.2341011894241407,
      "grad_norm": 0.48321521282196045,
      "learning_rate": 7.581426302853793e-06,
      "loss": 0.0193,
      "step": 754100
    },
    {
      "epoch": 1.234133919862794,
      "grad_norm": 0.1908605694770813,
      "learning_rate": 7.581360410640275e-06,
      "loss": 0.0198,
      "step": 754120
    },
    {
      "epoch": 1.2341666503014475,
      "grad_norm": 0.6786611676216125,
      "learning_rate": 7.5812945184267585e-06,
      "loss": 0.0202,
      "step": 754140
    },
    {
      "epoch": 1.2341993807401006,
      "grad_norm": 1.367104172706604,
      "learning_rate": 7.58122862621324e-06,
      "loss": 0.0214,
      "step": 754160
    },
    {
      "epoch": 1.234232111178754,
      "grad_norm": 1.076823353767395,
      "learning_rate": 7.581162733999724e-06,
      "loss": 0.0244,
      "step": 754180
    },
    {
      "epoch": 1.2342648416174073,
      "grad_norm": 2.49363374710083,
      "learning_rate": 7.581096841786207e-06,
      "loss": 0.0297,
      "step": 754200
    },
    {
      "epoch": 1.2342975720560607,
      "grad_norm": 0.5924906134605408,
      "learning_rate": 7.581030949572689e-06,
      "loss": 0.0372,
      "step": 754220
    },
    {
      "epoch": 1.234330302494714,
      "grad_norm": 1.1725070476531982,
      "learning_rate": 7.580965057359172e-06,
      "loss": 0.0232,
      "step": 754240
    },
    {
      "epoch": 1.2343630329333675,
      "grad_norm": 0.5098806619644165,
      "learning_rate": 7.580899165145656e-06,
      "loss": 0.0223,
      "step": 754260
    },
    {
      "epoch": 1.2343957633720206,
      "grad_norm": 0.3498584032058716,
      "learning_rate": 7.5808332729321385e-06,
      "loss": 0.0289,
      "step": 754280
    },
    {
      "epoch": 1.234428493810674,
      "grad_norm": 0.7505165934562683,
      "learning_rate": 7.580767380718621e-06,
      "loss": 0.0222,
      "step": 754300
    },
    {
      "epoch": 1.2344612242493274,
      "grad_norm": 1.4504700899124146,
      "learning_rate": 7.580701488505105e-06,
      "loss": 0.0226,
      "step": 754320
    },
    {
      "epoch": 1.2344939546879807,
      "grad_norm": 0.16614006459712982,
      "learning_rate": 7.580635596291587e-06,
      "loss": 0.021,
      "step": 754340
    },
    {
      "epoch": 1.234526685126634,
      "grad_norm": 1.112532138824463,
      "learning_rate": 7.58056970407807e-06,
      "loss": 0.0322,
      "step": 754360
    },
    {
      "epoch": 1.2345594155652875,
      "grad_norm": 0.7650244235992432,
      "learning_rate": 7.580503811864552e-06,
      "loss": 0.0203,
      "step": 754380
    },
    {
      "epoch": 1.2345921460039406,
      "grad_norm": 0.2309822291135788,
      "learning_rate": 7.580437919651036e-06,
      "loss": 0.0165,
      "step": 754400
    },
    {
      "epoch": 1.234624876442594,
      "grad_norm": 0.2623017132282257,
      "learning_rate": 7.5803720274375185e-06,
      "loss": 0.0157,
      "step": 754420
    },
    {
      "epoch": 1.2346576068812474,
      "grad_norm": 0.23567485809326172,
      "learning_rate": 7.580306135224001e-06,
      "loss": 0.0175,
      "step": 754440
    },
    {
      "epoch": 1.2346903373199007,
      "grad_norm": 0.350112646818161,
      "learning_rate": 7.580240243010484e-06,
      "loss": 0.0177,
      "step": 754460
    },
    {
      "epoch": 1.2347230677585541,
      "grad_norm": 0.5559261441230774,
      "learning_rate": 7.5801743507969676e-06,
      "loss": 0.0214,
      "step": 754480
    },
    {
      "epoch": 1.2347557981972075,
      "grad_norm": 1.0709145069122314,
      "learning_rate": 7.5801084585834495e-06,
      "loss": 0.0346,
      "step": 754500
    },
    {
      "epoch": 1.2347885286358609,
      "grad_norm": 2.5571086406707764,
      "learning_rate": 7.580042566369933e-06,
      "loss": 0.0187,
      "step": 754520
    },
    {
      "epoch": 1.234821259074514,
      "grad_norm": 0.7398582696914673,
      "learning_rate": 7.579976674156415e-06,
      "loss": 0.0252,
      "step": 754540
    },
    {
      "epoch": 1.2348539895131674,
      "grad_norm": 0.9312058091163635,
      "learning_rate": 7.5799107819428985e-06,
      "loss": 0.0203,
      "step": 754560
    },
    {
      "epoch": 1.2348867199518208,
      "grad_norm": 1.2582361698150635,
      "learning_rate": 7.5798448897293804e-06,
      "loss": 0.0241,
      "step": 754580
    },
    {
      "epoch": 1.2349194503904741,
      "grad_norm": 0.8421991467475891,
      "learning_rate": 7.579778997515864e-06,
      "loss": 0.0319,
      "step": 754600
    },
    {
      "epoch": 1.2349521808291275,
      "grad_norm": 0.45056578516960144,
      "learning_rate": 7.579713105302347e-06,
      "loss": 0.0178,
      "step": 754620
    },
    {
      "epoch": 1.2349849112677809,
      "grad_norm": 1.648608922958374,
      "learning_rate": 7.5796472130888295e-06,
      "loss": 0.0221,
      "step": 754640
    },
    {
      "epoch": 1.2350176417064342,
      "grad_norm": 1.5589553117752075,
      "learning_rate": 7.579581320875313e-06,
      "loss": 0.0268,
      "step": 754660
    },
    {
      "epoch": 1.2350503721450874,
      "grad_norm": 0.27851372957229614,
      "learning_rate": 7.579515428661796e-06,
      "loss": 0.0289,
      "step": 754680
    },
    {
      "epoch": 1.2350831025837408,
      "grad_norm": 0.5127952694892883,
      "learning_rate": 7.5794495364482786e-06,
      "loss": 0.0177,
      "step": 754700
    },
    {
      "epoch": 1.2351158330223941,
      "grad_norm": 0.8699866533279419,
      "learning_rate": 7.579383644234761e-06,
      "loss": 0.0271,
      "step": 754720
    },
    {
      "epoch": 1.2351485634610475,
      "grad_norm": 0.07833480834960938,
      "learning_rate": 7.579317752021245e-06,
      "loss": 0.0174,
      "step": 754740
    },
    {
      "epoch": 1.2351812938997009,
      "grad_norm": 0.4938105046749115,
      "learning_rate": 7.579251859807727e-06,
      "loss": 0.0192,
      "step": 754760
    },
    {
      "epoch": 1.2352140243383543,
      "grad_norm": 0.04425986483693123,
      "learning_rate": 7.57918596759421e-06,
      "loss": 0.018,
      "step": 754780
    },
    {
      "epoch": 1.2352467547770076,
      "grad_norm": 0.5775752663612366,
      "learning_rate": 7.579120075380692e-06,
      "loss": 0.0201,
      "step": 754800
    },
    {
      "epoch": 1.2352794852156608,
      "grad_norm": 0.41869598627090454,
      "learning_rate": 7.579054183167176e-06,
      "loss": 0.0176,
      "step": 754820
    },
    {
      "epoch": 1.2353122156543141,
      "grad_norm": 0.2338181436061859,
      "learning_rate": 7.578988290953659e-06,
      "loss": 0.0307,
      "step": 754840
    },
    {
      "epoch": 1.2353449460929675,
      "grad_norm": 0.5674017071723938,
      "learning_rate": 7.578922398740141e-06,
      "loss": 0.0206,
      "step": 754860
    },
    {
      "epoch": 1.235377676531621,
      "grad_norm": 0.4905308783054352,
      "learning_rate": 7.578856506526624e-06,
      "loss": 0.0323,
      "step": 754880
    },
    {
      "epoch": 1.2354104069702743,
      "grad_norm": 1.7335960865020752,
      "learning_rate": 7.578790614313108e-06,
      "loss": 0.021,
      "step": 754900
    },
    {
      "epoch": 1.2354431374089276,
      "grad_norm": 0.44577229022979736,
      "learning_rate": 7.5787247220995895e-06,
      "loss": 0.0206,
      "step": 754920
    },
    {
      "epoch": 1.235475867847581,
      "grad_norm": 0.6534922122955322,
      "learning_rate": 7.578658829886073e-06,
      "loss": 0.0215,
      "step": 754940
    },
    {
      "epoch": 1.2355085982862342,
      "grad_norm": 1.509668231010437,
      "learning_rate": 7.578592937672555e-06,
      "loss": 0.0219,
      "step": 754960
    },
    {
      "epoch": 1.2355413287248875,
      "grad_norm": 0.2801959216594696,
      "learning_rate": 7.578527045459039e-06,
      "loss": 0.0213,
      "step": 754980
    },
    {
      "epoch": 1.235574059163541,
      "grad_norm": 0.24449896812438965,
      "learning_rate": 7.578461153245522e-06,
      "loss": 0.0201,
      "step": 755000
    },
    {
      "epoch": 1.2356067896021943,
      "grad_norm": 0.2423085719347,
      "learning_rate": 7.578395261032004e-06,
      "loss": 0.0277,
      "step": 755020
    },
    {
      "epoch": 1.2356395200408476,
      "grad_norm": 0.9366339445114136,
      "learning_rate": 7.578329368818488e-06,
      "loss": 0.0262,
      "step": 755040
    },
    {
      "epoch": 1.235672250479501,
      "grad_norm": 0.41502517461776733,
      "learning_rate": 7.57826347660497e-06,
      "loss": 0.0258,
      "step": 755060
    },
    {
      "epoch": 1.2357049809181542,
      "grad_norm": 0.6625540852546692,
      "learning_rate": 7.578197584391453e-06,
      "loss": 0.0277,
      "step": 755080
    },
    {
      "epoch": 1.2357377113568075,
      "grad_norm": 0.05371519550681114,
      "learning_rate": 7.578131692177936e-06,
      "loss": 0.027,
      "step": 755100
    },
    {
      "epoch": 1.235770441795461,
      "grad_norm": 1.0451064109802246,
      "learning_rate": 7.5780657999644195e-06,
      "loss": 0.0217,
      "step": 755120
    },
    {
      "epoch": 1.2358031722341143,
      "grad_norm": 0.6520038843154907,
      "learning_rate": 7.577999907750901e-06,
      "loss": 0.024,
      "step": 755140
    },
    {
      "epoch": 1.2358359026727677,
      "grad_norm": 0.9174270033836365,
      "learning_rate": 7.577934015537385e-06,
      "loss": 0.0245,
      "step": 755160
    },
    {
      "epoch": 1.235868633111421,
      "grad_norm": 1.4613826274871826,
      "learning_rate": 7.577868123323867e-06,
      "loss": 0.0212,
      "step": 755180
    },
    {
      "epoch": 1.2359013635500742,
      "grad_norm": 0.8713386654853821,
      "learning_rate": 7.5778022311103504e-06,
      "loss": 0.0215,
      "step": 755200
    },
    {
      "epoch": 1.2359340939887276,
      "grad_norm": 0.1735527217388153,
      "learning_rate": 7.577736338896833e-06,
      "loss": 0.0229,
      "step": 755220
    },
    {
      "epoch": 1.235966824427381,
      "grad_norm": 0.6898435354232788,
      "learning_rate": 7.577670446683316e-06,
      "loss": 0.0287,
      "step": 755240
    },
    {
      "epoch": 1.2359995548660343,
      "grad_norm": 0.6924219131469727,
      "learning_rate": 7.577604554469799e-06,
      "loss": 0.0199,
      "step": 755260
    },
    {
      "epoch": 1.2360322853046877,
      "grad_norm": 0.6279421448707581,
      "learning_rate": 7.577538662256282e-06,
      "loss": 0.0182,
      "step": 755280
    },
    {
      "epoch": 1.236065015743341,
      "grad_norm": 1.3800221681594849,
      "learning_rate": 7.577472770042764e-06,
      "loss": 0.0303,
      "step": 755300
    },
    {
      "epoch": 1.2360977461819944,
      "grad_norm": 1.7959175109863281,
      "learning_rate": 7.577406877829248e-06,
      "loss": 0.0204,
      "step": 755320
    },
    {
      "epoch": 1.2361304766206476,
      "grad_norm": 0.31845352053642273,
      "learning_rate": 7.577340985615731e-06,
      "loss": 0.0233,
      "step": 755340
    },
    {
      "epoch": 1.236163207059301,
      "grad_norm": 0.3910629451274872,
      "learning_rate": 7.577275093402213e-06,
      "loss": 0.0171,
      "step": 755360
    },
    {
      "epoch": 1.2361959374979543,
      "grad_norm": 1.090149164199829,
      "learning_rate": 7.577209201188697e-06,
      "loss": 0.0225,
      "step": 755380
    },
    {
      "epoch": 1.2362286679366077,
      "grad_norm": 0.9010631442070007,
      "learning_rate": 7.577143308975179e-06,
      "loss": 0.0124,
      "step": 755400
    },
    {
      "epoch": 1.236261398375261,
      "grad_norm": 0.18951383233070374,
      "learning_rate": 7.577077416761662e-06,
      "loss": 0.0189,
      "step": 755420
    },
    {
      "epoch": 1.2362941288139144,
      "grad_norm": 1.143178105354309,
      "learning_rate": 7.577011524548145e-06,
      "loss": 0.0291,
      "step": 755440
    },
    {
      "epoch": 1.2363268592525678,
      "grad_norm": 0.8030127882957458,
      "learning_rate": 7.576945632334628e-06,
      "loss": 0.0273,
      "step": 755460
    },
    {
      "epoch": 1.236359589691221,
      "grad_norm": 1.888153314590454,
      "learning_rate": 7.5768797401211105e-06,
      "loss": 0.0227,
      "step": 755480
    },
    {
      "epoch": 1.2363923201298743,
      "grad_norm": 1.2734376192092896,
      "learning_rate": 7.576813847907594e-06,
      "loss": 0.0201,
      "step": 755500
    },
    {
      "epoch": 1.2364250505685277,
      "grad_norm": 1.5577285289764404,
      "learning_rate": 7.576747955694076e-06,
      "loss": 0.0279,
      "step": 755520
    },
    {
      "epoch": 1.236457781007181,
      "grad_norm": 0.8249866366386414,
      "learning_rate": 7.5766820634805596e-06,
      "loss": 0.02,
      "step": 755540
    },
    {
      "epoch": 1.2364905114458344,
      "grad_norm": 0.30414167046546936,
      "learning_rate": 7.5766161712670414e-06,
      "loss": 0.0155,
      "step": 755560
    },
    {
      "epoch": 1.2365232418844878,
      "grad_norm": 1.354480266571045,
      "learning_rate": 7.576550279053525e-06,
      "loss": 0.0187,
      "step": 755580
    },
    {
      "epoch": 1.2365559723231412,
      "grad_norm": 0.4961682856082916,
      "learning_rate": 7.576484386840007e-06,
      "loss": 0.0223,
      "step": 755600
    },
    {
      "epoch": 1.2365887027617943,
      "grad_norm": 0.6888240575790405,
      "learning_rate": 7.5764184946264905e-06,
      "loss": 0.0162,
      "step": 755620
    },
    {
      "epoch": 1.2366214332004477,
      "grad_norm": 0.578439474105835,
      "learning_rate": 7.576352602412973e-06,
      "loss": 0.02,
      "step": 755640
    },
    {
      "epoch": 1.236654163639101,
      "grad_norm": 0.25621944665908813,
      "learning_rate": 7.576286710199456e-06,
      "loss": 0.0292,
      "step": 755660
    },
    {
      "epoch": 1.2366868940777545,
      "grad_norm": 0.69388347864151,
      "learning_rate": 7.576220817985939e-06,
      "loss": 0.017,
      "step": 755680
    },
    {
      "epoch": 1.2367196245164078,
      "grad_norm": 0.3836732804775238,
      "learning_rate": 7.576154925772422e-06,
      "loss": 0.0256,
      "step": 755700
    },
    {
      "epoch": 1.2367523549550612,
      "grad_norm": 0.556129515171051,
      "learning_rate": 7.576089033558905e-06,
      "loss": 0.0223,
      "step": 755720
    },
    {
      "epoch": 1.2367850853937146,
      "grad_norm": 0.8401057720184326,
      "learning_rate": 7.576023141345388e-06,
      "loss": 0.0223,
      "step": 755740
    },
    {
      "epoch": 1.2368178158323677,
      "grad_norm": 0.7457388639450073,
      "learning_rate": 7.575957249131871e-06,
      "loss": 0.0194,
      "step": 755760
    },
    {
      "epoch": 1.236850546271021,
      "grad_norm": 0.4924732744693756,
      "learning_rate": 7.575891356918353e-06,
      "loss": 0.0225,
      "step": 755780
    },
    {
      "epoch": 1.2368832767096745,
      "grad_norm": 0.9229470491409302,
      "learning_rate": 7.575825464704837e-06,
      "loss": 0.0195,
      "step": 755800
    },
    {
      "epoch": 1.2369160071483278,
      "grad_norm": 0.5325247049331665,
      "learning_rate": 7.575759572491319e-06,
      "loss": 0.0203,
      "step": 755820
    },
    {
      "epoch": 1.2369487375869812,
      "grad_norm": 0.8922203183174133,
      "learning_rate": 7.575693680277802e-06,
      "loss": 0.0169,
      "step": 755840
    },
    {
      "epoch": 1.2369814680256344,
      "grad_norm": 0.28601521253585815,
      "learning_rate": 7.575627788064285e-06,
      "loss": 0.0216,
      "step": 755860
    },
    {
      "epoch": 1.2370141984642877,
      "grad_norm": 1.9263877868652344,
      "learning_rate": 7.575561895850768e-06,
      "loss": 0.0237,
      "step": 755880
    },
    {
      "epoch": 1.237046928902941,
      "grad_norm": 0.5028662085533142,
      "learning_rate": 7.5754960036372506e-06,
      "loss": 0.0157,
      "step": 755900
    },
    {
      "epoch": 1.2370796593415945,
      "grad_norm": 0.889513373374939,
      "learning_rate": 7.575430111423734e-06,
      "loss": 0.0206,
      "step": 755920
    },
    {
      "epoch": 1.2371123897802478,
      "grad_norm": 0.5176527500152588,
      "learning_rate": 7.575364219210216e-06,
      "loss": 0.0337,
      "step": 755940
    },
    {
      "epoch": 1.2371451202189012,
      "grad_norm": 0.21777327358722687,
      "learning_rate": 7.5752983269967e-06,
      "loss": 0.018,
      "step": 755960
    },
    {
      "epoch": 1.2371778506575546,
      "grad_norm": 1.0862948894500732,
      "learning_rate": 7.5752324347831815e-06,
      "loss": 0.0214,
      "step": 755980
    },
    {
      "epoch": 1.2372105810962077,
      "grad_norm": 0.752413809299469,
      "learning_rate": 7.575166542569665e-06,
      "loss": 0.021,
      "step": 756000
    },
    {
      "epoch": 1.2372433115348611,
      "grad_norm": 0.6487048864364624,
      "learning_rate": 7.575100650356148e-06,
      "loss": 0.0327,
      "step": 756020
    },
    {
      "epoch": 1.2372760419735145,
      "grad_norm": 0.2986850440502167,
      "learning_rate": 7.575034758142631e-06,
      "loss": 0.0181,
      "step": 756040
    },
    {
      "epoch": 1.2373087724121679,
      "grad_norm": 1.5159239768981934,
      "learning_rate": 7.574968865929114e-06,
      "loss": 0.0203,
      "step": 756060
    },
    {
      "epoch": 1.2373415028508212,
      "grad_norm": 1.0164908170700073,
      "learning_rate": 7.574902973715597e-06,
      "loss": 0.0153,
      "step": 756080
    },
    {
      "epoch": 1.2373742332894746,
      "grad_norm": 0.2227221578359604,
      "learning_rate": 7.57483708150208e-06,
      "loss": 0.0215,
      "step": 756100
    },
    {
      "epoch": 1.237406963728128,
      "grad_norm": 0.5623601078987122,
      "learning_rate": 7.574771189288562e-06,
      "loss": 0.0252,
      "step": 756120
    },
    {
      "epoch": 1.2374396941667811,
      "grad_norm": 0.748250424861908,
      "learning_rate": 7.574705297075046e-06,
      "loss": 0.0189,
      "step": 756140
    },
    {
      "epoch": 1.2374724246054345,
      "grad_norm": 0.07600053399801254,
      "learning_rate": 7.574639404861528e-06,
      "loss": 0.0242,
      "step": 756160
    },
    {
      "epoch": 1.2375051550440879,
      "grad_norm": 1.225995421409607,
      "learning_rate": 7.5745735126480115e-06,
      "loss": 0.0231,
      "step": 756180
    },
    {
      "epoch": 1.2375378854827412,
      "grad_norm": 0.4026243984699249,
      "learning_rate": 7.574507620434493e-06,
      "loss": 0.0284,
      "step": 756200
    },
    {
      "epoch": 1.2375706159213946,
      "grad_norm": 0.875935971736908,
      "learning_rate": 7.574441728220977e-06,
      "loss": 0.0202,
      "step": 756220
    },
    {
      "epoch": 1.237603346360048,
      "grad_norm": 0.8980965614318848,
      "learning_rate": 7.57437583600746e-06,
      "loss": 0.0216,
      "step": 756240
    },
    {
      "epoch": 1.2376360767987014,
      "grad_norm": 0.20267795026302338,
      "learning_rate": 7.574309943793942e-06,
      "loss": 0.0155,
      "step": 756260
    },
    {
      "epoch": 1.2376688072373545,
      "grad_norm": 1.228909969329834,
      "learning_rate": 7.574244051580425e-06,
      "loss": 0.0278,
      "step": 756280
    },
    {
      "epoch": 1.2377015376760079,
      "grad_norm": 0.9190994501113892,
      "learning_rate": 7.574178159366909e-06,
      "loss": 0.0156,
      "step": 756300
    },
    {
      "epoch": 1.2377342681146613,
      "grad_norm": 1.1494067907333374,
      "learning_rate": 7.574112267153391e-06,
      "loss": 0.0171,
      "step": 756320
    },
    {
      "epoch": 1.2377669985533146,
      "grad_norm": 0.7503900527954102,
      "learning_rate": 7.574046374939874e-06,
      "loss": 0.0196,
      "step": 756340
    },
    {
      "epoch": 1.237799728991968,
      "grad_norm": 0.4472282826900482,
      "learning_rate": 7.573980482726356e-06,
      "loss": 0.024,
      "step": 756360
    },
    {
      "epoch": 1.2378324594306214,
      "grad_norm": 0.3989265561103821,
      "learning_rate": 7.57391459051284e-06,
      "loss": 0.0307,
      "step": 756380
    },
    {
      "epoch": 1.2378651898692747,
      "grad_norm": 0.8325772285461426,
      "learning_rate": 7.573848698299323e-06,
      "loss": 0.0231,
      "step": 756400
    },
    {
      "epoch": 1.237897920307928,
      "grad_norm": 0.2955869436264038,
      "learning_rate": 7.573782806085805e-06,
      "loss": 0.0157,
      "step": 756420
    },
    {
      "epoch": 1.2379306507465813,
      "grad_norm": 0.1783706545829773,
      "learning_rate": 7.573716913872289e-06,
      "loss": 0.0205,
      "step": 756440
    },
    {
      "epoch": 1.2379633811852346,
      "grad_norm": 0.5514417290687561,
      "learning_rate": 7.573651021658771e-06,
      "loss": 0.0264,
      "step": 756460
    },
    {
      "epoch": 1.237996111623888,
      "grad_norm": 0.5485270023345947,
      "learning_rate": 7.573585129445254e-06,
      "loss": 0.0219,
      "step": 756480
    },
    {
      "epoch": 1.2380288420625414,
      "grad_norm": 0.9307210445404053,
      "learning_rate": 7.573519237231737e-06,
      "loss": 0.0218,
      "step": 756500
    },
    {
      "epoch": 1.2380615725011948,
      "grad_norm": 0.5253509879112244,
      "learning_rate": 7.573453345018221e-06,
      "loss": 0.0289,
      "step": 756520
    },
    {
      "epoch": 1.2380943029398481,
      "grad_norm": 0.7842292189598083,
      "learning_rate": 7.5733874528047025e-06,
      "loss": 0.0249,
      "step": 756540
    },
    {
      "epoch": 1.2381270333785013,
      "grad_norm": 0.5775939226150513,
      "learning_rate": 7.573321560591186e-06,
      "loss": 0.0304,
      "step": 756560
    },
    {
      "epoch": 1.2381597638171546,
      "grad_norm": 0.34814074635505676,
      "learning_rate": 7.573255668377668e-06,
      "loss": 0.0223,
      "step": 756580
    },
    {
      "epoch": 1.238192494255808,
      "grad_norm": 5.15753698348999,
      "learning_rate": 7.5731897761641515e-06,
      "loss": 0.0298,
      "step": 756600
    },
    {
      "epoch": 1.2382252246944614,
      "grad_norm": 0.5841540098190308,
      "learning_rate": 7.5731238839506334e-06,
      "loss": 0.0198,
      "step": 756620
    },
    {
      "epoch": 1.2382579551331148,
      "grad_norm": 0.5905358791351318,
      "learning_rate": 7.573057991737117e-06,
      "loss": 0.0198,
      "step": 756640
    },
    {
      "epoch": 1.238290685571768,
      "grad_norm": 0.14505021274089813,
      "learning_rate": 7.5729920995236e-06,
      "loss": 0.0274,
      "step": 756660
    },
    {
      "epoch": 1.2383234160104213,
      "grad_norm": 0.8983436226844788,
      "learning_rate": 7.5729262073100825e-06,
      "loss": 0.0262,
      "step": 756680
    },
    {
      "epoch": 1.2383561464490747,
      "grad_norm": 0.39458221197128296,
      "learning_rate": 7.572860315096565e-06,
      "loss": 0.0154,
      "step": 756700
    },
    {
      "epoch": 1.238388876887728,
      "grad_norm": 0.2636810839176178,
      "learning_rate": 7.572794422883049e-06,
      "loss": 0.0172,
      "step": 756720
    },
    {
      "epoch": 1.2384216073263814,
      "grad_norm": 3.6640613079071045,
      "learning_rate": 7.5727285306695316e-06,
      "loss": 0.0322,
      "step": 756740
    },
    {
      "epoch": 1.2384543377650348,
      "grad_norm": 0.3364923894405365,
      "learning_rate": 7.572662638456014e-06,
      "loss": 0.0233,
      "step": 756760
    },
    {
      "epoch": 1.2384870682036881,
      "grad_norm": 0.18941143155097961,
      "learning_rate": 7.572596746242498e-06,
      "loss": 0.022,
      "step": 756780
    },
    {
      "epoch": 1.2385197986423413,
      "grad_norm": 0.7651346325874329,
      "learning_rate": 7.57253085402898e-06,
      "loss": 0.0273,
      "step": 756800
    },
    {
      "epoch": 1.2385525290809947,
      "grad_norm": 0.8902360796928406,
      "learning_rate": 7.572464961815463e-06,
      "loss": 0.0228,
      "step": 756820
    },
    {
      "epoch": 1.238585259519648,
      "grad_norm": 0.7174619436264038,
      "learning_rate": 7.572399069601945e-06,
      "loss": 0.017,
      "step": 756840
    },
    {
      "epoch": 1.2386179899583014,
      "grad_norm": 0.21064171195030212,
      "learning_rate": 7.572333177388429e-06,
      "loss": 0.0266,
      "step": 756860
    },
    {
      "epoch": 1.2386507203969548,
      "grad_norm": 0.7954624891281128,
      "learning_rate": 7.572267285174912e-06,
      "loss": 0.0214,
      "step": 756880
    },
    {
      "epoch": 1.2386834508356082,
      "grad_norm": 0.5815659761428833,
      "learning_rate": 7.572201392961394e-06,
      "loss": 0.0311,
      "step": 756900
    },
    {
      "epoch": 1.2387161812742615,
      "grad_norm": 1.5415948629379272,
      "learning_rate": 7.572135500747877e-06,
      "loss": 0.0354,
      "step": 756920
    },
    {
      "epoch": 1.2387489117129147,
      "grad_norm": 0.3942345082759857,
      "learning_rate": 7.572069608534361e-06,
      "loss": 0.0294,
      "step": 756940
    },
    {
      "epoch": 1.238781642151568,
      "grad_norm": 0.15402062237262726,
      "learning_rate": 7.5720037163208425e-06,
      "loss": 0.0322,
      "step": 756960
    },
    {
      "epoch": 1.2388143725902214,
      "grad_norm": 0.2912607789039612,
      "learning_rate": 7.571937824107326e-06,
      "loss": 0.0285,
      "step": 756980
    },
    {
      "epoch": 1.2388471030288748,
      "grad_norm": 0.5918707251548767,
      "learning_rate": 7.571871931893808e-06,
      "loss": 0.0189,
      "step": 757000
    },
    {
      "epoch": 1.2388798334675282,
      "grad_norm": 0.3803408741950989,
      "learning_rate": 7.571806039680292e-06,
      "loss": 0.0248,
      "step": 757020
    },
    {
      "epoch": 1.2389125639061815,
      "grad_norm": 0.7530470490455627,
      "learning_rate": 7.571740147466774e-06,
      "loss": 0.0239,
      "step": 757040
    },
    {
      "epoch": 1.238945294344835,
      "grad_norm": 0.9140724539756775,
      "learning_rate": 7.571674255253257e-06,
      "loss": 0.0234,
      "step": 757060
    },
    {
      "epoch": 1.238978024783488,
      "grad_norm": 0.7703093886375427,
      "learning_rate": 7.57160836303974e-06,
      "loss": 0.0349,
      "step": 757080
    },
    {
      "epoch": 1.2390107552221414,
      "grad_norm": 0.7232213616371155,
      "learning_rate": 7.571542470826223e-06,
      "loss": 0.021,
      "step": 757100
    },
    {
      "epoch": 1.2390434856607948,
      "grad_norm": 0.7766711711883545,
      "learning_rate": 7.571476578612706e-06,
      "loss": 0.0274,
      "step": 757120
    },
    {
      "epoch": 1.2390762160994482,
      "grad_norm": 1.4330224990844727,
      "learning_rate": 7.571410686399189e-06,
      "loss": 0.0222,
      "step": 757140
    },
    {
      "epoch": 1.2391089465381016,
      "grad_norm": 0.7101601958274841,
      "learning_rate": 7.5713447941856725e-06,
      "loss": 0.0292,
      "step": 757160
    },
    {
      "epoch": 1.239141676976755,
      "grad_norm": 1.1934822797775269,
      "learning_rate": 7.571278901972154e-06,
      "loss": 0.0306,
      "step": 757180
    },
    {
      "epoch": 1.2391744074154083,
      "grad_norm": 0.5182926654815674,
      "learning_rate": 7.571213009758638e-06,
      "loss": 0.0243,
      "step": 757200
    },
    {
      "epoch": 1.2392071378540614,
      "grad_norm": 1.2801858186721802,
      "learning_rate": 7.57114711754512e-06,
      "loss": 0.0187,
      "step": 757220
    },
    {
      "epoch": 1.2392398682927148,
      "grad_norm": 0.314464807510376,
      "learning_rate": 7.5710812253316034e-06,
      "loss": 0.023,
      "step": 757240
    },
    {
      "epoch": 1.2392725987313682,
      "grad_norm": 0.35880398750305176,
      "learning_rate": 7.571015333118086e-06,
      "loss": 0.0133,
      "step": 757260
    },
    {
      "epoch": 1.2393053291700216,
      "grad_norm": 0.4228472113609314,
      "learning_rate": 7.570949440904569e-06,
      "loss": 0.0174,
      "step": 757280
    },
    {
      "epoch": 1.239338059608675,
      "grad_norm": 0.765592634677887,
      "learning_rate": 7.570883548691052e-06,
      "loss": 0.0271,
      "step": 757300
    },
    {
      "epoch": 1.2393707900473283,
      "grad_norm": 0.4521869719028473,
      "learning_rate": 7.570817656477535e-06,
      "loss": 0.0244,
      "step": 757320
    },
    {
      "epoch": 1.2394035204859815,
      "grad_norm": 0.5707185864448547,
      "learning_rate": 7.570751764264017e-06,
      "loss": 0.0207,
      "step": 757340
    },
    {
      "epoch": 1.2394362509246348,
      "grad_norm": 1.5747560262680054,
      "learning_rate": 7.570685872050501e-06,
      "loss": 0.025,
      "step": 757360
    },
    {
      "epoch": 1.2394689813632882,
      "grad_norm": 2.452223539352417,
      "learning_rate": 7.570619979836983e-06,
      "loss": 0.0244,
      "step": 757380
    },
    {
      "epoch": 1.2395017118019416,
      "grad_norm": 0.6082735061645508,
      "learning_rate": 7.570554087623466e-06,
      "loss": 0.0229,
      "step": 757400
    },
    {
      "epoch": 1.239534442240595,
      "grad_norm": 1.0551092624664307,
      "learning_rate": 7.570488195409948e-06,
      "loss": 0.0337,
      "step": 757420
    },
    {
      "epoch": 1.2395671726792483,
      "grad_norm": 0.3160257041454315,
      "learning_rate": 7.570422303196432e-06,
      "loss": 0.0272,
      "step": 757440
    },
    {
      "epoch": 1.2395999031179015,
      "grad_norm": 0.760195791721344,
      "learning_rate": 7.570356410982915e-06,
      "loss": 0.0219,
      "step": 757460
    },
    {
      "epoch": 1.2396326335565548,
      "grad_norm": 4.256242275238037,
      "learning_rate": 7.570290518769397e-06,
      "loss": 0.03,
      "step": 757480
    },
    {
      "epoch": 1.2396653639952082,
      "grad_norm": 0.97940593957901,
      "learning_rate": 7.570224626555881e-06,
      "loss": 0.0251,
      "step": 757500
    },
    {
      "epoch": 1.2396980944338616,
      "grad_norm": 0.9003401398658752,
      "learning_rate": 7.5701587343423635e-06,
      "loss": 0.0244,
      "step": 757520
    },
    {
      "epoch": 1.239730824872515,
      "grad_norm": 0.3938622772693634,
      "learning_rate": 7.570092842128846e-06,
      "loss": 0.0253,
      "step": 757540
    },
    {
      "epoch": 1.2397635553111683,
      "grad_norm": 0.3919871747493744,
      "learning_rate": 7.570026949915329e-06,
      "loss": 0.0372,
      "step": 757560
    },
    {
      "epoch": 1.2397962857498217,
      "grad_norm": 0.8839324116706848,
      "learning_rate": 7.5699610577018126e-06,
      "loss": 0.026,
      "step": 757580
    },
    {
      "epoch": 1.2398290161884749,
      "grad_norm": 0.3879636824131012,
      "learning_rate": 7.5698951654882945e-06,
      "loss": 0.021,
      "step": 757600
    },
    {
      "epoch": 1.2398617466271282,
      "grad_norm": 0.5447518825531006,
      "learning_rate": 7.569829273274778e-06,
      "loss": 0.0195,
      "step": 757620
    },
    {
      "epoch": 1.2398944770657816,
      "grad_norm": 0.8455767631530762,
      "learning_rate": 7.56976338106126e-06,
      "loss": 0.0174,
      "step": 757640
    },
    {
      "epoch": 1.239927207504435,
      "grad_norm": 0.1785380095243454,
      "learning_rate": 7.5696974888477435e-06,
      "loss": 0.0159,
      "step": 757660
    },
    {
      "epoch": 1.2399599379430883,
      "grad_norm": 0.569231390953064,
      "learning_rate": 7.569631596634226e-06,
      "loss": 0.0269,
      "step": 757680
    },
    {
      "epoch": 1.2399926683817417,
      "grad_norm": 0.810568630695343,
      "learning_rate": 7.569565704420709e-06,
      "loss": 0.0263,
      "step": 757700
    },
    {
      "epoch": 1.240025398820395,
      "grad_norm": 0.9381604790687561,
      "learning_rate": 7.569499812207192e-06,
      "loss": 0.0225,
      "step": 757720
    },
    {
      "epoch": 1.2400581292590482,
      "grad_norm": 1.09975004196167,
      "learning_rate": 7.569433919993675e-06,
      "loss": 0.0288,
      "step": 757740
    },
    {
      "epoch": 1.2400908596977016,
      "grad_norm": 0.7379977703094482,
      "learning_rate": 7.569368027780157e-06,
      "loss": 0.02,
      "step": 757760
    },
    {
      "epoch": 1.240123590136355,
      "grad_norm": 0.5841242074966431,
      "learning_rate": 7.569302135566641e-06,
      "loss": 0.0227,
      "step": 757780
    },
    {
      "epoch": 1.2401563205750084,
      "grad_norm": 0.4630143642425537,
      "learning_rate": 7.569236243353124e-06,
      "loss": 0.0207,
      "step": 757800
    },
    {
      "epoch": 1.2401890510136617,
      "grad_norm": 0.2956090569496155,
      "learning_rate": 7.569170351139606e-06,
      "loss": 0.0297,
      "step": 757820
    },
    {
      "epoch": 1.240221781452315,
      "grad_norm": 0.8351680636405945,
      "learning_rate": 7.56910445892609e-06,
      "loss": 0.0283,
      "step": 757840
    },
    {
      "epoch": 1.2402545118909685,
      "grad_norm": 1.3725998401641846,
      "learning_rate": 7.569038566712572e-06,
      "loss": 0.0197,
      "step": 757860
    },
    {
      "epoch": 1.2402872423296216,
      "grad_norm": 1.3087939023971558,
      "learning_rate": 7.568972674499055e-06,
      "loss": 0.0251,
      "step": 757880
    },
    {
      "epoch": 1.240319972768275,
      "grad_norm": 1.1401275396347046,
      "learning_rate": 7.568906782285538e-06,
      "loss": 0.028,
      "step": 757900
    },
    {
      "epoch": 1.2403527032069284,
      "grad_norm": 1.1360397338867188,
      "learning_rate": 7.568840890072021e-06,
      "loss": 0.0158,
      "step": 757920
    },
    {
      "epoch": 1.2403854336455817,
      "grad_norm": 0.3174002468585968,
      "learning_rate": 7.5687749978585036e-06,
      "loss": 0.0178,
      "step": 757940
    },
    {
      "epoch": 1.240418164084235,
      "grad_norm": 0.5878050327301025,
      "learning_rate": 7.568709105644987e-06,
      "loss": 0.02,
      "step": 757960
    },
    {
      "epoch": 1.2404508945228885,
      "grad_norm": 1.0516324043273926,
      "learning_rate": 7.568643213431469e-06,
      "loss": 0.03,
      "step": 757980
    },
    {
      "epoch": 1.2404836249615419,
      "grad_norm": 0.6950192451477051,
      "learning_rate": 7.568577321217953e-06,
      "loss": 0.0194,
      "step": 758000
    },
    {
      "epoch": 1.240516355400195,
      "grad_norm": 0.5514146685600281,
      "learning_rate": 7.5685114290044345e-06,
      "loss": 0.0256,
      "step": 758020
    },
    {
      "epoch": 1.2405490858388484,
      "grad_norm": 1.0294902324676514,
      "learning_rate": 7.568445536790918e-06,
      "loss": 0.0299,
      "step": 758040
    },
    {
      "epoch": 1.2405818162775017,
      "grad_norm": 0.538008451461792,
      "learning_rate": 7.568379644577401e-06,
      "loss": 0.0216,
      "step": 758060
    },
    {
      "epoch": 1.2406145467161551,
      "grad_norm": 1.4515800476074219,
      "learning_rate": 7.568313752363884e-06,
      "loss": 0.0314,
      "step": 758080
    },
    {
      "epoch": 1.2406472771548085,
      "grad_norm": 0.7430122494697571,
      "learning_rate": 7.568247860150366e-06,
      "loss": 0.0235,
      "step": 758100
    },
    {
      "epoch": 1.2406800075934619,
      "grad_norm": 0.9337081909179688,
      "learning_rate": 7.56818196793685e-06,
      "loss": 0.0237,
      "step": 758120
    },
    {
      "epoch": 1.240712738032115,
      "grad_norm": 1.0947600603103638,
      "learning_rate": 7.568116075723332e-06,
      "loss": 0.0252,
      "step": 758140
    },
    {
      "epoch": 1.2407454684707684,
      "grad_norm": 0.3631628453731537,
      "learning_rate": 7.568050183509815e-06,
      "loss": 0.0232,
      "step": 758160
    },
    {
      "epoch": 1.2407781989094218,
      "grad_norm": 0.26707765460014343,
      "learning_rate": 7.567984291296299e-06,
      "loss": 0.0155,
      "step": 758180
    },
    {
      "epoch": 1.2408109293480751,
      "grad_norm": 0.5114592909812927,
      "learning_rate": 7.567918399082781e-06,
      "loss": 0.0174,
      "step": 758200
    },
    {
      "epoch": 1.2408436597867285,
      "grad_norm": 0.4362928867340088,
      "learning_rate": 7.5678525068692645e-06,
      "loss": 0.0279,
      "step": 758220
    },
    {
      "epoch": 1.2408763902253819,
      "grad_norm": 0.4936565160751343,
      "learning_rate": 7.567786614655746e-06,
      "loss": 0.0238,
      "step": 758240
    },
    {
      "epoch": 1.240909120664035,
      "grad_norm": 0.6146052479743958,
      "learning_rate": 7.56772072244223e-06,
      "loss": 0.0204,
      "step": 758260
    },
    {
      "epoch": 1.2409418511026884,
      "grad_norm": 2.7753548622131348,
      "learning_rate": 7.567654830228713e-06,
      "loss": 0.0227,
      "step": 758280
    },
    {
      "epoch": 1.2409745815413418,
      "grad_norm": 0.6401489973068237,
      "learning_rate": 7.5675889380151954e-06,
      "loss": 0.0136,
      "step": 758300
    },
    {
      "epoch": 1.2410073119799951,
      "grad_norm": 0.84068763256073,
      "learning_rate": 7.567523045801678e-06,
      "loss": 0.019,
      "step": 758320
    },
    {
      "epoch": 1.2410400424186485,
      "grad_norm": 0.7518563866615295,
      "learning_rate": 7.567457153588162e-06,
      "loss": 0.0187,
      "step": 758340
    },
    {
      "epoch": 1.2410727728573019,
      "grad_norm": 0.6020615100860596,
      "learning_rate": 7.567391261374644e-06,
      "loss": 0.0129,
      "step": 758360
    },
    {
      "epoch": 1.2411055032959553,
      "grad_norm": 0.497131884098053,
      "learning_rate": 7.567325369161127e-06,
      "loss": 0.0287,
      "step": 758380
    },
    {
      "epoch": 1.2411382337346084,
      "grad_norm": 0.37107789516448975,
      "learning_rate": 7.567259476947609e-06,
      "loss": 0.0152,
      "step": 758400
    },
    {
      "epoch": 1.2411709641732618,
      "grad_norm": 0.6603743433952332,
      "learning_rate": 7.567193584734093e-06,
      "loss": 0.0231,
      "step": 758420
    },
    {
      "epoch": 1.2412036946119152,
      "grad_norm": 1.1808948516845703,
      "learning_rate": 7.567127692520575e-06,
      "loss": 0.0182,
      "step": 758440
    },
    {
      "epoch": 1.2412364250505685,
      "grad_norm": 0.690140962600708,
      "learning_rate": 7.567061800307058e-06,
      "loss": 0.0226,
      "step": 758460
    },
    {
      "epoch": 1.241269155489222,
      "grad_norm": 1.2720531225204468,
      "learning_rate": 7.566995908093541e-06,
      "loss": 0.0242,
      "step": 758480
    },
    {
      "epoch": 1.2413018859278753,
      "grad_norm": 0.4776577651500702,
      "learning_rate": 7.566930015880024e-06,
      "loss": 0.0301,
      "step": 758500
    },
    {
      "epoch": 1.2413346163665286,
      "grad_norm": 0.9372361302375793,
      "learning_rate": 7.566864123666507e-06,
      "loss": 0.0207,
      "step": 758520
    },
    {
      "epoch": 1.2413673468051818,
      "grad_norm": 0.35850974917411804,
      "learning_rate": 7.56679823145299e-06,
      "loss": 0.022,
      "step": 758540
    },
    {
      "epoch": 1.2414000772438352,
      "grad_norm": 0.15922988951206207,
      "learning_rate": 7.566732339239473e-06,
      "loss": 0.0228,
      "step": 758560
    },
    {
      "epoch": 1.2414328076824885,
      "grad_norm": 0.2137739062309265,
      "learning_rate": 7.5666664470259555e-06,
      "loss": 0.0233,
      "step": 758580
    },
    {
      "epoch": 1.241465538121142,
      "grad_norm": 0.10969991981983185,
      "learning_rate": 7.566600554812439e-06,
      "loss": 0.0172,
      "step": 758600
    },
    {
      "epoch": 1.2414982685597953,
      "grad_norm": 0.9730990529060364,
      "learning_rate": 7.566534662598921e-06,
      "loss": 0.0273,
      "step": 758620
    },
    {
      "epoch": 1.2415309989984487,
      "grad_norm": 0.21228338778018951,
      "learning_rate": 7.5664687703854045e-06,
      "loss": 0.0146,
      "step": 758640
    },
    {
      "epoch": 1.241563729437102,
      "grad_norm": 0.2598835229873657,
      "learning_rate": 7.5664028781718864e-06,
      "loss": 0.0148,
      "step": 758660
    },
    {
      "epoch": 1.2415964598757552,
      "grad_norm": 1.4944040775299072,
      "learning_rate": 7.56633698595837e-06,
      "loss": 0.0279,
      "step": 758680
    },
    {
      "epoch": 1.2416291903144085,
      "grad_norm": 0.651706337928772,
      "learning_rate": 7.566271093744853e-06,
      "loss": 0.021,
      "step": 758700
    },
    {
      "epoch": 1.241661920753062,
      "grad_norm": 0.14262095093727112,
      "learning_rate": 7.5662052015313355e-06,
      "loss": 0.0178,
      "step": 758720
    },
    {
      "epoch": 1.2416946511917153,
      "grad_norm": 0.3303040564060211,
      "learning_rate": 7.566139309317818e-06,
      "loss": 0.0252,
      "step": 758740
    },
    {
      "epoch": 1.2417273816303687,
      "grad_norm": 0.4352304935455322,
      "learning_rate": 7.566073417104302e-06,
      "loss": 0.025,
      "step": 758760
    },
    {
      "epoch": 1.241760112069022,
      "grad_norm": 1.6138657331466675,
      "learning_rate": 7.566007524890784e-06,
      "loss": 0.0164,
      "step": 758780
    },
    {
      "epoch": 1.2417928425076754,
      "grad_norm": 1.33992600440979,
      "learning_rate": 7.565941632677267e-06,
      "loss": 0.0172,
      "step": 758800
    },
    {
      "epoch": 1.2418255729463286,
      "grad_norm": 1.1969289779663086,
      "learning_rate": 7.565875740463749e-06,
      "loss": 0.0257,
      "step": 758820
    },
    {
      "epoch": 1.241858303384982,
      "grad_norm": 0.978203296661377,
      "learning_rate": 7.565809848250233e-06,
      "loss": 0.0243,
      "step": 758840
    },
    {
      "epoch": 1.2418910338236353,
      "grad_norm": 0.9541928172111511,
      "learning_rate": 7.565743956036716e-06,
      "loss": 0.0244,
      "step": 758860
    },
    {
      "epoch": 1.2419237642622887,
      "grad_norm": 0.5937446355819702,
      "learning_rate": 7.565678063823198e-06,
      "loss": 0.019,
      "step": 758880
    },
    {
      "epoch": 1.241956494700942,
      "grad_norm": 0.3551234006881714,
      "learning_rate": 7.565612171609682e-06,
      "loss": 0.0362,
      "step": 758900
    },
    {
      "epoch": 1.2419892251395952,
      "grad_norm": 0.4686836302280426,
      "learning_rate": 7.565546279396165e-06,
      "loss": 0.0206,
      "step": 758920
    },
    {
      "epoch": 1.2420219555782486,
      "grad_norm": 0.42640700936317444,
      "learning_rate": 7.565480387182647e-06,
      "loss": 0.024,
      "step": 758940
    },
    {
      "epoch": 1.242054686016902,
      "grad_norm": 0.654453694820404,
      "learning_rate": 7.56541449496913e-06,
      "loss": 0.0258,
      "step": 758960
    },
    {
      "epoch": 1.2420874164555553,
      "grad_norm": 0.45836764574050903,
      "learning_rate": 7.565348602755614e-06,
      "loss": 0.0228,
      "step": 758980
    },
    {
      "epoch": 1.2421201468942087,
      "grad_norm": 0.2875722050666809,
      "learning_rate": 7.5652827105420956e-06,
      "loss": 0.0156,
      "step": 759000
    },
    {
      "epoch": 1.242152877332862,
      "grad_norm": 0.8885325789451599,
      "learning_rate": 7.565216818328579e-06,
      "loss": 0.0251,
      "step": 759020
    },
    {
      "epoch": 1.2421856077715154,
      "grad_norm": 0.4811043441295624,
      "learning_rate": 7.565150926115061e-06,
      "loss": 0.0259,
      "step": 759040
    },
    {
      "epoch": 1.2422183382101686,
      "grad_norm": 0.40866002440452576,
      "learning_rate": 7.565085033901545e-06,
      "loss": 0.0217,
      "step": 759060
    },
    {
      "epoch": 1.242251068648822,
      "grad_norm": 0.41315603256225586,
      "learning_rate": 7.565019141688027e-06,
      "loss": 0.0136,
      "step": 759080
    },
    {
      "epoch": 1.2422837990874753,
      "grad_norm": 0.6659850478172302,
      "learning_rate": 7.56495324947451e-06,
      "loss": 0.0176,
      "step": 759100
    },
    {
      "epoch": 1.2423165295261287,
      "grad_norm": 0.5817162394523621,
      "learning_rate": 7.564887357260993e-06,
      "loss": 0.0189,
      "step": 759120
    },
    {
      "epoch": 1.242349259964782,
      "grad_norm": 0.5694228410720825,
      "learning_rate": 7.5648214650474764e-06,
      "loss": 0.0191,
      "step": 759140
    },
    {
      "epoch": 1.2423819904034354,
      "grad_norm": 1.6457195281982422,
      "learning_rate": 7.564755572833958e-06,
      "loss": 0.0274,
      "step": 759160
    },
    {
      "epoch": 1.2424147208420888,
      "grad_norm": 0.6789957284927368,
      "learning_rate": 7.564689680620442e-06,
      "loss": 0.0219,
      "step": 759180
    },
    {
      "epoch": 1.242447451280742,
      "grad_norm": 0.6094754934310913,
      "learning_rate": 7.564623788406924e-06,
      "loss": 0.0226,
      "step": 759200
    },
    {
      "epoch": 1.2424801817193953,
      "grad_norm": 0.4099913239479065,
      "learning_rate": 7.564557896193407e-06,
      "loss": 0.0131,
      "step": 759220
    },
    {
      "epoch": 1.2425129121580487,
      "grad_norm": 0.9890212416648865,
      "learning_rate": 7.564492003979891e-06,
      "loss": 0.0251,
      "step": 759240
    },
    {
      "epoch": 1.242545642596702,
      "grad_norm": 0.5047498941421509,
      "learning_rate": 7.564426111766373e-06,
      "loss": 0.0285,
      "step": 759260
    },
    {
      "epoch": 1.2425783730353555,
      "grad_norm": 0.683308482170105,
      "learning_rate": 7.5643602195528565e-06,
      "loss": 0.0166,
      "step": 759280
    },
    {
      "epoch": 1.2426111034740088,
      "grad_norm": 2.834001064300537,
      "learning_rate": 7.564294327339339e-06,
      "loss": 0.024,
      "step": 759300
    },
    {
      "epoch": 1.2426438339126622,
      "grad_norm": 0.6886191964149475,
      "learning_rate": 7.564228435125822e-06,
      "loss": 0.0212,
      "step": 759320
    },
    {
      "epoch": 1.2426765643513153,
      "grad_norm": 0.9691693186759949,
      "learning_rate": 7.564162542912305e-06,
      "loss": 0.0285,
      "step": 759340
    },
    {
      "epoch": 1.2427092947899687,
      "grad_norm": 2.331780433654785,
      "learning_rate": 7.564096650698788e-06,
      "loss": 0.0229,
      "step": 759360
    },
    {
      "epoch": 1.242742025228622,
      "grad_norm": 1.2412970066070557,
      "learning_rate": 7.56403075848527e-06,
      "loss": 0.0247,
      "step": 759380
    },
    {
      "epoch": 1.2427747556672755,
      "grad_norm": 0.30928653478622437,
      "learning_rate": 7.563964866271754e-06,
      "loss": 0.023,
      "step": 759400
    },
    {
      "epoch": 1.2428074861059288,
      "grad_norm": 0.4922935366630554,
      "learning_rate": 7.563898974058236e-06,
      "loss": 0.0187,
      "step": 759420
    },
    {
      "epoch": 1.2428402165445822,
      "grad_norm": 0.21899092197418213,
      "learning_rate": 7.563833081844719e-06,
      "loss": 0.0255,
      "step": 759440
    },
    {
      "epoch": 1.2428729469832356,
      "grad_norm": 2.077287435531616,
      "learning_rate": 7.563767189631201e-06,
      "loss": 0.0238,
      "step": 759460
    },
    {
      "epoch": 1.2429056774218887,
      "grad_norm": 0.5900463461875916,
      "learning_rate": 7.563701297417685e-06,
      "loss": 0.0261,
      "step": 759480
    },
    {
      "epoch": 1.242938407860542,
      "grad_norm": 0.8304923176765442,
      "learning_rate": 7.5636354052041674e-06,
      "loss": 0.0251,
      "step": 759500
    },
    {
      "epoch": 1.2429711382991955,
      "grad_norm": 0.2858783006668091,
      "learning_rate": 7.56356951299065e-06,
      "loss": 0.019,
      "step": 759520
    },
    {
      "epoch": 1.2430038687378488,
      "grad_norm": 0.14156630635261536,
      "learning_rate": 7.563503620777133e-06,
      "loss": 0.0186,
      "step": 759540
    },
    {
      "epoch": 1.2430365991765022,
      "grad_norm": 1.6165943145751953,
      "learning_rate": 7.5634377285636165e-06,
      "loss": 0.0213,
      "step": 759560
    },
    {
      "epoch": 1.2430693296151556,
      "grad_norm": 0.6487918496131897,
      "learning_rate": 7.563371836350099e-06,
      "loss": 0.0225,
      "step": 759580
    },
    {
      "epoch": 1.243102060053809,
      "grad_norm": 0.5856454372406006,
      "learning_rate": 7.563305944136582e-06,
      "loss": 0.0242,
      "step": 759600
    },
    {
      "epoch": 1.2431347904924621,
      "grad_norm": 0.2105114907026291,
      "learning_rate": 7.5632400519230656e-06,
      "loss": 0.0317,
      "step": 759620
    },
    {
      "epoch": 1.2431675209311155,
      "grad_norm": 3.156139612197876,
      "learning_rate": 7.5631741597095475e-06,
      "loss": 0.0214,
      "step": 759640
    },
    {
      "epoch": 1.2432002513697689,
      "grad_norm": 0.6692366600036621,
      "learning_rate": 7.563108267496031e-06,
      "loss": 0.0148,
      "step": 759660
    },
    {
      "epoch": 1.2432329818084222,
      "grad_norm": 0.4794323444366455,
      "learning_rate": 7.563042375282513e-06,
      "loss": 0.0166,
      "step": 759680
    },
    {
      "epoch": 1.2432657122470756,
      "grad_norm": 0.2628529965877533,
      "learning_rate": 7.5629764830689965e-06,
      "loss": 0.0225,
      "step": 759700
    },
    {
      "epoch": 1.2432984426857288,
      "grad_norm": 0.20883481204509735,
      "learning_rate": 7.562910590855479e-06,
      "loss": 0.0221,
      "step": 759720
    },
    {
      "epoch": 1.2433311731243821,
      "grad_norm": 0.8727968335151672,
      "learning_rate": 7.562844698641962e-06,
      "loss": 0.0265,
      "step": 759740
    },
    {
      "epoch": 1.2433639035630355,
      "grad_norm": 0.8485837578773499,
      "learning_rate": 7.562778806428445e-06,
      "loss": 0.0271,
      "step": 759760
    },
    {
      "epoch": 1.2433966340016889,
      "grad_norm": 0.5159640312194824,
      "learning_rate": 7.562712914214928e-06,
      "loss": 0.0223,
      "step": 759780
    },
    {
      "epoch": 1.2434293644403422,
      "grad_norm": 0.22800493240356445,
      "learning_rate": 7.56264702200141e-06,
      "loss": 0.0292,
      "step": 759800
    },
    {
      "epoch": 1.2434620948789956,
      "grad_norm": 0.557725191116333,
      "learning_rate": 7.562581129787894e-06,
      "loss": 0.0292,
      "step": 759820
    },
    {
      "epoch": 1.243494825317649,
      "grad_norm": 0.6003067493438721,
      "learning_rate": 7.562515237574376e-06,
      "loss": 0.0189,
      "step": 759840
    },
    {
      "epoch": 1.2435275557563021,
      "grad_norm": 0.7308191657066345,
      "learning_rate": 7.562449345360859e-06,
      "loss": 0.0298,
      "step": 759860
    },
    {
      "epoch": 1.2435602861949555,
      "grad_norm": 0.17646358907222748,
      "learning_rate": 7.562383453147342e-06,
      "loss": 0.0166,
      "step": 759880
    },
    {
      "epoch": 1.2435930166336089,
      "grad_norm": 1.3962578773498535,
      "learning_rate": 7.562317560933825e-06,
      "loss": 0.0219,
      "step": 759900
    },
    {
      "epoch": 1.2436257470722623,
      "grad_norm": 1.0352123975753784,
      "learning_rate": 7.562251668720308e-06,
      "loss": 0.0185,
      "step": 759920
    },
    {
      "epoch": 1.2436584775109156,
      "grad_norm": 0.5414482355117798,
      "learning_rate": 7.562185776506791e-06,
      "loss": 0.0251,
      "step": 759940
    },
    {
      "epoch": 1.243691207949569,
      "grad_norm": 0.16974139213562012,
      "learning_rate": 7.562119884293274e-06,
      "loss": 0.0223,
      "step": 759960
    },
    {
      "epoch": 1.2437239383882224,
      "grad_norm": 0.4496508538722992,
      "learning_rate": 7.562053992079757e-06,
      "loss": 0.0205,
      "step": 759980
    },
    {
      "epoch": 1.2437566688268755,
      "grad_norm": 0.7129316329956055,
      "learning_rate": 7.56198809986624e-06,
      "loss": 0.0298,
      "step": 760000
    },
    {
      "epoch": 1.243789399265529,
      "grad_norm": 0.42963525652885437,
      "learning_rate": 7.561922207652722e-06,
      "loss": 0.0188,
      "step": 760020
    },
    {
      "epoch": 1.2438221297041823,
      "grad_norm": 1.9766920804977417,
      "learning_rate": 7.561856315439206e-06,
      "loss": 0.0237,
      "step": 760040
    },
    {
      "epoch": 1.2438548601428356,
      "grad_norm": 0.9442043304443359,
      "learning_rate": 7.5617904232256875e-06,
      "loss": 0.0155,
      "step": 760060
    },
    {
      "epoch": 1.243887590581489,
      "grad_norm": 0.16895486414432526,
      "learning_rate": 7.561724531012171e-06,
      "loss": 0.0205,
      "step": 760080
    },
    {
      "epoch": 1.2439203210201424,
      "grad_norm": 0.9843440055847168,
      "learning_rate": 7.561658638798654e-06,
      "loss": 0.0207,
      "step": 760100
    },
    {
      "epoch": 1.2439530514587958,
      "grad_norm": 0.6632896065711975,
      "learning_rate": 7.561592746585137e-06,
      "loss": 0.0219,
      "step": 760120
    },
    {
      "epoch": 1.243985781897449,
      "grad_norm": 1.7595901489257812,
      "learning_rate": 7.561526854371619e-06,
      "loss": 0.0213,
      "step": 760140
    },
    {
      "epoch": 1.2440185123361023,
      "grad_norm": 1.019934058189392,
      "learning_rate": 7.561460962158103e-06,
      "loss": 0.0243,
      "step": 760160
    },
    {
      "epoch": 1.2440512427747557,
      "grad_norm": 0.16077181696891785,
      "learning_rate": 7.561395069944585e-06,
      "loss": 0.0313,
      "step": 760180
    },
    {
      "epoch": 1.244083973213409,
      "grad_norm": 0.7436158061027527,
      "learning_rate": 7.561329177731068e-06,
      "loss": 0.0268,
      "step": 760200
    },
    {
      "epoch": 1.2441167036520624,
      "grad_norm": 0.39764404296875,
      "learning_rate": 7.56126328551755e-06,
      "loss": 0.0316,
      "step": 760220
    },
    {
      "epoch": 1.2441494340907158,
      "grad_norm": 0.6282501220703125,
      "learning_rate": 7.561197393304034e-06,
      "loss": 0.016,
      "step": 760240
    },
    {
      "epoch": 1.2441821645293691,
      "grad_norm": 0.2663508653640747,
      "learning_rate": 7.5611315010905175e-06,
      "loss": 0.0174,
      "step": 760260
    },
    {
      "epoch": 1.2442148949680223,
      "grad_norm": 0.3356058895587921,
      "learning_rate": 7.561065608876999e-06,
      "loss": 0.0256,
      "step": 760280
    },
    {
      "epoch": 1.2442476254066757,
      "grad_norm": 2.7533724308013916,
      "learning_rate": 7.560999716663483e-06,
      "loss": 0.0258,
      "step": 760300
    },
    {
      "epoch": 1.244280355845329,
      "grad_norm": 1.4490498304367065,
      "learning_rate": 7.560933824449965e-06,
      "loss": 0.0233,
      "step": 760320
    },
    {
      "epoch": 1.2443130862839824,
      "grad_norm": 1.7819665670394897,
      "learning_rate": 7.5608679322364484e-06,
      "loss": 0.0225,
      "step": 760340
    },
    {
      "epoch": 1.2443458167226358,
      "grad_norm": 0.5146732926368713,
      "learning_rate": 7.560802040022931e-06,
      "loss": 0.0253,
      "step": 760360
    },
    {
      "epoch": 1.2443785471612892,
      "grad_norm": 1.531772255897522,
      "learning_rate": 7.560736147809415e-06,
      "loss": 0.0282,
      "step": 760380
    },
    {
      "epoch": 1.2444112775999423,
      "grad_norm": 0.4811168313026428,
      "learning_rate": 7.560670255595897e-06,
      "loss": 0.0299,
      "step": 760400
    },
    {
      "epoch": 1.2444440080385957,
      "grad_norm": 0.3280402719974518,
      "learning_rate": 7.56060436338238e-06,
      "loss": 0.0149,
      "step": 760420
    },
    {
      "epoch": 1.244476738477249,
      "grad_norm": 0.316558837890625,
      "learning_rate": 7.560538471168862e-06,
      "loss": 0.0225,
      "step": 760440
    },
    {
      "epoch": 1.2445094689159024,
      "grad_norm": 1.2941784858703613,
      "learning_rate": 7.560472578955346e-06,
      "loss": 0.0247,
      "step": 760460
    },
    {
      "epoch": 1.2445421993545558,
      "grad_norm": 0.8322964310646057,
      "learning_rate": 7.560406686741828e-06,
      "loss": 0.0195,
      "step": 760480
    },
    {
      "epoch": 1.2445749297932092,
      "grad_norm": 0.4580504596233368,
      "learning_rate": 7.560340794528311e-06,
      "loss": 0.0224,
      "step": 760500
    },
    {
      "epoch": 1.2446076602318623,
      "grad_norm": 0.7362164258956909,
      "learning_rate": 7.560274902314794e-06,
      "loss": 0.0186,
      "step": 760520
    },
    {
      "epoch": 1.2446403906705157,
      "grad_norm": 0.12241183966398239,
      "learning_rate": 7.560209010101277e-06,
      "loss": 0.0154,
      "step": 760540
    },
    {
      "epoch": 1.244673121109169,
      "grad_norm": 0.43245190382003784,
      "learning_rate": 7.560143117887759e-06,
      "loss": 0.0286,
      "step": 760560
    },
    {
      "epoch": 1.2447058515478224,
      "grad_norm": 0.7523576021194458,
      "learning_rate": 7.560077225674243e-06,
      "loss": 0.0281,
      "step": 760580
    },
    {
      "epoch": 1.2447385819864758,
      "grad_norm": 0.11757078766822815,
      "learning_rate": 7.560011333460725e-06,
      "loss": 0.0158,
      "step": 760600
    },
    {
      "epoch": 1.2447713124251292,
      "grad_norm": 0.5604155659675598,
      "learning_rate": 7.5599454412472085e-06,
      "loss": 0.0236,
      "step": 760620
    },
    {
      "epoch": 1.2448040428637825,
      "grad_norm": 0.4611034691333771,
      "learning_rate": 7.559879549033692e-06,
      "loss": 0.0247,
      "step": 760640
    },
    {
      "epoch": 1.2448367733024357,
      "grad_norm": 0.3251625895500183,
      "learning_rate": 7.559813656820174e-06,
      "loss": 0.0338,
      "step": 760660
    },
    {
      "epoch": 1.244869503741089,
      "grad_norm": 1.3434607982635498,
      "learning_rate": 7.5597477646066576e-06,
      "loss": 0.0249,
      "step": 760680
    },
    {
      "epoch": 1.2449022341797424,
      "grad_norm": 0.2743624150753021,
      "learning_rate": 7.5596818723931394e-06,
      "loss": 0.0228,
      "step": 760700
    },
    {
      "epoch": 1.2449349646183958,
      "grad_norm": 0.10563303530216217,
      "learning_rate": 7.559615980179623e-06,
      "loss": 0.0269,
      "step": 760720
    },
    {
      "epoch": 1.2449676950570492,
      "grad_norm": 0.5593044757843018,
      "learning_rate": 7.559550087966106e-06,
      "loss": 0.0365,
      "step": 760740
    },
    {
      "epoch": 1.2450004254957026,
      "grad_norm": 1.1548089981079102,
      "learning_rate": 7.5594841957525885e-06,
      "loss": 0.0403,
      "step": 760760
    },
    {
      "epoch": 1.245033155934356,
      "grad_norm": 2.197509527206421,
      "learning_rate": 7.559418303539071e-06,
      "loss": 0.033,
      "step": 760780
    },
    {
      "epoch": 1.245065886373009,
      "grad_norm": 0.8435553908348083,
      "learning_rate": 7.559352411325555e-06,
      "loss": 0.0339,
      "step": 760800
    },
    {
      "epoch": 1.2450986168116625,
      "grad_norm": 1.3038767576217651,
      "learning_rate": 7.559286519112037e-06,
      "loss": 0.0241,
      "step": 760820
    },
    {
      "epoch": 1.2451313472503158,
      "grad_norm": 0.17170634865760803,
      "learning_rate": 7.55922062689852e-06,
      "loss": 0.0235,
      "step": 760840
    },
    {
      "epoch": 1.2451640776889692,
      "grad_norm": 1.0279618501663208,
      "learning_rate": 7.559154734685002e-06,
      "loss": 0.0188,
      "step": 760860
    },
    {
      "epoch": 1.2451968081276226,
      "grad_norm": 0.9410735964775085,
      "learning_rate": 7.559088842471486e-06,
      "loss": 0.0221,
      "step": 760880
    },
    {
      "epoch": 1.245229538566276,
      "grad_norm": 2.9713211059570312,
      "learning_rate": 7.5590229502579685e-06,
      "loss": 0.025,
      "step": 760900
    },
    {
      "epoch": 1.2452622690049293,
      "grad_norm": 0.44795548915863037,
      "learning_rate": 7.558957058044451e-06,
      "loss": 0.0169,
      "step": 760920
    },
    {
      "epoch": 1.2452949994435825,
      "grad_norm": 0.681628942489624,
      "learning_rate": 7.558891165830934e-06,
      "loss": 0.0293,
      "step": 760940
    },
    {
      "epoch": 1.2453277298822358,
      "grad_norm": 0.5732115507125854,
      "learning_rate": 7.558825273617418e-06,
      "loss": 0.0296,
      "step": 760960
    },
    {
      "epoch": 1.2453604603208892,
      "grad_norm": 0.14636273682117462,
      "learning_rate": 7.5587593814039e-06,
      "loss": 0.0204,
      "step": 760980
    },
    {
      "epoch": 1.2453931907595426,
      "grad_norm": 0.9537566900253296,
      "learning_rate": 7.558693489190383e-06,
      "loss": 0.0254,
      "step": 761000
    },
    {
      "epoch": 1.245425921198196,
      "grad_norm": 0.8584164977073669,
      "learning_rate": 7.558627596976867e-06,
      "loss": 0.0185,
      "step": 761020
    },
    {
      "epoch": 1.2454586516368493,
      "grad_norm": 0.38829749822616577,
      "learning_rate": 7.5585617047633486e-06,
      "loss": 0.0189,
      "step": 761040
    },
    {
      "epoch": 1.2454913820755027,
      "grad_norm": 0.3290206789970398,
      "learning_rate": 7.558495812549832e-06,
      "loss": 0.0318,
      "step": 761060
    },
    {
      "epoch": 1.2455241125141558,
      "grad_norm": 0.1233493909239769,
      "learning_rate": 7.558429920336314e-06,
      "loss": 0.0258,
      "step": 761080
    },
    {
      "epoch": 1.2455568429528092,
      "grad_norm": 1.0206289291381836,
      "learning_rate": 7.558364028122798e-06,
      "loss": 0.0275,
      "step": 761100
    },
    {
      "epoch": 1.2455895733914626,
      "grad_norm": 0.7303036451339722,
      "learning_rate": 7.55829813590928e-06,
      "loss": 0.017,
      "step": 761120
    },
    {
      "epoch": 1.245622303830116,
      "grad_norm": 0.234641894698143,
      "learning_rate": 7.558232243695763e-06,
      "loss": 0.02,
      "step": 761140
    },
    {
      "epoch": 1.2456550342687693,
      "grad_norm": 0.4793027639389038,
      "learning_rate": 7.558166351482246e-06,
      "loss": 0.0173,
      "step": 761160
    },
    {
      "epoch": 1.2456877647074225,
      "grad_norm": 0.7602517008781433,
      "learning_rate": 7.5581004592687294e-06,
      "loss": 0.0338,
      "step": 761180
    },
    {
      "epoch": 1.2457204951460759,
      "grad_norm": 0.34320685267448425,
      "learning_rate": 7.558034567055211e-06,
      "loss": 0.0179,
      "step": 761200
    },
    {
      "epoch": 1.2457532255847292,
      "grad_norm": 0.349434494972229,
      "learning_rate": 7.557968674841695e-06,
      "loss": 0.0242,
      "step": 761220
    },
    {
      "epoch": 1.2457859560233826,
      "grad_norm": 0.7383588552474976,
      "learning_rate": 7.557902782628177e-06,
      "loss": 0.0122,
      "step": 761240
    },
    {
      "epoch": 1.245818686462036,
      "grad_norm": 0.2424442172050476,
      "learning_rate": 7.55783689041466e-06,
      "loss": 0.0255,
      "step": 761260
    },
    {
      "epoch": 1.2458514169006893,
      "grad_norm": 0.5771024823188782,
      "learning_rate": 7.557770998201142e-06,
      "loss": 0.0193,
      "step": 761280
    },
    {
      "epoch": 1.2458841473393427,
      "grad_norm": 1.2879230976104736,
      "learning_rate": 7.557705105987626e-06,
      "loss": 0.0201,
      "step": 761300
    },
    {
      "epoch": 1.2459168777779959,
      "grad_norm": 0.722434937953949,
      "learning_rate": 7.5576392137741095e-06,
      "loss": 0.0193,
      "step": 761320
    },
    {
      "epoch": 1.2459496082166492,
      "grad_norm": 1.8920278549194336,
      "learning_rate": 7.557573321560591e-06,
      "loss": 0.0139,
      "step": 761340
    },
    {
      "epoch": 1.2459823386553026,
      "grad_norm": 1.3761094808578491,
      "learning_rate": 7.557507429347075e-06,
      "loss": 0.0145,
      "step": 761360
    },
    {
      "epoch": 1.246015069093956,
      "grad_norm": 1.2181413173675537,
      "learning_rate": 7.557441537133558e-06,
      "loss": 0.0232,
      "step": 761380
    },
    {
      "epoch": 1.2460477995326094,
      "grad_norm": 0.3099060356616974,
      "learning_rate": 7.55737564492004e-06,
      "loss": 0.0205,
      "step": 761400
    },
    {
      "epoch": 1.2460805299712627,
      "grad_norm": 0.4203060567378998,
      "learning_rate": 7.557309752706523e-06,
      "loss": 0.0191,
      "step": 761420
    },
    {
      "epoch": 1.246113260409916,
      "grad_norm": 0.12282373756170273,
      "learning_rate": 7.557243860493007e-06,
      "loss": 0.0218,
      "step": 761440
    },
    {
      "epoch": 1.2461459908485693,
      "grad_norm": 0.7276350855827332,
      "learning_rate": 7.557177968279489e-06,
      "loss": 0.0153,
      "step": 761460
    },
    {
      "epoch": 1.2461787212872226,
      "grad_norm": 1.0865355730056763,
      "learning_rate": 7.557112076065972e-06,
      "loss": 0.0287,
      "step": 761480
    },
    {
      "epoch": 1.246211451725876,
      "grad_norm": 0.29165059328079224,
      "learning_rate": 7.557046183852454e-06,
      "loss": 0.014,
      "step": 761500
    },
    {
      "epoch": 1.2462441821645294,
      "grad_norm": 0.8953301906585693,
      "learning_rate": 7.556980291638938e-06,
      "loss": 0.0424,
      "step": 761520
    },
    {
      "epoch": 1.2462769126031827,
      "grad_norm": 2.88203501701355,
      "learning_rate": 7.5569143994254204e-06,
      "loss": 0.0271,
      "step": 761540
    },
    {
      "epoch": 1.2463096430418361,
      "grad_norm": 0.14101757109165192,
      "learning_rate": 7.556848507211903e-06,
      "loss": 0.0172,
      "step": 761560
    },
    {
      "epoch": 1.2463423734804895,
      "grad_norm": 0.42662128806114197,
      "learning_rate": 7.556782614998386e-06,
      "loss": 0.0306,
      "step": 761580
    },
    {
      "epoch": 1.2463751039191426,
      "grad_norm": 0.2724543809890747,
      "learning_rate": 7.5567167227848695e-06,
      "loss": 0.0174,
      "step": 761600
    },
    {
      "epoch": 1.246407834357796,
      "grad_norm": 0.3912455141544342,
      "learning_rate": 7.556650830571351e-06,
      "loss": 0.0216,
      "step": 761620
    },
    {
      "epoch": 1.2464405647964494,
      "grad_norm": 0.4572378098964691,
      "learning_rate": 7.556584938357835e-06,
      "loss": 0.0204,
      "step": 761640
    },
    {
      "epoch": 1.2464732952351028,
      "grad_norm": 3.1504130363464355,
      "learning_rate": 7.556519046144317e-06,
      "loss": 0.0213,
      "step": 761660
    },
    {
      "epoch": 1.2465060256737561,
      "grad_norm": 1.0894742012023926,
      "learning_rate": 7.5564531539308005e-06,
      "loss": 0.0243,
      "step": 761680
    },
    {
      "epoch": 1.2465387561124095,
      "grad_norm": 0.19892436265945435,
      "learning_rate": 7.556387261717284e-06,
      "loss": 0.0241,
      "step": 761700
    },
    {
      "epoch": 1.2465714865510629,
      "grad_norm": 0.7141983509063721,
      "learning_rate": 7.556321369503766e-06,
      "loss": 0.0252,
      "step": 761720
    },
    {
      "epoch": 1.246604216989716,
      "grad_norm": 0.6123877167701721,
      "learning_rate": 7.5562554772902495e-06,
      "loss": 0.0175,
      "step": 761740
    },
    {
      "epoch": 1.2466369474283694,
      "grad_norm": 0.5465185642242432,
      "learning_rate": 7.556189585076732e-06,
      "loss": 0.0155,
      "step": 761760
    },
    {
      "epoch": 1.2466696778670228,
      "grad_norm": 0.4858417212963104,
      "learning_rate": 7.556123692863215e-06,
      "loss": 0.0247,
      "step": 761780
    },
    {
      "epoch": 1.2467024083056761,
      "grad_norm": 0.9660606980323792,
      "learning_rate": 7.556057800649698e-06,
      "loss": 0.0223,
      "step": 761800
    },
    {
      "epoch": 1.2467351387443295,
      "grad_norm": 8.733963966369629,
      "learning_rate": 7.555991908436181e-06,
      "loss": 0.0188,
      "step": 761820
    },
    {
      "epoch": 1.2467678691829829,
      "grad_norm": 1.0193861722946167,
      "learning_rate": 7.555926016222663e-06,
      "loss": 0.0202,
      "step": 761840
    },
    {
      "epoch": 1.2468005996216363,
      "grad_norm": 0.506109893321991,
      "learning_rate": 7.555860124009147e-06,
      "loss": 0.0221,
      "step": 761860
    },
    {
      "epoch": 1.2468333300602894,
      "grad_norm": 0.34759268164634705,
      "learning_rate": 7.555794231795629e-06,
      "loss": 0.0268,
      "step": 761880
    },
    {
      "epoch": 1.2468660604989428,
      "grad_norm": 0.9683694839477539,
      "learning_rate": 7.555728339582112e-06,
      "loss": 0.024,
      "step": 761900
    },
    {
      "epoch": 1.2468987909375961,
      "grad_norm": 0.3536323308944702,
      "learning_rate": 7.555662447368595e-06,
      "loss": 0.0235,
      "step": 761920
    },
    {
      "epoch": 1.2469315213762495,
      "grad_norm": 1.0589033365249634,
      "learning_rate": 7.555596555155078e-06,
      "loss": 0.0139,
      "step": 761940
    },
    {
      "epoch": 1.246964251814903,
      "grad_norm": 0.6959646940231323,
      "learning_rate": 7.5555306629415605e-06,
      "loss": 0.0297,
      "step": 761960
    },
    {
      "epoch": 1.246996982253556,
      "grad_norm": 0.4154665470123291,
      "learning_rate": 7.555464770728044e-06,
      "loss": 0.0255,
      "step": 761980
    },
    {
      "epoch": 1.2470297126922094,
      "grad_norm": 0.3706357777118683,
      "learning_rate": 7.555398878514526e-06,
      "loss": 0.0227,
      "step": 762000
    },
    {
      "epoch": 1.2470624431308628,
      "grad_norm": 0.2482597827911377,
      "learning_rate": 7.55533298630101e-06,
      "loss": 0.0227,
      "step": 762020
    },
    {
      "epoch": 1.2470951735695162,
      "grad_norm": 1.1992043256759644,
      "learning_rate": 7.555267094087493e-06,
      "loss": 0.0324,
      "step": 762040
    },
    {
      "epoch": 1.2471279040081695,
      "grad_norm": 0.21264514327049255,
      "learning_rate": 7.555201201873975e-06,
      "loss": 0.024,
      "step": 762060
    },
    {
      "epoch": 1.247160634446823,
      "grad_norm": 0.47348159551620483,
      "learning_rate": 7.555135309660459e-06,
      "loss": 0.0212,
      "step": 762080
    },
    {
      "epoch": 1.2471933648854763,
      "grad_norm": 0.7088711261749268,
      "learning_rate": 7.5550694174469405e-06,
      "loss": 0.0206,
      "step": 762100
    },
    {
      "epoch": 1.2472260953241294,
      "grad_norm": 0.39935997128486633,
      "learning_rate": 7.555003525233424e-06,
      "loss": 0.023,
      "step": 762120
    },
    {
      "epoch": 1.2472588257627828,
      "grad_norm": 0.4028002619743347,
      "learning_rate": 7.554937633019907e-06,
      "loss": 0.0238,
      "step": 762140
    },
    {
      "epoch": 1.2472915562014362,
      "grad_norm": 0.24038657546043396,
      "learning_rate": 7.55487174080639e-06,
      "loss": 0.0241,
      "step": 762160
    },
    {
      "epoch": 1.2473242866400895,
      "grad_norm": 0.9344018697738647,
      "learning_rate": 7.554805848592872e-06,
      "loss": 0.0216,
      "step": 762180
    },
    {
      "epoch": 1.247357017078743,
      "grad_norm": 0.5402116775512695,
      "learning_rate": 7.554739956379356e-06,
      "loss": 0.0217,
      "step": 762200
    },
    {
      "epoch": 1.2473897475173963,
      "grad_norm": 0.21665677428245544,
      "learning_rate": 7.554674064165838e-06,
      "loss": 0.0216,
      "step": 762220
    },
    {
      "epoch": 1.2474224779560497,
      "grad_norm": 0.27975937724113464,
      "learning_rate": 7.554608171952321e-06,
      "loss": 0.0298,
      "step": 762240
    },
    {
      "epoch": 1.2474552083947028,
      "grad_norm": 0.2324403077363968,
      "learning_rate": 7.554542279738803e-06,
      "loss": 0.0178,
      "step": 762260
    },
    {
      "epoch": 1.2474879388333562,
      "grad_norm": 0.6102772951126099,
      "learning_rate": 7.554476387525287e-06,
      "loss": 0.023,
      "step": 762280
    },
    {
      "epoch": 1.2475206692720096,
      "grad_norm": 0.35068920254707336,
      "learning_rate": 7.554410495311769e-06,
      "loss": 0.0172,
      "step": 762300
    },
    {
      "epoch": 1.247553399710663,
      "grad_norm": 0.9175217151641846,
      "learning_rate": 7.554344603098252e-06,
      "loss": 0.0222,
      "step": 762320
    },
    {
      "epoch": 1.2475861301493163,
      "grad_norm": 0.5470448136329651,
      "learning_rate": 7.554278710884735e-06,
      "loss": 0.0182,
      "step": 762340
    },
    {
      "epoch": 1.2476188605879697,
      "grad_norm": 0.6751812100410461,
      "learning_rate": 7.554212818671218e-06,
      "loss": 0.0212,
      "step": 762360
    },
    {
      "epoch": 1.247651591026623,
      "grad_norm": 1.4119784832000732,
      "learning_rate": 7.5541469264577014e-06,
      "loss": 0.0218,
      "step": 762380
    },
    {
      "epoch": 1.2476843214652762,
      "grad_norm": 1.5014606714248657,
      "learning_rate": 7.554081034244184e-06,
      "loss": 0.0149,
      "step": 762400
    },
    {
      "epoch": 1.2477170519039296,
      "grad_norm": 0.875207781791687,
      "learning_rate": 7.554015142030667e-06,
      "loss": 0.0273,
      "step": 762420
    },
    {
      "epoch": 1.247749782342583,
      "grad_norm": 0.7755305171012878,
      "learning_rate": 7.55394924981715e-06,
      "loss": 0.0279,
      "step": 762440
    },
    {
      "epoch": 1.2477825127812363,
      "grad_norm": 1.9602892398834229,
      "learning_rate": 7.553883357603633e-06,
      "loss": 0.0293,
      "step": 762460
    },
    {
      "epoch": 1.2478152432198897,
      "grad_norm": 1.038394570350647,
      "learning_rate": 7.553817465390115e-06,
      "loss": 0.0193,
      "step": 762480
    },
    {
      "epoch": 1.247847973658543,
      "grad_norm": 0.20768994092941284,
      "learning_rate": 7.553751573176599e-06,
      "loss": 0.0197,
      "step": 762500
    },
    {
      "epoch": 1.2478807040971964,
      "grad_norm": 0.5360802412033081,
      "learning_rate": 7.553685680963081e-06,
      "loss": 0.021,
      "step": 762520
    },
    {
      "epoch": 1.2479134345358496,
      "grad_norm": 0.33627554774284363,
      "learning_rate": 7.553619788749564e-06,
      "loss": 0.0294,
      "step": 762540
    },
    {
      "epoch": 1.247946164974503,
      "grad_norm": 0.5369713306427002,
      "learning_rate": 7.553553896536047e-06,
      "loss": 0.0273,
      "step": 762560
    },
    {
      "epoch": 1.2479788954131563,
      "grad_norm": 1.239342451095581,
      "learning_rate": 7.55348800432253e-06,
      "loss": 0.0203,
      "step": 762580
    },
    {
      "epoch": 1.2480116258518097,
      "grad_norm": 0.5785996317863464,
      "learning_rate": 7.5534221121090124e-06,
      "loss": 0.0212,
      "step": 762600
    },
    {
      "epoch": 1.248044356290463,
      "grad_norm": 0.8786686062812805,
      "learning_rate": 7.553356219895496e-06,
      "loss": 0.0153,
      "step": 762620
    },
    {
      "epoch": 1.2480770867291164,
      "grad_norm": 0.38761821389198303,
      "learning_rate": 7.553290327681978e-06,
      "loss": 0.0302,
      "step": 762640
    },
    {
      "epoch": 1.2481098171677696,
      "grad_norm": 0.4503033757209778,
      "learning_rate": 7.5532244354684615e-06,
      "loss": 0.0189,
      "step": 762660
    },
    {
      "epoch": 1.248142547606423,
      "grad_norm": 1.128176212310791,
      "learning_rate": 7.553158543254943e-06,
      "loss": 0.0178,
      "step": 762680
    },
    {
      "epoch": 1.2481752780450763,
      "grad_norm": 0.2835509181022644,
      "learning_rate": 7.553092651041427e-06,
      "loss": 0.0182,
      "step": 762700
    },
    {
      "epoch": 1.2482080084837297,
      "grad_norm": 0.8747633099555969,
      "learning_rate": 7.5530267588279106e-06,
      "loss": 0.0227,
      "step": 762720
    },
    {
      "epoch": 1.248240738922383,
      "grad_norm": 1.3063580989837646,
      "learning_rate": 7.5529608666143925e-06,
      "loss": 0.0279,
      "step": 762740
    },
    {
      "epoch": 1.2482734693610364,
      "grad_norm": 0.35385662317276,
      "learning_rate": 7.552894974400876e-06,
      "loss": 0.0206,
      "step": 762760
    },
    {
      "epoch": 1.2483061997996896,
      "grad_norm": 0.473322331905365,
      "learning_rate": 7.552829082187359e-06,
      "loss": 0.0251,
      "step": 762780
    },
    {
      "epoch": 1.248338930238343,
      "grad_norm": 0.5433616042137146,
      "learning_rate": 7.5527631899738415e-06,
      "loss": 0.0253,
      "step": 762800
    },
    {
      "epoch": 1.2483716606769963,
      "grad_norm": 0.38506150245666504,
      "learning_rate": 7.552697297760324e-06,
      "loss": 0.0225,
      "step": 762820
    },
    {
      "epoch": 1.2484043911156497,
      "grad_norm": 0.9354150891304016,
      "learning_rate": 7.552631405546808e-06,
      "loss": 0.0242,
      "step": 762840
    },
    {
      "epoch": 1.248437121554303,
      "grad_norm": 1.1217914819717407,
      "learning_rate": 7.55256551333329e-06,
      "loss": 0.0173,
      "step": 762860
    },
    {
      "epoch": 1.2484698519929565,
      "grad_norm": 0.7548108100891113,
      "learning_rate": 7.552499621119773e-06,
      "loss": 0.02,
      "step": 762880
    },
    {
      "epoch": 1.2485025824316098,
      "grad_norm": 0.20655673742294312,
      "learning_rate": 7.552433728906255e-06,
      "loss": 0.0332,
      "step": 762900
    },
    {
      "epoch": 1.248535312870263,
      "grad_norm": 0.3302595615386963,
      "learning_rate": 7.552367836692739e-06,
      "loss": 0.0236,
      "step": 762920
    },
    {
      "epoch": 1.2485680433089164,
      "grad_norm": 0.5328036546707153,
      "learning_rate": 7.5523019444792215e-06,
      "loss": 0.0283,
      "step": 762940
    },
    {
      "epoch": 1.2486007737475697,
      "grad_norm": 0.18766185641288757,
      "learning_rate": 7.552236052265704e-06,
      "loss": 0.0188,
      "step": 762960
    },
    {
      "epoch": 1.248633504186223,
      "grad_norm": 0.8846715092658997,
      "learning_rate": 7.552170160052187e-06,
      "loss": 0.0179,
      "step": 762980
    },
    {
      "epoch": 1.2486662346248765,
      "grad_norm": 0.27855297923088074,
      "learning_rate": 7.552104267838671e-06,
      "loss": 0.0273,
      "step": 763000
    },
    {
      "epoch": 1.2486989650635298,
      "grad_norm": 0.4127386212348938,
      "learning_rate": 7.5520383756251525e-06,
      "loss": 0.0263,
      "step": 763020
    },
    {
      "epoch": 1.2487316955021832,
      "grad_norm": 1.4063001871109009,
      "learning_rate": 7.551972483411636e-06,
      "loss": 0.0188,
      "step": 763040
    },
    {
      "epoch": 1.2487644259408364,
      "grad_norm": 0.5410712957382202,
      "learning_rate": 7.551906591198118e-06,
      "loss": 0.0251,
      "step": 763060
    },
    {
      "epoch": 1.2487971563794897,
      "grad_norm": 0.624851644039154,
      "learning_rate": 7.5518406989846016e-06,
      "loss": 0.0348,
      "step": 763080
    },
    {
      "epoch": 1.248829886818143,
      "grad_norm": 0.31179875135421753,
      "learning_rate": 7.551774806771085e-06,
      "loss": 0.0143,
      "step": 763100
    },
    {
      "epoch": 1.2488626172567965,
      "grad_norm": 0.6702215671539307,
      "learning_rate": 7.551708914557567e-06,
      "loss": 0.016,
      "step": 763120
    },
    {
      "epoch": 1.2488953476954499,
      "grad_norm": 0.8730833530426025,
      "learning_rate": 7.551643022344051e-06,
      "loss": 0.0225,
      "step": 763140
    },
    {
      "epoch": 1.2489280781341032,
      "grad_norm": 0.4201061427593231,
      "learning_rate": 7.551577130130533e-06,
      "loss": 0.0184,
      "step": 763160
    },
    {
      "epoch": 1.2489608085727566,
      "grad_norm": 0.20934869349002838,
      "learning_rate": 7.551511237917016e-06,
      "loss": 0.0225,
      "step": 763180
    },
    {
      "epoch": 1.2489935390114097,
      "grad_norm": 0.4519549310207367,
      "learning_rate": 7.551445345703499e-06,
      "loss": 0.0192,
      "step": 763200
    },
    {
      "epoch": 1.2490262694500631,
      "grad_norm": 0.6830037236213684,
      "learning_rate": 7.5513794534899824e-06,
      "loss": 0.0256,
      "step": 763220
    },
    {
      "epoch": 1.2490589998887165,
      "grad_norm": 0.3892005383968353,
      "learning_rate": 7.551313561276464e-06,
      "loss": 0.0354,
      "step": 763240
    },
    {
      "epoch": 1.2490917303273699,
      "grad_norm": 0.7228200435638428,
      "learning_rate": 7.551247669062948e-06,
      "loss": 0.0238,
      "step": 763260
    },
    {
      "epoch": 1.2491244607660232,
      "grad_norm": 0.22455213963985443,
      "learning_rate": 7.55118177684943e-06,
      "loss": 0.0222,
      "step": 763280
    },
    {
      "epoch": 1.2491571912046766,
      "grad_norm": Infinity,
      "learning_rate": 7.551115884635913e-06,
      "loss": 0.0168,
      "step": 763300
    },
    {
      "epoch": 1.24918992164333,
      "grad_norm": 0.6815613508224487,
      "learning_rate": 7.551049992422395e-06,
      "loss": 0.0259,
      "step": 763320
    },
    {
      "epoch": 1.2492226520819831,
      "grad_norm": 0.37336862087249756,
      "learning_rate": 7.550984100208879e-06,
      "loss": 0.0286,
      "step": 763340
    },
    {
      "epoch": 1.2492553825206365,
      "grad_norm": 0.4699200987815857,
      "learning_rate": 7.550918207995362e-06,
      "loss": 0.0244,
      "step": 763360
    },
    {
      "epoch": 1.2492881129592899,
      "grad_norm": 1.180323839187622,
      "learning_rate": 7.550852315781844e-06,
      "loss": 0.0235,
      "step": 763380
    },
    {
      "epoch": 1.2493208433979432,
      "grad_norm": 0.5027307868003845,
      "learning_rate": 7.550786423568327e-06,
      "loss": 0.0276,
      "step": 763400
    },
    {
      "epoch": 1.2493535738365966,
      "grad_norm": 1.5981371402740479,
      "learning_rate": 7.550720531354811e-06,
      "loss": 0.0306,
      "step": 763420
    },
    {
      "epoch": 1.24938630427525,
      "grad_norm": 0.18824322521686554,
      "learning_rate": 7.5506546391412934e-06,
      "loss": 0.0178,
      "step": 763440
    },
    {
      "epoch": 1.2494190347139031,
      "grad_norm": 0.4644393026828766,
      "learning_rate": 7.550588746927776e-06,
      "loss": 0.0193,
      "step": 763460
    },
    {
      "epoch": 1.2494517651525565,
      "grad_norm": 5.049099922180176,
      "learning_rate": 7.55052285471426e-06,
      "loss": 0.0246,
      "step": 763480
    },
    {
      "epoch": 1.2494844955912099,
      "grad_norm": 0.9012939929962158,
      "learning_rate": 7.550456962500742e-06,
      "loss": 0.0271,
      "step": 763500
    },
    {
      "epoch": 1.2495172260298633,
      "grad_norm": 0.8867346048355103,
      "learning_rate": 7.550391070287225e-06,
      "loss": 0.03,
      "step": 763520
    },
    {
      "epoch": 1.2495499564685166,
      "grad_norm": 0.37498119473457336,
      "learning_rate": 7.550325178073707e-06,
      "loss": 0.0273,
      "step": 763540
    },
    {
      "epoch": 1.24958268690717,
      "grad_norm": 0.3234173357486725,
      "learning_rate": 7.550259285860191e-06,
      "loss": 0.0215,
      "step": 763560
    },
    {
      "epoch": 1.2496154173458232,
      "grad_norm": 0.7373645305633545,
      "learning_rate": 7.5501933936466735e-06,
      "loss": 0.0174,
      "step": 763580
    },
    {
      "epoch": 1.2496481477844765,
      "grad_norm": 0.9415591359138489,
      "learning_rate": 7.550127501433156e-06,
      "loss": 0.0216,
      "step": 763600
    },
    {
      "epoch": 1.24968087822313,
      "grad_norm": 1.89542818069458,
      "learning_rate": 7.550061609219639e-06,
      "loss": 0.0203,
      "step": 763620
    },
    {
      "epoch": 1.2497136086617833,
      "grad_norm": 1.3833194971084595,
      "learning_rate": 7.5499957170061225e-06,
      "loss": 0.0236,
      "step": 763640
    },
    {
      "epoch": 1.2497463391004366,
      "grad_norm": 0.9637746214866638,
      "learning_rate": 7.549929824792604e-06,
      "loss": 0.0215,
      "step": 763660
    },
    {
      "epoch": 1.24977906953909,
      "grad_norm": 0.8407837152481079,
      "learning_rate": 7.549863932579088e-06,
      "loss": 0.021,
      "step": 763680
    },
    {
      "epoch": 1.2498117999777434,
      "grad_norm": 0.4346592128276825,
      "learning_rate": 7.54979804036557e-06,
      "loss": 0.0289,
      "step": 763700
    },
    {
      "epoch": 1.2498445304163965,
      "grad_norm": 1.180415153503418,
      "learning_rate": 7.5497321481520535e-06,
      "loss": 0.0206,
      "step": 763720
    },
    {
      "epoch": 1.24987726085505,
      "grad_norm": 0.17387773096561432,
      "learning_rate": 7.549666255938536e-06,
      "loss": 0.0191,
      "step": 763740
    },
    {
      "epoch": 1.2499099912937033,
      "grad_norm": 0.648141622543335,
      "learning_rate": 7.549600363725019e-06,
      "loss": 0.0295,
      "step": 763760
    },
    {
      "epoch": 1.2499427217323567,
      "grad_norm": 0.44815459847450256,
      "learning_rate": 7.5495344715115025e-06,
      "loss": 0.0265,
      "step": 763780
    },
    {
      "epoch": 1.24997545217101,
      "grad_norm": 0.46644720435142517,
      "learning_rate": 7.549468579297985e-06,
      "loss": 0.0276,
      "step": 763800
    },
    {
      "epoch": 1.2500081826096634,
      "grad_norm": 0.2367628663778305,
      "learning_rate": 7.549402687084468e-06,
      "loss": 0.013,
      "step": 763820
    },
    {
      "epoch": 1.2500409130483168,
      "grad_norm": 0.7689920663833618,
      "learning_rate": 7.549336794870951e-06,
      "loss": 0.0175,
      "step": 763840
    },
    {
      "epoch": 1.25007364348697,
      "grad_norm": 0.5576976537704468,
      "learning_rate": 7.549270902657434e-06,
      "loss": 0.0202,
      "step": 763860
    },
    {
      "epoch": 1.2501063739256233,
      "grad_norm": 0.6141788959503174,
      "learning_rate": 7.549205010443916e-06,
      "loss": 0.0189,
      "step": 763880
    },
    {
      "epoch": 1.2501391043642767,
      "grad_norm": 1.0340970754623413,
      "learning_rate": 7.5491391182304e-06,
      "loss": 0.0196,
      "step": 763900
    },
    {
      "epoch": 1.25017183480293,
      "grad_norm": 0.7811159491539001,
      "learning_rate": 7.549073226016882e-06,
      "loss": 0.018,
      "step": 763920
    },
    {
      "epoch": 1.2502045652415834,
      "grad_norm": 0.6452987790107727,
      "learning_rate": 7.549007333803365e-06,
      "loss": 0.0256,
      "step": 763940
    },
    {
      "epoch": 1.2502372956802368,
      "grad_norm": 0.7339621782302856,
      "learning_rate": 7.548941441589848e-06,
      "loss": 0.02,
      "step": 763960
    },
    {
      "epoch": 1.2502700261188902,
      "grad_norm": 1.954775333404541,
      "learning_rate": 7.548875549376331e-06,
      "loss": 0.0317,
      "step": 763980
    },
    {
      "epoch": 1.2503027565575433,
      "grad_norm": 0.49002811312675476,
      "learning_rate": 7.5488096571628135e-06,
      "loss": 0.0128,
      "step": 764000
    },
    {
      "epoch": 1.2503354869961967,
      "grad_norm": 0.9325770735740662,
      "learning_rate": 7.548743764949297e-06,
      "loss": 0.0227,
      "step": 764020
    },
    {
      "epoch": 1.25036821743485,
      "grad_norm": 0.8751486539840698,
      "learning_rate": 7.548677872735779e-06,
      "loss": 0.0302,
      "step": 764040
    },
    {
      "epoch": 1.2504009478735034,
      "grad_norm": 0.44142958521842957,
      "learning_rate": 7.548611980522263e-06,
      "loss": 0.0182,
      "step": 764060
    },
    {
      "epoch": 1.2504336783121568,
      "grad_norm": 0.9989753365516663,
      "learning_rate": 7.5485460883087445e-06,
      "loss": 0.0201,
      "step": 764080
    },
    {
      "epoch": 1.25046640875081,
      "grad_norm": 0.2195502668619156,
      "learning_rate": 7.548480196095228e-06,
      "loss": 0.0215,
      "step": 764100
    },
    {
      "epoch": 1.2504991391894635,
      "grad_norm": 0.45289695262908936,
      "learning_rate": 7.54841430388171e-06,
      "loss": 0.0178,
      "step": 764120
    },
    {
      "epoch": 1.2505318696281167,
      "grad_norm": 0.8500657081604004,
      "learning_rate": 7.5483484116681936e-06,
      "loss": 0.0181,
      "step": 764140
    },
    {
      "epoch": 1.25056460006677,
      "grad_norm": 0.3719941973686218,
      "learning_rate": 7.548282519454677e-06,
      "loss": 0.024,
      "step": 764160
    },
    {
      "epoch": 1.2505973305054234,
      "grad_norm": 0.2008422613143921,
      "learning_rate": 7.548216627241159e-06,
      "loss": 0.0219,
      "step": 764180
    },
    {
      "epoch": 1.2506300609440768,
      "grad_norm": 0.5455357432365417,
      "learning_rate": 7.548150735027643e-06,
      "loss": 0.0311,
      "step": 764200
    },
    {
      "epoch": 1.2506627913827302,
      "grad_norm": 0.8455584645271301,
      "learning_rate": 7.548084842814125e-06,
      "loss": 0.0297,
      "step": 764220
    },
    {
      "epoch": 1.2506955218213833,
      "grad_norm": 0.40862372517585754,
      "learning_rate": 7.548018950600608e-06,
      "loss": 0.0267,
      "step": 764240
    },
    {
      "epoch": 1.250728252260037,
      "grad_norm": 2.7893264293670654,
      "learning_rate": 7.547953058387091e-06,
      "loss": 0.0326,
      "step": 764260
    },
    {
      "epoch": 1.25076098269869,
      "grad_norm": 0.43169379234313965,
      "learning_rate": 7.5478871661735744e-06,
      "loss": 0.0215,
      "step": 764280
    },
    {
      "epoch": 1.2507937131373434,
      "grad_norm": 0.2769782245159149,
      "learning_rate": 7.547821273960056e-06,
      "loss": 0.0211,
      "step": 764300
    },
    {
      "epoch": 1.2508264435759968,
      "grad_norm": 0.4218030571937561,
      "learning_rate": 7.54775538174654e-06,
      "loss": 0.013,
      "step": 764320
    },
    {
      "epoch": 1.2508591740146502,
      "grad_norm": 0.5744367837905884,
      "learning_rate": 7.547689489533022e-06,
      "loss": 0.0273,
      "step": 764340
    },
    {
      "epoch": 1.2508919044533036,
      "grad_norm": 0.8854097127914429,
      "learning_rate": 7.547623597319505e-06,
      "loss": 0.0302,
      "step": 764360
    },
    {
      "epoch": 1.2509246348919567,
      "grad_norm": 0.8563480973243713,
      "learning_rate": 7.547557705105988e-06,
      "loss": 0.0183,
      "step": 764380
    },
    {
      "epoch": 1.25095736533061,
      "grad_norm": 0.7135099172592163,
      "learning_rate": 7.547491812892471e-06,
      "loss": 0.0186,
      "step": 764400
    },
    {
      "epoch": 1.2509900957692635,
      "grad_norm": 0.543882429599762,
      "learning_rate": 7.547425920678954e-06,
      "loss": 0.0218,
      "step": 764420
    },
    {
      "epoch": 1.2510228262079168,
      "grad_norm": 0.6180528998374939,
      "learning_rate": 7.547360028465437e-06,
      "loss": 0.0266,
      "step": 764440
    },
    {
      "epoch": 1.2510555566465702,
      "grad_norm": 0.2394043654203415,
      "learning_rate": 7.547294136251919e-06,
      "loss": 0.0195,
      "step": 764460
    },
    {
      "epoch": 1.2510882870852236,
      "grad_norm": 0.18712517619132996,
      "learning_rate": 7.547228244038403e-06,
      "loss": 0.0282,
      "step": 764480
    },
    {
      "epoch": 1.251121017523877,
      "grad_norm": 0.5939145088195801,
      "learning_rate": 7.547162351824886e-06,
      "loss": 0.0305,
      "step": 764500
    },
    {
      "epoch": 1.25115374796253,
      "grad_norm": 0.42953482270240784,
      "learning_rate": 7.547096459611368e-06,
      "loss": 0.0167,
      "step": 764520
    },
    {
      "epoch": 1.2511864784011835,
      "grad_norm": 0.4754510223865509,
      "learning_rate": 7.547030567397852e-06,
      "loss": 0.0218,
      "step": 764540
    },
    {
      "epoch": 1.2512192088398368,
      "grad_norm": 0.4089893102645874,
      "learning_rate": 7.546964675184334e-06,
      "loss": 0.0152,
      "step": 764560
    },
    {
      "epoch": 1.2512519392784902,
      "grad_norm": 1.523807168006897,
      "learning_rate": 7.546898782970817e-06,
      "loss": 0.0298,
      "step": 764580
    },
    {
      "epoch": 1.2512846697171436,
      "grad_norm": 0.5472257733345032,
      "learning_rate": 7.5468328907573e-06,
      "loss": 0.0287,
      "step": 764600
    },
    {
      "epoch": 1.251317400155797,
      "grad_norm": 0.28130272030830383,
      "learning_rate": 7.546766998543783e-06,
      "loss": 0.0199,
      "step": 764620
    },
    {
      "epoch": 1.2513501305944503,
      "grad_norm": 0.5349757671356201,
      "learning_rate": 7.5467011063302654e-06,
      "loss": 0.0203,
      "step": 764640
    },
    {
      "epoch": 1.2513828610331035,
      "grad_norm": 0.6853747367858887,
      "learning_rate": 7.546635214116749e-06,
      "loss": 0.0173,
      "step": 764660
    },
    {
      "epoch": 1.2514155914717568,
      "grad_norm": 4.24392032623291,
      "learning_rate": 7.546569321903231e-06,
      "loss": 0.0218,
      "step": 764680
    },
    {
      "epoch": 1.2514483219104102,
      "grad_norm": 0.14297975599765778,
      "learning_rate": 7.5465034296897145e-06,
      "loss": 0.0229,
      "step": 764700
    },
    {
      "epoch": 1.2514810523490636,
      "grad_norm": 0.9846577644348145,
      "learning_rate": 7.546437537476196e-06,
      "loss": 0.0154,
      "step": 764720
    },
    {
      "epoch": 1.251513782787717,
      "grad_norm": 0.7363924980163574,
      "learning_rate": 7.54637164526268e-06,
      "loss": 0.0393,
      "step": 764740
    },
    {
      "epoch": 1.2515465132263703,
      "grad_norm": 1.0784823894500732,
      "learning_rate": 7.546305753049163e-06,
      "loss": 0.0278,
      "step": 764760
    },
    {
      "epoch": 1.2515792436650237,
      "grad_norm": 0.33800917863845825,
      "learning_rate": 7.5462398608356455e-06,
      "loss": 0.0225,
      "step": 764780
    },
    {
      "epoch": 1.2516119741036769,
      "grad_norm": 0.8925318121910095,
      "learning_rate": 7.546173968622128e-06,
      "loss": 0.0163,
      "step": 764800
    },
    {
      "epoch": 1.2516447045423302,
      "grad_norm": 0.13999749720096588,
      "learning_rate": 7.546108076408612e-06,
      "loss": 0.0248,
      "step": 764820
    },
    {
      "epoch": 1.2516774349809836,
      "grad_norm": 0.9416157007217407,
      "learning_rate": 7.5460421841950945e-06,
      "loss": 0.0223,
      "step": 764840
    },
    {
      "epoch": 1.251710165419637,
      "grad_norm": 0.32096171379089355,
      "learning_rate": 7.545976291981577e-06,
      "loss": 0.0217,
      "step": 764860
    },
    {
      "epoch": 1.2517428958582903,
      "grad_norm": 0.6671587824821472,
      "learning_rate": 7.545910399768061e-06,
      "loss": 0.0148,
      "step": 764880
    },
    {
      "epoch": 1.2517756262969435,
      "grad_norm": 0.6337252259254456,
      "learning_rate": 7.545844507554543e-06,
      "loss": 0.0288,
      "step": 764900
    },
    {
      "epoch": 1.251808356735597,
      "grad_norm": 0.25180017948150635,
      "learning_rate": 7.545778615341026e-06,
      "loss": 0.0244,
      "step": 764920
    },
    {
      "epoch": 1.2518410871742502,
      "grad_norm": 0.7332852482795715,
      "learning_rate": 7.545712723127508e-06,
      "loss": 0.0278,
      "step": 764940
    },
    {
      "epoch": 1.2518738176129036,
      "grad_norm": 0.7170489430427551,
      "learning_rate": 7.545646830913992e-06,
      "loss": 0.0196,
      "step": 764960
    },
    {
      "epoch": 1.251906548051557,
      "grad_norm": 0.47936883568763733,
      "learning_rate": 7.5455809387004745e-06,
      "loss": 0.0205,
      "step": 764980
    },
    {
      "epoch": 1.2519392784902104,
      "grad_norm": 1.064391851425171,
      "learning_rate": 7.545515046486957e-06,
      "loss": 0.0186,
      "step": 765000
    },
    {
      "epoch": 1.2519720089288637,
      "grad_norm": 0.24044716358184814,
      "learning_rate": 7.54544915427344e-06,
      "loss": 0.0156,
      "step": 765020
    },
    {
      "epoch": 1.2520047393675169,
      "grad_norm": 1.0401722192764282,
      "learning_rate": 7.545383262059924e-06,
      "loss": 0.02,
      "step": 765040
    },
    {
      "epoch": 1.2520374698061705,
      "grad_norm": 0.8916469812393188,
      "learning_rate": 7.5453173698464055e-06,
      "loss": 0.0281,
      "step": 765060
    },
    {
      "epoch": 1.2520702002448236,
      "grad_norm": 0.32896146178245544,
      "learning_rate": 7.545251477632889e-06,
      "loss": 0.019,
      "step": 765080
    },
    {
      "epoch": 1.252102930683477,
      "grad_norm": 0.9174762964248657,
      "learning_rate": 7.545185585419371e-06,
      "loss": 0.0271,
      "step": 765100
    },
    {
      "epoch": 1.2521356611221304,
      "grad_norm": 0.30412137508392334,
      "learning_rate": 7.5451196932058546e-06,
      "loss": 0.0235,
      "step": 765120
    },
    {
      "epoch": 1.2521683915607837,
      "grad_norm": 0.269318550825119,
      "learning_rate": 7.5450538009923365e-06,
      "loss": 0.0198,
      "step": 765140
    },
    {
      "epoch": 1.2522011219994371,
      "grad_norm": 0.3937569856643677,
      "learning_rate": 7.54498790877882e-06,
      "loss": 0.0209,
      "step": 765160
    },
    {
      "epoch": 1.2522338524380903,
      "grad_norm": 0.25792139768600464,
      "learning_rate": 7.544922016565303e-06,
      "loss": 0.0208,
      "step": 765180
    },
    {
      "epoch": 1.2522665828767436,
      "grad_norm": 1.1363921165466309,
      "learning_rate": 7.5448561243517855e-06,
      "loss": 0.0253,
      "step": 765200
    },
    {
      "epoch": 1.252299313315397,
      "grad_norm": 0.27885207533836365,
      "learning_rate": 7.544790232138269e-06,
      "loss": 0.0211,
      "step": 765220
    },
    {
      "epoch": 1.2523320437540504,
      "grad_norm": 0.3516737222671509,
      "learning_rate": 7.544724339924752e-06,
      "loss": 0.0189,
      "step": 765240
    },
    {
      "epoch": 1.2523647741927038,
      "grad_norm": 1.5159989595413208,
      "learning_rate": 7.544658447711235e-06,
      "loss": 0.0179,
      "step": 765260
    },
    {
      "epoch": 1.2523975046313571,
      "grad_norm": 0.4097444415092468,
      "learning_rate": 7.544592555497717e-06,
      "loss": 0.0207,
      "step": 765280
    },
    {
      "epoch": 1.2524302350700105,
      "grad_norm": 3.286268949508667,
      "learning_rate": 7.544526663284201e-06,
      "loss": 0.0221,
      "step": 765300
    },
    {
      "epoch": 1.2524629655086637,
      "grad_norm": 0.5976043939590454,
      "learning_rate": 7.544460771070683e-06,
      "loss": 0.0217,
      "step": 765320
    },
    {
      "epoch": 1.252495695947317,
      "grad_norm": 0.42215806245803833,
      "learning_rate": 7.544394878857166e-06,
      "loss": 0.0158,
      "step": 765340
    },
    {
      "epoch": 1.2525284263859704,
      "grad_norm": 0.49705827236175537,
      "learning_rate": 7.544328986643648e-06,
      "loss": 0.0196,
      "step": 765360
    },
    {
      "epoch": 1.2525611568246238,
      "grad_norm": 0.5675486922264099,
      "learning_rate": 7.544263094430132e-06,
      "loss": 0.0224,
      "step": 765380
    },
    {
      "epoch": 1.2525938872632771,
      "grad_norm": 0.07505550980567932,
      "learning_rate": 7.544197202216615e-06,
      "loss": 0.0263,
      "step": 765400
    },
    {
      "epoch": 1.2526266177019305,
      "grad_norm": 0.9563736915588379,
      "learning_rate": 7.544131310003097e-06,
      "loss": 0.0236,
      "step": 765420
    },
    {
      "epoch": 1.2526593481405839,
      "grad_norm": 0.6012906432151794,
      "learning_rate": 7.54406541778958e-06,
      "loss": 0.0198,
      "step": 765440
    },
    {
      "epoch": 1.252692078579237,
      "grad_norm": 0.7896844744682312,
      "learning_rate": 7.543999525576064e-06,
      "loss": 0.0205,
      "step": 765460
    },
    {
      "epoch": 1.2527248090178904,
      "grad_norm": 2.44201397895813,
      "learning_rate": 7.543933633362546e-06,
      "loss": 0.02,
      "step": 765480
    },
    {
      "epoch": 1.2527575394565438,
      "grad_norm": 0.5809904932975769,
      "learning_rate": 7.543867741149029e-06,
      "loss": 0.0243,
      "step": 765500
    },
    {
      "epoch": 1.2527902698951972,
      "grad_norm": 1.1180622577667236,
      "learning_rate": 7.543801848935511e-06,
      "loss": 0.0224,
      "step": 765520
    },
    {
      "epoch": 1.2528230003338505,
      "grad_norm": 0.4490604102611542,
      "learning_rate": 7.543735956721995e-06,
      "loss": 0.0222,
      "step": 765540
    },
    {
      "epoch": 1.252855730772504,
      "grad_norm": 1.015881061553955,
      "learning_rate": 7.543670064508478e-06,
      "loss": 0.0232,
      "step": 765560
    },
    {
      "epoch": 1.2528884612111573,
      "grad_norm": 1.9290825128555298,
      "learning_rate": 7.54360417229496e-06,
      "loss": 0.0252,
      "step": 765580
    },
    {
      "epoch": 1.2529211916498104,
      "grad_norm": 0.29525822401046753,
      "learning_rate": 7.543538280081444e-06,
      "loss": 0.0237,
      "step": 765600
    },
    {
      "epoch": 1.2529539220884638,
      "grad_norm": 0.5068022608757019,
      "learning_rate": 7.5434723878679265e-06,
      "loss": 0.026,
      "step": 765620
    },
    {
      "epoch": 1.2529866525271172,
      "grad_norm": 1.105429768562317,
      "learning_rate": 7.543406495654409e-06,
      "loss": 0.0202,
      "step": 765640
    },
    {
      "epoch": 1.2530193829657705,
      "grad_norm": 0.5121554136276245,
      "learning_rate": 7.543340603440892e-06,
      "loss": 0.0216,
      "step": 765660
    },
    {
      "epoch": 1.253052113404424,
      "grad_norm": 0.8128946423530579,
      "learning_rate": 7.5432747112273755e-06,
      "loss": 0.0203,
      "step": 765680
    },
    {
      "epoch": 1.253084843843077,
      "grad_norm": 0.8187054395675659,
      "learning_rate": 7.543208819013857e-06,
      "loss": 0.0219,
      "step": 765700
    },
    {
      "epoch": 1.2531175742817307,
      "grad_norm": 0.2435309886932373,
      "learning_rate": 7.543142926800341e-06,
      "loss": 0.0234,
      "step": 765720
    },
    {
      "epoch": 1.2531503047203838,
      "grad_norm": 1.5198791027069092,
      "learning_rate": 7.543077034586823e-06,
      "loss": 0.0259,
      "step": 765740
    },
    {
      "epoch": 1.2531830351590372,
      "grad_norm": 0.32550379633903503,
      "learning_rate": 7.5430111423733065e-06,
      "loss": 0.0279,
      "step": 765760
    },
    {
      "epoch": 1.2532157655976905,
      "grad_norm": 0.9063895344734192,
      "learning_rate": 7.542945250159789e-06,
      "loss": 0.0231,
      "step": 765780
    },
    {
      "epoch": 1.253248496036344,
      "grad_norm": 0.18310096859931946,
      "learning_rate": 7.542879357946272e-06,
      "loss": 0.0177,
      "step": 765800
    },
    {
      "epoch": 1.2532812264749973,
      "grad_norm": 1.3657437562942505,
      "learning_rate": 7.542813465732755e-06,
      "loss": 0.021,
      "step": 765820
    },
    {
      "epoch": 1.2533139569136504,
      "grad_norm": 0.4129139482975006,
      "learning_rate": 7.542747573519238e-06,
      "loss": 0.0218,
      "step": 765840
    },
    {
      "epoch": 1.253346687352304,
      "grad_norm": 0.7952415943145752,
      "learning_rate": 7.54268168130572e-06,
      "loss": 0.0295,
      "step": 765860
    },
    {
      "epoch": 1.2533794177909572,
      "grad_norm": 0.6252158880233765,
      "learning_rate": 7.542615789092204e-06,
      "loss": 0.0234,
      "step": 765880
    },
    {
      "epoch": 1.2534121482296106,
      "grad_norm": 2.265122413635254,
      "learning_rate": 7.542549896878687e-06,
      "loss": 0.036,
      "step": 765900
    },
    {
      "epoch": 1.253444878668264,
      "grad_norm": 0.3758848309516907,
      "learning_rate": 7.542484004665169e-06,
      "loss": 0.0203,
      "step": 765920
    },
    {
      "epoch": 1.2534776091069173,
      "grad_norm": 0.46999120712280273,
      "learning_rate": 7.542418112451653e-06,
      "loss": 0.022,
      "step": 765940
    },
    {
      "epoch": 1.2535103395455707,
      "grad_norm": 0.09871331602334976,
      "learning_rate": 7.542352220238135e-06,
      "loss": 0.0158,
      "step": 765960
    },
    {
      "epoch": 1.2535430699842238,
      "grad_norm": 0.6929445266723633,
      "learning_rate": 7.542286328024618e-06,
      "loss": 0.0297,
      "step": 765980
    },
    {
      "epoch": 1.2535758004228772,
      "grad_norm": 2.4760940074920654,
      "learning_rate": 7.542220435811101e-06,
      "loss": 0.0209,
      "step": 766000
    },
    {
      "epoch": 1.2536085308615306,
      "grad_norm": 0.2094949185848236,
      "learning_rate": 7.542154543597584e-06,
      "loss": 0.026,
      "step": 766020
    },
    {
      "epoch": 1.253641261300184,
      "grad_norm": 0.49693477153778076,
      "learning_rate": 7.5420886513840665e-06,
      "loss": 0.0269,
      "step": 766040
    },
    {
      "epoch": 1.2536739917388373,
      "grad_norm": 0.5433644652366638,
      "learning_rate": 7.54202275917055e-06,
      "loss": 0.016,
      "step": 766060
    },
    {
      "epoch": 1.2537067221774907,
      "grad_norm": 0.728534996509552,
      "learning_rate": 7.541956866957032e-06,
      "loss": 0.0153,
      "step": 766080
    },
    {
      "epoch": 1.253739452616144,
      "grad_norm": 0.4997301995754242,
      "learning_rate": 7.541890974743516e-06,
      "loss": 0.0233,
      "step": 766100
    },
    {
      "epoch": 1.2537721830547972,
      "grad_norm": 0.6344456076622009,
      "learning_rate": 7.5418250825299975e-06,
      "loss": 0.0309,
      "step": 766120
    },
    {
      "epoch": 1.2538049134934506,
      "grad_norm": 0.3601965308189392,
      "learning_rate": 7.541759190316481e-06,
      "loss": 0.0235,
      "step": 766140
    },
    {
      "epoch": 1.253837643932104,
      "grad_norm": 0.25750428438186646,
      "learning_rate": 7.541693298102963e-06,
      "loss": 0.0149,
      "step": 766160
    },
    {
      "epoch": 1.2538703743707573,
      "grad_norm": 0.2629392147064209,
      "learning_rate": 7.5416274058894466e-06,
      "loss": 0.0177,
      "step": 766180
    },
    {
      "epoch": 1.2539031048094107,
      "grad_norm": 0.9360063672065735,
      "learning_rate": 7.541561513675929e-06,
      "loss": 0.0207,
      "step": 766200
    },
    {
      "epoch": 1.253935835248064,
      "grad_norm": 0.7855148911476135,
      "learning_rate": 7.541495621462412e-06,
      "loss": 0.0285,
      "step": 766220
    },
    {
      "epoch": 1.2539685656867174,
      "grad_norm": 0.912385106086731,
      "learning_rate": 7.541429729248896e-06,
      "loss": 0.0139,
      "step": 766240
    },
    {
      "epoch": 1.2540012961253706,
      "grad_norm": 0.5032590627670288,
      "learning_rate": 7.541363837035378e-06,
      "loss": 0.0224,
      "step": 766260
    },
    {
      "epoch": 1.254034026564024,
      "grad_norm": 1.4020216464996338,
      "learning_rate": 7.541297944821861e-06,
      "loss": 0.0202,
      "step": 766280
    },
    {
      "epoch": 1.2540667570026773,
      "grad_norm": 0.4417886734008789,
      "learning_rate": 7.541232052608344e-06,
      "loss": 0.0234,
      "step": 766300
    },
    {
      "epoch": 1.2540994874413307,
      "grad_norm": 0.3038454055786133,
      "learning_rate": 7.5411661603948274e-06,
      "loss": 0.0226,
      "step": 766320
    },
    {
      "epoch": 1.254132217879984,
      "grad_norm": 0.535915195941925,
      "learning_rate": 7.541100268181309e-06,
      "loss": 0.0291,
      "step": 766340
    },
    {
      "epoch": 1.2541649483186372,
      "grad_norm": 0.1563408374786377,
      "learning_rate": 7.541034375967793e-06,
      "loss": 0.0307,
      "step": 766360
    },
    {
      "epoch": 1.2541976787572908,
      "grad_norm": 0.23720486462116241,
      "learning_rate": 7.540968483754275e-06,
      "loss": 0.0213,
      "step": 766380
    },
    {
      "epoch": 1.254230409195944,
      "grad_norm": 0.37523555755615234,
      "learning_rate": 7.540902591540758e-06,
      "loss": 0.0243,
      "step": 766400
    },
    {
      "epoch": 1.2542631396345973,
      "grad_norm": 0.4947027266025543,
      "learning_rate": 7.540836699327241e-06,
      "loss": 0.0135,
      "step": 766420
    },
    {
      "epoch": 1.2542958700732507,
      "grad_norm": 0.49495452642440796,
      "learning_rate": 7.540770807113724e-06,
      "loss": 0.0281,
      "step": 766440
    },
    {
      "epoch": 1.254328600511904,
      "grad_norm": 0.15784437954425812,
      "learning_rate": 7.540704914900207e-06,
      "loss": 0.0104,
      "step": 766460
    },
    {
      "epoch": 1.2543613309505575,
      "grad_norm": 2.1976568698883057,
      "learning_rate": 7.54063902268669e-06,
      "loss": 0.0193,
      "step": 766480
    },
    {
      "epoch": 1.2543940613892106,
      "grad_norm": 0.5173406004905701,
      "learning_rate": 7.540573130473172e-06,
      "loss": 0.019,
      "step": 766500
    },
    {
      "epoch": 1.2544267918278642,
      "grad_norm": 0.9487687349319458,
      "learning_rate": 7.540507238259656e-06,
      "loss": 0.0237,
      "step": 766520
    },
    {
      "epoch": 1.2544595222665174,
      "grad_norm": 2.4029455184936523,
      "learning_rate": 7.5404413460461376e-06,
      "loss": 0.025,
      "step": 766540
    },
    {
      "epoch": 1.2544922527051707,
      "grad_norm": 0.18997593224048615,
      "learning_rate": 7.540375453832621e-06,
      "loss": 0.0209,
      "step": 766560
    },
    {
      "epoch": 1.254524983143824,
      "grad_norm": 0.877310574054718,
      "learning_rate": 7.540309561619104e-06,
      "loss": 0.0238,
      "step": 766580
    },
    {
      "epoch": 1.2545577135824775,
      "grad_norm": 1.5006605386734009,
      "learning_rate": 7.540243669405587e-06,
      "loss": 0.0263,
      "step": 766600
    },
    {
      "epoch": 1.2545904440211308,
      "grad_norm": 0.6184774041175842,
      "learning_rate": 7.54017777719207e-06,
      "loss": 0.029,
      "step": 766620
    },
    {
      "epoch": 1.254623174459784,
      "grad_norm": 1.578677773475647,
      "learning_rate": 7.540111884978553e-06,
      "loss": 0.0334,
      "step": 766640
    },
    {
      "epoch": 1.2546559048984374,
      "grad_norm": 0.34218382835388184,
      "learning_rate": 7.540045992765036e-06,
      "loss": 0.0447,
      "step": 766660
    },
    {
      "epoch": 1.2546886353370907,
      "grad_norm": 0.43534716963768005,
      "learning_rate": 7.5399801005515184e-06,
      "loss": 0.0231,
      "step": 766680
    },
    {
      "epoch": 1.2547213657757441,
      "grad_norm": 0.2081255167722702,
      "learning_rate": 7.539914208338002e-06,
      "loss": 0.0246,
      "step": 766700
    },
    {
      "epoch": 1.2547540962143975,
      "grad_norm": 0.7708384990692139,
      "learning_rate": 7.539848316124484e-06,
      "loss": 0.0275,
      "step": 766720
    },
    {
      "epoch": 1.2547868266530509,
      "grad_norm": 1.1658556461334229,
      "learning_rate": 7.5397824239109675e-06,
      "loss": 0.0165,
      "step": 766740
    },
    {
      "epoch": 1.2548195570917042,
      "grad_norm": 0.634594738483429,
      "learning_rate": 7.539716531697449e-06,
      "loss": 0.0213,
      "step": 766760
    },
    {
      "epoch": 1.2548522875303574,
      "grad_norm": 0.3561919033527374,
      "learning_rate": 7.539650639483933e-06,
      "loss": 0.0226,
      "step": 766780
    },
    {
      "epoch": 1.2548850179690108,
      "grad_norm": 1.8040724992752075,
      "learning_rate": 7.539584747270416e-06,
      "loss": 0.0184,
      "step": 766800
    },
    {
      "epoch": 1.2549177484076641,
      "grad_norm": 0.4982930123806,
      "learning_rate": 7.5395188550568985e-06,
      "loss": 0.023,
      "step": 766820
    },
    {
      "epoch": 1.2549504788463175,
      "grad_norm": 0.20849503576755524,
      "learning_rate": 7.539452962843381e-06,
      "loss": 0.0253,
      "step": 766840
    },
    {
      "epoch": 1.2549832092849709,
      "grad_norm": 0.4040758013725281,
      "learning_rate": 7.539387070629865e-06,
      "loss": 0.0267,
      "step": 766860
    },
    {
      "epoch": 1.2550159397236242,
      "grad_norm": 0.9131127595901489,
      "learning_rate": 7.539321178416347e-06,
      "loss": 0.0193,
      "step": 766880
    },
    {
      "epoch": 1.2550486701622776,
      "grad_norm": 1.3065060377120972,
      "learning_rate": 7.53925528620283e-06,
      "loss": 0.026,
      "step": 766900
    },
    {
      "epoch": 1.2550814006009308,
      "grad_norm": 2.1309945583343506,
      "learning_rate": 7.539189393989312e-06,
      "loss": 0.0236,
      "step": 766920
    },
    {
      "epoch": 1.2551141310395841,
      "grad_norm": 0.567525327205658,
      "learning_rate": 7.539123501775796e-06,
      "loss": 0.0225,
      "step": 766940
    },
    {
      "epoch": 1.2551468614782375,
      "grad_norm": 0.5646364688873291,
      "learning_rate": 7.539057609562279e-06,
      "loss": 0.0249,
      "step": 766960
    },
    {
      "epoch": 1.2551795919168909,
      "grad_norm": 0.37108418345451355,
      "learning_rate": 7.538991717348761e-06,
      "loss": 0.0144,
      "step": 766980
    },
    {
      "epoch": 1.2552123223555443,
      "grad_norm": 0.4484381377696991,
      "learning_rate": 7.538925825135245e-06,
      "loss": 0.0322,
      "step": 767000
    },
    {
      "epoch": 1.2552450527941976,
      "grad_norm": 9.043203353881836,
      "learning_rate": 7.538859932921727e-06,
      "loss": 0.0196,
      "step": 767020
    },
    {
      "epoch": 1.255277783232851,
      "grad_norm": 1.6565051078796387,
      "learning_rate": 7.53879404070821e-06,
      "loss": 0.0264,
      "step": 767040
    },
    {
      "epoch": 1.2553105136715041,
      "grad_norm": 0.4121279716491699,
      "learning_rate": 7.538728148494693e-06,
      "loss": 0.0193,
      "step": 767060
    },
    {
      "epoch": 1.2553432441101575,
      "grad_norm": 0.1745433509349823,
      "learning_rate": 7.538662256281177e-06,
      "loss": 0.0177,
      "step": 767080
    },
    {
      "epoch": 1.255375974548811,
      "grad_norm": 0.8383409976959229,
      "learning_rate": 7.5385963640676585e-06,
      "loss": 0.0285,
      "step": 767100
    },
    {
      "epoch": 1.2554087049874643,
      "grad_norm": 0.353139191865921,
      "learning_rate": 7.538530471854142e-06,
      "loss": 0.0174,
      "step": 767120
    },
    {
      "epoch": 1.2554414354261176,
      "grad_norm": 2.019414186477661,
      "learning_rate": 7.538464579640624e-06,
      "loss": 0.0176,
      "step": 767140
    },
    {
      "epoch": 1.2554741658647708,
      "grad_norm": 0.9454387426376343,
      "learning_rate": 7.538398687427108e-06,
      "loss": 0.0213,
      "step": 767160
    },
    {
      "epoch": 1.2555068963034244,
      "grad_norm": 0.3335168659687042,
      "learning_rate": 7.5383327952135895e-06,
      "loss": 0.0245,
      "step": 767180
    },
    {
      "epoch": 1.2555396267420775,
      "grad_norm": 0.2998516261577606,
      "learning_rate": 7.538266903000073e-06,
      "loss": 0.0193,
      "step": 767200
    },
    {
      "epoch": 1.255572357180731,
      "grad_norm": 0.8335081934928894,
      "learning_rate": 7.538201010786556e-06,
      "loss": 0.0207,
      "step": 767220
    },
    {
      "epoch": 1.2556050876193843,
      "grad_norm": 0.06810543686151505,
      "learning_rate": 7.5381351185730385e-06,
      "loss": 0.0245,
      "step": 767240
    },
    {
      "epoch": 1.2556378180580376,
      "grad_norm": 0.5291006565093994,
      "learning_rate": 7.538069226359521e-06,
      "loss": 0.0219,
      "step": 767260
    },
    {
      "epoch": 1.255670548496691,
      "grad_norm": 0.3636409342288971,
      "learning_rate": 7.538003334146005e-06,
      "loss": 0.0339,
      "step": 767280
    },
    {
      "epoch": 1.2557032789353442,
      "grad_norm": 0.26346081495285034,
      "learning_rate": 7.537937441932488e-06,
      "loss": 0.0233,
      "step": 767300
    },
    {
      "epoch": 1.2557360093739978,
      "grad_norm": 0.8082184791564941,
      "learning_rate": 7.53787154971897e-06,
      "loss": 0.0343,
      "step": 767320
    },
    {
      "epoch": 1.255768739812651,
      "grad_norm": 0.7311208248138428,
      "learning_rate": 7.537805657505454e-06,
      "loss": 0.0274,
      "step": 767340
    },
    {
      "epoch": 1.2558014702513043,
      "grad_norm": 3.1531550884246826,
      "learning_rate": 7.537739765291936e-06,
      "loss": 0.0235,
      "step": 767360
    },
    {
      "epoch": 1.2558342006899577,
      "grad_norm": 1.2438716888427734,
      "learning_rate": 7.537673873078419e-06,
      "loss": 0.0275,
      "step": 767380
    },
    {
      "epoch": 1.255866931128611,
      "grad_norm": 0.4191263020038605,
      "learning_rate": 7.537607980864901e-06,
      "loss": 0.018,
      "step": 767400
    },
    {
      "epoch": 1.2558996615672644,
      "grad_norm": 0.5636884570121765,
      "learning_rate": 7.537542088651385e-06,
      "loss": 0.0235,
      "step": 767420
    },
    {
      "epoch": 1.2559323920059176,
      "grad_norm": 0.32416823506355286,
      "learning_rate": 7.537476196437868e-06,
      "loss": 0.0183,
      "step": 767440
    },
    {
      "epoch": 1.255965122444571,
      "grad_norm": 0.8678059577941895,
      "learning_rate": 7.53741030422435e-06,
      "loss": 0.0279,
      "step": 767460
    },
    {
      "epoch": 1.2559978528832243,
      "grad_norm": 0.2908113896846771,
      "learning_rate": 7.537344412010833e-06,
      "loss": 0.0194,
      "step": 767480
    },
    {
      "epoch": 1.2560305833218777,
      "grad_norm": 1.3418312072753906,
      "learning_rate": 7.537278519797317e-06,
      "loss": 0.031,
      "step": 767500
    },
    {
      "epoch": 1.256063313760531,
      "grad_norm": 0.748727023601532,
      "learning_rate": 7.537212627583799e-06,
      "loss": 0.0309,
      "step": 767520
    },
    {
      "epoch": 1.2560960441991844,
      "grad_norm": 0.6224284172058105,
      "learning_rate": 7.537146735370282e-06,
      "loss": 0.017,
      "step": 767540
    },
    {
      "epoch": 1.2561287746378378,
      "grad_norm": 1.5941203832626343,
      "learning_rate": 7.537080843156764e-06,
      "loss": 0.0217,
      "step": 767560
    },
    {
      "epoch": 1.256161505076491,
      "grad_norm": 1.1520096063613892,
      "learning_rate": 7.537014950943248e-06,
      "loss": 0.0223,
      "step": 767580
    },
    {
      "epoch": 1.2561942355151443,
      "grad_norm": 0.651444673538208,
      "learning_rate": 7.53694905872973e-06,
      "loss": 0.0303,
      "step": 767600
    },
    {
      "epoch": 1.2562269659537977,
      "grad_norm": 0.3370349407196045,
      "learning_rate": 7.536883166516213e-06,
      "loss": 0.0217,
      "step": 767620
    },
    {
      "epoch": 1.256259696392451,
      "grad_norm": 0.7980194687843323,
      "learning_rate": 7.536817274302696e-06,
      "loss": 0.0209,
      "step": 767640
    },
    {
      "epoch": 1.2562924268311044,
      "grad_norm": 0.9038166403770447,
      "learning_rate": 7.5367513820891795e-06,
      "loss": 0.0154,
      "step": 767660
    },
    {
      "epoch": 1.2563251572697578,
      "grad_norm": 0.2217749059200287,
      "learning_rate": 7.536685489875662e-06,
      "loss": 0.0209,
      "step": 767680
    },
    {
      "epoch": 1.2563578877084112,
      "grad_norm": 1.329214096069336,
      "learning_rate": 7.536619597662145e-06,
      "loss": 0.0204,
      "step": 767700
    },
    {
      "epoch": 1.2563906181470643,
      "grad_norm": 0.8414857983589172,
      "learning_rate": 7.5365537054486285e-06,
      "loss": 0.0225,
      "step": 767720
    },
    {
      "epoch": 1.2564233485857177,
      "grad_norm": 0.6183570027351379,
      "learning_rate": 7.53648781323511e-06,
      "loss": 0.0277,
      "step": 767740
    },
    {
      "epoch": 1.256456079024371,
      "grad_norm": 0.7491553425788879,
      "learning_rate": 7.536421921021594e-06,
      "loss": 0.0179,
      "step": 767760
    },
    {
      "epoch": 1.2564888094630244,
      "grad_norm": 0.14533878862857819,
      "learning_rate": 7.536356028808076e-06,
      "loss": 0.0182,
      "step": 767780
    },
    {
      "epoch": 1.2565215399016778,
      "grad_norm": 2.066377639770508,
      "learning_rate": 7.5362901365945595e-06,
      "loss": 0.0299,
      "step": 767800
    },
    {
      "epoch": 1.2565542703403312,
      "grad_norm": 0.36803942918777466,
      "learning_rate": 7.536224244381042e-06,
      "loss": 0.0203,
      "step": 767820
    },
    {
      "epoch": 1.2565870007789846,
      "grad_norm": 0.8148936629295349,
      "learning_rate": 7.536158352167525e-06,
      "loss": 0.0234,
      "step": 767840
    },
    {
      "epoch": 1.2566197312176377,
      "grad_norm": 1.0995951890945435,
      "learning_rate": 7.536092459954008e-06,
      "loss": 0.0271,
      "step": 767860
    },
    {
      "epoch": 1.256652461656291,
      "grad_norm": 0.12902218103408813,
      "learning_rate": 7.536026567740491e-06,
      "loss": 0.0157,
      "step": 767880
    },
    {
      "epoch": 1.2566851920949444,
      "grad_norm": 0.7168363928794861,
      "learning_rate": 7.535960675526973e-06,
      "loss": 0.0193,
      "step": 767900
    },
    {
      "epoch": 1.2567179225335978,
      "grad_norm": 0.8663725852966309,
      "learning_rate": 7.535894783313457e-06,
      "loss": 0.0207,
      "step": 767920
    },
    {
      "epoch": 1.2567506529722512,
      "grad_norm": 0.3757583498954773,
      "learning_rate": 7.535828891099939e-06,
      "loss": 0.0202,
      "step": 767940
    },
    {
      "epoch": 1.2567833834109043,
      "grad_norm": 0.16477760672569275,
      "learning_rate": 7.535762998886422e-06,
      "loss": 0.0207,
      "step": 767960
    },
    {
      "epoch": 1.256816113849558,
      "grad_norm": 0.2097306102514267,
      "learning_rate": 7.535697106672904e-06,
      "loss": 0.02,
      "step": 767980
    },
    {
      "epoch": 1.256848844288211,
      "grad_norm": 0.889896035194397,
      "learning_rate": 7.535631214459388e-06,
      "loss": 0.0291,
      "step": 768000
    },
    {
      "epoch": 1.2568815747268645,
      "grad_norm": 0.3986178934574127,
      "learning_rate": 7.535565322245871e-06,
      "loss": 0.0265,
      "step": 768020
    },
    {
      "epoch": 1.2569143051655178,
      "grad_norm": 0.5334055423736572,
      "learning_rate": 7.535499430032353e-06,
      "loss": 0.022,
      "step": 768040
    },
    {
      "epoch": 1.2569470356041712,
      "grad_norm": 0.4561263620853424,
      "learning_rate": 7.535433537818837e-06,
      "loss": 0.021,
      "step": 768060
    },
    {
      "epoch": 1.2569797660428246,
      "grad_norm": 0.2388102263212204,
      "learning_rate": 7.5353676456053195e-06,
      "loss": 0.0202,
      "step": 768080
    },
    {
      "epoch": 1.2570124964814777,
      "grad_norm": 0.35357385873794556,
      "learning_rate": 7.535301753391802e-06,
      "loss": 0.0213,
      "step": 768100
    },
    {
      "epoch": 1.2570452269201313,
      "grad_norm": 0.5419588088989258,
      "learning_rate": 7.535235861178285e-06,
      "loss": 0.0198,
      "step": 768120
    },
    {
      "epoch": 1.2570779573587845,
      "grad_norm": 0.21832337975502014,
      "learning_rate": 7.535169968964769e-06,
      "loss": 0.0218,
      "step": 768140
    },
    {
      "epoch": 1.2571106877974378,
      "grad_norm": 0.2496790587902069,
      "learning_rate": 7.5351040767512505e-06,
      "loss": 0.0308,
      "step": 768160
    },
    {
      "epoch": 1.2571434182360912,
      "grad_norm": 0.5404677391052246,
      "learning_rate": 7.535038184537734e-06,
      "loss": 0.0307,
      "step": 768180
    },
    {
      "epoch": 1.2571761486747446,
      "grad_norm": 0.9385098814964294,
      "learning_rate": 7.534972292324216e-06,
      "loss": 0.0229,
      "step": 768200
    },
    {
      "epoch": 1.257208879113398,
      "grad_norm": 0.35234153270721436,
      "learning_rate": 7.5349064001106996e-06,
      "loss": 0.0195,
      "step": 768220
    },
    {
      "epoch": 1.257241609552051,
      "grad_norm": 0.3741462528705597,
      "learning_rate": 7.534840507897182e-06,
      "loss": 0.0274,
      "step": 768240
    },
    {
      "epoch": 1.2572743399907045,
      "grad_norm": 0.44142946600914,
      "learning_rate": 7.534774615683665e-06,
      "loss": 0.0288,
      "step": 768260
    },
    {
      "epoch": 1.2573070704293579,
      "grad_norm": 0.5509446263313293,
      "learning_rate": 7.534708723470148e-06,
      "loss": 0.0183,
      "step": 768280
    },
    {
      "epoch": 1.2573398008680112,
      "grad_norm": 0.43135470151901245,
      "learning_rate": 7.534642831256631e-06,
      "loss": 0.0293,
      "step": 768300
    },
    {
      "epoch": 1.2573725313066646,
      "grad_norm": 0.9323956370353699,
      "learning_rate": 7.534576939043113e-06,
      "loss": 0.0215,
      "step": 768320
    },
    {
      "epoch": 1.257405261745318,
      "grad_norm": 0.5825937390327454,
      "learning_rate": 7.534511046829597e-06,
      "loss": 0.0241,
      "step": 768340
    },
    {
      "epoch": 1.2574379921839713,
      "grad_norm": 0.7437251806259155,
      "learning_rate": 7.5344451546160804e-06,
      "loss": 0.0208,
      "step": 768360
    },
    {
      "epoch": 1.2574707226226245,
      "grad_norm": 0.9539486765861511,
      "learning_rate": 7.534379262402562e-06,
      "loss": 0.0195,
      "step": 768380
    },
    {
      "epoch": 1.2575034530612779,
      "grad_norm": 6.8242645263671875,
      "learning_rate": 7.534313370189046e-06,
      "loss": 0.0203,
      "step": 768400
    },
    {
      "epoch": 1.2575361834999312,
      "grad_norm": 0.9964607954025269,
      "learning_rate": 7.534247477975528e-06,
      "loss": 0.0253,
      "step": 768420
    },
    {
      "epoch": 1.2575689139385846,
      "grad_norm": 0.2728891372680664,
      "learning_rate": 7.534181585762011e-06,
      "loss": 0.0241,
      "step": 768440
    },
    {
      "epoch": 1.257601644377238,
      "grad_norm": 1.7638581991195679,
      "learning_rate": 7.534115693548494e-06,
      "loss": 0.0351,
      "step": 768460
    },
    {
      "epoch": 1.2576343748158914,
      "grad_norm": 0.21101970970630646,
      "learning_rate": 7.534049801334977e-06,
      "loss": 0.0197,
      "step": 768480
    },
    {
      "epoch": 1.2576671052545447,
      "grad_norm": 1.1628444194793701,
      "learning_rate": 7.53398390912146e-06,
      "loss": 0.0262,
      "step": 768500
    },
    {
      "epoch": 1.2576998356931979,
      "grad_norm": 0.7841575741767883,
      "learning_rate": 7.533918016907943e-06,
      "loss": 0.0263,
      "step": 768520
    },
    {
      "epoch": 1.2577325661318512,
      "grad_norm": 0.5747596621513367,
      "learning_rate": 7.533852124694425e-06,
      "loss": 0.0158,
      "step": 768540
    },
    {
      "epoch": 1.2577652965705046,
      "grad_norm": 0.6404359340667725,
      "learning_rate": 7.533786232480909e-06,
      "loss": 0.0254,
      "step": 768560
    },
    {
      "epoch": 1.257798027009158,
      "grad_norm": 0.5438997745513916,
      "learning_rate": 7.5337203402673906e-06,
      "loss": 0.0139,
      "step": 768580
    },
    {
      "epoch": 1.2578307574478114,
      "grad_norm": 0.7372135519981384,
      "learning_rate": 7.533654448053874e-06,
      "loss": 0.0228,
      "step": 768600
    },
    {
      "epoch": 1.2578634878864647,
      "grad_norm": 0.2621228098869324,
      "learning_rate": 7.533588555840357e-06,
      "loss": 0.0339,
      "step": 768620
    },
    {
      "epoch": 1.257896218325118,
      "grad_norm": 0.3140876591205597,
      "learning_rate": 7.53352266362684e-06,
      "loss": 0.0234,
      "step": 768640
    },
    {
      "epoch": 1.2579289487637713,
      "grad_norm": 0.6154201626777649,
      "learning_rate": 7.533456771413322e-06,
      "loss": 0.0351,
      "step": 768660
    },
    {
      "epoch": 1.2579616792024246,
      "grad_norm": 0.8567871451377869,
      "learning_rate": 7.533390879199806e-06,
      "loss": 0.0245,
      "step": 768680
    },
    {
      "epoch": 1.257994409641078,
      "grad_norm": 0.3539109230041504,
      "learning_rate": 7.533324986986289e-06,
      "loss": 0.0186,
      "step": 768700
    },
    {
      "epoch": 1.2580271400797314,
      "grad_norm": 0.35411912202835083,
      "learning_rate": 7.5332590947727714e-06,
      "loss": 0.0233,
      "step": 768720
    },
    {
      "epoch": 1.2580598705183847,
      "grad_norm": 0.22109825909137726,
      "learning_rate": 7.533193202559255e-06,
      "loss": 0.0226,
      "step": 768740
    },
    {
      "epoch": 1.258092600957038,
      "grad_norm": 0.48869872093200684,
      "learning_rate": 7.533127310345737e-06,
      "loss": 0.0254,
      "step": 768760
    },
    {
      "epoch": 1.2581253313956915,
      "grad_norm": 0.35146400332450867,
      "learning_rate": 7.5330614181322205e-06,
      "loss": 0.0181,
      "step": 768780
    },
    {
      "epoch": 1.2581580618343446,
      "grad_norm": 0.5671677589416504,
      "learning_rate": 7.532995525918702e-06,
      "loss": 0.0222,
      "step": 768800
    },
    {
      "epoch": 1.258190792272998,
      "grad_norm": 0.7223840951919556,
      "learning_rate": 7.532929633705186e-06,
      "loss": 0.0245,
      "step": 768820
    },
    {
      "epoch": 1.2582235227116514,
      "grad_norm": 0.19300171732902527,
      "learning_rate": 7.532863741491669e-06,
      "loss": 0.0219,
      "step": 768840
    },
    {
      "epoch": 1.2582562531503048,
      "grad_norm": 0.34126803278923035,
      "learning_rate": 7.5327978492781515e-06,
      "loss": 0.0122,
      "step": 768860
    },
    {
      "epoch": 1.2582889835889581,
      "grad_norm": 0.2815527319908142,
      "learning_rate": 7.532731957064634e-06,
      "loss": 0.0168,
      "step": 768880
    },
    {
      "epoch": 1.2583217140276113,
      "grad_norm": 0.09427080303430557,
      "learning_rate": 7.532666064851118e-06,
      "loss": 0.016,
      "step": 768900
    },
    {
      "epoch": 1.2583544444662647,
      "grad_norm": 1.2767860889434814,
      "learning_rate": 7.5326001726376e-06,
      "loss": 0.0322,
      "step": 768920
    },
    {
      "epoch": 1.258387174904918,
      "grad_norm": 0.19029571115970612,
      "learning_rate": 7.532534280424083e-06,
      "loss": 0.021,
      "step": 768940
    },
    {
      "epoch": 1.2584199053435714,
      "grad_norm": 0.42430374026298523,
      "learning_rate": 7.532468388210565e-06,
      "loss": 0.0242,
      "step": 768960
    },
    {
      "epoch": 1.2584526357822248,
      "grad_norm": 0.6883662343025208,
      "learning_rate": 7.532402495997049e-06,
      "loss": 0.023,
      "step": 768980
    },
    {
      "epoch": 1.2584853662208781,
      "grad_norm": 1.4863052368164062,
      "learning_rate": 7.532336603783531e-06,
      "loss": 0.0245,
      "step": 769000
    },
    {
      "epoch": 1.2585180966595315,
      "grad_norm": 1.553589940071106,
      "learning_rate": 7.532270711570014e-06,
      "loss": 0.0253,
      "step": 769020
    },
    {
      "epoch": 1.2585508270981847,
      "grad_norm": 0.25779032707214355,
      "learning_rate": 7.532204819356497e-06,
      "loss": 0.0198,
      "step": 769040
    },
    {
      "epoch": 1.258583557536838,
      "grad_norm": 1.2532916069030762,
      "learning_rate": 7.53213892714298e-06,
      "loss": 0.0233,
      "step": 769060
    },
    {
      "epoch": 1.2586162879754914,
      "grad_norm": 0.1582448035478592,
      "learning_rate": 7.532073034929463e-06,
      "loss": 0.0225,
      "step": 769080
    },
    {
      "epoch": 1.2586490184141448,
      "grad_norm": 0.5972861051559448,
      "learning_rate": 7.532007142715946e-06,
      "loss": 0.02,
      "step": 769100
    },
    {
      "epoch": 1.2586817488527982,
      "grad_norm": 1.169609546661377,
      "learning_rate": 7.531941250502429e-06,
      "loss": 0.0213,
      "step": 769120
    },
    {
      "epoch": 1.2587144792914515,
      "grad_norm": 0.31742656230926514,
      "learning_rate": 7.5318753582889115e-06,
      "loss": 0.0272,
      "step": 769140
    },
    {
      "epoch": 1.258747209730105,
      "grad_norm": 0.8045822381973267,
      "learning_rate": 7.531809466075395e-06,
      "loss": 0.0238,
      "step": 769160
    },
    {
      "epoch": 1.258779940168758,
      "grad_norm": 1.235474944114685,
      "learning_rate": 7.531743573861877e-06,
      "loss": 0.0406,
      "step": 769180
    },
    {
      "epoch": 1.2588126706074114,
      "grad_norm": 1.3541347980499268,
      "learning_rate": 7.531677681648361e-06,
      "loss": 0.0169,
      "step": 769200
    },
    {
      "epoch": 1.2588454010460648,
      "grad_norm": 1.365618348121643,
      "learning_rate": 7.5316117894348425e-06,
      "loss": 0.0265,
      "step": 769220
    },
    {
      "epoch": 1.2588781314847182,
      "grad_norm": 0.694631814956665,
      "learning_rate": 7.531545897221326e-06,
      "loss": 0.0253,
      "step": 769240
    },
    {
      "epoch": 1.2589108619233715,
      "grad_norm": 0.07409007102251053,
      "learning_rate": 7.531480005007809e-06,
      "loss": 0.0192,
      "step": 769260
    },
    {
      "epoch": 1.258943592362025,
      "grad_norm": 1.0671615600585938,
      "learning_rate": 7.5314141127942915e-06,
      "loss": 0.0208,
      "step": 769280
    },
    {
      "epoch": 1.2589763228006783,
      "grad_norm": 0.48643431067466736,
      "learning_rate": 7.531348220580774e-06,
      "loss": 0.0274,
      "step": 769300
    },
    {
      "epoch": 1.2590090532393314,
      "grad_norm": 0.9676903486251831,
      "learning_rate": 7.531282328367258e-06,
      "loss": 0.028,
      "step": 769320
    },
    {
      "epoch": 1.2590417836779848,
      "grad_norm": 0.39452847838401794,
      "learning_rate": 7.53121643615374e-06,
      "loss": 0.0148,
      "step": 769340
    },
    {
      "epoch": 1.2590745141166382,
      "grad_norm": 0.8019198775291443,
      "learning_rate": 7.531150543940223e-06,
      "loss": 0.0225,
      "step": 769360
    },
    {
      "epoch": 1.2591072445552915,
      "grad_norm": 1.7454745769500732,
      "learning_rate": 7.531084651726705e-06,
      "loss": 0.0182,
      "step": 769380
    },
    {
      "epoch": 1.259139974993945,
      "grad_norm": 0.5017543435096741,
      "learning_rate": 7.531018759513189e-06,
      "loss": 0.0245,
      "step": 769400
    },
    {
      "epoch": 1.259172705432598,
      "grad_norm": 0.3230121433734894,
      "learning_rate": 7.530952867299672e-06,
      "loss": 0.0303,
      "step": 769420
    },
    {
      "epoch": 1.2592054358712517,
      "grad_norm": 0.7064872980117798,
      "learning_rate": 7.530886975086154e-06,
      "loss": 0.0269,
      "step": 769440
    },
    {
      "epoch": 1.2592381663099048,
      "grad_norm": 0.32177606225013733,
      "learning_rate": 7.530821082872638e-06,
      "loss": 0.0198,
      "step": 769460
    },
    {
      "epoch": 1.2592708967485582,
      "grad_norm": 1.0030262470245361,
      "learning_rate": 7.530755190659121e-06,
      "loss": 0.0311,
      "step": 769480
    },
    {
      "epoch": 1.2593036271872116,
      "grad_norm": 1.5253037214279175,
      "learning_rate": 7.530689298445603e-06,
      "loss": 0.032,
      "step": 769500
    },
    {
      "epoch": 1.259336357625865,
      "grad_norm": 0.7720701694488525,
      "learning_rate": 7.530623406232086e-06,
      "loss": 0.0194,
      "step": 769520
    },
    {
      "epoch": 1.2593690880645183,
      "grad_norm": 0.824231743812561,
      "learning_rate": 7.53055751401857e-06,
      "loss": 0.0252,
      "step": 769540
    },
    {
      "epoch": 1.2594018185031715,
      "grad_norm": 0.7166613936424255,
      "learning_rate": 7.530491621805052e-06,
      "loss": 0.0222,
      "step": 769560
    },
    {
      "epoch": 1.259434548941825,
      "grad_norm": 1.8865549564361572,
      "learning_rate": 7.530425729591535e-06,
      "loss": 0.0296,
      "step": 769580
    },
    {
      "epoch": 1.2594672793804782,
      "grad_norm": 0.16721194982528687,
      "learning_rate": 7.530359837378017e-06,
      "loss": 0.017,
      "step": 769600
    },
    {
      "epoch": 1.2595000098191316,
      "grad_norm": 1.360032558441162,
      "learning_rate": 7.530293945164501e-06,
      "loss": 0.015,
      "step": 769620
    },
    {
      "epoch": 1.259532740257785,
      "grad_norm": 0.4780234694480896,
      "learning_rate": 7.530228052950983e-06,
      "loss": 0.028,
      "step": 769640
    },
    {
      "epoch": 1.2595654706964383,
      "grad_norm": 0.45462337136268616,
      "learning_rate": 7.530162160737466e-06,
      "loss": 0.0144,
      "step": 769660
    },
    {
      "epoch": 1.2595982011350917,
      "grad_norm": 0.9100522994995117,
      "learning_rate": 7.530096268523949e-06,
      "loss": 0.019,
      "step": 769680
    },
    {
      "epoch": 1.2596309315737448,
      "grad_norm": 0.6568703651428223,
      "learning_rate": 7.5300303763104325e-06,
      "loss": 0.0281,
      "step": 769700
    },
    {
      "epoch": 1.2596636620123982,
      "grad_norm": 2.7133564949035645,
      "learning_rate": 7.529964484096914e-06,
      "loss": 0.0266,
      "step": 769720
    },
    {
      "epoch": 1.2596963924510516,
      "grad_norm": 0.3074428141117096,
      "learning_rate": 7.529898591883398e-06,
      "loss": 0.0188,
      "step": 769740
    },
    {
      "epoch": 1.259729122889705,
      "grad_norm": 0.5462509393692017,
      "learning_rate": 7.5298326996698815e-06,
      "loss": 0.0196,
      "step": 769760
    },
    {
      "epoch": 1.2597618533283583,
      "grad_norm": 1.4529497623443604,
      "learning_rate": 7.5297668074563634e-06,
      "loss": 0.0234,
      "step": 769780
    },
    {
      "epoch": 1.2597945837670117,
      "grad_norm": 0.9510953426361084,
      "learning_rate": 7.529700915242847e-06,
      "loss": 0.0184,
      "step": 769800
    },
    {
      "epoch": 1.259827314205665,
      "grad_norm": 1.6814874410629272,
      "learning_rate": 7.529635023029329e-06,
      "loss": 0.0206,
      "step": 769820
    },
    {
      "epoch": 1.2598600446443182,
      "grad_norm": 0.6392002701759338,
      "learning_rate": 7.5295691308158125e-06,
      "loss": 0.0209,
      "step": 769840
    },
    {
      "epoch": 1.2598927750829716,
      "grad_norm": 0.06986106932163239,
      "learning_rate": 7.529503238602295e-06,
      "loss": 0.0263,
      "step": 769860
    },
    {
      "epoch": 1.259925505521625,
      "grad_norm": 0.13374535739421844,
      "learning_rate": 7.529437346388778e-06,
      "loss": 0.0204,
      "step": 769880
    },
    {
      "epoch": 1.2599582359602783,
      "grad_norm": 0.9072746634483337,
      "learning_rate": 7.529371454175261e-06,
      "loss": 0.0234,
      "step": 769900
    },
    {
      "epoch": 1.2599909663989317,
      "grad_norm": 0.3450561761856079,
      "learning_rate": 7.529305561961744e-06,
      "loss": 0.0181,
      "step": 769920
    },
    {
      "epoch": 1.260023696837585,
      "grad_norm": 0.32162803411483765,
      "learning_rate": 7.529239669748226e-06,
      "loss": 0.0256,
      "step": 769940
    },
    {
      "epoch": 1.2600564272762385,
      "grad_norm": 0.631900429725647,
      "learning_rate": 7.52917377753471e-06,
      "loss": 0.0245,
      "step": 769960
    },
    {
      "epoch": 1.2600891577148916,
      "grad_norm": 0.26383891701698303,
      "learning_rate": 7.529107885321192e-06,
      "loss": 0.0213,
      "step": 769980
    },
    {
      "epoch": 1.260121888153545,
      "grad_norm": 1.3549903631210327,
      "learning_rate": 7.529041993107675e-06,
      "loss": 0.0242,
      "step": 770000
    },
    {
      "epoch": 1.2601546185921984,
      "grad_norm": 0.21517449617385864,
      "learning_rate": 7.528976100894157e-06,
      "loss": 0.0185,
      "step": 770020
    },
    {
      "epoch": 1.2601873490308517,
      "grad_norm": 0.8164481520652771,
      "learning_rate": 7.528910208680641e-06,
      "loss": 0.0239,
      "step": 770040
    },
    {
      "epoch": 1.260220079469505,
      "grad_norm": 0.3275987505912781,
      "learning_rate": 7.5288443164671235e-06,
      "loss": 0.0299,
      "step": 770060
    },
    {
      "epoch": 1.2602528099081585,
      "grad_norm": 0.2620607018470764,
      "learning_rate": 7.528778424253606e-06,
      "loss": 0.0197,
      "step": 770080
    },
    {
      "epoch": 1.2602855403468118,
      "grad_norm": 0.9987400770187378,
      "learning_rate": 7.528712532040089e-06,
      "loss": 0.0305,
      "step": 770100
    },
    {
      "epoch": 1.260318270785465,
      "grad_norm": 0.26838555932044983,
      "learning_rate": 7.5286466398265725e-06,
      "loss": 0.018,
      "step": 770120
    },
    {
      "epoch": 1.2603510012241184,
      "grad_norm": 0.20832030475139618,
      "learning_rate": 7.528580747613055e-06,
      "loss": 0.0251,
      "step": 770140
    },
    {
      "epoch": 1.2603837316627717,
      "grad_norm": 0.48604172468185425,
      "learning_rate": 7.528514855399538e-06,
      "loss": 0.0188,
      "step": 770160
    },
    {
      "epoch": 1.260416462101425,
      "grad_norm": 0.888976514339447,
      "learning_rate": 7.528448963186022e-06,
      "loss": 0.0223,
      "step": 770180
    },
    {
      "epoch": 1.2604491925400785,
      "grad_norm": 1.0754094123840332,
      "learning_rate": 7.5283830709725035e-06,
      "loss": 0.0267,
      "step": 770200
    },
    {
      "epoch": 1.2604819229787316,
      "grad_norm": 0.5482155084609985,
      "learning_rate": 7.528317178758987e-06,
      "loss": 0.0238,
      "step": 770220
    },
    {
      "epoch": 1.2605146534173852,
      "grad_norm": 0.7678564786911011,
      "learning_rate": 7.528251286545469e-06,
      "loss": 0.0247,
      "step": 770240
    },
    {
      "epoch": 1.2605473838560384,
      "grad_norm": 0.8131788969039917,
      "learning_rate": 7.5281853943319526e-06,
      "loss": 0.0177,
      "step": 770260
    },
    {
      "epoch": 1.2605801142946917,
      "grad_norm": 0.20182543992996216,
      "learning_rate": 7.528119502118435e-06,
      "loss": 0.0165,
      "step": 770280
    },
    {
      "epoch": 1.2606128447333451,
      "grad_norm": 0.7442775964736938,
      "learning_rate": 7.528053609904918e-06,
      "loss": 0.0175,
      "step": 770300
    },
    {
      "epoch": 1.2606455751719985,
      "grad_norm": 0.3404114246368408,
      "learning_rate": 7.527987717691401e-06,
      "loss": 0.0168,
      "step": 770320
    },
    {
      "epoch": 1.2606783056106519,
      "grad_norm": 1.119574785232544,
      "learning_rate": 7.527921825477884e-06,
      "loss": 0.0154,
      "step": 770340
    },
    {
      "epoch": 1.260711036049305,
      "grad_norm": 0.4376457631587982,
      "learning_rate": 7.527855933264366e-06,
      "loss": 0.021,
      "step": 770360
    },
    {
      "epoch": 1.2607437664879586,
      "grad_norm": 0.4538363516330719,
      "learning_rate": 7.52779004105085e-06,
      "loss": 0.021,
      "step": 770380
    },
    {
      "epoch": 1.2607764969266118,
      "grad_norm": 0.6192502379417419,
      "learning_rate": 7.527724148837332e-06,
      "loss": 0.0252,
      "step": 770400
    },
    {
      "epoch": 1.2608092273652651,
      "grad_norm": 0.3387013077735901,
      "learning_rate": 7.527658256623815e-06,
      "loss": 0.0242,
      "step": 770420
    },
    {
      "epoch": 1.2608419578039185,
      "grad_norm": 0.29131823778152466,
      "learning_rate": 7.527592364410298e-06,
      "loss": 0.0207,
      "step": 770440
    },
    {
      "epoch": 1.2608746882425719,
      "grad_norm": 0.7503299117088318,
      "learning_rate": 7.527526472196781e-06,
      "loss": 0.0341,
      "step": 770460
    },
    {
      "epoch": 1.2609074186812252,
      "grad_norm": 2.0232625007629395,
      "learning_rate": 7.527460579983264e-06,
      "loss": 0.0216,
      "step": 770480
    },
    {
      "epoch": 1.2609401491198784,
      "grad_norm": 0.2147866189479828,
      "learning_rate": 7.527394687769747e-06,
      "loss": 0.0254,
      "step": 770500
    },
    {
      "epoch": 1.2609728795585318,
      "grad_norm": 0.39212682843208313,
      "learning_rate": 7.52732879555623e-06,
      "loss": 0.0173,
      "step": 770520
    },
    {
      "epoch": 1.2610056099971851,
      "grad_norm": 0.6805698275566101,
      "learning_rate": 7.527262903342713e-06,
      "loss": 0.0212,
      "step": 770540
    },
    {
      "epoch": 1.2610383404358385,
      "grad_norm": 0.720717191696167,
      "learning_rate": 7.527197011129196e-06,
      "loss": 0.0196,
      "step": 770560
    },
    {
      "epoch": 1.2610710708744919,
      "grad_norm": 0.4241490364074707,
      "learning_rate": 7.527131118915678e-06,
      "loss": 0.0211,
      "step": 770580
    },
    {
      "epoch": 1.2611038013131453,
      "grad_norm": 0.46773359179496765,
      "learning_rate": 7.527065226702162e-06,
      "loss": 0.0242,
      "step": 770600
    },
    {
      "epoch": 1.2611365317517986,
      "grad_norm": 0.19339406490325928,
      "learning_rate": 7.526999334488644e-06,
      "loss": 0.0192,
      "step": 770620
    },
    {
      "epoch": 1.2611692621904518,
      "grad_norm": 0.5708379745483398,
      "learning_rate": 7.526933442275127e-06,
      "loss": 0.0263,
      "step": 770640
    },
    {
      "epoch": 1.2612019926291052,
      "grad_norm": 1.614128589630127,
      "learning_rate": 7.52686755006161e-06,
      "loss": 0.0268,
      "step": 770660
    },
    {
      "epoch": 1.2612347230677585,
      "grad_norm": 0.9409178495407104,
      "learning_rate": 7.526801657848093e-06,
      "loss": 0.0276,
      "step": 770680
    },
    {
      "epoch": 1.261267453506412,
      "grad_norm": 0.49123212695121765,
      "learning_rate": 7.526735765634575e-06,
      "loss": 0.0218,
      "step": 770700
    },
    {
      "epoch": 1.2613001839450653,
      "grad_norm": 1.5924981832504272,
      "learning_rate": 7.526669873421059e-06,
      "loss": 0.0225,
      "step": 770720
    },
    {
      "epoch": 1.2613329143837186,
      "grad_norm": 0.8610849976539612,
      "learning_rate": 7.526603981207541e-06,
      "loss": 0.0167,
      "step": 770740
    },
    {
      "epoch": 1.261365644822372,
      "grad_norm": 0.2841320335865021,
      "learning_rate": 7.5265380889940245e-06,
      "loss": 0.0264,
      "step": 770760
    },
    {
      "epoch": 1.2613983752610252,
      "grad_norm": 1.023443341255188,
      "learning_rate": 7.526472196780506e-06,
      "loss": 0.0226,
      "step": 770780
    },
    {
      "epoch": 1.2614311056996785,
      "grad_norm": 0.0809071883559227,
      "learning_rate": 7.52640630456699e-06,
      "loss": 0.0231,
      "step": 770800
    },
    {
      "epoch": 1.261463836138332,
      "grad_norm": 0.20090968906879425,
      "learning_rate": 7.5263404123534735e-06,
      "loss": 0.0237,
      "step": 770820
    },
    {
      "epoch": 1.2614965665769853,
      "grad_norm": 0.38107872009277344,
      "learning_rate": 7.526274520139955e-06,
      "loss": 0.0226,
      "step": 770840
    },
    {
      "epoch": 1.2615292970156387,
      "grad_norm": 1.106836199760437,
      "learning_rate": 7.526208627926439e-06,
      "loss": 0.021,
      "step": 770860
    },
    {
      "epoch": 1.261562027454292,
      "grad_norm": 0.2689638137817383,
      "learning_rate": 7.526142735712921e-06,
      "loss": 0.0258,
      "step": 770880
    },
    {
      "epoch": 1.2615947578929454,
      "grad_norm": 0.3693460524082184,
      "learning_rate": 7.5260768434994045e-06,
      "loss": 0.0205,
      "step": 770900
    },
    {
      "epoch": 1.2616274883315985,
      "grad_norm": 0.7547172904014587,
      "learning_rate": 7.526010951285887e-06,
      "loss": 0.0201,
      "step": 770920
    },
    {
      "epoch": 1.261660218770252,
      "grad_norm": 1.0791106224060059,
      "learning_rate": 7.525945059072371e-06,
      "loss": 0.0207,
      "step": 770940
    },
    {
      "epoch": 1.2616929492089053,
      "grad_norm": 0.9036109447479248,
      "learning_rate": 7.525879166858853e-06,
      "loss": 0.0209,
      "step": 770960
    },
    {
      "epoch": 1.2617256796475587,
      "grad_norm": 0.5581622123718262,
      "learning_rate": 7.525813274645336e-06,
      "loss": 0.0224,
      "step": 770980
    },
    {
      "epoch": 1.261758410086212,
      "grad_norm": 0.40494582056999207,
      "learning_rate": 7.525747382431818e-06,
      "loss": 0.0257,
      "step": 771000
    },
    {
      "epoch": 1.2617911405248652,
      "grad_norm": 0.5443128347396851,
      "learning_rate": 7.525681490218302e-06,
      "loss": 0.0175,
      "step": 771020
    },
    {
      "epoch": 1.2618238709635188,
      "grad_norm": 0.9258438348770142,
      "learning_rate": 7.525615598004784e-06,
      "loss": 0.0297,
      "step": 771040
    },
    {
      "epoch": 1.261856601402172,
      "grad_norm": 1.3316640853881836,
      "learning_rate": 7.525549705791267e-06,
      "loss": 0.028,
      "step": 771060
    },
    {
      "epoch": 1.2618893318408253,
      "grad_norm": 1.1437655687332153,
      "learning_rate": 7.52548381357775e-06,
      "loss": 0.0275,
      "step": 771080
    },
    {
      "epoch": 1.2619220622794787,
      "grad_norm": 0.4317967891693115,
      "learning_rate": 7.525417921364233e-06,
      "loss": 0.0212,
      "step": 771100
    },
    {
      "epoch": 1.261954792718132,
      "grad_norm": 0.3990182876586914,
      "learning_rate": 7.5253520291507155e-06,
      "loss": 0.0201,
      "step": 771120
    },
    {
      "epoch": 1.2619875231567854,
      "grad_norm": 1.842431902885437,
      "learning_rate": 7.525286136937199e-06,
      "loss": 0.0206,
      "step": 771140
    },
    {
      "epoch": 1.2620202535954386,
      "grad_norm": 0.16164769232273102,
      "learning_rate": 7.525220244723681e-06,
      "loss": 0.0169,
      "step": 771160
    },
    {
      "epoch": 1.2620529840340922,
      "grad_norm": 1.0621678829193115,
      "learning_rate": 7.5251543525101645e-06,
      "loss": 0.0306,
      "step": 771180
    },
    {
      "epoch": 1.2620857144727453,
      "grad_norm": 0.6928134560585022,
      "learning_rate": 7.525088460296648e-06,
      "loss": 0.0204,
      "step": 771200
    },
    {
      "epoch": 1.2621184449113987,
      "grad_norm": 1.1879156827926636,
      "learning_rate": 7.52502256808313e-06,
      "loss": 0.0236,
      "step": 771220
    },
    {
      "epoch": 1.262151175350052,
      "grad_norm": 0.9634281396865845,
      "learning_rate": 7.524956675869614e-06,
      "loss": 0.0163,
      "step": 771240
    },
    {
      "epoch": 1.2621839057887054,
      "grad_norm": 0.2264036387205124,
      "learning_rate": 7.5248907836560955e-06,
      "loss": 0.018,
      "step": 771260
    },
    {
      "epoch": 1.2622166362273588,
      "grad_norm": 0.18830737471580505,
      "learning_rate": 7.524824891442579e-06,
      "loss": 0.0211,
      "step": 771280
    },
    {
      "epoch": 1.262249366666012,
      "grad_norm": 0.6809552907943726,
      "learning_rate": 7.524758999229062e-06,
      "loss": 0.0231,
      "step": 771300
    },
    {
      "epoch": 1.2622820971046653,
      "grad_norm": 0.5161209106445312,
      "learning_rate": 7.5246931070155446e-06,
      "loss": 0.0235,
      "step": 771320
    },
    {
      "epoch": 1.2623148275433187,
      "grad_norm": 0.753580629825592,
      "learning_rate": 7.524627214802027e-06,
      "loss": 0.0292,
      "step": 771340
    },
    {
      "epoch": 1.262347557981972,
      "grad_norm": 0.7676541805267334,
      "learning_rate": 7.524561322588511e-06,
      "loss": 0.0226,
      "step": 771360
    },
    {
      "epoch": 1.2623802884206254,
      "grad_norm": 1.0553803443908691,
      "learning_rate": 7.524495430374993e-06,
      "loss": 0.0233,
      "step": 771380
    },
    {
      "epoch": 1.2624130188592788,
      "grad_norm": 0.6811822652816772,
      "learning_rate": 7.524429538161476e-06,
      "loss": 0.0219,
      "step": 771400
    },
    {
      "epoch": 1.2624457492979322,
      "grad_norm": 1.3670340776443481,
      "learning_rate": 7.524363645947958e-06,
      "loss": 0.0233,
      "step": 771420
    },
    {
      "epoch": 1.2624784797365853,
      "grad_norm": 0.3625645637512207,
      "learning_rate": 7.524297753734442e-06,
      "loss": 0.02,
      "step": 771440
    },
    {
      "epoch": 1.2625112101752387,
      "grad_norm": 0.9134296178817749,
      "learning_rate": 7.524231861520925e-06,
      "loss": 0.0157,
      "step": 771460
    },
    {
      "epoch": 1.262543940613892,
      "grad_norm": 0.07514335960149765,
      "learning_rate": 7.524165969307407e-06,
      "loss": 0.0192,
      "step": 771480
    },
    {
      "epoch": 1.2625766710525455,
      "grad_norm": 0.5742136240005493,
      "learning_rate": 7.52410007709389e-06,
      "loss": 0.0215,
      "step": 771500
    },
    {
      "epoch": 1.2626094014911988,
      "grad_norm": 1.641756534576416,
      "learning_rate": 7.524034184880374e-06,
      "loss": 0.0268,
      "step": 771520
    },
    {
      "epoch": 1.2626421319298522,
      "grad_norm": 0.6950243711471558,
      "learning_rate": 7.523968292666856e-06,
      "loss": 0.0186,
      "step": 771540
    },
    {
      "epoch": 1.2626748623685056,
      "grad_norm": 1.9225326776504517,
      "learning_rate": 7.523902400453339e-06,
      "loss": 0.0293,
      "step": 771560
    },
    {
      "epoch": 1.2627075928071587,
      "grad_norm": 0.8313850164413452,
      "learning_rate": 7.523836508239823e-06,
      "loss": 0.017,
      "step": 771580
    },
    {
      "epoch": 1.262740323245812,
      "grad_norm": 0.6934741139411926,
      "learning_rate": 7.523770616026305e-06,
      "loss": 0.0155,
      "step": 771600
    },
    {
      "epoch": 1.2627730536844655,
      "grad_norm": 1.5848115682601929,
      "learning_rate": 7.523704723812788e-06,
      "loss": 0.0279,
      "step": 771620
    },
    {
      "epoch": 1.2628057841231188,
      "grad_norm": 0.8653072714805603,
      "learning_rate": 7.52363883159927e-06,
      "loss": 0.0303,
      "step": 771640
    },
    {
      "epoch": 1.2628385145617722,
      "grad_norm": 0.23367948830127716,
      "learning_rate": 7.523572939385754e-06,
      "loss": 0.0143,
      "step": 771660
    },
    {
      "epoch": 1.2628712450004254,
      "grad_norm": 0.980323851108551,
      "learning_rate": 7.523507047172236e-06,
      "loss": 0.0307,
      "step": 771680
    },
    {
      "epoch": 1.262903975439079,
      "grad_norm": 0.5477073192596436,
      "learning_rate": 7.523441154958719e-06,
      "loss": 0.0167,
      "step": 771700
    },
    {
      "epoch": 1.262936705877732,
      "grad_norm": 1.148795247077942,
      "learning_rate": 7.523375262745202e-06,
      "loss": 0.0127,
      "step": 771720
    },
    {
      "epoch": 1.2629694363163855,
      "grad_norm": 0.1697341948747635,
      "learning_rate": 7.5233093705316855e-06,
      "loss": 0.0179,
      "step": 771740
    },
    {
      "epoch": 1.2630021667550388,
      "grad_norm": 1.1999008655548096,
      "learning_rate": 7.523243478318167e-06,
      "loss": 0.0201,
      "step": 771760
    },
    {
      "epoch": 1.2630348971936922,
      "grad_norm": 0.7674320340156555,
      "learning_rate": 7.523177586104651e-06,
      "loss": 0.0188,
      "step": 771780
    },
    {
      "epoch": 1.2630676276323456,
      "grad_norm": 0.20712383091449738,
      "learning_rate": 7.523111693891133e-06,
      "loss": 0.0248,
      "step": 771800
    },
    {
      "epoch": 1.2631003580709987,
      "grad_norm": 1.2184417247772217,
      "learning_rate": 7.5230458016776164e-06,
      "loss": 0.0319,
      "step": 771820
    },
    {
      "epoch": 1.2631330885096523,
      "grad_norm": 0.5393983125686646,
      "learning_rate": 7.522979909464098e-06,
      "loss": 0.0292,
      "step": 771840
    },
    {
      "epoch": 1.2631658189483055,
      "grad_norm": 0.05851954221725464,
      "learning_rate": 7.522914017250582e-06,
      "loss": 0.019,
      "step": 771860
    },
    {
      "epoch": 1.2631985493869589,
      "grad_norm": 1.201675295829773,
      "learning_rate": 7.5228481250370655e-06,
      "loss": 0.0335,
      "step": 771880
    },
    {
      "epoch": 1.2632312798256122,
      "grad_norm": 0.32475608587265015,
      "learning_rate": 7.522782232823547e-06,
      "loss": 0.026,
      "step": 771900
    },
    {
      "epoch": 1.2632640102642656,
      "grad_norm": 0.48642513155937195,
      "learning_rate": 7.522716340610031e-06,
      "loss": 0.015,
      "step": 771920
    },
    {
      "epoch": 1.263296740702919,
      "grad_norm": 1.4363489151000977,
      "learning_rate": 7.522650448396514e-06,
      "loss": 0.0279,
      "step": 771940
    },
    {
      "epoch": 1.2633294711415721,
      "grad_norm": 0.7390231490135193,
      "learning_rate": 7.5225845561829965e-06,
      "loss": 0.0235,
      "step": 771960
    },
    {
      "epoch": 1.2633622015802255,
      "grad_norm": 0.998400866985321,
      "learning_rate": 7.522518663969479e-06,
      "loss": 0.0252,
      "step": 771980
    },
    {
      "epoch": 1.2633949320188789,
      "grad_norm": 0.34913021326065063,
      "learning_rate": 7.522452771755963e-06,
      "loss": 0.0372,
      "step": 772000
    },
    {
      "epoch": 1.2634276624575322,
      "grad_norm": 0.18872295320034027,
      "learning_rate": 7.522386879542445e-06,
      "loss": 0.0116,
      "step": 772020
    },
    {
      "epoch": 1.2634603928961856,
      "grad_norm": 0.39043834805488586,
      "learning_rate": 7.522320987328928e-06,
      "loss": 0.0227,
      "step": 772040
    },
    {
      "epoch": 1.263493123334839,
      "grad_norm": 0.4251621663570404,
      "learning_rate": 7.52225509511541e-06,
      "loss": 0.0232,
      "step": 772060
    },
    {
      "epoch": 1.2635258537734924,
      "grad_norm": 0.3902090787887573,
      "learning_rate": 7.522189202901894e-06,
      "loss": 0.0234,
      "step": 772080
    },
    {
      "epoch": 1.2635585842121455,
      "grad_norm": 1.2850650548934937,
      "learning_rate": 7.5221233106883765e-06,
      "loss": 0.0349,
      "step": 772100
    },
    {
      "epoch": 1.2635913146507989,
      "grad_norm": 0.5519605875015259,
      "learning_rate": 7.522057418474859e-06,
      "loss": 0.0257,
      "step": 772120
    },
    {
      "epoch": 1.2636240450894523,
      "grad_norm": 0.11832567304372787,
      "learning_rate": 7.521991526261342e-06,
      "loss": 0.0157,
      "step": 772140
    },
    {
      "epoch": 1.2636567755281056,
      "grad_norm": 1.5508642196655273,
      "learning_rate": 7.5219256340478256e-06,
      "loss": 0.0253,
      "step": 772160
    },
    {
      "epoch": 1.263689505966759,
      "grad_norm": 0.4691891372203827,
      "learning_rate": 7.5218597418343074e-06,
      "loss": 0.0187,
      "step": 772180
    },
    {
      "epoch": 1.2637222364054124,
      "grad_norm": 1.5588860511779785,
      "learning_rate": 7.521793849620791e-06,
      "loss": 0.02,
      "step": 772200
    },
    {
      "epoch": 1.2637549668440657,
      "grad_norm": 1.5716805458068848,
      "learning_rate": 7.521727957407275e-06,
      "loss": 0.0231,
      "step": 772220
    },
    {
      "epoch": 1.263787697282719,
      "grad_norm": 0.21802298724651337,
      "learning_rate": 7.5216620651937565e-06,
      "loss": 0.0275,
      "step": 772240
    },
    {
      "epoch": 1.2638204277213723,
      "grad_norm": 1.308076024055481,
      "learning_rate": 7.52159617298024e-06,
      "loss": 0.0274,
      "step": 772260
    },
    {
      "epoch": 1.2638531581600256,
      "grad_norm": 1.7259546518325806,
      "learning_rate": 7.521530280766722e-06,
      "loss": 0.0215,
      "step": 772280
    },
    {
      "epoch": 1.263885888598679,
      "grad_norm": 0.248984694480896,
      "learning_rate": 7.521464388553206e-06,
      "loss": 0.0225,
      "step": 772300
    },
    {
      "epoch": 1.2639186190373324,
      "grad_norm": 2.013598918914795,
      "learning_rate": 7.521398496339688e-06,
      "loss": 0.0201,
      "step": 772320
    },
    {
      "epoch": 1.2639513494759858,
      "grad_norm": 0.42274242639541626,
      "learning_rate": 7.521332604126171e-06,
      "loss": 0.0221,
      "step": 772340
    },
    {
      "epoch": 1.2639840799146391,
      "grad_norm": 0.3899555504322052,
      "learning_rate": 7.521266711912654e-06,
      "loss": 0.0272,
      "step": 772360
    },
    {
      "epoch": 1.2640168103532923,
      "grad_norm": 0.2227948009967804,
      "learning_rate": 7.521200819699137e-06,
      "loss": 0.0174,
      "step": 772380
    },
    {
      "epoch": 1.2640495407919456,
      "grad_norm": 2.3595879077911377,
      "learning_rate": 7.521134927485619e-06,
      "loss": 0.0218,
      "step": 772400
    },
    {
      "epoch": 1.264082271230599,
      "grad_norm": 0.9708020687103271,
      "learning_rate": 7.521069035272103e-06,
      "loss": 0.0243,
      "step": 772420
    },
    {
      "epoch": 1.2641150016692524,
      "grad_norm": 1.1982208490371704,
      "learning_rate": 7.521003143058585e-06,
      "loss": 0.0248,
      "step": 772440
    },
    {
      "epoch": 1.2641477321079058,
      "grad_norm": 0.38704219460487366,
      "learning_rate": 7.520937250845068e-06,
      "loss": 0.021,
      "step": 772460
    },
    {
      "epoch": 1.264180462546559,
      "grad_norm": 1.5685898065567017,
      "learning_rate": 7.520871358631551e-06,
      "loss": 0.0238,
      "step": 772480
    },
    {
      "epoch": 1.2642131929852125,
      "grad_norm": 0.6383612155914307,
      "learning_rate": 7.520805466418034e-06,
      "loss": 0.0162,
      "step": 772500
    },
    {
      "epoch": 1.2642459234238657,
      "grad_norm": 0.4065232276916504,
      "learning_rate": 7.5207395742045166e-06,
      "loss": 0.0226,
      "step": 772520
    },
    {
      "epoch": 1.264278653862519,
      "grad_norm": 0.595014750957489,
      "learning_rate": 7.520673681991e-06,
      "loss": 0.021,
      "step": 772540
    },
    {
      "epoch": 1.2643113843011724,
      "grad_norm": 0.4650585651397705,
      "learning_rate": 7.520607789777482e-06,
      "loss": 0.0224,
      "step": 772560
    },
    {
      "epoch": 1.2643441147398258,
      "grad_norm": 0.553498387336731,
      "learning_rate": 7.520541897563966e-06,
      "loss": 0.0173,
      "step": 772580
    },
    {
      "epoch": 1.2643768451784791,
      "grad_norm": 0.5662394762039185,
      "learning_rate": 7.520476005350449e-06,
      "loss": 0.0136,
      "step": 772600
    },
    {
      "epoch": 1.2644095756171323,
      "grad_norm": 0.6218805909156799,
      "learning_rate": 7.520410113136931e-06,
      "loss": 0.0272,
      "step": 772620
    },
    {
      "epoch": 1.264442306055786,
      "grad_norm": 0.9003272652626038,
      "learning_rate": 7.520344220923415e-06,
      "loss": 0.0194,
      "step": 772640
    },
    {
      "epoch": 1.264475036494439,
      "grad_norm": 1.0971906185150146,
      "learning_rate": 7.520278328709897e-06,
      "loss": 0.023,
      "step": 772660
    },
    {
      "epoch": 1.2645077669330924,
      "grad_norm": 1.561216115951538,
      "learning_rate": 7.52021243649638e-06,
      "loss": 0.0269,
      "step": 772680
    },
    {
      "epoch": 1.2645404973717458,
      "grad_norm": 0.9435150027275085,
      "learning_rate": 7.520146544282863e-06,
      "loss": 0.0208,
      "step": 772700
    },
    {
      "epoch": 1.2645732278103992,
      "grad_norm": 0.7680433392524719,
      "learning_rate": 7.520080652069346e-06,
      "loss": 0.0167,
      "step": 772720
    },
    {
      "epoch": 1.2646059582490525,
      "grad_norm": 0.7457793354988098,
      "learning_rate": 7.520014759855828e-06,
      "loss": 0.0215,
      "step": 772740
    },
    {
      "epoch": 1.2646386886877057,
      "grad_norm": 0.1833411455154419,
      "learning_rate": 7.519948867642312e-06,
      "loss": 0.0162,
      "step": 772760
    },
    {
      "epoch": 1.264671419126359,
      "grad_norm": 0.736193835735321,
      "learning_rate": 7.519882975428794e-06,
      "loss": 0.0272,
      "step": 772780
    },
    {
      "epoch": 1.2647041495650124,
      "grad_norm": 0.9736762046813965,
      "learning_rate": 7.5198170832152775e-06,
      "loss": 0.0226,
      "step": 772800
    },
    {
      "epoch": 1.2647368800036658,
      "grad_norm": 1.137547492980957,
      "learning_rate": 7.519751191001759e-06,
      "loss": 0.0174,
      "step": 772820
    },
    {
      "epoch": 1.2647696104423192,
      "grad_norm": 1.0512374639511108,
      "learning_rate": 7.519685298788243e-06,
      "loss": 0.0227,
      "step": 772840
    },
    {
      "epoch": 1.2648023408809725,
      "grad_norm": 1.2419747114181519,
      "learning_rate": 7.519619406574725e-06,
      "loss": 0.0195,
      "step": 772860
    },
    {
      "epoch": 1.264835071319626,
      "grad_norm": 0.7171343564987183,
      "learning_rate": 7.519553514361208e-06,
      "loss": 0.0271,
      "step": 772880
    },
    {
      "epoch": 1.264867801758279,
      "grad_norm": 2.1893508434295654,
      "learning_rate": 7.519487622147691e-06,
      "loss": 0.0193,
      "step": 772900
    },
    {
      "epoch": 1.2649005321969324,
      "grad_norm": 0.9030793905258179,
      "learning_rate": 7.519421729934174e-06,
      "loss": 0.0241,
      "step": 772920
    },
    {
      "epoch": 1.2649332626355858,
      "grad_norm": 0.47910192608833313,
      "learning_rate": 7.5193558377206575e-06,
      "loss": 0.0245,
      "step": 772940
    },
    {
      "epoch": 1.2649659930742392,
      "grad_norm": 0.5836198329925537,
      "learning_rate": 7.51928994550714e-06,
      "loss": 0.0306,
      "step": 772960
    },
    {
      "epoch": 1.2649987235128926,
      "grad_norm": 0.5981590151786804,
      "learning_rate": 7.519224053293623e-06,
      "loss": 0.022,
      "step": 772980
    },
    {
      "epoch": 1.265031453951546,
      "grad_norm": 1.7135416269302368,
      "learning_rate": 7.519158161080106e-06,
      "loss": 0.0254,
      "step": 773000
    },
    {
      "epoch": 1.2650641843901993,
      "grad_norm": 0.7545260787010193,
      "learning_rate": 7.519092268866589e-06,
      "loss": 0.0184,
      "step": 773020
    },
    {
      "epoch": 1.2650969148288524,
      "grad_norm": 0.24187804758548737,
      "learning_rate": 7.519026376653071e-06,
      "loss": 0.0266,
      "step": 773040
    },
    {
      "epoch": 1.2651296452675058,
      "grad_norm": 0.51249098777771,
      "learning_rate": 7.518960484439555e-06,
      "loss": 0.0219,
      "step": 773060
    },
    {
      "epoch": 1.2651623757061592,
      "grad_norm": 1.414093255996704,
      "learning_rate": 7.518894592226037e-06,
      "loss": 0.0214,
      "step": 773080
    },
    {
      "epoch": 1.2651951061448126,
      "grad_norm": 0.8180972933769226,
      "learning_rate": 7.51882870001252e-06,
      "loss": 0.0249,
      "step": 773100
    },
    {
      "epoch": 1.265227836583466,
      "grad_norm": 0.6016042828559875,
      "learning_rate": 7.518762807799003e-06,
      "loss": 0.0171,
      "step": 773120
    },
    {
      "epoch": 1.2652605670221193,
      "grad_norm": 1.7483415603637695,
      "learning_rate": 7.518696915585486e-06,
      "loss": 0.025,
      "step": 773140
    },
    {
      "epoch": 1.2652932974607727,
      "grad_norm": 0.40798938274383545,
      "learning_rate": 7.5186310233719685e-06,
      "loss": 0.0262,
      "step": 773160
    },
    {
      "epoch": 1.2653260278994258,
      "grad_norm": 0.5214054584503174,
      "learning_rate": 7.518565131158452e-06,
      "loss": 0.0244,
      "step": 773180
    },
    {
      "epoch": 1.2653587583380792,
      "grad_norm": 1.3486802577972412,
      "learning_rate": 7.518499238944934e-06,
      "loss": 0.0355,
      "step": 773200
    },
    {
      "epoch": 1.2653914887767326,
      "grad_norm": 2.0055203437805176,
      "learning_rate": 7.5184333467314175e-06,
      "loss": 0.0355,
      "step": 773220
    },
    {
      "epoch": 1.265424219215386,
      "grad_norm": 0.4152296185493469,
      "learning_rate": 7.5183674545178994e-06,
      "loss": 0.0266,
      "step": 773240
    },
    {
      "epoch": 1.2654569496540393,
      "grad_norm": 0.7080550789833069,
      "learning_rate": 7.518301562304383e-06,
      "loss": 0.0151,
      "step": 773260
    },
    {
      "epoch": 1.2654896800926925,
      "grad_norm": 0.8447046279907227,
      "learning_rate": 7.518235670090867e-06,
      "loss": 0.0217,
      "step": 773280
    },
    {
      "epoch": 1.265522410531346,
      "grad_norm": 0.2722468078136444,
      "learning_rate": 7.5181697778773485e-06,
      "loss": 0.0229,
      "step": 773300
    },
    {
      "epoch": 1.2655551409699992,
      "grad_norm": 0.687025249004364,
      "learning_rate": 7.518103885663832e-06,
      "loss": 0.0328,
      "step": 773320
    },
    {
      "epoch": 1.2655878714086526,
      "grad_norm": 0.9224759936332703,
      "learning_rate": 7.518037993450315e-06,
      "loss": 0.0237,
      "step": 773340
    },
    {
      "epoch": 1.265620601847306,
      "grad_norm": 1.0699480772018433,
      "learning_rate": 7.5179721012367976e-06,
      "loss": 0.0288,
      "step": 773360
    },
    {
      "epoch": 1.2656533322859593,
      "grad_norm": 0.3628825545310974,
      "learning_rate": 7.51790620902328e-06,
      "loss": 0.0197,
      "step": 773380
    },
    {
      "epoch": 1.2656860627246127,
      "grad_norm": 0.4092155396938324,
      "learning_rate": 7.517840316809764e-06,
      "loss": 0.0255,
      "step": 773400
    },
    {
      "epoch": 1.2657187931632659,
      "grad_norm": 0.38202542066574097,
      "learning_rate": 7.517774424596246e-06,
      "loss": 0.0184,
      "step": 773420
    },
    {
      "epoch": 1.2657515236019194,
      "grad_norm": 0.11901284009218216,
      "learning_rate": 7.517708532382729e-06,
      "loss": 0.028,
      "step": 773440
    },
    {
      "epoch": 1.2657842540405726,
      "grad_norm": 1.535874605178833,
      "learning_rate": 7.517642640169211e-06,
      "loss": 0.0206,
      "step": 773460
    },
    {
      "epoch": 1.265816984479226,
      "grad_norm": 1.4898992776870728,
      "learning_rate": 7.517576747955695e-06,
      "loss": 0.0299,
      "step": 773480
    },
    {
      "epoch": 1.2658497149178793,
      "grad_norm": 1.4003382921218872,
      "learning_rate": 7.517510855742178e-06,
      "loss": 0.0236,
      "step": 773500
    },
    {
      "epoch": 1.2658824453565327,
      "grad_norm": 0.3305460214614868,
      "learning_rate": 7.51744496352866e-06,
      "loss": 0.0151,
      "step": 773520
    },
    {
      "epoch": 1.265915175795186,
      "grad_norm": 1.2482136487960815,
      "learning_rate": 7.517379071315143e-06,
      "loss": 0.0297,
      "step": 773540
    },
    {
      "epoch": 1.2659479062338392,
      "grad_norm": 1.86332368850708,
      "learning_rate": 7.517313179101627e-06,
      "loss": 0.0227,
      "step": 773560
    },
    {
      "epoch": 1.2659806366724926,
      "grad_norm": 0.6135503649711609,
      "learning_rate": 7.5172472868881085e-06,
      "loss": 0.0213,
      "step": 773580
    },
    {
      "epoch": 1.266013367111146,
      "grad_norm": 0.3521992266178131,
      "learning_rate": 7.517181394674592e-06,
      "loss": 0.0159,
      "step": 773600
    },
    {
      "epoch": 1.2660460975497994,
      "grad_norm": 1.1082929372787476,
      "learning_rate": 7.517115502461074e-06,
      "loss": 0.0205,
      "step": 773620
    },
    {
      "epoch": 1.2660788279884527,
      "grad_norm": 0.6905120015144348,
      "learning_rate": 7.517049610247558e-06,
      "loss": 0.0199,
      "step": 773640
    },
    {
      "epoch": 1.266111558427106,
      "grad_norm": 0.7789052128791809,
      "learning_rate": 7.516983718034041e-06,
      "loss": 0.0192,
      "step": 773660
    },
    {
      "epoch": 1.2661442888657595,
      "grad_norm": 0.3860374689102173,
      "learning_rate": 7.516917825820523e-06,
      "loss": 0.0349,
      "step": 773680
    },
    {
      "epoch": 1.2661770193044126,
      "grad_norm": 2.079848051071167,
      "learning_rate": 7.516851933607007e-06,
      "loss": 0.0311,
      "step": 773700
    },
    {
      "epoch": 1.266209749743066,
      "grad_norm": 0.43995732069015503,
      "learning_rate": 7.516786041393489e-06,
      "loss": 0.023,
      "step": 773720
    },
    {
      "epoch": 1.2662424801817194,
      "grad_norm": 0.8542664051055908,
      "learning_rate": 7.516720149179972e-06,
      "loss": 0.0312,
      "step": 773740
    },
    {
      "epoch": 1.2662752106203727,
      "grad_norm": 0.5700097680091858,
      "learning_rate": 7.516654256966455e-06,
      "loss": 0.0208,
      "step": 773760
    },
    {
      "epoch": 1.266307941059026,
      "grad_norm": 0.4942280650138855,
      "learning_rate": 7.5165883647529385e-06,
      "loss": 0.0194,
      "step": 773780
    },
    {
      "epoch": 1.2663406714976795,
      "grad_norm": 0.5353232622146606,
      "learning_rate": 7.51652247253942e-06,
      "loss": 0.0256,
      "step": 773800
    },
    {
      "epoch": 1.2663734019363329,
      "grad_norm": 0.3427095413208008,
      "learning_rate": 7.516456580325904e-06,
      "loss": 0.0172,
      "step": 773820
    },
    {
      "epoch": 1.266406132374986,
      "grad_norm": 0.5832679867744446,
      "learning_rate": 7.516390688112386e-06,
      "loss": 0.0238,
      "step": 773840
    },
    {
      "epoch": 1.2664388628136394,
      "grad_norm": 0.919696569442749,
      "learning_rate": 7.5163247958988694e-06,
      "loss": 0.0201,
      "step": 773860
    },
    {
      "epoch": 1.2664715932522927,
      "grad_norm": 0.48613062500953674,
      "learning_rate": 7.516258903685351e-06,
      "loss": 0.0275,
      "step": 773880
    },
    {
      "epoch": 1.2665043236909461,
      "grad_norm": 0.9050061106681824,
      "learning_rate": 7.516193011471835e-06,
      "loss": 0.0229,
      "step": 773900
    },
    {
      "epoch": 1.2665370541295995,
      "grad_norm": 0.3825453817844391,
      "learning_rate": 7.516127119258318e-06,
      "loss": 0.0152,
      "step": 773920
    },
    {
      "epoch": 1.2665697845682529,
      "grad_norm": 0.694167971611023,
      "learning_rate": 7.5160612270448e-06,
      "loss": 0.0308,
      "step": 773940
    },
    {
      "epoch": 1.2666025150069062,
      "grad_norm": 0.18807931244373322,
      "learning_rate": 7.515995334831283e-06,
      "loss": 0.0202,
      "step": 773960
    },
    {
      "epoch": 1.2666352454455594,
      "grad_norm": 1.149194359779358,
      "learning_rate": 7.515929442617767e-06,
      "loss": 0.0167,
      "step": 773980
    },
    {
      "epoch": 1.2666679758842128,
      "grad_norm": 0.8613747954368591,
      "learning_rate": 7.5158635504042495e-06,
      "loss": 0.0139,
      "step": 774000
    },
    {
      "epoch": 1.2667007063228661,
      "grad_norm": 0.6809216141700745,
      "learning_rate": 7.515797658190732e-06,
      "loss": 0.0235,
      "step": 774020
    },
    {
      "epoch": 1.2667334367615195,
      "grad_norm": 1.1843456029891968,
      "learning_rate": 7.515731765977216e-06,
      "loss": 0.0365,
      "step": 774040
    },
    {
      "epoch": 1.2667661672001729,
      "grad_norm": 0.3691088855266571,
      "learning_rate": 7.515665873763698e-06,
      "loss": 0.0231,
      "step": 774060
    },
    {
      "epoch": 1.266798897638826,
      "grad_norm": 0.5756337642669678,
      "learning_rate": 7.515599981550181e-06,
      "loss": 0.0247,
      "step": 774080
    },
    {
      "epoch": 1.2668316280774796,
      "grad_norm": 0.6983058452606201,
      "learning_rate": 7.515534089336663e-06,
      "loss": 0.0239,
      "step": 774100
    },
    {
      "epoch": 1.2668643585161328,
      "grad_norm": 0.9060282111167908,
      "learning_rate": 7.515468197123147e-06,
      "loss": 0.0272,
      "step": 774120
    },
    {
      "epoch": 1.2668970889547861,
      "grad_norm": 1.9879662990570068,
      "learning_rate": 7.5154023049096295e-06,
      "loss": 0.0242,
      "step": 774140
    },
    {
      "epoch": 1.2669298193934395,
      "grad_norm": 0.20207436382770538,
      "learning_rate": 7.515336412696112e-06,
      "loss": 0.0194,
      "step": 774160
    },
    {
      "epoch": 1.2669625498320929,
      "grad_norm": 1.4390803575515747,
      "learning_rate": 7.515270520482595e-06,
      "loss": 0.0283,
      "step": 774180
    },
    {
      "epoch": 1.2669952802707463,
      "grad_norm": 1.0752531290054321,
      "learning_rate": 7.5152046282690786e-06,
      "loss": 0.0213,
      "step": 774200
    },
    {
      "epoch": 1.2670280107093994,
      "grad_norm": 0.18830226361751556,
      "learning_rate": 7.5151387360555605e-06,
      "loss": 0.0241,
      "step": 774220
    },
    {
      "epoch": 1.267060741148053,
      "grad_norm": 0.5758017897605896,
      "learning_rate": 7.515072843842044e-06,
      "loss": 0.0258,
      "step": 774240
    },
    {
      "epoch": 1.2670934715867062,
      "grad_norm": 1.1654202938079834,
      "learning_rate": 7.515006951628526e-06,
      "loss": 0.0193,
      "step": 774260
    },
    {
      "epoch": 1.2671262020253595,
      "grad_norm": 1.1017547845840454,
      "learning_rate": 7.5149410594150095e-06,
      "loss": 0.0237,
      "step": 774280
    },
    {
      "epoch": 1.267158932464013,
      "grad_norm": 0.8856961727142334,
      "learning_rate": 7.514875167201492e-06,
      "loss": 0.0183,
      "step": 774300
    },
    {
      "epoch": 1.2671916629026663,
      "grad_norm": 1.0420745611190796,
      "learning_rate": 7.514809274987975e-06,
      "loss": 0.0197,
      "step": 774320
    },
    {
      "epoch": 1.2672243933413196,
      "grad_norm": 0.665520191192627,
      "learning_rate": 7.514743382774459e-06,
      "loss": 0.0143,
      "step": 774340
    },
    {
      "epoch": 1.2672571237799728,
      "grad_norm": 0.8004332184791565,
      "learning_rate": 7.514677490560941e-06,
      "loss": 0.0253,
      "step": 774360
    },
    {
      "epoch": 1.2672898542186262,
      "grad_norm": 0.5718837976455688,
      "learning_rate": 7.514611598347424e-06,
      "loss": 0.0179,
      "step": 774380
    },
    {
      "epoch": 1.2673225846572795,
      "grad_norm": 0.6817935109138489,
      "learning_rate": 7.514545706133907e-06,
      "loss": 0.0244,
      "step": 774400
    },
    {
      "epoch": 1.267355315095933,
      "grad_norm": 0.47598040103912354,
      "learning_rate": 7.51447981392039e-06,
      "loss": 0.0233,
      "step": 774420
    },
    {
      "epoch": 1.2673880455345863,
      "grad_norm": 0.4340164363384247,
      "learning_rate": 7.514413921706872e-06,
      "loss": 0.0147,
      "step": 774440
    },
    {
      "epoch": 1.2674207759732397,
      "grad_norm": 0.34832847118377686,
      "learning_rate": 7.514348029493356e-06,
      "loss": 0.0164,
      "step": 774460
    },
    {
      "epoch": 1.267453506411893,
      "grad_norm": 1.694854736328125,
      "learning_rate": 7.514282137279838e-06,
      "loss": 0.0246,
      "step": 774480
    },
    {
      "epoch": 1.2674862368505462,
      "grad_norm": 0.34446659684181213,
      "learning_rate": 7.514216245066321e-06,
      "loss": 0.0257,
      "step": 774500
    },
    {
      "epoch": 1.2675189672891995,
      "grad_norm": 0.26944077014923096,
      "learning_rate": 7.514150352852804e-06,
      "loss": 0.0183,
      "step": 774520
    },
    {
      "epoch": 1.267551697727853,
      "grad_norm": 0.5441782474517822,
      "learning_rate": 7.514084460639287e-06,
      "loss": 0.0236,
      "step": 774540
    },
    {
      "epoch": 1.2675844281665063,
      "grad_norm": 0.7868672609329224,
      "learning_rate": 7.5140185684257696e-06,
      "loss": 0.0262,
      "step": 774560
    },
    {
      "epoch": 1.2676171586051597,
      "grad_norm": 2.6385135650634766,
      "learning_rate": 7.513952676212253e-06,
      "loss": 0.0241,
      "step": 774580
    },
    {
      "epoch": 1.267649889043813,
      "grad_norm": 0.3561311662197113,
      "learning_rate": 7.513886783998735e-06,
      "loss": 0.0171,
      "step": 774600
    },
    {
      "epoch": 1.2676826194824664,
      "grad_norm": 1.5705267190933228,
      "learning_rate": 7.513820891785219e-06,
      "loss": 0.0227,
      "step": 774620
    },
    {
      "epoch": 1.2677153499211196,
      "grad_norm": 0.7165060043334961,
      "learning_rate": 7.5137549995717005e-06,
      "loss": 0.0229,
      "step": 774640
    },
    {
      "epoch": 1.267748080359773,
      "grad_norm": 0.2791438102722168,
      "learning_rate": 7.513689107358184e-06,
      "loss": 0.0166,
      "step": 774660
    },
    {
      "epoch": 1.2677808107984263,
      "grad_norm": 0.6557446122169495,
      "learning_rate": 7.513623215144666e-06,
      "loss": 0.0336,
      "step": 774680
    },
    {
      "epoch": 1.2678135412370797,
      "grad_norm": 0.23035107553005219,
      "learning_rate": 7.51355732293115e-06,
      "loss": 0.0268,
      "step": 774700
    },
    {
      "epoch": 1.267846271675733,
      "grad_norm": 0.19408132135868073,
      "learning_rate": 7.513491430717633e-06,
      "loss": 0.0207,
      "step": 774720
    },
    {
      "epoch": 1.2678790021143862,
      "grad_norm": 0.1700023114681244,
      "learning_rate": 7.513425538504115e-06,
      "loss": 0.026,
      "step": 774740
    },
    {
      "epoch": 1.2679117325530398,
      "grad_norm": 0.8262222409248352,
      "learning_rate": 7.513359646290599e-06,
      "loss": 0.0267,
      "step": 774760
    },
    {
      "epoch": 1.267944462991693,
      "grad_norm": 0.5263774394989014,
      "learning_rate": 7.513293754077081e-06,
      "loss": 0.0231,
      "step": 774780
    },
    {
      "epoch": 1.2679771934303463,
      "grad_norm": 1.0608062744140625,
      "learning_rate": 7.513227861863565e-06,
      "loss": 0.028,
      "step": 774800
    },
    {
      "epoch": 1.2680099238689997,
      "grad_norm": 2.477384090423584,
      "learning_rate": 7.513161969650047e-06,
      "loss": 0.0281,
      "step": 774820
    },
    {
      "epoch": 1.268042654307653,
      "grad_norm": 0.4742765426635742,
      "learning_rate": 7.5130960774365305e-06,
      "loss": 0.0241,
      "step": 774840
    },
    {
      "epoch": 1.2680753847463064,
      "grad_norm": 5.509957790374756,
      "learning_rate": 7.513030185223012e-06,
      "loss": 0.0178,
      "step": 774860
    },
    {
      "epoch": 1.2681081151849596,
      "grad_norm": 0.4366956353187561,
      "learning_rate": 7.512964293009496e-06,
      "loss": 0.0285,
      "step": 774880
    },
    {
      "epoch": 1.2681408456236132,
      "grad_norm": 0.09459833800792694,
      "learning_rate": 7.512898400795978e-06,
      "loss": 0.0196,
      "step": 774900
    },
    {
      "epoch": 1.2681735760622663,
      "grad_norm": 0.4166240692138672,
      "learning_rate": 7.5128325085824614e-06,
      "loss": 0.0244,
      "step": 774920
    },
    {
      "epoch": 1.2682063065009197,
      "grad_norm": 1.093427062034607,
      "learning_rate": 7.512766616368944e-06,
      "loss": 0.0243,
      "step": 774940
    },
    {
      "epoch": 1.268239036939573,
      "grad_norm": 1.0560729503631592,
      "learning_rate": 7.512700724155427e-06,
      "loss": 0.027,
      "step": 774960
    },
    {
      "epoch": 1.2682717673782264,
      "grad_norm": 0.8008614778518677,
      "learning_rate": 7.51263483194191e-06,
      "loss": 0.0244,
      "step": 774980
    },
    {
      "epoch": 1.2683044978168798,
      "grad_norm": 0.9656600952148438,
      "learning_rate": 7.512568939728393e-06,
      "loss": 0.0231,
      "step": 775000
    },
    {
      "epoch": 1.268337228255533,
      "grad_norm": 0.6136729717254639,
      "learning_rate": 7.512503047514875e-06,
      "loss": 0.025,
      "step": 775020
    },
    {
      "epoch": 1.2683699586941863,
      "grad_norm": 0.6835609674453735,
      "learning_rate": 7.512437155301359e-06,
      "loss": 0.0187,
      "step": 775040
    },
    {
      "epoch": 1.2684026891328397,
      "grad_norm": 0.7417985796928406,
      "learning_rate": 7.512371263087842e-06,
      "loss": 0.0183,
      "step": 775060
    },
    {
      "epoch": 1.268435419571493,
      "grad_norm": 0.07107555866241455,
      "learning_rate": 7.512305370874324e-06,
      "loss": 0.0206,
      "step": 775080
    },
    {
      "epoch": 1.2684681500101465,
      "grad_norm": 0.9215882420539856,
      "learning_rate": 7.512239478660808e-06,
      "loss": 0.0201,
      "step": 775100
    },
    {
      "epoch": 1.2685008804487998,
      "grad_norm": 0.5491875410079956,
      "learning_rate": 7.51217358644729e-06,
      "loss": 0.0241,
      "step": 775120
    },
    {
      "epoch": 1.2685336108874532,
      "grad_norm": 0.2286747545003891,
      "learning_rate": 7.512107694233773e-06,
      "loss": 0.0294,
      "step": 775140
    },
    {
      "epoch": 1.2685663413261064,
      "grad_norm": 0.5854521989822388,
      "learning_rate": 7.512041802020256e-06,
      "loss": 0.021,
      "step": 775160
    },
    {
      "epoch": 1.2685990717647597,
      "grad_norm": 0.7677454352378845,
      "learning_rate": 7.511975909806739e-06,
      "loss": 0.0157,
      "step": 775180
    },
    {
      "epoch": 1.268631802203413,
      "grad_norm": 1.2264946699142456,
      "learning_rate": 7.5119100175932215e-06,
      "loss": 0.0186,
      "step": 775200
    },
    {
      "epoch": 1.2686645326420665,
      "grad_norm": 0.4111671447753906,
      "learning_rate": 7.511844125379705e-06,
      "loss": 0.0203,
      "step": 775220
    },
    {
      "epoch": 1.2686972630807198,
      "grad_norm": 0.4798167645931244,
      "learning_rate": 7.511778233166187e-06,
      "loss": 0.0153,
      "step": 775240
    },
    {
      "epoch": 1.2687299935193732,
      "grad_norm": 0.18318022787570953,
      "learning_rate": 7.5117123409526705e-06,
      "loss": 0.029,
      "step": 775260
    },
    {
      "epoch": 1.2687627239580266,
      "grad_norm": 1.3636046648025513,
      "learning_rate": 7.5116464487391524e-06,
      "loss": 0.0208,
      "step": 775280
    },
    {
      "epoch": 1.2687954543966797,
      "grad_norm": 0.297469824552536,
      "learning_rate": 7.511580556525636e-06,
      "loss": 0.0217,
      "step": 775300
    },
    {
      "epoch": 1.268828184835333,
      "grad_norm": 1.0340327024459839,
      "learning_rate": 7.511514664312119e-06,
      "loss": 0.0301,
      "step": 775320
    },
    {
      "epoch": 1.2688609152739865,
      "grad_norm": 0.3940965235233307,
      "learning_rate": 7.5114487720986015e-06,
      "loss": 0.0174,
      "step": 775340
    },
    {
      "epoch": 1.2688936457126399,
      "grad_norm": 0.6953344345092773,
      "learning_rate": 7.511382879885084e-06,
      "loss": 0.0225,
      "step": 775360
    },
    {
      "epoch": 1.2689263761512932,
      "grad_norm": 0.4005046486854553,
      "learning_rate": 7.511316987671568e-06,
      "loss": 0.0233,
      "step": 775380
    },
    {
      "epoch": 1.2689591065899466,
      "grad_norm": 0.7116584777832031,
      "learning_rate": 7.5112510954580506e-06,
      "loss": 0.0176,
      "step": 775400
    },
    {
      "epoch": 1.2689918370286,
      "grad_norm": 0.2326596975326538,
      "learning_rate": 7.511185203244533e-06,
      "loss": 0.0164,
      "step": 775420
    },
    {
      "epoch": 1.2690245674672531,
      "grad_norm": 0.2550318241119385,
      "learning_rate": 7.511119311031017e-06,
      "loss": 0.0272,
      "step": 775440
    },
    {
      "epoch": 1.2690572979059065,
      "grad_norm": 0.8505871295928955,
      "learning_rate": 7.511053418817499e-06,
      "loss": 0.0212,
      "step": 775460
    },
    {
      "epoch": 1.2690900283445599,
      "grad_norm": 1.4031975269317627,
      "learning_rate": 7.510987526603982e-06,
      "loss": 0.0233,
      "step": 775480
    },
    {
      "epoch": 1.2691227587832132,
      "grad_norm": 0.23269341886043549,
      "learning_rate": 7.510921634390464e-06,
      "loss": 0.025,
      "step": 775500
    },
    {
      "epoch": 1.2691554892218666,
      "grad_norm": 0.21108372509479523,
      "learning_rate": 7.510855742176948e-06,
      "loss": 0.0192,
      "step": 775520
    },
    {
      "epoch": 1.2691882196605198,
      "grad_norm": 0.5751911997795105,
      "learning_rate": 7.510789849963431e-06,
      "loss": 0.0174,
      "step": 775540
    },
    {
      "epoch": 1.2692209500991734,
      "grad_norm": 0.18430787324905396,
      "learning_rate": 7.510723957749913e-06,
      "loss": 0.0225,
      "step": 775560
    },
    {
      "epoch": 1.2692536805378265,
      "grad_norm": 0.41477063298225403,
      "learning_rate": 7.510658065536396e-06,
      "loss": 0.0235,
      "step": 775580
    },
    {
      "epoch": 1.2692864109764799,
      "grad_norm": 0.07345996797084808,
      "learning_rate": 7.51059217332288e-06,
      "loss": 0.0163,
      "step": 775600
    },
    {
      "epoch": 1.2693191414151332,
      "grad_norm": 0.5703101754188538,
      "learning_rate": 7.5105262811093616e-06,
      "loss": 0.021,
      "step": 775620
    },
    {
      "epoch": 1.2693518718537866,
      "grad_norm": 1.2242246866226196,
      "learning_rate": 7.510460388895845e-06,
      "loss": 0.0243,
      "step": 775640
    },
    {
      "epoch": 1.26938460229244,
      "grad_norm": 0.2786043584346771,
      "learning_rate": 7.510394496682327e-06,
      "loss": 0.0181,
      "step": 775660
    },
    {
      "epoch": 1.2694173327310931,
      "grad_norm": 0.8116862773895264,
      "learning_rate": 7.510328604468811e-06,
      "loss": 0.0259,
      "step": 775680
    },
    {
      "epoch": 1.2694500631697467,
      "grad_norm": 0.08325459063053131,
      "learning_rate": 7.5102627122552925e-06,
      "loss": 0.0208,
      "step": 775700
    },
    {
      "epoch": 1.2694827936083999,
      "grad_norm": 0.4380221962928772,
      "learning_rate": 7.510196820041776e-06,
      "loss": 0.0178,
      "step": 775720
    },
    {
      "epoch": 1.2695155240470533,
      "grad_norm": 1.4317338466644287,
      "learning_rate": 7.51013092782826e-06,
      "loss": 0.0304,
      "step": 775740
    },
    {
      "epoch": 1.2695482544857066,
      "grad_norm": 0.5127732753753662,
      "learning_rate": 7.510065035614742e-06,
      "loss": 0.0194,
      "step": 775760
    },
    {
      "epoch": 1.26958098492436,
      "grad_norm": 0.818332850933075,
      "learning_rate": 7.509999143401225e-06,
      "loss": 0.0256,
      "step": 775780
    },
    {
      "epoch": 1.2696137153630134,
      "grad_norm": 0.473481148481369,
      "learning_rate": 7.509933251187708e-06,
      "loss": 0.0292,
      "step": 775800
    },
    {
      "epoch": 1.2696464458016665,
      "grad_norm": 0.8845516443252563,
      "learning_rate": 7.509867358974191e-06,
      "loss": 0.0205,
      "step": 775820
    },
    {
      "epoch": 1.26967917624032,
      "grad_norm": 0.43859419226646423,
      "learning_rate": 7.509801466760673e-06,
      "loss": 0.02,
      "step": 775840
    },
    {
      "epoch": 1.2697119066789733,
      "grad_norm": 0.3199785053730011,
      "learning_rate": 7.509735574547157e-06,
      "loss": 0.0156,
      "step": 775860
    },
    {
      "epoch": 1.2697446371176266,
      "grad_norm": 0.5164089798927307,
      "learning_rate": 7.509669682333639e-06,
      "loss": 0.0212,
      "step": 775880
    },
    {
      "epoch": 1.26977736755628,
      "grad_norm": 0.6845666170120239,
      "learning_rate": 7.5096037901201224e-06,
      "loss": 0.0196,
      "step": 775900
    },
    {
      "epoch": 1.2698100979949334,
      "grad_norm": 0.511864960193634,
      "learning_rate": 7.509537897906604e-06,
      "loss": 0.0199,
      "step": 775920
    },
    {
      "epoch": 1.2698428284335868,
      "grad_norm": 0.40389901399612427,
      "learning_rate": 7.509472005693088e-06,
      "loss": 0.0184,
      "step": 775940
    },
    {
      "epoch": 1.26987555887224,
      "grad_norm": 0.3880367577075958,
      "learning_rate": 7.509406113479571e-06,
      "loss": 0.017,
      "step": 775960
    },
    {
      "epoch": 1.2699082893108933,
      "grad_norm": 0.4037927985191345,
      "learning_rate": 7.509340221266053e-06,
      "loss": 0.0261,
      "step": 775980
    },
    {
      "epoch": 1.2699410197495467,
      "grad_norm": 1.0120179653167725,
      "learning_rate": 7.509274329052536e-06,
      "loss": 0.0255,
      "step": 776000
    },
    {
      "epoch": 1.2699737501882,
      "grad_norm": 0.5684539675712585,
      "learning_rate": 7.50920843683902e-06,
      "loss": 0.0189,
      "step": 776020
    },
    {
      "epoch": 1.2700064806268534,
      "grad_norm": 0.543761670589447,
      "learning_rate": 7.509142544625502e-06,
      "loss": 0.0246,
      "step": 776040
    },
    {
      "epoch": 1.2700392110655068,
      "grad_norm": 0.3186807632446289,
      "learning_rate": 7.509076652411985e-06,
      "loss": 0.0299,
      "step": 776060
    },
    {
      "epoch": 1.2700719415041601,
      "grad_norm": 0.2005566507577896,
      "learning_rate": 7.509010760198467e-06,
      "loss": 0.0241,
      "step": 776080
    },
    {
      "epoch": 1.2701046719428133,
      "grad_norm": 0.7649674415588379,
      "learning_rate": 7.508944867984951e-06,
      "loss": 0.0259,
      "step": 776100
    },
    {
      "epoch": 1.2701374023814667,
      "grad_norm": 2.145604372024536,
      "learning_rate": 7.508878975771434e-06,
      "loss": 0.0262,
      "step": 776120
    },
    {
      "epoch": 1.27017013282012,
      "grad_norm": 0.44597259163856506,
      "learning_rate": 7.508813083557916e-06,
      "loss": 0.0147,
      "step": 776140
    },
    {
      "epoch": 1.2702028632587734,
      "grad_norm": 1.4495103359222412,
      "learning_rate": 7.5087471913444e-06,
      "loss": 0.0249,
      "step": 776160
    },
    {
      "epoch": 1.2702355936974268,
      "grad_norm": 0.2263544797897339,
      "learning_rate": 7.5086812991308825e-06,
      "loss": 0.0215,
      "step": 776180
    },
    {
      "epoch": 1.2702683241360802,
      "grad_norm": 1.0526124238967896,
      "learning_rate": 7.508615406917365e-06,
      "loss": 0.0179,
      "step": 776200
    },
    {
      "epoch": 1.2703010545747335,
      "grad_norm": 1.4573988914489746,
      "learning_rate": 7.508549514703848e-06,
      "loss": 0.0191,
      "step": 776220
    },
    {
      "epoch": 1.2703337850133867,
      "grad_norm": 0.36050301790237427,
      "learning_rate": 7.5084836224903316e-06,
      "loss": 0.0172,
      "step": 776240
    },
    {
      "epoch": 1.27036651545204,
      "grad_norm": 0.08529096841812134,
      "learning_rate": 7.5084177302768135e-06,
      "loss": 0.0169,
      "step": 776260
    },
    {
      "epoch": 1.2703992458906934,
      "grad_norm": 0.5951826572418213,
      "learning_rate": 7.508351838063297e-06,
      "loss": 0.0262,
      "step": 776280
    },
    {
      "epoch": 1.2704319763293468,
      "grad_norm": 0.6570534706115723,
      "learning_rate": 7.508285945849779e-06,
      "loss": 0.0175,
      "step": 776300
    },
    {
      "epoch": 1.2704647067680002,
      "grad_norm": 0.43945688009262085,
      "learning_rate": 7.5082200536362625e-06,
      "loss": 0.0188,
      "step": 776320
    },
    {
      "epoch": 1.2704974372066533,
      "grad_norm": 0.33456432819366455,
      "learning_rate": 7.508154161422745e-06,
      "loss": 0.0234,
      "step": 776340
    },
    {
      "epoch": 1.270530167645307,
      "grad_norm": 0.5793330073356628,
      "learning_rate": 7.508088269209228e-06,
      "loss": 0.024,
      "step": 776360
    },
    {
      "epoch": 1.27056289808396,
      "grad_norm": 1.328173041343689,
      "learning_rate": 7.508022376995711e-06,
      "loss": 0.0303,
      "step": 776380
    },
    {
      "epoch": 1.2705956285226134,
      "grad_norm": 0.3339448571205139,
      "learning_rate": 7.507956484782194e-06,
      "loss": 0.0247,
      "step": 776400
    },
    {
      "epoch": 1.2706283589612668,
      "grad_norm": 1.0486574172973633,
      "learning_rate": 7.507890592568676e-06,
      "loss": 0.0285,
      "step": 776420
    },
    {
      "epoch": 1.2706610893999202,
      "grad_norm": 0.9636704325675964,
      "learning_rate": 7.50782470035516e-06,
      "loss": 0.0273,
      "step": 776440
    },
    {
      "epoch": 1.2706938198385735,
      "grad_norm": 0.19085730612277985,
      "learning_rate": 7.507758808141643e-06,
      "loss": 0.0258,
      "step": 776460
    },
    {
      "epoch": 1.2707265502772267,
      "grad_norm": 0.5080751776695251,
      "learning_rate": 7.507692915928125e-06,
      "loss": 0.0198,
      "step": 776480
    },
    {
      "epoch": 1.2707592807158803,
      "grad_norm": 1.5724937915802002,
      "learning_rate": 7.507627023714609e-06,
      "loss": 0.0243,
      "step": 776500
    },
    {
      "epoch": 1.2707920111545334,
      "grad_norm": 0.38771262764930725,
      "learning_rate": 7.507561131501091e-06,
      "loss": 0.0225,
      "step": 776520
    },
    {
      "epoch": 1.2708247415931868,
      "grad_norm": 1.7855814695358276,
      "learning_rate": 7.507495239287574e-06,
      "loss": 0.0229,
      "step": 776540
    },
    {
      "epoch": 1.2708574720318402,
      "grad_norm": 1.7530758380889893,
      "learning_rate": 7.507429347074057e-06,
      "loss": 0.0206,
      "step": 776560
    },
    {
      "epoch": 1.2708902024704936,
      "grad_norm": 0.25617292523384094,
      "learning_rate": 7.50736345486054e-06,
      "loss": 0.0205,
      "step": 776580
    },
    {
      "epoch": 1.270922932909147,
      "grad_norm": 0.6051661968231201,
      "learning_rate": 7.5072975626470226e-06,
      "loss": 0.0305,
      "step": 776600
    },
    {
      "epoch": 1.2709556633478,
      "grad_norm": 0.4635750651359558,
      "learning_rate": 7.507231670433506e-06,
      "loss": 0.0155,
      "step": 776620
    },
    {
      "epoch": 1.2709883937864535,
      "grad_norm": 2.4090471267700195,
      "learning_rate": 7.507165778219988e-06,
      "loss": 0.0272,
      "step": 776640
    },
    {
      "epoch": 1.2710211242251068,
      "grad_norm": 0.8848231434822083,
      "learning_rate": 7.507099886006472e-06,
      "loss": 0.0193,
      "step": 776660
    },
    {
      "epoch": 1.2710538546637602,
      "grad_norm": 0.7331112027168274,
      "learning_rate": 7.5070339937929535e-06,
      "loss": 0.0222,
      "step": 776680
    },
    {
      "epoch": 1.2710865851024136,
      "grad_norm": 0.7347314953804016,
      "learning_rate": 7.506968101579437e-06,
      "loss": 0.0185,
      "step": 776700
    },
    {
      "epoch": 1.271119315541067,
      "grad_norm": 0.37214013934135437,
      "learning_rate": 7.506902209365919e-06,
      "loss": 0.0188,
      "step": 776720
    },
    {
      "epoch": 1.2711520459797203,
      "grad_norm": 0.18156370520591736,
      "learning_rate": 7.506836317152403e-06,
      "loss": 0.0273,
      "step": 776740
    },
    {
      "epoch": 1.2711847764183735,
      "grad_norm": 0.36740991473197937,
      "learning_rate": 7.506770424938885e-06,
      "loss": 0.0259,
      "step": 776760
    },
    {
      "epoch": 1.2712175068570268,
      "grad_norm": 0.8681293725967407,
      "learning_rate": 7.506704532725368e-06,
      "loss": 0.0171,
      "step": 776780
    },
    {
      "epoch": 1.2712502372956802,
      "grad_norm": 0.13806867599487305,
      "learning_rate": 7.506638640511852e-06,
      "loss": 0.0273,
      "step": 776800
    },
    {
      "epoch": 1.2712829677343336,
      "grad_norm": 0.7852770686149597,
      "learning_rate": 7.506572748298334e-06,
      "loss": 0.0248,
      "step": 776820
    },
    {
      "epoch": 1.271315698172987,
      "grad_norm": 0.5767211318016052,
      "learning_rate": 7.506506856084817e-06,
      "loss": 0.0156,
      "step": 776840
    },
    {
      "epoch": 1.2713484286116403,
      "grad_norm": 0.9321359992027283,
      "learning_rate": 7.5064409638713e-06,
      "loss": 0.0265,
      "step": 776860
    },
    {
      "epoch": 1.2713811590502937,
      "grad_norm": 0.742406964302063,
      "learning_rate": 7.5063750716577835e-06,
      "loss": 0.0212,
      "step": 776880
    },
    {
      "epoch": 1.2714138894889468,
      "grad_norm": 1.4321726560592651,
      "learning_rate": 7.506309179444265e-06,
      "loss": 0.0352,
      "step": 776900
    },
    {
      "epoch": 1.2714466199276002,
      "grad_norm": 0.3385360538959503,
      "learning_rate": 7.506243287230749e-06,
      "loss": 0.0275,
      "step": 776920
    },
    {
      "epoch": 1.2714793503662536,
      "grad_norm": 0.46801018714904785,
      "learning_rate": 7.506177395017231e-06,
      "loss": 0.0294,
      "step": 776940
    },
    {
      "epoch": 1.271512080804907,
      "grad_norm": 2.5760669708251953,
      "learning_rate": 7.5061115028037144e-06,
      "loss": 0.0228,
      "step": 776960
    },
    {
      "epoch": 1.2715448112435603,
      "grad_norm": 0.7345691323280334,
      "learning_rate": 7.506045610590197e-06,
      "loss": 0.0195,
      "step": 776980
    },
    {
      "epoch": 1.2715775416822137,
      "grad_norm": 1.2597360610961914,
      "learning_rate": 7.50597971837668e-06,
      "loss": 0.0183,
      "step": 777000
    },
    {
      "epoch": 1.271610272120867,
      "grad_norm": 0.2848425805568695,
      "learning_rate": 7.505913826163163e-06,
      "loss": 0.0239,
      "step": 777020
    },
    {
      "epoch": 1.2716430025595202,
      "grad_norm": 0.39082178473472595,
      "learning_rate": 7.505847933949646e-06,
      "loss": 0.0216,
      "step": 777040
    },
    {
      "epoch": 1.2716757329981736,
      "grad_norm": 0.753365159034729,
      "learning_rate": 7.505782041736128e-06,
      "loss": 0.0274,
      "step": 777060
    },
    {
      "epoch": 1.271708463436827,
      "grad_norm": 0.6142786741256714,
      "learning_rate": 7.505716149522612e-06,
      "loss": 0.0216,
      "step": 777080
    },
    {
      "epoch": 1.2717411938754803,
      "grad_norm": 1.4554377794265747,
      "learning_rate": 7.505650257309094e-06,
      "loss": 0.0167,
      "step": 777100
    },
    {
      "epoch": 1.2717739243141337,
      "grad_norm": 0.9503090381622314,
      "learning_rate": 7.505584365095577e-06,
      "loss": 0.0244,
      "step": 777120
    },
    {
      "epoch": 1.2718066547527869,
      "grad_norm": 0.9032137393951416,
      "learning_rate": 7.50551847288206e-06,
      "loss": 0.0273,
      "step": 777140
    },
    {
      "epoch": 1.2718393851914405,
      "grad_norm": 0.8529700636863708,
      "learning_rate": 7.505452580668543e-06,
      "loss": 0.0221,
      "step": 777160
    },
    {
      "epoch": 1.2718721156300936,
      "grad_norm": 1.9856449365615845,
      "learning_rate": 7.505386688455026e-06,
      "loss": 0.03,
      "step": 777180
    },
    {
      "epoch": 1.271904846068747,
      "grad_norm": 0.3748392164707184,
      "learning_rate": 7.505320796241509e-06,
      "loss": 0.0238,
      "step": 777200
    },
    {
      "epoch": 1.2719375765074004,
      "grad_norm": 0.07684153318405151,
      "learning_rate": 7.505254904027992e-06,
      "loss": 0.016,
      "step": 777220
    },
    {
      "epoch": 1.2719703069460537,
      "grad_norm": 1.920333743095398,
      "learning_rate": 7.5051890118144745e-06,
      "loss": 0.0217,
      "step": 777240
    },
    {
      "epoch": 1.272003037384707,
      "grad_norm": 0.24541857838630676,
      "learning_rate": 7.505123119600958e-06,
      "loss": 0.0249,
      "step": 777260
    },
    {
      "epoch": 1.2720357678233603,
      "grad_norm": 1.7850390672683716,
      "learning_rate": 7.50505722738744e-06,
      "loss": 0.0187,
      "step": 777280
    },
    {
      "epoch": 1.2720684982620136,
      "grad_norm": 0.7204515337944031,
      "learning_rate": 7.5049913351739235e-06,
      "loss": 0.0232,
      "step": 777300
    },
    {
      "epoch": 1.272101228700667,
      "grad_norm": 0.5775535106658936,
      "learning_rate": 7.5049254429604054e-06,
      "loss": 0.0132,
      "step": 777320
    },
    {
      "epoch": 1.2721339591393204,
      "grad_norm": 2.1669867038726807,
      "learning_rate": 7.504859550746889e-06,
      "loss": 0.023,
      "step": 777340
    },
    {
      "epoch": 1.2721666895779737,
      "grad_norm": 0.9777066111564636,
      "learning_rate": 7.504793658533372e-06,
      "loss": 0.02,
      "step": 777360
    },
    {
      "epoch": 1.2721994200166271,
      "grad_norm": 2.1097583770751953,
      "learning_rate": 7.5047277663198545e-06,
      "loss": 0.034,
      "step": 777380
    },
    {
      "epoch": 1.2722321504552805,
      "grad_norm": 1.7076610326766968,
      "learning_rate": 7.504661874106337e-06,
      "loss": 0.0262,
      "step": 777400
    },
    {
      "epoch": 1.2722648808939336,
      "grad_norm": 0.17341023683547974,
      "learning_rate": 7.504595981892821e-06,
      "loss": 0.0205,
      "step": 777420
    },
    {
      "epoch": 1.272297611332587,
      "grad_norm": 0.8441153764724731,
      "learning_rate": 7.504530089679303e-06,
      "loss": 0.0201,
      "step": 777440
    },
    {
      "epoch": 1.2723303417712404,
      "grad_norm": 0.677405595779419,
      "learning_rate": 7.504464197465786e-06,
      "loss": 0.0212,
      "step": 777460
    },
    {
      "epoch": 1.2723630722098938,
      "grad_norm": 0.14739404618740082,
      "learning_rate": 7.504398305252268e-06,
      "loss": 0.022,
      "step": 777480
    },
    {
      "epoch": 1.2723958026485471,
      "grad_norm": 0.3939836919307709,
      "learning_rate": 7.504332413038752e-06,
      "loss": 0.0184,
      "step": 777500
    },
    {
      "epoch": 1.2724285330872005,
      "grad_norm": 0.1674734503030777,
      "learning_rate": 7.504266520825235e-06,
      "loss": 0.0178,
      "step": 777520
    },
    {
      "epoch": 1.2724612635258539,
      "grad_norm": 0.6359480023384094,
      "learning_rate": 7.504200628611717e-06,
      "loss": 0.0256,
      "step": 777540
    },
    {
      "epoch": 1.272493993964507,
      "grad_norm": 1.308732509613037,
      "learning_rate": 7.504134736398201e-06,
      "loss": 0.0181,
      "step": 777560
    },
    {
      "epoch": 1.2725267244031604,
      "grad_norm": 0.5560048818588257,
      "learning_rate": 7.504068844184684e-06,
      "loss": 0.0216,
      "step": 777580
    },
    {
      "epoch": 1.2725594548418138,
      "grad_norm": 0.8908824324607849,
      "learning_rate": 7.504002951971166e-06,
      "loss": 0.0261,
      "step": 777600
    },
    {
      "epoch": 1.2725921852804671,
      "grad_norm": 0.18878613412380219,
      "learning_rate": 7.503937059757649e-06,
      "loss": 0.0214,
      "step": 777620
    },
    {
      "epoch": 1.2726249157191205,
      "grad_norm": 0.7555827498435974,
      "learning_rate": 7.503871167544133e-06,
      "loss": 0.0184,
      "step": 777640
    },
    {
      "epoch": 1.2726576461577739,
      "grad_norm": 0.12484325468540192,
      "learning_rate": 7.5038052753306146e-06,
      "loss": 0.0248,
      "step": 777660
    },
    {
      "epoch": 1.2726903765964273,
      "grad_norm": 1.164970874786377,
      "learning_rate": 7.503739383117098e-06,
      "loss": 0.0215,
      "step": 777680
    },
    {
      "epoch": 1.2727231070350804,
      "grad_norm": 1.9039907455444336,
      "learning_rate": 7.50367349090358e-06,
      "loss": 0.0312,
      "step": 777700
    },
    {
      "epoch": 1.2727558374737338,
      "grad_norm": 0.20640255510807037,
      "learning_rate": 7.503607598690064e-06,
      "loss": 0.0236,
      "step": 777720
    },
    {
      "epoch": 1.2727885679123871,
      "grad_norm": 1.3131542205810547,
      "learning_rate": 7.5035417064765455e-06,
      "loss": 0.0166,
      "step": 777740
    },
    {
      "epoch": 1.2728212983510405,
      "grad_norm": 0.40772464871406555,
      "learning_rate": 7.503475814263029e-06,
      "loss": 0.0237,
      "step": 777760
    },
    {
      "epoch": 1.272854028789694,
      "grad_norm": 0.1253325194120407,
      "learning_rate": 7.503409922049512e-06,
      "loss": 0.0183,
      "step": 777780
    },
    {
      "epoch": 1.272886759228347,
      "grad_norm": 0.25572264194488525,
      "learning_rate": 7.503344029835995e-06,
      "loss": 0.0226,
      "step": 777800
    },
    {
      "epoch": 1.2729194896670006,
      "grad_norm": 1.4412059783935547,
      "learning_rate": 7.503278137622477e-06,
      "loss": 0.0227,
      "step": 777820
    },
    {
      "epoch": 1.2729522201056538,
      "grad_norm": 2.1034834384918213,
      "learning_rate": 7.503212245408961e-06,
      "loss": 0.0259,
      "step": 777840
    },
    {
      "epoch": 1.2729849505443072,
      "grad_norm": 0.6558813452720642,
      "learning_rate": 7.503146353195444e-06,
      "loss": 0.0204,
      "step": 777860
    },
    {
      "epoch": 1.2730176809829605,
      "grad_norm": 0.41352033615112305,
      "learning_rate": 7.503080460981926e-06,
      "loss": 0.0207,
      "step": 777880
    },
    {
      "epoch": 1.273050411421614,
      "grad_norm": 0.5962249636650085,
      "learning_rate": 7.50301456876841e-06,
      "loss": 0.0248,
      "step": 777900
    },
    {
      "epoch": 1.2730831418602673,
      "grad_norm": 0.5876195430755615,
      "learning_rate": 7.502948676554892e-06,
      "loss": 0.0219,
      "step": 777920
    },
    {
      "epoch": 1.2731158722989204,
      "grad_norm": 6.234110355377197,
      "learning_rate": 7.5028827843413755e-06,
      "loss": 0.0249,
      "step": 777940
    },
    {
      "epoch": 1.273148602737574,
      "grad_norm": 1.3926233053207397,
      "learning_rate": 7.502816892127857e-06,
      "loss": 0.0313,
      "step": 777960
    },
    {
      "epoch": 1.2731813331762272,
      "grad_norm": 0.13169121742248535,
      "learning_rate": 7.502750999914341e-06,
      "loss": 0.0268,
      "step": 777980
    },
    {
      "epoch": 1.2732140636148805,
      "grad_norm": 0.6941016912460327,
      "learning_rate": 7.502685107700824e-06,
      "loss": 0.0165,
      "step": 778000
    },
    {
      "epoch": 1.273246794053534,
      "grad_norm": 1.2905902862548828,
      "learning_rate": 7.502619215487306e-06,
      "loss": 0.0242,
      "step": 778020
    },
    {
      "epoch": 1.2732795244921873,
      "grad_norm": 2.165602684020996,
      "learning_rate": 7.502553323273789e-06,
      "loss": 0.0193,
      "step": 778040
    },
    {
      "epoch": 1.2733122549308407,
      "grad_norm": 0.4550761878490448,
      "learning_rate": 7.502487431060273e-06,
      "loss": 0.0293,
      "step": 778060
    },
    {
      "epoch": 1.2733449853694938,
      "grad_norm": 1.0546663999557495,
      "learning_rate": 7.502421538846755e-06,
      "loss": 0.03,
      "step": 778080
    },
    {
      "epoch": 1.2733777158081472,
      "grad_norm": 0.24114210903644562,
      "learning_rate": 7.502355646633238e-06,
      "loss": 0.0285,
      "step": 778100
    },
    {
      "epoch": 1.2734104462468006,
      "grad_norm": 1.008053183555603,
      "learning_rate": 7.50228975441972e-06,
      "loss": 0.0246,
      "step": 778120
    },
    {
      "epoch": 1.273443176685454,
      "grad_norm": 0.2540923058986664,
      "learning_rate": 7.502223862206204e-06,
      "loss": 0.02,
      "step": 778140
    },
    {
      "epoch": 1.2734759071241073,
      "grad_norm": 0.45016202330589294,
      "learning_rate": 7.5021579699926864e-06,
      "loss": 0.0201,
      "step": 778160
    },
    {
      "epoch": 1.2735086375627607,
      "grad_norm": 1.3566572666168213,
      "learning_rate": 7.502092077779169e-06,
      "loss": 0.0249,
      "step": 778180
    },
    {
      "epoch": 1.273541368001414,
      "grad_norm": 0.24057155847549438,
      "learning_rate": 7.502026185565653e-06,
      "loss": 0.0144,
      "step": 778200
    },
    {
      "epoch": 1.2735740984400672,
      "grad_norm": 0.08257683366537094,
      "learning_rate": 7.5019602933521355e-06,
      "loss": 0.0164,
      "step": 778220
    },
    {
      "epoch": 1.2736068288787206,
      "grad_norm": 0.7262140512466431,
      "learning_rate": 7.501894401138618e-06,
      "loss": 0.0214,
      "step": 778240
    },
    {
      "epoch": 1.273639559317374,
      "grad_norm": 0.43365731835365295,
      "learning_rate": 7.501828508925101e-06,
      "loss": 0.0155,
      "step": 778260
    },
    {
      "epoch": 1.2736722897560273,
      "grad_norm": 1.0837255716323853,
      "learning_rate": 7.5017626167115846e-06,
      "loss": 0.0252,
      "step": 778280
    },
    {
      "epoch": 1.2737050201946807,
      "grad_norm": 1.02647864818573,
      "learning_rate": 7.5016967244980665e-06,
      "loss": 0.0264,
      "step": 778300
    },
    {
      "epoch": 1.273737750633334,
      "grad_norm": 0.2310899794101715,
      "learning_rate": 7.50163083228455e-06,
      "loss": 0.0233,
      "step": 778320
    },
    {
      "epoch": 1.2737704810719874,
      "grad_norm": 0.36376824975013733,
      "learning_rate": 7.501564940071032e-06,
      "loss": 0.0122,
      "step": 778340
    },
    {
      "epoch": 1.2738032115106406,
      "grad_norm": 0.6149510741233826,
      "learning_rate": 7.5014990478575155e-06,
      "loss": 0.0285,
      "step": 778360
    },
    {
      "epoch": 1.273835941949294,
      "grad_norm": 1.4235002994537354,
      "learning_rate": 7.501433155643998e-06,
      "loss": 0.0262,
      "step": 778380
    },
    {
      "epoch": 1.2738686723879473,
      "grad_norm": 0.3291909992694855,
      "learning_rate": 7.501367263430481e-06,
      "loss": 0.023,
      "step": 778400
    },
    {
      "epoch": 1.2739014028266007,
      "grad_norm": 1.2178062200546265,
      "learning_rate": 7.501301371216964e-06,
      "loss": 0.0296,
      "step": 778420
    },
    {
      "epoch": 1.273934133265254,
      "grad_norm": 0.37863028049468994,
      "learning_rate": 7.501235479003447e-06,
      "loss": 0.0199,
      "step": 778440
    },
    {
      "epoch": 1.2739668637039074,
      "grad_norm": 0.8826412558555603,
      "learning_rate": 7.501169586789929e-06,
      "loss": 0.0238,
      "step": 778460
    },
    {
      "epoch": 1.2739995941425608,
      "grad_norm": 1.9326767921447754,
      "learning_rate": 7.501103694576413e-06,
      "loss": 0.0285,
      "step": 778480
    },
    {
      "epoch": 1.274032324581214,
      "grad_norm": 1.0462971925735474,
      "learning_rate": 7.501037802362895e-06,
      "loss": 0.02,
      "step": 778500
    },
    {
      "epoch": 1.2740650550198673,
      "grad_norm": 0.4399309456348419,
      "learning_rate": 7.500971910149378e-06,
      "loss": 0.026,
      "step": 778520
    },
    {
      "epoch": 1.2740977854585207,
      "grad_norm": 0.9059949517250061,
      "learning_rate": 7.50090601793586e-06,
      "loss": 0.0201,
      "step": 778540
    },
    {
      "epoch": 1.274130515897174,
      "grad_norm": 0.7233874201774597,
      "learning_rate": 7.500840125722344e-06,
      "loss": 0.0277,
      "step": 778560
    },
    {
      "epoch": 1.2741632463358274,
      "grad_norm": 0.14522041380405426,
      "learning_rate": 7.500774233508827e-06,
      "loss": 0.0191,
      "step": 778580
    },
    {
      "epoch": 1.2741959767744806,
      "grad_norm": 1.0217006206512451,
      "learning_rate": 7.500708341295309e-06,
      "loss": 0.0204,
      "step": 778600
    },
    {
      "epoch": 1.2742287072131342,
      "grad_norm": 1.3090277910232544,
      "learning_rate": 7.500642449081793e-06,
      "loss": 0.0439,
      "step": 778620
    },
    {
      "epoch": 1.2742614376517873,
      "grad_norm": 0.39389872550964355,
      "learning_rate": 7.500576556868276e-06,
      "loss": 0.0244,
      "step": 778640
    },
    {
      "epoch": 1.2742941680904407,
      "grad_norm": 0.3720519244670868,
      "learning_rate": 7.500510664654758e-06,
      "loss": 0.0146,
      "step": 778660
    },
    {
      "epoch": 1.274326898529094,
      "grad_norm": 0.7239547371864319,
      "learning_rate": 7.500444772441241e-06,
      "loss": 0.0246,
      "step": 778680
    },
    {
      "epoch": 1.2743596289677475,
      "grad_norm": 0.21425439417362213,
      "learning_rate": 7.500378880227725e-06,
      "loss": 0.0207,
      "step": 778700
    },
    {
      "epoch": 1.2743923594064008,
      "grad_norm": 0.7148699760437012,
      "learning_rate": 7.5003129880142065e-06,
      "loss": 0.0324,
      "step": 778720
    },
    {
      "epoch": 1.274425089845054,
      "grad_norm": 1.193010926246643,
      "learning_rate": 7.50024709580069e-06,
      "loss": 0.0258,
      "step": 778740
    },
    {
      "epoch": 1.2744578202837076,
      "grad_norm": 0.39376693964004517,
      "learning_rate": 7.500181203587172e-06,
      "loss": 0.0226,
      "step": 778760
    },
    {
      "epoch": 1.2744905507223607,
      "grad_norm": 0.6153623461723328,
      "learning_rate": 7.500115311373656e-06,
      "loss": 0.0189,
      "step": 778780
    },
    {
      "epoch": 1.274523281161014,
      "grad_norm": 0.6239001154899597,
      "learning_rate": 7.500049419160138e-06,
      "loss": 0.0284,
      "step": 778800
    },
    {
      "epoch": 1.2745560115996675,
      "grad_norm": 0.9280317425727844,
      "learning_rate": 7.499983526946621e-06,
      "loss": 0.0177,
      "step": 778820
    },
    {
      "epoch": 1.2745887420383208,
      "grad_norm": 2.308417558670044,
      "learning_rate": 7.499917634733104e-06,
      "loss": 0.0206,
      "step": 778840
    },
    {
      "epoch": 1.2746214724769742,
      "grad_norm": 0.3372943103313446,
      "learning_rate": 7.499851742519587e-06,
      "loss": 0.0208,
      "step": 778860
    },
    {
      "epoch": 1.2746542029156274,
      "grad_norm": 0.7085797786712646,
      "learning_rate": 7.499785850306069e-06,
      "loss": 0.0194,
      "step": 778880
    },
    {
      "epoch": 1.2746869333542807,
      "grad_norm": 0.7056772112846375,
      "learning_rate": 7.499719958092553e-06,
      "loss": 0.0235,
      "step": 778900
    },
    {
      "epoch": 1.274719663792934,
      "grad_norm": 0.5854036808013916,
      "learning_rate": 7.4996540658790365e-06,
      "loss": 0.0201,
      "step": 778920
    },
    {
      "epoch": 1.2747523942315875,
      "grad_norm": 0.5907379984855652,
      "learning_rate": 7.499588173665518e-06,
      "loss": 0.0355,
      "step": 778940
    },
    {
      "epoch": 1.2747851246702409,
      "grad_norm": 0.4008939564228058,
      "learning_rate": 7.499522281452002e-06,
      "loss": 0.0204,
      "step": 778960
    },
    {
      "epoch": 1.2748178551088942,
      "grad_norm": 0.5605792999267578,
      "learning_rate": 7.499456389238484e-06,
      "loss": 0.0228,
      "step": 778980
    },
    {
      "epoch": 1.2748505855475476,
      "grad_norm": 0.6544992327690125,
      "learning_rate": 7.4993904970249674e-06,
      "loss": 0.0202,
      "step": 779000
    },
    {
      "epoch": 1.2748833159862007,
      "grad_norm": 0.45237359404563904,
      "learning_rate": 7.49932460481145e-06,
      "loss": 0.0142,
      "step": 779020
    },
    {
      "epoch": 1.2749160464248541,
      "grad_norm": 0.7000222206115723,
      "learning_rate": 7.499258712597933e-06,
      "loss": 0.0214,
      "step": 779040
    },
    {
      "epoch": 1.2749487768635075,
      "grad_norm": 0.4716125726699829,
      "learning_rate": 7.499192820384416e-06,
      "loss": 0.0233,
      "step": 779060
    },
    {
      "epoch": 1.2749815073021609,
      "grad_norm": 0.3593316674232483,
      "learning_rate": 7.499126928170899e-06,
      "loss": 0.0255,
      "step": 779080
    },
    {
      "epoch": 1.2750142377408142,
      "grad_norm": 0.4495624005794525,
      "learning_rate": 7.499061035957381e-06,
      "loss": 0.0216,
      "step": 779100
    },
    {
      "epoch": 1.2750469681794676,
      "grad_norm": 0.2575111389160156,
      "learning_rate": 7.498995143743865e-06,
      "loss": 0.0157,
      "step": 779120
    },
    {
      "epoch": 1.275079698618121,
      "grad_norm": 0.5354599952697754,
      "learning_rate": 7.498929251530347e-06,
      "loss": 0.0271,
      "step": 779140
    },
    {
      "epoch": 1.2751124290567741,
      "grad_norm": 0.9653598070144653,
      "learning_rate": 7.49886335931683e-06,
      "loss": 0.0274,
      "step": 779160
    },
    {
      "epoch": 1.2751451594954275,
      "grad_norm": 0.58409184217453,
      "learning_rate": 7.498797467103313e-06,
      "loss": 0.0263,
      "step": 779180
    },
    {
      "epoch": 1.2751778899340809,
      "grad_norm": 0.3878779113292694,
      "learning_rate": 7.498731574889796e-06,
      "loss": 0.024,
      "step": 779200
    },
    {
      "epoch": 1.2752106203727342,
      "grad_norm": 0.2884071469306946,
      "learning_rate": 7.498665682676278e-06,
      "loss": 0.0302,
      "step": 779220
    },
    {
      "epoch": 1.2752433508113876,
      "grad_norm": 0.9592186212539673,
      "learning_rate": 7.498599790462762e-06,
      "loss": 0.0172,
      "step": 779240
    },
    {
      "epoch": 1.275276081250041,
      "grad_norm": 0.4912624955177307,
      "learning_rate": 7.498533898249245e-06,
      "loss": 0.0275,
      "step": 779260
    },
    {
      "epoch": 1.2753088116886944,
      "grad_norm": 0.477074533700943,
      "learning_rate": 7.4984680060357275e-06,
      "loss": 0.0269,
      "step": 779280
    },
    {
      "epoch": 1.2753415421273475,
      "grad_norm": 0.877227783203125,
      "learning_rate": 7.498402113822211e-06,
      "loss": 0.017,
      "step": 779300
    },
    {
      "epoch": 1.2753742725660009,
      "grad_norm": 0.8539872169494629,
      "learning_rate": 7.498336221608693e-06,
      "loss": 0.0283,
      "step": 779320
    },
    {
      "epoch": 1.2754070030046543,
      "grad_norm": 0.24086982011795044,
      "learning_rate": 7.4982703293951766e-06,
      "loss": 0.019,
      "step": 779340
    },
    {
      "epoch": 1.2754397334433076,
      "grad_norm": 1.1196144819259644,
      "learning_rate": 7.4982044371816584e-06,
      "loss": 0.0203,
      "step": 779360
    },
    {
      "epoch": 1.275472463881961,
      "grad_norm": 1.5365285873413086,
      "learning_rate": 7.498138544968142e-06,
      "loss": 0.0231,
      "step": 779380
    },
    {
      "epoch": 1.2755051943206142,
      "grad_norm": 0.5584068894386292,
      "learning_rate": 7.498072652754625e-06,
      "loss": 0.0199,
      "step": 779400
    },
    {
      "epoch": 1.2755379247592677,
      "grad_norm": 0.5200559496879578,
      "learning_rate": 7.4980067605411075e-06,
      "loss": 0.0299,
      "step": 779420
    },
    {
      "epoch": 1.275570655197921,
      "grad_norm": 1.0168499946594238,
      "learning_rate": 7.49794086832759e-06,
      "loss": 0.0297,
      "step": 779440
    },
    {
      "epoch": 1.2756033856365743,
      "grad_norm": 1.1861129999160767,
      "learning_rate": 7.497874976114074e-06,
      "loss": 0.0196,
      "step": 779460
    },
    {
      "epoch": 1.2756361160752276,
      "grad_norm": 0.5585870146751404,
      "learning_rate": 7.497809083900556e-06,
      "loss": 0.0202,
      "step": 779480
    },
    {
      "epoch": 1.275668846513881,
      "grad_norm": 0.24467048048973083,
      "learning_rate": 7.497743191687039e-06,
      "loss": 0.0197,
      "step": 779500
    },
    {
      "epoch": 1.2757015769525344,
      "grad_norm": 0.5769041180610657,
      "learning_rate": 7.497677299473521e-06,
      "loss": 0.0173,
      "step": 779520
    },
    {
      "epoch": 1.2757343073911875,
      "grad_norm": 0.5381004810333252,
      "learning_rate": 7.497611407260005e-06,
      "loss": 0.0225,
      "step": 779540
    },
    {
      "epoch": 1.2757670378298411,
      "grad_norm": 1.758978009223938,
      "learning_rate": 7.497545515046487e-06,
      "loss": 0.0207,
      "step": 779560
    },
    {
      "epoch": 1.2757997682684943,
      "grad_norm": 1.7431387901306152,
      "learning_rate": 7.49747962283297e-06,
      "loss": 0.0246,
      "step": 779580
    },
    {
      "epoch": 1.2758324987071477,
      "grad_norm": 0.9244585037231445,
      "learning_rate": 7.497413730619453e-06,
      "loss": 0.0276,
      "step": 779600
    },
    {
      "epoch": 1.275865229145801,
      "grad_norm": 0.5970335602760315,
      "learning_rate": 7.497347838405936e-06,
      "loss": 0.0226,
      "step": 779620
    },
    {
      "epoch": 1.2758979595844544,
      "grad_norm": 0.7162030339241028,
      "learning_rate": 7.497281946192419e-06,
      "loss": 0.0241,
      "step": 779640
    },
    {
      "epoch": 1.2759306900231078,
      "grad_norm": 1.4705696105957031,
      "learning_rate": 7.497216053978902e-06,
      "loss": 0.0233,
      "step": 779660
    },
    {
      "epoch": 1.275963420461761,
      "grad_norm": 0.6788955926895142,
      "learning_rate": 7.497150161765385e-06,
      "loss": 0.0182,
      "step": 779680
    },
    {
      "epoch": 1.2759961509004143,
      "grad_norm": 0.6361129879951477,
      "learning_rate": 7.4970842695518676e-06,
      "loss": 0.03,
      "step": 779700
    },
    {
      "epoch": 1.2760288813390677,
      "grad_norm": 0.267122358083725,
      "learning_rate": 7.497018377338351e-06,
      "loss": 0.0264,
      "step": 779720
    },
    {
      "epoch": 1.276061611777721,
      "grad_norm": 5.702361583709717,
      "learning_rate": 7.496952485124833e-06,
      "loss": 0.0186,
      "step": 779740
    },
    {
      "epoch": 1.2760943422163744,
      "grad_norm": 0.9244524836540222,
      "learning_rate": 7.496886592911317e-06,
      "loss": 0.0281,
      "step": 779760
    },
    {
      "epoch": 1.2761270726550278,
      "grad_norm": 0.458843857049942,
      "learning_rate": 7.4968207006977985e-06,
      "loss": 0.0321,
      "step": 779780
    },
    {
      "epoch": 1.2761598030936812,
      "grad_norm": 0.8305063247680664,
      "learning_rate": 7.496754808484282e-06,
      "loss": 0.0288,
      "step": 779800
    },
    {
      "epoch": 1.2761925335323343,
      "grad_norm": 1.6793336868286133,
      "learning_rate": 7.496688916270765e-06,
      "loss": 0.0187,
      "step": 779820
    },
    {
      "epoch": 1.2762252639709877,
      "grad_norm": 0.7635343074798584,
      "learning_rate": 7.496623024057248e-06,
      "loss": 0.0173,
      "step": 779840
    },
    {
      "epoch": 1.276257994409641,
      "grad_norm": 0.8958309292793274,
      "learning_rate": 7.49655713184373e-06,
      "loss": 0.0204,
      "step": 779860
    },
    {
      "epoch": 1.2762907248482944,
      "grad_norm": 0.1405002921819687,
      "learning_rate": 7.496491239630214e-06,
      "loss": 0.0218,
      "step": 779880
    },
    {
      "epoch": 1.2763234552869478,
      "grad_norm": 0.2735360562801361,
      "learning_rate": 7.496425347416696e-06,
      "loss": 0.0219,
      "step": 779900
    },
    {
      "epoch": 1.2763561857256012,
      "grad_norm": 0.2330172061920166,
      "learning_rate": 7.496359455203179e-06,
      "loss": 0.0212,
      "step": 779920
    },
    {
      "epoch": 1.2763889161642545,
      "grad_norm": 0.75592440366745,
      "learning_rate": 7.496293562989661e-06,
      "loss": 0.0177,
      "step": 779940
    },
    {
      "epoch": 1.2764216466029077,
      "grad_norm": 0.36925622820854187,
      "learning_rate": 7.496227670776145e-06,
      "loss": 0.0256,
      "step": 779960
    },
    {
      "epoch": 1.276454377041561,
      "grad_norm": 0.584309458732605,
      "learning_rate": 7.4961617785626285e-06,
      "loss": 0.0179,
      "step": 779980
    },
    {
      "epoch": 1.2764871074802144,
      "grad_norm": 0.514739453792572,
      "learning_rate": 7.49609588634911e-06,
      "loss": 0.0207,
      "step": 780000
    },
    {
      "epoch": 1.2765198379188678,
      "grad_norm": 1.6266322135925293,
      "learning_rate": 7.496029994135594e-06,
      "loss": 0.0219,
      "step": 780020
    },
    {
      "epoch": 1.2765525683575212,
      "grad_norm": 0.6837776899337769,
      "learning_rate": 7.495964101922077e-06,
      "loss": 0.0259,
      "step": 780040
    },
    {
      "epoch": 1.2765852987961743,
      "grad_norm": 1.0081896781921387,
      "learning_rate": 7.495898209708559e-06,
      "loss": 0.019,
      "step": 780060
    },
    {
      "epoch": 1.276618029234828,
      "grad_norm": 0.23478926718235016,
      "learning_rate": 7.495832317495042e-06,
      "loss": 0.0241,
      "step": 780080
    },
    {
      "epoch": 1.276650759673481,
      "grad_norm": 1.1588902473449707,
      "learning_rate": 7.495766425281526e-06,
      "loss": 0.0282,
      "step": 780100
    },
    {
      "epoch": 1.2766834901121344,
      "grad_norm": 0.32175785303115845,
      "learning_rate": 7.495700533068008e-06,
      "loss": 0.0191,
      "step": 780120
    },
    {
      "epoch": 1.2767162205507878,
      "grad_norm": 0.7532221078872681,
      "learning_rate": 7.495634640854491e-06,
      "loss": 0.0281,
      "step": 780140
    },
    {
      "epoch": 1.2767489509894412,
      "grad_norm": 1.4127331972122192,
      "learning_rate": 7.495568748640973e-06,
      "loss": 0.0231,
      "step": 780160
    },
    {
      "epoch": 1.2767816814280946,
      "grad_norm": 0.932694137096405,
      "learning_rate": 7.495502856427457e-06,
      "loss": 0.0211,
      "step": 780180
    },
    {
      "epoch": 1.2768144118667477,
      "grad_norm": 1.1145862340927124,
      "learning_rate": 7.4954369642139394e-06,
      "loss": 0.0239,
      "step": 780200
    },
    {
      "epoch": 1.2768471423054013,
      "grad_norm": 0.8177206516265869,
      "learning_rate": 7.495371072000422e-06,
      "loss": 0.0173,
      "step": 780220
    },
    {
      "epoch": 1.2768798727440545,
      "grad_norm": 1.0127928256988525,
      "learning_rate": 7.495305179786905e-06,
      "loss": 0.0191,
      "step": 780240
    },
    {
      "epoch": 1.2769126031827078,
      "grad_norm": 0.3758925199508667,
      "learning_rate": 7.4952392875733885e-06,
      "loss": 0.0283,
      "step": 780260
    },
    {
      "epoch": 1.2769453336213612,
      "grad_norm": 0.2780478596687317,
      "learning_rate": 7.49517339535987e-06,
      "loss": 0.0224,
      "step": 780280
    },
    {
      "epoch": 1.2769780640600146,
      "grad_norm": 0.9230303764343262,
      "learning_rate": 7.495107503146354e-06,
      "loss": 0.0226,
      "step": 780300
    },
    {
      "epoch": 1.277010794498668,
      "grad_norm": 1.2986716032028198,
      "learning_rate": 7.495041610932838e-06,
      "loss": 0.0295,
      "step": 780320
    },
    {
      "epoch": 1.277043524937321,
      "grad_norm": 0.30235934257507324,
      "learning_rate": 7.4949757187193195e-06,
      "loss": 0.0266,
      "step": 780340
    },
    {
      "epoch": 1.2770762553759745,
      "grad_norm": 1.9502569437026978,
      "learning_rate": 7.494909826505803e-06,
      "loss": 0.0215,
      "step": 780360
    },
    {
      "epoch": 1.2771089858146278,
      "grad_norm": 0.16896730661392212,
      "learning_rate": 7.494843934292285e-06,
      "loss": 0.0202,
      "step": 780380
    },
    {
      "epoch": 1.2771417162532812,
      "grad_norm": 0.3133169114589691,
      "learning_rate": 7.4947780420787685e-06,
      "loss": 0.0284,
      "step": 780400
    },
    {
      "epoch": 1.2771744466919346,
      "grad_norm": 0.5023841261863708,
      "learning_rate": 7.494712149865251e-06,
      "loss": 0.0238,
      "step": 780420
    },
    {
      "epoch": 1.277207177130588,
      "grad_norm": 0.22522331774234772,
      "learning_rate": 7.494646257651734e-06,
      "loss": 0.0216,
      "step": 780440
    },
    {
      "epoch": 1.2772399075692413,
      "grad_norm": 0.3107352554798126,
      "learning_rate": 7.494580365438217e-06,
      "loss": 0.0219,
      "step": 780460
    },
    {
      "epoch": 1.2772726380078945,
      "grad_norm": 0.3387797772884369,
      "learning_rate": 7.4945144732247e-06,
      "loss": 0.0195,
      "step": 780480
    },
    {
      "epoch": 1.2773053684465479,
      "grad_norm": 2.3917698860168457,
      "learning_rate": 7.494448581011182e-06,
      "loss": 0.0238,
      "step": 780500
    },
    {
      "epoch": 1.2773380988852012,
      "grad_norm": 1.6238991022109985,
      "learning_rate": 7.494382688797666e-06,
      "loss": 0.0193,
      "step": 780520
    },
    {
      "epoch": 1.2773708293238546,
      "grad_norm": 0.8449086546897888,
      "learning_rate": 7.494316796584148e-06,
      "loss": 0.0263,
      "step": 780540
    },
    {
      "epoch": 1.277403559762508,
      "grad_norm": 1.3058934211730957,
      "learning_rate": 7.494250904370631e-06,
      "loss": 0.0301,
      "step": 780560
    },
    {
      "epoch": 1.2774362902011613,
      "grad_norm": 0.31509530544281006,
      "learning_rate": 7.494185012157113e-06,
      "loss": 0.0181,
      "step": 780580
    },
    {
      "epoch": 1.2774690206398147,
      "grad_norm": 1.0462676286697388,
      "learning_rate": 7.494119119943597e-06,
      "loss": 0.0252,
      "step": 780600
    },
    {
      "epoch": 1.2775017510784679,
      "grad_norm": 0.7894389033317566,
      "learning_rate": 7.4940532277300795e-06,
      "loss": 0.0209,
      "step": 780620
    },
    {
      "epoch": 1.2775344815171212,
      "grad_norm": 0.42193618416786194,
      "learning_rate": 7.493987335516562e-06,
      "loss": 0.0225,
      "step": 780640
    },
    {
      "epoch": 1.2775672119557746,
      "grad_norm": 1.8210102319717407,
      "learning_rate": 7.493921443303045e-06,
      "loss": 0.0214,
      "step": 780660
    },
    {
      "epoch": 1.277599942394428,
      "grad_norm": 2.179173469543457,
      "learning_rate": 7.493855551089529e-06,
      "loss": 0.0283,
      "step": 780680
    },
    {
      "epoch": 1.2776326728330814,
      "grad_norm": 0.4649341404438019,
      "learning_rate": 7.493789658876011e-06,
      "loss": 0.0206,
      "step": 780700
    },
    {
      "epoch": 1.2776654032717347,
      "grad_norm": 0.12324807792901993,
      "learning_rate": 7.493723766662494e-06,
      "loss": 0.0171,
      "step": 780720
    },
    {
      "epoch": 1.277698133710388,
      "grad_norm": 0.7658535242080688,
      "learning_rate": 7.493657874448978e-06,
      "loss": 0.0304,
      "step": 780740
    },
    {
      "epoch": 1.2777308641490412,
      "grad_norm": 0.7102410197257996,
      "learning_rate": 7.4935919822354595e-06,
      "loss": 0.0196,
      "step": 780760
    },
    {
      "epoch": 1.2777635945876946,
      "grad_norm": 0.0947946161031723,
      "learning_rate": 7.493526090021943e-06,
      "loss": 0.0158,
      "step": 780780
    },
    {
      "epoch": 1.277796325026348,
      "grad_norm": 0.42743292450904846,
      "learning_rate": 7.493460197808425e-06,
      "loss": 0.0243,
      "step": 780800
    },
    {
      "epoch": 1.2778290554650014,
      "grad_norm": 0.7662673592567444,
      "learning_rate": 7.493394305594909e-06,
      "loss": 0.0209,
      "step": 780820
    },
    {
      "epoch": 1.2778617859036547,
      "grad_norm": 0.38635289669036865,
      "learning_rate": 7.493328413381391e-06,
      "loss": 0.0178,
      "step": 780840
    },
    {
      "epoch": 1.2778945163423079,
      "grad_norm": 0.8040112257003784,
      "learning_rate": 7.493262521167874e-06,
      "loss": 0.0252,
      "step": 780860
    },
    {
      "epoch": 1.2779272467809615,
      "grad_norm": 0.3438182771205902,
      "learning_rate": 7.493196628954357e-06,
      "loss": 0.0136,
      "step": 780880
    },
    {
      "epoch": 1.2779599772196146,
      "grad_norm": 0.39941710233688354,
      "learning_rate": 7.49313073674084e-06,
      "loss": 0.0271,
      "step": 780900
    },
    {
      "epoch": 1.277992707658268,
      "grad_norm": 0.5551303625106812,
      "learning_rate": 7.493064844527322e-06,
      "loss": 0.0247,
      "step": 780920
    },
    {
      "epoch": 1.2780254380969214,
      "grad_norm": 0.8047038912773132,
      "learning_rate": 7.492998952313806e-06,
      "loss": 0.0301,
      "step": 780940
    },
    {
      "epoch": 1.2780581685355747,
      "grad_norm": 1.2748578786849976,
      "learning_rate": 7.492933060100288e-06,
      "loss": 0.024,
      "step": 780960
    },
    {
      "epoch": 1.2780908989742281,
      "grad_norm": 0.4086562693119049,
      "learning_rate": 7.492867167886771e-06,
      "loss": 0.0249,
      "step": 780980
    },
    {
      "epoch": 1.2781236294128813,
      "grad_norm": 0.4364694356918335,
      "learning_rate": 7.492801275673254e-06,
      "loss": 0.0164,
      "step": 781000
    },
    {
      "epoch": 1.2781563598515349,
      "grad_norm": 0.6824554204940796,
      "learning_rate": 7.492735383459737e-06,
      "loss": 0.0267,
      "step": 781020
    },
    {
      "epoch": 1.278189090290188,
      "grad_norm": 1.3221299648284912,
      "learning_rate": 7.4926694912462204e-06,
      "loss": 0.0289,
      "step": 781040
    },
    {
      "epoch": 1.2782218207288414,
      "grad_norm": 0.757444441318512,
      "learning_rate": 7.492603599032703e-06,
      "loss": 0.0217,
      "step": 781060
    },
    {
      "epoch": 1.2782545511674948,
      "grad_norm": 0.45122265815734863,
      "learning_rate": 7.492537706819186e-06,
      "loss": 0.0221,
      "step": 781080
    },
    {
      "epoch": 1.2782872816061481,
      "grad_norm": 1.7537192106246948,
      "learning_rate": 7.492471814605669e-06,
      "loss": 0.0241,
      "step": 781100
    },
    {
      "epoch": 1.2783200120448015,
      "grad_norm": 0.5059594511985779,
      "learning_rate": 7.492405922392152e-06,
      "loss": 0.0181,
      "step": 781120
    },
    {
      "epoch": 1.2783527424834547,
      "grad_norm": 0.6611863374710083,
      "learning_rate": 7.492340030178634e-06,
      "loss": 0.0226,
      "step": 781140
    },
    {
      "epoch": 1.278385472922108,
      "grad_norm": 0.5902173519134521,
      "learning_rate": 7.492274137965118e-06,
      "loss": 0.0173,
      "step": 781160
    },
    {
      "epoch": 1.2784182033607614,
      "grad_norm": 0.7141216993331909,
      "learning_rate": 7.4922082457516e-06,
      "loss": 0.0294,
      "step": 781180
    },
    {
      "epoch": 1.2784509337994148,
      "grad_norm": 0.3721325099468231,
      "learning_rate": 7.492142353538083e-06,
      "loss": 0.0204,
      "step": 781200
    },
    {
      "epoch": 1.2784836642380681,
      "grad_norm": 1.236664891242981,
      "learning_rate": 7.492076461324566e-06,
      "loss": 0.026,
      "step": 781220
    },
    {
      "epoch": 1.2785163946767215,
      "grad_norm": 1.2722655534744263,
      "learning_rate": 7.492010569111049e-06,
      "loss": 0.0222,
      "step": 781240
    },
    {
      "epoch": 1.2785491251153749,
      "grad_norm": 1.0611050128936768,
      "learning_rate": 7.4919446768975314e-06,
      "loss": 0.0251,
      "step": 781260
    },
    {
      "epoch": 1.278581855554028,
      "grad_norm": 0.23716135323047638,
      "learning_rate": 7.491878784684015e-06,
      "loss": 0.0224,
      "step": 781280
    },
    {
      "epoch": 1.2786145859926814,
      "grad_norm": 0.20047783851623535,
      "learning_rate": 7.491812892470497e-06,
      "loss": 0.0202,
      "step": 781300
    },
    {
      "epoch": 1.2786473164313348,
      "grad_norm": 0.4136943221092224,
      "learning_rate": 7.4917470002569805e-06,
      "loss": 0.0233,
      "step": 781320
    },
    {
      "epoch": 1.2786800468699882,
      "grad_norm": 0.5342987179756165,
      "learning_rate": 7.491681108043462e-06,
      "loss": 0.0185,
      "step": 781340
    },
    {
      "epoch": 1.2787127773086415,
      "grad_norm": 0.5439208745956421,
      "learning_rate": 7.491615215829946e-06,
      "loss": 0.0169,
      "step": 781360
    },
    {
      "epoch": 1.278745507747295,
      "grad_norm": 0.5875623226165771,
      "learning_rate": 7.4915493236164296e-06,
      "loss": 0.0204,
      "step": 781380
    },
    {
      "epoch": 1.2787782381859483,
      "grad_norm": 0.24256564676761627,
      "learning_rate": 7.4914834314029115e-06,
      "loss": 0.0218,
      "step": 781400
    },
    {
      "epoch": 1.2788109686246014,
      "grad_norm": 0.5385978817939758,
      "learning_rate": 7.491417539189395e-06,
      "loss": 0.0205,
      "step": 781420
    },
    {
      "epoch": 1.2788436990632548,
      "grad_norm": 0.7915781736373901,
      "learning_rate": 7.491351646975877e-06,
      "loss": 0.02,
      "step": 781440
    },
    {
      "epoch": 1.2788764295019082,
      "grad_norm": 1.9558125734329224,
      "learning_rate": 7.4912857547623605e-06,
      "loss": 0.0292,
      "step": 781460
    },
    {
      "epoch": 1.2789091599405615,
      "grad_norm": 0.7373000383377075,
      "learning_rate": 7.491219862548843e-06,
      "loss": 0.0259,
      "step": 781480
    },
    {
      "epoch": 1.278941890379215,
      "grad_norm": 0.5200487971305847,
      "learning_rate": 7.491153970335327e-06,
      "loss": 0.0213,
      "step": 781500
    },
    {
      "epoch": 1.2789746208178683,
      "grad_norm": 0.673596978187561,
      "learning_rate": 7.491088078121809e-06,
      "loss": 0.0212,
      "step": 781520
    },
    {
      "epoch": 1.2790073512565217,
      "grad_norm": 0.6749399900436401,
      "learning_rate": 7.491022185908292e-06,
      "loss": 0.0181,
      "step": 781540
    },
    {
      "epoch": 1.2790400816951748,
      "grad_norm": 1.11166512966156,
      "learning_rate": 7.490956293694774e-06,
      "loss": 0.0301,
      "step": 781560
    },
    {
      "epoch": 1.2790728121338282,
      "grad_norm": 1.3388192653656006,
      "learning_rate": 7.490890401481258e-06,
      "loss": 0.0309,
      "step": 781580
    },
    {
      "epoch": 1.2791055425724815,
      "grad_norm": 0.37682318687438965,
      "learning_rate": 7.49082450926774e-06,
      "loss": 0.0218,
      "step": 781600
    },
    {
      "epoch": 1.279138273011135,
      "grad_norm": 0.38498762249946594,
      "learning_rate": 7.490758617054223e-06,
      "loss": 0.0305,
      "step": 781620
    },
    {
      "epoch": 1.2791710034497883,
      "grad_norm": 0.24101312458515167,
      "learning_rate": 7.490692724840706e-06,
      "loss": 0.0204,
      "step": 781640
    },
    {
      "epoch": 1.2792037338884414,
      "grad_norm": 0.28962820768356323,
      "learning_rate": 7.490626832627189e-06,
      "loss": 0.0193,
      "step": 781660
    },
    {
      "epoch": 1.279236464327095,
      "grad_norm": 0.38778388500213623,
      "learning_rate": 7.4905609404136715e-06,
      "loss": 0.022,
      "step": 781680
    },
    {
      "epoch": 1.2792691947657482,
      "grad_norm": 0.45342564582824707,
      "learning_rate": 7.490495048200155e-06,
      "loss": 0.018,
      "step": 781700
    },
    {
      "epoch": 1.2793019252044016,
      "grad_norm": 0.4643360674381256,
      "learning_rate": 7.490429155986638e-06,
      "loss": 0.0154,
      "step": 781720
    },
    {
      "epoch": 1.279334655643055,
      "grad_norm": 0.28181275725364685,
      "learning_rate": 7.4903632637731206e-06,
      "loss": 0.0246,
      "step": 781740
    },
    {
      "epoch": 1.2793673860817083,
      "grad_norm": 1.0316954851150513,
      "learning_rate": 7.490297371559604e-06,
      "loss": 0.0313,
      "step": 781760
    },
    {
      "epoch": 1.2794001165203617,
      "grad_norm": 0.22960208356380463,
      "learning_rate": 7.490231479346086e-06,
      "loss": 0.0253,
      "step": 781780
    },
    {
      "epoch": 1.2794328469590148,
      "grad_norm": 1.3751300573349,
      "learning_rate": 7.49016558713257e-06,
      "loss": 0.0206,
      "step": 781800
    },
    {
      "epoch": 1.2794655773976684,
      "grad_norm": 0.6564045548439026,
      "learning_rate": 7.4900996949190515e-06,
      "loss": 0.0153,
      "step": 781820
    },
    {
      "epoch": 1.2794983078363216,
      "grad_norm": 0.4017091691493988,
      "learning_rate": 7.490033802705535e-06,
      "loss": 0.0153,
      "step": 781840
    },
    {
      "epoch": 1.279531038274975,
      "grad_norm": 1.3253765106201172,
      "learning_rate": 7.489967910492018e-06,
      "loss": 0.0392,
      "step": 781860
    },
    {
      "epoch": 1.2795637687136283,
      "grad_norm": 0.4462463855743408,
      "learning_rate": 7.489902018278501e-06,
      "loss": 0.0304,
      "step": 781880
    },
    {
      "epoch": 1.2795964991522817,
      "grad_norm": 0.7056542634963989,
      "learning_rate": 7.489836126064983e-06,
      "loss": 0.0253,
      "step": 781900
    },
    {
      "epoch": 1.279629229590935,
      "grad_norm": 0.6521759033203125,
      "learning_rate": 7.489770233851467e-06,
      "loss": 0.0228,
      "step": 781920
    },
    {
      "epoch": 1.2796619600295882,
      "grad_norm": 1.6444072723388672,
      "learning_rate": 7.489704341637949e-06,
      "loss": 0.0191,
      "step": 781940
    },
    {
      "epoch": 1.2796946904682416,
      "grad_norm": 0.36307722330093384,
      "learning_rate": 7.489638449424432e-06,
      "loss": 0.0198,
      "step": 781960
    },
    {
      "epoch": 1.279727420906895,
      "grad_norm": 13.798707008361816,
      "learning_rate": 7.489572557210914e-06,
      "loss": 0.0242,
      "step": 781980
    },
    {
      "epoch": 1.2797601513455483,
      "grad_norm": 0.4382067620754242,
      "learning_rate": 7.489506664997398e-06,
      "loss": 0.0241,
      "step": 782000
    },
    {
      "epoch": 1.2797928817842017,
      "grad_norm": 0.46978819370269775,
      "learning_rate": 7.489440772783881e-06,
      "loss": 0.0218,
      "step": 782020
    },
    {
      "epoch": 1.279825612222855,
      "grad_norm": 0.894756555557251,
      "learning_rate": 7.489374880570363e-06,
      "loss": 0.0272,
      "step": 782040
    },
    {
      "epoch": 1.2798583426615084,
      "grad_norm": 1.5345113277435303,
      "learning_rate": 7.489308988356846e-06,
      "loss": 0.0238,
      "step": 782060
    },
    {
      "epoch": 1.2798910731001616,
      "grad_norm": 0.7675544023513794,
      "learning_rate": 7.48924309614333e-06,
      "loss": 0.0287,
      "step": 782080
    },
    {
      "epoch": 1.279923803538815,
      "grad_norm": 0.5695963501930237,
      "learning_rate": 7.4891772039298124e-06,
      "loss": 0.0395,
      "step": 782100
    },
    {
      "epoch": 1.2799565339774683,
      "grad_norm": 1.1763155460357666,
      "learning_rate": 7.489111311716295e-06,
      "loss": 0.0262,
      "step": 782120
    },
    {
      "epoch": 1.2799892644161217,
      "grad_norm": 0.26502835750579834,
      "learning_rate": 7.489045419502779e-06,
      "loss": 0.0248,
      "step": 782140
    },
    {
      "epoch": 1.280021994854775,
      "grad_norm": 1.4188311100006104,
      "learning_rate": 7.488979527289261e-06,
      "loss": 0.0252,
      "step": 782160
    },
    {
      "epoch": 1.2800547252934285,
      "grad_norm": 1.2494983673095703,
      "learning_rate": 7.488913635075744e-06,
      "loss": 0.0136,
      "step": 782180
    },
    {
      "epoch": 1.2800874557320818,
      "grad_norm": 0.3320489227771759,
      "learning_rate": 7.488847742862226e-06,
      "loss": 0.0194,
      "step": 782200
    },
    {
      "epoch": 1.280120186170735,
      "grad_norm": 0.6569296717643738,
      "learning_rate": 7.48878185064871e-06,
      "loss": 0.0227,
      "step": 782220
    },
    {
      "epoch": 1.2801529166093883,
      "grad_norm": 0.8204929828643799,
      "learning_rate": 7.4887159584351925e-06,
      "loss": 0.0194,
      "step": 782240
    },
    {
      "epoch": 1.2801856470480417,
      "grad_norm": 0.3063561022281647,
      "learning_rate": 7.488650066221675e-06,
      "loss": 0.0162,
      "step": 782260
    },
    {
      "epoch": 1.280218377486695,
      "grad_norm": 0.6377028226852417,
      "learning_rate": 7.488584174008158e-06,
      "loss": 0.0183,
      "step": 782280
    },
    {
      "epoch": 1.2802511079253485,
      "grad_norm": 0.2796325087547302,
      "learning_rate": 7.4885182817946415e-06,
      "loss": 0.0254,
      "step": 782300
    },
    {
      "epoch": 1.2802838383640018,
      "grad_norm": 0.06436517834663391,
      "learning_rate": 7.488452389581123e-06,
      "loss": 0.0142,
      "step": 782320
    },
    {
      "epoch": 1.2803165688026552,
      "grad_norm": 1.093524694442749,
      "learning_rate": 7.488386497367607e-06,
      "loss": 0.0288,
      "step": 782340
    },
    {
      "epoch": 1.2803492992413084,
      "grad_norm": 0.5434905290603638,
      "learning_rate": 7.488320605154089e-06,
      "loss": 0.0228,
      "step": 782360
    },
    {
      "epoch": 1.2803820296799617,
      "grad_norm": 0.8438672423362732,
      "learning_rate": 7.4882547129405725e-06,
      "loss": 0.021,
      "step": 782380
    },
    {
      "epoch": 1.280414760118615,
      "grad_norm": 0.5916364789009094,
      "learning_rate": 7.488188820727054e-06,
      "loss": 0.02,
      "step": 782400
    },
    {
      "epoch": 1.2804474905572685,
      "grad_norm": 0.29597678780555725,
      "learning_rate": 7.488122928513538e-06,
      "loss": 0.0184,
      "step": 782420
    },
    {
      "epoch": 1.2804802209959218,
      "grad_norm": 0.6093685030937195,
      "learning_rate": 7.4880570363000215e-06,
      "loss": 0.0197,
      "step": 782440
    },
    {
      "epoch": 1.280512951434575,
      "grad_norm": 0.7295880913734436,
      "learning_rate": 7.4879911440865034e-06,
      "loss": 0.0212,
      "step": 782460
    },
    {
      "epoch": 1.2805456818732286,
      "grad_norm": 1.1040745973587036,
      "learning_rate": 7.487925251872987e-06,
      "loss": 0.0207,
      "step": 782480
    },
    {
      "epoch": 1.2805784123118817,
      "grad_norm": 0.9647108912467957,
      "learning_rate": 7.48785935965947e-06,
      "loss": 0.029,
      "step": 782500
    },
    {
      "epoch": 1.2806111427505351,
      "grad_norm": 0.49814504384994507,
      "learning_rate": 7.4877934674459525e-06,
      "loss": 0.0165,
      "step": 782520
    },
    {
      "epoch": 1.2806438731891885,
      "grad_norm": 1.3869789838790894,
      "learning_rate": 7.487727575232435e-06,
      "loss": 0.0256,
      "step": 782540
    },
    {
      "epoch": 1.2806766036278419,
      "grad_norm": 0.3722072243690491,
      "learning_rate": 7.487661683018919e-06,
      "loss": 0.0183,
      "step": 782560
    },
    {
      "epoch": 1.2807093340664952,
      "grad_norm": 0.696618914604187,
      "learning_rate": 7.487595790805401e-06,
      "loss": 0.0193,
      "step": 782580
    },
    {
      "epoch": 1.2807420645051484,
      "grad_norm": 0.39687713980674744,
      "learning_rate": 7.487529898591884e-06,
      "loss": 0.0244,
      "step": 782600
    },
    {
      "epoch": 1.280774794943802,
      "grad_norm": 0.8085352778434753,
      "learning_rate": 7.487464006378366e-06,
      "loss": 0.019,
      "step": 782620
    },
    {
      "epoch": 1.2808075253824551,
      "grad_norm": 0.7650117874145508,
      "learning_rate": 7.48739811416485e-06,
      "loss": 0.0214,
      "step": 782640
    },
    {
      "epoch": 1.2808402558211085,
      "grad_norm": 0.2634548246860504,
      "learning_rate": 7.4873322219513325e-06,
      "loss": 0.0189,
      "step": 782660
    },
    {
      "epoch": 1.2808729862597619,
      "grad_norm": 0.6824074983596802,
      "learning_rate": 7.487266329737815e-06,
      "loss": 0.0147,
      "step": 782680
    },
    {
      "epoch": 1.2809057166984152,
      "grad_norm": 1.2555266618728638,
      "learning_rate": 7.487200437524298e-06,
      "loss": 0.0337,
      "step": 782700
    },
    {
      "epoch": 1.2809384471370686,
      "grad_norm": 1.015162706375122,
      "learning_rate": 7.487134545310782e-06,
      "loss": 0.0216,
      "step": 782720
    },
    {
      "epoch": 1.2809711775757218,
      "grad_norm": 0.319020539522171,
      "learning_rate": 7.4870686530972635e-06,
      "loss": 0.0174,
      "step": 782740
    },
    {
      "epoch": 1.2810039080143751,
      "grad_norm": 0.3677166998386383,
      "learning_rate": 7.487002760883747e-06,
      "loss": 0.0202,
      "step": 782760
    },
    {
      "epoch": 1.2810366384530285,
      "grad_norm": 0.9125677347183228,
      "learning_rate": 7.486936868670231e-06,
      "loss": 0.0181,
      "step": 782780
    },
    {
      "epoch": 1.2810693688916819,
      "grad_norm": 0.8664035201072693,
      "learning_rate": 7.4868709764567126e-06,
      "loss": 0.0237,
      "step": 782800
    },
    {
      "epoch": 1.2811020993303353,
      "grad_norm": 0.8030140399932861,
      "learning_rate": 7.486805084243196e-06,
      "loss": 0.0218,
      "step": 782820
    },
    {
      "epoch": 1.2811348297689886,
      "grad_norm": 1.080371618270874,
      "learning_rate": 7.486739192029678e-06,
      "loss": 0.023,
      "step": 782840
    },
    {
      "epoch": 1.281167560207642,
      "grad_norm": 0.09982256591320038,
      "learning_rate": 7.486673299816162e-06,
      "loss": 0.0177,
      "step": 782860
    },
    {
      "epoch": 1.2812002906462951,
      "grad_norm": 0.7273260354995728,
      "learning_rate": 7.486607407602644e-06,
      "loss": 0.0207,
      "step": 782880
    },
    {
      "epoch": 1.2812330210849485,
      "grad_norm": 0.12947805225849152,
      "learning_rate": 7.486541515389127e-06,
      "loss": 0.0193,
      "step": 782900
    },
    {
      "epoch": 1.281265751523602,
      "grad_norm": 1.1495630741119385,
      "learning_rate": 7.48647562317561e-06,
      "loss": 0.0209,
      "step": 782920
    },
    {
      "epoch": 1.2812984819622553,
      "grad_norm": 0.6353563666343689,
      "learning_rate": 7.4864097309620934e-06,
      "loss": 0.0177,
      "step": 782940
    },
    {
      "epoch": 1.2813312124009086,
      "grad_norm": 6.317371845245361,
      "learning_rate": 7.486343838748575e-06,
      "loss": 0.0346,
      "step": 782960
    },
    {
      "epoch": 1.281363942839562,
      "grad_norm": 0.48374712467193604,
      "learning_rate": 7.486277946535059e-06,
      "loss": 0.0224,
      "step": 782980
    },
    {
      "epoch": 1.2813966732782154,
      "grad_norm": 0.5374531149864197,
      "learning_rate": 7.486212054321541e-06,
      "loss": 0.0222,
      "step": 783000
    },
    {
      "epoch": 1.2814294037168685,
      "grad_norm": 0.3254794776439667,
      "learning_rate": 7.486146162108024e-06,
      "loss": 0.0168,
      "step": 783020
    },
    {
      "epoch": 1.281462134155522,
      "grad_norm": 0.22932234406471252,
      "learning_rate": 7.486080269894507e-06,
      "loss": 0.0224,
      "step": 783040
    },
    {
      "epoch": 1.2814948645941753,
      "grad_norm": 3.169081449508667,
      "learning_rate": 7.48601437768099e-06,
      "loss": 0.0205,
      "step": 783060
    },
    {
      "epoch": 1.2815275950328286,
      "grad_norm": 0.3285979628562927,
      "learning_rate": 7.485948485467473e-06,
      "loss": 0.0348,
      "step": 783080
    },
    {
      "epoch": 1.281560325471482,
      "grad_norm": 0.6538374423980713,
      "learning_rate": 7.485882593253956e-06,
      "loss": 0.0208,
      "step": 783100
    },
    {
      "epoch": 1.2815930559101352,
      "grad_norm": 0.6133907437324524,
      "learning_rate": 7.485816701040438e-06,
      "loss": 0.0239,
      "step": 783120
    },
    {
      "epoch": 1.2816257863487888,
      "grad_norm": 0.8624271750450134,
      "learning_rate": 7.485750808826922e-06,
      "loss": 0.0245,
      "step": 783140
    },
    {
      "epoch": 1.281658516787442,
      "grad_norm": 1.3559266328811646,
      "learning_rate": 7.485684916613405e-06,
      "loss": 0.0217,
      "step": 783160
    },
    {
      "epoch": 1.2816912472260953,
      "grad_norm": 3.429081439971924,
      "learning_rate": 7.485619024399887e-06,
      "loss": 0.0168,
      "step": 783180
    },
    {
      "epoch": 1.2817239776647487,
      "grad_norm": 0.8564808964729309,
      "learning_rate": 7.485553132186371e-06,
      "loss": 0.0293,
      "step": 783200
    },
    {
      "epoch": 1.281756708103402,
      "grad_norm": 0.18129296600818634,
      "learning_rate": 7.485487239972853e-06,
      "loss": 0.0152,
      "step": 783220
    },
    {
      "epoch": 1.2817894385420554,
      "grad_norm": 0.7310268878936768,
      "learning_rate": 7.485421347759336e-06,
      "loss": 0.0247,
      "step": 783240
    },
    {
      "epoch": 1.2818221689807086,
      "grad_norm": 0.8715786933898926,
      "learning_rate": 7.485355455545819e-06,
      "loss": 0.029,
      "step": 783260
    },
    {
      "epoch": 1.2818548994193621,
      "grad_norm": 1.1434102058410645,
      "learning_rate": 7.485289563332302e-06,
      "loss": 0.0289,
      "step": 783280
    },
    {
      "epoch": 1.2818876298580153,
      "grad_norm": 0.5173321962356567,
      "learning_rate": 7.4852236711187844e-06,
      "loss": 0.0185,
      "step": 783300
    },
    {
      "epoch": 1.2819203602966687,
      "grad_norm": 0.9498528242111206,
      "learning_rate": 7.485157778905268e-06,
      "loss": 0.022,
      "step": 783320
    },
    {
      "epoch": 1.281953090735322,
      "grad_norm": 0.8614157438278198,
      "learning_rate": 7.48509188669175e-06,
      "loss": 0.0161,
      "step": 783340
    },
    {
      "epoch": 1.2819858211739754,
      "grad_norm": 0.2057551145553589,
      "learning_rate": 7.4850259944782335e-06,
      "loss": 0.0181,
      "step": 783360
    },
    {
      "epoch": 1.2820185516126288,
      "grad_norm": 0.33286401629447937,
      "learning_rate": 7.484960102264715e-06,
      "loss": 0.0175,
      "step": 783380
    },
    {
      "epoch": 1.282051282051282,
      "grad_norm": 1.1061830520629883,
      "learning_rate": 7.484894210051199e-06,
      "loss": 0.0275,
      "step": 783400
    },
    {
      "epoch": 1.2820840124899353,
      "grad_norm": 0.7355127334594727,
      "learning_rate": 7.484828317837681e-06,
      "loss": 0.0183,
      "step": 783420
    },
    {
      "epoch": 1.2821167429285887,
      "grad_norm": 0.7837706804275513,
      "learning_rate": 7.4847624256241645e-06,
      "loss": 0.0293,
      "step": 783440
    },
    {
      "epoch": 1.282149473367242,
      "grad_norm": 1.7946524620056152,
      "learning_rate": 7.484696533410647e-06,
      "loss": 0.02,
      "step": 783460
    },
    {
      "epoch": 1.2821822038058954,
      "grad_norm": 0.7194579243659973,
      "learning_rate": 7.48463064119713e-06,
      "loss": 0.0289,
      "step": 783480
    },
    {
      "epoch": 1.2822149342445488,
      "grad_norm": 0.2656090557575226,
      "learning_rate": 7.4845647489836135e-06,
      "loss": 0.0189,
      "step": 783500
    },
    {
      "epoch": 1.2822476646832022,
      "grad_norm": 0.715458869934082,
      "learning_rate": 7.484498856770096e-06,
      "loss": 0.0206,
      "step": 783520
    },
    {
      "epoch": 1.2822803951218553,
      "grad_norm": 0.2370700240135193,
      "learning_rate": 7.484432964556579e-06,
      "loss": 0.0194,
      "step": 783540
    },
    {
      "epoch": 1.2823131255605087,
      "grad_norm": 0.769605278968811,
      "learning_rate": 7.484367072343062e-06,
      "loss": 0.031,
      "step": 783560
    },
    {
      "epoch": 1.282345855999162,
      "grad_norm": 1.035308837890625,
      "learning_rate": 7.484301180129545e-06,
      "loss": 0.0236,
      "step": 783580
    },
    {
      "epoch": 1.2823785864378154,
      "grad_norm": 0.6555287837982178,
      "learning_rate": 7.484235287916027e-06,
      "loss": 0.0252,
      "step": 783600
    },
    {
      "epoch": 1.2824113168764688,
      "grad_norm": 0.8201996684074402,
      "learning_rate": 7.484169395702511e-06,
      "loss": 0.022,
      "step": 783620
    },
    {
      "epoch": 1.2824440473151222,
      "grad_norm": 0.3260667026042938,
      "learning_rate": 7.484103503488993e-06,
      "loss": 0.0246,
      "step": 783640
    },
    {
      "epoch": 1.2824767777537756,
      "grad_norm": 0.24886652827262878,
      "learning_rate": 7.484037611275476e-06,
      "loss": 0.0219,
      "step": 783660
    },
    {
      "epoch": 1.2825095081924287,
      "grad_norm": 0.6756196022033691,
      "learning_rate": 7.483971719061959e-06,
      "loss": 0.0212,
      "step": 783680
    },
    {
      "epoch": 1.282542238631082,
      "grad_norm": 0.3618311882019043,
      "learning_rate": 7.483905826848442e-06,
      "loss": 0.0189,
      "step": 783700
    },
    {
      "epoch": 1.2825749690697354,
      "grad_norm": 1.8702534437179565,
      "learning_rate": 7.4838399346349245e-06,
      "loss": 0.0227,
      "step": 783720
    },
    {
      "epoch": 1.2826076995083888,
      "grad_norm": 2.221709966659546,
      "learning_rate": 7.483774042421408e-06,
      "loss": 0.0221,
      "step": 783740
    },
    {
      "epoch": 1.2826404299470422,
      "grad_norm": 1.799519419670105,
      "learning_rate": 7.48370815020789e-06,
      "loss": 0.0278,
      "step": 783760
    },
    {
      "epoch": 1.2826731603856956,
      "grad_norm": 1.0013669729232788,
      "learning_rate": 7.483642257994374e-06,
      "loss": 0.0262,
      "step": 783780
    },
    {
      "epoch": 1.282705890824349,
      "grad_norm": 0.4878315329551697,
      "learning_rate": 7.4835763657808555e-06,
      "loss": 0.0214,
      "step": 783800
    },
    {
      "epoch": 1.282738621263002,
      "grad_norm": 0.35362961888313293,
      "learning_rate": 7.483510473567339e-06,
      "loss": 0.0157,
      "step": 783820
    },
    {
      "epoch": 1.2827713517016555,
      "grad_norm": 0.2701243460178375,
      "learning_rate": 7.483444581353823e-06,
      "loss": 0.027,
      "step": 783840
    },
    {
      "epoch": 1.2828040821403088,
      "grad_norm": 1.2563480138778687,
      "learning_rate": 7.4833786891403045e-06,
      "loss": 0.0225,
      "step": 783860
    },
    {
      "epoch": 1.2828368125789622,
      "grad_norm": 0.8349887132644653,
      "learning_rate": 7.483312796926788e-06,
      "loss": 0.0177,
      "step": 783880
    },
    {
      "epoch": 1.2828695430176156,
      "grad_norm": 0.38617077469825745,
      "learning_rate": 7.483246904713271e-06,
      "loss": 0.0231,
      "step": 783900
    },
    {
      "epoch": 1.2829022734562687,
      "grad_norm": 0.45599091053009033,
      "learning_rate": 7.483181012499754e-06,
      "loss": 0.019,
      "step": 783920
    },
    {
      "epoch": 1.2829350038949223,
      "grad_norm": 0.19967934489250183,
      "learning_rate": 7.483115120286236e-06,
      "loss": 0.0223,
      "step": 783940
    },
    {
      "epoch": 1.2829677343335755,
      "grad_norm": 0.45172128081321716,
      "learning_rate": 7.48304922807272e-06,
      "loss": 0.0215,
      "step": 783960
    },
    {
      "epoch": 1.2830004647722288,
      "grad_norm": 1.0249758958816528,
      "learning_rate": 7.482983335859202e-06,
      "loss": 0.0234,
      "step": 783980
    },
    {
      "epoch": 1.2830331952108822,
      "grad_norm": 2.5350420475006104,
      "learning_rate": 7.482917443645685e-06,
      "loss": 0.0229,
      "step": 784000
    },
    {
      "epoch": 1.2830659256495356,
      "grad_norm": 0.5092291831970215,
      "learning_rate": 7.482851551432167e-06,
      "loss": 0.0269,
      "step": 784020
    },
    {
      "epoch": 1.283098656088189,
      "grad_norm": 1.5616962909698486,
      "learning_rate": 7.482785659218651e-06,
      "loss": 0.0294,
      "step": 784040
    },
    {
      "epoch": 1.283131386526842,
      "grad_norm": 0.6798684000968933,
      "learning_rate": 7.482719767005134e-06,
      "loss": 0.0162,
      "step": 784060
    },
    {
      "epoch": 1.2831641169654957,
      "grad_norm": 0.4584876000881195,
      "learning_rate": 7.482653874791616e-06,
      "loss": 0.0197,
      "step": 784080
    },
    {
      "epoch": 1.2831968474041489,
      "grad_norm": 0.9386729598045349,
      "learning_rate": 7.482587982578099e-06,
      "loss": 0.0259,
      "step": 784100
    },
    {
      "epoch": 1.2832295778428022,
      "grad_norm": 2.4523513317108154,
      "learning_rate": 7.482522090364583e-06,
      "loss": 0.0216,
      "step": 784120
    },
    {
      "epoch": 1.2832623082814556,
      "grad_norm": 0.4198061525821686,
      "learning_rate": 7.482456198151065e-06,
      "loss": 0.0207,
      "step": 784140
    },
    {
      "epoch": 1.283295038720109,
      "grad_norm": 0.49136844277381897,
      "learning_rate": 7.482390305937548e-06,
      "loss": 0.0184,
      "step": 784160
    },
    {
      "epoch": 1.2833277691587623,
      "grad_norm": 0.6303014755249023,
      "learning_rate": 7.48232441372403e-06,
      "loss": 0.0306,
      "step": 784180
    },
    {
      "epoch": 1.2833604995974155,
      "grad_norm": 1.5659732818603516,
      "learning_rate": 7.482258521510514e-06,
      "loss": 0.0203,
      "step": 784200
    },
    {
      "epoch": 1.2833932300360689,
      "grad_norm": 0.31252235174179077,
      "learning_rate": 7.482192629296997e-06,
      "loss": 0.018,
      "step": 784220
    },
    {
      "epoch": 1.2834259604747222,
      "grad_norm": 1.3185008764266968,
      "learning_rate": 7.482126737083479e-06,
      "loss": 0.0194,
      "step": 784240
    },
    {
      "epoch": 1.2834586909133756,
      "grad_norm": 0.6328277587890625,
      "learning_rate": 7.482060844869963e-06,
      "loss": 0.02,
      "step": 784260
    },
    {
      "epoch": 1.283491421352029,
      "grad_norm": 0.35547125339508057,
      "learning_rate": 7.4819949526564455e-06,
      "loss": 0.0182,
      "step": 784280
    },
    {
      "epoch": 1.2835241517906824,
      "grad_norm": 0.40082234144210815,
      "learning_rate": 7.481929060442928e-06,
      "loss": 0.0165,
      "step": 784300
    },
    {
      "epoch": 1.2835568822293357,
      "grad_norm": 0.9511527419090271,
      "learning_rate": 7.481863168229411e-06,
      "loss": 0.0261,
      "step": 784320
    },
    {
      "epoch": 1.2835896126679889,
      "grad_norm": 0.19026128947734833,
      "learning_rate": 7.4817972760158945e-06,
      "loss": 0.0189,
      "step": 784340
    },
    {
      "epoch": 1.2836223431066422,
      "grad_norm": 0.2356787770986557,
      "learning_rate": 7.481731383802376e-06,
      "loss": 0.0287,
      "step": 784360
    },
    {
      "epoch": 1.2836550735452956,
      "grad_norm": 0.5302882194519043,
      "learning_rate": 7.48166549158886e-06,
      "loss": 0.0178,
      "step": 784380
    },
    {
      "epoch": 1.283687803983949,
      "grad_norm": 0.6576477289199829,
      "learning_rate": 7.481599599375342e-06,
      "loss": 0.0272,
      "step": 784400
    },
    {
      "epoch": 1.2837205344226024,
      "grad_norm": 0.27772748470306396,
      "learning_rate": 7.4815337071618255e-06,
      "loss": 0.0195,
      "step": 784420
    },
    {
      "epoch": 1.2837532648612557,
      "grad_norm": 0.7398580312728882,
      "learning_rate": 7.481467814948307e-06,
      "loss": 0.0208,
      "step": 784440
    },
    {
      "epoch": 1.283785995299909,
      "grad_norm": 0.6013505458831787,
      "learning_rate": 7.481401922734791e-06,
      "loss": 0.0271,
      "step": 784460
    },
    {
      "epoch": 1.2838187257385623,
      "grad_norm": 0.5846239328384399,
      "learning_rate": 7.481336030521274e-06,
      "loss": 0.0186,
      "step": 784480
    },
    {
      "epoch": 1.2838514561772156,
      "grad_norm": 0.6109415292739868,
      "learning_rate": 7.4812701383077564e-06,
      "loss": 0.0328,
      "step": 784500
    },
    {
      "epoch": 1.283884186615869,
      "grad_norm": 0.5694531202316284,
      "learning_rate": 7.481204246094239e-06,
      "loss": 0.0156,
      "step": 784520
    },
    {
      "epoch": 1.2839169170545224,
      "grad_norm": 2.576085090637207,
      "learning_rate": 7.481138353880723e-06,
      "loss": 0.0345,
      "step": 784540
    },
    {
      "epoch": 1.2839496474931757,
      "grad_norm": 0.23686842620372772,
      "learning_rate": 7.4810724616672055e-06,
      "loss": 0.0204,
      "step": 784560
    },
    {
      "epoch": 1.2839823779318291,
      "grad_norm": 0.2910410463809967,
      "learning_rate": 7.481006569453688e-06,
      "loss": 0.0217,
      "step": 784580
    },
    {
      "epoch": 1.2840151083704825,
      "grad_norm": 0.7354921698570251,
      "learning_rate": 7.480940677240172e-06,
      "loss": 0.0193,
      "step": 784600
    },
    {
      "epoch": 1.2840478388091356,
      "grad_norm": 0.5284608006477356,
      "learning_rate": 7.480874785026654e-06,
      "loss": 0.0296,
      "step": 784620
    },
    {
      "epoch": 1.284080569247789,
      "grad_norm": 1.1375789642333984,
      "learning_rate": 7.480808892813137e-06,
      "loss": 0.0206,
      "step": 784640
    },
    {
      "epoch": 1.2841132996864424,
      "grad_norm": 0.5144018530845642,
      "learning_rate": 7.480743000599619e-06,
      "loss": 0.0229,
      "step": 784660
    },
    {
      "epoch": 1.2841460301250958,
      "grad_norm": 0.845410943031311,
      "learning_rate": 7.480677108386103e-06,
      "loss": 0.0157,
      "step": 784680
    },
    {
      "epoch": 1.2841787605637491,
      "grad_norm": 0.18879914283752441,
      "learning_rate": 7.4806112161725855e-06,
      "loss": 0.0188,
      "step": 784700
    },
    {
      "epoch": 1.2842114910024023,
      "grad_norm": 0.27534303069114685,
      "learning_rate": 7.480545323959068e-06,
      "loss": 0.0231,
      "step": 784720
    },
    {
      "epoch": 1.2842442214410559,
      "grad_norm": 1.283883810043335,
      "learning_rate": 7.480479431745551e-06,
      "loss": 0.026,
      "step": 784740
    },
    {
      "epoch": 1.284276951879709,
      "grad_norm": 1.2240298986434937,
      "learning_rate": 7.480413539532035e-06,
      "loss": 0.0205,
      "step": 784760
    },
    {
      "epoch": 1.2843096823183624,
      "grad_norm": 0.12418291717767715,
      "learning_rate": 7.4803476473185165e-06,
      "loss": 0.0189,
      "step": 784780
    },
    {
      "epoch": 1.2843424127570158,
      "grad_norm": 0.27160924673080444,
      "learning_rate": 7.480281755105e-06,
      "loss": 0.0165,
      "step": 784800
    },
    {
      "epoch": 1.2843751431956691,
      "grad_norm": 0.5050216317176819,
      "learning_rate": 7.480215862891482e-06,
      "loss": 0.0181,
      "step": 784820
    },
    {
      "epoch": 1.2844078736343225,
      "grad_norm": 0.43629857897758484,
      "learning_rate": 7.4801499706779656e-06,
      "loss": 0.02,
      "step": 784840
    },
    {
      "epoch": 1.2844406040729757,
      "grad_norm": 1.3169761896133423,
      "learning_rate": 7.480084078464448e-06,
      "loss": 0.017,
      "step": 784860
    },
    {
      "epoch": 1.2844733345116293,
      "grad_norm": 0.272562175989151,
      "learning_rate": 7.480018186250931e-06,
      "loss": 0.0249,
      "step": 784880
    },
    {
      "epoch": 1.2845060649502824,
      "grad_norm": 0.37912243604660034,
      "learning_rate": 7.479952294037415e-06,
      "loss": 0.0325,
      "step": 784900
    },
    {
      "epoch": 1.2845387953889358,
      "grad_norm": 0.5308573842048645,
      "learning_rate": 7.479886401823897e-06,
      "loss": 0.0155,
      "step": 784920
    },
    {
      "epoch": 1.2845715258275892,
      "grad_norm": 0.23145487904548645,
      "learning_rate": 7.47982050961038e-06,
      "loss": 0.0194,
      "step": 784940
    },
    {
      "epoch": 1.2846042562662425,
      "grad_norm": 0.37881729006767273,
      "learning_rate": 7.479754617396863e-06,
      "loss": 0.0187,
      "step": 784960
    },
    {
      "epoch": 1.284636986704896,
      "grad_norm": 1.1082122325897217,
      "learning_rate": 7.4796887251833464e-06,
      "loss": 0.0226,
      "step": 784980
    },
    {
      "epoch": 1.284669717143549,
      "grad_norm": 0.3612789511680603,
      "learning_rate": 7.479622832969828e-06,
      "loss": 0.0204,
      "step": 785000
    },
    {
      "epoch": 1.2847024475822024,
      "grad_norm": 3.786461114883423,
      "learning_rate": 7.479556940756312e-06,
      "loss": 0.0206,
      "step": 785020
    },
    {
      "epoch": 1.2847351780208558,
      "grad_norm": 0.47577688097953796,
      "learning_rate": 7.479491048542794e-06,
      "loss": 0.022,
      "step": 785040
    },
    {
      "epoch": 1.2847679084595092,
      "grad_norm": 1.6544214487075806,
      "learning_rate": 7.479425156329277e-06,
      "loss": 0.022,
      "step": 785060
    },
    {
      "epoch": 1.2848006388981625,
      "grad_norm": 1.2887245416641235,
      "learning_rate": 7.47935926411576e-06,
      "loss": 0.0276,
      "step": 785080
    },
    {
      "epoch": 1.284833369336816,
      "grad_norm": 1.0056431293487549,
      "learning_rate": 7.479293371902243e-06,
      "loss": 0.0224,
      "step": 785100
    },
    {
      "epoch": 1.2848660997754693,
      "grad_norm": 0.6404300928115845,
      "learning_rate": 7.479227479688726e-06,
      "loss": 0.024,
      "step": 785120
    },
    {
      "epoch": 1.2848988302141224,
      "grad_norm": 0.1514699012041092,
      "learning_rate": 7.479161587475209e-06,
      "loss": 0.0175,
      "step": 785140
    },
    {
      "epoch": 1.2849315606527758,
      "grad_norm": 0.5768625140190125,
      "learning_rate": 7.479095695261691e-06,
      "loss": 0.0184,
      "step": 785160
    },
    {
      "epoch": 1.2849642910914292,
      "grad_norm": 1.2946984767913818,
      "learning_rate": 7.479029803048175e-06,
      "loss": 0.033,
      "step": 785180
    },
    {
      "epoch": 1.2849970215300826,
      "grad_norm": 0.8570531010627747,
      "learning_rate": 7.4789639108346566e-06,
      "loss": 0.0224,
      "step": 785200
    },
    {
      "epoch": 1.285029751968736,
      "grad_norm": 0.35293734073638916,
      "learning_rate": 7.47889801862114e-06,
      "loss": 0.029,
      "step": 785220
    },
    {
      "epoch": 1.2850624824073893,
      "grad_norm": 0.4813549220561981,
      "learning_rate": 7.478832126407624e-06,
      "loss": 0.0266,
      "step": 785240
    },
    {
      "epoch": 1.2850952128460427,
      "grad_norm": 0.2614561915397644,
      "learning_rate": 7.478766234194106e-06,
      "loss": 0.0186,
      "step": 785260
    },
    {
      "epoch": 1.2851279432846958,
      "grad_norm": 3.964174747467041,
      "learning_rate": 7.478700341980589e-06,
      "loss": 0.0178,
      "step": 785280
    },
    {
      "epoch": 1.2851606737233492,
      "grad_norm": 0.9340643882751465,
      "learning_rate": 7.478634449767071e-06,
      "loss": 0.0267,
      "step": 785300
    },
    {
      "epoch": 1.2851934041620026,
      "grad_norm": 0.3906630873680115,
      "learning_rate": 7.478568557553555e-06,
      "loss": 0.028,
      "step": 785320
    },
    {
      "epoch": 1.285226134600656,
      "grad_norm": 1.397483229637146,
      "learning_rate": 7.4785026653400374e-06,
      "loss": 0.0282,
      "step": 785340
    },
    {
      "epoch": 1.2852588650393093,
      "grad_norm": 1.2441186904907227,
      "learning_rate": 7.478436773126521e-06,
      "loss": 0.0315,
      "step": 785360
    },
    {
      "epoch": 1.2852915954779627,
      "grad_norm": 0.6157611608505249,
      "learning_rate": 7.478370880913003e-06,
      "loss": 0.0182,
      "step": 785380
    },
    {
      "epoch": 1.285324325916616,
      "grad_norm": 0.537532389163971,
      "learning_rate": 7.4783049886994865e-06,
      "loss": 0.026,
      "step": 785400
    },
    {
      "epoch": 1.2853570563552692,
      "grad_norm": 0.36884555220603943,
      "learning_rate": 7.478239096485968e-06,
      "loss": 0.0284,
      "step": 785420
    },
    {
      "epoch": 1.2853897867939226,
      "grad_norm": 1.1166093349456787,
      "learning_rate": 7.478173204272452e-06,
      "loss": 0.0201,
      "step": 785440
    },
    {
      "epoch": 1.285422517232576,
      "grad_norm": 1.0677285194396973,
      "learning_rate": 7.478107312058934e-06,
      "loss": 0.0211,
      "step": 785460
    },
    {
      "epoch": 1.2854552476712293,
      "grad_norm": 0.8371890783309937,
      "learning_rate": 7.4780414198454175e-06,
      "loss": 0.0208,
      "step": 785480
    },
    {
      "epoch": 1.2854879781098827,
      "grad_norm": 0.2225833237171173,
      "learning_rate": 7.4779755276319e-06,
      "loss": 0.0154,
      "step": 785500
    },
    {
      "epoch": 1.2855207085485358,
      "grad_norm": 0.6334770917892456,
      "learning_rate": 7.477909635418383e-06,
      "loss": 0.0194,
      "step": 785520
    },
    {
      "epoch": 1.2855534389871894,
      "grad_norm": 0.7114800214767456,
      "learning_rate": 7.477843743204866e-06,
      "loss": 0.0289,
      "step": 785540
    },
    {
      "epoch": 1.2855861694258426,
      "grad_norm": 0.49068742990493774,
      "learning_rate": 7.477777850991349e-06,
      "loss": 0.0194,
      "step": 785560
    },
    {
      "epoch": 1.285618899864496,
      "grad_norm": 1.8923920392990112,
      "learning_rate": 7.477711958777831e-06,
      "loss": 0.0191,
      "step": 785580
    },
    {
      "epoch": 1.2856516303031493,
      "grad_norm": 0.5019195079803467,
      "learning_rate": 7.477646066564315e-06,
      "loss": 0.0221,
      "step": 785600
    },
    {
      "epoch": 1.2856843607418027,
      "grad_norm": 0.9176761507987976,
      "learning_rate": 7.477580174350798e-06,
      "loss": 0.0171,
      "step": 785620
    },
    {
      "epoch": 1.285717091180456,
      "grad_norm": 0.7617328763008118,
      "learning_rate": 7.47751428213728e-06,
      "loss": 0.022,
      "step": 785640
    },
    {
      "epoch": 1.2857498216191092,
      "grad_norm": 0.055725276470184326,
      "learning_rate": 7.477448389923764e-06,
      "loss": 0.016,
      "step": 785660
    },
    {
      "epoch": 1.2857825520577626,
      "grad_norm": 0.28061947226524353,
      "learning_rate": 7.477382497710246e-06,
      "loss": 0.0175,
      "step": 785680
    },
    {
      "epoch": 1.285815282496416,
      "grad_norm": 1.1313369274139404,
      "learning_rate": 7.477316605496729e-06,
      "loss": 0.0286,
      "step": 785700
    },
    {
      "epoch": 1.2858480129350693,
      "grad_norm": 1.400025725364685,
      "learning_rate": 7.477250713283212e-06,
      "loss": 0.0201,
      "step": 785720
    },
    {
      "epoch": 1.2858807433737227,
      "grad_norm": 1.4390575885772705,
      "learning_rate": 7.477184821069695e-06,
      "loss": 0.0218,
      "step": 785740
    },
    {
      "epoch": 1.285913473812376,
      "grad_norm": 0.4925536811351776,
      "learning_rate": 7.4771189288561775e-06,
      "loss": 0.0234,
      "step": 785760
    },
    {
      "epoch": 1.2859462042510295,
      "grad_norm": 0.4082042872905731,
      "learning_rate": 7.477053036642661e-06,
      "loss": 0.021,
      "step": 785780
    },
    {
      "epoch": 1.2859789346896826,
      "grad_norm": 0.6056219339370728,
      "learning_rate": 7.476987144429143e-06,
      "loss": 0.0194,
      "step": 785800
    },
    {
      "epoch": 1.286011665128336,
      "grad_norm": 0.36007004976272583,
      "learning_rate": 7.476921252215627e-06,
      "loss": 0.0294,
      "step": 785820
    },
    {
      "epoch": 1.2860443955669894,
      "grad_norm": 0.6018412709236145,
      "learning_rate": 7.4768553600021085e-06,
      "loss": 0.0231,
      "step": 785840
    },
    {
      "epoch": 1.2860771260056427,
      "grad_norm": 0.3570694923400879,
      "learning_rate": 7.476789467788592e-06,
      "loss": 0.0244,
      "step": 785860
    },
    {
      "epoch": 1.286109856444296,
      "grad_norm": 1.0210002660751343,
      "learning_rate": 7.476723575575075e-06,
      "loss": 0.0259,
      "step": 785880
    },
    {
      "epoch": 1.2861425868829495,
      "grad_norm": 0.48963063955307007,
      "learning_rate": 7.4766576833615575e-06,
      "loss": 0.0177,
      "step": 785900
    },
    {
      "epoch": 1.2861753173216028,
      "grad_norm": 0.5375085473060608,
      "learning_rate": 7.47659179114804e-06,
      "loss": 0.0239,
      "step": 785920
    },
    {
      "epoch": 1.286208047760256,
      "grad_norm": 0.4309799373149872,
      "learning_rate": 7.476525898934524e-06,
      "loss": 0.016,
      "step": 785940
    },
    {
      "epoch": 1.2862407781989094,
      "grad_norm": 1.7724586725234985,
      "learning_rate": 7.476460006721007e-06,
      "loss": 0.0229,
      "step": 785960
    },
    {
      "epoch": 1.2862735086375627,
      "grad_norm": 0.5668430328369141,
      "learning_rate": 7.476394114507489e-06,
      "loss": 0.0184,
      "step": 785980
    },
    {
      "epoch": 1.286306239076216,
      "grad_norm": 1.0998886823654175,
      "learning_rate": 7.476328222293973e-06,
      "loss": 0.0241,
      "step": 786000
    },
    {
      "epoch": 1.2863389695148695,
      "grad_norm": 0.5180159211158752,
      "learning_rate": 7.476262330080455e-06,
      "loss": 0.0229,
      "step": 786020
    },
    {
      "epoch": 1.2863716999535229,
      "grad_norm": 0.41696715354919434,
      "learning_rate": 7.476196437866938e-06,
      "loss": 0.0263,
      "step": 786040
    },
    {
      "epoch": 1.2864044303921762,
      "grad_norm": 1.0178009271621704,
      "learning_rate": 7.47613054565342e-06,
      "loss": 0.029,
      "step": 786060
    },
    {
      "epoch": 1.2864371608308294,
      "grad_norm": 1.1598165035247803,
      "learning_rate": 7.476064653439904e-06,
      "loss": 0.0206,
      "step": 786080
    },
    {
      "epoch": 1.2864698912694827,
      "grad_norm": 0.7716823220252991,
      "learning_rate": 7.475998761226387e-06,
      "loss": 0.0188,
      "step": 786100
    },
    {
      "epoch": 1.2865026217081361,
      "grad_norm": 1.2405210733413696,
      "learning_rate": 7.475932869012869e-06,
      "loss": 0.023,
      "step": 786120
    },
    {
      "epoch": 1.2865353521467895,
      "grad_norm": 1.9873034954071045,
      "learning_rate": 7.475866976799352e-06,
      "loss": 0.0207,
      "step": 786140
    },
    {
      "epoch": 1.2865680825854429,
      "grad_norm": 0.9637474417686462,
      "learning_rate": 7.475801084585836e-06,
      "loss": 0.0202,
      "step": 786160
    },
    {
      "epoch": 1.286600813024096,
      "grad_norm": 0.16807709634304047,
      "learning_rate": 7.475735192372318e-06,
      "loss": 0.0201,
      "step": 786180
    },
    {
      "epoch": 1.2866335434627496,
      "grad_norm": 0.4252076745033264,
      "learning_rate": 7.475669300158801e-06,
      "loss": 0.0227,
      "step": 786200
    },
    {
      "epoch": 1.2866662739014028,
      "grad_norm": 0.5248469114303589,
      "learning_rate": 7.475603407945283e-06,
      "loss": 0.0223,
      "step": 786220
    },
    {
      "epoch": 1.2866990043400561,
      "grad_norm": 0.2121545374393463,
      "learning_rate": 7.475537515731767e-06,
      "loss": 0.0211,
      "step": 786240
    },
    {
      "epoch": 1.2867317347787095,
      "grad_norm": 0.4609435796737671,
      "learning_rate": 7.4754716235182486e-06,
      "loss": 0.0238,
      "step": 786260
    },
    {
      "epoch": 1.2867644652173629,
      "grad_norm": 2.672926664352417,
      "learning_rate": 7.475405731304732e-06,
      "loss": 0.0239,
      "step": 786280
    },
    {
      "epoch": 1.2867971956560162,
      "grad_norm": 0.619586169719696,
      "learning_rate": 7.475339839091216e-06,
      "loss": 0.0204,
      "step": 786300
    },
    {
      "epoch": 1.2868299260946694,
      "grad_norm": 0.16213731467723846,
      "learning_rate": 7.475273946877698e-06,
      "loss": 0.0208,
      "step": 786320
    },
    {
      "epoch": 1.286862656533323,
      "grad_norm": 0.460420697927475,
      "learning_rate": 7.475208054664181e-06,
      "loss": 0.0288,
      "step": 786340
    },
    {
      "epoch": 1.2868953869719761,
      "grad_norm": 0.566266655921936,
      "learning_rate": 7.475142162450664e-06,
      "loss": 0.0188,
      "step": 786360
    },
    {
      "epoch": 1.2869281174106295,
      "grad_norm": 0.6608726382255554,
      "learning_rate": 7.475076270237147e-06,
      "loss": 0.0234,
      "step": 786380
    },
    {
      "epoch": 1.2869608478492829,
      "grad_norm": 1.3822906017303467,
      "learning_rate": 7.4750103780236294e-06,
      "loss": 0.0271,
      "step": 786400
    },
    {
      "epoch": 1.2869935782879363,
      "grad_norm": 0.45451629161834717,
      "learning_rate": 7.474944485810113e-06,
      "loss": 0.0211,
      "step": 786420
    },
    {
      "epoch": 1.2870263087265896,
      "grad_norm": 1.0756957530975342,
      "learning_rate": 7.474878593596595e-06,
      "loss": 0.0243,
      "step": 786440
    },
    {
      "epoch": 1.2870590391652428,
      "grad_norm": 0.5748153328895569,
      "learning_rate": 7.4748127013830785e-06,
      "loss": 0.0175,
      "step": 786460
    },
    {
      "epoch": 1.2870917696038962,
      "grad_norm": 0.3395403027534485,
      "learning_rate": 7.47474680916956e-06,
      "loss": 0.0199,
      "step": 786480
    },
    {
      "epoch": 1.2871245000425495,
      "grad_norm": 0.15863364934921265,
      "learning_rate": 7.474680916956044e-06,
      "loss": 0.0237,
      "step": 786500
    },
    {
      "epoch": 1.287157230481203,
      "grad_norm": 1.3009779453277588,
      "learning_rate": 7.474615024742527e-06,
      "loss": 0.0213,
      "step": 786520
    },
    {
      "epoch": 1.2871899609198563,
      "grad_norm": 0.7982473969459534,
      "learning_rate": 7.4745491325290095e-06,
      "loss": 0.0146,
      "step": 786540
    },
    {
      "epoch": 1.2872226913585096,
      "grad_norm": 0.6147699356079102,
      "learning_rate": 7.474483240315492e-06,
      "loss": 0.0166,
      "step": 786560
    },
    {
      "epoch": 1.287255421797163,
      "grad_norm": 0.3518696129322052,
      "learning_rate": 7.474417348101976e-06,
      "loss": 0.0153,
      "step": 786580
    },
    {
      "epoch": 1.2872881522358162,
      "grad_norm": 1.0953702926635742,
      "learning_rate": 7.474351455888458e-06,
      "loss": 0.019,
      "step": 786600
    },
    {
      "epoch": 1.2873208826744695,
      "grad_norm": 0.27022063732147217,
      "learning_rate": 7.474285563674941e-06,
      "loss": 0.025,
      "step": 786620
    },
    {
      "epoch": 1.287353613113123,
      "grad_norm": 0.12135566025972366,
      "learning_rate": 7.474219671461423e-06,
      "loss": 0.0265,
      "step": 786640
    },
    {
      "epoch": 1.2873863435517763,
      "grad_norm": 0.7361907958984375,
      "learning_rate": 7.474153779247907e-06,
      "loss": 0.0228,
      "step": 786660
    },
    {
      "epoch": 1.2874190739904297,
      "grad_norm": 1.4492545127868652,
      "learning_rate": 7.47408788703439e-06,
      "loss": 0.0279,
      "step": 786680
    },
    {
      "epoch": 1.287451804429083,
      "grad_norm": 0.18495818972587585,
      "learning_rate": 7.474021994820872e-06,
      "loss": 0.0153,
      "step": 786700
    },
    {
      "epoch": 1.2874845348677364,
      "grad_norm": 1.4170559644699097,
      "learning_rate": 7.473956102607356e-06,
      "loss": 0.0244,
      "step": 786720
    },
    {
      "epoch": 1.2875172653063895,
      "grad_norm": 0.3640991449356079,
      "learning_rate": 7.4738902103938385e-06,
      "loss": 0.0204,
      "step": 786740
    },
    {
      "epoch": 1.287549995745043,
      "grad_norm": 0.518868625164032,
      "learning_rate": 7.473824318180321e-06,
      "loss": 0.0276,
      "step": 786760
    },
    {
      "epoch": 1.2875827261836963,
      "grad_norm": 0.4322916567325592,
      "learning_rate": 7.473758425966804e-06,
      "loss": 0.0209,
      "step": 786780
    },
    {
      "epoch": 1.2876154566223497,
      "grad_norm": 0.6111189126968384,
      "learning_rate": 7.473692533753288e-06,
      "loss": 0.0179,
      "step": 786800
    },
    {
      "epoch": 1.287648187061003,
      "grad_norm": 3.8304686546325684,
      "learning_rate": 7.4736266415397695e-06,
      "loss": 0.0251,
      "step": 786820
    },
    {
      "epoch": 1.2876809174996564,
      "grad_norm": 0.26243993639945984,
      "learning_rate": 7.473560749326253e-06,
      "loss": 0.0244,
      "step": 786840
    },
    {
      "epoch": 1.2877136479383098,
      "grad_norm": 0.333703875541687,
      "learning_rate": 7.473494857112735e-06,
      "loss": 0.016,
      "step": 786860
    },
    {
      "epoch": 1.287746378376963,
      "grad_norm": 0.6596125960350037,
      "learning_rate": 7.4734289648992186e-06,
      "loss": 0.0225,
      "step": 786880
    },
    {
      "epoch": 1.2877791088156163,
      "grad_norm": 0.3228641748428345,
      "learning_rate": 7.473363072685701e-06,
      "loss": 0.0328,
      "step": 786900
    },
    {
      "epoch": 1.2878118392542697,
      "grad_norm": 0.9342860579490662,
      "learning_rate": 7.473297180472184e-06,
      "loss": 0.0301,
      "step": 786920
    },
    {
      "epoch": 1.287844569692923,
      "grad_norm": 4.103274822235107,
      "learning_rate": 7.473231288258667e-06,
      "loss": 0.0213,
      "step": 786940
    },
    {
      "epoch": 1.2878773001315764,
      "grad_norm": 0.4854244589805603,
      "learning_rate": 7.47316539604515e-06,
      "loss": 0.0278,
      "step": 786960
    },
    {
      "epoch": 1.2879100305702296,
      "grad_norm": 0.8097220659255981,
      "learning_rate": 7.473099503831632e-06,
      "loss": 0.0188,
      "step": 786980
    },
    {
      "epoch": 1.2879427610088832,
      "grad_norm": 0.4988071322441101,
      "learning_rate": 7.473033611618116e-06,
      "loss": 0.0193,
      "step": 787000
    },
    {
      "epoch": 1.2879754914475363,
      "grad_norm": 0.9891933798789978,
      "learning_rate": 7.4729677194045994e-06,
      "loss": 0.0207,
      "step": 787020
    },
    {
      "epoch": 1.2880082218861897,
      "grad_norm": 0.39565974473953247,
      "learning_rate": 7.472901827191081e-06,
      "loss": 0.016,
      "step": 787040
    },
    {
      "epoch": 1.288040952324843,
      "grad_norm": 0.4740329384803772,
      "learning_rate": 7.472835934977565e-06,
      "loss": 0.0288,
      "step": 787060
    },
    {
      "epoch": 1.2880736827634964,
      "grad_norm": 0.4980093538761139,
      "learning_rate": 7.472770042764047e-06,
      "loss": 0.0267,
      "step": 787080
    },
    {
      "epoch": 1.2881064132021498,
      "grad_norm": 0.8992368578910828,
      "learning_rate": 7.47270415055053e-06,
      "loss": 0.0215,
      "step": 787100
    },
    {
      "epoch": 1.288139143640803,
      "grad_norm": 1.4653438329696655,
      "learning_rate": 7.472638258337013e-06,
      "loss": 0.0299,
      "step": 787120
    },
    {
      "epoch": 1.2881718740794565,
      "grad_norm": 0.7729902863502502,
      "learning_rate": 7.472572366123496e-06,
      "loss": 0.0275,
      "step": 787140
    },
    {
      "epoch": 1.2882046045181097,
      "grad_norm": 0.5409671664237976,
      "learning_rate": 7.472506473909979e-06,
      "loss": 0.0253,
      "step": 787160
    },
    {
      "epoch": 1.288237334956763,
      "grad_norm": 0.7791005969047546,
      "learning_rate": 7.472440581696462e-06,
      "loss": 0.0241,
      "step": 787180
    },
    {
      "epoch": 1.2882700653954164,
      "grad_norm": 0.5532546639442444,
      "learning_rate": 7.472374689482944e-06,
      "loss": 0.0222,
      "step": 787200
    },
    {
      "epoch": 1.2883027958340698,
      "grad_norm": 0.352245956659317,
      "learning_rate": 7.472308797269428e-06,
      "loss": 0.0235,
      "step": 787220
    },
    {
      "epoch": 1.2883355262727232,
      "grad_norm": 2.1585893630981445,
      "learning_rate": 7.47224290505591e-06,
      "loss": 0.0257,
      "step": 787240
    },
    {
      "epoch": 1.2883682567113763,
      "grad_norm": 0.3368063271045685,
      "learning_rate": 7.472177012842393e-06,
      "loss": 0.0286,
      "step": 787260
    },
    {
      "epoch": 1.2884009871500297,
      "grad_norm": 0.9086389541625977,
      "learning_rate": 7.472111120628875e-06,
      "loss": 0.0308,
      "step": 787280
    },
    {
      "epoch": 1.288433717588683,
      "grad_norm": 0.2445775419473648,
      "learning_rate": 7.472045228415359e-06,
      "loss": 0.0242,
      "step": 787300
    },
    {
      "epoch": 1.2884664480273365,
      "grad_norm": 0.5494676232337952,
      "learning_rate": 7.471979336201841e-06,
      "loss": 0.0256,
      "step": 787320
    },
    {
      "epoch": 1.2884991784659898,
      "grad_norm": 1.2902910709381104,
      "learning_rate": 7.471913443988324e-06,
      "loss": 0.0244,
      "step": 787340
    },
    {
      "epoch": 1.2885319089046432,
      "grad_norm": 2.6720409393310547,
      "learning_rate": 7.471847551774808e-06,
      "loss": 0.0261,
      "step": 787360
    },
    {
      "epoch": 1.2885646393432966,
      "grad_norm": 0.8046788573265076,
      "learning_rate": 7.4717816595612904e-06,
      "loss": 0.026,
      "step": 787380
    },
    {
      "epoch": 1.2885973697819497,
      "grad_norm": 0.2725915014743805,
      "learning_rate": 7.471715767347773e-06,
      "loss": 0.0282,
      "step": 787400
    },
    {
      "epoch": 1.288630100220603,
      "grad_norm": 1.3416088819503784,
      "learning_rate": 7.471649875134256e-06,
      "loss": 0.027,
      "step": 787420
    },
    {
      "epoch": 1.2886628306592565,
      "grad_norm": 0.3171676695346832,
      "learning_rate": 7.4715839829207395e-06,
      "loss": 0.0235,
      "step": 787440
    },
    {
      "epoch": 1.2886955610979098,
      "grad_norm": 0.5717050433158875,
      "learning_rate": 7.471518090707221e-06,
      "loss": 0.0239,
      "step": 787460
    },
    {
      "epoch": 1.2887282915365632,
      "grad_norm": 1.0039544105529785,
      "learning_rate": 7.471452198493705e-06,
      "loss": 0.0241,
      "step": 787480
    },
    {
      "epoch": 1.2887610219752166,
      "grad_norm": 0.4373413324356079,
      "learning_rate": 7.471386306280187e-06,
      "loss": 0.0158,
      "step": 787500
    },
    {
      "epoch": 1.28879375241387,
      "grad_norm": 1.9004919528961182,
      "learning_rate": 7.4713204140666705e-06,
      "loss": 0.0184,
      "step": 787520
    },
    {
      "epoch": 1.288826482852523,
      "grad_norm": 0.8846612572669983,
      "learning_rate": 7.471254521853153e-06,
      "loss": 0.0259,
      "step": 787540
    },
    {
      "epoch": 1.2888592132911765,
      "grad_norm": 1.9082090854644775,
      "learning_rate": 7.471188629639636e-06,
      "loss": 0.032,
      "step": 787560
    },
    {
      "epoch": 1.2888919437298298,
      "grad_norm": 0.2395874708890915,
      "learning_rate": 7.471122737426119e-06,
      "loss": 0.0161,
      "step": 787580
    },
    {
      "epoch": 1.2889246741684832,
      "grad_norm": 0.5690023899078369,
      "learning_rate": 7.471056845212602e-06,
      "loss": 0.0155,
      "step": 787600
    },
    {
      "epoch": 1.2889574046071366,
      "grad_norm": 0.48856452107429504,
      "learning_rate": 7.470990952999084e-06,
      "loss": 0.025,
      "step": 787620
    },
    {
      "epoch": 1.28899013504579,
      "grad_norm": 0.6338707804679871,
      "learning_rate": 7.470925060785568e-06,
      "loss": 0.0154,
      "step": 787640
    },
    {
      "epoch": 1.2890228654844433,
      "grad_norm": 0.6006226539611816,
      "learning_rate": 7.47085916857205e-06,
      "loss": 0.0242,
      "step": 787660
    },
    {
      "epoch": 1.2890555959230965,
      "grad_norm": 0.8946638107299805,
      "learning_rate": 7.470793276358533e-06,
      "loss": 0.0176,
      "step": 787680
    },
    {
      "epoch": 1.2890883263617499,
      "grad_norm": 0.745709240436554,
      "learning_rate": 7.470727384145017e-06,
      "loss": 0.0255,
      "step": 787700
    },
    {
      "epoch": 1.2891210568004032,
      "grad_norm": 0.7071050405502319,
      "learning_rate": 7.470661491931499e-06,
      "loss": 0.0215,
      "step": 787720
    },
    {
      "epoch": 1.2891537872390566,
      "grad_norm": 0.4614618718624115,
      "learning_rate": 7.470595599717982e-06,
      "loss": 0.0253,
      "step": 787740
    },
    {
      "epoch": 1.28918651767771,
      "grad_norm": 0.7346411347389221,
      "learning_rate": 7.470529707504465e-06,
      "loss": 0.0246,
      "step": 787760
    },
    {
      "epoch": 1.2892192481163631,
      "grad_norm": 1.8303661346435547,
      "learning_rate": 7.470463815290948e-06,
      "loss": 0.0255,
      "step": 787780
    },
    {
      "epoch": 1.2892519785550167,
      "grad_norm": 0.8229741454124451,
      "learning_rate": 7.4703979230774305e-06,
      "loss": 0.0294,
      "step": 787800
    },
    {
      "epoch": 1.2892847089936699,
      "grad_norm": 0.5211855173110962,
      "learning_rate": 7.470332030863914e-06,
      "loss": 0.0287,
      "step": 787820
    },
    {
      "epoch": 1.2893174394323232,
      "grad_norm": 0.3505956530570984,
      "learning_rate": 7.470266138650396e-06,
      "loss": 0.024,
      "step": 787840
    },
    {
      "epoch": 1.2893501698709766,
      "grad_norm": 0.33631595969200134,
      "learning_rate": 7.47020024643688e-06,
      "loss": 0.0267,
      "step": 787860
    },
    {
      "epoch": 1.28938290030963,
      "grad_norm": 0.38710150122642517,
      "learning_rate": 7.4701343542233615e-06,
      "loss": 0.0168,
      "step": 787880
    },
    {
      "epoch": 1.2894156307482834,
      "grad_norm": 0.706249475479126,
      "learning_rate": 7.470068462009845e-06,
      "loss": 0.0193,
      "step": 787900
    },
    {
      "epoch": 1.2894483611869365,
      "grad_norm": 0.1809726357460022,
      "learning_rate": 7.470002569796328e-06,
      "loss": 0.0177,
      "step": 787920
    },
    {
      "epoch": 1.28948109162559,
      "grad_norm": 0.7129048705101013,
      "learning_rate": 7.4699366775828106e-06,
      "loss": 0.0137,
      "step": 787940
    },
    {
      "epoch": 1.2895138220642433,
      "grad_norm": 0.2078150063753128,
      "learning_rate": 7.469870785369293e-06,
      "loss": 0.0248,
      "step": 787960
    },
    {
      "epoch": 1.2895465525028966,
      "grad_norm": 0.7358903884887695,
      "learning_rate": 7.469804893155777e-06,
      "loss": 0.0124,
      "step": 787980
    },
    {
      "epoch": 1.28957928294155,
      "grad_norm": 0.8893588781356812,
      "learning_rate": 7.469739000942259e-06,
      "loss": 0.0256,
      "step": 788000
    },
    {
      "epoch": 1.2896120133802034,
      "grad_norm": 0.7152175307273865,
      "learning_rate": 7.469673108728742e-06,
      "loss": 0.0194,
      "step": 788020
    },
    {
      "epoch": 1.2896447438188567,
      "grad_norm": 1.3802577257156372,
      "learning_rate": 7.469607216515224e-06,
      "loss": 0.0265,
      "step": 788040
    },
    {
      "epoch": 1.28967747425751,
      "grad_norm": 0.8865030407905579,
      "learning_rate": 7.469541324301708e-06,
      "loss": 0.0226,
      "step": 788060
    },
    {
      "epoch": 1.2897102046961633,
      "grad_norm": 0.21656139194965363,
      "learning_rate": 7.469475432088191e-06,
      "loss": 0.0219,
      "step": 788080
    },
    {
      "epoch": 1.2897429351348166,
      "grad_norm": 0.27743637561798096,
      "learning_rate": 7.469409539874673e-06,
      "loss": 0.0166,
      "step": 788100
    },
    {
      "epoch": 1.28977566557347,
      "grad_norm": 0.8538619875907898,
      "learning_rate": 7.469343647661157e-06,
      "loss": 0.0271,
      "step": 788120
    },
    {
      "epoch": 1.2898083960121234,
      "grad_norm": 0.6586977243423462,
      "learning_rate": 7.46927775544764e-06,
      "loss": 0.0214,
      "step": 788140
    },
    {
      "epoch": 1.2898411264507768,
      "grad_norm": 1.8312625885009766,
      "learning_rate": 7.469211863234122e-06,
      "loss": 0.0187,
      "step": 788160
    },
    {
      "epoch": 1.2898738568894301,
      "grad_norm": 1.2013800144195557,
      "learning_rate": 7.469145971020605e-06,
      "loss": 0.02,
      "step": 788180
    },
    {
      "epoch": 1.2899065873280833,
      "grad_norm": 0.5503398776054382,
      "learning_rate": 7.469080078807089e-06,
      "loss": 0.0185,
      "step": 788200
    },
    {
      "epoch": 1.2899393177667366,
      "grad_norm": 0.44857484102249146,
      "learning_rate": 7.469014186593571e-06,
      "loss": 0.0216,
      "step": 788220
    },
    {
      "epoch": 1.28997204820539,
      "grad_norm": 0.609224259853363,
      "learning_rate": 7.468948294380054e-06,
      "loss": 0.0234,
      "step": 788240
    },
    {
      "epoch": 1.2900047786440434,
      "grad_norm": 0.577010989189148,
      "learning_rate": 7.468882402166536e-06,
      "loss": 0.0226,
      "step": 788260
    },
    {
      "epoch": 1.2900375090826968,
      "grad_norm": 2.574312925338745,
      "learning_rate": 7.46881650995302e-06,
      "loss": 0.0189,
      "step": 788280
    },
    {
      "epoch": 1.2900702395213501,
      "grad_norm": 0.3747694194316864,
      "learning_rate": 7.4687506177395016e-06,
      "loss": 0.0307,
      "step": 788300
    },
    {
      "epoch": 1.2901029699600035,
      "grad_norm": 0.5802770256996155,
      "learning_rate": 7.468684725525985e-06,
      "loss": 0.0212,
      "step": 788320
    },
    {
      "epoch": 1.2901357003986567,
      "grad_norm": 0.3536067306995392,
      "learning_rate": 7.468618833312468e-06,
      "loss": 0.0264,
      "step": 788340
    },
    {
      "epoch": 1.29016843083731,
      "grad_norm": 0.2399667352437973,
      "learning_rate": 7.468552941098951e-06,
      "loss": 0.0226,
      "step": 788360
    },
    {
      "epoch": 1.2902011612759634,
      "grad_norm": 0.9594599008560181,
      "learning_rate": 7.468487048885433e-06,
      "loss": 0.0263,
      "step": 788380
    },
    {
      "epoch": 1.2902338917146168,
      "grad_norm": 0.5065520405769348,
      "learning_rate": 7.468421156671917e-06,
      "loss": 0.0324,
      "step": 788400
    },
    {
      "epoch": 1.2902666221532701,
      "grad_norm": 0.25473839044570923,
      "learning_rate": 7.4683552644584e-06,
      "loss": 0.0203,
      "step": 788420
    },
    {
      "epoch": 1.2902993525919233,
      "grad_norm": 0.27244478464126587,
      "learning_rate": 7.4682893722448824e-06,
      "loss": 0.022,
      "step": 788440
    },
    {
      "epoch": 1.290332083030577,
      "grad_norm": 0.22915440797805786,
      "learning_rate": 7.468223480031366e-06,
      "loss": 0.0183,
      "step": 788460
    },
    {
      "epoch": 1.29036481346923,
      "grad_norm": 0.9381364583969116,
      "learning_rate": 7.468157587817848e-06,
      "loss": 0.0199,
      "step": 788480
    },
    {
      "epoch": 1.2903975439078834,
      "grad_norm": 0.394165962934494,
      "learning_rate": 7.4680916956043315e-06,
      "loss": 0.0219,
      "step": 788500
    },
    {
      "epoch": 1.2904302743465368,
      "grad_norm": 0.22863177955150604,
      "learning_rate": 7.468025803390813e-06,
      "loss": 0.0238,
      "step": 788520
    },
    {
      "epoch": 1.2904630047851902,
      "grad_norm": 1.5543709993362427,
      "learning_rate": 7.467959911177297e-06,
      "loss": 0.0181,
      "step": 788540
    },
    {
      "epoch": 1.2904957352238435,
      "grad_norm": 0.7967187166213989,
      "learning_rate": 7.46789401896378e-06,
      "loss": 0.0245,
      "step": 788560
    },
    {
      "epoch": 1.2905284656624967,
      "grad_norm": 0.3789364993572235,
      "learning_rate": 7.4678281267502625e-06,
      "loss": 0.0243,
      "step": 788580
    },
    {
      "epoch": 1.2905611961011503,
      "grad_norm": 0.3768739104270935,
      "learning_rate": 7.467762234536745e-06,
      "loss": 0.0226,
      "step": 788600
    },
    {
      "epoch": 1.2905939265398034,
      "grad_norm": 0.5802837610244751,
      "learning_rate": 7.467696342323229e-06,
      "loss": 0.0303,
      "step": 788620
    },
    {
      "epoch": 1.2906266569784568,
      "grad_norm": 0.548757016658783,
      "learning_rate": 7.467630450109711e-06,
      "loss": 0.0271,
      "step": 788640
    },
    {
      "epoch": 1.2906593874171102,
      "grad_norm": 0.4944803714752197,
      "learning_rate": 7.467564557896194e-06,
      "loss": 0.0164,
      "step": 788660
    },
    {
      "epoch": 1.2906921178557635,
      "grad_norm": 0.5005621314048767,
      "learning_rate": 7.467498665682676e-06,
      "loss": 0.0151,
      "step": 788680
    },
    {
      "epoch": 1.290724848294417,
      "grad_norm": 0.5650386810302734,
      "learning_rate": 7.46743277346916e-06,
      "loss": 0.0204,
      "step": 788700
    },
    {
      "epoch": 1.29075757873307,
      "grad_norm": 1.2909332513809204,
      "learning_rate": 7.4673668812556425e-06,
      "loss": 0.0241,
      "step": 788720
    },
    {
      "epoch": 1.2907903091717234,
      "grad_norm": 0.6585971713066101,
      "learning_rate": 7.467300989042125e-06,
      "loss": 0.0215,
      "step": 788740
    },
    {
      "epoch": 1.2908230396103768,
      "grad_norm": 1.0006502866744995,
      "learning_rate": 7.467235096828609e-06,
      "loss": 0.0273,
      "step": 788760
    },
    {
      "epoch": 1.2908557700490302,
      "grad_norm": 0.6943265199661255,
      "learning_rate": 7.4671692046150915e-06,
      "loss": 0.0213,
      "step": 788780
    },
    {
      "epoch": 1.2908885004876836,
      "grad_norm": 1.5716346502304077,
      "learning_rate": 7.467103312401574e-06,
      "loss": 0.0318,
      "step": 788800
    },
    {
      "epoch": 1.290921230926337,
      "grad_norm": 1.7349129915237427,
      "learning_rate": 7.467037420188057e-06,
      "loss": 0.0211,
      "step": 788820
    },
    {
      "epoch": 1.2909539613649903,
      "grad_norm": 0.45238909125328064,
      "learning_rate": 7.466971527974541e-06,
      "loss": 0.0234,
      "step": 788840
    },
    {
      "epoch": 1.2909866918036434,
      "grad_norm": 0.788914144039154,
      "learning_rate": 7.4669056357610225e-06,
      "loss": 0.0255,
      "step": 788860
    },
    {
      "epoch": 1.2910194222422968,
      "grad_norm": 0.9149057269096375,
      "learning_rate": 7.466839743547506e-06,
      "loss": 0.0162,
      "step": 788880
    },
    {
      "epoch": 1.2910521526809502,
      "grad_norm": 0.5048960447311401,
      "learning_rate": 7.466773851333988e-06,
      "loss": 0.0186,
      "step": 788900
    },
    {
      "epoch": 1.2910848831196036,
      "grad_norm": 0.8458941578865051,
      "learning_rate": 7.4667079591204716e-06,
      "loss": 0.0155,
      "step": 788920
    },
    {
      "epoch": 1.291117613558257,
      "grad_norm": 2.415199041366577,
      "learning_rate": 7.466642066906954e-06,
      "loss": 0.0325,
      "step": 788940
    },
    {
      "epoch": 1.2911503439969103,
      "grad_norm": 0.3865532875061035,
      "learning_rate": 7.466576174693437e-06,
      "loss": 0.0192,
      "step": 788960
    },
    {
      "epoch": 1.2911830744355637,
      "grad_norm": 1.0963424444198608,
      "learning_rate": 7.46651028247992e-06,
      "loss": 0.0242,
      "step": 788980
    },
    {
      "epoch": 1.2912158048742168,
      "grad_norm": 1.1396825313568115,
      "learning_rate": 7.466444390266403e-06,
      "loss": 0.0234,
      "step": 789000
    },
    {
      "epoch": 1.2912485353128702,
      "grad_norm": 0.7684116363525391,
      "learning_rate": 7.466378498052885e-06,
      "loss": 0.0359,
      "step": 789020
    },
    {
      "epoch": 1.2912812657515236,
      "grad_norm": 0.4227669835090637,
      "learning_rate": 7.466312605839369e-06,
      "loss": 0.0178,
      "step": 789040
    },
    {
      "epoch": 1.291313996190177,
      "grad_norm": 0.30281582474708557,
      "learning_rate": 7.466246713625851e-06,
      "loss": 0.0197,
      "step": 789060
    },
    {
      "epoch": 1.2913467266288303,
      "grad_norm": 0.5963484644889832,
      "learning_rate": 7.466180821412334e-06,
      "loss": 0.0148,
      "step": 789080
    },
    {
      "epoch": 1.2913794570674837,
      "grad_norm": 1.5399090051651,
      "learning_rate": 7.466114929198816e-06,
      "loss": 0.0236,
      "step": 789100
    },
    {
      "epoch": 1.291412187506137,
      "grad_norm": 0.6236091256141663,
      "learning_rate": 7.4660490369853e-06,
      "loss": 0.0243,
      "step": 789120
    },
    {
      "epoch": 1.2914449179447902,
      "grad_norm": 0.40756309032440186,
      "learning_rate": 7.465983144771783e-06,
      "loss": 0.0299,
      "step": 789140
    },
    {
      "epoch": 1.2914776483834436,
      "grad_norm": 0.4474295675754547,
      "learning_rate": 7.465917252558265e-06,
      "loss": 0.0161,
      "step": 789160
    },
    {
      "epoch": 1.291510378822097,
      "grad_norm": 0.5981482863426208,
      "learning_rate": 7.465851360344749e-06,
      "loss": 0.0285,
      "step": 789180
    },
    {
      "epoch": 1.2915431092607503,
      "grad_norm": 0.7890250086784363,
      "learning_rate": 7.465785468131232e-06,
      "loss": 0.0252,
      "step": 789200
    },
    {
      "epoch": 1.2915758396994037,
      "grad_norm": 0.8671162724494934,
      "learning_rate": 7.465719575917715e-06,
      "loss": 0.024,
      "step": 789220
    },
    {
      "epoch": 1.2916085701380569,
      "grad_norm": 0.4964852035045624,
      "learning_rate": 7.465653683704197e-06,
      "loss": 0.0145,
      "step": 789240
    },
    {
      "epoch": 1.2916413005767104,
      "grad_norm": 0.6163989305496216,
      "learning_rate": 7.465587791490681e-06,
      "loss": 0.0176,
      "step": 789260
    },
    {
      "epoch": 1.2916740310153636,
      "grad_norm": 0.16213107109069824,
      "learning_rate": 7.465521899277163e-06,
      "loss": 0.0169,
      "step": 789280
    },
    {
      "epoch": 1.291706761454017,
      "grad_norm": 0.5014711022377014,
      "learning_rate": 7.465456007063646e-06,
      "loss": 0.023,
      "step": 789300
    },
    {
      "epoch": 1.2917394918926703,
      "grad_norm": 0.765670895576477,
      "learning_rate": 7.465390114850128e-06,
      "loss": 0.0292,
      "step": 789320
    },
    {
      "epoch": 1.2917722223313237,
      "grad_norm": 0.1646323800086975,
      "learning_rate": 7.465324222636612e-06,
      "loss": 0.0162,
      "step": 789340
    },
    {
      "epoch": 1.291804952769977,
      "grad_norm": 0.8128507137298584,
      "learning_rate": 7.465258330423094e-06,
      "loss": 0.0176,
      "step": 789360
    },
    {
      "epoch": 1.2918376832086302,
      "grad_norm": 0.2977414131164551,
      "learning_rate": 7.465192438209577e-06,
      "loss": 0.0163,
      "step": 789380
    },
    {
      "epoch": 1.2918704136472838,
      "grad_norm": 0.2022773176431656,
      "learning_rate": 7.46512654599606e-06,
      "loss": 0.0274,
      "step": 789400
    },
    {
      "epoch": 1.291903144085937,
      "grad_norm": 0.2843250036239624,
      "learning_rate": 7.4650606537825435e-06,
      "loss": 0.0198,
      "step": 789420
    },
    {
      "epoch": 1.2919358745245904,
      "grad_norm": 0.2022407054901123,
      "learning_rate": 7.464994761569025e-06,
      "loss": 0.0254,
      "step": 789440
    },
    {
      "epoch": 1.2919686049632437,
      "grad_norm": 0.7595084309577942,
      "learning_rate": 7.464928869355509e-06,
      "loss": 0.0305,
      "step": 789460
    },
    {
      "epoch": 1.292001335401897,
      "grad_norm": 1.3636724948883057,
      "learning_rate": 7.4648629771419925e-06,
      "loss": 0.0268,
      "step": 789480
    },
    {
      "epoch": 1.2920340658405505,
      "grad_norm": 0.4218643307685852,
      "learning_rate": 7.464797084928474e-06,
      "loss": 0.0201,
      "step": 789500
    },
    {
      "epoch": 1.2920667962792036,
      "grad_norm": 0.8612869381904602,
      "learning_rate": 7.464731192714958e-06,
      "loss": 0.0169,
      "step": 789520
    },
    {
      "epoch": 1.292099526717857,
      "grad_norm": 0.9488179087638855,
      "learning_rate": 7.46466530050144e-06,
      "loss": 0.0228,
      "step": 789540
    },
    {
      "epoch": 1.2921322571565104,
      "grad_norm": 0.28347018361091614,
      "learning_rate": 7.4645994082879235e-06,
      "loss": 0.0195,
      "step": 789560
    },
    {
      "epoch": 1.2921649875951637,
      "grad_norm": 0.651117205619812,
      "learning_rate": 7.464533516074406e-06,
      "loss": 0.0226,
      "step": 789580
    },
    {
      "epoch": 1.292197718033817,
      "grad_norm": 0.3872400224208832,
      "learning_rate": 7.464467623860889e-06,
      "loss": 0.0274,
      "step": 789600
    },
    {
      "epoch": 1.2922304484724705,
      "grad_norm": 1.010593295097351,
      "learning_rate": 7.464401731647372e-06,
      "loss": 0.0166,
      "step": 789620
    },
    {
      "epoch": 1.2922631789111239,
      "grad_norm": 0.14517243206501007,
      "learning_rate": 7.464335839433855e-06,
      "loss": 0.0247,
      "step": 789640
    },
    {
      "epoch": 1.292295909349777,
      "grad_norm": 0.25342071056365967,
      "learning_rate": 7.464269947220337e-06,
      "loss": 0.02,
      "step": 789660
    },
    {
      "epoch": 1.2923286397884304,
      "grad_norm": 0.8957312703132629,
      "learning_rate": 7.464204055006821e-06,
      "loss": 0.0242,
      "step": 789680
    },
    {
      "epoch": 1.2923613702270837,
      "grad_norm": 0.5523152947425842,
      "learning_rate": 7.464138162793303e-06,
      "loss": 0.0196,
      "step": 789700
    },
    {
      "epoch": 1.2923941006657371,
      "grad_norm": 0.388223260641098,
      "learning_rate": 7.464072270579786e-06,
      "loss": 0.0245,
      "step": 789720
    },
    {
      "epoch": 1.2924268311043905,
      "grad_norm": 0.5809133648872375,
      "learning_rate": 7.464006378366269e-06,
      "loss": 0.0262,
      "step": 789740
    },
    {
      "epoch": 1.2924595615430439,
      "grad_norm": 1.162713885307312,
      "learning_rate": 7.463940486152752e-06,
      "loss": 0.019,
      "step": 789760
    },
    {
      "epoch": 1.2924922919816972,
      "grad_norm": 0.4315515458583832,
      "learning_rate": 7.4638745939392345e-06,
      "loss": 0.0148,
      "step": 789780
    },
    {
      "epoch": 1.2925250224203504,
      "grad_norm": 2.06852126121521,
      "learning_rate": 7.463808701725718e-06,
      "loss": 0.031,
      "step": 789800
    },
    {
      "epoch": 1.2925577528590038,
      "grad_norm": 0.19705653190612793,
      "learning_rate": 7.463742809512201e-06,
      "loss": 0.0167,
      "step": 789820
    },
    {
      "epoch": 1.2925904832976571,
      "grad_norm": 0.3480328321456909,
      "learning_rate": 7.4636769172986835e-06,
      "loss": 0.0215,
      "step": 789840
    },
    {
      "epoch": 1.2926232137363105,
      "grad_norm": 0.37066590785980225,
      "learning_rate": 7.463611025085167e-06,
      "loss": 0.0166,
      "step": 789860
    },
    {
      "epoch": 1.2926559441749639,
      "grad_norm": 0.8108067512512207,
      "learning_rate": 7.463545132871649e-06,
      "loss": 0.0173,
      "step": 789880
    },
    {
      "epoch": 1.2926886746136173,
      "grad_norm": 0.512786328792572,
      "learning_rate": 7.463479240658133e-06,
      "loss": 0.0255,
      "step": 789900
    },
    {
      "epoch": 1.2927214050522706,
      "grad_norm": 0.2787420153617859,
      "learning_rate": 7.4634133484446145e-06,
      "loss": 0.0285,
      "step": 789920
    },
    {
      "epoch": 1.2927541354909238,
      "grad_norm": 0.2824292778968811,
      "learning_rate": 7.463347456231098e-06,
      "loss": 0.0168,
      "step": 789940
    },
    {
      "epoch": 1.2927868659295771,
      "grad_norm": 0.2106713354587555,
      "learning_rate": 7.463281564017581e-06,
      "loss": 0.0192,
      "step": 789960
    },
    {
      "epoch": 1.2928195963682305,
      "grad_norm": 0.15142612159252167,
      "learning_rate": 7.4632156718040636e-06,
      "loss": 0.0292,
      "step": 789980
    },
    {
      "epoch": 1.292852326806884,
      "grad_norm": 0.6145966053009033,
      "learning_rate": 7.463149779590546e-06,
      "loss": 0.0332,
      "step": 790000
    },
    {
      "epoch": 1.2928850572455373,
      "grad_norm": 0.4988742768764496,
      "learning_rate": 7.46308388737703e-06,
      "loss": 0.0165,
      "step": 790020
    },
    {
      "epoch": 1.2929177876841904,
      "grad_norm": 0.7641757726669312,
      "learning_rate": 7.463017995163512e-06,
      "loss": 0.0187,
      "step": 790040
    },
    {
      "epoch": 1.292950518122844,
      "grad_norm": 0.26277226209640503,
      "learning_rate": 7.462952102949995e-06,
      "loss": 0.0171,
      "step": 790060
    },
    {
      "epoch": 1.2929832485614972,
      "grad_norm": 1.2053163051605225,
      "learning_rate": 7.462886210736477e-06,
      "loss": 0.0213,
      "step": 790080
    },
    {
      "epoch": 1.2930159790001505,
      "grad_norm": 1.4535549879074097,
      "learning_rate": 7.462820318522961e-06,
      "loss": 0.0221,
      "step": 790100
    },
    {
      "epoch": 1.293048709438804,
      "grad_norm": 0.40814095735549927,
      "learning_rate": 7.462754426309443e-06,
      "loss": 0.026,
      "step": 790120
    },
    {
      "epoch": 1.2930814398774573,
      "grad_norm": 0.6637261509895325,
      "learning_rate": 7.462688534095926e-06,
      "loss": 0.0204,
      "step": 790140
    },
    {
      "epoch": 1.2931141703161106,
      "grad_norm": 0.1359708160161972,
      "learning_rate": 7.462622641882409e-06,
      "loss": 0.0148,
      "step": 790160
    },
    {
      "epoch": 1.2931469007547638,
      "grad_norm": 0.21081304550170898,
      "learning_rate": 7.462556749668892e-06,
      "loss": 0.0281,
      "step": 790180
    },
    {
      "epoch": 1.2931796311934174,
      "grad_norm": 0.7210524082183838,
      "learning_rate": 7.462490857455375e-06,
      "loss": 0.0241,
      "step": 790200
    },
    {
      "epoch": 1.2932123616320705,
      "grad_norm": 0.13086752593517303,
      "learning_rate": 7.462424965241858e-06,
      "loss": 0.0173,
      "step": 790220
    },
    {
      "epoch": 1.293245092070724,
      "grad_norm": 0.42091700434684753,
      "learning_rate": 7.462359073028341e-06,
      "loss": 0.0267,
      "step": 790240
    },
    {
      "epoch": 1.2932778225093773,
      "grad_norm": 0.3130716383457184,
      "learning_rate": 7.462293180814824e-06,
      "loss": 0.0216,
      "step": 790260
    },
    {
      "epoch": 1.2933105529480307,
      "grad_norm": 0.2597537040710449,
      "learning_rate": 7.462227288601307e-06,
      "loss": 0.0191,
      "step": 790280
    },
    {
      "epoch": 1.293343283386684,
      "grad_norm": 0.47390151023864746,
      "learning_rate": 7.462161396387789e-06,
      "loss": 0.0216,
      "step": 790300
    },
    {
      "epoch": 1.2933760138253372,
      "grad_norm": 0.716454267501831,
      "learning_rate": 7.462095504174273e-06,
      "loss": 0.0231,
      "step": 790320
    },
    {
      "epoch": 1.2934087442639906,
      "grad_norm": 0.6327703595161438,
      "learning_rate": 7.4620296119607546e-06,
      "loss": 0.0232,
      "step": 790340
    },
    {
      "epoch": 1.293441474702644,
      "grad_norm": 2.122783899307251,
      "learning_rate": 7.461963719747238e-06,
      "loss": 0.0316,
      "step": 790360
    },
    {
      "epoch": 1.2934742051412973,
      "grad_norm": 0.2563098967075348,
      "learning_rate": 7.461897827533721e-06,
      "loss": 0.0188,
      "step": 790380
    },
    {
      "epoch": 1.2935069355799507,
      "grad_norm": 0.6710511445999146,
      "learning_rate": 7.461831935320204e-06,
      "loss": 0.0278,
      "step": 790400
    },
    {
      "epoch": 1.293539666018604,
      "grad_norm": 0.303096204996109,
      "learning_rate": 7.461766043106686e-06,
      "loss": 0.0239,
      "step": 790420
    },
    {
      "epoch": 1.2935723964572574,
      "grad_norm": 0.20084881782531738,
      "learning_rate": 7.46170015089317e-06,
      "loss": 0.0173,
      "step": 790440
    },
    {
      "epoch": 1.2936051268959106,
      "grad_norm": 0.5958598256111145,
      "learning_rate": 7.461634258679652e-06,
      "loss": 0.0198,
      "step": 790460
    },
    {
      "epoch": 1.293637857334564,
      "grad_norm": 0.6408230066299438,
      "learning_rate": 7.4615683664661354e-06,
      "loss": 0.0223,
      "step": 790480
    },
    {
      "epoch": 1.2936705877732173,
      "grad_norm": 0.2319035828113556,
      "learning_rate": 7.461502474252617e-06,
      "loss": 0.0189,
      "step": 790500
    },
    {
      "epoch": 1.2937033182118707,
      "grad_norm": 0.9632265567779541,
      "learning_rate": 7.461436582039101e-06,
      "loss": 0.0276,
      "step": 790520
    },
    {
      "epoch": 1.293736048650524,
      "grad_norm": 0.2604796886444092,
      "learning_rate": 7.4613706898255845e-06,
      "loss": 0.0216,
      "step": 790540
    },
    {
      "epoch": 1.2937687790891774,
      "grad_norm": 1.9584932327270508,
      "learning_rate": 7.461304797612066e-06,
      "loss": 0.0169,
      "step": 790560
    },
    {
      "epoch": 1.2938015095278308,
      "grad_norm": 0.44718775153160095,
      "learning_rate": 7.46123890539855e-06,
      "loss": 0.0294,
      "step": 790580
    },
    {
      "epoch": 1.293834239966484,
      "grad_norm": 0.4649747908115387,
      "learning_rate": 7.461173013185033e-06,
      "loss": 0.0242,
      "step": 790600
    },
    {
      "epoch": 1.2938669704051373,
      "grad_norm": 0.8015643954277039,
      "learning_rate": 7.4611071209715155e-06,
      "loss": 0.018,
      "step": 790620
    },
    {
      "epoch": 1.2938997008437907,
      "grad_norm": 0.9628216624259949,
      "learning_rate": 7.461041228757998e-06,
      "loss": 0.0229,
      "step": 790640
    },
    {
      "epoch": 1.293932431282444,
      "grad_norm": 0.22957807779312134,
      "learning_rate": 7.460975336544482e-06,
      "loss": 0.0245,
      "step": 790660
    },
    {
      "epoch": 1.2939651617210974,
      "grad_norm": 1.1344748735427856,
      "learning_rate": 7.460909444330964e-06,
      "loss": 0.0235,
      "step": 790680
    },
    {
      "epoch": 1.2939978921597508,
      "grad_norm": 0.6603089570999146,
      "learning_rate": 7.460843552117447e-06,
      "loss": 0.0231,
      "step": 790700
    },
    {
      "epoch": 1.2940306225984042,
      "grad_norm": 0.19443784654140472,
      "learning_rate": 7.460777659903929e-06,
      "loss": 0.0298,
      "step": 790720
    },
    {
      "epoch": 1.2940633530370573,
      "grad_norm": 0.4772702157497406,
      "learning_rate": 7.460711767690413e-06,
      "loss": 0.0261,
      "step": 790740
    },
    {
      "epoch": 1.2940960834757107,
      "grad_norm": 1.4013543128967285,
      "learning_rate": 7.4606458754768955e-06,
      "loss": 0.0214,
      "step": 790760
    },
    {
      "epoch": 1.294128813914364,
      "grad_norm": 0.44904568791389465,
      "learning_rate": 7.460579983263378e-06,
      "loss": 0.0147,
      "step": 790780
    },
    {
      "epoch": 1.2941615443530174,
      "grad_norm": 0.13763320446014404,
      "learning_rate": 7.460514091049861e-06,
      "loss": 0.0265,
      "step": 790800
    },
    {
      "epoch": 1.2941942747916708,
      "grad_norm": 0.6227465271949768,
      "learning_rate": 7.4604481988363446e-06,
      "loss": 0.0286,
      "step": 790820
    },
    {
      "epoch": 1.294227005230324,
      "grad_norm": 1.1518654823303223,
      "learning_rate": 7.4603823066228264e-06,
      "loss": 0.016,
      "step": 790840
    },
    {
      "epoch": 1.2942597356689776,
      "grad_norm": 0.5714029669761658,
      "learning_rate": 7.46031641440931e-06,
      "loss": 0.0192,
      "step": 790860
    },
    {
      "epoch": 1.2942924661076307,
      "grad_norm": 1.0586150884628296,
      "learning_rate": 7.460250522195794e-06,
      "loss": 0.0157,
      "step": 790880
    },
    {
      "epoch": 1.294325196546284,
      "grad_norm": 0.2272033393383026,
      "learning_rate": 7.4601846299822755e-06,
      "loss": 0.0156,
      "step": 790900
    },
    {
      "epoch": 1.2943579269849375,
      "grad_norm": 1.439494252204895,
      "learning_rate": 7.460118737768759e-06,
      "loss": 0.0194,
      "step": 790920
    },
    {
      "epoch": 1.2943906574235908,
      "grad_norm": 2.030280828475952,
      "learning_rate": 7.460052845555241e-06,
      "loss": 0.0302,
      "step": 790940
    },
    {
      "epoch": 1.2944233878622442,
      "grad_norm": 1.1981273889541626,
      "learning_rate": 7.459986953341725e-06,
      "loss": 0.0223,
      "step": 790960
    },
    {
      "epoch": 1.2944561183008974,
      "grad_norm": 1.6074963808059692,
      "learning_rate": 7.459921061128207e-06,
      "loss": 0.0365,
      "step": 790980
    },
    {
      "epoch": 1.294488848739551,
      "grad_norm": 0.8574560284614563,
      "learning_rate": 7.45985516891469e-06,
      "loss": 0.0255,
      "step": 791000
    },
    {
      "epoch": 1.294521579178204,
      "grad_norm": 0.37756600975990295,
      "learning_rate": 7.459789276701173e-06,
      "loss": 0.0174,
      "step": 791020
    },
    {
      "epoch": 1.2945543096168575,
      "grad_norm": 0.7116177678108215,
      "learning_rate": 7.459723384487656e-06,
      "loss": 0.0244,
      "step": 791040
    },
    {
      "epoch": 1.2945870400555108,
      "grad_norm": 0.770400881767273,
      "learning_rate": 7.459657492274138e-06,
      "loss": 0.0216,
      "step": 791060
    },
    {
      "epoch": 1.2946197704941642,
      "grad_norm": 0.4957635998725891,
      "learning_rate": 7.459591600060622e-06,
      "loss": 0.0229,
      "step": 791080
    },
    {
      "epoch": 1.2946525009328176,
      "grad_norm": 0.7866573929786682,
      "learning_rate": 7.459525707847104e-06,
      "loss": 0.0309,
      "step": 791100
    },
    {
      "epoch": 1.2946852313714707,
      "grad_norm": 0.25848349928855896,
      "learning_rate": 7.459459815633587e-06,
      "loss": 0.0167,
      "step": 791120
    },
    {
      "epoch": 1.294717961810124,
      "grad_norm": 0.2661794126033783,
      "learning_rate": 7.459393923420069e-06,
      "loss": 0.0277,
      "step": 791140
    },
    {
      "epoch": 1.2947506922487775,
      "grad_norm": 0.3053651452064514,
      "learning_rate": 7.459328031206553e-06,
      "loss": 0.0235,
      "step": 791160
    },
    {
      "epoch": 1.2947834226874309,
      "grad_norm": 0.3988630175590515,
      "learning_rate": 7.4592621389930356e-06,
      "loss": 0.0236,
      "step": 791180
    },
    {
      "epoch": 1.2948161531260842,
      "grad_norm": 0.654238760471344,
      "learning_rate": 7.459196246779518e-06,
      "loss": 0.0179,
      "step": 791200
    },
    {
      "epoch": 1.2948488835647376,
      "grad_norm": 0.6277265548706055,
      "learning_rate": 7.459130354566002e-06,
      "loss": 0.0194,
      "step": 791220
    },
    {
      "epoch": 1.294881614003391,
      "grad_norm": 0.8387572169303894,
      "learning_rate": 7.459064462352485e-06,
      "loss": 0.0291,
      "step": 791240
    },
    {
      "epoch": 1.2949143444420441,
      "grad_norm": 1.655561923980713,
      "learning_rate": 7.458998570138967e-06,
      "loss": 0.023,
      "step": 791260
    },
    {
      "epoch": 1.2949470748806975,
      "grad_norm": 0.4687708616256714,
      "learning_rate": 7.45893267792545e-06,
      "loss": 0.0173,
      "step": 791280
    },
    {
      "epoch": 1.2949798053193509,
      "grad_norm": 0.512855052947998,
      "learning_rate": 7.458866785711934e-06,
      "loss": 0.0225,
      "step": 791300
    },
    {
      "epoch": 1.2950125357580042,
      "grad_norm": 0.5203440189361572,
      "learning_rate": 7.458800893498416e-06,
      "loss": 0.028,
      "step": 791320
    },
    {
      "epoch": 1.2950452661966576,
      "grad_norm": 0.6876575946807861,
      "learning_rate": 7.458735001284899e-06,
      "loss": 0.0256,
      "step": 791340
    },
    {
      "epoch": 1.295077996635311,
      "grad_norm": 1.49613356590271,
      "learning_rate": 7.458669109071381e-06,
      "loss": 0.0253,
      "step": 791360
    },
    {
      "epoch": 1.2951107270739644,
      "grad_norm": 0.0794091671705246,
      "learning_rate": 7.458603216857865e-06,
      "loss": 0.0283,
      "step": 791380
    },
    {
      "epoch": 1.2951434575126175,
      "grad_norm": 1.2489898204803467,
      "learning_rate": 7.458537324644347e-06,
      "loss": 0.0204,
      "step": 791400
    },
    {
      "epoch": 1.2951761879512709,
      "grad_norm": 0.5971365571022034,
      "learning_rate": 7.45847143243083e-06,
      "loss": 0.0229,
      "step": 791420
    },
    {
      "epoch": 1.2952089183899242,
      "grad_norm": 0.39007845520973206,
      "learning_rate": 7.458405540217313e-06,
      "loss": 0.0281,
      "step": 791440
    },
    {
      "epoch": 1.2952416488285776,
      "grad_norm": 0.2533796429634094,
      "learning_rate": 7.4583396480037965e-06,
      "loss": 0.0226,
      "step": 791460
    },
    {
      "epoch": 1.295274379267231,
      "grad_norm": 0.4043017327785492,
      "learning_rate": 7.458273755790278e-06,
      "loss": 0.0265,
      "step": 791480
    },
    {
      "epoch": 1.2953071097058841,
      "grad_norm": 0.3978651463985443,
      "learning_rate": 7.458207863576762e-06,
      "loss": 0.0277,
      "step": 791500
    },
    {
      "epoch": 1.2953398401445377,
      "grad_norm": 0.7811499834060669,
      "learning_rate": 7.458141971363244e-06,
      "loss": 0.0128,
      "step": 791520
    },
    {
      "epoch": 1.2953725705831909,
      "grad_norm": 0.18548761308193207,
      "learning_rate": 7.458076079149727e-06,
      "loss": 0.0212,
      "step": 791540
    },
    {
      "epoch": 1.2954053010218443,
      "grad_norm": 0.3750949501991272,
      "learning_rate": 7.45801018693621e-06,
      "loss": 0.0223,
      "step": 791560
    },
    {
      "epoch": 1.2954380314604976,
      "grad_norm": 0.11668235063552856,
      "learning_rate": 7.457944294722693e-06,
      "loss": 0.0144,
      "step": 791580
    },
    {
      "epoch": 1.295470761899151,
      "grad_norm": 1.045383334159851,
      "learning_rate": 7.4578784025091765e-06,
      "loss": 0.0277,
      "step": 791600
    },
    {
      "epoch": 1.2955034923378044,
      "grad_norm": 0.3821571469306946,
      "learning_rate": 7.457812510295659e-06,
      "loss": 0.0193,
      "step": 791620
    },
    {
      "epoch": 1.2955362227764575,
      "grad_norm": 0.4618678092956543,
      "learning_rate": 7.457746618082142e-06,
      "loss": 0.0144,
      "step": 791640
    },
    {
      "epoch": 1.2955689532151111,
      "grad_norm": 0.6404461860656738,
      "learning_rate": 7.457680725868625e-06,
      "loss": 0.0224,
      "step": 791660
    },
    {
      "epoch": 1.2956016836537643,
      "grad_norm": 1.3519536256790161,
      "learning_rate": 7.457614833655108e-06,
      "loss": 0.0201,
      "step": 791680
    },
    {
      "epoch": 1.2956344140924176,
      "grad_norm": 0.861782431602478,
      "learning_rate": 7.45754894144159e-06,
      "loss": 0.0184,
      "step": 791700
    },
    {
      "epoch": 1.295667144531071,
      "grad_norm": 0.38569197058677673,
      "learning_rate": 7.457483049228074e-06,
      "loss": 0.0194,
      "step": 791720
    },
    {
      "epoch": 1.2956998749697244,
      "grad_norm": 0.6894638538360596,
      "learning_rate": 7.457417157014556e-06,
      "loss": 0.0218,
      "step": 791740
    },
    {
      "epoch": 1.2957326054083778,
      "grad_norm": 1.6607656478881836,
      "learning_rate": 7.457351264801039e-06,
      "loss": 0.021,
      "step": 791760
    },
    {
      "epoch": 1.295765335847031,
      "grad_norm": 0.8918819427490234,
      "learning_rate": 7.457285372587522e-06,
      "loss": 0.0178,
      "step": 791780
    },
    {
      "epoch": 1.2957980662856843,
      "grad_norm": 2.094503164291382,
      "learning_rate": 7.457219480374005e-06,
      "loss": 0.0285,
      "step": 791800
    },
    {
      "epoch": 1.2958307967243377,
      "grad_norm": 1.3201898336410522,
      "learning_rate": 7.4571535881604875e-06,
      "loss": 0.0223,
      "step": 791820
    },
    {
      "epoch": 1.295863527162991,
      "grad_norm": 2.171832799911499,
      "learning_rate": 7.457087695946971e-06,
      "loss": 0.0258,
      "step": 791840
    },
    {
      "epoch": 1.2958962576016444,
      "grad_norm": 0.19698455929756165,
      "learning_rate": 7.457021803733453e-06,
      "loss": 0.0195,
      "step": 791860
    },
    {
      "epoch": 1.2959289880402978,
      "grad_norm": 1.4242860078811646,
      "learning_rate": 7.4569559115199365e-06,
      "loss": 0.0154,
      "step": 791880
    },
    {
      "epoch": 1.2959617184789511,
      "grad_norm": 0.8010545372962952,
      "learning_rate": 7.4568900193064184e-06,
      "loss": 0.0242,
      "step": 791900
    },
    {
      "epoch": 1.2959944489176043,
      "grad_norm": 0.24334099888801575,
      "learning_rate": 7.456824127092902e-06,
      "loss": 0.0194,
      "step": 791920
    },
    {
      "epoch": 1.2960271793562577,
      "grad_norm": 0.8342110514640808,
      "learning_rate": 7.456758234879386e-06,
      "loss": 0.0263,
      "step": 791940
    },
    {
      "epoch": 1.296059909794911,
      "grad_norm": 1.0220658779144287,
      "learning_rate": 7.4566923426658675e-06,
      "loss": 0.0265,
      "step": 791960
    },
    {
      "epoch": 1.2960926402335644,
      "grad_norm": 0.5745573043823242,
      "learning_rate": 7.456626450452351e-06,
      "loss": 0.0199,
      "step": 791980
    },
    {
      "epoch": 1.2961253706722178,
      "grad_norm": 0.8590047955513,
      "learning_rate": 7.456560558238834e-06,
      "loss": 0.0234,
      "step": 792000
    },
    {
      "epoch": 1.2961581011108712,
      "grad_norm": 0.11263377219438553,
      "learning_rate": 7.4564946660253166e-06,
      "loss": 0.0263,
      "step": 792020
    },
    {
      "epoch": 1.2961908315495245,
      "grad_norm": 0.691365659236908,
      "learning_rate": 7.456428773811799e-06,
      "loss": 0.0228,
      "step": 792040
    },
    {
      "epoch": 1.2962235619881777,
      "grad_norm": 0.47252172231674194,
      "learning_rate": 7.456362881598283e-06,
      "loss": 0.0272,
      "step": 792060
    },
    {
      "epoch": 1.296256292426831,
      "grad_norm": 0.028619443997740746,
      "learning_rate": 7.456296989384765e-06,
      "loss": 0.0287,
      "step": 792080
    },
    {
      "epoch": 1.2962890228654844,
      "grad_norm": 0.4268588125705719,
      "learning_rate": 7.456231097171248e-06,
      "loss": 0.0179,
      "step": 792100
    },
    {
      "epoch": 1.2963217533041378,
      "grad_norm": 0.23731347918510437,
      "learning_rate": 7.45616520495773e-06,
      "loss": 0.0276,
      "step": 792120
    },
    {
      "epoch": 1.2963544837427912,
      "grad_norm": 0.5500748157501221,
      "learning_rate": 7.456099312744214e-06,
      "loss": 0.027,
      "step": 792140
    },
    {
      "epoch": 1.2963872141814445,
      "grad_norm": 1.0269752740859985,
      "learning_rate": 7.456033420530696e-06,
      "loss": 0.0184,
      "step": 792160
    },
    {
      "epoch": 1.296419944620098,
      "grad_norm": 0.189767524600029,
      "learning_rate": 7.455967528317179e-06,
      "loss": 0.0183,
      "step": 792180
    },
    {
      "epoch": 1.296452675058751,
      "grad_norm": 0.18160124123096466,
      "learning_rate": 7.455901636103662e-06,
      "loss": 0.0181,
      "step": 792200
    },
    {
      "epoch": 1.2964854054974044,
      "grad_norm": 0.3468465805053711,
      "learning_rate": 7.455835743890145e-06,
      "loss": 0.0227,
      "step": 792220
    },
    {
      "epoch": 1.2965181359360578,
      "grad_norm": 0.28935107588768005,
      "learning_rate": 7.4557698516766275e-06,
      "loss": 0.0136,
      "step": 792240
    },
    {
      "epoch": 1.2965508663747112,
      "grad_norm": 0.34382951259613037,
      "learning_rate": 7.455703959463111e-06,
      "loss": 0.0155,
      "step": 792260
    },
    {
      "epoch": 1.2965835968133645,
      "grad_norm": 1.8338053226470947,
      "learning_rate": 7.455638067249594e-06,
      "loss": 0.0187,
      "step": 792280
    },
    {
      "epoch": 1.2966163272520177,
      "grad_norm": 0.2932409942150116,
      "learning_rate": 7.455572175036077e-06,
      "loss": 0.0222,
      "step": 792300
    },
    {
      "epoch": 1.2966490576906713,
      "grad_norm": 0.5906028747558594,
      "learning_rate": 7.45550628282256e-06,
      "loss": 0.0275,
      "step": 792320
    },
    {
      "epoch": 1.2966817881293244,
      "grad_norm": 0.842517614364624,
      "learning_rate": 7.455440390609042e-06,
      "loss": 0.0234,
      "step": 792340
    },
    {
      "epoch": 1.2967145185679778,
      "grad_norm": 1.8859788179397583,
      "learning_rate": 7.455374498395526e-06,
      "loss": 0.0179,
      "step": 792360
    },
    {
      "epoch": 1.2967472490066312,
      "grad_norm": 0.5649620890617371,
      "learning_rate": 7.4553086061820076e-06,
      "loss": 0.0272,
      "step": 792380
    },
    {
      "epoch": 1.2967799794452846,
      "grad_norm": 0.3453139364719391,
      "learning_rate": 7.455242713968491e-06,
      "loss": 0.0177,
      "step": 792400
    },
    {
      "epoch": 1.296812709883938,
      "grad_norm": 0.3278414309024811,
      "learning_rate": 7.455176821754974e-06,
      "loss": 0.0256,
      "step": 792420
    },
    {
      "epoch": 1.296845440322591,
      "grad_norm": 3.4089834690093994,
      "learning_rate": 7.455110929541457e-06,
      "loss": 0.0136,
      "step": 792440
    },
    {
      "epoch": 1.2968781707612447,
      "grad_norm": 0.9159249663352966,
      "learning_rate": 7.455045037327939e-06,
      "loss": 0.0258,
      "step": 792460
    },
    {
      "epoch": 1.2969109011998978,
      "grad_norm": 0.5715409517288208,
      "learning_rate": 7.454979145114423e-06,
      "loss": 0.0202,
      "step": 792480
    },
    {
      "epoch": 1.2969436316385512,
      "grad_norm": 0.9802857041358948,
      "learning_rate": 7.454913252900905e-06,
      "loss": 0.0228,
      "step": 792500
    },
    {
      "epoch": 1.2969763620772046,
      "grad_norm": 0.4847455322742462,
      "learning_rate": 7.4548473606873884e-06,
      "loss": 0.025,
      "step": 792520
    },
    {
      "epoch": 1.297009092515858,
      "grad_norm": 0.3812320828437805,
      "learning_rate": 7.45478146847387e-06,
      "loss": 0.0264,
      "step": 792540
    },
    {
      "epoch": 1.2970418229545113,
      "grad_norm": 0.7369146943092346,
      "learning_rate": 7.454715576260354e-06,
      "loss": 0.0245,
      "step": 792560
    },
    {
      "epoch": 1.2970745533931645,
      "grad_norm": 1.7724870443344116,
      "learning_rate": 7.454649684046837e-06,
      "loss": 0.028,
      "step": 792580
    },
    {
      "epoch": 1.2971072838318178,
      "grad_norm": 0.7738840579986572,
      "learning_rate": 7.454583791833319e-06,
      "loss": 0.0198,
      "step": 792600
    },
    {
      "epoch": 1.2971400142704712,
      "grad_norm": 0.5978783369064331,
      "learning_rate": 7.454517899619802e-06,
      "loss": 0.0202,
      "step": 792620
    },
    {
      "epoch": 1.2971727447091246,
      "grad_norm": 0.20442961156368256,
      "learning_rate": 7.454452007406286e-06,
      "loss": 0.0227,
      "step": 792640
    },
    {
      "epoch": 1.297205475147778,
      "grad_norm": 0.5459266901016235,
      "learning_rate": 7.4543861151927685e-06,
      "loss": 0.0198,
      "step": 792660
    },
    {
      "epoch": 1.2972382055864313,
      "grad_norm": 1.6370872259140015,
      "learning_rate": 7.454320222979251e-06,
      "loss": 0.0223,
      "step": 792680
    },
    {
      "epoch": 1.2972709360250847,
      "grad_norm": 0.5922913551330566,
      "learning_rate": 7.454254330765735e-06,
      "loss": 0.0189,
      "step": 792700
    },
    {
      "epoch": 1.2973036664637378,
      "grad_norm": 0.8490396738052368,
      "learning_rate": 7.454188438552217e-06,
      "loss": 0.0246,
      "step": 792720
    },
    {
      "epoch": 1.2973363969023912,
      "grad_norm": 0.7661535739898682,
      "learning_rate": 7.4541225463387e-06,
      "loss": 0.0245,
      "step": 792740
    },
    {
      "epoch": 1.2973691273410446,
      "grad_norm": 0.08831029385328293,
      "learning_rate": 7.454056654125182e-06,
      "loss": 0.0217,
      "step": 792760
    },
    {
      "epoch": 1.297401857779698,
      "grad_norm": 1.2826734781265259,
      "learning_rate": 7.453990761911666e-06,
      "loss": 0.0262,
      "step": 792780
    },
    {
      "epoch": 1.2974345882183513,
      "grad_norm": 0.6363179087638855,
      "learning_rate": 7.4539248696981485e-06,
      "loss": 0.0266,
      "step": 792800
    },
    {
      "epoch": 1.2974673186570047,
      "grad_norm": 0.39839839935302734,
      "learning_rate": 7.453858977484631e-06,
      "loss": 0.0237,
      "step": 792820
    },
    {
      "epoch": 1.297500049095658,
      "grad_norm": 0.941368579864502,
      "learning_rate": 7.453793085271114e-06,
      "loss": 0.0231,
      "step": 792840
    },
    {
      "epoch": 1.2975327795343112,
      "grad_norm": 0.8679928779602051,
      "learning_rate": 7.4537271930575976e-06,
      "loss": 0.031,
      "step": 792860
    },
    {
      "epoch": 1.2975655099729646,
      "grad_norm": 0.49943825602531433,
      "learning_rate": 7.4536613008440795e-06,
      "loss": 0.0214,
      "step": 792880
    },
    {
      "epoch": 1.297598240411618,
      "grad_norm": 0.7325955033302307,
      "learning_rate": 7.453595408630563e-06,
      "loss": 0.0225,
      "step": 792900
    },
    {
      "epoch": 1.2976309708502713,
      "grad_norm": 0.6759949922561646,
      "learning_rate": 7.453529516417045e-06,
      "loss": 0.0162,
      "step": 792920
    },
    {
      "epoch": 1.2976637012889247,
      "grad_norm": 0.676651656627655,
      "learning_rate": 7.4534636242035285e-06,
      "loss": 0.021,
      "step": 792940
    },
    {
      "epoch": 1.297696431727578,
      "grad_norm": 0.733948290348053,
      "learning_rate": 7.45339773199001e-06,
      "loss": 0.0155,
      "step": 792960
    },
    {
      "epoch": 1.2977291621662315,
      "grad_norm": 0.6399174332618713,
      "learning_rate": 7.453331839776494e-06,
      "loss": 0.0211,
      "step": 792980
    },
    {
      "epoch": 1.2977618926048846,
      "grad_norm": 0.7312419414520264,
      "learning_rate": 7.453265947562978e-06,
      "loss": 0.0192,
      "step": 793000
    },
    {
      "epoch": 1.297794623043538,
      "grad_norm": 0.40778255462646484,
      "learning_rate": 7.4532000553494595e-06,
      "loss": 0.0247,
      "step": 793020
    },
    {
      "epoch": 1.2978273534821914,
      "grad_norm": 0.9037601351737976,
      "learning_rate": 7.453134163135943e-06,
      "loss": 0.0348,
      "step": 793040
    },
    {
      "epoch": 1.2978600839208447,
      "grad_norm": 1.3058606386184692,
      "learning_rate": 7.453068270922426e-06,
      "loss": 0.0252,
      "step": 793060
    },
    {
      "epoch": 1.297892814359498,
      "grad_norm": 0.4979799687862396,
      "learning_rate": 7.4530023787089085e-06,
      "loss": 0.0184,
      "step": 793080
    },
    {
      "epoch": 1.2979255447981513,
      "grad_norm": 0.6031653881072998,
      "learning_rate": 7.452936486495391e-06,
      "loss": 0.0131,
      "step": 793100
    },
    {
      "epoch": 1.2979582752368048,
      "grad_norm": 0.31071019172668457,
      "learning_rate": 7.452870594281875e-06,
      "loss": 0.0213,
      "step": 793120
    },
    {
      "epoch": 1.297991005675458,
      "grad_norm": 0.14554205536842346,
      "learning_rate": 7.452804702068357e-06,
      "loss": 0.0177,
      "step": 793140
    },
    {
      "epoch": 1.2980237361141114,
      "grad_norm": 1.2074050903320312,
      "learning_rate": 7.45273880985484e-06,
      "loss": 0.0253,
      "step": 793160
    },
    {
      "epoch": 1.2980564665527647,
      "grad_norm": 0.49958911538124084,
      "learning_rate": 7.452672917641322e-06,
      "loss": 0.0177,
      "step": 793180
    },
    {
      "epoch": 1.2980891969914181,
      "grad_norm": 1.5461429357528687,
      "learning_rate": 7.452607025427806e-06,
      "loss": 0.0187,
      "step": 793200
    },
    {
      "epoch": 1.2981219274300715,
      "grad_norm": 0.289180189371109,
      "learning_rate": 7.4525411332142886e-06,
      "loss": 0.0333,
      "step": 793220
    },
    {
      "epoch": 1.2981546578687246,
      "grad_norm": 1.1151944398880005,
      "learning_rate": 7.452475241000771e-06,
      "loss": 0.0239,
      "step": 793240
    },
    {
      "epoch": 1.2981873883073782,
      "grad_norm": 0.8082031011581421,
      "learning_rate": 7.452409348787254e-06,
      "loss": 0.0218,
      "step": 793260
    },
    {
      "epoch": 1.2982201187460314,
      "grad_norm": 0.8169065117835999,
      "learning_rate": 7.452343456573738e-06,
      "loss": 0.0277,
      "step": 793280
    },
    {
      "epoch": 1.2982528491846848,
      "grad_norm": 0.24359698593616486,
      "learning_rate": 7.4522775643602195e-06,
      "loss": 0.0278,
      "step": 793300
    },
    {
      "epoch": 1.2982855796233381,
      "grad_norm": 0.9035633206367493,
      "learning_rate": 7.452211672146703e-06,
      "loss": 0.022,
      "step": 793320
    },
    {
      "epoch": 1.2983183100619915,
      "grad_norm": 0.37215325236320496,
      "learning_rate": 7.452145779933187e-06,
      "loss": 0.0127,
      "step": 793340
    },
    {
      "epoch": 1.2983510405006449,
      "grad_norm": 0.839550256729126,
      "learning_rate": 7.452079887719669e-06,
      "loss": 0.0211,
      "step": 793360
    },
    {
      "epoch": 1.298383770939298,
      "grad_norm": 1.197110652923584,
      "learning_rate": 7.452013995506152e-06,
      "loss": 0.025,
      "step": 793380
    },
    {
      "epoch": 1.2984165013779514,
      "grad_norm": 0.8755525946617126,
      "learning_rate": 7.451948103292634e-06,
      "loss": 0.0196,
      "step": 793400
    },
    {
      "epoch": 1.2984492318166048,
      "grad_norm": 0.34246283769607544,
      "learning_rate": 7.451882211079118e-06,
      "loss": 0.0144,
      "step": 793420
    },
    {
      "epoch": 1.2984819622552581,
      "grad_norm": 0.33527275919914246,
      "learning_rate": 7.4518163188656e-06,
      "loss": 0.0292,
      "step": 793440
    },
    {
      "epoch": 1.2985146926939115,
      "grad_norm": 1.9234898090362549,
      "learning_rate": 7.451750426652083e-06,
      "loss": 0.0301,
      "step": 793460
    },
    {
      "epoch": 1.2985474231325649,
      "grad_norm": 1.0079715251922607,
      "learning_rate": 7.451684534438566e-06,
      "loss": 0.0272,
      "step": 793480
    },
    {
      "epoch": 1.2985801535712183,
      "grad_norm": 0.43591660261154175,
      "learning_rate": 7.4516186422250495e-06,
      "loss": 0.0152,
      "step": 793500
    },
    {
      "epoch": 1.2986128840098714,
      "grad_norm": 0.28275999426841736,
      "learning_rate": 7.451552750011531e-06,
      "loss": 0.0147,
      "step": 793520
    },
    {
      "epoch": 1.2986456144485248,
      "grad_norm": 0.4583791494369507,
      "learning_rate": 7.451486857798015e-06,
      "loss": 0.0238,
      "step": 793540
    },
    {
      "epoch": 1.2986783448871781,
      "grad_norm": 0.3668340742588043,
      "learning_rate": 7.451420965584497e-06,
      "loss": 0.0205,
      "step": 793560
    },
    {
      "epoch": 1.2987110753258315,
      "grad_norm": 0.9915254712104797,
      "learning_rate": 7.4513550733709804e-06,
      "loss": 0.0363,
      "step": 793580
    },
    {
      "epoch": 1.298743805764485,
      "grad_norm": 0.6770452857017517,
      "learning_rate": 7.451289181157463e-06,
      "loss": 0.0225,
      "step": 793600
    },
    {
      "epoch": 1.2987765362031383,
      "grad_norm": 0.31709083914756775,
      "learning_rate": 7.451223288943946e-06,
      "loss": 0.0222,
      "step": 793620
    },
    {
      "epoch": 1.2988092666417916,
      "grad_norm": 0.2214777171611786,
      "learning_rate": 7.451157396730429e-06,
      "loss": 0.0213,
      "step": 793640
    },
    {
      "epoch": 1.2988419970804448,
      "grad_norm": 0.7229060530662537,
      "learning_rate": 7.451091504516912e-06,
      "loss": 0.0247,
      "step": 793660
    },
    {
      "epoch": 1.2988747275190982,
      "grad_norm": 0.6504501700401306,
      "learning_rate": 7.451025612303395e-06,
      "loss": 0.0231,
      "step": 793680
    },
    {
      "epoch": 1.2989074579577515,
      "grad_norm": 0.7474022507667542,
      "learning_rate": 7.450959720089878e-06,
      "loss": 0.0269,
      "step": 793700
    },
    {
      "epoch": 1.298940188396405,
      "grad_norm": 0.2083471715450287,
      "learning_rate": 7.450893827876361e-06,
      "loss": 0.0216,
      "step": 793720
    },
    {
      "epoch": 1.2989729188350583,
      "grad_norm": 0.9423058032989502,
      "learning_rate": 7.450827935662843e-06,
      "loss": 0.0236,
      "step": 793740
    },
    {
      "epoch": 1.2990056492737116,
      "grad_norm": 0.28062018752098083,
      "learning_rate": 7.450762043449327e-06,
      "loss": 0.0265,
      "step": 793760
    },
    {
      "epoch": 1.299038379712365,
      "grad_norm": 0.43401038646698,
      "learning_rate": 7.450696151235809e-06,
      "loss": 0.024,
      "step": 793780
    },
    {
      "epoch": 1.2990711101510182,
      "grad_norm": 0.8443112969398499,
      "learning_rate": 7.450630259022292e-06,
      "loss": 0.0186,
      "step": 793800
    },
    {
      "epoch": 1.2991038405896715,
      "grad_norm": 1.6859405040740967,
      "learning_rate": 7.450564366808775e-06,
      "loss": 0.0229,
      "step": 793820
    },
    {
      "epoch": 1.299136571028325,
      "grad_norm": 0.826491117477417,
      "learning_rate": 7.450498474595258e-06,
      "loss": 0.027,
      "step": 793840
    },
    {
      "epoch": 1.2991693014669783,
      "grad_norm": 0.8028789162635803,
      "learning_rate": 7.4504325823817405e-06,
      "loss": 0.0302,
      "step": 793860
    },
    {
      "epoch": 1.2992020319056317,
      "grad_norm": 1.6862298250198364,
      "learning_rate": 7.450366690168224e-06,
      "loss": 0.0256,
      "step": 793880
    },
    {
      "epoch": 1.2992347623442848,
      "grad_norm": 0.4534187316894531,
      "learning_rate": 7.450300797954706e-06,
      "loss": 0.0334,
      "step": 793900
    },
    {
      "epoch": 1.2992674927829384,
      "grad_norm": 0.27793192863464355,
      "learning_rate": 7.4502349057411895e-06,
      "loss": 0.0181,
      "step": 793920
    },
    {
      "epoch": 1.2993002232215916,
      "grad_norm": 0.27994927763938904,
      "learning_rate": 7.4501690135276714e-06,
      "loss": 0.0201,
      "step": 793940
    },
    {
      "epoch": 1.299332953660245,
      "grad_norm": 0.506133496761322,
      "learning_rate": 7.450103121314155e-06,
      "loss": 0.0212,
      "step": 793960
    },
    {
      "epoch": 1.2993656840988983,
      "grad_norm": 0.6864175200462341,
      "learning_rate": 7.450037229100637e-06,
      "loss": 0.0248,
      "step": 793980
    },
    {
      "epoch": 1.2993984145375517,
      "grad_norm": 0.5381017923355103,
      "learning_rate": 7.4499713368871205e-06,
      "loss": 0.0174,
      "step": 794000
    },
    {
      "epoch": 1.299431144976205,
      "grad_norm": 0.42089352011680603,
      "learning_rate": 7.449905444673603e-06,
      "loss": 0.0223,
      "step": 794020
    },
    {
      "epoch": 1.2994638754148582,
      "grad_norm": 1.1680119037628174,
      "learning_rate": 7.449839552460086e-06,
      "loss": 0.0219,
      "step": 794040
    },
    {
      "epoch": 1.2994966058535116,
      "grad_norm": 1.0523760318756104,
      "learning_rate": 7.4497736602465696e-06,
      "loss": 0.0217,
      "step": 794060
    },
    {
      "epoch": 1.299529336292165,
      "grad_norm": 1.1228102445602417,
      "learning_rate": 7.449707768033052e-06,
      "loss": 0.0218,
      "step": 794080
    },
    {
      "epoch": 1.2995620667308183,
      "grad_norm": 0.5328137874603271,
      "learning_rate": 7.449641875819535e-06,
      "loss": 0.018,
      "step": 794100
    },
    {
      "epoch": 1.2995947971694717,
      "grad_norm": 0.12639681994915009,
      "learning_rate": 7.449575983606018e-06,
      "loss": 0.0173,
      "step": 794120
    },
    {
      "epoch": 1.299627527608125,
      "grad_norm": 1.0216082334518433,
      "learning_rate": 7.449510091392501e-06,
      "loss": 0.0198,
      "step": 794140
    },
    {
      "epoch": 1.2996602580467784,
      "grad_norm": 0.30866098403930664,
      "learning_rate": 7.449444199178983e-06,
      "loss": 0.0215,
      "step": 794160
    },
    {
      "epoch": 1.2996929884854316,
      "grad_norm": 0.7925005555152893,
      "learning_rate": 7.449378306965467e-06,
      "loss": 0.0207,
      "step": 794180
    },
    {
      "epoch": 1.299725718924085,
      "grad_norm": 0.8578267693519592,
      "learning_rate": 7.449312414751949e-06,
      "loss": 0.015,
      "step": 794200
    },
    {
      "epoch": 1.2997584493627383,
      "grad_norm": 0.6195430159568787,
      "learning_rate": 7.449246522538432e-06,
      "loss": 0.0221,
      "step": 794220
    },
    {
      "epoch": 1.2997911798013917,
      "grad_norm": 0.7415246367454529,
      "learning_rate": 7.449180630324915e-06,
      "loss": 0.0278,
      "step": 794240
    },
    {
      "epoch": 1.299823910240045,
      "grad_norm": 0.7639142870903015,
      "learning_rate": 7.449114738111398e-06,
      "loss": 0.0248,
      "step": 794260
    },
    {
      "epoch": 1.2998566406786984,
      "grad_norm": 0.5292841196060181,
      "learning_rate": 7.4490488458978806e-06,
      "loss": 0.0174,
      "step": 794280
    },
    {
      "epoch": 1.2998893711173518,
      "grad_norm": 0.49682238698005676,
      "learning_rate": 7.448982953684364e-06,
      "loss": 0.0222,
      "step": 794300
    },
    {
      "epoch": 1.299922101556005,
      "grad_norm": 0.8527494072914124,
      "learning_rate": 7.448917061470846e-06,
      "loss": 0.0199,
      "step": 794320
    },
    {
      "epoch": 1.2999548319946583,
      "grad_norm": 0.21306617558002472,
      "learning_rate": 7.44885116925733e-06,
      "loss": 0.0192,
      "step": 794340
    },
    {
      "epoch": 1.2999875624333117,
      "grad_norm": 0.2022126466035843,
      "learning_rate": 7.4487852770438115e-06,
      "loss": 0.0235,
      "step": 794360
    },
    {
      "epoch": 1.300020292871965,
      "grad_norm": 0.3926551043987274,
      "learning_rate": 7.448719384830295e-06,
      "loss": 0.0187,
      "step": 794380
    },
    {
      "epoch": 1.3000530233106184,
      "grad_norm": 0.47463810443878174,
      "learning_rate": 7.448653492616779e-06,
      "loss": 0.0235,
      "step": 794400
    },
    {
      "epoch": 1.3000857537492718,
      "grad_norm": 0.8748824596405029,
      "learning_rate": 7.448587600403261e-06,
      "loss": 0.0239,
      "step": 794420
    },
    {
      "epoch": 1.3001184841879252,
      "grad_norm": 0.579829752445221,
      "learning_rate": 7.448521708189744e-06,
      "loss": 0.0207,
      "step": 794440
    },
    {
      "epoch": 1.3001512146265783,
      "grad_norm": 0.7940605282783508,
      "learning_rate": 7.448455815976227e-06,
      "loss": 0.0166,
      "step": 794460
    },
    {
      "epoch": 1.3001839450652317,
      "grad_norm": 0.1566980481147766,
      "learning_rate": 7.44838992376271e-06,
      "loss": 0.0238,
      "step": 794480
    },
    {
      "epoch": 1.300216675503885,
      "grad_norm": 0.8418307900428772,
      "learning_rate": 7.448324031549192e-06,
      "loss": 0.031,
      "step": 794500
    },
    {
      "epoch": 1.3002494059425385,
      "grad_norm": 0.8199076652526855,
      "learning_rate": 7.448258139335676e-06,
      "loss": 0.0184,
      "step": 794520
    },
    {
      "epoch": 1.3002821363811918,
      "grad_norm": 1.0877671241760254,
      "learning_rate": 7.448192247122158e-06,
      "loss": 0.0202,
      "step": 794540
    },
    {
      "epoch": 1.300314866819845,
      "grad_norm": 1.2048717737197876,
      "learning_rate": 7.4481263549086415e-06,
      "loss": 0.0251,
      "step": 794560
    },
    {
      "epoch": 1.3003475972584986,
      "grad_norm": 0.6170504689216614,
      "learning_rate": 7.448060462695123e-06,
      "loss": 0.0199,
      "step": 794580
    },
    {
      "epoch": 1.3003803276971517,
      "grad_norm": 0.3145627975463867,
      "learning_rate": 7.447994570481607e-06,
      "loss": 0.0207,
      "step": 794600
    },
    {
      "epoch": 1.300413058135805,
      "grad_norm": 0.9772952795028687,
      "learning_rate": 7.44792867826809e-06,
      "loss": 0.0279,
      "step": 794620
    },
    {
      "epoch": 1.3004457885744585,
      "grad_norm": 0.2386758029460907,
      "learning_rate": 7.447862786054572e-06,
      "loss": 0.0165,
      "step": 794640
    },
    {
      "epoch": 1.3004785190131118,
      "grad_norm": 0.787114679813385,
      "learning_rate": 7.447796893841055e-06,
      "loss": 0.0248,
      "step": 794660
    },
    {
      "epoch": 1.3005112494517652,
      "grad_norm": 1.8899224996566772,
      "learning_rate": 7.447731001627539e-06,
      "loss": 0.0248,
      "step": 794680
    },
    {
      "epoch": 1.3005439798904184,
      "grad_norm": 0.21111522614955902,
      "learning_rate": 7.447665109414021e-06,
      "loss": 0.0181,
      "step": 794700
    },
    {
      "epoch": 1.300576710329072,
      "grad_norm": 0.4167918860912323,
      "learning_rate": 7.447599217200504e-06,
      "loss": 0.0176,
      "step": 794720
    },
    {
      "epoch": 1.3006094407677251,
      "grad_norm": 0.1530640572309494,
      "learning_rate": 7.447533324986988e-06,
      "loss": 0.0247,
      "step": 794740
    },
    {
      "epoch": 1.3006421712063785,
      "grad_norm": 0.8003020286560059,
      "learning_rate": 7.44746743277347e-06,
      "loss": 0.0194,
      "step": 794760
    },
    {
      "epoch": 1.3006749016450319,
      "grad_norm": 1.509866714477539,
      "learning_rate": 7.447401540559953e-06,
      "loss": 0.0332,
      "step": 794780
    },
    {
      "epoch": 1.3007076320836852,
      "grad_norm": 1.0814801454544067,
      "learning_rate": 7.447335648346435e-06,
      "loss": 0.0186,
      "step": 794800
    },
    {
      "epoch": 1.3007403625223386,
      "grad_norm": 0.19811896979808807,
      "learning_rate": 7.447269756132919e-06,
      "loss": 0.0131,
      "step": 794820
    },
    {
      "epoch": 1.3007730929609918,
      "grad_norm": 0.44158145785331726,
      "learning_rate": 7.4472038639194015e-06,
      "loss": 0.0314,
      "step": 794840
    },
    {
      "epoch": 1.3008058233996451,
      "grad_norm": 0.5220818519592285,
      "learning_rate": 7.447137971705884e-06,
      "loss": 0.0168,
      "step": 794860
    },
    {
      "epoch": 1.3008385538382985,
      "grad_norm": 0.4341058135032654,
      "learning_rate": 7.447072079492367e-06,
      "loss": 0.0166,
      "step": 794880
    },
    {
      "epoch": 1.3008712842769519,
      "grad_norm": 0.8040498495101929,
      "learning_rate": 7.4470061872788506e-06,
      "loss": 0.0303,
      "step": 794900
    },
    {
      "epoch": 1.3009040147156052,
      "grad_norm": 1.152392864227295,
      "learning_rate": 7.4469402950653325e-06,
      "loss": 0.0288,
      "step": 794920
    },
    {
      "epoch": 1.3009367451542586,
      "grad_norm": 0.5834816098213196,
      "learning_rate": 7.446874402851816e-06,
      "loss": 0.0307,
      "step": 794940
    },
    {
      "epoch": 1.300969475592912,
      "grad_norm": 0.8155171275138855,
      "learning_rate": 7.446808510638298e-06,
      "loss": 0.0225,
      "step": 794960
    },
    {
      "epoch": 1.3010022060315651,
      "grad_norm": 0.7894893884658813,
      "learning_rate": 7.4467426184247815e-06,
      "loss": 0.0221,
      "step": 794980
    },
    {
      "epoch": 1.3010349364702185,
      "grad_norm": 0.6565294861793518,
      "learning_rate": 7.446676726211263e-06,
      "loss": 0.0219,
      "step": 795000
    },
    {
      "epoch": 1.3010676669088719,
      "grad_norm": 0.33666032552719116,
      "learning_rate": 7.446610833997747e-06,
      "loss": 0.0199,
      "step": 795020
    },
    {
      "epoch": 1.3011003973475253,
      "grad_norm": 0.9337058067321777,
      "learning_rate": 7.44654494178423e-06,
      "loss": 0.0245,
      "step": 795040
    },
    {
      "epoch": 1.3011331277861786,
      "grad_norm": 0.41329437494277954,
      "learning_rate": 7.4464790495707125e-06,
      "loss": 0.0121,
      "step": 795060
    },
    {
      "epoch": 1.301165858224832,
      "grad_norm": 1.4326564073562622,
      "learning_rate": 7.446413157357195e-06,
      "loss": 0.0226,
      "step": 795080
    },
    {
      "epoch": 1.3011985886634854,
      "grad_norm": 0.29119569063186646,
      "learning_rate": 7.446347265143679e-06,
      "loss": 0.0252,
      "step": 795100
    },
    {
      "epoch": 1.3012313191021385,
      "grad_norm": 1.3116047382354736,
      "learning_rate": 7.4462813729301616e-06,
      "loss": 0.0251,
      "step": 795120
    },
    {
      "epoch": 1.301264049540792,
      "grad_norm": 0.46576356887817383,
      "learning_rate": 7.446215480716644e-06,
      "loss": 0.0394,
      "step": 795140
    },
    {
      "epoch": 1.3012967799794453,
      "grad_norm": 0.5168164968490601,
      "learning_rate": 7.446149588503128e-06,
      "loss": 0.0261,
      "step": 795160
    },
    {
      "epoch": 1.3013295104180986,
      "grad_norm": 0.5627340078353882,
      "learning_rate": 7.44608369628961e-06,
      "loss": 0.023,
      "step": 795180
    },
    {
      "epoch": 1.301362240856752,
      "grad_norm": 0.22501255571842194,
      "learning_rate": 7.446017804076093e-06,
      "loss": 0.0223,
      "step": 795200
    },
    {
      "epoch": 1.3013949712954054,
      "grad_norm": 0.5105910301208496,
      "learning_rate": 7.445951911862575e-06,
      "loss": 0.0272,
      "step": 795220
    },
    {
      "epoch": 1.3014277017340588,
      "grad_norm": 0.49730244278907776,
      "learning_rate": 7.445886019649059e-06,
      "loss": 0.0145,
      "step": 795240
    },
    {
      "epoch": 1.301460432172712,
      "grad_norm": 1.0407185554504395,
      "learning_rate": 7.445820127435542e-06,
      "loss": 0.0245,
      "step": 795260
    },
    {
      "epoch": 1.3014931626113653,
      "grad_norm": 0.32139891386032104,
      "learning_rate": 7.445754235222024e-06,
      "loss": 0.0175,
      "step": 795280
    },
    {
      "epoch": 1.3015258930500186,
      "grad_norm": 0.461557000875473,
      "learning_rate": 7.445688343008507e-06,
      "loss": 0.0149,
      "step": 795300
    },
    {
      "epoch": 1.301558623488672,
      "grad_norm": 1.0985716581344604,
      "learning_rate": 7.445622450794991e-06,
      "loss": 0.031,
      "step": 795320
    },
    {
      "epoch": 1.3015913539273254,
      "grad_norm": 0.24149805307388306,
      "learning_rate": 7.4455565585814725e-06,
      "loss": 0.0153,
      "step": 795340
    },
    {
      "epoch": 1.3016240843659785,
      "grad_norm": 0.1328824907541275,
      "learning_rate": 7.445490666367956e-06,
      "loss": 0.0189,
      "step": 795360
    },
    {
      "epoch": 1.3016568148046321,
      "grad_norm": 0.6967451572418213,
      "learning_rate": 7.445424774154438e-06,
      "loss": 0.017,
      "step": 795380
    },
    {
      "epoch": 1.3016895452432853,
      "grad_norm": 0.4084899127483368,
      "learning_rate": 7.445358881940922e-06,
      "loss": 0.0291,
      "step": 795400
    },
    {
      "epoch": 1.3017222756819387,
      "grad_norm": 0.8365201354026794,
      "learning_rate": 7.445292989727404e-06,
      "loss": 0.0201,
      "step": 795420
    },
    {
      "epoch": 1.301755006120592,
      "grad_norm": 0.2325897514820099,
      "learning_rate": 7.445227097513887e-06,
      "loss": 0.0239,
      "step": 795440
    },
    {
      "epoch": 1.3017877365592454,
      "grad_norm": 1.621734619140625,
      "learning_rate": 7.445161205300371e-06,
      "loss": 0.0173,
      "step": 795460
    },
    {
      "epoch": 1.3018204669978988,
      "grad_norm": 0.2352520376443863,
      "learning_rate": 7.445095313086853e-06,
      "loss": 0.0209,
      "step": 795480
    },
    {
      "epoch": 1.301853197436552,
      "grad_norm": 0.8822561502456665,
      "learning_rate": 7.445029420873336e-06,
      "loss": 0.02,
      "step": 795500
    },
    {
      "epoch": 1.3018859278752055,
      "grad_norm": 1.5004874467849731,
      "learning_rate": 7.444963528659819e-06,
      "loss": 0.0244,
      "step": 795520
    },
    {
      "epoch": 1.3019186583138587,
      "grad_norm": 1.0108779668807983,
      "learning_rate": 7.4448976364463025e-06,
      "loss": 0.0236,
      "step": 795540
    },
    {
      "epoch": 1.301951388752512,
      "grad_norm": 0.8938543200492859,
      "learning_rate": 7.444831744232784e-06,
      "loss": 0.0228,
      "step": 795560
    },
    {
      "epoch": 1.3019841191911654,
      "grad_norm": 0.16087797284126282,
      "learning_rate": 7.444765852019268e-06,
      "loss": 0.0229,
      "step": 795580
    },
    {
      "epoch": 1.3020168496298188,
      "grad_norm": 0.6465387344360352,
      "learning_rate": 7.44469995980575e-06,
      "loss": 0.0228,
      "step": 795600
    },
    {
      "epoch": 1.3020495800684722,
      "grad_norm": 0.6205273866653442,
      "learning_rate": 7.4446340675922334e-06,
      "loss": 0.0305,
      "step": 795620
    },
    {
      "epoch": 1.3020823105071253,
      "grad_norm": 2.8173999786376953,
      "learning_rate": 7.444568175378716e-06,
      "loss": 0.026,
      "step": 795640
    },
    {
      "epoch": 1.3021150409457787,
      "grad_norm": 0.34016910195350647,
      "learning_rate": 7.444502283165199e-06,
      "loss": 0.0202,
      "step": 795660
    },
    {
      "epoch": 1.302147771384432,
      "grad_norm": 0.30782172083854675,
      "learning_rate": 7.444436390951682e-06,
      "loss": 0.0212,
      "step": 795680
    },
    {
      "epoch": 1.3021805018230854,
      "grad_norm": 0.14216524362564087,
      "learning_rate": 7.444370498738165e-06,
      "loss": 0.0165,
      "step": 795700
    },
    {
      "epoch": 1.3022132322617388,
      "grad_norm": 0.5886271595954895,
      "learning_rate": 7.444304606524647e-06,
      "loss": 0.0162,
      "step": 795720
    },
    {
      "epoch": 1.3022459627003922,
      "grad_norm": 0.5535794496536255,
      "learning_rate": 7.444238714311131e-06,
      "loss": 0.0158,
      "step": 795740
    },
    {
      "epoch": 1.3022786931390455,
      "grad_norm": 0.3940480649471283,
      "learning_rate": 7.444172822097613e-06,
      "loss": 0.0126,
      "step": 795760
    },
    {
      "epoch": 1.3023114235776987,
      "grad_norm": 0.5916497707366943,
      "learning_rate": 7.444106929884096e-06,
      "loss": 0.0197,
      "step": 795780
    },
    {
      "epoch": 1.302344154016352,
      "grad_norm": 0.6261746287345886,
      "learning_rate": 7.44404103767058e-06,
      "loss": 0.0169,
      "step": 795800
    },
    {
      "epoch": 1.3023768844550054,
      "grad_norm": 1.1385548114776611,
      "learning_rate": 7.443975145457062e-06,
      "loss": 0.0174,
      "step": 795820
    },
    {
      "epoch": 1.3024096148936588,
      "grad_norm": 0.4277119040489197,
      "learning_rate": 7.443909253243545e-06,
      "loss": 0.0312,
      "step": 795840
    },
    {
      "epoch": 1.3024423453323122,
      "grad_norm": 0.4743807017803192,
      "learning_rate": 7.443843361030027e-06,
      "loss": 0.028,
      "step": 795860
    },
    {
      "epoch": 1.3024750757709656,
      "grad_norm": 0.3434786796569824,
      "learning_rate": 7.443777468816511e-06,
      "loss": 0.0247,
      "step": 795880
    },
    {
      "epoch": 1.302507806209619,
      "grad_norm": 0.6253879070281982,
      "learning_rate": 7.4437115766029935e-06,
      "loss": 0.0167,
      "step": 795900
    },
    {
      "epoch": 1.302540536648272,
      "grad_norm": 0.4902973771095276,
      "learning_rate": 7.443645684389477e-06,
      "loss": 0.0191,
      "step": 795920
    },
    {
      "epoch": 1.3025732670869254,
      "grad_norm": 0.6572386026382446,
      "learning_rate": 7.443579792175959e-06,
      "loss": 0.0311,
      "step": 795940
    },
    {
      "epoch": 1.3026059975255788,
      "grad_norm": 0.48202112317085266,
      "learning_rate": 7.4435138999624426e-06,
      "loss": 0.023,
      "step": 795960
    },
    {
      "epoch": 1.3026387279642322,
      "grad_norm": 0.3318174183368683,
      "learning_rate": 7.4434480077489244e-06,
      "loss": 0.0157,
      "step": 795980
    },
    {
      "epoch": 1.3026714584028856,
      "grad_norm": 0.4331129789352417,
      "learning_rate": 7.443382115535408e-06,
      "loss": 0.0205,
      "step": 796000
    },
    {
      "epoch": 1.302704188841539,
      "grad_norm": 0.5400314331054688,
      "learning_rate": 7.44331622332189e-06,
      "loss": 0.0191,
      "step": 796020
    },
    {
      "epoch": 1.3027369192801923,
      "grad_norm": 2.760284185409546,
      "learning_rate": 7.4432503311083735e-06,
      "loss": 0.0183,
      "step": 796040
    },
    {
      "epoch": 1.3027696497188455,
      "grad_norm": 0.20948152244091034,
      "learning_rate": 7.443184438894856e-06,
      "loss": 0.0149,
      "step": 796060
    },
    {
      "epoch": 1.3028023801574988,
      "grad_norm": 1.271515965461731,
      "learning_rate": 7.443118546681339e-06,
      "loss": 0.0273,
      "step": 796080
    },
    {
      "epoch": 1.3028351105961522,
      "grad_norm": 0.24900037050247192,
      "learning_rate": 7.443052654467822e-06,
      "loss": 0.0161,
      "step": 796100
    },
    {
      "epoch": 1.3028678410348056,
      "grad_norm": 0.21685907244682312,
      "learning_rate": 7.442986762254305e-06,
      "loss": 0.0317,
      "step": 796120
    },
    {
      "epoch": 1.302900571473459,
      "grad_norm": 2.258711576461792,
      "learning_rate": 7.442920870040787e-06,
      "loss": 0.0131,
      "step": 796140
    },
    {
      "epoch": 1.302933301912112,
      "grad_norm": 0.35048070549964905,
      "learning_rate": 7.442854977827271e-06,
      "loss": 0.02,
      "step": 796160
    },
    {
      "epoch": 1.3029660323507657,
      "grad_norm": 0.8199480772018433,
      "learning_rate": 7.442789085613754e-06,
      "loss": 0.0254,
      "step": 796180
    },
    {
      "epoch": 1.3029987627894188,
      "grad_norm": 0.520568311214447,
      "learning_rate": 7.442723193400236e-06,
      "loss": 0.0219,
      "step": 796200
    },
    {
      "epoch": 1.3030314932280722,
      "grad_norm": 1.0075818300247192,
      "learning_rate": 7.44265730118672e-06,
      "loss": 0.0262,
      "step": 796220
    },
    {
      "epoch": 1.3030642236667256,
      "grad_norm": 3.2654383182525635,
      "learning_rate": 7.442591408973202e-06,
      "loss": 0.0167,
      "step": 796240
    },
    {
      "epoch": 1.303096954105379,
      "grad_norm": 1.0727075338363647,
      "learning_rate": 7.442525516759685e-06,
      "loss": 0.0202,
      "step": 796260
    },
    {
      "epoch": 1.3031296845440323,
      "grad_norm": 0.45313072204589844,
      "learning_rate": 7.442459624546168e-06,
      "loss": 0.0198,
      "step": 796280
    },
    {
      "epoch": 1.3031624149826855,
      "grad_norm": 0.4293968379497528,
      "learning_rate": 7.442393732332651e-06,
      "loss": 0.0251,
      "step": 796300
    },
    {
      "epoch": 1.303195145421339,
      "grad_norm": 0.4349396526813507,
      "learning_rate": 7.4423278401191336e-06,
      "loss": 0.0247,
      "step": 796320
    },
    {
      "epoch": 1.3032278758599922,
      "grad_norm": 0.4524620771408081,
      "learning_rate": 7.442261947905617e-06,
      "loss": 0.0266,
      "step": 796340
    },
    {
      "epoch": 1.3032606062986456,
      "grad_norm": 1.2202874422073364,
      "learning_rate": 7.442196055692099e-06,
      "loss": 0.0228,
      "step": 796360
    },
    {
      "epoch": 1.303293336737299,
      "grad_norm": 0.17663249373435974,
      "learning_rate": 7.442130163478583e-06,
      "loss": 0.0239,
      "step": 796380
    },
    {
      "epoch": 1.3033260671759523,
      "grad_norm": 0.5131716728210449,
      "learning_rate": 7.4420642712650645e-06,
      "loss": 0.0311,
      "step": 796400
    },
    {
      "epoch": 1.3033587976146057,
      "grad_norm": 0.4849606156349182,
      "learning_rate": 7.441998379051548e-06,
      "loss": 0.0232,
      "step": 796420
    },
    {
      "epoch": 1.3033915280532589,
      "grad_norm": 0.9477788805961609,
      "learning_rate": 7.441932486838031e-06,
      "loss": 0.0185,
      "step": 796440
    },
    {
      "epoch": 1.3034242584919122,
      "grad_norm": 0.428358793258667,
      "learning_rate": 7.441866594624514e-06,
      "loss": 0.0274,
      "step": 796460
    },
    {
      "epoch": 1.3034569889305656,
      "grad_norm": 0.2758510112762451,
      "learning_rate": 7.441800702410996e-06,
      "loss": 0.0363,
      "step": 796480
    },
    {
      "epoch": 1.303489719369219,
      "grad_norm": 0.31445246934890747,
      "learning_rate": 7.44173481019748e-06,
      "loss": 0.0235,
      "step": 796500
    },
    {
      "epoch": 1.3035224498078724,
      "grad_norm": 1.1515371799468994,
      "learning_rate": 7.441668917983963e-06,
      "loss": 0.0275,
      "step": 796520
    },
    {
      "epoch": 1.3035551802465257,
      "grad_norm": 0.9810168743133545,
      "learning_rate": 7.441603025770445e-06,
      "loss": 0.0205,
      "step": 796540
    },
    {
      "epoch": 1.303587910685179,
      "grad_norm": 0.3639225661754608,
      "learning_rate": 7.441537133556929e-06,
      "loss": 0.0236,
      "step": 796560
    },
    {
      "epoch": 1.3036206411238322,
      "grad_norm": 0.8580113649368286,
      "learning_rate": 7.441471241343411e-06,
      "loss": 0.0212,
      "step": 796580
    },
    {
      "epoch": 1.3036533715624856,
      "grad_norm": 0.470124214887619,
      "learning_rate": 7.4414053491298945e-06,
      "loss": 0.019,
      "step": 796600
    },
    {
      "epoch": 1.303686102001139,
      "grad_norm": 0.9471173882484436,
      "learning_rate": 7.441339456916376e-06,
      "loss": 0.0213,
      "step": 796620
    },
    {
      "epoch": 1.3037188324397924,
      "grad_norm": 0.29815906286239624,
      "learning_rate": 7.44127356470286e-06,
      "loss": 0.0211,
      "step": 796640
    },
    {
      "epoch": 1.3037515628784457,
      "grad_norm": 1.037760615348816,
      "learning_rate": 7.441207672489343e-06,
      "loss": 0.0321,
      "step": 796660
    },
    {
      "epoch": 1.303784293317099,
      "grad_norm": 0.5107155442237854,
      "learning_rate": 7.441141780275825e-06,
      "loss": 0.027,
      "step": 796680
    },
    {
      "epoch": 1.3038170237557525,
      "grad_norm": 0.39662259817123413,
      "learning_rate": 7.441075888062308e-06,
      "loss": 0.0366,
      "step": 796700
    },
    {
      "epoch": 1.3038497541944056,
      "grad_norm": 2.1004903316497803,
      "learning_rate": 7.441009995848792e-06,
      "loss": 0.0226,
      "step": 796720
    },
    {
      "epoch": 1.303882484633059,
      "grad_norm": 0.545047402381897,
      "learning_rate": 7.440944103635274e-06,
      "loss": 0.021,
      "step": 796740
    },
    {
      "epoch": 1.3039152150717124,
      "grad_norm": 0.4537295401096344,
      "learning_rate": 7.440878211421757e-06,
      "loss": 0.0192,
      "step": 796760
    },
    {
      "epoch": 1.3039479455103657,
      "grad_norm": 1.4046456813812256,
      "learning_rate": 7.440812319208239e-06,
      "loss": 0.0205,
      "step": 796780
    },
    {
      "epoch": 1.3039806759490191,
      "grad_norm": 0.12145213782787323,
      "learning_rate": 7.440746426994723e-06,
      "loss": 0.0143,
      "step": 796800
    },
    {
      "epoch": 1.3040134063876723,
      "grad_norm": 1.017964243888855,
      "learning_rate": 7.440680534781205e-06,
      "loss": 0.0269,
      "step": 796820
    },
    {
      "epoch": 1.3040461368263259,
      "grad_norm": 0.16442649066448212,
      "learning_rate": 7.440614642567688e-06,
      "loss": 0.0197,
      "step": 796840
    },
    {
      "epoch": 1.304078867264979,
      "grad_norm": 0.41171160340309143,
      "learning_rate": 7.440548750354172e-06,
      "loss": 0.0163,
      "step": 796860
    },
    {
      "epoch": 1.3041115977036324,
      "grad_norm": 0.24953125417232513,
      "learning_rate": 7.440482858140654e-06,
      "loss": 0.0181,
      "step": 796880
    },
    {
      "epoch": 1.3041443281422858,
      "grad_norm": 0.35125938057899475,
      "learning_rate": 7.440416965927137e-06,
      "loss": 0.0201,
      "step": 796900
    },
    {
      "epoch": 1.3041770585809391,
      "grad_norm": 0.6197803020477295,
      "learning_rate": 7.44035107371362e-06,
      "loss": 0.0161,
      "step": 796920
    },
    {
      "epoch": 1.3042097890195925,
      "grad_norm": 0.8189211487770081,
      "learning_rate": 7.440285181500103e-06,
      "loss": 0.0204,
      "step": 796940
    },
    {
      "epoch": 1.3042425194582457,
      "grad_norm": 0.4052823483943939,
      "learning_rate": 7.4402192892865855e-06,
      "loss": 0.0267,
      "step": 796960
    },
    {
      "epoch": 1.3042752498968992,
      "grad_norm": 0.4993821084499359,
      "learning_rate": 7.440153397073069e-06,
      "loss": 0.0263,
      "step": 796980
    },
    {
      "epoch": 1.3043079803355524,
      "grad_norm": 1.1486190557479858,
      "learning_rate": 7.440087504859551e-06,
      "loss": 0.0238,
      "step": 797000
    },
    {
      "epoch": 1.3043407107742058,
      "grad_norm": 1.3872764110565186,
      "learning_rate": 7.4400216126460345e-06,
      "loss": 0.0216,
      "step": 797020
    },
    {
      "epoch": 1.3043734412128591,
      "grad_norm": 0.48853155970573425,
      "learning_rate": 7.4399557204325164e-06,
      "loss": 0.0255,
      "step": 797040
    },
    {
      "epoch": 1.3044061716515125,
      "grad_norm": 0.6536421179771423,
      "learning_rate": 7.439889828219e-06,
      "loss": 0.0199,
      "step": 797060
    },
    {
      "epoch": 1.3044389020901659,
      "grad_norm": 1.0366822481155396,
      "learning_rate": 7.439823936005483e-06,
      "loss": 0.0282,
      "step": 797080
    },
    {
      "epoch": 1.304471632528819,
      "grad_norm": 0.6505118012428284,
      "learning_rate": 7.4397580437919655e-06,
      "loss": 0.0177,
      "step": 797100
    },
    {
      "epoch": 1.3045043629674724,
      "grad_norm": 1.2592172622680664,
      "learning_rate": 7.439692151578448e-06,
      "loss": 0.0178,
      "step": 797120
    },
    {
      "epoch": 1.3045370934061258,
      "grad_norm": 0.47998565435409546,
      "learning_rate": 7.439626259364932e-06,
      "loss": 0.0173,
      "step": 797140
    },
    {
      "epoch": 1.3045698238447792,
      "grad_norm": 0.24238499999046326,
      "learning_rate": 7.439560367151414e-06,
      "loss": 0.0218,
      "step": 797160
    },
    {
      "epoch": 1.3046025542834325,
      "grad_norm": 1.268929123878479,
      "learning_rate": 7.439494474937897e-06,
      "loss": 0.0247,
      "step": 797180
    },
    {
      "epoch": 1.304635284722086,
      "grad_norm": 0.9171983003616333,
      "learning_rate": 7.439428582724381e-06,
      "loss": 0.0349,
      "step": 797200
    },
    {
      "epoch": 1.3046680151607393,
      "grad_norm": 0.3614886403083801,
      "learning_rate": 7.439362690510863e-06,
      "loss": 0.0158,
      "step": 797220
    },
    {
      "epoch": 1.3047007455993924,
      "grad_norm": 0.7625621557235718,
      "learning_rate": 7.439296798297346e-06,
      "loss": 0.029,
      "step": 797240
    },
    {
      "epoch": 1.3047334760380458,
      "grad_norm": 4.4868879318237305,
      "learning_rate": 7.439230906083828e-06,
      "loss": 0.0399,
      "step": 797260
    },
    {
      "epoch": 1.3047662064766992,
      "grad_norm": 2.6534605026245117,
      "learning_rate": 7.439165013870312e-06,
      "loss": 0.0153,
      "step": 797280
    },
    {
      "epoch": 1.3047989369153525,
      "grad_norm": 0.8149994015693665,
      "learning_rate": 7.439099121656795e-06,
      "loss": 0.0296,
      "step": 797300
    },
    {
      "epoch": 1.304831667354006,
      "grad_norm": 1.3448610305786133,
      "learning_rate": 7.439033229443277e-06,
      "loss": 0.0236,
      "step": 797320
    },
    {
      "epoch": 1.3048643977926593,
      "grad_norm": 0.09174938499927521,
      "learning_rate": 7.43896733722976e-06,
      "loss": 0.0162,
      "step": 797340
    },
    {
      "epoch": 1.3048971282313127,
      "grad_norm": 0.4134615659713745,
      "learning_rate": 7.438901445016244e-06,
      "loss": 0.0173,
      "step": 797360
    },
    {
      "epoch": 1.3049298586699658,
      "grad_norm": 0.08576254546642303,
      "learning_rate": 7.4388355528027255e-06,
      "loss": 0.0159,
      "step": 797380
    },
    {
      "epoch": 1.3049625891086192,
      "grad_norm": 0.6095507144927979,
      "learning_rate": 7.438769660589209e-06,
      "loss": 0.0241,
      "step": 797400
    },
    {
      "epoch": 1.3049953195472725,
      "grad_norm": 0.7931849956512451,
      "learning_rate": 7.438703768375691e-06,
      "loss": 0.0271,
      "step": 797420
    },
    {
      "epoch": 1.305028049985926,
      "grad_norm": 0.2140611857175827,
      "learning_rate": 7.438637876162175e-06,
      "loss": 0.0227,
      "step": 797440
    },
    {
      "epoch": 1.3050607804245793,
      "grad_norm": 1.1581419706344604,
      "learning_rate": 7.438571983948657e-06,
      "loss": 0.0189,
      "step": 797460
    },
    {
      "epoch": 1.3050935108632327,
      "grad_norm": 0.9921731948852539,
      "learning_rate": 7.43850609173514e-06,
      "loss": 0.0183,
      "step": 797480
    },
    {
      "epoch": 1.305126241301886,
      "grad_norm": 0.7734460830688477,
      "learning_rate": 7.438440199521623e-06,
      "loss": 0.0204,
      "step": 797500
    },
    {
      "epoch": 1.3051589717405392,
      "grad_norm": 0.7525806427001953,
      "learning_rate": 7.438374307308106e-06,
      "loss": 0.0154,
      "step": 797520
    },
    {
      "epoch": 1.3051917021791926,
      "grad_norm": 0.36395320296287537,
      "learning_rate": 7.438308415094588e-06,
      "loss": 0.0217,
      "step": 797540
    },
    {
      "epoch": 1.305224432617846,
      "grad_norm": 0.7218731045722961,
      "learning_rate": 7.438242522881072e-06,
      "loss": 0.0257,
      "step": 797560
    },
    {
      "epoch": 1.3052571630564993,
      "grad_norm": 0.3076212406158447,
      "learning_rate": 7.4381766306675555e-06,
      "loss": 0.0226,
      "step": 797580
    },
    {
      "epoch": 1.3052898934951527,
      "grad_norm": 0.6437960863113403,
      "learning_rate": 7.438110738454037e-06,
      "loss": 0.0205,
      "step": 797600
    },
    {
      "epoch": 1.3053226239338058,
      "grad_norm": 0.17196109890937805,
      "learning_rate": 7.438044846240521e-06,
      "loss": 0.0212,
      "step": 797620
    },
    {
      "epoch": 1.3053553543724594,
      "grad_norm": 0.49784108996391296,
      "learning_rate": 7.437978954027003e-06,
      "loss": 0.0235,
      "step": 797640
    },
    {
      "epoch": 1.3053880848111126,
      "grad_norm": 1.401879906654358,
      "learning_rate": 7.4379130618134864e-06,
      "loss": 0.0269,
      "step": 797660
    },
    {
      "epoch": 1.305420815249766,
      "grad_norm": 0.32864537835121155,
      "learning_rate": 7.437847169599969e-06,
      "loss": 0.0156,
      "step": 797680
    },
    {
      "epoch": 1.3054535456884193,
      "grad_norm": 0.683284342288971,
      "learning_rate": 7.437781277386452e-06,
      "loss": 0.0181,
      "step": 797700
    },
    {
      "epoch": 1.3054862761270727,
      "grad_norm": 0.40764930844306946,
      "learning_rate": 7.437715385172935e-06,
      "loss": 0.0225,
      "step": 797720
    },
    {
      "epoch": 1.305519006565726,
      "grad_norm": 0.783875584602356,
      "learning_rate": 7.437649492959418e-06,
      "loss": 0.0229,
      "step": 797740
    },
    {
      "epoch": 1.3055517370043792,
      "grad_norm": 0.8438714146614075,
      "learning_rate": 7.4375836007459e-06,
      "loss": 0.0167,
      "step": 797760
    },
    {
      "epoch": 1.3055844674430328,
      "grad_norm": 1.0471831560134888,
      "learning_rate": 7.437517708532384e-06,
      "loss": 0.0155,
      "step": 797780
    },
    {
      "epoch": 1.305617197881686,
      "grad_norm": 0.5820555090904236,
      "learning_rate": 7.437451816318866e-06,
      "loss": 0.0186,
      "step": 797800
    },
    {
      "epoch": 1.3056499283203393,
      "grad_norm": 0.6216322779655457,
      "learning_rate": 7.437385924105349e-06,
      "loss": 0.034,
      "step": 797820
    },
    {
      "epoch": 1.3056826587589927,
      "grad_norm": 0.33052685856819153,
      "learning_rate": 7.437320031891831e-06,
      "loss": 0.0182,
      "step": 797840
    },
    {
      "epoch": 1.305715389197646,
      "grad_norm": 1.1791069507598877,
      "learning_rate": 7.437254139678315e-06,
      "loss": 0.0264,
      "step": 797860
    },
    {
      "epoch": 1.3057481196362994,
      "grad_norm": 5.98821496963501,
      "learning_rate": 7.4371882474647974e-06,
      "loss": 0.028,
      "step": 797880
    },
    {
      "epoch": 1.3057808500749526,
      "grad_norm": 0.22222675383090973,
      "learning_rate": 7.43712235525128e-06,
      "loss": 0.015,
      "step": 797900
    },
    {
      "epoch": 1.305813580513606,
      "grad_norm": 0.9614330530166626,
      "learning_rate": 7.437056463037764e-06,
      "loss": 0.0273,
      "step": 797920
    },
    {
      "epoch": 1.3058463109522593,
      "grad_norm": 1.188046932220459,
      "learning_rate": 7.4369905708242465e-06,
      "loss": 0.0194,
      "step": 797940
    },
    {
      "epoch": 1.3058790413909127,
      "grad_norm": 1.4701759815216064,
      "learning_rate": 7.436924678610729e-06,
      "loss": 0.0237,
      "step": 797960
    },
    {
      "epoch": 1.305911771829566,
      "grad_norm": 1.3565874099731445,
      "learning_rate": 7.436858786397212e-06,
      "loss": 0.0147,
      "step": 797980
    },
    {
      "epoch": 1.3059445022682195,
      "grad_norm": 0.6631044149398804,
      "learning_rate": 7.4367928941836956e-06,
      "loss": 0.0222,
      "step": 798000
    },
    {
      "epoch": 1.3059772327068728,
      "grad_norm": 0.5482759475708008,
      "learning_rate": 7.4367270019701775e-06,
      "loss": 0.019,
      "step": 798020
    },
    {
      "epoch": 1.306009963145526,
      "grad_norm": 0.1299993097782135,
      "learning_rate": 7.436661109756661e-06,
      "loss": 0.0192,
      "step": 798040
    },
    {
      "epoch": 1.3060426935841793,
      "grad_norm": 0.3887174129486084,
      "learning_rate": 7.436595217543143e-06,
      "loss": 0.0284,
      "step": 798060
    },
    {
      "epoch": 1.3060754240228327,
      "grad_norm": 0.4151318371295929,
      "learning_rate": 7.4365293253296265e-06,
      "loss": 0.025,
      "step": 798080
    },
    {
      "epoch": 1.306108154461486,
      "grad_norm": 1.650894045829773,
      "learning_rate": 7.436463433116109e-06,
      "loss": 0.0262,
      "step": 798100
    },
    {
      "epoch": 1.3061408849001395,
      "grad_norm": 0.1274997442960739,
      "learning_rate": 7.436397540902592e-06,
      "loss": 0.0198,
      "step": 798120
    },
    {
      "epoch": 1.3061736153387928,
      "grad_norm": 0.18816828727722168,
      "learning_rate": 7.436331648689075e-06,
      "loss": 0.0267,
      "step": 798140
    },
    {
      "epoch": 1.3062063457774462,
      "grad_norm": 1.0341190099716187,
      "learning_rate": 7.436265756475558e-06,
      "loss": 0.0278,
      "step": 798160
    },
    {
      "epoch": 1.3062390762160994,
      "grad_norm": 0.49192944169044495,
      "learning_rate": 7.43619986426204e-06,
      "loss": 0.016,
      "step": 798180
    },
    {
      "epoch": 1.3062718066547527,
      "grad_norm": 1.4671275615692139,
      "learning_rate": 7.436133972048524e-06,
      "loss": 0.0168,
      "step": 798200
    },
    {
      "epoch": 1.306304537093406,
      "grad_norm": 0.2836540639400482,
      "learning_rate": 7.436068079835006e-06,
      "loss": 0.0214,
      "step": 798220
    },
    {
      "epoch": 1.3063372675320595,
      "grad_norm": 0.6960681080818176,
      "learning_rate": 7.436002187621489e-06,
      "loss": 0.0264,
      "step": 798240
    },
    {
      "epoch": 1.3063699979707128,
      "grad_norm": 0.4819197356700897,
      "learning_rate": 7.435936295407973e-06,
      "loss": 0.019,
      "step": 798260
    },
    {
      "epoch": 1.3064027284093662,
      "grad_norm": 0.25624415278434753,
      "learning_rate": 7.435870403194455e-06,
      "loss": 0.023,
      "step": 798280
    },
    {
      "epoch": 1.3064354588480196,
      "grad_norm": 1.5673043727874756,
      "learning_rate": 7.435804510980938e-06,
      "loss": 0.0195,
      "step": 798300
    },
    {
      "epoch": 1.3064681892866727,
      "grad_norm": 0.06415320932865143,
      "learning_rate": 7.435738618767421e-06,
      "loss": 0.0191,
      "step": 798320
    },
    {
      "epoch": 1.3065009197253261,
      "grad_norm": 0.3750574588775635,
      "learning_rate": 7.435672726553904e-06,
      "loss": 0.0203,
      "step": 798340
    },
    {
      "epoch": 1.3065336501639795,
      "grad_norm": 0.41438207030296326,
      "learning_rate": 7.4356068343403866e-06,
      "loss": 0.0366,
      "step": 798360
    },
    {
      "epoch": 1.3065663806026329,
      "grad_norm": 1.0265004634857178,
      "learning_rate": 7.43554094212687e-06,
      "loss": 0.0244,
      "step": 798380
    },
    {
      "epoch": 1.3065991110412862,
      "grad_norm": 1.6330721378326416,
      "learning_rate": 7.435475049913352e-06,
      "loss": 0.0218,
      "step": 798400
    },
    {
      "epoch": 1.3066318414799394,
      "grad_norm": 0.892658531665802,
      "learning_rate": 7.435409157699836e-06,
      "loss": 0.0171,
      "step": 798420
    },
    {
      "epoch": 1.306664571918593,
      "grad_norm": 0.8789995312690735,
      "learning_rate": 7.4353432654863175e-06,
      "loss": 0.0237,
      "step": 798440
    },
    {
      "epoch": 1.3066973023572461,
      "grad_norm": 0.31426694989204407,
      "learning_rate": 7.435277373272801e-06,
      "loss": 0.0208,
      "step": 798460
    },
    {
      "epoch": 1.3067300327958995,
      "grad_norm": 0.928601086139679,
      "learning_rate": 7.435211481059284e-06,
      "loss": 0.0201,
      "step": 798480
    },
    {
      "epoch": 1.3067627632345529,
      "grad_norm": 0.616005003452301,
      "learning_rate": 7.435145588845767e-06,
      "loss": 0.0228,
      "step": 798500
    },
    {
      "epoch": 1.3067954936732062,
      "grad_norm": 0.14320385456085205,
      "learning_rate": 7.435079696632249e-06,
      "loss": 0.0185,
      "step": 798520
    },
    {
      "epoch": 1.3068282241118596,
      "grad_norm": 0.7682470679283142,
      "learning_rate": 7.435013804418733e-06,
      "loss": 0.0213,
      "step": 798540
    },
    {
      "epoch": 1.3068609545505128,
      "grad_norm": 0.3688492178916931,
      "learning_rate": 7.434947912205215e-06,
      "loss": 0.022,
      "step": 798560
    },
    {
      "epoch": 1.3068936849891664,
      "grad_norm": 0.22468578815460205,
      "learning_rate": 7.434882019991698e-06,
      "loss": 0.0259,
      "step": 798580
    },
    {
      "epoch": 1.3069264154278195,
      "grad_norm": 0.8607835173606873,
      "learning_rate": 7.43481612777818e-06,
      "loss": 0.0254,
      "step": 798600
    },
    {
      "epoch": 1.3069591458664729,
      "grad_norm": 1.212613582611084,
      "learning_rate": 7.434750235564664e-06,
      "loss": 0.0237,
      "step": 798620
    },
    {
      "epoch": 1.3069918763051263,
      "grad_norm": 0.695883572101593,
      "learning_rate": 7.4346843433511475e-06,
      "loss": 0.0185,
      "step": 798640
    },
    {
      "epoch": 1.3070246067437796,
      "grad_norm": 0.5464425086975098,
      "learning_rate": 7.434618451137629e-06,
      "loss": 0.0204,
      "step": 798660
    },
    {
      "epoch": 1.307057337182433,
      "grad_norm": 1.0023690462112427,
      "learning_rate": 7.434552558924113e-06,
      "loss": 0.0194,
      "step": 798680
    },
    {
      "epoch": 1.3070900676210861,
      "grad_norm": 0.08641315251588821,
      "learning_rate": 7.434486666710596e-06,
      "loss": 0.0303,
      "step": 798700
    },
    {
      "epoch": 1.3071227980597395,
      "grad_norm": 1.075903296470642,
      "learning_rate": 7.4344207744970784e-06,
      "loss": 0.0259,
      "step": 798720
    },
    {
      "epoch": 1.307155528498393,
      "grad_norm": 0.7359236478805542,
      "learning_rate": 7.434354882283561e-06,
      "loss": 0.021,
      "step": 798740
    },
    {
      "epoch": 1.3071882589370463,
      "grad_norm": 1.226076364517212,
      "learning_rate": 7.434288990070045e-06,
      "loss": 0.024,
      "step": 798760
    },
    {
      "epoch": 1.3072209893756996,
      "grad_norm": 2.154484748840332,
      "learning_rate": 7.434223097856527e-06,
      "loss": 0.0224,
      "step": 798780
    },
    {
      "epoch": 1.307253719814353,
      "grad_norm": 0.7784987688064575,
      "learning_rate": 7.43415720564301e-06,
      "loss": 0.0197,
      "step": 798800
    },
    {
      "epoch": 1.3072864502530064,
      "grad_norm": 0.6160233616828918,
      "learning_rate": 7.434091313429492e-06,
      "loss": 0.0159,
      "step": 798820
    },
    {
      "epoch": 1.3073191806916595,
      "grad_norm": 0.39118656516075134,
      "learning_rate": 7.434025421215976e-06,
      "loss": 0.0226,
      "step": 798840
    },
    {
      "epoch": 1.307351911130313,
      "grad_norm": 1.3506604433059692,
      "learning_rate": 7.433959529002458e-06,
      "loss": 0.0184,
      "step": 798860
    },
    {
      "epoch": 1.3073846415689663,
      "grad_norm": 1.591827392578125,
      "learning_rate": 7.433893636788941e-06,
      "loss": 0.0175,
      "step": 798880
    },
    {
      "epoch": 1.3074173720076196,
      "grad_norm": 0.7799737453460693,
      "learning_rate": 7.433827744575424e-06,
      "loss": 0.0231,
      "step": 798900
    },
    {
      "epoch": 1.307450102446273,
      "grad_norm": 0.47506317496299744,
      "learning_rate": 7.433761852361907e-06,
      "loss": 0.0232,
      "step": 798920
    },
    {
      "epoch": 1.3074828328849264,
      "grad_norm": 0.35183635354042053,
      "learning_rate": 7.433695960148389e-06,
      "loss": 0.0211,
      "step": 798940
    },
    {
      "epoch": 1.3075155633235798,
      "grad_norm": 0.6946019530296326,
      "learning_rate": 7.433630067934873e-06,
      "loss": 0.0302,
      "step": 798960
    },
    {
      "epoch": 1.307548293762233,
      "grad_norm": 0.3303303122520447,
      "learning_rate": 7.433564175721356e-06,
      "loss": 0.0273,
      "step": 798980
    },
    {
      "epoch": 1.3075810242008863,
      "grad_norm": 1.2346241474151611,
      "learning_rate": 7.4334982835078385e-06,
      "loss": 0.0138,
      "step": 799000
    },
    {
      "epoch": 1.3076137546395397,
      "grad_norm": 0.5949380397796631,
      "learning_rate": 7.433432391294322e-06,
      "loss": 0.0217,
      "step": 799020
    },
    {
      "epoch": 1.307646485078193,
      "grad_norm": 0.49723193049430847,
      "learning_rate": 7.433366499080804e-06,
      "loss": 0.0232,
      "step": 799040
    },
    {
      "epoch": 1.3076792155168464,
      "grad_norm": 0.4359881281852722,
      "learning_rate": 7.4333006068672875e-06,
      "loss": 0.0274,
      "step": 799060
    },
    {
      "epoch": 1.3077119459554998,
      "grad_norm": 1.7260653972625732,
      "learning_rate": 7.4332347146537694e-06,
      "loss": 0.0244,
      "step": 799080
    },
    {
      "epoch": 1.3077446763941531,
      "grad_norm": 1.0628662109375,
      "learning_rate": 7.433168822440253e-06,
      "loss": 0.0209,
      "step": 799100
    },
    {
      "epoch": 1.3077774068328063,
      "grad_norm": 0.3584712743759155,
      "learning_rate": 7.433102930226736e-06,
      "loss": 0.0152,
      "step": 799120
    },
    {
      "epoch": 1.3078101372714597,
      "grad_norm": 0.30567431449890137,
      "learning_rate": 7.4330370380132185e-06,
      "loss": 0.0205,
      "step": 799140
    },
    {
      "epoch": 1.307842867710113,
      "grad_norm": 0.2215343564748764,
      "learning_rate": 7.432971145799701e-06,
      "loss": 0.0181,
      "step": 799160
    },
    {
      "epoch": 1.3078755981487664,
      "grad_norm": 0.5098065733909607,
      "learning_rate": 7.432905253586185e-06,
      "loss": 0.0224,
      "step": 799180
    },
    {
      "epoch": 1.3079083285874198,
      "grad_norm": 1.2023582458496094,
      "learning_rate": 7.432839361372667e-06,
      "loss": 0.0222,
      "step": 799200
    },
    {
      "epoch": 1.307941059026073,
      "grad_norm": 0.37404707074165344,
      "learning_rate": 7.43277346915915e-06,
      "loss": 0.0179,
      "step": 799220
    },
    {
      "epoch": 1.3079737894647265,
      "grad_norm": 0.7541919946670532,
      "learning_rate": 7.432707576945632e-06,
      "loss": 0.0197,
      "step": 799240
    },
    {
      "epoch": 1.3080065199033797,
      "grad_norm": 0.9293731451034546,
      "learning_rate": 7.432641684732116e-06,
      "loss": 0.0167,
      "step": 799260
    },
    {
      "epoch": 1.308039250342033,
      "grad_norm": 0.7989674210548401,
      "learning_rate": 7.4325757925185985e-06,
      "loss": 0.0236,
      "step": 799280
    },
    {
      "epoch": 1.3080719807806864,
      "grad_norm": 1.2065744400024414,
      "learning_rate": 7.432509900305081e-06,
      "loss": 0.0185,
      "step": 799300
    },
    {
      "epoch": 1.3081047112193398,
      "grad_norm": 0.6654758453369141,
      "learning_rate": 7.432444008091565e-06,
      "loss": 0.0174,
      "step": 799320
    },
    {
      "epoch": 1.3081374416579932,
      "grad_norm": 0.6257801055908203,
      "learning_rate": 7.432378115878048e-06,
      "loss": 0.0259,
      "step": 799340
    },
    {
      "epoch": 1.3081701720966463,
      "grad_norm": 0.1700705587863922,
      "learning_rate": 7.43231222366453e-06,
      "loss": 0.0292,
      "step": 799360
    },
    {
      "epoch": 1.3082029025353,
      "grad_norm": 0.19910849630832672,
      "learning_rate": 7.432246331451013e-06,
      "loss": 0.0204,
      "step": 799380
    },
    {
      "epoch": 1.308235632973953,
      "grad_norm": 0.2986762523651123,
      "learning_rate": 7.432180439237497e-06,
      "loss": 0.027,
      "step": 799400
    },
    {
      "epoch": 1.3082683634126064,
      "grad_norm": 1.1028679609298706,
      "learning_rate": 7.4321145470239786e-06,
      "loss": 0.0204,
      "step": 799420
    },
    {
      "epoch": 1.3083010938512598,
      "grad_norm": 0.9944841265678406,
      "learning_rate": 7.432048654810462e-06,
      "loss": 0.0188,
      "step": 799440
    },
    {
      "epoch": 1.3083338242899132,
      "grad_norm": 0.3447525203227997,
      "learning_rate": 7.431982762596944e-06,
      "loss": 0.0179,
      "step": 799460
    },
    {
      "epoch": 1.3083665547285666,
      "grad_norm": 0.9849733114242554,
      "learning_rate": 7.431916870383428e-06,
      "loss": 0.023,
      "step": 799480
    },
    {
      "epoch": 1.3083992851672197,
      "grad_norm": 0.32036617398262024,
      "learning_rate": 7.43185097816991e-06,
      "loss": 0.0194,
      "step": 799500
    },
    {
      "epoch": 1.308432015605873,
      "grad_norm": 29.94050407409668,
      "learning_rate": 7.431785085956393e-06,
      "loss": 0.0195,
      "step": 799520
    },
    {
      "epoch": 1.3084647460445264,
      "grad_norm": 0.3760245740413666,
      "learning_rate": 7.431719193742876e-06,
      "loss": 0.0202,
      "step": 799540
    },
    {
      "epoch": 1.3084974764831798,
      "grad_norm": 3.0818889141082764,
      "learning_rate": 7.431653301529359e-06,
      "loss": 0.0172,
      "step": 799560
    },
    {
      "epoch": 1.3085302069218332,
      "grad_norm": 0.6077883839607239,
      "learning_rate": 7.431587409315841e-06,
      "loss": 0.023,
      "step": 799580
    },
    {
      "epoch": 1.3085629373604866,
      "grad_norm": 0.6861620545387268,
      "learning_rate": 7.431521517102325e-06,
      "loss": 0.0235,
      "step": 799600
    },
    {
      "epoch": 1.30859566779914,
      "grad_norm": 0.650913655757904,
      "learning_rate": 7.431455624888807e-06,
      "loss": 0.0229,
      "step": 799620
    },
    {
      "epoch": 1.308628398237793,
      "grad_norm": 0.2292068600654602,
      "learning_rate": 7.43138973267529e-06,
      "loss": 0.0196,
      "step": 799640
    },
    {
      "epoch": 1.3086611286764465,
      "grad_norm": 0.8518986105918884,
      "learning_rate": 7.431323840461772e-06,
      "loss": 0.0246,
      "step": 799660
    },
    {
      "epoch": 1.3086938591150998,
      "grad_norm": 0.45175397396087646,
      "learning_rate": 7.431257948248256e-06,
      "loss": 0.0182,
      "step": 799680
    },
    {
      "epoch": 1.3087265895537532,
      "grad_norm": 0.30193769931793213,
      "learning_rate": 7.4311920560347394e-06,
      "loss": 0.0149,
      "step": 799700
    },
    {
      "epoch": 1.3087593199924066,
      "grad_norm": 0.3340257704257965,
      "learning_rate": 7.431126163821221e-06,
      "loss": 0.0237,
      "step": 799720
    },
    {
      "epoch": 1.30879205043106,
      "grad_norm": 1.0070946216583252,
      "learning_rate": 7.431060271607705e-06,
      "loss": 0.0204,
      "step": 799740
    },
    {
      "epoch": 1.3088247808697133,
      "grad_norm": 0.5763982534408569,
      "learning_rate": 7.430994379394188e-06,
      "loss": 0.0353,
      "step": 799760
    },
    {
      "epoch": 1.3088575113083665,
      "grad_norm": 0.4555034637451172,
      "learning_rate": 7.430928487180671e-06,
      "loss": 0.0252,
      "step": 799780
    },
    {
      "epoch": 1.3088902417470198,
      "grad_norm": 0.6986212134361267,
      "learning_rate": 7.430862594967153e-06,
      "loss": 0.0253,
      "step": 799800
    },
    {
      "epoch": 1.3089229721856732,
      "grad_norm": 0.6696727871894836,
      "learning_rate": 7.430796702753637e-06,
      "loss": 0.0122,
      "step": 799820
    },
    {
      "epoch": 1.3089557026243266,
      "grad_norm": 0.37614548206329346,
      "learning_rate": 7.430730810540119e-06,
      "loss": 0.025,
      "step": 799840
    },
    {
      "epoch": 1.30898843306298,
      "grad_norm": 0.26988133788108826,
      "learning_rate": 7.430664918326602e-06,
      "loss": 0.0253,
      "step": 799860
    },
    {
      "epoch": 1.3090211635016331,
      "grad_norm": 0.18940749764442444,
      "learning_rate": 7.430599026113084e-06,
      "loss": 0.0214,
      "step": 799880
    },
    {
      "epoch": 1.3090538939402867,
      "grad_norm": 0.22790633141994476,
      "learning_rate": 7.430533133899568e-06,
      "loss": 0.0168,
      "step": 799900
    },
    {
      "epoch": 1.3090866243789399,
      "grad_norm": 0.8469488620758057,
      "learning_rate": 7.4304672416860504e-06,
      "loss": 0.0284,
      "step": 799920
    },
    {
      "epoch": 1.3091193548175932,
      "grad_norm": 0.7879378795623779,
      "learning_rate": 7.430401349472533e-06,
      "loss": 0.0199,
      "step": 799940
    },
    {
      "epoch": 1.3091520852562466,
      "grad_norm": 1.6106117963790894,
      "learning_rate": 7.430335457259016e-06,
      "loss": 0.0352,
      "step": 799960
    },
    {
      "epoch": 1.3091848156949,
      "grad_norm": 0.2639329731464386,
      "learning_rate": 7.4302695650454995e-06,
      "loss": 0.0156,
      "step": 799980
    },
    {
      "epoch": 1.3092175461335533,
      "grad_norm": 0.9368170499801636,
      "learning_rate": 7.430203672831981e-06,
      "loss": 0.0207,
      "step": 800000
    },
    {
      "epoch": 1.3092175461335533,
      "eval_loss": 0.011745245195925236,
      "eval_runtime": 6497.1665,
      "eval_samples_per_second": 158.201,
      "eval_steps_per_second": 15.82,
      "eval_sts-dev_pearson_cosine": 0.9723828397326942,
      "eval_sts-dev_spearman_cosine": 0.8876213252615486,
      "step": 800000
    },
    {
      "epoch": 1.3092502765722065,
      "grad_norm": 3.5884199142456055,
      "learning_rate": 7.430137780618465e-06,
      "loss": 0.0308,
      "step": 800020
    },
    {
      "epoch": 1.30928300701086,
      "grad_norm": 1.19853937625885,
      "learning_rate": 7.4300718884049486e-06,
      "loss": 0.0167,
      "step": 800040
    },
    {
      "epoch": 1.3093157374495132,
      "grad_norm": 1.4610596895217896,
      "learning_rate": 7.4300059961914305e-06,
      "loss": 0.0202,
      "step": 800060
    },
    {
      "epoch": 1.3093484678881666,
      "grad_norm": 0.2728007137775421,
      "learning_rate": 7.429940103977914e-06,
      "loss": 0.0182,
      "step": 800080
    },
    {
      "epoch": 1.30938119832682,
      "grad_norm": 1.2154204845428467,
      "learning_rate": 7.429874211764396e-06,
      "loss": 0.0147,
      "step": 800100
    },
    {
      "epoch": 1.3094139287654734,
      "grad_norm": 0.7465252876281738,
      "learning_rate": 7.4298083195508795e-06,
      "loss": 0.025,
      "step": 800120
    },
    {
      "epoch": 1.3094466592041267,
      "grad_norm": 0.8414462804794312,
      "learning_rate": 7.429742427337362e-06,
      "loss": 0.0176,
      "step": 800140
    },
    {
      "epoch": 1.3094793896427799,
      "grad_norm": 0.5557873249053955,
      "learning_rate": 7.429676535123845e-06,
      "loss": 0.0266,
      "step": 800160
    },
    {
      "epoch": 1.3095121200814333,
      "grad_norm": 1.536859393119812,
      "learning_rate": 7.429610642910328e-06,
      "loss": 0.026,
      "step": 800180
    },
    {
      "epoch": 1.3095448505200866,
      "grad_norm": 0.7576261758804321,
      "learning_rate": 7.429544750696811e-06,
      "loss": 0.0196,
      "step": 800200
    },
    {
      "epoch": 1.30957758095874,
      "grad_norm": 0.8312267065048218,
      "learning_rate": 7.429478858483293e-06,
      "loss": 0.0251,
      "step": 800220
    },
    {
      "epoch": 1.3096103113973934,
      "grad_norm": 0.4625224173069,
      "learning_rate": 7.429412966269777e-06,
      "loss": 0.02,
      "step": 800240
    },
    {
      "epoch": 1.3096430418360467,
      "grad_norm": 0.8911548852920532,
      "learning_rate": 7.429347074056259e-06,
      "loss": 0.0201,
      "step": 800260
    },
    {
      "epoch": 1.3096757722747001,
      "grad_norm": 1.3012558221817017,
      "learning_rate": 7.429281181842742e-06,
      "loss": 0.0257,
      "step": 800280
    },
    {
      "epoch": 1.3097085027133533,
      "grad_norm": 1.395944356918335,
      "learning_rate": 7.429215289629225e-06,
      "loss": 0.0308,
      "step": 800300
    },
    {
      "epoch": 1.3097412331520066,
      "grad_norm": 0.6207510232925415,
      "learning_rate": 7.429149397415708e-06,
      "loss": 0.0226,
      "step": 800320
    },
    {
      "epoch": 1.30977396359066,
      "grad_norm": 0.30478882789611816,
      "learning_rate": 7.4290835052021905e-06,
      "loss": 0.0116,
      "step": 800340
    },
    {
      "epoch": 1.3098066940293134,
      "grad_norm": 2.927900791168213,
      "learning_rate": 7.429017612988674e-06,
      "loss": 0.0263,
      "step": 800360
    },
    {
      "epoch": 1.3098394244679668,
      "grad_norm": 8.066872596740723,
      "learning_rate": 7.428951720775157e-06,
      "loss": 0.023,
      "step": 800380
    },
    {
      "epoch": 1.3098721549066201,
      "grad_norm": 0.24520283937454224,
      "learning_rate": 7.4288858285616396e-06,
      "loss": 0.0156,
      "step": 800400
    },
    {
      "epoch": 1.3099048853452735,
      "grad_norm": 0.6320317983627319,
      "learning_rate": 7.428819936348123e-06,
      "loss": 0.0196,
      "step": 800420
    },
    {
      "epoch": 1.3099376157839266,
      "grad_norm": 0.1131051704287529,
      "learning_rate": 7.428754044134605e-06,
      "loss": 0.0139,
      "step": 800440
    },
    {
      "epoch": 1.30997034622258,
      "grad_norm": 1.7663942575454712,
      "learning_rate": 7.428688151921089e-06,
      "loss": 0.0242,
      "step": 800460
    },
    {
      "epoch": 1.3100030766612334,
      "grad_norm": 0.5203166604042053,
      "learning_rate": 7.4286222597075705e-06,
      "loss": 0.0256,
      "step": 800480
    },
    {
      "epoch": 1.3100358070998868,
      "grad_norm": 0.09188517183065414,
      "learning_rate": 7.428556367494054e-06,
      "loss": 0.0244,
      "step": 800500
    },
    {
      "epoch": 1.3100685375385401,
      "grad_norm": 0.5284136533737183,
      "learning_rate": 7.428490475280537e-06,
      "loss": 0.0278,
      "step": 800520
    },
    {
      "epoch": 1.3101012679771935,
      "grad_norm": 0.9546289443969727,
      "learning_rate": 7.42842458306702e-06,
      "loss": 0.0177,
      "step": 800540
    },
    {
      "epoch": 1.3101339984158469,
      "grad_norm": 0.15836979448795319,
      "learning_rate": 7.428358690853502e-06,
      "loss": 0.0227,
      "step": 800560
    },
    {
      "epoch": 1.3101667288545,
      "grad_norm": 0.18540482223033905,
      "learning_rate": 7.428292798639986e-06,
      "loss": 0.0171,
      "step": 800580
    },
    {
      "epoch": 1.3101994592931534,
      "grad_norm": 0.3117723762989044,
      "learning_rate": 7.428226906426468e-06,
      "loss": 0.0148,
      "step": 800600
    },
    {
      "epoch": 1.3102321897318068,
      "grad_norm": 0.32519587874412537,
      "learning_rate": 7.428161014212951e-06,
      "loss": 0.0193,
      "step": 800620
    },
    {
      "epoch": 1.3102649201704601,
      "grad_norm": 0.10532144457101822,
      "learning_rate": 7.428095121999433e-06,
      "loss": 0.0149,
      "step": 800640
    },
    {
      "epoch": 1.3102976506091135,
      "grad_norm": 0.24985989928245544,
      "learning_rate": 7.428029229785917e-06,
      "loss": 0.0286,
      "step": 800660
    },
    {
      "epoch": 1.3103303810477667,
      "grad_norm": 1.1883656978607178,
      "learning_rate": 7.427963337572399e-06,
      "loss": 0.0412,
      "step": 800680
    },
    {
      "epoch": 1.3103631114864203,
      "grad_norm": 0.2431989163160324,
      "learning_rate": 7.427897445358882e-06,
      "loss": 0.0236,
      "step": 800700
    },
    {
      "epoch": 1.3103958419250734,
      "grad_norm": 0.28974050283432007,
      "learning_rate": 7.427831553145366e-06,
      "loss": 0.0225,
      "step": 800720
    },
    {
      "epoch": 1.3104285723637268,
      "grad_norm": 0.25840526819229126,
      "learning_rate": 7.427765660931848e-06,
      "loss": 0.0208,
      "step": 800740
    },
    {
      "epoch": 1.3104613028023802,
      "grad_norm": 0.6832910180091858,
      "learning_rate": 7.4276997687183314e-06,
      "loss": 0.0234,
      "step": 800760
    },
    {
      "epoch": 1.3104940332410335,
      "grad_norm": 0.9702416658401489,
      "learning_rate": 7.427633876504814e-06,
      "loss": 0.0207,
      "step": 800780
    },
    {
      "epoch": 1.310526763679687,
      "grad_norm": 0.4500871002674103,
      "learning_rate": 7.427567984291297e-06,
      "loss": 0.0262,
      "step": 800800
    },
    {
      "epoch": 1.31055949411834,
      "grad_norm": 0.1901482492685318,
      "learning_rate": 7.42750209207778e-06,
      "loss": 0.0185,
      "step": 800820
    },
    {
      "epoch": 1.3105922245569936,
      "grad_norm": 0.4379529356956482,
      "learning_rate": 7.427436199864263e-06,
      "loss": 0.0132,
      "step": 800840
    },
    {
      "epoch": 1.3106249549956468,
      "grad_norm": 0.48413389921188354,
      "learning_rate": 7.427370307650745e-06,
      "loss": 0.0169,
      "step": 800860
    },
    {
      "epoch": 1.3106576854343002,
      "grad_norm": 1.8431580066680908,
      "learning_rate": 7.427304415437229e-06,
      "loss": 0.0236,
      "step": 800880
    },
    {
      "epoch": 1.3106904158729535,
      "grad_norm": 0.9661279916763306,
      "learning_rate": 7.427238523223711e-06,
      "loss": 0.0194,
      "step": 800900
    },
    {
      "epoch": 1.310723146311607,
      "grad_norm": 0.23219843208789825,
      "learning_rate": 7.427172631010194e-06,
      "loss": 0.0281,
      "step": 800920
    },
    {
      "epoch": 1.3107558767502603,
      "grad_norm": 0.9329352974891663,
      "learning_rate": 7.427106738796677e-06,
      "loss": 0.0295,
      "step": 800940
    },
    {
      "epoch": 1.3107886071889134,
      "grad_norm": 0.7513533234596252,
      "learning_rate": 7.42704084658316e-06,
      "loss": 0.0156,
      "step": 800960
    },
    {
      "epoch": 1.3108213376275668,
      "grad_norm": 0.4288664758205414,
      "learning_rate": 7.426974954369642e-06,
      "loss": 0.015,
      "step": 800980
    },
    {
      "epoch": 1.3108540680662202,
      "grad_norm": 0.08110636472702026,
      "learning_rate": 7.426909062156126e-06,
      "loss": 0.0239,
      "step": 801000
    },
    {
      "epoch": 1.3108867985048736,
      "grad_norm": 0.3031640648841858,
      "learning_rate": 7.426843169942608e-06,
      "loss": 0.0212,
      "step": 801020
    },
    {
      "epoch": 1.310919528943527,
      "grad_norm": 0.4810546040534973,
      "learning_rate": 7.4267772777290915e-06,
      "loss": 0.0288,
      "step": 801040
    },
    {
      "epoch": 1.3109522593821803,
      "grad_norm": 1.548162579536438,
      "learning_rate": 7.426711385515573e-06,
      "loss": 0.0239,
      "step": 801060
    },
    {
      "epoch": 1.3109849898208337,
      "grad_norm": 0.5387774109840393,
      "learning_rate": 7.426645493302057e-06,
      "loss": 0.0236,
      "step": 801080
    },
    {
      "epoch": 1.3110177202594868,
      "grad_norm": 0.26772090792655945,
      "learning_rate": 7.4265796010885405e-06,
      "loss": 0.0384,
      "step": 801100
    },
    {
      "epoch": 1.3110504506981402,
      "grad_norm": 0.8746463656425476,
      "learning_rate": 7.4265137088750224e-06,
      "loss": 0.0248,
      "step": 801120
    },
    {
      "epoch": 1.3110831811367936,
      "grad_norm": 0.9284927845001221,
      "learning_rate": 7.426447816661506e-06,
      "loss": 0.018,
      "step": 801140
    },
    {
      "epoch": 1.311115911575447,
      "grad_norm": 0.24247361719608307,
      "learning_rate": 7.426381924447989e-06,
      "loss": 0.0324,
      "step": 801160
    },
    {
      "epoch": 1.3111486420141003,
      "grad_norm": 0.4772660732269287,
      "learning_rate": 7.4263160322344715e-06,
      "loss": 0.0186,
      "step": 801180
    },
    {
      "epoch": 1.3111813724527537,
      "grad_norm": 0.17437009513378143,
      "learning_rate": 7.426250140020954e-06,
      "loss": 0.026,
      "step": 801200
    },
    {
      "epoch": 1.311214102891407,
      "grad_norm": 0.6300395727157593,
      "learning_rate": 7.426184247807438e-06,
      "loss": 0.0227,
      "step": 801220
    },
    {
      "epoch": 1.3112468333300602,
      "grad_norm": 0.5743499398231506,
      "learning_rate": 7.42611835559392e-06,
      "loss": 0.0183,
      "step": 801240
    },
    {
      "epoch": 1.3112795637687136,
      "grad_norm": 0.7843416333198547,
      "learning_rate": 7.426052463380403e-06,
      "loss": 0.0177,
      "step": 801260
    },
    {
      "epoch": 1.311312294207367,
      "grad_norm": 0.14275196194648743,
      "learning_rate": 7.425986571166885e-06,
      "loss": 0.0185,
      "step": 801280
    },
    {
      "epoch": 1.3113450246460203,
      "grad_norm": 0.3655739426612854,
      "learning_rate": 7.425920678953369e-06,
      "loss": 0.022,
      "step": 801300
    },
    {
      "epoch": 1.3113777550846737,
      "grad_norm": 1.807883620262146,
      "learning_rate": 7.4258547867398515e-06,
      "loss": 0.0175,
      "step": 801320
    },
    {
      "epoch": 1.311410485523327,
      "grad_norm": 0.8459617495536804,
      "learning_rate": 7.425788894526334e-06,
      "loss": 0.0196,
      "step": 801340
    },
    {
      "epoch": 1.3114432159619804,
      "grad_norm": 0.21086779236793518,
      "learning_rate": 7.425723002312817e-06,
      "loss": 0.0231,
      "step": 801360
    },
    {
      "epoch": 1.3114759464006336,
      "grad_norm": 0.93946373462677,
      "learning_rate": 7.425657110099301e-06,
      "loss": 0.0273,
      "step": 801380
    },
    {
      "epoch": 1.311508676839287,
      "grad_norm": 1.03757905960083,
      "learning_rate": 7.4255912178857825e-06,
      "loss": 0.0246,
      "step": 801400
    },
    {
      "epoch": 1.3115414072779403,
      "grad_norm": 0.9116002321243286,
      "learning_rate": 7.425525325672266e-06,
      "loss": 0.0269,
      "step": 801420
    },
    {
      "epoch": 1.3115741377165937,
      "grad_norm": 0.3243143558502197,
      "learning_rate": 7.42545943345875e-06,
      "loss": 0.0256,
      "step": 801440
    },
    {
      "epoch": 1.311606868155247,
      "grad_norm": 0.1672595888376236,
      "learning_rate": 7.4253935412452316e-06,
      "loss": 0.0216,
      "step": 801460
    },
    {
      "epoch": 1.3116395985939002,
      "grad_norm": 4.678007125854492,
      "learning_rate": 7.425327649031715e-06,
      "loss": 0.0231,
      "step": 801480
    },
    {
      "epoch": 1.3116723290325538,
      "grad_norm": 0.5343849658966064,
      "learning_rate": 7.425261756818197e-06,
      "loss": 0.0231,
      "step": 801500
    },
    {
      "epoch": 1.311705059471207,
      "grad_norm": 0.7029544115066528,
      "learning_rate": 7.425195864604681e-06,
      "loss": 0.0222,
      "step": 801520
    },
    {
      "epoch": 1.3117377899098603,
      "grad_norm": 0.274304062128067,
      "learning_rate": 7.425129972391163e-06,
      "loss": 0.0196,
      "step": 801540
    },
    {
      "epoch": 1.3117705203485137,
      "grad_norm": 1.0914820432662964,
      "learning_rate": 7.425064080177646e-06,
      "loss": 0.0245,
      "step": 801560
    },
    {
      "epoch": 1.311803250787167,
      "grad_norm": 0.6484717726707458,
      "learning_rate": 7.424998187964129e-06,
      "loss": 0.0184,
      "step": 801580
    },
    {
      "epoch": 1.3118359812258205,
      "grad_norm": 0.6184407472610474,
      "learning_rate": 7.4249322957506124e-06,
      "loss": 0.0241,
      "step": 801600
    },
    {
      "epoch": 1.3118687116644736,
      "grad_norm": 0.529183566570282,
      "learning_rate": 7.424866403537094e-06,
      "loss": 0.0218,
      "step": 801620
    },
    {
      "epoch": 1.3119014421031272,
      "grad_norm": 0.972615122795105,
      "learning_rate": 7.424800511323578e-06,
      "loss": 0.0235,
      "step": 801640
    },
    {
      "epoch": 1.3119341725417804,
      "grad_norm": 6.94359827041626,
      "learning_rate": 7.42473461911006e-06,
      "loss": 0.0248,
      "step": 801660
    },
    {
      "epoch": 1.3119669029804337,
      "grad_norm": 1.2503324747085571,
      "learning_rate": 7.424668726896543e-06,
      "loss": 0.0239,
      "step": 801680
    },
    {
      "epoch": 1.311999633419087,
      "grad_norm": 1.1059954166412354,
      "learning_rate": 7.424602834683025e-06,
      "loss": 0.02,
      "step": 801700
    },
    {
      "epoch": 1.3120323638577405,
      "grad_norm": 0.9052416086196899,
      "learning_rate": 7.424536942469509e-06,
      "loss": 0.027,
      "step": 801720
    },
    {
      "epoch": 1.3120650942963938,
      "grad_norm": 1.814249873161316,
      "learning_rate": 7.424471050255992e-06,
      "loss": 0.0206,
      "step": 801740
    },
    {
      "epoch": 1.312097824735047,
      "grad_norm": 1.2094836235046387,
      "learning_rate": 7.424405158042474e-06,
      "loss": 0.0256,
      "step": 801760
    },
    {
      "epoch": 1.3121305551737004,
      "grad_norm": 1.191042184829712,
      "learning_rate": 7.424339265828958e-06,
      "loss": 0.0225,
      "step": 801780
    },
    {
      "epoch": 1.3121632856123537,
      "grad_norm": 1.3755449056625366,
      "learning_rate": 7.424273373615441e-06,
      "loss": 0.0186,
      "step": 801800
    },
    {
      "epoch": 1.312196016051007,
      "grad_norm": 0.6602155566215515,
      "learning_rate": 7.424207481401923e-06,
      "loss": 0.0214,
      "step": 801820
    },
    {
      "epoch": 1.3122287464896605,
      "grad_norm": 0.3161850571632385,
      "learning_rate": 7.424141589188406e-06,
      "loss": 0.0215,
      "step": 801840
    },
    {
      "epoch": 1.3122614769283139,
      "grad_norm": 3.1401610374450684,
      "learning_rate": 7.42407569697489e-06,
      "loss": 0.026,
      "step": 801860
    },
    {
      "epoch": 1.3122942073669672,
      "grad_norm": 0.3784954845905304,
      "learning_rate": 7.424009804761372e-06,
      "loss": 0.0174,
      "step": 801880
    },
    {
      "epoch": 1.3123269378056204,
      "grad_norm": 3.306410312652588,
      "learning_rate": 7.423943912547855e-06,
      "loss": 0.024,
      "step": 801900
    },
    {
      "epoch": 1.3123596682442737,
      "grad_norm": 0.37149783968925476,
      "learning_rate": 7.423878020334337e-06,
      "loss": 0.0206,
      "step": 801920
    },
    {
      "epoch": 1.3123923986829271,
      "grad_norm": 0.5712597370147705,
      "learning_rate": 7.423812128120821e-06,
      "loss": 0.028,
      "step": 801940
    },
    {
      "epoch": 1.3124251291215805,
      "grad_norm": 0.348928302526474,
      "learning_rate": 7.4237462359073034e-06,
      "loss": 0.0208,
      "step": 801960
    },
    {
      "epoch": 1.3124578595602339,
      "grad_norm": 0.7410017848014832,
      "learning_rate": 7.423680343693786e-06,
      "loss": 0.0192,
      "step": 801980
    },
    {
      "epoch": 1.3124905899988872,
      "grad_norm": 5.333602428436279,
      "learning_rate": 7.423614451480269e-06,
      "loss": 0.0185,
      "step": 802000
    },
    {
      "epoch": 1.3125233204375406,
      "grad_norm": 0.43409961462020874,
      "learning_rate": 7.4235485592667525e-06,
      "loss": 0.0155,
      "step": 802020
    },
    {
      "epoch": 1.3125560508761938,
      "grad_norm": 0.4138484299182892,
      "learning_rate": 7.423482667053234e-06,
      "loss": 0.0192,
      "step": 802040
    },
    {
      "epoch": 1.3125887813148471,
      "grad_norm": 0.13576020300388336,
      "learning_rate": 7.423416774839718e-06,
      "loss": 0.0187,
      "step": 802060
    },
    {
      "epoch": 1.3126215117535005,
      "grad_norm": 0.9776785969734192,
      "learning_rate": 7.4233508826262e-06,
      "loss": 0.0195,
      "step": 802080
    },
    {
      "epoch": 1.3126542421921539,
      "grad_norm": 0.7934494614601135,
      "learning_rate": 7.4232849904126835e-06,
      "loss": 0.0164,
      "step": 802100
    },
    {
      "epoch": 1.3126869726308072,
      "grad_norm": 0.5263378620147705,
      "learning_rate": 7.423219098199166e-06,
      "loss": 0.0229,
      "step": 802120
    },
    {
      "epoch": 1.3127197030694606,
      "grad_norm": 0.7472361922264099,
      "learning_rate": 7.423153205985649e-06,
      "loss": 0.0243,
      "step": 802140
    },
    {
      "epoch": 1.312752433508114,
      "grad_norm": 0.1453268676996231,
      "learning_rate": 7.4230873137721325e-06,
      "loss": 0.022,
      "step": 802160
    },
    {
      "epoch": 1.3127851639467671,
      "grad_norm": 0.19144277274608612,
      "learning_rate": 7.423021421558615e-06,
      "loss": 0.0182,
      "step": 802180
    },
    {
      "epoch": 1.3128178943854205,
      "grad_norm": 3.093520402908325,
      "learning_rate": 7.422955529345098e-06,
      "loss": 0.0237,
      "step": 802200
    },
    {
      "epoch": 1.3128506248240739,
      "grad_norm": 0.5285989046096802,
      "learning_rate": 7.422889637131581e-06,
      "loss": 0.0221,
      "step": 802220
    },
    {
      "epoch": 1.3128833552627273,
      "grad_norm": 0.3695906698703766,
      "learning_rate": 7.422823744918064e-06,
      "loss": 0.0209,
      "step": 802240
    },
    {
      "epoch": 1.3129160857013806,
      "grad_norm": 0.4960343837738037,
      "learning_rate": 7.422757852704546e-06,
      "loss": 0.0167,
      "step": 802260
    },
    {
      "epoch": 1.3129488161400338,
      "grad_norm": 0.19156044721603394,
      "learning_rate": 7.42269196049103e-06,
      "loss": 0.0245,
      "step": 802280
    },
    {
      "epoch": 1.3129815465786874,
      "grad_norm": 0.45489323139190674,
      "learning_rate": 7.422626068277512e-06,
      "loss": 0.0241,
      "step": 802300
    },
    {
      "epoch": 1.3130142770173405,
      "grad_norm": 0.3542742133140564,
      "learning_rate": 7.422560176063995e-06,
      "loss": 0.0261,
      "step": 802320
    },
    {
      "epoch": 1.313047007455994,
      "grad_norm": 0.26450538635253906,
      "learning_rate": 7.422494283850478e-06,
      "loss": 0.0275,
      "step": 802340
    },
    {
      "epoch": 1.3130797378946473,
      "grad_norm": 0.30329787731170654,
      "learning_rate": 7.422428391636961e-06,
      "loss": 0.0177,
      "step": 802360
    },
    {
      "epoch": 1.3131124683333006,
      "grad_norm": 2.219728946685791,
      "learning_rate": 7.4223624994234435e-06,
      "loss": 0.0279,
      "step": 802380
    },
    {
      "epoch": 1.313145198771954,
      "grad_norm": 1.9977846145629883,
      "learning_rate": 7.422296607209927e-06,
      "loss": 0.0228,
      "step": 802400
    },
    {
      "epoch": 1.3131779292106072,
      "grad_norm": 0.7939761877059937,
      "learning_rate": 7.422230714996409e-06,
      "loss": 0.0271,
      "step": 802420
    },
    {
      "epoch": 1.3132106596492605,
      "grad_norm": 0.16499240696430206,
      "learning_rate": 7.422164822782893e-06,
      "loss": 0.0192,
      "step": 802440
    },
    {
      "epoch": 1.313243390087914,
      "grad_norm": 0.14649350941181183,
      "learning_rate": 7.4220989305693745e-06,
      "loss": 0.0265,
      "step": 802460
    },
    {
      "epoch": 1.3132761205265673,
      "grad_norm": 0.5862524509429932,
      "learning_rate": 7.422033038355858e-06,
      "loss": 0.022,
      "step": 802480
    },
    {
      "epoch": 1.3133088509652207,
      "grad_norm": 0.7146283984184265,
      "learning_rate": 7.421967146142342e-06,
      "loss": 0.0227,
      "step": 802500
    },
    {
      "epoch": 1.313341581403874,
      "grad_norm": 1.3553500175476074,
      "learning_rate": 7.4219012539288235e-06,
      "loss": 0.0228,
      "step": 802520
    },
    {
      "epoch": 1.3133743118425274,
      "grad_norm": 0.758266806602478,
      "learning_rate": 7.421835361715307e-06,
      "loss": 0.0287,
      "step": 802540
    },
    {
      "epoch": 1.3134070422811805,
      "grad_norm": 2.765747308731079,
      "learning_rate": 7.42176946950179e-06,
      "loss": 0.0312,
      "step": 802560
    },
    {
      "epoch": 1.313439772719834,
      "grad_norm": 0.7311679720878601,
      "learning_rate": 7.421703577288273e-06,
      "loss": 0.0195,
      "step": 802580
    },
    {
      "epoch": 1.3134725031584873,
      "grad_norm": 0.6341080069541931,
      "learning_rate": 7.421637685074755e-06,
      "loss": 0.013,
      "step": 802600
    },
    {
      "epoch": 1.3135052335971407,
      "grad_norm": 0.48215875029563904,
      "learning_rate": 7.421571792861239e-06,
      "loss": 0.0196,
      "step": 802620
    },
    {
      "epoch": 1.313537964035794,
      "grad_norm": 0.4337329864501953,
      "learning_rate": 7.421505900647721e-06,
      "loss": 0.0203,
      "step": 802640
    },
    {
      "epoch": 1.3135706944744474,
      "grad_norm": 1.05974280834198,
      "learning_rate": 7.421440008434204e-06,
      "loss": 0.0218,
      "step": 802660
    },
    {
      "epoch": 1.3136034249131008,
      "grad_norm": 1.832819938659668,
      "learning_rate": 7.421374116220686e-06,
      "loss": 0.0267,
      "step": 802680
    },
    {
      "epoch": 1.313636155351754,
      "grad_norm": 0.8696261048316956,
      "learning_rate": 7.42130822400717e-06,
      "loss": 0.0282,
      "step": 802700
    },
    {
      "epoch": 1.3136688857904073,
      "grad_norm": 0.4863658845424652,
      "learning_rate": 7.421242331793652e-06,
      "loss": 0.0221,
      "step": 802720
    },
    {
      "epoch": 1.3137016162290607,
      "grad_norm": 0.2703148126602173,
      "learning_rate": 7.421176439580135e-06,
      "loss": 0.0158,
      "step": 802740
    },
    {
      "epoch": 1.313734346667714,
      "grad_norm": 0.22428317368030548,
      "learning_rate": 7.421110547366618e-06,
      "loss": 0.0172,
      "step": 802760
    },
    {
      "epoch": 1.3137670771063674,
      "grad_norm": 0.9598351120948792,
      "learning_rate": 7.421044655153101e-06,
      "loss": 0.0222,
      "step": 802780
    },
    {
      "epoch": 1.3137998075450208,
      "grad_norm": 1.9384182691574097,
      "learning_rate": 7.420978762939584e-06,
      "loss": 0.0218,
      "step": 802800
    },
    {
      "epoch": 1.3138325379836742,
      "grad_norm": 0.617465615272522,
      "learning_rate": 7.420912870726067e-06,
      "loss": 0.0215,
      "step": 802820
    },
    {
      "epoch": 1.3138652684223273,
      "grad_norm": 0.25525152683258057,
      "learning_rate": 7.42084697851255e-06,
      "loss": 0.0207,
      "step": 802840
    },
    {
      "epoch": 1.3138979988609807,
      "grad_norm": 0.3141494393348694,
      "learning_rate": 7.420781086299033e-06,
      "loss": 0.0104,
      "step": 802860
    },
    {
      "epoch": 1.313930729299634,
      "grad_norm": 0.2055082470178604,
      "learning_rate": 7.420715194085516e-06,
      "loss": 0.0243,
      "step": 802880
    },
    {
      "epoch": 1.3139634597382874,
      "grad_norm": 0.322564959526062,
      "learning_rate": 7.420649301871998e-06,
      "loss": 0.0229,
      "step": 802900
    },
    {
      "epoch": 1.3139961901769408,
      "grad_norm": 0.5610334873199463,
      "learning_rate": 7.420583409658482e-06,
      "loss": 0.0192,
      "step": 802920
    },
    {
      "epoch": 1.314028920615594,
      "grad_norm": 0.7788916826248169,
      "learning_rate": 7.420517517444964e-06,
      "loss": 0.0167,
      "step": 802940
    },
    {
      "epoch": 1.3140616510542475,
      "grad_norm": 0.39930960536003113,
      "learning_rate": 7.420451625231447e-06,
      "loss": 0.0186,
      "step": 802960
    },
    {
      "epoch": 1.3140943814929007,
      "grad_norm": 1.1686878204345703,
      "learning_rate": 7.42038573301793e-06,
      "loss": 0.0182,
      "step": 802980
    },
    {
      "epoch": 1.314127111931554,
      "grad_norm": 0.43923985958099365,
      "learning_rate": 7.420319840804413e-06,
      "loss": 0.0257,
      "step": 803000
    },
    {
      "epoch": 1.3141598423702074,
      "grad_norm": 0.8540599942207336,
      "learning_rate": 7.420253948590895e-06,
      "loss": 0.0202,
      "step": 803020
    },
    {
      "epoch": 1.3141925728088608,
      "grad_norm": 1.4732872247695923,
      "learning_rate": 7.420188056377379e-06,
      "loss": 0.023,
      "step": 803040
    },
    {
      "epoch": 1.3142253032475142,
      "grad_norm": 0.8577905893325806,
      "learning_rate": 7.420122164163861e-06,
      "loss": 0.0218,
      "step": 803060
    },
    {
      "epoch": 1.3142580336861673,
      "grad_norm": 0.6874586343765259,
      "learning_rate": 7.4200562719503445e-06,
      "loss": 0.0176,
      "step": 803080
    },
    {
      "epoch": 1.314290764124821,
      "grad_norm": 0.7580226063728333,
      "learning_rate": 7.419990379736826e-06,
      "loss": 0.02,
      "step": 803100
    },
    {
      "epoch": 1.314323494563474,
      "grad_norm": 0.8368597030639648,
      "learning_rate": 7.41992448752331e-06,
      "loss": 0.0173,
      "step": 803120
    },
    {
      "epoch": 1.3143562250021275,
      "grad_norm": 0.4898355305194855,
      "learning_rate": 7.419858595309793e-06,
      "loss": 0.0239,
      "step": 803140
    },
    {
      "epoch": 1.3143889554407808,
      "grad_norm": 0.6582596898078918,
      "learning_rate": 7.4197927030962754e-06,
      "loss": 0.0199,
      "step": 803160
    },
    {
      "epoch": 1.3144216858794342,
      "grad_norm": 0.8068172335624695,
      "learning_rate": 7.419726810882759e-06,
      "loss": 0.0282,
      "step": 803180
    },
    {
      "epoch": 1.3144544163180876,
      "grad_norm": 1.3813388347625732,
      "learning_rate": 7.419660918669242e-06,
      "loss": 0.0164,
      "step": 803200
    },
    {
      "epoch": 1.3144871467567407,
      "grad_norm": 0.7703050971031189,
      "learning_rate": 7.4195950264557245e-06,
      "loss": 0.0239,
      "step": 803220
    },
    {
      "epoch": 1.314519877195394,
      "grad_norm": 0.3285718560218811,
      "learning_rate": 7.419529134242207e-06,
      "loss": 0.0224,
      "step": 803240
    },
    {
      "epoch": 1.3145526076340475,
      "grad_norm": 1.4894192218780518,
      "learning_rate": 7.419463242028691e-06,
      "loss": 0.0245,
      "step": 803260
    },
    {
      "epoch": 1.3145853380727008,
      "grad_norm": 1.4425336122512817,
      "learning_rate": 7.419397349815173e-06,
      "loss": 0.0148,
      "step": 803280
    },
    {
      "epoch": 1.3146180685113542,
      "grad_norm": 1.6874589920043945,
      "learning_rate": 7.419331457601656e-06,
      "loss": 0.0188,
      "step": 803300
    },
    {
      "epoch": 1.3146507989500076,
      "grad_norm": 0.4686777591705322,
      "learning_rate": 7.419265565388138e-06,
      "loss": 0.0218,
      "step": 803320
    },
    {
      "epoch": 1.314683529388661,
      "grad_norm": 2.1459879875183105,
      "learning_rate": 7.419199673174622e-06,
      "loss": 0.0218,
      "step": 803340
    },
    {
      "epoch": 1.314716259827314,
      "grad_norm": 0.44423341751098633,
      "learning_rate": 7.4191337809611045e-06,
      "loss": 0.0237,
      "step": 803360
    },
    {
      "epoch": 1.3147489902659675,
      "grad_norm": 0.42202144861221313,
      "learning_rate": 7.419067888747587e-06,
      "loss": 0.0295,
      "step": 803380
    },
    {
      "epoch": 1.3147817207046208,
      "grad_norm": 0.40219923853874207,
      "learning_rate": 7.41900199653407e-06,
      "loss": 0.0169,
      "step": 803400
    },
    {
      "epoch": 1.3148144511432742,
      "grad_norm": 0.4361742436885834,
      "learning_rate": 7.418936104320554e-06,
      "loss": 0.0136,
      "step": 803420
    },
    {
      "epoch": 1.3148471815819276,
      "grad_norm": 1.0834020376205444,
      "learning_rate": 7.4188702121070355e-06,
      "loss": 0.0214,
      "step": 803440
    },
    {
      "epoch": 1.314879912020581,
      "grad_norm": 0.2923159897327423,
      "learning_rate": 7.418804319893519e-06,
      "loss": 0.0187,
      "step": 803460
    },
    {
      "epoch": 1.3149126424592343,
      "grad_norm": 0.5542740821838379,
      "learning_rate": 7.418738427680001e-06,
      "loss": 0.0227,
      "step": 803480
    },
    {
      "epoch": 1.3149453728978875,
      "grad_norm": 0.36901238560676575,
      "learning_rate": 7.4186725354664846e-06,
      "loss": 0.0277,
      "step": 803500
    },
    {
      "epoch": 1.3149781033365409,
      "grad_norm": 1.0734708309173584,
      "learning_rate": 7.4186066432529665e-06,
      "loss": 0.0196,
      "step": 803520
    },
    {
      "epoch": 1.3150108337751942,
      "grad_norm": 0.5220155715942383,
      "learning_rate": 7.41854075103945e-06,
      "loss": 0.0219,
      "step": 803540
    },
    {
      "epoch": 1.3150435642138476,
      "grad_norm": 2.5322041511535645,
      "learning_rate": 7.418474858825934e-06,
      "loss": 0.0223,
      "step": 803560
    },
    {
      "epoch": 1.315076294652501,
      "grad_norm": 0.8017446398735046,
      "learning_rate": 7.4184089666124155e-06,
      "loss": 0.028,
      "step": 803580
    },
    {
      "epoch": 1.3151090250911543,
      "grad_norm": 0.11944129317998886,
      "learning_rate": 7.418343074398899e-06,
      "loss": 0.0267,
      "step": 803600
    },
    {
      "epoch": 1.3151417555298077,
      "grad_norm": 0.6245434284210205,
      "learning_rate": 7.418277182185382e-06,
      "loss": 0.0291,
      "step": 803620
    },
    {
      "epoch": 1.3151744859684609,
      "grad_norm": 0.6939977407455444,
      "learning_rate": 7.4182112899718654e-06,
      "loss": 0.0328,
      "step": 803640
    },
    {
      "epoch": 1.3152072164071142,
      "grad_norm": 0.9989840388298035,
      "learning_rate": 7.418145397758347e-06,
      "loss": 0.0208,
      "step": 803660
    },
    {
      "epoch": 1.3152399468457676,
      "grad_norm": 0.7654704451560974,
      "learning_rate": 7.418079505544831e-06,
      "loss": 0.026,
      "step": 803680
    },
    {
      "epoch": 1.315272677284421,
      "grad_norm": 0.7260003089904785,
      "learning_rate": 7.418013613331313e-06,
      "loss": 0.0252,
      "step": 803700
    },
    {
      "epoch": 1.3153054077230744,
      "grad_norm": 0.28474923968315125,
      "learning_rate": 7.417947721117796e-06,
      "loss": 0.0231,
      "step": 803720
    },
    {
      "epoch": 1.3153381381617275,
      "grad_norm": 0.6618127226829529,
      "learning_rate": 7.417881828904278e-06,
      "loss": 0.0135,
      "step": 803740
    },
    {
      "epoch": 1.315370868600381,
      "grad_norm": 0.9238868951797485,
      "learning_rate": 7.417815936690762e-06,
      "loss": 0.0249,
      "step": 803760
    },
    {
      "epoch": 1.3154035990390343,
      "grad_norm": 0.6415529847145081,
      "learning_rate": 7.417750044477245e-06,
      "loss": 0.0239,
      "step": 803780
    },
    {
      "epoch": 1.3154363294776876,
      "grad_norm": 0.9018375873565674,
      "learning_rate": 7.417684152263727e-06,
      "loss": 0.021,
      "step": 803800
    },
    {
      "epoch": 1.315469059916341,
      "grad_norm": 0.5715349316596985,
      "learning_rate": 7.41761826005021e-06,
      "loss": 0.0188,
      "step": 803820
    },
    {
      "epoch": 1.3155017903549944,
      "grad_norm": 0.7416879534721375,
      "learning_rate": 7.417552367836694e-06,
      "loss": 0.0184,
      "step": 803840
    },
    {
      "epoch": 1.3155345207936477,
      "grad_norm": 0.5748000741004944,
      "learning_rate": 7.4174864756231756e-06,
      "loss": 0.0235,
      "step": 803860
    },
    {
      "epoch": 1.315567251232301,
      "grad_norm": 0.5418614745140076,
      "learning_rate": 7.417420583409659e-06,
      "loss": 0.0266,
      "step": 803880
    },
    {
      "epoch": 1.3155999816709545,
      "grad_norm": 0.17536090314388275,
      "learning_rate": 7.417354691196143e-06,
      "loss": 0.0268,
      "step": 803900
    },
    {
      "epoch": 1.3156327121096076,
      "grad_norm": 0.37821465730667114,
      "learning_rate": 7.417288798982625e-06,
      "loss": 0.0204,
      "step": 803920
    },
    {
      "epoch": 1.315665442548261,
      "grad_norm": 0.6167888045310974,
      "learning_rate": 7.417222906769108e-06,
      "loss": 0.0204,
      "step": 803940
    },
    {
      "epoch": 1.3156981729869144,
      "grad_norm": 0.5501430034637451,
      "learning_rate": 7.41715701455559e-06,
      "loss": 0.0187,
      "step": 803960
    },
    {
      "epoch": 1.3157309034255678,
      "grad_norm": 0.5321016907691956,
      "learning_rate": 7.417091122342074e-06,
      "loss": 0.0186,
      "step": 803980
    },
    {
      "epoch": 1.3157636338642211,
      "grad_norm": 0.6041311025619507,
      "learning_rate": 7.4170252301285564e-06,
      "loss": 0.0159,
      "step": 804000
    },
    {
      "epoch": 1.3157963643028743,
      "grad_norm": 0.3412168025970459,
      "learning_rate": 7.416959337915039e-06,
      "loss": 0.014,
      "step": 804020
    },
    {
      "epoch": 1.3158290947415276,
      "grad_norm": 2.6080639362335205,
      "learning_rate": 7.416893445701522e-06,
      "loss": 0.0242,
      "step": 804040
    },
    {
      "epoch": 1.315861825180181,
      "grad_norm": 0.5540986061096191,
      "learning_rate": 7.4168275534880055e-06,
      "loss": 0.0148,
      "step": 804060
    },
    {
      "epoch": 1.3158945556188344,
      "grad_norm": 0.646831214427948,
      "learning_rate": 7.416761661274487e-06,
      "loss": 0.0186,
      "step": 804080
    },
    {
      "epoch": 1.3159272860574878,
      "grad_norm": 0.4297105371952057,
      "learning_rate": 7.416695769060971e-06,
      "loss": 0.0206,
      "step": 804100
    },
    {
      "epoch": 1.3159600164961411,
      "grad_norm": 0.8006592988967896,
      "learning_rate": 7.416629876847453e-06,
      "loss": 0.0254,
      "step": 804120
    },
    {
      "epoch": 1.3159927469347945,
      "grad_norm": 0.3232502043247223,
      "learning_rate": 7.4165639846339365e-06,
      "loss": 0.0115,
      "step": 804140
    },
    {
      "epoch": 1.3160254773734477,
      "grad_norm": 0.29599034786224365,
      "learning_rate": 7.416498092420419e-06,
      "loss": 0.0195,
      "step": 804160
    },
    {
      "epoch": 1.316058207812101,
      "grad_norm": 1.1078786849975586,
      "learning_rate": 7.416432200206902e-06,
      "loss": 0.0271,
      "step": 804180
    },
    {
      "epoch": 1.3160909382507544,
      "grad_norm": 0.4549277126789093,
      "learning_rate": 7.416366307993385e-06,
      "loss": 0.028,
      "step": 804200
    },
    {
      "epoch": 1.3161236686894078,
      "grad_norm": 0.9887410402297974,
      "learning_rate": 7.416300415779868e-06,
      "loss": 0.0226,
      "step": 804220
    },
    {
      "epoch": 1.3161563991280611,
      "grad_norm": 0.173334002494812,
      "learning_rate": 7.416234523566351e-06,
      "loss": 0.0202,
      "step": 804240
    },
    {
      "epoch": 1.3161891295667145,
      "grad_norm": 0.9500430822372437,
      "learning_rate": 7.416168631352834e-06,
      "loss": 0.023,
      "step": 804260
    },
    {
      "epoch": 1.316221860005368,
      "grad_norm": 0.8945106267929077,
      "learning_rate": 7.416102739139317e-06,
      "loss": 0.0238,
      "step": 804280
    },
    {
      "epoch": 1.316254590444021,
      "grad_norm": 0.1460995078086853,
      "learning_rate": 7.416036846925799e-06,
      "loss": 0.0234,
      "step": 804300
    },
    {
      "epoch": 1.3162873208826744,
      "grad_norm": 1.0770059823989868,
      "learning_rate": 7.415970954712283e-06,
      "loss": 0.0239,
      "step": 804320
    },
    {
      "epoch": 1.3163200513213278,
      "grad_norm": 0.21518534421920776,
      "learning_rate": 7.415905062498765e-06,
      "loss": 0.0246,
      "step": 804340
    },
    {
      "epoch": 1.3163527817599812,
      "grad_norm": 0.3425441384315491,
      "learning_rate": 7.415839170285248e-06,
      "loss": 0.0196,
      "step": 804360
    },
    {
      "epoch": 1.3163855121986345,
      "grad_norm": 0.6025328040122986,
      "learning_rate": 7.415773278071731e-06,
      "loss": 0.0208,
      "step": 804380
    },
    {
      "epoch": 1.316418242637288,
      "grad_norm": 0.9714850187301636,
      "learning_rate": 7.415707385858214e-06,
      "loss": 0.0257,
      "step": 804400
    },
    {
      "epoch": 1.3164509730759413,
      "grad_norm": 0.3840908110141754,
      "learning_rate": 7.4156414936446965e-06,
      "loss": 0.0255,
      "step": 804420
    },
    {
      "epoch": 1.3164837035145944,
      "grad_norm": 0.7959486842155457,
      "learning_rate": 7.41557560143118e-06,
      "loss": 0.0236,
      "step": 804440
    },
    {
      "epoch": 1.3165164339532478,
      "grad_norm": 0.936297595500946,
      "learning_rate": 7.415509709217662e-06,
      "loss": 0.0264,
      "step": 804460
    },
    {
      "epoch": 1.3165491643919012,
      "grad_norm": 0.14407652616500854,
      "learning_rate": 7.415443817004146e-06,
      "loss": 0.014,
      "step": 804480
    },
    {
      "epoch": 1.3165818948305545,
      "grad_norm": 0.7713719010353088,
      "learning_rate": 7.4153779247906275e-06,
      "loss": 0.0242,
      "step": 804500
    },
    {
      "epoch": 1.316614625269208,
      "grad_norm": 2.817889451980591,
      "learning_rate": 7.415312032577111e-06,
      "loss": 0.0181,
      "step": 804520
    },
    {
      "epoch": 1.316647355707861,
      "grad_norm": 1.5240994691848755,
      "learning_rate": 7.415246140363593e-06,
      "loss": 0.0261,
      "step": 804540
    },
    {
      "epoch": 1.3166800861465147,
      "grad_norm": 0.5314954519271851,
      "learning_rate": 7.4151802481500765e-06,
      "loss": 0.0266,
      "step": 804560
    },
    {
      "epoch": 1.3167128165851678,
      "grad_norm": 0.5816853642463684,
      "learning_rate": 7.415114355936559e-06,
      "loss": 0.0202,
      "step": 804580
    },
    {
      "epoch": 1.3167455470238212,
      "grad_norm": 0.23867572844028473,
      "learning_rate": 7.415048463723042e-06,
      "loss": 0.0319,
      "step": 804600
    },
    {
      "epoch": 1.3167782774624746,
      "grad_norm": 2.052672863006592,
      "learning_rate": 7.414982571509526e-06,
      "loss": 0.0258,
      "step": 804620
    },
    {
      "epoch": 1.316811007901128,
      "grad_norm": 1.7309060096740723,
      "learning_rate": 7.414916679296008e-06,
      "loss": 0.0206,
      "step": 804640
    },
    {
      "epoch": 1.3168437383397813,
      "grad_norm": 0.7673072814941406,
      "learning_rate": 7.414850787082491e-06,
      "loss": 0.0199,
      "step": 804660
    },
    {
      "epoch": 1.3168764687784345,
      "grad_norm": 0.6648262739181519,
      "learning_rate": 7.414784894868974e-06,
      "loss": 0.0161,
      "step": 804680
    },
    {
      "epoch": 1.316909199217088,
      "grad_norm": 1.458345890045166,
      "learning_rate": 7.414719002655457e-06,
      "loss": 0.027,
      "step": 804700
    },
    {
      "epoch": 1.3169419296557412,
      "grad_norm": 0.28063324093818665,
      "learning_rate": 7.414653110441939e-06,
      "loss": 0.0216,
      "step": 804720
    },
    {
      "epoch": 1.3169746600943946,
      "grad_norm": 0.0974833071231842,
      "learning_rate": 7.414587218228423e-06,
      "loss": 0.0173,
      "step": 804740
    },
    {
      "epoch": 1.317007390533048,
      "grad_norm": 0.8233056664466858,
      "learning_rate": 7.414521326014905e-06,
      "loss": 0.0266,
      "step": 804760
    },
    {
      "epoch": 1.3170401209717013,
      "grad_norm": 0.6672342419624329,
      "learning_rate": 7.414455433801388e-06,
      "loss": 0.0224,
      "step": 804780
    },
    {
      "epoch": 1.3170728514103547,
      "grad_norm": 0.2469407021999359,
      "learning_rate": 7.414389541587871e-06,
      "loss": 0.0217,
      "step": 804800
    },
    {
      "epoch": 1.3171055818490078,
      "grad_norm": 0.8618462085723877,
      "learning_rate": 7.414323649374354e-06,
      "loss": 0.0294,
      "step": 804820
    },
    {
      "epoch": 1.3171383122876612,
      "grad_norm": 0.22418683767318726,
      "learning_rate": 7.414257757160837e-06,
      "loss": 0.0279,
      "step": 804840
    },
    {
      "epoch": 1.3171710427263146,
      "grad_norm": 0.508036732673645,
      "learning_rate": 7.41419186494732e-06,
      "loss": 0.0329,
      "step": 804860
    },
    {
      "epoch": 1.317203773164968,
      "grad_norm": 1.006801724433899,
      "learning_rate": 7.414125972733802e-06,
      "loss": 0.0186,
      "step": 804880
    },
    {
      "epoch": 1.3172365036036213,
      "grad_norm": 0.7380204200744629,
      "learning_rate": 7.414060080520286e-06,
      "loss": 0.0207,
      "step": 804900
    },
    {
      "epoch": 1.3172692340422747,
      "grad_norm": 0.5145517587661743,
      "learning_rate": 7.4139941883067676e-06,
      "loss": 0.0158,
      "step": 804920
    },
    {
      "epoch": 1.317301964480928,
      "grad_norm": 0.3730056881904602,
      "learning_rate": 7.413928296093251e-06,
      "loss": 0.0209,
      "step": 804940
    },
    {
      "epoch": 1.3173346949195812,
      "grad_norm": 0.2515600025653839,
      "learning_rate": 7.413862403879735e-06,
      "loss": 0.0201,
      "step": 804960
    },
    {
      "epoch": 1.3173674253582346,
      "grad_norm": 0.6410822868347168,
      "learning_rate": 7.413796511666217e-06,
      "loss": 0.0183,
      "step": 804980
    },
    {
      "epoch": 1.317400155796888,
      "grad_norm": 0.161960631608963,
      "learning_rate": 7.4137306194527e-06,
      "loss": 0.0155,
      "step": 805000
    },
    {
      "epoch": 1.3174328862355413,
      "grad_norm": 0.44131308794021606,
      "learning_rate": 7.413664727239183e-06,
      "loss": 0.0289,
      "step": 805020
    },
    {
      "epoch": 1.3174656166741947,
      "grad_norm": 0.7113589644432068,
      "learning_rate": 7.413598835025666e-06,
      "loss": 0.0279,
      "step": 805040
    },
    {
      "epoch": 1.317498347112848,
      "grad_norm": 0.5735346078872681,
      "learning_rate": 7.4135329428121484e-06,
      "loss": 0.0275,
      "step": 805060
    },
    {
      "epoch": 1.3175310775515015,
      "grad_norm": 0.23740726709365845,
      "learning_rate": 7.413467050598632e-06,
      "loss": 0.0293,
      "step": 805080
    },
    {
      "epoch": 1.3175638079901546,
      "grad_norm": 0.36382201313972473,
      "learning_rate": 7.413401158385114e-06,
      "loss": 0.0292,
      "step": 805100
    },
    {
      "epoch": 1.317596538428808,
      "grad_norm": 0.4381594657897949,
      "learning_rate": 7.4133352661715975e-06,
      "loss": 0.0238,
      "step": 805120
    },
    {
      "epoch": 1.3176292688674613,
      "grad_norm": 1.4065386056900024,
      "learning_rate": 7.413269373958079e-06,
      "loss": 0.0212,
      "step": 805140
    },
    {
      "epoch": 1.3176619993061147,
      "grad_norm": 0.2788301110267639,
      "learning_rate": 7.413203481744563e-06,
      "loss": 0.019,
      "step": 805160
    },
    {
      "epoch": 1.317694729744768,
      "grad_norm": 0.8186715841293335,
      "learning_rate": 7.413137589531046e-06,
      "loss": 0.0229,
      "step": 805180
    },
    {
      "epoch": 1.3177274601834212,
      "grad_norm": 0.41321849822998047,
      "learning_rate": 7.4130716973175285e-06,
      "loss": 0.0284,
      "step": 805200
    },
    {
      "epoch": 1.3177601906220748,
      "grad_norm": 0.31035301089286804,
      "learning_rate": 7.413005805104011e-06,
      "loss": 0.0208,
      "step": 805220
    },
    {
      "epoch": 1.317792921060728,
      "grad_norm": 0.3786468207836151,
      "learning_rate": 7.412939912890495e-06,
      "loss": 0.0213,
      "step": 805240
    },
    {
      "epoch": 1.3178256514993814,
      "grad_norm": 0.4214939475059509,
      "learning_rate": 7.412874020676977e-06,
      "loss": 0.0279,
      "step": 805260
    },
    {
      "epoch": 1.3178583819380347,
      "grad_norm": 0.7345982193946838,
      "learning_rate": 7.41280812846346e-06,
      "loss": 0.0249,
      "step": 805280
    },
    {
      "epoch": 1.317891112376688,
      "grad_norm": 0.13482442498207092,
      "learning_rate": 7.412742236249944e-06,
      "loss": 0.0146,
      "step": 805300
    },
    {
      "epoch": 1.3179238428153415,
      "grad_norm": 0.6181773543357849,
      "learning_rate": 7.412676344036426e-06,
      "loss": 0.0155,
      "step": 805320
    },
    {
      "epoch": 1.3179565732539946,
      "grad_norm": 0.43312981724739075,
      "learning_rate": 7.412610451822909e-06,
      "loss": 0.0279,
      "step": 805340
    },
    {
      "epoch": 1.3179893036926482,
      "grad_norm": 0.8712460994720459,
      "learning_rate": 7.412544559609391e-06,
      "loss": 0.0194,
      "step": 805360
    },
    {
      "epoch": 1.3180220341313014,
      "grad_norm": 0.292222261428833,
      "learning_rate": 7.412478667395875e-06,
      "loss": 0.0208,
      "step": 805380
    },
    {
      "epoch": 1.3180547645699547,
      "grad_norm": 0.2709198296070099,
      "learning_rate": 7.4124127751823575e-06,
      "loss": 0.0358,
      "step": 805400
    },
    {
      "epoch": 1.3180874950086081,
      "grad_norm": 0.5057602524757385,
      "learning_rate": 7.41234688296884e-06,
      "loss": 0.0183,
      "step": 805420
    },
    {
      "epoch": 1.3181202254472615,
      "grad_norm": 0.15256565809249878,
      "learning_rate": 7.412280990755323e-06,
      "loss": 0.0225,
      "step": 805440
    },
    {
      "epoch": 1.3181529558859149,
      "grad_norm": 0.3744904100894928,
      "learning_rate": 7.412215098541807e-06,
      "loss": 0.0158,
      "step": 805460
    },
    {
      "epoch": 1.318185686324568,
      "grad_norm": 1.0800484418869019,
      "learning_rate": 7.4121492063282885e-06,
      "loss": 0.0219,
      "step": 805480
    },
    {
      "epoch": 1.3182184167632214,
      "grad_norm": 1.9105747938156128,
      "learning_rate": 7.412083314114772e-06,
      "loss": 0.0288,
      "step": 805500
    },
    {
      "epoch": 1.3182511472018748,
      "grad_norm": 1.8161059617996216,
      "learning_rate": 7.412017421901254e-06,
      "loss": 0.0189,
      "step": 805520
    },
    {
      "epoch": 1.3182838776405281,
      "grad_norm": 1.1433528661727905,
      "learning_rate": 7.4119515296877376e-06,
      "loss": 0.0209,
      "step": 805540
    },
    {
      "epoch": 1.3183166080791815,
      "grad_norm": 0.9930614829063416,
      "learning_rate": 7.4118856374742195e-06,
      "loss": 0.0199,
      "step": 805560
    },
    {
      "epoch": 1.3183493385178349,
      "grad_norm": 2.7547903060913086,
      "learning_rate": 7.411819745260703e-06,
      "loss": 0.0178,
      "step": 805580
    },
    {
      "epoch": 1.3183820689564882,
      "grad_norm": 16.158729553222656,
      "learning_rate": 7.411753853047186e-06,
      "loss": 0.0263,
      "step": 805600
    },
    {
      "epoch": 1.3184147993951414,
      "grad_norm": 0.5268012881278992,
      "learning_rate": 7.4116879608336685e-06,
      "loss": 0.0239,
      "step": 805620
    },
    {
      "epoch": 1.3184475298337948,
      "grad_norm": 0.7108714580535889,
      "learning_rate": 7.411622068620151e-06,
      "loss": 0.019,
      "step": 805640
    },
    {
      "epoch": 1.3184802602724481,
      "grad_norm": 1.5549523830413818,
      "learning_rate": 7.411556176406635e-06,
      "loss": 0.0323,
      "step": 805660
    },
    {
      "epoch": 1.3185129907111015,
      "grad_norm": 0.6510725021362305,
      "learning_rate": 7.411490284193118e-06,
      "loss": 0.0218,
      "step": 805680
    },
    {
      "epoch": 1.3185457211497549,
      "grad_norm": 0.7653433680534363,
      "learning_rate": 7.4114243919796e-06,
      "loss": 0.0351,
      "step": 805700
    },
    {
      "epoch": 1.3185784515884083,
      "grad_norm": 0.5879141688346863,
      "learning_rate": 7.411358499766084e-06,
      "loss": 0.0129,
      "step": 805720
    },
    {
      "epoch": 1.3186111820270616,
      "grad_norm": 0.44292473793029785,
      "learning_rate": 7.411292607552566e-06,
      "loss": 0.0209,
      "step": 805740
    },
    {
      "epoch": 1.3186439124657148,
      "grad_norm": 0.46028655767440796,
      "learning_rate": 7.411226715339049e-06,
      "loss": 0.0132,
      "step": 805760
    },
    {
      "epoch": 1.3186766429043681,
      "grad_norm": 0.128587543964386,
      "learning_rate": 7.411160823125531e-06,
      "loss": 0.014,
      "step": 805780
    },
    {
      "epoch": 1.3187093733430215,
      "grad_norm": 1.3138190507888794,
      "learning_rate": 7.411094930912015e-06,
      "loss": 0.0244,
      "step": 805800
    },
    {
      "epoch": 1.318742103781675,
      "grad_norm": 0.36888253688812256,
      "learning_rate": 7.411029038698498e-06,
      "loss": 0.0174,
      "step": 805820
    },
    {
      "epoch": 1.3187748342203283,
      "grad_norm": 0.17218436300754547,
      "learning_rate": 7.41096314648498e-06,
      "loss": 0.0273,
      "step": 805840
    },
    {
      "epoch": 1.3188075646589816,
      "grad_norm": 0.2232598215341568,
      "learning_rate": 7.410897254271463e-06,
      "loss": 0.0199,
      "step": 805860
    },
    {
      "epoch": 1.318840295097635,
      "grad_norm": 1.3389447927474976,
      "learning_rate": 7.410831362057947e-06,
      "loss": 0.0216,
      "step": 805880
    },
    {
      "epoch": 1.3188730255362882,
      "grad_norm": 0.894667387008667,
      "learning_rate": 7.410765469844429e-06,
      "loss": 0.0221,
      "step": 805900
    },
    {
      "epoch": 1.3189057559749415,
      "grad_norm": 0.3126269578933716,
      "learning_rate": 7.410699577630912e-06,
      "loss": 0.0206,
      "step": 805920
    },
    {
      "epoch": 1.318938486413595,
      "grad_norm": 0.39113712310791016,
      "learning_rate": 7.410633685417394e-06,
      "loss": 0.0165,
      "step": 805940
    },
    {
      "epoch": 1.3189712168522483,
      "grad_norm": 0.23629255592823029,
      "learning_rate": 7.410567793203878e-06,
      "loss": 0.0162,
      "step": 805960
    },
    {
      "epoch": 1.3190039472909016,
      "grad_norm": 0.4545137286186218,
      "learning_rate": 7.41050190099036e-06,
      "loss": 0.0363,
      "step": 805980
    },
    {
      "epoch": 1.3190366777295548,
      "grad_norm": 0.278894305229187,
      "learning_rate": 7.410436008776843e-06,
      "loss": 0.0256,
      "step": 806000
    },
    {
      "epoch": 1.3190694081682084,
      "grad_norm": 0.7702879309654236,
      "learning_rate": 7.410370116563327e-06,
      "loss": 0.0246,
      "step": 806020
    },
    {
      "epoch": 1.3191021386068615,
      "grad_norm": 0.3009977638721466,
      "learning_rate": 7.4103042243498095e-06,
      "loss": 0.0202,
      "step": 806040
    },
    {
      "epoch": 1.319134869045515,
      "grad_norm": 0.6127172708511353,
      "learning_rate": 7.410238332136292e-06,
      "loss": 0.0174,
      "step": 806060
    },
    {
      "epoch": 1.3191675994841683,
      "grad_norm": 0.04227904602885246,
      "learning_rate": 7.410172439922775e-06,
      "loss": 0.015,
      "step": 806080
    },
    {
      "epoch": 1.3192003299228217,
      "grad_norm": 0.27208951115608215,
      "learning_rate": 7.4101065477092585e-06,
      "loss": 0.0219,
      "step": 806100
    },
    {
      "epoch": 1.319233060361475,
      "grad_norm": 0.41786932945251465,
      "learning_rate": 7.41004065549574e-06,
      "loss": 0.0203,
      "step": 806120
    },
    {
      "epoch": 1.3192657908001282,
      "grad_norm": 0.1782064139842987,
      "learning_rate": 7.409974763282224e-06,
      "loss": 0.0218,
      "step": 806140
    },
    {
      "epoch": 1.3192985212387818,
      "grad_norm": 0.7836238145828247,
      "learning_rate": 7.409908871068706e-06,
      "loss": 0.0228,
      "step": 806160
    },
    {
      "epoch": 1.319331251677435,
      "grad_norm": 0.8618617057800293,
      "learning_rate": 7.4098429788551895e-06,
      "loss": 0.0223,
      "step": 806180
    },
    {
      "epoch": 1.3193639821160883,
      "grad_norm": 0.23525020480155945,
      "learning_rate": 7.409777086641672e-06,
      "loss": 0.017,
      "step": 806200
    },
    {
      "epoch": 1.3193967125547417,
      "grad_norm": 0.6231837272644043,
      "learning_rate": 7.409711194428155e-06,
      "loss": 0.0211,
      "step": 806220
    },
    {
      "epoch": 1.319429442993395,
      "grad_norm": 0.6006911993026733,
      "learning_rate": 7.409645302214638e-06,
      "loss": 0.0257,
      "step": 806240
    },
    {
      "epoch": 1.3194621734320484,
      "grad_norm": 1.3017882108688354,
      "learning_rate": 7.409579410001121e-06,
      "loss": 0.0245,
      "step": 806260
    },
    {
      "epoch": 1.3194949038707016,
      "grad_norm": 0.436369389295578,
      "learning_rate": 7.409513517787603e-06,
      "loss": 0.0113,
      "step": 806280
    },
    {
      "epoch": 1.319527634309355,
      "grad_norm": 2.77209210395813,
      "learning_rate": 7.409447625574087e-06,
      "loss": 0.0198,
      "step": 806300
    },
    {
      "epoch": 1.3195603647480083,
      "grad_norm": 0.20936612784862518,
      "learning_rate": 7.409381733360569e-06,
      "loss": 0.0203,
      "step": 806320
    },
    {
      "epoch": 1.3195930951866617,
      "grad_norm": 0.8612154126167297,
      "learning_rate": 7.409315841147052e-06,
      "loss": 0.0172,
      "step": 806340
    },
    {
      "epoch": 1.319625825625315,
      "grad_norm": 0.5704035758972168,
      "learning_rate": 7.409249948933536e-06,
      "loss": 0.0221,
      "step": 806360
    },
    {
      "epoch": 1.3196585560639684,
      "grad_norm": 2.2074134349823,
      "learning_rate": 7.409184056720018e-06,
      "loss": 0.0284,
      "step": 806380
    },
    {
      "epoch": 1.3196912865026218,
      "grad_norm": 0.8901732563972473,
      "learning_rate": 7.409118164506501e-06,
      "loss": 0.0268,
      "step": 806400
    },
    {
      "epoch": 1.319724016941275,
      "grad_norm": 0.43035492300987244,
      "learning_rate": 7.409052272292984e-06,
      "loss": 0.021,
      "step": 806420
    },
    {
      "epoch": 1.3197567473799283,
      "grad_norm": 0.7928772568702698,
      "learning_rate": 7.408986380079467e-06,
      "loss": 0.0256,
      "step": 806440
    },
    {
      "epoch": 1.3197894778185817,
      "grad_norm": 0.7176680564880371,
      "learning_rate": 7.4089204878659495e-06,
      "loss": 0.0232,
      "step": 806460
    },
    {
      "epoch": 1.319822208257235,
      "grad_norm": 0.15767256915569305,
      "learning_rate": 7.408854595652433e-06,
      "loss": 0.0219,
      "step": 806480
    },
    {
      "epoch": 1.3198549386958884,
      "grad_norm": 0.8140762448310852,
      "learning_rate": 7.408788703438915e-06,
      "loss": 0.0162,
      "step": 806500
    },
    {
      "epoch": 1.3198876691345418,
      "grad_norm": 0.22344668209552765,
      "learning_rate": 7.408722811225399e-06,
      "loss": 0.0284,
      "step": 806520
    },
    {
      "epoch": 1.3199203995731952,
      "grad_norm": 1.5628443956375122,
      "learning_rate": 7.4086569190118805e-06,
      "loss": 0.0309,
      "step": 806540
    },
    {
      "epoch": 1.3199531300118483,
      "grad_norm": 0.2135467231273651,
      "learning_rate": 7.408591026798364e-06,
      "loss": 0.0233,
      "step": 806560
    },
    {
      "epoch": 1.3199858604505017,
      "grad_norm": 1.3861104249954224,
      "learning_rate": 7.408525134584846e-06,
      "loss": 0.0196,
      "step": 806580
    },
    {
      "epoch": 1.320018590889155,
      "grad_norm": 0.6797482967376709,
      "learning_rate": 7.4084592423713296e-06,
      "loss": 0.0166,
      "step": 806600
    },
    {
      "epoch": 1.3200513213278084,
      "grad_norm": 0.7744759321212769,
      "learning_rate": 7.408393350157812e-06,
      "loss": 0.0249,
      "step": 806620
    },
    {
      "epoch": 1.3200840517664618,
      "grad_norm": 0.8370227813720703,
      "learning_rate": 7.408327457944295e-06,
      "loss": 0.0254,
      "step": 806640
    },
    {
      "epoch": 1.3201167822051152,
      "grad_norm": 1.0623743534088135,
      "learning_rate": 7.408261565730778e-06,
      "loss": 0.0272,
      "step": 806660
    },
    {
      "epoch": 1.3201495126437686,
      "grad_norm": 0.6087616086006165,
      "learning_rate": 7.408195673517261e-06,
      "loss": 0.0216,
      "step": 806680
    },
    {
      "epoch": 1.3201822430824217,
      "grad_norm": 0.5640466213226318,
      "learning_rate": 7.408129781303744e-06,
      "loss": 0.02,
      "step": 806700
    },
    {
      "epoch": 1.320214973521075,
      "grad_norm": 1.368829369544983,
      "learning_rate": 7.408063889090227e-06,
      "loss": 0.0236,
      "step": 806720
    },
    {
      "epoch": 1.3202477039597285,
      "grad_norm": 0.2347690612077713,
      "learning_rate": 7.4079979968767104e-06,
      "loss": 0.0224,
      "step": 806740
    },
    {
      "epoch": 1.3202804343983818,
      "grad_norm": 0.3679015636444092,
      "learning_rate": 7.407932104663192e-06,
      "loss": 0.0129,
      "step": 806760
    },
    {
      "epoch": 1.3203131648370352,
      "grad_norm": 0.7021830081939697,
      "learning_rate": 7.407866212449676e-06,
      "loss": 0.0242,
      "step": 806780
    },
    {
      "epoch": 1.3203458952756884,
      "grad_norm": 0.47388678789138794,
      "learning_rate": 7.407800320236158e-06,
      "loss": 0.0197,
      "step": 806800
    },
    {
      "epoch": 1.320378625714342,
      "grad_norm": 1.480743169784546,
      "learning_rate": 7.407734428022641e-06,
      "loss": 0.0205,
      "step": 806820
    },
    {
      "epoch": 1.320411356152995,
      "grad_norm": 0.750577449798584,
      "learning_rate": 7.407668535809124e-06,
      "loss": 0.0164,
      "step": 806840
    },
    {
      "epoch": 1.3204440865916485,
      "grad_norm": 1.6739139556884766,
      "learning_rate": 7.407602643595607e-06,
      "loss": 0.0282,
      "step": 806860
    },
    {
      "epoch": 1.3204768170303018,
      "grad_norm": 0.6998468637466431,
      "learning_rate": 7.40753675138209e-06,
      "loss": 0.0287,
      "step": 806880
    },
    {
      "epoch": 1.3205095474689552,
      "grad_norm": 0.2887190282344818,
      "learning_rate": 7.407470859168573e-06,
      "loss": 0.0258,
      "step": 806900
    },
    {
      "epoch": 1.3205422779076086,
      "grad_norm": 1.5868841409683228,
      "learning_rate": 7.407404966955055e-06,
      "loss": 0.0235,
      "step": 806920
    },
    {
      "epoch": 1.3205750083462617,
      "grad_norm": 0.21581852436065674,
      "learning_rate": 7.407339074741539e-06,
      "loss": 0.0194,
      "step": 806940
    },
    {
      "epoch": 1.3206077387849153,
      "grad_norm": 0.40478047728538513,
      "learning_rate": 7.4072731825280206e-06,
      "loss": 0.0242,
      "step": 806960
    },
    {
      "epoch": 1.3206404692235685,
      "grad_norm": 1.7047300338745117,
      "learning_rate": 7.407207290314504e-06,
      "loss": 0.0279,
      "step": 806980
    },
    {
      "epoch": 1.3206731996622219,
      "grad_norm": 1.0174622535705566,
      "learning_rate": 7.407141398100987e-06,
      "loss": 0.0261,
      "step": 807000
    },
    {
      "epoch": 1.3207059301008752,
      "grad_norm": 0.9772146940231323,
      "learning_rate": 7.40707550588747e-06,
      "loss": 0.029,
      "step": 807020
    },
    {
      "epoch": 1.3207386605395286,
      "grad_norm": 1.3703542947769165,
      "learning_rate": 7.407009613673952e-06,
      "loss": 0.0257,
      "step": 807040
    },
    {
      "epoch": 1.320771390978182,
      "grad_norm": 0.9722387194633484,
      "learning_rate": 7.406943721460436e-06,
      "loss": 0.0186,
      "step": 807060
    },
    {
      "epoch": 1.3208041214168351,
      "grad_norm": 0.48144105076789856,
      "learning_rate": 7.406877829246919e-06,
      "loss": 0.0226,
      "step": 807080
    },
    {
      "epoch": 1.3208368518554885,
      "grad_norm": 0.8800343871116638,
      "learning_rate": 7.4068119370334014e-06,
      "loss": 0.017,
      "step": 807100
    },
    {
      "epoch": 1.3208695822941419,
      "grad_norm": 0.22874338924884796,
      "learning_rate": 7.406746044819885e-06,
      "loss": 0.0258,
      "step": 807120
    },
    {
      "epoch": 1.3209023127327952,
      "grad_norm": 0.2986621856689453,
      "learning_rate": 7.406680152606367e-06,
      "loss": 0.0211,
      "step": 807140
    },
    {
      "epoch": 1.3209350431714486,
      "grad_norm": 2.3946094512939453,
      "learning_rate": 7.4066142603928505e-06,
      "loss": 0.0255,
      "step": 807160
    },
    {
      "epoch": 1.320967773610102,
      "grad_norm": 1.0766785144805908,
      "learning_rate": 7.406548368179332e-06,
      "loss": 0.0253,
      "step": 807180
    },
    {
      "epoch": 1.3210005040487554,
      "grad_norm": 0.185499370098114,
      "learning_rate": 7.406482475965816e-06,
      "loss": 0.0181,
      "step": 807200
    },
    {
      "epoch": 1.3210332344874085,
      "grad_norm": 1.4531193971633911,
      "learning_rate": 7.406416583752299e-06,
      "loss": 0.0263,
      "step": 807220
    },
    {
      "epoch": 1.3210659649260619,
      "grad_norm": 4.034937381744385,
      "learning_rate": 7.4063506915387815e-06,
      "loss": 0.0192,
      "step": 807240
    },
    {
      "epoch": 1.3210986953647152,
      "grad_norm": 0.37897101044654846,
      "learning_rate": 7.406284799325264e-06,
      "loss": 0.0208,
      "step": 807260
    },
    {
      "epoch": 1.3211314258033686,
      "grad_norm": 0.6417513489723206,
      "learning_rate": 7.406218907111748e-06,
      "loss": 0.0219,
      "step": 807280
    },
    {
      "epoch": 1.321164156242022,
      "grad_norm": 0.5437941551208496,
      "learning_rate": 7.40615301489823e-06,
      "loss": 0.0178,
      "step": 807300
    },
    {
      "epoch": 1.3211968866806754,
      "grad_norm": 0.2884232699871063,
      "learning_rate": 7.406087122684713e-06,
      "loss": 0.0193,
      "step": 807320
    },
    {
      "epoch": 1.3212296171193287,
      "grad_norm": 0.9060049057006836,
      "learning_rate": 7.406021230471195e-06,
      "loss": 0.0239,
      "step": 807340
    },
    {
      "epoch": 1.3212623475579819,
      "grad_norm": 0.5036698579788208,
      "learning_rate": 7.405955338257679e-06,
      "loss": 0.0159,
      "step": 807360
    },
    {
      "epoch": 1.3212950779966353,
      "grad_norm": 0.4879225790500641,
      "learning_rate": 7.405889446044161e-06,
      "loss": 0.0213,
      "step": 807380
    },
    {
      "epoch": 1.3213278084352886,
      "grad_norm": 0.8504050374031067,
      "learning_rate": 7.405823553830644e-06,
      "loss": 0.0227,
      "step": 807400
    },
    {
      "epoch": 1.321360538873942,
      "grad_norm": 0.18008816242218018,
      "learning_rate": 7.405757661617128e-06,
      "loss": 0.0144,
      "step": 807420
    },
    {
      "epoch": 1.3213932693125954,
      "grad_norm": 0.31955692172050476,
      "learning_rate": 7.40569176940361e-06,
      "loss": 0.026,
      "step": 807440
    },
    {
      "epoch": 1.3214259997512487,
      "grad_norm": 0.18842054903507233,
      "learning_rate": 7.405625877190093e-06,
      "loss": 0.0231,
      "step": 807460
    },
    {
      "epoch": 1.3214587301899021,
      "grad_norm": 0.545341432094574,
      "learning_rate": 7.405559984976576e-06,
      "loss": 0.0249,
      "step": 807480
    },
    {
      "epoch": 1.3214914606285553,
      "grad_norm": 0.8657781481742859,
      "learning_rate": 7.40549409276306e-06,
      "loss": 0.0184,
      "step": 807500
    },
    {
      "epoch": 1.3215241910672086,
      "grad_norm": 0.44463276863098145,
      "learning_rate": 7.4054282005495415e-06,
      "loss": 0.0254,
      "step": 807520
    },
    {
      "epoch": 1.321556921505862,
      "grad_norm": 0.8111628293991089,
      "learning_rate": 7.405362308336025e-06,
      "loss": 0.0322,
      "step": 807540
    },
    {
      "epoch": 1.3215896519445154,
      "grad_norm": 0.17280128598213196,
      "learning_rate": 7.405296416122507e-06,
      "loss": 0.0212,
      "step": 807560
    },
    {
      "epoch": 1.3216223823831688,
      "grad_norm": 1.2572036981582642,
      "learning_rate": 7.405230523908991e-06,
      "loss": 0.0266,
      "step": 807580
    },
    {
      "epoch": 1.321655112821822,
      "grad_norm": 0.7775216698646545,
      "learning_rate": 7.4051646316954725e-06,
      "loss": 0.0311,
      "step": 807600
    },
    {
      "epoch": 1.3216878432604755,
      "grad_norm": 0.8303539752960205,
      "learning_rate": 7.405098739481956e-06,
      "loss": 0.025,
      "step": 807620
    },
    {
      "epoch": 1.3217205736991287,
      "grad_norm": 0.502801775932312,
      "learning_rate": 7.405032847268439e-06,
      "loss": 0.0223,
      "step": 807640
    },
    {
      "epoch": 1.321753304137782,
      "grad_norm": 0.7736268043518066,
      "learning_rate": 7.4049669550549215e-06,
      "loss": 0.02,
      "step": 807660
    },
    {
      "epoch": 1.3217860345764354,
      "grad_norm": 0.9331691265106201,
      "learning_rate": 7.404901062841404e-06,
      "loss": 0.024,
      "step": 807680
    },
    {
      "epoch": 1.3218187650150888,
      "grad_norm": 1.290257453918457,
      "learning_rate": 7.404835170627888e-06,
      "loss": 0.0149,
      "step": 807700
    },
    {
      "epoch": 1.3218514954537421,
      "grad_norm": 0.4003184139728546,
      "learning_rate": 7.40476927841437e-06,
      "loss": 0.0163,
      "step": 807720
    },
    {
      "epoch": 1.3218842258923953,
      "grad_norm": 0.2451065480709076,
      "learning_rate": 7.404703386200853e-06,
      "loss": 0.021,
      "step": 807740
    },
    {
      "epoch": 1.3219169563310489,
      "grad_norm": 0.5547577738761902,
      "learning_rate": 7.404637493987337e-06,
      "loss": 0.0253,
      "step": 807760
    },
    {
      "epoch": 1.321949686769702,
      "grad_norm": 0.6906042098999023,
      "learning_rate": 7.404571601773819e-06,
      "loss": 0.0209,
      "step": 807780
    },
    {
      "epoch": 1.3219824172083554,
      "grad_norm": 0.250770628452301,
      "learning_rate": 7.404505709560302e-06,
      "loss": 0.0217,
      "step": 807800
    },
    {
      "epoch": 1.3220151476470088,
      "grad_norm": 0.4250047206878662,
      "learning_rate": 7.404439817346784e-06,
      "loss": 0.0199,
      "step": 807820
    },
    {
      "epoch": 1.3220478780856622,
      "grad_norm": 0.3350646495819092,
      "learning_rate": 7.404373925133268e-06,
      "loss": 0.0204,
      "step": 807840
    },
    {
      "epoch": 1.3220806085243155,
      "grad_norm": 0.6474916338920593,
      "learning_rate": 7.404308032919751e-06,
      "loss": 0.0198,
      "step": 807860
    },
    {
      "epoch": 1.3221133389629687,
      "grad_norm": 1.1808229684829712,
      "learning_rate": 7.404242140706233e-06,
      "loss": 0.0259,
      "step": 807880
    },
    {
      "epoch": 1.322146069401622,
      "grad_norm": 0.6787561774253845,
      "learning_rate": 7.404176248492716e-06,
      "loss": 0.019,
      "step": 807900
    },
    {
      "epoch": 1.3221787998402754,
      "grad_norm": 0.17718194425106049,
      "learning_rate": 7.4041103562792e-06,
      "loss": 0.0244,
      "step": 807920
    },
    {
      "epoch": 1.3222115302789288,
      "grad_norm": 1.9635850191116333,
      "learning_rate": 7.404044464065682e-06,
      "loss": 0.023,
      "step": 807940
    },
    {
      "epoch": 1.3222442607175822,
      "grad_norm": 0.15139184892177582,
      "learning_rate": 7.403978571852165e-06,
      "loss": 0.0211,
      "step": 807960
    },
    {
      "epoch": 1.3222769911562355,
      "grad_norm": 0.12538056075572968,
      "learning_rate": 7.403912679638647e-06,
      "loss": 0.02,
      "step": 807980
    },
    {
      "epoch": 1.322309721594889,
      "grad_norm": 1.7219187021255493,
      "learning_rate": 7.403846787425131e-06,
      "loss": 0.0292,
      "step": 808000
    },
    {
      "epoch": 1.322342452033542,
      "grad_norm": 0.5365130305290222,
      "learning_rate": 7.403780895211613e-06,
      "loss": 0.0181,
      "step": 808020
    },
    {
      "epoch": 1.3223751824721954,
      "grad_norm": 0.7951508164405823,
      "learning_rate": 7.403715002998096e-06,
      "loss": 0.0195,
      "step": 808040
    },
    {
      "epoch": 1.3224079129108488,
      "grad_norm": 0.8728834986686707,
      "learning_rate": 7.403649110784579e-06,
      "loss": 0.016,
      "step": 808060
    },
    {
      "epoch": 1.3224406433495022,
      "grad_norm": 0.7198086977005005,
      "learning_rate": 7.4035832185710625e-06,
      "loss": 0.0148,
      "step": 808080
    },
    {
      "epoch": 1.3224733737881555,
      "grad_norm": 0.6923372745513916,
      "learning_rate": 7.403517326357544e-06,
      "loss": 0.0201,
      "step": 808100
    },
    {
      "epoch": 1.322506104226809,
      "grad_norm": 0.624406099319458,
      "learning_rate": 7.403451434144028e-06,
      "loss": 0.0224,
      "step": 808120
    },
    {
      "epoch": 1.3225388346654623,
      "grad_norm": 1.2211933135986328,
      "learning_rate": 7.4033855419305115e-06,
      "loss": 0.0227,
      "step": 808140
    },
    {
      "epoch": 1.3225715651041154,
      "grad_norm": 0.4475739300251007,
      "learning_rate": 7.403319649716993e-06,
      "loss": 0.0177,
      "step": 808160
    },
    {
      "epoch": 1.3226042955427688,
      "grad_norm": 0.2942655384540558,
      "learning_rate": 7.403253757503477e-06,
      "loss": 0.022,
      "step": 808180
    },
    {
      "epoch": 1.3226370259814222,
      "grad_norm": 0.4496489465236664,
      "learning_rate": 7.403187865289959e-06,
      "loss": 0.0285,
      "step": 808200
    },
    {
      "epoch": 1.3226697564200756,
      "grad_norm": 3.0266191959381104,
      "learning_rate": 7.4031219730764425e-06,
      "loss": 0.0257,
      "step": 808220
    },
    {
      "epoch": 1.322702486858729,
      "grad_norm": 0.726866602897644,
      "learning_rate": 7.403056080862925e-06,
      "loss": 0.0243,
      "step": 808240
    },
    {
      "epoch": 1.322735217297382,
      "grad_norm": 0.36781826615333557,
      "learning_rate": 7.402990188649408e-06,
      "loss": 0.0304,
      "step": 808260
    },
    {
      "epoch": 1.3227679477360357,
      "grad_norm": 0.6748703122138977,
      "learning_rate": 7.402924296435891e-06,
      "loss": 0.0248,
      "step": 808280
    },
    {
      "epoch": 1.3228006781746888,
      "grad_norm": 1.811405897140503,
      "learning_rate": 7.402858404222374e-06,
      "loss": 0.0285,
      "step": 808300
    },
    {
      "epoch": 1.3228334086133422,
      "grad_norm": 0.5175334215164185,
      "learning_rate": 7.402792512008856e-06,
      "loss": 0.0232,
      "step": 808320
    },
    {
      "epoch": 1.3228661390519956,
      "grad_norm": 0.34128832817077637,
      "learning_rate": 7.40272661979534e-06,
      "loss": 0.022,
      "step": 808340
    },
    {
      "epoch": 1.322898869490649,
      "grad_norm": 0.19850997626781464,
      "learning_rate": 7.402660727581822e-06,
      "loss": 0.0138,
      "step": 808360
    },
    {
      "epoch": 1.3229315999293023,
      "grad_norm": 1.1300603151321411,
      "learning_rate": 7.402594835368305e-06,
      "loss": 0.0272,
      "step": 808380
    },
    {
      "epoch": 1.3229643303679555,
      "grad_norm": 0.7757488489151001,
      "learning_rate": 7.402528943154787e-06,
      "loss": 0.0254,
      "step": 808400
    },
    {
      "epoch": 1.322997060806609,
      "grad_norm": 0.9668710827827454,
      "learning_rate": 7.402463050941271e-06,
      "loss": 0.0217,
      "step": 808420
    },
    {
      "epoch": 1.3230297912452622,
      "grad_norm": 0.44473791122436523,
      "learning_rate": 7.4023971587277535e-06,
      "loss": 0.0219,
      "step": 808440
    },
    {
      "epoch": 1.3230625216839156,
      "grad_norm": 0.4239692986011505,
      "learning_rate": 7.402331266514236e-06,
      "loss": 0.0231,
      "step": 808460
    },
    {
      "epoch": 1.323095252122569,
      "grad_norm": 0.8419956564903259,
      "learning_rate": 7.40226537430072e-06,
      "loss": 0.0177,
      "step": 808480
    },
    {
      "epoch": 1.3231279825612223,
      "grad_norm": 0.6174992322921753,
      "learning_rate": 7.4021994820872025e-06,
      "loss": 0.0282,
      "step": 808500
    },
    {
      "epoch": 1.3231607129998757,
      "grad_norm": 0.36208492517471313,
      "learning_rate": 7.402133589873685e-06,
      "loss": 0.0161,
      "step": 808520
    },
    {
      "epoch": 1.3231934434385288,
      "grad_norm": 0.729634702205658,
      "learning_rate": 7.402067697660168e-06,
      "loss": 0.0219,
      "step": 808540
    },
    {
      "epoch": 1.3232261738771822,
      "grad_norm": 0.5021829009056091,
      "learning_rate": 7.402001805446652e-06,
      "loss": 0.0252,
      "step": 808560
    },
    {
      "epoch": 1.3232589043158356,
      "grad_norm": 0.8035212755203247,
      "learning_rate": 7.4019359132331335e-06,
      "loss": 0.0202,
      "step": 808580
    },
    {
      "epoch": 1.323291634754489,
      "grad_norm": 0.4088970124721527,
      "learning_rate": 7.401870021019617e-06,
      "loss": 0.0193,
      "step": 808600
    },
    {
      "epoch": 1.3233243651931423,
      "grad_norm": 0.3654022216796875,
      "learning_rate": 7.401804128806099e-06,
      "loss": 0.0164,
      "step": 808620
    },
    {
      "epoch": 1.3233570956317957,
      "grad_norm": 0.5817865133285522,
      "learning_rate": 7.4017382365925826e-06,
      "loss": 0.0237,
      "step": 808640
    },
    {
      "epoch": 1.323389826070449,
      "grad_norm": 0.17705337703227997,
      "learning_rate": 7.401672344379065e-06,
      "loss": 0.0334,
      "step": 808660
    },
    {
      "epoch": 1.3234225565091022,
      "grad_norm": 0.15330028533935547,
      "learning_rate": 7.401606452165548e-06,
      "loss": 0.0302,
      "step": 808680
    },
    {
      "epoch": 1.3234552869477556,
      "grad_norm": 1.373641848564148,
      "learning_rate": 7.401540559952031e-06,
      "loss": 0.0342,
      "step": 808700
    },
    {
      "epoch": 1.323488017386409,
      "grad_norm": 0.6017255187034607,
      "learning_rate": 7.401474667738514e-06,
      "loss": 0.0205,
      "step": 808720
    },
    {
      "epoch": 1.3235207478250623,
      "grad_norm": 1.0505367517471313,
      "learning_rate": 7.401408775524996e-06,
      "loss": 0.0296,
      "step": 808740
    },
    {
      "epoch": 1.3235534782637157,
      "grad_norm": 0.4736994504928589,
      "learning_rate": 7.40134288331148e-06,
      "loss": 0.0173,
      "step": 808760
    },
    {
      "epoch": 1.323586208702369,
      "grad_norm": 0.6001248955726624,
      "learning_rate": 7.401276991097962e-06,
      "loss": 0.0168,
      "step": 808780
    },
    {
      "epoch": 1.3236189391410225,
      "grad_norm": 1.6201001405715942,
      "learning_rate": 7.401211098884445e-06,
      "loss": 0.0232,
      "step": 808800
    },
    {
      "epoch": 1.3236516695796756,
      "grad_norm": 0.42587754130363464,
      "learning_rate": 7.401145206670929e-06,
      "loss": 0.0199,
      "step": 808820
    },
    {
      "epoch": 1.323684400018329,
      "grad_norm": 0.4792011082172394,
      "learning_rate": 7.401079314457411e-06,
      "loss": 0.0249,
      "step": 808840
    },
    {
      "epoch": 1.3237171304569824,
      "grad_norm": 4.300126552581787,
      "learning_rate": 7.401013422243894e-06,
      "loss": 0.0249,
      "step": 808860
    },
    {
      "epoch": 1.3237498608956357,
      "grad_norm": 0.31576067209243774,
      "learning_rate": 7.400947530030377e-06,
      "loss": 0.0289,
      "step": 808880
    },
    {
      "epoch": 1.323782591334289,
      "grad_norm": 0.5288184285163879,
      "learning_rate": 7.40088163781686e-06,
      "loss": 0.0202,
      "step": 808900
    },
    {
      "epoch": 1.3238153217729425,
      "grad_norm": 0.35237690806388855,
      "learning_rate": 7.400815745603343e-06,
      "loss": 0.0249,
      "step": 808920
    },
    {
      "epoch": 1.3238480522115958,
      "grad_norm": 0.24456389248371124,
      "learning_rate": 7.400749853389826e-06,
      "loss": 0.0208,
      "step": 808940
    },
    {
      "epoch": 1.323880782650249,
      "grad_norm": 0.7025732398033142,
      "learning_rate": 7.400683961176308e-06,
      "loss": 0.027,
      "step": 808960
    },
    {
      "epoch": 1.3239135130889024,
      "grad_norm": 0.4465837776660919,
      "learning_rate": 7.400618068962792e-06,
      "loss": 0.0324,
      "step": 808980
    },
    {
      "epoch": 1.3239462435275557,
      "grad_norm": 0.04201466590166092,
      "learning_rate": 7.4005521767492736e-06,
      "loss": 0.0207,
      "step": 809000
    },
    {
      "epoch": 1.3239789739662091,
      "grad_norm": 1.5154645442962646,
      "learning_rate": 7.400486284535757e-06,
      "loss": 0.0275,
      "step": 809020
    },
    {
      "epoch": 1.3240117044048625,
      "grad_norm": 0.5921524167060852,
      "learning_rate": 7.40042039232224e-06,
      "loss": 0.0315,
      "step": 809040
    },
    {
      "epoch": 1.3240444348435156,
      "grad_norm": 0.4052945375442505,
      "learning_rate": 7.400354500108723e-06,
      "loss": 0.0215,
      "step": 809060
    },
    {
      "epoch": 1.3240771652821692,
      "grad_norm": 0.4977254867553711,
      "learning_rate": 7.400288607895205e-06,
      "loss": 0.0319,
      "step": 809080
    },
    {
      "epoch": 1.3241098957208224,
      "grad_norm": 0.3293766379356384,
      "learning_rate": 7.400222715681689e-06,
      "loss": 0.026,
      "step": 809100
    },
    {
      "epoch": 1.3241426261594758,
      "grad_norm": 0.8667323589324951,
      "learning_rate": 7.400156823468171e-06,
      "loss": 0.0241,
      "step": 809120
    },
    {
      "epoch": 1.3241753565981291,
      "grad_norm": 1.1545933485031128,
      "learning_rate": 7.4000909312546544e-06,
      "loss": 0.0257,
      "step": 809140
    },
    {
      "epoch": 1.3242080870367825,
      "grad_norm": 1.9362804889678955,
      "learning_rate": 7.400025039041136e-06,
      "loss": 0.0203,
      "step": 809160
    },
    {
      "epoch": 1.3242408174754359,
      "grad_norm": 2.3551344871520996,
      "learning_rate": 7.39995914682762e-06,
      "loss": 0.0238,
      "step": 809180
    },
    {
      "epoch": 1.324273547914089,
      "grad_norm": 0.624904215335846,
      "learning_rate": 7.3998932546141035e-06,
      "loss": 0.0197,
      "step": 809200
    },
    {
      "epoch": 1.3243062783527426,
      "grad_norm": 3.623835563659668,
      "learning_rate": 7.399827362400585e-06,
      "loss": 0.0169,
      "step": 809220
    },
    {
      "epoch": 1.3243390087913958,
      "grad_norm": 1.049928903579712,
      "learning_rate": 7.399761470187069e-06,
      "loss": 0.0192,
      "step": 809240
    },
    {
      "epoch": 1.3243717392300491,
      "grad_norm": 0.5830478072166443,
      "learning_rate": 7.399695577973552e-06,
      "loss": 0.0142,
      "step": 809260
    },
    {
      "epoch": 1.3244044696687025,
      "grad_norm": 1.9810442924499512,
      "learning_rate": 7.3996296857600345e-06,
      "loss": 0.0272,
      "step": 809280
    },
    {
      "epoch": 1.3244372001073559,
      "grad_norm": 0.9359314441680908,
      "learning_rate": 7.399563793546517e-06,
      "loss": 0.0233,
      "step": 809300
    },
    {
      "epoch": 1.3244699305460093,
      "grad_norm": 0.5522928833961487,
      "learning_rate": 7.399497901333001e-06,
      "loss": 0.0237,
      "step": 809320
    },
    {
      "epoch": 1.3245026609846624,
      "grad_norm": 0.7181009650230408,
      "learning_rate": 7.399432009119483e-06,
      "loss": 0.0217,
      "step": 809340
    },
    {
      "epoch": 1.3245353914233158,
      "grad_norm": 0.35179921984672546,
      "learning_rate": 7.399366116905966e-06,
      "loss": 0.0142,
      "step": 809360
    },
    {
      "epoch": 1.3245681218619691,
      "grad_norm": 2.75264573097229,
      "learning_rate": 7.399300224692448e-06,
      "loss": 0.0157,
      "step": 809380
    },
    {
      "epoch": 1.3246008523006225,
      "grad_norm": 0.8651531934738159,
      "learning_rate": 7.399234332478932e-06,
      "loss": 0.0251,
      "step": 809400
    },
    {
      "epoch": 1.324633582739276,
      "grad_norm": 2.1222872734069824,
      "learning_rate": 7.399168440265414e-06,
      "loss": 0.0307,
      "step": 809420
    },
    {
      "epoch": 1.3246663131779293,
      "grad_norm": 0.8191986083984375,
      "learning_rate": 7.399102548051897e-06,
      "loss": 0.022,
      "step": 809440
    },
    {
      "epoch": 1.3246990436165826,
      "grad_norm": 0.3677446246147156,
      "learning_rate": 7.39903665583838e-06,
      "loss": 0.0203,
      "step": 809460
    },
    {
      "epoch": 1.3247317740552358,
      "grad_norm": 3.5657448768615723,
      "learning_rate": 7.398970763624863e-06,
      "loss": 0.0179,
      "step": 809480
    },
    {
      "epoch": 1.3247645044938892,
      "grad_norm": 0.4167909324169159,
      "learning_rate": 7.3989048714113455e-06,
      "loss": 0.0249,
      "step": 809500
    },
    {
      "epoch": 1.3247972349325425,
      "grad_norm": 1.25426185131073,
      "learning_rate": 7.398838979197829e-06,
      "loss": 0.0267,
      "step": 809520
    },
    {
      "epoch": 1.324829965371196,
      "grad_norm": 0.5277966856956482,
      "learning_rate": 7.398773086984312e-06,
      "loss": 0.0238,
      "step": 809540
    },
    {
      "epoch": 1.3248626958098493,
      "grad_norm": 0.5274962186813354,
      "learning_rate": 7.3987071947707945e-06,
      "loss": 0.0203,
      "step": 809560
    },
    {
      "epoch": 1.3248954262485027,
      "grad_norm": 0.3758041262626648,
      "learning_rate": 7.398641302557278e-06,
      "loss": 0.0193,
      "step": 809580
    },
    {
      "epoch": 1.324928156687156,
      "grad_norm": 2.560224771499634,
      "learning_rate": 7.39857541034376e-06,
      "loss": 0.0245,
      "step": 809600
    },
    {
      "epoch": 1.3249608871258092,
      "grad_norm": 0.7021223902702332,
      "learning_rate": 7.398509518130244e-06,
      "loss": 0.0191,
      "step": 809620
    },
    {
      "epoch": 1.3249936175644625,
      "grad_norm": 0.3851344585418701,
      "learning_rate": 7.3984436259167255e-06,
      "loss": 0.0165,
      "step": 809640
    },
    {
      "epoch": 1.325026348003116,
      "grad_norm": 0.4525811970233917,
      "learning_rate": 7.398377733703209e-06,
      "loss": 0.0239,
      "step": 809660
    },
    {
      "epoch": 1.3250590784417693,
      "grad_norm": 0.4122183322906494,
      "learning_rate": 7.398311841489692e-06,
      "loss": 0.015,
      "step": 809680
    },
    {
      "epoch": 1.3250918088804227,
      "grad_norm": 0.36378419399261475,
      "learning_rate": 7.3982459492761745e-06,
      "loss": 0.0252,
      "step": 809700
    },
    {
      "epoch": 1.325124539319076,
      "grad_norm": 0.7325528264045715,
      "learning_rate": 7.398180057062657e-06,
      "loss": 0.0262,
      "step": 809720
    },
    {
      "epoch": 1.3251572697577294,
      "grad_norm": 0.8430491089820862,
      "learning_rate": 7.398114164849141e-06,
      "loss": 0.0231,
      "step": 809740
    },
    {
      "epoch": 1.3251900001963826,
      "grad_norm": 0.402599573135376,
      "learning_rate": 7.398048272635623e-06,
      "loss": 0.0171,
      "step": 809760
    },
    {
      "epoch": 1.325222730635036,
      "grad_norm": 0.4024229943752289,
      "learning_rate": 7.397982380422106e-06,
      "loss": 0.0281,
      "step": 809780
    },
    {
      "epoch": 1.3252554610736893,
      "grad_norm": 1.1812968254089355,
      "learning_rate": 7.397916488208588e-06,
      "loss": 0.0232,
      "step": 809800
    },
    {
      "epoch": 1.3252881915123427,
      "grad_norm": 0.5633339285850525,
      "learning_rate": 7.397850595995072e-06,
      "loss": 0.024,
      "step": 809820
    },
    {
      "epoch": 1.325320921950996,
      "grad_norm": 1.9870258569717407,
      "learning_rate": 7.3977847037815546e-06,
      "loss": 0.0279,
      "step": 809840
    },
    {
      "epoch": 1.3253536523896492,
      "grad_norm": 0.10020776838064194,
      "learning_rate": 7.397718811568037e-06,
      "loss": 0.0167,
      "step": 809860
    },
    {
      "epoch": 1.3253863828283028,
      "grad_norm": 0.4095841646194458,
      "learning_rate": 7.397652919354521e-06,
      "loss": 0.0314,
      "step": 809880
    },
    {
      "epoch": 1.325419113266956,
      "grad_norm": 0.4874110519886017,
      "learning_rate": 7.397587027141004e-06,
      "loss": 0.0235,
      "step": 809900
    },
    {
      "epoch": 1.3254518437056093,
      "grad_norm": 0.23608186841011047,
      "learning_rate": 7.397521134927486e-06,
      "loss": 0.0296,
      "step": 809920
    },
    {
      "epoch": 1.3254845741442627,
      "grad_norm": 0.4277723431587219,
      "learning_rate": 7.397455242713969e-06,
      "loss": 0.0262,
      "step": 809940
    },
    {
      "epoch": 1.325517304582916,
      "grad_norm": 0.6359061002731323,
      "learning_rate": 7.397389350500453e-06,
      "loss": 0.0259,
      "step": 809960
    },
    {
      "epoch": 1.3255500350215694,
      "grad_norm": 0.8980905413627625,
      "learning_rate": 7.397323458286935e-06,
      "loss": 0.0193,
      "step": 809980
    },
    {
      "epoch": 1.3255827654602226,
      "grad_norm": 0.3092428743839264,
      "learning_rate": 7.397257566073418e-06,
      "loss": 0.0224,
      "step": 810000
    },
    {
      "epoch": 1.3256154958988762,
      "grad_norm": 0.3147567808628082,
      "learning_rate": 7.3971916738599e-06,
      "loss": 0.0244,
      "step": 810020
    },
    {
      "epoch": 1.3256482263375293,
      "grad_norm": 0.4706891179084778,
      "learning_rate": 7.397125781646384e-06,
      "loss": 0.02,
      "step": 810040
    },
    {
      "epoch": 1.3256809567761827,
      "grad_norm": 1.1925163269042969,
      "learning_rate": 7.397059889432866e-06,
      "loss": 0.0214,
      "step": 810060
    },
    {
      "epoch": 1.325713687214836,
      "grad_norm": 1.336478352546692,
      "learning_rate": 7.396993997219349e-06,
      "loss": 0.0257,
      "step": 810080
    },
    {
      "epoch": 1.3257464176534894,
      "grad_norm": 0.6079769730567932,
      "learning_rate": 7.396928105005832e-06,
      "loss": 0.0212,
      "step": 810100
    },
    {
      "epoch": 1.3257791480921428,
      "grad_norm": 1.3077518939971924,
      "learning_rate": 7.3968622127923155e-06,
      "loss": 0.0276,
      "step": 810120
    },
    {
      "epoch": 1.325811878530796,
      "grad_norm": 0.7361970543861389,
      "learning_rate": 7.396796320578797e-06,
      "loss": 0.0189,
      "step": 810140
    },
    {
      "epoch": 1.3258446089694493,
      "grad_norm": 0.6260107755661011,
      "learning_rate": 7.396730428365281e-06,
      "loss": 0.0232,
      "step": 810160
    },
    {
      "epoch": 1.3258773394081027,
      "grad_norm": 0.07748465240001678,
      "learning_rate": 7.396664536151763e-06,
      "loss": 0.0182,
      "step": 810180
    },
    {
      "epoch": 1.325910069846756,
      "grad_norm": 0.6903508305549622,
      "learning_rate": 7.3965986439382464e-06,
      "loss": 0.0165,
      "step": 810200
    },
    {
      "epoch": 1.3259428002854095,
      "grad_norm": 0.45893004536628723,
      "learning_rate": 7.39653275172473e-06,
      "loss": 0.0279,
      "step": 810220
    },
    {
      "epoch": 1.3259755307240628,
      "grad_norm": 0.21371380984783173,
      "learning_rate": 7.396466859511212e-06,
      "loss": 0.0211,
      "step": 810240
    },
    {
      "epoch": 1.3260082611627162,
      "grad_norm": 1.0246992111206055,
      "learning_rate": 7.3964009672976955e-06,
      "loss": 0.0266,
      "step": 810260
    },
    {
      "epoch": 1.3260409916013693,
      "grad_norm": 0.27194082736968994,
      "learning_rate": 7.396335075084178e-06,
      "loss": 0.0185,
      "step": 810280
    },
    {
      "epoch": 1.3260737220400227,
      "grad_norm": 0.750441312789917,
      "learning_rate": 7.396269182870661e-06,
      "loss": 0.0158,
      "step": 810300
    },
    {
      "epoch": 1.326106452478676,
      "grad_norm": 0.32302841544151306,
      "learning_rate": 7.396203290657144e-06,
      "loss": 0.0173,
      "step": 810320
    },
    {
      "epoch": 1.3261391829173295,
      "grad_norm": 0.32448750734329224,
      "learning_rate": 7.396137398443627e-06,
      "loss": 0.0277,
      "step": 810340
    },
    {
      "epoch": 1.3261719133559828,
      "grad_norm": 0.3922646939754486,
      "learning_rate": 7.396071506230109e-06,
      "loss": 0.0246,
      "step": 810360
    },
    {
      "epoch": 1.3262046437946362,
      "grad_norm": 1.8803863525390625,
      "learning_rate": 7.396005614016593e-06,
      "loss": 0.0215,
      "step": 810380
    },
    {
      "epoch": 1.3262373742332896,
      "grad_norm": 0.42723825573921204,
      "learning_rate": 7.395939721803075e-06,
      "loss": 0.03,
      "step": 810400
    },
    {
      "epoch": 1.3262701046719427,
      "grad_norm": 0.40309861302375793,
      "learning_rate": 7.395873829589558e-06,
      "loss": 0.0194,
      "step": 810420
    },
    {
      "epoch": 1.326302835110596,
      "grad_norm": 0.8068512678146362,
      "learning_rate": 7.39580793737604e-06,
      "loss": 0.0291,
      "step": 810440
    },
    {
      "epoch": 1.3263355655492495,
      "grad_norm": 2.720566749572754,
      "learning_rate": 7.395742045162524e-06,
      "loss": 0.0229,
      "step": 810460
    },
    {
      "epoch": 1.3263682959879028,
      "grad_norm": 0.29905039072036743,
      "learning_rate": 7.3956761529490065e-06,
      "loss": 0.0152,
      "step": 810480
    },
    {
      "epoch": 1.3264010264265562,
      "grad_norm": 0.35009339451789856,
      "learning_rate": 7.395610260735489e-06,
      "loss": 0.0171,
      "step": 810500
    },
    {
      "epoch": 1.3264337568652096,
      "grad_norm": 0.1858581304550171,
      "learning_rate": 7.395544368521972e-06,
      "loss": 0.02,
      "step": 810520
    },
    {
      "epoch": 1.326466487303863,
      "grad_norm": 1.247511386871338,
      "learning_rate": 7.3954784763084555e-06,
      "loss": 0.0237,
      "step": 810540
    },
    {
      "epoch": 1.3264992177425161,
      "grad_norm": 1.3198312520980835,
      "learning_rate": 7.3954125840949374e-06,
      "loss": 0.0141,
      "step": 810560
    },
    {
      "epoch": 1.3265319481811695,
      "grad_norm": 0.26087686419487,
      "learning_rate": 7.395346691881421e-06,
      "loss": 0.0278,
      "step": 810580
    },
    {
      "epoch": 1.3265646786198229,
      "grad_norm": 1.237710952758789,
      "learning_rate": 7.395280799667905e-06,
      "loss": 0.0229,
      "step": 810600
    },
    {
      "epoch": 1.3265974090584762,
      "grad_norm": 0.42610615491867065,
      "learning_rate": 7.3952149074543865e-06,
      "loss": 0.0223,
      "step": 810620
    },
    {
      "epoch": 1.3266301394971296,
      "grad_norm": 0.35146239399909973,
      "learning_rate": 7.39514901524087e-06,
      "loss": 0.0183,
      "step": 810640
    },
    {
      "epoch": 1.3266628699357828,
      "grad_norm": 1.069989800453186,
      "learning_rate": 7.395083123027352e-06,
      "loss": 0.0315,
      "step": 810660
    },
    {
      "epoch": 1.3266956003744363,
      "grad_norm": 0.43675899505615234,
      "learning_rate": 7.3950172308138356e-06,
      "loss": 0.0254,
      "step": 810680
    },
    {
      "epoch": 1.3267283308130895,
      "grad_norm": 0.8479381203651428,
      "learning_rate": 7.394951338600318e-06,
      "loss": 0.0144,
      "step": 810700
    },
    {
      "epoch": 1.3267610612517429,
      "grad_norm": 1.5318683385849,
      "learning_rate": 7.394885446386801e-06,
      "loss": 0.0265,
      "step": 810720
    },
    {
      "epoch": 1.3267937916903962,
      "grad_norm": 0.32075121998786926,
      "learning_rate": 7.394819554173284e-06,
      "loss": 0.0196,
      "step": 810740
    },
    {
      "epoch": 1.3268265221290496,
      "grad_norm": 0.6944654583930969,
      "learning_rate": 7.394753661959767e-06,
      "loss": 0.0215,
      "step": 810760
    },
    {
      "epoch": 1.326859252567703,
      "grad_norm": 0.5004002451896667,
      "learning_rate": 7.394687769746249e-06,
      "loss": 0.0304,
      "step": 810780
    },
    {
      "epoch": 1.3268919830063561,
      "grad_norm": 0.8865724802017212,
      "learning_rate": 7.394621877532733e-06,
      "loss": 0.0159,
      "step": 810800
    },
    {
      "epoch": 1.3269247134450095,
      "grad_norm": 0.9777979850769043,
      "learning_rate": 7.394555985319215e-06,
      "loss": 0.0169,
      "step": 810820
    },
    {
      "epoch": 1.3269574438836629,
      "grad_norm": 2.004845142364502,
      "learning_rate": 7.394490093105698e-06,
      "loss": 0.0247,
      "step": 810840
    },
    {
      "epoch": 1.3269901743223163,
      "grad_norm": 0.42161473631858826,
      "learning_rate": 7.394424200892181e-06,
      "loss": 0.0237,
      "step": 810860
    },
    {
      "epoch": 1.3270229047609696,
      "grad_norm": 1.49248468875885,
      "learning_rate": 7.394358308678664e-06,
      "loss": 0.0281,
      "step": 810880
    },
    {
      "epoch": 1.327055635199623,
      "grad_norm": 0.16711123287677765,
      "learning_rate": 7.3942924164651466e-06,
      "loss": 0.0227,
      "step": 810900
    },
    {
      "epoch": 1.3270883656382764,
      "grad_norm": 0.1785200536251068,
      "learning_rate": 7.39422652425163e-06,
      "loss": 0.0243,
      "step": 810920
    },
    {
      "epoch": 1.3271210960769295,
      "grad_norm": 0.9804176688194275,
      "learning_rate": 7.394160632038113e-06,
      "loss": 0.0296,
      "step": 810940
    },
    {
      "epoch": 1.327153826515583,
      "grad_norm": 0.4506661593914032,
      "learning_rate": 7.394094739824596e-06,
      "loss": 0.0181,
      "step": 810960
    },
    {
      "epoch": 1.3271865569542363,
      "grad_norm": 0.7484463453292847,
      "learning_rate": 7.394028847611079e-06,
      "loss": 0.0275,
      "step": 810980
    },
    {
      "epoch": 1.3272192873928896,
      "grad_norm": 0.13488230109214783,
      "learning_rate": 7.393962955397561e-06,
      "loss": 0.0203,
      "step": 811000
    },
    {
      "epoch": 1.327252017831543,
      "grad_norm": 0.6276904344558716,
      "learning_rate": 7.393897063184045e-06,
      "loss": 0.023,
      "step": 811020
    },
    {
      "epoch": 1.3272847482701964,
      "grad_norm": 0.640791654586792,
      "learning_rate": 7.3938311709705266e-06,
      "loss": 0.0217,
      "step": 811040
    },
    {
      "epoch": 1.3273174787088498,
      "grad_norm": 1.134724736213684,
      "learning_rate": 7.39376527875701e-06,
      "loss": 0.0236,
      "step": 811060
    },
    {
      "epoch": 1.327350209147503,
      "grad_norm": 0.4727593660354614,
      "learning_rate": 7.393699386543493e-06,
      "loss": 0.0284,
      "step": 811080
    },
    {
      "epoch": 1.3273829395861563,
      "grad_norm": 0.6730057001113892,
      "learning_rate": 7.393633494329976e-06,
      "loss": 0.0292,
      "step": 811100
    },
    {
      "epoch": 1.3274156700248096,
      "grad_norm": 0.386438250541687,
      "learning_rate": 7.393567602116458e-06,
      "loss": 0.0248,
      "step": 811120
    },
    {
      "epoch": 1.327448400463463,
      "grad_norm": 0.2675483822822571,
      "learning_rate": 7.393501709902942e-06,
      "loss": 0.0284,
      "step": 811140
    },
    {
      "epoch": 1.3274811309021164,
      "grad_norm": 3.7688050270080566,
      "learning_rate": 7.393435817689424e-06,
      "loss": 0.027,
      "step": 811160
    },
    {
      "epoch": 1.3275138613407698,
      "grad_norm": 0.5539769530296326,
      "learning_rate": 7.3933699254759074e-06,
      "loss": 0.0283,
      "step": 811180
    },
    {
      "epoch": 1.3275465917794231,
      "grad_norm": 0.6498602032661438,
      "learning_rate": 7.393304033262389e-06,
      "loss": 0.0159,
      "step": 811200
    },
    {
      "epoch": 1.3275793222180763,
      "grad_norm": 1.4980970621109009,
      "learning_rate": 7.393238141048873e-06,
      "loss": 0.0185,
      "step": 811220
    },
    {
      "epoch": 1.3276120526567297,
      "grad_norm": 0.2517053782939911,
      "learning_rate": 7.393172248835355e-06,
      "loss": 0.0188,
      "step": 811240
    },
    {
      "epoch": 1.327644783095383,
      "grad_norm": 0.5672804117202759,
      "learning_rate": 7.393106356621838e-06,
      "loss": 0.0191,
      "step": 811260
    },
    {
      "epoch": 1.3276775135340364,
      "grad_norm": 0.42451274394989014,
      "learning_rate": 7.393040464408322e-06,
      "loss": 0.0157,
      "step": 811280
    },
    {
      "epoch": 1.3277102439726898,
      "grad_norm": 0.4793091416358948,
      "learning_rate": 7.392974572194804e-06,
      "loss": 0.0205,
      "step": 811300
    },
    {
      "epoch": 1.327742974411343,
      "grad_norm": 1.1452864408493042,
      "learning_rate": 7.3929086799812875e-06,
      "loss": 0.0204,
      "step": 811320
    },
    {
      "epoch": 1.3277757048499965,
      "grad_norm": 0.15469998121261597,
      "learning_rate": 7.39284278776777e-06,
      "loss": 0.021,
      "step": 811340
    },
    {
      "epoch": 1.3278084352886497,
      "grad_norm": 1.1483081579208374,
      "learning_rate": 7.392776895554253e-06,
      "loss": 0.0182,
      "step": 811360
    },
    {
      "epoch": 1.327841165727303,
      "grad_norm": 1.0115973949432373,
      "learning_rate": 7.392711003340736e-06,
      "loss": 0.0254,
      "step": 811380
    },
    {
      "epoch": 1.3278738961659564,
      "grad_norm": 0.6411306262016296,
      "learning_rate": 7.392645111127219e-06,
      "loss": 0.0328,
      "step": 811400
    },
    {
      "epoch": 1.3279066266046098,
      "grad_norm": 0.3152942359447479,
      "learning_rate": 7.392579218913701e-06,
      "loss": 0.0302,
      "step": 811420
    },
    {
      "epoch": 1.3279393570432632,
      "grad_norm": 0.6167038083076477,
      "learning_rate": 7.392513326700185e-06,
      "loss": 0.0243,
      "step": 811440
    },
    {
      "epoch": 1.3279720874819163,
      "grad_norm": 2.716933488845825,
      "learning_rate": 7.392447434486667e-06,
      "loss": 0.0309,
      "step": 811460
    },
    {
      "epoch": 1.32800481792057,
      "grad_norm": 0.5535831451416016,
      "learning_rate": 7.39238154227315e-06,
      "loss": 0.0172,
      "step": 811480
    },
    {
      "epoch": 1.328037548359223,
      "grad_norm": 0.9203100204467773,
      "learning_rate": 7.392315650059633e-06,
      "loss": 0.0221,
      "step": 811500
    },
    {
      "epoch": 1.3280702787978764,
      "grad_norm": 0.5474268794059753,
      "learning_rate": 7.392249757846116e-06,
      "loss": 0.0216,
      "step": 811520
    },
    {
      "epoch": 1.3281030092365298,
      "grad_norm": 0.33115705847740173,
      "learning_rate": 7.3921838656325985e-06,
      "loss": 0.0268,
      "step": 811540
    },
    {
      "epoch": 1.3281357396751832,
      "grad_norm": 1.1029369831085205,
      "learning_rate": 7.392117973419082e-06,
      "loss": 0.034,
      "step": 811560
    },
    {
      "epoch": 1.3281684701138365,
      "grad_norm": 0.2711937129497528,
      "learning_rate": 7.392052081205564e-06,
      "loss": 0.016,
      "step": 811580
    },
    {
      "epoch": 1.3282012005524897,
      "grad_norm": 0.26698121428489685,
      "learning_rate": 7.3919861889920475e-06,
      "loss": 0.0217,
      "step": 811600
    },
    {
      "epoch": 1.328233930991143,
      "grad_norm": 0.6126494407653809,
      "learning_rate": 7.391920296778529e-06,
      "loss": 0.0176,
      "step": 811620
    },
    {
      "epoch": 1.3282666614297964,
      "grad_norm": 0.5473850965499878,
      "learning_rate": 7.391854404565013e-06,
      "loss": 0.0192,
      "step": 811640
    },
    {
      "epoch": 1.3282993918684498,
      "grad_norm": 0.5170722603797913,
      "learning_rate": 7.391788512351497e-06,
      "loss": 0.0166,
      "step": 811660
    },
    {
      "epoch": 1.3283321223071032,
      "grad_norm": 1.0023424625396729,
      "learning_rate": 7.3917226201379785e-06,
      "loss": 0.0226,
      "step": 811680
    },
    {
      "epoch": 1.3283648527457566,
      "grad_norm": 0.24873311817646027,
      "learning_rate": 7.391656727924462e-06,
      "loss": 0.0294,
      "step": 811700
    },
    {
      "epoch": 1.32839758318441,
      "grad_norm": 1.160067081451416,
      "learning_rate": 7.391590835710945e-06,
      "loss": 0.0181,
      "step": 811720
    },
    {
      "epoch": 1.328430313623063,
      "grad_norm": 3.855210304260254,
      "learning_rate": 7.3915249434974275e-06,
      "loss": 0.0202,
      "step": 811740
    },
    {
      "epoch": 1.3284630440617164,
      "grad_norm": 0.20993608236312866,
      "learning_rate": 7.39145905128391e-06,
      "loss": 0.0178,
      "step": 811760
    },
    {
      "epoch": 1.3284957745003698,
      "grad_norm": 0.1349203884601593,
      "learning_rate": 7.391393159070394e-06,
      "loss": 0.0152,
      "step": 811780
    },
    {
      "epoch": 1.3285285049390232,
      "grad_norm": 1.0003657341003418,
      "learning_rate": 7.391327266856876e-06,
      "loss": 0.027,
      "step": 811800
    },
    {
      "epoch": 1.3285612353776766,
      "grad_norm": 0.49890652298927307,
      "learning_rate": 7.391261374643359e-06,
      "loss": 0.0199,
      "step": 811820
    },
    {
      "epoch": 1.32859396581633,
      "grad_norm": 1.4731614589691162,
      "learning_rate": 7.391195482429841e-06,
      "loss": 0.0185,
      "step": 811840
    },
    {
      "epoch": 1.3286266962549833,
      "grad_norm": 0.17437812685966492,
      "learning_rate": 7.391129590216325e-06,
      "loss": 0.0285,
      "step": 811860
    },
    {
      "epoch": 1.3286594266936365,
      "grad_norm": 0.21339115500450134,
      "learning_rate": 7.3910636980028076e-06,
      "loss": 0.0255,
      "step": 811880
    },
    {
      "epoch": 1.3286921571322898,
      "grad_norm": 0.7654381990432739,
      "learning_rate": 7.39099780578929e-06,
      "loss": 0.0192,
      "step": 811900
    },
    {
      "epoch": 1.3287248875709432,
      "grad_norm": 2.544567823410034,
      "learning_rate": 7.390931913575773e-06,
      "loss": 0.0359,
      "step": 811920
    },
    {
      "epoch": 1.3287576180095966,
      "grad_norm": 1.2678581476211548,
      "learning_rate": 7.390866021362257e-06,
      "loss": 0.0289,
      "step": 811940
    },
    {
      "epoch": 1.32879034844825,
      "grad_norm": 0.4116176664829254,
      "learning_rate": 7.3908001291487385e-06,
      "loss": 0.0185,
      "step": 811960
    },
    {
      "epoch": 1.3288230788869033,
      "grad_norm": 0.2903955280780792,
      "learning_rate": 7.390734236935222e-06,
      "loss": 0.0146,
      "step": 811980
    },
    {
      "epoch": 1.3288558093255567,
      "grad_norm": 1.1252583265304565,
      "learning_rate": 7.390668344721706e-06,
      "loss": 0.0267,
      "step": 812000
    },
    {
      "epoch": 1.3288885397642098,
      "grad_norm": 1.449270486831665,
      "learning_rate": 7.390602452508188e-06,
      "loss": 0.0157,
      "step": 812020
    },
    {
      "epoch": 1.3289212702028632,
      "grad_norm": 1.1116620302200317,
      "learning_rate": 7.390536560294671e-06,
      "loss": 0.0195,
      "step": 812040
    },
    {
      "epoch": 1.3289540006415166,
      "grad_norm": 0.7047417163848877,
      "learning_rate": 7.390470668081153e-06,
      "loss": 0.0358,
      "step": 812060
    },
    {
      "epoch": 1.32898673108017,
      "grad_norm": 0.12123838067054749,
      "learning_rate": 7.390404775867637e-06,
      "loss": 0.0226,
      "step": 812080
    },
    {
      "epoch": 1.3290194615188233,
      "grad_norm": 1.077439785003662,
      "learning_rate": 7.390338883654119e-06,
      "loss": 0.0269,
      "step": 812100
    },
    {
      "epoch": 1.3290521919574765,
      "grad_norm": 1.253245234489441,
      "learning_rate": 7.390272991440602e-06,
      "loss": 0.0223,
      "step": 812120
    },
    {
      "epoch": 1.32908492239613,
      "grad_norm": 0.5494130253791809,
      "learning_rate": 7.390207099227085e-06,
      "loss": 0.0341,
      "step": 812140
    },
    {
      "epoch": 1.3291176528347832,
      "grad_norm": 1.0957443714141846,
      "learning_rate": 7.3901412070135685e-06,
      "loss": 0.0261,
      "step": 812160
    },
    {
      "epoch": 1.3291503832734366,
      "grad_norm": 0.3542889356613159,
      "learning_rate": 7.39007531480005e-06,
      "loss": 0.0276,
      "step": 812180
    },
    {
      "epoch": 1.32918311371209,
      "grad_norm": 0.44454407691955566,
      "learning_rate": 7.390009422586534e-06,
      "loss": 0.0216,
      "step": 812200
    },
    {
      "epoch": 1.3292158441507433,
      "grad_norm": 0.7803109288215637,
      "learning_rate": 7.389943530373016e-06,
      "loss": 0.018,
      "step": 812220
    },
    {
      "epoch": 1.3292485745893967,
      "grad_norm": 1.4043149948120117,
      "learning_rate": 7.3898776381594994e-06,
      "loss": 0.0236,
      "step": 812240
    },
    {
      "epoch": 1.3292813050280499,
      "grad_norm": 0.7815973162651062,
      "learning_rate": 7.389811745945981e-06,
      "loss": 0.0296,
      "step": 812260
    },
    {
      "epoch": 1.3293140354667035,
      "grad_norm": 0.5499181747436523,
      "learning_rate": 7.389745853732465e-06,
      "loss": 0.0235,
      "step": 812280
    },
    {
      "epoch": 1.3293467659053566,
      "grad_norm": 0.6211646795272827,
      "learning_rate": 7.389679961518948e-06,
      "loss": 0.0186,
      "step": 812300
    },
    {
      "epoch": 1.32937949634401,
      "grad_norm": 0.11758244782686234,
      "learning_rate": 7.38961406930543e-06,
      "loss": 0.0203,
      "step": 812320
    },
    {
      "epoch": 1.3294122267826634,
      "grad_norm": 1.406522274017334,
      "learning_rate": 7.389548177091914e-06,
      "loss": 0.0245,
      "step": 812340
    },
    {
      "epoch": 1.3294449572213167,
      "grad_norm": 0.6937069892883301,
      "learning_rate": 7.389482284878397e-06,
      "loss": 0.0237,
      "step": 812360
    },
    {
      "epoch": 1.32947768765997,
      "grad_norm": 0.8506843447685242,
      "learning_rate": 7.3894163926648795e-06,
      "loss": 0.0299,
      "step": 812380
    },
    {
      "epoch": 1.3295104180986232,
      "grad_norm": 0.09894764423370361,
      "learning_rate": 7.389350500451362e-06,
      "loss": 0.0166,
      "step": 812400
    },
    {
      "epoch": 1.3295431485372766,
      "grad_norm": 0.7612292766571045,
      "learning_rate": 7.389284608237846e-06,
      "loss": 0.0236,
      "step": 812420
    },
    {
      "epoch": 1.32957587897593,
      "grad_norm": 0.3679075837135315,
      "learning_rate": 7.389218716024328e-06,
      "loss": 0.0158,
      "step": 812440
    },
    {
      "epoch": 1.3296086094145834,
      "grad_norm": 0.4801138639450073,
      "learning_rate": 7.389152823810811e-06,
      "loss": 0.016,
      "step": 812460
    },
    {
      "epoch": 1.3296413398532367,
      "grad_norm": 1.496120572090149,
      "learning_rate": 7.389086931597293e-06,
      "loss": 0.0268,
      "step": 812480
    },
    {
      "epoch": 1.32967407029189,
      "grad_norm": 0.45666375756263733,
      "learning_rate": 7.389021039383777e-06,
      "loss": 0.0226,
      "step": 812500
    },
    {
      "epoch": 1.3297068007305435,
      "grad_norm": 0.5547643899917603,
      "learning_rate": 7.3889551471702595e-06,
      "loss": 0.0273,
      "step": 812520
    },
    {
      "epoch": 1.3297395311691966,
      "grad_norm": 1.381691575050354,
      "learning_rate": 7.388889254956742e-06,
      "loss": 0.0189,
      "step": 812540
    },
    {
      "epoch": 1.32977226160785,
      "grad_norm": 0.9834981560707092,
      "learning_rate": 7.388823362743225e-06,
      "loss": 0.0186,
      "step": 812560
    },
    {
      "epoch": 1.3298049920465034,
      "grad_norm": 3.14581298828125,
      "learning_rate": 7.3887574705297085e-06,
      "loss": 0.0203,
      "step": 812580
    },
    {
      "epoch": 1.3298377224851567,
      "grad_norm": 0.865593671798706,
      "learning_rate": 7.3886915783161904e-06,
      "loss": 0.0183,
      "step": 812600
    },
    {
      "epoch": 1.3298704529238101,
      "grad_norm": 0.21814751625061035,
      "learning_rate": 7.388625686102674e-06,
      "loss": 0.0151,
      "step": 812620
    },
    {
      "epoch": 1.3299031833624635,
      "grad_norm": 0.42607802152633667,
      "learning_rate": 7.388559793889156e-06,
      "loss": 0.0205,
      "step": 812640
    },
    {
      "epoch": 1.3299359138011169,
      "grad_norm": 0.8999614119529724,
      "learning_rate": 7.3884939016756395e-06,
      "loss": 0.0245,
      "step": 812660
    },
    {
      "epoch": 1.32996864423977,
      "grad_norm": 0.3911493122577667,
      "learning_rate": 7.388428009462123e-06,
      "loss": 0.023,
      "step": 812680
    },
    {
      "epoch": 1.3300013746784234,
      "grad_norm": 3.0580079555511475,
      "learning_rate": 7.388362117248605e-06,
      "loss": 0.0145,
      "step": 812700
    },
    {
      "epoch": 1.3300341051170768,
      "grad_norm": 0.7134810090065002,
      "learning_rate": 7.3882962250350886e-06,
      "loss": 0.0249,
      "step": 812720
    },
    {
      "epoch": 1.3300668355557301,
      "grad_norm": 1.2694008350372314,
      "learning_rate": 7.388230332821571e-06,
      "loss": 0.0192,
      "step": 812740
    },
    {
      "epoch": 1.3300995659943835,
      "grad_norm": 0.4843692481517792,
      "learning_rate": 7.388164440608054e-06,
      "loss": 0.0242,
      "step": 812760
    },
    {
      "epoch": 1.3301322964330369,
      "grad_norm": 0.48518943786621094,
      "learning_rate": 7.388098548394537e-06,
      "loss": 0.0216,
      "step": 812780
    },
    {
      "epoch": 1.3301650268716902,
      "grad_norm": 0.4842683970928192,
      "learning_rate": 7.38803265618102e-06,
      "loss": 0.0258,
      "step": 812800
    },
    {
      "epoch": 1.3301977573103434,
      "grad_norm": 3.5462117195129395,
      "learning_rate": 7.387966763967502e-06,
      "loss": 0.0287,
      "step": 812820
    },
    {
      "epoch": 1.3302304877489968,
      "grad_norm": 0.7945855259895325,
      "learning_rate": 7.387900871753986e-06,
      "loss": 0.0332,
      "step": 812840
    },
    {
      "epoch": 1.3302632181876501,
      "grad_norm": 0.3375021517276764,
      "learning_rate": 7.387834979540468e-06,
      "loss": 0.0171,
      "step": 812860
    },
    {
      "epoch": 1.3302959486263035,
      "grad_norm": 0.7816952466964722,
      "learning_rate": 7.387769087326951e-06,
      "loss": 0.029,
      "step": 812880
    },
    {
      "epoch": 1.3303286790649569,
      "grad_norm": 0.19082413613796234,
      "learning_rate": 7.387703195113434e-06,
      "loss": 0.0183,
      "step": 812900
    },
    {
      "epoch": 1.33036140950361,
      "grad_norm": 0.5887664556503296,
      "learning_rate": 7.387637302899917e-06,
      "loss": 0.0211,
      "step": 812920
    },
    {
      "epoch": 1.3303941399422636,
      "grad_norm": 0.6823751330375671,
      "learning_rate": 7.3875714106863996e-06,
      "loss": 0.0219,
      "step": 812940
    },
    {
      "epoch": 1.3304268703809168,
      "grad_norm": 1.9326146841049194,
      "learning_rate": 7.387505518472883e-06,
      "loss": 0.0266,
      "step": 812960
    },
    {
      "epoch": 1.3304596008195702,
      "grad_norm": 0.8624171018600464,
      "learning_rate": 7.387439626259365e-06,
      "loss": 0.0184,
      "step": 812980
    },
    {
      "epoch": 1.3304923312582235,
      "grad_norm": 0.6937085390090942,
      "learning_rate": 7.387373734045849e-06,
      "loss": 0.0212,
      "step": 813000
    },
    {
      "epoch": 1.330525061696877,
      "grad_norm": 7.299643039703369,
      "learning_rate": 7.3873078418323305e-06,
      "loss": 0.0184,
      "step": 813020
    },
    {
      "epoch": 1.3305577921355303,
      "grad_norm": 1.3646240234375,
      "learning_rate": 7.387241949618814e-06,
      "loss": 0.0202,
      "step": 813040
    },
    {
      "epoch": 1.3305905225741834,
      "grad_norm": 0.4879000186920166,
      "learning_rate": 7.387176057405298e-06,
      "loss": 0.0284,
      "step": 813060
    },
    {
      "epoch": 1.330623253012837,
      "grad_norm": 0.5516812205314636,
      "learning_rate": 7.38711016519178e-06,
      "loss": 0.0155,
      "step": 813080
    },
    {
      "epoch": 1.3306559834514902,
      "grad_norm": 0.7715590596199036,
      "learning_rate": 7.387044272978263e-06,
      "loss": 0.0206,
      "step": 813100
    },
    {
      "epoch": 1.3306887138901435,
      "grad_norm": 0.8849629163742065,
      "learning_rate": 7.386978380764746e-06,
      "loss": 0.0169,
      "step": 813120
    },
    {
      "epoch": 1.330721444328797,
      "grad_norm": 1.7084006071090698,
      "learning_rate": 7.386912488551229e-06,
      "loss": 0.0299,
      "step": 813140
    },
    {
      "epoch": 1.3307541747674503,
      "grad_norm": 0.47793760895729065,
      "learning_rate": 7.386846596337711e-06,
      "loss": 0.0168,
      "step": 813160
    },
    {
      "epoch": 1.3307869052061037,
      "grad_norm": 0.1762041598558426,
      "learning_rate": 7.386780704124195e-06,
      "loss": 0.028,
      "step": 813180
    },
    {
      "epoch": 1.3308196356447568,
      "grad_norm": 1.5586634874343872,
      "learning_rate": 7.386714811910677e-06,
      "loss": 0.019,
      "step": 813200
    },
    {
      "epoch": 1.3308523660834102,
      "grad_norm": 0.8397718071937561,
      "learning_rate": 7.3866489196971605e-06,
      "loss": 0.0194,
      "step": 813220
    },
    {
      "epoch": 1.3308850965220635,
      "grad_norm": 0.4028886556625366,
      "learning_rate": 7.386583027483642e-06,
      "loss": 0.0212,
      "step": 813240
    },
    {
      "epoch": 1.330917826960717,
      "grad_norm": 1.3738162517547607,
      "learning_rate": 7.386517135270126e-06,
      "loss": 0.0203,
      "step": 813260
    },
    {
      "epoch": 1.3309505573993703,
      "grad_norm": 1.1392204761505127,
      "learning_rate": 7.386451243056608e-06,
      "loss": 0.0223,
      "step": 813280
    },
    {
      "epoch": 1.3309832878380237,
      "grad_norm": 0.16585499048233032,
      "learning_rate": 7.386385350843091e-06,
      "loss": 0.0231,
      "step": 813300
    },
    {
      "epoch": 1.331016018276677,
      "grad_norm": 0.5453222990036011,
      "learning_rate": 7.386319458629574e-06,
      "loss": 0.0214,
      "step": 813320
    },
    {
      "epoch": 1.3310487487153302,
      "grad_norm": 1.539028286933899,
      "learning_rate": 7.386253566416057e-06,
      "loss": 0.0156,
      "step": 813340
    },
    {
      "epoch": 1.3310814791539836,
      "grad_norm": 0.1153009682893753,
      "learning_rate": 7.38618767420254e-06,
      "loss": 0.0113,
      "step": 813360
    },
    {
      "epoch": 1.331114209592637,
      "grad_norm": 1.145740270614624,
      "learning_rate": 7.386121781989023e-06,
      "loss": 0.0171,
      "step": 813380
    },
    {
      "epoch": 1.3311469400312903,
      "grad_norm": 0.47988444566726685,
      "learning_rate": 7.386055889775506e-06,
      "loss": 0.0231,
      "step": 813400
    },
    {
      "epoch": 1.3311796704699437,
      "grad_norm": 0.7088256478309631,
      "learning_rate": 7.385989997561989e-06,
      "loss": 0.0155,
      "step": 813420
    },
    {
      "epoch": 1.331212400908597,
      "grad_norm": 0.08358785510063171,
      "learning_rate": 7.385924105348472e-06,
      "loss": 0.0325,
      "step": 813440
    },
    {
      "epoch": 1.3312451313472504,
      "grad_norm": 0.662327229976654,
      "learning_rate": 7.385858213134954e-06,
      "loss": 0.0315,
      "step": 813460
    },
    {
      "epoch": 1.3312778617859036,
      "grad_norm": 1.133179783821106,
      "learning_rate": 7.385792320921438e-06,
      "loss": 0.0238,
      "step": 813480
    },
    {
      "epoch": 1.331310592224557,
      "grad_norm": 0.6688440442085266,
      "learning_rate": 7.38572642870792e-06,
      "loss": 0.0191,
      "step": 813500
    },
    {
      "epoch": 1.3313433226632103,
      "grad_norm": 0.1455693244934082,
      "learning_rate": 7.385660536494403e-06,
      "loss": 0.0201,
      "step": 813520
    },
    {
      "epoch": 1.3313760531018637,
      "grad_norm": 0.19154061377048492,
      "learning_rate": 7.385594644280886e-06,
      "loss": 0.0218,
      "step": 813540
    },
    {
      "epoch": 1.331408783540517,
      "grad_norm": 0.7279914021492004,
      "learning_rate": 7.385528752067369e-06,
      "loss": 0.0186,
      "step": 813560
    },
    {
      "epoch": 1.3314415139791702,
      "grad_norm": 0.4253983199596405,
      "learning_rate": 7.3854628598538515e-06,
      "loss": 0.0205,
      "step": 813580
    },
    {
      "epoch": 1.3314742444178238,
      "grad_norm": 0.5872694253921509,
      "learning_rate": 7.385396967640335e-06,
      "loss": 0.0192,
      "step": 813600
    },
    {
      "epoch": 1.331506974856477,
      "grad_norm": 0.9544572830200195,
      "learning_rate": 7.385331075426817e-06,
      "loss": 0.0252,
      "step": 813620
    },
    {
      "epoch": 1.3315397052951303,
      "grad_norm": 0.5262133479118347,
      "learning_rate": 7.3852651832133005e-06,
      "loss": 0.0265,
      "step": 813640
    },
    {
      "epoch": 1.3315724357337837,
      "grad_norm": 0.18713140487670898,
      "learning_rate": 7.385199290999782e-06,
      "loss": 0.023,
      "step": 813660
    },
    {
      "epoch": 1.331605166172437,
      "grad_norm": 0.5854287147521973,
      "learning_rate": 7.385133398786266e-06,
      "loss": 0.0269,
      "step": 813680
    },
    {
      "epoch": 1.3316378966110904,
      "grad_norm": 0.6259302496910095,
      "learning_rate": 7.385067506572749e-06,
      "loss": 0.0277,
      "step": 813700
    },
    {
      "epoch": 1.3316706270497436,
      "grad_norm": 0.5712406039237976,
      "learning_rate": 7.3850016143592315e-06,
      "loss": 0.0201,
      "step": 813720
    },
    {
      "epoch": 1.3317033574883972,
      "grad_norm": 1.29335618019104,
      "learning_rate": 7.384935722145715e-06,
      "loss": 0.0264,
      "step": 813740
    },
    {
      "epoch": 1.3317360879270503,
      "grad_norm": 0.9675992131233215,
      "learning_rate": 7.384869829932198e-06,
      "loss": 0.0294,
      "step": 813760
    },
    {
      "epoch": 1.3317688183657037,
      "grad_norm": 1.4300347566604614,
      "learning_rate": 7.3848039377186806e-06,
      "loss": 0.0242,
      "step": 813780
    },
    {
      "epoch": 1.331801548804357,
      "grad_norm": 0.7360543012619019,
      "learning_rate": 7.384738045505163e-06,
      "loss": 0.0284,
      "step": 813800
    },
    {
      "epoch": 1.3318342792430105,
      "grad_norm": 1.2729549407958984,
      "learning_rate": 7.384672153291647e-06,
      "loss": 0.0239,
      "step": 813820
    },
    {
      "epoch": 1.3318670096816638,
      "grad_norm": 0.5272091031074524,
      "learning_rate": 7.384606261078129e-06,
      "loss": 0.0152,
      "step": 813840
    },
    {
      "epoch": 1.331899740120317,
      "grad_norm": 0.7656663656234741,
      "learning_rate": 7.384540368864612e-06,
      "loss": 0.0195,
      "step": 813860
    },
    {
      "epoch": 1.3319324705589703,
      "grad_norm": 1.612749695777893,
      "learning_rate": 7.384474476651094e-06,
      "loss": 0.0172,
      "step": 813880
    },
    {
      "epoch": 1.3319652009976237,
      "grad_norm": 0.5071589350700378,
      "learning_rate": 7.384408584437578e-06,
      "loss": 0.0222,
      "step": 813900
    },
    {
      "epoch": 1.331997931436277,
      "grad_norm": 1.9327384233474731,
      "learning_rate": 7.384342692224061e-06,
      "loss": 0.0226,
      "step": 813920
    },
    {
      "epoch": 1.3320306618749305,
      "grad_norm": 0.9115275144577026,
      "learning_rate": 7.384276800010543e-06,
      "loss": 0.0344,
      "step": 813940
    },
    {
      "epoch": 1.3320633923135838,
      "grad_norm": 0.3065817952156067,
      "learning_rate": 7.384210907797026e-06,
      "loss": 0.0251,
      "step": 813960
    },
    {
      "epoch": 1.3320961227522372,
      "grad_norm": 1.5466337203979492,
      "learning_rate": 7.38414501558351e-06,
      "loss": 0.0348,
      "step": 813980
    },
    {
      "epoch": 1.3321288531908904,
      "grad_norm": 0.3356611728668213,
      "learning_rate": 7.3840791233699915e-06,
      "loss": 0.0224,
      "step": 814000
    },
    {
      "epoch": 1.3321615836295437,
      "grad_norm": 0.903788685798645,
      "learning_rate": 7.384013231156475e-06,
      "loss": 0.0223,
      "step": 814020
    },
    {
      "epoch": 1.332194314068197,
      "grad_norm": 1.060143232345581,
      "learning_rate": 7.383947338942957e-06,
      "loss": 0.0157,
      "step": 814040
    },
    {
      "epoch": 1.3322270445068505,
      "grad_norm": 0.6060172319412231,
      "learning_rate": 7.383881446729441e-06,
      "loss": 0.0275,
      "step": 814060
    },
    {
      "epoch": 1.3322597749455038,
      "grad_norm": 0.61979740858078,
      "learning_rate": 7.3838155545159225e-06,
      "loss": 0.0191,
      "step": 814080
    },
    {
      "epoch": 1.3322925053841572,
      "grad_norm": 0.8182001113891602,
      "learning_rate": 7.383749662302406e-06,
      "loss": 0.0179,
      "step": 814100
    },
    {
      "epoch": 1.3323252358228106,
      "grad_norm": 0.43357279896736145,
      "learning_rate": 7.38368377008889e-06,
      "loss": 0.0173,
      "step": 814120
    },
    {
      "epoch": 1.3323579662614637,
      "grad_norm": 0.7690303325653076,
      "learning_rate": 7.3836178778753716e-06,
      "loss": 0.0173,
      "step": 814140
    },
    {
      "epoch": 1.3323906967001171,
      "grad_norm": 0.3780694603919983,
      "learning_rate": 7.383551985661855e-06,
      "loss": 0.0234,
      "step": 814160
    },
    {
      "epoch": 1.3324234271387705,
      "grad_norm": 0.3292315602302551,
      "learning_rate": 7.383486093448338e-06,
      "loss": 0.0204,
      "step": 814180
    },
    {
      "epoch": 1.3324561575774239,
      "grad_norm": 0.4886549115180969,
      "learning_rate": 7.3834202012348215e-06,
      "loss": 0.0259,
      "step": 814200
    },
    {
      "epoch": 1.3324888880160772,
      "grad_norm": 0.19011642038822174,
      "learning_rate": 7.383354309021303e-06,
      "loss": 0.0175,
      "step": 814220
    },
    {
      "epoch": 1.3325216184547306,
      "grad_norm": 0.18824586272239685,
      "learning_rate": 7.383288416807787e-06,
      "loss": 0.0231,
      "step": 814240
    },
    {
      "epoch": 1.332554348893384,
      "grad_norm": 0.6286907196044922,
      "learning_rate": 7.383222524594269e-06,
      "loss": 0.0235,
      "step": 814260
    },
    {
      "epoch": 1.3325870793320371,
      "grad_norm": 0.47937366366386414,
      "learning_rate": 7.3831566323807524e-06,
      "loss": 0.0214,
      "step": 814280
    },
    {
      "epoch": 1.3326198097706905,
      "grad_norm": 1.7253659963607788,
      "learning_rate": 7.383090740167234e-06,
      "loss": 0.0259,
      "step": 814300
    },
    {
      "epoch": 1.3326525402093439,
      "grad_norm": 0.8142585158348083,
      "learning_rate": 7.383024847953718e-06,
      "loss": 0.0133,
      "step": 814320
    },
    {
      "epoch": 1.3326852706479972,
      "grad_norm": 0.21942909061908722,
      "learning_rate": 7.382958955740201e-06,
      "loss": 0.0204,
      "step": 814340
    },
    {
      "epoch": 1.3327180010866506,
      "grad_norm": 0.23044781386852264,
      "learning_rate": 7.382893063526683e-06,
      "loss": 0.0185,
      "step": 814360
    },
    {
      "epoch": 1.3327507315253038,
      "grad_norm": 1.4384135007858276,
      "learning_rate": 7.382827171313166e-06,
      "loss": 0.019,
      "step": 814380
    },
    {
      "epoch": 1.3327834619639574,
      "grad_norm": 0.22353748977184296,
      "learning_rate": 7.38276127909965e-06,
      "loss": 0.0239,
      "step": 814400
    },
    {
      "epoch": 1.3328161924026105,
      "grad_norm": 0.20580343902111053,
      "learning_rate": 7.382695386886132e-06,
      "loss": 0.0283,
      "step": 814420
    },
    {
      "epoch": 1.3328489228412639,
      "grad_norm": 0.3212481737136841,
      "learning_rate": 7.382629494672615e-06,
      "loss": 0.0228,
      "step": 814440
    },
    {
      "epoch": 1.3328816532799173,
      "grad_norm": 1.4257937669754028,
      "learning_rate": 7.382563602459099e-06,
      "loss": 0.0266,
      "step": 814460
    },
    {
      "epoch": 1.3329143837185706,
      "grad_norm": 1.2713185548782349,
      "learning_rate": 7.382497710245581e-06,
      "loss": 0.0233,
      "step": 814480
    },
    {
      "epoch": 1.332947114157224,
      "grad_norm": 0.3702218234539032,
      "learning_rate": 7.382431818032064e-06,
      "loss": 0.0206,
      "step": 814500
    },
    {
      "epoch": 1.3329798445958772,
      "grad_norm": 0.19571028649806976,
      "learning_rate": 7.382365925818546e-06,
      "loss": 0.0183,
      "step": 814520
    },
    {
      "epoch": 1.3330125750345307,
      "grad_norm": 0.969355046749115,
      "learning_rate": 7.38230003360503e-06,
      "loss": 0.022,
      "step": 814540
    },
    {
      "epoch": 1.333045305473184,
      "grad_norm": 0.18445640802383423,
      "learning_rate": 7.3822341413915125e-06,
      "loss": 0.0178,
      "step": 814560
    },
    {
      "epoch": 1.3330780359118373,
      "grad_norm": 0.4946075677871704,
      "learning_rate": 7.382168249177995e-06,
      "loss": 0.0201,
      "step": 814580
    },
    {
      "epoch": 1.3331107663504906,
      "grad_norm": 0.5331318378448486,
      "learning_rate": 7.382102356964478e-06,
      "loss": 0.0162,
      "step": 814600
    },
    {
      "epoch": 1.333143496789144,
      "grad_norm": 0.21384671330451965,
      "learning_rate": 7.3820364647509616e-06,
      "loss": 0.0154,
      "step": 814620
    },
    {
      "epoch": 1.3331762272277974,
      "grad_norm": 0.7506045699119568,
      "learning_rate": 7.3819705725374434e-06,
      "loss": 0.025,
      "step": 814640
    },
    {
      "epoch": 1.3332089576664505,
      "grad_norm": 0.7991365790367126,
      "learning_rate": 7.381904680323927e-06,
      "loss": 0.026,
      "step": 814660
    },
    {
      "epoch": 1.333241688105104,
      "grad_norm": 0.6212940216064453,
      "learning_rate": 7.381838788110409e-06,
      "loss": 0.015,
      "step": 814680
    },
    {
      "epoch": 1.3332744185437573,
      "grad_norm": 0.6575788259506226,
      "learning_rate": 7.3817728958968925e-06,
      "loss": 0.0133,
      "step": 814700
    },
    {
      "epoch": 1.3333071489824107,
      "grad_norm": 0.4464220702648163,
      "learning_rate": 7.381707003683375e-06,
      "loss": 0.0169,
      "step": 814720
    },
    {
      "epoch": 1.333339879421064,
      "grad_norm": 0.5627454519271851,
      "learning_rate": 7.381641111469858e-06,
      "loss": 0.0164,
      "step": 814740
    },
    {
      "epoch": 1.3333726098597174,
      "grad_norm": 0.43113526701927185,
      "learning_rate": 7.381575219256341e-06,
      "loss": 0.0175,
      "step": 814760
    },
    {
      "epoch": 1.3334053402983708,
      "grad_norm": 0.48433127999305725,
      "learning_rate": 7.381509327042824e-06,
      "loss": 0.0235,
      "step": 814780
    },
    {
      "epoch": 1.333438070737024,
      "grad_norm": 0.29652026295661926,
      "learning_rate": 7.381443434829307e-06,
      "loss": 0.0229,
      "step": 814800
    },
    {
      "epoch": 1.3334708011756773,
      "grad_norm": 0.4937114715576172,
      "learning_rate": 7.38137754261579e-06,
      "loss": 0.0219,
      "step": 814820
    },
    {
      "epoch": 1.3335035316143307,
      "grad_norm": 0.43167632818222046,
      "learning_rate": 7.381311650402273e-06,
      "loss": 0.0214,
      "step": 814840
    },
    {
      "epoch": 1.333536262052984,
      "grad_norm": 1.3252876996994019,
      "learning_rate": 7.381245758188755e-06,
      "loss": 0.0258,
      "step": 814860
    },
    {
      "epoch": 1.3335689924916374,
      "grad_norm": 0.5628641843795776,
      "learning_rate": 7.381179865975239e-06,
      "loss": 0.0242,
      "step": 814880
    },
    {
      "epoch": 1.3336017229302908,
      "grad_norm": 0.8072249293327332,
      "learning_rate": 7.381113973761721e-06,
      "loss": 0.0336,
      "step": 814900
    },
    {
      "epoch": 1.3336344533689442,
      "grad_norm": 0.36553823947906494,
      "learning_rate": 7.381048081548204e-06,
      "loss": 0.017,
      "step": 814920
    },
    {
      "epoch": 1.3336671838075973,
      "grad_norm": 1.3935561180114746,
      "learning_rate": 7.380982189334687e-06,
      "loss": 0.0256,
      "step": 814940
    },
    {
      "epoch": 1.3336999142462507,
      "grad_norm": 0.31869322061538696,
      "learning_rate": 7.38091629712117e-06,
      "loss": 0.0237,
      "step": 814960
    },
    {
      "epoch": 1.333732644684904,
      "grad_norm": 0.6547662019729614,
      "learning_rate": 7.3808504049076526e-06,
      "loss": 0.0167,
      "step": 814980
    },
    {
      "epoch": 1.3337653751235574,
      "grad_norm": 0.879270613193512,
      "learning_rate": 7.380784512694136e-06,
      "loss": 0.0178,
      "step": 815000
    },
    {
      "epoch": 1.3337981055622108,
      "grad_norm": 0.4743729531764984,
      "learning_rate": 7.380718620480618e-06,
      "loss": 0.0172,
      "step": 815020
    },
    {
      "epoch": 1.3338308360008642,
      "grad_norm": 3.193464756011963,
      "learning_rate": 7.380652728267102e-06,
      "loss": 0.0221,
      "step": 815040
    },
    {
      "epoch": 1.3338635664395175,
      "grad_norm": 0.3727266490459442,
      "learning_rate": 7.3805868360535835e-06,
      "loss": 0.0242,
      "step": 815060
    },
    {
      "epoch": 1.3338962968781707,
      "grad_norm": 0.6421516537666321,
      "learning_rate": 7.380520943840067e-06,
      "loss": 0.0333,
      "step": 815080
    },
    {
      "epoch": 1.333929027316824,
      "grad_norm": 0.3024161756038666,
      "learning_rate": 7.380455051626549e-06,
      "loss": 0.0179,
      "step": 815100
    },
    {
      "epoch": 1.3339617577554774,
      "grad_norm": 0.15014222264289856,
      "learning_rate": 7.380389159413033e-06,
      "loss": 0.0218,
      "step": 815120
    },
    {
      "epoch": 1.3339944881941308,
      "grad_norm": 0.6405176520347595,
      "learning_rate": 7.380323267199515e-06,
      "loss": 0.0236,
      "step": 815140
    },
    {
      "epoch": 1.3340272186327842,
      "grad_norm": 1.0985589027404785,
      "learning_rate": 7.380257374985998e-06,
      "loss": 0.0194,
      "step": 815160
    },
    {
      "epoch": 1.3340599490714373,
      "grad_norm": 0.3846175968647003,
      "learning_rate": 7.380191482772482e-06,
      "loss": 0.0181,
      "step": 815180
    },
    {
      "epoch": 1.334092679510091,
      "grad_norm": 2.752042770385742,
      "learning_rate": 7.380125590558964e-06,
      "loss": 0.0302,
      "step": 815200
    },
    {
      "epoch": 1.334125409948744,
      "grad_norm": 0.33089324831962585,
      "learning_rate": 7.380059698345447e-06,
      "loss": 0.0279,
      "step": 815220
    },
    {
      "epoch": 1.3341581403873974,
      "grad_norm": 0.8225455284118652,
      "learning_rate": 7.37999380613193e-06,
      "loss": 0.0389,
      "step": 815240
    },
    {
      "epoch": 1.3341908708260508,
      "grad_norm": 0.800957441329956,
      "learning_rate": 7.3799279139184135e-06,
      "loss": 0.0273,
      "step": 815260
    },
    {
      "epoch": 1.3342236012647042,
      "grad_norm": 0.6457996368408203,
      "learning_rate": 7.379862021704895e-06,
      "loss": 0.0235,
      "step": 815280
    },
    {
      "epoch": 1.3342563317033576,
      "grad_norm": 0.11145327985286713,
      "learning_rate": 7.379796129491379e-06,
      "loss": 0.0147,
      "step": 815300
    },
    {
      "epoch": 1.3342890621420107,
      "grad_norm": 0.3470361530780792,
      "learning_rate": 7.379730237277861e-06,
      "loss": 0.0185,
      "step": 815320
    },
    {
      "epoch": 1.3343217925806643,
      "grad_norm": 0.42987513542175293,
      "learning_rate": 7.379664345064344e-06,
      "loss": 0.015,
      "step": 815340
    },
    {
      "epoch": 1.3343545230193175,
      "grad_norm": 0.4700966775417328,
      "learning_rate": 7.379598452850827e-06,
      "loss": 0.0302,
      "step": 815360
    },
    {
      "epoch": 1.3343872534579708,
      "grad_norm": 1.1235754489898682,
      "learning_rate": 7.37953256063731e-06,
      "loss": 0.0282,
      "step": 815380
    },
    {
      "epoch": 1.3344199838966242,
      "grad_norm": 0.30321159958839417,
      "learning_rate": 7.379466668423793e-06,
      "loss": 0.0264,
      "step": 815400
    },
    {
      "epoch": 1.3344527143352776,
      "grad_norm": 1.7419918775558472,
      "learning_rate": 7.379400776210276e-06,
      "loss": 0.0196,
      "step": 815420
    },
    {
      "epoch": 1.334485444773931,
      "grad_norm": 0.9013688564300537,
      "learning_rate": 7.379334883996758e-06,
      "loss": 0.0154,
      "step": 815440
    },
    {
      "epoch": 1.334518175212584,
      "grad_norm": 0.31128573417663574,
      "learning_rate": 7.379268991783242e-06,
      "loss": 0.0313,
      "step": 815460
    },
    {
      "epoch": 1.3345509056512375,
      "grad_norm": 0.505806028842926,
      "learning_rate": 7.379203099569724e-06,
      "loss": 0.0203,
      "step": 815480
    },
    {
      "epoch": 1.3345836360898908,
      "grad_norm": 0.38923248648643494,
      "learning_rate": 7.379137207356207e-06,
      "loss": 0.027,
      "step": 815500
    },
    {
      "epoch": 1.3346163665285442,
      "grad_norm": 0.33407506346702576,
      "learning_rate": 7.379071315142691e-06,
      "loss": 0.0241,
      "step": 815520
    },
    {
      "epoch": 1.3346490969671976,
      "grad_norm": 0.7746376395225525,
      "learning_rate": 7.379005422929173e-06,
      "loss": 0.034,
      "step": 815540
    },
    {
      "epoch": 1.334681827405851,
      "grad_norm": 1.211319923400879,
      "learning_rate": 7.378939530715656e-06,
      "loss": 0.0305,
      "step": 815560
    },
    {
      "epoch": 1.3347145578445043,
      "grad_norm": 0.1468556970357895,
      "learning_rate": 7.378873638502139e-06,
      "loss": 0.0221,
      "step": 815580
    },
    {
      "epoch": 1.3347472882831575,
      "grad_norm": 0.43316587805747986,
      "learning_rate": 7.378807746288622e-06,
      "loss": 0.0169,
      "step": 815600
    },
    {
      "epoch": 1.3347800187218108,
      "grad_norm": 0.4349217116832733,
      "learning_rate": 7.3787418540751045e-06,
      "loss": 0.0236,
      "step": 815620
    },
    {
      "epoch": 1.3348127491604642,
      "grad_norm": 0.5131971836090088,
      "learning_rate": 7.378675961861588e-06,
      "loss": 0.0137,
      "step": 815640
    },
    {
      "epoch": 1.3348454795991176,
      "grad_norm": 0.2953258752822876,
      "learning_rate": 7.37861006964807e-06,
      "loss": 0.0235,
      "step": 815660
    },
    {
      "epoch": 1.334878210037771,
      "grad_norm": 0.6640782952308655,
      "learning_rate": 7.3785441774345535e-06,
      "loss": 0.0247,
      "step": 815680
    },
    {
      "epoch": 1.3349109404764243,
      "grad_norm": 0.3465709984302521,
      "learning_rate": 7.3784782852210354e-06,
      "loss": 0.0226,
      "step": 815700
    },
    {
      "epoch": 1.3349436709150777,
      "grad_norm": 0.33896249532699585,
      "learning_rate": 7.378412393007519e-06,
      "loss": 0.0163,
      "step": 815720
    },
    {
      "epoch": 1.3349764013537309,
      "grad_norm": 0.6694810390472412,
      "learning_rate": 7.378346500794002e-06,
      "loss": 0.0159,
      "step": 815740
    },
    {
      "epoch": 1.3350091317923842,
      "grad_norm": 0.32923564314842224,
      "learning_rate": 7.3782806085804845e-06,
      "loss": 0.0188,
      "step": 815760
    },
    {
      "epoch": 1.3350418622310376,
      "grad_norm": 0.32135438919067383,
      "learning_rate": 7.378214716366967e-06,
      "loss": 0.0226,
      "step": 815780
    },
    {
      "epoch": 1.335074592669691,
      "grad_norm": 0.584979236125946,
      "learning_rate": 7.378148824153451e-06,
      "loss": 0.0195,
      "step": 815800
    },
    {
      "epoch": 1.3351073231083443,
      "grad_norm": 0.20840193331241608,
      "learning_rate": 7.378082931939933e-06,
      "loss": 0.0183,
      "step": 815820
    },
    {
      "epoch": 1.3351400535469977,
      "grad_norm": 0.40287265181541443,
      "learning_rate": 7.378017039726416e-06,
      "loss": 0.0179,
      "step": 815840
    },
    {
      "epoch": 1.335172783985651,
      "grad_norm": 0.2605791985988617,
      "learning_rate": 7.3779511475129e-06,
      "loss": 0.0219,
      "step": 815860
    },
    {
      "epoch": 1.3352055144243042,
      "grad_norm": 0.5256694555282593,
      "learning_rate": 7.377885255299382e-06,
      "loss": 0.0225,
      "step": 815880
    },
    {
      "epoch": 1.3352382448629576,
      "grad_norm": 0.49969181418418884,
      "learning_rate": 7.377819363085865e-06,
      "loss": 0.0182,
      "step": 815900
    },
    {
      "epoch": 1.335270975301611,
      "grad_norm": 0.5837072134017944,
      "learning_rate": 7.377753470872347e-06,
      "loss": 0.0225,
      "step": 815920
    },
    {
      "epoch": 1.3353037057402644,
      "grad_norm": 1.0007920265197754,
      "learning_rate": 7.377687578658831e-06,
      "loss": 0.0154,
      "step": 815940
    },
    {
      "epoch": 1.3353364361789177,
      "grad_norm": 0.46690061688423157,
      "learning_rate": 7.377621686445314e-06,
      "loss": 0.0154,
      "step": 815960
    },
    {
      "epoch": 1.3353691666175709,
      "grad_norm": 0.22261512279510498,
      "learning_rate": 7.377555794231796e-06,
      "loss": 0.022,
      "step": 815980
    },
    {
      "epoch": 1.3354018970562245,
      "grad_norm": 1.0566353797912598,
      "learning_rate": 7.377489902018279e-06,
      "loss": 0.0243,
      "step": 816000
    },
    {
      "epoch": 1.3354346274948776,
      "grad_norm": 0.701598584651947,
      "learning_rate": 7.377424009804763e-06,
      "loss": 0.0163,
      "step": 816020
    },
    {
      "epoch": 1.335467357933531,
      "grad_norm": 1.1396129131317139,
      "learning_rate": 7.3773581175912445e-06,
      "loss": 0.0247,
      "step": 816040
    },
    {
      "epoch": 1.3355000883721844,
      "grad_norm": 1.6737803220748901,
      "learning_rate": 7.377292225377728e-06,
      "loss": 0.0219,
      "step": 816060
    },
    {
      "epoch": 1.3355328188108377,
      "grad_norm": 0.3175252377986908,
      "learning_rate": 7.37722633316421e-06,
      "loss": 0.0196,
      "step": 816080
    },
    {
      "epoch": 1.3355655492494911,
      "grad_norm": 1.3457398414611816,
      "learning_rate": 7.377160440950694e-06,
      "loss": 0.0205,
      "step": 816100
    },
    {
      "epoch": 1.3355982796881443,
      "grad_norm": 9.707791328430176,
      "learning_rate": 7.3770945487371755e-06,
      "loss": 0.0288,
      "step": 816120
    },
    {
      "epoch": 1.3356310101267979,
      "grad_norm": 0.37707117199897766,
      "learning_rate": 7.377028656523659e-06,
      "loss": 0.018,
      "step": 816140
    },
    {
      "epoch": 1.335663740565451,
      "grad_norm": 0.46137505769729614,
      "learning_rate": 7.376962764310142e-06,
      "loss": 0.0266,
      "step": 816160
    },
    {
      "epoch": 1.3356964710041044,
      "grad_norm": 0.7437238693237305,
      "learning_rate": 7.3768968720966246e-06,
      "loss": 0.0158,
      "step": 816180
    },
    {
      "epoch": 1.3357292014427578,
      "grad_norm": 0.6005284190177917,
      "learning_rate": 7.376830979883108e-06,
      "loss": 0.0213,
      "step": 816200
    },
    {
      "epoch": 1.3357619318814111,
      "grad_norm": 0.8463550209999084,
      "learning_rate": 7.376765087669591e-06,
      "loss": 0.0183,
      "step": 816220
    },
    {
      "epoch": 1.3357946623200645,
      "grad_norm": 0.9150984883308411,
      "learning_rate": 7.376699195456074e-06,
      "loss": 0.0259,
      "step": 816240
    },
    {
      "epoch": 1.3358273927587176,
      "grad_norm": 0.7917354702949524,
      "learning_rate": 7.376633303242556e-06,
      "loss": 0.033,
      "step": 816260
    },
    {
      "epoch": 1.335860123197371,
      "grad_norm": 0.4382551908493042,
      "learning_rate": 7.37656741102904e-06,
      "loss": 0.0191,
      "step": 816280
    },
    {
      "epoch": 1.3358928536360244,
      "grad_norm": 0.5146324038505554,
      "learning_rate": 7.376501518815522e-06,
      "loss": 0.0205,
      "step": 816300
    },
    {
      "epoch": 1.3359255840746778,
      "grad_norm": 0.7557655572891235,
      "learning_rate": 7.3764356266020054e-06,
      "loss": 0.0267,
      "step": 816320
    },
    {
      "epoch": 1.3359583145133311,
      "grad_norm": 1.2857078313827515,
      "learning_rate": 7.376369734388487e-06,
      "loss": 0.0253,
      "step": 816340
    },
    {
      "epoch": 1.3359910449519845,
      "grad_norm": 0.4485262334346771,
      "learning_rate": 7.376303842174971e-06,
      "loss": 0.0235,
      "step": 816360
    },
    {
      "epoch": 1.3360237753906379,
      "grad_norm": 0.6437454223632812,
      "learning_rate": 7.376237949961454e-06,
      "loss": 0.017,
      "step": 816380
    },
    {
      "epoch": 1.336056505829291,
      "grad_norm": 0.2508363723754883,
      "learning_rate": 7.376172057747936e-06,
      "loss": 0.0329,
      "step": 816400
    },
    {
      "epoch": 1.3360892362679444,
      "grad_norm": 0.5100789666175842,
      "learning_rate": 7.376106165534419e-06,
      "loss": 0.0153,
      "step": 816420
    },
    {
      "epoch": 1.3361219667065978,
      "grad_norm": 1.6633716821670532,
      "learning_rate": 7.376040273320903e-06,
      "loss": 0.0247,
      "step": 816440
    },
    {
      "epoch": 1.3361546971452511,
      "grad_norm": 0.7442728281021118,
      "learning_rate": 7.375974381107385e-06,
      "loss": 0.0197,
      "step": 816460
    },
    {
      "epoch": 1.3361874275839045,
      "grad_norm": 0.6279535889625549,
      "learning_rate": 7.375908488893868e-06,
      "loss": 0.0214,
      "step": 816480
    },
    {
      "epoch": 1.336220158022558,
      "grad_norm": 0.9783838391304016,
      "learning_rate": 7.37584259668035e-06,
      "loss": 0.0142,
      "step": 816500
    },
    {
      "epoch": 1.3362528884612113,
      "grad_norm": 1.146905541419983,
      "learning_rate": 7.375776704466834e-06,
      "loss": 0.0257,
      "step": 816520
    },
    {
      "epoch": 1.3362856188998644,
      "grad_norm": 0.724830687046051,
      "learning_rate": 7.3757108122533164e-06,
      "loss": 0.0334,
      "step": 816540
    },
    {
      "epoch": 1.3363183493385178,
      "grad_norm": 0.546923816204071,
      "learning_rate": 7.375644920039799e-06,
      "loss": 0.0142,
      "step": 816560
    },
    {
      "epoch": 1.3363510797771712,
      "grad_norm": 1.4170795679092407,
      "learning_rate": 7.375579027826283e-06,
      "loss": 0.028,
      "step": 816580
    },
    {
      "epoch": 1.3363838102158245,
      "grad_norm": 0.1514674574136734,
      "learning_rate": 7.3755131356127655e-06,
      "loss": 0.0167,
      "step": 816600
    },
    {
      "epoch": 1.336416540654478,
      "grad_norm": 2.3767378330230713,
      "learning_rate": 7.375447243399248e-06,
      "loss": 0.0186,
      "step": 816620
    },
    {
      "epoch": 1.336449271093131,
      "grad_norm": 0.8585392832756042,
      "learning_rate": 7.375381351185731e-06,
      "loss": 0.0303,
      "step": 816640
    },
    {
      "epoch": 1.3364820015317846,
      "grad_norm": 0.5034711956977844,
      "learning_rate": 7.3753154589722146e-06,
      "loss": 0.0249,
      "step": 816660
    },
    {
      "epoch": 1.3365147319704378,
      "grad_norm": 0.9313043355941772,
      "learning_rate": 7.3752495667586965e-06,
      "loss": 0.0183,
      "step": 816680
    },
    {
      "epoch": 1.3365474624090912,
      "grad_norm": 0.30590254068374634,
      "learning_rate": 7.37518367454518e-06,
      "loss": 0.0222,
      "step": 816700
    },
    {
      "epoch": 1.3365801928477445,
      "grad_norm": 0.7258192300796509,
      "learning_rate": 7.375117782331662e-06,
      "loss": 0.0311,
      "step": 816720
    },
    {
      "epoch": 1.336612923286398,
      "grad_norm": 0.6539479494094849,
      "learning_rate": 7.3750518901181455e-06,
      "loss": 0.0195,
      "step": 816740
    },
    {
      "epoch": 1.3366456537250513,
      "grad_norm": 0.9578776955604553,
      "learning_rate": 7.374985997904628e-06,
      "loss": 0.0212,
      "step": 816760
    },
    {
      "epoch": 1.3366783841637044,
      "grad_norm": 0.8705077767372131,
      "learning_rate": 7.374920105691111e-06,
      "loss": 0.0357,
      "step": 816780
    },
    {
      "epoch": 1.336711114602358,
      "grad_norm": 0.3885558247566223,
      "learning_rate": 7.374854213477594e-06,
      "loss": 0.0182,
      "step": 816800
    },
    {
      "epoch": 1.3367438450410112,
      "grad_norm": 0.3918088972568512,
      "learning_rate": 7.374788321264077e-06,
      "loss": 0.0202,
      "step": 816820
    },
    {
      "epoch": 1.3367765754796646,
      "grad_norm": 0.8972225189208984,
      "learning_rate": 7.374722429050559e-06,
      "loss": 0.0169,
      "step": 816840
    },
    {
      "epoch": 1.336809305918318,
      "grad_norm": 0.7412446737289429,
      "learning_rate": 7.374656536837043e-06,
      "loss": 0.0192,
      "step": 816860
    },
    {
      "epoch": 1.3368420363569713,
      "grad_norm": 0.6559690237045288,
      "learning_rate": 7.374590644623525e-06,
      "loss": 0.0217,
      "step": 816880
    },
    {
      "epoch": 1.3368747667956247,
      "grad_norm": 0.33971765637397766,
      "learning_rate": 7.374524752410008e-06,
      "loss": 0.0191,
      "step": 816900
    },
    {
      "epoch": 1.3369074972342778,
      "grad_norm": 1.348724365234375,
      "learning_rate": 7.374458860196492e-06,
      "loss": 0.0158,
      "step": 816920
    },
    {
      "epoch": 1.3369402276729312,
      "grad_norm": 0.48660919070243835,
      "learning_rate": 7.374392967982974e-06,
      "loss": 0.0263,
      "step": 816940
    },
    {
      "epoch": 1.3369729581115846,
      "grad_norm": 0.5543144941329956,
      "learning_rate": 7.374327075769457e-06,
      "loss": 0.0256,
      "step": 816960
    },
    {
      "epoch": 1.337005688550238,
      "grad_norm": 1.1756023168563843,
      "learning_rate": 7.37426118355594e-06,
      "loss": 0.0281,
      "step": 816980
    },
    {
      "epoch": 1.3370384189888913,
      "grad_norm": 0.8971319794654846,
      "learning_rate": 7.374195291342423e-06,
      "loss": 0.0206,
      "step": 817000
    },
    {
      "epoch": 1.3370711494275447,
      "grad_norm": 0.15170474350452423,
      "learning_rate": 7.3741293991289056e-06,
      "loss": 0.0222,
      "step": 817020
    },
    {
      "epoch": 1.337103879866198,
      "grad_norm": 0.44599029421806335,
      "learning_rate": 7.374063506915389e-06,
      "loss": 0.0281,
      "step": 817040
    },
    {
      "epoch": 1.3371366103048512,
      "grad_norm": 1.0589853525161743,
      "learning_rate": 7.373997614701871e-06,
      "loss": 0.0189,
      "step": 817060
    },
    {
      "epoch": 1.3371693407435046,
      "grad_norm": 0.6811834573745728,
      "learning_rate": 7.373931722488355e-06,
      "loss": 0.0254,
      "step": 817080
    },
    {
      "epoch": 1.337202071182158,
      "grad_norm": 0.8424296975135803,
      "learning_rate": 7.3738658302748365e-06,
      "loss": 0.0206,
      "step": 817100
    },
    {
      "epoch": 1.3372348016208113,
      "grad_norm": 0.7397897839546204,
      "learning_rate": 7.37379993806132e-06,
      "loss": 0.0175,
      "step": 817120
    },
    {
      "epoch": 1.3372675320594647,
      "grad_norm": 0.4339931011199951,
      "learning_rate": 7.373734045847802e-06,
      "loss": 0.0223,
      "step": 817140
    },
    {
      "epoch": 1.337300262498118,
      "grad_norm": 0.4176081120967865,
      "learning_rate": 7.373668153634286e-06,
      "loss": 0.0315,
      "step": 817160
    },
    {
      "epoch": 1.3373329929367714,
      "grad_norm": 0.5018101930618286,
      "learning_rate": 7.373602261420768e-06,
      "loss": 0.0239,
      "step": 817180
    },
    {
      "epoch": 1.3373657233754246,
      "grad_norm": 4.99236536026001,
      "learning_rate": 7.373536369207251e-06,
      "loss": 0.0264,
      "step": 817200
    },
    {
      "epoch": 1.337398453814078,
      "grad_norm": 0.21376337110996246,
      "learning_rate": 7.373470476993734e-06,
      "loss": 0.0305,
      "step": 817220
    },
    {
      "epoch": 1.3374311842527313,
      "grad_norm": 0.7201869487762451,
      "learning_rate": 7.373404584780217e-06,
      "loss": 0.0216,
      "step": 817240
    },
    {
      "epoch": 1.3374639146913847,
      "grad_norm": 0.5044612884521484,
      "learning_rate": 7.3733386925667e-06,
      "loss": 0.0243,
      "step": 817260
    },
    {
      "epoch": 1.337496645130038,
      "grad_norm": 0.8343091607093811,
      "learning_rate": 7.373272800353183e-06,
      "loss": 0.0239,
      "step": 817280
    },
    {
      "epoch": 1.3375293755686914,
      "grad_norm": 0.38179153203964233,
      "learning_rate": 7.3732069081396665e-06,
      "loss": 0.0231,
      "step": 817300
    },
    {
      "epoch": 1.3375621060073448,
      "grad_norm": 5.900489330291748,
      "learning_rate": 7.373141015926148e-06,
      "loss": 0.0363,
      "step": 817320
    },
    {
      "epoch": 1.337594836445998,
      "grad_norm": 0.3369775116443634,
      "learning_rate": 7.373075123712632e-06,
      "loss": 0.0157,
      "step": 817340
    },
    {
      "epoch": 1.3376275668846513,
      "grad_norm": 0.3170492351055145,
      "learning_rate": 7.373009231499114e-06,
      "loss": 0.0252,
      "step": 817360
    },
    {
      "epoch": 1.3376602973233047,
      "grad_norm": 0.6205378770828247,
      "learning_rate": 7.3729433392855974e-06,
      "loss": 0.025,
      "step": 817380
    },
    {
      "epoch": 1.337693027761958,
      "grad_norm": 1.1206576824188232,
      "learning_rate": 7.37287744707208e-06,
      "loss": 0.0275,
      "step": 817400
    },
    {
      "epoch": 1.3377257582006115,
      "grad_norm": 0.6527504920959473,
      "learning_rate": 7.372811554858563e-06,
      "loss": 0.0315,
      "step": 817420
    },
    {
      "epoch": 1.3377584886392646,
      "grad_norm": 0.5142595767974854,
      "learning_rate": 7.372745662645046e-06,
      "loss": 0.0175,
      "step": 817440
    },
    {
      "epoch": 1.3377912190779182,
      "grad_norm": 0.6757146716117859,
      "learning_rate": 7.372679770431529e-06,
      "loss": 0.0113,
      "step": 817460
    },
    {
      "epoch": 1.3378239495165714,
      "grad_norm": 1.6885915994644165,
      "learning_rate": 7.372613878218011e-06,
      "loss": 0.0168,
      "step": 817480
    },
    {
      "epoch": 1.3378566799552247,
      "grad_norm": 2.3531904220581055,
      "learning_rate": 7.372547986004495e-06,
      "loss": 0.0255,
      "step": 817500
    },
    {
      "epoch": 1.337889410393878,
      "grad_norm": 1.2092454433441162,
      "learning_rate": 7.372482093790977e-06,
      "loss": 0.0222,
      "step": 817520
    },
    {
      "epoch": 1.3379221408325315,
      "grad_norm": 0.4532519578933716,
      "learning_rate": 7.37241620157746e-06,
      "loss": 0.0264,
      "step": 817540
    },
    {
      "epoch": 1.3379548712711848,
      "grad_norm": 0.2301502674818039,
      "learning_rate": 7.372350309363943e-06,
      "loss": 0.023,
      "step": 817560
    },
    {
      "epoch": 1.337987601709838,
      "grad_norm": 1.1794666051864624,
      "learning_rate": 7.372284417150426e-06,
      "loss": 0.0239,
      "step": 817580
    },
    {
      "epoch": 1.3380203321484916,
      "grad_norm": 0.1649552881717682,
      "learning_rate": 7.372218524936908e-06,
      "loss": 0.02,
      "step": 817600
    },
    {
      "epoch": 1.3380530625871447,
      "grad_norm": 0.3560428321361542,
      "learning_rate": 7.372152632723392e-06,
      "loss": 0.0212,
      "step": 817620
    },
    {
      "epoch": 1.338085793025798,
      "grad_norm": 0.45684173703193665,
      "learning_rate": 7.372086740509875e-06,
      "loss": 0.022,
      "step": 817640
    },
    {
      "epoch": 1.3381185234644515,
      "grad_norm": 0.697413980960846,
      "learning_rate": 7.3720208482963575e-06,
      "loss": 0.0225,
      "step": 817660
    },
    {
      "epoch": 1.3381512539031049,
      "grad_norm": 0.1721860021352768,
      "learning_rate": 7.371954956082841e-06,
      "loss": 0.0178,
      "step": 817680
    },
    {
      "epoch": 1.3381839843417582,
      "grad_norm": 1.2663401365280151,
      "learning_rate": 7.371889063869323e-06,
      "loss": 0.0311,
      "step": 817700
    },
    {
      "epoch": 1.3382167147804114,
      "grad_norm": 0.13366179168224335,
      "learning_rate": 7.3718231716558065e-06,
      "loss": 0.0241,
      "step": 817720
    },
    {
      "epoch": 1.3382494452190647,
      "grad_norm": 1.1892378330230713,
      "learning_rate": 7.3717572794422884e-06,
      "loss": 0.0166,
      "step": 817740
    },
    {
      "epoch": 1.3382821756577181,
      "grad_norm": 0.20362631976604462,
      "learning_rate": 7.371691387228772e-06,
      "loss": 0.0235,
      "step": 817760
    },
    {
      "epoch": 1.3383149060963715,
      "grad_norm": 0.09961538016796112,
      "learning_rate": 7.371625495015255e-06,
      "loss": 0.0204,
      "step": 817780
    },
    {
      "epoch": 1.3383476365350249,
      "grad_norm": 0.31403130292892456,
      "learning_rate": 7.3715596028017375e-06,
      "loss": 0.0214,
      "step": 817800
    },
    {
      "epoch": 1.3383803669736782,
      "grad_norm": 0.8558411598205566,
      "learning_rate": 7.37149371058822e-06,
      "loss": 0.0191,
      "step": 817820
    },
    {
      "epoch": 1.3384130974123316,
      "grad_norm": 1.4828556776046753,
      "learning_rate": 7.371427818374704e-06,
      "loss": 0.0236,
      "step": 817840
    },
    {
      "epoch": 1.3384458278509848,
      "grad_norm": 0.3080413341522217,
      "learning_rate": 7.371361926161186e-06,
      "loss": 0.0279,
      "step": 817860
    },
    {
      "epoch": 1.3384785582896381,
      "grad_norm": 0.7582132816314697,
      "learning_rate": 7.371296033947669e-06,
      "loss": 0.0216,
      "step": 817880
    },
    {
      "epoch": 1.3385112887282915,
      "grad_norm": 0.5396628975868225,
      "learning_rate": 7.371230141734151e-06,
      "loss": 0.0221,
      "step": 817900
    },
    {
      "epoch": 1.3385440191669449,
      "grad_norm": 0.6025899648666382,
      "learning_rate": 7.371164249520635e-06,
      "loss": 0.0223,
      "step": 817920
    },
    {
      "epoch": 1.3385767496055982,
      "grad_norm": 0.5120956301689148,
      "learning_rate": 7.371098357307117e-06,
      "loss": 0.0282,
      "step": 817940
    },
    {
      "epoch": 1.3386094800442516,
      "grad_norm": 0.5150087475776672,
      "learning_rate": 7.3710324650936e-06,
      "loss": 0.0256,
      "step": 817960
    },
    {
      "epoch": 1.338642210482905,
      "grad_norm": 0.21980179846286774,
      "learning_rate": 7.370966572880084e-06,
      "loss": 0.0189,
      "step": 817980
    },
    {
      "epoch": 1.3386749409215581,
      "grad_norm": 0.14667852222919464,
      "learning_rate": 7.370900680666566e-06,
      "loss": 0.0267,
      "step": 818000
    },
    {
      "epoch": 1.3387076713602115,
      "grad_norm": 0.7922804951667786,
      "learning_rate": 7.370834788453049e-06,
      "loss": 0.0195,
      "step": 818020
    },
    {
      "epoch": 1.3387404017988649,
      "grad_norm": 0.3121371865272522,
      "learning_rate": 7.370768896239532e-06,
      "loss": 0.0233,
      "step": 818040
    },
    {
      "epoch": 1.3387731322375183,
      "grad_norm": 1.3049352169036865,
      "learning_rate": 7.370703004026016e-06,
      "loss": 0.0167,
      "step": 818060
    },
    {
      "epoch": 1.3388058626761716,
      "grad_norm": 0.3736134171485901,
      "learning_rate": 7.3706371118124976e-06,
      "loss": 0.0183,
      "step": 818080
    },
    {
      "epoch": 1.338838593114825,
      "grad_norm": 0.6289310455322266,
      "learning_rate": 7.370571219598981e-06,
      "loss": 0.0272,
      "step": 818100
    },
    {
      "epoch": 1.3388713235534784,
      "grad_norm": 0.5618783831596375,
      "learning_rate": 7.370505327385463e-06,
      "loss": 0.0189,
      "step": 818120
    },
    {
      "epoch": 1.3389040539921315,
      "grad_norm": 1.2458131313323975,
      "learning_rate": 7.370439435171947e-06,
      "loss": 0.0174,
      "step": 818140
    },
    {
      "epoch": 1.338936784430785,
      "grad_norm": 0.3928951621055603,
      "learning_rate": 7.3703735429584285e-06,
      "loss": 0.0268,
      "step": 818160
    },
    {
      "epoch": 1.3389695148694383,
      "grad_norm": 0.5858249664306641,
      "learning_rate": 7.370307650744912e-06,
      "loss": 0.0219,
      "step": 818180
    },
    {
      "epoch": 1.3390022453080916,
      "grad_norm": 0.4111303985118866,
      "learning_rate": 7.370241758531395e-06,
      "loss": 0.0196,
      "step": 818200
    },
    {
      "epoch": 1.339034975746745,
      "grad_norm": 1.726547122001648,
      "learning_rate": 7.370175866317878e-06,
      "loss": 0.0201,
      "step": 818220
    },
    {
      "epoch": 1.3390677061853982,
      "grad_norm": 0.6125084161758423,
      "learning_rate": 7.37010997410436e-06,
      "loss": 0.0273,
      "step": 818240
    },
    {
      "epoch": 1.3391004366240518,
      "grad_norm": 0.4980223774909973,
      "learning_rate": 7.370044081890844e-06,
      "loss": 0.0205,
      "step": 818260
    },
    {
      "epoch": 1.339133167062705,
      "grad_norm": 0.3027137219905853,
      "learning_rate": 7.369978189677326e-06,
      "loss": 0.0231,
      "step": 818280
    },
    {
      "epoch": 1.3391658975013583,
      "grad_norm": 0.37831029295921326,
      "learning_rate": 7.369912297463809e-06,
      "loss": 0.0195,
      "step": 818300
    },
    {
      "epoch": 1.3391986279400117,
      "grad_norm": 0.46014755964279175,
      "learning_rate": 7.369846405250293e-06,
      "loss": 0.018,
      "step": 818320
    },
    {
      "epoch": 1.339231358378665,
      "grad_norm": 0.2712364196777344,
      "learning_rate": 7.369780513036775e-06,
      "loss": 0.0132,
      "step": 818340
    },
    {
      "epoch": 1.3392640888173184,
      "grad_norm": 0.3719572424888611,
      "learning_rate": 7.3697146208232585e-06,
      "loss": 0.0182,
      "step": 818360
    },
    {
      "epoch": 1.3392968192559715,
      "grad_norm": 0.3104315996170044,
      "learning_rate": 7.36964872860974e-06,
      "loss": 0.0144,
      "step": 818380
    },
    {
      "epoch": 1.3393295496946251,
      "grad_norm": 0.7127400636672974,
      "learning_rate": 7.369582836396224e-06,
      "loss": 0.0207,
      "step": 818400
    },
    {
      "epoch": 1.3393622801332783,
      "grad_norm": 1.2969543933868408,
      "learning_rate": 7.369516944182707e-06,
      "loss": 0.0287,
      "step": 818420
    },
    {
      "epoch": 1.3393950105719317,
      "grad_norm": 0.5115625858306885,
      "learning_rate": 7.369451051969189e-06,
      "loss": 0.016,
      "step": 818440
    },
    {
      "epoch": 1.339427741010585,
      "grad_norm": 0.5141849517822266,
      "learning_rate": 7.369385159755672e-06,
      "loss": 0.023,
      "step": 818460
    },
    {
      "epoch": 1.3394604714492384,
      "grad_norm": 0.7935569882392883,
      "learning_rate": 7.369319267542156e-06,
      "loss": 0.0186,
      "step": 818480
    },
    {
      "epoch": 1.3394932018878918,
      "grad_norm": 0.6711218357086182,
      "learning_rate": 7.369253375328638e-06,
      "loss": 0.024,
      "step": 818500
    },
    {
      "epoch": 1.339525932326545,
      "grad_norm": 0.5408139824867249,
      "learning_rate": 7.369187483115121e-06,
      "loss": 0.0253,
      "step": 818520
    },
    {
      "epoch": 1.3395586627651983,
      "grad_norm": 0.15450918674468994,
      "learning_rate": 7.369121590901603e-06,
      "loss": 0.024,
      "step": 818540
    },
    {
      "epoch": 1.3395913932038517,
      "grad_norm": 0.8319990634918213,
      "learning_rate": 7.369055698688087e-06,
      "loss": 0.0163,
      "step": 818560
    },
    {
      "epoch": 1.339624123642505,
      "grad_norm": 0.583314061164856,
      "learning_rate": 7.3689898064745694e-06,
      "loss": 0.0277,
      "step": 818580
    },
    {
      "epoch": 1.3396568540811584,
      "grad_norm": 1.0412031412124634,
      "learning_rate": 7.368923914261052e-06,
      "loss": 0.0234,
      "step": 818600
    },
    {
      "epoch": 1.3396895845198118,
      "grad_norm": 0.5764041543006897,
      "learning_rate": 7.368858022047535e-06,
      "loss": 0.0276,
      "step": 818620
    },
    {
      "epoch": 1.3397223149584652,
      "grad_norm": 0.21345050632953644,
      "learning_rate": 7.3687921298340185e-06,
      "loss": 0.022,
      "step": 818640
    },
    {
      "epoch": 1.3397550453971183,
      "grad_norm": 0.8692275285720825,
      "learning_rate": 7.3687262376205e-06,
      "loss": 0.023,
      "step": 818660
    },
    {
      "epoch": 1.3397877758357717,
      "grad_norm": 0.7537975311279297,
      "learning_rate": 7.368660345406984e-06,
      "loss": 0.0262,
      "step": 818680
    },
    {
      "epoch": 1.339820506274425,
      "grad_norm": 0.975016176700592,
      "learning_rate": 7.3685944531934676e-06,
      "loss": 0.0222,
      "step": 818700
    },
    {
      "epoch": 1.3398532367130784,
      "grad_norm": 0.2497824728488922,
      "learning_rate": 7.3685285609799495e-06,
      "loss": 0.0264,
      "step": 818720
    },
    {
      "epoch": 1.3398859671517318,
      "grad_norm": 0.6285425424575806,
      "learning_rate": 7.368462668766433e-06,
      "loss": 0.0321,
      "step": 818740
    },
    {
      "epoch": 1.3399186975903852,
      "grad_norm": 0.7821183800697327,
      "learning_rate": 7.368396776552915e-06,
      "loss": 0.0207,
      "step": 818760
    },
    {
      "epoch": 1.3399514280290385,
      "grad_norm": 0.12583769857883453,
      "learning_rate": 7.3683308843393985e-06,
      "loss": 0.0201,
      "step": 818780
    },
    {
      "epoch": 1.3399841584676917,
      "grad_norm": 1.4666742086410522,
      "learning_rate": 7.368264992125881e-06,
      "loss": 0.0198,
      "step": 818800
    },
    {
      "epoch": 1.340016888906345,
      "grad_norm": 0.5442444086074829,
      "learning_rate": 7.368199099912364e-06,
      "loss": 0.0178,
      "step": 818820
    },
    {
      "epoch": 1.3400496193449984,
      "grad_norm": 0.592438817024231,
      "learning_rate": 7.368133207698847e-06,
      "loss": 0.0207,
      "step": 818840
    },
    {
      "epoch": 1.3400823497836518,
      "grad_norm": 1.1067019701004028,
      "learning_rate": 7.36806731548533e-06,
      "loss": 0.0199,
      "step": 818860
    },
    {
      "epoch": 1.3401150802223052,
      "grad_norm": 0.41124653816223145,
      "learning_rate": 7.368001423271812e-06,
      "loss": 0.0202,
      "step": 818880
    },
    {
      "epoch": 1.3401478106609586,
      "grad_norm": 0.1860172599554062,
      "learning_rate": 7.367935531058296e-06,
      "loss": 0.0204,
      "step": 818900
    },
    {
      "epoch": 1.340180541099612,
      "grad_norm": 1.5876822471618652,
      "learning_rate": 7.367869638844778e-06,
      "loss": 0.0201,
      "step": 818920
    },
    {
      "epoch": 1.340213271538265,
      "grad_norm": 0.4672509431838989,
      "learning_rate": 7.367803746631261e-06,
      "loss": 0.0199,
      "step": 818940
    },
    {
      "epoch": 1.3402460019769185,
      "grad_norm": 0.33163976669311523,
      "learning_rate": 7.367737854417743e-06,
      "loss": 0.0193,
      "step": 818960
    },
    {
      "epoch": 1.3402787324155718,
      "grad_norm": 0.0808623656630516,
      "learning_rate": 7.367671962204227e-06,
      "loss": 0.0229,
      "step": 818980
    },
    {
      "epoch": 1.3403114628542252,
      "grad_norm": 0.8014178276062012,
      "learning_rate": 7.3676060699907095e-06,
      "loss": 0.0134,
      "step": 819000
    },
    {
      "epoch": 1.3403441932928786,
      "grad_norm": 0.8297796845436096,
      "learning_rate": 7.367540177777192e-06,
      "loss": 0.0173,
      "step": 819020
    },
    {
      "epoch": 1.3403769237315317,
      "grad_norm": 0.19434070587158203,
      "learning_rate": 7.367474285563676e-06,
      "loss": 0.0213,
      "step": 819040
    },
    {
      "epoch": 1.3404096541701853,
      "grad_norm": 0.44309529662132263,
      "learning_rate": 7.367408393350159e-06,
      "loss": 0.0182,
      "step": 819060
    },
    {
      "epoch": 1.3404423846088385,
      "grad_norm": 0.6409119367599487,
      "learning_rate": 7.367342501136641e-06,
      "loss": 0.0174,
      "step": 819080
    },
    {
      "epoch": 1.3404751150474918,
      "grad_norm": 0.245697021484375,
      "learning_rate": 7.367276608923124e-06,
      "loss": 0.0224,
      "step": 819100
    },
    {
      "epoch": 1.3405078454861452,
      "grad_norm": 0.9477152228355408,
      "learning_rate": 7.367210716709608e-06,
      "loss": 0.0236,
      "step": 819120
    },
    {
      "epoch": 1.3405405759247986,
      "grad_norm": 0.23090100288391113,
      "learning_rate": 7.3671448244960895e-06,
      "loss": 0.0233,
      "step": 819140
    },
    {
      "epoch": 1.340573306363452,
      "grad_norm": 1.1737406253814697,
      "learning_rate": 7.367078932282573e-06,
      "loss": 0.0225,
      "step": 819160
    },
    {
      "epoch": 1.340606036802105,
      "grad_norm": 0.3882957100868225,
      "learning_rate": 7.367013040069055e-06,
      "loss": 0.0185,
      "step": 819180
    },
    {
      "epoch": 1.3406387672407585,
      "grad_norm": 0.48330020904541016,
      "learning_rate": 7.366947147855539e-06,
      "loss": 0.0216,
      "step": 819200
    },
    {
      "epoch": 1.3406714976794118,
      "grad_norm": 0.9041317105293274,
      "learning_rate": 7.366881255642021e-06,
      "loss": 0.0151,
      "step": 819220
    },
    {
      "epoch": 1.3407042281180652,
      "grad_norm": 0.25456544756889343,
      "learning_rate": 7.366815363428504e-06,
      "loss": 0.0176,
      "step": 819240
    },
    {
      "epoch": 1.3407369585567186,
      "grad_norm": 0.23192530870437622,
      "learning_rate": 7.366749471214987e-06,
      "loss": 0.0275,
      "step": 819260
    },
    {
      "epoch": 1.340769688995372,
      "grad_norm": 0.6989546418190002,
      "learning_rate": 7.36668357900147e-06,
      "loss": 0.0151,
      "step": 819280
    },
    {
      "epoch": 1.3408024194340253,
      "grad_norm": 0.9918767213821411,
      "learning_rate": 7.366617686787952e-06,
      "loss": 0.0189,
      "step": 819300
    },
    {
      "epoch": 1.3408351498726785,
      "grad_norm": 1.0795515775680542,
      "learning_rate": 7.366551794574436e-06,
      "loss": 0.0219,
      "step": 819320
    },
    {
      "epoch": 1.3408678803113319,
      "grad_norm": 1.2859019041061401,
      "learning_rate": 7.366485902360918e-06,
      "loss": 0.0275,
      "step": 819340
    },
    {
      "epoch": 1.3409006107499852,
      "grad_norm": 0.4035584032535553,
      "learning_rate": 7.366420010147401e-06,
      "loss": 0.0176,
      "step": 819360
    },
    {
      "epoch": 1.3409333411886386,
      "grad_norm": 1.0012983083724976,
      "learning_rate": 7.366354117933885e-06,
      "loss": 0.0276,
      "step": 819380
    },
    {
      "epoch": 1.340966071627292,
      "grad_norm": 0.34120580554008484,
      "learning_rate": 7.366288225720367e-06,
      "loss": 0.0163,
      "step": 819400
    },
    {
      "epoch": 1.3409988020659454,
      "grad_norm": 0.130336731672287,
      "learning_rate": 7.3662223335068504e-06,
      "loss": 0.0263,
      "step": 819420
    },
    {
      "epoch": 1.3410315325045987,
      "grad_norm": 0.5458782911300659,
      "learning_rate": 7.366156441293333e-06,
      "loss": 0.0259,
      "step": 819440
    },
    {
      "epoch": 1.3410642629432519,
      "grad_norm": 0.783962070941925,
      "learning_rate": 7.366090549079816e-06,
      "loss": 0.0196,
      "step": 819460
    },
    {
      "epoch": 1.3410969933819052,
      "grad_norm": 0.67624431848526,
      "learning_rate": 7.366024656866299e-06,
      "loss": 0.0188,
      "step": 819480
    },
    {
      "epoch": 1.3411297238205586,
      "grad_norm": 0.7086586356163025,
      "learning_rate": 7.365958764652782e-06,
      "loss": 0.0204,
      "step": 819500
    },
    {
      "epoch": 1.341162454259212,
      "grad_norm": 0.2511599361896515,
      "learning_rate": 7.365892872439264e-06,
      "loss": 0.0189,
      "step": 819520
    },
    {
      "epoch": 1.3411951846978654,
      "grad_norm": 1.1334333419799805,
      "learning_rate": 7.365826980225748e-06,
      "loss": 0.0271,
      "step": 819540
    },
    {
      "epoch": 1.3412279151365187,
      "grad_norm": 0.8555377721786499,
      "learning_rate": 7.36576108801223e-06,
      "loss": 0.0228,
      "step": 819560
    },
    {
      "epoch": 1.341260645575172,
      "grad_norm": 0.2254462093114853,
      "learning_rate": 7.365695195798713e-06,
      "loss": 0.0194,
      "step": 819580
    },
    {
      "epoch": 1.3412933760138253,
      "grad_norm": 0.19733338057994843,
      "learning_rate": 7.365629303585196e-06,
      "loss": 0.021,
      "step": 819600
    },
    {
      "epoch": 1.3413261064524786,
      "grad_norm": 0.52982097864151,
      "learning_rate": 7.365563411371679e-06,
      "loss": 0.0211,
      "step": 819620
    },
    {
      "epoch": 1.341358836891132,
      "grad_norm": 0.42830342054367065,
      "learning_rate": 7.365497519158161e-06,
      "loss": 0.019,
      "step": 819640
    },
    {
      "epoch": 1.3413915673297854,
      "grad_norm": 0.6697061061859131,
      "learning_rate": 7.365431626944645e-06,
      "loss": 0.0213,
      "step": 819660
    },
    {
      "epoch": 1.3414242977684387,
      "grad_norm": 0.16688641905784607,
      "learning_rate": 7.365365734731127e-06,
      "loss": 0.0141,
      "step": 819680
    },
    {
      "epoch": 1.341457028207092,
      "grad_norm": 0.44495904445648193,
      "learning_rate": 7.3652998425176105e-06,
      "loss": 0.022,
      "step": 819700
    },
    {
      "epoch": 1.3414897586457455,
      "grad_norm": 0.9960916042327881,
      "learning_rate": 7.365233950304094e-06,
      "loss": 0.0332,
      "step": 819720
    },
    {
      "epoch": 1.3415224890843986,
      "grad_norm": 0.7206864953041077,
      "learning_rate": 7.365168058090576e-06,
      "loss": 0.0193,
      "step": 819740
    },
    {
      "epoch": 1.341555219523052,
      "grad_norm": 1.104366660118103,
      "learning_rate": 7.3651021658770596e-06,
      "loss": 0.0284,
      "step": 819760
    },
    {
      "epoch": 1.3415879499617054,
      "grad_norm": 0.26918280124664307,
      "learning_rate": 7.3650362736635414e-06,
      "loss": 0.0184,
      "step": 819780
    },
    {
      "epoch": 1.3416206804003588,
      "grad_norm": 0.709629476070404,
      "learning_rate": 7.364970381450025e-06,
      "loss": 0.0224,
      "step": 819800
    },
    {
      "epoch": 1.3416534108390121,
      "grad_norm": 0.6117878556251526,
      "learning_rate": 7.364904489236508e-06,
      "loss": 0.0203,
      "step": 819820
    },
    {
      "epoch": 1.3416861412776653,
      "grad_norm": 0.5901056528091431,
      "learning_rate": 7.3648385970229905e-06,
      "loss": 0.0158,
      "step": 819840
    },
    {
      "epoch": 1.3417188717163189,
      "grad_norm": 0.7527686357498169,
      "learning_rate": 7.364772704809473e-06,
      "loss": 0.0248,
      "step": 819860
    },
    {
      "epoch": 1.341751602154972,
      "grad_norm": 0.3466920554637909,
      "learning_rate": 7.364706812595957e-06,
      "loss": 0.027,
      "step": 819880
    },
    {
      "epoch": 1.3417843325936254,
      "grad_norm": 2.681736946105957,
      "learning_rate": 7.364640920382439e-06,
      "loss": 0.0287,
      "step": 819900
    },
    {
      "epoch": 1.3418170630322788,
      "grad_norm": 0.8286943435668945,
      "learning_rate": 7.364575028168922e-06,
      "loss": 0.0228,
      "step": 819920
    },
    {
      "epoch": 1.3418497934709321,
      "grad_norm": 0.44184640049934387,
      "learning_rate": 7.364509135955404e-06,
      "loss": 0.0281,
      "step": 819940
    },
    {
      "epoch": 1.3418825239095855,
      "grad_norm": 1.4859453439712524,
      "learning_rate": 7.364443243741888e-06,
      "loss": 0.0238,
      "step": 819960
    },
    {
      "epoch": 1.3419152543482387,
      "grad_norm": 0.24577899277210236,
      "learning_rate": 7.36437735152837e-06,
      "loss": 0.0207,
      "step": 819980
    },
    {
      "epoch": 1.341947984786892,
      "grad_norm": 1.325364351272583,
      "learning_rate": 7.364311459314853e-06,
      "loss": 0.0288,
      "step": 820000
    },
    {
      "epoch": 1.3419807152255454,
      "grad_norm": 0.30851230025291443,
      "learning_rate": 7.364245567101336e-06,
      "loss": 0.0192,
      "step": 820020
    },
    {
      "epoch": 1.3420134456641988,
      "grad_norm": 0.8366678953170776,
      "learning_rate": 7.364179674887819e-06,
      "loss": 0.0251,
      "step": 820040
    },
    {
      "epoch": 1.3420461761028522,
      "grad_norm": 1.0315366983413696,
      "learning_rate": 7.3641137826743015e-06,
      "loss": 0.0313,
      "step": 820060
    },
    {
      "epoch": 1.3420789065415055,
      "grad_norm": 0.17113611102104187,
      "learning_rate": 7.364047890460785e-06,
      "loss": 0.0217,
      "step": 820080
    },
    {
      "epoch": 1.342111636980159,
      "grad_norm": 0.6650803089141846,
      "learning_rate": 7.363981998247268e-06,
      "loss": 0.0272,
      "step": 820100
    },
    {
      "epoch": 1.342144367418812,
      "grad_norm": 0.20777063071727753,
      "learning_rate": 7.3639161060337506e-06,
      "loss": 0.0165,
      "step": 820120
    },
    {
      "epoch": 1.3421770978574654,
      "grad_norm": 1.3793975114822388,
      "learning_rate": 7.363850213820234e-06,
      "loss": 0.0233,
      "step": 820140
    },
    {
      "epoch": 1.3422098282961188,
      "grad_norm": 0.1706315577030182,
      "learning_rate": 7.363784321606716e-06,
      "loss": 0.025,
      "step": 820160
    },
    {
      "epoch": 1.3422425587347722,
      "grad_norm": 1.8264024257659912,
      "learning_rate": 7.3637184293932e-06,
      "loss": 0.0151,
      "step": 820180
    },
    {
      "epoch": 1.3422752891734255,
      "grad_norm": 0.7952204346656799,
      "learning_rate": 7.3636525371796815e-06,
      "loss": 0.0217,
      "step": 820200
    },
    {
      "epoch": 1.342308019612079,
      "grad_norm": 0.4075206518173218,
      "learning_rate": 7.363586644966165e-06,
      "loss": 0.0145,
      "step": 820220
    },
    {
      "epoch": 1.3423407500507323,
      "grad_norm": 1.6168347597122192,
      "learning_rate": 7.363520752752648e-06,
      "loss": 0.0239,
      "step": 820240
    },
    {
      "epoch": 1.3423734804893854,
      "grad_norm": 0.4436832368373871,
      "learning_rate": 7.363454860539131e-06,
      "loss": 0.0184,
      "step": 820260
    },
    {
      "epoch": 1.3424062109280388,
      "grad_norm": 2.401827335357666,
      "learning_rate": 7.363388968325613e-06,
      "loss": 0.0343,
      "step": 820280
    },
    {
      "epoch": 1.3424389413666922,
      "grad_norm": 0.7072018384933472,
      "learning_rate": 7.363323076112097e-06,
      "loss": 0.0222,
      "step": 820300
    },
    {
      "epoch": 1.3424716718053455,
      "grad_norm": 0.7330225110054016,
      "learning_rate": 7.363257183898579e-06,
      "loss": 0.0177,
      "step": 820320
    },
    {
      "epoch": 1.342504402243999,
      "grad_norm": 0.2231684774160385,
      "learning_rate": 7.363191291685062e-06,
      "loss": 0.0221,
      "step": 820340
    },
    {
      "epoch": 1.3425371326826523,
      "grad_norm": 0.2585799992084503,
      "learning_rate": 7.363125399471544e-06,
      "loss": 0.0223,
      "step": 820360
    },
    {
      "epoch": 1.3425698631213057,
      "grad_norm": 0.8231751918792725,
      "learning_rate": 7.363059507258028e-06,
      "loss": 0.0255,
      "step": 820380
    },
    {
      "epoch": 1.3426025935599588,
      "grad_norm": 0.5042046308517456,
      "learning_rate": 7.362993615044511e-06,
      "loss": 0.0212,
      "step": 820400
    },
    {
      "epoch": 1.3426353239986122,
      "grad_norm": 1.53365159034729,
      "learning_rate": 7.362927722830993e-06,
      "loss": 0.0175,
      "step": 820420
    },
    {
      "epoch": 1.3426680544372656,
      "grad_norm": 0.3772093653678894,
      "learning_rate": 7.362861830617477e-06,
      "loss": 0.0132,
      "step": 820440
    },
    {
      "epoch": 1.342700784875919,
      "grad_norm": 1.1186904907226562,
      "learning_rate": 7.36279593840396e-06,
      "loss": 0.0231,
      "step": 820460
    },
    {
      "epoch": 1.3427335153145723,
      "grad_norm": 0.41614893078804016,
      "learning_rate": 7.362730046190442e-06,
      "loss": 0.0221,
      "step": 820480
    },
    {
      "epoch": 1.3427662457532255,
      "grad_norm": 0.5065350532531738,
      "learning_rate": 7.362664153976925e-06,
      "loss": 0.0206,
      "step": 820500
    },
    {
      "epoch": 1.342798976191879,
      "grad_norm": 0.5068371295928955,
      "learning_rate": 7.362598261763409e-06,
      "loss": 0.0171,
      "step": 820520
    },
    {
      "epoch": 1.3428317066305322,
      "grad_norm": 0.27177682518959045,
      "learning_rate": 7.362532369549891e-06,
      "loss": 0.0187,
      "step": 820540
    },
    {
      "epoch": 1.3428644370691856,
      "grad_norm": 0.31672677397727966,
      "learning_rate": 7.362466477336374e-06,
      "loss": 0.0233,
      "step": 820560
    },
    {
      "epoch": 1.342897167507839,
      "grad_norm": 0.26233428716659546,
      "learning_rate": 7.362400585122856e-06,
      "loss": 0.0264,
      "step": 820580
    },
    {
      "epoch": 1.3429298979464923,
      "grad_norm": 0.3796500563621521,
      "learning_rate": 7.36233469290934e-06,
      "loss": 0.0217,
      "step": 820600
    },
    {
      "epoch": 1.3429626283851457,
      "grad_norm": 0.8157934546470642,
      "learning_rate": 7.3622688006958224e-06,
      "loss": 0.0298,
      "step": 820620
    },
    {
      "epoch": 1.3429953588237988,
      "grad_norm": 0.1919979602098465,
      "learning_rate": 7.362202908482305e-06,
      "loss": 0.0267,
      "step": 820640
    },
    {
      "epoch": 1.3430280892624524,
      "grad_norm": 1.2843669652938843,
      "learning_rate": 7.362137016268788e-06,
      "loss": 0.0235,
      "step": 820660
    },
    {
      "epoch": 1.3430608197011056,
      "grad_norm": 0.9619795083999634,
      "learning_rate": 7.3620711240552715e-06,
      "loss": 0.0242,
      "step": 820680
    },
    {
      "epoch": 1.343093550139759,
      "grad_norm": 0.41263988614082336,
      "learning_rate": 7.362005231841753e-06,
      "loss": 0.0157,
      "step": 820700
    },
    {
      "epoch": 1.3431262805784123,
      "grad_norm": 0.9386911988258362,
      "learning_rate": 7.361939339628237e-06,
      "loss": 0.0246,
      "step": 820720
    },
    {
      "epoch": 1.3431590110170657,
      "grad_norm": 1.143608570098877,
      "learning_rate": 7.361873447414719e-06,
      "loss": 0.0265,
      "step": 820740
    },
    {
      "epoch": 1.343191741455719,
      "grad_norm": 1.1244041919708252,
      "learning_rate": 7.3618075552012025e-06,
      "loss": 0.0202,
      "step": 820760
    },
    {
      "epoch": 1.3432244718943722,
      "grad_norm": 0.4777643382549286,
      "learning_rate": 7.361741662987686e-06,
      "loss": 0.0236,
      "step": 820780
    },
    {
      "epoch": 1.3432572023330256,
      "grad_norm": 1.38161039352417,
      "learning_rate": 7.361675770774168e-06,
      "loss": 0.024,
      "step": 820800
    },
    {
      "epoch": 1.343289932771679,
      "grad_norm": 0.9438455700874329,
      "learning_rate": 7.3616098785606515e-06,
      "loss": 0.0226,
      "step": 820820
    },
    {
      "epoch": 1.3433226632103323,
      "grad_norm": 1.227920651435852,
      "learning_rate": 7.361543986347134e-06,
      "loss": 0.0256,
      "step": 820840
    },
    {
      "epoch": 1.3433553936489857,
      "grad_norm": 0.584195077419281,
      "learning_rate": 7.361478094133617e-06,
      "loss": 0.0212,
      "step": 820860
    },
    {
      "epoch": 1.343388124087639,
      "grad_norm": 0.13943006098270416,
      "learning_rate": 7.3614122019201e-06,
      "loss": 0.0175,
      "step": 820880
    },
    {
      "epoch": 1.3434208545262925,
      "grad_norm": 1.5613771677017212,
      "learning_rate": 7.361346309706583e-06,
      "loss": 0.0275,
      "step": 820900
    },
    {
      "epoch": 1.3434535849649456,
      "grad_norm": 0.2337748408317566,
      "learning_rate": 7.361280417493065e-06,
      "loss": 0.0202,
      "step": 820920
    },
    {
      "epoch": 1.343486315403599,
      "grad_norm": 0.8894266486167908,
      "learning_rate": 7.361214525279549e-06,
      "loss": 0.0229,
      "step": 820940
    },
    {
      "epoch": 1.3435190458422523,
      "grad_norm": 0.7299741506576538,
      "learning_rate": 7.361148633066031e-06,
      "loss": 0.0192,
      "step": 820960
    },
    {
      "epoch": 1.3435517762809057,
      "grad_norm": 0.8397270441055298,
      "learning_rate": 7.361082740852514e-06,
      "loss": 0.0231,
      "step": 820980
    },
    {
      "epoch": 1.343584506719559,
      "grad_norm": 0.36781617999076843,
      "learning_rate": 7.361016848638996e-06,
      "loss": 0.0283,
      "step": 821000
    },
    {
      "epoch": 1.3436172371582125,
      "grad_norm": 0.351496160030365,
      "learning_rate": 7.36095095642548e-06,
      "loss": 0.0247,
      "step": 821020
    },
    {
      "epoch": 1.3436499675968658,
      "grad_norm": 0.25671452283859253,
      "learning_rate": 7.3608850642119625e-06,
      "loss": 0.0231,
      "step": 821040
    },
    {
      "epoch": 1.343682698035519,
      "grad_norm": 0.3868626356124878,
      "learning_rate": 7.360819171998445e-06,
      "loss": 0.0268,
      "step": 821060
    },
    {
      "epoch": 1.3437154284741724,
      "grad_norm": 0.12288958579301834,
      "learning_rate": 7.360753279784928e-06,
      "loss": 0.0209,
      "step": 821080
    },
    {
      "epoch": 1.3437481589128257,
      "grad_norm": 0.5649493336677551,
      "learning_rate": 7.360687387571412e-06,
      "loss": 0.0278,
      "step": 821100
    },
    {
      "epoch": 1.343780889351479,
      "grad_norm": 0.28818264603614807,
      "learning_rate": 7.3606214953578935e-06,
      "loss": 0.0215,
      "step": 821120
    },
    {
      "epoch": 1.3438136197901325,
      "grad_norm": 0.3704362213611603,
      "learning_rate": 7.360555603144377e-06,
      "loss": 0.0238,
      "step": 821140
    },
    {
      "epoch": 1.3438463502287858,
      "grad_norm": 0.5874793529510498,
      "learning_rate": 7.360489710930861e-06,
      "loss": 0.0213,
      "step": 821160
    },
    {
      "epoch": 1.3438790806674392,
      "grad_norm": 0.9107409119606018,
      "learning_rate": 7.3604238187173425e-06,
      "loss": 0.0148,
      "step": 821180
    },
    {
      "epoch": 1.3439118111060924,
      "grad_norm": 0.4880223870277405,
      "learning_rate": 7.360357926503826e-06,
      "loss": 0.017,
      "step": 821200
    },
    {
      "epoch": 1.3439445415447457,
      "grad_norm": 1.2821754217147827,
      "learning_rate": 7.360292034290308e-06,
      "loss": 0.0207,
      "step": 821220
    },
    {
      "epoch": 1.3439772719833991,
      "grad_norm": 0.4075874984264374,
      "learning_rate": 7.360226142076792e-06,
      "loss": 0.0214,
      "step": 821240
    },
    {
      "epoch": 1.3440100024220525,
      "grad_norm": 1.1029001474380493,
      "learning_rate": 7.360160249863274e-06,
      "loss": 0.02,
      "step": 821260
    },
    {
      "epoch": 1.3440427328607059,
      "grad_norm": 0.19600079953670502,
      "learning_rate": 7.360094357649757e-06,
      "loss": 0.0202,
      "step": 821280
    },
    {
      "epoch": 1.344075463299359,
      "grad_norm": 1.8145906925201416,
      "learning_rate": 7.36002846543624e-06,
      "loss": 0.0277,
      "step": 821300
    },
    {
      "epoch": 1.3441081937380126,
      "grad_norm": 0.4852171540260315,
      "learning_rate": 7.359962573222723e-06,
      "loss": 0.0286,
      "step": 821320
    },
    {
      "epoch": 1.3441409241766658,
      "grad_norm": 0.6076807975769043,
      "learning_rate": 7.359896681009205e-06,
      "loss": 0.0255,
      "step": 821340
    },
    {
      "epoch": 1.3441736546153191,
      "grad_norm": 0.44955363869667053,
      "learning_rate": 7.359830788795689e-06,
      "loss": 0.0198,
      "step": 821360
    },
    {
      "epoch": 1.3442063850539725,
      "grad_norm": 1.6218421459197998,
      "learning_rate": 7.359764896582171e-06,
      "loss": 0.0221,
      "step": 821380
    },
    {
      "epoch": 1.3442391154926259,
      "grad_norm": 0.5351880788803101,
      "learning_rate": 7.359699004368654e-06,
      "loss": 0.0242,
      "step": 821400
    },
    {
      "epoch": 1.3442718459312792,
      "grad_norm": 0.7137935161590576,
      "learning_rate": 7.359633112155137e-06,
      "loss": 0.0224,
      "step": 821420
    },
    {
      "epoch": 1.3443045763699324,
      "grad_norm": 0.3548700213432312,
      "learning_rate": 7.35956721994162e-06,
      "loss": 0.0166,
      "step": 821440
    },
    {
      "epoch": 1.344337306808586,
      "grad_norm": 1.3270964622497559,
      "learning_rate": 7.359501327728103e-06,
      "loss": 0.0195,
      "step": 821460
    },
    {
      "epoch": 1.3443700372472391,
      "grad_norm": 0.18157848715782166,
      "learning_rate": 7.359435435514586e-06,
      "loss": 0.0242,
      "step": 821480
    },
    {
      "epoch": 1.3444027676858925,
      "grad_norm": 1.7054071426391602,
      "learning_rate": 7.359369543301069e-06,
      "loss": 0.0151,
      "step": 821500
    },
    {
      "epoch": 1.3444354981245459,
      "grad_norm": 0.47839978337287903,
      "learning_rate": 7.359303651087552e-06,
      "loss": 0.0201,
      "step": 821520
    },
    {
      "epoch": 1.3444682285631993,
      "grad_norm": 1.6099495887756348,
      "learning_rate": 7.359237758874035e-06,
      "loss": 0.0185,
      "step": 821540
    },
    {
      "epoch": 1.3445009590018526,
      "grad_norm": 0.9663403630256653,
      "learning_rate": 7.359171866660517e-06,
      "loss": 0.0236,
      "step": 821560
    },
    {
      "epoch": 1.3445336894405058,
      "grad_norm": 0.3022110164165497,
      "learning_rate": 7.359105974447001e-06,
      "loss": 0.0166,
      "step": 821580
    },
    {
      "epoch": 1.3445664198791591,
      "grad_norm": 0.4714488685131073,
      "learning_rate": 7.359040082233483e-06,
      "loss": 0.0219,
      "step": 821600
    },
    {
      "epoch": 1.3445991503178125,
      "grad_norm": 0.4413975477218628,
      "learning_rate": 7.358974190019966e-06,
      "loss": 0.0199,
      "step": 821620
    },
    {
      "epoch": 1.344631880756466,
      "grad_norm": 0.24392685294151306,
      "learning_rate": 7.358908297806449e-06,
      "loss": 0.0188,
      "step": 821640
    },
    {
      "epoch": 1.3446646111951193,
      "grad_norm": 0.2437528818845749,
      "learning_rate": 7.358842405592932e-06,
      "loss": 0.0173,
      "step": 821660
    },
    {
      "epoch": 1.3446973416337726,
      "grad_norm": 0.9150651693344116,
      "learning_rate": 7.3587765133794144e-06,
      "loss": 0.0224,
      "step": 821680
    },
    {
      "epoch": 1.344730072072426,
      "grad_norm": 4.246782302856445,
      "learning_rate": 7.358710621165898e-06,
      "loss": 0.0258,
      "step": 821700
    },
    {
      "epoch": 1.3447628025110792,
      "grad_norm": 0.4972792863845825,
      "learning_rate": 7.35864472895238e-06,
      "loss": 0.0204,
      "step": 821720
    },
    {
      "epoch": 1.3447955329497325,
      "grad_norm": 1.8299531936645508,
      "learning_rate": 7.3585788367388635e-06,
      "loss": 0.0163,
      "step": 821740
    },
    {
      "epoch": 1.344828263388386,
      "grad_norm": 0.15980187058448792,
      "learning_rate": 7.358512944525345e-06,
      "loss": 0.0295,
      "step": 821760
    },
    {
      "epoch": 1.3448609938270393,
      "grad_norm": 1.2433555126190186,
      "learning_rate": 7.358447052311829e-06,
      "loss": 0.0172,
      "step": 821780
    },
    {
      "epoch": 1.3448937242656926,
      "grad_norm": 0.2911018133163452,
      "learning_rate": 7.358381160098311e-06,
      "loss": 0.0148,
      "step": 821800
    },
    {
      "epoch": 1.344926454704346,
      "grad_norm": 0.714121401309967,
      "learning_rate": 7.3583152678847944e-06,
      "loss": 0.0186,
      "step": 821820
    },
    {
      "epoch": 1.3449591851429994,
      "grad_norm": 0.31879445910453796,
      "learning_rate": 7.358249375671278e-06,
      "loss": 0.0139,
      "step": 821840
    },
    {
      "epoch": 1.3449919155816525,
      "grad_norm": 0.051779232919216156,
      "learning_rate": 7.35818348345776e-06,
      "loss": 0.0347,
      "step": 821860
    },
    {
      "epoch": 1.345024646020306,
      "grad_norm": 0.30716535449028015,
      "learning_rate": 7.3581175912442435e-06,
      "loss": 0.0178,
      "step": 821880
    },
    {
      "epoch": 1.3450573764589593,
      "grad_norm": 0.2765245735645294,
      "learning_rate": 7.358051699030726e-06,
      "loss": 0.0168,
      "step": 821900
    },
    {
      "epoch": 1.3450901068976127,
      "grad_norm": 0.5238900780677795,
      "learning_rate": 7.35798580681721e-06,
      "loss": 0.0237,
      "step": 821920
    },
    {
      "epoch": 1.345122837336266,
      "grad_norm": 1.3290332555770874,
      "learning_rate": 7.357919914603692e-06,
      "loss": 0.019,
      "step": 821940
    },
    {
      "epoch": 1.3451555677749192,
      "grad_norm": 1.3145967721939087,
      "learning_rate": 7.357854022390175e-06,
      "loss": 0.0229,
      "step": 821960
    },
    {
      "epoch": 1.3451882982135728,
      "grad_norm": 0.36029571294784546,
      "learning_rate": 7.357788130176657e-06,
      "loss": 0.0209,
      "step": 821980
    },
    {
      "epoch": 1.345221028652226,
      "grad_norm": 1.0248684883117676,
      "learning_rate": 7.357722237963141e-06,
      "loss": 0.0176,
      "step": 822000
    },
    {
      "epoch": 1.3452537590908793,
      "grad_norm": 0.1083277016878128,
      "learning_rate": 7.357656345749623e-06,
      "loss": 0.0208,
      "step": 822020
    },
    {
      "epoch": 1.3452864895295327,
      "grad_norm": 0.23852388560771942,
      "learning_rate": 7.357590453536106e-06,
      "loss": 0.0287,
      "step": 822040
    },
    {
      "epoch": 1.345319219968186,
      "grad_norm": 0.37890228629112244,
      "learning_rate": 7.357524561322589e-06,
      "loss": 0.0194,
      "step": 822060
    },
    {
      "epoch": 1.3453519504068394,
      "grad_norm": 0.40502339601516724,
      "learning_rate": 7.357458669109072e-06,
      "loss": 0.0195,
      "step": 822080
    },
    {
      "epoch": 1.3453846808454926,
      "grad_norm": 0.3708289563655853,
      "learning_rate": 7.3573927768955545e-06,
      "loss": 0.017,
      "step": 822100
    },
    {
      "epoch": 1.3454174112841462,
      "grad_norm": 1.2444654703140259,
      "learning_rate": 7.357326884682038e-06,
      "loss": 0.0214,
      "step": 822120
    },
    {
      "epoch": 1.3454501417227993,
      "grad_norm": 0.513696551322937,
      "learning_rate": 7.35726099246852e-06,
      "loss": 0.0232,
      "step": 822140
    },
    {
      "epoch": 1.3454828721614527,
      "grad_norm": 0.5408603549003601,
      "learning_rate": 7.3571951002550036e-06,
      "loss": 0.0177,
      "step": 822160
    },
    {
      "epoch": 1.345515602600106,
      "grad_norm": 4.428795337677002,
      "learning_rate": 7.357129208041487e-06,
      "loss": 0.0189,
      "step": 822180
    },
    {
      "epoch": 1.3455483330387594,
      "grad_norm": 0.18185332417488098,
      "learning_rate": 7.357063315827969e-06,
      "loss": 0.0139,
      "step": 822200
    },
    {
      "epoch": 1.3455810634774128,
      "grad_norm": 2.9558937549591064,
      "learning_rate": 7.356997423614453e-06,
      "loss": 0.0204,
      "step": 822220
    },
    {
      "epoch": 1.345613793916066,
      "grad_norm": 0.1257352977991104,
      "learning_rate": 7.3569315314009345e-06,
      "loss": 0.0264,
      "step": 822240
    },
    {
      "epoch": 1.3456465243547193,
      "grad_norm": 0.6244392991065979,
      "learning_rate": 7.356865639187418e-06,
      "loss": 0.0227,
      "step": 822260
    },
    {
      "epoch": 1.3456792547933727,
      "grad_norm": 0.9358810782432556,
      "learning_rate": 7.356799746973901e-06,
      "loss": 0.02,
      "step": 822280
    },
    {
      "epoch": 1.345711985232026,
      "grad_norm": 0.25554338097572327,
      "learning_rate": 7.356733854760384e-06,
      "loss": 0.0299,
      "step": 822300
    },
    {
      "epoch": 1.3457447156706794,
      "grad_norm": 1.0707441568374634,
      "learning_rate": 7.356667962546866e-06,
      "loss": 0.023,
      "step": 822320
    },
    {
      "epoch": 1.3457774461093328,
      "grad_norm": 0.3278898596763611,
      "learning_rate": 7.35660207033335e-06,
      "loss": 0.0204,
      "step": 822340
    },
    {
      "epoch": 1.3458101765479862,
      "grad_norm": 0.7259889841079712,
      "learning_rate": 7.356536178119832e-06,
      "loss": 0.0177,
      "step": 822360
    },
    {
      "epoch": 1.3458429069866393,
      "grad_norm": 0.41234320402145386,
      "learning_rate": 7.356470285906315e-06,
      "loss": 0.0202,
      "step": 822380
    },
    {
      "epoch": 1.3458756374252927,
      "grad_norm": 0.44472575187683105,
      "learning_rate": 7.356404393692797e-06,
      "loss": 0.0279,
      "step": 822400
    },
    {
      "epoch": 1.345908367863946,
      "grad_norm": 0.29775887727737427,
      "learning_rate": 7.356338501479281e-06,
      "loss": 0.0164,
      "step": 822420
    },
    {
      "epoch": 1.3459410983025994,
      "grad_norm": 1.1514883041381836,
      "learning_rate": 7.356272609265764e-06,
      "loss": 0.0257,
      "step": 822440
    },
    {
      "epoch": 1.3459738287412528,
      "grad_norm": 0.9364848136901855,
      "learning_rate": 7.356206717052246e-06,
      "loss": 0.0236,
      "step": 822460
    },
    {
      "epoch": 1.3460065591799062,
      "grad_norm": 0.9039292931556702,
      "learning_rate": 7.356140824838729e-06,
      "loss": 0.0221,
      "step": 822480
    },
    {
      "epoch": 1.3460392896185596,
      "grad_norm": 0.16917863488197327,
      "learning_rate": 7.356074932625213e-06,
      "loss": 0.0188,
      "step": 822500
    },
    {
      "epoch": 1.3460720200572127,
      "grad_norm": 0.38860973715782166,
      "learning_rate": 7.3560090404116946e-06,
      "loss": 0.0171,
      "step": 822520
    },
    {
      "epoch": 1.346104750495866,
      "grad_norm": 0.5361868739128113,
      "learning_rate": 7.355943148198178e-06,
      "loss": 0.0197,
      "step": 822540
    },
    {
      "epoch": 1.3461374809345195,
      "grad_norm": 0.534732460975647,
      "learning_rate": 7.355877255984662e-06,
      "loss": 0.0166,
      "step": 822560
    },
    {
      "epoch": 1.3461702113731728,
      "grad_norm": 0.8689274787902832,
      "learning_rate": 7.355811363771144e-06,
      "loss": 0.0219,
      "step": 822580
    },
    {
      "epoch": 1.3462029418118262,
      "grad_norm": 1.2946462631225586,
      "learning_rate": 7.355745471557627e-06,
      "loss": 0.0186,
      "step": 822600
    },
    {
      "epoch": 1.3462356722504796,
      "grad_norm": 1.138789415359497,
      "learning_rate": 7.355679579344109e-06,
      "loss": 0.019,
      "step": 822620
    },
    {
      "epoch": 1.346268402689133,
      "grad_norm": 0.7493994832038879,
      "learning_rate": 7.355613687130593e-06,
      "loss": 0.0275,
      "step": 822640
    },
    {
      "epoch": 1.346301133127786,
      "grad_norm": 0.4354155957698822,
      "learning_rate": 7.3555477949170754e-06,
      "loss": 0.0221,
      "step": 822660
    },
    {
      "epoch": 1.3463338635664395,
      "grad_norm": 0.09269477427005768,
      "learning_rate": 7.355481902703558e-06,
      "loss": 0.0148,
      "step": 822680
    },
    {
      "epoch": 1.3463665940050928,
      "grad_norm": 0.6890246868133545,
      "learning_rate": 7.355416010490041e-06,
      "loss": 0.0234,
      "step": 822700
    },
    {
      "epoch": 1.3463993244437462,
      "grad_norm": 0.28497314453125,
      "learning_rate": 7.3553501182765245e-06,
      "loss": 0.0131,
      "step": 822720
    },
    {
      "epoch": 1.3464320548823996,
      "grad_norm": 1.3616267442703247,
      "learning_rate": 7.355284226063006e-06,
      "loss": 0.0269,
      "step": 822740
    },
    {
      "epoch": 1.3464647853210527,
      "grad_norm": 0.26226693391799927,
      "learning_rate": 7.35521833384949e-06,
      "loss": 0.0177,
      "step": 822760
    },
    {
      "epoch": 1.3464975157597063,
      "grad_norm": 0.40981346368789673,
      "learning_rate": 7.355152441635972e-06,
      "loss": 0.0163,
      "step": 822780
    },
    {
      "epoch": 1.3465302461983595,
      "grad_norm": 0.25837913155555725,
      "learning_rate": 7.3550865494224555e-06,
      "loss": 0.0222,
      "step": 822800
    },
    {
      "epoch": 1.3465629766370129,
      "grad_norm": 1.424163579940796,
      "learning_rate": 7.355020657208937e-06,
      "loss": 0.0237,
      "step": 822820
    },
    {
      "epoch": 1.3465957070756662,
      "grad_norm": 0.34788113832473755,
      "learning_rate": 7.354954764995421e-06,
      "loss": 0.0291,
      "step": 822840
    },
    {
      "epoch": 1.3466284375143196,
      "grad_norm": 0.15590523183345795,
      "learning_rate": 7.354888872781904e-06,
      "loss": 0.0172,
      "step": 822860
    },
    {
      "epoch": 1.346661167952973,
      "grad_norm": 0.20572252571582794,
      "learning_rate": 7.3548229805683864e-06,
      "loss": 0.0147,
      "step": 822880
    },
    {
      "epoch": 1.3466938983916261,
      "grad_norm": 0.40412458777427673,
      "learning_rate": 7.35475708835487e-06,
      "loss": 0.0206,
      "step": 822900
    },
    {
      "epoch": 1.3467266288302797,
      "grad_norm": 1.1317039728164673,
      "learning_rate": 7.354691196141353e-06,
      "loss": 0.0176,
      "step": 822920
    },
    {
      "epoch": 1.3467593592689329,
      "grad_norm": 0.689475953578949,
      "learning_rate": 7.3546253039278355e-06,
      "loss": 0.0239,
      "step": 822940
    },
    {
      "epoch": 1.3467920897075862,
      "grad_norm": 0.5913838744163513,
      "learning_rate": 7.354559411714318e-06,
      "loss": 0.0294,
      "step": 822960
    },
    {
      "epoch": 1.3468248201462396,
      "grad_norm": 0.32823655009269714,
      "learning_rate": 7.354493519500802e-06,
      "loss": 0.0128,
      "step": 822980
    },
    {
      "epoch": 1.346857550584893,
      "grad_norm": 0.3742161691188812,
      "learning_rate": 7.354427627287284e-06,
      "loss": 0.0151,
      "step": 823000
    },
    {
      "epoch": 1.3468902810235464,
      "grad_norm": 1.0772099494934082,
      "learning_rate": 7.354361735073767e-06,
      "loss": 0.024,
      "step": 823020
    },
    {
      "epoch": 1.3469230114621995,
      "grad_norm": 0.8020733594894409,
      "learning_rate": 7.354295842860249e-06,
      "loss": 0.0221,
      "step": 823040
    },
    {
      "epoch": 1.3469557419008529,
      "grad_norm": 0.44236671924591064,
      "learning_rate": 7.354229950646733e-06,
      "loss": 0.0177,
      "step": 823060
    },
    {
      "epoch": 1.3469884723395062,
      "grad_norm": 0.6689144968986511,
      "learning_rate": 7.3541640584332155e-06,
      "loss": 0.0215,
      "step": 823080
    },
    {
      "epoch": 1.3470212027781596,
      "grad_norm": 0.3392280638217926,
      "learning_rate": 7.354098166219698e-06,
      "loss": 0.0209,
      "step": 823100
    },
    {
      "epoch": 1.347053933216813,
      "grad_norm": 0.7978995442390442,
      "learning_rate": 7.354032274006181e-06,
      "loss": 0.031,
      "step": 823120
    },
    {
      "epoch": 1.3470866636554664,
      "grad_norm": 0.8984240293502808,
      "learning_rate": 7.353966381792665e-06,
      "loss": 0.0257,
      "step": 823140
    },
    {
      "epoch": 1.3471193940941197,
      "grad_norm": 0.6559234857559204,
      "learning_rate": 7.3539004895791465e-06,
      "loss": 0.0289,
      "step": 823160
    },
    {
      "epoch": 1.3471521245327729,
      "grad_norm": 0.7680162787437439,
      "learning_rate": 7.35383459736563e-06,
      "loss": 0.0246,
      "step": 823180
    },
    {
      "epoch": 1.3471848549714263,
      "grad_norm": 0.2083221673965454,
      "learning_rate": 7.353768705152112e-06,
      "loss": 0.0283,
      "step": 823200
    },
    {
      "epoch": 1.3472175854100796,
      "grad_norm": 0.802558958530426,
      "learning_rate": 7.3537028129385955e-06,
      "loss": 0.0142,
      "step": 823220
    },
    {
      "epoch": 1.347250315848733,
      "grad_norm": 0.7818681597709656,
      "learning_rate": 7.353636920725079e-06,
      "loss": 0.0199,
      "step": 823240
    },
    {
      "epoch": 1.3472830462873864,
      "grad_norm": 0.49963605403900146,
      "learning_rate": 7.353571028511561e-06,
      "loss": 0.019,
      "step": 823260
    },
    {
      "epoch": 1.3473157767260397,
      "grad_norm": 0.809108555316925,
      "learning_rate": 7.353505136298045e-06,
      "loss": 0.0206,
      "step": 823280
    },
    {
      "epoch": 1.3473485071646931,
      "grad_norm": 0.265195369720459,
      "learning_rate": 7.353439244084527e-06,
      "loss": 0.0293,
      "step": 823300
    },
    {
      "epoch": 1.3473812376033463,
      "grad_norm": 0.2619223892688751,
      "learning_rate": 7.35337335187101e-06,
      "loss": 0.0375,
      "step": 823320
    },
    {
      "epoch": 1.3474139680419996,
      "grad_norm": 0.22980913519859314,
      "learning_rate": 7.353307459657493e-06,
      "loss": 0.0119,
      "step": 823340
    },
    {
      "epoch": 1.347446698480653,
      "grad_norm": 0.8411771655082703,
      "learning_rate": 7.353241567443976e-06,
      "loss": 0.0304,
      "step": 823360
    },
    {
      "epoch": 1.3474794289193064,
      "grad_norm": 0.23524686694145203,
      "learning_rate": 7.353175675230458e-06,
      "loss": 0.0263,
      "step": 823380
    },
    {
      "epoch": 1.3475121593579598,
      "grad_norm": 0.17330396175384521,
      "learning_rate": 7.353109783016942e-06,
      "loss": 0.0239,
      "step": 823400
    },
    {
      "epoch": 1.3475448897966131,
      "grad_norm": 0.20611292123794556,
      "learning_rate": 7.353043890803424e-06,
      "loss": 0.0181,
      "step": 823420
    },
    {
      "epoch": 1.3475776202352665,
      "grad_norm": 0.4492011070251465,
      "learning_rate": 7.352977998589907e-06,
      "loss": 0.0302,
      "step": 823440
    },
    {
      "epoch": 1.3476103506739197,
      "grad_norm": 1.3561029434204102,
      "learning_rate": 7.35291210637639e-06,
      "loss": 0.0267,
      "step": 823460
    },
    {
      "epoch": 1.347643081112573,
      "grad_norm": 0.63379967212677,
      "learning_rate": 7.352846214162873e-06,
      "loss": 0.031,
      "step": 823480
    },
    {
      "epoch": 1.3476758115512264,
      "grad_norm": 0.2620702385902405,
      "learning_rate": 7.352780321949356e-06,
      "loss": 0.0282,
      "step": 823500
    },
    {
      "epoch": 1.3477085419898798,
      "grad_norm": 0.9972509145736694,
      "learning_rate": 7.352714429735839e-06,
      "loss": 0.0246,
      "step": 823520
    },
    {
      "epoch": 1.3477412724285331,
      "grad_norm": 0.26300105452537537,
      "learning_rate": 7.352648537522321e-06,
      "loss": 0.0187,
      "step": 823540
    },
    {
      "epoch": 1.3477740028671863,
      "grad_norm": 0.5522473454475403,
      "learning_rate": 7.352582645308805e-06,
      "loss": 0.018,
      "step": 823560
    },
    {
      "epoch": 1.3478067333058399,
      "grad_norm": 0.20447392761707306,
      "learning_rate": 7.3525167530952866e-06,
      "loss": 0.0152,
      "step": 823580
    },
    {
      "epoch": 1.347839463744493,
      "grad_norm": 0.07821402698755264,
      "learning_rate": 7.35245086088177e-06,
      "loss": 0.0276,
      "step": 823600
    },
    {
      "epoch": 1.3478721941831464,
      "grad_norm": 0.826030969619751,
      "learning_rate": 7.352384968668254e-06,
      "loss": 0.0267,
      "step": 823620
    },
    {
      "epoch": 1.3479049246217998,
      "grad_norm": 0.6900026798248291,
      "learning_rate": 7.352319076454736e-06,
      "loss": 0.0239,
      "step": 823640
    },
    {
      "epoch": 1.3479376550604532,
      "grad_norm": 0.44862768054008484,
      "learning_rate": 7.352253184241219e-06,
      "loss": 0.0183,
      "step": 823660
    },
    {
      "epoch": 1.3479703854991065,
      "grad_norm": 0.8000357747077942,
      "learning_rate": 7.352187292027702e-06,
      "loss": 0.0246,
      "step": 823680
    },
    {
      "epoch": 1.3480031159377597,
      "grad_norm": 0.3946532905101776,
      "learning_rate": 7.352121399814185e-06,
      "loss": 0.0216,
      "step": 823700
    },
    {
      "epoch": 1.3480358463764133,
      "grad_norm": 0.11870325356721878,
      "learning_rate": 7.3520555076006674e-06,
      "loss": 0.0158,
      "step": 823720
    },
    {
      "epoch": 1.3480685768150664,
      "grad_norm": 0.4218192994594574,
      "learning_rate": 7.351989615387151e-06,
      "loss": 0.0222,
      "step": 823740
    },
    {
      "epoch": 1.3481013072537198,
      "grad_norm": 0.5055299997329712,
      "learning_rate": 7.351923723173633e-06,
      "loss": 0.017,
      "step": 823760
    },
    {
      "epoch": 1.3481340376923732,
      "grad_norm": 0.08220122754573822,
      "learning_rate": 7.3518578309601165e-06,
      "loss": 0.0198,
      "step": 823780
    },
    {
      "epoch": 1.3481667681310265,
      "grad_norm": 1.0269628763198853,
      "learning_rate": 7.351791938746598e-06,
      "loss": 0.0176,
      "step": 823800
    },
    {
      "epoch": 1.34819949856968,
      "grad_norm": 0.19482147693634033,
      "learning_rate": 7.351726046533082e-06,
      "loss": 0.0192,
      "step": 823820
    },
    {
      "epoch": 1.348232229008333,
      "grad_norm": 0.7576383352279663,
      "learning_rate": 7.351660154319564e-06,
      "loss": 0.0298,
      "step": 823840
    },
    {
      "epoch": 1.3482649594469864,
      "grad_norm": 0.45491480827331543,
      "learning_rate": 7.3515942621060475e-06,
      "loss": 0.0168,
      "step": 823860
    },
    {
      "epoch": 1.3482976898856398,
      "grad_norm": 1.0133205652236938,
      "learning_rate": 7.35152836989253e-06,
      "loss": 0.0163,
      "step": 823880
    },
    {
      "epoch": 1.3483304203242932,
      "grad_norm": 0.25503411889076233,
      "learning_rate": 7.351462477679013e-06,
      "loss": 0.0216,
      "step": 823900
    },
    {
      "epoch": 1.3483631507629465,
      "grad_norm": 0.9778916239738464,
      "learning_rate": 7.351396585465496e-06,
      "loss": 0.0273,
      "step": 823920
    },
    {
      "epoch": 1.3483958812016,
      "grad_norm": 0.41247060894966125,
      "learning_rate": 7.351330693251979e-06,
      "loss": 0.021,
      "step": 823940
    },
    {
      "epoch": 1.3484286116402533,
      "grad_norm": 0.4994605481624603,
      "learning_rate": 7.351264801038462e-06,
      "loss": 0.0227,
      "step": 823960
    },
    {
      "epoch": 1.3484613420789064,
      "grad_norm": 0.7844666838645935,
      "learning_rate": 7.351198908824945e-06,
      "loss": 0.024,
      "step": 823980
    },
    {
      "epoch": 1.3484940725175598,
      "grad_norm": 0.6717315912246704,
      "learning_rate": 7.351133016611428e-06,
      "loss": 0.0224,
      "step": 824000
    },
    {
      "epoch": 1.3485268029562132,
      "grad_norm": 1.3223013877868652,
      "learning_rate": 7.35106712439791e-06,
      "loss": 0.0156,
      "step": 824020
    },
    {
      "epoch": 1.3485595333948666,
      "grad_norm": 0.8728993535041809,
      "learning_rate": 7.351001232184394e-06,
      "loss": 0.0269,
      "step": 824040
    },
    {
      "epoch": 1.34859226383352,
      "grad_norm": 0.9156413078308105,
      "learning_rate": 7.350935339970876e-06,
      "loss": 0.0166,
      "step": 824060
    },
    {
      "epoch": 1.3486249942721733,
      "grad_norm": 0.8285914659500122,
      "learning_rate": 7.350869447757359e-06,
      "loss": 0.0221,
      "step": 824080
    },
    {
      "epoch": 1.3486577247108267,
      "grad_norm": 0.45050209760665894,
      "learning_rate": 7.350803555543842e-06,
      "loss": 0.0224,
      "step": 824100
    },
    {
      "epoch": 1.3486904551494798,
      "grad_norm": 1.0749380588531494,
      "learning_rate": 7.350737663330325e-06,
      "loss": 0.0218,
      "step": 824120
    },
    {
      "epoch": 1.3487231855881332,
      "grad_norm": 0.31734156608581543,
      "learning_rate": 7.3506717711168075e-06,
      "loss": 0.0208,
      "step": 824140
    },
    {
      "epoch": 1.3487559160267866,
      "grad_norm": 0.7597174048423767,
      "learning_rate": 7.350605878903291e-06,
      "loss": 0.0293,
      "step": 824160
    },
    {
      "epoch": 1.34878864646544,
      "grad_norm": 0.43557852506637573,
      "learning_rate": 7.350539986689773e-06,
      "loss": 0.026,
      "step": 824180
    },
    {
      "epoch": 1.3488213769040933,
      "grad_norm": 0.7182266712188721,
      "learning_rate": 7.3504740944762566e-06,
      "loss": 0.0163,
      "step": 824200
    },
    {
      "epoch": 1.3488541073427467,
      "grad_norm": 0.40759164094924927,
      "learning_rate": 7.3504082022627385e-06,
      "loss": 0.0216,
      "step": 824220
    },
    {
      "epoch": 1.3488868377814,
      "grad_norm": 0.5104668736457825,
      "learning_rate": 7.350342310049222e-06,
      "loss": 0.0219,
      "step": 824240
    },
    {
      "epoch": 1.3489195682200532,
      "grad_norm": 0.7840398550033569,
      "learning_rate": 7.350276417835705e-06,
      "loss": 0.0227,
      "step": 824260
    },
    {
      "epoch": 1.3489522986587066,
      "grad_norm": 0.5282135605812073,
      "learning_rate": 7.3502105256221875e-06,
      "loss": 0.0212,
      "step": 824280
    },
    {
      "epoch": 1.34898502909736,
      "grad_norm": 0.7674856185913086,
      "learning_rate": 7.350144633408671e-06,
      "loss": 0.0198,
      "step": 824300
    },
    {
      "epoch": 1.3490177595360133,
      "grad_norm": 0.8877444863319397,
      "learning_rate": 7.350078741195154e-06,
      "loss": 0.0171,
      "step": 824320
    },
    {
      "epoch": 1.3490504899746667,
      "grad_norm": 0.5452256798744202,
      "learning_rate": 7.350012848981637e-06,
      "loss": 0.0262,
      "step": 824340
    },
    {
      "epoch": 1.3490832204133199,
      "grad_norm": 1.7450700998306274,
      "learning_rate": 7.349946956768119e-06,
      "loss": 0.0414,
      "step": 824360
    },
    {
      "epoch": 1.3491159508519734,
      "grad_norm": 0.16296622157096863,
      "learning_rate": 7.349881064554603e-06,
      "loss": 0.0197,
      "step": 824380
    },
    {
      "epoch": 1.3491486812906266,
      "grad_norm": 0.4525229036808014,
      "learning_rate": 7.349815172341085e-06,
      "loss": 0.0179,
      "step": 824400
    },
    {
      "epoch": 1.34918141172928,
      "grad_norm": 0.18966588377952576,
      "learning_rate": 7.349749280127568e-06,
      "loss": 0.0136,
      "step": 824420
    },
    {
      "epoch": 1.3492141421679333,
      "grad_norm": 1.9547654390335083,
      "learning_rate": 7.34968338791405e-06,
      "loss": 0.0213,
      "step": 824440
    },
    {
      "epoch": 1.3492468726065867,
      "grad_norm": 0.763478696346283,
      "learning_rate": 7.349617495700534e-06,
      "loss": 0.0184,
      "step": 824460
    },
    {
      "epoch": 1.34927960304524,
      "grad_norm": 0.833772599697113,
      "learning_rate": 7.349551603487017e-06,
      "loss": 0.0158,
      "step": 824480
    },
    {
      "epoch": 1.3493123334838932,
      "grad_norm": 0.8283818960189819,
      "learning_rate": 7.349485711273499e-06,
      "loss": 0.0279,
      "step": 824500
    },
    {
      "epoch": 1.3493450639225468,
      "grad_norm": 0.7237301468849182,
      "learning_rate": 7.349419819059982e-06,
      "loss": 0.0186,
      "step": 824520
    },
    {
      "epoch": 1.3493777943612,
      "grad_norm": 0.32244598865509033,
      "learning_rate": 7.349353926846466e-06,
      "loss": 0.027,
      "step": 824540
    },
    {
      "epoch": 1.3494105247998534,
      "grad_norm": 0.6584784388542175,
      "learning_rate": 7.349288034632948e-06,
      "loss": 0.0233,
      "step": 824560
    },
    {
      "epoch": 1.3494432552385067,
      "grad_norm": 0.15821698307991028,
      "learning_rate": 7.349222142419431e-06,
      "loss": 0.0115,
      "step": 824580
    },
    {
      "epoch": 1.34947598567716,
      "grad_norm": 0.9860780239105225,
      "learning_rate": 7.349156250205913e-06,
      "loss": 0.0328,
      "step": 824600
    },
    {
      "epoch": 1.3495087161158135,
      "grad_norm": 0.5717847347259521,
      "learning_rate": 7.349090357992397e-06,
      "loss": 0.0183,
      "step": 824620
    },
    {
      "epoch": 1.3495414465544666,
      "grad_norm": 0.21101325750350952,
      "learning_rate": 7.3490244657788785e-06,
      "loss": 0.0311,
      "step": 824640
    },
    {
      "epoch": 1.34957417699312,
      "grad_norm": 0.4170248508453369,
      "learning_rate": 7.348958573565362e-06,
      "loss": 0.0161,
      "step": 824660
    },
    {
      "epoch": 1.3496069074317734,
      "grad_norm": 0.2162582278251648,
      "learning_rate": 7.348892681351846e-06,
      "loss": 0.0289,
      "step": 824680
    },
    {
      "epoch": 1.3496396378704267,
      "grad_norm": 0.29163751006126404,
      "learning_rate": 7.3488267891383285e-06,
      "loss": 0.0298,
      "step": 824700
    },
    {
      "epoch": 1.34967236830908,
      "grad_norm": 0.4986479580402374,
      "learning_rate": 7.348760896924811e-06,
      "loss": 0.0166,
      "step": 824720
    },
    {
      "epoch": 1.3497050987477335,
      "grad_norm": 0.7271240949630737,
      "learning_rate": 7.348695004711294e-06,
      "loss": 0.0269,
      "step": 824740
    },
    {
      "epoch": 1.3497378291863869,
      "grad_norm": 0.6309851408004761,
      "learning_rate": 7.3486291124977775e-06,
      "loss": 0.0202,
      "step": 824760
    },
    {
      "epoch": 1.34977055962504,
      "grad_norm": 1.9373724460601807,
      "learning_rate": 7.348563220284259e-06,
      "loss": 0.0248,
      "step": 824780
    },
    {
      "epoch": 1.3498032900636934,
      "grad_norm": 0.3313335180282593,
      "learning_rate": 7.348497328070743e-06,
      "loss": 0.0178,
      "step": 824800
    },
    {
      "epoch": 1.3498360205023467,
      "grad_norm": 0.33745449781417847,
      "learning_rate": 7.348431435857225e-06,
      "loss": 0.0315,
      "step": 824820
    },
    {
      "epoch": 1.3498687509410001,
      "grad_norm": 0.7733927369117737,
      "learning_rate": 7.3483655436437085e-06,
      "loss": 0.0212,
      "step": 824840
    },
    {
      "epoch": 1.3499014813796535,
      "grad_norm": 0.21528252959251404,
      "learning_rate": 7.34829965143019e-06,
      "loss": 0.0219,
      "step": 824860
    },
    {
      "epoch": 1.3499342118183069,
      "grad_norm": 1.1050975322723389,
      "learning_rate": 7.348233759216674e-06,
      "loss": 0.0332,
      "step": 824880
    },
    {
      "epoch": 1.3499669422569602,
      "grad_norm": 1.7120389938354492,
      "learning_rate": 7.348167867003157e-06,
      "loss": 0.0281,
      "step": 824900
    },
    {
      "epoch": 1.3499996726956134,
      "grad_norm": 0.40050008893013,
      "learning_rate": 7.3481019747896394e-06,
      "loss": 0.0185,
      "step": 824920
    },
    {
      "epoch": 1.3500324031342668,
      "grad_norm": 0.21215984225273132,
      "learning_rate": 7.348036082576122e-06,
      "loss": 0.019,
      "step": 824940
    },
    {
      "epoch": 1.3500651335729201,
      "grad_norm": 0.5164628028869629,
      "learning_rate": 7.347970190362606e-06,
      "loss": 0.0253,
      "step": 824960
    },
    {
      "epoch": 1.3500978640115735,
      "grad_norm": 0.15335087478160858,
      "learning_rate": 7.347904298149088e-06,
      "loss": 0.0179,
      "step": 824980
    },
    {
      "epoch": 1.3501305944502269,
      "grad_norm": 0.13215932250022888,
      "learning_rate": 7.347838405935571e-06,
      "loss": 0.0321,
      "step": 825000
    },
    {
      "epoch": 1.35016332488888,
      "grad_norm": 0.433078408241272,
      "learning_rate": 7.347772513722055e-06,
      "loss": 0.0239,
      "step": 825020
    },
    {
      "epoch": 1.3501960553275336,
      "grad_norm": 1.2124751806259155,
      "learning_rate": 7.347706621508537e-06,
      "loss": 0.0202,
      "step": 825040
    },
    {
      "epoch": 1.3502287857661868,
      "grad_norm": 0.47580528259277344,
      "learning_rate": 7.34764072929502e-06,
      "loss": 0.0219,
      "step": 825060
    },
    {
      "epoch": 1.3502615162048401,
      "grad_norm": 0.6534553170204163,
      "learning_rate": 7.347574837081502e-06,
      "loss": 0.035,
      "step": 825080
    },
    {
      "epoch": 1.3502942466434935,
      "grad_norm": 0.7027201652526855,
      "learning_rate": 7.347508944867986e-06,
      "loss": 0.02,
      "step": 825100
    },
    {
      "epoch": 1.3503269770821469,
      "grad_norm": 1.0930163860321045,
      "learning_rate": 7.3474430526544685e-06,
      "loss": 0.0309,
      "step": 825120
    },
    {
      "epoch": 1.3503597075208003,
      "grad_norm": 1.254679560661316,
      "learning_rate": 7.347377160440951e-06,
      "loss": 0.0232,
      "step": 825140
    },
    {
      "epoch": 1.3503924379594534,
      "grad_norm": 1.2264676094055176,
      "learning_rate": 7.347311268227434e-06,
      "loss": 0.0195,
      "step": 825160
    },
    {
      "epoch": 1.350425168398107,
      "grad_norm": 1.3179833889007568,
      "learning_rate": 7.347245376013918e-06,
      "loss": 0.0292,
      "step": 825180
    },
    {
      "epoch": 1.3504578988367602,
      "grad_norm": 0.20861448347568512,
      "learning_rate": 7.3471794838003995e-06,
      "loss": 0.0275,
      "step": 825200
    },
    {
      "epoch": 1.3504906292754135,
      "grad_norm": 0.4247858226299286,
      "learning_rate": 7.347113591586883e-06,
      "loss": 0.0222,
      "step": 825220
    },
    {
      "epoch": 1.350523359714067,
      "grad_norm": 0.3681603670120239,
      "learning_rate": 7.347047699373365e-06,
      "loss": 0.0204,
      "step": 825240
    },
    {
      "epoch": 1.3505560901527203,
      "grad_norm": 0.6067835688591003,
      "learning_rate": 7.3469818071598486e-06,
      "loss": 0.0248,
      "step": 825260
    },
    {
      "epoch": 1.3505888205913736,
      "grad_norm": 0.5356314778327942,
      "learning_rate": 7.346915914946331e-06,
      "loss": 0.0196,
      "step": 825280
    },
    {
      "epoch": 1.3506215510300268,
      "grad_norm": 1.509475588798523,
      "learning_rate": 7.346850022732814e-06,
      "loss": 0.0193,
      "step": 825300
    },
    {
      "epoch": 1.3506542814686802,
      "grad_norm": 0.5988844633102417,
      "learning_rate": 7.346784130519297e-06,
      "loss": 0.0232,
      "step": 825320
    },
    {
      "epoch": 1.3506870119073335,
      "grad_norm": 3.8708245754241943,
      "learning_rate": 7.34671823830578e-06,
      "loss": 0.022,
      "step": 825340
    },
    {
      "epoch": 1.350719742345987,
      "grad_norm": 0.89693284034729,
      "learning_rate": 7.346652346092263e-06,
      "loss": 0.0229,
      "step": 825360
    },
    {
      "epoch": 1.3507524727846403,
      "grad_norm": 0.8099498748779297,
      "learning_rate": 7.346586453878746e-06,
      "loss": 0.0161,
      "step": 825380
    },
    {
      "epoch": 1.3507852032232937,
      "grad_norm": 0.2790960371494293,
      "learning_rate": 7.3465205616652294e-06,
      "loss": 0.0216,
      "step": 825400
    },
    {
      "epoch": 1.350817933661947,
      "grad_norm": 0.7752079367637634,
      "learning_rate": 7.346454669451711e-06,
      "loss": 0.0218,
      "step": 825420
    },
    {
      "epoch": 1.3508506641006002,
      "grad_norm": 0.45528456568717957,
      "learning_rate": 7.346388777238195e-06,
      "loss": 0.0281,
      "step": 825440
    },
    {
      "epoch": 1.3508833945392535,
      "grad_norm": 0.1879008710384369,
      "learning_rate": 7.346322885024677e-06,
      "loss": 0.0247,
      "step": 825460
    },
    {
      "epoch": 1.350916124977907,
      "grad_norm": 0.6667335033416748,
      "learning_rate": 7.34625699281116e-06,
      "loss": 0.0268,
      "step": 825480
    },
    {
      "epoch": 1.3509488554165603,
      "grad_norm": 0.5414820313453674,
      "learning_rate": 7.346191100597643e-06,
      "loss": 0.0249,
      "step": 825500
    },
    {
      "epoch": 1.3509815858552137,
      "grad_norm": 0.6252769231796265,
      "learning_rate": 7.346125208384126e-06,
      "loss": 0.0207,
      "step": 825520
    },
    {
      "epoch": 1.351014316293867,
      "grad_norm": 1.0518919229507446,
      "learning_rate": 7.346059316170609e-06,
      "loss": 0.0174,
      "step": 825540
    },
    {
      "epoch": 1.3510470467325204,
      "grad_norm": 0.5215915441513062,
      "learning_rate": 7.345993423957092e-06,
      "loss": 0.0214,
      "step": 825560
    },
    {
      "epoch": 1.3510797771711736,
      "grad_norm": 0.6474192142486572,
      "learning_rate": 7.345927531743574e-06,
      "loss": 0.0175,
      "step": 825580
    },
    {
      "epoch": 1.351112507609827,
      "grad_norm": 1.549842119216919,
      "learning_rate": 7.345861639530058e-06,
      "loss": 0.0179,
      "step": 825600
    },
    {
      "epoch": 1.3511452380484803,
      "grad_norm": 0.10531580448150635,
      "learning_rate": 7.3457957473165396e-06,
      "loss": 0.0169,
      "step": 825620
    },
    {
      "epoch": 1.3511779684871337,
      "grad_norm": 0.1125035509467125,
      "learning_rate": 7.345729855103023e-06,
      "loss": 0.0235,
      "step": 825640
    },
    {
      "epoch": 1.351210698925787,
      "grad_norm": 0.42776262760162354,
      "learning_rate": 7.345663962889505e-06,
      "loss": 0.0212,
      "step": 825660
    },
    {
      "epoch": 1.3512434293644404,
      "grad_norm": 0.3746814727783203,
      "learning_rate": 7.345598070675989e-06,
      "loss": 0.0242,
      "step": 825680
    },
    {
      "epoch": 1.3512761598030938,
      "grad_norm": 0.47710004448890686,
      "learning_rate": 7.345532178462472e-06,
      "loss": 0.0202,
      "step": 825700
    },
    {
      "epoch": 1.351308890241747,
      "grad_norm": 0.9824322462081909,
      "learning_rate": 7.345466286248954e-06,
      "loss": 0.0294,
      "step": 825720
    },
    {
      "epoch": 1.3513416206804003,
      "grad_norm": 0.9315184354782104,
      "learning_rate": 7.345400394035438e-06,
      "loss": 0.0133,
      "step": 825740
    },
    {
      "epoch": 1.3513743511190537,
      "grad_norm": 0.8203189969062805,
      "learning_rate": 7.3453345018219204e-06,
      "loss": 0.0199,
      "step": 825760
    },
    {
      "epoch": 1.351407081557707,
      "grad_norm": 0.4871158003807068,
      "learning_rate": 7.345268609608403e-06,
      "loss": 0.022,
      "step": 825780
    },
    {
      "epoch": 1.3514398119963604,
      "grad_norm": 0.6580042243003845,
      "learning_rate": 7.345202717394886e-06,
      "loss": 0.016,
      "step": 825800
    },
    {
      "epoch": 1.3514725424350136,
      "grad_norm": 0.42650318145751953,
      "learning_rate": 7.3451368251813695e-06,
      "loss": 0.0217,
      "step": 825820
    },
    {
      "epoch": 1.3515052728736672,
      "grad_norm": 0.8326805233955383,
      "learning_rate": 7.345070932967851e-06,
      "loss": 0.0174,
      "step": 825840
    },
    {
      "epoch": 1.3515380033123203,
      "grad_norm": 0.45493772625923157,
      "learning_rate": 7.345005040754335e-06,
      "loss": 0.0192,
      "step": 825860
    },
    {
      "epoch": 1.3515707337509737,
      "grad_norm": 1.0296682119369507,
      "learning_rate": 7.344939148540817e-06,
      "loss": 0.0165,
      "step": 825880
    },
    {
      "epoch": 1.351603464189627,
      "grad_norm": 0.42599886655807495,
      "learning_rate": 7.3448732563273005e-06,
      "loss": 0.0215,
      "step": 825900
    },
    {
      "epoch": 1.3516361946282804,
      "grad_norm": 1.0468666553497314,
      "learning_rate": 7.344807364113783e-06,
      "loss": 0.0217,
      "step": 825920
    },
    {
      "epoch": 1.3516689250669338,
      "grad_norm": 0.2423245757818222,
      "learning_rate": 7.344741471900266e-06,
      "loss": 0.0216,
      "step": 825940
    },
    {
      "epoch": 1.351701655505587,
      "grad_norm": 0.40885627269744873,
      "learning_rate": 7.344675579686749e-06,
      "loss": 0.0237,
      "step": 825960
    },
    {
      "epoch": 1.3517343859442406,
      "grad_norm": 0.6703554391860962,
      "learning_rate": 7.344609687473232e-06,
      "loss": 0.0186,
      "step": 825980
    },
    {
      "epoch": 1.3517671163828937,
      "grad_norm": 0.39827629923820496,
      "learning_rate": 7.344543795259714e-06,
      "loss": 0.0201,
      "step": 826000
    },
    {
      "epoch": 1.351799846821547,
      "grad_norm": 0.5883249044418335,
      "learning_rate": 7.344477903046198e-06,
      "loss": 0.023,
      "step": 826020
    },
    {
      "epoch": 1.3518325772602005,
      "grad_norm": 0.22379717230796814,
      "learning_rate": 7.34441201083268e-06,
      "loss": 0.0178,
      "step": 826040
    },
    {
      "epoch": 1.3518653076988538,
      "grad_norm": 1.1527774333953857,
      "learning_rate": 7.344346118619163e-06,
      "loss": 0.0235,
      "step": 826060
    },
    {
      "epoch": 1.3518980381375072,
      "grad_norm": 0.6008167862892151,
      "learning_rate": 7.344280226405647e-06,
      "loss": 0.0263,
      "step": 826080
    },
    {
      "epoch": 1.3519307685761603,
      "grad_norm": 0.24959339201450348,
      "learning_rate": 7.344214334192129e-06,
      "loss": 0.0257,
      "step": 826100
    },
    {
      "epoch": 1.3519634990148137,
      "grad_norm": 0.40480223298072815,
      "learning_rate": 7.344148441978612e-06,
      "loss": 0.0197,
      "step": 826120
    },
    {
      "epoch": 1.351996229453467,
      "grad_norm": 0.20524567365646362,
      "learning_rate": 7.344082549765095e-06,
      "loss": 0.0231,
      "step": 826140
    },
    {
      "epoch": 1.3520289598921205,
      "grad_norm": 0.10796722024679184,
      "learning_rate": 7.344016657551578e-06,
      "loss": 0.0215,
      "step": 826160
    },
    {
      "epoch": 1.3520616903307738,
      "grad_norm": 0.9207754135131836,
      "learning_rate": 7.3439507653380605e-06,
      "loss": 0.0263,
      "step": 826180
    },
    {
      "epoch": 1.3520944207694272,
      "grad_norm": 0.36183303594589233,
      "learning_rate": 7.343884873124544e-06,
      "loss": 0.0208,
      "step": 826200
    },
    {
      "epoch": 1.3521271512080806,
      "grad_norm": 0.4807127118110657,
      "learning_rate": 7.343818980911026e-06,
      "loss": 0.0178,
      "step": 826220
    },
    {
      "epoch": 1.3521598816467337,
      "grad_norm": 0.5709972381591797,
      "learning_rate": 7.34375308869751e-06,
      "loss": 0.0219,
      "step": 826240
    },
    {
      "epoch": 1.352192612085387,
      "grad_norm": 0.5522932410240173,
      "learning_rate": 7.3436871964839915e-06,
      "loss": 0.0211,
      "step": 826260
    },
    {
      "epoch": 1.3522253425240405,
      "grad_norm": 1.2596845626831055,
      "learning_rate": 7.343621304270475e-06,
      "loss": 0.0237,
      "step": 826280
    },
    {
      "epoch": 1.3522580729626938,
      "grad_norm": 1.1500321626663208,
      "learning_rate": 7.343555412056958e-06,
      "loss": 0.0235,
      "step": 826300
    },
    {
      "epoch": 1.3522908034013472,
      "grad_norm": 1.3733431100845337,
      "learning_rate": 7.3434895198434405e-06,
      "loss": 0.0323,
      "step": 826320
    },
    {
      "epoch": 1.3523235338400006,
      "grad_norm": 1.0077024698257446,
      "learning_rate": 7.343423627629923e-06,
      "loss": 0.0235,
      "step": 826340
    },
    {
      "epoch": 1.352356264278654,
      "grad_norm": 0.34303396940231323,
      "learning_rate": 7.343357735416407e-06,
      "loss": 0.0289,
      "step": 826360
    },
    {
      "epoch": 1.3523889947173071,
      "grad_norm": 0.23433667421340942,
      "learning_rate": 7.343291843202889e-06,
      "loss": 0.0195,
      "step": 826380
    },
    {
      "epoch": 1.3524217251559605,
      "grad_norm": 0.6354859471321106,
      "learning_rate": 7.343225950989372e-06,
      "loss": 0.0231,
      "step": 826400
    },
    {
      "epoch": 1.3524544555946139,
      "grad_norm": 0.8578889966011047,
      "learning_rate": 7.343160058775856e-06,
      "loss": 0.0306,
      "step": 826420
    },
    {
      "epoch": 1.3524871860332672,
      "grad_norm": 0.2971563935279846,
      "learning_rate": 7.343094166562338e-06,
      "loss": 0.016,
      "step": 826440
    },
    {
      "epoch": 1.3525199164719206,
      "grad_norm": 1.1360797882080078,
      "learning_rate": 7.343028274348821e-06,
      "loss": 0.0231,
      "step": 826460
    },
    {
      "epoch": 1.352552646910574,
      "grad_norm": 1.1673519611358643,
      "learning_rate": 7.342962382135303e-06,
      "loss": 0.0237,
      "step": 826480
    },
    {
      "epoch": 1.3525853773492273,
      "grad_norm": 0.707858145236969,
      "learning_rate": 7.342896489921787e-06,
      "loss": 0.0273,
      "step": 826500
    },
    {
      "epoch": 1.3526181077878805,
      "grad_norm": 1.07064688205719,
      "learning_rate": 7.34283059770827e-06,
      "loss": 0.0231,
      "step": 826520
    },
    {
      "epoch": 1.3526508382265339,
      "grad_norm": 0.7232832312583923,
      "learning_rate": 7.342764705494752e-06,
      "loss": 0.0236,
      "step": 826540
    },
    {
      "epoch": 1.3526835686651872,
      "grad_norm": 0.39214879274368286,
      "learning_rate": 7.342698813281235e-06,
      "loss": 0.0259,
      "step": 826560
    },
    {
      "epoch": 1.3527162991038406,
      "grad_norm": 0.4542407691478729,
      "learning_rate": 7.342632921067719e-06,
      "loss": 0.0205,
      "step": 826580
    },
    {
      "epoch": 1.352749029542494,
      "grad_norm": 0.33405596017837524,
      "learning_rate": 7.342567028854201e-06,
      "loss": 0.0198,
      "step": 826600
    },
    {
      "epoch": 1.3527817599811471,
      "grad_norm": 0.6654362082481384,
      "learning_rate": 7.342501136640684e-06,
      "loss": 0.0246,
      "step": 826620
    },
    {
      "epoch": 1.3528144904198007,
      "grad_norm": 0.3312356472015381,
      "learning_rate": 7.342435244427166e-06,
      "loss": 0.0273,
      "step": 826640
    },
    {
      "epoch": 1.3528472208584539,
      "grad_norm": 0.8546874523162842,
      "learning_rate": 7.34236935221365e-06,
      "loss": 0.0234,
      "step": 826660
    },
    {
      "epoch": 1.3528799512971073,
      "grad_norm": 0.31427016854286194,
      "learning_rate": 7.3423034600001315e-06,
      "loss": 0.0185,
      "step": 826680
    },
    {
      "epoch": 1.3529126817357606,
      "grad_norm": 1.0737272500991821,
      "learning_rate": 7.342237567786615e-06,
      "loss": 0.0243,
      "step": 826700
    },
    {
      "epoch": 1.352945412174414,
      "grad_norm": 0.21058721840381622,
      "learning_rate": 7.342171675573098e-06,
      "loss": 0.0156,
      "step": 826720
    },
    {
      "epoch": 1.3529781426130674,
      "grad_norm": 3.0801846981048584,
      "learning_rate": 7.342105783359581e-06,
      "loss": 0.0161,
      "step": 826740
    },
    {
      "epoch": 1.3530108730517205,
      "grad_norm": 0.793768048286438,
      "learning_rate": 7.342039891146064e-06,
      "loss": 0.023,
      "step": 826760
    },
    {
      "epoch": 1.3530436034903741,
      "grad_norm": 0.9070539474487305,
      "learning_rate": 7.341973998932547e-06,
      "loss": 0.0236,
      "step": 826780
    },
    {
      "epoch": 1.3530763339290273,
      "grad_norm": 0.19393029808998108,
      "learning_rate": 7.34190810671903e-06,
      "loss": 0.0122,
      "step": 826800
    },
    {
      "epoch": 1.3531090643676806,
      "grad_norm": 1.1707994937896729,
      "learning_rate": 7.341842214505512e-06,
      "loss": 0.0192,
      "step": 826820
    },
    {
      "epoch": 1.353141794806334,
      "grad_norm": 0.10995753109455109,
      "learning_rate": 7.341776322291996e-06,
      "loss": 0.0255,
      "step": 826840
    },
    {
      "epoch": 1.3531745252449874,
      "grad_norm": 0.8692784309387207,
      "learning_rate": 7.341710430078478e-06,
      "loss": 0.0179,
      "step": 826860
    },
    {
      "epoch": 1.3532072556836408,
      "grad_norm": 0.4894607663154602,
      "learning_rate": 7.3416445378649615e-06,
      "loss": 0.027,
      "step": 826880
    },
    {
      "epoch": 1.353239986122294,
      "grad_norm": 1.424049973487854,
      "learning_rate": 7.341578645651443e-06,
      "loss": 0.02,
      "step": 826900
    },
    {
      "epoch": 1.3532727165609473,
      "grad_norm": 0.23684749007225037,
      "learning_rate": 7.341512753437927e-06,
      "loss": 0.0207,
      "step": 826920
    },
    {
      "epoch": 1.3533054469996006,
      "grad_norm": 2.134791612625122,
      "learning_rate": 7.34144686122441e-06,
      "loss": 0.0194,
      "step": 826940
    },
    {
      "epoch": 1.353338177438254,
      "grad_norm": 0.36773452162742615,
      "learning_rate": 7.3413809690108924e-06,
      "loss": 0.0159,
      "step": 826960
    },
    {
      "epoch": 1.3533709078769074,
      "grad_norm": 0.7006668448448181,
      "learning_rate": 7.341315076797375e-06,
      "loss": 0.0168,
      "step": 826980
    },
    {
      "epoch": 1.3534036383155608,
      "grad_norm": 0.2749009132385254,
      "learning_rate": 7.341249184583859e-06,
      "loss": 0.0303,
      "step": 827000
    },
    {
      "epoch": 1.3534363687542141,
      "grad_norm": 0.30498936772346497,
      "learning_rate": 7.341183292370341e-06,
      "loss": 0.0229,
      "step": 827020
    },
    {
      "epoch": 1.3534690991928673,
      "grad_norm": 0.25889456272125244,
      "learning_rate": 7.341117400156824e-06,
      "loss": 0.0152,
      "step": 827040
    },
    {
      "epoch": 1.3535018296315207,
      "grad_norm": 2.090358257293701,
      "learning_rate": 7.341051507943306e-06,
      "loss": 0.0193,
      "step": 827060
    },
    {
      "epoch": 1.353534560070174,
      "grad_norm": 1.2855241298675537,
      "learning_rate": 7.34098561572979e-06,
      "loss": 0.0318,
      "step": 827080
    },
    {
      "epoch": 1.3535672905088274,
      "grad_norm": 1.3641998767852783,
      "learning_rate": 7.3409197235162725e-06,
      "loss": 0.0259,
      "step": 827100
    },
    {
      "epoch": 1.3536000209474808,
      "grad_norm": 0.12901858985424042,
      "learning_rate": 7.340853831302755e-06,
      "loss": 0.0144,
      "step": 827120
    },
    {
      "epoch": 1.3536327513861341,
      "grad_norm": 0.8197681903839111,
      "learning_rate": 7.340787939089239e-06,
      "loss": 0.0217,
      "step": 827140
    },
    {
      "epoch": 1.3536654818247875,
      "grad_norm": 1.559266209602356,
      "learning_rate": 7.3407220468757215e-06,
      "loss": 0.0211,
      "step": 827160
    },
    {
      "epoch": 1.3536982122634407,
      "grad_norm": 0.9574376344680786,
      "learning_rate": 7.340656154662204e-06,
      "loss": 0.0186,
      "step": 827180
    },
    {
      "epoch": 1.353730942702094,
      "grad_norm": 0.6406525373458862,
      "learning_rate": 7.340590262448687e-06,
      "loss": 0.0296,
      "step": 827200
    },
    {
      "epoch": 1.3537636731407474,
      "grad_norm": 0.6974871158599854,
      "learning_rate": 7.340524370235171e-06,
      "loss": 0.0204,
      "step": 827220
    },
    {
      "epoch": 1.3537964035794008,
      "grad_norm": 0.5922457575798035,
      "learning_rate": 7.3404584780216525e-06,
      "loss": 0.0258,
      "step": 827240
    },
    {
      "epoch": 1.3538291340180542,
      "grad_norm": 0.4544793665409088,
      "learning_rate": 7.340392585808136e-06,
      "loss": 0.0173,
      "step": 827260
    },
    {
      "epoch": 1.3538618644567075,
      "grad_norm": 0.6623879075050354,
      "learning_rate": 7.340326693594618e-06,
      "loss": 0.0193,
      "step": 827280
    },
    {
      "epoch": 1.353894594895361,
      "grad_norm": 0.2964688539505005,
      "learning_rate": 7.3402608013811016e-06,
      "loss": 0.0229,
      "step": 827300
    },
    {
      "epoch": 1.353927325334014,
      "grad_norm": 0.2397477626800537,
      "learning_rate": 7.340194909167584e-06,
      "loss": 0.0222,
      "step": 827320
    },
    {
      "epoch": 1.3539600557726674,
      "grad_norm": 1.5002847909927368,
      "learning_rate": 7.340129016954067e-06,
      "loss": 0.028,
      "step": 827340
    },
    {
      "epoch": 1.3539927862113208,
      "grad_norm": 0.7148531079292297,
      "learning_rate": 7.34006312474055e-06,
      "loss": 0.0232,
      "step": 827360
    },
    {
      "epoch": 1.3540255166499742,
      "grad_norm": 0.752482533454895,
      "learning_rate": 7.339997232527033e-06,
      "loss": 0.0169,
      "step": 827380
    },
    {
      "epoch": 1.3540582470886275,
      "grad_norm": 0.6076574325561523,
      "learning_rate": 7.339931340313515e-06,
      "loss": 0.0218,
      "step": 827400
    },
    {
      "epoch": 1.3540909775272807,
      "grad_norm": 1.0936579704284668,
      "learning_rate": 7.339865448099999e-06,
      "loss": 0.0252,
      "step": 827420
    },
    {
      "epoch": 1.3541237079659343,
      "grad_norm": 1.1919214725494385,
      "learning_rate": 7.339799555886481e-06,
      "loss": 0.0192,
      "step": 827440
    },
    {
      "epoch": 1.3541564384045874,
      "grad_norm": 1.742675542831421,
      "learning_rate": 7.339733663672964e-06,
      "loss": 0.0239,
      "step": 827460
    },
    {
      "epoch": 1.3541891688432408,
      "grad_norm": 1.3580130338668823,
      "learning_rate": 7.339667771459448e-06,
      "loss": 0.0172,
      "step": 827480
    },
    {
      "epoch": 1.3542218992818942,
      "grad_norm": 0.45801976323127747,
      "learning_rate": 7.33960187924593e-06,
      "loss": 0.0217,
      "step": 827500
    },
    {
      "epoch": 1.3542546297205476,
      "grad_norm": 0.33167439699172974,
      "learning_rate": 7.339535987032413e-06,
      "loss": 0.018,
      "step": 827520
    },
    {
      "epoch": 1.354287360159201,
      "grad_norm": 0.28157028555870056,
      "learning_rate": 7.339470094818896e-06,
      "loss": 0.0145,
      "step": 827540
    },
    {
      "epoch": 1.354320090597854,
      "grad_norm": 0.7987347841262817,
      "learning_rate": 7.339404202605379e-06,
      "loss": 0.0217,
      "step": 827560
    },
    {
      "epoch": 1.3543528210365074,
      "grad_norm": 0.712142288684845,
      "learning_rate": 7.339338310391862e-06,
      "loss": 0.0181,
      "step": 827580
    },
    {
      "epoch": 1.3543855514751608,
      "grad_norm": 0.10762429237365723,
      "learning_rate": 7.339272418178345e-06,
      "loss": 0.0308,
      "step": 827600
    },
    {
      "epoch": 1.3544182819138142,
      "grad_norm": 0.18993660807609558,
      "learning_rate": 7.339206525964827e-06,
      "loss": 0.0263,
      "step": 827620
    },
    {
      "epoch": 1.3544510123524676,
      "grad_norm": 0.6750544309616089,
      "learning_rate": 7.339140633751311e-06,
      "loss": 0.0204,
      "step": 827640
    },
    {
      "epoch": 1.354483742791121,
      "grad_norm": 2.289294481277466,
      "learning_rate": 7.3390747415377926e-06,
      "loss": 0.0262,
      "step": 827660
    },
    {
      "epoch": 1.3545164732297743,
      "grad_norm": 2.2752227783203125,
      "learning_rate": 7.339008849324276e-06,
      "loss": 0.0275,
      "step": 827680
    },
    {
      "epoch": 1.3545492036684275,
      "grad_norm": 0.557390570640564,
      "learning_rate": 7.338942957110758e-06,
      "loss": 0.0213,
      "step": 827700
    },
    {
      "epoch": 1.3545819341070808,
      "grad_norm": 0.12802377343177795,
      "learning_rate": 7.338877064897242e-06,
      "loss": 0.0215,
      "step": 827720
    },
    {
      "epoch": 1.3546146645457342,
      "grad_norm": 0.38603532314300537,
      "learning_rate": 7.338811172683724e-06,
      "loss": 0.0195,
      "step": 827740
    },
    {
      "epoch": 1.3546473949843876,
      "grad_norm": 0.3249644339084625,
      "learning_rate": 7.338745280470207e-06,
      "loss": 0.023,
      "step": 827760
    },
    {
      "epoch": 1.354680125423041,
      "grad_norm": 0.4834858477115631,
      "learning_rate": 7.33867938825669e-06,
      "loss": 0.0149,
      "step": 827780
    },
    {
      "epoch": 1.3547128558616943,
      "grad_norm": 0.5722035765647888,
      "learning_rate": 7.3386134960431734e-06,
      "loss": 0.015,
      "step": 827800
    },
    {
      "epoch": 1.3547455863003477,
      "grad_norm": 0.8116837739944458,
      "learning_rate": 7.338547603829656e-06,
      "loss": 0.0187,
      "step": 827820
    },
    {
      "epoch": 1.3547783167390008,
      "grad_norm": 0.43964052200317383,
      "learning_rate": 7.338481711616139e-06,
      "loss": 0.0219,
      "step": 827840
    },
    {
      "epoch": 1.3548110471776542,
      "grad_norm": 0.23646347224712372,
      "learning_rate": 7.3384158194026225e-06,
      "loss": 0.0283,
      "step": 827860
    },
    {
      "epoch": 1.3548437776163076,
      "grad_norm": 0.4901217222213745,
      "learning_rate": 7.338349927189104e-06,
      "loss": 0.0269,
      "step": 827880
    },
    {
      "epoch": 1.354876508054961,
      "grad_norm": 1.260980486869812,
      "learning_rate": 7.338284034975588e-06,
      "loss": 0.0235,
      "step": 827900
    },
    {
      "epoch": 1.3549092384936143,
      "grad_norm": 0.975141704082489,
      "learning_rate": 7.33821814276207e-06,
      "loss": 0.0173,
      "step": 827920
    },
    {
      "epoch": 1.3549419689322677,
      "grad_norm": 0.9064383506774902,
      "learning_rate": 7.3381522505485535e-06,
      "loss": 0.0308,
      "step": 827940
    },
    {
      "epoch": 1.354974699370921,
      "grad_norm": 0.7283821105957031,
      "learning_rate": 7.338086358335036e-06,
      "loss": 0.0305,
      "step": 827960
    },
    {
      "epoch": 1.3550074298095742,
      "grad_norm": 0.34101882576942444,
      "learning_rate": 7.338020466121519e-06,
      "loss": 0.0171,
      "step": 827980
    },
    {
      "epoch": 1.3550401602482276,
      "grad_norm": 0.10117388516664505,
      "learning_rate": 7.337954573908002e-06,
      "loss": 0.0147,
      "step": 828000
    },
    {
      "epoch": 1.355072890686881,
      "grad_norm": 0.9677451252937317,
      "learning_rate": 7.337888681694485e-06,
      "loss": 0.0357,
      "step": 828020
    },
    {
      "epoch": 1.3551056211255343,
      "grad_norm": 1.3664528131484985,
      "learning_rate": 7.337822789480967e-06,
      "loss": 0.0199,
      "step": 828040
    },
    {
      "epoch": 1.3551383515641877,
      "grad_norm": 1.2614957094192505,
      "learning_rate": 7.337756897267451e-06,
      "loss": 0.0202,
      "step": 828060
    },
    {
      "epoch": 1.3551710820028409,
      "grad_norm": 0.47987648844718933,
      "learning_rate": 7.337691005053933e-06,
      "loss": 0.0343,
      "step": 828080
    },
    {
      "epoch": 1.3552038124414945,
      "grad_norm": 0.3472639322280884,
      "learning_rate": 7.337625112840416e-06,
      "loss": 0.0154,
      "step": 828100
    },
    {
      "epoch": 1.3552365428801476,
      "grad_norm": 0.2049628049135208,
      "learning_rate": 7.337559220626899e-06,
      "loss": 0.014,
      "step": 828120
    },
    {
      "epoch": 1.355269273318801,
      "grad_norm": 0.06013397499918938,
      "learning_rate": 7.337493328413382e-06,
      "loss": 0.0216,
      "step": 828140
    },
    {
      "epoch": 1.3553020037574544,
      "grad_norm": 0.784035861492157,
      "learning_rate": 7.337427436199865e-06,
      "loss": 0.0216,
      "step": 828160
    },
    {
      "epoch": 1.3553347341961077,
      "grad_norm": 0.3377819359302521,
      "learning_rate": 7.337361543986348e-06,
      "loss": 0.0138,
      "step": 828180
    },
    {
      "epoch": 1.355367464634761,
      "grad_norm": 1.130102276802063,
      "learning_rate": 7.337295651772831e-06,
      "loss": 0.0208,
      "step": 828200
    },
    {
      "epoch": 1.3554001950734142,
      "grad_norm": 0.9427561163902283,
      "learning_rate": 7.3372297595593135e-06,
      "loss": 0.0257,
      "step": 828220
    },
    {
      "epoch": 1.3554329255120678,
      "grad_norm": 0.21650879085063934,
      "learning_rate": 7.337163867345797e-06,
      "loss": 0.0187,
      "step": 828240
    },
    {
      "epoch": 1.355465655950721,
      "grad_norm": 0.2312798947095871,
      "learning_rate": 7.337097975132279e-06,
      "loss": 0.0244,
      "step": 828260
    },
    {
      "epoch": 1.3554983863893744,
      "grad_norm": 0.4240078926086426,
      "learning_rate": 7.337032082918763e-06,
      "loss": 0.0191,
      "step": 828280
    },
    {
      "epoch": 1.3555311168280277,
      "grad_norm": 0.943281352519989,
      "learning_rate": 7.3369661907052445e-06,
      "loss": 0.0207,
      "step": 828300
    },
    {
      "epoch": 1.355563847266681,
      "grad_norm": 1.3115540742874146,
      "learning_rate": 7.336900298491728e-06,
      "loss": 0.0149,
      "step": 828320
    },
    {
      "epoch": 1.3555965777053345,
      "grad_norm": 0.5127220749855042,
      "learning_rate": 7.336834406278211e-06,
      "loss": 0.0144,
      "step": 828340
    },
    {
      "epoch": 1.3556293081439876,
      "grad_norm": 1.3967013359069824,
      "learning_rate": 7.3367685140646935e-06,
      "loss": 0.0189,
      "step": 828360
    },
    {
      "epoch": 1.355662038582641,
      "grad_norm": 0.267643004655838,
      "learning_rate": 7.336702621851176e-06,
      "loss": 0.0163,
      "step": 828380
    },
    {
      "epoch": 1.3556947690212944,
      "grad_norm": 1.2662444114685059,
      "learning_rate": 7.33663672963766e-06,
      "loss": 0.0258,
      "step": 828400
    },
    {
      "epoch": 1.3557274994599477,
      "grad_norm": 0.2831154763698578,
      "learning_rate": 7.336570837424142e-06,
      "loss": 0.0314,
      "step": 828420
    },
    {
      "epoch": 1.3557602298986011,
      "grad_norm": 0.3372044265270233,
      "learning_rate": 7.336504945210625e-06,
      "loss": 0.0214,
      "step": 828440
    },
    {
      "epoch": 1.3557929603372545,
      "grad_norm": 0.5422438383102417,
      "learning_rate": 7.336439052997107e-06,
      "loss": 0.0198,
      "step": 828460
    },
    {
      "epoch": 1.3558256907759079,
      "grad_norm": 0.26835528016090393,
      "learning_rate": 7.336373160783591e-06,
      "loss": 0.0213,
      "step": 828480
    },
    {
      "epoch": 1.355858421214561,
      "grad_norm": 0.9991844892501831,
      "learning_rate": 7.336307268570073e-06,
      "loss": 0.0276,
      "step": 828500
    },
    {
      "epoch": 1.3558911516532144,
      "grad_norm": 0.4932331144809723,
      "learning_rate": 7.336241376356556e-06,
      "loss": 0.0213,
      "step": 828520
    },
    {
      "epoch": 1.3559238820918678,
      "grad_norm": 0.4987089931964874,
      "learning_rate": 7.33617548414304e-06,
      "loss": 0.0276,
      "step": 828540
    },
    {
      "epoch": 1.3559566125305211,
      "grad_norm": 0.29043325781822205,
      "learning_rate": 7.336109591929522e-06,
      "loss": 0.0225,
      "step": 828560
    },
    {
      "epoch": 1.3559893429691745,
      "grad_norm": 1.3295453786849976,
      "learning_rate": 7.336043699716005e-06,
      "loss": 0.0252,
      "step": 828580
    },
    {
      "epoch": 1.3560220734078279,
      "grad_norm": 0.30637726187705994,
      "learning_rate": 7.335977807502488e-06,
      "loss": 0.0193,
      "step": 828600
    },
    {
      "epoch": 1.3560548038464812,
      "grad_norm": 0.7094069123268127,
      "learning_rate": 7.335911915288972e-06,
      "loss": 0.0283,
      "step": 828620
    },
    {
      "epoch": 1.3560875342851344,
      "grad_norm": 0.29738399386405945,
      "learning_rate": 7.335846023075454e-06,
      "loss": 0.0224,
      "step": 828640
    },
    {
      "epoch": 1.3561202647237878,
      "grad_norm": 0.5061675906181335,
      "learning_rate": 7.335780130861937e-06,
      "loss": 0.0188,
      "step": 828660
    },
    {
      "epoch": 1.3561529951624411,
      "grad_norm": 0.46686235070228577,
      "learning_rate": 7.335714238648419e-06,
      "loss": 0.0227,
      "step": 828680
    },
    {
      "epoch": 1.3561857256010945,
      "grad_norm": 1.584657907485962,
      "learning_rate": 7.335648346434903e-06,
      "loss": 0.0161,
      "step": 828700
    },
    {
      "epoch": 1.3562184560397479,
      "grad_norm": 1.4400300979614258,
      "learning_rate": 7.3355824542213846e-06,
      "loss": 0.0222,
      "step": 828720
    },
    {
      "epoch": 1.3562511864784013,
      "grad_norm": 0.27713173627853394,
      "learning_rate": 7.335516562007868e-06,
      "loss": 0.0219,
      "step": 828740
    },
    {
      "epoch": 1.3562839169170546,
      "grad_norm": 0.8165292143821716,
      "learning_rate": 7.335450669794351e-06,
      "loss": 0.0311,
      "step": 828760
    },
    {
      "epoch": 1.3563166473557078,
      "grad_norm": 0.6709714531898499,
      "learning_rate": 7.335384777580834e-06,
      "loss": 0.0209,
      "step": 828780
    },
    {
      "epoch": 1.3563493777943612,
      "grad_norm": 0.2407093048095703,
      "learning_rate": 7.335318885367316e-06,
      "loss": 0.0231,
      "step": 828800
    },
    {
      "epoch": 1.3563821082330145,
      "grad_norm": 0.23240041732788086,
      "learning_rate": 7.3352529931538e-06,
      "loss": 0.0166,
      "step": 828820
    },
    {
      "epoch": 1.356414838671668,
      "grad_norm": 1.4938186407089233,
      "learning_rate": 7.335187100940282e-06,
      "loss": 0.0211,
      "step": 828840
    },
    {
      "epoch": 1.3564475691103213,
      "grad_norm": 2.4238038063049316,
      "learning_rate": 7.3351212087267654e-06,
      "loss": 0.0306,
      "step": 828860
    },
    {
      "epoch": 1.3564802995489744,
      "grad_norm": 1.1413382291793823,
      "learning_rate": 7.335055316513249e-06,
      "loss": 0.0183,
      "step": 828880
    },
    {
      "epoch": 1.356513029987628,
      "grad_norm": 0.31400129199028015,
      "learning_rate": 7.334989424299731e-06,
      "loss": 0.0238,
      "step": 828900
    },
    {
      "epoch": 1.3565457604262812,
      "grad_norm": 0.2406003773212433,
      "learning_rate": 7.3349235320862145e-06,
      "loss": 0.0246,
      "step": 828920
    },
    {
      "epoch": 1.3565784908649345,
      "grad_norm": 0.8254604339599609,
      "learning_rate": 7.334857639872696e-06,
      "loss": 0.028,
      "step": 828940
    },
    {
      "epoch": 1.356611221303588,
      "grad_norm": 1.1335417032241821,
      "learning_rate": 7.33479174765918e-06,
      "loss": 0.0181,
      "step": 828960
    },
    {
      "epoch": 1.3566439517422413,
      "grad_norm": 0.36869436502456665,
      "learning_rate": 7.334725855445663e-06,
      "loss": 0.0158,
      "step": 828980
    },
    {
      "epoch": 1.3566766821808947,
      "grad_norm": 0.18231633305549622,
      "learning_rate": 7.3346599632321455e-06,
      "loss": 0.0168,
      "step": 829000
    },
    {
      "epoch": 1.3567094126195478,
      "grad_norm": 0.3040162920951843,
      "learning_rate": 7.334594071018628e-06,
      "loss": 0.0141,
      "step": 829020
    },
    {
      "epoch": 1.3567421430582014,
      "grad_norm": 0.36871081590652466,
      "learning_rate": 7.334528178805112e-06,
      "loss": 0.02,
      "step": 829040
    },
    {
      "epoch": 1.3567748734968545,
      "grad_norm": 0.9488595128059387,
      "learning_rate": 7.334462286591594e-06,
      "loss": 0.0202,
      "step": 829060
    },
    {
      "epoch": 1.356807603935508,
      "grad_norm": 0.9047257304191589,
      "learning_rate": 7.334396394378077e-06,
      "loss": 0.0237,
      "step": 829080
    },
    {
      "epoch": 1.3568403343741613,
      "grad_norm": 1.1905572414398193,
      "learning_rate": 7.334330502164559e-06,
      "loss": 0.0172,
      "step": 829100
    },
    {
      "epoch": 1.3568730648128147,
      "grad_norm": 0.5154994130134583,
      "learning_rate": 7.334264609951043e-06,
      "loss": 0.0214,
      "step": 829120
    },
    {
      "epoch": 1.356905795251468,
      "grad_norm": 0.27914464473724365,
      "learning_rate": 7.3341987177375255e-06,
      "loss": 0.024,
      "step": 829140
    },
    {
      "epoch": 1.3569385256901212,
      "grad_norm": 0.18235208094120026,
      "learning_rate": 7.334132825524008e-06,
      "loss": 0.0218,
      "step": 829160
    },
    {
      "epoch": 1.3569712561287746,
      "grad_norm": 0.9525580406188965,
      "learning_rate": 7.334066933310491e-06,
      "loss": 0.0306,
      "step": 829180
    },
    {
      "epoch": 1.357003986567428,
      "grad_norm": 1.459093451499939,
      "learning_rate": 7.3340010410969745e-06,
      "loss": 0.0237,
      "step": 829200
    },
    {
      "epoch": 1.3570367170060813,
      "grad_norm": 1.114784598350525,
      "learning_rate": 7.333935148883457e-06,
      "loss": 0.0251,
      "step": 829220
    },
    {
      "epoch": 1.3570694474447347,
      "grad_norm": 1.0195649862289429,
      "learning_rate": 7.33386925666994e-06,
      "loss": 0.0234,
      "step": 829240
    },
    {
      "epoch": 1.357102177883388,
      "grad_norm": 1.2730753421783447,
      "learning_rate": 7.333803364456424e-06,
      "loss": 0.0205,
      "step": 829260
    },
    {
      "epoch": 1.3571349083220414,
      "grad_norm": 0.9587484002113342,
      "learning_rate": 7.3337374722429055e-06,
      "loss": 0.0298,
      "step": 829280
    },
    {
      "epoch": 1.3571676387606946,
      "grad_norm": 2.156710147857666,
      "learning_rate": 7.333671580029389e-06,
      "loss": 0.0211,
      "step": 829300
    },
    {
      "epoch": 1.357200369199348,
      "grad_norm": 0.8286769390106201,
      "learning_rate": 7.333605687815871e-06,
      "loss": 0.0199,
      "step": 829320
    },
    {
      "epoch": 1.3572330996380013,
      "grad_norm": 0.44852879643440247,
      "learning_rate": 7.3335397956023546e-06,
      "loss": 0.0181,
      "step": 829340
    },
    {
      "epoch": 1.3572658300766547,
      "grad_norm": 0.29067546129226685,
      "learning_rate": 7.333473903388837e-06,
      "loss": 0.0248,
      "step": 829360
    },
    {
      "epoch": 1.357298560515308,
      "grad_norm": 1.6329457759857178,
      "learning_rate": 7.33340801117532e-06,
      "loss": 0.0185,
      "step": 829380
    },
    {
      "epoch": 1.3573312909539614,
      "grad_norm": 1.373720645904541,
      "learning_rate": 7.333342118961803e-06,
      "loss": 0.0199,
      "step": 829400
    },
    {
      "epoch": 1.3573640213926148,
      "grad_norm": 1.3821089267730713,
      "learning_rate": 7.333276226748286e-06,
      "loss": 0.0181,
      "step": 829420
    },
    {
      "epoch": 1.357396751831268,
      "grad_norm": 0.3206092119216919,
      "learning_rate": 7.333210334534768e-06,
      "loss": 0.0265,
      "step": 829440
    },
    {
      "epoch": 1.3574294822699213,
      "grad_norm": 0.2668797969818115,
      "learning_rate": 7.333144442321252e-06,
      "loss": 0.0149,
      "step": 829460
    },
    {
      "epoch": 1.3574622127085747,
      "grad_norm": 0.7304572463035583,
      "learning_rate": 7.333078550107734e-06,
      "loss": 0.0233,
      "step": 829480
    },
    {
      "epoch": 1.357494943147228,
      "grad_norm": 0.2824839651584625,
      "learning_rate": 7.333012657894217e-06,
      "loss": 0.0172,
      "step": 829500
    },
    {
      "epoch": 1.3575276735858814,
      "grad_norm": 0.9824199676513672,
      "learning_rate": 7.332946765680699e-06,
      "loss": 0.0223,
      "step": 829520
    },
    {
      "epoch": 1.3575604040245348,
      "grad_norm": 0.2677105665206909,
      "learning_rate": 7.332880873467183e-06,
      "loss": 0.0242,
      "step": 829540
    },
    {
      "epoch": 1.3575931344631882,
      "grad_norm": 0.6803525686264038,
      "learning_rate": 7.3328149812536656e-06,
      "loss": 0.0219,
      "step": 829560
    },
    {
      "epoch": 1.3576258649018413,
      "grad_norm": 1.0391844511032104,
      "learning_rate": 7.332749089040148e-06,
      "loss": 0.0198,
      "step": 829580
    },
    {
      "epoch": 1.3576585953404947,
      "grad_norm": 0.18467064201831818,
      "learning_rate": 7.332683196826632e-06,
      "loss": 0.019,
      "step": 829600
    },
    {
      "epoch": 1.357691325779148,
      "grad_norm": 0.3863077461719513,
      "learning_rate": 7.332617304613115e-06,
      "loss": 0.0279,
      "step": 829620
    },
    {
      "epoch": 1.3577240562178015,
      "grad_norm": 0.42020079493522644,
      "learning_rate": 7.332551412399597e-06,
      "loss": 0.0212,
      "step": 829640
    },
    {
      "epoch": 1.3577567866564548,
      "grad_norm": 1.2248188257217407,
      "learning_rate": 7.33248552018608e-06,
      "loss": 0.0223,
      "step": 829660
    },
    {
      "epoch": 1.357789517095108,
      "grad_norm": 0.36138641834259033,
      "learning_rate": 7.332419627972564e-06,
      "loss": 0.0171,
      "step": 829680
    },
    {
      "epoch": 1.3578222475337616,
      "grad_norm": 0.7601759433746338,
      "learning_rate": 7.332353735759046e-06,
      "loss": 0.0248,
      "step": 829700
    },
    {
      "epoch": 1.3578549779724147,
      "grad_norm": 1.4412530660629272,
      "learning_rate": 7.332287843545529e-06,
      "loss": 0.0214,
      "step": 829720
    },
    {
      "epoch": 1.357887708411068,
      "grad_norm": 0.13059091567993164,
      "learning_rate": 7.332221951332011e-06,
      "loss": 0.0228,
      "step": 829740
    },
    {
      "epoch": 1.3579204388497215,
      "grad_norm": 0.722812294960022,
      "learning_rate": 7.332156059118495e-06,
      "loss": 0.0221,
      "step": 829760
    },
    {
      "epoch": 1.3579531692883748,
      "grad_norm": 1.2156949043273926,
      "learning_rate": 7.332090166904977e-06,
      "loss": 0.0258,
      "step": 829780
    },
    {
      "epoch": 1.3579858997270282,
      "grad_norm": 1.0364890098571777,
      "learning_rate": 7.33202427469146e-06,
      "loss": 0.0251,
      "step": 829800
    },
    {
      "epoch": 1.3580186301656814,
      "grad_norm": 1.5015901327133179,
      "learning_rate": 7.331958382477943e-06,
      "loss": 0.0328,
      "step": 829820
    },
    {
      "epoch": 1.358051360604335,
      "grad_norm": 0.20799937844276428,
      "learning_rate": 7.3318924902644265e-06,
      "loss": 0.0215,
      "step": 829840
    },
    {
      "epoch": 1.358084091042988,
      "grad_norm": 0.33632543683052063,
      "learning_rate": 7.331826598050908e-06,
      "loss": 0.0183,
      "step": 829860
    },
    {
      "epoch": 1.3581168214816415,
      "grad_norm": 0.21580910682678223,
      "learning_rate": 7.331760705837392e-06,
      "loss": 0.0234,
      "step": 829880
    },
    {
      "epoch": 1.3581495519202949,
      "grad_norm": 0.630041778087616,
      "learning_rate": 7.331694813623874e-06,
      "loss": 0.0241,
      "step": 829900
    },
    {
      "epoch": 1.3581822823589482,
      "grad_norm": 0.7183650135993958,
      "learning_rate": 7.331628921410357e-06,
      "loss": 0.0179,
      "step": 829920
    },
    {
      "epoch": 1.3582150127976016,
      "grad_norm": 0.37033262848854065,
      "learning_rate": 7.331563029196841e-06,
      "loss": 0.0279,
      "step": 829940
    },
    {
      "epoch": 1.3582477432362547,
      "grad_norm": 0.2619200348854065,
      "learning_rate": 7.331497136983323e-06,
      "loss": 0.017,
      "step": 829960
    },
    {
      "epoch": 1.3582804736749081,
      "grad_norm": 0.897824764251709,
      "learning_rate": 7.3314312447698065e-06,
      "loss": 0.031,
      "step": 829980
    },
    {
      "epoch": 1.3583132041135615,
      "grad_norm": 0.321004182100296,
      "learning_rate": 7.331365352556289e-06,
      "loss": 0.0162,
      "step": 830000
    },
    {
      "epoch": 1.3583459345522149,
      "grad_norm": 0.3332814872264862,
      "learning_rate": 7.331299460342772e-06,
      "loss": 0.0125,
      "step": 830020
    },
    {
      "epoch": 1.3583786649908682,
      "grad_norm": 0.3603711724281311,
      "learning_rate": 7.331233568129255e-06,
      "loss": 0.0176,
      "step": 830040
    },
    {
      "epoch": 1.3584113954295216,
      "grad_norm": 0.9939613342285156,
      "learning_rate": 7.331167675915738e-06,
      "loss": 0.0252,
      "step": 830060
    },
    {
      "epoch": 1.358444125868175,
      "grad_norm": 0.5741596221923828,
      "learning_rate": 7.33110178370222e-06,
      "loss": 0.0234,
      "step": 830080
    },
    {
      "epoch": 1.3584768563068281,
      "grad_norm": 1.5378506183624268,
      "learning_rate": 7.331035891488704e-06,
      "loss": 0.029,
      "step": 830100
    },
    {
      "epoch": 1.3585095867454815,
      "grad_norm": 0.5609950423240662,
      "learning_rate": 7.330969999275186e-06,
      "loss": 0.0216,
      "step": 830120
    },
    {
      "epoch": 1.3585423171841349,
      "grad_norm": 0.605523943901062,
      "learning_rate": 7.330904107061669e-06,
      "loss": 0.0294,
      "step": 830140
    },
    {
      "epoch": 1.3585750476227882,
      "grad_norm": 3.833998918533325,
      "learning_rate": 7.330838214848152e-06,
      "loss": 0.0205,
      "step": 830160
    },
    {
      "epoch": 1.3586077780614416,
      "grad_norm": 0.4033683240413666,
      "learning_rate": 7.330772322634635e-06,
      "loss": 0.0189,
      "step": 830180
    },
    {
      "epoch": 1.358640508500095,
      "grad_norm": 0.534248411655426,
      "learning_rate": 7.3307064304211175e-06,
      "loss": 0.0329,
      "step": 830200
    },
    {
      "epoch": 1.3586732389387484,
      "grad_norm": 0.9807760119438171,
      "learning_rate": 7.330640538207601e-06,
      "loss": 0.0199,
      "step": 830220
    },
    {
      "epoch": 1.3587059693774015,
      "grad_norm": 0.27737298607826233,
      "learning_rate": 7.330574645994083e-06,
      "loss": 0.0141,
      "step": 830240
    },
    {
      "epoch": 1.3587386998160549,
      "grad_norm": 1.1965969800949097,
      "learning_rate": 7.3305087537805665e-06,
      "loss": 0.0271,
      "step": 830260
    },
    {
      "epoch": 1.3587714302547083,
      "grad_norm": 1.2111376523971558,
      "learning_rate": 7.33044286156705e-06,
      "loss": 0.0193,
      "step": 830280
    },
    {
      "epoch": 1.3588041606933616,
      "grad_norm": 0.43206313252449036,
      "learning_rate": 7.330376969353532e-06,
      "loss": 0.0195,
      "step": 830300
    },
    {
      "epoch": 1.358836891132015,
      "grad_norm": 0.44701552391052246,
      "learning_rate": 7.330311077140016e-06,
      "loss": 0.0261,
      "step": 830320
    },
    {
      "epoch": 1.3588696215706682,
      "grad_norm": 0.29489627480506897,
      "learning_rate": 7.3302451849264975e-06,
      "loss": 0.0268,
      "step": 830340
    },
    {
      "epoch": 1.3589023520093217,
      "grad_norm": 0.2941068112850189,
      "learning_rate": 7.330179292712981e-06,
      "loss": 0.013,
      "step": 830360
    },
    {
      "epoch": 1.358935082447975,
      "grad_norm": 0.5254740118980408,
      "learning_rate": 7.330113400499464e-06,
      "loss": 0.0203,
      "step": 830380
    },
    {
      "epoch": 1.3589678128866283,
      "grad_norm": 0.9310428500175476,
      "learning_rate": 7.3300475082859466e-06,
      "loss": 0.016,
      "step": 830400
    },
    {
      "epoch": 1.3590005433252816,
      "grad_norm": 0.8026009202003479,
      "learning_rate": 7.329981616072429e-06,
      "loss": 0.0276,
      "step": 830420
    },
    {
      "epoch": 1.359033273763935,
      "grad_norm": 0.27120447158813477,
      "learning_rate": 7.329915723858913e-06,
      "loss": 0.024,
      "step": 830440
    },
    {
      "epoch": 1.3590660042025884,
      "grad_norm": 0.21988317370414734,
      "learning_rate": 7.329849831645395e-06,
      "loss": 0.0181,
      "step": 830460
    },
    {
      "epoch": 1.3590987346412415,
      "grad_norm": 1.850136160850525,
      "learning_rate": 7.329783939431878e-06,
      "loss": 0.0297,
      "step": 830480
    },
    {
      "epoch": 1.3591314650798951,
      "grad_norm": 0.460331529378891,
      "learning_rate": 7.32971804721836e-06,
      "loss": 0.021,
      "step": 830500
    },
    {
      "epoch": 1.3591641955185483,
      "grad_norm": 0.24772289395332336,
      "learning_rate": 7.329652155004844e-06,
      "loss": 0.0187,
      "step": 830520
    },
    {
      "epoch": 1.3591969259572017,
      "grad_norm": 0.4485289454460144,
      "learning_rate": 7.329586262791326e-06,
      "loss": 0.0262,
      "step": 830540
    },
    {
      "epoch": 1.359229656395855,
      "grad_norm": 0.5061130523681641,
      "learning_rate": 7.329520370577809e-06,
      "loss": 0.0177,
      "step": 830560
    },
    {
      "epoch": 1.3592623868345084,
      "grad_norm": 0.6731061935424805,
      "learning_rate": 7.329454478364292e-06,
      "loss": 0.014,
      "step": 830580
    },
    {
      "epoch": 1.3592951172731618,
      "grad_norm": 0.44695529341697693,
      "learning_rate": 7.329388586150775e-06,
      "loss": 0.0261,
      "step": 830600
    },
    {
      "epoch": 1.359327847711815,
      "grad_norm": 1.576418399810791,
      "learning_rate": 7.3293226939372575e-06,
      "loss": 0.0345,
      "step": 830620
    },
    {
      "epoch": 1.3593605781504683,
      "grad_norm": 0.9569775462150574,
      "learning_rate": 7.329256801723741e-06,
      "loss": 0.0292,
      "step": 830640
    },
    {
      "epoch": 1.3593933085891217,
      "grad_norm": 0.24597923457622528,
      "learning_rate": 7.329190909510224e-06,
      "loss": 0.0227,
      "step": 830660
    },
    {
      "epoch": 1.359426039027775,
      "grad_norm": 0.48554402589797974,
      "learning_rate": 7.329125017296707e-06,
      "loss": 0.0199,
      "step": 830680
    },
    {
      "epoch": 1.3594587694664284,
      "grad_norm": 0.2893633544445038,
      "learning_rate": 7.32905912508319e-06,
      "loss": 0.0267,
      "step": 830700
    },
    {
      "epoch": 1.3594914999050818,
      "grad_norm": 0.3261704444885254,
      "learning_rate": 7.328993232869672e-06,
      "loss": 0.0222,
      "step": 830720
    },
    {
      "epoch": 1.3595242303437352,
      "grad_norm": 0.4315017759799957,
      "learning_rate": 7.328927340656156e-06,
      "loss": 0.0193,
      "step": 830740
    },
    {
      "epoch": 1.3595569607823883,
      "grad_norm": 0.5053759217262268,
      "learning_rate": 7.3288614484426376e-06,
      "loss": 0.0239,
      "step": 830760
    },
    {
      "epoch": 1.3595896912210417,
      "grad_norm": 1.0414279699325562,
      "learning_rate": 7.328795556229121e-06,
      "loss": 0.0258,
      "step": 830780
    },
    {
      "epoch": 1.359622421659695,
      "grad_norm": 1.2258415222167969,
      "learning_rate": 7.328729664015604e-06,
      "loss": 0.0236,
      "step": 830800
    },
    {
      "epoch": 1.3596551520983484,
      "grad_norm": 0.9545414447784424,
      "learning_rate": 7.328663771802087e-06,
      "loss": 0.0212,
      "step": 830820
    },
    {
      "epoch": 1.3596878825370018,
      "grad_norm": 0.5983083844184875,
      "learning_rate": 7.328597879588569e-06,
      "loss": 0.0179,
      "step": 830840
    },
    {
      "epoch": 1.3597206129756552,
      "grad_norm": 1.2091883420944214,
      "learning_rate": 7.328531987375053e-06,
      "loss": 0.027,
      "step": 830860
    },
    {
      "epoch": 1.3597533434143085,
      "grad_norm": 1.359080195426941,
      "learning_rate": 7.328466095161535e-06,
      "loss": 0.0197,
      "step": 830880
    },
    {
      "epoch": 1.3597860738529617,
      "grad_norm": 0.8477963209152222,
      "learning_rate": 7.3284002029480184e-06,
      "loss": 0.02,
      "step": 830900
    },
    {
      "epoch": 1.359818804291615,
      "grad_norm": 0.42642447352409363,
      "learning_rate": 7.3283343107345e-06,
      "loss": 0.0276,
      "step": 830920
    },
    {
      "epoch": 1.3598515347302684,
      "grad_norm": 0.4729485809803009,
      "learning_rate": 7.328268418520984e-06,
      "loss": 0.025,
      "step": 830940
    },
    {
      "epoch": 1.3598842651689218,
      "grad_norm": 1.3013936281204224,
      "learning_rate": 7.328202526307467e-06,
      "loss": 0.0246,
      "step": 830960
    },
    {
      "epoch": 1.3599169956075752,
      "grad_norm": 0.6587364077568054,
      "learning_rate": 7.328136634093949e-06,
      "loss": 0.0177,
      "step": 830980
    },
    {
      "epoch": 1.3599497260462285,
      "grad_norm": 0.4474446177482605,
      "learning_rate": 7.328070741880433e-06,
      "loss": 0.0205,
      "step": 831000
    },
    {
      "epoch": 1.359982456484882,
      "grad_norm": 0.2668999135494232,
      "learning_rate": 7.328004849666916e-06,
      "loss": 0.0164,
      "step": 831020
    },
    {
      "epoch": 1.360015186923535,
      "grad_norm": 1.3109904527664185,
      "learning_rate": 7.3279389574533985e-06,
      "loss": 0.0227,
      "step": 831040
    },
    {
      "epoch": 1.3600479173621884,
      "grad_norm": 1.5802454948425293,
      "learning_rate": 7.327873065239881e-06,
      "loss": 0.0192,
      "step": 831060
    },
    {
      "epoch": 1.3600806478008418,
      "grad_norm": 1.4558186531066895,
      "learning_rate": 7.327807173026365e-06,
      "loss": 0.0208,
      "step": 831080
    },
    {
      "epoch": 1.3601133782394952,
      "grad_norm": 0.9802117943763733,
      "learning_rate": 7.327741280812847e-06,
      "loss": 0.021,
      "step": 831100
    },
    {
      "epoch": 1.3601461086781486,
      "grad_norm": 0.5629663467407227,
      "learning_rate": 7.32767538859933e-06,
      "loss": 0.0178,
      "step": 831120
    },
    {
      "epoch": 1.3601788391168017,
      "grad_norm": 0.40193548798561096,
      "learning_rate": 7.327609496385812e-06,
      "loss": 0.026,
      "step": 831140
    },
    {
      "epoch": 1.3602115695554553,
      "grad_norm": 0.5217581391334534,
      "learning_rate": 7.327543604172296e-06,
      "loss": 0.0155,
      "step": 831160
    },
    {
      "epoch": 1.3602442999941085,
      "grad_norm": 0.10821014642715454,
      "learning_rate": 7.3274777119587785e-06,
      "loss": 0.027,
      "step": 831180
    },
    {
      "epoch": 1.3602770304327618,
      "grad_norm": 0.09690511971712112,
      "learning_rate": 7.327411819745261e-06,
      "loss": 0.0217,
      "step": 831200
    },
    {
      "epoch": 1.3603097608714152,
      "grad_norm": 0.2863497734069824,
      "learning_rate": 7.327345927531744e-06,
      "loss": 0.0182,
      "step": 831220
    },
    {
      "epoch": 1.3603424913100686,
      "grad_norm": 0.7995430827140808,
      "learning_rate": 7.3272800353182276e-06,
      "loss": 0.0329,
      "step": 831240
    },
    {
      "epoch": 1.360375221748722,
      "grad_norm": 0.5515193343162537,
      "learning_rate": 7.3272141431047094e-06,
      "loss": 0.0199,
      "step": 831260
    },
    {
      "epoch": 1.360407952187375,
      "grad_norm": 0.13274016976356506,
      "learning_rate": 7.327148250891193e-06,
      "loss": 0.015,
      "step": 831280
    },
    {
      "epoch": 1.3604406826260287,
      "grad_norm": 0.24580828845500946,
      "learning_rate": 7.327082358677675e-06,
      "loss": 0.0182,
      "step": 831300
    },
    {
      "epoch": 1.3604734130646818,
      "grad_norm": 0.7673383355140686,
      "learning_rate": 7.3270164664641585e-06,
      "loss": 0.0185,
      "step": 831320
    },
    {
      "epoch": 1.3605061435033352,
      "grad_norm": 0.7751821875572205,
      "learning_rate": 7.326950574250642e-06,
      "loss": 0.0193,
      "step": 831340
    },
    {
      "epoch": 1.3605388739419886,
      "grad_norm": 0.2503819465637207,
      "learning_rate": 7.326884682037124e-06,
      "loss": 0.0186,
      "step": 831360
    },
    {
      "epoch": 1.360571604380642,
      "grad_norm": 0.23717191815376282,
      "learning_rate": 7.3268187898236076e-06,
      "loss": 0.0255,
      "step": 831380
    },
    {
      "epoch": 1.3606043348192953,
      "grad_norm": 0.4975782334804535,
      "learning_rate": 7.32675289761009e-06,
      "loss": 0.0189,
      "step": 831400
    },
    {
      "epoch": 1.3606370652579485,
      "grad_norm": 1.1142016649246216,
      "learning_rate": 7.326687005396573e-06,
      "loss": 0.0151,
      "step": 831420
    },
    {
      "epoch": 1.3606697956966018,
      "grad_norm": 0.5896473526954651,
      "learning_rate": 7.326621113183056e-06,
      "loss": 0.0214,
      "step": 831440
    },
    {
      "epoch": 1.3607025261352552,
      "grad_norm": 0.21792124211788177,
      "learning_rate": 7.326555220969539e-06,
      "loss": 0.0149,
      "step": 831460
    },
    {
      "epoch": 1.3607352565739086,
      "grad_norm": 0.49684447050094604,
      "learning_rate": 7.326489328756021e-06,
      "loss": 0.0208,
      "step": 831480
    },
    {
      "epoch": 1.360767987012562,
      "grad_norm": 0.509730339050293,
      "learning_rate": 7.326423436542505e-06,
      "loss": 0.0207,
      "step": 831500
    },
    {
      "epoch": 1.3608007174512153,
      "grad_norm": 0.1831832081079483,
      "learning_rate": 7.326357544328987e-06,
      "loss": 0.0173,
      "step": 831520
    },
    {
      "epoch": 1.3608334478898687,
      "grad_norm": 0.1634325087070465,
      "learning_rate": 7.32629165211547e-06,
      "loss": 0.0317,
      "step": 831540
    },
    {
      "epoch": 1.3608661783285219,
      "grad_norm": 0.5495901703834534,
      "learning_rate": 7.326225759901952e-06,
      "loss": 0.0236,
      "step": 831560
    },
    {
      "epoch": 1.3608989087671752,
      "grad_norm": 1.0076861381530762,
      "learning_rate": 7.326159867688436e-06,
      "loss": 0.0199,
      "step": 831580
    },
    {
      "epoch": 1.3609316392058286,
      "grad_norm": 0.3636939525604248,
      "learning_rate": 7.3260939754749186e-06,
      "loss": 0.0229,
      "step": 831600
    },
    {
      "epoch": 1.360964369644482,
      "grad_norm": 0.06706520915031433,
      "learning_rate": 7.326028083261401e-06,
      "loss": 0.0214,
      "step": 831620
    },
    {
      "epoch": 1.3609971000831353,
      "grad_norm": 0.9037818908691406,
      "learning_rate": 7.325962191047884e-06,
      "loss": 0.019,
      "step": 831640
    },
    {
      "epoch": 1.3610298305217887,
      "grad_norm": 0.46392232179641724,
      "learning_rate": 7.325896298834368e-06,
      "loss": 0.0242,
      "step": 831660
    },
    {
      "epoch": 1.361062560960442,
      "grad_norm": 0.6819773316383362,
      "learning_rate": 7.32583040662085e-06,
      "loss": 0.028,
      "step": 831680
    },
    {
      "epoch": 1.3610952913990952,
      "grad_norm": 0.6787517666816711,
      "learning_rate": 7.325764514407333e-06,
      "loss": 0.0198,
      "step": 831700
    },
    {
      "epoch": 1.3611280218377486,
      "grad_norm": 1.034693717956543,
      "learning_rate": 7.325698622193817e-06,
      "loss": 0.0231,
      "step": 831720
    },
    {
      "epoch": 1.361160752276402,
      "grad_norm": 0.7652328610420227,
      "learning_rate": 7.325632729980299e-06,
      "loss": 0.0218,
      "step": 831740
    },
    {
      "epoch": 1.3611934827150554,
      "grad_norm": 0.09126882255077362,
      "learning_rate": 7.325566837766782e-06,
      "loss": 0.0237,
      "step": 831760
    },
    {
      "epoch": 1.3612262131537087,
      "grad_norm": 0.512176513671875,
      "learning_rate": 7.325500945553264e-06,
      "loss": 0.0296,
      "step": 831780
    },
    {
      "epoch": 1.361258943592362,
      "grad_norm": 1.3310884237289429,
      "learning_rate": 7.325435053339748e-06,
      "loss": 0.0195,
      "step": 831800
    },
    {
      "epoch": 1.3612916740310155,
      "grad_norm": 0.7658769488334656,
      "learning_rate": 7.32536916112623e-06,
      "loss": 0.0172,
      "step": 831820
    },
    {
      "epoch": 1.3613244044696686,
      "grad_norm": 1.2758102416992188,
      "learning_rate": 7.325303268912713e-06,
      "loss": 0.021,
      "step": 831840
    },
    {
      "epoch": 1.361357134908322,
      "grad_norm": 4.316939353942871,
      "learning_rate": 7.325237376699196e-06,
      "loss": 0.0227,
      "step": 831860
    },
    {
      "epoch": 1.3613898653469754,
      "grad_norm": 0.6393992900848389,
      "learning_rate": 7.3251714844856795e-06,
      "loss": 0.0201,
      "step": 831880
    },
    {
      "epoch": 1.3614225957856287,
      "grad_norm": 0.8973539471626282,
      "learning_rate": 7.325105592272161e-06,
      "loss": 0.0158,
      "step": 831900
    },
    {
      "epoch": 1.3614553262242821,
      "grad_norm": 0.6812419891357422,
      "learning_rate": 7.325039700058645e-06,
      "loss": 0.0134,
      "step": 831920
    },
    {
      "epoch": 1.3614880566629353,
      "grad_norm": 0.6344459652900696,
      "learning_rate": 7.324973807845127e-06,
      "loss": 0.017,
      "step": 831940
    },
    {
      "epoch": 1.3615207871015889,
      "grad_norm": 0.46344900131225586,
      "learning_rate": 7.32490791563161e-06,
      "loss": 0.0123,
      "step": 831960
    },
    {
      "epoch": 1.361553517540242,
      "grad_norm": 0.4657168388366699,
      "learning_rate": 7.324842023418093e-06,
      "loss": 0.0259,
      "step": 831980
    },
    {
      "epoch": 1.3615862479788954,
      "grad_norm": 2.086155652999878,
      "learning_rate": 7.324776131204576e-06,
      "loss": 0.0297,
      "step": 832000
    },
    {
      "epoch": 1.3616189784175488,
      "grad_norm": 0.29957279562950134,
      "learning_rate": 7.324710238991059e-06,
      "loss": 0.0226,
      "step": 832020
    },
    {
      "epoch": 1.3616517088562021,
      "grad_norm": 0.4092490077018738,
      "learning_rate": 7.324644346777542e-06,
      "loss": 0.0153,
      "step": 832040
    },
    {
      "epoch": 1.3616844392948555,
      "grad_norm": 0.6822422742843628,
      "learning_rate": 7.324578454564025e-06,
      "loss": 0.0199,
      "step": 832060
    },
    {
      "epoch": 1.3617171697335086,
      "grad_norm": 0.3625308871269226,
      "learning_rate": 7.324512562350508e-06,
      "loss": 0.0188,
      "step": 832080
    },
    {
      "epoch": 1.3617499001721622,
      "grad_norm": 0.5240052938461304,
      "learning_rate": 7.324446670136991e-06,
      "loss": 0.0144,
      "step": 832100
    },
    {
      "epoch": 1.3617826306108154,
      "grad_norm": 0.46483534574508667,
      "learning_rate": 7.324380777923473e-06,
      "loss": 0.0277,
      "step": 832120
    },
    {
      "epoch": 1.3618153610494688,
      "grad_norm": 1.1304941177368164,
      "learning_rate": 7.324314885709957e-06,
      "loss": 0.019,
      "step": 832140
    },
    {
      "epoch": 1.3618480914881221,
      "grad_norm": 3.1059436798095703,
      "learning_rate": 7.324248993496439e-06,
      "loss": 0.0227,
      "step": 832160
    },
    {
      "epoch": 1.3618808219267755,
      "grad_norm": 0.5071729421615601,
      "learning_rate": 7.324183101282922e-06,
      "loss": 0.027,
      "step": 832180
    },
    {
      "epoch": 1.3619135523654289,
      "grad_norm": 0.2623623311519623,
      "learning_rate": 7.324117209069405e-06,
      "loss": 0.0196,
      "step": 832200
    },
    {
      "epoch": 1.361946282804082,
      "grad_norm": 1.147242784500122,
      "learning_rate": 7.324051316855888e-06,
      "loss": 0.0251,
      "step": 832220
    },
    {
      "epoch": 1.3619790132427354,
      "grad_norm": 0.3428075909614563,
      "learning_rate": 7.3239854246423705e-06,
      "loss": 0.0234,
      "step": 832240
    },
    {
      "epoch": 1.3620117436813888,
      "grad_norm": 0.4804581105709076,
      "learning_rate": 7.323919532428854e-06,
      "loss": 0.022,
      "step": 832260
    },
    {
      "epoch": 1.3620444741200421,
      "grad_norm": 0.8290057182312012,
      "learning_rate": 7.323853640215336e-06,
      "loss": 0.0324,
      "step": 832280
    },
    {
      "epoch": 1.3620772045586955,
      "grad_norm": 0.2896643579006195,
      "learning_rate": 7.3237877480018195e-06,
      "loss": 0.018,
      "step": 832300
    },
    {
      "epoch": 1.362109934997349,
      "grad_norm": 0.279662549495697,
      "learning_rate": 7.3237218557883014e-06,
      "loss": 0.0174,
      "step": 832320
    },
    {
      "epoch": 1.3621426654360023,
      "grad_norm": 1.2529011964797974,
      "learning_rate": 7.323655963574785e-06,
      "loss": 0.0222,
      "step": 832340
    },
    {
      "epoch": 1.3621753958746554,
      "grad_norm": 0.39036452770233154,
      "learning_rate": 7.323590071361267e-06,
      "loss": 0.02,
      "step": 832360
    },
    {
      "epoch": 1.3622081263133088,
      "grad_norm": 1.1166282892227173,
      "learning_rate": 7.3235241791477505e-06,
      "loss": 0.0251,
      "step": 832380
    },
    {
      "epoch": 1.3622408567519622,
      "grad_norm": 0.7916162014007568,
      "learning_rate": 7.323458286934234e-06,
      "loss": 0.0172,
      "step": 832400
    },
    {
      "epoch": 1.3622735871906155,
      "grad_norm": 0.7336727976799011,
      "learning_rate": 7.323392394720716e-06,
      "loss": 0.0248,
      "step": 832420
    },
    {
      "epoch": 1.362306317629269,
      "grad_norm": 0.5512700080871582,
      "learning_rate": 7.3233265025071996e-06,
      "loss": 0.0202,
      "step": 832440
    },
    {
      "epoch": 1.3623390480679223,
      "grad_norm": 0.12674179673194885,
      "learning_rate": 7.323260610293682e-06,
      "loss": 0.0178,
      "step": 832460
    },
    {
      "epoch": 1.3623717785065756,
      "grad_norm": 0.936747133731842,
      "learning_rate": 7.323194718080166e-06,
      "loss": 0.0294,
      "step": 832480
    },
    {
      "epoch": 1.3624045089452288,
      "grad_norm": 0.2552606463432312,
      "learning_rate": 7.323128825866648e-06,
      "loss": 0.019,
      "step": 832500
    },
    {
      "epoch": 1.3624372393838822,
      "grad_norm": 0.3324369192123413,
      "learning_rate": 7.323062933653131e-06,
      "loss": 0.021,
      "step": 832520
    },
    {
      "epoch": 1.3624699698225355,
      "grad_norm": 0.33041056990623474,
      "learning_rate": 7.322997041439613e-06,
      "loss": 0.0217,
      "step": 832540
    },
    {
      "epoch": 1.362502700261189,
      "grad_norm": 1.7216428518295288,
      "learning_rate": 7.322931149226097e-06,
      "loss": 0.0188,
      "step": 832560
    },
    {
      "epoch": 1.3625354306998423,
      "grad_norm": 0.17105983197689056,
      "learning_rate": 7.322865257012579e-06,
      "loss": 0.0207,
      "step": 832580
    },
    {
      "epoch": 1.3625681611384957,
      "grad_norm": 0.16447696089744568,
      "learning_rate": 7.322799364799062e-06,
      "loss": 0.0243,
      "step": 832600
    },
    {
      "epoch": 1.362600891577149,
      "grad_norm": 1.3643014430999756,
      "learning_rate": 7.322733472585545e-06,
      "loss": 0.0209,
      "step": 832620
    },
    {
      "epoch": 1.3626336220158022,
      "grad_norm": 0.6993334293365479,
      "learning_rate": 7.322667580372028e-06,
      "loss": 0.0277,
      "step": 832640
    },
    {
      "epoch": 1.3626663524544556,
      "grad_norm": 0.11499588191509247,
      "learning_rate": 7.3226016881585105e-06,
      "loss": 0.0212,
      "step": 832660
    },
    {
      "epoch": 1.362699082893109,
      "grad_norm": 0.7353672981262207,
      "learning_rate": 7.322535795944994e-06,
      "loss": 0.0195,
      "step": 832680
    },
    {
      "epoch": 1.3627318133317623,
      "grad_norm": 0.2826036810874939,
      "learning_rate": 7.322469903731476e-06,
      "loss": 0.0155,
      "step": 832700
    },
    {
      "epoch": 1.3627645437704157,
      "grad_norm": 1.3455322980880737,
      "learning_rate": 7.32240401151796e-06,
      "loss": 0.0216,
      "step": 832720
    },
    {
      "epoch": 1.3627972742090688,
      "grad_norm": 0.45640742778778076,
      "learning_rate": 7.322338119304443e-06,
      "loss": 0.0192,
      "step": 832740
    },
    {
      "epoch": 1.3628300046477224,
      "grad_norm": 0.4919028878211975,
      "learning_rate": 7.322272227090925e-06,
      "loss": 0.0227,
      "step": 832760
    },
    {
      "epoch": 1.3628627350863756,
      "grad_norm": 0.3105899393558502,
      "learning_rate": 7.322206334877409e-06,
      "loss": 0.0269,
      "step": 832780
    },
    {
      "epoch": 1.362895465525029,
      "grad_norm": 0.4684233069419861,
      "learning_rate": 7.3221404426638906e-06,
      "loss": 0.0231,
      "step": 832800
    },
    {
      "epoch": 1.3629281959636823,
      "grad_norm": 0.728992223739624,
      "learning_rate": 7.322074550450374e-06,
      "loss": 0.0193,
      "step": 832820
    },
    {
      "epoch": 1.3629609264023357,
      "grad_norm": 0.6404415965080261,
      "learning_rate": 7.322008658236857e-06,
      "loss": 0.0139,
      "step": 832840
    },
    {
      "epoch": 1.362993656840989,
      "grad_norm": 0.5025421977043152,
      "learning_rate": 7.32194276602334e-06,
      "loss": 0.0191,
      "step": 832860
    },
    {
      "epoch": 1.3630263872796422,
      "grad_norm": 0.790919840335846,
      "learning_rate": 7.321876873809822e-06,
      "loss": 0.02,
      "step": 832880
    },
    {
      "epoch": 1.3630591177182958,
      "grad_norm": 0.7246382236480713,
      "learning_rate": 7.321810981596306e-06,
      "loss": 0.022,
      "step": 832900
    },
    {
      "epoch": 1.363091848156949,
      "grad_norm": 0.5633883476257324,
      "learning_rate": 7.321745089382788e-06,
      "loss": 0.0238,
      "step": 832920
    },
    {
      "epoch": 1.3631245785956023,
      "grad_norm": 0.1722257286310196,
      "learning_rate": 7.3216791971692714e-06,
      "loss": 0.0153,
      "step": 832940
    },
    {
      "epoch": 1.3631573090342557,
      "grad_norm": 0.5697567462921143,
      "learning_rate": 7.321613304955753e-06,
      "loss": 0.0148,
      "step": 832960
    },
    {
      "epoch": 1.363190039472909,
      "grad_norm": 0.08637820929288864,
      "learning_rate": 7.321547412742237e-06,
      "loss": 0.0144,
      "step": 832980
    },
    {
      "epoch": 1.3632227699115624,
      "grad_norm": 0.6952688694000244,
      "learning_rate": 7.32148152052872e-06,
      "loss": 0.0188,
      "step": 833000
    },
    {
      "epoch": 1.3632555003502156,
      "grad_norm": 0.860991895198822,
      "learning_rate": 7.321415628315202e-06,
      "loss": 0.0204,
      "step": 833020
    },
    {
      "epoch": 1.363288230788869,
      "grad_norm": 1.9074598550796509,
      "learning_rate": 7.321349736101685e-06,
      "loss": 0.0307,
      "step": 833040
    },
    {
      "epoch": 1.3633209612275223,
      "grad_norm": 0.827532947063446,
      "learning_rate": 7.321283843888169e-06,
      "loss": 0.0242,
      "step": 833060
    },
    {
      "epoch": 1.3633536916661757,
      "grad_norm": 0.6292693614959717,
      "learning_rate": 7.321217951674651e-06,
      "loss": 0.0219,
      "step": 833080
    },
    {
      "epoch": 1.363386422104829,
      "grad_norm": 0.8512156009674072,
      "learning_rate": 7.321152059461134e-06,
      "loss": 0.0193,
      "step": 833100
    },
    {
      "epoch": 1.3634191525434824,
      "grad_norm": 0.23858672380447388,
      "learning_rate": 7.321086167247618e-06,
      "loss": 0.0211,
      "step": 833120
    },
    {
      "epoch": 1.3634518829821358,
      "grad_norm": 4.165837287902832,
      "learning_rate": 7.3210202750341e-06,
      "loss": 0.0247,
      "step": 833140
    },
    {
      "epoch": 1.363484613420789,
      "grad_norm": 0.4928903579711914,
      "learning_rate": 7.320954382820583e-06,
      "loss": 0.0304,
      "step": 833160
    },
    {
      "epoch": 1.3635173438594423,
      "grad_norm": 0.415494441986084,
      "learning_rate": 7.320888490607065e-06,
      "loss": 0.0166,
      "step": 833180
    },
    {
      "epoch": 1.3635500742980957,
      "grad_norm": 0.9777042865753174,
      "learning_rate": 7.320822598393549e-06,
      "loss": 0.0195,
      "step": 833200
    },
    {
      "epoch": 1.363582804736749,
      "grad_norm": 0.4799616038799286,
      "learning_rate": 7.3207567061800315e-06,
      "loss": 0.0262,
      "step": 833220
    },
    {
      "epoch": 1.3636155351754025,
      "grad_norm": 0.24621739983558655,
      "learning_rate": 7.320690813966514e-06,
      "loss": 0.026,
      "step": 833240
    },
    {
      "epoch": 1.3636482656140558,
      "grad_norm": 2.0605294704437256,
      "learning_rate": 7.320624921752997e-06,
      "loss": 0.0288,
      "step": 833260
    },
    {
      "epoch": 1.3636809960527092,
      "grad_norm": 0.19089557230472565,
      "learning_rate": 7.3205590295394806e-06,
      "loss": 0.0199,
      "step": 833280
    },
    {
      "epoch": 1.3637137264913624,
      "grad_norm": 0.2862258851528168,
      "learning_rate": 7.3204931373259624e-06,
      "loss": 0.0246,
      "step": 833300
    },
    {
      "epoch": 1.3637464569300157,
      "grad_norm": 1.8499960899353027,
      "learning_rate": 7.320427245112446e-06,
      "loss": 0.0139,
      "step": 833320
    },
    {
      "epoch": 1.363779187368669,
      "grad_norm": 3.538699150085449,
      "learning_rate": 7.320361352898928e-06,
      "loss": 0.0177,
      "step": 833340
    },
    {
      "epoch": 1.3638119178073225,
      "grad_norm": 0.619301974773407,
      "learning_rate": 7.3202954606854115e-06,
      "loss": 0.024,
      "step": 833360
    },
    {
      "epoch": 1.3638446482459758,
      "grad_norm": 0.5172594785690308,
      "learning_rate": 7.320229568471893e-06,
      "loss": 0.0238,
      "step": 833380
    },
    {
      "epoch": 1.363877378684629,
      "grad_norm": 0.9337819814682007,
      "learning_rate": 7.320163676258377e-06,
      "loss": 0.0247,
      "step": 833400
    },
    {
      "epoch": 1.3639101091232826,
      "grad_norm": 0.24794571101665497,
      "learning_rate": 7.32009778404486e-06,
      "loss": 0.0185,
      "step": 833420
    },
    {
      "epoch": 1.3639428395619357,
      "grad_norm": 1.7656712532043457,
      "learning_rate": 7.3200318918313425e-06,
      "loss": 0.0276,
      "step": 833440
    },
    {
      "epoch": 1.363975570000589,
      "grad_norm": 0.24554131925106049,
      "learning_rate": 7.319965999617826e-06,
      "loss": 0.0154,
      "step": 833460
    },
    {
      "epoch": 1.3640083004392425,
      "grad_norm": 0.7655546069145203,
      "learning_rate": 7.319900107404309e-06,
      "loss": 0.0244,
      "step": 833480
    },
    {
      "epoch": 1.3640410308778959,
      "grad_norm": 0.1380525380373001,
      "learning_rate": 7.3198342151907915e-06,
      "loss": 0.0254,
      "step": 833500
    },
    {
      "epoch": 1.3640737613165492,
      "grad_norm": 0.8539062738418579,
      "learning_rate": 7.319768322977274e-06,
      "loss": 0.0233,
      "step": 833520
    },
    {
      "epoch": 1.3641064917552024,
      "grad_norm": 0.21233321726322174,
      "learning_rate": 7.319702430763758e-06,
      "loss": 0.0265,
      "step": 833540
    },
    {
      "epoch": 1.364139222193856,
      "grad_norm": 0.5442168116569519,
      "learning_rate": 7.31963653855024e-06,
      "loss": 0.0156,
      "step": 833560
    },
    {
      "epoch": 1.3641719526325091,
      "grad_norm": 0.6545178890228271,
      "learning_rate": 7.319570646336723e-06,
      "loss": 0.0206,
      "step": 833580
    },
    {
      "epoch": 1.3642046830711625,
      "grad_norm": 0.37835001945495605,
      "learning_rate": 7.319504754123205e-06,
      "loss": 0.0235,
      "step": 833600
    },
    {
      "epoch": 1.3642374135098159,
      "grad_norm": 0.27207982540130615,
      "learning_rate": 7.319438861909689e-06,
      "loss": 0.0393,
      "step": 833620
    },
    {
      "epoch": 1.3642701439484692,
      "grad_norm": 0.2998826205730438,
      "learning_rate": 7.3193729696961716e-06,
      "loss": 0.0251,
      "step": 833640
    },
    {
      "epoch": 1.3643028743871226,
      "grad_norm": 0.9508049488067627,
      "learning_rate": 7.319307077482654e-06,
      "loss": 0.0238,
      "step": 833660
    },
    {
      "epoch": 1.3643356048257758,
      "grad_norm": 0.8221936821937561,
      "learning_rate": 7.319241185269137e-06,
      "loss": 0.0211,
      "step": 833680
    },
    {
      "epoch": 1.3643683352644291,
      "grad_norm": 0.25607243180274963,
      "learning_rate": 7.319175293055621e-06,
      "loss": 0.0283,
      "step": 833700
    },
    {
      "epoch": 1.3644010657030825,
      "grad_norm": 0.23041753470897675,
      "learning_rate": 7.3191094008421025e-06,
      "loss": 0.0256,
      "step": 833720
    },
    {
      "epoch": 1.3644337961417359,
      "grad_norm": 4.025247097015381,
      "learning_rate": 7.319043508628586e-06,
      "loss": 0.027,
      "step": 833740
    },
    {
      "epoch": 1.3644665265803892,
      "grad_norm": 0.36921679973602295,
      "learning_rate": 7.318977616415068e-06,
      "loss": 0.0129,
      "step": 833760
    },
    {
      "epoch": 1.3644992570190426,
      "grad_norm": 1.1508612632751465,
      "learning_rate": 7.318911724201552e-06,
      "loss": 0.0162,
      "step": 833780
    },
    {
      "epoch": 1.364531987457696,
      "grad_norm": 2.9286773204803467,
      "learning_rate": 7.318845831988035e-06,
      "loss": 0.0335,
      "step": 833800
    },
    {
      "epoch": 1.3645647178963491,
      "grad_norm": 0.2846156358718872,
      "learning_rate": 7.318779939774517e-06,
      "loss": 0.015,
      "step": 833820
    },
    {
      "epoch": 1.3645974483350025,
      "grad_norm": 0.7279517650604248,
      "learning_rate": 7.318714047561001e-06,
      "loss": 0.0158,
      "step": 833840
    },
    {
      "epoch": 1.3646301787736559,
      "grad_norm": 0.42350491881370544,
      "learning_rate": 7.318648155347483e-06,
      "loss": 0.0145,
      "step": 833860
    },
    {
      "epoch": 1.3646629092123093,
      "grad_norm": 0.9744261503219604,
      "learning_rate": 7.318582263133966e-06,
      "loss": 0.0415,
      "step": 833880
    },
    {
      "epoch": 1.3646956396509626,
      "grad_norm": 0.848850667476654,
      "learning_rate": 7.318516370920449e-06,
      "loss": 0.025,
      "step": 833900
    },
    {
      "epoch": 1.364728370089616,
      "grad_norm": 0.6174920201301575,
      "learning_rate": 7.3184504787069325e-06,
      "loss": 0.0282,
      "step": 833920
    },
    {
      "epoch": 1.3647611005282694,
      "grad_norm": 0.8020063638687134,
      "learning_rate": 7.318384586493414e-06,
      "loss": 0.0275,
      "step": 833940
    },
    {
      "epoch": 1.3647938309669225,
      "grad_norm": 0.5015151500701904,
      "learning_rate": 7.318318694279898e-06,
      "loss": 0.018,
      "step": 833960
    },
    {
      "epoch": 1.364826561405576,
      "grad_norm": 1.7873778343200684,
      "learning_rate": 7.31825280206638e-06,
      "loss": 0.0219,
      "step": 833980
    },
    {
      "epoch": 1.3648592918442293,
      "grad_norm": 0.6650143265724182,
      "learning_rate": 7.318186909852863e-06,
      "loss": 0.0194,
      "step": 834000
    },
    {
      "epoch": 1.3648920222828826,
      "grad_norm": 0.4395613670349121,
      "learning_rate": 7.318121017639346e-06,
      "loss": 0.0253,
      "step": 834020
    },
    {
      "epoch": 1.364924752721536,
      "grad_norm": 0.3357415497303009,
      "learning_rate": 7.318055125425829e-06,
      "loss": 0.0163,
      "step": 834040
    },
    {
      "epoch": 1.3649574831601894,
      "grad_norm": 0.4200807809829712,
      "learning_rate": 7.317989233212312e-06,
      "loss": 0.0244,
      "step": 834060
    },
    {
      "epoch": 1.3649902135988428,
      "grad_norm": 0.2313620001077652,
      "learning_rate": 7.317923340998795e-06,
      "loss": 0.0161,
      "step": 834080
    },
    {
      "epoch": 1.365022944037496,
      "grad_norm": 0.4845471680164337,
      "learning_rate": 7.317857448785277e-06,
      "loss": 0.025,
      "step": 834100
    },
    {
      "epoch": 1.3650556744761493,
      "grad_norm": 1.1255360841751099,
      "learning_rate": 7.317791556571761e-06,
      "loss": 0.0229,
      "step": 834120
    },
    {
      "epoch": 1.3650884049148027,
      "grad_norm": 0.7435327768325806,
      "learning_rate": 7.317725664358243e-06,
      "loss": 0.0227,
      "step": 834140
    },
    {
      "epoch": 1.365121135353456,
      "grad_norm": 0.4743412733078003,
      "learning_rate": 7.317659772144726e-06,
      "loss": 0.0279,
      "step": 834160
    },
    {
      "epoch": 1.3651538657921094,
      "grad_norm": 0.5025528073310852,
      "learning_rate": 7.31759387993121e-06,
      "loss": 0.0163,
      "step": 834180
    },
    {
      "epoch": 1.3651865962307626,
      "grad_norm": 0.4425012767314911,
      "learning_rate": 7.317527987717692e-06,
      "loss": 0.0221,
      "step": 834200
    },
    {
      "epoch": 1.3652193266694161,
      "grad_norm": 0.5617040991783142,
      "learning_rate": 7.317462095504175e-06,
      "loss": 0.0168,
      "step": 834220
    },
    {
      "epoch": 1.3652520571080693,
      "grad_norm": 1.1977425813674927,
      "learning_rate": 7.317396203290658e-06,
      "loss": 0.0242,
      "step": 834240
    },
    {
      "epoch": 1.3652847875467227,
      "grad_norm": 0.31400009989738464,
      "learning_rate": 7.317330311077141e-06,
      "loss": 0.0242,
      "step": 834260
    },
    {
      "epoch": 1.365317517985376,
      "grad_norm": 0.4107348322868347,
      "learning_rate": 7.3172644188636235e-06,
      "loss": 0.0204,
      "step": 834280
    },
    {
      "epoch": 1.3653502484240294,
      "grad_norm": 0.3175855576992035,
      "learning_rate": 7.317198526650107e-06,
      "loss": 0.0226,
      "step": 834300
    },
    {
      "epoch": 1.3653829788626828,
      "grad_norm": 0.9541642069816589,
      "learning_rate": 7.317132634436589e-06,
      "loss": 0.0283,
      "step": 834320
    },
    {
      "epoch": 1.365415709301336,
      "grad_norm": 0.30220475792884827,
      "learning_rate": 7.3170667422230725e-06,
      "loss": 0.0195,
      "step": 834340
    },
    {
      "epoch": 1.3654484397399895,
      "grad_norm": 2.305927276611328,
      "learning_rate": 7.3170008500095544e-06,
      "loss": 0.0296,
      "step": 834360
    },
    {
      "epoch": 1.3654811701786427,
      "grad_norm": 0.21731510758399963,
      "learning_rate": 7.316934957796038e-06,
      "loss": 0.0213,
      "step": 834380
    },
    {
      "epoch": 1.365513900617296,
      "grad_norm": 0.4658033549785614,
      "learning_rate": 7.31686906558252e-06,
      "loss": 0.0172,
      "step": 834400
    },
    {
      "epoch": 1.3655466310559494,
      "grad_norm": 1.286733627319336,
      "learning_rate": 7.3168031733690035e-06,
      "loss": 0.0237,
      "step": 834420
    },
    {
      "epoch": 1.3655793614946028,
      "grad_norm": 0.4414058327674866,
      "learning_rate": 7.316737281155486e-06,
      "loss": 0.0227,
      "step": 834440
    },
    {
      "epoch": 1.3656120919332562,
      "grad_norm": 0.7495725750923157,
      "learning_rate": 7.316671388941969e-06,
      "loss": 0.0219,
      "step": 834460
    },
    {
      "epoch": 1.3656448223719093,
      "grad_norm": 0.48424047231674194,
      "learning_rate": 7.316605496728452e-06,
      "loss": 0.0173,
      "step": 834480
    },
    {
      "epoch": 1.3656775528105627,
      "grad_norm": 0.4769722521305084,
      "learning_rate": 7.316539604514935e-06,
      "loss": 0.0238,
      "step": 834500
    },
    {
      "epoch": 1.365710283249216,
      "grad_norm": 1.2793883085250854,
      "learning_rate": 7.316473712301418e-06,
      "loss": 0.0202,
      "step": 834520
    },
    {
      "epoch": 1.3657430136878694,
      "grad_norm": 0.565385639667511,
      "learning_rate": 7.316407820087901e-06,
      "loss": 0.0235,
      "step": 834540
    },
    {
      "epoch": 1.3657757441265228,
      "grad_norm": 1.232844591140747,
      "learning_rate": 7.316341927874384e-06,
      "loss": 0.0274,
      "step": 834560
    },
    {
      "epoch": 1.3658084745651762,
      "grad_norm": 0.19391033053398132,
      "learning_rate": 7.316276035660866e-06,
      "loss": 0.0157,
      "step": 834580
    },
    {
      "epoch": 1.3658412050038296,
      "grad_norm": 0.7457100749015808,
      "learning_rate": 7.31621014344735e-06,
      "loss": 0.0254,
      "step": 834600
    },
    {
      "epoch": 1.3658739354424827,
      "grad_norm": 1.5047354698181152,
      "learning_rate": 7.316144251233832e-06,
      "loss": 0.0204,
      "step": 834620
    },
    {
      "epoch": 1.365906665881136,
      "grad_norm": 0.8377172946929932,
      "learning_rate": 7.316078359020315e-06,
      "loss": 0.0177,
      "step": 834640
    },
    {
      "epoch": 1.3659393963197894,
      "grad_norm": 0.8466809391975403,
      "learning_rate": 7.316012466806798e-06,
      "loss": 0.0169,
      "step": 834660
    },
    {
      "epoch": 1.3659721267584428,
      "grad_norm": 0.7253803610801697,
      "learning_rate": 7.315946574593281e-06,
      "loss": 0.0171,
      "step": 834680
    },
    {
      "epoch": 1.3660048571970962,
      "grad_norm": 1.103912353515625,
      "learning_rate": 7.3158806823797635e-06,
      "loss": 0.0334,
      "step": 834700
    },
    {
      "epoch": 1.3660375876357496,
      "grad_norm": 0.2571467161178589,
      "learning_rate": 7.315814790166247e-06,
      "loss": 0.0243,
      "step": 834720
    },
    {
      "epoch": 1.366070318074403,
      "grad_norm": 1.5606660842895508,
      "learning_rate": 7.315748897952729e-06,
      "loss": 0.0267,
      "step": 834740
    },
    {
      "epoch": 1.366103048513056,
      "grad_norm": 0.6667438745498657,
      "learning_rate": 7.315683005739213e-06,
      "loss": 0.0275,
      "step": 834760
    },
    {
      "epoch": 1.3661357789517095,
      "grad_norm": 0.6920247077941895,
      "learning_rate": 7.3156171135256945e-06,
      "loss": 0.0271,
      "step": 834780
    },
    {
      "epoch": 1.3661685093903628,
      "grad_norm": 0.10457335412502289,
      "learning_rate": 7.315551221312178e-06,
      "loss": 0.031,
      "step": 834800
    },
    {
      "epoch": 1.3662012398290162,
      "grad_norm": 1.1420619487762451,
      "learning_rate": 7.315485329098661e-06,
      "loss": 0.0229,
      "step": 834820
    },
    {
      "epoch": 1.3662339702676696,
      "grad_norm": 0.5697797536849976,
      "learning_rate": 7.3154194368851436e-06,
      "loss": 0.0216,
      "step": 834840
    },
    {
      "epoch": 1.366266700706323,
      "grad_norm": 0.5931769609451294,
      "learning_rate": 7.315353544671627e-06,
      "loss": 0.019,
      "step": 834860
    },
    {
      "epoch": 1.3662994311449763,
      "grad_norm": 0.4083225429058075,
      "learning_rate": 7.31528765245811e-06,
      "loss": 0.031,
      "step": 834880
    },
    {
      "epoch": 1.3663321615836295,
      "grad_norm": 1.6817512512207031,
      "learning_rate": 7.315221760244593e-06,
      "loss": 0.0216,
      "step": 834900
    },
    {
      "epoch": 1.3663648920222828,
      "grad_norm": 0.9602708220481873,
      "learning_rate": 7.315155868031075e-06,
      "loss": 0.0232,
      "step": 834920
    },
    {
      "epoch": 1.3663976224609362,
      "grad_norm": 0.5907478332519531,
      "learning_rate": 7.315089975817559e-06,
      "loss": 0.0218,
      "step": 834940
    },
    {
      "epoch": 1.3664303528995896,
      "grad_norm": 0.8982796669006348,
      "learning_rate": 7.315024083604041e-06,
      "loss": 0.0241,
      "step": 834960
    },
    {
      "epoch": 1.366463083338243,
      "grad_norm": 0.5454753637313843,
      "learning_rate": 7.3149581913905244e-06,
      "loss": 0.0262,
      "step": 834980
    },
    {
      "epoch": 1.366495813776896,
      "grad_norm": 0.4415005147457123,
      "learning_rate": 7.314892299177006e-06,
      "loss": 0.0204,
      "step": 835000
    },
    {
      "epoch": 1.3665285442155497,
      "grad_norm": 0.5239267945289612,
      "learning_rate": 7.31482640696349e-06,
      "loss": 0.0242,
      "step": 835020
    },
    {
      "epoch": 1.3665612746542029,
      "grad_norm": 0.511317789554596,
      "learning_rate": 7.314760514749973e-06,
      "loss": 0.0213,
      "step": 835040
    },
    {
      "epoch": 1.3665940050928562,
      "grad_norm": 0.2559695541858673,
      "learning_rate": 7.314694622536455e-06,
      "loss": 0.0237,
      "step": 835060
    },
    {
      "epoch": 1.3666267355315096,
      "grad_norm": 0.10737895220518112,
      "learning_rate": 7.314628730322938e-06,
      "loss": 0.0234,
      "step": 835080
    },
    {
      "epoch": 1.366659465970163,
      "grad_norm": 0.40594932436943054,
      "learning_rate": 7.314562838109422e-06,
      "loss": 0.0204,
      "step": 835100
    },
    {
      "epoch": 1.3666921964088163,
      "grad_norm": 2.2889695167541504,
      "learning_rate": 7.314496945895904e-06,
      "loss": 0.0141,
      "step": 835120
    },
    {
      "epoch": 1.3667249268474695,
      "grad_norm": 0.49178922176361084,
      "learning_rate": 7.314431053682387e-06,
      "loss": 0.0204,
      "step": 835140
    },
    {
      "epoch": 1.366757657286123,
      "grad_norm": 0.5658692717552185,
      "learning_rate": 7.314365161468869e-06,
      "loss": 0.0169,
      "step": 835160
    },
    {
      "epoch": 1.3667903877247762,
      "grad_norm": 0.41320550441741943,
      "learning_rate": 7.314299269255353e-06,
      "loss": 0.0234,
      "step": 835180
    },
    {
      "epoch": 1.3668231181634296,
      "grad_norm": 0.3359206020832062,
      "learning_rate": 7.314233377041836e-06,
      "loss": 0.0226,
      "step": 835200
    },
    {
      "epoch": 1.366855848602083,
      "grad_norm": 0.9492687582969666,
      "learning_rate": 7.314167484828318e-06,
      "loss": 0.0188,
      "step": 835220
    },
    {
      "epoch": 1.3668885790407364,
      "grad_norm": 1.0555471181869507,
      "learning_rate": 7.314101592614802e-06,
      "loss": 0.0152,
      "step": 835240
    },
    {
      "epoch": 1.3669213094793897,
      "grad_norm": 0.6752611994743347,
      "learning_rate": 7.3140357004012845e-06,
      "loss": 0.0237,
      "step": 835260
    },
    {
      "epoch": 1.3669540399180429,
      "grad_norm": 0.3113183379173279,
      "learning_rate": 7.313969808187767e-06,
      "loss": 0.0221,
      "step": 835280
    },
    {
      "epoch": 1.3669867703566962,
      "grad_norm": 0.7095134854316711,
      "learning_rate": 7.31390391597425e-06,
      "loss": 0.0152,
      "step": 835300
    },
    {
      "epoch": 1.3670195007953496,
      "grad_norm": 1.1847350597381592,
      "learning_rate": 7.3138380237607336e-06,
      "loss": 0.0144,
      "step": 835320
    },
    {
      "epoch": 1.367052231234003,
      "grad_norm": 1.3280583620071411,
      "learning_rate": 7.3137721315472155e-06,
      "loss": 0.021,
      "step": 835340
    },
    {
      "epoch": 1.3670849616726564,
      "grad_norm": 0.44135332107543945,
      "learning_rate": 7.313706239333699e-06,
      "loss": 0.0153,
      "step": 835360
    },
    {
      "epoch": 1.3671176921113097,
      "grad_norm": 0.7885057926177979,
      "learning_rate": 7.313640347120181e-06,
      "loss": 0.0209,
      "step": 835380
    },
    {
      "epoch": 1.367150422549963,
      "grad_norm": 0.392299085855484,
      "learning_rate": 7.3135744549066645e-06,
      "loss": 0.0185,
      "step": 835400
    },
    {
      "epoch": 1.3671831529886163,
      "grad_norm": 0.3508768379688263,
      "learning_rate": 7.313508562693146e-06,
      "loss": 0.0137,
      "step": 835420
    },
    {
      "epoch": 1.3672158834272696,
      "grad_norm": 0.5447826385498047,
      "learning_rate": 7.31344267047963e-06,
      "loss": 0.0249,
      "step": 835440
    },
    {
      "epoch": 1.367248613865923,
      "grad_norm": 0.48819249868392944,
      "learning_rate": 7.313376778266113e-06,
      "loss": 0.0198,
      "step": 835460
    },
    {
      "epoch": 1.3672813443045764,
      "grad_norm": 0.7954012751579285,
      "learning_rate": 7.3133108860525955e-06,
      "loss": 0.0193,
      "step": 835480
    },
    {
      "epoch": 1.3673140747432297,
      "grad_norm": 0.632304310798645,
      "learning_rate": 7.313244993839078e-06,
      "loss": 0.021,
      "step": 835500
    },
    {
      "epoch": 1.3673468051818831,
      "grad_norm": 0.7607938051223755,
      "learning_rate": 7.313179101625562e-06,
      "loss": 0.0201,
      "step": 835520
    },
    {
      "epoch": 1.3673795356205365,
      "grad_norm": 0.19299504160881042,
      "learning_rate": 7.313113209412044e-06,
      "loss": 0.0233,
      "step": 835540
    },
    {
      "epoch": 1.3674122660591896,
      "grad_norm": 0.22463534772396088,
      "learning_rate": 7.313047317198527e-06,
      "loss": 0.0202,
      "step": 835560
    },
    {
      "epoch": 1.367444996497843,
      "grad_norm": 1.3654052019119263,
      "learning_rate": 7.312981424985011e-06,
      "loss": 0.0159,
      "step": 835580
    },
    {
      "epoch": 1.3674777269364964,
      "grad_norm": 0.6510137915611267,
      "learning_rate": 7.312915532771493e-06,
      "loss": 0.0252,
      "step": 835600
    },
    {
      "epoch": 1.3675104573751498,
      "grad_norm": 0.318708598613739,
      "learning_rate": 7.312849640557976e-06,
      "loss": 0.0211,
      "step": 835620
    },
    {
      "epoch": 1.3675431878138031,
      "grad_norm": 1.529097080230713,
      "learning_rate": 7.312783748344458e-06,
      "loss": 0.0144,
      "step": 835640
    },
    {
      "epoch": 1.3675759182524565,
      "grad_norm": 0.592021107673645,
      "learning_rate": 7.312717856130942e-06,
      "loss": 0.0185,
      "step": 835660
    },
    {
      "epoch": 1.3676086486911099,
      "grad_norm": 0.517857551574707,
      "learning_rate": 7.3126519639174246e-06,
      "loss": 0.0186,
      "step": 835680
    },
    {
      "epoch": 1.367641379129763,
      "grad_norm": 1.8049898147583008,
      "learning_rate": 7.312586071703907e-06,
      "loss": 0.0261,
      "step": 835700
    },
    {
      "epoch": 1.3676741095684164,
      "grad_norm": 1.0120351314544678,
      "learning_rate": 7.31252017949039e-06,
      "loss": 0.0239,
      "step": 835720
    },
    {
      "epoch": 1.3677068400070698,
      "grad_norm": 0.6903087496757507,
      "learning_rate": 7.312454287276874e-06,
      "loss": 0.0318,
      "step": 835740
    },
    {
      "epoch": 1.3677395704457231,
      "grad_norm": 1.1532286405563354,
      "learning_rate": 7.3123883950633555e-06,
      "loss": 0.0211,
      "step": 835760
    },
    {
      "epoch": 1.3677723008843765,
      "grad_norm": 0.26857343316078186,
      "learning_rate": 7.312322502849839e-06,
      "loss": 0.0261,
      "step": 835780
    },
    {
      "epoch": 1.3678050313230297,
      "grad_norm": 0.3291264772415161,
      "learning_rate": 7.312256610636321e-06,
      "loss": 0.0212,
      "step": 835800
    },
    {
      "epoch": 1.3678377617616833,
      "grad_norm": 0.613976001739502,
      "learning_rate": 7.312190718422805e-06,
      "loss": 0.0305,
      "step": 835820
    },
    {
      "epoch": 1.3678704922003364,
      "grad_norm": 0.4218062162399292,
      "learning_rate": 7.312124826209287e-06,
      "loss": 0.0177,
      "step": 835840
    },
    {
      "epoch": 1.3679032226389898,
      "grad_norm": 0.6539185643196106,
      "learning_rate": 7.31205893399577e-06,
      "loss": 0.0263,
      "step": 835860
    },
    {
      "epoch": 1.3679359530776432,
      "grad_norm": 0.49885234236717224,
      "learning_rate": 7.311993041782253e-06,
      "loss": 0.0161,
      "step": 835880
    },
    {
      "epoch": 1.3679686835162965,
      "grad_norm": 0.4532710313796997,
      "learning_rate": 7.311927149568736e-06,
      "loss": 0.0207,
      "step": 835900
    },
    {
      "epoch": 1.36800141395495,
      "grad_norm": 0.2295566350221634,
      "learning_rate": 7.311861257355219e-06,
      "loss": 0.0199,
      "step": 835920
    },
    {
      "epoch": 1.368034144393603,
      "grad_norm": 0.568175733089447,
      "learning_rate": 7.311795365141702e-06,
      "loss": 0.0226,
      "step": 835940
    },
    {
      "epoch": 1.3680668748322564,
      "grad_norm": 0.2613367736339569,
      "learning_rate": 7.3117294729281855e-06,
      "loss": 0.0215,
      "step": 835960
    },
    {
      "epoch": 1.3680996052709098,
      "grad_norm": 0.42366790771484375,
      "learning_rate": 7.311663580714667e-06,
      "loss": 0.0203,
      "step": 835980
    },
    {
      "epoch": 1.3681323357095632,
      "grad_norm": 0.6555355787277222,
      "learning_rate": 7.311597688501151e-06,
      "loss": 0.0184,
      "step": 836000
    },
    {
      "epoch": 1.3681650661482165,
      "grad_norm": 0.47155436873435974,
      "learning_rate": 7.311531796287633e-06,
      "loss": 0.019,
      "step": 836020
    },
    {
      "epoch": 1.36819779658687,
      "grad_norm": 0.20457573235034943,
      "learning_rate": 7.3114659040741164e-06,
      "loss": 0.0238,
      "step": 836040
    },
    {
      "epoch": 1.3682305270255233,
      "grad_norm": 0.8654840588569641,
      "learning_rate": 7.311400011860599e-06,
      "loss": 0.0172,
      "step": 836060
    },
    {
      "epoch": 1.3682632574641764,
      "grad_norm": 1.140259861946106,
      "learning_rate": 7.311334119647082e-06,
      "loss": 0.0218,
      "step": 836080
    },
    {
      "epoch": 1.3682959879028298,
      "grad_norm": 0.5225324630737305,
      "learning_rate": 7.311268227433565e-06,
      "loss": 0.0197,
      "step": 836100
    },
    {
      "epoch": 1.3683287183414832,
      "grad_norm": 0.31018534302711487,
      "learning_rate": 7.311202335220048e-06,
      "loss": 0.0181,
      "step": 836120
    },
    {
      "epoch": 1.3683614487801365,
      "grad_norm": 1.2188057899475098,
      "learning_rate": 7.31113644300653e-06,
      "loss": 0.0197,
      "step": 836140
    },
    {
      "epoch": 1.36839417921879,
      "grad_norm": 0.41196006536483765,
      "learning_rate": 7.311070550793014e-06,
      "loss": 0.0185,
      "step": 836160
    },
    {
      "epoch": 1.3684269096574433,
      "grad_norm": 0.972456693649292,
      "learning_rate": 7.311004658579496e-06,
      "loss": 0.0201,
      "step": 836180
    },
    {
      "epoch": 1.3684596400960967,
      "grad_norm": 1.3244575262069702,
      "learning_rate": 7.310938766365979e-06,
      "loss": 0.0179,
      "step": 836200
    },
    {
      "epoch": 1.3684923705347498,
      "grad_norm": 0.4209122657775879,
      "learning_rate": 7.310872874152461e-06,
      "loss": 0.0236,
      "step": 836220
    },
    {
      "epoch": 1.3685251009734032,
      "grad_norm": 0.13544961810112,
      "learning_rate": 7.310806981938945e-06,
      "loss": 0.0177,
      "step": 836240
    },
    {
      "epoch": 1.3685578314120566,
      "grad_norm": 0.39309433102607727,
      "learning_rate": 7.310741089725428e-06,
      "loss": 0.0236,
      "step": 836260
    },
    {
      "epoch": 1.36859056185071,
      "grad_norm": 0.1853790581226349,
      "learning_rate": 7.31067519751191e-06,
      "loss": 0.0183,
      "step": 836280
    },
    {
      "epoch": 1.3686232922893633,
      "grad_norm": 0.8243272304534912,
      "learning_rate": 7.310609305298394e-06,
      "loss": 0.0153,
      "step": 836300
    },
    {
      "epoch": 1.3686560227280167,
      "grad_norm": 0.6749897003173828,
      "learning_rate": 7.3105434130848765e-06,
      "loss": 0.0318,
      "step": 836320
    },
    {
      "epoch": 1.36868875316667,
      "grad_norm": 0.6292752623558044,
      "learning_rate": 7.31047752087136e-06,
      "loss": 0.0189,
      "step": 836340
    },
    {
      "epoch": 1.3687214836053232,
      "grad_norm": 0.5376089811325073,
      "learning_rate": 7.310411628657842e-06,
      "loss": 0.0131,
      "step": 836360
    },
    {
      "epoch": 1.3687542140439766,
      "grad_norm": 0.5965230464935303,
      "learning_rate": 7.3103457364443255e-06,
      "loss": 0.0204,
      "step": 836380
    },
    {
      "epoch": 1.36878694448263,
      "grad_norm": 0.7258598804473877,
      "learning_rate": 7.3102798442308074e-06,
      "loss": 0.0245,
      "step": 836400
    },
    {
      "epoch": 1.3688196749212833,
      "grad_norm": 1.0508065223693848,
      "learning_rate": 7.310213952017291e-06,
      "loss": 0.0148,
      "step": 836420
    },
    {
      "epoch": 1.3688524053599367,
      "grad_norm": 0.701488733291626,
      "learning_rate": 7.310148059803773e-06,
      "loss": 0.0216,
      "step": 836440
    },
    {
      "epoch": 1.3688851357985898,
      "grad_norm": 1.1325733661651611,
      "learning_rate": 7.3100821675902565e-06,
      "loss": 0.0266,
      "step": 836460
    },
    {
      "epoch": 1.3689178662372434,
      "grad_norm": 0.6210154891014099,
      "learning_rate": 7.310016275376739e-06,
      "loss": 0.0245,
      "step": 836480
    },
    {
      "epoch": 1.3689505966758966,
      "grad_norm": 1.436891794204712,
      "learning_rate": 7.309950383163222e-06,
      "loss": 0.0165,
      "step": 836500
    },
    {
      "epoch": 1.36898332711455,
      "grad_norm": 0.20119693875312805,
      "learning_rate": 7.309884490949705e-06,
      "loss": 0.0186,
      "step": 836520
    },
    {
      "epoch": 1.3690160575532033,
      "grad_norm": 0.5310781598091125,
      "learning_rate": 7.309818598736188e-06,
      "loss": 0.0213,
      "step": 836540
    },
    {
      "epoch": 1.3690487879918567,
      "grad_norm": 1.317071795463562,
      "learning_rate": 7.30975270652267e-06,
      "loss": 0.0156,
      "step": 836560
    },
    {
      "epoch": 1.36908151843051,
      "grad_norm": 0.1835075318813324,
      "learning_rate": 7.309686814309154e-06,
      "loss": 0.0299,
      "step": 836580
    },
    {
      "epoch": 1.3691142488691632,
      "grad_norm": 0.2819579541683197,
      "learning_rate": 7.309620922095636e-06,
      "loss": 0.0159,
      "step": 836600
    },
    {
      "epoch": 1.3691469793078168,
      "grad_norm": 0.30395978689193726,
      "learning_rate": 7.309555029882119e-06,
      "loss": 0.0175,
      "step": 836620
    },
    {
      "epoch": 1.36917970974647,
      "grad_norm": 0.3443502187728882,
      "learning_rate": 7.309489137668603e-06,
      "loss": 0.0296,
      "step": 836640
    },
    {
      "epoch": 1.3692124401851233,
      "grad_norm": 0.6892077922821045,
      "learning_rate": 7.309423245455085e-06,
      "loss": 0.0295,
      "step": 836660
    },
    {
      "epoch": 1.3692451706237767,
      "grad_norm": 0.5629064440727234,
      "learning_rate": 7.309357353241568e-06,
      "loss": 0.0214,
      "step": 836680
    },
    {
      "epoch": 1.36927790106243,
      "grad_norm": 0.241729274392128,
      "learning_rate": 7.309291461028051e-06,
      "loss": 0.0215,
      "step": 836700
    },
    {
      "epoch": 1.3693106315010835,
      "grad_norm": 0.3977445363998413,
      "learning_rate": 7.309225568814534e-06,
      "loss": 0.0231,
      "step": 836720
    },
    {
      "epoch": 1.3693433619397366,
      "grad_norm": 0.4942362308502197,
      "learning_rate": 7.3091596766010166e-06,
      "loss": 0.0147,
      "step": 836740
    },
    {
      "epoch": 1.36937609237839,
      "grad_norm": 0.29874134063720703,
      "learning_rate": 7.3090937843875e-06,
      "loss": 0.0243,
      "step": 836760
    },
    {
      "epoch": 1.3694088228170433,
      "grad_norm": 0.30657342076301575,
      "learning_rate": 7.309027892173982e-06,
      "loss": 0.0195,
      "step": 836780
    },
    {
      "epoch": 1.3694415532556967,
      "grad_norm": 0.7135152220726013,
      "learning_rate": 7.308961999960466e-06,
      "loss": 0.0255,
      "step": 836800
    },
    {
      "epoch": 1.36947428369435,
      "grad_norm": 0.15304169058799744,
      "learning_rate": 7.3088961077469475e-06,
      "loss": 0.0235,
      "step": 836820
    },
    {
      "epoch": 1.3695070141330035,
      "grad_norm": 0.6547492146492004,
      "learning_rate": 7.308830215533431e-06,
      "loss": 0.026,
      "step": 836840
    },
    {
      "epoch": 1.3695397445716568,
      "grad_norm": 0.7719365954399109,
      "learning_rate": 7.308764323319914e-06,
      "loss": 0.0251,
      "step": 836860
    },
    {
      "epoch": 1.36957247501031,
      "grad_norm": 0.7862589955329895,
      "learning_rate": 7.308698431106397e-06,
      "loss": 0.0232,
      "step": 836880
    },
    {
      "epoch": 1.3696052054489634,
      "grad_norm": 0.8509100079536438,
      "learning_rate": 7.308632538892879e-06,
      "loss": 0.0232,
      "step": 836900
    },
    {
      "epoch": 1.3696379358876167,
      "grad_norm": 0.2981654703617096,
      "learning_rate": 7.308566646679363e-06,
      "loss": 0.0245,
      "step": 836920
    },
    {
      "epoch": 1.36967066632627,
      "grad_norm": 1.641638994216919,
      "learning_rate": 7.308500754465845e-06,
      "loss": 0.0349,
      "step": 836940
    },
    {
      "epoch": 1.3697033967649235,
      "grad_norm": 1.3661878108978271,
      "learning_rate": 7.308434862252328e-06,
      "loss": 0.0225,
      "step": 836960
    },
    {
      "epoch": 1.3697361272035768,
      "grad_norm": 0.33341071009635925,
      "learning_rate": 7.308368970038812e-06,
      "loss": 0.0178,
      "step": 836980
    },
    {
      "epoch": 1.3697688576422302,
      "grad_norm": 1.1690248250961304,
      "learning_rate": 7.308303077825294e-06,
      "loss": 0.0201,
      "step": 837000
    },
    {
      "epoch": 1.3698015880808834,
      "grad_norm": 0.8425664305686951,
      "learning_rate": 7.3082371856117775e-06,
      "loss": 0.0191,
      "step": 837020
    },
    {
      "epoch": 1.3698343185195367,
      "grad_norm": 0.8850679397583008,
      "learning_rate": 7.308171293398259e-06,
      "loss": 0.0254,
      "step": 837040
    },
    {
      "epoch": 1.3698670489581901,
      "grad_norm": 0.6656501889228821,
      "learning_rate": 7.308105401184743e-06,
      "loss": 0.0195,
      "step": 837060
    },
    {
      "epoch": 1.3698997793968435,
      "grad_norm": 8.113310813903809,
      "learning_rate": 7.308039508971226e-06,
      "loss": 0.0302,
      "step": 837080
    },
    {
      "epoch": 1.3699325098354969,
      "grad_norm": 0.5950483679771423,
      "learning_rate": 7.307973616757708e-06,
      "loss": 0.0212,
      "step": 837100
    },
    {
      "epoch": 1.3699652402741502,
      "grad_norm": 1.2098842859268188,
      "learning_rate": 7.307907724544191e-06,
      "loss": 0.0235,
      "step": 837120
    },
    {
      "epoch": 1.3699979707128036,
      "grad_norm": 0.3612317144870758,
      "learning_rate": 7.307841832330675e-06,
      "loss": 0.0224,
      "step": 837140
    },
    {
      "epoch": 1.3700307011514568,
      "grad_norm": 0.8866773247718811,
      "learning_rate": 7.307775940117157e-06,
      "loss": 0.0341,
      "step": 837160
    },
    {
      "epoch": 1.3700634315901101,
      "grad_norm": 0.24866217374801636,
      "learning_rate": 7.30771004790364e-06,
      "loss": 0.0211,
      "step": 837180
    },
    {
      "epoch": 1.3700961620287635,
      "grad_norm": 1.1106058359146118,
      "learning_rate": 7.307644155690122e-06,
      "loss": 0.0253,
      "step": 837200
    },
    {
      "epoch": 1.3701288924674169,
      "grad_norm": 0.706409752368927,
      "learning_rate": 7.307578263476606e-06,
      "loss": 0.0213,
      "step": 837220
    },
    {
      "epoch": 1.3701616229060702,
      "grad_norm": 0.6136214733123779,
      "learning_rate": 7.307512371263088e-06,
      "loss": 0.0167,
      "step": 837240
    },
    {
      "epoch": 1.3701943533447234,
      "grad_norm": 0.39049723744392395,
      "learning_rate": 7.307446479049571e-06,
      "loss": 0.02,
      "step": 837260
    },
    {
      "epoch": 1.370227083783377,
      "grad_norm": 0.6056624054908752,
      "learning_rate": 7.307380586836054e-06,
      "loss": 0.0242,
      "step": 837280
    },
    {
      "epoch": 1.3702598142220301,
      "grad_norm": 0.2728603482246399,
      "learning_rate": 7.307314694622537e-06,
      "loss": 0.0165,
      "step": 837300
    },
    {
      "epoch": 1.3702925446606835,
      "grad_norm": 0.7446701526641846,
      "learning_rate": 7.30724880240902e-06,
      "loss": 0.0255,
      "step": 837320
    },
    {
      "epoch": 1.3703252750993369,
      "grad_norm": 0.12492556124925613,
      "learning_rate": 7.307182910195503e-06,
      "loss": 0.0171,
      "step": 837340
    },
    {
      "epoch": 1.3703580055379903,
      "grad_norm": 0.7458791136741638,
      "learning_rate": 7.307117017981986e-06,
      "loss": 0.0265,
      "step": 837360
    },
    {
      "epoch": 1.3703907359766436,
      "grad_norm": 0.5179635882377625,
      "learning_rate": 7.3070511257684685e-06,
      "loss": 0.0183,
      "step": 837380
    },
    {
      "epoch": 1.3704234664152968,
      "grad_norm": 0.508755087852478,
      "learning_rate": 7.306985233554952e-06,
      "loss": 0.0252,
      "step": 837400
    },
    {
      "epoch": 1.3704561968539504,
      "grad_norm": 0.6407957673072815,
      "learning_rate": 7.306919341341434e-06,
      "loss": 0.0204,
      "step": 837420
    },
    {
      "epoch": 1.3704889272926035,
      "grad_norm": 0.46186816692352295,
      "learning_rate": 7.3068534491279175e-06,
      "loss": 0.0216,
      "step": 837440
    },
    {
      "epoch": 1.370521657731257,
      "grad_norm": 0.48929446935653687,
      "learning_rate": 7.306787556914399e-06,
      "loss": 0.0245,
      "step": 837460
    },
    {
      "epoch": 1.3705543881699103,
      "grad_norm": 0.9690350890159607,
      "learning_rate": 7.306721664700883e-06,
      "loss": 0.0231,
      "step": 837480
    },
    {
      "epoch": 1.3705871186085636,
      "grad_norm": 0.2897183895111084,
      "learning_rate": 7.306655772487366e-06,
      "loss": 0.0197,
      "step": 837500
    },
    {
      "epoch": 1.370619849047217,
      "grad_norm": 0.39430832862854004,
      "learning_rate": 7.3065898802738485e-06,
      "loss": 0.0179,
      "step": 837520
    },
    {
      "epoch": 1.3706525794858702,
      "grad_norm": 1.7937538623809814,
      "learning_rate": 7.306523988060331e-06,
      "loss": 0.0236,
      "step": 837540
    },
    {
      "epoch": 1.3706853099245235,
      "grad_norm": 0.6033025979995728,
      "learning_rate": 7.306458095846815e-06,
      "loss": 0.0188,
      "step": 837560
    },
    {
      "epoch": 1.370718040363177,
      "grad_norm": 0.980320155620575,
      "learning_rate": 7.306392203633297e-06,
      "loss": 0.0247,
      "step": 837580
    },
    {
      "epoch": 1.3707507708018303,
      "grad_norm": 0.6754004955291748,
      "learning_rate": 7.30632631141978e-06,
      "loss": 0.0275,
      "step": 837600
    },
    {
      "epoch": 1.3707835012404836,
      "grad_norm": 1.0871185064315796,
      "learning_rate": 7.306260419206262e-06,
      "loss": 0.0181,
      "step": 837620
    },
    {
      "epoch": 1.370816231679137,
      "grad_norm": 1.5725983381271362,
      "learning_rate": 7.306194526992746e-06,
      "loss": 0.0305,
      "step": 837640
    },
    {
      "epoch": 1.3708489621177904,
      "grad_norm": 1.4911302328109741,
      "learning_rate": 7.306128634779229e-06,
      "loss": 0.0181,
      "step": 837660
    },
    {
      "epoch": 1.3708816925564435,
      "grad_norm": 0.7995075583457947,
      "learning_rate": 7.306062742565711e-06,
      "loss": 0.0273,
      "step": 837680
    },
    {
      "epoch": 1.370914422995097,
      "grad_norm": 0.511466920375824,
      "learning_rate": 7.305996850352195e-06,
      "loss": 0.0222,
      "step": 837700
    },
    {
      "epoch": 1.3709471534337503,
      "grad_norm": 0.2902051508426666,
      "learning_rate": 7.305930958138678e-06,
      "loss": 0.0209,
      "step": 837720
    },
    {
      "epoch": 1.3709798838724037,
      "grad_norm": 0.5258274674415588,
      "learning_rate": 7.30586506592516e-06,
      "loss": 0.0198,
      "step": 837740
    },
    {
      "epoch": 1.371012614311057,
      "grad_norm": 0.3609149158000946,
      "learning_rate": 7.305799173711643e-06,
      "loss": 0.0195,
      "step": 837760
    },
    {
      "epoch": 1.3710453447497104,
      "grad_norm": 0.8804035186767578,
      "learning_rate": 7.305733281498127e-06,
      "loss": 0.0325,
      "step": 837780
    },
    {
      "epoch": 1.3710780751883638,
      "grad_norm": 1.4188705682754517,
      "learning_rate": 7.3056673892846085e-06,
      "loss": 0.0245,
      "step": 837800
    },
    {
      "epoch": 1.371110805627017,
      "grad_norm": 0.43231162428855896,
      "learning_rate": 7.305601497071092e-06,
      "loss": 0.028,
      "step": 837820
    },
    {
      "epoch": 1.3711435360656703,
      "grad_norm": 0.3209167420864105,
      "learning_rate": 7.305535604857574e-06,
      "loss": 0.0311,
      "step": 837840
    },
    {
      "epoch": 1.3711762665043237,
      "grad_norm": 0.8279076218605042,
      "learning_rate": 7.305469712644058e-06,
      "loss": 0.018,
      "step": 837860
    },
    {
      "epoch": 1.371208996942977,
      "grad_norm": 0.22643156349658966,
      "learning_rate": 7.30540382043054e-06,
      "loss": 0.0204,
      "step": 837880
    },
    {
      "epoch": 1.3712417273816304,
      "grad_norm": 0.3079885244369507,
      "learning_rate": 7.305337928217023e-06,
      "loss": 0.0243,
      "step": 837900
    },
    {
      "epoch": 1.3712744578202838,
      "grad_norm": 0.8485124111175537,
      "learning_rate": 7.305272036003506e-06,
      "loss": 0.0246,
      "step": 837920
    },
    {
      "epoch": 1.3713071882589372,
      "grad_norm": 3.22257924079895,
      "learning_rate": 7.305206143789989e-06,
      "loss": 0.0223,
      "step": 837940
    },
    {
      "epoch": 1.3713399186975903,
      "grad_norm": 0.7872921824455261,
      "learning_rate": 7.305140251576471e-06,
      "loss": 0.0356,
      "step": 837960
    },
    {
      "epoch": 1.3713726491362437,
      "grad_norm": 0.431577205657959,
      "learning_rate": 7.305074359362955e-06,
      "loss": 0.034,
      "step": 837980
    },
    {
      "epoch": 1.371405379574897,
      "grad_norm": 0.4910781681537628,
      "learning_rate": 7.305008467149437e-06,
      "loss": 0.0166,
      "step": 838000
    },
    {
      "epoch": 1.3714381100135504,
      "grad_norm": 1.0639193058013916,
      "learning_rate": 7.30494257493592e-06,
      "loss": 0.0257,
      "step": 838020
    },
    {
      "epoch": 1.3714708404522038,
      "grad_norm": 0.33830583095550537,
      "learning_rate": 7.304876682722404e-06,
      "loss": 0.0177,
      "step": 838040
    },
    {
      "epoch": 1.371503570890857,
      "grad_norm": 0.4690200388431549,
      "learning_rate": 7.304810790508886e-06,
      "loss": 0.0191,
      "step": 838060
    },
    {
      "epoch": 1.3715363013295105,
      "grad_norm": 0.5142063498497009,
      "learning_rate": 7.3047448982953694e-06,
      "loss": 0.019,
      "step": 838080
    },
    {
      "epoch": 1.3715690317681637,
      "grad_norm": 0.4662778377532959,
      "learning_rate": 7.304679006081852e-06,
      "loss": 0.0212,
      "step": 838100
    },
    {
      "epoch": 1.371601762206817,
      "grad_norm": 0.6450607180595398,
      "learning_rate": 7.304613113868335e-06,
      "loss": 0.0277,
      "step": 838120
    },
    {
      "epoch": 1.3716344926454704,
      "grad_norm": 0.5730003714561462,
      "learning_rate": 7.304547221654818e-06,
      "loss": 0.0175,
      "step": 838140
    },
    {
      "epoch": 1.3716672230841238,
      "grad_norm": 0.8758180141448975,
      "learning_rate": 7.304481329441301e-06,
      "loss": 0.0217,
      "step": 838160
    },
    {
      "epoch": 1.3716999535227772,
      "grad_norm": 0.342666894197464,
      "learning_rate": 7.304415437227783e-06,
      "loss": 0.021,
      "step": 838180
    },
    {
      "epoch": 1.3717326839614303,
      "grad_norm": 0.3798907697200775,
      "learning_rate": 7.304349545014267e-06,
      "loss": 0.0203,
      "step": 838200
    },
    {
      "epoch": 1.371765414400084,
      "grad_norm": 0.32658278942108154,
      "learning_rate": 7.304283652800749e-06,
      "loss": 0.0208,
      "step": 838220
    },
    {
      "epoch": 1.371798144838737,
      "grad_norm": 0.5596602559089661,
      "learning_rate": 7.304217760587232e-06,
      "loss": 0.0129,
      "step": 838240
    },
    {
      "epoch": 1.3718308752773904,
      "grad_norm": 0.16723276674747467,
      "learning_rate": 7.304151868373714e-06,
      "loss": 0.0192,
      "step": 838260
    },
    {
      "epoch": 1.3718636057160438,
      "grad_norm": 0.7799373269081116,
      "learning_rate": 7.304085976160198e-06,
      "loss": 0.0196,
      "step": 838280
    },
    {
      "epoch": 1.3718963361546972,
      "grad_norm": 2.0024309158325195,
      "learning_rate": 7.30402008394668e-06,
      "loss": 0.0199,
      "step": 838300
    },
    {
      "epoch": 1.3719290665933506,
      "grad_norm": 0.4922392666339874,
      "learning_rate": 7.303954191733163e-06,
      "loss": 0.0194,
      "step": 838320
    },
    {
      "epoch": 1.3719617970320037,
      "grad_norm": 0.5659220814704895,
      "learning_rate": 7.303888299519646e-06,
      "loss": 0.0234,
      "step": 838340
    },
    {
      "epoch": 1.371994527470657,
      "grad_norm": 1.050802230834961,
      "learning_rate": 7.3038224073061295e-06,
      "loss": 0.0212,
      "step": 838360
    },
    {
      "epoch": 1.3720272579093105,
      "grad_norm": 0.6099808812141418,
      "learning_rate": 7.303756515092612e-06,
      "loss": 0.0214,
      "step": 838380
    },
    {
      "epoch": 1.3720599883479638,
      "grad_norm": 0.9882131814956665,
      "learning_rate": 7.303690622879095e-06,
      "loss": 0.0274,
      "step": 838400
    },
    {
      "epoch": 1.3720927187866172,
      "grad_norm": 0.1539258062839508,
      "learning_rate": 7.3036247306655786e-06,
      "loss": 0.018,
      "step": 838420
    },
    {
      "epoch": 1.3721254492252706,
      "grad_norm": 0.34682223200798035,
      "learning_rate": 7.3035588384520604e-06,
      "loss": 0.0151,
      "step": 838440
    },
    {
      "epoch": 1.372158179663924,
      "grad_norm": 0.7384827733039856,
      "learning_rate": 7.303492946238544e-06,
      "loss": 0.0215,
      "step": 838460
    },
    {
      "epoch": 1.372190910102577,
      "grad_norm": 2.9942848682403564,
      "learning_rate": 7.303427054025026e-06,
      "loss": 0.0132,
      "step": 838480
    },
    {
      "epoch": 1.3722236405412305,
      "grad_norm": 0.6805488467216492,
      "learning_rate": 7.3033611618115095e-06,
      "loss": 0.0212,
      "step": 838500
    },
    {
      "epoch": 1.3722563709798838,
      "grad_norm": 0.42602333426475525,
      "learning_rate": 7.303295269597992e-06,
      "loss": 0.0257,
      "step": 838520
    },
    {
      "epoch": 1.3722891014185372,
      "grad_norm": 0.3105238080024719,
      "learning_rate": 7.303229377384475e-06,
      "loss": 0.0237,
      "step": 838540
    },
    {
      "epoch": 1.3723218318571906,
      "grad_norm": 0.6846497654914856,
      "learning_rate": 7.303163485170958e-06,
      "loss": 0.027,
      "step": 838560
    },
    {
      "epoch": 1.372354562295844,
      "grad_norm": 0.445205956697464,
      "learning_rate": 7.303097592957441e-06,
      "loss": 0.0327,
      "step": 838580
    },
    {
      "epoch": 1.3723872927344973,
      "grad_norm": 0.5013672709465027,
      "learning_rate": 7.303031700743923e-06,
      "loss": 0.0232,
      "step": 838600
    },
    {
      "epoch": 1.3724200231731505,
      "grad_norm": 0.7773439288139343,
      "learning_rate": 7.302965808530407e-06,
      "loss": 0.0232,
      "step": 838620
    },
    {
      "epoch": 1.3724527536118039,
      "grad_norm": 0.3511868417263031,
      "learning_rate": 7.302899916316889e-06,
      "loss": 0.0296,
      "step": 838640
    },
    {
      "epoch": 1.3724854840504572,
      "grad_norm": 0.5728380084037781,
      "learning_rate": 7.302834024103372e-06,
      "loss": 0.0236,
      "step": 838660
    },
    {
      "epoch": 1.3725182144891106,
      "grad_norm": 0.1972501277923584,
      "learning_rate": 7.302768131889855e-06,
      "loss": 0.0229,
      "step": 838680
    },
    {
      "epoch": 1.372550944927764,
      "grad_norm": 0.8671363592147827,
      "learning_rate": 7.302702239676338e-06,
      "loss": 0.0202,
      "step": 838700
    },
    {
      "epoch": 1.3725836753664173,
      "grad_norm": 0.6207861304283142,
      "learning_rate": 7.302636347462821e-06,
      "loss": 0.0235,
      "step": 838720
    },
    {
      "epoch": 1.3726164058050707,
      "grad_norm": 2.0320122241973877,
      "learning_rate": 7.302570455249304e-06,
      "loss": 0.0358,
      "step": 838740
    },
    {
      "epoch": 1.3726491362437239,
      "grad_norm": 1.1101382970809937,
      "learning_rate": 7.302504563035787e-06,
      "loss": 0.0181,
      "step": 838760
    },
    {
      "epoch": 1.3726818666823772,
      "grad_norm": 0.2861188054084778,
      "learning_rate": 7.3024386708222696e-06,
      "loss": 0.0146,
      "step": 838780
    },
    {
      "epoch": 1.3727145971210306,
      "grad_norm": 0.19629508256912231,
      "learning_rate": 7.302372778608753e-06,
      "loss": 0.0138,
      "step": 838800
    },
    {
      "epoch": 1.372747327559684,
      "grad_norm": 0.7884418368339539,
      "learning_rate": 7.302306886395235e-06,
      "loss": 0.023,
      "step": 838820
    },
    {
      "epoch": 1.3727800579983374,
      "grad_norm": 2.236907958984375,
      "learning_rate": 7.302240994181719e-06,
      "loss": 0.0268,
      "step": 838840
    },
    {
      "epoch": 1.3728127884369905,
      "grad_norm": 0.3517472445964813,
      "learning_rate": 7.3021751019682005e-06,
      "loss": 0.023,
      "step": 838860
    },
    {
      "epoch": 1.372845518875644,
      "grad_norm": 0.5345189571380615,
      "learning_rate": 7.302109209754684e-06,
      "loss": 0.0182,
      "step": 838880
    },
    {
      "epoch": 1.3728782493142972,
      "grad_norm": 1.1306073665618896,
      "learning_rate": 7.302043317541167e-06,
      "loss": 0.0227,
      "step": 838900
    },
    {
      "epoch": 1.3729109797529506,
      "grad_norm": 0.4379909932613373,
      "learning_rate": 7.30197742532765e-06,
      "loss": 0.0211,
      "step": 838920
    },
    {
      "epoch": 1.372943710191604,
      "grad_norm": 1.594844937324524,
      "learning_rate": 7.301911533114132e-06,
      "loss": 0.0277,
      "step": 838940
    },
    {
      "epoch": 1.3729764406302574,
      "grad_norm": 0.586295485496521,
      "learning_rate": 7.301845640900616e-06,
      "loss": 0.035,
      "step": 838960
    },
    {
      "epoch": 1.3730091710689107,
      "grad_norm": 0.18360190093517303,
      "learning_rate": 7.301779748687098e-06,
      "loss": 0.02,
      "step": 838980
    },
    {
      "epoch": 1.3730419015075639,
      "grad_norm": 0.42083314061164856,
      "learning_rate": 7.301713856473581e-06,
      "loss": 0.0254,
      "step": 839000
    },
    {
      "epoch": 1.3730746319462173,
      "grad_norm": 0.22924983501434326,
      "learning_rate": 7.301647964260063e-06,
      "loss": 0.0158,
      "step": 839020
    },
    {
      "epoch": 1.3731073623848706,
      "grad_norm": 0.2614484429359436,
      "learning_rate": 7.301582072046547e-06,
      "loss": 0.0139,
      "step": 839040
    },
    {
      "epoch": 1.373140092823524,
      "grad_norm": 1.4163726568222046,
      "learning_rate": 7.301516179833029e-06,
      "loss": 0.0363,
      "step": 839060
    },
    {
      "epoch": 1.3731728232621774,
      "grad_norm": 1.096039056777954,
      "learning_rate": 7.301450287619512e-06,
      "loss": 0.0172,
      "step": 839080
    },
    {
      "epoch": 1.3732055537008307,
      "grad_norm": 0.6808323264122009,
      "learning_rate": 7.301384395405996e-06,
      "loss": 0.0214,
      "step": 839100
    },
    {
      "epoch": 1.3732382841394841,
      "grad_norm": 0.25253432989120483,
      "learning_rate": 7.301318503192479e-06,
      "loss": 0.0196,
      "step": 839120
    },
    {
      "epoch": 1.3732710145781373,
      "grad_norm": 0.8168045282363892,
      "learning_rate": 7.301252610978961e-06,
      "loss": 0.0232,
      "step": 839140
    },
    {
      "epoch": 1.3733037450167906,
      "grad_norm": 0.1894526183605194,
      "learning_rate": 7.301186718765444e-06,
      "loss": 0.0251,
      "step": 839160
    },
    {
      "epoch": 1.373336475455444,
      "grad_norm": 0.356234073638916,
      "learning_rate": 7.301120826551928e-06,
      "loss": 0.0151,
      "step": 839180
    },
    {
      "epoch": 1.3733692058940974,
      "grad_norm": 0.4246813952922821,
      "learning_rate": 7.30105493433841e-06,
      "loss": 0.026,
      "step": 839200
    },
    {
      "epoch": 1.3734019363327508,
      "grad_norm": 0.29461926221847534,
      "learning_rate": 7.300989042124893e-06,
      "loss": 0.0192,
      "step": 839220
    },
    {
      "epoch": 1.3734346667714041,
      "grad_norm": 0.1974928230047226,
      "learning_rate": 7.300923149911375e-06,
      "loss": 0.0212,
      "step": 839240
    },
    {
      "epoch": 1.3734673972100575,
      "grad_norm": 1.5899909734725952,
      "learning_rate": 7.300857257697859e-06,
      "loss": 0.0228,
      "step": 839260
    },
    {
      "epoch": 1.3735001276487107,
      "grad_norm": 1.3664079904556274,
      "learning_rate": 7.300791365484341e-06,
      "loss": 0.0208,
      "step": 839280
    },
    {
      "epoch": 1.373532858087364,
      "grad_norm": 0.3041419982910156,
      "learning_rate": 7.300725473270824e-06,
      "loss": 0.0306,
      "step": 839300
    },
    {
      "epoch": 1.3735655885260174,
      "grad_norm": 0.5914645195007324,
      "learning_rate": 7.300659581057307e-06,
      "loss": 0.0221,
      "step": 839320
    },
    {
      "epoch": 1.3735983189646708,
      "grad_norm": 0.25496843457221985,
      "learning_rate": 7.30059368884379e-06,
      "loss": 0.0167,
      "step": 839340
    },
    {
      "epoch": 1.3736310494033241,
      "grad_norm": 0.9954624176025391,
      "learning_rate": 7.300527796630272e-06,
      "loss": 0.0186,
      "step": 839360
    },
    {
      "epoch": 1.3736637798419775,
      "grad_norm": 0.282907634973526,
      "learning_rate": 7.300461904416756e-06,
      "loss": 0.0233,
      "step": 839380
    },
    {
      "epoch": 1.373696510280631,
      "grad_norm": 0.3812759220600128,
      "learning_rate": 7.300396012203238e-06,
      "loss": 0.0221,
      "step": 839400
    },
    {
      "epoch": 1.373729240719284,
      "grad_norm": 1.8343658447265625,
      "learning_rate": 7.3003301199897215e-06,
      "loss": 0.0241,
      "step": 839420
    },
    {
      "epoch": 1.3737619711579374,
      "grad_norm": 1.5386351346969604,
      "learning_rate": 7.300264227776205e-06,
      "loss": 0.0201,
      "step": 839440
    },
    {
      "epoch": 1.3737947015965908,
      "grad_norm": 1.3463777303695679,
      "learning_rate": 7.300198335562687e-06,
      "loss": 0.0397,
      "step": 839460
    },
    {
      "epoch": 1.3738274320352442,
      "grad_norm": 1.6485811471939087,
      "learning_rate": 7.3001324433491705e-06,
      "loss": 0.0216,
      "step": 839480
    },
    {
      "epoch": 1.3738601624738975,
      "grad_norm": 0.8540704846382141,
      "learning_rate": 7.3000665511356524e-06,
      "loss": 0.0191,
      "step": 839500
    },
    {
      "epoch": 1.3738928929125507,
      "grad_norm": 0.07459457963705063,
      "learning_rate": 7.300000658922136e-06,
      "loss": 0.0189,
      "step": 839520
    },
    {
      "epoch": 1.3739256233512043,
      "grad_norm": 0.8381592631340027,
      "learning_rate": 7.299934766708619e-06,
      "loss": 0.0225,
      "step": 839540
    },
    {
      "epoch": 1.3739583537898574,
      "grad_norm": 1.4874234199523926,
      "learning_rate": 7.2998688744951015e-06,
      "loss": 0.0189,
      "step": 839560
    },
    {
      "epoch": 1.3739910842285108,
      "grad_norm": 2.293792486190796,
      "learning_rate": 7.299802982281584e-06,
      "loss": 0.0196,
      "step": 839580
    },
    {
      "epoch": 1.3740238146671642,
      "grad_norm": 0.3343888819217682,
      "learning_rate": 7.299737090068068e-06,
      "loss": 0.028,
      "step": 839600
    },
    {
      "epoch": 1.3740565451058175,
      "grad_norm": 0.5603101253509521,
      "learning_rate": 7.29967119785455e-06,
      "loss": 0.0261,
      "step": 839620
    },
    {
      "epoch": 1.374089275544471,
      "grad_norm": 0.9026576280593872,
      "learning_rate": 7.299605305641033e-06,
      "loss": 0.0221,
      "step": 839640
    },
    {
      "epoch": 1.374122005983124,
      "grad_norm": 0.5578232407569885,
      "learning_rate": 7.299539413427515e-06,
      "loss": 0.0274,
      "step": 839660
    },
    {
      "epoch": 1.3741547364217777,
      "grad_norm": 0.4380607604980469,
      "learning_rate": 7.299473521213999e-06,
      "loss": 0.0338,
      "step": 839680
    },
    {
      "epoch": 1.3741874668604308,
      "grad_norm": 0.17889459431171417,
      "learning_rate": 7.2994076290004815e-06,
      "loss": 0.0225,
      "step": 839700
    },
    {
      "epoch": 1.3742201972990842,
      "grad_norm": 1.9348371028900146,
      "learning_rate": 7.299341736786964e-06,
      "loss": 0.0194,
      "step": 839720
    },
    {
      "epoch": 1.3742529277377376,
      "grad_norm": 1.7850522994995117,
      "learning_rate": 7.299275844573447e-06,
      "loss": 0.0274,
      "step": 839740
    },
    {
      "epoch": 1.374285658176391,
      "grad_norm": 0.6352603435516357,
      "learning_rate": 7.299209952359931e-06,
      "loss": 0.0228,
      "step": 839760
    },
    {
      "epoch": 1.3743183886150443,
      "grad_norm": 0.7738258242607117,
      "learning_rate": 7.299144060146413e-06,
      "loss": 0.0239,
      "step": 839780
    },
    {
      "epoch": 1.3743511190536974,
      "grad_norm": 1.1048011779785156,
      "learning_rate": 7.299078167932896e-06,
      "loss": 0.0209,
      "step": 839800
    },
    {
      "epoch": 1.3743838494923508,
      "grad_norm": 0.07527206093072891,
      "learning_rate": 7.29901227571938e-06,
      "loss": 0.0209,
      "step": 839820
    },
    {
      "epoch": 1.3744165799310042,
      "grad_norm": 0.5035596489906311,
      "learning_rate": 7.2989463835058615e-06,
      "loss": 0.0224,
      "step": 839840
    },
    {
      "epoch": 1.3744493103696576,
      "grad_norm": 0.6273857355117798,
      "learning_rate": 7.298880491292345e-06,
      "loss": 0.0181,
      "step": 839860
    },
    {
      "epoch": 1.374482040808311,
      "grad_norm": 1.7792901992797852,
      "learning_rate": 7.298814599078827e-06,
      "loss": 0.0272,
      "step": 839880
    },
    {
      "epoch": 1.3745147712469643,
      "grad_norm": 0.390984445810318,
      "learning_rate": 7.298748706865311e-06,
      "loss": 0.0198,
      "step": 839900
    },
    {
      "epoch": 1.3745475016856177,
      "grad_norm": 0.2481766790151596,
      "learning_rate": 7.298682814651793e-06,
      "loss": 0.0181,
      "step": 839920
    },
    {
      "epoch": 1.3745802321242708,
      "grad_norm": 1.417152762413025,
      "learning_rate": 7.298616922438276e-06,
      "loss": 0.0161,
      "step": 839940
    },
    {
      "epoch": 1.3746129625629242,
      "grad_norm": 0.24451465904712677,
      "learning_rate": 7.298551030224759e-06,
      "loss": 0.0227,
      "step": 839960
    },
    {
      "epoch": 1.3746456930015776,
      "grad_norm": 1.296995759010315,
      "learning_rate": 7.298485138011242e-06,
      "loss": 0.0259,
      "step": 839980
    },
    {
      "epoch": 1.374678423440231,
      "grad_norm": 0.6995210647583008,
      "learning_rate": 7.298419245797724e-06,
      "loss": 0.026,
      "step": 840000
    },
    {
      "epoch": 1.3747111538788843,
      "grad_norm": 0.49801403284072876,
      "learning_rate": 7.298353353584208e-06,
      "loss": 0.0222,
      "step": 840020
    },
    {
      "epoch": 1.3747438843175377,
      "grad_norm": 2.0133144855499268,
      "learning_rate": 7.29828746137069e-06,
      "loss": 0.027,
      "step": 840040
    },
    {
      "epoch": 1.374776614756191,
      "grad_norm": 0.8023694753646851,
      "learning_rate": 7.298221569157173e-06,
      "loss": 0.0208,
      "step": 840060
    },
    {
      "epoch": 1.3748093451948442,
      "grad_norm": 0.4834337830543518,
      "learning_rate": 7.298155676943655e-06,
      "loss": 0.0322,
      "step": 840080
    },
    {
      "epoch": 1.3748420756334976,
      "grad_norm": 4.331718444824219,
      "learning_rate": 7.298089784730139e-06,
      "loss": 0.0246,
      "step": 840100
    },
    {
      "epoch": 1.374874806072151,
      "grad_norm": 0.6174238324165344,
      "learning_rate": 7.298023892516622e-06,
      "loss": 0.0183,
      "step": 840120
    },
    {
      "epoch": 1.3749075365108043,
      "grad_norm": 0.43325480818748474,
      "learning_rate": 7.297958000303104e-06,
      "loss": 0.0267,
      "step": 840140
    },
    {
      "epoch": 1.3749402669494577,
      "grad_norm": 1.2593382596969604,
      "learning_rate": 7.297892108089588e-06,
      "loss": 0.0277,
      "step": 840160
    },
    {
      "epoch": 1.374972997388111,
      "grad_norm": 0.16021540760993958,
      "learning_rate": 7.297826215876071e-06,
      "loss": 0.0162,
      "step": 840180
    },
    {
      "epoch": 1.3750057278267644,
      "grad_norm": 0.7070602774620056,
      "learning_rate": 7.297760323662553e-06,
      "loss": 0.0202,
      "step": 840200
    },
    {
      "epoch": 1.3750384582654176,
      "grad_norm": 0.27428892254829407,
      "learning_rate": 7.297694431449036e-06,
      "loss": 0.0253,
      "step": 840220
    },
    {
      "epoch": 1.375071188704071,
      "grad_norm": 1.1969549655914307,
      "learning_rate": 7.29762853923552e-06,
      "loss": 0.0155,
      "step": 840240
    },
    {
      "epoch": 1.3751039191427243,
      "grad_norm": 0.8079469203948975,
      "learning_rate": 7.297562647022002e-06,
      "loss": 0.0246,
      "step": 840260
    },
    {
      "epoch": 1.3751366495813777,
      "grad_norm": 0.44264620542526245,
      "learning_rate": 7.297496754808485e-06,
      "loss": 0.0176,
      "step": 840280
    },
    {
      "epoch": 1.375169380020031,
      "grad_norm": 0.7177584171295166,
      "learning_rate": 7.297430862594967e-06,
      "loss": 0.022,
      "step": 840300
    },
    {
      "epoch": 1.3752021104586842,
      "grad_norm": 0.4979862570762634,
      "learning_rate": 7.297364970381451e-06,
      "loss": 0.0254,
      "step": 840320
    },
    {
      "epoch": 1.3752348408973378,
      "grad_norm": 1.1319025754928589,
      "learning_rate": 7.2972990781679334e-06,
      "loss": 0.021,
      "step": 840340
    },
    {
      "epoch": 1.375267571335991,
      "grad_norm": 0.5067025423049927,
      "learning_rate": 7.297233185954416e-06,
      "loss": 0.0205,
      "step": 840360
    },
    {
      "epoch": 1.3753003017746444,
      "grad_norm": 0.9288904666900635,
      "learning_rate": 7.297167293740899e-06,
      "loss": 0.0321,
      "step": 840380
    },
    {
      "epoch": 1.3753330322132977,
      "grad_norm": 0.9037459492683411,
      "learning_rate": 7.2971014015273825e-06,
      "loss": 0.0124,
      "step": 840400
    },
    {
      "epoch": 1.375365762651951,
      "grad_norm": 0.45111897587776184,
      "learning_rate": 7.297035509313864e-06,
      "loss": 0.0152,
      "step": 840420
    },
    {
      "epoch": 1.3753984930906045,
      "grad_norm": 1.3284043073654175,
      "learning_rate": 7.296969617100348e-06,
      "loss": 0.019,
      "step": 840440
    },
    {
      "epoch": 1.3754312235292576,
      "grad_norm": 0.6438736915588379,
      "learning_rate": 7.29690372488683e-06,
      "loss": 0.0206,
      "step": 840460
    },
    {
      "epoch": 1.3754639539679112,
      "grad_norm": 2.1090192794799805,
      "learning_rate": 7.2968378326733135e-06,
      "loss": 0.0231,
      "step": 840480
    },
    {
      "epoch": 1.3754966844065644,
      "grad_norm": 1.1650773286819458,
      "learning_rate": 7.296771940459797e-06,
      "loss": 0.0256,
      "step": 840500
    },
    {
      "epoch": 1.3755294148452177,
      "grad_norm": 0.4698496460914612,
      "learning_rate": 7.296706048246279e-06,
      "loss": 0.0222,
      "step": 840520
    },
    {
      "epoch": 1.375562145283871,
      "grad_norm": 0.6934695243835449,
      "learning_rate": 7.2966401560327625e-06,
      "loss": 0.0197,
      "step": 840540
    },
    {
      "epoch": 1.3755948757225245,
      "grad_norm": 0.4892865717411041,
      "learning_rate": 7.296574263819245e-06,
      "loss": 0.0195,
      "step": 840560
    },
    {
      "epoch": 1.3756276061611779,
      "grad_norm": 0.4874974191188812,
      "learning_rate": 7.296508371605728e-06,
      "loss": 0.0161,
      "step": 840580
    },
    {
      "epoch": 1.375660336599831,
      "grad_norm": 0.9434301853179932,
      "learning_rate": 7.296442479392211e-06,
      "loss": 0.0224,
      "step": 840600
    },
    {
      "epoch": 1.3756930670384844,
      "grad_norm": 0.537279486656189,
      "learning_rate": 7.296376587178694e-06,
      "loss": 0.0236,
      "step": 840620
    },
    {
      "epoch": 1.3757257974771377,
      "grad_norm": 0.2695956528186798,
      "learning_rate": 7.296310694965176e-06,
      "loss": 0.0163,
      "step": 840640
    },
    {
      "epoch": 1.3757585279157911,
      "grad_norm": 0.2803071439266205,
      "learning_rate": 7.29624480275166e-06,
      "loss": 0.0237,
      "step": 840660
    },
    {
      "epoch": 1.3757912583544445,
      "grad_norm": 0.7467188835144043,
      "learning_rate": 7.296178910538142e-06,
      "loss": 0.0251,
      "step": 840680
    },
    {
      "epoch": 1.3758239887930979,
      "grad_norm": 0.6547274589538574,
      "learning_rate": 7.296113018324625e-06,
      "loss": 0.0242,
      "step": 840700
    },
    {
      "epoch": 1.3758567192317512,
      "grad_norm": 0.4366243779659271,
      "learning_rate": 7.296047126111108e-06,
      "loss": 0.0304,
      "step": 840720
    },
    {
      "epoch": 1.3758894496704044,
      "grad_norm": 0.2772458791732788,
      "learning_rate": 7.295981233897591e-06,
      "loss": 0.0245,
      "step": 840740
    },
    {
      "epoch": 1.3759221801090578,
      "grad_norm": 0.5523528456687927,
      "learning_rate": 7.2959153416840735e-06,
      "loss": 0.0189,
      "step": 840760
    },
    {
      "epoch": 1.3759549105477111,
      "grad_norm": 0.9227165579795837,
      "learning_rate": 7.295849449470557e-06,
      "loss": 0.0257,
      "step": 840780
    },
    {
      "epoch": 1.3759876409863645,
      "grad_norm": 0.7068665027618408,
      "learning_rate": 7.295783557257039e-06,
      "loss": 0.0172,
      "step": 840800
    },
    {
      "epoch": 1.3760203714250179,
      "grad_norm": 0.150427907705307,
      "learning_rate": 7.2957176650435226e-06,
      "loss": 0.0138,
      "step": 840820
    },
    {
      "epoch": 1.3760531018636712,
      "grad_norm": 0.8286304473876953,
      "learning_rate": 7.295651772830006e-06,
      "loss": 0.0234,
      "step": 840840
    },
    {
      "epoch": 1.3760858323023246,
      "grad_norm": 0.756770670413971,
      "learning_rate": 7.295585880616488e-06,
      "loss": 0.0252,
      "step": 840860
    },
    {
      "epoch": 1.3761185627409778,
      "grad_norm": 0.3044087886810303,
      "learning_rate": 7.295519988402972e-06,
      "loss": 0.0154,
      "step": 840880
    },
    {
      "epoch": 1.3761512931796311,
      "grad_norm": 0.7717357873916626,
      "learning_rate": 7.2954540961894535e-06,
      "loss": 0.0197,
      "step": 840900
    },
    {
      "epoch": 1.3761840236182845,
      "grad_norm": 0.3630499541759491,
      "learning_rate": 7.295388203975937e-06,
      "loss": 0.0193,
      "step": 840920
    },
    {
      "epoch": 1.3762167540569379,
      "grad_norm": 0.8908731341362,
      "learning_rate": 7.29532231176242e-06,
      "loss": 0.024,
      "step": 840940
    },
    {
      "epoch": 1.3762494844955913,
      "grad_norm": 0.48839879035949707,
      "learning_rate": 7.295256419548903e-06,
      "loss": 0.0182,
      "step": 840960
    },
    {
      "epoch": 1.3762822149342446,
      "grad_norm": 0.17643997073173523,
      "learning_rate": 7.295190527335385e-06,
      "loss": 0.0254,
      "step": 840980
    },
    {
      "epoch": 1.376314945372898,
      "grad_norm": 0.5984050035476685,
      "learning_rate": 7.295124635121869e-06,
      "loss": 0.0343,
      "step": 841000
    },
    {
      "epoch": 1.3763476758115512,
      "grad_norm": 0.7312820553779602,
      "learning_rate": 7.295058742908351e-06,
      "loss": 0.0212,
      "step": 841020
    },
    {
      "epoch": 1.3763804062502045,
      "grad_norm": 0.5954831838607788,
      "learning_rate": 7.294992850694834e-06,
      "loss": 0.0249,
      "step": 841040
    },
    {
      "epoch": 1.376413136688858,
      "grad_norm": 0.26994752883911133,
      "learning_rate": 7.294926958481316e-06,
      "loss": 0.0138,
      "step": 841060
    },
    {
      "epoch": 1.3764458671275113,
      "grad_norm": 0.2454671561717987,
      "learning_rate": 7.2948610662678e-06,
      "loss": 0.0273,
      "step": 841080
    },
    {
      "epoch": 1.3764785975661646,
      "grad_norm": 0.15547451376914978,
      "learning_rate": 7.294795174054282e-06,
      "loss": 0.0229,
      "step": 841100
    },
    {
      "epoch": 1.3765113280048178,
      "grad_norm": 0.5392299294471741,
      "learning_rate": 7.294729281840765e-06,
      "loss": 0.0261,
      "step": 841120
    },
    {
      "epoch": 1.3765440584434714,
      "grad_norm": 0.6116858720779419,
      "learning_rate": 7.294663389627248e-06,
      "loss": 0.0261,
      "step": 841140
    },
    {
      "epoch": 1.3765767888821245,
      "grad_norm": 0.3669523596763611,
      "learning_rate": 7.294597497413731e-06,
      "loss": 0.0173,
      "step": 841160
    },
    {
      "epoch": 1.376609519320778,
      "grad_norm": 0.6525889039039612,
      "learning_rate": 7.2945316052002144e-06,
      "loss": 0.029,
      "step": 841180
    },
    {
      "epoch": 1.3766422497594313,
      "grad_norm": 0.5414406657218933,
      "learning_rate": 7.294465712986697e-06,
      "loss": 0.0187,
      "step": 841200
    },
    {
      "epoch": 1.3766749801980847,
      "grad_norm": 0.35811853408813477,
      "learning_rate": 7.29439982077318e-06,
      "loss": 0.0263,
      "step": 841220
    },
    {
      "epoch": 1.376707710636738,
      "grad_norm": 0.5252212882041931,
      "learning_rate": 7.294333928559663e-06,
      "loss": 0.018,
      "step": 841240
    },
    {
      "epoch": 1.3767404410753912,
      "grad_norm": 0.5959794521331787,
      "learning_rate": 7.294268036346146e-06,
      "loss": 0.0198,
      "step": 841260
    },
    {
      "epoch": 1.3767731715140448,
      "grad_norm": 0.44830992817878723,
      "learning_rate": 7.294202144132628e-06,
      "loss": 0.0199,
      "step": 841280
    },
    {
      "epoch": 1.376805901952698,
      "grad_norm": 0.9815317392349243,
      "learning_rate": 7.294136251919112e-06,
      "loss": 0.0158,
      "step": 841300
    },
    {
      "epoch": 1.3768386323913513,
      "grad_norm": 0.22781674563884735,
      "learning_rate": 7.294070359705594e-06,
      "loss": 0.018,
      "step": 841320
    },
    {
      "epoch": 1.3768713628300047,
      "grad_norm": 1.274347186088562,
      "learning_rate": 7.294004467492077e-06,
      "loss": 0.0175,
      "step": 841340
    },
    {
      "epoch": 1.376904093268658,
      "grad_norm": 0.7360720634460449,
      "learning_rate": 7.29393857527856e-06,
      "loss": 0.0278,
      "step": 841360
    },
    {
      "epoch": 1.3769368237073114,
      "grad_norm": 1.0788060426712036,
      "learning_rate": 7.293872683065043e-06,
      "loss": 0.0217,
      "step": 841380
    },
    {
      "epoch": 1.3769695541459646,
      "grad_norm": 0.8466469645500183,
      "learning_rate": 7.293806790851525e-06,
      "loss": 0.022,
      "step": 841400
    },
    {
      "epoch": 1.377002284584618,
      "grad_norm": 0.2966702878475189,
      "learning_rate": 7.293740898638009e-06,
      "loss": 0.024,
      "step": 841420
    },
    {
      "epoch": 1.3770350150232713,
      "grad_norm": 0.9515770673751831,
      "learning_rate": 7.293675006424491e-06,
      "loss": 0.0246,
      "step": 841440
    },
    {
      "epoch": 1.3770677454619247,
      "grad_norm": 0.5669123530387878,
      "learning_rate": 7.2936091142109745e-06,
      "loss": 0.0199,
      "step": 841460
    },
    {
      "epoch": 1.377100475900578,
      "grad_norm": 0.9336463809013367,
      "learning_rate": 7.293543221997456e-06,
      "loss": 0.0256,
      "step": 841480
    },
    {
      "epoch": 1.3771332063392314,
      "grad_norm": 0.11282741278409958,
      "learning_rate": 7.29347732978394e-06,
      "loss": 0.0202,
      "step": 841500
    },
    {
      "epoch": 1.3771659367778848,
      "grad_norm": 0.24336431920528412,
      "learning_rate": 7.293411437570423e-06,
      "loss": 0.0182,
      "step": 841520
    },
    {
      "epoch": 1.377198667216538,
      "grad_norm": 1.2155026197433472,
      "learning_rate": 7.2933455453569054e-06,
      "loss": 0.0305,
      "step": 841540
    },
    {
      "epoch": 1.3772313976551913,
      "grad_norm": 0.9510286450386047,
      "learning_rate": 7.293279653143389e-06,
      "loss": 0.0318,
      "step": 841560
    },
    {
      "epoch": 1.3772641280938447,
      "grad_norm": 1.214155673980713,
      "learning_rate": 7.293213760929872e-06,
      "loss": 0.0187,
      "step": 841580
    },
    {
      "epoch": 1.377296858532498,
      "grad_norm": 0.6569861769676208,
      "learning_rate": 7.2931478687163545e-06,
      "loss": 0.0214,
      "step": 841600
    },
    {
      "epoch": 1.3773295889711514,
      "grad_norm": 0.9090476036071777,
      "learning_rate": 7.293081976502837e-06,
      "loss": 0.0209,
      "step": 841620
    },
    {
      "epoch": 1.3773623194098048,
      "grad_norm": 0.4315398335456848,
      "learning_rate": 7.293016084289321e-06,
      "loss": 0.0271,
      "step": 841640
    },
    {
      "epoch": 1.3773950498484582,
      "grad_norm": 0.756443202495575,
      "learning_rate": 7.292950192075803e-06,
      "loss": 0.0326,
      "step": 841660
    },
    {
      "epoch": 1.3774277802871113,
      "grad_norm": 0.23028382658958435,
      "learning_rate": 7.292884299862286e-06,
      "loss": 0.0173,
      "step": 841680
    },
    {
      "epoch": 1.3774605107257647,
      "grad_norm": 0.44875532388687134,
      "learning_rate": 7.292818407648768e-06,
      "loss": 0.0211,
      "step": 841700
    },
    {
      "epoch": 1.377493241164418,
      "grad_norm": 0.8973913788795471,
      "learning_rate": 7.292752515435252e-06,
      "loss": 0.0172,
      "step": 841720
    },
    {
      "epoch": 1.3775259716030714,
      "grad_norm": 1.514028549194336,
      "learning_rate": 7.2926866232217345e-06,
      "loss": 0.0268,
      "step": 841740
    },
    {
      "epoch": 1.3775587020417248,
      "grad_norm": 0.21579790115356445,
      "learning_rate": 7.292620731008217e-06,
      "loss": 0.0198,
      "step": 841760
    },
    {
      "epoch": 1.377591432480378,
      "grad_norm": 0.8769468665122986,
      "learning_rate": 7.2925548387947e-06,
      "loss": 0.0189,
      "step": 841780
    },
    {
      "epoch": 1.3776241629190316,
      "grad_norm": 0.2876545786857605,
      "learning_rate": 7.292488946581184e-06,
      "loss": 0.0185,
      "step": 841800
    },
    {
      "epoch": 1.3776568933576847,
      "grad_norm": 0.5519511699676514,
      "learning_rate": 7.2924230543676655e-06,
      "loss": 0.0248,
      "step": 841820
    },
    {
      "epoch": 1.377689623796338,
      "grad_norm": 1.0648609399795532,
      "learning_rate": 7.292357162154149e-06,
      "loss": 0.0122,
      "step": 841840
    },
    {
      "epoch": 1.3777223542349915,
      "grad_norm": 0.7116561532020569,
      "learning_rate": 7.292291269940631e-06,
      "loss": 0.0359,
      "step": 841860
    },
    {
      "epoch": 1.3777550846736448,
      "grad_norm": 1.0630614757537842,
      "learning_rate": 7.2922253777271146e-06,
      "loss": 0.0227,
      "step": 841880
    },
    {
      "epoch": 1.3777878151122982,
      "grad_norm": 0.42194581031799316,
      "learning_rate": 7.292159485513598e-06,
      "loss": 0.0181,
      "step": 841900
    },
    {
      "epoch": 1.3778205455509513,
      "grad_norm": 0.6426607370376587,
      "learning_rate": 7.29209359330008e-06,
      "loss": 0.0223,
      "step": 841920
    },
    {
      "epoch": 1.377853275989605,
      "grad_norm": 0.9780213832855225,
      "learning_rate": 7.292027701086564e-06,
      "loss": 0.0278,
      "step": 841940
    },
    {
      "epoch": 1.377886006428258,
      "grad_norm": 0.1805569976568222,
      "learning_rate": 7.291961808873046e-06,
      "loss": 0.0174,
      "step": 841960
    },
    {
      "epoch": 1.3779187368669115,
      "grad_norm": 0.1903398334980011,
      "learning_rate": 7.291895916659529e-06,
      "loss": 0.0292,
      "step": 841980
    },
    {
      "epoch": 1.3779514673055648,
      "grad_norm": 0.5335086584091187,
      "learning_rate": 7.291830024446012e-06,
      "loss": 0.0232,
      "step": 842000
    },
    {
      "epoch": 1.3779841977442182,
      "grad_norm": 0.15593165159225464,
      "learning_rate": 7.2917641322324954e-06,
      "loss": 0.0243,
      "step": 842020
    },
    {
      "epoch": 1.3780169281828716,
      "grad_norm": 1.2009834051132202,
      "learning_rate": 7.291698240018977e-06,
      "loss": 0.0164,
      "step": 842040
    },
    {
      "epoch": 1.3780496586215247,
      "grad_norm": 0.3059775233268738,
      "learning_rate": 7.291632347805461e-06,
      "loss": 0.0161,
      "step": 842060
    },
    {
      "epoch": 1.378082389060178,
      "grad_norm": 0.20790517330169678,
      "learning_rate": 7.291566455591943e-06,
      "loss": 0.0153,
      "step": 842080
    },
    {
      "epoch": 1.3781151194988315,
      "grad_norm": 0.843073844909668,
      "learning_rate": 7.291500563378426e-06,
      "loss": 0.0174,
      "step": 842100
    },
    {
      "epoch": 1.3781478499374848,
      "grad_norm": 0.7420628666877747,
      "learning_rate": 7.291434671164908e-06,
      "loss": 0.0199,
      "step": 842120
    },
    {
      "epoch": 1.3781805803761382,
      "grad_norm": 1.9939111471176147,
      "learning_rate": 7.291368778951392e-06,
      "loss": 0.0196,
      "step": 842140
    },
    {
      "epoch": 1.3782133108147916,
      "grad_norm": 0.35277116298675537,
      "learning_rate": 7.291302886737875e-06,
      "loss": 0.0287,
      "step": 842160
    },
    {
      "epoch": 1.378246041253445,
      "grad_norm": 1.2798417806625366,
      "learning_rate": 7.291236994524357e-06,
      "loss": 0.0223,
      "step": 842180
    },
    {
      "epoch": 1.3782787716920981,
      "grad_norm": 0.7108851075172424,
      "learning_rate": 7.29117110231084e-06,
      "loss": 0.0201,
      "step": 842200
    },
    {
      "epoch": 1.3783115021307515,
      "grad_norm": 0.3646702170372009,
      "learning_rate": 7.291105210097324e-06,
      "loss": 0.0188,
      "step": 842220
    },
    {
      "epoch": 1.3783442325694049,
      "grad_norm": 1.3730679750442505,
      "learning_rate": 7.291039317883806e-06,
      "loss": 0.0289,
      "step": 842240
    },
    {
      "epoch": 1.3783769630080582,
      "grad_norm": 0.28214913606643677,
      "learning_rate": 7.290973425670289e-06,
      "loss": 0.0188,
      "step": 842260
    },
    {
      "epoch": 1.3784096934467116,
      "grad_norm": 1.1109217405319214,
      "learning_rate": 7.290907533456773e-06,
      "loss": 0.0209,
      "step": 842280
    },
    {
      "epoch": 1.378442423885365,
      "grad_norm": 0.2805241644382477,
      "learning_rate": 7.290841641243255e-06,
      "loss": 0.0196,
      "step": 842300
    },
    {
      "epoch": 1.3784751543240183,
      "grad_norm": 0.17062316834926605,
      "learning_rate": 7.290775749029738e-06,
      "loss": 0.026,
      "step": 842320
    },
    {
      "epoch": 1.3785078847626715,
      "grad_norm": 1.3921329975128174,
      "learning_rate": 7.29070985681622e-06,
      "loss": 0.0201,
      "step": 842340
    },
    {
      "epoch": 1.3785406152013249,
      "grad_norm": 0.6989060044288635,
      "learning_rate": 7.290643964602704e-06,
      "loss": 0.0263,
      "step": 842360
    },
    {
      "epoch": 1.3785733456399782,
      "grad_norm": 0.5900640487670898,
      "learning_rate": 7.2905780723891864e-06,
      "loss": 0.0183,
      "step": 842380
    },
    {
      "epoch": 1.3786060760786316,
      "grad_norm": 1.192294716835022,
      "learning_rate": 7.290512180175669e-06,
      "loss": 0.0264,
      "step": 842400
    },
    {
      "epoch": 1.378638806517285,
      "grad_norm": 0.7210690379142761,
      "learning_rate": 7.290446287962152e-06,
      "loss": 0.018,
      "step": 842420
    },
    {
      "epoch": 1.3786715369559384,
      "grad_norm": 1.0436755418777466,
      "learning_rate": 7.2903803957486355e-06,
      "loss": 0.0211,
      "step": 842440
    },
    {
      "epoch": 1.3787042673945917,
      "grad_norm": 0.9450703263282776,
      "learning_rate": 7.290314503535117e-06,
      "loss": 0.0262,
      "step": 842460
    },
    {
      "epoch": 1.3787369978332449,
      "grad_norm": 0.6152684092521667,
      "learning_rate": 7.290248611321601e-06,
      "loss": 0.0183,
      "step": 842480
    },
    {
      "epoch": 1.3787697282718983,
      "grad_norm": 0.39497995376586914,
      "learning_rate": 7.290182719108083e-06,
      "loss": 0.0259,
      "step": 842500
    },
    {
      "epoch": 1.3788024587105516,
      "grad_norm": 0.16824893653392792,
      "learning_rate": 7.2901168268945665e-06,
      "loss": 0.0154,
      "step": 842520
    },
    {
      "epoch": 1.378835189149205,
      "grad_norm": 1.4899659156799316,
      "learning_rate": 7.290050934681049e-06,
      "loss": 0.0176,
      "step": 842540
    },
    {
      "epoch": 1.3788679195878584,
      "grad_norm": 0.10283567756414413,
      "learning_rate": 7.289985042467532e-06,
      "loss": 0.0198,
      "step": 842560
    },
    {
      "epoch": 1.3789006500265115,
      "grad_norm": 0.6141659617424011,
      "learning_rate": 7.289919150254015e-06,
      "loss": 0.0201,
      "step": 842580
    },
    {
      "epoch": 1.3789333804651651,
      "grad_norm": 1.321005940437317,
      "learning_rate": 7.289853258040498e-06,
      "loss": 0.0201,
      "step": 842600
    },
    {
      "epoch": 1.3789661109038183,
      "grad_norm": 0.8741997480392456,
      "learning_rate": 7.289787365826981e-06,
      "loss": 0.02,
      "step": 842620
    },
    {
      "epoch": 1.3789988413424716,
      "grad_norm": 0.6627256870269775,
      "learning_rate": 7.289721473613464e-06,
      "loss": 0.0303,
      "step": 842640
    },
    {
      "epoch": 1.379031571781125,
      "grad_norm": 0.9953888654708862,
      "learning_rate": 7.289655581399947e-06,
      "loss": 0.0281,
      "step": 842660
    },
    {
      "epoch": 1.3790643022197784,
      "grad_norm": 0.34911850094795227,
      "learning_rate": 7.289589689186429e-06,
      "loss": 0.0161,
      "step": 842680
    },
    {
      "epoch": 1.3790970326584318,
      "grad_norm": 0.3098607361316681,
      "learning_rate": 7.289523796972913e-06,
      "loss": 0.0324,
      "step": 842700
    },
    {
      "epoch": 1.379129763097085,
      "grad_norm": 0.27731600403785706,
      "learning_rate": 7.289457904759395e-06,
      "loss": 0.021,
      "step": 842720
    },
    {
      "epoch": 1.3791624935357385,
      "grad_norm": 0.08391077071428299,
      "learning_rate": 7.289392012545878e-06,
      "loss": 0.0194,
      "step": 842740
    },
    {
      "epoch": 1.3791952239743916,
      "grad_norm": 0.12063997983932495,
      "learning_rate": 7.289326120332361e-06,
      "loss": 0.0176,
      "step": 842760
    },
    {
      "epoch": 1.379227954413045,
      "grad_norm": 1.5180763006210327,
      "learning_rate": 7.289260228118844e-06,
      "loss": 0.0209,
      "step": 842780
    },
    {
      "epoch": 1.3792606848516984,
      "grad_norm": 0.37696513533592224,
      "learning_rate": 7.2891943359053265e-06,
      "loss": 0.0281,
      "step": 842800
    },
    {
      "epoch": 1.3792934152903518,
      "grad_norm": 0.2981549799442291,
      "learning_rate": 7.28912844369181e-06,
      "loss": 0.0185,
      "step": 842820
    },
    {
      "epoch": 1.3793261457290051,
      "grad_norm": 0.33829745650291443,
      "learning_rate": 7.289062551478292e-06,
      "loss": 0.0272,
      "step": 842840
    },
    {
      "epoch": 1.3793588761676583,
      "grad_norm": 0.7645665407180786,
      "learning_rate": 7.2889966592647756e-06,
      "loss": 0.0243,
      "step": 842860
    },
    {
      "epoch": 1.3793916066063117,
      "grad_norm": 0.2403886914253235,
      "learning_rate": 7.2889307670512575e-06,
      "loss": 0.0159,
      "step": 842880
    },
    {
      "epoch": 1.379424337044965,
      "grad_norm": 0.6418944597244263,
      "learning_rate": 7.288864874837741e-06,
      "loss": 0.0268,
      "step": 842900
    },
    {
      "epoch": 1.3794570674836184,
      "grad_norm": 0.39440301060676575,
      "learning_rate": 7.288798982624223e-06,
      "loss": 0.0203,
      "step": 842920
    },
    {
      "epoch": 1.3794897979222718,
      "grad_norm": 1.278912901878357,
      "learning_rate": 7.2887330904107065e-06,
      "loss": 0.0204,
      "step": 842940
    },
    {
      "epoch": 1.3795225283609251,
      "grad_norm": 0.2354901283979416,
      "learning_rate": 7.28866719819719e-06,
      "loss": 0.0214,
      "step": 842960
    },
    {
      "epoch": 1.3795552587995785,
      "grad_norm": 0.18335945904254913,
      "learning_rate": 7.288601305983672e-06,
      "loss": 0.0168,
      "step": 842980
    },
    {
      "epoch": 1.3795879892382317,
      "grad_norm": 1.1369503736495972,
      "learning_rate": 7.288535413770156e-06,
      "loss": 0.024,
      "step": 843000
    },
    {
      "epoch": 1.379620719676885,
      "grad_norm": 0.808610200881958,
      "learning_rate": 7.288469521556638e-06,
      "loss": 0.0214,
      "step": 843020
    },
    {
      "epoch": 1.3796534501155384,
      "grad_norm": 0.7087854146957397,
      "learning_rate": 7.288403629343122e-06,
      "loss": 0.0229,
      "step": 843040
    },
    {
      "epoch": 1.3796861805541918,
      "grad_norm": 1.6578565835952759,
      "learning_rate": 7.288337737129604e-06,
      "loss": 0.0244,
      "step": 843060
    },
    {
      "epoch": 1.3797189109928452,
      "grad_norm": 0.26907381415367126,
      "learning_rate": 7.288271844916087e-06,
      "loss": 0.0158,
      "step": 843080
    },
    {
      "epoch": 1.3797516414314985,
      "grad_norm": 0.38604363799095154,
      "learning_rate": 7.288205952702569e-06,
      "loss": 0.0161,
      "step": 843100
    },
    {
      "epoch": 1.379784371870152,
      "grad_norm": 1.7793635129928589,
      "learning_rate": 7.288140060489053e-06,
      "loss": 0.0227,
      "step": 843120
    },
    {
      "epoch": 1.379817102308805,
      "grad_norm": 0.6256863474845886,
      "learning_rate": 7.288074168275535e-06,
      "loss": 0.0168,
      "step": 843140
    },
    {
      "epoch": 1.3798498327474584,
      "grad_norm": 0.28705868124961853,
      "learning_rate": 7.288008276062018e-06,
      "loss": 0.0267,
      "step": 843160
    },
    {
      "epoch": 1.3798825631861118,
      "grad_norm": 0.4413739740848541,
      "learning_rate": 7.287942383848501e-06,
      "loss": 0.0173,
      "step": 843180
    },
    {
      "epoch": 1.3799152936247652,
      "grad_norm": 0.11295674741268158,
      "learning_rate": 7.287876491634984e-06,
      "loss": 0.0227,
      "step": 843200
    },
    {
      "epoch": 1.3799480240634185,
      "grad_norm": 0.2932354807853699,
      "learning_rate": 7.287810599421467e-06,
      "loss": 0.0162,
      "step": 843220
    },
    {
      "epoch": 1.379980754502072,
      "grad_norm": 0.25364527106285095,
      "learning_rate": 7.28774470720795e-06,
      "loss": 0.0156,
      "step": 843240
    },
    {
      "epoch": 1.3800134849407253,
      "grad_norm": 0.859476625919342,
      "learning_rate": 7.287678814994432e-06,
      "loss": 0.0186,
      "step": 843260
    },
    {
      "epoch": 1.3800462153793784,
      "grad_norm": 0.9978646636009216,
      "learning_rate": 7.287612922780916e-06,
      "loss": 0.0303,
      "step": 843280
    },
    {
      "epoch": 1.3800789458180318,
      "grad_norm": 0.5760908126831055,
      "learning_rate": 7.287547030567399e-06,
      "loss": 0.0262,
      "step": 843300
    },
    {
      "epoch": 1.3801116762566852,
      "grad_norm": 0.7739112377166748,
      "learning_rate": 7.287481138353881e-06,
      "loss": 0.0195,
      "step": 843320
    },
    {
      "epoch": 1.3801444066953386,
      "grad_norm": 0.5501139760017395,
      "learning_rate": 7.287415246140365e-06,
      "loss": 0.0254,
      "step": 843340
    },
    {
      "epoch": 1.380177137133992,
      "grad_norm": 0.9104777574539185,
      "learning_rate": 7.287349353926847e-06,
      "loss": 0.0145,
      "step": 843360
    },
    {
      "epoch": 1.380209867572645,
      "grad_norm": 0.7851008176803589,
      "learning_rate": 7.28728346171333e-06,
      "loss": 0.0134,
      "step": 843380
    },
    {
      "epoch": 1.3802425980112987,
      "grad_norm": 0.37550392746925354,
      "learning_rate": 7.287217569499813e-06,
      "loss": 0.0211,
      "step": 843400
    },
    {
      "epoch": 1.3802753284499518,
      "grad_norm": 0.5672438144683838,
      "learning_rate": 7.287151677286296e-06,
      "loss": 0.0205,
      "step": 843420
    },
    {
      "epoch": 1.3803080588886052,
      "grad_norm": 0.47013071179389954,
      "learning_rate": 7.287085785072778e-06,
      "loss": 0.0176,
      "step": 843440
    },
    {
      "epoch": 1.3803407893272586,
      "grad_norm": 0.44340038299560547,
      "learning_rate": 7.287019892859262e-06,
      "loss": 0.0289,
      "step": 843460
    },
    {
      "epoch": 1.380373519765912,
      "grad_norm": 0.965340256690979,
      "learning_rate": 7.286954000645744e-06,
      "loss": 0.025,
      "step": 843480
    },
    {
      "epoch": 1.3804062502045653,
      "grad_norm": 0.22399094700813293,
      "learning_rate": 7.2868881084322275e-06,
      "loss": 0.0201,
      "step": 843500
    },
    {
      "epoch": 1.3804389806432185,
      "grad_norm": 1.0487017631530762,
      "learning_rate": 7.286822216218709e-06,
      "loss": 0.0222,
      "step": 843520
    },
    {
      "epoch": 1.380471711081872,
      "grad_norm": 0.23258322477340698,
      "learning_rate": 7.286756324005193e-06,
      "loss": 0.0232,
      "step": 843540
    },
    {
      "epoch": 1.3805044415205252,
      "grad_norm": 0.08621440082788467,
      "learning_rate": 7.286690431791676e-06,
      "loss": 0.0198,
      "step": 843560
    },
    {
      "epoch": 1.3805371719591786,
      "grad_norm": 0.3927198052406311,
      "learning_rate": 7.2866245395781584e-06,
      "loss": 0.0213,
      "step": 843580
    },
    {
      "epoch": 1.380569902397832,
      "grad_norm": 0.2786317467689514,
      "learning_rate": 7.286558647364641e-06,
      "loss": 0.02,
      "step": 843600
    },
    {
      "epoch": 1.3806026328364853,
      "grad_norm": 0.3094983696937561,
      "learning_rate": 7.286492755151125e-06,
      "loss": 0.0262,
      "step": 843620
    },
    {
      "epoch": 1.3806353632751387,
      "grad_norm": 1.3914375305175781,
      "learning_rate": 7.286426862937607e-06,
      "loss": 0.0204,
      "step": 843640
    },
    {
      "epoch": 1.3806680937137918,
      "grad_norm": 0.4802367091178894,
      "learning_rate": 7.28636097072409e-06,
      "loss": 0.0197,
      "step": 843660
    },
    {
      "epoch": 1.3807008241524452,
      "grad_norm": 0.6630985140800476,
      "learning_rate": 7.286295078510574e-06,
      "loss": 0.0245,
      "step": 843680
    },
    {
      "epoch": 1.3807335545910986,
      "grad_norm": 2.532776355743408,
      "learning_rate": 7.286229186297056e-06,
      "loss": 0.0156,
      "step": 843700
    },
    {
      "epoch": 1.380766285029752,
      "grad_norm": 0.3795241117477417,
      "learning_rate": 7.286163294083539e-06,
      "loss": 0.0207,
      "step": 843720
    },
    {
      "epoch": 1.3807990154684053,
      "grad_norm": 0.39908382296562195,
      "learning_rate": 7.286097401870021e-06,
      "loss": 0.025,
      "step": 843740
    },
    {
      "epoch": 1.3808317459070587,
      "grad_norm": 0.40843647718429565,
      "learning_rate": 7.286031509656505e-06,
      "loss": 0.014,
      "step": 843760
    },
    {
      "epoch": 1.380864476345712,
      "grad_norm": 0.37903892993927,
      "learning_rate": 7.2859656174429875e-06,
      "loss": 0.0217,
      "step": 843780
    },
    {
      "epoch": 1.3808972067843652,
      "grad_norm": 2.7183773517608643,
      "learning_rate": 7.28589972522947e-06,
      "loss": 0.0321,
      "step": 843800
    },
    {
      "epoch": 1.3809299372230186,
      "grad_norm": 0.3137621283531189,
      "learning_rate": 7.285833833015953e-06,
      "loss": 0.0204,
      "step": 843820
    },
    {
      "epoch": 1.380962667661672,
      "grad_norm": 0.3682626485824585,
      "learning_rate": 7.285767940802437e-06,
      "loss": 0.0218,
      "step": 843840
    },
    {
      "epoch": 1.3809953981003253,
      "grad_norm": 0.5570837259292603,
      "learning_rate": 7.2857020485889185e-06,
      "loss": 0.0152,
      "step": 843860
    },
    {
      "epoch": 1.3810281285389787,
      "grad_norm": 0.6050891280174255,
      "learning_rate": 7.285636156375402e-06,
      "loss": 0.0208,
      "step": 843880
    },
    {
      "epoch": 1.381060858977632,
      "grad_norm": 0.7938399314880371,
      "learning_rate": 7.285570264161884e-06,
      "loss": 0.0233,
      "step": 843900
    },
    {
      "epoch": 1.3810935894162855,
      "grad_norm": 0.389865905046463,
      "learning_rate": 7.2855043719483676e-06,
      "loss": 0.0168,
      "step": 843920
    },
    {
      "epoch": 1.3811263198549386,
      "grad_norm": 0.6879383325576782,
      "learning_rate": 7.2854384797348495e-06,
      "loss": 0.0132,
      "step": 843940
    },
    {
      "epoch": 1.381159050293592,
      "grad_norm": 0.26690930128097534,
      "learning_rate": 7.285372587521333e-06,
      "loss": 0.0131,
      "step": 843960
    },
    {
      "epoch": 1.3811917807322454,
      "grad_norm": 1.572752594947815,
      "learning_rate": 7.285306695307816e-06,
      "loss": 0.024,
      "step": 843980
    },
    {
      "epoch": 1.3812245111708987,
      "grad_norm": 0.9510427713394165,
      "learning_rate": 7.2852408030942985e-06,
      "loss": 0.0259,
      "step": 844000
    },
    {
      "epoch": 1.381257241609552,
      "grad_norm": 0.3326759934425354,
      "learning_rate": 7.285174910880782e-06,
      "loss": 0.0177,
      "step": 844020
    },
    {
      "epoch": 1.3812899720482055,
      "grad_norm": 1.0905218124389648,
      "learning_rate": 7.285109018667265e-06,
      "loss": 0.0279,
      "step": 844040
    },
    {
      "epoch": 1.3813227024868588,
      "grad_norm": 0.4913864731788635,
      "learning_rate": 7.285043126453748e-06,
      "loss": 0.0223,
      "step": 844060
    },
    {
      "epoch": 1.381355432925512,
      "grad_norm": 0.46402350068092346,
      "learning_rate": 7.28497723424023e-06,
      "loss": 0.0244,
      "step": 844080
    },
    {
      "epoch": 1.3813881633641654,
      "grad_norm": 0.8344959020614624,
      "learning_rate": 7.284911342026714e-06,
      "loss": 0.0181,
      "step": 844100
    },
    {
      "epoch": 1.3814208938028187,
      "grad_norm": 0.470079630613327,
      "learning_rate": 7.284845449813196e-06,
      "loss": 0.012,
      "step": 844120
    },
    {
      "epoch": 1.3814536242414721,
      "grad_norm": 0.4411002993583679,
      "learning_rate": 7.284779557599679e-06,
      "loss": 0.027,
      "step": 844140
    },
    {
      "epoch": 1.3814863546801255,
      "grad_norm": 0.8597398996353149,
      "learning_rate": 7.284713665386161e-06,
      "loss": 0.0282,
      "step": 844160
    },
    {
      "epoch": 1.3815190851187786,
      "grad_norm": 0.34469369053840637,
      "learning_rate": 7.284647773172645e-06,
      "loss": 0.017,
      "step": 844180
    },
    {
      "epoch": 1.3815518155574322,
      "grad_norm": 0.2410798966884613,
      "learning_rate": 7.284581880959128e-06,
      "loss": 0.0211,
      "step": 844200
    },
    {
      "epoch": 1.3815845459960854,
      "grad_norm": 0.7526357173919678,
      "learning_rate": 7.28451598874561e-06,
      "loss": 0.0202,
      "step": 844220
    },
    {
      "epoch": 1.3816172764347388,
      "grad_norm": 0.9047158360481262,
      "learning_rate": 7.284450096532093e-06,
      "loss": 0.0209,
      "step": 844240
    },
    {
      "epoch": 1.3816500068733921,
      "grad_norm": 0.07500475645065308,
      "learning_rate": 7.284384204318577e-06,
      "loss": 0.0272,
      "step": 844260
    },
    {
      "epoch": 1.3816827373120455,
      "grad_norm": 0.20713092386722565,
      "learning_rate": 7.2843183121050586e-06,
      "loss": 0.0227,
      "step": 844280
    },
    {
      "epoch": 1.3817154677506989,
      "grad_norm": 0.39320164918899536,
      "learning_rate": 7.284252419891542e-06,
      "loss": 0.0163,
      "step": 844300
    },
    {
      "epoch": 1.381748198189352,
      "grad_norm": 0.3553280234336853,
      "learning_rate": 7.284186527678024e-06,
      "loss": 0.0234,
      "step": 844320
    },
    {
      "epoch": 1.3817809286280054,
      "grad_norm": 0.1702960878610611,
      "learning_rate": 7.284120635464508e-06,
      "loss": 0.0247,
      "step": 844340
    },
    {
      "epoch": 1.3818136590666588,
      "grad_norm": 0.24846063554286957,
      "learning_rate": 7.284054743250991e-06,
      "loss": 0.0171,
      "step": 844360
    },
    {
      "epoch": 1.3818463895053121,
      "grad_norm": 0.5412957668304443,
      "learning_rate": 7.283988851037473e-06,
      "loss": 0.0252,
      "step": 844380
    },
    {
      "epoch": 1.3818791199439655,
      "grad_norm": 1.2371788024902344,
      "learning_rate": 7.283922958823957e-06,
      "loss": 0.016,
      "step": 844400
    },
    {
      "epoch": 1.3819118503826189,
      "grad_norm": 0.21707184612751007,
      "learning_rate": 7.2838570666104394e-06,
      "loss": 0.0147,
      "step": 844420
    },
    {
      "epoch": 1.3819445808212723,
      "grad_norm": 0.944892168045044,
      "learning_rate": 7.283791174396922e-06,
      "loss": 0.0192,
      "step": 844440
    },
    {
      "epoch": 1.3819773112599254,
      "grad_norm": 1.2104021310806274,
      "learning_rate": 7.283725282183405e-06,
      "loss": 0.023,
      "step": 844460
    },
    {
      "epoch": 1.3820100416985788,
      "grad_norm": 2.4354500770568848,
      "learning_rate": 7.2836593899698885e-06,
      "loss": 0.0146,
      "step": 844480
    },
    {
      "epoch": 1.3820427721372321,
      "grad_norm": 0.6234182715415955,
      "learning_rate": 7.28359349775637e-06,
      "loss": 0.0131,
      "step": 844500
    },
    {
      "epoch": 1.3820755025758855,
      "grad_norm": 0.6726083159446716,
      "learning_rate": 7.283527605542854e-06,
      "loss": 0.0158,
      "step": 844520
    },
    {
      "epoch": 1.382108233014539,
      "grad_norm": 0.5892157554626465,
      "learning_rate": 7.283461713329336e-06,
      "loss": 0.0184,
      "step": 844540
    },
    {
      "epoch": 1.3821409634531923,
      "grad_norm": 0.4929567873477936,
      "learning_rate": 7.2833958211158195e-06,
      "loss": 0.0217,
      "step": 844560
    },
    {
      "epoch": 1.3821736938918456,
      "grad_norm": 0.8600476980209351,
      "learning_rate": 7.283329928902302e-06,
      "loss": 0.0244,
      "step": 844580
    },
    {
      "epoch": 1.3822064243304988,
      "grad_norm": 0.9852323532104492,
      "learning_rate": 7.283264036688785e-06,
      "loss": 0.0274,
      "step": 844600
    },
    {
      "epoch": 1.3822391547691522,
      "grad_norm": 0.5694276690483093,
      "learning_rate": 7.283198144475268e-06,
      "loss": 0.0219,
      "step": 844620
    },
    {
      "epoch": 1.3822718852078055,
      "grad_norm": 0.9310674071311951,
      "learning_rate": 7.283132252261751e-06,
      "loss": 0.0191,
      "step": 844640
    },
    {
      "epoch": 1.382304615646459,
      "grad_norm": 1.20919668674469,
      "learning_rate": 7.283066360048233e-06,
      "loss": 0.028,
      "step": 844660
    },
    {
      "epoch": 1.3823373460851123,
      "grad_norm": 0.23057402670383453,
      "learning_rate": 7.283000467834717e-06,
      "loss": 0.0237,
      "step": 844680
    },
    {
      "epoch": 1.3823700765237656,
      "grad_norm": 0.3178292214870453,
      "learning_rate": 7.2829345756212e-06,
      "loss": 0.0175,
      "step": 844700
    },
    {
      "epoch": 1.382402806962419,
      "grad_norm": 0.6939055323600769,
      "learning_rate": 7.282868683407682e-06,
      "loss": 0.0139,
      "step": 844720
    },
    {
      "epoch": 1.3824355374010722,
      "grad_norm": 0.7303286194801331,
      "learning_rate": 7.282802791194166e-06,
      "loss": 0.0197,
      "step": 844740
    },
    {
      "epoch": 1.3824682678397255,
      "grad_norm": 0.714179277420044,
      "learning_rate": 7.282736898980648e-06,
      "loss": 0.0197,
      "step": 844760
    },
    {
      "epoch": 1.382500998278379,
      "grad_norm": 0.7118271589279175,
      "learning_rate": 7.282671006767131e-06,
      "loss": 0.0206,
      "step": 844780
    },
    {
      "epoch": 1.3825337287170323,
      "grad_norm": 1.364923357963562,
      "learning_rate": 7.282605114553614e-06,
      "loss": 0.0302,
      "step": 844800
    },
    {
      "epoch": 1.3825664591556857,
      "grad_norm": 0.5293645262718201,
      "learning_rate": 7.282539222340097e-06,
      "loss": 0.0285,
      "step": 844820
    },
    {
      "epoch": 1.3825991895943388,
      "grad_norm": 0.4277203381061554,
      "learning_rate": 7.2824733301265795e-06,
      "loss": 0.017,
      "step": 844840
    },
    {
      "epoch": 1.3826319200329924,
      "grad_norm": 1.0588610172271729,
      "learning_rate": 7.282407437913063e-06,
      "loss": 0.0238,
      "step": 844860
    },
    {
      "epoch": 1.3826646504716456,
      "grad_norm": 0.33432692289352417,
      "learning_rate": 7.282341545699545e-06,
      "loss": 0.0195,
      "step": 844880
    },
    {
      "epoch": 1.382697380910299,
      "grad_norm": 0.6453213691711426,
      "learning_rate": 7.282275653486029e-06,
      "loss": 0.0282,
      "step": 844900
    },
    {
      "epoch": 1.3827301113489523,
      "grad_norm": 0.3085745871067047,
      "learning_rate": 7.2822097612725105e-06,
      "loss": 0.0192,
      "step": 844920
    },
    {
      "epoch": 1.3827628417876057,
      "grad_norm": 0.8837109208106995,
      "learning_rate": 7.282143869058994e-06,
      "loss": 0.0173,
      "step": 844940
    },
    {
      "epoch": 1.382795572226259,
      "grad_norm": 0.1883200705051422,
      "learning_rate": 7.282077976845476e-06,
      "loss": 0.0165,
      "step": 844960
    },
    {
      "epoch": 1.3828283026649122,
      "grad_norm": 0.9631264209747314,
      "learning_rate": 7.2820120846319595e-06,
      "loss": 0.0266,
      "step": 844980
    },
    {
      "epoch": 1.3828610331035658,
      "grad_norm": 0.7236937284469604,
      "learning_rate": 7.281946192418442e-06,
      "loss": 0.0273,
      "step": 845000
    },
    {
      "epoch": 1.382893763542219,
      "grad_norm": 0.7201277613639832,
      "learning_rate": 7.281880300204925e-06,
      "loss": 0.0214,
      "step": 845020
    },
    {
      "epoch": 1.3829264939808723,
      "grad_norm": 0.3729940950870514,
      "learning_rate": 7.281814407991408e-06,
      "loss": 0.0184,
      "step": 845040
    },
    {
      "epoch": 1.3829592244195257,
      "grad_norm": 0.5486702919006348,
      "learning_rate": 7.281748515777891e-06,
      "loss": 0.0256,
      "step": 845060
    },
    {
      "epoch": 1.382991954858179,
      "grad_norm": 0.29606226086616516,
      "learning_rate": 7.281682623564374e-06,
      "loss": 0.0197,
      "step": 845080
    },
    {
      "epoch": 1.3830246852968324,
      "grad_norm": 1.3736270666122437,
      "learning_rate": 7.281616731350857e-06,
      "loss": 0.0199,
      "step": 845100
    },
    {
      "epoch": 1.3830574157354856,
      "grad_norm": 0.9212859869003296,
      "learning_rate": 7.28155083913734e-06,
      "loss": 0.0167,
      "step": 845120
    },
    {
      "epoch": 1.383090146174139,
      "grad_norm": 0.5734865665435791,
      "learning_rate": 7.281484946923822e-06,
      "loss": 0.0253,
      "step": 845140
    },
    {
      "epoch": 1.3831228766127923,
      "grad_norm": 0.26056307554244995,
      "learning_rate": 7.281419054710306e-06,
      "loss": 0.0199,
      "step": 845160
    },
    {
      "epoch": 1.3831556070514457,
      "grad_norm": 2.2840287685394287,
      "learning_rate": 7.281353162496788e-06,
      "loss": 0.0247,
      "step": 845180
    },
    {
      "epoch": 1.383188337490099,
      "grad_norm": 3.027313470840454,
      "learning_rate": 7.281287270283271e-06,
      "loss": 0.0274,
      "step": 845200
    },
    {
      "epoch": 1.3832210679287524,
      "grad_norm": 1.1866705417633057,
      "learning_rate": 7.281221378069754e-06,
      "loss": 0.0318,
      "step": 845220
    },
    {
      "epoch": 1.3832537983674058,
      "grad_norm": 0.6564352512359619,
      "learning_rate": 7.281155485856237e-06,
      "loss": 0.0107,
      "step": 845240
    },
    {
      "epoch": 1.383286528806059,
      "grad_norm": 0.7081465721130371,
      "learning_rate": 7.28108959364272e-06,
      "loss": 0.0309,
      "step": 845260
    },
    {
      "epoch": 1.3833192592447123,
      "grad_norm": 1.2059204578399658,
      "learning_rate": 7.281023701429203e-06,
      "loss": 0.015,
      "step": 845280
    },
    {
      "epoch": 1.3833519896833657,
      "grad_norm": 0.8037872314453125,
      "learning_rate": 7.280957809215685e-06,
      "loss": 0.0165,
      "step": 845300
    },
    {
      "epoch": 1.383384720122019,
      "grad_norm": 0.3317660391330719,
      "learning_rate": 7.280891917002169e-06,
      "loss": 0.0292,
      "step": 845320
    },
    {
      "epoch": 1.3834174505606724,
      "grad_norm": 0.9145130515098572,
      "learning_rate": 7.2808260247886506e-06,
      "loss": 0.0244,
      "step": 845340
    },
    {
      "epoch": 1.3834501809993258,
      "grad_norm": 1.1138365268707275,
      "learning_rate": 7.280760132575134e-06,
      "loss": 0.0287,
      "step": 845360
    },
    {
      "epoch": 1.3834829114379792,
      "grad_norm": 0.6276711225509644,
      "learning_rate": 7.280694240361617e-06,
      "loss": 0.0169,
      "step": 845380
    },
    {
      "epoch": 1.3835156418766323,
      "grad_norm": 0.3869052231311798,
      "learning_rate": 7.2806283481481e-06,
      "loss": 0.0263,
      "step": 845400
    },
    {
      "epoch": 1.3835483723152857,
      "grad_norm": 0.4783354103565216,
      "learning_rate": 7.280562455934583e-06,
      "loss": 0.0184,
      "step": 845420
    },
    {
      "epoch": 1.383581102753939,
      "grad_norm": 0.9883046746253967,
      "learning_rate": 7.280496563721066e-06,
      "loss": 0.0237,
      "step": 845440
    },
    {
      "epoch": 1.3836138331925925,
      "grad_norm": 1.5976682901382446,
      "learning_rate": 7.280430671507549e-06,
      "loss": 0.024,
      "step": 845460
    },
    {
      "epoch": 1.3836465636312458,
      "grad_norm": 0.8731756806373596,
      "learning_rate": 7.280364779294031e-06,
      "loss": 0.0303,
      "step": 845480
    },
    {
      "epoch": 1.3836792940698992,
      "grad_norm": 0.3402707576751709,
      "learning_rate": 7.280298887080515e-06,
      "loss": 0.0165,
      "step": 845500
    },
    {
      "epoch": 1.3837120245085526,
      "grad_norm": 1.1944504976272583,
      "learning_rate": 7.280232994866997e-06,
      "loss": 0.0259,
      "step": 845520
    },
    {
      "epoch": 1.3837447549472057,
      "grad_norm": 0.9034994840621948,
      "learning_rate": 7.2801671026534805e-06,
      "loss": 0.023,
      "step": 845540
    },
    {
      "epoch": 1.383777485385859,
      "grad_norm": 0.5222737193107605,
      "learning_rate": 7.280101210439962e-06,
      "loss": 0.0187,
      "step": 845560
    },
    {
      "epoch": 1.3838102158245125,
      "grad_norm": 0.6587492227554321,
      "learning_rate": 7.280035318226446e-06,
      "loss": 0.0197,
      "step": 845580
    },
    {
      "epoch": 1.3838429462631658,
      "grad_norm": 0.6300676465034485,
      "learning_rate": 7.279969426012929e-06,
      "loss": 0.0182,
      "step": 845600
    },
    {
      "epoch": 1.3838756767018192,
      "grad_norm": 0.8608834743499756,
      "learning_rate": 7.2799035337994114e-06,
      "loss": 0.0206,
      "step": 845620
    },
    {
      "epoch": 1.3839084071404724,
      "grad_norm": 0.735063374042511,
      "learning_rate": 7.279837641585894e-06,
      "loss": 0.0223,
      "step": 845640
    },
    {
      "epoch": 1.383941137579126,
      "grad_norm": 0.21277819573879242,
      "learning_rate": 7.279771749372378e-06,
      "loss": 0.0217,
      "step": 845660
    },
    {
      "epoch": 1.383973868017779,
      "grad_norm": 0.6136674880981445,
      "learning_rate": 7.27970585715886e-06,
      "loss": 0.0244,
      "step": 845680
    },
    {
      "epoch": 1.3840065984564325,
      "grad_norm": 0.5534015893936157,
      "learning_rate": 7.279639964945343e-06,
      "loss": 0.0185,
      "step": 845700
    },
    {
      "epoch": 1.3840393288950859,
      "grad_norm": 0.501920759677887,
      "learning_rate": 7.279574072731825e-06,
      "loss": 0.0256,
      "step": 845720
    },
    {
      "epoch": 1.3840720593337392,
      "grad_norm": 3.2409322261810303,
      "learning_rate": 7.279508180518309e-06,
      "loss": 0.0246,
      "step": 845740
    },
    {
      "epoch": 1.3841047897723926,
      "grad_norm": 0.295565128326416,
      "learning_rate": 7.279442288304792e-06,
      "loss": 0.0179,
      "step": 845760
    },
    {
      "epoch": 1.3841375202110457,
      "grad_norm": 0.27620193362236023,
      "learning_rate": 7.279376396091274e-06,
      "loss": 0.0272,
      "step": 845780
    },
    {
      "epoch": 1.3841702506496993,
      "grad_norm": 0.2998840808868408,
      "learning_rate": 7.279310503877758e-06,
      "loss": 0.023,
      "step": 845800
    },
    {
      "epoch": 1.3842029810883525,
      "grad_norm": 0.3147870600223541,
      "learning_rate": 7.2792446116642405e-06,
      "loss": 0.0236,
      "step": 845820
    },
    {
      "epoch": 1.3842357115270059,
      "grad_norm": 0.22847433388233185,
      "learning_rate": 7.279178719450723e-06,
      "loss": 0.0178,
      "step": 845840
    },
    {
      "epoch": 1.3842684419656592,
      "grad_norm": 0.5370028018951416,
      "learning_rate": 7.279112827237206e-06,
      "loss": 0.0272,
      "step": 845860
    },
    {
      "epoch": 1.3843011724043126,
      "grad_norm": 0.930203378200531,
      "learning_rate": 7.27904693502369e-06,
      "loss": 0.0182,
      "step": 845880
    },
    {
      "epoch": 1.384333902842966,
      "grad_norm": 0.4472007751464844,
      "learning_rate": 7.2789810428101715e-06,
      "loss": 0.0168,
      "step": 845900
    },
    {
      "epoch": 1.3843666332816191,
      "grad_norm": 0.7001920342445374,
      "learning_rate": 7.278915150596655e-06,
      "loss": 0.0208,
      "step": 845920
    },
    {
      "epoch": 1.3843993637202725,
      "grad_norm": 0.6633328795433044,
      "learning_rate": 7.278849258383137e-06,
      "loss": 0.0208,
      "step": 845940
    },
    {
      "epoch": 1.3844320941589259,
      "grad_norm": 0.3566046953201294,
      "learning_rate": 7.2787833661696206e-06,
      "loss": 0.0231,
      "step": 845960
    },
    {
      "epoch": 1.3844648245975792,
      "grad_norm": 0.23316679894924164,
      "learning_rate": 7.2787174739561025e-06,
      "loss": 0.0243,
      "step": 845980
    },
    {
      "epoch": 1.3844975550362326,
      "grad_norm": 0.527398407459259,
      "learning_rate": 7.278651581742586e-06,
      "loss": 0.0197,
      "step": 846000
    },
    {
      "epoch": 1.384530285474886,
      "grad_norm": 1.4833099842071533,
      "learning_rate": 7.278585689529069e-06,
      "loss": 0.0238,
      "step": 846020
    },
    {
      "epoch": 1.3845630159135394,
      "grad_norm": 1.1608821153640747,
      "learning_rate": 7.2785197973155515e-06,
      "loss": 0.012,
      "step": 846040
    },
    {
      "epoch": 1.3845957463521925,
      "grad_norm": 0.24958166480064392,
      "learning_rate": 7.278453905102034e-06,
      "loss": 0.0228,
      "step": 846060
    },
    {
      "epoch": 1.3846284767908459,
      "grad_norm": 0.5442302227020264,
      "learning_rate": 7.278388012888518e-06,
      "loss": 0.0243,
      "step": 846080
    },
    {
      "epoch": 1.3846612072294993,
      "grad_norm": 0.8428207635879517,
      "learning_rate": 7.278322120675e-06,
      "loss": 0.0165,
      "step": 846100
    },
    {
      "epoch": 1.3846939376681526,
      "grad_norm": 0.35538193583488464,
      "learning_rate": 7.278256228461483e-06,
      "loss": 0.0261,
      "step": 846120
    },
    {
      "epoch": 1.384726668106806,
      "grad_norm": 1.084678292274475,
      "learning_rate": 7.278190336247967e-06,
      "loss": 0.0228,
      "step": 846140
    },
    {
      "epoch": 1.3847593985454594,
      "grad_norm": 0.44177478551864624,
      "learning_rate": 7.278124444034449e-06,
      "loss": 0.0175,
      "step": 846160
    },
    {
      "epoch": 1.3847921289841127,
      "grad_norm": 0.2057199329137802,
      "learning_rate": 7.278058551820932e-06,
      "loss": 0.0203,
      "step": 846180
    },
    {
      "epoch": 1.384824859422766,
      "grad_norm": 0.865088939666748,
      "learning_rate": 7.277992659607414e-06,
      "loss": 0.0265,
      "step": 846200
    },
    {
      "epoch": 1.3848575898614193,
      "grad_norm": 0.21381975710391998,
      "learning_rate": 7.277926767393898e-06,
      "loss": 0.032,
      "step": 846220
    },
    {
      "epoch": 1.3848903203000726,
      "grad_norm": 0.8464270830154419,
      "learning_rate": 7.277860875180381e-06,
      "loss": 0.0232,
      "step": 846240
    },
    {
      "epoch": 1.384923050738726,
      "grad_norm": 0.261026531457901,
      "learning_rate": 7.277794982966863e-06,
      "loss": 0.026,
      "step": 846260
    },
    {
      "epoch": 1.3849557811773794,
      "grad_norm": 0.7680758833885193,
      "learning_rate": 7.277729090753346e-06,
      "loss": 0.0281,
      "step": 846280
    },
    {
      "epoch": 1.3849885116160328,
      "grad_norm": 0.3484545946121216,
      "learning_rate": 7.27766319853983e-06,
      "loss": 0.019,
      "step": 846300
    },
    {
      "epoch": 1.3850212420546861,
      "grad_norm": 0.21886585652828217,
      "learning_rate": 7.2775973063263116e-06,
      "loss": 0.023,
      "step": 846320
    },
    {
      "epoch": 1.3850539724933393,
      "grad_norm": 1.1071698665618896,
      "learning_rate": 7.277531414112795e-06,
      "loss": 0.0122,
      "step": 846340
    },
    {
      "epoch": 1.3850867029319927,
      "grad_norm": 0.4359937608242035,
      "learning_rate": 7.277465521899277e-06,
      "loss": 0.0212,
      "step": 846360
    },
    {
      "epoch": 1.385119433370646,
      "grad_norm": 1.2757160663604736,
      "learning_rate": 7.277399629685761e-06,
      "loss": 0.0204,
      "step": 846380
    },
    {
      "epoch": 1.3851521638092994,
      "grad_norm": 0.44181621074676514,
      "learning_rate": 7.277333737472243e-06,
      "loss": 0.0251,
      "step": 846400
    },
    {
      "epoch": 1.3851848942479528,
      "grad_norm": 1.0887763500213623,
      "learning_rate": 7.277267845258726e-06,
      "loss": 0.0161,
      "step": 846420
    },
    {
      "epoch": 1.385217624686606,
      "grad_norm": 0.8610743880271912,
      "learning_rate": 7.277201953045209e-06,
      "loss": 0.0178,
      "step": 846440
    },
    {
      "epoch": 1.3852503551252595,
      "grad_norm": 0.3027869462966919,
      "learning_rate": 7.2771360608316924e-06,
      "loss": 0.0184,
      "step": 846460
    },
    {
      "epoch": 1.3852830855639127,
      "grad_norm": 2.7594499588012695,
      "learning_rate": 7.277070168618175e-06,
      "loss": 0.04,
      "step": 846480
    },
    {
      "epoch": 1.385315816002566,
      "grad_norm": 0.3058032989501953,
      "learning_rate": 7.277004276404658e-06,
      "loss": 0.0224,
      "step": 846500
    },
    {
      "epoch": 1.3853485464412194,
      "grad_norm": 0.39368152618408203,
      "learning_rate": 7.2769383841911415e-06,
      "loss": 0.0243,
      "step": 846520
    },
    {
      "epoch": 1.3853812768798728,
      "grad_norm": 0.5881118774414062,
      "learning_rate": 7.276872491977623e-06,
      "loss": 0.0236,
      "step": 846540
    },
    {
      "epoch": 1.3854140073185262,
      "grad_norm": 1.0187466144561768,
      "learning_rate": 7.276806599764107e-06,
      "loss": 0.0192,
      "step": 846560
    },
    {
      "epoch": 1.3854467377571793,
      "grad_norm": 0.9401029944419861,
      "learning_rate": 7.276740707550589e-06,
      "loss": 0.0151,
      "step": 846580
    },
    {
      "epoch": 1.385479468195833,
      "grad_norm": 0.5285462737083435,
      "learning_rate": 7.2766748153370725e-06,
      "loss": 0.0215,
      "step": 846600
    },
    {
      "epoch": 1.385512198634486,
      "grad_norm": 0.3469635844230652,
      "learning_rate": 7.276608923123555e-06,
      "loss": 0.0189,
      "step": 846620
    },
    {
      "epoch": 1.3855449290731394,
      "grad_norm": 1.816171407699585,
      "learning_rate": 7.276543030910038e-06,
      "loss": 0.0235,
      "step": 846640
    },
    {
      "epoch": 1.3855776595117928,
      "grad_norm": 0.48735636472702026,
      "learning_rate": 7.276477138696521e-06,
      "loss": 0.0157,
      "step": 846660
    },
    {
      "epoch": 1.3856103899504462,
      "grad_norm": 1.0898782014846802,
      "learning_rate": 7.276411246483004e-06,
      "loss": 0.02,
      "step": 846680
    },
    {
      "epoch": 1.3856431203890995,
      "grad_norm": 0.6468601822853088,
      "learning_rate": 7.276345354269486e-06,
      "loss": 0.0229,
      "step": 846700
    },
    {
      "epoch": 1.3856758508277527,
      "grad_norm": 0.7216951251029968,
      "learning_rate": 7.27627946205597e-06,
      "loss": 0.0264,
      "step": 846720
    },
    {
      "epoch": 1.385708581266406,
      "grad_norm": 0.7125266790390015,
      "learning_rate": 7.276213569842452e-06,
      "loss": 0.0186,
      "step": 846740
    },
    {
      "epoch": 1.3857413117050594,
      "grad_norm": 0.3844240605831146,
      "learning_rate": 7.276147677628935e-06,
      "loss": 0.0224,
      "step": 846760
    },
    {
      "epoch": 1.3857740421437128,
      "grad_norm": 1.033875584602356,
      "learning_rate": 7.276081785415417e-06,
      "loss": 0.0264,
      "step": 846780
    },
    {
      "epoch": 1.3858067725823662,
      "grad_norm": 0.7365556955337524,
      "learning_rate": 7.276015893201901e-06,
      "loss": 0.0294,
      "step": 846800
    },
    {
      "epoch": 1.3858395030210195,
      "grad_norm": 1.0333739519119263,
      "learning_rate": 7.275950000988384e-06,
      "loss": 0.0274,
      "step": 846820
    },
    {
      "epoch": 1.385872233459673,
      "grad_norm": 0.33743321895599365,
      "learning_rate": 7.275884108774866e-06,
      "loss": 0.0205,
      "step": 846840
    },
    {
      "epoch": 1.385904963898326,
      "grad_norm": 0.6178531050682068,
      "learning_rate": 7.27581821656135e-06,
      "loss": 0.0165,
      "step": 846860
    },
    {
      "epoch": 1.3859376943369794,
      "grad_norm": 0.287070095539093,
      "learning_rate": 7.2757523243478325e-06,
      "loss": 0.0241,
      "step": 846880
    },
    {
      "epoch": 1.3859704247756328,
      "grad_norm": 0.05939005687832832,
      "learning_rate": 7.275686432134316e-06,
      "loss": 0.0158,
      "step": 846900
    },
    {
      "epoch": 1.3860031552142862,
      "grad_norm": 0.7932978868484497,
      "learning_rate": 7.275620539920798e-06,
      "loss": 0.0229,
      "step": 846920
    },
    {
      "epoch": 1.3860358856529396,
      "grad_norm": 1.410082459449768,
      "learning_rate": 7.275554647707282e-06,
      "loss": 0.0292,
      "step": 846940
    },
    {
      "epoch": 1.386068616091593,
      "grad_norm": 2.3759920597076416,
      "learning_rate": 7.2754887554937635e-06,
      "loss": 0.0276,
      "step": 846960
    },
    {
      "epoch": 1.3861013465302463,
      "grad_norm": 0.19594381749629974,
      "learning_rate": 7.275422863280247e-06,
      "loss": 0.0248,
      "step": 846980
    },
    {
      "epoch": 1.3861340769688995,
      "grad_norm": 1.6259649991989136,
      "learning_rate": 7.275356971066729e-06,
      "loss": 0.0309,
      "step": 847000
    },
    {
      "epoch": 1.3861668074075528,
      "grad_norm": 0.8800972700119019,
      "learning_rate": 7.2752910788532125e-06,
      "loss": 0.0244,
      "step": 847020
    },
    {
      "epoch": 1.3861995378462062,
      "grad_norm": 0.28035253286361694,
      "learning_rate": 7.275225186639695e-06,
      "loss": 0.0236,
      "step": 847040
    },
    {
      "epoch": 1.3862322682848596,
      "grad_norm": 0.22892877459526062,
      "learning_rate": 7.275159294426178e-06,
      "loss": 0.0166,
      "step": 847060
    },
    {
      "epoch": 1.386264998723513,
      "grad_norm": 0.6103970408439636,
      "learning_rate": 7.275093402212661e-06,
      "loss": 0.0229,
      "step": 847080
    },
    {
      "epoch": 1.3862977291621663,
      "grad_norm": 0.5130892395973206,
      "learning_rate": 7.275027509999144e-06,
      "loss": 0.0226,
      "step": 847100
    },
    {
      "epoch": 1.3863304596008197,
      "grad_norm": 0.7302839756011963,
      "learning_rate": 7.274961617785626e-06,
      "loss": 0.0231,
      "step": 847120
    },
    {
      "epoch": 1.3863631900394728,
      "grad_norm": 0.619242250919342,
      "learning_rate": 7.27489572557211e-06,
      "loss": 0.0179,
      "step": 847140
    },
    {
      "epoch": 1.3863959204781262,
      "grad_norm": 0.5263569951057434,
      "learning_rate": 7.274829833358593e-06,
      "loss": 0.0232,
      "step": 847160
    },
    {
      "epoch": 1.3864286509167796,
      "grad_norm": 0.6816514134407043,
      "learning_rate": 7.274763941145075e-06,
      "loss": 0.0229,
      "step": 847180
    },
    {
      "epoch": 1.386461381355433,
      "grad_norm": 0.3271777629852295,
      "learning_rate": 7.274698048931559e-06,
      "loss": 0.0286,
      "step": 847200
    },
    {
      "epoch": 1.3864941117940863,
      "grad_norm": 2.247504472732544,
      "learning_rate": 7.274632156718041e-06,
      "loss": 0.0315,
      "step": 847220
    },
    {
      "epoch": 1.3865268422327395,
      "grad_norm": 0.13487300276756287,
      "learning_rate": 7.274566264504524e-06,
      "loss": 0.0155,
      "step": 847240
    },
    {
      "epoch": 1.386559572671393,
      "grad_norm": 0.3132266402244568,
      "learning_rate": 7.274500372291007e-06,
      "loss": 0.0194,
      "step": 847260
    },
    {
      "epoch": 1.3865923031100462,
      "grad_norm": 0.3669942319393158,
      "learning_rate": 7.27443448007749e-06,
      "loss": 0.0251,
      "step": 847280
    },
    {
      "epoch": 1.3866250335486996,
      "grad_norm": 1.4495187997817993,
      "learning_rate": 7.274368587863973e-06,
      "loss": 0.0197,
      "step": 847300
    },
    {
      "epoch": 1.386657763987353,
      "grad_norm": 1.5827678442001343,
      "learning_rate": 7.274302695650456e-06,
      "loss": 0.0185,
      "step": 847320
    },
    {
      "epoch": 1.3866904944260063,
      "grad_norm": 0.5674859285354614,
      "learning_rate": 7.274236803436938e-06,
      "loss": 0.0303,
      "step": 847340
    },
    {
      "epoch": 1.3867232248646597,
      "grad_norm": 0.4199909269809723,
      "learning_rate": 7.274170911223422e-06,
      "loss": 0.0284,
      "step": 847360
    },
    {
      "epoch": 1.3867559553033129,
      "grad_norm": 0.6607685089111328,
      "learning_rate": 7.2741050190099036e-06,
      "loss": 0.0173,
      "step": 847380
    },
    {
      "epoch": 1.3867886857419662,
      "grad_norm": 1.5446444749832153,
      "learning_rate": 7.274039126796387e-06,
      "loss": 0.0341,
      "step": 847400
    },
    {
      "epoch": 1.3868214161806196,
      "grad_norm": 0.07940594851970673,
      "learning_rate": 7.27397323458287e-06,
      "loss": 0.0283,
      "step": 847420
    },
    {
      "epoch": 1.386854146619273,
      "grad_norm": 1.2668853998184204,
      "learning_rate": 7.273907342369353e-06,
      "loss": 0.0226,
      "step": 847440
    },
    {
      "epoch": 1.3868868770579263,
      "grad_norm": 1.433602213859558,
      "learning_rate": 7.273841450155835e-06,
      "loss": 0.0194,
      "step": 847460
    },
    {
      "epoch": 1.3869196074965797,
      "grad_norm": 0.6314600110054016,
      "learning_rate": 7.273775557942319e-06,
      "loss": 0.0213,
      "step": 847480
    },
    {
      "epoch": 1.386952337935233,
      "grad_norm": 0.6806625127792358,
      "learning_rate": 7.273709665728801e-06,
      "loss": 0.0203,
      "step": 847500
    },
    {
      "epoch": 1.3869850683738862,
      "grad_norm": 0.1535888910293579,
      "learning_rate": 7.2736437735152844e-06,
      "loss": 0.0294,
      "step": 847520
    },
    {
      "epoch": 1.3870177988125396,
      "grad_norm": 0.19720764458179474,
      "learning_rate": 7.273577881301768e-06,
      "loss": 0.0227,
      "step": 847540
    },
    {
      "epoch": 1.387050529251193,
      "grad_norm": 0.33232957124710083,
      "learning_rate": 7.27351198908825e-06,
      "loss": 0.0142,
      "step": 847560
    },
    {
      "epoch": 1.3870832596898464,
      "grad_norm": 0.25440752506256104,
      "learning_rate": 7.2734460968747335e-06,
      "loss": 0.0182,
      "step": 847580
    },
    {
      "epoch": 1.3871159901284997,
      "grad_norm": 0.732507586479187,
      "learning_rate": 7.273380204661215e-06,
      "loss": 0.0188,
      "step": 847600
    },
    {
      "epoch": 1.387148720567153,
      "grad_norm": 0.3720164895057678,
      "learning_rate": 7.273314312447699e-06,
      "loss": 0.0176,
      "step": 847620
    },
    {
      "epoch": 1.3871814510058065,
      "grad_norm": 0.7287402749061584,
      "learning_rate": 7.273248420234182e-06,
      "loss": 0.0299,
      "step": 847640
    },
    {
      "epoch": 1.3872141814444596,
      "grad_norm": 0.8079902529716492,
      "learning_rate": 7.2731825280206645e-06,
      "loss": 0.014,
      "step": 847660
    },
    {
      "epoch": 1.387246911883113,
      "grad_norm": 0.46477365493774414,
      "learning_rate": 7.273116635807147e-06,
      "loss": 0.0194,
      "step": 847680
    },
    {
      "epoch": 1.3872796423217664,
      "grad_norm": 0.9848728179931641,
      "learning_rate": 7.273050743593631e-06,
      "loss": 0.0268,
      "step": 847700
    },
    {
      "epoch": 1.3873123727604197,
      "grad_norm": 0.15546271204948425,
      "learning_rate": 7.272984851380113e-06,
      "loss": 0.0134,
      "step": 847720
    },
    {
      "epoch": 1.3873451031990731,
      "grad_norm": 0.6898890137672424,
      "learning_rate": 7.272918959166596e-06,
      "loss": 0.0201,
      "step": 847740
    },
    {
      "epoch": 1.3873778336377265,
      "grad_norm": 1.0478590726852417,
      "learning_rate": 7.272853066953078e-06,
      "loss": 0.025,
      "step": 847760
    },
    {
      "epoch": 1.3874105640763799,
      "grad_norm": 0.3805849254131317,
      "learning_rate": 7.272787174739562e-06,
      "loss": 0.0209,
      "step": 847780
    },
    {
      "epoch": 1.387443294515033,
      "grad_norm": 0.3798448443412781,
      "learning_rate": 7.272721282526044e-06,
      "loss": 0.0176,
      "step": 847800
    },
    {
      "epoch": 1.3874760249536864,
      "grad_norm": 0.7761271595954895,
      "learning_rate": 7.272655390312527e-06,
      "loss": 0.024,
      "step": 847820
    },
    {
      "epoch": 1.3875087553923398,
      "grad_norm": 0.4091903865337372,
      "learning_rate": 7.27258949809901e-06,
      "loss": 0.0339,
      "step": 847840
    },
    {
      "epoch": 1.3875414858309931,
      "grad_norm": 0.3823917508125305,
      "learning_rate": 7.272523605885493e-06,
      "loss": 0.0247,
      "step": 847860
    },
    {
      "epoch": 1.3875742162696465,
      "grad_norm": 0.21572837233543396,
      "learning_rate": 7.272457713671976e-06,
      "loss": 0.0165,
      "step": 847880
    },
    {
      "epoch": 1.3876069467082996,
      "grad_norm": 0.8473142981529236,
      "learning_rate": 7.272391821458459e-06,
      "loss": 0.0206,
      "step": 847900
    },
    {
      "epoch": 1.3876396771469532,
      "grad_norm": 0.3790915608406067,
      "learning_rate": 7.272325929244942e-06,
      "loss": 0.0152,
      "step": 847920
    },
    {
      "epoch": 1.3876724075856064,
      "grad_norm": 0.6791688799858093,
      "learning_rate": 7.2722600370314245e-06,
      "loss": 0.0139,
      "step": 847940
    },
    {
      "epoch": 1.3877051380242598,
      "grad_norm": 2.017481565475464,
      "learning_rate": 7.272194144817908e-06,
      "loss": 0.0301,
      "step": 847960
    },
    {
      "epoch": 1.3877378684629131,
      "grad_norm": 0.7394431829452515,
      "learning_rate": 7.27212825260439e-06,
      "loss": 0.0193,
      "step": 847980
    },
    {
      "epoch": 1.3877705989015665,
      "grad_norm": 0.8263962268829346,
      "learning_rate": 7.2720623603908736e-06,
      "loss": 0.0352,
      "step": 848000
    },
    {
      "epoch": 1.3878033293402199,
      "grad_norm": 0.6418637037277222,
      "learning_rate": 7.2719964681773555e-06,
      "loss": 0.0178,
      "step": 848020
    },
    {
      "epoch": 1.387836059778873,
      "grad_norm": 1.1964869499206543,
      "learning_rate": 7.271930575963839e-06,
      "loss": 0.0267,
      "step": 848040
    },
    {
      "epoch": 1.3878687902175266,
      "grad_norm": 0.3325459063053131,
      "learning_rate": 7.271864683750322e-06,
      "loss": 0.0172,
      "step": 848060
    },
    {
      "epoch": 1.3879015206561798,
      "grad_norm": 0.3393380343914032,
      "learning_rate": 7.2717987915368045e-06,
      "loss": 0.0205,
      "step": 848080
    },
    {
      "epoch": 1.3879342510948331,
      "grad_norm": 3.7214341163635254,
      "learning_rate": 7.271732899323287e-06,
      "loss": 0.0211,
      "step": 848100
    },
    {
      "epoch": 1.3879669815334865,
      "grad_norm": 0.33868300914764404,
      "learning_rate": 7.271667007109771e-06,
      "loss": 0.0184,
      "step": 848120
    },
    {
      "epoch": 1.38799971197214,
      "grad_norm": 0.3659122586250305,
      "learning_rate": 7.271601114896253e-06,
      "loss": 0.0193,
      "step": 848140
    },
    {
      "epoch": 1.3880324424107933,
      "grad_norm": 0.6152823567390442,
      "learning_rate": 7.271535222682736e-06,
      "loss": 0.0222,
      "step": 848160
    },
    {
      "epoch": 1.3880651728494464,
      "grad_norm": 0.5983825325965881,
      "learning_rate": 7.271469330469218e-06,
      "loss": 0.0241,
      "step": 848180
    },
    {
      "epoch": 1.3880979032880998,
      "grad_norm": 0.24878953397274017,
      "learning_rate": 7.271403438255702e-06,
      "loss": 0.0197,
      "step": 848200
    },
    {
      "epoch": 1.3881306337267532,
      "grad_norm": 0.6187536716461182,
      "learning_rate": 7.271337546042185e-06,
      "loss": 0.0217,
      "step": 848220
    },
    {
      "epoch": 1.3881633641654065,
      "grad_norm": 0.6640962362289429,
      "learning_rate": 7.271271653828667e-06,
      "loss": 0.0184,
      "step": 848240
    },
    {
      "epoch": 1.38819609460406,
      "grad_norm": 0.9162161350250244,
      "learning_rate": 7.271205761615151e-06,
      "loss": 0.0252,
      "step": 848260
    },
    {
      "epoch": 1.3882288250427133,
      "grad_norm": 0.1303897351026535,
      "learning_rate": 7.271139869401634e-06,
      "loss": 0.0167,
      "step": 848280
    },
    {
      "epoch": 1.3882615554813666,
      "grad_norm": 0.5087390542030334,
      "learning_rate": 7.271073977188116e-06,
      "loss": 0.0177,
      "step": 848300
    },
    {
      "epoch": 1.3882942859200198,
      "grad_norm": 0.9068050384521484,
      "learning_rate": 7.271008084974599e-06,
      "loss": 0.02,
      "step": 848320
    },
    {
      "epoch": 1.3883270163586732,
      "grad_norm": 1.0314576625823975,
      "learning_rate": 7.270942192761083e-06,
      "loss": 0.0252,
      "step": 848340
    },
    {
      "epoch": 1.3883597467973265,
      "grad_norm": 1.7403743267059326,
      "learning_rate": 7.270876300547565e-06,
      "loss": 0.0166,
      "step": 848360
    },
    {
      "epoch": 1.38839247723598,
      "grad_norm": 0.4487149715423584,
      "learning_rate": 7.270810408334048e-06,
      "loss": 0.0228,
      "step": 848380
    },
    {
      "epoch": 1.3884252076746333,
      "grad_norm": 0.7402302026748657,
      "learning_rate": 7.27074451612053e-06,
      "loss": 0.0187,
      "step": 848400
    },
    {
      "epoch": 1.3884579381132867,
      "grad_norm": 0.5474006533622742,
      "learning_rate": 7.270678623907014e-06,
      "loss": 0.0241,
      "step": 848420
    },
    {
      "epoch": 1.38849066855194,
      "grad_norm": 0.6862764358520508,
      "learning_rate": 7.270612731693496e-06,
      "loss": 0.0228,
      "step": 848440
    },
    {
      "epoch": 1.3885233989905932,
      "grad_norm": 0.8519066572189331,
      "learning_rate": 7.270546839479979e-06,
      "loss": 0.0281,
      "step": 848460
    },
    {
      "epoch": 1.3885561294292466,
      "grad_norm": 0.8235889077186584,
      "learning_rate": 7.270480947266462e-06,
      "loss": 0.0178,
      "step": 848480
    },
    {
      "epoch": 1.3885888598679,
      "grad_norm": 3.1492977142333984,
      "learning_rate": 7.2704150550529455e-06,
      "loss": 0.017,
      "step": 848500
    },
    {
      "epoch": 1.3886215903065533,
      "grad_norm": 2.7521610260009766,
      "learning_rate": 7.270349162839427e-06,
      "loss": 0.0245,
      "step": 848520
    },
    {
      "epoch": 1.3886543207452067,
      "grad_norm": 0.5075963139533997,
      "learning_rate": 7.270283270625911e-06,
      "loss": 0.0218,
      "step": 848540
    },
    {
      "epoch": 1.38868705118386,
      "grad_norm": 0.18957866728305817,
      "learning_rate": 7.270217378412393e-06,
      "loss": 0.0253,
      "step": 848560
    },
    {
      "epoch": 1.3887197816225134,
      "grad_norm": 0.7723019123077393,
      "learning_rate": 7.270151486198876e-06,
      "loss": 0.0207,
      "step": 848580
    },
    {
      "epoch": 1.3887525120611666,
      "grad_norm": 0.796040415763855,
      "learning_rate": 7.27008559398536e-06,
      "loss": 0.029,
      "step": 848600
    },
    {
      "epoch": 1.38878524249982,
      "grad_norm": 0.298591673374176,
      "learning_rate": 7.270019701771842e-06,
      "loss": 0.0214,
      "step": 848620
    },
    {
      "epoch": 1.3888179729384733,
      "grad_norm": 0.8861625790596008,
      "learning_rate": 7.2699538095583255e-06,
      "loss": 0.0218,
      "step": 848640
    },
    {
      "epoch": 1.3888507033771267,
      "grad_norm": 0.4161711633205414,
      "learning_rate": 7.269887917344808e-06,
      "loss": 0.0239,
      "step": 848660
    },
    {
      "epoch": 1.38888343381578,
      "grad_norm": 1.1472225189208984,
      "learning_rate": 7.269822025131291e-06,
      "loss": 0.0266,
      "step": 848680
    },
    {
      "epoch": 1.3889161642544332,
      "grad_norm": 0.7752079963684082,
      "learning_rate": 7.269756132917774e-06,
      "loss": 0.0236,
      "step": 848700
    },
    {
      "epoch": 1.3889488946930868,
      "grad_norm": 0.38363751769065857,
      "learning_rate": 7.269690240704257e-06,
      "loss": 0.0228,
      "step": 848720
    },
    {
      "epoch": 1.38898162513174,
      "grad_norm": 1.1087793111801147,
      "learning_rate": 7.269624348490739e-06,
      "loss": 0.0286,
      "step": 848740
    },
    {
      "epoch": 1.3890143555703933,
      "grad_norm": 0.7610416412353516,
      "learning_rate": 7.269558456277223e-06,
      "loss": 0.0233,
      "step": 848760
    },
    {
      "epoch": 1.3890470860090467,
      "grad_norm": 0.7091178297996521,
      "learning_rate": 7.269492564063705e-06,
      "loss": 0.0134,
      "step": 848780
    },
    {
      "epoch": 1.3890798164477,
      "grad_norm": 1.051535725593567,
      "learning_rate": 7.269426671850188e-06,
      "loss": 0.0192,
      "step": 848800
    },
    {
      "epoch": 1.3891125468863534,
      "grad_norm": 0.3379342555999756,
      "learning_rate": 7.26936077963667e-06,
      "loss": 0.0176,
      "step": 848820
    },
    {
      "epoch": 1.3891452773250066,
      "grad_norm": 0.8548016548156738,
      "learning_rate": 7.269294887423154e-06,
      "loss": 0.023,
      "step": 848840
    },
    {
      "epoch": 1.3891780077636602,
      "grad_norm": 0.14856064319610596,
      "learning_rate": 7.2692289952096365e-06,
      "loss": 0.0183,
      "step": 848860
    },
    {
      "epoch": 1.3892107382023133,
      "grad_norm": 1.407569169998169,
      "learning_rate": 7.269163102996119e-06,
      "loss": 0.0204,
      "step": 848880
    },
    {
      "epoch": 1.3892434686409667,
      "grad_norm": 0.44697001576423645,
      "learning_rate": 7.269097210782602e-06,
      "loss": 0.0283,
      "step": 848900
    },
    {
      "epoch": 1.38927619907962,
      "grad_norm": 0.3234555423259735,
      "learning_rate": 7.2690313185690855e-06,
      "loss": 0.0285,
      "step": 848920
    },
    {
      "epoch": 1.3893089295182734,
      "grad_norm": 0.2707149386405945,
      "learning_rate": 7.268965426355568e-06,
      "loss": 0.0164,
      "step": 848940
    },
    {
      "epoch": 1.3893416599569268,
      "grad_norm": 1.4599257707595825,
      "learning_rate": 7.268899534142051e-06,
      "loss": 0.0208,
      "step": 848960
    },
    {
      "epoch": 1.38937439039558,
      "grad_norm": 0.6668074727058411,
      "learning_rate": 7.268833641928535e-06,
      "loss": 0.0199,
      "step": 848980
    },
    {
      "epoch": 1.3894071208342333,
      "grad_norm": 1.596105933189392,
      "learning_rate": 7.2687677497150165e-06,
      "loss": 0.0223,
      "step": 849000
    },
    {
      "epoch": 1.3894398512728867,
      "grad_norm": 1.1730780601501465,
      "learning_rate": 7.2687018575015e-06,
      "loss": 0.0246,
      "step": 849020
    },
    {
      "epoch": 1.38947258171154,
      "grad_norm": 0.3264712691307068,
      "learning_rate": 7.268635965287982e-06,
      "loss": 0.0176,
      "step": 849040
    },
    {
      "epoch": 1.3895053121501935,
      "grad_norm": 0.5653234124183655,
      "learning_rate": 7.2685700730744656e-06,
      "loss": 0.0254,
      "step": 849060
    },
    {
      "epoch": 1.3895380425888468,
      "grad_norm": 0.3285098075866699,
      "learning_rate": 7.268504180860948e-06,
      "loss": 0.0168,
      "step": 849080
    },
    {
      "epoch": 1.3895707730275002,
      "grad_norm": 6.237415790557861,
      "learning_rate": 7.268438288647431e-06,
      "loss": 0.0232,
      "step": 849100
    },
    {
      "epoch": 1.3896035034661534,
      "grad_norm": 1.2022455930709839,
      "learning_rate": 7.268372396433914e-06,
      "loss": 0.0199,
      "step": 849120
    },
    {
      "epoch": 1.3896362339048067,
      "grad_norm": 1.0504692792892456,
      "learning_rate": 7.268306504220397e-06,
      "loss": 0.0201,
      "step": 849140
    },
    {
      "epoch": 1.38966896434346,
      "grad_norm": 0.2274360954761505,
      "learning_rate": 7.268240612006879e-06,
      "loss": 0.0213,
      "step": 849160
    },
    {
      "epoch": 1.3897016947821135,
      "grad_norm": 0.1387733817100525,
      "learning_rate": 7.268174719793363e-06,
      "loss": 0.0222,
      "step": 849180
    },
    {
      "epoch": 1.3897344252207668,
      "grad_norm": 0.25211215019226074,
      "learning_rate": 7.268108827579845e-06,
      "loss": 0.0239,
      "step": 849200
    },
    {
      "epoch": 1.3897671556594202,
      "grad_norm": 0.6367477178573608,
      "learning_rate": 7.268042935366328e-06,
      "loss": 0.0145,
      "step": 849220
    },
    {
      "epoch": 1.3897998860980736,
      "grad_norm": 0.7916655540466309,
      "learning_rate": 7.267977043152811e-06,
      "loss": 0.0207,
      "step": 849240
    },
    {
      "epoch": 1.3898326165367267,
      "grad_norm": 0.39570286870002747,
      "learning_rate": 7.267911150939294e-06,
      "loss": 0.0183,
      "step": 849260
    },
    {
      "epoch": 1.3898653469753801,
      "grad_norm": 0.11747846752405167,
      "learning_rate": 7.267845258725777e-06,
      "loss": 0.0189,
      "step": 849280
    },
    {
      "epoch": 1.3898980774140335,
      "grad_norm": 0.6950012445449829,
      "learning_rate": 7.26777936651226e-06,
      "loss": 0.026,
      "step": 849300
    },
    {
      "epoch": 1.3899308078526869,
      "grad_norm": 0.8463575839996338,
      "learning_rate": 7.267713474298743e-06,
      "loss": 0.024,
      "step": 849320
    },
    {
      "epoch": 1.3899635382913402,
      "grad_norm": 0.19569867849349976,
      "learning_rate": 7.267647582085226e-06,
      "loss": 0.0208,
      "step": 849340
    },
    {
      "epoch": 1.3899962687299936,
      "grad_norm": 0.4281037151813507,
      "learning_rate": 7.267581689871709e-06,
      "loss": 0.018,
      "step": 849360
    },
    {
      "epoch": 1.390028999168647,
      "grad_norm": 1.1336994171142578,
      "learning_rate": 7.267515797658191e-06,
      "loss": 0.015,
      "step": 849380
    },
    {
      "epoch": 1.3900617296073001,
      "grad_norm": 0.46477705240249634,
      "learning_rate": 7.267449905444675e-06,
      "loss": 0.031,
      "step": 849400
    },
    {
      "epoch": 1.3900944600459535,
      "grad_norm": 0.3747451901435852,
      "learning_rate": 7.2673840132311566e-06,
      "loss": 0.0151,
      "step": 849420
    },
    {
      "epoch": 1.3901271904846069,
      "grad_norm": 0.23787452280521393,
      "learning_rate": 7.26731812101764e-06,
      "loss": 0.0151,
      "step": 849440
    },
    {
      "epoch": 1.3901599209232602,
      "grad_norm": 0.2420075535774231,
      "learning_rate": 7.267252228804123e-06,
      "loss": 0.0131,
      "step": 849460
    },
    {
      "epoch": 1.3901926513619136,
      "grad_norm": 0.6110789775848389,
      "learning_rate": 7.267186336590606e-06,
      "loss": 0.032,
      "step": 849480
    },
    {
      "epoch": 1.3902253818005668,
      "grad_norm": 0.15737661719322205,
      "learning_rate": 7.267120444377088e-06,
      "loss": 0.0221,
      "step": 849500
    },
    {
      "epoch": 1.3902581122392204,
      "grad_norm": 0.8825007677078247,
      "learning_rate": 7.267054552163572e-06,
      "loss": 0.0199,
      "step": 849520
    },
    {
      "epoch": 1.3902908426778735,
      "grad_norm": 0.1487339287996292,
      "learning_rate": 7.266988659950054e-06,
      "loss": 0.0223,
      "step": 849540
    },
    {
      "epoch": 1.3903235731165269,
      "grad_norm": 0.30380403995513916,
      "learning_rate": 7.2669227677365374e-06,
      "loss": 0.0215,
      "step": 849560
    },
    {
      "epoch": 1.3903563035551803,
      "grad_norm": 0.4030027687549591,
      "learning_rate": 7.266856875523019e-06,
      "loss": 0.0191,
      "step": 849580
    },
    {
      "epoch": 1.3903890339938336,
      "grad_norm": 0.4459233283996582,
      "learning_rate": 7.266790983309503e-06,
      "loss": 0.0176,
      "step": 849600
    },
    {
      "epoch": 1.390421764432487,
      "grad_norm": 1.1926954984664917,
      "learning_rate": 7.266725091095985e-06,
      "loss": 0.0242,
      "step": 849620
    },
    {
      "epoch": 1.3904544948711401,
      "grad_norm": 1.1172415018081665,
      "learning_rate": 7.266659198882468e-06,
      "loss": 0.0319,
      "step": 849640
    },
    {
      "epoch": 1.3904872253097937,
      "grad_norm": 0.349227637052536,
      "learning_rate": 7.266593306668952e-06,
      "loss": 0.0263,
      "step": 849660
    },
    {
      "epoch": 1.390519955748447,
      "grad_norm": 1.0836843252182007,
      "learning_rate": 7.266527414455435e-06,
      "loss": 0.031,
      "step": 849680
    },
    {
      "epoch": 1.3905526861871003,
      "grad_norm": 0.4287135899066925,
      "learning_rate": 7.2664615222419175e-06,
      "loss": 0.0192,
      "step": 849700
    },
    {
      "epoch": 1.3905854166257536,
      "grad_norm": 0.5506793260574341,
      "learning_rate": 7.2663956300284e-06,
      "loss": 0.0184,
      "step": 849720
    },
    {
      "epoch": 1.390618147064407,
      "grad_norm": 0.5477432608604431,
      "learning_rate": 7.266329737814884e-06,
      "loss": 0.0179,
      "step": 849740
    },
    {
      "epoch": 1.3906508775030604,
      "grad_norm": 0.19222037494182587,
      "learning_rate": 7.266263845601366e-06,
      "loss": 0.0189,
      "step": 849760
    },
    {
      "epoch": 1.3906836079417135,
      "grad_norm": 0.868635892868042,
      "learning_rate": 7.266197953387849e-06,
      "loss": 0.0218,
      "step": 849780
    },
    {
      "epoch": 1.390716338380367,
      "grad_norm": 0.593415379524231,
      "learning_rate": 7.266132061174331e-06,
      "loss": 0.0246,
      "step": 849800
    },
    {
      "epoch": 1.3907490688190203,
      "grad_norm": 0.6431972980499268,
      "learning_rate": 7.266066168960815e-06,
      "loss": 0.0159,
      "step": 849820
    },
    {
      "epoch": 1.3907817992576736,
      "grad_norm": 2.602165460586548,
      "learning_rate": 7.266000276747297e-06,
      "loss": 0.0217,
      "step": 849840
    },
    {
      "epoch": 1.390814529696327,
      "grad_norm": 0.3248380720615387,
      "learning_rate": 7.26593438453378e-06,
      "loss": 0.0258,
      "step": 849860
    },
    {
      "epoch": 1.3908472601349804,
      "grad_norm": 0.7510771155357361,
      "learning_rate": 7.265868492320263e-06,
      "loss": 0.0219,
      "step": 849880
    },
    {
      "epoch": 1.3908799905736338,
      "grad_norm": 0.20893041789531708,
      "learning_rate": 7.265802600106746e-06,
      "loss": 0.0262,
      "step": 849900
    },
    {
      "epoch": 1.390912721012287,
      "grad_norm": 0.8086426258087158,
      "learning_rate": 7.2657367078932284e-06,
      "loss": 0.0178,
      "step": 849920
    },
    {
      "epoch": 1.3909454514509403,
      "grad_norm": 0.5722159147262573,
      "learning_rate": 7.265670815679712e-06,
      "loss": 0.0234,
      "step": 849940
    },
    {
      "epoch": 1.3909781818895937,
      "grad_norm": 1.1012051105499268,
      "learning_rate": 7.265604923466194e-06,
      "loss": 0.0278,
      "step": 849960
    },
    {
      "epoch": 1.391010912328247,
      "grad_norm": 0.16264379024505615,
      "learning_rate": 7.2655390312526775e-06,
      "loss": 0.0258,
      "step": 849980
    },
    {
      "epoch": 1.3910436427669004,
      "grad_norm": 0.17506538331508636,
      "learning_rate": 7.265473139039161e-06,
      "loss": 0.0235,
      "step": 850000
    },
    {
      "epoch": 1.3910436427669004,
      "eval_loss": 0.011329708620905876,
      "eval_runtime": 6507.9355,
      "eval_samples_per_second": 157.939,
      "eval_steps_per_second": 15.794,
      "eval_sts-dev_pearson_cosine": 0.9735249149194155,
      "eval_sts-dev_spearman_cosine": 0.8885879952119572,
      "step": 850000
    },
    {
      "epoch": 1.3910763732055538,
      "grad_norm": 0.29449334740638733,
      "learning_rate": 7.265407246825643e-06,
      "loss": 0.0163,
      "step": 850020
    },
    {
      "epoch": 1.3911091036442071,
      "grad_norm": 0.20863471925258636,
      "learning_rate": 7.265341354612127e-06,
      "loss": 0.0241,
      "step": 850040
    },
    {
      "epoch": 1.3911418340828603,
      "grad_norm": 0.13054808974266052,
      "learning_rate": 7.2652754623986085e-06,
      "loss": 0.0206,
      "step": 850060
    },
    {
      "epoch": 1.3911745645215137,
      "grad_norm": 0.8566837310791016,
      "learning_rate": 7.265209570185092e-06,
      "loss": 0.0252,
      "step": 850080
    },
    {
      "epoch": 1.391207294960167,
      "grad_norm": 0.4376731514930725,
      "learning_rate": 7.265143677971575e-06,
      "loss": 0.0165,
      "step": 850100
    },
    {
      "epoch": 1.3912400253988204,
      "grad_norm": 0.3457632064819336,
      "learning_rate": 7.2650777857580575e-06,
      "loss": 0.0218,
      "step": 850120
    },
    {
      "epoch": 1.3912727558374738,
      "grad_norm": 1.031404972076416,
      "learning_rate": 7.26501189354454e-06,
      "loss": 0.0197,
      "step": 850140
    },
    {
      "epoch": 1.391305486276127,
      "grad_norm": 0.514040470123291,
      "learning_rate": 7.264946001331024e-06,
      "loss": 0.0234,
      "step": 850160
    },
    {
      "epoch": 1.3913382167147805,
      "grad_norm": 0.3327283561229706,
      "learning_rate": 7.264880109117506e-06,
      "loss": 0.0222,
      "step": 850180
    },
    {
      "epoch": 1.3913709471534337,
      "grad_norm": 0.7764787673950195,
      "learning_rate": 7.264814216903989e-06,
      "loss": 0.0182,
      "step": 850200
    },
    {
      "epoch": 1.391403677592087,
      "grad_norm": 0.1966169774532318,
      "learning_rate": 7.264748324690471e-06,
      "loss": 0.0212,
      "step": 850220
    },
    {
      "epoch": 1.3914364080307404,
      "grad_norm": 0.6037178039550781,
      "learning_rate": 7.264682432476955e-06,
      "loss": 0.0182,
      "step": 850240
    },
    {
      "epoch": 1.3914691384693938,
      "grad_norm": 0.2502289116382599,
      "learning_rate": 7.2646165402634376e-06,
      "loss": 0.0153,
      "step": 850260
    },
    {
      "epoch": 1.3915018689080472,
      "grad_norm": 0.3013497591018677,
      "learning_rate": 7.26455064804992e-06,
      "loss": 0.016,
      "step": 850280
    },
    {
      "epoch": 1.3915345993467003,
      "grad_norm": 1.3866851329803467,
      "learning_rate": 7.264484755836403e-06,
      "loss": 0.0346,
      "step": 850300
    },
    {
      "epoch": 1.391567329785354,
      "grad_norm": 1.1056230068206787,
      "learning_rate": 7.264418863622887e-06,
      "loss": 0.0243,
      "step": 850320
    },
    {
      "epoch": 1.391600060224007,
      "grad_norm": 0.4628545641899109,
      "learning_rate": 7.264352971409369e-06,
      "loss": 0.0226,
      "step": 850340
    },
    {
      "epoch": 1.3916327906626604,
      "grad_norm": 0.18402662873268127,
      "learning_rate": 7.264287079195852e-06,
      "loss": 0.0233,
      "step": 850360
    },
    {
      "epoch": 1.3916655211013138,
      "grad_norm": 0.9038548469543457,
      "learning_rate": 7.264221186982336e-06,
      "loss": 0.0226,
      "step": 850380
    },
    {
      "epoch": 1.3916982515399672,
      "grad_norm": 1.4506102800369263,
      "learning_rate": 7.264155294768818e-06,
      "loss": 0.0251,
      "step": 850400
    },
    {
      "epoch": 1.3917309819786206,
      "grad_norm": 0.1446009874343872,
      "learning_rate": 7.264089402555301e-06,
      "loss": 0.0148,
      "step": 850420
    },
    {
      "epoch": 1.3917637124172737,
      "grad_norm": 0.17345857620239258,
      "learning_rate": 7.264023510341783e-06,
      "loss": 0.0164,
      "step": 850440
    },
    {
      "epoch": 1.391796442855927,
      "grad_norm": 0.1614365130662918,
      "learning_rate": 7.263957618128267e-06,
      "loss": 0.0333,
      "step": 850460
    },
    {
      "epoch": 1.3918291732945804,
      "grad_norm": 0.22030292451381683,
      "learning_rate": 7.263891725914749e-06,
      "loss": 0.0238,
      "step": 850480
    },
    {
      "epoch": 1.3918619037332338,
      "grad_norm": 1.6951205730438232,
      "learning_rate": 7.263825833701232e-06,
      "loss": 0.0241,
      "step": 850500
    },
    {
      "epoch": 1.3918946341718872,
      "grad_norm": 1.3561757802963257,
      "learning_rate": 7.263759941487715e-06,
      "loss": 0.0247,
      "step": 850520
    },
    {
      "epoch": 1.3919273646105406,
      "grad_norm": 0.5790815353393555,
      "learning_rate": 7.2636940492741985e-06,
      "loss": 0.0256,
      "step": 850540
    },
    {
      "epoch": 1.391960095049194,
      "grad_norm": 0.5339060425758362,
      "learning_rate": 7.26362815706068e-06,
      "loss": 0.021,
      "step": 850560
    },
    {
      "epoch": 1.391992825487847,
      "grad_norm": 0.3289330005645752,
      "learning_rate": 7.263562264847164e-06,
      "loss": 0.0146,
      "step": 850580
    },
    {
      "epoch": 1.3920255559265005,
      "grad_norm": 0.6998623013496399,
      "learning_rate": 7.263496372633646e-06,
      "loss": 0.0186,
      "step": 850600
    },
    {
      "epoch": 1.3920582863651538,
      "grad_norm": 0.24739670753479004,
      "learning_rate": 7.263430480420129e-06,
      "loss": 0.0186,
      "step": 850620
    },
    {
      "epoch": 1.3920910168038072,
      "grad_norm": 0.2836221158504486,
      "learning_rate": 7.263364588206611e-06,
      "loss": 0.025,
      "step": 850640
    },
    {
      "epoch": 1.3921237472424606,
      "grad_norm": 0.15688616037368774,
      "learning_rate": 7.263298695993095e-06,
      "loss": 0.0225,
      "step": 850660
    },
    {
      "epoch": 1.392156477681114,
      "grad_norm": 0.341116338968277,
      "learning_rate": 7.2632328037795785e-06,
      "loss": 0.0174,
      "step": 850680
    },
    {
      "epoch": 1.3921892081197673,
      "grad_norm": 0.5999547839164734,
      "learning_rate": 7.26316691156606e-06,
      "loss": 0.0156,
      "step": 850700
    },
    {
      "epoch": 1.3922219385584205,
      "grad_norm": 1.857982873916626,
      "learning_rate": 7.263101019352544e-06,
      "loss": 0.0284,
      "step": 850720
    },
    {
      "epoch": 1.3922546689970738,
      "grad_norm": 0.8489712476730347,
      "learning_rate": 7.263035127139027e-06,
      "loss": 0.0195,
      "step": 850740
    },
    {
      "epoch": 1.3922873994357272,
      "grad_norm": 0.49147674441337585,
      "learning_rate": 7.26296923492551e-06,
      "loss": 0.0214,
      "step": 850760
    },
    {
      "epoch": 1.3923201298743806,
      "grad_norm": 0.5192617774009705,
      "learning_rate": 7.262903342711992e-06,
      "loss": 0.0129,
      "step": 850780
    },
    {
      "epoch": 1.392352860313034,
      "grad_norm": 0.25064876675605774,
      "learning_rate": 7.262837450498476e-06,
      "loss": 0.0235,
      "step": 850800
    },
    {
      "epoch": 1.3923855907516873,
      "grad_norm": 1.624765396118164,
      "learning_rate": 7.262771558284958e-06,
      "loss": 0.0291,
      "step": 850820
    },
    {
      "epoch": 1.3924183211903407,
      "grad_norm": 0.12492332607507706,
      "learning_rate": 7.262705666071441e-06,
      "loss": 0.0197,
      "step": 850840
    },
    {
      "epoch": 1.3924510516289939,
      "grad_norm": 1.9977681636810303,
      "learning_rate": 7.262639773857923e-06,
      "loss": 0.023,
      "step": 850860
    },
    {
      "epoch": 1.3924837820676472,
      "grad_norm": 0.5657121539115906,
      "learning_rate": 7.262573881644407e-06,
      "loss": 0.0192,
      "step": 850880
    },
    {
      "epoch": 1.3925165125063006,
      "grad_norm": 0.16086900234222412,
      "learning_rate": 7.2625079894308895e-06,
      "loss": 0.0252,
      "step": 850900
    },
    {
      "epoch": 1.392549242944954,
      "grad_norm": 1.8243441581726074,
      "learning_rate": 7.262442097217372e-06,
      "loss": 0.0251,
      "step": 850920
    },
    {
      "epoch": 1.3925819733836073,
      "grad_norm": 0.2252907007932663,
      "learning_rate": 7.262376205003855e-06,
      "loss": 0.0219,
      "step": 850940
    },
    {
      "epoch": 1.3926147038222605,
      "grad_norm": 0.7959315776824951,
      "learning_rate": 7.2623103127903385e-06,
      "loss": 0.0253,
      "step": 850960
    },
    {
      "epoch": 1.392647434260914,
      "grad_norm": 0.5977270603179932,
      "learning_rate": 7.2622444205768204e-06,
      "loss": 0.0138,
      "step": 850980
    },
    {
      "epoch": 1.3926801646995672,
      "grad_norm": 0.7300025820732117,
      "learning_rate": 7.262178528363304e-06,
      "loss": 0.0171,
      "step": 851000
    },
    {
      "epoch": 1.3927128951382206,
      "grad_norm": 0.46743571758270264,
      "learning_rate": 7.262112636149786e-06,
      "loss": 0.0116,
      "step": 851020
    },
    {
      "epoch": 1.392745625576874,
      "grad_norm": 2.2316253185272217,
      "learning_rate": 7.2620467439362695e-06,
      "loss": 0.0149,
      "step": 851040
    },
    {
      "epoch": 1.3927783560155274,
      "grad_norm": 0.4322280287742615,
      "learning_rate": 7.261980851722753e-06,
      "loss": 0.0178,
      "step": 851060
    },
    {
      "epoch": 1.3928110864541807,
      "grad_norm": 1.4503328800201416,
      "learning_rate": 7.261914959509235e-06,
      "loss": 0.0201,
      "step": 851080
    },
    {
      "epoch": 1.3928438168928339,
      "grad_norm": 0.270803838968277,
      "learning_rate": 7.2618490672957186e-06,
      "loss": 0.0212,
      "step": 851100
    },
    {
      "epoch": 1.3928765473314875,
      "grad_norm": 0.7487698793411255,
      "learning_rate": 7.261783175082201e-06,
      "loss": 0.0238,
      "step": 851120
    },
    {
      "epoch": 1.3929092777701406,
      "grad_norm": 0.43863749504089355,
      "learning_rate": 7.261717282868684e-06,
      "loss": 0.0175,
      "step": 851140
    },
    {
      "epoch": 1.392942008208794,
      "grad_norm": 1.6380884647369385,
      "learning_rate": 7.261651390655167e-06,
      "loss": 0.023,
      "step": 851160
    },
    {
      "epoch": 1.3929747386474474,
      "grad_norm": 0.6505674123764038,
      "learning_rate": 7.26158549844165e-06,
      "loss": 0.0246,
      "step": 851180
    },
    {
      "epoch": 1.3930074690861007,
      "grad_norm": 0.9943361282348633,
      "learning_rate": 7.261519606228132e-06,
      "loss": 0.0241,
      "step": 851200
    },
    {
      "epoch": 1.393040199524754,
      "grad_norm": 0.18981963396072388,
      "learning_rate": 7.261453714014616e-06,
      "loss": 0.0272,
      "step": 851220
    },
    {
      "epoch": 1.3930729299634073,
      "grad_norm": Infinity,
      "learning_rate": 7.261387821801098e-06,
      "loss": 0.0207,
      "step": 851240
    },
    {
      "epoch": 1.3931056604020606,
      "grad_norm": 0.9919629096984863,
      "learning_rate": 7.261321929587581e-06,
      "loss": 0.0174,
      "step": 851260
    },
    {
      "epoch": 1.393138390840714,
      "grad_norm": 0.28711313009262085,
      "learning_rate": 7.261256037374064e-06,
      "loss": 0.0251,
      "step": 851280
    },
    {
      "epoch": 1.3931711212793674,
      "grad_norm": 0.2518591582775116,
      "learning_rate": 7.261190145160547e-06,
      "loss": 0.0262,
      "step": 851300
    },
    {
      "epoch": 1.3932038517180207,
      "grad_norm": 0.773375391960144,
      "learning_rate": 7.2611242529470295e-06,
      "loss": 0.0192,
      "step": 851320
    },
    {
      "epoch": 1.3932365821566741,
      "grad_norm": 0.2332373559474945,
      "learning_rate": 7.261058360733513e-06,
      "loss": 0.0181,
      "step": 851340
    },
    {
      "epoch": 1.3932693125953275,
      "grad_norm": 0.41453102231025696,
      "learning_rate": 7.260992468519995e-06,
      "loss": 0.019,
      "step": 851360
    },
    {
      "epoch": 1.3933020430339806,
      "grad_norm": 0.7264766097068787,
      "learning_rate": 7.260926576306479e-06,
      "loss": 0.0198,
      "step": 851380
    },
    {
      "epoch": 1.393334773472634,
      "grad_norm": 0.5176366567611694,
      "learning_rate": 7.260860684092962e-06,
      "loss": 0.0221,
      "step": 851400
    },
    {
      "epoch": 1.3933675039112874,
      "grad_norm": 0.4095030725002289,
      "learning_rate": 7.260794791879444e-06,
      "loss": 0.0182,
      "step": 851420
    },
    {
      "epoch": 1.3934002343499408,
      "grad_norm": 0.2143506407737732,
      "learning_rate": 7.260728899665928e-06,
      "loss": 0.0185,
      "step": 851440
    },
    {
      "epoch": 1.3934329647885941,
      "grad_norm": 0.23577077686786652,
      "learning_rate": 7.2606630074524096e-06,
      "loss": 0.0205,
      "step": 851460
    },
    {
      "epoch": 1.3934656952272475,
      "grad_norm": 0.2997400462627411,
      "learning_rate": 7.260597115238893e-06,
      "loss": 0.0194,
      "step": 851480
    },
    {
      "epoch": 1.3934984256659009,
      "grad_norm": 0.7989629507064819,
      "learning_rate": 7.260531223025376e-06,
      "loss": 0.0184,
      "step": 851500
    },
    {
      "epoch": 1.393531156104554,
      "grad_norm": 0.5207292437553406,
      "learning_rate": 7.260465330811859e-06,
      "loss": 0.0226,
      "step": 851520
    },
    {
      "epoch": 1.3935638865432074,
      "grad_norm": 1.6954127550125122,
      "learning_rate": 7.260399438598341e-06,
      "loss": 0.0201,
      "step": 851540
    },
    {
      "epoch": 1.3935966169818608,
      "grad_norm": 0.25241246819496155,
      "learning_rate": 7.260333546384825e-06,
      "loss": 0.0123,
      "step": 851560
    },
    {
      "epoch": 1.3936293474205141,
      "grad_norm": 0.8187416791915894,
      "learning_rate": 7.260267654171307e-06,
      "loss": 0.0212,
      "step": 851580
    },
    {
      "epoch": 1.3936620778591675,
      "grad_norm": 0.9261223077774048,
      "learning_rate": 7.2602017619577904e-06,
      "loss": 0.0292,
      "step": 851600
    },
    {
      "epoch": 1.3936948082978209,
      "grad_norm": 0.7653758525848389,
      "learning_rate": 7.260135869744272e-06,
      "loss": 0.0213,
      "step": 851620
    },
    {
      "epoch": 1.3937275387364743,
      "grad_norm": 1.0244306325912476,
      "learning_rate": 7.260069977530756e-06,
      "loss": 0.0194,
      "step": 851640
    },
    {
      "epoch": 1.3937602691751274,
      "grad_norm": 0.7223742008209229,
      "learning_rate": 7.260004085317238e-06,
      "loss": 0.0269,
      "step": 851660
    },
    {
      "epoch": 1.3937929996137808,
      "grad_norm": 0.4017734229564667,
      "learning_rate": 7.259938193103721e-06,
      "loss": 0.0171,
      "step": 851680
    },
    {
      "epoch": 1.3938257300524342,
      "grad_norm": 0.6699409484863281,
      "learning_rate": 7.259872300890204e-06,
      "loss": 0.0216,
      "step": 851700
    },
    {
      "epoch": 1.3938584604910875,
      "grad_norm": 0.9639913439750671,
      "learning_rate": 7.259806408676687e-06,
      "loss": 0.0242,
      "step": 851720
    },
    {
      "epoch": 1.393891190929741,
      "grad_norm": 2.322871685028076,
      "learning_rate": 7.2597405164631705e-06,
      "loss": 0.0221,
      "step": 851740
    },
    {
      "epoch": 1.393923921368394,
      "grad_norm": 1.0596935749053955,
      "learning_rate": 7.259674624249653e-06,
      "loss": 0.0235,
      "step": 851760
    },
    {
      "epoch": 1.3939566518070476,
      "grad_norm": 0.18939520418643951,
      "learning_rate": 7.259608732036136e-06,
      "loss": 0.0223,
      "step": 851780
    },
    {
      "epoch": 1.3939893822457008,
      "grad_norm": 1.0020157098770142,
      "learning_rate": 7.259542839822619e-06,
      "loss": 0.0149,
      "step": 851800
    },
    {
      "epoch": 1.3940221126843542,
      "grad_norm": 0.24761125445365906,
      "learning_rate": 7.259476947609102e-06,
      "loss": 0.0181,
      "step": 851820
    },
    {
      "epoch": 1.3940548431230075,
      "grad_norm": 0.5942671895027161,
      "learning_rate": 7.259411055395584e-06,
      "loss": 0.0204,
      "step": 851840
    },
    {
      "epoch": 1.394087573561661,
      "grad_norm": 1.0364863872528076,
      "learning_rate": 7.259345163182068e-06,
      "loss": 0.0156,
      "step": 851860
    },
    {
      "epoch": 1.3941203040003143,
      "grad_norm": 0.6997947096824646,
      "learning_rate": 7.25927927096855e-06,
      "loss": 0.0231,
      "step": 851880
    },
    {
      "epoch": 1.3941530344389674,
      "grad_norm": 1.49820876121521,
      "learning_rate": 7.259213378755033e-06,
      "loss": 0.031,
      "step": 851900
    },
    {
      "epoch": 1.394185764877621,
      "grad_norm": 0.2917836010456085,
      "learning_rate": 7.259147486541516e-06,
      "loss": 0.0198,
      "step": 851920
    },
    {
      "epoch": 1.3942184953162742,
      "grad_norm": 0.46819064021110535,
      "learning_rate": 7.259081594327999e-06,
      "loss": 0.0198,
      "step": 851940
    },
    {
      "epoch": 1.3942512257549275,
      "grad_norm": 0.5814849734306335,
      "learning_rate": 7.2590157021144815e-06,
      "loss": 0.0203,
      "step": 851960
    },
    {
      "epoch": 1.394283956193581,
      "grad_norm": 1.1476991176605225,
      "learning_rate": 7.258949809900965e-06,
      "loss": 0.0268,
      "step": 851980
    },
    {
      "epoch": 1.3943166866322343,
      "grad_norm": 0.37592777609825134,
      "learning_rate": 7.258883917687447e-06,
      "loss": 0.0218,
      "step": 852000
    },
    {
      "epoch": 1.3943494170708877,
      "grad_norm": 1.6313639879226685,
      "learning_rate": 7.2588180254739305e-06,
      "loss": 0.0224,
      "step": 852020
    },
    {
      "epoch": 1.3943821475095408,
      "grad_norm": 1.9644540548324585,
      "learning_rate": 7.258752133260412e-06,
      "loss": 0.0236,
      "step": 852040
    },
    {
      "epoch": 1.3944148779481942,
      "grad_norm": 1.154269814491272,
      "learning_rate": 7.258686241046896e-06,
      "loss": 0.0243,
      "step": 852060
    },
    {
      "epoch": 1.3944476083868476,
      "grad_norm": 0.44091150164604187,
      "learning_rate": 7.258620348833379e-06,
      "loss": 0.036,
      "step": 852080
    },
    {
      "epoch": 1.394480338825501,
      "grad_norm": 0.4560113847255707,
      "learning_rate": 7.2585544566198615e-06,
      "loss": 0.0338,
      "step": 852100
    },
    {
      "epoch": 1.3945130692641543,
      "grad_norm": 0.2502845823764801,
      "learning_rate": 7.258488564406345e-06,
      "loss": 0.0259,
      "step": 852120
    },
    {
      "epoch": 1.3945457997028077,
      "grad_norm": 0.15711940824985504,
      "learning_rate": 7.258422672192828e-06,
      "loss": 0.0208,
      "step": 852140
    },
    {
      "epoch": 1.394578530141461,
      "grad_norm": 0.5430904626846313,
      "learning_rate": 7.2583567799793105e-06,
      "loss": 0.0176,
      "step": 852160
    },
    {
      "epoch": 1.3946112605801142,
      "grad_norm": 0.3043789267539978,
      "learning_rate": 7.258290887765793e-06,
      "loss": 0.0159,
      "step": 852180
    },
    {
      "epoch": 1.3946439910187676,
      "grad_norm": 0.5584647059440613,
      "learning_rate": 7.258224995552277e-06,
      "loss": 0.0227,
      "step": 852200
    },
    {
      "epoch": 1.394676721457421,
      "grad_norm": 0.5371676683425903,
      "learning_rate": 7.258159103338759e-06,
      "loss": 0.0183,
      "step": 852220
    },
    {
      "epoch": 1.3947094518960743,
      "grad_norm": 1.255580186843872,
      "learning_rate": 7.258093211125242e-06,
      "loss": 0.0224,
      "step": 852240
    },
    {
      "epoch": 1.3947421823347277,
      "grad_norm": 1.0057090520858765,
      "learning_rate": 7.258027318911724e-06,
      "loss": 0.0156,
      "step": 852260
    },
    {
      "epoch": 1.394774912773381,
      "grad_norm": 0.6641644239425659,
      "learning_rate": 7.257961426698208e-06,
      "loss": 0.0274,
      "step": 852280
    },
    {
      "epoch": 1.3948076432120344,
      "grad_norm": 1.4417651891708374,
      "learning_rate": 7.2578955344846906e-06,
      "loss": 0.0196,
      "step": 852300
    },
    {
      "epoch": 1.3948403736506876,
      "grad_norm": 0.580733060836792,
      "learning_rate": 7.257829642271173e-06,
      "loss": 0.0165,
      "step": 852320
    },
    {
      "epoch": 1.394873104089341,
      "grad_norm": 0.4473142921924591,
      "learning_rate": 7.257763750057656e-06,
      "loss": 0.0223,
      "step": 852340
    },
    {
      "epoch": 1.3949058345279943,
      "grad_norm": 3.1001813411712646,
      "learning_rate": 7.25769785784414e-06,
      "loss": 0.0296,
      "step": 852360
    },
    {
      "epoch": 1.3949385649666477,
      "grad_norm": 0.5814756155014038,
      "learning_rate": 7.2576319656306215e-06,
      "loss": 0.0225,
      "step": 852380
    },
    {
      "epoch": 1.394971295405301,
      "grad_norm": 1.4159791469573975,
      "learning_rate": 7.257566073417105e-06,
      "loss": 0.0199,
      "step": 852400
    },
    {
      "epoch": 1.3950040258439544,
      "grad_norm": 0.9464435577392578,
      "learning_rate": 7.257500181203587e-06,
      "loss": 0.0263,
      "step": 852420
    },
    {
      "epoch": 1.3950367562826078,
      "grad_norm": 0.5590356588363647,
      "learning_rate": 7.257434288990071e-06,
      "loss": 0.0251,
      "step": 852440
    },
    {
      "epoch": 1.395069486721261,
      "grad_norm": 0.412983775138855,
      "learning_rate": 7.257368396776554e-06,
      "loss": 0.0163,
      "step": 852460
    },
    {
      "epoch": 1.3951022171599143,
      "grad_norm": 1.222338318824768,
      "learning_rate": 7.257302504563036e-06,
      "loss": 0.0238,
      "step": 852480
    },
    {
      "epoch": 1.3951349475985677,
      "grad_norm": 0.47576892375946045,
      "learning_rate": 7.25723661234952e-06,
      "loss": 0.0244,
      "step": 852500
    },
    {
      "epoch": 1.395167678037221,
      "grad_norm": 0.2951529622077942,
      "learning_rate": 7.257170720136002e-06,
      "loss": 0.026,
      "step": 852520
    },
    {
      "epoch": 1.3952004084758745,
      "grad_norm": 0.09043566882610321,
      "learning_rate": 7.257104827922485e-06,
      "loss": 0.0133,
      "step": 852540
    },
    {
      "epoch": 1.3952331389145276,
      "grad_norm": 2.1218438148498535,
      "learning_rate": 7.257038935708968e-06,
      "loss": 0.0178,
      "step": 852560
    },
    {
      "epoch": 1.3952658693531812,
      "grad_norm": 1.1684558391571045,
      "learning_rate": 7.2569730434954515e-06,
      "loss": 0.02,
      "step": 852580
    },
    {
      "epoch": 1.3952985997918343,
      "grad_norm": 0.8772845268249512,
      "learning_rate": 7.256907151281933e-06,
      "loss": 0.0249,
      "step": 852600
    },
    {
      "epoch": 1.3953313302304877,
      "grad_norm": 0.3598054349422455,
      "learning_rate": 7.256841259068417e-06,
      "loss": 0.0195,
      "step": 852620
    },
    {
      "epoch": 1.395364060669141,
      "grad_norm": 1.1276271343231201,
      "learning_rate": 7.256775366854899e-06,
      "loss": 0.0315,
      "step": 852640
    },
    {
      "epoch": 1.3953967911077945,
      "grad_norm": 2.125258445739746,
      "learning_rate": 7.2567094746413824e-06,
      "loss": 0.0215,
      "step": 852660
    },
    {
      "epoch": 1.3954295215464478,
      "grad_norm": 1.1968828439712524,
      "learning_rate": 7.256643582427864e-06,
      "loss": 0.0259,
      "step": 852680
    },
    {
      "epoch": 1.395462251985101,
      "grad_norm": 1.0089893341064453,
      "learning_rate": 7.256577690214348e-06,
      "loss": 0.0207,
      "step": 852700
    },
    {
      "epoch": 1.3954949824237546,
      "grad_norm": 0.7250300049781799,
      "learning_rate": 7.256511798000831e-06,
      "loss": 0.0225,
      "step": 852720
    },
    {
      "epoch": 1.3955277128624077,
      "grad_norm": 1.198217511177063,
      "learning_rate": 7.256445905787313e-06,
      "loss": 0.0228,
      "step": 852740
    },
    {
      "epoch": 1.395560443301061,
      "grad_norm": 1.2950369119644165,
      "learning_rate": 7.256380013573796e-06,
      "loss": 0.0221,
      "step": 852760
    },
    {
      "epoch": 1.3955931737397145,
      "grad_norm": 0.7472084164619446,
      "learning_rate": 7.25631412136028e-06,
      "loss": 0.0243,
      "step": 852780
    },
    {
      "epoch": 1.3956259041783678,
      "grad_norm": 0.7758002281188965,
      "learning_rate": 7.2562482291467625e-06,
      "loss": 0.018,
      "step": 852800
    },
    {
      "epoch": 1.3956586346170212,
      "grad_norm": 1.3503509759902954,
      "learning_rate": 7.256182336933245e-06,
      "loss": 0.022,
      "step": 852820
    },
    {
      "epoch": 1.3956913650556744,
      "grad_norm": 0.31077516078948975,
      "learning_rate": 7.256116444719729e-06,
      "loss": 0.0232,
      "step": 852840
    },
    {
      "epoch": 1.3957240954943277,
      "grad_norm": 0.664359986782074,
      "learning_rate": 7.256050552506211e-06,
      "loss": 0.0172,
      "step": 852860
    },
    {
      "epoch": 1.3957568259329811,
      "grad_norm": 0.2836782932281494,
      "learning_rate": 7.255984660292694e-06,
      "loss": 0.0257,
      "step": 852880
    },
    {
      "epoch": 1.3957895563716345,
      "grad_norm": 0.14887438714504242,
      "learning_rate": 7.255918768079176e-06,
      "loss": 0.0162,
      "step": 852900
    },
    {
      "epoch": 1.3958222868102879,
      "grad_norm": 0.2210681289434433,
      "learning_rate": 7.25585287586566e-06,
      "loss": 0.0167,
      "step": 852920
    },
    {
      "epoch": 1.3958550172489412,
      "grad_norm": 0.6615226864814758,
      "learning_rate": 7.2557869836521425e-06,
      "loss": 0.0297,
      "step": 852940
    },
    {
      "epoch": 1.3958877476875946,
      "grad_norm": 0.621724009513855,
      "learning_rate": 7.255721091438625e-06,
      "loss": 0.0353,
      "step": 852960
    },
    {
      "epoch": 1.3959204781262478,
      "grad_norm": 0.6965822577476501,
      "learning_rate": 7.255655199225108e-06,
      "loss": 0.0195,
      "step": 852980
    },
    {
      "epoch": 1.3959532085649011,
      "grad_norm": 1.021394968032837,
      "learning_rate": 7.2555893070115915e-06,
      "loss": 0.0189,
      "step": 853000
    },
    {
      "epoch": 1.3959859390035545,
      "grad_norm": 0.41269442439079285,
      "learning_rate": 7.2555234147980734e-06,
      "loss": 0.0175,
      "step": 853020
    },
    {
      "epoch": 1.3960186694422079,
      "grad_norm": 0.17633050680160522,
      "learning_rate": 7.255457522584557e-06,
      "loss": 0.022,
      "step": 853040
    },
    {
      "epoch": 1.3960513998808612,
      "grad_norm": 0.54195237159729,
      "learning_rate": 7.255391630371039e-06,
      "loss": 0.0209,
      "step": 853060
    },
    {
      "epoch": 1.3960841303195146,
      "grad_norm": 0.31087031960487366,
      "learning_rate": 7.2553257381575225e-06,
      "loss": 0.0215,
      "step": 853080
    },
    {
      "epoch": 1.396116860758168,
      "grad_norm": 0.5062505006790161,
      "learning_rate": 7.255259845944005e-06,
      "loss": 0.0282,
      "step": 853100
    },
    {
      "epoch": 1.3961495911968211,
      "grad_norm": 0.35299426317214966,
      "learning_rate": 7.255193953730488e-06,
      "loss": 0.0281,
      "step": 853120
    },
    {
      "epoch": 1.3961823216354745,
      "grad_norm": 1.1020045280456543,
      "learning_rate": 7.2551280615169716e-06,
      "loss": 0.0146,
      "step": 853140
    },
    {
      "epoch": 1.3962150520741279,
      "grad_norm": 0.5762994289398193,
      "learning_rate": 7.255062169303454e-06,
      "loss": 0.0292,
      "step": 853160
    },
    {
      "epoch": 1.3962477825127813,
      "grad_norm": 1.0148541927337646,
      "learning_rate": 7.254996277089937e-06,
      "loss": 0.0233,
      "step": 853180
    },
    {
      "epoch": 1.3962805129514346,
      "grad_norm": 0.44329383969306946,
      "learning_rate": 7.25493038487642e-06,
      "loss": 0.0245,
      "step": 853200
    },
    {
      "epoch": 1.3963132433900878,
      "grad_norm": 0.3608624339103699,
      "learning_rate": 7.254864492662903e-06,
      "loss": 0.0237,
      "step": 853220
    },
    {
      "epoch": 1.3963459738287414,
      "grad_norm": 0.2129211276769638,
      "learning_rate": 7.254798600449385e-06,
      "loss": 0.0288,
      "step": 853240
    },
    {
      "epoch": 1.3963787042673945,
      "grad_norm": 1.0634067058563232,
      "learning_rate": 7.254732708235869e-06,
      "loss": 0.0284,
      "step": 853260
    },
    {
      "epoch": 1.396411434706048,
      "grad_norm": 0.49897751212120056,
      "learning_rate": 7.254666816022351e-06,
      "loss": 0.0159,
      "step": 853280
    },
    {
      "epoch": 1.3964441651447013,
      "grad_norm": 1.0751930475234985,
      "learning_rate": 7.254600923808834e-06,
      "loss": 0.0172,
      "step": 853300
    },
    {
      "epoch": 1.3964768955833546,
      "grad_norm": 0.8482893705368042,
      "learning_rate": 7.254535031595317e-06,
      "loss": 0.0184,
      "step": 853320
    },
    {
      "epoch": 1.396509626022008,
      "grad_norm": 1.346551537513733,
      "learning_rate": 7.2544691393818e-06,
      "loss": 0.0192,
      "step": 853340
    },
    {
      "epoch": 1.3965423564606612,
      "grad_norm": 0.9650552272796631,
      "learning_rate": 7.2544032471682826e-06,
      "loss": 0.0228,
      "step": 853360
    },
    {
      "epoch": 1.3965750868993148,
      "grad_norm": 0.751621663570404,
      "learning_rate": 7.254337354954766e-06,
      "loss": 0.0171,
      "step": 853380
    },
    {
      "epoch": 1.396607817337968,
      "grad_norm": 0.9901782274246216,
      "learning_rate": 7.254271462741248e-06,
      "loss": 0.0144,
      "step": 853400
    },
    {
      "epoch": 1.3966405477766213,
      "grad_norm": 0.5126621723175049,
      "learning_rate": 7.254205570527732e-06,
      "loss": 0.0161,
      "step": 853420
    },
    {
      "epoch": 1.3966732782152746,
      "grad_norm": 1.2691371440887451,
      "learning_rate": 7.2541396783142135e-06,
      "loss": 0.0173,
      "step": 853440
    },
    {
      "epoch": 1.396706008653928,
      "grad_norm": 0.6043879985809326,
      "learning_rate": 7.254073786100697e-06,
      "loss": 0.0292,
      "step": 853460
    },
    {
      "epoch": 1.3967387390925814,
      "grad_norm": 0.32465630769729614,
      "learning_rate": 7.254007893887179e-06,
      "loss": 0.0133,
      "step": 853480
    },
    {
      "epoch": 1.3967714695312345,
      "grad_norm": 0.605384111404419,
      "learning_rate": 7.253942001673663e-06,
      "loss": 0.0205,
      "step": 853500
    },
    {
      "epoch": 1.396804199969888,
      "grad_norm": 0.2877756357192993,
      "learning_rate": 7.253876109460146e-06,
      "loss": 0.018,
      "step": 853520
    },
    {
      "epoch": 1.3968369304085413,
      "grad_norm": 1.542527437210083,
      "learning_rate": 7.253810217246629e-06,
      "loss": 0.025,
      "step": 853540
    },
    {
      "epoch": 1.3968696608471947,
      "grad_norm": 0.22138850390911102,
      "learning_rate": 7.253744325033112e-06,
      "loss": 0.0195,
      "step": 853560
    },
    {
      "epoch": 1.396902391285848,
      "grad_norm": 2.139029026031494,
      "learning_rate": 7.253678432819594e-06,
      "loss": 0.025,
      "step": 853580
    },
    {
      "epoch": 1.3969351217245014,
      "grad_norm": 0.5455517172813416,
      "learning_rate": 7.253612540606078e-06,
      "loss": 0.0227,
      "step": 853600
    },
    {
      "epoch": 1.3969678521631548,
      "grad_norm": 0.3479613959789276,
      "learning_rate": 7.25354664839256e-06,
      "loss": 0.0152,
      "step": 853620
    },
    {
      "epoch": 1.397000582601808,
      "grad_norm": 2.5476038455963135,
      "learning_rate": 7.2534807561790434e-06,
      "loss": 0.0232,
      "step": 853640
    },
    {
      "epoch": 1.3970333130404613,
      "grad_norm": 0.6107354760169983,
      "learning_rate": 7.253414863965525e-06,
      "loss": 0.0204,
      "step": 853660
    },
    {
      "epoch": 1.3970660434791147,
      "grad_norm": 0.5670122504234314,
      "learning_rate": 7.253348971752009e-06,
      "loss": 0.0234,
      "step": 853680
    },
    {
      "epoch": 1.397098773917768,
      "grad_norm": 0.6351249814033508,
      "learning_rate": 7.253283079538491e-06,
      "loss": 0.0282,
      "step": 853700
    },
    {
      "epoch": 1.3971315043564214,
      "grad_norm": 2.3149023056030273,
      "learning_rate": 7.253217187324974e-06,
      "loss": 0.0161,
      "step": 853720
    },
    {
      "epoch": 1.3971642347950748,
      "grad_norm": 0.8463335633277893,
      "learning_rate": 7.253151295111457e-06,
      "loss": 0.0184,
      "step": 853740
    },
    {
      "epoch": 1.3971969652337282,
      "grad_norm": 0.999168872833252,
      "learning_rate": 7.25308540289794e-06,
      "loss": 0.0139,
      "step": 853760
    },
    {
      "epoch": 1.3972296956723813,
      "grad_norm": 0.5195096135139465,
      "learning_rate": 7.253019510684423e-06,
      "loss": 0.0156,
      "step": 853780
    },
    {
      "epoch": 1.3972624261110347,
      "grad_norm": 0.4739815294742584,
      "learning_rate": 7.252953618470906e-06,
      "loss": 0.0193,
      "step": 853800
    },
    {
      "epoch": 1.397295156549688,
      "grad_norm": 0.5183972120285034,
      "learning_rate": 7.252887726257388e-06,
      "loss": 0.0178,
      "step": 853820
    },
    {
      "epoch": 1.3973278869883414,
      "grad_norm": 1.1308358907699585,
      "learning_rate": 7.252821834043872e-06,
      "loss": 0.0206,
      "step": 853840
    },
    {
      "epoch": 1.3973606174269948,
      "grad_norm": 0.4948074519634247,
      "learning_rate": 7.252755941830355e-06,
      "loss": 0.0217,
      "step": 853860
    },
    {
      "epoch": 1.3973933478656482,
      "grad_norm": 0.7132406830787659,
      "learning_rate": 7.252690049616837e-06,
      "loss": 0.0164,
      "step": 853880
    },
    {
      "epoch": 1.3974260783043015,
      "grad_norm": 0.40355348587036133,
      "learning_rate": 7.252624157403321e-06,
      "loss": 0.016,
      "step": 853900
    },
    {
      "epoch": 1.3974588087429547,
      "grad_norm": 0.3100719153881073,
      "learning_rate": 7.252558265189803e-06,
      "loss": 0.032,
      "step": 853920
    },
    {
      "epoch": 1.397491539181608,
      "grad_norm": 0.1994660347700119,
      "learning_rate": 7.252492372976286e-06,
      "loss": 0.0196,
      "step": 853940
    },
    {
      "epoch": 1.3975242696202614,
      "grad_norm": 0.2905135154724121,
      "learning_rate": 7.252426480762769e-06,
      "loss": 0.0235,
      "step": 853960
    },
    {
      "epoch": 1.3975570000589148,
      "grad_norm": 0.2957988977432251,
      "learning_rate": 7.252360588549252e-06,
      "loss": 0.0206,
      "step": 853980
    },
    {
      "epoch": 1.3975897304975682,
      "grad_norm": 0.6394344568252563,
      "learning_rate": 7.2522946963357345e-06,
      "loss": 0.0161,
      "step": 854000
    },
    {
      "epoch": 1.3976224609362213,
      "grad_norm": 1.6348744630813599,
      "learning_rate": 7.252228804122218e-06,
      "loss": 0.0191,
      "step": 854020
    },
    {
      "epoch": 1.397655191374875,
      "grad_norm": 0.2813883125782013,
      "learning_rate": 7.2521629119087e-06,
      "loss": 0.0249,
      "step": 854040
    },
    {
      "epoch": 1.397687921813528,
      "grad_norm": 0.3231622278690338,
      "learning_rate": 7.2520970196951835e-06,
      "loss": 0.0158,
      "step": 854060
    },
    {
      "epoch": 1.3977206522521815,
      "grad_norm": 2.032313346862793,
      "learning_rate": 7.252031127481665e-06,
      "loss": 0.023,
      "step": 854080
    },
    {
      "epoch": 1.3977533826908348,
      "grad_norm": 0.841350793838501,
      "learning_rate": 7.251965235268149e-06,
      "loss": 0.0207,
      "step": 854100
    },
    {
      "epoch": 1.3977861131294882,
      "grad_norm": 0.37310874462127686,
      "learning_rate": 7.251899343054632e-06,
      "loss": 0.0169,
      "step": 854120
    },
    {
      "epoch": 1.3978188435681416,
      "grad_norm": 0.6139448881149292,
      "learning_rate": 7.2518334508411145e-06,
      "loss": 0.0212,
      "step": 854140
    },
    {
      "epoch": 1.3978515740067947,
      "grad_norm": 0.5045453906059265,
      "learning_rate": 7.251767558627597e-06,
      "loss": 0.0234,
      "step": 854160
    },
    {
      "epoch": 1.3978843044454483,
      "grad_norm": 0.2840147316455841,
      "learning_rate": 7.251701666414081e-06,
      "loss": 0.0196,
      "step": 854180
    },
    {
      "epoch": 1.3979170348841015,
      "grad_norm": 0.5873651504516602,
      "learning_rate": 7.2516357742005636e-06,
      "loss": 0.0252,
      "step": 854200
    },
    {
      "epoch": 1.3979497653227548,
      "grad_norm": 0.6500826478004456,
      "learning_rate": 7.251569881987046e-06,
      "loss": 0.0205,
      "step": 854220
    },
    {
      "epoch": 1.3979824957614082,
      "grad_norm": 1.564855933189392,
      "learning_rate": 7.25150398977353e-06,
      "loss": 0.0262,
      "step": 854240
    },
    {
      "epoch": 1.3980152262000616,
      "grad_norm": 0.2308197170495987,
      "learning_rate": 7.251438097560012e-06,
      "loss": 0.0207,
      "step": 854260
    },
    {
      "epoch": 1.398047956638715,
      "grad_norm": 0.7295961976051331,
      "learning_rate": 7.251372205346495e-06,
      "loss": 0.0195,
      "step": 854280
    },
    {
      "epoch": 1.398080687077368,
      "grad_norm": 0.2197776585817337,
      "learning_rate": 7.251306313132977e-06,
      "loss": 0.0156,
      "step": 854300
    },
    {
      "epoch": 1.3981134175160215,
      "grad_norm": 0.3276926279067993,
      "learning_rate": 7.251240420919461e-06,
      "loss": 0.0197,
      "step": 854320
    },
    {
      "epoch": 1.3981461479546748,
      "grad_norm": 0.9040247201919556,
      "learning_rate": 7.2511745287059436e-06,
      "loss": 0.0263,
      "step": 854340
    },
    {
      "epoch": 1.3981788783933282,
      "grad_norm": 0.6236759424209595,
      "learning_rate": 7.251108636492426e-06,
      "loss": 0.0228,
      "step": 854360
    },
    {
      "epoch": 1.3982116088319816,
      "grad_norm": 0.9007149934768677,
      "learning_rate": 7.251042744278909e-06,
      "loss": 0.0203,
      "step": 854380
    },
    {
      "epoch": 1.398244339270635,
      "grad_norm": 0.09241330623626709,
      "learning_rate": 7.250976852065393e-06,
      "loss": 0.0218,
      "step": 854400
    },
    {
      "epoch": 1.3982770697092883,
      "grad_norm": 0.9223711490631104,
      "learning_rate": 7.2509109598518745e-06,
      "loss": 0.0233,
      "step": 854420
    },
    {
      "epoch": 1.3983098001479415,
      "grad_norm": 2.3934879302978516,
      "learning_rate": 7.250845067638358e-06,
      "loss": 0.0265,
      "step": 854440
    },
    {
      "epoch": 1.3983425305865949,
      "grad_norm": 0.5199573636054993,
      "learning_rate": 7.25077917542484e-06,
      "loss": 0.0178,
      "step": 854460
    },
    {
      "epoch": 1.3983752610252482,
      "grad_norm": 0.36875516176223755,
      "learning_rate": 7.250713283211324e-06,
      "loss": 0.0212,
      "step": 854480
    },
    {
      "epoch": 1.3984079914639016,
      "grad_norm": 0.813840925693512,
      "learning_rate": 7.2506473909978055e-06,
      "loss": 0.0194,
      "step": 854500
    },
    {
      "epoch": 1.398440721902555,
      "grad_norm": 1.1806654930114746,
      "learning_rate": 7.250581498784289e-06,
      "loss": 0.0284,
      "step": 854520
    },
    {
      "epoch": 1.3984734523412083,
      "grad_norm": 0.9619939923286438,
      "learning_rate": 7.250515606570772e-06,
      "loss": 0.0205,
      "step": 854540
    },
    {
      "epoch": 1.3985061827798617,
      "grad_norm": 0.7611303329467773,
      "learning_rate": 7.2504497143572546e-06,
      "loss": 0.0238,
      "step": 854560
    },
    {
      "epoch": 1.3985389132185149,
      "grad_norm": 0.7889856100082397,
      "learning_rate": 7.250383822143738e-06,
      "loss": 0.0229,
      "step": 854580
    },
    {
      "epoch": 1.3985716436571682,
      "grad_norm": 0.45521894097328186,
      "learning_rate": 7.250317929930221e-06,
      "loss": 0.023,
      "step": 854600
    },
    {
      "epoch": 1.3986043740958216,
      "grad_norm": 0.24737755954265594,
      "learning_rate": 7.2502520377167045e-06,
      "loss": 0.0168,
      "step": 854620
    },
    {
      "epoch": 1.398637104534475,
      "grad_norm": 0.732104480266571,
      "learning_rate": 7.250186145503186e-06,
      "loss": 0.0337,
      "step": 854640
    },
    {
      "epoch": 1.3986698349731284,
      "grad_norm": 0.7824176549911499,
      "learning_rate": 7.25012025328967e-06,
      "loss": 0.02,
      "step": 854660
    },
    {
      "epoch": 1.3987025654117817,
      "grad_norm": 1.0100229978561401,
      "learning_rate": 7.250054361076152e-06,
      "loss": 0.0242,
      "step": 854680
    },
    {
      "epoch": 1.398735295850435,
      "grad_norm": 0.25838831067085266,
      "learning_rate": 7.2499884688626354e-06,
      "loss": 0.0216,
      "step": 854700
    },
    {
      "epoch": 1.3987680262890883,
      "grad_norm": 1.568617582321167,
      "learning_rate": 7.249922576649117e-06,
      "loss": 0.0199,
      "step": 854720
    },
    {
      "epoch": 1.3988007567277416,
      "grad_norm": 0.6717631816864014,
      "learning_rate": 7.249856684435601e-06,
      "loss": 0.0207,
      "step": 854740
    },
    {
      "epoch": 1.398833487166395,
      "grad_norm": 0.3356563448905945,
      "learning_rate": 7.249790792222084e-06,
      "loss": 0.0278,
      "step": 854760
    },
    {
      "epoch": 1.3988662176050484,
      "grad_norm": 0.7383309602737427,
      "learning_rate": 7.249724900008566e-06,
      "loss": 0.0276,
      "step": 854780
    },
    {
      "epoch": 1.3988989480437017,
      "grad_norm": 1.0595952272415161,
      "learning_rate": 7.249659007795049e-06,
      "loss": 0.0155,
      "step": 854800
    },
    {
      "epoch": 1.398931678482355,
      "grad_norm": 0.675993800163269,
      "learning_rate": 7.249593115581533e-06,
      "loss": 0.0154,
      "step": 854820
    },
    {
      "epoch": 1.3989644089210085,
      "grad_norm": 0.24282298982143402,
      "learning_rate": 7.249527223368015e-06,
      "loss": 0.0194,
      "step": 854840
    },
    {
      "epoch": 1.3989971393596616,
      "grad_norm": 1.2050412893295288,
      "learning_rate": 7.249461331154498e-06,
      "loss": 0.0255,
      "step": 854860
    },
    {
      "epoch": 1.399029869798315,
      "grad_norm": 0.4472576081752777,
      "learning_rate": 7.24939543894098e-06,
      "loss": 0.0226,
      "step": 854880
    },
    {
      "epoch": 1.3990626002369684,
      "grad_norm": 0.29220908880233765,
      "learning_rate": 7.249329546727464e-06,
      "loss": 0.019,
      "step": 854900
    },
    {
      "epoch": 1.3990953306756218,
      "grad_norm": 0.9174448847770691,
      "learning_rate": 7.249263654513947e-06,
      "loss": 0.0243,
      "step": 854920
    },
    {
      "epoch": 1.3991280611142751,
      "grad_norm": 0.5630460977554321,
      "learning_rate": 7.249197762300429e-06,
      "loss": 0.0176,
      "step": 854940
    },
    {
      "epoch": 1.3991607915529283,
      "grad_norm": 0.4703812003135681,
      "learning_rate": 7.249131870086913e-06,
      "loss": 0.0208,
      "step": 854960
    },
    {
      "epoch": 1.3991935219915819,
      "grad_norm": 0.08755827695131302,
      "learning_rate": 7.2490659778733955e-06,
      "loss": 0.0183,
      "step": 854980
    },
    {
      "epoch": 1.399226252430235,
      "grad_norm": 0.2703789174556732,
      "learning_rate": 7.249000085659878e-06,
      "loss": 0.0171,
      "step": 855000
    },
    {
      "epoch": 1.3992589828688884,
      "grad_norm": 0.8852057456970215,
      "learning_rate": 7.248934193446361e-06,
      "loss": 0.0258,
      "step": 855020
    },
    {
      "epoch": 1.3992917133075418,
      "grad_norm": 0.7815855741500854,
      "learning_rate": 7.2488683012328445e-06,
      "loss": 0.0198,
      "step": 855040
    },
    {
      "epoch": 1.3993244437461951,
      "grad_norm": 0.5747485756874084,
      "learning_rate": 7.2488024090193264e-06,
      "loss": 0.026,
      "step": 855060
    },
    {
      "epoch": 1.3993571741848485,
      "grad_norm": 0.4831050634384155,
      "learning_rate": 7.24873651680581e-06,
      "loss": 0.0252,
      "step": 855080
    },
    {
      "epoch": 1.3993899046235017,
      "grad_norm": 0.48864343762397766,
      "learning_rate": 7.248670624592292e-06,
      "loss": 0.0249,
      "step": 855100
    },
    {
      "epoch": 1.399422635062155,
      "grad_norm": 0.7685797214508057,
      "learning_rate": 7.2486047323787755e-06,
      "loss": 0.0182,
      "step": 855120
    },
    {
      "epoch": 1.3994553655008084,
      "grad_norm": 1.9448175430297852,
      "learning_rate": 7.248538840165258e-06,
      "loss": 0.0193,
      "step": 855140
    },
    {
      "epoch": 1.3994880959394618,
      "grad_norm": 0.5663480162620544,
      "learning_rate": 7.248472947951741e-06,
      "loss": 0.0214,
      "step": 855160
    },
    {
      "epoch": 1.3995208263781151,
      "grad_norm": 0.18227174878120422,
      "learning_rate": 7.248407055738224e-06,
      "loss": 0.0143,
      "step": 855180
    },
    {
      "epoch": 1.3995535568167685,
      "grad_norm": 0.5603392720222473,
      "learning_rate": 7.248341163524707e-06,
      "loss": 0.0244,
      "step": 855200
    },
    {
      "epoch": 1.399586287255422,
      "grad_norm": 0.09884040802717209,
      "learning_rate": 7.248275271311189e-06,
      "loss": 0.0208,
      "step": 855220
    },
    {
      "epoch": 1.399619017694075,
      "grad_norm": 0.5941666960716248,
      "learning_rate": 7.248209379097673e-06,
      "loss": 0.0149,
      "step": 855240
    },
    {
      "epoch": 1.3996517481327284,
      "grad_norm": 1.0610315799713135,
      "learning_rate": 7.248143486884156e-06,
      "loss": 0.0235,
      "step": 855260
    },
    {
      "epoch": 1.3996844785713818,
      "grad_norm": 1.2583492994308472,
      "learning_rate": 7.248077594670638e-06,
      "loss": 0.0164,
      "step": 855280
    },
    {
      "epoch": 1.3997172090100352,
      "grad_norm": 0.6862311363220215,
      "learning_rate": 7.248011702457122e-06,
      "loss": 0.0222,
      "step": 855300
    },
    {
      "epoch": 1.3997499394486885,
      "grad_norm": 0.16746345162391663,
      "learning_rate": 7.247945810243604e-06,
      "loss": 0.0313,
      "step": 855320
    },
    {
      "epoch": 1.399782669887342,
      "grad_norm": 0.39803653955459595,
      "learning_rate": 7.247879918030087e-06,
      "loss": 0.0135,
      "step": 855340
    },
    {
      "epoch": 1.3998154003259953,
      "grad_norm": 0.31866350769996643,
      "learning_rate": 7.24781402581657e-06,
      "loss": 0.0187,
      "step": 855360
    },
    {
      "epoch": 1.3998481307646484,
      "grad_norm": 0.2635609209537506,
      "learning_rate": 7.247748133603053e-06,
      "loss": 0.0183,
      "step": 855380
    },
    {
      "epoch": 1.3998808612033018,
      "grad_norm": 0.31375426054000854,
      "learning_rate": 7.2476822413895356e-06,
      "loss": 0.0181,
      "step": 855400
    },
    {
      "epoch": 1.3999135916419552,
      "grad_norm": 0.1855885237455368,
      "learning_rate": 7.247616349176019e-06,
      "loss": 0.0265,
      "step": 855420
    },
    {
      "epoch": 1.3999463220806085,
      "grad_norm": 0.22624623775482178,
      "learning_rate": 7.247550456962501e-06,
      "loss": 0.0218,
      "step": 855440
    },
    {
      "epoch": 1.399979052519262,
      "grad_norm": 1.6137797832489014,
      "learning_rate": 7.247484564748985e-06,
      "loss": 0.023,
      "step": 855460
    },
    {
      "epoch": 1.4000117829579153,
      "grad_norm": 0.5612896680831909,
      "learning_rate": 7.2474186725354665e-06,
      "loss": 0.0236,
      "step": 855480
    },
    {
      "epoch": 1.4000445133965687,
      "grad_norm": 0.42618948221206665,
      "learning_rate": 7.24735278032195e-06,
      "loss": 0.0294,
      "step": 855500
    },
    {
      "epoch": 1.4000772438352218,
      "grad_norm": 0.628459095954895,
      "learning_rate": 7.247286888108432e-06,
      "loss": 0.0202,
      "step": 855520
    },
    {
      "epoch": 1.4001099742738752,
      "grad_norm": 0.9381750822067261,
      "learning_rate": 7.247220995894916e-06,
      "loss": 0.0203,
      "step": 855540
    },
    {
      "epoch": 1.4001427047125286,
      "grad_norm": 1.0548772811889648,
      "learning_rate": 7.247155103681398e-06,
      "loss": 0.0354,
      "step": 855560
    },
    {
      "epoch": 1.400175435151182,
      "grad_norm": 0.33989983797073364,
      "learning_rate": 7.247089211467881e-06,
      "loss": 0.0174,
      "step": 855580
    },
    {
      "epoch": 1.4002081655898353,
      "grad_norm": 0.22937291860580444,
      "learning_rate": 7.247023319254364e-06,
      "loss": 0.0182,
      "step": 855600
    },
    {
      "epoch": 1.4002408960284884,
      "grad_norm": 0.24953536689281464,
      "learning_rate": 7.246957427040847e-06,
      "loss": 0.0221,
      "step": 855620
    },
    {
      "epoch": 1.400273626467142,
      "grad_norm": 0.3198741376399994,
      "learning_rate": 7.24689153482733e-06,
      "loss": 0.0215,
      "step": 855640
    },
    {
      "epoch": 1.4003063569057952,
      "grad_norm": 0.46758538484573364,
      "learning_rate": 7.246825642613813e-06,
      "loss": 0.0172,
      "step": 855660
    },
    {
      "epoch": 1.4003390873444486,
      "grad_norm": 0.5559785962104797,
      "learning_rate": 7.2467597504002965e-06,
      "loss": 0.0178,
      "step": 855680
    },
    {
      "epoch": 1.400371817783102,
      "grad_norm": 0.16114133596420288,
      "learning_rate": 7.246693858186778e-06,
      "loss": 0.0166,
      "step": 855700
    },
    {
      "epoch": 1.4004045482217553,
      "grad_norm": 1.0074055194854736,
      "learning_rate": 7.246627965973262e-06,
      "loss": 0.0216,
      "step": 855720
    },
    {
      "epoch": 1.4004372786604087,
      "grad_norm": 0.4341284930706024,
      "learning_rate": 7.246562073759744e-06,
      "loss": 0.0241,
      "step": 855740
    },
    {
      "epoch": 1.4004700090990618,
      "grad_norm": 0.2633563280105591,
      "learning_rate": 7.246496181546227e-06,
      "loss": 0.0137,
      "step": 855760
    },
    {
      "epoch": 1.4005027395377152,
      "grad_norm": 0.6075973510742188,
      "learning_rate": 7.24643028933271e-06,
      "loss": 0.0195,
      "step": 855780
    },
    {
      "epoch": 1.4005354699763686,
      "grad_norm": 0.6579565405845642,
      "learning_rate": 7.246364397119193e-06,
      "loss": 0.0177,
      "step": 855800
    },
    {
      "epoch": 1.400568200415022,
      "grad_norm": 0.6675252914428711,
      "learning_rate": 7.246298504905676e-06,
      "loss": 0.0207,
      "step": 855820
    },
    {
      "epoch": 1.4006009308536753,
      "grad_norm": 0.17891983687877655,
      "learning_rate": 7.246232612692159e-06,
      "loss": 0.0308,
      "step": 855840
    },
    {
      "epoch": 1.4006336612923287,
      "grad_norm": 0.5001953840255737,
      "learning_rate": 7.246166720478641e-06,
      "loss": 0.0223,
      "step": 855860
    },
    {
      "epoch": 1.400666391730982,
      "grad_norm": 0.4549963176250458,
      "learning_rate": 7.246100828265125e-06,
      "loss": 0.0143,
      "step": 855880
    },
    {
      "epoch": 1.4006991221696352,
      "grad_norm": 0.5902654528617859,
      "learning_rate": 7.246034936051607e-06,
      "loss": 0.0287,
      "step": 855900
    },
    {
      "epoch": 1.4007318526082886,
      "grad_norm": 0.20824696123600006,
      "learning_rate": 7.24596904383809e-06,
      "loss": 0.0148,
      "step": 855920
    },
    {
      "epoch": 1.400764583046942,
      "grad_norm": 0.5048917531967163,
      "learning_rate": 7.245903151624573e-06,
      "loss": 0.0205,
      "step": 855940
    },
    {
      "epoch": 1.4007973134855953,
      "grad_norm": 1.200234055519104,
      "learning_rate": 7.245837259411056e-06,
      "loss": 0.0256,
      "step": 855960
    },
    {
      "epoch": 1.4008300439242487,
      "grad_norm": 0.35873350501060486,
      "learning_rate": 7.245771367197539e-06,
      "loss": 0.0263,
      "step": 855980
    },
    {
      "epoch": 1.400862774362902,
      "grad_norm": 0.4730103611946106,
      "learning_rate": 7.245705474984022e-06,
      "loss": 0.0169,
      "step": 856000
    },
    {
      "epoch": 1.4008955048015554,
      "grad_norm": 0.46686825156211853,
      "learning_rate": 7.245639582770505e-06,
      "loss": 0.0219,
      "step": 856020
    },
    {
      "epoch": 1.4009282352402086,
      "grad_norm": 0.3260972499847412,
      "learning_rate": 7.2455736905569875e-06,
      "loss": 0.0193,
      "step": 856040
    },
    {
      "epoch": 1.400960965678862,
      "grad_norm": 0.767848551273346,
      "learning_rate": 7.245507798343471e-06,
      "loss": 0.0269,
      "step": 856060
    },
    {
      "epoch": 1.4009936961175153,
      "grad_norm": 0.7247520089149475,
      "learning_rate": 7.245441906129953e-06,
      "loss": 0.021,
      "step": 856080
    },
    {
      "epoch": 1.4010264265561687,
      "grad_norm": 0.35362496972084045,
      "learning_rate": 7.2453760139164365e-06,
      "loss": 0.0239,
      "step": 856100
    },
    {
      "epoch": 1.401059156994822,
      "grad_norm": 0.5912448167800903,
      "learning_rate": 7.2453101217029184e-06,
      "loss": 0.0254,
      "step": 856120
    },
    {
      "epoch": 1.4010918874334755,
      "grad_norm": 0.5854883193969727,
      "learning_rate": 7.245244229489402e-06,
      "loss": 0.024,
      "step": 856140
    },
    {
      "epoch": 1.4011246178721288,
      "grad_norm": 1.365666389465332,
      "learning_rate": 7.245178337275885e-06,
      "loss": 0.0282,
      "step": 856160
    },
    {
      "epoch": 1.401157348310782,
      "grad_norm": 0.26789864897727966,
      "learning_rate": 7.2451124450623675e-06,
      "loss": 0.0241,
      "step": 856180
    },
    {
      "epoch": 1.4011900787494354,
      "grad_norm": 1.523152232170105,
      "learning_rate": 7.24504655284885e-06,
      "loss": 0.0343,
      "step": 856200
    },
    {
      "epoch": 1.4012228091880887,
      "grad_norm": 0.40774258971214294,
      "learning_rate": 7.244980660635334e-06,
      "loss": 0.0203,
      "step": 856220
    },
    {
      "epoch": 1.401255539626742,
      "grad_norm": 0.4548947811126709,
      "learning_rate": 7.244914768421816e-06,
      "loss": 0.0211,
      "step": 856240
    },
    {
      "epoch": 1.4012882700653955,
      "grad_norm": 0.2660025358200073,
      "learning_rate": 7.244848876208299e-06,
      "loss": 0.0195,
      "step": 856260
    },
    {
      "epoch": 1.4013210005040486,
      "grad_norm": 0.7206805348396301,
      "learning_rate": 7.244782983994781e-06,
      "loss": 0.0239,
      "step": 856280
    },
    {
      "epoch": 1.4013537309427022,
      "grad_norm": 3.3374269008636475,
      "learning_rate": 7.244717091781265e-06,
      "loss": 0.0232,
      "step": 856300
    },
    {
      "epoch": 1.4013864613813554,
      "grad_norm": 0.5975631475448608,
      "learning_rate": 7.244651199567748e-06,
      "loss": 0.0218,
      "step": 856320
    },
    {
      "epoch": 1.4014191918200087,
      "grad_norm": 0.7604115605354309,
      "learning_rate": 7.24458530735423e-06,
      "loss": 0.0255,
      "step": 856340
    },
    {
      "epoch": 1.401451922258662,
      "grad_norm": 0.5087581872940063,
      "learning_rate": 7.244519415140714e-06,
      "loss": 0.013,
      "step": 856360
    },
    {
      "epoch": 1.4014846526973155,
      "grad_norm": 0.6482999324798584,
      "learning_rate": 7.244453522927197e-06,
      "loss": 0.0195,
      "step": 856380
    },
    {
      "epoch": 1.4015173831359689,
      "grad_norm": 0.26599520444869995,
      "learning_rate": 7.244387630713679e-06,
      "loss": 0.0199,
      "step": 856400
    },
    {
      "epoch": 1.401550113574622,
      "grad_norm": 0.25419947504997253,
      "learning_rate": 7.244321738500162e-06,
      "loss": 0.0162,
      "step": 856420
    },
    {
      "epoch": 1.4015828440132756,
      "grad_norm": 0.9712531566619873,
      "learning_rate": 7.244255846286646e-06,
      "loss": 0.02,
      "step": 856440
    },
    {
      "epoch": 1.4016155744519287,
      "grad_norm": 0.577243447303772,
      "learning_rate": 7.2441899540731275e-06,
      "loss": 0.0186,
      "step": 856460
    },
    {
      "epoch": 1.4016483048905821,
      "grad_norm": 0.4826345145702362,
      "learning_rate": 7.244124061859611e-06,
      "loss": 0.0158,
      "step": 856480
    },
    {
      "epoch": 1.4016810353292355,
      "grad_norm": 0.4773334562778473,
      "learning_rate": 7.244058169646093e-06,
      "loss": 0.0174,
      "step": 856500
    },
    {
      "epoch": 1.4017137657678889,
      "grad_norm": 0.3241916596889496,
      "learning_rate": 7.243992277432577e-06,
      "loss": 0.023,
      "step": 856520
    },
    {
      "epoch": 1.4017464962065422,
      "grad_norm": 1.5628011226654053,
      "learning_rate": 7.2439263852190585e-06,
      "loss": 0.0175,
      "step": 856540
    },
    {
      "epoch": 1.4017792266451954,
      "grad_norm": 0.21969473361968994,
      "learning_rate": 7.243860493005542e-06,
      "loss": 0.0245,
      "step": 856560
    },
    {
      "epoch": 1.4018119570838488,
      "grad_norm": 0.357496440410614,
      "learning_rate": 7.243794600792025e-06,
      "loss": 0.0197,
      "step": 856580
    },
    {
      "epoch": 1.4018446875225021,
      "grad_norm": 0.5961035490036011,
      "learning_rate": 7.2437287085785076e-06,
      "loss": 0.0183,
      "step": 856600
    },
    {
      "epoch": 1.4018774179611555,
      "grad_norm": 0.2416626214981079,
      "learning_rate": 7.24366281636499e-06,
      "loss": 0.0166,
      "step": 856620
    },
    {
      "epoch": 1.4019101483998089,
      "grad_norm": 0.7958479523658752,
      "learning_rate": 7.243596924151474e-06,
      "loss": 0.0132,
      "step": 856640
    },
    {
      "epoch": 1.4019428788384622,
      "grad_norm": 0.6949493885040283,
      "learning_rate": 7.243531031937957e-06,
      "loss": 0.0207,
      "step": 856660
    },
    {
      "epoch": 1.4019756092771156,
      "grad_norm": 1.2418824434280396,
      "learning_rate": 7.243465139724439e-06,
      "loss": 0.0193,
      "step": 856680
    },
    {
      "epoch": 1.4020083397157688,
      "grad_norm": 0.7822131514549255,
      "learning_rate": 7.243399247510923e-06,
      "loss": 0.0272,
      "step": 856700
    },
    {
      "epoch": 1.4020410701544221,
      "grad_norm": 0.34012332558631897,
      "learning_rate": 7.243333355297405e-06,
      "loss": 0.0289,
      "step": 856720
    },
    {
      "epoch": 1.4020738005930755,
      "grad_norm": 1.1856156587600708,
      "learning_rate": 7.2432674630838884e-06,
      "loss": 0.0145,
      "step": 856740
    },
    {
      "epoch": 1.4021065310317289,
      "grad_norm": 0.9877375364303589,
      "learning_rate": 7.24320157087037e-06,
      "loss": 0.0247,
      "step": 856760
    },
    {
      "epoch": 1.4021392614703823,
      "grad_norm": 0.8810876607894897,
      "learning_rate": 7.243135678656854e-06,
      "loss": 0.0197,
      "step": 856780
    },
    {
      "epoch": 1.4021719919090356,
      "grad_norm": 0.16339139640331268,
      "learning_rate": 7.243069786443337e-06,
      "loss": 0.0283,
      "step": 856800
    },
    {
      "epoch": 1.402204722347689,
      "grad_norm": 0.25437405705451965,
      "learning_rate": 7.243003894229819e-06,
      "loss": 0.0245,
      "step": 856820
    },
    {
      "epoch": 1.4022374527863422,
      "grad_norm": 0.7758145928382874,
      "learning_rate": 7.242938002016302e-06,
      "loss": 0.0171,
      "step": 856840
    },
    {
      "epoch": 1.4022701832249955,
      "grad_norm": 0.6912798285484314,
      "learning_rate": 7.242872109802786e-06,
      "loss": 0.0198,
      "step": 856860
    },
    {
      "epoch": 1.402302913663649,
      "grad_norm": 0.9406048655509949,
      "learning_rate": 7.242806217589268e-06,
      "loss": 0.0189,
      "step": 856880
    },
    {
      "epoch": 1.4023356441023023,
      "grad_norm": 0.13654553890228271,
      "learning_rate": 7.242740325375751e-06,
      "loss": 0.0259,
      "step": 856900
    },
    {
      "epoch": 1.4023683745409556,
      "grad_norm": 0.7179501056671143,
      "learning_rate": 7.242674433162233e-06,
      "loss": 0.026,
      "step": 856920
    },
    {
      "epoch": 1.402401104979609,
      "grad_norm": 0.24913223087787628,
      "learning_rate": 7.242608540948717e-06,
      "loss": 0.0296,
      "step": 856940
    },
    {
      "epoch": 1.4024338354182624,
      "grad_norm": 0.3989764451980591,
      "learning_rate": 7.242542648735199e-06,
      "loss": 0.0259,
      "step": 856960
    },
    {
      "epoch": 1.4024665658569155,
      "grad_norm": 1.2207398414611816,
      "learning_rate": 7.242476756521682e-06,
      "loss": 0.0195,
      "step": 856980
    },
    {
      "epoch": 1.402499296295569,
      "grad_norm": 1.1126976013183594,
      "learning_rate": 7.242410864308165e-06,
      "loss": 0.0199,
      "step": 857000
    },
    {
      "epoch": 1.4025320267342223,
      "grad_norm": 0.36408287286758423,
      "learning_rate": 7.2423449720946485e-06,
      "loss": 0.0191,
      "step": 857020
    },
    {
      "epoch": 1.4025647571728757,
      "grad_norm": 0.3280906677246094,
      "learning_rate": 7.242279079881131e-06,
      "loss": 0.0186,
      "step": 857040
    },
    {
      "epoch": 1.402597487611529,
      "grad_norm": 0.41351476311683655,
      "learning_rate": 7.242213187667614e-06,
      "loss": 0.0172,
      "step": 857060
    },
    {
      "epoch": 1.4026302180501822,
      "grad_norm": 1.0868028402328491,
      "learning_rate": 7.2421472954540976e-06,
      "loss": 0.0197,
      "step": 857080
    },
    {
      "epoch": 1.4026629484888358,
      "grad_norm": 1.3248406648635864,
      "learning_rate": 7.2420814032405794e-06,
      "loss": 0.0261,
      "step": 857100
    },
    {
      "epoch": 1.402695678927489,
      "grad_norm": 0.35054102540016174,
      "learning_rate": 7.242015511027063e-06,
      "loss": 0.0175,
      "step": 857120
    },
    {
      "epoch": 1.4027284093661423,
      "grad_norm": 0.5782989263534546,
      "learning_rate": 7.241949618813545e-06,
      "loss": 0.0221,
      "step": 857140
    },
    {
      "epoch": 1.4027611398047957,
      "grad_norm": 0.22329393029212952,
      "learning_rate": 7.2418837266000285e-06,
      "loss": 0.0215,
      "step": 857160
    },
    {
      "epoch": 1.402793870243449,
      "grad_norm": 0.5448863506317139,
      "learning_rate": 7.241817834386511e-06,
      "loss": 0.0232,
      "step": 857180
    },
    {
      "epoch": 1.4028266006821024,
      "grad_norm": 0.5330663323402405,
      "learning_rate": 7.241751942172994e-06,
      "loss": 0.0221,
      "step": 857200
    },
    {
      "epoch": 1.4028593311207556,
      "grad_norm": 0.8394309282302856,
      "learning_rate": 7.241686049959477e-06,
      "loss": 0.0277,
      "step": 857220
    },
    {
      "epoch": 1.4028920615594092,
      "grad_norm": 0.18054836988449097,
      "learning_rate": 7.24162015774596e-06,
      "loss": 0.0233,
      "step": 857240
    },
    {
      "epoch": 1.4029247919980623,
      "grad_norm": 0.3338710069656372,
      "learning_rate": 7.241554265532442e-06,
      "loss": 0.0181,
      "step": 857260
    },
    {
      "epoch": 1.4029575224367157,
      "grad_norm": 0.2877083420753479,
      "learning_rate": 7.241488373318926e-06,
      "loss": 0.022,
      "step": 857280
    },
    {
      "epoch": 1.402990252875369,
      "grad_norm": 0.5306686162948608,
      "learning_rate": 7.241422481105408e-06,
      "loss": 0.0198,
      "step": 857300
    },
    {
      "epoch": 1.4030229833140224,
      "grad_norm": 0.6501480340957642,
      "learning_rate": 7.241356588891891e-06,
      "loss": 0.029,
      "step": 857320
    },
    {
      "epoch": 1.4030557137526758,
      "grad_norm": 1.8597809076309204,
      "learning_rate": 7.241290696678373e-06,
      "loss": 0.0219,
      "step": 857340
    },
    {
      "epoch": 1.403088444191329,
      "grad_norm": 0.5868748426437378,
      "learning_rate": 7.241224804464857e-06,
      "loss": 0.0244,
      "step": 857360
    },
    {
      "epoch": 1.4031211746299823,
      "grad_norm": 0.6481032967567444,
      "learning_rate": 7.24115891225134e-06,
      "loss": 0.0215,
      "step": 857380
    },
    {
      "epoch": 1.4031539050686357,
      "grad_norm": 0.5492905974388123,
      "learning_rate": 7.241093020037822e-06,
      "loss": 0.02,
      "step": 857400
    },
    {
      "epoch": 1.403186635507289,
      "grad_norm": 0.7942837476730347,
      "learning_rate": 7.241027127824306e-06,
      "loss": 0.0182,
      "step": 857420
    },
    {
      "epoch": 1.4032193659459424,
      "grad_norm": 1.0047942399978638,
      "learning_rate": 7.2409612356107886e-06,
      "loss": 0.0143,
      "step": 857440
    },
    {
      "epoch": 1.4032520963845958,
      "grad_norm": 0.7612318992614746,
      "learning_rate": 7.240895343397272e-06,
      "loss": 0.0359,
      "step": 857460
    },
    {
      "epoch": 1.4032848268232492,
      "grad_norm": 0.8968245983123779,
      "learning_rate": 7.240829451183754e-06,
      "loss": 0.0272,
      "step": 857480
    },
    {
      "epoch": 1.4033175572619023,
      "grad_norm": 1.274393916130066,
      "learning_rate": 7.240763558970238e-06,
      "loss": 0.0202,
      "step": 857500
    },
    {
      "epoch": 1.4033502877005557,
      "grad_norm": 0.33130428194999695,
      "learning_rate": 7.2406976667567195e-06,
      "loss": 0.0206,
      "step": 857520
    },
    {
      "epoch": 1.403383018139209,
      "grad_norm": 0.4051872193813324,
      "learning_rate": 7.240631774543203e-06,
      "loss": 0.0186,
      "step": 857540
    },
    {
      "epoch": 1.4034157485778624,
      "grad_norm": 0.856965184211731,
      "learning_rate": 7.240565882329685e-06,
      "loss": 0.0155,
      "step": 857560
    },
    {
      "epoch": 1.4034484790165158,
      "grad_norm": 0.543104887008667,
      "learning_rate": 7.240499990116169e-06,
      "loss": 0.0279,
      "step": 857580
    },
    {
      "epoch": 1.4034812094551692,
      "grad_norm": 0.33743494749069214,
      "learning_rate": 7.240434097902651e-06,
      "loss": 0.0127,
      "step": 857600
    },
    {
      "epoch": 1.4035139398938226,
      "grad_norm": 0.6768028140068054,
      "learning_rate": 7.240368205689134e-06,
      "loss": 0.0159,
      "step": 857620
    },
    {
      "epoch": 1.4035466703324757,
      "grad_norm": 0.3046932816505432,
      "learning_rate": 7.240302313475617e-06,
      "loss": 0.0335,
      "step": 857640
    },
    {
      "epoch": 1.403579400771129,
      "grad_norm": 0.4999992847442627,
      "learning_rate": 7.2402364212621e-06,
      "loss": 0.0213,
      "step": 857660
    },
    {
      "epoch": 1.4036121312097825,
      "grad_norm": 0.11098664253950119,
      "learning_rate": 7.240170529048582e-06,
      "loss": 0.0194,
      "step": 857680
    },
    {
      "epoch": 1.4036448616484358,
      "grad_norm": 1.148808240890503,
      "learning_rate": 7.240104636835066e-06,
      "loss": 0.0287,
      "step": 857700
    },
    {
      "epoch": 1.4036775920870892,
      "grad_norm": 1.271669626235962,
      "learning_rate": 7.2400387446215495e-06,
      "loss": 0.0145,
      "step": 857720
    },
    {
      "epoch": 1.4037103225257426,
      "grad_norm": 0.6023480296134949,
      "learning_rate": 7.239972852408031e-06,
      "loss": 0.0213,
      "step": 857740
    },
    {
      "epoch": 1.403743052964396,
      "grad_norm": 0.24984955787658691,
      "learning_rate": 7.239906960194515e-06,
      "loss": 0.0215,
      "step": 857760
    },
    {
      "epoch": 1.403775783403049,
      "grad_norm": 0.9806594252586365,
      "learning_rate": 7.239841067980997e-06,
      "loss": 0.0174,
      "step": 857780
    },
    {
      "epoch": 1.4038085138417025,
      "grad_norm": 0.3131556808948517,
      "learning_rate": 7.23977517576748e-06,
      "loss": 0.023,
      "step": 857800
    },
    {
      "epoch": 1.4038412442803558,
      "grad_norm": 0.5493136048316956,
      "learning_rate": 7.239709283553963e-06,
      "loss": 0.0218,
      "step": 857820
    },
    {
      "epoch": 1.4038739747190092,
      "grad_norm": 2.3547115325927734,
      "learning_rate": 7.239643391340446e-06,
      "loss": 0.0173,
      "step": 857840
    },
    {
      "epoch": 1.4039067051576626,
      "grad_norm": 0.3654385209083557,
      "learning_rate": 7.239577499126929e-06,
      "loss": 0.0336,
      "step": 857860
    },
    {
      "epoch": 1.4039394355963157,
      "grad_norm": 1.1839457750320435,
      "learning_rate": 7.239511606913412e-06,
      "loss": 0.014,
      "step": 857880
    },
    {
      "epoch": 1.4039721660349693,
      "grad_norm": 1.203861951828003,
      "learning_rate": 7.239445714699894e-06,
      "loss": 0.0278,
      "step": 857900
    },
    {
      "epoch": 1.4040048964736225,
      "grad_norm": 0.9976844191551208,
      "learning_rate": 7.239379822486378e-06,
      "loss": 0.0207,
      "step": 857920
    },
    {
      "epoch": 1.4040376269122758,
      "grad_norm": 0.27057433128356934,
      "learning_rate": 7.23931393027286e-06,
      "loss": 0.0177,
      "step": 857940
    },
    {
      "epoch": 1.4040703573509292,
      "grad_norm": 0.33647796511650085,
      "learning_rate": 7.239248038059343e-06,
      "loss": 0.0226,
      "step": 857960
    },
    {
      "epoch": 1.4041030877895826,
      "grad_norm": 1.013211727142334,
      "learning_rate": 7.239182145845826e-06,
      "loss": 0.0205,
      "step": 857980
    },
    {
      "epoch": 1.404135818228236,
      "grad_norm": 0.6598020195960999,
      "learning_rate": 7.239116253632309e-06,
      "loss": 0.0213,
      "step": 858000
    },
    {
      "epoch": 1.4041685486668891,
      "grad_norm": 0.755279541015625,
      "learning_rate": 7.239050361418791e-06,
      "loss": 0.0279,
      "step": 858020
    },
    {
      "epoch": 1.4042012791055427,
      "grad_norm": 0.8004248738288879,
      "learning_rate": 7.238984469205275e-06,
      "loss": 0.0208,
      "step": 858040
    },
    {
      "epoch": 1.4042340095441959,
      "grad_norm": 0.5863458514213562,
      "learning_rate": 7.238918576991757e-06,
      "loss": 0.0275,
      "step": 858060
    },
    {
      "epoch": 1.4042667399828492,
      "grad_norm": 0.5661211609840393,
      "learning_rate": 7.2388526847782405e-06,
      "loss": 0.0187,
      "step": 858080
    },
    {
      "epoch": 1.4042994704215026,
      "grad_norm": 0.6292005181312561,
      "learning_rate": 7.238786792564724e-06,
      "loss": 0.0242,
      "step": 858100
    },
    {
      "epoch": 1.404332200860156,
      "grad_norm": 0.17306296527385712,
      "learning_rate": 7.238720900351206e-06,
      "loss": 0.0163,
      "step": 858120
    },
    {
      "epoch": 1.4043649312988093,
      "grad_norm": 0.4759969711303711,
      "learning_rate": 7.2386550081376895e-06,
      "loss": 0.0151,
      "step": 858140
    },
    {
      "epoch": 1.4043976617374625,
      "grad_norm": 0.3667665719985962,
      "learning_rate": 7.2385891159241714e-06,
      "loss": 0.0199,
      "step": 858160
    },
    {
      "epoch": 1.4044303921761159,
      "grad_norm": 1.2207313776016235,
      "learning_rate": 7.238523223710655e-06,
      "loss": 0.0261,
      "step": 858180
    },
    {
      "epoch": 1.4044631226147692,
      "grad_norm": 0.17010509967803955,
      "learning_rate": 7.238457331497138e-06,
      "loss": 0.0166,
      "step": 858200
    },
    {
      "epoch": 1.4044958530534226,
      "grad_norm": 0.7857055068016052,
      "learning_rate": 7.2383914392836205e-06,
      "loss": 0.0261,
      "step": 858220
    },
    {
      "epoch": 1.404528583492076,
      "grad_norm": 1.3954963684082031,
      "learning_rate": 7.238325547070103e-06,
      "loss": 0.0175,
      "step": 858240
    },
    {
      "epoch": 1.4045613139307294,
      "grad_norm": 1.1268200874328613,
      "learning_rate": 7.238259654856587e-06,
      "loss": 0.0272,
      "step": 858260
    },
    {
      "epoch": 1.4045940443693827,
      "grad_norm": 0.30414435267448425,
      "learning_rate": 7.238193762643069e-06,
      "loss": 0.019,
      "step": 858280
    },
    {
      "epoch": 1.4046267748080359,
      "grad_norm": 0.7907863855361938,
      "learning_rate": 7.238127870429552e-06,
      "loss": 0.0191,
      "step": 858300
    },
    {
      "epoch": 1.4046595052466893,
      "grad_norm": 0.9119966626167297,
      "learning_rate": 7.238061978216034e-06,
      "loss": 0.0193,
      "step": 858320
    },
    {
      "epoch": 1.4046922356853426,
      "grad_norm": 0.64061439037323,
      "learning_rate": 7.237996086002518e-06,
      "loss": 0.0201,
      "step": 858340
    },
    {
      "epoch": 1.404724966123996,
      "grad_norm": 0.2532796561717987,
      "learning_rate": 7.237930193789e-06,
      "loss": 0.0145,
      "step": 858360
    },
    {
      "epoch": 1.4047576965626494,
      "grad_norm": 0.3022763431072235,
      "learning_rate": 7.237864301575483e-06,
      "loss": 0.0233,
      "step": 858380
    },
    {
      "epoch": 1.4047904270013027,
      "grad_norm": 0.6538580656051636,
      "learning_rate": 7.237798409361966e-06,
      "loss": 0.0242,
      "step": 858400
    },
    {
      "epoch": 1.4048231574399561,
      "grad_norm": 1.5340427160263062,
      "learning_rate": 7.237732517148449e-06,
      "loss": 0.0198,
      "step": 858420
    },
    {
      "epoch": 1.4048558878786093,
      "grad_norm": 1.3019062280654907,
      "learning_rate": 7.237666624934932e-06,
      "loss": 0.024,
      "step": 858440
    },
    {
      "epoch": 1.4048886183172626,
      "grad_norm": 0.4051242172718048,
      "learning_rate": 7.237600732721415e-06,
      "loss": 0.019,
      "step": 858460
    },
    {
      "epoch": 1.404921348755916,
      "grad_norm": 0.5824903845787048,
      "learning_rate": 7.237534840507898e-06,
      "loss": 0.0218,
      "step": 858480
    },
    {
      "epoch": 1.4049540791945694,
      "grad_norm": 0.7849758863449097,
      "learning_rate": 7.2374689482943805e-06,
      "loss": 0.0201,
      "step": 858500
    },
    {
      "epoch": 1.4049868096332228,
      "grad_norm": 0.9601582884788513,
      "learning_rate": 7.237403056080864e-06,
      "loss": 0.0168,
      "step": 858520
    },
    {
      "epoch": 1.405019540071876,
      "grad_norm": 1.1638818979263306,
      "learning_rate": 7.237337163867346e-06,
      "loss": 0.0255,
      "step": 858540
    },
    {
      "epoch": 1.4050522705105295,
      "grad_norm": 1.9759163856506348,
      "learning_rate": 7.23727127165383e-06,
      "loss": 0.0147,
      "step": 858560
    },
    {
      "epoch": 1.4050850009491826,
      "grad_norm": 1.1460307836532593,
      "learning_rate": 7.2372053794403115e-06,
      "loss": 0.0173,
      "step": 858580
    },
    {
      "epoch": 1.405117731387836,
      "grad_norm": 0.49752262234687805,
      "learning_rate": 7.237139487226795e-06,
      "loss": 0.0178,
      "step": 858600
    },
    {
      "epoch": 1.4051504618264894,
      "grad_norm": 0.1401459276676178,
      "learning_rate": 7.237073595013278e-06,
      "loss": 0.0206,
      "step": 858620
    },
    {
      "epoch": 1.4051831922651428,
      "grad_norm": 0.3257461190223694,
      "learning_rate": 7.2370077027997606e-06,
      "loss": 0.0216,
      "step": 858640
    },
    {
      "epoch": 1.4052159227037961,
      "grad_norm": 0.4999653100967407,
      "learning_rate": 7.236941810586243e-06,
      "loss": 0.027,
      "step": 858660
    },
    {
      "epoch": 1.4052486531424493,
      "grad_norm": 0.6586771011352539,
      "learning_rate": 7.236875918372727e-06,
      "loss": 0.0275,
      "step": 858680
    },
    {
      "epoch": 1.4052813835811029,
      "grad_norm": 0.09214409440755844,
      "learning_rate": 7.236810026159209e-06,
      "loss": 0.0213,
      "step": 858700
    },
    {
      "epoch": 1.405314114019756,
      "grad_norm": 1.0168535709381104,
      "learning_rate": 7.236744133945692e-06,
      "loss": 0.0233,
      "step": 858720
    },
    {
      "epoch": 1.4053468444584094,
      "grad_norm": 2.080918073654175,
      "learning_rate": 7.236678241732174e-06,
      "loss": 0.0247,
      "step": 858740
    },
    {
      "epoch": 1.4053795748970628,
      "grad_norm": 0.4320520758628845,
      "learning_rate": 7.236612349518658e-06,
      "loss": 0.0185,
      "step": 858760
    },
    {
      "epoch": 1.4054123053357161,
      "grad_norm": 0.6677010655403137,
      "learning_rate": 7.2365464573051414e-06,
      "loss": 0.0186,
      "step": 858780
    },
    {
      "epoch": 1.4054450357743695,
      "grad_norm": 0.5420509576797485,
      "learning_rate": 7.236480565091623e-06,
      "loss": 0.0181,
      "step": 858800
    },
    {
      "epoch": 1.4054777662130227,
      "grad_norm": 0.3317340612411499,
      "learning_rate": 7.236414672878107e-06,
      "loss": 0.0326,
      "step": 858820
    },
    {
      "epoch": 1.405510496651676,
      "grad_norm": 0.21241556107997894,
      "learning_rate": 7.23634878066459e-06,
      "loss": 0.0399,
      "step": 858840
    },
    {
      "epoch": 1.4055432270903294,
      "grad_norm": 0.14921681582927704,
      "learning_rate": 7.236282888451072e-06,
      "loss": 0.019,
      "step": 858860
    },
    {
      "epoch": 1.4055759575289828,
      "grad_norm": 0.5724201202392578,
      "learning_rate": 7.236216996237555e-06,
      "loss": 0.0237,
      "step": 858880
    },
    {
      "epoch": 1.4056086879676362,
      "grad_norm": 1.2614134550094604,
      "learning_rate": 7.236151104024039e-06,
      "loss": 0.028,
      "step": 858900
    },
    {
      "epoch": 1.4056414184062895,
      "grad_norm": 0.5751795172691345,
      "learning_rate": 7.236085211810521e-06,
      "loss": 0.0137,
      "step": 858920
    },
    {
      "epoch": 1.405674148844943,
      "grad_norm": 0.6848117709159851,
      "learning_rate": 7.236019319597004e-06,
      "loss": 0.0187,
      "step": 858940
    },
    {
      "epoch": 1.405706879283596,
      "grad_norm": 0.5425465106964111,
      "learning_rate": 7.235953427383486e-06,
      "loss": 0.0199,
      "step": 858960
    },
    {
      "epoch": 1.4057396097222494,
      "grad_norm": 0.39946621656417847,
      "learning_rate": 7.23588753516997e-06,
      "loss": 0.0247,
      "step": 858980
    },
    {
      "epoch": 1.4057723401609028,
      "grad_norm": 0.6803662776947021,
      "learning_rate": 7.2358216429564524e-06,
      "loss": 0.0264,
      "step": 859000
    },
    {
      "epoch": 1.4058050705995562,
      "grad_norm": 0.4120416045188904,
      "learning_rate": 7.235755750742935e-06,
      "loss": 0.016,
      "step": 859020
    },
    {
      "epoch": 1.4058378010382095,
      "grad_norm": 0.25178155303001404,
      "learning_rate": 7.235689858529418e-06,
      "loss": 0.0199,
      "step": 859040
    },
    {
      "epoch": 1.405870531476863,
      "grad_norm": 0.51911860704422,
      "learning_rate": 7.2356239663159015e-06,
      "loss": 0.0167,
      "step": 859060
    },
    {
      "epoch": 1.4059032619155163,
      "grad_norm": 0.6991888284683228,
      "learning_rate": 7.235558074102383e-06,
      "loss": 0.032,
      "step": 859080
    },
    {
      "epoch": 1.4059359923541694,
      "grad_norm": 6.571914196014404,
      "learning_rate": 7.235492181888867e-06,
      "loss": 0.0153,
      "step": 859100
    },
    {
      "epoch": 1.4059687227928228,
      "grad_norm": 1.154180645942688,
      "learning_rate": 7.235426289675349e-06,
      "loss": 0.0198,
      "step": 859120
    },
    {
      "epoch": 1.4060014532314762,
      "grad_norm": 0.1126466616988182,
      "learning_rate": 7.2353603974618325e-06,
      "loss": 0.0262,
      "step": 859140
    },
    {
      "epoch": 1.4060341836701296,
      "grad_norm": 1.1647292375564575,
      "learning_rate": 7.235294505248316e-06,
      "loss": 0.0215,
      "step": 859160
    },
    {
      "epoch": 1.406066914108783,
      "grad_norm": 0.20854511857032776,
      "learning_rate": 7.235228613034798e-06,
      "loss": 0.0144,
      "step": 859180
    },
    {
      "epoch": 1.4060996445474363,
      "grad_norm": 0.7266687154769897,
      "learning_rate": 7.2351627208212815e-06,
      "loss": 0.0223,
      "step": 859200
    },
    {
      "epoch": 1.4061323749860897,
      "grad_norm": 0.28157758712768555,
      "learning_rate": 7.235096828607764e-06,
      "loss": 0.0215,
      "step": 859220
    },
    {
      "epoch": 1.4061651054247428,
      "grad_norm": 0.7800077795982361,
      "learning_rate": 7.235030936394247e-06,
      "loss": 0.0251,
      "step": 859240
    },
    {
      "epoch": 1.4061978358633962,
      "grad_norm": 1.2317124605178833,
      "learning_rate": 7.23496504418073e-06,
      "loss": 0.0192,
      "step": 859260
    },
    {
      "epoch": 1.4062305663020496,
      "grad_norm": 1.092533826828003,
      "learning_rate": 7.234899151967213e-06,
      "loss": 0.0211,
      "step": 859280
    },
    {
      "epoch": 1.406263296740703,
      "grad_norm": 0.6591697931289673,
      "learning_rate": 7.234833259753695e-06,
      "loss": 0.0184,
      "step": 859300
    },
    {
      "epoch": 1.4062960271793563,
      "grad_norm": 1.163496732711792,
      "learning_rate": 7.234767367540179e-06,
      "loss": 0.0133,
      "step": 859320
    },
    {
      "epoch": 1.4063287576180095,
      "grad_norm": 0.23684388399124146,
      "learning_rate": 7.234701475326661e-06,
      "loss": 0.0131,
      "step": 859340
    },
    {
      "epoch": 1.406361488056663,
      "grad_norm": 0.20428922772407532,
      "learning_rate": 7.234635583113144e-06,
      "loss": 0.0228,
      "step": 859360
    },
    {
      "epoch": 1.4063942184953162,
      "grad_norm": 0.6195342540740967,
      "learning_rate": 7.234569690899626e-06,
      "loss": 0.0322,
      "step": 859380
    },
    {
      "epoch": 1.4064269489339696,
      "grad_norm": 1.351163387298584,
      "learning_rate": 7.23450379868611e-06,
      "loss": 0.0221,
      "step": 859400
    },
    {
      "epoch": 1.406459679372623,
      "grad_norm": 1.1573150157928467,
      "learning_rate": 7.2344379064725925e-06,
      "loss": 0.0217,
      "step": 859420
    },
    {
      "epoch": 1.4064924098112763,
      "grad_norm": 0.5477895140647888,
      "learning_rate": 7.234372014259075e-06,
      "loss": 0.0218,
      "step": 859440
    },
    {
      "epoch": 1.4065251402499297,
      "grad_norm": 0.2533729374408722,
      "learning_rate": 7.234306122045558e-06,
      "loss": 0.0149,
      "step": 859460
    },
    {
      "epoch": 1.4065578706885828,
      "grad_norm": 0.6123223304748535,
      "learning_rate": 7.2342402298320416e-06,
      "loss": 0.0201,
      "step": 859480
    },
    {
      "epoch": 1.4065906011272364,
      "grad_norm": 0.960640013217926,
      "learning_rate": 7.234174337618524e-06,
      "loss": 0.0254,
      "step": 859500
    },
    {
      "epoch": 1.4066233315658896,
      "grad_norm": 0.2869614362716675,
      "learning_rate": 7.234108445405007e-06,
      "loss": 0.0178,
      "step": 859520
    },
    {
      "epoch": 1.406656062004543,
      "grad_norm": 0.22879205644130707,
      "learning_rate": 7.234042553191491e-06,
      "loss": 0.0211,
      "step": 859540
    },
    {
      "epoch": 1.4066887924431963,
      "grad_norm": 0.3284771740436554,
      "learning_rate": 7.2339766609779725e-06,
      "loss": 0.0234,
      "step": 859560
    },
    {
      "epoch": 1.4067215228818497,
      "grad_norm": 1.282583236694336,
      "learning_rate": 7.233910768764456e-06,
      "loss": 0.0176,
      "step": 859580
    },
    {
      "epoch": 1.406754253320503,
      "grad_norm": 0.5737002491950989,
      "learning_rate": 7.233844876550938e-06,
      "loss": 0.0251,
      "step": 859600
    },
    {
      "epoch": 1.4067869837591562,
      "grad_norm": 0.806762158870697,
      "learning_rate": 7.233778984337422e-06,
      "loss": 0.0236,
      "step": 859620
    },
    {
      "epoch": 1.4068197141978096,
      "grad_norm": 0.21679723262786865,
      "learning_rate": 7.233713092123904e-06,
      "loss": 0.0202,
      "step": 859640
    },
    {
      "epoch": 1.406852444636463,
      "grad_norm": 0.26693612337112427,
      "learning_rate": 7.233647199910387e-06,
      "loss": 0.0293,
      "step": 859660
    },
    {
      "epoch": 1.4068851750751163,
      "grad_norm": 0.1375107318162918,
      "learning_rate": 7.23358130769687e-06,
      "loss": 0.0214,
      "step": 859680
    },
    {
      "epoch": 1.4069179055137697,
      "grad_norm": 0.4387393593788147,
      "learning_rate": 7.233515415483353e-06,
      "loss": 0.018,
      "step": 859700
    },
    {
      "epoch": 1.406950635952423,
      "grad_norm": 0.28884533047676086,
      "learning_rate": 7.233449523269835e-06,
      "loss": 0.0248,
      "step": 859720
    },
    {
      "epoch": 1.4069833663910765,
      "grad_norm": 0.2522728443145752,
      "learning_rate": 7.233383631056319e-06,
      "loss": 0.0225,
      "step": 859740
    },
    {
      "epoch": 1.4070160968297296,
      "grad_norm": 0.23028919100761414,
      "learning_rate": 7.233317738842801e-06,
      "loss": 0.0208,
      "step": 859760
    },
    {
      "epoch": 1.407048827268383,
      "grad_norm": 2.2754197120666504,
      "learning_rate": 7.233251846629284e-06,
      "loss": 0.0128,
      "step": 859780
    },
    {
      "epoch": 1.4070815577070364,
      "grad_norm": 0.9661792516708374,
      "learning_rate": 7.233185954415767e-06,
      "loss": 0.0319,
      "step": 859800
    },
    {
      "epoch": 1.4071142881456897,
      "grad_norm": 0.6179059743881226,
      "learning_rate": 7.23312006220225e-06,
      "loss": 0.0209,
      "step": 859820
    },
    {
      "epoch": 1.407147018584343,
      "grad_norm": 1.1697477102279663,
      "learning_rate": 7.2330541699887334e-06,
      "loss": 0.0179,
      "step": 859840
    },
    {
      "epoch": 1.4071797490229965,
      "grad_norm": 0.8654285073280334,
      "learning_rate": 7.232988277775216e-06,
      "loss": 0.0203,
      "step": 859860
    },
    {
      "epoch": 1.4072124794616498,
      "grad_norm": 0.6127626895904541,
      "learning_rate": 7.232922385561699e-06,
      "loss": 0.0202,
      "step": 859880
    },
    {
      "epoch": 1.407245209900303,
      "grad_norm": 5.811679363250732,
      "learning_rate": 7.232856493348182e-06,
      "loss": 0.0338,
      "step": 859900
    },
    {
      "epoch": 1.4072779403389564,
      "grad_norm": 0.3728604018688202,
      "learning_rate": 7.232790601134665e-06,
      "loss": 0.0263,
      "step": 859920
    },
    {
      "epoch": 1.4073106707776097,
      "grad_norm": 0.6545467972755432,
      "learning_rate": 7.232724708921147e-06,
      "loss": 0.0211,
      "step": 859940
    },
    {
      "epoch": 1.4073434012162631,
      "grad_norm": 1.0469518899917603,
      "learning_rate": 7.232658816707631e-06,
      "loss": 0.0168,
      "step": 859960
    },
    {
      "epoch": 1.4073761316549165,
      "grad_norm": 1.2469327449798584,
      "learning_rate": 7.232592924494113e-06,
      "loss": 0.02,
      "step": 859980
    },
    {
      "epoch": 1.4074088620935699,
      "grad_norm": 0.719141960144043,
      "learning_rate": 7.232527032280596e-06,
      "loss": 0.0279,
      "step": 860000
    },
    {
      "epoch": 1.4074415925322232,
      "grad_norm": 0.21069616079330444,
      "learning_rate": 7.232461140067079e-06,
      "loss": 0.0207,
      "step": 860020
    },
    {
      "epoch": 1.4074743229708764,
      "grad_norm": 0.19342033565044403,
      "learning_rate": 7.232395247853562e-06,
      "loss": 0.0226,
      "step": 860040
    },
    {
      "epoch": 1.4075070534095298,
      "grad_norm": 0.21132637560367584,
      "learning_rate": 7.232329355640044e-06,
      "loss": 0.0303,
      "step": 860060
    },
    {
      "epoch": 1.4075397838481831,
      "grad_norm": 0.5359781384468079,
      "learning_rate": 7.232263463426528e-06,
      "loss": 0.0231,
      "step": 860080
    },
    {
      "epoch": 1.4075725142868365,
      "grad_norm": 1.0490095615386963,
      "learning_rate": 7.23219757121301e-06,
      "loss": 0.0269,
      "step": 860100
    },
    {
      "epoch": 1.4076052447254899,
      "grad_norm": 0.5395126938819885,
      "learning_rate": 7.2321316789994935e-06,
      "loss": 0.024,
      "step": 860120
    },
    {
      "epoch": 1.407637975164143,
      "grad_norm": 0.6927844285964966,
      "learning_rate": 7.232065786785975e-06,
      "loss": 0.0217,
      "step": 860140
    },
    {
      "epoch": 1.4076707056027966,
      "grad_norm": 0.5460717678070068,
      "learning_rate": 7.231999894572459e-06,
      "loss": 0.0189,
      "step": 860160
    },
    {
      "epoch": 1.4077034360414498,
      "grad_norm": 0.3226028382778168,
      "learning_rate": 7.2319340023589425e-06,
      "loss": 0.0233,
      "step": 860180
    },
    {
      "epoch": 1.4077361664801031,
      "grad_norm": 0.19647188484668732,
      "learning_rate": 7.2318681101454244e-06,
      "loss": 0.0208,
      "step": 860200
    },
    {
      "epoch": 1.4077688969187565,
      "grad_norm": 2.7639334201812744,
      "learning_rate": 7.231802217931908e-06,
      "loss": 0.0183,
      "step": 860220
    },
    {
      "epoch": 1.4078016273574099,
      "grad_norm": 0.23658937215805054,
      "learning_rate": 7.231736325718391e-06,
      "loss": 0.0249,
      "step": 860240
    },
    {
      "epoch": 1.4078343577960633,
      "grad_norm": 0.545556902885437,
      "learning_rate": 7.2316704335048735e-06,
      "loss": 0.0241,
      "step": 860260
    },
    {
      "epoch": 1.4078670882347164,
      "grad_norm": 0.6844589710235596,
      "learning_rate": 7.231604541291356e-06,
      "loss": 0.0179,
      "step": 860280
    },
    {
      "epoch": 1.40789981867337,
      "grad_norm": 0.30419498682022095,
      "learning_rate": 7.23153864907784e-06,
      "loss": 0.0293,
      "step": 860300
    },
    {
      "epoch": 1.4079325491120231,
      "grad_norm": 0.31758254766464233,
      "learning_rate": 7.231472756864322e-06,
      "loss": 0.0236,
      "step": 860320
    },
    {
      "epoch": 1.4079652795506765,
      "grad_norm": 1.2951077222824097,
      "learning_rate": 7.231406864650805e-06,
      "loss": 0.0204,
      "step": 860340
    },
    {
      "epoch": 1.40799800998933,
      "grad_norm": 0.26928025484085083,
      "learning_rate": 7.231340972437287e-06,
      "loss": 0.0244,
      "step": 860360
    },
    {
      "epoch": 1.4080307404279833,
      "grad_norm": 0.2777886986732483,
      "learning_rate": 7.231275080223771e-06,
      "loss": 0.0241,
      "step": 860380
    },
    {
      "epoch": 1.4080634708666366,
      "grad_norm": 0.16591167449951172,
      "learning_rate": 7.231209188010253e-06,
      "loss": 0.0168,
      "step": 860400
    },
    {
      "epoch": 1.4080962013052898,
      "grad_norm": 0.44851216673851013,
      "learning_rate": 7.231143295796736e-06,
      "loss": 0.038,
      "step": 860420
    },
    {
      "epoch": 1.4081289317439432,
      "grad_norm": 1.134404182434082,
      "learning_rate": 7.231077403583219e-06,
      "loss": 0.0199,
      "step": 860440
    },
    {
      "epoch": 1.4081616621825965,
      "grad_norm": 0.19042663276195526,
      "learning_rate": 7.231011511369702e-06,
      "loss": 0.0161,
      "step": 860460
    },
    {
      "epoch": 1.40819439262125,
      "grad_norm": 0.9805753827095032,
      "learning_rate": 7.2309456191561845e-06,
      "loss": 0.02,
      "step": 860480
    },
    {
      "epoch": 1.4082271230599033,
      "grad_norm": 0.26191675662994385,
      "learning_rate": 7.230879726942668e-06,
      "loss": 0.0157,
      "step": 860500
    },
    {
      "epoch": 1.4082598534985566,
      "grad_norm": 0.29594144225120544,
      "learning_rate": 7.23081383472915e-06,
      "loss": 0.0247,
      "step": 860520
    },
    {
      "epoch": 1.40829258393721,
      "grad_norm": 0.8193285465240479,
      "learning_rate": 7.2307479425156336e-06,
      "loss": 0.0223,
      "step": 860540
    },
    {
      "epoch": 1.4083253143758632,
      "grad_norm": 0.3584321439266205,
      "learning_rate": 7.230682050302117e-06,
      "loss": 0.0231,
      "step": 860560
    },
    {
      "epoch": 1.4083580448145165,
      "grad_norm": 0.8334336876869202,
      "learning_rate": 7.230616158088599e-06,
      "loss": 0.0241,
      "step": 860580
    },
    {
      "epoch": 1.40839077525317,
      "grad_norm": 0.3472214937210083,
      "learning_rate": 7.230550265875083e-06,
      "loss": 0.017,
      "step": 860600
    },
    {
      "epoch": 1.4084235056918233,
      "grad_norm": 0.3269416391849518,
      "learning_rate": 7.2304843736615645e-06,
      "loss": 0.0212,
      "step": 860620
    },
    {
      "epoch": 1.4084562361304767,
      "grad_norm": 0.804142415523529,
      "learning_rate": 7.230418481448048e-06,
      "loss": 0.0183,
      "step": 860640
    },
    {
      "epoch": 1.40848896656913,
      "grad_norm": 0.4417342245578766,
      "learning_rate": 7.230352589234531e-06,
      "loss": 0.0185,
      "step": 860660
    },
    {
      "epoch": 1.4085216970077834,
      "grad_norm": 1.689719319343567,
      "learning_rate": 7.230286697021014e-06,
      "loss": 0.023,
      "step": 860680
    },
    {
      "epoch": 1.4085544274464366,
      "grad_norm": 0.6356057524681091,
      "learning_rate": 7.230220804807496e-06,
      "loss": 0.0219,
      "step": 860700
    },
    {
      "epoch": 1.40858715788509,
      "grad_norm": 0.6510119438171387,
      "learning_rate": 7.23015491259398e-06,
      "loss": 0.0206,
      "step": 860720
    },
    {
      "epoch": 1.4086198883237433,
      "grad_norm": 0.5217458009719849,
      "learning_rate": 7.230089020380462e-06,
      "loss": 0.0208,
      "step": 860740
    },
    {
      "epoch": 1.4086526187623967,
      "grad_norm": 1.1029845476150513,
      "learning_rate": 7.230023128166945e-06,
      "loss": 0.0248,
      "step": 860760
    },
    {
      "epoch": 1.40868534920105,
      "grad_norm": 0.23614749312400818,
      "learning_rate": 7.229957235953427e-06,
      "loss": 0.0262,
      "step": 860780
    },
    {
      "epoch": 1.4087180796397034,
      "grad_norm": 0.4668853282928467,
      "learning_rate": 7.229891343739911e-06,
      "loss": 0.0147,
      "step": 860800
    },
    {
      "epoch": 1.4087508100783568,
      "grad_norm": 0.9871532917022705,
      "learning_rate": 7.229825451526394e-06,
      "loss": 0.0224,
      "step": 860820
    },
    {
      "epoch": 1.40878354051701,
      "grad_norm": 0.38249605894088745,
      "learning_rate": 7.229759559312876e-06,
      "loss": 0.0188,
      "step": 860840
    },
    {
      "epoch": 1.4088162709556633,
      "grad_norm": 3.827707290649414,
      "learning_rate": 7.229693667099359e-06,
      "loss": 0.0283,
      "step": 860860
    },
    {
      "epoch": 1.4088490013943167,
      "grad_norm": 0.5377931594848633,
      "learning_rate": 7.229627774885843e-06,
      "loss": 0.0129,
      "step": 860880
    },
    {
      "epoch": 1.40888173183297,
      "grad_norm": 0.6259344816207886,
      "learning_rate": 7.229561882672325e-06,
      "loss": 0.0194,
      "step": 860900
    },
    {
      "epoch": 1.4089144622716234,
      "grad_norm": 0.4409969449043274,
      "learning_rate": 7.229495990458808e-06,
      "loss": 0.0222,
      "step": 860920
    },
    {
      "epoch": 1.4089471927102766,
      "grad_norm": 0.16852375864982605,
      "learning_rate": 7.229430098245292e-06,
      "loss": 0.0129,
      "step": 860940
    },
    {
      "epoch": 1.4089799231489302,
      "grad_norm": 0.34184467792510986,
      "learning_rate": 7.229364206031774e-06,
      "loss": 0.0273,
      "step": 860960
    },
    {
      "epoch": 1.4090126535875833,
      "grad_norm": 0.3135104477405548,
      "learning_rate": 7.229298313818257e-06,
      "loss": 0.0192,
      "step": 860980
    },
    {
      "epoch": 1.4090453840262367,
      "grad_norm": 0.5202285051345825,
      "learning_rate": 7.229232421604739e-06,
      "loss": 0.0197,
      "step": 861000
    },
    {
      "epoch": 1.40907811446489,
      "grad_norm": 0.9571928381919861,
      "learning_rate": 7.229166529391223e-06,
      "loss": 0.0315,
      "step": 861020
    },
    {
      "epoch": 1.4091108449035434,
      "grad_norm": 0.4858122766017914,
      "learning_rate": 7.2291006371777054e-06,
      "loss": 0.0236,
      "step": 861040
    },
    {
      "epoch": 1.4091435753421968,
      "grad_norm": 0.9495706558227539,
      "learning_rate": 7.229034744964188e-06,
      "loss": 0.0255,
      "step": 861060
    },
    {
      "epoch": 1.40917630578085,
      "grad_norm": 0.8225930333137512,
      "learning_rate": 7.228968852750671e-06,
      "loss": 0.024,
      "step": 861080
    },
    {
      "epoch": 1.4092090362195036,
      "grad_norm": 0.5465962886810303,
      "learning_rate": 7.2289029605371545e-06,
      "loss": 0.0156,
      "step": 861100
    },
    {
      "epoch": 1.4092417666581567,
      "grad_norm": 0.7855560183525085,
      "learning_rate": 7.228837068323636e-06,
      "loss": 0.0145,
      "step": 861120
    },
    {
      "epoch": 1.40927449709681,
      "grad_norm": 1.7512918710708618,
      "learning_rate": 7.22877117611012e-06,
      "loss": 0.0258,
      "step": 861140
    },
    {
      "epoch": 1.4093072275354634,
      "grad_norm": 0.750331699848175,
      "learning_rate": 7.228705283896602e-06,
      "loss": 0.0228,
      "step": 861160
    },
    {
      "epoch": 1.4093399579741168,
      "grad_norm": 0.9138997197151184,
      "learning_rate": 7.2286393916830855e-06,
      "loss": 0.0266,
      "step": 861180
    },
    {
      "epoch": 1.4093726884127702,
      "grad_norm": 0.9860953688621521,
      "learning_rate": 7.228573499469567e-06,
      "loss": 0.0231,
      "step": 861200
    },
    {
      "epoch": 1.4094054188514233,
      "grad_norm": 0.38037392497062683,
      "learning_rate": 7.228507607256051e-06,
      "loss": 0.0194,
      "step": 861220
    },
    {
      "epoch": 1.4094381492900767,
      "grad_norm": 0.3436400294303894,
      "learning_rate": 7.2284417150425345e-06,
      "loss": 0.0217,
      "step": 861240
    },
    {
      "epoch": 1.40947087972873,
      "grad_norm": 4.1926116943359375,
      "learning_rate": 7.228375822829016e-06,
      "loss": 0.0183,
      "step": 861260
    },
    {
      "epoch": 1.4095036101673835,
      "grad_norm": 0.3080384433269501,
      "learning_rate": 7.2283099306155e-06,
      "loss": 0.0184,
      "step": 861280
    },
    {
      "epoch": 1.4095363406060368,
      "grad_norm": 0.5141803622245789,
      "learning_rate": 7.228244038401983e-06,
      "loss": 0.0228,
      "step": 861300
    },
    {
      "epoch": 1.4095690710446902,
      "grad_norm": 0.7284790277481079,
      "learning_rate": 7.228178146188466e-06,
      "loss": 0.0269,
      "step": 861320
    },
    {
      "epoch": 1.4096018014833436,
      "grad_norm": 0.7351692318916321,
      "learning_rate": 7.228112253974948e-06,
      "loss": 0.017,
      "step": 861340
    },
    {
      "epoch": 1.4096345319219967,
      "grad_norm": 0.891431987285614,
      "learning_rate": 7.228046361761432e-06,
      "loss": 0.0173,
      "step": 861360
    },
    {
      "epoch": 1.40966726236065,
      "grad_norm": 0.8070584535598755,
      "learning_rate": 7.227980469547914e-06,
      "loss": 0.0239,
      "step": 861380
    },
    {
      "epoch": 1.4096999927993035,
      "grad_norm": 0.6181085109710693,
      "learning_rate": 7.227914577334397e-06,
      "loss": 0.0223,
      "step": 861400
    },
    {
      "epoch": 1.4097327232379568,
      "grad_norm": 0.49744105339050293,
      "learning_rate": 7.227848685120879e-06,
      "loss": 0.0297,
      "step": 861420
    },
    {
      "epoch": 1.4097654536766102,
      "grad_norm": 0.18893371522426605,
      "learning_rate": 7.227782792907363e-06,
      "loss": 0.0149,
      "step": 861440
    },
    {
      "epoch": 1.4097981841152636,
      "grad_norm": 1.1626379489898682,
      "learning_rate": 7.2277169006938455e-06,
      "loss": 0.0141,
      "step": 861460
    },
    {
      "epoch": 1.409830914553917,
      "grad_norm": 1.009158968925476,
      "learning_rate": 7.227651008480328e-06,
      "loss": 0.0269,
      "step": 861480
    },
    {
      "epoch": 1.40986364499257,
      "grad_norm": 0.7583938241004944,
      "learning_rate": 7.227585116266811e-06,
      "loss": 0.0163,
      "step": 861500
    },
    {
      "epoch": 1.4098963754312235,
      "grad_norm": 0.22766293585300446,
      "learning_rate": 7.227519224053295e-06,
      "loss": 0.0237,
      "step": 861520
    },
    {
      "epoch": 1.4099291058698769,
      "grad_norm": 0.13173098862171173,
      "learning_rate": 7.2274533318397765e-06,
      "loss": 0.015,
      "step": 861540
    },
    {
      "epoch": 1.4099618363085302,
      "grad_norm": 0.6581929922103882,
      "learning_rate": 7.22738743962626e-06,
      "loss": 0.0212,
      "step": 861560
    },
    {
      "epoch": 1.4099945667471836,
      "grad_norm": 0.6507309079170227,
      "learning_rate": 7.227321547412742e-06,
      "loss": 0.0219,
      "step": 861580
    },
    {
      "epoch": 1.4100272971858367,
      "grad_norm": 0.38048669695854187,
      "learning_rate": 7.2272556551992255e-06,
      "loss": 0.0147,
      "step": 861600
    },
    {
      "epoch": 1.4100600276244903,
      "grad_norm": 1.184605598449707,
      "learning_rate": 7.227189762985709e-06,
      "loss": 0.0221,
      "step": 861620
    },
    {
      "epoch": 1.4100927580631435,
      "grad_norm": 0.8115302920341492,
      "learning_rate": 7.227123870772191e-06,
      "loss": 0.0227,
      "step": 861640
    },
    {
      "epoch": 1.4101254885017969,
      "grad_norm": 0.29529869556427,
      "learning_rate": 7.227057978558675e-06,
      "loss": 0.0105,
      "step": 861660
    },
    {
      "epoch": 1.4101582189404502,
      "grad_norm": 0.6195377707481384,
      "learning_rate": 7.226992086345157e-06,
      "loss": 0.0188,
      "step": 861680
    },
    {
      "epoch": 1.4101909493791036,
      "grad_norm": 0.6825196146965027,
      "learning_rate": 7.22692619413164e-06,
      "loss": 0.0202,
      "step": 861700
    },
    {
      "epoch": 1.410223679817757,
      "grad_norm": 1.1876859664916992,
      "learning_rate": 7.226860301918123e-06,
      "loss": 0.0264,
      "step": 861720
    },
    {
      "epoch": 1.4102564102564101,
      "grad_norm": 6.560237884521484,
      "learning_rate": 7.226794409704606e-06,
      "loss": 0.0237,
      "step": 861740
    },
    {
      "epoch": 1.4102891406950637,
      "grad_norm": 0.3510517179965973,
      "learning_rate": 7.226728517491088e-06,
      "loss": 0.0163,
      "step": 861760
    },
    {
      "epoch": 1.4103218711337169,
      "grad_norm": 0.68510502576828,
      "learning_rate": 7.226662625277572e-06,
      "loss": 0.0185,
      "step": 861780
    },
    {
      "epoch": 1.4103546015723702,
      "grad_norm": 0.13849826157093048,
      "learning_rate": 7.226596733064054e-06,
      "loss": 0.0262,
      "step": 861800
    },
    {
      "epoch": 1.4103873320110236,
      "grad_norm": 0.9713579416275024,
      "learning_rate": 7.226530840850537e-06,
      "loss": 0.0223,
      "step": 861820
    },
    {
      "epoch": 1.410420062449677,
      "grad_norm": 0.8920686841011047,
      "learning_rate": 7.22646494863702e-06,
      "loss": 0.0207,
      "step": 861840
    },
    {
      "epoch": 1.4104527928883304,
      "grad_norm": 0.6104384660720825,
      "learning_rate": 7.226399056423503e-06,
      "loss": 0.0163,
      "step": 861860
    },
    {
      "epoch": 1.4104855233269835,
      "grad_norm": 0.7845320105552673,
      "learning_rate": 7.226333164209986e-06,
      "loss": 0.0249,
      "step": 861880
    },
    {
      "epoch": 1.4105182537656369,
      "grad_norm": 0.3807630240917206,
      "learning_rate": 7.226267271996469e-06,
      "loss": 0.0277,
      "step": 861900
    },
    {
      "epoch": 1.4105509842042903,
      "grad_norm": 0.3073050081729889,
      "learning_rate": 7.226201379782951e-06,
      "loss": 0.0224,
      "step": 861920
    },
    {
      "epoch": 1.4105837146429436,
      "grad_norm": 0.3949373960494995,
      "learning_rate": 7.226135487569435e-06,
      "loss": 0.0249,
      "step": 861940
    },
    {
      "epoch": 1.410616445081597,
      "grad_norm": 0.3385436236858368,
      "learning_rate": 7.226069595355918e-06,
      "loss": 0.0191,
      "step": 861960
    },
    {
      "epoch": 1.4106491755202504,
      "grad_norm": 0.4373760521411896,
      "learning_rate": 7.2260037031424e-06,
      "loss": 0.021,
      "step": 861980
    },
    {
      "epoch": 1.4106819059589037,
      "grad_norm": 0.59037846326828,
      "learning_rate": 7.225937810928884e-06,
      "loss": 0.024,
      "step": 862000
    },
    {
      "epoch": 1.410714636397557,
      "grad_norm": 0.18201152980327606,
      "learning_rate": 7.225871918715366e-06,
      "loss": 0.0167,
      "step": 862020
    },
    {
      "epoch": 1.4107473668362103,
      "grad_norm": 0.42775359749794006,
      "learning_rate": 7.225806026501849e-06,
      "loss": 0.0202,
      "step": 862040
    },
    {
      "epoch": 1.4107800972748636,
      "grad_norm": 0.41682517528533936,
      "learning_rate": 7.225740134288332e-06,
      "loss": 0.0257,
      "step": 862060
    },
    {
      "epoch": 1.410812827713517,
      "grad_norm": 0.21693547070026398,
      "learning_rate": 7.225674242074815e-06,
      "loss": 0.0284,
      "step": 862080
    },
    {
      "epoch": 1.4108455581521704,
      "grad_norm": 0.23653927445411682,
      "learning_rate": 7.225608349861297e-06,
      "loss": 0.0295,
      "step": 862100
    },
    {
      "epoch": 1.4108782885908238,
      "grad_norm": 1.9588524103164673,
      "learning_rate": 7.225542457647781e-06,
      "loss": 0.0262,
      "step": 862120
    },
    {
      "epoch": 1.4109110190294771,
      "grad_norm": 0.19139207899570465,
      "learning_rate": 7.225476565434263e-06,
      "loss": 0.0312,
      "step": 862140
    },
    {
      "epoch": 1.4109437494681303,
      "grad_norm": 0.43674033880233765,
      "learning_rate": 7.2254106732207465e-06,
      "loss": 0.0162,
      "step": 862160
    },
    {
      "epoch": 1.4109764799067837,
      "grad_norm": 0.6563184261322021,
      "learning_rate": 7.225344781007228e-06,
      "loss": 0.0209,
      "step": 862180
    },
    {
      "epoch": 1.411009210345437,
      "grad_norm": 1.409818172454834,
      "learning_rate": 7.225278888793712e-06,
      "loss": 0.0131,
      "step": 862200
    },
    {
      "epoch": 1.4110419407840904,
      "grad_norm": 1.0801750421524048,
      "learning_rate": 7.225212996580194e-06,
      "loss": 0.0173,
      "step": 862220
    },
    {
      "epoch": 1.4110746712227438,
      "grad_norm": 0.4380665719509125,
      "learning_rate": 7.2251471043666774e-06,
      "loss": 0.0202,
      "step": 862240
    },
    {
      "epoch": 1.4111074016613971,
      "grad_norm": 1.7658462524414062,
      "learning_rate": 7.22508121215316e-06,
      "loss": 0.0208,
      "step": 862260
    },
    {
      "epoch": 1.4111401321000505,
      "grad_norm": 0.35717105865478516,
      "learning_rate": 7.225015319939643e-06,
      "loss": 0.0273,
      "step": 862280
    },
    {
      "epoch": 1.4111728625387037,
      "grad_norm": 0.7792887091636658,
      "learning_rate": 7.2249494277261265e-06,
      "loss": 0.0241,
      "step": 862300
    },
    {
      "epoch": 1.411205592977357,
      "grad_norm": 0.19646167755126953,
      "learning_rate": 7.224883535512609e-06,
      "loss": 0.019,
      "step": 862320
    },
    {
      "epoch": 1.4112383234160104,
      "grad_norm": 0.37366586923599243,
      "learning_rate": 7.224817643299092e-06,
      "loss": 0.0185,
      "step": 862340
    },
    {
      "epoch": 1.4112710538546638,
      "grad_norm": 0.9726424217224121,
      "learning_rate": 7.224751751085575e-06,
      "loss": 0.0243,
      "step": 862360
    },
    {
      "epoch": 1.4113037842933172,
      "grad_norm": 0.1807432323694229,
      "learning_rate": 7.224685858872058e-06,
      "loss": 0.018,
      "step": 862380
    },
    {
      "epoch": 1.4113365147319703,
      "grad_norm": 0.6564372181892395,
      "learning_rate": 7.22461996665854e-06,
      "loss": 0.0256,
      "step": 862400
    },
    {
      "epoch": 1.411369245170624,
      "grad_norm": 0.5253700613975525,
      "learning_rate": 7.224554074445024e-06,
      "loss": 0.0236,
      "step": 862420
    },
    {
      "epoch": 1.411401975609277,
      "grad_norm": 1.230193018913269,
      "learning_rate": 7.224488182231506e-06,
      "loss": 0.0263,
      "step": 862440
    },
    {
      "epoch": 1.4114347060479304,
      "grad_norm": 1.0428131818771362,
      "learning_rate": 7.224422290017989e-06,
      "loss": 0.0246,
      "step": 862460
    },
    {
      "epoch": 1.4114674364865838,
      "grad_norm": 0.7600961923599243,
      "learning_rate": 7.224356397804472e-06,
      "loss": 0.0186,
      "step": 862480
    },
    {
      "epoch": 1.4115001669252372,
      "grad_norm": 0.4770453870296478,
      "learning_rate": 7.224290505590955e-06,
      "loss": 0.0235,
      "step": 862500
    },
    {
      "epoch": 1.4115328973638905,
      "grad_norm": 1.2729471921920776,
      "learning_rate": 7.2242246133774375e-06,
      "loss": 0.0202,
      "step": 862520
    },
    {
      "epoch": 1.4115656278025437,
      "grad_norm": 0.349965363740921,
      "learning_rate": 7.224158721163921e-06,
      "loss": 0.0245,
      "step": 862540
    },
    {
      "epoch": 1.4115983582411973,
      "grad_norm": 0.09364799410104752,
      "learning_rate": 7.224092828950403e-06,
      "loss": 0.022,
      "step": 862560
    },
    {
      "epoch": 1.4116310886798504,
      "grad_norm": 0.19248522818088531,
      "learning_rate": 7.2240269367368866e-06,
      "loss": 0.0202,
      "step": 862580
    },
    {
      "epoch": 1.4116638191185038,
      "grad_norm": 0.7830725312232971,
      "learning_rate": 7.2239610445233685e-06,
      "loss": 0.0246,
      "step": 862600
    },
    {
      "epoch": 1.4116965495571572,
      "grad_norm": 0.19441339373588562,
      "learning_rate": 7.223895152309852e-06,
      "loss": 0.0245,
      "step": 862620
    },
    {
      "epoch": 1.4117292799958105,
      "grad_norm": 0.5874086618423462,
      "learning_rate": 7.223829260096336e-06,
      "loss": 0.0142,
      "step": 862640
    },
    {
      "epoch": 1.411762010434464,
      "grad_norm": 0.7944519519805908,
      "learning_rate": 7.2237633678828175e-06,
      "loss": 0.017,
      "step": 862660
    },
    {
      "epoch": 1.411794740873117,
      "grad_norm": 0.2948187291622162,
      "learning_rate": 7.223697475669301e-06,
      "loss": 0.0233,
      "step": 862680
    },
    {
      "epoch": 1.4118274713117704,
      "grad_norm": 0.221467524766922,
      "learning_rate": 7.223631583455784e-06,
      "loss": 0.0291,
      "step": 862700
    },
    {
      "epoch": 1.4118602017504238,
      "grad_norm": 0.9064553380012512,
      "learning_rate": 7.223565691242267e-06,
      "loss": 0.0228,
      "step": 862720
    },
    {
      "epoch": 1.4118929321890772,
      "grad_norm": 0.5229913592338562,
      "learning_rate": 7.223499799028749e-06,
      "loss": 0.0192,
      "step": 862740
    },
    {
      "epoch": 1.4119256626277306,
      "grad_norm": 1.3334907293319702,
      "learning_rate": 7.223433906815233e-06,
      "loss": 0.0307,
      "step": 862760
    },
    {
      "epoch": 1.411958393066384,
      "grad_norm": 0.8972126841545105,
      "learning_rate": 7.223368014601715e-06,
      "loss": 0.0208,
      "step": 862780
    },
    {
      "epoch": 1.4119911235050373,
      "grad_norm": 1.403213620185852,
      "learning_rate": 7.223302122388198e-06,
      "loss": 0.0235,
      "step": 862800
    },
    {
      "epoch": 1.4120238539436905,
      "grad_norm": 0.4801289141178131,
      "learning_rate": 7.22323623017468e-06,
      "loss": 0.0306,
      "step": 862820
    },
    {
      "epoch": 1.4120565843823438,
      "grad_norm": 0.7046799063682556,
      "learning_rate": 7.223170337961164e-06,
      "loss": 0.0221,
      "step": 862840
    },
    {
      "epoch": 1.4120893148209972,
      "grad_norm": 0.745361864566803,
      "learning_rate": 7.223104445747647e-06,
      "loss": 0.0146,
      "step": 862860
    },
    {
      "epoch": 1.4121220452596506,
      "grad_norm": 0.5515584945678711,
      "learning_rate": 7.223038553534129e-06,
      "loss": 0.021,
      "step": 862880
    },
    {
      "epoch": 1.412154775698304,
      "grad_norm": 0.12872877717018127,
      "learning_rate": 7.222972661320612e-06,
      "loss": 0.0261,
      "step": 862900
    },
    {
      "epoch": 1.4121875061369573,
      "grad_norm": 0.1833285540342331,
      "learning_rate": 7.222906769107096e-06,
      "loss": 0.0249,
      "step": 862920
    },
    {
      "epoch": 1.4122202365756107,
      "grad_norm": 0.40525174140930176,
      "learning_rate": 7.2228408768935776e-06,
      "loss": 0.0172,
      "step": 862940
    },
    {
      "epoch": 1.4122529670142638,
      "grad_norm": 0.6737145185470581,
      "learning_rate": 7.222774984680061e-06,
      "loss": 0.0208,
      "step": 862960
    },
    {
      "epoch": 1.4122856974529172,
      "grad_norm": 0.5470903515815735,
      "learning_rate": 7.222709092466543e-06,
      "loss": 0.0202,
      "step": 862980
    },
    {
      "epoch": 1.4123184278915706,
      "grad_norm": 0.28289857506752014,
      "learning_rate": 7.222643200253027e-06,
      "loss": 0.0243,
      "step": 863000
    },
    {
      "epoch": 1.412351158330224,
      "grad_norm": 0.9782471060752869,
      "learning_rate": 7.22257730803951e-06,
      "loss": 0.0298,
      "step": 863020
    },
    {
      "epoch": 1.4123838887688773,
      "grad_norm": 2.2221693992614746,
      "learning_rate": 7.222511415825992e-06,
      "loss": 0.02,
      "step": 863040
    },
    {
      "epoch": 1.4124166192075307,
      "grad_norm": 0.24311362206935883,
      "learning_rate": 7.222445523612476e-06,
      "loss": 0.0241,
      "step": 863060
    },
    {
      "epoch": 1.412449349646184,
      "grad_norm": 0.6862553358078003,
      "learning_rate": 7.2223796313989584e-06,
      "loss": 0.0148,
      "step": 863080
    },
    {
      "epoch": 1.4124820800848372,
      "grad_norm": 0.4862392842769623,
      "learning_rate": 7.222313739185441e-06,
      "loss": 0.0236,
      "step": 863100
    },
    {
      "epoch": 1.4125148105234906,
      "grad_norm": 0.2932737469673157,
      "learning_rate": 7.222247846971924e-06,
      "loss": 0.022,
      "step": 863120
    },
    {
      "epoch": 1.412547540962144,
      "grad_norm": 0.5836616158485413,
      "learning_rate": 7.2221819547584075e-06,
      "loss": 0.0192,
      "step": 863140
    },
    {
      "epoch": 1.4125802714007973,
      "grad_norm": 0.3226384222507477,
      "learning_rate": 7.222116062544889e-06,
      "loss": 0.0246,
      "step": 863160
    },
    {
      "epoch": 1.4126130018394507,
      "grad_norm": 1.3873419761657715,
      "learning_rate": 7.222050170331373e-06,
      "loss": 0.0342,
      "step": 863180
    },
    {
      "epoch": 1.4126457322781039,
      "grad_norm": 0.15658076107501984,
      "learning_rate": 7.221984278117855e-06,
      "loss": 0.0126,
      "step": 863200
    },
    {
      "epoch": 1.4126784627167575,
      "grad_norm": 0.2420700192451477,
      "learning_rate": 7.2219183859043385e-06,
      "loss": 0.0258,
      "step": 863220
    },
    {
      "epoch": 1.4127111931554106,
      "grad_norm": 0.996315598487854,
      "learning_rate": 7.22185249369082e-06,
      "loss": 0.0243,
      "step": 863240
    },
    {
      "epoch": 1.412743923594064,
      "grad_norm": 0.6385508179664612,
      "learning_rate": 7.221786601477304e-06,
      "loss": 0.0262,
      "step": 863260
    },
    {
      "epoch": 1.4127766540327173,
      "grad_norm": 5.0278639793396,
      "learning_rate": 7.221720709263787e-06,
      "loss": 0.0178,
      "step": 863280
    },
    {
      "epoch": 1.4128093844713707,
      "grad_norm": 0.8122453093528748,
      "learning_rate": 7.2216548170502694e-06,
      "loss": 0.0285,
      "step": 863300
    },
    {
      "epoch": 1.412842114910024,
      "grad_norm": 1.0092898607254028,
      "learning_rate": 7.221588924836752e-06,
      "loss": 0.0254,
      "step": 863320
    },
    {
      "epoch": 1.4128748453486772,
      "grad_norm": 0.10910701006650925,
      "learning_rate": 7.221523032623236e-06,
      "loss": 0.0165,
      "step": 863340
    },
    {
      "epoch": 1.4129075757873308,
      "grad_norm": 0.9075883030891418,
      "learning_rate": 7.2214571404097185e-06,
      "loss": 0.0252,
      "step": 863360
    },
    {
      "epoch": 1.412940306225984,
      "grad_norm": 0.6885649561882019,
      "learning_rate": 7.221391248196201e-06,
      "loss": 0.0176,
      "step": 863380
    },
    {
      "epoch": 1.4129730366646374,
      "grad_norm": 1.3864370584487915,
      "learning_rate": 7.221325355982685e-06,
      "loss": 0.0209,
      "step": 863400
    },
    {
      "epoch": 1.4130057671032907,
      "grad_norm": 0.7515929937362671,
      "learning_rate": 7.221259463769167e-06,
      "loss": 0.0209,
      "step": 863420
    },
    {
      "epoch": 1.413038497541944,
      "grad_norm": 1.808809518814087,
      "learning_rate": 7.22119357155565e-06,
      "loss": 0.0315,
      "step": 863440
    },
    {
      "epoch": 1.4130712279805975,
      "grad_norm": 0.5876690149307251,
      "learning_rate": 7.221127679342132e-06,
      "loss": 0.0254,
      "step": 863460
    },
    {
      "epoch": 1.4131039584192506,
      "grad_norm": 0.7482458353042603,
      "learning_rate": 7.221061787128616e-06,
      "loss": 0.0227,
      "step": 863480
    },
    {
      "epoch": 1.413136688857904,
      "grad_norm": 0.6931045055389404,
      "learning_rate": 7.2209958949150985e-06,
      "loss": 0.0208,
      "step": 863500
    },
    {
      "epoch": 1.4131694192965574,
      "grad_norm": 0.20129632949829102,
      "learning_rate": 7.220930002701581e-06,
      "loss": 0.0192,
      "step": 863520
    },
    {
      "epoch": 1.4132021497352107,
      "grad_norm": 0.3224996328353882,
      "learning_rate": 7.220864110488064e-06,
      "loss": 0.0264,
      "step": 863540
    },
    {
      "epoch": 1.4132348801738641,
      "grad_norm": 0.7322936654090881,
      "learning_rate": 7.220798218274548e-06,
      "loss": 0.0195,
      "step": 863560
    },
    {
      "epoch": 1.4132676106125175,
      "grad_norm": 1.9278223514556885,
      "learning_rate": 7.2207323260610295e-06,
      "loss": 0.0264,
      "step": 863580
    },
    {
      "epoch": 1.4133003410511709,
      "grad_norm": 0.3453570008277893,
      "learning_rate": 7.220666433847513e-06,
      "loss": 0.0223,
      "step": 863600
    },
    {
      "epoch": 1.413333071489824,
      "grad_norm": 0.43508052825927734,
      "learning_rate": 7.220600541633995e-06,
      "loss": 0.0199,
      "step": 863620
    },
    {
      "epoch": 1.4133658019284774,
      "grad_norm": 0.6328908801078796,
      "learning_rate": 7.2205346494204785e-06,
      "loss": 0.0154,
      "step": 863640
    },
    {
      "epoch": 1.4133985323671308,
      "grad_norm": 0.8899574875831604,
      "learning_rate": 7.220468757206961e-06,
      "loss": 0.0141,
      "step": 863660
    },
    {
      "epoch": 1.4134312628057841,
      "grad_norm": 1.1944316625595093,
      "learning_rate": 7.220402864993444e-06,
      "loss": 0.0213,
      "step": 863680
    },
    {
      "epoch": 1.4134639932444375,
      "grad_norm": 1.416651725769043,
      "learning_rate": 7.220336972779928e-06,
      "loss": 0.0269,
      "step": 863700
    },
    {
      "epoch": 1.4134967236830909,
      "grad_norm": 1.3557840585708618,
      "learning_rate": 7.22027108056641e-06,
      "loss": 0.0177,
      "step": 863720
    },
    {
      "epoch": 1.4135294541217442,
      "grad_norm": 0.9736160039901733,
      "learning_rate": 7.220205188352893e-06,
      "loss": 0.0254,
      "step": 863740
    },
    {
      "epoch": 1.4135621845603974,
      "grad_norm": 0.5746895670890808,
      "learning_rate": 7.220139296139376e-06,
      "loss": 0.0158,
      "step": 863760
    },
    {
      "epoch": 1.4135949149990508,
      "grad_norm": 0.319439172744751,
      "learning_rate": 7.220073403925859e-06,
      "loss": 0.0212,
      "step": 863780
    },
    {
      "epoch": 1.4136276454377041,
      "grad_norm": 1.1155853271484375,
      "learning_rate": 7.220007511712341e-06,
      "loss": 0.028,
      "step": 863800
    },
    {
      "epoch": 1.4136603758763575,
      "grad_norm": 0.4341434836387634,
      "learning_rate": 7.219941619498825e-06,
      "loss": 0.025,
      "step": 863820
    },
    {
      "epoch": 1.4136931063150109,
      "grad_norm": 0.4549791216850281,
      "learning_rate": 7.219875727285307e-06,
      "loss": 0.0195,
      "step": 863840
    },
    {
      "epoch": 1.4137258367536643,
      "grad_norm": 1.434861660003662,
      "learning_rate": 7.21980983507179e-06,
      "loss": 0.0247,
      "step": 863860
    },
    {
      "epoch": 1.4137585671923176,
      "grad_norm": 0.25306081771850586,
      "learning_rate": 7.219743942858273e-06,
      "loss": 0.0124,
      "step": 863880
    },
    {
      "epoch": 1.4137912976309708,
      "grad_norm": 0.712215006351471,
      "learning_rate": 7.219678050644756e-06,
      "loss": 0.0223,
      "step": 863900
    },
    {
      "epoch": 1.4138240280696242,
      "grad_norm": 0.8544862270355225,
      "learning_rate": 7.219612158431239e-06,
      "loss": 0.0201,
      "step": 863920
    },
    {
      "epoch": 1.4138567585082775,
      "grad_norm": 1.4015576839447021,
      "learning_rate": 7.219546266217722e-06,
      "loss": 0.0232,
      "step": 863940
    },
    {
      "epoch": 1.413889488946931,
      "grad_norm": 0.7432736754417419,
      "learning_rate": 7.219480374004204e-06,
      "loss": 0.0282,
      "step": 863960
    },
    {
      "epoch": 1.4139222193855843,
      "grad_norm": 0.590325117111206,
      "learning_rate": 7.219414481790688e-06,
      "loss": 0.0147,
      "step": 863980
    },
    {
      "epoch": 1.4139549498242374,
      "grad_norm": 1.1255154609680176,
      "learning_rate": 7.2193485895771696e-06,
      "loss": 0.0159,
      "step": 864000
    },
    {
      "epoch": 1.413987680262891,
      "grad_norm": 0.556634783744812,
      "learning_rate": 7.219282697363653e-06,
      "loss": 0.0173,
      "step": 864020
    },
    {
      "epoch": 1.4140204107015442,
      "grad_norm": 0.31916823983192444,
      "learning_rate": 7.219216805150135e-06,
      "loss": 0.0234,
      "step": 864040
    },
    {
      "epoch": 1.4140531411401975,
      "grad_norm": 0.12820571660995483,
      "learning_rate": 7.219150912936619e-06,
      "loss": 0.0191,
      "step": 864060
    },
    {
      "epoch": 1.414085871578851,
      "grad_norm": 1.334067940711975,
      "learning_rate": 7.219085020723102e-06,
      "loss": 0.0278,
      "step": 864080
    },
    {
      "epoch": 1.4141186020175043,
      "grad_norm": 0.874296247959137,
      "learning_rate": 7.219019128509585e-06,
      "loss": 0.024,
      "step": 864100
    },
    {
      "epoch": 1.4141513324561577,
      "grad_norm": 0.31136956810951233,
      "learning_rate": 7.218953236296068e-06,
      "loss": 0.0196,
      "step": 864120
    },
    {
      "epoch": 1.4141840628948108,
      "grad_norm": 0.6079613566398621,
      "learning_rate": 7.2188873440825504e-06,
      "loss": 0.0174,
      "step": 864140
    },
    {
      "epoch": 1.4142167933334642,
      "grad_norm": 0.1354520171880722,
      "learning_rate": 7.218821451869034e-06,
      "loss": 0.0152,
      "step": 864160
    },
    {
      "epoch": 1.4142495237721175,
      "grad_norm": 0.7061748504638672,
      "learning_rate": 7.218755559655516e-06,
      "loss": 0.0244,
      "step": 864180
    },
    {
      "epoch": 1.414282254210771,
      "grad_norm": 0.7698947191238403,
      "learning_rate": 7.2186896674419995e-06,
      "loss": 0.0201,
      "step": 864200
    },
    {
      "epoch": 1.4143149846494243,
      "grad_norm": 0.341673344373703,
      "learning_rate": 7.218623775228481e-06,
      "loss": 0.0165,
      "step": 864220
    },
    {
      "epoch": 1.4143477150880777,
      "grad_norm": 0.5600044131278992,
      "learning_rate": 7.218557883014965e-06,
      "loss": 0.0184,
      "step": 864240
    },
    {
      "epoch": 1.414380445526731,
      "grad_norm": 0.26123642921447754,
      "learning_rate": 7.218491990801447e-06,
      "loss": 0.0256,
      "step": 864260
    },
    {
      "epoch": 1.4144131759653842,
      "grad_norm": 0.7702533006668091,
      "learning_rate": 7.2184260985879305e-06,
      "loss": 0.0174,
      "step": 864280
    },
    {
      "epoch": 1.4144459064040376,
      "grad_norm": 0.37294355034828186,
      "learning_rate": 7.218360206374413e-06,
      "loss": 0.0188,
      "step": 864300
    },
    {
      "epoch": 1.414478636842691,
      "grad_norm": 0.5018805861473083,
      "learning_rate": 7.218294314160896e-06,
      "loss": 0.0148,
      "step": 864320
    },
    {
      "epoch": 1.4145113672813443,
      "grad_norm": 0.4396476745605469,
      "learning_rate": 7.218228421947379e-06,
      "loss": 0.0138,
      "step": 864340
    },
    {
      "epoch": 1.4145440977199977,
      "grad_norm": 0.9594843983650208,
      "learning_rate": 7.218162529733862e-06,
      "loss": 0.0148,
      "step": 864360
    },
    {
      "epoch": 1.414576828158651,
      "grad_norm": 1.0317317247390747,
      "learning_rate": 7.218096637520344e-06,
      "loss": 0.0222,
      "step": 864380
    },
    {
      "epoch": 1.4146095585973044,
      "grad_norm": 0.5696991086006165,
      "learning_rate": 7.218030745306828e-06,
      "loss": 0.0171,
      "step": 864400
    },
    {
      "epoch": 1.4146422890359576,
      "grad_norm": 1.0982943773269653,
      "learning_rate": 7.217964853093311e-06,
      "loss": 0.0325,
      "step": 864420
    },
    {
      "epoch": 1.414675019474611,
      "grad_norm": 0.22173747420310974,
      "learning_rate": 7.217898960879793e-06,
      "loss": 0.0275,
      "step": 864440
    },
    {
      "epoch": 1.4147077499132643,
      "grad_norm": 0.4971136152744293,
      "learning_rate": 7.217833068666277e-06,
      "loss": 0.0199,
      "step": 864460
    },
    {
      "epoch": 1.4147404803519177,
      "grad_norm": 0.6460195183753967,
      "learning_rate": 7.217767176452759e-06,
      "loss": 0.0265,
      "step": 864480
    },
    {
      "epoch": 1.414773210790571,
      "grad_norm": 1.076706051826477,
      "learning_rate": 7.217701284239242e-06,
      "loss": 0.0205,
      "step": 864500
    },
    {
      "epoch": 1.4148059412292244,
      "grad_norm": 1.436112403869629,
      "learning_rate": 7.217635392025725e-06,
      "loss": 0.0184,
      "step": 864520
    },
    {
      "epoch": 1.4148386716678778,
      "grad_norm": 0.2933450937271118,
      "learning_rate": 7.217569499812208e-06,
      "loss": 0.0212,
      "step": 864540
    },
    {
      "epoch": 1.414871402106531,
      "grad_norm": 2.948394536972046,
      "learning_rate": 7.2175036075986905e-06,
      "loss": 0.0188,
      "step": 864560
    },
    {
      "epoch": 1.4149041325451843,
      "grad_norm": 0.4501153230667114,
      "learning_rate": 7.217437715385174e-06,
      "loss": 0.0103,
      "step": 864580
    },
    {
      "epoch": 1.4149368629838377,
      "grad_norm": 0.32475098967552185,
      "learning_rate": 7.217371823171656e-06,
      "loss": 0.0201,
      "step": 864600
    },
    {
      "epoch": 1.414969593422491,
      "grad_norm": 0.473511278629303,
      "learning_rate": 7.2173059309581396e-06,
      "loss": 0.0171,
      "step": 864620
    },
    {
      "epoch": 1.4150023238611444,
      "grad_norm": 1.1385605335235596,
      "learning_rate": 7.2172400387446215e-06,
      "loss": 0.0232,
      "step": 864640
    },
    {
      "epoch": 1.4150350542997976,
      "grad_norm": 0.34149691462516785,
      "learning_rate": 7.217174146531105e-06,
      "loss": 0.017,
      "step": 864660
    },
    {
      "epoch": 1.4150677847384512,
      "grad_norm": 1.7422677278518677,
      "learning_rate": 7.217108254317588e-06,
      "loss": 0.0244,
      "step": 864680
    },
    {
      "epoch": 1.4151005151771043,
      "grad_norm": 2.07132887840271,
      "learning_rate": 7.2170423621040705e-06,
      "loss": 0.0179,
      "step": 864700
    },
    {
      "epoch": 1.4151332456157577,
      "grad_norm": 0.1884249746799469,
      "learning_rate": 7.216976469890553e-06,
      "loss": 0.0238,
      "step": 864720
    },
    {
      "epoch": 1.415165976054411,
      "grad_norm": 0.7302924394607544,
      "learning_rate": 7.216910577677037e-06,
      "loss": 0.0164,
      "step": 864740
    },
    {
      "epoch": 1.4151987064930645,
      "grad_norm": 0.35264602303504944,
      "learning_rate": 7.21684468546352e-06,
      "loss": 0.023,
      "step": 864760
    },
    {
      "epoch": 1.4152314369317178,
      "grad_norm": 1.8146724700927734,
      "learning_rate": 7.216778793250002e-06,
      "loss": 0.023,
      "step": 864780
    },
    {
      "epoch": 1.415264167370371,
      "grad_norm": 0.9652600884437561,
      "learning_rate": 7.216712901036486e-06,
      "loss": 0.0217,
      "step": 864800
    },
    {
      "epoch": 1.4152968978090246,
      "grad_norm": 0.40355080366134644,
      "learning_rate": 7.216647008822968e-06,
      "loss": 0.0206,
      "step": 864820
    },
    {
      "epoch": 1.4153296282476777,
      "grad_norm": 0.4414884150028229,
      "learning_rate": 7.216581116609451e-06,
      "loss": 0.0204,
      "step": 864840
    },
    {
      "epoch": 1.415362358686331,
      "grad_norm": 0.8514121770858765,
      "learning_rate": 7.216515224395933e-06,
      "loss": 0.0327,
      "step": 864860
    },
    {
      "epoch": 1.4153950891249845,
      "grad_norm": 0.24388110637664795,
      "learning_rate": 7.216449332182417e-06,
      "loss": 0.0158,
      "step": 864880
    },
    {
      "epoch": 1.4154278195636378,
      "grad_norm": 0.5198116302490234,
      "learning_rate": 7.2163834399689e-06,
      "loss": 0.0201,
      "step": 864900
    },
    {
      "epoch": 1.4154605500022912,
      "grad_norm": 0.5208067297935486,
      "learning_rate": 7.216317547755382e-06,
      "loss": 0.0212,
      "step": 864920
    },
    {
      "epoch": 1.4154932804409444,
      "grad_norm": 0.26755252480506897,
      "learning_rate": 7.216251655541865e-06,
      "loss": 0.0234,
      "step": 864940
    },
    {
      "epoch": 1.4155260108795977,
      "grad_norm": 0.7854586839675903,
      "learning_rate": 7.216185763328349e-06,
      "loss": 0.0272,
      "step": 864960
    },
    {
      "epoch": 1.415558741318251,
      "grad_norm": 0.6798576712608337,
      "learning_rate": 7.216119871114831e-06,
      "loss": 0.024,
      "step": 864980
    },
    {
      "epoch": 1.4155914717569045,
      "grad_norm": 0.24434129893779755,
      "learning_rate": 7.216053978901314e-06,
      "loss": 0.0274,
      "step": 865000
    },
    {
      "epoch": 1.4156242021955578,
      "grad_norm": 0.20915400981903076,
      "learning_rate": 7.215988086687796e-06,
      "loss": 0.0237,
      "step": 865020
    },
    {
      "epoch": 1.4156569326342112,
      "grad_norm": 0.7086260318756104,
      "learning_rate": 7.21592219447428e-06,
      "loss": 0.0261,
      "step": 865040
    },
    {
      "epoch": 1.4156896630728646,
      "grad_norm": 0.3887875974178314,
      "learning_rate": 7.2158563022607615e-06,
      "loss": 0.0197,
      "step": 865060
    },
    {
      "epoch": 1.4157223935115177,
      "grad_norm": 0.39758557081222534,
      "learning_rate": 7.215790410047245e-06,
      "loss": 0.0191,
      "step": 865080
    },
    {
      "epoch": 1.4157551239501711,
      "grad_norm": 0.7669191360473633,
      "learning_rate": 7.215724517833728e-06,
      "loss": 0.0257,
      "step": 865100
    },
    {
      "epoch": 1.4157878543888245,
      "grad_norm": 0.2184215933084488,
      "learning_rate": 7.215658625620211e-06,
      "loss": 0.0171,
      "step": 865120
    },
    {
      "epoch": 1.4158205848274779,
      "grad_norm": 0.6928963661193848,
      "learning_rate": 7.215592733406694e-06,
      "loss": 0.0224,
      "step": 865140
    },
    {
      "epoch": 1.4158533152661312,
      "grad_norm": 1.350930094718933,
      "learning_rate": 7.215526841193177e-06,
      "loss": 0.0233,
      "step": 865160
    },
    {
      "epoch": 1.4158860457047846,
      "grad_norm": 0.8679925203323364,
      "learning_rate": 7.2154609489796605e-06,
      "loss": 0.0218,
      "step": 865180
    },
    {
      "epoch": 1.415918776143438,
      "grad_norm": 0.7405492067337036,
      "learning_rate": 7.215395056766142e-06,
      "loss": 0.0295,
      "step": 865200
    },
    {
      "epoch": 1.4159515065820911,
      "grad_norm": 0.07541221380233765,
      "learning_rate": 7.215329164552626e-06,
      "loss": 0.0134,
      "step": 865220
    },
    {
      "epoch": 1.4159842370207445,
      "grad_norm": 0.3562261164188385,
      "learning_rate": 7.215263272339108e-06,
      "loss": 0.0256,
      "step": 865240
    },
    {
      "epoch": 1.4160169674593979,
      "grad_norm": 0.6119776964187622,
      "learning_rate": 7.2151973801255915e-06,
      "loss": 0.0176,
      "step": 865260
    },
    {
      "epoch": 1.4160496978980512,
      "grad_norm": 2.588181734085083,
      "learning_rate": 7.215131487912073e-06,
      "loss": 0.0169,
      "step": 865280
    },
    {
      "epoch": 1.4160824283367046,
      "grad_norm": 0.6050660014152527,
      "learning_rate": 7.215065595698557e-06,
      "loss": 0.0177,
      "step": 865300
    },
    {
      "epoch": 1.416115158775358,
      "grad_norm": 0.41901418566703796,
      "learning_rate": 7.21499970348504e-06,
      "loss": 0.0195,
      "step": 865320
    },
    {
      "epoch": 1.4161478892140114,
      "grad_norm": 0.3730486035346985,
      "learning_rate": 7.2149338112715224e-06,
      "loss": 0.0259,
      "step": 865340
    },
    {
      "epoch": 1.4161806196526645,
      "grad_norm": 0.6971675753593445,
      "learning_rate": 7.214867919058005e-06,
      "loss": 0.0215,
      "step": 865360
    },
    {
      "epoch": 1.4162133500913179,
      "grad_norm": 0.7884566783905029,
      "learning_rate": 7.214802026844489e-06,
      "loss": 0.023,
      "step": 865380
    },
    {
      "epoch": 1.4162460805299713,
      "grad_norm": 1.8164622783660889,
      "learning_rate": 7.214736134630971e-06,
      "loss": 0.0237,
      "step": 865400
    },
    {
      "epoch": 1.4162788109686246,
      "grad_norm": 0.6185076236724854,
      "learning_rate": 7.214670242417454e-06,
      "loss": 0.0135,
      "step": 865420
    },
    {
      "epoch": 1.416311541407278,
      "grad_norm": 0.7357844114303589,
      "learning_rate": 7.214604350203936e-06,
      "loss": 0.0282,
      "step": 865440
    },
    {
      "epoch": 1.4163442718459311,
      "grad_norm": 0.8337306380271912,
      "learning_rate": 7.21453845799042e-06,
      "loss": 0.0247,
      "step": 865460
    },
    {
      "epoch": 1.4163770022845847,
      "grad_norm": 0.36634230613708496,
      "learning_rate": 7.214472565776903e-06,
      "loss": 0.0188,
      "step": 865480
    },
    {
      "epoch": 1.416409732723238,
      "grad_norm": 0.22352954745292664,
      "learning_rate": 7.214406673563385e-06,
      "loss": 0.0212,
      "step": 865500
    },
    {
      "epoch": 1.4164424631618913,
      "grad_norm": 1.2701647281646729,
      "learning_rate": 7.214340781349869e-06,
      "loss": 0.023,
      "step": 865520
    },
    {
      "epoch": 1.4164751936005446,
      "grad_norm": 0.7002941370010376,
      "learning_rate": 7.2142748891363515e-06,
      "loss": 0.0191,
      "step": 865540
    },
    {
      "epoch": 1.416507924039198,
      "grad_norm": 1.0987706184387207,
      "learning_rate": 7.214208996922834e-06,
      "loss": 0.0246,
      "step": 865560
    },
    {
      "epoch": 1.4165406544778514,
      "grad_norm": 0.527465283870697,
      "learning_rate": 7.214143104709317e-06,
      "loss": 0.0261,
      "step": 865580
    },
    {
      "epoch": 1.4165733849165045,
      "grad_norm": 1.1695386171340942,
      "learning_rate": 7.214077212495801e-06,
      "loss": 0.0217,
      "step": 865600
    },
    {
      "epoch": 1.4166061153551581,
      "grad_norm": 0.6805343627929688,
      "learning_rate": 7.2140113202822825e-06,
      "loss": 0.0286,
      "step": 865620
    },
    {
      "epoch": 1.4166388457938113,
      "grad_norm": 0.7890820503234863,
      "learning_rate": 7.213945428068766e-06,
      "loss": 0.0175,
      "step": 865640
    },
    {
      "epoch": 1.4166715762324646,
      "grad_norm": 1.014419436454773,
      "learning_rate": 7.213879535855248e-06,
      "loss": 0.0295,
      "step": 865660
    },
    {
      "epoch": 1.416704306671118,
      "grad_norm": 0.4743463397026062,
      "learning_rate": 7.2138136436417316e-06,
      "loss": 0.0219,
      "step": 865680
    },
    {
      "epoch": 1.4167370371097714,
      "grad_norm": 0.8534340262413025,
      "learning_rate": 7.213747751428214e-06,
      "loss": 0.0157,
      "step": 865700
    },
    {
      "epoch": 1.4167697675484248,
      "grad_norm": 0.5562946796417236,
      "learning_rate": 7.213681859214697e-06,
      "loss": 0.0172,
      "step": 865720
    },
    {
      "epoch": 1.416802497987078,
      "grad_norm": 1.2139384746551514,
      "learning_rate": 7.21361596700118e-06,
      "loss": 0.0179,
      "step": 865740
    },
    {
      "epoch": 1.4168352284257313,
      "grad_norm": 0.20191679894924164,
      "learning_rate": 7.213550074787663e-06,
      "loss": 0.027,
      "step": 865760
    },
    {
      "epoch": 1.4168679588643847,
      "grad_norm": 1.038118839263916,
      "learning_rate": 7.213484182574145e-06,
      "loss": 0.0235,
      "step": 865780
    },
    {
      "epoch": 1.416900689303038,
      "grad_norm": 0.6092615127563477,
      "learning_rate": 7.213418290360629e-06,
      "loss": 0.0201,
      "step": 865800
    },
    {
      "epoch": 1.4169334197416914,
      "grad_norm": 0.3656822443008423,
      "learning_rate": 7.213352398147112e-06,
      "loss": 0.0268,
      "step": 865820
    },
    {
      "epoch": 1.4169661501803448,
      "grad_norm": 0.3078777492046356,
      "learning_rate": 7.213286505933594e-06,
      "loss": 0.0185,
      "step": 865840
    },
    {
      "epoch": 1.4169988806189981,
      "grad_norm": 0.5190348029136658,
      "learning_rate": 7.213220613720078e-06,
      "loss": 0.0186,
      "step": 865860
    },
    {
      "epoch": 1.4170316110576513,
      "grad_norm": 0.21669147908687592,
      "learning_rate": 7.21315472150656e-06,
      "loss": 0.0251,
      "step": 865880
    },
    {
      "epoch": 1.4170643414963047,
      "grad_norm": 0.41718605160713196,
      "learning_rate": 7.213088829293043e-06,
      "loss": 0.0211,
      "step": 865900
    },
    {
      "epoch": 1.417097071934958,
      "grad_norm": 0.5770537257194519,
      "learning_rate": 7.213022937079526e-06,
      "loss": 0.0152,
      "step": 865920
    },
    {
      "epoch": 1.4171298023736114,
      "grad_norm": 1.0130491256713867,
      "learning_rate": 7.212957044866009e-06,
      "loss": 0.0355,
      "step": 865940
    },
    {
      "epoch": 1.4171625328122648,
      "grad_norm": 1.5567759275436401,
      "learning_rate": 7.212891152652492e-06,
      "loss": 0.0211,
      "step": 865960
    },
    {
      "epoch": 1.4171952632509182,
      "grad_norm": 0.3079318106174469,
      "learning_rate": 7.212825260438975e-06,
      "loss": 0.0178,
      "step": 865980
    },
    {
      "epoch": 1.4172279936895715,
      "grad_norm": 0.40337684750556946,
      "learning_rate": 7.212759368225457e-06,
      "loss": 0.0163,
      "step": 866000
    },
    {
      "epoch": 1.4172607241282247,
      "grad_norm": 0.13302883505821228,
      "learning_rate": 7.212693476011941e-06,
      "loss": 0.0198,
      "step": 866020
    },
    {
      "epoch": 1.417293454566878,
      "grad_norm": 0.6969755291938782,
      "learning_rate": 7.2126275837984226e-06,
      "loss": 0.0317,
      "step": 866040
    },
    {
      "epoch": 1.4173261850055314,
      "grad_norm": 1.655633568763733,
      "learning_rate": 7.212561691584906e-06,
      "loss": 0.0275,
      "step": 866060
    },
    {
      "epoch": 1.4173589154441848,
      "grad_norm": 0.45341840386390686,
      "learning_rate": 7.212495799371388e-06,
      "loss": 0.0131,
      "step": 866080
    },
    {
      "epoch": 1.4173916458828382,
      "grad_norm": 0.47022920846939087,
      "learning_rate": 7.212429907157872e-06,
      "loss": 0.0147,
      "step": 866100
    },
    {
      "epoch": 1.4174243763214915,
      "grad_norm": 0.7885674834251404,
      "learning_rate": 7.212364014944354e-06,
      "loss": 0.0227,
      "step": 866120
    },
    {
      "epoch": 1.417457106760145,
      "grad_norm": 0.9228293299674988,
      "learning_rate": 7.212298122730837e-06,
      "loss": 0.0175,
      "step": 866140
    },
    {
      "epoch": 1.417489837198798,
      "grad_norm": 0.18031921982765198,
      "learning_rate": 7.212232230517321e-06,
      "loss": 0.0262,
      "step": 866160
    },
    {
      "epoch": 1.4175225676374514,
      "grad_norm": 0.6922706961631775,
      "learning_rate": 7.2121663383038034e-06,
      "loss": 0.0188,
      "step": 866180
    },
    {
      "epoch": 1.4175552980761048,
      "grad_norm": 0.6313660740852356,
      "learning_rate": 7.212100446090286e-06,
      "loss": 0.0186,
      "step": 866200
    },
    {
      "epoch": 1.4175880285147582,
      "grad_norm": 0.12078950554132462,
      "learning_rate": 7.212034553876769e-06,
      "loss": 0.0133,
      "step": 866220
    },
    {
      "epoch": 1.4176207589534116,
      "grad_norm": 0.4147327244281769,
      "learning_rate": 7.2119686616632525e-06,
      "loss": 0.0134,
      "step": 866240
    },
    {
      "epoch": 1.4176534893920647,
      "grad_norm": 1.4707494974136353,
      "learning_rate": 7.211902769449734e-06,
      "loss": 0.0148,
      "step": 866260
    },
    {
      "epoch": 1.4176862198307183,
      "grad_norm": 0.25406670570373535,
      "learning_rate": 7.211836877236218e-06,
      "loss": 0.0228,
      "step": 866280
    },
    {
      "epoch": 1.4177189502693714,
      "grad_norm": 0.32583650946617126,
      "learning_rate": 7.2117709850227e-06,
      "loss": 0.0132,
      "step": 866300
    },
    {
      "epoch": 1.4177516807080248,
      "grad_norm": 0.5010133981704712,
      "learning_rate": 7.2117050928091835e-06,
      "loss": 0.022,
      "step": 866320
    },
    {
      "epoch": 1.4177844111466782,
      "grad_norm": 0.5276136994361877,
      "learning_rate": 7.211639200595666e-06,
      "loss": 0.0238,
      "step": 866340
    },
    {
      "epoch": 1.4178171415853316,
      "grad_norm": 0.7739366888999939,
      "learning_rate": 7.211573308382149e-06,
      "loss": 0.0244,
      "step": 866360
    },
    {
      "epoch": 1.417849872023985,
      "grad_norm": 0.6692671775817871,
      "learning_rate": 7.211507416168632e-06,
      "loss": 0.0207,
      "step": 866380
    },
    {
      "epoch": 1.417882602462638,
      "grad_norm": 0.4906737804412842,
      "learning_rate": 7.211441523955115e-06,
      "loss": 0.0259,
      "step": 866400
    },
    {
      "epoch": 1.4179153329012917,
      "grad_norm": 0.42801591753959656,
      "learning_rate": 7.211375631741597e-06,
      "loss": 0.0207,
      "step": 866420
    },
    {
      "epoch": 1.4179480633399448,
      "grad_norm": 0.8266478180885315,
      "learning_rate": 7.211309739528081e-06,
      "loss": 0.0199,
      "step": 866440
    },
    {
      "epoch": 1.4179807937785982,
      "grad_norm": 1.6923567056655884,
      "learning_rate": 7.211243847314563e-06,
      "loss": 0.0262,
      "step": 866460
    },
    {
      "epoch": 1.4180135242172516,
      "grad_norm": 0.8353274464607239,
      "learning_rate": 7.211177955101046e-06,
      "loss": 0.0275,
      "step": 866480
    },
    {
      "epoch": 1.418046254655905,
      "grad_norm": 0.22229255735874176,
      "learning_rate": 7.211112062887529e-06,
      "loss": 0.0141,
      "step": 866500
    },
    {
      "epoch": 1.4180789850945583,
      "grad_norm": 1.5248863697052002,
      "learning_rate": 7.211046170674012e-06,
      "loss": 0.0164,
      "step": 866520
    },
    {
      "epoch": 1.4181117155332115,
      "grad_norm": 1.3423585891723633,
      "learning_rate": 7.210980278460495e-06,
      "loss": 0.0316,
      "step": 866540
    },
    {
      "epoch": 1.4181444459718648,
      "grad_norm": 0.5032013654708862,
      "learning_rate": 7.210914386246978e-06,
      "loss": 0.0186,
      "step": 866560
    },
    {
      "epoch": 1.4181771764105182,
      "grad_norm": 0.24350064992904663,
      "learning_rate": 7.210848494033461e-06,
      "loss": 0.0301,
      "step": 866580
    },
    {
      "epoch": 1.4182099068491716,
      "grad_norm": 0.8885605931282043,
      "learning_rate": 7.2107826018199435e-06,
      "loss": 0.0234,
      "step": 866600
    },
    {
      "epoch": 1.418242637287825,
      "grad_norm": 0.6844704151153564,
      "learning_rate": 7.210716709606427e-06,
      "loss": 0.0187,
      "step": 866620
    },
    {
      "epoch": 1.4182753677264783,
      "grad_norm": 0.33185890316963196,
      "learning_rate": 7.210650817392909e-06,
      "loss": 0.0196,
      "step": 866640
    },
    {
      "epoch": 1.4183080981651317,
      "grad_norm": 0.10867021232843399,
      "learning_rate": 7.2105849251793926e-06,
      "loss": 0.0185,
      "step": 866660
    },
    {
      "epoch": 1.4183408286037849,
      "grad_norm": 0.6831822991371155,
      "learning_rate": 7.2105190329658745e-06,
      "loss": 0.0171,
      "step": 866680
    },
    {
      "epoch": 1.4183735590424382,
      "grad_norm": 0.8394185304641724,
      "learning_rate": 7.210453140752358e-06,
      "loss": 0.0285,
      "step": 866700
    },
    {
      "epoch": 1.4184062894810916,
      "grad_norm": 0.1925409734249115,
      "learning_rate": 7.210387248538841e-06,
      "loss": 0.0184,
      "step": 866720
    },
    {
      "epoch": 1.418439019919745,
      "grad_norm": 0.3678554892539978,
      "learning_rate": 7.2103213563253235e-06,
      "loss": 0.0153,
      "step": 866740
    },
    {
      "epoch": 1.4184717503583983,
      "grad_norm": 0.16525740921497345,
      "learning_rate": 7.210255464111806e-06,
      "loss": 0.0188,
      "step": 866760
    },
    {
      "epoch": 1.4185044807970517,
      "grad_norm": 2.0595757961273193,
      "learning_rate": 7.21018957189829e-06,
      "loss": 0.0155,
      "step": 866780
    },
    {
      "epoch": 1.418537211235705,
      "grad_norm": 0.5168477892875671,
      "learning_rate": 7.210123679684772e-06,
      "loss": 0.0233,
      "step": 866800
    },
    {
      "epoch": 1.4185699416743582,
      "grad_norm": 1.1861178874969482,
      "learning_rate": 7.210057787471255e-06,
      "loss": 0.0156,
      "step": 866820
    },
    {
      "epoch": 1.4186026721130116,
      "grad_norm": 0.35152173042297363,
      "learning_rate": 7.209991895257737e-06,
      "loss": 0.0277,
      "step": 866840
    },
    {
      "epoch": 1.418635402551665,
      "grad_norm": 0.29401180148124695,
      "learning_rate": 7.209926003044221e-06,
      "loss": 0.0121,
      "step": 866860
    },
    {
      "epoch": 1.4186681329903184,
      "grad_norm": 0.42817068099975586,
      "learning_rate": 7.209860110830704e-06,
      "loss": 0.0323,
      "step": 866880
    },
    {
      "epoch": 1.4187008634289717,
      "grad_norm": 0.04469427466392517,
      "learning_rate": 7.209794218617186e-06,
      "loss": 0.0251,
      "step": 866900
    },
    {
      "epoch": 1.4187335938676249,
      "grad_norm": 0.3211018741130829,
      "learning_rate": 7.20972832640367e-06,
      "loss": 0.0252,
      "step": 866920
    },
    {
      "epoch": 1.4187663243062785,
      "grad_norm": 0.5147321820259094,
      "learning_rate": 7.209662434190153e-06,
      "loss": 0.014,
      "step": 866940
    },
    {
      "epoch": 1.4187990547449316,
      "grad_norm": 0.3173635005950928,
      "learning_rate": 7.209596541976635e-06,
      "loss": 0.0254,
      "step": 866960
    },
    {
      "epoch": 1.418831785183585,
      "grad_norm": 0.7142117619514465,
      "learning_rate": 7.209530649763118e-06,
      "loss": 0.0255,
      "step": 866980
    },
    {
      "epoch": 1.4188645156222384,
      "grad_norm": 0.13247749209403992,
      "learning_rate": 7.209464757549602e-06,
      "loss": 0.014,
      "step": 867000
    },
    {
      "epoch": 1.4188972460608917,
      "grad_norm": 3.4864912033081055,
      "learning_rate": 7.209398865336084e-06,
      "loss": 0.0178,
      "step": 867020
    },
    {
      "epoch": 1.418929976499545,
      "grad_norm": 0.22518275678157806,
      "learning_rate": 7.209332973122567e-06,
      "loss": 0.0197,
      "step": 867040
    },
    {
      "epoch": 1.4189627069381983,
      "grad_norm": 0.3171520531177521,
      "learning_rate": 7.209267080909049e-06,
      "loss": 0.0204,
      "step": 867060
    },
    {
      "epoch": 1.4189954373768519,
      "grad_norm": 0.17461995780467987,
      "learning_rate": 7.209201188695533e-06,
      "loss": 0.0226,
      "step": 867080
    },
    {
      "epoch": 1.419028167815505,
      "grad_norm": 0.9840172529220581,
      "learning_rate": 7.2091352964820145e-06,
      "loss": 0.011,
      "step": 867100
    },
    {
      "epoch": 1.4190608982541584,
      "grad_norm": 0.9945418834686279,
      "learning_rate": 7.209069404268498e-06,
      "loss": 0.0242,
      "step": 867120
    },
    {
      "epoch": 1.4190936286928117,
      "grad_norm": 1.220900058746338,
      "learning_rate": 7.209003512054981e-06,
      "loss": 0.0223,
      "step": 867140
    },
    {
      "epoch": 1.4191263591314651,
      "grad_norm": 0.6164734959602356,
      "learning_rate": 7.208937619841464e-06,
      "loss": 0.0154,
      "step": 867160
    },
    {
      "epoch": 1.4191590895701185,
      "grad_norm": 1.1615358591079712,
      "learning_rate": 7.208871727627946e-06,
      "loss": 0.0173,
      "step": 867180
    },
    {
      "epoch": 1.4191918200087716,
      "grad_norm": 0.5729921460151672,
      "learning_rate": 7.20880583541443e-06,
      "loss": 0.0181,
      "step": 867200
    },
    {
      "epoch": 1.419224550447425,
      "grad_norm": 0.6250900626182556,
      "learning_rate": 7.208739943200913e-06,
      "loss": 0.0267,
      "step": 867220
    },
    {
      "epoch": 1.4192572808860784,
      "grad_norm": 0.24127262830734253,
      "learning_rate": 7.208674050987395e-06,
      "loss": 0.0166,
      "step": 867240
    },
    {
      "epoch": 1.4192900113247318,
      "grad_norm": 0.7040689587593079,
      "learning_rate": 7.208608158773879e-06,
      "loss": 0.0267,
      "step": 867260
    },
    {
      "epoch": 1.4193227417633851,
      "grad_norm": 0.638312578201294,
      "learning_rate": 7.208542266560361e-06,
      "loss": 0.0266,
      "step": 867280
    },
    {
      "epoch": 1.4193554722020385,
      "grad_norm": 0.4091527462005615,
      "learning_rate": 7.2084763743468445e-06,
      "loss": 0.0177,
      "step": 867300
    },
    {
      "epoch": 1.4193882026406919,
      "grad_norm": 2.261988639831543,
      "learning_rate": 7.208410482133326e-06,
      "loss": 0.0238,
      "step": 867320
    },
    {
      "epoch": 1.419420933079345,
      "grad_norm": 0.34571215510368347,
      "learning_rate": 7.20834458991981e-06,
      "loss": 0.0258,
      "step": 867340
    },
    {
      "epoch": 1.4194536635179984,
      "grad_norm": 0.2672632932662964,
      "learning_rate": 7.208278697706293e-06,
      "loss": 0.0184,
      "step": 867360
    },
    {
      "epoch": 1.4194863939566518,
      "grad_norm": 1.7844130992889404,
      "learning_rate": 7.2082128054927754e-06,
      "loss": 0.0236,
      "step": 867380
    },
    {
      "epoch": 1.4195191243953051,
      "grad_norm": 0.4461444616317749,
      "learning_rate": 7.208146913279258e-06,
      "loss": 0.0315,
      "step": 867400
    },
    {
      "epoch": 1.4195518548339585,
      "grad_norm": 0.3158249258995056,
      "learning_rate": 7.208081021065742e-06,
      "loss": 0.0182,
      "step": 867420
    },
    {
      "epoch": 1.4195845852726119,
      "grad_norm": 0.7514513731002808,
      "learning_rate": 7.208015128852224e-06,
      "loss": 0.0173,
      "step": 867440
    },
    {
      "epoch": 1.4196173157112653,
      "grad_norm": 0.5566047430038452,
      "learning_rate": 7.207949236638707e-06,
      "loss": 0.021,
      "step": 867460
    },
    {
      "epoch": 1.4196500461499184,
      "grad_norm": 0.9077735543251038,
      "learning_rate": 7.207883344425189e-06,
      "loss": 0.0171,
      "step": 867480
    },
    {
      "epoch": 1.4196827765885718,
      "grad_norm": 0.17971870303153992,
      "learning_rate": 7.207817452211673e-06,
      "loss": 0.0321,
      "step": 867500
    },
    {
      "epoch": 1.4197155070272252,
      "grad_norm": 0.46573320031166077,
      "learning_rate": 7.2077515599981555e-06,
      "loss": 0.0191,
      "step": 867520
    },
    {
      "epoch": 1.4197482374658785,
      "grad_norm": 0.8616671562194824,
      "learning_rate": 7.207685667784638e-06,
      "loss": 0.0272,
      "step": 867540
    },
    {
      "epoch": 1.419780967904532,
      "grad_norm": 0.3129357695579529,
      "learning_rate": 7.207619775571121e-06,
      "loss": 0.0235,
      "step": 867560
    },
    {
      "epoch": 1.4198136983431853,
      "grad_norm": 0.6025328040122986,
      "learning_rate": 7.2075538833576045e-06,
      "loss": 0.0242,
      "step": 867580
    },
    {
      "epoch": 1.4198464287818386,
      "grad_norm": 0.8276293277740479,
      "learning_rate": 7.207487991144087e-06,
      "loss": 0.0203,
      "step": 867600
    },
    {
      "epoch": 1.4198791592204918,
      "grad_norm": 0.20169007778167725,
      "learning_rate": 7.20742209893057e-06,
      "loss": 0.0284,
      "step": 867620
    },
    {
      "epoch": 1.4199118896591452,
      "grad_norm": 1.9071629047393799,
      "learning_rate": 7.207356206717054e-06,
      "loss": 0.0163,
      "step": 867640
    },
    {
      "epoch": 1.4199446200977985,
      "grad_norm": 0.3339399993419647,
      "learning_rate": 7.2072903145035355e-06,
      "loss": 0.0179,
      "step": 867660
    },
    {
      "epoch": 1.419977350536452,
      "grad_norm": 0.5728419423103333,
      "learning_rate": 7.207224422290019e-06,
      "loss": 0.0218,
      "step": 867680
    },
    {
      "epoch": 1.4200100809751053,
      "grad_norm": 0.2968834936618805,
      "learning_rate": 7.207158530076501e-06,
      "loss": 0.021,
      "step": 867700
    },
    {
      "epoch": 1.4200428114137584,
      "grad_norm": 1.320732593536377,
      "learning_rate": 7.2070926378629846e-06,
      "loss": 0.0215,
      "step": 867720
    },
    {
      "epoch": 1.420075541852412,
      "grad_norm": 1.4608471393585205,
      "learning_rate": 7.207026745649467e-06,
      "loss": 0.0265,
      "step": 867740
    },
    {
      "epoch": 1.4201082722910652,
      "grad_norm": 0.7507472634315491,
      "learning_rate": 7.20696085343595e-06,
      "loss": 0.0158,
      "step": 867760
    },
    {
      "epoch": 1.4201410027297185,
      "grad_norm": 1.459757924079895,
      "learning_rate": 7.206894961222433e-06,
      "loss": 0.0197,
      "step": 867780
    },
    {
      "epoch": 1.420173733168372,
      "grad_norm": 0.7170191407203674,
      "learning_rate": 7.206829069008916e-06,
      "loss": 0.0248,
      "step": 867800
    },
    {
      "epoch": 1.4202064636070253,
      "grad_norm": 1.0702868700027466,
      "learning_rate": 7.206763176795398e-06,
      "loss": 0.0254,
      "step": 867820
    },
    {
      "epoch": 1.4202391940456787,
      "grad_norm": 1.150086760520935,
      "learning_rate": 7.206697284581882e-06,
      "loss": 0.0215,
      "step": 867840
    },
    {
      "epoch": 1.4202719244843318,
      "grad_norm": 0.4460577964782715,
      "learning_rate": 7.206631392368364e-06,
      "loss": 0.0321,
      "step": 867860
    },
    {
      "epoch": 1.4203046549229854,
      "grad_norm": 0.5915404558181763,
      "learning_rate": 7.206565500154847e-06,
      "loss": 0.024,
      "step": 867880
    },
    {
      "epoch": 1.4203373853616386,
      "grad_norm": 0.2111995816230774,
      "learning_rate": 7.206499607941329e-06,
      "loss": 0.0104,
      "step": 867900
    },
    {
      "epoch": 1.420370115800292,
      "grad_norm": 1.4711371660232544,
      "learning_rate": 7.206433715727813e-06,
      "loss": 0.0193,
      "step": 867920
    },
    {
      "epoch": 1.4204028462389453,
      "grad_norm": 0.2844404876232147,
      "learning_rate": 7.206367823514296e-06,
      "loss": 0.022,
      "step": 867940
    },
    {
      "epoch": 1.4204355766775987,
      "grad_norm": 0.47931987047195435,
      "learning_rate": 7.206301931300779e-06,
      "loss": 0.0209,
      "step": 867960
    },
    {
      "epoch": 1.420468307116252,
      "grad_norm": 1.1134769916534424,
      "learning_rate": 7.206236039087262e-06,
      "loss": 0.0239,
      "step": 867980
    },
    {
      "epoch": 1.4205010375549052,
      "grad_norm": 0.5957753658294678,
      "learning_rate": 7.206170146873745e-06,
      "loss": 0.0238,
      "step": 868000
    },
    {
      "epoch": 1.4205337679935586,
      "grad_norm": 0.22990453243255615,
      "learning_rate": 7.206104254660228e-06,
      "loss": 0.0194,
      "step": 868020
    },
    {
      "epoch": 1.420566498432212,
      "grad_norm": 0.5868142247200012,
      "learning_rate": 7.20603836244671e-06,
      "loss": 0.0242,
      "step": 868040
    },
    {
      "epoch": 1.4205992288708653,
      "grad_norm": 0.3589516580104828,
      "learning_rate": 7.205972470233194e-06,
      "loss": 0.0308,
      "step": 868060
    },
    {
      "epoch": 1.4206319593095187,
      "grad_norm": 1.0158946514129639,
      "learning_rate": 7.2059065780196756e-06,
      "loss": 0.0355,
      "step": 868080
    },
    {
      "epoch": 1.420664689748172,
      "grad_norm": 0.4313373267650604,
      "learning_rate": 7.205840685806159e-06,
      "loss": 0.0223,
      "step": 868100
    },
    {
      "epoch": 1.4206974201868254,
      "grad_norm": 1.1098673343658447,
      "learning_rate": 7.205774793592641e-06,
      "loss": 0.0182,
      "step": 868120
    },
    {
      "epoch": 1.4207301506254786,
      "grad_norm": 0.9459810256958008,
      "learning_rate": 7.205708901379125e-06,
      "loss": 0.0137,
      "step": 868140
    },
    {
      "epoch": 1.420762881064132,
      "grad_norm": 0.3287385106086731,
      "learning_rate": 7.205643009165607e-06,
      "loss": 0.0273,
      "step": 868160
    },
    {
      "epoch": 1.4207956115027853,
      "grad_norm": 0.7101661562919617,
      "learning_rate": 7.20557711695209e-06,
      "loss": 0.0215,
      "step": 868180
    },
    {
      "epoch": 1.4208283419414387,
      "grad_norm": 0.23322951793670654,
      "learning_rate": 7.205511224738573e-06,
      "loss": 0.0345,
      "step": 868200
    },
    {
      "epoch": 1.420861072380092,
      "grad_norm": 0.2631772756576538,
      "learning_rate": 7.2054453325250564e-06,
      "loss": 0.0334,
      "step": 868220
    },
    {
      "epoch": 1.4208938028187454,
      "grad_norm": 0.21954071521759033,
      "learning_rate": 7.205379440311538e-06,
      "loss": 0.0349,
      "step": 868240
    },
    {
      "epoch": 1.4209265332573988,
      "grad_norm": 0.8788958787918091,
      "learning_rate": 7.205313548098022e-06,
      "loss": 0.0273,
      "step": 868260
    },
    {
      "epoch": 1.420959263696052,
      "grad_norm": 0.8529897332191467,
      "learning_rate": 7.2052476558845055e-06,
      "loss": 0.0215,
      "step": 868280
    },
    {
      "epoch": 1.4209919941347053,
      "grad_norm": 0.6784543395042419,
      "learning_rate": 7.205181763670987e-06,
      "loss": 0.0184,
      "step": 868300
    },
    {
      "epoch": 1.4210247245733587,
      "grad_norm": 1.7331029176712036,
      "learning_rate": 7.205115871457471e-06,
      "loss": 0.0308,
      "step": 868320
    },
    {
      "epoch": 1.421057455012012,
      "grad_norm": 0.5363167524337769,
      "learning_rate": 7.205049979243953e-06,
      "loss": 0.0109,
      "step": 868340
    },
    {
      "epoch": 1.4210901854506655,
      "grad_norm": 0.842283308506012,
      "learning_rate": 7.2049840870304365e-06,
      "loss": 0.0205,
      "step": 868360
    },
    {
      "epoch": 1.4211229158893188,
      "grad_norm": 0.26557984948158264,
      "learning_rate": 7.204918194816919e-06,
      "loss": 0.0248,
      "step": 868380
    },
    {
      "epoch": 1.4211556463279722,
      "grad_norm": 0.620924711227417,
      "learning_rate": 7.204852302603402e-06,
      "loss": 0.0232,
      "step": 868400
    },
    {
      "epoch": 1.4211883767666253,
      "grad_norm": 0.7947401404380798,
      "learning_rate": 7.204786410389885e-06,
      "loss": 0.0193,
      "step": 868420
    },
    {
      "epoch": 1.4212211072052787,
      "grad_norm": 0.797028660774231,
      "learning_rate": 7.204720518176368e-06,
      "loss": 0.023,
      "step": 868440
    },
    {
      "epoch": 1.421253837643932,
      "grad_norm": 0.6373239755630493,
      "learning_rate": 7.20465462596285e-06,
      "loss": 0.0154,
      "step": 868460
    },
    {
      "epoch": 1.4212865680825855,
      "grad_norm": 0.7167807221412659,
      "learning_rate": 7.204588733749334e-06,
      "loss": 0.0189,
      "step": 868480
    },
    {
      "epoch": 1.4213192985212388,
      "grad_norm": 0.3972783088684082,
      "learning_rate": 7.204522841535816e-06,
      "loss": 0.0179,
      "step": 868500
    },
    {
      "epoch": 1.421352028959892,
      "grad_norm": 0.5695053935050964,
      "learning_rate": 7.204456949322299e-06,
      "loss": 0.0194,
      "step": 868520
    },
    {
      "epoch": 1.4213847593985456,
      "grad_norm": 0.6590797305107117,
      "learning_rate": 7.204391057108782e-06,
      "loss": 0.0248,
      "step": 868540
    },
    {
      "epoch": 1.4214174898371987,
      "grad_norm": 0.41325753927230835,
      "learning_rate": 7.204325164895265e-06,
      "loss": 0.0251,
      "step": 868560
    },
    {
      "epoch": 1.421450220275852,
      "grad_norm": 0.5068621635437012,
      "learning_rate": 7.2042592726817474e-06,
      "loss": 0.0314,
      "step": 868580
    },
    {
      "epoch": 1.4214829507145055,
      "grad_norm": 0.5845687985420227,
      "learning_rate": 7.204193380468231e-06,
      "loss": 0.0196,
      "step": 868600
    },
    {
      "epoch": 1.4215156811531588,
      "grad_norm": 0.5102272629737854,
      "learning_rate": 7.204127488254713e-06,
      "loss": 0.0202,
      "step": 868620
    },
    {
      "epoch": 1.4215484115918122,
      "grad_norm": 0.28720223903656006,
      "learning_rate": 7.2040615960411965e-06,
      "loss": 0.0177,
      "step": 868640
    },
    {
      "epoch": 1.4215811420304654,
      "grad_norm": 1.6327238082885742,
      "learning_rate": 7.20399570382768e-06,
      "loss": 0.021,
      "step": 868660
    },
    {
      "epoch": 1.421613872469119,
      "grad_norm": 1.4070441722869873,
      "learning_rate": 7.203929811614162e-06,
      "loss": 0.0208,
      "step": 868680
    },
    {
      "epoch": 1.4216466029077721,
      "grad_norm": 0.20759420096874237,
      "learning_rate": 7.203863919400646e-06,
      "loss": 0.0155,
      "step": 868700
    },
    {
      "epoch": 1.4216793333464255,
      "grad_norm": 0.5843908190727234,
      "learning_rate": 7.2037980271871275e-06,
      "loss": 0.0223,
      "step": 868720
    },
    {
      "epoch": 1.4217120637850789,
      "grad_norm": 0.9090152978897095,
      "learning_rate": 7.203732134973611e-06,
      "loss": 0.022,
      "step": 868740
    },
    {
      "epoch": 1.4217447942237322,
      "grad_norm": 0.45439058542251587,
      "learning_rate": 7.203666242760094e-06,
      "loss": 0.0202,
      "step": 868760
    },
    {
      "epoch": 1.4217775246623856,
      "grad_norm": 1.545609712600708,
      "learning_rate": 7.2036003505465765e-06,
      "loss": 0.0139,
      "step": 868780
    },
    {
      "epoch": 1.4218102551010388,
      "grad_norm": 0.312950998544693,
      "learning_rate": 7.203534458333059e-06,
      "loss": 0.0233,
      "step": 868800
    },
    {
      "epoch": 1.4218429855396921,
      "grad_norm": 0.43275728821754456,
      "learning_rate": 7.203468566119543e-06,
      "loss": 0.0231,
      "step": 868820
    },
    {
      "epoch": 1.4218757159783455,
      "grad_norm": 1.1555639505386353,
      "learning_rate": 7.203402673906025e-06,
      "loss": 0.0323,
      "step": 868840
    },
    {
      "epoch": 1.4219084464169989,
      "grad_norm": 0.5906726717948914,
      "learning_rate": 7.203336781692508e-06,
      "loss": 0.0166,
      "step": 868860
    },
    {
      "epoch": 1.4219411768556522,
      "grad_norm": 0.9167233109474182,
      "learning_rate": 7.20327088947899e-06,
      "loss": 0.0289,
      "step": 868880
    },
    {
      "epoch": 1.4219739072943056,
      "grad_norm": 0.8736656904220581,
      "learning_rate": 7.203204997265474e-06,
      "loss": 0.0339,
      "step": 868900
    },
    {
      "epoch": 1.422006637732959,
      "grad_norm": 0.9597242474555969,
      "learning_rate": 7.203139105051956e-06,
      "loss": 0.0223,
      "step": 868920
    },
    {
      "epoch": 1.4220393681716121,
      "grad_norm": 0.42160725593566895,
      "learning_rate": 7.203073212838439e-06,
      "loss": 0.015,
      "step": 868940
    },
    {
      "epoch": 1.4220720986102655,
      "grad_norm": 0.2783082127571106,
      "learning_rate": 7.203007320624922e-06,
      "loss": 0.0205,
      "step": 868960
    },
    {
      "epoch": 1.4221048290489189,
      "grad_norm": 1.066310167312622,
      "learning_rate": 7.202941428411405e-06,
      "loss": 0.0131,
      "step": 868980
    },
    {
      "epoch": 1.4221375594875723,
      "grad_norm": 1.1763625144958496,
      "learning_rate": 7.202875536197888e-06,
      "loss": 0.0264,
      "step": 869000
    },
    {
      "epoch": 1.4221702899262256,
      "grad_norm": 0.28836533427238464,
      "learning_rate": 7.202809643984371e-06,
      "loss": 0.0163,
      "step": 869020
    },
    {
      "epoch": 1.422203020364879,
      "grad_norm": 0.6006504893302917,
      "learning_rate": 7.202743751770855e-06,
      "loss": 0.0233,
      "step": 869040
    },
    {
      "epoch": 1.4222357508035324,
      "grad_norm": 0.6027357578277588,
      "learning_rate": 7.202677859557337e-06,
      "loss": 0.0232,
      "step": 869060
    },
    {
      "epoch": 1.4222684812421855,
      "grad_norm": 1.8621704578399658,
      "learning_rate": 7.20261196734382e-06,
      "loss": 0.0289,
      "step": 869080
    },
    {
      "epoch": 1.422301211680839,
      "grad_norm": 0.7247929573059082,
      "learning_rate": 7.202546075130302e-06,
      "loss": 0.0279,
      "step": 869100
    },
    {
      "epoch": 1.4223339421194923,
      "grad_norm": 0.7050201892852783,
      "learning_rate": 7.202480182916786e-06,
      "loss": 0.023,
      "step": 869120
    },
    {
      "epoch": 1.4223666725581456,
      "grad_norm": 1.3300946950912476,
      "learning_rate": 7.2024142907032675e-06,
      "loss": 0.019,
      "step": 869140
    },
    {
      "epoch": 1.422399402996799,
      "grad_norm": 0.47105246782302856,
      "learning_rate": 7.202348398489751e-06,
      "loss": 0.0173,
      "step": 869160
    },
    {
      "epoch": 1.4224321334354524,
      "grad_norm": 0.13366608321666718,
      "learning_rate": 7.202282506276234e-06,
      "loss": 0.0204,
      "step": 869180
    },
    {
      "epoch": 1.4224648638741058,
      "grad_norm": 0.6715402007102966,
      "learning_rate": 7.202216614062717e-06,
      "loss": 0.0155,
      "step": 869200
    },
    {
      "epoch": 1.422497594312759,
      "grad_norm": 0.9810070395469666,
      "learning_rate": 7.202150721849199e-06,
      "loss": 0.0203,
      "step": 869220
    },
    {
      "epoch": 1.4225303247514123,
      "grad_norm": 0.4870564043521881,
      "learning_rate": 7.202084829635683e-06,
      "loss": 0.0176,
      "step": 869240
    },
    {
      "epoch": 1.4225630551900657,
      "grad_norm": 0.6336378455162048,
      "learning_rate": 7.202018937422165e-06,
      "loss": 0.0175,
      "step": 869260
    },
    {
      "epoch": 1.422595785628719,
      "grad_norm": 1.08025062084198,
      "learning_rate": 7.201953045208648e-06,
      "loss": 0.0284,
      "step": 869280
    },
    {
      "epoch": 1.4226285160673724,
      "grad_norm": 0.3967653512954712,
      "learning_rate": 7.20188715299513e-06,
      "loss": 0.0249,
      "step": 869300
    },
    {
      "epoch": 1.4226612465060255,
      "grad_norm": 0.4670254588127136,
      "learning_rate": 7.201821260781614e-06,
      "loss": 0.0246,
      "step": 869320
    },
    {
      "epoch": 1.4226939769446791,
      "grad_norm": 0.46606945991516113,
      "learning_rate": 7.2017553685680975e-06,
      "loss": 0.0145,
      "step": 869340
    },
    {
      "epoch": 1.4227267073833323,
      "grad_norm": 1.5336641073226929,
      "learning_rate": 7.201689476354579e-06,
      "loss": 0.0134,
      "step": 869360
    },
    {
      "epoch": 1.4227594378219857,
      "grad_norm": 0.5300754904747009,
      "learning_rate": 7.201623584141063e-06,
      "loss": 0.0235,
      "step": 869380
    },
    {
      "epoch": 1.422792168260639,
      "grad_norm": 1.2800989151000977,
      "learning_rate": 7.201557691927546e-06,
      "loss": 0.0262,
      "step": 869400
    },
    {
      "epoch": 1.4228248986992924,
      "grad_norm": 0.15868769586086273,
      "learning_rate": 7.2014917997140284e-06,
      "loss": 0.0212,
      "step": 869420
    },
    {
      "epoch": 1.4228576291379458,
      "grad_norm": 0.1299489438533783,
      "learning_rate": 7.201425907500511e-06,
      "loss": 0.0117,
      "step": 869440
    },
    {
      "epoch": 1.422890359576599,
      "grad_norm": 1.685347557067871,
      "learning_rate": 7.201360015286995e-06,
      "loss": 0.0169,
      "step": 869460
    },
    {
      "epoch": 1.4229230900152525,
      "grad_norm": 0.4132727086544037,
      "learning_rate": 7.201294123073477e-06,
      "loss": 0.0227,
      "step": 869480
    },
    {
      "epoch": 1.4229558204539057,
      "grad_norm": 2.751452684402466,
      "learning_rate": 7.20122823085996e-06,
      "loss": 0.0354,
      "step": 869500
    },
    {
      "epoch": 1.422988550892559,
      "grad_norm": 0.5777648687362671,
      "learning_rate": 7.201162338646442e-06,
      "loss": 0.016,
      "step": 869520
    },
    {
      "epoch": 1.4230212813312124,
      "grad_norm": 0.5344004034996033,
      "learning_rate": 7.201096446432926e-06,
      "loss": 0.0162,
      "step": 869540
    },
    {
      "epoch": 1.4230540117698658,
      "grad_norm": 0.8844312429428101,
      "learning_rate": 7.2010305542194085e-06,
      "loss": 0.0196,
      "step": 869560
    },
    {
      "epoch": 1.4230867422085192,
      "grad_norm": 0.3908447027206421,
      "learning_rate": 7.200964662005891e-06,
      "loss": 0.0166,
      "step": 869580
    },
    {
      "epoch": 1.4231194726471723,
      "grad_norm": 0.13051922619342804,
      "learning_rate": 7.200898769792374e-06,
      "loss": 0.0196,
      "step": 869600
    },
    {
      "epoch": 1.4231522030858257,
      "grad_norm": 0.7253208160400391,
      "learning_rate": 7.2008328775788575e-06,
      "loss": 0.0181,
      "step": 869620
    },
    {
      "epoch": 1.423184933524479,
      "grad_norm": 0.31408119201660156,
      "learning_rate": 7.2007669853653394e-06,
      "loss": 0.0203,
      "step": 869640
    },
    {
      "epoch": 1.4232176639631324,
      "grad_norm": 1.053099274635315,
      "learning_rate": 7.200701093151823e-06,
      "loss": 0.0223,
      "step": 869660
    },
    {
      "epoch": 1.4232503944017858,
      "grad_norm": 0.28657278418540955,
      "learning_rate": 7.200635200938307e-06,
      "loss": 0.0205,
      "step": 869680
    },
    {
      "epoch": 1.4232831248404392,
      "grad_norm": 0.6375036239624023,
      "learning_rate": 7.2005693087247885e-06,
      "loss": 0.0253,
      "step": 869700
    },
    {
      "epoch": 1.4233158552790925,
      "grad_norm": 0.2967270314693451,
      "learning_rate": 7.200503416511272e-06,
      "loss": 0.0241,
      "step": 869720
    },
    {
      "epoch": 1.4233485857177457,
      "grad_norm": 0.9137046337127686,
      "learning_rate": 7.200437524297754e-06,
      "loss": 0.023,
      "step": 869740
    },
    {
      "epoch": 1.423381316156399,
      "grad_norm": 0.6107833385467529,
      "learning_rate": 7.2003716320842376e-06,
      "loss": 0.0197,
      "step": 869760
    },
    {
      "epoch": 1.4234140465950524,
      "grad_norm": 0.262651652097702,
      "learning_rate": 7.20030573987072e-06,
      "loss": 0.0148,
      "step": 869780
    },
    {
      "epoch": 1.4234467770337058,
      "grad_norm": 0.3768506944179535,
      "learning_rate": 7.200239847657203e-06,
      "loss": 0.0248,
      "step": 869800
    },
    {
      "epoch": 1.4234795074723592,
      "grad_norm": 0.2958733141422272,
      "learning_rate": 7.200173955443686e-06,
      "loss": 0.0148,
      "step": 869820
    },
    {
      "epoch": 1.4235122379110126,
      "grad_norm": 0.19470839202404022,
      "learning_rate": 7.200108063230169e-06,
      "loss": 0.0349,
      "step": 869840
    },
    {
      "epoch": 1.423544968349666,
      "grad_norm": 0.19784647226333618,
      "learning_rate": 7.200042171016651e-06,
      "loss": 0.0188,
      "step": 869860
    },
    {
      "epoch": 1.423577698788319,
      "grad_norm": 0.6623871922492981,
      "learning_rate": 7.199976278803135e-06,
      "loss": 0.022,
      "step": 869880
    },
    {
      "epoch": 1.4236104292269725,
      "grad_norm": 0.500877857208252,
      "learning_rate": 7.199910386589617e-06,
      "loss": 0.0271,
      "step": 869900
    },
    {
      "epoch": 1.4236431596656258,
      "grad_norm": 0.2776079773902893,
      "learning_rate": 7.1998444943761e-06,
      "loss": 0.0202,
      "step": 869920
    },
    {
      "epoch": 1.4236758901042792,
      "grad_norm": 0.48620378971099854,
      "learning_rate": 7.199778602162582e-06,
      "loss": 0.0201,
      "step": 869940
    },
    {
      "epoch": 1.4237086205429326,
      "grad_norm": 0.49632731080055237,
      "learning_rate": 7.199712709949066e-06,
      "loss": 0.013,
      "step": 869960
    },
    {
      "epoch": 1.4237413509815857,
      "grad_norm": 0.4127599895000458,
      "learning_rate": 7.1996468177355485e-06,
      "loss": 0.0127,
      "step": 869980
    },
    {
      "epoch": 1.4237740814202393,
      "grad_norm": 0.5081522464752197,
      "learning_rate": 7.199580925522031e-06,
      "loss": 0.0225,
      "step": 870000
    },
    {
      "epoch": 1.4238068118588925,
      "grad_norm": 1.482199788093567,
      "learning_rate": 7.199515033308514e-06,
      "loss": 0.0292,
      "step": 870020
    },
    {
      "epoch": 1.4238395422975458,
      "grad_norm": 0.510826051235199,
      "learning_rate": 7.199449141094998e-06,
      "loss": 0.0252,
      "step": 870040
    },
    {
      "epoch": 1.4238722727361992,
      "grad_norm": 0.4602597653865814,
      "learning_rate": 7.19938324888148e-06,
      "loss": 0.02,
      "step": 870060
    },
    {
      "epoch": 1.4239050031748526,
      "grad_norm": 0.3546803593635559,
      "learning_rate": 7.199317356667963e-06,
      "loss": 0.0174,
      "step": 870080
    },
    {
      "epoch": 1.423937733613506,
      "grad_norm": 0.5842480063438416,
      "learning_rate": 7.199251464454447e-06,
      "loss": 0.0236,
      "step": 870100
    },
    {
      "epoch": 1.423970464052159,
      "grad_norm": 0.18924961984157562,
      "learning_rate": 7.1991855722409286e-06,
      "loss": 0.0143,
      "step": 870120
    },
    {
      "epoch": 1.4240031944908127,
      "grad_norm": 0.6890571117401123,
      "learning_rate": 7.199119680027412e-06,
      "loss": 0.0317,
      "step": 870140
    },
    {
      "epoch": 1.4240359249294658,
      "grad_norm": 0.9861090779304504,
      "learning_rate": 7.199053787813894e-06,
      "loss": 0.0198,
      "step": 870160
    },
    {
      "epoch": 1.4240686553681192,
      "grad_norm": 0.8947609663009644,
      "learning_rate": 7.198987895600378e-06,
      "loss": 0.0189,
      "step": 870180
    },
    {
      "epoch": 1.4241013858067726,
      "grad_norm": 0.5303437113761902,
      "learning_rate": 7.19892200338686e-06,
      "loss": 0.017,
      "step": 870200
    },
    {
      "epoch": 1.424134116245426,
      "grad_norm": 0.5249884724617004,
      "learning_rate": 7.198856111173343e-06,
      "loss": 0.0231,
      "step": 870220
    },
    {
      "epoch": 1.4241668466840793,
      "grad_norm": 0.5232961773872375,
      "learning_rate": 7.198790218959826e-06,
      "loss": 0.0183,
      "step": 870240
    },
    {
      "epoch": 1.4241995771227325,
      "grad_norm": 1.0443792343139648,
      "learning_rate": 7.1987243267463094e-06,
      "loss": 0.0343,
      "step": 870260
    },
    {
      "epoch": 1.4242323075613859,
      "grad_norm": 0.33784738183021545,
      "learning_rate": 7.198658434532791e-06,
      "loss": 0.0283,
      "step": 870280
    },
    {
      "epoch": 1.4242650380000392,
      "grad_norm": 0.7898728251457214,
      "learning_rate": 7.198592542319275e-06,
      "loss": 0.0142,
      "step": 870300
    },
    {
      "epoch": 1.4242977684386926,
      "grad_norm": 1.0692214965820312,
      "learning_rate": 7.198526650105757e-06,
      "loss": 0.0204,
      "step": 870320
    },
    {
      "epoch": 1.424330498877346,
      "grad_norm": 0.5851780772209167,
      "learning_rate": 7.19846075789224e-06,
      "loss": 0.0254,
      "step": 870340
    },
    {
      "epoch": 1.4243632293159993,
      "grad_norm": 2.9792962074279785,
      "learning_rate": 7.198394865678723e-06,
      "loss": 0.0175,
      "step": 870360
    },
    {
      "epoch": 1.4243959597546527,
      "grad_norm": 0.9702926874160767,
      "learning_rate": 7.198328973465206e-06,
      "loss": 0.0249,
      "step": 870380
    },
    {
      "epoch": 1.4244286901933059,
      "grad_norm": 0.6067415475845337,
      "learning_rate": 7.1982630812516895e-06,
      "loss": 0.0247,
      "step": 870400
    },
    {
      "epoch": 1.4244614206319592,
      "grad_norm": 3.8785486221313477,
      "learning_rate": 7.198197189038172e-06,
      "loss": 0.0301,
      "step": 870420
    },
    {
      "epoch": 1.4244941510706126,
      "grad_norm": 1.0742634534835815,
      "learning_rate": 7.198131296824655e-06,
      "loss": 0.0245,
      "step": 870440
    },
    {
      "epoch": 1.424526881509266,
      "grad_norm": 0.49230480194091797,
      "learning_rate": 7.198065404611138e-06,
      "loss": 0.0167,
      "step": 870460
    },
    {
      "epoch": 1.4245596119479194,
      "grad_norm": 0.6305007338523865,
      "learning_rate": 7.197999512397621e-06,
      "loss": 0.0231,
      "step": 870480
    },
    {
      "epoch": 1.4245923423865727,
      "grad_norm": 0.8987867832183838,
      "learning_rate": 7.197933620184103e-06,
      "loss": 0.0175,
      "step": 870500
    },
    {
      "epoch": 1.424625072825226,
      "grad_norm": 0.3006470799446106,
      "learning_rate": 7.197867727970587e-06,
      "loss": 0.0234,
      "step": 870520
    },
    {
      "epoch": 1.4246578032638793,
      "grad_norm": 1.1092662811279297,
      "learning_rate": 7.197801835757069e-06,
      "loss": 0.0368,
      "step": 870540
    },
    {
      "epoch": 1.4246905337025326,
      "grad_norm": 0.8913745284080505,
      "learning_rate": 7.197735943543552e-06,
      "loss": 0.0121,
      "step": 870560
    },
    {
      "epoch": 1.424723264141186,
      "grad_norm": 0.390875905752182,
      "learning_rate": 7.197670051330035e-06,
      "loss": 0.0233,
      "step": 870580
    },
    {
      "epoch": 1.4247559945798394,
      "grad_norm": 0.39899760484695435,
      "learning_rate": 7.197604159116518e-06,
      "loss": 0.0165,
      "step": 870600
    },
    {
      "epoch": 1.4247887250184927,
      "grad_norm": 0.23483118414878845,
      "learning_rate": 7.1975382669030005e-06,
      "loss": 0.0253,
      "step": 870620
    },
    {
      "epoch": 1.4248214554571461,
      "grad_norm": 0.12561722099781036,
      "learning_rate": 7.197472374689484e-06,
      "loss": 0.0169,
      "step": 870640
    },
    {
      "epoch": 1.4248541858957995,
      "grad_norm": 0.6867972612380981,
      "learning_rate": 7.197406482475966e-06,
      "loss": 0.0219,
      "step": 870660
    },
    {
      "epoch": 1.4248869163344526,
      "grad_norm": 0.828475296497345,
      "learning_rate": 7.1973405902624495e-06,
      "loss": 0.0228,
      "step": 870680
    },
    {
      "epoch": 1.424919646773106,
      "grad_norm": 0.26320263743400574,
      "learning_rate": 7.197274698048931e-06,
      "loss": 0.0212,
      "step": 870700
    },
    {
      "epoch": 1.4249523772117594,
      "grad_norm": 0.64145827293396,
      "learning_rate": 7.197208805835415e-06,
      "loss": 0.0227,
      "step": 870720
    },
    {
      "epoch": 1.4249851076504128,
      "grad_norm": 0.2567365765571594,
      "learning_rate": 7.197142913621899e-06,
      "loss": 0.0167,
      "step": 870740
    },
    {
      "epoch": 1.4250178380890661,
      "grad_norm": 0.7677270770072937,
      "learning_rate": 7.1970770214083805e-06,
      "loss": 0.0198,
      "step": 870760
    },
    {
      "epoch": 1.4250505685277193,
      "grad_norm": 0.47649291157722473,
      "learning_rate": 7.197011129194864e-06,
      "loss": 0.0203,
      "step": 870780
    },
    {
      "epoch": 1.4250832989663729,
      "grad_norm": 0.8919200897216797,
      "learning_rate": 7.196945236981347e-06,
      "loss": 0.0201,
      "step": 870800
    },
    {
      "epoch": 1.425116029405026,
      "grad_norm": 0.7064396142959595,
      "learning_rate": 7.1968793447678295e-06,
      "loss": 0.025,
      "step": 870820
    },
    {
      "epoch": 1.4251487598436794,
      "grad_norm": 0.5633313655853271,
      "learning_rate": 7.196813452554312e-06,
      "loss": 0.0272,
      "step": 870840
    },
    {
      "epoch": 1.4251814902823328,
      "grad_norm": 0.21170802414417267,
      "learning_rate": 7.196747560340796e-06,
      "loss": 0.0187,
      "step": 870860
    },
    {
      "epoch": 1.4252142207209861,
      "grad_norm": 0.4339843690395355,
      "learning_rate": 7.196681668127278e-06,
      "loss": 0.015,
      "step": 870880
    },
    {
      "epoch": 1.4252469511596395,
      "grad_norm": 0.8190235495567322,
      "learning_rate": 7.196615775913761e-06,
      "loss": 0.0174,
      "step": 870900
    },
    {
      "epoch": 1.4252796815982927,
      "grad_norm": 0.19536620378494263,
      "learning_rate": 7.196549883700243e-06,
      "loss": 0.0231,
      "step": 870920
    },
    {
      "epoch": 1.4253124120369463,
      "grad_norm": 0.6259981989860535,
      "learning_rate": 7.196483991486727e-06,
      "loss": 0.0205,
      "step": 870940
    },
    {
      "epoch": 1.4253451424755994,
      "grad_norm": 0.46548980474472046,
      "learning_rate": 7.196418099273209e-06,
      "loss": 0.0201,
      "step": 870960
    },
    {
      "epoch": 1.4253778729142528,
      "grad_norm": 0.9920905232429504,
      "learning_rate": 7.196352207059692e-06,
      "loss": 0.0265,
      "step": 870980
    },
    {
      "epoch": 1.4254106033529061,
      "grad_norm": 1.0896267890930176,
      "learning_rate": 7.196286314846175e-06,
      "loss": 0.02,
      "step": 871000
    },
    {
      "epoch": 1.4254433337915595,
      "grad_norm": 0.4025422930717468,
      "learning_rate": 7.196220422632658e-06,
      "loss": 0.0263,
      "step": 871020
    },
    {
      "epoch": 1.425476064230213,
      "grad_norm": 0.21080295741558075,
      "learning_rate": 7.1961545304191405e-06,
      "loss": 0.0188,
      "step": 871040
    },
    {
      "epoch": 1.425508794668866,
      "grad_norm": 0.6448786854743958,
      "learning_rate": 7.196088638205624e-06,
      "loss": 0.0153,
      "step": 871060
    },
    {
      "epoch": 1.4255415251075194,
      "grad_norm": 0.36054643988609314,
      "learning_rate": 7.196022745992106e-06,
      "loss": 0.0164,
      "step": 871080
    },
    {
      "epoch": 1.4255742555461728,
      "grad_norm": 0.6776696443557739,
      "learning_rate": 7.19595685377859e-06,
      "loss": 0.0213,
      "step": 871100
    },
    {
      "epoch": 1.4256069859848262,
      "grad_norm": 0.6596691608428955,
      "learning_rate": 7.195890961565073e-06,
      "loss": 0.0256,
      "step": 871120
    },
    {
      "epoch": 1.4256397164234795,
      "grad_norm": 0.43579840660095215,
      "learning_rate": 7.195825069351555e-06,
      "loss": 0.0198,
      "step": 871140
    },
    {
      "epoch": 1.425672446862133,
      "grad_norm": 2.4510109424591064,
      "learning_rate": 7.195759177138039e-06,
      "loss": 0.0258,
      "step": 871160
    },
    {
      "epoch": 1.4257051773007863,
      "grad_norm": 0.36331504583358765,
      "learning_rate": 7.1956932849245206e-06,
      "loss": 0.0148,
      "step": 871180
    },
    {
      "epoch": 1.4257379077394394,
      "grad_norm": 0.3650091588497162,
      "learning_rate": 7.195627392711004e-06,
      "loss": 0.0182,
      "step": 871200
    },
    {
      "epoch": 1.4257706381780928,
      "grad_norm": 0.49639061093330383,
      "learning_rate": 7.195561500497487e-06,
      "loss": 0.0222,
      "step": 871220
    },
    {
      "epoch": 1.4258033686167462,
      "grad_norm": 1.3244024515151978,
      "learning_rate": 7.19549560828397e-06,
      "loss": 0.0197,
      "step": 871240
    },
    {
      "epoch": 1.4258360990553995,
      "grad_norm": 0.7349057197570801,
      "learning_rate": 7.195429716070452e-06,
      "loss": 0.0172,
      "step": 871260
    },
    {
      "epoch": 1.425868829494053,
      "grad_norm": 0.33856046199798584,
      "learning_rate": 7.195363823856936e-06,
      "loss": 0.0238,
      "step": 871280
    },
    {
      "epoch": 1.4259015599327063,
      "grad_norm": 0.9732303023338318,
      "learning_rate": 7.195297931643418e-06,
      "loss": 0.0258,
      "step": 871300
    },
    {
      "epoch": 1.4259342903713597,
      "grad_norm": 0.6367387771606445,
      "learning_rate": 7.1952320394299014e-06,
      "loss": 0.0173,
      "step": 871320
    },
    {
      "epoch": 1.4259670208100128,
      "grad_norm": 0.38781389594078064,
      "learning_rate": 7.195166147216383e-06,
      "loss": 0.0277,
      "step": 871340
    },
    {
      "epoch": 1.4259997512486662,
      "grad_norm": 0.4603254497051239,
      "learning_rate": 7.195100255002867e-06,
      "loss": 0.0258,
      "step": 871360
    },
    {
      "epoch": 1.4260324816873196,
      "grad_norm": 0.8425154089927673,
      "learning_rate": 7.19503436278935e-06,
      "loss": 0.0167,
      "step": 871380
    },
    {
      "epoch": 1.426065212125973,
      "grad_norm": 0.5252183079719543,
      "learning_rate": 7.194968470575832e-06,
      "loss": 0.0278,
      "step": 871400
    },
    {
      "epoch": 1.4260979425646263,
      "grad_norm": 0.9650688171386719,
      "learning_rate": 7.194902578362315e-06,
      "loss": 0.0177,
      "step": 871420
    },
    {
      "epoch": 1.4261306730032797,
      "grad_norm": 0.37884145975112915,
      "learning_rate": 7.194836686148799e-06,
      "loss": 0.0205,
      "step": 871440
    },
    {
      "epoch": 1.426163403441933,
      "grad_norm": 1.3249086141586304,
      "learning_rate": 7.1947707939352815e-06,
      "loss": 0.0256,
      "step": 871460
    },
    {
      "epoch": 1.4261961338805862,
      "grad_norm": 0.673256516456604,
      "learning_rate": 7.194704901721764e-06,
      "loss": 0.023,
      "step": 871480
    },
    {
      "epoch": 1.4262288643192396,
      "grad_norm": 0.2850404679775238,
      "learning_rate": 7.194639009508248e-06,
      "loss": 0.0258,
      "step": 871500
    },
    {
      "epoch": 1.426261594757893,
      "grad_norm": 2.664290189743042,
      "learning_rate": 7.19457311729473e-06,
      "loss": 0.0188,
      "step": 871520
    },
    {
      "epoch": 1.4262943251965463,
      "grad_norm": 1.2839386463165283,
      "learning_rate": 7.194507225081213e-06,
      "loss": 0.0159,
      "step": 871540
    },
    {
      "epoch": 1.4263270556351997,
      "grad_norm": 0.21992424130439758,
      "learning_rate": 7.194441332867695e-06,
      "loss": 0.0223,
      "step": 871560
    },
    {
      "epoch": 1.4263597860738528,
      "grad_norm": 0.7494658827781677,
      "learning_rate": 7.194375440654179e-06,
      "loss": 0.0236,
      "step": 871580
    },
    {
      "epoch": 1.4263925165125064,
      "grad_norm": 0.2866361439228058,
      "learning_rate": 7.1943095484406615e-06,
      "loss": 0.0178,
      "step": 871600
    },
    {
      "epoch": 1.4264252469511596,
      "grad_norm": 0.5832809805870056,
      "learning_rate": 7.194243656227144e-06,
      "loss": 0.0169,
      "step": 871620
    },
    {
      "epoch": 1.426457977389813,
      "grad_norm": 0.4830411970615387,
      "learning_rate": 7.194177764013627e-06,
      "loss": 0.0156,
      "step": 871640
    },
    {
      "epoch": 1.4264907078284663,
      "grad_norm": 0.6520386338233948,
      "learning_rate": 7.1941118718001105e-06,
      "loss": 0.0193,
      "step": 871660
    },
    {
      "epoch": 1.4265234382671197,
      "grad_norm": 0.2713220417499542,
      "learning_rate": 7.1940459795865924e-06,
      "loss": 0.0257,
      "step": 871680
    },
    {
      "epoch": 1.426556168705773,
      "grad_norm": 0.3352615535259247,
      "learning_rate": 7.193980087373076e-06,
      "loss": 0.021,
      "step": 871700
    },
    {
      "epoch": 1.4265888991444262,
      "grad_norm": 0.6343449354171753,
      "learning_rate": 7.193914195159558e-06,
      "loss": 0.0199,
      "step": 871720
    },
    {
      "epoch": 1.4266216295830798,
      "grad_norm": 1.9528645277023315,
      "learning_rate": 7.1938483029460415e-06,
      "loss": 0.0333,
      "step": 871740
    },
    {
      "epoch": 1.426654360021733,
      "grad_norm": 0.5568417310714722,
      "learning_rate": 7.193782410732523e-06,
      "loss": 0.0207,
      "step": 871760
    },
    {
      "epoch": 1.4266870904603863,
      "grad_norm": 0.297212690114975,
      "learning_rate": 7.193716518519007e-06,
      "loss": 0.0252,
      "step": 871780
    },
    {
      "epoch": 1.4267198208990397,
      "grad_norm": 0.3375557065010071,
      "learning_rate": 7.1936506263054906e-06,
      "loss": 0.0143,
      "step": 871800
    },
    {
      "epoch": 1.426752551337693,
      "grad_norm": 0.49761518836021423,
      "learning_rate": 7.193584734091973e-06,
      "loss": 0.0242,
      "step": 871820
    },
    {
      "epoch": 1.4267852817763464,
      "grad_norm": 0.5373551249504089,
      "learning_rate": 7.193518841878456e-06,
      "loss": 0.0123,
      "step": 871840
    },
    {
      "epoch": 1.4268180122149996,
      "grad_norm": 0.4783809781074524,
      "learning_rate": 7.193452949664939e-06,
      "loss": 0.0229,
      "step": 871860
    },
    {
      "epoch": 1.426850742653653,
      "grad_norm": 0.5689253807067871,
      "learning_rate": 7.193387057451422e-06,
      "loss": 0.018,
      "step": 871880
    },
    {
      "epoch": 1.4268834730923063,
      "grad_norm": 0.49437007308006287,
      "learning_rate": 7.193321165237904e-06,
      "loss": 0.0174,
      "step": 871900
    },
    {
      "epoch": 1.4269162035309597,
      "grad_norm": 0.6270624399185181,
      "learning_rate": 7.193255273024388e-06,
      "loss": 0.0256,
      "step": 871920
    },
    {
      "epoch": 1.426948933969613,
      "grad_norm": 0.3172188997268677,
      "learning_rate": 7.19318938081087e-06,
      "loss": 0.0259,
      "step": 871940
    },
    {
      "epoch": 1.4269816644082665,
      "grad_norm": 0.07476745545864105,
      "learning_rate": 7.193123488597353e-06,
      "loss": 0.0146,
      "step": 871960
    },
    {
      "epoch": 1.4270143948469198,
      "grad_norm": 0.8194502592086792,
      "learning_rate": 7.193057596383835e-06,
      "loss": 0.0241,
      "step": 871980
    },
    {
      "epoch": 1.427047125285573,
      "grad_norm": 0.9061049222946167,
      "learning_rate": 7.192991704170319e-06,
      "loss": 0.0151,
      "step": 872000
    },
    {
      "epoch": 1.4270798557242264,
      "grad_norm": 0.3902752995491028,
      "learning_rate": 7.1929258119568016e-06,
      "loss": 0.0211,
      "step": 872020
    },
    {
      "epoch": 1.4271125861628797,
      "grad_norm": 2.8995256423950195,
      "learning_rate": 7.192859919743284e-06,
      "loss": 0.0216,
      "step": 872040
    },
    {
      "epoch": 1.427145316601533,
      "grad_norm": 0.4327363669872284,
      "learning_rate": 7.192794027529767e-06,
      "loss": 0.0276,
      "step": 872060
    },
    {
      "epoch": 1.4271780470401865,
      "grad_norm": 0.4139847457408905,
      "learning_rate": 7.192728135316251e-06,
      "loss": 0.0176,
      "step": 872080
    },
    {
      "epoch": 1.4272107774788398,
      "grad_norm": 0.12183691561222076,
      "learning_rate": 7.1926622431027325e-06,
      "loss": 0.0214,
      "step": 872100
    },
    {
      "epoch": 1.4272435079174932,
      "grad_norm": 0.2818039655685425,
      "learning_rate": 7.192596350889216e-06,
      "loss": 0.021,
      "step": 872120
    },
    {
      "epoch": 1.4272762383561464,
      "grad_norm": 0.2857421040534973,
      "learning_rate": 7.1925304586757e-06,
      "loss": 0.0187,
      "step": 872140
    },
    {
      "epoch": 1.4273089687947997,
      "grad_norm": 0.28598836064338684,
      "learning_rate": 7.192464566462182e-06,
      "loss": 0.0238,
      "step": 872160
    },
    {
      "epoch": 1.427341699233453,
      "grad_norm": 0.6148156523704529,
      "learning_rate": 7.192398674248665e-06,
      "loss": 0.016,
      "step": 872180
    },
    {
      "epoch": 1.4273744296721065,
      "grad_norm": 0.7171977162361145,
      "learning_rate": 7.192332782035147e-06,
      "loss": 0.0211,
      "step": 872200
    },
    {
      "epoch": 1.4274071601107599,
      "grad_norm": 0.3794078230857849,
      "learning_rate": 7.192266889821631e-06,
      "loss": 0.0181,
      "step": 872220
    },
    {
      "epoch": 1.4274398905494132,
      "grad_norm": 0.5413720607757568,
      "learning_rate": 7.192200997608113e-06,
      "loss": 0.0224,
      "step": 872240
    },
    {
      "epoch": 1.4274726209880666,
      "grad_norm": 0.46587011218070984,
      "learning_rate": 7.192135105394596e-06,
      "loss": 0.0202,
      "step": 872260
    },
    {
      "epoch": 1.4275053514267197,
      "grad_norm": 0.18531173467636108,
      "learning_rate": 7.192069213181079e-06,
      "loss": 0.0294,
      "step": 872280
    },
    {
      "epoch": 1.4275380818653731,
      "grad_norm": 0.507689893245697,
      "learning_rate": 7.1920033209675625e-06,
      "loss": 0.0213,
      "step": 872300
    },
    {
      "epoch": 1.4275708123040265,
      "grad_norm": 0.28731289505958557,
      "learning_rate": 7.191937428754044e-06,
      "loss": 0.023,
      "step": 872320
    },
    {
      "epoch": 1.4276035427426799,
      "grad_norm": 0.7487871050834656,
      "learning_rate": 7.191871536540528e-06,
      "loss": 0.0163,
      "step": 872340
    },
    {
      "epoch": 1.4276362731813332,
      "grad_norm": 0.6855387091636658,
      "learning_rate": 7.19180564432701e-06,
      "loss": 0.0194,
      "step": 872360
    },
    {
      "epoch": 1.4276690036199864,
      "grad_norm": 1.3734500408172607,
      "learning_rate": 7.191739752113493e-06,
      "loss": 0.0262,
      "step": 872380
    },
    {
      "epoch": 1.42770173405864,
      "grad_norm": 0.8465811610221863,
      "learning_rate": 7.191673859899976e-06,
      "loss": 0.0238,
      "step": 872400
    },
    {
      "epoch": 1.4277344644972931,
      "grad_norm": 0.20162251591682434,
      "learning_rate": 7.191607967686459e-06,
      "loss": 0.0156,
      "step": 872420
    },
    {
      "epoch": 1.4277671949359465,
      "grad_norm": 1.1939109563827515,
      "learning_rate": 7.191542075472942e-06,
      "loss": 0.0226,
      "step": 872440
    },
    {
      "epoch": 1.4277999253745999,
      "grad_norm": 0.10998854041099548,
      "learning_rate": 7.191476183259425e-06,
      "loss": 0.0124,
      "step": 872460
    },
    {
      "epoch": 1.4278326558132532,
      "grad_norm": 1.1069101095199585,
      "learning_rate": 7.191410291045907e-06,
      "loss": 0.0193,
      "step": 872480
    },
    {
      "epoch": 1.4278653862519066,
      "grad_norm": 0.3569431006908417,
      "learning_rate": 7.191344398832391e-06,
      "loss": 0.0208,
      "step": 872500
    },
    {
      "epoch": 1.4278981166905598,
      "grad_norm": 0.053739096969366074,
      "learning_rate": 7.191278506618874e-06,
      "loss": 0.0234,
      "step": 872520
    },
    {
      "epoch": 1.4279308471292131,
      "grad_norm": 0.5647882223129272,
      "learning_rate": 7.191212614405356e-06,
      "loss": 0.0231,
      "step": 872540
    },
    {
      "epoch": 1.4279635775678665,
      "grad_norm": 0.39174535870552063,
      "learning_rate": 7.19114672219184e-06,
      "loss": 0.0164,
      "step": 872560
    },
    {
      "epoch": 1.4279963080065199,
      "grad_norm": 0.5661447048187256,
      "learning_rate": 7.191080829978322e-06,
      "loss": 0.0166,
      "step": 872580
    },
    {
      "epoch": 1.4280290384451733,
      "grad_norm": 1.1565362215042114,
      "learning_rate": 7.191014937764805e-06,
      "loss": 0.0329,
      "step": 872600
    },
    {
      "epoch": 1.4280617688838266,
      "grad_norm": 1.0520439147949219,
      "learning_rate": 7.190949045551288e-06,
      "loss": 0.0191,
      "step": 872620
    },
    {
      "epoch": 1.42809449932248,
      "grad_norm": 0.47683751583099365,
      "learning_rate": 7.190883153337771e-06,
      "loss": 0.0251,
      "step": 872640
    },
    {
      "epoch": 1.4281272297611332,
      "grad_norm": 0.6814225316047668,
      "learning_rate": 7.1908172611242535e-06,
      "loss": 0.0284,
      "step": 872660
    },
    {
      "epoch": 1.4281599601997865,
      "grad_norm": 1.33805251121521,
      "learning_rate": 7.190751368910737e-06,
      "loss": 0.0199,
      "step": 872680
    },
    {
      "epoch": 1.42819269063844,
      "grad_norm": 0.48651084303855896,
      "learning_rate": 7.190685476697219e-06,
      "loss": 0.0187,
      "step": 872700
    },
    {
      "epoch": 1.4282254210770933,
      "grad_norm": 1.181288480758667,
      "learning_rate": 7.1906195844837025e-06,
      "loss": 0.0163,
      "step": 872720
    },
    {
      "epoch": 1.4282581515157466,
      "grad_norm": 1.8952518701553345,
      "learning_rate": 7.190553692270184e-06,
      "loss": 0.0293,
      "step": 872740
    },
    {
      "epoch": 1.4282908819544,
      "grad_norm": 0.4420165717601776,
      "learning_rate": 7.190487800056668e-06,
      "loss": 0.0347,
      "step": 872760
    },
    {
      "epoch": 1.4283236123930534,
      "grad_norm": 0.6677222847938538,
      "learning_rate": 7.19042190784315e-06,
      "loss": 0.0162,
      "step": 872780
    },
    {
      "epoch": 1.4283563428317065,
      "grad_norm": 0.800816535949707,
      "learning_rate": 7.1903560156296335e-06,
      "loss": 0.0215,
      "step": 872800
    },
    {
      "epoch": 1.42838907327036,
      "grad_norm": 1.967423915863037,
      "learning_rate": 7.190290123416116e-06,
      "loss": 0.0238,
      "step": 872820
    },
    {
      "epoch": 1.4284218037090133,
      "grad_norm": 0.4382791817188263,
      "learning_rate": 7.190224231202599e-06,
      "loss": 0.0276,
      "step": 872840
    },
    {
      "epoch": 1.4284545341476667,
      "grad_norm": 1.0953447818756104,
      "learning_rate": 7.1901583389890826e-06,
      "loss": 0.0249,
      "step": 872860
    },
    {
      "epoch": 1.42848726458632,
      "grad_norm": 1.4152295589447021,
      "learning_rate": 7.190092446775565e-06,
      "loss": 0.0291,
      "step": 872880
    },
    {
      "epoch": 1.4285199950249734,
      "grad_norm": 0.9135904908180237,
      "learning_rate": 7.190026554562048e-06,
      "loss": 0.022,
      "step": 872900
    },
    {
      "epoch": 1.4285527254636268,
      "grad_norm": 0.25430452823638916,
      "learning_rate": 7.189960662348531e-06,
      "loss": 0.0196,
      "step": 872920
    },
    {
      "epoch": 1.42858545590228,
      "grad_norm": 0.7478695511817932,
      "learning_rate": 7.189894770135014e-06,
      "loss": 0.0177,
      "step": 872940
    },
    {
      "epoch": 1.4286181863409333,
      "grad_norm": 0.07594456523656845,
      "learning_rate": 7.189828877921496e-06,
      "loss": 0.0184,
      "step": 872960
    },
    {
      "epoch": 1.4286509167795867,
      "grad_norm": 0.718021035194397,
      "learning_rate": 7.18976298570798e-06,
      "loss": 0.0149,
      "step": 872980
    },
    {
      "epoch": 1.42868364721824,
      "grad_norm": 0.9048278331756592,
      "learning_rate": 7.189697093494462e-06,
      "loss": 0.0321,
      "step": 873000
    },
    {
      "epoch": 1.4287163776568934,
      "grad_norm": 0.10204482823610306,
      "learning_rate": 7.189631201280945e-06,
      "loss": 0.0196,
      "step": 873020
    },
    {
      "epoch": 1.4287491080955466,
      "grad_norm": 0.46618613600730896,
      "learning_rate": 7.189565309067428e-06,
      "loss": 0.0254,
      "step": 873040
    },
    {
      "epoch": 1.4287818385342002,
      "grad_norm": 1.1893963813781738,
      "learning_rate": 7.189499416853911e-06,
      "loss": 0.0312,
      "step": 873060
    },
    {
      "epoch": 1.4288145689728533,
      "grad_norm": 1.5175135135650635,
      "learning_rate": 7.1894335246403935e-06,
      "loss": 0.02,
      "step": 873080
    },
    {
      "epoch": 1.4288472994115067,
      "grad_norm": 0.3227449357509613,
      "learning_rate": 7.189367632426877e-06,
      "loss": 0.0148,
      "step": 873100
    },
    {
      "epoch": 1.42888002985016,
      "grad_norm": 0.6512042284011841,
      "learning_rate": 7.189301740213359e-06,
      "loss": 0.0182,
      "step": 873120
    },
    {
      "epoch": 1.4289127602888134,
      "grad_norm": 0.46402302384376526,
      "learning_rate": 7.189235847999843e-06,
      "loss": 0.0158,
      "step": 873140
    },
    {
      "epoch": 1.4289454907274668,
      "grad_norm": 0.3362526297569275,
      "learning_rate": 7.1891699557863245e-06,
      "loss": 0.0207,
      "step": 873160
    },
    {
      "epoch": 1.42897822116612,
      "grad_norm": 0.6685454845428467,
      "learning_rate": 7.189104063572808e-06,
      "loss": 0.0221,
      "step": 873180
    },
    {
      "epoch": 1.4290109516047735,
      "grad_norm": 0.2436911165714264,
      "learning_rate": 7.189038171359292e-06,
      "loss": 0.0131,
      "step": 873200
    },
    {
      "epoch": 1.4290436820434267,
      "grad_norm": 0.769198477268219,
      "learning_rate": 7.1889722791457736e-06,
      "loss": 0.0261,
      "step": 873220
    },
    {
      "epoch": 1.42907641248208,
      "grad_norm": 0.44708144664764404,
      "learning_rate": 7.188906386932257e-06,
      "loss": 0.0148,
      "step": 873240
    },
    {
      "epoch": 1.4291091429207334,
      "grad_norm": 0.6294484734535217,
      "learning_rate": 7.18884049471874e-06,
      "loss": 0.0196,
      "step": 873260
    },
    {
      "epoch": 1.4291418733593868,
      "grad_norm": 0.22166785597801208,
      "learning_rate": 7.188774602505223e-06,
      "loss": 0.0236,
      "step": 873280
    },
    {
      "epoch": 1.4291746037980402,
      "grad_norm": 0.37296944856643677,
      "learning_rate": 7.188708710291705e-06,
      "loss": 0.0178,
      "step": 873300
    },
    {
      "epoch": 1.4292073342366933,
      "grad_norm": 0.8277587890625,
      "learning_rate": 7.188642818078189e-06,
      "loss": 0.022,
      "step": 873320
    },
    {
      "epoch": 1.4292400646753467,
      "grad_norm": 0.8720577955245972,
      "learning_rate": 7.188576925864671e-06,
      "loss": 0.0237,
      "step": 873340
    },
    {
      "epoch": 1.429272795114,
      "grad_norm": 1.3831605911254883,
      "learning_rate": 7.1885110336511544e-06,
      "loss": 0.0275,
      "step": 873360
    },
    {
      "epoch": 1.4293055255526534,
      "grad_norm": 0.8612465858459473,
      "learning_rate": 7.188445141437636e-06,
      "loss": 0.022,
      "step": 873380
    },
    {
      "epoch": 1.4293382559913068,
      "grad_norm": 0.1169586181640625,
      "learning_rate": 7.18837924922412e-06,
      "loss": 0.0091,
      "step": 873400
    },
    {
      "epoch": 1.4293709864299602,
      "grad_norm": 1.0298182964324951,
      "learning_rate": 7.188313357010603e-06,
      "loss": 0.0169,
      "step": 873420
    },
    {
      "epoch": 1.4294037168686136,
      "grad_norm": 0.5626975297927856,
      "learning_rate": 7.188247464797085e-06,
      "loss": 0.0228,
      "step": 873440
    },
    {
      "epoch": 1.4294364473072667,
      "grad_norm": 0.7406190633773804,
      "learning_rate": 7.188181572583568e-06,
      "loss": 0.0148,
      "step": 873460
    },
    {
      "epoch": 1.42946917774592,
      "grad_norm": 1.96488356590271,
      "learning_rate": 7.188115680370052e-06,
      "loss": 0.025,
      "step": 873480
    },
    {
      "epoch": 1.4295019081845735,
      "grad_norm": 1.580278992652893,
      "learning_rate": 7.188049788156534e-06,
      "loss": 0.027,
      "step": 873500
    },
    {
      "epoch": 1.4295346386232268,
      "grad_norm": 0.8981837034225464,
      "learning_rate": 7.187983895943017e-06,
      "loss": 0.0243,
      "step": 873520
    },
    {
      "epoch": 1.4295673690618802,
      "grad_norm": 0.17080943286418915,
      "learning_rate": 7.187918003729499e-06,
      "loss": 0.0209,
      "step": 873540
    },
    {
      "epoch": 1.4296000995005336,
      "grad_norm": 0.40762847661972046,
      "learning_rate": 7.187852111515983e-06,
      "loss": 0.0182,
      "step": 873560
    },
    {
      "epoch": 1.429632829939187,
      "grad_norm": 0.7673206925392151,
      "learning_rate": 7.187786219302466e-06,
      "loss": 0.016,
      "step": 873580
    },
    {
      "epoch": 1.42966556037784,
      "grad_norm": 1.5940459966659546,
      "learning_rate": 7.187720327088948e-06,
      "loss": 0.0264,
      "step": 873600
    },
    {
      "epoch": 1.4296982908164935,
      "grad_norm": 0.3466287851333618,
      "learning_rate": 7.187654434875432e-06,
      "loss": 0.0259,
      "step": 873620
    },
    {
      "epoch": 1.4297310212551468,
      "grad_norm": 0.9468321800231934,
      "learning_rate": 7.1875885426619145e-06,
      "loss": 0.0325,
      "step": 873640
    },
    {
      "epoch": 1.4297637516938002,
      "grad_norm": 0.598553478717804,
      "learning_rate": 7.187522650448397e-06,
      "loss": 0.0191,
      "step": 873660
    },
    {
      "epoch": 1.4297964821324536,
      "grad_norm": 0.46410873532295227,
      "learning_rate": 7.18745675823488e-06,
      "loss": 0.0185,
      "step": 873680
    },
    {
      "epoch": 1.429829212571107,
      "grad_norm": 0.35882869362831116,
      "learning_rate": 7.1873908660213636e-06,
      "loss": 0.0158,
      "step": 873700
    },
    {
      "epoch": 1.4298619430097603,
      "grad_norm": 0.3752128481864929,
      "learning_rate": 7.1873249738078454e-06,
      "loss": 0.0223,
      "step": 873720
    },
    {
      "epoch": 1.4298946734484135,
      "grad_norm": 1.1187084913253784,
      "learning_rate": 7.187259081594329e-06,
      "loss": 0.0271,
      "step": 873740
    },
    {
      "epoch": 1.4299274038870669,
      "grad_norm": 0.5387786626815796,
      "learning_rate": 7.187193189380811e-06,
      "loss": 0.0268,
      "step": 873760
    },
    {
      "epoch": 1.4299601343257202,
      "grad_norm": 0.6228942275047302,
      "learning_rate": 7.1871272971672945e-06,
      "loss": 0.0199,
      "step": 873780
    },
    {
      "epoch": 1.4299928647643736,
      "grad_norm": 1.2079118490219116,
      "learning_rate": 7.187061404953776e-06,
      "loss": 0.0182,
      "step": 873800
    },
    {
      "epoch": 1.430025595203027,
      "grad_norm": 0.8991407155990601,
      "learning_rate": 7.18699551274026e-06,
      "loss": 0.02,
      "step": 873820
    },
    {
      "epoch": 1.4300583256416801,
      "grad_norm": 0.6490646004676819,
      "learning_rate": 7.186929620526743e-06,
      "loss": 0.0235,
      "step": 873840
    },
    {
      "epoch": 1.4300910560803337,
      "grad_norm": 0.5599774718284607,
      "learning_rate": 7.1868637283132255e-06,
      "loss": 0.0177,
      "step": 873860
    },
    {
      "epoch": 1.4301237865189869,
      "grad_norm": 0.9184210300445557,
      "learning_rate": 7.186797836099708e-06,
      "loss": 0.0198,
      "step": 873880
    },
    {
      "epoch": 1.4301565169576402,
      "grad_norm": 0.3334193229675293,
      "learning_rate": 7.186731943886192e-06,
      "loss": 0.0239,
      "step": 873900
    },
    {
      "epoch": 1.4301892473962936,
      "grad_norm": 0.44854992628097534,
      "learning_rate": 7.1866660516726745e-06,
      "loss": 0.0249,
      "step": 873920
    },
    {
      "epoch": 1.430221977834947,
      "grad_norm": 0.650467038154602,
      "learning_rate": 7.186600159459157e-06,
      "loss": 0.0203,
      "step": 873940
    },
    {
      "epoch": 1.4302547082736004,
      "grad_norm": 0.8227440714836121,
      "learning_rate": 7.186534267245641e-06,
      "loss": 0.0259,
      "step": 873960
    },
    {
      "epoch": 1.4302874387122535,
      "grad_norm": 0.790435254573822,
      "learning_rate": 7.186468375032123e-06,
      "loss": 0.0292,
      "step": 873980
    },
    {
      "epoch": 1.430320169150907,
      "grad_norm": 1.0906683206558228,
      "learning_rate": 7.186402482818606e-06,
      "loss": 0.024,
      "step": 874000
    },
    {
      "epoch": 1.4303528995895602,
      "grad_norm": 0.8537790179252625,
      "learning_rate": 7.186336590605088e-06,
      "loss": 0.0253,
      "step": 874020
    },
    {
      "epoch": 1.4303856300282136,
      "grad_norm": 0.33958274126052856,
      "learning_rate": 7.186270698391572e-06,
      "loss": 0.0272,
      "step": 874040
    },
    {
      "epoch": 1.430418360466867,
      "grad_norm": 0.648934006690979,
      "learning_rate": 7.1862048061780546e-06,
      "loss": 0.0245,
      "step": 874060
    },
    {
      "epoch": 1.4304510909055204,
      "grad_norm": 0.871457576751709,
      "learning_rate": 7.186138913964537e-06,
      "loss": 0.0214,
      "step": 874080
    },
    {
      "epoch": 1.4304838213441737,
      "grad_norm": 0.2974092662334442,
      "learning_rate": 7.18607302175102e-06,
      "loss": 0.017,
      "step": 874100
    },
    {
      "epoch": 1.4305165517828269,
      "grad_norm": 1.0368396043777466,
      "learning_rate": 7.186007129537504e-06,
      "loss": 0.0232,
      "step": 874120
    },
    {
      "epoch": 1.4305492822214803,
      "grad_norm": 0.7744913101196289,
      "learning_rate": 7.1859412373239855e-06,
      "loss": 0.0254,
      "step": 874140
    },
    {
      "epoch": 1.4305820126601336,
      "grad_norm": 0.681236207485199,
      "learning_rate": 7.185875345110469e-06,
      "loss": 0.0233,
      "step": 874160
    },
    {
      "epoch": 1.430614743098787,
      "grad_norm": 0.8658531904220581,
      "learning_rate": 7.185809452896951e-06,
      "loss": 0.0221,
      "step": 874180
    },
    {
      "epoch": 1.4306474735374404,
      "grad_norm": 0.7907001376152039,
      "learning_rate": 7.185743560683435e-06,
      "loss": 0.021,
      "step": 874200
    },
    {
      "epoch": 1.4306802039760937,
      "grad_norm": 1.0920021533966064,
      "learning_rate": 7.185677668469917e-06,
      "loss": 0.0186,
      "step": 874220
    },
    {
      "epoch": 1.4307129344147471,
      "grad_norm": 0.5333936810493469,
      "learning_rate": 7.1856117762564e-06,
      "loss": 0.0232,
      "step": 874240
    },
    {
      "epoch": 1.4307456648534003,
      "grad_norm": 0.38512122631073,
      "learning_rate": 7.185545884042884e-06,
      "loss": 0.0226,
      "step": 874260
    },
    {
      "epoch": 1.4307783952920536,
      "grad_norm": 0.3133760094642639,
      "learning_rate": 7.185479991829366e-06,
      "loss": 0.0262,
      "step": 874280
    },
    {
      "epoch": 1.430811125730707,
      "grad_norm": 0.7486043572425842,
      "learning_rate": 7.185414099615849e-06,
      "loss": 0.029,
      "step": 874300
    },
    {
      "epoch": 1.4308438561693604,
      "grad_norm": 0.3851796090602875,
      "learning_rate": 7.185348207402332e-06,
      "loss": 0.0274,
      "step": 874320
    },
    {
      "epoch": 1.4308765866080138,
      "grad_norm": 2.05210280418396,
      "learning_rate": 7.1852823151888155e-06,
      "loss": 0.022,
      "step": 874340
    },
    {
      "epoch": 1.4309093170466671,
      "grad_norm": 0.8071243166923523,
      "learning_rate": 7.185216422975297e-06,
      "loss": 0.0187,
      "step": 874360
    },
    {
      "epoch": 1.4309420474853205,
      "grad_norm": 0.5651289224624634,
      "learning_rate": 7.185150530761781e-06,
      "loss": 0.0226,
      "step": 874380
    },
    {
      "epoch": 1.4309747779239737,
      "grad_norm": 0.24255026876926422,
      "learning_rate": 7.185084638548263e-06,
      "loss": 0.0238,
      "step": 874400
    },
    {
      "epoch": 1.431007508362627,
      "grad_norm": 0.8874946236610413,
      "learning_rate": 7.185018746334746e-06,
      "loss": 0.0238,
      "step": 874420
    },
    {
      "epoch": 1.4310402388012804,
      "grad_norm": 0.5878804326057434,
      "learning_rate": 7.184952854121229e-06,
      "loss": 0.0199,
      "step": 874440
    },
    {
      "epoch": 1.4310729692399338,
      "grad_norm": 0.3864980936050415,
      "learning_rate": 7.184886961907712e-06,
      "loss": 0.0162,
      "step": 874460
    },
    {
      "epoch": 1.4311056996785871,
      "grad_norm": 0.5930261015892029,
      "learning_rate": 7.184821069694195e-06,
      "loss": 0.0264,
      "step": 874480
    },
    {
      "epoch": 1.4311384301172405,
      "grad_norm": 0.9535108208656311,
      "learning_rate": 7.184755177480678e-06,
      "loss": 0.0173,
      "step": 874500
    },
    {
      "epoch": 1.4311711605558939,
      "grad_norm": 0.6642038822174072,
      "learning_rate": 7.18468928526716e-06,
      "loss": 0.0197,
      "step": 874520
    },
    {
      "epoch": 1.431203890994547,
      "grad_norm": 0.21481864154338837,
      "learning_rate": 7.184623393053644e-06,
      "loss": 0.0231,
      "step": 874540
    },
    {
      "epoch": 1.4312366214332004,
      "grad_norm": 0.3546111285686493,
      "learning_rate": 7.184557500840126e-06,
      "loss": 0.0204,
      "step": 874560
    },
    {
      "epoch": 1.4312693518718538,
      "grad_norm": 0.3157534897327423,
      "learning_rate": 7.184491608626609e-06,
      "loss": 0.0178,
      "step": 874580
    },
    {
      "epoch": 1.4313020823105072,
      "grad_norm": 0.8286547064781189,
      "learning_rate": 7.184425716413092e-06,
      "loss": 0.019,
      "step": 874600
    },
    {
      "epoch": 1.4313348127491605,
      "grad_norm": 1.0789568424224854,
      "learning_rate": 7.184359824199575e-06,
      "loss": 0.0205,
      "step": 874620
    },
    {
      "epoch": 1.4313675431878137,
      "grad_norm": 1.4368151426315308,
      "learning_rate": 7.184293931986058e-06,
      "loss": 0.0179,
      "step": 874640
    },
    {
      "epoch": 1.4314002736264673,
      "grad_norm": 0.3968629539012909,
      "learning_rate": 7.184228039772541e-06,
      "loss": 0.0104,
      "step": 874660
    },
    {
      "epoch": 1.4314330040651204,
      "grad_norm": 1.3097556829452515,
      "learning_rate": 7.184162147559024e-06,
      "loss": 0.0146,
      "step": 874680
    },
    {
      "epoch": 1.4314657345037738,
      "grad_norm": 1.273411512374878,
      "learning_rate": 7.1840962553455065e-06,
      "loss": 0.0223,
      "step": 874700
    },
    {
      "epoch": 1.4314984649424272,
      "grad_norm": 0.43115800619125366,
      "learning_rate": 7.18403036313199e-06,
      "loss": 0.0204,
      "step": 874720
    },
    {
      "epoch": 1.4315311953810805,
      "grad_norm": 0.2396232932806015,
      "learning_rate": 7.183964470918472e-06,
      "loss": 0.0202,
      "step": 874740
    },
    {
      "epoch": 1.431563925819734,
      "grad_norm": 0.1663961410522461,
      "learning_rate": 7.1838985787049555e-06,
      "loss": 0.0236,
      "step": 874760
    },
    {
      "epoch": 1.431596656258387,
      "grad_norm": 0.32415488362312317,
      "learning_rate": 7.1838326864914374e-06,
      "loss": 0.0183,
      "step": 874780
    },
    {
      "epoch": 1.4316293866970407,
      "grad_norm": 2.329397678375244,
      "learning_rate": 7.183766794277921e-06,
      "loss": 0.0174,
      "step": 874800
    },
    {
      "epoch": 1.4316621171356938,
      "grad_norm": 0.34610503911972046,
      "learning_rate": 7.183700902064403e-06,
      "loss": 0.0205,
      "step": 874820
    },
    {
      "epoch": 1.4316948475743472,
      "grad_norm": 0.1738869696855545,
      "learning_rate": 7.1836350098508865e-06,
      "loss": 0.0203,
      "step": 874840
    },
    {
      "epoch": 1.4317275780130005,
      "grad_norm": 0.24862568080425262,
      "learning_rate": 7.183569117637369e-06,
      "loss": 0.0189,
      "step": 874860
    },
    {
      "epoch": 1.431760308451654,
      "grad_norm": 0.29167237877845764,
      "learning_rate": 7.183503225423852e-06,
      "loss": 0.0278,
      "step": 874880
    },
    {
      "epoch": 1.4317930388903073,
      "grad_norm": 1.0574555397033691,
      "learning_rate": 7.183437333210335e-06,
      "loss": 0.018,
      "step": 874900
    },
    {
      "epoch": 1.4318257693289604,
      "grad_norm": 0.42288610339164734,
      "learning_rate": 7.183371440996818e-06,
      "loss": 0.0135,
      "step": 874920
    },
    {
      "epoch": 1.4318584997676138,
      "grad_norm": 3.3464434146881104,
      "learning_rate": 7.1833055487833e-06,
      "loss": 0.023,
      "step": 874940
    },
    {
      "epoch": 1.4318912302062672,
      "grad_norm": 0.9447816014289856,
      "learning_rate": 7.183239656569784e-06,
      "loss": 0.0248,
      "step": 874960
    },
    {
      "epoch": 1.4319239606449206,
      "grad_norm": 0.11013972759246826,
      "learning_rate": 7.183173764356267e-06,
      "loss": 0.0134,
      "step": 874980
    },
    {
      "epoch": 1.431956691083574,
      "grad_norm": 0.6672833561897278,
      "learning_rate": 7.183107872142749e-06,
      "loss": 0.0293,
      "step": 875000
    },
    {
      "epoch": 1.4319894215222273,
      "grad_norm": 0.1688908487558365,
      "learning_rate": 7.183041979929233e-06,
      "loss": 0.0121,
      "step": 875020
    },
    {
      "epoch": 1.4320221519608807,
      "grad_norm": 1.264108419418335,
      "learning_rate": 7.182976087715715e-06,
      "loss": 0.0158,
      "step": 875040
    },
    {
      "epoch": 1.4320548823995338,
      "grad_norm": 0.5536405444145203,
      "learning_rate": 7.182910195502198e-06,
      "loss": 0.0133,
      "step": 875060
    },
    {
      "epoch": 1.4320876128381872,
      "grad_norm": 0.5013535022735596,
      "learning_rate": 7.182844303288681e-06,
      "loss": 0.0238,
      "step": 875080
    },
    {
      "epoch": 1.4321203432768406,
      "grad_norm": 0.769328773021698,
      "learning_rate": 7.182778411075164e-06,
      "loss": 0.0204,
      "step": 875100
    },
    {
      "epoch": 1.432153073715494,
      "grad_norm": 0.7491532564163208,
      "learning_rate": 7.1827125188616465e-06,
      "loss": 0.0195,
      "step": 875120
    },
    {
      "epoch": 1.4321858041541473,
      "grad_norm": 0.9616233706474304,
      "learning_rate": 7.18264662664813e-06,
      "loss": 0.0223,
      "step": 875140
    },
    {
      "epoch": 1.4322185345928007,
      "grad_norm": 1.153082251548767,
      "learning_rate": 7.182580734434612e-06,
      "loss": 0.0186,
      "step": 875160
    },
    {
      "epoch": 1.432251265031454,
      "grad_norm": 1.7084063291549683,
      "learning_rate": 7.182514842221096e-06,
      "loss": 0.0202,
      "step": 875180
    },
    {
      "epoch": 1.4322839954701072,
      "grad_norm": 0.24466681480407715,
      "learning_rate": 7.1824489500075775e-06,
      "loss": 0.0175,
      "step": 875200
    },
    {
      "epoch": 1.4323167259087606,
      "grad_norm": 0.5299096703529358,
      "learning_rate": 7.182383057794061e-06,
      "loss": 0.0188,
      "step": 875220
    },
    {
      "epoch": 1.432349456347414,
      "grad_norm": 2.6774661540985107,
      "learning_rate": 7.182317165580544e-06,
      "loss": 0.0166,
      "step": 875240
    },
    {
      "epoch": 1.4323821867860673,
      "grad_norm": 0.58552485704422,
      "learning_rate": 7.1822512733670266e-06,
      "loss": 0.0198,
      "step": 875260
    },
    {
      "epoch": 1.4324149172247207,
      "grad_norm": 0.8598982691764832,
      "learning_rate": 7.182185381153509e-06,
      "loss": 0.02,
      "step": 875280
    },
    {
      "epoch": 1.4324476476633738,
      "grad_norm": 0.4711191654205322,
      "learning_rate": 7.182119488939993e-06,
      "loss": 0.0227,
      "step": 875300
    },
    {
      "epoch": 1.4324803781020274,
      "grad_norm": 0.1589275747537613,
      "learning_rate": 7.182053596726476e-06,
      "loss": 0.019,
      "step": 875320
    },
    {
      "epoch": 1.4325131085406806,
      "grad_norm": 0.35541656613349915,
      "learning_rate": 7.181987704512958e-06,
      "loss": 0.0175,
      "step": 875340
    },
    {
      "epoch": 1.432545838979334,
      "grad_norm": 0.15865376591682434,
      "learning_rate": 7.181921812299442e-06,
      "loss": 0.0238,
      "step": 875360
    },
    {
      "epoch": 1.4325785694179873,
      "grad_norm": 0.45810723304748535,
      "learning_rate": 7.181855920085924e-06,
      "loss": 0.027,
      "step": 875380
    },
    {
      "epoch": 1.4326112998566407,
      "grad_norm": 1.3433325290679932,
      "learning_rate": 7.1817900278724074e-06,
      "loss": 0.0221,
      "step": 875400
    },
    {
      "epoch": 1.432644030295294,
      "grad_norm": 1.8654429912567139,
      "learning_rate": 7.181724135658889e-06,
      "loss": 0.0288,
      "step": 875420
    },
    {
      "epoch": 1.4326767607339472,
      "grad_norm": 0.7843552231788635,
      "learning_rate": 7.181658243445373e-06,
      "loss": 0.0225,
      "step": 875440
    },
    {
      "epoch": 1.4327094911726008,
      "grad_norm": 0.24954620003700256,
      "learning_rate": 7.181592351231856e-06,
      "loss": 0.0222,
      "step": 875460
    },
    {
      "epoch": 1.432742221611254,
      "grad_norm": 1.7237794399261475,
      "learning_rate": 7.181526459018338e-06,
      "loss": 0.0109,
      "step": 875480
    },
    {
      "epoch": 1.4327749520499073,
      "grad_norm": 1.0716044902801514,
      "learning_rate": 7.181460566804821e-06,
      "loss": 0.0238,
      "step": 875500
    },
    {
      "epoch": 1.4328076824885607,
      "grad_norm": 0.6778380870819092,
      "learning_rate": 7.181394674591305e-06,
      "loss": 0.024,
      "step": 875520
    },
    {
      "epoch": 1.432840412927214,
      "grad_norm": 0.49211034178733826,
      "learning_rate": 7.181328782377787e-06,
      "loss": 0.0168,
      "step": 875540
    },
    {
      "epoch": 1.4328731433658675,
      "grad_norm": 0.6290392279624939,
      "learning_rate": 7.18126289016427e-06,
      "loss": 0.0146,
      "step": 875560
    },
    {
      "epoch": 1.4329058738045206,
      "grad_norm": 0.090394526720047,
      "learning_rate": 7.181196997950752e-06,
      "loss": 0.0142,
      "step": 875580
    },
    {
      "epoch": 1.432938604243174,
      "grad_norm": 0.16404616832733154,
      "learning_rate": 7.181131105737236e-06,
      "loss": 0.0227,
      "step": 875600
    },
    {
      "epoch": 1.4329713346818274,
      "grad_norm": 3.3146746158599854,
      "learning_rate": 7.181065213523718e-06,
      "loss": 0.0146,
      "step": 875620
    },
    {
      "epoch": 1.4330040651204807,
      "grad_norm": 1.3240008354187012,
      "learning_rate": 7.180999321310201e-06,
      "loss": 0.0132,
      "step": 875640
    },
    {
      "epoch": 1.433036795559134,
      "grad_norm": 0.493701308965683,
      "learning_rate": 7.180933429096685e-06,
      "loss": 0.0202,
      "step": 875660
    },
    {
      "epoch": 1.4330695259977875,
      "grad_norm": 0.24357563257217407,
      "learning_rate": 7.180867536883167e-06,
      "loss": 0.0141,
      "step": 875680
    },
    {
      "epoch": 1.4331022564364408,
      "grad_norm": 0.6221749782562256,
      "learning_rate": 7.18080164466965e-06,
      "loss": 0.0234,
      "step": 875700
    },
    {
      "epoch": 1.433134986875094,
      "grad_norm": 0.30193030834198,
      "learning_rate": 7.180735752456133e-06,
      "loss": 0.0212,
      "step": 875720
    },
    {
      "epoch": 1.4331677173137474,
      "grad_norm": 0.15038064122200012,
      "learning_rate": 7.1806698602426166e-06,
      "loss": 0.0253,
      "step": 875740
    },
    {
      "epoch": 1.4332004477524007,
      "grad_norm": 0.2890620529651642,
      "learning_rate": 7.1806039680290985e-06,
      "loss": 0.0158,
      "step": 875760
    },
    {
      "epoch": 1.4332331781910541,
      "grad_norm": 0.3938698470592499,
      "learning_rate": 7.180538075815582e-06,
      "loss": 0.0213,
      "step": 875780
    },
    {
      "epoch": 1.4332659086297075,
      "grad_norm": 1.4382355213165283,
      "learning_rate": 7.180472183602064e-06,
      "loss": 0.0171,
      "step": 875800
    },
    {
      "epoch": 1.4332986390683609,
      "grad_norm": 0.5677164793014526,
      "learning_rate": 7.1804062913885475e-06,
      "loss": 0.0261,
      "step": 875820
    },
    {
      "epoch": 1.4333313695070142,
      "grad_norm": 0.5446023344993591,
      "learning_rate": 7.180340399175029e-06,
      "loss": 0.023,
      "step": 875840
    },
    {
      "epoch": 1.4333640999456674,
      "grad_norm": 1.254042148590088,
      "learning_rate": 7.180274506961513e-06,
      "loss": 0.0225,
      "step": 875860
    },
    {
      "epoch": 1.4333968303843208,
      "grad_norm": 0.3521465063095093,
      "learning_rate": 7.180208614747996e-06,
      "loss": 0.0216,
      "step": 875880
    },
    {
      "epoch": 1.4334295608229741,
      "grad_norm": 0.6906540989875793,
      "learning_rate": 7.1801427225344785e-06,
      "loss": 0.0228,
      "step": 875900
    },
    {
      "epoch": 1.4334622912616275,
      "grad_norm": 1.2218537330627441,
      "learning_rate": 7.180076830320961e-06,
      "loss": 0.0314,
      "step": 875920
    },
    {
      "epoch": 1.4334950217002809,
      "grad_norm": 0.3738784193992615,
      "learning_rate": 7.180010938107445e-06,
      "loss": 0.0212,
      "step": 875940
    },
    {
      "epoch": 1.4335277521389342,
      "grad_norm": 1.325410008430481,
      "learning_rate": 7.179945045893927e-06,
      "loss": 0.0205,
      "step": 875960
    },
    {
      "epoch": 1.4335604825775876,
      "grad_norm": 0.8225587606430054,
      "learning_rate": 7.17987915368041e-06,
      "loss": 0.0209,
      "step": 875980
    },
    {
      "epoch": 1.4335932130162408,
      "grad_norm": 0.6654423475265503,
      "learning_rate": 7.179813261466892e-06,
      "loss": 0.0163,
      "step": 876000
    },
    {
      "epoch": 1.4336259434548941,
      "grad_norm": 0.10256548970937729,
      "learning_rate": 7.179747369253376e-06,
      "loss": 0.0189,
      "step": 876020
    },
    {
      "epoch": 1.4336586738935475,
      "grad_norm": 0.7142120003700256,
      "learning_rate": 7.179681477039859e-06,
      "loss": 0.0223,
      "step": 876040
    },
    {
      "epoch": 1.4336914043322009,
      "grad_norm": 0.4650498926639557,
      "learning_rate": 7.179615584826341e-06,
      "loss": 0.0193,
      "step": 876060
    },
    {
      "epoch": 1.4337241347708543,
      "grad_norm": 0.3388957381248474,
      "learning_rate": 7.179549692612825e-06,
      "loss": 0.0261,
      "step": 876080
    },
    {
      "epoch": 1.4337568652095074,
      "grad_norm": 0.5687421560287476,
      "learning_rate": 7.1794838003993076e-06,
      "loss": 0.0236,
      "step": 876100
    },
    {
      "epoch": 1.433789595648161,
      "grad_norm": 0.2994372844696045,
      "learning_rate": 7.17941790818579e-06,
      "loss": 0.0219,
      "step": 876120
    },
    {
      "epoch": 1.4338223260868141,
      "grad_norm": 0.42165181040763855,
      "learning_rate": 7.179352015972273e-06,
      "loss": 0.0145,
      "step": 876140
    },
    {
      "epoch": 1.4338550565254675,
      "grad_norm": 1.1764534711837769,
      "learning_rate": 7.179286123758757e-06,
      "loss": 0.0278,
      "step": 876160
    },
    {
      "epoch": 1.433887786964121,
      "grad_norm": 0.767549991607666,
      "learning_rate": 7.1792202315452385e-06,
      "loss": 0.0199,
      "step": 876180
    },
    {
      "epoch": 1.4339205174027743,
      "grad_norm": 0.43356218934059143,
      "learning_rate": 7.179154339331722e-06,
      "loss": 0.0176,
      "step": 876200
    },
    {
      "epoch": 1.4339532478414276,
      "grad_norm": 1.0946100950241089,
      "learning_rate": 7.179088447118204e-06,
      "loss": 0.0183,
      "step": 876220
    },
    {
      "epoch": 1.4339859782800808,
      "grad_norm": 1.2344766855239868,
      "learning_rate": 7.179022554904688e-06,
      "loss": 0.0285,
      "step": 876240
    },
    {
      "epoch": 1.4340187087187344,
      "grad_norm": 0.5322065949440002,
      "learning_rate": 7.17895666269117e-06,
      "loss": 0.0253,
      "step": 876260
    },
    {
      "epoch": 1.4340514391573875,
      "grad_norm": 0.3615431785583496,
      "learning_rate": 7.178890770477653e-06,
      "loss": 0.0186,
      "step": 876280
    },
    {
      "epoch": 1.434084169596041,
      "grad_norm": 0.394235223531723,
      "learning_rate": 7.178824878264136e-06,
      "loss": 0.0182,
      "step": 876300
    },
    {
      "epoch": 1.4341169000346943,
      "grad_norm": 0.483086496591568,
      "learning_rate": 7.178758986050619e-06,
      "loss": 0.0161,
      "step": 876320
    },
    {
      "epoch": 1.4341496304733476,
      "grad_norm": 0.9178221821784973,
      "learning_rate": 7.178693093837101e-06,
      "loss": 0.0219,
      "step": 876340
    },
    {
      "epoch": 1.434182360912001,
      "grad_norm": 0.6530659794807434,
      "learning_rate": 7.178627201623585e-06,
      "loss": 0.0231,
      "step": 876360
    },
    {
      "epoch": 1.4342150913506542,
      "grad_norm": 0.7306063175201416,
      "learning_rate": 7.1785613094100685e-06,
      "loss": 0.0209,
      "step": 876380
    },
    {
      "epoch": 1.4342478217893075,
      "grad_norm": 0.7833055257797241,
      "learning_rate": 7.17849541719655e-06,
      "loss": 0.022,
      "step": 876400
    },
    {
      "epoch": 1.434280552227961,
      "grad_norm": 0.7369660139083862,
      "learning_rate": 7.178429524983034e-06,
      "loss": 0.0254,
      "step": 876420
    },
    {
      "epoch": 1.4343132826666143,
      "grad_norm": 1.4058558940887451,
      "learning_rate": 7.178363632769516e-06,
      "loss": 0.0258,
      "step": 876440
    },
    {
      "epoch": 1.4343460131052677,
      "grad_norm": 0.2565391957759857,
      "learning_rate": 7.1782977405559994e-06,
      "loss": 0.0198,
      "step": 876460
    },
    {
      "epoch": 1.434378743543921,
      "grad_norm": 0.267286092042923,
      "learning_rate": 7.178231848342482e-06,
      "loss": 0.0185,
      "step": 876480
    },
    {
      "epoch": 1.4344114739825744,
      "grad_norm": 0.07314541190862656,
      "learning_rate": 7.178165956128965e-06,
      "loss": 0.0259,
      "step": 876500
    },
    {
      "epoch": 1.4344442044212276,
      "grad_norm": 0.36694538593292236,
      "learning_rate": 7.178100063915448e-06,
      "loss": 0.0252,
      "step": 876520
    },
    {
      "epoch": 1.434476934859881,
      "grad_norm": 0.3583371043205261,
      "learning_rate": 7.178034171701931e-06,
      "loss": 0.0205,
      "step": 876540
    },
    {
      "epoch": 1.4345096652985343,
      "grad_norm": 0.7621015906333923,
      "learning_rate": 7.177968279488413e-06,
      "loss": 0.03,
      "step": 876560
    },
    {
      "epoch": 1.4345423957371877,
      "grad_norm": 0.4070216715335846,
      "learning_rate": 7.177902387274897e-06,
      "loss": 0.0161,
      "step": 876580
    },
    {
      "epoch": 1.434575126175841,
      "grad_norm": 1.8944833278656006,
      "learning_rate": 7.177836495061379e-06,
      "loss": 0.0218,
      "step": 876600
    },
    {
      "epoch": 1.4346078566144944,
      "grad_norm": 0.16549353301525116,
      "learning_rate": 7.177770602847862e-06,
      "loss": 0.0235,
      "step": 876620
    },
    {
      "epoch": 1.4346405870531478,
      "grad_norm": 2.077826738357544,
      "learning_rate": 7.177704710634344e-06,
      "loss": 0.0177,
      "step": 876640
    },
    {
      "epoch": 1.434673317491801,
      "grad_norm": 0.31153756380081177,
      "learning_rate": 7.177638818420828e-06,
      "loss": 0.0194,
      "step": 876660
    },
    {
      "epoch": 1.4347060479304543,
      "grad_norm": 0.2712213695049286,
      "learning_rate": 7.17757292620731e-06,
      "loss": 0.0143,
      "step": 876680
    },
    {
      "epoch": 1.4347387783691077,
      "grad_norm": 0.7359240055084229,
      "learning_rate": 7.177507033993793e-06,
      "loss": 0.0176,
      "step": 876700
    },
    {
      "epoch": 1.434771508807761,
      "grad_norm": 0.6348795294761658,
      "learning_rate": 7.177441141780277e-06,
      "loss": 0.0215,
      "step": 876720
    },
    {
      "epoch": 1.4348042392464144,
      "grad_norm": 1.6371135711669922,
      "learning_rate": 7.1773752495667595e-06,
      "loss": 0.0231,
      "step": 876740
    },
    {
      "epoch": 1.4348369696850678,
      "grad_norm": 0.19909751415252686,
      "learning_rate": 7.177309357353242e-06,
      "loss": 0.0174,
      "step": 876760
    },
    {
      "epoch": 1.4348697001237212,
      "grad_norm": 0.49301019310951233,
      "learning_rate": 7.177243465139725e-06,
      "loss": 0.0167,
      "step": 876780
    },
    {
      "epoch": 1.4349024305623743,
      "grad_norm": 0.3978380262851715,
      "learning_rate": 7.1771775729262085e-06,
      "loss": 0.0185,
      "step": 876800
    },
    {
      "epoch": 1.4349351610010277,
      "grad_norm": 0.2984786629676819,
      "learning_rate": 7.1771116807126904e-06,
      "loss": 0.0238,
      "step": 876820
    },
    {
      "epoch": 1.434967891439681,
      "grad_norm": 0.39304131269454956,
      "learning_rate": 7.177045788499174e-06,
      "loss": 0.018,
      "step": 876840
    },
    {
      "epoch": 1.4350006218783344,
      "grad_norm": 0.6688539981842041,
      "learning_rate": 7.176979896285656e-06,
      "loss": 0.0283,
      "step": 876860
    },
    {
      "epoch": 1.4350333523169878,
      "grad_norm": 0.2648766338825226,
      "learning_rate": 7.1769140040721395e-06,
      "loss": 0.0218,
      "step": 876880
    },
    {
      "epoch": 1.435066082755641,
      "grad_norm": 0.14166875183582306,
      "learning_rate": 7.176848111858622e-06,
      "loss": 0.0193,
      "step": 876900
    },
    {
      "epoch": 1.4350988131942946,
      "grad_norm": 0.7707651257514954,
      "learning_rate": 7.176782219645105e-06,
      "loss": 0.0208,
      "step": 876920
    },
    {
      "epoch": 1.4351315436329477,
      "grad_norm": 0.5482800006866455,
      "learning_rate": 7.176716327431588e-06,
      "loss": 0.0239,
      "step": 876940
    },
    {
      "epoch": 1.435164274071601,
      "grad_norm": 0.3347650468349457,
      "learning_rate": 7.176650435218071e-06,
      "loss": 0.0271,
      "step": 876960
    },
    {
      "epoch": 1.4351970045102544,
      "grad_norm": 0.4485786557197571,
      "learning_rate": 7.176584543004553e-06,
      "loss": 0.0166,
      "step": 876980
    },
    {
      "epoch": 1.4352297349489078,
      "grad_norm": 0.1655634492635727,
      "learning_rate": 7.176518650791037e-06,
      "loss": 0.0189,
      "step": 877000
    },
    {
      "epoch": 1.4352624653875612,
      "grad_norm": 0.7984330058097839,
      "learning_rate": 7.176452758577519e-06,
      "loss": 0.0161,
      "step": 877020
    },
    {
      "epoch": 1.4352951958262143,
      "grad_norm": 0.510358452796936,
      "learning_rate": 7.176386866364002e-06,
      "loss": 0.0164,
      "step": 877040
    },
    {
      "epoch": 1.435327926264868,
      "grad_norm": 0.940834641456604,
      "learning_rate": 7.176320974150485e-06,
      "loss": 0.0336,
      "step": 877060
    },
    {
      "epoch": 1.435360656703521,
      "grad_norm": 0.19903351366519928,
      "learning_rate": 7.176255081936968e-06,
      "loss": 0.0236,
      "step": 877080
    },
    {
      "epoch": 1.4353933871421745,
      "grad_norm": 0.5489232540130615,
      "learning_rate": 7.176189189723451e-06,
      "loss": 0.0158,
      "step": 877100
    },
    {
      "epoch": 1.4354261175808278,
      "grad_norm": 0.8288694024085999,
      "learning_rate": 7.176123297509934e-06,
      "loss": 0.02,
      "step": 877120
    },
    {
      "epoch": 1.4354588480194812,
      "grad_norm": 0.22812074422836304,
      "learning_rate": 7.176057405296417e-06,
      "loss": 0.0291,
      "step": 877140
    },
    {
      "epoch": 1.4354915784581346,
      "grad_norm": 0.3682701289653778,
      "learning_rate": 7.1759915130828996e-06,
      "loss": 0.0192,
      "step": 877160
    },
    {
      "epoch": 1.4355243088967877,
      "grad_norm": 0.6574203968048096,
      "learning_rate": 7.175925620869383e-06,
      "loss": 0.0167,
      "step": 877180
    },
    {
      "epoch": 1.435557039335441,
      "grad_norm": 1.1463619470596313,
      "learning_rate": 7.175859728655865e-06,
      "loss": 0.0199,
      "step": 877200
    },
    {
      "epoch": 1.4355897697740945,
      "grad_norm": 0.5586854219436646,
      "learning_rate": 7.175793836442349e-06,
      "loss": 0.0182,
      "step": 877220
    },
    {
      "epoch": 1.4356225002127478,
      "grad_norm": 0.4917881488800049,
      "learning_rate": 7.1757279442288305e-06,
      "loss": 0.0244,
      "step": 877240
    },
    {
      "epoch": 1.4356552306514012,
      "grad_norm": 0.13720162212848663,
      "learning_rate": 7.175662052015314e-06,
      "loss": 0.0212,
      "step": 877260
    },
    {
      "epoch": 1.4356879610900546,
      "grad_norm": 0.9231047034263611,
      "learning_rate": 7.175596159801797e-06,
      "loss": 0.0248,
      "step": 877280
    },
    {
      "epoch": 1.435720691528708,
      "grad_norm": 0.1052071750164032,
      "learning_rate": 7.1755302675882796e-06,
      "loss": 0.0126,
      "step": 877300
    },
    {
      "epoch": 1.435753421967361,
      "grad_norm": 0.5844114422798157,
      "learning_rate": 7.175464375374762e-06,
      "loss": 0.0127,
      "step": 877320
    },
    {
      "epoch": 1.4357861524060145,
      "grad_norm": 0.6533644199371338,
      "learning_rate": 7.175398483161246e-06,
      "loss": 0.0175,
      "step": 877340
    },
    {
      "epoch": 1.4358188828446679,
      "grad_norm": 0.36969858407974243,
      "learning_rate": 7.175332590947728e-06,
      "loss": 0.0171,
      "step": 877360
    },
    {
      "epoch": 1.4358516132833212,
      "grad_norm": 0.1705458164215088,
      "learning_rate": 7.175266698734211e-06,
      "loss": 0.0176,
      "step": 877380
    },
    {
      "epoch": 1.4358843437219746,
      "grad_norm": 0.8162407875061035,
      "learning_rate": 7.175200806520693e-06,
      "loss": 0.0214,
      "step": 877400
    },
    {
      "epoch": 1.435917074160628,
      "grad_norm": 0.17619462311267853,
      "learning_rate": 7.175134914307177e-06,
      "loss": 0.0313,
      "step": 877420
    },
    {
      "epoch": 1.4359498045992813,
      "grad_norm": 0.6426356434822083,
      "learning_rate": 7.1750690220936604e-06,
      "loss": 0.0316,
      "step": 877440
    },
    {
      "epoch": 1.4359825350379345,
      "grad_norm": 0.8904169797897339,
      "learning_rate": 7.175003129880142e-06,
      "loss": 0.0264,
      "step": 877460
    },
    {
      "epoch": 1.4360152654765879,
      "grad_norm": 0.5357765555381775,
      "learning_rate": 7.174937237666626e-06,
      "loss": 0.033,
      "step": 877480
    },
    {
      "epoch": 1.4360479959152412,
      "grad_norm": 0.38036003708839417,
      "learning_rate": 7.174871345453109e-06,
      "loss": 0.0353,
      "step": 877500
    },
    {
      "epoch": 1.4360807263538946,
      "grad_norm": 0.6623818874359131,
      "learning_rate": 7.174805453239591e-06,
      "loss": 0.0269,
      "step": 877520
    },
    {
      "epoch": 1.436113456792548,
      "grad_norm": 0.7846083641052246,
      "learning_rate": 7.174739561026074e-06,
      "loss": 0.016,
      "step": 877540
    },
    {
      "epoch": 1.4361461872312014,
      "grad_norm": 1.0070173740386963,
      "learning_rate": 7.174673668812558e-06,
      "loss": 0.0191,
      "step": 877560
    },
    {
      "epoch": 1.4361789176698547,
      "grad_norm": 0.3108580708503723,
      "learning_rate": 7.17460777659904e-06,
      "loss": 0.0239,
      "step": 877580
    },
    {
      "epoch": 1.4362116481085079,
      "grad_norm": 0.40334299206733704,
      "learning_rate": 7.174541884385523e-06,
      "loss": 0.0138,
      "step": 877600
    },
    {
      "epoch": 1.4362443785471612,
      "grad_norm": 0.23884989321231842,
      "learning_rate": 7.174475992172005e-06,
      "loss": 0.016,
      "step": 877620
    },
    {
      "epoch": 1.4362771089858146,
      "grad_norm": 0.16136059165000916,
      "learning_rate": 7.174410099958489e-06,
      "loss": 0.0288,
      "step": 877640
    },
    {
      "epoch": 1.436309839424468,
      "grad_norm": 1.0605895519256592,
      "learning_rate": 7.174344207744971e-06,
      "loss": 0.0264,
      "step": 877660
    },
    {
      "epoch": 1.4363425698631214,
      "grad_norm": 1.3404983282089233,
      "learning_rate": 7.174278315531454e-06,
      "loss": 0.0229,
      "step": 877680
    },
    {
      "epoch": 1.4363753003017745,
      "grad_norm": 14.016928672790527,
      "learning_rate": 7.174212423317937e-06,
      "loss": 0.0263,
      "step": 877700
    },
    {
      "epoch": 1.436408030740428,
      "grad_norm": 0.16142261028289795,
      "learning_rate": 7.17414653110442e-06,
      "loss": 0.0228,
      "step": 877720
    },
    {
      "epoch": 1.4364407611790813,
      "grad_norm": 0.2809504568576813,
      "learning_rate": 7.174080638890902e-06,
      "loss": 0.015,
      "step": 877740
    },
    {
      "epoch": 1.4364734916177346,
      "grad_norm": 1.4616669416427612,
      "learning_rate": 7.174014746677386e-06,
      "loss": 0.021,
      "step": 877760
    },
    {
      "epoch": 1.436506222056388,
      "grad_norm": 0.15640777349472046,
      "learning_rate": 7.173948854463869e-06,
      "loss": 0.0188,
      "step": 877780
    },
    {
      "epoch": 1.4365389524950414,
      "grad_norm": 1.5418204069137573,
      "learning_rate": 7.1738829622503515e-06,
      "loss": 0.0211,
      "step": 877800
    },
    {
      "epoch": 1.4365716829336947,
      "grad_norm": 1.2846046686172485,
      "learning_rate": 7.173817070036835e-06,
      "loss": 0.0259,
      "step": 877820
    },
    {
      "epoch": 1.436604413372348,
      "grad_norm": 1.2521592378616333,
      "learning_rate": 7.173751177823317e-06,
      "loss": 0.019,
      "step": 877840
    },
    {
      "epoch": 1.4366371438110015,
      "grad_norm": 0.07427233457565308,
      "learning_rate": 7.1736852856098005e-06,
      "loss": 0.0336,
      "step": 877860
    },
    {
      "epoch": 1.4366698742496546,
      "grad_norm": 0.7553345561027527,
      "learning_rate": 7.173619393396282e-06,
      "loss": 0.0255,
      "step": 877880
    },
    {
      "epoch": 1.436702604688308,
      "grad_norm": 0.44848594069480896,
      "learning_rate": 7.173553501182766e-06,
      "loss": 0.0274,
      "step": 877900
    },
    {
      "epoch": 1.4367353351269614,
      "grad_norm": 0.8407866954803467,
      "learning_rate": 7.173487608969249e-06,
      "loss": 0.0223,
      "step": 877920
    },
    {
      "epoch": 1.4367680655656148,
      "grad_norm": 0.4412614703178406,
      "learning_rate": 7.1734217167557315e-06,
      "loss": 0.0185,
      "step": 877940
    },
    {
      "epoch": 1.4368007960042681,
      "grad_norm": 1.4578068256378174,
      "learning_rate": 7.173355824542214e-06,
      "loss": 0.017,
      "step": 877960
    },
    {
      "epoch": 1.4368335264429213,
      "grad_norm": 0.5163023471832275,
      "learning_rate": 7.173289932328698e-06,
      "loss": 0.0157,
      "step": 877980
    },
    {
      "epoch": 1.4368662568815747,
      "grad_norm": 0.4401761293411255,
      "learning_rate": 7.17322404011518e-06,
      "loss": 0.0208,
      "step": 878000
    },
    {
      "epoch": 1.436898987320228,
      "grad_norm": 0.5624004602432251,
      "learning_rate": 7.173158147901663e-06,
      "loss": 0.0235,
      "step": 878020
    },
    {
      "epoch": 1.4369317177588814,
      "grad_norm": 0.12519320845603943,
      "learning_rate": 7.173092255688145e-06,
      "loss": 0.0182,
      "step": 878040
    },
    {
      "epoch": 1.4369644481975348,
      "grad_norm": 1.1425381898880005,
      "learning_rate": 7.173026363474629e-06,
      "loss": 0.0195,
      "step": 878060
    },
    {
      "epoch": 1.4369971786361881,
      "grad_norm": 1.6346195936203003,
      "learning_rate": 7.1729604712611115e-06,
      "loss": 0.0243,
      "step": 878080
    },
    {
      "epoch": 1.4370299090748415,
      "grad_norm": 0.19939835369586945,
      "learning_rate": 7.172894579047594e-06,
      "loss": 0.019,
      "step": 878100
    },
    {
      "epoch": 1.4370626395134947,
      "grad_norm": 0.42315271496772766,
      "learning_rate": 7.172828686834078e-06,
      "loss": 0.0211,
      "step": 878120
    },
    {
      "epoch": 1.437095369952148,
      "grad_norm": 0.10804497450590134,
      "learning_rate": 7.1727627946205606e-06,
      "loss": 0.0316,
      "step": 878140
    },
    {
      "epoch": 1.4371281003908014,
      "grad_norm": 0.5086473822593689,
      "learning_rate": 7.172696902407043e-06,
      "loss": 0.0234,
      "step": 878160
    },
    {
      "epoch": 1.4371608308294548,
      "grad_norm": 0.2003452330827713,
      "learning_rate": 7.172631010193526e-06,
      "loss": 0.0184,
      "step": 878180
    },
    {
      "epoch": 1.4371935612681082,
      "grad_norm": 0.5478973388671875,
      "learning_rate": 7.17256511798001e-06,
      "loss": 0.026,
      "step": 878200
    },
    {
      "epoch": 1.4372262917067615,
      "grad_norm": 0.6144388318061829,
      "learning_rate": 7.1724992257664915e-06,
      "loss": 0.0259,
      "step": 878220
    },
    {
      "epoch": 1.437259022145415,
      "grad_norm": 0.13826368749141693,
      "learning_rate": 7.172433333552975e-06,
      "loss": 0.018,
      "step": 878240
    },
    {
      "epoch": 1.437291752584068,
      "grad_norm": 0.19729243218898773,
      "learning_rate": 7.172367441339457e-06,
      "loss": 0.0162,
      "step": 878260
    },
    {
      "epoch": 1.4373244830227214,
      "grad_norm": 0.3525661528110504,
      "learning_rate": 7.172301549125941e-06,
      "loss": 0.0194,
      "step": 878280
    },
    {
      "epoch": 1.4373572134613748,
      "grad_norm": 0.4075835347175598,
      "learning_rate": 7.172235656912423e-06,
      "loss": 0.0176,
      "step": 878300
    },
    {
      "epoch": 1.4373899439000282,
      "grad_norm": 0.7050100564956665,
      "learning_rate": 7.172169764698906e-06,
      "loss": 0.0195,
      "step": 878320
    },
    {
      "epoch": 1.4374226743386815,
      "grad_norm": 0.5464282631874084,
      "learning_rate": 7.172103872485389e-06,
      "loss": 0.0163,
      "step": 878340
    },
    {
      "epoch": 1.4374554047773347,
      "grad_norm": 1.3509190082550049,
      "learning_rate": 7.172037980271872e-06,
      "loss": 0.03,
      "step": 878360
    },
    {
      "epoch": 1.4374881352159883,
      "grad_norm": 0.9626688957214355,
      "learning_rate": 7.171972088058354e-06,
      "loss": 0.0197,
      "step": 878380
    },
    {
      "epoch": 1.4375208656546414,
      "grad_norm": 1.1569007635116577,
      "learning_rate": 7.171906195844838e-06,
      "loss": 0.0206,
      "step": 878400
    },
    {
      "epoch": 1.4375535960932948,
      "grad_norm": 0.30620142817497253,
      "learning_rate": 7.17184030363132e-06,
      "loss": 0.0185,
      "step": 878420
    },
    {
      "epoch": 1.4375863265319482,
      "grad_norm": 0.8993410468101501,
      "learning_rate": 7.171774411417803e-06,
      "loss": 0.0264,
      "step": 878440
    },
    {
      "epoch": 1.4376190569706015,
      "grad_norm": 1.8394583463668823,
      "learning_rate": 7.171708519204285e-06,
      "loss": 0.0203,
      "step": 878460
    },
    {
      "epoch": 1.437651787409255,
      "grad_norm": 0.6697675585746765,
      "learning_rate": 7.171642626990769e-06,
      "loss": 0.0169,
      "step": 878480
    },
    {
      "epoch": 1.437684517847908,
      "grad_norm": 0.7366818785667419,
      "learning_rate": 7.1715767347772524e-06,
      "loss": 0.0195,
      "step": 878500
    },
    {
      "epoch": 1.4377172482865617,
      "grad_norm": 0.6764829754829407,
      "learning_rate": 7.171510842563735e-06,
      "loss": 0.0184,
      "step": 878520
    },
    {
      "epoch": 1.4377499787252148,
      "grad_norm": 0.2885821461677551,
      "learning_rate": 7.171444950350218e-06,
      "loss": 0.0162,
      "step": 878540
    },
    {
      "epoch": 1.4377827091638682,
      "grad_norm": 0.23015519976615906,
      "learning_rate": 7.171379058136701e-06,
      "loss": 0.0233,
      "step": 878560
    },
    {
      "epoch": 1.4378154396025216,
      "grad_norm": 0.31404024362564087,
      "learning_rate": 7.171313165923184e-06,
      "loss": 0.0251,
      "step": 878580
    },
    {
      "epoch": 1.437848170041175,
      "grad_norm": 0.9107037782669067,
      "learning_rate": 7.171247273709666e-06,
      "loss": 0.0252,
      "step": 878600
    },
    {
      "epoch": 1.4378809004798283,
      "grad_norm": 0.46727943420410156,
      "learning_rate": 7.17118138149615e-06,
      "loss": 0.0205,
      "step": 878620
    },
    {
      "epoch": 1.4379136309184815,
      "grad_norm": 0.7453337907791138,
      "learning_rate": 7.171115489282632e-06,
      "loss": 0.0276,
      "step": 878640
    },
    {
      "epoch": 1.4379463613571348,
      "grad_norm": 0.22768010199069977,
      "learning_rate": 7.171049597069115e-06,
      "loss": 0.016,
      "step": 878660
    },
    {
      "epoch": 1.4379790917957882,
      "grad_norm": 0.6916515827178955,
      "learning_rate": 7.170983704855597e-06,
      "loss": 0.0247,
      "step": 878680
    },
    {
      "epoch": 1.4380118222344416,
      "grad_norm": 0.5117951035499573,
      "learning_rate": 7.170917812642081e-06,
      "loss": 0.0215,
      "step": 878700
    },
    {
      "epoch": 1.438044552673095,
      "grad_norm": 0.5115350484848022,
      "learning_rate": 7.170851920428563e-06,
      "loss": 0.0206,
      "step": 878720
    },
    {
      "epoch": 1.4380772831117483,
      "grad_norm": 0.26004764437675476,
      "learning_rate": 7.170786028215046e-06,
      "loss": 0.0157,
      "step": 878740
    },
    {
      "epoch": 1.4381100135504017,
      "grad_norm": 0.1261131465435028,
      "learning_rate": 7.170720136001529e-06,
      "loss": 0.0178,
      "step": 878760
    },
    {
      "epoch": 1.4381427439890548,
      "grad_norm": 0.4593343138694763,
      "learning_rate": 7.1706542437880125e-06,
      "loss": 0.0183,
      "step": 878780
    },
    {
      "epoch": 1.4381754744277082,
      "grad_norm": 0.3080037832260132,
      "learning_rate": 7.170588351574494e-06,
      "loss": 0.0227,
      "step": 878800
    },
    {
      "epoch": 1.4382082048663616,
      "grad_norm": 0.42221224308013916,
      "learning_rate": 7.170522459360978e-06,
      "loss": 0.0166,
      "step": 878820
    },
    {
      "epoch": 1.438240935305015,
      "grad_norm": 1.0189400911331177,
      "learning_rate": 7.1704565671474615e-06,
      "loss": 0.0242,
      "step": 878840
    },
    {
      "epoch": 1.4382736657436683,
      "grad_norm": 0.04770408570766449,
      "learning_rate": 7.1703906749339434e-06,
      "loss": 0.0214,
      "step": 878860
    },
    {
      "epoch": 1.4383063961823217,
      "grad_norm": 0.6446518301963806,
      "learning_rate": 7.170324782720427e-06,
      "loss": 0.0217,
      "step": 878880
    },
    {
      "epoch": 1.438339126620975,
      "grad_norm": 0.2625280022621155,
      "learning_rate": 7.170258890506909e-06,
      "loss": 0.0133,
      "step": 878900
    },
    {
      "epoch": 1.4383718570596282,
      "grad_norm": 3.406639337539673,
      "learning_rate": 7.1701929982933925e-06,
      "loss": 0.0182,
      "step": 878920
    },
    {
      "epoch": 1.4384045874982816,
      "grad_norm": 0.4533698260784149,
      "learning_rate": 7.170127106079875e-06,
      "loss": 0.0287,
      "step": 878940
    },
    {
      "epoch": 1.438437317936935,
      "grad_norm": 0.2773995101451874,
      "learning_rate": 7.170061213866358e-06,
      "loss": 0.0158,
      "step": 878960
    },
    {
      "epoch": 1.4384700483755883,
      "grad_norm": 0.9185671806335449,
      "learning_rate": 7.169995321652841e-06,
      "loss": 0.0224,
      "step": 878980
    },
    {
      "epoch": 1.4385027788142417,
      "grad_norm": 0.48392465710639954,
      "learning_rate": 7.169929429439324e-06,
      "loss": 0.0149,
      "step": 879000
    },
    {
      "epoch": 1.438535509252895,
      "grad_norm": 0.43549174070358276,
      "learning_rate": 7.169863537225806e-06,
      "loss": 0.018,
      "step": 879020
    },
    {
      "epoch": 1.4385682396915485,
      "grad_norm": 1.0641499757766724,
      "learning_rate": 7.16979764501229e-06,
      "loss": 0.0215,
      "step": 879040
    },
    {
      "epoch": 1.4386009701302016,
      "grad_norm": 0.2747008800506592,
      "learning_rate": 7.169731752798772e-06,
      "loss": 0.0256,
      "step": 879060
    },
    {
      "epoch": 1.438633700568855,
      "grad_norm": 0.507133424282074,
      "learning_rate": 7.169665860585255e-06,
      "loss": 0.0194,
      "step": 879080
    },
    {
      "epoch": 1.4386664310075084,
      "grad_norm": 1.637162446975708,
      "learning_rate": 7.169599968371738e-06,
      "loss": 0.0135,
      "step": 879100
    },
    {
      "epoch": 1.4386991614461617,
      "grad_norm": 0.5220798850059509,
      "learning_rate": 7.169534076158221e-06,
      "loss": 0.0164,
      "step": 879120
    },
    {
      "epoch": 1.438731891884815,
      "grad_norm": 1.0206509828567505,
      "learning_rate": 7.1694681839447035e-06,
      "loss": 0.0196,
      "step": 879140
    },
    {
      "epoch": 1.4387646223234682,
      "grad_norm": 0.7878243923187256,
      "learning_rate": 7.169402291731187e-06,
      "loss": 0.0181,
      "step": 879160
    },
    {
      "epoch": 1.4387973527621218,
      "grad_norm": 0.8417304754257202,
      "learning_rate": 7.16933639951767e-06,
      "loss": 0.0296,
      "step": 879180
    },
    {
      "epoch": 1.438830083200775,
      "grad_norm": 0.7721778154373169,
      "learning_rate": 7.1692705073041526e-06,
      "loss": 0.021,
      "step": 879200
    },
    {
      "epoch": 1.4388628136394284,
      "grad_norm": 0.4781521260738373,
      "learning_rate": 7.169204615090636e-06,
      "loss": 0.0175,
      "step": 879220
    },
    {
      "epoch": 1.4388955440780817,
      "grad_norm": 0.1357233077287674,
      "learning_rate": 7.169138722877118e-06,
      "loss": 0.0213,
      "step": 879240
    },
    {
      "epoch": 1.438928274516735,
      "grad_norm": 1.1407972574234009,
      "learning_rate": 7.169072830663602e-06,
      "loss": 0.0206,
      "step": 879260
    },
    {
      "epoch": 1.4389610049553885,
      "grad_norm": 0.5871217250823975,
      "learning_rate": 7.1690069384500835e-06,
      "loss": 0.0253,
      "step": 879280
    },
    {
      "epoch": 1.4389937353940416,
      "grad_norm": 0.38155224919319153,
      "learning_rate": 7.168941046236567e-06,
      "loss": 0.0176,
      "step": 879300
    },
    {
      "epoch": 1.4390264658326952,
      "grad_norm": 0.3106599748134613,
      "learning_rate": 7.16887515402305e-06,
      "loss": 0.0251,
      "step": 879320
    },
    {
      "epoch": 1.4390591962713484,
      "grad_norm": 0.5355949401855469,
      "learning_rate": 7.168809261809533e-06,
      "loss": 0.0256,
      "step": 879340
    },
    {
      "epoch": 1.4390919267100017,
      "grad_norm": 0.8302731513977051,
      "learning_rate": 7.168743369596015e-06,
      "loss": 0.0327,
      "step": 879360
    },
    {
      "epoch": 1.4391246571486551,
      "grad_norm": 0.4471748173236847,
      "learning_rate": 7.168677477382499e-06,
      "loss": 0.0155,
      "step": 879380
    },
    {
      "epoch": 1.4391573875873085,
      "grad_norm": 0.44762980937957764,
      "learning_rate": 7.168611585168981e-06,
      "loss": 0.0195,
      "step": 879400
    },
    {
      "epoch": 1.4391901180259619,
      "grad_norm": 0.9150891304016113,
      "learning_rate": 7.168545692955464e-06,
      "loss": 0.0178,
      "step": 879420
    },
    {
      "epoch": 1.439222848464615,
      "grad_norm": 0.2882785201072693,
      "learning_rate": 7.168479800741946e-06,
      "loss": 0.0172,
      "step": 879440
    },
    {
      "epoch": 1.4392555789032684,
      "grad_norm": 0.4184839725494385,
      "learning_rate": 7.16841390852843e-06,
      "loss": 0.0262,
      "step": 879460
    },
    {
      "epoch": 1.4392883093419218,
      "grad_norm": 0.4617919921875,
      "learning_rate": 7.168348016314912e-06,
      "loss": 0.0231,
      "step": 879480
    },
    {
      "epoch": 1.4393210397805751,
      "grad_norm": 0.1920219510793686,
      "learning_rate": 7.168282124101395e-06,
      "loss": 0.0295,
      "step": 879500
    },
    {
      "epoch": 1.4393537702192285,
      "grad_norm": 0.3959607481956482,
      "learning_rate": 7.168216231887878e-06,
      "loss": 0.0277,
      "step": 879520
    },
    {
      "epoch": 1.4393865006578819,
      "grad_norm": 1.088647484779358,
      "learning_rate": 7.168150339674361e-06,
      "loss": 0.0208,
      "step": 879540
    },
    {
      "epoch": 1.4394192310965352,
      "grad_norm": 0.820091962814331,
      "learning_rate": 7.168084447460844e-06,
      "loss": 0.0209,
      "step": 879560
    },
    {
      "epoch": 1.4394519615351884,
      "grad_norm": 0.4950856566429138,
      "learning_rate": 7.168018555247327e-06,
      "loss": 0.0284,
      "step": 879580
    },
    {
      "epoch": 1.4394846919738418,
      "grad_norm": 0.1502860188484192,
      "learning_rate": 7.167952663033811e-06,
      "loss": 0.0246,
      "step": 879600
    },
    {
      "epoch": 1.4395174224124951,
      "grad_norm": 0.9453514218330383,
      "learning_rate": 7.167886770820293e-06,
      "loss": 0.0173,
      "step": 879620
    },
    {
      "epoch": 1.4395501528511485,
      "grad_norm": 0.25328171253204346,
      "learning_rate": 7.167820878606776e-06,
      "loss": 0.0212,
      "step": 879640
    },
    {
      "epoch": 1.4395828832898019,
      "grad_norm": 0.5622564554214478,
      "learning_rate": 7.167754986393258e-06,
      "loss": 0.0213,
      "step": 879660
    },
    {
      "epoch": 1.4396156137284553,
      "grad_norm": 0.30606967210769653,
      "learning_rate": 7.167689094179742e-06,
      "loss": 0.0261,
      "step": 879680
    },
    {
      "epoch": 1.4396483441671086,
      "grad_norm": 0.7093189358711243,
      "learning_rate": 7.167623201966224e-06,
      "loss": 0.0165,
      "step": 879700
    },
    {
      "epoch": 1.4396810746057618,
      "grad_norm": 0.3606358468532562,
      "learning_rate": 7.167557309752707e-06,
      "loss": 0.0207,
      "step": 879720
    },
    {
      "epoch": 1.4397138050444152,
      "grad_norm": 1.4809191226959229,
      "learning_rate": 7.16749141753919e-06,
      "loss": 0.0214,
      "step": 879740
    },
    {
      "epoch": 1.4397465354830685,
      "grad_norm": 0.37861520051956177,
      "learning_rate": 7.167425525325673e-06,
      "loss": 0.0227,
      "step": 879760
    },
    {
      "epoch": 1.439779265921722,
      "grad_norm": 0.46492913365364075,
      "learning_rate": 7.167359633112155e-06,
      "loss": 0.0187,
      "step": 879780
    },
    {
      "epoch": 1.4398119963603753,
      "grad_norm": 0.2733754813671112,
      "learning_rate": 7.167293740898639e-06,
      "loss": 0.0353,
      "step": 879800
    },
    {
      "epoch": 1.4398447267990286,
      "grad_norm": 0.49725961685180664,
      "learning_rate": 7.167227848685121e-06,
      "loss": 0.0173,
      "step": 879820
    },
    {
      "epoch": 1.439877457237682,
      "grad_norm": 0.30167320370674133,
      "learning_rate": 7.1671619564716045e-06,
      "loss": 0.0188,
      "step": 879840
    },
    {
      "epoch": 1.4399101876763352,
      "grad_norm": 0.32196733355522156,
      "learning_rate": 7.167096064258086e-06,
      "loss": 0.0226,
      "step": 879860
    },
    {
      "epoch": 1.4399429181149885,
      "grad_norm": 0.4456672966480255,
      "learning_rate": 7.16703017204457e-06,
      "loss": 0.0285,
      "step": 879880
    },
    {
      "epoch": 1.439975648553642,
      "grad_norm": 0.7081783413887024,
      "learning_rate": 7.1669642798310535e-06,
      "loss": 0.0246,
      "step": 879900
    },
    {
      "epoch": 1.4400083789922953,
      "grad_norm": 0.5403560996055603,
      "learning_rate": 7.1668983876175354e-06,
      "loss": 0.0248,
      "step": 879920
    },
    {
      "epoch": 1.4400411094309487,
      "grad_norm": 0.2716529369354248,
      "learning_rate": 7.166832495404019e-06,
      "loss": 0.0246,
      "step": 879940
    },
    {
      "epoch": 1.4400738398696018,
      "grad_norm": 1.9683353900909424,
      "learning_rate": 7.166766603190502e-06,
      "loss": 0.0279,
      "step": 879960
    },
    {
      "epoch": 1.4401065703082554,
      "grad_norm": 0.23361733555793762,
      "learning_rate": 7.1667007109769845e-06,
      "loss": 0.0283,
      "step": 879980
    },
    {
      "epoch": 1.4401393007469085,
      "grad_norm": 0.5600242614746094,
      "learning_rate": 7.166634818763467e-06,
      "loss": 0.0194,
      "step": 880000
    },
    {
      "epoch": 1.440172031185562,
      "grad_norm": 0.9928276538848877,
      "learning_rate": 7.166568926549951e-06,
      "loss": 0.0158,
      "step": 880020
    },
    {
      "epoch": 1.4402047616242153,
      "grad_norm": 0.5003150105476379,
      "learning_rate": 7.166503034336433e-06,
      "loss": 0.0213,
      "step": 880040
    },
    {
      "epoch": 1.4402374920628687,
      "grad_norm": 0.7061837911605835,
      "learning_rate": 7.166437142122916e-06,
      "loss": 0.0131,
      "step": 880060
    },
    {
      "epoch": 1.440270222501522,
      "grad_norm": 0.7561649084091187,
      "learning_rate": 7.166371249909398e-06,
      "loss": 0.0173,
      "step": 880080
    },
    {
      "epoch": 1.4403029529401752,
      "grad_norm": 0.8200833201408386,
      "learning_rate": 7.166305357695882e-06,
      "loss": 0.0207,
      "step": 880100
    },
    {
      "epoch": 1.4403356833788288,
      "grad_norm": 0.40538540482521057,
      "learning_rate": 7.1662394654823645e-06,
      "loss": 0.024,
      "step": 880120
    },
    {
      "epoch": 1.440368413817482,
      "grad_norm": 0.3997204601764679,
      "learning_rate": 7.166173573268847e-06,
      "loss": 0.024,
      "step": 880140
    },
    {
      "epoch": 1.4404011442561353,
      "grad_norm": 1.0920491218566895,
      "learning_rate": 7.16610768105533e-06,
      "loss": 0.0149,
      "step": 880160
    },
    {
      "epoch": 1.4404338746947887,
      "grad_norm": 0.5847649574279785,
      "learning_rate": 7.166041788841814e-06,
      "loss": 0.0233,
      "step": 880180
    },
    {
      "epoch": 1.440466605133442,
      "grad_norm": 0.8859069347381592,
      "learning_rate": 7.1659758966282955e-06,
      "loss": 0.0227,
      "step": 880200
    },
    {
      "epoch": 1.4404993355720954,
      "grad_norm": 0.18169616162776947,
      "learning_rate": 7.165910004414779e-06,
      "loss": 0.0337,
      "step": 880220
    },
    {
      "epoch": 1.4405320660107486,
      "grad_norm": 0.9796154499053955,
      "learning_rate": 7.165844112201263e-06,
      "loss": 0.0237,
      "step": 880240
    },
    {
      "epoch": 1.440564796449402,
      "grad_norm": 1.0212085247039795,
      "learning_rate": 7.1657782199877445e-06,
      "loss": 0.028,
      "step": 880260
    },
    {
      "epoch": 1.4405975268880553,
      "grad_norm": 1.6902395486831665,
      "learning_rate": 7.165712327774228e-06,
      "loss": 0.0179,
      "step": 880280
    },
    {
      "epoch": 1.4406302573267087,
      "grad_norm": 0.4306008517742157,
      "learning_rate": 7.16564643556071e-06,
      "loss": 0.0228,
      "step": 880300
    },
    {
      "epoch": 1.440662987765362,
      "grad_norm": 0.21029698848724365,
      "learning_rate": 7.165580543347194e-06,
      "loss": 0.0212,
      "step": 880320
    },
    {
      "epoch": 1.4406957182040154,
      "grad_norm": 1.0798754692077637,
      "learning_rate": 7.165514651133676e-06,
      "loss": 0.0211,
      "step": 880340
    },
    {
      "epoch": 1.4407284486426688,
      "grad_norm": 0.2922961115837097,
      "learning_rate": 7.165448758920159e-06,
      "loss": 0.0154,
      "step": 880360
    },
    {
      "epoch": 1.440761179081322,
      "grad_norm": 0.1198320984840393,
      "learning_rate": 7.165382866706642e-06,
      "loss": 0.019,
      "step": 880380
    },
    {
      "epoch": 1.4407939095199753,
      "grad_norm": 0.6963407397270203,
      "learning_rate": 7.165316974493125e-06,
      "loss": 0.0211,
      "step": 880400
    },
    {
      "epoch": 1.4408266399586287,
      "grad_norm": 0.40031567215919495,
      "learning_rate": 7.165251082279607e-06,
      "loss": 0.0206,
      "step": 880420
    },
    {
      "epoch": 1.440859370397282,
      "grad_norm": 0.2351113259792328,
      "learning_rate": 7.165185190066091e-06,
      "loss": 0.0262,
      "step": 880440
    },
    {
      "epoch": 1.4408921008359354,
      "grad_norm": 0.2785480320453644,
      "learning_rate": 7.165119297852573e-06,
      "loss": 0.0285,
      "step": 880460
    },
    {
      "epoch": 1.4409248312745888,
      "grad_norm": 0.6466748118400574,
      "learning_rate": 7.165053405639056e-06,
      "loss": 0.0255,
      "step": 880480
    },
    {
      "epoch": 1.4409575617132422,
      "grad_norm": 0.6140119433403015,
      "learning_rate": 7.164987513425538e-06,
      "loss": 0.0226,
      "step": 880500
    },
    {
      "epoch": 1.4409902921518953,
      "grad_norm": 1.4682847261428833,
      "learning_rate": 7.164921621212022e-06,
      "loss": 0.0242,
      "step": 880520
    },
    {
      "epoch": 1.4410230225905487,
      "grad_norm": 0.23295539617538452,
      "learning_rate": 7.164855728998505e-06,
      "loss": 0.0155,
      "step": 880540
    },
    {
      "epoch": 1.441055753029202,
      "grad_norm": 0.7406415343284607,
      "learning_rate": 7.164789836784987e-06,
      "loss": 0.0229,
      "step": 880560
    },
    {
      "epoch": 1.4410884834678555,
      "grad_norm": 0.37135550379753113,
      "learning_rate": 7.16472394457147e-06,
      "loss": 0.0173,
      "step": 880580
    },
    {
      "epoch": 1.4411212139065088,
      "grad_norm": 0.44462692737579346,
      "learning_rate": 7.164658052357954e-06,
      "loss": 0.0247,
      "step": 880600
    },
    {
      "epoch": 1.4411539443451622,
      "grad_norm": 0.4150327146053314,
      "learning_rate": 7.164592160144436e-06,
      "loss": 0.0213,
      "step": 880620
    },
    {
      "epoch": 1.4411866747838156,
      "grad_norm": 0.7632132172584534,
      "learning_rate": 7.164526267930919e-06,
      "loss": 0.0174,
      "step": 880640
    },
    {
      "epoch": 1.4412194052224687,
      "grad_norm": 1.041733980178833,
      "learning_rate": 7.164460375717403e-06,
      "loss": 0.0146,
      "step": 880660
    },
    {
      "epoch": 1.441252135661122,
      "grad_norm": 0.5290741920471191,
      "learning_rate": 7.164394483503885e-06,
      "loss": 0.0201,
      "step": 880680
    },
    {
      "epoch": 1.4412848660997755,
      "grad_norm": 0.5481050610542297,
      "learning_rate": 7.164328591290368e-06,
      "loss": 0.0295,
      "step": 880700
    },
    {
      "epoch": 1.4413175965384288,
      "grad_norm": 0.40389567613601685,
      "learning_rate": 7.16426269907685e-06,
      "loss": 0.0243,
      "step": 880720
    },
    {
      "epoch": 1.4413503269770822,
      "grad_norm": 0.6286894679069519,
      "learning_rate": 7.164196806863334e-06,
      "loss": 0.0268,
      "step": 880740
    },
    {
      "epoch": 1.4413830574157354,
      "grad_norm": 0.22111588716506958,
      "learning_rate": 7.164130914649816e-06,
      "loss": 0.0198,
      "step": 880760
    },
    {
      "epoch": 1.441415787854389,
      "grad_norm": 0.4855402112007141,
      "learning_rate": 7.164065022436299e-06,
      "loss": 0.0173,
      "step": 880780
    },
    {
      "epoch": 1.441448518293042,
      "grad_norm": 2.7667555809020996,
      "learning_rate": 7.163999130222782e-06,
      "loss": 0.0219,
      "step": 880800
    },
    {
      "epoch": 1.4414812487316955,
      "grad_norm": 0.7197117209434509,
      "learning_rate": 7.1639332380092655e-06,
      "loss": 0.0144,
      "step": 880820
    },
    {
      "epoch": 1.4415139791703488,
      "grad_norm": 2.5603842735290527,
      "learning_rate": 7.163867345795747e-06,
      "loss": 0.0176,
      "step": 880840
    },
    {
      "epoch": 1.4415467096090022,
      "grad_norm": 0.41902998089790344,
      "learning_rate": 7.163801453582231e-06,
      "loss": 0.0191,
      "step": 880860
    },
    {
      "epoch": 1.4415794400476556,
      "grad_norm": 1.7434130907058716,
      "learning_rate": 7.163735561368713e-06,
      "loss": 0.0176,
      "step": 880880
    },
    {
      "epoch": 1.4416121704863087,
      "grad_norm": 0.5472232103347778,
      "learning_rate": 7.1636696691551964e-06,
      "loss": 0.0217,
      "step": 880900
    },
    {
      "epoch": 1.4416449009249621,
      "grad_norm": 0.7824000716209412,
      "learning_rate": 7.163603776941679e-06,
      "loss": 0.0163,
      "step": 880920
    },
    {
      "epoch": 1.4416776313636155,
      "grad_norm": 0.10127787292003632,
      "learning_rate": 7.163537884728162e-06,
      "loss": 0.017,
      "step": 880940
    },
    {
      "epoch": 1.4417103618022689,
      "grad_norm": 0.6424037218093872,
      "learning_rate": 7.1634719925146455e-06,
      "loss": 0.0265,
      "step": 880960
    },
    {
      "epoch": 1.4417430922409222,
      "grad_norm": 0.6858240365982056,
      "learning_rate": 7.163406100301128e-06,
      "loss": 0.0233,
      "step": 880980
    },
    {
      "epoch": 1.4417758226795756,
      "grad_norm": 1.4179205894470215,
      "learning_rate": 7.163340208087611e-06,
      "loss": 0.0189,
      "step": 881000
    },
    {
      "epoch": 1.441808553118229,
      "grad_norm": 0.6840240359306335,
      "learning_rate": 7.163274315874094e-06,
      "loss": 0.028,
      "step": 881020
    },
    {
      "epoch": 1.4418412835568821,
      "grad_norm": 0.4025411009788513,
      "learning_rate": 7.163208423660577e-06,
      "loss": 0.0174,
      "step": 881040
    },
    {
      "epoch": 1.4418740139955355,
      "grad_norm": 1.054882287979126,
      "learning_rate": 7.163142531447059e-06,
      "loss": 0.0187,
      "step": 881060
    },
    {
      "epoch": 1.4419067444341889,
      "grad_norm": 0.24592609703540802,
      "learning_rate": 7.163076639233543e-06,
      "loss": 0.0231,
      "step": 881080
    },
    {
      "epoch": 1.4419394748728422,
      "grad_norm": 0.7640995979309082,
      "learning_rate": 7.163010747020025e-06,
      "loss": 0.0146,
      "step": 881100
    },
    {
      "epoch": 1.4419722053114956,
      "grad_norm": 2.0454611778259277,
      "learning_rate": 7.162944854806508e-06,
      "loss": 0.0245,
      "step": 881120
    },
    {
      "epoch": 1.442004935750149,
      "grad_norm": 0.590192973613739,
      "learning_rate": 7.162878962592991e-06,
      "loss": 0.022,
      "step": 881140
    },
    {
      "epoch": 1.4420376661888024,
      "grad_norm": 0.7872475981712341,
      "learning_rate": 7.162813070379474e-06,
      "loss": 0.0199,
      "step": 881160
    },
    {
      "epoch": 1.4420703966274555,
      "grad_norm": 0.3846353590488434,
      "learning_rate": 7.1627471781659565e-06,
      "loss": 0.0174,
      "step": 881180
    },
    {
      "epoch": 1.4421031270661089,
      "grad_norm": 3.2302510738372803,
      "learning_rate": 7.16268128595244e-06,
      "loss": 0.0159,
      "step": 881200
    },
    {
      "epoch": 1.4421358575047623,
      "grad_norm": 0.23080512881278992,
      "learning_rate": 7.162615393738922e-06,
      "loss": 0.015,
      "step": 881220
    },
    {
      "epoch": 1.4421685879434156,
      "grad_norm": 0.44102713465690613,
      "learning_rate": 7.1625495015254056e-06,
      "loss": 0.0155,
      "step": 881240
    },
    {
      "epoch": 1.442201318382069,
      "grad_norm": 0.9740855097770691,
      "learning_rate": 7.1624836093118875e-06,
      "loss": 0.0372,
      "step": 881260
    },
    {
      "epoch": 1.4422340488207224,
      "grad_norm": 0.650888979434967,
      "learning_rate": 7.162417717098371e-06,
      "loss": 0.0115,
      "step": 881280
    },
    {
      "epoch": 1.4422667792593757,
      "grad_norm": 0.39937624335289,
      "learning_rate": 7.162351824884855e-06,
      "loss": 0.0169,
      "step": 881300
    },
    {
      "epoch": 1.442299509698029,
      "grad_norm": 0.5576843023300171,
      "learning_rate": 7.1622859326713365e-06,
      "loss": 0.0129,
      "step": 881320
    },
    {
      "epoch": 1.4423322401366823,
      "grad_norm": 0.17416393756866455,
      "learning_rate": 7.16222004045782e-06,
      "loss": 0.0171,
      "step": 881340
    },
    {
      "epoch": 1.4423649705753356,
      "grad_norm": 0.5335514545440674,
      "learning_rate": 7.162154148244303e-06,
      "loss": 0.0268,
      "step": 881360
    },
    {
      "epoch": 1.442397701013989,
      "grad_norm": 0.42317458987236023,
      "learning_rate": 7.162088256030786e-06,
      "loss": 0.0184,
      "step": 881380
    },
    {
      "epoch": 1.4424304314526424,
      "grad_norm": 0.33669373393058777,
      "learning_rate": 7.162022363817268e-06,
      "loss": 0.0208,
      "step": 881400
    },
    {
      "epoch": 1.4424631618912955,
      "grad_norm": 0.13698400557041168,
      "learning_rate": 7.161956471603752e-06,
      "loss": 0.0165,
      "step": 881420
    },
    {
      "epoch": 1.4424958923299491,
      "grad_norm": 0.17310768365859985,
      "learning_rate": 7.161890579390234e-06,
      "loss": 0.0172,
      "step": 881440
    },
    {
      "epoch": 1.4425286227686023,
      "grad_norm": 12.855627059936523,
      "learning_rate": 7.161824687176717e-06,
      "loss": 0.0169,
      "step": 881460
    },
    {
      "epoch": 1.4425613532072556,
      "grad_norm": 0.824009120464325,
      "learning_rate": 7.161758794963199e-06,
      "loss": 0.0181,
      "step": 881480
    },
    {
      "epoch": 1.442594083645909,
      "grad_norm": 1.1939973831176758,
      "learning_rate": 7.161692902749683e-06,
      "loss": 0.0243,
      "step": 881500
    },
    {
      "epoch": 1.4426268140845624,
      "grad_norm": 1.350570797920227,
      "learning_rate": 7.161627010536165e-06,
      "loss": 0.0251,
      "step": 881520
    },
    {
      "epoch": 1.4426595445232158,
      "grad_norm": 0.6669574975967407,
      "learning_rate": 7.161561118322648e-06,
      "loss": 0.0172,
      "step": 881540
    },
    {
      "epoch": 1.442692274961869,
      "grad_norm": 0.6533504128456116,
      "learning_rate": 7.161495226109131e-06,
      "loss": 0.0247,
      "step": 881560
    },
    {
      "epoch": 1.4427250054005225,
      "grad_norm": 0.8345761895179749,
      "learning_rate": 7.161429333895614e-06,
      "loss": 0.0225,
      "step": 881580
    },
    {
      "epoch": 1.4427577358391757,
      "grad_norm": 0.3443886935710907,
      "learning_rate": 7.1613634416820966e-06,
      "loss": 0.0206,
      "step": 881600
    },
    {
      "epoch": 1.442790466277829,
      "grad_norm": 0.9023642539978027,
      "learning_rate": 7.16129754946858e-06,
      "loss": 0.0209,
      "step": 881620
    },
    {
      "epoch": 1.4428231967164824,
      "grad_norm": 0.41827234625816345,
      "learning_rate": 7.161231657255063e-06,
      "loss": 0.0291,
      "step": 881640
    },
    {
      "epoch": 1.4428559271551358,
      "grad_norm": 1.5881386995315552,
      "learning_rate": 7.161165765041546e-06,
      "loss": 0.0324,
      "step": 881660
    },
    {
      "epoch": 1.4428886575937891,
      "grad_norm": 0.28453728556632996,
      "learning_rate": 7.161099872828029e-06,
      "loss": 0.0175,
      "step": 881680
    },
    {
      "epoch": 1.4429213880324423,
      "grad_norm": 0.3617616295814514,
      "learning_rate": 7.161033980614511e-06,
      "loss": 0.0141,
      "step": 881700
    },
    {
      "epoch": 1.4429541184710957,
      "grad_norm": 0.31394776701927185,
      "learning_rate": 7.160968088400995e-06,
      "loss": 0.0247,
      "step": 881720
    },
    {
      "epoch": 1.442986848909749,
      "grad_norm": 0.6754559278488159,
      "learning_rate": 7.160902196187477e-06,
      "loss": 0.0164,
      "step": 881740
    },
    {
      "epoch": 1.4430195793484024,
      "grad_norm": 0.9678961634635925,
      "learning_rate": 7.16083630397396e-06,
      "loss": 0.024,
      "step": 881760
    },
    {
      "epoch": 1.4430523097870558,
      "grad_norm": 0.5929534435272217,
      "learning_rate": 7.160770411760443e-06,
      "loss": 0.0145,
      "step": 881780
    },
    {
      "epoch": 1.4430850402257092,
      "grad_norm": 0.7491751909255981,
      "learning_rate": 7.160704519546926e-06,
      "loss": 0.0158,
      "step": 881800
    },
    {
      "epoch": 1.4431177706643625,
      "grad_norm": 0.9666696190834045,
      "learning_rate": 7.160638627333408e-06,
      "loss": 0.0287,
      "step": 881820
    },
    {
      "epoch": 1.4431505011030157,
      "grad_norm": 0.2461571842432022,
      "learning_rate": 7.160572735119892e-06,
      "loss": 0.0202,
      "step": 881840
    },
    {
      "epoch": 1.443183231541669,
      "grad_norm": 1.07429039478302,
      "learning_rate": 7.160506842906374e-06,
      "loss": 0.0258,
      "step": 881860
    },
    {
      "epoch": 1.4432159619803224,
      "grad_norm": 0.16856136918067932,
      "learning_rate": 7.1604409506928575e-06,
      "loss": 0.0179,
      "step": 881880
    },
    {
      "epoch": 1.4432486924189758,
      "grad_norm": 0.34192100167274475,
      "learning_rate": 7.160375058479339e-06,
      "loss": 0.0204,
      "step": 881900
    },
    {
      "epoch": 1.4432814228576292,
      "grad_norm": 1.5602360963821411,
      "learning_rate": 7.160309166265823e-06,
      "loss": 0.0239,
      "step": 881920
    },
    {
      "epoch": 1.4433141532962825,
      "grad_norm": 0.36231401562690735,
      "learning_rate": 7.160243274052306e-06,
      "loss": 0.019,
      "step": 881940
    },
    {
      "epoch": 1.443346883734936,
      "grad_norm": 0.6966169476509094,
      "learning_rate": 7.1601773818387884e-06,
      "loss": 0.0156,
      "step": 881960
    },
    {
      "epoch": 1.443379614173589,
      "grad_norm": 0.8542259931564331,
      "learning_rate": 7.160111489625271e-06,
      "loss": 0.0227,
      "step": 881980
    },
    {
      "epoch": 1.4434123446122424,
      "grad_norm": 1.8376178741455078,
      "learning_rate": 7.160045597411755e-06,
      "loss": 0.0163,
      "step": 882000
    },
    {
      "epoch": 1.4434450750508958,
      "grad_norm": 1.1182050704956055,
      "learning_rate": 7.1599797051982375e-06,
      "loss": 0.0182,
      "step": 882020
    },
    {
      "epoch": 1.4434778054895492,
      "grad_norm": 0.40469494462013245,
      "learning_rate": 7.15991381298472e-06,
      "loss": 0.0221,
      "step": 882040
    },
    {
      "epoch": 1.4435105359282026,
      "grad_norm": 0.5785514712333679,
      "learning_rate": 7.159847920771204e-06,
      "loss": 0.0138,
      "step": 882060
    },
    {
      "epoch": 1.443543266366856,
      "grad_norm": 0.18242277204990387,
      "learning_rate": 7.159782028557686e-06,
      "loss": 0.0171,
      "step": 882080
    },
    {
      "epoch": 1.4435759968055093,
      "grad_norm": 0.7280851602554321,
      "learning_rate": 7.159716136344169e-06,
      "loss": 0.0266,
      "step": 882100
    },
    {
      "epoch": 1.4436087272441624,
      "grad_norm": 0.3621402382850647,
      "learning_rate": 7.159650244130651e-06,
      "loss": 0.0242,
      "step": 882120
    },
    {
      "epoch": 1.4436414576828158,
      "grad_norm": 1.828853726387024,
      "learning_rate": 7.159584351917135e-06,
      "loss": 0.0195,
      "step": 882140
    },
    {
      "epoch": 1.4436741881214692,
      "grad_norm": 0.6451300978660583,
      "learning_rate": 7.1595184597036175e-06,
      "loss": 0.0211,
      "step": 882160
    },
    {
      "epoch": 1.4437069185601226,
      "grad_norm": 1.1615544557571411,
      "learning_rate": 7.1594525674901e-06,
      "loss": 0.0195,
      "step": 882180
    },
    {
      "epoch": 1.443739648998776,
      "grad_norm": 0.25692301988601685,
      "learning_rate": 7.159386675276583e-06,
      "loss": 0.0167,
      "step": 882200
    },
    {
      "epoch": 1.443772379437429,
      "grad_norm": 0.7027114629745483,
      "learning_rate": 7.159320783063067e-06,
      "loss": 0.0162,
      "step": 882220
    },
    {
      "epoch": 1.4438051098760827,
      "grad_norm": 0.2575860917568207,
      "learning_rate": 7.1592548908495485e-06,
      "loss": 0.0171,
      "step": 882240
    },
    {
      "epoch": 1.4438378403147358,
      "grad_norm": 0.4893980920314789,
      "learning_rate": 7.159188998636032e-06,
      "loss": 0.0254,
      "step": 882260
    },
    {
      "epoch": 1.4438705707533892,
      "grad_norm": 0.2321070283651352,
      "learning_rate": 7.159123106422514e-06,
      "loss": 0.0253,
      "step": 882280
    },
    {
      "epoch": 1.4439033011920426,
      "grad_norm": 0.6292281150817871,
      "learning_rate": 7.1590572142089975e-06,
      "loss": 0.0246,
      "step": 882300
    },
    {
      "epoch": 1.443936031630696,
      "grad_norm": 0.18052470684051514,
      "learning_rate": 7.1589913219954794e-06,
      "loss": 0.03,
      "step": 882320
    },
    {
      "epoch": 1.4439687620693493,
      "grad_norm": 0.3554922938346863,
      "learning_rate": 7.158925429781963e-06,
      "loss": 0.021,
      "step": 882340
    },
    {
      "epoch": 1.4440014925080025,
      "grad_norm": 1.835153579711914,
      "learning_rate": 7.158859537568447e-06,
      "loss": 0.0301,
      "step": 882360
    },
    {
      "epoch": 1.444034222946656,
      "grad_norm": 0.9083579182624817,
      "learning_rate": 7.158793645354929e-06,
      "loss": 0.0125,
      "step": 882380
    },
    {
      "epoch": 1.4440669533853092,
      "grad_norm": 1.196578860282898,
      "learning_rate": 7.158727753141412e-06,
      "loss": 0.0265,
      "step": 882400
    },
    {
      "epoch": 1.4440996838239626,
      "grad_norm": 0.4490964710712433,
      "learning_rate": 7.158661860927895e-06,
      "loss": 0.0249,
      "step": 882420
    },
    {
      "epoch": 1.444132414262616,
      "grad_norm": 0.2903425395488739,
      "learning_rate": 7.158595968714378e-06,
      "loss": 0.0218,
      "step": 882440
    },
    {
      "epoch": 1.4441651447012693,
      "grad_norm": 0.24563667178153992,
      "learning_rate": 7.15853007650086e-06,
      "loss": 0.0167,
      "step": 882460
    },
    {
      "epoch": 1.4441978751399227,
      "grad_norm": 0.5160938501358032,
      "learning_rate": 7.158464184287344e-06,
      "loss": 0.0208,
      "step": 882480
    },
    {
      "epoch": 1.4442306055785759,
      "grad_norm": 0.5817054510116577,
      "learning_rate": 7.158398292073826e-06,
      "loss": 0.0175,
      "step": 882500
    },
    {
      "epoch": 1.4442633360172292,
      "grad_norm": 1.0617363452911377,
      "learning_rate": 7.158332399860309e-06,
      "loss": 0.0228,
      "step": 882520
    },
    {
      "epoch": 1.4442960664558826,
      "grad_norm": 0.79248046875,
      "learning_rate": 7.158266507646791e-06,
      "loss": 0.023,
      "step": 882540
    },
    {
      "epoch": 1.444328796894536,
      "grad_norm": 0.20999418199062347,
      "learning_rate": 7.158200615433275e-06,
      "loss": 0.0276,
      "step": 882560
    },
    {
      "epoch": 1.4443615273331893,
      "grad_norm": 0.9619979858398438,
      "learning_rate": 7.158134723219758e-06,
      "loss": 0.0267,
      "step": 882580
    },
    {
      "epoch": 1.4443942577718427,
      "grad_norm": 0.15396662056446075,
      "learning_rate": 7.15806883100624e-06,
      "loss": 0.0192,
      "step": 882600
    },
    {
      "epoch": 1.444426988210496,
      "grad_norm": 0.4919871687889099,
      "learning_rate": 7.158002938792723e-06,
      "loss": 0.0226,
      "step": 882620
    },
    {
      "epoch": 1.4444597186491492,
      "grad_norm": 0.4536176919937134,
      "learning_rate": 7.157937046579207e-06,
      "loss": 0.0214,
      "step": 882640
    },
    {
      "epoch": 1.4444924490878026,
      "grad_norm": 0.4158720374107361,
      "learning_rate": 7.1578711543656886e-06,
      "loss": 0.0214,
      "step": 882660
    },
    {
      "epoch": 1.444525179526456,
      "grad_norm": 0.6113974452018738,
      "learning_rate": 7.157805262152172e-06,
      "loss": 0.0232,
      "step": 882680
    },
    {
      "epoch": 1.4445579099651094,
      "grad_norm": 0.8043476343154907,
      "learning_rate": 7.157739369938656e-06,
      "loss": 0.0196,
      "step": 882700
    },
    {
      "epoch": 1.4445906404037627,
      "grad_norm": 2.404172897338867,
      "learning_rate": 7.157673477725138e-06,
      "loss": 0.0272,
      "step": 882720
    },
    {
      "epoch": 1.444623370842416,
      "grad_norm": 0.3050779700279236,
      "learning_rate": 7.157607585511621e-06,
      "loss": 0.02,
      "step": 882740
    },
    {
      "epoch": 1.4446561012810695,
      "grad_norm": 1.1835932731628418,
      "learning_rate": 7.157541693298103e-06,
      "loss": 0.0373,
      "step": 882760
    },
    {
      "epoch": 1.4446888317197226,
      "grad_norm": 0.6012570858001709,
      "learning_rate": 7.157475801084587e-06,
      "loss": 0.0168,
      "step": 882780
    },
    {
      "epoch": 1.444721562158376,
      "grad_norm": 0.2937021851539612,
      "learning_rate": 7.1574099088710694e-06,
      "loss": 0.021,
      "step": 882800
    },
    {
      "epoch": 1.4447542925970294,
      "grad_norm": 0.2632516622543335,
      "learning_rate": 7.157344016657552e-06,
      "loss": 0.0211,
      "step": 882820
    },
    {
      "epoch": 1.4447870230356827,
      "grad_norm": 0.17964082956314087,
      "learning_rate": 7.157278124444035e-06,
      "loss": 0.0254,
      "step": 882840
    },
    {
      "epoch": 1.444819753474336,
      "grad_norm": 0.7649773359298706,
      "learning_rate": 7.1572122322305185e-06,
      "loss": 0.0198,
      "step": 882860
    },
    {
      "epoch": 1.4448524839129895,
      "grad_norm": 0.6296398639678955,
      "learning_rate": 7.157146340017e-06,
      "loss": 0.0162,
      "step": 882880
    },
    {
      "epoch": 1.4448852143516429,
      "grad_norm": 0.7990137338638306,
      "learning_rate": 7.157080447803484e-06,
      "loss": 0.0188,
      "step": 882900
    },
    {
      "epoch": 1.444917944790296,
      "grad_norm": 0.5867519378662109,
      "learning_rate": 7.157014555589966e-06,
      "loss": 0.0254,
      "step": 882920
    },
    {
      "epoch": 1.4449506752289494,
      "grad_norm": 0.3277939260005951,
      "learning_rate": 7.1569486633764495e-06,
      "loss": 0.0222,
      "step": 882940
    },
    {
      "epoch": 1.4449834056676027,
      "grad_norm": 0.5919301509857178,
      "learning_rate": 7.156882771162932e-06,
      "loss": 0.0213,
      "step": 882960
    },
    {
      "epoch": 1.4450161361062561,
      "grad_norm": 0.27564820647239685,
      "learning_rate": 7.156816878949415e-06,
      "loss": 0.0169,
      "step": 882980
    },
    {
      "epoch": 1.4450488665449095,
      "grad_norm": 0.9770641922950745,
      "learning_rate": 7.156750986735898e-06,
      "loss": 0.0213,
      "step": 883000
    },
    {
      "epoch": 1.4450815969835626,
      "grad_norm": 1.096157193183899,
      "learning_rate": 7.156685094522381e-06,
      "loss": 0.019,
      "step": 883020
    },
    {
      "epoch": 1.4451143274222162,
      "grad_norm": 0.7163402438163757,
      "learning_rate": 7.156619202308863e-06,
      "loss": 0.0205,
      "step": 883040
    },
    {
      "epoch": 1.4451470578608694,
      "grad_norm": 0.7638422846794128,
      "learning_rate": 7.156553310095347e-06,
      "loss": 0.0258,
      "step": 883060
    },
    {
      "epoch": 1.4451797882995228,
      "grad_norm": 0.1779364049434662,
      "learning_rate": 7.15648741788183e-06,
      "loss": 0.0161,
      "step": 883080
    },
    {
      "epoch": 1.4452125187381761,
      "grad_norm": 1.7749488353729248,
      "learning_rate": 7.156421525668312e-06,
      "loss": 0.0274,
      "step": 883100
    },
    {
      "epoch": 1.4452452491768295,
      "grad_norm": 0.7776759266853333,
      "learning_rate": 7.156355633454796e-06,
      "loss": 0.0175,
      "step": 883120
    },
    {
      "epoch": 1.4452779796154829,
      "grad_norm": 0.8568398356437683,
      "learning_rate": 7.156289741241278e-06,
      "loss": 0.0115,
      "step": 883140
    },
    {
      "epoch": 1.445310710054136,
      "grad_norm": 0.21581678092479706,
      "learning_rate": 7.156223849027761e-06,
      "loss": 0.0175,
      "step": 883160
    },
    {
      "epoch": 1.4453434404927896,
      "grad_norm": null,
      "learning_rate": 7.156157956814244e-06,
      "loss": 0.0193,
      "step": 883180
    },
    {
      "epoch": 1.4453761709314428,
      "grad_norm": 1.0570623874664307,
      "learning_rate": 7.156092064600727e-06,
      "loss": 0.0217,
      "step": 883200
    },
    {
      "epoch": 1.4454089013700961,
      "grad_norm": 5.705778121948242,
      "learning_rate": 7.1560261723872095e-06,
      "loss": 0.0212,
      "step": 883220
    },
    {
      "epoch": 1.4454416318087495,
      "grad_norm": 0.20047760009765625,
      "learning_rate": 7.155960280173693e-06,
      "loss": 0.0176,
      "step": 883240
    },
    {
      "epoch": 1.4454743622474029,
      "grad_norm": 0.28276509046554565,
      "learning_rate": 7.155894387960175e-06,
      "loss": 0.0225,
      "step": 883260
    },
    {
      "epoch": 1.4455070926860563,
      "grad_norm": 1.066948652267456,
      "learning_rate": 7.1558284957466586e-06,
      "loss": 0.0164,
      "step": 883280
    },
    {
      "epoch": 1.4455398231247094,
      "grad_norm": 0.22041785717010498,
      "learning_rate": 7.1557626035331405e-06,
      "loss": 0.023,
      "step": 883300
    },
    {
      "epoch": 1.4455725535633628,
      "grad_norm": 0.34864118695259094,
      "learning_rate": 7.155696711319624e-06,
      "loss": 0.0128,
      "step": 883320
    },
    {
      "epoch": 1.4456052840020162,
      "grad_norm": 0.5727599859237671,
      "learning_rate": 7.155630819106106e-06,
      "loss": 0.0196,
      "step": 883340
    },
    {
      "epoch": 1.4456380144406695,
      "grad_norm": 0.743514358997345,
      "learning_rate": 7.1555649268925895e-06,
      "loss": 0.0205,
      "step": 883360
    },
    {
      "epoch": 1.445670744879323,
      "grad_norm": 0.6372822523117065,
      "learning_rate": 7.155499034679072e-06,
      "loss": 0.0236,
      "step": 883380
    },
    {
      "epoch": 1.4457034753179763,
      "grad_norm": 0.37786173820495605,
      "learning_rate": 7.155433142465555e-06,
      "loss": 0.0173,
      "step": 883400
    },
    {
      "epoch": 1.4457362057566296,
      "grad_norm": 1.551797866821289,
      "learning_rate": 7.155367250252039e-06,
      "loss": 0.0234,
      "step": 883420
    },
    {
      "epoch": 1.4457689361952828,
      "grad_norm": 0.47982850670814514,
      "learning_rate": 7.155301358038521e-06,
      "loss": 0.025,
      "step": 883440
    },
    {
      "epoch": 1.4458016666339362,
      "grad_norm": 0.4187161922454834,
      "learning_rate": 7.155235465825005e-06,
      "loss": 0.0183,
      "step": 883460
    },
    {
      "epoch": 1.4458343970725895,
      "grad_norm": 0.3930194675922394,
      "learning_rate": 7.155169573611487e-06,
      "loss": 0.0225,
      "step": 883480
    },
    {
      "epoch": 1.445867127511243,
      "grad_norm": 0.3315122723579407,
      "learning_rate": 7.15510368139797e-06,
      "loss": 0.021,
      "step": 883500
    },
    {
      "epoch": 1.4458998579498963,
      "grad_norm": 0.3209521472454071,
      "learning_rate": 7.155037789184452e-06,
      "loss": 0.0129,
      "step": 883520
    },
    {
      "epoch": 1.4459325883885497,
      "grad_norm": 0.46427151560783386,
      "learning_rate": 7.154971896970936e-06,
      "loss": 0.0176,
      "step": 883540
    },
    {
      "epoch": 1.445965318827203,
      "grad_norm": 0.17447088658809662,
      "learning_rate": 7.154906004757418e-06,
      "loss": 0.0237,
      "step": 883560
    },
    {
      "epoch": 1.4459980492658562,
      "grad_norm": 0.8658496141433716,
      "learning_rate": 7.154840112543901e-06,
      "loss": 0.0323,
      "step": 883580
    },
    {
      "epoch": 1.4460307797045096,
      "grad_norm": 0.34232062101364136,
      "learning_rate": 7.154774220330384e-06,
      "loss": 0.0216,
      "step": 883600
    },
    {
      "epoch": 1.446063510143163,
      "grad_norm": 0.25847455859184265,
      "learning_rate": 7.154708328116867e-06,
      "loss": 0.0224,
      "step": 883620
    },
    {
      "epoch": 1.4460962405818163,
      "grad_norm": 0.8342541456222534,
      "learning_rate": 7.15464243590335e-06,
      "loss": 0.02,
      "step": 883640
    },
    {
      "epoch": 1.4461289710204697,
      "grad_norm": 0.3996785879135132,
      "learning_rate": 7.154576543689833e-06,
      "loss": 0.0208,
      "step": 883660
    },
    {
      "epoch": 1.4461617014591228,
      "grad_norm": 0.6447994709014893,
      "learning_rate": 7.154510651476315e-06,
      "loss": 0.0181,
      "step": 883680
    },
    {
      "epoch": 1.4461944318977764,
      "grad_norm": 0.24956564605236053,
      "learning_rate": 7.154444759262799e-06,
      "loss": 0.0294,
      "step": 883700
    },
    {
      "epoch": 1.4462271623364296,
      "grad_norm": 0.2589099407196045,
      "learning_rate": 7.1543788670492805e-06,
      "loss": 0.0212,
      "step": 883720
    },
    {
      "epoch": 1.446259892775083,
      "grad_norm": 1.6433041095733643,
      "learning_rate": 7.154312974835764e-06,
      "loss": 0.0295,
      "step": 883740
    },
    {
      "epoch": 1.4462926232137363,
      "grad_norm": 0.8091703653335571,
      "learning_rate": 7.154247082622248e-06,
      "loss": 0.0312,
      "step": 883760
    },
    {
      "epoch": 1.4463253536523897,
      "grad_norm": 0.4357907474040985,
      "learning_rate": 7.15418119040873e-06,
      "loss": 0.0326,
      "step": 883780
    },
    {
      "epoch": 1.446358084091043,
      "grad_norm": 1.356968879699707,
      "learning_rate": 7.154115298195213e-06,
      "loss": 0.0291,
      "step": 883800
    },
    {
      "epoch": 1.4463908145296962,
      "grad_norm": 0.4849279820919037,
      "learning_rate": 7.154049405981696e-06,
      "loss": 0.0149,
      "step": 883820
    },
    {
      "epoch": 1.4464235449683498,
      "grad_norm": 0.8523041009902954,
      "learning_rate": 7.153983513768179e-06,
      "loss": 0.0214,
      "step": 883840
    },
    {
      "epoch": 1.446456275407003,
      "grad_norm": 0.7231737971305847,
      "learning_rate": 7.153917621554661e-06,
      "loss": 0.0293,
      "step": 883860
    },
    {
      "epoch": 1.4464890058456563,
      "grad_norm": 0.11336926370859146,
      "learning_rate": 7.153851729341145e-06,
      "loss": 0.0273,
      "step": 883880
    },
    {
      "epoch": 1.4465217362843097,
      "grad_norm": 1.2736455202102661,
      "learning_rate": 7.153785837127627e-06,
      "loss": 0.0262,
      "step": 883900
    },
    {
      "epoch": 1.446554466722963,
      "grad_norm": 1.130889892578125,
      "learning_rate": 7.1537199449141105e-06,
      "loss": 0.0256,
      "step": 883920
    },
    {
      "epoch": 1.4465871971616164,
      "grad_norm": 0.39506807923316956,
      "learning_rate": 7.153654052700592e-06,
      "loss": 0.0201,
      "step": 883940
    },
    {
      "epoch": 1.4466199276002696,
      "grad_norm": 0.1887008100748062,
      "learning_rate": 7.153588160487076e-06,
      "loss": 0.0189,
      "step": 883960
    },
    {
      "epoch": 1.446652658038923,
      "grad_norm": 0.8031992316246033,
      "learning_rate": 7.153522268273559e-06,
      "loss": 0.0256,
      "step": 883980
    },
    {
      "epoch": 1.4466853884775763,
      "grad_norm": 0.33840271830558777,
      "learning_rate": 7.1534563760600414e-06,
      "loss": 0.0175,
      "step": 884000
    },
    {
      "epoch": 1.4467181189162297,
      "grad_norm": 0.46657586097717285,
      "learning_rate": 7.153390483846524e-06,
      "loss": 0.0276,
      "step": 884020
    },
    {
      "epoch": 1.446750849354883,
      "grad_norm": 0.8902686834335327,
      "learning_rate": 7.153324591633008e-06,
      "loss": 0.0238,
      "step": 884040
    },
    {
      "epoch": 1.4467835797935364,
      "grad_norm": 0.6509311199188232,
      "learning_rate": 7.15325869941949e-06,
      "loss": 0.0241,
      "step": 884060
    },
    {
      "epoch": 1.4468163102321898,
      "grad_norm": 0.26693999767303467,
      "learning_rate": 7.153192807205973e-06,
      "loss": 0.0219,
      "step": 884080
    },
    {
      "epoch": 1.446849040670843,
      "grad_norm": 0.8498338460922241,
      "learning_rate": 7.153126914992455e-06,
      "loss": 0.0163,
      "step": 884100
    },
    {
      "epoch": 1.4468817711094963,
      "grad_norm": 0.6288871169090271,
      "learning_rate": 7.153061022778939e-06,
      "loss": 0.0259,
      "step": 884120
    },
    {
      "epoch": 1.4469145015481497,
      "grad_norm": 0.6468120813369751,
      "learning_rate": 7.152995130565422e-06,
      "loss": 0.0186,
      "step": 884140
    },
    {
      "epoch": 1.446947231986803,
      "grad_norm": 0.6489542722702026,
      "learning_rate": 7.152929238351904e-06,
      "loss": 0.0282,
      "step": 884160
    },
    {
      "epoch": 1.4469799624254565,
      "grad_norm": 0.38385123014450073,
      "learning_rate": 7.152863346138388e-06,
      "loss": 0.0193,
      "step": 884180
    },
    {
      "epoch": 1.4470126928641098,
      "grad_norm": 0.2658321261405945,
      "learning_rate": 7.1527974539248705e-06,
      "loss": 0.0162,
      "step": 884200
    },
    {
      "epoch": 1.4470454233027632,
      "grad_norm": 0.4602893888950348,
      "learning_rate": 7.152731561711353e-06,
      "loss": 0.0217,
      "step": 884220
    },
    {
      "epoch": 1.4470781537414164,
      "grad_norm": 0.5900290012359619,
      "learning_rate": 7.152665669497836e-06,
      "loss": 0.0166,
      "step": 884240
    },
    {
      "epoch": 1.4471108841800697,
      "grad_norm": 0.7952046394348145,
      "learning_rate": 7.15259977728432e-06,
      "loss": 0.0198,
      "step": 884260
    },
    {
      "epoch": 1.447143614618723,
      "grad_norm": 1.8570799827575684,
      "learning_rate": 7.1525338850708015e-06,
      "loss": 0.0173,
      "step": 884280
    },
    {
      "epoch": 1.4471763450573765,
      "grad_norm": 0.31864866614341736,
      "learning_rate": 7.152467992857285e-06,
      "loss": 0.0182,
      "step": 884300
    },
    {
      "epoch": 1.4472090754960298,
      "grad_norm": 1.8151741027832031,
      "learning_rate": 7.152402100643767e-06,
      "loss": 0.0293,
      "step": 884320
    },
    {
      "epoch": 1.4472418059346832,
      "grad_norm": 0.7859022617340088,
      "learning_rate": 7.1523362084302506e-06,
      "loss": 0.0174,
      "step": 884340
    },
    {
      "epoch": 1.4472745363733366,
      "grad_norm": 0.4341166317462921,
      "learning_rate": 7.1522703162167324e-06,
      "loss": 0.0181,
      "step": 884360
    },
    {
      "epoch": 1.4473072668119897,
      "grad_norm": 0.5856359004974365,
      "learning_rate": 7.152204424003216e-06,
      "loss": 0.0212,
      "step": 884380
    },
    {
      "epoch": 1.447339997250643,
      "grad_norm": 1.0634685754776,
      "learning_rate": 7.152138531789699e-06,
      "loss": 0.0214,
      "step": 884400
    },
    {
      "epoch": 1.4473727276892965,
      "grad_norm": 0.7206582427024841,
      "learning_rate": 7.1520726395761815e-06,
      "loss": 0.0228,
      "step": 884420
    },
    {
      "epoch": 1.4474054581279499,
      "grad_norm": 0.2594958543777466,
      "learning_rate": 7.152006747362664e-06,
      "loss": 0.023,
      "step": 884440
    },
    {
      "epoch": 1.4474381885666032,
      "grad_norm": 0.23081517219543457,
      "learning_rate": 7.151940855149148e-06,
      "loss": 0.0203,
      "step": 884460
    },
    {
      "epoch": 1.4474709190052564,
      "grad_norm": 0.41460683941841125,
      "learning_rate": 7.151874962935631e-06,
      "loss": 0.0187,
      "step": 884480
    },
    {
      "epoch": 1.44750364944391,
      "grad_norm": 0.6160280704498291,
      "learning_rate": 7.151809070722113e-06,
      "loss": 0.0212,
      "step": 884500
    },
    {
      "epoch": 1.4475363798825631,
      "grad_norm": 0.41618984937667847,
      "learning_rate": 7.151743178508597e-06,
      "loss": 0.0164,
      "step": 884520
    },
    {
      "epoch": 1.4475691103212165,
      "grad_norm": 0.6494208574295044,
      "learning_rate": 7.151677286295079e-06,
      "loss": 0.0188,
      "step": 884540
    },
    {
      "epoch": 1.4476018407598699,
      "grad_norm": 1.0461547374725342,
      "learning_rate": 7.151611394081562e-06,
      "loss": 0.0149,
      "step": 884560
    },
    {
      "epoch": 1.4476345711985232,
      "grad_norm": 0.49127882719039917,
      "learning_rate": 7.151545501868044e-06,
      "loss": 0.0122,
      "step": 884580
    },
    {
      "epoch": 1.4476673016371766,
      "grad_norm": 0.5533597469329834,
      "learning_rate": 7.151479609654528e-06,
      "loss": 0.0247,
      "step": 884600
    },
    {
      "epoch": 1.4477000320758298,
      "grad_norm": 0.48707181215286255,
      "learning_rate": 7.151413717441011e-06,
      "loss": 0.0304,
      "step": 884620
    },
    {
      "epoch": 1.4477327625144834,
      "grad_norm": 0.43286848068237305,
      "learning_rate": 7.151347825227493e-06,
      "loss": 0.0194,
      "step": 884640
    },
    {
      "epoch": 1.4477654929531365,
      "grad_norm": 0.9499614834785461,
      "learning_rate": 7.151281933013976e-06,
      "loss": 0.022,
      "step": 884660
    },
    {
      "epoch": 1.4477982233917899,
      "grad_norm": 0.06931223720312119,
      "learning_rate": 7.15121604080046e-06,
      "loss": 0.0171,
      "step": 884680
    },
    {
      "epoch": 1.4478309538304432,
      "grad_norm": 0.6473106741905212,
      "learning_rate": 7.1511501485869416e-06,
      "loss": 0.0166,
      "step": 884700
    },
    {
      "epoch": 1.4478636842690966,
      "grad_norm": 0.6530836820602417,
      "learning_rate": 7.151084256373425e-06,
      "loss": 0.0257,
      "step": 884720
    },
    {
      "epoch": 1.44789641470775,
      "grad_norm": 0.3021262288093567,
      "learning_rate": 7.151018364159907e-06,
      "loss": 0.0197,
      "step": 884740
    },
    {
      "epoch": 1.4479291451464031,
      "grad_norm": 1.0443006753921509,
      "learning_rate": 7.150952471946391e-06,
      "loss": 0.0224,
      "step": 884760
    },
    {
      "epoch": 1.4479618755850565,
      "grad_norm": 0.4722262918949127,
      "learning_rate": 7.150886579732873e-06,
      "loss": 0.024,
      "step": 884780
    },
    {
      "epoch": 1.4479946060237099,
      "grad_norm": 1.3447401523590088,
      "learning_rate": 7.150820687519356e-06,
      "loss": 0.0257,
      "step": 884800
    },
    {
      "epoch": 1.4480273364623633,
      "grad_norm": 0.20927739143371582,
      "learning_rate": 7.15075479530584e-06,
      "loss": 0.0154,
      "step": 884820
    },
    {
      "epoch": 1.4480600669010166,
      "grad_norm": 0.3110876977443695,
      "learning_rate": 7.1506889030923224e-06,
      "loss": 0.0242,
      "step": 884840
    },
    {
      "epoch": 1.44809279733967,
      "grad_norm": 0.6662421822547913,
      "learning_rate": 7.150623010878805e-06,
      "loss": 0.0155,
      "step": 884860
    },
    {
      "epoch": 1.4481255277783234,
      "grad_norm": 1.1683158874511719,
      "learning_rate": 7.150557118665288e-06,
      "loss": 0.0201,
      "step": 884880
    },
    {
      "epoch": 1.4481582582169765,
      "grad_norm": 0.5954521894454956,
      "learning_rate": 7.1504912264517715e-06,
      "loss": 0.0185,
      "step": 884900
    },
    {
      "epoch": 1.44819098865563,
      "grad_norm": 0.38358432054519653,
      "learning_rate": 7.150425334238253e-06,
      "loss": 0.0194,
      "step": 884920
    },
    {
      "epoch": 1.4482237190942833,
      "grad_norm": 0.7823034524917603,
      "learning_rate": 7.150359442024737e-06,
      "loss": 0.0262,
      "step": 884940
    },
    {
      "epoch": 1.4482564495329366,
      "grad_norm": 0.2869665324687958,
      "learning_rate": 7.150293549811219e-06,
      "loss": 0.0182,
      "step": 884960
    },
    {
      "epoch": 1.44828917997159,
      "grad_norm": 1.1125171184539795,
      "learning_rate": 7.1502276575977025e-06,
      "loss": 0.0208,
      "step": 884980
    },
    {
      "epoch": 1.4483219104102434,
      "grad_norm": 0.9916388392448425,
      "learning_rate": 7.150161765384185e-06,
      "loss": 0.0278,
      "step": 885000
    },
    {
      "epoch": 1.4483546408488968,
      "grad_norm": 0.7047531604766846,
      "learning_rate": 7.150095873170668e-06,
      "loss": 0.027,
      "step": 885020
    },
    {
      "epoch": 1.44838737128755,
      "grad_norm": 0.3278901278972626,
      "learning_rate": 7.150029980957151e-06,
      "loss": 0.0151,
      "step": 885040
    },
    {
      "epoch": 1.4484201017262033,
      "grad_norm": 0.16699205338954926,
      "learning_rate": 7.149964088743634e-06,
      "loss": 0.0286,
      "step": 885060
    },
    {
      "epoch": 1.4484528321648567,
      "grad_norm": 0.7623156309127808,
      "learning_rate": 7.149898196530116e-06,
      "loss": 0.0219,
      "step": 885080
    },
    {
      "epoch": 1.44848556260351,
      "grad_norm": 0.8331802487373352,
      "learning_rate": 7.1498323043166e-06,
      "loss": 0.0155,
      "step": 885100
    },
    {
      "epoch": 1.4485182930421634,
      "grad_norm": 0.9756042957305908,
      "learning_rate": 7.149766412103082e-06,
      "loss": 0.0224,
      "step": 885120
    },
    {
      "epoch": 1.4485510234808168,
      "grad_norm": 0.613567054271698,
      "learning_rate": 7.149700519889565e-06,
      "loss": 0.0156,
      "step": 885140
    },
    {
      "epoch": 1.4485837539194701,
      "grad_norm": 2.039154052734375,
      "learning_rate": 7.149634627676049e-06,
      "loss": 0.0284,
      "step": 885160
    },
    {
      "epoch": 1.4486164843581233,
      "grad_norm": 0.17139986157417297,
      "learning_rate": 7.149568735462531e-06,
      "loss": 0.0274,
      "step": 885180
    },
    {
      "epoch": 1.4486492147967767,
      "grad_norm": 0.6633995771408081,
      "learning_rate": 7.149502843249014e-06,
      "loss": 0.0167,
      "step": 885200
    },
    {
      "epoch": 1.44868194523543,
      "grad_norm": 0.29776814579963684,
      "learning_rate": 7.149436951035497e-06,
      "loss": 0.0159,
      "step": 885220
    },
    {
      "epoch": 1.4487146756740834,
      "grad_norm": 1.1806278228759766,
      "learning_rate": 7.14937105882198e-06,
      "loss": 0.0184,
      "step": 885240
    },
    {
      "epoch": 1.4487474061127368,
      "grad_norm": 0.8541815280914307,
      "learning_rate": 7.1493051666084625e-06,
      "loss": 0.0229,
      "step": 885260
    },
    {
      "epoch": 1.44878013655139,
      "grad_norm": 0.5365115404129028,
      "learning_rate": 7.149239274394946e-06,
      "loss": 0.0234,
      "step": 885280
    },
    {
      "epoch": 1.4488128669900435,
      "grad_norm": 0.36208000779151917,
      "learning_rate": 7.149173382181428e-06,
      "loss": 0.0262,
      "step": 885300
    },
    {
      "epoch": 1.4488455974286967,
      "grad_norm": 0.8707485198974609,
      "learning_rate": 7.149107489967912e-06,
      "loss": 0.0184,
      "step": 885320
    },
    {
      "epoch": 1.44887832786735,
      "grad_norm": 0.43573716282844543,
      "learning_rate": 7.1490415977543935e-06,
      "loss": 0.03,
      "step": 885340
    },
    {
      "epoch": 1.4489110583060034,
      "grad_norm": 0.48366206884384155,
      "learning_rate": 7.148975705540877e-06,
      "loss": 0.0174,
      "step": 885360
    },
    {
      "epoch": 1.4489437887446568,
      "grad_norm": 0.7364054918289185,
      "learning_rate": 7.148909813327359e-06,
      "loss": 0.025,
      "step": 885380
    },
    {
      "epoch": 1.4489765191833102,
      "grad_norm": 0.5855767130851746,
      "learning_rate": 7.1488439211138425e-06,
      "loss": 0.0197,
      "step": 885400
    },
    {
      "epoch": 1.4490092496219633,
      "grad_norm": 0.9091156125068665,
      "learning_rate": 7.148778028900325e-06,
      "loss": 0.0184,
      "step": 885420
    },
    {
      "epoch": 1.449041980060617,
      "grad_norm": 1.2044497728347778,
      "learning_rate": 7.148712136686808e-06,
      "loss": 0.0201,
      "step": 885440
    },
    {
      "epoch": 1.44907471049927,
      "grad_norm": 0.633298397064209,
      "learning_rate": 7.148646244473291e-06,
      "loss": 0.026,
      "step": 885460
    },
    {
      "epoch": 1.4491074409379234,
      "grad_norm": 1.7504918575286865,
      "learning_rate": 7.148580352259774e-06,
      "loss": 0.0216,
      "step": 885480
    },
    {
      "epoch": 1.4491401713765768,
      "grad_norm": 1.1388999223709106,
      "learning_rate": 7.148514460046256e-06,
      "loss": 0.0269,
      "step": 885500
    },
    {
      "epoch": 1.4491729018152302,
      "grad_norm": 1.4316998720169067,
      "learning_rate": 7.14844856783274e-06,
      "loss": 0.019,
      "step": 885520
    },
    {
      "epoch": 1.4492056322538835,
      "grad_norm": 0.5634130239486694,
      "learning_rate": 7.148382675619223e-06,
      "loss": 0.0207,
      "step": 885540
    },
    {
      "epoch": 1.4492383626925367,
      "grad_norm": 0.5401791334152222,
      "learning_rate": 7.148316783405705e-06,
      "loss": 0.0165,
      "step": 885560
    },
    {
      "epoch": 1.44927109313119,
      "grad_norm": 0.3618081212043762,
      "learning_rate": 7.148250891192189e-06,
      "loss": 0.0166,
      "step": 885580
    },
    {
      "epoch": 1.4493038235698434,
      "grad_norm": 1.5182870626449585,
      "learning_rate": 7.148184998978671e-06,
      "loss": 0.0236,
      "step": 885600
    },
    {
      "epoch": 1.4493365540084968,
      "grad_norm": 0.685905933380127,
      "learning_rate": 7.148119106765154e-06,
      "loss": 0.027,
      "step": 885620
    },
    {
      "epoch": 1.4493692844471502,
      "grad_norm": 0.5341683626174927,
      "learning_rate": 7.148053214551637e-06,
      "loss": 0.0178,
      "step": 885640
    },
    {
      "epoch": 1.4494020148858036,
      "grad_norm": 1.0348135232925415,
      "learning_rate": 7.14798732233812e-06,
      "loss": 0.0221,
      "step": 885660
    },
    {
      "epoch": 1.449434745324457,
      "grad_norm": 1.1421302556991577,
      "learning_rate": 7.147921430124603e-06,
      "loss": 0.0305,
      "step": 885680
    },
    {
      "epoch": 1.44946747576311,
      "grad_norm": 1.080898404121399,
      "learning_rate": 7.147855537911086e-06,
      "loss": 0.0254,
      "step": 885700
    },
    {
      "epoch": 1.4495002062017635,
      "grad_norm": 0.5170215368270874,
      "learning_rate": 7.147789645697568e-06,
      "loss": 0.0197,
      "step": 885720
    },
    {
      "epoch": 1.4495329366404168,
      "grad_norm": 0.3925236165523529,
      "learning_rate": 7.147723753484052e-06,
      "loss": 0.0158,
      "step": 885740
    },
    {
      "epoch": 1.4495656670790702,
      "grad_norm": 2.161051034927368,
      "learning_rate": 7.1476578612705335e-06,
      "loss": 0.0199,
      "step": 885760
    },
    {
      "epoch": 1.4495983975177236,
      "grad_norm": 0.8130348324775696,
      "learning_rate": 7.147591969057017e-06,
      "loss": 0.02,
      "step": 885780
    },
    {
      "epoch": 1.449631127956377,
      "grad_norm": 0.21166405081748962,
      "learning_rate": 7.1475260768435e-06,
      "loss": 0.019,
      "step": 885800
    },
    {
      "epoch": 1.4496638583950303,
      "grad_norm": 0.6439740061759949,
      "learning_rate": 7.147460184629983e-06,
      "loss": 0.0192,
      "step": 885820
    },
    {
      "epoch": 1.4496965888336835,
      "grad_norm": 0.4558325707912445,
      "learning_rate": 7.147394292416465e-06,
      "loss": 0.0197,
      "step": 885840
    },
    {
      "epoch": 1.4497293192723368,
      "grad_norm": 0.6812036633491516,
      "learning_rate": 7.147328400202949e-06,
      "loss": 0.033,
      "step": 885860
    },
    {
      "epoch": 1.4497620497109902,
      "grad_norm": 0.8087950944900513,
      "learning_rate": 7.147262507989432e-06,
      "loss": 0.0278,
      "step": 885880
    },
    {
      "epoch": 1.4497947801496436,
      "grad_norm": 0.6309386491775513,
      "learning_rate": 7.147196615775914e-06,
      "loss": 0.0228,
      "step": 885900
    },
    {
      "epoch": 1.449827510588297,
      "grad_norm": 0.9312142133712769,
      "learning_rate": 7.147130723562398e-06,
      "loss": 0.0229,
      "step": 885920
    },
    {
      "epoch": 1.4498602410269503,
      "grad_norm": 0.48551106452941895,
      "learning_rate": 7.14706483134888e-06,
      "loss": 0.0171,
      "step": 885940
    },
    {
      "epoch": 1.4498929714656037,
      "grad_norm": 0.3683474659919739,
      "learning_rate": 7.1469989391353635e-06,
      "loss": 0.0259,
      "step": 885960
    },
    {
      "epoch": 1.4499257019042568,
      "grad_norm": 1.0624696016311646,
      "learning_rate": 7.146933046921845e-06,
      "loss": 0.0365,
      "step": 885980
    },
    {
      "epoch": 1.4499584323429102,
      "grad_norm": 2.2944538593292236,
      "learning_rate": 7.146867154708329e-06,
      "loss": 0.0221,
      "step": 886000
    },
    {
      "epoch": 1.4499911627815636,
      "grad_norm": 1.2636076211929321,
      "learning_rate": 7.146801262494812e-06,
      "loss": 0.0198,
      "step": 886020
    },
    {
      "epoch": 1.450023893220217,
      "grad_norm": 0.6191143989562988,
      "learning_rate": 7.1467353702812944e-06,
      "loss": 0.0274,
      "step": 886040
    },
    {
      "epoch": 1.4500566236588703,
      "grad_norm": 0.6900316476821899,
      "learning_rate": 7.146669478067777e-06,
      "loss": 0.02,
      "step": 886060
    },
    {
      "epoch": 1.4500893540975235,
      "grad_norm": 1.4739439487457275,
      "learning_rate": 7.146603585854261e-06,
      "loss": 0.0232,
      "step": 886080
    },
    {
      "epoch": 1.450122084536177,
      "grad_norm": 0.3584257662296295,
      "learning_rate": 7.146537693640743e-06,
      "loss": 0.019,
      "step": 886100
    },
    {
      "epoch": 1.4501548149748302,
      "grad_norm": 0.3255372643470764,
      "learning_rate": 7.146471801427226e-06,
      "loss": 0.014,
      "step": 886120
    },
    {
      "epoch": 1.4501875454134836,
      "grad_norm": 0.34804898500442505,
      "learning_rate": 7.146405909213708e-06,
      "loss": 0.0212,
      "step": 886140
    },
    {
      "epoch": 1.450220275852137,
      "grad_norm": 0.8039611577987671,
      "learning_rate": 7.146340017000192e-06,
      "loss": 0.0232,
      "step": 886160
    },
    {
      "epoch": 1.4502530062907903,
      "grad_norm": 0.32196715474128723,
      "learning_rate": 7.146274124786674e-06,
      "loss": 0.0151,
      "step": 886180
    },
    {
      "epoch": 1.4502857367294437,
      "grad_norm": 0.4042729437351227,
      "learning_rate": 7.146208232573157e-06,
      "loss": 0.023,
      "step": 886200
    },
    {
      "epoch": 1.4503184671680969,
      "grad_norm": 0.5060868263244629,
      "learning_rate": 7.146142340359641e-06,
      "loss": 0.0171,
      "step": 886220
    },
    {
      "epoch": 1.4503511976067505,
      "grad_norm": 0.5825477838516235,
      "learning_rate": 7.1460764481461235e-06,
      "loss": 0.0201,
      "step": 886240
    },
    {
      "epoch": 1.4503839280454036,
      "grad_norm": 0.5653811693191528,
      "learning_rate": 7.146010555932606e-06,
      "loss": 0.0146,
      "step": 886260
    },
    {
      "epoch": 1.450416658484057,
      "grad_norm": 0.42195025086402893,
      "learning_rate": 7.145944663719089e-06,
      "loss": 0.0219,
      "step": 886280
    },
    {
      "epoch": 1.4504493889227104,
      "grad_norm": 0.6081669926643372,
      "learning_rate": 7.145878771505573e-06,
      "loss": 0.019,
      "step": 886300
    },
    {
      "epoch": 1.4504821193613637,
      "grad_norm": 0.648917555809021,
      "learning_rate": 7.1458128792920545e-06,
      "loss": 0.0236,
      "step": 886320
    },
    {
      "epoch": 1.450514849800017,
      "grad_norm": 0.8023191094398499,
      "learning_rate": 7.145746987078538e-06,
      "loss": 0.0237,
      "step": 886340
    },
    {
      "epoch": 1.4505475802386703,
      "grad_norm": 0.7486480474472046,
      "learning_rate": 7.14568109486502e-06,
      "loss": 0.0225,
      "step": 886360
    },
    {
      "epoch": 1.4505803106773236,
      "grad_norm": 0.9985718131065369,
      "learning_rate": 7.1456152026515036e-06,
      "loss": 0.0258,
      "step": 886380
    },
    {
      "epoch": 1.450613041115977,
      "grad_norm": 0.4209097623825073,
      "learning_rate": 7.1455493104379855e-06,
      "loss": 0.0239,
      "step": 886400
    },
    {
      "epoch": 1.4506457715546304,
      "grad_norm": 0.7993611693382263,
      "learning_rate": 7.145483418224469e-06,
      "loss": 0.0207,
      "step": 886420
    },
    {
      "epoch": 1.4506785019932837,
      "grad_norm": 0.32207322120666504,
      "learning_rate": 7.145417526010952e-06,
      "loss": 0.02,
      "step": 886440
    },
    {
      "epoch": 1.4507112324319371,
      "grad_norm": 0.2719574570655823,
      "learning_rate": 7.1453516337974345e-06,
      "loss": 0.0256,
      "step": 886460
    },
    {
      "epoch": 1.4507439628705905,
      "grad_norm": 0.1447536051273346,
      "learning_rate": 7.145285741583917e-06,
      "loss": 0.0188,
      "step": 886480
    },
    {
      "epoch": 1.4507766933092436,
      "grad_norm": 0.414022833108902,
      "learning_rate": 7.145219849370401e-06,
      "loss": 0.0241,
      "step": 886500
    },
    {
      "epoch": 1.450809423747897,
      "grad_norm": 1.6960679292678833,
      "learning_rate": 7.145153957156883e-06,
      "loss": 0.0189,
      "step": 886520
    },
    {
      "epoch": 1.4508421541865504,
      "grad_norm": 0.33707085251808167,
      "learning_rate": 7.145088064943366e-06,
      "loss": 0.0203,
      "step": 886540
    },
    {
      "epoch": 1.4508748846252038,
      "grad_norm": 0.3527804911136627,
      "learning_rate": 7.145022172729848e-06,
      "loss": 0.0169,
      "step": 886560
    },
    {
      "epoch": 1.4509076150638571,
      "grad_norm": 0.4131210744380951,
      "learning_rate": 7.144956280516332e-06,
      "loss": 0.0178,
      "step": 886580
    },
    {
      "epoch": 1.4509403455025105,
      "grad_norm": 0.3070225119590759,
      "learning_rate": 7.144890388302815e-06,
      "loss": 0.0214,
      "step": 886600
    },
    {
      "epoch": 1.4509730759411639,
      "grad_norm": 0.12744678556919098,
      "learning_rate": 7.144824496089297e-06,
      "loss": 0.0317,
      "step": 886620
    },
    {
      "epoch": 1.451005806379817,
      "grad_norm": 0.4246314764022827,
      "learning_rate": 7.144758603875781e-06,
      "loss": 0.0181,
      "step": 886640
    },
    {
      "epoch": 1.4510385368184704,
      "grad_norm": 0.3641929626464844,
      "learning_rate": 7.144692711662264e-06,
      "loss": 0.0293,
      "step": 886660
    },
    {
      "epoch": 1.4510712672571238,
      "grad_norm": 1.030783772468567,
      "learning_rate": 7.144626819448746e-06,
      "loss": 0.022,
      "step": 886680
    },
    {
      "epoch": 1.4511039976957771,
      "grad_norm": 0.46600842475891113,
      "learning_rate": 7.144560927235229e-06,
      "loss": 0.0214,
      "step": 886700
    },
    {
      "epoch": 1.4511367281344305,
      "grad_norm": 1.0828970670700073,
      "learning_rate": 7.144495035021713e-06,
      "loss": 0.0239,
      "step": 886720
    },
    {
      "epoch": 1.4511694585730837,
      "grad_norm": 0.40911126136779785,
      "learning_rate": 7.1444291428081946e-06,
      "loss": 0.0277,
      "step": 886740
    },
    {
      "epoch": 1.4512021890117373,
      "grad_norm": 0.44701409339904785,
      "learning_rate": 7.144363250594678e-06,
      "loss": 0.0229,
      "step": 886760
    },
    {
      "epoch": 1.4512349194503904,
      "grad_norm": 0.9132294058799744,
      "learning_rate": 7.14429735838116e-06,
      "loss": 0.0235,
      "step": 886780
    },
    {
      "epoch": 1.4512676498890438,
      "grad_norm": 0.7833662629127502,
      "learning_rate": 7.144231466167644e-06,
      "loss": 0.0199,
      "step": 886800
    },
    {
      "epoch": 1.4513003803276971,
      "grad_norm": 0.9413299560546875,
      "learning_rate": 7.144165573954126e-06,
      "loss": 0.0153,
      "step": 886820
    },
    {
      "epoch": 1.4513331107663505,
      "grad_norm": 1.8745886087417603,
      "learning_rate": 7.144099681740609e-06,
      "loss": 0.0212,
      "step": 886840
    },
    {
      "epoch": 1.451365841205004,
      "grad_norm": 0.4718970060348511,
      "learning_rate": 7.144033789527092e-06,
      "loss": 0.0173,
      "step": 886860
    },
    {
      "epoch": 1.451398571643657,
      "grad_norm": 0.5719605684280396,
      "learning_rate": 7.1439678973135754e-06,
      "loss": 0.0221,
      "step": 886880
    },
    {
      "epoch": 1.4514313020823106,
      "grad_norm": 0.42554205656051636,
      "learning_rate": 7.143902005100057e-06,
      "loss": 0.0239,
      "step": 886900
    },
    {
      "epoch": 1.4514640325209638,
      "grad_norm": 0.48892247676849365,
      "learning_rate": 7.143836112886541e-06,
      "loss": 0.0245,
      "step": 886920
    },
    {
      "epoch": 1.4514967629596172,
      "grad_norm": 0.8242795467376709,
      "learning_rate": 7.1437702206730245e-06,
      "loss": 0.0215,
      "step": 886940
    },
    {
      "epoch": 1.4515294933982705,
      "grad_norm": 0.40373292565345764,
      "learning_rate": 7.143704328459506e-06,
      "loss": 0.0113,
      "step": 886960
    },
    {
      "epoch": 1.451562223836924,
      "grad_norm": 0.3591970205307007,
      "learning_rate": 7.14363843624599e-06,
      "loss": 0.0172,
      "step": 886980
    },
    {
      "epoch": 1.4515949542755773,
      "grad_norm": 0.8721946477890015,
      "learning_rate": 7.143572544032472e-06,
      "loss": 0.0137,
      "step": 887000
    },
    {
      "epoch": 1.4516276847142304,
      "grad_norm": 0.3021826446056366,
      "learning_rate": 7.1435066518189555e-06,
      "loss": 0.0195,
      "step": 887020
    },
    {
      "epoch": 1.4516604151528838,
      "grad_norm": 0.34726688265800476,
      "learning_rate": 7.143440759605438e-06,
      "loss": 0.0247,
      "step": 887040
    },
    {
      "epoch": 1.4516931455915372,
      "grad_norm": 1.012361764907837,
      "learning_rate": 7.143374867391921e-06,
      "loss": 0.0222,
      "step": 887060
    },
    {
      "epoch": 1.4517258760301905,
      "grad_norm": 0.4423518180847168,
      "learning_rate": 7.143308975178404e-06,
      "loss": 0.0249,
      "step": 887080
    },
    {
      "epoch": 1.451758606468844,
      "grad_norm": 0.4434238374233246,
      "learning_rate": 7.143243082964887e-06,
      "loss": 0.0168,
      "step": 887100
    },
    {
      "epoch": 1.4517913369074973,
      "grad_norm": 0.36738070845603943,
      "learning_rate": 7.143177190751369e-06,
      "loss": 0.0191,
      "step": 887120
    },
    {
      "epoch": 1.4518240673461507,
      "grad_norm": 0.23778600990772247,
      "learning_rate": 7.143111298537853e-06,
      "loss": 0.0184,
      "step": 887140
    },
    {
      "epoch": 1.4518567977848038,
      "grad_norm": 0.8379380702972412,
      "learning_rate": 7.143045406324335e-06,
      "loss": 0.0216,
      "step": 887160
    },
    {
      "epoch": 1.4518895282234572,
      "grad_norm": 0.6641675233840942,
      "learning_rate": 7.142979514110818e-06,
      "loss": 0.0282,
      "step": 887180
    },
    {
      "epoch": 1.4519222586621106,
      "grad_norm": 0.38643619418144226,
      "learning_rate": 7.1429136218973e-06,
      "loss": 0.023,
      "step": 887200
    },
    {
      "epoch": 1.451954989100764,
      "grad_norm": 0.710833728313446,
      "learning_rate": 7.142847729683784e-06,
      "loss": 0.0206,
      "step": 887220
    },
    {
      "epoch": 1.4519877195394173,
      "grad_norm": 0.6199996471405029,
      "learning_rate": 7.1427818374702665e-06,
      "loss": 0.0254,
      "step": 887240
    },
    {
      "epoch": 1.4520204499780707,
      "grad_norm": 0.35388898849487305,
      "learning_rate": 7.142715945256749e-06,
      "loss": 0.0158,
      "step": 887260
    },
    {
      "epoch": 1.452053180416724,
      "grad_norm": 0.6358485221862793,
      "learning_rate": 7.142650053043233e-06,
      "loss": 0.0216,
      "step": 887280
    },
    {
      "epoch": 1.4520859108553772,
      "grad_norm": 1.0915106534957886,
      "learning_rate": 7.1425841608297155e-06,
      "loss": 0.0186,
      "step": 887300
    },
    {
      "epoch": 1.4521186412940306,
      "grad_norm": 0.3857206702232361,
      "learning_rate": 7.142518268616198e-06,
      "loss": 0.019,
      "step": 887320
    },
    {
      "epoch": 1.452151371732684,
      "grad_norm": 0.7546377778053284,
      "learning_rate": 7.142452376402681e-06,
      "loss": 0.0276,
      "step": 887340
    },
    {
      "epoch": 1.4521841021713373,
      "grad_norm": 0.4313499331474304,
      "learning_rate": 7.142386484189165e-06,
      "loss": 0.0147,
      "step": 887360
    },
    {
      "epoch": 1.4522168326099907,
      "grad_norm": 0.9361168146133423,
      "learning_rate": 7.1423205919756465e-06,
      "loss": 0.015,
      "step": 887380
    },
    {
      "epoch": 1.452249563048644,
      "grad_norm": 0.4112280011177063,
      "learning_rate": 7.14225469976213e-06,
      "loss": 0.0197,
      "step": 887400
    },
    {
      "epoch": 1.4522822934872974,
      "grad_norm": 1.6700336933135986,
      "learning_rate": 7.142188807548612e-06,
      "loss": 0.0323,
      "step": 887420
    },
    {
      "epoch": 1.4523150239259506,
      "grad_norm": 5.4625139236450195,
      "learning_rate": 7.1421229153350955e-06,
      "loss": 0.0221,
      "step": 887440
    },
    {
      "epoch": 1.452347754364604,
      "grad_norm": 1.166661262512207,
      "learning_rate": 7.142057023121578e-06,
      "loss": 0.0223,
      "step": 887460
    },
    {
      "epoch": 1.4523804848032573,
      "grad_norm": 0.9517565965652466,
      "learning_rate": 7.141991130908061e-06,
      "loss": 0.0232,
      "step": 887480
    },
    {
      "epoch": 1.4524132152419107,
      "grad_norm": 1.1182448863983154,
      "learning_rate": 7.141925238694544e-06,
      "loss": 0.0275,
      "step": 887500
    },
    {
      "epoch": 1.452445945680564,
      "grad_norm": 0.9748871922492981,
      "learning_rate": 7.141859346481027e-06,
      "loss": 0.0223,
      "step": 887520
    },
    {
      "epoch": 1.4524786761192172,
      "grad_norm": 0.3353419601917267,
      "learning_rate": 7.141793454267509e-06,
      "loss": 0.0252,
      "step": 887540
    },
    {
      "epoch": 1.4525114065578708,
      "grad_norm": 0.5826057195663452,
      "learning_rate": 7.141727562053993e-06,
      "loss": 0.0201,
      "step": 887560
    },
    {
      "epoch": 1.452544136996524,
      "grad_norm": 0.11072289943695068,
      "learning_rate": 7.141661669840475e-06,
      "loss": 0.0171,
      "step": 887580
    },
    {
      "epoch": 1.4525768674351773,
      "grad_norm": 0.39409464597702026,
      "learning_rate": 7.141595777626958e-06,
      "loss": 0.0274,
      "step": 887600
    },
    {
      "epoch": 1.4526095978738307,
      "grad_norm": 0.8662322759628296,
      "learning_rate": 7.141529885413442e-06,
      "loss": 0.021,
      "step": 887620
    },
    {
      "epoch": 1.452642328312484,
      "grad_norm": 0.33345699310302734,
      "learning_rate": 7.141463993199924e-06,
      "loss": 0.023,
      "step": 887640
    },
    {
      "epoch": 1.4526750587511374,
      "grad_norm": 0.7371454834938049,
      "learning_rate": 7.141398100986407e-06,
      "loss": 0.0191,
      "step": 887660
    },
    {
      "epoch": 1.4527077891897906,
      "grad_norm": 1.233359694480896,
      "learning_rate": 7.14133220877289e-06,
      "loss": 0.0285,
      "step": 887680
    },
    {
      "epoch": 1.4527405196284442,
      "grad_norm": 0.2491285800933838,
      "learning_rate": 7.141266316559373e-06,
      "loss": 0.019,
      "step": 887700
    },
    {
      "epoch": 1.4527732500670973,
      "grad_norm": 2.0756990909576416,
      "learning_rate": 7.141200424345856e-06,
      "loss": 0.024,
      "step": 887720
    },
    {
      "epoch": 1.4528059805057507,
      "grad_norm": 1.9110767841339111,
      "learning_rate": 7.141134532132339e-06,
      "loss": 0.0191,
      "step": 887740
    },
    {
      "epoch": 1.452838710944404,
      "grad_norm": 0.11869368702173233,
      "learning_rate": 7.141068639918821e-06,
      "loss": 0.0221,
      "step": 887760
    },
    {
      "epoch": 1.4528714413830575,
      "grad_norm": 0.3032267093658447,
      "learning_rate": 7.141002747705305e-06,
      "loss": 0.025,
      "step": 887780
    },
    {
      "epoch": 1.4529041718217108,
      "grad_norm": 0.360236257314682,
      "learning_rate": 7.1409368554917866e-06,
      "loss": 0.024,
      "step": 887800
    },
    {
      "epoch": 1.452936902260364,
      "grad_norm": 0.19547326862812042,
      "learning_rate": 7.14087096327827e-06,
      "loss": 0.0186,
      "step": 887820
    },
    {
      "epoch": 1.4529696326990174,
      "grad_norm": 1.231165885925293,
      "learning_rate": 7.140805071064753e-06,
      "loss": 0.0305,
      "step": 887840
    },
    {
      "epoch": 1.4530023631376707,
      "grad_norm": 0.8087828159332275,
      "learning_rate": 7.140739178851236e-06,
      "loss": 0.0151,
      "step": 887860
    },
    {
      "epoch": 1.453035093576324,
      "grad_norm": 0.44526684284210205,
      "learning_rate": 7.140673286637718e-06,
      "loss": 0.0209,
      "step": 887880
    },
    {
      "epoch": 1.4530678240149775,
      "grad_norm": 0.16977247595787048,
      "learning_rate": 7.140607394424202e-06,
      "loss": 0.026,
      "step": 887900
    },
    {
      "epoch": 1.4531005544536308,
      "grad_norm": 0.5316461324691772,
      "learning_rate": 7.140541502210684e-06,
      "loss": 0.021,
      "step": 887920
    },
    {
      "epoch": 1.4531332848922842,
      "grad_norm": 1.007287859916687,
      "learning_rate": 7.1404756099971674e-06,
      "loss": 0.0146,
      "step": 887940
    },
    {
      "epoch": 1.4531660153309374,
      "grad_norm": 0.8885915279388428,
      "learning_rate": 7.140409717783649e-06,
      "loss": 0.0206,
      "step": 887960
    },
    {
      "epoch": 1.4531987457695907,
      "grad_norm": 0.49951669573783875,
      "learning_rate": 7.140343825570133e-06,
      "loss": 0.0243,
      "step": 887980
    },
    {
      "epoch": 1.453231476208244,
      "grad_norm": 0.2128945291042328,
      "learning_rate": 7.1402779333566165e-06,
      "loss": 0.0166,
      "step": 888000
    },
    {
      "epoch": 1.4532642066468975,
      "grad_norm": 0.335488498210907,
      "learning_rate": 7.140212041143098e-06,
      "loss": 0.0125,
      "step": 888020
    },
    {
      "epoch": 1.4532969370855509,
      "grad_norm": 0.685710072517395,
      "learning_rate": 7.140146148929582e-06,
      "loss": 0.019,
      "step": 888040
    },
    {
      "epoch": 1.4533296675242042,
      "grad_norm": 0.810400128364563,
      "learning_rate": 7.140080256716065e-06,
      "loss": 0.0158,
      "step": 888060
    },
    {
      "epoch": 1.4533623979628576,
      "grad_norm": 2.44467830657959,
      "learning_rate": 7.1400143645025475e-06,
      "loss": 0.0216,
      "step": 888080
    },
    {
      "epoch": 1.4533951284015107,
      "grad_norm": 0.4613824486732483,
      "learning_rate": 7.13994847228903e-06,
      "loss": 0.0231,
      "step": 888100
    },
    {
      "epoch": 1.4534278588401641,
      "grad_norm": 0.34580856561660767,
      "learning_rate": 7.139882580075514e-06,
      "loss": 0.0217,
      "step": 888120
    },
    {
      "epoch": 1.4534605892788175,
      "grad_norm": 1.3523362874984741,
      "learning_rate": 7.139816687861996e-06,
      "loss": 0.0258,
      "step": 888140
    },
    {
      "epoch": 1.4534933197174709,
      "grad_norm": 0.8729591965675354,
      "learning_rate": 7.139750795648479e-06,
      "loss": 0.0154,
      "step": 888160
    },
    {
      "epoch": 1.4535260501561242,
      "grad_norm": 0.7416183948516846,
      "learning_rate": 7.139684903434961e-06,
      "loss": 0.0185,
      "step": 888180
    },
    {
      "epoch": 1.4535587805947776,
      "grad_norm": 0.1264140009880066,
      "learning_rate": 7.139619011221445e-06,
      "loss": 0.0172,
      "step": 888200
    },
    {
      "epoch": 1.453591511033431,
      "grad_norm": 0.263017863035202,
      "learning_rate": 7.139553119007927e-06,
      "loss": 0.0196,
      "step": 888220
    },
    {
      "epoch": 1.4536242414720841,
      "grad_norm": 0.3331017792224884,
      "learning_rate": 7.13948722679441e-06,
      "loss": 0.018,
      "step": 888240
    },
    {
      "epoch": 1.4536569719107375,
      "grad_norm": 0.4422762095928192,
      "learning_rate": 7.139421334580893e-06,
      "loss": 0.0193,
      "step": 888260
    },
    {
      "epoch": 1.4536897023493909,
      "grad_norm": 1.9121145009994507,
      "learning_rate": 7.139355442367376e-06,
      "loss": 0.0155,
      "step": 888280
    },
    {
      "epoch": 1.4537224327880442,
      "grad_norm": 0.5525431632995605,
      "learning_rate": 7.1392895501538584e-06,
      "loss": 0.0274,
      "step": 888300
    },
    {
      "epoch": 1.4537551632266976,
      "grad_norm": 0.47328993678092957,
      "learning_rate": 7.139223657940342e-06,
      "loss": 0.0215,
      "step": 888320
    },
    {
      "epoch": 1.4537878936653508,
      "grad_norm": 0.31493866443634033,
      "learning_rate": 7.139157765726825e-06,
      "loss": 0.0189,
      "step": 888340
    },
    {
      "epoch": 1.4538206241040044,
      "grad_norm": 0.5677511096000671,
      "learning_rate": 7.1390918735133075e-06,
      "loss": 0.0211,
      "step": 888360
    },
    {
      "epoch": 1.4538533545426575,
      "grad_norm": 1.0473558902740479,
      "learning_rate": 7.139025981299791e-06,
      "loss": 0.0237,
      "step": 888380
    },
    {
      "epoch": 1.4538860849813109,
      "grad_norm": 0.34580060839653015,
      "learning_rate": 7.138960089086273e-06,
      "loss": 0.0248,
      "step": 888400
    },
    {
      "epoch": 1.4539188154199643,
      "grad_norm": 0.5579169392585754,
      "learning_rate": 7.1388941968727566e-06,
      "loss": 0.0212,
      "step": 888420
    },
    {
      "epoch": 1.4539515458586176,
      "grad_norm": 0.1817210167646408,
      "learning_rate": 7.1388283046592385e-06,
      "loss": 0.0167,
      "step": 888440
    },
    {
      "epoch": 1.453984276297271,
      "grad_norm": 0.5313923358917236,
      "learning_rate": 7.138762412445722e-06,
      "loss": 0.0266,
      "step": 888460
    },
    {
      "epoch": 1.4540170067359242,
      "grad_norm": 0.5124735236167908,
      "learning_rate": 7.138696520232205e-06,
      "loss": 0.0158,
      "step": 888480
    },
    {
      "epoch": 1.4540497371745777,
      "grad_norm": 2.3726837635040283,
      "learning_rate": 7.1386306280186875e-06,
      "loss": 0.023,
      "step": 888500
    },
    {
      "epoch": 1.454082467613231,
      "grad_norm": 0.24864104390144348,
      "learning_rate": 7.13856473580517e-06,
      "loss": 0.0243,
      "step": 888520
    },
    {
      "epoch": 1.4541151980518843,
      "grad_norm": 0.7164197564125061,
      "learning_rate": 7.138498843591654e-06,
      "loss": 0.0208,
      "step": 888540
    },
    {
      "epoch": 1.4541479284905376,
      "grad_norm": 0.2337387502193451,
      "learning_rate": 7.138432951378136e-06,
      "loss": 0.0266,
      "step": 888560
    },
    {
      "epoch": 1.454180658929191,
      "grad_norm": 3.3058574199676514,
      "learning_rate": 7.138367059164619e-06,
      "loss": 0.0253,
      "step": 888580
    },
    {
      "epoch": 1.4542133893678444,
      "grad_norm": 0.6739857196807861,
      "learning_rate": 7.138301166951101e-06,
      "loss": 0.0183,
      "step": 888600
    },
    {
      "epoch": 1.4542461198064975,
      "grad_norm": 0.21147866547107697,
      "learning_rate": 7.138235274737585e-06,
      "loss": 0.0237,
      "step": 888620
    },
    {
      "epoch": 1.454278850245151,
      "grad_norm": 1.190039873123169,
      "learning_rate": 7.1381693825240676e-06,
      "loss": 0.0259,
      "step": 888640
    },
    {
      "epoch": 1.4543115806838043,
      "grad_norm": 0.23982319235801697,
      "learning_rate": 7.13810349031055e-06,
      "loss": 0.0233,
      "step": 888660
    },
    {
      "epoch": 1.4543443111224577,
      "grad_norm": 0.2581162750720978,
      "learning_rate": 7.138037598097034e-06,
      "loss": 0.0125,
      "step": 888680
    },
    {
      "epoch": 1.454377041561111,
      "grad_norm": 0.969496488571167,
      "learning_rate": 7.137971705883517e-06,
      "loss": 0.0172,
      "step": 888700
    },
    {
      "epoch": 1.4544097719997644,
      "grad_norm": 0.7858273983001709,
      "learning_rate": 7.137905813669999e-06,
      "loss": 0.0275,
      "step": 888720
    },
    {
      "epoch": 1.4544425024384178,
      "grad_norm": 0.4111267626285553,
      "learning_rate": 7.137839921456482e-06,
      "loss": 0.0219,
      "step": 888740
    },
    {
      "epoch": 1.454475232877071,
      "grad_norm": 0.3695771098136902,
      "learning_rate": 7.137774029242966e-06,
      "loss": 0.0208,
      "step": 888760
    },
    {
      "epoch": 1.4545079633157243,
      "grad_norm": 0.9791085720062256,
      "learning_rate": 7.1377081370294476e-06,
      "loss": 0.0191,
      "step": 888780
    },
    {
      "epoch": 1.4545406937543777,
      "grad_norm": 0.6773006916046143,
      "learning_rate": 7.137642244815931e-06,
      "loss": 0.0268,
      "step": 888800
    },
    {
      "epoch": 1.454573424193031,
      "grad_norm": 0.1705174297094345,
      "learning_rate": 7.137576352602413e-06,
      "loss": 0.0151,
      "step": 888820
    },
    {
      "epoch": 1.4546061546316844,
      "grad_norm": 0.27263206243515015,
      "learning_rate": 7.137510460388897e-06,
      "loss": 0.0183,
      "step": 888840
    },
    {
      "epoch": 1.4546388850703378,
      "grad_norm": 1.0773406028747559,
      "learning_rate": 7.137444568175379e-06,
      "loss": 0.0247,
      "step": 888860
    },
    {
      "epoch": 1.4546716155089912,
      "grad_norm": 0.4122532904148102,
      "learning_rate": 7.137378675961862e-06,
      "loss": 0.0176,
      "step": 888880
    },
    {
      "epoch": 1.4547043459476443,
      "grad_norm": 1.0717856884002686,
      "learning_rate": 7.137312783748345e-06,
      "loss": 0.0205,
      "step": 888900
    },
    {
      "epoch": 1.4547370763862977,
      "grad_norm": 0.6794852018356323,
      "learning_rate": 7.1372468915348284e-06,
      "loss": 0.0306,
      "step": 888920
    },
    {
      "epoch": 1.454769806824951,
      "grad_norm": 0.22248567640781403,
      "learning_rate": 7.13718099932131e-06,
      "loss": 0.0117,
      "step": 888940
    },
    {
      "epoch": 1.4548025372636044,
      "grad_norm": 0.4569638669490814,
      "learning_rate": 7.137115107107794e-06,
      "loss": 0.0229,
      "step": 888960
    },
    {
      "epoch": 1.4548352677022578,
      "grad_norm": 1.094616413116455,
      "learning_rate": 7.137049214894276e-06,
      "loss": 0.027,
      "step": 888980
    },
    {
      "epoch": 1.4548679981409112,
      "grad_norm": 0.3584003150463104,
      "learning_rate": 7.136983322680759e-06,
      "loss": 0.0257,
      "step": 889000
    },
    {
      "epoch": 1.4549007285795645,
      "grad_norm": 0.18488776683807373,
      "learning_rate": 7.136917430467242e-06,
      "loss": 0.0218,
      "step": 889020
    },
    {
      "epoch": 1.4549334590182177,
      "grad_norm": 1.174199104309082,
      "learning_rate": 7.136851538253725e-06,
      "loss": 0.0143,
      "step": 889040
    },
    {
      "epoch": 1.454966189456871,
      "grad_norm": 1.127842664718628,
      "learning_rate": 7.1367856460402085e-06,
      "loss": 0.0313,
      "step": 889060
    },
    {
      "epoch": 1.4549989198955244,
      "grad_norm": 1.583922028541565,
      "learning_rate": 7.136719753826691e-06,
      "loss": 0.0224,
      "step": 889080
    },
    {
      "epoch": 1.4550316503341778,
      "grad_norm": 0.26389238238334656,
      "learning_rate": 7.136653861613174e-06,
      "loss": 0.0175,
      "step": 889100
    },
    {
      "epoch": 1.4550643807728312,
      "grad_norm": 0.48115983605384827,
      "learning_rate": 7.136587969399657e-06,
      "loss": 0.0201,
      "step": 889120
    },
    {
      "epoch": 1.4550971112114843,
      "grad_norm": 0.6658786535263062,
      "learning_rate": 7.13652207718614e-06,
      "loss": 0.0192,
      "step": 889140
    },
    {
      "epoch": 1.455129841650138,
      "grad_norm": 0.5439164042472839,
      "learning_rate": 7.136456184972622e-06,
      "loss": 0.0222,
      "step": 889160
    },
    {
      "epoch": 1.455162572088791,
      "grad_norm": 0.971185564994812,
      "learning_rate": 7.136390292759106e-06,
      "loss": 0.0151,
      "step": 889180
    },
    {
      "epoch": 1.4551953025274444,
      "grad_norm": 0.2841901481151581,
      "learning_rate": 7.136324400545588e-06,
      "loss": 0.0177,
      "step": 889200
    },
    {
      "epoch": 1.4552280329660978,
      "grad_norm": 0.22588729858398438,
      "learning_rate": 7.136258508332071e-06,
      "loss": 0.0225,
      "step": 889220
    },
    {
      "epoch": 1.4552607634047512,
      "grad_norm": 0.761343240737915,
      "learning_rate": 7.136192616118553e-06,
      "loss": 0.0197,
      "step": 889240
    },
    {
      "epoch": 1.4552934938434046,
      "grad_norm": 0.28793758153915405,
      "learning_rate": 7.136126723905037e-06,
      "loss": 0.0271,
      "step": 889260
    },
    {
      "epoch": 1.4553262242820577,
      "grad_norm": 0.5028834342956543,
      "learning_rate": 7.1360608316915195e-06,
      "loss": 0.022,
      "step": 889280
    },
    {
      "epoch": 1.455358954720711,
      "grad_norm": 0.32164984941482544,
      "learning_rate": 7.135994939478002e-06,
      "loss": 0.0205,
      "step": 889300
    },
    {
      "epoch": 1.4553916851593645,
      "grad_norm": 0.31517764925956726,
      "learning_rate": 7.135929047264485e-06,
      "loss": 0.0134,
      "step": 889320
    },
    {
      "epoch": 1.4554244155980178,
      "grad_norm": 0.6374687552452087,
      "learning_rate": 7.1358631550509685e-06,
      "loss": 0.0193,
      "step": 889340
    },
    {
      "epoch": 1.4554571460366712,
      "grad_norm": 0.683040201663971,
      "learning_rate": 7.13579726283745e-06,
      "loss": 0.0271,
      "step": 889360
    },
    {
      "epoch": 1.4554898764753246,
      "grad_norm": 0.9090384840965271,
      "learning_rate": 7.135731370623934e-06,
      "loss": 0.0254,
      "step": 889380
    },
    {
      "epoch": 1.455522606913978,
      "grad_norm": 0.2919272482395172,
      "learning_rate": 7.135665478410418e-06,
      "loss": 0.0287,
      "step": 889400
    },
    {
      "epoch": 1.455555337352631,
      "grad_norm": 1.128257393836975,
      "learning_rate": 7.1355995861968995e-06,
      "loss": 0.0281,
      "step": 889420
    },
    {
      "epoch": 1.4555880677912845,
      "grad_norm": 0.35507047176361084,
      "learning_rate": 7.135533693983383e-06,
      "loss": 0.0131,
      "step": 889440
    },
    {
      "epoch": 1.4556207982299378,
      "grad_norm": 0.9158293008804321,
      "learning_rate": 7.135467801769865e-06,
      "loss": 0.0172,
      "step": 889460
    },
    {
      "epoch": 1.4556535286685912,
      "grad_norm": 1.0666282176971436,
      "learning_rate": 7.1354019095563485e-06,
      "loss": 0.0362,
      "step": 889480
    },
    {
      "epoch": 1.4556862591072446,
      "grad_norm": 0.20542137324810028,
      "learning_rate": 7.135336017342831e-06,
      "loss": 0.0155,
      "step": 889500
    },
    {
      "epoch": 1.455718989545898,
      "grad_norm": 0.3741500973701477,
      "learning_rate": 7.135270125129314e-06,
      "loss": 0.0204,
      "step": 889520
    },
    {
      "epoch": 1.4557517199845513,
      "grad_norm": 1.8354363441467285,
      "learning_rate": 7.135204232915797e-06,
      "loss": 0.029,
      "step": 889540
    },
    {
      "epoch": 1.4557844504232045,
      "grad_norm": 0.3985121250152588,
      "learning_rate": 7.13513834070228e-06,
      "loss": 0.0152,
      "step": 889560
    },
    {
      "epoch": 1.4558171808618579,
      "grad_norm": 0.8875119686126709,
      "learning_rate": 7.135072448488762e-06,
      "loss": 0.0177,
      "step": 889580
    },
    {
      "epoch": 1.4558499113005112,
      "grad_norm": 0.30669867992401123,
      "learning_rate": 7.135006556275246e-06,
      "loss": 0.0232,
      "step": 889600
    },
    {
      "epoch": 1.4558826417391646,
      "grad_norm": 0.7617291808128357,
      "learning_rate": 7.134940664061728e-06,
      "loss": 0.0191,
      "step": 889620
    },
    {
      "epoch": 1.455915372177818,
      "grad_norm": 0.4619706869125366,
      "learning_rate": 7.134874771848211e-06,
      "loss": 0.0255,
      "step": 889640
    },
    {
      "epoch": 1.4559481026164713,
      "grad_norm": 0.5669248104095459,
      "learning_rate": 7.134808879634694e-06,
      "loss": 0.0234,
      "step": 889660
    },
    {
      "epoch": 1.4559808330551247,
      "grad_norm": 0.34647876024246216,
      "learning_rate": 7.134742987421177e-06,
      "loss": 0.021,
      "step": 889680
    },
    {
      "epoch": 1.4560135634937779,
      "grad_norm": 0.30447202920913696,
      "learning_rate": 7.1346770952076595e-06,
      "loss": 0.0184,
      "step": 889700
    },
    {
      "epoch": 1.4560462939324312,
      "grad_norm": 0.13804608583450317,
      "learning_rate": 7.134611202994143e-06,
      "loss": 0.018,
      "step": 889720
    },
    {
      "epoch": 1.4560790243710846,
      "grad_norm": 0.5031226873397827,
      "learning_rate": 7.134545310780626e-06,
      "loss": 0.0225,
      "step": 889740
    },
    {
      "epoch": 1.456111754809738,
      "grad_norm": 0.782559871673584,
      "learning_rate": 7.134479418567109e-06,
      "loss": 0.0235,
      "step": 889760
    },
    {
      "epoch": 1.4561444852483914,
      "grad_norm": 0.5809755921363831,
      "learning_rate": 7.134413526353592e-06,
      "loss": 0.0291,
      "step": 889780
    },
    {
      "epoch": 1.4561772156870445,
      "grad_norm": 0.3272905945777893,
      "learning_rate": 7.134347634140074e-06,
      "loss": 0.0184,
      "step": 889800
    },
    {
      "epoch": 1.456209946125698,
      "grad_norm": 0.6244566440582275,
      "learning_rate": 7.134281741926558e-06,
      "loss": 0.0193,
      "step": 889820
    },
    {
      "epoch": 1.4562426765643512,
      "grad_norm": 0.2767453193664551,
      "learning_rate": 7.1342158497130396e-06,
      "loss": 0.017,
      "step": 889840
    },
    {
      "epoch": 1.4562754070030046,
      "grad_norm": 0.5823600888252258,
      "learning_rate": 7.134149957499523e-06,
      "loss": 0.021,
      "step": 889860
    },
    {
      "epoch": 1.456308137441658,
      "grad_norm": 0.723157525062561,
      "learning_rate": 7.134084065286006e-06,
      "loss": 0.0226,
      "step": 889880
    },
    {
      "epoch": 1.4563408678803114,
      "grad_norm": 1.0641155242919922,
      "learning_rate": 7.134018173072489e-06,
      "loss": 0.0224,
      "step": 889900
    },
    {
      "epoch": 1.4563735983189647,
      "grad_norm": 0.1090618371963501,
      "learning_rate": 7.133952280858971e-06,
      "loss": 0.0166,
      "step": 889920
    },
    {
      "epoch": 1.4564063287576179,
      "grad_norm": 1.0099154710769653,
      "learning_rate": 7.133886388645455e-06,
      "loss": 0.031,
      "step": 889940
    },
    {
      "epoch": 1.4564390591962715,
      "grad_norm": 0.11079367995262146,
      "learning_rate": 7.133820496431937e-06,
      "loss": 0.0149,
      "step": 889960
    },
    {
      "epoch": 1.4564717896349246,
      "grad_norm": 0.20771411061286926,
      "learning_rate": 7.1337546042184204e-06,
      "loss": 0.0179,
      "step": 889980
    },
    {
      "epoch": 1.456504520073578,
      "grad_norm": 0.3843003213405609,
      "learning_rate": 7.133688712004902e-06,
      "loss": 0.016,
      "step": 890000
    },
    {
      "epoch": 1.4565372505122314,
      "grad_norm": 0.8549861907958984,
      "learning_rate": 7.133622819791386e-06,
      "loss": 0.0171,
      "step": 890020
    },
    {
      "epoch": 1.4565699809508847,
      "grad_norm": 0.8147692084312439,
      "learning_rate": 7.133556927577868e-06,
      "loss": 0.0279,
      "step": 890040
    },
    {
      "epoch": 1.4566027113895381,
      "grad_norm": 1.8099302053451538,
      "learning_rate": 7.133491035364351e-06,
      "loss": 0.0274,
      "step": 890060
    },
    {
      "epoch": 1.4566354418281913,
      "grad_norm": 0.24725981056690216,
      "learning_rate": 7.133425143150834e-06,
      "loss": 0.0246,
      "step": 890080
    },
    {
      "epoch": 1.4566681722668446,
      "grad_norm": 0.36632925271987915,
      "learning_rate": 7.133359250937317e-06,
      "loss": 0.0161,
      "step": 890100
    },
    {
      "epoch": 1.456700902705498,
      "grad_norm": 1.2541558742523193,
      "learning_rate": 7.1332933587238005e-06,
      "loss": 0.0148,
      "step": 890120
    },
    {
      "epoch": 1.4567336331441514,
      "grad_norm": 0.6852790713310242,
      "learning_rate": 7.133227466510283e-06,
      "loss": 0.0208,
      "step": 890140
    },
    {
      "epoch": 1.4567663635828048,
      "grad_norm": 1.8072131872177124,
      "learning_rate": 7.133161574296767e-06,
      "loss": 0.0222,
      "step": 890160
    },
    {
      "epoch": 1.4567990940214581,
      "grad_norm": 0.3809316158294678,
      "learning_rate": 7.133095682083249e-06,
      "loss": 0.0158,
      "step": 890180
    },
    {
      "epoch": 1.4568318244601115,
      "grad_norm": 1.2288222312927246,
      "learning_rate": 7.133029789869732e-06,
      "loss": 0.0157,
      "step": 890200
    },
    {
      "epoch": 1.4568645548987647,
      "grad_norm": 0.2620246112346649,
      "learning_rate": 7.132963897656214e-06,
      "loss": 0.0223,
      "step": 890220
    },
    {
      "epoch": 1.456897285337418,
      "grad_norm": 2.0176706314086914,
      "learning_rate": 7.132898005442698e-06,
      "loss": 0.0163,
      "step": 890240
    },
    {
      "epoch": 1.4569300157760714,
      "grad_norm": 2.3578901290893555,
      "learning_rate": 7.13283211322918e-06,
      "loss": 0.0212,
      "step": 890260
    },
    {
      "epoch": 1.4569627462147248,
      "grad_norm": 0.5992563962936401,
      "learning_rate": 7.132766221015663e-06,
      "loss": 0.0165,
      "step": 890280
    },
    {
      "epoch": 1.4569954766533781,
      "grad_norm": 0.5667347311973572,
      "learning_rate": 7.132700328802146e-06,
      "loss": 0.0154,
      "step": 890300
    },
    {
      "epoch": 1.4570282070920315,
      "grad_norm": 0.47421008348464966,
      "learning_rate": 7.132634436588629e-06,
      "loss": 0.0228,
      "step": 890320
    },
    {
      "epoch": 1.4570609375306849,
      "grad_norm": 0.2154276818037033,
      "learning_rate": 7.1325685443751114e-06,
      "loss": 0.0221,
      "step": 890340
    },
    {
      "epoch": 1.457093667969338,
      "grad_norm": 0.3859351873397827,
      "learning_rate": 7.132502652161595e-06,
      "loss": 0.0252,
      "step": 890360
    },
    {
      "epoch": 1.4571263984079914,
      "grad_norm": 0.5842936038970947,
      "learning_rate": 7.132436759948077e-06,
      "loss": 0.018,
      "step": 890380
    },
    {
      "epoch": 1.4571591288466448,
      "grad_norm": 1.3756731748580933,
      "learning_rate": 7.1323708677345605e-06,
      "loss": 0.0222,
      "step": 890400
    },
    {
      "epoch": 1.4571918592852982,
      "grad_norm": 0.21598856151103973,
      "learning_rate": 7.132304975521042e-06,
      "loss": 0.0202,
      "step": 890420
    },
    {
      "epoch": 1.4572245897239515,
      "grad_norm": 1.5993021726608276,
      "learning_rate": 7.132239083307526e-06,
      "loss": 0.019,
      "step": 890440
    },
    {
      "epoch": 1.457257320162605,
      "grad_norm": 0.8566946387290955,
      "learning_rate": 7.1321731910940096e-06,
      "loss": 0.0219,
      "step": 890460
    },
    {
      "epoch": 1.4572900506012583,
      "grad_norm": 0.7693981528282166,
      "learning_rate": 7.1321072988804915e-06,
      "loss": 0.0165,
      "step": 890480
    },
    {
      "epoch": 1.4573227810399114,
      "grad_norm": 0.13258184492588043,
      "learning_rate": 7.132041406666975e-06,
      "loss": 0.0262,
      "step": 890500
    },
    {
      "epoch": 1.4573555114785648,
      "grad_norm": 0.6467567682266235,
      "learning_rate": 7.131975514453458e-06,
      "loss": 0.0196,
      "step": 890520
    },
    {
      "epoch": 1.4573882419172182,
      "grad_norm": 0.17341329157352448,
      "learning_rate": 7.1319096222399405e-06,
      "loss": 0.0209,
      "step": 890540
    },
    {
      "epoch": 1.4574209723558715,
      "grad_norm": 0.25945714116096497,
      "learning_rate": 7.131843730026423e-06,
      "loss": 0.0268,
      "step": 890560
    },
    {
      "epoch": 1.457453702794525,
      "grad_norm": 1.001124382019043,
      "learning_rate": 7.131777837812907e-06,
      "loss": 0.0258,
      "step": 890580
    },
    {
      "epoch": 1.457486433233178,
      "grad_norm": 0.62006014585495,
      "learning_rate": 7.131711945599389e-06,
      "loss": 0.0255,
      "step": 890600
    },
    {
      "epoch": 1.4575191636718317,
      "grad_norm": 1.3386305570602417,
      "learning_rate": 7.131646053385872e-06,
      "loss": 0.0182,
      "step": 890620
    },
    {
      "epoch": 1.4575518941104848,
      "grad_norm": 0.6940574645996094,
      "learning_rate": 7.131580161172354e-06,
      "loss": 0.029,
      "step": 890640
    },
    {
      "epoch": 1.4575846245491382,
      "grad_norm": 0.2292191982269287,
      "learning_rate": 7.131514268958838e-06,
      "loss": 0.0155,
      "step": 890660
    },
    {
      "epoch": 1.4576173549877915,
      "grad_norm": 0.45458823442459106,
      "learning_rate": 7.1314483767453206e-06,
      "loss": 0.0224,
      "step": 890680
    },
    {
      "epoch": 1.457650085426445,
      "grad_norm": 0.642246425151825,
      "learning_rate": 7.131382484531803e-06,
      "loss": 0.0154,
      "step": 890700
    },
    {
      "epoch": 1.4576828158650983,
      "grad_norm": 0.8588954210281372,
      "learning_rate": 7.131316592318286e-06,
      "loss": 0.0183,
      "step": 890720
    },
    {
      "epoch": 1.4577155463037514,
      "grad_norm": 0.4739576280117035,
      "learning_rate": 7.13125070010477e-06,
      "loss": 0.0247,
      "step": 890740
    },
    {
      "epoch": 1.457748276742405,
      "grad_norm": 0.5485996007919312,
      "learning_rate": 7.1311848078912515e-06,
      "loss": 0.0173,
      "step": 890760
    },
    {
      "epoch": 1.4577810071810582,
      "grad_norm": 0.12212858349084854,
      "learning_rate": 7.131118915677735e-06,
      "loss": 0.02,
      "step": 890780
    },
    {
      "epoch": 1.4578137376197116,
      "grad_norm": 0.6612794995307922,
      "learning_rate": 7.131053023464219e-06,
      "loss": 0.036,
      "step": 890800
    },
    {
      "epoch": 1.457846468058365,
      "grad_norm": 0.5941656827926636,
      "learning_rate": 7.130987131250701e-06,
      "loss": 0.0215,
      "step": 890820
    },
    {
      "epoch": 1.4578791984970183,
      "grad_norm": 0.47786715626716614,
      "learning_rate": 7.130921239037184e-06,
      "loss": 0.0209,
      "step": 890840
    },
    {
      "epoch": 1.4579119289356717,
      "grad_norm": 0.6719886064529419,
      "learning_rate": 7.130855346823666e-06,
      "loss": 0.016,
      "step": 890860
    },
    {
      "epoch": 1.4579446593743248,
      "grad_norm": 0.9658603072166443,
      "learning_rate": 7.13078945461015e-06,
      "loss": 0.02,
      "step": 890880
    },
    {
      "epoch": 1.4579773898129782,
      "grad_norm": 0.23025399446487427,
      "learning_rate": 7.130723562396632e-06,
      "loss": 0.0192,
      "step": 890900
    },
    {
      "epoch": 1.4580101202516316,
      "grad_norm": 0.8264925479888916,
      "learning_rate": 7.130657670183115e-06,
      "loss": 0.0159,
      "step": 890920
    },
    {
      "epoch": 1.458042850690285,
      "grad_norm": 0.2921479642391205,
      "learning_rate": 7.130591777969598e-06,
      "loss": 0.0219,
      "step": 890940
    },
    {
      "epoch": 1.4580755811289383,
      "grad_norm": 0.6680141687393188,
      "learning_rate": 7.1305258857560815e-06,
      "loss": 0.0162,
      "step": 890960
    },
    {
      "epoch": 1.4581083115675917,
      "grad_norm": 0.32886987924575806,
      "learning_rate": 7.130459993542563e-06,
      "loss": 0.0214,
      "step": 890980
    },
    {
      "epoch": 1.458141042006245,
      "grad_norm": 0.7488860487937927,
      "learning_rate": 7.130394101329047e-06,
      "loss": 0.0216,
      "step": 891000
    },
    {
      "epoch": 1.4581737724448982,
      "grad_norm": 0.1425742655992508,
      "learning_rate": 7.130328209115529e-06,
      "loss": 0.0226,
      "step": 891020
    },
    {
      "epoch": 1.4582065028835516,
      "grad_norm": 0.4479115605354309,
      "learning_rate": 7.130262316902012e-06,
      "loss": 0.0157,
      "step": 891040
    },
    {
      "epoch": 1.458239233322205,
      "grad_norm": 0.43569594621658325,
      "learning_rate": 7.130196424688494e-06,
      "loss": 0.014,
      "step": 891060
    },
    {
      "epoch": 1.4582719637608583,
      "grad_norm": 0.31891122460365295,
      "learning_rate": 7.130130532474978e-06,
      "loss": 0.0246,
      "step": 891080
    },
    {
      "epoch": 1.4583046941995117,
      "grad_norm": 0.3121820092201233,
      "learning_rate": 7.130064640261461e-06,
      "loss": 0.0269,
      "step": 891100
    },
    {
      "epoch": 1.458337424638165,
      "grad_norm": 0.7235326170921326,
      "learning_rate": 7.129998748047943e-06,
      "loss": 0.0197,
      "step": 891120
    },
    {
      "epoch": 1.4583701550768184,
      "grad_norm": 4.720004558563232,
      "learning_rate": 7.129932855834427e-06,
      "loss": 0.0299,
      "step": 891140
    },
    {
      "epoch": 1.4584028855154716,
      "grad_norm": 1.7001358270645142,
      "learning_rate": 7.12986696362091e-06,
      "loss": 0.0242,
      "step": 891160
    },
    {
      "epoch": 1.458435615954125,
      "grad_norm": 0.1896875500679016,
      "learning_rate": 7.1298010714073924e-06,
      "loss": 0.0169,
      "step": 891180
    },
    {
      "epoch": 1.4584683463927783,
      "grad_norm": 0.7144408226013184,
      "learning_rate": 7.129735179193875e-06,
      "loss": 0.0286,
      "step": 891200
    },
    {
      "epoch": 1.4585010768314317,
      "grad_norm": 0.6955330967903137,
      "learning_rate": 7.129669286980359e-06,
      "loss": 0.0226,
      "step": 891220
    },
    {
      "epoch": 1.458533807270085,
      "grad_norm": 0.9343996047973633,
      "learning_rate": 7.129603394766841e-06,
      "loss": 0.0164,
      "step": 891240
    },
    {
      "epoch": 1.4585665377087385,
      "grad_norm": 0.8492096662521362,
      "learning_rate": 7.129537502553324e-06,
      "loss": 0.0268,
      "step": 891260
    },
    {
      "epoch": 1.4585992681473918,
      "grad_norm": 0.40031102299690247,
      "learning_rate": 7.129471610339806e-06,
      "loss": 0.0215,
      "step": 891280
    },
    {
      "epoch": 1.458631998586045,
      "grad_norm": 0.2617473602294922,
      "learning_rate": 7.12940571812629e-06,
      "loss": 0.0173,
      "step": 891300
    },
    {
      "epoch": 1.4586647290246983,
      "grad_norm": 0.39410400390625,
      "learning_rate": 7.1293398259127725e-06,
      "loss": 0.0265,
      "step": 891320
    },
    {
      "epoch": 1.4586974594633517,
      "grad_norm": 0.6407466530799866,
      "learning_rate": 7.129273933699255e-06,
      "loss": 0.0159,
      "step": 891340
    },
    {
      "epoch": 1.458730189902005,
      "grad_norm": 1.2359858751296997,
      "learning_rate": 7.129208041485738e-06,
      "loss": 0.0235,
      "step": 891360
    },
    {
      "epoch": 1.4587629203406585,
      "grad_norm": 0.21319971978664398,
      "learning_rate": 7.1291421492722215e-06,
      "loss": 0.0206,
      "step": 891380
    },
    {
      "epoch": 1.4587956507793116,
      "grad_norm": 0.5534688234329224,
      "learning_rate": 7.1290762570587034e-06,
      "loss": 0.0158,
      "step": 891400
    },
    {
      "epoch": 1.4588283812179652,
      "grad_norm": 0.5957568287849426,
      "learning_rate": 7.129010364845187e-06,
      "loss": 0.0177,
      "step": 891420
    },
    {
      "epoch": 1.4588611116566184,
      "grad_norm": 0.8837665915489197,
      "learning_rate": 7.128944472631669e-06,
      "loss": 0.0289,
      "step": 891440
    },
    {
      "epoch": 1.4588938420952717,
      "grad_norm": 0.1826643943786621,
      "learning_rate": 7.1288785804181525e-06,
      "loss": 0.0157,
      "step": 891460
    },
    {
      "epoch": 1.458926572533925,
      "grad_norm": 0.502249002456665,
      "learning_rate": 7.128812688204635e-06,
      "loss": 0.0238,
      "step": 891480
    },
    {
      "epoch": 1.4589593029725785,
      "grad_norm": 1.5696887969970703,
      "learning_rate": 7.128746795991118e-06,
      "loss": 0.0177,
      "step": 891500
    },
    {
      "epoch": 1.4589920334112318,
      "grad_norm": 1.128533959388733,
      "learning_rate": 7.1286809037776016e-06,
      "loss": 0.0321,
      "step": 891520
    },
    {
      "epoch": 1.459024763849885,
      "grad_norm": 0.5960806608200073,
      "learning_rate": 7.128615011564084e-06,
      "loss": 0.0195,
      "step": 891540
    },
    {
      "epoch": 1.4590574942885386,
      "grad_norm": 0.4835391640663147,
      "learning_rate": 7.128549119350567e-06,
      "loss": 0.0171,
      "step": 891560
    },
    {
      "epoch": 1.4590902247271917,
      "grad_norm": 0.4519451856613159,
      "learning_rate": 7.12848322713705e-06,
      "loss": 0.0136,
      "step": 891580
    },
    {
      "epoch": 1.4591229551658451,
      "grad_norm": 0.4393360912799835,
      "learning_rate": 7.128417334923533e-06,
      "loss": 0.0205,
      "step": 891600
    },
    {
      "epoch": 1.4591556856044985,
      "grad_norm": 0.4466973543167114,
      "learning_rate": 7.128351442710015e-06,
      "loss": 0.0174,
      "step": 891620
    },
    {
      "epoch": 1.4591884160431519,
      "grad_norm": 1.389450192451477,
      "learning_rate": 7.128285550496499e-06,
      "loss": 0.0245,
      "step": 891640
    },
    {
      "epoch": 1.4592211464818052,
      "grad_norm": 0.34698036313056946,
      "learning_rate": 7.128219658282981e-06,
      "loss": 0.0174,
      "step": 891660
    },
    {
      "epoch": 1.4592538769204584,
      "grad_norm": 0.8284119963645935,
      "learning_rate": 7.128153766069464e-06,
      "loss": 0.0288,
      "step": 891680
    },
    {
      "epoch": 1.4592866073591118,
      "grad_norm": 1.1110553741455078,
      "learning_rate": 7.128087873855947e-06,
      "loss": 0.0223,
      "step": 891700
    },
    {
      "epoch": 1.4593193377977651,
      "grad_norm": 0.8492087125778198,
      "learning_rate": 7.12802198164243e-06,
      "loss": 0.0214,
      "step": 891720
    },
    {
      "epoch": 1.4593520682364185,
      "grad_norm": 1.2856860160827637,
      "learning_rate": 7.1279560894289125e-06,
      "loss": 0.0223,
      "step": 891740
    },
    {
      "epoch": 1.4593847986750719,
      "grad_norm": 1.6139988899230957,
      "learning_rate": 7.127890197215396e-06,
      "loss": 0.0172,
      "step": 891760
    },
    {
      "epoch": 1.4594175291137252,
      "grad_norm": 0.6710138320922852,
      "learning_rate": 7.127824305001878e-06,
      "loss": 0.0216,
      "step": 891780
    },
    {
      "epoch": 1.4594502595523786,
      "grad_norm": 0.1445251852273941,
      "learning_rate": 7.127758412788362e-06,
      "loss": 0.0165,
      "step": 891800
    },
    {
      "epoch": 1.4594829899910318,
      "grad_norm": 0.4590243101119995,
      "learning_rate": 7.1276925205748435e-06,
      "loss": 0.0208,
      "step": 891820
    },
    {
      "epoch": 1.4595157204296851,
      "grad_norm": 0.14757919311523438,
      "learning_rate": 7.127626628361327e-06,
      "loss": 0.0195,
      "step": 891840
    },
    {
      "epoch": 1.4595484508683385,
      "grad_norm": 1.645781397819519,
      "learning_rate": 7.127560736147811e-06,
      "loss": 0.018,
      "step": 891860
    },
    {
      "epoch": 1.4595811813069919,
      "grad_norm": 2.444152355194092,
      "learning_rate": 7.1274948439342926e-06,
      "loss": 0.0213,
      "step": 891880
    },
    {
      "epoch": 1.4596139117456453,
      "grad_norm": 0.5352683663368225,
      "learning_rate": 7.127428951720776e-06,
      "loss": 0.0182,
      "step": 891900
    },
    {
      "epoch": 1.4596466421842986,
      "grad_norm": 0.5960806012153625,
      "learning_rate": 7.127363059507259e-06,
      "loss": 0.0194,
      "step": 891920
    },
    {
      "epoch": 1.459679372622952,
      "grad_norm": 0.4212997853755951,
      "learning_rate": 7.127297167293742e-06,
      "loss": 0.0209,
      "step": 891940
    },
    {
      "epoch": 1.4597121030616051,
      "grad_norm": null,
      "learning_rate": 7.127231275080224e-06,
      "loss": 0.0266,
      "step": 891960
    },
    {
      "epoch": 1.4597448335002585,
      "grad_norm": 0.8836303949356079,
      "learning_rate": 7.127165382866708e-06,
      "loss": 0.0251,
      "step": 891980
    },
    {
      "epoch": 1.459777563938912,
      "grad_norm": 0.6006292104721069,
      "learning_rate": 7.12709949065319e-06,
      "loss": 0.0166,
      "step": 892000
    },
    {
      "epoch": 1.4598102943775653,
      "grad_norm": 0.4551137685775757,
      "learning_rate": 7.1270335984396734e-06,
      "loss": 0.0219,
      "step": 892020
    },
    {
      "epoch": 1.4598430248162186,
      "grad_norm": 1.6393908262252808,
      "learning_rate": 7.126967706226155e-06,
      "loss": 0.0175,
      "step": 892040
    },
    {
      "epoch": 1.4598757552548718,
      "grad_norm": 0.8844932317733765,
      "learning_rate": 7.126901814012639e-06,
      "loss": 0.0174,
      "step": 892060
    },
    {
      "epoch": 1.4599084856935254,
      "grad_norm": 0.5923810601234436,
      "learning_rate": 7.126835921799121e-06,
      "loss": 0.0145,
      "step": 892080
    },
    {
      "epoch": 1.4599412161321785,
      "grad_norm": 1.099142074584961,
      "learning_rate": 7.126770029585604e-06,
      "loss": 0.0228,
      "step": 892100
    },
    {
      "epoch": 1.459973946570832,
      "grad_norm": 0.6309599280357361,
      "learning_rate": 7.126704137372087e-06,
      "loss": 0.0157,
      "step": 892120
    },
    {
      "epoch": 1.4600066770094853,
      "grad_norm": 0.5446958541870117,
      "learning_rate": 7.12663824515857e-06,
      "loss": 0.0191,
      "step": 892140
    },
    {
      "epoch": 1.4600394074481386,
      "grad_norm": 0.4046648442745209,
      "learning_rate": 7.126572352945053e-06,
      "loss": 0.0216,
      "step": 892160
    },
    {
      "epoch": 1.460072137886792,
      "grad_norm": 2.095508575439453,
      "learning_rate": 7.126506460731536e-06,
      "loss": 0.0245,
      "step": 892180
    },
    {
      "epoch": 1.4601048683254452,
      "grad_norm": 0.09313281625509262,
      "learning_rate": 7.126440568518019e-06,
      "loss": 0.0211,
      "step": 892200
    },
    {
      "epoch": 1.4601375987640988,
      "grad_norm": 0.7766242027282715,
      "learning_rate": 7.126374676304502e-06,
      "loss": 0.0208,
      "step": 892220
    },
    {
      "epoch": 1.460170329202752,
      "grad_norm": 0.5117115378379822,
      "learning_rate": 7.126308784090985e-06,
      "loss": 0.0187,
      "step": 892240
    },
    {
      "epoch": 1.4602030596414053,
      "grad_norm": 0.9839735627174377,
      "learning_rate": 7.126242891877467e-06,
      "loss": 0.0173,
      "step": 892260
    },
    {
      "epoch": 1.4602357900800587,
      "grad_norm": 0.21290598809719086,
      "learning_rate": 7.126176999663951e-06,
      "loss": 0.0208,
      "step": 892280
    },
    {
      "epoch": 1.460268520518712,
      "grad_norm": 0.16068297624588013,
      "learning_rate": 7.126111107450433e-06,
      "loss": 0.0279,
      "step": 892300
    },
    {
      "epoch": 1.4603012509573654,
      "grad_norm": 0.23853574693202972,
      "learning_rate": 7.126045215236916e-06,
      "loss": 0.0194,
      "step": 892320
    },
    {
      "epoch": 1.4603339813960186,
      "grad_norm": 0.3836897611618042,
      "learning_rate": 7.125979323023399e-06,
      "loss": 0.0187,
      "step": 892340
    },
    {
      "epoch": 1.460366711834672,
      "grad_norm": 0.6661859154701233,
      "learning_rate": 7.125913430809882e-06,
      "loss": 0.0231,
      "step": 892360
    },
    {
      "epoch": 1.4603994422733253,
      "grad_norm": 0.18794605135917664,
      "learning_rate": 7.1258475385963644e-06,
      "loss": 0.0129,
      "step": 892380
    },
    {
      "epoch": 1.4604321727119787,
      "grad_norm": 0.33910924196243286,
      "learning_rate": 7.125781646382848e-06,
      "loss": 0.0148,
      "step": 892400
    },
    {
      "epoch": 1.460464903150632,
      "grad_norm": 0.6227942109107971,
      "learning_rate": 7.12571575416933e-06,
      "loss": 0.0232,
      "step": 892420
    },
    {
      "epoch": 1.4604976335892854,
      "grad_norm": 0.4726870059967041,
      "learning_rate": 7.1256498619558135e-06,
      "loss": 0.0191,
      "step": 892440
    },
    {
      "epoch": 1.4605303640279388,
      "grad_norm": 0.7819515466690063,
      "learning_rate": 7.125583969742295e-06,
      "loss": 0.0126,
      "step": 892460
    },
    {
      "epoch": 1.460563094466592,
      "grad_norm": 0.7657049298286438,
      "learning_rate": 7.125518077528779e-06,
      "loss": 0.0212,
      "step": 892480
    },
    {
      "epoch": 1.4605958249052453,
      "grad_norm": 0.3563670516014099,
      "learning_rate": 7.125452185315262e-06,
      "loss": 0.0162,
      "step": 892500
    },
    {
      "epoch": 1.4606285553438987,
      "grad_norm": 0.3005342185497284,
      "learning_rate": 7.1253862931017445e-06,
      "loss": 0.0259,
      "step": 892520
    },
    {
      "epoch": 1.460661285782552,
      "grad_norm": 0.315064400434494,
      "learning_rate": 7.125320400888227e-06,
      "loss": 0.0164,
      "step": 892540
    },
    {
      "epoch": 1.4606940162212054,
      "grad_norm": 0.8760573267936707,
      "learning_rate": 7.125254508674711e-06,
      "loss": 0.0235,
      "step": 892560
    },
    {
      "epoch": 1.4607267466598588,
      "grad_norm": 0.35328415036201477,
      "learning_rate": 7.1251886164611935e-06,
      "loss": 0.0158,
      "step": 892580
    },
    {
      "epoch": 1.4607594770985122,
      "grad_norm": 0.43913134932518005,
      "learning_rate": 7.125122724247676e-06,
      "loss": 0.0171,
      "step": 892600
    },
    {
      "epoch": 1.4607922075371653,
      "grad_norm": 0.27471864223480225,
      "learning_rate": 7.12505683203416e-06,
      "loss": 0.0167,
      "step": 892620
    },
    {
      "epoch": 1.4608249379758187,
      "grad_norm": 1.2836788892745972,
      "learning_rate": 7.124990939820642e-06,
      "loss": 0.023,
      "step": 892640
    },
    {
      "epoch": 1.460857668414472,
      "grad_norm": 0.4911557734012604,
      "learning_rate": 7.124925047607125e-06,
      "loss": 0.0226,
      "step": 892660
    },
    {
      "epoch": 1.4608903988531254,
      "grad_norm": 0.5582762956619263,
      "learning_rate": 7.124859155393607e-06,
      "loss": 0.0237,
      "step": 892680
    },
    {
      "epoch": 1.4609231292917788,
      "grad_norm": 0.3255380690097809,
      "learning_rate": 7.124793263180091e-06,
      "loss": 0.0206,
      "step": 892700
    },
    {
      "epoch": 1.4609558597304322,
      "grad_norm": 1.7417908906936646,
      "learning_rate": 7.1247273709665736e-06,
      "loss": 0.0259,
      "step": 892720
    },
    {
      "epoch": 1.4609885901690856,
      "grad_norm": 0.37996768951416016,
      "learning_rate": 7.124661478753056e-06,
      "loss": 0.0221,
      "step": 892740
    },
    {
      "epoch": 1.4610213206077387,
      "grad_norm": 1.041662573814392,
      "learning_rate": 7.124595586539539e-06,
      "loss": 0.0141,
      "step": 892760
    },
    {
      "epoch": 1.461054051046392,
      "grad_norm": 0.47430816292762756,
      "learning_rate": 7.124529694326023e-06,
      "loss": 0.0198,
      "step": 892780
    },
    {
      "epoch": 1.4610867814850454,
      "grad_norm": 1.6543939113616943,
      "learning_rate": 7.1244638021125045e-06,
      "loss": 0.0177,
      "step": 892800
    },
    {
      "epoch": 1.4611195119236988,
      "grad_norm": 2.1250269412994385,
      "learning_rate": 7.124397909898988e-06,
      "loss": 0.0125,
      "step": 892820
    },
    {
      "epoch": 1.4611522423623522,
      "grad_norm": 0.5315504670143127,
      "learning_rate": 7.12433201768547e-06,
      "loss": 0.0133,
      "step": 892840
    },
    {
      "epoch": 1.4611849728010053,
      "grad_norm": 0.4693896174430847,
      "learning_rate": 7.124266125471954e-06,
      "loss": 0.0294,
      "step": 892860
    },
    {
      "epoch": 1.461217703239659,
      "grad_norm": 0.9928162693977356,
      "learning_rate": 7.1242002332584355e-06,
      "loss": 0.0184,
      "step": 892880
    },
    {
      "epoch": 1.461250433678312,
      "grad_norm": 1.0106794834136963,
      "learning_rate": 7.124134341044919e-06,
      "loss": 0.0225,
      "step": 892900
    },
    {
      "epoch": 1.4612831641169655,
      "grad_norm": 2.0605123043060303,
      "learning_rate": 7.124068448831403e-06,
      "loss": 0.0164,
      "step": 892920
    },
    {
      "epoch": 1.4613158945556188,
      "grad_norm": 0.7696596384048462,
      "learning_rate": 7.124002556617885e-06,
      "loss": 0.0335,
      "step": 892940
    },
    {
      "epoch": 1.4613486249942722,
      "grad_norm": 0.4960777461528778,
      "learning_rate": 7.123936664404368e-06,
      "loss": 0.0199,
      "step": 892960
    },
    {
      "epoch": 1.4613813554329256,
      "grad_norm": 0.5023016333580017,
      "learning_rate": 7.123870772190851e-06,
      "loss": 0.0242,
      "step": 892980
    },
    {
      "epoch": 1.4614140858715787,
      "grad_norm": 0.24536415934562683,
      "learning_rate": 7.1238048799773345e-06,
      "loss": 0.0234,
      "step": 893000
    },
    {
      "epoch": 1.4614468163102323,
      "grad_norm": 0.7011593580245972,
      "learning_rate": 7.123738987763816e-06,
      "loss": 0.0258,
      "step": 893020
    },
    {
      "epoch": 1.4614795467488855,
      "grad_norm": 0.37310871481895447,
      "learning_rate": 7.1236730955503e-06,
      "loss": 0.0178,
      "step": 893040
    },
    {
      "epoch": 1.4615122771875388,
      "grad_norm": 2.0671334266662598,
      "learning_rate": 7.123607203336782e-06,
      "loss": 0.0199,
      "step": 893060
    },
    {
      "epoch": 1.4615450076261922,
      "grad_norm": 0.19535768032073975,
      "learning_rate": 7.123541311123265e-06,
      "loss": 0.0214,
      "step": 893080
    },
    {
      "epoch": 1.4615777380648456,
      "grad_norm": 0.29188022017478943,
      "learning_rate": 7.123475418909747e-06,
      "loss": 0.0271,
      "step": 893100
    },
    {
      "epoch": 1.461610468503499,
      "grad_norm": 0.7872561812400818,
      "learning_rate": 7.123409526696231e-06,
      "loss": 0.0267,
      "step": 893120
    },
    {
      "epoch": 1.461643198942152,
      "grad_norm": 0.22500832378864288,
      "learning_rate": 7.123343634482714e-06,
      "loss": 0.0231,
      "step": 893140
    },
    {
      "epoch": 1.4616759293808055,
      "grad_norm": 0.3002174198627472,
      "learning_rate": 7.123277742269196e-06,
      "loss": 0.0168,
      "step": 893160
    },
    {
      "epoch": 1.4617086598194589,
      "grad_norm": 2.111492395401001,
      "learning_rate": 7.123211850055679e-06,
      "loss": 0.0159,
      "step": 893180
    },
    {
      "epoch": 1.4617413902581122,
      "grad_norm": 0.2693983316421509,
      "learning_rate": 7.123145957842163e-06,
      "loss": 0.0168,
      "step": 893200
    },
    {
      "epoch": 1.4617741206967656,
      "grad_norm": 1.2457040548324585,
      "learning_rate": 7.123080065628645e-06,
      "loss": 0.0156,
      "step": 893220
    },
    {
      "epoch": 1.461806851135419,
      "grad_norm": 0.8217061758041382,
      "learning_rate": 7.123014173415128e-06,
      "loss": 0.0157,
      "step": 893240
    },
    {
      "epoch": 1.4618395815740723,
      "grad_norm": 1.3279565572738647,
      "learning_rate": 7.122948281201612e-06,
      "loss": 0.0248,
      "step": 893260
    },
    {
      "epoch": 1.4618723120127255,
      "grad_norm": 0.4720580279827118,
      "learning_rate": 7.122882388988094e-06,
      "loss": 0.0145,
      "step": 893280
    },
    {
      "epoch": 1.4619050424513789,
      "grad_norm": 0.9090523719787598,
      "learning_rate": 7.122816496774577e-06,
      "loss": 0.0173,
      "step": 893300
    },
    {
      "epoch": 1.4619377728900322,
      "grad_norm": 0.424426406621933,
      "learning_rate": 7.122750604561059e-06,
      "loss": 0.0271,
      "step": 893320
    },
    {
      "epoch": 1.4619705033286856,
      "grad_norm": 1.222254991531372,
      "learning_rate": 7.122684712347543e-06,
      "loss": 0.03,
      "step": 893340
    },
    {
      "epoch": 1.462003233767339,
      "grad_norm": 0.676272988319397,
      "learning_rate": 7.1226188201340255e-06,
      "loss": 0.0157,
      "step": 893360
    },
    {
      "epoch": 1.4620359642059924,
      "grad_norm": 0.42062729597091675,
      "learning_rate": 7.122552927920508e-06,
      "loss": 0.0242,
      "step": 893380
    },
    {
      "epoch": 1.4620686946446457,
      "grad_norm": 0.7793245911598206,
      "learning_rate": 7.122487035706991e-06,
      "loss": 0.0139,
      "step": 893400
    },
    {
      "epoch": 1.4621014250832989,
      "grad_norm": 1.6075176000595093,
      "learning_rate": 7.1224211434934745e-06,
      "loss": 0.0176,
      "step": 893420
    },
    {
      "epoch": 1.4621341555219523,
      "grad_norm": 0.19744525849819183,
      "learning_rate": 7.1223552512799564e-06,
      "loss": 0.0232,
      "step": 893440
    },
    {
      "epoch": 1.4621668859606056,
      "grad_norm": 0.7662078142166138,
      "learning_rate": 7.12228935906644e-06,
      "loss": 0.013,
      "step": 893460
    },
    {
      "epoch": 1.462199616399259,
      "grad_norm": 0.4757627248764038,
      "learning_rate": 7.122223466852922e-06,
      "loss": 0.0215,
      "step": 893480
    },
    {
      "epoch": 1.4622323468379124,
      "grad_norm": 0.2978028953075409,
      "learning_rate": 7.1221575746394055e-06,
      "loss": 0.0125,
      "step": 893500
    },
    {
      "epoch": 1.4622650772765657,
      "grad_norm": 1.2622560262680054,
      "learning_rate": 7.122091682425888e-06,
      "loss": 0.0197,
      "step": 893520
    },
    {
      "epoch": 1.4622978077152191,
      "grad_norm": 0.4958481788635254,
      "learning_rate": 7.122025790212371e-06,
      "loss": 0.0209,
      "step": 893540
    },
    {
      "epoch": 1.4623305381538723,
      "grad_norm": 1.2388664484024048,
      "learning_rate": 7.121959897998854e-06,
      "loss": 0.0178,
      "step": 893560
    },
    {
      "epoch": 1.4623632685925256,
      "grad_norm": 0.435940146446228,
      "learning_rate": 7.121894005785337e-06,
      "loss": 0.0269,
      "step": 893580
    },
    {
      "epoch": 1.462395999031179,
      "grad_norm": 0.12547869980335236,
      "learning_rate": 7.121828113571819e-06,
      "loss": 0.0274,
      "step": 893600
    },
    {
      "epoch": 1.4624287294698324,
      "grad_norm": 0.8124338388442993,
      "learning_rate": 7.121762221358303e-06,
      "loss": 0.0213,
      "step": 893620
    },
    {
      "epoch": 1.4624614599084858,
      "grad_norm": 0.4042987823486328,
      "learning_rate": 7.121696329144786e-06,
      "loss": 0.0326,
      "step": 893640
    },
    {
      "epoch": 1.462494190347139,
      "grad_norm": 1.1492407321929932,
      "learning_rate": 7.121630436931268e-06,
      "loss": 0.0236,
      "step": 893660
    },
    {
      "epoch": 1.4625269207857925,
      "grad_norm": 0.6248703598976135,
      "learning_rate": 7.121564544717752e-06,
      "loss": 0.0165,
      "step": 893680
    },
    {
      "epoch": 1.4625596512244456,
      "grad_norm": 0.4194908142089844,
      "learning_rate": 7.121498652504234e-06,
      "loss": 0.0165,
      "step": 893700
    },
    {
      "epoch": 1.462592381663099,
      "grad_norm": 0.39699843525886536,
      "learning_rate": 7.121432760290717e-06,
      "loss": 0.0187,
      "step": 893720
    },
    {
      "epoch": 1.4626251121017524,
      "grad_norm": 0.2456440031528473,
      "learning_rate": 7.1213668680772e-06,
      "loss": 0.0247,
      "step": 893740
    },
    {
      "epoch": 1.4626578425404058,
      "grad_norm": 1.1649479866027832,
      "learning_rate": 7.121300975863683e-06,
      "loss": 0.0189,
      "step": 893760
    },
    {
      "epoch": 1.4626905729790591,
      "grad_norm": 0.2833098769187927,
      "learning_rate": 7.1212350836501655e-06,
      "loss": 0.0141,
      "step": 893780
    },
    {
      "epoch": 1.4627233034177123,
      "grad_norm": 0.13605068624019623,
      "learning_rate": 7.121169191436649e-06,
      "loss": 0.0131,
      "step": 893800
    },
    {
      "epoch": 1.4627560338563659,
      "grad_norm": 0.21114249527454376,
      "learning_rate": 7.121103299223131e-06,
      "loss": 0.0179,
      "step": 893820
    },
    {
      "epoch": 1.462788764295019,
      "grad_norm": 0.31554222106933594,
      "learning_rate": 7.121037407009615e-06,
      "loss": 0.0222,
      "step": 893840
    },
    {
      "epoch": 1.4628214947336724,
      "grad_norm": 0.13480105996131897,
      "learning_rate": 7.1209715147960965e-06,
      "loss": 0.0301,
      "step": 893860
    },
    {
      "epoch": 1.4628542251723258,
      "grad_norm": 0.6317407488822937,
      "learning_rate": 7.12090562258258e-06,
      "loss": 0.0185,
      "step": 893880
    },
    {
      "epoch": 1.4628869556109791,
      "grad_norm": 0.6419461965560913,
      "learning_rate": 7.120839730369062e-06,
      "loss": 0.023,
      "step": 893900
    },
    {
      "epoch": 1.4629196860496325,
      "grad_norm": 0.271628737449646,
      "learning_rate": 7.1207738381555456e-06,
      "loss": 0.0152,
      "step": 893920
    },
    {
      "epoch": 1.4629524164882857,
      "grad_norm": 0.5243934988975525,
      "learning_rate": 7.120707945942028e-06,
      "loss": 0.0181,
      "step": 893940
    },
    {
      "epoch": 1.462985146926939,
      "grad_norm": 0.21412771940231323,
      "learning_rate": 7.120642053728511e-06,
      "loss": 0.0166,
      "step": 893960
    },
    {
      "epoch": 1.4630178773655924,
      "grad_norm": 0.8644790053367615,
      "learning_rate": 7.120576161514995e-06,
      "loss": 0.0379,
      "step": 893980
    },
    {
      "epoch": 1.4630506078042458,
      "grad_norm": 0.5498346090316772,
      "learning_rate": 7.120510269301477e-06,
      "loss": 0.0164,
      "step": 894000
    },
    {
      "epoch": 1.4630833382428992,
      "grad_norm": 0.48898008465766907,
      "learning_rate": 7.120444377087961e-06,
      "loss": 0.019,
      "step": 894020
    },
    {
      "epoch": 1.4631160686815525,
      "grad_norm": 0.34027203917503357,
      "learning_rate": 7.120378484874443e-06,
      "loss": 0.0144,
      "step": 894040
    },
    {
      "epoch": 1.463148799120206,
      "grad_norm": 1.4327876567840576,
      "learning_rate": 7.1203125926609264e-06,
      "loss": 0.0225,
      "step": 894060
    },
    {
      "epoch": 1.463181529558859,
      "grad_norm": 1.0574219226837158,
      "learning_rate": 7.120246700447408e-06,
      "loss": 0.0181,
      "step": 894080
    },
    {
      "epoch": 1.4632142599975124,
      "grad_norm": 1.310634970664978,
      "learning_rate": 7.120180808233892e-06,
      "loss": 0.0191,
      "step": 894100
    },
    {
      "epoch": 1.4632469904361658,
      "grad_norm": 0.7726473212242126,
      "learning_rate": 7.120114916020374e-06,
      "loss": 0.0217,
      "step": 894120
    },
    {
      "epoch": 1.4632797208748192,
      "grad_norm": 0.2515628933906555,
      "learning_rate": 7.120049023806857e-06,
      "loss": 0.0182,
      "step": 894140
    },
    {
      "epoch": 1.4633124513134725,
      "grad_norm": 1.1470248699188232,
      "learning_rate": 7.11998313159334e-06,
      "loss": 0.0209,
      "step": 894160
    },
    {
      "epoch": 1.463345181752126,
      "grad_norm": 0.8912403583526611,
      "learning_rate": 7.119917239379823e-06,
      "loss": 0.0199,
      "step": 894180
    },
    {
      "epoch": 1.4633779121907793,
      "grad_norm": 0.44525301456451416,
      "learning_rate": 7.119851347166306e-06,
      "loss": 0.0217,
      "step": 894200
    },
    {
      "epoch": 1.4634106426294324,
      "grad_norm": 0.32194846868515015,
      "learning_rate": 7.119785454952789e-06,
      "loss": 0.0347,
      "step": 894220
    },
    {
      "epoch": 1.4634433730680858,
      "grad_norm": 0.6781493425369263,
      "learning_rate": 7.119719562739271e-06,
      "loss": 0.0167,
      "step": 894240
    },
    {
      "epoch": 1.4634761035067392,
      "grad_norm": 2.230280876159668,
      "learning_rate": 7.119653670525755e-06,
      "loss": 0.0239,
      "step": 894260
    },
    {
      "epoch": 1.4635088339453926,
      "grad_norm": 0.12166998535394669,
      "learning_rate": 7.119587778312237e-06,
      "loss": 0.0169,
      "step": 894280
    },
    {
      "epoch": 1.463541564384046,
      "grad_norm": 0.181550532579422,
      "learning_rate": 7.11952188609872e-06,
      "loss": 0.0182,
      "step": 894300
    },
    {
      "epoch": 1.4635742948226993,
      "grad_norm": 0.8050313591957092,
      "learning_rate": 7.119455993885204e-06,
      "loss": 0.015,
      "step": 894320
    },
    {
      "epoch": 1.4636070252613527,
      "grad_norm": 0.703965961933136,
      "learning_rate": 7.119390101671686e-06,
      "loss": 0.0191,
      "step": 894340
    },
    {
      "epoch": 1.4636397557000058,
      "grad_norm": 0.6868040561676025,
      "learning_rate": 7.119324209458169e-06,
      "loss": 0.0233,
      "step": 894360
    },
    {
      "epoch": 1.4636724861386592,
      "grad_norm": 0.40360766649246216,
      "learning_rate": 7.119258317244652e-06,
      "loss": 0.0239,
      "step": 894380
    },
    {
      "epoch": 1.4637052165773126,
      "grad_norm": 0.5352358222007751,
      "learning_rate": 7.119192425031135e-06,
      "loss": 0.0159,
      "step": 894400
    },
    {
      "epoch": 1.463737947015966,
      "grad_norm": 0.1859295666217804,
      "learning_rate": 7.1191265328176175e-06,
      "loss": 0.0169,
      "step": 894420
    },
    {
      "epoch": 1.4637706774546193,
      "grad_norm": 0.23551981151103973,
      "learning_rate": 7.119060640604101e-06,
      "loss": 0.0222,
      "step": 894440
    },
    {
      "epoch": 1.4638034078932725,
      "grad_norm": 0.32515522837638855,
      "learning_rate": 7.118994748390583e-06,
      "loss": 0.0195,
      "step": 894460
    },
    {
      "epoch": 1.463836138331926,
      "grad_norm": 0.26548466086387634,
      "learning_rate": 7.1189288561770665e-06,
      "loss": 0.0197,
      "step": 894480
    },
    {
      "epoch": 1.4638688687705792,
      "grad_norm": 0.46704792976379395,
      "learning_rate": 7.118862963963548e-06,
      "loss": 0.0171,
      "step": 894500
    },
    {
      "epoch": 1.4639015992092326,
      "grad_norm": 1.0441700220108032,
      "learning_rate": 7.118797071750032e-06,
      "loss": 0.0144,
      "step": 894520
    },
    {
      "epoch": 1.463934329647886,
      "grad_norm": 0.9852648973464966,
      "learning_rate": 7.118731179536515e-06,
      "loss": 0.0154,
      "step": 894540
    },
    {
      "epoch": 1.4639670600865393,
      "grad_norm": 0.38679513335227966,
      "learning_rate": 7.1186652873229975e-06,
      "loss": 0.0218,
      "step": 894560
    },
    {
      "epoch": 1.4639997905251927,
      "grad_norm": 0.4089781939983368,
      "learning_rate": 7.11859939510948e-06,
      "loss": 0.021,
      "step": 894580
    },
    {
      "epoch": 1.4640325209638458,
      "grad_norm": 0.7167434692382812,
      "learning_rate": 7.118533502895964e-06,
      "loss": 0.0224,
      "step": 894600
    },
    {
      "epoch": 1.4640652514024994,
      "grad_norm": 0.22604455053806305,
      "learning_rate": 7.118467610682446e-06,
      "loss": 0.0251,
      "step": 894620
    },
    {
      "epoch": 1.4640979818411526,
      "grad_norm": 0.1816735565662384,
      "learning_rate": 7.118401718468929e-06,
      "loss": 0.0127,
      "step": 894640
    },
    {
      "epoch": 1.464130712279806,
      "grad_norm": 1.3176668882369995,
      "learning_rate": 7.118335826255413e-06,
      "loss": 0.0209,
      "step": 894660
    },
    {
      "epoch": 1.4641634427184593,
      "grad_norm": 1.2315503358840942,
      "learning_rate": 7.118269934041895e-06,
      "loss": 0.0207,
      "step": 894680
    },
    {
      "epoch": 1.4641961731571127,
      "grad_norm": 1.0766655206680298,
      "learning_rate": 7.118204041828378e-06,
      "loss": 0.0248,
      "step": 894700
    },
    {
      "epoch": 1.464228903595766,
      "grad_norm": 0.956805944442749,
      "learning_rate": 7.11813814961486e-06,
      "loss": 0.0167,
      "step": 894720
    },
    {
      "epoch": 1.4642616340344192,
      "grad_norm": 1.078770399093628,
      "learning_rate": 7.118072257401344e-06,
      "loss": 0.0312,
      "step": 894740
    },
    {
      "epoch": 1.4642943644730726,
      "grad_norm": 0.1752282679080963,
      "learning_rate": 7.1180063651878266e-06,
      "loss": 0.025,
      "step": 894760
    },
    {
      "epoch": 1.464327094911726,
      "grad_norm": 0.8617876768112183,
      "learning_rate": 7.117940472974309e-06,
      "loss": 0.019,
      "step": 894780
    },
    {
      "epoch": 1.4643598253503793,
      "grad_norm": 1.036064624786377,
      "learning_rate": 7.117874580760792e-06,
      "loss": 0.0197,
      "step": 894800
    },
    {
      "epoch": 1.4643925557890327,
      "grad_norm": 0.3234732151031494,
      "learning_rate": 7.117808688547276e-06,
      "loss": 0.0168,
      "step": 894820
    },
    {
      "epoch": 1.464425286227686,
      "grad_norm": 0.639536440372467,
      "learning_rate": 7.1177427963337575e-06,
      "loss": 0.0198,
      "step": 894840
    },
    {
      "epoch": 1.4644580166663395,
      "grad_norm": 0.1889742761850357,
      "learning_rate": 7.117676904120241e-06,
      "loss": 0.0156,
      "step": 894860
    },
    {
      "epoch": 1.4644907471049926,
      "grad_norm": 0.3033197224140167,
      "learning_rate": 7.117611011906723e-06,
      "loss": 0.0259,
      "step": 894880
    },
    {
      "epoch": 1.464523477543646,
      "grad_norm": 0.6980295181274414,
      "learning_rate": 7.117545119693207e-06,
      "loss": 0.0198,
      "step": 894900
    },
    {
      "epoch": 1.4645562079822994,
      "grad_norm": 0.21304984390735626,
      "learning_rate": 7.1174792274796885e-06,
      "loss": 0.0263,
      "step": 894920
    },
    {
      "epoch": 1.4645889384209527,
      "grad_norm": 0.21384640038013458,
      "learning_rate": 7.117413335266172e-06,
      "loss": 0.0132,
      "step": 894940
    },
    {
      "epoch": 1.464621668859606,
      "grad_norm": 1.301357626914978,
      "learning_rate": 7.117347443052655e-06,
      "loss": 0.0207,
      "step": 894960
    },
    {
      "epoch": 1.4646543992982595,
      "grad_norm": 0.4537527859210968,
      "learning_rate": 7.1172815508391376e-06,
      "loss": 0.0212,
      "step": 894980
    },
    {
      "epoch": 1.4646871297369128,
      "grad_norm": 0.5148541331291199,
      "learning_rate": 7.11721565862562e-06,
      "loss": 0.0215,
      "step": 895000
    },
    {
      "epoch": 1.464719860175566,
      "grad_norm": 0.5945114493370056,
      "learning_rate": 7.117149766412104e-06,
      "loss": 0.0283,
      "step": 895020
    },
    {
      "epoch": 1.4647525906142194,
      "grad_norm": 0.2078140527009964,
      "learning_rate": 7.117083874198587e-06,
      "loss": 0.015,
      "step": 895040
    },
    {
      "epoch": 1.4647853210528727,
      "grad_norm": 0.42159560322761536,
      "learning_rate": 7.117017981985069e-06,
      "loss": 0.0175,
      "step": 895060
    },
    {
      "epoch": 1.464818051491526,
      "grad_norm": 0.1806112676858902,
      "learning_rate": 7.116952089771553e-06,
      "loss": 0.02,
      "step": 895080
    },
    {
      "epoch": 1.4648507819301795,
      "grad_norm": 0.9579911828041077,
      "learning_rate": 7.116886197558035e-06,
      "loss": 0.0224,
      "step": 895100
    },
    {
      "epoch": 1.4648835123688326,
      "grad_norm": 0.29685619473457336,
      "learning_rate": 7.1168203053445184e-06,
      "loss": 0.0115,
      "step": 895120
    },
    {
      "epoch": 1.4649162428074862,
      "grad_norm": 0.24906538426876068,
      "learning_rate": 7.116754413131e-06,
      "loss": 0.0161,
      "step": 895140
    },
    {
      "epoch": 1.4649489732461394,
      "grad_norm": 1.0402112007141113,
      "learning_rate": 7.116688520917484e-06,
      "loss": 0.0119,
      "step": 895160
    },
    {
      "epoch": 1.4649817036847927,
      "grad_norm": 0.8959155678749084,
      "learning_rate": 7.116622628703967e-06,
      "loss": 0.0217,
      "step": 895180
    },
    {
      "epoch": 1.4650144341234461,
      "grad_norm": 0.638704240322113,
      "learning_rate": 7.116556736490449e-06,
      "loss": 0.0336,
      "step": 895200
    },
    {
      "epoch": 1.4650471645620995,
      "grad_norm": 0.9642757773399353,
      "learning_rate": 7.116490844276932e-06,
      "loss": 0.0265,
      "step": 895220
    },
    {
      "epoch": 1.4650798950007529,
      "grad_norm": 0.2951614260673523,
      "learning_rate": 7.116424952063416e-06,
      "loss": 0.0256,
      "step": 895240
    },
    {
      "epoch": 1.465112625439406,
      "grad_norm": 0.27048492431640625,
      "learning_rate": 7.116359059849898e-06,
      "loss": 0.0314,
      "step": 895260
    },
    {
      "epoch": 1.4651453558780596,
      "grad_norm": 0.39569002389907837,
      "learning_rate": 7.116293167636381e-06,
      "loss": 0.0202,
      "step": 895280
    },
    {
      "epoch": 1.4651780863167128,
      "grad_norm": 0.4021647274494171,
      "learning_rate": 7.116227275422863e-06,
      "loss": 0.0168,
      "step": 895300
    },
    {
      "epoch": 1.4652108167553661,
      "grad_norm": 1.8647236824035645,
      "learning_rate": 7.116161383209347e-06,
      "loss": 0.0261,
      "step": 895320
    },
    {
      "epoch": 1.4652435471940195,
      "grad_norm": 1.0978227853775024,
      "learning_rate": 7.116095490995829e-06,
      "loss": 0.0222,
      "step": 895340
    },
    {
      "epoch": 1.4652762776326729,
      "grad_norm": 0.4181525707244873,
      "learning_rate": 7.116029598782312e-06,
      "loss": 0.0205,
      "step": 895360
    },
    {
      "epoch": 1.4653090080713262,
      "grad_norm": 0.7821548581123352,
      "learning_rate": 7.115963706568796e-06,
      "loss": 0.019,
      "step": 895380
    },
    {
      "epoch": 1.4653417385099794,
      "grad_norm": 0.294055312871933,
      "learning_rate": 7.1158978143552785e-06,
      "loss": 0.0182,
      "step": 895400
    },
    {
      "epoch": 1.4653744689486328,
      "grad_norm": 1.5710065364837646,
      "learning_rate": 7.115831922141761e-06,
      "loss": 0.0221,
      "step": 895420
    },
    {
      "epoch": 1.4654071993872861,
      "grad_norm": 0.38155943155288696,
      "learning_rate": 7.115766029928244e-06,
      "loss": 0.0261,
      "step": 895440
    },
    {
      "epoch": 1.4654399298259395,
      "grad_norm": 0.7563351988792419,
      "learning_rate": 7.1157001377147275e-06,
      "loss": 0.0125,
      "step": 895460
    },
    {
      "epoch": 1.4654726602645929,
      "grad_norm": 0.5965396165847778,
      "learning_rate": 7.1156342455012094e-06,
      "loss": 0.0164,
      "step": 895480
    },
    {
      "epoch": 1.4655053907032463,
      "grad_norm": 0.19596393406391144,
      "learning_rate": 7.115568353287693e-06,
      "loss": 0.0218,
      "step": 895500
    },
    {
      "epoch": 1.4655381211418996,
      "grad_norm": 0.09901920706033707,
      "learning_rate": 7.115502461074175e-06,
      "loss": 0.0176,
      "step": 895520
    },
    {
      "epoch": 1.4655708515805528,
      "grad_norm": 0.1710790991783142,
      "learning_rate": 7.1154365688606585e-06,
      "loss": 0.0142,
      "step": 895540
    },
    {
      "epoch": 1.4656035820192062,
      "grad_norm": 0.8884845972061157,
      "learning_rate": 7.115370676647141e-06,
      "loss": 0.0239,
      "step": 895560
    },
    {
      "epoch": 1.4656363124578595,
      "grad_norm": 0.7147271037101746,
      "learning_rate": 7.115304784433624e-06,
      "loss": 0.0192,
      "step": 895580
    },
    {
      "epoch": 1.465669042896513,
      "grad_norm": 0.25922632217407227,
      "learning_rate": 7.115238892220107e-06,
      "loss": 0.0183,
      "step": 895600
    },
    {
      "epoch": 1.4657017733351663,
      "grad_norm": 1.028501033782959,
      "learning_rate": 7.11517300000659e-06,
      "loss": 0.0158,
      "step": 895620
    },
    {
      "epoch": 1.4657345037738196,
      "grad_norm": 0.5173688530921936,
      "learning_rate": 7.115107107793072e-06,
      "loss": 0.0283,
      "step": 895640
    },
    {
      "epoch": 1.465767234212473,
      "grad_norm": 0.19890998303890228,
      "learning_rate": 7.115041215579556e-06,
      "loss": 0.0251,
      "step": 895660
    },
    {
      "epoch": 1.4657999646511262,
      "grad_norm": 0.23573793470859528,
      "learning_rate": 7.114975323366038e-06,
      "loss": 0.0221,
      "step": 895680
    },
    {
      "epoch": 1.4658326950897795,
      "grad_norm": 0.44353798031806946,
      "learning_rate": 7.114909431152521e-06,
      "loss": 0.0315,
      "step": 895700
    },
    {
      "epoch": 1.465865425528433,
      "grad_norm": 0.5580775141716003,
      "learning_rate": 7.114843538939005e-06,
      "loss": 0.0236,
      "step": 895720
    },
    {
      "epoch": 1.4658981559670863,
      "grad_norm": 0.42438608407974243,
      "learning_rate": 7.114777646725487e-06,
      "loss": 0.0232,
      "step": 895740
    },
    {
      "epoch": 1.4659308864057397,
      "grad_norm": 0.20127573609352112,
      "learning_rate": 7.11471175451197e-06,
      "loss": 0.019,
      "step": 895760
    },
    {
      "epoch": 1.465963616844393,
      "grad_norm": 0.26111000776290894,
      "learning_rate": 7.114645862298453e-06,
      "loss": 0.0162,
      "step": 895780
    },
    {
      "epoch": 1.4659963472830464,
      "grad_norm": 0.19312681257724762,
      "learning_rate": 7.114579970084936e-06,
      "loss": 0.0178,
      "step": 895800
    },
    {
      "epoch": 1.4660290777216995,
      "grad_norm": 0.9847537875175476,
      "learning_rate": 7.1145140778714186e-06,
      "loss": 0.024,
      "step": 895820
    },
    {
      "epoch": 1.466061808160353,
      "grad_norm": 0.09314855933189392,
      "learning_rate": 7.114448185657902e-06,
      "loss": 0.0156,
      "step": 895840
    },
    {
      "epoch": 1.4660945385990063,
      "grad_norm": 0.16580261290073395,
      "learning_rate": 7.114382293444384e-06,
      "loss": 0.026,
      "step": 895860
    },
    {
      "epoch": 1.4661272690376597,
      "grad_norm": 1.2856063842773438,
      "learning_rate": 7.114316401230868e-06,
      "loss": 0.0266,
      "step": 895880
    },
    {
      "epoch": 1.466159999476313,
      "grad_norm": 0.30339697003364563,
      "learning_rate": 7.1142505090173495e-06,
      "loss": 0.0138,
      "step": 895900
    },
    {
      "epoch": 1.4661927299149662,
      "grad_norm": 0.9416952729225159,
      "learning_rate": 7.114184616803833e-06,
      "loss": 0.0287,
      "step": 895920
    },
    {
      "epoch": 1.4662254603536198,
      "grad_norm": 0.2975940704345703,
      "learning_rate": 7.114118724590315e-06,
      "loss": 0.0282,
      "step": 895940
    },
    {
      "epoch": 1.466258190792273,
      "grad_norm": 0.47502872347831726,
      "learning_rate": 7.114052832376799e-06,
      "loss": 0.0191,
      "step": 895960
    },
    {
      "epoch": 1.4662909212309263,
      "grad_norm": 0.5237118005752563,
      "learning_rate": 7.113986940163281e-06,
      "loss": 0.0166,
      "step": 895980
    },
    {
      "epoch": 1.4663236516695797,
      "grad_norm": 0.8425325751304626,
      "learning_rate": 7.113921047949764e-06,
      "loss": 0.0249,
      "step": 896000
    },
    {
      "epoch": 1.466356382108233,
      "grad_norm": 0.4722290337085724,
      "learning_rate": 7.113855155736247e-06,
      "loss": 0.0276,
      "step": 896020
    },
    {
      "epoch": 1.4663891125468864,
      "grad_norm": 0.5261971354484558,
      "learning_rate": 7.11378926352273e-06,
      "loss": 0.0174,
      "step": 896040
    },
    {
      "epoch": 1.4664218429855396,
      "grad_norm": 0.2953225374221802,
      "learning_rate": 7.113723371309212e-06,
      "loss": 0.0234,
      "step": 896060
    },
    {
      "epoch": 1.4664545734241932,
      "grad_norm": 0.24819216132164001,
      "learning_rate": 7.113657479095696e-06,
      "loss": 0.0159,
      "step": 896080
    },
    {
      "epoch": 1.4664873038628463,
      "grad_norm": 0.3756240904331207,
      "learning_rate": 7.1135915868821795e-06,
      "loss": 0.0121,
      "step": 896100
    },
    {
      "epoch": 1.4665200343014997,
      "grad_norm": 1.2936909198760986,
      "learning_rate": 7.113525694668661e-06,
      "loss": 0.0287,
      "step": 896120
    },
    {
      "epoch": 1.466552764740153,
      "grad_norm": 0.9880216717720032,
      "learning_rate": 7.113459802455145e-06,
      "loss": 0.0193,
      "step": 896140
    },
    {
      "epoch": 1.4665854951788064,
      "grad_norm": 0.16379216313362122,
      "learning_rate": 7.113393910241627e-06,
      "loss": 0.0143,
      "step": 896160
    },
    {
      "epoch": 1.4666182256174598,
      "grad_norm": 1.4270110130310059,
      "learning_rate": 7.11332801802811e-06,
      "loss": 0.0198,
      "step": 896180
    },
    {
      "epoch": 1.466650956056113,
      "grad_norm": 0.2495761513710022,
      "learning_rate": 7.113262125814593e-06,
      "loss": 0.0232,
      "step": 896200
    },
    {
      "epoch": 1.4666836864947663,
      "grad_norm": 2.0796687602996826,
      "learning_rate": 7.113196233601076e-06,
      "loss": 0.0165,
      "step": 896220
    },
    {
      "epoch": 1.4667164169334197,
      "grad_norm": 0.22428776323795319,
      "learning_rate": 7.113130341387559e-06,
      "loss": 0.0283,
      "step": 896240
    },
    {
      "epoch": 1.466749147372073,
      "grad_norm": 0.14758028090000153,
      "learning_rate": 7.113064449174042e-06,
      "loss": 0.0218,
      "step": 896260
    },
    {
      "epoch": 1.4667818778107264,
      "grad_norm": 0.5311555862426758,
      "learning_rate": 7.112998556960524e-06,
      "loss": 0.0321,
      "step": 896280
    },
    {
      "epoch": 1.4668146082493798,
      "grad_norm": 0.29025569558143616,
      "learning_rate": 7.112932664747008e-06,
      "loss": 0.0238,
      "step": 896300
    },
    {
      "epoch": 1.4668473386880332,
      "grad_norm": 0.21346330642700195,
      "learning_rate": 7.11286677253349e-06,
      "loss": 0.0207,
      "step": 896320
    },
    {
      "epoch": 1.4668800691266863,
      "grad_norm": 0.39542701840400696,
      "learning_rate": 7.112800880319973e-06,
      "loss": 0.0229,
      "step": 896340
    },
    {
      "epoch": 1.4669127995653397,
      "grad_norm": 0.44755059480667114,
      "learning_rate": 7.112734988106456e-06,
      "loss": 0.014,
      "step": 896360
    },
    {
      "epoch": 1.466945530003993,
      "grad_norm": 0.8114076852798462,
      "learning_rate": 7.112669095892939e-06,
      "loss": 0.0187,
      "step": 896380
    },
    {
      "epoch": 1.4669782604426465,
      "grad_norm": 6.083775520324707,
      "learning_rate": 7.112603203679421e-06,
      "loss": 0.0213,
      "step": 896400
    },
    {
      "epoch": 1.4670109908812998,
      "grad_norm": 0.7028898000717163,
      "learning_rate": 7.112537311465905e-06,
      "loss": 0.0199,
      "step": 896420
    },
    {
      "epoch": 1.4670437213199532,
      "grad_norm": 0.5125542283058167,
      "learning_rate": 7.112471419252388e-06,
      "loss": 0.0179,
      "step": 896440
    },
    {
      "epoch": 1.4670764517586066,
      "grad_norm": 0.23350892961025238,
      "learning_rate": 7.1124055270388705e-06,
      "loss": 0.0218,
      "step": 896460
    },
    {
      "epoch": 1.4671091821972597,
      "grad_norm": 0.26592984795570374,
      "learning_rate": 7.112339634825354e-06,
      "loss": 0.0157,
      "step": 896480
    },
    {
      "epoch": 1.467141912635913,
      "grad_norm": 0.22699841856956482,
      "learning_rate": 7.112273742611836e-06,
      "loss": 0.0265,
      "step": 896500
    },
    {
      "epoch": 1.4671746430745665,
      "grad_norm": 0.9495967626571655,
      "learning_rate": 7.1122078503983195e-06,
      "loss": 0.0224,
      "step": 896520
    },
    {
      "epoch": 1.4672073735132198,
      "grad_norm": 1.2243698835372925,
      "learning_rate": 7.112141958184801e-06,
      "loss": 0.0251,
      "step": 896540
    },
    {
      "epoch": 1.4672401039518732,
      "grad_norm": 1.6023272275924683,
      "learning_rate": 7.112076065971285e-06,
      "loss": 0.0164,
      "step": 896560
    },
    {
      "epoch": 1.4672728343905266,
      "grad_norm": 0.10699699819087982,
      "learning_rate": 7.112010173757768e-06,
      "loss": 0.0236,
      "step": 896580
    },
    {
      "epoch": 1.46730556482918,
      "grad_norm": 0.26426470279693604,
      "learning_rate": 7.1119442815442505e-06,
      "loss": 0.0206,
      "step": 896600
    },
    {
      "epoch": 1.467338295267833,
      "grad_norm": 0.9856153130531311,
      "learning_rate": 7.111878389330733e-06,
      "loss": 0.0242,
      "step": 896620
    },
    {
      "epoch": 1.4673710257064865,
      "grad_norm": 0.20067311823368073,
      "learning_rate": 7.111812497117217e-06,
      "loss": 0.0187,
      "step": 896640
    },
    {
      "epoch": 1.4674037561451398,
      "grad_norm": 0.2609822750091553,
      "learning_rate": 7.111746604903699e-06,
      "loss": 0.023,
      "step": 896660
    },
    {
      "epoch": 1.4674364865837932,
      "grad_norm": 0.534531831741333,
      "learning_rate": 7.111680712690182e-06,
      "loss": 0.0227,
      "step": 896680
    },
    {
      "epoch": 1.4674692170224466,
      "grad_norm": 0.2179027795791626,
      "learning_rate": 7.111614820476664e-06,
      "loss": 0.0195,
      "step": 896700
    },
    {
      "epoch": 1.4675019474610997,
      "grad_norm": 1.0628153085708618,
      "learning_rate": 7.111548928263148e-06,
      "loss": 0.0239,
      "step": 896720
    },
    {
      "epoch": 1.4675346778997533,
      "grad_norm": 0.3187110126018524,
      "learning_rate": 7.11148303604963e-06,
      "loss": 0.0228,
      "step": 896740
    },
    {
      "epoch": 1.4675674083384065,
      "grad_norm": 0.3833301365375519,
      "learning_rate": 7.111417143836113e-06,
      "loss": 0.0194,
      "step": 896760
    },
    {
      "epoch": 1.4676001387770599,
      "grad_norm": 0.5790805220603943,
      "learning_rate": 7.111351251622597e-06,
      "loss": 0.0129,
      "step": 896780
    },
    {
      "epoch": 1.4676328692157132,
      "grad_norm": 0.5005874037742615,
      "learning_rate": 7.11128535940908e-06,
      "loss": 0.0172,
      "step": 896800
    },
    {
      "epoch": 1.4676655996543666,
      "grad_norm": 1.3230299949645996,
      "learning_rate": 7.111219467195562e-06,
      "loss": 0.017,
      "step": 896820
    },
    {
      "epoch": 1.46769833009302,
      "grad_norm": 1.4850103855133057,
      "learning_rate": 7.111153574982045e-06,
      "loss": 0.0204,
      "step": 896840
    },
    {
      "epoch": 1.4677310605316731,
      "grad_norm": 0.39685654640197754,
      "learning_rate": 7.111087682768529e-06,
      "loss": 0.0208,
      "step": 896860
    },
    {
      "epoch": 1.4677637909703267,
      "grad_norm": 1.6246181726455688,
      "learning_rate": 7.1110217905550105e-06,
      "loss": 0.025,
      "step": 896880
    },
    {
      "epoch": 1.4677965214089799,
      "grad_norm": 0.43218860030174255,
      "learning_rate": 7.110955898341494e-06,
      "loss": 0.0235,
      "step": 896900
    },
    {
      "epoch": 1.4678292518476332,
      "grad_norm": 0.7678096294403076,
      "learning_rate": 7.110890006127976e-06,
      "loss": 0.0186,
      "step": 896920
    },
    {
      "epoch": 1.4678619822862866,
      "grad_norm": 0.3492366075515747,
      "learning_rate": 7.11082411391446e-06,
      "loss": 0.0214,
      "step": 896940
    },
    {
      "epoch": 1.46789471272494,
      "grad_norm": 0.6682888269424438,
      "learning_rate": 7.1107582217009415e-06,
      "loss": 0.0224,
      "step": 896960
    },
    {
      "epoch": 1.4679274431635934,
      "grad_norm": 0.32401004433631897,
      "learning_rate": 7.110692329487425e-06,
      "loss": 0.0202,
      "step": 896980
    },
    {
      "epoch": 1.4679601736022465,
      "grad_norm": 0.2590578496456146,
      "learning_rate": 7.110626437273908e-06,
      "loss": 0.0198,
      "step": 897000
    },
    {
      "epoch": 1.4679929040408999,
      "grad_norm": 0.6843903660774231,
      "learning_rate": 7.1105605450603906e-06,
      "loss": 0.0205,
      "step": 897020
    },
    {
      "epoch": 1.4680256344795533,
      "grad_norm": 0.6991938948631287,
      "learning_rate": 7.110494652846873e-06,
      "loss": 0.0254,
      "step": 897040
    },
    {
      "epoch": 1.4680583649182066,
      "grad_norm": 0.18088306486606598,
      "learning_rate": 7.110428760633357e-06,
      "loss": 0.0194,
      "step": 897060
    },
    {
      "epoch": 1.46809109535686,
      "grad_norm": 0.30423033237457275,
      "learning_rate": 7.110362868419839e-06,
      "loss": 0.0226,
      "step": 897080
    },
    {
      "epoch": 1.4681238257955134,
      "grad_norm": 0.21310514211654663,
      "learning_rate": 7.110296976206322e-06,
      "loss": 0.0143,
      "step": 897100
    },
    {
      "epoch": 1.4681565562341667,
      "grad_norm": 0.45144882798194885,
      "learning_rate": 7.110231083992806e-06,
      "loss": 0.0185,
      "step": 897120
    },
    {
      "epoch": 1.46818928667282,
      "grad_norm": 0.7071285843849182,
      "learning_rate": 7.110165191779288e-06,
      "loss": 0.0228,
      "step": 897140
    },
    {
      "epoch": 1.4682220171114733,
      "grad_norm": 0.6212103366851807,
      "learning_rate": 7.1100992995657714e-06,
      "loss": 0.0207,
      "step": 897160
    },
    {
      "epoch": 1.4682547475501266,
      "grad_norm": 0.9203137755393982,
      "learning_rate": 7.110033407352253e-06,
      "loss": 0.0249,
      "step": 897180
    },
    {
      "epoch": 1.46828747798878,
      "grad_norm": 0.33340802788734436,
      "learning_rate": 7.109967515138737e-06,
      "loss": 0.02,
      "step": 897200
    },
    {
      "epoch": 1.4683202084274334,
      "grad_norm": 0.09256243705749512,
      "learning_rate": 7.10990162292522e-06,
      "loss": 0.0169,
      "step": 897220
    },
    {
      "epoch": 1.4683529388660868,
      "grad_norm": 0.5369276404380798,
      "learning_rate": 7.109835730711702e-06,
      "loss": 0.0166,
      "step": 897240
    },
    {
      "epoch": 1.4683856693047401,
      "grad_norm": 0.16725987195968628,
      "learning_rate": 7.109769838498185e-06,
      "loss": 0.0267,
      "step": 897260
    },
    {
      "epoch": 1.4684183997433933,
      "grad_norm": 0.20640388131141663,
      "learning_rate": 7.109703946284669e-06,
      "loss": 0.0296,
      "step": 897280
    },
    {
      "epoch": 1.4684511301820466,
      "grad_norm": 0.2428029626607895,
      "learning_rate": 7.109638054071151e-06,
      "loss": 0.0155,
      "step": 897300
    },
    {
      "epoch": 1.4684838606207,
      "grad_norm": 1.1356661319732666,
      "learning_rate": 7.109572161857634e-06,
      "loss": 0.0201,
      "step": 897320
    },
    {
      "epoch": 1.4685165910593534,
      "grad_norm": 0.37792542576789856,
      "learning_rate": 7.109506269644116e-06,
      "loss": 0.0183,
      "step": 897340
    },
    {
      "epoch": 1.4685493214980068,
      "grad_norm": 0.5648588538169861,
      "learning_rate": 7.1094403774306e-06,
      "loss": 0.0255,
      "step": 897360
    },
    {
      "epoch": 1.4685820519366601,
      "grad_norm": 0.15448468923568726,
      "learning_rate": 7.109374485217082e-06,
      "loss": 0.0223,
      "step": 897380
    },
    {
      "epoch": 1.4686147823753135,
      "grad_norm": 0.205279141664505,
      "learning_rate": 7.109308593003565e-06,
      "loss": 0.0148,
      "step": 897400
    },
    {
      "epoch": 1.4686475128139667,
      "grad_norm": 0.3297783434391022,
      "learning_rate": 7.109242700790048e-06,
      "loss": 0.0171,
      "step": 897420
    },
    {
      "epoch": 1.46868024325262,
      "grad_norm": 0.32987335324287415,
      "learning_rate": 7.1091768085765315e-06,
      "loss": 0.0119,
      "step": 897440
    },
    {
      "epoch": 1.4687129736912734,
      "grad_norm": 0.3708312511444092,
      "learning_rate": 7.109110916363013e-06,
      "loss": 0.0271,
      "step": 897460
    },
    {
      "epoch": 1.4687457041299268,
      "grad_norm": 0.437286376953125,
      "learning_rate": 7.109045024149497e-06,
      "loss": 0.0141,
      "step": 897480
    },
    {
      "epoch": 1.4687784345685801,
      "grad_norm": 0.29804539680480957,
      "learning_rate": 7.1089791319359806e-06,
      "loss": 0.0198,
      "step": 897500
    },
    {
      "epoch": 1.4688111650072333,
      "grad_norm": 0.5752407312393188,
      "learning_rate": 7.1089132397224624e-06,
      "loss": 0.0187,
      "step": 897520
    },
    {
      "epoch": 1.468843895445887,
      "grad_norm": 0.3264476954936981,
      "learning_rate": 7.108847347508946e-06,
      "loss": 0.015,
      "step": 897540
    },
    {
      "epoch": 1.46887662588454,
      "grad_norm": 0.3797639310359955,
      "learning_rate": 7.108781455295428e-06,
      "loss": 0.0201,
      "step": 897560
    },
    {
      "epoch": 1.4689093563231934,
      "grad_norm": 0.5959088802337646,
      "learning_rate": 7.1087155630819115e-06,
      "loss": 0.0235,
      "step": 897580
    },
    {
      "epoch": 1.4689420867618468,
      "grad_norm": 0.7239381074905396,
      "learning_rate": 7.108649670868394e-06,
      "loss": 0.0225,
      "step": 897600
    },
    {
      "epoch": 1.4689748172005002,
      "grad_norm": 0.24461400508880615,
      "learning_rate": 7.108583778654877e-06,
      "loss": 0.0268,
      "step": 897620
    },
    {
      "epoch": 1.4690075476391535,
      "grad_norm": 1.405439019203186,
      "learning_rate": 7.10851788644136e-06,
      "loss": 0.0225,
      "step": 897640
    },
    {
      "epoch": 1.4690402780778067,
      "grad_norm": 0.4486280083656311,
      "learning_rate": 7.108451994227843e-06,
      "loss": 0.0224,
      "step": 897660
    },
    {
      "epoch": 1.46907300851646,
      "grad_norm": 0.5785367488861084,
      "learning_rate": 7.108386102014325e-06,
      "loss": 0.0181,
      "step": 897680
    },
    {
      "epoch": 1.4691057389551134,
      "grad_norm": 1.4039818048477173,
      "learning_rate": 7.108320209800809e-06,
      "loss": 0.0175,
      "step": 897700
    },
    {
      "epoch": 1.4691384693937668,
      "grad_norm": 0.2066851258277893,
      "learning_rate": 7.108254317587291e-06,
      "loss": 0.0148,
      "step": 897720
    },
    {
      "epoch": 1.4691711998324202,
      "grad_norm": 0.8809641599655151,
      "learning_rate": 7.108188425373774e-06,
      "loss": 0.0223,
      "step": 897740
    },
    {
      "epoch": 1.4692039302710735,
      "grad_norm": 0.49651870131492615,
      "learning_rate": 7.108122533160256e-06,
      "loss": 0.0204,
      "step": 897760
    },
    {
      "epoch": 1.469236660709727,
      "grad_norm": 0.4359234869480133,
      "learning_rate": 7.10805664094674e-06,
      "loss": 0.0209,
      "step": 897780
    },
    {
      "epoch": 1.46926939114838,
      "grad_norm": 0.2529715299606323,
      "learning_rate": 7.1079907487332225e-06,
      "loss": 0.0229,
      "step": 897800
    },
    {
      "epoch": 1.4693021215870334,
      "grad_norm": 0.5586951375007629,
      "learning_rate": 7.107924856519705e-06,
      "loss": 0.0233,
      "step": 897820
    },
    {
      "epoch": 1.4693348520256868,
      "grad_norm": 0.22046080231666565,
      "learning_rate": 7.107858964306189e-06,
      "loss": 0.0262,
      "step": 897840
    },
    {
      "epoch": 1.4693675824643402,
      "grad_norm": 0.6164311170578003,
      "learning_rate": 7.1077930720926716e-06,
      "loss": 0.0279,
      "step": 897860
    },
    {
      "epoch": 1.4694003129029936,
      "grad_norm": 0.3660334646701813,
      "learning_rate": 7.107727179879155e-06,
      "loss": 0.0189,
      "step": 897880
    },
    {
      "epoch": 1.469433043341647,
      "grad_norm": 0.3994060456752777,
      "learning_rate": 7.107661287665637e-06,
      "loss": 0.0224,
      "step": 897900
    },
    {
      "epoch": 1.4694657737803003,
      "grad_norm": 0.3163261115550995,
      "learning_rate": 7.107595395452121e-06,
      "loss": 0.0205,
      "step": 897920
    },
    {
      "epoch": 1.4694985042189534,
      "grad_norm": 0.42068788409233093,
      "learning_rate": 7.1075295032386025e-06,
      "loss": 0.0247,
      "step": 897940
    },
    {
      "epoch": 1.4695312346576068,
      "grad_norm": 0.6755549907684326,
      "learning_rate": 7.107463611025086e-06,
      "loss": 0.0174,
      "step": 897960
    },
    {
      "epoch": 1.4695639650962602,
      "grad_norm": 0.3260127902030945,
      "learning_rate": 7.107397718811568e-06,
      "loss": 0.0212,
      "step": 897980
    },
    {
      "epoch": 1.4695966955349136,
      "grad_norm": 0.7210453152656555,
      "learning_rate": 7.107331826598052e-06,
      "loss": 0.0299,
      "step": 898000
    },
    {
      "epoch": 1.469629425973567,
      "grad_norm": 0.3660942316055298,
      "learning_rate": 7.107265934384534e-06,
      "loss": 0.0242,
      "step": 898020
    },
    {
      "epoch": 1.4696621564122203,
      "grad_norm": 0.6368685960769653,
      "learning_rate": 7.107200042171017e-06,
      "loss": 0.0188,
      "step": 898040
    },
    {
      "epoch": 1.4696948868508737,
      "grad_norm": 1.1999144554138184,
      "learning_rate": 7.1071341499575e-06,
      "loss": 0.0294,
      "step": 898060
    },
    {
      "epoch": 1.4697276172895268,
      "grad_norm": 0.7987380623817444,
      "learning_rate": 7.107068257743983e-06,
      "loss": 0.0169,
      "step": 898080
    },
    {
      "epoch": 1.4697603477281802,
      "grad_norm": 1.337570309638977,
      "learning_rate": 7.107002365530465e-06,
      "loss": 0.0249,
      "step": 898100
    },
    {
      "epoch": 1.4697930781668336,
      "grad_norm": 0.407696932554245,
      "learning_rate": 7.106936473316949e-06,
      "loss": 0.0264,
      "step": 898120
    },
    {
      "epoch": 1.469825808605487,
      "grad_norm": 0.5259566903114319,
      "learning_rate": 7.106870581103431e-06,
      "loss": 0.0247,
      "step": 898140
    },
    {
      "epoch": 1.4698585390441403,
      "grad_norm": 1.0357041358947754,
      "learning_rate": 7.106804688889914e-06,
      "loss": 0.0153,
      "step": 898160
    },
    {
      "epoch": 1.4698912694827935,
      "grad_norm": 0.888417661190033,
      "learning_rate": 7.106738796676398e-06,
      "loss": 0.0247,
      "step": 898180
    },
    {
      "epoch": 1.469923999921447,
      "grad_norm": 0.7440282106399536,
      "learning_rate": 7.10667290446288e-06,
      "loss": 0.0284,
      "step": 898200
    },
    {
      "epoch": 1.4699567303601002,
      "grad_norm": 0.295775830745697,
      "learning_rate": 7.106607012249363e-06,
      "loss": 0.0107,
      "step": 898220
    },
    {
      "epoch": 1.4699894607987536,
      "grad_norm": 0.7345691323280334,
      "learning_rate": 7.106541120035846e-06,
      "loss": 0.0164,
      "step": 898240
    },
    {
      "epoch": 1.470022191237407,
      "grad_norm": 0.7522162199020386,
      "learning_rate": 7.106475227822329e-06,
      "loss": 0.0185,
      "step": 898260
    },
    {
      "epoch": 1.4700549216760603,
      "grad_norm": 0.7975901365280151,
      "learning_rate": 7.106409335608812e-06,
      "loss": 0.02,
      "step": 898280
    },
    {
      "epoch": 1.4700876521147137,
      "grad_norm": 0.36500832438468933,
      "learning_rate": 7.106343443395295e-06,
      "loss": 0.0187,
      "step": 898300
    },
    {
      "epoch": 1.4701203825533669,
      "grad_norm": 0.3923119902610779,
      "learning_rate": 7.106277551181777e-06,
      "loss": 0.0221,
      "step": 898320
    },
    {
      "epoch": 1.4701531129920204,
      "grad_norm": 0.46666547656059265,
      "learning_rate": 7.106211658968261e-06,
      "loss": 0.0194,
      "step": 898340
    },
    {
      "epoch": 1.4701858434306736,
      "grad_norm": 0.9361698031425476,
      "learning_rate": 7.106145766754743e-06,
      "loss": 0.0138,
      "step": 898360
    },
    {
      "epoch": 1.470218573869327,
      "grad_norm": 0.8055371046066284,
      "learning_rate": 7.106079874541226e-06,
      "loss": 0.0139,
      "step": 898380
    },
    {
      "epoch": 1.4702513043079803,
      "grad_norm": 1.2040737867355347,
      "learning_rate": 7.106013982327709e-06,
      "loss": 0.0207,
      "step": 898400
    },
    {
      "epoch": 1.4702840347466337,
      "grad_norm": 1.0593185424804688,
      "learning_rate": 7.105948090114192e-06,
      "loss": 0.0225,
      "step": 898420
    },
    {
      "epoch": 1.470316765185287,
      "grad_norm": 0.36614343523979187,
      "learning_rate": 7.105882197900674e-06,
      "loss": 0.017,
      "step": 898440
    },
    {
      "epoch": 1.4703494956239402,
      "grad_norm": 1.2310822010040283,
      "learning_rate": 7.105816305687158e-06,
      "loss": 0.0223,
      "step": 898460
    },
    {
      "epoch": 1.4703822260625936,
      "grad_norm": 0.5231710076332092,
      "learning_rate": 7.10575041347364e-06,
      "loss": 0.0265,
      "step": 898480
    },
    {
      "epoch": 1.470414956501247,
      "grad_norm": 0.8761717081069946,
      "learning_rate": 7.1056845212601235e-06,
      "loss": 0.0197,
      "step": 898500
    },
    {
      "epoch": 1.4704476869399004,
      "grad_norm": 0.7596423625946045,
      "learning_rate": 7.105618629046605e-06,
      "loss": 0.0225,
      "step": 898520
    },
    {
      "epoch": 1.4704804173785537,
      "grad_norm": 0.37597256898880005,
      "learning_rate": 7.105552736833089e-06,
      "loss": 0.0208,
      "step": 898540
    },
    {
      "epoch": 1.470513147817207,
      "grad_norm": 0.26313796639442444,
      "learning_rate": 7.1054868446195725e-06,
      "loss": 0.0264,
      "step": 898560
    },
    {
      "epoch": 1.4705458782558605,
      "grad_norm": 0.49904757738113403,
      "learning_rate": 7.1054209524060544e-06,
      "loss": 0.0298,
      "step": 898580
    },
    {
      "epoch": 1.4705786086945136,
      "grad_norm": 1.7400552034378052,
      "learning_rate": 7.105355060192538e-06,
      "loss": 0.0162,
      "step": 898600
    },
    {
      "epoch": 1.470611339133167,
      "grad_norm": 0.7049278616905212,
      "learning_rate": 7.105289167979021e-06,
      "loss": 0.0209,
      "step": 898620
    },
    {
      "epoch": 1.4706440695718204,
      "grad_norm": 0.4102526009082794,
      "learning_rate": 7.1052232757655035e-06,
      "loss": 0.0319,
      "step": 898640
    },
    {
      "epoch": 1.4706768000104737,
      "grad_norm": 0.28242218494415283,
      "learning_rate": 7.105157383551986e-06,
      "loss": 0.0248,
      "step": 898660
    },
    {
      "epoch": 1.4707095304491271,
      "grad_norm": 0.41760239005088806,
      "learning_rate": 7.10509149133847e-06,
      "loss": 0.0199,
      "step": 898680
    },
    {
      "epoch": 1.4707422608877805,
      "grad_norm": 0.7330844402313232,
      "learning_rate": 7.105025599124952e-06,
      "loss": 0.0232,
      "step": 898700
    },
    {
      "epoch": 1.4707749913264339,
      "grad_norm": 0.43749377131462097,
      "learning_rate": 7.104959706911435e-06,
      "loss": 0.0229,
      "step": 898720
    },
    {
      "epoch": 1.470807721765087,
      "grad_norm": 0.7224938869476318,
      "learning_rate": 7.104893814697917e-06,
      "loss": 0.0247,
      "step": 898740
    },
    {
      "epoch": 1.4708404522037404,
      "grad_norm": 0.5053713917732239,
      "learning_rate": 7.104827922484401e-06,
      "loss": 0.0254,
      "step": 898760
    },
    {
      "epoch": 1.4708731826423938,
      "grad_norm": 0.439721941947937,
      "learning_rate": 7.104762030270883e-06,
      "loss": 0.0272,
      "step": 898780
    },
    {
      "epoch": 1.4709059130810471,
      "grad_norm": 0.5991284251213074,
      "learning_rate": 7.104696138057366e-06,
      "loss": 0.0322,
      "step": 898800
    },
    {
      "epoch": 1.4709386435197005,
      "grad_norm": 0.3788366913795471,
      "learning_rate": 7.104630245843849e-06,
      "loss": 0.0126,
      "step": 898820
    },
    {
      "epoch": 1.4709713739583539,
      "grad_norm": 0.45558294653892517,
      "learning_rate": 7.104564353630332e-06,
      "loss": 0.0115,
      "step": 898840
    },
    {
      "epoch": 1.4710041043970072,
      "grad_norm": 0.19165940582752228,
      "learning_rate": 7.1044984614168145e-06,
      "loss": 0.0202,
      "step": 898860
    },
    {
      "epoch": 1.4710368348356604,
      "grad_norm": 0.8041719198226929,
      "learning_rate": 7.104432569203298e-06,
      "loss": 0.0157,
      "step": 898880
    },
    {
      "epoch": 1.4710695652743138,
      "grad_norm": 0.6460161805152893,
      "learning_rate": 7.104366676989781e-06,
      "loss": 0.0187,
      "step": 898900
    },
    {
      "epoch": 1.4711022957129671,
      "grad_norm": 0.25321611762046814,
      "learning_rate": 7.1043007847762635e-06,
      "loss": 0.016,
      "step": 898920
    },
    {
      "epoch": 1.4711350261516205,
      "grad_norm": 0.8942004442214966,
      "learning_rate": 7.104234892562747e-06,
      "loss": 0.0174,
      "step": 898940
    },
    {
      "epoch": 1.4711677565902739,
      "grad_norm": 0.7318183183670044,
      "learning_rate": 7.104169000349229e-06,
      "loss": 0.0259,
      "step": 898960
    },
    {
      "epoch": 1.471200487028927,
      "grad_norm": 0.27998557686805725,
      "learning_rate": 7.104103108135713e-06,
      "loss": 0.0162,
      "step": 898980
    },
    {
      "epoch": 1.4712332174675806,
      "grad_norm": 0.11416309326887131,
      "learning_rate": 7.1040372159221945e-06,
      "loss": 0.0184,
      "step": 899000
    },
    {
      "epoch": 1.4712659479062338,
      "grad_norm": 0.23884864151477814,
      "learning_rate": 7.103971323708678e-06,
      "loss": 0.0115,
      "step": 899020
    },
    {
      "epoch": 1.4712986783448871,
      "grad_norm": 0.3299109935760498,
      "learning_rate": 7.103905431495161e-06,
      "loss": 0.0243,
      "step": 899040
    },
    {
      "epoch": 1.4713314087835405,
      "grad_norm": 0.759973406791687,
      "learning_rate": 7.1038395392816436e-06,
      "loss": 0.0233,
      "step": 899060
    },
    {
      "epoch": 1.471364139222194,
      "grad_norm": 1.4869284629821777,
      "learning_rate": 7.103773647068126e-06,
      "loss": 0.0241,
      "step": 899080
    },
    {
      "epoch": 1.4713968696608473,
      "grad_norm": 0.41736894845962524,
      "learning_rate": 7.10370775485461e-06,
      "loss": 0.0327,
      "step": 899100
    },
    {
      "epoch": 1.4714296000995004,
      "grad_norm": 0.5039543509483337,
      "learning_rate": 7.103641862641092e-06,
      "loss": 0.0211,
      "step": 899120
    },
    {
      "epoch": 1.471462330538154,
      "grad_norm": 0.8889929056167603,
      "learning_rate": 7.103575970427575e-06,
      "loss": 0.0206,
      "step": 899140
    },
    {
      "epoch": 1.4714950609768072,
      "grad_norm": 0.8468973636627197,
      "learning_rate": 7.103510078214057e-06,
      "loss": 0.0259,
      "step": 899160
    },
    {
      "epoch": 1.4715277914154605,
      "grad_norm": 0.5848370790481567,
      "learning_rate": 7.103444186000541e-06,
      "loss": 0.0182,
      "step": 899180
    },
    {
      "epoch": 1.471560521854114,
      "grad_norm": 0.75795978307724,
      "learning_rate": 7.103378293787024e-06,
      "loss": 0.0162,
      "step": 899200
    },
    {
      "epoch": 1.4715932522927673,
      "grad_norm": 0.37100276350975037,
      "learning_rate": 7.103312401573506e-06,
      "loss": 0.0177,
      "step": 899220
    },
    {
      "epoch": 1.4716259827314206,
      "grad_norm": 0.6402500867843628,
      "learning_rate": 7.10324650935999e-06,
      "loss": 0.0223,
      "step": 899240
    },
    {
      "epoch": 1.4716587131700738,
      "grad_norm": 0.4864940941333771,
      "learning_rate": 7.103180617146473e-06,
      "loss": 0.0223,
      "step": 899260
    },
    {
      "epoch": 1.4716914436087272,
      "grad_norm": 0.13184736669063568,
      "learning_rate": 7.103114724932955e-06,
      "loss": 0.0234,
      "step": 899280
    },
    {
      "epoch": 1.4717241740473805,
      "grad_norm": 0.37975409626960754,
      "learning_rate": 7.103048832719438e-06,
      "loss": 0.0209,
      "step": 899300
    },
    {
      "epoch": 1.471756904486034,
      "grad_norm": 0.7343896627426147,
      "learning_rate": 7.102982940505922e-06,
      "loss": 0.0205,
      "step": 899320
    },
    {
      "epoch": 1.4717896349246873,
      "grad_norm": 0.4392938017845154,
      "learning_rate": 7.102917048292404e-06,
      "loss": 0.0149,
      "step": 899340
    },
    {
      "epoch": 1.4718223653633407,
      "grad_norm": 1.3630311489105225,
      "learning_rate": 7.102851156078887e-06,
      "loss": 0.0233,
      "step": 899360
    },
    {
      "epoch": 1.471855095801994,
      "grad_norm": 0.8476999998092651,
      "learning_rate": 7.102785263865369e-06,
      "loss": 0.0205,
      "step": 899380
    },
    {
      "epoch": 1.4718878262406472,
      "grad_norm": 0.2724624276161194,
      "learning_rate": 7.102719371651853e-06,
      "loss": 0.0202,
      "step": 899400
    },
    {
      "epoch": 1.4719205566793006,
      "grad_norm": 0.3255801796913147,
      "learning_rate": 7.1026534794383354e-06,
      "loss": 0.023,
      "step": 899420
    },
    {
      "epoch": 1.471953287117954,
      "grad_norm": 1.04502272605896,
      "learning_rate": 7.102587587224818e-06,
      "loss": 0.0254,
      "step": 899440
    },
    {
      "epoch": 1.4719860175566073,
      "grad_norm": 0.9419565200805664,
      "learning_rate": 7.102521695011301e-06,
      "loss": 0.0145,
      "step": 899460
    },
    {
      "epoch": 1.4720187479952607,
      "grad_norm": 0.16734375059604645,
      "learning_rate": 7.1024558027977845e-06,
      "loss": 0.0154,
      "step": 899480
    },
    {
      "epoch": 1.472051478433914,
      "grad_norm": 1.8313827514648438,
      "learning_rate": 7.102389910584266e-06,
      "loss": 0.0346,
      "step": 899500
    },
    {
      "epoch": 1.4720842088725674,
      "grad_norm": 0.34599488973617554,
      "learning_rate": 7.10232401837075e-06,
      "loss": 0.0196,
      "step": 899520
    },
    {
      "epoch": 1.4721169393112206,
      "grad_norm": 1.25836980342865,
      "learning_rate": 7.102258126157232e-06,
      "loss": 0.0237,
      "step": 899540
    },
    {
      "epoch": 1.472149669749874,
      "grad_norm": 0.27941566705703735,
      "learning_rate": 7.1021922339437155e-06,
      "loss": 0.0201,
      "step": 899560
    },
    {
      "epoch": 1.4721824001885273,
      "grad_norm": 0.7557029128074646,
      "learning_rate": 7.102126341730198e-06,
      "loss": 0.015,
      "step": 899580
    },
    {
      "epoch": 1.4722151306271807,
      "grad_norm": 1.1402709484100342,
      "learning_rate": 7.102060449516681e-06,
      "loss": 0.0218,
      "step": 899600
    },
    {
      "epoch": 1.472247861065834,
      "grad_norm": 0.6316474080085754,
      "learning_rate": 7.1019945573031645e-06,
      "loss": 0.0186,
      "step": 899620
    },
    {
      "epoch": 1.4722805915044874,
      "grad_norm": 0.4939604103565216,
      "learning_rate": 7.101928665089647e-06,
      "loss": 0.0172,
      "step": 899640
    },
    {
      "epoch": 1.4723133219431408,
      "grad_norm": 2.292830467224121,
      "learning_rate": 7.10186277287613e-06,
      "loss": 0.0176,
      "step": 899660
    },
    {
      "epoch": 1.472346052381794,
      "grad_norm": 0.6472724676132202,
      "learning_rate": 7.101796880662613e-06,
      "loss": 0.0169,
      "step": 899680
    },
    {
      "epoch": 1.4723787828204473,
      "grad_norm": 0.3605289161205292,
      "learning_rate": 7.101730988449096e-06,
      "loss": 0.0228,
      "step": 899700
    },
    {
      "epoch": 1.4724115132591007,
      "grad_norm": 0.5538337230682373,
      "learning_rate": 7.101665096235578e-06,
      "loss": 0.0242,
      "step": 899720
    },
    {
      "epoch": 1.472444243697754,
      "grad_norm": 0.17287711799144745,
      "learning_rate": 7.101599204022062e-06,
      "loss": 0.0189,
      "step": 899740
    },
    {
      "epoch": 1.4724769741364074,
      "grad_norm": 0.9591517448425293,
      "learning_rate": 7.101533311808544e-06,
      "loss": 0.02,
      "step": 899760
    },
    {
      "epoch": 1.4725097045750606,
      "grad_norm": 0.35309362411499023,
      "learning_rate": 7.101467419595027e-06,
      "loss": 0.0195,
      "step": 899780
    },
    {
      "epoch": 1.4725424350137142,
      "grad_norm": 0.642412543296814,
      "learning_rate": 7.101401527381509e-06,
      "loss": 0.0235,
      "step": 899800
    },
    {
      "epoch": 1.4725751654523673,
      "grad_norm": 0.5505790114402771,
      "learning_rate": 7.101335635167993e-06,
      "loss": 0.0235,
      "step": 899820
    },
    {
      "epoch": 1.4726078958910207,
      "grad_norm": 0.6709902882575989,
      "learning_rate": 7.1012697429544755e-06,
      "loss": 0.034,
      "step": 899840
    },
    {
      "epoch": 1.472640626329674,
      "grad_norm": 0.36658811569213867,
      "learning_rate": 7.101203850740958e-06,
      "loss": 0.0227,
      "step": 899860
    },
    {
      "epoch": 1.4726733567683274,
      "grad_norm": 0.5903047919273376,
      "learning_rate": 7.101137958527441e-06,
      "loss": 0.0198,
      "step": 899880
    },
    {
      "epoch": 1.4727060872069808,
      "grad_norm": 0.41945138573646545,
      "learning_rate": 7.1010720663139246e-06,
      "loss": 0.0217,
      "step": 899900
    },
    {
      "epoch": 1.472738817645634,
      "grad_norm": 0.3021099865436554,
      "learning_rate": 7.1010061741004065e-06,
      "loss": 0.0207,
      "step": 899920
    },
    {
      "epoch": 1.4727715480842876,
      "grad_norm": 0.5916572213172913,
      "learning_rate": 7.10094028188689e-06,
      "loss": 0.0235,
      "step": 899940
    },
    {
      "epoch": 1.4728042785229407,
      "grad_norm": 0.7611003518104553,
      "learning_rate": 7.100874389673374e-06,
      "loss": 0.0204,
      "step": 899960
    },
    {
      "epoch": 1.472837008961594,
      "grad_norm": 0.2900069057941437,
      "learning_rate": 7.1008084974598555e-06,
      "loss": 0.0307,
      "step": 899980
    },
    {
      "epoch": 1.4728697394002475,
      "grad_norm": 0.15724064409732819,
      "learning_rate": 7.100742605246339e-06,
      "loss": 0.0209,
      "step": 900000
    },
    {
      "epoch": 1.4728697394002475,
      "eval_loss": 0.010955456644296646,
      "eval_runtime": 6502.2245,
      "eval_samples_per_second": 158.078,
      "eval_steps_per_second": 15.808,
      "eval_sts-dev_pearson_cosine": 0.9743064690864603,
      "eval_sts-dev_spearman_cosine": 0.8885283696505835,
      "step": 900000
    },
    {
      "epoch": 1.4729024698389008,
      "grad_norm": 0.8838117718696594,
      "learning_rate": 7.100676713032821e-06,
      "loss": 0.0213,
      "step": 900020
    },
    {
      "epoch": 1.4729352002775542,
      "grad_norm": 0.208647683262825,
      "learning_rate": 7.100610820819305e-06,
      "loss": 0.0158,
      "step": 900040
    },
    {
      "epoch": 1.4729679307162074,
      "grad_norm": 0.1914595365524292,
      "learning_rate": 7.100544928605787e-06,
      "loss": 0.0244,
      "step": 900060
    },
    {
      "epoch": 1.4730006611548607,
      "grad_norm": 0.5214563608169556,
      "learning_rate": 7.10047903639227e-06,
      "loss": 0.0158,
      "step": 900080
    },
    {
      "epoch": 1.473033391593514,
      "grad_norm": 1.2815958261489868,
      "learning_rate": 7.100413144178753e-06,
      "loss": 0.0219,
      "step": 900100
    },
    {
      "epoch": 1.4730661220321675,
      "grad_norm": 1.4228630065917969,
      "learning_rate": 7.100347251965236e-06,
      "loss": 0.0265,
      "step": 900120
    },
    {
      "epoch": 1.4730988524708208,
      "grad_norm": 0.9276716709136963,
      "learning_rate": 7.100281359751718e-06,
      "loss": 0.0204,
      "step": 900140
    },
    {
      "epoch": 1.4731315829094742,
      "grad_norm": 0.6751329898834229,
      "learning_rate": 7.100215467538202e-06,
      "loss": 0.0134,
      "step": 900160
    },
    {
      "epoch": 1.4731643133481276,
      "grad_norm": 0.3992804288864136,
      "learning_rate": 7.100149575324684e-06,
      "loss": 0.0174,
      "step": 900180
    },
    {
      "epoch": 1.4731970437867807,
      "grad_norm": 3.037213087081909,
      "learning_rate": 7.100083683111167e-06,
      "loss": 0.014,
      "step": 900200
    },
    {
      "epoch": 1.473229774225434,
      "grad_norm": 0.4793994724750519,
      "learning_rate": 7.10001779089765e-06,
      "loss": 0.0221,
      "step": 900220
    },
    {
      "epoch": 1.4732625046640875,
      "grad_norm": 1.080004096031189,
      "learning_rate": 7.099951898684133e-06,
      "loss": 0.0174,
      "step": 900240
    },
    {
      "epoch": 1.4732952351027409,
      "grad_norm": 1.1560816764831543,
      "learning_rate": 7.099886006470616e-06,
      "loss": 0.0171,
      "step": 900260
    },
    {
      "epoch": 1.4733279655413942,
      "grad_norm": 0.8820147514343262,
      "learning_rate": 7.099820114257099e-06,
      "loss": 0.0188,
      "step": 900280
    },
    {
      "epoch": 1.4733606959800476,
      "grad_norm": 0.7722887992858887,
      "learning_rate": 7.099754222043582e-06,
      "loss": 0.0202,
      "step": 900300
    },
    {
      "epoch": 1.473393426418701,
      "grad_norm": 0.7448332905769348,
      "learning_rate": 7.099688329830065e-06,
      "loss": 0.0203,
      "step": 900320
    },
    {
      "epoch": 1.4734261568573541,
      "grad_norm": 1.0120320320129395,
      "learning_rate": 7.099622437616548e-06,
      "loss": 0.0179,
      "step": 900340
    },
    {
      "epoch": 1.4734588872960075,
      "grad_norm": 0.39333662390708923,
      "learning_rate": 7.09955654540303e-06,
      "loss": 0.0265,
      "step": 900360
    },
    {
      "epoch": 1.4734916177346609,
      "grad_norm": 0.2617638111114502,
      "learning_rate": 7.099490653189514e-06,
      "loss": 0.0172,
      "step": 900380
    },
    {
      "epoch": 1.4735243481733142,
      "grad_norm": 0.09483490139245987,
      "learning_rate": 7.099424760975996e-06,
      "loss": 0.0155,
      "step": 900400
    },
    {
      "epoch": 1.4735570786119676,
      "grad_norm": 0.31452232599258423,
      "learning_rate": 7.099358868762479e-06,
      "loss": 0.0139,
      "step": 900420
    },
    {
      "epoch": 1.4735898090506208,
      "grad_norm": 0.23884597420692444,
      "learning_rate": 7.099292976548962e-06,
      "loss": 0.0222,
      "step": 900440
    },
    {
      "epoch": 1.4736225394892744,
      "grad_norm": 0.5611863136291504,
      "learning_rate": 7.099227084335445e-06,
      "loss": 0.0206,
      "step": 900460
    },
    {
      "epoch": 1.4736552699279275,
      "grad_norm": 0.33910784125328064,
      "learning_rate": 7.099161192121927e-06,
      "loss": 0.0247,
      "step": 900480
    },
    {
      "epoch": 1.4736880003665809,
      "grad_norm": 1.1934651136398315,
      "learning_rate": 7.099095299908411e-06,
      "loss": 0.0189,
      "step": 900500
    },
    {
      "epoch": 1.4737207308052342,
      "grad_norm": 2.935335159301758,
      "learning_rate": 7.099029407694893e-06,
      "loss": 0.0276,
      "step": 900520
    },
    {
      "epoch": 1.4737534612438876,
      "grad_norm": 1.4346224069595337,
      "learning_rate": 7.0989635154813765e-06,
      "loss": 0.0174,
      "step": 900540
    },
    {
      "epoch": 1.473786191682541,
      "grad_norm": 0.08151282370090485,
      "learning_rate": 7.098897623267858e-06,
      "loss": 0.02,
      "step": 900560
    },
    {
      "epoch": 1.4738189221211941,
      "grad_norm": 1.4082341194152832,
      "learning_rate": 7.098831731054342e-06,
      "loss": 0.0269,
      "step": 900580
    },
    {
      "epoch": 1.4738516525598477,
      "grad_norm": 0.6380248665809631,
      "learning_rate": 7.098765838840824e-06,
      "loss": 0.0181,
      "step": 900600
    },
    {
      "epoch": 1.4738843829985009,
      "grad_norm": 0.8140822649002075,
      "learning_rate": 7.0986999466273074e-06,
      "loss": 0.03,
      "step": 900620
    },
    {
      "epoch": 1.4739171134371543,
      "grad_norm": 0.47099119424819946,
      "learning_rate": 7.098634054413791e-06,
      "loss": 0.027,
      "step": 900640
    },
    {
      "epoch": 1.4739498438758076,
      "grad_norm": 0.5251336693763733,
      "learning_rate": 7.098568162200274e-06,
      "loss": 0.0192,
      "step": 900660
    },
    {
      "epoch": 1.473982574314461,
      "grad_norm": 0.6746478080749512,
      "learning_rate": 7.0985022699867565e-06,
      "loss": 0.0199,
      "step": 900680
    },
    {
      "epoch": 1.4740153047531144,
      "grad_norm": 0.20437709987163544,
      "learning_rate": 7.098436377773239e-06,
      "loss": 0.0289,
      "step": 900700
    },
    {
      "epoch": 1.4740480351917675,
      "grad_norm": 0.4100787937641144,
      "learning_rate": 7.098370485559723e-06,
      "loss": 0.0181,
      "step": 900720
    },
    {
      "epoch": 1.474080765630421,
      "grad_norm": 0.4372740685939789,
      "learning_rate": 7.098304593346205e-06,
      "loss": 0.0197,
      "step": 900740
    },
    {
      "epoch": 1.4741134960690743,
      "grad_norm": 0.18030408024787903,
      "learning_rate": 7.098238701132688e-06,
      "loss": 0.0154,
      "step": 900760
    },
    {
      "epoch": 1.4741462265077276,
      "grad_norm": 0.9118410348892212,
      "learning_rate": 7.09817280891917e-06,
      "loss": 0.0209,
      "step": 900780
    },
    {
      "epoch": 1.474178956946381,
      "grad_norm": 0.47415000200271606,
      "learning_rate": 7.098106916705654e-06,
      "loss": 0.0177,
      "step": 900800
    },
    {
      "epoch": 1.4742116873850344,
      "grad_norm": 0.6176461577415466,
      "learning_rate": 7.098041024492136e-06,
      "loss": 0.0192,
      "step": 900820
    },
    {
      "epoch": 1.4742444178236878,
      "grad_norm": 0.29790475964546204,
      "learning_rate": 7.097975132278619e-06,
      "loss": 0.0259,
      "step": 900840
    },
    {
      "epoch": 1.474277148262341,
      "grad_norm": 0.1946086436510086,
      "learning_rate": 7.097909240065102e-06,
      "loss": 0.0167,
      "step": 900860
    },
    {
      "epoch": 1.4743098787009943,
      "grad_norm": 1.277523398399353,
      "learning_rate": 7.097843347851585e-06,
      "loss": 0.0209,
      "step": 900880
    },
    {
      "epoch": 1.4743426091396477,
      "grad_norm": 0.6914703249931335,
      "learning_rate": 7.0977774556380675e-06,
      "loss": 0.0141,
      "step": 900900
    },
    {
      "epoch": 1.474375339578301,
      "grad_norm": 0.39091819524765015,
      "learning_rate": 7.097711563424551e-06,
      "loss": 0.0233,
      "step": 900920
    },
    {
      "epoch": 1.4744080700169544,
      "grad_norm": 0.30940043926239014,
      "learning_rate": 7.097645671211033e-06,
      "loss": 0.0225,
      "step": 900940
    },
    {
      "epoch": 1.4744408004556078,
      "grad_norm": 1.8474479913711548,
      "learning_rate": 7.0975797789975165e-06,
      "loss": 0.0305,
      "step": 900960
    },
    {
      "epoch": 1.4744735308942611,
      "grad_norm": 0.31366321444511414,
      "learning_rate": 7.0975138867839984e-06,
      "loss": 0.0201,
      "step": 900980
    },
    {
      "epoch": 1.4745062613329143,
      "grad_norm": 1.6966867446899414,
      "learning_rate": 7.097447994570482e-06,
      "loss": 0.0193,
      "step": 901000
    },
    {
      "epoch": 1.4745389917715677,
      "grad_norm": 0.34547746181488037,
      "learning_rate": 7.097382102356966e-06,
      "loss": 0.0154,
      "step": 901020
    },
    {
      "epoch": 1.474571722210221,
      "grad_norm": 0.286345511674881,
      "learning_rate": 7.0973162101434475e-06,
      "loss": 0.0225,
      "step": 901040
    },
    {
      "epoch": 1.4746044526488744,
      "grad_norm": 0.7517714500427246,
      "learning_rate": 7.097250317929931e-06,
      "loss": 0.0206,
      "step": 901060
    },
    {
      "epoch": 1.4746371830875278,
      "grad_norm": 0.1688707321882248,
      "learning_rate": 7.097184425716414e-06,
      "loss": 0.0182,
      "step": 901080
    },
    {
      "epoch": 1.4746699135261812,
      "grad_norm": 0.3815954327583313,
      "learning_rate": 7.0971185335028966e-06,
      "loss": 0.0244,
      "step": 901100
    },
    {
      "epoch": 1.4747026439648345,
      "grad_norm": 1.1375675201416016,
      "learning_rate": 7.097052641289379e-06,
      "loss": 0.0179,
      "step": 901120
    },
    {
      "epoch": 1.4747353744034877,
      "grad_norm": 1.6754857301712036,
      "learning_rate": 7.096986749075863e-06,
      "loss": 0.0171,
      "step": 901140
    },
    {
      "epoch": 1.474768104842141,
      "grad_norm": 0.4694962799549103,
      "learning_rate": 7.096920856862345e-06,
      "loss": 0.0152,
      "step": 901160
    },
    {
      "epoch": 1.4748008352807944,
      "grad_norm": 0.2990367114543915,
      "learning_rate": 7.096854964648828e-06,
      "loss": 0.0178,
      "step": 901180
    },
    {
      "epoch": 1.4748335657194478,
      "grad_norm": 1.2808295488357544,
      "learning_rate": 7.09678907243531e-06,
      "loss": 0.0142,
      "step": 901200
    },
    {
      "epoch": 1.4748662961581012,
      "grad_norm": 0.33187389373779297,
      "learning_rate": 7.096723180221794e-06,
      "loss": 0.0203,
      "step": 901220
    },
    {
      "epoch": 1.4748990265967543,
      "grad_norm": 1.684972882270813,
      "learning_rate": 7.096657288008277e-06,
      "loss": 0.0204,
      "step": 901240
    },
    {
      "epoch": 1.474931757035408,
      "grad_norm": 0.5999073386192322,
      "learning_rate": 7.096591395794759e-06,
      "loss": 0.026,
      "step": 901260
    },
    {
      "epoch": 1.474964487474061,
      "grad_norm": 0.8093442320823669,
      "learning_rate": 7.096525503581242e-06,
      "loss": 0.0214,
      "step": 901280
    },
    {
      "epoch": 1.4749972179127144,
      "grad_norm": 0.787919819355011,
      "learning_rate": 7.096459611367726e-06,
      "loss": 0.0199,
      "step": 901300
    },
    {
      "epoch": 1.4750299483513678,
      "grad_norm": 0.6364558935165405,
      "learning_rate": 7.0963937191542076e-06,
      "loss": 0.0201,
      "step": 901320
    },
    {
      "epoch": 1.4750626787900212,
      "grad_norm": 0.34946882724761963,
      "learning_rate": 7.096327826940691e-06,
      "loss": 0.0304,
      "step": 901340
    },
    {
      "epoch": 1.4750954092286745,
      "grad_norm": 0.22107872366905212,
      "learning_rate": 7.096261934727175e-06,
      "loss": 0.0182,
      "step": 901360
    },
    {
      "epoch": 1.4751281396673277,
      "grad_norm": 0.10451291501522064,
      "learning_rate": 7.096196042513657e-06,
      "loss": 0.0174,
      "step": 901380
    },
    {
      "epoch": 1.4751608701059813,
      "grad_norm": 4.379984378814697,
      "learning_rate": 7.09613015030014e-06,
      "loss": 0.0251,
      "step": 901400
    },
    {
      "epoch": 1.4751936005446344,
      "grad_norm": 1.6820800304412842,
      "learning_rate": 7.096064258086622e-06,
      "loss": 0.0262,
      "step": 901420
    },
    {
      "epoch": 1.4752263309832878,
      "grad_norm": 1.2809314727783203,
      "learning_rate": 7.095998365873106e-06,
      "loss": 0.0247,
      "step": 901440
    },
    {
      "epoch": 1.4752590614219412,
      "grad_norm": 0.532514750957489,
      "learning_rate": 7.0959324736595884e-06,
      "loss": 0.021,
      "step": 901460
    },
    {
      "epoch": 1.4752917918605946,
      "grad_norm": 0.2893235683441162,
      "learning_rate": 7.095866581446071e-06,
      "loss": 0.0254,
      "step": 901480
    },
    {
      "epoch": 1.475324522299248,
      "grad_norm": 0.40441465377807617,
      "learning_rate": 7.095800689232554e-06,
      "loss": 0.0161,
      "step": 901500
    },
    {
      "epoch": 1.475357252737901,
      "grad_norm": 0.6073042154312134,
      "learning_rate": 7.0957347970190375e-06,
      "loss": 0.0173,
      "step": 901520
    },
    {
      "epoch": 1.4753899831765545,
      "grad_norm": 1.773041009902954,
      "learning_rate": 7.095668904805519e-06,
      "loss": 0.0183,
      "step": 901540
    },
    {
      "epoch": 1.4754227136152078,
      "grad_norm": 0.7982630729675293,
      "learning_rate": 7.095603012592003e-06,
      "loss": 0.0252,
      "step": 901560
    },
    {
      "epoch": 1.4754554440538612,
      "grad_norm": 0.33295950293540955,
      "learning_rate": 7.095537120378485e-06,
      "loss": 0.0143,
      "step": 901580
    },
    {
      "epoch": 1.4754881744925146,
      "grad_norm": 0.5053240060806274,
      "learning_rate": 7.0954712281649685e-06,
      "loss": 0.0148,
      "step": 901600
    },
    {
      "epoch": 1.475520904931168,
      "grad_norm": 0.65552818775177,
      "learning_rate": 7.09540533595145e-06,
      "loss": 0.0157,
      "step": 901620
    },
    {
      "epoch": 1.4755536353698213,
      "grad_norm": 0.39831244945526123,
      "learning_rate": 7.095339443737934e-06,
      "loss": 0.0239,
      "step": 901640
    },
    {
      "epoch": 1.4755863658084745,
      "grad_norm": 1.286880373954773,
      "learning_rate": 7.095273551524417e-06,
      "loss": 0.0268,
      "step": 901660
    },
    {
      "epoch": 1.4756190962471278,
      "grad_norm": 1.0205150842666626,
      "learning_rate": 7.095207659310899e-06,
      "loss": 0.0183,
      "step": 901680
    },
    {
      "epoch": 1.4756518266857812,
      "grad_norm": 0.20490364730358124,
      "learning_rate": 7.095141767097383e-06,
      "loss": 0.0239,
      "step": 901700
    },
    {
      "epoch": 1.4756845571244346,
      "grad_norm": 2.046598196029663,
      "learning_rate": 7.095075874883866e-06,
      "loss": 0.0197,
      "step": 901720
    },
    {
      "epoch": 1.475717287563088,
      "grad_norm": 0.3066132068634033,
      "learning_rate": 7.0950099826703485e-06,
      "loss": 0.0205,
      "step": 901740
    },
    {
      "epoch": 1.4757500180017413,
      "grad_norm": 1.247368335723877,
      "learning_rate": 7.094944090456831e-06,
      "loss": 0.0173,
      "step": 901760
    },
    {
      "epoch": 1.4757827484403947,
      "grad_norm": 0.44291993975639343,
      "learning_rate": 7.094878198243315e-06,
      "loss": 0.0246,
      "step": 901780
    },
    {
      "epoch": 1.4758154788790478,
      "grad_norm": 0.5496155619621277,
      "learning_rate": 7.094812306029797e-06,
      "loss": 0.0236,
      "step": 901800
    },
    {
      "epoch": 1.4758482093177012,
      "grad_norm": 1.141671061515808,
      "learning_rate": 7.09474641381628e-06,
      "loss": 0.0177,
      "step": 901820
    },
    {
      "epoch": 1.4758809397563546,
      "grad_norm": 0.35799750685691833,
      "learning_rate": 7.094680521602762e-06,
      "loss": 0.0228,
      "step": 901840
    },
    {
      "epoch": 1.475913670195008,
      "grad_norm": 0.7916219830513,
      "learning_rate": 7.094614629389246e-06,
      "loss": 0.0207,
      "step": 901860
    },
    {
      "epoch": 1.4759464006336613,
      "grad_norm": 0.4504980444908142,
      "learning_rate": 7.0945487371757285e-06,
      "loss": 0.0189,
      "step": 901880
    },
    {
      "epoch": 1.4759791310723147,
      "grad_norm": 0.6353212594985962,
      "learning_rate": 7.094482844962211e-06,
      "loss": 0.0246,
      "step": 901900
    },
    {
      "epoch": 1.476011861510968,
      "grad_norm": 0.5327697396278381,
      "learning_rate": 7.094416952748694e-06,
      "loss": 0.0205,
      "step": 901920
    },
    {
      "epoch": 1.4760445919496212,
      "grad_norm": 1.0435606241226196,
      "learning_rate": 7.0943510605351776e-06,
      "loss": 0.0197,
      "step": 901940
    },
    {
      "epoch": 1.4760773223882746,
      "grad_norm": 0.45366066694259644,
      "learning_rate": 7.0942851683216595e-06,
      "loss": 0.0229,
      "step": 901960
    },
    {
      "epoch": 1.476110052826928,
      "grad_norm": 0.42764803767204285,
      "learning_rate": 7.094219276108143e-06,
      "loss": 0.0183,
      "step": 901980
    },
    {
      "epoch": 1.4761427832655813,
      "grad_norm": 0.403644859790802,
      "learning_rate": 7.094153383894625e-06,
      "loss": 0.0229,
      "step": 902000
    },
    {
      "epoch": 1.4761755137042347,
      "grad_norm": 1.3037632703781128,
      "learning_rate": 7.0940874916811085e-06,
      "loss": 0.0255,
      "step": 902020
    },
    {
      "epoch": 1.4762082441428879,
      "grad_norm": 0.37931710481643677,
      "learning_rate": 7.094021599467591e-06,
      "loss": 0.0249,
      "step": 902040
    },
    {
      "epoch": 1.4762409745815415,
      "grad_norm": 0.2546025216579437,
      "learning_rate": 7.093955707254074e-06,
      "loss": 0.0281,
      "step": 902060
    },
    {
      "epoch": 1.4762737050201946,
      "grad_norm": 0.4630143642425537,
      "learning_rate": 7.093889815040558e-06,
      "loss": 0.0181,
      "step": 902080
    },
    {
      "epoch": 1.476306435458848,
      "grad_norm": 0.5798763632774353,
      "learning_rate": 7.09382392282704e-06,
      "loss": 0.0312,
      "step": 902100
    },
    {
      "epoch": 1.4763391658975014,
      "grad_norm": 0.6277672052383423,
      "learning_rate": 7.093758030613523e-06,
      "loss": 0.014,
      "step": 902120
    },
    {
      "epoch": 1.4763718963361547,
      "grad_norm": 0.2736133337020874,
      "learning_rate": 7.093692138400006e-06,
      "loss": 0.0201,
      "step": 902140
    },
    {
      "epoch": 1.476404626774808,
      "grad_norm": 0.3264935314655304,
      "learning_rate": 7.093626246186489e-06,
      "loss": 0.0168,
      "step": 902160
    },
    {
      "epoch": 1.4764373572134613,
      "grad_norm": 1.5857027769088745,
      "learning_rate": 7.093560353972971e-06,
      "loss": 0.0159,
      "step": 902180
    },
    {
      "epoch": 1.4764700876521148,
      "grad_norm": 0.16380642354488373,
      "learning_rate": 7.093494461759455e-06,
      "loss": 0.0207,
      "step": 902200
    },
    {
      "epoch": 1.476502818090768,
      "grad_norm": 0.5436499714851379,
      "learning_rate": 7.093428569545937e-06,
      "loss": 0.013,
      "step": 902220
    },
    {
      "epoch": 1.4765355485294214,
      "grad_norm": 0.38924598693847656,
      "learning_rate": 7.09336267733242e-06,
      "loss": 0.0185,
      "step": 902240
    },
    {
      "epoch": 1.4765682789680747,
      "grad_norm": 0.16799074411392212,
      "learning_rate": 7.093296785118903e-06,
      "loss": 0.0162,
      "step": 902260
    },
    {
      "epoch": 1.4766010094067281,
      "grad_norm": 1.7472028732299805,
      "learning_rate": 7.093230892905386e-06,
      "loss": 0.0238,
      "step": 902280
    },
    {
      "epoch": 1.4766337398453815,
      "grad_norm": 0.6381467580795288,
      "learning_rate": 7.093165000691869e-06,
      "loss": 0.0211,
      "step": 902300
    },
    {
      "epoch": 1.4766664702840346,
      "grad_norm": 0.36340394616127014,
      "learning_rate": 7.093099108478352e-06,
      "loss": 0.0215,
      "step": 902320
    },
    {
      "epoch": 1.476699200722688,
      "grad_norm": 0.4402083158493042,
      "learning_rate": 7.093033216264834e-06,
      "loss": 0.0159,
      "step": 902340
    },
    {
      "epoch": 1.4767319311613414,
      "grad_norm": 0.8089134097099304,
      "learning_rate": 7.092967324051318e-06,
      "loss": 0.0142,
      "step": 902360
    },
    {
      "epoch": 1.4767646615999948,
      "grad_norm": 0.6219713091850281,
      "learning_rate": 7.0929014318377995e-06,
      "loss": 0.0203,
      "step": 902380
    },
    {
      "epoch": 1.4767973920386481,
      "grad_norm": 0.2636779248714447,
      "learning_rate": 7.092835539624283e-06,
      "loss": 0.0149,
      "step": 902400
    },
    {
      "epoch": 1.4768301224773015,
      "grad_norm": 0.09240346401929855,
      "learning_rate": 7.092769647410767e-06,
      "loss": 0.0147,
      "step": 902420
    },
    {
      "epoch": 1.4768628529159549,
      "grad_norm": 1.3084754943847656,
      "learning_rate": 7.092703755197249e-06,
      "loss": 0.0142,
      "step": 902440
    },
    {
      "epoch": 1.476895583354608,
      "grad_norm": 0.678198516368866,
      "learning_rate": 7.092637862983732e-06,
      "loss": 0.02,
      "step": 902460
    },
    {
      "epoch": 1.4769283137932614,
      "grad_norm": 0.8823218941688538,
      "learning_rate": 7.092571970770215e-06,
      "loss": 0.0261,
      "step": 902480
    },
    {
      "epoch": 1.4769610442319148,
      "grad_norm": 0.24143284559249878,
      "learning_rate": 7.092506078556698e-06,
      "loss": 0.0206,
      "step": 902500
    },
    {
      "epoch": 1.4769937746705681,
      "grad_norm": 0.08897361159324646,
      "learning_rate": 7.09244018634318e-06,
      "loss": 0.0335,
      "step": 902520
    },
    {
      "epoch": 1.4770265051092215,
      "grad_norm": 0.6088593602180481,
      "learning_rate": 7.092374294129664e-06,
      "loss": 0.0229,
      "step": 902540
    },
    {
      "epoch": 1.4770592355478749,
      "grad_norm": 1.7399886846542358,
      "learning_rate": 7.092308401916146e-06,
      "loss": 0.0216,
      "step": 902560
    },
    {
      "epoch": 1.4770919659865283,
      "grad_norm": 0.3497745096683502,
      "learning_rate": 7.0922425097026295e-06,
      "loss": 0.0295,
      "step": 902580
    },
    {
      "epoch": 1.4771246964251814,
      "grad_norm": 0.5036331415176392,
      "learning_rate": 7.092176617489111e-06,
      "loss": 0.0155,
      "step": 902600
    },
    {
      "epoch": 1.4771574268638348,
      "grad_norm": 0.17824462056159973,
      "learning_rate": 7.092110725275595e-06,
      "loss": 0.0215,
      "step": 902620
    },
    {
      "epoch": 1.4771901573024881,
      "grad_norm": 0.5611242055892944,
      "learning_rate": 7.092044833062077e-06,
      "loss": 0.0217,
      "step": 902640
    },
    {
      "epoch": 1.4772228877411415,
      "grad_norm": 0.7476598620414734,
      "learning_rate": 7.0919789408485604e-06,
      "loss": 0.0219,
      "step": 902660
    },
    {
      "epoch": 1.477255618179795,
      "grad_norm": 0.6356902122497559,
      "learning_rate": 7.091913048635043e-06,
      "loss": 0.0222,
      "step": 902680
    },
    {
      "epoch": 1.4772883486184483,
      "grad_norm": 0.621982753276825,
      "learning_rate": 7.091847156421526e-06,
      "loss": 0.0264,
      "step": 902700
    },
    {
      "epoch": 1.4773210790571016,
      "grad_norm": 0.13742604851722717,
      "learning_rate": 7.091781264208009e-06,
      "loss": 0.016,
      "step": 902720
    },
    {
      "epoch": 1.4773538094957548,
      "grad_norm": 3.116039514541626,
      "learning_rate": 7.091715371994492e-06,
      "loss": 0.0179,
      "step": 902740
    },
    {
      "epoch": 1.4773865399344082,
      "grad_norm": 0.3290490210056305,
      "learning_rate": 7.091649479780975e-06,
      "loss": 0.0199,
      "step": 902760
    },
    {
      "epoch": 1.4774192703730615,
      "grad_norm": 0.6740773916244507,
      "learning_rate": 7.091583587567458e-06,
      "loss": 0.0218,
      "step": 902780
    },
    {
      "epoch": 1.477452000811715,
      "grad_norm": 1.676580548286438,
      "learning_rate": 7.091517695353941e-06,
      "loss": 0.0201,
      "step": 902800
    },
    {
      "epoch": 1.4774847312503683,
      "grad_norm": 0.2769368588924408,
      "learning_rate": 7.091451803140423e-06,
      "loss": 0.0164,
      "step": 902820
    },
    {
      "epoch": 1.4775174616890214,
      "grad_norm": 0.2021097093820572,
      "learning_rate": 7.091385910926907e-06,
      "loss": 0.0226,
      "step": 902840
    },
    {
      "epoch": 1.477550192127675,
      "grad_norm": 0.4397597312927246,
      "learning_rate": 7.091320018713389e-06,
      "loss": 0.0268,
      "step": 902860
    },
    {
      "epoch": 1.4775829225663282,
      "grad_norm": 0.3463197648525238,
      "learning_rate": 7.091254126499872e-06,
      "loss": 0.0197,
      "step": 902880
    },
    {
      "epoch": 1.4776156530049815,
      "grad_norm": 0.9268880486488342,
      "learning_rate": 7.091188234286355e-06,
      "loss": 0.0189,
      "step": 902900
    },
    {
      "epoch": 1.477648383443635,
      "grad_norm": 1.4264823198318481,
      "learning_rate": 7.091122342072838e-06,
      "loss": 0.0184,
      "step": 902920
    },
    {
      "epoch": 1.4776811138822883,
      "grad_norm": 1.4398080110549927,
      "learning_rate": 7.0910564498593205e-06,
      "loss": 0.0187,
      "step": 902940
    },
    {
      "epoch": 1.4777138443209417,
      "grad_norm": 0.14648954570293427,
      "learning_rate": 7.090990557645804e-06,
      "loss": 0.0215,
      "step": 902960
    },
    {
      "epoch": 1.4777465747595948,
      "grad_norm": 0.6872530579566956,
      "learning_rate": 7.090924665432286e-06,
      "loss": 0.0194,
      "step": 902980
    },
    {
      "epoch": 1.4777793051982484,
      "grad_norm": 0.741132915019989,
      "learning_rate": 7.0908587732187696e-06,
      "loss": 0.0217,
      "step": 903000
    },
    {
      "epoch": 1.4778120356369016,
      "grad_norm": 0.4935213625431061,
      "learning_rate": 7.0907928810052514e-06,
      "loss": 0.0157,
      "step": 903020
    },
    {
      "epoch": 1.477844766075555,
      "grad_norm": 0.6888836622238159,
      "learning_rate": 7.090726988791735e-06,
      "loss": 0.0171,
      "step": 903040
    },
    {
      "epoch": 1.4778774965142083,
      "grad_norm": 1.017415165901184,
      "learning_rate": 7.090661096578218e-06,
      "loss": 0.0193,
      "step": 903060
    },
    {
      "epoch": 1.4779102269528617,
      "grad_norm": 0.8441793918609619,
      "learning_rate": 7.0905952043647005e-06,
      "loss": 0.0203,
      "step": 903080
    },
    {
      "epoch": 1.477942957391515,
      "grad_norm": 0.9777684807777405,
      "learning_rate": 7.090529312151184e-06,
      "loss": 0.0227,
      "step": 903100
    },
    {
      "epoch": 1.4779756878301682,
      "grad_norm": 0.5631470680236816,
      "learning_rate": 7.090463419937667e-06,
      "loss": 0.0267,
      "step": 903120
    },
    {
      "epoch": 1.4780084182688216,
      "grad_norm": 0.617141842842102,
      "learning_rate": 7.09039752772415e-06,
      "loss": 0.0188,
      "step": 903140
    },
    {
      "epoch": 1.478041148707475,
      "grad_norm": 0.7855945229530334,
      "learning_rate": 7.090331635510632e-06,
      "loss": 0.0226,
      "step": 903160
    },
    {
      "epoch": 1.4780738791461283,
      "grad_norm": 0.3379048705101013,
      "learning_rate": 7.090265743297116e-06,
      "loss": 0.0196,
      "step": 903180
    },
    {
      "epoch": 1.4781066095847817,
      "grad_norm": 0.6999127268791199,
      "learning_rate": 7.090199851083598e-06,
      "loss": 0.0202,
      "step": 903200
    },
    {
      "epoch": 1.478139340023435,
      "grad_norm": 0.2190115749835968,
      "learning_rate": 7.090133958870081e-06,
      "loss": 0.0252,
      "step": 903220
    },
    {
      "epoch": 1.4781720704620884,
      "grad_norm": 0.8315314054489136,
      "learning_rate": 7.090068066656563e-06,
      "loss": 0.0274,
      "step": 903240
    },
    {
      "epoch": 1.4782048009007416,
      "grad_norm": 1.7466769218444824,
      "learning_rate": 7.090002174443047e-06,
      "loss": 0.0213,
      "step": 903260
    },
    {
      "epoch": 1.478237531339395,
      "grad_norm": 0.6255876421928406,
      "learning_rate": 7.08993628222953e-06,
      "loss": 0.0193,
      "step": 903280
    },
    {
      "epoch": 1.4782702617780483,
      "grad_norm": 0.44578367471694946,
      "learning_rate": 7.089870390016012e-06,
      "loss": 0.0161,
      "step": 903300
    },
    {
      "epoch": 1.4783029922167017,
      "grad_norm": 0.33064034581184387,
      "learning_rate": 7.089804497802495e-06,
      "loss": 0.0291,
      "step": 903320
    },
    {
      "epoch": 1.478335722655355,
      "grad_norm": 0.28001469373703003,
      "learning_rate": 7.089738605588979e-06,
      "loss": 0.0189,
      "step": 903340
    },
    {
      "epoch": 1.4783684530940084,
      "grad_norm": 0.2472285032272339,
      "learning_rate": 7.0896727133754606e-06,
      "loss": 0.0191,
      "step": 903360
    },
    {
      "epoch": 1.4784011835326618,
      "grad_norm": 0.29985329508781433,
      "learning_rate": 7.089606821161944e-06,
      "loss": 0.0179,
      "step": 903380
    },
    {
      "epoch": 1.478433913971315,
      "grad_norm": 0.45907819271087646,
      "learning_rate": 7.089540928948426e-06,
      "loss": 0.0187,
      "step": 903400
    },
    {
      "epoch": 1.4784666444099683,
      "grad_norm": 0.6891910433769226,
      "learning_rate": 7.08947503673491e-06,
      "loss": 0.0242,
      "step": 903420
    },
    {
      "epoch": 1.4784993748486217,
      "grad_norm": 0.8569880723953247,
      "learning_rate": 7.089409144521392e-06,
      "loss": 0.0241,
      "step": 903440
    },
    {
      "epoch": 1.478532105287275,
      "grad_norm": 0.9461393356323242,
      "learning_rate": 7.089343252307875e-06,
      "loss": 0.0221,
      "step": 903460
    },
    {
      "epoch": 1.4785648357259285,
      "grad_norm": 0.21903033554553986,
      "learning_rate": 7.089277360094359e-06,
      "loss": 0.0128,
      "step": 903480
    },
    {
      "epoch": 1.4785975661645816,
      "grad_norm": 0.5605462193489075,
      "learning_rate": 7.0892114678808414e-06,
      "loss": 0.0191,
      "step": 903500
    },
    {
      "epoch": 1.4786302966032352,
      "grad_norm": 0.8209633827209473,
      "learning_rate": 7.089145575667324e-06,
      "loss": 0.0279,
      "step": 903520
    },
    {
      "epoch": 1.4786630270418883,
      "grad_norm": 0.48476606607437134,
      "learning_rate": 7.089079683453807e-06,
      "loss": 0.0182,
      "step": 903540
    },
    {
      "epoch": 1.4786957574805417,
      "grad_norm": 0.5081968903541565,
      "learning_rate": 7.0890137912402905e-06,
      "loss": 0.0283,
      "step": 903560
    },
    {
      "epoch": 1.478728487919195,
      "grad_norm": 0.5471014976501465,
      "learning_rate": 7.088947899026772e-06,
      "loss": 0.0204,
      "step": 903580
    },
    {
      "epoch": 1.4787612183578485,
      "grad_norm": 0.23606647551059723,
      "learning_rate": 7.088882006813256e-06,
      "loss": 0.0113,
      "step": 903600
    },
    {
      "epoch": 1.4787939487965018,
      "grad_norm": 0.8655543923377991,
      "learning_rate": 7.088816114599738e-06,
      "loss": 0.0282,
      "step": 903620
    },
    {
      "epoch": 1.478826679235155,
      "grad_norm": 0.23752471804618835,
      "learning_rate": 7.0887502223862215e-06,
      "loss": 0.0157,
      "step": 903640
    },
    {
      "epoch": 1.4788594096738086,
      "grad_norm": 0.35488906502723694,
      "learning_rate": 7.088684330172703e-06,
      "loss": 0.0171,
      "step": 903660
    },
    {
      "epoch": 1.4788921401124617,
      "grad_norm": 0.3155120015144348,
      "learning_rate": 7.088618437959187e-06,
      "loss": 0.027,
      "step": 903680
    },
    {
      "epoch": 1.478924870551115,
      "grad_norm": 0.12584295868873596,
      "learning_rate": 7.08855254574567e-06,
      "loss": 0.0158,
      "step": 903700
    },
    {
      "epoch": 1.4789576009897685,
      "grad_norm": 0.5266466736793518,
      "learning_rate": 7.088486653532152e-06,
      "loss": 0.0155,
      "step": 903720
    },
    {
      "epoch": 1.4789903314284218,
      "grad_norm": 1.1865522861480713,
      "learning_rate": 7.088420761318635e-06,
      "loss": 0.0195,
      "step": 903740
    },
    {
      "epoch": 1.4790230618670752,
      "grad_norm": 0.887972891330719,
      "learning_rate": 7.088354869105119e-06,
      "loss": 0.014,
      "step": 903760
    },
    {
      "epoch": 1.4790557923057284,
      "grad_norm": 2.5730206966400146,
      "learning_rate": 7.088288976891601e-06,
      "loss": 0.0164,
      "step": 903780
    },
    {
      "epoch": 1.4790885227443817,
      "grad_norm": 1.6019023656845093,
      "learning_rate": 7.088223084678084e-06,
      "loss": 0.0242,
      "step": 903800
    },
    {
      "epoch": 1.4791212531830351,
      "grad_norm": 1.1815160512924194,
      "learning_rate": 7.088157192464568e-06,
      "loss": 0.0273,
      "step": 903820
    },
    {
      "epoch": 1.4791539836216885,
      "grad_norm": 0.5325246453285217,
      "learning_rate": 7.08809130025105e-06,
      "loss": 0.0236,
      "step": 903840
    },
    {
      "epoch": 1.4791867140603419,
      "grad_norm": 0.20693863928318024,
      "learning_rate": 7.088025408037533e-06,
      "loss": 0.0226,
      "step": 903860
    },
    {
      "epoch": 1.4792194444989952,
      "grad_norm": 0.4177655279636383,
      "learning_rate": 7.087959515824015e-06,
      "loss": 0.0277,
      "step": 903880
    },
    {
      "epoch": 1.4792521749376486,
      "grad_norm": 0.19058486819267273,
      "learning_rate": 7.087893623610499e-06,
      "loss": 0.0159,
      "step": 903900
    },
    {
      "epoch": 1.4792849053763018,
      "grad_norm": 1.2065762281417847,
      "learning_rate": 7.0878277313969815e-06,
      "loss": 0.0143,
      "step": 903920
    },
    {
      "epoch": 1.4793176358149551,
      "grad_norm": 2.1167609691619873,
      "learning_rate": 7.087761839183464e-06,
      "loss": 0.0246,
      "step": 903940
    },
    {
      "epoch": 1.4793503662536085,
      "grad_norm": 0.45250940322875977,
      "learning_rate": 7.087695946969947e-06,
      "loss": 0.0179,
      "step": 903960
    },
    {
      "epoch": 1.4793830966922619,
      "grad_norm": 0.6216924786567688,
      "learning_rate": 7.087630054756431e-06,
      "loss": 0.0193,
      "step": 903980
    },
    {
      "epoch": 1.4794158271309152,
      "grad_norm": 0.048763345927000046,
      "learning_rate": 7.0875641625429125e-06,
      "loss": 0.0174,
      "step": 904000
    },
    {
      "epoch": 1.4794485575695686,
      "grad_norm": 0.7877796292304993,
      "learning_rate": 7.087498270329396e-06,
      "loss": 0.022,
      "step": 904020
    },
    {
      "epoch": 1.479481288008222,
      "grad_norm": 0.4523870050907135,
      "learning_rate": 7.087432378115878e-06,
      "loss": 0.0294,
      "step": 904040
    },
    {
      "epoch": 1.4795140184468751,
      "grad_norm": 0.21992748975753784,
      "learning_rate": 7.0873664859023615e-06,
      "loss": 0.0167,
      "step": 904060
    },
    {
      "epoch": 1.4795467488855285,
      "grad_norm": 0.6620149612426758,
      "learning_rate": 7.087300593688844e-06,
      "loss": 0.0232,
      "step": 904080
    },
    {
      "epoch": 1.4795794793241819,
      "grad_norm": 0.3924606740474701,
      "learning_rate": 7.087234701475327e-06,
      "loss": 0.0212,
      "step": 904100
    },
    {
      "epoch": 1.4796122097628353,
      "grad_norm": 0.2709854543209076,
      "learning_rate": 7.08716880926181e-06,
      "loss": 0.0194,
      "step": 904120
    },
    {
      "epoch": 1.4796449402014886,
      "grad_norm": 0.38133367896080017,
      "learning_rate": 7.087102917048293e-06,
      "loss": 0.0197,
      "step": 904140
    },
    {
      "epoch": 1.479677670640142,
      "grad_norm": 0.5612378716468811,
      "learning_rate": 7.087037024834776e-06,
      "loss": 0.0226,
      "step": 904160
    },
    {
      "epoch": 1.4797104010787954,
      "grad_norm": 1.233842372894287,
      "learning_rate": 7.086971132621259e-06,
      "loss": 0.0234,
      "step": 904180
    },
    {
      "epoch": 1.4797431315174485,
      "grad_norm": 0.6070569753646851,
      "learning_rate": 7.086905240407742e-06,
      "loss": 0.0278,
      "step": 904200
    },
    {
      "epoch": 1.479775861956102,
      "grad_norm": 0.32094454765319824,
      "learning_rate": 7.086839348194224e-06,
      "loss": 0.0227,
      "step": 904220
    },
    {
      "epoch": 1.4798085923947553,
      "grad_norm": 0.16612863540649414,
      "learning_rate": 7.086773455980708e-06,
      "loss": 0.0151,
      "step": 904240
    },
    {
      "epoch": 1.4798413228334086,
      "grad_norm": 0.9924875497817993,
      "learning_rate": 7.08670756376719e-06,
      "loss": 0.025,
      "step": 904260
    },
    {
      "epoch": 1.479874053272062,
      "grad_norm": 0.33285656571388245,
      "learning_rate": 7.086641671553673e-06,
      "loss": 0.0177,
      "step": 904280
    },
    {
      "epoch": 1.4799067837107152,
      "grad_norm": 0.6177424788475037,
      "learning_rate": 7.086575779340156e-06,
      "loss": 0.0181,
      "step": 904300
    },
    {
      "epoch": 1.4799395141493688,
      "grad_norm": 0.6889159679412842,
      "learning_rate": 7.086509887126639e-06,
      "loss": 0.0271,
      "step": 904320
    },
    {
      "epoch": 1.479972244588022,
      "grad_norm": 0.46599718928337097,
      "learning_rate": 7.086443994913122e-06,
      "loss": 0.0182,
      "step": 904340
    },
    {
      "epoch": 1.4800049750266753,
      "grad_norm": 0.4730285704135895,
      "learning_rate": 7.086378102699605e-06,
      "loss": 0.0257,
      "step": 904360
    },
    {
      "epoch": 1.4800377054653286,
      "grad_norm": 0.634861946105957,
      "learning_rate": 7.086312210486087e-06,
      "loss": 0.0124,
      "step": 904380
    },
    {
      "epoch": 1.480070435903982,
      "grad_norm": 0.6906883120536804,
      "learning_rate": 7.086246318272571e-06,
      "loss": 0.0178,
      "step": 904400
    },
    {
      "epoch": 1.4801031663426354,
      "grad_norm": 2.274851083755493,
      "learning_rate": 7.0861804260590525e-06,
      "loss": 0.0269,
      "step": 904420
    },
    {
      "epoch": 1.4801358967812885,
      "grad_norm": 0.6403968930244446,
      "learning_rate": 7.086114533845536e-06,
      "loss": 0.0293,
      "step": 904440
    },
    {
      "epoch": 1.4801686272199421,
      "grad_norm": 0.517196536064148,
      "learning_rate": 7.086048641632018e-06,
      "loss": 0.0194,
      "step": 904460
    },
    {
      "epoch": 1.4802013576585953,
      "grad_norm": 0.32842206954956055,
      "learning_rate": 7.085982749418502e-06,
      "loss": 0.013,
      "step": 904480
    },
    {
      "epoch": 1.4802340880972487,
      "grad_norm": 0.8031409978866577,
      "learning_rate": 7.085916857204984e-06,
      "loss": 0.0237,
      "step": 904500
    },
    {
      "epoch": 1.480266818535902,
      "grad_norm": 0.724759042263031,
      "learning_rate": 7.085850964991467e-06,
      "loss": 0.0285,
      "step": 904520
    },
    {
      "epoch": 1.4802995489745554,
      "grad_norm": 0.8687126040458679,
      "learning_rate": 7.085785072777951e-06,
      "loss": 0.0148,
      "step": 904540
    },
    {
      "epoch": 1.4803322794132088,
      "grad_norm": 0.5863680243492126,
      "learning_rate": 7.085719180564433e-06,
      "loss": 0.0155,
      "step": 904560
    },
    {
      "epoch": 1.480365009851862,
      "grad_norm": 1.818886399269104,
      "learning_rate": 7.085653288350917e-06,
      "loss": 0.0199,
      "step": 904580
    },
    {
      "epoch": 1.4803977402905153,
      "grad_norm": 0.5432961583137512,
      "learning_rate": 7.085587396137399e-06,
      "loss": 0.0259,
      "step": 904600
    },
    {
      "epoch": 1.4804304707291687,
      "grad_norm": 0.850877583026886,
      "learning_rate": 7.0855215039238825e-06,
      "loss": 0.0224,
      "step": 904620
    },
    {
      "epoch": 1.480463201167822,
      "grad_norm": 0.5389719605445862,
      "learning_rate": 7.085455611710364e-06,
      "loss": 0.0235,
      "step": 904640
    },
    {
      "epoch": 1.4804959316064754,
      "grad_norm": 0.4374169111251831,
      "learning_rate": 7.085389719496848e-06,
      "loss": 0.0247,
      "step": 904660
    },
    {
      "epoch": 1.4805286620451288,
      "grad_norm": 0.15849041938781738,
      "learning_rate": 7.08532382728333e-06,
      "loss": 0.0188,
      "step": 904680
    },
    {
      "epoch": 1.4805613924837822,
      "grad_norm": 0.7417758703231812,
      "learning_rate": 7.0852579350698134e-06,
      "loss": 0.0144,
      "step": 904700
    },
    {
      "epoch": 1.4805941229224353,
      "grad_norm": 0.2046094387769699,
      "learning_rate": 7.085192042856296e-06,
      "loss": 0.0239,
      "step": 904720
    },
    {
      "epoch": 1.4806268533610887,
      "grad_norm": 0.5722694396972656,
      "learning_rate": 7.085126150642779e-06,
      "loss": 0.0188,
      "step": 904740
    },
    {
      "epoch": 1.480659583799742,
      "grad_norm": 0.5355167984962463,
      "learning_rate": 7.085060258429262e-06,
      "loss": 0.0172,
      "step": 904760
    },
    {
      "epoch": 1.4806923142383954,
      "grad_norm": 0.1080290824174881,
      "learning_rate": 7.084994366215745e-06,
      "loss": 0.0215,
      "step": 904780
    },
    {
      "epoch": 1.4807250446770488,
      "grad_norm": 0.6860007643699646,
      "learning_rate": 7.084928474002227e-06,
      "loss": 0.0187,
      "step": 904800
    },
    {
      "epoch": 1.4807577751157022,
      "grad_norm": 0.4472710192203522,
      "learning_rate": 7.084862581788711e-06,
      "loss": 0.0314,
      "step": 904820
    },
    {
      "epoch": 1.4807905055543555,
      "grad_norm": 1.8628464937210083,
      "learning_rate": 7.084796689575193e-06,
      "loss": 0.0209,
      "step": 904840
    },
    {
      "epoch": 1.4808232359930087,
      "grad_norm": 0.46152058243751526,
      "learning_rate": 7.084730797361676e-06,
      "loss": 0.017,
      "step": 904860
    },
    {
      "epoch": 1.480855966431662,
      "grad_norm": 0.9335163831710815,
      "learning_rate": 7.08466490514816e-06,
      "loss": 0.0274,
      "step": 904880
    },
    {
      "epoch": 1.4808886968703154,
      "grad_norm": 0.41429805755615234,
      "learning_rate": 7.084599012934642e-06,
      "loss": 0.022,
      "step": 904900
    },
    {
      "epoch": 1.4809214273089688,
      "grad_norm": 0.7524245977401733,
      "learning_rate": 7.084533120721125e-06,
      "loss": 0.0171,
      "step": 904920
    },
    {
      "epoch": 1.4809541577476222,
      "grad_norm": 0.4158843755722046,
      "learning_rate": 7.084467228507608e-06,
      "loss": 0.0284,
      "step": 904940
    },
    {
      "epoch": 1.4809868881862756,
      "grad_norm": 3.325956344604492,
      "learning_rate": 7.084401336294091e-06,
      "loss": 0.0154,
      "step": 904960
    },
    {
      "epoch": 1.481019618624929,
      "grad_norm": 1.3350924253463745,
      "learning_rate": 7.0843354440805735e-06,
      "loss": 0.0169,
      "step": 904980
    },
    {
      "epoch": 1.481052349063582,
      "grad_norm": 0.19126510620117188,
      "learning_rate": 7.084269551867057e-06,
      "loss": 0.0343,
      "step": 905000
    },
    {
      "epoch": 1.4810850795022354,
      "grad_norm": 0.5874297618865967,
      "learning_rate": 7.084203659653539e-06,
      "loss": 0.0201,
      "step": 905020
    },
    {
      "epoch": 1.4811178099408888,
      "grad_norm": 0.5267410278320312,
      "learning_rate": 7.0841377674400226e-06,
      "loss": 0.0188,
      "step": 905040
    },
    {
      "epoch": 1.4811505403795422,
      "grad_norm": 0.37763455510139465,
      "learning_rate": 7.0840718752265045e-06,
      "loss": 0.0206,
      "step": 905060
    },
    {
      "epoch": 1.4811832708181956,
      "grad_norm": 0.3540639579296112,
      "learning_rate": 7.084005983012988e-06,
      "loss": 0.0192,
      "step": 905080
    },
    {
      "epoch": 1.4812160012568487,
      "grad_norm": 0.9959234595298767,
      "learning_rate": 7.083940090799471e-06,
      "loss": 0.0203,
      "step": 905100
    },
    {
      "epoch": 1.4812487316955023,
      "grad_norm": 1.3811304569244385,
      "learning_rate": 7.0838741985859535e-06,
      "loss": 0.0159,
      "step": 905120
    },
    {
      "epoch": 1.4812814621341555,
      "grad_norm": 0.9650670886039734,
      "learning_rate": 7.083808306372436e-06,
      "loss": 0.019,
      "step": 905140
    },
    {
      "epoch": 1.4813141925728088,
      "grad_norm": 0.23923197388648987,
      "learning_rate": 7.08374241415892e-06,
      "loss": 0.0215,
      "step": 905160
    },
    {
      "epoch": 1.4813469230114622,
      "grad_norm": 0.10109513998031616,
      "learning_rate": 7.083676521945402e-06,
      "loss": 0.0176,
      "step": 905180
    },
    {
      "epoch": 1.4813796534501156,
      "grad_norm": 0.6712397933006287,
      "learning_rate": 7.083610629731885e-06,
      "loss": 0.0194,
      "step": 905200
    },
    {
      "epoch": 1.481412383888769,
      "grad_norm": 0.314927875995636,
      "learning_rate": 7.083544737518369e-06,
      "loss": 0.018,
      "step": 905220
    },
    {
      "epoch": 1.481445114327422,
      "grad_norm": 0.5478428602218628,
      "learning_rate": 7.083478845304851e-06,
      "loss": 0.0153,
      "step": 905240
    },
    {
      "epoch": 1.4814778447660757,
      "grad_norm": 0.31457963585853577,
      "learning_rate": 7.083412953091334e-06,
      "loss": 0.0232,
      "step": 905260
    },
    {
      "epoch": 1.4815105752047288,
      "grad_norm": 0.6238897442817688,
      "learning_rate": 7.083347060877816e-06,
      "loss": 0.0176,
      "step": 905280
    },
    {
      "epoch": 1.4815433056433822,
      "grad_norm": 0.3277495503425598,
      "learning_rate": 7.0832811686643e-06,
      "loss": 0.0187,
      "step": 905300
    },
    {
      "epoch": 1.4815760360820356,
      "grad_norm": 0.7215470671653748,
      "learning_rate": 7.083215276450783e-06,
      "loss": 0.0235,
      "step": 905320
    },
    {
      "epoch": 1.481608766520689,
      "grad_norm": 0.595109760761261,
      "learning_rate": 7.083149384237265e-06,
      "loss": 0.0187,
      "step": 905340
    },
    {
      "epoch": 1.4816414969593423,
      "grad_norm": 0.21054792404174805,
      "learning_rate": 7.083083492023748e-06,
      "loss": 0.0307,
      "step": 905360
    },
    {
      "epoch": 1.4816742273979955,
      "grad_norm": 0.30365946888923645,
      "learning_rate": 7.083017599810232e-06,
      "loss": 0.016,
      "step": 905380
    },
    {
      "epoch": 1.4817069578366489,
      "grad_norm": 0.4934050142765045,
      "learning_rate": 7.0829517075967136e-06,
      "loss": 0.018,
      "step": 905400
    },
    {
      "epoch": 1.4817396882753022,
      "grad_norm": 0.19880340993404388,
      "learning_rate": 7.082885815383197e-06,
      "loss": 0.0166,
      "step": 905420
    },
    {
      "epoch": 1.4817724187139556,
      "grad_norm": 0.16416245698928833,
      "learning_rate": 7.082819923169679e-06,
      "loss": 0.017,
      "step": 905440
    },
    {
      "epoch": 1.481805149152609,
      "grad_norm": 0.1974397599697113,
      "learning_rate": 7.082754030956163e-06,
      "loss": 0.0211,
      "step": 905460
    },
    {
      "epoch": 1.4818378795912623,
      "grad_norm": 0.5049854516983032,
      "learning_rate": 7.0826881387426445e-06,
      "loss": 0.0224,
      "step": 905480
    },
    {
      "epoch": 1.4818706100299157,
      "grad_norm": 0.14254094660282135,
      "learning_rate": 7.082622246529128e-06,
      "loss": 0.0106,
      "step": 905500
    },
    {
      "epoch": 1.4819033404685689,
      "grad_norm": 0.21572506427764893,
      "learning_rate": 7.082556354315611e-06,
      "loss": 0.0199,
      "step": 905520
    },
    {
      "epoch": 1.4819360709072222,
      "grad_norm": 0.4870077669620514,
      "learning_rate": 7.082490462102094e-06,
      "loss": 0.0354,
      "step": 905540
    },
    {
      "epoch": 1.4819688013458756,
      "grad_norm": 0.7893378734588623,
      "learning_rate": 7.082424569888576e-06,
      "loss": 0.0154,
      "step": 905560
    },
    {
      "epoch": 1.482001531784529,
      "grad_norm": 1.0339702367782593,
      "learning_rate": 7.08235867767506e-06,
      "loss": 0.0127,
      "step": 905580
    },
    {
      "epoch": 1.4820342622231824,
      "grad_norm": 0.49421387910842896,
      "learning_rate": 7.082292785461543e-06,
      "loss": 0.0152,
      "step": 905600
    },
    {
      "epoch": 1.4820669926618357,
      "grad_norm": 0.6045412421226501,
      "learning_rate": 7.082226893248025e-06,
      "loss": 0.0204,
      "step": 905620
    },
    {
      "epoch": 1.482099723100489,
      "grad_norm": 0.69236820936203,
      "learning_rate": 7.082161001034509e-06,
      "loss": 0.0228,
      "step": 905640
    },
    {
      "epoch": 1.4821324535391422,
      "grad_norm": 0.22097660601139069,
      "learning_rate": 7.082095108820991e-06,
      "loss": 0.0204,
      "step": 905660
    },
    {
      "epoch": 1.4821651839777956,
      "grad_norm": 1.0336174964904785,
      "learning_rate": 7.0820292166074745e-06,
      "loss": 0.0164,
      "step": 905680
    },
    {
      "epoch": 1.482197914416449,
      "grad_norm": 0.39039477705955505,
      "learning_rate": 7.081963324393956e-06,
      "loss": 0.0292,
      "step": 905700
    },
    {
      "epoch": 1.4822306448551024,
      "grad_norm": 0.622808039188385,
      "learning_rate": 7.08189743218044e-06,
      "loss": 0.0214,
      "step": 905720
    },
    {
      "epoch": 1.4822633752937557,
      "grad_norm": 1.48813796043396,
      "learning_rate": 7.081831539966923e-06,
      "loss": 0.0205,
      "step": 905740
    },
    {
      "epoch": 1.482296105732409,
      "grad_norm": 0.6379895210266113,
      "learning_rate": 7.0817656477534054e-06,
      "loss": 0.0238,
      "step": 905760
    },
    {
      "epoch": 1.4823288361710625,
      "grad_norm": 0.49226635694503784,
      "learning_rate": 7.081699755539888e-06,
      "loss": 0.0184,
      "step": 905780
    },
    {
      "epoch": 1.4823615666097156,
      "grad_norm": 0.4883180558681488,
      "learning_rate": 7.081633863326372e-06,
      "loss": 0.0261,
      "step": 905800
    },
    {
      "epoch": 1.482394297048369,
      "grad_norm": 0.3622342050075531,
      "learning_rate": 7.081567971112854e-06,
      "loss": 0.0168,
      "step": 905820
    },
    {
      "epoch": 1.4824270274870224,
      "grad_norm": 0.6207703948020935,
      "learning_rate": 7.081502078899337e-06,
      "loss": 0.0168,
      "step": 905840
    },
    {
      "epoch": 1.4824597579256757,
      "grad_norm": 0.969930112361908,
      "learning_rate": 7.081436186685819e-06,
      "loss": 0.0177,
      "step": 905860
    },
    {
      "epoch": 1.4824924883643291,
      "grad_norm": 0.6814392805099487,
      "learning_rate": 7.081370294472303e-06,
      "loss": 0.0201,
      "step": 905880
    },
    {
      "epoch": 1.4825252188029823,
      "grad_norm": 0.9602953791618347,
      "learning_rate": 7.0813044022587855e-06,
      "loss": 0.0194,
      "step": 905900
    },
    {
      "epoch": 1.4825579492416359,
      "grad_norm": 1.0684113502502441,
      "learning_rate": 7.081238510045268e-06,
      "loss": 0.0172,
      "step": 905920
    },
    {
      "epoch": 1.482590679680289,
      "grad_norm": 0.3098074793815613,
      "learning_rate": 7.081172617831752e-06,
      "loss": 0.0155,
      "step": 905940
    },
    {
      "epoch": 1.4826234101189424,
      "grad_norm": 0.4800747036933899,
      "learning_rate": 7.0811067256182345e-06,
      "loss": 0.0259,
      "step": 905960
    },
    {
      "epoch": 1.4826561405575958,
      "grad_norm": 0.3203795850276947,
      "learning_rate": 7.081040833404717e-06,
      "loss": 0.0182,
      "step": 905980
    },
    {
      "epoch": 1.4826888709962491,
      "grad_norm": 0.5341131687164307,
      "learning_rate": 7.0809749411912e-06,
      "loss": 0.0218,
      "step": 906000
    },
    {
      "epoch": 1.4827216014349025,
      "grad_norm": 0.9391836524009705,
      "learning_rate": 7.080909048977684e-06,
      "loss": 0.0244,
      "step": 906020
    },
    {
      "epoch": 1.4827543318735557,
      "grad_norm": 0.4292791783809662,
      "learning_rate": 7.0808431567641655e-06,
      "loss": 0.0204,
      "step": 906040
    },
    {
      "epoch": 1.482787062312209,
      "grad_norm": 0.10183829069137573,
      "learning_rate": 7.080777264550649e-06,
      "loss": 0.021,
      "step": 906060
    },
    {
      "epoch": 1.4828197927508624,
      "grad_norm": 0.5006294846534729,
      "learning_rate": 7.080711372337131e-06,
      "loss": 0.0272,
      "step": 906080
    },
    {
      "epoch": 1.4828525231895158,
      "grad_norm": 1.3978135585784912,
      "learning_rate": 7.0806454801236145e-06,
      "loss": 0.0267,
      "step": 906100
    },
    {
      "epoch": 1.4828852536281691,
      "grad_norm": 0.2149539291858673,
      "learning_rate": 7.080579587910097e-06,
      "loss": 0.0127,
      "step": 906120
    },
    {
      "epoch": 1.4829179840668225,
      "grad_norm": 0.2856625020503998,
      "learning_rate": 7.08051369569658e-06,
      "loss": 0.021,
      "step": 906140
    },
    {
      "epoch": 1.4829507145054759,
      "grad_norm": 0.6553128957748413,
      "learning_rate": 7.080447803483063e-06,
      "loss": 0.0195,
      "step": 906160
    },
    {
      "epoch": 1.482983444944129,
      "grad_norm": 0.4513729214668274,
      "learning_rate": 7.080381911269546e-06,
      "loss": 0.0221,
      "step": 906180
    },
    {
      "epoch": 1.4830161753827824,
      "grad_norm": 0.5616770386695862,
      "learning_rate": 7.080316019056028e-06,
      "loss": 0.0228,
      "step": 906200
    },
    {
      "epoch": 1.4830489058214358,
      "grad_norm": 0.3780312240123749,
      "learning_rate": 7.080250126842512e-06,
      "loss": 0.0187,
      "step": 906220
    },
    {
      "epoch": 1.4830816362600892,
      "grad_norm": 0.28686514496803284,
      "learning_rate": 7.080184234628994e-06,
      "loss": 0.02,
      "step": 906240
    },
    {
      "epoch": 1.4831143666987425,
      "grad_norm": 0.29318973422050476,
      "learning_rate": 7.080118342415477e-06,
      "loss": 0.0219,
      "step": 906260
    },
    {
      "epoch": 1.483147097137396,
      "grad_norm": 0.2832125723361969,
      "learning_rate": 7.080052450201961e-06,
      "loss": 0.0204,
      "step": 906280
    },
    {
      "epoch": 1.4831798275760493,
      "grad_norm": 0.29765889048576355,
      "learning_rate": 7.079986557988443e-06,
      "loss": 0.0111,
      "step": 906300
    },
    {
      "epoch": 1.4832125580147024,
      "grad_norm": 8.555658340454102,
      "learning_rate": 7.079920665774926e-06,
      "loss": 0.0188,
      "step": 906320
    },
    {
      "epoch": 1.4832452884533558,
      "grad_norm": 0.17524266242980957,
      "learning_rate": 7.079854773561409e-06,
      "loss": 0.0168,
      "step": 906340
    },
    {
      "epoch": 1.4832780188920092,
      "grad_norm": 0.15183795988559723,
      "learning_rate": 7.079788881347892e-06,
      "loss": 0.0173,
      "step": 906360
    },
    {
      "epoch": 1.4833107493306625,
      "grad_norm": 0.5939386487007141,
      "learning_rate": 7.079722989134375e-06,
      "loss": 0.0201,
      "step": 906380
    },
    {
      "epoch": 1.483343479769316,
      "grad_norm": 0.2753324508666992,
      "learning_rate": 7.079657096920858e-06,
      "loss": 0.0241,
      "step": 906400
    },
    {
      "epoch": 1.4833762102079693,
      "grad_norm": 1.0307161808013916,
      "learning_rate": 7.07959120470734e-06,
      "loss": 0.0224,
      "step": 906420
    },
    {
      "epoch": 1.4834089406466227,
      "grad_norm": 0.7882221937179565,
      "learning_rate": 7.079525312493824e-06,
      "loss": 0.0213,
      "step": 906440
    },
    {
      "epoch": 1.4834416710852758,
      "grad_norm": 0.2825693190097809,
      "learning_rate": 7.0794594202803056e-06,
      "loss": 0.0183,
      "step": 906460
    },
    {
      "epoch": 1.4834744015239292,
      "grad_norm": 0.17153580486774445,
      "learning_rate": 7.079393528066789e-06,
      "loss": 0.0212,
      "step": 906480
    },
    {
      "epoch": 1.4835071319625825,
      "grad_norm": 0.527703583240509,
      "learning_rate": 7.079327635853271e-06,
      "loss": 0.03,
      "step": 906500
    },
    {
      "epoch": 1.483539862401236,
      "grad_norm": 1.3228963613510132,
      "learning_rate": 7.079261743639755e-06,
      "loss": 0.0216,
      "step": 906520
    },
    {
      "epoch": 1.4835725928398893,
      "grad_norm": 0.34206515550613403,
      "learning_rate": 7.079195851426237e-06,
      "loss": 0.0196,
      "step": 906540
    },
    {
      "epoch": 1.4836053232785424,
      "grad_norm": 0.23805789649486542,
      "learning_rate": 7.07912995921272e-06,
      "loss": 0.0183,
      "step": 906560
    },
    {
      "epoch": 1.483638053717196,
      "grad_norm": 0.8666358590126038,
      "learning_rate": 7.079064066999203e-06,
      "loss": 0.0169,
      "step": 906580
    },
    {
      "epoch": 1.4836707841558492,
      "grad_norm": 1.2280325889587402,
      "learning_rate": 7.0789981747856864e-06,
      "loss": 0.0284,
      "step": 906600
    },
    {
      "epoch": 1.4837035145945026,
      "grad_norm": 0.8293856382369995,
      "learning_rate": 7.078932282572169e-06,
      "loss": 0.0159,
      "step": 906620
    },
    {
      "epoch": 1.483736245033156,
      "grad_norm": 0.633979082107544,
      "learning_rate": 7.078866390358652e-06,
      "loss": 0.0172,
      "step": 906640
    },
    {
      "epoch": 1.4837689754718093,
      "grad_norm": 1.0213961601257324,
      "learning_rate": 7.0788004981451355e-06,
      "loss": 0.0178,
      "step": 906660
    },
    {
      "epoch": 1.4838017059104627,
      "grad_norm": 0.6059159636497498,
      "learning_rate": 7.078734605931617e-06,
      "loss": 0.016,
      "step": 906680
    },
    {
      "epoch": 1.4838344363491158,
      "grad_norm": 0.8933480381965637,
      "learning_rate": 7.078668713718101e-06,
      "loss": 0.0186,
      "step": 906700
    },
    {
      "epoch": 1.4838671667877694,
      "grad_norm": 1.0534723997116089,
      "learning_rate": 7.078602821504583e-06,
      "loss": 0.0165,
      "step": 906720
    },
    {
      "epoch": 1.4838998972264226,
      "grad_norm": 1.1235445737838745,
      "learning_rate": 7.0785369292910665e-06,
      "loss": 0.023,
      "step": 906740
    },
    {
      "epoch": 1.483932627665076,
      "grad_norm": 1.9724806547164917,
      "learning_rate": 7.078471037077549e-06,
      "loss": 0.0157,
      "step": 906760
    },
    {
      "epoch": 1.4839653581037293,
      "grad_norm": 0.8887642025947571,
      "learning_rate": 7.078405144864032e-06,
      "loss": 0.0237,
      "step": 906780
    },
    {
      "epoch": 1.4839980885423827,
      "grad_norm": 2.5231783390045166,
      "learning_rate": 7.078339252650515e-06,
      "loss": 0.0266,
      "step": 906800
    },
    {
      "epoch": 1.484030818981036,
      "grad_norm": 0.26218754053115845,
      "learning_rate": 7.078273360436998e-06,
      "loss": 0.0225,
      "step": 906820
    },
    {
      "epoch": 1.4840635494196892,
      "grad_norm": 0.50958651304245,
      "learning_rate": 7.07820746822348e-06,
      "loss": 0.0173,
      "step": 906840
    },
    {
      "epoch": 1.4840962798583426,
      "grad_norm": 0.15082547068595886,
      "learning_rate": 7.078141576009964e-06,
      "loss": 0.019,
      "step": 906860
    },
    {
      "epoch": 1.484129010296996,
      "grad_norm": 0.5441529750823975,
      "learning_rate": 7.078075683796446e-06,
      "loss": 0.0287,
      "step": 906880
    },
    {
      "epoch": 1.4841617407356493,
      "grad_norm": 0.5676741003990173,
      "learning_rate": 7.078009791582929e-06,
      "loss": 0.0168,
      "step": 906900
    },
    {
      "epoch": 1.4841944711743027,
      "grad_norm": 0.49430498480796814,
      "learning_rate": 7.077943899369412e-06,
      "loss": 0.0244,
      "step": 906920
    },
    {
      "epoch": 1.484227201612956,
      "grad_norm": 0.8769183158874512,
      "learning_rate": 7.077878007155895e-06,
      "loss": 0.0232,
      "step": 906940
    },
    {
      "epoch": 1.4842599320516094,
      "grad_norm": 0.8780969381332397,
      "learning_rate": 7.0778121149423774e-06,
      "loss": 0.0164,
      "step": 906960
    },
    {
      "epoch": 1.4842926624902626,
      "grad_norm": 0.43513935804367065,
      "learning_rate": 7.077746222728861e-06,
      "loss": 0.0189,
      "step": 906980
    },
    {
      "epoch": 1.484325392928916,
      "grad_norm": 0.7725438475608826,
      "learning_rate": 7.077680330515344e-06,
      "loss": 0.0198,
      "step": 907000
    },
    {
      "epoch": 1.4843581233675693,
      "grad_norm": 1.356493353843689,
      "learning_rate": 7.0776144383018265e-06,
      "loss": 0.0321,
      "step": 907020
    },
    {
      "epoch": 1.4843908538062227,
      "grad_norm": 0.6842830181121826,
      "learning_rate": 7.07754854608831e-06,
      "loss": 0.0217,
      "step": 907040
    },
    {
      "epoch": 1.484423584244876,
      "grad_norm": 0.9377847909927368,
      "learning_rate": 7.077482653874792e-06,
      "loss": 0.0186,
      "step": 907060
    },
    {
      "epoch": 1.4844563146835295,
      "grad_norm": 0.2871650755405426,
      "learning_rate": 7.0774167616612756e-06,
      "loss": 0.0179,
      "step": 907080
    },
    {
      "epoch": 1.4844890451221828,
      "grad_norm": 0.6634975075721741,
      "learning_rate": 7.0773508694477575e-06,
      "loss": 0.0236,
      "step": 907100
    },
    {
      "epoch": 1.484521775560836,
      "grad_norm": 0.3683299422264099,
      "learning_rate": 7.077284977234241e-06,
      "loss": 0.0222,
      "step": 907120
    },
    {
      "epoch": 1.4845545059994893,
      "grad_norm": 1.4776923656463623,
      "learning_rate": 7.077219085020724e-06,
      "loss": 0.0207,
      "step": 907140
    },
    {
      "epoch": 1.4845872364381427,
      "grad_norm": 0.7695106863975525,
      "learning_rate": 7.0771531928072065e-06,
      "loss": 0.0176,
      "step": 907160
    },
    {
      "epoch": 1.484619966876796,
      "grad_norm": 0.36994048953056335,
      "learning_rate": 7.077087300593689e-06,
      "loss": 0.0223,
      "step": 907180
    },
    {
      "epoch": 1.4846526973154495,
      "grad_norm": 0.33635959029197693,
      "learning_rate": 7.077021408380173e-06,
      "loss": 0.0209,
      "step": 907200
    },
    {
      "epoch": 1.4846854277541028,
      "grad_norm": 0.50166255235672,
      "learning_rate": 7.076955516166655e-06,
      "loss": 0.021,
      "step": 907220
    },
    {
      "epoch": 1.4847181581927562,
      "grad_norm": 0.6070739030838013,
      "learning_rate": 7.076889623953138e-06,
      "loss": 0.0212,
      "step": 907240
    },
    {
      "epoch": 1.4847508886314094,
      "grad_norm": 0.28104424476623535,
      "learning_rate": 7.07682373173962e-06,
      "loss": 0.0233,
      "step": 907260
    },
    {
      "epoch": 1.4847836190700627,
      "grad_norm": 0.825210690498352,
      "learning_rate": 7.076757839526104e-06,
      "loss": 0.0164,
      "step": 907280
    },
    {
      "epoch": 1.484816349508716,
      "grad_norm": 0.5738056898117065,
      "learning_rate": 7.076691947312586e-06,
      "loss": 0.0154,
      "step": 907300
    },
    {
      "epoch": 1.4848490799473695,
      "grad_norm": 0.7395864725112915,
      "learning_rate": 7.076626055099069e-06,
      "loss": 0.0162,
      "step": 907320
    },
    {
      "epoch": 1.4848818103860228,
      "grad_norm": 1.6536831855773926,
      "learning_rate": 7.076560162885553e-06,
      "loss": 0.0193,
      "step": 907340
    },
    {
      "epoch": 1.484914540824676,
      "grad_norm": 0.5147436261177063,
      "learning_rate": 7.076494270672036e-06,
      "loss": 0.0285,
      "step": 907360
    },
    {
      "epoch": 1.4849472712633296,
      "grad_norm": 1.0538018941879272,
      "learning_rate": 7.076428378458518e-06,
      "loss": 0.0208,
      "step": 907380
    },
    {
      "epoch": 1.4849800017019827,
      "grad_norm": 0.3852004408836365,
      "learning_rate": 7.076362486245001e-06,
      "loss": 0.0216,
      "step": 907400
    },
    {
      "epoch": 1.4850127321406361,
      "grad_norm": 0.27679240703582764,
      "learning_rate": 7.076296594031485e-06,
      "loss": 0.0198,
      "step": 907420
    },
    {
      "epoch": 1.4850454625792895,
      "grad_norm": 0.32586777210235596,
      "learning_rate": 7.076230701817967e-06,
      "loss": 0.0153,
      "step": 907440
    },
    {
      "epoch": 1.4850781930179429,
      "grad_norm": 0.118739552795887,
      "learning_rate": 7.07616480960445e-06,
      "loss": 0.0158,
      "step": 907460
    },
    {
      "epoch": 1.4851109234565962,
      "grad_norm": 0.2591414153575897,
      "learning_rate": 7.076098917390932e-06,
      "loss": 0.0241,
      "step": 907480
    },
    {
      "epoch": 1.4851436538952494,
      "grad_norm": 1.9960274696350098,
      "learning_rate": 7.076033025177416e-06,
      "loss": 0.0219,
      "step": 907500
    },
    {
      "epoch": 1.485176384333903,
      "grad_norm": 0.7367156147956848,
      "learning_rate": 7.0759671329638975e-06,
      "loss": 0.0154,
      "step": 907520
    },
    {
      "epoch": 1.4852091147725561,
      "grad_norm": 0.10269805788993835,
      "learning_rate": 7.075901240750381e-06,
      "loss": 0.0135,
      "step": 907540
    },
    {
      "epoch": 1.4852418452112095,
      "grad_norm": 1.4005192518234253,
      "learning_rate": 7.075835348536864e-06,
      "loss": 0.0247,
      "step": 907560
    },
    {
      "epoch": 1.4852745756498629,
      "grad_norm": 0.3947592079639435,
      "learning_rate": 7.075769456323347e-06,
      "loss": 0.0184,
      "step": 907580
    },
    {
      "epoch": 1.4853073060885162,
      "grad_norm": 0.9991161227226257,
      "learning_rate": 7.075703564109829e-06,
      "loss": 0.0209,
      "step": 907600
    },
    {
      "epoch": 1.4853400365271696,
      "grad_norm": 0.3821680247783661,
      "learning_rate": 7.075637671896313e-06,
      "loss": 0.0236,
      "step": 907620
    },
    {
      "epoch": 1.4853727669658228,
      "grad_norm": 0.16293147206306458,
      "learning_rate": 7.075571779682795e-06,
      "loss": 0.0197,
      "step": 907640
    },
    {
      "epoch": 1.4854054974044761,
      "grad_norm": 0.3711317181587219,
      "learning_rate": 7.075505887469278e-06,
      "loss": 0.0297,
      "step": 907660
    },
    {
      "epoch": 1.4854382278431295,
      "grad_norm": 0.5847666263580322,
      "learning_rate": 7.075439995255762e-06,
      "loss": 0.0169,
      "step": 907680
    },
    {
      "epoch": 1.4854709582817829,
      "grad_norm": 0.7909834384918213,
      "learning_rate": 7.075374103042244e-06,
      "loss": 0.0116,
      "step": 907700
    },
    {
      "epoch": 1.4855036887204363,
      "grad_norm": 0.45848754048347473,
      "learning_rate": 7.0753082108287275e-06,
      "loss": 0.0237,
      "step": 907720
    },
    {
      "epoch": 1.4855364191590896,
      "grad_norm": 0.7549822330474854,
      "learning_rate": 7.075242318615209e-06,
      "loss": 0.0188,
      "step": 907740
    },
    {
      "epoch": 1.485569149597743,
      "grad_norm": 0.6524176001548767,
      "learning_rate": 7.075176426401693e-06,
      "loss": 0.0164,
      "step": 907760
    },
    {
      "epoch": 1.4856018800363961,
      "grad_norm": 1.7761176824569702,
      "learning_rate": 7.075110534188176e-06,
      "loss": 0.0258,
      "step": 907780
    },
    {
      "epoch": 1.4856346104750495,
      "grad_norm": 0.8341182470321655,
      "learning_rate": 7.0750446419746584e-06,
      "loss": 0.0236,
      "step": 907800
    },
    {
      "epoch": 1.485667340913703,
      "grad_norm": 0.10585767030715942,
      "learning_rate": 7.074978749761141e-06,
      "loss": 0.0203,
      "step": 907820
    },
    {
      "epoch": 1.4857000713523563,
      "grad_norm": 0.6474227905273438,
      "learning_rate": 7.074912857547625e-06,
      "loss": 0.0278,
      "step": 907840
    },
    {
      "epoch": 1.4857328017910096,
      "grad_norm": 0.8054903149604797,
      "learning_rate": 7.074846965334107e-06,
      "loss": 0.0164,
      "step": 907860
    },
    {
      "epoch": 1.485765532229663,
      "grad_norm": 0.26750698685646057,
      "learning_rate": 7.07478107312059e-06,
      "loss": 0.022,
      "step": 907880
    },
    {
      "epoch": 1.4857982626683164,
      "grad_norm": 0.4451930522918701,
      "learning_rate": 7.074715180907072e-06,
      "loss": 0.0218,
      "step": 907900
    },
    {
      "epoch": 1.4858309931069695,
      "grad_norm": 0.9957193732261658,
      "learning_rate": 7.074649288693556e-06,
      "loss": 0.0178,
      "step": 907920
    },
    {
      "epoch": 1.485863723545623,
      "grad_norm": 0.346276193857193,
      "learning_rate": 7.0745833964800385e-06,
      "loss": 0.0249,
      "step": 907940
    },
    {
      "epoch": 1.4858964539842763,
      "grad_norm": 0.615669310092926,
      "learning_rate": 7.074517504266521e-06,
      "loss": 0.0242,
      "step": 907960
    },
    {
      "epoch": 1.4859291844229296,
      "grad_norm": 0.44892311096191406,
      "learning_rate": 7.074451612053004e-06,
      "loss": 0.0182,
      "step": 907980
    },
    {
      "epoch": 1.485961914861583,
      "grad_norm": 0.14579451084136963,
      "learning_rate": 7.0743857198394875e-06,
      "loss": 0.0216,
      "step": 908000
    },
    {
      "epoch": 1.4859946453002364,
      "grad_norm": 1.4945695400238037,
      "learning_rate": 7.074319827625969e-06,
      "loss": 0.0252,
      "step": 908020
    },
    {
      "epoch": 1.4860273757388898,
      "grad_norm": 0.6186736822128296,
      "learning_rate": 7.074253935412453e-06,
      "loss": 0.0221,
      "step": 908040
    },
    {
      "epoch": 1.486060106177543,
      "grad_norm": 0.6471186876296997,
      "learning_rate": 7.074188043198937e-06,
      "loss": 0.018,
      "step": 908060
    },
    {
      "epoch": 1.4860928366161963,
      "grad_norm": 0.11339441686868668,
      "learning_rate": 7.0741221509854185e-06,
      "loss": 0.0181,
      "step": 908080
    },
    {
      "epoch": 1.4861255670548497,
      "grad_norm": 5.792619705200195,
      "learning_rate": 7.074056258771902e-06,
      "loss": 0.0163,
      "step": 908100
    },
    {
      "epoch": 1.486158297493503,
      "grad_norm": 1.0834343433380127,
      "learning_rate": 7.073990366558384e-06,
      "loss": 0.019,
      "step": 908120
    },
    {
      "epoch": 1.4861910279321564,
      "grad_norm": 0.13996002078056335,
      "learning_rate": 7.0739244743448676e-06,
      "loss": 0.0197,
      "step": 908140
    },
    {
      "epoch": 1.4862237583708096,
      "grad_norm": 0.9189198017120361,
      "learning_rate": 7.07385858213135e-06,
      "loss": 0.0234,
      "step": 908160
    },
    {
      "epoch": 1.4862564888094631,
      "grad_norm": 0.6594837307929993,
      "learning_rate": 7.073792689917833e-06,
      "loss": 0.0284,
      "step": 908180
    },
    {
      "epoch": 1.4862892192481163,
      "grad_norm": 1.8942530155181885,
      "learning_rate": 7.073726797704316e-06,
      "loss": 0.0241,
      "step": 908200
    },
    {
      "epoch": 1.4863219496867697,
      "grad_norm": 0.5659477114677429,
      "learning_rate": 7.073660905490799e-06,
      "loss": 0.0182,
      "step": 908220
    },
    {
      "epoch": 1.486354680125423,
      "grad_norm": 0.21157531440258026,
      "learning_rate": 7.073595013277281e-06,
      "loss": 0.0205,
      "step": 908240
    },
    {
      "epoch": 1.4863874105640764,
      "grad_norm": 0.1344536393880844,
      "learning_rate": 7.073529121063765e-06,
      "loss": 0.0242,
      "step": 908260
    },
    {
      "epoch": 1.4864201410027298,
      "grad_norm": 0.29354947805404663,
      "learning_rate": 7.073463228850247e-06,
      "loss": 0.0199,
      "step": 908280
    },
    {
      "epoch": 1.486452871441383,
      "grad_norm": 0.4514767825603485,
      "learning_rate": 7.07339733663673e-06,
      "loss": 0.0154,
      "step": 908300
    },
    {
      "epoch": 1.4864856018800365,
      "grad_norm": 1.0097471475601196,
      "learning_rate": 7.073331444423212e-06,
      "loss": 0.024,
      "step": 908320
    },
    {
      "epoch": 1.4865183323186897,
      "grad_norm": 0.760757565498352,
      "learning_rate": 7.073265552209696e-06,
      "loss": 0.0252,
      "step": 908340
    },
    {
      "epoch": 1.486551062757343,
      "grad_norm": 0.687396228313446,
      "learning_rate": 7.0731996599961785e-06,
      "loss": 0.0292,
      "step": 908360
    },
    {
      "epoch": 1.4865837931959964,
      "grad_norm": 0.18983915448188782,
      "learning_rate": 7.073133767782661e-06,
      "loss": 0.0234,
      "step": 908380
    },
    {
      "epoch": 1.4866165236346498,
      "grad_norm": 0.4991198778152466,
      "learning_rate": 7.073067875569145e-06,
      "loss": 0.0194,
      "step": 908400
    },
    {
      "epoch": 1.4866492540733032,
      "grad_norm": 0.9999808669090271,
      "learning_rate": 7.073001983355628e-06,
      "loss": 0.0167,
      "step": 908420
    },
    {
      "epoch": 1.4866819845119563,
      "grad_norm": 0.7818468809127808,
      "learning_rate": 7.072936091142111e-06,
      "loss": 0.0156,
      "step": 908440
    },
    {
      "epoch": 1.4867147149506097,
      "grad_norm": 0.0751805305480957,
      "learning_rate": 7.072870198928593e-06,
      "loss": 0.0278,
      "step": 908460
    },
    {
      "epoch": 1.486747445389263,
      "grad_norm": 0.45733004808425903,
      "learning_rate": 7.072804306715077e-06,
      "loss": 0.0288,
      "step": 908480
    },
    {
      "epoch": 1.4867801758279164,
      "grad_norm": 0.7654510140419006,
      "learning_rate": 7.0727384145015586e-06,
      "loss": 0.0201,
      "step": 908500
    },
    {
      "epoch": 1.4868129062665698,
      "grad_norm": 0.9210837483406067,
      "learning_rate": 7.072672522288042e-06,
      "loss": 0.0192,
      "step": 908520
    },
    {
      "epoch": 1.4868456367052232,
      "grad_norm": 0.2349703162908554,
      "learning_rate": 7.072606630074524e-06,
      "loss": 0.0259,
      "step": 908540
    },
    {
      "epoch": 1.4868783671438766,
      "grad_norm": 0.36726298928260803,
      "learning_rate": 7.072540737861008e-06,
      "loss": 0.0158,
      "step": 908560
    },
    {
      "epoch": 1.4869110975825297,
      "grad_norm": 0.7115662097930908,
      "learning_rate": 7.07247484564749e-06,
      "loss": 0.0181,
      "step": 908580
    },
    {
      "epoch": 1.486943828021183,
      "grad_norm": 0.7638172507286072,
      "learning_rate": 7.072408953433973e-06,
      "loss": 0.0209,
      "step": 908600
    },
    {
      "epoch": 1.4869765584598365,
      "grad_norm": 0.7988296747207642,
      "learning_rate": 7.072343061220456e-06,
      "loss": 0.0204,
      "step": 908620
    },
    {
      "epoch": 1.4870092888984898,
      "grad_norm": 0.18308256566524506,
      "learning_rate": 7.0722771690069394e-06,
      "loss": 0.0169,
      "step": 908640
    },
    {
      "epoch": 1.4870420193371432,
      "grad_norm": 0.4110458493232727,
      "learning_rate": 7.072211276793421e-06,
      "loss": 0.015,
      "step": 908660
    },
    {
      "epoch": 1.4870747497757966,
      "grad_norm": 0.8793818950653076,
      "learning_rate": 7.072145384579905e-06,
      "loss": 0.014,
      "step": 908680
    },
    {
      "epoch": 1.48710748021445,
      "grad_norm": 1.7118587493896484,
      "learning_rate": 7.072079492366387e-06,
      "loss": 0.0191,
      "step": 908700
    },
    {
      "epoch": 1.487140210653103,
      "grad_norm": 2.095346212387085,
      "learning_rate": 7.07201360015287e-06,
      "loss": 0.0189,
      "step": 908720
    },
    {
      "epoch": 1.4871729410917565,
      "grad_norm": 0.557367205619812,
      "learning_rate": 7.071947707939354e-06,
      "loss": 0.0212,
      "step": 908740
    },
    {
      "epoch": 1.4872056715304098,
      "grad_norm": 0.10698684304952621,
      "learning_rate": 7.071881815725836e-06,
      "loss": 0.0241,
      "step": 908760
    },
    {
      "epoch": 1.4872384019690632,
      "grad_norm": 1.818545937538147,
      "learning_rate": 7.0718159235123195e-06,
      "loss": 0.0302,
      "step": 908780
    },
    {
      "epoch": 1.4872711324077166,
      "grad_norm": 0.3775741457939148,
      "learning_rate": 7.071750031298802e-06,
      "loss": 0.0157,
      "step": 908800
    },
    {
      "epoch": 1.4873038628463697,
      "grad_norm": 0.3789508044719696,
      "learning_rate": 7.071684139085285e-06,
      "loss": 0.0343,
      "step": 908820
    },
    {
      "epoch": 1.4873365932850233,
      "grad_norm": 0.16257770359516144,
      "learning_rate": 7.071618246871768e-06,
      "loss": 0.0196,
      "step": 908840
    },
    {
      "epoch": 1.4873693237236765,
      "grad_norm": 0.4286671280860901,
      "learning_rate": 7.071552354658251e-06,
      "loss": 0.0125,
      "step": 908860
    },
    {
      "epoch": 1.4874020541623298,
      "grad_norm": 0.6356348991394043,
      "learning_rate": 7.071486462444733e-06,
      "loss": 0.0219,
      "step": 908880
    },
    {
      "epoch": 1.4874347846009832,
      "grad_norm": 0.3574868142604828,
      "learning_rate": 7.071420570231217e-06,
      "loss": 0.0183,
      "step": 908900
    },
    {
      "epoch": 1.4874675150396366,
      "grad_norm": 0.6836225390434265,
      "learning_rate": 7.071354678017699e-06,
      "loss": 0.0176,
      "step": 908920
    },
    {
      "epoch": 1.48750024547829,
      "grad_norm": 1.4059643745422363,
      "learning_rate": 7.071288785804182e-06,
      "loss": 0.0218,
      "step": 908940
    },
    {
      "epoch": 1.4875329759169431,
      "grad_norm": 0.4889953136444092,
      "learning_rate": 7.071222893590665e-06,
      "loss": 0.0251,
      "step": 908960
    },
    {
      "epoch": 1.4875657063555967,
      "grad_norm": 2.698232889175415,
      "learning_rate": 7.071157001377148e-06,
      "loss": 0.0143,
      "step": 908980
    },
    {
      "epoch": 1.4875984367942499,
      "grad_norm": 0.2935836613178253,
      "learning_rate": 7.0710911091636304e-06,
      "loss": 0.0167,
      "step": 909000
    },
    {
      "epoch": 1.4876311672329032,
      "grad_norm": 0.20897285640239716,
      "learning_rate": 7.071025216950114e-06,
      "loss": 0.0203,
      "step": 909020
    },
    {
      "epoch": 1.4876638976715566,
      "grad_norm": 0.5476011633872986,
      "learning_rate": 7.070959324736596e-06,
      "loss": 0.0144,
      "step": 909040
    },
    {
      "epoch": 1.48769662811021,
      "grad_norm": 0.9422719478607178,
      "learning_rate": 7.0708934325230795e-06,
      "loss": 0.0209,
      "step": 909060
    },
    {
      "epoch": 1.4877293585488633,
      "grad_norm": 0.6020880341529846,
      "learning_rate": 7.070827540309561e-06,
      "loss": 0.0212,
      "step": 909080
    },
    {
      "epoch": 1.4877620889875165,
      "grad_norm": 0.7393684983253479,
      "learning_rate": 7.070761648096045e-06,
      "loss": 0.0198,
      "step": 909100
    },
    {
      "epoch": 1.4877948194261699,
      "grad_norm": 0.32475244998931885,
      "learning_rate": 7.0706957558825286e-06,
      "loss": 0.0176,
      "step": 909120
    },
    {
      "epoch": 1.4878275498648232,
      "grad_norm": 0.5145692229270935,
      "learning_rate": 7.0706298636690105e-06,
      "loss": 0.0125,
      "step": 909140
    },
    {
      "epoch": 1.4878602803034766,
      "grad_norm": 0.1885301172733307,
      "learning_rate": 7.070563971455494e-06,
      "loss": 0.014,
      "step": 909160
    },
    {
      "epoch": 1.48789301074213,
      "grad_norm": 0.5936165452003479,
      "learning_rate": 7.070498079241977e-06,
      "loss": 0.0126,
      "step": 909180
    },
    {
      "epoch": 1.4879257411807834,
      "grad_norm": 0.43227094411849976,
      "learning_rate": 7.0704321870284595e-06,
      "loss": 0.0248,
      "step": 909200
    },
    {
      "epoch": 1.4879584716194367,
      "grad_norm": 0.6812860369682312,
      "learning_rate": 7.070366294814942e-06,
      "loss": 0.0217,
      "step": 909220
    },
    {
      "epoch": 1.4879912020580899,
      "grad_norm": 0.9313870668411255,
      "learning_rate": 7.070300402601426e-06,
      "loss": 0.0162,
      "step": 909240
    },
    {
      "epoch": 1.4880239324967433,
      "grad_norm": 0.16190193593502045,
      "learning_rate": 7.070234510387908e-06,
      "loss": 0.0203,
      "step": 909260
    },
    {
      "epoch": 1.4880566629353966,
      "grad_norm": 0.5751832127571106,
      "learning_rate": 7.070168618174391e-06,
      "loss": 0.0169,
      "step": 909280
    },
    {
      "epoch": 1.48808939337405,
      "grad_norm": 1.006264328956604,
      "learning_rate": 7.070102725960873e-06,
      "loss": 0.0246,
      "step": 909300
    },
    {
      "epoch": 1.4881221238127034,
      "grad_norm": 1.6268435716629028,
      "learning_rate": 7.070036833747357e-06,
      "loss": 0.0144,
      "step": 909320
    },
    {
      "epoch": 1.4881548542513567,
      "grad_norm": 0.13040043413639069,
      "learning_rate": 7.069970941533839e-06,
      "loss": 0.0138,
      "step": 909340
    },
    {
      "epoch": 1.4881875846900101,
      "grad_norm": 1.48701810836792,
      "learning_rate": 7.069905049320322e-06,
      "loss": 0.0187,
      "step": 909360
    },
    {
      "epoch": 1.4882203151286633,
      "grad_norm": 0.17237995564937592,
      "learning_rate": 7.069839157106805e-06,
      "loss": 0.0212,
      "step": 909380
    },
    {
      "epoch": 1.4882530455673166,
      "grad_norm": 0.769392728805542,
      "learning_rate": 7.069773264893288e-06,
      "loss": 0.0156,
      "step": 909400
    },
    {
      "epoch": 1.48828577600597,
      "grad_norm": 0.5129017233848572,
      "learning_rate": 7.0697073726797705e-06,
      "loss": 0.0242,
      "step": 909420
    },
    {
      "epoch": 1.4883185064446234,
      "grad_norm": 0.07888853549957275,
      "learning_rate": 7.069641480466254e-06,
      "loss": 0.0139,
      "step": 909440
    },
    {
      "epoch": 1.4883512368832768,
      "grad_norm": 0.9274348020553589,
      "learning_rate": 7.069575588252737e-06,
      "loss": 0.0226,
      "step": 909460
    },
    {
      "epoch": 1.4883839673219301,
      "grad_norm": 0.5838066935539246,
      "learning_rate": 7.06950969603922e-06,
      "loss": 0.0213,
      "step": 909480
    },
    {
      "epoch": 1.4884166977605835,
      "grad_norm": 0.3063156306743622,
      "learning_rate": 7.069443803825703e-06,
      "loss": 0.0255,
      "step": 909500
    },
    {
      "epoch": 1.4884494281992366,
      "grad_norm": 0.3907899260520935,
      "learning_rate": 7.069377911612185e-06,
      "loss": 0.0284,
      "step": 909520
    },
    {
      "epoch": 1.48848215863789,
      "grad_norm": 1.6530287265777588,
      "learning_rate": 7.069312019398669e-06,
      "loss": 0.0159,
      "step": 909540
    },
    {
      "epoch": 1.4885148890765434,
      "grad_norm": 0.39892634749412537,
      "learning_rate": 7.0692461271851505e-06,
      "loss": 0.0286,
      "step": 909560
    },
    {
      "epoch": 1.4885476195151968,
      "grad_norm": 0.2751849591732025,
      "learning_rate": 7.069180234971634e-06,
      "loss": 0.0167,
      "step": 909580
    },
    {
      "epoch": 1.4885803499538501,
      "grad_norm": 0.4498889744281769,
      "learning_rate": 7.069114342758117e-06,
      "loss": 0.0152,
      "step": 909600
    },
    {
      "epoch": 1.4886130803925033,
      "grad_norm": 1.1185040473937988,
      "learning_rate": 7.0690484505446e-06,
      "loss": 0.0193,
      "step": 909620
    },
    {
      "epoch": 1.4886458108311569,
      "grad_norm": 0.15582087635993958,
      "learning_rate": 7.068982558331082e-06,
      "loss": 0.0114,
      "step": 909640
    },
    {
      "epoch": 1.48867854126981,
      "grad_norm": 0.6008592247962952,
      "learning_rate": 7.068916666117566e-06,
      "loss": 0.0201,
      "step": 909660
    },
    {
      "epoch": 1.4887112717084634,
      "grad_norm": 0.2638811767101288,
      "learning_rate": 7.068850773904048e-06,
      "loss": 0.0198,
      "step": 909680
    },
    {
      "epoch": 1.4887440021471168,
      "grad_norm": 0.2508637011051178,
      "learning_rate": 7.068784881690531e-06,
      "loss": 0.0173,
      "step": 909700
    },
    {
      "epoch": 1.4887767325857701,
      "grad_norm": 0.4758440852165222,
      "learning_rate": 7.068718989477013e-06,
      "loss": 0.0201,
      "step": 909720
    },
    {
      "epoch": 1.4888094630244235,
      "grad_norm": 0.696337103843689,
      "learning_rate": 7.068653097263497e-06,
      "loss": 0.0281,
      "step": 909740
    },
    {
      "epoch": 1.4888421934630767,
      "grad_norm": 1.3552712202072144,
      "learning_rate": 7.06858720504998e-06,
      "loss": 0.0249,
      "step": 909760
    },
    {
      "epoch": 1.4888749239017303,
      "grad_norm": 0.16226671636104584,
      "learning_rate": 7.068521312836462e-06,
      "loss": 0.028,
      "step": 909780
    },
    {
      "epoch": 1.4889076543403834,
      "grad_norm": 0.5540831089019775,
      "learning_rate": 7.068455420622946e-06,
      "loss": 0.0338,
      "step": 909800
    },
    {
      "epoch": 1.4889403847790368,
      "grad_norm": 0.723383903503418,
      "learning_rate": 7.068389528409429e-06,
      "loss": 0.028,
      "step": 909820
    },
    {
      "epoch": 1.4889731152176902,
      "grad_norm": 0.5315694808959961,
      "learning_rate": 7.0683236361959114e-06,
      "loss": 0.0182,
      "step": 909840
    },
    {
      "epoch": 1.4890058456563435,
      "grad_norm": 0.42235323786735535,
      "learning_rate": 7.068257743982394e-06,
      "loss": 0.0259,
      "step": 909860
    },
    {
      "epoch": 1.489038576094997,
      "grad_norm": 1.5427449941635132,
      "learning_rate": 7.068191851768878e-06,
      "loss": 0.0185,
      "step": 909880
    },
    {
      "epoch": 1.48907130653365,
      "grad_norm": 0.17697319388389587,
      "learning_rate": 7.06812595955536e-06,
      "loss": 0.0191,
      "step": 909900
    },
    {
      "epoch": 1.4891040369723034,
      "grad_norm": 0.29289066791534424,
      "learning_rate": 7.068060067341843e-06,
      "loss": 0.0257,
      "step": 909920
    },
    {
      "epoch": 1.4891367674109568,
      "grad_norm": 0.7429031729698181,
      "learning_rate": 7.067994175128325e-06,
      "loss": 0.0208,
      "step": 909940
    },
    {
      "epoch": 1.4891694978496102,
      "grad_norm": 0.15942297875881195,
      "learning_rate": 7.067928282914809e-06,
      "loss": 0.0205,
      "step": 909960
    },
    {
      "epoch": 1.4892022282882635,
      "grad_norm": 0.7194936275482178,
      "learning_rate": 7.0678623907012915e-06,
      "loss": 0.0191,
      "step": 909980
    },
    {
      "epoch": 1.489234958726917,
      "grad_norm": 0.3496769070625305,
      "learning_rate": 7.067796498487774e-06,
      "loss": 0.0212,
      "step": 910000
    },
    {
      "epoch": 1.4892676891655703,
      "grad_norm": 0.4330945312976837,
      "learning_rate": 7.067730606274257e-06,
      "loss": 0.0211,
      "step": 910020
    },
    {
      "epoch": 1.4893004196042234,
      "grad_norm": 0.6120138764381409,
      "learning_rate": 7.0676647140607405e-06,
      "loss": 0.0303,
      "step": 910040
    },
    {
      "epoch": 1.4893331500428768,
      "grad_norm": 0.8227351903915405,
      "learning_rate": 7.0675988218472224e-06,
      "loss": 0.0162,
      "step": 910060
    },
    {
      "epoch": 1.4893658804815302,
      "grad_norm": 0.8105626702308655,
      "learning_rate": 7.067532929633706e-06,
      "loss": 0.0277,
      "step": 910080
    },
    {
      "epoch": 1.4893986109201836,
      "grad_norm": 0.9254236817359924,
      "learning_rate": 7.067467037420188e-06,
      "loss": 0.0247,
      "step": 910100
    },
    {
      "epoch": 1.489431341358837,
      "grad_norm": 0.3541405498981476,
      "learning_rate": 7.0674011452066715e-06,
      "loss": 0.018,
      "step": 910120
    },
    {
      "epoch": 1.4894640717974903,
      "grad_norm": 0.5158258676528931,
      "learning_rate": 7.067335252993155e-06,
      "loss": 0.0187,
      "step": 910140
    },
    {
      "epoch": 1.4894968022361437,
      "grad_norm": 0.6914178729057312,
      "learning_rate": 7.067269360779637e-06,
      "loss": 0.0184,
      "step": 910160
    },
    {
      "epoch": 1.4895295326747968,
      "grad_norm": 0.22433888912200928,
      "learning_rate": 7.0672034685661206e-06,
      "loss": 0.0184,
      "step": 910180
    },
    {
      "epoch": 1.4895622631134502,
      "grad_norm": 0.1215125247836113,
      "learning_rate": 7.067137576352603e-06,
      "loss": 0.0157,
      "step": 910200
    },
    {
      "epoch": 1.4895949935521036,
      "grad_norm": 1.206421971321106,
      "learning_rate": 7.067071684139086e-06,
      "loss": 0.021,
      "step": 910220
    },
    {
      "epoch": 1.489627723990757,
      "grad_norm": 0.488683819770813,
      "learning_rate": 7.067005791925569e-06,
      "loss": 0.0145,
      "step": 910240
    },
    {
      "epoch": 1.4896604544294103,
      "grad_norm": 0.35588160157203674,
      "learning_rate": 7.066939899712052e-06,
      "loss": 0.0223,
      "step": 910260
    },
    {
      "epoch": 1.4896931848680637,
      "grad_norm": 0.2629176378250122,
      "learning_rate": 7.066874007498534e-06,
      "loss": 0.0211,
      "step": 910280
    },
    {
      "epoch": 1.489725915306717,
      "grad_norm": 2.532369375228882,
      "learning_rate": 7.066808115285018e-06,
      "loss": 0.0188,
      "step": 910300
    },
    {
      "epoch": 1.4897586457453702,
      "grad_norm": 0.5452955961227417,
      "learning_rate": 7.0667422230715e-06,
      "loss": 0.015,
      "step": 910320
    },
    {
      "epoch": 1.4897913761840236,
      "grad_norm": 0.5637737512588501,
      "learning_rate": 7.066676330857983e-06,
      "loss": 0.0195,
      "step": 910340
    },
    {
      "epoch": 1.489824106622677,
      "grad_norm": 0.1331886649131775,
      "learning_rate": 7.066610438644465e-06,
      "loss": 0.0246,
      "step": 910360
    },
    {
      "epoch": 1.4898568370613303,
      "grad_norm": 0.598943293094635,
      "learning_rate": 7.066544546430949e-06,
      "loss": 0.0177,
      "step": 910380
    },
    {
      "epoch": 1.4898895674999837,
      "grad_norm": 0.46151870489120483,
      "learning_rate": 7.0664786542174315e-06,
      "loss": 0.0255,
      "step": 910400
    },
    {
      "epoch": 1.4899222979386368,
      "grad_norm": 0.3988455533981323,
      "learning_rate": 7.066412762003914e-06,
      "loss": 0.0239,
      "step": 910420
    },
    {
      "epoch": 1.4899550283772904,
      "grad_norm": 0.2654770016670227,
      "learning_rate": 7.066346869790397e-06,
      "loss": 0.0197,
      "step": 910440
    },
    {
      "epoch": 1.4899877588159436,
      "grad_norm": 0.2703552544116974,
      "learning_rate": 7.066280977576881e-06,
      "loss": 0.0304,
      "step": 910460
    },
    {
      "epoch": 1.490020489254597,
      "grad_norm": 1.2635244131088257,
      "learning_rate": 7.0662150853633625e-06,
      "loss": 0.0259,
      "step": 910480
    },
    {
      "epoch": 1.4900532196932503,
      "grad_norm": 0.2924113869667053,
      "learning_rate": 7.066149193149846e-06,
      "loss": 0.0167,
      "step": 910500
    },
    {
      "epoch": 1.4900859501319037,
      "grad_norm": 0.28429439663887024,
      "learning_rate": 7.06608330093633e-06,
      "loss": 0.0183,
      "step": 910520
    },
    {
      "epoch": 1.490118680570557,
      "grad_norm": 0.42321109771728516,
      "learning_rate": 7.0660174087228116e-06,
      "loss": 0.0136,
      "step": 910540
    },
    {
      "epoch": 1.4901514110092102,
      "grad_norm": 0.2747795581817627,
      "learning_rate": 7.065951516509295e-06,
      "loss": 0.0299,
      "step": 910560
    },
    {
      "epoch": 1.4901841414478638,
      "grad_norm": 1.1426889896392822,
      "learning_rate": 7.065885624295777e-06,
      "loss": 0.0265,
      "step": 910580
    },
    {
      "epoch": 1.490216871886517,
      "grad_norm": 0.7543542385101318,
      "learning_rate": 7.065819732082261e-06,
      "loss": 0.0225,
      "step": 910600
    },
    {
      "epoch": 1.4902496023251703,
      "grad_norm": 0.6865119934082031,
      "learning_rate": 7.065753839868743e-06,
      "loss": 0.0238,
      "step": 910620
    },
    {
      "epoch": 1.4902823327638237,
      "grad_norm": 0.8303496241569519,
      "learning_rate": 7.065687947655226e-06,
      "loss": 0.0175,
      "step": 910640
    },
    {
      "epoch": 1.490315063202477,
      "grad_norm": 0.3696850836277008,
      "learning_rate": 7.065622055441709e-06,
      "loss": 0.0223,
      "step": 910660
    },
    {
      "epoch": 1.4903477936411305,
      "grad_norm": 0.5942779779434204,
      "learning_rate": 7.0655561632281924e-06,
      "loss": 0.0191,
      "step": 910680
    },
    {
      "epoch": 1.4903805240797836,
      "grad_norm": 0.734973669052124,
      "learning_rate": 7.065490271014674e-06,
      "loss": 0.0173,
      "step": 910700
    },
    {
      "epoch": 1.490413254518437,
      "grad_norm": 0.376891165971756,
      "learning_rate": 7.065424378801158e-06,
      "loss": 0.0222,
      "step": 910720
    },
    {
      "epoch": 1.4904459849570904,
      "grad_norm": 0.729948103427887,
      "learning_rate": 7.06535848658764e-06,
      "loss": 0.0221,
      "step": 910740
    },
    {
      "epoch": 1.4904787153957437,
      "grad_norm": 0.6123971343040466,
      "learning_rate": 7.065292594374123e-06,
      "loss": 0.0215,
      "step": 910760
    },
    {
      "epoch": 1.490511445834397,
      "grad_norm": 1.3331633806228638,
      "learning_rate": 7.065226702160606e-06,
      "loss": 0.0208,
      "step": 910780
    },
    {
      "epoch": 1.4905441762730505,
      "grad_norm": 0.1672726273536682,
      "learning_rate": 7.065160809947089e-06,
      "loss": 0.0354,
      "step": 910800
    },
    {
      "epoch": 1.4905769067117038,
      "grad_norm": 0.4546627700328827,
      "learning_rate": 7.065094917733572e-06,
      "loss": 0.0224,
      "step": 910820
    },
    {
      "epoch": 1.490609637150357,
      "grad_norm": 1.5307520627975464,
      "learning_rate": 7.065029025520055e-06,
      "loss": 0.0218,
      "step": 910840
    },
    {
      "epoch": 1.4906423675890104,
      "grad_norm": 0.21464700996875763,
      "learning_rate": 7.064963133306538e-06,
      "loss": 0.0256,
      "step": 910860
    },
    {
      "epoch": 1.4906750980276637,
      "grad_norm": 0.36275315284729004,
      "learning_rate": 7.064897241093021e-06,
      "loss": 0.0249,
      "step": 910880
    },
    {
      "epoch": 1.490707828466317,
      "grad_norm": 1.1683489084243774,
      "learning_rate": 7.064831348879504e-06,
      "loss": 0.0274,
      "step": 910900
    },
    {
      "epoch": 1.4907405589049705,
      "grad_norm": 0.29634690284729004,
      "learning_rate": 7.064765456665986e-06,
      "loss": 0.032,
      "step": 910920
    },
    {
      "epoch": 1.4907732893436239,
      "grad_norm": 0.26886847615242004,
      "learning_rate": 7.06469956445247e-06,
      "loss": 0.0262,
      "step": 910940
    },
    {
      "epoch": 1.4908060197822772,
      "grad_norm": 0.6419250965118408,
      "learning_rate": 7.064633672238952e-06,
      "loss": 0.0202,
      "step": 910960
    },
    {
      "epoch": 1.4908387502209304,
      "grad_norm": 0.22951234877109528,
      "learning_rate": 7.064567780025435e-06,
      "loss": 0.0266,
      "step": 910980
    },
    {
      "epoch": 1.4908714806595837,
      "grad_norm": 1.1235748529434204,
      "learning_rate": 7.064501887811918e-06,
      "loss": 0.0214,
      "step": 911000
    },
    {
      "epoch": 1.4909042110982371,
      "grad_norm": 1.729828953742981,
      "learning_rate": 7.064435995598401e-06,
      "loss": 0.0223,
      "step": 911020
    },
    {
      "epoch": 1.4909369415368905,
      "grad_norm": 1.1260507106781006,
      "learning_rate": 7.0643701033848835e-06,
      "loss": 0.0194,
      "step": 911040
    },
    {
      "epoch": 1.4909696719755439,
      "grad_norm": 1.1109811067581177,
      "learning_rate": 7.064304211171367e-06,
      "loss": 0.0246,
      "step": 911060
    },
    {
      "epoch": 1.4910024024141972,
      "grad_norm": 1.0096325874328613,
      "learning_rate": 7.064238318957849e-06,
      "loss": 0.0216,
      "step": 911080
    },
    {
      "epoch": 1.4910351328528506,
      "grad_norm": 0.19514746963977814,
      "learning_rate": 7.0641724267443325e-06,
      "loss": 0.0231,
      "step": 911100
    },
    {
      "epoch": 1.4910678632915038,
      "grad_norm": 0.5527984499931335,
      "learning_rate": 7.064106534530814e-06,
      "loss": 0.021,
      "step": 911120
    },
    {
      "epoch": 1.4911005937301571,
      "grad_norm": 0.2552914023399353,
      "learning_rate": 7.064040642317298e-06,
      "loss": 0.0176,
      "step": 911140
    },
    {
      "epoch": 1.4911333241688105,
      "grad_norm": 6.402622699737549,
      "learning_rate": 7.06397475010378e-06,
      "loss": 0.0143,
      "step": 911160
    },
    {
      "epoch": 1.4911660546074639,
      "grad_norm": 0.3956349790096283,
      "learning_rate": 7.0639088578902635e-06,
      "loss": 0.0224,
      "step": 911180
    },
    {
      "epoch": 1.4911987850461172,
      "grad_norm": 0.7099549174308777,
      "learning_rate": 7.063842965676747e-06,
      "loss": 0.0168,
      "step": 911200
    },
    {
      "epoch": 1.4912315154847704,
      "grad_norm": 0.6029396653175354,
      "learning_rate": 7.06377707346323e-06,
      "loss": 0.0194,
      "step": 911220
    },
    {
      "epoch": 1.491264245923424,
      "grad_norm": 0.12056490033864975,
      "learning_rate": 7.0637111812497125e-06,
      "loss": 0.0224,
      "step": 911240
    },
    {
      "epoch": 1.4912969763620771,
      "grad_norm": 0.34698837995529175,
      "learning_rate": 7.063645289036195e-06,
      "loss": 0.0188,
      "step": 911260
    },
    {
      "epoch": 1.4913297068007305,
      "grad_norm": 0.6939767599105835,
      "learning_rate": 7.063579396822679e-06,
      "loss": 0.0194,
      "step": 911280
    },
    {
      "epoch": 1.4913624372393839,
      "grad_norm": 1.5799294710159302,
      "learning_rate": 7.063513504609161e-06,
      "loss": 0.0301,
      "step": 911300
    },
    {
      "epoch": 1.4913951676780373,
      "grad_norm": 1.1652436256408691,
      "learning_rate": 7.063447612395644e-06,
      "loss": 0.0287,
      "step": 911320
    },
    {
      "epoch": 1.4914278981166906,
      "grad_norm": 1.7039213180541992,
      "learning_rate": 7.063381720182126e-06,
      "loss": 0.0206,
      "step": 911340
    },
    {
      "epoch": 1.4914606285553438,
      "grad_norm": 0.7403510212898254,
      "learning_rate": 7.06331582796861e-06,
      "loss": 0.0284,
      "step": 911360
    },
    {
      "epoch": 1.4914933589939974,
      "grad_norm": 0.5808156728744507,
      "learning_rate": 7.063249935755092e-06,
      "loss": 0.0208,
      "step": 911380
    },
    {
      "epoch": 1.4915260894326505,
      "grad_norm": 0.7368398904800415,
      "learning_rate": 7.063184043541575e-06,
      "loss": 0.0209,
      "step": 911400
    },
    {
      "epoch": 1.491558819871304,
      "grad_norm": 0.7546346783638,
      "learning_rate": 7.063118151328058e-06,
      "loss": 0.0275,
      "step": 911420
    },
    {
      "epoch": 1.4915915503099573,
      "grad_norm": 1.3653358221054077,
      "learning_rate": 7.063052259114541e-06,
      "loss": 0.0128,
      "step": 911440
    },
    {
      "epoch": 1.4916242807486106,
      "grad_norm": 1.507196068763733,
      "learning_rate": 7.0629863669010235e-06,
      "loss": 0.0207,
      "step": 911460
    },
    {
      "epoch": 1.491657011187264,
      "grad_norm": 0.2262534499168396,
      "learning_rate": 7.062920474687507e-06,
      "loss": 0.0201,
      "step": 911480
    },
    {
      "epoch": 1.4916897416259172,
      "grad_norm": 0.2760692536830902,
      "learning_rate": 7.062854582473989e-06,
      "loss": 0.0221,
      "step": 911500
    },
    {
      "epoch": 1.4917224720645705,
      "grad_norm": 0.6655104756355286,
      "learning_rate": 7.062788690260473e-06,
      "loss": 0.0169,
      "step": 911520
    },
    {
      "epoch": 1.491755202503224,
      "grad_norm": 0.6087371706962585,
      "learning_rate": 7.0627227980469545e-06,
      "loss": 0.0195,
      "step": 911540
    },
    {
      "epoch": 1.4917879329418773,
      "grad_norm": 1.9612276554107666,
      "learning_rate": 7.062656905833438e-06,
      "loss": 0.02,
      "step": 911560
    },
    {
      "epoch": 1.4918206633805307,
      "grad_norm": 0.9757674932479858,
      "learning_rate": 7.062591013619922e-06,
      "loss": 0.0257,
      "step": 911580
    },
    {
      "epoch": 1.491853393819184,
      "grad_norm": 0.5942226052284241,
      "learning_rate": 7.0625251214064036e-06,
      "loss": 0.0189,
      "step": 911600
    },
    {
      "epoch": 1.4918861242578374,
      "grad_norm": 0.783867597579956,
      "learning_rate": 7.062459229192887e-06,
      "loss": 0.031,
      "step": 911620
    },
    {
      "epoch": 1.4919188546964905,
      "grad_norm": 0.8282302618026733,
      "learning_rate": 7.06239333697937e-06,
      "loss": 0.0271,
      "step": 911640
    },
    {
      "epoch": 1.491951585135144,
      "grad_norm": 0.9775050282478333,
      "learning_rate": 7.062327444765853e-06,
      "loss": 0.0195,
      "step": 911660
    },
    {
      "epoch": 1.4919843155737973,
      "grad_norm": 1.560744285583496,
      "learning_rate": 7.062261552552335e-06,
      "loss": 0.0263,
      "step": 911680
    },
    {
      "epoch": 1.4920170460124507,
      "grad_norm": 0.47468557953834534,
      "learning_rate": 7.062195660338819e-06,
      "loss": 0.0213,
      "step": 911700
    },
    {
      "epoch": 1.492049776451104,
      "grad_norm": 0.03192836791276932,
      "learning_rate": 7.062129768125301e-06,
      "loss": 0.0153,
      "step": 911720
    },
    {
      "epoch": 1.4920825068897574,
      "grad_norm": 0.5193712115287781,
      "learning_rate": 7.0620638759117844e-06,
      "loss": 0.0236,
      "step": 911740
    },
    {
      "epoch": 1.4921152373284108,
      "grad_norm": 0.4773409068584442,
      "learning_rate": 7.061997983698266e-06,
      "loss": 0.0268,
      "step": 911760
    },
    {
      "epoch": 1.492147967767064,
      "grad_norm": 0.667442262172699,
      "learning_rate": 7.06193209148475e-06,
      "loss": 0.0157,
      "step": 911780
    },
    {
      "epoch": 1.4921806982057173,
      "grad_norm": 1.0528368949890137,
      "learning_rate": 7.061866199271233e-06,
      "loss": 0.0167,
      "step": 911800
    },
    {
      "epoch": 1.4922134286443707,
      "grad_norm": 0.3007484972476959,
      "learning_rate": 7.061800307057715e-06,
      "loss": 0.0278,
      "step": 911820
    },
    {
      "epoch": 1.492246159083024,
      "grad_norm": 0.43883082270622253,
      "learning_rate": 7.061734414844198e-06,
      "loss": 0.0196,
      "step": 911840
    },
    {
      "epoch": 1.4922788895216774,
      "grad_norm": 1.3412671089172363,
      "learning_rate": 7.061668522630682e-06,
      "loss": 0.0195,
      "step": 911860
    },
    {
      "epoch": 1.4923116199603306,
      "grad_norm": 0.24747814238071442,
      "learning_rate": 7.061602630417164e-06,
      "loss": 0.0177,
      "step": 911880
    },
    {
      "epoch": 1.4923443503989842,
      "grad_norm": 0.6280587315559387,
      "learning_rate": 7.061536738203647e-06,
      "loss": 0.0226,
      "step": 911900
    },
    {
      "epoch": 1.4923770808376373,
      "grad_norm": 0.1978452205657959,
      "learning_rate": 7.061470845990131e-06,
      "loss": 0.0258,
      "step": 911920
    },
    {
      "epoch": 1.4924098112762907,
      "grad_norm": 0.7000370621681213,
      "learning_rate": 7.061404953776613e-06,
      "loss": 0.0183,
      "step": 911940
    },
    {
      "epoch": 1.492442541714944,
      "grad_norm": 1.1135824918746948,
      "learning_rate": 7.061339061563096e-06,
      "loss": 0.026,
      "step": 911960
    },
    {
      "epoch": 1.4924752721535974,
      "grad_norm": 0.6993671655654907,
      "learning_rate": 7.061273169349578e-06,
      "loss": 0.0322,
      "step": 911980
    },
    {
      "epoch": 1.4925080025922508,
      "grad_norm": 1.5030046701431274,
      "learning_rate": 7.061207277136062e-06,
      "loss": 0.0208,
      "step": 912000
    },
    {
      "epoch": 1.492540733030904,
      "grad_norm": 0.8363844752311707,
      "learning_rate": 7.0611413849225445e-06,
      "loss": 0.03,
      "step": 912020
    },
    {
      "epoch": 1.4925734634695575,
      "grad_norm": 1.289045810699463,
      "learning_rate": 7.061075492709027e-06,
      "loss": 0.0139,
      "step": 912040
    },
    {
      "epoch": 1.4926061939082107,
      "grad_norm": 0.4173113703727722,
      "learning_rate": 7.06100960049551e-06,
      "loss": 0.0247,
      "step": 912060
    },
    {
      "epoch": 1.492638924346864,
      "grad_norm": 0.5744951367378235,
      "learning_rate": 7.0609437082819935e-06,
      "loss": 0.0203,
      "step": 912080
    },
    {
      "epoch": 1.4926716547855174,
      "grad_norm": 0.2406611293554306,
      "learning_rate": 7.0608778160684754e-06,
      "loss": 0.0192,
      "step": 912100
    },
    {
      "epoch": 1.4927043852241708,
      "grad_norm": 0.5636383891105652,
      "learning_rate": 7.060811923854959e-06,
      "loss": 0.0164,
      "step": 912120
    },
    {
      "epoch": 1.4927371156628242,
      "grad_norm": 0.29148951172828674,
      "learning_rate": 7.060746031641441e-06,
      "loss": 0.0328,
      "step": 912140
    },
    {
      "epoch": 1.4927698461014773,
      "grad_norm": 0.21614807844161987,
      "learning_rate": 7.0606801394279245e-06,
      "loss": 0.0174,
      "step": 912160
    },
    {
      "epoch": 1.4928025765401307,
      "grad_norm": 0.7238976359367371,
      "learning_rate": 7.060614247214406e-06,
      "loss": 0.0227,
      "step": 912180
    },
    {
      "epoch": 1.492835306978784,
      "grad_norm": 0.33896738290786743,
      "learning_rate": 7.06054835500089e-06,
      "loss": 0.0235,
      "step": 912200
    },
    {
      "epoch": 1.4928680374174375,
      "grad_norm": 0.2157113254070282,
      "learning_rate": 7.060482462787373e-06,
      "loss": 0.0165,
      "step": 912220
    },
    {
      "epoch": 1.4929007678560908,
      "grad_norm": 0.6374340653419495,
      "learning_rate": 7.0604165705738555e-06,
      "loss": 0.0168,
      "step": 912240
    },
    {
      "epoch": 1.4929334982947442,
      "grad_norm": 1.3520069122314453,
      "learning_rate": 7.060350678360339e-06,
      "loss": 0.0129,
      "step": 912260
    },
    {
      "epoch": 1.4929662287333976,
      "grad_norm": 0.9869796633720398,
      "learning_rate": 7.060284786146822e-06,
      "loss": 0.023,
      "step": 912280
    },
    {
      "epoch": 1.4929989591720507,
      "grad_norm": 0.050722964107990265,
      "learning_rate": 7.060218893933305e-06,
      "loss": 0.0166,
      "step": 912300
    },
    {
      "epoch": 1.493031689610704,
      "grad_norm": 0.8343166708946228,
      "learning_rate": 7.060153001719787e-06,
      "loss": 0.0155,
      "step": 912320
    },
    {
      "epoch": 1.4930644200493575,
      "grad_norm": 0.48871883749961853,
      "learning_rate": 7.060087109506271e-06,
      "loss": 0.0215,
      "step": 912340
    },
    {
      "epoch": 1.4930971504880108,
      "grad_norm": 0.214594766497612,
      "learning_rate": 7.060021217292753e-06,
      "loss": 0.027,
      "step": 912360
    },
    {
      "epoch": 1.4931298809266642,
      "grad_norm": 0.4414292275905609,
      "learning_rate": 7.059955325079236e-06,
      "loss": 0.0219,
      "step": 912380
    },
    {
      "epoch": 1.4931626113653176,
      "grad_norm": 0.32582682371139526,
      "learning_rate": 7.059889432865718e-06,
      "loss": 0.0161,
      "step": 912400
    },
    {
      "epoch": 1.493195341803971,
      "grad_norm": 1.5540910959243774,
      "learning_rate": 7.059823540652202e-06,
      "loss": 0.0219,
      "step": 912420
    },
    {
      "epoch": 1.493228072242624,
      "grad_norm": 2.940117120742798,
      "learning_rate": 7.0597576484386846e-06,
      "loss": 0.0202,
      "step": 912440
    },
    {
      "epoch": 1.4932608026812775,
      "grad_norm": 1.6504614353179932,
      "learning_rate": 7.059691756225167e-06,
      "loss": 0.0266,
      "step": 912460
    },
    {
      "epoch": 1.4932935331199308,
      "grad_norm": 0.3916814625263214,
      "learning_rate": 7.05962586401165e-06,
      "loss": 0.0217,
      "step": 912480
    },
    {
      "epoch": 1.4933262635585842,
      "grad_norm": 0.23417697846889496,
      "learning_rate": 7.059559971798134e-06,
      "loss": 0.0147,
      "step": 912500
    },
    {
      "epoch": 1.4933589939972376,
      "grad_norm": 0.32182586193084717,
      "learning_rate": 7.0594940795846155e-06,
      "loss": 0.0211,
      "step": 912520
    },
    {
      "epoch": 1.493391724435891,
      "grad_norm": 0.4814740717411041,
      "learning_rate": 7.059428187371099e-06,
      "loss": 0.0238,
      "step": 912540
    },
    {
      "epoch": 1.4934244548745443,
      "grad_norm": 0.30496883392333984,
      "learning_rate": 7.059362295157581e-06,
      "loss": 0.022,
      "step": 912560
    },
    {
      "epoch": 1.4934571853131975,
      "grad_norm": 0.04230185225605965,
      "learning_rate": 7.0592964029440646e-06,
      "loss": 0.025,
      "step": 912580
    },
    {
      "epoch": 1.4934899157518509,
      "grad_norm": 0.523932695388794,
      "learning_rate": 7.059230510730548e-06,
      "loss": 0.0175,
      "step": 912600
    },
    {
      "epoch": 1.4935226461905042,
      "grad_norm": 1.0034270286560059,
      "learning_rate": 7.05916461851703e-06,
      "loss": 0.0211,
      "step": 912620
    },
    {
      "epoch": 1.4935553766291576,
      "grad_norm": 0.19902163743972778,
      "learning_rate": 7.059098726303514e-06,
      "loss": 0.0144,
      "step": 912640
    },
    {
      "epoch": 1.493588107067811,
      "grad_norm": 0.1666719764471054,
      "learning_rate": 7.059032834089996e-06,
      "loss": 0.0175,
      "step": 912660
    },
    {
      "epoch": 1.4936208375064641,
      "grad_norm": 0.5975142121315002,
      "learning_rate": 7.058966941876479e-06,
      "loss": 0.0188,
      "step": 912680
    },
    {
      "epoch": 1.4936535679451177,
      "grad_norm": 1.8242076635360718,
      "learning_rate": 7.058901049662962e-06,
      "loss": 0.0224,
      "step": 912700
    },
    {
      "epoch": 1.4936862983837709,
      "grad_norm": 1.1530168056488037,
      "learning_rate": 7.0588351574494454e-06,
      "loss": 0.0259,
      "step": 912720
    },
    {
      "epoch": 1.4937190288224242,
      "grad_norm": 0.28403881192207336,
      "learning_rate": 7.058769265235927e-06,
      "loss": 0.0199,
      "step": 912740
    },
    {
      "epoch": 1.4937517592610776,
      "grad_norm": 0.5702707171440125,
      "learning_rate": 7.058703373022411e-06,
      "loss": 0.0192,
      "step": 912760
    },
    {
      "epoch": 1.493784489699731,
      "grad_norm": 0.8063551783561707,
      "learning_rate": 7.058637480808893e-06,
      "loss": 0.0306,
      "step": 912780
    },
    {
      "epoch": 1.4938172201383844,
      "grad_norm": 0.17812834680080414,
      "learning_rate": 7.058571588595376e-06,
      "loss": 0.0099,
      "step": 912800
    },
    {
      "epoch": 1.4938499505770375,
      "grad_norm": 0.7601137161254883,
      "learning_rate": 7.058505696381859e-06,
      "loss": 0.0186,
      "step": 912820
    },
    {
      "epoch": 1.493882681015691,
      "grad_norm": 0.5606348514556885,
      "learning_rate": 7.058439804168342e-06,
      "loss": 0.0238,
      "step": 912840
    },
    {
      "epoch": 1.4939154114543443,
      "grad_norm": 0.6104595065116882,
      "learning_rate": 7.058373911954825e-06,
      "loss": 0.0241,
      "step": 912860
    },
    {
      "epoch": 1.4939481418929976,
      "grad_norm": 0.6742220520973206,
      "learning_rate": 7.058308019741308e-06,
      "loss": 0.0203,
      "step": 912880
    },
    {
      "epoch": 1.493980872331651,
      "grad_norm": 0.30084481835365295,
      "learning_rate": 7.05824212752779e-06,
      "loss": 0.0206,
      "step": 912900
    },
    {
      "epoch": 1.4940136027703044,
      "grad_norm": 0.6534146666526794,
      "learning_rate": 7.058176235314274e-06,
      "loss": 0.0245,
      "step": 912920
    },
    {
      "epoch": 1.4940463332089577,
      "grad_norm": 0.9408072233200073,
      "learning_rate": 7.058110343100756e-06,
      "loss": 0.0241,
      "step": 912940
    },
    {
      "epoch": 1.494079063647611,
      "grad_norm": 0.53425133228302,
      "learning_rate": 7.058044450887239e-06,
      "loss": 0.0183,
      "step": 912960
    },
    {
      "epoch": 1.4941117940862643,
      "grad_norm": 0.19073989987373352,
      "learning_rate": 7.057978558673723e-06,
      "loss": 0.027,
      "step": 912980
    },
    {
      "epoch": 1.4941445245249176,
      "grad_norm": 1.0465279817581177,
      "learning_rate": 7.057912666460205e-06,
      "loss": 0.018,
      "step": 913000
    },
    {
      "epoch": 1.494177254963571,
      "grad_norm": 0.5734106302261353,
      "learning_rate": 7.057846774246688e-06,
      "loss": 0.0145,
      "step": 913020
    },
    {
      "epoch": 1.4942099854022244,
      "grad_norm": 0.6730589270591736,
      "learning_rate": 7.057780882033171e-06,
      "loss": 0.0282,
      "step": 913040
    },
    {
      "epoch": 1.4942427158408778,
      "grad_norm": 0.3951263427734375,
      "learning_rate": 7.057714989819654e-06,
      "loss": 0.0294,
      "step": 913060
    },
    {
      "epoch": 1.4942754462795311,
      "grad_norm": 0.73026043176651,
      "learning_rate": 7.0576490976061365e-06,
      "loss": 0.0199,
      "step": 913080
    },
    {
      "epoch": 1.4943081767181843,
      "grad_norm": 0.8223005533218384,
      "learning_rate": 7.05758320539262e-06,
      "loss": 0.0182,
      "step": 913100
    },
    {
      "epoch": 1.4943409071568377,
      "grad_norm": 0.655945360660553,
      "learning_rate": 7.057517313179102e-06,
      "loss": 0.0206,
      "step": 913120
    },
    {
      "epoch": 1.494373637595491,
      "grad_norm": 0.29897528886795044,
      "learning_rate": 7.0574514209655855e-06,
      "loss": 0.0178,
      "step": 913140
    },
    {
      "epoch": 1.4944063680341444,
      "grad_norm": 0.22922366857528687,
      "learning_rate": 7.057385528752067e-06,
      "loss": 0.0285,
      "step": 913160
    },
    {
      "epoch": 1.4944390984727978,
      "grad_norm": 0.6975154876708984,
      "learning_rate": 7.057319636538551e-06,
      "loss": 0.0154,
      "step": 913180
    },
    {
      "epoch": 1.4944718289114511,
      "grad_norm": 0.5416589379310608,
      "learning_rate": 7.057253744325033e-06,
      "loss": 0.0187,
      "step": 913200
    },
    {
      "epoch": 1.4945045593501045,
      "grad_norm": 0.6064530611038208,
      "learning_rate": 7.0571878521115165e-06,
      "loss": 0.0213,
      "step": 913220
    },
    {
      "epoch": 1.4945372897887577,
      "grad_norm": 0.3190648555755615,
      "learning_rate": 7.057121959897999e-06,
      "loss": 0.0291,
      "step": 913240
    },
    {
      "epoch": 1.494570020227411,
      "grad_norm": 0.5442888140678406,
      "learning_rate": 7.057056067684482e-06,
      "loss": 0.0177,
      "step": 913260
    },
    {
      "epoch": 1.4946027506660644,
      "grad_norm": 0.27071624994277954,
      "learning_rate": 7.056990175470965e-06,
      "loss": 0.0129,
      "step": 913280
    },
    {
      "epoch": 1.4946354811047178,
      "grad_norm": 0.9265794157981873,
      "learning_rate": 7.056924283257448e-06,
      "loss": 0.022,
      "step": 913300
    },
    {
      "epoch": 1.4946682115433712,
      "grad_norm": 0.20606295764446259,
      "learning_rate": 7.056858391043931e-06,
      "loss": 0.0185,
      "step": 913320
    },
    {
      "epoch": 1.4947009419820245,
      "grad_norm": 0.941183865070343,
      "learning_rate": 7.056792498830414e-06,
      "loss": 0.0217,
      "step": 913340
    },
    {
      "epoch": 1.494733672420678,
      "grad_norm": 0.49266862869262695,
      "learning_rate": 7.056726606616897e-06,
      "loss": 0.0258,
      "step": 913360
    },
    {
      "epoch": 1.494766402859331,
      "grad_norm": 0.405881404876709,
      "learning_rate": 7.056660714403379e-06,
      "loss": 0.0206,
      "step": 913380
    },
    {
      "epoch": 1.4947991332979844,
      "grad_norm": 0.4720919132232666,
      "learning_rate": 7.056594822189863e-06,
      "loss": 0.0227,
      "step": 913400
    },
    {
      "epoch": 1.4948318637366378,
      "grad_norm": 1.2079366445541382,
      "learning_rate": 7.056528929976345e-06,
      "loss": 0.0308,
      "step": 913420
    },
    {
      "epoch": 1.4948645941752912,
      "grad_norm": 0.3459891080856323,
      "learning_rate": 7.056463037762828e-06,
      "loss": 0.0275,
      "step": 913440
    },
    {
      "epoch": 1.4948973246139445,
      "grad_norm": 0.5743682980537415,
      "learning_rate": 7.056397145549311e-06,
      "loss": 0.027,
      "step": 913460
    },
    {
      "epoch": 1.4949300550525977,
      "grad_norm": 0.3029651343822479,
      "learning_rate": 7.056331253335794e-06,
      "loss": 0.0221,
      "step": 913480
    },
    {
      "epoch": 1.4949627854912513,
      "grad_norm": 1.3684055805206299,
      "learning_rate": 7.0562653611222765e-06,
      "loss": 0.0183,
      "step": 913500
    },
    {
      "epoch": 1.4949955159299044,
      "grad_norm": 0.42106372117996216,
      "learning_rate": 7.05619946890876e-06,
      "loss": 0.0301,
      "step": 913520
    },
    {
      "epoch": 1.4950282463685578,
      "grad_norm": 0.8185518383979797,
      "learning_rate": 7.056133576695242e-06,
      "loss": 0.0243,
      "step": 913540
    },
    {
      "epoch": 1.4950609768072112,
      "grad_norm": 0.5667661428451538,
      "learning_rate": 7.056067684481726e-06,
      "loss": 0.0183,
      "step": 913560
    },
    {
      "epoch": 1.4950937072458645,
      "grad_norm": 0.9767170548439026,
      "learning_rate": 7.0560017922682075e-06,
      "loss": 0.0197,
      "step": 913580
    },
    {
      "epoch": 1.495126437684518,
      "grad_norm": 0.8590610027313232,
      "learning_rate": 7.055935900054691e-06,
      "loss": 0.0327,
      "step": 913600
    },
    {
      "epoch": 1.495159168123171,
      "grad_norm": 0.13157466053962708,
      "learning_rate": 7.055870007841174e-06,
      "loss": 0.018,
      "step": 913620
    },
    {
      "epoch": 1.4951918985618247,
      "grad_norm": 0.460534006357193,
      "learning_rate": 7.0558041156276566e-06,
      "loss": 0.0248,
      "step": 913640
    },
    {
      "epoch": 1.4952246290004778,
      "grad_norm": 0.34980452060699463,
      "learning_rate": 7.05573822341414e-06,
      "loss": 0.016,
      "step": 913660
    },
    {
      "epoch": 1.4952573594391312,
      "grad_norm": 0.7346013188362122,
      "learning_rate": 7.055672331200623e-06,
      "loss": 0.0191,
      "step": 913680
    },
    {
      "epoch": 1.4952900898777846,
      "grad_norm": 0.8144055008888245,
      "learning_rate": 7.055606438987106e-06,
      "loss": 0.0204,
      "step": 913700
    },
    {
      "epoch": 1.495322820316438,
      "grad_norm": 0.5705785751342773,
      "learning_rate": 7.055540546773588e-06,
      "loss": 0.0307,
      "step": 913720
    },
    {
      "epoch": 1.4953555507550913,
      "grad_norm": 0.3200722932815552,
      "learning_rate": 7.055474654560072e-06,
      "loss": 0.021,
      "step": 913740
    },
    {
      "epoch": 1.4953882811937445,
      "grad_norm": 2.046790838241577,
      "learning_rate": 7.055408762346554e-06,
      "loss": 0.0259,
      "step": 913760
    },
    {
      "epoch": 1.4954210116323978,
      "grad_norm": 0.6552272439002991,
      "learning_rate": 7.0553428701330374e-06,
      "loss": 0.0176,
      "step": 913780
    },
    {
      "epoch": 1.4954537420710512,
      "grad_norm": 0.5310255885124207,
      "learning_rate": 7.055276977919519e-06,
      "loss": 0.0175,
      "step": 913800
    },
    {
      "epoch": 1.4954864725097046,
      "grad_norm": 0.6377956867218018,
      "learning_rate": 7.055211085706003e-06,
      "loss": 0.0178,
      "step": 913820
    },
    {
      "epoch": 1.495519202948358,
      "grad_norm": 1.0456011295318604,
      "learning_rate": 7.055145193492486e-06,
      "loss": 0.0202,
      "step": 913840
    },
    {
      "epoch": 1.4955519333870113,
      "grad_norm": 0.6472837924957275,
      "learning_rate": 7.055079301278968e-06,
      "loss": 0.0202,
      "step": 913860
    },
    {
      "epoch": 1.4955846638256647,
      "grad_norm": 0.34282079339027405,
      "learning_rate": 7.055013409065451e-06,
      "loss": 0.0205,
      "step": 913880
    },
    {
      "epoch": 1.4956173942643178,
      "grad_norm": 0.33235958218574524,
      "learning_rate": 7.054947516851935e-06,
      "loss": 0.0232,
      "step": 913900
    },
    {
      "epoch": 1.4956501247029712,
      "grad_norm": 0.3079007863998413,
      "learning_rate": 7.054881624638417e-06,
      "loss": 0.0124,
      "step": 913920
    },
    {
      "epoch": 1.4956828551416246,
      "grad_norm": 0.9461749792098999,
      "learning_rate": 7.0548157324249e-06,
      "loss": 0.0132,
      "step": 913940
    },
    {
      "epoch": 1.495715585580278,
      "grad_norm": 1.0264729261398315,
      "learning_rate": 7.054749840211382e-06,
      "loss": 0.0203,
      "step": 913960
    },
    {
      "epoch": 1.4957483160189313,
      "grad_norm": 0.5725237727165222,
      "learning_rate": 7.054683947997866e-06,
      "loss": 0.0228,
      "step": 913980
    },
    {
      "epoch": 1.4957810464575847,
      "grad_norm": 0.19045034050941467,
      "learning_rate": 7.054618055784348e-06,
      "loss": 0.0191,
      "step": 914000
    },
    {
      "epoch": 1.495813776896238,
      "grad_norm": 0.21653859317302704,
      "learning_rate": 7.054552163570831e-06,
      "loss": 0.0192,
      "step": 914020
    },
    {
      "epoch": 1.4958465073348912,
      "grad_norm": 0.8218405246734619,
      "learning_rate": 7.054486271357315e-06,
      "loss": 0.022,
      "step": 914040
    },
    {
      "epoch": 1.4958792377735446,
      "grad_norm": 0.6410928964614868,
      "learning_rate": 7.0544203791437975e-06,
      "loss": 0.0214,
      "step": 914060
    },
    {
      "epoch": 1.495911968212198,
      "grad_norm": 0.25483861565589905,
      "learning_rate": 7.05435448693028e-06,
      "loss": 0.0229,
      "step": 914080
    },
    {
      "epoch": 1.4959446986508513,
      "grad_norm": 0.877170979976654,
      "learning_rate": 7.054288594716763e-06,
      "loss": 0.0235,
      "step": 914100
    },
    {
      "epoch": 1.4959774290895047,
      "grad_norm": 0.7561759948730469,
      "learning_rate": 7.0542227025032465e-06,
      "loss": 0.0174,
      "step": 914120
    },
    {
      "epoch": 1.496010159528158,
      "grad_norm": 0.7944536805152893,
      "learning_rate": 7.0541568102897284e-06,
      "loss": 0.0181,
      "step": 914140
    },
    {
      "epoch": 1.4960428899668115,
      "grad_norm": 1.1232470273971558,
      "learning_rate": 7.054090918076212e-06,
      "loss": 0.0224,
      "step": 914160
    },
    {
      "epoch": 1.4960756204054646,
      "grad_norm": 0.41925889253616333,
      "learning_rate": 7.054025025862694e-06,
      "loss": 0.0282,
      "step": 914180
    },
    {
      "epoch": 1.496108350844118,
      "grad_norm": 1.5783331394195557,
      "learning_rate": 7.0539591336491775e-06,
      "loss": 0.0205,
      "step": 914200
    },
    {
      "epoch": 1.4961410812827713,
      "grad_norm": 0.09677474945783615,
      "learning_rate": 7.053893241435659e-06,
      "loss": 0.0154,
      "step": 914220
    },
    {
      "epoch": 1.4961738117214247,
      "grad_norm": 0.3357403576374054,
      "learning_rate": 7.053827349222143e-06,
      "loss": 0.0164,
      "step": 914240
    },
    {
      "epoch": 1.496206542160078,
      "grad_norm": 4.8939313888549805,
      "learning_rate": 7.053761457008626e-06,
      "loss": 0.0204,
      "step": 914260
    },
    {
      "epoch": 1.4962392725987312,
      "grad_norm": 1.233119010925293,
      "learning_rate": 7.0536955647951085e-06,
      "loss": 0.0257,
      "step": 914280
    },
    {
      "epoch": 1.4962720030373848,
      "grad_norm": 0.7662948369979858,
      "learning_rate": 7.053629672581591e-06,
      "loss": 0.0261,
      "step": 914300
    },
    {
      "epoch": 1.496304733476038,
      "grad_norm": 0.30149877071380615,
      "learning_rate": 7.053563780368075e-06,
      "loss": 0.0132,
      "step": 914320
    },
    {
      "epoch": 1.4963374639146914,
      "grad_norm": 0.643913984298706,
      "learning_rate": 7.053497888154557e-06,
      "loss": 0.024,
      "step": 914340
    },
    {
      "epoch": 1.4963701943533447,
      "grad_norm": 0.6423505544662476,
      "learning_rate": 7.05343199594104e-06,
      "loss": 0.0172,
      "step": 914360
    },
    {
      "epoch": 1.496402924791998,
      "grad_norm": 0.5186794400215149,
      "learning_rate": 7.053366103727524e-06,
      "loss": 0.0277,
      "step": 914380
    },
    {
      "epoch": 1.4964356552306515,
      "grad_norm": 0.9393912553787231,
      "learning_rate": 7.053300211514006e-06,
      "loss": 0.0234,
      "step": 914400
    },
    {
      "epoch": 1.4964683856693046,
      "grad_norm": 0.42055338621139526,
      "learning_rate": 7.053234319300489e-06,
      "loss": 0.0216,
      "step": 914420
    },
    {
      "epoch": 1.496501116107958,
      "grad_norm": 0.32514822483062744,
      "learning_rate": 7.053168427086971e-06,
      "loss": 0.0192,
      "step": 914440
    },
    {
      "epoch": 1.4965338465466114,
      "grad_norm": 1.4754687547683716,
      "learning_rate": 7.053102534873455e-06,
      "loss": 0.0209,
      "step": 914460
    },
    {
      "epoch": 1.4965665769852647,
      "grad_norm": 0.7193472385406494,
      "learning_rate": 7.0530366426599376e-06,
      "loss": 0.0164,
      "step": 914480
    },
    {
      "epoch": 1.4965993074239181,
      "grad_norm": 0.8762431740760803,
      "learning_rate": 7.05297075044642e-06,
      "loss": 0.0257,
      "step": 914500
    },
    {
      "epoch": 1.4966320378625715,
      "grad_norm": 0.4140327274799347,
      "learning_rate": 7.052904858232903e-06,
      "loss": 0.0172,
      "step": 914520
    },
    {
      "epoch": 1.4966647683012249,
      "grad_norm": 0.46494925022125244,
      "learning_rate": 7.052838966019387e-06,
      "loss": 0.0223,
      "step": 914540
    },
    {
      "epoch": 1.496697498739878,
      "grad_norm": 0.7828584313392639,
      "learning_rate": 7.0527730738058685e-06,
      "loss": 0.0265,
      "step": 914560
    },
    {
      "epoch": 1.4967302291785314,
      "grad_norm": 0.4244723618030548,
      "learning_rate": 7.052707181592352e-06,
      "loss": 0.0265,
      "step": 914580
    },
    {
      "epoch": 1.4967629596171848,
      "grad_norm": 0.8154252171516418,
      "learning_rate": 7.052641289378834e-06,
      "loss": 0.0193,
      "step": 914600
    },
    {
      "epoch": 1.4967956900558381,
      "grad_norm": 0.5980097651481628,
      "learning_rate": 7.052575397165318e-06,
      "loss": 0.0209,
      "step": 914620
    },
    {
      "epoch": 1.4968284204944915,
      "grad_norm": 0.16730046272277832,
      "learning_rate": 7.0525095049518e-06,
      "loss": 0.023,
      "step": 914640
    },
    {
      "epoch": 1.4968611509331449,
      "grad_norm": 0.47093528509140015,
      "learning_rate": 7.052443612738283e-06,
      "loss": 0.0134,
      "step": 914660
    },
    {
      "epoch": 1.4968938813717982,
      "grad_norm": 0.2982294261455536,
      "learning_rate": 7.052377720524766e-06,
      "loss": 0.0197,
      "step": 914680
    },
    {
      "epoch": 1.4969266118104514,
      "grad_norm": 1.605211615562439,
      "learning_rate": 7.052311828311249e-06,
      "loss": 0.0171,
      "step": 914700
    },
    {
      "epoch": 1.4969593422491048,
      "grad_norm": 0.763221025466919,
      "learning_rate": 7.052245936097732e-06,
      "loss": 0.0266,
      "step": 914720
    },
    {
      "epoch": 1.4969920726877581,
      "grad_norm": 0.36243101954460144,
      "learning_rate": 7.052180043884215e-06,
      "loss": 0.0254,
      "step": 914740
    },
    {
      "epoch": 1.4970248031264115,
      "grad_norm": 0.8221906423568726,
      "learning_rate": 7.0521141516706985e-06,
      "loss": 0.0308,
      "step": 914760
    },
    {
      "epoch": 1.4970575335650649,
      "grad_norm": 0.05293760076165199,
      "learning_rate": 7.05204825945718e-06,
      "loss": 0.0181,
      "step": 914780
    },
    {
      "epoch": 1.4970902640037183,
      "grad_norm": 0.3130773603916168,
      "learning_rate": 7.051982367243664e-06,
      "loss": 0.024,
      "step": 914800
    },
    {
      "epoch": 1.4971229944423716,
      "grad_norm": 1.4397610425949097,
      "learning_rate": 7.051916475030146e-06,
      "loss": 0.0169,
      "step": 914820
    },
    {
      "epoch": 1.4971557248810248,
      "grad_norm": 0.3389774262905121,
      "learning_rate": 7.051850582816629e-06,
      "loss": 0.0212,
      "step": 914840
    },
    {
      "epoch": 1.4971884553196781,
      "grad_norm": 1.6055326461791992,
      "learning_rate": 7.051784690603112e-06,
      "loss": 0.0179,
      "step": 914860
    },
    {
      "epoch": 1.4972211857583315,
      "grad_norm": 0.14262890815734863,
      "learning_rate": 7.051718798389595e-06,
      "loss": 0.02,
      "step": 914880
    },
    {
      "epoch": 1.497253916196985,
      "grad_norm": 0.20065298676490784,
      "learning_rate": 7.051652906176078e-06,
      "loss": 0.0214,
      "step": 914900
    },
    {
      "epoch": 1.4972866466356383,
      "grad_norm": 0.4341832995414734,
      "learning_rate": 7.051587013962561e-06,
      "loss": 0.0127,
      "step": 914920
    },
    {
      "epoch": 1.4973193770742914,
      "grad_norm": 1.1546741724014282,
      "learning_rate": 7.051521121749043e-06,
      "loss": 0.0265,
      "step": 914940
    },
    {
      "epoch": 1.497352107512945,
      "grad_norm": 0.8851633071899414,
      "learning_rate": 7.051455229535527e-06,
      "loss": 0.0224,
      "step": 914960
    },
    {
      "epoch": 1.4973848379515982,
      "grad_norm": 0.18169161677360535,
      "learning_rate": 7.051389337322009e-06,
      "loss": 0.0246,
      "step": 914980
    },
    {
      "epoch": 1.4974175683902515,
      "grad_norm": 0.21372295916080475,
      "learning_rate": 7.051323445108492e-06,
      "loss": 0.026,
      "step": 915000
    },
    {
      "epoch": 1.497450298828905,
      "grad_norm": 1.7585556507110596,
      "learning_rate": 7.051257552894974e-06,
      "loss": 0.0184,
      "step": 915020
    },
    {
      "epoch": 1.4974830292675583,
      "grad_norm": 0.1791658103466034,
      "learning_rate": 7.051191660681458e-06,
      "loss": 0.0175,
      "step": 915040
    },
    {
      "epoch": 1.4975157597062116,
      "grad_norm": 0.7186046242713928,
      "learning_rate": 7.05112576846794e-06,
      "loss": 0.0157,
      "step": 915060
    },
    {
      "epoch": 1.4975484901448648,
      "grad_norm": 0.8089154958724976,
      "learning_rate": 7.051059876254424e-06,
      "loss": 0.0272,
      "step": 915080
    },
    {
      "epoch": 1.4975812205835184,
      "grad_norm": 0.428117573261261,
      "learning_rate": 7.050993984040907e-06,
      "loss": 0.0183,
      "step": 915100
    },
    {
      "epoch": 1.4976139510221715,
      "grad_norm": 1.2884397506713867,
      "learning_rate": 7.0509280918273895e-06,
      "loss": 0.0147,
      "step": 915120
    },
    {
      "epoch": 1.497646681460825,
      "grad_norm": 0.4735488295555115,
      "learning_rate": 7.050862199613873e-06,
      "loss": 0.0195,
      "step": 915140
    },
    {
      "epoch": 1.4976794118994783,
      "grad_norm": 2.321531295776367,
      "learning_rate": 7.050796307400355e-06,
      "loss": 0.021,
      "step": 915160
    },
    {
      "epoch": 1.4977121423381317,
      "grad_norm": 0.287175714969635,
      "learning_rate": 7.0507304151868385e-06,
      "loss": 0.0193,
      "step": 915180
    },
    {
      "epoch": 1.497744872776785,
      "grad_norm": 0.25429394841194153,
      "learning_rate": 7.05066452297332e-06,
      "loss": 0.018,
      "step": 915200
    },
    {
      "epoch": 1.4977776032154382,
      "grad_norm": 0.29144197702407837,
      "learning_rate": 7.050598630759804e-06,
      "loss": 0.0159,
      "step": 915220
    },
    {
      "epoch": 1.4978103336540916,
      "grad_norm": 1.8457614183425903,
      "learning_rate": 7.050532738546286e-06,
      "loss": 0.0194,
      "step": 915240
    },
    {
      "epoch": 1.497843064092745,
      "grad_norm": 0.7100405097007751,
      "learning_rate": 7.0504668463327695e-06,
      "loss": 0.0214,
      "step": 915260
    },
    {
      "epoch": 1.4978757945313983,
      "grad_norm": 0.5768747925758362,
      "learning_rate": 7.050400954119252e-06,
      "loss": 0.0229,
      "step": 915280
    },
    {
      "epoch": 1.4979085249700517,
      "grad_norm": 0.5353636741638184,
      "learning_rate": 7.050335061905735e-06,
      "loss": 0.0143,
      "step": 915300
    },
    {
      "epoch": 1.497941255408705,
      "grad_norm": 0.9905458092689514,
      "learning_rate": 7.050269169692218e-06,
      "loss": 0.0208,
      "step": 915320
    },
    {
      "epoch": 1.4979739858473584,
      "grad_norm": 0.7744380831718445,
      "learning_rate": 7.050203277478701e-06,
      "loss": 0.0216,
      "step": 915340
    },
    {
      "epoch": 1.4980067162860116,
      "grad_norm": 0.4635882079601288,
      "learning_rate": 7.050137385265183e-06,
      "loss": 0.0167,
      "step": 915360
    },
    {
      "epoch": 1.498039446724665,
      "grad_norm": 0.44152700901031494,
      "learning_rate": 7.050071493051667e-06,
      "loss": 0.0263,
      "step": 915380
    },
    {
      "epoch": 1.4980721771633183,
      "grad_norm": 0.5291218757629395,
      "learning_rate": 7.050005600838149e-06,
      "loss": 0.0219,
      "step": 915400
    },
    {
      "epoch": 1.4981049076019717,
      "grad_norm": 0.49780014157295227,
      "learning_rate": 7.049939708624632e-06,
      "loss": 0.0189,
      "step": 915420
    },
    {
      "epoch": 1.498137638040625,
      "grad_norm": 0.7901545763015747,
      "learning_rate": 7.049873816411116e-06,
      "loss": 0.0302,
      "step": 915440
    },
    {
      "epoch": 1.4981703684792784,
      "grad_norm": 0.48896610736846924,
      "learning_rate": 7.049807924197598e-06,
      "loss": 0.0244,
      "step": 915460
    },
    {
      "epoch": 1.4982030989179318,
      "grad_norm": 0.5869632363319397,
      "learning_rate": 7.049742031984081e-06,
      "loss": 0.0208,
      "step": 915480
    },
    {
      "epoch": 1.498235829356585,
      "grad_norm": 0.07760059088468552,
      "learning_rate": 7.049676139770564e-06,
      "loss": 0.0167,
      "step": 915500
    },
    {
      "epoch": 1.4982685597952383,
      "grad_norm": 0.546448290348053,
      "learning_rate": 7.049610247557047e-06,
      "loss": 0.025,
      "step": 915520
    },
    {
      "epoch": 1.4983012902338917,
      "grad_norm": 0.4083958864212036,
      "learning_rate": 7.0495443553435295e-06,
      "loss": 0.0147,
      "step": 915540
    },
    {
      "epoch": 1.498334020672545,
      "grad_norm": 0.4184628129005432,
      "learning_rate": 7.049478463130013e-06,
      "loss": 0.02,
      "step": 915560
    },
    {
      "epoch": 1.4983667511111984,
      "grad_norm": 0.27889201045036316,
      "learning_rate": 7.049412570916495e-06,
      "loss": 0.0204,
      "step": 915580
    },
    {
      "epoch": 1.4983994815498518,
      "grad_norm": 0.5963493585586548,
      "learning_rate": 7.049346678702979e-06,
      "loss": 0.0221,
      "step": 915600
    },
    {
      "epoch": 1.4984322119885052,
      "grad_norm": 0.6885340213775635,
      "learning_rate": 7.0492807864894605e-06,
      "loss": 0.0249,
      "step": 915620
    },
    {
      "epoch": 1.4984649424271583,
      "grad_norm": 0.6263629794120789,
      "learning_rate": 7.049214894275944e-06,
      "loss": 0.0184,
      "step": 915640
    },
    {
      "epoch": 1.4984976728658117,
      "grad_norm": 0.8898861408233643,
      "learning_rate": 7.049149002062427e-06,
      "loss": 0.0273,
      "step": 915660
    },
    {
      "epoch": 1.498530403304465,
      "grad_norm": 1.1242579221725464,
      "learning_rate": 7.0490831098489096e-06,
      "loss": 0.022,
      "step": 915680
    },
    {
      "epoch": 1.4985631337431184,
      "grad_norm": 0.4370165169239044,
      "learning_rate": 7.049017217635392e-06,
      "loss": 0.0189,
      "step": 915700
    },
    {
      "epoch": 1.4985958641817718,
      "grad_norm": 0.273449569940567,
      "learning_rate": 7.048951325421876e-06,
      "loss": 0.0186,
      "step": 915720
    },
    {
      "epoch": 1.498628594620425,
      "grad_norm": 0.0980365052819252,
      "learning_rate": 7.048885433208358e-06,
      "loss": 0.0194,
      "step": 915740
    },
    {
      "epoch": 1.4986613250590786,
      "grad_norm": 0.172247052192688,
      "learning_rate": 7.048819540994841e-06,
      "loss": 0.0142,
      "step": 915760
    },
    {
      "epoch": 1.4986940554977317,
      "grad_norm": 0.6241280436515808,
      "learning_rate": 7.048753648781325e-06,
      "loss": 0.0166,
      "step": 915780
    },
    {
      "epoch": 1.498726785936385,
      "grad_norm": 0.2245423048734665,
      "learning_rate": 7.048687756567807e-06,
      "loss": 0.0149,
      "step": 915800
    },
    {
      "epoch": 1.4987595163750385,
      "grad_norm": 0.5628464818000793,
      "learning_rate": 7.0486218643542904e-06,
      "loss": 0.0182,
      "step": 915820
    },
    {
      "epoch": 1.4987922468136918,
      "grad_norm": 0.892379641532898,
      "learning_rate": 7.048555972140772e-06,
      "loss": 0.0233,
      "step": 915840
    },
    {
      "epoch": 1.4988249772523452,
      "grad_norm": 0.16026091575622559,
      "learning_rate": 7.048490079927256e-06,
      "loss": 0.0162,
      "step": 915860
    },
    {
      "epoch": 1.4988577076909984,
      "grad_norm": 0.18923495709896088,
      "learning_rate": 7.048424187713739e-06,
      "loss": 0.0156,
      "step": 915880
    },
    {
      "epoch": 1.498890438129652,
      "grad_norm": 0.7456740140914917,
      "learning_rate": 7.048358295500221e-06,
      "loss": 0.0241,
      "step": 915900
    },
    {
      "epoch": 1.498923168568305,
      "grad_norm": 1.9393130540847778,
      "learning_rate": 7.048292403286704e-06,
      "loss": 0.0131,
      "step": 915920
    },
    {
      "epoch": 1.4989558990069585,
      "grad_norm": 0.43125930428504944,
      "learning_rate": 7.048226511073188e-06,
      "loss": 0.0291,
      "step": 915940
    },
    {
      "epoch": 1.4989886294456118,
      "grad_norm": 0.6601101756095886,
      "learning_rate": 7.04816061885967e-06,
      "loss": 0.0215,
      "step": 915960
    },
    {
      "epoch": 1.4990213598842652,
      "grad_norm": 1.6024211645126343,
      "learning_rate": 7.048094726646153e-06,
      "loss": 0.0207,
      "step": 915980
    },
    {
      "epoch": 1.4990540903229186,
      "grad_norm": 0.5750027298927307,
      "learning_rate": 7.048028834432635e-06,
      "loss": 0.0234,
      "step": 916000
    },
    {
      "epoch": 1.4990868207615717,
      "grad_norm": 0.6440271139144897,
      "learning_rate": 7.047962942219119e-06,
      "loss": 0.024,
      "step": 916020
    },
    {
      "epoch": 1.499119551200225,
      "grad_norm": 0.2795012295246124,
      "learning_rate": 7.0478970500056006e-06,
      "loss": 0.019,
      "step": 916040
    },
    {
      "epoch": 1.4991522816388785,
      "grad_norm": 0.2611640989780426,
      "learning_rate": 7.047831157792084e-06,
      "loss": 0.0217,
      "step": 916060
    },
    {
      "epoch": 1.4991850120775319,
      "grad_norm": 0.5736986994743347,
      "learning_rate": 7.047765265578567e-06,
      "loss": 0.0156,
      "step": 916080
    },
    {
      "epoch": 1.4992177425161852,
      "grad_norm": 0.9632292985916138,
      "learning_rate": 7.04769937336505e-06,
      "loss": 0.0228,
      "step": 916100
    },
    {
      "epoch": 1.4992504729548386,
      "grad_norm": 0.6113728284835815,
      "learning_rate": 7.047633481151533e-06,
      "loss": 0.02,
      "step": 916120
    },
    {
      "epoch": 1.499283203393492,
      "grad_norm": 0.7430142760276794,
      "learning_rate": 7.047567588938016e-06,
      "loss": 0.0209,
      "step": 916140
    },
    {
      "epoch": 1.4993159338321451,
      "grad_norm": 0.7874623537063599,
      "learning_rate": 7.0475016967244996e-06,
      "loss": 0.018,
      "step": 916160
    },
    {
      "epoch": 1.4993486642707985,
      "grad_norm": 0.33628660440444946,
      "learning_rate": 7.0474358045109814e-06,
      "loss": 0.0184,
      "step": 916180
    },
    {
      "epoch": 1.4993813947094519,
      "grad_norm": 1.333761215209961,
      "learning_rate": 7.047369912297465e-06,
      "loss": 0.0202,
      "step": 916200
    },
    {
      "epoch": 1.4994141251481052,
      "grad_norm": 0.31546640396118164,
      "learning_rate": 7.047304020083947e-06,
      "loss": 0.0333,
      "step": 916220
    },
    {
      "epoch": 1.4994468555867586,
      "grad_norm": 0.9906522035598755,
      "learning_rate": 7.0472381278704305e-06,
      "loss": 0.0269,
      "step": 916240
    },
    {
      "epoch": 1.499479586025412,
      "grad_norm": 0.1216200515627861,
      "learning_rate": 7.047172235656912e-06,
      "loss": 0.0197,
      "step": 916260
    },
    {
      "epoch": 1.4995123164640654,
      "grad_norm": 2.5299062728881836,
      "learning_rate": 7.047106343443396e-06,
      "loss": 0.0196,
      "step": 916280
    },
    {
      "epoch": 1.4995450469027185,
      "grad_norm": 6.483554840087891,
      "learning_rate": 7.047040451229879e-06,
      "loss": 0.0211,
      "step": 916300
    },
    {
      "epoch": 1.4995777773413719,
      "grad_norm": 0.23437970876693726,
      "learning_rate": 7.0469745590163615e-06,
      "loss": 0.0291,
      "step": 916320
    },
    {
      "epoch": 1.4996105077800252,
      "grad_norm": 0.395572304725647,
      "learning_rate": 7.046908666802844e-06,
      "loss": 0.0211,
      "step": 916340
    },
    {
      "epoch": 1.4996432382186786,
      "grad_norm": 1.6155632734298706,
      "learning_rate": 7.046842774589328e-06,
      "loss": 0.0198,
      "step": 916360
    },
    {
      "epoch": 1.499675968657332,
      "grad_norm": 0.42253947257995605,
      "learning_rate": 7.04677688237581e-06,
      "loss": 0.0176,
      "step": 916380
    },
    {
      "epoch": 1.4997086990959854,
      "grad_norm": 0.08868449926376343,
      "learning_rate": 7.046710990162293e-06,
      "loss": 0.0149,
      "step": 916400
    },
    {
      "epoch": 1.4997414295346387,
      "grad_norm": 0.7435009479522705,
      "learning_rate": 7.046645097948775e-06,
      "loss": 0.0205,
      "step": 916420
    },
    {
      "epoch": 1.4997741599732919,
      "grad_norm": 0.30019962787628174,
      "learning_rate": 7.046579205735259e-06,
      "loss": 0.02,
      "step": 916440
    },
    {
      "epoch": 1.4998068904119453,
      "grad_norm": 1.0641262531280518,
      "learning_rate": 7.0465133135217415e-06,
      "loss": 0.0221,
      "step": 916460
    },
    {
      "epoch": 1.4998396208505986,
      "grad_norm": 0.4054313004016876,
      "learning_rate": 7.046447421308224e-06,
      "loss": 0.0122,
      "step": 916480
    },
    {
      "epoch": 1.499872351289252,
      "grad_norm": 0.4497460722923279,
      "learning_rate": 7.046381529094708e-06,
      "loss": 0.0262,
      "step": 916500
    },
    {
      "epoch": 1.4999050817279054,
      "grad_norm": 0.22127723693847656,
      "learning_rate": 7.0463156368811906e-06,
      "loss": 0.0257,
      "step": 916520
    },
    {
      "epoch": 1.4999378121665585,
      "grad_norm": 0.41379314661026,
      "learning_rate": 7.046249744667673e-06,
      "loss": 0.025,
      "step": 916540
    },
    {
      "epoch": 1.4999705426052121,
      "grad_norm": 1.241970419883728,
      "learning_rate": 7.046183852454156e-06,
      "loss": 0.0208,
      "step": 916560
    },
    {
      "epoch": 1.5000032730438653,
      "grad_norm": 0.9754148125648499,
      "learning_rate": 7.04611796024064e-06,
      "loss": 0.0164,
      "step": 916580
    },
    {
      "epoch": 1.5000360034825186,
      "grad_norm": 0.6548131108283997,
      "learning_rate": 7.0460520680271215e-06,
      "loss": 0.0195,
      "step": 916600
    },
    {
      "epoch": 1.500068733921172,
      "grad_norm": 0.6245211362838745,
      "learning_rate": 7.045986175813605e-06,
      "loss": 0.0215,
      "step": 916620
    },
    {
      "epoch": 1.5001014643598254,
      "grad_norm": 0.44311779737472534,
      "learning_rate": 7.045920283600087e-06,
      "loss": 0.0214,
      "step": 916640
    },
    {
      "epoch": 1.5001341947984788,
      "grad_norm": 0.59539794921875,
      "learning_rate": 7.045854391386571e-06,
      "loss": 0.0176,
      "step": 916660
    },
    {
      "epoch": 1.500166925237132,
      "grad_norm": 0.39386335015296936,
      "learning_rate": 7.045788499173053e-06,
      "loss": 0.0256,
      "step": 916680
    },
    {
      "epoch": 1.5001996556757855,
      "grad_norm": 0.5028201341629028,
      "learning_rate": 7.045722606959536e-06,
      "loss": 0.0189,
      "step": 916700
    },
    {
      "epoch": 1.5002323861144387,
      "grad_norm": 0.15745870769023895,
      "learning_rate": 7.045656714746019e-06,
      "loss": 0.0249,
      "step": 916720
    },
    {
      "epoch": 1.500265116553092,
      "grad_norm": 0.27298563718795776,
      "learning_rate": 7.045590822532502e-06,
      "loss": 0.0244,
      "step": 916740
    },
    {
      "epoch": 1.5002978469917454,
      "grad_norm": 0.5867055654525757,
      "learning_rate": 7.045524930318984e-06,
      "loss": 0.0127,
      "step": 916760
    },
    {
      "epoch": 1.5003305774303988,
      "grad_norm": 3.123699903488159,
      "learning_rate": 7.045459038105468e-06,
      "loss": 0.0217,
      "step": 916780
    },
    {
      "epoch": 1.5003633078690521,
      "grad_norm": 1.4323761463165283,
      "learning_rate": 7.04539314589195e-06,
      "loss": 0.0175,
      "step": 916800
    },
    {
      "epoch": 1.5003960383077053,
      "grad_norm": 0.7726099491119385,
      "learning_rate": 7.045327253678433e-06,
      "loss": 0.0237,
      "step": 916820
    },
    {
      "epoch": 1.5004287687463589,
      "grad_norm": 0.42657291889190674,
      "learning_rate": 7.045261361464917e-06,
      "loss": 0.0271,
      "step": 916840
    },
    {
      "epoch": 1.500461499185012,
      "grad_norm": 0.8826831579208374,
      "learning_rate": 7.045195469251399e-06,
      "loss": 0.0232,
      "step": 916860
    },
    {
      "epoch": 1.5004942296236654,
      "grad_norm": 0.3735767900943756,
      "learning_rate": 7.045129577037882e-06,
      "loss": 0.0177,
      "step": 916880
    },
    {
      "epoch": 1.5005269600623188,
      "grad_norm": 1.4264994859695435,
      "learning_rate": 7.045063684824365e-06,
      "loss": 0.0188,
      "step": 916900
    },
    {
      "epoch": 1.500559690500972,
      "grad_norm": 2.6623799800872803,
      "learning_rate": 7.044997792610848e-06,
      "loss": 0.0217,
      "step": 916920
    },
    {
      "epoch": 1.5005924209396255,
      "grad_norm": 0.19420774281024933,
      "learning_rate": 7.044931900397331e-06,
      "loss": 0.0175,
      "step": 916940
    },
    {
      "epoch": 1.5006251513782787,
      "grad_norm": 0.719967782497406,
      "learning_rate": 7.044866008183814e-06,
      "loss": 0.0177,
      "step": 916960
    },
    {
      "epoch": 1.5006578818169323,
      "grad_norm": 0.46512535214424133,
      "learning_rate": 7.044800115970296e-06,
      "loss": 0.0214,
      "step": 916980
    },
    {
      "epoch": 1.5006906122555854,
      "grad_norm": 1.2939090728759766,
      "learning_rate": 7.04473422375678e-06,
      "loss": 0.0273,
      "step": 917000
    },
    {
      "epoch": 1.5007233426942388,
      "grad_norm": 2.704883575439453,
      "learning_rate": 7.044668331543262e-06,
      "loss": 0.0213,
      "step": 917020
    },
    {
      "epoch": 1.5007560731328922,
      "grad_norm": 1.9185991287231445,
      "learning_rate": 7.044602439329745e-06,
      "loss": 0.0237,
      "step": 917040
    },
    {
      "epoch": 1.5007888035715453,
      "grad_norm": 0.4928431510925293,
      "learning_rate": 7.044536547116227e-06,
      "loss": 0.0237,
      "step": 917060
    },
    {
      "epoch": 1.500821534010199,
      "grad_norm": 0.9007791876792908,
      "learning_rate": 7.044470654902711e-06,
      "loss": 0.015,
      "step": 917080
    },
    {
      "epoch": 1.500854264448852,
      "grad_norm": 0.27403023838996887,
      "learning_rate": 7.044404762689193e-06,
      "loss": 0.0189,
      "step": 917100
    },
    {
      "epoch": 1.5008869948875054,
      "grad_norm": 0.7582165002822876,
      "learning_rate": 7.044338870475676e-06,
      "loss": 0.0221,
      "step": 917120
    },
    {
      "epoch": 1.5009197253261588,
      "grad_norm": 0.3243260383605957,
      "learning_rate": 7.044272978262159e-06,
      "loss": 0.0273,
      "step": 917140
    },
    {
      "epoch": 1.5009524557648122,
      "grad_norm": 0.36423060297966003,
      "learning_rate": 7.0442070860486425e-06,
      "loss": 0.0151,
      "step": 917160
    },
    {
      "epoch": 1.5009851862034655,
      "grad_norm": 1.1369832754135132,
      "learning_rate": 7.044141193835125e-06,
      "loss": 0.0212,
      "step": 917180
    },
    {
      "epoch": 1.5010179166421187,
      "grad_norm": 0.3974062502384186,
      "learning_rate": 7.044075301621608e-06,
      "loss": 0.0208,
      "step": 917200
    },
    {
      "epoch": 1.5010506470807723,
      "grad_norm": 0.38173216581344604,
      "learning_rate": 7.0440094094080915e-06,
      "loss": 0.0165,
      "step": 917220
    },
    {
      "epoch": 1.5010833775194254,
      "grad_norm": 0.2915666103363037,
      "learning_rate": 7.0439435171945734e-06,
      "loss": 0.0169,
      "step": 917240
    },
    {
      "epoch": 1.5011161079580788,
      "grad_norm": 0.7089366912841797,
      "learning_rate": 7.043877624981057e-06,
      "loss": 0.0131,
      "step": 917260
    },
    {
      "epoch": 1.5011488383967322,
      "grad_norm": 0.4068703353404999,
      "learning_rate": 7.043811732767539e-06,
      "loss": 0.018,
      "step": 917280
    },
    {
      "epoch": 1.5011815688353856,
      "grad_norm": 0.4149179458618164,
      "learning_rate": 7.0437458405540225e-06,
      "loss": 0.0218,
      "step": 917300
    },
    {
      "epoch": 1.501214299274039,
      "grad_norm": 0.6245595216751099,
      "learning_rate": 7.043679948340505e-06,
      "loss": 0.0256,
      "step": 917320
    },
    {
      "epoch": 1.501247029712692,
      "grad_norm": 0.5864402055740356,
      "learning_rate": 7.043614056126988e-06,
      "loss": 0.0159,
      "step": 917340
    },
    {
      "epoch": 1.5012797601513457,
      "grad_norm": 1.260832667350769,
      "learning_rate": 7.043548163913471e-06,
      "loss": 0.0176,
      "step": 917360
    },
    {
      "epoch": 1.5013124905899988,
      "grad_norm": 1.2367359399795532,
      "learning_rate": 7.043482271699954e-06,
      "loss": 0.0234,
      "step": 917380
    },
    {
      "epoch": 1.5013452210286522,
      "grad_norm": 0.4396716058254242,
      "learning_rate": 7.043416379486436e-06,
      "loss": 0.0223,
      "step": 917400
    },
    {
      "epoch": 1.5013779514673056,
      "grad_norm": 0.8148267269134521,
      "learning_rate": 7.04335048727292e-06,
      "loss": 0.0279,
      "step": 917420
    },
    {
      "epoch": 1.501410681905959,
      "grad_norm": 0.2878049910068512,
      "learning_rate": 7.043284595059402e-06,
      "loss": 0.0187,
      "step": 917440
    },
    {
      "epoch": 1.5014434123446123,
      "grad_norm": 0.21312564611434937,
      "learning_rate": 7.043218702845885e-06,
      "loss": 0.0172,
      "step": 917460
    },
    {
      "epoch": 1.5014761427832655,
      "grad_norm": 0.5346563458442688,
      "learning_rate": 7.043152810632368e-06,
      "loss": 0.0217,
      "step": 917480
    },
    {
      "epoch": 1.501508873221919,
      "grad_norm": 1.0327168703079224,
      "learning_rate": 7.043086918418851e-06,
      "loss": 0.0198,
      "step": 917500
    },
    {
      "epoch": 1.5015416036605722,
      "grad_norm": 0.39476436376571655,
      "learning_rate": 7.0430210262053335e-06,
      "loss": 0.0173,
      "step": 917520
    },
    {
      "epoch": 1.5015743340992256,
      "grad_norm": 0.5294507145881653,
      "learning_rate": 7.042955133991817e-06,
      "loss": 0.0194,
      "step": 917540
    },
    {
      "epoch": 1.501607064537879,
      "grad_norm": 0.2683712840080261,
      "learning_rate": 7.0428892417783e-06,
      "loss": 0.0113,
      "step": 917560
    },
    {
      "epoch": 1.5016397949765323,
      "grad_norm": 0.23637253046035767,
      "learning_rate": 7.0428233495647825e-06,
      "loss": 0.0265,
      "step": 917580
    },
    {
      "epoch": 1.5016725254151857,
      "grad_norm": 0.6289725303649902,
      "learning_rate": 7.042757457351266e-06,
      "loss": 0.0263,
      "step": 917600
    },
    {
      "epoch": 1.5017052558538388,
      "grad_norm": 1.0662578344345093,
      "learning_rate": 7.042691565137748e-06,
      "loss": 0.0335,
      "step": 917620
    },
    {
      "epoch": 1.5017379862924924,
      "grad_norm": 1.1667741537094116,
      "learning_rate": 7.042625672924232e-06,
      "loss": 0.021,
      "step": 917640
    },
    {
      "epoch": 1.5017707167311456,
      "grad_norm": 1.2130513191223145,
      "learning_rate": 7.0425597807107135e-06,
      "loss": 0.0213,
      "step": 917660
    },
    {
      "epoch": 1.501803447169799,
      "grad_norm": 0.4119100570678711,
      "learning_rate": 7.042493888497197e-06,
      "loss": 0.0186,
      "step": 917680
    },
    {
      "epoch": 1.5018361776084523,
      "grad_norm": 0.46163105964660645,
      "learning_rate": 7.04242799628368e-06,
      "loss": 0.0233,
      "step": 917700
    },
    {
      "epoch": 1.5018689080471055,
      "grad_norm": 0.5665512681007385,
      "learning_rate": 7.0423621040701626e-06,
      "loss": 0.0129,
      "step": 917720
    },
    {
      "epoch": 1.501901638485759,
      "grad_norm": 1.184203028678894,
      "learning_rate": 7.042296211856645e-06,
      "loss": 0.0193,
      "step": 917740
    },
    {
      "epoch": 1.5019343689244122,
      "grad_norm": 0.39857253432273865,
      "learning_rate": 7.042230319643129e-06,
      "loss": 0.0228,
      "step": 917760
    },
    {
      "epoch": 1.5019670993630658,
      "grad_norm": 0.7517591714859009,
      "learning_rate": 7.042164427429611e-06,
      "loss": 0.0228,
      "step": 917780
    },
    {
      "epoch": 1.501999829801719,
      "grad_norm": 0.5161007642745972,
      "learning_rate": 7.042098535216094e-06,
      "loss": 0.0138,
      "step": 917800
    },
    {
      "epoch": 1.5020325602403723,
      "grad_norm": 0.5986030697822571,
      "learning_rate": 7.042032643002576e-06,
      "loss": 0.0211,
      "step": 917820
    },
    {
      "epoch": 1.5020652906790257,
      "grad_norm": 0.2868979573249817,
      "learning_rate": 7.04196675078906e-06,
      "loss": 0.0155,
      "step": 917840
    },
    {
      "epoch": 1.5020980211176789,
      "grad_norm": 1.1365151405334473,
      "learning_rate": 7.041900858575543e-06,
      "loss": 0.0234,
      "step": 917860
    },
    {
      "epoch": 1.5021307515563325,
      "grad_norm": 0.5505477786064148,
      "learning_rate": 7.041834966362025e-06,
      "loss": 0.0264,
      "step": 917880
    },
    {
      "epoch": 1.5021634819949856,
      "grad_norm": 0.3862190842628479,
      "learning_rate": 7.041769074148509e-06,
      "loss": 0.0159,
      "step": 917900
    },
    {
      "epoch": 1.502196212433639,
      "grad_norm": 1.0645482540130615,
      "learning_rate": 7.041703181934992e-06,
      "loss": 0.0192,
      "step": 917920
    },
    {
      "epoch": 1.5022289428722924,
      "grad_norm": 0.1733519732952118,
      "learning_rate": 7.041637289721474e-06,
      "loss": 0.0222,
      "step": 917940
    },
    {
      "epoch": 1.5022616733109457,
      "grad_norm": 0.8426298499107361,
      "learning_rate": 7.041571397507957e-06,
      "loss": 0.0233,
      "step": 917960
    },
    {
      "epoch": 1.502294403749599,
      "grad_norm": 0.49358421564102173,
      "learning_rate": 7.041505505294441e-06,
      "loss": 0.03,
      "step": 917980
    },
    {
      "epoch": 1.5023271341882523,
      "grad_norm": 0.31151941418647766,
      "learning_rate": 7.041439613080923e-06,
      "loss": 0.0315,
      "step": 918000
    },
    {
      "epoch": 1.5023598646269058,
      "grad_norm": 0.7858965396881104,
      "learning_rate": 7.041373720867406e-06,
      "loss": 0.0183,
      "step": 918020
    },
    {
      "epoch": 1.502392595065559,
      "grad_norm": 1.7672169208526611,
      "learning_rate": 7.041307828653888e-06,
      "loss": 0.0232,
      "step": 918040
    },
    {
      "epoch": 1.5024253255042124,
      "grad_norm": 0.778788685798645,
      "learning_rate": 7.041241936440372e-06,
      "loss": 0.0152,
      "step": 918060
    },
    {
      "epoch": 1.5024580559428657,
      "grad_norm": 0.3287280201911926,
      "learning_rate": 7.041176044226854e-06,
      "loss": 0.0247,
      "step": 918080
    },
    {
      "epoch": 1.5024907863815191,
      "grad_norm": 0.06318484246730804,
      "learning_rate": 7.041110152013337e-06,
      "loss": 0.022,
      "step": 918100
    },
    {
      "epoch": 1.5025235168201725,
      "grad_norm": 0.9914255738258362,
      "learning_rate": 7.04104425979982e-06,
      "loss": 0.0187,
      "step": 918120
    },
    {
      "epoch": 1.5025562472588256,
      "grad_norm": 1.3954815864562988,
      "learning_rate": 7.040978367586303e-06,
      "loss": 0.0252,
      "step": 918140
    },
    {
      "epoch": 1.5025889776974792,
      "grad_norm": 0.7609220743179321,
      "learning_rate": 7.040912475372785e-06,
      "loss": 0.0209,
      "step": 918160
    },
    {
      "epoch": 1.5026217081361324,
      "grad_norm": 1.1941596269607544,
      "learning_rate": 7.040846583159269e-06,
      "loss": 0.0312,
      "step": 918180
    },
    {
      "epoch": 1.5026544385747858,
      "grad_norm": 0.592910885810852,
      "learning_rate": 7.040780690945751e-06,
      "loss": 0.0151,
      "step": 918200
    },
    {
      "epoch": 1.5026871690134391,
      "grad_norm": 1.26373291015625,
      "learning_rate": 7.0407147987322345e-06,
      "loss": 0.0236,
      "step": 918220
    },
    {
      "epoch": 1.5027198994520925,
      "grad_norm": 0.2201288640499115,
      "learning_rate": 7.040648906518718e-06,
      "loss": 0.0208,
      "step": 918240
    },
    {
      "epoch": 1.5027526298907459,
      "grad_norm": 0.128986656665802,
      "learning_rate": 7.0405830143052e-06,
      "loss": 0.0194,
      "step": 918260
    },
    {
      "epoch": 1.502785360329399,
      "grad_norm": 0.14121392369270325,
      "learning_rate": 7.0405171220916835e-06,
      "loss": 0.019,
      "step": 918280
    },
    {
      "epoch": 1.5028180907680526,
      "grad_norm": 0.6955709457397461,
      "learning_rate": 7.040451229878165e-06,
      "loss": 0.0177,
      "step": 918300
    },
    {
      "epoch": 1.5028508212067058,
      "grad_norm": 0.573606014251709,
      "learning_rate": 7.040385337664649e-06,
      "loss": 0.0208,
      "step": 918320
    },
    {
      "epoch": 1.5028835516453591,
      "grad_norm": 1.691252589225769,
      "learning_rate": 7.040319445451132e-06,
      "loss": 0.0177,
      "step": 918340
    },
    {
      "epoch": 1.5029162820840125,
      "grad_norm": 1.0129801034927368,
      "learning_rate": 7.0402535532376145e-06,
      "loss": 0.0157,
      "step": 918360
    },
    {
      "epoch": 1.5029490125226659,
      "grad_norm": 0.4697554111480713,
      "learning_rate": 7.040187661024097e-06,
      "loss": 0.0233,
      "step": 918380
    },
    {
      "epoch": 1.5029817429613193,
      "grad_norm": 0.6096447110176086,
      "learning_rate": 7.040121768810581e-06,
      "loss": 0.0237,
      "step": 918400
    },
    {
      "epoch": 1.5030144733999724,
      "grad_norm": 0.5825293064117432,
      "learning_rate": 7.040055876597063e-06,
      "loss": 0.0172,
      "step": 918420
    },
    {
      "epoch": 1.503047203838626,
      "grad_norm": 1.4542288780212402,
      "learning_rate": 7.039989984383546e-06,
      "loss": 0.0164,
      "step": 918440
    },
    {
      "epoch": 1.5030799342772792,
      "grad_norm": 0.39182108640670776,
      "learning_rate": 7.039924092170028e-06,
      "loss": 0.0228,
      "step": 918460
    },
    {
      "epoch": 1.5031126647159325,
      "grad_norm": 0.5016008019447327,
      "learning_rate": 7.039858199956512e-06,
      "loss": 0.0193,
      "step": 918480
    },
    {
      "epoch": 1.503145395154586,
      "grad_norm": 0.25793230533599854,
      "learning_rate": 7.0397923077429945e-06,
      "loss": 0.015,
      "step": 918500
    },
    {
      "epoch": 1.503178125593239,
      "grad_norm": 2.6275689601898193,
      "learning_rate": 7.039726415529477e-06,
      "loss": 0.03,
      "step": 918520
    },
    {
      "epoch": 1.5032108560318926,
      "grad_norm": 0.6140351295471191,
      "learning_rate": 7.03966052331596e-06,
      "loss": 0.0258,
      "step": 918540
    },
    {
      "epoch": 1.5032435864705458,
      "grad_norm": 0.5316554307937622,
      "learning_rate": 7.0395946311024436e-06,
      "loss": 0.0214,
      "step": 918560
    },
    {
      "epoch": 1.5032763169091992,
      "grad_norm": 0.1901881992816925,
      "learning_rate": 7.0395287388889255e-06,
      "loss": 0.02,
      "step": 918580
    },
    {
      "epoch": 1.5033090473478525,
      "grad_norm": 0.254900187253952,
      "learning_rate": 7.039462846675409e-06,
      "loss": 0.0127,
      "step": 918600
    },
    {
      "epoch": 1.503341777786506,
      "grad_norm": 0.6254151463508606,
      "learning_rate": 7.039396954461893e-06,
      "loss": 0.0203,
      "step": 918620
    },
    {
      "epoch": 1.5033745082251593,
      "grad_norm": 0.47064006328582764,
      "learning_rate": 7.0393310622483745e-06,
      "loss": 0.027,
      "step": 918640
    },
    {
      "epoch": 1.5034072386638124,
      "grad_norm": 0.6447094082832336,
      "learning_rate": 7.039265170034858e-06,
      "loss": 0.0148,
      "step": 918660
    },
    {
      "epoch": 1.503439969102466,
      "grad_norm": 0.3534921109676361,
      "learning_rate": 7.03919927782134e-06,
      "loss": 0.0229,
      "step": 918680
    },
    {
      "epoch": 1.5034726995411192,
      "grad_norm": 0.3550783395767212,
      "learning_rate": 7.039133385607824e-06,
      "loss": 0.0196,
      "step": 918700
    },
    {
      "epoch": 1.5035054299797725,
      "grad_norm": 0.5425342321395874,
      "learning_rate": 7.039067493394306e-06,
      "loss": 0.0235,
      "step": 918720
    },
    {
      "epoch": 1.503538160418426,
      "grad_norm": 0.29386115074157715,
      "learning_rate": 7.039001601180789e-06,
      "loss": 0.0162,
      "step": 918740
    },
    {
      "epoch": 1.5035708908570793,
      "grad_norm": 0.2198815643787384,
      "learning_rate": 7.038935708967272e-06,
      "loss": 0.0211,
      "step": 918760
    },
    {
      "epoch": 1.5036036212957327,
      "grad_norm": 0.26021143794059753,
      "learning_rate": 7.038869816753755e-06,
      "loss": 0.0252,
      "step": 918780
    },
    {
      "epoch": 1.5036363517343858,
      "grad_norm": 0.3952769339084625,
      "learning_rate": 7.038803924540237e-06,
      "loss": 0.0229,
      "step": 918800
    },
    {
      "epoch": 1.5036690821730394,
      "grad_norm": 0.38094234466552734,
      "learning_rate": 7.038738032326721e-06,
      "loss": 0.0304,
      "step": 918820
    },
    {
      "epoch": 1.5037018126116926,
      "grad_norm": 1.4725680351257324,
      "learning_rate": 7.038672140113203e-06,
      "loss": 0.0217,
      "step": 918840
    },
    {
      "epoch": 1.503734543050346,
      "grad_norm": 3.4059741497039795,
      "learning_rate": 7.038606247899686e-06,
      "loss": 0.0328,
      "step": 918860
    },
    {
      "epoch": 1.5037672734889993,
      "grad_norm": 1.165859580039978,
      "learning_rate": 7.038540355686168e-06,
      "loss": 0.0186,
      "step": 918880
    },
    {
      "epoch": 1.5038000039276527,
      "grad_norm": 0.2579588294029236,
      "learning_rate": 7.038474463472652e-06,
      "loss": 0.0192,
      "step": 918900
    },
    {
      "epoch": 1.503832734366306,
      "grad_norm": 0.8456687927246094,
      "learning_rate": 7.038408571259135e-06,
      "loss": 0.0196,
      "step": 918920
    },
    {
      "epoch": 1.5038654648049592,
      "grad_norm": 0.8224779367446899,
      "learning_rate": 7.038342679045618e-06,
      "loss": 0.0288,
      "step": 918940
    },
    {
      "epoch": 1.5038981952436128,
      "grad_norm": 0.7791354656219482,
      "learning_rate": 7.038276786832101e-06,
      "loss": 0.0156,
      "step": 918960
    },
    {
      "epoch": 1.503930925682266,
      "grad_norm": 0.43064209818840027,
      "learning_rate": 7.038210894618584e-06,
      "loss": 0.0168,
      "step": 918980
    },
    {
      "epoch": 1.5039636561209193,
      "grad_norm": 0.6846572756767273,
      "learning_rate": 7.038145002405067e-06,
      "loss": 0.0274,
      "step": 919000
    },
    {
      "epoch": 1.5039963865595727,
      "grad_norm": 0.28487154841423035,
      "learning_rate": 7.038079110191549e-06,
      "loss": 0.0228,
      "step": 919020
    },
    {
      "epoch": 1.504029116998226,
      "grad_norm": 0.7519524693489075,
      "learning_rate": 7.038013217978033e-06,
      "loss": 0.0266,
      "step": 919040
    },
    {
      "epoch": 1.5040618474368794,
      "grad_norm": 1.029255986213684,
      "learning_rate": 7.037947325764515e-06,
      "loss": 0.0317,
      "step": 919060
    },
    {
      "epoch": 1.5040945778755326,
      "grad_norm": 0.2763921916484833,
      "learning_rate": 7.037881433550998e-06,
      "loss": 0.0143,
      "step": 919080
    },
    {
      "epoch": 1.5041273083141862,
      "grad_norm": 0.28574085235595703,
      "learning_rate": 7.03781554133748e-06,
      "loss": 0.0208,
      "step": 919100
    },
    {
      "epoch": 1.5041600387528393,
      "grad_norm": 0.28226280212402344,
      "learning_rate": 7.037749649123964e-06,
      "loss": 0.0131,
      "step": 919120
    },
    {
      "epoch": 1.5041927691914927,
      "grad_norm": 0.987468421459198,
      "learning_rate": 7.037683756910446e-06,
      "loss": 0.0222,
      "step": 919140
    },
    {
      "epoch": 1.504225499630146,
      "grad_norm": 0.38041260838508606,
      "learning_rate": 7.037617864696929e-06,
      "loss": 0.0195,
      "step": 919160
    },
    {
      "epoch": 1.5042582300687992,
      "grad_norm": 0.48927611112594604,
      "learning_rate": 7.037551972483412e-06,
      "loss": 0.0262,
      "step": 919180
    },
    {
      "epoch": 1.5042909605074528,
      "grad_norm": 0.18463446199893951,
      "learning_rate": 7.0374860802698955e-06,
      "loss": 0.0187,
      "step": 919200
    },
    {
      "epoch": 1.504323690946106,
      "grad_norm": 1.3751816749572754,
      "learning_rate": 7.037420188056377e-06,
      "loss": 0.0217,
      "step": 919220
    },
    {
      "epoch": 1.5043564213847596,
      "grad_norm": 1.6735951900482178,
      "learning_rate": 7.037354295842861e-06,
      "loss": 0.0236,
      "step": 919240
    },
    {
      "epoch": 1.5043891518234127,
      "grad_norm": 1.831518292427063,
      "learning_rate": 7.037288403629343e-06,
      "loss": 0.0241,
      "step": 919260
    },
    {
      "epoch": 1.504421882262066,
      "grad_norm": 0.1427433341741562,
      "learning_rate": 7.0372225114158264e-06,
      "loss": 0.0209,
      "step": 919280
    },
    {
      "epoch": 1.5044546127007195,
      "grad_norm": 0.6943770051002502,
      "learning_rate": 7.03715661920231e-06,
      "loss": 0.0176,
      "step": 919300
    },
    {
      "epoch": 1.5044873431393726,
      "grad_norm": 0.30003103613853455,
      "learning_rate": 7.037090726988792e-06,
      "loss": 0.0164,
      "step": 919320
    },
    {
      "epoch": 1.5045200735780262,
      "grad_norm": 0.9614349007606506,
      "learning_rate": 7.0370248347752755e-06,
      "loss": 0.0124,
      "step": 919340
    },
    {
      "epoch": 1.5045528040166793,
      "grad_norm": 0.7933693528175354,
      "learning_rate": 7.036958942561758e-06,
      "loss": 0.0182,
      "step": 919360
    },
    {
      "epoch": 1.5045855344553327,
      "grad_norm": 0.9515233635902405,
      "learning_rate": 7.036893050348241e-06,
      "loss": 0.0228,
      "step": 919380
    },
    {
      "epoch": 1.504618264893986,
      "grad_norm": 0.7182467579841614,
      "learning_rate": 7.036827158134724e-06,
      "loss": 0.0158,
      "step": 919400
    },
    {
      "epoch": 1.5046509953326395,
      "grad_norm": 0.8243932127952576,
      "learning_rate": 7.036761265921207e-06,
      "loss": 0.0253,
      "step": 919420
    },
    {
      "epoch": 1.5046837257712928,
      "grad_norm": 0.37407976388931274,
      "learning_rate": 7.036695373707689e-06,
      "loss": 0.0162,
      "step": 919440
    },
    {
      "epoch": 1.504716456209946,
      "grad_norm": 0.6346126198768616,
      "learning_rate": 7.036629481494173e-06,
      "loss": 0.0204,
      "step": 919460
    },
    {
      "epoch": 1.5047491866485996,
      "grad_norm": 0.1970112919807434,
      "learning_rate": 7.036563589280655e-06,
      "loss": 0.0247,
      "step": 919480
    },
    {
      "epoch": 1.5047819170872527,
      "grad_norm": 1.0647252798080444,
      "learning_rate": 7.036497697067138e-06,
      "loss": 0.0202,
      "step": 919500
    },
    {
      "epoch": 1.504814647525906,
      "grad_norm": 0.11765875667333603,
      "learning_rate": 7.036431804853621e-06,
      "loss": 0.0148,
      "step": 919520
    },
    {
      "epoch": 1.5048473779645595,
      "grad_norm": 1.373457670211792,
      "learning_rate": 7.036365912640104e-06,
      "loss": 0.0193,
      "step": 919540
    },
    {
      "epoch": 1.5048801084032128,
      "grad_norm": 0.4175395369529724,
      "learning_rate": 7.0363000204265865e-06,
      "loss": 0.0208,
      "step": 919560
    },
    {
      "epoch": 1.5049128388418662,
      "grad_norm": 0.375512957572937,
      "learning_rate": 7.03623412821307e-06,
      "loss": 0.0185,
      "step": 919580
    },
    {
      "epoch": 1.5049455692805194,
      "grad_norm": 0.46434852480888367,
      "learning_rate": 7.036168235999552e-06,
      "loss": 0.0208,
      "step": 919600
    },
    {
      "epoch": 1.504978299719173,
      "grad_norm": 1.5019423961639404,
      "learning_rate": 7.0361023437860356e-06,
      "loss": 0.0262,
      "step": 919620
    },
    {
      "epoch": 1.5050110301578261,
      "grad_norm": 0.4445553421974182,
      "learning_rate": 7.036036451572519e-06,
      "loss": 0.0218,
      "step": 919640
    },
    {
      "epoch": 1.5050437605964795,
      "grad_norm": 0.4335574805736542,
      "learning_rate": 7.035970559359001e-06,
      "loss": 0.0175,
      "step": 919660
    },
    {
      "epoch": 1.5050764910351329,
      "grad_norm": 0.24905920028686523,
      "learning_rate": 7.035904667145485e-06,
      "loss": 0.0177,
      "step": 919680
    },
    {
      "epoch": 1.5051092214737862,
      "grad_norm": 0.3967306613922119,
      "learning_rate": 7.0358387749319665e-06,
      "loss": 0.0177,
      "step": 919700
    },
    {
      "epoch": 1.5051419519124396,
      "grad_norm": 0.4633983373641968,
      "learning_rate": 7.03577288271845e-06,
      "loss": 0.0239,
      "step": 919720
    },
    {
      "epoch": 1.5051746823510928,
      "grad_norm": 0.26513487100601196,
      "learning_rate": 7.035706990504933e-06,
      "loss": 0.0167,
      "step": 919740
    },
    {
      "epoch": 1.5052074127897463,
      "grad_norm": 1.0416916608810425,
      "learning_rate": 7.035641098291416e-06,
      "loss": 0.0228,
      "step": 919760
    },
    {
      "epoch": 1.5052401432283995,
      "grad_norm": 0.1380171775817871,
      "learning_rate": 7.035575206077898e-06,
      "loss": 0.0199,
      "step": 919780
    },
    {
      "epoch": 1.5052728736670529,
      "grad_norm": 1.3712037801742554,
      "learning_rate": 7.035509313864382e-06,
      "loss": 0.016,
      "step": 919800
    },
    {
      "epoch": 1.5053056041057062,
      "grad_norm": 0.8129214644432068,
      "learning_rate": 7.035443421650864e-06,
      "loss": 0.0204,
      "step": 919820
    },
    {
      "epoch": 1.5053383345443596,
      "grad_norm": 0.624902606010437,
      "learning_rate": 7.035377529437347e-06,
      "loss": 0.0226,
      "step": 919840
    },
    {
      "epoch": 1.505371064983013,
      "grad_norm": 0.2476760745048523,
      "learning_rate": 7.035311637223829e-06,
      "loss": 0.0204,
      "step": 919860
    },
    {
      "epoch": 1.5054037954216661,
      "grad_norm": 0.5138140916824341,
      "learning_rate": 7.035245745010313e-06,
      "loss": 0.0244,
      "step": 919880
    },
    {
      "epoch": 1.5054365258603197,
      "grad_norm": 0.4034990072250366,
      "learning_rate": 7.035179852796795e-06,
      "loss": 0.0211,
      "step": 919900
    },
    {
      "epoch": 1.5054692562989729,
      "grad_norm": 0.9789244532585144,
      "learning_rate": 7.035113960583278e-06,
      "loss": 0.0202,
      "step": 919920
    },
    {
      "epoch": 1.5055019867376263,
      "grad_norm": 4.486180305480957,
      "learning_rate": 7.035048068369761e-06,
      "loss": 0.0224,
      "step": 919940
    },
    {
      "epoch": 1.5055347171762796,
      "grad_norm": 0.7845669984817505,
      "learning_rate": 7.034982176156244e-06,
      "loss": 0.0275,
      "step": 919960
    },
    {
      "epoch": 1.5055674476149328,
      "grad_norm": 0.7506948113441467,
      "learning_rate": 7.0349162839427266e-06,
      "loss": 0.0158,
      "step": 919980
    },
    {
      "epoch": 1.5056001780535864,
      "grad_norm": 0.7616026997566223,
      "learning_rate": 7.03485039172921e-06,
      "loss": 0.0216,
      "step": 920000
    },
    {
      "epoch": 1.5056329084922395,
      "grad_norm": 1.7912455797195435,
      "learning_rate": 7.034784499515693e-06,
      "loss": 0.0235,
      "step": 920020
    },
    {
      "epoch": 1.5056656389308931,
      "grad_norm": 0.7680537104606628,
      "learning_rate": 7.034718607302176e-06,
      "loss": 0.0325,
      "step": 920040
    },
    {
      "epoch": 1.5056983693695463,
      "grad_norm": 1.0760481357574463,
      "learning_rate": 7.034652715088659e-06,
      "loss": 0.0199,
      "step": 920060
    },
    {
      "epoch": 1.5057310998081996,
      "grad_norm": 0.9050517082214355,
      "learning_rate": 7.034586822875141e-06,
      "loss": 0.0124,
      "step": 920080
    },
    {
      "epoch": 1.505763830246853,
      "grad_norm": 0.19948020577430725,
      "learning_rate": 7.034520930661625e-06,
      "loss": 0.0199,
      "step": 920100
    },
    {
      "epoch": 1.5057965606855062,
      "grad_norm": 0.5094490647315979,
      "learning_rate": 7.034455038448107e-06,
      "loss": 0.0188,
      "step": 920120
    },
    {
      "epoch": 1.5058292911241598,
      "grad_norm": 0.39232614636421204,
      "learning_rate": 7.03438914623459e-06,
      "loss": 0.0165,
      "step": 920140
    },
    {
      "epoch": 1.505862021562813,
      "grad_norm": 0.5794619917869568,
      "learning_rate": 7.034323254021073e-06,
      "loss": 0.0217,
      "step": 920160
    },
    {
      "epoch": 1.5058947520014663,
      "grad_norm": 2.2970399856567383,
      "learning_rate": 7.034257361807556e-06,
      "loss": 0.0163,
      "step": 920180
    },
    {
      "epoch": 1.5059274824401196,
      "grad_norm": 0.33692115545272827,
      "learning_rate": 7.034191469594038e-06,
      "loss": 0.0169,
      "step": 920200
    },
    {
      "epoch": 1.505960212878773,
      "grad_norm": 0.5096475481987,
      "learning_rate": 7.034125577380522e-06,
      "loss": 0.0284,
      "step": 920220
    },
    {
      "epoch": 1.5059929433174264,
      "grad_norm": 0.5475270748138428,
      "learning_rate": 7.034059685167004e-06,
      "loss": 0.0181,
      "step": 920240
    },
    {
      "epoch": 1.5060256737560795,
      "grad_norm": 0.21679462492465973,
      "learning_rate": 7.0339937929534875e-06,
      "loss": 0.0202,
      "step": 920260
    },
    {
      "epoch": 1.5060584041947331,
      "grad_norm": 0.5898401141166687,
      "learning_rate": 7.033927900739969e-06,
      "loss": 0.0176,
      "step": 920280
    },
    {
      "epoch": 1.5060911346333863,
      "grad_norm": 0.6628630757331848,
      "learning_rate": 7.033862008526453e-06,
      "loss": 0.0305,
      "step": 920300
    },
    {
      "epoch": 1.5061238650720397,
      "grad_norm": 0.5353981852531433,
      "learning_rate": 7.033796116312936e-06,
      "loss": 0.0254,
      "step": 920320
    },
    {
      "epoch": 1.506156595510693,
      "grad_norm": 0.6166471838951111,
      "learning_rate": 7.033730224099418e-06,
      "loss": 0.0207,
      "step": 920340
    },
    {
      "epoch": 1.5061893259493464,
      "grad_norm": 0.17010898888111115,
      "learning_rate": 7.033664331885902e-06,
      "loss": 0.0155,
      "step": 920360
    },
    {
      "epoch": 1.5062220563879998,
      "grad_norm": 0.7530049085617065,
      "learning_rate": 7.033598439672385e-06,
      "loss": 0.0245,
      "step": 920380
    },
    {
      "epoch": 1.506254786826653,
      "grad_norm": 0.362323522567749,
      "learning_rate": 7.0335325474588675e-06,
      "loss": 0.0183,
      "step": 920400
    },
    {
      "epoch": 1.5062875172653065,
      "grad_norm": 2.952759027481079,
      "learning_rate": 7.03346665524535e-06,
      "loss": 0.028,
      "step": 920420
    },
    {
      "epoch": 1.5063202477039597,
      "grad_norm": 0.10598607361316681,
      "learning_rate": 7.033400763031834e-06,
      "loss": 0.0206,
      "step": 920440
    },
    {
      "epoch": 1.506352978142613,
      "grad_norm": 0.48811817169189453,
      "learning_rate": 7.033334870818316e-06,
      "loss": 0.0141,
      "step": 920460
    },
    {
      "epoch": 1.5063857085812664,
      "grad_norm": 0.4622650444507599,
      "learning_rate": 7.033268978604799e-06,
      "loss": 0.0191,
      "step": 920480
    },
    {
      "epoch": 1.5064184390199198,
      "grad_norm": 0.20633171498775482,
      "learning_rate": 7.033203086391281e-06,
      "loss": 0.0158,
      "step": 920500
    },
    {
      "epoch": 1.5064511694585732,
      "grad_norm": 0.4573332369327545,
      "learning_rate": 7.033137194177765e-06,
      "loss": 0.0263,
      "step": 920520
    },
    {
      "epoch": 1.5064838998972263,
      "grad_norm": 0.5989144444465637,
      "learning_rate": 7.0330713019642475e-06,
      "loss": 0.0248,
      "step": 920540
    },
    {
      "epoch": 1.50651663033588,
      "grad_norm": 0.5036070346832275,
      "learning_rate": 7.03300540975073e-06,
      "loss": 0.0187,
      "step": 920560
    },
    {
      "epoch": 1.506549360774533,
      "grad_norm": 1.1501193046569824,
      "learning_rate": 7.032939517537213e-06,
      "loss": 0.0167,
      "step": 920580
    },
    {
      "epoch": 1.5065820912131864,
      "grad_norm": 0.639514684677124,
      "learning_rate": 7.032873625323697e-06,
      "loss": 0.016,
      "step": 920600
    },
    {
      "epoch": 1.5066148216518398,
      "grad_norm": 0.3573741018772125,
      "learning_rate": 7.0328077331101785e-06,
      "loss": 0.0185,
      "step": 920620
    },
    {
      "epoch": 1.5066475520904932,
      "grad_norm": 1.3643938302993774,
      "learning_rate": 7.032741840896662e-06,
      "loss": 0.0249,
      "step": 920640
    },
    {
      "epoch": 1.5066802825291465,
      "grad_norm": 0.44226813316345215,
      "learning_rate": 7.032675948683144e-06,
      "loss": 0.0122,
      "step": 920660
    },
    {
      "epoch": 1.5067130129677997,
      "grad_norm": 1.7023431062698364,
      "learning_rate": 7.0326100564696275e-06,
      "loss": 0.0222,
      "step": 920680
    },
    {
      "epoch": 1.5067457434064533,
      "grad_norm": 0.37932199239730835,
      "learning_rate": 7.032544164256111e-06,
      "loss": 0.0279,
      "step": 920700
    },
    {
      "epoch": 1.5067784738451064,
      "grad_norm": 0.7568644881248474,
      "learning_rate": 7.032478272042593e-06,
      "loss": 0.0199,
      "step": 920720
    },
    {
      "epoch": 1.5068112042837598,
      "grad_norm": 1.112059473991394,
      "learning_rate": 7.032412379829077e-06,
      "loss": 0.015,
      "step": 920740
    },
    {
      "epoch": 1.5068439347224132,
      "grad_norm": 0.021971488371491432,
      "learning_rate": 7.032346487615559e-06,
      "loss": 0.0168,
      "step": 920760
    },
    {
      "epoch": 1.5068766651610663,
      "grad_norm": 0.7927844524383545,
      "learning_rate": 7.032280595402042e-06,
      "loss": 0.0259,
      "step": 920780
    },
    {
      "epoch": 1.50690939559972,
      "grad_norm": 0.8277679085731506,
      "learning_rate": 7.032214703188525e-06,
      "loss": 0.0243,
      "step": 920800
    },
    {
      "epoch": 1.506942126038373,
      "grad_norm": 0.5012750029563904,
      "learning_rate": 7.032148810975008e-06,
      "loss": 0.0175,
      "step": 920820
    },
    {
      "epoch": 1.5069748564770267,
      "grad_norm": 0.2959345579147339,
      "learning_rate": 7.03208291876149e-06,
      "loss": 0.0151,
      "step": 920840
    },
    {
      "epoch": 1.5070075869156798,
      "grad_norm": 0.1554577350616455,
      "learning_rate": 7.032017026547974e-06,
      "loss": 0.0144,
      "step": 920860
    },
    {
      "epoch": 1.5070403173543332,
      "grad_norm": 0.23213918507099152,
      "learning_rate": 7.031951134334456e-06,
      "loss": 0.0176,
      "step": 920880
    },
    {
      "epoch": 1.5070730477929866,
      "grad_norm": 0.376775860786438,
      "learning_rate": 7.031885242120939e-06,
      "loss": 0.0187,
      "step": 920900
    },
    {
      "epoch": 1.5071057782316397,
      "grad_norm": 1.0308363437652588,
      "learning_rate": 7.031819349907421e-06,
      "loss": 0.019,
      "step": 920920
    },
    {
      "epoch": 1.5071385086702933,
      "grad_norm": 0.6887229681015015,
      "learning_rate": 7.031753457693905e-06,
      "loss": 0.022,
      "step": 920940
    },
    {
      "epoch": 1.5071712391089465,
      "grad_norm": 0.6620873212814331,
      "learning_rate": 7.031687565480388e-06,
      "loss": 0.0229,
      "step": 920960
    },
    {
      "epoch": 1.5072039695475998,
      "grad_norm": 0.17654873430728912,
      "learning_rate": 7.03162167326687e-06,
      "loss": 0.0225,
      "step": 920980
    },
    {
      "epoch": 1.5072366999862532,
      "grad_norm": 0.46851110458374023,
      "learning_rate": 7.031555781053353e-06,
      "loss": 0.0211,
      "step": 921000
    },
    {
      "epoch": 1.5072694304249066,
      "grad_norm": 0.7217975854873657,
      "learning_rate": 7.031489888839837e-06,
      "loss": 0.0165,
      "step": 921020
    },
    {
      "epoch": 1.50730216086356,
      "grad_norm": 0.7179215550422668,
      "learning_rate": 7.0314239966263185e-06,
      "loss": 0.0262,
      "step": 921040
    },
    {
      "epoch": 1.507334891302213,
      "grad_norm": 1.7965363264083862,
      "learning_rate": 7.031358104412802e-06,
      "loss": 0.0168,
      "step": 921060
    },
    {
      "epoch": 1.5073676217408667,
      "grad_norm": 0.35623255372047424,
      "learning_rate": 7.031292212199286e-06,
      "loss": 0.0213,
      "step": 921080
    },
    {
      "epoch": 1.5074003521795198,
      "grad_norm": 0.6212701797485352,
      "learning_rate": 7.031226319985768e-06,
      "loss": 0.0273,
      "step": 921100
    },
    {
      "epoch": 1.5074330826181732,
      "grad_norm": 0.2982887327671051,
      "learning_rate": 7.031160427772251e-06,
      "loss": 0.0144,
      "step": 921120
    },
    {
      "epoch": 1.5074658130568266,
      "grad_norm": 0.3926372528076172,
      "learning_rate": 7.031094535558733e-06,
      "loss": 0.0257,
      "step": 921140
    },
    {
      "epoch": 1.50749854349548,
      "grad_norm": 0.236403226852417,
      "learning_rate": 7.031028643345217e-06,
      "loss": 0.0227,
      "step": 921160
    },
    {
      "epoch": 1.5075312739341333,
      "grad_norm": 1.572996735572815,
      "learning_rate": 7.030962751131699e-06,
      "loss": 0.021,
      "step": 921180
    },
    {
      "epoch": 1.5075640043727865,
      "grad_norm": 0.5872122645378113,
      "learning_rate": 7.030896858918182e-06,
      "loss": 0.0178,
      "step": 921200
    },
    {
      "epoch": 1.50759673481144,
      "grad_norm": 0.6624463200569153,
      "learning_rate": 7.030830966704665e-06,
      "loss": 0.0202,
      "step": 921220
    },
    {
      "epoch": 1.5076294652500932,
      "grad_norm": 1.8645890951156616,
      "learning_rate": 7.0307650744911485e-06,
      "loss": 0.0232,
      "step": 921240
    },
    {
      "epoch": 1.5076621956887466,
      "grad_norm": 0.7149333953857422,
      "learning_rate": 7.03069918227763e-06,
      "loss": 0.0191,
      "step": 921260
    },
    {
      "epoch": 1.5076949261274,
      "grad_norm": 0.6755315661430359,
      "learning_rate": 7.030633290064114e-06,
      "loss": 0.0202,
      "step": 921280
    },
    {
      "epoch": 1.5077276565660533,
      "grad_norm": 0.8299144506454468,
      "learning_rate": 7.030567397850596e-06,
      "loss": 0.0278,
      "step": 921300
    },
    {
      "epoch": 1.5077603870047067,
      "grad_norm": 0.7675235867500305,
      "learning_rate": 7.0305015056370794e-06,
      "loss": 0.0178,
      "step": 921320
    },
    {
      "epoch": 1.5077931174433599,
      "grad_norm": 0.7906423807144165,
      "learning_rate": 7.030435613423562e-06,
      "loss": 0.0246,
      "step": 921340
    },
    {
      "epoch": 1.5078258478820135,
      "grad_norm": 0.38118883967399597,
      "learning_rate": 7.030369721210045e-06,
      "loss": 0.0216,
      "step": 921360
    },
    {
      "epoch": 1.5078585783206666,
      "grad_norm": 0.6094707250595093,
      "learning_rate": 7.030303828996528e-06,
      "loss": 0.0286,
      "step": 921380
    },
    {
      "epoch": 1.50789130875932,
      "grad_norm": 0.1785374879837036,
      "learning_rate": 7.030237936783011e-06,
      "loss": 0.0155,
      "step": 921400
    },
    {
      "epoch": 1.5079240391979734,
      "grad_norm": 0.3672349154949188,
      "learning_rate": 7.030172044569494e-06,
      "loss": 0.0173,
      "step": 921420
    },
    {
      "epoch": 1.5079567696366265,
      "grad_norm": 0.8914321660995483,
      "learning_rate": 7.030106152355977e-06,
      "loss": 0.0172,
      "step": 921440
    },
    {
      "epoch": 1.50798950007528,
      "grad_norm": 0.3171568512916565,
      "learning_rate": 7.03004026014246e-06,
      "loss": 0.0181,
      "step": 921460
    },
    {
      "epoch": 1.5080222305139332,
      "grad_norm": 0.8277668356895447,
      "learning_rate": 7.029974367928942e-06,
      "loss": 0.0227,
      "step": 921480
    },
    {
      "epoch": 1.5080549609525868,
      "grad_norm": 0.835486650466919,
      "learning_rate": 7.029908475715426e-06,
      "loss": 0.0127,
      "step": 921500
    },
    {
      "epoch": 1.50808769139124,
      "grad_norm": 0.2972054183483124,
      "learning_rate": 7.029842583501908e-06,
      "loss": 0.0206,
      "step": 921520
    },
    {
      "epoch": 1.5081204218298934,
      "grad_norm": 0.112457774579525,
      "learning_rate": 7.029776691288391e-06,
      "loss": 0.0175,
      "step": 921540
    },
    {
      "epoch": 1.5081531522685467,
      "grad_norm": 0.1933484822511673,
      "learning_rate": 7.029710799074874e-06,
      "loss": 0.0188,
      "step": 921560
    },
    {
      "epoch": 1.5081858827071999,
      "grad_norm": 1.764201045036316,
      "learning_rate": 7.029644906861357e-06,
      "loss": 0.0231,
      "step": 921580
    },
    {
      "epoch": 1.5082186131458535,
      "grad_norm": 0.5176775455474854,
      "learning_rate": 7.0295790146478395e-06,
      "loss": 0.021,
      "step": 921600
    },
    {
      "epoch": 1.5082513435845066,
      "grad_norm": 0.44103914499282837,
      "learning_rate": 7.029513122434323e-06,
      "loss": 0.0178,
      "step": 921620
    },
    {
      "epoch": 1.50828407402316,
      "grad_norm": 0.43564310669898987,
      "learning_rate": 7.029447230220805e-06,
      "loss": 0.0281,
      "step": 921640
    },
    {
      "epoch": 1.5083168044618134,
      "grad_norm": 1.6683088541030884,
      "learning_rate": 7.0293813380072886e-06,
      "loss": 0.0173,
      "step": 921660
    },
    {
      "epoch": 1.5083495349004667,
      "grad_norm": 1.247928261756897,
      "learning_rate": 7.0293154457937705e-06,
      "loss": 0.0218,
      "step": 921680
    },
    {
      "epoch": 1.5083822653391201,
      "grad_norm": 0.23192276060581207,
      "learning_rate": 7.029249553580254e-06,
      "loss": 0.0266,
      "step": 921700
    },
    {
      "epoch": 1.5084149957777733,
      "grad_norm": 0.2841574549674988,
      "learning_rate": 7.029183661366737e-06,
      "loss": 0.0185,
      "step": 921720
    },
    {
      "epoch": 1.5084477262164269,
      "grad_norm": 1.2952648401260376,
      "learning_rate": 7.0291177691532195e-06,
      "loss": 0.0224,
      "step": 921740
    },
    {
      "epoch": 1.50848045665508,
      "grad_norm": 1.00274658203125,
      "learning_rate": 7.029051876939703e-06,
      "loss": 0.0231,
      "step": 921760
    },
    {
      "epoch": 1.5085131870937334,
      "grad_norm": 0.746887743473053,
      "learning_rate": 7.028985984726186e-06,
      "loss": 0.028,
      "step": 921780
    },
    {
      "epoch": 1.5085459175323868,
      "grad_norm": 0.2128121256828308,
      "learning_rate": 7.028920092512669e-06,
      "loss": 0.0343,
      "step": 921800
    },
    {
      "epoch": 1.5085786479710401,
      "grad_norm": 0.3333296775817871,
      "learning_rate": 7.028854200299151e-06,
      "loss": 0.0187,
      "step": 921820
    },
    {
      "epoch": 1.5086113784096935,
      "grad_norm": 1.0400980710983276,
      "learning_rate": 7.028788308085635e-06,
      "loss": 0.0235,
      "step": 921840
    },
    {
      "epoch": 1.5086441088483467,
      "grad_norm": 0.8609251976013184,
      "learning_rate": 7.028722415872117e-06,
      "loss": 0.027,
      "step": 921860
    },
    {
      "epoch": 1.5086768392870002,
      "grad_norm": 0.2674623429775238,
      "learning_rate": 7.0286565236586e-06,
      "loss": 0.0292,
      "step": 921880
    },
    {
      "epoch": 1.5087095697256534,
      "grad_norm": 0.23419302701950073,
      "learning_rate": 7.028590631445082e-06,
      "loss": 0.0166,
      "step": 921900
    },
    {
      "epoch": 1.5087423001643068,
      "grad_norm": 0.2574285566806793,
      "learning_rate": 7.028524739231566e-06,
      "loss": 0.0186,
      "step": 921920
    },
    {
      "epoch": 1.5087750306029601,
      "grad_norm": 1.1218303442001343,
      "learning_rate": 7.028458847018048e-06,
      "loss": 0.0171,
      "step": 921940
    },
    {
      "epoch": 1.5088077610416135,
      "grad_norm": 0.9862838983535767,
      "learning_rate": 7.028392954804531e-06,
      "loss": 0.0202,
      "step": 921960
    },
    {
      "epoch": 1.5088404914802669,
      "grad_norm": 0.7160780429840088,
      "learning_rate": 7.028327062591014e-06,
      "loss": 0.0246,
      "step": 921980
    },
    {
      "epoch": 1.50887322191892,
      "grad_norm": 0.5126756429672241,
      "learning_rate": 7.028261170377497e-06,
      "loss": 0.017,
      "step": 922000
    },
    {
      "epoch": 1.5089059523575736,
      "grad_norm": 0.3500533401966095,
      "learning_rate": 7.0281952781639796e-06,
      "loss": 0.0229,
      "step": 922020
    },
    {
      "epoch": 1.5089386827962268,
      "grad_norm": 0.39458951354026794,
      "learning_rate": 7.028129385950463e-06,
      "loss": 0.0198,
      "step": 922040
    },
    {
      "epoch": 1.5089714132348802,
      "grad_norm": 0.6587815880775452,
      "learning_rate": 7.028063493736945e-06,
      "loss": 0.0218,
      "step": 922060
    },
    {
      "epoch": 1.5090041436735335,
      "grad_norm": 0.87275230884552,
      "learning_rate": 7.027997601523429e-06,
      "loss": 0.0235,
      "step": 922080
    },
    {
      "epoch": 1.509036874112187,
      "grad_norm": 0.12337784469127655,
      "learning_rate": 7.027931709309912e-06,
      "loss": 0.019,
      "step": 922100
    },
    {
      "epoch": 1.5090696045508403,
      "grad_norm": 0.22147563099861145,
      "learning_rate": 7.027865817096394e-06,
      "loss": 0.0199,
      "step": 922120
    },
    {
      "epoch": 1.5091023349894934,
      "grad_norm": 0.7562313079833984,
      "learning_rate": 7.027799924882878e-06,
      "loss": 0.0202,
      "step": 922140
    },
    {
      "epoch": 1.509135065428147,
      "grad_norm": 0.5307890176773071,
      "learning_rate": 7.02773403266936e-06,
      "loss": 0.0233,
      "step": 922160
    },
    {
      "epoch": 1.5091677958668002,
      "grad_norm": 0.4622913599014282,
      "learning_rate": 7.027668140455843e-06,
      "loss": 0.0171,
      "step": 922180
    },
    {
      "epoch": 1.5092005263054535,
      "grad_norm": 0.5913500189781189,
      "learning_rate": 7.027602248242326e-06,
      "loss": 0.0252,
      "step": 922200
    },
    {
      "epoch": 1.509233256744107,
      "grad_norm": 0.6856784820556641,
      "learning_rate": 7.027536356028809e-06,
      "loss": 0.0293,
      "step": 922220
    },
    {
      "epoch": 1.50926598718276,
      "grad_norm": 0.36464789509773254,
      "learning_rate": 7.027470463815291e-06,
      "loss": 0.0252,
      "step": 922240
    },
    {
      "epoch": 1.5092987176214137,
      "grad_norm": 4.0533833503723145,
      "learning_rate": 7.027404571601775e-06,
      "loss": 0.0278,
      "step": 922260
    },
    {
      "epoch": 1.5093314480600668,
      "grad_norm": 0.6506600975990295,
      "learning_rate": 7.027338679388257e-06,
      "loss": 0.0134,
      "step": 922280
    },
    {
      "epoch": 1.5093641784987204,
      "grad_norm": 0.9630990028381348,
      "learning_rate": 7.0272727871747405e-06,
      "loss": 0.0159,
      "step": 922300
    },
    {
      "epoch": 1.5093969089373735,
      "grad_norm": 0.4153818190097809,
      "learning_rate": 7.027206894961222e-06,
      "loss": 0.0149,
      "step": 922320
    },
    {
      "epoch": 1.509429639376027,
      "grad_norm": 0.30479979515075684,
      "learning_rate": 7.027141002747706e-06,
      "loss": 0.0199,
      "step": 922340
    },
    {
      "epoch": 1.5094623698146803,
      "grad_norm": 0.6689019799232483,
      "learning_rate": 7.027075110534189e-06,
      "loss": 0.0216,
      "step": 922360
    },
    {
      "epoch": 1.5094951002533334,
      "grad_norm": 0.6226143836975098,
      "learning_rate": 7.0270092183206714e-06,
      "loss": 0.0181,
      "step": 922380
    },
    {
      "epoch": 1.509527830691987,
      "grad_norm": 0.36639198660850525,
      "learning_rate": 7.026943326107154e-06,
      "loss": 0.0148,
      "step": 922400
    },
    {
      "epoch": 1.5095605611306402,
      "grad_norm": 0.32351788878440857,
      "learning_rate": 7.026877433893638e-06,
      "loss": 0.0173,
      "step": 922420
    },
    {
      "epoch": 1.5095932915692936,
      "grad_norm": 0.7251843214035034,
      "learning_rate": 7.02681154168012e-06,
      "loss": 0.0231,
      "step": 922440
    },
    {
      "epoch": 1.509626022007947,
      "grad_norm": 1.423232078552246,
      "learning_rate": 7.026745649466603e-06,
      "loss": 0.0153,
      "step": 922460
    },
    {
      "epoch": 1.5096587524466003,
      "grad_norm": 0.31902414560317993,
      "learning_rate": 7.026679757253087e-06,
      "loss": 0.0252,
      "step": 922480
    },
    {
      "epoch": 1.5096914828852537,
      "grad_norm": 0.26479196548461914,
      "learning_rate": 7.026613865039569e-06,
      "loss": 0.0177,
      "step": 922500
    },
    {
      "epoch": 1.5097242133239068,
      "grad_norm": 0.8718364834785461,
      "learning_rate": 7.026547972826052e-06,
      "loss": 0.0145,
      "step": 922520
    },
    {
      "epoch": 1.5097569437625604,
      "grad_norm": 1.2574517726898193,
      "learning_rate": 7.026482080612534e-06,
      "loss": 0.0228,
      "step": 922540
    },
    {
      "epoch": 1.5097896742012136,
      "grad_norm": 1.9894307851791382,
      "learning_rate": 7.026416188399018e-06,
      "loss": 0.0269,
      "step": 922560
    },
    {
      "epoch": 1.509822404639867,
      "grad_norm": 0.7468051314353943,
      "learning_rate": 7.0263502961855005e-06,
      "loss": 0.024,
      "step": 922580
    },
    {
      "epoch": 1.5098551350785203,
      "grad_norm": 0.7740462422370911,
      "learning_rate": 7.026284403971983e-06,
      "loss": 0.0168,
      "step": 922600
    },
    {
      "epoch": 1.5098878655171737,
      "grad_norm": 0.9725217819213867,
      "learning_rate": 7.026218511758466e-06,
      "loss": 0.0115,
      "step": 922620
    },
    {
      "epoch": 1.509920595955827,
      "grad_norm": 0.5025991797447205,
      "learning_rate": 7.02615261954495e-06,
      "loss": 0.0274,
      "step": 922640
    },
    {
      "epoch": 1.5099533263944802,
      "grad_norm": 0.2628895044326782,
      "learning_rate": 7.0260867273314315e-06,
      "loss": 0.0209,
      "step": 922660
    },
    {
      "epoch": 1.5099860568331338,
      "grad_norm": 0.3505544662475586,
      "learning_rate": 7.026020835117915e-06,
      "loss": 0.0177,
      "step": 922680
    },
    {
      "epoch": 1.510018787271787,
      "grad_norm": 0.14849697053432465,
      "learning_rate": 7.025954942904397e-06,
      "loss": 0.013,
      "step": 922700
    },
    {
      "epoch": 1.5100515177104403,
      "grad_norm": 0.12436741590499878,
      "learning_rate": 7.0258890506908805e-06,
      "loss": 0.0162,
      "step": 922720
    },
    {
      "epoch": 1.5100842481490937,
      "grad_norm": 0.45644086599349976,
      "learning_rate": 7.0258231584773624e-06,
      "loss": 0.0265,
      "step": 922740
    },
    {
      "epoch": 1.510116978587747,
      "grad_norm": 0.663027286529541,
      "learning_rate": 7.025757266263846e-06,
      "loss": 0.0195,
      "step": 922760
    },
    {
      "epoch": 1.5101497090264004,
      "grad_norm": 0.6973128318786621,
      "learning_rate": 7.025691374050329e-06,
      "loss": 0.0151,
      "step": 922780
    },
    {
      "epoch": 1.5101824394650536,
      "grad_norm": 0.1039348915219307,
      "learning_rate": 7.0256254818368115e-06,
      "loss": 0.0287,
      "step": 922800
    },
    {
      "epoch": 1.5102151699037072,
      "grad_norm": 0.268353670835495,
      "learning_rate": 7.025559589623295e-06,
      "loss": 0.0163,
      "step": 922820
    },
    {
      "epoch": 1.5102479003423603,
      "grad_norm": 0.44252389669418335,
      "learning_rate": 7.025493697409778e-06,
      "loss": 0.0188,
      "step": 922840
    },
    {
      "epoch": 1.5102806307810137,
      "grad_norm": 0.6418429017066956,
      "learning_rate": 7.025427805196261e-06,
      "loss": 0.0279,
      "step": 922860
    },
    {
      "epoch": 1.510313361219667,
      "grad_norm": 1.0641374588012695,
      "learning_rate": 7.025361912982743e-06,
      "loss": 0.0196,
      "step": 922880
    },
    {
      "epoch": 1.5103460916583205,
      "grad_norm": 2.041776418685913,
      "learning_rate": 7.025296020769227e-06,
      "loss": 0.0199,
      "step": 922900
    },
    {
      "epoch": 1.5103788220969738,
      "grad_norm": 1.4564036130905151,
      "learning_rate": 7.025230128555709e-06,
      "loss": 0.022,
      "step": 922920
    },
    {
      "epoch": 1.510411552535627,
      "grad_norm": 1.150413155555725,
      "learning_rate": 7.025164236342192e-06,
      "loss": 0.0236,
      "step": 922940
    },
    {
      "epoch": 1.5104442829742806,
      "grad_norm": 0.4316050410270691,
      "learning_rate": 7.025098344128674e-06,
      "loss": 0.0201,
      "step": 922960
    },
    {
      "epoch": 1.5104770134129337,
      "grad_norm": 0.23904480040073395,
      "learning_rate": 7.025032451915158e-06,
      "loss": 0.0184,
      "step": 922980
    },
    {
      "epoch": 1.510509743851587,
      "grad_norm": 0.17591159045696259,
      "learning_rate": 7.024966559701641e-06,
      "loss": 0.0202,
      "step": 923000
    },
    {
      "epoch": 1.5105424742902405,
      "grad_norm": 0.23585006594657898,
      "learning_rate": 7.024900667488123e-06,
      "loss": 0.0189,
      "step": 923020
    },
    {
      "epoch": 1.5105752047288936,
      "grad_norm": 1.0844606161117554,
      "learning_rate": 7.024834775274606e-06,
      "loss": 0.0273,
      "step": 923040
    },
    {
      "epoch": 1.5106079351675472,
      "grad_norm": 3.454047441482544,
      "learning_rate": 7.02476888306109e-06,
      "loss": 0.0256,
      "step": 923060
    },
    {
      "epoch": 1.5106406656062004,
      "grad_norm": 0.2292940765619278,
      "learning_rate": 7.0247029908475716e-06,
      "loss": 0.0147,
      "step": 923080
    },
    {
      "epoch": 1.510673396044854,
      "grad_norm": 1.6297264099121094,
      "learning_rate": 7.024637098634055e-06,
      "loss": 0.0239,
      "step": 923100
    },
    {
      "epoch": 1.510706126483507,
      "grad_norm": 0.40226373076438904,
      "learning_rate": 7.024571206420537e-06,
      "loss": 0.0259,
      "step": 923120
    },
    {
      "epoch": 1.5107388569221605,
      "grad_norm": 1.6778974533081055,
      "learning_rate": 7.024505314207021e-06,
      "loss": 0.0244,
      "step": 923140
    },
    {
      "epoch": 1.5107715873608139,
      "grad_norm": 0.46521708369255066,
      "learning_rate": 7.024439421993504e-06,
      "loss": 0.0203,
      "step": 923160
    },
    {
      "epoch": 1.510804317799467,
      "grad_norm": 0.4434712827205658,
      "learning_rate": 7.024373529779986e-06,
      "loss": 0.0244,
      "step": 923180
    },
    {
      "epoch": 1.5108370482381206,
      "grad_norm": 0.2789228856563568,
      "learning_rate": 7.02430763756647e-06,
      "loss": 0.021,
      "step": 923200
    },
    {
      "epoch": 1.5108697786767737,
      "grad_norm": 0.6318852305412292,
      "learning_rate": 7.0242417453529524e-06,
      "loss": 0.014,
      "step": 923220
    },
    {
      "epoch": 1.5109025091154271,
      "grad_norm": 0.1378067284822464,
      "learning_rate": 7.024175853139435e-06,
      "loss": 0.0174,
      "step": 923240
    },
    {
      "epoch": 1.5109352395540805,
      "grad_norm": 0.3244763910770416,
      "learning_rate": 7.024109960925918e-06,
      "loss": 0.0161,
      "step": 923260
    },
    {
      "epoch": 1.5109679699927339,
      "grad_norm": 0.5031883120536804,
      "learning_rate": 7.0240440687124015e-06,
      "loss": 0.0237,
      "step": 923280
    },
    {
      "epoch": 1.5110007004313872,
      "grad_norm": 3.3245062828063965,
      "learning_rate": 7.023978176498883e-06,
      "loss": 0.0262,
      "step": 923300
    },
    {
      "epoch": 1.5110334308700404,
      "grad_norm": 0.3846704065799713,
      "learning_rate": 7.023912284285367e-06,
      "loss": 0.0206,
      "step": 923320
    },
    {
      "epoch": 1.511066161308694,
      "grad_norm": 1.0881693363189697,
      "learning_rate": 7.023846392071849e-06,
      "loss": 0.0256,
      "step": 923340
    },
    {
      "epoch": 1.5110988917473471,
      "grad_norm": 0.34177616238594055,
      "learning_rate": 7.0237804998583324e-06,
      "loss": 0.0124,
      "step": 923360
    },
    {
      "epoch": 1.5111316221860005,
      "grad_norm": 0.1614120900630951,
      "learning_rate": 7.023714607644815e-06,
      "loss": 0.0192,
      "step": 923380
    },
    {
      "epoch": 1.5111643526246539,
      "grad_norm": 0.1273861676454544,
      "learning_rate": 7.023648715431298e-06,
      "loss": 0.0173,
      "step": 923400
    },
    {
      "epoch": 1.5111970830633072,
      "grad_norm": 0.9736250638961792,
      "learning_rate": 7.023582823217781e-06,
      "loss": 0.0279,
      "step": 923420
    },
    {
      "epoch": 1.5112298135019606,
      "grad_norm": 0.46434664726257324,
      "learning_rate": 7.023516931004264e-06,
      "loss": 0.0204,
      "step": 923440
    },
    {
      "epoch": 1.5112625439406138,
      "grad_norm": 0.15641526877880096,
      "learning_rate": 7.023451038790746e-06,
      "loss": 0.015,
      "step": 923460
    },
    {
      "epoch": 1.5112952743792674,
      "grad_norm": 0.5631182789802551,
      "learning_rate": 7.02338514657723e-06,
      "loss": 0.0222,
      "step": 923480
    },
    {
      "epoch": 1.5113280048179205,
      "grad_norm": 0.2950110137462616,
      "learning_rate": 7.023319254363712e-06,
      "loss": 0.0306,
      "step": 923500
    },
    {
      "epoch": 1.5113607352565739,
      "grad_norm": 0.19575135409832,
      "learning_rate": 7.023253362150195e-06,
      "loss": 0.017,
      "step": 923520
    },
    {
      "epoch": 1.5113934656952273,
      "grad_norm": 0.21704816818237305,
      "learning_rate": 7.023187469936679e-06,
      "loss": 0.0183,
      "step": 923540
    },
    {
      "epoch": 1.5114261961338806,
      "grad_norm": 0.29576629400253296,
      "learning_rate": 7.023121577723161e-06,
      "loss": 0.0156,
      "step": 923560
    },
    {
      "epoch": 1.511458926572534,
      "grad_norm": 0.7412660717964172,
      "learning_rate": 7.023055685509644e-06,
      "loss": 0.0198,
      "step": 923580
    },
    {
      "epoch": 1.5114916570111872,
      "grad_norm": 0.6480804681777954,
      "learning_rate": 7.022989793296127e-06,
      "loss": 0.0252,
      "step": 923600
    },
    {
      "epoch": 1.5115243874498407,
      "grad_norm": 0.6449151635169983,
      "learning_rate": 7.02292390108261e-06,
      "loss": 0.0173,
      "step": 923620
    },
    {
      "epoch": 1.511557117888494,
      "grad_norm": 0.9419389963150024,
      "learning_rate": 7.0228580088690925e-06,
      "loss": 0.0273,
      "step": 923640
    },
    {
      "epoch": 1.5115898483271473,
      "grad_norm": 0.29331809282302856,
      "learning_rate": 7.022792116655576e-06,
      "loss": 0.0187,
      "step": 923660
    },
    {
      "epoch": 1.5116225787658006,
      "grad_norm": 2.0268609523773193,
      "learning_rate": 7.022726224442058e-06,
      "loss": 0.0247,
      "step": 923680
    },
    {
      "epoch": 1.511655309204454,
      "grad_norm": 0.4373030364513397,
      "learning_rate": 7.0226603322285416e-06,
      "loss": 0.0163,
      "step": 923700
    },
    {
      "epoch": 1.5116880396431074,
      "grad_norm": 0.5245344638824463,
      "learning_rate": 7.0225944400150235e-06,
      "loss": 0.0297,
      "step": 923720
    },
    {
      "epoch": 1.5117207700817605,
      "grad_norm": 0.24542967975139618,
      "learning_rate": 7.022528547801507e-06,
      "loss": 0.0213,
      "step": 923740
    },
    {
      "epoch": 1.5117535005204141,
      "grad_norm": 1.3515905141830444,
      "learning_rate": 7.022462655587989e-06,
      "loss": 0.0242,
      "step": 923760
    },
    {
      "epoch": 1.5117862309590673,
      "grad_norm": 0.9584493041038513,
      "learning_rate": 7.0223967633744725e-06,
      "loss": 0.0153,
      "step": 923780
    },
    {
      "epoch": 1.5118189613977207,
      "grad_norm": 0.9189267158508301,
      "learning_rate": 7.022330871160955e-06,
      "loss": 0.0245,
      "step": 923800
    },
    {
      "epoch": 1.511851691836374,
      "grad_norm": 0.5380714535713196,
      "learning_rate": 7.022264978947438e-06,
      "loss": 0.0166,
      "step": 923820
    },
    {
      "epoch": 1.5118844222750272,
      "grad_norm": 0.6290966868400574,
      "learning_rate": 7.022199086733921e-06,
      "loss": 0.0199,
      "step": 923840
    },
    {
      "epoch": 1.5119171527136808,
      "grad_norm": 1.5490586757659912,
      "learning_rate": 7.022133194520404e-06,
      "loss": 0.0309,
      "step": 923860
    },
    {
      "epoch": 1.511949883152334,
      "grad_norm": 0.3874518871307373,
      "learning_rate": 7.022067302306887e-06,
      "loss": 0.0195,
      "step": 923880
    },
    {
      "epoch": 1.5119826135909875,
      "grad_norm": 0.7069586515426636,
      "learning_rate": 7.02200141009337e-06,
      "loss": 0.0217,
      "step": 923900
    },
    {
      "epoch": 1.5120153440296407,
      "grad_norm": 0.12762488424777985,
      "learning_rate": 7.021935517879853e-06,
      "loss": 0.0144,
      "step": 923920
    },
    {
      "epoch": 1.512048074468294,
      "grad_norm": 1.002885341644287,
      "learning_rate": 7.021869625666335e-06,
      "loss": 0.0182,
      "step": 923940
    },
    {
      "epoch": 1.5120808049069474,
      "grad_norm": 0.4026351273059845,
      "learning_rate": 7.021803733452819e-06,
      "loss": 0.013,
      "step": 923960
    },
    {
      "epoch": 1.5121135353456006,
      "grad_norm": 1.1579266786575317,
      "learning_rate": 7.021737841239301e-06,
      "loss": 0.0186,
      "step": 923980
    },
    {
      "epoch": 1.5121462657842542,
      "grad_norm": 0.7947648763656616,
      "learning_rate": 7.021671949025784e-06,
      "loss": 0.0199,
      "step": 924000
    },
    {
      "epoch": 1.5121789962229073,
      "grad_norm": 0.11546026915311813,
      "learning_rate": 7.021606056812267e-06,
      "loss": 0.0156,
      "step": 924020
    },
    {
      "epoch": 1.5122117266615607,
      "grad_norm": 0.3380764126777649,
      "learning_rate": 7.02154016459875e-06,
      "loss": 0.0141,
      "step": 924040
    },
    {
      "epoch": 1.512244457100214,
      "grad_norm": 0.49264273047447205,
      "learning_rate": 7.0214742723852326e-06,
      "loss": 0.0218,
      "step": 924060
    },
    {
      "epoch": 1.5122771875388674,
      "grad_norm": 0.11018407344818115,
      "learning_rate": 7.021408380171716e-06,
      "loss": 0.02,
      "step": 924080
    },
    {
      "epoch": 1.5123099179775208,
      "grad_norm": 1.1740663051605225,
      "learning_rate": 7.021342487958198e-06,
      "loss": 0.0248,
      "step": 924100
    },
    {
      "epoch": 1.512342648416174,
      "grad_norm": 0.817435085773468,
      "learning_rate": 7.021276595744682e-06,
      "loss": 0.0272,
      "step": 924120
    },
    {
      "epoch": 1.5123753788548275,
      "grad_norm": 2.0135600566864014,
      "learning_rate": 7.0212107035311635e-06,
      "loss": 0.0205,
      "step": 924140
    },
    {
      "epoch": 1.5124081092934807,
      "grad_norm": 0.17952053248882294,
      "learning_rate": 7.021144811317647e-06,
      "loss": 0.0197,
      "step": 924160
    },
    {
      "epoch": 1.512440839732134,
      "grad_norm": 0.5195039510726929,
      "learning_rate": 7.02107891910413e-06,
      "loss": 0.0226,
      "step": 924180
    },
    {
      "epoch": 1.5124735701707874,
      "grad_norm": 0.26154211163520813,
      "learning_rate": 7.021013026890613e-06,
      "loss": 0.0153,
      "step": 924200
    },
    {
      "epoch": 1.5125063006094408,
      "grad_norm": 0.43638554215431213,
      "learning_rate": 7.020947134677096e-06,
      "loss": 0.0225,
      "step": 924220
    },
    {
      "epoch": 1.5125390310480942,
      "grad_norm": 0.9064722061157227,
      "learning_rate": 7.020881242463579e-06,
      "loss": 0.0162,
      "step": 924240
    },
    {
      "epoch": 1.5125717614867473,
      "grad_norm": 0.23838669061660767,
      "learning_rate": 7.020815350250062e-06,
      "loss": 0.0139,
      "step": 924260
    },
    {
      "epoch": 1.512604491925401,
      "grad_norm": 0.20557692646980286,
      "learning_rate": 7.020749458036544e-06,
      "loss": 0.0137,
      "step": 924280
    },
    {
      "epoch": 1.512637222364054,
      "grad_norm": 0.6676154136657715,
      "learning_rate": 7.020683565823028e-06,
      "loss": 0.0177,
      "step": 924300
    },
    {
      "epoch": 1.5126699528027074,
      "grad_norm": 0.2220045030117035,
      "learning_rate": 7.02061767360951e-06,
      "loss": 0.0219,
      "step": 924320
    },
    {
      "epoch": 1.5127026832413608,
      "grad_norm": 0.47008392214775085,
      "learning_rate": 7.0205517813959935e-06,
      "loss": 0.0167,
      "step": 924340
    },
    {
      "epoch": 1.5127354136800142,
      "grad_norm": 1.24351167678833,
      "learning_rate": 7.020485889182475e-06,
      "loss": 0.0194,
      "step": 924360
    },
    {
      "epoch": 1.5127681441186676,
      "grad_norm": 0.6608772277832031,
      "learning_rate": 7.020419996968959e-06,
      "loss": 0.0177,
      "step": 924380
    },
    {
      "epoch": 1.5128008745573207,
      "grad_norm": 0.49676838517189026,
      "learning_rate": 7.020354104755442e-06,
      "loss": 0.0249,
      "step": 924400
    },
    {
      "epoch": 1.5128336049959743,
      "grad_norm": 0.2617177367210388,
      "learning_rate": 7.0202882125419244e-06,
      "loss": 0.0143,
      "step": 924420
    },
    {
      "epoch": 1.5128663354346275,
      "grad_norm": 0.41822993755340576,
      "learning_rate": 7.020222320328407e-06,
      "loss": 0.024,
      "step": 924440
    },
    {
      "epoch": 1.5128990658732808,
      "grad_norm": 0.38432127237319946,
      "learning_rate": 7.020156428114891e-06,
      "loss": 0.0158,
      "step": 924460
    },
    {
      "epoch": 1.5129317963119342,
      "grad_norm": 0.8450193405151367,
      "learning_rate": 7.020090535901373e-06,
      "loss": 0.0281,
      "step": 924480
    },
    {
      "epoch": 1.5129645267505873,
      "grad_norm": 0.6066752076148987,
      "learning_rate": 7.020024643687856e-06,
      "loss": 0.0217,
      "step": 924500
    },
    {
      "epoch": 1.512997257189241,
      "grad_norm": 0.21697358787059784,
      "learning_rate": 7.019958751474338e-06,
      "loss": 0.0219,
      "step": 924520
    },
    {
      "epoch": 1.513029987627894,
      "grad_norm": 0.7104600667953491,
      "learning_rate": 7.019892859260822e-06,
      "loss": 0.0271,
      "step": 924540
    },
    {
      "epoch": 1.5130627180665477,
      "grad_norm": 1.4879488945007324,
      "learning_rate": 7.0198269670473045e-06,
      "loss": 0.0212,
      "step": 924560
    },
    {
      "epoch": 1.5130954485052008,
      "grad_norm": 1.1272059679031372,
      "learning_rate": 7.019761074833787e-06,
      "loss": 0.0106,
      "step": 924580
    },
    {
      "epoch": 1.5131281789438542,
      "grad_norm": 0.33435702323913574,
      "learning_rate": 7.019695182620271e-06,
      "loss": 0.0123,
      "step": 924600
    },
    {
      "epoch": 1.5131609093825076,
      "grad_norm": 0.7304930090904236,
      "learning_rate": 7.0196292904067535e-06,
      "loss": 0.0235,
      "step": 924620
    },
    {
      "epoch": 1.5131936398211607,
      "grad_norm": 0.5543848276138306,
      "learning_rate": 7.019563398193236e-06,
      "loss": 0.0118,
      "step": 924640
    },
    {
      "epoch": 1.5132263702598143,
      "grad_norm": 0.99824458360672,
      "learning_rate": 7.019497505979719e-06,
      "loss": 0.026,
      "step": 924660
    },
    {
      "epoch": 1.5132591006984675,
      "grad_norm": 1.200171709060669,
      "learning_rate": 7.019431613766203e-06,
      "loss": 0.0322,
      "step": 924680
    },
    {
      "epoch": 1.5132918311371208,
      "grad_norm": 2.3642401695251465,
      "learning_rate": 7.0193657215526845e-06,
      "loss": 0.0215,
      "step": 924700
    },
    {
      "epoch": 1.5133245615757742,
      "grad_norm": 0.39246490597724915,
      "learning_rate": 7.019299829339168e-06,
      "loss": 0.0237,
      "step": 924720
    },
    {
      "epoch": 1.5133572920144276,
      "grad_norm": 0.6568348407745361,
      "learning_rate": 7.01923393712565e-06,
      "loss": 0.0188,
      "step": 924740
    },
    {
      "epoch": 1.513390022453081,
      "grad_norm": 1.0584670305252075,
      "learning_rate": 7.0191680449121335e-06,
      "loss": 0.0251,
      "step": 924760
    },
    {
      "epoch": 1.5134227528917341,
      "grad_norm": 1.3622959852218628,
      "learning_rate": 7.0191021526986154e-06,
      "loss": 0.0209,
      "step": 924780
    },
    {
      "epoch": 1.5134554833303877,
      "grad_norm": 0.5115916132926941,
      "learning_rate": 7.019036260485099e-06,
      "loss": 0.0146,
      "step": 924800
    },
    {
      "epoch": 1.5134882137690409,
      "grad_norm": 0.7368760108947754,
      "learning_rate": 7.018970368271582e-06,
      "loss": 0.0156,
      "step": 924820
    },
    {
      "epoch": 1.5135209442076942,
      "grad_norm": 0.44277068972587585,
      "learning_rate": 7.0189044760580645e-06,
      "loss": 0.0308,
      "step": 924840
    },
    {
      "epoch": 1.5135536746463476,
      "grad_norm": 0.5395426750183105,
      "learning_rate": 7.018838583844547e-06,
      "loss": 0.024,
      "step": 924860
    },
    {
      "epoch": 1.513586405085001,
      "grad_norm": 0.40994739532470703,
      "learning_rate": 7.018772691631031e-06,
      "loss": 0.0179,
      "step": 924880
    },
    {
      "epoch": 1.5136191355236543,
      "grad_norm": 1.0831997394561768,
      "learning_rate": 7.018706799417513e-06,
      "loss": 0.0232,
      "step": 924900
    },
    {
      "epoch": 1.5136518659623075,
      "grad_norm": 0.8252041339874268,
      "learning_rate": 7.018640907203996e-06,
      "loss": 0.0217,
      "step": 924920
    },
    {
      "epoch": 1.513684596400961,
      "grad_norm": 2.1430776119232178,
      "learning_rate": 7.01857501499048e-06,
      "loss": 0.0239,
      "step": 924940
    },
    {
      "epoch": 1.5137173268396142,
      "grad_norm": 0.7788601517677307,
      "learning_rate": 7.018509122776962e-06,
      "loss": 0.015,
      "step": 924960
    },
    {
      "epoch": 1.5137500572782676,
      "grad_norm": 0.7513806223869324,
      "learning_rate": 7.018443230563445e-06,
      "loss": 0.0289,
      "step": 924980
    },
    {
      "epoch": 1.513782787716921,
      "grad_norm": 0.8855488896369934,
      "learning_rate": 7.018377338349927e-06,
      "loss": 0.0273,
      "step": 925000
    },
    {
      "epoch": 1.5138155181555744,
      "grad_norm": 0.5514367818832397,
      "learning_rate": 7.018311446136411e-06,
      "loss": 0.0299,
      "step": 925020
    },
    {
      "epoch": 1.5138482485942277,
      "grad_norm": 0.8594176769256592,
      "learning_rate": 7.018245553922894e-06,
      "loss": 0.0163,
      "step": 925040
    },
    {
      "epoch": 1.5138809790328809,
      "grad_norm": 1.0871833562850952,
      "learning_rate": 7.018179661709376e-06,
      "loss": 0.022,
      "step": 925060
    },
    {
      "epoch": 1.5139137094715345,
      "grad_norm": 0.6971949338912964,
      "learning_rate": 7.018113769495859e-06,
      "loss": 0.0256,
      "step": 925080
    },
    {
      "epoch": 1.5139464399101876,
      "grad_norm": 0.4676021933555603,
      "learning_rate": 7.018047877282343e-06,
      "loss": 0.0203,
      "step": 925100
    },
    {
      "epoch": 1.513979170348841,
      "grad_norm": 0.34822797775268555,
      "learning_rate": 7.0179819850688246e-06,
      "loss": 0.0147,
      "step": 925120
    },
    {
      "epoch": 1.5140119007874944,
      "grad_norm": 0.4259166717529297,
      "learning_rate": 7.017916092855308e-06,
      "loss": 0.0212,
      "step": 925140
    },
    {
      "epoch": 1.5140446312261477,
      "grad_norm": 0.7214826345443726,
      "learning_rate": 7.01785020064179e-06,
      "loss": 0.0234,
      "step": 925160
    },
    {
      "epoch": 1.5140773616648011,
      "grad_norm": 0.26655760407447815,
      "learning_rate": 7.017784308428274e-06,
      "loss": 0.0306,
      "step": 925180
    },
    {
      "epoch": 1.5141100921034543,
      "grad_norm": 1.0518096685409546,
      "learning_rate": 7.017718416214756e-06,
      "loss": 0.0261,
      "step": 925200
    },
    {
      "epoch": 1.5141428225421079,
      "grad_norm": 0.8758410811424255,
      "learning_rate": 7.017652524001239e-06,
      "loss": 0.0229,
      "step": 925220
    },
    {
      "epoch": 1.514175552980761,
      "grad_norm": 0.36012452840805054,
      "learning_rate": 7.017586631787722e-06,
      "loss": 0.0235,
      "step": 925240
    },
    {
      "epoch": 1.5142082834194144,
      "grad_norm": 1.0365349054336548,
      "learning_rate": 7.0175207395742054e-06,
      "loss": 0.0302,
      "step": 925260
    },
    {
      "epoch": 1.5142410138580678,
      "grad_norm": 0.3125467002391815,
      "learning_rate": 7.017454847360688e-06,
      "loss": 0.0179,
      "step": 925280
    },
    {
      "epoch": 1.514273744296721,
      "grad_norm": 1.6781870126724243,
      "learning_rate": 7.017388955147171e-06,
      "loss": 0.0161,
      "step": 925300
    },
    {
      "epoch": 1.5143064747353745,
      "grad_norm": 1.170688509941101,
      "learning_rate": 7.0173230629336545e-06,
      "loss": 0.0231,
      "step": 925320
    },
    {
      "epoch": 1.5143392051740276,
      "grad_norm": 0.663938045501709,
      "learning_rate": 7.017257170720136e-06,
      "loss": 0.0294,
      "step": 925340
    },
    {
      "epoch": 1.5143719356126812,
      "grad_norm": 0.12329545617103577,
      "learning_rate": 7.01719127850662e-06,
      "loss": 0.0205,
      "step": 925360
    },
    {
      "epoch": 1.5144046660513344,
      "grad_norm": 0.7306625247001648,
      "learning_rate": 7.017125386293102e-06,
      "loss": 0.0219,
      "step": 925380
    },
    {
      "epoch": 1.5144373964899878,
      "grad_norm": 0.37671294808387756,
      "learning_rate": 7.0170594940795855e-06,
      "loss": 0.0209,
      "step": 925400
    },
    {
      "epoch": 1.5144701269286411,
      "grad_norm": 0.572882354259491,
      "learning_rate": 7.016993601866068e-06,
      "loss": 0.0202,
      "step": 925420
    },
    {
      "epoch": 1.5145028573672943,
      "grad_norm": 0.8901045918464661,
      "learning_rate": 7.016927709652551e-06,
      "loss": 0.0141,
      "step": 925440
    },
    {
      "epoch": 1.5145355878059479,
      "grad_norm": 0.8628190755844116,
      "learning_rate": 7.016861817439034e-06,
      "loss": 0.0275,
      "step": 925460
    },
    {
      "epoch": 1.514568318244601,
      "grad_norm": 0.26406434178352356,
      "learning_rate": 7.016795925225517e-06,
      "loss": 0.0143,
      "step": 925480
    },
    {
      "epoch": 1.5146010486832544,
      "grad_norm": 0.3638216555118561,
      "learning_rate": 7.016730033011999e-06,
      "loss": 0.0123,
      "step": 925500
    },
    {
      "epoch": 1.5146337791219078,
      "grad_norm": 0.34313052892684937,
      "learning_rate": 7.016664140798483e-06,
      "loss": 0.0169,
      "step": 925520
    },
    {
      "epoch": 1.5146665095605611,
      "grad_norm": 0.2857021391391754,
      "learning_rate": 7.016598248584965e-06,
      "loss": 0.0183,
      "step": 925540
    },
    {
      "epoch": 1.5146992399992145,
      "grad_norm": 0.6536049842834473,
      "learning_rate": 7.016532356371448e-06,
      "loss": 0.0285,
      "step": 925560
    },
    {
      "epoch": 1.5147319704378677,
      "grad_norm": 0.05856487154960632,
      "learning_rate": 7.01646646415793e-06,
      "loss": 0.0176,
      "step": 925580
    },
    {
      "epoch": 1.5147647008765213,
      "grad_norm": 1.3249495029449463,
      "learning_rate": 7.016400571944414e-06,
      "loss": 0.0219,
      "step": 925600
    },
    {
      "epoch": 1.5147974313151744,
      "grad_norm": 1.1197997331619263,
      "learning_rate": 7.016334679730897e-06,
      "loss": 0.0242,
      "step": 925620
    },
    {
      "epoch": 1.5148301617538278,
      "grad_norm": 0.7698585987091064,
      "learning_rate": 7.01626878751738e-06,
      "loss": 0.0106,
      "step": 925640
    },
    {
      "epoch": 1.5148628921924812,
      "grad_norm": 1.2584125995635986,
      "learning_rate": 7.016202895303863e-06,
      "loss": 0.0259,
      "step": 925660
    },
    {
      "epoch": 1.5148956226311345,
      "grad_norm": 1.08604097366333,
      "learning_rate": 7.0161370030903455e-06,
      "loss": 0.0143,
      "step": 925680
    },
    {
      "epoch": 1.514928353069788,
      "grad_norm": 0.6636776924133301,
      "learning_rate": 7.016071110876829e-06,
      "loss": 0.0253,
      "step": 925700
    },
    {
      "epoch": 1.514961083508441,
      "grad_norm": 0.1654464155435562,
      "learning_rate": 7.016005218663311e-06,
      "loss": 0.0212,
      "step": 925720
    },
    {
      "epoch": 1.5149938139470946,
      "grad_norm": 1.1686525344848633,
      "learning_rate": 7.0159393264497946e-06,
      "loss": 0.0178,
      "step": 925740
    },
    {
      "epoch": 1.5150265443857478,
      "grad_norm": 0.30184465646743774,
      "learning_rate": 7.0158734342362765e-06,
      "loss": 0.0158,
      "step": 925760
    },
    {
      "epoch": 1.5150592748244012,
      "grad_norm": 0.7393087148666382,
      "learning_rate": 7.01580754202276e-06,
      "loss": 0.0226,
      "step": 925780
    },
    {
      "epoch": 1.5150920052630545,
      "grad_norm": 0.44900432229042053,
      "learning_rate": 7.015741649809242e-06,
      "loss": 0.029,
      "step": 925800
    },
    {
      "epoch": 1.515124735701708,
      "grad_norm": 0.22073252499103546,
      "learning_rate": 7.0156757575957255e-06,
      "loss": 0.0267,
      "step": 925820
    },
    {
      "epoch": 1.5151574661403613,
      "grad_norm": 0.5864062905311584,
      "learning_rate": 7.015609865382208e-06,
      "loss": 0.0219,
      "step": 925840
    },
    {
      "epoch": 1.5151901965790144,
      "grad_norm": 0.5615379810333252,
      "learning_rate": 7.015543973168691e-06,
      "loss": 0.0277,
      "step": 925860
    },
    {
      "epoch": 1.515222927017668,
      "grad_norm": 0.46829065680503845,
      "learning_rate": 7.015478080955174e-06,
      "loss": 0.0287,
      "step": 925880
    },
    {
      "epoch": 1.5152556574563212,
      "grad_norm": 0.40021824836730957,
      "learning_rate": 7.015412188741657e-06,
      "loss": 0.0208,
      "step": 925900
    },
    {
      "epoch": 1.5152883878949746,
      "grad_norm": 0.7609534859657288,
      "learning_rate": 7.015346296528139e-06,
      "loss": 0.0216,
      "step": 925920
    },
    {
      "epoch": 1.515321118333628,
      "grad_norm": 0.3134104907512665,
      "learning_rate": 7.015280404314623e-06,
      "loss": 0.0218,
      "step": 925940
    },
    {
      "epoch": 1.5153538487722813,
      "grad_norm": 0.338830828666687,
      "learning_rate": 7.015214512101105e-06,
      "loss": 0.0206,
      "step": 925960
    },
    {
      "epoch": 1.5153865792109347,
      "grad_norm": 0.20040903985500336,
      "learning_rate": 7.015148619887588e-06,
      "loss": 0.0255,
      "step": 925980
    },
    {
      "epoch": 1.5154193096495878,
      "grad_norm": 0.628879189491272,
      "learning_rate": 7.015082727674072e-06,
      "loss": 0.028,
      "step": 926000
    },
    {
      "epoch": 1.5154520400882414,
      "grad_norm": 0.7121365666389465,
      "learning_rate": 7.015016835460554e-06,
      "loss": 0.023,
      "step": 926020
    },
    {
      "epoch": 1.5154847705268946,
      "grad_norm": 0.2004445195198059,
      "learning_rate": 7.014950943247037e-06,
      "loss": 0.0217,
      "step": 926040
    },
    {
      "epoch": 1.515517500965548,
      "grad_norm": 0.370609849691391,
      "learning_rate": 7.01488505103352e-06,
      "loss": 0.0156,
      "step": 926060
    },
    {
      "epoch": 1.5155502314042013,
      "grad_norm": 0.292095422744751,
      "learning_rate": 7.014819158820003e-06,
      "loss": 0.0167,
      "step": 926080
    },
    {
      "epoch": 1.5155829618428545,
      "grad_norm": 0.26471859216690063,
      "learning_rate": 7.014753266606486e-06,
      "loss": 0.0263,
      "step": 926100
    },
    {
      "epoch": 1.515615692281508,
      "grad_norm": 1.243686318397522,
      "learning_rate": 7.014687374392969e-06,
      "loss": 0.0265,
      "step": 926120
    },
    {
      "epoch": 1.5156484227201612,
      "grad_norm": 0.7725430130958557,
      "learning_rate": 7.014621482179451e-06,
      "loss": 0.0168,
      "step": 926140
    },
    {
      "epoch": 1.5156811531588148,
      "grad_norm": 0.3545810282230377,
      "learning_rate": 7.014555589965935e-06,
      "loss": 0.023,
      "step": 926160
    },
    {
      "epoch": 1.515713883597468,
      "grad_norm": 0.6369127035140991,
      "learning_rate": 7.0144896977524165e-06,
      "loss": 0.0228,
      "step": 926180
    },
    {
      "epoch": 1.5157466140361213,
      "grad_norm": 0.11998934298753738,
      "learning_rate": 7.0144238055389e-06,
      "loss": 0.0226,
      "step": 926200
    },
    {
      "epoch": 1.5157793444747747,
      "grad_norm": 1.2400074005126953,
      "learning_rate": 7.014357913325383e-06,
      "loss": 0.0196,
      "step": 926220
    },
    {
      "epoch": 1.5158120749134278,
      "grad_norm": 0.12050486356019974,
      "learning_rate": 7.014292021111866e-06,
      "loss": 0.0116,
      "step": 926240
    },
    {
      "epoch": 1.5158448053520814,
      "grad_norm": 1.0424247980117798,
      "learning_rate": 7.014226128898348e-06,
      "loss": 0.0177,
      "step": 926260
    },
    {
      "epoch": 1.5158775357907346,
      "grad_norm": 1.1047534942626953,
      "learning_rate": 7.014160236684832e-06,
      "loss": 0.0254,
      "step": 926280
    },
    {
      "epoch": 1.515910266229388,
      "grad_norm": 0.9380165338516235,
      "learning_rate": 7.014094344471314e-06,
      "loss": 0.0213,
      "step": 926300
    },
    {
      "epoch": 1.5159429966680413,
      "grad_norm": 0.3759900629520416,
      "learning_rate": 7.014028452257797e-06,
      "loss": 0.0169,
      "step": 926320
    },
    {
      "epoch": 1.5159757271066947,
      "grad_norm": 0.4364895224571228,
      "learning_rate": 7.013962560044281e-06,
      "loss": 0.0127,
      "step": 926340
    },
    {
      "epoch": 1.516008457545348,
      "grad_norm": 1.8704110383987427,
      "learning_rate": 7.013896667830763e-06,
      "loss": 0.0193,
      "step": 926360
    },
    {
      "epoch": 1.5160411879840012,
      "grad_norm": 0.11018303036689758,
      "learning_rate": 7.0138307756172465e-06,
      "loss": 0.0222,
      "step": 926380
    },
    {
      "epoch": 1.5160739184226548,
      "grad_norm": 1.8605724573135376,
      "learning_rate": 7.013764883403728e-06,
      "loss": 0.0287,
      "step": 926400
    },
    {
      "epoch": 1.516106648861308,
      "grad_norm": 0.494316428899765,
      "learning_rate": 7.013698991190212e-06,
      "loss": 0.0171,
      "step": 926420
    },
    {
      "epoch": 1.5161393792999613,
      "grad_norm": 0.9430813789367676,
      "learning_rate": 7.013633098976695e-06,
      "loss": 0.0242,
      "step": 926440
    },
    {
      "epoch": 1.5161721097386147,
      "grad_norm": 0.8319577574729919,
      "learning_rate": 7.0135672067631774e-06,
      "loss": 0.0167,
      "step": 926460
    },
    {
      "epoch": 1.516204840177268,
      "grad_norm": 0.3708254098892212,
      "learning_rate": 7.01350131454966e-06,
      "loss": 0.0191,
      "step": 926480
    },
    {
      "epoch": 1.5162375706159215,
      "grad_norm": 0.29449257254600525,
      "learning_rate": 7.013435422336144e-06,
      "loss": 0.0159,
      "step": 926500
    },
    {
      "epoch": 1.5162703010545746,
      "grad_norm": 0.3515494167804718,
      "learning_rate": 7.013369530122626e-06,
      "loss": 0.0182,
      "step": 926520
    },
    {
      "epoch": 1.5163030314932282,
      "grad_norm": 0.2924330234527588,
      "learning_rate": 7.013303637909109e-06,
      "loss": 0.0159,
      "step": 926540
    },
    {
      "epoch": 1.5163357619318814,
      "grad_norm": 0.4346688687801361,
      "learning_rate": 7.013237745695591e-06,
      "loss": 0.0251,
      "step": 926560
    },
    {
      "epoch": 1.5163684923705347,
      "grad_norm": 0.6430606245994568,
      "learning_rate": 7.013171853482075e-06,
      "loss": 0.0168,
      "step": 926580
    },
    {
      "epoch": 1.516401222809188,
      "grad_norm": 0.6575739979743958,
      "learning_rate": 7.013105961268557e-06,
      "loss": 0.0176,
      "step": 926600
    },
    {
      "epoch": 1.5164339532478415,
      "grad_norm": 0.041030049324035645,
      "learning_rate": 7.01304006905504e-06,
      "loss": 0.0132,
      "step": 926620
    },
    {
      "epoch": 1.5164666836864948,
      "grad_norm": 0.7823876142501831,
      "learning_rate": 7.012974176841523e-06,
      "loss": 0.0231,
      "step": 926640
    },
    {
      "epoch": 1.516499414125148,
      "grad_norm": 0.5487851500511169,
      "learning_rate": 7.012908284628006e-06,
      "loss": 0.0164,
      "step": 926660
    },
    {
      "epoch": 1.5165321445638016,
      "grad_norm": 0.8547452688217163,
      "learning_rate": 7.012842392414489e-06,
      "loss": 0.0176,
      "step": 926680
    },
    {
      "epoch": 1.5165648750024547,
      "grad_norm": 0.18934881687164307,
      "learning_rate": 7.012776500200972e-06,
      "loss": 0.0284,
      "step": 926700
    },
    {
      "epoch": 1.516597605441108,
      "grad_norm": 0.5532531142234802,
      "learning_rate": 7.012710607987456e-06,
      "loss": 0.0151,
      "step": 926720
    },
    {
      "epoch": 1.5166303358797615,
      "grad_norm": 0.5471351742744446,
      "learning_rate": 7.0126447157739375e-06,
      "loss": 0.0176,
      "step": 926740
    },
    {
      "epoch": 1.5166630663184149,
      "grad_norm": 1.7779945135116577,
      "learning_rate": 7.012578823560421e-06,
      "loss": 0.0205,
      "step": 926760
    },
    {
      "epoch": 1.5166957967570682,
      "grad_norm": 0.3870580196380615,
      "learning_rate": 7.012512931346903e-06,
      "loss": 0.0178,
      "step": 926780
    },
    {
      "epoch": 1.5167285271957214,
      "grad_norm": 0.4874636232852936,
      "learning_rate": 7.0124470391333866e-06,
      "loss": 0.0248,
      "step": 926800
    },
    {
      "epoch": 1.516761257634375,
      "grad_norm": 0.3280910849571228,
      "learning_rate": 7.0123811469198684e-06,
      "loss": 0.0201,
      "step": 926820
    },
    {
      "epoch": 1.5167939880730281,
      "grad_norm": 0.7902160882949829,
      "learning_rate": 7.012315254706352e-06,
      "loss": 0.0146,
      "step": 926840
    },
    {
      "epoch": 1.5168267185116815,
      "grad_norm": 0.9259732365608215,
      "learning_rate": 7.012249362492835e-06,
      "loss": 0.0185,
      "step": 926860
    },
    {
      "epoch": 1.5168594489503349,
      "grad_norm": 1.1613240242004395,
      "learning_rate": 7.0121834702793175e-06,
      "loss": 0.0139,
      "step": 926880
    },
    {
      "epoch": 1.516892179388988,
      "grad_norm": 0.45560646057128906,
      "learning_rate": 7.0121175780658e-06,
      "loss": 0.0136,
      "step": 926900
    },
    {
      "epoch": 1.5169249098276416,
      "grad_norm": 0.2565898597240448,
      "learning_rate": 7.012051685852284e-06,
      "loss": 0.0186,
      "step": 926920
    },
    {
      "epoch": 1.5169576402662948,
      "grad_norm": 0.483355313539505,
      "learning_rate": 7.011985793638766e-06,
      "loss": 0.0156,
      "step": 926940
    },
    {
      "epoch": 1.5169903707049481,
      "grad_norm": 1.1024295091629028,
      "learning_rate": 7.011919901425249e-06,
      "loss": 0.0184,
      "step": 926960
    },
    {
      "epoch": 1.5170231011436015,
      "grad_norm": 0.33925381302833557,
      "learning_rate": 7.011854009211731e-06,
      "loss": 0.0229,
      "step": 926980
    },
    {
      "epoch": 1.5170558315822549,
      "grad_norm": 0.42580631375312805,
      "learning_rate": 7.011788116998215e-06,
      "loss": 0.0198,
      "step": 927000
    },
    {
      "epoch": 1.5170885620209082,
      "grad_norm": 0.2586566209793091,
      "learning_rate": 7.0117222247846975e-06,
      "loss": 0.0239,
      "step": 927020
    },
    {
      "epoch": 1.5171212924595614,
      "grad_norm": 0.2129056304693222,
      "learning_rate": 7.01165633257118e-06,
      "loss": 0.0165,
      "step": 927040
    },
    {
      "epoch": 1.517154022898215,
      "grad_norm": 0.6015836000442505,
      "learning_rate": 7.011590440357664e-06,
      "loss": 0.0177,
      "step": 927060
    },
    {
      "epoch": 1.5171867533368681,
      "grad_norm": 1.1588445901870728,
      "learning_rate": 7.011524548144147e-06,
      "loss": 0.0232,
      "step": 927080
    },
    {
      "epoch": 1.5172194837755215,
      "grad_norm": 0.7423388957977295,
      "learning_rate": 7.011458655930629e-06,
      "loss": 0.0227,
      "step": 927100
    },
    {
      "epoch": 1.5172522142141749,
      "grad_norm": 0.5825262665748596,
      "learning_rate": 7.011392763717112e-06,
      "loss": 0.0211,
      "step": 927120
    },
    {
      "epoch": 1.5172849446528283,
      "grad_norm": 0.9835978150367737,
      "learning_rate": 7.011326871503596e-06,
      "loss": 0.0269,
      "step": 927140
    },
    {
      "epoch": 1.5173176750914816,
      "grad_norm": 1.438769817352295,
      "learning_rate": 7.0112609792900776e-06,
      "loss": 0.0363,
      "step": 927160
    },
    {
      "epoch": 1.5173504055301348,
      "grad_norm": 0.7228630185127258,
      "learning_rate": 7.011195087076561e-06,
      "loss": 0.0138,
      "step": 927180
    },
    {
      "epoch": 1.5173831359687884,
      "grad_norm": 1.02167546749115,
      "learning_rate": 7.011129194863043e-06,
      "loss": 0.0246,
      "step": 927200
    },
    {
      "epoch": 1.5174158664074415,
      "grad_norm": 0.6209930181503296,
      "learning_rate": 7.011063302649527e-06,
      "loss": 0.0237,
      "step": 927220
    },
    {
      "epoch": 1.517448596846095,
      "grad_norm": 0.3625984489917755,
      "learning_rate": 7.010997410436009e-06,
      "loss": 0.0292,
      "step": 927240
    },
    {
      "epoch": 1.5174813272847483,
      "grad_norm": 0.47143447399139404,
      "learning_rate": 7.010931518222492e-06,
      "loss": 0.023,
      "step": 927260
    },
    {
      "epoch": 1.5175140577234016,
      "grad_norm": 0.36178773641586304,
      "learning_rate": 7.010865626008975e-06,
      "loss": 0.0175,
      "step": 927280
    },
    {
      "epoch": 1.517546788162055,
      "grad_norm": 0.3189488649368286,
      "learning_rate": 7.0107997337954584e-06,
      "loss": 0.0126,
      "step": 927300
    },
    {
      "epoch": 1.5175795186007082,
      "grad_norm": 0.6339877247810364,
      "learning_rate": 7.01073384158194e-06,
      "loss": 0.0334,
      "step": 927320
    },
    {
      "epoch": 1.5176122490393618,
      "grad_norm": 0.7907056212425232,
      "learning_rate": 7.010667949368424e-06,
      "loss": 0.0203,
      "step": 927340
    },
    {
      "epoch": 1.517644979478015,
      "grad_norm": 0.8538866639137268,
      "learning_rate": 7.010602057154906e-06,
      "loss": 0.022,
      "step": 927360
    },
    {
      "epoch": 1.5176777099166683,
      "grad_norm": 0.7280771136283875,
      "learning_rate": 7.010536164941389e-06,
      "loss": 0.0198,
      "step": 927380
    },
    {
      "epoch": 1.5177104403553217,
      "grad_norm": 0.8090405464172363,
      "learning_rate": 7.010470272727873e-06,
      "loss": 0.0206,
      "step": 927400
    },
    {
      "epoch": 1.517743170793975,
      "grad_norm": 0.5331766605377197,
      "learning_rate": 7.010404380514355e-06,
      "loss": 0.0207,
      "step": 927420
    },
    {
      "epoch": 1.5177759012326284,
      "grad_norm": 0.5873997211456299,
      "learning_rate": 7.0103384883008385e-06,
      "loss": 0.0162,
      "step": 927440
    },
    {
      "epoch": 1.5178086316712815,
      "grad_norm": 0.8875613808631897,
      "learning_rate": 7.010272596087321e-06,
      "loss": 0.0205,
      "step": 927460
    },
    {
      "epoch": 1.5178413621099351,
      "grad_norm": 0.6490055322647095,
      "learning_rate": 7.010206703873804e-06,
      "loss": 0.0193,
      "step": 927480
    },
    {
      "epoch": 1.5178740925485883,
      "grad_norm": 1.1188639402389526,
      "learning_rate": 7.010140811660287e-06,
      "loss": 0.0173,
      "step": 927500
    },
    {
      "epoch": 1.5179068229872417,
      "grad_norm": 1.1166754961013794,
      "learning_rate": 7.01007491944677e-06,
      "loss": 0.0261,
      "step": 927520
    },
    {
      "epoch": 1.517939553425895,
      "grad_norm": 0.6676897406578064,
      "learning_rate": 7.010009027233252e-06,
      "loss": 0.0323,
      "step": 927540
    },
    {
      "epoch": 1.5179722838645482,
      "grad_norm": 0.35895097255706787,
      "learning_rate": 7.009943135019736e-06,
      "loss": 0.0153,
      "step": 927560
    },
    {
      "epoch": 1.5180050143032018,
      "grad_norm": 1.4569119215011597,
      "learning_rate": 7.009877242806218e-06,
      "loss": 0.0151,
      "step": 927580
    },
    {
      "epoch": 1.518037744741855,
      "grad_norm": 0.25081682205200195,
      "learning_rate": 7.009811350592701e-06,
      "loss": 0.0187,
      "step": 927600
    },
    {
      "epoch": 1.5180704751805085,
      "grad_norm": 0.16881489753723145,
      "learning_rate": 7.009745458379183e-06,
      "loss": 0.0276,
      "step": 927620
    },
    {
      "epoch": 1.5181032056191617,
      "grad_norm": 0.31233787536621094,
      "learning_rate": 7.009679566165667e-06,
      "loss": 0.0177,
      "step": 927640
    },
    {
      "epoch": 1.518135936057815,
      "grad_norm": 0.5464476943016052,
      "learning_rate": 7.0096136739521494e-06,
      "loss": 0.0141,
      "step": 927660
    },
    {
      "epoch": 1.5181686664964684,
      "grad_norm": 0.15893340110778809,
      "learning_rate": 7.009547781738632e-06,
      "loss": 0.0167,
      "step": 927680
    },
    {
      "epoch": 1.5182013969351216,
      "grad_norm": 0.16591104865074158,
      "learning_rate": 7.009481889525115e-06,
      "loss": 0.0189,
      "step": 927700
    },
    {
      "epoch": 1.5182341273737752,
      "grad_norm": 0.8225938081741333,
      "learning_rate": 7.0094159973115985e-06,
      "loss": 0.0222,
      "step": 927720
    },
    {
      "epoch": 1.5182668578124283,
      "grad_norm": 0.4789489805698395,
      "learning_rate": 7.009350105098081e-06,
      "loss": 0.017,
      "step": 927740
    },
    {
      "epoch": 1.5182995882510817,
      "grad_norm": 0.8240590691566467,
      "learning_rate": 7.009284212884564e-06,
      "loss": 0.0131,
      "step": 927760
    },
    {
      "epoch": 1.518332318689735,
      "grad_norm": 1.5895899534225464,
      "learning_rate": 7.009218320671048e-06,
      "loss": 0.0217,
      "step": 927780
    },
    {
      "epoch": 1.5183650491283884,
      "grad_norm": 0.5635229349136353,
      "learning_rate": 7.0091524284575295e-06,
      "loss": 0.0181,
      "step": 927800
    },
    {
      "epoch": 1.5183977795670418,
      "grad_norm": 0.5398390293121338,
      "learning_rate": 7.009086536244013e-06,
      "loss": 0.0178,
      "step": 927820
    },
    {
      "epoch": 1.518430510005695,
      "grad_norm": 0.2407907247543335,
      "learning_rate": 7.009020644030495e-06,
      "loss": 0.0155,
      "step": 927840
    },
    {
      "epoch": 1.5184632404443485,
      "grad_norm": 0.2608640491962433,
      "learning_rate": 7.0089547518169785e-06,
      "loss": 0.0203,
      "step": 927860
    },
    {
      "epoch": 1.5184959708830017,
      "grad_norm": 0.4342958331108093,
      "learning_rate": 7.008888859603461e-06,
      "loss": 0.0184,
      "step": 927880
    },
    {
      "epoch": 1.518528701321655,
      "grad_norm": 0.6640714406967163,
      "learning_rate": 7.008822967389944e-06,
      "loss": 0.0165,
      "step": 927900
    },
    {
      "epoch": 1.5185614317603084,
      "grad_norm": 0.49910303950309753,
      "learning_rate": 7.008757075176427e-06,
      "loss": 0.0181,
      "step": 927920
    },
    {
      "epoch": 1.5185941621989618,
      "grad_norm": 1.019255518913269,
      "learning_rate": 7.00869118296291e-06,
      "loss": 0.0219,
      "step": 927940
    },
    {
      "epoch": 1.5186268926376152,
      "grad_norm": 0.7583178281784058,
      "learning_rate": 7.008625290749392e-06,
      "loss": 0.018,
      "step": 927960
    },
    {
      "epoch": 1.5186596230762683,
      "grad_norm": 0.14260946214199066,
      "learning_rate": 7.008559398535876e-06,
      "loss": 0.0251,
      "step": 927980
    },
    {
      "epoch": 1.518692353514922,
      "grad_norm": 0.7577916383743286,
      "learning_rate": 7.008493506322358e-06,
      "loss": 0.0252,
      "step": 928000
    },
    {
      "epoch": 1.518725083953575,
      "grad_norm": 0.6354384422302246,
      "learning_rate": 7.008427614108841e-06,
      "loss": 0.0218,
      "step": 928020
    },
    {
      "epoch": 1.5187578143922285,
      "grad_norm": 0.7106280326843262,
      "learning_rate": 7.008361721895324e-06,
      "loss": 0.0205,
      "step": 928040
    },
    {
      "epoch": 1.5187905448308818,
      "grad_norm": 0.30155569314956665,
      "learning_rate": 7.008295829681807e-06,
      "loss": 0.0204,
      "step": 928060
    },
    {
      "epoch": 1.5188232752695352,
      "grad_norm": 0.40432190895080566,
      "learning_rate": 7.00822993746829e-06,
      "loss": 0.0191,
      "step": 928080
    },
    {
      "epoch": 1.5188560057081886,
      "grad_norm": 2.286052942276001,
      "learning_rate": 7.008164045254773e-06,
      "loss": 0.0196,
      "step": 928100
    },
    {
      "epoch": 1.5188887361468417,
      "grad_norm": 0.5811154246330261,
      "learning_rate": 7.008098153041256e-06,
      "loss": 0.0325,
      "step": 928120
    },
    {
      "epoch": 1.5189214665854953,
      "grad_norm": 1.4337135553359985,
      "learning_rate": 7.008032260827739e-06,
      "loss": 0.0285,
      "step": 928140
    },
    {
      "epoch": 1.5189541970241485,
      "grad_norm": 0.17891980707645416,
      "learning_rate": 7.007966368614222e-06,
      "loss": 0.0223,
      "step": 928160
    },
    {
      "epoch": 1.5189869274628018,
      "grad_norm": 0.957155168056488,
      "learning_rate": 7.007900476400704e-06,
      "loss": 0.0242,
      "step": 928180
    },
    {
      "epoch": 1.5190196579014552,
      "grad_norm": 0.1697416603565216,
      "learning_rate": 7.007834584187188e-06,
      "loss": 0.0224,
      "step": 928200
    },
    {
      "epoch": 1.5190523883401086,
      "grad_norm": 1.9381526708602905,
      "learning_rate": 7.0077686919736695e-06,
      "loss": 0.0267,
      "step": 928220
    },
    {
      "epoch": 1.519085118778762,
      "grad_norm": 0.2628345787525177,
      "learning_rate": 7.007702799760153e-06,
      "loss": 0.0214,
      "step": 928240
    },
    {
      "epoch": 1.519117849217415,
      "grad_norm": 0.7222684025764465,
      "learning_rate": 7.007636907546636e-06,
      "loss": 0.0277,
      "step": 928260
    },
    {
      "epoch": 1.5191505796560687,
      "grad_norm": 1.626112461090088,
      "learning_rate": 7.007571015333119e-06,
      "loss": 0.0209,
      "step": 928280
    },
    {
      "epoch": 1.5191833100947219,
      "grad_norm": 0.1208992674946785,
      "learning_rate": 7.007505123119601e-06,
      "loss": 0.0213,
      "step": 928300
    },
    {
      "epoch": 1.5192160405333752,
      "grad_norm": 1.0047943592071533,
      "learning_rate": 7.007439230906085e-06,
      "loss": 0.0255,
      "step": 928320
    },
    {
      "epoch": 1.5192487709720286,
      "grad_norm": 0.416328102350235,
      "learning_rate": 7.007373338692567e-06,
      "loss": 0.0208,
      "step": 928340
    },
    {
      "epoch": 1.5192815014106817,
      "grad_norm": 1.891669750213623,
      "learning_rate": 7.00730744647905e-06,
      "loss": 0.0153,
      "step": 928360
    },
    {
      "epoch": 1.5193142318493353,
      "grad_norm": 0.4693048298358917,
      "learning_rate": 7.007241554265532e-06,
      "loss": 0.0214,
      "step": 928380
    },
    {
      "epoch": 1.5193469622879885,
      "grad_norm": 0.2022208273410797,
      "learning_rate": 7.007175662052016e-06,
      "loss": 0.0282,
      "step": 928400
    },
    {
      "epoch": 1.519379692726642,
      "grad_norm": 0.5225681662559509,
      "learning_rate": 7.007109769838499e-06,
      "loss": 0.0196,
      "step": 928420
    },
    {
      "epoch": 1.5194124231652952,
      "grad_norm": 0.47973987460136414,
      "learning_rate": 7.007043877624981e-06,
      "loss": 0.0251,
      "step": 928440
    },
    {
      "epoch": 1.5194451536039486,
      "grad_norm": 0.5844230651855469,
      "learning_rate": 7.006977985411465e-06,
      "loss": 0.0154,
      "step": 928460
    },
    {
      "epoch": 1.519477884042602,
      "grad_norm": 0.3290391266345978,
      "learning_rate": 7.006912093197948e-06,
      "loss": 0.0252,
      "step": 928480
    },
    {
      "epoch": 1.5195106144812551,
      "grad_norm": 0.15486600995063782,
      "learning_rate": 7.0068462009844304e-06,
      "loss": 0.0224,
      "step": 928500
    },
    {
      "epoch": 1.5195433449199087,
      "grad_norm": 1.2992416620254517,
      "learning_rate": 7.006780308770913e-06,
      "loss": 0.0239,
      "step": 928520
    },
    {
      "epoch": 1.5195760753585619,
      "grad_norm": 0.6865286231040955,
      "learning_rate": 7.006714416557397e-06,
      "loss": 0.0182,
      "step": 928540
    },
    {
      "epoch": 1.5196088057972152,
      "grad_norm": 0.669230043888092,
      "learning_rate": 7.006648524343879e-06,
      "loss": 0.0159,
      "step": 928560
    },
    {
      "epoch": 1.5196415362358686,
      "grad_norm": 0.9789713025093079,
      "learning_rate": 7.006582632130362e-06,
      "loss": 0.019,
      "step": 928580
    },
    {
      "epoch": 1.519674266674522,
      "grad_norm": 0.3180593252182007,
      "learning_rate": 7.006516739916844e-06,
      "loss": 0.0274,
      "step": 928600
    },
    {
      "epoch": 1.5197069971131754,
      "grad_norm": 0.13825646042823792,
      "learning_rate": 7.006450847703328e-06,
      "loss": 0.018,
      "step": 928620
    },
    {
      "epoch": 1.5197397275518285,
      "grad_norm": 0.38169020414352417,
      "learning_rate": 7.00638495548981e-06,
      "loss": 0.0175,
      "step": 928640
    },
    {
      "epoch": 1.519772457990482,
      "grad_norm": 0.6106997728347778,
      "learning_rate": 7.006319063276293e-06,
      "loss": 0.0218,
      "step": 928660
    },
    {
      "epoch": 1.5198051884291353,
      "grad_norm": 0.5900137424468994,
      "learning_rate": 7.006253171062776e-06,
      "loss": 0.0246,
      "step": 928680
    },
    {
      "epoch": 1.5198379188677886,
      "grad_norm": 1.749727725982666,
      "learning_rate": 7.006187278849259e-06,
      "loss": 0.0198,
      "step": 928700
    },
    {
      "epoch": 1.519870649306442,
      "grad_norm": 0.08017156273126602,
      "learning_rate": 7.0061213866357414e-06,
      "loss": 0.0205,
      "step": 928720
    },
    {
      "epoch": 1.5199033797450954,
      "grad_norm": 1.0058867931365967,
      "learning_rate": 7.006055494422225e-06,
      "loss": 0.0183,
      "step": 928740
    },
    {
      "epoch": 1.5199361101837487,
      "grad_norm": 0.3520236313343048,
      "learning_rate": 7.005989602208707e-06,
      "loss": 0.0238,
      "step": 928760
    },
    {
      "epoch": 1.519968840622402,
      "grad_norm": 0.5098317861557007,
      "learning_rate": 7.0059237099951905e-06,
      "loss": 0.0213,
      "step": 928780
    },
    {
      "epoch": 1.5200015710610555,
      "grad_norm": 0.2523951828479767,
      "learning_rate": 7.005857817781674e-06,
      "loss": 0.0234,
      "step": 928800
    },
    {
      "epoch": 1.5200343014997086,
      "grad_norm": 0.8786972761154175,
      "learning_rate": 7.005791925568156e-06,
      "loss": 0.0165,
      "step": 928820
    },
    {
      "epoch": 1.520067031938362,
      "grad_norm": 0.3014843165874481,
      "learning_rate": 7.0057260333546396e-06,
      "loss": 0.0205,
      "step": 928840
    },
    {
      "epoch": 1.5200997623770154,
      "grad_norm": 0.22278405725955963,
      "learning_rate": 7.0056601411411215e-06,
      "loss": 0.027,
      "step": 928860
    },
    {
      "epoch": 1.5201324928156688,
      "grad_norm": 0.7673047184944153,
      "learning_rate": 7.005594248927605e-06,
      "loss": 0.0217,
      "step": 928880
    },
    {
      "epoch": 1.5201652232543221,
      "grad_norm": 1.0204004049301147,
      "learning_rate": 7.005528356714088e-06,
      "loss": 0.02,
      "step": 928900
    },
    {
      "epoch": 1.5201979536929753,
      "grad_norm": 0.47805681824684143,
      "learning_rate": 7.0054624645005705e-06,
      "loss": 0.0208,
      "step": 928920
    },
    {
      "epoch": 1.5202306841316289,
      "grad_norm": 0.698810875415802,
      "learning_rate": 7.005396572287053e-06,
      "loss": 0.0249,
      "step": 928940
    },
    {
      "epoch": 1.520263414570282,
      "grad_norm": 0.3062858283519745,
      "learning_rate": 7.005330680073537e-06,
      "loss": 0.022,
      "step": 928960
    },
    {
      "epoch": 1.5202961450089354,
      "grad_norm": 0.6199531555175781,
      "learning_rate": 7.005264787860019e-06,
      "loss": 0.0292,
      "step": 928980
    },
    {
      "epoch": 1.5203288754475888,
      "grad_norm": 0.5005316734313965,
      "learning_rate": 7.005198895646502e-06,
      "loss": 0.0267,
      "step": 929000
    },
    {
      "epoch": 1.5203616058862421,
      "grad_norm": 0.5980277061462402,
      "learning_rate": 7.005133003432984e-06,
      "loss": 0.0208,
      "step": 929020
    },
    {
      "epoch": 1.5203943363248955,
      "grad_norm": 1.3728837966918945,
      "learning_rate": 7.005067111219468e-06,
      "loss": 0.0163,
      "step": 929040
    },
    {
      "epoch": 1.5204270667635487,
      "grad_norm": 0.5448060631752014,
      "learning_rate": 7.0050012190059505e-06,
      "loss": 0.0194,
      "step": 929060
    },
    {
      "epoch": 1.5204597972022023,
      "grad_norm": 0.20911137759685516,
      "learning_rate": 7.004935326792433e-06,
      "loss": 0.015,
      "step": 929080
    },
    {
      "epoch": 1.5204925276408554,
      "grad_norm": 0.2340519279241562,
      "learning_rate": 7.004869434578916e-06,
      "loss": 0.0212,
      "step": 929100
    },
    {
      "epoch": 1.5205252580795088,
      "grad_norm": 0.2142747938632965,
      "learning_rate": 7.0048035423654e-06,
      "loss": 0.0202,
      "step": 929120
    },
    {
      "epoch": 1.5205579885181622,
      "grad_norm": 0.5861983299255371,
      "learning_rate": 7.004737650151882e-06,
      "loss": 0.0229,
      "step": 929140
    },
    {
      "epoch": 1.5205907189568153,
      "grad_norm": 0.5096191763877869,
      "learning_rate": 7.004671757938365e-06,
      "loss": 0.0208,
      "step": 929160
    },
    {
      "epoch": 1.520623449395469,
      "grad_norm": 0.639745831489563,
      "learning_rate": 7.004605865724849e-06,
      "loss": 0.0234,
      "step": 929180
    },
    {
      "epoch": 1.520656179834122,
      "grad_norm": 0.5013990998268127,
      "learning_rate": 7.0045399735113306e-06,
      "loss": 0.0252,
      "step": 929200
    },
    {
      "epoch": 1.5206889102727756,
      "grad_norm": 0.6977917551994324,
      "learning_rate": 7.004474081297814e-06,
      "loss": 0.0183,
      "step": 929220
    },
    {
      "epoch": 1.5207216407114288,
      "grad_norm": 0.2367936372756958,
      "learning_rate": 7.004408189084296e-06,
      "loss": 0.0235,
      "step": 929240
    },
    {
      "epoch": 1.5207543711500822,
      "grad_norm": 1.0619233846664429,
      "learning_rate": 7.00434229687078e-06,
      "loss": 0.0244,
      "step": 929260
    },
    {
      "epoch": 1.5207871015887355,
      "grad_norm": 0.15112636983394623,
      "learning_rate": 7.004276404657262e-06,
      "loss": 0.0177,
      "step": 929280
    },
    {
      "epoch": 1.5208198320273887,
      "grad_norm": 0.15948523581027985,
      "learning_rate": 7.004210512443745e-06,
      "loss": 0.019,
      "step": 929300
    },
    {
      "epoch": 1.5208525624660423,
      "grad_norm": 1.074990153312683,
      "learning_rate": 7.004144620230228e-06,
      "loss": 0.0176,
      "step": 929320
    },
    {
      "epoch": 1.5208852929046954,
      "grad_norm": 0.4210224151611328,
      "learning_rate": 7.0040787280167114e-06,
      "loss": 0.0153,
      "step": 929340
    },
    {
      "epoch": 1.5209180233433488,
      "grad_norm": 0.5547028183937073,
      "learning_rate": 7.004012835803193e-06,
      "loss": 0.0147,
      "step": 929360
    },
    {
      "epoch": 1.5209507537820022,
      "grad_norm": 1.6698899269104004,
      "learning_rate": 7.003946943589677e-06,
      "loss": 0.0131,
      "step": 929380
    },
    {
      "epoch": 1.5209834842206555,
      "grad_norm": 0.6775355339050293,
      "learning_rate": 7.003881051376159e-06,
      "loss": 0.016,
      "step": 929400
    },
    {
      "epoch": 1.521016214659309,
      "grad_norm": 0.30052411556243896,
      "learning_rate": 7.003815159162642e-06,
      "loss": 0.0249,
      "step": 929420
    },
    {
      "epoch": 1.521048945097962,
      "grad_norm": 1.700640320777893,
      "learning_rate": 7.003749266949124e-06,
      "loss": 0.019,
      "step": 929440
    },
    {
      "epoch": 1.5210816755366157,
      "grad_norm": 0.47080132365226746,
      "learning_rate": 7.003683374735608e-06,
      "loss": 0.0187,
      "step": 929460
    },
    {
      "epoch": 1.5211144059752688,
      "grad_norm": 0.2742093503475189,
      "learning_rate": 7.003617482522091e-06,
      "loss": 0.0218,
      "step": 929480
    },
    {
      "epoch": 1.5211471364139222,
      "grad_norm": 0.45134541392326355,
      "learning_rate": 7.003551590308574e-06,
      "loss": 0.0263,
      "step": 929500
    },
    {
      "epoch": 1.5211798668525756,
      "grad_norm": 0.18512694537639618,
      "learning_rate": 7.003485698095057e-06,
      "loss": 0.0177,
      "step": 929520
    },
    {
      "epoch": 1.521212597291229,
      "grad_norm": 0.4211718440055847,
      "learning_rate": 7.00341980588154e-06,
      "loss": 0.0179,
      "step": 929540
    },
    {
      "epoch": 1.5212453277298823,
      "grad_norm": 0.4972812831401825,
      "learning_rate": 7.003353913668023e-06,
      "loss": 0.0202,
      "step": 929560
    },
    {
      "epoch": 1.5212780581685355,
      "grad_norm": 0.7678637504577637,
      "learning_rate": 7.003288021454505e-06,
      "loss": 0.015,
      "step": 929580
    },
    {
      "epoch": 1.521310788607189,
      "grad_norm": 0.6889579892158508,
      "learning_rate": 7.003222129240989e-06,
      "loss": 0.0297,
      "step": 929600
    },
    {
      "epoch": 1.5213435190458422,
      "grad_norm": 0.5073140859603882,
      "learning_rate": 7.003156237027471e-06,
      "loss": 0.0222,
      "step": 929620
    },
    {
      "epoch": 1.5213762494844956,
      "grad_norm": 0.8082018494606018,
      "learning_rate": 7.003090344813954e-06,
      "loss": 0.0136,
      "step": 929640
    },
    {
      "epoch": 1.521408979923149,
      "grad_norm": 0.7875857949256897,
      "learning_rate": 7.003024452600436e-06,
      "loss": 0.0341,
      "step": 929660
    },
    {
      "epoch": 1.5214417103618023,
      "grad_norm": 0.47028860449790955,
      "learning_rate": 7.00295856038692e-06,
      "loss": 0.0211,
      "step": 929680
    },
    {
      "epoch": 1.5214744408004557,
      "grad_norm": 0.2141243815422058,
      "learning_rate": 7.0028926681734025e-06,
      "loss": 0.018,
      "step": 929700
    },
    {
      "epoch": 1.5215071712391088,
      "grad_norm": 0.44366690516471863,
      "learning_rate": 7.002826775959885e-06,
      "loss": 0.026,
      "step": 929720
    },
    {
      "epoch": 1.5215399016777624,
      "grad_norm": 0.12484834343194962,
      "learning_rate": 7.002760883746368e-06,
      "loss": 0.0213,
      "step": 929740
    },
    {
      "epoch": 1.5215726321164156,
      "grad_norm": 0.5493288636207581,
      "learning_rate": 7.0026949915328515e-06,
      "loss": 0.015,
      "step": 929760
    },
    {
      "epoch": 1.521605362555069,
      "grad_norm": 0.18446119129657745,
      "learning_rate": 7.002629099319333e-06,
      "loss": 0.0246,
      "step": 929780
    },
    {
      "epoch": 1.5216380929937223,
      "grad_norm": 0.20994018018245697,
      "learning_rate": 7.002563207105817e-06,
      "loss": 0.0217,
      "step": 929800
    },
    {
      "epoch": 1.5216708234323755,
      "grad_norm": 0.4639616310596466,
      "learning_rate": 7.002497314892299e-06,
      "loss": 0.0266,
      "step": 929820
    },
    {
      "epoch": 1.521703553871029,
      "grad_norm": 0.21361681818962097,
      "learning_rate": 7.0024314226787825e-06,
      "loss": 0.0192,
      "step": 929840
    },
    {
      "epoch": 1.5217362843096822,
      "grad_norm": 0.5811177492141724,
      "learning_rate": 7.002365530465266e-06,
      "loss": 0.0153,
      "step": 929860
    },
    {
      "epoch": 1.5217690147483358,
      "grad_norm": 1.5947526693344116,
      "learning_rate": 7.002299638251748e-06,
      "loss": 0.0218,
      "step": 929880
    },
    {
      "epoch": 1.521801745186989,
      "grad_norm": 0.4864838123321533,
      "learning_rate": 7.0022337460382315e-06,
      "loss": 0.0267,
      "step": 929900
    },
    {
      "epoch": 1.5218344756256423,
      "grad_norm": 0.2236451953649521,
      "learning_rate": 7.002167853824714e-06,
      "loss": 0.0156,
      "step": 929920
    },
    {
      "epoch": 1.5218672060642957,
      "grad_norm": 0.829254686832428,
      "learning_rate": 7.002101961611197e-06,
      "loss": 0.0199,
      "step": 929940
    },
    {
      "epoch": 1.5218999365029489,
      "grad_norm": 1.3212324380874634,
      "learning_rate": 7.00203606939768e-06,
      "loss": 0.0183,
      "step": 929960
    },
    {
      "epoch": 1.5219326669416025,
      "grad_norm": 0.5238283276557922,
      "learning_rate": 7.001970177184163e-06,
      "loss": 0.0217,
      "step": 929980
    },
    {
      "epoch": 1.5219653973802556,
      "grad_norm": 0.39923611283302307,
      "learning_rate": 7.001904284970645e-06,
      "loss": 0.0167,
      "step": 930000
    },
    {
      "epoch": 1.521998127818909,
      "grad_norm": 0.4023827612400055,
      "learning_rate": 7.001838392757129e-06,
      "loss": 0.0204,
      "step": 930020
    },
    {
      "epoch": 1.5220308582575623,
      "grad_norm": 1.7439947128295898,
      "learning_rate": 7.001772500543611e-06,
      "loss": 0.0214,
      "step": 930040
    },
    {
      "epoch": 1.5220635886962157,
      "grad_norm": 0.5550611615180969,
      "learning_rate": 7.001706608330094e-06,
      "loss": 0.0165,
      "step": 930060
    },
    {
      "epoch": 1.522096319134869,
      "grad_norm": 0.7891831398010254,
      "learning_rate": 7.001640716116577e-06,
      "loss": 0.0141,
      "step": 930080
    },
    {
      "epoch": 1.5221290495735222,
      "grad_norm": 0.635419487953186,
      "learning_rate": 7.00157482390306e-06,
      "loss": 0.0148,
      "step": 930100
    },
    {
      "epoch": 1.5221617800121758,
      "grad_norm": 0.18734048306941986,
      "learning_rate": 7.0015089316895425e-06,
      "loss": 0.0143,
      "step": 930120
    },
    {
      "epoch": 1.522194510450829,
      "grad_norm": 1.6018385887145996,
      "learning_rate": 7.001443039476026e-06,
      "loss": 0.0274,
      "step": 930140
    },
    {
      "epoch": 1.5222272408894824,
      "grad_norm": 0.33231455087661743,
      "learning_rate": 7.001377147262508e-06,
      "loss": 0.0214,
      "step": 930160
    },
    {
      "epoch": 1.5222599713281357,
      "grad_norm": 0.4002370238304138,
      "learning_rate": 7.001311255048992e-06,
      "loss": 0.0192,
      "step": 930180
    },
    {
      "epoch": 1.522292701766789,
      "grad_norm": 0.38398659229278564,
      "learning_rate": 7.001245362835475e-06,
      "loss": 0.0258,
      "step": 930200
    },
    {
      "epoch": 1.5223254322054425,
      "grad_norm": 1.03978431224823,
      "learning_rate": 7.001179470621957e-06,
      "loss": 0.0236,
      "step": 930220
    },
    {
      "epoch": 1.5223581626440956,
      "grad_norm": 0.26594677567481995,
      "learning_rate": 7.001113578408441e-06,
      "loss": 0.0194,
      "step": 930240
    },
    {
      "epoch": 1.5223908930827492,
      "grad_norm": 0.9197186231613159,
      "learning_rate": 7.0010476861949226e-06,
      "loss": 0.0172,
      "step": 930260
    },
    {
      "epoch": 1.5224236235214024,
      "grad_norm": 0.3219829797744751,
      "learning_rate": 7.000981793981406e-06,
      "loss": 0.0209,
      "step": 930280
    },
    {
      "epoch": 1.5224563539600557,
      "grad_norm": 0.3940097689628601,
      "learning_rate": 7.000915901767889e-06,
      "loss": 0.0209,
      "step": 930300
    },
    {
      "epoch": 1.5224890843987091,
      "grad_norm": 1.031715750694275,
      "learning_rate": 7.000850009554372e-06,
      "loss": 0.017,
      "step": 930320
    },
    {
      "epoch": 1.5225218148373625,
      "grad_norm": 1.1762117147445679,
      "learning_rate": 7.000784117340854e-06,
      "loss": 0.0179,
      "step": 930340
    },
    {
      "epoch": 1.5225545452760159,
      "grad_norm": 0.11876241862773895,
      "learning_rate": 7.000718225127338e-06,
      "loss": 0.0275,
      "step": 930360
    },
    {
      "epoch": 1.522587275714669,
      "grad_norm": 0.999614417552948,
      "learning_rate": 7.00065233291382e-06,
      "loss": 0.0174,
      "step": 930380
    },
    {
      "epoch": 1.5226200061533226,
      "grad_norm": 0.16622990369796753,
      "learning_rate": 7.0005864407003034e-06,
      "loss": 0.0211,
      "step": 930400
    },
    {
      "epoch": 1.5226527365919758,
      "grad_norm": 0.688246488571167,
      "learning_rate": 7.000520548486785e-06,
      "loss": 0.0237,
      "step": 930420
    },
    {
      "epoch": 1.5226854670306291,
      "grad_norm": 0.3068757951259613,
      "learning_rate": 7.000454656273269e-06,
      "loss": 0.0142,
      "step": 930440
    },
    {
      "epoch": 1.5227181974692825,
      "grad_norm": 0.988234281539917,
      "learning_rate": 7.000388764059751e-06,
      "loss": 0.0149,
      "step": 930460
    },
    {
      "epoch": 1.5227509279079359,
      "grad_norm": 0.21107427775859833,
      "learning_rate": 7.000322871846234e-06,
      "loss": 0.0164,
      "step": 930480
    },
    {
      "epoch": 1.5227836583465892,
      "grad_norm": 0.7240025997161865,
      "learning_rate": 7.000256979632717e-06,
      "loss": 0.0182,
      "step": 930500
    },
    {
      "epoch": 1.5228163887852424,
      "grad_norm": 0.1836683452129364,
      "learning_rate": 7.0001910874192e-06,
      "loss": 0.0189,
      "step": 930520
    },
    {
      "epoch": 1.522849119223896,
      "grad_norm": 0.18353022634983063,
      "learning_rate": 7.000125195205683e-06,
      "loss": 0.0298,
      "step": 930540
    },
    {
      "epoch": 1.5228818496625491,
      "grad_norm": 0.46877068281173706,
      "learning_rate": 7.000059302992166e-06,
      "loss": 0.0159,
      "step": 930560
    },
    {
      "epoch": 1.5229145801012025,
      "grad_norm": 2.0693039894104004,
      "learning_rate": 6.99999341077865e-06,
      "loss": 0.0243,
      "step": 930580
    },
    {
      "epoch": 1.5229473105398559,
      "grad_norm": 0.14093385636806488,
      "learning_rate": 6.999927518565132e-06,
      "loss": 0.028,
      "step": 930600
    },
    {
      "epoch": 1.522980040978509,
      "grad_norm": 0.5311052203178406,
      "learning_rate": 6.999861626351615e-06,
      "loss": 0.0192,
      "step": 930620
    },
    {
      "epoch": 1.5230127714171626,
      "grad_norm": 0.8100304007530212,
      "learning_rate": 6.999795734138097e-06,
      "loss": 0.0261,
      "step": 930640
    },
    {
      "epoch": 1.5230455018558158,
      "grad_norm": 0.1390247941017151,
      "learning_rate": 6.999729841924581e-06,
      "loss": 0.014,
      "step": 930660
    },
    {
      "epoch": 1.5230782322944694,
      "grad_norm": 0.693025529384613,
      "learning_rate": 6.999663949711063e-06,
      "loss": 0.0226,
      "step": 930680
    },
    {
      "epoch": 1.5231109627331225,
      "grad_norm": 0.4397454559803009,
      "learning_rate": 6.999598057497546e-06,
      "loss": 0.0164,
      "step": 930700
    },
    {
      "epoch": 1.523143693171776,
      "grad_norm": 0.3459990918636322,
      "learning_rate": 6.999532165284029e-06,
      "loss": 0.0246,
      "step": 930720
    },
    {
      "epoch": 1.5231764236104293,
      "grad_norm": 1.0544233322143555,
      "learning_rate": 6.999466273070512e-06,
      "loss": 0.0181,
      "step": 930740
    },
    {
      "epoch": 1.5232091540490824,
      "grad_norm": 0.33382320404052734,
      "learning_rate": 6.9994003808569944e-06,
      "loss": 0.0236,
      "step": 930760
    },
    {
      "epoch": 1.523241884487736,
      "grad_norm": 0.16277074813842773,
      "learning_rate": 6.999334488643478e-06,
      "loss": 0.0171,
      "step": 930780
    },
    {
      "epoch": 1.5232746149263892,
      "grad_norm": 0.5386865735054016,
      "learning_rate": 6.99926859642996e-06,
      "loss": 0.0152,
      "step": 930800
    },
    {
      "epoch": 1.5233073453650425,
      "grad_norm": 0.4601936936378479,
      "learning_rate": 6.9992027042164435e-06,
      "loss": 0.0179,
      "step": 930820
    },
    {
      "epoch": 1.523340075803696,
      "grad_norm": 0.35348817706108093,
      "learning_rate": 6.999136812002925e-06,
      "loss": 0.0251,
      "step": 930840
    },
    {
      "epoch": 1.5233728062423493,
      "grad_norm": 0.3975151777267456,
      "learning_rate": 6.999070919789409e-06,
      "loss": 0.0257,
      "step": 930860
    },
    {
      "epoch": 1.5234055366810026,
      "grad_norm": 1.1466302871704102,
      "learning_rate": 6.999005027575892e-06,
      "loss": 0.0181,
      "step": 930880
    },
    {
      "epoch": 1.5234382671196558,
      "grad_norm": 1.0082826614379883,
      "learning_rate": 6.9989391353623745e-06,
      "loss": 0.0235,
      "step": 930900
    },
    {
      "epoch": 1.5234709975583094,
      "grad_norm": 0.3465185761451721,
      "learning_rate": 6.998873243148858e-06,
      "loss": 0.0218,
      "step": 930920
    },
    {
      "epoch": 1.5235037279969625,
      "grad_norm": 0.7128512263298035,
      "learning_rate": 6.998807350935341e-06,
      "loss": 0.0186,
      "step": 930940
    },
    {
      "epoch": 1.523536458435616,
      "grad_norm": 0.3656271696090698,
      "learning_rate": 6.9987414587218235e-06,
      "loss": 0.024,
      "step": 930960
    },
    {
      "epoch": 1.5235691888742693,
      "grad_norm": 1.7373284101486206,
      "learning_rate": 6.998675566508306e-06,
      "loss": 0.0246,
      "step": 930980
    },
    {
      "epoch": 1.5236019193129227,
      "grad_norm": 0.5593736171722412,
      "learning_rate": 6.99860967429479e-06,
      "loss": 0.0315,
      "step": 931000
    },
    {
      "epoch": 1.523634649751576,
      "grad_norm": 0.15148067474365234,
      "learning_rate": 6.998543782081272e-06,
      "loss": 0.0199,
      "step": 931020
    },
    {
      "epoch": 1.5236673801902292,
      "grad_norm": 0.39805445075035095,
      "learning_rate": 6.998477889867755e-06,
      "loss": 0.017,
      "step": 931040
    },
    {
      "epoch": 1.5237001106288828,
      "grad_norm": 0.9135257601737976,
      "learning_rate": 6.998411997654237e-06,
      "loss": 0.0158,
      "step": 931060
    },
    {
      "epoch": 1.523732841067536,
      "grad_norm": 0.2907019853591919,
      "learning_rate": 6.998346105440721e-06,
      "loss": 0.0181,
      "step": 931080
    },
    {
      "epoch": 1.5237655715061893,
      "grad_norm": 0.18843239545822144,
      "learning_rate": 6.9982802132272036e-06,
      "loss": 0.0232,
      "step": 931100
    },
    {
      "epoch": 1.5237983019448427,
      "grad_norm": 0.5731116533279419,
      "learning_rate": 6.998214321013686e-06,
      "loss": 0.0217,
      "step": 931120
    },
    {
      "epoch": 1.523831032383496,
      "grad_norm": 0.8668022155761719,
      "learning_rate": 6.998148428800169e-06,
      "loss": 0.0231,
      "step": 931140
    },
    {
      "epoch": 1.5238637628221494,
      "grad_norm": 0.12897509336471558,
      "learning_rate": 6.998082536586653e-06,
      "loss": 0.0219,
      "step": 931160
    },
    {
      "epoch": 1.5238964932608026,
      "grad_norm": 0.11641912162303925,
      "learning_rate": 6.9980166443731345e-06,
      "loss": 0.0255,
      "step": 931180
    },
    {
      "epoch": 1.5239292236994562,
      "grad_norm": 0.4250575304031372,
      "learning_rate": 6.997950752159618e-06,
      "loss": 0.0111,
      "step": 931200
    },
    {
      "epoch": 1.5239619541381093,
      "grad_norm": 0.14909504354000092,
      "learning_rate": 6.9978848599461e-06,
      "loss": 0.0215,
      "step": 931220
    },
    {
      "epoch": 1.5239946845767627,
      "grad_norm": 0.5006228089332581,
      "learning_rate": 6.997818967732584e-06,
      "loss": 0.0132,
      "step": 931240
    },
    {
      "epoch": 1.524027415015416,
      "grad_norm": 0.20350433886051178,
      "learning_rate": 6.997753075519067e-06,
      "loss": 0.0201,
      "step": 931260
    },
    {
      "epoch": 1.5240601454540694,
      "grad_norm": 0.42253127694129944,
      "learning_rate": 6.997687183305549e-06,
      "loss": 0.0188,
      "step": 931280
    },
    {
      "epoch": 1.5240928758927228,
      "grad_norm": 0.6007735729217529,
      "learning_rate": 6.997621291092033e-06,
      "loss": 0.0222,
      "step": 931300
    },
    {
      "epoch": 1.524125606331376,
      "grad_norm": 0.3565019369125366,
      "learning_rate": 6.997555398878515e-06,
      "loss": 0.0218,
      "step": 931320
    },
    {
      "epoch": 1.5241583367700295,
      "grad_norm": 0.45438647270202637,
      "learning_rate": 6.997489506664998e-06,
      "loss": 0.012,
      "step": 931340
    },
    {
      "epoch": 1.5241910672086827,
      "grad_norm": 0.8969045281410217,
      "learning_rate": 6.997423614451481e-06,
      "loss": 0.0267,
      "step": 931360
    },
    {
      "epoch": 1.524223797647336,
      "grad_norm": 0.6627689599990845,
      "learning_rate": 6.9973577222379645e-06,
      "loss": 0.0249,
      "step": 931380
    },
    {
      "epoch": 1.5242565280859894,
      "grad_norm": 0.6024676561355591,
      "learning_rate": 6.997291830024446e-06,
      "loss": 0.0208,
      "step": 931400
    },
    {
      "epoch": 1.5242892585246426,
      "grad_norm": 0.46295997500419617,
      "learning_rate": 6.99722593781093e-06,
      "loss": 0.0187,
      "step": 931420
    },
    {
      "epoch": 1.5243219889632962,
      "grad_norm": 0.16986984014511108,
      "learning_rate": 6.997160045597412e-06,
      "loss": 0.0198,
      "step": 931440
    },
    {
      "epoch": 1.5243547194019493,
      "grad_norm": 0.1860993504524231,
      "learning_rate": 6.997094153383895e-06,
      "loss": 0.0182,
      "step": 931460
    },
    {
      "epoch": 1.524387449840603,
      "grad_norm": 1.5880377292633057,
      "learning_rate": 6.997028261170377e-06,
      "loss": 0.0178,
      "step": 931480
    },
    {
      "epoch": 1.524420180279256,
      "grad_norm": 0.6650289297103882,
      "learning_rate": 6.996962368956861e-06,
      "loss": 0.0225,
      "step": 931500
    },
    {
      "epoch": 1.5244529107179094,
      "grad_norm": 0.853451669216156,
      "learning_rate": 6.996896476743344e-06,
      "loss": 0.0191,
      "step": 931520
    },
    {
      "epoch": 1.5244856411565628,
      "grad_norm": 0.35926562547683716,
      "learning_rate": 6.996830584529826e-06,
      "loss": 0.0174,
      "step": 931540
    },
    {
      "epoch": 1.524518371595216,
      "grad_norm": 1.2123900651931763,
      "learning_rate": 6.996764692316309e-06,
      "loss": 0.0217,
      "step": 931560
    },
    {
      "epoch": 1.5245511020338696,
      "grad_norm": 0.6810582876205444,
      "learning_rate": 6.996698800102793e-06,
      "loss": 0.0169,
      "step": 931580
    },
    {
      "epoch": 1.5245838324725227,
      "grad_norm": 0.7908294796943665,
      "learning_rate": 6.9966329078892754e-06,
      "loss": 0.0237,
      "step": 931600
    },
    {
      "epoch": 1.524616562911176,
      "grad_norm": 0.06134627386927605,
      "learning_rate": 6.996567015675758e-06,
      "loss": 0.0246,
      "step": 931620
    },
    {
      "epoch": 1.5246492933498295,
      "grad_norm": 0.27301424741744995,
      "learning_rate": 6.996501123462242e-06,
      "loss": 0.0219,
      "step": 931640
    },
    {
      "epoch": 1.5246820237884828,
      "grad_norm": 0.552927553653717,
      "learning_rate": 6.996435231248724e-06,
      "loss": 0.0152,
      "step": 931660
    },
    {
      "epoch": 1.5247147542271362,
      "grad_norm": 0.664535403251648,
      "learning_rate": 6.996369339035207e-06,
      "loss": 0.0109,
      "step": 931680
    },
    {
      "epoch": 1.5247474846657894,
      "grad_norm": 0.8483969569206238,
      "learning_rate": 6.996303446821689e-06,
      "loss": 0.0216,
      "step": 931700
    },
    {
      "epoch": 1.524780215104443,
      "grad_norm": 6.373908519744873,
      "learning_rate": 6.996237554608173e-06,
      "loss": 0.0233,
      "step": 931720
    },
    {
      "epoch": 1.524812945543096,
      "grad_norm": 1.6974941492080688,
      "learning_rate": 6.9961716623946555e-06,
      "loss": 0.0266,
      "step": 931740
    },
    {
      "epoch": 1.5248456759817495,
      "grad_norm": 0.9126002192497253,
      "learning_rate": 6.996105770181138e-06,
      "loss": 0.0253,
      "step": 931760
    },
    {
      "epoch": 1.5248784064204028,
      "grad_norm": 0.7486807107925415,
      "learning_rate": 6.996039877967621e-06,
      "loss": 0.0198,
      "step": 931780
    },
    {
      "epoch": 1.5249111368590562,
      "grad_norm": 1.8394320011138916,
      "learning_rate": 6.9959739857541045e-06,
      "loss": 0.0223,
      "step": 931800
    },
    {
      "epoch": 1.5249438672977096,
      "grad_norm": 1.6337946653366089,
      "learning_rate": 6.995908093540586e-06,
      "loss": 0.0127,
      "step": 931820
    },
    {
      "epoch": 1.5249765977363627,
      "grad_norm": 0.7077530026435852,
      "learning_rate": 6.99584220132707e-06,
      "loss": 0.0194,
      "step": 931840
    },
    {
      "epoch": 1.5250093281750163,
      "grad_norm": 0.25029614567756653,
      "learning_rate": 6.995776309113552e-06,
      "loss": 0.0174,
      "step": 931860
    },
    {
      "epoch": 1.5250420586136695,
      "grad_norm": 1.2052361965179443,
      "learning_rate": 6.9957104169000355e-06,
      "loss": 0.0239,
      "step": 931880
    },
    {
      "epoch": 1.5250747890523229,
      "grad_norm": 0.35700085759162903,
      "learning_rate": 6.995644524686518e-06,
      "loss": 0.02,
      "step": 931900
    },
    {
      "epoch": 1.5251075194909762,
      "grad_norm": 0.5051262378692627,
      "learning_rate": 6.995578632473001e-06,
      "loss": 0.0145,
      "step": 931920
    },
    {
      "epoch": 1.5251402499296296,
      "grad_norm": 0.3466886878013611,
      "learning_rate": 6.995512740259484e-06,
      "loss": 0.0288,
      "step": 931940
    },
    {
      "epoch": 1.525172980368283,
      "grad_norm": 1.250607967376709,
      "learning_rate": 6.995446848045967e-06,
      "loss": 0.0168,
      "step": 931960
    },
    {
      "epoch": 1.5252057108069361,
      "grad_norm": 0.8297454714775085,
      "learning_rate": 6.99538095583245e-06,
      "loss": 0.0164,
      "step": 931980
    },
    {
      "epoch": 1.5252384412455897,
      "grad_norm": 0.45443084836006165,
      "learning_rate": 6.995315063618933e-06,
      "loss": 0.0217,
      "step": 932000
    },
    {
      "epoch": 1.5252711716842429,
      "grad_norm": 0.9737697839736938,
      "learning_rate": 6.995249171405416e-06,
      "loss": 0.0187,
      "step": 932020
    },
    {
      "epoch": 1.5253039021228962,
      "grad_norm": 0.39350369572639465,
      "learning_rate": 6.995183279191898e-06,
      "loss": 0.0212,
      "step": 932040
    },
    {
      "epoch": 1.5253366325615496,
      "grad_norm": 2.2666943073272705,
      "learning_rate": 6.995117386978382e-06,
      "loss": 0.0186,
      "step": 932060
    },
    {
      "epoch": 1.525369363000203,
      "grad_norm": 0.8200523257255554,
      "learning_rate": 6.995051494764864e-06,
      "loss": 0.0213,
      "step": 932080
    },
    {
      "epoch": 1.5254020934388564,
      "grad_norm": 0.9668594002723694,
      "learning_rate": 6.994985602551347e-06,
      "loss": 0.0194,
      "step": 932100
    },
    {
      "epoch": 1.5254348238775095,
      "grad_norm": 0.38815778493881226,
      "learning_rate": 6.99491971033783e-06,
      "loss": 0.017,
      "step": 932120
    },
    {
      "epoch": 1.525467554316163,
      "grad_norm": 0.653194785118103,
      "learning_rate": 6.994853818124313e-06,
      "loss": 0.0233,
      "step": 932140
    },
    {
      "epoch": 1.5255002847548162,
      "grad_norm": 0.2733030915260315,
      "learning_rate": 6.9947879259107955e-06,
      "loss": 0.0209,
      "step": 932160
    },
    {
      "epoch": 1.5255330151934696,
      "grad_norm": 0.3628188967704773,
      "learning_rate": 6.994722033697279e-06,
      "loss": 0.0268,
      "step": 932180
    },
    {
      "epoch": 1.525565745632123,
      "grad_norm": 0.5003220438957214,
      "learning_rate": 6.994656141483761e-06,
      "loss": 0.0193,
      "step": 932200
    },
    {
      "epoch": 1.5255984760707761,
      "grad_norm": 0.529484748840332,
      "learning_rate": 6.994590249270245e-06,
      "loss": 0.0239,
      "step": 932220
    },
    {
      "epoch": 1.5256312065094297,
      "grad_norm": 0.4145551025867462,
      "learning_rate": 6.9945243570567265e-06,
      "loss": 0.0183,
      "step": 932240
    },
    {
      "epoch": 1.5256639369480829,
      "grad_norm": 0.19419021904468536,
      "learning_rate": 6.99445846484321e-06,
      "loss": 0.0162,
      "step": 932260
    },
    {
      "epoch": 1.5256966673867365,
      "grad_norm": 0.1921207308769226,
      "learning_rate": 6.994392572629693e-06,
      "loss": 0.0323,
      "step": 932280
    },
    {
      "epoch": 1.5257293978253896,
      "grad_norm": 0.8505815863609314,
      "learning_rate": 6.9943266804161756e-06,
      "loss": 0.0209,
      "step": 932300
    },
    {
      "epoch": 1.525762128264043,
      "grad_norm": 0.4115190804004669,
      "learning_rate": 6.994260788202659e-06,
      "loss": 0.0172,
      "step": 932320
    },
    {
      "epoch": 1.5257948587026964,
      "grad_norm": 1.2010424137115479,
      "learning_rate": 6.994194895989142e-06,
      "loss": 0.0205,
      "step": 932340
    },
    {
      "epoch": 1.5258275891413495,
      "grad_norm": 0.36792248487472534,
      "learning_rate": 6.994129003775625e-06,
      "loss": 0.0109,
      "step": 932360
    },
    {
      "epoch": 1.5258603195800031,
      "grad_norm": 0.8239031434059143,
      "learning_rate": 6.994063111562107e-06,
      "loss": 0.0295,
      "step": 932380
    },
    {
      "epoch": 1.5258930500186563,
      "grad_norm": 0.8265822529792786,
      "learning_rate": 6.993997219348591e-06,
      "loss": 0.0144,
      "step": 932400
    },
    {
      "epoch": 1.5259257804573096,
      "grad_norm": 1.4020804166793823,
      "learning_rate": 6.993931327135073e-06,
      "loss": 0.0172,
      "step": 932420
    },
    {
      "epoch": 1.525958510895963,
      "grad_norm": 0.2165706902742386,
      "learning_rate": 6.9938654349215564e-06,
      "loss": 0.0242,
      "step": 932440
    },
    {
      "epoch": 1.5259912413346164,
      "grad_norm": 0.5282514691352844,
      "learning_rate": 6.993799542708038e-06,
      "loss": 0.0157,
      "step": 932460
    },
    {
      "epoch": 1.5260239717732698,
      "grad_norm": 0.4241703450679779,
      "learning_rate": 6.993733650494522e-06,
      "loss": 0.0175,
      "step": 932480
    },
    {
      "epoch": 1.526056702211923,
      "grad_norm": 0.49982157349586487,
      "learning_rate": 6.993667758281004e-06,
      "loss": 0.0177,
      "step": 932500
    },
    {
      "epoch": 1.5260894326505765,
      "grad_norm": 0.29135236144065857,
      "learning_rate": 6.993601866067487e-06,
      "loss": 0.0199,
      "step": 932520
    },
    {
      "epoch": 1.5261221630892297,
      "grad_norm": 0.6511225700378418,
      "learning_rate": 6.99353597385397e-06,
      "loss": 0.0167,
      "step": 932540
    },
    {
      "epoch": 1.526154893527883,
      "grad_norm": 0.25161510705947876,
      "learning_rate": 6.993470081640453e-06,
      "loss": 0.018,
      "step": 932560
    },
    {
      "epoch": 1.5261876239665364,
      "grad_norm": 0.39487841725349426,
      "learning_rate": 6.993404189426936e-06,
      "loss": 0.0153,
      "step": 932580
    },
    {
      "epoch": 1.5262203544051898,
      "grad_norm": 1.560982584953308,
      "learning_rate": 6.993338297213419e-06,
      "loss": 0.0227,
      "step": 932600
    },
    {
      "epoch": 1.5262530848438431,
      "grad_norm": 0.3436436951160431,
      "learning_rate": 6.993272404999901e-06,
      "loss": 0.018,
      "step": 932620
    },
    {
      "epoch": 1.5262858152824963,
      "grad_norm": 0.32203295826911926,
      "learning_rate": 6.993206512786385e-06,
      "loss": 0.0178,
      "step": 932640
    },
    {
      "epoch": 1.5263185457211499,
      "grad_norm": 0.20955903828144073,
      "learning_rate": 6.993140620572868e-06,
      "loss": 0.0182,
      "step": 932660
    },
    {
      "epoch": 1.526351276159803,
      "grad_norm": 1.0280845165252686,
      "learning_rate": 6.99307472835935e-06,
      "loss": 0.0202,
      "step": 932680
    },
    {
      "epoch": 1.5263840065984564,
      "grad_norm": 0.6653696894645691,
      "learning_rate": 6.993008836145834e-06,
      "loss": 0.0195,
      "step": 932700
    },
    {
      "epoch": 1.5264167370371098,
      "grad_norm": 0.5422894358634949,
      "learning_rate": 6.992942943932316e-06,
      "loss": 0.0142,
      "step": 932720
    },
    {
      "epoch": 1.5264494674757632,
      "grad_norm": 0.09294959157705307,
      "learning_rate": 6.992877051718799e-06,
      "loss": 0.0137,
      "step": 932740
    },
    {
      "epoch": 1.5264821979144165,
      "grad_norm": 0.9998263120651245,
      "learning_rate": 6.992811159505282e-06,
      "loss": 0.0248,
      "step": 932760
    },
    {
      "epoch": 1.5265149283530697,
      "grad_norm": 0.7727770209312439,
      "learning_rate": 6.992745267291765e-06,
      "loss": 0.0161,
      "step": 932780
    },
    {
      "epoch": 1.5265476587917233,
      "grad_norm": 0.6186190247535706,
      "learning_rate": 6.9926793750782474e-06,
      "loss": 0.0254,
      "step": 932800
    },
    {
      "epoch": 1.5265803892303764,
      "grad_norm": 0.32438427209854126,
      "learning_rate": 6.992613482864731e-06,
      "loss": 0.0208,
      "step": 932820
    },
    {
      "epoch": 1.5266131196690298,
      "grad_norm": 0.22265930473804474,
      "learning_rate": 6.992547590651213e-06,
      "loss": 0.0208,
      "step": 932840
    },
    {
      "epoch": 1.5266458501076832,
      "grad_norm": 0.23934675753116608,
      "learning_rate": 6.9924816984376965e-06,
      "loss": 0.0175,
      "step": 932860
    },
    {
      "epoch": 1.5266785805463363,
      "grad_norm": 0.33804047107696533,
      "learning_rate": 6.992415806224178e-06,
      "loss": 0.0167,
      "step": 932880
    },
    {
      "epoch": 1.52671131098499,
      "grad_norm": 0.2678062617778778,
      "learning_rate": 6.992349914010662e-06,
      "loss": 0.0224,
      "step": 932900
    },
    {
      "epoch": 1.526744041423643,
      "grad_norm": 0.29555076360702515,
      "learning_rate": 6.992284021797145e-06,
      "loss": 0.0205,
      "step": 932920
    },
    {
      "epoch": 1.5267767718622967,
      "grad_norm": 1.1560454368591309,
      "learning_rate": 6.9922181295836275e-06,
      "loss": 0.023,
      "step": 932940
    },
    {
      "epoch": 1.5268095023009498,
      "grad_norm": 0.7083300948143005,
      "learning_rate": 6.99215223737011e-06,
      "loss": 0.0205,
      "step": 932960
    },
    {
      "epoch": 1.5268422327396032,
      "grad_norm": 0.6091653108596802,
      "learning_rate": 6.992086345156594e-06,
      "loss": 0.0153,
      "step": 932980
    },
    {
      "epoch": 1.5268749631782566,
      "grad_norm": 0.41557759046554565,
      "learning_rate": 6.992020452943076e-06,
      "loss": 0.0204,
      "step": 933000
    },
    {
      "epoch": 1.5269076936169097,
      "grad_norm": 0.33952900767326355,
      "learning_rate": 6.991954560729559e-06,
      "loss": 0.023,
      "step": 933020
    },
    {
      "epoch": 1.5269404240555633,
      "grad_norm": 0.18453869223594666,
      "learning_rate": 6.991888668516043e-06,
      "loss": 0.0179,
      "step": 933040
    },
    {
      "epoch": 1.5269731544942164,
      "grad_norm": 0.7050175070762634,
      "learning_rate": 6.991822776302525e-06,
      "loss": 0.0196,
      "step": 933060
    },
    {
      "epoch": 1.5270058849328698,
      "grad_norm": 0.6533145308494568,
      "learning_rate": 6.991756884089008e-06,
      "loss": 0.0147,
      "step": 933080
    },
    {
      "epoch": 1.5270386153715232,
      "grad_norm": 0.1564185917377472,
      "learning_rate": 6.99169099187549e-06,
      "loss": 0.0219,
      "step": 933100
    },
    {
      "epoch": 1.5270713458101766,
      "grad_norm": 0.20852433145046234,
      "learning_rate": 6.991625099661974e-06,
      "loss": 0.0334,
      "step": 933120
    },
    {
      "epoch": 1.52710407624883,
      "grad_norm": 0.625329315662384,
      "learning_rate": 6.9915592074484566e-06,
      "loss": 0.0202,
      "step": 933140
    },
    {
      "epoch": 1.527136806687483,
      "grad_norm": 0.2890520989894867,
      "learning_rate": 6.991493315234939e-06,
      "loss": 0.02,
      "step": 933160
    },
    {
      "epoch": 1.5271695371261367,
      "grad_norm": 0.4148648679256439,
      "learning_rate": 6.991427423021422e-06,
      "loss": 0.0189,
      "step": 933180
    },
    {
      "epoch": 1.5272022675647898,
      "grad_norm": 0.626056432723999,
      "learning_rate": 6.991361530807906e-06,
      "loss": 0.0242,
      "step": 933200
    },
    {
      "epoch": 1.5272349980034432,
      "grad_norm": 0.387417733669281,
      "learning_rate": 6.9912956385943875e-06,
      "loss": 0.018,
      "step": 933220
    },
    {
      "epoch": 1.5272677284420966,
      "grad_norm": 0.3037303388118744,
      "learning_rate": 6.991229746380871e-06,
      "loss": 0.0169,
      "step": 933240
    },
    {
      "epoch": 1.52730045888075,
      "grad_norm": 0.6026328802108765,
      "learning_rate": 6.991163854167353e-06,
      "loss": 0.0259,
      "step": 933260
    },
    {
      "epoch": 1.5273331893194033,
      "grad_norm": 1.0558795928955078,
      "learning_rate": 6.991097961953837e-06,
      "loss": 0.0194,
      "step": 933280
    },
    {
      "epoch": 1.5273659197580565,
      "grad_norm": 0.7269089818000793,
      "learning_rate": 6.9910320697403185e-06,
      "loss": 0.0273,
      "step": 933300
    },
    {
      "epoch": 1.52739865019671,
      "grad_norm": 0.3834783434867859,
      "learning_rate": 6.990966177526802e-06,
      "loss": 0.0164,
      "step": 933320
    },
    {
      "epoch": 1.5274313806353632,
      "grad_norm": 0.16519270837306976,
      "learning_rate": 6.990900285313285e-06,
      "loss": 0.0145,
      "step": 933340
    },
    {
      "epoch": 1.5274641110740166,
      "grad_norm": 0.7746363282203674,
      "learning_rate": 6.990834393099768e-06,
      "loss": 0.0199,
      "step": 933360
    },
    {
      "epoch": 1.52749684151267,
      "grad_norm": 0.9375536441802979,
      "learning_rate": 6.990768500886251e-06,
      "loss": 0.0286,
      "step": 933380
    },
    {
      "epoch": 1.5275295719513233,
      "grad_norm": 1.1785365343093872,
      "learning_rate": 6.990702608672734e-06,
      "loss": 0.0153,
      "step": 933400
    },
    {
      "epoch": 1.5275623023899767,
      "grad_norm": 0.2548656463623047,
      "learning_rate": 6.9906367164592175e-06,
      "loss": 0.0149,
      "step": 933420
    },
    {
      "epoch": 1.5275950328286299,
      "grad_norm": 0.2387443333864212,
      "learning_rate": 6.990570824245699e-06,
      "loss": 0.0222,
      "step": 933440
    },
    {
      "epoch": 1.5276277632672834,
      "grad_norm": 0.21616865694522858,
      "learning_rate": 6.990504932032183e-06,
      "loss": 0.0165,
      "step": 933460
    },
    {
      "epoch": 1.5276604937059366,
      "grad_norm": 0.14243540167808533,
      "learning_rate": 6.990439039818665e-06,
      "loss": 0.0193,
      "step": 933480
    },
    {
      "epoch": 1.52769322414459,
      "grad_norm": 0.33520734310150146,
      "learning_rate": 6.990373147605148e-06,
      "loss": 0.0235,
      "step": 933500
    },
    {
      "epoch": 1.5277259545832433,
      "grad_norm": 0.5705293416976929,
      "learning_rate": 6.99030725539163e-06,
      "loss": 0.0282,
      "step": 933520
    },
    {
      "epoch": 1.5277586850218967,
      "grad_norm": 0.44068092107772827,
      "learning_rate": 6.990241363178114e-06,
      "loss": 0.0137,
      "step": 933540
    },
    {
      "epoch": 1.52779141546055,
      "grad_norm": 0.86845862865448,
      "learning_rate": 6.990175470964597e-06,
      "loss": 0.0239,
      "step": 933560
    },
    {
      "epoch": 1.5278241458992032,
      "grad_norm": 0.29747647047042847,
      "learning_rate": 6.990109578751079e-06,
      "loss": 0.0195,
      "step": 933580
    },
    {
      "epoch": 1.5278568763378568,
      "grad_norm": 0.7581491470336914,
      "learning_rate": 6.990043686537562e-06,
      "loss": 0.0339,
      "step": 933600
    },
    {
      "epoch": 1.52788960677651,
      "grad_norm": 0.3135848641395569,
      "learning_rate": 6.989977794324046e-06,
      "loss": 0.0178,
      "step": 933620
    },
    {
      "epoch": 1.5279223372151634,
      "grad_norm": 0.5402161478996277,
      "learning_rate": 6.989911902110528e-06,
      "loss": 0.0319,
      "step": 933640
    },
    {
      "epoch": 1.5279550676538167,
      "grad_norm": 0.4441653788089752,
      "learning_rate": 6.989846009897011e-06,
      "loss": 0.0158,
      "step": 933660
    },
    {
      "epoch": 1.5279877980924699,
      "grad_norm": 0.2657169997692108,
      "learning_rate": 6.989780117683493e-06,
      "loss": 0.0146,
      "step": 933680
    },
    {
      "epoch": 1.5280205285311235,
      "grad_norm": 0.5807595252990723,
      "learning_rate": 6.989714225469977e-06,
      "loss": 0.0209,
      "step": 933700
    },
    {
      "epoch": 1.5280532589697766,
      "grad_norm": 1.3660855293273926,
      "learning_rate": 6.98964833325646e-06,
      "loss": 0.0208,
      "step": 933720
    },
    {
      "epoch": 1.5280859894084302,
      "grad_norm": 0.453233003616333,
      "learning_rate": 6.989582441042942e-06,
      "loss": 0.0256,
      "step": 933740
    },
    {
      "epoch": 1.5281187198470834,
      "grad_norm": 0.8826919198036194,
      "learning_rate": 6.989516548829426e-06,
      "loss": 0.0175,
      "step": 933760
    },
    {
      "epoch": 1.5281514502857367,
      "grad_norm": 0.5149514675140381,
      "learning_rate": 6.9894506566159085e-06,
      "loss": 0.0172,
      "step": 933780
    },
    {
      "epoch": 1.52818418072439,
      "grad_norm": 0.3595542907714844,
      "learning_rate": 6.989384764402391e-06,
      "loss": 0.0187,
      "step": 933800
    },
    {
      "epoch": 1.5282169111630433,
      "grad_norm": 1.0695399045944214,
      "learning_rate": 6.989318872188874e-06,
      "loss": 0.0236,
      "step": 933820
    },
    {
      "epoch": 1.5282496416016969,
      "grad_norm": 0.26751044392585754,
      "learning_rate": 6.9892529799753575e-06,
      "loss": 0.0146,
      "step": 933840
    },
    {
      "epoch": 1.52828237204035,
      "grad_norm": 0.6666930913925171,
      "learning_rate": 6.9891870877618394e-06,
      "loss": 0.0241,
      "step": 933860
    },
    {
      "epoch": 1.5283151024790034,
      "grad_norm": 1.364927887916565,
      "learning_rate": 6.989121195548323e-06,
      "loss": 0.0203,
      "step": 933880
    },
    {
      "epoch": 1.5283478329176567,
      "grad_norm": 0.5594912171363831,
      "learning_rate": 6.989055303334805e-06,
      "loss": 0.0176,
      "step": 933900
    },
    {
      "epoch": 1.5283805633563101,
      "grad_norm": 0.4243089556694031,
      "learning_rate": 6.9889894111212885e-06,
      "loss": 0.0183,
      "step": 933920
    },
    {
      "epoch": 1.5284132937949635,
      "grad_norm": 0.10597740858793259,
      "learning_rate": 6.988923518907771e-06,
      "loss": 0.0304,
      "step": 933940
    },
    {
      "epoch": 1.5284460242336166,
      "grad_norm": 0.6144446730613708,
      "learning_rate": 6.988857626694254e-06,
      "loss": 0.022,
      "step": 933960
    },
    {
      "epoch": 1.5284787546722702,
      "grad_norm": 0.4093611240386963,
      "learning_rate": 6.988791734480737e-06,
      "loss": 0.0214,
      "step": 933980
    },
    {
      "epoch": 1.5285114851109234,
      "grad_norm": 0.8511373996734619,
      "learning_rate": 6.98872584226722e-06,
      "loss": 0.0278,
      "step": 934000
    },
    {
      "epoch": 1.5285442155495768,
      "grad_norm": 0.30640730261802673,
      "learning_rate": 6.988659950053702e-06,
      "loss": 0.0221,
      "step": 934020
    },
    {
      "epoch": 1.5285769459882301,
      "grad_norm": 0.18183603882789612,
      "learning_rate": 6.988594057840186e-06,
      "loss": 0.0143,
      "step": 934040
    },
    {
      "epoch": 1.5286096764268835,
      "grad_norm": 0.36236193776130676,
      "learning_rate": 6.988528165626668e-06,
      "loss": 0.0265,
      "step": 934060
    },
    {
      "epoch": 1.5286424068655369,
      "grad_norm": 0.5176925659179688,
      "learning_rate": 6.988462273413151e-06,
      "loss": 0.0253,
      "step": 934080
    },
    {
      "epoch": 1.52867513730419,
      "grad_norm": 0.4936104118824005,
      "learning_rate": 6.988396381199635e-06,
      "loss": 0.0165,
      "step": 934100
    },
    {
      "epoch": 1.5287078677428436,
      "grad_norm": 1.0853984355926514,
      "learning_rate": 6.988330488986117e-06,
      "loss": 0.0259,
      "step": 934120
    },
    {
      "epoch": 1.5287405981814968,
      "grad_norm": 0.24344594776630402,
      "learning_rate": 6.9882645967726e-06,
      "loss": 0.0126,
      "step": 934140
    },
    {
      "epoch": 1.5287733286201501,
      "grad_norm": 0.13425888121128082,
      "learning_rate": 6.988198704559083e-06,
      "loss": 0.0164,
      "step": 934160
    },
    {
      "epoch": 1.5288060590588035,
      "grad_norm": 0.6845518946647644,
      "learning_rate": 6.988132812345566e-06,
      "loss": 0.0153,
      "step": 934180
    },
    {
      "epoch": 1.5288387894974569,
      "grad_norm": 0.18355795741081238,
      "learning_rate": 6.9880669201320485e-06,
      "loss": 0.0317,
      "step": 934200
    },
    {
      "epoch": 1.5288715199361103,
      "grad_norm": 0.11645863950252533,
      "learning_rate": 6.988001027918532e-06,
      "loss": 0.0165,
      "step": 934220
    },
    {
      "epoch": 1.5289042503747634,
      "grad_norm": 0.8571961522102356,
      "learning_rate": 6.987935135705014e-06,
      "loss": 0.0218,
      "step": 934240
    },
    {
      "epoch": 1.528936980813417,
      "grad_norm": 1.4811537265777588,
      "learning_rate": 6.987869243491498e-06,
      "loss": 0.026,
      "step": 934260
    },
    {
      "epoch": 1.5289697112520702,
      "grad_norm": 0.1591322273015976,
      "learning_rate": 6.9878033512779795e-06,
      "loss": 0.0254,
      "step": 934280
    },
    {
      "epoch": 1.5290024416907235,
      "grad_norm": 0.819756031036377,
      "learning_rate": 6.987737459064463e-06,
      "loss": 0.0271,
      "step": 934300
    },
    {
      "epoch": 1.529035172129377,
      "grad_norm": 0.29532885551452637,
      "learning_rate": 6.987671566850945e-06,
      "loss": 0.0168,
      "step": 934320
    },
    {
      "epoch": 1.5290679025680303,
      "grad_norm": 0.7632012963294983,
      "learning_rate": 6.9876056746374286e-06,
      "loss": 0.0218,
      "step": 934340
    },
    {
      "epoch": 1.5291006330066836,
      "grad_norm": 0.5908033847808838,
      "learning_rate": 6.987539782423911e-06,
      "loss": 0.024,
      "step": 934360
    },
    {
      "epoch": 1.5291333634453368,
      "grad_norm": 0.39931920170783997,
      "learning_rate": 6.987473890210394e-06,
      "loss": 0.0234,
      "step": 934380
    },
    {
      "epoch": 1.5291660938839904,
      "grad_norm": 0.15515467524528503,
      "learning_rate": 6.987407997996877e-06,
      "loss": 0.0128,
      "step": 934400
    },
    {
      "epoch": 1.5291988243226435,
      "grad_norm": 0.3184798061847687,
      "learning_rate": 6.98734210578336e-06,
      "loss": 0.0259,
      "step": 934420
    },
    {
      "epoch": 1.529231554761297,
      "grad_norm": 0.4226025342941284,
      "learning_rate": 6.987276213569843e-06,
      "loss": 0.028,
      "step": 934440
    },
    {
      "epoch": 1.5292642851999503,
      "grad_norm": 2.9130399227142334,
      "learning_rate": 6.987210321356326e-06,
      "loss": 0.0267,
      "step": 934460
    },
    {
      "epoch": 1.5292970156386034,
      "grad_norm": 0.6331372857093811,
      "learning_rate": 6.9871444291428094e-06,
      "loss": 0.022,
      "step": 934480
    },
    {
      "epoch": 1.529329746077257,
      "grad_norm": 0.1378975659608841,
      "learning_rate": 6.987078536929291e-06,
      "loss": 0.0167,
      "step": 934500
    },
    {
      "epoch": 1.5293624765159102,
      "grad_norm": 0.42962411046028137,
      "learning_rate": 6.987012644715775e-06,
      "loss": 0.0199,
      "step": 934520
    },
    {
      "epoch": 1.5293952069545638,
      "grad_norm": 0.505096971988678,
      "learning_rate": 6.986946752502257e-06,
      "loss": 0.0172,
      "step": 934540
    },
    {
      "epoch": 1.529427937393217,
      "grad_norm": 1.1959227323532104,
      "learning_rate": 6.98688086028874e-06,
      "loss": 0.0206,
      "step": 934560
    },
    {
      "epoch": 1.5294606678318703,
      "grad_norm": 1.2699651718139648,
      "learning_rate": 6.986814968075223e-06,
      "loss": 0.0139,
      "step": 934580
    },
    {
      "epoch": 1.5294933982705237,
      "grad_norm": 0.23265540599822998,
      "learning_rate": 6.986749075861706e-06,
      "loss": 0.0126,
      "step": 934600
    },
    {
      "epoch": 1.5295261287091768,
      "grad_norm": 0.9254974722862244,
      "learning_rate": 6.986683183648189e-06,
      "loss": 0.0201,
      "step": 934620
    },
    {
      "epoch": 1.5295588591478304,
      "grad_norm": 0.3711282014846802,
      "learning_rate": 6.986617291434672e-06,
      "loss": 0.0241,
      "step": 934640
    },
    {
      "epoch": 1.5295915895864836,
      "grad_norm": 0.9116690754890442,
      "learning_rate": 6.986551399221154e-06,
      "loss": 0.0205,
      "step": 934660
    },
    {
      "epoch": 1.529624320025137,
      "grad_norm": 0.42091748118400574,
      "learning_rate": 6.986485507007638e-06,
      "loss": 0.0258,
      "step": 934680
    },
    {
      "epoch": 1.5296570504637903,
      "grad_norm": 1.4168262481689453,
      "learning_rate": 6.98641961479412e-06,
      "loss": 0.0131,
      "step": 934700
    },
    {
      "epoch": 1.5296897809024437,
      "grad_norm": 0.48577165603637695,
      "learning_rate": 6.986353722580603e-06,
      "loss": 0.021,
      "step": 934720
    },
    {
      "epoch": 1.529722511341097,
      "grad_norm": 0.6249799132347107,
      "learning_rate": 6.986287830367086e-06,
      "loss": 0.0138,
      "step": 934740
    },
    {
      "epoch": 1.5297552417797502,
      "grad_norm": 1.3070971965789795,
      "learning_rate": 6.986221938153569e-06,
      "loss": 0.0172,
      "step": 934760
    },
    {
      "epoch": 1.5297879722184038,
      "grad_norm": 0.6643670797348022,
      "learning_rate": 6.986156045940052e-06,
      "loss": 0.0157,
      "step": 934780
    },
    {
      "epoch": 1.529820702657057,
      "grad_norm": 0.543910026550293,
      "learning_rate": 6.986090153726535e-06,
      "loss": 0.021,
      "step": 934800
    },
    {
      "epoch": 1.5298534330957103,
      "grad_norm": 0.6983727812767029,
      "learning_rate": 6.986024261513018e-06,
      "loss": 0.0243,
      "step": 934820
    },
    {
      "epoch": 1.5298861635343637,
      "grad_norm": 0.6094639897346497,
      "learning_rate": 6.9859583692995004e-06,
      "loss": 0.0152,
      "step": 934840
    },
    {
      "epoch": 1.529918893973017,
      "grad_norm": 0.19855494797229767,
      "learning_rate": 6.985892477085984e-06,
      "loss": 0.0166,
      "step": 934860
    },
    {
      "epoch": 1.5299516244116704,
      "grad_norm": 0.20594409108161926,
      "learning_rate": 6.985826584872466e-06,
      "loss": 0.0283,
      "step": 934880
    },
    {
      "epoch": 1.5299843548503236,
      "grad_norm": 0.6173515915870667,
      "learning_rate": 6.9857606926589495e-06,
      "loss": 0.0151,
      "step": 934900
    },
    {
      "epoch": 1.5300170852889772,
      "grad_norm": 0.24728983640670776,
      "learning_rate": 6.985694800445431e-06,
      "loss": 0.0208,
      "step": 934920
    },
    {
      "epoch": 1.5300498157276303,
      "grad_norm": 0.3897555470466614,
      "learning_rate": 6.985628908231915e-06,
      "loss": 0.0272,
      "step": 934940
    },
    {
      "epoch": 1.5300825461662837,
      "grad_norm": 1.2848055362701416,
      "learning_rate": 6.985563016018398e-06,
      "loss": 0.029,
      "step": 934960
    },
    {
      "epoch": 1.530115276604937,
      "grad_norm": 0.40185442566871643,
      "learning_rate": 6.9854971238048805e-06,
      "loss": 0.0243,
      "step": 934980
    },
    {
      "epoch": 1.5301480070435904,
      "grad_norm": 0.4606277346611023,
      "learning_rate": 6.985431231591363e-06,
      "loss": 0.0203,
      "step": 935000
    },
    {
      "epoch": 1.5301807374822438,
      "grad_norm": 0.33887171745300293,
      "learning_rate": 6.985365339377847e-06,
      "loss": 0.0143,
      "step": 935020
    },
    {
      "epoch": 1.530213467920897,
      "grad_norm": 0.31170859932899475,
      "learning_rate": 6.985299447164329e-06,
      "loss": 0.0213,
      "step": 935040
    },
    {
      "epoch": 1.5302461983595506,
      "grad_norm": 0.750755786895752,
      "learning_rate": 6.985233554950812e-06,
      "loss": 0.0176,
      "step": 935060
    },
    {
      "epoch": 1.5302789287982037,
      "grad_norm": 0.6210861802101135,
      "learning_rate": 6.985167662737294e-06,
      "loss": 0.0156,
      "step": 935080
    },
    {
      "epoch": 1.530311659236857,
      "grad_norm": 0.6653426289558411,
      "learning_rate": 6.985101770523778e-06,
      "loss": 0.0297,
      "step": 935100
    },
    {
      "epoch": 1.5303443896755105,
      "grad_norm": 0.11124199628829956,
      "learning_rate": 6.985035878310261e-06,
      "loss": 0.0235,
      "step": 935120
    },
    {
      "epoch": 1.5303771201141638,
      "grad_norm": 0.7750478386878967,
      "learning_rate": 6.984969986096743e-06,
      "loss": 0.0196,
      "step": 935140
    },
    {
      "epoch": 1.5304098505528172,
      "grad_norm": 0.23523728549480438,
      "learning_rate": 6.984904093883227e-06,
      "loss": 0.0229,
      "step": 935160
    },
    {
      "epoch": 1.5304425809914703,
      "grad_norm": 0.825061559677124,
      "learning_rate": 6.9848382016697096e-06,
      "loss": 0.0234,
      "step": 935180
    },
    {
      "epoch": 1.530475311430124,
      "grad_norm": 0.3884052634239197,
      "learning_rate": 6.984772309456192e-06,
      "loss": 0.0178,
      "step": 935200
    },
    {
      "epoch": 1.530508041868777,
      "grad_norm": 0.3413587212562561,
      "learning_rate": 6.984706417242675e-06,
      "loss": 0.0226,
      "step": 935220
    },
    {
      "epoch": 1.5305407723074305,
      "grad_norm": 0.27265027165412903,
      "learning_rate": 6.984640525029159e-06,
      "loss": 0.0298,
      "step": 935240
    },
    {
      "epoch": 1.5305735027460838,
      "grad_norm": 0.9557211995124817,
      "learning_rate": 6.9845746328156405e-06,
      "loss": 0.0165,
      "step": 935260
    },
    {
      "epoch": 1.530606233184737,
      "grad_norm": 0.4889124929904938,
      "learning_rate": 6.984508740602124e-06,
      "loss": 0.0217,
      "step": 935280
    },
    {
      "epoch": 1.5306389636233906,
      "grad_norm": 0.15951327979564667,
      "learning_rate": 6.984442848388606e-06,
      "loss": 0.0187,
      "step": 935300
    },
    {
      "epoch": 1.5306716940620437,
      "grad_norm": 0.26041728258132935,
      "learning_rate": 6.98437695617509e-06,
      "loss": 0.0258,
      "step": 935320
    },
    {
      "epoch": 1.5307044245006973,
      "grad_norm": 0.8022401332855225,
      "learning_rate": 6.9843110639615715e-06,
      "loss": 0.0188,
      "step": 935340
    },
    {
      "epoch": 1.5307371549393505,
      "grad_norm": 0.2836446166038513,
      "learning_rate": 6.984245171748055e-06,
      "loss": 0.0145,
      "step": 935360
    },
    {
      "epoch": 1.5307698853780038,
      "grad_norm": 0.2391132265329361,
      "learning_rate": 6.984179279534538e-06,
      "loss": 0.0221,
      "step": 935380
    },
    {
      "epoch": 1.5308026158166572,
      "grad_norm": 1.4926396608352661,
      "learning_rate": 6.9841133873210206e-06,
      "loss": 0.0238,
      "step": 935400
    },
    {
      "epoch": 1.5308353462553104,
      "grad_norm": 0.8972170352935791,
      "learning_rate": 6.984047495107503e-06,
      "loss": 0.0155,
      "step": 935420
    },
    {
      "epoch": 1.530868076693964,
      "grad_norm": 0.33127856254577637,
      "learning_rate": 6.983981602893987e-06,
      "loss": 0.0164,
      "step": 935440
    },
    {
      "epoch": 1.5309008071326171,
      "grad_norm": 0.21805642545223236,
      "learning_rate": 6.983915710680469e-06,
      "loss": 0.0232,
      "step": 935460
    },
    {
      "epoch": 1.5309335375712705,
      "grad_norm": 0.2522018849849701,
      "learning_rate": 6.983849818466952e-06,
      "loss": 0.0232,
      "step": 935480
    },
    {
      "epoch": 1.5309662680099239,
      "grad_norm": 0.09351962059736252,
      "learning_rate": 6.983783926253436e-06,
      "loss": 0.0237,
      "step": 935500
    },
    {
      "epoch": 1.5309989984485772,
      "grad_norm": 0.2276352047920227,
      "learning_rate": 6.983718034039918e-06,
      "loss": 0.013,
      "step": 935520
    },
    {
      "epoch": 1.5310317288872306,
      "grad_norm": 0.2243330180644989,
      "learning_rate": 6.983652141826401e-06,
      "loss": 0.0268,
      "step": 935540
    },
    {
      "epoch": 1.5310644593258838,
      "grad_norm": 0.8264784812927246,
      "learning_rate": 6.983586249612883e-06,
      "loss": 0.0318,
      "step": 935560
    },
    {
      "epoch": 1.5310971897645373,
      "grad_norm": 2.365816354751587,
      "learning_rate": 6.983520357399367e-06,
      "loss": 0.0238,
      "step": 935580
    },
    {
      "epoch": 1.5311299202031905,
      "grad_norm": 0.2755067050457001,
      "learning_rate": 6.98345446518585e-06,
      "loss": 0.0201,
      "step": 935600
    },
    {
      "epoch": 1.5311626506418439,
      "grad_norm": 0.3241603970527649,
      "learning_rate": 6.983388572972332e-06,
      "loss": 0.0225,
      "step": 935620
    },
    {
      "epoch": 1.5311953810804972,
      "grad_norm": 0.5123308897018433,
      "learning_rate": 6.983322680758815e-06,
      "loss": 0.0217,
      "step": 935640
    },
    {
      "epoch": 1.5312281115191506,
      "grad_norm": 1.1851787567138672,
      "learning_rate": 6.983256788545299e-06,
      "loss": 0.0212,
      "step": 935660
    },
    {
      "epoch": 1.531260841957804,
      "grad_norm": 0.6481626629829407,
      "learning_rate": 6.983190896331781e-06,
      "loss": 0.0245,
      "step": 935680
    },
    {
      "epoch": 1.5312935723964571,
      "grad_norm": 0.17333804070949554,
      "learning_rate": 6.983125004118264e-06,
      "loss": 0.0168,
      "step": 935700
    },
    {
      "epoch": 1.5313263028351107,
      "grad_norm": 0.39818763732910156,
      "learning_rate": 6.983059111904746e-06,
      "loss": 0.016,
      "step": 935720
    },
    {
      "epoch": 1.5313590332737639,
      "grad_norm": 0.4624156653881073,
      "learning_rate": 6.98299321969123e-06,
      "loss": 0.0279,
      "step": 935740
    },
    {
      "epoch": 1.5313917637124173,
      "grad_norm": 0.481614351272583,
      "learning_rate": 6.982927327477712e-06,
      "loss": 0.0192,
      "step": 935760
    },
    {
      "epoch": 1.5314244941510706,
      "grad_norm": 4.836796283721924,
      "learning_rate": 6.982861435264195e-06,
      "loss": 0.0213,
      "step": 935780
    },
    {
      "epoch": 1.531457224589724,
      "grad_norm": 0.6991821527481079,
      "learning_rate": 6.982795543050678e-06,
      "loss": 0.0246,
      "step": 935800
    },
    {
      "epoch": 1.5314899550283774,
      "grad_norm": 0.2718410789966583,
      "learning_rate": 6.9827296508371615e-06,
      "loss": 0.0212,
      "step": 935820
    },
    {
      "epoch": 1.5315226854670305,
      "grad_norm": 0.5405269265174866,
      "learning_rate": 6.982663758623644e-06,
      "loss": 0.0253,
      "step": 935840
    },
    {
      "epoch": 1.5315554159056841,
      "grad_norm": 0.16843454539775848,
      "learning_rate": 6.982597866410127e-06,
      "loss": 0.0268,
      "step": 935860
    },
    {
      "epoch": 1.5315881463443373,
      "grad_norm": 0.9685387015342712,
      "learning_rate": 6.9825319741966105e-06,
      "loss": 0.0246,
      "step": 935880
    },
    {
      "epoch": 1.5316208767829906,
      "grad_norm": 0.8274223804473877,
      "learning_rate": 6.9824660819830924e-06,
      "loss": 0.0166,
      "step": 935900
    },
    {
      "epoch": 1.531653607221644,
      "grad_norm": 0.42629796266555786,
      "learning_rate": 6.982400189769576e-06,
      "loss": 0.0187,
      "step": 935920
    },
    {
      "epoch": 1.5316863376602972,
      "grad_norm": 0.20723958313465118,
      "learning_rate": 6.982334297556058e-06,
      "loss": 0.0217,
      "step": 935940
    },
    {
      "epoch": 1.5317190680989508,
      "grad_norm": 0.40610283613204956,
      "learning_rate": 6.9822684053425415e-06,
      "loss": 0.026,
      "step": 935960
    },
    {
      "epoch": 1.531751798537604,
      "grad_norm": 3.674255847930908,
      "learning_rate": 6.982202513129024e-06,
      "loss": 0.0238,
      "step": 935980
    },
    {
      "epoch": 1.5317845289762575,
      "grad_norm": 0.5626611113548279,
      "learning_rate": 6.982136620915507e-06,
      "loss": 0.0271,
      "step": 936000
    },
    {
      "epoch": 1.5318172594149106,
      "grad_norm": 1.203948974609375,
      "learning_rate": 6.98207072870199e-06,
      "loss": 0.0182,
      "step": 936020
    },
    {
      "epoch": 1.531849989853564,
      "grad_norm": 0.4760377109050751,
      "learning_rate": 6.982004836488473e-06,
      "loss": 0.0168,
      "step": 936040
    },
    {
      "epoch": 1.5318827202922174,
      "grad_norm": 0.25825414061546326,
      "learning_rate": 6.981938944274955e-06,
      "loss": 0.0203,
      "step": 936060
    },
    {
      "epoch": 1.5319154507308705,
      "grad_norm": 0.3172778785228729,
      "learning_rate": 6.981873052061439e-06,
      "loss": 0.0154,
      "step": 936080
    },
    {
      "epoch": 1.5319481811695241,
      "grad_norm": 0.5364610552787781,
      "learning_rate": 6.981807159847921e-06,
      "loss": 0.0219,
      "step": 936100
    },
    {
      "epoch": 1.5319809116081773,
      "grad_norm": 0.8971552848815918,
      "learning_rate": 6.981741267634404e-06,
      "loss": 0.0171,
      "step": 936120
    },
    {
      "epoch": 1.5320136420468307,
      "grad_norm": 0.24654142558574677,
      "learning_rate": 6.981675375420887e-06,
      "loss": 0.0249,
      "step": 936140
    },
    {
      "epoch": 1.532046372485484,
      "grad_norm": 0.9284900426864624,
      "learning_rate": 6.98160948320737e-06,
      "loss": 0.0241,
      "step": 936160
    },
    {
      "epoch": 1.5320791029241374,
      "grad_norm": 0.9393734931945801,
      "learning_rate": 6.981543590993853e-06,
      "loss": 0.0149,
      "step": 936180
    },
    {
      "epoch": 1.5321118333627908,
      "grad_norm": 0.3401844799518585,
      "learning_rate": 6.981477698780336e-06,
      "loss": 0.0126,
      "step": 936200
    },
    {
      "epoch": 1.532144563801444,
      "grad_norm": 0.6653257012367249,
      "learning_rate": 6.981411806566819e-06,
      "loss": 0.0192,
      "step": 936220
    },
    {
      "epoch": 1.5321772942400975,
      "grad_norm": 0.3363502621650696,
      "learning_rate": 6.9813459143533015e-06,
      "loss": 0.0167,
      "step": 936240
    },
    {
      "epoch": 1.5322100246787507,
      "grad_norm": 1.346622109413147,
      "learning_rate": 6.981280022139785e-06,
      "loss": 0.019,
      "step": 936260
    },
    {
      "epoch": 1.532242755117404,
      "grad_norm": 1.7717819213867188,
      "learning_rate": 6.981214129926267e-06,
      "loss": 0.0264,
      "step": 936280
    },
    {
      "epoch": 1.5322754855560574,
      "grad_norm": 1.8527518510818481,
      "learning_rate": 6.981148237712751e-06,
      "loss": 0.0154,
      "step": 936300
    },
    {
      "epoch": 1.5323082159947108,
      "grad_norm": 0.10710480809211731,
      "learning_rate": 6.9810823454992325e-06,
      "loss": 0.0286,
      "step": 936320
    },
    {
      "epoch": 1.5323409464333642,
      "grad_norm": 0.7752248644828796,
      "learning_rate": 6.981016453285716e-06,
      "loss": 0.0151,
      "step": 936340
    },
    {
      "epoch": 1.5323736768720173,
      "grad_norm": 0.6845085024833679,
      "learning_rate": 6.980950561072198e-06,
      "loss": 0.0211,
      "step": 936360
    },
    {
      "epoch": 1.532406407310671,
      "grad_norm": 0.2918369174003601,
      "learning_rate": 6.9808846688586816e-06,
      "loss": 0.0227,
      "step": 936380
    },
    {
      "epoch": 1.532439137749324,
      "grad_norm": 0.8144981861114502,
      "learning_rate": 6.980818776645164e-06,
      "loss": 0.0153,
      "step": 936400
    },
    {
      "epoch": 1.5324718681879774,
      "grad_norm": 0.12973982095718384,
      "learning_rate": 6.980752884431647e-06,
      "loss": 0.0195,
      "step": 936420
    },
    {
      "epoch": 1.5325045986266308,
      "grad_norm": 0.7581329345703125,
      "learning_rate": 6.98068699221813e-06,
      "loss": 0.029,
      "step": 936440
    },
    {
      "epoch": 1.5325373290652842,
      "grad_norm": 1.9852352142333984,
      "learning_rate": 6.980621100004613e-06,
      "loss": 0.0254,
      "step": 936460
    },
    {
      "epoch": 1.5325700595039375,
      "grad_norm": 0.5163992047309875,
      "learning_rate": 6.980555207791095e-06,
      "loss": 0.0265,
      "step": 936480
    },
    {
      "epoch": 1.5326027899425907,
      "grad_norm": 0.41710981726646423,
      "learning_rate": 6.980489315577579e-06,
      "loss": 0.0175,
      "step": 936500
    },
    {
      "epoch": 1.5326355203812443,
      "grad_norm": 0.7191121578216553,
      "learning_rate": 6.980423423364061e-06,
      "loss": 0.0258,
      "step": 936520
    },
    {
      "epoch": 1.5326682508198974,
      "grad_norm": 0.6307080984115601,
      "learning_rate": 6.980357531150544e-06,
      "loss": 0.0242,
      "step": 936540
    },
    {
      "epoch": 1.5327009812585508,
      "grad_norm": 0.4726736545562744,
      "learning_rate": 6.980291638937028e-06,
      "loss": 0.0158,
      "step": 936560
    },
    {
      "epoch": 1.5327337116972042,
      "grad_norm": 0.7547915577888489,
      "learning_rate": 6.98022574672351e-06,
      "loss": 0.0165,
      "step": 936580
    },
    {
      "epoch": 1.5327664421358576,
      "grad_norm": 0.7148471474647522,
      "learning_rate": 6.980159854509993e-06,
      "loss": 0.0182,
      "step": 936600
    },
    {
      "epoch": 1.532799172574511,
      "grad_norm": 0.2033299207687378,
      "learning_rate": 6.980093962296476e-06,
      "loss": 0.0237,
      "step": 936620
    },
    {
      "epoch": 1.532831903013164,
      "grad_norm": 0.6475666761398315,
      "learning_rate": 6.980028070082959e-06,
      "loss": 0.0156,
      "step": 936640
    },
    {
      "epoch": 1.5328646334518177,
      "grad_norm": 0.28345349431037903,
      "learning_rate": 6.979962177869442e-06,
      "loss": 0.0231,
      "step": 936660
    },
    {
      "epoch": 1.5328973638904708,
      "grad_norm": 0.22640319168567657,
      "learning_rate": 6.979896285655925e-06,
      "loss": 0.0209,
      "step": 936680
    },
    {
      "epoch": 1.5329300943291242,
      "grad_norm": 0.717054545879364,
      "learning_rate": 6.979830393442407e-06,
      "loss": 0.0167,
      "step": 936700
    },
    {
      "epoch": 1.5329628247677776,
      "grad_norm": 0.11427100002765656,
      "learning_rate": 6.979764501228891e-06,
      "loss": 0.0231,
      "step": 936720
    },
    {
      "epoch": 1.5329955552064307,
      "grad_norm": 0.965560257434845,
      "learning_rate": 6.979698609015373e-06,
      "loss": 0.0201,
      "step": 936740
    },
    {
      "epoch": 1.5330282856450843,
      "grad_norm": 0.3271660804748535,
      "learning_rate": 6.979632716801856e-06,
      "loss": 0.0171,
      "step": 936760
    },
    {
      "epoch": 1.5330610160837375,
      "grad_norm": 1.1404691934585571,
      "learning_rate": 6.979566824588339e-06,
      "loss": 0.0181,
      "step": 936780
    },
    {
      "epoch": 1.533093746522391,
      "grad_norm": 0.48422420024871826,
      "learning_rate": 6.979500932374822e-06,
      "loss": 0.0162,
      "step": 936800
    },
    {
      "epoch": 1.5331264769610442,
      "grad_norm": 0.1723162829875946,
      "learning_rate": 6.979435040161304e-06,
      "loss": 0.0133,
      "step": 936820
    },
    {
      "epoch": 1.5331592073996976,
      "grad_norm": 0.9103702306747437,
      "learning_rate": 6.979369147947788e-06,
      "loss": 0.0207,
      "step": 936840
    },
    {
      "epoch": 1.533191937838351,
      "grad_norm": 0.2810254395008087,
      "learning_rate": 6.97930325573427e-06,
      "loss": 0.0161,
      "step": 936860
    },
    {
      "epoch": 1.533224668277004,
      "grad_norm": 0.982943594455719,
      "learning_rate": 6.9792373635207535e-06,
      "loss": 0.0234,
      "step": 936880
    },
    {
      "epoch": 1.5332573987156577,
      "grad_norm": 0.3416989743709564,
      "learning_rate": 6.979171471307237e-06,
      "loss": 0.0243,
      "step": 936900
    },
    {
      "epoch": 1.5332901291543108,
      "grad_norm": 0.36948162317276,
      "learning_rate": 6.979105579093719e-06,
      "loss": 0.0204,
      "step": 936920
    },
    {
      "epoch": 1.5333228595929642,
      "grad_norm": 0.5896590352058411,
      "learning_rate": 6.9790396868802025e-06,
      "loss": 0.0157,
      "step": 936940
    },
    {
      "epoch": 1.5333555900316176,
      "grad_norm": 0.4954056739807129,
      "learning_rate": 6.978973794666684e-06,
      "loss": 0.0185,
      "step": 936960
    },
    {
      "epoch": 1.533388320470271,
      "grad_norm": 0.46182870864868164,
      "learning_rate": 6.978907902453168e-06,
      "loss": 0.0178,
      "step": 936980
    },
    {
      "epoch": 1.5334210509089243,
      "grad_norm": 0.13759997487068176,
      "learning_rate": 6.978842010239651e-06,
      "loss": 0.0159,
      "step": 937000
    },
    {
      "epoch": 1.5334537813475775,
      "grad_norm": 0.4296887516975403,
      "learning_rate": 6.9787761180261335e-06,
      "loss": 0.0133,
      "step": 937020
    },
    {
      "epoch": 1.533486511786231,
      "grad_norm": 0.915406346321106,
      "learning_rate": 6.978710225812616e-06,
      "loss": 0.0228,
      "step": 937040
    },
    {
      "epoch": 1.5335192422248842,
      "grad_norm": 1.896858811378479,
      "learning_rate": 6.9786443335991e-06,
      "loss": 0.0245,
      "step": 937060
    },
    {
      "epoch": 1.5335519726635376,
      "grad_norm": 0.33287978172302246,
      "learning_rate": 6.978578441385582e-06,
      "loss": 0.028,
      "step": 937080
    },
    {
      "epoch": 1.533584703102191,
      "grad_norm": 0.64335697889328,
      "learning_rate": 6.978512549172065e-06,
      "loss": 0.0201,
      "step": 937100
    },
    {
      "epoch": 1.5336174335408443,
      "grad_norm": 0.6253990530967712,
      "learning_rate": 6.978446656958547e-06,
      "loss": 0.0223,
      "step": 937120
    },
    {
      "epoch": 1.5336501639794977,
      "grad_norm": 0.4244332015514374,
      "learning_rate": 6.978380764745031e-06,
      "loss": 0.0177,
      "step": 937140
    },
    {
      "epoch": 1.5336828944181509,
      "grad_norm": 0.2402099221944809,
      "learning_rate": 6.978314872531513e-06,
      "loss": 0.0203,
      "step": 937160
    },
    {
      "epoch": 1.5337156248568045,
      "grad_norm": 0.7560787200927734,
      "learning_rate": 6.978248980317996e-06,
      "loss": 0.0151,
      "step": 937180
    },
    {
      "epoch": 1.5337483552954576,
      "grad_norm": 0.14015987515449524,
      "learning_rate": 6.978183088104479e-06,
      "loss": 0.025,
      "step": 937200
    },
    {
      "epoch": 1.533781085734111,
      "grad_norm": 1.3274493217468262,
      "learning_rate": 6.978117195890962e-06,
      "loss": 0.0279,
      "step": 937220
    },
    {
      "epoch": 1.5338138161727644,
      "grad_norm": 1.3821709156036377,
      "learning_rate": 6.978051303677445e-06,
      "loss": 0.042,
      "step": 937240
    },
    {
      "epoch": 1.5338465466114177,
      "grad_norm": 0.27183493971824646,
      "learning_rate": 6.977985411463928e-06,
      "loss": 0.0286,
      "step": 937260
    },
    {
      "epoch": 1.533879277050071,
      "grad_norm": 0.5437090396881104,
      "learning_rate": 6.977919519250412e-06,
      "loss": 0.0271,
      "step": 937280
    },
    {
      "epoch": 1.5339120074887242,
      "grad_norm": 1.5190497636795044,
      "learning_rate": 6.9778536270368935e-06,
      "loss": 0.021,
      "step": 937300
    },
    {
      "epoch": 1.5339447379273778,
      "grad_norm": 0.3193928003311157,
      "learning_rate": 6.977787734823377e-06,
      "loss": 0.0218,
      "step": 937320
    },
    {
      "epoch": 1.533977468366031,
      "grad_norm": 0.2131112664937973,
      "learning_rate": 6.977721842609859e-06,
      "loss": 0.0116,
      "step": 937340
    },
    {
      "epoch": 1.5340101988046844,
      "grad_norm": 0.24919795989990234,
      "learning_rate": 6.977655950396343e-06,
      "loss": 0.0193,
      "step": 937360
    },
    {
      "epoch": 1.5340429292433377,
      "grad_norm": 0.251912921667099,
      "learning_rate": 6.9775900581828245e-06,
      "loss": 0.0206,
      "step": 937380
    },
    {
      "epoch": 1.534075659681991,
      "grad_norm": 0.6522563695907593,
      "learning_rate": 6.977524165969308e-06,
      "loss": 0.0162,
      "step": 937400
    },
    {
      "epoch": 1.5341083901206445,
      "grad_norm": 0.2588318884372711,
      "learning_rate": 6.977458273755791e-06,
      "loss": 0.0146,
      "step": 937420
    },
    {
      "epoch": 1.5341411205592976,
      "grad_norm": 0.12042155116796494,
      "learning_rate": 6.9773923815422736e-06,
      "loss": 0.0164,
      "step": 937440
    },
    {
      "epoch": 1.5341738509979512,
      "grad_norm": 0.2238435447216034,
      "learning_rate": 6.977326489328756e-06,
      "loss": 0.0243,
      "step": 937460
    },
    {
      "epoch": 1.5342065814366044,
      "grad_norm": 0.2909625172615051,
      "learning_rate": 6.97726059711524e-06,
      "loss": 0.0278,
      "step": 937480
    },
    {
      "epoch": 1.5342393118752577,
      "grad_norm": 0.6693602800369263,
      "learning_rate": 6.977194704901722e-06,
      "loss": 0.0253,
      "step": 937500
    },
    {
      "epoch": 1.5342720423139111,
      "grad_norm": 0.858372151851654,
      "learning_rate": 6.977128812688205e-06,
      "loss": 0.0157,
      "step": 937520
    },
    {
      "epoch": 1.5343047727525643,
      "grad_norm": 1.1107397079467773,
      "learning_rate": 6.977062920474687e-06,
      "loss": 0.0204,
      "step": 937540
    },
    {
      "epoch": 1.5343375031912179,
      "grad_norm": 0.590937614440918,
      "learning_rate": 6.976997028261171e-06,
      "loss": 0.0322,
      "step": 937560
    },
    {
      "epoch": 1.534370233629871,
      "grad_norm": 1.3587204217910767,
      "learning_rate": 6.9769311360476544e-06,
      "loss": 0.0251,
      "step": 937580
    },
    {
      "epoch": 1.5344029640685246,
      "grad_norm": 0.4638555347919464,
      "learning_rate": 6.976865243834136e-06,
      "loss": 0.0263,
      "step": 937600
    },
    {
      "epoch": 1.5344356945071778,
      "grad_norm": 0.6331709027290344,
      "learning_rate": 6.97679935162062e-06,
      "loss": 0.017,
      "step": 937620
    },
    {
      "epoch": 1.5344684249458311,
      "grad_norm": 0.778134286403656,
      "learning_rate": 6.976733459407103e-06,
      "loss": 0.0188,
      "step": 937640
    },
    {
      "epoch": 1.5345011553844845,
      "grad_norm": 0.4051312506198883,
      "learning_rate": 6.976667567193585e-06,
      "loss": 0.021,
      "step": 937660
    },
    {
      "epoch": 1.5345338858231377,
      "grad_norm": 1.0571534633636475,
      "learning_rate": 6.976601674980068e-06,
      "loss": 0.0173,
      "step": 937680
    },
    {
      "epoch": 1.5345666162617912,
      "grad_norm": 0.5175085663795471,
      "learning_rate": 6.976535782766552e-06,
      "loss": 0.023,
      "step": 937700
    },
    {
      "epoch": 1.5345993467004444,
      "grad_norm": 0.33660057187080383,
      "learning_rate": 6.976469890553034e-06,
      "loss": 0.0184,
      "step": 937720
    },
    {
      "epoch": 1.5346320771390978,
      "grad_norm": 0.26670780777931213,
      "learning_rate": 6.976403998339517e-06,
      "loss": 0.0192,
      "step": 937740
    },
    {
      "epoch": 1.5346648075777511,
      "grad_norm": 0.920350968837738,
      "learning_rate": 6.976338106125999e-06,
      "loss": 0.0153,
      "step": 937760
    },
    {
      "epoch": 1.5346975380164045,
      "grad_norm": 0.34865522384643555,
      "learning_rate": 6.976272213912483e-06,
      "loss": 0.0262,
      "step": 937780
    },
    {
      "epoch": 1.5347302684550579,
      "grad_norm": 0.22309540212154388,
      "learning_rate": 6.976206321698965e-06,
      "loss": 0.026,
      "step": 937800
    },
    {
      "epoch": 1.534762998893711,
      "grad_norm": 0.9946044087409973,
      "learning_rate": 6.976140429485448e-06,
      "loss": 0.0203,
      "step": 937820
    },
    {
      "epoch": 1.5347957293323646,
      "grad_norm": 0.4236231744289398,
      "learning_rate": 6.976074537271931e-06,
      "loss": 0.0197,
      "step": 937840
    },
    {
      "epoch": 1.5348284597710178,
      "grad_norm": 0.97065669298172,
      "learning_rate": 6.9760086450584145e-06,
      "loss": 0.0131,
      "step": 937860
    },
    {
      "epoch": 1.5348611902096712,
      "grad_norm": 0.4656757116317749,
      "learning_rate": 6.975942752844896e-06,
      "loss": 0.0227,
      "step": 937880
    },
    {
      "epoch": 1.5348939206483245,
      "grad_norm": 0.24554893374443054,
      "learning_rate": 6.97587686063138e-06,
      "loss": 0.0227,
      "step": 937900
    },
    {
      "epoch": 1.534926651086978,
      "grad_norm": 2.006523847579956,
      "learning_rate": 6.975810968417862e-06,
      "loss": 0.0305,
      "step": 937920
    },
    {
      "epoch": 1.5349593815256313,
      "grad_norm": 2.3937244415283203,
      "learning_rate": 6.9757450762043454e-06,
      "loss": 0.0173,
      "step": 937940
    },
    {
      "epoch": 1.5349921119642844,
      "grad_norm": 0.09567396342754364,
      "learning_rate": 6.975679183990829e-06,
      "loss": 0.0202,
      "step": 937960
    },
    {
      "epoch": 1.535024842402938,
      "grad_norm": 0.4829021394252777,
      "learning_rate": 6.975613291777311e-06,
      "loss": 0.0208,
      "step": 937980
    },
    {
      "epoch": 1.5350575728415912,
      "grad_norm": 1.514971375465393,
      "learning_rate": 6.9755473995637945e-06,
      "loss": 0.0254,
      "step": 938000
    },
    {
      "epoch": 1.5350903032802445,
      "grad_norm": 0.49392393231391907,
      "learning_rate": 6.975481507350277e-06,
      "loss": 0.0187,
      "step": 938020
    },
    {
      "epoch": 1.535123033718898,
      "grad_norm": 0.37514394521713257,
      "learning_rate": 6.97541561513676e-06,
      "loss": 0.0162,
      "step": 938040
    },
    {
      "epoch": 1.5351557641575513,
      "grad_norm": 0.34148308634757996,
      "learning_rate": 6.975349722923243e-06,
      "loss": 0.019,
      "step": 938060
    },
    {
      "epoch": 1.5351884945962047,
      "grad_norm": 0.344780296087265,
      "learning_rate": 6.975283830709726e-06,
      "loss": 0.0269,
      "step": 938080
    },
    {
      "epoch": 1.5352212250348578,
      "grad_norm": 0.9432833194732666,
      "learning_rate": 6.975217938496208e-06,
      "loss": 0.025,
      "step": 938100
    },
    {
      "epoch": 1.5352539554735114,
      "grad_norm": 0.33024054765701294,
      "learning_rate": 6.975152046282692e-06,
      "loss": 0.021,
      "step": 938120
    },
    {
      "epoch": 1.5352866859121646,
      "grad_norm": 0.29558122158050537,
      "learning_rate": 6.975086154069174e-06,
      "loss": 0.0233,
      "step": 938140
    },
    {
      "epoch": 1.535319416350818,
      "grad_norm": 0.3353346586227417,
      "learning_rate": 6.975020261855657e-06,
      "loss": 0.0225,
      "step": 938160
    },
    {
      "epoch": 1.5353521467894713,
      "grad_norm": 0.3607514798641205,
      "learning_rate": 6.974954369642139e-06,
      "loss": 0.0171,
      "step": 938180
    },
    {
      "epoch": 1.5353848772281244,
      "grad_norm": 0.6819696426391602,
      "learning_rate": 6.974888477428623e-06,
      "loss": 0.0169,
      "step": 938200
    },
    {
      "epoch": 1.535417607666778,
      "grad_norm": 0.8093701601028442,
      "learning_rate": 6.9748225852151055e-06,
      "loss": 0.0198,
      "step": 938220
    },
    {
      "epoch": 1.5354503381054312,
      "grad_norm": 0.23600879311561584,
      "learning_rate": 6.974756693001588e-06,
      "loss": 0.0164,
      "step": 938240
    },
    {
      "epoch": 1.5354830685440848,
      "grad_norm": 0.6552351713180542,
      "learning_rate": 6.974690800788071e-06,
      "loss": 0.026,
      "step": 938260
    },
    {
      "epoch": 1.535515798982738,
      "grad_norm": 0.9279346466064453,
      "learning_rate": 6.9746249085745546e-06,
      "loss": 0.0209,
      "step": 938280
    },
    {
      "epoch": 1.5355485294213913,
      "grad_norm": 3.9128589630126953,
      "learning_rate": 6.974559016361037e-06,
      "loss": 0.0147,
      "step": 938300
    },
    {
      "epoch": 1.5355812598600447,
      "grad_norm": 0.7092241644859314,
      "learning_rate": 6.97449312414752e-06,
      "loss": 0.016,
      "step": 938320
    },
    {
      "epoch": 1.5356139902986978,
      "grad_norm": 0.28155168890953064,
      "learning_rate": 6.974427231934004e-06,
      "loss": 0.0224,
      "step": 938340
    },
    {
      "epoch": 1.5356467207373514,
      "grad_norm": 0.2631407678127289,
      "learning_rate": 6.9743613397204855e-06,
      "loss": 0.0258,
      "step": 938360
    },
    {
      "epoch": 1.5356794511760046,
      "grad_norm": 0.18675169348716736,
      "learning_rate": 6.974295447506969e-06,
      "loss": 0.0238,
      "step": 938380
    },
    {
      "epoch": 1.535712181614658,
      "grad_norm": 0.47375550866127014,
      "learning_rate": 6.974229555293451e-06,
      "loss": 0.022,
      "step": 938400
    },
    {
      "epoch": 1.5357449120533113,
      "grad_norm": 0.45167678594589233,
      "learning_rate": 6.974163663079935e-06,
      "loss": 0.0182,
      "step": 938420
    },
    {
      "epoch": 1.5357776424919647,
      "grad_norm": 0.7551408410072327,
      "learning_rate": 6.974097770866417e-06,
      "loss": 0.0198,
      "step": 938440
    },
    {
      "epoch": 1.535810372930618,
      "grad_norm": 0.2702515423297882,
      "learning_rate": 6.9740318786529e-06,
      "loss": 0.0318,
      "step": 938460
    },
    {
      "epoch": 1.5358431033692712,
      "grad_norm": 0.813685953617096,
      "learning_rate": 6.973965986439383e-06,
      "loss": 0.0293,
      "step": 938480
    },
    {
      "epoch": 1.5358758338079248,
      "grad_norm": 0.27144744992256165,
      "learning_rate": 6.973900094225866e-06,
      "loss": 0.0145,
      "step": 938500
    },
    {
      "epoch": 1.535908564246578,
      "grad_norm": 0.6584153175354004,
      "learning_rate": 6.973834202012348e-06,
      "loss": 0.02,
      "step": 938520
    },
    {
      "epoch": 1.5359412946852313,
      "grad_norm": 0.6386563777923584,
      "learning_rate": 6.973768309798832e-06,
      "loss": 0.0147,
      "step": 938540
    },
    {
      "epoch": 1.5359740251238847,
      "grad_norm": 0.3491738438606262,
      "learning_rate": 6.973702417585314e-06,
      "loss": 0.0185,
      "step": 938560
    },
    {
      "epoch": 1.536006755562538,
      "grad_norm": 1.2258092164993286,
      "learning_rate": 6.973636525371797e-06,
      "loss": 0.0237,
      "step": 938580
    },
    {
      "epoch": 1.5360394860011914,
      "grad_norm": 0.5053204894065857,
      "learning_rate": 6.97357063315828e-06,
      "loss": 0.0173,
      "step": 938600
    },
    {
      "epoch": 1.5360722164398446,
      "grad_norm": 0.9729120135307312,
      "learning_rate": 6.973504740944763e-06,
      "loss": 0.0274,
      "step": 938620
    },
    {
      "epoch": 1.5361049468784982,
      "grad_norm": 0.9319851398468018,
      "learning_rate": 6.973438848731246e-06,
      "loss": 0.0168,
      "step": 938640
    },
    {
      "epoch": 1.5361376773171513,
      "grad_norm": 0.15100060403347015,
      "learning_rate": 6.973372956517729e-06,
      "loss": 0.019,
      "step": 938660
    },
    {
      "epoch": 1.5361704077558047,
      "grad_norm": 0.5334511995315552,
      "learning_rate": 6.973307064304212e-06,
      "loss": 0.0189,
      "step": 938680
    },
    {
      "epoch": 1.536203138194458,
      "grad_norm": 0.3596363663673401,
      "learning_rate": 6.973241172090695e-06,
      "loss": 0.0196,
      "step": 938700
    },
    {
      "epoch": 1.5362358686331115,
      "grad_norm": 0.6775312423706055,
      "learning_rate": 6.973175279877178e-06,
      "loss": 0.021,
      "step": 938720
    },
    {
      "epoch": 1.5362685990717648,
      "grad_norm": 0.25191277265548706,
      "learning_rate": 6.97310938766366e-06,
      "loss": 0.0153,
      "step": 938740
    },
    {
      "epoch": 1.536301329510418,
      "grad_norm": 1.5070923566818237,
      "learning_rate": 6.973043495450144e-06,
      "loss": 0.0222,
      "step": 938760
    },
    {
      "epoch": 1.5363340599490716,
      "grad_norm": 0.34562721848487854,
      "learning_rate": 6.972977603236626e-06,
      "loss": 0.0247,
      "step": 938780
    },
    {
      "epoch": 1.5363667903877247,
      "grad_norm": 0.24107949435710907,
      "learning_rate": 6.972911711023109e-06,
      "loss": 0.0141,
      "step": 938800
    },
    {
      "epoch": 1.536399520826378,
      "grad_norm": 0.6143582463264465,
      "learning_rate": 6.972845818809592e-06,
      "loss": 0.016,
      "step": 938820
    },
    {
      "epoch": 1.5364322512650315,
      "grad_norm": 0.5621163845062256,
      "learning_rate": 6.972779926596075e-06,
      "loss": 0.0217,
      "step": 938840
    },
    {
      "epoch": 1.5364649817036848,
      "grad_norm": 0.6108573079109192,
      "learning_rate": 6.972714034382557e-06,
      "loss": 0.0279,
      "step": 938860
    },
    {
      "epoch": 1.5364977121423382,
      "grad_norm": 3.6883981227874756,
      "learning_rate": 6.972648142169041e-06,
      "loss": 0.0207,
      "step": 938880
    },
    {
      "epoch": 1.5365304425809914,
      "grad_norm": 0.3739604651927948,
      "learning_rate": 6.972582249955523e-06,
      "loss": 0.0215,
      "step": 938900
    },
    {
      "epoch": 1.536563173019645,
      "grad_norm": 0.6106355786323547,
      "learning_rate": 6.9725163577420065e-06,
      "loss": 0.018,
      "step": 938920
    },
    {
      "epoch": 1.536595903458298,
      "grad_norm": 0.6614173650741577,
      "learning_rate": 6.972450465528488e-06,
      "loss": 0.028,
      "step": 938940
    },
    {
      "epoch": 1.5366286338969515,
      "grad_norm": 0.9341703057289124,
      "learning_rate": 6.972384573314972e-06,
      "loss": 0.0274,
      "step": 938960
    },
    {
      "epoch": 1.5366613643356049,
      "grad_norm": 0.6437472105026245,
      "learning_rate": 6.972318681101455e-06,
      "loss": 0.0178,
      "step": 938980
    },
    {
      "epoch": 1.536694094774258,
      "grad_norm": 0.14555881917476654,
      "learning_rate": 6.972252788887937e-06,
      "loss": 0.0205,
      "step": 939000
    },
    {
      "epoch": 1.5367268252129116,
      "grad_norm": 0.32501137256622314,
      "learning_rate": 6.972186896674421e-06,
      "loss": 0.0179,
      "step": 939020
    },
    {
      "epoch": 1.5367595556515647,
      "grad_norm": 0.07567062228918076,
      "learning_rate": 6.972121004460904e-06,
      "loss": 0.0126,
      "step": 939040
    },
    {
      "epoch": 1.5367922860902183,
      "grad_norm": 0.5626848340034485,
      "learning_rate": 6.9720551122473865e-06,
      "loss": 0.0212,
      "step": 939060
    },
    {
      "epoch": 1.5368250165288715,
      "grad_norm": 0.4415043294429779,
      "learning_rate": 6.971989220033869e-06,
      "loss": 0.0217,
      "step": 939080
    },
    {
      "epoch": 1.5368577469675249,
      "grad_norm": 0.8178437948226929,
      "learning_rate": 6.971923327820353e-06,
      "loss": 0.035,
      "step": 939100
    },
    {
      "epoch": 1.5368904774061782,
      "grad_norm": 0.6380682587623596,
      "learning_rate": 6.971857435606835e-06,
      "loss": 0.0201,
      "step": 939120
    },
    {
      "epoch": 1.5369232078448314,
      "grad_norm": 0.4099632501602173,
      "learning_rate": 6.971791543393318e-06,
      "loss": 0.0164,
      "step": 939140
    },
    {
      "epoch": 1.536955938283485,
      "grad_norm": 0.935050904750824,
      "learning_rate": 6.9717256511798e-06,
      "loss": 0.022,
      "step": 939160
    },
    {
      "epoch": 1.5369886687221381,
      "grad_norm": 0.7831053733825684,
      "learning_rate": 6.971659758966284e-06,
      "loss": 0.017,
      "step": 939180
    },
    {
      "epoch": 1.5370213991607915,
      "grad_norm": 0.6644300222396851,
      "learning_rate": 6.971593866752766e-06,
      "loss": 0.0287,
      "step": 939200
    },
    {
      "epoch": 1.5370541295994449,
      "grad_norm": 0.3707895278930664,
      "learning_rate": 6.971527974539249e-06,
      "loss": 0.0215,
      "step": 939220
    },
    {
      "epoch": 1.5370868600380982,
      "grad_norm": 3.3642234802246094,
      "learning_rate": 6.971462082325732e-06,
      "loss": 0.0292,
      "step": 939240
    },
    {
      "epoch": 1.5371195904767516,
      "grad_norm": 0.18110254406929016,
      "learning_rate": 6.971396190112215e-06,
      "loss": 0.0178,
      "step": 939260
    },
    {
      "epoch": 1.5371523209154048,
      "grad_norm": 0.2675996422767639,
      "learning_rate": 6.9713302978986975e-06,
      "loss": 0.0219,
      "step": 939280
    },
    {
      "epoch": 1.5371850513540584,
      "grad_norm": 0.5152275562286377,
      "learning_rate": 6.971264405685181e-06,
      "loss": 0.0315,
      "step": 939300
    },
    {
      "epoch": 1.5372177817927115,
      "grad_norm": 0.34002694487571716,
      "learning_rate": 6.971198513471663e-06,
      "loss": 0.0202,
      "step": 939320
    },
    {
      "epoch": 1.5372505122313649,
      "grad_norm": 0.6222602725028992,
      "learning_rate": 6.9711326212581465e-06,
      "loss": 0.0222,
      "step": 939340
    },
    {
      "epoch": 1.5372832426700183,
      "grad_norm": 0.20070937275886536,
      "learning_rate": 6.97106672904463e-06,
      "loss": 0.0165,
      "step": 939360
    },
    {
      "epoch": 1.5373159731086716,
      "grad_norm": 0.12445571273565292,
      "learning_rate": 6.971000836831112e-06,
      "loss": 0.023,
      "step": 939380
    },
    {
      "epoch": 1.537348703547325,
      "grad_norm": 0.49747055768966675,
      "learning_rate": 6.970934944617596e-06,
      "loss": 0.0164,
      "step": 939400
    },
    {
      "epoch": 1.5373814339859782,
      "grad_norm": 0.4381023049354553,
      "learning_rate": 6.9708690524040775e-06,
      "loss": 0.0163,
      "step": 939420
    },
    {
      "epoch": 1.5374141644246317,
      "grad_norm": 1.014617919921875,
      "learning_rate": 6.970803160190561e-06,
      "loss": 0.0146,
      "step": 939440
    },
    {
      "epoch": 1.537446894863285,
      "grad_norm": 0.5424111485481262,
      "learning_rate": 6.970737267977044e-06,
      "loss": 0.0251,
      "step": 939460
    },
    {
      "epoch": 1.5374796253019383,
      "grad_norm": 1.4792550802230835,
      "learning_rate": 6.9706713757635266e-06,
      "loss": 0.0244,
      "step": 939480
    },
    {
      "epoch": 1.5375123557405916,
      "grad_norm": 1.248520851135254,
      "learning_rate": 6.970605483550009e-06,
      "loss": 0.019,
      "step": 939500
    },
    {
      "epoch": 1.537545086179245,
      "grad_norm": 0.6359220743179321,
      "learning_rate": 6.970539591336493e-06,
      "loss": 0.022,
      "step": 939520
    },
    {
      "epoch": 1.5375778166178984,
      "grad_norm": 0.3514259159564972,
      "learning_rate": 6.970473699122975e-06,
      "loss": 0.0186,
      "step": 939540
    },
    {
      "epoch": 1.5376105470565515,
      "grad_norm": 0.7714684009552002,
      "learning_rate": 6.970407806909458e-06,
      "loss": 0.0199,
      "step": 939560
    },
    {
      "epoch": 1.5376432774952051,
      "grad_norm": 0.39713606238365173,
      "learning_rate": 6.97034191469594e-06,
      "loss": 0.0157,
      "step": 939580
    },
    {
      "epoch": 1.5376760079338583,
      "grad_norm": 0.7288076877593994,
      "learning_rate": 6.970276022482424e-06,
      "loss": 0.0231,
      "step": 939600
    },
    {
      "epoch": 1.5377087383725117,
      "grad_norm": 0.8488507270812988,
      "learning_rate": 6.970210130268907e-06,
      "loss": 0.0191,
      "step": 939620
    },
    {
      "epoch": 1.537741468811165,
      "grad_norm": 1.7772339582443237,
      "learning_rate": 6.970144238055389e-06,
      "loss": 0.022,
      "step": 939640
    },
    {
      "epoch": 1.5377741992498184,
      "grad_norm": 0.2092619687318802,
      "learning_rate": 6.970078345841872e-06,
      "loss": 0.0173,
      "step": 939660
    },
    {
      "epoch": 1.5378069296884718,
      "grad_norm": 0.26567843556404114,
      "learning_rate": 6.970012453628356e-06,
      "loss": 0.0141,
      "step": 939680
    },
    {
      "epoch": 1.537839660127125,
      "grad_norm": 0.26844099164009094,
      "learning_rate": 6.969946561414838e-06,
      "loss": 0.0224,
      "step": 939700
    },
    {
      "epoch": 1.5378723905657785,
      "grad_norm": 0.24500998854637146,
      "learning_rate": 6.969880669201321e-06,
      "loss": 0.0235,
      "step": 939720
    },
    {
      "epoch": 1.5379051210044317,
      "grad_norm": 0.3228926658630371,
      "learning_rate": 6.969814776987805e-06,
      "loss": 0.02,
      "step": 939740
    },
    {
      "epoch": 1.537937851443085,
      "grad_norm": 0.46393707394599915,
      "learning_rate": 6.969748884774287e-06,
      "loss": 0.0157,
      "step": 939760
    },
    {
      "epoch": 1.5379705818817384,
      "grad_norm": 1.2863560914993286,
      "learning_rate": 6.96968299256077e-06,
      "loss": 0.0161,
      "step": 939780
    },
    {
      "epoch": 1.5380033123203916,
      "grad_norm": 0.08338150382041931,
      "learning_rate": 6.969617100347252e-06,
      "loss": 0.015,
      "step": 939800
    },
    {
      "epoch": 1.5380360427590452,
      "grad_norm": 0.258456826210022,
      "learning_rate": 6.969551208133736e-06,
      "loss": 0.0122,
      "step": 939820
    },
    {
      "epoch": 1.5380687731976983,
      "grad_norm": 0.8357143998146057,
      "learning_rate": 6.969485315920218e-06,
      "loss": 0.0182,
      "step": 939840
    },
    {
      "epoch": 1.538101503636352,
      "grad_norm": 1.015845537185669,
      "learning_rate": 6.969419423706701e-06,
      "loss": 0.0185,
      "step": 939860
    },
    {
      "epoch": 1.538134234075005,
      "grad_norm": 1.7776139974594116,
      "learning_rate": 6.969353531493184e-06,
      "loss": 0.0157,
      "step": 939880
    },
    {
      "epoch": 1.5381669645136584,
      "grad_norm": 0.21981607377529144,
      "learning_rate": 6.9692876392796675e-06,
      "loss": 0.0152,
      "step": 939900
    },
    {
      "epoch": 1.5381996949523118,
      "grad_norm": 0.40568238496780396,
      "learning_rate": 6.969221747066149e-06,
      "loss": 0.0174,
      "step": 939920
    },
    {
      "epoch": 1.538232425390965,
      "grad_norm": 0.3250153362751007,
      "learning_rate": 6.969155854852633e-06,
      "loss": 0.0192,
      "step": 939940
    },
    {
      "epoch": 1.5382651558296185,
      "grad_norm": 0.2607867121696472,
      "learning_rate": 6.969089962639115e-06,
      "loss": 0.0201,
      "step": 939960
    },
    {
      "epoch": 1.5382978862682717,
      "grad_norm": 1.542887568473816,
      "learning_rate": 6.9690240704255984e-06,
      "loss": 0.025,
      "step": 939980
    },
    {
      "epoch": 1.538330616706925,
      "grad_norm": 0.1797248274087906,
      "learning_rate": 6.96895817821208e-06,
      "loss": 0.0186,
      "step": 940000
    },
    {
      "epoch": 1.5383633471455784,
      "grad_norm": 0.23708048462867737,
      "learning_rate": 6.968892285998564e-06,
      "loss": 0.0176,
      "step": 940020
    },
    {
      "epoch": 1.5383960775842318,
      "grad_norm": 0.7086230516433716,
      "learning_rate": 6.968826393785047e-06,
      "loss": 0.0237,
      "step": 940040
    },
    {
      "epoch": 1.5384288080228852,
      "grad_norm": 0.3076375424861908,
      "learning_rate": 6.96876050157153e-06,
      "loss": 0.014,
      "step": 940060
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.5452402234077454,
      "learning_rate": 6.968694609358013e-06,
      "loss": 0.0217,
      "step": 940080
    },
    {
      "epoch": 1.538494268900192,
      "grad_norm": 0.42845866084098816,
      "learning_rate": 6.968628717144496e-06,
      "loss": 0.0244,
      "step": 940100
    },
    {
      "epoch": 1.538526999338845,
      "grad_norm": 1.5975111722946167,
      "learning_rate": 6.968562824930979e-06,
      "loss": 0.0219,
      "step": 940120
    },
    {
      "epoch": 1.5385597297774984,
      "grad_norm": 0.6354208588600159,
      "learning_rate": 6.968496932717461e-06,
      "loss": 0.0186,
      "step": 940140
    },
    {
      "epoch": 1.5385924602161518,
      "grad_norm": 0.1929299235343933,
      "learning_rate": 6.968431040503945e-06,
      "loss": 0.0224,
      "step": 940160
    },
    {
      "epoch": 1.5386251906548052,
      "grad_norm": 0.2829706072807312,
      "learning_rate": 6.968365148290427e-06,
      "loss": 0.0243,
      "step": 940180
    },
    {
      "epoch": 1.5386579210934586,
      "grad_norm": 0.9158600568771362,
      "learning_rate": 6.96829925607691e-06,
      "loss": 0.0162,
      "step": 940200
    },
    {
      "epoch": 1.5386906515321117,
      "grad_norm": 1.182459831237793,
      "learning_rate": 6.968233363863392e-06,
      "loss": 0.0185,
      "step": 940220
    },
    {
      "epoch": 1.5387233819707653,
      "grad_norm": 0.23947669565677643,
      "learning_rate": 6.968167471649876e-06,
      "loss": 0.0146,
      "step": 940240
    },
    {
      "epoch": 1.5387561124094185,
      "grad_norm": 0.37520840764045715,
      "learning_rate": 6.9681015794363585e-06,
      "loss": 0.0185,
      "step": 940260
    },
    {
      "epoch": 1.5387888428480718,
      "grad_norm": 0.7005710601806641,
      "learning_rate": 6.968035687222841e-06,
      "loss": 0.0253,
      "step": 940280
    },
    {
      "epoch": 1.5388215732867252,
      "grad_norm": 2.0202908515930176,
      "learning_rate": 6.967969795009324e-06,
      "loss": 0.0189,
      "step": 940300
    },
    {
      "epoch": 1.5388543037253786,
      "grad_norm": 0.39591050148010254,
      "learning_rate": 6.9679039027958076e-06,
      "loss": 0.0125,
      "step": 940320
    },
    {
      "epoch": 1.538887034164032,
      "grad_norm": 0.3962714672088623,
      "learning_rate": 6.9678380105822895e-06,
      "loss": 0.0189,
      "step": 940340
    },
    {
      "epoch": 1.538919764602685,
      "grad_norm": 0.8147262930870056,
      "learning_rate": 6.967772118368773e-06,
      "loss": 0.0226,
      "step": 940360
    },
    {
      "epoch": 1.5389524950413387,
      "grad_norm": 0.7797958850860596,
      "learning_rate": 6.967706226155255e-06,
      "loss": 0.0299,
      "step": 940380
    },
    {
      "epoch": 1.5389852254799918,
      "grad_norm": 0.3731798529624939,
      "learning_rate": 6.9676403339417385e-06,
      "loss": 0.0152,
      "step": 940400
    },
    {
      "epoch": 1.5390179559186452,
      "grad_norm": 0.09359896183013916,
      "learning_rate": 6.967574441728222e-06,
      "loss": 0.021,
      "step": 940420
    },
    {
      "epoch": 1.5390506863572986,
      "grad_norm": 0.7184454798698425,
      "learning_rate": 6.967508549514704e-06,
      "loss": 0.0239,
      "step": 940440
    },
    {
      "epoch": 1.539083416795952,
      "grad_norm": 0.1353188008069992,
      "learning_rate": 6.967442657301188e-06,
      "loss": 0.0201,
      "step": 940460
    },
    {
      "epoch": 1.5391161472346053,
      "grad_norm": 0.49953460693359375,
      "learning_rate": 6.96737676508767e-06,
      "loss": 0.0233,
      "step": 940480
    },
    {
      "epoch": 1.5391488776732585,
      "grad_norm": 0.8091740608215332,
      "learning_rate": 6.967310872874153e-06,
      "loss": 0.035,
      "step": 940500
    },
    {
      "epoch": 1.539181608111912,
      "grad_norm": 0.1144341453909874,
      "learning_rate": 6.967244980660636e-06,
      "loss": 0.017,
      "step": 940520
    },
    {
      "epoch": 1.5392143385505652,
      "grad_norm": 0.1193971186876297,
      "learning_rate": 6.967179088447119e-06,
      "loss": 0.0146,
      "step": 940540
    },
    {
      "epoch": 1.5392470689892186,
      "grad_norm": 0.43896743655204773,
      "learning_rate": 6.967113196233601e-06,
      "loss": 0.0143,
      "step": 940560
    },
    {
      "epoch": 1.539279799427872,
      "grad_norm": 0.16084794700145721,
      "learning_rate": 6.967047304020085e-06,
      "loss": 0.0197,
      "step": 940580
    },
    {
      "epoch": 1.5393125298665251,
      "grad_norm": 0.7865861654281616,
      "learning_rate": 6.966981411806567e-06,
      "loss": 0.0236,
      "step": 940600
    },
    {
      "epoch": 1.5393452603051787,
      "grad_norm": 0.24005703628063202,
      "learning_rate": 6.96691551959305e-06,
      "loss": 0.0122,
      "step": 940620
    },
    {
      "epoch": 1.5393779907438319,
      "grad_norm": 0.8291946053504944,
      "learning_rate": 6.966849627379533e-06,
      "loss": 0.0159,
      "step": 940640
    },
    {
      "epoch": 1.5394107211824855,
      "grad_norm": 0.25300025939941406,
      "learning_rate": 6.966783735166016e-06,
      "loss": 0.0176,
      "step": 940660
    },
    {
      "epoch": 1.5394434516211386,
      "grad_norm": 0.2558937668800354,
      "learning_rate": 6.9667178429524986e-06,
      "loss": 0.0158,
      "step": 940680
    },
    {
      "epoch": 1.539476182059792,
      "grad_norm": 0.47998854517936707,
      "learning_rate": 6.966651950738982e-06,
      "loss": 0.0191,
      "step": 940700
    },
    {
      "epoch": 1.5395089124984453,
      "grad_norm": 0.8613656163215637,
      "learning_rate": 6.966586058525464e-06,
      "loss": 0.0212,
      "step": 940720
    },
    {
      "epoch": 1.5395416429370985,
      "grad_norm": 0.2940526008605957,
      "learning_rate": 6.966520166311948e-06,
      "loss": 0.0198,
      "step": 940740
    },
    {
      "epoch": 1.539574373375752,
      "grad_norm": 0.34715360403060913,
      "learning_rate": 6.966454274098431e-06,
      "loss": 0.0189,
      "step": 940760
    },
    {
      "epoch": 1.5396071038144052,
      "grad_norm": 1.3800793886184692,
      "learning_rate": 6.966388381884913e-06,
      "loss": 0.0207,
      "step": 940780
    },
    {
      "epoch": 1.5396398342530586,
      "grad_norm": 0.8389132618904114,
      "learning_rate": 6.966322489671397e-06,
      "loss": 0.0156,
      "step": 940800
    },
    {
      "epoch": 1.539672564691712,
      "grad_norm": 0.9199216365814209,
      "learning_rate": 6.966256597457879e-06,
      "loss": 0.0198,
      "step": 940820
    },
    {
      "epoch": 1.5397052951303654,
      "grad_norm": 3.7043323516845703,
      "learning_rate": 6.966190705244362e-06,
      "loss": 0.0287,
      "step": 940840
    },
    {
      "epoch": 1.5397380255690187,
      "grad_norm": 0.4004877805709839,
      "learning_rate": 6.966124813030845e-06,
      "loss": 0.0202,
      "step": 940860
    },
    {
      "epoch": 1.5397707560076719,
      "grad_norm": 0.5621260404586792,
      "learning_rate": 6.966058920817328e-06,
      "loss": 0.014,
      "step": 940880
    },
    {
      "epoch": 1.5398034864463255,
      "grad_norm": 0.10717753320932388,
      "learning_rate": 6.96599302860381e-06,
      "loss": 0.0267,
      "step": 940900
    },
    {
      "epoch": 1.5398362168849786,
      "grad_norm": 0.8852091431617737,
      "learning_rate": 6.965927136390294e-06,
      "loss": 0.0168,
      "step": 940920
    },
    {
      "epoch": 1.539868947323632,
      "grad_norm": 0.5970515608787537,
      "learning_rate": 6.965861244176776e-06,
      "loss": 0.0235,
      "step": 940940
    },
    {
      "epoch": 1.5399016777622854,
      "grad_norm": 0.6377017498016357,
      "learning_rate": 6.9657953519632595e-06,
      "loss": 0.0173,
      "step": 940960
    },
    {
      "epoch": 1.5399344082009387,
      "grad_norm": 0.568453311920166,
      "learning_rate": 6.965729459749741e-06,
      "loss": 0.0267,
      "step": 940980
    },
    {
      "epoch": 1.5399671386395921,
      "grad_norm": 0.6353534460067749,
      "learning_rate": 6.965663567536225e-06,
      "loss": 0.0321,
      "step": 941000
    },
    {
      "epoch": 1.5399998690782453,
      "grad_norm": 3.957618236541748,
      "learning_rate": 6.965597675322707e-06,
      "loss": 0.0196,
      "step": 941020
    },
    {
      "epoch": 1.5400325995168989,
      "grad_norm": 0.5639002919197083,
      "learning_rate": 6.9655317831091904e-06,
      "loss": 0.0286,
      "step": 941040
    },
    {
      "epoch": 1.540065329955552,
      "grad_norm": 0.801121711730957,
      "learning_rate": 6.965465890895673e-06,
      "loss": 0.0223,
      "step": 941060
    },
    {
      "epoch": 1.5400980603942054,
      "grad_norm": 0.7338161468505859,
      "learning_rate": 6.965399998682156e-06,
      "loss": 0.0208,
      "step": 941080
    },
    {
      "epoch": 1.5401307908328588,
      "grad_norm": 3.381883144378662,
      "learning_rate": 6.9653341064686395e-06,
      "loss": 0.0176,
      "step": 941100
    },
    {
      "epoch": 1.5401635212715121,
      "grad_norm": 0.05324878543615341,
      "learning_rate": 6.965268214255122e-06,
      "loss": 0.0248,
      "step": 941120
    },
    {
      "epoch": 1.5401962517101655,
      "grad_norm": 0.17714399099349976,
      "learning_rate": 6.965202322041606e-06,
      "loss": 0.0247,
      "step": 941140
    },
    {
      "epoch": 1.5402289821488186,
      "grad_norm": 0.29204845428466797,
      "learning_rate": 6.965136429828088e-06,
      "loss": 0.014,
      "step": 941160
    },
    {
      "epoch": 1.5402617125874722,
      "grad_norm": 0.6885288953781128,
      "learning_rate": 6.965070537614571e-06,
      "loss": 0.0274,
      "step": 941180
    },
    {
      "epoch": 1.5402944430261254,
      "grad_norm": 0.594740629196167,
      "learning_rate": 6.965004645401053e-06,
      "loss": 0.0119,
      "step": 941200
    },
    {
      "epoch": 1.5403271734647788,
      "grad_norm": 0.5199074149131775,
      "learning_rate": 6.964938753187537e-06,
      "loss": 0.0204,
      "step": 941220
    },
    {
      "epoch": 1.5403599039034321,
      "grad_norm": 3.1794443130493164,
      "learning_rate": 6.964872860974019e-06,
      "loss": 0.0167,
      "step": 941240
    },
    {
      "epoch": 1.5403926343420853,
      "grad_norm": 0.39388173818588257,
      "learning_rate": 6.964806968760502e-06,
      "loss": 0.026,
      "step": 941260
    },
    {
      "epoch": 1.5404253647807389,
      "grad_norm": 0.736375629901886,
      "learning_rate": 6.964741076546985e-06,
      "loss": 0.0225,
      "step": 941280
    },
    {
      "epoch": 1.540458095219392,
      "grad_norm": 0.11993856728076935,
      "learning_rate": 6.964675184333468e-06,
      "loss": 0.0156,
      "step": 941300
    },
    {
      "epoch": 1.5404908256580456,
      "grad_norm": 0.17224375903606415,
      "learning_rate": 6.9646092921199505e-06,
      "loss": 0.0117,
      "step": 941320
    },
    {
      "epoch": 1.5405235560966988,
      "grad_norm": 0.37293073534965515,
      "learning_rate": 6.964543399906434e-06,
      "loss": 0.0175,
      "step": 941340
    },
    {
      "epoch": 1.5405562865353521,
      "grad_norm": 1.5914732217788696,
      "learning_rate": 6.964477507692916e-06,
      "loss": 0.0212,
      "step": 941360
    },
    {
      "epoch": 1.5405890169740055,
      "grad_norm": 0.5458531975746155,
      "learning_rate": 6.9644116154793995e-06,
      "loss": 0.0181,
      "step": 941380
    },
    {
      "epoch": 1.5406217474126587,
      "grad_norm": 1.977339267730713,
      "learning_rate": 6.9643457232658814e-06,
      "loss": 0.0185,
      "step": 941400
    },
    {
      "epoch": 1.5406544778513123,
      "grad_norm": 0.6764367818832397,
      "learning_rate": 6.964279831052365e-06,
      "loss": 0.0197,
      "step": 941420
    },
    {
      "epoch": 1.5406872082899654,
      "grad_norm": 0.6292606592178345,
      "learning_rate": 6.964213938838848e-06,
      "loss": 0.0175,
      "step": 941440
    },
    {
      "epoch": 1.5407199387286188,
      "grad_norm": 1.9253286123275757,
      "learning_rate": 6.9641480466253305e-06,
      "loss": 0.0217,
      "step": 941460
    },
    {
      "epoch": 1.5407526691672722,
      "grad_norm": 0.5679743885993958,
      "learning_rate": 6.964082154411814e-06,
      "loss": 0.018,
      "step": 941480
    },
    {
      "epoch": 1.5407853996059255,
      "grad_norm": 0.8584551215171814,
      "learning_rate": 6.964016262198297e-06,
      "loss": 0.0203,
      "step": 941500
    },
    {
      "epoch": 1.540818130044579,
      "grad_norm": 1.2918481826782227,
      "learning_rate": 6.9639503699847796e-06,
      "loss": 0.0233,
      "step": 941520
    },
    {
      "epoch": 1.540850860483232,
      "grad_norm": 0.4162044823169708,
      "learning_rate": 6.963884477771262e-06,
      "loss": 0.016,
      "step": 941540
    },
    {
      "epoch": 1.5408835909218856,
      "grad_norm": 1.7575939893722534,
      "learning_rate": 6.963818585557746e-06,
      "loss": 0.021,
      "step": 941560
    },
    {
      "epoch": 1.5409163213605388,
      "grad_norm": 0.5064393281936646,
      "learning_rate": 6.963752693344228e-06,
      "loss": 0.0165,
      "step": 941580
    },
    {
      "epoch": 1.5409490517991922,
      "grad_norm": 0.7798500657081604,
      "learning_rate": 6.963686801130711e-06,
      "loss": 0.0206,
      "step": 941600
    },
    {
      "epoch": 1.5409817822378455,
      "grad_norm": 0.27927616238594055,
      "learning_rate": 6.963620908917193e-06,
      "loss": 0.0311,
      "step": 941620
    },
    {
      "epoch": 1.541014512676499,
      "grad_norm": 0.6814810037612915,
      "learning_rate": 6.963555016703677e-06,
      "loss": 0.0206,
      "step": 941640
    },
    {
      "epoch": 1.5410472431151523,
      "grad_norm": 0.07561730593442917,
      "learning_rate": 6.96348912449016e-06,
      "loss": 0.0202,
      "step": 941660
    },
    {
      "epoch": 1.5410799735538054,
      "grad_norm": 0.33595821261405945,
      "learning_rate": 6.963423232276642e-06,
      "loss": 0.0301,
      "step": 941680
    },
    {
      "epoch": 1.541112703992459,
      "grad_norm": 0.32246944308280945,
      "learning_rate": 6.963357340063125e-06,
      "loss": 0.0153,
      "step": 941700
    },
    {
      "epoch": 1.5411454344311122,
      "grad_norm": 0.5119762420654297,
      "learning_rate": 6.963291447849609e-06,
      "loss": 0.0214,
      "step": 941720
    },
    {
      "epoch": 1.5411781648697656,
      "grad_norm": 0.6348947286605835,
      "learning_rate": 6.9632255556360906e-06,
      "loss": 0.0171,
      "step": 941740
    },
    {
      "epoch": 1.541210895308419,
      "grad_norm": 0.8076347708702087,
      "learning_rate": 6.963159663422574e-06,
      "loss": 0.0173,
      "step": 941760
    },
    {
      "epoch": 1.5412436257470723,
      "grad_norm": 0.25784868001937866,
      "learning_rate": 6.963093771209056e-06,
      "loss": 0.0239,
      "step": 941780
    },
    {
      "epoch": 1.5412763561857257,
      "grad_norm": 0.3290799558162689,
      "learning_rate": 6.96302787899554e-06,
      "loss": 0.0205,
      "step": 941800
    },
    {
      "epoch": 1.5413090866243788,
      "grad_norm": 0.20519870519638062,
      "learning_rate": 6.962961986782023e-06,
      "loss": 0.0292,
      "step": 941820
    },
    {
      "epoch": 1.5413418170630324,
      "grad_norm": 0.24713262915611267,
      "learning_rate": 6.962896094568505e-06,
      "loss": 0.032,
      "step": 941840
    },
    {
      "epoch": 1.5413745475016856,
      "grad_norm": 0.5613117814064026,
      "learning_rate": 6.962830202354989e-06,
      "loss": 0.0232,
      "step": 941860
    },
    {
      "epoch": 1.541407277940339,
      "grad_norm": 0.5144659280776978,
      "learning_rate": 6.9627643101414714e-06,
      "loss": 0.024,
      "step": 941880
    },
    {
      "epoch": 1.5414400083789923,
      "grad_norm": 0.608140766620636,
      "learning_rate": 6.962698417927954e-06,
      "loss": 0.024,
      "step": 941900
    },
    {
      "epoch": 1.5414727388176457,
      "grad_norm": 0.4138853847980499,
      "learning_rate": 6.962632525714437e-06,
      "loss": 0.0155,
      "step": 941920
    },
    {
      "epoch": 1.541505469256299,
      "grad_norm": 0.8427881002426147,
      "learning_rate": 6.9625666335009205e-06,
      "loss": 0.0408,
      "step": 941940
    },
    {
      "epoch": 1.5415381996949522,
      "grad_norm": 0.493849515914917,
      "learning_rate": 6.962500741287402e-06,
      "loss": 0.0163,
      "step": 941960
    },
    {
      "epoch": 1.5415709301336058,
      "grad_norm": 0.6911369562149048,
      "learning_rate": 6.962434849073886e-06,
      "loss": 0.0212,
      "step": 941980
    },
    {
      "epoch": 1.541603660572259,
      "grad_norm": 0.266470730304718,
      "learning_rate": 6.962368956860368e-06,
      "loss": 0.0121,
      "step": 942000
    },
    {
      "epoch": 1.5416363910109123,
      "grad_norm": 0.533389151096344,
      "learning_rate": 6.9623030646468515e-06,
      "loss": 0.0186,
      "step": 942020
    },
    {
      "epoch": 1.5416691214495657,
      "grad_norm": 0.17684994637966156,
      "learning_rate": 6.962237172433333e-06,
      "loss": 0.0256,
      "step": 942040
    },
    {
      "epoch": 1.5417018518882188,
      "grad_norm": 0.21016629040241241,
      "learning_rate": 6.962171280219817e-06,
      "loss": 0.0262,
      "step": 942060
    },
    {
      "epoch": 1.5417345823268724,
      "grad_norm": 1.0830351114273071,
      "learning_rate": 6.9621053880063e-06,
      "loss": 0.022,
      "step": 942080
    },
    {
      "epoch": 1.5417673127655256,
      "grad_norm": 0.19917826354503632,
      "learning_rate": 6.962039495792782e-06,
      "loss": 0.016,
      "step": 942100
    },
    {
      "epoch": 1.5418000432041792,
      "grad_norm": 0.9022798538208008,
      "learning_rate": 6.961973603579265e-06,
      "loss": 0.0216,
      "step": 942120
    },
    {
      "epoch": 1.5418327736428323,
      "grad_norm": 0.690266489982605,
      "learning_rate": 6.961907711365749e-06,
      "loss": 0.0144,
      "step": 942140
    },
    {
      "epoch": 1.5418655040814857,
      "grad_norm": 1.1308603286743164,
      "learning_rate": 6.9618418191522315e-06,
      "loss": 0.024,
      "step": 942160
    },
    {
      "epoch": 1.541898234520139,
      "grad_norm": 0.36839571595191956,
      "learning_rate": 6.961775926938714e-06,
      "loss": 0.0227,
      "step": 942180
    },
    {
      "epoch": 1.5419309649587922,
      "grad_norm": 0.1857004165649414,
      "learning_rate": 6.961710034725198e-06,
      "loss": 0.0185,
      "step": 942200
    },
    {
      "epoch": 1.5419636953974458,
      "grad_norm": 0.5292717814445496,
      "learning_rate": 6.96164414251168e-06,
      "loss": 0.0241,
      "step": 942220
    },
    {
      "epoch": 1.541996425836099,
      "grad_norm": 1.1393226385116577,
      "learning_rate": 6.961578250298163e-06,
      "loss": 0.0132,
      "step": 942240
    },
    {
      "epoch": 1.5420291562747523,
      "grad_norm": 0.723831057548523,
      "learning_rate": 6.961512358084645e-06,
      "loss": 0.0251,
      "step": 942260
    },
    {
      "epoch": 1.5420618867134057,
      "grad_norm": 0.29603713750839233,
      "learning_rate": 6.961446465871129e-06,
      "loss": 0.0187,
      "step": 942280
    },
    {
      "epoch": 1.542094617152059,
      "grad_norm": 0.37943556904792786,
      "learning_rate": 6.9613805736576115e-06,
      "loss": 0.0222,
      "step": 942300
    },
    {
      "epoch": 1.5421273475907125,
      "grad_norm": 0.296999454498291,
      "learning_rate": 6.961314681444094e-06,
      "loss": 0.0149,
      "step": 942320
    },
    {
      "epoch": 1.5421600780293656,
      "grad_norm": 0.573296308517456,
      "learning_rate": 6.961248789230577e-06,
      "loss": 0.0263,
      "step": 942340
    },
    {
      "epoch": 1.5421928084680192,
      "grad_norm": 0.7779824733734131,
      "learning_rate": 6.9611828970170606e-06,
      "loss": 0.0225,
      "step": 942360
    },
    {
      "epoch": 1.5422255389066724,
      "grad_norm": 1.1207128763198853,
      "learning_rate": 6.9611170048035425e-06,
      "loss": 0.0224,
      "step": 942380
    },
    {
      "epoch": 1.5422582693453257,
      "grad_norm": 0.4881884455680847,
      "learning_rate": 6.961051112590026e-06,
      "loss": 0.0227,
      "step": 942400
    },
    {
      "epoch": 1.542290999783979,
      "grad_norm": 0.8746522068977356,
      "learning_rate": 6.960985220376508e-06,
      "loss": 0.0183,
      "step": 942420
    },
    {
      "epoch": 1.5423237302226325,
      "grad_norm": 0.20119233429431915,
      "learning_rate": 6.9609193281629915e-06,
      "loss": 0.0192,
      "step": 942440
    },
    {
      "epoch": 1.5423564606612858,
      "grad_norm": 0.24002081155776978,
      "learning_rate": 6.960853435949474e-06,
      "loss": 0.022,
      "step": 942460
    },
    {
      "epoch": 1.542389191099939,
      "grad_norm": 0.3537823557853699,
      "learning_rate": 6.960787543735957e-06,
      "loss": 0.0225,
      "step": 942480
    },
    {
      "epoch": 1.5424219215385926,
      "grad_norm": 2.2639973163604736,
      "learning_rate": 6.96072165152244e-06,
      "loss": 0.0267,
      "step": 942500
    },
    {
      "epoch": 1.5424546519772457,
      "grad_norm": 0.5465196967124939,
      "learning_rate": 6.960655759308923e-06,
      "loss": 0.0221,
      "step": 942520
    },
    {
      "epoch": 1.542487382415899,
      "grad_norm": 0.5540567636489868,
      "learning_rate": 6.960589867095406e-06,
      "loss": 0.0209,
      "step": 942540
    },
    {
      "epoch": 1.5425201128545525,
      "grad_norm": 1.061866283416748,
      "learning_rate": 6.960523974881889e-06,
      "loss": 0.0281,
      "step": 942560
    },
    {
      "epoch": 1.5425528432932059,
      "grad_norm": 0.919304370880127,
      "learning_rate": 6.960458082668372e-06,
      "loss": 0.0211,
      "step": 942580
    },
    {
      "epoch": 1.5425855737318592,
      "grad_norm": 0.2480791062116623,
      "learning_rate": 6.960392190454854e-06,
      "loss": 0.0199,
      "step": 942600
    },
    {
      "epoch": 1.5426183041705124,
      "grad_norm": 0.2710915207862854,
      "learning_rate": 6.960326298241338e-06,
      "loss": 0.0215,
      "step": 942620
    },
    {
      "epoch": 1.542651034609166,
      "grad_norm": 0.20915846526622772,
      "learning_rate": 6.96026040602782e-06,
      "loss": 0.0268,
      "step": 942640
    },
    {
      "epoch": 1.5426837650478191,
      "grad_norm": 0.3918101191520691,
      "learning_rate": 6.960194513814303e-06,
      "loss": 0.0138,
      "step": 942660
    },
    {
      "epoch": 1.5427164954864725,
      "grad_norm": 1.0364301204681396,
      "learning_rate": 6.960128621600786e-06,
      "loss": 0.018,
      "step": 942680
    },
    {
      "epoch": 1.5427492259251259,
      "grad_norm": 0.4053860008716583,
      "learning_rate": 6.960062729387269e-06,
      "loss": 0.027,
      "step": 942700
    },
    {
      "epoch": 1.5427819563637792,
      "grad_norm": 0.37633371353149414,
      "learning_rate": 6.959996837173752e-06,
      "loss": 0.0158,
      "step": 942720
    },
    {
      "epoch": 1.5428146868024326,
      "grad_norm": 0.3125525414943695,
      "learning_rate": 6.959930944960235e-06,
      "loss": 0.0215,
      "step": 942740
    },
    {
      "epoch": 1.5428474172410858,
      "grad_norm": 0.3804115056991577,
      "learning_rate": 6.959865052746717e-06,
      "loss": 0.0206,
      "step": 942760
    },
    {
      "epoch": 1.5428801476797394,
      "grad_norm": 0.2273479551076889,
      "learning_rate": 6.959799160533201e-06,
      "loss": 0.0175,
      "step": 942780
    },
    {
      "epoch": 1.5429128781183925,
      "grad_norm": 0.3724467158317566,
      "learning_rate": 6.9597332683196825e-06,
      "loss": 0.0266,
      "step": 942800
    },
    {
      "epoch": 1.5429456085570459,
      "grad_norm": 0.2999943196773529,
      "learning_rate": 6.959667376106166e-06,
      "loss": 0.0194,
      "step": 942820
    },
    {
      "epoch": 1.5429783389956993,
      "grad_norm": 0.4053609371185303,
      "learning_rate": 6.959601483892649e-06,
      "loss": 0.021,
      "step": 942840
    },
    {
      "epoch": 1.5430110694343524,
      "grad_norm": 0.39796850085258484,
      "learning_rate": 6.959535591679132e-06,
      "loss": 0.0273,
      "step": 942860
    },
    {
      "epoch": 1.543043799873006,
      "grad_norm": 1.1222689151763916,
      "learning_rate": 6.959469699465615e-06,
      "loss": 0.0189,
      "step": 942880
    },
    {
      "epoch": 1.5430765303116591,
      "grad_norm": 0.15710322558879852,
      "learning_rate": 6.959403807252098e-06,
      "loss": 0.0272,
      "step": 942900
    },
    {
      "epoch": 1.5431092607503127,
      "grad_norm": 0.4746769964694977,
      "learning_rate": 6.959337915038581e-06,
      "loss": 0.0223,
      "step": 942920
    },
    {
      "epoch": 1.543141991188966,
      "grad_norm": 0.5667378902435303,
      "learning_rate": 6.959272022825063e-06,
      "loss": 0.024,
      "step": 942940
    },
    {
      "epoch": 1.5431747216276193,
      "grad_norm": 1.491497278213501,
      "learning_rate": 6.959206130611547e-06,
      "loss": 0.0304,
      "step": 942960
    },
    {
      "epoch": 1.5432074520662726,
      "grad_norm": 0.6359789967536926,
      "learning_rate": 6.959140238398029e-06,
      "loss": 0.0304,
      "step": 942980
    },
    {
      "epoch": 1.5432401825049258,
      "grad_norm": 3.86173677444458,
      "learning_rate": 6.9590743461845125e-06,
      "loss": 0.0233,
      "step": 943000
    },
    {
      "epoch": 1.5432729129435794,
      "grad_norm": 0.39284220337867737,
      "learning_rate": 6.959008453970994e-06,
      "loss": 0.0146,
      "step": 943020
    },
    {
      "epoch": 1.5433056433822325,
      "grad_norm": 0.33381351828575134,
      "learning_rate": 6.958942561757478e-06,
      "loss": 0.0233,
      "step": 943040
    },
    {
      "epoch": 1.543338373820886,
      "grad_norm": 0.8835104703903198,
      "learning_rate": 6.95887666954396e-06,
      "loss": 0.0164,
      "step": 943060
    },
    {
      "epoch": 1.5433711042595393,
      "grad_norm": 0.3062388002872467,
      "learning_rate": 6.9588107773304434e-06,
      "loss": 0.02,
      "step": 943080
    },
    {
      "epoch": 1.5434038346981926,
      "grad_norm": 1.5813547372817993,
      "learning_rate": 6.958744885116926e-06,
      "loss": 0.0165,
      "step": 943100
    },
    {
      "epoch": 1.543436565136846,
      "grad_norm": 1.5087894201278687,
      "learning_rate": 6.958678992903409e-06,
      "loss": 0.0192,
      "step": 943120
    },
    {
      "epoch": 1.5434692955754992,
      "grad_norm": 1.9170982837677002,
      "learning_rate": 6.958613100689892e-06,
      "loss": 0.0226,
      "step": 943140
    },
    {
      "epoch": 1.5435020260141528,
      "grad_norm": 0.5807046294212341,
      "learning_rate": 6.958547208476375e-06,
      "loss": 0.0165,
      "step": 943160
    },
    {
      "epoch": 1.543534756452806,
      "grad_norm": 0.7932655811309814,
      "learning_rate": 6.958481316262857e-06,
      "loss": 0.0236,
      "step": 943180
    },
    {
      "epoch": 1.5435674868914593,
      "grad_norm": 0.558205246925354,
      "learning_rate": 6.958415424049341e-06,
      "loss": 0.0178,
      "step": 943200
    },
    {
      "epoch": 1.5436002173301127,
      "grad_norm": 0.3888842463493347,
      "learning_rate": 6.958349531835824e-06,
      "loss": 0.024,
      "step": 943220
    },
    {
      "epoch": 1.543632947768766,
      "grad_norm": 0.32279831171035767,
      "learning_rate": 6.958283639622306e-06,
      "loss": 0.015,
      "step": 943240
    },
    {
      "epoch": 1.5436656782074194,
      "grad_norm": 0.4709918797016144,
      "learning_rate": 6.95821774740879e-06,
      "loss": 0.0161,
      "step": 943260
    },
    {
      "epoch": 1.5436984086460726,
      "grad_norm": 1.5596587657928467,
      "learning_rate": 6.958151855195272e-06,
      "loss": 0.0275,
      "step": 943280
    },
    {
      "epoch": 1.5437311390847261,
      "grad_norm": 1.395574688911438,
      "learning_rate": 6.958085962981755e-06,
      "loss": 0.0246,
      "step": 943300
    },
    {
      "epoch": 1.5437638695233793,
      "grad_norm": 0.973872184753418,
      "learning_rate": 6.958020070768238e-06,
      "loss": 0.0244,
      "step": 943320
    },
    {
      "epoch": 1.5437965999620327,
      "grad_norm": 0.27972593903541565,
      "learning_rate": 6.957954178554721e-06,
      "loss": 0.0161,
      "step": 943340
    },
    {
      "epoch": 1.543829330400686,
      "grad_norm": 1.14271080493927,
      "learning_rate": 6.9578882863412035e-06,
      "loss": 0.0225,
      "step": 943360
    },
    {
      "epoch": 1.5438620608393394,
      "grad_norm": 0.7993530035018921,
      "learning_rate": 6.957822394127687e-06,
      "loss": 0.015,
      "step": 943380
    },
    {
      "epoch": 1.5438947912779928,
      "grad_norm": 0.4872775971889496,
      "learning_rate": 6.957756501914169e-06,
      "loss": 0.0211,
      "step": 943400
    },
    {
      "epoch": 1.543927521716646,
      "grad_norm": 0.7549174427986145,
      "learning_rate": 6.9576906097006526e-06,
      "loss": 0.0178,
      "step": 943420
    },
    {
      "epoch": 1.5439602521552995,
      "grad_norm": 0.5896652936935425,
      "learning_rate": 6.9576247174871344e-06,
      "loss": 0.0171,
      "step": 943440
    },
    {
      "epoch": 1.5439929825939527,
      "grad_norm": 0.36065673828125,
      "learning_rate": 6.957558825273618e-06,
      "loss": 0.0186,
      "step": 943460
    },
    {
      "epoch": 1.544025713032606,
      "grad_norm": 2.614689588546753,
      "learning_rate": 6.957492933060101e-06,
      "loss": 0.0209,
      "step": 943480
    },
    {
      "epoch": 1.5440584434712594,
      "grad_norm": 0.5398449301719666,
      "learning_rate": 6.9574270408465835e-06,
      "loss": 0.0177,
      "step": 943500
    },
    {
      "epoch": 1.5440911739099128,
      "grad_norm": 0.36846479773521423,
      "learning_rate": 6.957361148633066e-06,
      "loss": 0.0217,
      "step": 943520
    },
    {
      "epoch": 1.5441239043485662,
      "grad_norm": 0.13167710602283478,
      "learning_rate": 6.95729525641955e-06,
      "loss": 0.0135,
      "step": 943540
    },
    {
      "epoch": 1.5441566347872193,
      "grad_norm": 0.8310326933860779,
      "learning_rate": 6.957229364206032e-06,
      "loss": 0.0334,
      "step": 943560
    },
    {
      "epoch": 1.544189365225873,
      "grad_norm": 0.3405710756778717,
      "learning_rate": 6.957163471992515e-06,
      "loss": 0.0137,
      "step": 943580
    },
    {
      "epoch": 1.544222095664526,
      "grad_norm": 0.5813692808151245,
      "learning_rate": 6.957097579778999e-06,
      "loss": 0.019,
      "step": 943600
    },
    {
      "epoch": 1.5442548261031794,
      "grad_norm": 0.5848501324653625,
      "learning_rate": 6.957031687565481e-06,
      "loss": 0.0241,
      "step": 943620
    },
    {
      "epoch": 1.5442875565418328,
      "grad_norm": 0.625473141670227,
      "learning_rate": 6.956965795351964e-06,
      "loss": 0.0193,
      "step": 943640
    },
    {
      "epoch": 1.544320286980486,
      "grad_norm": 0.6207583546638489,
      "learning_rate": 6.956899903138446e-06,
      "loss": 0.0264,
      "step": 943660
    },
    {
      "epoch": 1.5443530174191396,
      "grad_norm": 0.35673803091049194,
      "learning_rate": 6.95683401092493e-06,
      "loss": 0.0149,
      "step": 943680
    },
    {
      "epoch": 1.5443857478577927,
      "grad_norm": 0.4163863956928253,
      "learning_rate": 6.956768118711413e-06,
      "loss": 0.0245,
      "step": 943700
    },
    {
      "epoch": 1.5444184782964463,
      "grad_norm": 1.1027827262878418,
      "learning_rate": 6.956702226497895e-06,
      "loss": 0.0141,
      "step": 943720
    },
    {
      "epoch": 1.5444512087350994,
      "grad_norm": 0.38052019476890564,
      "learning_rate": 6.956636334284378e-06,
      "loss": 0.0158,
      "step": 943740
    },
    {
      "epoch": 1.5444839391737528,
      "grad_norm": 0.7051475644111633,
      "learning_rate": 6.956570442070862e-06,
      "loss": 0.0159,
      "step": 943760
    },
    {
      "epoch": 1.5445166696124062,
      "grad_norm": 0.41263192892074585,
      "learning_rate": 6.9565045498573436e-06,
      "loss": 0.0156,
      "step": 943780
    },
    {
      "epoch": 1.5445494000510593,
      "grad_norm": 1.9208869934082031,
      "learning_rate": 6.956438657643827e-06,
      "loss": 0.0196,
      "step": 943800
    },
    {
      "epoch": 1.544582130489713,
      "grad_norm": 7.9713454246521,
      "learning_rate": 6.956372765430309e-06,
      "loss": 0.0315,
      "step": 943820
    },
    {
      "epoch": 1.544614860928366,
      "grad_norm": 0.7400221824645996,
      "learning_rate": 6.956306873216793e-06,
      "loss": 0.0153,
      "step": 943840
    },
    {
      "epoch": 1.5446475913670195,
      "grad_norm": 1.6140918731689453,
      "learning_rate": 6.9562409810032745e-06,
      "loss": 0.0189,
      "step": 943860
    },
    {
      "epoch": 1.5446803218056728,
      "grad_norm": 0.5119562149047852,
      "learning_rate": 6.956175088789758e-06,
      "loss": 0.0153,
      "step": 943880
    },
    {
      "epoch": 1.5447130522443262,
      "grad_norm": 0.5973584055900574,
      "learning_rate": 6.956109196576241e-06,
      "loss": 0.0269,
      "step": 943900
    },
    {
      "epoch": 1.5447457826829796,
      "grad_norm": 0.5227092504501343,
      "learning_rate": 6.9560433043627244e-06,
      "loss": 0.0223,
      "step": 943920
    },
    {
      "epoch": 1.5447785131216327,
      "grad_norm": 0.8641139268875122,
      "learning_rate": 6.955977412149207e-06,
      "loss": 0.0223,
      "step": 943940
    },
    {
      "epoch": 1.5448112435602863,
      "grad_norm": 0.782628059387207,
      "learning_rate": 6.95591151993569e-06,
      "loss": 0.0253,
      "step": 943960
    },
    {
      "epoch": 1.5448439739989395,
      "grad_norm": 0.34941670298576355,
      "learning_rate": 6.9558456277221735e-06,
      "loss": 0.0181,
      "step": 943980
    },
    {
      "epoch": 1.5448767044375928,
      "grad_norm": 0.191226527094841,
      "learning_rate": 6.955779735508655e-06,
      "loss": 0.0141,
      "step": 944000
    },
    {
      "epoch": 1.5449094348762462,
      "grad_norm": 0.7179209589958191,
      "learning_rate": 6.955713843295139e-06,
      "loss": 0.0172,
      "step": 944020
    },
    {
      "epoch": 1.5449421653148996,
      "grad_norm": 0.7371687293052673,
      "learning_rate": 6.955647951081621e-06,
      "loss": 0.0195,
      "step": 944040
    },
    {
      "epoch": 1.544974895753553,
      "grad_norm": 0.17980258166790009,
      "learning_rate": 6.9555820588681045e-06,
      "loss": 0.0258,
      "step": 944060
    },
    {
      "epoch": 1.545007626192206,
      "grad_norm": 1.5045626163482666,
      "learning_rate": 6.955516166654586e-06,
      "loss": 0.0204,
      "step": 944080
    },
    {
      "epoch": 1.5450403566308597,
      "grad_norm": 0.3758236765861511,
      "learning_rate": 6.95545027444107e-06,
      "loss": 0.0216,
      "step": 944100
    },
    {
      "epoch": 1.5450730870695129,
      "grad_norm": 1.498817801475525,
      "learning_rate": 6.955384382227553e-06,
      "loss": 0.027,
      "step": 944120
    },
    {
      "epoch": 1.5451058175081662,
      "grad_norm": 0.3360970616340637,
      "learning_rate": 6.955318490014035e-06,
      "loss": 0.0157,
      "step": 944140
    },
    {
      "epoch": 1.5451385479468196,
      "grad_norm": 0.11406155675649643,
      "learning_rate": 6.955252597800518e-06,
      "loss": 0.0142,
      "step": 944160
    },
    {
      "epoch": 1.545171278385473,
      "grad_norm": 0.6203511357307434,
      "learning_rate": 6.955186705587002e-06,
      "loss": 0.0313,
      "step": 944180
    },
    {
      "epoch": 1.5452040088241263,
      "grad_norm": 0.36954814195632935,
      "learning_rate": 6.955120813373484e-06,
      "loss": 0.0208,
      "step": 944200
    },
    {
      "epoch": 1.5452367392627795,
      "grad_norm": 0.7751643061637878,
      "learning_rate": 6.955054921159967e-06,
      "loss": 0.0198,
      "step": 944220
    },
    {
      "epoch": 1.545269469701433,
      "grad_norm": 0.9369462728500366,
      "learning_rate": 6.954989028946449e-06,
      "loss": 0.0236,
      "step": 944240
    },
    {
      "epoch": 1.5453022001400862,
      "grad_norm": 0.18837623298168182,
      "learning_rate": 6.954923136732933e-06,
      "loss": 0.024,
      "step": 944260
    },
    {
      "epoch": 1.5453349305787396,
      "grad_norm": 0.1573382467031479,
      "learning_rate": 6.954857244519416e-06,
      "loss": 0.0174,
      "step": 944280
    },
    {
      "epoch": 1.545367661017393,
      "grad_norm": 0.2491285353899002,
      "learning_rate": 6.954791352305898e-06,
      "loss": 0.0194,
      "step": 944300
    },
    {
      "epoch": 1.5454003914560461,
      "grad_norm": 0.4783008098602295,
      "learning_rate": 6.954725460092382e-06,
      "loss": 0.0257,
      "step": 944320
    },
    {
      "epoch": 1.5454331218946997,
      "grad_norm": 0.7589864730834961,
      "learning_rate": 6.9546595678788645e-06,
      "loss": 0.0197,
      "step": 944340
    },
    {
      "epoch": 1.5454658523333529,
      "grad_norm": 0.7235294580459595,
      "learning_rate": 6.954593675665347e-06,
      "loss": 0.0172,
      "step": 944360
    },
    {
      "epoch": 1.5454985827720065,
      "grad_norm": 1.4560924768447876,
      "learning_rate": 6.95452778345183e-06,
      "loss": 0.0199,
      "step": 944380
    },
    {
      "epoch": 1.5455313132106596,
      "grad_norm": 0.19214600324630737,
      "learning_rate": 6.9544618912383136e-06,
      "loss": 0.0136,
      "step": 944400
    },
    {
      "epoch": 1.545564043649313,
      "grad_norm": 0.24732758104801178,
      "learning_rate": 6.9543959990247955e-06,
      "loss": 0.0165,
      "step": 944420
    },
    {
      "epoch": 1.5455967740879664,
      "grad_norm": 1.0072728395462036,
      "learning_rate": 6.954330106811279e-06,
      "loss": 0.0205,
      "step": 944440
    },
    {
      "epoch": 1.5456295045266195,
      "grad_norm": 0.5282397866249084,
      "learning_rate": 6.954264214597761e-06,
      "loss": 0.013,
      "step": 944460
    },
    {
      "epoch": 1.545662234965273,
      "grad_norm": 0.18135972321033478,
      "learning_rate": 6.9541983223842445e-06,
      "loss": 0.0249,
      "step": 944480
    },
    {
      "epoch": 1.5456949654039263,
      "grad_norm": 1.5983333587646484,
      "learning_rate": 6.954132430170727e-06,
      "loss": 0.0286,
      "step": 944500
    },
    {
      "epoch": 1.5457276958425796,
      "grad_norm": 3.5555965900421143,
      "learning_rate": 6.95406653795721e-06,
      "loss": 0.0209,
      "step": 944520
    },
    {
      "epoch": 1.545760426281233,
      "grad_norm": 0.5018669366836548,
      "learning_rate": 6.954000645743693e-06,
      "loss": 0.0119,
      "step": 944540
    },
    {
      "epoch": 1.5457931567198864,
      "grad_norm": 0.643168568611145,
      "learning_rate": 6.953934753530176e-06,
      "loss": 0.0207,
      "step": 944560
    },
    {
      "epoch": 1.5458258871585397,
      "grad_norm": 0.7152246236801147,
      "learning_rate": 6.953868861316658e-06,
      "loss": 0.0229,
      "step": 944580
    },
    {
      "epoch": 1.545858617597193,
      "grad_norm": 0.8928753733634949,
      "learning_rate": 6.953802969103142e-06,
      "loss": 0.0138,
      "step": 944600
    },
    {
      "epoch": 1.5458913480358465,
      "grad_norm": 0.26106390357017517,
      "learning_rate": 6.953737076889625e-06,
      "loss": 0.0254,
      "step": 944620
    },
    {
      "epoch": 1.5459240784744996,
      "grad_norm": 0.3408031761646271,
      "learning_rate": 6.953671184676107e-06,
      "loss": 0.0222,
      "step": 944640
    },
    {
      "epoch": 1.545956808913153,
      "grad_norm": Infinity,
      "learning_rate": 6.953605292462591e-06,
      "loss": 0.0243,
      "step": 944660
    },
    {
      "epoch": 1.5459895393518064,
      "grad_norm": 0.41513362526893616,
      "learning_rate": 6.953539400249073e-06,
      "loss": 0.0245,
      "step": 944680
    },
    {
      "epoch": 1.5460222697904598,
      "grad_norm": 0.26133203506469727,
      "learning_rate": 6.953473508035556e-06,
      "loss": 0.0245,
      "step": 944700
    },
    {
      "epoch": 1.5460550002291131,
      "grad_norm": 0.8371726870536804,
      "learning_rate": 6.953407615822039e-06,
      "loss": 0.0151,
      "step": 944720
    },
    {
      "epoch": 1.5460877306677663,
      "grad_norm": 0.4505043625831604,
      "learning_rate": 6.953341723608522e-06,
      "loss": 0.0223,
      "step": 944740
    },
    {
      "epoch": 1.5461204611064199,
      "grad_norm": 1.178982138633728,
      "learning_rate": 6.953275831395005e-06,
      "loss": 0.0246,
      "step": 944760
    },
    {
      "epoch": 1.546153191545073,
      "grad_norm": 0.6749117970466614,
      "learning_rate": 6.953209939181488e-06,
      "loss": 0.0276,
      "step": 944780
    },
    {
      "epoch": 1.5461859219837264,
      "grad_norm": 0.6694817543029785,
      "learning_rate": 6.95314404696797e-06,
      "loss": 0.0138,
      "step": 944800
    },
    {
      "epoch": 1.5462186524223798,
      "grad_norm": 0.7265160083770752,
      "learning_rate": 6.953078154754454e-06,
      "loss": 0.015,
      "step": 944820
    },
    {
      "epoch": 1.5462513828610331,
      "grad_norm": 0.31222957372665405,
      "learning_rate": 6.9530122625409355e-06,
      "loss": 0.0145,
      "step": 944840
    },
    {
      "epoch": 1.5462841132996865,
      "grad_norm": 0.9462710022926331,
      "learning_rate": 6.952946370327419e-06,
      "loss": 0.0219,
      "step": 944860
    },
    {
      "epoch": 1.5463168437383397,
      "grad_norm": 0.27957749366760254,
      "learning_rate": 6.952880478113901e-06,
      "loss": 0.0153,
      "step": 944880
    },
    {
      "epoch": 1.5463495741769933,
      "grad_norm": 1.0218294858932495,
      "learning_rate": 6.952814585900385e-06,
      "loss": 0.0258,
      "step": 944900
    },
    {
      "epoch": 1.5463823046156464,
      "grad_norm": 1.0947917699813843,
      "learning_rate": 6.952748693686867e-06,
      "loss": 0.033,
      "step": 944920
    },
    {
      "epoch": 1.5464150350542998,
      "grad_norm": 2.229881525039673,
      "learning_rate": 6.95268280147335e-06,
      "loss": 0.0264,
      "step": 944940
    },
    {
      "epoch": 1.5464477654929532,
      "grad_norm": 0.585063636302948,
      "learning_rate": 6.952616909259833e-06,
      "loss": 0.0146,
      "step": 944960
    },
    {
      "epoch": 1.5464804959316065,
      "grad_norm": 0.6235933899879456,
      "learning_rate": 6.952551017046316e-06,
      "loss": 0.0192,
      "step": 944980
    },
    {
      "epoch": 1.54651322637026,
      "grad_norm": 2.0656545162200928,
      "learning_rate": 6.9524851248328e-06,
      "loss": 0.0234,
      "step": 945000
    },
    {
      "epoch": 1.546545956808913,
      "grad_norm": 1.3244359493255615,
      "learning_rate": 6.952419232619282e-06,
      "loss": 0.0219,
      "step": 945020
    },
    {
      "epoch": 1.5465786872475666,
      "grad_norm": 0.7745022177696228,
      "learning_rate": 6.9523533404057655e-06,
      "loss": 0.0174,
      "step": 945040
    },
    {
      "epoch": 1.5466114176862198,
      "grad_norm": 0.4831588566303253,
      "learning_rate": 6.952287448192247e-06,
      "loss": 0.0202,
      "step": 945060
    },
    {
      "epoch": 1.5466441481248732,
      "grad_norm": 0.23704198002815247,
      "learning_rate": 6.952221555978731e-06,
      "loss": 0.0177,
      "step": 945080
    },
    {
      "epoch": 1.5466768785635265,
      "grad_norm": 0.15883974730968475,
      "learning_rate": 6.952155663765213e-06,
      "loss": 0.0146,
      "step": 945100
    },
    {
      "epoch": 1.5467096090021797,
      "grad_norm": 0.28237566351890564,
      "learning_rate": 6.9520897715516964e-06,
      "loss": 0.025,
      "step": 945120
    },
    {
      "epoch": 1.5467423394408333,
      "grad_norm": 0.3072871267795563,
      "learning_rate": 6.952023879338179e-06,
      "loss": 0.0252,
      "step": 945140
    },
    {
      "epoch": 1.5467750698794864,
      "grad_norm": 0.6833481192588806,
      "learning_rate": 6.951957987124662e-06,
      "loss": 0.0215,
      "step": 945160
    },
    {
      "epoch": 1.54680780031814,
      "grad_norm": 0.24720895290374756,
      "learning_rate": 6.951892094911145e-06,
      "loss": 0.0181,
      "step": 945180
    },
    {
      "epoch": 1.5468405307567932,
      "grad_norm": 0.416429728269577,
      "learning_rate": 6.951826202697628e-06,
      "loss": 0.0187,
      "step": 945200
    },
    {
      "epoch": 1.5468732611954465,
      "grad_norm": 1.680574655532837,
      "learning_rate": 6.95176031048411e-06,
      "loss": 0.0242,
      "step": 945220
    },
    {
      "epoch": 1.5469059916341,
      "grad_norm": 0.515813410282135,
      "learning_rate": 6.951694418270594e-06,
      "loss": 0.0175,
      "step": 945240
    },
    {
      "epoch": 1.546938722072753,
      "grad_norm": 0.08692459762096405,
      "learning_rate": 6.951628526057076e-06,
      "loss": 0.0205,
      "step": 945260
    },
    {
      "epoch": 1.5469714525114067,
      "grad_norm": 0.6671863794326782,
      "learning_rate": 6.951562633843559e-06,
      "loss": 0.02,
      "step": 945280
    },
    {
      "epoch": 1.5470041829500598,
      "grad_norm": 1.27456533908844,
      "learning_rate": 6.951496741630042e-06,
      "loss": 0.0157,
      "step": 945300
    },
    {
      "epoch": 1.5470369133887132,
      "grad_norm": 1.1488420963287354,
      "learning_rate": 6.951430849416525e-06,
      "loss": 0.0195,
      "step": 945320
    },
    {
      "epoch": 1.5470696438273666,
      "grad_norm": 0.37023666501045227,
      "learning_rate": 6.951364957203008e-06,
      "loss": 0.0216,
      "step": 945340
    },
    {
      "epoch": 1.54710237426602,
      "grad_norm": 0.19543756544589996,
      "learning_rate": 6.951299064989491e-06,
      "loss": 0.0164,
      "step": 945360
    },
    {
      "epoch": 1.5471351047046733,
      "grad_norm": 0.9090865254402161,
      "learning_rate": 6.951233172775974e-06,
      "loss": 0.0197,
      "step": 945380
    },
    {
      "epoch": 1.5471678351433265,
      "grad_norm": 0.37584805488586426,
      "learning_rate": 6.9511672805624565e-06,
      "loss": 0.0266,
      "step": 945400
    },
    {
      "epoch": 1.54720056558198,
      "grad_norm": 0.736128568649292,
      "learning_rate": 6.95110138834894e-06,
      "loss": 0.0151,
      "step": 945420
    },
    {
      "epoch": 1.5472332960206332,
      "grad_norm": 0.5096579790115356,
      "learning_rate": 6.951035496135422e-06,
      "loss": 0.0165,
      "step": 945440
    },
    {
      "epoch": 1.5472660264592866,
      "grad_norm": 1.1955695152282715,
      "learning_rate": 6.9509696039219056e-06,
      "loss": 0.0205,
      "step": 945460
    },
    {
      "epoch": 1.54729875689794,
      "grad_norm": 0.3164476156234741,
      "learning_rate": 6.9509037117083875e-06,
      "loss": 0.019,
      "step": 945480
    },
    {
      "epoch": 1.5473314873365933,
      "grad_norm": 0.5131344199180603,
      "learning_rate": 6.950837819494871e-06,
      "loss": 0.0258,
      "step": 945500
    },
    {
      "epoch": 1.5473642177752467,
      "grad_norm": 0.7652382850646973,
      "learning_rate": 6.950771927281354e-06,
      "loss": 0.0361,
      "step": 945520
    },
    {
      "epoch": 1.5473969482138998,
      "grad_norm": 0.49400243163108826,
      "learning_rate": 6.9507060350678365e-06,
      "loss": 0.0192,
      "step": 945540
    },
    {
      "epoch": 1.5474296786525534,
      "grad_norm": 0.7522111535072327,
      "learning_rate": 6.950640142854319e-06,
      "loss": 0.0243,
      "step": 945560
    },
    {
      "epoch": 1.5474624090912066,
      "grad_norm": 1.9870052337646484,
      "learning_rate": 6.950574250640803e-06,
      "loss": 0.0214,
      "step": 945580
    },
    {
      "epoch": 1.54749513952986,
      "grad_norm": 2.2541229724884033,
      "learning_rate": 6.950508358427285e-06,
      "loss": 0.0174,
      "step": 945600
    },
    {
      "epoch": 1.5475278699685133,
      "grad_norm": 0.545089840888977,
      "learning_rate": 6.950442466213768e-06,
      "loss": 0.016,
      "step": 945620
    },
    {
      "epoch": 1.5475606004071667,
      "grad_norm": 1.4902392625808716,
      "learning_rate": 6.95037657400025e-06,
      "loss": 0.0222,
      "step": 945640
    },
    {
      "epoch": 1.54759333084582,
      "grad_norm": 0.6229947805404663,
      "learning_rate": 6.950310681786734e-06,
      "loss": 0.0221,
      "step": 945660
    },
    {
      "epoch": 1.5476260612844732,
      "grad_norm": 0.8077480792999268,
      "learning_rate": 6.950244789573217e-06,
      "loss": 0.0154,
      "step": 945680
    },
    {
      "epoch": 1.5476587917231268,
      "grad_norm": 0.38472118973731995,
      "learning_rate": 6.950178897359699e-06,
      "loss": 0.0166,
      "step": 945700
    },
    {
      "epoch": 1.54769152216178,
      "grad_norm": 0.31857842206954956,
      "learning_rate": 6.950113005146183e-06,
      "loss": 0.0204,
      "step": 945720
    },
    {
      "epoch": 1.5477242526004333,
      "grad_norm": 0.4713592529296875,
      "learning_rate": 6.950047112932666e-06,
      "loss": 0.0175,
      "step": 945740
    },
    {
      "epoch": 1.5477569830390867,
      "grad_norm": 0.6077019572257996,
      "learning_rate": 6.949981220719148e-06,
      "loss": 0.0201,
      "step": 945760
    },
    {
      "epoch": 1.54778971347774,
      "grad_norm": 0.7760827541351318,
      "learning_rate": 6.949915328505631e-06,
      "loss": 0.0185,
      "step": 945780
    },
    {
      "epoch": 1.5478224439163935,
      "grad_norm": 0.4149206876754761,
      "learning_rate": 6.949849436292115e-06,
      "loss": 0.0183,
      "step": 945800
    },
    {
      "epoch": 1.5478551743550466,
      "grad_norm": 0.8885098099708557,
      "learning_rate": 6.9497835440785966e-06,
      "loss": 0.0246,
      "step": 945820
    },
    {
      "epoch": 1.5478879047937002,
      "grad_norm": 0.5143715143203735,
      "learning_rate": 6.94971765186508e-06,
      "loss": 0.0174,
      "step": 945840
    },
    {
      "epoch": 1.5479206352323533,
      "grad_norm": 0.5487055778503418,
      "learning_rate": 6.949651759651562e-06,
      "loss": 0.0231,
      "step": 945860
    },
    {
      "epoch": 1.5479533656710067,
      "grad_norm": 0.18873193860054016,
      "learning_rate": 6.949585867438046e-06,
      "loss": 0.0259,
      "step": 945880
    },
    {
      "epoch": 1.54798609610966,
      "grad_norm": 0.8309265971183777,
      "learning_rate": 6.9495199752245275e-06,
      "loss": 0.0216,
      "step": 945900
    },
    {
      "epoch": 1.5480188265483132,
      "grad_norm": 0.4923071563243866,
      "learning_rate": 6.949454083011011e-06,
      "loss": 0.0242,
      "step": 945920
    },
    {
      "epoch": 1.5480515569869668,
      "grad_norm": 0.18211974203586578,
      "learning_rate": 6.949388190797494e-06,
      "loss": 0.0179,
      "step": 945940
    },
    {
      "epoch": 1.54808428742562,
      "grad_norm": 0.17432527244091034,
      "learning_rate": 6.949322298583977e-06,
      "loss": 0.0184,
      "step": 945960
    },
    {
      "epoch": 1.5481170178642736,
      "grad_norm": 0.17432886362075806,
      "learning_rate": 6.949256406370459e-06,
      "loss": 0.0163,
      "step": 945980
    },
    {
      "epoch": 1.5481497483029267,
      "grad_norm": 1.053053855895996,
      "learning_rate": 6.949190514156943e-06,
      "loss": 0.0214,
      "step": 946000
    },
    {
      "epoch": 1.54818247874158,
      "grad_norm": 0.4647330045700073,
      "learning_rate": 6.949124621943425e-06,
      "loss": 0.0163,
      "step": 946020
    },
    {
      "epoch": 1.5482152091802335,
      "grad_norm": 0.4704623818397522,
      "learning_rate": 6.949058729729908e-06,
      "loss": 0.0165,
      "step": 946040
    },
    {
      "epoch": 1.5482479396188866,
      "grad_norm": 0.6455152034759521,
      "learning_rate": 6.948992837516392e-06,
      "loss": 0.0199,
      "step": 946060
    },
    {
      "epoch": 1.5482806700575402,
      "grad_norm": 0.34827500581741333,
      "learning_rate": 6.948926945302874e-06,
      "loss": 0.014,
      "step": 946080
    },
    {
      "epoch": 1.5483134004961934,
      "grad_norm": 0.8441739082336426,
      "learning_rate": 6.9488610530893575e-06,
      "loss": 0.0338,
      "step": 946100
    },
    {
      "epoch": 1.5483461309348467,
      "grad_norm": 1.938167691230774,
      "learning_rate": 6.948795160875839e-06,
      "loss": 0.0143,
      "step": 946120
    },
    {
      "epoch": 1.5483788613735001,
      "grad_norm": 0.2461540251970291,
      "learning_rate": 6.948729268662323e-06,
      "loss": 0.0244,
      "step": 946140
    },
    {
      "epoch": 1.5484115918121535,
      "grad_norm": 1.1883492469787598,
      "learning_rate": 6.948663376448806e-06,
      "loss": 0.0225,
      "step": 946160
    },
    {
      "epoch": 1.5484443222508069,
      "grad_norm": 0.2822555601596832,
      "learning_rate": 6.9485974842352884e-06,
      "loss": 0.0189,
      "step": 946180
    },
    {
      "epoch": 1.54847705268946,
      "grad_norm": 0.3649534285068512,
      "learning_rate": 6.948531592021771e-06,
      "loss": 0.0194,
      "step": 946200
    },
    {
      "epoch": 1.5485097831281136,
      "grad_norm": 0.33198681473731995,
      "learning_rate": 6.948465699808255e-06,
      "loss": 0.0211,
      "step": 946220
    },
    {
      "epoch": 1.5485425135667668,
      "grad_norm": 0.39360934495925903,
      "learning_rate": 6.948399807594737e-06,
      "loss": 0.0146,
      "step": 946240
    },
    {
      "epoch": 1.5485752440054201,
      "grad_norm": 0.2702210545539856,
      "learning_rate": 6.94833391538122e-06,
      "loss": 0.0182,
      "step": 946260
    },
    {
      "epoch": 1.5486079744440735,
      "grad_norm": 0.13084392249584198,
      "learning_rate": 6.948268023167702e-06,
      "loss": 0.0173,
      "step": 946280
    },
    {
      "epoch": 1.5486407048827269,
      "grad_norm": 0.18813034892082214,
      "learning_rate": 6.948202130954186e-06,
      "loss": 0.0174,
      "step": 946300
    },
    {
      "epoch": 1.5486734353213802,
      "grad_norm": 0.8652770519256592,
      "learning_rate": 6.9481362387406684e-06,
      "loss": 0.0218,
      "step": 946320
    },
    {
      "epoch": 1.5487061657600334,
      "grad_norm": 1.019902229309082,
      "learning_rate": 6.948070346527151e-06,
      "loss": 0.0214,
      "step": 946340
    },
    {
      "epoch": 1.548738896198687,
      "grad_norm": 0.906120777130127,
      "learning_rate": 6.948004454313634e-06,
      "loss": 0.0207,
      "step": 946360
    },
    {
      "epoch": 1.5487716266373401,
      "grad_norm": 0.7250258326530457,
      "learning_rate": 6.9479385621001175e-06,
      "loss": 0.0308,
      "step": 946380
    },
    {
      "epoch": 1.5488043570759935,
      "grad_norm": 0.23891818523406982,
      "learning_rate": 6.9478726698866e-06,
      "loss": 0.0216,
      "step": 946400
    },
    {
      "epoch": 1.5488370875146469,
      "grad_norm": 0.7089748978614807,
      "learning_rate": 6.947806777673083e-06,
      "loss": 0.0205,
      "step": 946420
    },
    {
      "epoch": 1.5488698179533003,
      "grad_norm": 0.8541207909584045,
      "learning_rate": 6.947740885459567e-06,
      "loss": 0.0175,
      "step": 946440
    },
    {
      "epoch": 1.5489025483919536,
      "grad_norm": 0.2893192768096924,
      "learning_rate": 6.9476749932460485e-06,
      "loss": 0.0231,
      "step": 946460
    },
    {
      "epoch": 1.5489352788306068,
      "grad_norm": 0.7206887602806091,
      "learning_rate": 6.947609101032532e-06,
      "loss": 0.024,
      "step": 946480
    },
    {
      "epoch": 1.5489680092692604,
      "grad_norm": 0.2812162935733795,
      "learning_rate": 6.947543208819014e-06,
      "loss": 0.0139,
      "step": 946500
    },
    {
      "epoch": 1.5490007397079135,
      "grad_norm": 0.2946983277797699,
      "learning_rate": 6.9474773166054975e-06,
      "loss": 0.0156,
      "step": 946520
    },
    {
      "epoch": 1.549033470146567,
      "grad_norm": 0.7003903388977051,
      "learning_rate": 6.94741142439198e-06,
      "loss": 0.0175,
      "step": 946540
    },
    {
      "epoch": 1.5490662005852203,
      "grad_norm": 0.2467830926179886,
      "learning_rate": 6.947345532178463e-06,
      "loss": 0.019,
      "step": 946560
    },
    {
      "epoch": 1.5490989310238734,
      "grad_norm": 0.9154459834098816,
      "learning_rate": 6.947279639964946e-06,
      "loss": 0.0191,
      "step": 946580
    },
    {
      "epoch": 1.549131661462527,
      "grad_norm": 0.5384625196456909,
      "learning_rate": 6.947213747751429e-06,
      "loss": 0.0165,
      "step": 946600
    },
    {
      "epoch": 1.5491643919011802,
      "grad_norm": 0.3343621492385864,
      "learning_rate": 6.947147855537911e-06,
      "loss": 0.0235,
      "step": 946620
    },
    {
      "epoch": 1.5491971223398338,
      "grad_norm": 0.5938562154769897,
      "learning_rate": 6.947081963324395e-06,
      "loss": 0.0209,
      "step": 946640
    },
    {
      "epoch": 1.549229852778487,
      "grad_norm": 0.2410667985677719,
      "learning_rate": 6.947016071110877e-06,
      "loss": 0.0223,
      "step": 946660
    },
    {
      "epoch": 1.5492625832171403,
      "grad_norm": 0.19039960205554962,
      "learning_rate": 6.94695017889736e-06,
      "loss": 0.0235,
      "step": 946680
    },
    {
      "epoch": 1.5492953136557936,
      "grad_norm": 0.5796600580215454,
      "learning_rate": 6.946884286683843e-06,
      "loss": 0.014,
      "step": 946700
    },
    {
      "epoch": 1.5493280440944468,
      "grad_norm": 0.41003257036209106,
      "learning_rate": 6.946818394470326e-06,
      "loss": 0.0233,
      "step": 946720
    },
    {
      "epoch": 1.5493607745331004,
      "grad_norm": 0.4628612697124481,
      "learning_rate": 6.946752502256809e-06,
      "loss": 0.0194,
      "step": 946740
    },
    {
      "epoch": 1.5493935049717535,
      "grad_norm": 1.0897499322891235,
      "learning_rate": 6.946686610043292e-06,
      "loss": 0.0206,
      "step": 946760
    },
    {
      "epoch": 1.549426235410407,
      "grad_norm": 0.16891376674175262,
      "learning_rate": 6.946620717829775e-06,
      "loss": 0.0187,
      "step": 946780
    },
    {
      "epoch": 1.5494589658490603,
      "grad_norm": 2.9829423427581787,
      "learning_rate": 6.946554825616258e-06,
      "loss": 0.0225,
      "step": 946800
    },
    {
      "epoch": 1.5494916962877137,
      "grad_norm": 0.6315812468528748,
      "learning_rate": 6.946488933402741e-06,
      "loss": 0.0219,
      "step": 946820
    },
    {
      "epoch": 1.549524426726367,
      "grad_norm": 0.42009812593460083,
      "learning_rate": 6.946423041189223e-06,
      "loss": 0.022,
      "step": 946840
    },
    {
      "epoch": 1.5495571571650202,
      "grad_norm": 0.9526090621948242,
      "learning_rate": 6.946357148975707e-06,
      "loss": 0.0152,
      "step": 946860
    },
    {
      "epoch": 1.5495898876036738,
      "grad_norm": 0.9268666505813599,
      "learning_rate": 6.9462912567621886e-06,
      "loss": 0.0207,
      "step": 946880
    },
    {
      "epoch": 1.549622618042327,
      "grad_norm": 0.2306327074766159,
      "learning_rate": 6.946225364548672e-06,
      "loss": 0.0177,
      "step": 946900
    },
    {
      "epoch": 1.5496553484809803,
      "grad_norm": 0.8099246621131897,
      "learning_rate": 6.946159472335154e-06,
      "loss": 0.027,
      "step": 946920
    },
    {
      "epoch": 1.5496880789196337,
      "grad_norm": 0.15030887722969055,
      "learning_rate": 6.946093580121638e-06,
      "loss": 0.0138,
      "step": 946940
    },
    {
      "epoch": 1.549720809358287,
      "grad_norm": 1.0584306716918945,
      "learning_rate": 6.94602768790812e-06,
      "loss": 0.0239,
      "step": 946960
    },
    {
      "epoch": 1.5497535397969404,
      "grad_norm": 0.15750543773174286,
      "learning_rate": 6.945961795694603e-06,
      "loss": 0.0216,
      "step": 946980
    },
    {
      "epoch": 1.5497862702355936,
      "grad_norm": 0.7495714426040649,
      "learning_rate": 6.945895903481086e-06,
      "loss": 0.0176,
      "step": 947000
    },
    {
      "epoch": 1.5498190006742472,
      "grad_norm": 0.5978493690490723,
      "learning_rate": 6.945830011267569e-06,
      "loss": 0.022,
      "step": 947020
    },
    {
      "epoch": 1.5498517311129003,
      "grad_norm": 0.7063153386116028,
      "learning_rate": 6.945764119054051e-06,
      "loss": 0.0276,
      "step": 947040
    },
    {
      "epoch": 1.5498844615515537,
      "grad_norm": 0.41920167207717896,
      "learning_rate": 6.945698226840535e-06,
      "loss": 0.0254,
      "step": 947060
    },
    {
      "epoch": 1.549917191990207,
      "grad_norm": 1.7415452003479004,
      "learning_rate": 6.9456323346270185e-06,
      "loss": 0.0266,
      "step": 947080
    },
    {
      "epoch": 1.5499499224288604,
      "grad_norm": 1.2527543306350708,
      "learning_rate": 6.9455664424135e-06,
      "loss": 0.0179,
      "step": 947100
    },
    {
      "epoch": 1.5499826528675138,
      "grad_norm": 0.5212468504905701,
      "learning_rate": 6.945500550199984e-06,
      "loss": 0.0155,
      "step": 947120
    },
    {
      "epoch": 1.550015383306167,
      "grad_norm": 0.807297945022583,
      "learning_rate": 6.945434657986466e-06,
      "loss": 0.0165,
      "step": 947140
    },
    {
      "epoch": 1.5500481137448205,
      "grad_norm": 0.4409499168395996,
      "learning_rate": 6.9453687657729494e-06,
      "loss": 0.0169,
      "step": 947160
    },
    {
      "epoch": 1.5500808441834737,
      "grad_norm": 1.4208403825759888,
      "learning_rate": 6.945302873559432e-06,
      "loss": 0.0245,
      "step": 947180
    },
    {
      "epoch": 1.550113574622127,
      "grad_norm": 0.36223286390304565,
      "learning_rate": 6.945236981345915e-06,
      "loss": 0.0185,
      "step": 947200
    },
    {
      "epoch": 1.5501463050607804,
      "grad_norm": 0.9797425866127014,
      "learning_rate": 6.945171089132398e-06,
      "loss": 0.0161,
      "step": 947220
    },
    {
      "epoch": 1.5501790354994338,
      "grad_norm": 0.1669641137123108,
      "learning_rate": 6.945105196918881e-06,
      "loss": 0.0218,
      "step": 947240
    },
    {
      "epoch": 1.5502117659380872,
      "grad_norm": 0.6076744794845581,
      "learning_rate": 6.945039304705363e-06,
      "loss": 0.017,
      "step": 947260
    },
    {
      "epoch": 1.5502444963767403,
      "grad_norm": 1.3540849685668945,
      "learning_rate": 6.944973412491847e-06,
      "loss": 0.0202,
      "step": 947280
    },
    {
      "epoch": 1.550277226815394,
      "grad_norm": 0.46221068501472473,
      "learning_rate": 6.944907520278329e-06,
      "loss": 0.0206,
      "step": 947300
    },
    {
      "epoch": 1.550309957254047,
      "grad_norm": 1.0374646186828613,
      "learning_rate": 6.944841628064812e-06,
      "loss": 0.0191,
      "step": 947320
    },
    {
      "epoch": 1.5503426876927004,
      "grad_norm": 0.9782633781433105,
      "learning_rate": 6.944775735851295e-06,
      "loss": 0.0232,
      "step": 947340
    },
    {
      "epoch": 1.5503754181313538,
      "grad_norm": 0.2706340253353119,
      "learning_rate": 6.944709843637778e-06,
      "loss": 0.0204,
      "step": 947360
    },
    {
      "epoch": 1.550408148570007,
      "grad_norm": 0.2866709530353546,
      "learning_rate": 6.9446439514242604e-06,
      "loss": 0.0313,
      "step": 947380
    },
    {
      "epoch": 1.5504408790086606,
      "grad_norm": 0.24114766716957092,
      "learning_rate": 6.944578059210744e-06,
      "loss": 0.0244,
      "step": 947400
    },
    {
      "epoch": 1.5504736094473137,
      "grad_norm": 0.3232155442237854,
      "learning_rate": 6.944512166997226e-06,
      "loss": 0.0165,
      "step": 947420
    },
    {
      "epoch": 1.5505063398859673,
      "grad_norm": 0.3061564266681671,
      "learning_rate": 6.9444462747837095e-06,
      "loss": 0.0213,
      "step": 947440
    },
    {
      "epoch": 1.5505390703246205,
      "grad_norm": 0.6645458936691284,
      "learning_rate": 6.944380382570193e-06,
      "loss": 0.0231,
      "step": 947460
    },
    {
      "epoch": 1.5505718007632738,
      "grad_norm": 0.4677176773548126,
      "learning_rate": 6.944314490356675e-06,
      "loss": 0.0249,
      "step": 947480
    },
    {
      "epoch": 1.5506045312019272,
      "grad_norm": 0.7612985968589783,
      "learning_rate": 6.9442485981431586e-06,
      "loss": 0.0177,
      "step": 947500
    },
    {
      "epoch": 1.5506372616405804,
      "grad_norm": 0.8989730477333069,
      "learning_rate": 6.9441827059296405e-06,
      "loss": 0.0159,
      "step": 947520
    },
    {
      "epoch": 1.550669992079234,
      "grad_norm": 0.34985989332199097,
      "learning_rate": 6.944116813716124e-06,
      "loss": 0.0202,
      "step": 947540
    },
    {
      "epoch": 1.550702722517887,
      "grad_norm": 0.7108535170555115,
      "learning_rate": 6.944050921502607e-06,
      "loss": 0.016,
      "step": 947560
    },
    {
      "epoch": 1.5507354529565405,
      "grad_norm": 0.14105334877967834,
      "learning_rate": 6.9439850292890895e-06,
      "loss": 0.0229,
      "step": 947580
    },
    {
      "epoch": 1.5507681833951938,
      "grad_norm": 0.28421345353126526,
      "learning_rate": 6.943919137075572e-06,
      "loss": 0.0205,
      "step": 947600
    },
    {
      "epoch": 1.5508009138338472,
      "grad_norm": 0.1634777933359146,
      "learning_rate": 6.943853244862056e-06,
      "loss": 0.0293,
      "step": 947620
    },
    {
      "epoch": 1.5508336442725006,
      "grad_norm": 1.3027156591415405,
      "learning_rate": 6.943787352648538e-06,
      "loss": 0.0213,
      "step": 947640
    },
    {
      "epoch": 1.5508663747111537,
      "grad_norm": 0.41676822304725647,
      "learning_rate": 6.943721460435021e-06,
      "loss": 0.0297,
      "step": 947660
    },
    {
      "epoch": 1.5508991051498073,
      "grad_norm": 0.23814721405506134,
      "learning_rate": 6.943655568221503e-06,
      "loss": 0.0245,
      "step": 947680
    },
    {
      "epoch": 1.5509318355884605,
      "grad_norm": 0.6030026078224182,
      "learning_rate": 6.943589676007987e-06,
      "loss": 0.0199,
      "step": 947700
    },
    {
      "epoch": 1.5509645660271139,
      "grad_norm": 0.8322109580039978,
      "learning_rate": 6.943523783794469e-06,
      "loss": 0.0237,
      "step": 947720
    },
    {
      "epoch": 1.5509972964657672,
      "grad_norm": 0.5939533114433289,
      "learning_rate": 6.943457891580952e-06,
      "loss": 0.0235,
      "step": 947740
    },
    {
      "epoch": 1.5510300269044206,
      "grad_norm": 0.43791916966438293,
      "learning_rate": 6.943391999367435e-06,
      "loss": 0.0249,
      "step": 947760
    },
    {
      "epoch": 1.551062757343074,
      "grad_norm": 0.29258301854133606,
      "learning_rate": 6.943326107153919e-06,
      "loss": 0.0267,
      "step": 947780
    },
    {
      "epoch": 1.5510954877817271,
      "grad_norm": 0.23747378587722778,
      "learning_rate": 6.943260214940401e-06,
      "loss": 0.0158,
      "step": 947800
    },
    {
      "epoch": 1.5511282182203807,
      "grad_norm": 0.9455474019050598,
      "learning_rate": 6.943194322726884e-06,
      "loss": 0.0121,
      "step": 947820
    },
    {
      "epoch": 1.5511609486590339,
      "grad_norm": 0.23687843978405,
      "learning_rate": 6.943128430513368e-06,
      "loss": 0.0135,
      "step": 947840
    },
    {
      "epoch": 1.5511936790976872,
      "grad_norm": 0.49114999175071716,
      "learning_rate": 6.9430625382998496e-06,
      "loss": 0.0252,
      "step": 947860
    },
    {
      "epoch": 1.5512264095363406,
      "grad_norm": 0.14932413399219513,
      "learning_rate": 6.942996646086333e-06,
      "loss": 0.017,
      "step": 947880
    },
    {
      "epoch": 1.551259139974994,
      "grad_norm": 0.27626773715019226,
      "learning_rate": 6.942930753872815e-06,
      "loss": 0.0247,
      "step": 947900
    },
    {
      "epoch": 1.5512918704136474,
      "grad_norm": 0.2948913276195526,
      "learning_rate": 6.942864861659299e-06,
      "loss": 0.0193,
      "step": 947920
    },
    {
      "epoch": 1.5513246008523005,
      "grad_norm": 0.5707778334617615,
      "learning_rate": 6.9427989694457805e-06,
      "loss": 0.0199,
      "step": 947940
    },
    {
      "epoch": 1.551357331290954,
      "grad_norm": 0.7662234902381897,
      "learning_rate": 6.942733077232264e-06,
      "loss": 0.0139,
      "step": 947960
    },
    {
      "epoch": 1.5513900617296073,
      "grad_norm": 0.5534738898277283,
      "learning_rate": 6.942667185018747e-06,
      "loss": 0.0203,
      "step": 947980
    },
    {
      "epoch": 1.5514227921682606,
      "grad_norm": 0.2862665355205536,
      "learning_rate": 6.94260129280523e-06,
      "loss": 0.0218,
      "step": 948000
    },
    {
      "epoch": 1.551455522606914,
      "grad_norm": 1.3886569738388062,
      "learning_rate": 6.942535400591712e-06,
      "loss": 0.0237,
      "step": 948020
    },
    {
      "epoch": 1.5514882530455674,
      "grad_norm": 0.45114344358444214,
      "learning_rate": 6.942469508378196e-06,
      "loss": 0.0171,
      "step": 948040
    },
    {
      "epoch": 1.5515209834842207,
      "grad_norm": 0.33304405212402344,
      "learning_rate": 6.942403616164678e-06,
      "loss": 0.0299,
      "step": 948060
    },
    {
      "epoch": 1.551553713922874,
      "grad_norm": 0.6940962076187134,
      "learning_rate": 6.942337723951161e-06,
      "loss": 0.0182,
      "step": 948080
    },
    {
      "epoch": 1.5515864443615275,
      "grad_norm": 0.4042680561542511,
      "learning_rate": 6.942271831737643e-06,
      "loss": 0.0131,
      "step": 948100
    },
    {
      "epoch": 1.5516191748001806,
      "grad_norm": 0.2753369212150574,
      "learning_rate": 6.942205939524127e-06,
      "loss": 0.0171,
      "step": 948120
    },
    {
      "epoch": 1.551651905238834,
      "grad_norm": 0.42397257685661316,
      "learning_rate": 6.9421400473106105e-06,
      "loss": 0.0189,
      "step": 948140
    },
    {
      "epoch": 1.5516846356774874,
      "grad_norm": 0.42040711641311646,
      "learning_rate": 6.942074155097092e-06,
      "loss": 0.0193,
      "step": 948160
    },
    {
      "epoch": 1.5517173661161405,
      "grad_norm": 0.4665971100330353,
      "learning_rate": 6.942008262883576e-06,
      "loss": 0.0207,
      "step": 948180
    },
    {
      "epoch": 1.5517500965547941,
      "grad_norm": 0.4269457161426544,
      "learning_rate": 6.941942370670059e-06,
      "loss": 0.0222,
      "step": 948200
    },
    {
      "epoch": 1.5517828269934473,
      "grad_norm": 0.39480865001678467,
      "learning_rate": 6.9418764784565414e-06,
      "loss": 0.0136,
      "step": 948220
    },
    {
      "epoch": 1.5518155574321009,
      "grad_norm": 0.3493003845214844,
      "learning_rate": 6.941810586243024e-06,
      "loss": 0.0152,
      "step": 948240
    },
    {
      "epoch": 1.551848287870754,
      "grad_norm": 0.5483138561248779,
      "learning_rate": 6.941744694029508e-06,
      "loss": 0.016,
      "step": 948260
    },
    {
      "epoch": 1.5518810183094074,
      "grad_norm": 0.603315532207489,
      "learning_rate": 6.94167880181599e-06,
      "loss": 0.0175,
      "step": 948280
    },
    {
      "epoch": 1.5519137487480608,
      "grad_norm": 0.4300112724304199,
      "learning_rate": 6.941612909602473e-06,
      "loss": 0.0175,
      "step": 948300
    },
    {
      "epoch": 1.551946479186714,
      "grad_norm": 0.5088562965393066,
      "learning_rate": 6.941547017388955e-06,
      "loss": 0.0126,
      "step": 948320
    },
    {
      "epoch": 1.5519792096253675,
      "grad_norm": 0.4688422977924347,
      "learning_rate": 6.941481125175439e-06,
      "loss": 0.0317,
      "step": 948340
    },
    {
      "epoch": 1.5520119400640207,
      "grad_norm": 0.3349723517894745,
      "learning_rate": 6.9414152329619215e-06,
      "loss": 0.0252,
      "step": 948360
    },
    {
      "epoch": 1.552044670502674,
      "grad_norm": 0.30082273483276367,
      "learning_rate": 6.941349340748404e-06,
      "loss": 0.0208,
      "step": 948380
    },
    {
      "epoch": 1.5520774009413274,
      "grad_norm": 0.22041918337345123,
      "learning_rate": 6.941283448534887e-06,
      "loss": 0.0207,
      "step": 948400
    },
    {
      "epoch": 1.5521101313799808,
      "grad_norm": 0.9828300476074219,
      "learning_rate": 6.9412175563213705e-06,
      "loss": 0.0149,
      "step": 948420
    },
    {
      "epoch": 1.5521428618186341,
      "grad_norm": 0.6052080392837524,
      "learning_rate": 6.941151664107852e-06,
      "loss": 0.0236,
      "step": 948440
    },
    {
      "epoch": 1.5521755922572873,
      "grad_norm": 0.37119022011756897,
      "learning_rate": 6.941085771894336e-06,
      "loss": 0.0212,
      "step": 948460
    },
    {
      "epoch": 1.552208322695941,
      "grad_norm": 0.7633179426193237,
      "learning_rate": 6.941019879680818e-06,
      "loss": 0.0166,
      "step": 948480
    },
    {
      "epoch": 1.552241053134594,
      "grad_norm": 0.5347039699554443,
      "learning_rate": 6.9409539874673015e-06,
      "loss": 0.0274,
      "step": 948500
    },
    {
      "epoch": 1.5522737835732474,
      "grad_norm": 0.5423445105552673,
      "learning_rate": 6.940888095253785e-06,
      "loss": 0.0192,
      "step": 948520
    },
    {
      "epoch": 1.5523065140119008,
      "grad_norm": 0.26657751202583313,
      "learning_rate": 6.940822203040267e-06,
      "loss": 0.0174,
      "step": 948540
    },
    {
      "epoch": 1.5523392444505542,
      "grad_norm": 0.6626608967781067,
      "learning_rate": 6.9407563108267505e-06,
      "loss": 0.0214,
      "step": 948560
    },
    {
      "epoch": 1.5523719748892075,
      "grad_norm": 0.2637438178062439,
      "learning_rate": 6.940690418613233e-06,
      "loss": 0.0217,
      "step": 948580
    },
    {
      "epoch": 1.5524047053278607,
      "grad_norm": 0.7383489608764648,
      "learning_rate": 6.940624526399716e-06,
      "loss": 0.0152,
      "step": 948600
    },
    {
      "epoch": 1.5524374357665143,
      "grad_norm": 0.5306352376937866,
      "learning_rate": 6.940558634186199e-06,
      "loss": 0.0272,
      "step": 948620
    },
    {
      "epoch": 1.5524701662051674,
      "grad_norm": 0.4682224988937378,
      "learning_rate": 6.940492741972682e-06,
      "loss": 0.0211,
      "step": 948640
    },
    {
      "epoch": 1.5525028966438208,
      "grad_norm": 2.3158481121063232,
      "learning_rate": 6.940426849759164e-06,
      "loss": 0.0251,
      "step": 948660
    },
    {
      "epoch": 1.5525356270824742,
      "grad_norm": 0.7800285220146179,
      "learning_rate": 6.940360957545648e-06,
      "loss": 0.0223,
      "step": 948680
    },
    {
      "epoch": 1.5525683575211275,
      "grad_norm": 0.5242952704429626,
      "learning_rate": 6.94029506533213e-06,
      "loss": 0.0154,
      "step": 948700
    },
    {
      "epoch": 1.552601087959781,
      "grad_norm": 0.38531380891799927,
      "learning_rate": 6.940229173118613e-06,
      "loss": 0.031,
      "step": 948720
    },
    {
      "epoch": 1.552633818398434,
      "grad_norm": 0.6557636260986328,
      "learning_rate": 6.940163280905095e-06,
      "loss": 0.0179,
      "step": 948740
    },
    {
      "epoch": 1.5526665488370877,
      "grad_norm": 1.1552706956863403,
      "learning_rate": 6.940097388691579e-06,
      "loss": 0.0322,
      "step": 948760
    },
    {
      "epoch": 1.5526992792757408,
      "grad_norm": 0.8065857291221619,
      "learning_rate": 6.9400314964780615e-06,
      "loss": 0.0128,
      "step": 948780
    },
    {
      "epoch": 1.5527320097143942,
      "grad_norm": 0.14785389602184296,
      "learning_rate": 6.939965604264544e-06,
      "loss": 0.0169,
      "step": 948800
    },
    {
      "epoch": 1.5527647401530476,
      "grad_norm": 1.6400047540664673,
      "learning_rate": 6.939899712051027e-06,
      "loss": 0.0166,
      "step": 948820
    },
    {
      "epoch": 1.552797470591701,
      "grad_norm": 1.208577275276184,
      "learning_rate": 6.939833819837511e-06,
      "loss": 0.025,
      "step": 948840
    },
    {
      "epoch": 1.5528302010303543,
      "grad_norm": 0.09094144403934479,
      "learning_rate": 6.939767927623993e-06,
      "loss": 0.0169,
      "step": 948860
    },
    {
      "epoch": 1.5528629314690074,
      "grad_norm": 0.33272087574005127,
      "learning_rate": 6.939702035410476e-06,
      "loss": 0.014,
      "step": 948880
    },
    {
      "epoch": 1.552895661907661,
      "grad_norm": 1.0731492042541504,
      "learning_rate": 6.93963614319696e-06,
      "loss": 0.0199,
      "step": 948900
    },
    {
      "epoch": 1.5529283923463142,
      "grad_norm": 1.0247706174850464,
      "learning_rate": 6.9395702509834416e-06,
      "loss": 0.0225,
      "step": 948920
    },
    {
      "epoch": 1.5529611227849676,
      "grad_norm": 0.942668616771698,
      "learning_rate": 6.939504358769925e-06,
      "loss": 0.0212,
      "step": 948940
    },
    {
      "epoch": 1.552993853223621,
      "grad_norm": 0.6926259994506836,
      "learning_rate": 6.939438466556407e-06,
      "loss": 0.0229,
      "step": 948960
    },
    {
      "epoch": 1.553026583662274,
      "grad_norm": 0.16177865862846375,
      "learning_rate": 6.939372574342891e-06,
      "loss": 0.0194,
      "step": 948980
    },
    {
      "epoch": 1.5530593141009277,
      "grad_norm": 0.2775963842868805,
      "learning_rate": 6.939306682129373e-06,
      "loss": 0.029,
      "step": 949000
    },
    {
      "epoch": 1.5530920445395808,
      "grad_norm": 0.11239542067050934,
      "learning_rate": 6.939240789915856e-06,
      "loss": 0.0177,
      "step": 949020
    },
    {
      "epoch": 1.5531247749782344,
      "grad_norm": 1.869441270828247,
      "learning_rate": 6.939174897702339e-06,
      "loss": 0.021,
      "step": 949040
    },
    {
      "epoch": 1.5531575054168876,
      "grad_norm": 0.2999504804611206,
      "learning_rate": 6.9391090054888224e-06,
      "loss": 0.0189,
      "step": 949060
    },
    {
      "epoch": 1.553190235855541,
      "grad_norm": 0.526019811630249,
      "learning_rate": 6.939043113275304e-06,
      "loss": 0.0167,
      "step": 949080
    },
    {
      "epoch": 1.5532229662941943,
      "grad_norm": 0.6292213201522827,
      "learning_rate": 6.938977221061788e-06,
      "loss": 0.0163,
      "step": 949100
    },
    {
      "epoch": 1.5532556967328475,
      "grad_norm": 0.5483694076538086,
      "learning_rate": 6.93891132884827e-06,
      "loss": 0.0195,
      "step": 949120
    },
    {
      "epoch": 1.553288427171501,
      "grad_norm": 3.6366982460021973,
      "learning_rate": 6.938845436634753e-06,
      "loss": 0.0162,
      "step": 949140
    },
    {
      "epoch": 1.5533211576101542,
      "grad_norm": 0.7674062848091125,
      "learning_rate": 6.938779544421236e-06,
      "loss": 0.0293,
      "step": 949160
    },
    {
      "epoch": 1.5533538880488076,
      "grad_norm": 2.1178085803985596,
      "learning_rate": 6.938713652207719e-06,
      "loss": 0.0215,
      "step": 949180
    },
    {
      "epoch": 1.553386618487461,
      "grad_norm": 0.15444053709506989,
      "learning_rate": 6.9386477599942025e-06,
      "loss": 0.0172,
      "step": 949200
    },
    {
      "epoch": 1.5534193489261143,
      "grad_norm": 0.5106958150863647,
      "learning_rate": 6.938581867780685e-06,
      "loss": 0.0216,
      "step": 949220
    },
    {
      "epoch": 1.5534520793647677,
      "grad_norm": 1.8000456094741821,
      "learning_rate": 6.938515975567168e-06,
      "loss": 0.0266,
      "step": 949240
    },
    {
      "epoch": 1.5534848098034209,
      "grad_norm": 0.34798043966293335,
      "learning_rate": 6.938450083353651e-06,
      "loss": 0.0234,
      "step": 949260
    },
    {
      "epoch": 1.5535175402420744,
      "grad_norm": 0.4459307789802551,
      "learning_rate": 6.938384191140134e-06,
      "loss": 0.0171,
      "step": 949280
    },
    {
      "epoch": 1.5535502706807276,
      "grad_norm": 1.384974718093872,
      "learning_rate": 6.938318298926616e-06,
      "loss": 0.0233,
      "step": 949300
    },
    {
      "epoch": 1.553583001119381,
      "grad_norm": 0.28082597255706787,
      "learning_rate": 6.9382524067131e-06,
      "loss": 0.0148,
      "step": 949320
    },
    {
      "epoch": 1.5536157315580343,
      "grad_norm": 0.17193643748760223,
      "learning_rate": 6.938186514499582e-06,
      "loss": 0.0233,
      "step": 949340
    },
    {
      "epoch": 1.5536484619966877,
      "grad_norm": 0.560685396194458,
      "learning_rate": 6.938120622286065e-06,
      "loss": 0.0231,
      "step": 949360
    },
    {
      "epoch": 1.553681192435341,
      "grad_norm": 0.384511262178421,
      "learning_rate": 6.938054730072548e-06,
      "loss": 0.0169,
      "step": 949380
    },
    {
      "epoch": 1.5537139228739942,
      "grad_norm": 0.6750276684761047,
      "learning_rate": 6.937988837859031e-06,
      "loss": 0.0168,
      "step": 949400
    },
    {
      "epoch": 1.5537466533126478,
      "grad_norm": 0.3007453978061676,
      "learning_rate": 6.9379229456455134e-06,
      "loss": 0.0219,
      "step": 949420
    },
    {
      "epoch": 1.553779383751301,
      "grad_norm": 0.8154052495956421,
      "learning_rate": 6.937857053431997e-06,
      "loss": 0.0178,
      "step": 949440
    },
    {
      "epoch": 1.5538121141899544,
      "grad_norm": 0.25735020637512207,
      "learning_rate": 6.937791161218479e-06,
      "loss": 0.0204,
      "step": 949460
    },
    {
      "epoch": 1.5538448446286077,
      "grad_norm": 0.13730809092521667,
      "learning_rate": 6.9377252690049625e-06,
      "loss": 0.0216,
      "step": 949480
    },
    {
      "epoch": 1.553877575067261,
      "grad_norm": 0.1565217226743698,
      "learning_rate": 6.937659376791444e-06,
      "loss": 0.0235,
      "step": 949500
    },
    {
      "epoch": 1.5539103055059145,
      "grad_norm": 0.29385384917259216,
      "learning_rate": 6.937593484577928e-06,
      "loss": 0.0222,
      "step": 949520
    },
    {
      "epoch": 1.5539430359445676,
      "grad_norm": 0.7262290120124817,
      "learning_rate": 6.937527592364411e-06,
      "loss": 0.0143,
      "step": 949540
    },
    {
      "epoch": 1.5539757663832212,
      "grad_norm": 0.21096880733966827,
      "learning_rate": 6.9374617001508935e-06,
      "loss": 0.0238,
      "step": 949560
    },
    {
      "epoch": 1.5540084968218744,
      "grad_norm": 0.439376562833786,
      "learning_rate": 6.937395807937377e-06,
      "loss": 0.0267,
      "step": 949580
    },
    {
      "epoch": 1.5540412272605277,
      "grad_norm": 0.27040085196495056,
      "learning_rate": 6.93732991572386e-06,
      "loss": 0.0158,
      "step": 949600
    },
    {
      "epoch": 1.554073957699181,
      "grad_norm": 0.3627241551876068,
      "learning_rate": 6.9372640235103425e-06,
      "loss": 0.0243,
      "step": 949620
    },
    {
      "epoch": 1.5541066881378343,
      "grad_norm": 0.11006621271371841,
      "learning_rate": 6.937198131296825e-06,
      "loss": 0.0229,
      "step": 949640
    },
    {
      "epoch": 1.5541394185764879,
      "grad_norm": 0.6758037209510803,
      "learning_rate": 6.937132239083309e-06,
      "loss": 0.0173,
      "step": 949660
    },
    {
      "epoch": 1.554172149015141,
      "grad_norm": 1.0095587968826294,
      "learning_rate": 6.937066346869791e-06,
      "loss": 0.0207,
      "step": 949680
    },
    {
      "epoch": 1.5542048794537946,
      "grad_norm": 0.5789926052093506,
      "learning_rate": 6.937000454656274e-06,
      "loss": 0.0224,
      "step": 949700
    },
    {
      "epoch": 1.5542376098924477,
      "grad_norm": 0.16338540613651276,
      "learning_rate": 6.936934562442756e-06,
      "loss": 0.0123,
      "step": 949720
    },
    {
      "epoch": 1.5542703403311011,
      "grad_norm": 1.1625638008117676,
      "learning_rate": 6.93686867022924e-06,
      "loss": 0.0234,
      "step": 949740
    },
    {
      "epoch": 1.5543030707697545,
      "grad_norm": 0.23407156765460968,
      "learning_rate": 6.936802778015722e-06,
      "loss": 0.0176,
      "step": 949760
    },
    {
      "epoch": 1.5543358012084076,
      "grad_norm": 0.6509019136428833,
      "learning_rate": 6.936736885802205e-06,
      "loss": 0.019,
      "step": 949780
    },
    {
      "epoch": 1.5543685316470612,
      "grad_norm": 0.4496808350086212,
      "learning_rate": 6.936670993588688e-06,
      "loss": 0.0227,
      "step": 949800
    },
    {
      "epoch": 1.5544012620857144,
      "grad_norm": 0.2551933526992798,
      "learning_rate": 6.936605101375171e-06,
      "loss": 0.0189,
      "step": 949820
    },
    {
      "epoch": 1.5544339925243678,
      "grad_norm": 0.8346825242042542,
      "learning_rate": 6.9365392091616535e-06,
      "loss": 0.0162,
      "step": 949840
    },
    {
      "epoch": 1.5544667229630211,
      "grad_norm": null,
      "learning_rate": 6.936473316948137e-06,
      "loss": 0.02,
      "step": 949860
    },
    {
      "epoch": 1.5544994534016745,
      "grad_norm": 0.16485992074012756,
      "learning_rate": 6.936407424734619e-06,
      "loss": 0.0209,
      "step": 949880
    },
    {
      "epoch": 1.5545321838403279,
      "grad_norm": 1.4960646629333496,
      "learning_rate": 6.936341532521103e-06,
      "loss": 0.0252,
      "step": 949900
    },
    {
      "epoch": 1.554564914278981,
      "grad_norm": 1.141238808631897,
      "learning_rate": 6.936275640307586e-06,
      "loss": 0.0225,
      "step": 949920
    },
    {
      "epoch": 1.5545976447176346,
      "grad_norm": 0.682357907295227,
      "learning_rate": 6.936209748094068e-06,
      "loss": 0.017,
      "step": 949940
    },
    {
      "epoch": 1.5546303751562878,
      "grad_norm": 0.1737479269504547,
      "learning_rate": 6.936143855880552e-06,
      "loss": 0.0182,
      "step": 949960
    },
    {
      "epoch": 1.5546631055949411,
      "grad_norm": 0.5368373990058899,
      "learning_rate": 6.9360779636670335e-06,
      "loss": 0.018,
      "step": 949980
    },
    {
      "epoch": 1.5546958360335945,
      "grad_norm": 0.36516624689102173,
      "learning_rate": 6.936012071453517e-06,
      "loss": 0.014,
      "step": 950000
    },
    {
      "epoch": 1.5546958360335945,
      "eval_loss": 0.010587815195322037,
      "eval_runtime": 6500.1486,
      "eval_samples_per_second": 158.128,
      "eval_steps_per_second": 15.813,
      "eval_sts-dev_pearson_cosine": 0.9752902828325908,
      "eval_sts-dev_spearman_cosine": 0.8893216606326195,
      "step": 950000
    },
    {
      "epoch": 1.5547285664722479,
      "grad_norm": 0.3730841875076294,
      "learning_rate": 6.93594617924e-06,
      "loss": 0.0212,
      "step": 950020
    },
    {
      "epoch": 1.5547612969109013,
      "grad_norm": 0.22409136593341827,
      "learning_rate": 6.935880287026483e-06,
      "loss": 0.0221,
      "step": 950040
    },
    {
      "epoch": 1.5547940273495544,
      "grad_norm": 0.620140016078949,
      "learning_rate": 6.935814394812965e-06,
      "loss": 0.0191,
      "step": 950060
    },
    {
      "epoch": 1.554826757788208,
      "grad_norm": 0.08194348216056824,
      "learning_rate": 6.935748502599449e-06,
      "loss": 0.014,
      "step": 950080
    },
    {
      "epoch": 1.5548594882268612,
      "grad_norm": 0.8155680298805237,
      "learning_rate": 6.935682610385931e-06,
      "loss": 0.0357,
      "step": 950100
    },
    {
      "epoch": 1.5548922186655145,
      "grad_norm": 0.7951841354370117,
      "learning_rate": 6.935616718172414e-06,
      "loss": 0.0226,
      "step": 950120
    },
    {
      "epoch": 1.554924949104168,
      "grad_norm": 0.751599133014679,
      "learning_rate": 6.935550825958896e-06,
      "loss": 0.018,
      "step": 950140
    },
    {
      "epoch": 1.5549576795428213,
      "grad_norm": 0.3960009813308716,
      "learning_rate": 6.93548493374538e-06,
      "loss": 0.0217,
      "step": 950160
    },
    {
      "epoch": 1.5549904099814746,
      "grad_norm": 0.32829537987709045,
      "learning_rate": 6.935419041531863e-06,
      "loss": 0.0145,
      "step": 950180
    },
    {
      "epoch": 1.5550231404201278,
      "grad_norm": 1.3076876401901245,
      "learning_rate": 6.935353149318345e-06,
      "loss": 0.0264,
      "step": 950200
    },
    {
      "epoch": 1.5550558708587814,
      "grad_norm": 0.6707567572593689,
      "learning_rate": 6.935287257104828e-06,
      "loss": 0.0168,
      "step": 950220
    },
    {
      "epoch": 1.5550886012974345,
      "grad_norm": 0.375067800283432,
      "learning_rate": 6.935221364891312e-06,
      "loss": 0.0297,
      "step": 950240
    },
    {
      "epoch": 1.555121331736088,
      "grad_norm": 0.26927340030670166,
      "learning_rate": 6.9351554726777944e-06,
      "loss": 0.0203,
      "step": 950260
    },
    {
      "epoch": 1.5551540621747413,
      "grad_norm": 0.9300801157951355,
      "learning_rate": 6.935089580464277e-06,
      "loss": 0.0219,
      "step": 950280
    },
    {
      "epoch": 1.5551867926133947,
      "grad_norm": 0.42689448595046997,
      "learning_rate": 6.935023688250761e-06,
      "loss": 0.0186,
      "step": 950300
    },
    {
      "epoch": 1.555219523052048,
      "grad_norm": 0.7176163792610168,
      "learning_rate": 6.934957796037243e-06,
      "loss": 0.0195,
      "step": 950320
    },
    {
      "epoch": 1.5552522534907012,
      "grad_norm": 0.7734074592590332,
      "learning_rate": 6.934891903823726e-06,
      "loss": 0.0257,
      "step": 950340
    },
    {
      "epoch": 1.5552849839293548,
      "grad_norm": 0.2427363395690918,
      "learning_rate": 6.934826011610208e-06,
      "loss": 0.0174,
      "step": 950360
    },
    {
      "epoch": 1.555317714368008,
      "grad_norm": 0.24147003889083862,
      "learning_rate": 6.934760119396692e-06,
      "loss": 0.0196,
      "step": 950380
    },
    {
      "epoch": 1.5553504448066613,
      "grad_norm": 0.07563468813896179,
      "learning_rate": 6.9346942271831745e-06,
      "loss": 0.014,
      "step": 950400
    },
    {
      "epoch": 1.5553831752453147,
      "grad_norm": 1.1543017625808716,
      "learning_rate": 6.934628334969657e-06,
      "loss": 0.0191,
      "step": 950420
    },
    {
      "epoch": 1.5554159056839678,
      "grad_norm": 1.3404858112335205,
      "learning_rate": 6.93456244275614e-06,
      "loss": 0.0303,
      "step": 950440
    },
    {
      "epoch": 1.5554486361226214,
      "grad_norm": 0.89216548204422,
      "learning_rate": 6.9344965505426235e-06,
      "loss": 0.0211,
      "step": 950460
    },
    {
      "epoch": 1.5554813665612746,
      "grad_norm": 1.2528399229049683,
      "learning_rate": 6.934430658329105e-06,
      "loss": 0.0189,
      "step": 950480
    },
    {
      "epoch": 1.5555140969999282,
      "grad_norm": 2.7535767555236816,
      "learning_rate": 6.934364766115589e-06,
      "loss": 0.0238,
      "step": 950500
    },
    {
      "epoch": 1.5555468274385813,
      "grad_norm": 0.36674442887306213,
      "learning_rate": 6.934298873902071e-06,
      "loss": 0.0176,
      "step": 950520
    },
    {
      "epoch": 1.5555795578772347,
      "grad_norm": 0.23628126084804535,
      "learning_rate": 6.9342329816885545e-06,
      "loss": 0.0219,
      "step": 950540
    },
    {
      "epoch": 1.555612288315888,
      "grad_norm": 1.670692801475525,
      "learning_rate": 6.934167089475037e-06,
      "loss": 0.0207,
      "step": 950560
    },
    {
      "epoch": 1.5556450187545412,
      "grad_norm": 0.7173919677734375,
      "learning_rate": 6.93410119726152e-06,
      "loss": 0.0235,
      "step": 950580
    },
    {
      "epoch": 1.5556777491931948,
      "grad_norm": 0.4565258324146271,
      "learning_rate": 6.9340353050480036e-06,
      "loss": 0.0162,
      "step": 950600
    },
    {
      "epoch": 1.555710479631848,
      "grad_norm": 0.1618971973657608,
      "learning_rate": 6.933969412834486e-06,
      "loss": 0.0185,
      "step": 950620
    },
    {
      "epoch": 1.5557432100705013,
      "grad_norm": 0.5072218179702759,
      "learning_rate": 6.933903520620969e-06,
      "loss": 0.023,
      "step": 950640
    },
    {
      "epoch": 1.5557759405091547,
      "grad_norm": 0.22508393228054047,
      "learning_rate": 6.933837628407452e-06,
      "loss": 0.0163,
      "step": 950660
    },
    {
      "epoch": 1.555808670947808,
      "grad_norm": 0.42089352011680603,
      "learning_rate": 6.933771736193935e-06,
      "loss": 0.0176,
      "step": 950680
    },
    {
      "epoch": 1.5558414013864614,
      "grad_norm": 0.13587979972362518,
      "learning_rate": 6.933705843980417e-06,
      "loss": 0.0193,
      "step": 950700
    },
    {
      "epoch": 1.5558741318251146,
      "grad_norm": 0.19525614380836487,
      "learning_rate": 6.933639951766901e-06,
      "loss": 0.0151,
      "step": 950720
    },
    {
      "epoch": 1.5559068622637682,
      "grad_norm": 0.26895108819007874,
      "learning_rate": 6.933574059553383e-06,
      "loss": 0.0152,
      "step": 950740
    },
    {
      "epoch": 1.5559395927024213,
      "grad_norm": 0.3420860171318054,
      "learning_rate": 6.933508167339866e-06,
      "loss": 0.0163,
      "step": 950760
    },
    {
      "epoch": 1.5559723231410747,
      "grad_norm": 0.5590026378631592,
      "learning_rate": 6.933442275126348e-06,
      "loss": 0.0269,
      "step": 950780
    },
    {
      "epoch": 1.556005053579728,
      "grad_norm": 0.4258062243461609,
      "learning_rate": 6.933376382912832e-06,
      "loss": 0.0232,
      "step": 950800
    },
    {
      "epoch": 1.5560377840183814,
      "grad_norm": 1.5413693189620972,
      "learning_rate": 6.9333104906993145e-06,
      "loss": 0.0281,
      "step": 950820
    },
    {
      "epoch": 1.5560705144570348,
      "grad_norm": 0.30480295419692993,
      "learning_rate": 6.933244598485797e-06,
      "loss": 0.0177,
      "step": 950840
    },
    {
      "epoch": 1.556103244895688,
      "grad_norm": 0.25102129578590393,
      "learning_rate": 6.93317870627228e-06,
      "loss": 0.0238,
      "step": 950860
    },
    {
      "epoch": 1.5561359753343416,
      "grad_norm": 1.0735584497451782,
      "learning_rate": 6.933112814058764e-06,
      "loss": 0.025,
      "step": 950880
    },
    {
      "epoch": 1.5561687057729947,
      "grad_norm": 0.5713875889778137,
      "learning_rate": 6.9330469218452455e-06,
      "loss": 0.018,
      "step": 950900
    },
    {
      "epoch": 1.556201436211648,
      "grad_norm": 0.19790048897266388,
      "learning_rate": 6.932981029631729e-06,
      "loss": 0.0193,
      "step": 950920
    },
    {
      "epoch": 1.5562341666503015,
      "grad_norm": 0.322837233543396,
      "learning_rate": 6.932915137418211e-06,
      "loss": 0.0266,
      "step": 950940
    },
    {
      "epoch": 1.5562668970889548,
      "grad_norm": 0.2719995975494385,
      "learning_rate": 6.9328492452046946e-06,
      "loss": 0.0122,
      "step": 950960
    },
    {
      "epoch": 1.5562996275276082,
      "grad_norm": 0.3938828408718109,
      "learning_rate": 6.932783352991178e-06,
      "loss": 0.0233,
      "step": 950980
    },
    {
      "epoch": 1.5563323579662613,
      "grad_norm": 0.47723156213760376,
      "learning_rate": 6.93271746077766e-06,
      "loss": 0.0222,
      "step": 951000
    },
    {
      "epoch": 1.556365088404915,
      "grad_norm": 0.29251155257225037,
      "learning_rate": 6.932651568564144e-06,
      "loss": 0.0182,
      "step": 951020
    },
    {
      "epoch": 1.556397818843568,
      "grad_norm": 0.4772872030735016,
      "learning_rate": 6.932585676350626e-06,
      "loss": 0.0254,
      "step": 951040
    },
    {
      "epoch": 1.5564305492822215,
      "grad_norm": 0.5898081660270691,
      "learning_rate": 6.932519784137109e-06,
      "loss": 0.013,
      "step": 951060
    },
    {
      "epoch": 1.5564632797208748,
      "grad_norm": 0.582253634929657,
      "learning_rate": 6.932453891923592e-06,
      "loss": 0.0168,
      "step": 951080
    },
    {
      "epoch": 1.5564960101595282,
      "grad_norm": 1.163204312324524,
      "learning_rate": 6.9323879997100754e-06,
      "loss": 0.0286,
      "step": 951100
    },
    {
      "epoch": 1.5565287405981816,
      "grad_norm": 1.7240967750549316,
      "learning_rate": 6.932322107496557e-06,
      "loss": 0.0208,
      "step": 951120
    },
    {
      "epoch": 1.5565614710368347,
      "grad_norm": 0.23046128451824188,
      "learning_rate": 6.932256215283041e-06,
      "loss": 0.0135,
      "step": 951140
    },
    {
      "epoch": 1.5565942014754883,
      "grad_norm": 0.7193042039871216,
      "learning_rate": 6.932190323069523e-06,
      "loss": 0.0168,
      "step": 951160
    },
    {
      "epoch": 1.5566269319141415,
      "grad_norm": 0.741175651550293,
      "learning_rate": 6.932124430856006e-06,
      "loss": 0.0212,
      "step": 951180
    },
    {
      "epoch": 1.5566596623527948,
      "grad_norm": 0.34299734234809875,
      "learning_rate": 6.932058538642489e-06,
      "loss": 0.0181,
      "step": 951200
    },
    {
      "epoch": 1.5566923927914482,
      "grad_norm": 0.07046011090278625,
      "learning_rate": 6.931992646428972e-06,
      "loss": 0.0201,
      "step": 951220
    },
    {
      "epoch": 1.5567251232301014,
      "grad_norm": 1.0047110319137573,
      "learning_rate": 6.931926754215455e-06,
      "loss": 0.0206,
      "step": 951240
    },
    {
      "epoch": 1.556757853668755,
      "grad_norm": 0.6071298718452454,
      "learning_rate": 6.931860862001938e-06,
      "loss": 0.0178,
      "step": 951260
    },
    {
      "epoch": 1.5567905841074081,
      "grad_norm": 1.011356234550476,
      "learning_rate": 6.93179496978842e-06,
      "loss": 0.0135,
      "step": 951280
    },
    {
      "epoch": 1.5568233145460617,
      "grad_norm": 0.4860980212688446,
      "learning_rate": 6.931729077574904e-06,
      "loss": 0.018,
      "step": 951300
    },
    {
      "epoch": 1.5568560449847149,
      "grad_norm": 0.8465258479118347,
      "learning_rate": 6.931663185361387e-06,
      "loss": 0.0241,
      "step": 951320
    },
    {
      "epoch": 1.5568887754233682,
      "grad_norm": 0.14453817903995514,
      "learning_rate": 6.931597293147869e-06,
      "loss": 0.0247,
      "step": 951340
    },
    {
      "epoch": 1.5569215058620216,
      "grad_norm": 0.6027692556381226,
      "learning_rate": 6.931531400934353e-06,
      "loss": 0.0157,
      "step": 951360
    },
    {
      "epoch": 1.5569542363006748,
      "grad_norm": 0.16587594151496887,
      "learning_rate": 6.931465508720835e-06,
      "loss": 0.0213,
      "step": 951380
    },
    {
      "epoch": 1.5569869667393283,
      "grad_norm": 0.8613682985305786,
      "learning_rate": 6.931399616507318e-06,
      "loss": 0.0221,
      "step": 951400
    },
    {
      "epoch": 1.5570196971779815,
      "grad_norm": 0.7371513843536377,
      "learning_rate": 6.931333724293801e-06,
      "loss": 0.0188,
      "step": 951420
    },
    {
      "epoch": 1.5570524276166349,
      "grad_norm": 0.27610063552856445,
      "learning_rate": 6.931267832080284e-06,
      "loss": 0.0188,
      "step": 951440
    },
    {
      "epoch": 1.5570851580552882,
      "grad_norm": 1.2777022123336792,
      "learning_rate": 6.9312019398667664e-06,
      "loss": 0.0175,
      "step": 951460
    },
    {
      "epoch": 1.5571178884939416,
      "grad_norm": 0.46593379974365234,
      "learning_rate": 6.93113604765325e-06,
      "loss": 0.0295,
      "step": 951480
    },
    {
      "epoch": 1.557150618932595,
      "grad_norm": 0.8537212610244751,
      "learning_rate": 6.931070155439732e-06,
      "loss": 0.019,
      "step": 951500
    },
    {
      "epoch": 1.5571833493712481,
      "grad_norm": 0.620084822177887,
      "learning_rate": 6.9310042632262155e-06,
      "loss": 0.0166,
      "step": 951520
    },
    {
      "epoch": 1.5572160798099017,
      "grad_norm": 0.5584986209869385,
      "learning_rate": 6.930938371012697e-06,
      "loss": 0.0234,
      "step": 951540
    },
    {
      "epoch": 1.5572488102485549,
      "grad_norm": 1.0935640335083008,
      "learning_rate": 6.930872478799181e-06,
      "loss": 0.0219,
      "step": 951560
    },
    {
      "epoch": 1.5572815406872083,
      "grad_norm": 0.29473668336868286,
      "learning_rate": 6.930806586585663e-06,
      "loss": 0.0187,
      "step": 951580
    },
    {
      "epoch": 1.5573142711258616,
      "grad_norm": 0.6915514469146729,
      "learning_rate": 6.9307406943721465e-06,
      "loss": 0.0174,
      "step": 951600
    },
    {
      "epoch": 1.557347001564515,
      "grad_norm": 0.6939685940742493,
      "learning_rate": 6.930674802158629e-06,
      "loss": 0.0207,
      "step": 951620
    },
    {
      "epoch": 1.5573797320031684,
      "grad_norm": 0.10446290671825409,
      "learning_rate": 6.930608909945112e-06,
      "loss": 0.0238,
      "step": 951640
    },
    {
      "epoch": 1.5574124624418215,
      "grad_norm": 0.409356951713562,
      "learning_rate": 6.9305430177315955e-06,
      "loss": 0.0168,
      "step": 951660
    },
    {
      "epoch": 1.5574451928804751,
      "grad_norm": 0.32547953724861145,
      "learning_rate": 6.930477125518078e-06,
      "loss": 0.022,
      "step": 951680
    },
    {
      "epoch": 1.5574779233191283,
      "grad_norm": 0.34598666429519653,
      "learning_rate": 6.930411233304562e-06,
      "loss": 0.0203,
      "step": 951700
    },
    {
      "epoch": 1.5575106537577816,
      "grad_norm": 1.4747931957244873,
      "learning_rate": 6.930345341091044e-06,
      "loss": 0.0319,
      "step": 951720
    },
    {
      "epoch": 1.557543384196435,
      "grad_norm": 0.2981015145778656,
      "learning_rate": 6.930279448877527e-06,
      "loss": 0.0195,
      "step": 951740
    },
    {
      "epoch": 1.5575761146350884,
      "grad_norm": 0.8151450157165527,
      "learning_rate": 6.930213556664009e-06,
      "loss": 0.0321,
      "step": 951760
    },
    {
      "epoch": 1.5576088450737418,
      "grad_norm": 0.07542946934700012,
      "learning_rate": 6.930147664450493e-06,
      "loss": 0.017,
      "step": 951780
    },
    {
      "epoch": 1.557641575512395,
      "grad_norm": 0.7525684237480164,
      "learning_rate": 6.930081772236975e-06,
      "loss": 0.0229,
      "step": 951800
    },
    {
      "epoch": 1.5576743059510485,
      "grad_norm": 0.5817466974258423,
      "learning_rate": 6.930015880023458e-06,
      "loss": 0.0198,
      "step": 951820
    },
    {
      "epoch": 1.5577070363897016,
      "grad_norm": 0.4128473997116089,
      "learning_rate": 6.929949987809941e-06,
      "loss": 0.0187,
      "step": 951840
    },
    {
      "epoch": 1.557739766828355,
      "grad_norm": 0.3175346553325653,
      "learning_rate": 6.929884095596424e-06,
      "loss": 0.0225,
      "step": 951860
    },
    {
      "epoch": 1.5577724972670084,
      "grad_norm": 0.313644140958786,
      "learning_rate": 6.9298182033829065e-06,
      "loss": 0.0154,
      "step": 951880
    },
    {
      "epoch": 1.5578052277056618,
      "grad_norm": 0.805095374584198,
      "learning_rate": 6.92975231116939e-06,
      "loss": 0.0179,
      "step": 951900
    },
    {
      "epoch": 1.5578379581443151,
      "grad_norm": 0.9557561874389648,
      "learning_rate": 6.929686418955872e-06,
      "loss": 0.0201,
      "step": 951920
    },
    {
      "epoch": 1.5578706885829683,
      "grad_norm": 0.5817391872406006,
      "learning_rate": 6.929620526742356e-06,
      "loss": 0.0176,
      "step": 951940
    },
    {
      "epoch": 1.5579034190216219,
      "grad_norm": 0.3242100179195404,
      "learning_rate": 6.9295546345288375e-06,
      "loss": 0.0229,
      "step": 951960
    },
    {
      "epoch": 1.557936149460275,
      "grad_norm": 0.4833407402038574,
      "learning_rate": 6.929488742315321e-06,
      "loss": 0.0208,
      "step": 951980
    },
    {
      "epoch": 1.5579688798989284,
      "grad_norm": 0.48716074228286743,
      "learning_rate": 6.929422850101804e-06,
      "loss": 0.0156,
      "step": 952000
    },
    {
      "epoch": 1.5580016103375818,
      "grad_norm": 0.694568395614624,
      "learning_rate": 6.9293569578882865e-06,
      "loss": 0.0171,
      "step": 952020
    },
    {
      "epoch": 1.558034340776235,
      "grad_norm": 0.8201691508293152,
      "learning_rate": 6.92929106567477e-06,
      "loss": 0.0248,
      "step": 952040
    },
    {
      "epoch": 1.5580670712148885,
      "grad_norm": 1.6915125846862793,
      "learning_rate": 6.929225173461253e-06,
      "loss": 0.0167,
      "step": 952060
    },
    {
      "epoch": 1.5580998016535417,
      "grad_norm": 0.985170841217041,
      "learning_rate": 6.929159281247736e-06,
      "loss": 0.0185,
      "step": 952080
    },
    {
      "epoch": 1.5581325320921953,
      "grad_norm": 0.9191047549247742,
      "learning_rate": 6.929093389034218e-06,
      "loss": 0.0208,
      "step": 952100
    },
    {
      "epoch": 1.5581652625308484,
      "grad_norm": 0.7003219127655029,
      "learning_rate": 6.929027496820702e-06,
      "loss": 0.0331,
      "step": 952120
    },
    {
      "epoch": 1.5581979929695018,
      "grad_norm": 0.8348724246025085,
      "learning_rate": 6.928961604607184e-06,
      "loss": 0.0262,
      "step": 952140
    },
    {
      "epoch": 1.5582307234081552,
      "grad_norm": 0.15756604075431824,
      "learning_rate": 6.928895712393667e-06,
      "loss": 0.0246,
      "step": 952160
    },
    {
      "epoch": 1.5582634538468083,
      "grad_norm": 0.49403947591781616,
      "learning_rate": 6.928829820180149e-06,
      "loss": 0.0185,
      "step": 952180
    },
    {
      "epoch": 1.558296184285462,
      "grad_norm": 0.766749918460846,
      "learning_rate": 6.928763927966633e-06,
      "loss": 0.0276,
      "step": 952200
    },
    {
      "epoch": 1.558328914724115,
      "grad_norm": 0.8702818751335144,
      "learning_rate": 6.928698035753116e-06,
      "loss": 0.0156,
      "step": 952220
    },
    {
      "epoch": 1.5583616451627684,
      "grad_norm": 0.5136952996253967,
      "learning_rate": 6.928632143539598e-06,
      "loss": 0.0136,
      "step": 952240
    },
    {
      "epoch": 1.5583943756014218,
      "grad_norm": 0.3480828106403351,
      "learning_rate": 6.928566251326081e-06,
      "loss": 0.0182,
      "step": 952260
    },
    {
      "epoch": 1.5584271060400752,
      "grad_norm": 0.1213778629899025,
      "learning_rate": 6.928500359112565e-06,
      "loss": 0.0196,
      "step": 952280
    },
    {
      "epoch": 1.5584598364787285,
      "grad_norm": 0.09448615461587906,
      "learning_rate": 6.928434466899047e-06,
      "loss": 0.0142,
      "step": 952300
    },
    {
      "epoch": 1.5584925669173817,
      "grad_norm": 0.34178757667541504,
      "learning_rate": 6.92836857468553e-06,
      "loss": 0.0152,
      "step": 952320
    },
    {
      "epoch": 1.5585252973560353,
      "grad_norm": 0.565262496471405,
      "learning_rate": 6.928302682472012e-06,
      "loss": 0.0238,
      "step": 952340
    },
    {
      "epoch": 1.5585580277946884,
      "grad_norm": 0.8234509825706482,
      "learning_rate": 6.928236790258496e-06,
      "loss": 0.0202,
      "step": 952360
    },
    {
      "epoch": 1.5585907582333418,
      "grad_norm": 0.4344244599342346,
      "learning_rate": 6.928170898044979e-06,
      "loss": 0.0204,
      "step": 952380
    },
    {
      "epoch": 1.5586234886719952,
      "grad_norm": 0.3716462552547455,
      "learning_rate": 6.928105005831461e-06,
      "loss": 0.0163,
      "step": 952400
    },
    {
      "epoch": 1.5586562191106486,
      "grad_norm": 0.2921915352344513,
      "learning_rate": 6.928039113617945e-06,
      "loss": 0.0183,
      "step": 952420
    },
    {
      "epoch": 1.558688949549302,
      "grad_norm": 0.09299403429031372,
      "learning_rate": 6.9279732214044275e-06,
      "loss": 0.0165,
      "step": 952440
    },
    {
      "epoch": 1.558721679987955,
      "grad_norm": 0.6495214700698853,
      "learning_rate": 6.92790732919091e-06,
      "loss": 0.019,
      "step": 952460
    },
    {
      "epoch": 1.5587544104266087,
      "grad_norm": 1.5220911502838135,
      "learning_rate": 6.927841436977393e-06,
      "loss": 0.0163,
      "step": 952480
    },
    {
      "epoch": 1.5587871408652618,
      "grad_norm": 0.23797224462032318,
      "learning_rate": 6.9277755447638765e-06,
      "loss": 0.0174,
      "step": 952500
    },
    {
      "epoch": 1.5588198713039152,
      "grad_norm": 0.5564786791801453,
      "learning_rate": 6.9277096525503584e-06,
      "loss": 0.014,
      "step": 952520
    },
    {
      "epoch": 1.5588526017425686,
      "grad_norm": 1.1572388410568237,
      "learning_rate": 6.927643760336842e-06,
      "loss": 0.0247,
      "step": 952540
    },
    {
      "epoch": 1.558885332181222,
      "grad_norm": 0.8146982789039612,
      "learning_rate": 6.927577868123324e-06,
      "loss": 0.0202,
      "step": 952560
    },
    {
      "epoch": 1.5589180626198753,
      "grad_norm": 0.6640661358833313,
      "learning_rate": 6.9275119759098075e-06,
      "loss": 0.0271,
      "step": 952580
    },
    {
      "epoch": 1.5589507930585285,
      "grad_norm": 0.4179520606994629,
      "learning_rate": 6.927446083696289e-06,
      "loss": 0.0241,
      "step": 952600
    },
    {
      "epoch": 1.558983523497182,
      "grad_norm": 3.491053342819214,
      "learning_rate": 6.927380191482773e-06,
      "loss": 0.0158,
      "step": 952620
    },
    {
      "epoch": 1.5590162539358352,
      "grad_norm": 0.5790272951126099,
      "learning_rate": 6.927314299269256e-06,
      "loss": 0.0235,
      "step": 952640
    },
    {
      "epoch": 1.5590489843744886,
      "grad_norm": 0.5012717843055725,
      "learning_rate": 6.9272484070557385e-06,
      "loss": 0.0176,
      "step": 952660
    },
    {
      "epoch": 1.559081714813142,
      "grad_norm": 0.36510157585144043,
      "learning_rate": 6.927182514842221e-06,
      "loss": 0.025,
      "step": 952680
    },
    {
      "epoch": 1.559114445251795,
      "grad_norm": 0.3420920670032501,
      "learning_rate": 6.927116622628705e-06,
      "loss": 0.0244,
      "step": 952700
    },
    {
      "epoch": 1.5591471756904487,
      "grad_norm": 0.5005550980567932,
      "learning_rate": 6.9270507304151875e-06,
      "loss": 0.0295,
      "step": 952720
    },
    {
      "epoch": 1.5591799061291018,
      "grad_norm": 0.24144552648067474,
      "learning_rate": 6.92698483820167e-06,
      "loss": 0.0189,
      "step": 952740
    },
    {
      "epoch": 1.5592126365677554,
      "grad_norm": 0.3362407386302948,
      "learning_rate": 6.926918945988154e-06,
      "loss": 0.0183,
      "step": 952760
    },
    {
      "epoch": 1.5592453670064086,
      "grad_norm": 0.8060154318809509,
      "learning_rate": 6.926853053774636e-06,
      "loss": 0.0175,
      "step": 952780
    },
    {
      "epoch": 1.559278097445062,
      "grad_norm": 0.46852198243141174,
      "learning_rate": 6.926787161561119e-06,
      "loss": 0.0122,
      "step": 952800
    },
    {
      "epoch": 1.5593108278837153,
      "grad_norm": 0.35858938097953796,
      "learning_rate": 6.926721269347601e-06,
      "loss": 0.0234,
      "step": 952820
    },
    {
      "epoch": 1.5593435583223685,
      "grad_norm": 0.6130948662757874,
      "learning_rate": 6.926655377134085e-06,
      "loss": 0.0263,
      "step": 952840
    },
    {
      "epoch": 1.559376288761022,
      "grad_norm": 0.6985756158828735,
      "learning_rate": 6.9265894849205675e-06,
      "loss": 0.0225,
      "step": 952860
    },
    {
      "epoch": 1.5594090191996752,
      "grad_norm": 0.48911240696907043,
      "learning_rate": 6.92652359270705e-06,
      "loss": 0.0166,
      "step": 952880
    },
    {
      "epoch": 1.5594417496383286,
      "grad_norm": 0.4072093963623047,
      "learning_rate": 6.926457700493533e-06,
      "loss": 0.0176,
      "step": 952900
    },
    {
      "epoch": 1.559474480076982,
      "grad_norm": 0.7809642553329468,
      "learning_rate": 6.926391808280017e-06,
      "loss": 0.0155,
      "step": 952920
    },
    {
      "epoch": 1.5595072105156353,
      "grad_norm": 0.17048496007919312,
      "learning_rate": 6.9263259160664985e-06,
      "loss": 0.0189,
      "step": 952940
    },
    {
      "epoch": 1.5595399409542887,
      "grad_norm": 1.2131433486938477,
      "learning_rate": 6.926260023852982e-06,
      "loss": 0.0235,
      "step": 952960
    },
    {
      "epoch": 1.5595726713929419,
      "grad_norm": 0.49108800292015076,
      "learning_rate": 6.926194131639464e-06,
      "loss": 0.0175,
      "step": 952980
    },
    {
      "epoch": 1.5596054018315955,
      "grad_norm": 0.4078829884529114,
      "learning_rate": 6.9261282394259476e-06,
      "loss": 0.03,
      "step": 953000
    },
    {
      "epoch": 1.5596381322702486,
      "grad_norm": 0.4109877347946167,
      "learning_rate": 6.92606234721243e-06,
      "loss": 0.0256,
      "step": 953020
    },
    {
      "epoch": 1.559670862708902,
      "grad_norm": 0.42296427488327026,
      "learning_rate": 6.925996454998913e-06,
      "loss": 0.0205,
      "step": 953040
    },
    {
      "epoch": 1.5597035931475554,
      "grad_norm": 0.16394150257110596,
      "learning_rate": 6.925930562785396e-06,
      "loss": 0.0195,
      "step": 953060
    },
    {
      "epoch": 1.5597363235862087,
      "grad_norm": 0.06331808865070343,
      "learning_rate": 6.925864670571879e-06,
      "loss": 0.023,
      "step": 953080
    },
    {
      "epoch": 1.559769054024862,
      "grad_norm": 0.44387727975845337,
      "learning_rate": 6.925798778358362e-06,
      "loss": 0.02,
      "step": 953100
    },
    {
      "epoch": 1.5598017844635153,
      "grad_norm": 0.5038096308708191,
      "learning_rate": 6.925732886144845e-06,
      "loss": 0.02,
      "step": 953120
    },
    {
      "epoch": 1.5598345149021688,
      "grad_norm": 0.14093182981014252,
      "learning_rate": 6.9256669939313284e-06,
      "loss": 0.0126,
      "step": 953140
    },
    {
      "epoch": 1.559867245340822,
      "grad_norm": 0.28250664472579956,
      "learning_rate": 6.92560110171781e-06,
      "loss": 0.0167,
      "step": 953160
    },
    {
      "epoch": 1.5598999757794754,
      "grad_norm": 0.3953252136707306,
      "learning_rate": 6.925535209504294e-06,
      "loss": 0.0204,
      "step": 953180
    },
    {
      "epoch": 1.5599327062181287,
      "grad_norm": 0.8671207427978516,
      "learning_rate": 6.925469317290776e-06,
      "loss": 0.0194,
      "step": 953200
    },
    {
      "epoch": 1.5599654366567821,
      "grad_norm": 0.6855477690696716,
      "learning_rate": 6.925403425077259e-06,
      "loss": 0.0142,
      "step": 953220
    },
    {
      "epoch": 1.5599981670954355,
      "grad_norm": 0.5236924886703491,
      "learning_rate": 6.925337532863742e-06,
      "loss": 0.0272,
      "step": 953240
    },
    {
      "epoch": 1.5600308975340886,
      "grad_norm": 0.2796688675880432,
      "learning_rate": 6.925271640650225e-06,
      "loss": 0.0222,
      "step": 953260
    },
    {
      "epoch": 1.5600636279727422,
      "grad_norm": 0.40219977498054504,
      "learning_rate": 6.925205748436708e-06,
      "loss": 0.023,
      "step": 953280
    },
    {
      "epoch": 1.5600963584113954,
      "grad_norm": 0.22764411568641663,
      "learning_rate": 6.925139856223191e-06,
      "loss": 0.0349,
      "step": 953300
    },
    {
      "epoch": 1.5601290888500488,
      "grad_norm": 0.07613160461187363,
      "learning_rate": 6.925073964009673e-06,
      "loss": 0.0178,
      "step": 953320
    },
    {
      "epoch": 1.5601618192887021,
      "grad_norm": 0.8823311924934387,
      "learning_rate": 6.925008071796157e-06,
      "loss": 0.0291,
      "step": 953340
    },
    {
      "epoch": 1.5601945497273555,
      "grad_norm": 0.35943520069122314,
      "learning_rate": 6.924942179582639e-06,
      "loss": 0.0155,
      "step": 953360
    },
    {
      "epoch": 1.5602272801660089,
      "grad_norm": 0.8786236047744751,
      "learning_rate": 6.924876287369122e-06,
      "loss": 0.0163,
      "step": 953380
    },
    {
      "epoch": 1.560260010604662,
      "grad_norm": 0.851912260055542,
      "learning_rate": 6.924810395155605e-06,
      "loss": 0.0236,
      "step": 953400
    },
    {
      "epoch": 1.5602927410433156,
      "grad_norm": 0.7314111590385437,
      "learning_rate": 6.924744502942088e-06,
      "loss": 0.0137,
      "step": 953420
    },
    {
      "epoch": 1.5603254714819688,
      "grad_norm": 0.47588565945625305,
      "learning_rate": 6.924678610728571e-06,
      "loss": 0.0162,
      "step": 953440
    },
    {
      "epoch": 1.5603582019206221,
      "grad_norm": 0.08845143765211105,
      "learning_rate": 6.924612718515054e-06,
      "loss": 0.0131,
      "step": 953460
    },
    {
      "epoch": 1.5603909323592755,
      "grad_norm": 1.2729800939559937,
      "learning_rate": 6.924546826301537e-06,
      "loss": 0.0112,
      "step": 953480
    },
    {
      "epoch": 1.5604236627979287,
      "grad_norm": 0.3768572211265564,
      "learning_rate": 6.9244809340880195e-06,
      "loss": 0.0247,
      "step": 953500
    },
    {
      "epoch": 1.5604563932365823,
      "grad_norm": 0.40337130427360535,
      "learning_rate": 6.924415041874503e-06,
      "loss": 0.0286,
      "step": 953520
    },
    {
      "epoch": 1.5604891236752354,
      "grad_norm": 0.38543879985809326,
      "learning_rate": 6.924349149660985e-06,
      "loss": 0.0138,
      "step": 953540
    },
    {
      "epoch": 1.560521854113889,
      "grad_norm": 0.9069402813911438,
      "learning_rate": 6.9242832574474685e-06,
      "loss": 0.023,
      "step": 953560
    },
    {
      "epoch": 1.5605545845525421,
      "grad_norm": 0.7885248064994812,
      "learning_rate": 6.92421736523395e-06,
      "loss": 0.0151,
      "step": 953580
    },
    {
      "epoch": 1.5605873149911955,
      "grad_norm": 1.297515869140625,
      "learning_rate": 6.924151473020434e-06,
      "loss": 0.0244,
      "step": 953600
    },
    {
      "epoch": 1.560620045429849,
      "grad_norm": 0.16780856251716614,
      "learning_rate": 6.924085580806916e-06,
      "loss": 0.0226,
      "step": 953620
    },
    {
      "epoch": 1.560652775868502,
      "grad_norm": 2.9978511333465576,
      "learning_rate": 6.9240196885933995e-06,
      "loss": 0.0246,
      "step": 953640
    },
    {
      "epoch": 1.5606855063071556,
      "grad_norm": 2.024426221847534,
      "learning_rate": 6.923953796379882e-06,
      "loss": 0.0274,
      "step": 953660
    },
    {
      "epoch": 1.5607182367458088,
      "grad_norm": 0.42147770524024963,
      "learning_rate": 6.923887904166365e-06,
      "loss": 0.0204,
      "step": 953680
    },
    {
      "epoch": 1.5607509671844622,
      "grad_norm": 0.22459657490253448,
      "learning_rate": 6.923822011952848e-06,
      "loss": 0.0224,
      "step": 953700
    },
    {
      "epoch": 1.5607836976231155,
      "grad_norm": 0.38476473093032837,
      "learning_rate": 6.923756119739331e-06,
      "loss": 0.0156,
      "step": 953720
    },
    {
      "epoch": 1.560816428061769,
      "grad_norm": 0.23327288031578064,
      "learning_rate": 6.923690227525813e-06,
      "loss": 0.0183,
      "step": 953740
    },
    {
      "epoch": 1.5608491585004223,
      "grad_norm": 0.3244872987270355,
      "learning_rate": 6.923624335312297e-06,
      "loss": 0.0179,
      "step": 953760
    },
    {
      "epoch": 1.5608818889390754,
      "grad_norm": 0.7160485982894897,
      "learning_rate": 6.92355844309878e-06,
      "loss": 0.0177,
      "step": 953780
    },
    {
      "epoch": 1.560914619377729,
      "grad_norm": 0.8330587148666382,
      "learning_rate": 6.923492550885262e-06,
      "loss": 0.0246,
      "step": 953800
    },
    {
      "epoch": 1.5609473498163822,
      "grad_norm": 0.1152300089597702,
      "learning_rate": 6.923426658671746e-06,
      "loss": 0.0149,
      "step": 953820
    },
    {
      "epoch": 1.5609800802550355,
      "grad_norm": 0.37991029024124146,
      "learning_rate": 6.923360766458228e-06,
      "loss": 0.0151,
      "step": 953840
    },
    {
      "epoch": 1.561012810693689,
      "grad_norm": 0.8148552179336548,
      "learning_rate": 6.923294874244711e-06,
      "loss": 0.0295,
      "step": 953860
    },
    {
      "epoch": 1.5610455411323423,
      "grad_norm": 1.252121090888977,
      "learning_rate": 6.923228982031194e-06,
      "loss": 0.0159,
      "step": 953880
    },
    {
      "epoch": 1.5610782715709957,
      "grad_norm": 0.37745654582977295,
      "learning_rate": 6.923163089817677e-06,
      "loss": 0.0224,
      "step": 953900
    },
    {
      "epoch": 1.5611110020096488,
      "grad_norm": 0.982456624507904,
      "learning_rate": 6.9230971976041595e-06,
      "loss": 0.0307,
      "step": 953920
    },
    {
      "epoch": 1.5611437324483024,
      "grad_norm": 1.158989667892456,
      "learning_rate": 6.923031305390643e-06,
      "loss": 0.0239,
      "step": 953940
    },
    {
      "epoch": 1.5611764628869556,
      "grad_norm": 0.31450486183166504,
      "learning_rate": 6.922965413177125e-06,
      "loss": 0.022,
      "step": 953960
    },
    {
      "epoch": 1.561209193325609,
      "grad_norm": 1.4190882444381714,
      "learning_rate": 6.922899520963609e-06,
      "loss": 0.0192,
      "step": 953980
    },
    {
      "epoch": 1.5612419237642623,
      "grad_norm": 0.32716885209083557,
      "learning_rate": 6.9228336287500905e-06,
      "loss": 0.0178,
      "step": 954000
    },
    {
      "epoch": 1.5612746542029157,
      "grad_norm": 0.5960999727249146,
      "learning_rate": 6.922767736536574e-06,
      "loss": 0.0237,
      "step": 954020
    },
    {
      "epoch": 1.561307384641569,
      "grad_norm": 0.18888317048549652,
      "learning_rate": 6.922701844323057e-06,
      "loss": 0.0154,
      "step": 954040
    },
    {
      "epoch": 1.5613401150802222,
      "grad_norm": 0.4388570487499237,
      "learning_rate": 6.9226359521095396e-06,
      "loss": 0.0156,
      "step": 954060
    },
    {
      "epoch": 1.5613728455188758,
      "grad_norm": 0.6343097686767578,
      "learning_rate": 6.922570059896022e-06,
      "loss": 0.0223,
      "step": 954080
    },
    {
      "epoch": 1.561405575957529,
      "grad_norm": 1.2517603635787964,
      "learning_rate": 6.922504167682506e-06,
      "loss": 0.018,
      "step": 954100
    },
    {
      "epoch": 1.5614383063961823,
      "grad_norm": 0.8494725227355957,
      "learning_rate": 6.922438275468989e-06,
      "loss": 0.016,
      "step": 954120
    },
    {
      "epoch": 1.5614710368348357,
      "grad_norm": 0.6256969571113586,
      "learning_rate": 6.922372383255471e-06,
      "loss": 0.0194,
      "step": 954140
    },
    {
      "epoch": 1.561503767273489,
      "grad_norm": 0.6230124235153198,
      "learning_rate": 6.922306491041955e-06,
      "loss": 0.0198,
      "step": 954160
    },
    {
      "epoch": 1.5615364977121424,
      "grad_norm": 0.14442779123783112,
      "learning_rate": 6.922240598828437e-06,
      "loss": 0.022,
      "step": 954180
    },
    {
      "epoch": 1.5615692281507956,
      "grad_norm": 0.4020232856273651,
      "learning_rate": 6.9221747066149204e-06,
      "loss": 0.0161,
      "step": 954200
    },
    {
      "epoch": 1.5616019585894492,
      "grad_norm": 0.7583115100860596,
      "learning_rate": 6.922108814401402e-06,
      "loss": 0.0254,
      "step": 954220
    },
    {
      "epoch": 1.5616346890281023,
      "grad_norm": 0.3731190264225006,
      "learning_rate": 6.922042922187886e-06,
      "loss": 0.0206,
      "step": 954240
    },
    {
      "epoch": 1.5616674194667557,
      "grad_norm": 0.44668248295783997,
      "learning_rate": 6.921977029974369e-06,
      "loss": 0.0199,
      "step": 954260
    },
    {
      "epoch": 1.561700149905409,
      "grad_norm": 1.1413723230361938,
      "learning_rate": 6.921911137760851e-06,
      "loss": 0.0258,
      "step": 954280
    },
    {
      "epoch": 1.5617328803440622,
      "grad_norm": 0.9148570895195007,
      "learning_rate": 6.921845245547334e-06,
      "loss": 0.0154,
      "step": 954300
    },
    {
      "epoch": 1.5617656107827158,
      "grad_norm": 0.36991220712661743,
      "learning_rate": 6.921779353333818e-06,
      "loss": 0.0165,
      "step": 954320
    },
    {
      "epoch": 1.561798341221369,
      "grad_norm": 0.34867578744888306,
      "learning_rate": 6.9217134611203e-06,
      "loss": 0.017,
      "step": 954340
    },
    {
      "epoch": 1.5618310716600226,
      "grad_norm": 0.38607704639434814,
      "learning_rate": 6.921647568906783e-06,
      "loss": 0.0186,
      "step": 954360
    },
    {
      "epoch": 1.5618638020986757,
      "grad_norm": 0.6207263469696045,
      "learning_rate": 6.921581676693265e-06,
      "loss": 0.0184,
      "step": 954380
    },
    {
      "epoch": 1.561896532537329,
      "grad_norm": 0.3041122853755951,
      "learning_rate": 6.921515784479749e-06,
      "loss": 0.0353,
      "step": 954400
    },
    {
      "epoch": 1.5619292629759824,
      "grad_norm": 0.26348409056663513,
      "learning_rate": 6.9214498922662306e-06,
      "loss": 0.0241,
      "step": 954420
    },
    {
      "epoch": 1.5619619934146356,
      "grad_norm": 0.49825528264045715,
      "learning_rate": 6.921384000052714e-06,
      "loss": 0.0271,
      "step": 954440
    },
    {
      "epoch": 1.5619947238532892,
      "grad_norm": 0.6820170879364014,
      "learning_rate": 6.921318107839197e-06,
      "loss": 0.0153,
      "step": 954460
    },
    {
      "epoch": 1.5620274542919423,
      "grad_norm": 0.18698404729366302,
      "learning_rate": 6.9212522156256805e-06,
      "loss": 0.0259,
      "step": 954480
    },
    {
      "epoch": 1.5620601847305957,
      "grad_norm": 0.8522132039070129,
      "learning_rate": 6.921186323412163e-06,
      "loss": 0.0197,
      "step": 954500
    },
    {
      "epoch": 1.562092915169249,
      "grad_norm": 0.536676824092865,
      "learning_rate": 6.921120431198646e-06,
      "loss": 0.0227,
      "step": 954520
    },
    {
      "epoch": 1.5621256456079025,
      "grad_norm": 0.35254019498825073,
      "learning_rate": 6.9210545389851295e-06,
      "loss": 0.017,
      "step": 954540
    },
    {
      "epoch": 1.5621583760465558,
      "grad_norm": 0.8863633871078491,
      "learning_rate": 6.9209886467716114e-06,
      "loss": 0.0237,
      "step": 954560
    },
    {
      "epoch": 1.562191106485209,
      "grad_norm": 0.5846678614616394,
      "learning_rate": 6.920922754558095e-06,
      "loss": 0.0155,
      "step": 954580
    },
    {
      "epoch": 1.5622238369238626,
      "grad_norm": 0.06709317862987518,
      "learning_rate": 6.920856862344577e-06,
      "loss": 0.0231,
      "step": 954600
    },
    {
      "epoch": 1.5622565673625157,
      "grad_norm": 0.4238865077495575,
      "learning_rate": 6.9207909701310605e-06,
      "loss": 0.0242,
      "step": 954620
    },
    {
      "epoch": 1.562289297801169,
      "grad_norm": 0.44767481088638306,
      "learning_rate": 6.920725077917542e-06,
      "loss": 0.0179,
      "step": 954640
    },
    {
      "epoch": 1.5623220282398225,
      "grad_norm": 0.19822685420513153,
      "learning_rate": 6.920659185704026e-06,
      "loss": 0.0168,
      "step": 954660
    },
    {
      "epoch": 1.5623547586784758,
      "grad_norm": 0.14490915834903717,
      "learning_rate": 6.920593293490509e-06,
      "loss": 0.0316,
      "step": 954680
    },
    {
      "epoch": 1.5623874891171292,
      "grad_norm": 0.24579894542694092,
      "learning_rate": 6.9205274012769915e-06,
      "loss": 0.0197,
      "step": 954700
    },
    {
      "epoch": 1.5624202195557824,
      "grad_norm": 1.8275927305221558,
      "learning_rate": 6.920461509063474e-06,
      "loss": 0.0236,
      "step": 954720
    },
    {
      "epoch": 1.562452949994436,
      "grad_norm": 0.3652746081352234,
      "learning_rate": 6.920395616849958e-06,
      "loss": 0.0243,
      "step": 954740
    },
    {
      "epoch": 1.562485680433089,
      "grad_norm": 0.5026693940162659,
      "learning_rate": 6.92032972463644e-06,
      "loss": 0.0174,
      "step": 954760
    },
    {
      "epoch": 1.5625184108717425,
      "grad_norm": 2.021657943725586,
      "learning_rate": 6.920263832422923e-06,
      "loss": 0.0183,
      "step": 954780
    },
    {
      "epoch": 1.5625511413103959,
      "grad_norm": 0.63297039270401,
      "learning_rate": 6.920197940209405e-06,
      "loss": 0.0176,
      "step": 954800
    },
    {
      "epoch": 1.5625838717490492,
      "grad_norm": 0.413409560918808,
      "learning_rate": 6.920132047995889e-06,
      "loss": 0.0212,
      "step": 954820
    },
    {
      "epoch": 1.5626166021877026,
      "grad_norm": 0.27190524339675903,
      "learning_rate": 6.920066155782372e-06,
      "loss": 0.0177,
      "step": 954840
    },
    {
      "epoch": 1.5626493326263557,
      "grad_norm": 0.32262134552001953,
      "learning_rate": 6.920000263568854e-06,
      "loss": 0.0153,
      "step": 954860
    },
    {
      "epoch": 1.5626820630650093,
      "grad_norm": 0.37396758794784546,
      "learning_rate": 6.919934371355338e-06,
      "loss": 0.0256,
      "step": 954880
    },
    {
      "epoch": 1.5627147935036625,
      "grad_norm": 0.4169246554374695,
      "learning_rate": 6.9198684791418206e-06,
      "loss": 0.0171,
      "step": 954900
    },
    {
      "epoch": 1.5627475239423159,
      "grad_norm": 0.16391755640506744,
      "learning_rate": 6.919802586928303e-06,
      "loss": 0.022,
      "step": 954920
    },
    {
      "epoch": 1.5627802543809692,
      "grad_norm": 0.514274537563324,
      "learning_rate": 6.919736694714786e-06,
      "loss": 0.0232,
      "step": 954940
    },
    {
      "epoch": 1.5628129848196224,
      "grad_norm": 0.3607324957847595,
      "learning_rate": 6.91967080250127e-06,
      "loss": 0.0152,
      "step": 954960
    },
    {
      "epoch": 1.562845715258276,
      "grad_norm": 0.42728301882743835,
      "learning_rate": 6.9196049102877515e-06,
      "loss": 0.0191,
      "step": 954980
    },
    {
      "epoch": 1.5628784456969291,
      "grad_norm": 0.8047295808792114,
      "learning_rate": 6.919539018074235e-06,
      "loss": 0.0164,
      "step": 955000
    },
    {
      "epoch": 1.5629111761355827,
      "grad_norm": 0.38708850741386414,
      "learning_rate": 6.919473125860717e-06,
      "loss": 0.017,
      "step": 955020
    },
    {
      "epoch": 1.5629439065742359,
      "grad_norm": 0.9559176564216614,
      "learning_rate": 6.919407233647201e-06,
      "loss": 0.0254,
      "step": 955040
    },
    {
      "epoch": 1.5629766370128892,
      "grad_norm": 0.8186327219009399,
      "learning_rate": 6.919341341433683e-06,
      "loss": 0.022,
      "step": 955060
    },
    {
      "epoch": 1.5630093674515426,
      "grad_norm": 0.43955445289611816,
      "learning_rate": 6.919275449220166e-06,
      "loss": 0.022,
      "step": 955080
    },
    {
      "epoch": 1.5630420978901958,
      "grad_norm": 0.28683724999427795,
      "learning_rate": 6.919209557006649e-06,
      "loss": 0.0232,
      "step": 955100
    },
    {
      "epoch": 1.5630748283288494,
      "grad_norm": 0.15632624924182892,
      "learning_rate": 6.919143664793132e-06,
      "loss": 0.0189,
      "step": 955120
    },
    {
      "epoch": 1.5631075587675025,
      "grad_norm": 0.5256503224372864,
      "learning_rate": 6.919077772579614e-06,
      "loss": 0.0171,
      "step": 955140
    },
    {
      "epoch": 1.5631402892061559,
      "grad_norm": 1.285597562789917,
      "learning_rate": 6.919011880366098e-06,
      "loss": 0.0131,
      "step": 955160
    },
    {
      "epoch": 1.5631730196448093,
      "grad_norm": 0.5510412454605103,
      "learning_rate": 6.9189459881525814e-06,
      "loss": 0.015,
      "step": 955180
    },
    {
      "epoch": 1.5632057500834626,
      "grad_norm": 0.2774299681186676,
      "learning_rate": 6.918880095939063e-06,
      "loss": 0.016,
      "step": 955200
    },
    {
      "epoch": 1.563238480522116,
      "grad_norm": 0.7164899706840515,
      "learning_rate": 6.918814203725547e-06,
      "loss": 0.0161,
      "step": 955220
    },
    {
      "epoch": 1.5632712109607692,
      "grad_norm": 0.24811895191669464,
      "learning_rate": 6.918748311512029e-06,
      "loss": 0.0204,
      "step": 955240
    },
    {
      "epoch": 1.5633039413994227,
      "grad_norm": 1.0350607633590698,
      "learning_rate": 6.918682419298512e-06,
      "loss": 0.0191,
      "step": 955260
    },
    {
      "epoch": 1.563336671838076,
      "grad_norm": 0.915966272354126,
      "learning_rate": 6.918616527084995e-06,
      "loss": 0.0187,
      "step": 955280
    },
    {
      "epoch": 1.5633694022767293,
      "grad_norm": 0.27704206109046936,
      "learning_rate": 6.918550634871478e-06,
      "loss": 0.0208,
      "step": 955300
    },
    {
      "epoch": 1.5634021327153826,
      "grad_norm": 0.5557138323783875,
      "learning_rate": 6.918484742657961e-06,
      "loss": 0.0297,
      "step": 955320
    },
    {
      "epoch": 1.563434863154036,
      "grad_norm": 1.4784375429153442,
      "learning_rate": 6.918418850444444e-06,
      "loss": 0.0304,
      "step": 955340
    },
    {
      "epoch": 1.5634675935926894,
      "grad_norm": 0.5066922307014465,
      "learning_rate": 6.918352958230926e-06,
      "loss": 0.0289,
      "step": 955360
    },
    {
      "epoch": 1.5635003240313425,
      "grad_norm": 0.21720778942108154,
      "learning_rate": 6.91828706601741e-06,
      "loss": 0.0268,
      "step": 955380
    },
    {
      "epoch": 1.5635330544699961,
      "grad_norm": 0.3672780692577362,
      "learning_rate": 6.918221173803892e-06,
      "loss": 0.0169,
      "step": 955400
    },
    {
      "epoch": 1.5635657849086493,
      "grad_norm": 0.7978018522262573,
      "learning_rate": 6.918155281590375e-06,
      "loss": 0.0199,
      "step": 955420
    },
    {
      "epoch": 1.5635985153473027,
      "grad_norm": 0.8908841013908386,
      "learning_rate": 6.918089389376857e-06,
      "loss": 0.023,
      "step": 955440
    },
    {
      "epoch": 1.563631245785956,
      "grad_norm": 0.6448606848716736,
      "learning_rate": 6.918023497163341e-06,
      "loss": 0.014,
      "step": 955460
    },
    {
      "epoch": 1.5636639762246094,
      "grad_norm": 0.6102886199951172,
      "learning_rate": 6.917957604949823e-06,
      "loss": 0.0174,
      "step": 955480
    },
    {
      "epoch": 1.5636967066632628,
      "grad_norm": 0.17877967655658722,
      "learning_rate": 6.917891712736306e-06,
      "loss": 0.0213,
      "step": 955500
    },
    {
      "epoch": 1.563729437101916,
      "grad_norm": 0.39685001969337463,
      "learning_rate": 6.917825820522789e-06,
      "loss": 0.0176,
      "step": 955520
    },
    {
      "epoch": 1.5637621675405695,
      "grad_norm": 2.0700135231018066,
      "learning_rate": 6.9177599283092725e-06,
      "loss": 0.0233,
      "step": 955540
    },
    {
      "epoch": 1.5637948979792227,
      "grad_norm": 0.9478756189346313,
      "learning_rate": 6.917694036095756e-06,
      "loss": 0.0251,
      "step": 955560
    },
    {
      "epoch": 1.563827628417876,
      "grad_norm": 0.26250678300857544,
      "learning_rate": 6.917628143882238e-06,
      "loss": 0.0172,
      "step": 955580
    },
    {
      "epoch": 1.5638603588565294,
      "grad_norm": 0.4714249074459076,
      "learning_rate": 6.9175622516687215e-06,
      "loss": 0.0184,
      "step": 955600
    },
    {
      "epoch": 1.5638930892951828,
      "grad_norm": 0.8573354482650757,
      "learning_rate": 6.917496359455203e-06,
      "loss": 0.0273,
      "step": 955620
    },
    {
      "epoch": 1.5639258197338362,
      "grad_norm": 0.2608446776866913,
      "learning_rate": 6.917430467241687e-06,
      "loss": 0.0109,
      "step": 955640
    },
    {
      "epoch": 1.5639585501724893,
      "grad_norm": 0.7266879677772522,
      "learning_rate": 6.917364575028169e-06,
      "loss": 0.0236,
      "step": 955660
    },
    {
      "epoch": 1.563991280611143,
      "grad_norm": 0.6113893389701843,
      "learning_rate": 6.9172986828146525e-06,
      "loss": 0.0187,
      "step": 955680
    },
    {
      "epoch": 1.564024011049796,
      "grad_norm": 0.22166001796722412,
      "learning_rate": 6.917232790601135e-06,
      "loss": 0.0215,
      "step": 955700
    },
    {
      "epoch": 1.5640567414884494,
      "grad_norm": 0.992213249206543,
      "learning_rate": 6.917166898387618e-06,
      "loss": 0.0244,
      "step": 955720
    },
    {
      "epoch": 1.5640894719271028,
      "grad_norm": 1.0094795227050781,
      "learning_rate": 6.917101006174101e-06,
      "loss": 0.0179,
      "step": 955740
    },
    {
      "epoch": 1.564122202365756,
      "grad_norm": 0.6680509448051453,
      "learning_rate": 6.917035113960584e-06,
      "loss": 0.026,
      "step": 955760
    },
    {
      "epoch": 1.5641549328044095,
      "grad_norm": 0.37746912240982056,
      "learning_rate": 6.916969221747066e-06,
      "loss": 0.0264,
      "step": 955780
    },
    {
      "epoch": 1.5641876632430627,
      "grad_norm": 0.22788284718990326,
      "learning_rate": 6.91690332953355e-06,
      "loss": 0.0235,
      "step": 955800
    },
    {
      "epoch": 1.5642203936817163,
      "grad_norm": 1.2916327714920044,
      "learning_rate": 6.916837437320032e-06,
      "loss": 0.018,
      "step": 955820
    },
    {
      "epoch": 1.5642531241203694,
      "grad_norm": 1.3555623292922974,
      "learning_rate": 6.916771545106515e-06,
      "loss": 0.0253,
      "step": 955840
    },
    {
      "epoch": 1.5642858545590228,
      "grad_norm": 2.0901246070861816,
      "learning_rate": 6.916705652892998e-06,
      "loss": 0.0181,
      "step": 955860
    },
    {
      "epoch": 1.5643185849976762,
      "grad_norm": 1.7046582698822021,
      "learning_rate": 6.916639760679481e-06,
      "loss": 0.0303,
      "step": 955880
    },
    {
      "epoch": 1.5643513154363293,
      "grad_norm": 0.49059441685676575,
      "learning_rate": 6.916573868465964e-06,
      "loss": 0.0173,
      "step": 955900
    },
    {
      "epoch": 1.564384045874983,
      "grad_norm": 0.4221421778202057,
      "learning_rate": 6.916507976252447e-06,
      "loss": 0.0328,
      "step": 955920
    },
    {
      "epoch": 1.564416776313636,
      "grad_norm": 0.7186341881752014,
      "learning_rate": 6.91644208403893e-06,
      "loss": 0.0152,
      "step": 955940
    },
    {
      "epoch": 1.5644495067522894,
      "grad_norm": 1.4431686401367188,
      "learning_rate": 6.9163761918254125e-06,
      "loss": 0.0209,
      "step": 955960
    },
    {
      "epoch": 1.5644822371909428,
      "grad_norm": 0.5167948603630066,
      "learning_rate": 6.916310299611896e-06,
      "loss": 0.0221,
      "step": 955980
    },
    {
      "epoch": 1.5645149676295962,
      "grad_norm": 0.16856196522712708,
      "learning_rate": 6.916244407398378e-06,
      "loss": 0.0193,
      "step": 956000
    },
    {
      "epoch": 1.5645476980682496,
      "grad_norm": 0.619949996471405,
      "learning_rate": 6.916178515184862e-06,
      "loss": 0.0193,
      "step": 956020
    },
    {
      "epoch": 1.5645804285069027,
      "grad_norm": 0.6456658840179443,
      "learning_rate": 6.9161126229713435e-06,
      "loss": 0.02,
      "step": 956040
    },
    {
      "epoch": 1.5646131589455563,
      "grad_norm": 0.9362192749977112,
      "learning_rate": 6.916046730757827e-06,
      "loss": 0.0247,
      "step": 956060
    },
    {
      "epoch": 1.5646458893842095,
      "grad_norm": 0.8771438002586365,
      "learning_rate": 6.91598083854431e-06,
      "loss": 0.0147,
      "step": 956080
    },
    {
      "epoch": 1.5646786198228628,
      "grad_norm": 0.8409912586212158,
      "learning_rate": 6.9159149463307926e-06,
      "loss": 0.0237,
      "step": 956100
    },
    {
      "epoch": 1.5647113502615162,
      "grad_norm": 0.9414710402488708,
      "learning_rate": 6.915849054117275e-06,
      "loss": 0.0211,
      "step": 956120
    },
    {
      "epoch": 1.5647440807001696,
      "grad_norm": 0.20033910870552063,
      "learning_rate": 6.915783161903759e-06,
      "loss": 0.0143,
      "step": 956140
    },
    {
      "epoch": 1.564776811138823,
      "grad_norm": 0.4829566776752472,
      "learning_rate": 6.915717269690241e-06,
      "loss": 0.0156,
      "step": 956160
    },
    {
      "epoch": 1.564809541577476,
      "grad_norm": 2.222135305404663,
      "learning_rate": 6.915651377476724e-06,
      "loss": 0.0219,
      "step": 956180
    },
    {
      "epoch": 1.5648422720161297,
      "grad_norm": 0.9288486838340759,
      "learning_rate": 6.915585485263206e-06,
      "loss": 0.021,
      "step": 956200
    },
    {
      "epoch": 1.5648750024547828,
      "grad_norm": 0.3434523046016693,
      "learning_rate": 6.91551959304969e-06,
      "loss": 0.0344,
      "step": 956220
    },
    {
      "epoch": 1.5649077328934362,
      "grad_norm": 0.580085277557373,
      "learning_rate": 6.9154537008361734e-06,
      "loss": 0.0223,
      "step": 956240
    },
    {
      "epoch": 1.5649404633320896,
      "grad_norm": 0.6067537069320679,
      "learning_rate": 6.915387808622655e-06,
      "loss": 0.0175,
      "step": 956260
    },
    {
      "epoch": 1.564973193770743,
      "grad_norm": 0.3853088319301605,
      "learning_rate": 6.915321916409139e-06,
      "loss": 0.023,
      "step": 956280
    },
    {
      "epoch": 1.5650059242093963,
      "grad_norm": 0.5909001231193542,
      "learning_rate": 6.915256024195622e-06,
      "loss": 0.0254,
      "step": 956300
    },
    {
      "epoch": 1.5650386546480495,
      "grad_norm": 0.6833658814430237,
      "learning_rate": 6.915190131982104e-06,
      "loss": 0.0149,
      "step": 956320
    },
    {
      "epoch": 1.565071385086703,
      "grad_norm": 0.254011332988739,
      "learning_rate": 6.915124239768587e-06,
      "loss": 0.016,
      "step": 956340
    },
    {
      "epoch": 1.5651041155253562,
      "grad_norm": 0.23801667988300323,
      "learning_rate": 6.915058347555071e-06,
      "loss": 0.0223,
      "step": 956360
    },
    {
      "epoch": 1.5651368459640096,
      "grad_norm": 1.776147484779358,
      "learning_rate": 6.914992455341553e-06,
      "loss": 0.023,
      "step": 956380
    },
    {
      "epoch": 1.565169576402663,
      "grad_norm": 0.5163055062294006,
      "learning_rate": 6.914926563128036e-06,
      "loss": 0.0179,
      "step": 956400
    },
    {
      "epoch": 1.5652023068413163,
      "grad_norm": 0.17162734270095825,
      "learning_rate": 6.914860670914518e-06,
      "loss": 0.0357,
      "step": 956420
    },
    {
      "epoch": 1.5652350372799697,
      "grad_norm": 0.46417590975761414,
      "learning_rate": 6.914794778701002e-06,
      "loss": 0.0162,
      "step": 956440
    },
    {
      "epoch": 1.5652677677186229,
      "grad_norm": 0.47764986753463745,
      "learning_rate": 6.9147288864874836e-06,
      "loss": 0.0176,
      "step": 956460
    },
    {
      "epoch": 1.5653004981572765,
      "grad_norm": 0.6482047438621521,
      "learning_rate": 6.914662994273967e-06,
      "loss": 0.0181,
      "step": 956480
    },
    {
      "epoch": 1.5653332285959296,
      "grad_norm": 0.15703602135181427,
      "learning_rate": 6.91459710206045e-06,
      "loss": 0.0218,
      "step": 956500
    },
    {
      "epoch": 1.565365959034583,
      "grad_norm": 0.5450083017349243,
      "learning_rate": 6.914531209846933e-06,
      "loss": 0.0226,
      "step": 956520
    },
    {
      "epoch": 1.5653986894732363,
      "grad_norm": 0.25388985872268677,
      "learning_rate": 6.914465317633415e-06,
      "loss": 0.022,
      "step": 956540
    },
    {
      "epoch": 1.5654314199118895,
      "grad_norm": 1.1584974527359009,
      "learning_rate": 6.914399425419899e-06,
      "loss": 0.0192,
      "step": 956560
    },
    {
      "epoch": 1.565464150350543,
      "grad_norm": 0.6606882214546204,
      "learning_rate": 6.914333533206382e-06,
      "loss": 0.0211,
      "step": 956580
    },
    {
      "epoch": 1.5654968807891962,
      "grad_norm": 0.469173789024353,
      "learning_rate": 6.9142676409928644e-06,
      "loss": 0.022,
      "step": 956600
    },
    {
      "epoch": 1.5655296112278498,
      "grad_norm": 0.35898086428642273,
      "learning_rate": 6.914201748779348e-06,
      "loss": 0.0165,
      "step": 956620
    },
    {
      "epoch": 1.565562341666503,
      "grad_norm": 0.32143157720565796,
      "learning_rate": 6.91413585656583e-06,
      "loss": 0.0235,
      "step": 956640
    },
    {
      "epoch": 1.5655950721051564,
      "grad_norm": 0.23913295567035675,
      "learning_rate": 6.9140699643523135e-06,
      "loss": 0.0287,
      "step": 956660
    },
    {
      "epoch": 1.5656278025438097,
      "grad_norm": 0.41384515166282654,
      "learning_rate": 6.914004072138795e-06,
      "loss": 0.0209,
      "step": 956680
    },
    {
      "epoch": 1.5656605329824629,
      "grad_norm": 0.8964901566505432,
      "learning_rate": 6.913938179925279e-06,
      "loss": 0.0215,
      "step": 956700
    },
    {
      "epoch": 1.5656932634211165,
      "grad_norm": 0.6938781142234802,
      "learning_rate": 6.913872287711762e-06,
      "loss": 0.0144,
      "step": 956720
    },
    {
      "epoch": 1.5657259938597696,
      "grad_norm": 0.6439991593360901,
      "learning_rate": 6.9138063954982445e-06,
      "loss": 0.0172,
      "step": 956740
    },
    {
      "epoch": 1.565758724298423,
      "grad_norm": 1.0937724113464355,
      "learning_rate": 6.913740503284727e-06,
      "loss": 0.0296,
      "step": 956760
    },
    {
      "epoch": 1.5657914547370764,
      "grad_norm": 1.150001049041748,
      "learning_rate": 6.913674611071211e-06,
      "loss": 0.015,
      "step": 956780
    },
    {
      "epoch": 1.5658241851757297,
      "grad_norm": 2.1914432048797607,
      "learning_rate": 6.913608718857693e-06,
      "loss": 0.0197,
      "step": 956800
    },
    {
      "epoch": 1.5658569156143831,
      "grad_norm": 0.16381336748600006,
      "learning_rate": 6.913542826644176e-06,
      "loss": 0.0176,
      "step": 956820
    },
    {
      "epoch": 1.5658896460530363,
      "grad_norm": 0.26666969060897827,
      "learning_rate": 6.913476934430658e-06,
      "loss": 0.0201,
      "step": 956840
    },
    {
      "epoch": 1.5659223764916899,
      "grad_norm": 0.2255961000919342,
      "learning_rate": 6.913411042217142e-06,
      "loss": 0.0166,
      "step": 956860
    },
    {
      "epoch": 1.565955106930343,
      "grad_norm": 0.4397401809692383,
      "learning_rate": 6.9133451500036245e-06,
      "loss": 0.0231,
      "step": 956880
    },
    {
      "epoch": 1.5659878373689964,
      "grad_norm": 0.32487842440605164,
      "learning_rate": 6.913279257790107e-06,
      "loss": 0.022,
      "step": 956900
    },
    {
      "epoch": 1.5660205678076498,
      "grad_norm": 0.4900863766670227,
      "learning_rate": 6.91321336557659e-06,
      "loss": 0.0173,
      "step": 956920
    },
    {
      "epoch": 1.5660532982463031,
      "grad_norm": 0.3452207148075104,
      "learning_rate": 6.9131474733630736e-06,
      "loss": 0.0184,
      "step": 956940
    },
    {
      "epoch": 1.5660860286849565,
      "grad_norm": 1.1326655149459839,
      "learning_rate": 6.913081581149556e-06,
      "loss": 0.0225,
      "step": 956960
    },
    {
      "epoch": 1.5661187591236096,
      "grad_norm": 0.678601861000061,
      "learning_rate": 6.913015688936039e-06,
      "loss": 0.013,
      "step": 956980
    },
    {
      "epoch": 1.5661514895622632,
      "grad_norm": 0.13904504477977753,
      "learning_rate": 6.912949796722523e-06,
      "loss": 0.0152,
      "step": 957000
    },
    {
      "epoch": 1.5661842200009164,
      "grad_norm": 0.32493749260902405,
      "learning_rate": 6.9128839045090045e-06,
      "loss": 0.0264,
      "step": 957020
    },
    {
      "epoch": 1.5662169504395698,
      "grad_norm": 0.25795960426330566,
      "learning_rate": 6.912818012295488e-06,
      "loss": 0.0234,
      "step": 957040
    },
    {
      "epoch": 1.5662496808782231,
      "grad_norm": 1.6161543130874634,
      "learning_rate": 6.91275212008197e-06,
      "loss": 0.0122,
      "step": 957060
    },
    {
      "epoch": 1.5662824113168765,
      "grad_norm": 0.5528212785720825,
      "learning_rate": 6.912686227868454e-06,
      "loss": 0.0203,
      "step": 957080
    },
    {
      "epoch": 1.5663151417555299,
      "grad_norm": 0.6168619394302368,
      "learning_rate": 6.912620335654936e-06,
      "loss": 0.0182,
      "step": 957100
    },
    {
      "epoch": 1.566347872194183,
      "grad_norm": 0.17755429446697235,
      "learning_rate": 6.912554443441419e-06,
      "loss": 0.0193,
      "step": 957120
    },
    {
      "epoch": 1.5663806026328366,
      "grad_norm": 0.7382874488830566,
      "learning_rate": 6.912488551227902e-06,
      "loss": 0.015,
      "step": 957140
    },
    {
      "epoch": 1.5664133330714898,
      "grad_norm": 1.5044668912887573,
      "learning_rate": 6.912422659014385e-06,
      "loss": 0.0166,
      "step": 957160
    },
    {
      "epoch": 1.5664460635101431,
      "grad_norm": 0.5646078586578369,
      "learning_rate": 6.912356766800867e-06,
      "loss": 0.0187,
      "step": 957180
    },
    {
      "epoch": 1.5664787939487965,
      "grad_norm": 1.4284248352050781,
      "learning_rate": 6.912290874587351e-06,
      "loss": 0.0303,
      "step": 957200
    },
    {
      "epoch": 1.56651152438745,
      "grad_norm": 1.326246738433838,
      "learning_rate": 6.912224982373833e-06,
      "loss": 0.0294,
      "step": 957220
    },
    {
      "epoch": 1.5665442548261033,
      "grad_norm": 0.20657721161842346,
      "learning_rate": 6.912159090160316e-06,
      "loss": 0.0185,
      "step": 957240
    },
    {
      "epoch": 1.5665769852647564,
      "grad_norm": 1.6818617582321167,
      "learning_rate": 6.912093197946799e-06,
      "loss": 0.0202,
      "step": 957260
    },
    {
      "epoch": 1.56660971570341,
      "grad_norm": 0.7234667539596558,
      "learning_rate": 6.912027305733282e-06,
      "loss": 0.0222,
      "step": 957280
    },
    {
      "epoch": 1.5666424461420632,
      "grad_norm": 1.8544327020645142,
      "learning_rate": 6.911961413519765e-06,
      "loss": 0.0257,
      "step": 957300
    },
    {
      "epoch": 1.5666751765807165,
      "grad_norm": 0.5171155333518982,
      "learning_rate": 6.911895521306248e-06,
      "loss": 0.0174,
      "step": 957320
    },
    {
      "epoch": 1.56670790701937,
      "grad_norm": 0.6033373475074768,
      "learning_rate": 6.911829629092731e-06,
      "loss": 0.0175,
      "step": 957340
    },
    {
      "epoch": 1.566740637458023,
      "grad_norm": 0.36919716000556946,
      "learning_rate": 6.911763736879214e-06,
      "loss": 0.0219,
      "step": 957360
    },
    {
      "epoch": 1.5667733678966766,
      "grad_norm": 0.59586501121521,
      "learning_rate": 6.911697844665697e-06,
      "loss": 0.0234,
      "step": 957380
    },
    {
      "epoch": 1.5668060983353298,
      "grad_norm": 0.6211854219436646,
      "learning_rate": 6.911631952452179e-06,
      "loss": 0.0211,
      "step": 957400
    },
    {
      "epoch": 1.5668388287739834,
      "grad_norm": 0.7932543158531189,
      "learning_rate": 6.911566060238663e-06,
      "loss": 0.0328,
      "step": 957420
    },
    {
      "epoch": 1.5668715592126365,
      "grad_norm": 1.1261677742004395,
      "learning_rate": 6.911500168025145e-06,
      "loss": 0.0197,
      "step": 957440
    },
    {
      "epoch": 1.56690428965129,
      "grad_norm": 0.14766861498355865,
      "learning_rate": 6.911434275811628e-06,
      "loss": 0.0183,
      "step": 957460
    },
    {
      "epoch": 1.5669370200899433,
      "grad_norm": 0.9547879695892334,
      "learning_rate": 6.91136838359811e-06,
      "loss": 0.0151,
      "step": 957480
    },
    {
      "epoch": 1.5669697505285964,
      "grad_norm": 0.22409069538116455,
      "learning_rate": 6.911302491384594e-06,
      "loss": 0.0207,
      "step": 957500
    },
    {
      "epoch": 1.56700248096725,
      "grad_norm": 0.21443073451519012,
      "learning_rate": 6.911236599171076e-06,
      "loss": 0.0206,
      "step": 957520
    },
    {
      "epoch": 1.5670352114059032,
      "grad_norm": 0.452030211687088,
      "learning_rate": 6.911170706957559e-06,
      "loss": 0.0431,
      "step": 957540
    },
    {
      "epoch": 1.5670679418445566,
      "grad_norm": 0.2840500771999359,
      "learning_rate": 6.911104814744042e-06,
      "loss": 0.0199,
      "step": 957560
    },
    {
      "epoch": 1.56710067228321,
      "grad_norm": 0.7977369427680969,
      "learning_rate": 6.9110389225305255e-06,
      "loss": 0.0156,
      "step": 957580
    },
    {
      "epoch": 1.5671334027218633,
      "grad_norm": 1.5529693365097046,
      "learning_rate": 6.910973030317007e-06,
      "loss": 0.0146,
      "step": 957600
    },
    {
      "epoch": 1.5671661331605167,
      "grad_norm": 0.36465370655059814,
      "learning_rate": 6.910907138103491e-06,
      "loss": 0.0179,
      "step": 957620
    },
    {
      "epoch": 1.5671988635991698,
      "grad_norm": 0.20669209957122803,
      "learning_rate": 6.9108412458899745e-06,
      "loss": 0.0214,
      "step": 957640
    },
    {
      "epoch": 1.5672315940378234,
      "grad_norm": 0.8078558444976807,
      "learning_rate": 6.9107753536764564e-06,
      "loss": 0.0308,
      "step": 957660
    },
    {
      "epoch": 1.5672643244764766,
      "grad_norm": 0.8474050164222717,
      "learning_rate": 6.91070946146294e-06,
      "loss": 0.0176,
      "step": 957680
    },
    {
      "epoch": 1.56729705491513,
      "grad_norm": 0.9114223122596741,
      "learning_rate": 6.910643569249422e-06,
      "loss": 0.0217,
      "step": 957700
    },
    {
      "epoch": 1.5673297853537833,
      "grad_norm": 0.533098578453064,
      "learning_rate": 6.9105776770359055e-06,
      "loss": 0.018,
      "step": 957720
    },
    {
      "epoch": 1.5673625157924367,
      "grad_norm": 0.5162538886070251,
      "learning_rate": 6.910511784822388e-06,
      "loss": 0.0225,
      "step": 957740
    },
    {
      "epoch": 1.56739524623109,
      "grad_norm": 0.2435648888349533,
      "learning_rate": 6.910445892608871e-06,
      "loss": 0.021,
      "step": 957760
    },
    {
      "epoch": 1.5674279766697432,
      "grad_norm": 0.8565436005592346,
      "learning_rate": 6.910380000395354e-06,
      "loss": 0.0192,
      "step": 957780
    },
    {
      "epoch": 1.5674607071083968,
      "grad_norm": 0.707166314125061,
      "learning_rate": 6.910314108181837e-06,
      "loss": 0.0145,
      "step": 957800
    },
    {
      "epoch": 1.56749343754705,
      "grad_norm": 0.41694656014442444,
      "learning_rate": 6.910248215968319e-06,
      "loss": 0.0175,
      "step": 957820
    },
    {
      "epoch": 1.5675261679857033,
      "grad_norm": 0.47176453471183777,
      "learning_rate": 6.910182323754803e-06,
      "loss": 0.034,
      "step": 957840
    },
    {
      "epoch": 1.5675588984243567,
      "grad_norm": 0.30527475476264954,
      "learning_rate": 6.910116431541285e-06,
      "loss": 0.0179,
      "step": 957860
    },
    {
      "epoch": 1.56759162886301,
      "grad_norm": 0.8440704345703125,
      "learning_rate": 6.910050539327768e-06,
      "loss": 0.0195,
      "step": 957880
    },
    {
      "epoch": 1.5676243593016634,
      "grad_norm": 0.46371951699256897,
      "learning_rate": 6.909984647114251e-06,
      "loss": 0.0192,
      "step": 957900
    },
    {
      "epoch": 1.5676570897403166,
      "grad_norm": 0.49799323081970215,
      "learning_rate": 6.909918754900734e-06,
      "loss": 0.0224,
      "step": 957920
    },
    {
      "epoch": 1.5676898201789702,
      "grad_norm": 0.8571550250053406,
      "learning_rate": 6.9098528626872165e-06,
      "loss": 0.0231,
      "step": 957940
    },
    {
      "epoch": 1.5677225506176233,
      "grad_norm": 1.41274094581604,
      "learning_rate": 6.9097869704737e-06,
      "loss": 0.0258,
      "step": 957960
    },
    {
      "epoch": 1.5677552810562767,
      "grad_norm": 1.6212146282196045,
      "learning_rate": 6.909721078260182e-06,
      "loss": 0.0245,
      "step": 957980
    },
    {
      "epoch": 1.56778801149493,
      "grad_norm": 0.5969251990318298,
      "learning_rate": 6.9096551860466655e-06,
      "loss": 0.0209,
      "step": 958000
    },
    {
      "epoch": 1.5678207419335832,
      "grad_norm": 0.73790043592453,
      "learning_rate": 6.909589293833149e-06,
      "loss": 0.0215,
      "step": 958020
    },
    {
      "epoch": 1.5678534723722368,
      "grad_norm": 1.551803469657898,
      "learning_rate": 6.909523401619631e-06,
      "loss": 0.0213,
      "step": 958040
    },
    {
      "epoch": 1.56788620281089,
      "grad_norm": 0.21318453550338745,
      "learning_rate": 6.909457509406115e-06,
      "loss": 0.0163,
      "step": 958060
    },
    {
      "epoch": 1.5679189332495436,
      "grad_norm": 0.6827983856201172,
      "learning_rate": 6.9093916171925965e-06,
      "loss": 0.0229,
      "step": 958080
    },
    {
      "epoch": 1.5679516636881967,
      "grad_norm": 0.7565090656280518,
      "learning_rate": 6.90932572497908e-06,
      "loss": 0.0236,
      "step": 958100
    },
    {
      "epoch": 1.56798439412685,
      "grad_norm": 0.835960328578949,
      "learning_rate": 6.909259832765563e-06,
      "loss": 0.0212,
      "step": 958120
    },
    {
      "epoch": 1.5680171245655035,
      "grad_norm": 0.7205721735954285,
      "learning_rate": 6.9091939405520456e-06,
      "loss": 0.0234,
      "step": 958140
    },
    {
      "epoch": 1.5680498550041566,
      "grad_norm": 0.5000396370887756,
      "learning_rate": 6.909128048338528e-06,
      "loss": 0.0201,
      "step": 958160
    },
    {
      "epoch": 1.5680825854428102,
      "grad_norm": 0.33004432916641235,
      "learning_rate": 6.909062156125012e-06,
      "loss": 0.0229,
      "step": 958180
    },
    {
      "epoch": 1.5681153158814634,
      "grad_norm": 0.7564147710800171,
      "learning_rate": 6.908996263911494e-06,
      "loss": 0.0186,
      "step": 958200
    },
    {
      "epoch": 1.5681480463201167,
      "grad_norm": 0.526155948638916,
      "learning_rate": 6.908930371697977e-06,
      "loss": 0.0196,
      "step": 958220
    },
    {
      "epoch": 1.56818077675877,
      "grad_norm": 0.1733943670988083,
      "learning_rate": 6.908864479484459e-06,
      "loss": 0.0156,
      "step": 958240
    },
    {
      "epoch": 1.5682135071974235,
      "grad_norm": 1.0298702716827393,
      "learning_rate": 6.908798587270943e-06,
      "loss": 0.0151,
      "step": 958260
    },
    {
      "epoch": 1.5682462376360768,
      "grad_norm": 0.584828794002533,
      "learning_rate": 6.908732695057425e-06,
      "loss": 0.0138,
      "step": 958280
    },
    {
      "epoch": 1.56827896807473,
      "grad_norm": 0.5754055380821228,
      "learning_rate": 6.908666802843908e-06,
      "loss": 0.0337,
      "step": 958300
    },
    {
      "epoch": 1.5683116985133836,
      "grad_norm": 0.5095987319946289,
      "learning_rate": 6.908600910630391e-06,
      "loss": 0.0224,
      "step": 958320
    },
    {
      "epoch": 1.5683444289520367,
      "grad_norm": 0.5860859751701355,
      "learning_rate": 6.908535018416875e-06,
      "loss": 0.0162,
      "step": 958340
    },
    {
      "epoch": 1.5683771593906901,
      "grad_norm": 0.3118762969970703,
      "learning_rate": 6.908469126203357e-06,
      "loss": 0.0244,
      "step": 958360
    },
    {
      "epoch": 1.5684098898293435,
      "grad_norm": 0.5908623933792114,
      "learning_rate": 6.90840323398984e-06,
      "loss": 0.022,
      "step": 958380
    },
    {
      "epoch": 1.5684426202679969,
      "grad_norm": 0.14076027274131775,
      "learning_rate": 6.908337341776324e-06,
      "loss": 0.0184,
      "step": 958400
    },
    {
      "epoch": 1.5684753507066502,
      "grad_norm": 0.3390272557735443,
      "learning_rate": 6.908271449562806e-06,
      "loss": 0.0197,
      "step": 958420
    },
    {
      "epoch": 1.5685080811453034,
      "grad_norm": 0.528691291809082,
      "learning_rate": 6.908205557349289e-06,
      "loss": 0.0185,
      "step": 958440
    },
    {
      "epoch": 1.568540811583957,
      "grad_norm": 0.2218075543642044,
      "learning_rate": 6.908139665135771e-06,
      "loss": 0.0215,
      "step": 958460
    },
    {
      "epoch": 1.5685735420226101,
      "grad_norm": 0.3439154028892517,
      "learning_rate": 6.908073772922255e-06,
      "loss": 0.0166,
      "step": 958480
    },
    {
      "epoch": 1.5686062724612635,
      "grad_norm": 0.19213521480560303,
      "learning_rate": 6.9080078807087366e-06,
      "loss": 0.0181,
      "step": 958500
    },
    {
      "epoch": 1.5686390028999169,
      "grad_norm": 0.8964059948921204,
      "learning_rate": 6.90794198849522e-06,
      "loss": 0.0243,
      "step": 958520
    },
    {
      "epoch": 1.5686717333385702,
      "grad_norm": 0.4818330705165863,
      "learning_rate": 6.907876096281703e-06,
      "loss": 0.0151,
      "step": 958540
    },
    {
      "epoch": 1.5687044637772236,
      "grad_norm": 1.0816656351089478,
      "learning_rate": 6.907810204068186e-06,
      "loss": 0.0249,
      "step": 958560
    },
    {
      "epoch": 1.5687371942158768,
      "grad_norm": 1.9555917978286743,
      "learning_rate": 6.907744311854668e-06,
      "loss": 0.0179,
      "step": 958580
    },
    {
      "epoch": 1.5687699246545304,
      "grad_norm": 0.9379174113273621,
      "learning_rate": 6.907678419641152e-06,
      "loss": 0.0146,
      "step": 958600
    },
    {
      "epoch": 1.5688026550931835,
      "grad_norm": 0.5286641716957092,
      "learning_rate": 6.907612527427634e-06,
      "loss": 0.0203,
      "step": 958620
    },
    {
      "epoch": 1.5688353855318369,
      "grad_norm": 0.26625993847846985,
      "learning_rate": 6.9075466352141174e-06,
      "loss": 0.0236,
      "step": 958640
    },
    {
      "epoch": 1.5688681159704903,
      "grad_norm": 0.20210672914981842,
      "learning_rate": 6.907480743000599e-06,
      "loss": 0.0193,
      "step": 958660
    },
    {
      "epoch": 1.5689008464091436,
      "grad_norm": 1.0065523386001587,
      "learning_rate": 6.907414850787083e-06,
      "loss": 0.0249,
      "step": 958680
    },
    {
      "epoch": 1.568933576847797,
      "grad_norm": 0.4449205696582794,
      "learning_rate": 6.9073489585735665e-06,
      "loss": 0.0153,
      "step": 958700
    },
    {
      "epoch": 1.5689663072864501,
      "grad_norm": 0.1113082617521286,
      "learning_rate": 6.907283066360048e-06,
      "loss": 0.0175,
      "step": 958720
    },
    {
      "epoch": 1.5689990377251037,
      "grad_norm": 0.7623189687728882,
      "learning_rate": 6.907217174146532e-06,
      "loss": 0.0177,
      "step": 958740
    },
    {
      "epoch": 1.569031768163757,
      "grad_norm": 0.46240052580833435,
      "learning_rate": 6.907151281933015e-06,
      "loss": 0.0185,
      "step": 958760
    },
    {
      "epoch": 1.5690644986024103,
      "grad_norm": 0.285683810710907,
      "learning_rate": 6.9070853897194975e-06,
      "loss": 0.0218,
      "step": 958780
    },
    {
      "epoch": 1.5690972290410636,
      "grad_norm": 0.3685818910598755,
      "learning_rate": 6.90701949750598e-06,
      "loss": 0.0179,
      "step": 958800
    },
    {
      "epoch": 1.5691299594797168,
      "grad_norm": 1.1685914993286133,
      "learning_rate": 6.906953605292464e-06,
      "loss": 0.0203,
      "step": 958820
    },
    {
      "epoch": 1.5691626899183704,
      "grad_norm": 0.33417871594429016,
      "learning_rate": 6.906887713078946e-06,
      "loss": 0.0139,
      "step": 958840
    },
    {
      "epoch": 1.5691954203570235,
      "grad_norm": 0.8400236368179321,
      "learning_rate": 6.906821820865429e-06,
      "loss": 0.0147,
      "step": 958860
    },
    {
      "epoch": 1.5692281507956771,
      "grad_norm": 0.39178070425987244,
      "learning_rate": 6.906755928651911e-06,
      "loss": 0.0315,
      "step": 958880
    },
    {
      "epoch": 1.5692608812343303,
      "grad_norm": 0.4610307812690735,
      "learning_rate": 6.906690036438395e-06,
      "loss": 0.0174,
      "step": 958900
    },
    {
      "epoch": 1.5692936116729836,
      "grad_norm": 0.37999194860458374,
      "learning_rate": 6.9066241442248775e-06,
      "loss": 0.0206,
      "step": 958920
    },
    {
      "epoch": 1.569326342111637,
      "grad_norm": 0.6071269512176514,
      "learning_rate": 6.90655825201136e-06,
      "loss": 0.0238,
      "step": 958940
    },
    {
      "epoch": 1.5693590725502902,
      "grad_norm": 1.0052862167358398,
      "learning_rate": 6.906492359797843e-06,
      "loss": 0.0151,
      "step": 958960
    },
    {
      "epoch": 1.5693918029889438,
      "grad_norm": 0.04972993582487106,
      "learning_rate": 6.9064264675843266e-06,
      "loss": 0.0234,
      "step": 958980
    },
    {
      "epoch": 1.569424533427597,
      "grad_norm": 0.3021244406700134,
      "learning_rate": 6.9063605753708085e-06,
      "loss": 0.0213,
      "step": 959000
    },
    {
      "epoch": 1.5694572638662503,
      "grad_norm": 0.8883962631225586,
      "learning_rate": 6.906294683157292e-06,
      "loss": 0.0217,
      "step": 959020
    },
    {
      "epoch": 1.5694899943049037,
      "grad_norm": 0.7017924785614014,
      "learning_rate": 6.906228790943774e-06,
      "loss": 0.0219,
      "step": 959040
    },
    {
      "epoch": 1.569522724743557,
      "grad_norm": 0.44060757756233215,
      "learning_rate": 6.9061628987302575e-06,
      "loss": 0.0201,
      "step": 959060
    },
    {
      "epoch": 1.5695554551822104,
      "grad_norm": 0.31459489464759827,
      "learning_rate": 6.906097006516741e-06,
      "loss": 0.0185,
      "step": 959080
    },
    {
      "epoch": 1.5695881856208636,
      "grad_norm": 0.43274641036987305,
      "learning_rate": 6.906031114303223e-06,
      "loss": 0.0237,
      "step": 959100
    },
    {
      "epoch": 1.5696209160595171,
      "grad_norm": 0.3306821882724762,
      "learning_rate": 6.905965222089707e-06,
      "loss": 0.0227,
      "step": 959120
    },
    {
      "epoch": 1.5696536464981703,
      "grad_norm": 0.21932558715343475,
      "learning_rate": 6.905899329876189e-06,
      "loss": 0.0157,
      "step": 959140
    },
    {
      "epoch": 1.5696863769368237,
      "grad_norm": 1.067307710647583,
      "learning_rate": 6.905833437662672e-06,
      "loss": 0.0197,
      "step": 959160
    },
    {
      "epoch": 1.569719107375477,
      "grad_norm": 1.2244946956634521,
      "learning_rate": 6.905767545449155e-06,
      "loss": 0.017,
      "step": 959180
    },
    {
      "epoch": 1.5697518378141304,
      "grad_norm": 0.20973500609397888,
      "learning_rate": 6.905701653235638e-06,
      "loss": 0.0171,
      "step": 959200
    },
    {
      "epoch": 1.5697845682527838,
      "grad_norm": 0.11676590144634247,
      "learning_rate": 6.90563576102212e-06,
      "loss": 0.0214,
      "step": 959220
    },
    {
      "epoch": 1.569817298691437,
      "grad_norm": 1.1797091960906982,
      "learning_rate": 6.905569868808604e-06,
      "loss": 0.0217,
      "step": 959240
    },
    {
      "epoch": 1.5698500291300905,
      "grad_norm": 0.3559940457344055,
      "learning_rate": 6.905503976595086e-06,
      "loss": 0.0186,
      "step": 959260
    },
    {
      "epoch": 1.5698827595687437,
      "grad_norm": 0.3770414888858795,
      "learning_rate": 6.905438084381569e-06,
      "loss": 0.0164,
      "step": 959280
    },
    {
      "epoch": 1.569915490007397,
      "grad_norm": 0.13850080966949463,
      "learning_rate": 6.905372192168051e-06,
      "loss": 0.0185,
      "step": 959300
    },
    {
      "epoch": 1.5699482204460504,
      "grad_norm": 0.20812703669071198,
      "learning_rate": 6.905306299954535e-06,
      "loss": 0.0208,
      "step": 959320
    },
    {
      "epoch": 1.5699809508847038,
      "grad_norm": 1.226225733757019,
      "learning_rate": 6.9052404077410176e-06,
      "loss": 0.0187,
      "step": 959340
    },
    {
      "epoch": 1.5700136813233572,
      "grad_norm": 0.6134660243988037,
      "learning_rate": 6.9051745155275e-06,
      "loss": 0.0181,
      "step": 959360
    },
    {
      "epoch": 1.5700464117620103,
      "grad_norm": 0.6736909747123718,
      "learning_rate": 6.905108623313983e-06,
      "loss": 0.0224,
      "step": 959380
    },
    {
      "epoch": 1.570079142200664,
      "grad_norm": 0.43566980957984924,
      "learning_rate": 6.905042731100467e-06,
      "loss": 0.0152,
      "step": 959400
    },
    {
      "epoch": 1.570111872639317,
      "grad_norm": 0.26191258430480957,
      "learning_rate": 6.90497683888695e-06,
      "loss": 0.0263,
      "step": 959420
    },
    {
      "epoch": 1.5701446030779704,
      "grad_norm": 0.4340364634990692,
      "learning_rate": 6.904910946673432e-06,
      "loss": 0.0167,
      "step": 959440
    },
    {
      "epoch": 1.5701773335166238,
      "grad_norm": 1.8363609313964844,
      "learning_rate": 6.904845054459916e-06,
      "loss": 0.0198,
      "step": 959460
    },
    {
      "epoch": 1.5702100639552772,
      "grad_norm": 1.267682433128357,
      "learning_rate": 6.904779162246398e-06,
      "loss": 0.0239,
      "step": 959480
    },
    {
      "epoch": 1.5702427943939306,
      "grad_norm": 1.7190911769866943,
      "learning_rate": 6.904713270032881e-06,
      "loss": 0.0232,
      "step": 959500
    },
    {
      "epoch": 1.5702755248325837,
      "grad_norm": 0.1149914562702179,
      "learning_rate": 6.904647377819363e-06,
      "loss": 0.0194,
      "step": 959520
    },
    {
      "epoch": 1.5703082552712373,
      "grad_norm": 1.3862920999526978,
      "learning_rate": 6.904581485605847e-06,
      "loss": 0.0175,
      "step": 959540
    },
    {
      "epoch": 1.5703409857098904,
      "grad_norm": 0.7622985243797302,
      "learning_rate": 6.904515593392329e-06,
      "loss": 0.0202,
      "step": 959560
    },
    {
      "epoch": 1.5703737161485438,
      "grad_norm": 0.23106804490089417,
      "learning_rate": 6.904449701178812e-06,
      "loss": 0.0133,
      "step": 959580
    },
    {
      "epoch": 1.5704064465871972,
      "grad_norm": 0.17951880395412445,
      "learning_rate": 6.904383808965295e-06,
      "loss": 0.021,
      "step": 959600
    },
    {
      "epoch": 1.5704391770258503,
      "grad_norm": 1.1812092065811157,
      "learning_rate": 6.9043179167517785e-06,
      "loss": 0.0212,
      "step": 959620
    },
    {
      "epoch": 1.570471907464504,
      "grad_norm": 0.8236770629882812,
      "learning_rate": 6.90425202453826e-06,
      "loss": 0.0132,
      "step": 959640
    },
    {
      "epoch": 1.570504637903157,
      "grad_norm": 1.4830424785614014,
      "learning_rate": 6.904186132324744e-06,
      "loss": 0.0301,
      "step": 959660
    },
    {
      "epoch": 1.5705373683418107,
      "grad_norm": 0.9232874512672424,
      "learning_rate": 6.904120240111226e-06,
      "loss": 0.0179,
      "step": 959680
    },
    {
      "epoch": 1.5705700987804638,
      "grad_norm": 1.2591580152511597,
      "learning_rate": 6.9040543478977094e-06,
      "loss": 0.0228,
      "step": 959700
    },
    {
      "epoch": 1.5706028292191172,
      "grad_norm": 1.5878220796585083,
      "learning_rate": 6.903988455684192e-06,
      "loss": 0.02,
      "step": 959720
    },
    {
      "epoch": 1.5706355596577706,
      "grad_norm": 0.1408838927745819,
      "learning_rate": 6.903922563470675e-06,
      "loss": 0.0198,
      "step": 959740
    },
    {
      "epoch": 1.5706682900964237,
      "grad_norm": 0.910294234752655,
      "learning_rate": 6.9038566712571585e-06,
      "loss": 0.023,
      "step": 959760
    },
    {
      "epoch": 1.5707010205350773,
      "grad_norm": 0.38068848848342896,
      "learning_rate": 6.903790779043641e-06,
      "loss": 0.0202,
      "step": 959780
    },
    {
      "epoch": 1.5707337509737305,
      "grad_norm": 2.3953447341918945,
      "learning_rate": 6.903724886830124e-06,
      "loss": 0.0189,
      "step": 959800
    },
    {
      "epoch": 1.5707664814123838,
      "grad_norm": 0.7666575312614441,
      "learning_rate": 6.903658994616607e-06,
      "loss": 0.0187,
      "step": 959820
    },
    {
      "epoch": 1.5707992118510372,
      "grad_norm": 0.5670984983444214,
      "learning_rate": 6.90359310240309e-06,
      "loss": 0.0216,
      "step": 959840
    },
    {
      "epoch": 1.5708319422896906,
      "grad_norm": 0.27199283242225647,
      "learning_rate": 6.903527210189572e-06,
      "loss": 0.0242,
      "step": 959860
    },
    {
      "epoch": 1.570864672728344,
      "grad_norm": 0.24040690064430237,
      "learning_rate": 6.903461317976056e-06,
      "loss": 0.0179,
      "step": 959880
    },
    {
      "epoch": 1.570897403166997,
      "grad_norm": 0.639946460723877,
      "learning_rate": 6.903395425762538e-06,
      "loss": 0.0215,
      "step": 959900
    },
    {
      "epoch": 1.5709301336056507,
      "grad_norm": 0.6057097911834717,
      "learning_rate": 6.903329533549021e-06,
      "loss": 0.0177,
      "step": 959920
    },
    {
      "epoch": 1.5709628640443039,
      "grad_norm": 0.38077932596206665,
      "learning_rate": 6.903263641335504e-06,
      "loss": 0.0172,
      "step": 959940
    },
    {
      "epoch": 1.5709955944829572,
      "grad_norm": 0.7516641616821289,
      "learning_rate": 6.903197749121987e-06,
      "loss": 0.0207,
      "step": 959960
    },
    {
      "epoch": 1.5710283249216106,
      "grad_norm": 1.199536919593811,
      "learning_rate": 6.9031318569084695e-06,
      "loss": 0.0242,
      "step": 959980
    },
    {
      "epoch": 1.571061055360264,
      "grad_norm": 0.10576947778463364,
      "learning_rate": 6.903065964694953e-06,
      "loss": 0.022,
      "step": 960000
    },
    {
      "epoch": 1.5710937857989173,
      "grad_norm": 0.1614144742488861,
      "learning_rate": 6.903000072481435e-06,
      "loss": 0.0199,
      "step": 960020
    },
    {
      "epoch": 1.5711265162375705,
      "grad_norm": 0.5819287300109863,
      "learning_rate": 6.9029341802679185e-06,
      "loss": 0.0139,
      "step": 960040
    },
    {
      "epoch": 1.571159246676224,
      "grad_norm": 0.28269028663635254,
      "learning_rate": 6.9028682880544004e-06,
      "loss": 0.0279,
      "step": 960060
    },
    {
      "epoch": 1.5711919771148772,
      "grad_norm": 0.5525010824203491,
      "learning_rate": 6.902802395840884e-06,
      "loss": 0.0166,
      "step": 960080
    },
    {
      "epoch": 1.5712247075535306,
      "grad_norm": 0.8505045771598816,
      "learning_rate": 6.902736503627368e-06,
      "loss": 0.0263,
      "step": 960100
    },
    {
      "epoch": 1.571257437992184,
      "grad_norm": 0.9860318303108215,
      "learning_rate": 6.9026706114138495e-06,
      "loss": 0.0252,
      "step": 960120
    },
    {
      "epoch": 1.5712901684308374,
      "grad_norm": 0.5933125615119934,
      "learning_rate": 6.902604719200333e-06,
      "loss": 0.0118,
      "step": 960140
    },
    {
      "epoch": 1.5713228988694907,
      "grad_norm": 0.5879552960395813,
      "learning_rate": 6.902538826986816e-06,
      "loss": 0.0329,
      "step": 960160
    },
    {
      "epoch": 1.5713556293081439,
      "grad_norm": 0.5649843215942383,
      "learning_rate": 6.9024729347732986e-06,
      "loss": 0.0245,
      "step": 960180
    },
    {
      "epoch": 1.5713883597467975,
      "grad_norm": 0.28413155674934387,
      "learning_rate": 6.902407042559781e-06,
      "loss": 0.0154,
      "step": 960200
    },
    {
      "epoch": 1.5714210901854506,
      "grad_norm": 0.2015233188867569,
      "learning_rate": 6.902341150346265e-06,
      "loss": 0.0253,
      "step": 960220
    },
    {
      "epoch": 1.571453820624104,
      "grad_norm": 0.22026532888412476,
      "learning_rate": 6.902275258132747e-06,
      "loss": 0.0137,
      "step": 960240
    },
    {
      "epoch": 1.5714865510627574,
      "grad_norm": 0.5893740057945251,
      "learning_rate": 6.90220936591923e-06,
      "loss": 0.0264,
      "step": 960260
    },
    {
      "epoch": 1.5715192815014107,
      "grad_norm": 0.8589605689048767,
      "learning_rate": 6.902143473705712e-06,
      "loss": 0.0161,
      "step": 960280
    },
    {
      "epoch": 1.571552011940064,
      "grad_norm": 0.19002099335193634,
      "learning_rate": 6.902077581492196e-06,
      "loss": 0.0272,
      "step": 960300
    },
    {
      "epoch": 1.5715847423787173,
      "grad_norm": 0.14531868696212769,
      "learning_rate": 6.902011689278678e-06,
      "loss": 0.0197,
      "step": 960320
    },
    {
      "epoch": 1.5716174728173709,
      "grad_norm": 0.17342007160186768,
      "learning_rate": 6.901945797065161e-06,
      "loss": 0.0216,
      "step": 960340
    },
    {
      "epoch": 1.571650203256024,
      "grad_norm": 0.41634517908096313,
      "learning_rate": 6.901879904851644e-06,
      "loss": 0.0215,
      "step": 960360
    },
    {
      "epoch": 1.5716829336946774,
      "grad_norm": 0.3908119797706604,
      "learning_rate": 6.901814012638127e-06,
      "loss": 0.0285,
      "step": 960380
    },
    {
      "epoch": 1.5717156641333307,
      "grad_norm": 0.44427233934402466,
      "learning_rate": 6.9017481204246096e-06,
      "loss": 0.0152,
      "step": 960400
    },
    {
      "epoch": 1.571748394571984,
      "grad_norm": 0.7844053506851196,
      "learning_rate": 6.901682228211093e-06,
      "loss": 0.0253,
      "step": 960420
    },
    {
      "epoch": 1.5717811250106375,
      "grad_norm": 0.7246497869491577,
      "learning_rate": 6.901616335997575e-06,
      "loss": 0.0251,
      "step": 960440
    },
    {
      "epoch": 1.5718138554492906,
      "grad_norm": 1.4551007747650146,
      "learning_rate": 6.901550443784059e-06,
      "loss": 0.027,
      "step": 960460
    },
    {
      "epoch": 1.5718465858879442,
      "grad_norm": 0.41083380579948425,
      "learning_rate": 6.901484551570542e-06,
      "loss": 0.0212,
      "step": 960480
    },
    {
      "epoch": 1.5718793163265974,
      "grad_norm": 0.2905709743499756,
      "learning_rate": 6.901418659357024e-06,
      "loss": 0.0189,
      "step": 960500
    },
    {
      "epoch": 1.5719120467652508,
      "grad_norm": 0.15317021310329437,
      "learning_rate": 6.901352767143508e-06,
      "loss": 0.0153,
      "step": 960520
    },
    {
      "epoch": 1.5719447772039041,
      "grad_norm": 1.5966262817382812,
      "learning_rate": 6.90128687492999e-06,
      "loss": 0.03,
      "step": 960540
    },
    {
      "epoch": 1.5719775076425573,
      "grad_norm": 0.4837126135826111,
      "learning_rate": 6.901220982716473e-06,
      "loss": 0.018,
      "step": 960560
    },
    {
      "epoch": 1.5720102380812109,
      "grad_norm": 0.316898375749588,
      "learning_rate": 6.901155090502956e-06,
      "loss": 0.019,
      "step": 960580
    },
    {
      "epoch": 1.572042968519864,
      "grad_norm": 0.20169921219348907,
      "learning_rate": 6.901089198289439e-06,
      "loss": 0.0192,
      "step": 960600
    },
    {
      "epoch": 1.5720756989585174,
      "grad_norm": 0.1359105259180069,
      "learning_rate": 6.901023306075921e-06,
      "loss": 0.0254,
      "step": 960620
    },
    {
      "epoch": 1.5721084293971708,
      "grad_norm": 0.3974519670009613,
      "learning_rate": 6.900957413862405e-06,
      "loss": 0.0152,
      "step": 960640
    },
    {
      "epoch": 1.5721411598358241,
      "grad_norm": 0.4074995219707489,
      "learning_rate": 6.900891521648887e-06,
      "loss": 0.0155,
      "step": 960660
    },
    {
      "epoch": 1.5721738902744775,
      "grad_norm": 0.16971485316753387,
      "learning_rate": 6.9008256294353705e-06,
      "loss": 0.019,
      "step": 960680
    },
    {
      "epoch": 1.5722066207131307,
      "grad_norm": 0.46117720007896423,
      "learning_rate": 6.900759737221852e-06,
      "loss": 0.012,
      "step": 960700
    },
    {
      "epoch": 1.5722393511517843,
      "grad_norm": 0.8352869153022766,
      "learning_rate": 6.900693845008336e-06,
      "loss": 0.0245,
      "step": 960720
    },
    {
      "epoch": 1.5722720815904374,
      "grad_norm": 0.20168238878250122,
      "learning_rate": 6.900627952794819e-06,
      "loss": 0.016,
      "step": 960740
    },
    {
      "epoch": 1.5723048120290908,
      "grad_norm": 0.17317433655261993,
      "learning_rate": 6.900562060581301e-06,
      "loss": 0.0223,
      "step": 960760
    },
    {
      "epoch": 1.5723375424677442,
      "grad_norm": 0.3686215281486511,
      "learning_rate": 6.900496168367784e-06,
      "loss": 0.02,
      "step": 960780
    },
    {
      "epoch": 1.5723702729063975,
      "grad_norm": 0.3534599840641022,
      "learning_rate": 6.900430276154268e-06,
      "loss": 0.0228,
      "step": 960800
    },
    {
      "epoch": 1.572403003345051,
      "grad_norm": 0.09268525242805481,
      "learning_rate": 6.9003643839407505e-06,
      "loss": 0.0193,
      "step": 960820
    },
    {
      "epoch": 1.572435733783704,
      "grad_norm": 0.7804358005523682,
      "learning_rate": 6.900298491727233e-06,
      "loss": 0.0148,
      "step": 960840
    },
    {
      "epoch": 1.5724684642223576,
      "grad_norm": 0.22917096316814423,
      "learning_rate": 6.900232599513717e-06,
      "loss": 0.0194,
      "step": 960860
    },
    {
      "epoch": 1.5725011946610108,
      "grad_norm": 0.9275817275047302,
      "learning_rate": 6.900166707300199e-06,
      "loss": 0.0231,
      "step": 960880
    },
    {
      "epoch": 1.5725339250996642,
      "grad_norm": 0.23967115581035614,
      "learning_rate": 6.900100815086682e-06,
      "loss": 0.0193,
      "step": 960900
    },
    {
      "epoch": 1.5725666555383175,
      "grad_norm": 0.47917842864990234,
      "learning_rate": 6.900034922873164e-06,
      "loss": 0.0237,
      "step": 960920
    },
    {
      "epoch": 1.572599385976971,
      "grad_norm": 1.4338228702545166,
      "learning_rate": 6.899969030659648e-06,
      "loss": 0.0183,
      "step": 960940
    },
    {
      "epoch": 1.5726321164156243,
      "grad_norm": 0.48910465836524963,
      "learning_rate": 6.8999031384461305e-06,
      "loss": 0.0213,
      "step": 960960
    },
    {
      "epoch": 1.5726648468542774,
      "grad_norm": 0.7538202404975891,
      "learning_rate": 6.899837246232613e-06,
      "loss": 0.0213,
      "step": 960980
    },
    {
      "epoch": 1.572697577292931,
      "grad_norm": 0.5156115889549255,
      "learning_rate": 6.899771354019096e-06,
      "loss": 0.019,
      "step": 961000
    },
    {
      "epoch": 1.5727303077315842,
      "grad_norm": 0.4376274645328522,
      "learning_rate": 6.8997054618055796e-06,
      "loss": 0.0265,
      "step": 961020
    },
    {
      "epoch": 1.5727630381702375,
      "grad_norm": 0.9608411192893982,
      "learning_rate": 6.8996395695920615e-06,
      "loss": 0.0178,
      "step": 961040
    },
    {
      "epoch": 1.572795768608891,
      "grad_norm": 0.3152594268321991,
      "learning_rate": 6.899573677378545e-06,
      "loss": 0.029,
      "step": 961060
    },
    {
      "epoch": 1.572828499047544,
      "grad_norm": 0.6502811908721924,
      "learning_rate": 6.899507785165027e-06,
      "loss": 0.0185,
      "step": 961080
    },
    {
      "epoch": 1.5728612294861977,
      "grad_norm": 0.5208342671394348,
      "learning_rate": 6.8994418929515105e-06,
      "loss": 0.0198,
      "step": 961100
    },
    {
      "epoch": 1.5728939599248508,
      "grad_norm": 0.6370337009429932,
      "learning_rate": 6.899376000737993e-06,
      "loss": 0.0206,
      "step": 961120
    },
    {
      "epoch": 1.5729266903635044,
      "grad_norm": 1.174896001815796,
      "learning_rate": 6.899310108524476e-06,
      "loss": 0.0145,
      "step": 961140
    },
    {
      "epoch": 1.5729594208021576,
      "grad_norm": 0.8421132564544678,
      "learning_rate": 6.89924421631096e-06,
      "loss": 0.0185,
      "step": 961160
    },
    {
      "epoch": 1.572992151240811,
      "grad_norm": 0.733805239200592,
      "learning_rate": 6.899178324097442e-06,
      "loss": 0.0223,
      "step": 961180
    },
    {
      "epoch": 1.5730248816794643,
      "grad_norm": 1.4166868925094604,
      "learning_rate": 6.899112431883925e-06,
      "loss": 0.0217,
      "step": 961200
    },
    {
      "epoch": 1.5730576121181175,
      "grad_norm": 0.44216251373291016,
      "learning_rate": 6.899046539670408e-06,
      "loss": 0.0208,
      "step": 961220
    },
    {
      "epoch": 1.573090342556771,
      "grad_norm": 0.24319802224636078,
      "learning_rate": 6.898980647456891e-06,
      "loss": 0.0264,
      "step": 961240
    },
    {
      "epoch": 1.5731230729954242,
      "grad_norm": 0.19242200255393982,
      "learning_rate": 6.898914755243373e-06,
      "loss": 0.025,
      "step": 961260
    },
    {
      "epoch": 1.5731558034340776,
      "grad_norm": 0.35274726152420044,
      "learning_rate": 6.898848863029857e-06,
      "loss": 0.018,
      "step": 961280
    },
    {
      "epoch": 1.573188533872731,
      "grad_norm": 0.5969540476799011,
      "learning_rate": 6.898782970816339e-06,
      "loss": 0.0255,
      "step": 961300
    },
    {
      "epoch": 1.5732212643113843,
      "grad_norm": 0.4122559130191803,
      "learning_rate": 6.898717078602822e-06,
      "loss": 0.0264,
      "step": 961320
    },
    {
      "epoch": 1.5732539947500377,
      "grad_norm": 0.7696623802185059,
      "learning_rate": 6.898651186389304e-06,
      "loss": 0.0188,
      "step": 961340
    },
    {
      "epoch": 1.5732867251886908,
      "grad_norm": 0.42053401470184326,
      "learning_rate": 6.898585294175788e-06,
      "loss": 0.023,
      "step": 961360
    },
    {
      "epoch": 1.5733194556273444,
      "grad_norm": 0.38993144035339355,
      "learning_rate": 6.898519401962271e-06,
      "loss": 0.021,
      "step": 961380
    },
    {
      "epoch": 1.5733521860659976,
      "grad_norm": 0.2371763437986374,
      "learning_rate": 6.898453509748753e-06,
      "loss": 0.027,
      "step": 961400
    },
    {
      "epoch": 1.573384916504651,
      "grad_norm": 0.43228086829185486,
      "learning_rate": 6.898387617535236e-06,
      "loss": 0.0152,
      "step": 961420
    },
    {
      "epoch": 1.5734176469433043,
      "grad_norm": 0.5668655037879944,
      "learning_rate": 6.89832172532172e-06,
      "loss": 0.0191,
      "step": 961440
    },
    {
      "epoch": 1.5734503773819577,
      "grad_norm": 1.1607229709625244,
      "learning_rate": 6.8982558331082015e-06,
      "loss": 0.0308,
      "step": 961460
    },
    {
      "epoch": 1.573483107820611,
      "grad_norm": 0.5427349805831909,
      "learning_rate": 6.898189940894685e-06,
      "loss": 0.0185,
      "step": 961480
    },
    {
      "epoch": 1.5735158382592642,
      "grad_norm": 0.537800669670105,
      "learning_rate": 6.898124048681167e-06,
      "loss": 0.0272,
      "step": 961500
    },
    {
      "epoch": 1.5735485686979178,
      "grad_norm": 0.17673227190971375,
      "learning_rate": 6.898058156467651e-06,
      "loss": 0.0144,
      "step": 961520
    },
    {
      "epoch": 1.573581299136571,
      "grad_norm": 0.16965585947036743,
      "learning_rate": 6.897992264254134e-06,
      "loss": 0.0148,
      "step": 961540
    },
    {
      "epoch": 1.5736140295752243,
      "grad_norm": 0.9653432965278625,
      "learning_rate": 6.897926372040616e-06,
      "loss": 0.0177,
      "step": 961560
    },
    {
      "epoch": 1.5736467600138777,
      "grad_norm": 0.7780264019966125,
      "learning_rate": 6.8978604798271e-06,
      "loss": 0.026,
      "step": 961580
    },
    {
      "epoch": 1.573679490452531,
      "grad_norm": 0.37506723403930664,
      "learning_rate": 6.897794587613582e-06,
      "loss": 0.016,
      "step": 961600
    },
    {
      "epoch": 1.5737122208911845,
      "grad_norm": 0.7132375836372375,
      "learning_rate": 6.897728695400065e-06,
      "loss": 0.0177,
      "step": 961620
    },
    {
      "epoch": 1.5737449513298376,
      "grad_norm": 0.08753490447998047,
      "learning_rate": 6.897662803186548e-06,
      "loss": 0.0165,
      "step": 961640
    },
    {
      "epoch": 1.5737776817684912,
      "grad_norm": 0.1793133169412613,
      "learning_rate": 6.8975969109730315e-06,
      "loss": 0.0218,
      "step": 961660
    },
    {
      "epoch": 1.5738104122071443,
      "grad_norm": 0.3828226923942566,
      "learning_rate": 6.897531018759513e-06,
      "loss": 0.0203,
      "step": 961680
    },
    {
      "epoch": 1.5738431426457977,
      "grad_norm": 0.3505284786224365,
      "learning_rate": 6.897465126545997e-06,
      "loss": 0.0244,
      "step": 961700
    },
    {
      "epoch": 1.573875873084451,
      "grad_norm": 1.1895498037338257,
      "learning_rate": 6.897399234332479e-06,
      "loss": 0.0207,
      "step": 961720
    },
    {
      "epoch": 1.5739086035231045,
      "grad_norm": 0.7005401849746704,
      "learning_rate": 6.8973333421189624e-06,
      "loss": 0.0195,
      "step": 961740
    },
    {
      "epoch": 1.5739413339617578,
      "grad_norm": 0.21834798157215118,
      "learning_rate": 6.897267449905445e-06,
      "loss": 0.0225,
      "step": 961760
    },
    {
      "epoch": 1.573974064400411,
      "grad_norm": 0.12283877283334732,
      "learning_rate": 6.897201557691928e-06,
      "loss": 0.0219,
      "step": 961780
    },
    {
      "epoch": 1.5740067948390646,
      "grad_norm": 0.6461460590362549,
      "learning_rate": 6.897135665478411e-06,
      "loss": 0.0219,
      "step": 961800
    },
    {
      "epoch": 1.5740395252777177,
      "grad_norm": 0.12797793745994568,
      "learning_rate": 6.897069773264894e-06,
      "loss": 0.0166,
      "step": 961820
    },
    {
      "epoch": 1.574072255716371,
      "grad_norm": 0.30688488483428955,
      "learning_rate": 6.897003881051376e-06,
      "loss": 0.0191,
      "step": 961840
    },
    {
      "epoch": 1.5741049861550245,
      "grad_norm": 0.18292608857154846,
      "learning_rate": 6.89693798883786e-06,
      "loss": 0.0186,
      "step": 961860
    },
    {
      "epoch": 1.5741377165936776,
      "grad_norm": 0.8203612565994263,
      "learning_rate": 6.896872096624343e-06,
      "loss": 0.0171,
      "step": 961880
    },
    {
      "epoch": 1.5741704470323312,
      "grad_norm": 0.42006778717041016,
      "learning_rate": 6.896806204410825e-06,
      "loss": 0.0182,
      "step": 961900
    },
    {
      "epoch": 1.5742031774709844,
      "grad_norm": 1.6472339630126953,
      "learning_rate": 6.896740312197309e-06,
      "loss": 0.0202,
      "step": 961920
    },
    {
      "epoch": 1.574235907909638,
      "grad_norm": 0.3042430877685547,
      "learning_rate": 6.896674419983791e-06,
      "loss": 0.0228,
      "step": 961940
    },
    {
      "epoch": 1.5742686383482911,
      "grad_norm": 0.3803430199623108,
      "learning_rate": 6.896608527770274e-06,
      "loss": 0.0166,
      "step": 961960
    },
    {
      "epoch": 1.5743013687869445,
      "grad_norm": 0.9200219511985779,
      "learning_rate": 6.896542635556757e-06,
      "loss": 0.0208,
      "step": 961980
    },
    {
      "epoch": 1.5743340992255979,
      "grad_norm": 1.0598663091659546,
      "learning_rate": 6.89647674334324e-06,
      "loss": 0.0198,
      "step": 962000
    },
    {
      "epoch": 1.574366829664251,
      "grad_norm": 0.25313127040863037,
      "learning_rate": 6.8964108511297225e-06,
      "loss": 0.0218,
      "step": 962020
    },
    {
      "epoch": 1.5743995601029046,
      "grad_norm": 1.6743419170379639,
      "learning_rate": 6.896344958916206e-06,
      "loss": 0.0207,
      "step": 962040
    },
    {
      "epoch": 1.5744322905415578,
      "grad_norm": 0.4281293749809265,
      "learning_rate": 6.896279066702688e-06,
      "loss": 0.0193,
      "step": 962060
    },
    {
      "epoch": 1.5744650209802111,
      "grad_norm": 0.20837529003620148,
      "learning_rate": 6.8962131744891716e-06,
      "loss": 0.0173,
      "step": 962080
    },
    {
      "epoch": 1.5744977514188645,
      "grad_norm": 0.6568429470062256,
      "learning_rate": 6.8961472822756534e-06,
      "loss": 0.0268,
      "step": 962100
    },
    {
      "epoch": 1.5745304818575179,
      "grad_norm": 0.5066269040107727,
      "learning_rate": 6.896081390062137e-06,
      "loss": 0.0217,
      "step": 962120
    },
    {
      "epoch": 1.5745632122961712,
      "grad_norm": 0.5065448880195618,
      "learning_rate": 6.896015497848619e-06,
      "loss": 0.0229,
      "step": 962140
    },
    {
      "epoch": 1.5745959427348244,
      "grad_norm": 0.8488606810569763,
      "learning_rate": 6.8959496056351025e-06,
      "loss": 0.0221,
      "step": 962160
    },
    {
      "epoch": 1.574628673173478,
      "grad_norm": 0.5339030027389526,
      "learning_rate": 6.895883713421585e-06,
      "loss": 0.0286,
      "step": 962180
    },
    {
      "epoch": 1.5746614036121311,
      "grad_norm": 1.3019839525222778,
      "learning_rate": 6.895817821208069e-06,
      "loss": 0.0249,
      "step": 962200
    },
    {
      "epoch": 1.5746941340507845,
      "grad_norm": 0.3120749890804291,
      "learning_rate": 6.895751928994552e-06,
      "loss": 0.0195,
      "step": 962220
    },
    {
      "epoch": 1.5747268644894379,
      "grad_norm": 0.24257399141788483,
      "learning_rate": 6.895686036781034e-06,
      "loss": 0.0166,
      "step": 962240
    },
    {
      "epoch": 1.5747595949280913,
      "grad_norm": 0.5249837040901184,
      "learning_rate": 6.895620144567518e-06,
      "loss": 0.0182,
      "step": 962260
    },
    {
      "epoch": 1.5747923253667446,
      "grad_norm": 0.4270571172237396,
      "learning_rate": 6.895554252354e-06,
      "loss": 0.0161,
      "step": 962280
    },
    {
      "epoch": 1.5748250558053978,
      "grad_norm": 0.14921841025352478,
      "learning_rate": 6.895488360140483e-06,
      "loss": 0.0219,
      "step": 962300
    },
    {
      "epoch": 1.5748577862440514,
      "grad_norm": 0.2482476532459259,
      "learning_rate": 6.895422467926965e-06,
      "loss": 0.0215,
      "step": 962320
    },
    {
      "epoch": 1.5748905166827045,
      "grad_norm": 0.40096360445022583,
      "learning_rate": 6.895356575713449e-06,
      "loss": 0.0118,
      "step": 962340
    },
    {
      "epoch": 1.574923247121358,
      "grad_norm": 1.297472357749939,
      "learning_rate": 6.895290683499931e-06,
      "loss": 0.0225,
      "step": 962360
    },
    {
      "epoch": 1.5749559775600113,
      "grad_norm": 0.5731688141822815,
      "learning_rate": 6.895224791286414e-06,
      "loss": 0.0182,
      "step": 962380
    },
    {
      "epoch": 1.5749887079986646,
      "grad_norm": 0.1688227653503418,
      "learning_rate": 6.895158899072897e-06,
      "loss": 0.0134,
      "step": 962400
    },
    {
      "epoch": 1.575021438437318,
      "grad_norm": 0.36401355266571045,
      "learning_rate": 6.89509300685938e-06,
      "loss": 0.0269,
      "step": 962420
    },
    {
      "epoch": 1.5750541688759712,
      "grad_norm": 0.6518804430961609,
      "learning_rate": 6.8950271146458626e-06,
      "loss": 0.0155,
      "step": 962440
    },
    {
      "epoch": 1.5750868993146248,
      "grad_norm": 0.19573041796684265,
      "learning_rate": 6.894961222432346e-06,
      "loss": 0.0224,
      "step": 962460
    },
    {
      "epoch": 1.575119629753278,
      "grad_norm": 0.515969455242157,
      "learning_rate": 6.894895330218828e-06,
      "loss": 0.0181,
      "step": 962480
    },
    {
      "epoch": 1.5751523601919313,
      "grad_norm": 0.6902418732643127,
      "learning_rate": 6.894829438005312e-06,
      "loss": 0.0211,
      "step": 962500
    },
    {
      "epoch": 1.5751850906305847,
      "grad_norm": 0.43274226784706116,
      "learning_rate": 6.8947635457917935e-06,
      "loss": 0.0278,
      "step": 962520
    },
    {
      "epoch": 1.575217821069238,
      "grad_norm": 0.5395789742469788,
      "learning_rate": 6.894697653578277e-06,
      "loss": 0.0261,
      "step": 962540
    },
    {
      "epoch": 1.5752505515078914,
      "grad_norm": 0.2919846475124359,
      "learning_rate": 6.894631761364761e-06,
      "loss": 0.0184,
      "step": 962560
    },
    {
      "epoch": 1.5752832819465445,
      "grad_norm": 0.2904972732067108,
      "learning_rate": 6.894565869151243e-06,
      "loss": 0.0174,
      "step": 962580
    },
    {
      "epoch": 1.5753160123851981,
      "grad_norm": 0.7109282612800598,
      "learning_rate": 6.894499976937726e-06,
      "loss": 0.0208,
      "step": 962600
    },
    {
      "epoch": 1.5753487428238513,
      "grad_norm": 0.20790231227874756,
      "learning_rate": 6.894434084724209e-06,
      "loss": 0.025,
      "step": 962620
    },
    {
      "epoch": 1.5753814732625047,
      "grad_norm": 0.2995418310165405,
      "learning_rate": 6.894368192510692e-06,
      "loss": 0.0155,
      "step": 962640
    },
    {
      "epoch": 1.575414203701158,
      "grad_norm": 0.9245145320892334,
      "learning_rate": 6.894302300297174e-06,
      "loss": 0.0253,
      "step": 962660
    },
    {
      "epoch": 1.5754469341398112,
      "grad_norm": 0.16300879418849945,
      "learning_rate": 6.894236408083658e-06,
      "loss": 0.0192,
      "step": 962680
    },
    {
      "epoch": 1.5754796645784648,
      "grad_norm": 0.6766977906227112,
      "learning_rate": 6.89417051587014e-06,
      "loss": 0.0231,
      "step": 962700
    },
    {
      "epoch": 1.575512395017118,
      "grad_norm": 0.3063630759716034,
      "learning_rate": 6.8941046236566235e-06,
      "loss": 0.0199,
      "step": 962720
    },
    {
      "epoch": 1.5755451254557715,
      "grad_norm": 0.538261353969574,
      "learning_rate": 6.894038731443105e-06,
      "loss": 0.0132,
      "step": 962740
    },
    {
      "epoch": 1.5755778558944247,
      "grad_norm": 0.4840269088745117,
      "learning_rate": 6.893972839229589e-06,
      "loss": 0.0174,
      "step": 962760
    },
    {
      "epoch": 1.575610586333078,
      "grad_norm": 0.288971871137619,
      "learning_rate": 6.893906947016072e-06,
      "loss": 0.0181,
      "step": 962780
    },
    {
      "epoch": 1.5756433167717314,
      "grad_norm": 0.8062926530838013,
      "learning_rate": 6.893841054802554e-06,
      "loss": 0.0165,
      "step": 962800
    },
    {
      "epoch": 1.5756760472103846,
      "grad_norm": 1.3738654851913452,
      "learning_rate": 6.893775162589037e-06,
      "loss": 0.0164,
      "step": 962820
    },
    {
      "epoch": 1.5757087776490382,
      "grad_norm": 0.5918480157852173,
      "learning_rate": 6.893709270375521e-06,
      "loss": 0.024,
      "step": 962840
    },
    {
      "epoch": 1.5757415080876913,
      "grad_norm": 0.28683769702911377,
      "learning_rate": 6.893643378162003e-06,
      "loss": 0.0139,
      "step": 962860
    },
    {
      "epoch": 1.5757742385263447,
      "grad_norm": 0.20406869053840637,
      "learning_rate": 6.893577485948486e-06,
      "loss": 0.0203,
      "step": 962880
    },
    {
      "epoch": 1.575806968964998,
      "grad_norm": 0.22760039567947388,
      "learning_rate": 6.893511593734968e-06,
      "loss": 0.0128,
      "step": 962900
    },
    {
      "epoch": 1.5758396994036514,
      "grad_norm": 0.49813342094421387,
      "learning_rate": 6.893445701521452e-06,
      "loss": 0.0239,
      "step": 962920
    },
    {
      "epoch": 1.5758724298423048,
      "grad_norm": 0.24398453533649445,
      "learning_rate": 6.893379809307935e-06,
      "loss": 0.0232,
      "step": 962940
    },
    {
      "epoch": 1.575905160280958,
      "grad_norm": 0.638252854347229,
      "learning_rate": 6.893313917094417e-06,
      "loss": 0.0164,
      "step": 962960
    },
    {
      "epoch": 1.5759378907196115,
      "grad_norm": 0.5300801992416382,
      "learning_rate": 6.893248024880901e-06,
      "loss": 0.0215,
      "step": 962980
    },
    {
      "epoch": 1.5759706211582647,
      "grad_norm": 0.7033820152282715,
      "learning_rate": 6.8931821326673835e-06,
      "loss": 0.0177,
      "step": 963000
    },
    {
      "epoch": 1.576003351596918,
      "grad_norm": 0.20049288868904114,
      "learning_rate": 6.893116240453866e-06,
      "loss": 0.0158,
      "step": 963020
    },
    {
      "epoch": 1.5760360820355714,
      "grad_norm": 0.6313176155090332,
      "learning_rate": 6.893050348240349e-06,
      "loss": 0.0187,
      "step": 963040
    },
    {
      "epoch": 1.5760688124742248,
      "grad_norm": 0.20521153509616852,
      "learning_rate": 6.892984456026833e-06,
      "loss": 0.0214,
      "step": 963060
    },
    {
      "epoch": 1.5761015429128782,
      "grad_norm": 0.47210514545440674,
      "learning_rate": 6.8929185638133145e-06,
      "loss": 0.0157,
      "step": 963080
    },
    {
      "epoch": 1.5761342733515313,
      "grad_norm": 1.0335674285888672,
      "learning_rate": 6.892852671599798e-06,
      "loss": 0.0167,
      "step": 963100
    },
    {
      "epoch": 1.576167003790185,
      "grad_norm": 0.40712419152259827,
      "learning_rate": 6.89278677938628e-06,
      "loss": 0.0199,
      "step": 963120
    },
    {
      "epoch": 1.576199734228838,
      "grad_norm": 0.5601072907447815,
      "learning_rate": 6.8927208871727635e-06,
      "loss": 0.0223,
      "step": 963140
    },
    {
      "epoch": 1.5762324646674915,
      "grad_norm": 0.5401032567024231,
      "learning_rate": 6.8926549949592454e-06,
      "loss": 0.018,
      "step": 963160
    },
    {
      "epoch": 1.5762651951061448,
      "grad_norm": 0.4281475245952606,
      "learning_rate": 6.892589102745729e-06,
      "loss": 0.0224,
      "step": 963180
    },
    {
      "epoch": 1.5762979255447982,
      "grad_norm": 0.6820855736732483,
      "learning_rate": 6.892523210532212e-06,
      "loss": 0.0224,
      "step": 963200
    },
    {
      "epoch": 1.5763306559834516,
      "grad_norm": 0.5187458992004395,
      "learning_rate": 6.8924573183186945e-06,
      "loss": 0.0193,
      "step": 963220
    },
    {
      "epoch": 1.5763633864221047,
      "grad_norm": 0.5525606870651245,
      "learning_rate": 6.892391426105177e-06,
      "loss": 0.0112,
      "step": 963240
    },
    {
      "epoch": 1.5763961168607583,
      "grad_norm": 0.37376904487609863,
      "learning_rate": 6.892325533891661e-06,
      "loss": 0.0183,
      "step": 963260
    },
    {
      "epoch": 1.5764288472994115,
      "grad_norm": 0.3490876853466034,
      "learning_rate": 6.892259641678144e-06,
      "loss": 0.022,
      "step": 963280
    },
    {
      "epoch": 1.5764615777380648,
      "grad_norm": 0.4163309335708618,
      "learning_rate": 6.892193749464626e-06,
      "loss": 0.0241,
      "step": 963300
    },
    {
      "epoch": 1.5764943081767182,
      "grad_norm": 1.212281346321106,
      "learning_rate": 6.89212785725111e-06,
      "loss": 0.0217,
      "step": 963320
    },
    {
      "epoch": 1.5765270386153716,
      "grad_norm": 1.261392593383789,
      "learning_rate": 6.892061965037592e-06,
      "loss": 0.0163,
      "step": 963340
    },
    {
      "epoch": 1.576559769054025,
      "grad_norm": 0.34460729360580444,
      "learning_rate": 6.891996072824075e-06,
      "loss": 0.0185,
      "step": 963360
    },
    {
      "epoch": 1.576592499492678,
      "grad_norm": 0.19636471569538116,
      "learning_rate": 6.891930180610557e-06,
      "loss": 0.0304,
      "step": 963380
    },
    {
      "epoch": 1.5766252299313317,
      "grad_norm": 0.8548028469085693,
      "learning_rate": 6.891864288397041e-06,
      "loss": 0.0178,
      "step": 963400
    },
    {
      "epoch": 1.5766579603699848,
      "grad_norm": 1.1356446743011475,
      "learning_rate": 6.891798396183524e-06,
      "loss": 0.018,
      "step": 963420
    },
    {
      "epoch": 1.5766906908086382,
      "grad_norm": 0.36125364899635315,
      "learning_rate": 6.891732503970006e-06,
      "loss": 0.0153,
      "step": 963440
    },
    {
      "epoch": 1.5767234212472916,
      "grad_norm": 0.38011765480041504,
      "learning_rate": 6.891666611756489e-06,
      "loss": 0.0195,
      "step": 963460
    },
    {
      "epoch": 1.5767561516859447,
      "grad_norm": 0.6824076175689697,
      "learning_rate": 6.891600719542973e-06,
      "loss": 0.0243,
      "step": 963480
    },
    {
      "epoch": 1.5767888821245983,
      "grad_norm": 0.7354531288146973,
      "learning_rate": 6.8915348273294545e-06,
      "loss": 0.026,
      "step": 963500
    },
    {
      "epoch": 1.5768216125632515,
      "grad_norm": 0.599083662033081,
      "learning_rate": 6.891468935115938e-06,
      "loss": 0.0291,
      "step": 963520
    },
    {
      "epoch": 1.5768543430019049,
      "grad_norm": 0.23042982816696167,
      "learning_rate": 6.89140304290242e-06,
      "loss": 0.0162,
      "step": 963540
    },
    {
      "epoch": 1.5768870734405582,
      "grad_norm": 0.9260186553001404,
      "learning_rate": 6.891337150688904e-06,
      "loss": 0.024,
      "step": 963560
    },
    {
      "epoch": 1.5769198038792116,
      "grad_norm": 0.8676055669784546,
      "learning_rate": 6.891271258475386e-06,
      "loss": 0.021,
      "step": 963580
    },
    {
      "epoch": 1.576952534317865,
      "grad_norm": 0.5060045123100281,
      "learning_rate": 6.891205366261869e-06,
      "loss": 0.0197,
      "step": 963600
    },
    {
      "epoch": 1.5769852647565181,
      "grad_norm": 0.2735699415206909,
      "learning_rate": 6.891139474048353e-06,
      "loss": 0.0164,
      "step": 963620
    },
    {
      "epoch": 1.5770179951951717,
      "grad_norm": 1.1816428899765015,
      "learning_rate": 6.891073581834835e-06,
      "loss": 0.021,
      "step": 963640
    },
    {
      "epoch": 1.5770507256338249,
      "grad_norm": 0.18497596681118011,
      "learning_rate": 6.891007689621318e-06,
      "loss": 0.0171,
      "step": 963660
    },
    {
      "epoch": 1.5770834560724782,
      "grad_norm": 0.4130638837814331,
      "learning_rate": 6.890941797407801e-06,
      "loss": 0.028,
      "step": 963680
    },
    {
      "epoch": 1.5771161865111316,
      "grad_norm": 0.5190057754516602,
      "learning_rate": 6.8908759051942845e-06,
      "loss": 0.0255,
      "step": 963700
    },
    {
      "epoch": 1.577148916949785,
      "grad_norm": 0.12263943254947662,
      "learning_rate": 6.890810012980766e-06,
      "loss": 0.0281,
      "step": 963720
    },
    {
      "epoch": 1.5771816473884384,
      "grad_norm": 1.466745376586914,
      "learning_rate": 6.89074412076725e-06,
      "loss": 0.0205,
      "step": 963740
    },
    {
      "epoch": 1.5772143778270915,
      "grad_norm": 0.85613614320755,
      "learning_rate": 6.890678228553732e-06,
      "loss": 0.0241,
      "step": 963760
    },
    {
      "epoch": 1.577247108265745,
      "grad_norm": 1.103551983833313,
      "learning_rate": 6.8906123363402154e-06,
      "loss": 0.0136,
      "step": 963780
    },
    {
      "epoch": 1.5772798387043983,
      "grad_norm": 1.1593519449234009,
      "learning_rate": 6.890546444126698e-06,
      "loss": 0.0301,
      "step": 963800
    },
    {
      "epoch": 1.5773125691430516,
      "grad_norm": 0.39618566632270813,
      "learning_rate": 6.890480551913181e-06,
      "loss": 0.0136,
      "step": 963820
    },
    {
      "epoch": 1.577345299581705,
      "grad_norm": 0.7305014729499817,
      "learning_rate": 6.890414659699664e-06,
      "loss": 0.0107,
      "step": 963840
    },
    {
      "epoch": 1.5773780300203584,
      "grad_norm": 0.12638291716575623,
      "learning_rate": 6.890348767486147e-06,
      "loss": 0.0245,
      "step": 963860
    },
    {
      "epoch": 1.5774107604590117,
      "grad_norm": 0.1630849391222,
      "learning_rate": 6.890282875272629e-06,
      "loss": 0.0282,
      "step": 963880
    },
    {
      "epoch": 1.577443490897665,
      "grad_norm": 0.7713725566864014,
      "learning_rate": 6.890216983059113e-06,
      "loss": 0.0228,
      "step": 963900
    },
    {
      "epoch": 1.5774762213363185,
      "grad_norm": 0.7136065363883972,
      "learning_rate": 6.890151090845595e-06,
      "loss": 0.0175,
      "step": 963920
    },
    {
      "epoch": 1.5775089517749716,
      "grad_norm": 0.938510000705719,
      "learning_rate": 6.890085198632078e-06,
      "loss": 0.0253,
      "step": 963940
    },
    {
      "epoch": 1.577541682213625,
      "grad_norm": 0.19958050549030304,
      "learning_rate": 6.890019306418561e-06,
      "loss": 0.0309,
      "step": 963960
    },
    {
      "epoch": 1.5775744126522784,
      "grad_norm": 2.3158721923828125,
      "learning_rate": 6.889953414205044e-06,
      "loss": 0.0214,
      "step": 963980
    },
    {
      "epoch": 1.5776071430909318,
      "grad_norm": 0.1990080624818802,
      "learning_rate": 6.889887521991527e-06,
      "loss": 0.0251,
      "step": 964000
    },
    {
      "epoch": 1.5776398735295851,
      "grad_norm": 0.4148385226726532,
      "learning_rate": 6.88982162977801e-06,
      "loss": 0.0314,
      "step": 964020
    },
    {
      "epoch": 1.5776726039682383,
      "grad_norm": 0.9983958601951599,
      "learning_rate": 6.889755737564493e-06,
      "loss": 0.0211,
      "step": 964040
    },
    {
      "epoch": 1.5777053344068919,
      "grad_norm": 0.21869362890720367,
      "learning_rate": 6.8896898453509755e-06,
      "loss": 0.0168,
      "step": 964060
    },
    {
      "epoch": 1.577738064845545,
      "grad_norm": 0.7996470928192139,
      "learning_rate": 6.889623953137459e-06,
      "loss": 0.0175,
      "step": 964080
    },
    {
      "epoch": 1.5777707952841984,
      "grad_norm": 0.20382113754749298,
      "learning_rate": 6.889558060923941e-06,
      "loss": 0.0205,
      "step": 964100
    },
    {
      "epoch": 1.5778035257228518,
      "grad_norm": 0.20784346759319305,
      "learning_rate": 6.8894921687104246e-06,
      "loss": 0.0204,
      "step": 964120
    },
    {
      "epoch": 1.577836256161505,
      "grad_norm": 2.2117226123809814,
      "learning_rate": 6.8894262764969065e-06,
      "loss": 0.0256,
      "step": 964140
    },
    {
      "epoch": 1.5778689866001585,
      "grad_norm": 0.8164634704589844,
      "learning_rate": 6.88936038428339e-06,
      "loss": 0.0272,
      "step": 964160
    },
    {
      "epoch": 1.5779017170388117,
      "grad_norm": 0.3309905529022217,
      "learning_rate": 6.889294492069872e-06,
      "loss": 0.0146,
      "step": 964180
    },
    {
      "epoch": 1.5779344474774653,
      "grad_norm": 0.1648300290107727,
      "learning_rate": 6.8892285998563555e-06,
      "loss": 0.0148,
      "step": 964200
    },
    {
      "epoch": 1.5779671779161184,
      "grad_norm": 0.797174334526062,
      "learning_rate": 6.889162707642838e-06,
      "loss": 0.0194,
      "step": 964220
    },
    {
      "epoch": 1.5779999083547718,
      "grad_norm": 0.48767349123954773,
      "learning_rate": 6.889096815429321e-06,
      "loss": 0.0188,
      "step": 964240
    },
    {
      "epoch": 1.5780326387934251,
      "grad_norm": 0.14304347336292267,
      "learning_rate": 6.889030923215804e-06,
      "loss": 0.0174,
      "step": 964260
    },
    {
      "epoch": 1.5780653692320783,
      "grad_norm": 0.3176875710487366,
      "learning_rate": 6.888965031002287e-06,
      "loss": 0.0162,
      "step": 964280
    },
    {
      "epoch": 1.578098099670732,
      "grad_norm": 0.8441792130470276,
      "learning_rate": 6.888899138788769e-06,
      "loss": 0.0234,
      "step": 964300
    },
    {
      "epoch": 1.578130830109385,
      "grad_norm": 0.595311164855957,
      "learning_rate": 6.888833246575253e-06,
      "loss": 0.0186,
      "step": 964320
    },
    {
      "epoch": 1.5781635605480384,
      "grad_norm": 0.7090269327163696,
      "learning_rate": 6.888767354361736e-06,
      "loss": 0.0211,
      "step": 964340
    },
    {
      "epoch": 1.5781962909866918,
      "grad_norm": 1.0087790489196777,
      "learning_rate": 6.888701462148218e-06,
      "loss": 0.017,
      "step": 964360
    },
    {
      "epoch": 1.5782290214253452,
      "grad_norm": 0.9240311980247498,
      "learning_rate": 6.888635569934702e-06,
      "loss": 0.0157,
      "step": 964380
    },
    {
      "epoch": 1.5782617518639985,
      "grad_norm": 0.3493904173374176,
      "learning_rate": 6.888569677721184e-06,
      "loss": 0.014,
      "step": 964400
    },
    {
      "epoch": 1.5782944823026517,
      "grad_norm": 0.6332390904426575,
      "learning_rate": 6.888503785507667e-06,
      "loss": 0.0269,
      "step": 964420
    },
    {
      "epoch": 1.5783272127413053,
      "grad_norm": 0.45857539772987366,
      "learning_rate": 6.88843789329415e-06,
      "loss": 0.0233,
      "step": 964440
    },
    {
      "epoch": 1.5783599431799584,
      "grad_norm": 0.34910550713539124,
      "learning_rate": 6.888372001080633e-06,
      "loss": 0.0223,
      "step": 964460
    },
    {
      "epoch": 1.5783926736186118,
      "grad_norm": 1.209507703781128,
      "learning_rate": 6.8883061088671156e-06,
      "loss": 0.0299,
      "step": 964480
    },
    {
      "epoch": 1.5784254040572652,
      "grad_norm": 0.43160393834114075,
      "learning_rate": 6.888240216653599e-06,
      "loss": 0.0201,
      "step": 964500
    },
    {
      "epoch": 1.5784581344959185,
      "grad_norm": 1.0803792476654053,
      "learning_rate": 6.888174324440081e-06,
      "loss": 0.0248,
      "step": 964520
    },
    {
      "epoch": 1.578490864934572,
      "grad_norm": 0.8067718744277954,
      "learning_rate": 6.888108432226565e-06,
      "loss": 0.02,
      "step": 964540
    },
    {
      "epoch": 1.578523595373225,
      "grad_norm": 0.3669899106025696,
      "learning_rate": 6.8880425400130465e-06,
      "loss": 0.0202,
      "step": 964560
    },
    {
      "epoch": 1.5785563258118787,
      "grad_norm": 0.596659243106842,
      "learning_rate": 6.88797664779953e-06,
      "loss": 0.0223,
      "step": 964580
    },
    {
      "epoch": 1.5785890562505318,
      "grad_norm": 0.6650668978691101,
      "learning_rate": 6.887910755586013e-06,
      "loss": 0.019,
      "step": 964600
    },
    {
      "epoch": 1.5786217866891852,
      "grad_norm": 0.42544087767601013,
      "learning_rate": 6.887844863372496e-06,
      "loss": 0.0186,
      "step": 964620
    },
    {
      "epoch": 1.5786545171278386,
      "grad_norm": 0.33380213379859924,
      "learning_rate": 6.887778971158978e-06,
      "loss": 0.0159,
      "step": 964640
    },
    {
      "epoch": 1.578687247566492,
      "grad_norm": 0.24172531068325043,
      "learning_rate": 6.887713078945462e-06,
      "loss": 0.0137,
      "step": 964660
    },
    {
      "epoch": 1.5787199780051453,
      "grad_norm": 0.7409730553627014,
      "learning_rate": 6.887647186731945e-06,
      "loss": 0.0202,
      "step": 964680
    },
    {
      "epoch": 1.5787527084437984,
      "grad_norm": 0.6390595436096191,
      "learning_rate": 6.887581294518427e-06,
      "loss": 0.0303,
      "step": 964700
    },
    {
      "epoch": 1.578785438882452,
      "grad_norm": 0.4992314577102661,
      "learning_rate": 6.887515402304911e-06,
      "loss": 0.01,
      "step": 964720
    },
    {
      "epoch": 1.5788181693211052,
      "grad_norm": 1.0004411935806274,
      "learning_rate": 6.887449510091393e-06,
      "loss": 0.0286,
      "step": 964740
    },
    {
      "epoch": 1.5788508997597586,
      "grad_norm": 0.39829787611961365,
      "learning_rate": 6.8873836178778765e-06,
      "loss": 0.0177,
      "step": 964760
    },
    {
      "epoch": 1.578883630198412,
      "grad_norm": 0.12080417573451996,
      "learning_rate": 6.887317725664358e-06,
      "loss": 0.0184,
      "step": 964780
    },
    {
      "epoch": 1.5789163606370653,
      "grad_norm": 0.5037205219268799,
      "learning_rate": 6.887251833450842e-06,
      "loss": 0.0236,
      "step": 964800
    },
    {
      "epoch": 1.5789490910757187,
      "grad_norm": 0.36470678448677063,
      "learning_rate": 6.887185941237325e-06,
      "loss": 0.0175,
      "step": 964820
    },
    {
      "epoch": 1.5789818215143718,
      "grad_norm": 0.29043689370155334,
      "learning_rate": 6.8871200490238074e-06,
      "loss": 0.0312,
      "step": 964840
    },
    {
      "epoch": 1.5790145519530254,
      "grad_norm": 0.2708469033241272,
      "learning_rate": 6.88705415681029e-06,
      "loss": 0.0193,
      "step": 964860
    },
    {
      "epoch": 1.5790472823916786,
      "grad_norm": 0.5652458071708679,
      "learning_rate": 6.886988264596774e-06,
      "loss": 0.0255,
      "step": 964880
    },
    {
      "epoch": 1.579080012830332,
      "grad_norm": 0.3077433407306671,
      "learning_rate": 6.886922372383256e-06,
      "loss": 0.025,
      "step": 964900
    },
    {
      "epoch": 1.5791127432689853,
      "grad_norm": 1.2759300470352173,
      "learning_rate": 6.886856480169739e-06,
      "loss": 0.0175,
      "step": 964920
    },
    {
      "epoch": 1.5791454737076385,
      "grad_norm": 0.5020841360092163,
      "learning_rate": 6.886790587956221e-06,
      "loss": 0.0244,
      "step": 964940
    },
    {
      "epoch": 1.579178204146292,
      "grad_norm": 0.8284783363342285,
      "learning_rate": 6.886724695742705e-06,
      "loss": 0.0197,
      "step": 964960
    },
    {
      "epoch": 1.5792109345849452,
      "grad_norm": 0.28292208909988403,
      "learning_rate": 6.8866588035291875e-06,
      "loss": 0.0159,
      "step": 964980
    },
    {
      "epoch": 1.5792436650235988,
      "grad_norm": 0.8431704044342041,
      "learning_rate": 6.88659291131567e-06,
      "loss": 0.0216,
      "step": 965000
    },
    {
      "epoch": 1.579276395462252,
      "grad_norm": 0.5377498269081116,
      "learning_rate": 6.886527019102153e-06,
      "loss": 0.0201,
      "step": 965020
    },
    {
      "epoch": 1.5793091259009053,
      "grad_norm": 0.48675310611724854,
      "learning_rate": 6.8864611268886365e-06,
      "loss": 0.0241,
      "step": 965040
    },
    {
      "epoch": 1.5793418563395587,
      "grad_norm": 0.5172522664070129,
      "learning_rate": 6.886395234675119e-06,
      "loss": 0.0198,
      "step": 965060
    },
    {
      "epoch": 1.5793745867782119,
      "grad_norm": 0.12785564363002777,
      "learning_rate": 6.886329342461602e-06,
      "loss": 0.0144,
      "step": 965080
    },
    {
      "epoch": 1.5794073172168654,
      "grad_norm": 1.184326410293579,
      "learning_rate": 6.886263450248086e-06,
      "loss": 0.0179,
      "step": 965100
    },
    {
      "epoch": 1.5794400476555186,
      "grad_norm": 0.8570908308029175,
      "learning_rate": 6.8861975580345675e-06,
      "loss": 0.0226,
      "step": 965120
    },
    {
      "epoch": 1.579472778094172,
      "grad_norm": 0.07997015863656998,
      "learning_rate": 6.886131665821051e-06,
      "loss": 0.0155,
      "step": 965140
    },
    {
      "epoch": 1.5795055085328253,
      "grad_norm": 0.3518718481063843,
      "learning_rate": 6.886065773607533e-06,
      "loss": 0.0145,
      "step": 965160
    },
    {
      "epoch": 1.5795382389714787,
      "grad_norm": 1.147053837776184,
      "learning_rate": 6.8859998813940165e-06,
      "loss": 0.0301,
      "step": 965180
    },
    {
      "epoch": 1.579570969410132,
      "grad_norm": 0.5009989142417908,
      "learning_rate": 6.8859339891804984e-06,
      "loss": 0.02,
      "step": 965200
    },
    {
      "epoch": 1.5796036998487852,
      "grad_norm": 0.7817405462265015,
      "learning_rate": 6.885868096966982e-06,
      "loss": 0.0227,
      "step": 965220
    },
    {
      "epoch": 1.5796364302874388,
      "grad_norm": 0.2499692291021347,
      "learning_rate": 6.885802204753465e-06,
      "loss": 0.0196,
      "step": 965240
    },
    {
      "epoch": 1.579669160726092,
      "grad_norm": 0.6231915354728699,
      "learning_rate": 6.8857363125399475e-06,
      "loss": 0.0153,
      "step": 965260
    },
    {
      "epoch": 1.5797018911647454,
      "grad_norm": 2.5290284156799316,
      "learning_rate": 6.88567042032643e-06,
      "loss": 0.0201,
      "step": 965280
    },
    {
      "epoch": 1.5797346216033987,
      "grad_norm": 0.5624614953994751,
      "learning_rate": 6.885604528112914e-06,
      "loss": 0.018,
      "step": 965300
    },
    {
      "epoch": 1.579767352042052,
      "grad_norm": 0.5151152014732361,
      "learning_rate": 6.885538635899396e-06,
      "loss": 0.0232,
      "step": 965320
    },
    {
      "epoch": 1.5798000824807055,
      "grad_norm": 0.46086540818214417,
      "learning_rate": 6.885472743685879e-06,
      "loss": 0.0177,
      "step": 965340
    },
    {
      "epoch": 1.5798328129193586,
      "grad_norm": 0.20309561491012573,
      "learning_rate": 6.885406851472361e-06,
      "loss": 0.0167,
      "step": 965360
    },
    {
      "epoch": 1.5798655433580122,
      "grad_norm": 0.5624170899391174,
      "learning_rate": 6.885340959258845e-06,
      "loss": 0.0163,
      "step": 965380
    },
    {
      "epoch": 1.5798982737966654,
      "grad_norm": 0.22996364533901215,
      "learning_rate": 6.885275067045328e-06,
      "loss": 0.0171,
      "step": 965400
    },
    {
      "epoch": 1.5799310042353187,
      "grad_norm": 0.13621407747268677,
      "learning_rate": 6.88520917483181e-06,
      "loss": 0.0142,
      "step": 965420
    },
    {
      "epoch": 1.579963734673972,
      "grad_norm": 0.4036048948764801,
      "learning_rate": 6.885143282618294e-06,
      "loss": 0.0181,
      "step": 965440
    },
    {
      "epoch": 1.5799964651126255,
      "grad_norm": 0.5696567296981812,
      "learning_rate": 6.885077390404777e-06,
      "loss": 0.0271,
      "step": 965460
    },
    {
      "epoch": 1.5800291955512789,
      "grad_norm": 0.9352465271949768,
      "learning_rate": 6.885011498191259e-06,
      "loss": 0.0174,
      "step": 965480
    },
    {
      "epoch": 1.580061925989932,
      "grad_norm": 0.5900132656097412,
      "learning_rate": 6.884945605977742e-06,
      "loss": 0.0224,
      "step": 965500
    },
    {
      "epoch": 1.5800946564285856,
      "grad_norm": 0.7621725797653198,
      "learning_rate": 6.884879713764226e-06,
      "loss": 0.0195,
      "step": 965520
    },
    {
      "epoch": 1.5801273868672387,
      "grad_norm": 0.3671617805957794,
      "learning_rate": 6.8848138215507076e-06,
      "loss": 0.0214,
      "step": 965540
    },
    {
      "epoch": 1.5801601173058921,
      "grad_norm": 0.4362688362598419,
      "learning_rate": 6.884747929337191e-06,
      "loss": 0.0173,
      "step": 965560
    },
    {
      "epoch": 1.5801928477445455,
      "grad_norm": 0.5566855669021606,
      "learning_rate": 6.884682037123673e-06,
      "loss": 0.021,
      "step": 965580
    },
    {
      "epoch": 1.5802255781831989,
      "grad_norm": 0.4236622750759125,
      "learning_rate": 6.884616144910157e-06,
      "loss": 0.0213,
      "step": 965600
    },
    {
      "epoch": 1.5802583086218522,
      "grad_norm": 0.31746816635131836,
      "learning_rate": 6.884550252696639e-06,
      "loss": 0.0207,
      "step": 965620
    },
    {
      "epoch": 1.5802910390605054,
      "grad_norm": 1.0335204601287842,
      "learning_rate": 6.884484360483122e-06,
      "loss": 0.019,
      "step": 965640
    },
    {
      "epoch": 1.580323769499159,
      "grad_norm": 0.9862407445907593,
      "learning_rate": 6.884418468269605e-06,
      "loss": 0.0289,
      "step": 965660
    },
    {
      "epoch": 1.5803564999378121,
      "grad_norm": 2.9112179279327393,
      "learning_rate": 6.8843525760560884e-06,
      "loss": 0.0185,
      "step": 965680
    },
    {
      "epoch": 1.5803892303764655,
      "grad_norm": 0.16234242916107178,
      "learning_rate": 6.88428668384257e-06,
      "loss": 0.0196,
      "step": 965700
    },
    {
      "epoch": 1.5804219608151189,
      "grad_norm": 1.0093435049057007,
      "learning_rate": 6.884220791629054e-06,
      "loss": 0.0134,
      "step": 965720
    },
    {
      "epoch": 1.580454691253772,
      "grad_norm": 0.10123316198587418,
      "learning_rate": 6.8841548994155375e-06,
      "loss": 0.0184,
      "step": 965740
    },
    {
      "epoch": 1.5804874216924256,
      "grad_norm": 0.9578190445899963,
      "learning_rate": 6.884089007202019e-06,
      "loss": 0.0166,
      "step": 965760
    },
    {
      "epoch": 1.5805201521310788,
      "grad_norm": 0.7858930826187134,
      "learning_rate": 6.884023114988503e-06,
      "loss": 0.0243,
      "step": 965780
    },
    {
      "epoch": 1.5805528825697324,
      "grad_norm": 0.06915285438299179,
      "learning_rate": 6.883957222774985e-06,
      "loss": 0.0235,
      "step": 965800
    },
    {
      "epoch": 1.5805856130083855,
      "grad_norm": 1.0520448684692383,
      "learning_rate": 6.8838913305614685e-06,
      "loss": 0.0234,
      "step": 965820
    },
    {
      "epoch": 1.5806183434470389,
      "grad_norm": 0.18323618173599243,
      "learning_rate": 6.883825438347951e-06,
      "loss": 0.0137,
      "step": 965840
    },
    {
      "epoch": 1.5806510738856923,
      "grad_norm": 0.3001302182674408,
      "learning_rate": 6.883759546134434e-06,
      "loss": 0.0133,
      "step": 965860
    },
    {
      "epoch": 1.5806838043243454,
      "grad_norm": 0.5982248187065125,
      "learning_rate": 6.883693653920917e-06,
      "loss": 0.018,
      "step": 965880
    },
    {
      "epoch": 1.580716534762999,
      "grad_norm": 0.17490147054195404,
      "learning_rate": 6.8836277617074e-06,
      "loss": 0.0147,
      "step": 965900
    },
    {
      "epoch": 1.5807492652016522,
      "grad_norm": 0.5229796767234802,
      "learning_rate": 6.883561869493882e-06,
      "loss": 0.0229,
      "step": 965920
    },
    {
      "epoch": 1.5807819956403055,
      "grad_norm": 1.6321778297424316,
      "learning_rate": 6.883495977280366e-06,
      "loss": 0.023,
      "step": 965940
    },
    {
      "epoch": 1.580814726078959,
      "grad_norm": 0.4532260000705719,
      "learning_rate": 6.883430085066848e-06,
      "loss": 0.022,
      "step": 965960
    },
    {
      "epoch": 1.5808474565176123,
      "grad_norm": 0.5967881679534912,
      "learning_rate": 6.883364192853331e-06,
      "loss": 0.0235,
      "step": 965980
    },
    {
      "epoch": 1.5808801869562656,
      "grad_norm": 1.6766315698623657,
      "learning_rate": 6.883298300639813e-06,
      "loss": 0.0188,
      "step": 966000
    },
    {
      "epoch": 1.5809129173949188,
      "grad_norm": 0.313236266374588,
      "learning_rate": 6.883232408426297e-06,
      "loss": 0.0237,
      "step": 966020
    },
    {
      "epoch": 1.5809456478335724,
      "grad_norm": 0.286295086145401,
      "learning_rate": 6.8831665162127794e-06,
      "loss": 0.0186,
      "step": 966040
    },
    {
      "epoch": 1.5809783782722255,
      "grad_norm": 0.48593568801879883,
      "learning_rate": 6.883100623999263e-06,
      "loss": 0.0165,
      "step": 966060
    },
    {
      "epoch": 1.581011108710879,
      "grad_norm": 0.3383893072605133,
      "learning_rate": 6.883034731785746e-06,
      "loss": 0.0148,
      "step": 966080
    },
    {
      "epoch": 1.5810438391495323,
      "grad_norm": 0.4261395335197449,
      "learning_rate": 6.8829688395722285e-06,
      "loss": 0.0251,
      "step": 966100
    },
    {
      "epoch": 1.5810765695881857,
      "grad_norm": 0.39544546604156494,
      "learning_rate": 6.882902947358712e-06,
      "loss": 0.0197,
      "step": 966120
    },
    {
      "epoch": 1.581109300026839,
      "grad_norm": 1.478183388710022,
      "learning_rate": 6.882837055145194e-06,
      "loss": 0.0167,
      "step": 966140
    },
    {
      "epoch": 1.5811420304654922,
      "grad_norm": 0.8062809705734253,
      "learning_rate": 6.8827711629316776e-06,
      "loss": 0.0179,
      "step": 966160
    },
    {
      "epoch": 1.5811747609041458,
      "grad_norm": 0.3825180232524872,
      "learning_rate": 6.8827052707181595e-06,
      "loss": 0.0232,
      "step": 966180
    },
    {
      "epoch": 1.581207491342799,
      "grad_norm": 1.2099683284759521,
      "learning_rate": 6.882639378504643e-06,
      "loss": 0.0167,
      "step": 966200
    },
    {
      "epoch": 1.5812402217814523,
      "grad_norm": 0.9052413702011108,
      "learning_rate": 6.882573486291125e-06,
      "loss": 0.0287,
      "step": 966220
    },
    {
      "epoch": 1.5812729522201057,
      "grad_norm": 0.40116316080093384,
      "learning_rate": 6.8825075940776085e-06,
      "loss": 0.0207,
      "step": 966240
    },
    {
      "epoch": 1.581305682658759,
      "grad_norm": 0.4798911213874817,
      "learning_rate": 6.882441701864091e-06,
      "loss": 0.017,
      "step": 966260
    },
    {
      "epoch": 1.5813384130974124,
      "grad_norm": 0.10468690097332001,
      "learning_rate": 6.882375809650574e-06,
      "loss": 0.0177,
      "step": 966280
    },
    {
      "epoch": 1.5813711435360656,
      "grad_norm": 0.2649567723274231,
      "learning_rate": 6.882309917437057e-06,
      "loss": 0.0134,
      "step": 966300
    },
    {
      "epoch": 1.5814038739747192,
      "grad_norm": 0.22807560861110687,
      "learning_rate": 6.88224402522354e-06,
      "loss": 0.022,
      "step": 966320
    },
    {
      "epoch": 1.5814366044133723,
      "grad_norm": 0.2571732699871063,
      "learning_rate": 6.882178133010022e-06,
      "loss": 0.0127,
      "step": 966340
    },
    {
      "epoch": 1.5814693348520257,
      "grad_norm": 1.7261607646942139,
      "learning_rate": 6.882112240796506e-06,
      "loss": 0.0152,
      "step": 966360
    },
    {
      "epoch": 1.581502065290679,
      "grad_norm": 1.1642770767211914,
      "learning_rate": 6.882046348582988e-06,
      "loss": 0.0231,
      "step": 966380
    },
    {
      "epoch": 1.5815347957293322,
      "grad_norm": 0.6041749119758606,
      "learning_rate": 6.881980456369471e-06,
      "loss": 0.0262,
      "step": 966400
    },
    {
      "epoch": 1.5815675261679858,
      "grad_norm": 0.5021344423294067,
      "learning_rate": 6.881914564155954e-06,
      "loss": 0.0164,
      "step": 966420
    },
    {
      "epoch": 1.581600256606639,
      "grad_norm": 0.41460973024368286,
      "learning_rate": 6.881848671942437e-06,
      "loss": 0.026,
      "step": 966440
    },
    {
      "epoch": 1.5816329870452925,
      "grad_norm": 0.9254004955291748,
      "learning_rate": 6.88178277972892e-06,
      "loss": 0.0201,
      "step": 966460
    },
    {
      "epoch": 1.5816657174839457,
      "grad_norm": 0.36807459592819214,
      "learning_rate": 6.881716887515403e-06,
      "loss": 0.0245,
      "step": 966480
    },
    {
      "epoch": 1.581698447922599,
      "grad_norm": 0.2927107512950897,
      "learning_rate": 6.881650995301886e-06,
      "loss": 0.0211,
      "step": 966500
    },
    {
      "epoch": 1.5817311783612524,
      "grad_norm": 0.8344473838806152,
      "learning_rate": 6.881585103088369e-06,
      "loss": 0.0244,
      "step": 966520
    },
    {
      "epoch": 1.5817639087999056,
      "grad_norm": 1.4329564571380615,
      "learning_rate": 6.881519210874852e-06,
      "loss": 0.0241,
      "step": 966540
    },
    {
      "epoch": 1.5817966392385592,
      "grad_norm": 0.7877682447433472,
      "learning_rate": 6.881453318661334e-06,
      "loss": 0.0155,
      "step": 966560
    },
    {
      "epoch": 1.5818293696772123,
      "grad_norm": 0.31180790066719055,
      "learning_rate": 6.881387426447818e-06,
      "loss": 0.0164,
      "step": 966580
    },
    {
      "epoch": 1.5818621001158657,
      "grad_norm": 0.12842518091201782,
      "learning_rate": 6.8813215342342995e-06,
      "loss": 0.0232,
      "step": 966600
    },
    {
      "epoch": 1.581894830554519,
      "grad_norm": 0.4459116458892822,
      "learning_rate": 6.881255642020783e-06,
      "loss": 0.0168,
      "step": 966620
    },
    {
      "epoch": 1.5819275609931724,
      "grad_norm": 0.859383225440979,
      "learning_rate": 6.881189749807266e-06,
      "loss": 0.0188,
      "step": 966640
    },
    {
      "epoch": 1.5819602914318258,
      "grad_norm": 0.6454992294311523,
      "learning_rate": 6.881123857593749e-06,
      "loss": 0.0252,
      "step": 966660
    },
    {
      "epoch": 1.581993021870479,
      "grad_norm": 0.4458949565887451,
      "learning_rate": 6.881057965380231e-06,
      "loss": 0.021,
      "step": 966680
    },
    {
      "epoch": 1.5820257523091326,
      "grad_norm": 0.5875533223152161,
      "learning_rate": 6.880992073166715e-06,
      "loss": 0.019,
      "step": 966700
    },
    {
      "epoch": 1.5820584827477857,
      "grad_norm": 0.23861826956272125,
      "learning_rate": 6.880926180953197e-06,
      "loss": 0.021,
      "step": 966720
    },
    {
      "epoch": 1.582091213186439,
      "grad_norm": 0.7310017347335815,
      "learning_rate": 6.88086028873968e-06,
      "loss": 0.0248,
      "step": 966740
    },
    {
      "epoch": 1.5821239436250925,
      "grad_norm": 0.1475939303636551,
      "learning_rate": 6.880794396526162e-06,
      "loss": 0.0294,
      "step": 966760
    },
    {
      "epoch": 1.5821566740637458,
      "grad_norm": 0.20158863067626953,
      "learning_rate": 6.880728504312646e-06,
      "loss": 0.0173,
      "step": 966780
    },
    {
      "epoch": 1.5821894045023992,
      "grad_norm": 0.5961522459983826,
      "learning_rate": 6.8806626120991295e-06,
      "loss": 0.0208,
      "step": 966800
    },
    {
      "epoch": 1.5822221349410523,
      "grad_norm": 0.9295331239700317,
      "learning_rate": 6.880596719885611e-06,
      "loss": 0.0135,
      "step": 966820
    },
    {
      "epoch": 1.582254865379706,
      "grad_norm": 0.47365766763687134,
      "learning_rate": 6.880530827672095e-06,
      "loss": 0.0204,
      "step": 966840
    },
    {
      "epoch": 1.582287595818359,
      "grad_norm": 0.35434234142303467,
      "learning_rate": 6.880464935458578e-06,
      "loss": 0.0237,
      "step": 966860
    },
    {
      "epoch": 1.5823203262570125,
      "grad_norm": 2.504927396774292,
      "learning_rate": 6.8803990432450604e-06,
      "loss": 0.0221,
      "step": 966880
    },
    {
      "epoch": 1.5823530566956658,
      "grad_norm": 0.5377895832061768,
      "learning_rate": 6.880333151031543e-06,
      "loss": 0.0226,
      "step": 966900
    },
    {
      "epoch": 1.5823857871343192,
      "grad_norm": 0.5027275681495667,
      "learning_rate": 6.880267258818027e-06,
      "loss": 0.0112,
      "step": 966920
    },
    {
      "epoch": 1.5824185175729726,
      "grad_norm": 0.41754648089408875,
      "learning_rate": 6.880201366604509e-06,
      "loss": 0.0125,
      "step": 966940
    },
    {
      "epoch": 1.5824512480116257,
      "grad_norm": 1.1259620189666748,
      "learning_rate": 6.880135474390992e-06,
      "loss": 0.022,
      "step": 966960
    },
    {
      "epoch": 1.5824839784502793,
      "grad_norm": 0.5885187983512878,
      "learning_rate": 6.880069582177474e-06,
      "loss": 0.016,
      "step": 966980
    },
    {
      "epoch": 1.5825167088889325,
      "grad_norm": 0.4438472092151642,
      "learning_rate": 6.880003689963958e-06,
      "loss": 0.0145,
      "step": 967000
    },
    {
      "epoch": 1.5825494393275858,
      "grad_norm": 1.0319476127624512,
      "learning_rate": 6.87993779775044e-06,
      "loss": 0.0211,
      "step": 967020
    },
    {
      "epoch": 1.5825821697662392,
      "grad_norm": 0.33661994338035583,
      "learning_rate": 6.879871905536923e-06,
      "loss": 0.0154,
      "step": 967040
    },
    {
      "epoch": 1.5826149002048926,
      "grad_norm": 2.1078991889953613,
      "learning_rate": 6.879806013323406e-06,
      "loss": 0.0289,
      "step": 967060
    },
    {
      "epoch": 1.582647630643546,
      "grad_norm": 0.4343189597129822,
      "learning_rate": 6.879740121109889e-06,
      "loss": 0.0127,
      "step": 967080
    },
    {
      "epoch": 1.5826803610821991,
      "grad_norm": 1.2355482578277588,
      "learning_rate": 6.879674228896371e-06,
      "loss": 0.0189,
      "step": 967100
    },
    {
      "epoch": 1.5827130915208527,
      "grad_norm": 0.12134713679552078,
      "learning_rate": 6.879608336682855e-06,
      "loss": 0.0124,
      "step": 967120
    },
    {
      "epoch": 1.5827458219595059,
      "grad_norm": 0.06423045694828033,
      "learning_rate": 6.879542444469338e-06,
      "loss": 0.0162,
      "step": 967140
    },
    {
      "epoch": 1.5827785523981592,
      "grad_norm": 0.8700162172317505,
      "learning_rate": 6.8794765522558205e-06,
      "loss": 0.02,
      "step": 967160
    },
    {
      "epoch": 1.5828112828368126,
      "grad_norm": 0.5579283237457275,
      "learning_rate": 6.879410660042304e-06,
      "loss": 0.0151,
      "step": 967180
    },
    {
      "epoch": 1.5828440132754658,
      "grad_norm": 0.3611999750137329,
      "learning_rate": 6.879344767828786e-06,
      "loss": 0.0216,
      "step": 967200
    },
    {
      "epoch": 1.5828767437141193,
      "grad_norm": 0.6140447854995728,
      "learning_rate": 6.8792788756152696e-06,
      "loss": 0.0217,
      "step": 967220
    },
    {
      "epoch": 1.5829094741527725,
      "grad_norm": 0.5760776400566101,
      "learning_rate": 6.8792129834017514e-06,
      "loss": 0.0183,
      "step": 967240
    },
    {
      "epoch": 1.582942204591426,
      "grad_norm": 0.16826416552066803,
      "learning_rate": 6.879147091188235e-06,
      "loss": 0.0196,
      "step": 967260
    },
    {
      "epoch": 1.5829749350300792,
      "grad_norm": 0.20098116993904114,
      "learning_rate": 6.879081198974718e-06,
      "loss": 0.0153,
      "step": 967280
    },
    {
      "epoch": 1.5830076654687326,
      "grad_norm": 0.42272239923477173,
      "learning_rate": 6.8790153067612005e-06,
      "loss": 0.0135,
      "step": 967300
    },
    {
      "epoch": 1.583040395907386,
      "grad_norm": 0.4661270081996918,
      "learning_rate": 6.878949414547683e-06,
      "loss": 0.0221,
      "step": 967320
    },
    {
      "epoch": 1.5830731263460391,
      "grad_norm": 0.926048219203949,
      "learning_rate": 6.878883522334167e-06,
      "loss": 0.0201,
      "step": 967340
    },
    {
      "epoch": 1.5831058567846927,
      "grad_norm": 0.407596617937088,
      "learning_rate": 6.878817630120649e-06,
      "loss": 0.0204,
      "step": 967360
    },
    {
      "epoch": 1.5831385872233459,
      "grad_norm": 1.0615108013153076,
      "learning_rate": 6.878751737907132e-06,
      "loss": 0.0198,
      "step": 967380
    },
    {
      "epoch": 1.5831713176619993,
      "grad_norm": 0.38461002707481384,
      "learning_rate": 6.878685845693614e-06,
      "loss": 0.0179,
      "step": 967400
    },
    {
      "epoch": 1.5832040481006526,
      "grad_norm": 0.9135476350784302,
      "learning_rate": 6.878619953480098e-06,
      "loss": 0.028,
      "step": 967420
    },
    {
      "epoch": 1.583236778539306,
      "grad_norm": 0.9835931658744812,
      "learning_rate": 6.8785540612665805e-06,
      "loss": 0.0172,
      "step": 967440
    },
    {
      "epoch": 1.5832695089779594,
      "grad_norm": 0.17165295779705048,
      "learning_rate": 6.878488169053063e-06,
      "loss": 0.024,
      "step": 967460
    },
    {
      "epoch": 1.5833022394166125,
      "grad_norm": 0.40168052911758423,
      "learning_rate": 6.878422276839546e-06,
      "loss": 0.0188,
      "step": 967480
    },
    {
      "epoch": 1.5833349698552661,
      "grad_norm": 0.20079565048217773,
      "learning_rate": 6.87835638462603e-06,
      "loss": 0.0183,
      "step": 967500
    },
    {
      "epoch": 1.5833677002939193,
      "grad_norm": 1.002585768699646,
      "learning_rate": 6.878290492412512e-06,
      "loss": 0.0197,
      "step": 967520
    },
    {
      "epoch": 1.5834004307325726,
      "grad_norm": 0.2981502115726471,
      "learning_rate": 6.878224600198995e-06,
      "loss": 0.0217,
      "step": 967540
    },
    {
      "epoch": 1.583433161171226,
      "grad_norm": 0.4700658619403839,
      "learning_rate": 6.878158707985479e-06,
      "loss": 0.0179,
      "step": 967560
    },
    {
      "epoch": 1.5834658916098794,
      "grad_norm": 0.14701275527477264,
      "learning_rate": 6.8780928157719606e-06,
      "loss": 0.0181,
      "step": 967580
    },
    {
      "epoch": 1.5834986220485328,
      "grad_norm": 0.1663334220647812,
      "learning_rate": 6.878026923558444e-06,
      "loss": 0.0198,
      "step": 967600
    },
    {
      "epoch": 1.583531352487186,
      "grad_norm": 0.8143352270126343,
      "learning_rate": 6.877961031344926e-06,
      "loss": 0.0178,
      "step": 967620
    },
    {
      "epoch": 1.5835640829258395,
      "grad_norm": 0.2223924845457077,
      "learning_rate": 6.87789513913141e-06,
      "loss": 0.018,
      "step": 967640
    },
    {
      "epoch": 1.5835968133644927,
      "grad_norm": 0.22247503697872162,
      "learning_rate": 6.877829246917892e-06,
      "loss": 0.0242,
      "step": 967660
    },
    {
      "epoch": 1.583629543803146,
      "grad_norm": 0.11981882154941559,
      "learning_rate": 6.877763354704375e-06,
      "loss": 0.0207,
      "step": 967680
    },
    {
      "epoch": 1.5836622742417994,
      "grad_norm": 0.9571719169616699,
      "learning_rate": 6.877697462490858e-06,
      "loss": 0.0234,
      "step": 967700
    },
    {
      "epoch": 1.5836950046804528,
      "grad_norm": 0.40465500950813293,
      "learning_rate": 6.8776315702773414e-06,
      "loss": 0.0228,
      "step": 967720
    },
    {
      "epoch": 1.5837277351191061,
      "grad_norm": 0.3473987877368927,
      "learning_rate": 6.877565678063823e-06,
      "loss": 0.0194,
      "step": 967740
    },
    {
      "epoch": 1.5837604655577593,
      "grad_norm": 1.4466042518615723,
      "learning_rate": 6.877499785850307e-06,
      "loss": 0.0226,
      "step": 967760
    },
    {
      "epoch": 1.5837931959964129,
      "grad_norm": 0.0915965810418129,
      "learning_rate": 6.877433893636789e-06,
      "loss": 0.0197,
      "step": 967780
    },
    {
      "epoch": 1.583825926435066,
      "grad_norm": 0.11764189600944519,
      "learning_rate": 6.877368001423272e-06,
      "loss": 0.0164,
      "step": 967800
    },
    {
      "epoch": 1.5838586568737194,
      "grad_norm": 0.6085745096206665,
      "learning_rate": 6.877302109209755e-06,
      "loss": 0.0239,
      "step": 967820
    },
    {
      "epoch": 1.5838913873123728,
      "grad_norm": 0.2613161504268646,
      "learning_rate": 6.877236216996238e-06,
      "loss": 0.0235,
      "step": 967840
    },
    {
      "epoch": 1.5839241177510262,
      "grad_norm": 0.34530559182167053,
      "learning_rate": 6.8771703247827215e-06,
      "loss": 0.0184,
      "step": 967860
    },
    {
      "epoch": 1.5839568481896795,
      "grad_norm": 0.5388204455375671,
      "learning_rate": 6.877104432569204e-06,
      "loss": 0.0165,
      "step": 967880
    },
    {
      "epoch": 1.5839895786283327,
      "grad_norm": 2.061688184738159,
      "learning_rate": 6.877038540355687e-06,
      "loss": 0.0221,
      "step": 967900
    },
    {
      "epoch": 1.5840223090669863,
      "grad_norm": 0.5412585735321045,
      "learning_rate": 6.87697264814217e-06,
      "loss": 0.0139,
      "step": 967920
    },
    {
      "epoch": 1.5840550395056394,
      "grad_norm": 0.43388065695762634,
      "learning_rate": 6.876906755928653e-06,
      "loss": 0.0144,
      "step": 967940
    },
    {
      "epoch": 1.5840877699442928,
      "grad_norm": 1.0555033683776855,
      "learning_rate": 6.876840863715135e-06,
      "loss": 0.0233,
      "step": 967960
    },
    {
      "epoch": 1.5841205003829462,
      "grad_norm": 0.18416517972946167,
      "learning_rate": 6.876774971501619e-06,
      "loss": 0.0211,
      "step": 967980
    },
    {
      "epoch": 1.5841532308215993,
      "grad_norm": 0.3225236237049103,
      "learning_rate": 6.876709079288101e-06,
      "loss": 0.018,
      "step": 968000
    },
    {
      "epoch": 1.584185961260253,
      "grad_norm": 0.2152727246284485,
      "learning_rate": 6.876643187074584e-06,
      "loss": 0.0219,
      "step": 968020
    },
    {
      "epoch": 1.584218691698906,
      "grad_norm": 0.15532086789608002,
      "learning_rate": 6.876577294861066e-06,
      "loss": 0.0204,
      "step": 968040
    },
    {
      "epoch": 1.5842514221375597,
      "grad_norm": 1.3526078462600708,
      "learning_rate": 6.87651140264755e-06,
      "loss": 0.0207,
      "step": 968060
    },
    {
      "epoch": 1.5842841525762128,
      "grad_norm": 1.3027870655059814,
      "learning_rate": 6.8764455104340324e-06,
      "loss": 0.0253,
      "step": 968080
    },
    {
      "epoch": 1.5843168830148662,
      "grad_norm": 0.5376556515693665,
      "learning_rate": 6.876379618220515e-06,
      "loss": 0.0214,
      "step": 968100
    },
    {
      "epoch": 1.5843496134535195,
      "grad_norm": 0.16015058755874634,
      "learning_rate": 6.876313726006998e-06,
      "loss": 0.0145,
      "step": 968120
    },
    {
      "epoch": 1.5843823438921727,
      "grad_norm": 3.268648624420166,
      "learning_rate": 6.8762478337934815e-06,
      "loss": 0.0173,
      "step": 968140
    },
    {
      "epoch": 1.5844150743308263,
      "grad_norm": 1.5225796699523926,
      "learning_rate": 6.876181941579963e-06,
      "loss": 0.0271,
      "step": 968160
    },
    {
      "epoch": 1.5844478047694794,
      "grad_norm": 0.15154623985290527,
      "learning_rate": 6.876116049366447e-06,
      "loss": 0.0224,
      "step": 968180
    },
    {
      "epoch": 1.5844805352081328,
      "grad_norm": 1.3704149723052979,
      "learning_rate": 6.8760501571529306e-06,
      "loss": 0.0181,
      "step": 968200
    },
    {
      "epoch": 1.5845132656467862,
      "grad_norm": 0.4311138689517975,
      "learning_rate": 6.8759842649394125e-06,
      "loss": 0.0125,
      "step": 968220
    },
    {
      "epoch": 1.5845459960854396,
      "grad_norm": 0.5066298246383667,
      "learning_rate": 6.875918372725896e-06,
      "loss": 0.0149,
      "step": 968240
    },
    {
      "epoch": 1.584578726524093,
      "grad_norm": 0.8659588098526001,
      "learning_rate": 6.875852480512378e-06,
      "loss": 0.0251,
      "step": 968260
    },
    {
      "epoch": 1.584611456962746,
      "grad_norm": 0.5463780164718628,
      "learning_rate": 6.8757865882988615e-06,
      "loss": 0.0218,
      "step": 968280
    },
    {
      "epoch": 1.5846441874013997,
      "grad_norm": 0.5364730954170227,
      "learning_rate": 6.875720696085344e-06,
      "loss": 0.0229,
      "step": 968300
    },
    {
      "epoch": 1.5846769178400528,
      "grad_norm": 1.7884793281555176,
      "learning_rate": 6.875654803871827e-06,
      "loss": 0.0164,
      "step": 968320
    },
    {
      "epoch": 1.5847096482787062,
      "grad_norm": 0.7099689841270447,
      "learning_rate": 6.87558891165831e-06,
      "loss": 0.0228,
      "step": 968340
    },
    {
      "epoch": 1.5847423787173596,
      "grad_norm": 0.5058735013008118,
      "learning_rate": 6.875523019444793e-06,
      "loss": 0.0175,
      "step": 968360
    },
    {
      "epoch": 1.584775109156013,
      "grad_norm": 0.41461506485939026,
      "learning_rate": 6.875457127231275e-06,
      "loss": 0.0155,
      "step": 968380
    },
    {
      "epoch": 1.5848078395946663,
      "grad_norm": 0.6072506904602051,
      "learning_rate": 6.875391235017759e-06,
      "loss": 0.0193,
      "step": 968400
    },
    {
      "epoch": 1.5848405700333195,
      "grad_norm": 0.557461678981781,
      "learning_rate": 6.875325342804241e-06,
      "loss": 0.017,
      "step": 968420
    },
    {
      "epoch": 1.584873300471973,
      "grad_norm": 1.2749241590499878,
      "learning_rate": 6.875259450590724e-06,
      "loss": 0.0204,
      "step": 968440
    },
    {
      "epoch": 1.5849060309106262,
      "grad_norm": 0.37986090779304504,
      "learning_rate": 6.875193558377207e-06,
      "loss": 0.0217,
      "step": 968460
    },
    {
      "epoch": 1.5849387613492796,
      "grad_norm": 0.3592938184738159,
      "learning_rate": 6.87512766616369e-06,
      "loss": 0.0168,
      "step": 968480
    },
    {
      "epoch": 1.584971491787933,
      "grad_norm": 0.24177758395671844,
      "learning_rate": 6.8750617739501725e-06,
      "loss": 0.0253,
      "step": 968500
    },
    {
      "epoch": 1.5850042222265863,
      "grad_norm": 0.587266743183136,
      "learning_rate": 6.874995881736656e-06,
      "loss": 0.0221,
      "step": 968520
    },
    {
      "epoch": 1.5850369526652397,
      "grad_norm": 0.05712337791919708,
      "learning_rate": 6.874929989523138e-06,
      "loss": 0.019,
      "step": 968540
    },
    {
      "epoch": 1.5850696831038928,
      "grad_norm": 0.3887466490268707,
      "learning_rate": 6.874864097309622e-06,
      "loss": 0.0255,
      "step": 968560
    },
    {
      "epoch": 1.5851024135425464,
      "grad_norm": 1.2996397018432617,
      "learning_rate": 6.874798205096105e-06,
      "loss": 0.0179,
      "step": 968580
    },
    {
      "epoch": 1.5851351439811996,
      "grad_norm": 0.8707013130187988,
      "learning_rate": 6.874732312882587e-06,
      "loss": 0.0296,
      "step": 968600
    },
    {
      "epoch": 1.585167874419853,
      "grad_norm": 1.1945786476135254,
      "learning_rate": 6.874666420669071e-06,
      "loss": 0.0166,
      "step": 968620
    },
    {
      "epoch": 1.5852006048585063,
      "grad_norm": 0.4561872184276581,
      "learning_rate": 6.8746005284555525e-06,
      "loss": 0.02,
      "step": 968640
    },
    {
      "epoch": 1.5852333352971597,
      "grad_norm": 1.1460672616958618,
      "learning_rate": 6.874534636242036e-06,
      "loss": 0.0289,
      "step": 968660
    },
    {
      "epoch": 1.585266065735813,
      "grad_norm": 0.09472090750932693,
      "learning_rate": 6.874468744028519e-06,
      "loss": 0.0249,
      "step": 968680
    },
    {
      "epoch": 1.5852987961744662,
      "grad_norm": 1.3456023931503296,
      "learning_rate": 6.874402851815002e-06,
      "loss": 0.0186,
      "step": 968700
    },
    {
      "epoch": 1.5853315266131198,
      "grad_norm": 2.052978038787842,
      "learning_rate": 6.874336959601484e-06,
      "loss": 0.0284,
      "step": 968720
    },
    {
      "epoch": 1.585364257051773,
      "grad_norm": 0.6646464467048645,
      "learning_rate": 6.874271067387968e-06,
      "loss": 0.0316,
      "step": 968740
    },
    {
      "epoch": 1.5853969874904263,
      "grad_norm": 0.3169441223144531,
      "learning_rate": 6.87420517517445e-06,
      "loss": 0.0208,
      "step": 968760
    },
    {
      "epoch": 1.5854297179290797,
      "grad_norm": 2.229712724685669,
      "learning_rate": 6.874139282960933e-06,
      "loss": 0.0184,
      "step": 968780
    },
    {
      "epoch": 1.5854624483677329,
      "grad_norm": 0.30929258465766907,
      "learning_rate": 6.874073390747415e-06,
      "loss": 0.0229,
      "step": 968800
    },
    {
      "epoch": 1.5854951788063865,
      "grad_norm": 0.4387069046497345,
      "learning_rate": 6.874007498533899e-06,
      "loss": 0.0165,
      "step": 968820
    },
    {
      "epoch": 1.5855279092450396,
      "grad_norm": 0.8760846257209778,
      "learning_rate": 6.873941606320382e-06,
      "loss": 0.0109,
      "step": 968840
    },
    {
      "epoch": 1.5855606396836932,
      "grad_norm": 0.3903217017650604,
      "learning_rate": 6.873875714106864e-06,
      "loss": 0.0164,
      "step": 968860
    },
    {
      "epoch": 1.5855933701223464,
      "grad_norm": 0.6911621689796448,
      "learning_rate": 6.873809821893347e-06,
      "loss": 0.0142,
      "step": 968880
    },
    {
      "epoch": 1.5856261005609997,
      "grad_norm": 0.31783658266067505,
      "learning_rate": 6.873743929679831e-06,
      "loss": 0.0196,
      "step": 968900
    },
    {
      "epoch": 1.585658830999653,
      "grad_norm": 0.6095075607299805,
      "learning_rate": 6.8736780374663134e-06,
      "loss": 0.0211,
      "step": 968920
    },
    {
      "epoch": 1.5856915614383063,
      "grad_norm": 0.45131683349609375,
      "learning_rate": 6.873612145252796e-06,
      "loss": 0.0139,
      "step": 968940
    },
    {
      "epoch": 1.5857242918769598,
      "grad_norm": 0.4190177619457245,
      "learning_rate": 6.87354625303928e-06,
      "loss": 0.0204,
      "step": 968960
    },
    {
      "epoch": 1.585757022315613,
      "grad_norm": 2.350125789642334,
      "learning_rate": 6.873480360825762e-06,
      "loss": 0.019,
      "step": 968980
    },
    {
      "epoch": 1.5857897527542664,
      "grad_norm": 0.2184167355298996,
      "learning_rate": 6.873414468612245e-06,
      "loss": 0.0181,
      "step": 969000
    },
    {
      "epoch": 1.5858224831929197,
      "grad_norm": 0.31946974992752075,
      "learning_rate": 6.873348576398727e-06,
      "loss": 0.027,
      "step": 969020
    },
    {
      "epoch": 1.5858552136315731,
      "grad_norm": 0.33027151226997375,
      "learning_rate": 6.873282684185211e-06,
      "loss": 0.0214,
      "step": 969040
    },
    {
      "epoch": 1.5858879440702265,
      "grad_norm": 1.4235903024673462,
      "learning_rate": 6.873216791971693e-06,
      "loss": 0.0201,
      "step": 969060
    },
    {
      "epoch": 1.5859206745088796,
      "grad_norm": 0.25364065170288086,
      "learning_rate": 6.873150899758176e-06,
      "loss": 0.021,
      "step": 969080
    },
    {
      "epoch": 1.5859534049475332,
      "grad_norm": 1.7077014446258545,
      "learning_rate": 6.873085007544659e-06,
      "loss": 0.0145,
      "step": 969100
    },
    {
      "epoch": 1.5859861353861864,
      "grad_norm": 0.58379065990448,
      "learning_rate": 6.873019115331142e-06,
      "loss": 0.0187,
      "step": 969120
    },
    {
      "epoch": 1.5860188658248398,
      "grad_norm": 1.0863264799118042,
      "learning_rate": 6.8729532231176244e-06,
      "loss": 0.0276,
      "step": 969140
    },
    {
      "epoch": 1.5860515962634931,
      "grad_norm": 0.8564140796661377,
      "learning_rate": 6.872887330904108e-06,
      "loss": 0.0209,
      "step": 969160
    },
    {
      "epoch": 1.5860843267021465,
      "grad_norm": 1.3210643529891968,
      "learning_rate": 6.87282143869059e-06,
      "loss": 0.0144,
      "step": 969180
    },
    {
      "epoch": 1.5861170571407999,
      "grad_norm": 0.16981637477874756,
      "learning_rate": 6.8727555464770735e-06,
      "loss": 0.018,
      "step": 969200
    },
    {
      "epoch": 1.586149787579453,
      "grad_norm": 0.7467265129089355,
      "learning_rate": 6.872689654263555e-06,
      "loss": 0.0215,
      "step": 969220
    },
    {
      "epoch": 1.5861825180181066,
      "grad_norm": 0.7959210276603699,
      "learning_rate": 6.872623762050039e-06,
      "loss": 0.024,
      "step": 969240
    },
    {
      "epoch": 1.5862152484567598,
      "grad_norm": 1.840254306793213,
      "learning_rate": 6.8725578698365226e-06,
      "loss": 0.0166,
      "step": 969260
    },
    {
      "epoch": 1.5862479788954131,
      "grad_norm": 0.5639105439186096,
      "learning_rate": 6.8724919776230044e-06,
      "loss": 0.0196,
      "step": 969280
    },
    {
      "epoch": 1.5862807093340665,
      "grad_norm": 0.46009594202041626,
      "learning_rate": 6.872426085409488e-06,
      "loss": 0.0266,
      "step": 969300
    },
    {
      "epoch": 1.5863134397727199,
      "grad_norm": 1.1312106847763062,
      "learning_rate": 6.872360193195971e-06,
      "loss": 0.023,
      "step": 969320
    },
    {
      "epoch": 1.5863461702113733,
      "grad_norm": 0.3955799639225006,
      "learning_rate": 6.8722943009824535e-06,
      "loss": 0.0155,
      "step": 969340
    },
    {
      "epoch": 1.5863789006500264,
      "grad_norm": 0.294895738363266,
      "learning_rate": 6.872228408768936e-06,
      "loss": 0.0174,
      "step": 969360
    },
    {
      "epoch": 1.58641163108868,
      "grad_norm": 0.8997557163238525,
      "learning_rate": 6.87216251655542e-06,
      "loss": 0.0174,
      "step": 969380
    },
    {
      "epoch": 1.5864443615273331,
      "grad_norm": 0.3536025881767273,
      "learning_rate": 6.872096624341902e-06,
      "loss": 0.0274,
      "step": 969400
    },
    {
      "epoch": 1.5864770919659865,
      "grad_norm": 0.3296918272972107,
      "learning_rate": 6.872030732128385e-06,
      "loss": 0.019,
      "step": 969420
    },
    {
      "epoch": 1.58650982240464,
      "grad_norm": 0.20403572916984558,
      "learning_rate": 6.871964839914867e-06,
      "loss": 0.0229,
      "step": 969440
    },
    {
      "epoch": 1.586542552843293,
      "grad_norm": 1.3823614120483398,
      "learning_rate": 6.871898947701351e-06,
      "loss": 0.0202,
      "step": 969460
    },
    {
      "epoch": 1.5865752832819466,
      "grad_norm": 0.44694966077804565,
      "learning_rate": 6.8718330554878335e-06,
      "loss": 0.0248,
      "step": 969480
    },
    {
      "epoch": 1.5866080137205998,
      "grad_norm": 0.5674136281013489,
      "learning_rate": 6.871767163274316e-06,
      "loss": 0.0175,
      "step": 969500
    },
    {
      "epoch": 1.5866407441592534,
      "grad_norm": 1.1452648639678955,
      "learning_rate": 6.871701271060799e-06,
      "loss": 0.0199,
      "step": 969520
    },
    {
      "epoch": 1.5866734745979065,
      "grad_norm": 0.7282981276512146,
      "learning_rate": 6.871635378847283e-06,
      "loss": 0.0186,
      "step": 969540
    },
    {
      "epoch": 1.58670620503656,
      "grad_norm": 0.7432056069374084,
      "learning_rate": 6.8715694866337645e-06,
      "loss": 0.0142,
      "step": 969560
    },
    {
      "epoch": 1.5867389354752133,
      "grad_norm": 0.3533422350883484,
      "learning_rate": 6.871503594420248e-06,
      "loss": 0.0222,
      "step": 969580
    },
    {
      "epoch": 1.5867716659138664,
      "grad_norm": 0.3795079290866852,
      "learning_rate": 6.871437702206732e-06,
      "loss": 0.0153,
      "step": 969600
    },
    {
      "epoch": 1.58680439635252,
      "grad_norm": 0.29393094778060913,
      "learning_rate": 6.8713718099932136e-06,
      "loss": 0.0263,
      "step": 969620
    },
    {
      "epoch": 1.5868371267911732,
      "grad_norm": 0.5903648734092712,
      "learning_rate": 6.871305917779697e-06,
      "loss": 0.0169,
      "step": 969640
    },
    {
      "epoch": 1.5868698572298265,
      "grad_norm": 0.1764993965625763,
      "learning_rate": 6.871240025566179e-06,
      "loss": 0.0181,
      "step": 969660
    },
    {
      "epoch": 1.58690258766848,
      "grad_norm": 0.6967319846153259,
      "learning_rate": 6.871174133352663e-06,
      "loss": 0.0237,
      "step": 969680
    },
    {
      "epoch": 1.5869353181071333,
      "grad_norm": 0.29962605237960815,
      "learning_rate": 6.871108241139145e-06,
      "loss": 0.0193,
      "step": 969700
    },
    {
      "epoch": 1.5869680485457867,
      "grad_norm": 1.126775860786438,
      "learning_rate": 6.871042348925628e-06,
      "loss": 0.0199,
      "step": 969720
    },
    {
      "epoch": 1.5870007789844398,
      "grad_norm": 0.9693671464920044,
      "learning_rate": 6.870976456712111e-06,
      "loss": 0.0145,
      "step": 969740
    },
    {
      "epoch": 1.5870335094230934,
      "grad_norm": 0.7704110741615295,
      "learning_rate": 6.8709105644985944e-06,
      "loss": 0.0183,
      "step": 969760
    },
    {
      "epoch": 1.5870662398617466,
      "grad_norm": 0.561825692653656,
      "learning_rate": 6.870844672285076e-06,
      "loss": 0.024,
      "step": 969780
    },
    {
      "epoch": 1.5870989703004,
      "grad_norm": 0.5482513308525085,
      "learning_rate": 6.87077878007156e-06,
      "loss": 0.0124,
      "step": 969800
    },
    {
      "epoch": 1.5871317007390533,
      "grad_norm": 0.3363323211669922,
      "learning_rate": 6.870712887858042e-06,
      "loss": 0.0222,
      "step": 969820
    },
    {
      "epoch": 1.5871644311777067,
      "grad_norm": 0.6763564944267273,
      "learning_rate": 6.870646995644525e-06,
      "loss": 0.0144,
      "step": 969840
    },
    {
      "epoch": 1.58719716161636,
      "grad_norm": 0.34589460492134094,
      "learning_rate": 6.870581103431007e-06,
      "loss": 0.018,
      "step": 969860
    },
    {
      "epoch": 1.5872298920550132,
      "grad_norm": 0.34942561388015747,
      "learning_rate": 6.870515211217491e-06,
      "loss": 0.0216,
      "step": 969880
    },
    {
      "epoch": 1.5872626224936668,
      "grad_norm": 0.16693082451820374,
      "learning_rate": 6.870449319003974e-06,
      "loss": 0.0165,
      "step": 969900
    },
    {
      "epoch": 1.58729535293232,
      "grad_norm": 0.6246368885040283,
      "learning_rate": 6.870383426790456e-06,
      "loss": 0.0215,
      "step": 969920
    },
    {
      "epoch": 1.5873280833709733,
      "grad_norm": 0.6933702826499939,
      "learning_rate": 6.870317534576939e-06,
      "loss": 0.0237,
      "step": 969940
    },
    {
      "epoch": 1.5873608138096267,
      "grad_norm": 0.29108670353889465,
      "learning_rate": 6.870251642363423e-06,
      "loss": 0.0206,
      "step": 969960
    },
    {
      "epoch": 1.58739354424828,
      "grad_norm": 0.12945497035980225,
      "learning_rate": 6.870185750149906e-06,
      "loss": 0.0258,
      "step": 969980
    },
    {
      "epoch": 1.5874262746869334,
      "grad_norm": 0.139562726020813,
      "learning_rate": 6.870119857936388e-06,
      "loss": 0.0165,
      "step": 970000
    },
    {
      "epoch": 1.5874590051255866,
      "grad_norm": 0.29765868186950684,
      "learning_rate": 6.870053965722872e-06,
      "loss": 0.0164,
      "step": 970020
    },
    {
      "epoch": 1.5874917355642402,
      "grad_norm": 0.6036021709442139,
      "learning_rate": 6.869988073509354e-06,
      "loss": 0.0262,
      "step": 970040
    },
    {
      "epoch": 1.5875244660028933,
      "grad_norm": 2.0648930072784424,
      "learning_rate": 6.869922181295837e-06,
      "loss": 0.0247,
      "step": 970060
    },
    {
      "epoch": 1.5875571964415467,
      "grad_norm": 0.45782744884490967,
      "learning_rate": 6.869856289082319e-06,
      "loss": 0.0175,
      "step": 970080
    },
    {
      "epoch": 1.5875899268802,
      "grad_norm": 0.5808873772621155,
      "learning_rate": 6.869790396868803e-06,
      "loss": 0.0214,
      "step": 970100
    },
    {
      "epoch": 1.5876226573188534,
      "grad_norm": 1.0692912340164185,
      "learning_rate": 6.8697245046552854e-06,
      "loss": 0.024,
      "step": 970120
    },
    {
      "epoch": 1.5876553877575068,
      "grad_norm": 1.0810810327529907,
      "learning_rate": 6.869658612441768e-06,
      "loss": 0.0224,
      "step": 970140
    },
    {
      "epoch": 1.58768811819616,
      "grad_norm": 0.7882182598114014,
      "learning_rate": 6.869592720228251e-06,
      "loss": 0.0232,
      "step": 970160
    },
    {
      "epoch": 1.5877208486348136,
      "grad_norm": 0.6582963466644287,
      "learning_rate": 6.8695268280147345e-06,
      "loss": 0.0114,
      "step": 970180
    },
    {
      "epoch": 1.5877535790734667,
      "grad_norm": 1.6737059354782104,
      "learning_rate": 6.869460935801216e-06,
      "loss": 0.0171,
      "step": 970200
    },
    {
      "epoch": 1.58778630951212,
      "grad_norm": 0.5825338363647461,
      "learning_rate": 6.8693950435877e-06,
      "loss": 0.0157,
      "step": 970220
    },
    {
      "epoch": 1.5878190399507734,
      "grad_norm": 0.24469493329524994,
      "learning_rate": 6.869329151374182e-06,
      "loss": 0.0246,
      "step": 970240
    },
    {
      "epoch": 1.5878517703894266,
      "grad_norm": 0.23368050158023834,
      "learning_rate": 6.8692632591606655e-06,
      "loss": 0.0258,
      "step": 970260
    },
    {
      "epoch": 1.5878845008280802,
      "grad_norm": 0.19070807099342346,
      "learning_rate": 6.869197366947148e-06,
      "loss": 0.0232,
      "step": 970280
    },
    {
      "epoch": 1.5879172312667333,
      "grad_norm": 0.12766170501708984,
      "learning_rate": 6.869131474733631e-06,
      "loss": 0.0212,
      "step": 970300
    },
    {
      "epoch": 1.587949961705387,
      "grad_norm": 0.3828519284725189,
      "learning_rate": 6.8690655825201145e-06,
      "loss": 0.0176,
      "step": 970320
    },
    {
      "epoch": 1.58798269214404,
      "grad_norm": 0.29825472831726074,
      "learning_rate": 6.868999690306597e-06,
      "loss": 0.0251,
      "step": 970340
    },
    {
      "epoch": 1.5880154225826935,
      "grad_norm": 0.19476644694805145,
      "learning_rate": 6.86893379809308e-06,
      "loss": 0.0195,
      "step": 970360
    },
    {
      "epoch": 1.5880481530213468,
      "grad_norm": 0.6712878346443176,
      "learning_rate": 6.868867905879563e-06,
      "loss": 0.0173,
      "step": 970380
    },
    {
      "epoch": 1.58808088346,
      "grad_norm": 0.12888340651988983,
      "learning_rate": 6.868802013666046e-06,
      "loss": 0.0263,
      "step": 970400
    },
    {
      "epoch": 1.5881136138986536,
      "grad_norm": 0.49751684069633484,
      "learning_rate": 6.868736121452528e-06,
      "loss": 0.0205,
      "step": 970420
    },
    {
      "epoch": 1.5881463443373067,
      "grad_norm": 0.2661035358905792,
      "learning_rate": 6.868670229239012e-06,
      "loss": 0.0179,
      "step": 970440
    },
    {
      "epoch": 1.58817907477596,
      "grad_norm": 0.4631079137325287,
      "learning_rate": 6.868604337025494e-06,
      "loss": 0.0262,
      "step": 970460
    },
    {
      "epoch": 1.5882118052146135,
      "grad_norm": 0.35987040400505066,
      "learning_rate": 6.868538444811977e-06,
      "loss": 0.0185,
      "step": 970480
    },
    {
      "epoch": 1.5882445356532668,
      "grad_norm": 0.3366655707359314,
      "learning_rate": 6.86847255259846e-06,
      "loss": 0.0192,
      "step": 970500
    },
    {
      "epoch": 1.5882772660919202,
      "grad_norm": 0.3441852331161499,
      "learning_rate": 6.868406660384943e-06,
      "loss": 0.0135,
      "step": 970520
    },
    {
      "epoch": 1.5883099965305734,
      "grad_norm": 0.5312074422836304,
      "learning_rate": 6.8683407681714255e-06,
      "loss": 0.023,
      "step": 970540
    },
    {
      "epoch": 1.588342726969227,
      "grad_norm": 0.4738069772720337,
      "learning_rate": 6.868274875957909e-06,
      "loss": 0.0147,
      "step": 970560
    },
    {
      "epoch": 1.58837545740788,
      "grad_norm": 0.981529712677002,
      "learning_rate": 6.868208983744391e-06,
      "loss": 0.0205,
      "step": 970580
    },
    {
      "epoch": 1.5884081878465335,
      "grad_norm": 0.3255571722984314,
      "learning_rate": 6.868143091530875e-06,
      "loss": 0.0132,
      "step": 970600
    },
    {
      "epoch": 1.5884409182851869,
      "grad_norm": 0.5321522355079651,
      "learning_rate": 6.8680771993173565e-06,
      "loss": 0.0185,
      "step": 970620
    },
    {
      "epoch": 1.5884736487238402,
      "grad_norm": 0.4987182021141052,
      "learning_rate": 6.86801130710384e-06,
      "loss": 0.0197,
      "step": 970640
    },
    {
      "epoch": 1.5885063791624936,
      "grad_norm": 0.1648990660905838,
      "learning_rate": 6.867945414890324e-06,
      "loss": 0.0194,
      "step": 970660
    },
    {
      "epoch": 1.5885391096011467,
      "grad_norm": 1.0304689407348633,
      "learning_rate": 6.8678795226768055e-06,
      "loss": 0.0208,
      "step": 970680
    },
    {
      "epoch": 1.5885718400398003,
      "grad_norm": 0.1427510678768158,
      "learning_rate": 6.867813630463289e-06,
      "loss": 0.0149,
      "step": 970700
    },
    {
      "epoch": 1.5886045704784535,
      "grad_norm": 0.45179933309555054,
      "learning_rate": 6.867747738249772e-06,
      "loss": 0.0166,
      "step": 970720
    },
    {
      "epoch": 1.5886373009171069,
      "grad_norm": 0.30858516693115234,
      "learning_rate": 6.867681846036255e-06,
      "loss": 0.0131,
      "step": 970740
    },
    {
      "epoch": 1.5886700313557602,
      "grad_norm": 0.9488617181777954,
      "learning_rate": 6.867615953822737e-06,
      "loss": 0.0152,
      "step": 970760
    },
    {
      "epoch": 1.5887027617944136,
      "grad_norm": 0.5955773591995239,
      "learning_rate": 6.867550061609221e-06,
      "loss": 0.0167,
      "step": 970780
    },
    {
      "epoch": 1.588735492233067,
      "grad_norm": 0.7692384719848633,
      "learning_rate": 6.867484169395703e-06,
      "loss": 0.0213,
      "step": 970800
    },
    {
      "epoch": 1.5887682226717201,
      "grad_norm": 0.4911748170852661,
      "learning_rate": 6.867418277182186e-06,
      "loss": 0.0191,
      "step": 970820
    },
    {
      "epoch": 1.5888009531103737,
      "grad_norm": 0.4926376938819885,
      "learning_rate": 6.867352384968668e-06,
      "loss": 0.022,
      "step": 970840
    },
    {
      "epoch": 1.5888336835490269,
      "grad_norm": 0.5766829252243042,
      "learning_rate": 6.867286492755152e-06,
      "loss": 0.0202,
      "step": 970860
    },
    {
      "epoch": 1.5888664139876802,
      "grad_norm": 3.753530263900757,
      "learning_rate": 6.867220600541634e-06,
      "loss": 0.0176,
      "step": 970880
    },
    {
      "epoch": 1.5888991444263336,
      "grad_norm": 0.345160573720932,
      "learning_rate": 6.867154708328117e-06,
      "loss": 0.0206,
      "step": 970900
    },
    {
      "epoch": 1.588931874864987,
      "grad_norm": 0.32246819138526917,
      "learning_rate": 6.8670888161146e-06,
      "loss": 0.0187,
      "step": 970920
    },
    {
      "epoch": 1.5889646053036404,
      "grad_norm": 0.5000777840614319,
      "learning_rate": 6.867022923901083e-06,
      "loss": 0.018,
      "step": 970940
    },
    {
      "epoch": 1.5889973357422935,
      "grad_norm": 1.104871153831482,
      "learning_rate": 6.866957031687566e-06,
      "loss": 0.0204,
      "step": 970960
    },
    {
      "epoch": 1.589030066180947,
      "grad_norm": 0.9375301599502563,
      "learning_rate": 6.866891139474049e-06,
      "loss": 0.0252,
      "step": 970980
    },
    {
      "epoch": 1.5890627966196003,
      "grad_norm": 0.4324853718280792,
      "learning_rate": 6.866825247260531e-06,
      "loss": 0.0202,
      "step": 971000
    },
    {
      "epoch": 1.5890955270582536,
      "grad_norm": 1.3984110355377197,
      "learning_rate": 6.866759355047015e-06,
      "loss": 0.0241,
      "step": 971020
    },
    {
      "epoch": 1.589128257496907,
      "grad_norm": 1.2087538242340088,
      "learning_rate": 6.866693462833498e-06,
      "loss": 0.0202,
      "step": 971040
    },
    {
      "epoch": 1.5891609879355602,
      "grad_norm": 0.8823872208595276,
      "learning_rate": 6.86662757061998e-06,
      "loss": 0.0163,
      "step": 971060
    },
    {
      "epoch": 1.5891937183742137,
      "grad_norm": 0.7770871520042419,
      "learning_rate": 6.866561678406464e-06,
      "loss": 0.0206,
      "step": 971080
    },
    {
      "epoch": 1.589226448812867,
      "grad_norm": 1.5658208131790161,
      "learning_rate": 6.866495786192946e-06,
      "loss": 0.0199,
      "step": 971100
    },
    {
      "epoch": 1.5892591792515205,
      "grad_norm": 9.70786190032959,
      "learning_rate": 6.866429893979429e-06,
      "loss": 0.0183,
      "step": 971120
    },
    {
      "epoch": 1.5892919096901736,
      "grad_norm": 0.46111249923706055,
      "learning_rate": 6.866364001765912e-06,
      "loss": 0.022,
      "step": 971140
    },
    {
      "epoch": 1.589324640128827,
      "grad_norm": 0.3407736122608185,
      "learning_rate": 6.866298109552395e-06,
      "loss": 0.017,
      "step": 971160
    },
    {
      "epoch": 1.5893573705674804,
      "grad_norm": 0.6855374574661255,
      "learning_rate": 6.8662322173388774e-06,
      "loss": 0.0202,
      "step": 971180
    },
    {
      "epoch": 1.5893901010061335,
      "grad_norm": 0.8414501547813416,
      "learning_rate": 6.866166325125361e-06,
      "loss": 0.0377,
      "step": 971200
    },
    {
      "epoch": 1.5894228314447871,
      "grad_norm": 0.5044670701026917,
      "learning_rate": 6.866100432911843e-06,
      "loss": 0.0267,
      "step": 971220
    },
    {
      "epoch": 1.5894555618834403,
      "grad_norm": 0.5192407965660095,
      "learning_rate": 6.8660345406983265e-06,
      "loss": 0.0177,
      "step": 971240
    },
    {
      "epoch": 1.5894882923220937,
      "grad_norm": 1.0323373079299927,
      "learning_rate": 6.865968648484808e-06,
      "loss": 0.0365,
      "step": 971260
    },
    {
      "epoch": 1.589521022760747,
      "grad_norm": 0.4226093292236328,
      "learning_rate": 6.865902756271292e-06,
      "loss": 0.0166,
      "step": 971280
    },
    {
      "epoch": 1.5895537531994004,
      "grad_norm": 0.3426591753959656,
      "learning_rate": 6.865836864057775e-06,
      "loss": 0.0152,
      "step": 971300
    },
    {
      "epoch": 1.5895864836380538,
      "grad_norm": 0.9884153008460999,
      "learning_rate": 6.8657709718442575e-06,
      "loss": 0.0217,
      "step": 971320
    },
    {
      "epoch": 1.589619214076707,
      "grad_norm": 1.3831366300582886,
      "learning_rate": 6.86570507963074e-06,
      "loss": 0.015,
      "step": 971340
    },
    {
      "epoch": 1.5896519445153605,
      "grad_norm": 1.277769923210144,
      "learning_rate": 6.865639187417224e-06,
      "loss": 0.0173,
      "step": 971360
    },
    {
      "epoch": 1.5896846749540137,
      "grad_norm": 0.5501624941825867,
      "learning_rate": 6.8655732952037065e-06,
      "loss": 0.0156,
      "step": 971380
    },
    {
      "epoch": 1.589717405392667,
      "grad_norm": 0.20384933054447174,
      "learning_rate": 6.865507402990189e-06,
      "loss": 0.0143,
      "step": 971400
    },
    {
      "epoch": 1.5897501358313204,
      "grad_norm": 0.44003137946128845,
      "learning_rate": 6.865441510776673e-06,
      "loss": 0.0298,
      "step": 971420
    },
    {
      "epoch": 1.5897828662699738,
      "grad_norm": 3.603865623474121,
      "learning_rate": 6.865375618563155e-06,
      "loss": 0.0187,
      "step": 971440
    },
    {
      "epoch": 1.5898155967086272,
      "grad_norm": 0.3591407239437103,
      "learning_rate": 6.865309726349638e-06,
      "loss": 0.0149,
      "step": 971460
    },
    {
      "epoch": 1.5898483271472803,
      "grad_norm": 1.040900707244873,
      "learning_rate": 6.86524383413612e-06,
      "loss": 0.029,
      "step": 971480
    },
    {
      "epoch": 1.589881057585934,
      "grad_norm": 0.31707027554512024,
      "learning_rate": 6.865177941922604e-06,
      "loss": 0.0266,
      "step": 971500
    },
    {
      "epoch": 1.589913788024587,
      "grad_norm": 1.6926524639129639,
      "learning_rate": 6.8651120497090865e-06,
      "loss": 0.0204,
      "step": 971520
    },
    {
      "epoch": 1.5899465184632404,
      "grad_norm": 0.23625631630420685,
      "learning_rate": 6.865046157495569e-06,
      "loss": 0.0208,
      "step": 971540
    },
    {
      "epoch": 1.5899792489018938,
      "grad_norm": 0.27121293544769287,
      "learning_rate": 6.864980265282052e-06,
      "loss": 0.0144,
      "step": 971560
    },
    {
      "epoch": 1.5900119793405472,
      "grad_norm": 0.8631090521812439,
      "learning_rate": 6.864914373068536e-06,
      "loss": 0.029,
      "step": 971580
    },
    {
      "epoch": 1.5900447097792005,
      "grad_norm": 0.13074135780334473,
      "learning_rate": 6.8648484808550175e-06,
      "loss": 0.0203,
      "step": 971600
    },
    {
      "epoch": 1.5900774402178537,
      "grad_norm": 0.48028039932250977,
      "learning_rate": 6.864782588641501e-06,
      "loss": 0.0172,
      "step": 971620
    },
    {
      "epoch": 1.5901101706565073,
      "grad_norm": 1.1343258619308472,
      "learning_rate": 6.864716696427983e-06,
      "loss": 0.0273,
      "step": 971640
    },
    {
      "epoch": 1.5901429010951604,
      "grad_norm": 0.11873991787433624,
      "learning_rate": 6.8646508042144666e-06,
      "loss": 0.0164,
      "step": 971660
    },
    {
      "epoch": 1.5901756315338138,
      "grad_norm": 0.09224362671375275,
      "learning_rate": 6.864584912000949e-06,
      "loss": 0.0218,
      "step": 971680
    },
    {
      "epoch": 1.5902083619724672,
      "grad_norm": 1.5034915208816528,
      "learning_rate": 6.864519019787432e-06,
      "loss": 0.0248,
      "step": 971700
    },
    {
      "epoch": 1.5902410924111205,
      "grad_norm": 0.4342968761920929,
      "learning_rate": 6.864453127573916e-06,
      "loss": 0.0135,
      "step": 971720
    },
    {
      "epoch": 1.590273822849774,
      "grad_norm": 0.5732415318489075,
      "learning_rate": 6.864387235360398e-06,
      "loss": 0.0204,
      "step": 971740
    },
    {
      "epoch": 1.590306553288427,
      "grad_norm": 0.782180905342102,
      "learning_rate": 6.864321343146881e-06,
      "loss": 0.019,
      "step": 971760
    },
    {
      "epoch": 1.5903392837270807,
      "grad_norm": 0.3386234641075134,
      "learning_rate": 6.864255450933364e-06,
      "loss": 0.0299,
      "step": 971780
    },
    {
      "epoch": 1.5903720141657338,
      "grad_norm": 0.3725154399871826,
      "learning_rate": 6.8641895587198474e-06,
      "loss": 0.0212,
      "step": 971800
    },
    {
      "epoch": 1.5904047446043872,
      "grad_norm": 0.7798921465873718,
      "learning_rate": 6.864123666506329e-06,
      "loss": 0.0225,
      "step": 971820
    },
    {
      "epoch": 1.5904374750430406,
      "grad_norm": 0.5073072910308838,
      "learning_rate": 6.864057774292813e-06,
      "loss": 0.0177,
      "step": 971840
    },
    {
      "epoch": 1.5904702054816937,
      "grad_norm": 1.0066020488739014,
      "learning_rate": 6.863991882079295e-06,
      "loss": 0.0256,
      "step": 971860
    },
    {
      "epoch": 1.5905029359203473,
      "grad_norm": 0.34052038192749023,
      "learning_rate": 6.863925989865778e-06,
      "loss": 0.0212,
      "step": 971880
    },
    {
      "epoch": 1.5905356663590005,
      "grad_norm": 0.1714610904455185,
      "learning_rate": 6.86386009765226e-06,
      "loss": 0.0142,
      "step": 971900
    },
    {
      "epoch": 1.5905683967976538,
      "grad_norm": 0.20132935047149658,
      "learning_rate": 6.863794205438744e-06,
      "loss": 0.0248,
      "step": 971920
    },
    {
      "epoch": 1.5906011272363072,
      "grad_norm": 1.3983696699142456,
      "learning_rate": 6.863728313225227e-06,
      "loss": 0.0248,
      "step": 971940
    },
    {
      "epoch": 1.5906338576749606,
      "grad_norm": 0.3710348904132843,
      "learning_rate": 6.863662421011709e-06,
      "loss": 0.023,
      "step": 971960
    },
    {
      "epoch": 1.590666588113614,
      "grad_norm": 1.1407554149627686,
      "learning_rate": 6.863596528798192e-06,
      "loss": 0.0197,
      "step": 971980
    },
    {
      "epoch": 1.590699318552267,
      "grad_norm": 0.9918590188026428,
      "learning_rate": 6.863530636584676e-06,
      "loss": 0.0278,
      "step": 972000
    },
    {
      "epoch": 1.5907320489909207,
      "grad_norm": 0.9261815547943115,
      "learning_rate": 6.863464744371158e-06,
      "loss": 0.0186,
      "step": 972020
    },
    {
      "epoch": 1.5907647794295738,
      "grad_norm": 0.7442924380302429,
      "learning_rate": 6.863398852157641e-06,
      "loss": 0.0136,
      "step": 972040
    },
    {
      "epoch": 1.5907975098682272,
      "grad_norm": 0.5822290182113647,
      "learning_rate": 6.863332959944125e-06,
      "loss": 0.0221,
      "step": 972060
    },
    {
      "epoch": 1.5908302403068806,
      "grad_norm": 0.5929822325706482,
      "learning_rate": 6.863267067730607e-06,
      "loss": 0.017,
      "step": 972080
    },
    {
      "epoch": 1.590862970745534,
      "grad_norm": 0.3975955843925476,
      "learning_rate": 6.86320117551709e-06,
      "loss": 0.0228,
      "step": 972100
    },
    {
      "epoch": 1.5908957011841873,
      "grad_norm": 2.442335844039917,
      "learning_rate": 6.863135283303572e-06,
      "loss": 0.0191,
      "step": 972120
    },
    {
      "epoch": 1.5909284316228405,
      "grad_norm": 0.21412606537342072,
      "learning_rate": 6.863069391090056e-06,
      "loss": 0.0148,
      "step": 972140
    },
    {
      "epoch": 1.590961162061494,
      "grad_norm": 1.1827493906021118,
      "learning_rate": 6.8630034988765385e-06,
      "loss": 0.0172,
      "step": 972160
    },
    {
      "epoch": 1.5909938925001472,
      "grad_norm": 0.24788425862789154,
      "learning_rate": 6.862937606663021e-06,
      "loss": 0.0245,
      "step": 972180
    },
    {
      "epoch": 1.5910266229388006,
      "grad_norm": 2.6366894245147705,
      "learning_rate": 6.862871714449504e-06,
      "loss": 0.0174,
      "step": 972200
    },
    {
      "epoch": 1.591059353377454,
      "grad_norm": 0.774048924446106,
      "learning_rate": 6.8628058222359875e-06,
      "loss": 0.019,
      "step": 972220
    },
    {
      "epoch": 1.5910920838161073,
      "grad_norm": 1.0434404611587524,
      "learning_rate": 6.862739930022469e-06,
      "loss": 0.0184,
      "step": 972240
    },
    {
      "epoch": 1.5911248142547607,
      "grad_norm": 0.33872514963150024,
      "learning_rate": 6.862674037808953e-06,
      "loss": 0.0131,
      "step": 972260
    },
    {
      "epoch": 1.5911575446934139,
      "grad_norm": 0.1913515031337738,
      "learning_rate": 6.862608145595435e-06,
      "loss": 0.0227,
      "step": 972280
    },
    {
      "epoch": 1.5911902751320675,
      "grad_norm": 0.9046705961227417,
      "learning_rate": 6.8625422533819185e-06,
      "loss": 0.021,
      "step": 972300
    },
    {
      "epoch": 1.5912230055707206,
      "grad_norm": 0.5928112268447876,
      "learning_rate": 6.862476361168401e-06,
      "loss": 0.0219,
      "step": 972320
    },
    {
      "epoch": 1.591255736009374,
      "grad_norm": 0.8653550744056702,
      "learning_rate": 6.862410468954884e-06,
      "loss": 0.0229,
      "step": 972340
    },
    {
      "epoch": 1.5912884664480273,
      "grad_norm": 0.5338968634605408,
      "learning_rate": 6.862344576741367e-06,
      "loss": 0.0282,
      "step": 972360
    },
    {
      "epoch": 1.5913211968866807,
      "grad_norm": 0.824467122554779,
      "learning_rate": 6.86227868452785e-06,
      "loss": 0.0136,
      "step": 972380
    },
    {
      "epoch": 1.591353927325334,
      "grad_norm": 1.8853312730789185,
      "learning_rate": 6.862212792314332e-06,
      "loss": 0.0184,
      "step": 972400
    },
    {
      "epoch": 1.5913866577639872,
      "grad_norm": 0.6915768384933472,
      "learning_rate": 6.862146900100816e-06,
      "loss": 0.0179,
      "step": 972420
    },
    {
      "epoch": 1.5914193882026408,
      "grad_norm": 0.29557228088378906,
      "learning_rate": 6.862081007887299e-06,
      "loss": 0.0188,
      "step": 972440
    },
    {
      "epoch": 1.591452118641294,
      "grad_norm": 0.38179612159729004,
      "learning_rate": 6.862015115673781e-06,
      "loss": 0.0266,
      "step": 972460
    },
    {
      "epoch": 1.5914848490799474,
      "grad_norm": 0.20880095660686493,
      "learning_rate": 6.861949223460265e-06,
      "loss": 0.0215,
      "step": 972480
    },
    {
      "epoch": 1.5915175795186007,
      "grad_norm": 0.6790481209754944,
      "learning_rate": 6.861883331246747e-06,
      "loss": 0.0207,
      "step": 972500
    },
    {
      "epoch": 1.5915503099572539,
      "grad_norm": 0.7346012592315674,
      "learning_rate": 6.86181743903323e-06,
      "loss": 0.024,
      "step": 972520
    },
    {
      "epoch": 1.5915830403959075,
      "grad_norm": 0.6212902665138245,
      "learning_rate": 6.861751546819713e-06,
      "loss": 0.0205,
      "step": 972540
    },
    {
      "epoch": 1.5916157708345606,
      "grad_norm": 0.6101624965667725,
      "learning_rate": 6.861685654606196e-06,
      "loss": 0.0177,
      "step": 972560
    },
    {
      "epoch": 1.5916485012732142,
      "grad_norm": 0.46695607900619507,
      "learning_rate": 6.8616197623926785e-06,
      "loss": 0.0189,
      "step": 972580
    },
    {
      "epoch": 1.5916812317118674,
      "grad_norm": 0.28517186641693115,
      "learning_rate": 6.861553870179162e-06,
      "loss": 0.0139,
      "step": 972600
    },
    {
      "epoch": 1.5917139621505207,
      "grad_norm": 0.25529828667640686,
      "learning_rate": 6.861487977965644e-06,
      "loss": 0.0197,
      "step": 972620
    },
    {
      "epoch": 1.5917466925891741,
      "grad_norm": 0.15066590905189514,
      "learning_rate": 6.861422085752128e-06,
      "loss": 0.026,
      "step": 972640
    },
    {
      "epoch": 1.5917794230278273,
      "grad_norm": 0.32982996106147766,
      "learning_rate": 6.8613561935386095e-06,
      "loss": 0.0207,
      "step": 972660
    },
    {
      "epoch": 1.5918121534664809,
      "grad_norm": 1.1447367668151855,
      "learning_rate": 6.861290301325093e-06,
      "loss": 0.015,
      "step": 972680
    },
    {
      "epoch": 1.591844883905134,
      "grad_norm": 0.44537776708602905,
      "learning_rate": 6.861224409111575e-06,
      "loss": 0.0192,
      "step": 972700
    },
    {
      "epoch": 1.5918776143437874,
      "grad_norm": 0.9482408761978149,
      "learning_rate": 6.8611585168980586e-06,
      "loss": 0.0208,
      "step": 972720
    },
    {
      "epoch": 1.5919103447824408,
      "grad_norm": 0.3541840612888336,
      "learning_rate": 6.861092624684541e-06,
      "loss": 0.02,
      "step": 972740
    },
    {
      "epoch": 1.5919430752210941,
      "grad_norm": 0.287815660238266,
      "learning_rate": 6.861026732471025e-06,
      "loss": 0.0195,
      "step": 972760
    },
    {
      "epoch": 1.5919758056597475,
      "grad_norm": 0.992084264755249,
      "learning_rate": 6.860960840257508e-06,
      "loss": 0.0416,
      "step": 972780
    },
    {
      "epoch": 1.5920085360984007,
      "grad_norm": 0.48459526896476746,
      "learning_rate": 6.86089494804399e-06,
      "loss": 0.0155,
      "step": 972800
    },
    {
      "epoch": 1.5920412665370542,
      "grad_norm": 0.9748861789703369,
      "learning_rate": 6.860829055830474e-06,
      "loss": 0.03,
      "step": 972820
    },
    {
      "epoch": 1.5920739969757074,
      "grad_norm": 0.5660353899002075,
      "learning_rate": 6.860763163616956e-06,
      "loss": 0.0232,
      "step": 972840
    },
    {
      "epoch": 1.5921067274143608,
      "grad_norm": 0.8510521650314331,
      "learning_rate": 6.8606972714034394e-06,
      "loss": 0.0184,
      "step": 972860
    },
    {
      "epoch": 1.5921394578530141,
      "grad_norm": 0.6754724979400635,
      "learning_rate": 6.860631379189921e-06,
      "loss": 0.0155,
      "step": 972880
    },
    {
      "epoch": 1.5921721882916675,
      "grad_norm": 0.5912632942199707,
      "learning_rate": 6.860565486976405e-06,
      "loss": 0.0223,
      "step": 972900
    },
    {
      "epoch": 1.5922049187303209,
      "grad_norm": 0.6628013849258423,
      "learning_rate": 6.860499594762887e-06,
      "loss": 0.0158,
      "step": 972920
    },
    {
      "epoch": 1.592237649168974,
      "grad_norm": 0.2326570302248001,
      "learning_rate": 6.86043370254937e-06,
      "loss": 0.0215,
      "step": 972940
    },
    {
      "epoch": 1.5922703796076276,
      "grad_norm": 0.38892683386802673,
      "learning_rate": 6.860367810335853e-06,
      "loss": 0.0201,
      "step": 972960
    },
    {
      "epoch": 1.5923031100462808,
      "grad_norm": 1.5601028203964233,
      "learning_rate": 6.860301918122336e-06,
      "loss": 0.0258,
      "step": 972980
    },
    {
      "epoch": 1.5923358404849342,
      "grad_norm": 1.1064130067825317,
      "learning_rate": 6.860236025908819e-06,
      "loss": 0.0271,
      "step": 973000
    },
    {
      "epoch": 1.5923685709235875,
      "grad_norm": 0.22287829220294952,
      "learning_rate": 6.860170133695302e-06,
      "loss": 0.0166,
      "step": 973020
    },
    {
      "epoch": 1.592401301362241,
      "grad_norm": 0.6969154477119446,
      "learning_rate": 6.860104241481784e-06,
      "loss": 0.0242,
      "step": 973040
    },
    {
      "epoch": 1.5924340318008943,
      "grad_norm": 0.9592799544334412,
      "learning_rate": 6.860038349268268e-06,
      "loss": 0.0131,
      "step": 973060
    },
    {
      "epoch": 1.5924667622395474,
      "grad_norm": 0.6066054105758667,
      "learning_rate": 6.8599724570547496e-06,
      "loss": 0.0138,
      "step": 973080
    },
    {
      "epoch": 1.592499492678201,
      "grad_norm": 0.3434053361415863,
      "learning_rate": 6.859906564841233e-06,
      "loss": 0.0183,
      "step": 973100
    },
    {
      "epoch": 1.5925322231168542,
      "grad_norm": 0.2719613015651703,
      "learning_rate": 6.859840672627717e-06,
      "loss": 0.0149,
      "step": 973120
    },
    {
      "epoch": 1.5925649535555075,
      "grad_norm": 0.6817736029624939,
      "learning_rate": 6.859774780414199e-06,
      "loss": 0.0257,
      "step": 973140
    },
    {
      "epoch": 1.592597683994161,
      "grad_norm": 0.1729072630405426,
      "learning_rate": 6.859708888200682e-06,
      "loss": 0.0233,
      "step": 973160
    },
    {
      "epoch": 1.5926304144328143,
      "grad_norm": 0.16827034950256348,
      "learning_rate": 6.859642995987165e-06,
      "loss": 0.022,
      "step": 973180
    },
    {
      "epoch": 1.5926631448714677,
      "grad_norm": 0.42083340883255005,
      "learning_rate": 6.859577103773648e-06,
      "loss": 0.0177,
      "step": 973200
    },
    {
      "epoch": 1.5926958753101208,
      "grad_norm": 0.5514223575592041,
      "learning_rate": 6.8595112115601304e-06,
      "loss": 0.0196,
      "step": 973220
    },
    {
      "epoch": 1.5927286057487744,
      "grad_norm": 0.19982680678367615,
      "learning_rate": 6.859445319346614e-06,
      "loss": 0.0179,
      "step": 973240
    },
    {
      "epoch": 1.5927613361874275,
      "grad_norm": 0.5000705122947693,
      "learning_rate": 6.859379427133096e-06,
      "loss": 0.015,
      "step": 973260
    },
    {
      "epoch": 1.592794066626081,
      "grad_norm": 1.273105263710022,
      "learning_rate": 6.8593135349195795e-06,
      "loss": 0.0197,
      "step": 973280
    },
    {
      "epoch": 1.5928267970647343,
      "grad_norm": 1.441514492034912,
      "learning_rate": 6.859247642706061e-06,
      "loss": 0.0171,
      "step": 973300
    },
    {
      "epoch": 1.5928595275033874,
      "grad_norm": 0.6201706528663635,
      "learning_rate": 6.859181750492545e-06,
      "loss": 0.017,
      "step": 973320
    },
    {
      "epoch": 1.592892257942041,
      "grad_norm": 0.6366904377937317,
      "learning_rate": 6.859115858279028e-06,
      "loss": 0.0198,
      "step": 973340
    },
    {
      "epoch": 1.5929249883806942,
      "grad_norm": 0.36272281408309937,
      "learning_rate": 6.8590499660655105e-06,
      "loss": 0.0164,
      "step": 973360
    },
    {
      "epoch": 1.5929577188193478,
      "grad_norm": 0.14675168693065643,
      "learning_rate": 6.858984073851993e-06,
      "loss": 0.0228,
      "step": 973380
    },
    {
      "epoch": 1.592990449258001,
      "grad_norm": 0.2382989525794983,
      "learning_rate": 6.858918181638477e-06,
      "loss": 0.0305,
      "step": 973400
    },
    {
      "epoch": 1.5930231796966543,
      "grad_norm": 0.3911312520503998,
      "learning_rate": 6.858852289424959e-06,
      "loss": 0.0167,
      "step": 973420
    },
    {
      "epoch": 1.5930559101353077,
      "grad_norm": 0.2375953495502472,
      "learning_rate": 6.858786397211442e-06,
      "loss": 0.0154,
      "step": 973440
    },
    {
      "epoch": 1.5930886405739608,
      "grad_norm": 0.3475533127784729,
      "learning_rate": 6.858720504997924e-06,
      "loss": 0.0171,
      "step": 973460
    },
    {
      "epoch": 1.5931213710126144,
      "grad_norm": 0.3306879699230194,
      "learning_rate": 6.858654612784408e-06,
      "loss": 0.0148,
      "step": 973480
    },
    {
      "epoch": 1.5931541014512676,
      "grad_norm": 0.7968406677246094,
      "learning_rate": 6.858588720570891e-06,
      "loss": 0.0299,
      "step": 973500
    },
    {
      "epoch": 1.593186831889921,
      "grad_norm": 1.108873963356018,
      "learning_rate": 6.858522828357373e-06,
      "loss": 0.0298,
      "step": 973520
    },
    {
      "epoch": 1.5932195623285743,
      "grad_norm": 0.2956618070602417,
      "learning_rate": 6.858456936143857e-06,
      "loss": 0.0203,
      "step": 973540
    },
    {
      "epoch": 1.5932522927672277,
      "grad_norm": 0.6082636117935181,
      "learning_rate": 6.8583910439303396e-06,
      "loss": 0.0218,
      "step": 973560
    },
    {
      "epoch": 1.593285023205881,
      "grad_norm": 0.7705690264701843,
      "learning_rate": 6.858325151716822e-06,
      "loss": 0.0258,
      "step": 973580
    },
    {
      "epoch": 1.5933177536445342,
      "grad_norm": 0.4976780414581299,
      "learning_rate": 6.858259259503305e-06,
      "loss": 0.0167,
      "step": 973600
    },
    {
      "epoch": 1.5933504840831878,
      "grad_norm": 0.6630891561508179,
      "learning_rate": 6.858193367289789e-06,
      "loss": 0.0238,
      "step": 973620
    },
    {
      "epoch": 1.593383214521841,
      "grad_norm": 0.5783860683441162,
      "learning_rate": 6.8581274750762705e-06,
      "loss": 0.0208,
      "step": 973640
    },
    {
      "epoch": 1.5934159449604943,
      "grad_norm": 1.141089677810669,
      "learning_rate": 6.858061582862754e-06,
      "loss": 0.0206,
      "step": 973660
    },
    {
      "epoch": 1.5934486753991477,
      "grad_norm": 0.5815860629081726,
      "learning_rate": 6.857995690649236e-06,
      "loss": 0.0228,
      "step": 973680
    },
    {
      "epoch": 1.593481405837801,
      "grad_norm": 0.7437410950660706,
      "learning_rate": 6.85792979843572e-06,
      "loss": 0.0204,
      "step": 973700
    },
    {
      "epoch": 1.5935141362764544,
      "grad_norm": 1.0703667402267456,
      "learning_rate": 6.8578639062222015e-06,
      "loss": 0.0231,
      "step": 973720
    },
    {
      "epoch": 1.5935468667151076,
      "grad_norm": 0.4102973937988281,
      "learning_rate": 6.857798014008685e-06,
      "loss": 0.0179,
      "step": 973740
    },
    {
      "epoch": 1.5935795971537612,
      "grad_norm": 0.2010185271501541,
      "learning_rate": 6.857732121795168e-06,
      "loss": 0.0211,
      "step": 973760
    },
    {
      "epoch": 1.5936123275924143,
      "grad_norm": 0.5053187608718872,
      "learning_rate": 6.8576662295816505e-06,
      "loss": 0.0213,
      "step": 973780
    },
    {
      "epoch": 1.5936450580310677,
      "grad_norm": 0.4536799490451813,
      "learning_rate": 6.857600337368133e-06,
      "loss": 0.0245,
      "step": 973800
    },
    {
      "epoch": 1.593677788469721,
      "grad_norm": 1.0376750230789185,
      "learning_rate": 6.857534445154617e-06,
      "loss": 0.0204,
      "step": 973820
    },
    {
      "epoch": 1.5937105189083745,
      "grad_norm": 0.8878010511398315,
      "learning_rate": 6.8574685529411005e-06,
      "loss": 0.0136,
      "step": 973840
    },
    {
      "epoch": 1.5937432493470278,
      "grad_norm": 1.5542094707489014,
      "learning_rate": 6.857402660727582e-06,
      "loss": 0.0342,
      "step": 973860
    },
    {
      "epoch": 1.593775979785681,
      "grad_norm": 0.4897523820400238,
      "learning_rate": 6.857336768514066e-06,
      "loss": 0.0247,
      "step": 973880
    },
    {
      "epoch": 1.5938087102243346,
      "grad_norm": 0.2003922164440155,
      "learning_rate": 6.857270876300548e-06,
      "loss": 0.0174,
      "step": 973900
    },
    {
      "epoch": 1.5938414406629877,
      "grad_norm": 0.4052717089653015,
      "learning_rate": 6.857204984087031e-06,
      "loss": 0.0177,
      "step": 973920
    },
    {
      "epoch": 1.593874171101641,
      "grad_norm": 0.4906657934188843,
      "learning_rate": 6.857139091873513e-06,
      "loss": 0.0136,
      "step": 973940
    },
    {
      "epoch": 1.5939069015402945,
      "grad_norm": 0.12507852911949158,
      "learning_rate": 6.857073199659997e-06,
      "loss": 0.0163,
      "step": 973960
    },
    {
      "epoch": 1.5939396319789478,
      "grad_norm": 0.2326512485742569,
      "learning_rate": 6.85700730744648e-06,
      "loss": 0.0255,
      "step": 973980
    },
    {
      "epoch": 1.5939723624176012,
      "grad_norm": 0.8266363143920898,
      "learning_rate": 6.856941415232962e-06,
      "loss": 0.0276,
      "step": 974000
    },
    {
      "epoch": 1.5940050928562544,
      "grad_norm": 0.5968429446220398,
      "learning_rate": 6.856875523019445e-06,
      "loss": 0.0181,
      "step": 974020
    },
    {
      "epoch": 1.594037823294908,
      "grad_norm": 0.9642706513404846,
      "learning_rate": 6.856809630805929e-06,
      "loss": 0.0259,
      "step": 974040
    },
    {
      "epoch": 1.594070553733561,
      "grad_norm": 0.3974977135658264,
      "learning_rate": 6.856743738592411e-06,
      "loss": 0.0195,
      "step": 974060
    },
    {
      "epoch": 1.5941032841722145,
      "grad_norm": 0.5743908286094666,
      "learning_rate": 6.856677846378894e-06,
      "loss": 0.0099,
      "step": 974080
    },
    {
      "epoch": 1.5941360146108678,
      "grad_norm": 0.4331103265285492,
      "learning_rate": 6.856611954165376e-06,
      "loss": 0.0169,
      "step": 974100
    },
    {
      "epoch": 1.594168745049521,
      "grad_norm": 0.4032248258590698,
      "learning_rate": 6.85654606195186e-06,
      "loss": 0.0134,
      "step": 974120
    },
    {
      "epoch": 1.5942014754881746,
      "grad_norm": 0.26433873176574707,
      "learning_rate": 6.856480169738342e-06,
      "loss": 0.0176,
      "step": 974140
    },
    {
      "epoch": 1.5942342059268277,
      "grad_norm": 0.1517353653907776,
      "learning_rate": 6.856414277524825e-06,
      "loss": 0.0214,
      "step": 974160
    },
    {
      "epoch": 1.5942669363654813,
      "grad_norm": 0.31089895963668823,
      "learning_rate": 6.856348385311309e-06,
      "loss": 0.0103,
      "step": 974180
    },
    {
      "epoch": 1.5942996668041345,
      "grad_norm": 0.6392219066619873,
      "learning_rate": 6.8562824930977915e-06,
      "loss": 0.02,
      "step": 974200
    },
    {
      "epoch": 1.5943323972427879,
      "grad_norm": 0.17028102278709412,
      "learning_rate": 6.856216600884274e-06,
      "loss": 0.0169,
      "step": 974220
    },
    {
      "epoch": 1.5943651276814412,
      "grad_norm": 0.24616414308547974,
      "learning_rate": 6.856150708670757e-06,
      "loss": 0.0147,
      "step": 974240
    },
    {
      "epoch": 1.5943978581200944,
      "grad_norm": 0.98316890001297,
      "learning_rate": 6.8560848164572405e-06,
      "loss": 0.0205,
      "step": 974260
    },
    {
      "epoch": 1.594430588558748,
      "grad_norm": 0.4733768403530121,
      "learning_rate": 6.856018924243722e-06,
      "loss": 0.0236,
      "step": 974280
    },
    {
      "epoch": 1.5944633189974011,
      "grad_norm": 0.1348547786474228,
      "learning_rate": 6.855953032030206e-06,
      "loss": 0.0194,
      "step": 974300
    },
    {
      "epoch": 1.5944960494360545,
      "grad_norm": 0.9316121935844421,
      "learning_rate": 6.855887139816688e-06,
      "loss": 0.0258,
      "step": 974320
    },
    {
      "epoch": 1.5945287798747079,
      "grad_norm": 0.2851807177066803,
      "learning_rate": 6.8558212476031715e-06,
      "loss": 0.0324,
      "step": 974340
    },
    {
      "epoch": 1.5945615103133612,
      "grad_norm": 0.6205829977989197,
      "learning_rate": 6.855755355389654e-06,
      "loss": 0.0233,
      "step": 974360
    },
    {
      "epoch": 1.5945942407520146,
      "grad_norm": 0.22023236751556396,
      "learning_rate": 6.855689463176137e-06,
      "loss": 0.0236,
      "step": 974380
    },
    {
      "epoch": 1.5946269711906678,
      "grad_norm": 0.3393203616142273,
      "learning_rate": 6.85562357096262e-06,
      "loss": 0.0217,
      "step": 974400
    },
    {
      "epoch": 1.5946597016293214,
      "grad_norm": 0.2366640418767929,
      "learning_rate": 6.855557678749103e-06,
      "loss": 0.0191,
      "step": 974420
    },
    {
      "epoch": 1.5946924320679745,
      "grad_norm": 0.6467134356498718,
      "learning_rate": 6.855491786535585e-06,
      "loss": 0.0164,
      "step": 974440
    },
    {
      "epoch": 1.5947251625066279,
      "grad_norm": 0.36493220925331116,
      "learning_rate": 6.855425894322069e-06,
      "loss": 0.0141,
      "step": 974460
    },
    {
      "epoch": 1.5947578929452813,
      "grad_norm": 0.07565946877002716,
      "learning_rate": 6.855360002108551e-06,
      "loss": 0.0157,
      "step": 974480
    },
    {
      "epoch": 1.5947906233839346,
      "grad_norm": 0.5921943783760071,
      "learning_rate": 6.855294109895034e-06,
      "loss": 0.0207,
      "step": 974500
    },
    {
      "epoch": 1.594823353822588,
      "grad_norm": 0.38330286741256714,
      "learning_rate": 6.855228217681517e-06,
      "loss": 0.0247,
      "step": 974520
    },
    {
      "epoch": 1.5948560842612411,
      "grad_norm": 0.8823025822639465,
      "learning_rate": 6.855162325468e-06,
      "loss": 0.0177,
      "step": 974540
    },
    {
      "epoch": 1.5948888146998947,
      "grad_norm": 0.4814136326313019,
      "learning_rate": 6.855096433254483e-06,
      "loss": 0.0295,
      "step": 974560
    },
    {
      "epoch": 1.594921545138548,
      "grad_norm": 0.6708694696426392,
      "learning_rate": 6.855030541040966e-06,
      "loss": 0.0231,
      "step": 974580
    },
    {
      "epoch": 1.5949542755772013,
      "grad_norm": 0.5255374908447266,
      "learning_rate": 6.854964648827449e-06,
      "loss": 0.0127,
      "step": 974600
    },
    {
      "epoch": 1.5949870060158546,
      "grad_norm": 0.4746887981891632,
      "learning_rate": 6.8548987566139315e-06,
      "loss": 0.02,
      "step": 974620
    },
    {
      "epoch": 1.595019736454508,
      "grad_norm": 0.40444445610046387,
      "learning_rate": 6.854832864400415e-06,
      "loss": 0.0179,
      "step": 974640
    },
    {
      "epoch": 1.5950524668931614,
      "grad_norm": 0.956046462059021,
      "learning_rate": 6.854766972186897e-06,
      "loss": 0.0209,
      "step": 974660
    },
    {
      "epoch": 1.5950851973318145,
      "grad_norm": 0.9773014187812805,
      "learning_rate": 6.854701079973381e-06,
      "loss": 0.0205,
      "step": 974680
    },
    {
      "epoch": 1.5951179277704681,
      "grad_norm": 0.6356708407402039,
      "learning_rate": 6.8546351877598625e-06,
      "loss": 0.0213,
      "step": 974700
    },
    {
      "epoch": 1.5951506582091213,
      "grad_norm": 0.4025730788707733,
      "learning_rate": 6.854569295546346e-06,
      "loss": 0.022,
      "step": 974720
    },
    {
      "epoch": 1.5951833886477746,
      "grad_norm": 0.3922257721424103,
      "learning_rate": 6.854503403332828e-06,
      "loss": 0.0207,
      "step": 974740
    },
    {
      "epoch": 1.595216119086428,
      "grad_norm": 0.12699167430400848,
      "learning_rate": 6.8544375111193116e-06,
      "loss": 0.0133,
      "step": 974760
    },
    {
      "epoch": 1.5952488495250812,
      "grad_norm": 0.22377026081085205,
      "learning_rate": 6.854371618905794e-06,
      "loss": 0.0092,
      "step": 974780
    },
    {
      "epoch": 1.5952815799637348,
      "grad_norm": 0.14310769736766815,
      "learning_rate": 6.854305726692277e-06,
      "loss": 0.0158,
      "step": 974800
    },
    {
      "epoch": 1.595314310402388,
      "grad_norm": 0.5123357176780701,
      "learning_rate": 6.85423983447876e-06,
      "loss": 0.0207,
      "step": 974820
    },
    {
      "epoch": 1.5953470408410415,
      "grad_norm": 0.15095211565494537,
      "learning_rate": 6.854173942265243e-06,
      "loss": 0.02,
      "step": 974840
    },
    {
      "epoch": 1.5953797712796947,
      "grad_norm": 0.7184986472129822,
      "learning_rate": 6.854108050051725e-06,
      "loss": 0.0265,
      "step": 974860
    },
    {
      "epoch": 1.595412501718348,
      "grad_norm": 0.30261364579200745,
      "learning_rate": 6.854042157838209e-06,
      "loss": 0.0159,
      "step": 974880
    },
    {
      "epoch": 1.5954452321570014,
      "grad_norm": 1.1447181701660156,
      "learning_rate": 6.8539762656246924e-06,
      "loss": 0.0282,
      "step": 974900
    },
    {
      "epoch": 1.5954779625956546,
      "grad_norm": 1.8743045330047607,
      "learning_rate": 6.853910373411174e-06,
      "loss": 0.0203,
      "step": 974920
    },
    {
      "epoch": 1.5955106930343081,
      "grad_norm": 0.25166207551956177,
      "learning_rate": 6.853844481197658e-06,
      "loss": 0.0204,
      "step": 974940
    },
    {
      "epoch": 1.5955434234729613,
      "grad_norm": 0.30215996503829956,
      "learning_rate": 6.85377858898414e-06,
      "loss": 0.0136,
      "step": 974960
    },
    {
      "epoch": 1.5955761539116147,
      "grad_norm": 0.29529905319213867,
      "learning_rate": 6.853712696770623e-06,
      "loss": 0.0168,
      "step": 974980
    },
    {
      "epoch": 1.595608884350268,
      "grad_norm": 0.3855925500392914,
      "learning_rate": 6.853646804557106e-06,
      "loss": 0.0225,
      "step": 975000
    },
    {
      "epoch": 1.5956416147889214,
      "grad_norm": 0.08024758845567703,
      "learning_rate": 6.853580912343589e-06,
      "loss": 0.0273,
      "step": 975020
    },
    {
      "epoch": 1.5956743452275748,
      "grad_norm": 0.35244372487068176,
      "learning_rate": 6.853515020130072e-06,
      "loss": 0.0141,
      "step": 975040
    },
    {
      "epoch": 1.595707075666228,
      "grad_norm": 0.6527783870697021,
      "learning_rate": 6.853449127916555e-06,
      "loss": 0.0202,
      "step": 975060
    },
    {
      "epoch": 1.5957398061048815,
      "grad_norm": 0.6077830791473389,
      "learning_rate": 6.853383235703037e-06,
      "loss": 0.0288,
      "step": 975080
    },
    {
      "epoch": 1.5957725365435347,
      "grad_norm": 1.0904810428619385,
      "learning_rate": 6.853317343489521e-06,
      "loss": 0.0272,
      "step": 975100
    },
    {
      "epoch": 1.595805266982188,
      "grad_norm": 0.4325646460056305,
      "learning_rate": 6.8532514512760026e-06,
      "loss": 0.0124,
      "step": 975120
    },
    {
      "epoch": 1.5958379974208414,
      "grad_norm": 1.652384877204895,
      "learning_rate": 6.853185559062486e-06,
      "loss": 0.0226,
      "step": 975140
    },
    {
      "epoch": 1.5958707278594948,
      "grad_norm": 0.7848466634750366,
      "learning_rate": 6.853119666848969e-06,
      "loss": 0.0205,
      "step": 975160
    },
    {
      "epoch": 1.5959034582981482,
      "grad_norm": 0.39895978569984436,
      "learning_rate": 6.853053774635452e-06,
      "loss": 0.0221,
      "step": 975180
    },
    {
      "epoch": 1.5959361887368013,
      "grad_norm": 0.8463345170021057,
      "learning_rate": 6.852987882421934e-06,
      "loss": 0.0161,
      "step": 975200
    },
    {
      "epoch": 1.595968919175455,
      "grad_norm": 0.19223880767822266,
      "learning_rate": 6.852921990208418e-06,
      "loss": 0.0205,
      "step": 975220
    },
    {
      "epoch": 1.596001649614108,
      "grad_norm": 0.825370728969574,
      "learning_rate": 6.852856097994901e-06,
      "loss": 0.0165,
      "step": 975240
    },
    {
      "epoch": 1.5960343800527614,
      "grad_norm": 0.6207689642906189,
      "learning_rate": 6.8527902057813834e-06,
      "loss": 0.0228,
      "step": 975260
    },
    {
      "epoch": 1.5960671104914148,
      "grad_norm": 0.35569727420806885,
      "learning_rate": 6.852724313567867e-06,
      "loss": 0.0197,
      "step": 975280
    },
    {
      "epoch": 1.5960998409300682,
      "grad_norm": 1.1591800451278687,
      "learning_rate": 6.852658421354349e-06,
      "loss": 0.0225,
      "step": 975300
    },
    {
      "epoch": 1.5961325713687216,
      "grad_norm": 0.7162136435508728,
      "learning_rate": 6.8525925291408325e-06,
      "loss": 0.016,
      "step": 975320
    },
    {
      "epoch": 1.5961653018073747,
      "grad_norm": 0.40716564655303955,
      "learning_rate": 6.852526636927314e-06,
      "loss": 0.0154,
      "step": 975340
    },
    {
      "epoch": 1.5961980322460283,
      "grad_norm": 0.45731648802757263,
      "learning_rate": 6.852460744713798e-06,
      "loss": 0.0233,
      "step": 975360
    },
    {
      "epoch": 1.5962307626846814,
      "grad_norm": 0.38271355628967285,
      "learning_rate": 6.852394852500281e-06,
      "loss": 0.0232,
      "step": 975380
    },
    {
      "epoch": 1.5962634931233348,
      "grad_norm": 1.2839006185531616,
      "learning_rate": 6.8523289602867635e-06,
      "loss": 0.0208,
      "step": 975400
    },
    {
      "epoch": 1.5962962235619882,
      "grad_norm": 0.1692565232515335,
      "learning_rate": 6.852263068073246e-06,
      "loss": 0.0235,
      "step": 975420
    },
    {
      "epoch": 1.5963289540006416,
      "grad_norm": 0.6514762043952942,
      "learning_rate": 6.85219717585973e-06,
      "loss": 0.0179,
      "step": 975440
    },
    {
      "epoch": 1.596361684439295,
      "grad_norm": 0.3135973811149597,
      "learning_rate": 6.852131283646212e-06,
      "loss": 0.0145,
      "step": 975460
    },
    {
      "epoch": 1.596394414877948,
      "grad_norm": 0.6602524518966675,
      "learning_rate": 6.852065391432695e-06,
      "loss": 0.0172,
      "step": 975480
    },
    {
      "epoch": 1.5964271453166017,
      "grad_norm": 0.8514083027839661,
      "learning_rate": 6.851999499219177e-06,
      "loss": 0.0127,
      "step": 975500
    },
    {
      "epoch": 1.5964598757552548,
      "grad_norm": 0.36134347319602966,
      "learning_rate": 6.851933607005661e-06,
      "loss": 0.0183,
      "step": 975520
    },
    {
      "epoch": 1.5964926061939082,
      "grad_norm": 0.32303687930107117,
      "learning_rate": 6.8518677147921435e-06,
      "loss": 0.0174,
      "step": 975540
    },
    {
      "epoch": 1.5965253366325616,
      "grad_norm": 0.7177971601486206,
      "learning_rate": 6.851801822578626e-06,
      "loss": 0.0215,
      "step": 975560
    },
    {
      "epoch": 1.5965580670712147,
      "grad_norm": 0.24645744264125824,
      "learning_rate": 6.85173593036511e-06,
      "loss": 0.0263,
      "step": 975580
    },
    {
      "epoch": 1.5965907975098683,
      "grad_norm": 0.27885469794273376,
      "learning_rate": 6.8516700381515926e-06,
      "loss": 0.0198,
      "step": 975600
    },
    {
      "epoch": 1.5966235279485215,
      "grad_norm": 0.684707522392273,
      "learning_rate": 6.851604145938075e-06,
      "loss": 0.0189,
      "step": 975620
    },
    {
      "epoch": 1.596656258387175,
      "grad_norm": 0.4576663374900818,
      "learning_rate": 6.851538253724558e-06,
      "loss": 0.0215,
      "step": 975640
    },
    {
      "epoch": 1.5966889888258282,
      "grad_norm": 0.9281805753707886,
      "learning_rate": 6.851472361511042e-06,
      "loss": 0.0144,
      "step": 975660
    },
    {
      "epoch": 1.5967217192644816,
      "grad_norm": 0.08596912026405334,
      "learning_rate": 6.8514064692975235e-06,
      "loss": 0.0142,
      "step": 975680
    },
    {
      "epoch": 1.596754449703135,
      "grad_norm": 0.834995687007904,
      "learning_rate": 6.851340577084007e-06,
      "loss": 0.0154,
      "step": 975700
    },
    {
      "epoch": 1.596787180141788,
      "grad_norm": 0.34993696212768555,
      "learning_rate": 6.851274684870489e-06,
      "loss": 0.0199,
      "step": 975720
    },
    {
      "epoch": 1.5968199105804417,
      "grad_norm": 0.3490043878555298,
      "learning_rate": 6.851208792656973e-06,
      "loss": 0.0174,
      "step": 975740
    },
    {
      "epoch": 1.5968526410190949,
      "grad_norm": 0.5968341827392578,
      "learning_rate": 6.8511429004434545e-06,
      "loss": 0.0151,
      "step": 975760
    },
    {
      "epoch": 1.5968853714577482,
      "grad_norm": 0.24381625652313232,
      "learning_rate": 6.851077008229938e-06,
      "loss": 0.019,
      "step": 975780
    },
    {
      "epoch": 1.5969181018964016,
      "grad_norm": 0.7366082668304443,
      "learning_rate": 6.851011116016421e-06,
      "loss": 0.0258,
      "step": 975800
    },
    {
      "epoch": 1.596950832335055,
      "grad_norm": 0.5223879218101501,
      "learning_rate": 6.8509452238029035e-06,
      "loss": 0.0268,
      "step": 975820
    },
    {
      "epoch": 1.5969835627737083,
      "grad_norm": 0.5178454518318176,
      "learning_rate": 6.850879331589386e-06,
      "loss": 0.0203,
      "step": 975840
    },
    {
      "epoch": 1.5970162932123615,
      "grad_norm": 0.30407610535621643,
      "learning_rate": 6.85081343937587e-06,
      "loss": 0.0178,
      "step": 975860
    },
    {
      "epoch": 1.597049023651015,
      "grad_norm": 0.7095211744308472,
      "learning_rate": 6.850747547162352e-06,
      "loss": 0.0291,
      "step": 975880
    },
    {
      "epoch": 1.5970817540896682,
      "grad_norm": 0.8940027356147766,
      "learning_rate": 6.850681654948835e-06,
      "loss": 0.0197,
      "step": 975900
    },
    {
      "epoch": 1.5971144845283216,
      "grad_norm": 0.23548045754432678,
      "learning_rate": 6.850615762735317e-06,
      "loss": 0.023,
      "step": 975920
    },
    {
      "epoch": 1.597147214966975,
      "grad_norm": 0.31321436166763306,
      "learning_rate": 6.850549870521801e-06,
      "loss": 0.0131,
      "step": 975940
    },
    {
      "epoch": 1.5971799454056284,
      "grad_norm": 0.19092251360416412,
      "learning_rate": 6.850483978308284e-06,
      "loss": 0.018,
      "step": 975960
    },
    {
      "epoch": 1.5972126758442817,
      "grad_norm": 0.2882227897644043,
      "learning_rate": 6.850418086094766e-06,
      "loss": 0.0236,
      "step": 975980
    },
    {
      "epoch": 1.5972454062829349,
      "grad_norm": 0.4661184251308441,
      "learning_rate": 6.85035219388125e-06,
      "loss": 0.0181,
      "step": 976000
    },
    {
      "epoch": 1.5972781367215885,
      "grad_norm": 0.45065903663635254,
      "learning_rate": 6.850286301667733e-06,
      "loss": 0.0162,
      "step": 976020
    },
    {
      "epoch": 1.5973108671602416,
      "grad_norm": 1.0076806545257568,
      "learning_rate": 6.850220409454215e-06,
      "loss": 0.0161,
      "step": 976040
    },
    {
      "epoch": 1.597343597598895,
      "grad_norm": 0.6320891380310059,
      "learning_rate": 6.850154517240698e-06,
      "loss": 0.0274,
      "step": 976060
    },
    {
      "epoch": 1.5973763280375484,
      "grad_norm": 0.34549087285995483,
      "learning_rate": 6.850088625027182e-06,
      "loss": 0.0181,
      "step": 976080
    },
    {
      "epoch": 1.5974090584762017,
      "grad_norm": 0.3653773367404938,
      "learning_rate": 6.850022732813664e-06,
      "loss": 0.0144,
      "step": 976100
    },
    {
      "epoch": 1.597441788914855,
      "grad_norm": 2.386885404586792,
      "learning_rate": 6.849956840600147e-06,
      "loss": 0.0183,
      "step": 976120
    },
    {
      "epoch": 1.5974745193535083,
      "grad_norm": 0.3988100588321686,
      "learning_rate": 6.849890948386629e-06,
      "loss": 0.0239,
      "step": 976140
    },
    {
      "epoch": 1.5975072497921619,
      "grad_norm": 1.4033628702163696,
      "learning_rate": 6.849825056173113e-06,
      "loss": 0.022,
      "step": 976160
    },
    {
      "epoch": 1.597539980230815,
      "grad_norm": 0.47482335567474365,
      "learning_rate": 6.849759163959595e-06,
      "loss": 0.0166,
      "step": 976180
    },
    {
      "epoch": 1.5975727106694684,
      "grad_norm": 0.35838156938552856,
      "learning_rate": 6.849693271746078e-06,
      "loss": 0.0207,
      "step": 976200
    },
    {
      "epoch": 1.5976054411081217,
      "grad_norm": 0.3466579020023346,
      "learning_rate": 6.849627379532561e-06,
      "loss": 0.0141,
      "step": 976220
    },
    {
      "epoch": 1.5976381715467751,
      "grad_norm": 1.2931499481201172,
      "learning_rate": 6.8495614873190445e-06,
      "loss": 0.0184,
      "step": 976240
    },
    {
      "epoch": 1.5976709019854285,
      "grad_norm": 0.6423981189727783,
      "learning_rate": 6.849495595105526e-06,
      "loss": 0.0242,
      "step": 976260
    },
    {
      "epoch": 1.5977036324240816,
      "grad_norm": 0.7980770468711853,
      "learning_rate": 6.84942970289201e-06,
      "loss": 0.0233,
      "step": 976280
    },
    {
      "epoch": 1.5977363628627352,
      "grad_norm": 0.527465283870697,
      "learning_rate": 6.8493638106784935e-06,
      "loss": 0.0232,
      "step": 976300
    },
    {
      "epoch": 1.5977690933013884,
      "grad_norm": 1.14207124710083,
      "learning_rate": 6.8492979184649754e-06,
      "loss": 0.0207,
      "step": 976320
    },
    {
      "epoch": 1.5978018237400418,
      "grad_norm": 0.9136250615119934,
      "learning_rate": 6.849232026251459e-06,
      "loss": 0.0261,
      "step": 976340
    },
    {
      "epoch": 1.5978345541786951,
      "grad_norm": 1.3565430641174316,
      "learning_rate": 6.849166134037941e-06,
      "loss": 0.0218,
      "step": 976360
    },
    {
      "epoch": 1.5978672846173483,
      "grad_norm": 0.2199510633945465,
      "learning_rate": 6.8491002418244245e-06,
      "loss": 0.0175,
      "step": 976380
    },
    {
      "epoch": 1.5979000150560019,
      "grad_norm": 0.2599375545978546,
      "learning_rate": 6.849034349610907e-06,
      "loss": 0.0166,
      "step": 976400
    },
    {
      "epoch": 1.597932745494655,
      "grad_norm": 0.7894086241722107,
      "learning_rate": 6.84896845739739e-06,
      "loss": 0.0241,
      "step": 976420
    },
    {
      "epoch": 1.5979654759333086,
      "grad_norm": 0.45335572957992554,
      "learning_rate": 6.848902565183873e-06,
      "loss": 0.0181,
      "step": 976440
    },
    {
      "epoch": 1.5979982063719618,
      "grad_norm": 0.16305987536907196,
      "learning_rate": 6.848836672970356e-06,
      "loss": 0.0273,
      "step": 976460
    },
    {
      "epoch": 1.5980309368106151,
      "grad_norm": 0.26584309339523315,
      "learning_rate": 6.848770780756838e-06,
      "loss": 0.0204,
      "step": 976480
    },
    {
      "epoch": 1.5980636672492685,
      "grad_norm": 0.1654016673564911,
      "learning_rate": 6.848704888543322e-06,
      "loss": 0.0144,
      "step": 976500
    },
    {
      "epoch": 1.5980963976879217,
      "grad_norm": 3.597795248031616,
      "learning_rate": 6.848638996329804e-06,
      "loss": 0.0173,
      "step": 976520
    },
    {
      "epoch": 1.5981291281265753,
      "grad_norm": 1.101088523864746,
      "learning_rate": 6.848573104116287e-06,
      "loss": 0.026,
      "step": 976540
    },
    {
      "epoch": 1.5981618585652284,
      "grad_norm": 0.7545142769813538,
      "learning_rate": 6.848507211902769e-06,
      "loss": 0.0196,
      "step": 976560
    },
    {
      "epoch": 1.5981945890038818,
      "grad_norm": 0.3100111186504364,
      "learning_rate": 6.848441319689253e-06,
      "loss": 0.0146,
      "step": 976580
    },
    {
      "epoch": 1.5982273194425352,
      "grad_norm": 1.0966050624847412,
      "learning_rate": 6.8483754274757355e-06,
      "loss": 0.0205,
      "step": 976600
    },
    {
      "epoch": 1.5982600498811885,
      "grad_norm": 0.5816019177436829,
      "learning_rate": 6.848309535262219e-06,
      "loss": 0.0135,
      "step": 976620
    },
    {
      "epoch": 1.598292780319842,
      "grad_norm": 1.3103607892990112,
      "learning_rate": 6.848243643048702e-06,
      "loss": 0.0211,
      "step": 976640
    },
    {
      "epoch": 1.598325510758495,
      "grad_norm": 0.8191408514976501,
      "learning_rate": 6.8481777508351845e-06,
      "loss": 0.0212,
      "step": 976660
    },
    {
      "epoch": 1.5983582411971486,
      "grad_norm": 0.300015389919281,
      "learning_rate": 6.848111858621668e-06,
      "loss": 0.0211,
      "step": 976680
    },
    {
      "epoch": 1.5983909716358018,
      "grad_norm": 0.855728268623352,
      "learning_rate": 6.84804596640815e-06,
      "loss": 0.0213,
      "step": 976700
    },
    {
      "epoch": 1.5984237020744552,
      "grad_norm": 0.7340803742408752,
      "learning_rate": 6.847980074194634e-06,
      "loss": 0.0241,
      "step": 976720
    },
    {
      "epoch": 1.5984564325131085,
      "grad_norm": 0.7297040820121765,
      "learning_rate": 6.8479141819811155e-06,
      "loss": 0.0184,
      "step": 976740
    },
    {
      "epoch": 1.598489162951762,
      "grad_norm": 0.14368560910224915,
      "learning_rate": 6.847848289767599e-06,
      "loss": 0.0206,
      "step": 976760
    },
    {
      "epoch": 1.5985218933904153,
      "grad_norm": 0.1578553169965744,
      "learning_rate": 6.847782397554081e-06,
      "loss": 0.0222,
      "step": 976780
    },
    {
      "epoch": 1.5985546238290684,
      "grad_norm": 0.305377721786499,
      "learning_rate": 6.8477165053405646e-06,
      "loss": 0.0141,
      "step": 976800
    },
    {
      "epoch": 1.598587354267722,
      "grad_norm": 0.7290967702865601,
      "learning_rate": 6.847650613127047e-06,
      "loss": 0.0216,
      "step": 976820
    },
    {
      "epoch": 1.5986200847063752,
      "grad_norm": 1.3173799514770508,
      "learning_rate": 6.84758472091353e-06,
      "loss": 0.0263,
      "step": 976840
    },
    {
      "epoch": 1.5986528151450285,
      "grad_norm": 0.20587660372257233,
      "learning_rate": 6.847518828700013e-06,
      "loss": 0.0196,
      "step": 976860
    },
    {
      "epoch": 1.598685545583682,
      "grad_norm": 0.45757701992988586,
      "learning_rate": 6.847452936486496e-06,
      "loss": 0.0215,
      "step": 976880
    },
    {
      "epoch": 1.5987182760223353,
      "grad_norm": 0.6389749050140381,
      "learning_rate": 6.847387044272978e-06,
      "loss": 0.0184,
      "step": 976900
    },
    {
      "epoch": 1.5987510064609887,
      "grad_norm": 0.45490017533302307,
      "learning_rate": 6.847321152059462e-06,
      "loss": 0.0126,
      "step": 976920
    },
    {
      "epoch": 1.5987837368996418,
      "grad_norm": 0.9669018983840942,
      "learning_rate": 6.847255259845944e-06,
      "loss": 0.0272,
      "step": 976940
    },
    {
      "epoch": 1.5988164673382954,
      "grad_norm": 0.7913476228713989,
      "learning_rate": 6.847189367632427e-06,
      "loss": 0.0153,
      "step": 976960
    },
    {
      "epoch": 1.5988491977769486,
      "grad_norm": 0.8451572060585022,
      "learning_rate": 6.84712347541891e-06,
      "loss": 0.0194,
      "step": 976980
    },
    {
      "epoch": 1.598881928215602,
      "grad_norm": 0.590189516544342,
      "learning_rate": 6.847057583205393e-06,
      "loss": 0.024,
      "step": 977000
    },
    {
      "epoch": 1.5989146586542553,
      "grad_norm": 0.6897076368331909,
      "learning_rate": 6.846991690991876e-06,
      "loss": 0.01,
      "step": 977020
    },
    {
      "epoch": 1.5989473890929087,
      "grad_norm": 0.5504245758056641,
      "learning_rate": 6.846925798778359e-06,
      "loss": 0.0223,
      "step": 977040
    },
    {
      "epoch": 1.598980119531562,
      "grad_norm": 0.6195828318595886,
      "learning_rate": 6.846859906564842e-06,
      "loss": 0.0189,
      "step": 977060
    },
    {
      "epoch": 1.5990128499702152,
      "grad_norm": 1.3473327159881592,
      "learning_rate": 6.846794014351325e-06,
      "loss": 0.0257,
      "step": 977080
    },
    {
      "epoch": 1.5990455804088688,
      "grad_norm": 0.2637825906276703,
      "learning_rate": 6.846728122137808e-06,
      "loss": 0.0234,
      "step": 977100
    },
    {
      "epoch": 1.599078310847522,
      "grad_norm": 0.7183689475059509,
      "learning_rate": 6.84666222992429e-06,
      "loss": 0.0175,
      "step": 977120
    },
    {
      "epoch": 1.5991110412861753,
      "grad_norm": 1.0847041606903076,
      "learning_rate": 6.846596337710774e-06,
      "loss": 0.0239,
      "step": 977140
    },
    {
      "epoch": 1.5991437717248287,
      "grad_norm": 0.24909575283527374,
      "learning_rate": 6.846530445497256e-06,
      "loss": 0.0166,
      "step": 977160
    },
    {
      "epoch": 1.5991765021634818,
      "grad_norm": 0.3866066038608551,
      "learning_rate": 6.846464553283739e-06,
      "loss": 0.0222,
      "step": 977180
    },
    {
      "epoch": 1.5992092326021354,
      "grad_norm": 0.7510697245597839,
      "learning_rate": 6.846398661070222e-06,
      "loss": 0.0199,
      "step": 977200
    },
    {
      "epoch": 1.5992419630407886,
      "grad_norm": 0.8453870415687561,
      "learning_rate": 6.846332768856705e-06,
      "loss": 0.0234,
      "step": 977220
    },
    {
      "epoch": 1.5992746934794422,
      "grad_norm": 0.36542826890945435,
      "learning_rate": 6.846266876643187e-06,
      "loss": 0.0215,
      "step": 977240
    },
    {
      "epoch": 1.5993074239180953,
      "grad_norm": 1.1678544282913208,
      "learning_rate": 6.846200984429671e-06,
      "loss": 0.0137,
      "step": 977260
    },
    {
      "epoch": 1.5993401543567487,
      "grad_norm": 0.650871217250824,
      "learning_rate": 6.846135092216153e-06,
      "loss": 0.0241,
      "step": 977280
    },
    {
      "epoch": 1.599372884795402,
      "grad_norm": 0.04219682887196541,
      "learning_rate": 6.8460692000026365e-06,
      "loss": 0.0213,
      "step": 977300
    },
    {
      "epoch": 1.5994056152340552,
      "grad_norm": 0.21346734464168549,
      "learning_rate": 6.846003307789118e-06,
      "loss": 0.0234,
      "step": 977320
    },
    {
      "epoch": 1.5994383456727088,
      "grad_norm": 0.48116254806518555,
      "learning_rate": 6.845937415575602e-06,
      "loss": 0.0225,
      "step": 977340
    },
    {
      "epoch": 1.599471076111362,
      "grad_norm": 0.911348283290863,
      "learning_rate": 6.8458715233620855e-06,
      "loss": 0.0224,
      "step": 977360
    },
    {
      "epoch": 1.5995038065500153,
      "grad_norm": 1.4953221082687378,
      "learning_rate": 6.845805631148567e-06,
      "loss": 0.0205,
      "step": 977380
    },
    {
      "epoch": 1.5995365369886687,
      "grad_norm": 0.46586552262306213,
      "learning_rate": 6.845739738935051e-06,
      "loss": 0.0167,
      "step": 977400
    },
    {
      "epoch": 1.599569267427322,
      "grad_norm": 0.062146302312612534,
      "learning_rate": 6.845673846721534e-06,
      "loss": 0.019,
      "step": 977420
    },
    {
      "epoch": 1.5996019978659755,
      "grad_norm": 0.6925604939460754,
      "learning_rate": 6.8456079545080165e-06,
      "loss": 0.0144,
      "step": 977440
    },
    {
      "epoch": 1.5996347283046286,
      "grad_norm": 0.09346947073936462,
      "learning_rate": 6.845542062294499e-06,
      "loss": 0.0109,
      "step": 977460
    },
    {
      "epoch": 1.5996674587432822,
      "grad_norm": 0.4289030134677887,
      "learning_rate": 6.845476170080983e-06,
      "loss": 0.0218,
      "step": 977480
    },
    {
      "epoch": 1.5997001891819354,
      "grad_norm": 0.6624144911766052,
      "learning_rate": 6.845410277867465e-06,
      "loss": 0.0224,
      "step": 977500
    },
    {
      "epoch": 1.5997329196205887,
      "grad_norm": 1.933712124824524,
      "learning_rate": 6.845344385653948e-06,
      "loss": 0.0236,
      "step": 977520
    },
    {
      "epoch": 1.599765650059242,
      "grad_norm": 0.3415350019931793,
      "learning_rate": 6.84527849344043e-06,
      "loss": 0.0169,
      "step": 977540
    },
    {
      "epoch": 1.5997983804978955,
      "grad_norm": 0.2865180969238281,
      "learning_rate": 6.845212601226914e-06,
      "loss": 0.0244,
      "step": 977560
    },
    {
      "epoch": 1.5998311109365488,
      "grad_norm": 0.8784322738647461,
      "learning_rate": 6.845146709013396e-06,
      "loss": 0.0185,
      "step": 977580
    },
    {
      "epoch": 1.599863841375202,
      "grad_norm": 0.6027640104293823,
      "learning_rate": 6.845080816799879e-06,
      "loss": 0.0227,
      "step": 977600
    },
    {
      "epoch": 1.5998965718138556,
      "grad_norm": 0.24589042365550995,
      "learning_rate": 6.845014924586362e-06,
      "loss": 0.0293,
      "step": 977620
    },
    {
      "epoch": 1.5999293022525087,
      "grad_norm": 0.5760128498077393,
      "learning_rate": 6.844949032372845e-06,
      "loss": 0.0192,
      "step": 977640
    },
    {
      "epoch": 1.599962032691162,
      "grad_norm": 0.2495686411857605,
      "learning_rate": 6.8448831401593275e-06,
      "loss": 0.0204,
      "step": 977660
    },
    {
      "epoch": 1.5999947631298155,
      "grad_norm": 0.7476480603218079,
      "learning_rate": 6.844817247945811e-06,
      "loss": 0.0194,
      "step": 977680
    },
    {
      "epoch": 1.6000274935684689,
      "grad_norm": 2.1021125316619873,
      "learning_rate": 6.844751355732295e-06,
      "loss": 0.0276,
      "step": 977700
    },
    {
      "epoch": 1.6000602240071222,
      "grad_norm": 0.7550284266471863,
      "learning_rate": 6.8446854635187765e-06,
      "loss": 0.0262,
      "step": 977720
    },
    {
      "epoch": 1.6000929544457754,
      "grad_norm": 0.09522607922554016,
      "learning_rate": 6.84461957130526e-06,
      "loss": 0.0195,
      "step": 977740
    },
    {
      "epoch": 1.600125684884429,
      "grad_norm": 0.28724411129951477,
      "learning_rate": 6.844553679091742e-06,
      "loss": 0.0219,
      "step": 977760
    },
    {
      "epoch": 1.6001584153230821,
      "grad_norm": 0.22599327564239502,
      "learning_rate": 6.844487786878226e-06,
      "loss": 0.017,
      "step": 977780
    },
    {
      "epoch": 1.6001911457617355,
      "grad_norm": 0.5065092444419861,
      "learning_rate": 6.8444218946647075e-06,
      "loss": 0.0203,
      "step": 977800
    },
    {
      "epoch": 1.6002238762003889,
      "grad_norm": 0.9041852951049805,
      "learning_rate": 6.844356002451191e-06,
      "loss": 0.0247,
      "step": 977820
    },
    {
      "epoch": 1.600256606639042,
      "grad_norm": 0.6339555978775024,
      "learning_rate": 6.844290110237674e-06,
      "loss": 0.019,
      "step": 977840
    },
    {
      "epoch": 1.6002893370776956,
      "grad_norm": 0.4285266697406769,
      "learning_rate": 6.8442242180241566e-06,
      "loss": 0.0258,
      "step": 977860
    },
    {
      "epoch": 1.6003220675163488,
      "grad_norm": 0.37071916460990906,
      "learning_rate": 6.844158325810639e-06,
      "loss": 0.0156,
      "step": 977880
    },
    {
      "epoch": 1.6003547979550024,
      "grad_norm": 0.31006139516830444,
      "learning_rate": 6.844092433597123e-06,
      "loss": 0.0144,
      "step": 977900
    },
    {
      "epoch": 1.6003875283936555,
      "grad_norm": 1.7054790258407593,
      "learning_rate": 6.844026541383605e-06,
      "loss": 0.0121,
      "step": 977920
    },
    {
      "epoch": 1.6004202588323089,
      "grad_norm": 0.38618212938308716,
      "learning_rate": 6.843960649170088e-06,
      "loss": 0.0192,
      "step": 977940
    },
    {
      "epoch": 1.6004529892709622,
      "grad_norm": 0.7008490562438965,
      "learning_rate": 6.84389475695657e-06,
      "loss": 0.0157,
      "step": 977960
    },
    {
      "epoch": 1.6004857197096154,
      "grad_norm": 0.3087470233440399,
      "learning_rate": 6.843828864743054e-06,
      "loss": 0.0247,
      "step": 977980
    },
    {
      "epoch": 1.600518450148269,
      "grad_norm": 1.176406979560852,
      "learning_rate": 6.843762972529537e-06,
      "loss": 0.021,
      "step": 978000
    },
    {
      "epoch": 1.6005511805869221,
      "grad_norm": 0.3933885395526886,
      "learning_rate": 6.843697080316019e-06,
      "loss": 0.0181,
      "step": 978020
    },
    {
      "epoch": 1.6005839110255755,
      "grad_norm": 1.382546305656433,
      "learning_rate": 6.843631188102502e-06,
      "loss": 0.0232,
      "step": 978040
    },
    {
      "epoch": 1.6006166414642289,
      "grad_norm": 2.737826108932495,
      "learning_rate": 6.843565295888986e-06,
      "loss": 0.0194,
      "step": 978060
    },
    {
      "epoch": 1.6006493719028823,
      "grad_norm": 0.3325275778770447,
      "learning_rate": 6.843499403675468e-06,
      "loss": 0.0247,
      "step": 978080
    },
    {
      "epoch": 1.6006821023415356,
      "grad_norm": 1.4021813869476318,
      "learning_rate": 6.843433511461951e-06,
      "loss": 0.026,
      "step": 978100
    },
    {
      "epoch": 1.6007148327801888,
      "grad_norm": 0.17024220526218414,
      "learning_rate": 6.843367619248435e-06,
      "loss": 0.027,
      "step": 978120
    },
    {
      "epoch": 1.6007475632188424,
      "grad_norm": 0.4791882038116455,
      "learning_rate": 6.843301727034917e-06,
      "loss": 0.0235,
      "step": 978140
    },
    {
      "epoch": 1.6007802936574955,
      "grad_norm": 0.6378768682479858,
      "learning_rate": 6.8432358348214e-06,
      "loss": 0.0156,
      "step": 978160
    },
    {
      "epoch": 1.600813024096149,
      "grad_norm": 0.1489221602678299,
      "learning_rate": 6.843169942607882e-06,
      "loss": 0.021,
      "step": 978180
    },
    {
      "epoch": 1.6008457545348023,
      "grad_norm": 0.27533331513404846,
      "learning_rate": 6.843104050394366e-06,
      "loss": 0.0163,
      "step": 978200
    },
    {
      "epoch": 1.6008784849734556,
      "grad_norm": 0.5758277773857117,
      "learning_rate": 6.843038158180848e-06,
      "loss": 0.0223,
      "step": 978220
    },
    {
      "epoch": 1.600911215412109,
      "grad_norm": 0.3996625542640686,
      "learning_rate": 6.842972265967331e-06,
      "loss": 0.0198,
      "step": 978240
    },
    {
      "epoch": 1.6009439458507622,
      "grad_norm": 3.0284340381622314,
      "learning_rate": 6.842906373753814e-06,
      "loss": 0.0242,
      "step": 978260
    },
    {
      "epoch": 1.6009766762894158,
      "grad_norm": 1.0040183067321777,
      "learning_rate": 6.8428404815402975e-06,
      "loss": 0.0206,
      "step": 978280
    },
    {
      "epoch": 1.601009406728069,
      "grad_norm": 0.664036214351654,
      "learning_rate": 6.842774589326779e-06,
      "loss": 0.0219,
      "step": 978300
    },
    {
      "epoch": 1.6010421371667223,
      "grad_norm": 0.6983917355537415,
      "learning_rate": 6.842708697113263e-06,
      "loss": 0.0177,
      "step": 978320
    },
    {
      "epoch": 1.6010748676053757,
      "grad_norm": 0.4262063205242157,
      "learning_rate": 6.842642804899745e-06,
      "loss": 0.0279,
      "step": 978340
    },
    {
      "epoch": 1.601107598044029,
      "grad_norm": 0.14793947339057922,
      "learning_rate": 6.8425769126862284e-06,
      "loss": 0.0197,
      "step": 978360
    },
    {
      "epoch": 1.6011403284826824,
      "grad_norm": 0.40747860074043274,
      "learning_rate": 6.842511020472711e-06,
      "loss": 0.0162,
      "step": 978380
    },
    {
      "epoch": 1.6011730589213355,
      "grad_norm": 0.22122608125209808,
      "learning_rate": 6.842445128259194e-06,
      "loss": 0.0206,
      "step": 978400
    },
    {
      "epoch": 1.6012057893599891,
      "grad_norm": 0.713914155960083,
      "learning_rate": 6.8423792360456775e-06,
      "loss": 0.0231,
      "step": 978420
    },
    {
      "epoch": 1.6012385197986423,
      "grad_norm": 1.109114646911621,
      "learning_rate": 6.84231334383216e-06,
      "loss": 0.0227,
      "step": 978440
    },
    {
      "epoch": 1.6012712502372957,
      "grad_norm": 0.9057655334472656,
      "learning_rate": 6.842247451618643e-06,
      "loss": 0.0309,
      "step": 978460
    },
    {
      "epoch": 1.601303980675949,
      "grad_norm": 0.48528799414634705,
      "learning_rate": 6.842181559405126e-06,
      "loss": 0.0178,
      "step": 978480
    },
    {
      "epoch": 1.6013367111146024,
      "grad_norm": 0.8005934953689575,
      "learning_rate": 6.842115667191609e-06,
      "loss": 0.0226,
      "step": 978500
    },
    {
      "epoch": 1.6013694415532558,
      "grad_norm": 0.17350342869758606,
      "learning_rate": 6.842049774978091e-06,
      "loss": 0.016,
      "step": 978520
    },
    {
      "epoch": 1.601402171991909,
      "grad_norm": 0.7947432994842529,
      "learning_rate": 6.841983882764575e-06,
      "loss": 0.0152,
      "step": 978540
    },
    {
      "epoch": 1.6014349024305625,
      "grad_norm": 0.5661700963973999,
      "learning_rate": 6.841917990551057e-06,
      "loss": 0.0171,
      "step": 978560
    },
    {
      "epoch": 1.6014676328692157,
      "grad_norm": 0.3534603416919708,
      "learning_rate": 6.84185209833754e-06,
      "loss": 0.0206,
      "step": 978580
    },
    {
      "epoch": 1.601500363307869,
      "grad_norm": 0.1355113983154297,
      "learning_rate": 6.841786206124022e-06,
      "loss": 0.0158,
      "step": 978600
    },
    {
      "epoch": 1.6015330937465224,
      "grad_norm": 0.24832171201705933,
      "learning_rate": 6.841720313910506e-06,
      "loss": 0.0238,
      "step": 978620
    },
    {
      "epoch": 1.6015658241851756,
      "grad_norm": 0.5102494955062866,
      "learning_rate": 6.8416544216969885e-06,
      "loss": 0.0144,
      "step": 978640
    },
    {
      "epoch": 1.6015985546238292,
      "grad_norm": 0.5208460688591003,
      "learning_rate": 6.841588529483471e-06,
      "loss": 0.0258,
      "step": 978660
    },
    {
      "epoch": 1.6016312850624823,
      "grad_norm": 0.6502940058708191,
      "learning_rate": 6.841522637269954e-06,
      "loss": 0.0252,
      "step": 978680
    },
    {
      "epoch": 1.601664015501136,
      "grad_norm": 0.6482616066932678,
      "learning_rate": 6.8414567450564376e-06,
      "loss": 0.0183,
      "step": 978700
    },
    {
      "epoch": 1.601696745939789,
      "grad_norm": 0.2669624090194702,
      "learning_rate": 6.8413908528429194e-06,
      "loss": 0.015,
      "step": 978720
    },
    {
      "epoch": 1.6017294763784424,
      "grad_norm": 0.6466650366783142,
      "learning_rate": 6.841324960629403e-06,
      "loss": 0.0192,
      "step": 978740
    },
    {
      "epoch": 1.6017622068170958,
      "grad_norm": 0.8940961956977844,
      "learning_rate": 6.841259068415887e-06,
      "loss": 0.0225,
      "step": 978760
    },
    {
      "epoch": 1.601794937255749,
      "grad_norm": 0.1510302722454071,
      "learning_rate": 6.8411931762023685e-06,
      "loss": 0.0259,
      "step": 978780
    },
    {
      "epoch": 1.6018276676944025,
      "grad_norm": 0.8410887122154236,
      "learning_rate": 6.841127283988852e-06,
      "loss": 0.0169,
      "step": 978800
    },
    {
      "epoch": 1.6018603981330557,
      "grad_norm": 0.24315710365772247,
      "learning_rate": 6.841061391775334e-06,
      "loss": 0.0197,
      "step": 978820
    },
    {
      "epoch": 1.601893128571709,
      "grad_norm": 1.8310625553131104,
      "learning_rate": 6.8409954995618176e-06,
      "loss": 0.0165,
      "step": 978840
    },
    {
      "epoch": 1.6019258590103624,
      "grad_norm": 0.29929593205451965,
      "learning_rate": 6.8409296073483e-06,
      "loss": 0.011,
      "step": 978860
    },
    {
      "epoch": 1.6019585894490158,
      "grad_norm": 1.1185873746871948,
      "learning_rate": 6.840863715134783e-06,
      "loss": 0.0216,
      "step": 978880
    },
    {
      "epoch": 1.6019913198876692,
      "grad_norm": 0.23297320306301117,
      "learning_rate": 6.840797822921266e-06,
      "loss": 0.0298,
      "step": 978900
    },
    {
      "epoch": 1.6020240503263223,
      "grad_norm": 0.14748558402061462,
      "learning_rate": 6.840731930707749e-06,
      "loss": 0.022,
      "step": 978920
    },
    {
      "epoch": 1.602056780764976,
      "grad_norm": 0.5507667064666748,
      "learning_rate": 6.840666038494231e-06,
      "loss": 0.018,
      "step": 978940
    },
    {
      "epoch": 1.602089511203629,
      "grad_norm": 0.5716072916984558,
      "learning_rate": 6.840600146280715e-06,
      "loss": 0.0157,
      "step": 978960
    },
    {
      "epoch": 1.6021222416422825,
      "grad_norm": 0.14475740492343903,
      "learning_rate": 6.840534254067197e-06,
      "loss": 0.0226,
      "step": 978980
    },
    {
      "epoch": 1.6021549720809358,
      "grad_norm": 0.4234616756439209,
      "learning_rate": 6.84046836185368e-06,
      "loss": 0.0182,
      "step": 979000
    },
    {
      "epoch": 1.6021877025195892,
      "grad_norm": 0.1100538820028305,
      "learning_rate": 6.840402469640163e-06,
      "loss": 0.0211,
      "step": 979020
    },
    {
      "epoch": 1.6022204329582426,
      "grad_norm": 0.5319828987121582,
      "learning_rate": 6.840336577426646e-06,
      "loss": 0.0199,
      "step": 979040
    },
    {
      "epoch": 1.6022531633968957,
      "grad_norm": 1.6135085821151733,
      "learning_rate": 6.8402706852131286e-06,
      "loss": 0.0186,
      "step": 979060
    },
    {
      "epoch": 1.6022858938355493,
      "grad_norm": 0.5528706908226013,
      "learning_rate": 6.840204792999612e-06,
      "loss": 0.0269,
      "step": 979080
    },
    {
      "epoch": 1.6023186242742025,
      "grad_norm": 0.41681453585624695,
      "learning_rate": 6.840138900786095e-06,
      "loss": 0.0172,
      "step": 979100
    },
    {
      "epoch": 1.6023513547128558,
      "grad_norm": 0.2656858265399933,
      "learning_rate": 6.840073008572578e-06,
      "loss": 0.0155,
      "step": 979120
    },
    {
      "epoch": 1.6023840851515092,
      "grad_norm": 0.8461606502532959,
      "learning_rate": 6.840007116359061e-06,
      "loss": 0.0213,
      "step": 979140
    },
    {
      "epoch": 1.6024168155901626,
      "grad_norm": 0.6960790753364563,
      "learning_rate": 6.839941224145543e-06,
      "loss": 0.0193,
      "step": 979160
    },
    {
      "epoch": 1.602449546028816,
      "grad_norm": 0.4084547758102417,
      "learning_rate": 6.839875331932027e-06,
      "loss": 0.0225,
      "step": 979180
    },
    {
      "epoch": 1.602482276467469,
      "grad_norm": 0.45069026947021484,
      "learning_rate": 6.839809439718509e-06,
      "loss": 0.0187,
      "step": 979200
    },
    {
      "epoch": 1.6025150069061227,
      "grad_norm": 0.9282199740409851,
      "learning_rate": 6.839743547504992e-06,
      "loss": 0.0199,
      "step": 979220
    },
    {
      "epoch": 1.6025477373447758,
      "grad_norm": 0.47020024061203003,
      "learning_rate": 6.839677655291475e-06,
      "loss": 0.0188,
      "step": 979240
    },
    {
      "epoch": 1.6025804677834292,
      "grad_norm": 0.3194672167301178,
      "learning_rate": 6.839611763077958e-06,
      "loss": 0.0184,
      "step": 979260
    },
    {
      "epoch": 1.6026131982220826,
      "grad_norm": 0.18698038160800934,
      "learning_rate": 6.83954587086444e-06,
      "loss": 0.0246,
      "step": 979280
    },
    {
      "epoch": 1.602645928660736,
      "grad_norm": 0.6146106719970703,
      "learning_rate": 6.839479978650924e-06,
      "loss": 0.0176,
      "step": 979300
    },
    {
      "epoch": 1.6026786590993893,
      "grad_norm": 2.426020383834839,
      "learning_rate": 6.839414086437406e-06,
      "loss": 0.0209,
      "step": 979320
    },
    {
      "epoch": 1.6027113895380425,
      "grad_norm": 0.7267563343048096,
      "learning_rate": 6.8393481942238895e-06,
      "loss": 0.0256,
      "step": 979340
    },
    {
      "epoch": 1.602744119976696,
      "grad_norm": 0.5476229190826416,
      "learning_rate": 6.839282302010371e-06,
      "loss": 0.0207,
      "step": 979360
    },
    {
      "epoch": 1.6027768504153492,
      "grad_norm": 1.0374994277954102,
      "learning_rate": 6.839216409796855e-06,
      "loss": 0.0218,
      "step": 979380
    },
    {
      "epoch": 1.6028095808540026,
      "grad_norm": 0.9209258556365967,
      "learning_rate": 6.839150517583338e-06,
      "loss": 0.0212,
      "step": 979400
    },
    {
      "epoch": 1.602842311292656,
      "grad_norm": 0.37236493825912476,
      "learning_rate": 6.83908462536982e-06,
      "loss": 0.0188,
      "step": 979420
    },
    {
      "epoch": 1.6028750417313091,
      "grad_norm": 0.39233818650245667,
      "learning_rate": 6.839018733156303e-06,
      "loss": 0.0234,
      "step": 979440
    },
    {
      "epoch": 1.6029077721699627,
      "grad_norm": 0.7087711691856384,
      "learning_rate": 6.838952840942787e-06,
      "loss": 0.0294,
      "step": 979460
    },
    {
      "epoch": 1.6029405026086159,
      "grad_norm": 2.2860870361328125,
      "learning_rate": 6.8388869487292695e-06,
      "loss": 0.0215,
      "step": 979480
    },
    {
      "epoch": 1.6029732330472695,
      "grad_norm": 1.5007518529891968,
      "learning_rate": 6.838821056515752e-06,
      "loss": 0.0252,
      "step": 979500
    },
    {
      "epoch": 1.6030059634859226,
      "grad_norm": 0.26348164677619934,
      "learning_rate": 6.838755164302236e-06,
      "loss": 0.0217,
      "step": 979520
    },
    {
      "epoch": 1.603038693924576,
      "grad_norm": 0.43060874938964844,
      "learning_rate": 6.838689272088718e-06,
      "loss": 0.022,
      "step": 979540
    },
    {
      "epoch": 1.6030714243632294,
      "grad_norm": 0.3926406502723694,
      "learning_rate": 6.838623379875201e-06,
      "loss": 0.0253,
      "step": 979560
    },
    {
      "epoch": 1.6031041548018825,
      "grad_norm": 1.7885876893997192,
      "learning_rate": 6.838557487661683e-06,
      "loss": 0.0203,
      "step": 979580
    },
    {
      "epoch": 1.603136885240536,
      "grad_norm": 0.25869303941726685,
      "learning_rate": 6.838491595448167e-06,
      "loss": 0.025,
      "step": 979600
    },
    {
      "epoch": 1.6031696156791893,
      "grad_norm": 0.5546063184738159,
      "learning_rate": 6.838425703234649e-06,
      "loss": 0.0305,
      "step": 979620
    },
    {
      "epoch": 1.6032023461178426,
      "grad_norm": 0.8247004747390747,
      "learning_rate": 6.838359811021132e-06,
      "loss": 0.02,
      "step": 979640
    },
    {
      "epoch": 1.603235076556496,
      "grad_norm": 0.8404303789138794,
      "learning_rate": 6.838293918807615e-06,
      "loss": 0.0257,
      "step": 979660
    },
    {
      "epoch": 1.6032678069951494,
      "grad_norm": 1.3845772743225098,
      "learning_rate": 6.838228026594098e-06,
      "loss": 0.0251,
      "step": 979680
    },
    {
      "epoch": 1.6033005374338027,
      "grad_norm": 0.20843742787837982,
      "learning_rate": 6.8381621343805805e-06,
      "loss": 0.0161,
      "step": 979700
    },
    {
      "epoch": 1.603333267872456,
      "grad_norm": 0.23957498371601105,
      "learning_rate": 6.838096242167064e-06,
      "loss": 0.0199,
      "step": 979720
    },
    {
      "epoch": 1.6033659983111095,
      "grad_norm": 0.5498046278953552,
      "learning_rate": 6.838030349953546e-06,
      "loss": 0.0171,
      "step": 979740
    },
    {
      "epoch": 1.6033987287497626,
      "grad_norm": 0.594780683517456,
      "learning_rate": 6.8379644577400295e-06,
      "loss": 0.0306,
      "step": 979760
    },
    {
      "epoch": 1.603431459188416,
      "grad_norm": 2.1983895301818848,
      "learning_rate": 6.8378985655265114e-06,
      "loss": 0.0214,
      "step": 979780
    },
    {
      "epoch": 1.6034641896270694,
      "grad_norm": 0.26973670721054077,
      "learning_rate": 6.837832673312995e-06,
      "loss": 0.0242,
      "step": 979800
    },
    {
      "epoch": 1.6034969200657228,
      "grad_norm": 0.192244753241539,
      "learning_rate": 6.837766781099479e-06,
      "loss": 0.0183,
      "step": 979820
    },
    {
      "epoch": 1.6035296505043761,
      "grad_norm": 1.230860710144043,
      "learning_rate": 6.8377008888859605e-06,
      "loss": 0.0273,
      "step": 979840
    },
    {
      "epoch": 1.6035623809430293,
      "grad_norm": 0.2341318577528,
      "learning_rate": 6.837634996672444e-06,
      "loss": 0.0308,
      "step": 979860
    },
    {
      "epoch": 1.6035951113816829,
      "grad_norm": 0.5003833174705505,
      "learning_rate": 6.837569104458927e-06,
      "loss": 0.0135,
      "step": 979880
    },
    {
      "epoch": 1.603627841820336,
      "grad_norm": 1.1325726509094238,
      "learning_rate": 6.8375032122454096e-06,
      "loss": 0.0167,
      "step": 979900
    },
    {
      "epoch": 1.6036605722589894,
      "grad_norm": 0.3333275020122528,
      "learning_rate": 6.837437320031892e-06,
      "loss": 0.0258,
      "step": 979920
    },
    {
      "epoch": 1.6036933026976428,
      "grad_norm": 0.2016730010509491,
      "learning_rate": 6.837371427818376e-06,
      "loss": 0.019,
      "step": 979940
    },
    {
      "epoch": 1.6037260331362961,
      "grad_norm": 1.1023082733154297,
      "learning_rate": 6.837305535604858e-06,
      "loss": 0.0162,
      "step": 979960
    },
    {
      "epoch": 1.6037587635749495,
      "grad_norm": 0.19025088846683502,
      "learning_rate": 6.837239643391341e-06,
      "loss": 0.0264,
      "step": 979980
    },
    {
      "epoch": 1.6037914940136027,
      "grad_norm": 0.5771320462226868,
      "learning_rate": 6.837173751177823e-06,
      "loss": 0.0169,
      "step": 980000
    },
    {
      "epoch": 1.6038242244522563,
      "grad_norm": 1.0298981666564941,
      "learning_rate": 6.837107858964307e-06,
      "loss": 0.0224,
      "step": 980020
    },
    {
      "epoch": 1.6038569548909094,
      "grad_norm": 0.5426760911941528,
      "learning_rate": 6.83704196675079e-06,
      "loss": 0.0171,
      "step": 980040
    },
    {
      "epoch": 1.6038896853295628,
      "grad_norm": 0.3634502589702606,
      "learning_rate": 6.836976074537272e-06,
      "loss": 0.016,
      "step": 980060
    },
    {
      "epoch": 1.6039224157682161,
      "grad_norm": 1.243895411491394,
      "learning_rate": 6.836910182323755e-06,
      "loss": 0.0271,
      "step": 980080
    },
    {
      "epoch": 1.6039551462068695,
      "grad_norm": 0.6774051785469055,
      "learning_rate": 6.836844290110239e-06,
      "loss": 0.0269,
      "step": 980100
    },
    {
      "epoch": 1.603987876645523,
      "grad_norm": 0.3036858141422272,
      "learning_rate": 6.8367783978967205e-06,
      "loss": 0.0165,
      "step": 980120
    },
    {
      "epoch": 1.604020607084176,
      "grad_norm": 1.1149606704711914,
      "learning_rate": 6.836712505683204e-06,
      "loss": 0.0232,
      "step": 980140
    },
    {
      "epoch": 1.6040533375228296,
      "grad_norm": 0.6698583960533142,
      "learning_rate": 6.836646613469688e-06,
      "loss": 0.0265,
      "step": 980160
    },
    {
      "epoch": 1.6040860679614828,
      "grad_norm": 0.31337639689445496,
      "learning_rate": 6.83658072125617e-06,
      "loss": 0.0126,
      "step": 980180
    },
    {
      "epoch": 1.6041187984001362,
      "grad_norm": 2.409811019897461,
      "learning_rate": 6.836514829042653e-06,
      "loss": 0.0219,
      "step": 980200
    },
    {
      "epoch": 1.6041515288387895,
      "grad_norm": 0.15964284539222717,
      "learning_rate": 6.836448936829135e-06,
      "loss": 0.0179,
      "step": 980220
    },
    {
      "epoch": 1.6041842592774427,
      "grad_norm": 0.20265813171863556,
      "learning_rate": 6.836383044615619e-06,
      "loss": 0.0184,
      "step": 980240
    },
    {
      "epoch": 1.6042169897160963,
      "grad_norm": 0.46199652552604675,
      "learning_rate": 6.836317152402101e-06,
      "loss": 0.0201,
      "step": 980260
    },
    {
      "epoch": 1.6042497201547494,
      "grad_norm": 0.3706742525100708,
      "learning_rate": 6.836251260188584e-06,
      "loss": 0.0185,
      "step": 980280
    },
    {
      "epoch": 1.6042824505934028,
      "grad_norm": 0.2615306079387665,
      "learning_rate": 6.836185367975067e-06,
      "loss": 0.0182,
      "step": 980300
    },
    {
      "epoch": 1.6043151810320562,
      "grad_norm": 0.4383845627307892,
      "learning_rate": 6.8361194757615505e-06,
      "loss": 0.015,
      "step": 980320
    },
    {
      "epoch": 1.6043479114707095,
      "grad_norm": 0.5552520751953125,
      "learning_rate": 6.836053583548032e-06,
      "loss": 0.0206,
      "step": 980340
    },
    {
      "epoch": 1.604380641909363,
      "grad_norm": 0.3260810375213623,
      "learning_rate": 6.835987691334516e-06,
      "loss": 0.0192,
      "step": 980360
    },
    {
      "epoch": 1.604413372348016,
      "grad_norm": 0.9999014139175415,
      "learning_rate": 6.835921799120998e-06,
      "loss": 0.0238,
      "step": 980380
    },
    {
      "epoch": 1.6044461027866697,
      "grad_norm": 0.3060222864151001,
      "learning_rate": 6.8358559069074814e-06,
      "loss": 0.0179,
      "step": 980400
    },
    {
      "epoch": 1.6044788332253228,
      "grad_norm": 0.08281505852937698,
      "learning_rate": 6.835790014693963e-06,
      "loss": 0.0166,
      "step": 980420
    },
    {
      "epoch": 1.6045115636639762,
      "grad_norm": 0.5177768468856812,
      "learning_rate": 6.835724122480447e-06,
      "loss": 0.0146,
      "step": 980440
    },
    {
      "epoch": 1.6045442941026296,
      "grad_norm": 0.3213109076023102,
      "learning_rate": 6.83565823026693e-06,
      "loss": 0.021,
      "step": 980460
    },
    {
      "epoch": 1.604577024541283,
      "grad_norm": 0.21441206336021423,
      "learning_rate": 6.835592338053413e-06,
      "loss": 0.0161,
      "step": 980480
    },
    {
      "epoch": 1.6046097549799363,
      "grad_norm": 2.7837183475494385,
      "learning_rate": 6.835526445839895e-06,
      "loss": 0.0296,
      "step": 980500
    },
    {
      "epoch": 1.6046424854185894,
      "grad_norm": 1.346317172050476,
      "learning_rate": 6.835460553626379e-06,
      "loss": 0.032,
      "step": 980520
    },
    {
      "epoch": 1.604675215857243,
      "grad_norm": 0.23346027731895447,
      "learning_rate": 6.835394661412862e-06,
      "loss": 0.0154,
      "step": 980540
    },
    {
      "epoch": 1.6047079462958962,
      "grad_norm": 0.33403900265693665,
      "learning_rate": 6.835328769199344e-06,
      "loss": 0.0153,
      "step": 980560
    },
    {
      "epoch": 1.6047406767345496,
      "grad_norm": 0.14595097303390503,
      "learning_rate": 6.835262876985828e-06,
      "loss": 0.0243,
      "step": 980580
    },
    {
      "epoch": 1.604773407173203,
      "grad_norm": 0.7068358659744263,
      "learning_rate": 6.83519698477231e-06,
      "loss": 0.017,
      "step": 980600
    },
    {
      "epoch": 1.6048061376118563,
      "grad_norm": 0.5256437063217163,
      "learning_rate": 6.835131092558793e-06,
      "loss": 0.0225,
      "step": 980620
    },
    {
      "epoch": 1.6048388680505097,
      "grad_norm": 0.5299604535102844,
      "learning_rate": 6.835065200345275e-06,
      "loss": 0.0208,
      "step": 980640
    },
    {
      "epoch": 1.6048715984891628,
      "grad_norm": 1.4327820539474487,
      "learning_rate": 6.834999308131759e-06,
      "loss": 0.0187,
      "step": 980660
    },
    {
      "epoch": 1.6049043289278164,
      "grad_norm": 0.04066837579011917,
      "learning_rate": 6.8349334159182415e-06,
      "loss": 0.0228,
      "step": 980680
    },
    {
      "epoch": 1.6049370593664696,
      "grad_norm": 1.7916010618209839,
      "learning_rate": 6.834867523704724e-06,
      "loss": 0.0211,
      "step": 980700
    },
    {
      "epoch": 1.604969789805123,
      "grad_norm": 0.36662861704826355,
      "learning_rate": 6.834801631491207e-06,
      "loss": 0.0156,
      "step": 980720
    },
    {
      "epoch": 1.6050025202437763,
      "grad_norm": 1.3083800077438354,
      "learning_rate": 6.8347357392776906e-06,
      "loss": 0.0186,
      "step": 980740
    },
    {
      "epoch": 1.6050352506824297,
      "grad_norm": 0.3302176892757416,
      "learning_rate": 6.8346698470641724e-06,
      "loss": 0.0119,
      "step": 980760
    },
    {
      "epoch": 1.605067981121083,
      "grad_norm": 1.0894948244094849,
      "learning_rate": 6.834603954850656e-06,
      "loss": 0.0213,
      "step": 980780
    },
    {
      "epoch": 1.6051007115597362,
      "grad_norm": 0.36562567949295044,
      "learning_rate": 6.834538062637138e-06,
      "loss": 0.0188,
      "step": 980800
    },
    {
      "epoch": 1.6051334419983898,
      "grad_norm": 0.607684850692749,
      "learning_rate": 6.8344721704236215e-06,
      "loss": 0.0266,
      "step": 980820
    },
    {
      "epoch": 1.605166172437043,
      "grad_norm": 1.0722874402999878,
      "learning_rate": 6.834406278210104e-06,
      "loss": 0.0296,
      "step": 980840
    },
    {
      "epoch": 1.6051989028756963,
      "grad_norm": 0.2813635766506195,
      "learning_rate": 6.834340385996587e-06,
      "loss": 0.0204,
      "step": 980860
    },
    {
      "epoch": 1.6052316333143497,
      "grad_norm": 0.3939460217952728,
      "learning_rate": 6.834274493783071e-06,
      "loss": 0.0246,
      "step": 980880
    },
    {
      "epoch": 1.6052643637530029,
      "grad_norm": 0.7443326115608215,
      "learning_rate": 6.834208601569553e-06,
      "loss": 0.0203,
      "step": 980900
    },
    {
      "epoch": 1.6052970941916564,
      "grad_norm": 0.44777175784111023,
      "learning_rate": 6.834142709356036e-06,
      "loss": 0.016,
      "step": 980920
    },
    {
      "epoch": 1.6053298246303096,
      "grad_norm": 0.6646481156349182,
      "learning_rate": 6.834076817142519e-06,
      "loss": 0.0166,
      "step": 980940
    },
    {
      "epoch": 1.6053625550689632,
      "grad_norm": 1.1053977012634277,
      "learning_rate": 6.834010924929002e-06,
      "loss": 0.0146,
      "step": 980960
    },
    {
      "epoch": 1.6053952855076163,
      "grad_norm": 0.622673511505127,
      "learning_rate": 6.833945032715484e-06,
      "loss": 0.0232,
      "step": 980980
    },
    {
      "epoch": 1.6054280159462697,
      "grad_norm": 0.6876975297927856,
      "learning_rate": 6.833879140501968e-06,
      "loss": 0.0102,
      "step": 981000
    },
    {
      "epoch": 1.605460746384923,
      "grad_norm": 0.3343762159347534,
      "learning_rate": 6.83381324828845e-06,
      "loss": 0.0342,
      "step": 981020
    },
    {
      "epoch": 1.6054934768235762,
      "grad_norm": 0.29410067200660706,
      "learning_rate": 6.833747356074933e-06,
      "loss": 0.0174,
      "step": 981040
    },
    {
      "epoch": 1.6055262072622298,
      "grad_norm": 0.14309380948543549,
      "learning_rate": 6.833681463861416e-06,
      "loss": 0.0177,
      "step": 981060
    },
    {
      "epoch": 1.605558937700883,
      "grad_norm": 0.339026540517807,
      "learning_rate": 6.833615571647899e-06,
      "loss": 0.0141,
      "step": 981080
    },
    {
      "epoch": 1.6055916681395364,
      "grad_norm": 0.08024580776691437,
      "learning_rate": 6.8335496794343816e-06,
      "loss": 0.0154,
      "step": 981100
    },
    {
      "epoch": 1.6056243985781897,
      "grad_norm": 1.285421371459961,
      "learning_rate": 6.833483787220865e-06,
      "loss": 0.0208,
      "step": 981120
    },
    {
      "epoch": 1.605657129016843,
      "grad_norm": 0.7873215079307556,
      "learning_rate": 6.833417895007347e-06,
      "loss": 0.0204,
      "step": 981140
    },
    {
      "epoch": 1.6056898594554965,
      "grad_norm": 2.071261167526245,
      "learning_rate": 6.833352002793831e-06,
      "loss": 0.0165,
      "step": 981160
    },
    {
      "epoch": 1.6057225898941496,
      "grad_norm": 1.089231252670288,
      "learning_rate": 6.8332861105803125e-06,
      "loss": 0.0126,
      "step": 981180
    },
    {
      "epoch": 1.6057553203328032,
      "grad_norm": 0.3800862729549408,
      "learning_rate": 6.833220218366796e-06,
      "loss": 0.0121,
      "step": 981200
    },
    {
      "epoch": 1.6057880507714564,
      "grad_norm": 0.1898925006389618,
      "learning_rate": 6.83315432615328e-06,
      "loss": 0.0143,
      "step": 981220
    },
    {
      "epoch": 1.6058207812101097,
      "grad_norm": 0.35935741662979126,
      "learning_rate": 6.833088433939762e-06,
      "loss": 0.0177,
      "step": 981240
    },
    {
      "epoch": 1.605853511648763,
      "grad_norm": 0.7979668974876404,
      "learning_rate": 6.833022541726245e-06,
      "loss": 0.0258,
      "step": 981260
    },
    {
      "epoch": 1.6058862420874165,
      "grad_norm": 0.3143245577812195,
      "learning_rate": 6.832956649512728e-06,
      "loss": 0.018,
      "step": 981280
    },
    {
      "epoch": 1.6059189725260699,
      "grad_norm": 0.36959773302078247,
      "learning_rate": 6.832890757299211e-06,
      "loss": 0.0164,
      "step": 981300
    },
    {
      "epoch": 1.605951702964723,
      "grad_norm": 0.3921314477920532,
      "learning_rate": 6.832824865085693e-06,
      "loss": 0.0211,
      "step": 981320
    },
    {
      "epoch": 1.6059844334033766,
      "grad_norm": 0.5445086359977722,
      "learning_rate": 6.832758972872177e-06,
      "loss": 0.02,
      "step": 981340
    },
    {
      "epoch": 1.6060171638420297,
      "grad_norm": 0.3669116199016571,
      "learning_rate": 6.832693080658659e-06,
      "loss": 0.0153,
      "step": 981360
    },
    {
      "epoch": 1.6060498942806831,
      "grad_norm": 0.11215390264987946,
      "learning_rate": 6.8326271884451425e-06,
      "loss": 0.0241,
      "step": 981380
    },
    {
      "epoch": 1.6060826247193365,
      "grad_norm": 0.1664106845855713,
      "learning_rate": 6.832561296231624e-06,
      "loss": 0.0145,
      "step": 981400
    },
    {
      "epoch": 1.6061153551579899,
      "grad_norm": 0.5965652465820312,
      "learning_rate": 6.832495404018108e-06,
      "loss": 0.0269,
      "step": 981420
    },
    {
      "epoch": 1.6061480855966432,
      "grad_norm": 0.45765259861946106,
      "learning_rate": 6.83242951180459e-06,
      "loss": 0.0261,
      "step": 981440
    },
    {
      "epoch": 1.6061808160352964,
      "grad_norm": 0.4125939607620239,
      "learning_rate": 6.832363619591073e-06,
      "loss": 0.0194,
      "step": 981460
    },
    {
      "epoch": 1.60621354647395,
      "grad_norm": 0.5641689896583557,
      "learning_rate": 6.832297727377556e-06,
      "loss": 0.0147,
      "step": 981480
    },
    {
      "epoch": 1.6062462769126031,
      "grad_norm": 0.2813054323196411,
      "learning_rate": 6.832231835164039e-06,
      "loss": 0.0263,
      "step": 981500
    },
    {
      "epoch": 1.6062790073512565,
      "grad_norm": 1.0160316228866577,
      "learning_rate": 6.832165942950522e-06,
      "loss": 0.0155,
      "step": 981520
    },
    {
      "epoch": 1.6063117377899099,
      "grad_norm": 0.6376857757568359,
      "learning_rate": 6.832100050737005e-06,
      "loss": 0.0163,
      "step": 981540
    },
    {
      "epoch": 1.6063444682285632,
      "grad_norm": 1.3696167469024658,
      "learning_rate": 6.832034158523488e-06,
      "loss": 0.0173,
      "step": 981560
    },
    {
      "epoch": 1.6063771986672166,
      "grad_norm": 0.6549823880195618,
      "learning_rate": 6.831968266309971e-06,
      "loss": 0.0183,
      "step": 981580
    },
    {
      "epoch": 1.6064099291058698,
      "grad_norm": 0.4082736372947693,
      "learning_rate": 6.831902374096454e-06,
      "loss": 0.0108,
      "step": 981600
    },
    {
      "epoch": 1.6064426595445234,
      "grad_norm": 1.3364341259002686,
      "learning_rate": 6.831836481882936e-06,
      "loss": 0.0159,
      "step": 981620
    },
    {
      "epoch": 1.6064753899831765,
      "grad_norm": 0.09873133897781372,
      "learning_rate": 6.83177058966942e-06,
      "loss": 0.0156,
      "step": 981640
    },
    {
      "epoch": 1.6065081204218299,
      "grad_norm": 4.146657943725586,
      "learning_rate": 6.831704697455902e-06,
      "loss": 0.0167,
      "step": 981660
    },
    {
      "epoch": 1.6065408508604833,
      "grad_norm": 0.15482112765312195,
      "learning_rate": 6.831638805242385e-06,
      "loss": 0.008,
      "step": 981680
    },
    {
      "epoch": 1.6065735812991364,
      "grad_norm": 0.30082517862319946,
      "learning_rate": 6.831572913028868e-06,
      "loss": 0.012,
      "step": 981700
    },
    {
      "epoch": 1.60660631173779,
      "grad_norm": 0.30552932620048523,
      "learning_rate": 6.831507020815351e-06,
      "loss": 0.0331,
      "step": 981720
    },
    {
      "epoch": 1.6066390421764432,
      "grad_norm": 0.421121746301651,
      "learning_rate": 6.8314411286018335e-06,
      "loss": 0.0204,
      "step": 981740
    },
    {
      "epoch": 1.6066717726150967,
      "grad_norm": 0.3195737600326538,
      "learning_rate": 6.831375236388317e-06,
      "loss": 0.0246,
      "step": 981760
    },
    {
      "epoch": 1.60670450305375,
      "grad_norm": 0.5710253715515137,
      "learning_rate": 6.831309344174799e-06,
      "loss": 0.023,
      "step": 981780
    },
    {
      "epoch": 1.6067372334924033,
      "grad_norm": 1.2788106203079224,
      "learning_rate": 6.8312434519612825e-06,
      "loss": 0.025,
      "step": 981800
    },
    {
      "epoch": 1.6067699639310566,
      "grad_norm": 0.47811949253082275,
      "learning_rate": 6.8311775597477644e-06,
      "loss": 0.0164,
      "step": 981820
    },
    {
      "epoch": 1.6068026943697098,
      "grad_norm": 1.2924476861953735,
      "learning_rate": 6.831111667534248e-06,
      "loss": 0.0147,
      "step": 981840
    },
    {
      "epoch": 1.6068354248083634,
      "grad_norm": 0.17641279101371765,
      "learning_rate": 6.831045775320731e-06,
      "loss": 0.0271,
      "step": 981860
    },
    {
      "epoch": 1.6068681552470165,
      "grad_norm": 1.0431584119796753,
      "learning_rate": 6.8309798831072135e-06,
      "loss": 0.0232,
      "step": 981880
    },
    {
      "epoch": 1.60690088568567,
      "grad_norm": 0.10745900124311447,
      "learning_rate": 6.830913990893696e-06,
      "loss": 0.0204,
      "step": 981900
    },
    {
      "epoch": 1.6069336161243233,
      "grad_norm": 0.47814270853996277,
      "learning_rate": 6.83084809868018e-06,
      "loss": 0.0165,
      "step": 981920
    },
    {
      "epoch": 1.6069663465629767,
      "grad_norm": 0.29718708992004395,
      "learning_rate": 6.8307822064666626e-06,
      "loss": 0.0241,
      "step": 981940
    },
    {
      "epoch": 1.60699907700163,
      "grad_norm": 0.5375001430511475,
      "learning_rate": 6.830716314253145e-06,
      "loss": 0.0192,
      "step": 981960
    },
    {
      "epoch": 1.6070318074402832,
      "grad_norm": 0.4098914563655853,
      "learning_rate": 6.830650422039629e-06,
      "loss": 0.0246,
      "step": 981980
    },
    {
      "epoch": 1.6070645378789368,
      "grad_norm": 0.5108456611633301,
      "learning_rate": 6.830584529826111e-06,
      "loss": 0.0163,
      "step": 982000
    },
    {
      "epoch": 1.60709726831759,
      "grad_norm": 0.4104229807853699,
      "learning_rate": 6.830518637612594e-06,
      "loss": 0.0263,
      "step": 982020
    },
    {
      "epoch": 1.6071299987562433,
      "grad_norm": 1.415313959121704,
      "learning_rate": 6.830452745399076e-06,
      "loss": 0.0183,
      "step": 982040
    },
    {
      "epoch": 1.6071627291948967,
      "grad_norm": 0.8307639360427856,
      "learning_rate": 6.83038685318556e-06,
      "loss": 0.0198,
      "step": 982060
    },
    {
      "epoch": 1.60719545963355,
      "grad_norm": 0.493426650762558,
      "learning_rate": 6.830320960972043e-06,
      "loss": 0.0247,
      "step": 982080
    },
    {
      "epoch": 1.6072281900722034,
      "grad_norm": 0.5047317743301392,
      "learning_rate": 6.830255068758525e-06,
      "loss": 0.019,
      "step": 982100
    },
    {
      "epoch": 1.6072609205108566,
      "grad_norm": 0.25609076023101807,
      "learning_rate": 6.830189176545008e-06,
      "loss": 0.02,
      "step": 982120
    },
    {
      "epoch": 1.6072936509495102,
      "grad_norm": 0.5172982811927795,
      "learning_rate": 6.830123284331492e-06,
      "loss": 0.0163,
      "step": 982140
    },
    {
      "epoch": 1.6073263813881633,
      "grad_norm": 0.6531230807304382,
      "learning_rate": 6.8300573921179735e-06,
      "loss": 0.0166,
      "step": 982160
    },
    {
      "epoch": 1.6073591118268167,
      "grad_norm": 0.5781037211418152,
      "learning_rate": 6.829991499904457e-06,
      "loss": 0.0221,
      "step": 982180
    },
    {
      "epoch": 1.60739184226547,
      "grad_norm": 0.348752498626709,
      "learning_rate": 6.829925607690939e-06,
      "loss": 0.019,
      "step": 982200
    },
    {
      "epoch": 1.6074245727041234,
      "grad_norm": 0.3374691605567932,
      "learning_rate": 6.829859715477423e-06,
      "loss": 0.027,
      "step": 982220
    },
    {
      "epoch": 1.6074573031427768,
      "grad_norm": 0.7922703623771667,
      "learning_rate": 6.829793823263905e-06,
      "loss": 0.0222,
      "step": 982240
    },
    {
      "epoch": 1.60749003358143,
      "grad_norm": 0.44969889521598816,
      "learning_rate": 6.829727931050388e-06,
      "loss": 0.0195,
      "step": 982260
    },
    {
      "epoch": 1.6075227640200835,
      "grad_norm": 0.5143592953681946,
      "learning_rate": 6.829662038836872e-06,
      "loss": 0.0223,
      "step": 982280
    },
    {
      "epoch": 1.6075554944587367,
      "grad_norm": 0.4573879837989807,
      "learning_rate": 6.829596146623354e-06,
      "loss": 0.0238,
      "step": 982300
    },
    {
      "epoch": 1.60758822489739,
      "grad_norm": 0.21464397013187408,
      "learning_rate": 6.829530254409837e-06,
      "loss": 0.0161,
      "step": 982320
    },
    {
      "epoch": 1.6076209553360434,
      "grad_norm": 0.37124350666999817,
      "learning_rate": 6.82946436219632e-06,
      "loss": 0.0144,
      "step": 982340
    },
    {
      "epoch": 1.6076536857746968,
      "grad_norm": 0.46857592463493347,
      "learning_rate": 6.8293984699828035e-06,
      "loss": 0.0153,
      "step": 982360
    },
    {
      "epoch": 1.6076864162133502,
      "grad_norm": 1.0613189935684204,
      "learning_rate": 6.829332577769285e-06,
      "loss": 0.0211,
      "step": 982380
    },
    {
      "epoch": 1.6077191466520033,
      "grad_norm": 0.5506002306938171,
      "learning_rate": 6.829266685555769e-06,
      "loss": 0.0181,
      "step": 982400
    },
    {
      "epoch": 1.607751877090657,
      "grad_norm": 0.431453675031662,
      "learning_rate": 6.829200793342251e-06,
      "loss": 0.0167,
      "step": 982420
    },
    {
      "epoch": 1.60778460752931,
      "grad_norm": 0.6826524138450623,
      "learning_rate": 6.8291349011287344e-06,
      "loss": 0.015,
      "step": 982440
    },
    {
      "epoch": 1.6078173379679634,
      "grad_norm": 0.485162615776062,
      "learning_rate": 6.829069008915216e-06,
      "loss": 0.0163,
      "step": 982460
    },
    {
      "epoch": 1.6078500684066168,
      "grad_norm": 0.1689329445362091,
      "learning_rate": 6.8290031167017e-06,
      "loss": 0.0118,
      "step": 982480
    },
    {
      "epoch": 1.60788279884527,
      "grad_norm": 0.17563411593437195,
      "learning_rate": 6.828937224488183e-06,
      "loss": 0.0197,
      "step": 982500
    },
    {
      "epoch": 1.6079155292839236,
      "grad_norm": 0.5174856185913086,
      "learning_rate": 6.828871332274665e-06,
      "loss": 0.0198,
      "step": 982520
    },
    {
      "epoch": 1.6079482597225767,
      "grad_norm": 1.2640304565429688,
      "learning_rate": 6.828805440061148e-06,
      "loss": 0.0344,
      "step": 982540
    },
    {
      "epoch": 1.6079809901612303,
      "grad_norm": 0.7988510727882385,
      "learning_rate": 6.828739547847632e-06,
      "loss": 0.022,
      "step": 982560
    },
    {
      "epoch": 1.6080137205998835,
      "grad_norm": 1.1261310577392578,
      "learning_rate": 6.828673655634114e-06,
      "loss": 0.0203,
      "step": 982580
    },
    {
      "epoch": 1.6080464510385368,
      "grad_norm": 0.15467624366283417,
      "learning_rate": 6.828607763420597e-06,
      "loss": 0.0167,
      "step": 982600
    },
    {
      "epoch": 1.6080791814771902,
      "grad_norm": 0.7927875518798828,
      "learning_rate": 6.828541871207081e-06,
      "loss": 0.0223,
      "step": 982620
    },
    {
      "epoch": 1.6081119119158434,
      "grad_norm": 0.2334122657775879,
      "learning_rate": 6.828475978993563e-06,
      "loss": 0.0148,
      "step": 982640
    },
    {
      "epoch": 1.608144642354497,
      "grad_norm": 1.3899253606796265,
      "learning_rate": 6.828410086780046e-06,
      "loss": 0.0285,
      "step": 982660
    },
    {
      "epoch": 1.60817737279315,
      "grad_norm": 0.08235739916563034,
      "learning_rate": 6.828344194566528e-06,
      "loss": 0.0177,
      "step": 982680
    },
    {
      "epoch": 1.6082101032318035,
      "grad_norm": 0.7282041311264038,
      "learning_rate": 6.828278302353012e-06,
      "loss": 0.0238,
      "step": 982700
    },
    {
      "epoch": 1.6082428336704568,
      "grad_norm": 1.022403597831726,
      "learning_rate": 6.8282124101394945e-06,
      "loss": 0.0192,
      "step": 982720
    },
    {
      "epoch": 1.6082755641091102,
      "grad_norm": 0.17967811226844788,
      "learning_rate": 6.828146517925977e-06,
      "loss": 0.0168,
      "step": 982740
    },
    {
      "epoch": 1.6083082945477636,
      "grad_norm": 0.614340603351593,
      "learning_rate": 6.82808062571246e-06,
      "loss": 0.0169,
      "step": 982760
    },
    {
      "epoch": 1.6083410249864167,
      "grad_norm": 0.30686551332473755,
      "learning_rate": 6.8280147334989436e-06,
      "loss": 0.0234,
      "step": 982780
    },
    {
      "epoch": 1.6083737554250703,
      "grad_norm": 1.3130844831466675,
      "learning_rate": 6.8279488412854255e-06,
      "loss": 0.0246,
      "step": 982800
    },
    {
      "epoch": 1.6084064858637235,
      "grad_norm": 0.5060685276985168,
      "learning_rate": 6.827882949071909e-06,
      "loss": 0.0243,
      "step": 982820
    },
    {
      "epoch": 1.6084392163023769,
      "grad_norm": 0.8633927702903748,
      "learning_rate": 6.827817056858391e-06,
      "loss": 0.0232,
      "step": 982840
    },
    {
      "epoch": 1.6084719467410302,
      "grad_norm": 0.3501248359680176,
      "learning_rate": 6.8277511646448745e-06,
      "loss": 0.0276,
      "step": 982860
    },
    {
      "epoch": 1.6085046771796836,
      "grad_norm": 0.4983968138694763,
      "learning_rate": 6.827685272431357e-06,
      "loss": 0.0166,
      "step": 982880
    },
    {
      "epoch": 1.608537407618337,
      "grad_norm": 0.3312179446220398,
      "learning_rate": 6.82761938021784e-06,
      "loss": 0.0178,
      "step": 982900
    },
    {
      "epoch": 1.6085701380569901,
      "grad_norm": 0.32023125886917114,
      "learning_rate": 6.827553488004323e-06,
      "loss": 0.0177,
      "step": 982920
    },
    {
      "epoch": 1.6086028684956437,
      "grad_norm": 0.6354719996452332,
      "learning_rate": 6.827487595790806e-06,
      "loss": 0.0187,
      "step": 982940
    },
    {
      "epoch": 1.6086355989342969,
      "grad_norm": 0.731137216091156,
      "learning_rate": 6.827421703577288e-06,
      "loss": 0.0122,
      "step": 982960
    },
    {
      "epoch": 1.6086683293729502,
      "grad_norm": 0.5303803086280823,
      "learning_rate": 6.827355811363772e-06,
      "loss": 0.0225,
      "step": 982980
    },
    {
      "epoch": 1.6087010598116036,
      "grad_norm": 0.381279319524765,
      "learning_rate": 6.827289919150255e-06,
      "loss": 0.0208,
      "step": 983000
    },
    {
      "epoch": 1.608733790250257,
      "grad_norm": 0.2620919644832611,
      "learning_rate": 6.827224026936737e-06,
      "loss": 0.0131,
      "step": 983020
    },
    {
      "epoch": 1.6087665206889104,
      "grad_norm": 0.7286067008972168,
      "learning_rate": 6.827158134723221e-06,
      "loss": 0.0149,
      "step": 983040
    },
    {
      "epoch": 1.6087992511275635,
      "grad_norm": 0.31150519847869873,
      "learning_rate": 6.827092242509703e-06,
      "loss": 0.0148,
      "step": 983060
    },
    {
      "epoch": 1.608831981566217,
      "grad_norm": 0.2551051676273346,
      "learning_rate": 6.827026350296186e-06,
      "loss": 0.0144,
      "step": 983080
    },
    {
      "epoch": 1.6088647120048702,
      "grad_norm": 0.4758617579936981,
      "learning_rate": 6.826960458082669e-06,
      "loss": 0.0197,
      "step": 983100
    },
    {
      "epoch": 1.6088974424435236,
      "grad_norm": 0.2478937804698944,
      "learning_rate": 6.826894565869152e-06,
      "loss": 0.0275,
      "step": 983120
    },
    {
      "epoch": 1.608930172882177,
      "grad_norm": 3.2542316913604736,
      "learning_rate": 6.8268286736556346e-06,
      "loss": 0.0187,
      "step": 983140
    },
    {
      "epoch": 1.6089629033208301,
      "grad_norm": 1.1734364032745361,
      "learning_rate": 6.826762781442118e-06,
      "loss": 0.0217,
      "step": 983160
    },
    {
      "epoch": 1.6089956337594837,
      "grad_norm": 0.42916029691696167,
      "learning_rate": 6.8266968892286e-06,
      "loss": 0.0185,
      "step": 983180
    },
    {
      "epoch": 1.6090283641981369,
      "grad_norm": 0.653011679649353,
      "learning_rate": 6.826630997015084e-06,
      "loss": 0.0154,
      "step": 983200
    },
    {
      "epoch": 1.6090610946367905,
      "grad_norm": 0.239498108625412,
      "learning_rate": 6.8265651048015655e-06,
      "loss": 0.016,
      "step": 983220
    },
    {
      "epoch": 1.6090938250754436,
      "grad_norm": 0.5019493699073792,
      "learning_rate": 6.826499212588049e-06,
      "loss": 0.0231,
      "step": 983240
    },
    {
      "epoch": 1.609126555514097,
      "grad_norm": 0.4472476541996002,
      "learning_rate": 6.826433320374532e-06,
      "loss": 0.0146,
      "step": 983260
    },
    {
      "epoch": 1.6091592859527504,
      "grad_norm": 0.1791912466287613,
      "learning_rate": 6.826367428161015e-06,
      "loss": 0.0203,
      "step": 983280
    },
    {
      "epoch": 1.6091920163914035,
      "grad_norm": 0.7991510629653931,
      "learning_rate": 6.826301535947497e-06,
      "loss": 0.0203,
      "step": 983300
    },
    {
      "epoch": 1.6092247468300571,
      "grad_norm": 0.4038774371147156,
      "learning_rate": 6.826235643733981e-06,
      "loss": 0.0159,
      "step": 983320
    },
    {
      "epoch": 1.6092574772687103,
      "grad_norm": 0.21591642498970032,
      "learning_rate": 6.826169751520464e-06,
      "loss": 0.0214,
      "step": 983340
    },
    {
      "epoch": 1.6092902077073636,
      "grad_norm": 1.700209140777588,
      "learning_rate": 6.826103859306946e-06,
      "loss": 0.0184,
      "step": 983360
    },
    {
      "epoch": 1.609322938146017,
      "grad_norm": 1.3303736448287964,
      "learning_rate": 6.82603796709343e-06,
      "loss": 0.0227,
      "step": 983380
    },
    {
      "epoch": 1.6093556685846704,
      "grad_norm": 0.42413321137428284,
      "learning_rate": 6.825972074879912e-06,
      "loss": 0.0161,
      "step": 983400
    },
    {
      "epoch": 1.6093883990233238,
      "grad_norm": 0.22327233850955963,
      "learning_rate": 6.8259061826663955e-06,
      "loss": 0.0215,
      "step": 983420
    },
    {
      "epoch": 1.609421129461977,
      "grad_norm": 1.0425087213516235,
      "learning_rate": 6.825840290452877e-06,
      "loss": 0.0227,
      "step": 983440
    },
    {
      "epoch": 1.6094538599006305,
      "grad_norm": 0.4359625577926636,
      "learning_rate": 6.825774398239361e-06,
      "loss": 0.0263,
      "step": 983460
    },
    {
      "epoch": 1.6094865903392837,
      "grad_norm": 0.3218342661857605,
      "learning_rate": 6.825708506025843e-06,
      "loss": 0.0183,
      "step": 983480
    },
    {
      "epoch": 1.609519320777937,
      "grad_norm": 0.4277239441871643,
      "learning_rate": 6.8256426138123264e-06,
      "loss": 0.0174,
      "step": 983500
    },
    {
      "epoch": 1.6095520512165904,
      "grad_norm": 0.706286609172821,
      "learning_rate": 6.825576721598809e-06,
      "loss": 0.0249,
      "step": 983520
    },
    {
      "epoch": 1.6095847816552438,
      "grad_norm": 0.8154276013374329,
      "learning_rate": 6.825510829385292e-06,
      "loss": 0.0216,
      "step": 983540
    },
    {
      "epoch": 1.6096175120938971,
      "grad_norm": 0.4538039267063141,
      "learning_rate": 6.825444937171775e-06,
      "loss": 0.0185,
      "step": 983560
    },
    {
      "epoch": 1.6096502425325503,
      "grad_norm": 0.5574766993522644,
      "learning_rate": 6.825379044958258e-06,
      "loss": 0.0171,
      "step": 983580
    },
    {
      "epoch": 1.6096829729712039,
      "grad_norm": 0.2872169315814972,
      "learning_rate": 6.82531315274474e-06,
      "loss": 0.0252,
      "step": 983600
    },
    {
      "epoch": 1.609715703409857,
      "grad_norm": 0.5757054686546326,
      "learning_rate": 6.825247260531224e-06,
      "loss": 0.0241,
      "step": 983620
    },
    {
      "epoch": 1.6097484338485104,
      "grad_norm": 0.5672844648361206,
      "learning_rate": 6.825181368317706e-06,
      "loss": 0.0199,
      "step": 983640
    },
    {
      "epoch": 1.6097811642871638,
      "grad_norm": 0.2344512939453125,
      "learning_rate": 6.825115476104189e-06,
      "loss": 0.0217,
      "step": 983660
    },
    {
      "epoch": 1.6098138947258172,
      "grad_norm": 0.21656367182731628,
      "learning_rate": 6.825049583890673e-06,
      "loss": 0.0253,
      "step": 983680
    },
    {
      "epoch": 1.6098466251644705,
      "grad_norm": Infinity,
      "learning_rate": 6.824983691677155e-06,
      "loss": 0.0226,
      "step": 983700
    },
    {
      "epoch": 1.6098793556031237,
      "grad_norm": 0.7226370573043823,
      "learning_rate": 6.824917799463638e-06,
      "loss": 0.0247,
      "step": 983720
    },
    {
      "epoch": 1.6099120860417773,
      "grad_norm": 0.3515165448188782,
      "learning_rate": 6.824851907250121e-06,
      "loss": 0.0224,
      "step": 983740
    },
    {
      "epoch": 1.6099448164804304,
      "grad_norm": 0.32916438579559326,
      "learning_rate": 6.824786015036604e-06,
      "loss": 0.0188,
      "step": 983760
    },
    {
      "epoch": 1.6099775469190838,
      "grad_norm": 0.23624499142169952,
      "learning_rate": 6.8247201228230865e-06,
      "loss": 0.0243,
      "step": 983780
    },
    {
      "epoch": 1.6100102773577372,
      "grad_norm": 0.48921191692352295,
      "learning_rate": 6.82465423060957e-06,
      "loss": 0.0173,
      "step": 983800
    },
    {
      "epoch": 1.6100430077963905,
      "grad_norm": 0.07685267925262451,
      "learning_rate": 6.824588338396052e-06,
      "loss": 0.0286,
      "step": 983820
    },
    {
      "epoch": 1.610075738235044,
      "grad_norm": 0.4242025911808014,
      "learning_rate": 6.8245224461825355e-06,
      "loss": 0.0146,
      "step": 983840
    },
    {
      "epoch": 1.610108468673697,
      "grad_norm": 0.9622503519058228,
      "learning_rate": 6.8244565539690174e-06,
      "loss": 0.0218,
      "step": 983860
    },
    {
      "epoch": 1.6101411991123507,
      "grad_norm": 0.823817789554596,
      "learning_rate": 6.824390661755501e-06,
      "loss": 0.02,
      "step": 983880
    },
    {
      "epoch": 1.6101739295510038,
      "grad_norm": 0.6606898903846741,
      "learning_rate": 6.824324769541984e-06,
      "loss": 0.0132,
      "step": 983900
    },
    {
      "epoch": 1.6102066599896572,
      "grad_norm": 1.5287758111953735,
      "learning_rate": 6.8242588773284665e-06,
      "loss": 0.0205,
      "step": 983920
    },
    {
      "epoch": 1.6102393904283105,
      "grad_norm": 0.23107528686523438,
      "learning_rate": 6.824192985114949e-06,
      "loss": 0.0139,
      "step": 983940
    },
    {
      "epoch": 1.6102721208669637,
      "grad_norm": 1.9910733699798584,
      "learning_rate": 6.824127092901433e-06,
      "loss": 0.0216,
      "step": 983960
    },
    {
      "epoch": 1.6103048513056173,
      "grad_norm": 1.665617823600769,
      "learning_rate": 6.824061200687915e-06,
      "loss": 0.0192,
      "step": 983980
    },
    {
      "epoch": 1.6103375817442704,
      "grad_norm": 0.28282567858695984,
      "learning_rate": 6.823995308474398e-06,
      "loss": 0.0199,
      "step": 984000
    },
    {
      "epoch": 1.610370312182924,
      "grad_norm": 0.28952667117118835,
      "learning_rate": 6.82392941626088e-06,
      "loss": 0.0243,
      "step": 984020
    },
    {
      "epoch": 1.6104030426215772,
      "grad_norm": 0.31731167435646057,
      "learning_rate": 6.823863524047364e-06,
      "loss": 0.0232,
      "step": 984040
    },
    {
      "epoch": 1.6104357730602306,
      "grad_norm": 0.6803786158561707,
      "learning_rate": 6.823797631833847e-06,
      "loss": 0.0197,
      "step": 984060
    },
    {
      "epoch": 1.610468503498884,
      "grad_norm": 1.2206611633300781,
      "learning_rate": 6.823731739620329e-06,
      "loss": 0.0202,
      "step": 984080
    },
    {
      "epoch": 1.610501233937537,
      "grad_norm": 0.3063238859176636,
      "learning_rate": 6.823665847406813e-06,
      "loss": 0.0161,
      "step": 984100
    },
    {
      "epoch": 1.6105339643761907,
      "grad_norm": 0.25014305114746094,
      "learning_rate": 6.823599955193296e-06,
      "loss": 0.0228,
      "step": 984120
    },
    {
      "epoch": 1.6105666948148438,
      "grad_norm": 0.2416686862707138,
      "learning_rate": 6.823534062979778e-06,
      "loss": 0.0258,
      "step": 984140
    },
    {
      "epoch": 1.6105994252534972,
      "grad_norm": 0.04525294527411461,
      "learning_rate": 6.823468170766261e-06,
      "loss": 0.0237,
      "step": 984160
    },
    {
      "epoch": 1.6106321556921506,
      "grad_norm": 0.5800952911376953,
      "learning_rate": 6.823402278552745e-06,
      "loss": 0.0219,
      "step": 984180
    },
    {
      "epoch": 1.610664886130804,
      "grad_norm": 0.26679959893226624,
      "learning_rate": 6.8233363863392266e-06,
      "loss": 0.0223,
      "step": 984200
    },
    {
      "epoch": 1.6106976165694573,
      "grad_norm": 0.43744704127311707,
      "learning_rate": 6.82327049412571e-06,
      "loss": 0.0176,
      "step": 984220
    },
    {
      "epoch": 1.6107303470081105,
      "grad_norm": 0.9342846870422363,
      "learning_rate": 6.823204601912192e-06,
      "loss": 0.0163,
      "step": 984240
    },
    {
      "epoch": 1.610763077446764,
      "grad_norm": 1.3327397108078003,
      "learning_rate": 6.823138709698676e-06,
      "loss": 0.018,
      "step": 984260
    },
    {
      "epoch": 1.6107958078854172,
      "grad_norm": 1.5122361183166504,
      "learning_rate": 6.8230728174851575e-06,
      "loss": 0.0149,
      "step": 984280
    },
    {
      "epoch": 1.6108285383240706,
      "grad_norm": 0.9560078382492065,
      "learning_rate": 6.823006925271641e-06,
      "loss": 0.0145,
      "step": 984300
    },
    {
      "epoch": 1.610861268762724,
      "grad_norm": 0.18656152486801147,
      "learning_rate": 6.822941033058124e-06,
      "loss": 0.0185,
      "step": 984320
    },
    {
      "epoch": 1.6108939992013773,
      "grad_norm": 1.0528277158737183,
      "learning_rate": 6.822875140844607e-06,
      "loss": 0.018,
      "step": 984340
    },
    {
      "epoch": 1.6109267296400307,
      "grad_norm": 0.3829813301563263,
      "learning_rate": 6.822809248631089e-06,
      "loss": 0.0141,
      "step": 984360
    },
    {
      "epoch": 1.6109594600786838,
      "grad_norm": 0.39330875873565674,
      "learning_rate": 6.822743356417573e-06,
      "loss": 0.0203,
      "step": 984380
    },
    {
      "epoch": 1.6109921905173374,
      "grad_norm": 0.9256446957588196,
      "learning_rate": 6.8226774642040565e-06,
      "loss": 0.0216,
      "step": 984400
    },
    {
      "epoch": 1.6110249209559906,
      "grad_norm": 0.10235440731048584,
      "learning_rate": 6.822611571990538e-06,
      "loss": 0.0207,
      "step": 984420
    },
    {
      "epoch": 1.611057651394644,
      "grad_norm": 0.2036159634590149,
      "learning_rate": 6.822545679777022e-06,
      "loss": 0.0144,
      "step": 984440
    },
    {
      "epoch": 1.6110903818332973,
      "grad_norm": 0.862440288066864,
      "learning_rate": 6.822479787563504e-06,
      "loss": 0.0225,
      "step": 984460
    },
    {
      "epoch": 1.6111231122719507,
      "grad_norm": 0.1638251692056656,
      "learning_rate": 6.8224138953499875e-06,
      "loss": 0.0182,
      "step": 984480
    },
    {
      "epoch": 1.611155842710604,
      "grad_norm": 0.4659375250339508,
      "learning_rate": 6.822348003136469e-06,
      "loss": 0.0202,
      "step": 984500
    },
    {
      "epoch": 1.6111885731492572,
      "grad_norm": 0.0645693987607956,
      "learning_rate": 6.822282110922953e-06,
      "loss": 0.0198,
      "step": 984520
    },
    {
      "epoch": 1.6112213035879108,
      "grad_norm": 0.8397138118743896,
      "learning_rate": 6.822216218709436e-06,
      "loss": 0.0224,
      "step": 984540
    },
    {
      "epoch": 1.611254034026564,
      "grad_norm": 0.42796632647514343,
      "learning_rate": 6.822150326495918e-06,
      "loss": 0.0219,
      "step": 984560
    },
    {
      "epoch": 1.6112867644652173,
      "grad_norm": 0.5684685111045837,
      "learning_rate": 6.822084434282401e-06,
      "loss": 0.0253,
      "step": 984580
    },
    {
      "epoch": 1.6113194949038707,
      "grad_norm": 1.368873119354248,
      "learning_rate": 6.822018542068885e-06,
      "loss": 0.0195,
      "step": 984600
    },
    {
      "epoch": 1.611352225342524,
      "grad_norm": 0.6016051769256592,
      "learning_rate": 6.821952649855367e-06,
      "loss": 0.0307,
      "step": 984620
    },
    {
      "epoch": 1.6113849557811775,
      "grad_norm": 0.45618200302124023,
      "learning_rate": 6.82188675764185e-06,
      "loss": 0.0207,
      "step": 984640
    },
    {
      "epoch": 1.6114176862198306,
      "grad_norm": 0.8305718898773193,
      "learning_rate": 6.821820865428332e-06,
      "loss": 0.0277,
      "step": 984660
    },
    {
      "epoch": 1.6114504166584842,
      "grad_norm": 0.4424664378166199,
      "learning_rate": 6.821754973214816e-06,
      "loss": 0.0163,
      "step": 984680
    },
    {
      "epoch": 1.6114831470971374,
      "grad_norm": 0.21380235254764557,
      "learning_rate": 6.8216890810012984e-06,
      "loss": 0.0152,
      "step": 984700
    },
    {
      "epoch": 1.6115158775357907,
      "grad_norm": 0.2914034128189087,
      "learning_rate": 6.821623188787781e-06,
      "loss": 0.0214,
      "step": 984720
    },
    {
      "epoch": 1.611548607974444,
      "grad_norm": 0.21189525723457336,
      "learning_rate": 6.821557296574265e-06,
      "loss": 0.0226,
      "step": 984740
    },
    {
      "epoch": 1.6115813384130973,
      "grad_norm": 1.1158549785614014,
      "learning_rate": 6.8214914043607475e-06,
      "loss": 0.0212,
      "step": 984760
    },
    {
      "epoch": 1.6116140688517508,
      "grad_norm": 1.023314356803894,
      "learning_rate": 6.82142551214723e-06,
      "loss": 0.0179,
      "step": 984780
    },
    {
      "epoch": 1.611646799290404,
      "grad_norm": 0.19069723784923553,
      "learning_rate": 6.821359619933713e-06,
      "loss": 0.026,
      "step": 984800
    },
    {
      "epoch": 1.6116795297290576,
      "grad_norm": 0.5557515621185303,
      "learning_rate": 6.8212937277201966e-06,
      "loss": 0.0216,
      "step": 984820
    },
    {
      "epoch": 1.6117122601677107,
      "grad_norm": 1.3003723621368408,
      "learning_rate": 6.8212278355066785e-06,
      "loss": 0.024,
      "step": 984840
    },
    {
      "epoch": 1.6117449906063641,
      "grad_norm": 0.16329620778560638,
      "learning_rate": 6.821161943293162e-06,
      "loss": 0.0223,
      "step": 984860
    },
    {
      "epoch": 1.6117777210450175,
      "grad_norm": 0.23069727420806885,
      "learning_rate": 6.821096051079644e-06,
      "loss": 0.0251,
      "step": 984880
    },
    {
      "epoch": 1.6118104514836706,
      "grad_norm": 0.2856465280056,
      "learning_rate": 6.8210301588661275e-06,
      "loss": 0.0178,
      "step": 984900
    },
    {
      "epoch": 1.6118431819223242,
      "grad_norm": 1.8936243057250977,
      "learning_rate": 6.82096426665261e-06,
      "loss": 0.0228,
      "step": 984920
    },
    {
      "epoch": 1.6118759123609774,
      "grad_norm": 0.47399890422821045,
      "learning_rate": 6.820898374439093e-06,
      "loss": 0.0188,
      "step": 984940
    },
    {
      "epoch": 1.6119086427996308,
      "grad_norm": 0.6683242321014404,
      "learning_rate": 6.820832482225576e-06,
      "loss": 0.0225,
      "step": 984960
    },
    {
      "epoch": 1.6119413732382841,
      "grad_norm": 0.480839341878891,
      "learning_rate": 6.820766590012059e-06,
      "loss": 0.0144,
      "step": 984980
    },
    {
      "epoch": 1.6119741036769375,
      "grad_norm": 0.7368811368942261,
      "learning_rate": 6.820700697798541e-06,
      "loss": 0.0218,
      "step": 985000
    },
    {
      "epoch": 1.6120068341155909,
      "grad_norm": 0.8620418906211853,
      "learning_rate": 6.820634805585025e-06,
      "loss": 0.0198,
      "step": 985020
    },
    {
      "epoch": 1.612039564554244,
      "grad_norm": 0.4044196307659149,
      "learning_rate": 6.820568913371507e-06,
      "loss": 0.0208,
      "step": 985040
    },
    {
      "epoch": 1.6120722949928976,
      "grad_norm": 0.18879997730255127,
      "learning_rate": 6.82050302115799e-06,
      "loss": 0.0144,
      "step": 985060
    },
    {
      "epoch": 1.6121050254315508,
      "grad_norm": 1.0834356546401978,
      "learning_rate": 6.820437128944474e-06,
      "loss": 0.0217,
      "step": 985080
    },
    {
      "epoch": 1.6121377558702041,
      "grad_norm": 0.3922666311264038,
      "learning_rate": 6.820371236730956e-06,
      "loss": 0.0194,
      "step": 985100
    },
    {
      "epoch": 1.6121704863088575,
      "grad_norm": 0.2238548994064331,
      "learning_rate": 6.820305344517439e-06,
      "loss": 0.0165,
      "step": 985120
    },
    {
      "epoch": 1.6122032167475109,
      "grad_norm": 0.4613588750362396,
      "learning_rate": 6.820239452303922e-06,
      "loss": 0.0197,
      "step": 985140
    },
    {
      "epoch": 1.6122359471861643,
      "grad_norm": 0.44734129309654236,
      "learning_rate": 6.820173560090405e-06,
      "loss": 0.0191,
      "step": 985160
    },
    {
      "epoch": 1.6122686776248174,
      "grad_norm": 0.2785995304584503,
      "learning_rate": 6.820107667876888e-06,
      "loss": 0.02,
      "step": 985180
    },
    {
      "epoch": 1.612301408063471,
      "grad_norm": 0.49741560220718384,
      "learning_rate": 6.820041775663371e-06,
      "loss": 0.0232,
      "step": 985200
    },
    {
      "epoch": 1.6123341385021241,
      "grad_norm": 0.6546106338500977,
      "learning_rate": 6.819975883449853e-06,
      "loss": 0.0229,
      "step": 985220
    },
    {
      "epoch": 1.6123668689407775,
      "grad_norm": 0.28229159116744995,
      "learning_rate": 6.819909991236337e-06,
      "loss": 0.0193,
      "step": 985240
    },
    {
      "epoch": 1.612399599379431,
      "grad_norm": 0.35487690567970276,
      "learning_rate": 6.8198440990228185e-06,
      "loss": 0.0262,
      "step": 985260
    },
    {
      "epoch": 1.6124323298180843,
      "grad_norm": 0.36907297372817993,
      "learning_rate": 6.819778206809302e-06,
      "loss": 0.0176,
      "step": 985280
    },
    {
      "epoch": 1.6124650602567376,
      "grad_norm": 0.8382701873779297,
      "learning_rate": 6.819712314595784e-06,
      "loss": 0.0242,
      "step": 985300
    },
    {
      "epoch": 1.6124977906953908,
      "grad_norm": 1.2999320030212402,
      "learning_rate": 6.819646422382268e-06,
      "loss": 0.0258,
      "step": 985320
    },
    {
      "epoch": 1.6125305211340444,
      "grad_norm": 0.2526784837245941,
      "learning_rate": 6.81958053016875e-06,
      "loss": 0.0243,
      "step": 985340
    },
    {
      "epoch": 1.6125632515726975,
      "grad_norm": 0.4958435893058777,
      "learning_rate": 6.819514637955233e-06,
      "loss": 0.0181,
      "step": 985360
    },
    {
      "epoch": 1.612595982011351,
      "grad_norm": 0.49110203981399536,
      "learning_rate": 6.819448745741716e-06,
      "loss": 0.0175,
      "step": 985380
    },
    {
      "epoch": 1.6126287124500043,
      "grad_norm": 2.919222593307495,
      "learning_rate": 6.819382853528199e-06,
      "loss": 0.017,
      "step": 985400
    },
    {
      "epoch": 1.6126614428886576,
      "grad_norm": 0.6843831539154053,
      "learning_rate": 6.819316961314681e-06,
      "loss": 0.0227,
      "step": 985420
    },
    {
      "epoch": 1.612694173327311,
      "grad_norm": 0.3112718164920807,
      "learning_rate": 6.819251069101165e-06,
      "loss": 0.0179,
      "step": 985440
    },
    {
      "epoch": 1.6127269037659642,
      "grad_norm": 0.12130657583475113,
      "learning_rate": 6.8191851768876485e-06,
      "loss": 0.0183,
      "step": 985460
    },
    {
      "epoch": 1.6127596342046178,
      "grad_norm": 0.7315114140510559,
      "learning_rate": 6.81911928467413e-06,
      "loss": 0.0208,
      "step": 985480
    },
    {
      "epoch": 1.612792364643271,
      "grad_norm": 0.41290348768234253,
      "learning_rate": 6.819053392460614e-06,
      "loss": 0.029,
      "step": 985500
    },
    {
      "epoch": 1.6128250950819243,
      "grad_norm": 3.567260503768921,
      "learning_rate": 6.818987500247096e-06,
      "loss": 0.022,
      "step": 985520
    },
    {
      "epoch": 1.6128578255205777,
      "grad_norm": 0.503032922744751,
      "learning_rate": 6.8189216080335794e-06,
      "loss": 0.0174,
      "step": 985540
    },
    {
      "epoch": 1.6128905559592308,
      "grad_norm": 0.8817194104194641,
      "learning_rate": 6.818855715820062e-06,
      "loss": 0.0205,
      "step": 985560
    },
    {
      "epoch": 1.6129232863978844,
      "grad_norm": 0.24489517509937286,
      "learning_rate": 6.818789823606545e-06,
      "loss": 0.018,
      "step": 985580
    },
    {
      "epoch": 1.6129560168365376,
      "grad_norm": 0.287118524312973,
      "learning_rate": 6.818723931393028e-06,
      "loss": 0.0168,
      "step": 985600
    },
    {
      "epoch": 1.6129887472751911,
      "grad_norm": 0.5762951970100403,
      "learning_rate": 6.818658039179511e-06,
      "loss": 0.0243,
      "step": 985620
    },
    {
      "epoch": 1.6130214777138443,
      "grad_norm": 0.5488744974136353,
      "learning_rate": 6.818592146965993e-06,
      "loss": 0.0255,
      "step": 985640
    },
    {
      "epoch": 1.6130542081524977,
      "grad_norm": 0.31199777126312256,
      "learning_rate": 6.818526254752477e-06,
      "loss": 0.0188,
      "step": 985660
    },
    {
      "epoch": 1.613086938591151,
      "grad_norm": 0.7134727835655212,
      "learning_rate": 6.818460362538959e-06,
      "loss": 0.0283,
      "step": 985680
    },
    {
      "epoch": 1.6131196690298042,
      "grad_norm": 0.16734760999679565,
      "learning_rate": 6.818394470325442e-06,
      "loss": 0.0195,
      "step": 985700
    },
    {
      "epoch": 1.6131523994684578,
      "grad_norm": 0.49949225783348083,
      "learning_rate": 6.818328578111925e-06,
      "loss": 0.0201,
      "step": 985720
    },
    {
      "epoch": 1.613185129907111,
      "grad_norm": 0.19604235887527466,
      "learning_rate": 6.818262685898408e-06,
      "loss": 0.0145,
      "step": 985740
    },
    {
      "epoch": 1.6132178603457643,
      "grad_norm": 0.7642061114311218,
      "learning_rate": 6.81819679368489e-06,
      "loss": 0.0118,
      "step": 985760
    },
    {
      "epoch": 1.6132505907844177,
      "grad_norm": 0.1242595985531807,
      "learning_rate": 6.818130901471374e-06,
      "loss": 0.0211,
      "step": 985780
    },
    {
      "epoch": 1.613283321223071,
      "grad_norm": 1.0450878143310547,
      "learning_rate": 6.818065009257857e-06,
      "loss": 0.0204,
      "step": 985800
    },
    {
      "epoch": 1.6133160516617244,
      "grad_norm": 0.5964966416358948,
      "learning_rate": 6.8179991170443395e-06,
      "loss": 0.0141,
      "step": 985820
    },
    {
      "epoch": 1.6133487821003776,
      "grad_norm": 0.2193082571029663,
      "learning_rate": 6.817933224830823e-06,
      "loss": 0.0145,
      "step": 985840
    },
    {
      "epoch": 1.6133815125390312,
      "grad_norm": 0.2710888087749481,
      "learning_rate": 6.817867332617305e-06,
      "loss": 0.019,
      "step": 985860
    },
    {
      "epoch": 1.6134142429776843,
      "grad_norm": 0.3318004608154297,
      "learning_rate": 6.8178014404037886e-06,
      "loss": 0.0197,
      "step": 985880
    },
    {
      "epoch": 1.6134469734163377,
      "grad_norm": 0.641062319278717,
      "learning_rate": 6.8177355481902704e-06,
      "loss": 0.0177,
      "step": 985900
    },
    {
      "epoch": 1.613479703854991,
      "grad_norm": 0.6045729517936707,
      "learning_rate": 6.817669655976754e-06,
      "loss": 0.0168,
      "step": 985920
    },
    {
      "epoch": 1.6135124342936444,
      "grad_norm": 0.18470525741577148,
      "learning_rate": 6.817603763763237e-06,
      "loss": 0.0184,
      "step": 985940
    },
    {
      "epoch": 1.6135451647322978,
      "grad_norm": 0.4334039092063904,
      "learning_rate": 6.8175378715497195e-06,
      "loss": 0.0177,
      "step": 985960
    },
    {
      "epoch": 1.613577895170951,
      "grad_norm": 0.4892596900463104,
      "learning_rate": 6.817471979336202e-06,
      "loss": 0.0192,
      "step": 985980
    },
    {
      "epoch": 1.6136106256096046,
      "grad_norm": 0.415215402841568,
      "learning_rate": 6.817406087122686e-06,
      "loss": 0.0187,
      "step": 986000
    },
    {
      "epoch": 1.6136433560482577,
      "grad_norm": 0.18115487694740295,
      "learning_rate": 6.817340194909168e-06,
      "loss": 0.0181,
      "step": 986020
    },
    {
      "epoch": 1.613676086486911,
      "grad_norm": 0.7531505227088928,
      "learning_rate": 6.817274302695651e-06,
      "loss": 0.0185,
      "step": 986040
    },
    {
      "epoch": 1.6137088169255644,
      "grad_norm": 0.13062584400177002,
      "learning_rate": 6.817208410482133e-06,
      "loss": 0.0165,
      "step": 986060
    },
    {
      "epoch": 1.6137415473642178,
      "grad_norm": 0.464604914188385,
      "learning_rate": 6.817142518268617e-06,
      "loss": 0.021,
      "step": 986080
    },
    {
      "epoch": 1.6137742778028712,
      "grad_norm": 0.8824697732925415,
      "learning_rate": 6.8170766260550995e-06,
      "loss": 0.0138,
      "step": 986100
    },
    {
      "epoch": 1.6138070082415243,
      "grad_norm": 0.7422313690185547,
      "learning_rate": 6.817010733841582e-06,
      "loss": 0.0141,
      "step": 986120
    },
    {
      "epoch": 1.613839738680178,
      "grad_norm": 0.6404901742935181,
      "learning_rate": 6.816944841628066e-06,
      "loss": 0.0183,
      "step": 986140
    },
    {
      "epoch": 1.613872469118831,
      "grad_norm": 0.2050124853849411,
      "learning_rate": 6.816878949414549e-06,
      "loss": 0.0172,
      "step": 986160
    },
    {
      "epoch": 1.6139051995574845,
      "grad_norm": 0.405238538980484,
      "learning_rate": 6.816813057201031e-06,
      "loss": 0.0137,
      "step": 986180
    },
    {
      "epoch": 1.6139379299961378,
      "grad_norm": 0.3683488965034485,
      "learning_rate": 6.816747164987514e-06,
      "loss": 0.021,
      "step": 986200
    },
    {
      "epoch": 1.613970660434791,
      "grad_norm": 0.2859862148761749,
      "learning_rate": 6.816681272773998e-06,
      "loss": 0.0153,
      "step": 986220
    },
    {
      "epoch": 1.6140033908734446,
      "grad_norm": 0.5008773803710938,
      "learning_rate": 6.8166153805604796e-06,
      "loss": 0.0204,
      "step": 986240
    },
    {
      "epoch": 1.6140361213120977,
      "grad_norm": 0.07456724345684052,
      "learning_rate": 6.816549488346963e-06,
      "loss": 0.0182,
      "step": 986260
    },
    {
      "epoch": 1.6140688517507513,
      "grad_norm": 0.2885242700576782,
      "learning_rate": 6.816483596133445e-06,
      "loss": 0.0212,
      "step": 986280
    },
    {
      "epoch": 1.6141015821894045,
      "grad_norm": 3.5875132083892822,
      "learning_rate": 6.816417703919929e-06,
      "loss": 0.0228,
      "step": 986300
    },
    {
      "epoch": 1.6141343126280578,
      "grad_norm": 0.32811638712882996,
      "learning_rate": 6.8163518117064105e-06,
      "loss": 0.0175,
      "step": 986320
    },
    {
      "epoch": 1.6141670430667112,
      "grad_norm": 0.4168849289417267,
      "learning_rate": 6.816285919492894e-06,
      "loss": 0.0211,
      "step": 986340
    },
    {
      "epoch": 1.6141997735053644,
      "grad_norm": 0.6566267609596252,
      "learning_rate": 6.816220027279377e-06,
      "loss": 0.02,
      "step": 986360
    },
    {
      "epoch": 1.614232503944018,
      "grad_norm": 0.5115252733230591,
      "learning_rate": 6.81615413506586e-06,
      "loss": 0.0177,
      "step": 986380
    },
    {
      "epoch": 1.614265234382671,
      "grad_norm": 0.5189107656478882,
      "learning_rate": 6.816088242852342e-06,
      "loss": 0.0294,
      "step": 986400
    },
    {
      "epoch": 1.6142979648213245,
      "grad_norm": 0.4029211401939392,
      "learning_rate": 6.816022350638826e-06,
      "loss": 0.0145,
      "step": 986420
    },
    {
      "epoch": 1.6143306952599779,
      "grad_norm": 1.8064364194869995,
      "learning_rate": 6.815956458425308e-06,
      "loss": 0.0205,
      "step": 986440
    },
    {
      "epoch": 1.6143634256986312,
      "grad_norm": 0.04097944498062134,
      "learning_rate": 6.815890566211791e-06,
      "loss": 0.0239,
      "step": 986460
    },
    {
      "epoch": 1.6143961561372846,
      "grad_norm": 0.4843985140323639,
      "learning_rate": 6.815824673998273e-06,
      "loss": 0.0214,
      "step": 986480
    },
    {
      "epoch": 1.6144288865759377,
      "grad_norm": 0.732585072517395,
      "learning_rate": 6.815758781784757e-06,
      "loss": 0.0202,
      "step": 986500
    },
    {
      "epoch": 1.6144616170145913,
      "grad_norm": 0.24638164043426514,
      "learning_rate": 6.8156928895712405e-06,
      "loss": 0.0179,
      "step": 986520
    },
    {
      "epoch": 1.6144943474532445,
      "grad_norm": 0.5103272199630737,
      "learning_rate": 6.815626997357722e-06,
      "loss": 0.022,
      "step": 986540
    },
    {
      "epoch": 1.6145270778918979,
      "grad_norm": 0.3490198254585266,
      "learning_rate": 6.815561105144206e-06,
      "loss": 0.0311,
      "step": 986560
    },
    {
      "epoch": 1.6145598083305512,
      "grad_norm": 1.7306385040283203,
      "learning_rate": 6.815495212930689e-06,
      "loss": 0.0258,
      "step": 986580
    },
    {
      "epoch": 1.6145925387692046,
      "grad_norm": 1.0467270612716675,
      "learning_rate": 6.815429320717171e-06,
      "loss": 0.0158,
      "step": 986600
    },
    {
      "epoch": 1.614625269207858,
      "grad_norm": 0.5959059596061707,
      "learning_rate": 6.815363428503654e-06,
      "loss": 0.0203,
      "step": 986620
    },
    {
      "epoch": 1.6146579996465111,
      "grad_norm": 0.7526447176933289,
      "learning_rate": 6.815297536290138e-06,
      "loss": 0.0211,
      "step": 986640
    },
    {
      "epoch": 1.6146907300851647,
      "grad_norm": 1.3409117460250854,
      "learning_rate": 6.81523164407662e-06,
      "loss": 0.0161,
      "step": 986660
    },
    {
      "epoch": 1.6147234605238179,
      "grad_norm": 0.26675039529800415,
      "learning_rate": 6.815165751863103e-06,
      "loss": 0.0203,
      "step": 986680
    },
    {
      "epoch": 1.6147561909624712,
      "grad_norm": 0.20371906459331512,
      "learning_rate": 6.815099859649585e-06,
      "loss": 0.023,
      "step": 986700
    },
    {
      "epoch": 1.6147889214011246,
      "grad_norm": 0.5374213457107544,
      "learning_rate": 6.815033967436069e-06,
      "loss": 0.0175,
      "step": 986720
    },
    {
      "epoch": 1.614821651839778,
      "grad_norm": 0.2640857696533203,
      "learning_rate": 6.8149680752225514e-06,
      "loss": 0.0202,
      "step": 986740
    },
    {
      "epoch": 1.6148543822784314,
      "grad_norm": 0.17175525426864624,
      "learning_rate": 6.814902183009034e-06,
      "loss": 0.0195,
      "step": 986760
    },
    {
      "epoch": 1.6148871127170845,
      "grad_norm": 0.25661933422088623,
      "learning_rate": 6.814836290795517e-06,
      "loss": 0.0134,
      "step": 986780
    },
    {
      "epoch": 1.614919843155738,
      "grad_norm": 0.2979019284248352,
      "learning_rate": 6.8147703985820005e-06,
      "loss": 0.0301,
      "step": 986800
    },
    {
      "epoch": 1.6149525735943913,
      "grad_norm": 0.9754341840744019,
      "learning_rate": 6.814704506368482e-06,
      "loss": 0.0156,
      "step": 986820
    },
    {
      "epoch": 1.6149853040330446,
      "grad_norm": 0.46838411688804626,
      "learning_rate": 6.814638614154966e-06,
      "loss": 0.0184,
      "step": 986840
    },
    {
      "epoch": 1.615018034471698,
      "grad_norm": 0.46808692812919617,
      "learning_rate": 6.81457272194145e-06,
      "loss": 0.0193,
      "step": 986860
    },
    {
      "epoch": 1.6150507649103514,
      "grad_norm": 0.26862263679504395,
      "learning_rate": 6.8145068297279315e-06,
      "loss": 0.0146,
      "step": 986880
    },
    {
      "epoch": 1.6150834953490047,
      "grad_norm": 0.15928909182548523,
      "learning_rate": 6.814440937514415e-06,
      "loss": 0.0143,
      "step": 986900
    },
    {
      "epoch": 1.615116225787658,
      "grad_norm": 0.2714836001396179,
      "learning_rate": 6.814375045300897e-06,
      "loss": 0.0171,
      "step": 986920
    },
    {
      "epoch": 1.6151489562263115,
      "grad_norm": 0.12189432978630066,
      "learning_rate": 6.8143091530873805e-06,
      "loss": 0.0232,
      "step": 986940
    },
    {
      "epoch": 1.6151816866649646,
      "grad_norm": 0.5824578404426575,
      "learning_rate": 6.814243260873863e-06,
      "loss": 0.0236,
      "step": 986960
    },
    {
      "epoch": 1.615214417103618,
      "grad_norm": 0.19605478644371033,
      "learning_rate": 6.814177368660346e-06,
      "loss": 0.0197,
      "step": 986980
    },
    {
      "epoch": 1.6152471475422714,
      "grad_norm": 0.4464479088783264,
      "learning_rate": 6.814111476446829e-06,
      "loss": 0.0188,
      "step": 987000
    },
    {
      "epoch": 1.6152798779809245,
      "grad_norm": 0.09934897720813751,
      "learning_rate": 6.814045584233312e-06,
      "loss": 0.0228,
      "step": 987020
    },
    {
      "epoch": 1.6153126084195781,
      "grad_norm": 0.21331462264060974,
      "learning_rate": 6.813979692019794e-06,
      "loss": 0.0219,
      "step": 987040
    },
    {
      "epoch": 1.6153453388582313,
      "grad_norm": 0.2107018232345581,
      "learning_rate": 6.813913799806278e-06,
      "loss": 0.0293,
      "step": 987060
    },
    {
      "epoch": 1.6153780692968849,
      "grad_norm": 0.3102681338787079,
      "learning_rate": 6.81384790759276e-06,
      "loss": 0.0176,
      "step": 987080
    },
    {
      "epoch": 1.615410799735538,
      "grad_norm": 0.2651066184043884,
      "learning_rate": 6.813782015379243e-06,
      "loss": 0.017,
      "step": 987100
    },
    {
      "epoch": 1.6154435301741914,
      "grad_norm": 0.7207490801811218,
      "learning_rate": 6.813716123165725e-06,
      "loss": 0.0193,
      "step": 987120
    },
    {
      "epoch": 1.6154762606128448,
      "grad_norm": 0.25684109330177307,
      "learning_rate": 6.813650230952209e-06,
      "loss": 0.0205,
      "step": 987140
    },
    {
      "epoch": 1.615508991051498,
      "grad_norm": 0.5386205315589905,
      "learning_rate": 6.8135843387386915e-06,
      "loss": 0.0196,
      "step": 987160
    },
    {
      "epoch": 1.6155417214901515,
      "grad_norm": 0.3788174092769623,
      "learning_rate": 6.813518446525175e-06,
      "loss": 0.0228,
      "step": 987180
    },
    {
      "epoch": 1.6155744519288047,
      "grad_norm": 1.7742829322814941,
      "learning_rate": 6.813452554311658e-06,
      "loss": 0.0152,
      "step": 987200
    },
    {
      "epoch": 1.615607182367458,
      "grad_norm": 0.4332387447357178,
      "learning_rate": 6.813386662098141e-06,
      "loss": 0.0152,
      "step": 987220
    },
    {
      "epoch": 1.6156399128061114,
      "grad_norm": 1.3128215074539185,
      "learning_rate": 6.813320769884624e-06,
      "loss": 0.0218,
      "step": 987240
    },
    {
      "epoch": 1.6156726432447648,
      "grad_norm": 0.2036074995994568,
      "learning_rate": 6.813254877671106e-06,
      "loss": 0.0209,
      "step": 987260
    },
    {
      "epoch": 1.6157053736834182,
      "grad_norm": 0.46998897194862366,
      "learning_rate": 6.81318898545759e-06,
      "loss": 0.0216,
      "step": 987280
    },
    {
      "epoch": 1.6157381041220713,
      "grad_norm": 0.6645805835723877,
      "learning_rate": 6.8131230932440715e-06,
      "loss": 0.0275,
      "step": 987300
    },
    {
      "epoch": 1.615770834560725,
      "grad_norm": 0.2765420973300934,
      "learning_rate": 6.813057201030555e-06,
      "loss": 0.0115,
      "step": 987320
    },
    {
      "epoch": 1.615803564999378,
      "grad_norm": 0.3429078161716461,
      "learning_rate": 6.812991308817037e-06,
      "loss": 0.0177,
      "step": 987340
    },
    {
      "epoch": 1.6158362954380314,
      "grad_norm": 0.24504992365837097,
      "learning_rate": 6.812925416603521e-06,
      "loss": 0.0173,
      "step": 987360
    },
    {
      "epoch": 1.6158690258766848,
      "grad_norm": 0.698241114616394,
      "learning_rate": 6.812859524390003e-06,
      "loss": 0.0177,
      "step": 987380
    },
    {
      "epoch": 1.6159017563153382,
      "grad_norm": 0.2640143930912018,
      "learning_rate": 6.812793632176486e-06,
      "loss": 0.0242,
      "step": 987400
    },
    {
      "epoch": 1.6159344867539915,
      "grad_norm": 0.5673567056655884,
      "learning_rate": 6.812727739962969e-06,
      "loss": 0.0283,
      "step": 987420
    },
    {
      "epoch": 1.6159672171926447,
      "grad_norm": 0.4605860114097595,
      "learning_rate": 6.812661847749452e-06,
      "loss": 0.0218,
      "step": 987440
    },
    {
      "epoch": 1.6159999476312983,
      "grad_norm": 4.448883533477783,
      "learning_rate": 6.812595955535934e-06,
      "loss": 0.0247,
      "step": 987460
    },
    {
      "epoch": 1.6160326780699514,
      "grad_norm": 0.4651689827442169,
      "learning_rate": 6.812530063322418e-06,
      "loss": 0.0231,
      "step": 987480
    },
    {
      "epoch": 1.6160654085086048,
      "grad_norm": 0.40450727939605713,
      "learning_rate": 6.8124641711089e-06,
      "loss": 0.0253,
      "step": 987500
    },
    {
      "epoch": 1.6160981389472582,
      "grad_norm": 1.308579683303833,
      "learning_rate": 6.812398278895383e-06,
      "loss": 0.0239,
      "step": 987520
    },
    {
      "epoch": 1.6161308693859116,
      "grad_norm": 0.8926835656166077,
      "learning_rate": 6.812332386681867e-06,
      "loss": 0.0201,
      "step": 987540
    },
    {
      "epoch": 1.616163599824565,
      "grad_norm": 0.7419062852859497,
      "learning_rate": 6.812266494468349e-06,
      "loss": 0.0229,
      "step": 987560
    },
    {
      "epoch": 1.616196330263218,
      "grad_norm": 0.27451959252357483,
      "learning_rate": 6.8122006022548324e-06,
      "loss": 0.0193,
      "step": 987580
    },
    {
      "epoch": 1.6162290607018717,
      "grad_norm": 1.4721839427947998,
      "learning_rate": 6.812134710041315e-06,
      "loss": 0.0198,
      "step": 987600
    },
    {
      "epoch": 1.6162617911405248,
      "grad_norm": 1.2690632343292236,
      "learning_rate": 6.812068817827798e-06,
      "loss": 0.0197,
      "step": 987620
    },
    {
      "epoch": 1.6162945215791782,
      "grad_norm": 0.3231925964355469,
      "learning_rate": 6.812002925614281e-06,
      "loss": 0.0178,
      "step": 987640
    },
    {
      "epoch": 1.6163272520178316,
      "grad_norm": 0.28951555490493774,
      "learning_rate": 6.811937033400764e-06,
      "loss": 0.0201,
      "step": 987660
    },
    {
      "epoch": 1.616359982456485,
      "grad_norm": 0.14040599763393402,
      "learning_rate": 6.811871141187246e-06,
      "loss": 0.0207,
      "step": 987680
    },
    {
      "epoch": 1.6163927128951383,
      "grad_norm": 0.34818875789642334,
      "learning_rate": 6.81180524897373e-06,
      "loss": 0.0265,
      "step": 987700
    },
    {
      "epoch": 1.6164254433337915,
      "grad_norm": 0.8817338943481445,
      "learning_rate": 6.811739356760212e-06,
      "loss": 0.0196,
      "step": 987720
    },
    {
      "epoch": 1.616458173772445,
      "grad_norm": 0.1707824170589447,
      "learning_rate": 6.811673464546695e-06,
      "loss": 0.0146,
      "step": 987740
    },
    {
      "epoch": 1.6164909042110982,
      "grad_norm": 0.8450486660003662,
      "learning_rate": 6.811607572333178e-06,
      "loss": 0.0134,
      "step": 987760
    },
    {
      "epoch": 1.6165236346497516,
      "grad_norm": 1.1999903917312622,
      "learning_rate": 6.811541680119661e-06,
      "loss": 0.0239,
      "step": 987780
    },
    {
      "epoch": 1.616556365088405,
      "grad_norm": 2.2161428928375244,
      "learning_rate": 6.8114757879061434e-06,
      "loss": 0.0208,
      "step": 987800
    },
    {
      "epoch": 1.616589095527058,
      "grad_norm": 0.3857291340827942,
      "learning_rate": 6.811409895692627e-06,
      "loss": 0.0153,
      "step": 987820
    },
    {
      "epoch": 1.6166218259657117,
      "grad_norm": 0.3415393531322479,
      "learning_rate": 6.811344003479109e-06,
      "loss": 0.0189,
      "step": 987840
    },
    {
      "epoch": 1.6166545564043648,
      "grad_norm": 0.7424619197845459,
      "learning_rate": 6.8112781112655925e-06,
      "loss": 0.0214,
      "step": 987860
    },
    {
      "epoch": 1.6166872868430184,
      "grad_norm": 0.7291231155395508,
      "learning_rate": 6.811212219052074e-06,
      "loss": 0.0196,
      "step": 987880
    },
    {
      "epoch": 1.6167200172816716,
      "grad_norm": 1.2027910947799683,
      "learning_rate": 6.811146326838558e-06,
      "loss": 0.0259,
      "step": 987900
    },
    {
      "epoch": 1.616752747720325,
      "grad_norm": 0.4748586416244507,
      "learning_rate": 6.8110804346250416e-06,
      "loss": 0.0284,
      "step": 987920
    },
    {
      "epoch": 1.6167854781589783,
      "grad_norm": 0.5461596846580505,
      "learning_rate": 6.8110145424115235e-06,
      "loss": 0.0194,
      "step": 987940
    },
    {
      "epoch": 1.6168182085976315,
      "grad_norm": 0.13653792440891266,
      "learning_rate": 6.810948650198007e-06,
      "loss": 0.0236,
      "step": 987960
    },
    {
      "epoch": 1.616850939036285,
      "grad_norm": 0.40589797496795654,
      "learning_rate": 6.81088275798449e-06,
      "loss": 0.02,
      "step": 987980
    },
    {
      "epoch": 1.6168836694749382,
      "grad_norm": 1.2634828090667725,
      "learning_rate": 6.8108168657709725e-06,
      "loss": 0.0292,
      "step": 988000
    },
    {
      "epoch": 1.6169163999135916,
      "grad_norm": 0.8931384086608887,
      "learning_rate": 6.810750973557455e-06,
      "loss": 0.0257,
      "step": 988020
    },
    {
      "epoch": 1.616949130352245,
      "grad_norm": 0.6976660490036011,
      "learning_rate": 6.810685081343939e-06,
      "loss": 0.023,
      "step": 988040
    },
    {
      "epoch": 1.6169818607908983,
      "grad_norm": 0.8288599848747253,
      "learning_rate": 6.810619189130421e-06,
      "loss": 0.0198,
      "step": 988060
    },
    {
      "epoch": 1.6170145912295517,
      "grad_norm": 0.26524218916893005,
      "learning_rate": 6.810553296916904e-06,
      "loss": 0.0223,
      "step": 988080
    },
    {
      "epoch": 1.6170473216682049,
      "grad_norm": 1.119945764541626,
      "learning_rate": 6.810487404703386e-06,
      "loss": 0.0138,
      "step": 988100
    },
    {
      "epoch": 1.6170800521068585,
      "grad_norm": 0.3622477948665619,
      "learning_rate": 6.81042151248987e-06,
      "loss": 0.0195,
      "step": 988120
    },
    {
      "epoch": 1.6171127825455116,
      "grad_norm": 1.7286317348480225,
      "learning_rate": 6.810355620276352e-06,
      "loss": 0.0295,
      "step": 988140
    },
    {
      "epoch": 1.617145512984165,
      "grad_norm": 0.6079390645027161,
      "learning_rate": 6.810289728062835e-06,
      "loss": 0.0144,
      "step": 988160
    },
    {
      "epoch": 1.6171782434228184,
      "grad_norm": 0.4055672883987427,
      "learning_rate": 6.810223835849318e-06,
      "loss": 0.0204,
      "step": 988180
    },
    {
      "epoch": 1.6172109738614717,
      "grad_norm": 0.7601555585861206,
      "learning_rate": 6.810157943635801e-06,
      "loss": 0.0178,
      "step": 988200
    },
    {
      "epoch": 1.617243704300125,
      "grad_norm": 0.5931548476219177,
      "learning_rate": 6.8100920514222835e-06,
      "loss": 0.0166,
      "step": 988220
    },
    {
      "epoch": 1.6172764347387782,
      "grad_norm": 1.0015593767166138,
      "learning_rate": 6.810026159208767e-06,
      "loss": 0.023,
      "step": 988240
    },
    {
      "epoch": 1.6173091651774318,
      "grad_norm": 0.3884955644607544,
      "learning_rate": 6.809960266995251e-06,
      "loss": 0.0167,
      "step": 988260
    },
    {
      "epoch": 1.617341895616085,
      "grad_norm": 0.36676856875419617,
      "learning_rate": 6.8098943747817326e-06,
      "loss": 0.0156,
      "step": 988280
    },
    {
      "epoch": 1.6173746260547384,
      "grad_norm": 0.5363880395889282,
      "learning_rate": 6.809828482568216e-06,
      "loss": 0.0164,
      "step": 988300
    },
    {
      "epoch": 1.6174073564933917,
      "grad_norm": 1.126734733581543,
      "learning_rate": 6.809762590354698e-06,
      "loss": 0.022,
      "step": 988320
    },
    {
      "epoch": 1.617440086932045,
      "grad_norm": 0.8601335883140564,
      "learning_rate": 6.809696698141182e-06,
      "loss": 0.0204,
      "step": 988340
    },
    {
      "epoch": 1.6174728173706985,
      "grad_norm": 0.46459510922431946,
      "learning_rate": 6.8096308059276635e-06,
      "loss": 0.0197,
      "step": 988360
    },
    {
      "epoch": 1.6175055478093516,
      "grad_norm": 0.8621044754981995,
      "learning_rate": 6.809564913714147e-06,
      "loss": 0.0253,
      "step": 988380
    },
    {
      "epoch": 1.6175382782480052,
      "grad_norm": 0.3162238299846649,
      "learning_rate": 6.80949902150063e-06,
      "loss": 0.0257,
      "step": 988400
    },
    {
      "epoch": 1.6175710086866584,
      "grad_norm": 0.49228259921073914,
      "learning_rate": 6.809433129287113e-06,
      "loss": 0.0125,
      "step": 988420
    },
    {
      "epoch": 1.6176037391253117,
      "grad_norm": 0.45029687881469727,
      "learning_rate": 6.809367237073595e-06,
      "loss": 0.0137,
      "step": 988440
    },
    {
      "epoch": 1.6176364695639651,
      "grad_norm": 0.6407521367073059,
      "learning_rate": 6.809301344860079e-06,
      "loss": 0.022,
      "step": 988460
    },
    {
      "epoch": 1.6176692000026185,
      "grad_norm": 0.3405401408672333,
      "learning_rate": 6.809235452646561e-06,
      "loss": 0.0295,
      "step": 988480
    },
    {
      "epoch": 1.6177019304412719,
      "grad_norm": 0.08891395479440689,
      "learning_rate": 6.809169560433044e-06,
      "loss": 0.0191,
      "step": 988500
    },
    {
      "epoch": 1.617734660879925,
      "grad_norm": 0.42964908480644226,
      "learning_rate": 6.809103668219526e-06,
      "loss": 0.0163,
      "step": 988520
    },
    {
      "epoch": 1.6177673913185786,
      "grad_norm": 0.29804110527038574,
      "learning_rate": 6.80903777600601e-06,
      "loss": 0.0168,
      "step": 988540
    },
    {
      "epoch": 1.6178001217572318,
      "grad_norm": 0.6132293939590454,
      "learning_rate": 6.808971883792493e-06,
      "loss": 0.0175,
      "step": 988560
    },
    {
      "epoch": 1.6178328521958851,
      "grad_norm": 0.6415582895278931,
      "learning_rate": 6.808905991578975e-06,
      "loss": 0.0281,
      "step": 988580
    },
    {
      "epoch": 1.6178655826345385,
      "grad_norm": 0.22543266415596008,
      "learning_rate": 6.808840099365459e-06,
      "loss": 0.0183,
      "step": 988600
    },
    {
      "epoch": 1.6178983130731917,
      "grad_norm": 0.5665385127067566,
      "learning_rate": 6.808774207151942e-06,
      "loss": 0.0256,
      "step": 988620
    },
    {
      "epoch": 1.6179310435118452,
      "grad_norm": 0.12061084806919098,
      "learning_rate": 6.8087083149384244e-06,
      "loss": 0.0184,
      "step": 988640
    },
    {
      "epoch": 1.6179637739504984,
      "grad_norm": 0.12841728329658508,
      "learning_rate": 6.808642422724907e-06,
      "loss": 0.0125,
      "step": 988660
    },
    {
      "epoch": 1.6179965043891518,
      "grad_norm": 0.2749946713447571,
      "learning_rate": 6.808576530511391e-06,
      "loss": 0.0125,
      "step": 988680
    },
    {
      "epoch": 1.6180292348278051,
      "grad_norm": 0.6931690573692322,
      "learning_rate": 6.808510638297873e-06,
      "loss": 0.0243,
      "step": 988700
    },
    {
      "epoch": 1.6180619652664585,
      "grad_norm": 0.6480267643928528,
      "learning_rate": 6.808444746084356e-06,
      "loss": 0.0124,
      "step": 988720
    },
    {
      "epoch": 1.6180946957051119,
      "grad_norm": 0.9257902503013611,
      "learning_rate": 6.808378853870838e-06,
      "loss": 0.022,
      "step": 988740
    },
    {
      "epoch": 1.618127426143765,
      "grad_norm": 0.49843451380729675,
      "learning_rate": 6.808312961657322e-06,
      "loss": 0.0166,
      "step": 988760
    },
    {
      "epoch": 1.6181601565824186,
      "grad_norm": 0.234735369682312,
      "learning_rate": 6.8082470694438045e-06,
      "loss": 0.0212,
      "step": 988780
    },
    {
      "epoch": 1.6181928870210718,
      "grad_norm": 1.1877390146255493,
      "learning_rate": 6.808181177230287e-06,
      "loss": 0.0237,
      "step": 988800
    },
    {
      "epoch": 1.6182256174597252,
      "grad_norm": 0.8805053234100342,
      "learning_rate": 6.80811528501677e-06,
      "loss": 0.0214,
      "step": 988820
    },
    {
      "epoch": 1.6182583478983785,
      "grad_norm": 0.6567233204841614,
      "learning_rate": 6.8080493928032535e-06,
      "loss": 0.019,
      "step": 988840
    },
    {
      "epoch": 1.618291078337032,
      "grad_norm": 0.6303179860115051,
      "learning_rate": 6.807983500589735e-06,
      "loss": 0.0139,
      "step": 988860
    },
    {
      "epoch": 1.6183238087756853,
      "grad_norm": 0.7702708840370178,
      "learning_rate": 6.807917608376219e-06,
      "loss": 0.0262,
      "step": 988880
    },
    {
      "epoch": 1.6183565392143384,
      "grad_norm": 0.3088679015636444,
      "learning_rate": 6.807851716162701e-06,
      "loss": 0.0201,
      "step": 988900
    },
    {
      "epoch": 1.618389269652992,
      "grad_norm": 0.6240091323852539,
      "learning_rate": 6.8077858239491845e-06,
      "loss": 0.0154,
      "step": 988920
    },
    {
      "epoch": 1.6184220000916452,
      "grad_norm": 5.1115899085998535,
      "learning_rate": 6.807719931735667e-06,
      "loss": 0.0192,
      "step": 988940
    },
    {
      "epoch": 1.6184547305302985,
      "grad_norm": 0.16959765553474426,
      "learning_rate": 6.80765403952215e-06,
      "loss": 0.0184,
      "step": 988960
    },
    {
      "epoch": 1.618487460968952,
      "grad_norm": 0.46689826250076294,
      "learning_rate": 6.8075881473086335e-06,
      "loss": 0.0133,
      "step": 988980
    },
    {
      "epoch": 1.6185201914076053,
      "grad_norm": 0.4720386266708374,
      "learning_rate": 6.807522255095116e-06,
      "loss": 0.0154,
      "step": 989000
    },
    {
      "epoch": 1.6185529218462587,
      "grad_norm": 0.3942112922668457,
      "learning_rate": 6.807456362881599e-06,
      "loss": 0.0173,
      "step": 989020
    },
    {
      "epoch": 1.6185856522849118,
      "grad_norm": 0.43624770641326904,
      "learning_rate": 6.807390470668082e-06,
      "loss": 0.0198,
      "step": 989040
    },
    {
      "epoch": 1.6186183827235654,
      "grad_norm": 0.3866505026817322,
      "learning_rate": 6.807324578454565e-06,
      "loss": 0.019,
      "step": 989060
    },
    {
      "epoch": 1.6186511131622185,
      "grad_norm": 0.1503787338733673,
      "learning_rate": 6.807258686241047e-06,
      "loss": 0.0207,
      "step": 989080
    },
    {
      "epoch": 1.618683843600872,
      "grad_norm": 1.301243782043457,
      "learning_rate": 6.807192794027531e-06,
      "loss": 0.0215,
      "step": 989100
    },
    {
      "epoch": 1.6187165740395253,
      "grad_norm": 1.0126928091049194,
      "learning_rate": 6.807126901814013e-06,
      "loss": 0.0188,
      "step": 989120
    },
    {
      "epoch": 1.6187493044781787,
      "grad_norm": 0.2692399322986603,
      "learning_rate": 6.807061009600496e-06,
      "loss": 0.02,
      "step": 989140
    },
    {
      "epoch": 1.618782034916832,
      "grad_norm": 0.3602547347545624,
      "learning_rate": 6.806995117386978e-06,
      "loss": 0.0181,
      "step": 989160
    },
    {
      "epoch": 1.6188147653554852,
      "grad_norm": 1.091558814048767,
      "learning_rate": 6.806929225173462e-06,
      "loss": 0.0256,
      "step": 989180
    },
    {
      "epoch": 1.6188474957941388,
      "grad_norm": 1.449068546295166,
      "learning_rate": 6.8068633329599445e-06,
      "loss": 0.0163,
      "step": 989200
    },
    {
      "epoch": 1.618880226232792,
      "grad_norm": 0.4275498688220978,
      "learning_rate": 6.806797440746427e-06,
      "loss": 0.016,
      "step": 989220
    },
    {
      "epoch": 1.6189129566714453,
      "grad_norm": 0.5114026665687561,
      "learning_rate": 6.80673154853291e-06,
      "loss": 0.0266,
      "step": 989240
    },
    {
      "epoch": 1.6189456871100987,
      "grad_norm": 0.8881226181983948,
      "learning_rate": 6.806665656319394e-06,
      "loss": 0.0164,
      "step": 989260
    },
    {
      "epoch": 1.6189784175487518,
      "grad_norm": 0.20588697493076324,
      "learning_rate": 6.8065997641058755e-06,
      "loss": 0.0312,
      "step": 989280
    },
    {
      "epoch": 1.6190111479874054,
      "grad_norm": 0.4088464677333832,
      "learning_rate": 6.806533871892359e-06,
      "loss": 0.0155,
      "step": 989300
    },
    {
      "epoch": 1.6190438784260586,
      "grad_norm": 0.36830809712409973,
      "learning_rate": 6.806467979678843e-06,
      "loss": 0.015,
      "step": 989320
    },
    {
      "epoch": 1.6190766088647122,
      "grad_norm": 1.2539236545562744,
      "learning_rate": 6.8064020874653246e-06,
      "loss": 0.0135,
      "step": 989340
    },
    {
      "epoch": 1.6191093393033653,
      "grad_norm": 0.3586435616016388,
      "learning_rate": 6.806336195251808e-06,
      "loss": 0.0286,
      "step": 989360
    },
    {
      "epoch": 1.6191420697420187,
      "grad_norm": 0.3164112865924835,
      "learning_rate": 6.80627030303829e-06,
      "loss": 0.0223,
      "step": 989380
    },
    {
      "epoch": 1.619174800180672,
      "grad_norm": 0.2688230574131012,
      "learning_rate": 6.806204410824774e-06,
      "loss": 0.0186,
      "step": 989400
    },
    {
      "epoch": 1.6192075306193252,
      "grad_norm": 0.30450358986854553,
      "learning_rate": 6.806138518611256e-06,
      "loss": 0.0185,
      "step": 989420
    },
    {
      "epoch": 1.6192402610579788,
      "grad_norm": 0.449957937002182,
      "learning_rate": 6.806072626397739e-06,
      "loss": 0.0258,
      "step": 989440
    },
    {
      "epoch": 1.619272991496632,
      "grad_norm": 0.6077094078063965,
      "learning_rate": 6.806006734184222e-06,
      "loss": 0.0222,
      "step": 989460
    },
    {
      "epoch": 1.6193057219352853,
      "grad_norm": 0.41879355907440186,
      "learning_rate": 6.8059408419707054e-06,
      "loss": 0.018,
      "step": 989480
    },
    {
      "epoch": 1.6193384523739387,
      "grad_norm": 0.5009914636611938,
      "learning_rate": 6.805874949757187e-06,
      "loss": 0.0152,
      "step": 989500
    },
    {
      "epoch": 1.619371182812592,
      "grad_norm": 0.5306224822998047,
      "learning_rate": 6.805809057543671e-06,
      "loss": 0.0161,
      "step": 989520
    },
    {
      "epoch": 1.6194039132512454,
      "grad_norm": 0.19964082539081573,
      "learning_rate": 6.805743165330153e-06,
      "loss": 0.0225,
      "step": 989540
    },
    {
      "epoch": 1.6194366436898986,
      "grad_norm": 0.5831230282783508,
      "learning_rate": 6.805677273116636e-06,
      "loss": 0.0175,
      "step": 989560
    },
    {
      "epoch": 1.6194693741285522,
      "grad_norm": 0.6679693460464478,
      "learning_rate": 6.805611380903119e-06,
      "loss": 0.0195,
      "step": 989580
    },
    {
      "epoch": 1.6195021045672053,
      "grad_norm": 0.2455296367406845,
      "learning_rate": 6.805545488689602e-06,
      "loss": 0.0269,
      "step": 989600
    },
    {
      "epoch": 1.6195348350058587,
      "grad_norm": 0.13516081869602203,
      "learning_rate": 6.805479596476085e-06,
      "loss": 0.0207,
      "step": 989620
    },
    {
      "epoch": 1.619567565444512,
      "grad_norm": 0.04578447341918945,
      "learning_rate": 6.805413704262568e-06,
      "loss": 0.0121,
      "step": 989640
    },
    {
      "epoch": 1.6196002958831655,
      "grad_norm": 0.33315223455429077,
      "learning_rate": 6.805347812049051e-06,
      "loss": 0.0201,
      "step": 989660
    },
    {
      "epoch": 1.6196330263218188,
      "grad_norm": 0.26214712858200073,
      "learning_rate": 6.805281919835534e-06,
      "loss": 0.0204,
      "step": 989680
    },
    {
      "epoch": 1.619665756760472,
      "grad_norm": 0.3257666826248169,
      "learning_rate": 6.805216027622017e-06,
      "loss": 0.0133,
      "step": 989700
    },
    {
      "epoch": 1.6196984871991256,
      "grad_norm": 0.3433043360710144,
      "learning_rate": 6.805150135408499e-06,
      "loss": 0.0173,
      "step": 989720
    },
    {
      "epoch": 1.6197312176377787,
      "grad_norm": 0.3273327350616455,
      "learning_rate": 6.805084243194983e-06,
      "loss": 0.0227,
      "step": 989740
    },
    {
      "epoch": 1.619763948076432,
      "grad_norm": 1.364778757095337,
      "learning_rate": 6.805018350981465e-06,
      "loss": 0.0168,
      "step": 989760
    },
    {
      "epoch": 1.6197966785150855,
      "grad_norm": 0.4701317250728607,
      "learning_rate": 6.804952458767948e-06,
      "loss": 0.0223,
      "step": 989780
    },
    {
      "epoch": 1.6198294089537388,
      "grad_norm": 0.4519593417644501,
      "learning_rate": 6.804886566554431e-06,
      "loss": 0.021,
      "step": 989800
    },
    {
      "epoch": 1.6198621393923922,
      "grad_norm": 0.17953342199325562,
      "learning_rate": 6.804820674340914e-06,
      "loss": 0.0141,
      "step": 989820
    },
    {
      "epoch": 1.6198948698310454,
      "grad_norm": 0.43214625120162964,
      "learning_rate": 6.8047547821273964e-06,
      "loss": 0.0233,
      "step": 989840
    },
    {
      "epoch": 1.619927600269699,
      "grad_norm": 0.4626336395740509,
      "learning_rate": 6.80468888991388e-06,
      "loss": 0.0201,
      "step": 989860
    },
    {
      "epoch": 1.619960330708352,
      "grad_norm": 0.3151931166648865,
      "learning_rate": 6.804622997700362e-06,
      "loss": 0.0208,
      "step": 989880
    },
    {
      "epoch": 1.6199930611470055,
      "grad_norm": 0.5073336362838745,
      "learning_rate": 6.8045571054868455e-06,
      "loss": 0.0197,
      "step": 989900
    },
    {
      "epoch": 1.6200257915856588,
      "grad_norm": 0.3780630826950073,
      "learning_rate": 6.804491213273327e-06,
      "loss": 0.0182,
      "step": 989920
    },
    {
      "epoch": 1.6200585220243122,
      "grad_norm": 0.34812480211257935,
      "learning_rate": 6.804425321059811e-06,
      "loss": 0.016,
      "step": 989940
    },
    {
      "epoch": 1.6200912524629656,
      "grad_norm": 0.35272279381752014,
      "learning_rate": 6.804359428846294e-06,
      "loss": 0.0211,
      "step": 989960
    },
    {
      "epoch": 1.6201239829016187,
      "grad_norm": 0.2949787676334381,
      "learning_rate": 6.8042935366327765e-06,
      "loss": 0.0235,
      "step": 989980
    },
    {
      "epoch": 1.6201567133402723,
      "grad_norm": 1.1207245588302612,
      "learning_rate": 6.804227644419259e-06,
      "loss": 0.0201,
      "step": 990000
    },
    {
      "epoch": 1.6201894437789255,
      "grad_norm": 1.3246335983276367,
      "learning_rate": 6.804161752205743e-06,
      "loss": 0.0172,
      "step": 990020
    },
    {
      "epoch": 1.6202221742175789,
      "grad_norm": 0.9595701098442078,
      "learning_rate": 6.8040958599922255e-06,
      "loss": 0.0162,
      "step": 990040
    },
    {
      "epoch": 1.6202549046562322,
      "grad_norm": 1.754684567451477,
      "learning_rate": 6.804029967778708e-06,
      "loss": 0.0337,
      "step": 990060
    },
    {
      "epoch": 1.6202876350948854,
      "grad_norm": 0.3381357789039612,
      "learning_rate": 6.803964075565192e-06,
      "loss": 0.029,
      "step": 990080
    },
    {
      "epoch": 1.620320365533539,
      "grad_norm": 0.48809728026390076,
      "learning_rate": 6.803898183351674e-06,
      "loss": 0.0205,
      "step": 990100
    },
    {
      "epoch": 1.6203530959721921,
      "grad_norm": 0.9506679177284241,
      "learning_rate": 6.803832291138157e-06,
      "loss": 0.0228,
      "step": 990120
    },
    {
      "epoch": 1.6203858264108457,
      "grad_norm": 0.21758054196834564,
      "learning_rate": 6.803766398924639e-06,
      "loss": 0.019,
      "step": 990140
    },
    {
      "epoch": 1.6204185568494989,
      "grad_norm": 0.7310274243354797,
      "learning_rate": 6.803700506711123e-06,
      "loss": 0.0173,
      "step": 990160
    },
    {
      "epoch": 1.6204512872881522,
      "grad_norm": 0.4674350321292877,
      "learning_rate": 6.803634614497605e-06,
      "loss": 0.0146,
      "step": 990180
    },
    {
      "epoch": 1.6204840177268056,
      "grad_norm": 0.6579778790473938,
      "learning_rate": 6.803568722284088e-06,
      "loss": 0.0214,
      "step": 990200
    },
    {
      "epoch": 1.6205167481654588,
      "grad_norm": 0.1758667379617691,
      "learning_rate": 6.803502830070571e-06,
      "loss": 0.0233,
      "step": 990220
    },
    {
      "epoch": 1.6205494786041124,
      "grad_norm": 0.2665005922317505,
      "learning_rate": 6.803436937857054e-06,
      "loss": 0.0157,
      "step": 990240
    },
    {
      "epoch": 1.6205822090427655,
      "grad_norm": 0.2517467439174652,
      "learning_rate": 6.8033710456435365e-06,
      "loss": 0.0154,
      "step": 990260
    },
    {
      "epoch": 1.6206149394814189,
      "grad_norm": 0.7787078619003296,
      "learning_rate": 6.80330515343002e-06,
      "loss": 0.0234,
      "step": 990280
    },
    {
      "epoch": 1.6206476699200723,
      "grad_norm": 0.47097042202949524,
      "learning_rate": 6.803239261216502e-06,
      "loss": 0.0216,
      "step": 990300
    },
    {
      "epoch": 1.6206804003587256,
      "grad_norm": 0.11198445409536362,
      "learning_rate": 6.8031733690029856e-06,
      "loss": 0.0184,
      "step": 990320
    },
    {
      "epoch": 1.620713130797379,
      "grad_norm": 1.2928303480148315,
      "learning_rate": 6.8031074767894675e-06,
      "loss": 0.0216,
      "step": 990340
    },
    {
      "epoch": 1.6207458612360321,
      "grad_norm": 0.271941602230072,
      "learning_rate": 6.803041584575951e-06,
      "loss": 0.0178,
      "step": 990360
    },
    {
      "epoch": 1.6207785916746857,
      "grad_norm": 0.29088518023490906,
      "learning_rate": 6.802975692362435e-06,
      "loss": 0.0131,
      "step": 990380
    },
    {
      "epoch": 1.620811322113339,
      "grad_norm": 0.313547283411026,
      "learning_rate": 6.8029098001489165e-06,
      "loss": 0.0161,
      "step": 990400
    },
    {
      "epoch": 1.6208440525519923,
      "grad_norm": 0.22605852782726288,
      "learning_rate": 6.8028439079354e-06,
      "loss": 0.0163,
      "step": 990420
    },
    {
      "epoch": 1.6208767829906456,
      "grad_norm": 0.6550005674362183,
      "learning_rate": 6.802778015721883e-06,
      "loss": 0.0155,
      "step": 990440
    },
    {
      "epoch": 1.620909513429299,
      "grad_norm": 0.14280802011489868,
      "learning_rate": 6.802712123508366e-06,
      "loss": 0.0091,
      "step": 990460
    },
    {
      "epoch": 1.6209422438679524,
      "grad_norm": 0.5337522029876709,
      "learning_rate": 6.802646231294848e-06,
      "loss": 0.0259,
      "step": 990480
    },
    {
      "epoch": 1.6209749743066055,
      "grad_norm": 0.24359031021595,
      "learning_rate": 6.802580339081332e-06,
      "loss": 0.0234,
      "step": 990500
    },
    {
      "epoch": 1.6210077047452591,
      "grad_norm": 0.923008382320404,
      "learning_rate": 6.802514446867814e-06,
      "loss": 0.0258,
      "step": 990520
    },
    {
      "epoch": 1.6210404351839123,
      "grad_norm": 0.6202617883682251,
      "learning_rate": 6.802448554654297e-06,
      "loss": 0.0246,
      "step": 990540
    },
    {
      "epoch": 1.6210731656225656,
      "grad_norm": 0.4823487401008606,
      "learning_rate": 6.802382662440779e-06,
      "loss": 0.0238,
      "step": 990560
    },
    {
      "epoch": 1.621105896061219,
      "grad_norm": 1.4018079042434692,
      "learning_rate": 6.802316770227263e-06,
      "loss": 0.0268,
      "step": 990580
    },
    {
      "epoch": 1.6211386264998724,
      "grad_norm": 0.5363600254058838,
      "learning_rate": 6.802250878013746e-06,
      "loss": 0.024,
      "step": 990600
    },
    {
      "epoch": 1.6211713569385258,
      "grad_norm": 1.068516492843628,
      "learning_rate": 6.802184985800228e-06,
      "loss": 0.0151,
      "step": 990620
    },
    {
      "epoch": 1.621204087377179,
      "grad_norm": 1.6421294212341309,
      "learning_rate": 6.802119093586711e-06,
      "loss": 0.0232,
      "step": 990640
    },
    {
      "epoch": 1.6212368178158325,
      "grad_norm": 0.1371539980173111,
      "learning_rate": 6.802053201373195e-06,
      "loss": 0.0177,
      "step": 990660
    },
    {
      "epoch": 1.6212695482544857,
      "grad_norm": 0.28958961367607117,
      "learning_rate": 6.801987309159677e-06,
      "loss": 0.0248,
      "step": 990680
    },
    {
      "epoch": 1.621302278693139,
      "grad_norm": 0.5191863179206848,
      "learning_rate": 6.80192141694616e-06,
      "loss": 0.0173,
      "step": 990700
    },
    {
      "epoch": 1.6213350091317924,
      "grad_norm": 0.6018877625465393,
      "learning_rate": 6.801855524732644e-06,
      "loss": 0.0151,
      "step": 990720
    },
    {
      "epoch": 1.6213677395704458,
      "grad_norm": 0.9144697785377502,
      "learning_rate": 6.801789632519126e-06,
      "loss": 0.0274,
      "step": 990740
    },
    {
      "epoch": 1.6214004700090991,
      "grad_norm": 0.7905681133270264,
      "learning_rate": 6.801723740305609e-06,
      "loss": 0.0206,
      "step": 990760
    },
    {
      "epoch": 1.6214332004477523,
      "grad_norm": 0.10496842861175537,
      "learning_rate": 6.801657848092091e-06,
      "loss": 0.0155,
      "step": 990780
    },
    {
      "epoch": 1.621465930886406,
      "grad_norm": 1.555796504020691,
      "learning_rate": 6.801591955878575e-06,
      "loss": 0.0179,
      "step": 990800
    },
    {
      "epoch": 1.621498661325059,
      "grad_norm": 0.20236662030220032,
      "learning_rate": 6.8015260636650575e-06,
      "loss": 0.0174,
      "step": 990820
    },
    {
      "epoch": 1.6215313917637124,
      "grad_norm": 0.3193826675415039,
      "learning_rate": 6.80146017145154e-06,
      "loss": 0.0208,
      "step": 990840
    },
    {
      "epoch": 1.6215641222023658,
      "grad_norm": 0.13054783642292023,
      "learning_rate": 6.801394279238023e-06,
      "loss": 0.0168,
      "step": 990860
    },
    {
      "epoch": 1.621596852641019,
      "grad_norm": 1.650925874710083,
      "learning_rate": 6.8013283870245065e-06,
      "loss": 0.0154,
      "step": 990880
    },
    {
      "epoch": 1.6216295830796725,
      "grad_norm": 0.5075527429580688,
      "learning_rate": 6.801262494810988e-06,
      "loss": 0.0178,
      "step": 990900
    },
    {
      "epoch": 1.6216623135183257,
      "grad_norm": 0.4729083776473999,
      "learning_rate": 6.801196602597472e-06,
      "loss": 0.0164,
      "step": 990920
    },
    {
      "epoch": 1.6216950439569793,
      "grad_norm": 0.6482183337211609,
      "learning_rate": 6.801130710383954e-06,
      "loss": 0.0216,
      "step": 990940
    },
    {
      "epoch": 1.6217277743956324,
      "grad_norm": 0.7044003009796143,
      "learning_rate": 6.8010648181704375e-06,
      "loss": 0.0207,
      "step": 990960
    },
    {
      "epoch": 1.6217605048342858,
      "grad_norm": 0.11169818043708801,
      "learning_rate": 6.800998925956919e-06,
      "loss": 0.0138,
      "step": 990980
    },
    {
      "epoch": 1.6217932352729392,
      "grad_norm": 1.1926476955413818,
      "learning_rate": 6.800933033743403e-06,
      "loss": 0.0227,
      "step": 991000
    },
    {
      "epoch": 1.6218259657115923,
      "grad_norm": 0.3103702664375305,
      "learning_rate": 6.800867141529886e-06,
      "loss": 0.0175,
      "step": 991020
    },
    {
      "epoch": 1.621858696150246,
      "grad_norm": 0.25377190113067627,
      "learning_rate": 6.800801249316369e-06,
      "loss": 0.0205,
      "step": 991040
    },
    {
      "epoch": 1.621891426588899,
      "grad_norm": 1.6122852563858032,
      "learning_rate": 6.800735357102852e-06,
      "loss": 0.0182,
      "step": 991060
    },
    {
      "epoch": 1.6219241570275524,
      "grad_norm": 0.633061408996582,
      "learning_rate": 6.800669464889335e-06,
      "loss": 0.0195,
      "step": 991080
    },
    {
      "epoch": 1.6219568874662058,
      "grad_norm": 0.4019593894481659,
      "learning_rate": 6.800603572675818e-06,
      "loss": 0.0207,
      "step": 991100
    },
    {
      "epoch": 1.6219896179048592,
      "grad_norm": 0.24895596504211426,
      "learning_rate": 6.8005376804623e-06,
      "loss": 0.0254,
      "step": 991120
    },
    {
      "epoch": 1.6220223483435126,
      "grad_norm": 0.29006752371788025,
      "learning_rate": 6.800471788248784e-06,
      "loss": 0.0149,
      "step": 991140
    },
    {
      "epoch": 1.6220550787821657,
      "grad_norm": 0.2578986585140228,
      "learning_rate": 6.800405896035266e-06,
      "loss": 0.0173,
      "step": 991160
    },
    {
      "epoch": 1.6220878092208193,
      "grad_norm": 0.8487603664398193,
      "learning_rate": 6.800340003821749e-06,
      "loss": 0.0183,
      "step": 991180
    },
    {
      "epoch": 1.6221205396594724,
      "grad_norm": 0.5637750625610352,
      "learning_rate": 6.800274111608231e-06,
      "loss": 0.0163,
      "step": 991200
    },
    {
      "epoch": 1.6221532700981258,
      "grad_norm": 0.21734970808029175,
      "learning_rate": 6.800208219394715e-06,
      "loss": 0.0192,
      "step": 991220
    },
    {
      "epoch": 1.6221860005367792,
      "grad_norm": 1.4012515544891357,
      "learning_rate": 6.8001423271811975e-06,
      "loss": 0.0163,
      "step": 991240
    },
    {
      "epoch": 1.6222187309754326,
      "grad_norm": 0.22469887137413025,
      "learning_rate": 6.80007643496768e-06,
      "loss": 0.018,
      "step": 991260
    },
    {
      "epoch": 1.622251461414086,
      "grad_norm": 0.2401251494884491,
      "learning_rate": 6.800010542754163e-06,
      "loss": 0.0214,
      "step": 991280
    },
    {
      "epoch": 1.622284191852739,
      "grad_norm": 0.42202135920524597,
      "learning_rate": 6.799944650540647e-06,
      "loss": 0.0151,
      "step": 991300
    },
    {
      "epoch": 1.6223169222913927,
      "grad_norm": 0.38748347759246826,
      "learning_rate": 6.7998787583271285e-06,
      "loss": 0.0263,
      "step": 991320
    },
    {
      "epoch": 1.6223496527300458,
      "grad_norm": 0.8492792844772339,
      "learning_rate": 6.799812866113612e-06,
      "loss": 0.0198,
      "step": 991340
    },
    {
      "epoch": 1.6223823831686992,
      "grad_norm": 0.5616725087165833,
      "learning_rate": 6.799746973900094e-06,
      "loss": 0.0247,
      "step": 991360
    },
    {
      "epoch": 1.6224151136073526,
      "grad_norm": 0.46091756224632263,
      "learning_rate": 6.7996810816865776e-06,
      "loss": 0.0199,
      "step": 991380
    },
    {
      "epoch": 1.622447844046006,
      "grad_norm": 1.0074681043624878,
      "learning_rate": 6.79961518947306e-06,
      "loss": 0.0248,
      "step": 991400
    },
    {
      "epoch": 1.6224805744846593,
      "grad_norm": 0.7563386559486389,
      "learning_rate": 6.799549297259543e-06,
      "loss": 0.0181,
      "step": 991420
    },
    {
      "epoch": 1.6225133049233125,
      "grad_norm": 3.7502124309539795,
      "learning_rate": 6.799483405046027e-06,
      "loss": 0.0199,
      "step": 991440
    },
    {
      "epoch": 1.622546035361966,
      "grad_norm": 0.9160864949226379,
      "learning_rate": 6.799417512832509e-06,
      "loss": 0.0198,
      "step": 991460
    },
    {
      "epoch": 1.6225787658006192,
      "grad_norm": 1.2385584115982056,
      "learning_rate": 6.799351620618992e-06,
      "loss": 0.0161,
      "step": 991480
    },
    {
      "epoch": 1.6226114962392726,
      "grad_norm": 0.326355516910553,
      "learning_rate": 6.799285728405475e-06,
      "loss": 0.0145,
      "step": 991500
    },
    {
      "epoch": 1.622644226677926,
      "grad_norm": 0.7877287864685059,
      "learning_rate": 6.7992198361919584e-06,
      "loss": 0.0242,
      "step": 991520
    },
    {
      "epoch": 1.622676957116579,
      "grad_norm": 0.6954083442687988,
      "learning_rate": 6.79915394397844e-06,
      "loss": 0.023,
      "step": 991540
    },
    {
      "epoch": 1.6227096875552327,
      "grad_norm": 0.553753674030304,
      "learning_rate": 6.799088051764924e-06,
      "loss": 0.0213,
      "step": 991560
    },
    {
      "epoch": 1.6227424179938859,
      "grad_norm": 0.4204712510108948,
      "learning_rate": 6.799022159551406e-06,
      "loss": 0.0155,
      "step": 991580
    },
    {
      "epoch": 1.6227751484325394,
      "grad_norm": 1.1087934970855713,
      "learning_rate": 6.798956267337889e-06,
      "loss": 0.0199,
      "step": 991600
    },
    {
      "epoch": 1.6228078788711926,
      "grad_norm": 0.31413260102272034,
      "learning_rate": 6.798890375124372e-06,
      "loss": 0.0195,
      "step": 991620
    },
    {
      "epoch": 1.622840609309846,
      "grad_norm": 0.5644675493240356,
      "learning_rate": 6.798824482910855e-06,
      "loss": 0.0283,
      "step": 991640
    },
    {
      "epoch": 1.6228733397484993,
      "grad_norm": 0.3791411519050598,
      "learning_rate": 6.798758590697338e-06,
      "loss": 0.0169,
      "step": 991660
    },
    {
      "epoch": 1.6229060701871525,
      "grad_norm": 0.2612423002719879,
      "learning_rate": 6.798692698483821e-06,
      "loss": 0.0223,
      "step": 991680
    },
    {
      "epoch": 1.622938800625806,
      "grad_norm": 0.6636973023414612,
      "learning_rate": 6.798626806270303e-06,
      "loss": 0.0272,
      "step": 991700
    },
    {
      "epoch": 1.6229715310644592,
      "grad_norm": 2.563754081726074,
      "learning_rate": 6.798560914056787e-06,
      "loss": 0.0321,
      "step": 991720
    },
    {
      "epoch": 1.6230042615031126,
      "grad_norm": 0.38880622386932373,
      "learning_rate": 6.7984950218432686e-06,
      "loss": 0.0156,
      "step": 991740
    },
    {
      "epoch": 1.623036991941766,
      "grad_norm": 0.1829630732536316,
      "learning_rate": 6.798429129629752e-06,
      "loss": 0.0218,
      "step": 991760
    },
    {
      "epoch": 1.6230697223804194,
      "grad_norm": 0.4206165373325348,
      "learning_rate": 6.798363237416236e-06,
      "loss": 0.0263,
      "step": 991780
    },
    {
      "epoch": 1.6231024528190727,
      "grad_norm": 1.0895715951919556,
      "learning_rate": 6.798297345202718e-06,
      "loss": 0.026,
      "step": 991800
    },
    {
      "epoch": 1.6231351832577259,
      "grad_norm": 0.6684041619300842,
      "learning_rate": 6.798231452989201e-06,
      "loss": 0.0173,
      "step": 991820
    },
    {
      "epoch": 1.6231679136963795,
      "grad_norm": 0.599393904209137,
      "learning_rate": 6.798165560775684e-06,
      "loss": 0.0188,
      "step": 991840
    },
    {
      "epoch": 1.6232006441350326,
      "grad_norm": 0.7679361701011658,
      "learning_rate": 6.798099668562167e-06,
      "loss": 0.0232,
      "step": 991860
    },
    {
      "epoch": 1.623233374573686,
      "grad_norm": 0.5660843253135681,
      "learning_rate": 6.7980337763486494e-06,
      "loss": 0.0225,
      "step": 991880
    },
    {
      "epoch": 1.6232661050123394,
      "grad_norm": 0.5219517350196838,
      "learning_rate": 6.797967884135133e-06,
      "loss": 0.0138,
      "step": 991900
    },
    {
      "epoch": 1.6232988354509927,
      "grad_norm": 0.06950484216213226,
      "learning_rate": 6.797901991921615e-06,
      "loss": 0.0291,
      "step": 991920
    },
    {
      "epoch": 1.623331565889646,
      "grad_norm": 0.4444330334663391,
      "learning_rate": 6.7978360997080985e-06,
      "loss": 0.0104,
      "step": 991940
    },
    {
      "epoch": 1.6233642963282993,
      "grad_norm": 0.4876788556575775,
      "learning_rate": 6.79777020749458e-06,
      "loss": 0.0182,
      "step": 991960
    },
    {
      "epoch": 1.6233970267669529,
      "grad_norm": 0.830989420413971,
      "learning_rate": 6.797704315281064e-06,
      "loss": 0.0221,
      "step": 991980
    },
    {
      "epoch": 1.623429757205606,
      "grad_norm": 2.9288041591644287,
      "learning_rate": 6.797638423067546e-06,
      "loss": 0.0208,
      "step": 992000
    },
    {
      "epoch": 1.6234624876442594,
      "grad_norm": 0.47938549518585205,
      "learning_rate": 6.7975725308540295e-06,
      "loss": 0.0305,
      "step": 992020
    },
    {
      "epoch": 1.6234952180829127,
      "grad_norm": 0.18451783061027527,
      "learning_rate": 6.797506638640512e-06,
      "loss": 0.0198,
      "step": 992040
    },
    {
      "epoch": 1.6235279485215661,
      "grad_norm": 1.052322506904602,
      "learning_rate": 6.797440746426995e-06,
      "loss": 0.0245,
      "step": 992060
    },
    {
      "epoch": 1.6235606789602195,
      "grad_norm": 1.0622121095657349,
      "learning_rate": 6.797374854213478e-06,
      "loss": 0.0146,
      "step": 992080
    },
    {
      "epoch": 1.6235934093988726,
      "grad_norm": 0.31529733538627625,
      "learning_rate": 6.797308961999961e-06,
      "loss": 0.0195,
      "step": 992100
    },
    {
      "epoch": 1.6236261398375262,
      "grad_norm": 0.4989914000034332,
      "learning_rate": 6.797243069786445e-06,
      "loss": 0.0182,
      "step": 992120
    },
    {
      "epoch": 1.6236588702761794,
      "grad_norm": 0.11036129295825958,
      "learning_rate": 6.797177177572927e-06,
      "loss": 0.0111,
      "step": 992140
    },
    {
      "epoch": 1.6236916007148328,
      "grad_norm": 0.9674844741821289,
      "learning_rate": 6.79711128535941e-06,
      "loss": 0.0203,
      "step": 992160
    },
    {
      "epoch": 1.6237243311534861,
      "grad_norm": 0.6003998517990112,
      "learning_rate": 6.797045393145892e-06,
      "loss": 0.0153,
      "step": 992180
    },
    {
      "epoch": 1.6237570615921395,
      "grad_norm": 0.37014544010162354,
      "learning_rate": 6.796979500932376e-06,
      "loss": 0.0261,
      "step": 992200
    },
    {
      "epoch": 1.6237897920307929,
      "grad_norm": 0.4833047688007355,
      "learning_rate": 6.796913608718858e-06,
      "loss": 0.0148,
      "step": 992220
    },
    {
      "epoch": 1.623822522469446,
      "grad_norm": 0.7990520000457764,
      "learning_rate": 6.796847716505341e-06,
      "loss": 0.0227,
      "step": 992240
    },
    {
      "epoch": 1.6238552529080996,
      "grad_norm": 0.3978341519832611,
      "learning_rate": 6.796781824291824e-06,
      "loss": 0.0178,
      "step": 992260
    },
    {
      "epoch": 1.6238879833467528,
      "grad_norm": 0.29577478766441345,
      "learning_rate": 6.796715932078307e-06,
      "loss": 0.0204,
      "step": 992280
    },
    {
      "epoch": 1.6239207137854061,
      "grad_norm": 0.18560181558132172,
      "learning_rate": 6.7966500398647895e-06,
      "loss": 0.0243,
      "step": 992300
    },
    {
      "epoch": 1.6239534442240595,
      "grad_norm": 0.3749484121799469,
      "learning_rate": 6.796584147651273e-06,
      "loss": 0.0185,
      "step": 992320
    },
    {
      "epoch": 1.6239861746627127,
      "grad_norm": 0.4238924980163574,
      "learning_rate": 6.796518255437755e-06,
      "loss": 0.0281,
      "step": 992340
    },
    {
      "epoch": 1.6240189051013663,
      "grad_norm": 0.3022535741329193,
      "learning_rate": 6.796452363224239e-06,
      "loss": 0.023,
      "step": 992360
    },
    {
      "epoch": 1.6240516355400194,
      "grad_norm": 0.26082441210746765,
      "learning_rate": 6.7963864710107205e-06,
      "loss": 0.0232,
      "step": 992380
    },
    {
      "epoch": 1.624084365978673,
      "grad_norm": 1.1808969974517822,
      "learning_rate": 6.796320578797204e-06,
      "loss": 0.0155,
      "step": 992400
    },
    {
      "epoch": 1.6241170964173262,
      "grad_norm": 0.505435049533844,
      "learning_rate": 6.796254686583687e-06,
      "loss": 0.021,
      "step": 992420
    },
    {
      "epoch": 1.6241498268559795,
      "grad_norm": 0.858687698841095,
      "learning_rate": 6.7961887943701695e-06,
      "loss": 0.0314,
      "step": 992440
    },
    {
      "epoch": 1.624182557294633,
      "grad_norm": 0.35718491673469543,
      "learning_rate": 6.796122902156652e-06,
      "loss": 0.0225,
      "step": 992460
    },
    {
      "epoch": 1.624215287733286,
      "grad_norm": 0.8506571650505066,
      "learning_rate": 6.796057009943136e-06,
      "loss": 0.0207,
      "step": 992480
    },
    {
      "epoch": 1.6242480181719396,
      "grad_norm": 0.19228094816207886,
      "learning_rate": 6.795991117729619e-06,
      "loss": 0.0195,
      "step": 992500
    },
    {
      "epoch": 1.6242807486105928,
      "grad_norm": 0.28333863615989685,
      "learning_rate": 6.795925225516101e-06,
      "loss": 0.0223,
      "step": 992520
    },
    {
      "epoch": 1.6243134790492462,
      "grad_norm": 0.6426568627357483,
      "learning_rate": 6.795859333302585e-06,
      "loss": 0.0157,
      "step": 992540
    },
    {
      "epoch": 1.6243462094878995,
      "grad_norm": 0.40751275420188904,
      "learning_rate": 6.795793441089067e-06,
      "loss": 0.0197,
      "step": 992560
    },
    {
      "epoch": 1.624378939926553,
      "grad_norm": 2.2971949577331543,
      "learning_rate": 6.79572754887555e-06,
      "loss": 0.0145,
      "step": 992580
    },
    {
      "epoch": 1.6244116703652063,
      "grad_norm": 0.1712033599615097,
      "learning_rate": 6.795661656662032e-06,
      "loss": 0.0243,
      "step": 992600
    },
    {
      "epoch": 1.6244444008038594,
      "grad_norm": 0.4233514368534088,
      "learning_rate": 6.795595764448516e-06,
      "loss": 0.0224,
      "step": 992620
    },
    {
      "epoch": 1.624477131242513,
      "grad_norm": 0.622520923614502,
      "learning_rate": 6.795529872234999e-06,
      "loss": 0.0225,
      "step": 992640
    },
    {
      "epoch": 1.6245098616811662,
      "grad_norm": 0.33611613512039185,
      "learning_rate": 6.795463980021481e-06,
      "loss": 0.0244,
      "step": 992660
    },
    {
      "epoch": 1.6245425921198196,
      "grad_norm": 0.8394581079483032,
      "learning_rate": 6.795398087807964e-06,
      "loss": 0.0289,
      "step": 992680
    },
    {
      "epoch": 1.624575322558473,
      "grad_norm": 0.12014582753181458,
      "learning_rate": 6.795332195594448e-06,
      "loss": 0.0243,
      "step": 992700
    },
    {
      "epoch": 1.6246080529971263,
      "grad_norm": 0.21238163113594055,
      "learning_rate": 6.79526630338093e-06,
      "loss": 0.0131,
      "step": 992720
    },
    {
      "epoch": 1.6246407834357797,
      "grad_norm": 1.278765082359314,
      "learning_rate": 6.795200411167413e-06,
      "loss": 0.0257,
      "step": 992740
    },
    {
      "epoch": 1.6246735138744328,
      "grad_norm": 0.7597935199737549,
      "learning_rate": 6.795134518953895e-06,
      "loss": 0.0239,
      "step": 992760
    },
    {
      "epoch": 1.6247062443130864,
      "grad_norm": 0.14539919793605804,
      "learning_rate": 6.795068626740379e-06,
      "loss": 0.0194,
      "step": 992780
    },
    {
      "epoch": 1.6247389747517396,
      "grad_norm": 0.40943118929862976,
      "learning_rate": 6.795002734526861e-06,
      "loss": 0.0207,
      "step": 992800
    },
    {
      "epoch": 1.624771705190393,
      "grad_norm": 0.5178122520446777,
      "learning_rate": 6.794936842313344e-06,
      "loss": 0.0144,
      "step": 992820
    },
    {
      "epoch": 1.6248044356290463,
      "grad_norm": 0.9305810332298279,
      "learning_rate": 6.794870950099828e-06,
      "loss": 0.017,
      "step": 992840
    },
    {
      "epoch": 1.6248371660676997,
      "grad_norm": 0.28020405769348145,
      "learning_rate": 6.7948050578863105e-06,
      "loss": 0.0229,
      "step": 992860
    },
    {
      "epoch": 1.624869896506353,
      "grad_norm": 0.35879701375961304,
      "learning_rate": 6.794739165672793e-06,
      "loss": 0.0165,
      "step": 992880
    },
    {
      "epoch": 1.6249026269450062,
      "grad_norm": 0.204823300242424,
      "learning_rate": 6.794673273459276e-06,
      "loss": 0.0246,
      "step": 992900
    },
    {
      "epoch": 1.6249353573836598,
      "grad_norm": 0.8433603644371033,
      "learning_rate": 6.7946073812457595e-06,
      "loss": 0.0186,
      "step": 992920
    },
    {
      "epoch": 1.624968087822313,
      "grad_norm": 0.24954861402511597,
      "learning_rate": 6.794541489032241e-06,
      "loss": 0.0176,
      "step": 992940
    },
    {
      "epoch": 1.6250008182609663,
      "grad_norm": 0.8080060482025146,
      "learning_rate": 6.794475596818725e-06,
      "loss": 0.0279,
      "step": 992960
    },
    {
      "epoch": 1.6250335486996197,
      "grad_norm": 1.0501967668533325,
      "learning_rate": 6.794409704605207e-06,
      "loss": 0.0181,
      "step": 992980
    },
    {
      "epoch": 1.625066279138273,
      "grad_norm": 0.6296700835227966,
      "learning_rate": 6.7943438123916905e-06,
      "loss": 0.0249,
      "step": 993000
    },
    {
      "epoch": 1.6250990095769264,
      "grad_norm": 0.6682990789413452,
      "learning_rate": 6.794277920178172e-06,
      "loss": 0.0175,
      "step": 993020
    },
    {
      "epoch": 1.6251317400155796,
      "grad_norm": 0.39408352971076965,
      "learning_rate": 6.794212027964656e-06,
      "loss": 0.0196,
      "step": 993040
    },
    {
      "epoch": 1.6251644704542332,
      "grad_norm": 0.29994961619377136,
      "learning_rate": 6.794146135751139e-06,
      "loss": 0.0195,
      "step": 993060
    },
    {
      "epoch": 1.6251972008928863,
      "grad_norm": 0.37358027696609497,
      "learning_rate": 6.7940802435376214e-06,
      "loss": 0.0125,
      "step": 993080
    },
    {
      "epoch": 1.6252299313315397,
      "grad_norm": 0.7423043251037598,
      "learning_rate": 6.794014351324104e-06,
      "loss": 0.0196,
      "step": 993100
    },
    {
      "epoch": 1.625262661770193,
      "grad_norm": 3.4017281532287598,
      "learning_rate": 6.793948459110588e-06,
      "loss": 0.0125,
      "step": 993120
    },
    {
      "epoch": 1.6252953922088462,
      "grad_norm": 0.4791540801525116,
      "learning_rate": 6.79388256689707e-06,
      "loss": 0.0197,
      "step": 993140
    },
    {
      "epoch": 1.6253281226474998,
      "grad_norm": 0.34782668948173523,
      "learning_rate": 6.793816674683553e-06,
      "loss": 0.0163,
      "step": 993160
    },
    {
      "epoch": 1.625360853086153,
      "grad_norm": 0.2514457106590271,
      "learning_rate": 6.793750782470037e-06,
      "loss": 0.0211,
      "step": 993180
    },
    {
      "epoch": 1.6253935835248066,
      "grad_norm": 0.4077262878417969,
      "learning_rate": 6.793684890256519e-06,
      "loss": 0.019,
      "step": 993200
    },
    {
      "epoch": 1.6254263139634597,
      "grad_norm": 0.08049134910106659,
      "learning_rate": 6.793618998043002e-06,
      "loss": 0.0197,
      "step": 993220
    },
    {
      "epoch": 1.625459044402113,
      "grad_norm": 0.26260557770729065,
      "learning_rate": 6.793553105829484e-06,
      "loss": 0.0208,
      "step": 993240
    },
    {
      "epoch": 1.6254917748407665,
      "grad_norm": 1.6634526252746582,
      "learning_rate": 6.793487213615968e-06,
      "loss": 0.0207,
      "step": 993260
    },
    {
      "epoch": 1.6255245052794196,
      "grad_norm": 0.9081593155860901,
      "learning_rate": 6.7934213214024505e-06,
      "loss": 0.0142,
      "step": 993280
    },
    {
      "epoch": 1.6255572357180732,
      "grad_norm": 0.7760936617851257,
      "learning_rate": 6.793355429188933e-06,
      "loss": 0.0196,
      "step": 993300
    },
    {
      "epoch": 1.6255899661567264,
      "grad_norm": 0.3888205289840698,
      "learning_rate": 6.793289536975416e-06,
      "loss": 0.02,
      "step": 993320
    },
    {
      "epoch": 1.6256226965953797,
      "grad_norm": 0.8606140613555908,
      "learning_rate": 6.7932236447619e-06,
      "loss": 0.0148,
      "step": 993340
    },
    {
      "epoch": 1.625655427034033,
      "grad_norm": 0.2550440728664398,
      "learning_rate": 6.7931577525483815e-06,
      "loss": 0.014,
      "step": 993360
    },
    {
      "epoch": 1.6256881574726865,
      "grad_norm": 14.83207893371582,
      "learning_rate": 6.793091860334865e-06,
      "loss": 0.018,
      "step": 993380
    },
    {
      "epoch": 1.6257208879113398,
      "grad_norm": 0.91100013256073,
      "learning_rate": 6.793025968121347e-06,
      "loss": 0.0176,
      "step": 993400
    },
    {
      "epoch": 1.625753618349993,
      "grad_norm": 0.7638328671455383,
      "learning_rate": 6.7929600759078306e-06,
      "loss": 0.0175,
      "step": 993420
    },
    {
      "epoch": 1.6257863487886466,
      "grad_norm": 0.5270600914955139,
      "learning_rate": 6.792894183694313e-06,
      "loss": 0.0196,
      "step": 993440
    },
    {
      "epoch": 1.6258190792272997,
      "grad_norm": 0.31844422221183777,
      "learning_rate": 6.792828291480796e-06,
      "loss": 0.0214,
      "step": 993460
    },
    {
      "epoch": 1.625851809665953,
      "grad_norm": 0.20356452465057373,
      "learning_rate": 6.792762399267279e-06,
      "loss": 0.0162,
      "step": 993480
    },
    {
      "epoch": 1.6258845401046065,
      "grad_norm": 0.4024788737297058,
      "learning_rate": 6.792696507053762e-06,
      "loss": 0.0126,
      "step": 993500
    },
    {
      "epoch": 1.6259172705432599,
      "grad_norm": 1.4026895761489868,
      "learning_rate": 6.792630614840244e-06,
      "loss": 0.0216,
      "step": 993520
    },
    {
      "epoch": 1.6259500009819132,
      "grad_norm": 6.010965347290039,
      "learning_rate": 6.792564722626728e-06,
      "loss": 0.0169,
      "step": 993540
    },
    {
      "epoch": 1.6259827314205664,
      "grad_norm": 0.4462831914424896,
      "learning_rate": 6.7924988304132114e-06,
      "loss": 0.0159,
      "step": 993560
    },
    {
      "epoch": 1.62601546185922,
      "grad_norm": 1.2036185264587402,
      "learning_rate": 6.792432938199693e-06,
      "loss": 0.0139,
      "step": 993580
    },
    {
      "epoch": 1.6260481922978731,
      "grad_norm": 1.1993056535720825,
      "learning_rate": 6.792367045986177e-06,
      "loss": 0.0183,
      "step": 993600
    },
    {
      "epoch": 1.6260809227365265,
      "grad_norm": 0.7059556245803833,
      "learning_rate": 6.792301153772659e-06,
      "loss": 0.0206,
      "step": 993620
    },
    {
      "epoch": 1.6261136531751799,
      "grad_norm": 1.191473364830017,
      "learning_rate": 6.792235261559142e-06,
      "loss": 0.0175,
      "step": 993640
    },
    {
      "epoch": 1.6261463836138332,
      "grad_norm": 0.32763412594795227,
      "learning_rate": 6.792169369345625e-06,
      "loss": 0.0217,
      "step": 993660
    },
    {
      "epoch": 1.6261791140524866,
      "grad_norm": 0.7908000349998474,
      "learning_rate": 6.792103477132108e-06,
      "loss": 0.016,
      "step": 993680
    },
    {
      "epoch": 1.6262118444911398,
      "grad_norm": 1.1967040300369263,
      "learning_rate": 6.792037584918591e-06,
      "loss": 0.0225,
      "step": 993700
    },
    {
      "epoch": 1.6262445749297934,
      "grad_norm": 0.056028615683317184,
      "learning_rate": 6.791971692705074e-06,
      "loss": 0.0123,
      "step": 993720
    },
    {
      "epoch": 1.6262773053684465,
      "grad_norm": 0.5038045644760132,
      "learning_rate": 6.791905800491556e-06,
      "loss": 0.0151,
      "step": 993740
    },
    {
      "epoch": 1.6263100358070999,
      "grad_norm": 0.31985750794410706,
      "learning_rate": 6.79183990827804e-06,
      "loss": 0.0152,
      "step": 993760
    },
    {
      "epoch": 1.6263427662457532,
      "grad_norm": 0.31088998913764954,
      "learning_rate": 6.7917740160645216e-06,
      "loss": 0.0235,
      "step": 993780
    },
    {
      "epoch": 1.6263754966844066,
      "grad_norm": 0.5127268433570862,
      "learning_rate": 6.791708123851005e-06,
      "loss": 0.0171,
      "step": 993800
    },
    {
      "epoch": 1.62640822712306,
      "grad_norm": 0.6716707348823547,
      "learning_rate": 6.791642231637488e-06,
      "loss": 0.0119,
      "step": 993820
    },
    {
      "epoch": 1.6264409575617131,
      "grad_norm": 0.2563472390174866,
      "learning_rate": 6.791576339423971e-06,
      "loss": 0.0223,
      "step": 993840
    },
    {
      "epoch": 1.6264736880003667,
      "grad_norm": 0.42915311455726624,
      "learning_rate": 6.791510447210453e-06,
      "loss": 0.0182,
      "step": 993860
    },
    {
      "epoch": 1.6265064184390199,
      "grad_norm": 1.1838624477386475,
      "learning_rate": 6.791444554996937e-06,
      "loss": 0.0185,
      "step": 993880
    },
    {
      "epoch": 1.6265391488776733,
      "grad_norm": 0.3075597584247589,
      "learning_rate": 6.79137866278342e-06,
      "loss": 0.0155,
      "step": 993900
    },
    {
      "epoch": 1.6265718793163266,
      "grad_norm": 1.4093570709228516,
      "learning_rate": 6.7913127705699024e-06,
      "loss": 0.0187,
      "step": 993920
    },
    {
      "epoch": 1.6266046097549798,
      "grad_norm": 1.243292212486267,
      "learning_rate": 6.791246878356386e-06,
      "loss": 0.0223,
      "step": 993940
    },
    {
      "epoch": 1.6266373401936334,
      "grad_norm": 2.7860636711120605,
      "learning_rate": 6.791180986142868e-06,
      "loss": 0.0214,
      "step": 993960
    },
    {
      "epoch": 1.6266700706322865,
      "grad_norm": 0.14895771443843842,
      "learning_rate": 6.7911150939293515e-06,
      "loss": 0.019,
      "step": 993980
    },
    {
      "epoch": 1.6267028010709401,
      "grad_norm": 0.14787086844444275,
      "learning_rate": 6.791049201715833e-06,
      "loss": 0.0224,
      "step": 994000
    },
    {
      "epoch": 1.6267355315095933,
      "grad_norm": 1.1123183965682983,
      "learning_rate": 6.790983309502317e-06,
      "loss": 0.023,
      "step": 994020
    },
    {
      "epoch": 1.6267682619482466,
      "grad_norm": 0.4154779314994812,
      "learning_rate": 6.790917417288799e-06,
      "loss": 0.0191,
      "step": 994040
    },
    {
      "epoch": 1.6268009923869,
      "grad_norm": 0.41226470470428467,
      "learning_rate": 6.7908515250752825e-06,
      "loss": 0.015,
      "step": 994060
    },
    {
      "epoch": 1.6268337228255532,
      "grad_norm": 1.0991727113723755,
      "learning_rate": 6.790785632861765e-06,
      "loss": 0.0215,
      "step": 994080
    },
    {
      "epoch": 1.6268664532642068,
      "grad_norm": 0.22745825350284576,
      "learning_rate": 6.790719740648248e-06,
      "loss": 0.0148,
      "step": 994100
    },
    {
      "epoch": 1.62689918370286,
      "grad_norm": 0.7156854867935181,
      "learning_rate": 6.790653848434731e-06,
      "loss": 0.0245,
      "step": 994120
    },
    {
      "epoch": 1.6269319141415133,
      "grad_norm": 0.4750771224498749,
      "learning_rate": 6.790587956221214e-06,
      "loss": 0.0179,
      "step": 994140
    },
    {
      "epoch": 1.6269646445801667,
      "grad_norm": 0.17600317299365997,
      "learning_rate": 6.790522064007696e-06,
      "loss": 0.0136,
      "step": 994160
    },
    {
      "epoch": 1.62699737501882,
      "grad_norm": 1.1368778944015503,
      "learning_rate": 6.79045617179418e-06,
      "loss": 0.0145,
      "step": 994180
    },
    {
      "epoch": 1.6270301054574734,
      "grad_norm": 0.14319169521331787,
      "learning_rate": 6.790390279580662e-06,
      "loss": 0.0134,
      "step": 994200
    },
    {
      "epoch": 1.6270628358961265,
      "grad_norm": 0.7727348208427429,
      "learning_rate": 6.790324387367145e-06,
      "loss": 0.0286,
      "step": 994220
    },
    {
      "epoch": 1.6270955663347801,
      "grad_norm": 1.9565136432647705,
      "learning_rate": 6.790258495153629e-06,
      "loss": 0.0202,
      "step": 994240
    },
    {
      "epoch": 1.6271282967734333,
      "grad_norm": 0.9824026823043823,
      "learning_rate": 6.790192602940111e-06,
      "loss": 0.0239,
      "step": 994260
    },
    {
      "epoch": 1.6271610272120867,
      "grad_norm": 0.6428237557411194,
      "learning_rate": 6.790126710726594e-06,
      "loss": 0.0131,
      "step": 994280
    },
    {
      "epoch": 1.62719375765074,
      "grad_norm": 1.5054097175598145,
      "learning_rate": 6.790060818513077e-06,
      "loss": 0.0198,
      "step": 994300
    },
    {
      "epoch": 1.6272264880893934,
      "grad_norm": 0.5567317605018616,
      "learning_rate": 6.78999492629956e-06,
      "loss": 0.0259,
      "step": 994320
    },
    {
      "epoch": 1.6272592185280468,
      "grad_norm": 0.3491882085800171,
      "learning_rate": 6.7899290340860425e-06,
      "loss": 0.0167,
      "step": 994340
    },
    {
      "epoch": 1.6272919489667,
      "grad_norm": 0.18151219189167023,
      "learning_rate": 6.789863141872526e-06,
      "loss": 0.0171,
      "step": 994360
    },
    {
      "epoch": 1.6273246794053535,
      "grad_norm": 0.17015737295150757,
      "learning_rate": 6.789797249659008e-06,
      "loss": 0.0142,
      "step": 994380
    },
    {
      "epoch": 1.6273574098440067,
      "grad_norm": 1.3023682832717896,
      "learning_rate": 6.789731357445492e-06,
      "loss": 0.0245,
      "step": 994400
    },
    {
      "epoch": 1.62739014028266,
      "grad_norm": 0.29539185762405396,
      "learning_rate": 6.7896654652319735e-06,
      "loss": 0.0179,
      "step": 994420
    },
    {
      "epoch": 1.6274228707213134,
      "grad_norm": 0.9884406328201294,
      "learning_rate": 6.789599573018457e-06,
      "loss": 0.0247,
      "step": 994440
    },
    {
      "epoch": 1.6274556011599668,
      "grad_norm": 0.7365905046463013,
      "learning_rate": 6.78953368080494e-06,
      "loss": 0.023,
      "step": 994460
    },
    {
      "epoch": 1.6274883315986202,
      "grad_norm": 0.12478500604629517,
      "learning_rate": 6.7894677885914225e-06,
      "loss": 0.0216,
      "step": 994480
    },
    {
      "epoch": 1.6275210620372733,
      "grad_norm": 0.4967171847820282,
      "learning_rate": 6.789401896377905e-06,
      "loss": 0.0167,
      "step": 994500
    },
    {
      "epoch": 1.627553792475927,
      "grad_norm": 1.3607932329177856,
      "learning_rate": 6.789336004164389e-06,
      "loss": 0.0199,
      "step": 994520
    },
    {
      "epoch": 1.62758652291458,
      "grad_norm": 0.2501387596130371,
      "learning_rate": 6.789270111950871e-06,
      "loss": 0.0156,
      "step": 994540
    },
    {
      "epoch": 1.6276192533532334,
      "grad_norm": 0.3900471329689026,
      "learning_rate": 6.789204219737354e-06,
      "loss": 0.0233,
      "step": 994560
    },
    {
      "epoch": 1.6276519837918868,
      "grad_norm": 0.3809363543987274,
      "learning_rate": 6.789138327523838e-06,
      "loss": 0.0296,
      "step": 994580
    },
    {
      "epoch": 1.62768471423054,
      "grad_norm": 0.3650931119918823,
      "learning_rate": 6.78907243531032e-06,
      "loss": 0.0146,
      "step": 994600
    },
    {
      "epoch": 1.6277174446691935,
      "grad_norm": 0.7176247835159302,
      "learning_rate": 6.789006543096803e-06,
      "loss": 0.0142,
      "step": 994620
    },
    {
      "epoch": 1.6277501751078467,
      "grad_norm": 1.090937614440918,
      "learning_rate": 6.788940650883285e-06,
      "loss": 0.0207,
      "step": 994640
    },
    {
      "epoch": 1.6277829055465003,
      "grad_norm": 0.8657389879226685,
      "learning_rate": 6.788874758669769e-06,
      "loss": 0.0241,
      "step": 994660
    },
    {
      "epoch": 1.6278156359851534,
      "grad_norm": 0.14445608854293823,
      "learning_rate": 6.788808866456252e-06,
      "loss": 0.0167,
      "step": 994680
    },
    {
      "epoch": 1.6278483664238068,
      "grad_norm": 0.23480361700057983,
      "learning_rate": 6.788742974242734e-06,
      "loss": 0.0216,
      "step": 994700
    },
    {
      "epoch": 1.6278810968624602,
      "grad_norm": 0.17163313925266266,
      "learning_rate": 6.788677082029217e-06,
      "loss": 0.0174,
      "step": 994720
    },
    {
      "epoch": 1.6279138273011133,
      "grad_norm": 0.859027087688446,
      "learning_rate": 6.788611189815701e-06,
      "loss": 0.0246,
      "step": 994740
    },
    {
      "epoch": 1.627946557739767,
      "grad_norm": 0.3112636208534241,
      "learning_rate": 6.788545297602183e-06,
      "loss": 0.0229,
      "step": 994760
    },
    {
      "epoch": 1.62797928817842,
      "grad_norm": 0.38883376121520996,
      "learning_rate": 6.788479405388666e-06,
      "loss": 0.0171,
      "step": 994780
    },
    {
      "epoch": 1.6280120186170735,
      "grad_norm": 1.1566842794418335,
      "learning_rate": 6.788413513175148e-06,
      "loss": 0.0126,
      "step": 994800
    },
    {
      "epoch": 1.6280447490557268,
      "grad_norm": 0.18208655714988708,
      "learning_rate": 6.788347620961632e-06,
      "loss": 0.0234,
      "step": 994820
    },
    {
      "epoch": 1.6280774794943802,
      "grad_norm": 0.36538779735565186,
      "learning_rate": 6.7882817287481136e-06,
      "loss": 0.0195,
      "step": 994840
    },
    {
      "epoch": 1.6281102099330336,
      "grad_norm": 0.4262271821498871,
      "learning_rate": 6.788215836534597e-06,
      "loss": 0.0247,
      "step": 994860
    },
    {
      "epoch": 1.6281429403716867,
      "grad_norm": 0.43053287267684937,
      "learning_rate": 6.78814994432108e-06,
      "loss": 0.0194,
      "step": 994880
    },
    {
      "epoch": 1.6281756708103403,
      "grad_norm": 0.3601800501346588,
      "learning_rate": 6.7880840521075635e-06,
      "loss": 0.0172,
      "step": 994900
    },
    {
      "epoch": 1.6282084012489935,
      "grad_norm": 0.6200532913208008,
      "learning_rate": 6.788018159894045e-06,
      "loss": 0.0169,
      "step": 994920
    },
    {
      "epoch": 1.6282411316876468,
      "grad_norm": 0.3539451062679291,
      "learning_rate": 6.787952267680529e-06,
      "loss": 0.013,
      "step": 994940
    },
    {
      "epoch": 1.6282738621263002,
      "grad_norm": 1.6117440462112427,
      "learning_rate": 6.7878863754670125e-06,
      "loss": 0.0176,
      "step": 994960
    },
    {
      "epoch": 1.6283065925649536,
      "grad_norm": 1.11936616897583,
      "learning_rate": 6.7878204832534944e-06,
      "loss": 0.0281,
      "step": 994980
    },
    {
      "epoch": 1.628339323003607,
      "grad_norm": 1.028467059135437,
      "learning_rate": 6.787754591039978e-06,
      "loss": 0.0188,
      "step": 995000
    },
    {
      "epoch": 1.62837205344226,
      "grad_norm": 1.8010367155075073,
      "learning_rate": 6.78768869882646e-06,
      "loss": 0.0221,
      "step": 995020
    },
    {
      "epoch": 1.6284047838809137,
      "grad_norm": 0.8471329212188721,
      "learning_rate": 6.7876228066129435e-06,
      "loss": 0.0204,
      "step": 995040
    },
    {
      "epoch": 1.6284375143195668,
      "grad_norm": 0.12878094613552094,
      "learning_rate": 6.787556914399425e-06,
      "loss": 0.0214,
      "step": 995060
    },
    {
      "epoch": 1.6284702447582202,
      "grad_norm": 0.6413995027542114,
      "learning_rate": 6.787491022185909e-06,
      "loss": 0.0145,
      "step": 995080
    },
    {
      "epoch": 1.6285029751968736,
      "grad_norm": 0.16973596811294556,
      "learning_rate": 6.787425129972392e-06,
      "loss": 0.0286,
      "step": 995100
    },
    {
      "epoch": 1.628535705635527,
      "grad_norm": 0.4152553379535675,
      "learning_rate": 6.7873592377588745e-06,
      "loss": 0.0207,
      "step": 995120
    },
    {
      "epoch": 1.6285684360741803,
      "grad_norm": 0.8560008406639099,
      "learning_rate": 6.787293345545357e-06,
      "loss": 0.0234,
      "step": 995140
    },
    {
      "epoch": 1.6286011665128335,
      "grad_norm": 0.3101724684238434,
      "learning_rate": 6.787227453331841e-06,
      "loss": 0.0167,
      "step": 995160
    },
    {
      "epoch": 1.628633896951487,
      "grad_norm": 0.6150640845298767,
      "learning_rate": 6.787161561118323e-06,
      "loss": 0.0264,
      "step": 995180
    },
    {
      "epoch": 1.6286666273901402,
      "grad_norm": 0.2964046597480774,
      "learning_rate": 6.787095668904806e-06,
      "loss": 0.0139,
      "step": 995200
    },
    {
      "epoch": 1.6286993578287936,
      "grad_norm": 0.2409716695547104,
      "learning_rate": 6.787029776691288e-06,
      "loss": 0.0203,
      "step": 995220
    },
    {
      "epoch": 1.628732088267447,
      "grad_norm": 0.40517371892929077,
      "learning_rate": 6.786963884477772e-06,
      "loss": 0.016,
      "step": 995240
    },
    {
      "epoch": 1.6287648187061003,
      "grad_norm": 0.1986118108034134,
      "learning_rate": 6.7868979922642545e-06,
      "loss": 0.0144,
      "step": 995260
    },
    {
      "epoch": 1.6287975491447537,
      "grad_norm": 0.4597794711589813,
      "learning_rate": 6.786832100050737e-06,
      "loss": 0.0178,
      "step": 995280
    },
    {
      "epoch": 1.6288302795834069,
      "grad_norm": 0.2726510465145111,
      "learning_rate": 6.786766207837221e-06,
      "loss": 0.0178,
      "step": 995300
    },
    {
      "epoch": 1.6288630100220605,
      "grad_norm": 0.4121062755584717,
      "learning_rate": 6.7867003156237035e-06,
      "loss": 0.014,
      "step": 995320
    },
    {
      "epoch": 1.6288957404607136,
      "grad_norm": 0.8804023861885071,
      "learning_rate": 6.786634423410186e-06,
      "loss": 0.0189,
      "step": 995340
    },
    {
      "epoch": 1.628928470899367,
      "grad_norm": 0.5662436485290527,
      "learning_rate": 6.786568531196669e-06,
      "loss": 0.0128,
      "step": 995360
    },
    {
      "epoch": 1.6289612013380204,
      "grad_norm": 0.18431054055690765,
      "learning_rate": 6.786502638983153e-06,
      "loss": 0.0148,
      "step": 995380
    },
    {
      "epoch": 1.6289939317766735,
      "grad_norm": 1.453370451927185,
      "learning_rate": 6.7864367467696345e-06,
      "loss": 0.0179,
      "step": 995400
    },
    {
      "epoch": 1.629026662215327,
      "grad_norm": 0.4490692615509033,
      "learning_rate": 6.786370854556118e-06,
      "loss": 0.0201,
      "step": 995420
    },
    {
      "epoch": 1.6290593926539803,
      "grad_norm": 0.8008689880371094,
      "learning_rate": 6.7863049623426e-06,
      "loss": 0.0157,
      "step": 995440
    },
    {
      "epoch": 1.6290921230926338,
      "grad_norm": 0.3280752897262573,
      "learning_rate": 6.7862390701290836e-06,
      "loss": 0.0187,
      "step": 995460
    },
    {
      "epoch": 1.629124853531287,
      "grad_norm": 0.38594743609428406,
      "learning_rate": 6.786173177915566e-06,
      "loss": 0.0223,
      "step": 995480
    },
    {
      "epoch": 1.6291575839699404,
      "grad_norm": 0.13683418929576874,
      "learning_rate": 6.786107285702049e-06,
      "loss": 0.0126,
      "step": 995500
    },
    {
      "epoch": 1.6291903144085937,
      "grad_norm": 0.45464637875556946,
      "learning_rate": 6.786041393488532e-06,
      "loss": 0.0239,
      "step": 995520
    },
    {
      "epoch": 1.629223044847247,
      "grad_norm": 0.2507558763027191,
      "learning_rate": 6.785975501275015e-06,
      "loss": 0.0204,
      "step": 995540
    },
    {
      "epoch": 1.6292557752859005,
      "grad_norm": 0.9911700487136841,
      "learning_rate": 6.785909609061497e-06,
      "loss": 0.024,
      "step": 995560
    },
    {
      "epoch": 1.6292885057245536,
      "grad_norm": 0.19250263273715973,
      "learning_rate": 6.785843716847981e-06,
      "loss": 0.0201,
      "step": 995580
    },
    {
      "epoch": 1.629321236163207,
      "grad_norm": 0.15842945873737335,
      "learning_rate": 6.785777824634463e-06,
      "loss": 0.0112,
      "step": 995600
    },
    {
      "epoch": 1.6293539666018604,
      "grad_norm": 0.13622811436653137,
      "learning_rate": 6.785711932420946e-06,
      "loss": 0.012,
      "step": 995620
    },
    {
      "epoch": 1.6293866970405138,
      "grad_norm": 0.4242488443851471,
      "learning_rate": 6.78564604020743e-06,
      "loss": 0.025,
      "step": 995640
    },
    {
      "epoch": 1.6294194274791671,
      "grad_norm": 0.1786314696073532,
      "learning_rate": 6.785580147993912e-06,
      "loss": 0.0232,
      "step": 995660
    },
    {
      "epoch": 1.6294521579178203,
      "grad_norm": 0.2709258198738098,
      "learning_rate": 6.785514255780395e-06,
      "loss": 0.0193,
      "step": 995680
    },
    {
      "epoch": 1.6294848883564739,
      "grad_norm": 0.4082506000995636,
      "learning_rate": 6.785448363566878e-06,
      "loss": 0.0184,
      "step": 995700
    },
    {
      "epoch": 1.629517618795127,
      "grad_norm": 1.9943944215774536,
      "learning_rate": 6.785382471353361e-06,
      "loss": 0.0188,
      "step": 995720
    },
    {
      "epoch": 1.6295503492337804,
      "grad_norm": 0.6283198595046997,
      "learning_rate": 6.785316579139844e-06,
      "loss": 0.0202,
      "step": 995740
    },
    {
      "epoch": 1.6295830796724338,
      "grad_norm": 0.4715892970561981,
      "learning_rate": 6.785250686926327e-06,
      "loss": 0.0187,
      "step": 995760
    },
    {
      "epoch": 1.6296158101110871,
      "grad_norm": 0.4525893032550812,
      "learning_rate": 6.785184794712809e-06,
      "loss": 0.0217,
      "step": 995780
    },
    {
      "epoch": 1.6296485405497405,
      "grad_norm": 0.6537871360778809,
      "learning_rate": 6.785118902499293e-06,
      "loss": 0.0184,
      "step": 995800
    },
    {
      "epoch": 1.6296812709883937,
      "grad_norm": 0.7199217081069946,
      "learning_rate": 6.785053010285775e-06,
      "loss": 0.0292,
      "step": 995820
    },
    {
      "epoch": 1.6297140014270473,
      "grad_norm": 0.20453833043575287,
      "learning_rate": 6.784987118072258e-06,
      "loss": 0.0138,
      "step": 995840
    },
    {
      "epoch": 1.6297467318657004,
      "grad_norm": 0.18776856362819672,
      "learning_rate": 6.78492122585874e-06,
      "loss": 0.0178,
      "step": 995860
    },
    {
      "epoch": 1.6297794623043538,
      "grad_norm": 1.466251254081726,
      "learning_rate": 6.784855333645224e-06,
      "loss": 0.0155,
      "step": 995880
    },
    {
      "epoch": 1.6298121927430071,
      "grad_norm": 0.8699510097503662,
      "learning_rate": 6.784789441431706e-06,
      "loss": 0.0148,
      "step": 995900
    },
    {
      "epoch": 1.6298449231816605,
      "grad_norm": 0.620398223400116,
      "learning_rate": 6.784723549218189e-06,
      "loss": 0.0196,
      "step": 995920
    },
    {
      "epoch": 1.629877653620314,
      "grad_norm": 0.49259212613105774,
      "learning_rate": 6.784657657004672e-06,
      "loss": 0.0222,
      "step": 995940
    },
    {
      "epoch": 1.629910384058967,
      "grad_norm": 0.42719557881355286,
      "learning_rate": 6.7845917647911555e-06,
      "loss": 0.0132,
      "step": 995960
    },
    {
      "epoch": 1.6299431144976206,
      "grad_norm": 0.9255363345146179,
      "learning_rate": 6.784525872577637e-06,
      "loss": 0.0158,
      "step": 995980
    },
    {
      "epoch": 1.6299758449362738,
      "grad_norm": 1.3090438842773438,
      "learning_rate": 6.784459980364121e-06,
      "loss": 0.02,
      "step": 996000
    },
    {
      "epoch": 1.6300085753749272,
      "grad_norm": 1.9705796241760254,
      "learning_rate": 6.7843940881506045e-06,
      "loss": 0.0227,
      "step": 996020
    },
    {
      "epoch": 1.6300413058135805,
      "grad_norm": 0.23405562341213226,
      "learning_rate": 6.784328195937086e-06,
      "loss": 0.0198,
      "step": 996040
    },
    {
      "epoch": 1.630074036252234,
      "grad_norm": 0.03346138447523117,
      "learning_rate": 6.78426230372357e-06,
      "loss": 0.0165,
      "step": 996060
    },
    {
      "epoch": 1.6301067666908873,
      "grad_norm": 0.922214150428772,
      "learning_rate": 6.784196411510052e-06,
      "loss": 0.0229,
      "step": 996080
    },
    {
      "epoch": 1.6301394971295404,
      "grad_norm": 0.664482593536377,
      "learning_rate": 6.7841305192965355e-06,
      "loss": 0.0245,
      "step": 996100
    },
    {
      "epoch": 1.630172227568194,
      "grad_norm": 1.337554693222046,
      "learning_rate": 6.784064627083018e-06,
      "loss": 0.0226,
      "step": 996120
    },
    {
      "epoch": 1.6302049580068472,
      "grad_norm": 0.3103344738483429,
      "learning_rate": 6.783998734869501e-06,
      "loss": 0.0231,
      "step": 996140
    },
    {
      "epoch": 1.6302376884455005,
      "grad_norm": 1.3292524814605713,
      "learning_rate": 6.783932842655984e-06,
      "loss": 0.0166,
      "step": 996160
    },
    {
      "epoch": 1.630270418884154,
      "grad_norm": 0.5158169865608215,
      "learning_rate": 6.783866950442467e-06,
      "loss": 0.0128,
      "step": 996180
    },
    {
      "epoch": 1.630303149322807,
      "grad_norm": 1.2027679681777954,
      "learning_rate": 6.783801058228949e-06,
      "loss": 0.0209,
      "step": 996200
    },
    {
      "epoch": 1.6303358797614607,
      "grad_norm": 1.0407854318618774,
      "learning_rate": 6.783735166015433e-06,
      "loss": 0.0223,
      "step": 996220
    },
    {
      "epoch": 1.6303686102001138,
      "grad_norm": 0.5701547265052795,
      "learning_rate": 6.783669273801915e-06,
      "loss": 0.022,
      "step": 996240
    },
    {
      "epoch": 1.6304013406387674,
      "grad_norm": 0.0976642370223999,
      "learning_rate": 6.783603381588398e-06,
      "loss": 0.0187,
      "step": 996260
    },
    {
      "epoch": 1.6304340710774206,
      "grad_norm": 0.8745840191841125,
      "learning_rate": 6.783537489374881e-06,
      "loss": 0.0192,
      "step": 996280
    },
    {
      "epoch": 1.630466801516074,
      "grad_norm": 0.3751278817653656,
      "learning_rate": 6.783471597161364e-06,
      "loss": 0.0174,
      "step": 996300
    },
    {
      "epoch": 1.6304995319547273,
      "grad_norm": 0.9833837747573853,
      "learning_rate": 6.7834057049478465e-06,
      "loss": 0.0292,
      "step": 996320
    },
    {
      "epoch": 1.6305322623933804,
      "grad_norm": 0.7361981272697449,
      "learning_rate": 6.78333981273433e-06,
      "loss": 0.0229,
      "step": 996340
    },
    {
      "epoch": 1.630564992832034,
      "grad_norm": 1.1704519987106323,
      "learning_rate": 6.783273920520813e-06,
      "loss": 0.0208,
      "step": 996360
    },
    {
      "epoch": 1.6305977232706872,
      "grad_norm": 0.31650546193122864,
      "learning_rate": 6.7832080283072955e-06,
      "loss": 0.0215,
      "step": 996380
    },
    {
      "epoch": 1.6306304537093406,
      "grad_norm": 0.1953485906124115,
      "learning_rate": 6.783142136093779e-06,
      "loss": 0.0196,
      "step": 996400
    },
    {
      "epoch": 1.630663184147994,
      "grad_norm": 0.6785461902618408,
      "learning_rate": 6.783076243880261e-06,
      "loss": 0.0182,
      "step": 996420
    },
    {
      "epoch": 1.6306959145866473,
      "grad_norm": 0.48023679852485657,
      "learning_rate": 6.783010351666745e-06,
      "loss": 0.0176,
      "step": 996440
    },
    {
      "epoch": 1.6307286450253007,
      "grad_norm": 0.5540322065353394,
      "learning_rate": 6.7829444594532265e-06,
      "loss": 0.0253,
      "step": 996460
    },
    {
      "epoch": 1.6307613754639538,
      "grad_norm": 0.3759968876838684,
      "learning_rate": 6.78287856723971e-06,
      "loss": 0.0204,
      "step": 996480
    },
    {
      "epoch": 1.6307941059026074,
      "grad_norm": 0.7940255403518677,
      "learning_rate": 6.782812675026193e-06,
      "loss": 0.0187,
      "step": 996500
    },
    {
      "epoch": 1.6308268363412606,
      "grad_norm": 0.6950495839118958,
      "learning_rate": 6.7827467828126756e-06,
      "loss": 0.0136,
      "step": 996520
    },
    {
      "epoch": 1.630859566779914,
      "grad_norm": 0.3406769037246704,
      "learning_rate": 6.782680890599158e-06,
      "loss": 0.0203,
      "step": 996540
    },
    {
      "epoch": 1.6308922972185673,
      "grad_norm": 0.4260861575603485,
      "learning_rate": 6.782614998385642e-06,
      "loss": 0.0219,
      "step": 996560
    },
    {
      "epoch": 1.6309250276572207,
      "grad_norm": 0.8124068975448608,
      "learning_rate": 6.782549106172124e-06,
      "loss": 0.0227,
      "step": 996580
    },
    {
      "epoch": 1.630957758095874,
      "grad_norm": 1.05820894241333,
      "learning_rate": 6.782483213958607e-06,
      "loss": 0.03,
      "step": 996600
    },
    {
      "epoch": 1.6309904885345272,
      "grad_norm": 0.37178635597229004,
      "learning_rate": 6.782417321745089e-06,
      "loss": 0.0166,
      "step": 996620
    },
    {
      "epoch": 1.6310232189731808,
      "grad_norm": 0.8096741437911987,
      "learning_rate": 6.782351429531573e-06,
      "loss": 0.0219,
      "step": 996640
    },
    {
      "epoch": 1.631055949411834,
      "grad_norm": 0.6360604763031006,
      "learning_rate": 6.782285537318056e-06,
      "loss": 0.0145,
      "step": 996660
    },
    {
      "epoch": 1.6310886798504873,
      "grad_norm": 1.253220558166504,
      "learning_rate": 6.782219645104538e-06,
      "loss": 0.0239,
      "step": 996680
    },
    {
      "epoch": 1.6311214102891407,
      "grad_norm": 1.0065029859542847,
      "learning_rate": 6.782153752891022e-06,
      "loss": 0.0228,
      "step": 996700
    },
    {
      "epoch": 1.631154140727794,
      "grad_norm": 0.4107658863067627,
      "learning_rate": 6.782087860677505e-06,
      "loss": 0.0223,
      "step": 996720
    },
    {
      "epoch": 1.6311868711664474,
      "grad_norm": 0.6009916067123413,
      "learning_rate": 6.782021968463987e-06,
      "loss": 0.0227,
      "step": 996740
    },
    {
      "epoch": 1.6312196016051006,
      "grad_norm": 1.5428260564804077,
      "learning_rate": 6.78195607625047e-06,
      "loss": 0.0163,
      "step": 996760
    },
    {
      "epoch": 1.6312523320437542,
      "grad_norm": 0.6150886416435242,
      "learning_rate": 6.781890184036954e-06,
      "loss": 0.0251,
      "step": 996780
    },
    {
      "epoch": 1.6312850624824073,
      "grad_norm": 0.46194469928741455,
      "learning_rate": 6.781824291823436e-06,
      "loss": 0.0248,
      "step": 996800
    },
    {
      "epoch": 1.6313177929210607,
      "grad_norm": 0.16776613891124725,
      "learning_rate": 6.781758399609919e-06,
      "loss": 0.0164,
      "step": 996820
    },
    {
      "epoch": 1.631350523359714,
      "grad_norm": 0.6711044311523438,
      "learning_rate": 6.781692507396401e-06,
      "loss": 0.0306,
      "step": 996840
    },
    {
      "epoch": 1.6313832537983675,
      "grad_norm": 0.4917372465133667,
      "learning_rate": 6.781626615182885e-06,
      "loss": 0.0191,
      "step": 996860
    },
    {
      "epoch": 1.6314159842370208,
      "grad_norm": 0.6951122283935547,
      "learning_rate": 6.7815607229693666e-06,
      "loss": 0.0162,
      "step": 996880
    },
    {
      "epoch": 1.631448714675674,
      "grad_norm": 0.4956296980381012,
      "learning_rate": 6.78149483075585e-06,
      "loss": 0.027,
      "step": 996900
    },
    {
      "epoch": 1.6314814451143276,
      "grad_norm": 0.9660877585411072,
      "learning_rate": 6.781428938542333e-06,
      "loss": 0.0156,
      "step": 996920
    },
    {
      "epoch": 1.6315141755529807,
      "grad_norm": 0.6671388745307922,
      "learning_rate": 6.781363046328816e-06,
      "loss": 0.0226,
      "step": 996940
    },
    {
      "epoch": 1.631546905991634,
      "grad_norm": 0.5219019651412964,
      "learning_rate": 6.781297154115298e-06,
      "loss": 0.0248,
      "step": 996960
    },
    {
      "epoch": 1.6315796364302875,
      "grad_norm": 0.4076181948184967,
      "learning_rate": 6.781231261901782e-06,
      "loss": 0.0222,
      "step": 996980
    },
    {
      "epoch": 1.6316123668689406,
      "grad_norm": 0.5865180492401123,
      "learning_rate": 6.781165369688264e-06,
      "loss": 0.0258,
      "step": 997000
    },
    {
      "epoch": 1.6316450973075942,
      "grad_norm": 2.250237464904785,
      "learning_rate": 6.7810994774747474e-06,
      "loss": 0.0134,
      "step": 997020
    },
    {
      "epoch": 1.6316778277462474,
      "grad_norm": 0.7025965452194214,
      "learning_rate": 6.781033585261231e-06,
      "loss": 0.0184,
      "step": 997040
    },
    {
      "epoch": 1.6317105581849007,
      "grad_norm": 0.4548143148422241,
      "learning_rate": 6.780967693047713e-06,
      "loss": 0.0204,
      "step": 997060
    },
    {
      "epoch": 1.6317432886235541,
      "grad_norm": 1.588539481163025,
      "learning_rate": 6.7809018008341965e-06,
      "loss": 0.0178,
      "step": 997080
    },
    {
      "epoch": 1.6317760190622075,
      "grad_norm": 0.21211501955986023,
      "learning_rate": 6.780835908620678e-06,
      "loss": 0.0213,
      "step": 997100
    },
    {
      "epoch": 1.6318087495008609,
      "grad_norm": 0.41509395837783813,
      "learning_rate": 6.780770016407162e-06,
      "loss": 0.0113,
      "step": 997120
    },
    {
      "epoch": 1.631841479939514,
      "grad_norm": 0.6526911854743958,
      "learning_rate": 6.780704124193645e-06,
      "loss": 0.017,
      "step": 997140
    },
    {
      "epoch": 1.6318742103781676,
      "grad_norm": 0.9317203760147095,
      "learning_rate": 6.7806382319801275e-06,
      "loss": 0.0276,
      "step": 997160
    },
    {
      "epoch": 1.6319069408168208,
      "grad_norm": 0.4017254114151001,
      "learning_rate": 6.78057233976661e-06,
      "loss": 0.0157,
      "step": 997180
    },
    {
      "epoch": 1.6319396712554741,
      "grad_norm": 0.25663048028945923,
      "learning_rate": 6.780506447553094e-06,
      "loss": 0.0176,
      "step": 997200
    },
    {
      "epoch": 1.6319724016941275,
      "grad_norm": 1.821772813796997,
      "learning_rate": 6.780440555339576e-06,
      "loss": 0.0191,
      "step": 997220
    },
    {
      "epoch": 1.6320051321327809,
      "grad_norm": 0.23613139986991882,
      "learning_rate": 6.780374663126059e-06,
      "loss": 0.0167,
      "step": 997240
    },
    {
      "epoch": 1.6320378625714342,
      "grad_norm": 0.4432861804962158,
      "learning_rate": 6.780308770912541e-06,
      "loss": 0.0169,
      "step": 997260
    },
    {
      "epoch": 1.6320705930100874,
      "grad_norm": 0.3967004716396332,
      "learning_rate": 6.780242878699025e-06,
      "loss": 0.0178,
      "step": 997280
    },
    {
      "epoch": 1.632103323448741,
      "grad_norm": 0.48931893706321716,
      "learning_rate": 6.7801769864855075e-06,
      "loss": 0.0172,
      "step": 997300
    },
    {
      "epoch": 1.6321360538873941,
      "grad_norm": 0.16454018652439117,
      "learning_rate": 6.78011109427199e-06,
      "loss": 0.0268,
      "step": 997320
    },
    {
      "epoch": 1.6321687843260475,
      "grad_norm": 0.6879239082336426,
      "learning_rate": 6.780045202058473e-06,
      "loss": 0.0143,
      "step": 997340
    },
    {
      "epoch": 1.6322015147647009,
      "grad_norm": 2.053434133529663,
      "learning_rate": 6.7799793098449566e-06,
      "loss": 0.0154,
      "step": 997360
    },
    {
      "epoch": 1.6322342452033543,
      "grad_norm": 0.3985674977302551,
      "learning_rate": 6.7799134176314384e-06,
      "loss": 0.0231,
      "step": 997380
    },
    {
      "epoch": 1.6322669756420076,
      "grad_norm": 0.49207958579063416,
      "learning_rate": 6.779847525417922e-06,
      "loss": 0.0212,
      "step": 997400
    },
    {
      "epoch": 1.6322997060806608,
      "grad_norm": 0.4945923984050751,
      "learning_rate": 6.779781633204406e-06,
      "loss": 0.0194,
      "step": 997420
    },
    {
      "epoch": 1.6323324365193144,
      "grad_norm": 0.7772818803787231,
      "learning_rate": 6.7797157409908875e-06,
      "loss": 0.021,
      "step": 997440
    },
    {
      "epoch": 1.6323651669579675,
      "grad_norm": 1.1137053966522217,
      "learning_rate": 6.779649848777371e-06,
      "loss": 0.0192,
      "step": 997460
    },
    {
      "epoch": 1.632397897396621,
      "grad_norm": 0.9250268936157227,
      "learning_rate": 6.779583956563853e-06,
      "loss": 0.0321,
      "step": 997480
    },
    {
      "epoch": 1.6324306278352743,
      "grad_norm": 0.344784677028656,
      "learning_rate": 6.779518064350337e-06,
      "loss": 0.0125,
      "step": 997500
    },
    {
      "epoch": 1.6324633582739276,
      "grad_norm": 0.49093732237815857,
      "learning_rate": 6.779452172136819e-06,
      "loss": 0.0178,
      "step": 997520
    },
    {
      "epoch": 1.632496088712581,
      "grad_norm": 0.963638186454773,
      "learning_rate": 6.779386279923302e-06,
      "loss": 0.0253,
      "step": 997540
    },
    {
      "epoch": 1.6325288191512342,
      "grad_norm": 0.5184246897697449,
      "learning_rate": 6.779320387709785e-06,
      "loss": 0.0249,
      "step": 997560
    },
    {
      "epoch": 1.6325615495898878,
      "grad_norm": 0.33972135186195374,
      "learning_rate": 6.779254495496268e-06,
      "loss": 0.021,
      "step": 997580
    },
    {
      "epoch": 1.632594280028541,
      "grad_norm": 0.7837164402008057,
      "learning_rate": 6.77918860328275e-06,
      "loss": 0.0131,
      "step": 997600
    },
    {
      "epoch": 1.6326270104671943,
      "grad_norm": 0.7156662940979004,
      "learning_rate": 6.779122711069234e-06,
      "loss": 0.0215,
      "step": 997620
    },
    {
      "epoch": 1.6326597409058476,
      "grad_norm": 1.1763800382614136,
      "learning_rate": 6.779056818855716e-06,
      "loss": 0.0174,
      "step": 997640
    },
    {
      "epoch": 1.6326924713445008,
      "grad_norm": 0.49980294704437256,
      "learning_rate": 6.778990926642199e-06,
      "loss": 0.0325,
      "step": 997660
    },
    {
      "epoch": 1.6327252017831544,
      "grad_norm": 0.9209936857223511,
      "learning_rate": 6.778925034428682e-06,
      "loss": 0.017,
      "step": 997680
    },
    {
      "epoch": 1.6327579322218075,
      "grad_norm": 1.581396222114563,
      "learning_rate": 6.778859142215165e-06,
      "loss": 0.0209,
      "step": 997700
    },
    {
      "epoch": 1.6327906626604611,
      "grad_norm": 0.6540926098823547,
      "learning_rate": 6.7787932500016476e-06,
      "loss": 0.0164,
      "step": 997720
    },
    {
      "epoch": 1.6328233930991143,
      "grad_norm": 0.17719413340091705,
      "learning_rate": 6.778727357788131e-06,
      "loss": 0.0136,
      "step": 997740
    },
    {
      "epoch": 1.6328561235377677,
      "grad_norm": 1.0109453201293945,
      "learning_rate": 6.778661465574614e-06,
      "loss": 0.0167,
      "step": 997760
    },
    {
      "epoch": 1.632888853976421,
      "grad_norm": 0.19582727551460266,
      "learning_rate": 6.778595573361097e-06,
      "loss": 0.0186,
      "step": 997780
    },
    {
      "epoch": 1.6329215844150742,
      "grad_norm": 0.37438997626304626,
      "learning_rate": 6.77852968114758e-06,
      "loss": 0.0189,
      "step": 997800
    },
    {
      "epoch": 1.6329543148537278,
      "grad_norm": 0.8622647523880005,
      "learning_rate": 6.778463788934062e-06,
      "loss": 0.0195,
      "step": 997820
    },
    {
      "epoch": 1.632987045292381,
      "grad_norm": 0.29292312264442444,
      "learning_rate": 6.778397896720546e-06,
      "loss": 0.0258,
      "step": 997840
    },
    {
      "epoch": 1.6330197757310343,
      "grad_norm": 0.9487695693969727,
      "learning_rate": 6.778332004507028e-06,
      "loss": 0.0203,
      "step": 997860
    },
    {
      "epoch": 1.6330525061696877,
      "grad_norm": 0.7728996872901917,
      "learning_rate": 6.778266112293511e-06,
      "loss": 0.0201,
      "step": 997880
    },
    {
      "epoch": 1.633085236608341,
      "grad_norm": 0.8173414468765259,
      "learning_rate": 6.778200220079993e-06,
      "loss": 0.0178,
      "step": 997900
    },
    {
      "epoch": 1.6331179670469944,
      "grad_norm": 0.26658889651298523,
      "learning_rate": 6.778134327866477e-06,
      "loss": 0.0242,
      "step": 997920
    },
    {
      "epoch": 1.6331506974856476,
      "grad_norm": 0.9502723217010498,
      "learning_rate": 6.778068435652959e-06,
      "loss": 0.0187,
      "step": 997940
    },
    {
      "epoch": 1.6331834279243012,
      "grad_norm": 0.11108643561601639,
      "learning_rate": 6.778002543439442e-06,
      "loss": 0.0157,
      "step": 997960
    },
    {
      "epoch": 1.6332161583629543,
      "grad_norm": 0.43222707509994507,
      "learning_rate": 6.777936651225925e-06,
      "loss": 0.016,
      "step": 997980
    },
    {
      "epoch": 1.6332488888016077,
      "grad_norm": 0.6068564653396606,
      "learning_rate": 6.7778707590124085e-06,
      "loss": 0.0209,
      "step": 998000
    },
    {
      "epoch": 1.633281619240261,
      "grad_norm": 1.1721110343933105,
      "learning_rate": 6.77780486679889e-06,
      "loss": 0.0198,
      "step": 998020
    },
    {
      "epoch": 1.6333143496789144,
      "grad_norm": 0.9293203353881836,
      "learning_rate": 6.777738974585374e-06,
      "loss": 0.0182,
      "step": 998040
    },
    {
      "epoch": 1.6333470801175678,
      "grad_norm": 0.4659276008605957,
      "learning_rate": 6.777673082371856e-06,
      "loss": 0.021,
      "step": 998060
    },
    {
      "epoch": 1.633379810556221,
      "grad_norm": 0.34239932894706726,
      "learning_rate": 6.777607190158339e-06,
      "loss": 0.021,
      "step": 998080
    },
    {
      "epoch": 1.6334125409948745,
      "grad_norm": 0.45730671286582947,
      "learning_rate": 6.777541297944823e-06,
      "loss": 0.0166,
      "step": 998100
    },
    {
      "epoch": 1.6334452714335277,
      "grad_norm": 0.4737650156021118,
      "learning_rate": 6.777475405731305e-06,
      "loss": 0.0166,
      "step": 998120
    },
    {
      "epoch": 1.633478001872181,
      "grad_norm": 0.4706810414791107,
      "learning_rate": 6.7774095135177885e-06,
      "loss": 0.0216,
      "step": 998140
    },
    {
      "epoch": 1.6335107323108344,
      "grad_norm": 1.257413387298584,
      "learning_rate": 6.777343621304271e-06,
      "loss": 0.0162,
      "step": 998160
    },
    {
      "epoch": 1.6335434627494878,
      "grad_norm": 0.5539681911468506,
      "learning_rate": 6.777277729090754e-06,
      "loss": 0.0252,
      "step": 998180
    },
    {
      "epoch": 1.6335761931881412,
      "grad_norm": 0.5130266547203064,
      "learning_rate": 6.777211836877237e-06,
      "loss": 0.0242,
      "step": 998200
    },
    {
      "epoch": 1.6336089236267943,
      "grad_norm": 0.3768044710159302,
      "learning_rate": 6.77714594466372e-06,
      "loss": 0.0263,
      "step": 998220
    },
    {
      "epoch": 1.633641654065448,
      "grad_norm": 0.43053463101387024,
      "learning_rate": 6.777080052450202e-06,
      "loss": 0.0188,
      "step": 998240
    },
    {
      "epoch": 1.633674384504101,
      "grad_norm": 0.5309788584709167,
      "learning_rate": 6.777014160236686e-06,
      "loss": 0.0242,
      "step": 998260
    },
    {
      "epoch": 1.6337071149427544,
      "grad_norm": 0.38218438625335693,
      "learning_rate": 6.776948268023168e-06,
      "loss": 0.0169,
      "step": 998280
    },
    {
      "epoch": 1.6337398453814078,
      "grad_norm": 1.622480034828186,
      "learning_rate": 6.776882375809651e-06,
      "loss": 0.0182,
      "step": 998300
    },
    {
      "epoch": 1.6337725758200612,
      "grad_norm": 0.7459824681282043,
      "learning_rate": 6.776816483596134e-06,
      "loss": 0.0151,
      "step": 998320
    },
    {
      "epoch": 1.6338053062587146,
      "grad_norm": 0.3775921165943146,
      "learning_rate": 6.776750591382617e-06,
      "loss": 0.0216,
      "step": 998340
    },
    {
      "epoch": 1.6338380366973677,
      "grad_norm": 2.6147921085357666,
      "learning_rate": 6.7766846991690995e-06,
      "loss": 0.0147,
      "step": 998360
    },
    {
      "epoch": 1.6338707671360213,
      "grad_norm": 0.22409328818321228,
      "learning_rate": 6.776618806955583e-06,
      "loss": 0.0154,
      "step": 998380
    },
    {
      "epoch": 1.6339034975746745,
      "grad_norm": 0.30704447627067566,
      "learning_rate": 6.776552914742065e-06,
      "loss": 0.0244,
      "step": 998400
    },
    {
      "epoch": 1.6339362280133278,
      "grad_norm": 0.8624030351638794,
      "learning_rate": 6.7764870225285485e-06,
      "loss": 0.0174,
      "step": 998420
    },
    {
      "epoch": 1.6339689584519812,
      "grad_norm": 0.7866631746292114,
      "learning_rate": 6.7764211303150304e-06,
      "loss": 0.0154,
      "step": 998440
    },
    {
      "epoch": 1.6340016888906344,
      "grad_norm": 0.34473222494125366,
      "learning_rate": 6.776355238101514e-06,
      "loss": 0.0203,
      "step": 998460
    },
    {
      "epoch": 1.634034419329288,
      "grad_norm": 0.28028950095176697,
      "learning_rate": 6.776289345887998e-06,
      "loss": 0.0194,
      "step": 998480
    },
    {
      "epoch": 1.634067149767941,
      "grad_norm": 1.2203304767608643,
      "learning_rate": 6.7762234536744795e-06,
      "loss": 0.0149,
      "step": 998500
    },
    {
      "epoch": 1.6340998802065947,
      "grad_norm": 0.4671931266784668,
      "learning_rate": 6.776157561460963e-06,
      "loss": 0.0217,
      "step": 998520
    },
    {
      "epoch": 1.6341326106452478,
      "grad_norm": 0.46222081780433655,
      "learning_rate": 6.776091669247446e-06,
      "loss": 0.0241,
      "step": 998540
    },
    {
      "epoch": 1.6341653410839012,
      "grad_norm": 0.8633029460906982,
      "learning_rate": 6.7760257770339286e-06,
      "loss": 0.0187,
      "step": 998560
    },
    {
      "epoch": 1.6341980715225546,
      "grad_norm": 0.7797530293464661,
      "learning_rate": 6.775959884820411e-06,
      "loss": 0.0158,
      "step": 998580
    },
    {
      "epoch": 1.6342308019612077,
      "grad_norm": 0.5134669542312622,
      "learning_rate": 6.775893992606895e-06,
      "loss": 0.0187,
      "step": 998600
    },
    {
      "epoch": 1.6342635323998613,
      "grad_norm": 1.0302265882492065,
      "learning_rate": 6.775828100393377e-06,
      "loss": 0.02,
      "step": 998620
    },
    {
      "epoch": 1.6342962628385145,
      "grad_norm": 0.34159010648727417,
      "learning_rate": 6.77576220817986e-06,
      "loss": 0.02,
      "step": 998640
    },
    {
      "epoch": 1.6343289932771679,
      "grad_norm": 0.4579903185367584,
      "learning_rate": 6.775696315966342e-06,
      "loss": 0.0199,
      "step": 998660
    },
    {
      "epoch": 1.6343617237158212,
      "grad_norm": 0.8452255129814148,
      "learning_rate": 6.775630423752826e-06,
      "loss": 0.0126,
      "step": 998680
    },
    {
      "epoch": 1.6343944541544746,
      "grad_norm": 0.4247613847255707,
      "learning_rate": 6.775564531539308e-06,
      "loss": 0.0213,
      "step": 998700
    },
    {
      "epoch": 1.634427184593128,
      "grad_norm": 1.6517988443374634,
      "learning_rate": 6.775498639325791e-06,
      "loss": 0.0248,
      "step": 998720
    },
    {
      "epoch": 1.6344599150317811,
      "grad_norm": 0.12279754877090454,
      "learning_rate": 6.775432747112274e-06,
      "loss": 0.0183,
      "step": 998740
    },
    {
      "epoch": 1.6344926454704347,
      "grad_norm": 0.6834686398506165,
      "learning_rate": 6.775366854898757e-06,
      "loss": 0.0197,
      "step": 998760
    },
    {
      "epoch": 1.6345253759090879,
      "grad_norm": 0.5428266525268555,
      "learning_rate": 6.7753009626852395e-06,
      "loss": 0.0209,
      "step": 998780
    },
    {
      "epoch": 1.6345581063477412,
      "grad_norm": 0.08544861525297165,
      "learning_rate": 6.775235070471723e-06,
      "loss": 0.0179,
      "step": 998800
    },
    {
      "epoch": 1.6345908367863946,
      "grad_norm": 0.5358757972717285,
      "learning_rate": 6.775169178258207e-06,
      "loss": 0.0252,
      "step": 998820
    },
    {
      "epoch": 1.634623567225048,
      "grad_norm": 0.4343598484992981,
      "learning_rate": 6.775103286044689e-06,
      "loss": 0.0205,
      "step": 998840
    },
    {
      "epoch": 1.6346562976637014,
      "grad_norm": 0.743598997592926,
      "learning_rate": 6.775037393831172e-06,
      "loss": 0.0202,
      "step": 998860
    },
    {
      "epoch": 1.6346890281023545,
      "grad_norm": 0.47606930136680603,
      "learning_rate": 6.774971501617654e-06,
      "loss": 0.0225,
      "step": 998880
    },
    {
      "epoch": 1.634721758541008,
      "grad_norm": 1.0208494663238525,
      "learning_rate": 6.774905609404138e-06,
      "loss": 0.0244,
      "step": 998900
    },
    {
      "epoch": 1.6347544889796612,
      "grad_norm": 0.24884237349033356,
      "learning_rate": 6.7748397171906196e-06,
      "loss": 0.021,
      "step": 998920
    },
    {
      "epoch": 1.6347872194183146,
      "grad_norm": 0.27179843187332153,
      "learning_rate": 6.774773824977103e-06,
      "loss": 0.0122,
      "step": 998940
    },
    {
      "epoch": 1.634819949856968,
      "grad_norm": 1.033109188079834,
      "learning_rate": 6.774707932763586e-06,
      "loss": 0.0198,
      "step": 998960
    },
    {
      "epoch": 1.6348526802956214,
      "grad_norm": 0.5091870427131653,
      "learning_rate": 6.774642040550069e-06,
      "loss": 0.0155,
      "step": 998980
    },
    {
      "epoch": 1.6348854107342747,
      "grad_norm": 0.475578635931015,
      "learning_rate": 6.774576148336551e-06,
      "loss": 0.0169,
      "step": 999000
    },
    {
      "epoch": 1.6349181411729279,
      "grad_norm": 0.7001465559005737,
      "learning_rate": 6.774510256123035e-06,
      "loss": 0.0159,
      "step": 999020
    },
    {
      "epoch": 1.6349508716115815,
      "grad_norm": 1.17173171043396,
      "learning_rate": 6.774444363909517e-06,
      "loss": 0.0187,
      "step": 999040
    },
    {
      "epoch": 1.6349836020502346,
      "grad_norm": 1.0001565217971802,
      "learning_rate": 6.7743784716960004e-06,
      "loss": 0.0144,
      "step": 999060
    },
    {
      "epoch": 1.635016332488888,
      "grad_norm": 0.4082120954990387,
      "learning_rate": 6.774312579482482e-06,
      "loss": 0.018,
      "step": 999080
    },
    {
      "epoch": 1.6350490629275414,
      "grad_norm": 0.9697755575180054,
      "learning_rate": 6.774246687268966e-06,
      "loss": 0.0154,
      "step": 999100
    },
    {
      "epoch": 1.6350817933661947,
      "grad_norm": 0.051495496183633804,
      "learning_rate": 6.774180795055449e-06,
      "loss": 0.0212,
      "step": 999120
    },
    {
      "epoch": 1.6351145238048481,
      "grad_norm": 0.849525511264801,
      "learning_rate": 6.774114902841931e-06,
      "loss": 0.0189,
      "step": 999140
    },
    {
      "epoch": 1.6351472542435013,
      "grad_norm": 0.3483608663082123,
      "learning_rate": 6.774049010628415e-06,
      "loss": 0.0183,
      "step": 999160
    },
    {
      "epoch": 1.6351799846821549,
      "grad_norm": 0.3281192183494568,
      "learning_rate": 6.773983118414898e-06,
      "loss": 0.0187,
      "step": 999180
    },
    {
      "epoch": 1.635212715120808,
      "grad_norm": 0.3984331786632538,
      "learning_rate": 6.7739172262013805e-06,
      "loss": 0.0139,
      "step": 999200
    },
    {
      "epoch": 1.6352454455594614,
      "grad_norm": 0.9935648441314697,
      "learning_rate": 6.773851333987863e-06,
      "loss": 0.0142,
      "step": 999220
    },
    {
      "epoch": 1.6352781759981148,
      "grad_norm": 0.3119029104709625,
      "learning_rate": 6.773785441774347e-06,
      "loss": 0.0202,
      "step": 999240
    },
    {
      "epoch": 1.635310906436768,
      "grad_norm": 0.41551998257637024,
      "learning_rate": 6.773719549560829e-06,
      "loss": 0.0213,
      "step": 999260
    },
    {
      "epoch": 1.6353436368754215,
      "grad_norm": 0.23699501156806946,
      "learning_rate": 6.773653657347312e-06,
      "loss": 0.0199,
      "step": 999280
    },
    {
      "epoch": 1.6353763673140747,
      "grad_norm": 0.33363077044487,
      "learning_rate": 6.773587765133794e-06,
      "loss": 0.0191,
      "step": 999300
    },
    {
      "epoch": 1.6354090977527282,
      "grad_norm": 0.6805648803710938,
      "learning_rate": 6.773521872920278e-06,
      "loss": 0.0244,
      "step": 999320
    },
    {
      "epoch": 1.6354418281913814,
      "grad_norm": 0.4877166748046875,
      "learning_rate": 6.7734559807067605e-06,
      "loss": 0.0208,
      "step": 999340
    },
    {
      "epoch": 1.6354745586300348,
      "grad_norm": 0.707505464553833,
      "learning_rate": 6.773390088493243e-06,
      "loss": 0.0155,
      "step": 999360
    },
    {
      "epoch": 1.6355072890686881,
      "grad_norm": 0.3901931047439575,
      "learning_rate": 6.773324196279726e-06,
      "loss": 0.0138,
      "step": 999380
    },
    {
      "epoch": 1.6355400195073413,
      "grad_norm": 0.22473043203353882,
      "learning_rate": 6.7732583040662096e-06,
      "loss": 0.0183,
      "step": 999400
    },
    {
      "epoch": 1.6355727499459949,
      "grad_norm": 1.084635853767395,
      "learning_rate": 6.7731924118526915e-06,
      "loss": 0.0137,
      "step": 999420
    },
    {
      "epoch": 1.635605480384648,
      "grad_norm": 0.43822354078292847,
      "learning_rate": 6.773126519639175e-06,
      "loss": 0.0277,
      "step": 999440
    },
    {
      "epoch": 1.6356382108233014,
      "grad_norm": 0.5482980012893677,
      "learning_rate": 6.773060627425657e-06,
      "loss": 0.0162,
      "step": 999460
    },
    {
      "epoch": 1.6356709412619548,
      "grad_norm": 1.14235520362854,
      "learning_rate": 6.7729947352121405e-06,
      "loss": 0.0218,
      "step": 999480
    },
    {
      "epoch": 1.6357036717006082,
      "grad_norm": 0.4630916714668274,
      "learning_rate": 6.772928842998623e-06,
      "loss": 0.0196,
      "step": 999500
    },
    {
      "epoch": 1.6357364021392615,
      "grad_norm": 0.8415986895561218,
      "learning_rate": 6.772862950785106e-06,
      "loss": 0.0192,
      "step": 999520
    },
    {
      "epoch": 1.6357691325779147,
      "grad_norm": 0.24165192246437073,
      "learning_rate": 6.77279705857159e-06,
      "loss": 0.0235,
      "step": 999540
    },
    {
      "epoch": 1.6358018630165683,
      "grad_norm": 0.46634116768836975,
      "learning_rate": 6.772731166358072e-06,
      "loss": 0.0232,
      "step": 999560
    },
    {
      "epoch": 1.6358345934552214,
      "grad_norm": 1.1786803007125854,
      "learning_rate": 6.772665274144555e-06,
      "loss": 0.0287,
      "step": 999580
    },
    {
      "epoch": 1.6358673238938748,
      "grad_norm": 1.4552843570709229,
      "learning_rate": 6.772599381931038e-06,
      "loss": 0.019,
      "step": 999600
    },
    {
      "epoch": 1.6359000543325282,
      "grad_norm": 0.6716780662536621,
      "learning_rate": 6.772533489717521e-06,
      "loss": 0.0176,
      "step": 999620
    },
    {
      "epoch": 1.6359327847711815,
      "grad_norm": 0.3265746831893921,
      "learning_rate": 6.772467597504003e-06,
      "loss": 0.0152,
      "step": 999640
    },
    {
      "epoch": 1.635965515209835,
      "grad_norm": 1.1730639934539795,
      "learning_rate": 6.772401705290487e-06,
      "loss": 0.0228,
      "step": 999660
    },
    {
      "epoch": 1.635998245648488,
      "grad_norm": 1.5061928033828735,
      "learning_rate": 6.772335813076969e-06,
      "loss": 0.0187,
      "step": 999680
    },
    {
      "epoch": 1.6360309760871417,
      "grad_norm": 0.5311253070831299,
      "learning_rate": 6.772269920863452e-06,
      "loss": 0.0201,
      "step": 999700
    },
    {
      "epoch": 1.6360637065257948,
      "grad_norm": 0.22996403276920319,
      "learning_rate": 6.772204028649934e-06,
      "loss": 0.014,
      "step": 999720
    },
    {
      "epoch": 1.6360964369644482,
      "grad_norm": 1.0837382078170776,
      "learning_rate": 6.772138136436418e-06,
      "loss": 0.0128,
      "step": 999740
    },
    {
      "epoch": 1.6361291674031015,
      "grad_norm": 0.36307787895202637,
      "learning_rate": 6.7720722442229006e-06,
      "loss": 0.0236,
      "step": 999760
    },
    {
      "epoch": 1.636161897841755,
      "grad_norm": 1.167315125465393,
      "learning_rate": 6.772006352009383e-06,
      "loss": 0.0196,
      "step": 999780
    },
    {
      "epoch": 1.6361946282804083,
      "grad_norm": 0.8236961960792542,
      "learning_rate": 6.771940459795866e-06,
      "loss": 0.015,
      "step": 999800
    },
    {
      "epoch": 1.6362273587190614,
      "grad_norm": 1.6210672855377197,
      "learning_rate": 6.77187456758235e-06,
      "loss": 0.0175,
      "step": 999820
    },
    {
      "epoch": 1.636260089157715,
      "grad_norm": 0.6043092012405396,
      "learning_rate": 6.7718086753688315e-06,
      "loss": 0.0226,
      "step": 999840
    },
    {
      "epoch": 1.6362928195963682,
      "grad_norm": 1.7898260354995728,
      "learning_rate": 6.771742783155315e-06,
      "loss": 0.0213,
      "step": 999860
    },
    {
      "epoch": 1.6363255500350216,
      "grad_norm": 0.138565331697464,
      "learning_rate": 6.771676890941799e-06,
      "loss": 0.0251,
      "step": 999880
    },
    {
      "epoch": 1.636358280473675,
      "grad_norm": 0.8307439088821411,
      "learning_rate": 6.771610998728281e-06,
      "loss": 0.0202,
      "step": 999900
    },
    {
      "epoch": 1.636391010912328,
      "grad_norm": 0.6994392275810242,
      "learning_rate": 6.771545106514764e-06,
      "loss": 0.0178,
      "step": 999920
    },
    {
      "epoch": 1.6364237413509817,
      "grad_norm": 0.3612988293170929,
      "learning_rate": 6.771479214301246e-06,
      "loss": 0.0217,
      "step": 999940
    },
    {
      "epoch": 1.6364564717896348,
      "grad_norm": 0.6722326278686523,
      "learning_rate": 6.77141332208773e-06,
      "loss": 0.0256,
      "step": 999960
    },
    {
      "epoch": 1.6364892022282884,
      "grad_norm": 0.6398351788520813,
      "learning_rate": 6.771347429874212e-06,
      "loss": 0.0159,
      "step": 999980
    },
    {
      "epoch": 1.6365219326669416,
      "grad_norm": 0.8185103535652161,
      "learning_rate": 6.771281537660695e-06,
      "loss": 0.0202,
      "step": 1000000
    },
    {
      "epoch": 1.6365219326669416,
      "eval_loss": 0.010421110317111015,
      "eval_runtime": 6522.2609,
      "eval_samples_per_second": 157.592,
      "eval_steps_per_second": 15.759,
      "eval_sts-dev_pearson_cosine": 0.9757451110896957,
      "eval_sts-dev_spearman_cosine": 0.8897775371163437,
      "step": 1000000
    },
    {
      "epoch": 1.636554663105595,
      "grad_norm": 0.9778366684913635,
      "learning_rate": 6.771215645447178e-06,
      "loss": 0.0193,
      "step": 1000020
    },
    {
      "epoch": 1.6365873935442483,
      "grad_norm": 0.46094945073127747,
      "learning_rate": 6.7711497532336615e-06,
      "loss": 0.0176,
      "step": 1000040
    },
    {
      "epoch": 1.6366201239829015,
      "grad_norm": 0.4655529856681824,
      "learning_rate": 6.771083861020143e-06,
      "loss": 0.0158,
      "step": 1000060
    },
    {
      "epoch": 1.636652854421555,
      "grad_norm": 0.48160499334335327,
      "learning_rate": 6.771017968806627e-06,
      "loss": 0.021,
      "step": 1000080
    },
    {
      "epoch": 1.6366855848602082,
      "grad_norm": 0.8129319548606873,
      "learning_rate": 6.770952076593109e-06,
      "loss": 0.0243,
      "step": 1000100
    },
    {
      "epoch": 1.6367183152988616,
      "grad_norm": 1.2415552139282227,
      "learning_rate": 6.7708861843795924e-06,
      "loss": 0.0192,
      "step": 1000120
    },
    {
      "epoch": 1.636751045737515,
      "grad_norm": 0.354139119386673,
      "learning_rate": 6.770820292166075e-06,
      "loss": 0.0198,
      "step": 1000140
    },
    {
      "epoch": 1.6367837761761683,
      "grad_norm": 0.33094602823257446,
      "learning_rate": 6.770754399952558e-06,
      "loss": 0.0184,
      "step": 1000160
    },
    {
      "epoch": 1.6368165066148217,
      "grad_norm": 2.460925579071045,
      "learning_rate": 6.770688507739041e-06,
      "loss": 0.0204,
      "step": 1000180
    },
    {
      "epoch": 1.6368492370534748,
      "grad_norm": 1.9172401428222656,
      "learning_rate": 6.770622615525524e-06,
      "loss": 0.0297,
      "step": 1000200
    },
    {
      "epoch": 1.6368819674921284,
      "grad_norm": 3.329789638519287,
      "learning_rate": 6.770556723312007e-06,
      "loss": 0.0302,
      "step": 1000220
    },
    {
      "epoch": 1.6369146979307816,
      "grad_norm": 0.44023188948631287,
      "learning_rate": 6.77049083109849e-06,
      "loss": 0.0189,
      "step": 1000240
    },
    {
      "epoch": 1.636947428369435,
      "grad_norm": 0.10734865814447403,
      "learning_rate": 6.770424938884973e-06,
      "loss": 0.0241,
      "step": 1000260
    },
    {
      "epoch": 1.6369801588080883,
      "grad_norm": 0.8956825733184814,
      "learning_rate": 6.770359046671455e-06,
      "loss": 0.0186,
      "step": 1000280
    },
    {
      "epoch": 1.6370128892467417,
      "grad_norm": 1.5830347537994385,
      "learning_rate": 6.770293154457939e-06,
      "loss": 0.018,
      "step": 1000300
    },
    {
      "epoch": 1.637045619685395,
      "grad_norm": 0.21943995356559753,
      "learning_rate": 6.770227262244421e-06,
      "loss": 0.0177,
      "step": 1000320
    },
    {
      "epoch": 1.6370783501240482,
      "grad_norm": 1.4206392765045166,
      "learning_rate": 6.770161370030904e-06,
      "loss": 0.0231,
      "step": 1000340
    },
    {
      "epoch": 1.6371110805627018,
      "grad_norm": 0.11576645821332932,
      "learning_rate": 6.770095477817387e-06,
      "loss": 0.0119,
      "step": 1000360
    },
    {
      "epoch": 1.637143811001355,
      "grad_norm": 0.34684133529663086,
      "learning_rate": 6.77002958560387e-06,
      "loss": 0.0222,
      "step": 1000380
    },
    {
      "epoch": 1.6371765414400083,
      "grad_norm": 0.12989811599254608,
      "learning_rate": 6.7699636933903525e-06,
      "loss": 0.022,
      "step": 1000400
    },
    {
      "epoch": 1.6372092718786617,
      "grad_norm": 0.48698270320892334,
      "learning_rate": 6.769897801176836e-06,
      "loss": 0.0145,
      "step": 1000420
    },
    {
      "epoch": 1.637242002317315,
      "grad_norm": 0.9772467017173767,
      "learning_rate": 6.769831908963318e-06,
      "loss": 0.0273,
      "step": 1000440
    },
    {
      "epoch": 1.6372747327559685,
      "grad_norm": 0.8824894428253174,
      "learning_rate": 6.7697660167498015e-06,
      "loss": 0.0191,
      "step": 1000460
    },
    {
      "epoch": 1.6373074631946216,
      "grad_norm": 0.6181410551071167,
      "learning_rate": 6.7697001245362834e-06,
      "loss": 0.0227,
      "step": 1000480
    },
    {
      "epoch": 1.6373401936332752,
      "grad_norm": 0.2820797264575958,
      "learning_rate": 6.769634232322767e-06,
      "loss": 0.0161,
      "step": 1000500
    },
    {
      "epoch": 1.6373729240719284,
      "grad_norm": 0.3345884382724762,
      "learning_rate": 6.76956834010925e-06,
      "loss": 0.0172,
      "step": 1000520
    },
    {
      "epoch": 1.6374056545105817,
      "grad_norm": 0.9775364995002747,
      "learning_rate": 6.7695024478957325e-06,
      "loss": 0.0205,
      "step": 1000540
    },
    {
      "epoch": 1.637438384949235,
      "grad_norm": 0.3759308159351349,
      "learning_rate": 6.769436555682216e-06,
      "loss": 0.023,
      "step": 1000560
    },
    {
      "epoch": 1.6374711153878885,
      "grad_norm": 0.3074110746383667,
      "learning_rate": 6.769370663468699e-06,
      "loss": 0.0189,
      "step": 1000580
    },
    {
      "epoch": 1.6375038458265418,
      "grad_norm": 0.09574125707149506,
      "learning_rate": 6.7693047712551816e-06,
      "loss": 0.0212,
      "step": 1000600
    },
    {
      "epoch": 1.637536576265195,
      "grad_norm": 0.335950642824173,
      "learning_rate": 6.769238879041664e-06,
      "loss": 0.029,
      "step": 1000620
    },
    {
      "epoch": 1.6375693067038486,
      "grad_norm": 0.9529913067817688,
      "learning_rate": 6.769172986828148e-06,
      "loss": 0.0135,
      "step": 1000640
    },
    {
      "epoch": 1.6376020371425017,
      "grad_norm": 0.5054532885551453,
      "learning_rate": 6.76910709461463e-06,
      "loss": 0.0222,
      "step": 1000660
    },
    {
      "epoch": 1.6376347675811551,
      "grad_norm": 1.0077193975448608,
      "learning_rate": 6.769041202401113e-06,
      "loss": 0.0226,
      "step": 1000680
    },
    {
      "epoch": 1.6376674980198085,
      "grad_norm": 0.7054051756858826,
      "learning_rate": 6.768975310187595e-06,
      "loss": 0.0232,
      "step": 1000700
    },
    {
      "epoch": 1.6377002284584616,
      "grad_norm": 0.888600766658783,
      "learning_rate": 6.768909417974079e-06,
      "loss": 0.0179,
      "step": 1000720
    },
    {
      "epoch": 1.6377329588971152,
      "grad_norm": 0.603029727935791,
      "learning_rate": 6.768843525760561e-06,
      "loss": 0.0159,
      "step": 1000740
    },
    {
      "epoch": 1.6377656893357684,
      "grad_norm": 0.19711098074913025,
      "learning_rate": 6.768777633547044e-06,
      "loss": 0.0227,
      "step": 1000760
    },
    {
      "epoch": 1.637798419774422,
      "grad_norm": 0.55870121717453,
      "learning_rate": 6.768711741333527e-06,
      "loss": 0.0208,
      "step": 1000780
    },
    {
      "epoch": 1.6378311502130751,
      "grad_norm": 0.610748827457428,
      "learning_rate": 6.76864584912001e-06,
      "loss": 0.0219,
      "step": 1000800
    },
    {
      "epoch": 1.6378638806517285,
      "grad_norm": 1.8604117631912231,
      "learning_rate": 6.7685799569064926e-06,
      "loss": 0.0279,
      "step": 1000820
    },
    {
      "epoch": 1.6378966110903819,
      "grad_norm": 0.1121097207069397,
      "learning_rate": 6.768514064692976e-06,
      "loss": 0.0232,
      "step": 1000840
    },
    {
      "epoch": 1.637929341529035,
      "grad_norm": 0.14707165956497192,
      "learning_rate": 6.768448172479458e-06,
      "loss": 0.0173,
      "step": 1000860
    },
    {
      "epoch": 1.6379620719676886,
      "grad_norm": 0.10521363466978073,
      "learning_rate": 6.768382280265942e-06,
      "loss": 0.0187,
      "step": 1000880
    },
    {
      "epoch": 1.6379948024063418,
      "grad_norm": 0.44473370909690857,
      "learning_rate": 6.7683163880524235e-06,
      "loss": 0.0263,
      "step": 1000900
    },
    {
      "epoch": 1.6380275328449951,
      "grad_norm": 0.24898888170719147,
      "learning_rate": 6.768250495838907e-06,
      "loss": 0.0147,
      "step": 1000920
    },
    {
      "epoch": 1.6380602632836485,
      "grad_norm": 0.7379354238510132,
      "learning_rate": 6.768184603625391e-06,
      "loss": 0.0204,
      "step": 1000940
    },
    {
      "epoch": 1.6380929937223019,
      "grad_norm": 0.6797969937324524,
      "learning_rate": 6.768118711411873e-06,
      "loss": 0.0133,
      "step": 1000960
    },
    {
      "epoch": 1.6381257241609553,
      "grad_norm": 0.5042650699615479,
      "learning_rate": 6.768052819198356e-06,
      "loss": 0.0144,
      "step": 1000980
    },
    {
      "epoch": 1.6381584545996084,
      "grad_norm": 1.5318892002105713,
      "learning_rate": 6.767986926984839e-06,
      "loss": 0.0273,
      "step": 1001000
    },
    {
      "epoch": 1.638191185038262,
      "grad_norm": 0.18132051825523376,
      "learning_rate": 6.767921034771322e-06,
      "loss": 0.0174,
      "step": 1001020
    },
    {
      "epoch": 1.6382239154769151,
      "grad_norm": 0.5692088603973389,
      "learning_rate": 6.767855142557804e-06,
      "loss": 0.0208,
      "step": 1001040
    },
    {
      "epoch": 1.6382566459155685,
      "grad_norm": 0.3007747232913971,
      "learning_rate": 6.767789250344288e-06,
      "loss": 0.0225,
      "step": 1001060
    },
    {
      "epoch": 1.638289376354222,
      "grad_norm": 0.6761678457260132,
      "learning_rate": 6.76772335813077e-06,
      "loss": 0.0168,
      "step": 1001080
    },
    {
      "epoch": 1.6383221067928753,
      "grad_norm": 0.7067053914070129,
      "learning_rate": 6.7676574659172534e-06,
      "loss": 0.0165,
      "step": 1001100
    },
    {
      "epoch": 1.6383548372315286,
      "grad_norm": 0.36858394742012024,
      "learning_rate": 6.767591573703735e-06,
      "loss": 0.0157,
      "step": 1001120
    },
    {
      "epoch": 1.6383875676701818,
      "grad_norm": 0.1746087372303009,
      "learning_rate": 6.767525681490219e-06,
      "loss": 0.0119,
      "step": 1001140
    },
    {
      "epoch": 1.6384202981088354,
      "grad_norm": 0.21960067749023438,
      "learning_rate": 6.767459789276702e-06,
      "loss": 0.0193,
      "step": 1001160
    },
    {
      "epoch": 1.6384530285474885,
      "grad_norm": 0.6672824025154114,
      "learning_rate": 6.767393897063184e-06,
      "loss": 0.0183,
      "step": 1001180
    },
    {
      "epoch": 1.638485758986142,
      "grad_norm": 0.3295226991176605,
      "learning_rate": 6.767328004849667e-06,
      "loss": 0.0303,
      "step": 1001200
    },
    {
      "epoch": 1.6385184894247953,
      "grad_norm": 0.35917404294013977,
      "learning_rate": 6.767262112636151e-06,
      "loss": 0.0182,
      "step": 1001220
    },
    {
      "epoch": 1.6385512198634486,
      "grad_norm": 0.5861576199531555,
      "learning_rate": 6.767196220422633e-06,
      "loss": 0.0129,
      "step": 1001240
    },
    {
      "epoch": 1.638583950302102,
      "grad_norm": 1.0506120920181274,
      "learning_rate": 6.767130328209116e-06,
      "loss": 0.0327,
      "step": 1001260
    },
    {
      "epoch": 1.6386166807407552,
      "grad_norm": 1.145964503288269,
      "learning_rate": 6.7670644359956e-06,
      "loss": 0.0308,
      "step": 1001280
    },
    {
      "epoch": 1.6386494111794088,
      "grad_norm": 0.8090051412582397,
      "learning_rate": 6.766998543782082e-06,
      "loss": 0.0211,
      "step": 1001300
    },
    {
      "epoch": 1.638682141618062,
      "grad_norm": 0.32187268137931824,
      "learning_rate": 6.766932651568565e-06,
      "loss": 0.0278,
      "step": 1001320
    },
    {
      "epoch": 1.6387148720567153,
      "grad_norm": 0.3414454162120819,
      "learning_rate": 6.766866759355047e-06,
      "loss": 0.0124,
      "step": 1001340
    },
    {
      "epoch": 1.6387476024953687,
      "grad_norm": 0.9345800876617432,
      "learning_rate": 6.766800867141531e-06,
      "loss": 0.018,
      "step": 1001360
    },
    {
      "epoch": 1.638780332934022,
      "grad_norm": 0.35757747292518616,
      "learning_rate": 6.7667349749280135e-06,
      "loss": 0.0237,
      "step": 1001380
    },
    {
      "epoch": 1.6388130633726754,
      "grad_norm": 0.5611022710800171,
      "learning_rate": 6.766669082714496e-06,
      "loss": 0.018,
      "step": 1001400
    },
    {
      "epoch": 1.6388457938113286,
      "grad_norm": 0.2772761881351471,
      "learning_rate": 6.766603190500979e-06,
      "loss": 0.0157,
      "step": 1001420
    },
    {
      "epoch": 1.6388785242499821,
      "grad_norm": 0.3900597393512726,
      "learning_rate": 6.7665372982874626e-06,
      "loss": 0.0205,
      "step": 1001440
    },
    {
      "epoch": 1.6389112546886353,
      "grad_norm": 0.6564790606498718,
      "learning_rate": 6.7664714060739445e-06,
      "loss": 0.0178,
      "step": 1001460
    },
    {
      "epoch": 1.6389439851272887,
      "grad_norm": 0.5491861701011658,
      "learning_rate": 6.766405513860428e-06,
      "loss": 0.0175,
      "step": 1001480
    },
    {
      "epoch": 1.638976715565942,
      "grad_norm": 0.3425552248954773,
      "learning_rate": 6.76633962164691e-06,
      "loss": 0.0142,
      "step": 1001500
    },
    {
      "epoch": 1.6390094460045952,
      "grad_norm": 0.38328051567077637,
      "learning_rate": 6.7662737294333935e-06,
      "loss": 0.0199,
      "step": 1001520
    },
    {
      "epoch": 1.6390421764432488,
      "grad_norm": 0.8404744267463684,
      "learning_rate": 6.766207837219875e-06,
      "loss": 0.0217,
      "step": 1001540
    },
    {
      "epoch": 1.639074906881902,
      "grad_norm": 0.8331803679466248,
      "learning_rate": 6.766141945006359e-06,
      "loss": 0.0174,
      "step": 1001560
    },
    {
      "epoch": 1.6391076373205555,
      "grad_norm": 0.42346644401550293,
      "learning_rate": 6.766076052792842e-06,
      "loss": 0.0223,
      "step": 1001580
    },
    {
      "epoch": 1.6391403677592087,
      "grad_norm": 0.6959187984466553,
      "learning_rate": 6.766010160579325e-06,
      "loss": 0.0227,
      "step": 1001600
    },
    {
      "epoch": 1.639173098197862,
      "grad_norm": 0.3545437753200531,
      "learning_rate": 6.765944268365808e-06,
      "loss": 0.017,
      "step": 1001620
    },
    {
      "epoch": 1.6392058286365154,
      "grad_norm": 0.7837790846824646,
      "learning_rate": 6.765878376152291e-06,
      "loss": 0.0245,
      "step": 1001640
    },
    {
      "epoch": 1.6392385590751686,
      "grad_norm": 0.8872047066688538,
      "learning_rate": 6.765812483938774e-06,
      "loss": 0.0159,
      "step": 1001660
    },
    {
      "epoch": 1.6392712895138222,
      "grad_norm": 0.2689286172389984,
      "learning_rate": 6.765746591725256e-06,
      "loss": 0.0125,
      "step": 1001680
    },
    {
      "epoch": 1.6393040199524753,
      "grad_norm": 0.7450563311576843,
      "learning_rate": 6.76568069951174e-06,
      "loss": 0.0166,
      "step": 1001700
    },
    {
      "epoch": 1.6393367503911287,
      "grad_norm": 4.695182800292969,
      "learning_rate": 6.765614807298222e-06,
      "loss": 0.0182,
      "step": 1001720
    },
    {
      "epoch": 1.639369480829782,
      "grad_norm": 0.16487357020378113,
      "learning_rate": 6.765548915084705e-06,
      "loss": 0.02,
      "step": 1001740
    },
    {
      "epoch": 1.6394022112684354,
      "grad_norm": 0.1332807093858719,
      "learning_rate": 6.765483022871187e-06,
      "loss": 0.0272,
      "step": 1001760
    },
    {
      "epoch": 1.6394349417070888,
      "grad_norm": 0.5065292119979858,
      "learning_rate": 6.765417130657671e-06,
      "loss": 0.0175,
      "step": 1001780
    },
    {
      "epoch": 1.639467672145742,
      "grad_norm": 0.4483826756477356,
      "learning_rate": 6.7653512384441536e-06,
      "loss": 0.0217,
      "step": 1001800
    },
    {
      "epoch": 1.6395004025843956,
      "grad_norm": 0.8809248805046082,
      "learning_rate": 6.765285346230636e-06,
      "loss": 0.0191,
      "step": 1001820
    },
    {
      "epoch": 1.6395331330230487,
      "grad_norm": 0.973800539970398,
      "learning_rate": 6.765219454017119e-06,
      "loss": 0.024,
      "step": 1001840
    },
    {
      "epoch": 1.639565863461702,
      "grad_norm": 0.6907384395599365,
      "learning_rate": 6.765153561803603e-06,
      "loss": 0.0235,
      "step": 1001860
    },
    {
      "epoch": 1.6395985939003554,
      "grad_norm": 0.3090091347694397,
      "learning_rate": 6.7650876695900845e-06,
      "loss": 0.0231,
      "step": 1001880
    },
    {
      "epoch": 1.6396313243390088,
      "grad_norm": 0.16755908727645874,
      "learning_rate": 6.765021777376568e-06,
      "loss": 0.0173,
      "step": 1001900
    },
    {
      "epoch": 1.6396640547776622,
      "grad_norm": 0.1604393869638443,
      "learning_rate": 6.76495588516305e-06,
      "loss": 0.0241,
      "step": 1001920
    },
    {
      "epoch": 1.6396967852163153,
      "grad_norm": 0.2550617754459381,
      "learning_rate": 6.764889992949534e-06,
      "loss": 0.0229,
      "step": 1001940
    },
    {
      "epoch": 1.639729515654969,
      "grad_norm": 0.11013764888048172,
      "learning_rate": 6.764824100736016e-06,
      "loss": 0.0173,
      "step": 1001960
    },
    {
      "epoch": 1.639762246093622,
      "grad_norm": 0.4482704997062683,
      "learning_rate": 6.764758208522499e-06,
      "loss": 0.021,
      "step": 1001980
    },
    {
      "epoch": 1.6397949765322755,
      "grad_norm": 0.5447509288787842,
      "learning_rate": 6.764692316308983e-06,
      "loss": 0.0217,
      "step": 1002000
    },
    {
      "epoch": 1.6398277069709288,
      "grad_norm": 0.2767833173274994,
      "learning_rate": 6.764626424095465e-06,
      "loss": 0.0147,
      "step": 1002020
    },
    {
      "epoch": 1.6398604374095822,
      "grad_norm": 0.1554087996482849,
      "learning_rate": 6.764560531881948e-06,
      "loss": 0.0205,
      "step": 1002040
    },
    {
      "epoch": 1.6398931678482356,
      "grad_norm": 0.5134094953536987,
      "learning_rate": 6.764494639668431e-06,
      "loss": 0.0133,
      "step": 1002060
    },
    {
      "epoch": 1.6399258982868887,
      "grad_norm": 0.10672587901353836,
      "learning_rate": 6.7644287474549145e-06,
      "loss": 0.0208,
      "step": 1002080
    },
    {
      "epoch": 1.6399586287255423,
      "grad_norm": 1.4533631801605225,
      "learning_rate": 6.764362855241396e-06,
      "loss": 0.0227,
      "step": 1002100
    },
    {
      "epoch": 1.6399913591641955,
      "grad_norm": 1.4066379070281982,
      "learning_rate": 6.76429696302788e-06,
      "loss": 0.0255,
      "step": 1002120
    },
    {
      "epoch": 1.6400240896028488,
      "grad_norm": 0.1681617647409439,
      "learning_rate": 6.764231070814362e-06,
      "loss": 0.0188,
      "step": 1002140
    },
    {
      "epoch": 1.6400568200415022,
      "grad_norm": 0.32699936628341675,
      "learning_rate": 6.7641651786008454e-06,
      "loss": 0.014,
      "step": 1002160
    },
    {
      "epoch": 1.6400895504801556,
      "grad_norm": 0.5365212559700012,
      "learning_rate": 6.764099286387328e-06,
      "loss": 0.0219,
      "step": 1002180
    },
    {
      "epoch": 1.640122280918809,
      "grad_norm": 0.7802034020423889,
      "learning_rate": 6.764033394173811e-06,
      "loss": 0.0185,
      "step": 1002200
    },
    {
      "epoch": 1.6401550113574621,
      "grad_norm": 0.16851378977298737,
      "learning_rate": 6.763967501960294e-06,
      "loss": 0.0184,
      "step": 1002220
    },
    {
      "epoch": 1.6401877417961157,
      "grad_norm": 0.17214246094226837,
      "learning_rate": 6.763901609746777e-06,
      "loss": 0.0165,
      "step": 1002240
    },
    {
      "epoch": 1.6402204722347689,
      "grad_norm": 1.3028573989868164,
      "learning_rate": 6.763835717533259e-06,
      "loss": 0.0244,
      "step": 1002260
    },
    {
      "epoch": 1.6402532026734222,
      "grad_norm": 0.26022595167160034,
      "learning_rate": 6.763769825319743e-06,
      "loss": 0.0198,
      "step": 1002280
    },
    {
      "epoch": 1.6402859331120756,
      "grad_norm": 0.621167778968811,
      "learning_rate": 6.763703933106225e-06,
      "loss": 0.0212,
      "step": 1002300
    },
    {
      "epoch": 1.6403186635507288,
      "grad_norm": 0.22712741792201996,
      "learning_rate": 6.763638040892708e-06,
      "loss": 0.0252,
      "step": 1002320
    },
    {
      "epoch": 1.6403513939893823,
      "grad_norm": 0.6936545372009277,
      "learning_rate": 6.763572148679192e-06,
      "loss": 0.0209,
      "step": 1002340
    },
    {
      "epoch": 1.6403841244280355,
      "grad_norm": 0.29429230093955994,
      "learning_rate": 6.763506256465674e-06,
      "loss": 0.0164,
      "step": 1002360
    },
    {
      "epoch": 1.640416854866689,
      "grad_norm": 1.045790672302246,
      "learning_rate": 6.763440364252157e-06,
      "loss": 0.0135,
      "step": 1002380
    },
    {
      "epoch": 1.6404495853053422,
      "grad_norm": 0.26617228984832764,
      "learning_rate": 6.76337447203864e-06,
      "loss": 0.0202,
      "step": 1002400
    },
    {
      "epoch": 1.6404823157439956,
      "grad_norm": 0.3602951765060425,
      "learning_rate": 6.763308579825123e-06,
      "loss": 0.0141,
      "step": 1002420
    },
    {
      "epoch": 1.640515046182649,
      "grad_norm": 0.2380615472793579,
      "learning_rate": 6.7632426876116055e-06,
      "loss": 0.0175,
      "step": 1002440
    },
    {
      "epoch": 1.6405477766213021,
      "grad_norm": 0.14429116249084473,
      "learning_rate": 6.763176795398089e-06,
      "loss": 0.0145,
      "step": 1002460
    },
    {
      "epoch": 1.6405805070599557,
      "grad_norm": 0.5921956300735474,
      "learning_rate": 6.763110903184571e-06,
      "loss": 0.0169,
      "step": 1002480
    },
    {
      "epoch": 1.6406132374986089,
      "grad_norm": 0.31988826394081116,
      "learning_rate": 6.7630450109710545e-06,
      "loss": 0.0131,
      "step": 1002500
    },
    {
      "epoch": 1.6406459679372623,
      "grad_norm": 0.34085750579833984,
      "learning_rate": 6.7629791187575364e-06,
      "loss": 0.0153,
      "step": 1002520
    },
    {
      "epoch": 1.6406786983759156,
      "grad_norm": 0.6646111607551575,
      "learning_rate": 6.76291322654402e-06,
      "loss": 0.0213,
      "step": 1002540
    },
    {
      "epoch": 1.640711428814569,
      "grad_norm": 0.33622708916664124,
      "learning_rate": 6.762847334330502e-06,
      "loss": 0.0221,
      "step": 1002560
    },
    {
      "epoch": 1.6407441592532224,
      "grad_norm": 0.63239985704422,
      "learning_rate": 6.7627814421169855e-06,
      "loss": 0.0209,
      "step": 1002580
    },
    {
      "epoch": 1.6407768896918755,
      "grad_norm": 0.9833825826644897,
      "learning_rate": 6.762715549903468e-06,
      "loss": 0.0184,
      "step": 1002600
    },
    {
      "epoch": 1.6408096201305291,
      "grad_norm": 0.5429216027259827,
      "learning_rate": 6.762649657689951e-06,
      "loss": 0.0222,
      "step": 1002620
    },
    {
      "epoch": 1.6408423505691823,
      "grad_norm": 0.6693713068962097,
      "learning_rate": 6.762583765476434e-06,
      "loss": 0.0288,
      "step": 1002640
    },
    {
      "epoch": 1.6408750810078356,
      "grad_norm": 0.81449294090271,
      "learning_rate": 6.762517873262917e-06,
      "loss": 0.0191,
      "step": 1002660
    },
    {
      "epoch": 1.640907811446489,
      "grad_norm": 1.0302612781524658,
      "learning_rate": 6.762451981049401e-06,
      "loss": 0.0229,
      "step": 1002680
    },
    {
      "epoch": 1.6409405418851424,
      "grad_norm": 0.7486801147460938,
      "learning_rate": 6.762386088835883e-06,
      "loss": 0.0175,
      "step": 1002700
    },
    {
      "epoch": 1.6409732723237958,
      "grad_norm": 0.3653184771537781,
      "learning_rate": 6.762320196622366e-06,
      "loss": 0.0208,
      "step": 1002720
    },
    {
      "epoch": 1.641006002762449,
      "grad_norm": 0.44818827509880066,
      "learning_rate": 6.762254304408848e-06,
      "loss": 0.016,
      "step": 1002740
    },
    {
      "epoch": 1.6410387332011025,
      "grad_norm": 0.6235606074333191,
      "learning_rate": 6.762188412195332e-06,
      "loss": 0.0165,
      "step": 1002760
    },
    {
      "epoch": 1.6410714636397556,
      "grad_norm": 0.4667278826236725,
      "learning_rate": 6.762122519981814e-06,
      "loss": 0.0334,
      "step": 1002780
    },
    {
      "epoch": 1.641104194078409,
      "grad_norm": 0.9964396953582764,
      "learning_rate": 6.762056627768297e-06,
      "loss": 0.0322,
      "step": 1002800
    },
    {
      "epoch": 1.6411369245170624,
      "grad_norm": 0.9509936571121216,
      "learning_rate": 6.76199073555478e-06,
      "loss": 0.0165,
      "step": 1002820
    },
    {
      "epoch": 1.6411696549557158,
      "grad_norm": 1.331714391708374,
      "learning_rate": 6.761924843341263e-06,
      "loss": 0.0213,
      "step": 1002840
    },
    {
      "epoch": 1.6412023853943691,
      "grad_norm": 0.6696261763572693,
      "learning_rate": 6.7618589511277456e-06,
      "loss": 0.0192,
      "step": 1002860
    },
    {
      "epoch": 1.6412351158330223,
      "grad_norm": 0.3166276812553406,
      "learning_rate": 6.761793058914229e-06,
      "loss": 0.0161,
      "step": 1002880
    },
    {
      "epoch": 1.6412678462716759,
      "grad_norm": 0.2426598221063614,
      "learning_rate": 6.761727166700711e-06,
      "loss": 0.0157,
      "step": 1002900
    },
    {
      "epoch": 1.641300576710329,
      "grad_norm": 0.8788745403289795,
      "learning_rate": 6.761661274487195e-06,
      "loss": 0.0201,
      "step": 1002920
    },
    {
      "epoch": 1.6413333071489824,
      "grad_norm": 0.18342728912830353,
      "learning_rate": 6.7615953822736765e-06,
      "loss": 0.0164,
      "step": 1002940
    },
    {
      "epoch": 1.6413660375876358,
      "grad_norm": 0.5323405861854553,
      "learning_rate": 6.76152949006016e-06,
      "loss": 0.0138,
      "step": 1002960
    },
    {
      "epoch": 1.641398768026289,
      "grad_norm": 0.20805132389068604,
      "learning_rate": 6.761463597846643e-06,
      "loss": 0.0218,
      "step": 1002980
    },
    {
      "epoch": 1.6414314984649425,
      "grad_norm": 0.7529011368751526,
      "learning_rate": 6.761397705633126e-06,
      "loss": 0.025,
      "step": 1003000
    },
    {
      "epoch": 1.6414642289035957,
      "grad_norm": 0.9241191148757935,
      "learning_rate": 6.761331813419608e-06,
      "loss": 0.0207,
      "step": 1003020
    },
    {
      "epoch": 1.6414969593422493,
      "grad_norm": 0.19930589199066162,
      "learning_rate": 6.761265921206092e-06,
      "loss": 0.0191,
      "step": 1003040
    },
    {
      "epoch": 1.6415296897809024,
      "grad_norm": 0.45958730578422546,
      "learning_rate": 6.761200028992575e-06,
      "loss": 0.0193,
      "step": 1003060
    },
    {
      "epoch": 1.6415624202195558,
      "grad_norm": 0.19043110311031342,
      "learning_rate": 6.761134136779057e-06,
      "loss": 0.0197,
      "step": 1003080
    },
    {
      "epoch": 1.6415951506582092,
      "grad_norm": 0.21239593625068665,
      "learning_rate": 6.761068244565541e-06,
      "loss": 0.0146,
      "step": 1003100
    },
    {
      "epoch": 1.6416278810968623,
      "grad_norm": 0.5437114238739014,
      "learning_rate": 6.761002352352023e-06,
      "loss": 0.0232,
      "step": 1003120
    },
    {
      "epoch": 1.641660611535516,
      "grad_norm": 0.5844451189041138,
      "learning_rate": 6.7609364601385065e-06,
      "loss": 0.0221,
      "step": 1003140
    },
    {
      "epoch": 1.641693341974169,
      "grad_norm": 0.6202999353408813,
      "learning_rate": 6.760870567924988e-06,
      "loss": 0.0236,
      "step": 1003160
    },
    {
      "epoch": 1.6417260724128224,
      "grad_norm": 0.09885870665311813,
      "learning_rate": 6.760804675711472e-06,
      "loss": 0.0154,
      "step": 1003180
    },
    {
      "epoch": 1.6417588028514758,
      "grad_norm": 0.6510496735572815,
      "learning_rate": 6.760738783497955e-06,
      "loss": 0.0277,
      "step": 1003200
    },
    {
      "epoch": 1.6417915332901292,
      "grad_norm": 0.6481500267982483,
      "learning_rate": 6.760672891284437e-06,
      "loss": 0.0257,
      "step": 1003220
    },
    {
      "epoch": 1.6418242637287825,
      "grad_norm": 0.197431281208992,
      "learning_rate": 6.76060699907092e-06,
      "loss": 0.0242,
      "step": 1003240
    },
    {
      "epoch": 1.6418569941674357,
      "grad_norm": 1.0494918823242188,
      "learning_rate": 6.760541106857404e-06,
      "loss": 0.0238,
      "step": 1003260
    },
    {
      "epoch": 1.6418897246060893,
      "grad_norm": 0.746231734752655,
      "learning_rate": 6.760475214643886e-06,
      "loss": 0.0198,
      "step": 1003280
    },
    {
      "epoch": 1.6419224550447424,
      "grad_norm": 0.46218404173851013,
      "learning_rate": 6.760409322430369e-06,
      "loss": 0.0182,
      "step": 1003300
    },
    {
      "epoch": 1.6419551854833958,
      "grad_norm": 1.7484415769577026,
      "learning_rate": 6.760343430216851e-06,
      "loss": 0.0244,
      "step": 1003320
    },
    {
      "epoch": 1.6419879159220492,
      "grad_norm": 1.7518945932388306,
      "learning_rate": 6.760277538003335e-06,
      "loss": 0.0259,
      "step": 1003340
    },
    {
      "epoch": 1.6420206463607026,
      "grad_norm": 2.5170512199401855,
      "learning_rate": 6.7602116457898174e-06,
      "loss": 0.0167,
      "step": 1003360
    },
    {
      "epoch": 1.642053376799356,
      "grad_norm": 0.442016065120697,
      "learning_rate": 6.7601457535763e-06,
      "loss": 0.0205,
      "step": 1003380
    },
    {
      "epoch": 1.642086107238009,
      "grad_norm": 0.6604006290435791,
      "learning_rate": 6.760079861362784e-06,
      "loss": 0.0229,
      "step": 1003400
    },
    {
      "epoch": 1.6421188376766627,
      "grad_norm": 0.5741621255874634,
      "learning_rate": 6.7600139691492665e-06,
      "loss": 0.0207,
      "step": 1003420
    },
    {
      "epoch": 1.6421515681153158,
      "grad_norm": 0.4159613251686096,
      "learning_rate": 6.759948076935749e-06,
      "loss": 0.0172,
      "step": 1003440
    },
    {
      "epoch": 1.6421842985539692,
      "grad_norm": 1.9067788124084473,
      "learning_rate": 6.759882184722232e-06,
      "loss": 0.0265,
      "step": 1003460
    },
    {
      "epoch": 1.6422170289926226,
      "grad_norm": 0.5989028811454773,
      "learning_rate": 6.7598162925087156e-06,
      "loss": 0.021,
      "step": 1003480
    },
    {
      "epoch": 1.642249759431276,
      "grad_norm": 0.30379268527030945,
      "learning_rate": 6.7597504002951975e-06,
      "loss": 0.0219,
      "step": 1003500
    },
    {
      "epoch": 1.6422824898699293,
      "grad_norm": 0.17650893330574036,
      "learning_rate": 6.759684508081681e-06,
      "loss": 0.0207,
      "step": 1003520
    },
    {
      "epoch": 1.6423152203085825,
      "grad_norm": 1.2621376514434814,
      "learning_rate": 6.759618615868163e-06,
      "loss": 0.0229,
      "step": 1003540
    },
    {
      "epoch": 1.642347950747236,
      "grad_norm": 0.29477909207344055,
      "learning_rate": 6.7595527236546465e-06,
      "loss": 0.0306,
      "step": 1003560
    },
    {
      "epoch": 1.6423806811858892,
      "grad_norm": 0.935093104839325,
      "learning_rate": 6.7594868314411284e-06,
      "loss": 0.0191,
      "step": 1003580
    },
    {
      "epoch": 1.6424134116245426,
      "grad_norm": 0.3671172559261322,
      "learning_rate": 6.759420939227612e-06,
      "loss": 0.016,
      "step": 1003600
    },
    {
      "epoch": 1.642446142063196,
      "grad_norm": 0.24551375210285187,
      "learning_rate": 6.759355047014095e-06,
      "loss": 0.0157,
      "step": 1003620
    },
    {
      "epoch": 1.6424788725018493,
      "grad_norm": 1.263450264930725,
      "learning_rate": 6.7592891548005775e-06,
      "loss": 0.0162,
      "step": 1003640
    },
    {
      "epoch": 1.6425116029405027,
      "grad_norm": 0.5559632778167725,
      "learning_rate": 6.75922326258706e-06,
      "loss": 0.0259,
      "step": 1003660
    },
    {
      "epoch": 1.6425443333791558,
      "grad_norm": 2.56809663772583,
      "learning_rate": 6.759157370373544e-06,
      "loss": 0.0172,
      "step": 1003680
    },
    {
      "epoch": 1.6425770638178094,
      "grad_norm": 0.6175313591957092,
      "learning_rate": 6.759091478160026e-06,
      "loss": 0.0166,
      "step": 1003700
    },
    {
      "epoch": 1.6426097942564626,
      "grad_norm": 0.6589736938476562,
      "learning_rate": 6.759025585946509e-06,
      "loss": 0.0167,
      "step": 1003720
    },
    {
      "epoch": 1.642642524695116,
      "grad_norm": 0.4345945715904236,
      "learning_rate": 6.758959693732993e-06,
      "loss": 0.0201,
      "step": 1003740
    },
    {
      "epoch": 1.6426752551337693,
      "grad_norm": 0.35912707448005676,
      "learning_rate": 6.758893801519475e-06,
      "loss": 0.0221,
      "step": 1003760
    },
    {
      "epoch": 1.6427079855724225,
      "grad_norm": 1.1385574340820312,
      "learning_rate": 6.758827909305958e-06,
      "loss": 0.0171,
      "step": 1003780
    },
    {
      "epoch": 1.642740716011076,
      "grad_norm": 1.0782932043075562,
      "learning_rate": 6.75876201709244e-06,
      "loss": 0.0213,
      "step": 1003800
    },
    {
      "epoch": 1.6427734464497292,
      "grad_norm": 0.5677118301391602,
      "learning_rate": 6.758696124878924e-06,
      "loss": 0.0125,
      "step": 1003820
    },
    {
      "epoch": 1.6428061768883828,
      "grad_norm": 0.1479398012161255,
      "learning_rate": 6.758630232665407e-06,
      "loss": 0.0129,
      "step": 1003840
    },
    {
      "epoch": 1.642838907327036,
      "grad_norm": 0.20949265360832214,
      "learning_rate": 6.758564340451889e-06,
      "loss": 0.0147,
      "step": 1003860
    },
    {
      "epoch": 1.6428716377656893,
      "grad_norm": 1.539228916168213,
      "learning_rate": 6.758498448238372e-06,
      "loss": 0.0185,
      "step": 1003880
    },
    {
      "epoch": 1.6429043682043427,
      "grad_norm": 1.5621365308761597,
      "learning_rate": 6.758432556024856e-06,
      "loss": 0.0204,
      "step": 1003900
    },
    {
      "epoch": 1.6429370986429959,
      "grad_norm": 1.4835513830184937,
      "learning_rate": 6.7583666638113375e-06,
      "loss": 0.0217,
      "step": 1003920
    },
    {
      "epoch": 1.6429698290816495,
      "grad_norm": 0.7948142886161804,
      "learning_rate": 6.758300771597821e-06,
      "loss": 0.0167,
      "step": 1003940
    },
    {
      "epoch": 1.6430025595203026,
      "grad_norm": 0.15703271329402924,
      "learning_rate": 6.758234879384303e-06,
      "loss": 0.0254,
      "step": 1003960
    },
    {
      "epoch": 1.643035289958956,
      "grad_norm": 0.16580913960933685,
      "learning_rate": 6.758168987170787e-06,
      "loss": 0.0202,
      "step": 1003980
    },
    {
      "epoch": 1.6430680203976094,
      "grad_norm": 0.26943713426589966,
      "learning_rate": 6.758103094957269e-06,
      "loss": 0.0146,
      "step": 1004000
    },
    {
      "epoch": 1.6431007508362627,
      "grad_norm": 0.7183195948600769,
      "learning_rate": 6.758037202743752e-06,
      "loss": 0.0245,
      "step": 1004020
    },
    {
      "epoch": 1.643133481274916,
      "grad_norm": 0.4386225640773773,
      "learning_rate": 6.757971310530235e-06,
      "loss": 0.0189,
      "step": 1004040
    },
    {
      "epoch": 1.6431662117135692,
      "grad_norm": 1.235066294670105,
      "learning_rate": 6.757905418316718e-06,
      "loss": 0.0186,
      "step": 1004060
    },
    {
      "epoch": 1.6431989421522228,
      "grad_norm": 0.37492626905441284,
      "learning_rate": 6.757839526103201e-06,
      "loss": 0.0263,
      "step": 1004080
    },
    {
      "epoch": 1.643231672590876,
      "grad_norm": 0.8309088349342346,
      "learning_rate": 6.757773633889684e-06,
      "loss": 0.0227,
      "step": 1004100
    },
    {
      "epoch": 1.6432644030295294,
      "grad_norm": 0.15272369980812073,
      "learning_rate": 6.7577077416761675e-06,
      "loss": 0.0165,
      "step": 1004120
    },
    {
      "epoch": 1.6432971334681827,
      "grad_norm": 0.7943468689918518,
      "learning_rate": 6.757641849462649e-06,
      "loss": 0.0235,
      "step": 1004140
    },
    {
      "epoch": 1.643329863906836,
      "grad_norm": 0.31119808554649353,
      "learning_rate": 6.757575957249133e-06,
      "loss": 0.0148,
      "step": 1004160
    },
    {
      "epoch": 1.6433625943454895,
      "grad_norm": 0.33318597078323364,
      "learning_rate": 6.757510065035615e-06,
      "loss": 0.0194,
      "step": 1004180
    },
    {
      "epoch": 1.6433953247841426,
      "grad_norm": 0.24221056699752808,
      "learning_rate": 6.7574441728220984e-06,
      "loss": 0.0221,
      "step": 1004200
    },
    {
      "epoch": 1.6434280552227962,
      "grad_norm": 0.7314555644989014,
      "learning_rate": 6.757378280608581e-06,
      "loss": 0.0224,
      "step": 1004220
    },
    {
      "epoch": 1.6434607856614494,
      "grad_norm": 0.4025692641735077,
      "learning_rate": 6.757312388395064e-06,
      "loss": 0.0147,
      "step": 1004240
    },
    {
      "epoch": 1.6434935161001027,
      "grad_norm": 0.5864989757537842,
      "learning_rate": 6.757246496181547e-06,
      "loss": 0.0236,
      "step": 1004260
    },
    {
      "epoch": 1.6435262465387561,
      "grad_norm": 0.48161107301712036,
      "learning_rate": 6.75718060396803e-06,
      "loss": 0.0185,
      "step": 1004280
    },
    {
      "epoch": 1.6435589769774095,
      "grad_norm": 1.2064181566238403,
      "learning_rate": 6.757114711754512e-06,
      "loss": 0.019,
      "step": 1004300
    },
    {
      "epoch": 1.6435917074160629,
      "grad_norm": 0.5164520740509033,
      "learning_rate": 6.757048819540996e-06,
      "loss": 0.0202,
      "step": 1004320
    },
    {
      "epoch": 1.643624437854716,
      "grad_norm": 0.7622706294059753,
      "learning_rate": 6.756982927327478e-06,
      "loss": 0.0231,
      "step": 1004340
    },
    {
      "epoch": 1.6436571682933696,
      "grad_norm": 0.25644537806510925,
      "learning_rate": 6.756917035113961e-06,
      "loss": 0.0206,
      "step": 1004360
    },
    {
      "epoch": 1.6436898987320228,
      "grad_norm": 0.1924440562725067,
      "learning_rate": 6.756851142900444e-06,
      "loss": 0.0164,
      "step": 1004380
    },
    {
      "epoch": 1.6437226291706761,
      "grad_norm": 0.2900366187095642,
      "learning_rate": 6.756785250686927e-06,
      "loss": 0.0166,
      "step": 1004400
    },
    {
      "epoch": 1.6437553596093295,
      "grad_norm": 1.9459638595581055,
      "learning_rate": 6.756719358473409e-06,
      "loss": 0.02,
      "step": 1004420
    },
    {
      "epoch": 1.6437880900479829,
      "grad_norm": 0.6892768144607544,
      "learning_rate": 6.756653466259893e-06,
      "loss": 0.0235,
      "step": 1004440
    },
    {
      "epoch": 1.6438208204866362,
      "grad_norm": 0.5394024848937988,
      "learning_rate": 6.756587574046376e-06,
      "loss": 0.0201,
      "step": 1004460
    },
    {
      "epoch": 1.6438535509252894,
      "grad_norm": 0.31791427731513977,
      "learning_rate": 6.7565216818328585e-06,
      "loss": 0.0137,
      "step": 1004480
    },
    {
      "epoch": 1.643886281363943,
      "grad_norm": 0.2919921875,
      "learning_rate": 6.756455789619342e-06,
      "loss": 0.0118,
      "step": 1004500
    },
    {
      "epoch": 1.6439190118025961,
      "grad_norm": 1.0754594802856445,
      "learning_rate": 6.756389897405824e-06,
      "loss": 0.0165,
      "step": 1004520
    },
    {
      "epoch": 1.6439517422412495,
      "grad_norm": 0.38973361253738403,
      "learning_rate": 6.7563240051923076e-06,
      "loss": 0.0167,
      "step": 1004540
    },
    {
      "epoch": 1.6439844726799029,
      "grad_norm": 1.0346571207046509,
      "learning_rate": 6.7562581129787894e-06,
      "loss": 0.0169,
      "step": 1004560
    },
    {
      "epoch": 1.644017203118556,
      "grad_norm": 1.328069806098938,
      "learning_rate": 6.756192220765273e-06,
      "loss": 0.0191,
      "step": 1004580
    },
    {
      "epoch": 1.6440499335572096,
      "grad_norm": 0.9921166300773621,
      "learning_rate": 6.756126328551755e-06,
      "loss": 0.0186,
      "step": 1004600
    },
    {
      "epoch": 1.6440826639958628,
      "grad_norm": 0.6454060673713684,
      "learning_rate": 6.7560604363382385e-06,
      "loss": 0.0115,
      "step": 1004620
    },
    {
      "epoch": 1.6441153944345164,
      "grad_norm": 0.6276937127113342,
      "learning_rate": 6.755994544124721e-06,
      "loss": 0.0174,
      "step": 1004640
    },
    {
      "epoch": 1.6441481248731695,
      "grad_norm": 0.612880289554596,
      "learning_rate": 6.755928651911204e-06,
      "loss": 0.0212,
      "step": 1004660
    },
    {
      "epoch": 1.644180855311823,
      "grad_norm": 0.31476128101348877,
      "learning_rate": 6.755862759697687e-06,
      "loss": 0.0143,
      "step": 1004680
    },
    {
      "epoch": 1.6442135857504763,
      "grad_norm": 0.1646786779165268,
      "learning_rate": 6.75579686748417e-06,
      "loss": 0.0258,
      "step": 1004700
    },
    {
      "epoch": 1.6442463161891294,
      "grad_norm": 0.7353402376174927,
      "learning_rate": 6.755730975270652e-06,
      "loss": 0.0246,
      "step": 1004720
    },
    {
      "epoch": 1.644279046627783,
      "grad_norm": 0.16304847598075867,
      "learning_rate": 6.755665083057136e-06,
      "loss": 0.0243,
      "step": 1004740
    },
    {
      "epoch": 1.6443117770664362,
      "grad_norm": 0.23339907824993134,
      "learning_rate": 6.755599190843618e-06,
      "loss": 0.0185,
      "step": 1004760
    },
    {
      "epoch": 1.6443445075050895,
      "grad_norm": 0.4434942603111267,
      "learning_rate": 6.755533298630101e-06,
      "loss": 0.028,
      "step": 1004780
    },
    {
      "epoch": 1.644377237943743,
      "grad_norm": 0.34552672505378723,
      "learning_rate": 6.755467406416585e-06,
      "loss": 0.0169,
      "step": 1004800
    },
    {
      "epoch": 1.6444099683823963,
      "grad_norm": 2.8876335620880127,
      "learning_rate": 6.755401514203067e-06,
      "loss": 0.0212,
      "step": 1004820
    },
    {
      "epoch": 1.6444426988210497,
      "grad_norm": 0.46627965569496155,
      "learning_rate": 6.75533562198955e-06,
      "loss": 0.0197,
      "step": 1004840
    },
    {
      "epoch": 1.6444754292597028,
      "grad_norm": 0.2868686318397522,
      "learning_rate": 6.755269729776033e-06,
      "loss": 0.021,
      "step": 1004860
    },
    {
      "epoch": 1.6445081596983564,
      "grad_norm": 0.14467938244342804,
      "learning_rate": 6.755203837562516e-06,
      "loss": 0.0219,
      "step": 1004880
    },
    {
      "epoch": 1.6445408901370095,
      "grad_norm": 0.4957297742366791,
      "learning_rate": 6.7551379453489986e-06,
      "loss": 0.027,
      "step": 1004900
    },
    {
      "epoch": 1.644573620575663,
      "grad_norm": 0.8382090926170349,
      "learning_rate": 6.755072053135482e-06,
      "loss": 0.0165,
      "step": 1004920
    },
    {
      "epoch": 1.6446063510143163,
      "grad_norm": 0.23487496376037598,
      "learning_rate": 6.755006160921964e-06,
      "loss": 0.0279,
      "step": 1004940
    },
    {
      "epoch": 1.6446390814529697,
      "grad_norm": 0.33453989028930664,
      "learning_rate": 6.754940268708448e-06,
      "loss": 0.0194,
      "step": 1004960
    },
    {
      "epoch": 1.644671811891623,
      "grad_norm": 0.5937385559082031,
      "learning_rate": 6.7548743764949295e-06,
      "loss": 0.016,
      "step": 1004980
    },
    {
      "epoch": 1.6447045423302762,
      "grad_norm": 0.19545355439186096,
      "learning_rate": 6.754808484281413e-06,
      "loss": 0.0167,
      "step": 1005000
    },
    {
      "epoch": 1.6447372727689298,
      "grad_norm": 0.8659201860427856,
      "learning_rate": 6.754742592067896e-06,
      "loss": 0.024,
      "step": 1005020
    },
    {
      "epoch": 1.644770003207583,
      "grad_norm": 0.2306482344865799,
      "learning_rate": 6.754676699854379e-06,
      "loss": 0.0164,
      "step": 1005040
    },
    {
      "epoch": 1.6448027336462363,
      "grad_norm": 1.3452911376953125,
      "learning_rate": 6.754610807640861e-06,
      "loss": 0.013,
      "step": 1005060
    },
    {
      "epoch": 1.6448354640848897,
      "grad_norm": 0.6427509784698486,
      "learning_rate": 6.754544915427345e-06,
      "loss": 0.0182,
      "step": 1005080
    },
    {
      "epoch": 1.644868194523543,
      "grad_norm": 0.3631153106689453,
      "learning_rate": 6.754479023213827e-06,
      "loss": 0.0284,
      "step": 1005100
    },
    {
      "epoch": 1.6449009249621964,
      "grad_norm": 0.26797938346862793,
      "learning_rate": 6.75441313100031e-06,
      "loss": 0.0158,
      "step": 1005120
    },
    {
      "epoch": 1.6449336554008496,
      "grad_norm": 0.27823448181152344,
      "learning_rate": 6.754347238786794e-06,
      "loss": 0.0206,
      "step": 1005140
    },
    {
      "epoch": 1.6449663858395032,
      "grad_norm": 1.5845080614089966,
      "learning_rate": 6.754281346573276e-06,
      "loss": 0.0177,
      "step": 1005160
    },
    {
      "epoch": 1.6449991162781563,
      "grad_norm": 1.6049084663391113,
      "learning_rate": 6.7542154543597595e-06,
      "loss": 0.018,
      "step": 1005180
    },
    {
      "epoch": 1.6450318467168097,
      "grad_norm": 0.6081281304359436,
      "learning_rate": 6.754149562146241e-06,
      "loss": 0.0178,
      "step": 1005200
    },
    {
      "epoch": 1.645064577155463,
      "grad_norm": 0.299104779958725,
      "learning_rate": 6.754083669932725e-06,
      "loss": 0.0183,
      "step": 1005220
    },
    {
      "epoch": 1.6450973075941164,
      "grad_norm": 0.42460891604423523,
      "learning_rate": 6.754017777719208e-06,
      "loss": 0.0133,
      "step": 1005240
    },
    {
      "epoch": 1.6451300380327698,
      "grad_norm": 0.6989224553108215,
      "learning_rate": 6.75395188550569e-06,
      "loss": 0.0248,
      "step": 1005260
    },
    {
      "epoch": 1.645162768471423,
      "grad_norm": 0.8332599401473999,
      "learning_rate": 6.753885993292173e-06,
      "loss": 0.0139,
      "step": 1005280
    },
    {
      "epoch": 1.6451954989100765,
      "grad_norm": 2.428715467453003,
      "learning_rate": 6.753820101078657e-06,
      "loss": 0.0217,
      "step": 1005300
    },
    {
      "epoch": 1.6452282293487297,
      "grad_norm": 2.1031031608581543,
      "learning_rate": 6.753754208865139e-06,
      "loss": 0.0157,
      "step": 1005320
    },
    {
      "epoch": 1.645260959787383,
      "grad_norm": 0.24357634782791138,
      "learning_rate": 6.753688316651622e-06,
      "loss": 0.0184,
      "step": 1005340
    },
    {
      "epoch": 1.6452936902260364,
      "grad_norm": 0.6912030577659607,
      "learning_rate": 6.753622424438104e-06,
      "loss": 0.0172,
      "step": 1005360
    },
    {
      "epoch": 1.6453264206646896,
      "grad_norm": 0.6789406538009644,
      "learning_rate": 6.753556532224588e-06,
      "loss": 0.0169,
      "step": 1005380
    },
    {
      "epoch": 1.6453591511033432,
      "grad_norm": 0.4240339398384094,
      "learning_rate": 6.75349064001107e-06,
      "loss": 0.0138,
      "step": 1005400
    },
    {
      "epoch": 1.6453918815419963,
      "grad_norm": 1.261289119720459,
      "learning_rate": 6.753424747797553e-06,
      "loss": 0.0167,
      "step": 1005420
    },
    {
      "epoch": 1.6454246119806497,
      "grad_norm": 0.3421441614627838,
      "learning_rate": 6.753358855584036e-06,
      "loss": 0.021,
      "step": 1005440
    },
    {
      "epoch": 1.645457342419303,
      "grad_norm": 0.9572590589523315,
      "learning_rate": 6.7532929633705195e-06,
      "loss": 0.0306,
      "step": 1005460
    },
    {
      "epoch": 1.6454900728579565,
      "grad_norm": 0.5586603283882141,
      "learning_rate": 6.753227071157001e-06,
      "loss": 0.0151,
      "step": 1005480
    },
    {
      "epoch": 1.6455228032966098,
      "grad_norm": 0.4492010772228241,
      "learning_rate": 6.753161178943485e-06,
      "loss": 0.0253,
      "step": 1005500
    },
    {
      "epoch": 1.645555533735263,
      "grad_norm": 1.3617361783981323,
      "learning_rate": 6.753095286729969e-06,
      "loss": 0.019,
      "step": 1005520
    },
    {
      "epoch": 1.6455882641739166,
      "grad_norm": 0.3954014182090759,
      "learning_rate": 6.7530293945164505e-06,
      "loss": 0.0282,
      "step": 1005540
    },
    {
      "epoch": 1.6456209946125697,
      "grad_norm": 0.5520201325416565,
      "learning_rate": 6.752963502302934e-06,
      "loss": 0.0186,
      "step": 1005560
    },
    {
      "epoch": 1.645653725051223,
      "grad_norm": 0.35592737793922424,
      "learning_rate": 6.752897610089416e-06,
      "loss": 0.0103,
      "step": 1005580
    },
    {
      "epoch": 1.6456864554898765,
      "grad_norm": 1.5386862754821777,
      "learning_rate": 6.7528317178758995e-06,
      "loss": 0.0195,
      "step": 1005600
    },
    {
      "epoch": 1.6457191859285298,
      "grad_norm": 0.35705095529556274,
      "learning_rate": 6.7527658256623814e-06,
      "loss": 0.0176,
      "step": 1005620
    },
    {
      "epoch": 1.6457519163671832,
      "grad_norm": 0.17118383944034576,
      "learning_rate": 6.752699933448865e-06,
      "loss": 0.0211,
      "step": 1005640
    },
    {
      "epoch": 1.6457846468058364,
      "grad_norm": 0.18251991271972656,
      "learning_rate": 6.752634041235348e-06,
      "loss": 0.0241,
      "step": 1005660
    },
    {
      "epoch": 1.64581737724449,
      "grad_norm": 0.0486212819814682,
      "learning_rate": 6.7525681490218305e-06,
      "loss": 0.0183,
      "step": 1005680
    },
    {
      "epoch": 1.645850107683143,
      "grad_norm": 2.5177369117736816,
      "learning_rate": 6.752502256808313e-06,
      "loss": 0.019,
      "step": 1005700
    },
    {
      "epoch": 1.6458828381217965,
      "grad_norm": 0.2929501235485077,
      "learning_rate": 6.752436364594797e-06,
      "loss": 0.0209,
      "step": 1005720
    },
    {
      "epoch": 1.6459155685604498,
      "grad_norm": 0.3551347553730011,
      "learning_rate": 6.752370472381279e-06,
      "loss": 0.0238,
      "step": 1005740
    },
    {
      "epoch": 1.6459482989991032,
      "grad_norm": 2.2631685733795166,
      "learning_rate": 6.752304580167762e-06,
      "loss": 0.0228,
      "step": 1005760
    },
    {
      "epoch": 1.6459810294377566,
      "grad_norm": 0.12355519831180573,
      "learning_rate": 6.752238687954244e-06,
      "loss": 0.0194,
      "step": 1005780
    },
    {
      "epoch": 1.6460137598764097,
      "grad_norm": 3.035588502883911,
      "learning_rate": 6.752172795740728e-06,
      "loss": 0.0189,
      "step": 1005800
    },
    {
      "epoch": 1.6460464903150633,
      "grad_norm": 0.1625019609928131,
      "learning_rate": 6.7521069035272105e-06,
      "loss": 0.0139,
      "step": 1005820
    },
    {
      "epoch": 1.6460792207537165,
      "grad_norm": 0.7122662663459778,
      "learning_rate": 6.752041011313693e-06,
      "loss": 0.0195,
      "step": 1005840
    },
    {
      "epoch": 1.6461119511923699,
      "grad_norm": 0.2375703752040863,
      "learning_rate": 6.751975119100177e-06,
      "loss": 0.0186,
      "step": 1005860
    },
    {
      "epoch": 1.6461446816310232,
      "grad_norm": 0.7609575986862183,
      "learning_rate": 6.75190922688666e-06,
      "loss": 0.0154,
      "step": 1005880
    },
    {
      "epoch": 1.6461774120696766,
      "grad_norm": 1.4898216724395752,
      "learning_rate": 6.751843334673142e-06,
      "loss": 0.0184,
      "step": 1005900
    },
    {
      "epoch": 1.64621014250833,
      "grad_norm": 0.7192502021789551,
      "learning_rate": 6.751777442459625e-06,
      "loss": 0.0153,
      "step": 1005920
    },
    {
      "epoch": 1.6462428729469831,
      "grad_norm": 0.2010928839445114,
      "learning_rate": 6.751711550246109e-06,
      "loss": 0.0187,
      "step": 1005940
    },
    {
      "epoch": 1.6462756033856367,
      "grad_norm": 0.282298743724823,
      "learning_rate": 6.7516456580325905e-06,
      "loss": 0.0225,
      "step": 1005960
    },
    {
      "epoch": 1.6463083338242899,
      "grad_norm": 1.2381019592285156,
      "learning_rate": 6.751579765819074e-06,
      "loss": 0.0256,
      "step": 1005980
    },
    {
      "epoch": 1.6463410642629432,
      "grad_norm": 0.6485795974731445,
      "learning_rate": 6.751513873605556e-06,
      "loss": 0.0248,
      "step": 1006000
    },
    {
      "epoch": 1.6463737947015966,
      "grad_norm": 0.17516209185123444,
      "learning_rate": 6.75144798139204e-06,
      "loss": 0.0165,
      "step": 1006020
    },
    {
      "epoch": 1.6464065251402498,
      "grad_norm": 0.4339176416397095,
      "learning_rate": 6.751382089178522e-06,
      "loss": 0.0151,
      "step": 1006040
    },
    {
      "epoch": 1.6464392555789034,
      "grad_norm": 0.3860377371311188,
      "learning_rate": 6.751316196965005e-06,
      "loss": 0.0165,
      "step": 1006060
    },
    {
      "epoch": 1.6464719860175565,
      "grad_norm": 0.31779614090919495,
      "learning_rate": 6.751250304751488e-06,
      "loss": 0.0173,
      "step": 1006080
    },
    {
      "epoch": 1.64650471645621,
      "grad_norm": 0.362732470035553,
      "learning_rate": 6.751184412537971e-06,
      "loss": 0.0175,
      "step": 1006100
    },
    {
      "epoch": 1.6465374468948633,
      "grad_norm": 0.4474288821220398,
      "learning_rate": 6.751118520324453e-06,
      "loss": 0.0158,
      "step": 1006120
    },
    {
      "epoch": 1.6465701773335166,
      "grad_norm": 0.5432860255241394,
      "learning_rate": 6.751052628110937e-06,
      "loss": 0.0216,
      "step": 1006140
    },
    {
      "epoch": 1.64660290777217,
      "grad_norm": 0.735323965549469,
      "learning_rate": 6.750986735897419e-06,
      "loss": 0.0184,
      "step": 1006160
    },
    {
      "epoch": 1.6466356382108231,
      "grad_norm": 0.6293541193008423,
      "learning_rate": 6.750920843683902e-06,
      "loss": 0.0228,
      "step": 1006180
    },
    {
      "epoch": 1.6466683686494767,
      "grad_norm": 0.1524404138326645,
      "learning_rate": 6.750854951470386e-06,
      "loss": 0.0246,
      "step": 1006200
    },
    {
      "epoch": 1.64670109908813,
      "grad_norm": 0.3536692261695862,
      "learning_rate": 6.750789059256868e-06,
      "loss": 0.0251,
      "step": 1006220
    },
    {
      "epoch": 1.6467338295267833,
      "grad_norm": 1.067528247833252,
      "learning_rate": 6.7507231670433514e-06,
      "loss": 0.0191,
      "step": 1006240
    },
    {
      "epoch": 1.6467665599654366,
      "grad_norm": 0.6484874486923218,
      "learning_rate": 6.750657274829834e-06,
      "loss": 0.0163,
      "step": 1006260
    },
    {
      "epoch": 1.64679929040409,
      "grad_norm": 0.23253901302814484,
      "learning_rate": 6.750591382616317e-06,
      "loss": 0.0224,
      "step": 1006280
    },
    {
      "epoch": 1.6468320208427434,
      "grad_norm": 0.6681643128395081,
      "learning_rate": 6.7505254904028e-06,
      "loss": 0.0155,
      "step": 1006300
    },
    {
      "epoch": 1.6468647512813965,
      "grad_norm": 0.23822130262851715,
      "learning_rate": 6.750459598189283e-06,
      "loss": 0.017,
      "step": 1006320
    },
    {
      "epoch": 1.6468974817200501,
      "grad_norm": 0.6565128564834595,
      "learning_rate": 6.750393705975765e-06,
      "loss": 0.0217,
      "step": 1006340
    },
    {
      "epoch": 1.6469302121587033,
      "grad_norm": 1.443424105644226,
      "learning_rate": 6.750327813762249e-06,
      "loss": 0.0257,
      "step": 1006360
    },
    {
      "epoch": 1.6469629425973566,
      "grad_norm": 0.29256027936935425,
      "learning_rate": 6.750261921548731e-06,
      "loss": 0.0176,
      "step": 1006380
    },
    {
      "epoch": 1.64699567303601,
      "grad_norm": 0.8105043768882751,
      "learning_rate": 6.750196029335214e-06,
      "loss": 0.0168,
      "step": 1006400
    },
    {
      "epoch": 1.6470284034746634,
      "grad_norm": 0.6380283236503601,
      "learning_rate": 6.750130137121696e-06,
      "loss": 0.0192,
      "step": 1006420
    },
    {
      "epoch": 1.6470611339133168,
      "grad_norm": 0.20558100938796997,
      "learning_rate": 6.75006424490818e-06,
      "loss": 0.0182,
      "step": 1006440
    },
    {
      "epoch": 1.64709386435197,
      "grad_norm": 0.935662567615509,
      "learning_rate": 6.7499983526946624e-06,
      "loss": 0.0241,
      "step": 1006460
    },
    {
      "epoch": 1.6471265947906235,
      "grad_norm": 1.387151837348938,
      "learning_rate": 6.749932460481145e-06,
      "loss": 0.0181,
      "step": 1006480
    },
    {
      "epoch": 1.6471593252292767,
      "grad_norm": 0.7324629426002502,
      "learning_rate": 6.749866568267628e-06,
      "loss": 0.0206,
      "step": 1006500
    },
    {
      "epoch": 1.64719205566793,
      "grad_norm": 1.0931272506713867,
      "learning_rate": 6.7498006760541115e-06,
      "loss": 0.0139,
      "step": 1006520
    },
    {
      "epoch": 1.6472247861065834,
      "grad_norm": 0.20115652680397034,
      "learning_rate": 6.749734783840595e-06,
      "loss": 0.022,
      "step": 1006540
    },
    {
      "epoch": 1.6472575165452368,
      "grad_norm": 0.6725282073020935,
      "learning_rate": 6.749668891627077e-06,
      "loss": 0.0253,
      "step": 1006560
    },
    {
      "epoch": 1.6472902469838901,
      "grad_norm": 1.021077036857605,
      "learning_rate": 6.7496029994135606e-06,
      "loss": 0.0222,
      "step": 1006580
    },
    {
      "epoch": 1.6473229774225433,
      "grad_norm": 0.2621719539165497,
      "learning_rate": 6.7495371072000425e-06,
      "loss": 0.0206,
      "step": 1006600
    },
    {
      "epoch": 1.647355707861197,
      "grad_norm": 0.29259979724884033,
      "learning_rate": 6.749471214986526e-06,
      "loss": 0.0212,
      "step": 1006620
    },
    {
      "epoch": 1.64738843829985,
      "grad_norm": 0.14933036267757416,
      "learning_rate": 6.749405322773008e-06,
      "loss": 0.0238,
      "step": 1006640
    },
    {
      "epoch": 1.6474211687385034,
      "grad_norm": 0.346911758184433,
      "learning_rate": 6.7493394305594915e-06,
      "loss": 0.0163,
      "step": 1006660
    },
    {
      "epoch": 1.6474538991771568,
      "grad_norm": 0.12494441121816635,
      "learning_rate": 6.749273538345974e-06,
      "loss": 0.0212,
      "step": 1006680
    },
    {
      "epoch": 1.6474866296158102,
      "grad_norm": 0.4309263527393341,
      "learning_rate": 6.749207646132457e-06,
      "loss": 0.0178,
      "step": 1006700
    },
    {
      "epoch": 1.6475193600544635,
      "grad_norm": 0.3680865466594696,
      "learning_rate": 6.74914175391894e-06,
      "loss": 0.0199,
      "step": 1006720
    },
    {
      "epoch": 1.6475520904931167,
      "grad_norm": 0.22432437539100647,
      "learning_rate": 6.749075861705423e-06,
      "loss": 0.0204,
      "step": 1006740
    },
    {
      "epoch": 1.6475848209317703,
      "grad_norm": 0.6135860085487366,
      "learning_rate": 6.749009969491905e-06,
      "loss": 0.0252,
      "step": 1006760
    },
    {
      "epoch": 1.6476175513704234,
      "grad_norm": 0.23046132922172546,
      "learning_rate": 6.748944077278389e-06,
      "loss": 0.0224,
      "step": 1006780
    },
    {
      "epoch": 1.6476502818090768,
      "grad_norm": 0.10105215758085251,
      "learning_rate": 6.748878185064871e-06,
      "loss": 0.0175,
      "step": 1006800
    },
    {
      "epoch": 1.6476830122477302,
      "grad_norm": 0.14293871819972992,
      "learning_rate": 6.748812292851354e-06,
      "loss": 0.0161,
      "step": 1006820
    },
    {
      "epoch": 1.6477157426863833,
      "grad_norm": 0.41819509863853455,
      "learning_rate": 6.748746400637837e-06,
      "loss": 0.0168,
      "step": 1006840
    },
    {
      "epoch": 1.647748473125037,
      "grad_norm": 0.09729547798633575,
      "learning_rate": 6.74868050842432e-06,
      "loss": 0.0157,
      "step": 1006860
    },
    {
      "epoch": 1.64778120356369,
      "grad_norm": 0.22715522348880768,
      "learning_rate": 6.7486146162108025e-06,
      "loss": 0.0216,
      "step": 1006880
    },
    {
      "epoch": 1.6478139340023437,
      "grad_norm": 0.5159154534339905,
      "learning_rate": 6.748548723997286e-06,
      "loss": 0.023,
      "step": 1006900
    },
    {
      "epoch": 1.6478466644409968,
      "grad_norm": 0.6692408919334412,
      "learning_rate": 6.748482831783769e-06,
      "loss": 0.016,
      "step": 1006920
    },
    {
      "epoch": 1.6478793948796502,
      "grad_norm": 0.3769227862358093,
      "learning_rate": 6.7484169395702516e-06,
      "loss": 0.0226,
      "step": 1006940
    },
    {
      "epoch": 1.6479121253183036,
      "grad_norm": 1.2566096782684326,
      "learning_rate": 6.748351047356735e-06,
      "loss": 0.0164,
      "step": 1006960
    },
    {
      "epoch": 1.6479448557569567,
      "grad_norm": 0.7911190986633301,
      "learning_rate": 6.748285155143217e-06,
      "loss": 0.0166,
      "step": 1006980
    },
    {
      "epoch": 1.6479775861956103,
      "grad_norm": 0.17051555216312408,
      "learning_rate": 6.748219262929701e-06,
      "loss": 0.0167,
      "step": 1007000
    },
    {
      "epoch": 1.6480103166342635,
      "grad_norm": 0.6305622458457947,
      "learning_rate": 6.7481533707161825e-06,
      "loss": 0.0214,
      "step": 1007020
    },
    {
      "epoch": 1.6480430470729168,
      "grad_norm": 0.3450816571712494,
      "learning_rate": 6.748087478502666e-06,
      "loss": 0.0145,
      "step": 1007040
    },
    {
      "epoch": 1.6480757775115702,
      "grad_norm": 0.6635972857475281,
      "learning_rate": 6.748021586289149e-06,
      "loss": 0.0174,
      "step": 1007060
    },
    {
      "epoch": 1.6481085079502236,
      "grad_norm": 0.237350195646286,
      "learning_rate": 6.747955694075632e-06,
      "loss": 0.0149,
      "step": 1007080
    },
    {
      "epoch": 1.648141238388877,
      "grad_norm": 0.2500903606414795,
      "learning_rate": 6.747889801862114e-06,
      "loss": 0.0183,
      "step": 1007100
    },
    {
      "epoch": 1.64817396882753,
      "grad_norm": 0.4530773162841797,
      "learning_rate": 6.747823909648598e-06,
      "loss": 0.0232,
      "step": 1007120
    },
    {
      "epoch": 1.6482066992661837,
      "grad_norm": 0.4720524549484253,
      "learning_rate": 6.74775801743508e-06,
      "loss": 0.0245,
      "step": 1007140
    },
    {
      "epoch": 1.6482394297048368,
      "grad_norm": 0.8312495350837708,
      "learning_rate": 6.747692125221563e-06,
      "loss": 0.014,
      "step": 1007160
    },
    {
      "epoch": 1.6482721601434902,
      "grad_norm": 0.10631425678730011,
      "learning_rate": 6.747626233008045e-06,
      "loss": 0.0133,
      "step": 1007180
    },
    {
      "epoch": 1.6483048905821436,
      "grad_norm": 0.40467578172683716,
      "learning_rate": 6.747560340794529e-06,
      "loss": 0.0176,
      "step": 1007200
    },
    {
      "epoch": 1.648337621020797,
      "grad_norm": 0.1884162724018097,
      "learning_rate": 6.747494448581012e-06,
      "loss": 0.0168,
      "step": 1007220
    },
    {
      "epoch": 1.6483703514594503,
      "grad_norm": 1.38930344581604,
      "learning_rate": 6.747428556367494e-06,
      "loss": 0.0215,
      "step": 1007240
    },
    {
      "epoch": 1.6484030818981035,
      "grad_norm": 0.7319449782371521,
      "learning_rate": 6.747362664153978e-06,
      "loss": 0.0162,
      "step": 1007260
    },
    {
      "epoch": 1.648435812336757,
      "grad_norm": 0.45748424530029297,
      "learning_rate": 6.747296771940461e-06,
      "loss": 0.02,
      "step": 1007280
    },
    {
      "epoch": 1.6484685427754102,
      "grad_norm": 0.7133411169052124,
      "learning_rate": 6.7472308797269434e-06,
      "loss": 0.0244,
      "step": 1007300
    },
    {
      "epoch": 1.6485012732140636,
      "grad_norm": 0.342289000749588,
      "learning_rate": 6.747164987513426e-06,
      "loss": 0.0178,
      "step": 1007320
    },
    {
      "epoch": 1.648534003652717,
      "grad_norm": 1.4068814516067505,
      "learning_rate": 6.74709909529991e-06,
      "loss": 0.0289,
      "step": 1007340
    },
    {
      "epoch": 1.6485667340913703,
      "grad_norm": 0.6063947081565857,
      "learning_rate": 6.747033203086392e-06,
      "loss": 0.0186,
      "step": 1007360
    },
    {
      "epoch": 1.6485994645300237,
      "grad_norm": 0.620473325252533,
      "learning_rate": 6.746967310872875e-06,
      "loss": 0.0168,
      "step": 1007380
    },
    {
      "epoch": 1.6486321949686769,
      "grad_norm": 0.8813145756721497,
      "learning_rate": 6.746901418659357e-06,
      "loss": 0.0249,
      "step": 1007400
    },
    {
      "epoch": 1.6486649254073305,
      "grad_norm": 0.20974531769752502,
      "learning_rate": 6.746835526445841e-06,
      "loss": 0.0211,
      "step": 1007420
    },
    {
      "epoch": 1.6486976558459836,
      "grad_norm": 0.24341733753681183,
      "learning_rate": 6.746769634232323e-06,
      "loss": 0.0181,
      "step": 1007440
    },
    {
      "epoch": 1.648730386284637,
      "grad_norm": 0.5823729038238525,
      "learning_rate": 6.746703742018806e-06,
      "loss": 0.018,
      "step": 1007460
    },
    {
      "epoch": 1.6487631167232903,
      "grad_norm": 0.16727319359779358,
      "learning_rate": 6.746637849805289e-06,
      "loss": 0.0148,
      "step": 1007480
    },
    {
      "epoch": 1.6487958471619437,
      "grad_norm": 0.15943588316440582,
      "learning_rate": 6.746571957591772e-06,
      "loss": 0.0245,
      "step": 1007500
    },
    {
      "epoch": 1.648828577600597,
      "grad_norm": 0.39369824528694153,
      "learning_rate": 6.746506065378254e-06,
      "loss": 0.0225,
      "step": 1007520
    },
    {
      "epoch": 1.6488613080392502,
      "grad_norm": 0.44605451822280884,
      "learning_rate": 6.746440173164738e-06,
      "loss": 0.0193,
      "step": 1007540
    },
    {
      "epoch": 1.6488940384779038,
      "grad_norm": 1.0113259553909302,
      "learning_rate": 6.74637428095122e-06,
      "loss": 0.0183,
      "step": 1007560
    },
    {
      "epoch": 1.648926768916557,
      "grad_norm": 0.2554246187210083,
      "learning_rate": 6.7463083887377035e-06,
      "loss": 0.0142,
      "step": 1007580
    },
    {
      "epoch": 1.6489594993552104,
      "grad_norm": 0.18724682927131653,
      "learning_rate": 6.746242496524187e-06,
      "loss": 0.0169,
      "step": 1007600
    },
    {
      "epoch": 1.6489922297938637,
      "grad_norm": 0.24104824662208557,
      "learning_rate": 6.746176604310669e-06,
      "loss": 0.0232,
      "step": 1007620
    },
    {
      "epoch": 1.6490249602325169,
      "grad_norm": 0.4887103736400604,
      "learning_rate": 6.7461107120971525e-06,
      "loss": 0.0248,
      "step": 1007640
    },
    {
      "epoch": 1.6490576906711705,
      "grad_norm": 0.6628126502037048,
      "learning_rate": 6.7460448198836344e-06,
      "loss": 0.017,
      "step": 1007660
    },
    {
      "epoch": 1.6490904211098236,
      "grad_norm": 0.27863630652427673,
      "learning_rate": 6.745978927670118e-06,
      "loss": 0.016,
      "step": 1007680
    },
    {
      "epoch": 1.6491231515484772,
      "grad_norm": 0.6445027589797974,
      "learning_rate": 6.745913035456601e-06,
      "loss": 0.0142,
      "step": 1007700
    },
    {
      "epoch": 1.6491558819871304,
      "grad_norm": 0.37894976139068604,
      "learning_rate": 6.7458471432430835e-06,
      "loss": 0.0254,
      "step": 1007720
    },
    {
      "epoch": 1.6491886124257837,
      "grad_norm": 0.3558962345123291,
      "learning_rate": 6.745781251029566e-06,
      "loss": 0.0265,
      "step": 1007740
    },
    {
      "epoch": 1.6492213428644371,
      "grad_norm": 4.033872127532959,
      "learning_rate": 6.74571535881605e-06,
      "loss": 0.0195,
      "step": 1007760
    },
    {
      "epoch": 1.6492540733030903,
      "grad_norm": 0.873098611831665,
      "learning_rate": 6.745649466602532e-06,
      "loss": 0.0162,
      "step": 1007780
    },
    {
      "epoch": 1.6492868037417439,
      "grad_norm": 0.0905013456940651,
      "learning_rate": 6.745583574389015e-06,
      "loss": 0.0168,
      "step": 1007800
    },
    {
      "epoch": 1.649319534180397,
      "grad_norm": 0.09956896305084229,
      "learning_rate": 6.745517682175497e-06,
      "loss": 0.0218,
      "step": 1007820
    },
    {
      "epoch": 1.6493522646190504,
      "grad_norm": 0.24252410233020782,
      "learning_rate": 6.745451789961981e-06,
      "loss": 0.0191,
      "step": 1007840
    },
    {
      "epoch": 1.6493849950577038,
      "grad_norm": 0.2914878726005554,
      "learning_rate": 6.7453858977484635e-06,
      "loss": 0.019,
      "step": 1007860
    },
    {
      "epoch": 1.6494177254963571,
      "grad_norm": 0.7461156845092773,
      "learning_rate": 6.745320005534946e-06,
      "loss": 0.029,
      "step": 1007880
    },
    {
      "epoch": 1.6494504559350105,
      "grad_norm": 0.3118206858634949,
      "learning_rate": 6.745254113321429e-06,
      "loss": 0.0201,
      "step": 1007900
    },
    {
      "epoch": 1.6494831863736636,
      "grad_norm": 1.3016575574874878,
      "learning_rate": 6.745188221107913e-06,
      "loss": 0.0217,
      "step": 1007920
    },
    {
      "epoch": 1.6495159168123172,
      "grad_norm": 0.837080180644989,
      "learning_rate": 6.7451223288943945e-06,
      "loss": 0.0225,
      "step": 1007940
    },
    {
      "epoch": 1.6495486472509704,
      "grad_norm": 1.0243250131607056,
      "learning_rate": 6.745056436680878e-06,
      "loss": 0.0184,
      "step": 1007960
    },
    {
      "epoch": 1.6495813776896238,
      "grad_norm": 0.45842185616493225,
      "learning_rate": 6.744990544467362e-06,
      "loss": 0.0224,
      "step": 1007980
    },
    {
      "epoch": 1.6496141081282771,
      "grad_norm": 1.116137146949768,
      "learning_rate": 6.7449246522538436e-06,
      "loss": 0.0233,
      "step": 1008000
    },
    {
      "epoch": 1.6496468385669305,
      "grad_norm": 0.8805647492408752,
      "learning_rate": 6.744858760040327e-06,
      "loss": 0.0122,
      "step": 1008020
    },
    {
      "epoch": 1.6496795690055839,
      "grad_norm": 0.4822523295879364,
      "learning_rate": 6.744792867826809e-06,
      "loss": 0.0189,
      "step": 1008040
    },
    {
      "epoch": 1.649712299444237,
      "grad_norm": 0.7774369716644287,
      "learning_rate": 6.744726975613293e-06,
      "loss": 0.0143,
      "step": 1008060
    },
    {
      "epoch": 1.6497450298828906,
      "grad_norm": 0.22471216320991516,
      "learning_rate": 6.744661083399775e-06,
      "loss": 0.0183,
      "step": 1008080
    },
    {
      "epoch": 1.6497777603215438,
      "grad_norm": 0.22037829458713531,
      "learning_rate": 6.744595191186258e-06,
      "loss": 0.016,
      "step": 1008100
    },
    {
      "epoch": 1.6498104907601971,
      "grad_norm": 0.16623888909816742,
      "learning_rate": 6.744529298972741e-06,
      "loss": 0.0234,
      "step": 1008120
    },
    {
      "epoch": 1.6498432211988505,
      "grad_norm": 0.6050985455513,
      "learning_rate": 6.7444634067592244e-06,
      "loss": 0.0239,
      "step": 1008140
    },
    {
      "epoch": 1.649875951637504,
      "grad_norm": 0.6759190559387207,
      "learning_rate": 6.744397514545706e-06,
      "loss": 0.0156,
      "step": 1008160
    },
    {
      "epoch": 1.6499086820761573,
      "grad_norm": 1.5813734531402588,
      "learning_rate": 6.74433162233219e-06,
      "loss": 0.0231,
      "step": 1008180
    },
    {
      "epoch": 1.6499414125148104,
      "grad_norm": 0.3510797619819641,
      "learning_rate": 6.744265730118672e-06,
      "loss": 0.0164,
      "step": 1008200
    },
    {
      "epoch": 1.649974142953464,
      "grad_norm": 0.4482225179672241,
      "learning_rate": 6.744199837905155e-06,
      "loss": 0.0271,
      "step": 1008220
    },
    {
      "epoch": 1.6500068733921172,
      "grad_norm": 0.9484593272209167,
      "learning_rate": 6.744133945691638e-06,
      "loss": 0.0202,
      "step": 1008240
    },
    {
      "epoch": 1.6500396038307705,
      "grad_norm": 0.33263006806373596,
      "learning_rate": 6.744068053478121e-06,
      "loss": 0.0261,
      "step": 1008260
    },
    {
      "epoch": 1.650072334269424,
      "grad_norm": 0.29579654335975647,
      "learning_rate": 6.744002161264604e-06,
      "loss": 0.021,
      "step": 1008280
    },
    {
      "epoch": 1.650105064708077,
      "grad_norm": 0.9258264303207397,
      "learning_rate": 6.743936269051087e-06,
      "loss": 0.0223,
      "step": 1008300
    },
    {
      "epoch": 1.6501377951467306,
      "grad_norm": 0.3326614201068878,
      "learning_rate": 6.74387037683757e-06,
      "loss": 0.0215,
      "step": 1008320
    },
    {
      "epoch": 1.6501705255853838,
      "grad_norm": 2.180588960647583,
      "learning_rate": 6.743804484624053e-06,
      "loss": 0.0152,
      "step": 1008340
    },
    {
      "epoch": 1.6502032560240374,
      "grad_norm": 0.3650587201118469,
      "learning_rate": 6.743738592410536e-06,
      "loss": 0.0142,
      "step": 1008360
    },
    {
      "epoch": 1.6502359864626905,
      "grad_norm": 0.20863358676433563,
      "learning_rate": 6.743672700197018e-06,
      "loss": 0.0207,
      "step": 1008380
    },
    {
      "epoch": 1.650268716901344,
      "grad_norm": 1.2368000745773315,
      "learning_rate": 6.743606807983502e-06,
      "loss": 0.0212,
      "step": 1008400
    },
    {
      "epoch": 1.6503014473399973,
      "grad_norm": 0.28304582834243774,
      "learning_rate": 6.743540915769984e-06,
      "loss": 0.0271,
      "step": 1008420
    },
    {
      "epoch": 1.6503341777786504,
      "grad_norm": 0.990041971206665,
      "learning_rate": 6.743475023556467e-06,
      "loss": 0.0181,
      "step": 1008440
    },
    {
      "epoch": 1.650366908217304,
      "grad_norm": 0.3280067443847656,
      "learning_rate": 6.743409131342949e-06,
      "loss": 0.0136,
      "step": 1008460
    },
    {
      "epoch": 1.6503996386559572,
      "grad_norm": 0.7392475605010986,
      "learning_rate": 6.743343239129433e-06,
      "loss": 0.0166,
      "step": 1008480
    },
    {
      "epoch": 1.6504323690946106,
      "grad_norm": 0.7263889908790588,
      "learning_rate": 6.7432773469159154e-06,
      "loss": 0.0181,
      "step": 1008500
    },
    {
      "epoch": 1.650465099533264,
      "grad_norm": 0.7395638227462769,
      "learning_rate": 6.743211454702398e-06,
      "loss": 0.0242,
      "step": 1008520
    },
    {
      "epoch": 1.6504978299719173,
      "grad_norm": 2.1359660625457764,
      "learning_rate": 6.743145562488881e-06,
      "loss": 0.0292,
      "step": 1008540
    },
    {
      "epoch": 1.6505305604105707,
      "grad_norm": 0.2435215711593628,
      "learning_rate": 6.7430796702753645e-06,
      "loss": 0.016,
      "step": 1008560
    },
    {
      "epoch": 1.6505632908492238,
      "grad_norm": 0.6767560243606567,
      "learning_rate": 6.743013778061846e-06,
      "loss": 0.0337,
      "step": 1008580
    },
    {
      "epoch": 1.6505960212878774,
      "grad_norm": 0.8270167112350464,
      "learning_rate": 6.74294788584833e-06,
      "loss": 0.0164,
      "step": 1008600
    },
    {
      "epoch": 1.6506287517265306,
      "grad_norm": 0.21125595271587372,
      "learning_rate": 6.742881993634812e-06,
      "loss": 0.0225,
      "step": 1008620
    },
    {
      "epoch": 1.650661482165184,
      "grad_norm": 0.26115334033966064,
      "learning_rate": 6.7428161014212955e-06,
      "loss": 0.017,
      "step": 1008640
    },
    {
      "epoch": 1.6506942126038373,
      "grad_norm": 0.5863009691238403,
      "learning_rate": 6.742750209207779e-06,
      "loss": 0.0262,
      "step": 1008660
    },
    {
      "epoch": 1.6507269430424907,
      "grad_norm": 0.34177467226982117,
      "learning_rate": 6.742684316994261e-06,
      "loss": 0.0147,
      "step": 1008680
    },
    {
      "epoch": 1.650759673481144,
      "grad_norm": 0.27906665205955505,
      "learning_rate": 6.7426184247807445e-06,
      "loss": 0.0173,
      "step": 1008700
    },
    {
      "epoch": 1.6507924039197972,
      "grad_norm": 0.301191508769989,
      "learning_rate": 6.742552532567227e-06,
      "loss": 0.0175,
      "step": 1008720
    },
    {
      "epoch": 1.6508251343584508,
      "grad_norm": 0.5556188225746155,
      "learning_rate": 6.74248664035371e-06,
      "loss": 0.0201,
      "step": 1008740
    },
    {
      "epoch": 1.650857864797104,
      "grad_norm": 0.19281989336013794,
      "learning_rate": 6.742420748140193e-06,
      "loss": 0.0326,
      "step": 1008760
    },
    {
      "epoch": 1.6508905952357573,
      "grad_norm": 0.22820858657360077,
      "learning_rate": 6.742354855926676e-06,
      "loss": 0.0237,
      "step": 1008780
    },
    {
      "epoch": 1.6509233256744107,
      "grad_norm": 0.42946740984916687,
      "learning_rate": 6.742288963713158e-06,
      "loss": 0.0223,
      "step": 1008800
    },
    {
      "epoch": 1.650956056113064,
      "grad_norm": 0.21287910640239716,
      "learning_rate": 6.742223071499642e-06,
      "loss": 0.015,
      "step": 1008820
    },
    {
      "epoch": 1.6509887865517174,
      "grad_norm": 0.4051363170146942,
      "learning_rate": 6.742157179286124e-06,
      "loss": 0.0163,
      "step": 1008840
    },
    {
      "epoch": 1.6510215169903706,
      "grad_norm": 0.4648297429084778,
      "learning_rate": 6.742091287072607e-06,
      "loss": 0.0156,
      "step": 1008860
    },
    {
      "epoch": 1.6510542474290242,
      "grad_norm": 0.5553790330886841,
      "learning_rate": 6.74202539485909e-06,
      "loss": 0.0194,
      "step": 1008880
    },
    {
      "epoch": 1.6510869778676773,
      "grad_norm": 0.5504245162010193,
      "learning_rate": 6.741959502645573e-06,
      "loss": 0.0219,
      "step": 1008900
    },
    {
      "epoch": 1.6511197083063307,
      "grad_norm": 0.4177018404006958,
      "learning_rate": 6.7418936104320555e-06,
      "loss": 0.0137,
      "step": 1008920
    },
    {
      "epoch": 1.651152438744984,
      "grad_norm": 0.628278374671936,
      "learning_rate": 6.741827718218539e-06,
      "loss": 0.0173,
      "step": 1008940
    },
    {
      "epoch": 1.6511851691836374,
      "grad_norm": 0.14253059029579163,
      "learning_rate": 6.741761826005021e-06,
      "loss": 0.0186,
      "step": 1008960
    },
    {
      "epoch": 1.6512178996222908,
      "grad_norm": 0.5802589654922485,
      "learning_rate": 6.741695933791505e-06,
      "loss": 0.0155,
      "step": 1008980
    },
    {
      "epoch": 1.651250630060944,
      "grad_norm": 0.41365617513656616,
      "learning_rate": 6.7416300415779865e-06,
      "loss": 0.0124,
      "step": 1009000
    },
    {
      "epoch": 1.6512833604995976,
      "grad_norm": 1.2614765167236328,
      "learning_rate": 6.74156414936447e-06,
      "loss": 0.0261,
      "step": 1009020
    },
    {
      "epoch": 1.6513160909382507,
      "grad_norm": 0.3701379597187042,
      "learning_rate": 6.741498257150954e-06,
      "loss": 0.0183,
      "step": 1009040
    },
    {
      "epoch": 1.651348821376904,
      "grad_norm": 0.6443428993225098,
      "learning_rate": 6.7414323649374355e-06,
      "loss": 0.0183,
      "step": 1009060
    },
    {
      "epoch": 1.6513815518155575,
      "grad_norm": 0.08644332736730576,
      "learning_rate": 6.741366472723919e-06,
      "loss": 0.0246,
      "step": 1009080
    },
    {
      "epoch": 1.6514142822542106,
      "grad_norm": 0.400210976600647,
      "learning_rate": 6.741300580510402e-06,
      "loss": 0.0199,
      "step": 1009100
    },
    {
      "epoch": 1.6514470126928642,
      "grad_norm": 2.33620023727417,
      "learning_rate": 6.741234688296885e-06,
      "loss": 0.0294,
      "step": 1009120
    },
    {
      "epoch": 1.6514797431315174,
      "grad_norm": 0.41048556566238403,
      "learning_rate": 6.741168796083367e-06,
      "loss": 0.0219,
      "step": 1009140
    },
    {
      "epoch": 1.651512473570171,
      "grad_norm": 0.09384813904762268,
      "learning_rate": 6.741102903869851e-06,
      "loss": 0.0208,
      "step": 1009160
    },
    {
      "epoch": 1.651545204008824,
      "grad_norm": 0.8115410208702087,
      "learning_rate": 6.741037011656333e-06,
      "loss": 0.0188,
      "step": 1009180
    },
    {
      "epoch": 1.6515779344474775,
      "grad_norm": 0.7888398170471191,
      "learning_rate": 6.740971119442816e-06,
      "loss": 0.0219,
      "step": 1009200
    },
    {
      "epoch": 1.6516106648861308,
      "grad_norm": 0.49443480372428894,
      "learning_rate": 6.740905227229298e-06,
      "loss": 0.0125,
      "step": 1009220
    },
    {
      "epoch": 1.651643395324784,
      "grad_norm": 0.8801009654998779,
      "learning_rate": 6.740839335015782e-06,
      "loss": 0.0186,
      "step": 1009240
    },
    {
      "epoch": 1.6516761257634376,
      "grad_norm": 0.31326115131378174,
      "learning_rate": 6.740773442802264e-06,
      "loss": 0.0206,
      "step": 1009260
    },
    {
      "epoch": 1.6517088562020907,
      "grad_norm": 0.20338137447834015,
      "learning_rate": 6.740707550588747e-06,
      "loss": 0.0157,
      "step": 1009280
    },
    {
      "epoch": 1.651741586640744,
      "grad_norm": 0.4273661971092224,
      "learning_rate": 6.74064165837523e-06,
      "loss": 0.0127,
      "step": 1009300
    },
    {
      "epoch": 1.6517743170793975,
      "grad_norm": 0.23441921174526215,
      "learning_rate": 6.740575766161714e-06,
      "loss": 0.0193,
      "step": 1009320
    },
    {
      "epoch": 1.6518070475180509,
      "grad_norm": 0.23033307492733002,
      "learning_rate": 6.740509873948196e-06,
      "loss": 0.0197,
      "step": 1009340
    },
    {
      "epoch": 1.6518397779567042,
      "grad_norm": 0.2308194488286972,
      "learning_rate": 6.740443981734679e-06,
      "loss": 0.0171,
      "step": 1009360
    },
    {
      "epoch": 1.6518725083953574,
      "grad_norm": 0.6636398434638977,
      "learning_rate": 6.740378089521163e-06,
      "loss": 0.0272,
      "step": 1009380
    },
    {
      "epoch": 1.651905238834011,
      "grad_norm": 0.49239546060562134,
      "learning_rate": 6.740312197307645e-06,
      "loss": 0.0179,
      "step": 1009400
    },
    {
      "epoch": 1.6519379692726641,
      "grad_norm": 2.781651020050049,
      "learning_rate": 6.740246305094128e-06,
      "loss": 0.0253,
      "step": 1009420
    },
    {
      "epoch": 1.6519706997113175,
      "grad_norm": 0.3298465311527252,
      "learning_rate": 6.74018041288061e-06,
      "loss": 0.0223,
      "step": 1009440
    },
    {
      "epoch": 1.6520034301499709,
      "grad_norm": 0.19846348464488983,
      "learning_rate": 6.740114520667094e-06,
      "loss": 0.0197,
      "step": 1009460
    },
    {
      "epoch": 1.6520361605886242,
      "grad_norm": 0.2222658395767212,
      "learning_rate": 6.740048628453576e-06,
      "loss": 0.0218,
      "step": 1009480
    },
    {
      "epoch": 1.6520688910272776,
      "grad_norm": 0.741766095161438,
      "learning_rate": 6.739982736240059e-06,
      "loss": 0.0299,
      "step": 1009500
    },
    {
      "epoch": 1.6521016214659308,
      "grad_norm": 0.49038001894950867,
      "learning_rate": 6.739916844026542e-06,
      "loss": 0.0223,
      "step": 1009520
    },
    {
      "epoch": 1.6521343519045844,
      "grad_norm": 0.19559209048748016,
      "learning_rate": 6.739850951813025e-06,
      "loss": 0.0245,
      "step": 1009540
    },
    {
      "epoch": 1.6521670823432375,
      "grad_norm": 4.6576337814331055,
      "learning_rate": 6.739785059599507e-06,
      "loss": 0.0161,
      "step": 1009560
    },
    {
      "epoch": 1.6521998127818909,
      "grad_norm": 0.44116541743278503,
      "learning_rate": 6.739719167385991e-06,
      "loss": 0.0153,
      "step": 1009580
    },
    {
      "epoch": 1.6522325432205442,
      "grad_norm": 0.7521054744720459,
      "learning_rate": 6.739653275172473e-06,
      "loss": 0.0159,
      "step": 1009600
    },
    {
      "epoch": 1.6522652736591976,
      "grad_norm": 0.6160288453102112,
      "learning_rate": 6.7395873829589565e-06,
      "loss": 0.0169,
      "step": 1009620
    },
    {
      "epoch": 1.652298004097851,
      "grad_norm": 0.6003406047821045,
      "learning_rate": 6.739521490745438e-06,
      "loss": 0.021,
      "step": 1009640
    },
    {
      "epoch": 1.6523307345365041,
      "grad_norm": 0.6959946155548096,
      "learning_rate": 6.739455598531922e-06,
      "loss": 0.0239,
      "step": 1009660
    },
    {
      "epoch": 1.6523634649751577,
      "grad_norm": 0.8637959361076355,
      "learning_rate": 6.739389706318405e-06,
      "loss": 0.0293,
      "step": 1009680
    },
    {
      "epoch": 1.6523961954138109,
      "grad_norm": 0.49105149507522583,
      "learning_rate": 6.7393238141048874e-06,
      "loss": 0.0203,
      "step": 1009700
    },
    {
      "epoch": 1.6524289258524643,
      "grad_norm": 0.14099080860614777,
      "learning_rate": 6.739257921891371e-06,
      "loss": 0.0212,
      "step": 1009720
    },
    {
      "epoch": 1.6524616562911176,
      "grad_norm": 0.36485418677330017,
      "learning_rate": 6.739192029677854e-06,
      "loss": 0.0113,
      "step": 1009740
    },
    {
      "epoch": 1.652494386729771,
      "grad_norm": 1.0703771114349365,
      "learning_rate": 6.7391261374643365e-06,
      "loss": 0.0255,
      "step": 1009760
    },
    {
      "epoch": 1.6525271171684244,
      "grad_norm": 0.1509295552968979,
      "learning_rate": 6.739060245250819e-06,
      "loss": 0.0251,
      "step": 1009780
    },
    {
      "epoch": 1.6525598476070775,
      "grad_norm": 0.5843806862831116,
      "learning_rate": 6.738994353037303e-06,
      "loss": 0.0333,
      "step": 1009800
    },
    {
      "epoch": 1.6525925780457311,
      "grad_norm": 0.45967692136764526,
      "learning_rate": 6.738928460823785e-06,
      "loss": 0.014,
      "step": 1009820
    },
    {
      "epoch": 1.6526253084843843,
      "grad_norm": 0.3587155044078827,
      "learning_rate": 6.738862568610268e-06,
      "loss": 0.0169,
      "step": 1009840
    },
    {
      "epoch": 1.6526580389230376,
      "grad_norm": 0.5294410586357117,
      "learning_rate": 6.73879667639675e-06,
      "loss": 0.0248,
      "step": 1009860
    },
    {
      "epoch": 1.652690769361691,
      "grad_norm": 0.6694516539573669,
      "learning_rate": 6.738730784183234e-06,
      "loss": 0.0213,
      "step": 1009880
    },
    {
      "epoch": 1.6527234998003442,
      "grad_norm": 0.2785554826259613,
      "learning_rate": 6.7386648919697165e-06,
      "loss": 0.0174,
      "step": 1009900
    },
    {
      "epoch": 1.6527562302389978,
      "grad_norm": 1.124767541885376,
      "learning_rate": 6.738598999756199e-06,
      "loss": 0.0171,
      "step": 1009920
    },
    {
      "epoch": 1.652788960677651,
      "grad_norm": 0.7933827042579651,
      "learning_rate": 6.738533107542682e-06,
      "loss": 0.0233,
      "step": 1009940
    },
    {
      "epoch": 1.6528216911163045,
      "grad_norm": 0.3735673129558563,
      "learning_rate": 6.738467215329166e-06,
      "loss": 0.0134,
      "step": 1009960
    },
    {
      "epoch": 1.6528544215549577,
      "grad_norm": 0.4443453848361969,
      "learning_rate": 6.7384013231156475e-06,
      "loss": 0.0232,
      "step": 1009980
    },
    {
      "epoch": 1.652887151993611,
      "grad_norm": 0.1298002451658249,
      "learning_rate": 6.738335430902131e-06,
      "loss": 0.0164,
      "step": 1010000
    },
    {
      "epoch": 1.6529198824322644,
      "grad_norm": 0.185507133603096,
      "learning_rate": 6.738269538688613e-06,
      "loss": 0.0182,
      "step": 1010020
    },
    {
      "epoch": 1.6529526128709175,
      "grad_norm": 0.29482603073120117,
      "learning_rate": 6.7382036464750966e-06,
      "loss": 0.0183,
      "step": 1010040
    },
    {
      "epoch": 1.6529853433095711,
      "grad_norm": 0.37564969062805176,
      "learning_rate": 6.73813775426158e-06,
      "loss": 0.0216,
      "step": 1010060
    },
    {
      "epoch": 1.6530180737482243,
      "grad_norm": 0.9094703793525696,
      "learning_rate": 6.738071862048062e-06,
      "loss": 0.0181,
      "step": 1010080
    },
    {
      "epoch": 1.6530508041868777,
      "grad_norm": 0.10042991489171982,
      "learning_rate": 6.738005969834546e-06,
      "loss": 0.0122,
      "step": 1010100
    },
    {
      "epoch": 1.653083534625531,
      "grad_norm": 0.6946137547492981,
      "learning_rate": 6.737940077621028e-06,
      "loss": 0.0179,
      "step": 1010120
    },
    {
      "epoch": 1.6531162650641844,
      "grad_norm": 0.437252938747406,
      "learning_rate": 6.737874185407511e-06,
      "loss": 0.025,
      "step": 1010140
    },
    {
      "epoch": 1.6531489955028378,
      "grad_norm": 1.1477031707763672,
      "learning_rate": 6.737808293193994e-06,
      "loss": 0.0196,
      "step": 1010160
    },
    {
      "epoch": 1.653181725941491,
      "grad_norm": 0.15869316458702087,
      "learning_rate": 6.7377424009804774e-06,
      "loss": 0.0212,
      "step": 1010180
    },
    {
      "epoch": 1.6532144563801445,
      "grad_norm": 0.5499972701072693,
      "learning_rate": 6.737676508766959e-06,
      "loss": 0.0177,
      "step": 1010200
    },
    {
      "epoch": 1.6532471868187977,
      "grad_norm": 0.40861037373542786,
      "learning_rate": 6.737610616553443e-06,
      "loss": 0.0204,
      "step": 1010220
    },
    {
      "epoch": 1.653279917257451,
      "grad_norm": 0.31720516085624695,
      "learning_rate": 6.737544724339925e-06,
      "loss": 0.0222,
      "step": 1010240
    },
    {
      "epoch": 1.6533126476961044,
      "grad_norm": 0.2108914703130722,
      "learning_rate": 6.737478832126408e-06,
      "loss": 0.0191,
      "step": 1010260
    },
    {
      "epoch": 1.6533453781347578,
      "grad_norm": 0.20904259383678436,
      "learning_rate": 6.73741293991289e-06,
      "loss": 0.0198,
      "step": 1010280
    },
    {
      "epoch": 1.6533781085734112,
      "grad_norm": 0.36730825901031494,
      "learning_rate": 6.737347047699374e-06,
      "loss": 0.0164,
      "step": 1010300
    },
    {
      "epoch": 1.6534108390120643,
      "grad_norm": 0.1882326751947403,
      "learning_rate": 6.737281155485857e-06,
      "loss": 0.0107,
      "step": 1010320
    },
    {
      "epoch": 1.653443569450718,
      "grad_norm": 0.1675279587507248,
      "learning_rate": 6.737215263272339e-06,
      "loss": 0.0155,
      "step": 1010340
    },
    {
      "epoch": 1.653476299889371,
      "grad_norm": 0.18378229439258575,
      "learning_rate": 6.737149371058822e-06,
      "loss": 0.0206,
      "step": 1010360
    },
    {
      "epoch": 1.6535090303280244,
      "grad_norm": 0.6612232327461243,
      "learning_rate": 6.737083478845306e-06,
      "loss": 0.0183,
      "step": 1010380
    },
    {
      "epoch": 1.6535417607666778,
      "grad_norm": 0.11511743068695068,
      "learning_rate": 6.7370175866317876e-06,
      "loss": 0.0183,
      "step": 1010400
    },
    {
      "epoch": 1.6535744912053312,
      "grad_norm": 0.5916531085968018,
      "learning_rate": 6.736951694418271e-06,
      "loss": 0.0198,
      "step": 1010420
    },
    {
      "epoch": 1.6536072216439845,
      "grad_norm": 0.2883550524711609,
      "learning_rate": 6.736885802204755e-06,
      "loss": 0.0247,
      "step": 1010440
    },
    {
      "epoch": 1.6536399520826377,
      "grad_norm": 0.263602614402771,
      "learning_rate": 6.736819909991237e-06,
      "loss": 0.0198,
      "step": 1010460
    },
    {
      "epoch": 1.6536726825212913,
      "grad_norm": 0.15412582457065582,
      "learning_rate": 6.73675401777772e-06,
      "loss": 0.0157,
      "step": 1010480
    },
    {
      "epoch": 1.6537054129599444,
      "grad_norm": 0.5689229369163513,
      "learning_rate": 6.736688125564202e-06,
      "loss": 0.0251,
      "step": 1010500
    },
    {
      "epoch": 1.6537381433985978,
      "grad_norm": 1.2655816078186035,
      "learning_rate": 6.736622233350686e-06,
      "loss": 0.015,
      "step": 1010520
    },
    {
      "epoch": 1.6537708738372512,
      "grad_norm": 2.7268216609954834,
      "learning_rate": 6.7365563411371684e-06,
      "loss": 0.0192,
      "step": 1010540
    },
    {
      "epoch": 1.6538036042759046,
      "grad_norm": 0.4155198037624359,
      "learning_rate": 6.736490448923651e-06,
      "loss": 0.0245,
      "step": 1010560
    },
    {
      "epoch": 1.653836334714558,
      "grad_norm": 0.5148527026176453,
      "learning_rate": 6.736424556710134e-06,
      "loss": 0.015,
      "step": 1010580
    },
    {
      "epoch": 1.653869065153211,
      "grad_norm": 0.3792657256126404,
      "learning_rate": 6.7363586644966175e-06,
      "loss": 0.0131,
      "step": 1010600
    },
    {
      "epoch": 1.6539017955918647,
      "grad_norm": 0.1238190308213234,
      "learning_rate": 6.736292772283099e-06,
      "loss": 0.0169,
      "step": 1010620
    },
    {
      "epoch": 1.6539345260305178,
      "grad_norm": 0.8704882264137268,
      "learning_rate": 6.736226880069583e-06,
      "loss": 0.0124,
      "step": 1010640
    },
    {
      "epoch": 1.6539672564691712,
      "grad_norm": 0.8783555626869202,
      "learning_rate": 6.736160987856065e-06,
      "loss": 0.0193,
      "step": 1010660
    },
    {
      "epoch": 1.6539999869078246,
      "grad_norm": 1.1208293437957764,
      "learning_rate": 6.7360950956425485e-06,
      "loss": 0.0274,
      "step": 1010680
    },
    {
      "epoch": 1.6540327173464777,
      "grad_norm": 0.5806192755699158,
      "learning_rate": 6.736029203429031e-06,
      "loss": 0.0226,
      "step": 1010700
    },
    {
      "epoch": 1.6540654477851313,
      "grad_norm": 0.34892964363098145,
      "learning_rate": 6.735963311215514e-06,
      "loss": 0.0172,
      "step": 1010720
    },
    {
      "epoch": 1.6540981782237845,
      "grad_norm": 0.3801867961883545,
      "learning_rate": 6.735897419001997e-06,
      "loss": 0.0217,
      "step": 1010740
    },
    {
      "epoch": 1.654130908662438,
      "grad_norm": 0.24750380218029022,
      "learning_rate": 6.73583152678848e-06,
      "loss": 0.0301,
      "step": 1010760
    },
    {
      "epoch": 1.6541636391010912,
      "grad_norm": 1.3088724613189697,
      "learning_rate": 6.735765634574963e-06,
      "loss": 0.0199,
      "step": 1010780
    },
    {
      "epoch": 1.6541963695397446,
      "grad_norm": 0.24820008873939514,
      "learning_rate": 6.735699742361446e-06,
      "loss": 0.0207,
      "step": 1010800
    },
    {
      "epoch": 1.654229099978398,
      "grad_norm": 0.3637275695800781,
      "learning_rate": 6.735633850147929e-06,
      "loss": 0.015,
      "step": 1010820
    },
    {
      "epoch": 1.654261830417051,
      "grad_norm": 0.10624174028635025,
      "learning_rate": 6.735567957934411e-06,
      "loss": 0.0148,
      "step": 1010840
    },
    {
      "epoch": 1.6542945608557047,
      "grad_norm": 0.7262870669364929,
      "learning_rate": 6.735502065720895e-06,
      "loss": 0.0199,
      "step": 1010860
    },
    {
      "epoch": 1.6543272912943578,
      "grad_norm": 0.6142193675041199,
      "learning_rate": 6.735436173507377e-06,
      "loss": 0.0166,
      "step": 1010880
    },
    {
      "epoch": 1.6543600217330112,
      "grad_norm": 0.3946017920970917,
      "learning_rate": 6.73537028129386e-06,
      "loss": 0.0177,
      "step": 1010900
    },
    {
      "epoch": 1.6543927521716646,
      "grad_norm": 0.4576355814933777,
      "learning_rate": 6.735304389080343e-06,
      "loss": 0.0238,
      "step": 1010920
    },
    {
      "epoch": 1.654425482610318,
      "grad_norm": 0.12783516943454742,
      "learning_rate": 6.735238496866826e-06,
      "loss": 0.0193,
      "step": 1010940
    },
    {
      "epoch": 1.6544582130489713,
      "grad_norm": 0.3888014256954193,
      "learning_rate": 6.7351726046533085e-06,
      "loss": 0.0196,
      "step": 1010960
    },
    {
      "epoch": 1.6544909434876245,
      "grad_norm": 0.6242765784263611,
      "learning_rate": 6.735106712439792e-06,
      "loss": 0.0217,
      "step": 1010980
    },
    {
      "epoch": 1.654523673926278,
      "grad_norm": 1.5722339153289795,
      "learning_rate": 6.735040820226274e-06,
      "loss": 0.014,
      "step": 1011000
    },
    {
      "epoch": 1.6545564043649312,
      "grad_norm": 0.2914891242980957,
      "learning_rate": 6.734974928012758e-06,
      "loss": 0.0198,
      "step": 1011020
    },
    {
      "epoch": 1.6545891348035846,
      "grad_norm": 0.16592688858509064,
      "learning_rate": 6.7349090357992395e-06,
      "loss": 0.0195,
      "step": 1011040
    },
    {
      "epoch": 1.654621865242238,
      "grad_norm": 2.4479219913482666,
      "learning_rate": 6.734843143585723e-06,
      "loss": 0.0241,
      "step": 1011060
    },
    {
      "epoch": 1.6546545956808913,
      "grad_norm": 0.37096691131591797,
      "learning_rate": 6.734777251372206e-06,
      "loss": 0.0189,
      "step": 1011080
    },
    {
      "epoch": 1.6546873261195447,
      "grad_norm": 0.5734641551971436,
      "learning_rate": 6.7347113591586885e-06,
      "loss": 0.0302,
      "step": 1011100
    },
    {
      "epoch": 1.6547200565581979,
      "grad_norm": 0.891741931438446,
      "learning_rate": 6.734645466945172e-06,
      "loss": 0.0221,
      "step": 1011120
    },
    {
      "epoch": 1.6547527869968515,
      "grad_norm": 0.2556503415107727,
      "learning_rate": 6.734579574731655e-06,
      "loss": 0.0202,
      "step": 1011140
    },
    {
      "epoch": 1.6547855174355046,
      "grad_norm": 0.10243475437164307,
      "learning_rate": 6.734513682518138e-06,
      "loss": 0.0153,
      "step": 1011160
    },
    {
      "epoch": 1.654818247874158,
      "grad_norm": 0.5278322100639343,
      "learning_rate": 6.73444779030462e-06,
      "loss": 0.0219,
      "step": 1011180
    },
    {
      "epoch": 1.6548509783128114,
      "grad_norm": 0.49967122077941895,
      "learning_rate": 6.734381898091104e-06,
      "loss": 0.029,
      "step": 1011200
    },
    {
      "epoch": 1.6548837087514647,
      "grad_norm": 0.467205673456192,
      "learning_rate": 6.734316005877586e-06,
      "loss": 0.017,
      "step": 1011220
    },
    {
      "epoch": 1.654916439190118,
      "grad_norm": 1.0559759140014648,
      "learning_rate": 6.734250113664069e-06,
      "loss": 0.0138,
      "step": 1011240
    },
    {
      "epoch": 1.6549491696287713,
      "grad_norm": 0.44186484813690186,
      "learning_rate": 6.734184221450551e-06,
      "loss": 0.0215,
      "step": 1011260
    },
    {
      "epoch": 1.6549819000674248,
      "grad_norm": 0.6058656573295593,
      "learning_rate": 6.734118329237035e-06,
      "loss": 0.0195,
      "step": 1011280
    },
    {
      "epoch": 1.655014630506078,
      "grad_norm": 0.8732671141624451,
      "learning_rate": 6.734052437023517e-06,
      "loss": 0.0197,
      "step": 1011300
    },
    {
      "epoch": 1.6550473609447314,
      "grad_norm": 0.7981441617012024,
      "learning_rate": 6.73398654481e-06,
      "loss": 0.0203,
      "step": 1011320
    },
    {
      "epoch": 1.6550800913833847,
      "grad_norm": 0.3921613097190857,
      "learning_rate": 6.733920652596483e-06,
      "loss": 0.0151,
      "step": 1011340
    },
    {
      "epoch": 1.655112821822038,
      "grad_norm": 0.4924258291721344,
      "learning_rate": 6.733854760382966e-06,
      "loss": 0.0235,
      "step": 1011360
    },
    {
      "epoch": 1.6551455522606915,
      "grad_norm": 0.34930604696273804,
      "learning_rate": 6.733788868169449e-06,
      "loss": 0.0156,
      "step": 1011380
    },
    {
      "epoch": 1.6551782826993446,
      "grad_norm": 0.2272803783416748,
      "learning_rate": 6.733722975955932e-06,
      "loss": 0.0104,
      "step": 1011400
    },
    {
      "epoch": 1.6552110131379982,
      "grad_norm": 0.27251774072647095,
      "learning_rate": 6.733657083742414e-06,
      "loss": 0.0119,
      "step": 1011420
    },
    {
      "epoch": 1.6552437435766514,
      "grad_norm": 1.1647164821624756,
      "learning_rate": 6.733591191528898e-06,
      "loss": 0.0211,
      "step": 1011440
    },
    {
      "epoch": 1.6552764740153048,
      "grad_norm": 0.43579164147377014,
      "learning_rate": 6.7335252993153796e-06,
      "loss": 0.017,
      "step": 1011460
    },
    {
      "epoch": 1.6553092044539581,
      "grad_norm": 0.7975606918334961,
      "learning_rate": 6.733459407101863e-06,
      "loss": 0.0182,
      "step": 1011480
    },
    {
      "epoch": 1.6553419348926113,
      "grad_norm": 0.2634561061859131,
      "learning_rate": 6.733393514888347e-06,
      "loss": 0.017,
      "step": 1011500
    },
    {
      "epoch": 1.6553746653312649,
      "grad_norm": 0.36126869916915894,
      "learning_rate": 6.733327622674829e-06,
      "loss": 0.0211,
      "step": 1011520
    },
    {
      "epoch": 1.655407395769918,
      "grad_norm": 0.6151922345161438,
      "learning_rate": 6.733261730461312e-06,
      "loss": 0.0294,
      "step": 1011540
    },
    {
      "epoch": 1.6554401262085714,
      "grad_norm": 0.4220418334007263,
      "learning_rate": 6.733195838247795e-06,
      "loss": 0.024,
      "step": 1011560
    },
    {
      "epoch": 1.6554728566472248,
      "grad_norm": 0.5074353218078613,
      "learning_rate": 6.733129946034278e-06,
      "loss": 0.0161,
      "step": 1011580
    },
    {
      "epoch": 1.6555055870858781,
      "grad_norm": 0.7085673213005066,
      "learning_rate": 6.7330640538207604e-06,
      "loss": 0.0164,
      "step": 1011600
    },
    {
      "epoch": 1.6555383175245315,
      "grad_norm": 0.3048149347305298,
      "learning_rate": 6.732998161607244e-06,
      "loss": 0.0203,
      "step": 1011620
    },
    {
      "epoch": 1.6555710479631847,
      "grad_norm": 0.6189880967140198,
      "learning_rate": 6.732932269393726e-06,
      "loss": 0.0122,
      "step": 1011640
    },
    {
      "epoch": 1.6556037784018383,
      "grad_norm": 0.22635748982429504,
      "learning_rate": 6.7328663771802095e-06,
      "loss": 0.0228,
      "step": 1011660
    },
    {
      "epoch": 1.6556365088404914,
      "grad_norm": 0.1803339421749115,
      "learning_rate": 6.732800484966691e-06,
      "loss": 0.0217,
      "step": 1011680
    },
    {
      "epoch": 1.6556692392791448,
      "grad_norm": 0.5417380928993225,
      "learning_rate": 6.732734592753175e-06,
      "loss": 0.0213,
      "step": 1011700
    },
    {
      "epoch": 1.6557019697177981,
      "grad_norm": 0.12412914633750916,
      "learning_rate": 6.732668700539658e-06,
      "loss": 0.0169,
      "step": 1011720
    },
    {
      "epoch": 1.6557347001564515,
      "grad_norm": 1.0283629894256592,
      "learning_rate": 6.7326028083261405e-06,
      "loss": 0.0158,
      "step": 1011740
    },
    {
      "epoch": 1.655767430595105,
      "grad_norm": 0.6456547975540161,
      "learning_rate": 6.732536916112623e-06,
      "loss": 0.015,
      "step": 1011760
    },
    {
      "epoch": 1.655800161033758,
      "grad_norm": 0.28700730204582214,
      "learning_rate": 6.732471023899107e-06,
      "loss": 0.0226,
      "step": 1011780
    },
    {
      "epoch": 1.6558328914724116,
      "grad_norm": 1.075567364692688,
      "learning_rate": 6.732405131685589e-06,
      "loss": 0.0213,
      "step": 1011800
    },
    {
      "epoch": 1.6558656219110648,
      "grad_norm": 0.4391114413738251,
      "learning_rate": 6.732339239472072e-06,
      "loss": 0.0218,
      "step": 1011820
    },
    {
      "epoch": 1.6558983523497182,
      "grad_norm": 0.7959598302841187,
      "learning_rate": 6.732273347258556e-06,
      "loss": 0.0198,
      "step": 1011840
    },
    {
      "epoch": 1.6559310827883715,
      "grad_norm": 0.34053218364715576,
      "learning_rate": 6.732207455045038e-06,
      "loss": 0.0215,
      "step": 1011860
    },
    {
      "epoch": 1.655963813227025,
      "grad_norm": 0.3285645544528961,
      "learning_rate": 6.732141562831521e-06,
      "loss": 0.0264,
      "step": 1011880
    },
    {
      "epoch": 1.6559965436656783,
      "grad_norm": 0.6343644261360168,
      "learning_rate": 6.732075670618003e-06,
      "loss": 0.0184,
      "step": 1011900
    },
    {
      "epoch": 1.6560292741043314,
      "grad_norm": 0.34862592816352844,
      "learning_rate": 6.732009778404487e-06,
      "loss": 0.0156,
      "step": 1011920
    },
    {
      "epoch": 1.656062004542985,
      "grad_norm": 1.300196647644043,
      "learning_rate": 6.7319438861909695e-06,
      "loss": 0.0291,
      "step": 1011940
    },
    {
      "epoch": 1.6560947349816382,
      "grad_norm": 0.6681143045425415,
      "learning_rate": 6.731877993977452e-06,
      "loss": 0.0196,
      "step": 1011960
    },
    {
      "epoch": 1.6561274654202915,
      "grad_norm": 0.18952453136444092,
      "learning_rate": 6.731812101763935e-06,
      "loss": 0.015,
      "step": 1011980
    },
    {
      "epoch": 1.656160195858945,
      "grad_norm": 2.5851259231567383,
      "learning_rate": 6.731746209550419e-06,
      "loss": 0.0261,
      "step": 1012000
    },
    {
      "epoch": 1.6561929262975983,
      "grad_norm": 0.7635889649391174,
      "learning_rate": 6.7316803173369005e-06,
      "loss": 0.0141,
      "step": 1012020
    },
    {
      "epoch": 1.6562256567362517,
      "grad_norm": 0.6807326078414917,
      "learning_rate": 6.731614425123384e-06,
      "loss": 0.0128,
      "step": 1012040
    },
    {
      "epoch": 1.6562583871749048,
      "grad_norm": 0.3404925465583801,
      "learning_rate": 6.731548532909866e-06,
      "loss": 0.0187,
      "step": 1012060
    },
    {
      "epoch": 1.6562911176135584,
      "grad_norm": 0.38182318210601807,
      "learning_rate": 6.7314826406963496e-06,
      "loss": 0.0182,
      "step": 1012080
    },
    {
      "epoch": 1.6563238480522116,
      "grad_norm": 0.7258339524269104,
      "learning_rate": 6.731416748482832e-06,
      "loss": 0.0129,
      "step": 1012100
    },
    {
      "epoch": 1.656356578490865,
      "grad_norm": 0.07420897483825684,
      "learning_rate": 6.731350856269315e-06,
      "loss": 0.0165,
      "step": 1012120
    },
    {
      "epoch": 1.6563893089295183,
      "grad_norm": 0.16248109936714172,
      "learning_rate": 6.731284964055798e-06,
      "loss": 0.0136,
      "step": 1012140
    },
    {
      "epoch": 1.6564220393681715,
      "grad_norm": 0.741619348526001,
      "learning_rate": 6.731219071842281e-06,
      "loss": 0.0256,
      "step": 1012160
    },
    {
      "epoch": 1.656454769806825,
      "grad_norm": 0.5614299178123474,
      "learning_rate": 6.731153179628764e-06,
      "loss": 0.0182,
      "step": 1012180
    },
    {
      "epoch": 1.6564875002454782,
      "grad_norm": 0.9237062931060791,
      "learning_rate": 6.731087287415247e-06,
      "loss": 0.0168,
      "step": 1012200
    },
    {
      "epoch": 1.6565202306841318,
      "grad_norm": 1.018823504447937,
      "learning_rate": 6.7310213952017304e-06,
      "loss": 0.0203,
      "step": 1012220
    },
    {
      "epoch": 1.656552961122785,
      "grad_norm": 0.7276272773742676,
      "learning_rate": 6.730955502988212e-06,
      "loss": 0.0262,
      "step": 1012240
    },
    {
      "epoch": 1.6565856915614383,
      "grad_norm": 0.38565388321876526,
      "learning_rate": 6.730889610774696e-06,
      "loss": 0.0185,
      "step": 1012260
    },
    {
      "epoch": 1.6566184220000917,
      "grad_norm": 1.0516506433486938,
      "learning_rate": 6.730823718561178e-06,
      "loss": 0.0224,
      "step": 1012280
    },
    {
      "epoch": 1.6566511524387448,
      "grad_norm": 0.20135760307312012,
      "learning_rate": 6.730757826347661e-06,
      "loss": 0.0177,
      "step": 1012300
    },
    {
      "epoch": 1.6566838828773984,
      "grad_norm": 0.48823684453964233,
      "learning_rate": 6.730691934134143e-06,
      "loss": 0.0199,
      "step": 1012320
    },
    {
      "epoch": 1.6567166133160516,
      "grad_norm": 0.488552987575531,
      "learning_rate": 6.730626041920627e-06,
      "loss": 0.018,
      "step": 1012340
    },
    {
      "epoch": 1.656749343754705,
      "grad_norm": 0.6081786751747131,
      "learning_rate": 6.73056014970711e-06,
      "loss": 0.0201,
      "step": 1012360
    },
    {
      "epoch": 1.6567820741933583,
      "grad_norm": 0.3939777612686157,
      "learning_rate": 6.730494257493592e-06,
      "loss": 0.0206,
      "step": 1012380
    },
    {
      "epoch": 1.6568148046320117,
      "grad_norm": 0.30476707220077515,
      "learning_rate": 6.730428365280075e-06,
      "loss": 0.0219,
      "step": 1012400
    },
    {
      "epoch": 1.656847535070665,
      "grad_norm": 0.06592005491256714,
      "learning_rate": 6.730362473066559e-06,
      "loss": 0.0174,
      "step": 1012420
    },
    {
      "epoch": 1.6568802655093182,
      "grad_norm": 0.33463671803474426,
      "learning_rate": 6.730296580853041e-06,
      "loss": 0.0214,
      "step": 1012440
    },
    {
      "epoch": 1.6569129959479718,
      "grad_norm": 0.7542744278907776,
      "learning_rate": 6.730230688639524e-06,
      "loss": 0.0209,
      "step": 1012460
    },
    {
      "epoch": 1.656945726386625,
      "grad_norm": 0.29601386189460754,
      "learning_rate": 6.730164796426006e-06,
      "loss": 0.0144,
      "step": 1012480
    },
    {
      "epoch": 1.6569784568252783,
      "grad_norm": 0.4174822270870209,
      "learning_rate": 6.73009890421249e-06,
      "loss": 0.0182,
      "step": 1012500
    },
    {
      "epoch": 1.6570111872639317,
      "grad_norm": 0.2573260962963104,
      "learning_rate": 6.730033011998973e-06,
      "loss": 0.0246,
      "step": 1012520
    },
    {
      "epoch": 1.657043917702585,
      "grad_norm": 0.3731512427330017,
      "learning_rate": 6.729967119785455e-06,
      "loss": 0.0161,
      "step": 1012540
    },
    {
      "epoch": 1.6570766481412385,
      "grad_norm": 0.4104793071746826,
      "learning_rate": 6.729901227571939e-06,
      "loss": 0.0203,
      "step": 1012560
    },
    {
      "epoch": 1.6571093785798916,
      "grad_norm": 0.2741527855396271,
      "learning_rate": 6.7298353353584214e-06,
      "loss": 0.0277,
      "step": 1012580
    },
    {
      "epoch": 1.6571421090185452,
      "grad_norm": 0.5866652727127075,
      "learning_rate": 6.729769443144904e-06,
      "loss": 0.0154,
      "step": 1012600
    },
    {
      "epoch": 1.6571748394571983,
      "grad_norm": 0.41644176840782166,
      "learning_rate": 6.729703550931387e-06,
      "loss": 0.0178,
      "step": 1012620
    },
    {
      "epoch": 1.6572075698958517,
      "grad_norm": 0.778346836566925,
      "learning_rate": 6.7296376587178705e-06,
      "loss": 0.0318,
      "step": 1012640
    },
    {
      "epoch": 1.657240300334505,
      "grad_norm": 0.6979238390922546,
      "learning_rate": 6.729571766504352e-06,
      "loss": 0.0195,
      "step": 1012660
    },
    {
      "epoch": 1.6572730307731585,
      "grad_norm": 0.2787573039531708,
      "learning_rate": 6.729505874290836e-06,
      "loss": 0.0283,
      "step": 1012680
    },
    {
      "epoch": 1.6573057612118118,
      "grad_norm": 0.6472710371017456,
      "learning_rate": 6.729439982077318e-06,
      "loss": 0.0191,
      "step": 1012700
    },
    {
      "epoch": 1.657338491650465,
      "grad_norm": 0.399853378534317,
      "learning_rate": 6.7293740898638015e-06,
      "loss": 0.0198,
      "step": 1012720
    },
    {
      "epoch": 1.6573712220891186,
      "grad_norm": 0.6230823993682861,
      "learning_rate": 6.729308197650284e-06,
      "loss": 0.0213,
      "step": 1012740
    },
    {
      "epoch": 1.6574039525277717,
      "grad_norm": 0.5365987420082092,
      "learning_rate": 6.729242305436767e-06,
      "loss": 0.0182,
      "step": 1012760
    },
    {
      "epoch": 1.657436682966425,
      "grad_norm": 0.8152737617492676,
      "learning_rate": 6.72917641322325e-06,
      "loss": 0.0174,
      "step": 1012780
    },
    {
      "epoch": 1.6574694134050785,
      "grad_norm": 0.508797824382782,
      "learning_rate": 6.729110521009733e-06,
      "loss": 0.0202,
      "step": 1012800
    },
    {
      "epoch": 1.6575021438437318,
      "grad_norm": 1.2598265409469604,
      "learning_rate": 6.729044628796215e-06,
      "loss": 0.0205,
      "step": 1012820
    },
    {
      "epoch": 1.6575348742823852,
      "grad_norm": 0.839276909828186,
      "learning_rate": 6.728978736582699e-06,
      "loss": 0.03,
      "step": 1012840
    },
    {
      "epoch": 1.6575676047210384,
      "grad_norm": 0.12478288263082504,
      "learning_rate": 6.728912844369181e-06,
      "loss": 0.0207,
      "step": 1012860
    },
    {
      "epoch": 1.657600335159692,
      "grad_norm": 0.43944936990737915,
      "learning_rate": 6.728846952155664e-06,
      "loss": 0.0146,
      "step": 1012880
    },
    {
      "epoch": 1.6576330655983451,
      "grad_norm": 0.34052449464797974,
      "learning_rate": 6.728781059942148e-06,
      "loss": 0.0198,
      "step": 1012900
    },
    {
      "epoch": 1.6576657960369985,
      "grad_norm": 0.21160593628883362,
      "learning_rate": 6.72871516772863e-06,
      "loss": 0.0168,
      "step": 1012920
    },
    {
      "epoch": 1.6576985264756519,
      "grad_norm": 1.7675631046295166,
      "learning_rate": 6.728649275515113e-06,
      "loss": 0.0263,
      "step": 1012940
    },
    {
      "epoch": 1.657731256914305,
      "grad_norm": 0.3763512969017029,
      "learning_rate": 6.728583383301596e-06,
      "loss": 0.0202,
      "step": 1012960
    },
    {
      "epoch": 1.6577639873529586,
      "grad_norm": 0.9317084550857544,
      "learning_rate": 6.728517491088079e-06,
      "loss": 0.0253,
      "step": 1012980
    },
    {
      "epoch": 1.6577967177916118,
      "grad_norm": 0.5958262085914612,
      "learning_rate": 6.7284515988745615e-06,
      "loss": 0.0142,
      "step": 1013000
    },
    {
      "epoch": 1.6578294482302653,
      "grad_norm": 0.39708125591278076,
      "learning_rate": 6.728385706661045e-06,
      "loss": 0.0236,
      "step": 1013020
    },
    {
      "epoch": 1.6578621786689185,
      "grad_norm": 0.5622339844703674,
      "learning_rate": 6.728319814447527e-06,
      "loss": 0.0153,
      "step": 1013040
    },
    {
      "epoch": 1.6578949091075719,
      "grad_norm": 0.7348264455795288,
      "learning_rate": 6.728253922234011e-06,
      "loss": 0.0124,
      "step": 1013060
    },
    {
      "epoch": 1.6579276395462252,
      "grad_norm": 0.3283303380012512,
      "learning_rate": 6.7281880300204925e-06,
      "loss": 0.0142,
      "step": 1013080
    },
    {
      "epoch": 1.6579603699848784,
      "grad_norm": 0.6005235910415649,
      "learning_rate": 6.728122137806976e-06,
      "loss": 0.0168,
      "step": 1013100
    },
    {
      "epoch": 1.657993100423532,
      "grad_norm": 0.9652106761932373,
      "learning_rate": 6.728056245593458e-06,
      "loss": 0.0193,
      "step": 1013120
    },
    {
      "epoch": 1.6580258308621851,
      "grad_norm": 0.7659894824028015,
      "learning_rate": 6.7279903533799416e-06,
      "loss": 0.0193,
      "step": 1013140
    },
    {
      "epoch": 1.6580585613008385,
      "grad_norm": 0.7748382687568665,
      "learning_rate": 6.727924461166424e-06,
      "loss": 0.0173,
      "step": 1013160
    },
    {
      "epoch": 1.6580912917394919,
      "grad_norm": 0.2795807719230652,
      "learning_rate": 6.727858568952907e-06,
      "loss": 0.0231,
      "step": 1013180
    },
    {
      "epoch": 1.6581240221781453,
      "grad_norm": 0.448611319065094,
      "learning_rate": 6.72779267673939e-06,
      "loss": 0.0151,
      "step": 1013200
    },
    {
      "epoch": 1.6581567526167986,
      "grad_norm": 0.5683870911598206,
      "learning_rate": 6.727726784525873e-06,
      "loss": 0.016,
      "step": 1013220
    },
    {
      "epoch": 1.6581894830554518,
      "grad_norm": 0.3807578682899475,
      "learning_rate": 6.727660892312357e-06,
      "loss": 0.0181,
      "step": 1013240
    },
    {
      "epoch": 1.6582222134941054,
      "grad_norm": 0.06422795355319977,
      "learning_rate": 6.727595000098839e-06,
      "loss": 0.0242,
      "step": 1013260
    },
    {
      "epoch": 1.6582549439327585,
      "grad_norm": 0.43481412529945374,
      "learning_rate": 6.727529107885322e-06,
      "loss": 0.0137,
      "step": 1013280
    },
    {
      "epoch": 1.658287674371412,
      "grad_norm": 0.23423504829406738,
      "learning_rate": 6.727463215671804e-06,
      "loss": 0.0211,
      "step": 1013300
    },
    {
      "epoch": 1.6583204048100653,
      "grad_norm": 1.0594242811203003,
      "learning_rate": 6.727397323458288e-06,
      "loss": 0.0182,
      "step": 1013320
    },
    {
      "epoch": 1.6583531352487186,
      "grad_norm": 1.2995007038116455,
      "learning_rate": 6.72733143124477e-06,
      "loss": 0.0182,
      "step": 1013340
    },
    {
      "epoch": 1.658385865687372,
      "grad_norm": 1.195922613143921,
      "learning_rate": 6.727265539031253e-06,
      "loss": 0.0178,
      "step": 1013360
    },
    {
      "epoch": 1.6584185961260252,
      "grad_norm": 0.36540645360946655,
      "learning_rate": 6.727199646817736e-06,
      "loss": 0.0218,
      "step": 1013380
    },
    {
      "epoch": 1.6584513265646788,
      "grad_norm": 0.9224050045013428,
      "learning_rate": 6.727133754604219e-06,
      "loss": 0.0143,
      "step": 1013400
    },
    {
      "epoch": 1.658484057003332,
      "grad_norm": 0.27838408946990967,
      "learning_rate": 6.727067862390702e-06,
      "loss": 0.0175,
      "step": 1013420
    },
    {
      "epoch": 1.6585167874419853,
      "grad_norm": 0.4148831367492676,
      "learning_rate": 6.727001970177185e-06,
      "loss": 0.0154,
      "step": 1013440
    },
    {
      "epoch": 1.6585495178806386,
      "grad_norm": 0.6252264380455017,
      "learning_rate": 6.726936077963667e-06,
      "loss": 0.0138,
      "step": 1013460
    },
    {
      "epoch": 1.658582248319292,
      "grad_norm": 0.03052697144448757,
      "learning_rate": 6.726870185750151e-06,
      "loss": 0.0147,
      "step": 1013480
    },
    {
      "epoch": 1.6586149787579454,
      "grad_norm": 0.7512702345848083,
      "learning_rate": 6.7268042935366326e-06,
      "loss": 0.0257,
      "step": 1013500
    },
    {
      "epoch": 1.6586477091965985,
      "grad_norm": 0.4227268695831299,
      "learning_rate": 6.726738401323116e-06,
      "loss": 0.0174,
      "step": 1013520
    },
    {
      "epoch": 1.6586804396352521,
      "grad_norm": 0.3415851891040802,
      "learning_rate": 6.726672509109599e-06,
      "loss": 0.0207,
      "step": 1013540
    },
    {
      "epoch": 1.6587131700739053,
      "grad_norm": 0.2645208239555359,
      "learning_rate": 6.726606616896082e-06,
      "loss": 0.0159,
      "step": 1013560
    },
    {
      "epoch": 1.6587459005125587,
      "grad_norm": 0.9952999353408813,
      "learning_rate": 6.726540724682565e-06,
      "loss": 0.0153,
      "step": 1013580
    },
    {
      "epoch": 1.658778630951212,
      "grad_norm": 0.41346409916877747,
      "learning_rate": 6.726474832469048e-06,
      "loss": 0.0124,
      "step": 1013600
    },
    {
      "epoch": 1.6588113613898654,
      "grad_norm": 0.5969096422195435,
      "learning_rate": 6.726408940255531e-06,
      "loss": 0.0217,
      "step": 1013620
    },
    {
      "epoch": 1.6588440918285188,
      "grad_norm": 0.2321695238351822,
      "learning_rate": 6.7263430480420134e-06,
      "loss": 0.0221,
      "step": 1013640
    },
    {
      "epoch": 1.658876822267172,
      "grad_norm": 0.16116391122341156,
      "learning_rate": 6.726277155828497e-06,
      "loss": 0.0192,
      "step": 1013660
    },
    {
      "epoch": 1.6589095527058255,
      "grad_norm": 0.366862416267395,
      "learning_rate": 6.726211263614979e-06,
      "loss": 0.0141,
      "step": 1013680
    },
    {
      "epoch": 1.6589422831444787,
      "grad_norm": 0.5071017742156982,
      "learning_rate": 6.7261453714014625e-06,
      "loss": 0.0161,
      "step": 1013700
    },
    {
      "epoch": 1.658975013583132,
      "grad_norm": 0.5966053605079651,
      "learning_rate": 6.726079479187944e-06,
      "loss": 0.0227,
      "step": 1013720
    },
    {
      "epoch": 1.6590077440217854,
      "grad_norm": 0.19136524200439453,
      "learning_rate": 6.726013586974428e-06,
      "loss": 0.0251,
      "step": 1013740
    },
    {
      "epoch": 1.6590404744604386,
      "grad_norm": 0.5641351342201233,
      "learning_rate": 6.725947694760911e-06,
      "loss": 0.0199,
      "step": 1013760
    },
    {
      "epoch": 1.6590732048990922,
      "grad_norm": 0.7794100046157837,
      "learning_rate": 6.7258818025473935e-06,
      "loss": 0.0217,
      "step": 1013780
    },
    {
      "epoch": 1.6591059353377453,
      "grad_norm": 0.2777853012084961,
      "learning_rate": 6.725815910333876e-06,
      "loss": 0.0166,
      "step": 1013800
    },
    {
      "epoch": 1.6591386657763987,
      "grad_norm": 0.3865659534931183,
      "learning_rate": 6.72575001812036e-06,
      "loss": 0.021,
      "step": 1013820
    },
    {
      "epoch": 1.659171396215052,
      "grad_norm": 0.2738426625728607,
      "learning_rate": 6.725684125906842e-06,
      "loss": 0.0239,
      "step": 1013840
    },
    {
      "epoch": 1.6592041266537054,
      "grad_norm": 0.1549411416053772,
      "learning_rate": 6.725618233693325e-06,
      "loss": 0.0189,
      "step": 1013860
    },
    {
      "epoch": 1.6592368570923588,
      "grad_norm": 0.15290798246860504,
      "learning_rate": 6.725552341479807e-06,
      "loss": 0.0166,
      "step": 1013880
    },
    {
      "epoch": 1.659269587531012,
      "grad_norm": 0.5951600074768066,
      "learning_rate": 6.725486449266291e-06,
      "loss": 0.0202,
      "step": 1013900
    },
    {
      "epoch": 1.6593023179696655,
      "grad_norm": 0.20827901363372803,
      "learning_rate": 6.7254205570527735e-06,
      "loss": 0.0205,
      "step": 1013920
    },
    {
      "epoch": 1.6593350484083187,
      "grad_norm": 0.7467314004898071,
      "learning_rate": 6.725354664839256e-06,
      "loss": 0.0164,
      "step": 1013940
    },
    {
      "epoch": 1.659367778846972,
      "grad_norm": 0.31828102469444275,
      "learning_rate": 6.72528877262574e-06,
      "loss": 0.0149,
      "step": 1013960
    },
    {
      "epoch": 1.6594005092856254,
      "grad_norm": 0.28992459177970886,
      "learning_rate": 6.7252228804122225e-06,
      "loss": 0.0188,
      "step": 1013980
    },
    {
      "epoch": 1.6594332397242788,
      "grad_norm": 0.32422178983688354,
      "learning_rate": 6.725156988198705e-06,
      "loss": 0.0113,
      "step": 1014000
    },
    {
      "epoch": 1.6594659701629322,
      "grad_norm": 0.2573879361152649,
      "learning_rate": 6.725091095985188e-06,
      "loss": 0.015,
      "step": 1014020
    },
    {
      "epoch": 1.6594987006015853,
      "grad_norm": 0.3564576804637909,
      "learning_rate": 6.725025203771672e-06,
      "loss": 0.0152,
      "step": 1014040
    },
    {
      "epoch": 1.659531431040239,
      "grad_norm": 1.1824049949645996,
      "learning_rate": 6.7249593115581535e-06,
      "loss": 0.0253,
      "step": 1014060
    },
    {
      "epoch": 1.659564161478892,
      "grad_norm": 0.8370019197463989,
      "learning_rate": 6.724893419344637e-06,
      "loss": 0.013,
      "step": 1014080
    },
    {
      "epoch": 1.6595968919175454,
      "grad_norm": 0.25422894954681396,
      "learning_rate": 6.724827527131119e-06,
      "loss": 0.0156,
      "step": 1014100
    },
    {
      "epoch": 1.6596296223561988,
      "grad_norm": 0.1383647918701172,
      "learning_rate": 6.7247616349176026e-06,
      "loss": 0.0151,
      "step": 1014120
    },
    {
      "epoch": 1.6596623527948522,
      "grad_norm": 0.6022377014160156,
      "learning_rate": 6.7246957427040845e-06,
      "loss": 0.026,
      "step": 1014140
    },
    {
      "epoch": 1.6596950832335056,
      "grad_norm": 0.6151330471038818,
      "learning_rate": 6.724629850490568e-06,
      "loss": 0.0123,
      "step": 1014160
    },
    {
      "epoch": 1.6597278136721587,
      "grad_norm": 0.8661783933639526,
      "learning_rate": 6.724563958277051e-06,
      "loss": 0.0159,
      "step": 1014180
    },
    {
      "epoch": 1.6597605441108123,
      "grad_norm": 0.5212995409965515,
      "learning_rate": 6.7244980660635335e-06,
      "loss": 0.0139,
      "step": 1014200
    },
    {
      "epoch": 1.6597932745494655,
      "grad_norm": 0.49732694029808044,
      "learning_rate": 6.724432173850016e-06,
      "loss": 0.0209,
      "step": 1014220
    },
    {
      "epoch": 1.6598260049881188,
      "grad_norm": 0.23448215425014496,
      "learning_rate": 6.7243662816365e-06,
      "loss": 0.0185,
      "step": 1014240
    },
    {
      "epoch": 1.6598587354267722,
      "grad_norm": 0.32466813921928406,
      "learning_rate": 6.724300389422982e-06,
      "loss": 0.014,
      "step": 1014260
    },
    {
      "epoch": 1.6598914658654256,
      "grad_norm": 2.0545196533203125,
      "learning_rate": 6.724234497209465e-06,
      "loss": 0.021,
      "step": 1014280
    },
    {
      "epoch": 1.659924196304079,
      "grad_norm": 0.48046576976776123,
      "learning_rate": 6.724168604995949e-06,
      "loss": 0.0184,
      "step": 1014300
    },
    {
      "epoch": 1.659956926742732,
      "grad_norm": 1.4930437803268433,
      "learning_rate": 6.724102712782431e-06,
      "loss": 0.0322,
      "step": 1014320
    },
    {
      "epoch": 1.6599896571813857,
      "grad_norm": 0.6792482137680054,
      "learning_rate": 6.724036820568914e-06,
      "loss": 0.014,
      "step": 1014340
    },
    {
      "epoch": 1.6600223876200388,
      "grad_norm": 0.19507981836795807,
      "learning_rate": 6.723970928355396e-06,
      "loss": 0.0192,
      "step": 1014360
    },
    {
      "epoch": 1.6600551180586922,
      "grad_norm": 0.9894164204597473,
      "learning_rate": 6.72390503614188e-06,
      "loss": 0.0186,
      "step": 1014380
    },
    {
      "epoch": 1.6600878484973456,
      "grad_norm": 2.1031301021575928,
      "learning_rate": 6.723839143928363e-06,
      "loss": 0.0266,
      "step": 1014400
    },
    {
      "epoch": 1.6601205789359987,
      "grad_norm": 0.2524774372577667,
      "learning_rate": 6.723773251714845e-06,
      "loss": 0.0195,
      "step": 1014420
    },
    {
      "epoch": 1.6601533093746523,
      "grad_norm": 0.5518348217010498,
      "learning_rate": 6.723707359501328e-06,
      "loss": 0.0165,
      "step": 1014440
    },
    {
      "epoch": 1.6601860398133055,
      "grad_norm": 0.6109904050827026,
      "learning_rate": 6.723641467287812e-06,
      "loss": 0.0279,
      "step": 1014460
    },
    {
      "epoch": 1.660218770251959,
      "grad_norm": 0.168178528547287,
      "learning_rate": 6.723575575074294e-06,
      "loss": 0.0108,
      "step": 1014480
    },
    {
      "epoch": 1.6602515006906122,
      "grad_norm": 1.4495218992233276,
      "learning_rate": 6.723509682860777e-06,
      "loss": 0.0156,
      "step": 1014500
    },
    {
      "epoch": 1.6602842311292656,
      "grad_norm": 0.36792829632759094,
      "learning_rate": 6.723443790647259e-06,
      "loss": 0.0178,
      "step": 1014520
    },
    {
      "epoch": 1.660316961567919,
      "grad_norm": 0.19380956888198853,
      "learning_rate": 6.723377898433743e-06,
      "loss": 0.0199,
      "step": 1014540
    },
    {
      "epoch": 1.6603496920065721,
      "grad_norm": 3.1877353191375732,
      "learning_rate": 6.723312006220225e-06,
      "loss": 0.0241,
      "step": 1014560
    },
    {
      "epoch": 1.6603824224452257,
      "grad_norm": 0.3552793562412262,
      "learning_rate": 6.723246114006708e-06,
      "loss": 0.0242,
      "step": 1014580
    },
    {
      "epoch": 1.6604151528838789,
      "grad_norm": 0.2605166435241699,
      "learning_rate": 6.723180221793191e-06,
      "loss": 0.0124,
      "step": 1014600
    },
    {
      "epoch": 1.6604478833225322,
      "grad_norm": 0.6628111600875854,
      "learning_rate": 6.7231143295796745e-06,
      "loss": 0.0226,
      "step": 1014620
    },
    {
      "epoch": 1.6604806137611856,
      "grad_norm": 0.05661846697330475,
      "learning_rate": 6.723048437366157e-06,
      "loss": 0.0172,
      "step": 1014640
    },
    {
      "epoch": 1.660513344199839,
      "grad_norm": 0.19762608408927917,
      "learning_rate": 6.72298254515264e-06,
      "loss": 0.0159,
      "step": 1014660
    },
    {
      "epoch": 1.6605460746384924,
      "grad_norm": 0.17592988908290863,
      "learning_rate": 6.7229166529391235e-06,
      "loss": 0.0139,
      "step": 1014680
    },
    {
      "epoch": 1.6605788050771455,
      "grad_norm": 0.4921784996986389,
      "learning_rate": 6.722850760725605e-06,
      "loss": 0.0145,
      "step": 1014700
    },
    {
      "epoch": 1.660611535515799,
      "grad_norm": 0.18287251889705658,
      "learning_rate": 6.722784868512089e-06,
      "loss": 0.0209,
      "step": 1014720
    },
    {
      "epoch": 1.6606442659544522,
      "grad_norm": 0.2979922890663147,
      "learning_rate": 6.722718976298571e-06,
      "loss": 0.0219,
      "step": 1014740
    },
    {
      "epoch": 1.6606769963931056,
      "grad_norm": 0.3871167004108429,
      "learning_rate": 6.7226530840850545e-06,
      "loss": 0.0196,
      "step": 1014760
    },
    {
      "epoch": 1.660709726831759,
      "grad_norm": 0.37091881036758423,
      "learning_rate": 6.722587191871537e-06,
      "loss": 0.016,
      "step": 1014780
    },
    {
      "epoch": 1.6607424572704124,
      "grad_norm": 0.612058162689209,
      "learning_rate": 6.72252129965802e-06,
      "loss": 0.0138,
      "step": 1014800
    },
    {
      "epoch": 1.6607751877090657,
      "grad_norm": 0.5793543457984924,
      "learning_rate": 6.722455407444503e-06,
      "loss": 0.0187,
      "step": 1014820
    },
    {
      "epoch": 1.6608079181477189,
      "grad_norm": 0.22522729635238647,
      "learning_rate": 6.722389515230986e-06,
      "loss": 0.0225,
      "step": 1014840
    },
    {
      "epoch": 1.6608406485863725,
      "grad_norm": 0.21292883157730103,
      "learning_rate": 6.722323623017468e-06,
      "loss": 0.0142,
      "step": 1014860
    },
    {
      "epoch": 1.6608733790250256,
      "grad_norm": 1.178566813468933,
      "learning_rate": 6.722257730803952e-06,
      "loss": 0.0171,
      "step": 1014880
    },
    {
      "epoch": 1.660906109463679,
      "grad_norm": 0.2563536465167999,
      "learning_rate": 6.722191838590434e-06,
      "loss": 0.025,
      "step": 1014900
    },
    {
      "epoch": 1.6609388399023324,
      "grad_norm": 0.37119734287261963,
      "learning_rate": 6.722125946376917e-06,
      "loss": 0.0181,
      "step": 1014920
    },
    {
      "epoch": 1.6609715703409857,
      "grad_norm": 0.44851428270339966,
      "learning_rate": 6.7220600541634e-06,
      "loss": 0.023,
      "step": 1014940
    },
    {
      "epoch": 1.6610043007796391,
      "grad_norm": 0.2555241882801056,
      "learning_rate": 6.721994161949883e-06,
      "loss": 0.0163,
      "step": 1014960
    },
    {
      "epoch": 1.6610370312182923,
      "grad_norm": 0.562281608581543,
      "learning_rate": 6.7219282697363655e-06,
      "loss": 0.0245,
      "step": 1014980
    },
    {
      "epoch": 1.6610697616569459,
      "grad_norm": 0.3894527852535248,
      "learning_rate": 6.721862377522849e-06,
      "loss": 0.0174,
      "step": 1015000
    },
    {
      "epoch": 1.661102492095599,
      "grad_norm": 1.5271732807159424,
      "learning_rate": 6.721796485309332e-06,
      "loss": 0.0257,
      "step": 1015020
    },
    {
      "epoch": 1.6611352225342524,
      "grad_norm": 0.4678103029727936,
      "learning_rate": 6.7217305930958145e-06,
      "loss": 0.0225,
      "step": 1015040
    },
    {
      "epoch": 1.6611679529729058,
      "grad_norm": 2.052391529083252,
      "learning_rate": 6.721664700882298e-06,
      "loss": 0.0171,
      "step": 1015060
    },
    {
      "epoch": 1.6612006834115591,
      "grad_norm": 1.5901497602462769,
      "learning_rate": 6.72159880866878e-06,
      "loss": 0.0176,
      "step": 1015080
    },
    {
      "epoch": 1.6612334138502125,
      "grad_norm": 0.5225986838340759,
      "learning_rate": 6.721532916455264e-06,
      "loss": 0.0211,
      "step": 1015100
    },
    {
      "epoch": 1.6612661442888657,
      "grad_norm": 0.3033243715763092,
      "learning_rate": 6.7214670242417455e-06,
      "loss": 0.0201,
      "step": 1015120
    },
    {
      "epoch": 1.6612988747275192,
      "grad_norm": 0.9521214962005615,
      "learning_rate": 6.721401132028229e-06,
      "loss": 0.0121,
      "step": 1015140
    },
    {
      "epoch": 1.6613316051661724,
      "grad_norm": 0.3077060282230377,
      "learning_rate": 6.721335239814711e-06,
      "loss": 0.0187,
      "step": 1015160
    },
    {
      "epoch": 1.6613643356048258,
      "grad_norm": 0.24035416543483734,
      "learning_rate": 6.7212693476011946e-06,
      "loss": 0.0174,
      "step": 1015180
    },
    {
      "epoch": 1.6613970660434791,
      "grad_norm": 0.651399552822113,
      "learning_rate": 6.721203455387677e-06,
      "loss": 0.0274,
      "step": 1015200
    },
    {
      "epoch": 1.6614297964821323,
      "grad_norm": 0.7936351895332336,
      "learning_rate": 6.72113756317416e-06,
      "loss": 0.0156,
      "step": 1015220
    },
    {
      "epoch": 1.6614625269207859,
      "grad_norm": 0.40932750701904297,
      "learning_rate": 6.721071670960643e-06,
      "loss": 0.0145,
      "step": 1015240
    },
    {
      "epoch": 1.661495257359439,
      "grad_norm": 3.2819876670837402,
      "learning_rate": 6.721005778747126e-06,
      "loss": 0.0205,
      "step": 1015260
    },
    {
      "epoch": 1.6615279877980926,
      "grad_norm": 0.21654336154460907,
      "learning_rate": 6.720939886533608e-06,
      "loss": 0.0225,
      "step": 1015280
    },
    {
      "epoch": 1.6615607182367458,
      "grad_norm": 0.22978396713733673,
      "learning_rate": 6.720873994320092e-06,
      "loss": 0.0167,
      "step": 1015300
    },
    {
      "epoch": 1.6615934486753992,
      "grad_norm": 0.41758832335472107,
      "learning_rate": 6.720808102106574e-06,
      "loss": 0.0217,
      "step": 1015320
    },
    {
      "epoch": 1.6616261791140525,
      "grad_norm": 0.17979755997657776,
      "learning_rate": 6.720742209893057e-06,
      "loss": 0.0186,
      "step": 1015340
    },
    {
      "epoch": 1.6616589095527057,
      "grad_norm": 0.6606753468513489,
      "learning_rate": 6.720676317679541e-06,
      "loss": 0.0191,
      "step": 1015360
    },
    {
      "epoch": 1.6616916399913593,
      "grad_norm": 0.474567174911499,
      "learning_rate": 6.720610425466023e-06,
      "loss": 0.0234,
      "step": 1015380
    },
    {
      "epoch": 1.6617243704300124,
      "grad_norm": 0.8587796688079834,
      "learning_rate": 6.720544533252506e-06,
      "loss": 0.0265,
      "step": 1015400
    },
    {
      "epoch": 1.6617571008686658,
      "grad_norm": 0.2185918241739273,
      "learning_rate": 6.720478641038989e-06,
      "loss": 0.0243,
      "step": 1015420
    },
    {
      "epoch": 1.6617898313073192,
      "grad_norm": 0.8314565420150757,
      "learning_rate": 6.720412748825472e-06,
      "loss": 0.0186,
      "step": 1015440
    },
    {
      "epoch": 1.6618225617459725,
      "grad_norm": 1.0744829177856445,
      "learning_rate": 6.720346856611955e-06,
      "loss": 0.0163,
      "step": 1015460
    },
    {
      "epoch": 1.661855292184626,
      "grad_norm": 0.5379252433776855,
      "learning_rate": 6.720280964398438e-06,
      "loss": 0.0185,
      "step": 1015480
    },
    {
      "epoch": 1.661888022623279,
      "grad_norm": 0.19177298247814178,
      "learning_rate": 6.72021507218492e-06,
      "loss": 0.0169,
      "step": 1015500
    },
    {
      "epoch": 1.6619207530619327,
      "grad_norm": 0.8992465138435364,
      "learning_rate": 6.720149179971404e-06,
      "loss": 0.0233,
      "step": 1015520
    },
    {
      "epoch": 1.6619534835005858,
      "grad_norm": 0.1337231546640396,
      "learning_rate": 6.7200832877578856e-06,
      "loss": 0.0129,
      "step": 1015540
    },
    {
      "epoch": 1.6619862139392392,
      "grad_norm": 0.8963717222213745,
      "learning_rate": 6.720017395544369e-06,
      "loss": 0.027,
      "step": 1015560
    },
    {
      "epoch": 1.6620189443778925,
      "grad_norm": 0.18462087213993073,
      "learning_rate": 6.719951503330852e-06,
      "loss": 0.016,
      "step": 1015580
    },
    {
      "epoch": 1.662051674816546,
      "grad_norm": 0.270018070936203,
      "learning_rate": 6.719885611117335e-06,
      "loss": 0.0176,
      "step": 1015600
    },
    {
      "epoch": 1.6620844052551993,
      "grad_norm": 1.0471243858337402,
      "learning_rate": 6.719819718903817e-06,
      "loss": 0.022,
      "step": 1015620
    },
    {
      "epoch": 1.6621171356938524,
      "grad_norm": 0.5594004392623901,
      "learning_rate": 6.719753826690301e-06,
      "loss": 0.0206,
      "step": 1015640
    },
    {
      "epoch": 1.662149866132506,
      "grad_norm": 0.31297969818115234,
      "learning_rate": 6.719687934476783e-06,
      "loss": 0.0231,
      "step": 1015660
    },
    {
      "epoch": 1.6621825965711592,
      "grad_norm": 2.395190715789795,
      "learning_rate": 6.7196220422632664e-06,
      "loss": 0.0252,
      "step": 1015680
    },
    {
      "epoch": 1.6622153270098126,
      "grad_norm": 1.6739284992218018,
      "learning_rate": 6.71955615004975e-06,
      "loss": 0.0217,
      "step": 1015700
    },
    {
      "epoch": 1.662248057448466,
      "grad_norm": 0.3864611089229584,
      "learning_rate": 6.719490257836232e-06,
      "loss": 0.0178,
      "step": 1015720
    },
    {
      "epoch": 1.6622807878871193,
      "grad_norm": 0.46148058772087097,
      "learning_rate": 6.7194243656227155e-06,
      "loss": 0.0188,
      "step": 1015740
    },
    {
      "epoch": 1.6623135183257727,
      "grad_norm": 0.5115934610366821,
      "learning_rate": 6.719358473409197e-06,
      "loss": 0.0184,
      "step": 1015760
    },
    {
      "epoch": 1.6623462487644258,
      "grad_norm": 0.8106421232223511,
      "learning_rate": 6.719292581195681e-06,
      "loss": 0.0174,
      "step": 1015780
    },
    {
      "epoch": 1.6623789792030794,
      "grad_norm": 0.271838903427124,
      "learning_rate": 6.719226688982164e-06,
      "loss": 0.017,
      "step": 1015800
    },
    {
      "epoch": 1.6624117096417326,
      "grad_norm": 0.9884291291236877,
      "learning_rate": 6.7191607967686465e-06,
      "loss": 0.0216,
      "step": 1015820
    },
    {
      "epoch": 1.662444440080386,
      "grad_norm": 1.7690846920013428,
      "learning_rate": 6.719094904555129e-06,
      "loss": 0.0144,
      "step": 1015840
    },
    {
      "epoch": 1.6624771705190393,
      "grad_norm": 0.24077387154102325,
      "learning_rate": 6.719029012341613e-06,
      "loss": 0.0186,
      "step": 1015860
    },
    {
      "epoch": 1.6625099009576927,
      "grad_norm": 0.6736788153648376,
      "learning_rate": 6.718963120128095e-06,
      "loss": 0.0231,
      "step": 1015880
    },
    {
      "epoch": 1.662542631396346,
      "grad_norm": 0.7786415815353394,
      "learning_rate": 6.718897227914578e-06,
      "loss": 0.0164,
      "step": 1015900
    },
    {
      "epoch": 1.6625753618349992,
      "grad_norm": 0.15845254063606262,
      "learning_rate": 6.71883133570106e-06,
      "loss": 0.0221,
      "step": 1015920
    },
    {
      "epoch": 1.6626080922736528,
      "grad_norm": 0.8766049742698669,
      "learning_rate": 6.718765443487544e-06,
      "loss": 0.0145,
      "step": 1015940
    },
    {
      "epoch": 1.662640822712306,
      "grad_norm": 0.33949387073516846,
      "learning_rate": 6.718699551274026e-06,
      "loss": 0.0266,
      "step": 1015960
    },
    {
      "epoch": 1.6626735531509593,
      "grad_norm": 0.39556413888931274,
      "learning_rate": 6.718633659060509e-06,
      "loss": 0.0222,
      "step": 1015980
    },
    {
      "epoch": 1.6627062835896127,
      "grad_norm": 0.11047183722257614,
      "learning_rate": 6.718567766846992e-06,
      "loss": 0.021,
      "step": 1016000
    },
    {
      "epoch": 1.6627390140282658,
      "grad_norm": 0.3363153636455536,
      "learning_rate": 6.7185018746334756e-06,
      "loss": 0.0173,
      "step": 1016020
    },
    {
      "epoch": 1.6627717444669194,
      "grad_norm": 1.0966891050338745,
      "learning_rate": 6.718435982419958e-06,
      "loss": 0.0175,
      "step": 1016040
    },
    {
      "epoch": 1.6628044749055726,
      "grad_norm": 0.49327781796455383,
      "learning_rate": 6.718370090206441e-06,
      "loss": 0.0159,
      "step": 1016060
    },
    {
      "epoch": 1.6628372053442262,
      "grad_norm": 1.68894624710083,
      "learning_rate": 6.718304197992925e-06,
      "loss": 0.0215,
      "step": 1016080
    },
    {
      "epoch": 1.6628699357828793,
      "grad_norm": 0.45675623416900635,
      "learning_rate": 6.7182383057794065e-06,
      "loss": 0.0182,
      "step": 1016100
    },
    {
      "epoch": 1.6629026662215327,
      "grad_norm": 0.48675817251205444,
      "learning_rate": 6.71817241356589e-06,
      "loss": 0.0207,
      "step": 1016120
    },
    {
      "epoch": 1.662935396660186,
      "grad_norm": 0.8186332583427429,
      "learning_rate": 6.718106521352372e-06,
      "loss": 0.0266,
      "step": 1016140
    },
    {
      "epoch": 1.6629681270988392,
      "grad_norm": 0.6098966598510742,
      "learning_rate": 6.718040629138856e-06,
      "loss": 0.022,
      "step": 1016160
    },
    {
      "epoch": 1.6630008575374928,
      "grad_norm": 0.5652288198471069,
      "learning_rate": 6.7179747369253375e-06,
      "loss": 0.0257,
      "step": 1016180
    },
    {
      "epoch": 1.663033587976146,
      "grad_norm": 0.13378535211086273,
      "learning_rate": 6.717908844711821e-06,
      "loss": 0.0175,
      "step": 1016200
    },
    {
      "epoch": 1.6630663184147993,
      "grad_norm": 0.809081494808197,
      "learning_rate": 6.717842952498304e-06,
      "loss": 0.0148,
      "step": 1016220
    },
    {
      "epoch": 1.6630990488534527,
      "grad_norm": 1.1437187194824219,
      "learning_rate": 6.7177770602847865e-06,
      "loss": 0.0247,
      "step": 1016240
    },
    {
      "epoch": 1.663131779292106,
      "grad_norm": 0.4143622815608978,
      "learning_rate": 6.717711168071269e-06,
      "loss": 0.0267,
      "step": 1016260
    },
    {
      "epoch": 1.6631645097307595,
      "grad_norm": 1.5303620100021362,
      "learning_rate": 6.717645275857753e-06,
      "loss": 0.0174,
      "step": 1016280
    },
    {
      "epoch": 1.6631972401694126,
      "grad_norm": 1.2806049585342407,
      "learning_rate": 6.717579383644235e-06,
      "loss": 0.0202,
      "step": 1016300
    },
    {
      "epoch": 1.6632299706080662,
      "grad_norm": 1.3398120403289795,
      "learning_rate": 6.717513491430718e-06,
      "loss": 0.0198,
      "step": 1016320
    },
    {
      "epoch": 1.6632627010467194,
      "grad_norm": 1.0861915349960327,
      "learning_rate": 6.7174475992172e-06,
      "loss": 0.0218,
      "step": 1016340
    },
    {
      "epoch": 1.6632954314853727,
      "grad_norm": 0.6709402799606323,
      "learning_rate": 6.717381707003684e-06,
      "loss": 0.0145,
      "step": 1016360
    },
    {
      "epoch": 1.663328161924026,
      "grad_norm": 0.2972511947154999,
      "learning_rate": 6.7173158147901666e-06,
      "loss": 0.0166,
      "step": 1016380
    },
    {
      "epoch": 1.6633608923626795,
      "grad_norm": 0.5352423787117004,
      "learning_rate": 6.717249922576649e-06,
      "loss": 0.0171,
      "step": 1016400
    },
    {
      "epoch": 1.6633936228013328,
      "grad_norm": 0.8131283521652222,
      "learning_rate": 6.717184030363133e-06,
      "loss": 0.0149,
      "step": 1016420
    },
    {
      "epoch": 1.663426353239986,
      "grad_norm": 0.4271624982357025,
      "learning_rate": 6.717118138149616e-06,
      "loss": 0.0136,
      "step": 1016440
    },
    {
      "epoch": 1.6634590836786396,
      "grad_norm": 1.1128337383270264,
      "learning_rate": 6.717052245936098e-06,
      "loss": 0.0134,
      "step": 1016460
    },
    {
      "epoch": 1.6634918141172927,
      "grad_norm": 0.757548451423645,
      "learning_rate": 6.716986353722581e-06,
      "loss": 0.017,
      "step": 1016480
    },
    {
      "epoch": 1.6635245445559461,
      "grad_norm": 0.12084004282951355,
      "learning_rate": 6.716920461509065e-06,
      "loss": 0.0219,
      "step": 1016500
    },
    {
      "epoch": 1.6635572749945995,
      "grad_norm": 0.3941775858402252,
      "learning_rate": 6.716854569295547e-06,
      "loss": 0.0169,
      "step": 1016520
    },
    {
      "epoch": 1.6635900054332529,
      "grad_norm": 1.342850685119629,
      "learning_rate": 6.71678867708203e-06,
      "loss": 0.0156,
      "step": 1016540
    },
    {
      "epoch": 1.6636227358719062,
      "grad_norm": 0.15771619975566864,
      "learning_rate": 6.716722784868512e-06,
      "loss": 0.0201,
      "step": 1016560
    },
    {
      "epoch": 1.6636554663105594,
      "grad_norm": 0.2895044684410095,
      "learning_rate": 6.716656892654996e-06,
      "loss": 0.0225,
      "step": 1016580
    },
    {
      "epoch": 1.663688196749213,
      "grad_norm": 0.188773050904274,
      "learning_rate": 6.716591000441478e-06,
      "loss": 0.0264,
      "step": 1016600
    },
    {
      "epoch": 1.6637209271878661,
      "grad_norm": 0.24689838290214539,
      "learning_rate": 6.716525108227961e-06,
      "loss": 0.0196,
      "step": 1016620
    },
    {
      "epoch": 1.6637536576265195,
      "grad_norm": 0.5659186244010925,
      "learning_rate": 6.716459216014444e-06,
      "loss": 0.0176,
      "step": 1016640
    },
    {
      "epoch": 1.6637863880651729,
      "grad_norm": 0.4963098466396332,
      "learning_rate": 6.7163933238009275e-06,
      "loss": 0.0186,
      "step": 1016660
    },
    {
      "epoch": 1.663819118503826,
      "grad_norm": 0.5396426916122437,
      "learning_rate": 6.716327431587409e-06,
      "loss": 0.0148,
      "step": 1016680
    },
    {
      "epoch": 1.6638518489424796,
      "grad_norm": 0.20636849105358124,
      "learning_rate": 6.716261539373893e-06,
      "loss": 0.018,
      "step": 1016700
    },
    {
      "epoch": 1.6638845793811328,
      "grad_norm": 0.7118290066719055,
      "learning_rate": 6.716195647160375e-06,
      "loss": 0.0247,
      "step": 1016720
    },
    {
      "epoch": 1.6639173098197864,
      "grad_norm": 0.526981770992279,
      "learning_rate": 6.716129754946858e-06,
      "loss": 0.0219,
      "step": 1016740
    },
    {
      "epoch": 1.6639500402584395,
      "grad_norm": 0.6651606559753418,
      "learning_rate": 6.716063862733342e-06,
      "loss": 0.0205,
      "step": 1016760
    },
    {
      "epoch": 1.6639827706970929,
      "grad_norm": 0.3429310917854309,
      "learning_rate": 6.715997970519824e-06,
      "loss": 0.0176,
      "step": 1016780
    },
    {
      "epoch": 1.6640155011357463,
      "grad_norm": 0.6359017491340637,
      "learning_rate": 6.7159320783063075e-06,
      "loss": 0.0181,
      "step": 1016800
    },
    {
      "epoch": 1.6640482315743994,
      "grad_norm": 0.4647483825683594,
      "learning_rate": 6.71586618609279e-06,
      "loss": 0.0173,
      "step": 1016820
    },
    {
      "epoch": 1.664080962013053,
      "grad_norm": 0.8760517239570618,
      "learning_rate": 6.715800293879273e-06,
      "loss": 0.0251,
      "step": 1016840
    },
    {
      "epoch": 1.6641136924517062,
      "grad_norm": 0.7953943610191345,
      "learning_rate": 6.715734401665756e-06,
      "loss": 0.0205,
      "step": 1016860
    },
    {
      "epoch": 1.6641464228903595,
      "grad_norm": 1.0169627666473389,
      "learning_rate": 6.715668509452239e-06,
      "loss": 0.0232,
      "step": 1016880
    },
    {
      "epoch": 1.664179153329013,
      "grad_norm": 1.0797244310379028,
      "learning_rate": 6.715602617238721e-06,
      "loss": 0.0187,
      "step": 1016900
    },
    {
      "epoch": 1.6642118837676663,
      "grad_norm": 1.5178865194320679,
      "learning_rate": 6.715536725025205e-06,
      "loss": 0.0258,
      "step": 1016920
    },
    {
      "epoch": 1.6642446142063196,
      "grad_norm": 0.16742362082004547,
      "learning_rate": 6.715470832811687e-06,
      "loss": 0.0385,
      "step": 1016940
    },
    {
      "epoch": 1.6642773446449728,
      "grad_norm": 1.5215901136398315,
      "learning_rate": 6.71540494059817e-06,
      "loss": 0.0216,
      "step": 1016960
    },
    {
      "epoch": 1.6643100750836264,
      "grad_norm": 0.6402420997619629,
      "learning_rate": 6.715339048384652e-06,
      "loss": 0.0177,
      "step": 1016980
    },
    {
      "epoch": 1.6643428055222795,
      "grad_norm": 0.6363875865936279,
      "learning_rate": 6.715273156171136e-06,
      "loss": 0.0127,
      "step": 1017000
    },
    {
      "epoch": 1.664375535960933,
      "grad_norm": 0.7026151418685913,
      "learning_rate": 6.7152072639576185e-06,
      "loss": 0.0179,
      "step": 1017020
    },
    {
      "epoch": 1.6644082663995863,
      "grad_norm": 0.480292946100235,
      "learning_rate": 6.715141371744101e-06,
      "loss": 0.0153,
      "step": 1017040
    },
    {
      "epoch": 1.6644409968382397,
      "grad_norm": 0.753088653087616,
      "learning_rate": 6.715075479530584e-06,
      "loss": 0.0189,
      "step": 1017060
    },
    {
      "epoch": 1.664473727276893,
      "grad_norm": 0.3184185028076172,
      "learning_rate": 6.7150095873170675e-06,
      "loss": 0.0137,
      "step": 1017080
    },
    {
      "epoch": 1.6645064577155462,
      "grad_norm": 0.4849756360054016,
      "learning_rate": 6.714943695103551e-06,
      "loss": 0.0201,
      "step": 1017100
    },
    {
      "epoch": 1.6645391881541998,
      "grad_norm": 1.5037953853607178,
      "learning_rate": 6.714877802890033e-06,
      "loss": 0.0157,
      "step": 1017120
    },
    {
      "epoch": 1.664571918592853,
      "grad_norm": 0.20751385390758514,
      "learning_rate": 6.714811910676517e-06,
      "loss": 0.0159,
      "step": 1017140
    },
    {
      "epoch": 1.6646046490315063,
      "grad_norm": 0.6010164618492126,
      "learning_rate": 6.7147460184629985e-06,
      "loss": 0.0179,
      "step": 1017160
    },
    {
      "epoch": 1.6646373794701597,
      "grad_norm": 0.5717694759368896,
      "learning_rate": 6.714680126249482e-06,
      "loss": 0.0207,
      "step": 1017180
    },
    {
      "epoch": 1.664670109908813,
      "grad_norm": 0.4662921726703644,
      "learning_rate": 6.714614234035964e-06,
      "loss": 0.018,
      "step": 1017200
    },
    {
      "epoch": 1.6647028403474664,
      "grad_norm": 0.189180389046669,
      "learning_rate": 6.7145483418224476e-06,
      "loss": 0.0248,
      "step": 1017220
    },
    {
      "epoch": 1.6647355707861196,
      "grad_norm": 0.6275030970573425,
      "learning_rate": 6.71448244960893e-06,
      "loss": 0.0206,
      "step": 1017240
    },
    {
      "epoch": 1.6647683012247732,
      "grad_norm": 0.2095547914505005,
      "learning_rate": 6.714416557395413e-06,
      "loss": 0.0203,
      "step": 1017260
    },
    {
      "epoch": 1.6648010316634263,
      "grad_norm": 0.33283424377441406,
      "learning_rate": 6.714350665181896e-06,
      "loss": 0.0146,
      "step": 1017280
    },
    {
      "epoch": 1.6648337621020797,
      "grad_norm": 0.5592359304428101,
      "learning_rate": 6.714284772968379e-06,
      "loss": 0.0219,
      "step": 1017300
    },
    {
      "epoch": 1.664866492540733,
      "grad_norm": 0.5749767422676086,
      "learning_rate": 6.714218880754861e-06,
      "loss": 0.0198,
      "step": 1017320
    },
    {
      "epoch": 1.6648992229793864,
      "grad_norm": 0.5873318314552307,
      "learning_rate": 6.714152988541345e-06,
      "loss": 0.0216,
      "step": 1017340
    },
    {
      "epoch": 1.6649319534180398,
      "grad_norm": 0.7339425683021545,
      "learning_rate": 6.714087096327827e-06,
      "loss": 0.0199,
      "step": 1017360
    },
    {
      "epoch": 1.664964683856693,
      "grad_norm": 0.651629626750946,
      "learning_rate": 6.71402120411431e-06,
      "loss": 0.0172,
      "step": 1017380
    },
    {
      "epoch": 1.6649974142953465,
      "grad_norm": 0.6070908308029175,
      "learning_rate": 6.713955311900793e-06,
      "loss": 0.0125,
      "step": 1017400
    },
    {
      "epoch": 1.6650301447339997,
      "grad_norm": 0.16155435144901276,
      "learning_rate": 6.713889419687276e-06,
      "loss": 0.0211,
      "step": 1017420
    },
    {
      "epoch": 1.665062875172653,
      "grad_norm": 0.7086291313171387,
      "learning_rate": 6.7138235274737585e-06,
      "loss": 0.0238,
      "step": 1017440
    },
    {
      "epoch": 1.6650956056113064,
      "grad_norm": 0.7162829637527466,
      "learning_rate": 6.713757635260242e-06,
      "loss": 0.014,
      "step": 1017460
    },
    {
      "epoch": 1.6651283360499596,
      "grad_norm": 0.4080478250980377,
      "learning_rate": 6.713691743046725e-06,
      "loss": 0.0207,
      "step": 1017480
    },
    {
      "epoch": 1.6651610664886132,
      "grad_norm": 0.7085098028182983,
      "learning_rate": 6.713625850833208e-06,
      "loss": 0.016,
      "step": 1017500
    },
    {
      "epoch": 1.6651937969272663,
      "grad_norm": 0.7262420654296875,
      "learning_rate": 6.713559958619691e-06,
      "loss": 0.0136,
      "step": 1017520
    },
    {
      "epoch": 1.66522652736592,
      "grad_norm": 1.0835500955581665,
      "learning_rate": 6.713494066406173e-06,
      "loss": 0.0154,
      "step": 1017540
    },
    {
      "epoch": 1.665259257804573,
      "grad_norm": 0.6460955739021301,
      "learning_rate": 6.713428174192657e-06,
      "loss": 0.0158,
      "step": 1017560
    },
    {
      "epoch": 1.6652919882432264,
      "grad_norm": 0.21660836040973663,
      "learning_rate": 6.7133622819791386e-06,
      "loss": 0.0172,
      "step": 1017580
    },
    {
      "epoch": 1.6653247186818798,
      "grad_norm": 0.37275612354278564,
      "learning_rate": 6.713296389765622e-06,
      "loss": 0.0161,
      "step": 1017600
    },
    {
      "epoch": 1.665357449120533,
      "grad_norm": 0.3898562490940094,
      "learning_rate": 6.713230497552105e-06,
      "loss": 0.0265,
      "step": 1017620
    },
    {
      "epoch": 1.6653901795591866,
      "grad_norm": 0.19354091584682465,
      "learning_rate": 6.713164605338588e-06,
      "loss": 0.0158,
      "step": 1017640
    },
    {
      "epoch": 1.6654229099978397,
      "grad_norm": 0.5232540965080261,
      "learning_rate": 6.71309871312507e-06,
      "loss": 0.0219,
      "step": 1017660
    },
    {
      "epoch": 1.665455640436493,
      "grad_norm": 0.7076267600059509,
      "learning_rate": 6.713032820911554e-06,
      "loss": 0.0225,
      "step": 1017680
    },
    {
      "epoch": 1.6654883708751465,
      "grad_norm": 0.21702302992343903,
      "learning_rate": 6.712966928698036e-06,
      "loss": 0.0136,
      "step": 1017700
    },
    {
      "epoch": 1.6655211013137998,
      "grad_norm": 0.9839608073234558,
      "learning_rate": 6.7129010364845194e-06,
      "loss": 0.0182,
      "step": 1017720
    },
    {
      "epoch": 1.6655538317524532,
      "grad_norm": 0.32781538367271423,
      "learning_rate": 6.712835144271001e-06,
      "loss": 0.0259,
      "step": 1017740
    },
    {
      "epoch": 1.6655865621911063,
      "grad_norm": 0.2825092375278473,
      "learning_rate": 6.712769252057485e-06,
      "loss": 0.019,
      "step": 1017760
    },
    {
      "epoch": 1.66561929262976,
      "grad_norm": 0.18888190388679504,
      "learning_rate": 6.712703359843968e-06,
      "loss": 0.0151,
      "step": 1017780
    },
    {
      "epoch": 1.665652023068413,
      "grad_norm": 0.7907071709632874,
      "learning_rate": 6.71263746763045e-06,
      "loss": 0.0191,
      "step": 1017800
    },
    {
      "epoch": 1.6656847535070665,
      "grad_norm": 0.21799908578395844,
      "learning_rate": 6.712571575416934e-06,
      "loss": 0.0134,
      "step": 1017820
    },
    {
      "epoch": 1.6657174839457198,
      "grad_norm": 0.517121434211731,
      "learning_rate": 6.712505683203417e-06,
      "loss": 0.0187,
      "step": 1017840
    },
    {
      "epoch": 1.6657502143843732,
      "grad_norm": 1.141988754272461,
      "learning_rate": 6.7124397909898995e-06,
      "loss": 0.0184,
      "step": 1017860
    },
    {
      "epoch": 1.6657829448230266,
      "grad_norm": 0.09467163681983948,
      "learning_rate": 6.712373898776382e-06,
      "loss": 0.0273,
      "step": 1017880
    },
    {
      "epoch": 1.6658156752616797,
      "grad_norm": 1.1594642400741577,
      "learning_rate": 6.712308006562866e-06,
      "loss": 0.0212,
      "step": 1017900
    },
    {
      "epoch": 1.6658484057003333,
      "grad_norm": 6.9530029296875,
      "learning_rate": 6.712242114349348e-06,
      "loss": 0.0187,
      "step": 1017920
    },
    {
      "epoch": 1.6658811361389865,
      "grad_norm": 0.5433357954025269,
      "learning_rate": 6.712176222135831e-06,
      "loss": 0.0235,
      "step": 1017940
    },
    {
      "epoch": 1.6659138665776398,
      "grad_norm": 0.3188897967338562,
      "learning_rate": 6.712110329922313e-06,
      "loss": 0.0205,
      "step": 1017960
    },
    {
      "epoch": 1.6659465970162932,
      "grad_norm": 1.0600008964538574,
      "learning_rate": 6.712044437708797e-06,
      "loss": 0.0191,
      "step": 1017980
    },
    {
      "epoch": 1.6659793274549466,
      "grad_norm": 2.9682061672210693,
      "learning_rate": 6.711978545495279e-06,
      "loss": 0.0208,
      "step": 1018000
    },
    {
      "epoch": 1.6660120578936,
      "grad_norm": 1.2000389099121094,
      "learning_rate": 6.711912653281762e-06,
      "loss": 0.0175,
      "step": 1018020
    },
    {
      "epoch": 1.6660447883322531,
      "grad_norm": 0.3764890134334564,
      "learning_rate": 6.711846761068245e-06,
      "loss": 0.0289,
      "step": 1018040
    },
    {
      "epoch": 1.6660775187709067,
      "grad_norm": 0.16492432355880737,
      "learning_rate": 6.711780868854728e-06,
      "loss": 0.0166,
      "step": 1018060
    },
    {
      "epoch": 1.6661102492095599,
      "grad_norm": 0.3822392225265503,
      "learning_rate": 6.7117149766412105e-06,
      "loss": 0.0243,
      "step": 1018080
    },
    {
      "epoch": 1.6661429796482132,
      "grad_norm": 0.21882466971874237,
      "learning_rate": 6.711649084427694e-06,
      "loss": 0.0168,
      "step": 1018100
    },
    {
      "epoch": 1.6661757100868666,
      "grad_norm": 0.5113095045089722,
      "learning_rate": 6.711583192214176e-06,
      "loss": 0.0213,
      "step": 1018120
    },
    {
      "epoch": 1.66620844052552,
      "grad_norm": 0.4439423978328705,
      "learning_rate": 6.7115173000006595e-06,
      "loss": 0.0157,
      "step": 1018140
    },
    {
      "epoch": 1.6662411709641733,
      "grad_norm": 0.1677078753709793,
      "learning_rate": 6.711451407787143e-06,
      "loss": 0.02,
      "step": 1018160
    },
    {
      "epoch": 1.6662739014028265,
      "grad_norm": 0.8321513533592224,
      "learning_rate": 6.711385515573625e-06,
      "loss": 0.0225,
      "step": 1018180
    },
    {
      "epoch": 1.66630663184148,
      "grad_norm": 1.503859281539917,
      "learning_rate": 6.711319623360109e-06,
      "loss": 0.022,
      "step": 1018200
    },
    {
      "epoch": 1.6663393622801332,
      "grad_norm": 1.4440879821777344,
      "learning_rate": 6.7112537311465905e-06,
      "loss": 0.0249,
      "step": 1018220
    },
    {
      "epoch": 1.6663720927187866,
      "grad_norm": 0.8322733640670776,
      "learning_rate": 6.711187838933074e-06,
      "loss": 0.0266,
      "step": 1018240
    },
    {
      "epoch": 1.66640482315744,
      "grad_norm": 0.3060293197631836,
      "learning_rate": 6.711121946719557e-06,
      "loss": 0.0161,
      "step": 1018260
    },
    {
      "epoch": 1.6664375535960931,
      "grad_norm": 0.270679235458374,
      "learning_rate": 6.7110560545060395e-06,
      "loss": 0.0207,
      "step": 1018280
    },
    {
      "epoch": 1.6664702840347467,
      "grad_norm": 1.0212260484695435,
      "learning_rate": 6.710990162292522e-06,
      "loss": 0.0194,
      "step": 1018300
    },
    {
      "epoch": 1.6665030144733999,
      "grad_norm": 0.7525061368942261,
      "learning_rate": 6.710924270079006e-06,
      "loss": 0.0227,
      "step": 1018320
    },
    {
      "epoch": 1.6665357449120535,
      "grad_norm": 0.20984555780887604,
      "learning_rate": 6.710858377865488e-06,
      "loss": 0.0154,
      "step": 1018340
    },
    {
      "epoch": 1.6665684753507066,
      "grad_norm": 0.820263147354126,
      "learning_rate": 6.710792485651971e-06,
      "loss": 0.0177,
      "step": 1018360
    },
    {
      "epoch": 1.66660120578936,
      "grad_norm": 0.315799742937088,
      "learning_rate": 6.710726593438453e-06,
      "loss": 0.0212,
      "step": 1018380
    },
    {
      "epoch": 1.6666339362280134,
      "grad_norm": 0.6835719347000122,
      "learning_rate": 6.710660701224937e-06,
      "loss": 0.0138,
      "step": 1018400
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.6545594930648804,
      "learning_rate": 6.7105948090114196e-06,
      "loss": 0.0287,
      "step": 1018420
    },
    {
      "epoch": 1.6666993971053201,
      "grad_norm": 0.7461503148078918,
      "learning_rate": 6.710528916797902e-06,
      "loss": 0.0203,
      "step": 1018440
    },
    {
      "epoch": 1.6667321275439733,
      "grad_norm": 0.5221815705299377,
      "learning_rate": 6.710463024584385e-06,
      "loss": 0.0164,
      "step": 1018460
    },
    {
      "epoch": 1.6667648579826266,
      "grad_norm": 0.2836460471153259,
      "learning_rate": 6.710397132370869e-06,
      "loss": 0.0179,
      "step": 1018480
    },
    {
      "epoch": 1.66679758842128,
      "grad_norm": 0.42815208435058594,
      "learning_rate": 6.7103312401573505e-06,
      "loss": 0.0235,
      "step": 1018500
    },
    {
      "epoch": 1.6668303188599334,
      "grad_norm": 0.6362078785896301,
      "learning_rate": 6.710265347943834e-06,
      "loss": 0.0202,
      "step": 1018520
    },
    {
      "epoch": 1.6668630492985868,
      "grad_norm": 1.588739275932312,
      "learning_rate": 6.710199455730318e-06,
      "loss": 0.0288,
      "step": 1018540
    },
    {
      "epoch": 1.66689577973724,
      "grad_norm": 0.3478209376335144,
      "learning_rate": 6.7101335635168e-06,
      "loss": 0.0278,
      "step": 1018560
    },
    {
      "epoch": 1.6669285101758935,
      "grad_norm": 0.5728873610496521,
      "learning_rate": 6.710067671303283e-06,
      "loss": 0.0235,
      "step": 1018580
    },
    {
      "epoch": 1.6669612406145466,
      "grad_norm": 0.3974626362323761,
      "learning_rate": 6.710001779089765e-06,
      "loss": 0.016,
      "step": 1018600
    },
    {
      "epoch": 1.6669939710532,
      "grad_norm": 0.9088898301124573,
      "learning_rate": 6.709935886876249e-06,
      "loss": 0.0116,
      "step": 1018620
    },
    {
      "epoch": 1.6670267014918534,
      "grad_norm": 0.22280758619308472,
      "learning_rate": 6.709869994662731e-06,
      "loss": 0.0229,
      "step": 1018640
    },
    {
      "epoch": 1.6670594319305068,
      "grad_norm": 0.3876616060733795,
      "learning_rate": 6.709804102449214e-06,
      "loss": 0.0211,
      "step": 1018660
    },
    {
      "epoch": 1.6670921623691601,
      "grad_norm": 0.4286365211009979,
      "learning_rate": 6.709738210235697e-06,
      "loss": 0.021,
      "step": 1018680
    },
    {
      "epoch": 1.6671248928078133,
      "grad_norm": 1.530861735343933,
      "learning_rate": 6.7096723180221805e-06,
      "loss": 0.0184,
      "step": 1018700
    },
    {
      "epoch": 1.6671576232464669,
      "grad_norm": 5.464574337005615,
      "learning_rate": 6.709606425808662e-06,
      "loss": 0.0295,
      "step": 1018720
    },
    {
      "epoch": 1.66719035368512,
      "grad_norm": 0.6810048222541809,
      "learning_rate": 6.709540533595146e-06,
      "loss": 0.023,
      "step": 1018740
    },
    {
      "epoch": 1.6672230841237734,
      "grad_norm": 0.15772715210914612,
      "learning_rate": 6.709474641381628e-06,
      "loss": 0.0233,
      "step": 1018760
    },
    {
      "epoch": 1.6672558145624268,
      "grad_norm": 0.47238481044769287,
      "learning_rate": 6.7094087491681114e-06,
      "loss": 0.0224,
      "step": 1018780
    },
    {
      "epoch": 1.6672885450010801,
      "grad_norm": 0.49693068861961365,
      "learning_rate": 6.709342856954594e-06,
      "loss": 0.019,
      "step": 1018800
    },
    {
      "epoch": 1.6673212754397335,
      "grad_norm": 0.27360013127326965,
      "learning_rate": 6.709276964741077e-06,
      "loss": 0.0176,
      "step": 1018820
    },
    {
      "epoch": 1.6673540058783867,
      "grad_norm": 0.31945452094078064,
      "learning_rate": 6.70921107252756e-06,
      "loss": 0.0197,
      "step": 1018840
    },
    {
      "epoch": 1.6673867363170403,
      "grad_norm": 1.6684898138046265,
      "learning_rate": 6.709145180314043e-06,
      "loss": 0.0253,
      "step": 1018860
    },
    {
      "epoch": 1.6674194667556934,
      "grad_norm": 0.3140316605567932,
      "learning_rate": 6.709079288100526e-06,
      "loss": 0.0199,
      "step": 1018880
    },
    {
      "epoch": 1.6674521971943468,
      "grad_norm": 0.4371282458305359,
      "learning_rate": 6.709013395887009e-06,
      "loss": 0.0207,
      "step": 1018900
    },
    {
      "epoch": 1.6674849276330002,
      "grad_norm": 0.9433550834655762,
      "learning_rate": 6.708947503673492e-06,
      "loss": 0.0138,
      "step": 1018920
    },
    {
      "epoch": 1.6675176580716535,
      "grad_norm": 2.6391639709472656,
      "learning_rate": 6.708881611459974e-06,
      "loss": 0.0208,
      "step": 1018940
    },
    {
      "epoch": 1.667550388510307,
      "grad_norm": 0.3257451355457306,
      "learning_rate": 6.708815719246458e-06,
      "loss": 0.0112,
      "step": 1018960
    },
    {
      "epoch": 1.66758311894896,
      "grad_norm": 0.5633372068405151,
      "learning_rate": 6.70874982703294e-06,
      "loss": 0.0199,
      "step": 1018980
    },
    {
      "epoch": 1.6676158493876136,
      "grad_norm": 0.3912161886692047,
      "learning_rate": 6.708683934819423e-06,
      "loss": 0.0182,
      "step": 1019000
    },
    {
      "epoch": 1.6676485798262668,
      "grad_norm": 0.25292104482650757,
      "learning_rate": 6.708618042605905e-06,
      "loss": 0.0214,
      "step": 1019020
    },
    {
      "epoch": 1.6676813102649202,
      "grad_norm": 1.1014127731323242,
      "learning_rate": 6.708552150392389e-06,
      "loss": 0.0261,
      "step": 1019040
    },
    {
      "epoch": 1.6677140407035735,
      "grad_norm": 1.759049415588379,
      "learning_rate": 6.7084862581788715e-06,
      "loss": 0.0165,
      "step": 1019060
    },
    {
      "epoch": 1.6677467711422267,
      "grad_norm": 0.43828460574150085,
      "learning_rate": 6.708420365965354e-06,
      "loss": 0.0233,
      "step": 1019080
    },
    {
      "epoch": 1.6677795015808803,
      "grad_norm": 1.4807014465332031,
      "learning_rate": 6.708354473751837e-06,
      "loss": 0.0191,
      "step": 1019100
    },
    {
      "epoch": 1.6678122320195334,
      "grad_norm": 1.6343592405319214,
      "learning_rate": 6.7082885815383205e-06,
      "loss": 0.0266,
      "step": 1019120
    },
    {
      "epoch": 1.667844962458187,
      "grad_norm": 0.2974725663661957,
      "learning_rate": 6.7082226893248024e-06,
      "loss": 0.0162,
      "step": 1019140
    },
    {
      "epoch": 1.6678776928968402,
      "grad_norm": 1.183258295059204,
      "learning_rate": 6.708156797111286e-06,
      "loss": 0.0217,
      "step": 1019160
    },
    {
      "epoch": 1.6679104233354936,
      "grad_norm": 0.9027193188667297,
      "learning_rate": 6.708090904897768e-06,
      "loss": 0.0218,
      "step": 1019180
    },
    {
      "epoch": 1.667943153774147,
      "grad_norm": 4.612708568572998,
      "learning_rate": 6.7080250126842515e-06,
      "loss": 0.0175,
      "step": 1019200
    },
    {
      "epoch": 1.6679758842128,
      "grad_norm": 0.39095956087112427,
      "learning_rate": 6.707959120470735e-06,
      "loss": 0.0215,
      "step": 1019220
    },
    {
      "epoch": 1.6680086146514537,
      "grad_norm": 0.43581414222717285,
      "learning_rate": 6.707893228257217e-06,
      "loss": 0.0177,
      "step": 1019240
    },
    {
      "epoch": 1.6680413450901068,
      "grad_norm": 0.6620215773582458,
      "learning_rate": 6.7078273360437006e-06,
      "loss": 0.0184,
      "step": 1019260
    },
    {
      "epoch": 1.6680740755287602,
      "grad_norm": 1.6676796674728394,
      "learning_rate": 6.707761443830183e-06,
      "loss": 0.0154,
      "step": 1019280
    },
    {
      "epoch": 1.6681068059674136,
      "grad_norm": 0.33451706171035767,
      "learning_rate": 6.707695551616666e-06,
      "loss": 0.0162,
      "step": 1019300
    },
    {
      "epoch": 1.668139536406067,
      "grad_norm": 0.6187968254089355,
      "learning_rate": 6.707629659403149e-06,
      "loss": 0.0216,
      "step": 1019320
    },
    {
      "epoch": 1.6681722668447203,
      "grad_norm": 1.3646421432495117,
      "learning_rate": 6.707563767189632e-06,
      "loss": 0.0234,
      "step": 1019340
    },
    {
      "epoch": 1.6682049972833735,
      "grad_norm": 0.20759651064872742,
      "learning_rate": 6.707497874976114e-06,
      "loss": 0.0155,
      "step": 1019360
    },
    {
      "epoch": 1.668237727722027,
      "grad_norm": 1.0961875915527344,
      "learning_rate": 6.707431982762598e-06,
      "loss": 0.0294,
      "step": 1019380
    },
    {
      "epoch": 1.6682704581606802,
      "grad_norm": 0.8639732003211975,
      "learning_rate": 6.70736609054908e-06,
      "loss": 0.0169,
      "step": 1019400
    },
    {
      "epoch": 1.6683031885993336,
      "grad_norm": 0.7608222365379333,
      "learning_rate": 6.707300198335563e-06,
      "loss": 0.0182,
      "step": 1019420
    },
    {
      "epoch": 1.668335919037987,
      "grad_norm": 0.3555692732334137,
      "learning_rate": 6.707234306122046e-06,
      "loss": 0.0213,
      "step": 1019440
    },
    {
      "epoch": 1.6683686494766403,
      "grad_norm": 0.41422268748283386,
      "learning_rate": 6.707168413908529e-06,
      "loss": 0.0192,
      "step": 1019460
    },
    {
      "epoch": 1.6684013799152937,
      "grad_norm": 1.239213228225708,
      "learning_rate": 6.7071025216950116e-06,
      "loss": 0.0215,
      "step": 1019480
    },
    {
      "epoch": 1.6684341103539468,
      "grad_norm": 0.33736181259155273,
      "learning_rate": 6.707036629481495e-06,
      "loss": 0.0163,
      "step": 1019500
    },
    {
      "epoch": 1.6684668407926004,
      "grad_norm": 0.32868996262550354,
      "learning_rate": 6.706970737267977e-06,
      "loss": 0.0188,
      "step": 1019520
    },
    {
      "epoch": 1.6684995712312536,
      "grad_norm": 0.2573937475681305,
      "learning_rate": 6.706904845054461e-06,
      "loss": 0.0202,
      "step": 1019540
    },
    {
      "epoch": 1.668532301669907,
      "grad_norm": 0.08955816179513931,
      "learning_rate": 6.706838952840944e-06,
      "loss": 0.0209,
      "step": 1019560
    },
    {
      "epoch": 1.6685650321085603,
      "grad_norm": 0.06936708837747574,
      "learning_rate": 6.706773060627426e-06,
      "loss": 0.0202,
      "step": 1019580
    },
    {
      "epoch": 1.6685977625472137,
      "grad_norm": 1.4674352407455444,
      "learning_rate": 6.70670716841391e-06,
      "loss": 0.027,
      "step": 1019600
    },
    {
      "epoch": 1.668630492985867,
      "grad_norm": 0.7330238223075867,
      "learning_rate": 6.706641276200392e-06,
      "loss": 0.0165,
      "step": 1019620
    },
    {
      "epoch": 1.6686632234245202,
      "grad_norm": 0.8407678604125977,
      "learning_rate": 6.706575383986875e-06,
      "loss": 0.0125,
      "step": 1019640
    },
    {
      "epoch": 1.6686959538631738,
      "grad_norm": 0.42752039432525635,
      "learning_rate": 6.706509491773358e-06,
      "loss": 0.0187,
      "step": 1019660
    },
    {
      "epoch": 1.668728684301827,
      "grad_norm": 0.24861475825309753,
      "learning_rate": 6.706443599559841e-06,
      "loss": 0.0197,
      "step": 1019680
    },
    {
      "epoch": 1.6687614147404803,
      "grad_norm": 0.3125925362110138,
      "learning_rate": 6.706377707346323e-06,
      "loss": 0.0132,
      "step": 1019700
    },
    {
      "epoch": 1.6687941451791337,
      "grad_norm": 0.9365040063858032,
      "learning_rate": 6.706311815132807e-06,
      "loss": 0.0205,
      "step": 1019720
    },
    {
      "epoch": 1.6688268756177869,
      "grad_norm": 0.6310626864433289,
      "learning_rate": 6.706245922919289e-06,
      "loss": 0.0165,
      "step": 1019740
    },
    {
      "epoch": 1.6688596060564405,
      "grad_norm": 0.11842400580644608,
      "learning_rate": 6.7061800307057725e-06,
      "loss": 0.018,
      "step": 1019760
    },
    {
      "epoch": 1.6688923364950936,
      "grad_norm": 0.07787758857011795,
      "learning_rate": 6.706114138492254e-06,
      "loss": 0.0188,
      "step": 1019780
    },
    {
      "epoch": 1.6689250669337472,
      "grad_norm": 0.2323346734046936,
      "learning_rate": 6.706048246278738e-06,
      "loss": 0.0174,
      "step": 1019800
    },
    {
      "epoch": 1.6689577973724004,
      "grad_norm": 0.19396336376667023,
      "learning_rate": 6.70598235406522e-06,
      "loss": 0.0237,
      "step": 1019820
    },
    {
      "epoch": 1.6689905278110537,
      "grad_norm": 0.22722536325454712,
      "learning_rate": 6.705916461851703e-06,
      "loss": 0.0187,
      "step": 1019840
    },
    {
      "epoch": 1.669023258249707,
      "grad_norm": 0.7478309869766235,
      "learning_rate": 6.705850569638186e-06,
      "loss": 0.0219,
      "step": 1019860
    },
    {
      "epoch": 1.6690559886883602,
      "grad_norm": 0.7088032364845276,
      "learning_rate": 6.70578467742467e-06,
      "loss": 0.0187,
      "step": 1019880
    },
    {
      "epoch": 1.6690887191270138,
      "grad_norm": 0.5512495040893555,
      "learning_rate": 6.705718785211152e-06,
      "loss": 0.0219,
      "step": 1019900
    },
    {
      "epoch": 1.669121449565667,
      "grad_norm": 0.32513877749443054,
      "learning_rate": 6.705652892997635e-06,
      "loss": 0.0121,
      "step": 1019920
    },
    {
      "epoch": 1.6691541800043204,
      "grad_norm": 1.2458761930465698,
      "learning_rate": 6.705587000784119e-06,
      "loss": 0.0211,
      "step": 1019940
    },
    {
      "epoch": 1.6691869104429737,
      "grad_norm": 1.5409654378890991,
      "learning_rate": 6.705521108570601e-06,
      "loss": 0.02,
      "step": 1019960
    },
    {
      "epoch": 1.669219640881627,
      "grad_norm": 1.289889931678772,
      "learning_rate": 6.705455216357084e-06,
      "loss": 0.0214,
      "step": 1019980
    },
    {
      "epoch": 1.6692523713202805,
      "grad_norm": 0.5688507556915283,
      "learning_rate": 6.705389324143566e-06,
      "loss": 0.0195,
      "step": 1020000
    },
    {
      "epoch": 1.6692851017589336,
      "grad_norm": 0.8255680203437805,
      "learning_rate": 6.70532343193005e-06,
      "loss": 0.0122,
      "step": 1020020
    },
    {
      "epoch": 1.6693178321975872,
      "grad_norm": 0.6955511569976807,
      "learning_rate": 6.705257539716532e-06,
      "loss": 0.0156,
      "step": 1020040
    },
    {
      "epoch": 1.6693505626362404,
      "grad_norm": 1.499306082725525,
      "learning_rate": 6.705191647503015e-06,
      "loss": 0.0212,
      "step": 1020060
    },
    {
      "epoch": 1.6693832930748937,
      "grad_norm": 0.9855849146842957,
      "learning_rate": 6.705125755289498e-06,
      "loss": 0.0176,
      "step": 1020080
    },
    {
      "epoch": 1.6694160235135471,
      "grad_norm": 0.8086410760879517,
      "learning_rate": 6.705059863075981e-06,
      "loss": 0.0252,
      "step": 1020100
    },
    {
      "epoch": 1.6694487539522005,
      "grad_norm": 0.4435334801673889,
      "learning_rate": 6.7049939708624635e-06,
      "loss": 0.0174,
      "step": 1020120
    },
    {
      "epoch": 1.6694814843908539,
      "grad_norm": 0.592458188533783,
      "learning_rate": 6.704928078648947e-06,
      "loss": 0.022,
      "step": 1020140
    },
    {
      "epoch": 1.669514214829507,
      "grad_norm": 0.3970203399658203,
      "learning_rate": 6.704862186435429e-06,
      "loss": 0.0222,
      "step": 1020160
    },
    {
      "epoch": 1.6695469452681606,
      "grad_norm": 0.1481161117553711,
      "learning_rate": 6.7047962942219125e-06,
      "loss": 0.0192,
      "step": 1020180
    },
    {
      "epoch": 1.6695796757068138,
      "grad_norm": 0.27557533979415894,
      "learning_rate": 6.704730402008394e-06,
      "loss": 0.022,
      "step": 1020200
    },
    {
      "epoch": 1.6696124061454671,
      "grad_norm": 0.7333060503005981,
      "learning_rate": 6.704664509794878e-06,
      "loss": 0.0206,
      "step": 1020220
    },
    {
      "epoch": 1.6696451365841205,
      "grad_norm": 0.849389910697937,
      "learning_rate": 6.704598617581361e-06,
      "loss": 0.0142,
      "step": 1020240
    },
    {
      "epoch": 1.6696778670227739,
      "grad_norm": 0.6019580364227295,
      "learning_rate": 6.7045327253678435e-06,
      "loss": 0.0237,
      "step": 1020260
    },
    {
      "epoch": 1.6697105974614272,
      "grad_norm": 0.5363653898239136,
      "learning_rate": 6.704466833154327e-06,
      "loss": 0.0204,
      "step": 1020280
    },
    {
      "epoch": 1.6697433279000804,
      "grad_norm": 0.3089519441127777,
      "learning_rate": 6.70440094094081e-06,
      "loss": 0.0165,
      "step": 1020300
    },
    {
      "epoch": 1.669776058338734,
      "grad_norm": 0.6505593657493591,
      "learning_rate": 6.7043350487272926e-06,
      "loss": 0.0228,
      "step": 1020320
    },
    {
      "epoch": 1.6698087887773871,
      "grad_norm": 0.22746829688549042,
      "learning_rate": 6.704269156513775e-06,
      "loss": 0.0123,
      "step": 1020340
    },
    {
      "epoch": 1.6698415192160405,
      "grad_norm": 0.18054766952991486,
      "learning_rate": 6.704203264300259e-06,
      "loss": 0.0184,
      "step": 1020360
    },
    {
      "epoch": 1.6698742496546939,
      "grad_norm": 0.09801927953958511,
      "learning_rate": 6.704137372086741e-06,
      "loss": 0.0226,
      "step": 1020380
    },
    {
      "epoch": 1.6699069800933473,
      "grad_norm": 0.6391915082931519,
      "learning_rate": 6.704071479873224e-06,
      "loss": 0.0146,
      "step": 1020400
    },
    {
      "epoch": 1.6699397105320006,
      "grad_norm": 0.5547536015510559,
      "learning_rate": 6.704005587659706e-06,
      "loss": 0.0211,
      "step": 1020420
    },
    {
      "epoch": 1.6699724409706538,
      "grad_norm": 0.6852214336395264,
      "learning_rate": 6.70393969544619e-06,
      "loss": 0.021,
      "step": 1020440
    },
    {
      "epoch": 1.6700051714093074,
      "grad_norm": 0.057298846542835236,
      "learning_rate": 6.703873803232673e-06,
      "loss": 0.0199,
      "step": 1020460
    },
    {
      "epoch": 1.6700379018479605,
      "grad_norm": 1.5493007898330688,
      "learning_rate": 6.703807911019155e-06,
      "loss": 0.0184,
      "step": 1020480
    },
    {
      "epoch": 1.670070632286614,
      "grad_norm": 1.2123279571533203,
      "learning_rate": 6.703742018805638e-06,
      "loss": 0.0194,
      "step": 1020500
    },
    {
      "epoch": 1.6701033627252673,
      "grad_norm": 0.5223985910415649,
      "learning_rate": 6.703676126592122e-06,
      "loss": 0.0286,
      "step": 1020520
    },
    {
      "epoch": 1.6701360931639204,
      "grad_norm": 0.3568451404571533,
      "learning_rate": 6.7036102343786035e-06,
      "loss": 0.0215,
      "step": 1020540
    },
    {
      "epoch": 1.670168823602574,
      "grad_norm": 1.1939964294433594,
      "learning_rate": 6.703544342165087e-06,
      "loss": 0.0225,
      "step": 1020560
    },
    {
      "epoch": 1.6702015540412272,
      "grad_norm": 0.30065351724624634,
      "learning_rate": 6.703478449951569e-06,
      "loss": 0.0131,
      "step": 1020580
    },
    {
      "epoch": 1.6702342844798808,
      "grad_norm": 2.0978872776031494,
      "learning_rate": 6.703412557738053e-06,
      "loss": 0.0208,
      "step": 1020600
    },
    {
      "epoch": 1.670267014918534,
      "grad_norm": 0.5380196571350098,
      "learning_rate": 6.703346665524536e-06,
      "loss": 0.0246,
      "step": 1020620
    },
    {
      "epoch": 1.6702997453571873,
      "grad_norm": 0.35345977544784546,
      "learning_rate": 6.703280773311018e-06,
      "loss": 0.0153,
      "step": 1020640
    },
    {
      "epoch": 1.6703324757958407,
      "grad_norm": 0.6777926087379456,
      "learning_rate": 6.703214881097502e-06,
      "loss": 0.0254,
      "step": 1020660
    },
    {
      "epoch": 1.6703652062344938,
      "grad_norm": 0.43429046869277954,
      "learning_rate": 6.703148988883984e-06,
      "loss": 0.0097,
      "step": 1020680
    },
    {
      "epoch": 1.6703979366731474,
      "grad_norm": 0.08486933261156082,
      "learning_rate": 6.703083096670467e-06,
      "loss": 0.0225,
      "step": 1020700
    },
    {
      "epoch": 1.6704306671118005,
      "grad_norm": 1.267052412033081,
      "learning_rate": 6.70301720445695e-06,
      "loss": 0.0167,
      "step": 1020720
    },
    {
      "epoch": 1.670463397550454,
      "grad_norm": 0.13726001977920532,
      "learning_rate": 6.7029513122434335e-06,
      "loss": 0.018,
      "step": 1020740
    },
    {
      "epoch": 1.6704961279891073,
      "grad_norm": 0.45212918519973755,
      "learning_rate": 6.702885420029915e-06,
      "loss": 0.014,
      "step": 1020760
    },
    {
      "epoch": 1.6705288584277607,
      "grad_norm": 0.27117910981178284,
      "learning_rate": 6.702819527816399e-06,
      "loss": 0.0188,
      "step": 1020780
    },
    {
      "epoch": 1.670561588866414,
      "grad_norm": 0.49382612109184265,
      "learning_rate": 6.702753635602881e-06,
      "loss": 0.0175,
      "step": 1020800
    },
    {
      "epoch": 1.6705943193050672,
      "grad_norm": 0.44252079725265503,
      "learning_rate": 6.7026877433893644e-06,
      "loss": 0.0229,
      "step": 1020820
    },
    {
      "epoch": 1.6706270497437208,
      "grad_norm": 0.7022648453712463,
      "learning_rate": 6.702621851175846e-06,
      "loss": 0.0201,
      "step": 1020840
    },
    {
      "epoch": 1.670659780182374,
      "grad_norm": 0.5313448905944824,
      "learning_rate": 6.70255595896233e-06,
      "loss": 0.0172,
      "step": 1020860
    },
    {
      "epoch": 1.6706925106210273,
      "grad_norm": 0.02475021965801716,
      "learning_rate": 6.702490066748813e-06,
      "loss": 0.0123,
      "step": 1020880
    },
    {
      "epoch": 1.6707252410596807,
      "grad_norm": 0.20386779308319092,
      "learning_rate": 6.702424174535295e-06,
      "loss": 0.0162,
      "step": 1020900
    },
    {
      "epoch": 1.670757971498334,
      "grad_norm": 0.1684863418340683,
      "learning_rate": 6.702358282321778e-06,
      "loss": 0.0128,
      "step": 1020920
    },
    {
      "epoch": 1.6707907019369874,
      "grad_norm": 0.2134258896112442,
      "learning_rate": 6.702292390108262e-06,
      "loss": 0.0149,
      "step": 1020940
    },
    {
      "epoch": 1.6708234323756406,
      "grad_norm": 0.7926204800605774,
      "learning_rate": 6.702226497894744e-06,
      "loss": 0.0246,
      "step": 1020960
    },
    {
      "epoch": 1.6708561628142942,
      "grad_norm": 0.4806579053401947,
      "learning_rate": 6.702160605681227e-06,
      "loss": 0.0214,
      "step": 1020980
    },
    {
      "epoch": 1.6708888932529473,
      "grad_norm": 1.2343223094940186,
      "learning_rate": 6.702094713467711e-06,
      "loss": 0.0121,
      "step": 1021000
    },
    {
      "epoch": 1.6709216236916007,
      "grad_norm": 1.7323163747787476,
      "learning_rate": 6.702028821254193e-06,
      "loss": 0.0338,
      "step": 1021020
    },
    {
      "epoch": 1.670954354130254,
      "grad_norm": 0.9837846159934998,
      "learning_rate": 6.701962929040676e-06,
      "loss": 0.0241,
      "step": 1021040
    },
    {
      "epoch": 1.6709870845689074,
      "grad_norm": 0.8829460144042969,
      "learning_rate": 6.701897036827158e-06,
      "loss": 0.0233,
      "step": 1021060
    },
    {
      "epoch": 1.6710198150075608,
      "grad_norm": 0.24041393399238586,
      "learning_rate": 6.701831144613642e-06,
      "loss": 0.0159,
      "step": 1021080
    },
    {
      "epoch": 1.671052545446214,
      "grad_norm": 1.0164037942886353,
      "learning_rate": 6.7017652524001245e-06,
      "loss": 0.0214,
      "step": 1021100
    },
    {
      "epoch": 1.6710852758848675,
      "grad_norm": 0.5394562482833862,
      "learning_rate": 6.701699360186607e-06,
      "loss": 0.0133,
      "step": 1021120
    },
    {
      "epoch": 1.6711180063235207,
      "grad_norm": 1.1818422079086304,
      "learning_rate": 6.70163346797309e-06,
      "loss": 0.0208,
      "step": 1021140
    },
    {
      "epoch": 1.671150736762174,
      "grad_norm": 0.6264756321907043,
      "learning_rate": 6.7015675757595736e-06,
      "loss": 0.0242,
      "step": 1021160
    },
    {
      "epoch": 1.6711834672008274,
      "grad_norm": 0.5966476798057556,
      "learning_rate": 6.7015016835460554e-06,
      "loss": 0.0199,
      "step": 1021180
    },
    {
      "epoch": 1.6712161976394808,
      "grad_norm": 0.338741660118103,
      "learning_rate": 6.701435791332539e-06,
      "loss": 0.0179,
      "step": 1021200
    },
    {
      "epoch": 1.6712489280781342,
      "grad_norm": 0.32435640692710876,
      "learning_rate": 6.701369899119021e-06,
      "loss": 0.016,
      "step": 1021220
    },
    {
      "epoch": 1.6712816585167873,
      "grad_norm": 0.3129960000514984,
      "learning_rate": 6.7013040069055045e-06,
      "loss": 0.0239,
      "step": 1021240
    },
    {
      "epoch": 1.671314388955441,
      "grad_norm": 0.8847612738609314,
      "learning_rate": 6.701238114691987e-06,
      "loss": 0.0187,
      "step": 1021260
    },
    {
      "epoch": 1.671347119394094,
      "grad_norm": 0.3290237486362457,
      "learning_rate": 6.70117222247847e-06,
      "loss": 0.0219,
      "step": 1021280
    },
    {
      "epoch": 1.6713798498327475,
      "grad_norm": 0.7395205497741699,
      "learning_rate": 6.701106330264953e-06,
      "loss": 0.0156,
      "step": 1021300
    },
    {
      "epoch": 1.6714125802714008,
      "grad_norm": 0.7475533485412598,
      "learning_rate": 6.701040438051436e-06,
      "loss": 0.0205,
      "step": 1021320
    },
    {
      "epoch": 1.671445310710054,
      "grad_norm": 0.6386131644248962,
      "learning_rate": 6.700974545837919e-06,
      "loss": 0.0166,
      "step": 1021340
    },
    {
      "epoch": 1.6714780411487076,
      "grad_norm": 1.9024865627288818,
      "learning_rate": 6.700908653624402e-06,
      "loss": 0.0278,
      "step": 1021360
    },
    {
      "epoch": 1.6715107715873607,
      "grad_norm": 0.48404714465141296,
      "learning_rate": 6.700842761410885e-06,
      "loss": 0.0225,
      "step": 1021380
    },
    {
      "epoch": 1.6715435020260143,
      "grad_norm": 0.609906792640686,
      "learning_rate": 6.700776869197367e-06,
      "loss": 0.0204,
      "step": 1021400
    },
    {
      "epoch": 1.6715762324646675,
      "grad_norm": 0.38120967149734497,
      "learning_rate": 6.700710976983851e-06,
      "loss": 0.0189,
      "step": 1021420
    },
    {
      "epoch": 1.6716089629033208,
      "grad_norm": 0.5634992718696594,
      "learning_rate": 6.700645084770333e-06,
      "loss": 0.0221,
      "step": 1021440
    },
    {
      "epoch": 1.6716416933419742,
      "grad_norm": 0.3020849823951721,
      "learning_rate": 6.700579192556816e-06,
      "loss": 0.0132,
      "step": 1021460
    },
    {
      "epoch": 1.6716744237806274,
      "grad_norm": 0.8808677792549133,
      "learning_rate": 6.700513300343299e-06,
      "loss": 0.017,
      "step": 1021480
    },
    {
      "epoch": 1.671707154219281,
      "grad_norm": 0.4927583634853363,
      "learning_rate": 6.700447408129782e-06,
      "loss": 0.0133,
      "step": 1021500
    },
    {
      "epoch": 1.671739884657934,
      "grad_norm": 0.17973990738391876,
      "learning_rate": 6.7003815159162646e-06,
      "loss": 0.0146,
      "step": 1021520
    },
    {
      "epoch": 1.6717726150965875,
      "grad_norm": 3.7233643531799316,
      "learning_rate": 6.700315623702748e-06,
      "loss": 0.0134,
      "step": 1021540
    },
    {
      "epoch": 1.6718053455352408,
      "grad_norm": 0.4077039659023285,
      "learning_rate": 6.70024973148923e-06,
      "loss": 0.0208,
      "step": 1021560
    },
    {
      "epoch": 1.6718380759738942,
      "grad_norm": 1.6184593439102173,
      "learning_rate": 6.700183839275714e-06,
      "loss": 0.0272,
      "step": 1021580
    },
    {
      "epoch": 1.6718708064125476,
      "grad_norm": 0.9582682847976685,
      "learning_rate": 6.7001179470621955e-06,
      "loss": 0.0183,
      "step": 1021600
    },
    {
      "epoch": 1.6719035368512007,
      "grad_norm": 0.2992327809333801,
      "learning_rate": 6.700052054848679e-06,
      "loss": 0.0245,
      "step": 1021620
    },
    {
      "epoch": 1.6719362672898543,
      "grad_norm": 0.5029940605163574,
      "learning_rate": 6.699986162635162e-06,
      "loss": 0.0216,
      "step": 1021640
    },
    {
      "epoch": 1.6719689977285075,
      "grad_norm": 0.2337086796760559,
      "learning_rate": 6.699920270421645e-06,
      "loss": 0.0179,
      "step": 1021660
    },
    {
      "epoch": 1.6720017281671609,
      "grad_norm": 0.29453620314598083,
      "learning_rate": 6.699854378208128e-06,
      "loss": 0.0269,
      "step": 1021680
    },
    {
      "epoch": 1.6720344586058142,
      "grad_norm": 0.8180349469184875,
      "learning_rate": 6.699788485994611e-06,
      "loss": 0.0136,
      "step": 1021700
    },
    {
      "epoch": 1.6720671890444676,
      "grad_norm": 0.12760111689567566,
      "learning_rate": 6.699722593781094e-06,
      "loss": 0.0188,
      "step": 1021720
    },
    {
      "epoch": 1.672099919483121,
      "grad_norm": 0.6058645248413086,
      "learning_rate": 6.699656701567576e-06,
      "loss": 0.0211,
      "step": 1021740
    },
    {
      "epoch": 1.6721326499217741,
      "grad_norm": 0.6194970607757568,
      "learning_rate": 6.69959080935406e-06,
      "loss": 0.0194,
      "step": 1021760
    },
    {
      "epoch": 1.6721653803604277,
      "grad_norm": 0.5382916331291199,
      "learning_rate": 6.699524917140542e-06,
      "loss": 0.0123,
      "step": 1021780
    },
    {
      "epoch": 1.6721981107990809,
      "grad_norm": 2.995300531387329,
      "learning_rate": 6.6994590249270255e-06,
      "loss": 0.0258,
      "step": 1021800
    },
    {
      "epoch": 1.6722308412377342,
      "grad_norm": 0.24322912096977234,
      "learning_rate": 6.699393132713507e-06,
      "loss": 0.0167,
      "step": 1021820
    },
    {
      "epoch": 1.6722635716763876,
      "grad_norm": 0.19935135543346405,
      "learning_rate": 6.699327240499991e-06,
      "loss": 0.0187,
      "step": 1021840
    },
    {
      "epoch": 1.672296302115041,
      "grad_norm": 0.33911240100860596,
      "learning_rate": 6.699261348286473e-06,
      "loss": 0.0191,
      "step": 1021860
    },
    {
      "epoch": 1.6723290325536944,
      "grad_norm": 0.5821330547332764,
      "learning_rate": 6.699195456072956e-06,
      "loss": 0.0254,
      "step": 1021880
    },
    {
      "epoch": 1.6723617629923475,
      "grad_norm": 0.17735320329666138,
      "learning_rate": 6.699129563859439e-06,
      "loss": 0.0209,
      "step": 1021900
    },
    {
      "epoch": 1.672394493431001,
      "grad_norm": 2.4547579288482666,
      "learning_rate": 6.699063671645922e-06,
      "loss": 0.0227,
      "step": 1021920
    },
    {
      "epoch": 1.6724272238696543,
      "grad_norm": 0.13512766361236572,
      "learning_rate": 6.698997779432405e-06,
      "loss": 0.0221,
      "step": 1021940
    },
    {
      "epoch": 1.6724599543083076,
      "grad_norm": 1.1414786577224731,
      "learning_rate": 6.698931887218888e-06,
      "loss": 0.0202,
      "step": 1021960
    },
    {
      "epoch": 1.672492684746961,
      "grad_norm": 1.415016770362854,
      "learning_rate": 6.69886599500537e-06,
      "loss": 0.0275,
      "step": 1021980
    },
    {
      "epoch": 1.6725254151856144,
      "grad_norm": 1.1757287979125977,
      "learning_rate": 6.698800102791854e-06,
      "loss": 0.0193,
      "step": 1022000
    },
    {
      "epoch": 1.6725581456242677,
      "grad_norm": 0.639591634273529,
      "learning_rate": 6.698734210578337e-06,
      "loss": 0.0211,
      "step": 1022020
    },
    {
      "epoch": 1.672590876062921,
      "grad_norm": 0.815666913986206,
      "learning_rate": 6.698668318364819e-06,
      "loss": 0.0199,
      "step": 1022040
    },
    {
      "epoch": 1.6726236065015745,
      "grad_norm": 0.37253397703170776,
      "learning_rate": 6.698602426151303e-06,
      "loss": 0.0185,
      "step": 1022060
    },
    {
      "epoch": 1.6726563369402276,
      "grad_norm": 0.262004554271698,
      "learning_rate": 6.698536533937785e-06,
      "loss": 0.0152,
      "step": 1022080
    },
    {
      "epoch": 1.672689067378881,
      "grad_norm": 0.6651976108551025,
      "learning_rate": 6.698470641724268e-06,
      "loss": 0.0157,
      "step": 1022100
    },
    {
      "epoch": 1.6727217978175344,
      "grad_norm": 0.6448154449462891,
      "learning_rate": 6.698404749510751e-06,
      "loss": 0.0268,
      "step": 1022120
    },
    {
      "epoch": 1.6727545282561875,
      "grad_norm": 0.3659282624721527,
      "learning_rate": 6.698338857297234e-06,
      "loss": 0.0191,
      "step": 1022140
    },
    {
      "epoch": 1.6727872586948411,
      "grad_norm": 0.2866065204143524,
      "learning_rate": 6.6982729650837165e-06,
      "loss": 0.0206,
      "step": 1022160
    },
    {
      "epoch": 1.6728199891334943,
      "grad_norm": 0.20970885455608368,
      "learning_rate": 6.6982070728702e-06,
      "loss": 0.0145,
      "step": 1022180
    },
    {
      "epoch": 1.6728527195721477,
      "grad_norm": 0.3517451882362366,
      "learning_rate": 6.698141180656682e-06,
      "loss": 0.0226,
      "step": 1022200
    },
    {
      "epoch": 1.672885450010801,
      "grad_norm": 0.3653589189052582,
      "learning_rate": 6.6980752884431655e-06,
      "loss": 0.0199,
      "step": 1022220
    },
    {
      "epoch": 1.6729181804494544,
      "grad_norm": 0.24833634495735168,
      "learning_rate": 6.6980093962296474e-06,
      "loss": 0.0183,
      "step": 1022240
    },
    {
      "epoch": 1.6729509108881078,
      "grad_norm": 0.6754253506660461,
      "learning_rate": 6.697943504016131e-06,
      "loss": 0.0249,
      "step": 1022260
    },
    {
      "epoch": 1.672983641326761,
      "grad_norm": 1.4767327308654785,
      "learning_rate": 6.697877611802614e-06,
      "loss": 0.0224,
      "step": 1022280
    },
    {
      "epoch": 1.6730163717654145,
      "grad_norm": 1.1620609760284424,
      "learning_rate": 6.6978117195890965e-06,
      "loss": 0.012,
      "step": 1022300
    },
    {
      "epoch": 1.6730491022040677,
      "grad_norm": 0.3332083225250244,
      "learning_rate": 6.697745827375579e-06,
      "loss": 0.0239,
      "step": 1022320
    },
    {
      "epoch": 1.673081832642721,
      "grad_norm": 0.9892023801803589,
      "learning_rate": 6.697679935162063e-06,
      "loss": 0.0152,
      "step": 1022340
    },
    {
      "epoch": 1.6731145630813744,
      "grad_norm": 0.200588196516037,
      "learning_rate": 6.697614042948545e-06,
      "loss": 0.011,
      "step": 1022360
    },
    {
      "epoch": 1.6731472935200278,
      "grad_norm": 0.620478093624115,
      "learning_rate": 6.697548150735028e-06,
      "loss": 0.0309,
      "step": 1022380
    },
    {
      "epoch": 1.6731800239586812,
      "grad_norm": 0.08900425583124161,
      "learning_rate": 6.697482258521512e-06,
      "loss": 0.0178,
      "step": 1022400
    },
    {
      "epoch": 1.6732127543973343,
      "grad_norm": 0.26486966013908386,
      "learning_rate": 6.697416366307994e-06,
      "loss": 0.0141,
      "step": 1022420
    },
    {
      "epoch": 1.673245484835988,
      "grad_norm": 0.4875475764274597,
      "learning_rate": 6.697350474094477e-06,
      "loss": 0.0138,
      "step": 1022440
    },
    {
      "epoch": 1.673278215274641,
      "grad_norm": 1.2102956771850586,
      "learning_rate": 6.697284581880959e-06,
      "loss": 0.0124,
      "step": 1022460
    },
    {
      "epoch": 1.6733109457132944,
      "grad_norm": 0.20082633197307587,
      "learning_rate": 6.697218689667443e-06,
      "loss": 0.0153,
      "step": 1022480
    },
    {
      "epoch": 1.6733436761519478,
      "grad_norm": 0.16225320100784302,
      "learning_rate": 6.697152797453926e-06,
      "loss": 0.0205,
      "step": 1022500
    },
    {
      "epoch": 1.6733764065906012,
      "grad_norm": 0.8500077128410339,
      "learning_rate": 6.697086905240408e-06,
      "loss": 0.0195,
      "step": 1022520
    },
    {
      "epoch": 1.6734091370292545,
      "grad_norm": 0.9791631698608398,
      "learning_rate": 6.697021013026891e-06,
      "loss": 0.0255,
      "step": 1022540
    },
    {
      "epoch": 1.6734418674679077,
      "grad_norm": 0.6110812425613403,
      "learning_rate": 6.696955120813375e-06,
      "loss": 0.0196,
      "step": 1022560
    },
    {
      "epoch": 1.6734745979065613,
      "grad_norm": 0.14334680140018463,
      "learning_rate": 6.6968892285998565e-06,
      "loss": 0.0205,
      "step": 1022580
    },
    {
      "epoch": 1.6735073283452144,
      "grad_norm": 0.14636677503585815,
      "learning_rate": 6.69682333638634e-06,
      "loss": 0.0179,
      "step": 1022600
    },
    {
      "epoch": 1.6735400587838678,
      "grad_norm": 1.4547699689865112,
      "learning_rate": 6.696757444172822e-06,
      "loss": 0.0253,
      "step": 1022620
    },
    {
      "epoch": 1.6735727892225212,
      "grad_norm": 0.5946654081344604,
      "learning_rate": 6.696691551959306e-06,
      "loss": 0.0191,
      "step": 1022640
    },
    {
      "epoch": 1.6736055196611745,
      "grad_norm": 0.28222817182540894,
      "learning_rate": 6.696625659745788e-06,
      "loss": 0.0199,
      "step": 1022660
    },
    {
      "epoch": 1.673638250099828,
      "grad_norm": 0.9191034436225891,
      "learning_rate": 6.696559767532271e-06,
      "loss": 0.0209,
      "step": 1022680
    },
    {
      "epoch": 1.673670980538481,
      "grad_norm": 0.22333413362503052,
      "learning_rate": 6.696493875318754e-06,
      "loss": 0.0168,
      "step": 1022700
    },
    {
      "epoch": 1.6737037109771347,
      "grad_norm": 0.08724211156368256,
      "learning_rate": 6.696427983105237e-06,
      "loss": 0.0147,
      "step": 1022720
    },
    {
      "epoch": 1.6737364414157878,
      "grad_norm": 0.3211638629436493,
      "learning_rate": 6.69636209089172e-06,
      "loss": 0.014,
      "step": 1022740
    },
    {
      "epoch": 1.6737691718544412,
      "grad_norm": 3.04714298248291,
      "learning_rate": 6.696296198678203e-06,
      "loss": 0.0197,
      "step": 1022760
    },
    {
      "epoch": 1.6738019022930946,
      "grad_norm": 0.3022189736366272,
      "learning_rate": 6.6962303064646865e-06,
      "loss": 0.0173,
      "step": 1022780
    },
    {
      "epoch": 1.6738346327317477,
      "grad_norm": 0.5616432428359985,
      "learning_rate": 6.696164414251168e-06,
      "loss": 0.0172,
      "step": 1022800
    },
    {
      "epoch": 1.6738673631704013,
      "grad_norm": 0.6477834582328796,
      "learning_rate": 6.696098522037652e-06,
      "loss": 0.0242,
      "step": 1022820
    },
    {
      "epoch": 1.6739000936090545,
      "grad_norm": 0.40214070677757263,
      "learning_rate": 6.696032629824134e-06,
      "loss": 0.0268,
      "step": 1022840
    },
    {
      "epoch": 1.673932824047708,
      "grad_norm": 0.7117384076118469,
      "learning_rate": 6.6959667376106174e-06,
      "loss": 0.0207,
      "step": 1022860
    },
    {
      "epoch": 1.6739655544863612,
      "grad_norm": 0.2087869942188263,
      "learning_rate": 6.695900845397099e-06,
      "loss": 0.024,
      "step": 1022880
    },
    {
      "epoch": 1.6739982849250146,
      "grad_norm": 0.6882004141807556,
      "learning_rate": 6.695834953183583e-06,
      "loss": 0.0216,
      "step": 1022900
    },
    {
      "epoch": 1.674031015363668,
      "grad_norm": 0.37513068318367004,
      "learning_rate": 6.695769060970066e-06,
      "loss": 0.0187,
      "step": 1022920
    },
    {
      "epoch": 1.674063745802321,
      "grad_norm": 0.2374667078256607,
      "learning_rate": 6.695703168756548e-06,
      "loss": 0.0186,
      "step": 1022940
    },
    {
      "epoch": 1.6740964762409747,
      "grad_norm": 0.5627127289772034,
      "learning_rate": 6.695637276543031e-06,
      "loss": 0.0203,
      "step": 1022960
    },
    {
      "epoch": 1.6741292066796278,
      "grad_norm": 0.14896726608276367,
      "learning_rate": 6.695571384329515e-06,
      "loss": 0.0193,
      "step": 1022980
    },
    {
      "epoch": 1.6741619371182812,
      "grad_norm": 2.162813901901245,
      "learning_rate": 6.695505492115997e-06,
      "loss": 0.0172,
      "step": 1023000
    },
    {
      "epoch": 1.6741946675569346,
      "grad_norm": 0.300822377204895,
      "learning_rate": 6.69543959990248e-06,
      "loss": 0.0152,
      "step": 1023020
    },
    {
      "epoch": 1.674227397995588,
      "grad_norm": 2.7015607357025146,
      "learning_rate": 6.695373707688962e-06,
      "loss": 0.0226,
      "step": 1023040
    },
    {
      "epoch": 1.6742601284342413,
      "grad_norm": 0.6828796863555908,
      "learning_rate": 6.695307815475446e-06,
      "loss": 0.018,
      "step": 1023060
    },
    {
      "epoch": 1.6742928588728945,
      "grad_norm": 0.4479679465293884,
      "learning_rate": 6.695241923261929e-06,
      "loss": 0.0154,
      "step": 1023080
    },
    {
      "epoch": 1.674325589311548,
      "grad_norm": 0.28192082047462463,
      "learning_rate": 6.695176031048411e-06,
      "loss": 0.0239,
      "step": 1023100
    },
    {
      "epoch": 1.6743583197502012,
      "grad_norm": 0.5073615312576294,
      "learning_rate": 6.695110138834895e-06,
      "loss": 0.0155,
      "step": 1023120
    },
    {
      "epoch": 1.6743910501888546,
      "grad_norm": 0.7028315663337708,
      "learning_rate": 6.6950442466213775e-06,
      "loss": 0.0178,
      "step": 1023140
    },
    {
      "epoch": 1.674423780627508,
      "grad_norm": 0.5943334102630615,
      "learning_rate": 6.69497835440786e-06,
      "loss": 0.0207,
      "step": 1023160
    },
    {
      "epoch": 1.6744565110661613,
      "grad_norm": 0.8769848346710205,
      "learning_rate": 6.694912462194343e-06,
      "loss": 0.0245,
      "step": 1023180
    },
    {
      "epoch": 1.6744892415048147,
      "grad_norm": 1.7839775085449219,
      "learning_rate": 6.6948465699808266e-06,
      "loss": 0.0245,
      "step": 1023200
    },
    {
      "epoch": 1.6745219719434679,
      "grad_norm": 0.6618044376373291,
      "learning_rate": 6.6947806777673085e-06,
      "loss": 0.0207,
      "step": 1023220
    },
    {
      "epoch": 1.6745547023821215,
      "grad_norm": 0.8371243476867676,
      "learning_rate": 6.694714785553792e-06,
      "loss": 0.0238,
      "step": 1023240
    },
    {
      "epoch": 1.6745874328207746,
      "grad_norm": 1.508813738822937,
      "learning_rate": 6.694648893340274e-06,
      "loss": 0.0202,
      "step": 1023260
    },
    {
      "epoch": 1.674620163259428,
      "grad_norm": 0.19254691898822784,
      "learning_rate": 6.6945830011267575e-06,
      "loss": 0.0306,
      "step": 1023280
    },
    {
      "epoch": 1.6746528936980813,
      "grad_norm": 0.249635249376297,
      "learning_rate": 6.69451710891324e-06,
      "loss": 0.0194,
      "step": 1023300
    },
    {
      "epoch": 1.6746856241367347,
      "grad_norm": 0.42828649282455444,
      "learning_rate": 6.694451216699723e-06,
      "loss": 0.0176,
      "step": 1023320
    },
    {
      "epoch": 1.674718354575388,
      "grad_norm": 0.6249207854270935,
      "learning_rate": 6.694385324486206e-06,
      "loss": 0.0162,
      "step": 1023340
    },
    {
      "epoch": 1.6747510850140412,
      "grad_norm": 0.9014484286308289,
      "learning_rate": 6.694319432272689e-06,
      "loss": 0.0235,
      "step": 1023360
    },
    {
      "epoch": 1.6747838154526948,
      "grad_norm": 2.3517158031463623,
      "learning_rate": 6.694253540059171e-06,
      "loss": 0.0262,
      "step": 1023380
    },
    {
      "epoch": 1.674816545891348,
      "grad_norm": 0.2987127900123596,
      "learning_rate": 6.694187647845655e-06,
      "loss": 0.0108,
      "step": 1023400
    },
    {
      "epoch": 1.6748492763300014,
      "grad_norm": 0.2586481273174286,
      "learning_rate": 6.694121755632137e-06,
      "loss": 0.0236,
      "step": 1023420
    },
    {
      "epoch": 1.6748820067686547,
      "grad_norm": 0.45254674553871155,
      "learning_rate": 6.69405586341862e-06,
      "loss": 0.0168,
      "step": 1023440
    },
    {
      "epoch": 1.674914737207308,
      "grad_norm": 0.28921839594841003,
      "learning_rate": 6.693989971205104e-06,
      "loss": 0.0165,
      "step": 1023460
    },
    {
      "epoch": 1.6749474676459615,
      "grad_norm": 0.22824236750602722,
      "learning_rate": 6.693924078991586e-06,
      "loss": 0.0276,
      "step": 1023480
    },
    {
      "epoch": 1.6749801980846146,
      "grad_norm": 0.3241461217403412,
      "learning_rate": 6.693858186778069e-06,
      "loss": 0.0161,
      "step": 1023500
    },
    {
      "epoch": 1.6750129285232682,
      "grad_norm": 0.07500074058771133,
      "learning_rate": 6.693792294564552e-06,
      "loss": 0.0173,
      "step": 1023520
    },
    {
      "epoch": 1.6750456589619214,
      "grad_norm": 0.3368775546550751,
      "learning_rate": 6.693726402351035e-06,
      "loss": 0.023,
      "step": 1023540
    },
    {
      "epoch": 1.6750783894005747,
      "grad_norm": 0.47309327125549316,
      "learning_rate": 6.6936605101375176e-06,
      "loss": 0.0145,
      "step": 1023560
    },
    {
      "epoch": 1.6751111198392281,
      "grad_norm": 1.1901003122329712,
      "learning_rate": 6.693594617924001e-06,
      "loss": 0.0168,
      "step": 1023580
    },
    {
      "epoch": 1.6751438502778813,
      "grad_norm": 0.7305356860160828,
      "learning_rate": 6.693528725710483e-06,
      "loss": 0.0183,
      "step": 1023600
    },
    {
      "epoch": 1.6751765807165349,
      "grad_norm": 0.5090821981430054,
      "learning_rate": 6.693462833496967e-06,
      "loss": 0.0186,
      "step": 1023620
    },
    {
      "epoch": 1.675209311155188,
      "grad_norm": 1.401212215423584,
      "learning_rate": 6.6933969412834485e-06,
      "loss": 0.0134,
      "step": 1023640
    },
    {
      "epoch": 1.6752420415938416,
      "grad_norm": 0.9452376961708069,
      "learning_rate": 6.693331049069932e-06,
      "loss": 0.0173,
      "step": 1023660
    },
    {
      "epoch": 1.6752747720324948,
      "grad_norm": 0.6456345319747925,
      "learning_rate": 6.693265156856414e-06,
      "loss": 0.0172,
      "step": 1023680
    },
    {
      "epoch": 1.6753075024711481,
      "grad_norm": 0.5562205910682678,
      "learning_rate": 6.693199264642898e-06,
      "loss": 0.0192,
      "step": 1023700
    },
    {
      "epoch": 1.6753402329098015,
      "grad_norm": 0.7800679802894592,
      "learning_rate": 6.69313337242938e-06,
      "loss": 0.022,
      "step": 1023720
    },
    {
      "epoch": 1.6753729633484546,
      "grad_norm": 1.5531591176986694,
      "learning_rate": 6.693067480215864e-06,
      "loss": 0.0172,
      "step": 1023740
    },
    {
      "epoch": 1.6754056937871082,
      "grad_norm": 0.19863568246364594,
      "learning_rate": 6.693001588002346e-06,
      "loss": 0.0172,
      "step": 1023760
    },
    {
      "epoch": 1.6754384242257614,
      "grad_norm": 0.5938834547996521,
      "learning_rate": 6.692935695788829e-06,
      "loss": 0.0218,
      "step": 1023780
    },
    {
      "epoch": 1.6754711546644148,
      "grad_norm": 0.4671819806098938,
      "learning_rate": 6.692869803575313e-06,
      "loss": 0.0252,
      "step": 1023800
    },
    {
      "epoch": 1.6755038851030681,
      "grad_norm": 0.2907954752445221,
      "learning_rate": 6.692803911361795e-06,
      "loss": 0.019,
      "step": 1023820
    },
    {
      "epoch": 1.6755366155417215,
      "grad_norm": 1.1181919574737549,
      "learning_rate": 6.6927380191482785e-06,
      "loss": 0.0212,
      "step": 1023840
    },
    {
      "epoch": 1.6755693459803749,
      "grad_norm": 0.46040138602256775,
      "learning_rate": 6.69267212693476e-06,
      "loss": 0.0184,
      "step": 1023860
    },
    {
      "epoch": 1.675602076419028,
      "grad_norm": 0.6800105571746826,
      "learning_rate": 6.692606234721244e-06,
      "loss": 0.0253,
      "step": 1023880
    },
    {
      "epoch": 1.6756348068576816,
      "grad_norm": 0.9058270454406738,
      "learning_rate": 6.692540342507726e-06,
      "loss": 0.0215,
      "step": 1023900
    },
    {
      "epoch": 1.6756675372963348,
      "grad_norm": 0.26733604073524475,
      "learning_rate": 6.6924744502942094e-06,
      "loss": 0.0195,
      "step": 1023920
    },
    {
      "epoch": 1.6757002677349881,
      "grad_norm": 0.55289626121521,
      "learning_rate": 6.692408558080692e-06,
      "loss": 0.0191,
      "step": 1023940
    },
    {
      "epoch": 1.6757329981736415,
      "grad_norm": 1.1750150918960571,
      "learning_rate": 6.692342665867175e-06,
      "loss": 0.0183,
      "step": 1023960
    },
    {
      "epoch": 1.675765728612295,
      "grad_norm": 0.2932581603527069,
      "learning_rate": 6.692276773653658e-06,
      "loss": 0.022,
      "step": 1023980
    },
    {
      "epoch": 1.6757984590509483,
      "grad_norm": 0.24311648309230804,
      "learning_rate": 6.692210881440141e-06,
      "loss": 0.0201,
      "step": 1024000
    },
    {
      "epoch": 1.6758311894896014,
      "grad_norm": 0.16551411151885986,
      "learning_rate": 6.692144989226623e-06,
      "loss": 0.032,
      "step": 1024020
    },
    {
      "epoch": 1.675863919928255,
      "grad_norm": 0.3997536301612854,
      "learning_rate": 6.692079097013107e-06,
      "loss": 0.019,
      "step": 1024040
    },
    {
      "epoch": 1.6758966503669082,
      "grad_norm": 0.8519914150238037,
      "learning_rate": 6.692013204799589e-06,
      "loss": 0.0268,
      "step": 1024060
    },
    {
      "epoch": 1.6759293808055615,
      "grad_norm": 0.547761857509613,
      "learning_rate": 6.691947312586072e-06,
      "loss": 0.0202,
      "step": 1024080
    },
    {
      "epoch": 1.675962111244215,
      "grad_norm": 0.19108407199382782,
      "learning_rate": 6.691881420372555e-06,
      "loss": 0.0204,
      "step": 1024100
    },
    {
      "epoch": 1.6759948416828683,
      "grad_norm": 0.23183175921440125,
      "learning_rate": 6.691815528159038e-06,
      "loss": 0.0165,
      "step": 1024120
    },
    {
      "epoch": 1.6760275721215216,
      "grad_norm": 0.875228226184845,
      "learning_rate": 6.691749635945521e-06,
      "loss": 0.0189,
      "step": 1024140
    },
    {
      "epoch": 1.6760603025601748,
      "grad_norm": 0.6328390836715698,
      "learning_rate": 6.691683743732004e-06,
      "loss": 0.015,
      "step": 1024160
    },
    {
      "epoch": 1.6760930329988284,
      "grad_norm": 0.7092885971069336,
      "learning_rate": 6.691617851518487e-06,
      "loss": 0.0176,
      "step": 1024180
    },
    {
      "epoch": 1.6761257634374815,
      "grad_norm": 0.8120087385177612,
      "learning_rate": 6.6915519593049695e-06,
      "loss": 0.0189,
      "step": 1024200
    },
    {
      "epoch": 1.676158493876135,
      "grad_norm": 0.23924604058265686,
      "learning_rate": 6.691486067091453e-06,
      "loss": 0.024,
      "step": 1024220
    },
    {
      "epoch": 1.6761912243147883,
      "grad_norm": 0.2709572911262512,
      "learning_rate": 6.691420174877935e-06,
      "loss": 0.0191,
      "step": 1024240
    },
    {
      "epoch": 1.6762239547534417,
      "grad_norm": 0.7952573299407959,
      "learning_rate": 6.6913542826644185e-06,
      "loss": 0.0171,
      "step": 1024260
    },
    {
      "epoch": 1.676256685192095,
      "grad_norm": 0.4746408462524414,
      "learning_rate": 6.6912883904509004e-06,
      "loss": 0.0153,
      "step": 1024280
    },
    {
      "epoch": 1.6762894156307482,
      "grad_norm": 1.2504905462265015,
      "learning_rate": 6.691222498237384e-06,
      "loss": 0.0183,
      "step": 1024300
    },
    {
      "epoch": 1.6763221460694018,
      "grad_norm": 0.14199841022491455,
      "learning_rate": 6.691156606023867e-06,
      "loss": 0.0209,
      "step": 1024320
    },
    {
      "epoch": 1.676354876508055,
      "grad_norm": 0.45556339621543884,
      "learning_rate": 6.6910907138103495e-06,
      "loss": 0.0149,
      "step": 1024340
    },
    {
      "epoch": 1.6763876069467083,
      "grad_norm": 0.18934538960456848,
      "learning_rate": 6.691024821596832e-06,
      "loss": 0.0185,
      "step": 1024360
    },
    {
      "epoch": 1.6764203373853617,
      "grad_norm": 0.419549822807312,
      "learning_rate": 6.690958929383316e-06,
      "loss": 0.0098,
      "step": 1024380
    },
    {
      "epoch": 1.6764530678240148,
      "grad_norm": 1.215786337852478,
      "learning_rate": 6.690893037169798e-06,
      "loss": 0.0193,
      "step": 1024400
    },
    {
      "epoch": 1.6764857982626684,
      "grad_norm": 0.14464125037193298,
      "learning_rate": 6.690827144956281e-06,
      "loss": 0.0174,
      "step": 1024420
    },
    {
      "epoch": 1.6765185287013216,
      "grad_norm": 0.5400974154472351,
      "learning_rate": 6.690761252742763e-06,
      "loss": 0.0173,
      "step": 1024440
    },
    {
      "epoch": 1.6765512591399752,
      "grad_norm": 0.2016567438840866,
      "learning_rate": 6.690695360529247e-06,
      "loss": 0.0101,
      "step": 1024460
    },
    {
      "epoch": 1.6765839895786283,
      "grad_norm": 1.5128015279769897,
      "learning_rate": 6.6906294683157295e-06,
      "loss": 0.0199,
      "step": 1024480
    },
    {
      "epoch": 1.6766167200172817,
      "grad_norm": 0.4165736138820648,
      "learning_rate": 6.690563576102212e-06,
      "loss": 0.0194,
      "step": 1024500
    },
    {
      "epoch": 1.676649450455935,
      "grad_norm": 0.9417814016342163,
      "learning_rate": 6.690497683888696e-06,
      "loss": 0.0193,
      "step": 1024520
    },
    {
      "epoch": 1.6766821808945882,
      "grad_norm": 0.4338717758655548,
      "learning_rate": 6.690431791675179e-06,
      "loss": 0.0271,
      "step": 1024540
    },
    {
      "epoch": 1.6767149113332418,
      "grad_norm": 1.572433352470398,
      "learning_rate": 6.690365899461661e-06,
      "loss": 0.0177,
      "step": 1024560
    },
    {
      "epoch": 1.676747641771895,
      "grad_norm": 0.8966384530067444,
      "learning_rate": 6.690300007248144e-06,
      "loss": 0.0162,
      "step": 1024580
    },
    {
      "epoch": 1.6767803722105483,
      "grad_norm": 0.37037259340286255,
      "learning_rate": 6.690234115034628e-06,
      "loss": 0.0209,
      "step": 1024600
    },
    {
      "epoch": 1.6768131026492017,
      "grad_norm": 0.13991069793701172,
      "learning_rate": 6.6901682228211096e-06,
      "loss": 0.0152,
      "step": 1024620
    },
    {
      "epoch": 1.676845833087855,
      "grad_norm": 0.7002379894256592,
      "learning_rate": 6.690102330607593e-06,
      "loss": 0.0161,
      "step": 1024640
    },
    {
      "epoch": 1.6768785635265084,
      "grad_norm": 0.3075140416622162,
      "learning_rate": 6.690036438394075e-06,
      "loss": 0.0193,
      "step": 1024660
    },
    {
      "epoch": 1.6769112939651616,
      "grad_norm": 0.7220295667648315,
      "learning_rate": 6.689970546180559e-06,
      "loss": 0.0193,
      "step": 1024680
    },
    {
      "epoch": 1.6769440244038152,
      "grad_norm": 0.9579426646232605,
      "learning_rate": 6.6899046539670405e-06,
      "loss": 0.0137,
      "step": 1024700
    },
    {
      "epoch": 1.6769767548424683,
      "grad_norm": 0.28190508484840393,
      "learning_rate": 6.689838761753524e-06,
      "loss": 0.017,
      "step": 1024720
    },
    {
      "epoch": 1.6770094852811217,
      "grad_norm": 0.45292869210243225,
      "learning_rate": 6.689772869540007e-06,
      "loss": 0.0194,
      "step": 1024740
    },
    {
      "epoch": 1.677042215719775,
      "grad_norm": 0.7631000876426697,
      "learning_rate": 6.6897069773264896e-06,
      "loss": 0.0231,
      "step": 1024760
    },
    {
      "epoch": 1.6770749461584284,
      "grad_norm": 0.4792625606060028,
      "learning_rate": 6.689641085112972e-06,
      "loss": 0.0163,
      "step": 1024780
    },
    {
      "epoch": 1.6771076765970818,
      "grad_norm": 0.23117662966251373,
      "learning_rate": 6.689575192899456e-06,
      "loss": 0.0211,
      "step": 1024800
    },
    {
      "epoch": 1.677140407035735,
      "grad_norm": 1.3445155620574951,
      "learning_rate": 6.689509300685938e-06,
      "loss": 0.024,
      "step": 1024820
    },
    {
      "epoch": 1.6771731374743886,
      "grad_norm": 0.2564299702644348,
      "learning_rate": 6.689443408472421e-06,
      "loss": 0.0157,
      "step": 1024840
    },
    {
      "epoch": 1.6772058679130417,
      "grad_norm": 0.44550618529319763,
      "learning_rate": 6.689377516258905e-06,
      "loss": 0.0185,
      "step": 1024860
    },
    {
      "epoch": 1.677238598351695,
      "grad_norm": 0.9976489543914795,
      "learning_rate": 6.689311624045387e-06,
      "loss": 0.0227,
      "step": 1024880
    },
    {
      "epoch": 1.6772713287903485,
      "grad_norm": 0.8481858372688293,
      "learning_rate": 6.6892457318318704e-06,
      "loss": 0.0208,
      "step": 1024900
    },
    {
      "epoch": 1.6773040592290018,
      "grad_norm": 0.29842641949653625,
      "learning_rate": 6.689179839618352e-06,
      "loss": 0.0174,
      "step": 1024920
    },
    {
      "epoch": 1.6773367896676552,
      "grad_norm": 0.3210759162902832,
      "learning_rate": 6.689113947404836e-06,
      "loss": 0.0238,
      "step": 1024940
    },
    {
      "epoch": 1.6773695201063084,
      "grad_norm": 0.6932376623153687,
      "learning_rate": 6.689048055191319e-06,
      "loss": 0.022,
      "step": 1024960
    },
    {
      "epoch": 1.677402250544962,
      "grad_norm": 0.4928554594516754,
      "learning_rate": 6.688982162977801e-06,
      "loss": 0.0274,
      "step": 1024980
    },
    {
      "epoch": 1.677434980983615,
      "grad_norm": 0.5591022968292236,
      "learning_rate": 6.688916270764284e-06,
      "loss": 0.02,
      "step": 1025000
    },
    {
      "epoch": 1.6774677114222685,
      "grad_norm": 0.7911974787712097,
      "learning_rate": 6.688850378550768e-06,
      "loss": 0.0142,
      "step": 1025020
    },
    {
      "epoch": 1.6775004418609218,
      "grad_norm": 1.1397120952606201,
      "learning_rate": 6.68878448633725e-06,
      "loss": 0.0192,
      "step": 1025040
    },
    {
      "epoch": 1.677533172299575,
      "grad_norm": 0.656156063079834,
      "learning_rate": 6.688718594123733e-06,
      "loss": 0.0185,
      "step": 1025060
    },
    {
      "epoch": 1.6775659027382286,
      "grad_norm": 1.5904319286346436,
      "learning_rate": 6.688652701910215e-06,
      "loss": 0.0185,
      "step": 1025080
    },
    {
      "epoch": 1.6775986331768817,
      "grad_norm": 0.9062514305114746,
      "learning_rate": 6.688586809696699e-06,
      "loss": 0.0175,
      "step": 1025100
    },
    {
      "epoch": 1.6776313636155353,
      "grad_norm": 0.4846037030220032,
      "learning_rate": 6.6885209174831814e-06,
      "loss": 0.0241,
      "step": 1025120
    },
    {
      "epoch": 1.6776640940541885,
      "grad_norm": 0.16805198788642883,
      "learning_rate": 6.688455025269664e-06,
      "loss": 0.0113,
      "step": 1025140
    },
    {
      "epoch": 1.6776968244928419,
      "grad_norm": 0.8171416521072388,
      "learning_rate": 6.688389133056147e-06,
      "loss": 0.0205,
      "step": 1025160
    },
    {
      "epoch": 1.6777295549314952,
      "grad_norm": 1.3561955690383911,
      "learning_rate": 6.6883232408426305e-06,
      "loss": 0.0279,
      "step": 1025180
    },
    {
      "epoch": 1.6777622853701484,
      "grad_norm": 0.6954479813575745,
      "learning_rate": 6.688257348629113e-06,
      "loss": 0.0148,
      "step": 1025200
    },
    {
      "epoch": 1.677795015808802,
      "grad_norm": 0.2334689050912857,
      "learning_rate": 6.688191456415596e-06,
      "loss": 0.0175,
      "step": 1025220
    },
    {
      "epoch": 1.6778277462474551,
      "grad_norm": 0.12384099513292313,
      "learning_rate": 6.6881255642020796e-06,
      "loss": 0.0158,
      "step": 1025240
    },
    {
      "epoch": 1.6778604766861085,
      "grad_norm": 0.5055462718009949,
      "learning_rate": 6.6880596719885615e-06,
      "loss": 0.0266,
      "step": 1025260
    },
    {
      "epoch": 1.6778932071247619,
      "grad_norm": 0.2914302349090576,
      "learning_rate": 6.687993779775045e-06,
      "loss": 0.0208,
      "step": 1025280
    },
    {
      "epoch": 1.6779259375634152,
      "grad_norm": 0.7808594107627869,
      "learning_rate": 6.687927887561527e-06,
      "loss": 0.0219,
      "step": 1025300
    },
    {
      "epoch": 1.6779586680020686,
      "grad_norm": 0.22557416558265686,
      "learning_rate": 6.6878619953480105e-06,
      "loss": 0.02,
      "step": 1025320
    },
    {
      "epoch": 1.6779913984407218,
      "grad_norm": 0.30075201392173767,
      "learning_rate": 6.687796103134493e-06,
      "loss": 0.0102,
      "step": 1025340
    },
    {
      "epoch": 1.6780241288793754,
      "grad_norm": 0.6929885745048523,
      "learning_rate": 6.687730210920976e-06,
      "loss": 0.0178,
      "step": 1025360
    },
    {
      "epoch": 1.6780568593180285,
      "grad_norm": 0.5303898453712463,
      "learning_rate": 6.687664318707459e-06,
      "loss": 0.0185,
      "step": 1025380
    },
    {
      "epoch": 1.6780895897566819,
      "grad_norm": 0.713689923286438,
      "learning_rate": 6.687598426493942e-06,
      "loss": 0.023,
      "step": 1025400
    },
    {
      "epoch": 1.6781223201953352,
      "grad_norm": 0.25981277227401733,
      "learning_rate": 6.687532534280424e-06,
      "loss": 0.0188,
      "step": 1025420
    },
    {
      "epoch": 1.6781550506339886,
      "grad_norm": 1.0666310787200928,
      "learning_rate": 6.687466642066908e-06,
      "loss": 0.0232,
      "step": 1025440
    },
    {
      "epoch": 1.678187781072642,
      "grad_norm": 0.40474095940589905,
      "learning_rate": 6.68740074985339e-06,
      "loss": 0.0163,
      "step": 1025460
    },
    {
      "epoch": 1.6782205115112951,
      "grad_norm": 1.0974773168563843,
      "learning_rate": 6.687334857639873e-06,
      "loss": 0.0237,
      "step": 1025480
    },
    {
      "epoch": 1.6782532419499487,
      "grad_norm": 0.9508163332939148,
      "learning_rate": 6.687268965426356e-06,
      "loss": 0.0172,
      "step": 1025500
    },
    {
      "epoch": 1.6782859723886019,
      "grad_norm": 0.11627307534217834,
      "learning_rate": 6.687203073212839e-06,
      "loss": 0.0214,
      "step": 1025520
    },
    {
      "epoch": 1.6783187028272553,
      "grad_norm": 0.19496549665927887,
      "learning_rate": 6.687137180999322e-06,
      "loss": 0.0191,
      "step": 1025540
    },
    {
      "epoch": 1.6783514332659086,
      "grad_norm": 0.28609901666641235,
      "learning_rate": 6.687071288785805e-06,
      "loss": 0.0201,
      "step": 1025560
    },
    {
      "epoch": 1.678384163704562,
      "grad_norm": 0.6438100934028625,
      "learning_rate": 6.687005396572288e-06,
      "loss": 0.0214,
      "step": 1025580
    },
    {
      "epoch": 1.6784168941432154,
      "grad_norm": 1.5486178398132324,
      "learning_rate": 6.6869395043587706e-06,
      "loss": 0.0136,
      "step": 1025600
    },
    {
      "epoch": 1.6784496245818685,
      "grad_norm": 0.4036633372306824,
      "learning_rate": 6.686873612145254e-06,
      "loss": 0.0176,
      "step": 1025620
    },
    {
      "epoch": 1.6784823550205221,
      "grad_norm": 0.6269603967666626,
      "learning_rate": 6.686807719931736e-06,
      "loss": 0.0115,
      "step": 1025640
    },
    {
      "epoch": 1.6785150854591753,
      "grad_norm": 0.4218321144580841,
      "learning_rate": 6.68674182771822e-06,
      "loss": 0.0235,
      "step": 1025660
    },
    {
      "epoch": 1.6785478158978286,
      "grad_norm": 0.32253602147102356,
      "learning_rate": 6.6866759355047015e-06,
      "loss": 0.0271,
      "step": 1025680
    },
    {
      "epoch": 1.678580546336482,
      "grad_norm": 0.20033465325832367,
      "learning_rate": 6.686610043291185e-06,
      "loss": 0.0171,
      "step": 1025700
    },
    {
      "epoch": 1.6786132767751354,
      "grad_norm": 0.16455525159835815,
      "learning_rate": 6.686544151077667e-06,
      "loss": 0.0199,
      "step": 1025720
    },
    {
      "epoch": 1.6786460072137888,
      "grad_norm": 3.593352794647217,
      "learning_rate": 6.686478258864151e-06,
      "loss": 0.0202,
      "step": 1025740
    },
    {
      "epoch": 1.678678737652442,
      "grad_norm": 0.3381809592247009,
      "learning_rate": 6.686412366650633e-06,
      "loss": 0.0129,
      "step": 1025760
    },
    {
      "epoch": 1.6787114680910955,
      "grad_norm": 0.18622487783432007,
      "learning_rate": 6.686346474437116e-06,
      "loss": 0.0207,
      "step": 1025780
    },
    {
      "epoch": 1.6787441985297487,
      "grad_norm": 0.4077253043651581,
      "learning_rate": 6.686280582223599e-06,
      "loss": 0.0123,
      "step": 1025800
    },
    {
      "epoch": 1.678776928968402,
      "grad_norm": 0.6554193496704102,
      "learning_rate": 6.686214690010082e-06,
      "loss": 0.021,
      "step": 1025820
    },
    {
      "epoch": 1.6788096594070554,
      "grad_norm": 0.3770992159843445,
      "learning_rate": 6.686148797796564e-06,
      "loss": 0.0188,
      "step": 1025840
    },
    {
      "epoch": 1.6788423898457085,
      "grad_norm": 0.48983505368232727,
      "learning_rate": 6.686082905583048e-06,
      "loss": 0.0194,
      "step": 1025860
    },
    {
      "epoch": 1.6788751202843621,
      "grad_norm": 0.8455509543418884,
      "learning_rate": 6.68601701336953e-06,
      "loss": 0.0156,
      "step": 1025880
    },
    {
      "epoch": 1.6789078507230153,
      "grad_norm": 0.45860904455184937,
      "learning_rate": 6.685951121156013e-06,
      "loss": 0.0204,
      "step": 1025900
    },
    {
      "epoch": 1.6789405811616689,
      "grad_norm": 0.6913816928863525,
      "learning_rate": 6.685885228942497e-06,
      "loss": 0.0176,
      "step": 1025920
    },
    {
      "epoch": 1.678973311600322,
      "grad_norm": 0.129074826836586,
      "learning_rate": 6.685819336728979e-06,
      "loss": 0.0126,
      "step": 1025940
    },
    {
      "epoch": 1.6790060420389754,
      "grad_norm": 0.04998404160141945,
      "learning_rate": 6.6857534445154624e-06,
      "loss": 0.0217,
      "step": 1025960
    },
    {
      "epoch": 1.6790387724776288,
      "grad_norm": 0.4435032606124878,
      "learning_rate": 6.685687552301945e-06,
      "loss": 0.013,
      "step": 1025980
    },
    {
      "epoch": 1.679071502916282,
      "grad_norm": 0.29726266860961914,
      "learning_rate": 6.685621660088428e-06,
      "loss": 0.0179,
      "step": 1026000
    },
    {
      "epoch": 1.6791042333549355,
      "grad_norm": 0.12748469412326813,
      "learning_rate": 6.685555767874911e-06,
      "loss": 0.0248,
      "step": 1026020
    },
    {
      "epoch": 1.6791369637935887,
      "grad_norm": 0.824522852897644,
      "learning_rate": 6.685489875661394e-06,
      "loss": 0.0226,
      "step": 1026040
    },
    {
      "epoch": 1.679169694232242,
      "grad_norm": 1.064380168914795,
      "learning_rate": 6.685423983447876e-06,
      "loss": 0.019,
      "step": 1026060
    },
    {
      "epoch": 1.6792024246708954,
      "grad_norm": 0.233564093708992,
      "learning_rate": 6.68535809123436e-06,
      "loss": 0.019,
      "step": 1026080
    },
    {
      "epoch": 1.6792351551095488,
      "grad_norm": 1.1736688613891602,
      "learning_rate": 6.685292199020842e-06,
      "loss": 0.0344,
      "step": 1026100
    },
    {
      "epoch": 1.6792678855482022,
      "grad_norm": 1.1478588581085205,
      "learning_rate": 6.685226306807325e-06,
      "loss": 0.0286,
      "step": 1026120
    },
    {
      "epoch": 1.6793006159868553,
      "grad_norm": 0.44178834557533264,
      "learning_rate": 6.685160414593808e-06,
      "loss": 0.0181,
      "step": 1026140
    },
    {
      "epoch": 1.679333346425509,
      "grad_norm": 0.41198936104774475,
      "learning_rate": 6.685094522380291e-06,
      "loss": 0.0194,
      "step": 1026160
    },
    {
      "epoch": 1.679366076864162,
      "grad_norm": 0.9234551787376404,
      "learning_rate": 6.685028630166773e-06,
      "loss": 0.0301,
      "step": 1026180
    },
    {
      "epoch": 1.6793988073028154,
      "grad_norm": 0.6894903779029846,
      "learning_rate": 6.684962737953257e-06,
      "loss": 0.0182,
      "step": 1026200
    },
    {
      "epoch": 1.6794315377414688,
      "grad_norm": 0.6880254149436951,
      "learning_rate": 6.684896845739739e-06,
      "loss": 0.0207,
      "step": 1026220
    },
    {
      "epoch": 1.6794642681801222,
      "grad_norm": 0.7272108197212219,
      "learning_rate": 6.6848309535262225e-06,
      "loss": 0.0175,
      "step": 1026240
    },
    {
      "epoch": 1.6794969986187755,
      "grad_norm": 0.19375894963741302,
      "learning_rate": 6.684765061312706e-06,
      "loss": 0.0157,
      "step": 1026260
    },
    {
      "epoch": 1.6795297290574287,
      "grad_norm": 1.034254550933838,
      "learning_rate": 6.684699169099188e-06,
      "loss": 0.0231,
      "step": 1026280
    },
    {
      "epoch": 1.6795624594960823,
      "grad_norm": 1.458460807800293,
      "learning_rate": 6.6846332768856715e-06,
      "loss": 0.0249,
      "step": 1026300
    },
    {
      "epoch": 1.6795951899347354,
      "grad_norm": 0.17325469851493835,
      "learning_rate": 6.6845673846721534e-06,
      "loss": 0.023,
      "step": 1026320
    },
    {
      "epoch": 1.6796279203733888,
      "grad_norm": 0.2558050751686096,
      "learning_rate": 6.684501492458637e-06,
      "loss": 0.0187,
      "step": 1026340
    },
    {
      "epoch": 1.6796606508120422,
      "grad_norm": 1.391432762145996,
      "learning_rate": 6.68443560024512e-06,
      "loss": 0.0146,
      "step": 1026360
    },
    {
      "epoch": 1.6796933812506956,
      "grad_norm": 0.25367987155914307,
      "learning_rate": 6.6843697080316025e-06,
      "loss": 0.0169,
      "step": 1026380
    },
    {
      "epoch": 1.679726111689349,
      "grad_norm": 0.1701151430606842,
      "learning_rate": 6.684303815818085e-06,
      "loss": 0.0216,
      "step": 1026400
    },
    {
      "epoch": 1.679758842128002,
      "grad_norm": 0.7320255041122437,
      "learning_rate": 6.684237923604569e-06,
      "loss": 0.0218,
      "step": 1026420
    },
    {
      "epoch": 1.6797915725666557,
      "grad_norm": 0.8478599786758423,
      "learning_rate": 6.684172031391051e-06,
      "loss": 0.0189,
      "step": 1026440
    },
    {
      "epoch": 1.6798243030053088,
      "grad_norm": 0.23729634284973145,
      "learning_rate": 6.684106139177534e-06,
      "loss": 0.0224,
      "step": 1026460
    },
    {
      "epoch": 1.6798570334439622,
      "grad_norm": 0.1699577122926712,
      "learning_rate": 6.684040246964016e-06,
      "loss": 0.0248,
      "step": 1026480
    },
    {
      "epoch": 1.6798897638826156,
      "grad_norm": 0.38535189628601074,
      "learning_rate": 6.6839743547505e-06,
      "loss": 0.0153,
      "step": 1026500
    },
    {
      "epoch": 1.679922494321269,
      "grad_norm": 0.8549128174781799,
      "learning_rate": 6.6839084625369825e-06,
      "loss": 0.0229,
      "step": 1026520
    },
    {
      "epoch": 1.6799552247599223,
      "grad_norm": 0.3173237144947052,
      "learning_rate": 6.683842570323465e-06,
      "loss": 0.0209,
      "step": 1026540
    },
    {
      "epoch": 1.6799879551985755,
      "grad_norm": 0.11253625154495239,
      "learning_rate": 6.683776678109948e-06,
      "loss": 0.0152,
      "step": 1026560
    },
    {
      "epoch": 1.680020685637229,
      "grad_norm": 0.4147295653820038,
      "learning_rate": 6.683710785896432e-06,
      "loss": 0.0221,
      "step": 1026580
    },
    {
      "epoch": 1.6800534160758822,
      "grad_norm": 0.279245525598526,
      "learning_rate": 6.683644893682914e-06,
      "loss": 0.0145,
      "step": 1026600
    },
    {
      "epoch": 1.6800861465145356,
      "grad_norm": 0.33302468061447144,
      "learning_rate": 6.683579001469397e-06,
      "loss": 0.0261,
      "step": 1026620
    },
    {
      "epoch": 1.680118876953189,
      "grad_norm": 0.13223101198673248,
      "learning_rate": 6.683513109255881e-06,
      "loss": 0.0246,
      "step": 1026640
    },
    {
      "epoch": 1.680151607391842,
      "grad_norm": 0.6596654653549194,
      "learning_rate": 6.6834472170423626e-06,
      "loss": 0.0134,
      "step": 1026660
    },
    {
      "epoch": 1.6801843378304957,
      "grad_norm": 1.2401068210601807,
      "learning_rate": 6.683381324828846e-06,
      "loss": 0.0211,
      "step": 1026680
    },
    {
      "epoch": 1.6802170682691489,
      "grad_norm": 0.18330137431621552,
      "learning_rate": 6.683315432615328e-06,
      "loss": 0.0184,
      "step": 1026700
    },
    {
      "epoch": 1.6802497987078024,
      "grad_norm": 0.33384954929351807,
      "learning_rate": 6.683249540401812e-06,
      "loss": 0.0207,
      "step": 1026720
    },
    {
      "epoch": 1.6802825291464556,
      "grad_norm": 0.4851583242416382,
      "learning_rate": 6.6831836481882935e-06,
      "loss": 0.0153,
      "step": 1026740
    },
    {
      "epoch": 1.680315259585109,
      "grad_norm": 0.28407958149909973,
      "learning_rate": 6.683117755974777e-06,
      "loss": 0.0204,
      "step": 1026760
    },
    {
      "epoch": 1.6803479900237623,
      "grad_norm": 0.5151432156562805,
      "learning_rate": 6.68305186376126e-06,
      "loss": 0.0152,
      "step": 1026780
    },
    {
      "epoch": 1.6803807204624155,
      "grad_norm": 0.46643880009651184,
      "learning_rate": 6.682985971547743e-06,
      "loss": 0.0135,
      "step": 1026800
    },
    {
      "epoch": 1.680413450901069,
      "grad_norm": 0.26731765270233154,
      "learning_rate": 6.682920079334225e-06,
      "loss": 0.0209,
      "step": 1026820
    },
    {
      "epoch": 1.6804461813397222,
      "grad_norm": 0.22151991724967957,
      "learning_rate": 6.682854187120709e-06,
      "loss": 0.0186,
      "step": 1026840
    },
    {
      "epoch": 1.6804789117783756,
      "grad_norm": 0.2632361948490143,
      "learning_rate": 6.682788294907191e-06,
      "loss": 0.0217,
      "step": 1026860
    },
    {
      "epoch": 1.680511642217029,
      "grad_norm": 0.8525563478469849,
      "learning_rate": 6.682722402693674e-06,
      "loss": 0.012,
      "step": 1026880
    },
    {
      "epoch": 1.6805443726556824,
      "grad_norm": 1.1840530633926392,
      "learning_rate": 6.682656510480156e-06,
      "loss": 0.0217,
      "step": 1026900
    },
    {
      "epoch": 1.6805771030943357,
      "grad_norm": 2.654569387435913,
      "learning_rate": 6.68259061826664e-06,
      "loss": 0.0179,
      "step": 1026920
    },
    {
      "epoch": 1.6806098335329889,
      "grad_norm": 1.1010966300964355,
      "learning_rate": 6.682524726053123e-06,
      "loss": 0.0144,
      "step": 1026940
    },
    {
      "epoch": 1.6806425639716425,
      "grad_norm": 1.0636829137802124,
      "learning_rate": 6.682458833839605e-06,
      "loss": 0.0223,
      "step": 1026960
    },
    {
      "epoch": 1.6806752944102956,
      "grad_norm": 0.15647219121456146,
      "learning_rate": 6.682392941626089e-06,
      "loss": 0.0238,
      "step": 1026980
    },
    {
      "epoch": 1.680708024848949,
      "grad_norm": 1.014666199684143,
      "learning_rate": 6.682327049412572e-06,
      "loss": 0.0241,
      "step": 1027000
    },
    {
      "epoch": 1.6807407552876024,
      "grad_norm": 0.12579718232154846,
      "learning_rate": 6.682261157199054e-06,
      "loss": 0.0129,
      "step": 1027020
    },
    {
      "epoch": 1.6807734857262557,
      "grad_norm": 0.7244595885276794,
      "learning_rate": 6.682195264985537e-06,
      "loss": 0.0181,
      "step": 1027040
    },
    {
      "epoch": 1.680806216164909,
      "grad_norm": 0.6650177836418152,
      "learning_rate": 6.682129372772021e-06,
      "loss": 0.0116,
      "step": 1027060
    },
    {
      "epoch": 1.6808389466035623,
      "grad_norm": 0.288535475730896,
      "learning_rate": 6.682063480558503e-06,
      "loss": 0.0143,
      "step": 1027080
    },
    {
      "epoch": 1.6808716770422159,
      "grad_norm": 1.3199998140335083,
      "learning_rate": 6.681997588344986e-06,
      "loss": 0.022,
      "step": 1027100
    },
    {
      "epoch": 1.680904407480869,
      "grad_norm": 0.45210444927215576,
      "learning_rate": 6.681931696131468e-06,
      "loss": 0.0194,
      "step": 1027120
    },
    {
      "epoch": 1.6809371379195224,
      "grad_norm": 0.6327032446861267,
      "learning_rate": 6.681865803917952e-06,
      "loss": 0.0195,
      "step": 1027140
    },
    {
      "epoch": 1.6809698683581757,
      "grad_norm": 0.7484594583511353,
      "learning_rate": 6.6817999117044344e-06,
      "loss": 0.0247,
      "step": 1027160
    },
    {
      "epoch": 1.6810025987968291,
      "grad_norm": 0.3540102541446686,
      "learning_rate": 6.681734019490917e-06,
      "loss": 0.0215,
      "step": 1027180
    },
    {
      "epoch": 1.6810353292354825,
      "grad_norm": 0.21783488988876343,
      "learning_rate": 6.6816681272774e-06,
      "loss": 0.0181,
      "step": 1027200
    },
    {
      "epoch": 1.6810680596741356,
      "grad_norm": 0.5834366679191589,
      "learning_rate": 6.6816022350638835e-06,
      "loss": 0.0148,
      "step": 1027220
    },
    {
      "epoch": 1.6811007901127892,
      "grad_norm": 0.13616296648979187,
      "learning_rate": 6.681536342850365e-06,
      "loss": 0.0175,
      "step": 1027240
    },
    {
      "epoch": 1.6811335205514424,
      "grad_norm": 1.355831503868103,
      "learning_rate": 6.681470450636849e-06,
      "loss": 0.0202,
      "step": 1027260
    },
    {
      "epoch": 1.6811662509900958,
      "grad_norm": 1.0286381244659424,
      "learning_rate": 6.681404558423331e-06,
      "loss": 0.0178,
      "step": 1027280
    },
    {
      "epoch": 1.6811989814287491,
      "grad_norm": 1.4263157844543457,
      "learning_rate": 6.6813386662098145e-06,
      "loss": 0.02,
      "step": 1027300
    },
    {
      "epoch": 1.6812317118674025,
      "grad_norm": 0.6522294878959656,
      "learning_rate": 6.681272773996298e-06,
      "loss": 0.0219,
      "step": 1027320
    },
    {
      "epoch": 1.6812644423060559,
      "grad_norm": 1.9528107643127441,
      "learning_rate": 6.68120688178278e-06,
      "loss": 0.0182,
      "step": 1027340
    },
    {
      "epoch": 1.681297172744709,
      "grad_norm": 0.3746695816516876,
      "learning_rate": 6.6811409895692635e-06,
      "loss": 0.02,
      "step": 1027360
    },
    {
      "epoch": 1.6813299031833626,
      "grad_norm": 0.4239073395729065,
      "learning_rate": 6.681075097355746e-06,
      "loss": 0.0151,
      "step": 1027380
    },
    {
      "epoch": 1.6813626336220158,
      "grad_norm": 0.23798243701457977,
      "learning_rate": 6.681009205142229e-06,
      "loss": 0.0147,
      "step": 1027400
    },
    {
      "epoch": 1.6813953640606691,
      "grad_norm": 0.5305087566375732,
      "learning_rate": 6.680943312928712e-06,
      "loss": 0.0183,
      "step": 1027420
    },
    {
      "epoch": 1.6814280944993225,
      "grad_norm": 0.3596237897872925,
      "learning_rate": 6.680877420715195e-06,
      "loss": 0.0215,
      "step": 1027440
    },
    {
      "epoch": 1.6814608249379757,
      "grad_norm": 0.5280839800834656,
      "learning_rate": 6.680811528501677e-06,
      "loss": 0.017,
      "step": 1027460
    },
    {
      "epoch": 1.6814935553766293,
      "grad_norm": 0.48176586627960205,
      "learning_rate": 6.680745636288161e-06,
      "loss": 0.0222,
      "step": 1027480
    },
    {
      "epoch": 1.6815262858152824,
      "grad_norm": 0.29308968782424927,
      "learning_rate": 6.680679744074643e-06,
      "loss": 0.0193,
      "step": 1027500
    },
    {
      "epoch": 1.681559016253936,
      "grad_norm": 0.6955639719963074,
      "learning_rate": 6.680613851861126e-06,
      "loss": 0.0237,
      "step": 1027520
    },
    {
      "epoch": 1.6815917466925892,
      "grad_norm": 1.0929182767868042,
      "learning_rate": 6.680547959647608e-06,
      "loss": 0.0251,
      "step": 1027540
    },
    {
      "epoch": 1.6816244771312425,
      "grad_norm": 0.4370647966861725,
      "learning_rate": 6.680482067434092e-06,
      "loss": 0.0193,
      "step": 1027560
    },
    {
      "epoch": 1.681657207569896,
      "grad_norm": 0.23268286883831024,
      "learning_rate": 6.6804161752205745e-06,
      "loss": 0.018,
      "step": 1027580
    },
    {
      "epoch": 1.681689938008549,
      "grad_norm": 0.6042726039886475,
      "learning_rate": 6.680350283007058e-06,
      "loss": 0.0229,
      "step": 1027600
    },
    {
      "epoch": 1.6817226684472026,
      "grad_norm": 0.2219388633966446,
      "learning_rate": 6.68028439079354e-06,
      "loss": 0.0163,
      "step": 1027620
    },
    {
      "epoch": 1.6817553988858558,
      "grad_norm": 0.4162953197956085,
      "learning_rate": 6.680218498580024e-06,
      "loss": 0.0193,
      "step": 1027640
    },
    {
      "epoch": 1.6817881293245092,
      "grad_norm": 0.47126445174217224,
      "learning_rate": 6.680152606366507e-06,
      "loss": 0.0172,
      "step": 1027660
    },
    {
      "epoch": 1.6818208597631625,
      "grad_norm": 0.13589301705360413,
      "learning_rate": 6.680086714152989e-06,
      "loss": 0.0158,
      "step": 1027680
    },
    {
      "epoch": 1.681853590201816,
      "grad_norm": 0.3828449547290802,
      "learning_rate": 6.680020821939473e-06,
      "loss": 0.0113,
      "step": 1027700
    },
    {
      "epoch": 1.6818863206404693,
      "grad_norm": 0.9255455732345581,
      "learning_rate": 6.6799549297259545e-06,
      "loss": 0.0201,
      "step": 1027720
    },
    {
      "epoch": 1.6819190510791224,
      "grad_norm": 0.2850538492202759,
      "learning_rate": 6.679889037512438e-06,
      "loss": 0.0215,
      "step": 1027740
    },
    {
      "epoch": 1.681951781517776,
      "grad_norm": 1.2765616178512573,
      "learning_rate": 6.67982314529892e-06,
      "loss": 0.028,
      "step": 1027760
    },
    {
      "epoch": 1.6819845119564292,
      "grad_norm": 0.5514749884605408,
      "learning_rate": 6.679757253085404e-06,
      "loss": 0.0163,
      "step": 1027780
    },
    {
      "epoch": 1.6820172423950825,
      "grad_norm": 0.8886864185333252,
      "learning_rate": 6.679691360871886e-06,
      "loss": 0.0253,
      "step": 1027800
    },
    {
      "epoch": 1.682049972833736,
      "grad_norm": 0.44694286584854126,
      "learning_rate": 6.679625468658369e-06,
      "loss": 0.018,
      "step": 1027820
    },
    {
      "epoch": 1.6820827032723893,
      "grad_norm": 0.2537950277328491,
      "learning_rate": 6.679559576444852e-06,
      "loss": 0.0243,
      "step": 1027840
    },
    {
      "epoch": 1.6821154337110427,
      "grad_norm": 0.535905659198761,
      "learning_rate": 6.679493684231335e-06,
      "loss": 0.0195,
      "step": 1027860
    },
    {
      "epoch": 1.6821481641496958,
      "grad_norm": 0.12169411778450012,
      "learning_rate": 6.679427792017817e-06,
      "loss": 0.0192,
      "step": 1027880
    },
    {
      "epoch": 1.6821808945883494,
      "grad_norm": 0.1545589566230774,
      "learning_rate": 6.679361899804301e-06,
      "loss": 0.0176,
      "step": 1027900
    },
    {
      "epoch": 1.6822136250270026,
      "grad_norm": 0.49434173107147217,
      "learning_rate": 6.679296007590783e-06,
      "loss": 0.0166,
      "step": 1027920
    },
    {
      "epoch": 1.682246355465656,
      "grad_norm": 0.067701056599617,
      "learning_rate": 6.679230115377266e-06,
      "loss": 0.0143,
      "step": 1027940
    },
    {
      "epoch": 1.6822790859043093,
      "grad_norm": 0.2574251592159271,
      "learning_rate": 6.679164223163749e-06,
      "loss": 0.0202,
      "step": 1027960
    },
    {
      "epoch": 1.6823118163429627,
      "grad_norm": 1.0910377502441406,
      "learning_rate": 6.679098330950232e-06,
      "loss": 0.0215,
      "step": 1027980
    },
    {
      "epoch": 1.682344546781616,
      "grad_norm": 0.7963141798973083,
      "learning_rate": 6.679032438736715e-06,
      "loss": 0.0242,
      "step": 1028000
    },
    {
      "epoch": 1.6823772772202692,
      "grad_norm": 0.4187307059764862,
      "learning_rate": 6.678966546523198e-06,
      "loss": 0.0262,
      "step": 1028020
    },
    {
      "epoch": 1.6824100076589228,
      "grad_norm": 0.19809435307979584,
      "learning_rate": 6.678900654309681e-06,
      "loss": 0.0185,
      "step": 1028040
    },
    {
      "epoch": 1.682442738097576,
      "grad_norm": 0.6222549080848694,
      "learning_rate": 6.678834762096164e-06,
      "loss": 0.0221,
      "step": 1028060
    },
    {
      "epoch": 1.6824754685362293,
      "grad_norm": 0.8442192673683167,
      "learning_rate": 6.678768869882647e-06,
      "loss": 0.015,
      "step": 1028080
    },
    {
      "epoch": 1.6825081989748827,
      "grad_norm": 1.7140244245529175,
      "learning_rate": 6.678702977669129e-06,
      "loss": 0.0334,
      "step": 1028100
    },
    {
      "epoch": 1.6825409294135358,
      "grad_norm": 0.34323492646217346,
      "learning_rate": 6.678637085455613e-06,
      "loss": 0.0144,
      "step": 1028120
    },
    {
      "epoch": 1.6825736598521894,
      "grad_norm": 0.3721228837966919,
      "learning_rate": 6.678571193242095e-06,
      "loss": 0.0182,
      "step": 1028140
    },
    {
      "epoch": 1.6826063902908426,
      "grad_norm": 0.4253478944301605,
      "learning_rate": 6.678505301028578e-06,
      "loss": 0.0204,
      "step": 1028160
    },
    {
      "epoch": 1.6826391207294962,
      "grad_norm": 0.8734052777290344,
      "learning_rate": 6.678439408815061e-06,
      "loss": 0.0149,
      "step": 1028180
    },
    {
      "epoch": 1.6826718511681493,
      "grad_norm": 0.6588793396949768,
      "learning_rate": 6.678373516601544e-06,
      "loss": 0.0181,
      "step": 1028200
    },
    {
      "epoch": 1.6827045816068027,
      "grad_norm": 0.24019809067249298,
      "learning_rate": 6.678307624388026e-06,
      "loss": 0.0306,
      "step": 1028220
    },
    {
      "epoch": 1.682737312045456,
      "grad_norm": 0.18976137042045593,
      "learning_rate": 6.67824173217451e-06,
      "loss": 0.0177,
      "step": 1028240
    },
    {
      "epoch": 1.6827700424841092,
      "grad_norm": 0.2996731698513031,
      "learning_rate": 6.678175839960992e-06,
      "loss": 0.0181,
      "step": 1028260
    },
    {
      "epoch": 1.6828027729227628,
      "grad_norm": 0.12186070531606674,
      "learning_rate": 6.6781099477474755e-06,
      "loss": 0.0144,
      "step": 1028280
    },
    {
      "epoch": 1.682835503361416,
      "grad_norm": 0.45203396677970886,
      "learning_rate": 6.678044055533957e-06,
      "loss": 0.0198,
      "step": 1028300
    },
    {
      "epoch": 1.6828682338000693,
      "grad_norm": 2.456151008605957,
      "learning_rate": 6.677978163320441e-06,
      "loss": 0.024,
      "step": 1028320
    },
    {
      "epoch": 1.6829009642387227,
      "grad_norm": 4.230958938598633,
      "learning_rate": 6.677912271106924e-06,
      "loss": 0.025,
      "step": 1028340
    },
    {
      "epoch": 1.682933694677376,
      "grad_norm": 1.132641077041626,
      "learning_rate": 6.6778463788934064e-06,
      "loss": 0.0154,
      "step": 1028360
    },
    {
      "epoch": 1.6829664251160295,
      "grad_norm": 0.30847448110580444,
      "learning_rate": 6.67778048667989e-06,
      "loss": 0.0151,
      "step": 1028380
    },
    {
      "epoch": 1.6829991555546826,
      "grad_norm": 0.600295901298523,
      "learning_rate": 6.677714594466373e-06,
      "loss": 0.0161,
      "step": 1028400
    },
    {
      "epoch": 1.6830318859933362,
      "grad_norm": 0.5127629637718201,
      "learning_rate": 6.6776487022528555e-06,
      "loss": 0.0226,
      "step": 1028420
    },
    {
      "epoch": 1.6830646164319893,
      "grad_norm": 0.21794314682483673,
      "learning_rate": 6.677582810039338e-06,
      "loss": 0.0231,
      "step": 1028440
    },
    {
      "epoch": 1.6830973468706427,
      "grad_norm": 1.9672492742538452,
      "learning_rate": 6.677516917825822e-06,
      "loss": 0.0185,
      "step": 1028460
    },
    {
      "epoch": 1.683130077309296,
      "grad_norm": 0.6756597757339478,
      "learning_rate": 6.677451025612304e-06,
      "loss": 0.016,
      "step": 1028480
    },
    {
      "epoch": 1.6831628077479495,
      "grad_norm": 0.45096659660339355,
      "learning_rate": 6.677385133398787e-06,
      "loss": 0.0181,
      "step": 1028500
    },
    {
      "epoch": 1.6831955381866028,
      "grad_norm": 1.2166147232055664,
      "learning_rate": 6.677319241185269e-06,
      "loss": 0.0185,
      "step": 1028520
    },
    {
      "epoch": 1.683228268625256,
      "grad_norm": 0.3869111239910126,
      "learning_rate": 6.677253348971753e-06,
      "loss": 0.0226,
      "step": 1028540
    },
    {
      "epoch": 1.6832609990639096,
      "grad_norm": 0.45100459456443787,
      "learning_rate": 6.677187456758235e-06,
      "loss": 0.016,
      "step": 1028560
    },
    {
      "epoch": 1.6832937295025627,
      "grad_norm": 0.3727227747440338,
      "learning_rate": 6.677121564544718e-06,
      "loss": 0.0263,
      "step": 1028580
    },
    {
      "epoch": 1.683326459941216,
      "grad_norm": 0.3482639789581299,
      "learning_rate": 6.677055672331201e-06,
      "loss": 0.0161,
      "step": 1028600
    },
    {
      "epoch": 1.6833591903798695,
      "grad_norm": 0.34445223212242126,
      "learning_rate": 6.676989780117684e-06,
      "loss": 0.0188,
      "step": 1028620
    },
    {
      "epoch": 1.6833919208185228,
      "grad_norm": 0.37835031747817993,
      "learning_rate": 6.6769238879041665e-06,
      "loss": 0.0225,
      "step": 1028640
    },
    {
      "epoch": 1.6834246512571762,
      "grad_norm": 0.09042245894670486,
      "learning_rate": 6.67685799569065e-06,
      "loss": 0.0241,
      "step": 1028660
    },
    {
      "epoch": 1.6834573816958294,
      "grad_norm": 0.7384094595909119,
      "learning_rate": 6.676792103477132e-06,
      "loss": 0.0166,
      "step": 1028680
    },
    {
      "epoch": 1.683490112134483,
      "grad_norm": 0.6492967009544373,
      "learning_rate": 6.6767262112636156e-06,
      "loss": 0.0185,
      "step": 1028700
    },
    {
      "epoch": 1.6835228425731361,
      "grad_norm": 0.3322146534919739,
      "learning_rate": 6.676660319050099e-06,
      "loss": 0.0234,
      "step": 1028720
    },
    {
      "epoch": 1.6835555730117895,
      "grad_norm": 0.22272129356861115,
      "learning_rate": 6.676594426836581e-06,
      "loss": 0.0225,
      "step": 1028740
    },
    {
      "epoch": 1.6835883034504429,
      "grad_norm": 0.16586068272590637,
      "learning_rate": 6.676528534623065e-06,
      "loss": 0.0195,
      "step": 1028760
    },
    {
      "epoch": 1.6836210338890962,
      "grad_norm": 0.7286093235015869,
      "learning_rate": 6.6764626424095465e-06,
      "loss": 0.0164,
      "step": 1028780
    },
    {
      "epoch": 1.6836537643277496,
      "grad_norm": 1.2904021739959717,
      "learning_rate": 6.67639675019603e-06,
      "loss": 0.0291,
      "step": 1028800
    },
    {
      "epoch": 1.6836864947664028,
      "grad_norm": 0.9872530698776245,
      "learning_rate": 6.676330857982513e-06,
      "loss": 0.0232,
      "step": 1028820
    },
    {
      "epoch": 1.6837192252050563,
      "grad_norm": 0.21218861639499664,
      "learning_rate": 6.676264965768996e-06,
      "loss": 0.0161,
      "step": 1028840
    },
    {
      "epoch": 1.6837519556437095,
      "grad_norm": 0.52559894323349,
      "learning_rate": 6.676199073555478e-06,
      "loss": 0.0184,
      "step": 1028860
    },
    {
      "epoch": 1.6837846860823629,
      "grad_norm": 1.136936068534851,
      "learning_rate": 6.676133181341962e-06,
      "loss": 0.0284,
      "step": 1028880
    },
    {
      "epoch": 1.6838174165210162,
      "grad_norm": 1.3148480653762817,
      "learning_rate": 6.676067289128444e-06,
      "loss": 0.0123,
      "step": 1028900
    },
    {
      "epoch": 1.6838501469596694,
      "grad_norm": 0.2257487177848816,
      "learning_rate": 6.676001396914927e-06,
      "loss": 0.0204,
      "step": 1028920
    },
    {
      "epoch": 1.683882877398323,
      "grad_norm": 0.7795513272285461,
      "learning_rate": 6.675935504701409e-06,
      "loss": 0.0151,
      "step": 1028940
    },
    {
      "epoch": 1.6839156078369761,
      "grad_norm": 1.8198254108428955,
      "learning_rate": 6.675869612487893e-06,
      "loss": 0.0212,
      "step": 1028960
    },
    {
      "epoch": 1.6839483382756297,
      "grad_norm": 0.8178880214691162,
      "learning_rate": 6.675803720274376e-06,
      "loss": 0.0227,
      "step": 1028980
    },
    {
      "epoch": 1.6839810687142829,
      "grad_norm": 1.1602247953414917,
      "learning_rate": 6.675737828060858e-06,
      "loss": 0.0208,
      "step": 1029000
    },
    {
      "epoch": 1.6840137991529363,
      "grad_norm": 1.0089654922485352,
      "learning_rate": 6.675671935847341e-06,
      "loss": 0.0185,
      "step": 1029020
    },
    {
      "epoch": 1.6840465295915896,
      "grad_norm": 0.7622642517089844,
      "learning_rate": 6.675606043633825e-06,
      "loss": 0.0212,
      "step": 1029040
    },
    {
      "epoch": 1.6840792600302428,
      "grad_norm": 1.5523617267608643,
      "learning_rate": 6.675540151420307e-06,
      "loss": 0.0176,
      "step": 1029060
    },
    {
      "epoch": 1.6841119904688964,
      "grad_norm": 0.31462618708610535,
      "learning_rate": 6.67547425920679e-06,
      "loss": 0.0204,
      "step": 1029080
    },
    {
      "epoch": 1.6841447209075495,
      "grad_norm": 0.45753222703933716,
      "learning_rate": 6.675408366993274e-06,
      "loss": 0.0135,
      "step": 1029100
    },
    {
      "epoch": 1.684177451346203,
      "grad_norm": 0.17839615046977997,
      "learning_rate": 6.675342474779756e-06,
      "loss": 0.0133,
      "step": 1029120
    },
    {
      "epoch": 1.6842101817848563,
      "grad_norm": 0.13945163786411285,
      "learning_rate": 6.675276582566239e-06,
      "loss": 0.0242,
      "step": 1029140
    },
    {
      "epoch": 1.6842429122235096,
      "grad_norm": 0.5768744349479675,
      "learning_rate": 6.675210690352721e-06,
      "loss": 0.0225,
      "step": 1029160
    },
    {
      "epoch": 1.684275642662163,
      "grad_norm": 0.1497650146484375,
      "learning_rate": 6.675144798139205e-06,
      "loss": 0.0207,
      "step": 1029180
    },
    {
      "epoch": 1.6843083731008162,
      "grad_norm": 0.7063913941383362,
      "learning_rate": 6.6750789059256874e-06,
      "loss": 0.0137,
      "step": 1029200
    },
    {
      "epoch": 1.6843411035394698,
      "grad_norm": 0.14623889327049255,
      "learning_rate": 6.67501301371217e-06,
      "loss": 0.0124,
      "step": 1029220
    },
    {
      "epoch": 1.684373833978123,
      "grad_norm": 0.28994840383529663,
      "learning_rate": 6.674947121498653e-06,
      "loss": 0.0285,
      "step": 1029240
    },
    {
      "epoch": 1.6844065644167763,
      "grad_norm": 0.7312105894088745,
      "learning_rate": 6.6748812292851365e-06,
      "loss": 0.0153,
      "step": 1029260
    },
    {
      "epoch": 1.6844392948554296,
      "grad_norm": 0.045038022100925446,
      "learning_rate": 6.674815337071618e-06,
      "loss": 0.0253,
      "step": 1029280
    },
    {
      "epoch": 1.684472025294083,
      "grad_norm": 0.23795351386070251,
      "learning_rate": 6.674749444858102e-06,
      "loss": 0.0132,
      "step": 1029300
    },
    {
      "epoch": 1.6845047557327364,
      "grad_norm": 0.6674349308013916,
      "learning_rate": 6.674683552644584e-06,
      "loss": 0.0189,
      "step": 1029320
    },
    {
      "epoch": 1.6845374861713895,
      "grad_norm": 2.492475748062134,
      "learning_rate": 6.6746176604310675e-06,
      "loss": 0.0255,
      "step": 1029340
    },
    {
      "epoch": 1.6845702166100431,
      "grad_norm": 0.29977095127105713,
      "learning_rate": 6.67455176821755e-06,
      "loss": 0.0238,
      "step": 1029360
    },
    {
      "epoch": 1.6846029470486963,
      "grad_norm": 0.9460495114326477,
      "learning_rate": 6.674485876004033e-06,
      "loss": 0.0314,
      "step": 1029380
    },
    {
      "epoch": 1.6846356774873497,
      "grad_norm": 0.674113392829895,
      "learning_rate": 6.674419983790516e-06,
      "loss": 0.0215,
      "step": 1029400
    },
    {
      "epoch": 1.684668407926003,
      "grad_norm": 0.7195748090744019,
      "learning_rate": 6.674354091576999e-06,
      "loss": 0.0166,
      "step": 1029420
    },
    {
      "epoch": 1.6847011383646564,
      "grad_norm": 0.5991252064704895,
      "learning_rate": 6.674288199363482e-06,
      "loss": 0.0274,
      "step": 1029440
    },
    {
      "epoch": 1.6847338688033098,
      "grad_norm": 0.21372124552726746,
      "learning_rate": 6.674222307149965e-06,
      "loss": 0.0124,
      "step": 1029460
    },
    {
      "epoch": 1.684766599241963,
      "grad_norm": 0.9567667841911316,
      "learning_rate": 6.674156414936448e-06,
      "loss": 0.0239,
      "step": 1029480
    },
    {
      "epoch": 1.6847993296806165,
      "grad_norm": 0.2618626654148102,
      "learning_rate": 6.67409052272293e-06,
      "loss": 0.0154,
      "step": 1029500
    },
    {
      "epoch": 1.6848320601192697,
      "grad_norm": 0.1211627647280693,
      "learning_rate": 6.674024630509414e-06,
      "loss": 0.0144,
      "step": 1029520
    },
    {
      "epoch": 1.684864790557923,
      "grad_norm": 1.0351630449295044,
      "learning_rate": 6.673958738295896e-06,
      "loss": 0.0216,
      "step": 1029540
    },
    {
      "epoch": 1.6848975209965764,
      "grad_norm": 0.09643916040658951,
      "learning_rate": 6.673892846082379e-06,
      "loss": 0.0193,
      "step": 1029560
    },
    {
      "epoch": 1.6849302514352298,
      "grad_norm": 3.577526569366455,
      "learning_rate": 6.673826953868861e-06,
      "loss": 0.0207,
      "step": 1029580
    },
    {
      "epoch": 1.6849629818738832,
      "grad_norm": 1.367086410522461,
      "learning_rate": 6.673761061655345e-06,
      "loss": 0.0256,
      "step": 1029600
    },
    {
      "epoch": 1.6849957123125363,
      "grad_norm": 1.3140426874160767,
      "learning_rate": 6.6736951694418275e-06,
      "loss": 0.0218,
      "step": 1029620
    },
    {
      "epoch": 1.68502844275119,
      "grad_norm": 0.506706178188324,
      "learning_rate": 6.67362927722831e-06,
      "loss": 0.0202,
      "step": 1029640
    },
    {
      "epoch": 1.685061173189843,
      "grad_norm": 0.44978460669517517,
      "learning_rate": 6.673563385014793e-06,
      "loss": 0.0179,
      "step": 1029660
    },
    {
      "epoch": 1.6850939036284964,
      "grad_norm": 1.7158093452453613,
      "learning_rate": 6.673497492801277e-06,
      "loss": 0.0235,
      "step": 1029680
    },
    {
      "epoch": 1.6851266340671498,
      "grad_norm": 0.2500876188278198,
      "learning_rate": 6.6734316005877585e-06,
      "loss": 0.0157,
      "step": 1029700
    },
    {
      "epoch": 1.685159364505803,
      "grad_norm": 2.69792103767395,
      "learning_rate": 6.673365708374242e-06,
      "loss": 0.012,
      "step": 1029720
    },
    {
      "epoch": 1.6851920949444565,
      "grad_norm": 0.4727429449558258,
      "learning_rate": 6.673299816160724e-06,
      "loss": 0.0201,
      "step": 1029740
    },
    {
      "epoch": 1.6852248253831097,
      "grad_norm": 0.30984702706336975,
      "learning_rate": 6.6732339239472075e-06,
      "loss": 0.0205,
      "step": 1029760
    },
    {
      "epoch": 1.6852575558217633,
      "grad_norm": 0.7360008955001831,
      "learning_rate": 6.673168031733691e-06,
      "loss": 0.0161,
      "step": 1029780
    },
    {
      "epoch": 1.6852902862604164,
      "grad_norm": 2.127847671508789,
      "learning_rate": 6.673102139520173e-06,
      "loss": 0.0272,
      "step": 1029800
    },
    {
      "epoch": 1.6853230166990698,
      "grad_norm": 0.3049752712249756,
      "learning_rate": 6.673036247306657e-06,
      "loss": 0.0242,
      "step": 1029820
    },
    {
      "epoch": 1.6853557471377232,
      "grad_norm": 0.33432939648628235,
      "learning_rate": 6.672970355093139e-06,
      "loss": 0.0198,
      "step": 1029840
    },
    {
      "epoch": 1.6853884775763763,
      "grad_norm": 0.8740301132202148,
      "learning_rate": 6.672904462879622e-06,
      "loss": 0.0184,
      "step": 1029860
    },
    {
      "epoch": 1.68542120801503,
      "grad_norm": 0.977243959903717,
      "learning_rate": 6.672838570666105e-06,
      "loss": 0.0242,
      "step": 1029880
    },
    {
      "epoch": 1.685453938453683,
      "grad_norm": 0.22122929990291595,
      "learning_rate": 6.672772678452588e-06,
      "loss": 0.0139,
      "step": 1029900
    },
    {
      "epoch": 1.6854866688923364,
      "grad_norm": 0.11323437094688416,
      "learning_rate": 6.67270678623907e-06,
      "loss": 0.0172,
      "step": 1029920
    },
    {
      "epoch": 1.6855193993309898,
      "grad_norm": 0.359397292137146,
      "learning_rate": 6.672640894025554e-06,
      "loss": 0.0204,
      "step": 1029940
    },
    {
      "epoch": 1.6855521297696432,
      "grad_norm": 0.22452987730503082,
      "learning_rate": 6.672575001812036e-06,
      "loss": 0.0199,
      "step": 1029960
    },
    {
      "epoch": 1.6855848602082966,
      "grad_norm": 1.3062620162963867,
      "learning_rate": 6.672509109598519e-06,
      "loss": 0.0173,
      "step": 1029980
    },
    {
      "epoch": 1.6856175906469497,
      "grad_norm": 0.5784968137741089,
      "learning_rate": 6.672443217385002e-06,
      "loss": 0.0243,
      "step": 1030000
    },
    {
      "epoch": 1.6856503210856033,
      "grad_norm": 1.1132805347442627,
      "learning_rate": 6.672377325171485e-06,
      "loss": 0.0224,
      "step": 1030020
    },
    {
      "epoch": 1.6856830515242565,
      "grad_norm": 0.27723783254623413,
      "learning_rate": 6.672311432957968e-06,
      "loss": 0.0153,
      "step": 1030040
    },
    {
      "epoch": 1.6857157819629098,
      "grad_norm": 0.38127726316452026,
      "learning_rate": 6.672245540744451e-06,
      "loss": 0.0159,
      "step": 1030060
    },
    {
      "epoch": 1.6857485124015632,
      "grad_norm": 0.5796650648117065,
      "learning_rate": 6.672179648530933e-06,
      "loss": 0.0253,
      "step": 1030080
    },
    {
      "epoch": 1.6857812428402166,
      "grad_norm": 0.07191520929336548,
      "learning_rate": 6.672113756317417e-06,
      "loss": 0.0169,
      "step": 1030100
    },
    {
      "epoch": 1.68581397327887,
      "grad_norm": 0.21471652388572693,
      "learning_rate": 6.6720478641039e-06,
      "loss": 0.0159,
      "step": 1030120
    },
    {
      "epoch": 1.685846703717523,
      "grad_norm": 1.4178684949874878,
      "learning_rate": 6.671981971890382e-06,
      "loss": 0.0325,
      "step": 1030140
    },
    {
      "epoch": 1.6858794341561767,
      "grad_norm": 0.6839506030082703,
      "learning_rate": 6.671916079676866e-06,
      "loss": 0.0197,
      "step": 1030160
    },
    {
      "epoch": 1.6859121645948298,
      "grad_norm": 1.0801045894622803,
      "learning_rate": 6.671850187463348e-06,
      "loss": 0.0182,
      "step": 1030180
    },
    {
      "epoch": 1.6859448950334832,
      "grad_norm": 0.7422576546669006,
      "learning_rate": 6.671784295249831e-06,
      "loss": 0.0183,
      "step": 1030200
    },
    {
      "epoch": 1.6859776254721366,
      "grad_norm": 1.0992592573165894,
      "learning_rate": 6.671718403036314e-06,
      "loss": 0.0228,
      "step": 1030220
    },
    {
      "epoch": 1.68601035591079,
      "grad_norm": 0.7078109383583069,
      "learning_rate": 6.671652510822797e-06,
      "loss": 0.0186,
      "step": 1030240
    },
    {
      "epoch": 1.6860430863494433,
      "grad_norm": 0.45128345489501953,
      "learning_rate": 6.6715866186092794e-06,
      "loss": 0.016,
      "step": 1030260
    },
    {
      "epoch": 1.6860758167880965,
      "grad_norm": 0.5180104374885559,
      "learning_rate": 6.671520726395763e-06,
      "loss": 0.0222,
      "step": 1030280
    },
    {
      "epoch": 1.68610854722675,
      "grad_norm": 0.14279557764530182,
      "learning_rate": 6.671454834182245e-06,
      "loss": 0.0174,
      "step": 1030300
    },
    {
      "epoch": 1.6861412776654032,
      "grad_norm": 0.2094281017780304,
      "learning_rate": 6.6713889419687285e-06,
      "loss": 0.0187,
      "step": 1030320
    },
    {
      "epoch": 1.6861740081040566,
      "grad_norm": 0.9695241451263428,
      "learning_rate": 6.67132304975521e-06,
      "loss": 0.0144,
      "step": 1030340
    },
    {
      "epoch": 1.68620673854271,
      "grad_norm": 0.2843762934207916,
      "learning_rate": 6.671257157541694e-06,
      "loss": 0.0129,
      "step": 1030360
    },
    {
      "epoch": 1.6862394689813633,
      "grad_norm": 0.45036470890045166,
      "learning_rate": 6.671191265328177e-06,
      "loss": 0.0196,
      "step": 1030380
    },
    {
      "epoch": 1.6862721994200167,
      "grad_norm": 0.4050343334674835,
      "learning_rate": 6.6711253731146595e-06,
      "loss": 0.0212,
      "step": 1030400
    },
    {
      "epoch": 1.6863049298586699,
      "grad_norm": 0.3593195974826813,
      "learning_rate": 6.671059480901142e-06,
      "loss": 0.0188,
      "step": 1030420
    },
    {
      "epoch": 1.6863376602973235,
      "grad_norm": 0.332984060049057,
      "learning_rate": 6.670993588687626e-06,
      "loss": 0.0206,
      "step": 1030440
    },
    {
      "epoch": 1.6863703907359766,
      "grad_norm": 0.9943402409553528,
      "learning_rate": 6.670927696474108e-06,
      "loss": 0.0252,
      "step": 1030460
    },
    {
      "epoch": 1.68640312117463,
      "grad_norm": 0.30073869228363037,
      "learning_rate": 6.670861804260591e-06,
      "loss": 0.0169,
      "step": 1030480
    },
    {
      "epoch": 1.6864358516132834,
      "grad_norm": 0.3438100814819336,
      "learning_rate": 6.670795912047075e-06,
      "loss": 0.0171,
      "step": 1030500
    },
    {
      "epoch": 1.6864685820519365,
      "grad_norm": 0.45036423206329346,
      "learning_rate": 6.670730019833557e-06,
      "loss": 0.0168,
      "step": 1030520
    },
    {
      "epoch": 1.68650131249059,
      "grad_norm": 1.4302629232406616,
      "learning_rate": 6.67066412762004e-06,
      "loss": 0.0147,
      "step": 1030540
    },
    {
      "epoch": 1.6865340429292432,
      "grad_norm": 0.24752017855644226,
      "learning_rate": 6.670598235406522e-06,
      "loss": 0.0166,
      "step": 1030560
    },
    {
      "epoch": 1.6865667733678966,
      "grad_norm": 0.30569958686828613,
      "learning_rate": 6.670532343193006e-06,
      "loss": 0.0275,
      "step": 1030580
    },
    {
      "epoch": 1.68659950380655,
      "grad_norm": 1.207511067390442,
      "learning_rate": 6.670466450979488e-06,
      "loss": 0.0247,
      "step": 1030600
    },
    {
      "epoch": 1.6866322342452034,
      "grad_norm": 0.5104565620422363,
      "learning_rate": 6.670400558765971e-06,
      "loss": 0.0154,
      "step": 1030620
    },
    {
      "epoch": 1.6866649646838567,
      "grad_norm": 0.24458159506320953,
      "learning_rate": 6.670334666552454e-06,
      "loss": 0.0147,
      "step": 1030640
    },
    {
      "epoch": 1.6866976951225099,
      "grad_norm": 0.2697591483592987,
      "learning_rate": 6.670268774338937e-06,
      "loss": 0.0191,
      "step": 1030660
    },
    {
      "epoch": 1.6867304255611635,
      "grad_norm": 0.755609929561615,
      "learning_rate": 6.6702028821254195e-06,
      "loss": 0.022,
      "step": 1030680
    },
    {
      "epoch": 1.6867631559998166,
      "grad_norm": 2.7369086742401123,
      "learning_rate": 6.670136989911903e-06,
      "loss": 0.0184,
      "step": 1030700
    },
    {
      "epoch": 1.68679588643847,
      "grad_norm": 0.22332488000392914,
      "learning_rate": 6.670071097698385e-06,
      "loss": 0.0192,
      "step": 1030720
    },
    {
      "epoch": 1.6868286168771234,
      "grad_norm": 0.8411511778831482,
      "learning_rate": 6.6700052054848686e-06,
      "loss": 0.0208,
      "step": 1030740
    },
    {
      "epoch": 1.6868613473157767,
      "grad_norm": 0.9928370714187622,
      "learning_rate": 6.6699393132713505e-06,
      "loss": 0.0276,
      "step": 1030760
    },
    {
      "epoch": 1.6868940777544301,
      "grad_norm": 0.4148620665073395,
      "learning_rate": 6.669873421057834e-06,
      "loss": 0.0225,
      "step": 1030780
    },
    {
      "epoch": 1.6869268081930833,
      "grad_norm": 0.38216355443000793,
      "learning_rate": 6.669807528844317e-06,
      "loss": 0.0151,
      "step": 1030800
    },
    {
      "epoch": 1.6869595386317369,
      "grad_norm": 0.5208758115768433,
      "learning_rate": 6.6697416366307995e-06,
      "loss": 0.0156,
      "step": 1030820
    },
    {
      "epoch": 1.68699226907039,
      "grad_norm": 0.342925101518631,
      "learning_rate": 6.669675744417283e-06,
      "loss": 0.0217,
      "step": 1030840
    },
    {
      "epoch": 1.6870249995090434,
      "grad_norm": 1.269889235496521,
      "learning_rate": 6.669609852203766e-06,
      "loss": 0.0228,
      "step": 1030860
    },
    {
      "epoch": 1.6870577299476968,
      "grad_norm": 0.43613842129707336,
      "learning_rate": 6.669543959990249e-06,
      "loss": 0.0223,
      "step": 1030880
    },
    {
      "epoch": 1.6870904603863501,
      "grad_norm": 1.1737009286880493,
      "learning_rate": 6.669478067776731e-06,
      "loss": 0.0189,
      "step": 1030900
    },
    {
      "epoch": 1.6871231908250035,
      "grad_norm": 1.4744462966918945,
      "learning_rate": 6.669412175563215e-06,
      "loss": 0.0233,
      "step": 1030920
    },
    {
      "epoch": 1.6871559212636567,
      "grad_norm": 1.6248971223831177,
      "learning_rate": 6.669346283349697e-06,
      "loss": 0.0192,
      "step": 1030940
    },
    {
      "epoch": 1.6871886517023102,
      "grad_norm": 0.4913576543331146,
      "learning_rate": 6.66928039113618e-06,
      "loss": 0.0202,
      "step": 1030960
    },
    {
      "epoch": 1.6872213821409634,
      "grad_norm": 0.8373478651046753,
      "learning_rate": 6.669214498922662e-06,
      "loss": 0.0285,
      "step": 1030980
    },
    {
      "epoch": 1.6872541125796168,
      "grad_norm": 0.5369240045547485,
      "learning_rate": 6.669148606709146e-06,
      "loss": 0.0232,
      "step": 1031000
    },
    {
      "epoch": 1.6872868430182701,
      "grad_norm": 0.4675234854221344,
      "learning_rate": 6.669082714495629e-06,
      "loss": 0.0181,
      "step": 1031020
    },
    {
      "epoch": 1.6873195734569235,
      "grad_norm": 0.594248354434967,
      "learning_rate": 6.669016822282111e-06,
      "loss": 0.0209,
      "step": 1031040
    },
    {
      "epoch": 1.6873523038955769,
      "grad_norm": 0.819038987159729,
      "learning_rate": 6.668950930068594e-06,
      "loss": 0.0207,
      "step": 1031060
    },
    {
      "epoch": 1.68738503433423,
      "grad_norm": 0.9608726501464844,
      "learning_rate": 6.668885037855078e-06,
      "loss": 0.0241,
      "step": 1031080
    },
    {
      "epoch": 1.6874177647728836,
      "grad_norm": 0.25984764099121094,
      "learning_rate": 6.66881914564156e-06,
      "loss": 0.0226,
      "step": 1031100
    },
    {
      "epoch": 1.6874504952115368,
      "grad_norm": 1.0227712392807007,
      "learning_rate": 6.668753253428043e-06,
      "loss": 0.0268,
      "step": 1031120
    },
    {
      "epoch": 1.6874832256501902,
      "grad_norm": 0.40173912048339844,
      "learning_rate": 6.668687361214525e-06,
      "loss": 0.0155,
      "step": 1031140
    },
    {
      "epoch": 1.6875159560888435,
      "grad_norm": 0.30072566866874695,
      "learning_rate": 6.668621469001009e-06,
      "loss": 0.0199,
      "step": 1031160
    },
    {
      "epoch": 1.6875486865274967,
      "grad_norm": 2.2597594261169434,
      "learning_rate": 6.668555576787492e-06,
      "loss": 0.0187,
      "step": 1031180
    },
    {
      "epoch": 1.6875814169661503,
      "grad_norm": 0.49458396434783936,
      "learning_rate": 6.668489684573974e-06,
      "loss": 0.014,
      "step": 1031200
    },
    {
      "epoch": 1.6876141474048034,
      "grad_norm": 0.38304567337036133,
      "learning_rate": 6.668423792360458e-06,
      "loss": 0.0175,
      "step": 1031220
    },
    {
      "epoch": 1.687646877843457,
      "grad_norm": 0.24368025362491608,
      "learning_rate": 6.6683579001469405e-06,
      "loss": 0.0199,
      "step": 1031240
    },
    {
      "epoch": 1.6876796082821102,
      "grad_norm": 0.5707176923751831,
      "learning_rate": 6.668292007933423e-06,
      "loss": 0.0185,
      "step": 1031260
    },
    {
      "epoch": 1.6877123387207635,
      "grad_norm": 0.5861923098564148,
      "learning_rate": 6.668226115719906e-06,
      "loss": 0.0229,
      "step": 1031280
    },
    {
      "epoch": 1.687745069159417,
      "grad_norm": 0.17316563427448273,
      "learning_rate": 6.6681602235063895e-06,
      "loss": 0.0184,
      "step": 1031300
    },
    {
      "epoch": 1.68777779959807,
      "grad_norm": 0.15617786347866058,
      "learning_rate": 6.668094331292871e-06,
      "loss": 0.023,
      "step": 1031320
    },
    {
      "epoch": 1.6878105300367237,
      "grad_norm": 0.5087592601776123,
      "learning_rate": 6.668028439079355e-06,
      "loss": 0.017,
      "step": 1031340
    },
    {
      "epoch": 1.6878432604753768,
      "grad_norm": 0.18642902374267578,
      "learning_rate": 6.667962546865837e-06,
      "loss": 0.0171,
      "step": 1031360
    },
    {
      "epoch": 1.6878759909140302,
      "grad_norm": 0.33518341183662415,
      "learning_rate": 6.6678966546523205e-06,
      "loss": 0.0165,
      "step": 1031380
    },
    {
      "epoch": 1.6879087213526835,
      "grad_norm": 0.5311119556427002,
      "learning_rate": 6.667830762438802e-06,
      "loss": 0.0189,
      "step": 1031400
    },
    {
      "epoch": 1.687941451791337,
      "grad_norm": 0.35903382301330566,
      "learning_rate": 6.667764870225286e-06,
      "loss": 0.028,
      "step": 1031420
    },
    {
      "epoch": 1.6879741822299903,
      "grad_norm": 0.27161455154418945,
      "learning_rate": 6.667698978011769e-06,
      "loss": 0.0162,
      "step": 1031440
    },
    {
      "epoch": 1.6880069126686434,
      "grad_norm": 0.555131196975708,
      "learning_rate": 6.6676330857982514e-06,
      "loss": 0.0186,
      "step": 1031460
    },
    {
      "epoch": 1.688039643107297,
      "grad_norm": 0.6665171384811401,
      "learning_rate": 6.667567193584734e-06,
      "loss": 0.0271,
      "step": 1031480
    },
    {
      "epoch": 1.6880723735459502,
      "grad_norm": 0.7223649621009827,
      "learning_rate": 6.667501301371218e-06,
      "loss": 0.0161,
      "step": 1031500
    },
    {
      "epoch": 1.6881051039846036,
      "grad_norm": 0.051539096981287,
      "learning_rate": 6.667435409157701e-06,
      "loss": 0.0298,
      "step": 1031520
    },
    {
      "epoch": 1.688137834423257,
      "grad_norm": 0.17367565631866455,
      "learning_rate": 6.667369516944183e-06,
      "loss": 0.0242,
      "step": 1031540
    },
    {
      "epoch": 1.6881705648619103,
      "grad_norm": 0.7736333608627319,
      "learning_rate": 6.667303624730667e-06,
      "loss": 0.018,
      "step": 1031560
    },
    {
      "epoch": 1.6882032953005637,
      "grad_norm": 0.5400375723838806,
      "learning_rate": 6.667237732517149e-06,
      "loss": 0.0182,
      "step": 1031580
    },
    {
      "epoch": 1.6882360257392168,
      "grad_norm": 0.2929037809371948,
      "learning_rate": 6.667171840303632e-06,
      "loss": 0.018,
      "step": 1031600
    },
    {
      "epoch": 1.6882687561778704,
      "grad_norm": 0.702080488204956,
      "learning_rate": 6.667105948090114e-06,
      "loss": 0.0119,
      "step": 1031620
    },
    {
      "epoch": 1.6883014866165236,
      "grad_norm": 0.6975643634796143,
      "learning_rate": 6.667040055876598e-06,
      "loss": 0.021,
      "step": 1031640
    },
    {
      "epoch": 1.688334217055177,
      "grad_norm": 0.3338564336299896,
      "learning_rate": 6.6669741636630805e-06,
      "loss": 0.0222,
      "step": 1031660
    },
    {
      "epoch": 1.6883669474938303,
      "grad_norm": 0.1705385148525238,
      "learning_rate": 6.666908271449563e-06,
      "loss": 0.0204,
      "step": 1031680
    },
    {
      "epoch": 1.6883996779324837,
      "grad_norm": 0.5142771601676941,
      "learning_rate": 6.666842379236046e-06,
      "loss": 0.0238,
      "step": 1031700
    },
    {
      "epoch": 1.688432408371137,
      "grad_norm": 0.3556109070777893,
      "learning_rate": 6.66677648702253e-06,
      "loss": 0.0203,
      "step": 1031720
    },
    {
      "epoch": 1.6884651388097902,
      "grad_norm": 0.6214795112609863,
      "learning_rate": 6.6667105948090115e-06,
      "loss": 0.0208,
      "step": 1031740
    },
    {
      "epoch": 1.6884978692484438,
      "grad_norm": 1.0288431644439697,
      "learning_rate": 6.666644702595495e-06,
      "loss": 0.0206,
      "step": 1031760
    },
    {
      "epoch": 1.688530599687097,
      "grad_norm": 0.7248879075050354,
      "learning_rate": 6.666578810381977e-06,
      "loss": 0.0258,
      "step": 1031780
    },
    {
      "epoch": 1.6885633301257503,
      "grad_norm": 0.2924441397190094,
      "learning_rate": 6.6665129181684606e-06,
      "loss": 0.0238,
      "step": 1031800
    },
    {
      "epoch": 1.6885960605644037,
      "grad_norm": 0.4557165205478668,
      "learning_rate": 6.666447025954943e-06,
      "loss": 0.0133,
      "step": 1031820
    },
    {
      "epoch": 1.688628791003057,
      "grad_norm": 0.4043514132499695,
      "learning_rate": 6.666381133741426e-06,
      "loss": 0.0268,
      "step": 1031840
    },
    {
      "epoch": 1.6886615214417104,
      "grad_norm": 1.542261004447937,
      "learning_rate": 6.666315241527909e-06,
      "loss": 0.0198,
      "step": 1031860
    },
    {
      "epoch": 1.6886942518803636,
      "grad_norm": 1.3382399082183838,
      "learning_rate": 6.666249349314392e-06,
      "loss": 0.0261,
      "step": 1031880
    },
    {
      "epoch": 1.6887269823190172,
      "grad_norm": 0.942620575428009,
      "learning_rate": 6.666183457100875e-06,
      "loss": 0.0158,
      "step": 1031900
    },
    {
      "epoch": 1.6887597127576703,
      "grad_norm": 0.5970098376274109,
      "learning_rate": 6.666117564887358e-06,
      "loss": 0.0225,
      "step": 1031920
    },
    {
      "epoch": 1.6887924431963237,
      "grad_norm": 0.32495638728141785,
      "learning_rate": 6.6660516726738414e-06,
      "loss": 0.0182,
      "step": 1031940
    },
    {
      "epoch": 1.688825173634977,
      "grad_norm": 0.22407245635986328,
      "learning_rate": 6.665985780460323e-06,
      "loss": 0.0137,
      "step": 1031960
    },
    {
      "epoch": 1.6888579040736302,
      "grad_norm": 0.25625333189964294,
      "learning_rate": 6.665919888246807e-06,
      "loss": 0.0127,
      "step": 1031980
    },
    {
      "epoch": 1.6888906345122838,
      "grad_norm": 0.45785030722618103,
      "learning_rate": 6.665853996033289e-06,
      "loss": 0.0197,
      "step": 1032000
    },
    {
      "epoch": 1.688923364950937,
      "grad_norm": 0.39625707268714905,
      "learning_rate": 6.665788103819772e-06,
      "loss": 0.0174,
      "step": 1032020
    },
    {
      "epoch": 1.6889560953895906,
      "grad_norm": 0.30955275893211365,
      "learning_rate": 6.665722211606255e-06,
      "loss": 0.0148,
      "step": 1032040
    },
    {
      "epoch": 1.6889888258282437,
      "grad_norm": 1.673806071281433,
      "learning_rate": 6.665656319392738e-06,
      "loss": 0.0162,
      "step": 1032060
    },
    {
      "epoch": 1.689021556266897,
      "grad_norm": 0.877851128578186,
      "learning_rate": 6.665590427179221e-06,
      "loss": 0.0243,
      "step": 1032080
    },
    {
      "epoch": 1.6890542867055505,
      "grad_norm": 0.2661588191986084,
      "learning_rate": 6.665524534965704e-06,
      "loss": 0.0185,
      "step": 1032100
    },
    {
      "epoch": 1.6890870171442036,
      "grad_norm": 1.1074129343032837,
      "learning_rate": 6.665458642752186e-06,
      "loss": 0.0162,
      "step": 1032120
    },
    {
      "epoch": 1.6891197475828572,
      "grad_norm": 0.5336605310440063,
      "learning_rate": 6.66539275053867e-06,
      "loss": 0.0136,
      "step": 1032140
    },
    {
      "epoch": 1.6891524780215104,
      "grad_norm": 0.4937681257724762,
      "learning_rate": 6.6653268583251516e-06,
      "loss": 0.0232,
      "step": 1032160
    },
    {
      "epoch": 1.6891852084601637,
      "grad_norm": 0.24299311637878418,
      "learning_rate": 6.665260966111635e-06,
      "loss": 0.0175,
      "step": 1032180
    },
    {
      "epoch": 1.689217938898817,
      "grad_norm": 0.4043467342853546,
      "learning_rate": 6.665195073898118e-06,
      "loss": 0.0256,
      "step": 1032200
    },
    {
      "epoch": 1.6892506693374705,
      "grad_norm": 0.13331754505634308,
      "learning_rate": 6.665129181684601e-06,
      "loss": 0.0226,
      "step": 1032220
    },
    {
      "epoch": 1.6892833997761239,
      "grad_norm": 0.39798641204833984,
      "learning_rate": 6.665063289471084e-06,
      "loss": 0.0149,
      "step": 1032240
    },
    {
      "epoch": 1.689316130214777,
      "grad_norm": 0.5285201668739319,
      "learning_rate": 6.664997397257567e-06,
      "loss": 0.0138,
      "step": 1032260
    },
    {
      "epoch": 1.6893488606534306,
      "grad_norm": 0.590617299079895,
      "learning_rate": 6.66493150504405e-06,
      "loss": 0.019,
      "step": 1032280
    },
    {
      "epoch": 1.6893815910920837,
      "grad_norm": 1.616814374923706,
      "learning_rate": 6.6648656128305324e-06,
      "loss": 0.0115,
      "step": 1032300
    },
    {
      "epoch": 1.6894143215307371,
      "grad_norm": 0.2516929507255554,
      "learning_rate": 6.664799720617016e-06,
      "loss": 0.0161,
      "step": 1032320
    },
    {
      "epoch": 1.6894470519693905,
      "grad_norm": 0.20254731178283691,
      "learning_rate": 6.664733828403498e-06,
      "loss": 0.0229,
      "step": 1032340
    },
    {
      "epoch": 1.6894797824080439,
      "grad_norm": 0.6551108360290527,
      "learning_rate": 6.6646679361899815e-06,
      "loss": 0.0154,
      "step": 1032360
    },
    {
      "epoch": 1.6895125128466972,
      "grad_norm": 0.1736166924238205,
      "learning_rate": 6.664602043976463e-06,
      "loss": 0.02,
      "step": 1032380
    },
    {
      "epoch": 1.6895452432853504,
      "grad_norm": 0.5962881445884705,
      "learning_rate": 6.664536151762947e-06,
      "loss": 0.021,
      "step": 1032400
    },
    {
      "epoch": 1.689577973724004,
      "grad_norm": 0.4481922388076782,
      "learning_rate": 6.664470259549429e-06,
      "loss": 0.0178,
      "step": 1032420
    },
    {
      "epoch": 1.6896107041626571,
      "grad_norm": 0.22514550387859344,
      "learning_rate": 6.6644043673359125e-06,
      "loss": 0.0159,
      "step": 1032440
    },
    {
      "epoch": 1.6896434346013105,
      "grad_norm": 0.41431400179862976,
      "learning_rate": 6.664338475122395e-06,
      "loss": 0.0335,
      "step": 1032460
    },
    {
      "epoch": 1.6896761650399639,
      "grad_norm": 0.1559637039899826,
      "learning_rate": 6.664272582908878e-06,
      "loss": 0.0242,
      "step": 1032480
    },
    {
      "epoch": 1.6897088954786172,
      "grad_norm": 0.4227718710899353,
      "learning_rate": 6.664206690695361e-06,
      "loss": 0.0111,
      "step": 1032500
    },
    {
      "epoch": 1.6897416259172706,
      "grad_norm": 0.3363548815250397,
      "learning_rate": 6.664140798481844e-06,
      "loss": 0.0205,
      "step": 1032520
    },
    {
      "epoch": 1.6897743563559238,
      "grad_norm": 0.6246036291122437,
      "learning_rate": 6.664074906268326e-06,
      "loss": 0.0235,
      "step": 1032540
    },
    {
      "epoch": 1.6898070867945774,
      "grad_norm": 0.8148429989814758,
      "learning_rate": 6.66400901405481e-06,
      "loss": 0.022,
      "step": 1032560
    },
    {
      "epoch": 1.6898398172332305,
      "grad_norm": 0.18353694677352905,
      "learning_rate": 6.663943121841293e-06,
      "loss": 0.0178,
      "step": 1032580
    },
    {
      "epoch": 1.6898725476718839,
      "grad_norm": 0.18774616718292236,
      "learning_rate": 6.663877229627775e-06,
      "loss": 0.0238,
      "step": 1032600
    },
    {
      "epoch": 1.6899052781105373,
      "grad_norm": 0.15002624690532684,
      "learning_rate": 6.663811337414259e-06,
      "loss": 0.0213,
      "step": 1032620
    },
    {
      "epoch": 1.6899380085491906,
      "grad_norm": 0.5769153237342834,
      "learning_rate": 6.663745445200741e-06,
      "loss": 0.0164,
      "step": 1032640
    },
    {
      "epoch": 1.689970738987844,
      "grad_norm": 0.14586640894412994,
      "learning_rate": 6.663679552987224e-06,
      "loss": 0.0163,
      "step": 1032660
    },
    {
      "epoch": 1.6900034694264972,
      "grad_norm": 0.8136907815933228,
      "learning_rate": 6.663613660773707e-06,
      "loss": 0.0219,
      "step": 1032680
    },
    {
      "epoch": 1.6900361998651507,
      "grad_norm": 1.1668797731399536,
      "learning_rate": 6.66354776856019e-06,
      "loss": 0.0186,
      "step": 1032700
    },
    {
      "epoch": 1.690068930303804,
      "grad_norm": 0.4185011386871338,
      "learning_rate": 6.6634818763466725e-06,
      "loss": 0.0147,
      "step": 1032720
    },
    {
      "epoch": 1.6901016607424573,
      "grad_norm": 1.771654725074768,
      "learning_rate": 6.663415984133156e-06,
      "loss": 0.0177,
      "step": 1032740
    },
    {
      "epoch": 1.6901343911811106,
      "grad_norm": 0.30262693762779236,
      "learning_rate": 6.663350091919638e-06,
      "loss": 0.0182,
      "step": 1032760
    },
    {
      "epoch": 1.6901671216197638,
      "grad_norm": 0.813018262386322,
      "learning_rate": 6.663284199706122e-06,
      "loss": 0.0204,
      "step": 1032780
    },
    {
      "epoch": 1.6901998520584174,
      "grad_norm": 1.8226879835128784,
      "learning_rate": 6.6632183074926035e-06,
      "loss": 0.0255,
      "step": 1032800
    },
    {
      "epoch": 1.6902325824970705,
      "grad_norm": 0.6205890774726868,
      "learning_rate": 6.663152415279087e-06,
      "loss": 0.0107,
      "step": 1032820
    },
    {
      "epoch": 1.6902653129357241,
      "grad_norm": 0.7057992219924927,
      "learning_rate": 6.66308652306557e-06,
      "loss": 0.0166,
      "step": 1032840
    },
    {
      "epoch": 1.6902980433743773,
      "grad_norm": 0.2670333683490753,
      "learning_rate": 6.6630206308520525e-06,
      "loss": 0.0141,
      "step": 1032860
    },
    {
      "epoch": 1.6903307738130307,
      "grad_norm": 0.4960905909538269,
      "learning_rate": 6.662954738638535e-06,
      "loss": 0.0191,
      "step": 1032880
    },
    {
      "epoch": 1.690363504251684,
      "grad_norm": 0.876291036605835,
      "learning_rate": 6.662888846425019e-06,
      "loss": 0.0167,
      "step": 1032900
    },
    {
      "epoch": 1.6903962346903372,
      "grad_norm": 0.6671838164329529,
      "learning_rate": 6.662822954211501e-06,
      "loss": 0.0222,
      "step": 1032920
    },
    {
      "epoch": 1.6904289651289908,
      "grad_norm": 0.955600380897522,
      "learning_rate": 6.662757061997984e-06,
      "loss": 0.0151,
      "step": 1032940
    },
    {
      "epoch": 1.690461695567644,
      "grad_norm": 0.8295270800590515,
      "learning_rate": 6.662691169784468e-06,
      "loss": 0.0177,
      "step": 1032960
    },
    {
      "epoch": 1.6904944260062973,
      "grad_norm": 0.4925417900085449,
      "learning_rate": 6.66262527757095e-06,
      "loss": 0.0245,
      "step": 1032980
    },
    {
      "epoch": 1.6905271564449507,
      "grad_norm": 0.6622330546379089,
      "learning_rate": 6.662559385357433e-06,
      "loss": 0.0177,
      "step": 1033000
    },
    {
      "epoch": 1.690559886883604,
      "grad_norm": 0.4007852375507355,
      "learning_rate": 6.662493493143915e-06,
      "loss": 0.0181,
      "step": 1033020
    },
    {
      "epoch": 1.6905926173222574,
      "grad_norm": 0.585466742515564,
      "learning_rate": 6.662427600930399e-06,
      "loss": 0.0223,
      "step": 1033040
    },
    {
      "epoch": 1.6906253477609106,
      "grad_norm": 1.3252288103103638,
      "learning_rate": 6.662361708716882e-06,
      "loss": 0.028,
      "step": 1033060
    },
    {
      "epoch": 1.6906580781995642,
      "grad_norm": 1.1062304973602295,
      "learning_rate": 6.662295816503364e-06,
      "loss": 0.0131,
      "step": 1033080
    },
    {
      "epoch": 1.6906908086382173,
      "grad_norm": 0.7184312343597412,
      "learning_rate": 6.662229924289847e-06,
      "loss": 0.0186,
      "step": 1033100
    },
    {
      "epoch": 1.6907235390768707,
      "grad_norm": 0.525845468044281,
      "learning_rate": 6.662164032076331e-06,
      "loss": 0.0265,
      "step": 1033120
    },
    {
      "epoch": 1.690756269515524,
      "grad_norm": 0.9793688058853149,
      "learning_rate": 6.662098139862813e-06,
      "loss": 0.0173,
      "step": 1033140
    },
    {
      "epoch": 1.6907889999541774,
      "grad_norm": 1.260831594467163,
      "learning_rate": 6.662032247649296e-06,
      "loss": 0.0247,
      "step": 1033160
    },
    {
      "epoch": 1.6908217303928308,
      "grad_norm": 0.418673574924469,
      "learning_rate": 6.661966355435778e-06,
      "loss": 0.0164,
      "step": 1033180
    },
    {
      "epoch": 1.690854460831484,
      "grad_norm": 0.7638407945632935,
      "learning_rate": 6.661900463222262e-06,
      "loss": 0.0184,
      "step": 1033200
    },
    {
      "epoch": 1.6908871912701375,
      "grad_norm": 0.3542344272136688,
      "learning_rate": 6.661834571008744e-06,
      "loss": 0.0174,
      "step": 1033220
    },
    {
      "epoch": 1.6909199217087907,
      "grad_norm": 0.6102907657623291,
      "learning_rate": 6.661768678795227e-06,
      "loss": 0.0247,
      "step": 1033240
    },
    {
      "epoch": 1.690952652147444,
      "grad_norm": 0.40852221846580505,
      "learning_rate": 6.66170278658171e-06,
      "loss": 0.0134,
      "step": 1033260
    },
    {
      "epoch": 1.6909853825860974,
      "grad_norm": 0.4483497738838196,
      "learning_rate": 6.6616368943681935e-06,
      "loss": 0.0238,
      "step": 1033280
    },
    {
      "epoch": 1.6910181130247508,
      "grad_norm": 0.30983594059944153,
      "learning_rate": 6.661571002154676e-06,
      "loss": 0.0135,
      "step": 1033300
    },
    {
      "epoch": 1.6910508434634042,
      "grad_norm": 0.22143331170082092,
      "learning_rate": 6.661505109941159e-06,
      "loss": 0.024,
      "step": 1033320
    },
    {
      "epoch": 1.6910835739020573,
      "grad_norm": 0.21362829208374023,
      "learning_rate": 6.6614392177276425e-06,
      "loss": 0.0196,
      "step": 1033340
    },
    {
      "epoch": 1.691116304340711,
      "grad_norm": 1.0122522115707397,
      "learning_rate": 6.661373325514124e-06,
      "loss": 0.012,
      "step": 1033360
    },
    {
      "epoch": 1.691149034779364,
      "grad_norm": 0.24873661994934082,
      "learning_rate": 6.661307433300608e-06,
      "loss": 0.0208,
      "step": 1033380
    },
    {
      "epoch": 1.6911817652180174,
      "grad_norm": 0.10289933532476425,
      "learning_rate": 6.66124154108709e-06,
      "loss": 0.0197,
      "step": 1033400
    },
    {
      "epoch": 1.6912144956566708,
      "grad_norm": 0.6191507577896118,
      "learning_rate": 6.6611756488735735e-06,
      "loss": 0.0145,
      "step": 1033420
    },
    {
      "epoch": 1.691247226095324,
      "grad_norm": 0.37475070357322693,
      "learning_rate": 6.661109756660055e-06,
      "loss": 0.0177,
      "step": 1033440
    },
    {
      "epoch": 1.6912799565339776,
      "grad_norm": 0.6837370991706848,
      "learning_rate": 6.661043864446539e-06,
      "loss": 0.0173,
      "step": 1033460
    },
    {
      "epoch": 1.6913126869726307,
      "grad_norm": 0.33332619071006775,
      "learning_rate": 6.660977972233022e-06,
      "loss": 0.02,
      "step": 1033480
    },
    {
      "epoch": 1.6913454174112843,
      "grad_norm": 0.6861329078674316,
      "learning_rate": 6.6609120800195044e-06,
      "loss": 0.0155,
      "step": 1033500
    },
    {
      "epoch": 1.6913781478499375,
      "grad_norm": 0.3782830238342285,
      "learning_rate": 6.660846187805987e-06,
      "loss": 0.0185,
      "step": 1033520
    },
    {
      "epoch": 1.6914108782885908,
      "grad_norm": 0.7789934277534485,
      "learning_rate": 6.660780295592471e-06,
      "loss": 0.02,
      "step": 1033540
    },
    {
      "epoch": 1.6914436087272442,
      "grad_norm": 1.9763505458831787,
      "learning_rate": 6.660714403378953e-06,
      "loss": 0.0226,
      "step": 1033560
    },
    {
      "epoch": 1.6914763391658973,
      "grad_norm": 0.6549032330513,
      "learning_rate": 6.660648511165436e-06,
      "loss": 0.0237,
      "step": 1033580
    },
    {
      "epoch": 1.691509069604551,
      "grad_norm": 0.21124929189682007,
      "learning_rate": 6.660582618951918e-06,
      "loss": 0.0179,
      "step": 1033600
    },
    {
      "epoch": 1.691541800043204,
      "grad_norm": 0.922491729259491,
      "learning_rate": 6.660516726738402e-06,
      "loss": 0.0269,
      "step": 1033620
    },
    {
      "epoch": 1.6915745304818575,
      "grad_norm": 1.1959190368652344,
      "learning_rate": 6.660450834524885e-06,
      "loss": 0.0169,
      "step": 1033640
    },
    {
      "epoch": 1.6916072609205108,
      "grad_norm": 0.12886711955070496,
      "learning_rate": 6.660384942311367e-06,
      "loss": 0.0164,
      "step": 1033660
    },
    {
      "epoch": 1.6916399913591642,
      "grad_norm": 0.39829757809638977,
      "learning_rate": 6.660319050097851e-06,
      "loss": 0.0123,
      "step": 1033680
    },
    {
      "epoch": 1.6916727217978176,
      "grad_norm": 1.284684658050537,
      "learning_rate": 6.6602531578843335e-06,
      "loss": 0.0155,
      "step": 1033700
    },
    {
      "epoch": 1.6917054522364707,
      "grad_norm": 0.6229619383811951,
      "learning_rate": 6.660187265670816e-06,
      "loss": 0.0248,
      "step": 1033720
    },
    {
      "epoch": 1.6917381826751243,
      "grad_norm": 0.7942370772361755,
      "learning_rate": 6.660121373457299e-06,
      "loss": 0.0166,
      "step": 1033740
    },
    {
      "epoch": 1.6917709131137775,
      "grad_norm": 1.1094131469726562,
      "learning_rate": 6.660055481243783e-06,
      "loss": 0.0303,
      "step": 1033760
    },
    {
      "epoch": 1.6918036435524308,
      "grad_norm": 1.1371272802352905,
      "learning_rate": 6.6599895890302645e-06,
      "loss": 0.0239,
      "step": 1033780
    },
    {
      "epoch": 1.6918363739910842,
      "grad_norm": 0.6365320682525635,
      "learning_rate": 6.659923696816748e-06,
      "loss": 0.0173,
      "step": 1033800
    },
    {
      "epoch": 1.6918691044297376,
      "grad_norm": 0.471549391746521,
      "learning_rate": 6.65985780460323e-06,
      "loss": 0.0162,
      "step": 1033820
    },
    {
      "epoch": 1.691901834868391,
      "grad_norm": 0.3143373727798462,
      "learning_rate": 6.6597919123897136e-06,
      "loss": 0.0162,
      "step": 1033840
    },
    {
      "epoch": 1.6919345653070441,
      "grad_norm": 0.44562679529190063,
      "learning_rate": 6.659726020176196e-06,
      "loss": 0.0139,
      "step": 1033860
    },
    {
      "epoch": 1.6919672957456977,
      "grad_norm": 0.850287914276123,
      "learning_rate": 6.659660127962679e-06,
      "loss": 0.0198,
      "step": 1033880
    },
    {
      "epoch": 1.6920000261843509,
      "grad_norm": 0.4518074691295624,
      "learning_rate": 6.659594235749162e-06,
      "loss": 0.0153,
      "step": 1033900
    },
    {
      "epoch": 1.6920327566230042,
      "grad_norm": 0.09659772366285324,
      "learning_rate": 6.659528343535645e-06,
      "loss": 0.018,
      "step": 1033920
    },
    {
      "epoch": 1.6920654870616576,
      "grad_norm": 1.7092924118041992,
      "learning_rate": 6.659462451322127e-06,
      "loss": 0.0179,
      "step": 1033940
    },
    {
      "epoch": 1.692098217500311,
      "grad_norm": 0.6661953330039978,
      "learning_rate": 6.659396559108611e-06,
      "loss": 0.0198,
      "step": 1033960
    },
    {
      "epoch": 1.6921309479389643,
      "grad_norm": 0.3256007730960846,
      "learning_rate": 6.659330666895093e-06,
      "loss": 0.0214,
      "step": 1033980
    },
    {
      "epoch": 1.6921636783776175,
      "grad_norm": 0.570832371711731,
      "learning_rate": 6.659264774681576e-06,
      "loss": 0.0208,
      "step": 1034000
    },
    {
      "epoch": 1.692196408816271,
      "grad_norm": 0.340025395154953,
      "learning_rate": 6.65919888246806e-06,
      "loss": 0.0195,
      "step": 1034020
    },
    {
      "epoch": 1.6922291392549242,
      "grad_norm": 0.30878037214279175,
      "learning_rate": 6.659132990254542e-06,
      "loss": 0.0128,
      "step": 1034040
    },
    {
      "epoch": 1.6922618696935776,
      "grad_norm": 0.28491055965423584,
      "learning_rate": 6.659067098041025e-06,
      "loss": 0.0164,
      "step": 1034060
    },
    {
      "epoch": 1.692294600132231,
      "grad_norm": 3.3266308307647705,
      "learning_rate": 6.659001205827508e-06,
      "loss": 0.0264,
      "step": 1034080
    },
    {
      "epoch": 1.6923273305708844,
      "grad_norm": 0.687110424041748,
      "learning_rate": 6.658935313613991e-06,
      "loss": 0.0238,
      "step": 1034100
    },
    {
      "epoch": 1.6923600610095377,
      "grad_norm": 0.44191768765449524,
      "learning_rate": 6.658869421400474e-06,
      "loss": 0.0281,
      "step": 1034120
    },
    {
      "epoch": 1.6923927914481909,
      "grad_norm": 1.4273985624313354,
      "learning_rate": 6.658803529186957e-06,
      "loss": 0.0198,
      "step": 1034140
    },
    {
      "epoch": 1.6924255218868445,
      "grad_norm": 0.8154786825180054,
      "learning_rate": 6.658737636973439e-06,
      "loss": 0.0207,
      "step": 1034160
    },
    {
      "epoch": 1.6924582523254976,
      "grad_norm": 0.2910827100276947,
      "learning_rate": 6.658671744759923e-06,
      "loss": 0.0189,
      "step": 1034180
    },
    {
      "epoch": 1.692490982764151,
      "grad_norm": 0.28198474645614624,
      "learning_rate": 6.6586058525464046e-06,
      "loss": 0.0207,
      "step": 1034200
    },
    {
      "epoch": 1.6925237132028044,
      "grad_norm": 0.7378501892089844,
      "learning_rate": 6.658539960332888e-06,
      "loss": 0.0224,
      "step": 1034220
    },
    {
      "epoch": 1.6925564436414575,
      "grad_norm": 0.19669409096240997,
      "learning_rate": 6.65847406811937e-06,
      "loss": 0.0207,
      "step": 1034240
    },
    {
      "epoch": 1.6925891740801111,
      "grad_norm": 0.258451372385025,
      "learning_rate": 6.658408175905854e-06,
      "loss": 0.0203,
      "step": 1034260
    },
    {
      "epoch": 1.6926219045187643,
      "grad_norm": 0.2582584023475647,
      "learning_rate": 6.658342283692336e-06,
      "loss": 0.0177,
      "step": 1034280
    },
    {
      "epoch": 1.6926546349574179,
      "grad_norm": 0.4263247549533844,
      "learning_rate": 6.65827639147882e-06,
      "loss": 0.0215,
      "step": 1034300
    },
    {
      "epoch": 1.692687365396071,
      "grad_norm": 0.4761662483215332,
      "learning_rate": 6.658210499265302e-06,
      "loss": 0.0121,
      "step": 1034320
    },
    {
      "epoch": 1.6927200958347244,
      "grad_norm": 0.29572436213493347,
      "learning_rate": 6.6581446070517854e-06,
      "loss": 0.0198,
      "step": 1034340
    },
    {
      "epoch": 1.6927528262733778,
      "grad_norm": 0.773597002029419,
      "learning_rate": 6.658078714838269e-06,
      "loss": 0.022,
      "step": 1034360
    },
    {
      "epoch": 1.692785556712031,
      "grad_norm": 0.5248010754585266,
      "learning_rate": 6.658012822624751e-06,
      "loss": 0.0157,
      "step": 1034380
    },
    {
      "epoch": 1.6928182871506845,
      "grad_norm": 0.4028032422065735,
      "learning_rate": 6.6579469304112345e-06,
      "loss": 0.021,
      "step": 1034400
    },
    {
      "epoch": 1.6928510175893376,
      "grad_norm": 0.6772833466529846,
      "learning_rate": 6.657881038197716e-06,
      "loss": 0.0153,
      "step": 1034420
    },
    {
      "epoch": 1.692883748027991,
      "grad_norm": 0.24039167165756226,
      "learning_rate": 6.6578151459842e-06,
      "loss": 0.0162,
      "step": 1034440
    },
    {
      "epoch": 1.6929164784666444,
      "grad_norm": 0.09942300617694855,
      "learning_rate": 6.657749253770682e-06,
      "loss": 0.018,
      "step": 1034460
    },
    {
      "epoch": 1.6929492089052978,
      "grad_norm": 0.3528239130973816,
      "learning_rate": 6.6576833615571655e-06,
      "loss": 0.0216,
      "step": 1034480
    },
    {
      "epoch": 1.6929819393439511,
      "grad_norm": 0.5600518584251404,
      "learning_rate": 6.657617469343648e-06,
      "loss": 0.0216,
      "step": 1034500
    },
    {
      "epoch": 1.6930146697826043,
      "grad_norm": 0.27177584171295166,
      "learning_rate": 6.657551577130131e-06,
      "loss": 0.012,
      "step": 1034520
    },
    {
      "epoch": 1.6930474002212579,
      "grad_norm": 0.35007819533348083,
      "learning_rate": 6.657485684916614e-06,
      "loss": 0.0159,
      "step": 1034540
    },
    {
      "epoch": 1.693080130659911,
      "grad_norm": 0.20665878057479858,
      "learning_rate": 6.657419792703097e-06,
      "loss": 0.0199,
      "step": 1034560
    },
    {
      "epoch": 1.6931128610985644,
      "grad_norm": 0.757233202457428,
      "learning_rate": 6.657353900489579e-06,
      "loss": 0.0189,
      "step": 1034580
    },
    {
      "epoch": 1.6931455915372178,
      "grad_norm": 0.8052019476890564,
      "learning_rate": 6.657288008276063e-06,
      "loss": 0.0111,
      "step": 1034600
    },
    {
      "epoch": 1.6931783219758711,
      "grad_norm": 0.31697553396224976,
      "learning_rate": 6.657222116062545e-06,
      "loss": 0.0156,
      "step": 1034620
    },
    {
      "epoch": 1.6932110524145245,
      "grad_norm": 0.36081650853157043,
      "learning_rate": 6.657156223849028e-06,
      "loss": 0.0171,
      "step": 1034640
    },
    {
      "epoch": 1.6932437828531777,
      "grad_norm": 0.4852754473686218,
      "learning_rate": 6.657090331635511e-06,
      "loss": 0.0145,
      "step": 1034660
    },
    {
      "epoch": 1.6932765132918313,
      "grad_norm": 0.17751069366931915,
      "learning_rate": 6.657024439421994e-06,
      "loss": 0.0208,
      "step": 1034680
    },
    {
      "epoch": 1.6933092437304844,
      "grad_norm": 1.2834373712539673,
      "learning_rate": 6.656958547208477e-06,
      "loss": 0.0259,
      "step": 1034700
    },
    {
      "epoch": 1.6933419741691378,
      "grad_norm": 0.5594580769538879,
      "learning_rate": 6.65689265499496e-06,
      "loss": 0.0197,
      "step": 1034720
    },
    {
      "epoch": 1.6933747046077912,
      "grad_norm": 0.6691064834594727,
      "learning_rate": 6.656826762781443e-06,
      "loss": 0.0164,
      "step": 1034740
    },
    {
      "epoch": 1.6934074350464445,
      "grad_norm": 0.7343318462371826,
      "learning_rate": 6.6567608705679255e-06,
      "loss": 0.019,
      "step": 1034760
    },
    {
      "epoch": 1.693440165485098,
      "grad_norm": 0.7349353432655334,
      "learning_rate": 6.656694978354409e-06,
      "loss": 0.0203,
      "step": 1034780
    },
    {
      "epoch": 1.693472895923751,
      "grad_norm": 0.7227902412414551,
      "learning_rate": 6.656629086140891e-06,
      "loss": 0.0158,
      "step": 1034800
    },
    {
      "epoch": 1.6935056263624046,
      "grad_norm": 0.8131400346755981,
      "learning_rate": 6.656563193927375e-06,
      "loss": 0.0296,
      "step": 1034820
    },
    {
      "epoch": 1.6935383568010578,
      "grad_norm": 0.19766782224178314,
      "learning_rate": 6.6564973017138565e-06,
      "loss": 0.0173,
      "step": 1034840
    },
    {
      "epoch": 1.6935710872397112,
      "grad_norm": 0.5830159783363342,
      "learning_rate": 6.65643140950034e-06,
      "loss": 0.019,
      "step": 1034860
    },
    {
      "epoch": 1.6936038176783645,
      "grad_norm": 0.7057861089706421,
      "learning_rate": 6.656365517286823e-06,
      "loss": 0.0245,
      "step": 1034880
    },
    {
      "epoch": 1.693636548117018,
      "grad_norm": 0.47822141647338867,
      "learning_rate": 6.6562996250733055e-06,
      "loss": 0.017,
      "step": 1034900
    },
    {
      "epoch": 1.6936692785556713,
      "grad_norm": 0.20634864270687103,
      "learning_rate": 6.656233732859788e-06,
      "loss": 0.0231,
      "step": 1034920
    },
    {
      "epoch": 1.6937020089943244,
      "grad_norm": 0.4896948039531708,
      "learning_rate": 6.656167840646272e-06,
      "loss": 0.0196,
      "step": 1034940
    },
    {
      "epoch": 1.693734739432978,
      "grad_norm": 0.6018398404121399,
      "learning_rate": 6.656101948432754e-06,
      "loss": 0.0198,
      "step": 1034960
    },
    {
      "epoch": 1.6937674698716312,
      "grad_norm": 0.28469887375831604,
      "learning_rate": 6.656036056219237e-06,
      "loss": 0.0198,
      "step": 1034980
    },
    {
      "epoch": 1.6938002003102846,
      "grad_norm": 0.5258045196533203,
      "learning_rate": 6.655970164005719e-06,
      "loss": 0.0172,
      "step": 1035000
    },
    {
      "epoch": 1.693832930748938,
      "grad_norm": 0.24824786186218262,
      "learning_rate": 6.655904271792203e-06,
      "loss": 0.0233,
      "step": 1035020
    },
    {
      "epoch": 1.693865661187591,
      "grad_norm": 0.35236600041389465,
      "learning_rate": 6.655838379578686e-06,
      "loss": 0.0167,
      "step": 1035040
    },
    {
      "epoch": 1.6938983916262447,
      "grad_norm": 0.4270239770412445,
      "learning_rate": 6.655772487365168e-06,
      "loss": 0.0187,
      "step": 1035060
    },
    {
      "epoch": 1.6939311220648978,
      "grad_norm": 0.7384867072105408,
      "learning_rate": 6.655706595151652e-06,
      "loss": 0.0175,
      "step": 1035080
    },
    {
      "epoch": 1.6939638525035514,
      "grad_norm": 1.4220830202102661,
      "learning_rate": 6.655640702938135e-06,
      "loss": 0.0242,
      "step": 1035100
    },
    {
      "epoch": 1.6939965829422046,
      "grad_norm": 0.34424299001693726,
      "learning_rate": 6.655574810724617e-06,
      "loss": 0.0189,
      "step": 1035120
    },
    {
      "epoch": 1.694029313380858,
      "grad_norm": 1.159496545791626,
      "learning_rate": 6.6555089185111e-06,
      "loss": 0.0134,
      "step": 1035140
    },
    {
      "epoch": 1.6940620438195113,
      "grad_norm": 0.5964457988739014,
      "learning_rate": 6.655443026297584e-06,
      "loss": 0.0158,
      "step": 1035160
    },
    {
      "epoch": 1.6940947742581645,
      "grad_norm": 0.3771039843559265,
      "learning_rate": 6.655377134084066e-06,
      "loss": 0.0119,
      "step": 1035180
    },
    {
      "epoch": 1.694127504696818,
      "grad_norm": 1.0575741529464722,
      "learning_rate": 6.655311241870549e-06,
      "loss": 0.019,
      "step": 1035200
    },
    {
      "epoch": 1.6941602351354712,
      "grad_norm": 0.8663020133972168,
      "learning_rate": 6.655245349657031e-06,
      "loss": 0.0216,
      "step": 1035220
    },
    {
      "epoch": 1.6941929655741246,
      "grad_norm": 0.4202626645565033,
      "learning_rate": 6.655179457443515e-06,
      "loss": 0.0272,
      "step": 1035240
    },
    {
      "epoch": 1.694225696012778,
      "grad_norm": 0.18647408485412598,
      "learning_rate": 6.6551135652299966e-06,
      "loss": 0.0135,
      "step": 1035260
    },
    {
      "epoch": 1.6942584264514313,
      "grad_norm": 0.7775296568870544,
      "learning_rate": 6.65504767301648e-06,
      "loss": 0.0185,
      "step": 1035280
    },
    {
      "epoch": 1.6942911568900847,
      "grad_norm": 0.44589775800704956,
      "learning_rate": 6.654981780802963e-06,
      "loss": 0.0246,
      "step": 1035300
    },
    {
      "epoch": 1.6943238873287378,
      "grad_norm": 0.46079516410827637,
      "learning_rate": 6.654915888589446e-06,
      "loss": 0.0194,
      "step": 1035320
    },
    {
      "epoch": 1.6943566177673914,
      "grad_norm": 0.294565349817276,
      "learning_rate": 6.654849996375928e-06,
      "loss": 0.0129,
      "step": 1035340
    },
    {
      "epoch": 1.6943893482060446,
      "grad_norm": 0.8827158212661743,
      "learning_rate": 6.654784104162412e-06,
      "loss": 0.0221,
      "step": 1035360
    },
    {
      "epoch": 1.694422078644698,
      "grad_norm": 0.7783812880516052,
      "learning_rate": 6.654718211948894e-06,
      "loss": 0.021,
      "step": 1035380
    },
    {
      "epoch": 1.6944548090833513,
      "grad_norm": 0.7368773221969604,
      "learning_rate": 6.6546523197353774e-06,
      "loss": 0.0126,
      "step": 1035400
    },
    {
      "epoch": 1.6944875395220047,
      "grad_norm": 0.834395170211792,
      "learning_rate": 6.654586427521861e-06,
      "loss": 0.018,
      "step": 1035420
    },
    {
      "epoch": 1.694520269960658,
      "grad_norm": 0.26378878951072693,
      "learning_rate": 6.654520535308343e-06,
      "loss": 0.0175,
      "step": 1035440
    },
    {
      "epoch": 1.6945530003993112,
      "grad_norm": 1.472378134727478,
      "learning_rate": 6.6544546430948265e-06,
      "loss": 0.0208,
      "step": 1035460
    },
    {
      "epoch": 1.6945857308379648,
      "grad_norm": 0.13777485489845276,
      "learning_rate": 6.654388750881308e-06,
      "loss": 0.0228,
      "step": 1035480
    },
    {
      "epoch": 1.694618461276618,
      "grad_norm": 1.6354401111602783,
      "learning_rate": 6.654322858667792e-06,
      "loss": 0.0212,
      "step": 1035500
    },
    {
      "epoch": 1.6946511917152713,
      "grad_norm": 0.7893778085708618,
      "learning_rate": 6.654256966454275e-06,
      "loss": 0.028,
      "step": 1035520
    },
    {
      "epoch": 1.6946839221539247,
      "grad_norm": 0.09994786977767944,
      "learning_rate": 6.6541910742407575e-06,
      "loss": 0.0181,
      "step": 1035540
    },
    {
      "epoch": 1.694716652592578,
      "grad_norm": 0.5853919982910156,
      "learning_rate": 6.65412518202724e-06,
      "loss": 0.0247,
      "step": 1035560
    },
    {
      "epoch": 1.6947493830312315,
      "grad_norm": 0.44516220688819885,
      "learning_rate": 6.654059289813724e-06,
      "loss": 0.0205,
      "step": 1035580
    },
    {
      "epoch": 1.6947821134698846,
      "grad_norm": 1.0438528060913086,
      "learning_rate": 6.653993397600206e-06,
      "loss": 0.018,
      "step": 1035600
    },
    {
      "epoch": 1.6948148439085382,
      "grad_norm": 0.44979128241539,
      "learning_rate": 6.653927505386689e-06,
      "loss": 0.0197,
      "step": 1035620
    },
    {
      "epoch": 1.6948475743471914,
      "grad_norm": 5.265645980834961,
      "learning_rate": 6.653861613173171e-06,
      "loss": 0.0195,
      "step": 1035640
    },
    {
      "epoch": 1.6948803047858447,
      "grad_norm": 0.35917097330093384,
      "learning_rate": 6.653795720959655e-06,
      "loss": 0.0168,
      "step": 1035660
    },
    {
      "epoch": 1.694913035224498,
      "grad_norm": 0.8925690054893494,
      "learning_rate": 6.6537298287461375e-06,
      "loss": 0.0292,
      "step": 1035680
    },
    {
      "epoch": 1.6949457656631515,
      "grad_norm": 0.9628287553787231,
      "learning_rate": 6.65366393653262e-06,
      "loss": 0.0222,
      "step": 1035700
    },
    {
      "epoch": 1.6949784961018048,
      "grad_norm": 0.22517094016075134,
      "learning_rate": 6.653598044319103e-06,
      "loss": 0.0202,
      "step": 1035720
    },
    {
      "epoch": 1.695011226540458,
      "grad_norm": 0.3850482106208801,
      "learning_rate": 6.6535321521055865e-06,
      "loss": 0.0135,
      "step": 1035740
    },
    {
      "epoch": 1.6950439569791116,
      "grad_norm": 0.1506267786026001,
      "learning_rate": 6.653466259892069e-06,
      "loss": 0.0181,
      "step": 1035760
    },
    {
      "epoch": 1.6950766874177647,
      "grad_norm": 0.26141464710235596,
      "learning_rate": 6.653400367678552e-06,
      "loss": 0.0173,
      "step": 1035780
    },
    {
      "epoch": 1.695109417856418,
      "grad_norm": 0.6170311570167542,
      "learning_rate": 6.653334475465036e-06,
      "loss": 0.0202,
      "step": 1035800
    },
    {
      "epoch": 1.6951421482950715,
      "grad_norm": 0.4325483739376068,
      "learning_rate": 6.6532685832515175e-06,
      "loss": 0.0174,
      "step": 1035820
    },
    {
      "epoch": 1.6951748787337246,
      "grad_norm": 0.1569191813468933,
      "learning_rate": 6.653202691038001e-06,
      "loss": 0.0217,
      "step": 1035840
    },
    {
      "epoch": 1.6952076091723782,
      "grad_norm": 0.4111238718032837,
      "learning_rate": 6.653136798824483e-06,
      "loss": 0.0205,
      "step": 1035860
    },
    {
      "epoch": 1.6952403396110314,
      "grad_norm": 1.2423856258392334,
      "learning_rate": 6.6530709066109666e-06,
      "loss": 0.0254,
      "step": 1035880
    },
    {
      "epoch": 1.695273070049685,
      "grad_norm": 0.7069627642631531,
      "learning_rate": 6.653005014397449e-06,
      "loss": 0.0206,
      "step": 1035900
    },
    {
      "epoch": 1.6953058004883381,
      "grad_norm": 0.3558100163936615,
      "learning_rate": 6.652939122183932e-06,
      "loss": 0.0163,
      "step": 1035920
    },
    {
      "epoch": 1.6953385309269915,
      "grad_norm": 0.45439934730529785,
      "learning_rate": 6.652873229970415e-06,
      "loss": 0.0189,
      "step": 1035940
    },
    {
      "epoch": 1.6953712613656449,
      "grad_norm": 0.21045993268489838,
      "learning_rate": 6.652807337756898e-06,
      "loss": 0.0215,
      "step": 1035960
    },
    {
      "epoch": 1.695403991804298,
      "grad_norm": 0.8523923754692078,
      "learning_rate": 6.65274144554338e-06,
      "loss": 0.0196,
      "step": 1035980
    },
    {
      "epoch": 1.6954367222429516,
      "grad_norm": 0.28126105666160583,
      "learning_rate": 6.652675553329864e-06,
      "loss": 0.0243,
      "step": 1036000
    },
    {
      "epoch": 1.6954694526816048,
      "grad_norm": 4.932011604309082,
      "learning_rate": 6.652609661116346e-06,
      "loss": 0.0254,
      "step": 1036020
    },
    {
      "epoch": 1.6955021831202581,
      "grad_norm": 1.3033006191253662,
      "learning_rate": 6.652543768902829e-06,
      "loss": 0.0243,
      "step": 1036040
    },
    {
      "epoch": 1.6955349135589115,
      "grad_norm": 0.7506496906280518,
      "learning_rate": 6.652477876689312e-06,
      "loss": 0.0166,
      "step": 1036060
    },
    {
      "epoch": 1.6955676439975649,
      "grad_norm": 0.17145128548145294,
      "learning_rate": 6.652411984475795e-06,
      "loss": 0.017,
      "step": 1036080
    },
    {
      "epoch": 1.6956003744362182,
      "grad_norm": 0.3072247803211212,
      "learning_rate": 6.652346092262278e-06,
      "loss": 0.0195,
      "step": 1036100
    },
    {
      "epoch": 1.6956331048748714,
      "grad_norm": 0.4147457480430603,
      "learning_rate": 6.652280200048761e-06,
      "loss": 0.0205,
      "step": 1036120
    },
    {
      "epoch": 1.695665835313525,
      "grad_norm": 0.8215478658676147,
      "learning_rate": 6.652214307835244e-06,
      "loss": 0.0247,
      "step": 1036140
    },
    {
      "epoch": 1.6956985657521781,
      "grad_norm": 0.25492990016937256,
      "learning_rate": 6.652148415621727e-06,
      "loss": 0.0187,
      "step": 1036160
    },
    {
      "epoch": 1.6957312961908315,
      "grad_norm": 0.6476133465766907,
      "learning_rate": 6.65208252340821e-06,
      "loss": 0.0188,
      "step": 1036180
    },
    {
      "epoch": 1.6957640266294849,
      "grad_norm": 0.336319237947464,
      "learning_rate": 6.652016631194692e-06,
      "loss": 0.0227,
      "step": 1036200
    },
    {
      "epoch": 1.6957967570681383,
      "grad_norm": 0.1543026566505432,
      "learning_rate": 6.651950738981176e-06,
      "loss": 0.0193,
      "step": 1036220
    },
    {
      "epoch": 1.6958294875067916,
      "grad_norm": 1.0806809663772583,
      "learning_rate": 6.651884846767658e-06,
      "loss": 0.026,
      "step": 1036240
    },
    {
      "epoch": 1.6958622179454448,
      "grad_norm": 0.23444761335849762,
      "learning_rate": 6.651818954554141e-06,
      "loss": 0.0191,
      "step": 1036260
    },
    {
      "epoch": 1.6958949483840984,
      "grad_norm": 0.47257086634635925,
      "learning_rate": 6.651753062340623e-06,
      "loss": 0.0187,
      "step": 1036280
    },
    {
      "epoch": 1.6959276788227515,
      "grad_norm": 0.18008390069007874,
      "learning_rate": 6.651687170127107e-06,
      "loss": 0.0208,
      "step": 1036300
    },
    {
      "epoch": 1.695960409261405,
      "grad_norm": 0.6134202480316162,
      "learning_rate": 6.651621277913589e-06,
      "loss": 0.0151,
      "step": 1036320
    },
    {
      "epoch": 1.6959931397000583,
      "grad_norm": 0.9282878041267395,
      "learning_rate": 6.651555385700072e-06,
      "loss": 0.0167,
      "step": 1036340
    },
    {
      "epoch": 1.6960258701387116,
      "grad_norm": 1.1909339427947998,
      "learning_rate": 6.651489493486555e-06,
      "loss": 0.0247,
      "step": 1036360
    },
    {
      "epoch": 1.696058600577365,
      "grad_norm": 0.23533783853054047,
      "learning_rate": 6.6514236012730384e-06,
      "loss": 0.0149,
      "step": 1036380
    },
    {
      "epoch": 1.6960913310160182,
      "grad_norm": 0.07663696259260178,
      "learning_rate": 6.65135770905952e-06,
      "loss": 0.0165,
      "step": 1036400
    },
    {
      "epoch": 1.6961240614546718,
      "grad_norm": 0.3830081820487976,
      "learning_rate": 6.651291816846004e-06,
      "loss": 0.018,
      "step": 1036420
    },
    {
      "epoch": 1.696156791893325,
      "grad_norm": 0.06120193749666214,
      "learning_rate": 6.651225924632486e-06,
      "loss": 0.0153,
      "step": 1036440
    },
    {
      "epoch": 1.6961895223319783,
      "grad_norm": 0.2101372331380844,
      "learning_rate": 6.651160032418969e-06,
      "loss": 0.0168,
      "step": 1036460
    },
    {
      "epoch": 1.6962222527706317,
      "grad_norm": 0.32433730363845825,
      "learning_rate": 6.651094140205453e-06,
      "loss": 0.0232,
      "step": 1036480
    },
    {
      "epoch": 1.6962549832092848,
      "grad_norm": 0.6584444642066956,
      "learning_rate": 6.651028247991935e-06,
      "loss": 0.019,
      "step": 1036500
    },
    {
      "epoch": 1.6962877136479384,
      "grad_norm": 0.2849395275115967,
      "learning_rate": 6.6509623557784185e-06,
      "loss": 0.0208,
      "step": 1036520
    },
    {
      "epoch": 1.6963204440865916,
      "grad_norm": 0.7767043709754944,
      "learning_rate": 6.650896463564901e-06,
      "loss": 0.0234,
      "step": 1036540
    },
    {
      "epoch": 1.6963531745252451,
      "grad_norm": 0.3425176441669464,
      "learning_rate": 6.650830571351384e-06,
      "loss": 0.0172,
      "step": 1036560
    },
    {
      "epoch": 1.6963859049638983,
      "grad_norm": 0.8158935308456421,
      "learning_rate": 6.650764679137867e-06,
      "loss": 0.0141,
      "step": 1036580
    },
    {
      "epoch": 1.6964186354025517,
      "grad_norm": 0.3855114281177521,
      "learning_rate": 6.65069878692435e-06,
      "loss": 0.022,
      "step": 1036600
    },
    {
      "epoch": 1.696451365841205,
      "grad_norm": 0.8722621202468872,
      "learning_rate": 6.650632894710832e-06,
      "loss": 0.0211,
      "step": 1036620
    },
    {
      "epoch": 1.6964840962798582,
      "grad_norm": 0.820236086845398,
      "learning_rate": 6.650567002497316e-06,
      "loss": 0.0141,
      "step": 1036640
    },
    {
      "epoch": 1.6965168267185118,
      "grad_norm": 0.25852200388908386,
      "learning_rate": 6.650501110283798e-06,
      "loss": 0.0189,
      "step": 1036660
    },
    {
      "epoch": 1.696549557157165,
      "grad_norm": 0.13581886887550354,
      "learning_rate": 6.650435218070281e-06,
      "loss": 0.0192,
      "step": 1036680
    },
    {
      "epoch": 1.6965822875958183,
      "grad_norm": 0.2767290472984314,
      "learning_rate": 6.650369325856764e-06,
      "loss": 0.0155,
      "step": 1036700
    },
    {
      "epoch": 1.6966150180344717,
      "grad_norm": 0.5841378569602966,
      "learning_rate": 6.650303433643247e-06,
      "loss": 0.0243,
      "step": 1036720
    },
    {
      "epoch": 1.696647748473125,
      "grad_norm": 1.0621041059494019,
      "learning_rate": 6.6502375414297295e-06,
      "loss": 0.0192,
      "step": 1036740
    },
    {
      "epoch": 1.6966804789117784,
      "grad_norm": 0.5190213918685913,
      "learning_rate": 6.650171649216213e-06,
      "loss": 0.023,
      "step": 1036760
    },
    {
      "epoch": 1.6967132093504316,
      "grad_norm": 0.5767147541046143,
      "learning_rate": 6.650105757002695e-06,
      "loss": 0.0233,
      "step": 1036780
    },
    {
      "epoch": 1.6967459397890852,
      "grad_norm": 0.38603997230529785,
      "learning_rate": 6.6500398647891785e-06,
      "loss": 0.018,
      "step": 1036800
    },
    {
      "epoch": 1.6967786702277383,
      "grad_norm": 1.9869904518127441,
      "learning_rate": 6.649973972575662e-06,
      "loss": 0.0153,
      "step": 1036820
    },
    {
      "epoch": 1.6968114006663917,
      "grad_norm": 0.5873212814331055,
      "learning_rate": 6.649908080362144e-06,
      "loss": 0.0128,
      "step": 1036840
    },
    {
      "epoch": 1.696844131105045,
      "grad_norm": 0.3170003592967987,
      "learning_rate": 6.649842188148628e-06,
      "loss": 0.0203,
      "step": 1036860
    },
    {
      "epoch": 1.6968768615436984,
      "grad_norm": 1.418006181716919,
      "learning_rate": 6.6497762959351095e-06,
      "loss": 0.0205,
      "step": 1036880
    },
    {
      "epoch": 1.6969095919823518,
      "grad_norm": 0.6173067092895508,
      "learning_rate": 6.649710403721593e-06,
      "loss": 0.0154,
      "step": 1036900
    },
    {
      "epoch": 1.696942322421005,
      "grad_norm": 0.2054307460784912,
      "learning_rate": 6.649644511508076e-06,
      "loss": 0.0283,
      "step": 1036920
    },
    {
      "epoch": 1.6969750528596586,
      "grad_norm": 0.4323263466358185,
      "learning_rate": 6.6495786192945585e-06,
      "loss": 0.0214,
      "step": 1036940
    },
    {
      "epoch": 1.6970077832983117,
      "grad_norm": 0.22910957038402557,
      "learning_rate": 6.649512727081041e-06,
      "loss": 0.0252,
      "step": 1036960
    },
    {
      "epoch": 1.697040513736965,
      "grad_norm": 0.4327237904071808,
      "learning_rate": 6.649446834867525e-06,
      "loss": 0.0158,
      "step": 1036980
    },
    {
      "epoch": 1.6970732441756184,
      "grad_norm": 0.36876344680786133,
      "learning_rate": 6.649380942654007e-06,
      "loss": 0.0222,
      "step": 1037000
    },
    {
      "epoch": 1.6971059746142718,
      "grad_norm": 0.9666836261749268,
      "learning_rate": 6.64931505044049e-06,
      "loss": 0.0186,
      "step": 1037020
    },
    {
      "epoch": 1.6971387050529252,
      "grad_norm": 0.5695906281471252,
      "learning_rate": 6.649249158226972e-06,
      "loss": 0.0124,
      "step": 1037040
    },
    {
      "epoch": 1.6971714354915783,
      "grad_norm": 0.7869582772254944,
      "learning_rate": 6.649183266013456e-06,
      "loss": 0.0192,
      "step": 1037060
    },
    {
      "epoch": 1.697204165930232,
      "grad_norm": 1.225825548171997,
      "learning_rate": 6.6491173737999386e-06,
      "loss": 0.0165,
      "step": 1037080
    },
    {
      "epoch": 1.697236896368885,
      "grad_norm": 0.3667978048324585,
      "learning_rate": 6.649051481586421e-06,
      "loss": 0.0157,
      "step": 1037100
    },
    {
      "epoch": 1.6972696268075385,
      "grad_norm": 0.32336756587028503,
      "learning_rate": 6.648985589372904e-06,
      "loss": 0.0165,
      "step": 1037120
    },
    {
      "epoch": 1.6973023572461918,
      "grad_norm": 0.286774605512619,
      "learning_rate": 6.648919697159388e-06,
      "loss": 0.0124,
      "step": 1037140
    },
    {
      "epoch": 1.6973350876848452,
      "grad_norm": 0.2454485297203064,
      "learning_rate": 6.64885380494587e-06,
      "loss": 0.0205,
      "step": 1037160
    },
    {
      "epoch": 1.6973678181234986,
      "grad_norm": 0.7221212983131409,
      "learning_rate": 6.648787912732353e-06,
      "loss": 0.02,
      "step": 1037180
    },
    {
      "epoch": 1.6974005485621517,
      "grad_norm": 0.5768802165985107,
      "learning_rate": 6.648722020518837e-06,
      "loss": 0.0175,
      "step": 1037200
    },
    {
      "epoch": 1.6974332790008053,
      "grad_norm": 0.19523850083351135,
      "learning_rate": 6.648656128305319e-06,
      "loss": 0.0168,
      "step": 1037220
    },
    {
      "epoch": 1.6974660094394585,
      "grad_norm": 0.48840734362602234,
      "learning_rate": 6.648590236091802e-06,
      "loss": 0.0127,
      "step": 1037240
    },
    {
      "epoch": 1.6974987398781118,
      "grad_norm": 0.623173713684082,
      "learning_rate": 6.648524343878284e-06,
      "loss": 0.0255,
      "step": 1037260
    },
    {
      "epoch": 1.6975314703167652,
      "grad_norm": 0.5598993301391602,
      "learning_rate": 6.648458451664768e-06,
      "loss": 0.0201,
      "step": 1037280
    },
    {
      "epoch": 1.6975642007554184,
      "grad_norm": 0.4700098931789398,
      "learning_rate": 6.6483925594512496e-06,
      "loss": 0.0258,
      "step": 1037300
    },
    {
      "epoch": 1.697596931194072,
      "grad_norm": 2.9515204429626465,
      "learning_rate": 6.648326667237733e-06,
      "loss": 0.0223,
      "step": 1037320
    },
    {
      "epoch": 1.697629661632725,
      "grad_norm": 0.4370676279067993,
      "learning_rate": 6.648260775024216e-06,
      "loss": 0.0214,
      "step": 1037340
    },
    {
      "epoch": 1.6976623920713787,
      "grad_norm": 0.3467196226119995,
      "learning_rate": 6.648194882810699e-06,
      "loss": 0.0228,
      "step": 1037360
    },
    {
      "epoch": 1.6976951225100319,
      "grad_norm": 0.48238709568977356,
      "learning_rate": 6.648128990597181e-06,
      "loss": 0.0226,
      "step": 1037380
    },
    {
      "epoch": 1.6977278529486852,
      "grad_norm": 0.535779595375061,
      "learning_rate": 6.648063098383665e-06,
      "loss": 0.021,
      "step": 1037400
    },
    {
      "epoch": 1.6977605833873386,
      "grad_norm": 0.1654701977968216,
      "learning_rate": 6.647997206170147e-06,
      "loss": 0.0194,
      "step": 1037420
    },
    {
      "epoch": 1.6977933138259917,
      "grad_norm": 0.22824271023273468,
      "learning_rate": 6.6479313139566304e-06,
      "loss": 0.0189,
      "step": 1037440
    },
    {
      "epoch": 1.6978260442646453,
      "grad_norm": 0.671940267086029,
      "learning_rate": 6.647865421743112e-06,
      "loss": 0.0194,
      "step": 1037460
    },
    {
      "epoch": 1.6978587747032985,
      "grad_norm": 0.6962115168571472,
      "learning_rate": 6.647799529529596e-06,
      "loss": 0.0138,
      "step": 1037480
    },
    {
      "epoch": 1.6978915051419519,
      "grad_norm": 0.6662378311157227,
      "learning_rate": 6.6477336373160795e-06,
      "loss": 0.0223,
      "step": 1037500
    },
    {
      "epoch": 1.6979242355806052,
      "grad_norm": 0.12000112235546112,
      "learning_rate": 6.647667745102561e-06,
      "loss": 0.0183,
      "step": 1037520
    },
    {
      "epoch": 1.6979569660192586,
      "grad_norm": 0.12852051854133606,
      "learning_rate": 6.647601852889045e-06,
      "loss": 0.0162,
      "step": 1037540
    },
    {
      "epoch": 1.697989696457912,
      "grad_norm": 0.6754878163337708,
      "learning_rate": 6.647535960675528e-06,
      "loss": 0.0138,
      "step": 1037560
    },
    {
      "epoch": 1.6980224268965651,
      "grad_norm": 0.29720422625541687,
      "learning_rate": 6.6474700684620105e-06,
      "loss": 0.0256,
      "step": 1037580
    },
    {
      "epoch": 1.6980551573352187,
      "grad_norm": 0.32316455245018005,
      "learning_rate": 6.647404176248493e-06,
      "loss": 0.0213,
      "step": 1037600
    },
    {
      "epoch": 1.6980878877738719,
      "grad_norm": 0.697616696357727,
      "learning_rate": 6.647338284034977e-06,
      "loss": 0.026,
      "step": 1037620
    },
    {
      "epoch": 1.6981206182125252,
      "grad_norm": 1.6176769733428955,
      "learning_rate": 6.647272391821459e-06,
      "loss": 0.0216,
      "step": 1037640
    },
    {
      "epoch": 1.6981533486511786,
      "grad_norm": 0.35615992546081543,
      "learning_rate": 6.647206499607942e-06,
      "loss": 0.0192,
      "step": 1037660
    },
    {
      "epoch": 1.698186079089832,
      "grad_norm": 0.7165376543998718,
      "learning_rate": 6.647140607394424e-06,
      "loss": 0.0165,
      "step": 1037680
    },
    {
      "epoch": 1.6982188095284854,
      "grad_norm": 1.6138653755187988,
      "learning_rate": 6.647074715180908e-06,
      "loss": 0.0171,
      "step": 1037700
    },
    {
      "epoch": 1.6982515399671385,
      "grad_norm": 0.3078213334083557,
      "learning_rate": 6.6470088229673905e-06,
      "loss": 0.0214,
      "step": 1037720
    },
    {
      "epoch": 1.698284270405792,
      "grad_norm": 0.14892511069774628,
      "learning_rate": 6.646942930753873e-06,
      "loss": 0.0154,
      "step": 1037740
    },
    {
      "epoch": 1.6983170008444453,
      "grad_norm": 0.7630209922790527,
      "learning_rate": 6.646877038540356e-06,
      "loss": 0.0148,
      "step": 1037760
    },
    {
      "epoch": 1.6983497312830986,
      "grad_norm": 1.1747196912765503,
      "learning_rate": 6.6468111463268395e-06,
      "loss": 0.0203,
      "step": 1037780
    },
    {
      "epoch": 1.698382461721752,
      "grad_norm": 0.31486809253692627,
      "learning_rate": 6.6467452541133214e-06,
      "loss": 0.0155,
      "step": 1037800
    },
    {
      "epoch": 1.6984151921604054,
      "grad_norm": 0.2621583342552185,
      "learning_rate": 6.646679361899805e-06,
      "loss": 0.017,
      "step": 1037820
    },
    {
      "epoch": 1.6984479225990587,
      "grad_norm": 0.7666932344436646,
      "learning_rate": 6.646613469686287e-06,
      "loss": 0.0304,
      "step": 1037840
    },
    {
      "epoch": 1.698480653037712,
      "grad_norm": 0.17382608354091644,
      "learning_rate": 6.6465475774727705e-06,
      "loss": 0.017,
      "step": 1037860
    },
    {
      "epoch": 1.6985133834763655,
      "grad_norm": 0.6574020981788635,
      "learning_rate": 6.646481685259254e-06,
      "loss": 0.0152,
      "step": 1037880
    },
    {
      "epoch": 1.6985461139150186,
      "grad_norm": 1.3605886697769165,
      "learning_rate": 6.646415793045736e-06,
      "loss": 0.0265,
      "step": 1037900
    },
    {
      "epoch": 1.698578844353672,
      "grad_norm": 0.46353939175605774,
      "learning_rate": 6.6463499008322196e-06,
      "loss": 0.0227,
      "step": 1037920
    },
    {
      "epoch": 1.6986115747923254,
      "grad_norm": 0.8538169860839844,
      "learning_rate": 6.646284008618702e-06,
      "loss": 0.0193,
      "step": 1037940
    },
    {
      "epoch": 1.6986443052309788,
      "grad_norm": 0.5218173265457153,
      "learning_rate": 6.646218116405185e-06,
      "loss": 0.0149,
      "step": 1037960
    },
    {
      "epoch": 1.6986770356696321,
      "grad_norm": 0.6140075325965881,
      "learning_rate": 6.646152224191668e-06,
      "loss": 0.0166,
      "step": 1037980
    },
    {
      "epoch": 1.6987097661082853,
      "grad_norm": 0.057091668248176575,
      "learning_rate": 6.646086331978151e-06,
      "loss": 0.0145,
      "step": 1038000
    },
    {
      "epoch": 1.6987424965469389,
      "grad_norm": 0.29933181405067444,
      "learning_rate": 6.646020439764633e-06,
      "loss": 0.0172,
      "step": 1038020
    },
    {
      "epoch": 1.698775226985592,
      "grad_norm": 0.2230822592973709,
      "learning_rate": 6.645954547551117e-06,
      "loss": 0.0142,
      "step": 1038040
    },
    {
      "epoch": 1.6988079574242454,
      "grad_norm": 0.2109920233488083,
      "learning_rate": 6.645888655337599e-06,
      "loss": 0.0206,
      "step": 1038060
    },
    {
      "epoch": 1.6988406878628988,
      "grad_norm": 0.12278949469327927,
      "learning_rate": 6.645822763124082e-06,
      "loss": 0.0193,
      "step": 1038080
    },
    {
      "epoch": 1.698873418301552,
      "grad_norm": 0.177233025431633,
      "learning_rate": 6.645756870910564e-06,
      "loss": 0.0186,
      "step": 1038100
    },
    {
      "epoch": 1.6989061487402055,
      "grad_norm": 0.24297375977039337,
      "learning_rate": 6.645690978697048e-06,
      "loss": 0.0177,
      "step": 1038120
    },
    {
      "epoch": 1.6989388791788587,
      "grad_norm": 0.6721890568733215,
      "learning_rate": 6.6456250864835306e-06,
      "loss": 0.0213,
      "step": 1038140
    },
    {
      "epoch": 1.6989716096175123,
      "grad_norm": 0.4985482692718506,
      "learning_rate": 6.645559194270014e-06,
      "loss": 0.0176,
      "step": 1038160
    },
    {
      "epoch": 1.6990043400561654,
      "grad_norm": 0.48921892046928406,
      "learning_rate": 6.645493302056496e-06,
      "loss": 0.0163,
      "step": 1038180
    },
    {
      "epoch": 1.6990370704948188,
      "grad_norm": 0.662071943283081,
      "learning_rate": 6.64542740984298e-06,
      "loss": 0.0217,
      "step": 1038200
    },
    {
      "epoch": 1.6990698009334722,
      "grad_norm": 0.20981107652187347,
      "learning_rate": 6.645361517629463e-06,
      "loss": 0.0151,
      "step": 1038220
    },
    {
      "epoch": 1.6991025313721253,
      "grad_norm": 0.7718524932861328,
      "learning_rate": 6.645295625415945e-06,
      "loss": 0.0182,
      "step": 1038240
    },
    {
      "epoch": 1.699135261810779,
      "grad_norm": 0.3542235493659973,
      "learning_rate": 6.645229733202429e-06,
      "loss": 0.0161,
      "step": 1038260
    },
    {
      "epoch": 1.699167992249432,
      "grad_norm": 1.1398550271987915,
      "learning_rate": 6.645163840988911e-06,
      "loss": 0.0246,
      "step": 1038280
    },
    {
      "epoch": 1.6992007226880854,
      "grad_norm": 0.20821692049503326,
      "learning_rate": 6.645097948775394e-06,
      "loss": 0.0135,
      "step": 1038300
    },
    {
      "epoch": 1.6992334531267388,
      "grad_norm": 0.5123705267906189,
      "learning_rate": 6.645032056561876e-06,
      "loss": 0.0268,
      "step": 1038320
    },
    {
      "epoch": 1.6992661835653922,
      "grad_norm": 1.2919065952301025,
      "learning_rate": 6.64496616434836e-06,
      "loss": 0.0195,
      "step": 1038340
    },
    {
      "epoch": 1.6992989140040455,
      "grad_norm": 0.3979768455028534,
      "learning_rate": 6.644900272134842e-06,
      "loss": 0.0278,
      "step": 1038360
    },
    {
      "epoch": 1.6993316444426987,
      "grad_norm": 1.297487735748291,
      "learning_rate": 6.644834379921325e-06,
      "loss": 0.0229,
      "step": 1038380
    },
    {
      "epoch": 1.6993643748813523,
      "grad_norm": 0.9645532369613647,
      "learning_rate": 6.644768487707808e-06,
      "loss": 0.0237,
      "step": 1038400
    },
    {
      "epoch": 1.6993971053200054,
      "grad_norm": 0.824658215045929,
      "learning_rate": 6.6447025954942915e-06,
      "loss": 0.023,
      "step": 1038420
    },
    {
      "epoch": 1.6994298357586588,
      "grad_norm": 0.5163012146949768,
      "learning_rate": 6.644636703280773e-06,
      "loss": 0.024,
      "step": 1038440
    },
    {
      "epoch": 1.6994625661973122,
      "grad_norm": 0.30239617824554443,
      "learning_rate": 6.644570811067257e-06,
      "loss": 0.0207,
      "step": 1038460
    },
    {
      "epoch": 1.6994952966359655,
      "grad_norm": 0.7044906616210938,
      "learning_rate": 6.644504918853739e-06,
      "loss": 0.0189,
      "step": 1038480
    },
    {
      "epoch": 1.699528027074619,
      "grad_norm": 0.3800699710845947,
      "learning_rate": 6.644439026640222e-06,
      "loss": 0.0168,
      "step": 1038500
    },
    {
      "epoch": 1.699560757513272,
      "grad_norm": 0.26364004611968994,
      "learning_rate": 6.644373134426705e-06,
      "loss": 0.0117,
      "step": 1038520
    },
    {
      "epoch": 1.6995934879519257,
      "grad_norm": 0.1932268589735031,
      "learning_rate": 6.644307242213188e-06,
      "loss": 0.0225,
      "step": 1038540
    },
    {
      "epoch": 1.6996262183905788,
      "grad_norm": 0.6837917566299438,
      "learning_rate": 6.6442413499996715e-06,
      "loss": 0.0179,
      "step": 1038560
    },
    {
      "epoch": 1.6996589488292322,
      "grad_norm": 0.24054594337940216,
      "learning_rate": 6.644175457786154e-06,
      "loss": 0.0167,
      "step": 1038580
    },
    {
      "epoch": 1.6996916792678856,
      "grad_norm": 0.6771938800811768,
      "learning_rate": 6.644109565572637e-06,
      "loss": 0.0276,
      "step": 1038600
    },
    {
      "epoch": 1.699724409706539,
      "grad_norm": 0.11133458465337753,
      "learning_rate": 6.64404367335912e-06,
      "loss": 0.0146,
      "step": 1038620
    },
    {
      "epoch": 1.6997571401451923,
      "grad_norm": 1.1124670505523682,
      "learning_rate": 6.643977781145603e-06,
      "loss": 0.0117,
      "step": 1038640
    },
    {
      "epoch": 1.6997898705838455,
      "grad_norm": 0.3928071856498718,
      "learning_rate": 6.643911888932085e-06,
      "loss": 0.0103,
      "step": 1038660
    },
    {
      "epoch": 1.699822601022499,
      "grad_norm": 0.18621781468391418,
      "learning_rate": 6.643845996718569e-06,
      "loss": 0.0228,
      "step": 1038680
    },
    {
      "epoch": 1.6998553314611522,
      "grad_norm": 0.7348226308822632,
      "learning_rate": 6.643780104505051e-06,
      "loss": 0.0211,
      "step": 1038700
    },
    {
      "epoch": 1.6998880618998056,
      "grad_norm": 0.3680809736251831,
      "learning_rate": 6.643714212291534e-06,
      "loss": 0.0187,
      "step": 1038720
    },
    {
      "epoch": 1.699920792338459,
      "grad_norm": 0.254945307970047,
      "learning_rate": 6.643648320078017e-06,
      "loss": 0.0158,
      "step": 1038740
    },
    {
      "epoch": 1.6999535227771123,
      "grad_norm": 0.8154335618019104,
      "learning_rate": 6.6435824278645e-06,
      "loss": 0.0173,
      "step": 1038760
    },
    {
      "epoch": 1.6999862532157657,
      "grad_norm": 0.3785967230796814,
      "learning_rate": 6.6435165356509825e-06,
      "loss": 0.0156,
      "step": 1038780
    },
    {
      "epoch": 1.7000189836544188,
      "grad_norm": 0.4092646837234497,
      "learning_rate": 6.643450643437466e-06,
      "loss": 0.0218,
      "step": 1038800
    },
    {
      "epoch": 1.7000517140930724,
      "grad_norm": 0.42212364077568054,
      "learning_rate": 6.643384751223948e-06,
      "loss": 0.0196,
      "step": 1038820
    },
    {
      "epoch": 1.7000844445317256,
      "grad_norm": 0.18474692106246948,
      "learning_rate": 6.6433188590104315e-06,
      "loss": 0.0167,
      "step": 1038840
    },
    {
      "epoch": 1.700117174970379,
      "grad_norm": 0.9808387160301208,
      "learning_rate": 6.6432529667969134e-06,
      "loss": 0.0163,
      "step": 1038860
    },
    {
      "epoch": 1.7001499054090323,
      "grad_norm": 0.7224971055984497,
      "learning_rate": 6.643187074583397e-06,
      "loss": 0.0204,
      "step": 1038880
    },
    {
      "epoch": 1.7001826358476855,
      "grad_norm": 0.16362722218036652,
      "learning_rate": 6.64312118236988e-06,
      "loss": 0.0208,
      "step": 1038900
    },
    {
      "epoch": 1.700215366286339,
      "grad_norm": 0.6434659361839294,
      "learning_rate": 6.6430552901563625e-06,
      "loss": 0.0222,
      "step": 1038920
    },
    {
      "epoch": 1.7002480967249922,
      "grad_norm": 0.5154830813407898,
      "learning_rate": 6.642989397942846e-06,
      "loss": 0.0143,
      "step": 1038940
    },
    {
      "epoch": 1.7002808271636456,
      "grad_norm": 0.8325633406639099,
      "learning_rate": 6.642923505729329e-06,
      "loss": 0.0159,
      "step": 1038960
    },
    {
      "epoch": 1.700313557602299,
      "grad_norm": 0.19664062559604645,
      "learning_rate": 6.6428576135158116e-06,
      "loss": 0.0239,
      "step": 1038980
    },
    {
      "epoch": 1.7003462880409523,
      "grad_norm": 0.2917034924030304,
      "learning_rate": 6.642791721302294e-06,
      "loss": 0.0229,
      "step": 1039000
    },
    {
      "epoch": 1.7003790184796057,
      "grad_norm": 0.743069052696228,
      "learning_rate": 6.642725829088778e-06,
      "loss": 0.0358,
      "step": 1039020
    },
    {
      "epoch": 1.7004117489182589,
      "grad_norm": 0.9059755206108093,
      "learning_rate": 6.64265993687526e-06,
      "loss": 0.0216,
      "step": 1039040
    },
    {
      "epoch": 1.7004444793569125,
      "grad_norm": 0.4086931645870209,
      "learning_rate": 6.642594044661743e-06,
      "loss": 0.0164,
      "step": 1039060
    },
    {
      "epoch": 1.7004772097955656,
      "grad_norm": 0.24406780302524567,
      "learning_rate": 6.642528152448225e-06,
      "loss": 0.0197,
      "step": 1039080
    },
    {
      "epoch": 1.700509940234219,
      "grad_norm": 0.5294421315193176,
      "learning_rate": 6.642462260234709e-06,
      "loss": 0.0329,
      "step": 1039100
    },
    {
      "epoch": 1.7005426706728723,
      "grad_norm": 2.1790311336517334,
      "learning_rate": 6.642396368021191e-06,
      "loss": 0.0225,
      "step": 1039120
    },
    {
      "epoch": 1.7005754011115257,
      "grad_norm": 0.33560046553611755,
      "learning_rate": 6.642330475807674e-06,
      "loss": 0.0173,
      "step": 1039140
    },
    {
      "epoch": 1.700608131550179,
      "grad_norm": 0.4406793713569641,
      "learning_rate": 6.642264583594157e-06,
      "loss": 0.0214,
      "step": 1039160
    },
    {
      "epoch": 1.7006408619888322,
      "grad_norm": 0.2735391855239868,
      "learning_rate": 6.64219869138064e-06,
      "loss": 0.0155,
      "step": 1039180
    },
    {
      "epoch": 1.7006735924274858,
      "grad_norm": 3.40004825592041,
      "learning_rate": 6.6421327991671225e-06,
      "loss": 0.0233,
      "step": 1039200
    },
    {
      "epoch": 1.700706322866139,
      "grad_norm": 0.20458021759986877,
      "learning_rate": 6.642066906953606e-06,
      "loss": 0.0217,
      "step": 1039220
    },
    {
      "epoch": 1.7007390533047924,
      "grad_norm": 0.18747299909591675,
      "learning_rate": 6.642001014740088e-06,
      "loss": 0.0185,
      "step": 1039240
    },
    {
      "epoch": 1.7007717837434457,
      "grad_norm": 0.388452410697937,
      "learning_rate": 6.641935122526572e-06,
      "loss": 0.0126,
      "step": 1039260
    },
    {
      "epoch": 1.700804514182099,
      "grad_norm": 0.20206068456172943,
      "learning_rate": 6.641869230313055e-06,
      "loss": 0.0231,
      "step": 1039280
    },
    {
      "epoch": 1.7008372446207525,
      "grad_norm": 0.2637685537338257,
      "learning_rate": 6.641803338099537e-06,
      "loss": 0.0206,
      "step": 1039300
    },
    {
      "epoch": 1.7008699750594056,
      "grad_norm": 1.7607848644256592,
      "learning_rate": 6.641737445886021e-06,
      "loss": 0.0204,
      "step": 1039320
    },
    {
      "epoch": 1.7009027054980592,
      "grad_norm": 0.9389722943305969,
      "learning_rate": 6.6416715536725026e-06,
      "loss": 0.0226,
      "step": 1039340
    },
    {
      "epoch": 1.7009354359367124,
      "grad_norm": 0.09655535221099854,
      "learning_rate": 6.641605661458986e-06,
      "loss": 0.0261,
      "step": 1039360
    },
    {
      "epoch": 1.7009681663753657,
      "grad_norm": 0.45765939354896545,
      "learning_rate": 6.641539769245469e-06,
      "loss": 0.0203,
      "step": 1039380
    },
    {
      "epoch": 1.7010008968140191,
      "grad_norm": 1.0607609748840332,
      "learning_rate": 6.641473877031952e-06,
      "loss": 0.018,
      "step": 1039400
    },
    {
      "epoch": 1.7010336272526725,
      "grad_norm": 0.8842220306396484,
      "learning_rate": 6.641407984818434e-06,
      "loss": 0.0121,
      "step": 1039420
    },
    {
      "epoch": 1.7010663576913259,
      "grad_norm": 0.4039503037929535,
      "learning_rate": 6.641342092604918e-06,
      "loss": 0.0294,
      "step": 1039440
    },
    {
      "epoch": 1.701099088129979,
      "grad_norm": 0.4399745762348175,
      "learning_rate": 6.6412762003914e-06,
      "loss": 0.0196,
      "step": 1039460
    },
    {
      "epoch": 1.7011318185686326,
      "grad_norm": 0.41879400610923767,
      "learning_rate": 6.6412103081778834e-06,
      "loss": 0.023,
      "step": 1039480
    },
    {
      "epoch": 1.7011645490072858,
      "grad_norm": 0.404053658246994,
      "learning_rate": 6.641144415964365e-06,
      "loss": 0.0156,
      "step": 1039500
    },
    {
      "epoch": 1.7011972794459391,
      "grad_norm": 0.3053974509239197,
      "learning_rate": 6.641078523750849e-06,
      "loss": 0.0145,
      "step": 1039520
    },
    {
      "epoch": 1.7012300098845925,
      "grad_norm": 0.1491202414035797,
      "learning_rate": 6.641012631537332e-06,
      "loss": 0.0223,
      "step": 1039540
    },
    {
      "epoch": 1.7012627403232456,
      "grad_norm": 0.7825760841369629,
      "learning_rate": 6.640946739323814e-06,
      "loss": 0.0183,
      "step": 1039560
    },
    {
      "epoch": 1.7012954707618992,
      "grad_norm": 2.8729283809661865,
      "learning_rate": 6.640880847110297e-06,
      "loss": 0.0178,
      "step": 1039580
    },
    {
      "epoch": 1.7013282012005524,
      "grad_norm": 0.3142438530921936,
      "learning_rate": 6.640814954896781e-06,
      "loss": 0.0167,
      "step": 1039600
    },
    {
      "epoch": 1.701360931639206,
      "grad_norm": 0.5626765489578247,
      "learning_rate": 6.6407490626832635e-06,
      "loss": 0.0252,
      "step": 1039620
    },
    {
      "epoch": 1.7013936620778591,
      "grad_norm": 0.741497814655304,
      "learning_rate": 6.640683170469746e-06,
      "loss": 0.0178,
      "step": 1039640
    },
    {
      "epoch": 1.7014263925165125,
      "grad_norm": 0.4823848307132721,
      "learning_rate": 6.64061727825623e-06,
      "loss": 0.0215,
      "step": 1039660
    },
    {
      "epoch": 1.7014591229551659,
      "grad_norm": 0.1712963730096817,
      "learning_rate": 6.640551386042712e-06,
      "loss": 0.02,
      "step": 1039680
    },
    {
      "epoch": 1.701491853393819,
      "grad_norm": 0.6088727712631226,
      "learning_rate": 6.640485493829195e-06,
      "loss": 0.0152,
      "step": 1039700
    },
    {
      "epoch": 1.7015245838324726,
      "grad_norm": 0.5319502949714661,
      "learning_rate": 6.640419601615677e-06,
      "loss": 0.0179,
      "step": 1039720
    },
    {
      "epoch": 1.7015573142711258,
      "grad_norm": 0.27610692381858826,
      "learning_rate": 6.640353709402161e-06,
      "loss": 0.0184,
      "step": 1039740
    },
    {
      "epoch": 1.7015900447097791,
      "grad_norm": 0.1821114718914032,
      "learning_rate": 6.6402878171886435e-06,
      "loss": 0.0171,
      "step": 1039760
    },
    {
      "epoch": 1.7016227751484325,
      "grad_norm": 0.25551846623420715,
      "learning_rate": 6.640221924975126e-06,
      "loss": 0.0141,
      "step": 1039780
    },
    {
      "epoch": 1.701655505587086,
      "grad_norm": 0.4640519320964813,
      "learning_rate": 6.640156032761609e-06,
      "loss": 0.0163,
      "step": 1039800
    },
    {
      "epoch": 1.7016882360257393,
      "grad_norm": 0.44348403811454773,
      "learning_rate": 6.6400901405480926e-06,
      "loss": 0.0154,
      "step": 1039820
    },
    {
      "epoch": 1.7017209664643924,
      "grad_norm": 0.7991361021995544,
      "learning_rate": 6.6400242483345744e-06,
      "loss": 0.0179,
      "step": 1039840
    },
    {
      "epoch": 1.701753696903046,
      "grad_norm": 0.325987845659256,
      "learning_rate": 6.639958356121058e-06,
      "loss": 0.0204,
      "step": 1039860
    },
    {
      "epoch": 1.7017864273416992,
      "grad_norm": 0.3012901842594147,
      "learning_rate": 6.63989246390754e-06,
      "loss": 0.0117,
      "step": 1039880
    },
    {
      "epoch": 1.7018191577803525,
      "grad_norm": 0.3402499556541443,
      "learning_rate": 6.6398265716940235e-06,
      "loss": 0.02,
      "step": 1039900
    },
    {
      "epoch": 1.701851888219006,
      "grad_norm": 0.12605349719524384,
      "learning_rate": 6.639760679480506e-06,
      "loss": 0.0173,
      "step": 1039920
    },
    {
      "epoch": 1.7018846186576593,
      "grad_norm": 0.4833676815032959,
      "learning_rate": 6.639694787266989e-06,
      "loss": 0.0196,
      "step": 1039940
    },
    {
      "epoch": 1.7019173490963126,
      "grad_norm": 0.6440291404724121,
      "learning_rate": 6.639628895053472e-06,
      "loss": 0.0196,
      "step": 1039960
    },
    {
      "epoch": 1.7019500795349658,
      "grad_norm": 0.21332883834838867,
      "learning_rate": 6.639563002839955e-06,
      "loss": 0.019,
      "step": 1039980
    },
    {
      "epoch": 1.7019828099736194,
      "grad_norm": 0.5085699558258057,
      "learning_rate": 6.639497110626438e-06,
      "loss": 0.0221,
      "step": 1040000
    },
    {
      "epoch": 1.7020155404122725,
      "grad_norm": 0.27211660146713257,
      "learning_rate": 6.639431218412921e-06,
      "loss": 0.0208,
      "step": 1040020
    },
    {
      "epoch": 1.702048270850926,
      "grad_norm": 1.1589385271072388,
      "learning_rate": 6.639365326199404e-06,
      "loss": 0.0148,
      "step": 1040040
    },
    {
      "epoch": 1.7020810012895793,
      "grad_norm": 0.5097743272781372,
      "learning_rate": 6.639299433985886e-06,
      "loss": 0.0221,
      "step": 1040060
    },
    {
      "epoch": 1.7021137317282327,
      "grad_norm": 1.022338628768921,
      "learning_rate": 6.63923354177237e-06,
      "loss": 0.0213,
      "step": 1040080
    },
    {
      "epoch": 1.702146462166886,
      "grad_norm": 0.2789194583892822,
      "learning_rate": 6.639167649558852e-06,
      "loss": 0.0199,
      "step": 1040100
    },
    {
      "epoch": 1.7021791926055392,
      "grad_norm": 0.3007209897041321,
      "learning_rate": 6.639101757345335e-06,
      "loss": 0.0255,
      "step": 1040120
    },
    {
      "epoch": 1.7022119230441928,
      "grad_norm": 0.39804157614707947,
      "learning_rate": 6.639035865131817e-06,
      "loss": 0.0182,
      "step": 1040140
    },
    {
      "epoch": 1.702244653482846,
      "grad_norm": 0.3067961633205414,
      "learning_rate": 6.638969972918301e-06,
      "loss": 0.0238,
      "step": 1040160
    },
    {
      "epoch": 1.7022773839214993,
      "grad_norm": 0.4548111855983734,
      "learning_rate": 6.6389040807047836e-06,
      "loss": 0.0167,
      "step": 1040180
    },
    {
      "epoch": 1.7023101143601527,
      "grad_norm": 0.32293349504470825,
      "learning_rate": 6.638838188491266e-06,
      "loss": 0.0188,
      "step": 1040200
    },
    {
      "epoch": 1.702342844798806,
      "grad_norm": 0.1319257915019989,
      "learning_rate": 6.638772296277749e-06,
      "loss": 0.0207,
      "step": 1040220
    },
    {
      "epoch": 1.7023755752374594,
      "grad_norm": 0.8912035822868347,
      "learning_rate": 6.638706404064233e-06,
      "loss": 0.0174,
      "step": 1040240
    },
    {
      "epoch": 1.7024083056761126,
      "grad_norm": 1.1252660751342773,
      "learning_rate": 6.6386405118507145e-06,
      "loss": 0.0232,
      "step": 1040260
    },
    {
      "epoch": 1.7024410361147662,
      "grad_norm": 0.286823034286499,
      "learning_rate": 6.638574619637198e-06,
      "loss": 0.0179,
      "step": 1040280
    },
    {
      "epoch": 1.7024737665534193,
      "grad_norm": 0.729232907295227,
      "learning_rate": 6.63850872742368e-06,
      "loss": 0.0215,
      "step": 1040300
    },
    {
      "epoch": 1.7025064969920727,
      "grad_norm": 0.9494400024414062,
      "learning_rate": 6.638442835210164e-06,
      "loss": 0.0277,
      "step": 1040320
    },
    {
      "epoch": 1.702539227430726,
      "grad_norm": 0.2972833514213562,
      "learning_rate": 6.638376942996647e-06,
      "loss": 0.0238,
      "step": 1040340
    },
    {
      "epoch": 1.7025719578693792,
      "grad_norm": 0.4007909297943115,
      "learning_rate": 6.638311050783129e-06,
      "loss": 0.0211,
      "step": 1040360
    },
    {
      "epoch": 1.7026046883080328,
      "grad_norm": 0.3768383264541626,
      "learning_rate": 6.638245158569613e-06,
      "loss": 0.0113,
      "step": 1040380
    },
    {
      "epoch": 1.702637418746686,
      "grad_norm": 0.542249858379364,
      "learning_rate": 6.638179266356095e-06,
      "loss": 0.011,
      "step": 1040400
    },
    {
      "epoch": 1.7026701491853395,
      "grad_norm": 0.30888617038726807,
      "learning_rate": 6.638113374142578e-06,
      "loss": 0.0196,
      "step": 1040420
    },
    {
      "epoch": 1.7027028796239927,
      "grad_norm": 0.47082045674324036,
      "learning_rate": 6.638047481929061e-06,
      "loss": 0.0264,
      "step": 1040440
    },
    {
      "epoch": 1.702735610062646,
      "grad_norm": 0.04079883173108101,
      "learning_rate": 6.6379815897155445e-06,
      "loss": 0.016,
      "step": 1040460
    },
    {
      "epoch": 1.7027683405012994,
      "grad_norm": 0.5372440218925476,
      "learning_rate": 6.637915697502026e-06,
      "loss": 0.0192,
      "step": 1040480
    },
    {
      "epoch": 1.7028010709399526,
      "grad_norm": 0.4071084260940552,
      "learning_rate": 6.63784980528851e-06,
      "loss": 0.031,
      "step": 1040500
    },
    {
      "epoch": 1.7028338013786062,
      "grad_norm": 1.304389238357544,
      "learning_rate": 6.637783913074992e-06,
      "loss": 0.0174,
      "step": 1040520
    },
    {
      "epoch": 1.7028665318172593,
      "grad_norm": 0.6916555166244507,
      "learning_rate": 6.637718020861475e-06,
      "loss": 0.0186,
      "step": 1040540
    },
    {
      "epoch": 1.7028992622559127,
      "grad_norm": 0.13456913828849792,
      "learning_rate": 6.637652128647958e-06,
      "loss": 0.0138,
      "step": 1040560
    },
    {
      "epoch": 1.702931992694566,
      "grad_norm": 0.9328219890594482,
      "learning_rate": 6.637586236434441e-06,
      "loss": 0.0162,
      "step": 1040580
    },
    {
      "epoch": 1.7029647231332194,
      "grad_norm": 0.18124189972877502,
      "learning_rate": 6.637520344220924e-06,
      "loss": 0.0159,
      "step": 1040600
    },
    {
      "epoch": 1.7029974535718728,
      "grad_norm": 0.8311606645584106,
      "learning_rate": 6.637454452007407e-06,
      "loss": 0.0227,
      "step": 1040620
    },
    {
      "epoch": 1.703030184010526,
      "grad_norm": 0.15278840065002441,
      "learning_rate": 6.637388559793889e-06,
      "loss": 0.0207,
      "step": 1040640
    },
    {
      "epoch": 1.7030629144491796,
      "grad_norm": 0.7686072587966919,
      "learning_rate": 6.637322667580373e-06,
      "loss": 0.0144,
      "step": 1040660
    },
    {
      "epoch": 1.7030956448878327,
      "grad_norm": 0.4330451190471649,
      "learning_rate": 6.637256775366856e-06,
      "loss": 0.0158,
      "step": 1040680
    },
    {
      "epoch": 1.703128375326486,
      "grad_norm": 1.2041696310043335,
      "learning_rate": 6.637190883153338e-06,
      "loss": 0.0199,
      "step": 1040700
    },
    {
      "epoch": 1.7031611057651395,
      "grad_norm": 0.16097940504550934,
      "learning_rate": 6.637124990939822e-06,
      "loss": 0.0249,
      "step": 1040720
    },
    {
      "epoch": 1.7031938362037928,
      "grad_norm": 0.12913136184215546,
      "learning_rate": 6.637059098726304e-06,
      "loss": 0.0135,
      "step": 1040740
    },
    {
      "epoch": 1.7032265666424462,
      "grad_norm": 0.6356975436210632,
      "learning_rate": 6.636993206512787e-06,
      "loss": 0.0152,
      "step": 1040760
    },
    {
      "epoch": 1.7032592970810994,
      "grad_norm": 0.1779983937740326,
      "learning_rate": 6.63692731429927e-06,
      "loss": 0.0155,
      "step": 1040780
    },
    {
      "epoch": 1.703292027519753,
      "grad_norm": 0.2910291254520416,
      "learning_rate": 6.636861422085753e-06,
      "loss": 0.0171,
      "step": 1040800
    },
    {
      "epoch": 1.703324757958406,
      "grad_norm": 0.8032411932945251,
      "learning_rate": 6.6367955298722355e-06,
      "loss": 0.0268,
      "step": 1040820
    },
    {
      "epoch": 1.7033574883970595,
      "grad_norm": 0.36877426505088806,
      "learning_rate": 6.636729637658719e-06,
      "loss": 0.0126,
      "step": 1040840
    },
    {
      "epoch": 1.7033902188357128,
      "grad_norm": 0.24295774102210999,
      "learning_rate": 6.636663745445201e-06,
      "loss": 0.0115,
      "step": 1040860
    },
    {
      "epoch": 1.7034229492743662,
      "grad_norm": 0.1608472466468811,
      "learning_rate": 6.6365978532316845e-06,
      "loss": 0.021,
      "step": 1040880
    },
    {
      "epoch": 1.7034556797130196,
      "grad_norm": 0.5102371573448181,
      "learning_rate": 6.6365319610181664e-06,
      "loss": 0.0145,
      "step": 1040900
    },
    {
      "epoch": 1.7034884101516727,
      "grad_norm": 0.5286765694618225,
      "learning_rate": 6.63646606880465e-06,
      "loss": 0.0213,
      "step": 1040920
    },
    {
      "epoch": 1.7035211405903263,
      "grad_norm": 0.8792786002159119,
      "learning_rate": 6.636400176591133e-06,
      "loss": 0.0175,
      "step": 1040940
    },
    {
      "epoch": 1.7035538710289795,
      "grad_norm": 0.849933385848999,
      "learning_rate": 6.6363342843776155e-06,
      "loss": 0.0184,
      "step": 1040960
    },
    {
      "epoch": 1.7035866014676329,
      "grad_norm": 0.8430235385894775,
      "learning_rate": 6.636268392164098e-06,
      "loss": 0.015,
      "step": 1040980
    },
    {
      "epoch": 1.7036193319062862,
      "grad_norm": 3.3526172637939453,
      "learning_rate": 6.636202499950582e-06,
      "loss": 0.0221,
      "step": 1041000
    },
    {
      "epoch": 1.7036520623449396,
      "grad_norm": 0.38540545105934143,
      "learning_rate": 6.6361366077370646e-06,
      "loss": 0.0272,
      "step": 1041020
    },
    {
      "epoch": 1.703684792783593,
      "grad_norm": 0.3181039094924927,
      "learning_rate": 6.636070715523547e-06,
      "loss": 0.0122,
      "step": 1041040
    },
    {
      "epoch": 1.7037175232222461,
      "grad_norm": 0.6536728143692017,
      "learning_rate": 6.636004823310031e-06,
      "loss": 0.0264,
      "step": 1041060
    },
    {
      "epoch": 1.7037502536608997,
      "grad_norm": 0.27238228917121887,
      "learning_rate": 6.635938931096513e-06,
      "loss": 0.0217,
      "step": 1041080
    },
    {
      "epoch": 1.7037829840995529,
      "grad_norm": 1.5151563882827759,
      "learning_rate": 6.635873038882996e-06,
      "loss": 0.0196,
      "step": 1041100
    },
    {
      "epoch": 1.7038157145382062,
      "grad_norm": 0.3548629879951477,
      "learning_rate": 6.635807146669478e-06,
      "loss": 0.0279,
      "step": 1041120
    },
    {
      "epoch": 1.7038484449768596,
      "grad_norm": 1.343744158744812,
      "learning_rate": 6.635741254455962e-06,
      "loss": 0.0282,
      "step": 1041140
    },
    {
      "epoch": 1.7038811754155128,
      "grad_norm": 0.15551145374774933,
      "learning_rate": 6.635675362242444e-06,
      "loss": 0.0156,
      "step": 1041160
    },
    {
      "epoch": 1.7039139058541664,
      "grad_norm": 0.19549182057380676,
      "learning_rate": 6.635609470028927e-06,
      "loss": 0.0124,
      "step": 1041180
    },
    {
      "epoch": 1.7039466362928195,
      "grad_norm": 0.24311357736587524,
      "learning_rate": 6.63554357781541e-06,
      "loss": 0.0247,
      "step": 1041200
    },
    {
      "epoch": 1.703979366731473,
      "grad_norm": 0.13613155484199524,
      "learning_rate": 6.635477685601893e-06,
      "loss": 0.0131,
      "step": 1041220
    },
    {
      "epoch": 1.7040120971701262,
      "grad_norm": 0.15216398239135742,
      "learning_rate": 6.6354117933883755e-06,
      "loss": 0.023,
      "step": 1041240
    },
    {
      "epoch": 1.7040448276087796,
      "grad_norm": 0.8411847949028015,
      "learning_rate": 6.635345901174859e-06,
      "loss": 0.0292,
      "step": 1041260
    },
    {
      "epoch": 1.704077558047433,
      "grad_norm": 0.1315145343542099,
      "learning_rate": 6.635280008961341e-06,
      "loss": 0.0192,
      "step": 1041280
    },
    {
      "epoch": 1.7041102884860861,
      "grad_norm": 0.8001140356063843,
      "learning_rate": 6.635214116747825e-06,
      "loss": 0.0262,
      "step": 1041300
    },
    {
      "epoch": 1.7041430189247397,
      "grad_norm": 1.1239299774169922,
      "learning_rate": 6.6351482245343065e-06,
      "loss": 0.0178,
      "step": 1041320
    },
    {
      "epoch": 1.7041757493633929,
      "grad_norm": 0.498723566532135,
      "learning_rate": 6.63508233232079e-06,
      "loss": 0.0187,
      "step": 1041340
    },
    {
      "epoch": 1.7042084798020463,
      "grad_norm": 1.0650352239608765,
      "learning_rate": 6.635016440107273e-06,
      "loss": 0.0191,
      "step": 1041360
    },
    {
      "epoch": 1.7042412102406996,
      "grad_norm": 0.8752450942993164,
      "learning_rate": 6.6349505478937556e-06,
      "loss": 0.0159,
      "step": 1041380
    },
    {
      "epoch": 1.704273940679353,
      "grad_norm": 0.5121204853057861,
      "learning_rate": 6.634884655680239e-06,
      "loss": 0.0242,
      "step": 1041400
    },
    {
      "epoch": 1.7043066711180064,
      "grad_norm": 0.9134306907653809,
      "learning_rate": 6.634818763466722e-06,
      "loss": 0.0245,
      "step": 1041420
    },
    {
      "epoch": 1.7043394015566595,
      "grad_norm": 0.21256054937839508,
      "learning_rate": 6.634752871253205e-06,
      "loss": 0.0136,
      "step": 1041440
    },
    {
      "epoch": 1.7043721319953131,
      "grad_norm": 0.3923918902873993,
      "learning_rate": 6.634686979039687e-06,
      "loss": 0.0144,
      "step": 1041460
    },
    {
      "epoch": 1.7044048624339663,
      "grad_norm": 0.5183299779891968,
      "learning_rate": 6.634621086826171e-06,
      "loss": 0.0229,
      "step": 1041480
    },
    {
      "epoch": 1.7044375928726196,
      "grad_norm": 0.38808873295783997,
      "learning_rate": 6.634555194612653e-06,
      "loss": 0.0257,
      "step": 1041500
    },
    {
      "epoch": 1.704470323311273,
      "grad_norm": 0.31566038727760315,
      "learning_rate": 6.6344893023991364e-06,
      "loss": 0.0196,
      "step": 1041520
    },
    {
      "epoch": 1.7045030537499264,
      "grad_norm": 0.7925678491592407,
      "learning_rate": 6.634423410185618e-06,
      "loss": 0.0266,
      "step": 1041540
    },
    {
      "epoch": 1.7045357841885798,
      "grad_norm": 1.0405744314193726,
      "learning_rate": 6.634357517972102e-06,
      "loss": 0.0271,
      "step": 1041560
    },
    {
      "epoch": 1.704568514627233,
      "grad_norm": 0.23407821357250214,
      "learning_rate": 6.634291625758585e-06,
      "loss": 0.0194,
      "step": 1041580
    },
    {
      "epoch": 1.7046012450658865,
      "grad_norm": 0.9390332698822021,
      "learning_rate": 6.634225733545067e-06,
      "loss": 0.0236,
      "step": 1041600
    },
    {
      "epoch": 1.7046339755045397,
      "grad_norm": 0.32907962799072266,
      "learning_rate": 6.63415984133155e-06,
      "loss": 0.022,
      "step": 1041620
    },
    {
      "epoch": 1.704666705943193,
      "grad_norm": 0.37507396936416626,
      "learning_rate": 6.634093949118034e-06,
      "loss": 0.0168,
      "step": 1041640
    },
    {
      "epoch": 1.7046994363818464,
      "grad_norm": 0.39430496096611023,
      "learning_rate": 6.634028056904516e-06,
      "loss": 0.0172,
      "step": 1041660
    },
    {
      "epoch": 1.7047321668204998,
      "grad_norm": 1.6448615789413452,
      "learning_rate": 6.633962164690999e-06,
      "loss": 0.0253,
      "step": 1041680
    },
    {
      "epoch": 1.7047648972591531,
      "grad_norm": 0.8430742025375366,
      "learning_rate": 6.633896272477481e-06,
      "loss": 0.0247,
      "step": 1041700
    },
    {
      "epoch": 1.7047976276978063,
      "grad_norm": 0.824386715888977,
      "learning_rate": 6.633830380263965e-06,
      "loss": 0.0163,
      "step": 1041720
    },
    {
      "epoch": 1.70483035813646,
      "grad_norm": 0.4508405327796936,
      "learning_rate": 6.633764488050448e-06,
      "loss": 0.0182,
      "step": 1041740
    },
    {
      "epoch": 1.704863088575113,
      "grad_norm": 0.34701472520828247,
      "learning_rate": 6.63369859583693e-06,
      "loss": 0.0187,
      "step": 1041760
    },
    {
      "epoch": 1.7048958190137664,
      "grad_norm": 2.0083961486816406,
      "learning_rate": 6.633632703623414e-06,
      "loss": 0.0165,
      "step": 1041780
    },
    {
      "epoch": 1.7049285494524198,
      "grad_norm": 3.0675101280212402,
      "learning_rate": 6.6335668114098965e-06,
      "loss": 0.0143,
      "step": 1041800
    },
    {
      "epoch": 1.704961279891073,
      "grad_norm": 0.8255035281181335,
      "learning_rate": 6.633500919196379e-06,
      "loss": 0.0253,
      "step": 1041820
    },
    {
      "epoch": 1.7049940103297265,
      "grad_norm": 0.8760560154914856,
      "learning_rate": 6.633435026982862e-06,
      "loss": 0.0231,
      "step": 1041840
    },
    {
      "epoch": 1.7050267407683797,
      "grad_norm": 0.5784514546394348,
      "learning_rate": 6.6333691347693456e-06,
      "loss": 0.014,
      "step": 1041860
    },
    {
      "epoch": 1.7050594712070333,
      "grad_norm": 0.5696979761123657,
      "learning_rate": 6.6333032425558275e-06,
      "loss": 0.0192,
      "step": 1041880
    },
    {
      "epoch": 1.7050922016456864,
      "grad_norm": 0.5047650337219238,
      "learning_rate": 6.633237350342311e-06,
      "loss": 0.018,
      "step": 1041900
    },
    {
      "epoch": 1.7051249320843398,
      "grad_norm": 0.33895888924598694,
      "learning_rate": 6.633171458128793e-06,
      "loss": 0.0178,
      "step": 1041920
    },
    {
      "epoch": 1.7051576625229932,
      "grad_norm": 0.5200002193450928,
      "learning_rate": 6.6331055659152765e-06,
      "loss": 0.0148,
      "step": 1041940
    },
    {
      "epoch": 1.7051903929616463,
      "grad_norm": 0.5481671094894409,
      "learning_rate": 6.633039673701758e-06,
      "loss": 0.0181,
      "step": 1041960
    },
    {
      "epoch": 1.7052231234003,
      "grad_norm": 0.5479201674461365,
      "learning_rate": 6.632973781488242e-06,
      "loss": 0.0216,
      "step": 1041980
    },
    {
      "epoch": 1.705255853838953,
      "grad_norm": 0.3965676426887512,
      "learning_rate": 6.632907889274725e-06,
      "loss": 0.0179,
      "step": 1042000
    },
    {
      "epoch": 1.7052885842776064,
      "grad_norm": 1.0387808084487915,
      "learning_rate": 6.632841997061208e-06,
      "loss": 0.016,
      "step": 1042020
    },
    {
      "epoch": 1.7053213147162598,
      "grad_norm": 0.3397187888622284,
      "learning_rate": 6.63277610484769e-06,
      "loss": 0.0149,
      "step": 1042040
    },
    {
      "epoch": 1.7053540451549132,
      "grad_norm": 0.08854443579912186,
      "learning_rate": 6.632710212634174e-06,
      "loss": 0.0204,
      "step": 1042060
    },
    {
      "epoch": 1.7053867755935666,
      "grad_norm": 3.046703815460205,
      "learning_rate": 6.632644320420657e-06,
      "loss": 0.0212,
      "step": 1042080
    },
    {
      "epoch": 1.7054195060322197,
      "grad_norm": 0.918813169002533,
      "learning_rate": 6.632578428207139e-06,
      "loss": 0.0132,
      "step": 1042100
    },
    {
      "epoch": 1.7054522364708733,
      "grad_norm": 0.21161410212516785,
      "learning_rate": 6.632512535993623e-06,
      "loss": 0.0164,
      "step": 1042120
    },
    {
      "epoch": 1.7054849669095264,
      "grad_norm": 0.38022851943969727,
      "learning_rate": 6.632446643780105e-06,
      "loss": 0.0161,
      "step": 1042140
    },
    {
      "epoch": 1.7055176973481798,
      "grad_norm": 1.2602083683013916,
      "learning_rate": 6.632380751566588e-06,
      "loss": 0.012,
      "step": 1042160
    },
    {
      "epoch": 1.7055504277868332,
      "grad_norm": 0.23803158104419708,
      "learning_rate": 6.63231485935307e-06,
      "loss": 0.0175,
      "step": 1042180
    },
    {
      "epoch": 1.7055831582254866,
      "grad_norm": 0.12172214686870575,
      "learning_rate": 6.632248967139554e-06,
      "loss": 0.023,
      "step": 1042200
    },
    {
      "epoch": 1.70561588866414,
      "grad_norm": 0.333162397146225,
      "learning_rate": 6.6321830749260366e-06,
      "loss": 0.0168,
      "step": 1042220
    },
    {
      "epoch": 1.705648619102793,
      "grad_norm": 0.7864664793014526,
      "learning_rate": 6.632117182712519e-06,
      "loss": 0.0209,
      "step": 1042240
    },
    {
      "epoch": 1.7056813495414467,
      "grad_norm": 0.40045613050460815,
      "learning_rate": 6.632051290499002e-06,
      "loss": 0.0167,
      "step": 1042260
    },
    {
      "epoch": 1.7057140799800998,
      "grad_norm": 0.25757575035095215,
      "learning_rate": 6.631985398285486e-06,
      "loss": 0.0235,
      "step": 1042280
    },
    {
      "epoch": 1.7057468104187532,
      "grad_norm": 0.2160419523715973,
      "learning_rate": 6.6319195060719675e-06,
      "loss": 0.0201,
      "step": 1042300
    },
    {
      "epoch": 1.7057795408574066,
      "grad_norm": 0.26973336935043335,
      "learning_rate": 6.631853613858451e-06,
      "loss": 0.018,
      "step": 1042320
    },
    {
      "epoch": 1.70581227129606,
      "grad_norm": 0.1486709862947464,
      "learning_rate": 6.631787721644933e-06,
      "loss": 0.0182,
      "step": 1042340
    },
    {
      "epoch": 1.7058450017347133,
      "grad_norm": 1.1402935981750488,
      "learning_rate": 6.631721829431417e-06,
      "loss": 0.0248,
      "step": 1042360
    },
    {
      "epoch": 1.7058777321733665,
      "grad_norm": 0.2169683575630188,
      "learning_rate": 6.631655937217899e-06,
      "loss": 0.0114,
      "step": 1042380
    },
    {
      "epoch": 1.70591046261202,
      "grad_norm": 0.893340528011322,
      "learning_rate": 6.631590045004382e-06,
      "loss": 0.0195,
      "step": 1042400
    },
    {
      "epoch": 1.7059431930506732,
      "grad_norm": 0.96736741065979,
      "learning_rate": 6.631524152790865e-06,
      "loss": 0.0238,
      "step": 1042420
    },
    {
      "epoch": 1.7059759234893266,
      "grad_norm": 5.000832557678223,
      "learning_rate": 6.631458260577348e-06,
      "loss": 0.0175,
      "step": 1042440
    },
    {
      "epoch": 1.70600865392798,
      "grad_norm": 0.25518062710762024,
      "learning_rate": 6.631392368363831e-06,
      "loss": 0.0156,
      "step": 1042460
    },
    {
      "epoch": 1.7060413843666333,
      "grad_norm": 1.0704567432403564,
      "learning_rate": 6.631326476150314e-06,
      "loss": 0.0186,
      "step": 1042480
    },
    {
      "epoch": 1.7060741148052867,
      "grad_norm": 0.1453082412481308,
      "learning_rate": 6.6312605839367975e-06,
      "loss": 0.016,
      "step": 1042500
    },
    {
      "epoch": 1.7061068452439399,
      "grad_norm": 0.3509003221988678,
      "learning_rate": 6.631194691723279e-06,
      "loss": 0.0141,
      "step": 1042520
    },
    {
      "epoch": 1.7061395756825934,
      "grad_norm": 0.15961608290672302,
      "learning_rate": 6.631128799509763e-06,
      "loss": 0.0169,
      "step": 1042540
    },
    {
      "epoch": 1.7061723061212466,
      "grad_norm": 0.2814919352531433,
      "learning_rate": 6.631062907296245e-06,
      "loss": 0.0171,
      "step": 1042560
    },
    {
      "epoch": 1.7062050365599,
      "grad_norm": 0.5396407842636108,
      "learning_rate": 6.6309970150827284e-06,
      "loss": 0.0263,
      "step": 1042580
    },
    {
      "epoch": 1.7062377669985533,
      "grad_norm": 0.11962093412876129,
      "learning_rate": 6.630931122869211e-06,
      "loss": 0.018,
      "step": 1042600
    },
    {
      "epoch": 1.7062704974372065,
      "grad_norm": 0.19502806663513184,
      "learning_rate": 6.630865230655694e-06,
      "loss": 0.0111,
      "step": 1042620
    },
    {
      "epoch": 1.70630322787586,
      "grad_norm": 0.11266293376684189,
      "learning_rate": 6.630799338442177e-06,
      "loss": 0.0159,
      "step": 1042640
    },
    {
      "epoch": 1.7063359583145132,
      "grad_norm": 0.17559851706027985,
      "learning_rate": 6.63073344622866e-06,
      "loss": 0.0172,
      "step": 1042660
    },
    {
      "epoch": 1.7063686887531668,
      "grad_norm": 0.24468788504600525,
      "learning_rate": 6.630667554015142e-06,
      "loss": 0.0127,
      "step": 1042680
    },
    {
      "epoch": 1.70640141919182,
      "grad_norm": 0.42188701033592224,
      "learning_rate": 6.630601661801626e-06,
      "loss": 0.0268,
      "step": 1042700
    },
    {
      "epoch": 1.7064341496304734,
      "grad_norm": 0.8778553605079651,
      "learning_rate": 6.630535769588108e-06,
      "loss": 0.0271,
      "step": 1042720
    },
    {
      "epoch": 1.7064668800691267,
      "grad_norm": 0.045464422553777695,
      "learning_rate": 6.630469877374591e-06,
      "loss": 0.0165,
      "step": 1042740
    },
    {
      "epoch": 1.7064996105077799,
      "grad_norm": 0.37031224370002747,
      "learning_rate": 6.630403985161074e-06,
      "loss": 0.0165,
      "step": 1042760
    },
    {
      "epoch": 1.7065323409464335,
      "grad_norm": 0.40384921431541443,
      "learning_rate": 6.630338092947557e-06,
      "loss": 0.0217,
      "step": 1042780
    },
    {
      "epoch": 1.7065650713850866,
      "grad_norm": 0.30201256275177,
      "learning_rate": 6.63027220073404e-06,
      "loss": 0.0184,
      "step": 1042800
    },
    {
      "epoch": 1.70659780182374,
      "grad_norm": 0.5998997688293457,
      "learning_rate": 6.630206308520523e-06,
      "loss": 0.0156,
      "step": 1042820
    },
    {
      "epoch": 1.7066305322623934,
      "grad_norm": 0.3523307740688324,
      "learning_rate": 6.630140416307006e-06,
      "loss": 0.0253,
      "step": 1042840
    },
    {
      "epoch": 1.7066632627010467,
      "grad_norm": 0.5676623582839966,
      "learning_rate": 6.6300745240934885e-06,
      "loss": 0.0224,
      "step": 1042860
    },
    {
      "epoch": 1.7066959931397,
      "grad_norm": 0.4275737702846527,
      "learning_rate": 6.630008631879972e-06,
      "loss": 0.017,
      "step": 1042880
    },
    {
      "epoch": 1.7067287235783533,
      "grad_norm": 0.5254017114639282,
      "learning_rate": 6.629942739666454e-06,
      "loss": 0.0206,
      "step": 1042900
    },
    {
      "epoch": 1.7067614540170069,
      "grad_norm": 0.8952383995056152,
      "learning_rate": 6.6298768474529375e-06,
      "loss": 0.0203,
      "step": 1042920
    },
    {
      "epoch": 1.70679418445566,
      "grad_norm": 0.6972198486328125,
      "learning_rate": 6.6298109552394194e-06,
      "loss": 0.0184,
      "step": 1042940
    },
    {
      "epoch": 1.7068269148943134,
      "grad_norm": 0.4115470051765442,
      "learning_rate": 6.629745063025903e-06,
      "loss": 0.0155,
      "step": 1042960
    },
    {
      "epoch": 1.7068596453329667,
      "grad_norm": 0.967130184173584,
      "learning_rate": 6.629679170812385e-06,
      "loss": 0.0202,
      "step": 1042980
    },
    {
      "epoch": 1.7068923757716201,
      "grad_norm": 1.3280620574951172,
      "learning_rate": 6.6296132785988685e-06,
      "loss": 0.0135,
      "step": 1043000
    },
    {
      "epoch": 1.7069251062102735,
      "grad_norm": 0.7901796698570251,
      "learning_rate": 6.629547386385351e-06,
      "loss": 0.0205,
      "step": 1043020
    },
    {
      "epoch": 1.7069578366489266,
      "grad_norm": 0.5458583235740662,
      "learning_rate": 6.629481494171834e-06,
      "loss": 0.0301,
      "step": 1043040
    },
    {
      "epoch": 1.7069905670875802,
      "grad_norm": 0.3591777980327606,
      "learning_rate": 6.629415601958317e-06,
      "loss": 0.0179,
      "step": 1043060
    },
    {
      "epoch": 1.7070232975262334,
      "grad_norm": 0.29320189356803894,
      "learning_rate": 6.6293497097448e-06,
      "loss": 0.0142,
      "step": 1043080
    },
    {
      "epoch": 1.7070560279648868,
      "grad_norm": 0.5158866047859192,
      "learning_rate": 6.629283817531282e-06,
      "loss": 0.0142,
      "step": 1043100
    },
    {
      "epoch": 1.7070887584035401,
      "grad_norm": 0.8719939589500427,
      "learning_rate": 6.629217925317766e-06,
      "loss": 0.0119,
      "step": 1043120
    },
    {
      "epoch": 1.7071214888421935,
      "grad_norm": 0.33913472294807434,
      "learning_rate": 6.629152033104249e-06,
      "loss": 0.0172,
      "step": 1043140
    },
    {
      "epoch": 1.7071542192808469,
      "grad_norm": 0.3296651542186737,
      "learning_rate": 6.629086140890731e-06,
      "loss": 0.0241,
      "step": 1043160
    },
    {
      "epoch": 1.7071869497195,
      "grad_norm": 0.5120207667350769,
      "learning_rate": 6.629020248677215e-06,
      "loss": 0.0254,
      "step": 1043180
    },
    {
      "epoch": 1.7072196801581536,
      "grad_norm": 0.29688841104507446,
      "learning_rate": 6.628954356463697e-06,
      "loss": 0.0212,
      "step": 1043200
    },
    {
      "epoch": 1.7072524105968068,
      "grad_norm": 1.8357728719711304,
      "learning_rate": 6.62888846425018e-06,
      "loss": 0.0232,
      "step": 1043220
    },
    {
      "epoch": 1.7072851410354601,
      "grad_norm": 0.7468437552452087,
      "learning_rate": 6.628822572036663e-06,
      "loss": 0.02,
      "step": 1043240
    },
    {
      "epoch": 1.7073178714741135,
      "grad_norm": 1.356086015701294,
      "learning_rate": 6.628756679823146e-06,
      "loss": 0.0213,
      "step": 1043260
    },
    {
      "epoch": 1.7073506019127669,
      "grad_norm": 0.42122146487236023,
      "learning_rate": 6.6286907876096286e-06,
      "loss": 0.0261,
      "step": 1043280
    },
    {
      "epoch": 1.7073833323514203,
      "grad_norm": 0.5377969741821289,
      "learning_rate": 6.628624895396112e-06,
      "loss": 0.015,
      "step": 1043300
    },
    {
      "epoch": 1.7074160627900734,
      "grad_norm": 0.5378513932228088,
      "learning_rate": 6.628559003182594e-06,
      "loss": 0.0214,
      "step": 1043320
    },
    {
      "epoch": 1.707448793228727,
      "grad_norm": 0.7644886374473572,
      "learning_rate": 6.628493110969078e-06,
      "loss": 0.0149,
      "step": 1043340
    },
    {
      "epoch": 1.7074815236673802,
      "grad_norm": 0.09223797172307968,
      "learning_rate": 6.6284272187555595e-06,
      "loss": 0.0207,
      "step": 1043360
    },
    {
      "epoch": 1.7075142541060335,
      "grad_norm": 2.4818737506866455,
      "learning_rate": 6.628361326542043e-06,
      "loss": 0.0207,
      "step": 1043380
    },
    {
      "epoch": 1.707546984544687,
      "grad_norm": 0.8166514039039612,
      "learning_rate": 6.628295434328526e-06,
      "loss": 0.0242,
      "step": 1043400
    },
    {
      "epoch": 1.70757971498334,
      "grad_norm": 0.07596377283334732,
      "learning_rate": 6.628229542115009e-06,
      "loss": 0.0152,
      "step": 1043420
    },
    {
      "epoch": 1.7076124454219936,
      "grad_norm": 0.43122613430023193,
      "learning_rate": 6.628163649901491e-06,
      "loss": 0.0129,
      "step": 1043440
    },
    {
      "epoch": 1.7076451758606468,
      "grad_norm": 0.7091179490089417,
      "learning_rate": 6.628097757687975e-06,
      "loss": 0.0153,
      "step": 1043460
    },
    {
      "epoch": 1.7076779062993004,
      "grad_norm": 0.29877784848213196,
      "learning_rate": 6.628031865474457e-06,
      "loss": 0.0156,
      "step": 1043480
    },
    {
      "epoch": 1.7077106367379535,
      "grad_norm": 1.162663459777832,
      "learning_rate": 6.62796597326094e-06,
      "loss": 0.0214,
      "step": 1043500
    },
    {
      "epoch": 1.707743367176607,
      "grad_norm": 0.34836554527282715,
      "learning_rate": 6.627900081047424e-06,
      "loss": 0.0142,
      "step": 1043520
    },
    {
      "epoch": 1.7077760976152603,
      "grad_norm": 0.2931387424468994,
      "learning_rate": 6.627834188833906e-06,
      "loss": 0.0116,
      "step": 1043540
    },
    {
      "epoch": 1.7078088280539134,
      "grad_norm": 0.20405863225460052,
      "learning_rate": 6.6277682966203895e-06,
      "loss": 0.0144,
      "step": 1043560
    },
    {
      "epoch": 1.707841558492567,
      "grad_norm": 0.1910073608160019,
      "learning_rate": 6.627702404406871e-06,
      "loss": 0.0186,
      "step": 1043580
    },
    {
      "epoch": 1.7078742889312202,
      "grad_norm": 0.4153026342391968,
      "learning_rate": 6.627636512193355e-06,
      "loss": 0.0151,
      "step": 1043600
    },
    {
      "epoch": 1.7079070193698735,
      "grad_norm": 2.448495388031006,
      "learning_rate": 6.627570619979838e-06,
      "loss": 0.014,
      "step": 1043620
    },
    {
      "epoch": 1.707939749808527,
      "grad_norm": 0.3466629087924957,
      "learning_rate": 6.62750472776632e-06,
      "loss": 0.0206,
      "step": 1043640
    },
    {
      "epoch": 1.7079724802471803,
      "grad_norm": 0.2169138342142105,
      "learning_rate": 6.627438835552803e-06,
      "loss": 0.0076,
      "step": 1043660
    },
    {
      "epoch": 1.7080052106858337,
      "grad_norm": 0.8399973511695862,
      "learning_rate": 6.627372943339287e-06,
      "loss": 0.0235,
      "step": 1043680
    },
    {
      "epoch": 1.7080379411244868,
      "grad_norm": 0.10699029266834259,
      "learning_rate": 6.627307051125769e-06,
      "loss": 0.0142,
      "step": 1043700
    },
    {
      "epoch": 1.7080706715631404,
      "grad_norm": 0.9823867082595825,
      "learning_rate": 6.627241158912252e-06,
      "loss": 0.0174,
      "step": 1043720
    },
    {
      "epoch": 1.7081034020017936,
      "grad_norm": 0.32297441363334656,
      "learning_rate": 6.627175266698734e-06,
      "loss": 0.0191,
      "step": 1043740
    },
    {
      "epoch": 1.708136132440447,
      "grad_norm": 0.26760736107826233,
      "learning_rate": 6.627109374485218e-06,
      "loss": 0.0174,
      "step": 1043760
    },
    {
      "epoch": 1.7081688628791003,
      "grad_norm": 0.6454697847366333,
      "learning_rate": 6.6270434822717004e-06,
      "loss": 0.0151,
      "step": 1043780
    },
    {
      "epoch": 1.7082015933177537,
      "grad_norm": 2.079927682876587,
      "learning_rate": 6.626977590058183e-06,
      "loss": 0.0204,
      "step": 1043800
    },
    {
      "epoch": 1.708234323756407,
      "grad_norm": 0.7113078832626343,
      "learning_rate": 6.626911697844666e-06,
      "loss": 0.0169,
      "step": 1043820
    },
    {
      "epoch": 1.7082670541950602,
      "grad_norm": 1.1802717447280884,
      "learning_rate": 6.6268458056311495e-06,
      "loss": 0.0208,
      "step": 1043840
    },
    {
      "epoch": 1.7082997846337138,
      "grad_norm": 0.23484623432159424,
      "learning_rate": 6.626779913417632e-06,
      "loss": 0.0238,
      "step": 1043860
    },
    {
      "epoch": 1.708332515072367,
      "grad_norm": 0.27153560519218445,
      "learning_rate": 6.626714021204115e-06,
      "loss": 0.0187,
      "step": 1043880
    },
    {
      "epoch": 1.7083652455110203,
      "grad_norm": 0.2605818808078766,
      "learning_rate": 6.6266481289905986e-06,
      "loss": 0.0173,
      "step": 1043900
    },
    {
      "epoch": 1.7083979759496737,
      "grad_norm": 0.6252900958061218,
      "learning_rate": 6.6265822367770805e-06,
      "loss": 0.0306,
      "step": 1043920
    },
    {
      "epoch": 1.708430706388327,
      "grad_norm": 0.09465247392654419,
      "learning_rate": 6.626516344563564e-06,
      "loss": 0.022,
      "step": 1043940
    },
    {
      "epoch": 1.7084634368269804,
      "grad_norm": 0.7701157927513123,
      "learning_rate": 6.626450452350046e-06,
      "loss": 0.0192,
      "step": 1043960
    },
    {
      "epoch": 1.7084961672656336,
      "grad_norm": 0.3785337805747986,
      "learning_rate": 6.6263845601365295e-06,
      "loss": 0.0139,
      "step": 1043980
    },
    {
      "epoch": 1.7085288977042872,
      "grad_norm": 0.4476686716079712,
      "learning_rate": 6.626318667923011e-06,
      "loss": 0.0169,
      "step": 1044000
    },
    {
      "epoch": 1.7085616281429403,
      "grad_norm": 0.20479106903076172,
      "learning_rate": 6.626252775709495e-06,
      "loss": 0.0192,
      "step": 1044020
    },
    {
      "epoch": 1.7085943585815937,
      "grad_norm": 0.7202835083007812,
      "learning_rate": 6.626186883495978e-06,
      "loss": 0.0196,
      "step": 1044040
    },
    {
      "epoch": 1.708627089020247,
      "grad_norm": 0.24379083514213562,
      "learning_rate": 6.6261209912824605e-06,
      "loss": 0.0224,
      "step": 1044060
    },
    {
      "epoch": 1.7086598194589004,
      "grad_norm": 0.9025928974151611,
      "learning_rate": 6.626055099068943e-06,
      "loss": 0.0177,
      "step": 1044080
    },
    {
      "epoch": 1.7086925498975538,
      "grad_norm": 0.14671260118484497,
      "learning_rate": 6.625989206855427e-06,
      "loss": 0.0182,
      "step": 1044100
    },
    {
      "epoch": 1.708725280336207,
      "grad_norm": 0.730423629283905,
      "learning_rate": 6.625923314641909e-06,
      "loss": 0.0144,
      "step": 1044120
    },
    {
      "epoch": 1.7087580107748606,
      "grad_norm": 0.37269923090934753,
      "learning_rate": 6.625857422428392e-06,
      "loss": 0.0199,
      "step": 1044140
    },
    {
      "epoch": 1.7087907412135137,
      "grad_norm": 0.6358860731124878,
      "learning_rate": 6.625791530214874e-06,
      "loss": 0.0166,
      "step": 1044160
    },
    {
      "epoch": 1.708823471652167,
      "grad_norm": 0.5678154826164246,
      "learning_rate": 6.625725638001358e-06,
      "loss": 0.0208,
      "step": 1044180
    },
    {
      "epoch": 1.7088562020908205,
      "grad_norm": 0.3886919617652893,
      "learning_rate": 6.625659745787841e-06,
      "loss": 0.0143,
      "step": 1044200
    },
    {
      "epoch": 1.7088889325294736,
      "grad_norm": 0.3102293908596039,
      "learning_rate": 6.625593853574323e-06,
      "loss": 0.0202,
      "step": 1044220
    },
    {
      "epoch": 1.7089216629681272,
      "grad_norm": 0.09471011161804199,
      "learning_rate": 6.625527961360807e-06,
      "loss": 0.0247,
      "step": 1044240
    },
    {
      "epoch": 1.7089543934067803,
      "grad_norm": 0.349552720785141,
      "learning_rate": 6.62546206914729e-06,
      "loss": 0.0229,
      "step": 1044260
    },
    {
      "epoch": 1.708987123845434,
      "grad_norm": 0.5373296141624451,
      "learning_rate": 6.625396176933772e-06,
      "loss": 0.0141,
      "step": 1044280
    },
    {
      "epoch": 1.709019854284087,
      "grad_norm": 0.4471889138221741,
      "learning_rate": 6.625330284720255e-06,
      "loss": 0.0174,
      "step": 1044300
    },
    {
      "epoch": 1.7090525847227405,
      "grad_norm": 0.7291679382324219,
      "learning_rate": 6.625264392506739e-06,
      "loss": 0.0212,
      "step": 1044320
    },
    {
      "epoch": 1.7090853151613938,
      "grad_norm": 0.6700103282928467,
      "learning_rate": 6.6251985002932205e-06,
      "loss": 0.0221,
      "step": 1044340
    },
    {
      "epoch": 1.709118045600047,
      "grad_norm": 0.4179322421550751,
      "learning_rate": 6.625132608079704e-06,
      "loss": 0.0215,
      "step": 1044360
    },
    {
      "epoch": 1.7091507760387006,
      "grad_norm": 0.2940821051597595,
      "learning_rate": 6.625066715866186e-06,
      "loss": 0.019,
      "step": 1044380
    },
    {
      "epoch": 1.7091835064773537,
      "grad_norm": 0.6021007895469666,
      "learning_rate": 6.62500082365267e-06,
      "loss": 0.0128,
      "step": 1044400
    },
    {
      "epoch": 1.709216236916007,
      "grad_norm": 1.0427629947662354,
      "learning_rate": 6.624934931439152e-06,
      "loss": 0.02,
      "step": 1044420
    },
    {
      "epoch": 1.7092489673546605,
      "grad_norm": 0.3706618547439575,
      "learning_rate": 6.624869039225635e-06,
      "loss": 0.0146,
      "step": 1044440
    },
    {
      "epoch": 1.7092816977933138,
      "grad_norm": 0.9264219403266907,
      "learning_rate": 6.624803147012118e-06,
      "loss": 0.0154,
      "step": 1044460
    },
    {
      "epoch": 1.7093144282319672,
      "grad_norm": 0.4861947298049927,
      "learning_rate": 6.624737254798601e-06,
      "loss": 0.0174,
      "step": 1044480
    },
    {
      "epoch": 1.7093471586706204,
      "grad_norm": 0.5798776745796204,
      "learning_rate": 6.624671362585083e-06,
      "loss": 0.0255,
      "step": 1044500
    },
    {
      "epoch": 1.709379889109274,
      "grad_norm": 0.5297806262969971,
      "learning_rate": 6.624605470371567e-06,
      "loss": 0.0302,
      "step": 1044520
    },
    {
      "epoch": 1.7094126195479271,
      "grad_norm": 0.7365506887435913,
      "learning_rate": 6.6245395781580505e-06,
      "loss": 0.0168,
      "step": 1044540
    },
    {
      "epoch": 1.7094453499865805,
      "grad_norm": 0.7189943790435791,
      "learning_rate": 6.624473685944532e-06,
      "loss": 0.024,
      "step": 1044560
    },
    {
      "epoch": 1.7094780804252339,
      "grad_norm": 0.7976639270782471,
      "learning_rate": 6.624407793731016e-06,
      "loss": 0.0163,
      "step": 1044580
    },
    {
      "epoch": 1.7095108108638872,
      "grad_norm": 0.6769176721572876,
      "learning_rate": 6.624341901517498e-06,
      "loss": 0.0139,
      "step": 1044600
    },
    {
      "epoch": 1.7095435413025406,
      "grad_norm": 0.5823577642440796,
      "learning_rate": 6.6242760093039814e-06,
      "loss": 0.0304,
      "step": 1044620
    },
    {
      "epoch": 1.7095762717411938,
      "grad_norm": 0.204702228307724,
      "learning_rate": 6.624210117090464e-06,
      "loss": 0.0219,
      "step": 1044640
    },
    {
      "epoch": 1.7096090021798473,
      "grad_norm": 0.39292842149734497,
      "learning_rate": 6.624144224876947e-06,
      "loss": 0.0303,
      "step": 1044660
    },
    {
      "epoch": 1.7096417326185005,
      "grad_norm": 0.32999521493911743,
      "learning_rate": 6.62407833266343e-06,
      "loss": 0.0214,
      "step": 1044680
    },
    {
      "epoch": 1.7096744630571539,
      "grad_norm": 0.8693190217018127,
      "learning_rate": 6.624012440449913e-06,
      "loss": 0.0238,
      "step": 1044700
    },
    {
      "epoch": 1.7097071934958072,
      "grad_norm": 0.4177311956882477,
      "learning_rate": 6.623946548236395e-06,
      "loss": 0.0179,
      "step": 1044720
    },
    {
      "epoch": 1.7097399239344606,
      "grad_norm": 0.5986769795417786,
      "learning_rate": 6.623880656022879e-06,
      "loss": 0.0226,
      "step": 1044740
    },
    {
      "epoch": 1.709772654373114,
      "grad_norm": 0.30091744661331177,
      "learning_rate": 6.623814763809361e-06,
      "loss": 0.0214,
      "step": 1044760
    },
    {
      "epoch": 1.7098053848117671,
      "grad_norm": 1.2138043642044067,
      "learning_rate": 6.623748871595844e-06,
      "loss": 0.0245,
      "step": 1044780
    },
    {
      "epoch": 1.7098381152504207,
      "grad_norm": 0.23674872517585754,
      "learning_rate": 6.623682979382327e-06,
      "loss": 0.0152,
      "step": 1044800
    },
    {
      "epoch": 1.7098708456890739,
      "grad_norm": 1.2622064352035522,
      "learning_rate": 6.62361708716881e-06,
      "loss": 0.0236,
      "step": 1044820
    },
    {
      "epoch": 1.7099035761277273,
      "grad_norm": 0.5210122466087341,
      "learning_rate": 6.623551194955292e-06,
      "loss": 0.0167,
      "step": 1044840
    },
    {
      "epoch": 1.7099363065663806,
      "grad_norm": 0.5207340121269226,
      "learning_rate": 6.623485302741776e-06,
      "loss": 0.0189,
      "step": 1044860
    },
    {
      "epoch": 1.7099690370050338,
      "grad_norm": 1.0898247957229614,
      "learning_rate": 6.623419410528258e-06,
      "loss": 0.0198,
      "step": 1044880
    },
    {
      "epoch": 1.7100017674436874,
      "grad_norm": 1.8293009996414185,
      "learning_rate": 6.6233535183147415e-06,
      "loss": 0.0164,
      "step": 1044900
    },
    {
      "epoch": 1.7100344978823405,
      "grad_norm": 0.6339666247367859,
      "learning_rate": 6.623287626101225e-06,
      "loss": 0.018,
      "step": 1044920
    },
    {
      "epoch": 1.7100672283209941,
      "grad_norm": 0.13799455761909485,
      "learning_rate": 6.623221733887707e-06,
      "loss": 0.0146,
      "step": 1044940
    },
    {
      "epoch": 1.7100999587596473,
      "grad_norm": 0.4017387330532074,
      "learning_rate": 6.6231558416741906e-06,
      "loss": 0.0224,
      "step": 1044960
    },
    {
      "epoch": 1.7101326891983006,
      "grad_norm": 0.33680132031440735,
      "learning_rate": 6.6230899494606724e-06,
      "loss": 0.0287,
      "step": 1044980
    },
    {
      "epoch": 1.710165419636954,
      "grad_norm": 0.4273688495159149,
      "learning_rate": 6.623024057247156e-06,
      "loss": 0.0164,
      "step": 1045000
    },
    {
      "epoch": 1.7101981500756072,
      "grad_norm": 0.5288284420967102,
      "learning_rate": 6.622958165033638e-06,
      "loss": 0.015,
      "step": 1045020
    },
    {
      "epoch": 1.7102308805142608,
      "grad_norm": 0.3348469138145447,
      "learning_rate": 6.6228922728201215e-06,
      "loss": 0.028,
      "step": 1045040
    },
    {
      "epoch": 1.710263610952914,
      "grad_norm": 0.1529722660779953,
      "learning_rate": 6.622826380606604e-06,
      "loss": 0.022,
      "step": 1045060
    },
    {
      "epoch": 1.7102963413915673,
      "grad_norm": 0.9598711729049683,
      "learning_rate": 6.622760488393087e-06,
      "loss": 0.0158,
      "step": 1045080
    },
    {
      "epoch": 1.7103290718302206,
      "grad_norm": 1.8487600088119507,
      "learning_rate": 6.62269459617957e-06,
      "loss": 0.0183,
      "step": 1045100
    },
    {
      "epoch": 1.710361802268874,
      "grad_norm": 0.5261308550834656,
      "learning_rate": 6.622628703966053e-06,
      "loss": 0.0173,
      "step": 1045120
    },
    {
      "epoch": 1.7103945327075274,
      "grad_norm": 0.22190529108047485,
      "learning_rate": 6.622562811752535e-06,
      "loss": 0.0169,
      "step": 1045140
    },
    {
      "epoch": 1.7104272631461805,
      "grad_norm": 0.3623253405094147,
      "learning_rate": 6.622496919539019e-06,
      "loss": 0.0293,
      "step": 1045160
    },
    {
      "epoch": 1.7104599935848341,
      "grad_norm": 0.32460817694664,
      "learning_rate": 6.622431027325501e-06,
      "loss": 0.0149,
      "step": 1045180
    },
    {
      "epoch": 1.7104927240234873,
      "grad_norm": 0.5604182481765747,
      "learning_rate": 6.622365135111984e-06,
      "loss": 0.0175,
      "step": 1045200
    },
    {
      "epoch": 1.7105254544621407,
      "grad_norm": 0.1641550064086914,
      "learning_rate": 6.622299242898467e-06,
      "loss": 0.0199,
      "step": 1045220
    },
    {
      "epoch": 1.710558184900794,
      "grad_norm": 0.20330095291137695,
      "learning_rate": 6.62223335068495e-06,
      "loss": 0.0154,
      "step": 1045240
    },
    {
      "epoch": 1.7105909153394474,
      "grad_norm": 1.9594999551773071,
      "learning_rate": 6.622167458471433e-06,
      "loss": 0.0144,
      "step": 1045260
    },
    {
      "epoch": 1.7106236457781008,
      "grad_norm": 1.0695768594741821,
      "learning_rate": 6.622101566257916e-06,
      "loss": 0.0202,
      "step": 1045280
    },
    {
      "epoch": 1.710656376216754,
      "grad_norm": 0.5862858295440674,
      "learning_rate": 6.622035674044399e-06,
      "loss": 0.0266,
      "step": 1045300
    },
    {
      "epoch": 1.7106891066554075,
      "grad_norm": 0.2248746007680893,
      "learning_rate": 6.6219697818308816e-06,
      "loss": 0.0224,
      "step": 1045320
    },
    {
      "epoch": 1.7107218370940607,
      "grad_norm": 1.2732646465301514,
      "learning_rate": 6.621903889617365e-06,
      "loss": 0.0135,
      "step": 1045340
    },
    {
      "epoch": 1.710754567532714,
      "grad_norm": 0.6801916360855103,
      "learning_rate": 6.621837997403847e-06,
      "loss": 0.0152,
      "step": 1045360
    },
    {
      "epoch": 1.7107872979713674,
      "grad_norm": 0.9451360106468201,
      "learning_rate": 6.621772105190331e-06,
      "loss": 0.0242,
      "step": 1045380
    },
    {
      "epoch": 1.7108200284100208,
      "grad_norm": 0.5438559055328369,
      "learning_rate": 6.6217062129768125e-06,
      "loss": 0.0193,
      "step": 1045400
    },
    {
      "epoch": 1.7108527588486742,
      "grad_norm": 0.4463057816028595,
      "learning_rate": 6.621640320763296e-06,
      "loss": 0.0235,
      "step": 1045420
    },
    {
      "epoch": 1.7108854892873273,
      "grad_norm": 1.1050158739089966,
      "learning_rate": 6.621574428549779e-06,
      "loss": 0.0161,
      "step": 1045440
    },
    {
      "epoch": 1.710918219725981,
      "grad_norm": 1.1326662302017212,
      "learning_rate": 6.621508536336262e-06,
      "loss": 0.0216,
      "step": 1045460
    },
    {
      "epoch": 1.710950950164634,
      "grad_norm": 0.34450867772102356,
      "learning_rate": 6.621442644122744e-06,
      "loss": 0.0147,
      "step": 1045480
    },
    {
      "epoch": 1.7109836806032874,
      "grad_norm": 1.2570631504058838,
      "learning_rate": 6.621376751909228e-06,
      "loss": 0.0266,
      "step": 1045500
    },
    {
      "epoch": 1.7110164110419408,
      "grad_norm": 0.5633193254470825,
      "learning_rate": 6.62131085969571e-06,
      "loss": 0.0198,
      "step": 1045520
    },
    {
      "epoch": 1.7110491414805942,
      "grad_norm": 0.1182020977139473,
      "learning_rate": 6.621244967482193e-06,
      "loss": 0.0139,
      "step": 1045540
    },
    {
      "epoch": 1.7110818719192475,
      "grad_norm": 0.4009564518928528,
      "learning_rate": 6.621179075268675e-06,
      "loss": 0.0205,
      "step": 1045560
    },
    {
      "epoch": 1.7111146023579007,
      "grad_norm": 0.6712831258773804,
      "learning_rate": 6.621113183055159e-06,
      "loss": 0.0195,
      "step": 1045580
    },
    {
      "epoch": 1.7111473327965543,
      "grad_norm": 0.3364197909832001,
      "learning_rate": 6.6210472908416425e-06,
      "loss": 0.0233,
      "step": 1045600
    },
    {
      "epoch": 1.7111800632352074,
      "grad_norm": 0.3064727187156677,
      "learning_rate": 6.620981398628124e-06,
      "loss": 0.0177,
      "step": 1045620
    },
    {
      "epoch": 1.7112127936738608,
      "grad_norm": 1.9546693563461304,
      "learning_rate": 6.620915506414608e-06,
      "loss": 0.0204,
      "step": 1045640
    },
    {
      "epoch": 1.7112455241125142,
      "grad_norm": 1.4331703186035156,
      "learning_rate": 6.620849614201091e-06,
      "loss": 0.0248,
      "step": 1045660
    },
    {
      "epoch": 1.7112782545511673,
      "grad_norm": 0.3132253885269165,
      "learning_rate": 6.620783721987573e-06,
      "loss": 0.0218,
      "step": 1045680
    },
    {
      "epoch": 1.711310984989821,
      "grad_norm": 0.4459855258464813,
      "learning_rate": 6.620717829774056e-06,
      "loss": 0.0223,
      "step": 1045700
    },
    {
      "epoch": 1.711343715428474,
      "grad_norm": 0.5308396816253662,
      "learning_rate": 6.62065193756054e-06,
      "loss": 0.0152,
      "step": 1045720
    },
    {
      "epoch": 1.7113764458671277,
      "grad_norm": 0.3585203289985657,
      "learning_rate": 6.620586045347022e-06,
      "loss": 0.0133,
      "step": 1045740
    },
    {
      "epoch": 1.7114091763057808,
      "grad_norm": 1.2257349491119385,
      "learning_rate": 6.620520153133505e-06,
      "loss": 0.0218,
      "step": 1045760
    },
    {
      "epoch": 1.7114419067444342,
      "grad_norm": 0.7347083687782288,
      "learning_rate": 6.620454260919987e-06,
      "loss": 0.022,
      "step": 1045780
    },
    {
      "epoch": 1.7114746371830876,
      "grad_norm": 0.2942938506603241,
      "learning_rate": 6.620388368706471e-06,
      "loss": 0.018,
      "step": 1045800
    },
    {
      "epoch": 1.7115073676217407,
      "grad_norm": 0.1897163689136505,
      "learning_rate": 6.620322476492953e-06,
      "loss": 0.0244,
      "step": 1045820
    },
    {
      "epoch": 1.7115400980603943,
      "grad_norm": 0.4555929899215698,
      "learning_rate": 6.620256584279436e-06,
      "loss": 0.017,
      "step": 1045840
    },
    {
      "epoch": 1.7115728284990475,
      "grad_norm": 0.602419912815094,
      "learning_rate": 6.620190692065919e-06,
      "loss": 0.0146,
      "step": 1045860
    },
    {
      "epoch": 1.7116055589377008,
      "grad_norm": 1.0770519971847534,
      "learning_rate": 6.620124799852402e-06,
      "loss": 0.0245,
      "step": 1045880
    },
    {
      "epoch": 1.7116382893763542,
      "grad_norm": 0.3812788128852844,
      "learning_rate": 6.620058907638884e-06,
      "loss": 0.0155,
      "step": 1045900
    },
    {
      "epoch": 1.7116710198150076,
      "grad_norm": 0.4445393979549408,
      "learning_rate": 6.619993015425368e-06,
      "loss": 0.0149,
      "step": 1045920
    },
    {
      "epoch": 1.711703750253661,
      "grad_norm": 0.4241693317890167,
      "learning_rate": 6.61992712321185e-06,
      "loss": 0.0162,
      "step": 1045940
    },
    {
      "epoch": 1.711736480692314,
      "grad_norm": 0.17867997288703918,
      "learning_rate": 6.6198612309983335e-06,
      "loss": 0.0177,
      "step": 1045960
    },
    {
      "epoch": 1.7117692111309677,
      "grad_norm": 0.28354448080062866,
      "learning_rate": 6.619795338784817e-06,
      "loss": 0.0206,
      "step": 1045980
    },
    {
      "epoch": 1.7118019415696208,
      "grad_norm": 0.2613762617111206,
      "learning_rate": 6.619729446571299e-06,
      "loss": 0.013,
      "step": 1046000
    },
    {
      "epoch": 1.7118346720082742,
      "grad_norm": 0.1379040777683258,
      "learning_rate": 6.6196635543577825e-06,
      "loss": 0.019,
      "step": 1046020
    },
    {
      "epoch": 1.7118674024469276,
      "grad_norm": 0.39066314697265625,
      "learning_rate": 6.6195976621442644e-06,
      "loss": 0.0278,
      "step": 1046040
    },
    {
      "epoch": 1.711900132885581,
      "grad_norm": 0.749451220035553,
      "learning_rate": 6.619531769930748e-06,
      "loss": 0.0167,
      "step": 1046060
    },
    {
      "epoch": 1.7119328633242343,
      "grad_norm": 0.6547715067863464,
      "learning_rate": 6.619465877717231e-06,
      "loss": 0.0188,
      "step": 1046080
    },
    {
      "epoch": 1.7119655937628875,
      "grad_norm": 0.9650917053222656,
      "learning_rate": 6.6193999855037135e-06,
      "loss": 0.0195,
      "step": 1046100
    },
    {
      "epoch": 1.711998324201541,
      "grad_norm": 0.8929306864738464,
      "learning_rate": 6.619334093290196e-06,
      "loss": 0.0203,
      "step": 1046120
    },
    {
      "epoch": 1.7120310546401942,
      "grad_norm": 0.3070638179779053,
      "learning_rate": 6.61926820107668e-06,
      "loss": 0.017,
      "step": 1046140
    },
    {
      "epoch": 1.7120637850788476,
      "grad_norm": 0.42306631803512573,
      "learning_rate": 6.619202308863162e-06,
      "loss": 0.0171,
      "step": 1046160
    },
    {
      "epoch": 1.712096515517501,
      "grad_norm": 0.6040459871292114,
      "learning_rate": 6.619136416649645e-06,
      "loss": 0.0159,
      "step": 1046180
    },
    {
      "epoch": 1.7121292459561543,
      "grad_norm": 0.5334550142288208,
      "learning_rate": 6.619070524436127e-06,
      "loss": 0.0169,
      "step": 1046200
    },
    {
      "epoch": 1.7121619763948077,
      "grad_norm": 0.03593190386891365,
      "learning_rate": 6.619004632222611e-06,
      "loss": 0.0121,
      "step": 1046220
    },
    {
      "epoch": 1.7121947068334609,
      "grad_norm": 0.368307888507843,
      "learning_rate": 6.6189387400090935e-06,
      "loss": 0.0184,
      "step": 1046240
    },
    {
      "epoch": 1.7122274372721145,
      "grad_norm": 0.8680195212364197,
      "learning_rate": 6.618872847795576e-06,
      "loss": 0.0178,
      "step": 1046260
    },
    {
      "epoch": 1.7122601677107676,
      "grad_norm": 1.0652652978897095,
      "learning_rate": 6.618806955582059e-06,
      "loss": 0.0241,
      "step": 1046280
    },
    {
      "epoch": 1.712292898149421,
      "grad_norm": 0.21251527965068817,
      "learning_rate": 6.618741063368543e-06,
      "loss": 0.0244,
      "step": 1046300
    },
    {
      "epoch": 1.7123256285880744,
      "grad_norm": 1.1939709186553955,
      "learning_rate": 6.618675171155025e-06,
      "loss": 0.02,
      "step": 1046320
    },
    {
      "epoch": 1.7123583590267277,
      "grad_norm": 0.12095562368631363,
      "learning_rate": 6.618609278941508e-06,
      "loss": 0.0309,
      "step": 1046340
    },
    {
      "epoch": 1.712391089465381,
      "grad_norm": 0.706855833530426,
      "learning_rate": 6.618543386727992e-06,
      "loss": 0.0154,
      "step": 1046360
    },
    {
      "epoch": 1.7124238199040343,
      "grad_norm": 0.29237455129623413,
      "learning_rate": 6.6184774945144735e-06,
      "loss": 0.0328,
      "step": 1046380
    },
    {
      "epoch": 1.7124565503426878,
      "grad_norm": 0.36482828855514526,
      "learning_rate": 6.618411602300957e-06,
      "loss": 0.0201,
      "step": 1046400
    },
    {
      "epoch": 1.712489280781341,
      "grad_norm": 0.09735061973333359,
      "learning_rate": 6.618345710087439e-06,
      "loss": 0.0177,
      "step": 1046420
    },
    {
      "epoch": 1.7125220112199944,
      "grad_norm": 1.010908603668213,
      "learning_rate": 6.618279817873923e-06,
      "loss": 0.0199,
      "step": 1046440
    },
    {
      "epoch": 1.7125547416586477,
      "grad_norm": 0.4347665309906006,
      "learning_rate": 6.618213925660405e-06,
      "loss": 0.0187,
      "step": 1046460
    },
    {
      "epoch": 1.712587472097301,
      "grad_norm": 1.38359534740448,
      "learning_rate": 6.618148033446888e-06,
      "loss": 0.0143,
      "step": 1046480
    },
    {
      "epoch": 1.7126202025359545,
      "grad_norm": 0.762116014957428,
      "learning_rate": 6.618082141233371e-06,
      "loss": 0.0279,
      "step": 1046500
    },
    {
      "epoch": 1.7126529329746076,
      "grad_norm": 2.3477585315704346,
      "learning_rate": 6.618016249019854e-06,
      "loss": 0.0244,
      "step": 1046520
    },
    {
      "epoch": 1.7126856634132612,
      "grad_norm": 0.9322041273117065,
      "learning_rate": 6.617950356806336e-06,
      "loss": 0.0189,
      "step": 1046540
    },
    {
      "epoch": 1.7127183938519144,
      "grad_norm": 1.1159902811050415,
      "learning_rate": 6.61788446459282e-06,
      "loss": 0.0171,
      "step": 1046560
    },
    {
      "epoch": 1.7127511242905678,
      "grad_norm": 0.688055157661438,
      "learning_rate": 6.617818572379302e-06,
      "loss": 0.0186,
      "step": 1046580
    },
    {
      "epoch": 1.7127838547292211,
      "grad_norm": 0.8081637024879456,
      "learning_rate": 6.617752680165785e-06,
      "loss": 0.0195,
      "step": 1046600
    },
    {
      "epoch": 1.7128165851678743,
      "grad_norm": 0.26184800267219543,
      "learning_rate": 6.617686787952268e-06,
      "loss": 0.0158,
      "step": 1046620
    },
    {
      "epoch": 1.7128493156065279,
      "grad_norm": 0.5886021852493286,
      "learning_rate": 6.617620895738751e-06,
      "loss": 0.0187,
      "step": 1046640
    },
    {
      "epoch": 1.712882046045181,
      "grad_norm": 0.21966727077960968,
      "learning_rate": 6.6175550035252344e-06,
      "loss": 0.0173,
      "step": 1046660
    },
    {
      "epoch": 1.7129147764838344,
      "grad_norm": 0.21976499259471893,
      "learning_rate": 6.617489111311717e-06,
      "loss": 0.0136,
      "step": 1046680
    },
    {
      "epoch": 1.7129475069224878,
      "grad_norm": 0.3084806799888611,
      "learning_rate": 6.6174232190982e-06,
      "loss": 0.0159,
      "step": 1046700
    },
    {
      "epoch": 1.7129802373611411,
      "grad_norm": 0.7262547016143799,
      "learning_rate": 6.617357326884683e-06,
      "loss": 0.0191,
      "step": 1046720
    },
    {
      "epoch": 1.7130129677997945,
      "grad_norm": 0.5035134553909302,
      "learning_rate": 6.617291434671166e-06,
      "loss": 0.0144,
      "step": 1046740
    },
    {
      "epoch": 1.7130456982384477,
      "grad_norm": 0.5635263919830322,
      "learning_rate": 6.617225542457648e-06,
      "loss": 0.011,
      "step": 1046760
    },
    {
      "epoch": 1.7130784286771013,
      "grad_norm": 0.4494272470474243,
      "learning_rate": 6.617159650244132e-06,
      "loss": 0.011,
      "step": 1046780
    },
    {
      "epoch": 1.7131111591157544,
      "grad_norm": 0.17135845124721527,
      "learning_rate": 6.617093758030614e-06,
      "loss": 0.0286,
      "step": 1046800
    },
    {
      "epoch": 1.7131438895544078,
      "grad_norm": 0.9092399477958679,
      "learning_rate": 6.617027865817097e-06,
      "loss": 0.021,
      "step": 1046820
    },
    {
      "epoch": 1.7131766199930611,
      "grad_norm": 0.543479323387146,
      "learning_rate": 6.616961973603579e-06,
      "loss": 0.0115,
      "step": 1046840
    },
    {
      "epoch": 1.7132093504317145,
      "grad_norm": 0.48144400119781494,
      "learning_rate": 6.616896081390063e-06,
      "loss": 0.0137,
      "step": 1046860
    },
    {
      "epoch": 1.713242080870368,
      "grad_norm": 0.78880375623703,
      "learning_rate": 6.6168301891765454e-06,
      "loss": 0.0196,
      "step": 1046880
    },
    {
      "epoch": 1.713274811309021,
      "grad_norm": 1.2008370161056519,
      "learning_rate": 6.616764296963028e-06,
      "loss": 0.0213,
      "step": 1046900
    },
    {
      "epoch": 1.7133075417476746,
      "grad_norm": 0.3711809515953064,
      "learning_rate": 6.616698404749511e-06,
      "loss": 0.013,
      "step": 1046920
    },
    {
      "epoch": 1.7133402721863278,
      "grad_norm": 0.23160023987293243,
      "learning_rate": 6.6166325125359945e-06,
      "loss": 0.0169,
      "step": 1046940
    },
    {
      "epoch": 1.7133730026249812,
      "grad_norm": 0.9203470945358276,
      "learning_rate": 6.616566620322476e-06,
      "loss": 0.02,
      "step": 1046960
    },
    {
      "epoch": 1.7134057330636345,
      "grad_norm": 0.6723145842552185,
      "learning_rate": 6.61650072810896e-06,
      "loss": 0.0191,
      "step": 1046980
    },
    {
      "epoch": 1.713438463502288,
      "grad_norm": 0.12010153383016586,
      "learning_rate": 6.6164348358954436e-06,
      "loss": 0.0182,
      "step": 1047000
    },
    {
      "epoch": 1.7134711939409413,
      "grad_norm": 0.7069386839866638,
      "learning_rate": 6.6163689436819255e-06,
      "loss": 0.0167,
      "step": 1047020
    },
    {
      "epoch": 1.7135039243795944,
      "grad_norm": 0.5722262859344482,
      "learning_rate": 6.616303051468409e-06,
      "loss": 0.0133,
      "step": 1047040
    },
    {
      "epoch": 1.713536654818248,
      "grad_norm": 0.5038803219795227,
      "learning_rate": 6.616237159254891e-06,
      "loss": 0.015,
      "step": 1047060
    },
    {
      "epoch": 1.7135693852569012,
      "grad_norm": 0.7890506982803345,
      "learning_rate": 6.6161712670413745e-06,
      "loss": 0.0143,
      "step": 1047080
    },
    {
      "epoch": 1.7136021156955545,
      "grad_norm": 0.09585043042898178,
      "learning_rate": 6.616105374827857e-06,
      "loss": 0.0179,
      "step": 1047100
    },
    {
      "epoch": 1.713634846134208,
      "grad_norm": 0.05834769457578659,
      "learning_rate": 6.61603948261434e-06,
      "loss": 0.0138,
      "step": 1047120
    },
    {
      "epoch": 1.7136675765728613,
      "grad_norm": 0.5486938953399658,
      "learning_rate": 6.615973590400823e-06,
      "loss": 0.0156,
      "step": 1047140
    },
    {
      "epoch": 1.7137003070115147,
      "grad_norm": 0.859786868095398,
      "learning_rate": 6.615907698187306e-06,
      "loss": 0.0194,
      "step": 1047160
    },
    {
      "epoch": 1.7137330374501678,
      "grad_norm": 0.18899770081043243,
      "learning_rate": 6.615841805973788e-06,
      "loss": 0.0168,
      "step": 1047180
    },
    {
      "epoch": 1.7137657678888214,
      "grad_norm": 0.5012295842170715,
      "learning_rate": 6.615775913760272e-06,
      "loss": 0.0271,
      "step": 1047200
    },
    {
      "epoch": 1.7137984983274746,
      "grad_norm": 0.08648241311311722,
      "learning_rate": 6.615710021546754e-06,
      "loss": 0.0263,
      "step": 1047220
    },
    {
      "epoch": 1.713831228766128,
      "grad_norm": 0.36693331599235535,
      "learning_rate": 6.615644129333237e-06,
      "loss": 0.0256,
      "step": 1047240
    },
    {
      "epoch": 1.7138639592047813,
      "grad_norm": 0.5806031823158264,
      "learning_rate": 6.61557823711972e-06,
      "loss": 0.0205,
      "step": 1047260
    },
    {
      "epoch": 1.7138966896434344,
      "grad_norm": 0.8118646144866943,
      "learning_rate": 6.615512344906203e-06,
      "loss": 0.0164,
      "step": 1047280
    },
    {
      "epoch": 1.713929420082088,
      "grad_norm": 1.1815080642700195,
      "learning_rate": 6.6154464526926855e-06,
      "loss": 0.0208,
      "step": 1047300
    },
    {
      "epoch": 1.7139621505207412,
      "grad_norm": 1.5800237655639648,
      "learning_rate": 6.615380560479169e-06,
      "loss": 0.0165,
      "step": 1047320
    },
    {
      "epoch": 1.7139948809593948,
      "grad_norm": 0.29442405700683594,
      "learning_rate": 6.615314668265651e-06,
      "loss": 0.0171,
      "step": 1047340
    },
    {
      "epoch": 1.714027611398048,
      "grad_norm": 0.3780847191810608,
      "learning_rate": 6.6152487760521346e-06,
      "loss": 0.0215,
      "step": 1047360
    },
    {
      "epoch": 1.7140603418367013,
      "grad_norm": 1.024329662322998,
      "learning_rate": 6.615182883838618e-06,
      "loss": 0.0191,
      "step": 1047380
    },
    {
      "epoch": 1.7140930722753547,
      "grad_norm": 0.1580773890018463,
      "learning_rate": 6.6151169916251e-06,
      "loss": 0.0191,
      "step": 1047400
    },
    {
      "epoch": 1.7141258027140078,
      "grad_norm": 0.2955797612667084,
      "learning_rate": 6.615051099411584e-06,
      "loss": 0.017,
      "step": 1047420
    },
    {
      "epoch": 1.7141585331526614,
      "grad_norm": 0.4654967188835144,
      "learning_rate": 6.6149852071980655e-06,
      "loss": 0.0213,
      "step": 1047440
    },
    {
      "epoch": 1.7141912635913146,
      "grad_norm": 0.5794461965560913,
      "learning_rate": 6.614919314984549e-06,
      "loss": 0.0181,
      "step": 1047460
    },
    {
      "epoch": 1.714223994029968,
      "grad_norm": 0.12400602549314499,
      "learning_rate": 6.614853422771032e-06,
      "loss": 0.0209,
      "step": 1047480
    },
    {
      "epoch": 1.7142567244686213,
      "grad_norm": 1.2442269325256348,
      "learning_rate": 6.614787530557515e-06,
      "loss": 0.0176,
      "step": 1047500
    },
    {
      "epoch": 1.7142894549072747,
      "grad_norm": 0.1817476749420166,
      "learning_rate": 6.614721638343997e-06,
      "loss": 0.0169,
      "step": 1047520
    },
    {
      "epoch": 1.714322185345928,
      "grad_norm": 0.25383302569389343,
      "learning_rate": 6.614655746130481e-06,
      "loss": 0.0182,
      "step": 1047540
    },
    {
      "epoch": 1.7143549157845812,
      "grad_norm": 0.3527123034000397,
      "learning_rate": 6.614589853916963e-06,
      "loss": 0.0187,
      "step": 1047560
    },
    {
      "epoch": 1.7143876462232348,
      "grad_norm": 0.19556306302547455,
      "learning_rate": 6.614523961703446e-06,
      "loss": 0.0224,
      "step": 1047580
    },
    {
      "epoch": 1.714420376661888,
      "grad_norm": 0.7791407704353333,
      "learning_rate": 6.614458069489928e-06,
      "loss": 0.0193,
      "step": 1047600
    },
    {
      "epoch": 1.7144531071005413,
      "grad_norm": 0.48993581533432007,
      "learning_rate": 6.614392177276412e-06,
      "loss": 0.0254,
      "step": 1047620
    },
    {
      "epoch": 1.7144858375391947,
      "grad_norm": 1.0124527215957642,
      "learning_rate": 6.614326285062895e-06,
      "loss": 0.0192,
      "step": 1047640
    },
    {
      "epoch": 1.714518567977848,
      "grad_norm": 0.7830824851989746,
      "learning_rate": 6.614260392849377e-06,
      "loss": 0.0186,
      "step": 1047660
    },
    {
      "epoch": 1.7145512984165014,
      "grad_norm": 2.6112451553344727,
      "learning_rate": 6.61419450063586e-06,
      "loss": 0.0256,
      "step": 1047680
    },
    {
      "epoch": 1.7145840288551546,
      "grad_norm": 0.731621265411377,
      "learning_rate": 6.614128608422344e-06,
      "loss": 0.0133,
      "step": 1047700
    },
    {
      "epoch": 1.7146167592938082,
      "grad_norm": 0.2752235531806946,
      "learning_rate": 6.6140627162088264e-06,
      "loss": 0.0214,
      "step": 1047720
    },
    {
      "epoch": 1.7146494897324613,
      "grad_norm": 1.1302340030670166,
      "learning_rate": 6.613996823995309e-06,
      "loss": 0.0191,
      "step": 1047740
    },
    {
      "epoch": 1.7146822201711147,
      "grad_norm": 0.3868987262248993,
      "learning_rate": 6.613930931781793e-06,
      "loss": 0.0197,
      "step": 1047760
    },
    {
      "epoch": 1.714714950609768,
      "grad_norm": 0.23902733623981476,
      "learning_rate": 6.613865039568275e-06,
      "loss": 0.0175,
      "step": 1047780
    },
    {
      "epoch": 1.7147476810484215,
      "grad_norm": 0.4638961851596832,
      "learning_rate": 6.613799147354758e-06,
      "loss": 0.0236,
      "step": 1047800
    },
    {
      "epoch": 1.7147804114870748,
      "grad_norm": 0.20893684029579163,
      "learning_rate": 6.61373325514124e-06,
      "loss": 0.0175,
      "step": 1047820
    },
    {
      "epoch": 1.714813141925728,
      "grad_norm": 0.23170030117034912,
      "learning_rate": 6.613667362927724e-06,
      "loss": 0.0188,
      "step": 1047840
    },
    {
      "epoch": 1.7148458723643816,
      "grad_norm": 1.2692567110061646,
      "learning_rate": 6.613601470714206e-06,
      "loss": 0.0192,
      "step": 1047860
    },
    {
      "epoch": 1.7148786028030347,
      "grad_norm": 0.783233106136322,
      "learning_rate": 6.613535578500689e-06,
      "loss": 0.0156,
      "step": 1047880
    },
    {
      "epoch": 1.714911333241688,
      "grad_norm": 3.0313918590545654,
      "learning_rate": 6.613469686287172e-06,
      "loss": 0.0177,
      "step": 1047900
    },
    {
      "epoch": 1.7149440636803415,
      "grad_norm": 0.7478930354118347,
      "learning_rate": 6.613403794073655e-06,
      "loss": 0.0177,
      "step": 1047920
    },
    {
      "epoch": 1.7149767941189946,
      "grad_norm": 0.2904951274394989,
      "learning_rate": 6.613337901860137e-06,
      "loss": 0.0214,
      "step": 1047940
    },
    {
      "epoch": 1.7150095245576482,
      "grad_norm": 0.48555174469947815,
      "learning_rate": 6.613272009646621e-06,
      "loss": 0.0184,
      "step": 1047960
    },
    {
      "epoch": 1.7150422549963014,
      "grad_norm": 0.32277464866638184,
      "learning_rate": 6.613206117433103e-06,
      "loss": 0.0149,
      "step": 1047980
    },
    {
      "epoch": 1.715074985434955,
      "grad_norm": 0.9507369995117188,
      "learning_rate": 6.6131402252195865e-06,
      "loss": 0.0197,
      "step": 1048000
    },
    {
      "epoch": 1.715107715873608,
      "grad_norm": 0.6343783736228943,
      "learning_rate": 6.613074333006068e-06,
      "loss": 0.0255,
      "step": 1048020
    },
    {
      "epoch": 1.7151404463122615,
      "grad_norm": 0.43708959221839905,
      "learning_rate": 6.613008440792552e-06,
      "loss": 0.019,
      "step": 1048040
    },
    {
      "epoch": 1.7151731767509149,
      "grad_norm": 0.2841700613498688,
      "learning_rate": 6.6129425485790355e-06,
      "loss": 0.016,
      "step": 1048060
    },
    {
      "epoch": 1.715205907189568,
      "grad_norm": 1.804413914680481,
      "learning_rate": 6.6128766563655174e-06,
      "loss": 0.0159,
      "step": 1048080
    },
    {
      "epoch": 1.7152386376282216,
      "grad_norm": 0.8286880254745483,
      "learning_rate": 6.612810764152001e-06,
      "loss": 0.0118,
      "step": 1048100
    },
    {
      "epoch": 1.7152713680668747,
      "grad_norm": 0.3044400215148926,
      "learning_rate": 6.612744871938484e-06,
      "loss": 0.0232,
      "step": 1048120
    },
    {
      "epoch": 1.7153040985055281,
      "grad_norm": 0.3292211592197418,
      "learning_rate": 6.6126789797249665e-06,
      "loss": 0.0195,
      "step": 1048140
    },
    {
      "epoch": 1.7153368289441815,
      "grad_norm": 0.7499164342880249,
      "learning_rate": 6.612613087511449e-06,
      "loss": 0.0191,
      "step": 1048160
    },
    {
      "epoch": 1.7153695593828349,
      "grad_norm": 1.254897117614746,
      "learning_rate": 6.612547195297933e-06,
      "loss": 0.0152,
      "step": 1048180
    },
    {
      "epoch": 1.7154022898214882,
      "grad_norm": 0.28538399934768677,
      "learning_rate": 6.612481303084415e-06,
      "loss": 0.0155,
      "step": 1048200
    },
    {
      "epoch": 1.7154350202601414,
      "grad_norm": 0.3283860981464386,
      "learning_rate": 6.612415410870898e-06,
      "loss": 0.0152,
      "step": 1048220
    },
    {
      "epoch": 1.715467750698795,
      "grad_norm": 0.21545082330703735,
      "learning_rate": 6.61234951865738e-06,
      "loss": 0.0174,
      "step": 1048240
    },
    {
      "epoch": 1.7155004811374481,
      "grad_norm": 0.7458828687667847,
      "learning_rate": 6.612283626443864e-06,
      "loss": 0.0107,
      "step": 1048260
    },
    {
      "epoch": 1.7155332115761015,
      "grad_norm": 0.4703100919723511,
      "learning_rate": 6.6122177342303465e-06,
      "loss": 0.0215,
      "step": 1048280
    },
    {
      "epoch": 1.7155659420147549,
      "grad_norm": 0.3860001564025879,
      "learning_rate": 6.612151842016829e-06,
      "loss": 0.0195,
      "step": 1048300
    },
    {
      "epoch": 1.7155986724534082,
      "grad_norm": 0.45130589604377747,
      "learning_rate": 6.612085949803312e-06,
      "loss": 0.0249,
      "step": 1048320
    },
    {
      "epoch": 1.7156314028920616,
      "grad_norm": 0.17151129245758057,
      "learning_rate": 6.612020057589796e-06,
      "loss": 0.0214,
      "step": 1048340
    },
    {
      "epoch": 1.7156641333307148,
      "grad_norm": 0.48525071144104004,
      "learning_rate": 6.6119541653762775e-06,
      "loss": 0.0192,
      "step": 1048360
    },
    {
      "epoch": 1.7156968637693684,
      "grad_norm": 0.6786404252052307,
      "learning_rate": 6.611888273162761e-06,
      "loss": 0.0214,
      "step": 1048380
    },
    {
      "epoch": 1.7157295942080215,
      "grad_norm": 0.13820616900920868,
      "learning_rate": 6.611822380949243e-06,
      "loss": 0.0238,
      "step": 1048400
    },
    {
      "epoch": 1.7157623246466749,
      "grad_norm": 0.32973673939704895,
      "learning_rate": 6.6117564887357265e-06,
      "loss": 0.0232,
      "step": 1048420
    },
    {
      "epoch": 1.7157950550853283,
      "grad_norm": 0.15621080994606018,
      "learning_rate": 6.61169059652221e-06,
      "loss": 0.0172,
      "step": 1048440
    },
    {
      "epoch": 1.7158277855239816,
      "grad_norm": 0.23146893084049225,
      "learning_rate": 6.611624704308692e-06,
      "loss": 0.0177,
      "step": 1048460
    },
    {
      "epoch": 1.715860515962635,
      "grad_norm": 0.6186739802360535,
      "learning_rate": 6.611558812095176e-06,
      "loss": 0.0192,
      "step": 1048480
    },
    {
      "epoch": 1.7158932464012882,
      "grad_norm": 0.35489824414253235,
      "learning_rate": 6.611492919881658e-06,
      "loss": 0.0204,
      "step": 1048500
    },
    {
      "epoch": 1.7159259768399417,
      "grad_norm": 0.8072996735572815,
      "learning_rate": 6.611427027668141e-06,
      "loss": 0.0164,
      "step": 1048520
    },
    {
      "epoch": 1.715958707278595,
      "grad_norm": 1.1705727577209473,
      "learning_rate": 6.611361135454624e-06,
      "loss": 0.0155,
      "step": 1048540
    },
    {
      "epoch": 1.7159914377172483,
      "grad_norm": 0.9397342801094055,
      "learning_rate": 6.611295243241107e-06,
      "loss": 0.0225,
      "step": 1048560
    },
    {
      "epoch": 1.7160241681559016,
      "grad_norm": 0.683314323425293,
      "learning_rate": 6.611229351027589e-06,
      "loss": 0.013,
      "step": 1048580
    },
    {
      "epoch": 1.716056898594555,
      "grad_norm": 0.2213243991136551,
      "learning_rate": 6.611163458814073e-06,
      "loss": 0.0186,
      "step": 1048600
    },
    {
      "epoch": 1.7160896290332084,
      "grad_norm": 0.553344190120697,
      "learning_rate": 6.611097566600555e-06,
      "loss": 0.0182,
      "step": 1048620
    },
    {
      "epoch": 1.7161223594718615,
      "grad_norm": 0.19081448018550873,
      "learning_rate": 6.611031674387038e-06,
      "loss": 0.0201,
      "step": 1048640
    },
    {
      "epoch": 1.7161550899105151,
      "grad_norm": 0.3395789563655853,
      "learning_rate": 6.61096578217352e-06,
      "loss": 0.0178,
      "step": 1048660
    },
    {
      "epoch": 1.7161878203491683,
      "grad_norm": 0.18649114668369293,
      "learning_rate": 6.610899889960004e-06,
      "loss": 0.0189,
      "step": 1048680
    },
    {
      "epoch": 1.7162205507878217,
      "grad_norm": 0.28207650780677795,
      "learning_rate": 6.610833997746487e-06,
      "loss": 0.0225,
      "step": 1048700
    },
    {
      "epoch": 1.716253281226475,
      "grad_norm": 0.2996543347835541,
      "learning_rate": 6.61076810553297e-06,
      "loss": 0.0276,
      "step": 1048720
    },
    {
      "epoch": 1.7162860116651282,
      "grad_norm": 0.940110981464386,
      "learning_rate": 6.610702213319452e-06,
      "loss": 0.0362,
      "step": 1048740
    },
    {
      "epoch": 1.7163187421037818,
      "grad_norm": 0.1646246463060379,
      "learning_rate": 6.610636321105936e-06,
      "loss": 0.0133,
      "step": 1048760
    },
    {
      "epoch": 1.716351472542435,
      "grad_norm": 0.4445440173149109,
      "learning_rate": 6.610570428892419e-06,
      "loss": 0.0102,
      "step": 1048780
    },
    {
      "epoch": 1.7163842029810885,
      "grad_norm": 0.5454625487327576,
      "learning_rate": 6.610504536678901e-06,
      "loss": 0.0152,
      "step": 1048800
    },
    {
      "epoch": 1.7164169334197417,
      "grad_norm": 0.44024619460105896,
      "learning_rate": 6.610438644465385e-06,
      "loss": 0.0143,
      "step": 1048820
    },
    {
      "epoch": 1.716449663858395,
      "grad_norm": 1.1794919967651367,
      "learning_rate": 6.610372752251867e-06,
      "loss": 0.0195,
      "step": 1048840
    },
    {
      "epoch": 1.7164823942970484,
      "grad_norm": 0.4517322778701782,
      "learning_rate": 6.61030686003835e-06,
      "loss": 0.0187,
      "step": 1048860
    },
    {
      "epoch": 1.7165151247357016,
      "grad_norm": 0.27001145482063293,
      "learning_rate": 6.610240967824832e-06,
      "loss": 0.0183,
      "step": 1048880
    },
    {
      "epoch": 1.7165478551743552,
      "grad_norm": 0.34539955854415894,
      "learning_rate": 6.610175075611316e-06,
      "loss": 0.0157,
      "step": 1048900
    },
    {
      "epoch": 1.7165805856130083,
      "grad_norm": 0.43560025095939636,
      "learning_rate": 6.6101091833977984e-06,
      "loss": 0.0194,
      "step": 1048920
    },
    {
      "epoch": 1.7166133160516617,
      "grad_norm": 0.49370425939559937,
      "learning_rate": 6.610043291184281e-06,
      "loss": 0.0259,
      "step": 1048940
    },
    {
      "epoch": 1.716646046490315,
      "grad_norm": 0.5437792539596558,
      "learning_rate": 6.609977398970764e-06,
      "loss": 0.0192,
      "step": 1048960
    },
    {
      "epoch": 1.7166787769289684,
      "grad_norm": 0.3270164430141449,
      "learning_rate": 6.6099115067572475e-06,
      "loss": 0.0183,
      "step": 1048980
    },
    {
      "epoch": 1.7167115073676218,
      "grad_norm": 0.45403048396110535,
      "learning_rate": 6.609845614543729e-06,
      "loss": 0.0153,
      "step": 1049000
    },
    {
      "epoch": 1.716744237806275,
      "grad_norm": 0.3954148292541504,
      "learning_rate": 6.609779722330213e-06,
      "loss": 0.0177,
      "step": 1049020
    },
    {
      "epoch": 1.7167769682449285,
      "grad_norm": 0.5427399277687073,
      "learning_rate": 6.609713830116695e-06,
      "loss": 0.0263,
      "step": 1049040
    },
    {
      "epoch": 1.7168096986835817,
      "grad_norm": 0.8932937383651733,
      "learning_rate": 6.6096479379031785e-06,
      "loss": 0.0135,
      "step": 1049060
    },
    {
      "epoch": 1.716842429122235,
      "grad_norm": 0.27022746205329895,
      "learning_rate": 6.609582045689661e-06,
      "loss": 0.0176,
      "step": 1049080
    },
    {
      "epoch": 1.7168751595608884,
      "grad_norm": 0.40257728099823,
      "learning_rate": 6.609516153476144e-06,
      "loss": 0.0275,
      "step": 1049100
    },
    {
      "epoch": 1.7169078899995418,
      "grad_norm": 0.7730330228805542,
      "learning_rate": 6.6094502612626275e-06,
      "loss": 0.0189,
      "step": 1049120
    },
    {
      "epoch": 1.7169406204381952,
      "grad_norm": 0.7182964086532593,
      "learning_rate": 6.60938436904911e-06,
      "loss": 0.0136,
      "step": 1049140
    },
    {
      "epoch": 1.7169733508768483,
      "grad_norm": 0.5818984508514404,
      "learning_rate": 6.609318476835593e-06,
      "loss": 0.0187,
      "step": 1049160
    },
    {
      "epoch": 1.717006081315502,
      "grad_norm": 0.25068116188049316,
      "learning_rate": 6.609252584622076e-06,
      "loss": 0.0199,
      "step": 1049180
    },
    {
      "epoch": 1.717038811754155,
      "grad_norm": 0.5106604099273682,
      "learning_rate": 6.609186692408559e-06,
      "loss": 0.0148,
      "step": 1049200
    },
    {
      "epoch": 1.7170715421928084,
      "grad_norm": 0.766761064529419,
      "learning_rate": 6.609120800195041e-06,
      "loss": 0.0192,
      "step": 1049220
    },
    {
      "epoch": 1.7171042726314618,
      "grad_norm": 0.2672528922557831,
      "learning_rate": 6.609054907981525e-06,
      "loss": 0.0126,
      "step": 1049240
    },
    {
      "epoch": 1.7171370030701152,
      "grad_norm": 0.12114398181438446,
      "learning_rate": 6.608989015768007e-06,
      "loss": 0.0205,
      "step": 1049260
    },
    {
      "epoch": 1.7171697335087686,
      "grad_norm": 0.2870384752750397,
      "learning_rate": 6.60892312355449e-06,
      "loss": 0.0102,
      "step": 1049280
    },
    {
      "epoch": 1.7172024639474217,
      "grad_norm": 0.5168498754501343,
      "learning_rate": 6.608857231340973e-06,
      "loss": 0.016,
      "step": 1049300
    },
    {
      "epoch": 1.7172351943860753,
      "grad_norm": 0.20865754783153534,
      "learning_rate": 6.608791339127456e-06,
      "loss": 0.0159,
      "step": 1049320
    },
    {
      "epoch": 1.7172679248247285,
      "grad_norm": 1.5029933452606201,
      "learning_rate": 6.6087254469139385e-06,
      "loss": 0.0209,
      "step": 1049340
    },
    {
      "epoch": 1.7173006552633818,
      "grad_norm": 0.46841830015182495,
      "learning_rate": 6.608659554700422e-06,
      "loss": 0.0177,
      "step": 1049360
    },
    {
      "epoch": 1.7173333857020352,
      "grad_norm": 0.2490788996219635,
      "learning_rate": 6.608593662486904e-06,
      "loss": 0.014,
      "step": 1049380
    },
    {
      "epoch": 1.7173661161406886,
      "grad_norm": 0.3134450614452362,
      "learning_rate": 6.6085277702733876e-06,
      "loss": 0.0221,
      "step": 1049400
    },
    {
      "epoch": 1.717398846579342,
      "grad_norm": 0.3560850918292999,
      "learning_rate": 6.6084618780598695e-06,
      "loss": 0.0128,
      "step": 1049420
    },
    {
      "epoch": 1.717431577017995,
      "grad_norm": 0.2651306986808777,
      "learning_rate": 6.608395985846353e-06,
      "loss": 0.0178,
      "step": 1049440
    },
    {
      "epoch": 1.7174643074566487,
      "grad_norm": 0.4694042503833771,
      "learning_rate": 6.608330093632836e-06,
      "loss": 0.0243,
      "step": 1049460
    },
    {
      "epoch": 1.7174970378953018,
      "grad_norm": 0.10642825812101364,
      "learning_rate": 6.6082642014193185e-06,
      "loss": 0.0266,
      "step": 1049480
    },
    {
      "epoch": 1.7175297683339552,
      "grad_norm": 0.19930005073547363,
      "learning_rate": 6.608198309205802e-06,
      "loss": 0.0258,
      "step": 1049500
    },
    {
      "epoch": 1.7175624987726086,
      "grad_norm": 1.4606026411056519,
      "learning_rate": 6.608132416992285e-06,
      "loss": 0.0159,
      "step": 1049520
    },
    {
      "epoch": 1.7175952292112617,
      "grad_norm": 0.3745204210281372,
      "learning_rate": 6.608066524778768e-06,
      "loss": 0.0237,
      "step": 1049540
    },
    {
      "epoch": 1.7176279596499153,
      "grad_norm": 0.36141103506088257,
      "learning_rate": 6.60800063256525e-06,
      "loss": 0.0136,
      "step": 1049560
    },
    {
      "epoch": 1.7176606900885685,
      "grad_norm": 1.3270879983901978,
      "learning_rate": 6.607934740351734e-06,
      "loss": 0.0164,
      "step": 1049580
    },
    {
      "epoch": 1.717693420527222,
      "grad_norm": 0.1317642629146576,
      "learning_rate": 6.607868848138216e-06,
      "loss": 0.0157,
      "step": 1049600
    },
    {
      "epoch": 1.7177261509658752,
      "grad_norm": 0.5324792861938477,
      "learning_rate": 6.607802955924699e-06,
      "loss": 0.0183,
      "step": 1049620
    },
    {
      "epoch": 1.7177588814045286,
      "grad_norm": 0.7260202169418335,
      "learning_rate": 6.607737063711181e-06,
      "loss": 0.0183,
      "step": 1049640
    },
    {
      "epoch": 1.717791611843182,
      "grad_norm": 0.3795914947986603,
      "learning_rate": 6.607671171497665e-06,
      "loss": 0.0125,
      "step": 1049660
    },
    {
      "epoch": 1.7178243422818351,
      "grad_norm": 0.7122272849082947,
      "learning_rate": 6.607605279284147e-06,
      "loss": 0.016,
      "step": 1049680
    },
    {
      "epoch": 1.7178570727204887,
      "grad_norm": 2.1012845039367676,
      "learning_rate": 6.60753938707063e-06,
      "loss": 0.0277,
      "step": 1049700
    },
    {
      "epoch": 1.7178898031591419,
      "grad_norm": 0.49689701199531555,
      "learning_rate": 6.607473494857113e-06,
      "loss": 0.0133,
      "step": 1049720
    },
    {
      "epoch": 1.7179225335977952,
      "grad_norm": 0.3587453067302704,
      "learning_rate": 6.607407602643596e-06,
      "loss": 0.0178,
      "step": 1049740
    },
    {
      "epoch": 1.7179552640364486,
      "grad_norm": 0.5296192169189453,
      "learning_rate": 6.607341710430079e-06,
      "loss": 0.0145,
      "step": 1049760
    },
    {
      "epoch": 1.717987994475102,
      "grad_norm": 0.7604451179504395,
      "learning_rate": 6.607275818216562e-06,
      "loss": 0.0232,
      "step": 1049780
    },
    {
      "epoch": 1.7180207249137553,
      "grad_norm": 0.7749645113945007,
      "learning_rate": 6.607209926003044e-06,
      "loss": 0.0222,
      "step": 1049800
    },
    {
      "epoch": 1.7180534553524085,
      "grad_norm": 1.1511815786361694,
      "learning_rate": 6.607144033789528e-06,
      "loss": 0.0165,
      "step": 1049820
    },
    {
      "epoch": 1.718086185791062,
      "grad_norm": 0.22828355431556702,
      "learning_rate": 6.607078141576011e-06,
      "loss": 0.0153,
      "step": 1049840
    },
    {
      "epoch": 1.7181189162297152,
      "grad_norm": 0.20057566463947296,
      "learning_rate": 6.607012249362493e-06,
      "loss": 0.0219,
      "step": 1049860
    },
    {
      "epoch": 1.7181516466683686,
      "grad_norm": 0.7093085646629333,
      "learning_rate": 6.606946357148977e-06,
      "loss": 0.0134,
      "step": 1049880
    },
    {
      "epoch": 1.718184377107022,
      "grad_norm": 0.9154544472694397,
      "learning_rate": 6.606880464935459e-06,
      "loss": 0.02,
      "step": 1049900
    },
    {
      "epoch": 1.7182171075456754,
      "grad_norm": 0.3946985900402069,
      "learning_rate": 6.606814572721942e-06,
      "loss": 0.0229,
      "step": 1049920
    },
    {
      "epoch": 1.7182498379843287,
      "grad_norm": 0.4160606861114502,
      "learning_rate": 6.606748680508425e-06,
      "loss": 0.0161,
      "step": 1049940
    },
    {
      "epoch": 1.7182825684229819,
      "grad_norm": 0.8552483916282654,
      "learning_rate": 6.606682788294908e-06,
      "loss": 0.0174,
      "step": 1049960
    },
    {
      "epoch": 1.7183152988616355,
      "grad_norm": 0.3033442497253418,
      "learning_rate": 6.60661689608139e-06,
      "loss": 0.0181,
      "step": 1049980
    },
    {
      "epoch": 1.7183480293002886,
      "grad_norm": 0.5601529479026794,
      "learning_rate": 6.606551003867874e-06,
      "loss": 0.0157,
      "step": 1050000
    },
    {
      "epoch": 1.7183480293002886,
      "eval_loss": 0.009990934282541275,
      "eval_runtime": 6500.1378,
      "eval_samples_per_second": 158.129,
      "eval_steps_per_second": 15.813,
      "eval_sts-dev_pearson_cosine": 0.9766973787312386,
      "eval_sts-dev_spearman_cosine": 0.890068272646664,
      "step": 1050000
    },
    {
      "epoch": 1.718380759738942,
      "grad_norm": 0.9186133742332458,
      "learning_rate": 6.606485111654356e-06,
      "loss": 0.0121,
      "step": 1050020
    },
    {
      "epoch": 1.7184134901775954,
      "grad_norm": 0.26222604513168335,
      "learning_rate": 6.6064192194408395e-06,
      "loss": 0.0159,
      "step": 1050040
    },
    {
      "epoch": 1.7184462206162487,
      "grad_norm": 0.4755490720272064,
      "learning_rate": 6.606353327227321e-06,
      "loss": 0.012,
      "step": 1050060
    },
    {
      "epoch": 1.7184789510549021,
      "grad_norm": 0.6698612570762634,
      "learning_rate": 6.606287435013805e-06,
      "loss": 0.0219,
      "step": 1050080
    },
    {
      "epoch": 1.7185116814935553,
      "grad_norm": 0.7381844520568848,
      "learning_rate": 6.606221542800288e-06,
      "loss": 0.0131,
      "step": 1050100
    },
    {
      "epoch": 1.7185444119322089,
      "grad_norm": 0.8121177554130554,
      "learning_rate": 6.6061556505867704e-06,
      "loss": 0.0193,
      "step": 1050120
    },
    {
      "epoch": 1.718577142370862,
      "grad_norm": 0.9848542809486389,
      "learning_rate": 6.606089758373253e-06,
      "loss": 0.0169,
      "step": 1050140
    },
    {
      "epoch": 1.7186098728095154,
      "grad_norm": 0.6400281190872192,
      "learning_rate": 6.606023866159737e-06,
      "loss": 0.0243,
      "step": 1050160
    },
    {
      "epoch": 1.7186426032481688,
      "grad_norm": 0.21468456089496613,
      "learning_rate": 6.6059579739462195e-06,
      "loss": 0.0114,
      "step": 1050180
    },
    {
      "epoch": 1.718675333686822,
      "grad_norm": 0.48494771122932434,
      "learning_rate": 6.605892081732702e-06,
      "loss": 0.0194,
      "step": 1050200
    },
    {
      "epoch": 1.7187080641254755,
      "grad_norm": 0.7042320370674133,
      "learning_rate": 6.605826189519186e-06,
      "loss": 0.023,
      "step": 1050220
    },
    {
      "epoch": 1.7187407945641286,
      "grad_norm": 0.5862938761711121,
      "learning_rate": 6.605760297305668e-06,
      "loss": 0.0161,
      "step": 1050240
    },
    {
      "epoch": 1.7187735250027822,
      "grad_norm": 0.5189488530158997,
      "learning_rate": 6.605694405092151e-06,
      "loss": 0.0124,
      "step": 1050260
    },
    {
      "epoch": 1.7188062554414354,
      "grad_norm": 0.3648231327533722,
      "learning_rate": 6.605628512878633e-06,
      "loss": 0.017,
      "step": 1050280
    },
    {
      "epoch": 1.7188389858800888,
      "grad_norm": 0.07196571677923203,
      "learning_rate": 6.605562620665117e-06,
      "loss": 0.0183,
      "step": 1050300
    },
    {
      "epoch": 1.7188717163187421,
      "grad_norm": 0.2444092333316803,
      "learning_rate": 6.6054967284515995e-06,
      "loss": 0.0209,
      "step": 1050320
    },
    {
      "epoch": 1.7189044467573953,
      "grad_norm": 0.6875941157341003,
      "learning_rate": 6.605430836238082e-06,
      "loss": 0.0219,
      "step": 1050340
    },
    {
      "epoch": 1.7189371771960489,
      "grad_norm": 0.714127242565155,
      "learning_rate": 6.605364944024565e-06,
      "loss": 0.0213,
      "step": 1050360
    },
    {
      "epoch": 1.718969907634702,
      "grad_norm": 0.4430825412273407,
      "learning_rate": 6.605299051811049e-06,
      "loss": 0.0168,
      "step": 1050380
    },
    {
      "epoch": 1.7190026380733554,
      "grad_norm": 0.27854591608047485,
      "learning_rate": 6.6052331595975305e-06,
      "loss": 0.0156,
      "step": 1050400
    },
    {
      "epoch": 1.7190353685120088,
      "grad_norm": 0.5781921148300171,
      "learning_rate": 6.605167267384014e-06,
      "loss": 0.0178,
      "step": 1050420
    },
    {
      "epoch": 1.7190680989506621,
      "grad_norm": 0.0922522023320198,
      "learning_rate": 6.605101375170496e-06,
      "loss": 0.0161,
      "step": 1050440
    },
    {
      "epoch": 1.7191008293893155,
      "grad_norm": 0.18958346545696259,
      "learning_rate": 6.6050354829569796e-06,
      "loss": 0.0151,
      "step": 1050460
    },
    {
      "epoch": 1.7191335598279687,
      "grad_norm": 0.22183836996555328,
      "learning_rate": 6.604969590743462e-06,
      "loss": 0.0214,
      "step": 1050480
    },
    {
      "epoch": 1.7191662902666223,
      "grad_norm": 0.5655485391616821,
      "learning_rate": 6.604903698529945e-06,
      "loss": 0.0218,
      "step": 1050500
    },
    {
      "epoch": 1.7191990207052754,
      "grad_norm": 0.18119798600673676,
      "learning_rate": 6.604837806316429e-06,
      "loss": 0.0155,
      "step": 1050520
    },
    {
      "epoch": 1.7192317511439288,
      "grad_norm": 0.7267670035362244,
      "learning_rate": 6.604771914102911e-06,
      "loss": 0.0255,
      "step": 1050540
    },
    {
      "epoch": 1.7192644815825822,
      "grad_norm": 0.34240642189979553,
      "learning_rate": 6.604706021889394e-06,
      "loss": 0.0191,
      "step": 1050560
    },
    {
      "epoch": 1.7192972120212355,
      "grad_norm": 1.2479978799819946,
      "learning_rate": 6.604640129675877e-06,
      "loss": 0.0255,
      "step": 1050580
    },
    {
      "epoch": 1.719329942459889,
      "grad_norm": 0.23784290254116058,
      "learning_rate": 6.6045742374623604e-06,
      "loss": 0.0235,
      "step": 1050600
    },
    {
      "epoch": 1.719362672898542,
      "grad_norm": 0.5211156606674194,
      "learning_rate": 6.604508345248842e-06,
      "loss": 0.0199,
      "step": 1050620
    },
    {
      "epoch": 1.7193954033371956,
      "grad_norm": 0.36289775371551514,
      "learning_rate": 6.604442453035326e-06,
      "loss": 0.0199,
      "step": 1050640
    },
    {
      "epoch": 1.7194281337758488,
      "grad_norm": 0.1227489784359932,
      "learning_rate": 6.604376560821808e-06,
      "loss": 0.0138,
      "step": 1050660
    },
    {
      "epoch": 1.7194608642145022,
      "grad_norm": 0.949963390827179,
      "learning_rate": 6.604310668608291e-06,
      "loss": 0.0189,
      "step": 1050680
    },
    {
      "epoch": 1.7194935946531555,
      "grad_norm": 2.106419086456299,
      "learning_rate": 6.604244776394773e-06,
      "loss": 0.0188,
      "step": 1050700
    },
    {
      "epoch": 1.719526325091809,
      "grad_norm": 0.508411169052124,
      "learning_rate": 6.604178884181257e-06,
      "loss": 0.016,
      "step": 1050720
    },
    {
      "epoch": 1.7195590555304623,
      "grad_norm": 0.1811339110136032,
      "learning_rate": 6.60411299196774e-06,
      "loss": 0.0165,
      "step": 1050740
    },
    {
      "epoch": 1.7195917859691154,
      "grad_norm": 0.16741102933883667,
      "learning_rate": 6.604047099754222e-06,
      "loss": 0.0211,
      "step": 1050760
    },
    {
      "epoch": 1.719624516407769,
      "grad_norm": 1.1437410116195679,
      "learning_rate": 6.603981207540705e-06,
      "loss": 0.0279,
      "step": 1050780
    },
    {
      "epoch": 1.7196572468464222,
      "grad_norm": 0.4690929055213928,
      "learning_rate": 6.603915315327189e-06,
      "loss": 0.0195,
      "step": 1050800
    },
    {
      "epoch": 1.7196899772850756,
      "grad_norm": 0.7238861322402954,
      "learning_rate": 6.6038494231136706e-06,
      "loss": 0.0183,
      "step": 1050820
    },
    {
      "epoch": 1.719722707723729,
      "grad_norm": 0.7883681654930115,
      "learning_rate": 6.603783530900154e-06,
      "loss": 0.0212,
      "step": 1050840
    },
    {
      "epoch": 1.7197554381623823,
      "grad_norm": 0.4775972068309784,
      "learning_rate": 6.603717638686636e-06,
      "loss": 0.026,
      "step": 1050860
    },
    {
      "epoch": 1.7197881686010357,
      "grad_norm": 0.9013208150863647,
      "learning_rate": 6.60365174647312e-06,
      "loss": 0.0198,
      "step": 1050880
    },
    {
      "epoch": 1.7198208990396888,
      "grad_norm": 0.4550279974937439,
      "learning_rate": 6.603585854259603e-06,
      "loss": 0.0247,
      "step": 1050900
    },
    {
      "epoch": 1.7198536294783424,
      "grad_norm": 0.14542782306671143,
      "learning_rate": 6.603519962046085e-06,
      "loss": 0.0151,
      "step": 1050920
    },
    {
      "epoch": 1.7198863599169956,
      "grad_norm": 1.203257441520691,
      "learning_rate": 6.603454069832569e-06,
      "loss": 0.0179,
      "step": 1050940
    },
    {
      "epoch": 1.719919090355649,
      "grad_norm": 0.31286105513572693,
      "learning_rate": 6.6033881776190514e-06,
      "loss": 0.0138,
      "step": 1050960
    },
    {
      "epoch": 1.7199518207943023,
      "grad_norm": 0.1848214715719223,
      "learning_rate": 6.603322285405534e-06,
      "loss": 0.0182,
      "step": 1050980
    },
    {
      "epoch": 1.7199845512329555,
      "grad_norm": 0.14927731454372406,
      "learning_rate": 6.603256393192017e-06,
      "loss": 0.0188,
      "step": 1051000
    },
    {
      "epoch": 1.720017281671609,
      "grad_norm": 1.0068929195404053,
      "learning_rate": 6.6031905009785005e-06,
      "loss": 0.018,
      "step": 1051020
    },
    {
      "epoch": 1.7200500121102622,
      "grad_norm": 1.47784423828125,
      "learning_rate": 6.603124608764982e-06,
      "loss": 0.0314,
      "step": 1051040
    },
    {
      "epoch": 1.7200827425489158,
      "grad_norm": 0.4711415767669678,
      "learning_rate": 6.603058716551466e-06,
      "loss": 0.022,
      "step": 1051060
    },
    {
      "epoch": 1.720115472987569,
      "grad_norm": 0.4686394929885864,
      "learning_rate": 6.602992824337948e-06,
      "loss": 0.0189,
      "step": 1051080
    },
    {
      "epoch": 1.7201482034262223,
      "grad_norm": 0.7135688066482544,
      "learning_rate": 6.6029269321244315e-06,
      "loss": 0.0181,
      "step": 1051100
    },
    {
      "epoch": 1.7201809338648757,
      "grad_norm": 0.5864508152008057,
      "learning_rate": 6.602861039910914e-06,
      "loss": 0.0208,
      "step": 1051120
    },
    {
      "epoch": 1.7202136643035288,
      "grad_norm": 0.04581749811768532,
      "learning_rate": 6.602795147697397e-06,
      "loss": 0.0172,
      "step": 1051140
    },
    {
      "epoch": 1.7202463947421824,
      "grad_norm": 0.7197089791297913,
      "learning_rate": 6.60272925548388e-06,
      "loss": 0.0179,
      "step": 1051160
    },
    {
      "epoch": 1.7202791251808356,
      "grad_norm": 0.2597763240337372,
      "learning_rate": 6.602663363270363e-06,
      "loss": 0.0152,
      "step": 1051180
    },
    {
      "epoch": 1.720311855619489,
      "grad_norm": 0.2135677933692932,
      "learning_rate": 6.602597471056845e-06,
      "loss": 0.0212,
      "step": 1051200
    },
    {
      "epoch": 1.7203445860581423,
      "grad_norm": 0.8458020687103271,
      "learning_rate": 6.602531578843329e-06,
      "loss": 0.022,
      "step": 1051220
    },
    {
      "epoch": 1.7203773164967957,
      "grad_norm": 0.44566667079925537,
      "learning_rate": 6.602465686629812e-06,
      "loss": 0.0222,
      "step": 1051240
    },
    {
      "epoch": 1.720410046935449,
      "grad_norm": 0.8339390158653259,
      "learning_rate": 6.602399794416294e-06,
      "loss": 0.0209,
      "step": 1051260
    },
    {
      "epoch": 1.7204427773741022,
      "grad_norm": 0.20832577347755432,
      "learning_rate": 6.602333902202778e-06,
      "loss": 0.0227,
      "step": 1051280
    },
    {
      "epoch": 1.7204755078127558,
      "grad_norm": 1.0783392190933228,
      "learning_rate": 6.60226800998926e-06,
      "loss": 0.0151,
      "step": 1051300
    },
    {
      "epoch": 1.720508238251409,
      "grad_norm": 1.2093400955200195,
      "learning_rate": 6.602202117775743e-06,
      "loss": 0.0275,
      "step": 1051320
    },
    {
      "epoch": 1.7205409686900623,
      "grad_norm": 0.30590757727622986,
      "learning_rate": 6.602136225562226e-06,
      "loss": 0.0078,
      "step": 1051340
    },
    {
      "epoch": 1.7205736991287157,
      "grad_norm": 0.5900055170059204,
      "learning_rate": 6.602070333348709e-06,
      "loss": 0.0229,
      "step": 1051360
    },
    {
      "epoch": 1.720606429567369,
      "grad_norm": 0.09685948491096497,
      "learning_rate": 6.6020044411351915e-06,
      "loss": 0.0156,
      "step": 1051380
    },
    {
      "epoch": 1.7206391600060225,
      "grad_norm": 19.02439308166504,
      "learning_rate": 6.601938548921675e-06,
      "loss": 0.0153,
      "step": 1051400
    },
    {
      "epoch": 1.7206718904446756,
      "grad_norm": 0.46297088265419006,
      "learning_rate": 6.601872656708157e-06,
      "loss": 0.0155,
      "step": 1051420
    },
    {
      "epoch": 1.7207046208833292,
      "grad_norm": 0.2512343227863312,
      "learning_rate": 6.601806764494641e-06,
      "loss": 0.0133,
      "step": 1051440
    },
    {
      "epoch": 1.7207373513219824,
      "grad_norm": 0.6614056825637817,
      "learning_rate": 6.6017408722811225e-06,
      "loss": 0.0183,
      "step": 1051460
    },
    {
      "epoch": 1.7207700817606357,
      "grad_norm": 0.33972349762916565,
      "learning_rate": 6.601674980067606e-06,
      "loss": 0.0144,
      "step": 1051480
    },
    {
      "epoch": 1.720802812199289,
      "grad_norm": 0.8610217571258545,
      "learning_rate": 6.601609087854089e-06,
      "loss": 0.0177,
      "step": 1051500
    },
    {
      "epoch": 1.7208355426379425,
      "grad_norm": 0.15455345809459686,
      "learning_rate": 6.6015431956405715e-06,
      "loss": 0.0211,
      "step": 1051520
    },
    {
      "epoch": 1.7208682730765958,
      "grad_norm": 0.8634356260299683,
      "learning_rate": 6.601477303427054e-06,
      "loss": 0.0174,
      "step": 1051540
    },
    {
      "epoch": 1.720901003515249,
      "grad_norm": 0.47165200114250183,
      "learning_rate": 6.601411411213538e-06,
      "loss": 0.0182,
      "step": 1051560
    },
    {
      "epoch": 1.7209337339539026,
      "grad_norm": 0.21098639070987701,
      "learning_rate": 6.601345519000021e-06,
      "loss": 0.0149,
      "step": 1051580
    },
    {
      "epoch": 1.7209664643925557,
      "grad_norm": 0.43463975191116333,
      "learning_rate": 6.601279626786503e-06,
      "loss": 0.0176,
      "step": 1051600
    },
    {
      "epoch": 1.7209991948312091,
      "grad_norm": 0.8068764805793762,
      "learning_rate": 6.601213734572987e-06,
      "loss": 0.0187,
      "step": 1051620
    },
    {
      "epoch": 1.7210319252698625,
      "grad_norm": 0.2981117367744446,
      "learning_rate": 6.601147842359469e-06,
      "loss": 0.0206,
      "step": 1051640
    },
    {
      "epoch": 1.7210646557085159,
      "grad_norm": 0.8284966945648193,
      "learning_rate": 6.601081950145952e-06,
      "loss": 0.0219,
      "step": 1051660
    },
    {
      "epoch": 1.7210973861471692,
      "grad_norm": 1.3248462677001953,
      "learning_rate": 6.601016057932434e-06,
      "loss": 0.0335,
      "step": 1051680
    },
    {
      "epoch": 1.7211301165858224,
      "grad_norm": 0.37738659977912903,
      "learning_rate": 6.600950165718918e-06,
      "loss": 0.0152,
      "step": 1051700
    },
    {
      "epoch": 1.721162847024476,
      "grad_norm": 0.5018796920776367,
      "learning_rate": 6.6008842735054e-06,
      "loss": 0.0259,
      "step": 1051720
    },
    {
      "epoch": 1.7211955774631291,
      "grad_norm": 1.6706650257110596,
      "learning_rate": 6.600818381291883e-06,
      "loss": 0.0242,
      "step": 1051740
    },
    {
      "epoch": 1.7212283079017825,
      "grad_norm": 0.3791402280330658,
      "learning_rate": 6.600752489078366e-06,
      "loss": 0.0143,
      "step": 1051760
    },
    {
      "epoch": 1.7212610383404359,
      "grad_norm": 0.634303629398346,
      "learning_rate": 6.600686596864849e-06,
      "loss": 0.0167,
      "step": 1051780
    },
    {
      "epoch": 1.721293768779089,
      "grad_norm": 0.4828425943851471,
      "learning_rate": 6.600620704651332e-06,
      "loss": 0.0219,
      "step": 1051800
    },
    {
      "epoch": 1.7213264992177426,
      "grad_norm": 0.7822158932685852,
      "learning_rate": 6.600554812437815e-06,
      "loss": 0.0218,
      "step": 1051820
    },
    {
      "epoch": 1.7213592296563958,
      "grad_norm": 0.6497378945350647,
      "learning_rate": 6.600488920224297e-06,
      "loss": 0.0138,
      "step": 1051840
    },
    {
      "epoch": 1.7213919600950494,
      "grad_norm": 0.26665186882019043,
      "learning_rate": 6.600423028010781e-06,
      "loss": 0.0161,
      "step": 1051860
    },
    {
      "epoch": 1.7214246905337025,
      "grad_norm": 1.0220733880996704,
      "learning_rate": 6.6003571357972625e-06,
      "loss": 0.0158,
      "step": 1051880
    },
    {
      "epoch": 1.7214574209723559,
      "grad_norm": 0.7574503421783447,
      "learning_rate": 6.600291243583746e-06,
      "loss": 0.019,
      "step": 1051900
    },
    {
      "epoch": 1.7214901514110093,
      "grad_norm": 0.4327734708786011,
      "learning_rate": 6.600225351370229e-06,
      "loss": 0.0156,
      "step": 1051920
    },
    {
      "epoch": 1.7215228818496624,
      "grad_norm": 0.24476531147956848,
      "learning_rate": 6.600159459156712e-06,
      "loss": 0.0198,
      "step": 1051940
    },
    {
      "epoch": 1.721555612288316,
      "grad_norm": 0.8015703558921814,
      "learning_rate": 6.600093566943195e-06,
      "loss": 0.0225,
      "step": 1051960
    },
    {
      "epoch": 1.7215883427269691,
      "grad_norm": 0.40634849667549133,
      "learning_rate": 6.600027674729678e-06,
      "loss": 0.023,
      "step": 1051980
    },
    {
      "epoch": 1.7216210731656225,
      "grad_norm": 0.09264031052589417,
      "learning_rate": 6.599961782516161e-06,
      "loss": 0.0128,
      "step": 1052000
    },
    {
      "epoch": 1.721653803604276,
      "grad_norm": 0.3683288097381592,
      "learning_rate": 6.599895890302643e-06,
      "loss": 0.0204,
      "step": 1052020
    },
    {
      "epoch": 1.7216865340429293,
      "grad_norm": 0.3075941205024719,
      "learning_rate": 6.599829998089127e-06,
      "loss": 0.0185,
      "step": 1052040
    },
    {
      "epoch": 1.7217192644815826,
      "grad_norm": 0.3015340566635132,
      "learning_rate": 6.599764105875609e-06,
      "loss": 0.0147,
      "step": 1052060
    },
    {
      "epoch": 1.7217519949202358,
      "grad_norm": 0.6169281601905823,
      "learning_rate": 6.5996982136620925e-06,
      "loss": 0.0237,
      "step": 1052080
    },
    {
      "epoch": 1.7217847253588894,
      "grad_norm": 0.5604318380355835,
      "learning_rate": 6.599632321448574e-06,
      "loss": 0.0202,
      "step": 1052100
    },
    {
      "epoch": 1.7218174557975425,
      "grad_norm": 0.27944016456604004,
      "learning_rate": 6.599566429235058e-06,
      "loss": 0.0227,
      "step": 1052120
    },
    {
      "epoch": 1.721850186236196,
      "grad_norm": 0.5757959485054016,
      "learning_rate": 6.599500537021541e-06,
      "loss": 0.0171,
      "step": 1052140
    },
    {
      "epoch": 1.7218829166748493,
      "grad_norm": 0.2693502604961395,
      "learning_rate": 6.5994346448080234e-06,
      "loss": 0.0234,
      "step": 1052160
    },
    {
      "epoch": 1.7219156471135026,
      "grad_norm": 0.4163304269313812,
      "learning_rate": 6.599368752594506e-06,
      "loss": 0.017,
      "step": 1052180
    },
    {
      "epoch": 1.721948377552156,
      "grad_norm": 2.3484926223754883,
      "learning_rate": 6.59930286038099e-06,
      "loss": 0.0237,
      "step": 1052200
    },
    {
      "epoch": 1.7219811079908092,
      "grad_norm": 0.21117864549160004,
      "learning_rate": 6.599236968167472e-06,
      "loss": 0.0148,
      "step": 1052220
    },
    {
      "epoch": 1.7220138384294628,
      "grad_norm": 6.0652174949646,
      "learning_rate": 6.599171075953955e-06,
      "loss": 0.0249,
      "step": 1052240
    },
    {
      "epoch": 1.722046568868116,
      "grad_norm": 0.5285677313804626,
      "learning_rate": 6.599105183740437e-06,
      "loss": 0.0249,
      "step": 1052260
    },
    {
      "epoch": 1.7220792993067693,
      "grad_norm": 0.8862439393997192,
      "learning_rate": 6.599039291526921e-06,
      "loss": 0.0191,
      "step": 1052280
    },
    {
      "epoch": 1.7221120297454227,
      "grad_norm": 0.7370633482933044,
      "learning_rate": 6.598973399313404e-06,
      "loss": 0.015,
      "step": 1052300
    },
    {
      "epoch": 1.722144760184076,
      "grad_norm": 0.4120540916919708,
      "learning_rate": 6.598907507099886e-06,
      "loss": 0.0152,
      "step": 1052320
    },
    {
      "epoch": 1.7221774906227294,
      "grad_norm": 0.26672452688217163,
      "learning_rate": 6.59884161488637e-06,
      "loss": 0.0162,
      "step": 1052340
    },
    {
      "epoch": 1.7222102210613826,
      "grad_norm": 0.21004042029380798,
      "learning_rate": 6.5987757226728525e-06,
      "loss": 0.0199,
      "step": 1052360
    },
    {
      "epoch": 1.7222429515000361,
      "grad_norm": 0.18209850788116455,
      "learning_rate": 6.598709830459335e-06,
      "loss": 0.021,
      "step": 1052380
    },
    {
      "epoch": 1.7222756819386893,
      "grad_norm": 0.19361691176891327,
      "learning_rate": 6.598643938245818e-06,
      "loss": 0.0208,
      "step": 1052400
    },
    {
      "epoch": 1.7223084123773427,
      "grad_norm": 1.0282669067382812,
      "learning_rate": 6.598578046032302e-06,
      "loss": 0.0166,
      "step": 1052420
    },
    {
      "epoch": 1.722341142815996,
      "grad_norm": 0.4994908571243286,
      "learning_rate": 6.5985121538187835e-06,
      "loss": 0.0256,
      "step": 1052440
    },
    {
      "epoch": 1.7223738732546494,
      "grad_norm": 0.4904884696006775,
      "learning_rate": 6.598446261605267e-06,
      "loss": 0.0213,
      "step": 1052460
    },
    {
      "epoch": 1.7224066036933028,
      "grad_norm": 0.18760192394256592,
      "learning_rate": 6.598380369391749e-06,
      "loss": 0.0212,
      "step": 1052480
    },
    {
      "epoch": 1.722439334131956,
      "grad_norm": 0.18402588367462158,
      "learning_rate": 6.5983144771782326e-06,
      "loss": 0.0211,
      "step": 1052500
    },
    {
      "epoch": 1.7224720645706095,
      "grad_norm": 2.0815086364746094,
      "learning_rate": 6.5982485849647145e-06,
      "loss": 0.0237,
      "step": 1052520
    },
    {
      "epoch": 1.7225047950092627,
      "grad_norm": 0.5231262445449829,
      "learning_rate": 6.598182692751198e-06,
      "loss": 0.017,
      "step": 1052540
    },
    {
      "epoch": 1.722537525447916,
      "grad_norm": 0.10061097145080566,
      "learning_rate": 6.598116800537681e-06,
      "loss": 0.0261,
      "step": 1052560
    },
    {
      "epoch": 1.7225702558865694,
      "grad_norm": 0.45518577098846436,
      "learning_rate": 6.598050908324164e-06,
      "loss": 0.0165,
      "step": 1052580
    },
    {
      "epoch": 1.7226029863252226,
      "grad_norm": 0.5627871155738831,
      "learning_rate": 6.597985016110646e-06,
      "loss": 0.0154,
      "step": 1052600
    },
    {
      "epoch": 1.7226357167638762,
      "grad_norm": 0.4126996695995331,
      "learning_rate": 6.59791912389713e-06,
      "loss": 0.0213,
      "step": 1052620
    },
    {
      "epoch": 1.7226684472025293,
      "grad_norm": 0.45503148436546326,
      "learning_rate": 6.5978532316836134e-06,
      "loss": 0.0188,
      "step": 1052640
    },
    {
      "epoch": 1.722701177641183,
      "grad_norm": 0.1973148137331009,
      "learning_rate": 6.597787339470095e-06,
      "loss": 0.0223,
      "step": 1052660
    },
    {
      "epoch": 1.722733908079836,
      "grad_norm": 2.363004684448242,
      "learning_rate": 6.597721447256579e-06,
      "loss": 0.0197,
      "step": 1052680
    },
    {
      "epoch": 1.7227666385184894,
      "grad_norm": 0.2552014887332916,
      "learning_rate": 6.597655555043061e-06,
      "loss": 0.0203,
      "step": 1052700
    },
    {
      "epoch": 1.7227993689571428,
      "grad_norm": 0.140602245926857,
      "learning_rate": 6.597589662829544e-06,
      "loss": 0.0119,
      "step": 1052720
    },
    {
      "epoch": 1.722832099395796,
      "grad_norm": 0.9048817753791809,
      "learning_rate": 6.597523770616026e-06,
      "loss": 0.0262,
      "step": 1052740
    },
    {
      "epoch": 1.7228648298344496,
      "grad_norm": 0.5868963599205017,
      "learning_rate": 6.59745787840251e-06,
      "loss": 0.03,
      "step": 1052760
    },
    {
      "epoch": 1.7228975602731027,
      "grad_norm": 1.1876482963562012,
      "learning_rate": 6.597391986188993e-06,
      "loss": 0.0259,
      "step": 1052780
    },
    {
      "epoch": 1.722930290711756,
      "grad_norm": 0.5416037440299988,
      "learning_rate": 6.597326093975475e-06,
      "loss": 0.0121,
      "step": 1052800
    },
    {
      "epoch": 1.7229630211504094,
      "grad_norm": 0.11342739313840866,
      "learning_rate": 6.597260201761958e-06,
      "loss": 0.0212,
      "step": 1052820
    },
    {
      "epoch": 1.7229957515890628,
      "grad_norm": 1.7577489614486694,
      "learning_rate": 6.597194309548442e-06,
      "loss": 0.0325,
      "step": 1052840
    },
    {
      "epoch": 1.7230284820277162,
      "grad_norm": 0.42132455110549927,
      "learning_rate": 6.5971284173349236e-06,
      "loss": 0.0227,
      "step": 1052860
    },
    {
      "epoch": 1.7230612124663693,
      "grad_norm": 0.24999688565731049,
      "learning_rate": 6.597062525121407e-06,
      "loss": 0.0133,
      "step": 1052880
    },
    {
      "epoch": 1.723093942905023,
      "grad_norm": 0.9627630114555359,
      "learning_rate": 6.596996632907889e-06,
      "loss": 0.0173,
      "step": 1052900
    },
    {
      "epoch": 1.723126673343676,
      "grad_norm": 0.9274928569793701,
      "learning_rate": 6.596930740694373e-06,
      "loss": 0.0172,
      "step": 1052920
    },
    {
      "epoch": 1.7231594037823295,
      "grad_norm": 0.828402042388916,
      "learning_rate": 6.596864848480855e-06,
      "loss": 0.0169,
      "step": 1052940
    },
    {
      "epoch": 1.7231921342209828,
      "grad_norm": 0.19452719390392303,
      "learning_rate": 6.596798956267338e-06,
      "loss": 0.0178,
      "step": 1052960
    },
    {
      "epoch": 1.7232248646596362,
      "grad_norm": 0.8881159424781799,
      "learning_rate": 6.596733064053821e-06,
      "loss": 0.017,
      "step": 1052980
    },
    {
      "epoch": 1.7232575950982896,
      "grad_norm": 0.3093937933444977,
      "learning_rate": 6.5966671718403044e-06,
      "loss": 0.0128,
      "step": 1053000
    },
    {
      "epoch": 1.7232903255369427,
      "grad_norm": 0.5217315554618835,
      "learning_rate": 6.596601279626787e-06,
      "loss": 0.0174,
      "step": 1053020
    },
    {
      "epoch": 1.7233230559755963,
      "grad_norm": 0.8803166151046753,
      "learning_rate": 6.59653538741327e-06,
      "loss": 0.0168,
      "step": 1053040
    },
    {
      "epoch": 1.7233557864142495,
      "grad_norm": 0.2710844576358795,
      "learning_rate": 6.5964694951997535e-06,
      "loss": 0.0253,
      "step": 1053060
    },
    {
      "epoch": 1.7233885168529028,
      "grad_norm": 0.4994965195655823,
      "learning_rate": 6.596403602986235e-06,
      "loss": 0.0218,
      "step": 1053080
    },
    {
      "epoch": 1.7234212472915562,
      "grad_norm": 0.18813176453113556,
      "learning_rate": 6.596337710772719e-06,
      "loss": 0.0187,
      "step": 1053100
    },
    {
      "epoch": 1.7234539777302096,
      "grad_norm": 0.22589975595474243,
      "learning_rate": 6.596271818559201e-06,
      "loss": 0.0265,
      "step": 1053120
    },
    {
      "epoch": 1.723486708168863,
      "grad_norm": 0.22563819587230682,
      "learning_rate": 6.5962059263456845e-06,
      "loss": 0.0194,
      "step": 1053140
    },
    {
      "epoch": 1.723519438607516,
      "grad_norm": 0.48554110527038574,
      "learning_rate": 6.596140034132167e-06,
      "loss": 0.0259,
      "step": 1053160
    },
    {
      "epoch": 1.7235521690461697,
      "grad_norm": 0.4585714042186737,
      "learning_rate": 6.59607414191865e-06,
      "loss": 0.0137,
      "step": 1053180
    },
    {
      "epoch": 1.7235848994848229,
      "grad_norm": 0.25780442357063293,
      "learning_rate": 6.596008249705133e-06,
      "loss": 0.0233,
      "step": 1053200
    },
    {
      "epoch": 1.7236176299234762,
      "grad_norm": 1.0060898065567017,
      "learning_rate": 6.595942357491616e-06,
      "loss": 0.0197,
      "step": 1053220
    },
    {
      "epoch": 1.7236503603621296,
      "grad_norm": 0.8430548310279846,
      "learning_rate": 6.595876465278098e-06,
      "loss": 0.0211,
      "step": 1053240
    },
    {
      "epoch": 1.7236830908007827,
      "grad_norm": 1.152298092842102,
      "learning_rate": 6.595810573064582e-06,
      "loss": 0.0237,
      "step": 1053260
    },
    {
      "epoch": 1.7237158212394363,
      "grad_norm": 0.45305293798446655,
      "learning_rate": 6.595744680851064e-06,
      "loss": 0.0169,
      "step": 1053280
    },
    {
      "epoch": 1.7237485516780895,
      "grad_norm": 0.8352528214454651,
      "learning_rate": 6.595678788637547e-06,
      "loss": 0.0153,
      "step": 1053300
    },
    {
      "epoch": 1.723781282116743,
      "grad_norm": 0.5888050198554993,
      "learning_rate": 6.59561289642403e-06,
      "loss": 0.019,
      "step": 1053320
    },
    {
      "epoch": 1.7238140125553962,
      "grad_norm": 0.7815150618553162,
      "learning_rate": 6.595547004210513e-06,
      "loss": 0.0215,
      "step": 1053340
    },
    {
      "epoch": 1.7238467429940496,
      "grad_norm": 0.5135513544082642,
      "learning_rate": 6.595481111996996e-06,
      "loss": 0.017,
      "step": 1053360
    },
    {
      "epoch": 1.723879473432703,
      "grad_norm": 0.6317350268363953,
      "learning_rate": 6.595415219783479e-06,
      "loss": 0.0125,
      "step": 1053380
    },
    {
      "epoch": 1.7239122038713561,
      "grad_norm": 0.4890574812889099,
      "learning_rate": 6.595349327569962e-06,
      "loss": 0.0165,
      "step": 1053400
    },
    {
      "epoch": 1.7239449343100097,
      "grad_norm": 2.2694637775421143,
      "learning_rate": 6.5952834353564445e-06,
      "loss": 0.0163,
      "step": 1053420
    },
    {
      "epoch": 1.7239776647486629,
      "grad_norm": 0.05253518372774124,
      "learning_rate": 6.595217543142928e-06,
      "loss": 0.0174,
      "step": 1053440
    },
    {
      "epoch": 1.7240103951873162,
      "grad_norm": 0.6108434796333313,
      "learning_rate": 6.59515165092941e-06,
      "loss": 0.0163,
      "step": 1053460
    },
    {
      "epoch": 1.7240431256259696,
      "grad_norm": 0.27457407116889954,
      "learning_rate": 6.595085758715894e-06,
      "loss": 0.0162,
      "step": 1053480
    },
    {
      "epoch": 1.724075856064623,
      "grad_norm": 0.28301772475242615,
      "learning_rate": 6.5950198665023755e-06,
      "loss": 0.0225,
      "step": 1053500
    },
    {
      "epoch": 1.7241085865032764,
      "grad_norm": 0.7622479796409607,
      "learning_rate": 6.594953974288859e-06,
      "loss": 0.0231,
      "step": 1053520
    },
    {
      "epoch": 1.7241413169419295,
      "grad_norm": 0.3842325210571289,
      "learning_rate": 6.594888082075341e-06,
      "loss": 0.0258,
      "step": 1053540
    },
    {
      "epoch": 1.724174047380583,
      "grad_norm": 0.26144716143608093,
      "learning_rate": 6.5948221898618245e-06,
      "loss": 0.0281,
      "step": 1053560
    },
    {
      "epoch": 1.7242067778192363,
      "grad_norm": 1.083337426185608,
      "learning_rate": 6.594756297648307e-06,
      "loss": 0.0138,
      "step": 1053580
    },
    {
      "epoch": 1.7242395082578896,
      "grad_norm": 0.17077288031578064,
      "learning_rate": 6.59469040543479e-06,
      "loss": 0.0138,
      "step": 1053600
    },
    {
      "epoch": 1.724272238696543,
      "grad_norm": 0.3909366726875305,
      "learning_rate": 6.594624513221273e-06,
      "loss": 0.0168,
      "step": 1053620
    },
    {
      "epoch": 1.7243049691351964,
      "grad_norm": 0.4940807521343231,
      "learning_rate": 6.594558621007756e-06,
      "loss": 0.018,
      "step": 1053640
    },
    {
      "epoch": 1.7243376995738497,
      "grad_norm": 0.37876349687576294,
      "learning_rate": 6.594492728794238e-06,
      "loss": 0.0234,
      "step": 1053660
    },
    {
      "epoch": 1.724370430012503,
      "grad_norm": 0.12731511890888214,
      "learning_rate": 6.594426836580722e-06,
      "loss": 0.0158,
      "step": 1053680
    },
    {
      "epoch": 1.7244031604511565,
      "grad_norm": 0.46098142862319946,
      "learning_rate": 6.594360944367205e-06,
      "loss": 0.0246,
      "step": 1053700
    },
    {
      "epoch": 1.7244358908898096,
      "grad_norm": 1.378893494606018,
      "learning_rate": 6.594295052153687e-06,
      "loss": 0.0238,
      "step": 1053720
    },
    {
      "epoch": 1.724468621328463,
      "grad_norm": 0.5902942419052124,
      "learning_rate": 6.594229159940171e-06,
      "loss": 0.0125,
      "step": 1053740
    },
    {
      "epoch": 1.7245013517671164,
      "grad_norm": 0.42755651473999023,
      "learning_rate": 6.594163267726653e-06,
      "loss": 0.0233,
      "step": 1053760
    },
    {
      "epoch": 1.7245340822057698,
      "grad_norm": 0.07461024820804596,
      "learning_rate": 6.594097375513136e-06,
      "loss": 0.0203,
      "step": 1053780
    },
    {
      "epoch": 1.7245668126444231,
      "grad_norm": 0.23715394735336304,
      "learning_rate": 6.594031483299619e-06,
      "loss": 0.0118,
      "step": 1053800
    },
    {
      "epoch": 1.7245995430830763,
      "grad_norm": 0.6968924403190613,
      "learning_rate": 6.593965591086102e-06,
      "loss": 0.0177,
      "step": 1053820
    },
    {
      "epoch": 1.7246322735217299,
      "grad_norm": 0.9511936902999878,
      "learning_rate": 6.593899698872585e-06,
      "loss": 0.0166,
      "step": 1053840
    },
    {
      "epoch": 1.724665003960383,
      "grad_norm": 0.8091760873794556,
      "learning_rate": 6.593833806659068e-06,
      "loss": 0.0213,
      "step": 1053860
    },
    {
      "epoch": 1.7246977343990364,
      "grad_norm": 0.5483693480491638,
      "learning_rate": 6.59376791444555e-06,
      "loss": 0.0188,
      "step": 1053880
    },
    {
      "epoch": 1.7247304648376898,
      "grad_norm": 0.5485403537750244,
      "learning_rate": 6.593702022232034e-06,
      "loss": 0.0148,
      "step": 1053900
    },
    {
      "epoch": 1.7247631952763431,
      "grad_norm": 0.16172179579734802,
      "learning_rate": 6.5936361300185156e-06,
      "loss": 0.0161,
      "step": 1053920
    },
    {
      "epoch": 1.7247959257149965,
      "grad_norm": 0.2934762239456177,
      "learning_rate": 6.593570237804999e-06,
      "loss": 0.0265,
      "step": 1053940
    },
    {
      "epoch": 1.7248286561536497,
      "grad_norm": 0.2996895909309387,
      "learning_rate": 6.593504345591482e-06,
      "loss": 0.0115,
      "step": 1053960
    },
    {
      "epoch": 1.7248613865923033,
      "grad_norm": 0.363316148519516,
      "learning_rate": 6.593438453377965e-06,
      "loss": 0.0151,
      "step": 1053980
    },
    {
      "epoch": 1.7248941170309564,
      "grad_norm": 0.11952708661556244,
      "learning_rate": 6.593372561164447e-06,
      "loss": 0.0192,
      "step": 1054000
    },
    {
      "epoch": 1.7249268474696098,
      "grad_norm": 0.25554344058036804,
      "learning_rate": 6.593306668950931e-06,
      "loss": 0.0195,
      "step": 1054020
    },
    {
      "epoch": 1.7249595779082632,
      "grad_norm": 0.28915244340896606,
      "learning_rate": 6.593240776737414e-06,
      "loss": 0.0151,
      "step": 1054040
    },
    {
      "epoch": 1.7249923083469163,
      "grad_norm": 0.3784928619861603,
      "learning_rate": 6.5931748845238964e-06,
      "loss": 0.0145,
      "step": 1054060
    },
    {
      "epoch": 1.72502503878557,
      "grad_norm": 0.1443607360124588,
      "learning_rate": 6.59310899231038e-06,
      "loss": 0.0227,
      "step": 1054080
    },
    {
      "epoch": 1.725057769224223,
      "grad_norm": 0.13813799619674683,
      "learning_rate": 6.593043100096862e-06,
      "loss": 0.0256,
      "step": 1054100
    },
    {
      "epoch": 1.7250904996628766,
      "grad_norm": 0.32303014397621155,
      "learning_rate": 6.5929772078833455e-06,
      "loss": 0.0229,
      "step": 1054120
    },
    {
      "epoch": 1.7251232301015298,
      "grad_norm": 0.8833506107330322,
      "learning_rate": 6.592911315669827e-06,
      "loss": 0.0161,
      "step": 1054140
    },
    {
      "epoch": 1.7251559605401832,
      "grad_norm": 1.0079301595687866,
      "learning_rate": 6.592845423456311e-06,
      "loss": 0.0165,
      "step": 1054160
    },
    {
      "epoch": 1.7251886909788365,
      "grad_norm": 0.6391900181770325,
      "learning_rate": 6.592779531242794e-06,
      "loss": 0.0181,
      "step": 1054180
    },
    {
      "epoch": 1.7252214214174897,
      "grad_norm": 0.3974337875843048,
      "learning_rate": 6.5927136390292765e-06,
      "loss": 0.0132,
      "step": 1054200
    },
    {
      "epoch": 1.7252541518561433,
      "grad_norm": 1.2240928411483765,
      "learning_rate": 6.592647746815759e-06,
      "loss": 0.0185,
      "step": 1054220
    },
    {
      "epoch": 1.7252868822947964,
      "grad_norm": 0.30719345808029175,
      "learning_rate": 6.592581854602243e-06,
      "loss": 0.02,
      "step": 1054240
    },
    {
      "epoch": 1.7253196127334498,
      "grad_norm": 0.8687161207199097,
      "learning_rate": 6.592515962388725e-06,
      "loss": 0.0235,
      "step": 1054260
    },
    {
      "epoch": 1.7253523431721032,
      "grad_norm": 0.1663871854543686,
      "learning_rate": 6.592450070175208e-06,
      "loss": 0.0192,
      "step": 1054280
    },
    {
      "epoch": 1.7253850736107565,
      "grad_norm": 1.0711283683776855,
      "learning_rate": 6.59238417796169e-06,
      "loss": 0.0334,
      "step": 1054300
    },
    {
      "epoch": 1.72541780404941,
      "grad_norm": 0.3754766881465912,
      "learning_rate": 6.592318285748174e-06,
      "loss": 0.0172,
      "step": 1054320
    },
    {
      "epoch": 1.725450534488063,
      "grad_norm": 1.0038912296295166,
      "learning_rate": 6.5922523935346565e-06,
      "loss": 0.019,
      "step": 1054340
    },
    {
      "epoch": 1.7254832649267167,
      "grad_norm": 4.441783428192139,
      "learning_rate": 6.592186501321139e-06,
      "loss": 0.0139,
      "step": 1054360
    },
    {
      "epoch": 1.7255159953653698,
      "grad_norm": 0.2378918081521988,
      "learning_rate": 6.592120609107622e-06,
      "loss": 0.0184,
      "step": 1054380
    },
    {
      "epoch": 1.7255487258040232,
      "grad_norm": 0.6671803593635559,
      "learning_rate": 6.5920547168941055e-06,
      "loss": 0.0263,
      "step": 1054400
    },
    {
      "epoch": 1.7255814562426766,
      "grad_norm": 0.30993223190307617,
      "learning_rate": 6.591988824680588e-06,
      "loss": 0.0189,
      "step": 1054420
    },
    {
      "epoch": 1.72561418668133,
      "grad_norm": 0.1794787347316742,
      "learning_rate": 6.591922932467071e-06,
      "loss": 0.0242,
      "step": 1054440
    },
    {
      "epoch": 1.7256469171199833,
      "grad_norm": 0.7526696920394897,
      "learning_rate": 6.591857040253555e-06,
      "loss": 0.0232,
      "step": 1054460
    },
    {
      "epoch": 1.7256796475586365,
      "grad_norm": 0.553882896900177,
      "learning_rate": 6.5917911480400365e-06,
      "loss": 0.0151,
      "step": 1054480
    },
    {
      "epoch": 1.72571237799729,
      "grad_norm": 1.463725209236145,
      "learning_rate": 6.59172525582652e-06,
      "loss": 0.0184,
      "step": 1054500
    },
    {
      "epoch": 1.7257451084359432,
      "grad_norm": 0.28101271390914917,
      "learning_rate": 6.591659363613002e-06,
      "loss": 0.0164,
      "step": 1054520
    },
    {
      "epoch": 1.7257778388745966,
      "grad_norm": 0.4781753122806549,
      "learning_rate": 6.5915934713994856e-06,
      "loss": 0.0232,
      "step": 1054540
    },
    {
      "epoch": 1.72581056931325,
      "grad_norm": 0.5731159448623657,
      "learning_rate": 6.5915275791859675e-06,
      "loss": 0.0198,
      "step": 1054560
    },
    {
      "epoch": 1.7258432997519033,
      "grad_norm": 0.5862336158752441,
      "learning_rate": 6.591461686972451e-06,
      "loss": 0.0289,
      "step": 1054580
    },
    {
      "epoch": 1.7258760301905567,
      "grad_norm": 0.202509343624115,
      "learning_rate": 6.591395794758934e-06,
      "loss": 0.0232,
      "step": 1054600
    },
    {
      "epoch": 1.7259087606292098,
      "grad_norm": 0.710016131401062,
      "learning_rate": 6.5913299025454165e-06,
      "loss": 0.0173,
      "step": 1054620
    },
    {
      "epoch": 1.7259414910678634,
      "grad_norm": 0.2579071819782257,
      "learning_rate": 6.591264010331899e-06,
      "loss": 0.0168,
      "step": 1054640
    },
    {
      "epoch": 1.7259742215065166,
      "grad_norm": 0.2410183995962143,
      "learning_rate": 6.591198118118383e-06,
      "loss": 0.0217,
      "step": 1054660
    },
    {
      "epoch": 1.72600695194517,
      "grad_norm": 0.36020541191101074,
      "learning_rate": 6.591132225904865e-06,
      "loss": 0.0234,
      "step": 1054680
    },
    {
      "epoch": 1.7260396823838233,
      "grad_norm": 0.5344142317771912,
      "learning_rate": 6.591066333691348e-06,
      "loss": 0.0189,
      "step": 1054700
    },
    {
      "epoch": 1.7260724128224767,
      "grad_norm": 0.6546123623847961,
      "learning_rate": 6.59100044147783e-06,
      "loss": 0.0192,
      "step": 1054720
    },
    {
      "epoch": 1.72610514326113,
      "grad_norm": 0.5357767343521118,
      "learning_rate": 6.590934549264314e-06,
      "loss": 0.0203,
      "step": 1054740
    },
    {
      "epoch": 1.7261378736997832,
      "grad_norm": 0.3484318256378174,
      "learning_rate": 6.590868657050797e-06,
      "loss": 0.0165,
      "step": 1054760
    },
    {
      "epoch": 1.7261706041384368,
      "grad_norm": 0.386456161737442,
      "learning_rate": 6.590802764837279e-06,
      "loss": 0.0186,
      "step": 1054780
    },
    {
      "epoch": 1.72620333457709,
      "grad_norm": 0.21880468726158142,
      "learning_rate": 6.590736872623763e-06,
      "loss": 0.0169,
      "step": 1054800
    },
    {
      "epoch": 1.7262360650157433,
      "grad_norm": 0.2303757518529892,
      "learning_rate": 6.590670980410246e-06,
      "loss": 0.0185,
      "step": 1054820
    },
    {
      "epoch": 1.7262687954543967,
      "grad_norm": 0.18074741959571838,
      "learning_rate": 6.590605088196728e-06,
      "loss": 0.0217,
      "step": 1054840
    },
    {
      "epoch": 1.7263015258930499,
      "grad_norm": 0.36409056186676025,
      "learning_rate": 6.590539195983211e-06,
      "loss": 0.013,
      "step": 1054860
    },
    {
      "epoch": 1.7263342563317035,
      "grad_norm": 0.6430460810661316,
      "learning_rate": 6.590473303769695e-06,
      "loss": 0.0285,
      "step": 1054880
    },
    {
      "epoch": 1.7263669867703566,
      "grad_norm": 0.4069638252258301,
      "learning_rate": 6.590407411556177e-06,
      "loss": 0.0197,
      "step": 1054900
    },
    {
      "epoch": 1.7263997172090102,
      "grad_norm": 0.36831557750701904,
      "learning_rate": 6.59034151934266e-06,
      "loss": 0.0224,
      "step": 1054920
    },
    {
      "epoch": 1.7264324476476633,
      "grad_norm": 1.9888066053390503,
      "learning_rate": 6.590275627129142e-06,
      "loss": 0.0152,
      "step": 1054940
    },
    {
      "epoch": 1.7264651780863167,
      "grad_norm": 0.25275760889053345,
      "learning_rate": 6.590209734915626e-06,
      "loss": 0.0145,
      "step": 1054960
    },
    {
      "epoch": 1.72649790852497,
      "grad_norm": 0.13064627349376678,
      "learning_rate": 6.590143842702108e-06,
      "loss": 0.0199,
      "step": 1054980
    },
    {
      "epoch": 1.7265306389636232,
      "grad_norm": 0.26879599690437317,
      "learning_rate": 6.590077950488591e-06,
      "loss": 0.0167,
      "step": 1055000
    },
    {
      "epoch": 1.7265633694022768,
      "grad_norm": 0.9575691223144531,
      "learning_rate": 6.590012058275074e-06,
      "loss": 0.019,
      "step": 1055020
    },
    {
      "epoch": 1.72659609984093,
      "grad_norm": 0.9629245400428772,
      "learning_rate": 6.5899461660615575e-06,
      "loss": 0.0208,
      "step": 1055040
    },
    {
      "epoch": 1.7266288302795834,
      "grad_norm": 0.7085360884666443,
      "learning_rate": 6.589880273848039e-06,
      "loss": 0.0252,
      "step": 1055060
    },
    {
      "epoch": 1.7266615607182367,
      "grad_norm": 1.0304515361785889,
      "learning_rate": 6.589814381634523e-06,
      "loss": 0.0297,
      "step": 1055080
    },
    {
      "epoch": 1.72669429115689,
      "grad_norm": 0.555115282535553,
      "learning_rate": 6.5897484894210065e-06,
      "loss": 0.015,
      "step": 1055100
    },
    {
      "epoch": 1.7267270215955435,
      "grad_norm": 0.746008574962616,
      "learning_rate": 6.589682597207488e-06,
      "loss": 0.0242,
      "step": 1055120
    },
    {
      "epoch": 1.7267597520341966,
      "grad_norm": 0.7043073177337646,
      "learning_rate": 6.589616704993972e-06,
      "loss": 0.0173,
      "step": 1055140
    },
    {
      "epoch": 1.7267924824728502,
      "grad_norm": 0.07330118119716644,
      "learning_rate": 6.589550812780454e-06,
      "loss": 0.0175,
      "step": 1055160
    },
    {
      "epoch": 1.7268252129115034,
      "grad_norm": 0.03125021979212761,
      "learning_rate": 6.5894849205669375e-06,
      "loss": 0.0198,
      "step": 1055180
    },
    {
      "epoch": 1.7268579433501567,
      "grad_norm": 1.1245794296264648,
      "learning_rate": 6.58941902835342e-06,
      "loss": 0.0226,
      "step": 1055200
    },
    {
      "epoch": 1.7268906737888101,
      "grad_norm": 0.4458950459957123,
      "learning_rate": 6.589353136139903e-06,
      "loss": 0.026,
      "step": 1055220
    },
    {
      "epoch": 1.7269234042274635,
      "grad_norm": 0.3115106225013733,
      "learning_rate": 6.589287243926386e-06,
      "loss": 0.0265,
      "step": 1055240
    },
    {
      "epoch": 1.7269561346661169,
      "grad_norm": 1.2835725545883179,
      "learning_rate": 6.589221351712869e-06,
      "loss": 0.0189,
      "step": 1055260
    },
    {
      "epoch": 1.72698886510477,
      "grad_norm": 0.6700414419174194,
      "learning_rate": 6.589155459499351e-06,
      "loss": 0.0257,
      "step": 1055280
    },
    {
      "epoch": 1.7270215955434236,
      "grad_norm": 0.9448843002319336,
      "learning_rate": 6.589089567285835e-06,
      "loss": 0.0184,
      "step": 1055300
    },
    {
      "epoch": 1.7270543259820768,
      "grad_norm": 0.8786872625350952,
      "learning_rate": 6.589023675072317e-06,
      "loss": 0.0205,
      "step": 1055320
    },
    {
      "epoch": 1.7270870564207301,
      "grad_norm": 0.09919662028551102,
      "learning_rate": 6.5889577828588e-06,
      "loss": 0.0134,
      "step": 1055340
    },
    {
      "epoch": 1.7271197868593835,
      "grad_norm": 1.103635311126709,
      "learning_rate": 6.588891890645283e-06,
      "loss": 0.0219,
      "step": 1055360
    },
    {
      "epoch": 1.7271525172980369,
      "grad_norm": 0.7999851703643799,
      "learning_rate": 6.588825998431766e-06,
      "loss": 0.0232,
      "step": 1055380
    },
    {
      "epoch": 1.7271852477366902,
      "grad_norm": 1.569771647453308,
      "learning_rate": 6.5887601062182485e-06,
      "loss": 0.0197,
      "step": 1055400
    },
    {
      "epoch": 1.7272179781753434,
      "grad_norm": 2.00809645652771,
      "learning_rate": 6.588694214004732e-06,
      "loss": 0.0168,
      "step": 1055420
    },
    {
      "epoch": 1.727250708613997,
      "grad_norm": 0.3893246054649353,
      "learning_rate": 6.588628321791214e-06,
      "loss": 0.018,
      "step": 1055440
    },
    {
      "epoch": 1.7272834390526501,
      "grad_norm": 0.13010412454605103,
      "learning_rate": 6.5885624295776975e-06,
      "loss": 0.0153,
      "step": 1055460
    },
    {
      "epoch": 1.7273161694913035,
      "grad_norm": 0.5376077890396118,
      "learning_rate": 6.588496537364181e-06,
      "loss": 0.0239,
      "step": 1055480
    },
    {
      "epoch": 1.7273488999299569,
      "grad_norm": 0.6349525451660156,
      "learning_rate": 6.588430645150663e-06,
      "loss": 0.0216,
      "step": 1055500
    },
    {
      "epoch": 1.7273816303686103,
      "grad_norm": 0.6348690390586853,
      "learning_rate": 6.588364752937147e-06,
      "loss": 0.0223,
      "step": 1055520
    },
    {
      "epoch": 1.7274143608072636,
      "grad_norm": 0.5580123066902161,
      "learning_rate": 6.5882988607236285e-06,
      "loss": 0.017,
      "step": 1055540
    },
    {
      "epoch": 1.7274470912459168,
      "grad_norm": 1.29666268825531,
      "learning_rate": 6.588232968510112e-06,
      "loss": 0.0182,
      "step": 1055560
    },
    {
      "epoch": 1.7274798216845704,
      "grad_norm": 0.41682523488998413,
      "learning_rate": 6.588167076296594e-06,
      "loss": 0.0136,
      "step": 1055580
    },
    {
      "epoch": 1.7275125521232235,
      "grad_norm": 1.978682279586792,
      "learning_rate": 6.5881011840830776e-06,
      "loss": 0.0199,
      "step": 1055600
    },
    {
      "epoch": 1.727545282561877,
      "grad_norm": 0.8188827633857727,
      "learning_rate": 6.58803529186956e-06,
      "loss": 0.0187,
      "step": 1055620
    },
    {
      "epoch": 1.7275780130005303,
      "grad_norm": 0.718050479888916,
      "learning_rate": 6.587969399656043e-06,
      "loss": 0.0154,
      "step": 1055640
    },
    {
      "epoch": 1.7276107434391834,
      "grad_norm": 0.54692143201828,
      "learning_rate": 6.587903507442526e-06,
      "loss": 0.0133,
      "step": 1055660
    },
    {
      "epoch": 1.727643473877837,
      "grad_norm": 0.4958561956882477,
      "learning_rate": 6.587837615229009e-06,
      "loss": 0.0259,
      "step": 1055680
    },
    {
      "epoch": 1.7276762043164902,
      "grad_norm": 0.29715490341186523,
      "learning_rate": 6.587771723015491e-06,
      "loss": 0.0148,
      "step": 1055700
    },
    {
      "epoch": 1.7277089347551438,
      "grad_norm": 0.53795325756073,
      "learning_rate": 6.587705830801975e-06,
      "loss": 0.0163,
      "step": 1055720
    },
    {
      "epoch": 1.727741665193797,
      "grad_norm": 0.37196850776672363,
      "learning_rate": 6.587639938588457e-06,
      "loss": 0.0153,
      "step": 1055740
    },
    {
      "epoch": 1.7277743956324503,
      "grad_norm": 0.4804685711860657,
      "learning_rate": 6.58757404637494e-06,
      "loss": 0.0209,
      "step": 1055760
    },
    {
      "epoch": 1.7278071260711036,
      "grad_norm": 0.18472741544246674,
      "learning_rate": 6.587508154161423e-06,
      "loss": 0.0153,
      "step": 1055780
    },
    {
      "epoch": 1.7278398565097568,
      "grad_norm": 0.2386646419763565,
      "learning_rate": 6.587442261947906e-06,
      "loss": 0.0207,
      "step": 1055800
    },
    {
      "epoch": 1.7278725869484104,
      "grad_norm": 0.3261600136756897,
      "learning_rate": 6.587376369734389e-06,
      "loss": 0.0173,
      "step": 1055820
    },
    {
      "epoch": 1.7279053173870635,
      "grad_norm": 0.26632261276245117,
      "learning_rate": 6.587310477520872e-06,
      "loss": 0.0194,
      "step": 1055840
    },
    {
      "epoch": 1.727938047825717,
      "grad_norm": 0.06644453853368759,
      "learning_rate": 6.587244585307355e-06,
      "loss": 0.0108,
      "step": 1055860
    },
    {
      "epoch": 1.7279707782643703,
      "grad_norm": 1.0010666847229004,
      "learning_rate": 6.587178693093838e-06,
      "loss": 0.0176,
      "step": 1055880
    },
    {
      "epoch": 1.7280035087030237,
      "grad_norm": 0.3854620158672333,
      "learning_rate": 6.587112800880321e-06,
      "loss": 0.0171,
      "step": 1055900
    },
    {
      "epoch": 1.728036239141677,
      "grad_norm": 1.2977805137634277,
      "learning_rate": 6.587046908666803e-06,
      "loss": 0.0113,
      "step": 1055920
    },
    {
      "epoch": 1.7280689695803302,
      "grad_norm": 1.245787501335144,
      "learning_rate": 6.586981016453287e-06,
      "loss": 0.0235,
      "step": 1055940
    },
    {
      "epoch": 1.7281017000189838,
      "grad_norm": 0.9819181561470032,
      "learning_rate": 6.5869151242397686e-06,
      "loss": 0.0163,
      "step": 1055960
    },
    {
      "epoch": 1.728134430457637,
      "grad_norm": 0.4692257344722748,
      "learning_rate": 6.586849232026252e-06,
      "loss": 0.0191,
      "step": 1055980
    },
    {
      "epoch": 1.7281671608962903,
      "grad_norm": 1.2897520065307617,
      "learning_rate": 6.586783339812735e-06,
      "loss": 0.0185,
      "step": 1056000
    },
    {
      "epoch": 1.7281998913349437,
      "grad_norm": 0.27441149950027466,
      "learning_rate": 6.586717447599218e-06,
      "loss": 0.0172,
      "step": 1056020
    },
    {
      "epoch": 1.728232621773597,
      "grad_norm": 0.3466726839542389,
      "learning_rate": 6.5866515553857e-06,
      "loss": 0.0192,
      "step": 1056040
    },
    {
      "epoch": 1.7282653522122504,
      "grad_norm": 0.36693036556243896,
      "learning_rate": 6.586585663172184e-06,
      "loss": 0.0298,
      "step": 1056060
    },
    {
      "epoch": 1.7282980826509036,
      "grad_norm": 0.32646870613098145,
      "learning_rate": 6.586519770958666e-06,
      "loss": 0.0178,
      "step": 1056080
    },
    {
      "epoch": 1.7283308130895572,
      "grad_norm": 1.6280380487442017,
      "learning_rate": 6.5864538787451494e-06,
      "loss": 0.019,
      "step": 1056100
    },
    {
      "epoch": 1.7283635435282103,
      "grad_norm": 0.1811230331659317,
      "learning_rate": 6.586387986531631e-06,
      "loss": 0.0151,
      "step": 1056120
    },
    {
      "epoch": 1.7283962739668637,
      "grad_norm": 0.6156013607978821,
      "learning_rate": 6.586322094318115e-06,
      "loss": 0.0191,
      "step": 1056140
    },
    {
      "epoch": 1.728429004405517,
      "grad_norm": 0.18257078528404236,
      "learning_rate": 6.5862562021045985e-06,
      "loss": 0.0187,
      "step": 1056160
    },
    {
      "epoch": 1.7284617348441704,
      "grad_norm": 0.4429949223995209,
      "learning_rate": 6.58619030989108e-06,
      "loss": 0.0214,
      "step": 1056180
    },
    {
      "epoch": 1.7284944652828238,
      "grad_norm": 1.3016550540924072,
      "learning_rate": 6.586124417677564e-06,
      "loss": 0.0254,
      "step": 1056200
    },
    {
      "epoch": 1.728527195721477,
      "grad_norm": 0.36130133271217346,
      "learning_rate": 6.586058525464047e-06,
      "loss": 0.0253,
      "step": 1056220
    },
    {
      "epoch": 1.7285599261601305,
      "grad_norm": 0.535467803478241,
      "learning_rate": 6.5859926332505295e-06,
      "loss": 0.015,
      "step": 1056240
    },
    {
      "epoch": 1.7285926565987837,
      "grad_norm": 0.6141761541366577,
      "learning_rate": 6.585926741037012e-06,
      "loss": 0.0156,
      "step": 1056260
    },
    {
      "epoch": 1.728625387037437,
      "grad_norm": 0.3098239302635193,
      "learning_rate": 6.585860848823496e-06,
      "loss": 0.0191,
      "step": 1056280
    },
    {
      "epoch": 1.7286581174760904,
      "grad_norm": 0.545627772808075,
      "learning_rate": 6.585794956609978e-06,
      "loss": 0.0158,
      "step": 1056300
    },
    {
      "epoch": 1.7286908479147436,
      "grad_norm": 0.672105610370636,
      "learning_rate": 6.585729064396461e-06,
      "loss": 0.0237,
      "step": 1056320
    },
    {
      "epoch": 1.7287235783533972,
      "grad_norm": 0.5372036099433899,
      "learning_rate": 6.585663172182943e-06,
      "loss": 0.0248,
      "step": 1056340
    },
    {
      "epoch": 1.7287563087920503,
      "grad_norm": 0.4501478374004364,
      "learning_rate": 6.585597279969427e-06,
      "loss": 0.0193,
      "step": 1056360
    },
    {
      "epoch": 1.728789039230704,
      "grad_norm": 0.694076418876648,
      "learning_rate": 6.585531387755909e-06,
      "loss": 0.0193,
      "step": 1056380
    },
    {
      "epoch": 1.728821769669357,
      "grad_norm": 0.6893569827079773,
      "learning_rate": 6.585465495542392e-06,
      "loss": 0.0196,
      "step": 1056400
    },
    {
      "epoch": 1.7288545001080105,
      "grad_norm": 1.046393632888794,
      "learning_rate": 6.585399603328875e-06,
      "loss": 0.0209,
      "step": 1056420
    },
    {
      "epoch": 1.7288872305466638,
      "grad_norm": 0.49164411425590515,
      "learning_rate": 6.5853337111153586e-06,
      "loss": 0.0191,
      "step": 1056440
    },
    {
      "epoch": 1.728919960985317,
      "grad_norm": 0.29313310980796814,
      "learning_rate": 6.5852678189018404e-06,
      "loss": 0.0207,
      "step": 1056460
    },
    {
      "epoch": 1.7289526914239706,
      "grad_norm": 0.12925823032855988,
      "learning_rate": 6.585201926688324e-06,
      "loss": 0.0216,
      "step": 1056480
    },
    {
      "epoch": 1.7289854218626237,
      "grad_norm": 0.42828524112701416,
      "learning_rate": 6.585136034474808e-06,
      "loss": 0.0261,
      "step": 1056500
    },
    {
      "epoch": 1.729018152301277,
      "grad_norm": 0.3266483545303345,
      "learning_rate": 6.5850701422612895e-06,
      "loss": 0.0201,
      "step": 1056520
    },
    {
      "epoch": 1.7290508827399305,
      "grad_norm": 0.6415314078330994,
      "learning_rate": 6.585004250047773e-06,
      "loss": 0.0198,
      "step": 1056540
    },
    {
      "epoch": 1.7290836131785838,
      "grad_norm": 0.20012439787387848,
      "learning_rate": 6.584938357834255e-06,
      "loss": 0.0174,
      "step": 1056560
    },
    {
      "epoch": 1.7291163436172372,
      "grad_norm": 2.900928497314453,
      "learning_rate": 6.5848724656207386e-06,
      "loss": 0.0249,
      "step": 1056580
    },
    {
      "epoch": 1.7291490740558904,
      "grad_norm": 0.13678251206874847,
      "learning_rate": 6.5848065734072205e-06,
      "loss": 0.0199,
      "step": 1056600
    },
    {
      "epoch": 1.729181804494544,
      "grad_norm": 0.6773946285247803,
      "learning_rate": 6.584740681193704e-06,
      "loss": 0.0196,
      "step": 1056620
    },
    {
      "epoch": 1.729214534933197,
      "grad_norm": 0.9587275981903076,
      "learning_rate": 6.584674788980187e-06,
      "loss": 0.0186,
      "step": 1056640
    },
    {
      "epoch": 1.7292472653718505,
      "grad_norm": 0.3292875587940216,
      "learning_rate": 6.5846088967666695e-06,
      "loss": 0.0193,
      "step": 1056660
    },
    {
      "epoch": 1.7292799958105038,
      "grad_norm": 0.13300587236881256,
      "learning_rate": 6.584543004553152e-06,
      "loss": 0.0126,
      "step": 1056680
    },
    {
      "epoch": 1.7293127262491572,
      "grad_norm": 0.6431484222412109,
      "learning_rate": 6.584477112339636e-06,
      "loss": 0.0195,
      "step": 1056700
    },
    {
      "epoch": 1.7293454566878106,
      "grad_norm": 1.3239386081695557,
      "learning_rate": 6.584411220126118e-06,
      "loss": 0.0222,
      "step": 1056720
    },
    {
      "epoch": 1.7293781871264637,
      "grad_norm": 0.6350487470626831,
      "learning_rate": 6.584345327912601e-06,
      "loss": 0.0178,
      "step": 1056740
    },
    {
      "epoch": 1.7294109175651173,
      "grad_norm": 0.1560397744178772,
      "learning_rate": 6.584279435699083e-06,
      "loss": 0.0167,
      "step": 1056760
    },
    {
      "epoch": 1.7294436480037705,
      "grad_norm": 0.27275145053863525,
      "learning_rate": 6.584213543485567e-06,
      "loss": 0.0218,
      "step": 1056780
    },
    {
      "epoch": 1.7294763784424239,
      "grad_norm": 0.4310761094093323,
      "learning_rate": 6.5841476512720496e-06,
      "loss": 0.0125,
      "step": 1056800
    },
    {
      "epoch": 1.7295091088810772,
      "grad_norm": 0.23364324867725372,
      "learning_rate": 6.584081759058532e-06,
      "loss": 0.0151,
      "step": 1056820
    },
    {
      "epoch": 1.7295418393197306,
      "grad_norm": 0.8874908685684204,
      "learning_rate": 6.584015866845015e-06,
      "loss": 0.0279,
      "step": 1056840
    },
    {
      "epoch": 1.729574569758384,
      "grad_norm": 0.3016083836555481,
      "learning_rate": 6.583949974631499e-06,
      "loss": 0.0301,
      "step": 1056860
    },
    {
      "epoch": 1.7296073001970371,
      "grad_norm": 0.678671658039093,
      "learning_rate": 6.583884082417981e-06,
      "loss": 0.0148,
      "step": 1056880
    },
    {
      "epoch": 1.7296400306356907,
      "grad_norm": 0.7799248695373535,
      "learning_rate": 6.583818190204464e-06,
      "loss": 0.0164,
      "step": 1056900
    },
    {
      "epoch": 1.7296727610743439,
      "grad_norm": 0.23985683917999268,
      "learning_rate": 6.583752297990948e-06,
      "loss": 0.0205,
      "step": 1056920
    },
    {
      "epoch": 1.7297054915129972,
      "grad_norm": 0.47149398922920227,
      "learning_rate": 6.58368640577743e-06,
      "loss": 0.02,
      "step": 1056940
    },
    {
      "epoch": 1.7297382219516506,
      "grad_norm": 0.18182647228240967,
      "learning_rate": 6.583620513563913e-06,
      "loss": 0.0209,
      "step": 1056960
    },
    {
      "epoch": 1.729770952390304,
      "grad_norm": 0.3384559154510498,
      "learning_rate": 6.583554621350395e-06,
      "loss": 0.0162,
      "step": 1056980
    },
    {
      "epoch": 1.7298036828289574,
      "grad_norm": 0.49446699023246765,
      "learning_rate": 6.583488729136879e-06,
      "loss": 0.0112,
      "step": 1057000
    },
    {
      "epoch": 1.7298364132676105,
      "grad_norm": 0.11701496690511703,
      "learning_rate": 6.583422836923361e-06,
      "loss": 0.0127,
      "step": 1057020
    },
    {
      "epoch": 1.729869143706264,
      "grad_norm": 0.27394115924835205,
      "learning_rate": 6.583356944709844e-06,
      "loss": 0.0243,
      "step": 1057040
    },
    {
      "epoch": 1.7299018741449173,
      "grad_norm": 0.23499934375286102,
      "learning_rate": 6.583291052496327e-06,
      "loss": 0.0183,
      "step": 1057060
    },
    {
      "epoch": 1.7299346045835706,
      "grad_norm": 0.5365070104598999,
      "learning_rate": 6.5832251602828105e-06,
      "loss": 0.0235,
      "step": 1057080
    },
    {
      "epoch": 1.729967335022224,
      "grad_norm": 0.36691316962242126,
      "learning_rate": 6.583159268069292e-06,
      "loss": 0.0213,
      "step": 1057100
    },
    {
      "epoch": 1.7300000654608771,
      "grad_norm": 0.5257177948951721,
      "learning_rate": 6.583093375855776e-06,
      "loss": 0.013,
      "step": 1057120
    },
    {
      "epoch": 1.7300327958995307,
      "grad_norm": 0.2619204521179199,
      "learning_rate": 6.583027483642258e-06,
      "loss": 0.014,
      "step": 1057140
    },
    {
      "epoch": 1.730065526338184,
      "grad_norm": 0.35206139087677,
      "learning_rate": 6.582961591428741e-06,
      "loss": 0.0136,
      "step": 1057160
    },
    {
      "epoch": 1.7300982567768375,
      "grad_norm": 0.8663806915283203,
      "learning_rate": 6.582895699215224e-06,
      "loss": 0.0155,
      "step": 1057180
    },
    {
      "epoch": 1.7301309872154906,
      "grad_norm": 0.7836209535598755,
      "learning_rate": 6.582829807001707e-06,
      "loss": 0.0134,
      "step": 1057200
    },
    {
      "epoch": 1.730163717654144,
      "grad_norm": 0.43965235352516174,
      "learning_rate": 6.5827639147881905e-06,
      "loss": 0.0205,
      "step": 1057220
    },
    {
      "epoch": 1.7301964480927974,
      "grad_norm": 0.8114961385726929,
      "learning_rate": 6.582698022574673e-06,
      "loss": 0.0221,
      "step": 1057240
    },
    {
      "epoch": 1.7302291785314505,
      "grad_norm": 0.6682285666465759,
      "learning_rate": 6.582632130361156e-06,
      "loss": 0.02,
      "step": 1057260
    },
    {
      "epoch": 1.7302619089701041,
      "grad_norm": 0.5999786853790283,
      "learning_rate": 6.582566238147639e-06,
      "loss": 0.0125,
      "step": 1057280
    },
    {
      "epoch": 1.7302946394087573,
      "grad_norm": 0.6508513689041138,
      "learning_rate": 6.582500345934122e-06,
      "loss": 0.0156,
      "step": 1057300
    },
    {
      "epoch": 1.7303273698474106,
      "grad_norm": 0.7121164202690125,
      "learning_rate": 6.582434453720604e-06,
      "loss": 0.0172,
      "step": 1057320
    },
    {
      "epoch": 1.730360100286064,
      "grad_norm": 0.4952561855316162,
      "learning_rate": 6.582368561507088e-06,
      "loss": 0.0146,
      "step": 1057340
    },
    {
      "epoch": 1.7303928307247174,
      "grad_norm": 1.0420646667480469,
      "learning_rate": 6.58230266929357e-06,
      "loss": 0.0212,
      "step": 1057360
    },
    {
      "epoch": 1.7304255611633708,
      "grad_norm": 1.1471055746078491,
      "learning_rate": 6.582236777080053e-06,
      "loss": 0.0135,
      "step": 1057380
    },
    {
      "epoch": 1.730458291602024,
      "grad_norm": 0.5492916703224182,
      "learning_rate": 6.582170884866535e-06,
      "loss": 0.0159,
      "step": 1057400
    },
    {
      "epoch": 1.7304910220406775,
      "grad_norm": 1.1212341785430908,
      "learning_rate": 6.582104992653019e-06,
      "loss": 0.0271,
      "step": 1057420
    },
    {
      "epoch": 1.7305237524793307,
      "grad_norm": 0.42534464597702026,
      "learning_rate": 6.5820391004395015e-06,
      "loss": 0.0167,
      "step": 1057440
    },
    {
      "epoch": 1.730556482917984,
      "grad_norm": 0.3212800920009613,
      "learning_rate": 6.581973208225984e-06,
      "loss": 0.021,
      "step": 1057460
    },
    {
      "epoch": 1.7305892133566374,
      "grad_norm": 0.6021885275840759,
      "learning_rate": 6.581907316012467e-06,
      "loss": 0.0169,
      "step": 1057480
    },
    {
      "epoch": 1.7306219437952908,
      "grad_norm": 2.157785177230835,
      "learning_rate": 6.5818414237989505e-06,
      "loss": 0.0213,
      "step": 1057500
    },
    {
      "epoch": 1.7306546742339441,
      "grad_norm": 0.4889359474182129,
      "learning_rate": 6.5817755315854324e-06,
      "loss": 0.0259,
      "step": 1057520
    },
    {
      "epoch": 1.7306874046725973,
      "grad_norm": 0.23639097809791565,
      "learning_rate": 6.581709639371916e-06,
      "loss": 0.0178,
      "step": 1057540
    },
    {
      "epoch": 1.730720135111251,
      "grad_norm": 0.8084880113601685,
      "learning_rate": 6.5816437471584e-06,
      "loss": 0.0108,
      "step": 1057560
    },
    {
      "epoch": 1.730752865549904,
      "grad_norm": 0.48895561695098877,
      "learning_rate": 6.5815778549448815e-06,
      "loss": 0.0176,
      "step": 1057580
    },
    {
      "epoch": 1.7307855959885574,
      "grad_norm": 0.45840728282928467,
      "learning_rate": 6.581511962731365e-06,
      "loss": 0.0139,
      "step": 1057600
    },
    {
      "epoch": 1.7308183264272108,
      "grad_norm": 0.5602332353591919,
      "learning_rate": 6.581446070517847e-06,
      "loss": 0.0201,
      "step": 1057620
    },
    {
      "epoch": 1.7308510568658642,
      "grad_norm": 0.6598668098449707,
      "learning_rate": 6.5813801783043306e-06,
      "loss": 0.0199,
      "step": 1057640
    },
    {
      "epoch": 1.7308837873045175,
      "grad_norm": 0.2714182138442993,
      "learning_rate": 6.581314286090813e-06,
      "loss": 0.0236,
      "step": 1057660
    },
    {
      "epoch": 1.7309165177431707,
      "grad_norm": 1.3966128826141357,
      "learning_rate": 6.581248393877296e-06,
      "loss": 0.0249,
      "step": 1057680
    },
    {
      "epoch": 1.7309492481818243,
      "grad_norm": 0.5141590237617493,
      "learning_rate": 6.581182501663779e-06,
      "loss": 0.0183,
      "step": 1057700
    },
    {
      "epoch": 1.7309819786204774,
      "grad_norm": 0.31490427255630493,
      "learning_rate": 6.581116609450262e-06,
      "loss": 0.0178,
      "step": 1057720
    },
    {
      "epoch": 1.7310147090591308,
      "grad_norm": 0.4156266450881958,
      "learning_rate": 6.581050717236744e-06,
      "loss": 0.0182,
      "step": 1057740
    },
    {
      "epoch": 1.7310474394977842,
      "grad_norm": 0.5633217692375183,
      "learning_rate": 6.580984825023228e-06,
      "loss": 0.0211,
      "step": 1057760
    },
    {
      "epoch": 1.7310801699364375,
      "grad_norm": 0.30911368131637573,
      "learning_rate": 6.58091893280971e-06,
      "loss": 0.03,
      "step": 1057780
    },
    {
      "epoch": 1.731112900375091,
      "grad_norm": 0.9894086122512817,
      "learning_rate": 6.580853040596193e-06,
      "loss": 0.0173,
      "step": 1057800
    },
    {
      "epoch": 1.731145630813744,
      "grad_norm": 0.7140418887138367,
      "learning_rate": 6.580787148382676e-06,
      "loss": 0.0157,
      "step": 1057820
    },
    {
      "epoch": 1.7311783612523977,
      "grad_norm": 0.9350173473358154,
      "learning_rate": 6.580721256169159e-06,
      "loss": 0.0264,
      "step": 1057840
    },
    {
      "epoch": 1.7312110916910508,
      "grad_norm": 0.690635621547699,
      "learning_rate": 6.5806553639556415e-06,
      "loss": 0.0169,
      "step": 1057860
    },
    {
      "epoch": 1.7312438221297042,
      "grad_norm": 0.45747458934783936,
      "learning_rate": 6.580589471742125e-06,
      "loss": 0.0195,
      "step": 1057880
    },
    {
      "epoch": 1.7312765525683576,
      "grad_norm": 0.2796230614185333,
      "learning_rate": 6.580523579528607e-06,
      "loss": 0.0152,
      "step": 1057900
    },
    {
      "epoch": 1.7313092830070107,
      "grad_norm": 0.3700020909309387,
      "learning_rate": 6.580457687315091e-06,
      "loss": 0.0195,
      "step": 1057920
    },
    {
      "epoch": 1.7313420134456643,
      "grad_norm": 0.21619975566864014,
      "learning_rate": 6.580391795101574e-06,
      "loss": 0.0182,
      "step": 1057940
    },
    {
      "epoch": 1.7313747438843174,
      "grad_norm": 0.5781368017196655,
      "learning_rate": 6.580325902888056e-06,
      "loss": 0.0146,
      "step": 1057960
    },
    {
      "epoch": 1.731407474322971,
      "grad_norm": 0.10349390655755997,
      "learning_rate": 6.58026001067454e-06,
      "loss": 0.0205,
      "step": 1057980
    },
    {
      "epoch": 1.7314402047616242,
      "grad_norm": 0.5488967895507812,
      "learning_rate": 6.5801941184610216e-06,
      "loss": 0.0171,
      "step": 1058000
    },
    {
      "epoch": 1.7314729352002776,
      "grad_norm": 0.665203869342804,
      "learning_rate": 6.580128226247505e-06,
      "loss": 0.021,
      "step": 1058020
    },
    {
      "epoch": 1.731505665638931,
      "grad_norm": 0.4458184242248535,
      "learning_rate": 6.580062334033988e-06,
      "loss": 0.0186,
      "step": 1058040
    },
    {
      "epoch": 1.731538396077584,
      "grad_norm": 0.27326667308807373,
      "learning_rate": 6.579996441820471e-06,
      "loss": 0.0254,
      "step": 1058060
    },
    {
      "epoch": 1.7315711265162377,
      "grad_norm": 0.3592599630355835,
      "learning_rate": 6.579930549606953e-06,
      "loss": 0.0115,
      "step": 1058080
    },
    {
      "epoch": 1.7316038569548908,
      "grad_norm": 0.3522641062736511,
      "learning_rate": 6.579864657393437e-06,
      "loss": 0.0202,
      "step": 1058100
    },
    {
      "epoch": 1.7316365873935442,
      "grad_norm": 0.5195868015289307,
      "learning_rate": 6.579798765179919e-06,
      "loss": 0.019,
      "step": 1058120
    },
    {
      "epoch": 1.7316693178321976,
      "grad_norm": 0.7679192423820496,
      "learning_rate": 6.5797328729664024e-06,
      "loss": 0.0159,
      "step": 1058140
    },
    {
      "epoch": 1.731702048270851,
      "grad_norm": 0.3535742461681366,
      "learning_rate": 6.579666980752884e-06,
      "loss": 0.0104,
      "step": 1058160
    },
    {
      "epoch": 1.7317347787095043,
      "grad_norm": 0.2531043291091919,
      "learning_rate": 6.579601088539368e-06,
      "loss": 0.0215,
      "step": 1058180
    },
    {
      "epoch": 1.7317675091481575,
      "grad_norm": 0.9588281512260437,
      "learning_rate": 6.579535196325851e-06,
      "loss": 0.0192,
      "step": 1058200
    },
    {
      "epoch": 1.731800239586811,
      "grad_norm": 0.5376030206680298,
      "learning_rate": 6.579469304112333e-06,
      "loss": 0.0209,
      "step": 1058220
    },
    {
      "epoch": 1.7318329700254642,
      "grad_norm": 0.695219874382019,
      "learning_rate": 6.579403411898816e-06,
      "loss": 0.0166,
      "step": 1058240
    },
    {
      "epoch": 1.7318657004641176,
      "grad_norm": 0.7419455051422119,
      "learning_rate": 6.5793375196853e-06,
      "loss": 0.0149,
      "step": 1058260
    },
    {
      "epoch": 1.731898430902771,
      "grad_norm": 0.5872769355773926,
      "learning_rate": 6.5792716274717825e-06,
      "loss": 0.032,
      "step": 1058280
    },
    {
      "epoch": 1.7319311613414243,
      "grad_norm": 1.1645197868347168,
      "learning_rate": 6.579205735258265e-06,
      "loss": 0.0194,
      "step": 1058300
    },
    {
      "epoch": 1.7319638917800777,
      "grad_norm": 0.5151464343070984,
      "learning_rate": 6.579139843044749e-06,
      "loss": 0.0221,
      "step": 1058320
    },
    {
      "epoch": 1.7319966222187309,
      "grad_norm": 1.1115726232528687,
      "learning_rate": 6.579073950831231e-06,
      "loss": 0.0227,
      "step": 1058340
    },
    {
      "epoch": 1.7320293526573844,
      "grad_norm": 1.0262738466262817,
      "learning_rate": 6.579008058617714e-06,
      "loss": 0.0206,
      "step": 1058360
    },
    {
      "epoch": 1.7320620830960376,
      "grad_norm": 0.3049931824207306,
      "learning_rate": 6.578942166404196e-06,
      "loss": 0.0171,
      "step": 1058380
    },
    {
      "epoch": 1.732094813534691,
      "grad_norm": 0.2280036062002182,
      "learning_rate": 6.57887627419068e-06,
      "loss": 0.0154,
      "step": 1058400
    },
    {
      "epoch": 1.7321275439733443,
      "grad_norm": 0.6764751672744751,
      "learning_rate": 6.578810381977162e-06,
      "loss": 0.0168,
      "step": 1058420
    },
    {
      "epoch": 1.7321602744119977,
      "grad_norm": 0.7235329747200012,
      "learning_rate": 6.578744489763645e-06,
      "loss": 0.0218,
      "step": 1058440
    },
    {
      "epoch": 1.732193004850651,
      "grad_norm": 0.925521969795227,
      "learning_rate": 6.578678597550128e-06,
      "loss": 0.0142,
      "step": 1058460
    },
    {
      "epoch": 1.7322257352893042,
      "grad_norm": 0.5334980487823486,
      "learning_rate": 6.578612705336611e-06,
      "loss": 0.0138,
      "step": 1058480
    },
    {
      "epoch": 1.7322584657279578,
      "grad_norm": 1.0955805778503418,
      "learning_rate": 6.5785468131230935e-06,
      "loss": 0.019,
      "step": 1058500
    },
    {
      "epoch": 1.732291196166611,
      "grad_norm": 0.36297065019607544,
      "learning_rate": 6.578480920909577e-06,
      "loss": 0.0186,
      "step": 1058520
    },
    {
      "epoch": 1.7323239266052644,
      "grad_norm": 0.287095308303833,
      "learning_rate": 6.578415028696059e-06,
      "loss": 0.0152,
      "step": 1058540
    },
    {
      "epoch": 1.7323566570439177,
      "grad_norm": 0.1931309998035431,
      "learning_rate": 6.5783491364825425e-06,
      "loss": 0.0146,
      "step": 1058560
    },
    {
      "epoch": 1.7323893874825709,
      "grad_norm": 0.1194995790719986,
      "learning_rate": 6.578283244269024e-06,
      "loss": 0.0151,
      "step": 1058580
    },
    {
      "epoch": 1.7324221179212245,
      "grad_norm": 0.22541727125644684,
      "learning_rate": 6.578217352055508e-06,
      "loss": 0.0125,
      "step": 1058600
    },
    {
      "epoch": 1.7324548483598776,
      "grad_norm": 0.3437187075614929,
      "learning_rate": 6.578151459841992e-06,
      "loss": 0.0158,
      "step": 1058620
    },
    {
      "epoch": 1.7324875787985312,
      "grad_norm": 0.3385007381439209,
      "learning_rate": 6.5780855676284735e-06,
      "loss": 0.0262,
      "step": 1058640
    },
    {
      "epoch": 1.7325203092371844,
      "grad_norm": 0.31506407260894775,
      "learning_rate": 6.578019675414957e-06,
      "loss": 0.0163,
      "step": 1058660
    },
    {
      "epoch": 1.7325530396758377,
      "grad_norm": 0.2935779392719269,
      "learning_rate": 6.57795378320144e-06,
      "loss": 0.0207,
      "step": 1058680
    },
    {
      "epoch": 1.732585770114491,
      "grad_norm": 1.8407646417617798,
      "learning_rate": 6.5778878909879225e-06,
      "loss": 0.0155,
      "step": 1058700
    },
    {
      "epoch": 1.7326185005531443,
      "grad_norm": 1.486202359199524,
      "learning_rate": 6.577821998774405e-06,
      "loss": 0.0219,
      "step": 1058720
    },
    {
      "epoch": 1.7326512309917979,
      "grad_norm": 1.2790096998214722,
      "learning_rate": 6.577756106560889e-06,
      "loss": 0.0243,
      "step": 1058740
    },
    {
      "epoch": 1.732683961430451,
      "grad_norm": 0.7843859791755676,
      "learning_rate": 6.577690214347371e-06,
      "loss": 0.0208,
      "step": 1058760
    },
    {
      "epoch": 1.7327166918691044,
      "grad_norm": 0.3585003614425659,
      "learning_rate": 6.577624322133854e-06,
      "loss": 0.0132,
      "step": 1058780
    },
    {
      "epoch": 1.7327494223077577,
      "grad_norm": 0.7138866186141968,
      "learning_rate": 6.577558429920336e-06,
      "loss": 0.0198,
      "step": 1058800
    },
    {
      "epoch": 1.7327821527464111,
      "grad_norm": 0.5778495669364929,
      "learning_rate": 6.57749253770682e-06,
      "loss": 0.0169,
      "step": 1058820
    },
    {
      "epoch": 1.7328148831850645,
      "grad_norm": 0.4307295083999634,
      "learning_rate": 6.5774266454933026e-06,
      "loss": 0.0251,
      "step": 1058840
    },
    {
      "epoch": 1.7328476136237176,
      "grad_norm": 0.4362447261810303,
      "learning_rate": 6.577360753279785e-06,
      "loss": 0.0115,
      "step": 1058860
    },
    {
      "epoch": 1.7328803440623712,
      "grad_norm": 0.2960793673992157,
      "learning_rate": 6.577294861066268e-06,
      "loss": 0.0141,
      "step": 1058880
    },
    {
      "epoch": 1.7329130745010244,
      "grad_norm": 0.17298460006713867,
      "learning_rate": 6.577228968852752e-06,
      "loss": 0.0165,
      "step": 1058900
    },
    {
      "epoch": 1.7329458049396778,
      "grad_norm": 0.311489075422287,
      "learning_rate": 6.5771630766392335e-06,
      "loss": 0.0133,
      "step": 1058920
    },
    {
      "epoch": 1.7329785353783311,
      "grad_norm": 0.8010135293006897,
      "learning_rate": 6.577097184425717e-06,
      "loss": 0.0174,
      "step": 1058940
    },
    {
      "epoch": 1.7330112658169845,
      "grad_norm": 0.20891830325126648,
      "learning_rate": 6.577031292212199e-06,
      "loss": 0.0151,
      "step": 1058960
    },
    {
      "epoch": 1.7330439962556379,
      "grad_norm": 0.2949085235595703,
      "learning_rate": 6.576965399998683e-06,
      "loss": 0.0205,
      "step": 1058980
    },
    {
      "epoch": 1.733076726694291,
      "grad_norm": 0.5835866332054138,
      "learning_rate": 6.576899507785166e-06,
      "loss": 0.0256,
      "step": 1059000
    },
    {
      "epoch": 1.7331094571329446,
      "grad_norm": 0.4097537100315094,
      "learning_rate": 6.576833615571648e-06,
      "loss": 0.0232,
      "step": 1059020
    },
    {
      "epoch": 1.7331421875715978,
      "grad_norm": 1.2147325277328491,
      "learning_rate": 6.576767723358132e-06,
      "loss": 0.0196,
      "step": 1059040
    },
    {
      "epoch": 1.7331749180102511,
      "grad_norm": 1.1005610227584839,
      "learning_rate": 6.576701831144614e-06,
      "loss": 0.0122,
      "step": 1059060
    },
    {
      "epoch": 1.7332076484489045,
      "grad_norm": 0.3482631742954254,
      "learning_rate": 6.576635938931097e-06,
      "loss": 0.0154,
      "step": 1059080
    },
    {
      "epoch": 1.7332403788875579,
      "grad_norm": 0.3232755661010742,
      "learning_rate": 6.57657004671758e-06,
      "loss": 0.0218,
      "step": 1059100
    },
    {
      "epoch": 1.7332731093262113,
      "grad_norm": 0.16284051537513733,
      "learning_rate": 6.5765041545040635e-06,
      "loss": 0.0151,
      "step": 1059120
    },
    {
      "epoch": 1.7333058397648644,
      "grad_norm": 0.16100279986858368,
      "learning_rate": 6.576438262290545e-06,
      "loss": 0.0173,
      "step": 1059140
    },
    {
      "epoch": 1.733338570203518,
      "grad_norm": 0.3995835483074188,
      "learning_rate": 6.576372370077029e-06,
      "loss": 0.0147,
      "step": 1059160
    },
    {
      "epoch": 1.7333713006421712,
      "grad_norm": 0.4869338274002075,
      "learning_rate": 6.576306477863511e-06,
      "loss": 0.025,
      "step": 1059180
    },
    {
      "epoch": 1.7334040310808245,
      "grad_norm": 0.9382444024085999,
      "learning_rate": 6.5762405856499944e-06,
      "loss": 0.0223,
      "step": 1059200
    },
    {
      "epoch": 1.733436761519478,
      "grad_norm": 1.3710131645202637,
      "learning_rate": 6.576174693436477e-06,
      "loss": 0.0241,
      "step": 1059220
    },
    {
      "epoch": 1.7334694919581313,
      "grad_norm": 1.068648099899292,
      "learning_rate": 6.57610880122296e-06,
      "loss": 0.0208,
      "step": 1059240
    },
    {
      "epoch": 1.7335022223967846,
      "grad_norm": 0.6314718723297119,
      "learning_rate": 6.576042909009443e-06,
      "loss": 0.0204,
      "step": 1059260
    },
    {
      "epoch": 1.7335349528354378,
      "grad_norm": 1.2514243125915527,
      "learning_rate": 6.575977016795926e-06,
      "loss": 0.0225,
      "step": 1059280
    },
    {
      "epoch": 1.7335676832740914,
      "grad_norm": 0.6866119503974915,
      "learning_rate": 6.575911124582408e-06,
      "loss": 0.0238,
      "step": 1059300
    },
    {
      "epoch": 1.7336004137127445,
      "grad_norm": 0.3817410469055176,
      "learning_rate": 6.575845232368892e-06,
      "loss": 0.0208,
      "step": 1059320
    },
    {
      "epoch": 1.733633144151398,
      "grad_norm": 0.34221625328063965,
      "learning_rate": 6.575779340155375e-06,
      "loss": 0.0182,
      "step": 1059340
    },
    {
      "epoch": 1.7336658745900513,
      "grad_norm": 0.5936359763145447,
      "learning_rate": 6.575713447941857e-06,
      "loss": 0.0174,
      "step": 1059360
    },
    {
      "epoch": 1.7336986050287044,
      "grad_norm": 0.2660723328590393,
      "learning_rate": 6.575647555728341e-06,
      "loss": 0.0191,
      "step": 1059380
    },
    {
      "epoch": 1.733731335467358,
      "grad_norm": 0.20417146384716034,
      "learning_rate": 6.575581663514823e-06,
      "loss": 0.0127,
      "step": 1059400
    },
    {
      "epoch": 1.7337640659060112,
      "grad_norm": 0.7381467223167419,
      "learning_rate": 6.575515771301306e-06,
      "loss": 0.0211,
      "step": 1059420
    },
    {
      "epoch": 1.7337967963446648,
      "grad_norm": 0.463685005903244,
      "learning_rate": 6.575449879087788e-06,
      "loss": 0.0172,
      "step": 1059440
    },
    {
      "epoch": 1.733829526783318,
      "grad_norm": 0.2846868634223938,
      "learning_rate": 6.575383986874272e-06,
      "loss": 0.0149,
      "step": 1059460
    },
    {
      "epoch": 1.7338622572219713,
      "grad_norm": 0.6936646699905396,
      "learning_rate": 6.5753180946607545e-06,
      "loss": 0.0244,
      "step": 1059480
    },
    {
      "epoch": 1.7338949876606247,
      "grad_norm": 0.5552919507026672,
      "learning_rate": 6.575252202447237e-06,
      "loss": 0.0164,
      "step": 1059500
    },
    {
      "epoch": 1.7339277180992778,
      "grad_norm": 0.28311675786972046,
      "learning_rate": 6.57518631023372e-06,
      "loss": 0.0179,
      "step": 1059520
    },
    {
      "epoch": 1.7339604485379314,
      "grad_norm": 1.1064627170562744,
      "learning_rate": 6.5751204180202035e-06,
      "loss": 0.0309,
      "step": 1059540
    },
    {
      "epoch": 1.7339931789765846,
      "grad_norm": 1.06679368019104,
      "learning_rate": 6.5750545258066854e-06,
      "loss": 0.027,
      "step": 1059560
    },
    {
      "epoch": 1.734025909415238,
      "grad_norm": 0.9253636002540588,
      "learning_rate": 6.574988633593169e-06,
      "loss": 0.0212,
      "step": 1059580
    },
    {
      "epoch": 1.7340586398538913,
      "grad_norm": 1.149680256843567,
      "learning_rate": 6.574922741379651e-06,
      "loss": 0.0179,
      "step": 1059600
    },
    {
      "epoch": 1.7340913702925447,
      "grad_norm": 0.34551650285720825,
      "learning_rate": 6.5748568491661345e-06,
      "loss": 0.0134,
      "step": 1059620
    },
    {
      "epoch": 1.734124100731198,
      "grad_norm": 0.5781611800193787,
      "learning_rate": 6.574790956952617e-06,
      "loss": 0.0153,
      "step": 1059640
    },
    {
      "epoch": 1.7341568311698512,
      "grad_norm": 0.14688320457935333,
      "learning_rate": 6.5747250647391e-06,
      "loss": 0.0217,
      "step": 1059660
    },
    {
      "epoch": 1.7341895616085048,
      "grad_norm": 0.7769759297370911,
      "learning_rate": 6.5746591725255836e-06,
      "loss": 0.0201,
      "step": 1059680
    },
    {
      "epoch": 1.734222292047158,
      "grad_norm": 0.16031639277935028,
      "learning_rate": 6.574593280312066e-06,
      "loss": 0.0189,
      "step": 1059700
    },
    {
      "epoch": 1.7342550224858113,
      "grad_norm": 0.4724816679954529,
      "learning_rate": 6.574527388098549e-06,
      "loss": 0.0228,
      "step": 1059720
    },
    {
      "epoch": 1.7342877529244647,
      "grad_norm": 0.1942467838525772,
      "learning_rate": 6.574461495885032e-06,
      "loss": 0.0255,
      "step": 1059740
    },
    {
      "epoch": 1.734320483363118,
      "grad_norm": 0.5268847346305847,
      "learning_rate": 6.574395603671515e-06,
      "loss": 0.0208,
      "step": 1059760
    },
    {
      "epoch": 1.7343532138017714,
      "grad_norm": 0.25388002395629883,
      "learning_rate": 6.574329711457997e-06,
      "loss": 0.0151,
      "step": 1059780
    },
    {
      "epoch": 1.7343859442404246,
      "grad_norm": 2.048689842224121,
      "learning_rate": 6.574263819244481e-06,
      "loss": 0.0186,
      "step": 1059800
    },
    {
      "epoch": 1.7344186746790782,
      "grad_norm": 1.0153584480285645,
      "learning_rate": 6.574197927030963e-06,
      "loss": 0.0169,
      "step": 1059820
    },
    {
      "epoch": 1.7344514051177313,
      "grad_norm": 0.5816932916641235,
      "learning_rate": 6.574132034817446e-06,
      "loss": 0.0151,
      "step": 1059840
    },
    {
      "epoch": 1.7344841355563847,
      "grad_norm": 1.1600569486618042,
      "learning_rate": 6.574066142603929e-06,
      "loss": 0.0163,
      "step": 1059860
    },
    {
      "epoch": 1.734516865995038,
      "grad_norm": 0.7309288382530212,
      "learning_rate": 6.574000250390412e-06,
      "loss": 0.0247,
      "step": 1059880
    },
    {
      "epoch": 1.7345495964336914,
      "grad_norm": 0.5336069464683533,
      "learning_rate": 6.5739343581768946e-06,
      "loss": 0.0197,
      "step": 1059900
    },
    {
      "epoch": 1.7345823268723448,
      "grad_norm": 0.6966460347175598,
      "learning_rate": 6.573868465963378e-06,
      "loss": 0.0245,
      "step": 1059920
    },
    {
      "epoch": 1.734615057310998,
      "grad_norm": 0.4629570543766022,
      "learning_rate": 6.57380257374986e-06,
      "loss": 0.0153,
      "step": 1059940
    },
    {
      "epoch": 1.7346477877496516,
      "grad_norm": 0.5600165724754333,
      "learning_rate": 6.573736681536344e-06,
      "loss": 0.013,
      "step": 1059960
    },
    {
      "epoch": 1.7346805181883047,
      "grad_norm": 0.5828729271888733,
      "learning_rate": 6.5736707893228255e-06,
      "loss": 0.0136,
      "step": 1059980
    },
    {
      "epoch": 1.734713248626958,
      "grad_norm": 0.338235080242157,
      "learning_rate": 6.573604897109309e-06,
      "loss": 0.0185,
      "step": 1060000
    },
    {
      "epoch": 1.7347459790656115,
      "grad_norm": 0.3199053406715393,
      "learning_rate": 6.573539004895793e-06,
      "loss": 0.0203,
      "step": 1060020
    },
    {
      "epoch": 1.7347787095042648,
      "grad_norm": 0.6493438482284546,
      "learning_rate": 6.5734731126822746e-06,
      "loss": 0.0167,
      "step": 1060040
    },
    {
      "epoch": 1.7348114399429182,
      "grad_norm": 0.31621280312538147,
      "learning_rate": 6.573407220468758e-06,
      "loss": 0.0227,
      "step": 1060060
    },
    {
      "epoch": 1.7348441703815713,
      "grad_norm": 0.4290853440761566,
      "learning_rate": 6.573341328255241e-06,
      "loss": 0.0186,
      "step": 1060080
    },
    {
      "epoch": 1.734876900820225,
      "grad_norm": 0.7014037370681763,
      "learning_rate": 6.573275436041724e-06,
      "loss": 0.0171,
      "step": 1060100
    },
    {
      "epoch": 1.734909631258878,
      "grad_norm": 0.5412858724594116,
      "learning_rate": 6.573209543828206e-06,
      "loss": 0.0256,
      "step": 1060120
    },
    {
      "epoch": 1.7349423616975315,
      "grad_norm": 0.43028515577316284,
      "learning_rate": 6.57314365161469e-06,
      "loss": 0.0172,
      "step": 1060140
    },
    {
      "epoch": 1.7349750921361848,
      "grad_norm": 0.6790508031845093,
      "learning_rate": 6.573077759401172e-06,
      "loss": 0.0122,
      "step": 1060160
    },
    {
      "epoch": 1.735007822574838,
      "grad_norm": 0.4244750142097473,
      "learning_rate": 6.5730118671876554e-06,
      "loss": 0.029,
      "step": 1060180
    },
    {
      "epoch": 1.7350405530134916,
      "grad_norm": 0.15888898074626923,
      "learning_rate": 6.572945974974137e-06,
      "loss": 0.0154,
      "step": 1060200
    },
    {
      "epoch": 1.7350732834521447,
      "grad_norm": 0.6452891230583191,
      "learning_rate": 6.572880082760621e-06,
      "loss": 0.0177,
      "step": 1060220
    },
    {
      "epoch": 1.7351060138907983,
      "grad_norm": 0.5832391381263733,
      "learning_rate": 6.572814190547103e-06,
      "loss": 0.0205,
      "step": 1060240
    },
    {
      "epoch": 1.7351387443294515,
      "grad_norm": 0.7431972026824951,
      "learning_rate": 6.572748298333586e-06,
      "loss": 0.0162,
      "step": 1060260
    },
    {
      "epoch": 1.7351714747681048,
      "grad_norm": 0.8482252955436707,
      "learning_rate": 6.572682406120069e-06,
      "loss": 0.0271,
      "step": 1060280
    },
    {
      "epoch": 1.7352042052067582,
      "grad_norm": 0.574870765209198,
      "learning_rate": 6.572616513906552e-06,
      "loss": 0.0208,
      "step": 1060300
    },
    {
      "epoch": 1.7352369356454114,
      "grad_norm": 0.42928895354270935,
      "learning_rate": 6.572550621693035e-06,
      "loss": 0.014,
      "step": 1060320
    },
    {
      "epoch": 1.735269666084065,
      "grad_norm": 0.2975117862224579,
      "learning_rate": 6.572484729479518e-06,
      "loss": 0.0167,
      "step": 1060340
    },
    {
      "epoch": 1.7353023965227181,
      "grad_norm": 0.45393502712249756,
      "learning_rate": 6.572418837266e-06,
      "loss": 0.0239,
      "step": 1060360
    },
    {
      "epoch": 1.7353351269613715,
      "grad_norm": 0.7251265645027161,
      "learning_rate": 6.572352945052484e-06,
      "loss": 0.0155,
      "step": 1060380
    },
    {
      "epoch": 1.7353678574000249,
      "grad_norm": 0.531782865524292,
      "learning_rate": 6.572287052838967e-06,
      "loss": 0.0125,
      "step": 1060400
    },
    {
      "epoch": 1.7354005878386782,
      "grad_norm": 0.2702981233596802,
      "learning_rate": 6.572221160625449e-06,
      "loss": 0.026,
      "step": 1060420
    },
    {
      "epoch": 1.7354333182773316,
      "grad_norm": 0.5497785210609436,
      "learning_rate": 6.572155268411933e-06,
      "loss": 0.0186,
      "step": 1060440
    },
    {
      "epoch": 1.7354660487159848,
      "grad_norm": 0.6746448874473572,
      "learning_rate": 6.572089376198415e-06,
      "loss": 0.0294,
      "step": 1060460
    },
    {
      "epoch": 1.7354987791546383,
      "grad_norm": 0.40534406900405884,
      "learning_rate": 6.572023483984898e-06,
      "loss": 0.0188,
      "step": 1060480
    },
    {
      "epoch": 1.7355315095932915,
      "grad_norm": 0.3408109247684479,
      "learning_rate": 6.571957591771381e-06,
      "loss": 0.0154,
      "step": 1060500
    },
    {
      "epoch": 1.7355642400319449,
      "grad_norm": 0.5815654993057251,
      "learning_rate": 6.571891699557864e-06,
      "loss": 0.0222,
      "step": 1060520
    },
    {
      "epoch": 1.7355969704705982,
      "grad_norm": 0.3327430784702301,
      "learning_rate": 6.5718258073443465e-06,
      "loss": 0.0135,
      "step": 1060540
    },
    {
      "epoch": 1.7356297009092516,
      "grad_norm": 0.43604734539985657,
      "learning_rate": 6.57175991513083e-06,
      "loss": 0.0105,
      "step": 1060560
    },
    {
      "epoch": 1.735662431347905,
      "grad_norm": 0.13852709531784058,
      "learning_rate": 6.571694022917312e-06,
      "loss": 0.0132,
      "step": 1060580
    },
    {
      "epoch": 1.7356951617865581,
      "grad_norm": 0.6098799705505371,
      "learning_rate": 6.5716281307037955e-06,
      "loss": 0.0287,
      "step": 1060600
    },
    {
      "epoch": 1.7357278922252117,
      "grad_norm": 1.429183840751648,
      "learning_rate": 6.571562238490277e-06,
      "loss": 0.0272,
      "step": 1060620
    },
    {
      "epoch": 1.7357606226638649,
      "grad_norm": 0.7025725245475769,
      "learning_rate": 6.571496346276761e-06,
      "loss": 0.0137,
      "step": 1060640
    },
    {
      "epoch": 1.7357933531025183,
      "grad_norm": 0.4434435963630676,
      "learning_rate": 6.571430454063244e-06,
      "loss": 0.0251,
      "step": 1060660
    },
    {
      "epoch": 1.7358260835411716,
      "grad_norm": 1.0675851106643677,
      "learning_rate": 6.5713645618497265e-06,
      "loss": 0.0288,
      "step": 1060680
    },
    {
      "epoch": 1.735858813979825,
      "grad_norm": 1.3376479148864746,
      "learning_rate": 6.571298669636209e-06,
      "loss": 0.0209,
      "step": 1060700
    },
    {
      "epoch": 1.7358915444184784,
      "grad_norm": 0.703011155128479,
      "learning_rate": 6.571232777422693e-06,
      "loss": 0.0169,
      "step": 1060720
    },
    {
      "epoch": 1.7359242748571315,
      "grad_norm": 0.5022146105766296,
      "learning_rate": 6.5711668852091755e-06,
      "loss": 0.0186,
      "step": 1060740
    },
    {
      "epoch": 1.7359570052957851,
      "grad_norm": 0.7936590313911438,
      "learning_rate": 6.571100992995658e-06,
      "loss": 0.0173,
      "step": 1060760
    },
    {
      "epoch": 1.7359897357344383,
      "grad_norm": 1.7492938041687012,
      "learning_rate": 6.571035100782142e-06,
      "loss": 0.0147,
      "step": 1060780
    },
    {
      "epoch": 1.7360224661730916,
      "grad_norm": 0.6321842074394226,
      "learning_rate": 6.570969208568624e-06,
      "loss": 0.0173,
      "step": 1060800
    },
    {
      "epoch": 1.736055196611745,
      "grad_norm": 0.25020864605903625,
      "learning_rate": 6.570903316355107e-06,
      "loss": 0.0189,
      "step": 1060820
    },
    {
      "epoch": 1.7360879270503984,
      "grad_norm": 0.17582479119300842,
      "learning_rate": 6.570837424141589e-06,
      "loss": 0.0163,
      "step": 1060840
    },
    {
      "epoch": 1.7361206574890518,
      "grad_norm": 0.0901661217212677,
      "learning_rate": 6.570771531928073e-06,
      "loss": 0.0259,
      "step": 1060860
    },
    {
      "epoch": 1.736153387927705,
      "grad_norm": 0.4429562985897064,
      "learning_rate": 6.5707056397145556e-06,
      "loss": 0.0177,
      "step": 1060880
    },
    {
      "epoch": 1.7361861183663585,
      "grad_norm": 0.44877442717552185,
      "learning_rate": 6.570639747501038e-06,
      "loss": 0.0195,
      "step": 1060900
    },
    {
      "epoch": 1.7362188488050116,
      "grad_norm": 0.49306222796440125,
      "learning_rate": 6.570573855287521e-06,
      "loss": 0.0176,
      "step": 1060920
    },
    {
      "epoch": 1.736251579243665,
      "grad_norm": 0.45050644874572754,
      "learning_rate": 6.570507963074005e-06,
      "loss": 0.0216,
      "step": 1060940
    },
    {
      "epoch": 1.7362843096823184,
      "grad_norm": 0.8338256478309631,
      "learning_rate": 6.5704420708604865e-06,
      "loss": 0.0208,
      "step": 1060960
    },
    {
      "epoch": 1.7363170401209715,
      "grad_norm": 0.77406907081604,
      "learning_rate": 6.57037617864697e-06,
      "loss": 0.0245,
      "step": 1060980
    },
    {
      "epoch": 1.7363497705596251,
      "grad_norm": 0.22069504857063293,
      "learning_rate": 6.570310286433452e-06,
      "loss": 0.0114,
      "step": 1061000
    },
    {
      "epoch": 1.7363825009982783,
      "grad_norm": 0.4066219925880432,
      "learning_rate": 6.570244394219936e-06,
      "loss": 0.0257,
      "step": 1061020
    },
    {
      "epoch": 1.7364152314369319,
      "grad_norm": 0.40918707847595215,
      "learning_rate": 6.570178502006418e-06,
      "loss": 0.0208,
      "step": 1061040
    },
    {
      "epoch": 1.736447961875585,
      "grad_norm": 1.9687397480010986,
      "learning_rate": 6.570112609792901e-06,
      "loss": 0.0161,
      "step": 1061060
    },
    {
      "epoch": 1.7364806923142384,
      "grad_norm": 0.8754593729972839,
      "learning_rate": 6.570046717579385e-06,
      "loss": 0.0162,
      "step": 1061080
    },
    {
      "epoch": 1.7365134227528918,
      "grad_norm": 0.0829598605632782,
      "learning_rate": 6.569980825365867e-06,
      "loss": 0.0209,
      "step": 1061100
    },
    {
      "epoch": 1.736546153191545,
      "grad_norm": 0.13394980132579803,
      "learning_rate": 6.56991493315235e-06,
      "loss": 0.0166,
      "step": 1061120
    },
    {
      "epoch": 1.7365788836301985,
      "grad_norm": 0.8521124124526978,
      "learning_rate": 6.569849040938833e-06,
      "loss": 0.02,
      "step": 1061140
    },
    {
      "epoch": 1.7366116140688517,
      "grad_norm": 0.2907879948616028,
      "learning_rate": 6.5697831487253165e-06,
      "loss": 0.0206,
      "step": 1061160
    },
    {
      "epoch": 1.736644344507505,
      "grad_norm": 0.9699831604957581,
      "learning_rate": 6.569717256511798e-06,
      "loss": 0.0182,
      "step": 1061180
    },
    {
      "epoch": 1.7366770749461584,
      "grad_norm": 0.15567509829998016,
      "learning_rate": 6.569651364298282e-06,
      "loss": 0.0187,
      "step": 1061200
    },
    {
      "epoch": 1.7367098053848118,
      "grad_norm": 0.19965103268623352,
      "learning_rate": 6.569585472084764e-06,
      "loss": 0.0186,
      "step": 1061220
    },
    {
      "epoch": 1.7367425358234652,
      "grad_norm": 0.5376834869384766,
      "learning_rate": 6.5695195798712474e-06,
      "loss": 0.0145,
      "step": 1061240
    },
    {
      "epoch": 1.7367752662621183,
      "grad_norm": 0.6117430329322815,
      "learning_rate": 6.569453687657729e-06,
      "loss": 0.0268,
      "step": 1061260
    },
    {
      "epoch": 1.736807996700772,
      "grad_norm": 0.4528953731060028,
      "learning_rate": 6.569387795444213e-06,
      "loss": 0.0233,
      "step": 1061280
    },
    {
      "epoch": 1.736840727139425,
      "grad_norm": 0.26029863953590393,
      "learning_rate": 6.569321903230696e-06,
      "loss": 0.0202,
      "step": 1061300
    },
    {
      "epoch": 1.7368734575780784,
      "grad_norm": 0.5035609602928162,
      "learning_rate": 6.569256011017178e-06,
      "loss": 0.0237,
      "step": 1061320
    },
    {
      "epoch": 1.7369061880167318,
      "grad_norm": 0.7461770176887512,
      "learning_rate": 6.569190118803661e-06,
      "loss": 0.0157,
      "step": 1061340
    },
    {
      "epoch": 1.7369389184553852,
      "grad_norm": 0.24033083021640778,
      "learning_rate": 6.569124226590145e-06,
      "loss": 0.0104,
      "step": 1061360
    },
    {
      "epoch": 1.7369716488940385,
      "grad_norm": 0.26543405652046204,
      "learning_rate": 6.569058334376627e-06,
      "loss": 0.0194,
      "step": 1061380
    },
    {
      "epoch": 1.7370043793326917,
      "grad_norm": 0.6525557041168213,
      "learning_rate": 6.56899244216311e-06,
      "loss": 0.0205,
      "step": 1061400
    },
    {
      "epoch": 1.7370371097713453,
      "grad_norm": 0.2087608277797699,
      "learning_rate": 6.568926549949592e-06,
      "loss": 0.0252,
      "step": 1061420
    },
    {
      "epoch": 1.7370698402099984,
      "grad_norm": 0.9960150718688965,
      "learning_rate": 6.568860657736076e-06,
      "loss": 0.0137,
      "step": 1061440
    },
    {
      "epoch": 1.7371025706486518,
      "grad_norm": 0.20882122218608856,
      "learning_rate": 6.568794765522559e-06,
      "loss": 0.0166,
      "step": 1061460
    },
    {
      "epoch": 1.7371353010873052,
      "grad_norm": 0.9753770232200623,
      "learning_rate": 6.568728873309041e-06,
      "loss": 0.0267,
      "step": 1061480
    },
    {
      "epoch": 1.7371680315259586,
      "grad_norm": 0.18796680867671967,
      "learning_rate": 6.568662981095525e-06,
      "loss": 0.014,
      "step": 1061500
    },
    {
      "epoch": 1.737200761964612,
      "grad_norm": 0.35316142439842224,
      "learning_rate": 6.5685970888820075e-06,
      "loss": 0.0209,
      "step": 1061520
    },
    {
      "epoch": 1.737233492403265,
      "grad_norm": 0.9218857884407043,
      "learning_rate": 6.56853119666849e-06,
      "loss": 0.0183,
      "step": 1061540
    },
    {
      "epoch": 1.7372662228419187,
      "grad_norm": 0.7398395538330078,
      "learning_rate": 6.568465304454973e-06,
      "loss": 0.0156,
      "step": 1061560
    },
    {
      "epoch": 1.7372989532805718,
      "grad_norm": 0.15686003863811493,
      "learning_rate": 6.5683994122414565e-06,
      "loss": 0.0219,
      "step": 1061580
    },
    {
      "epoch": 1.7373316837192252,
      "grad_norm": 0.28774309158325195,
      "learning_rate": 6.5683335200279384e-06,
      "loss": 0.0174,
      "step": 1061600
    },
    {
      "epoch": 1.7373644141578786,
      "grad_norm": 0.4655173420906067,
      "learning_rate": 6.568267627814422e-06,
      "loss": 0.0115,
      "step": 1061620
    },
    {
      "epoch": 1.7373971445965317,
      "grad_norm": 0.5742805004119873,
      "learning_rate": 6.568201735600904e-06,
      "loss": 0.0179,
      "step": 1061640
    },
    {
      "epoch": 1.7374298750351853,
      "grad_norm": 0.6036278009414673,
      "learning_rate": 6.5681358433873875e-06,
      "loss": 0.0206,
      "step": 1061660
    },
    {
      "epoch": 1.7374626054738385,
      "grad_norm": 0.14373447000980377,
      "learning_rate": 6.56806995117387e-06,
      "loss": 0.0266,
      "step": 1061680
    },
    {
      "epoch": 1.737495335912492,
      "grad_norm": 0.29566770792007446,
      "learning_rate": 6.568004058960353e-06,
      "loss": 0.0249,
      "step": 1061700
    },
    {
      "epoch": 1.7375280663511452,
      "grad_norm": 0.099087193608284,
      "learning_rate": 6.567938166746836e-06,
      "loss": 0.0244,
      "step": 1061720
    },
    {
      "epoch": 1.7375607967897986,
      "grad_norm": 0.4460506737232208,
      "learning_rate": 6.567872274533319e-06,
      "loss": 0.0172,
      "step": 1061740
    },
    {
      "epoch": 1.737593527228452,
      "grad_norm": 1.1703579425811768,
      "learning_rate": 6.567806382319801e-06,
      "loss": 0.0162,
      "step": 1061760
    },
    {
      "epoch": 1.737626257667105,
      "grad_norm": 0.17959609627723694,
      "learning_rate": 6.567740490106285e-06,
      "loss": 0.0222,
      "step": 1061780
    },
    {
      "epoch": 1.7376589881057587,
      "grad_norm": 0.16215220093727112,
      "learning_rate": 6.567674597892768e-06,
      "loss": 0.0172,
      "step": 1061800
    },
    {
      "epoch": 1.7376917185444118,
      "grad_norm": 0.6409273743629456,
      "learning_rate": 6.56760870567925e-06,
      "loss": 0.0183,
      "step": 1061820
    },
    {
      "epoch": 1.7377244489830652,
      "grad_norm": 0.28454047441482544,
      "learning_rate": 6.567542813465734e-06,
      "loss": 0.0179,
      "step": 1061840
    },
    {
      "epoch": 1.7377571794217186,
      "grad_norm": 0.5559461712837219,
      "learning_rate": 6.567476921252216e-06,
      "loss": 0.0224,
      "step": 1061860
    },
    {
      "epoch": 1.737789909860372,
      "grad_norm": 0.3961859941482544,
      "learning_rate": 6.567411029038699e-06,
      "loss": 0.0219,
      "step": 1061880
    },
    {
      "epoch": 1.7378226402990253,
      "grad_norm": 0.341336190700531,
      "learning_rate": 6.567345136825182e-06,
      "loss": 0.011,
      "step": 1061900
    },
    {
      "epoch": 1.7378553707376785,
      "grad_norm": 0.539768397808075,
      "learning_rate": 6.567279244611665e-06,
      "loss": 0.0229,
      "step": 1061920
    },
    {
      "epoch": 1.737888101176332,
      "grad_norm": 2.574937343597412,
      "learning_rate": 6.5672133523981476e-06,
      "loss": 0.0167,
      "step": 1061940
    },
    {
      "epoch": 1.7379208316149852,
      "grad_norm": 0.24339981377124786,
      "learning_rate": 6.567147460184631e-06,
      "loss": 0.0222,
      "step": 1061960
    },
    {
      "epoch": 1.7379535620536386,
      "grad_norm": 1.5207526683807373,
      "learning_rate": 6.567081567971113e-06,
      "loss": 0.0195,
      "step": 1061980
    },
    {
      "epoch": 1.737986292492292,
      "grad_norm": 0.33637240529060364,
      "learning_rate": 6.567015675757597e-06,
      "loss": 0.0216,
      "step": 1062000
    },
    {
      "epoch": 1.7380190229309453,
      "grad_norm": 0.15860092639923096,
      "learning_rate": 6.5669497835440785e-06,
      "loss": 0.0261,
      "step": 1062020
    },
    {
      "epoch": 1.7380517533695987,
      "grad_norm": 0.32814207673072815,
      "learning_rate": 6.566883891330562e-06,
      "loss": 0.0175,
      "step": 1062040
    },
    {
      "epoch": 1.7380844838082519,
      "grad_norm": 0.21565575897693634,
      "learning_rate": 6.566817999117045e-06,
      "loss": 0.0192,
      "step": 1062060
    },
    {
      "epoch": 1.7381172142469055,
      "grad_norm": 0.21444597840309143,
      "learning_rate": 6.566752106903528e-06,
      "loss": 0.0156,
      "step": 1062080
    },
    {
      "epoch": 1.7381499446855586,
      "grad_norm": 0.31762462854385376,
      "learning_rate": 6.56668621469001e-06,
      "loss": 0.0263,
      "step": 1062100
    },
    {
      "epoch": 1.738182675124212,
      "grad_norm": 1.1774797439575195,
      "learning_rate": 6.566620322476494e-06,
      "loss": 0.018,
      "step": 1062120
    },
    {
      "epoch": 1.7382154055628654,
      "grad_norm": 0.1779058873653412,
      "learning_rate": 6.566554430262977e-06,
      "loss": 0.0174,
      "step": 1062140
    },
    {
      "epoch": 1.7382481360015187,
      "grad_norm": 0.21831925213336945,
      "learning_rate": 6.566488538049459e-06,
      "loss": 0.0147,
      "step": 1062160
    },
    {
      "epoch": 1.738280866440172,
      "grad_norm": 0.3783583343029022,
      "learning_rate": 6.566422645835943e-06,
      "loss": 0.0197,
      "step": 1062180
    },
    {
      "epoch": 1.7383135968788253,
      "grad_norm": 0.3327118158340454,
      "learning_rate": 6.566356753622425e-06,
      "loss": 0.0139,
      "step": 1062200
    },
    {
      "epoch": 1.7383463273174788,
      "grad_norm": 0.7611237168312073,
      "learning_rate": 6.5662908614089085e-06,
      "loss": 0.0156,
      "step": 1062220
    },
    {
      "epoch": 1.738379057756132,
      "grad_norm": 0.9278755187988281,
      "learning_rate": 6.56622496919539e-06,
      "loss": 0.013,
      "step": 1062240
    },
    {
      "epoch": 1.7384117881947854,
      "grad_norm": 0.12090055644512177,
      "learning_rate": 6.566159076981874e-06,
      "loss": 0.0197,
      "step": 1062260
    },
    {
      "epoch": 1.7384445186334387,
      "grad_norm": 0.5005308985710144,
      "learning_rate": 6.566093184768356e-06,
      "loss": 0.0155,
      "step": 1062280
    },
    {
      "epoch": 1.7384772490720921,
      "grad_norm": 2.297003746032715,
      "learning_rate": 6.566027292554839e-06,
      "loss": 0.0299,
      "step": 1062300
    },
    {
      "epoch": 1.7385099795107455,
      "grad_norm": 0.3227369785308838,
      "learning_rate": 6.565961400341322e-06,
      "loss": 0.0132,
      "step": 1062320
    },
    {
      "epoch": 1.7385427099493986,
      "grad_norm": 0.851070761680603,
      "learning_rate": 6.565895508127805e-06,
      "loss": 0.0181,
      "step": 1062340
    },
    {
      "epoch": 1.7385754403880522,
      "grad_norm": 0.09848833084106445,
      "learning_rate": 6.565829615914288e-06,
      "loss": 0.0196,
      "step": 1062360
    },
    {
      "epoch": 1.7386081708267054,
      "grad_norm": 0.2339414358139038,
      "learning_rate": 6.565763723700771e-06,
      "loss": 0.0191,
      "step": 1062380
    },
    {
      "epoch": 1.7386409012653588,
      "grad_norm": 1.1904104948043823,
      "learning_rate": 6.565697831487253e-06,
      "loss": 0.0246,
      "step": 1062400
    },
    {
      "epoch": 1.7386736317040121,
      "grad_norm": 0.7732158303260803,
      "learning_rate": 6.565631939273737e-06,
      "loss": 0.0279,
      "step": 1062420
    },
    {
      "epoch": 1.7387063621426653,
      "grad_norm": 0.6481981873512268,
      "learning_rate": 6.565566047060219e-06,
      "loss": 0.0226,
      "step": 1062440
    },
    {
      "epoch": 1.7387390925813189,
      "grad_norm": 1.3246657848358154,
      "learning_rate": 6.565500154846702e-06,
      "loss": 0.0153,
      "step": 1062460
    },
    {
      "epoch": 1.738771823019972,
      "grad_norm": 0.7284947037696838,
      "learning_rate": 6.565434262633185e-06,
      "loss": 0.0233,
      "step": 1062480
    },
    {
      "epoch": 1.7388045534586256,
      "grad_norm": 1.4259065389633179,
      "learning_rate": 6.565368370419668e-06,
      "loss": 0.0195,
      "step": 1062500
    },
    {
      "epoch": 1.7388372838972788,
      "grad_norm": 0.7671464681625366,
      "learning_rate": 6.565302478206151e-06,
      "loss": 0.0275,
      "step": 1062520
    },
    {
      "epoch": 1.7388700143359321,
      "grad_norm": 0.465928316116333,
      "learning_rate": 6.565236585992634e-06,
      "loss": 0.016,
      "step": 1062540
    },
    {
      "epoch": 1.7389027447745855,
      "grad_norm": 0.5734797716140747,
      "learning_rate": 6.565170693779117e-06,
      "loss": 0.0214,
      "step": 1062560
    },
    {
      "epoch": 1.7389354752132387,
      "grad_norm": 0.9271844625473022,
      "learning_rate": 6.5651048015655995e-06,
      "loss": 0.0262,
      "step": 1062580
    },
    {
      "epoch": 1.7389682056518923,
      "grad_norm": 1.0483680963516235,
      "learning_rate": 6.565038909352083e-06,
      "loss": 0.0204,
      "step": 1062600
    },
    {
      "epoch": 1.7390009360905454,
      "grad_norm": 0.7169899940490723,
      "learning_rate": 6.564973017138565e-06,
      "loss": 0.0271,
      "step": 1062620
    },
    {
      "epoch": 1.7390336665291988,
      "grad_norm": 0.6512178778648376,
      "learning_rate": 6.5649071249250485e-06,
      "loss": 0.0209,
      "step": 1062640
    },
    {
      "epoch": 1.7390663969678521,
      "grad_norm": 0.572114109992981,
      "learning_rate": 6.56484123271153e-06,
      "loss": 0.0184,
      "step": 1062660
    },
    {
      "epoch": 1.7390991274065055,
      "grad_norm": 0.25955405831336975,
      "learning_rate": 6.564775340498014e-06,
      "loss": 0.0203,
      "step": 1062680
    },
    {
      "epoch": 1.739131857845159,
      "grad_norm": 0.19643518328666687,
      "learning_rate": 6.564709448284497e-06,
      "loss": 0.0143,
      "step": 1062700
    },
    {
      "epoch": 1.739164588283812,
      "grad_norm": 0.7725955843925476,
      "learning_rate": 6.5646435560709795e-06,
      "loss": 0.0237,
      "step": 1062720
    },
    {
      "epoch": 1.7391973187224656,
      "grad_norm": 0.16104017198085785,
      "learning_rate": 6.564577663857462e-06,
      "loss": 0.0185,
      "step": 1062740
    },
    {
      "epoch": 1.7392300491611188,
      "grad_norm": 2.406608819961548,
      "learning_rate": 6.564511771643946e-06,
      "loss": 0.0251,
      "step": 1062760
    },
    {
      "epoch": 1.7392627795997722,
      "grad_norm": 0.8785505890846252,
      "learning_rate": 6.564445879430428e-06,
      "loss": 0.0196,
      "step": 1062780
    },
    {
      "epoch": 1.7392955100384255,
      "grad_norm": 0.4723871350288391,
      "learning_rate": 6.564379987216911e-06,
      "loss": 0.0176,
      "step": 1062800
    },
    {
      "epoch": 1.739328240477079,
      "grad_norm": 0.4764096736907959,
      "learning_rate": 6.564314095003393e-06,
      "loss": 0.0159,
      "step": 1062820
    },
    {
      "epoch": 1.7393609709157323,
      "grad_norm": 0.5023373365402222,
      "learning_rate": 6.564248202789877e-06,
      "loss": 0.0244,
      "step": 1062840
    },
    {
      "epoch": 1.7393937013543854,
      "grad_norm": 0.1759219765663147,
      "learning_rate": 6.56418231057636e-06,
      "loss": 0.0113,
      "step": 1062860
    },
    {
      "epoch": 1.739426431793039,
      "grad_norm": 0.4042278230190277,
      "learning_rate": 6.564116418362842e-06,
      "loss": 0.0211,
      "step": 1062880
    },
    {
      "epoch": 1.7394591622316922,
      "grad_norm": 0.24544428288936615,
      "learning_rate": 6.564050526149326e-06,
      "loss": 0.0131,
      "step": 1062900
    },
    {
      "epoch": 1.7394918926703455,
      "grad_norm": 0.3376721143722534,
      "learning_rate": 6.563984633935809e-06,
      "loss": 0.0262,
      "step": 1062920
    },
    {
      "epoch": 1.739524623108999,
      "grad_norm": 0.41175347566604614,
      "learning_rate": 6.563918741722291e-06,
      "loss": 0.03,
      "step": 1062940
    },
    {
      "epoch": 1.7395573535476523,
      "grad_norm": 0.28388333320617676,
      "learning_rate": 6.563852849508774e-06,
      "loss": 0.0135,
      "step": 1062960
    },
    {
      "epoch": 1.7395900839863057,
      "grad_norm": 0.9503499269485474,
      "learning_rate": 6.563786957295258e-06,
      "loss": 0.022,
      "step": 1062980
    },
    {
      "epoch": 1.7396228144249588,
      "grad_norm": 0.7185933589935303,
      "learning_rate": 6.5637210650817395e-06,
      "loss": 0.0213,
      "step": 1063000
    },
    {
      "epoch": 1.7396555448636124,
      "grad_norm": 1.0033942461013794,
      "learning_rate": 6.563655172868223e-06,
      "loss": 0.0182,
      "step": 1063020
    },
    {
      "epoch": 1.7396882753022656,
      "grad_norm": 0.7633822560310364,
      "learning_rate": 6.563589280654705e-06,
      "loss": 0.0227,
      "step": 1063040
    },
    {
      "epoch": 1.739721005740919,
      "grad_norm": 0.8728287816047668,
      "learning_rate": 6.563523388441189e-06,
      "loss": 0.017,
      "step": 1063060
    },
    {
      "epoch": 1.7397537361795723,
      "grad_norm": 0.07428840547800064,
      "learning_rate": 6.5634574962276705e-06,
      "loss": 0.0208,
      "step": 1063080
    },
    {
      "epoch": 1.7397864666182257,
      "grad_norm": 0.7222476601600647,
      "learning_rate": 6.563391604014154e-06,
      "loss": 0.0188,
      "step": 1063100
    },
    {
      "epoch": 1.739819197056879,
      "grad_norm": 1.8686083555221558,
      "learning_rate": 6.563325711800637e-06,
      "loss": 0.022,
      "step": 1063120
    },
    {
      "epoch": 1.7398519274955322,
      "grad_norm": 0.7761391997337341,
      "learning_rate": 6.56325981958712e-06,
      "loss": 0.0224,
      "step": 1063140
    },
    {
      "epoch": 1.7398846579341858,
      "grad_norm": 0.6485143899917603,
      "learning_rate": 6.563193927373602e-06,
      "loss": 0.0161,
      "step": 1063160
    },
    {
      "epoch": 1.739917388372839,
      "grad_norm": 0.6357682943344116,
      "learning_rate": 6.563128035160086e-06,
      "loss": 0.0237,
      "step": 1063180
    },
    {
      "epoch": 1.7399501188114923,
      "grad_norm": 0.6244919896125793,
      "learning_rate": 6.5630621429465695e-06,
      "loss": 0.0218,
      "step": 1063200
    },
    {
      "epoch": 1.7399828492501457,
      "grad_norm": 0.2677513659000397,
      "learning_rate": 6.562996250733051e-06,
      "loss": 0.0152,
      "step": 1063220
    },
    {
      "epoch": 1.7400155796887988,
      "grad_norm": 0.08700845390558243,
      "learning_rate": 6.562930358519535e-06,
      "loss": 0.0192,
      "step": 1063240
    },
    {
      "epoch": 1.7400483101274524,
      "grad_norm": 0.4389673173427582,
      "learning_rate": 6.562864466306017e-06,
      "loss": 0.022,
      "step": 1063260
    },
    {
      "epoch": 1.7400810405661056,
      "grad_norm": 0.26349911093711853,
      "learning_rate": 6.5627985740925004e-06,
      "loss": 0.0198,
      "step": 1063280
    },
    {
      "epoch": 1.7401137710047592,
      "grad_norm": 0.5869469046592712,
      "learning_rate": 6.562732681878982e-06,
      "loss": 0.0239,
      "step": 1063300
    },
    {
      "epoch": 1.7401465014434123,
      "grad_norm": 0.3480781614780426,
      "learning_rate": 6.562666789665466e-06,
      "loss": 0.0289,
      "step": 1063320
    },
    {
      "epoch": 1.7401792318820657,
      "grad_norm": 0.3988320827484131,
      "learning_rate": 6.562600897451949e-06,
      "loss": 0.0109,
      "step": 1063340
    },
    {
      "epoch": 1.740211962320719,
      "grad_norm": 0.27177175879478455,
      "learning_rate": 6.562535005238431e-06,
      "loss": 0.0138,
      "step": 1063360
    },
    {
      "epoch": 1.7402446927593722,
      "grad_norm": 0.48910558223724365,
      "learning_rate": 6.562469113024914e-06,
      "loss": 0.0121,
      "step": 1063380
    },
    {
      "epoch": 1.7402774231980258,
      "grad_norm": 0.08264157921075821,
      "learning_rate": 6.562403220811398e-06,
      "loss": 0.0206,
      "step": 1063400
    },
    {
      "epoch": 1.740310153636679,
      "grad_norm": 0.8542659282684326,
      "learning_rate": 6.56233732859788e-06,
      "loss": 0.01,
      "step": 1063420
    },
    {
      "epoch": 1.7403428840753323,
      "grad_norm": 0.3078674376010895,
      "learning_rate": 6.562271436384363e-06,
      "loss": 0.0191,
      "step": 1063440
    },
    {
      "epoch": 1.7403756145139857,
      "grad_norm": 0.8080117106437683,
      "learning_rate": 6.562205544170845e-06,
      "loss": 0.021,
      "step": 1063460
    },
    {
      "epoch": 1.740408344952639,
      "grad_norm": 0.7363014817237854,
      "learning_rate": 6.562139651957329e-06,
      "loss": 0.0242,
      "step": 1063480
    },
    {
      "epoch": 1.7404410753912924,
      "grad_norm": 0.12304165214300156,
      "learning_rate": 6.562073759743811e-06,
      "loss": 0.0179,
      "step": 1063500
    },
    {
      "epoch": 1.7404738058299456,
      "grad_norm": 0.3145466148853302,
      "learning_rate": 6.562007867530294e-06,
      "loss": 0.0154,
      "step": 1063520
    },
    {
      "epoch": 1.7405065362685992,
      "grad_norm": 0.24381330609321594,
      "learning_rate": 6.561941975316778e-06,
      "loss": 0.0112,
      "step": 1063540
    },
    {
      "epoch": 1.7405392667072523,
      "grad_norm": 0.17263393104076385,
      "learning_rate": 6.5618760831032605e-06,
      "loss": 0.0169,
      "step": 1063560
    },
    {
      "epoch": 1.7405719971459057,
      "grad_norm": 0.4952281713485718,
      "learning_rate": 6.561810190889743e-06,
      "loss": 0.0228,
      "step": 1063580
    },
    {
      "epoch": 1.740604727584559,
      "grad_norm": 0.52850741147995,
      "learning_rate": 6.561744298676226e-06,
      "loss": 0.0208,
      "step": 1063600
    },
    {
      "epoch": 1.7406374580232125,
      "grad_norm": 1.3116209506988525,
      "learning_rate": 6.5616784064627096e-06,
      "loss": 0.0163,
      "step": 1063620
    },
    {
      "epoch": 1.7406701884618658,
      "grad_norm": 1.1396727561950684,
      "learning_rate": 6.5616125142491914e-06,
      "loss": 0.0238,
      "step": 1063640
    },
    {
      "epoch": 1.740702918900519,
      "grad_norm": 0.42474499344825745,
      "learning_rate": 6.561546622035675e-06,
      "loss": 0.0216,
      "step": 1063660
    },
    {
      "epoch": 1.7407356493391726,
      "grad_norm": 0.17327532172203064,
      "learning_rate": 6.561480729822157e-06,
      "loss": 0.0203,
      "step": 1063680
    },
    {
      "epoch": 1.7407683797778257,
      "grad_norm": 0.6401236057281494,
      "learning_rate": 6.5614148376086405e-06,
      "loss": 0.0164,
      "step": 1063700
    },
    {
      "epoch": 1.740801110216479,
      "grad_norm": 0.6619565486907959,
      "learning_rate": 6.561348945395123e-06,
      "loss": 0.0178,
      "step": 1063720
    },
    {
      "epoch": 1.7408338406551325,
      "grad_norm": 0.355584979057312,
      "learning_rate": 6.561283053181606e-06,
      "loss": 0.0156,
      "step": 1063740
    },
    {
      "epoch": 1.7408665710937858,
      "grad_norm": 0.6758102178573608,
      "learning_rate": 6.561217160968089e-06,
      "loss": 0.0157,
      "step": 1063760
    },
    {
      "epoch": 1.7408993015324392,
      "grad_norm": 0.9219377636909485,
      "learning_rate": 6.561151268754572e-06,
      "loss": 0.0155,
      "step": 1063780
    },
    {
      "epoch": 1.7409320319710924,
      "grad_norm": 0.25666746497154236,
      "learning_rate": 6.561085376541054e-06,
      "loss": 0.0137,
      "step": 1063800
    },
    {
      "epoch": 1.740964762409746,
      "grad_norm": 0.3589000105857849,
      "learning_rate": 6.561019484327538e-06,
      "loss": 0.018,
      "step": 1063820
    },
    {
      "epoch": 1.740997492848399,
      "grad_norm": 0.2214362770318985,
      "learning_rate": 6.56095359211402e-06,
      "loss": 0.0151,
      "step": 1063840
    },
    {
      "epoch": 1.7410302232870525,
      "grad_norm": 1.5425934791564941,
      "learning_rate": 6.560887699900503e-06,
      "loss": 0.0176,
      "step": 1063860
    },
    {
      "epoch": 1.7410629537257059,
      "grad_norm": 1.6273826360702515,
      "learning_rate": 6.560821807686986e-06,
      "loss": 0.0242,
      "step": 1063880
    },
    {
      "epoch": 1.7410956841643592,
      "grad_norm": 0.42068910598754883,
      "learning_rate": 6.560755915473469e-06,
      "loss": 0.0191,
      "step": 1063900
    },
    {
      "epoch": 1.7411284146030126,
      "grad_norm": 0.21380285918712616,
      "learning_rate": 6.560690023259952e-06,
      "loss": 0.0255,
      "step": 1063920
    },
    {
      "epoch": 1.7411611450416657,
      "grad_norm": 0.5612468719482422,
      "learning_rate": 6.560624131046435e-06,
      "loss": 0.0229,
      "step": 1063940
    },
    {
      "epoch": 1.7411938754803193,
      "grad_norm": 0.3956560492515564,
      "learning_rate": 6.560558238832918e-06,
      "loss": 0.0201,
      "step": 1063960
    },
    {
      "epoch": 1.7412266059189725,
      "grad_norm": 0.6529189348220825,
      "learning_rate": 6.5604923466194006e-06,
      "loss": 0.0225,
      "step": 1063980
    },
    {
      "epoch": 1.7412593363576259,
      "grad_norm": 1.1915645599365234,
      "learning_rate": 6.560426454405884e-06,
      "loss": 0.0206,
      "step": 1064000
    },
    {
      "epoch": 1.7412920667962792,
      "grad_norm": 0.07064266502857208,
      "learning_rate": 6.560360562192366e-06,
      "loss": 0.0185,
      "step": 1064020
    },
    {
      "epoch": 1.7413247972349324,
      "grad_norm": 0.6926174759864807,
      "learning_rate": 6.56029466997885e-06,
      "loss": 0.016,
      "step": 1064040
    },
    {
      "epoch": 1.741357527673586,
      "grad_norm": 0.5403780937194824,
      "learning_rate": 6.5602287777653315e-06,
      "loss": 0.0211,
      "step": 1064060
    },
    {
      "epoch": 1.7413902581122391,
      "grad_norm": 0.1751081347465515,
      "learning_rate": 6.560162885551815e-06,
      "loss": 0.0184,
      "step": 1064080
    },
    {
      "epoch": 1.7414229885508927,
      "grad_norm": 0.2772114872932434,
      "learning_rate": 6.560096993338297e-06,
      "loss": 0.0167,
      "step": 1064100
    },
    {
      "epoch": 1.7414557189895459,
      "grad_norm": 0.752536952495575,
      "learning_rate": 6.560031101124781e-06,
      "loss": 0.0182,
      "step": 1064120
    },
    {
      "epoch": 1.7414884494281992,
      "grad_norm": 0.7954727411270142,
      "learning_rate": 6.559965208911263e-06,
      "loss": 0.0191,
      "step": 1064140
    },
    {
      "epoch": 1.7415211798668526,
      "grad_norm": 0.5292479395866394,
      "learning_rate": 6.559899316697746e-06,
      "loss": 0.0197,
      "step": 1064160
    },
    {
      "epoch": 1.7415539103055058,
      "grad_norm": 1.1835613250732422,
      "learning_rate": 6.559833424484229e-06,
      "loss": 0.0226,
      "step": 1064180
    },
    {
      "epoch": 1.7415866407441594,
      "grad_norm": 0.9090920686721802,
      "learning_rate": 6.559767532270712e-06,
      "loss": 0.023,
      "step": 1064200
    },
    {
      "epoch": 1.7416193711828125,
      "grad_norm": 0.986028790473938,
      "learning_rate": 6.559701640057194e-06,
      "loss": 0.0154,
      "step": 1064220
    },
    {
      "epoch": 1.7416521016214659,
      "grad_norm": 0.4720829725265503,
      "learning_rate": 6.559635747843678e-06,
      "loss": 0.0259,
      "step": 1064240
    },
    {
      "epoch": 1.7416848320601193,
      "grad_norm": 0.33343225717544556,
      "learning_rate": 6.5595698556301615e-06,
      "loss": 0.0226,
      "step": 1064260
    },
    {
      "epoch": 1.7417175624987726,
      "grad_norm": 0.27833133935928345,
      "learning_rate": 6.559503963416643e-06,
      "loss": 0.0158,
      "step": 1064280
    },
    {
      "epoch": 1.741750292937426,
      "grad_norm": 0.22562181949615479,
      "learning_rate": 6.559438071203127e-06,
      "loss": 0.0176,
      "step": 1064300
    },
    {
      "epoch": 1.7417830233760792,
      "grad_norm": 0.11164052039384842,
      "learning_rate": 6.559372178989609e-06,
      "loss": 0.0199,
      "step": 1064320
    },
    {
      "epoch": 1.7418157538147327,
      "grad_norm": 0.8027232885360718,
      "learning_rate": 6.559306286776092e-06,
      "loss": 0.0171,
      "step": 1064340
    },
    {
      "epoch": 1.741848484253386,
      "grad_norm": 0.19088755548000336,
      "learning_rate": 6.559240394562575e-06,
      "loss": 0.0157,
      "step": 1064360
    },
    {
      "epoch": 1.7418812146920393,
      "grad_norm": 0.107903391122818,
      "learning_rate": 6.559174502349058e-06,
      "loss": 0.0224,
      "step": 1064380
    },
    {
      "epoch": 1.7419139451306926,
      "grad_norm": 0.5746183395385742,
      "learning_rate": 6.559108610135541e-06,
      "loss": 0.0244,
      "step": 1064400
    },
    {
      "epoch": 1.741946675569346,
      "grad_norm": 0.2338424026966095,
      "learning_rate": 6.559042717922024e-06,
      "loss": 0.0232,
      "step": 1064420
    },
    {
      "epoch": 1.7419794060079994,
      "grad_norm": 0.3078036606311798,
      "learning_rate": 6.558976825708506e-06,
      "loss": 0.02,
      "step": 1064440
    },
    {
      "epoch": 1.7420121364466525,
      "grad_norm": 2.1077523231506348,
      "learning_rate": 6.55891093349499e-06,
      "loss": 0.0199,
      "step": 1064460
    },
    {
      "epoch": 1.7420448668853061,
      "grad_norm": 0.7610781192779541,
      "learning_rate": 6.558845041281472e-06,
      "loss": 0.0208,
      "step": 1064480
    },
    {
      "epoch": 1.7420775973239593,
      "grad_norm": 0.1524142622947693,
      "learning_rate": 6.558779149067955e-06,
      "loss": 0.0252,
      "step": 1064500
    },
    {
      "epoch": 1.7421103277626127,
      "grad_norm": 0.25537025928497314,
      "learning_rate": 6.558713256854438e-06,
      "loss": 0.0158,
      "step": 1064520
    },
    {
      "epoch": 1.742143058201266,
      "grad_norm": 0.4659554958343506,
      "learning_rate": 6.558647364640921e-06,
      "loss": 0.0193,
      "step": 1064540
    },
    {
      "epoch": 1.7421757886399194,
      "grad_norm": 0.8564597368240356,
      "learning_rate": 6.558581472427403e-06,
      "loss": 0.023,
      "step": 1064560
    },
    {
      "epoch": 1.7422085190785728,
      "grad_norm": 0.2061457335948944,
      "learning_rate": 6.558515580213887e-06,
      "loss": 0.0187,
      "step": 1064580
    },
    {
      "epoch": 1.742241249517226,
      "grad_norm": 0.9984498620033264,
      "learning_rate": 6.55844968800037e-06,
      "loss": 0.0205,
      "step": 1064600
    },
    {
      "epoch": 1.7422739799558795,
      "grad_norm": 0.6030651926994324,
      "learning_rate": 6.5583837957868525e-06,
      "loss": 0.027,
      "step": 1064620
    },
    {
      "epoch": 1.7423067103945327,
      "grad_norm": 0.1347825527191162,
      "learning_rate": 6.558317903573336e-06,
      "loss": 0.0136,
      "step": 1064640
    },
    {
      "epoch": 1.742339440833186,
      "grad_norm": 0.1928083449602127,
      "learning_rate": 6.558252011359818e-06,
      "loss": 0.0146,
      "step": 1064660
    },
    {
      "epoch": 1.7423721712718394,
      "grad_norm": 0.7052815556526184,
      "learning_rate": 6.5581861191463015e-06,
      "loss": 0.0183,
      "step": 1064680
    },
    {
      "epoch": 1.7424049017104926,
      "grad_norm": 3.0197079181671143,
      "learning_rate": 6.5581202269327834e-06,
      "loss": 0.0205,
      "step": 1064700
    },
    {
      "epoch": 1.7424376321491462,
      "grad_norm": 0.9612016677856445,
      "learning_rate": 6.558054334719267e-06,
      "loss": 0.017,
      "step": 1064720
    },
    {
      "epoch": 1.7424703625877993,
      "grad_norm": 0.4813959002494812,
      "learning_rate": 6.55798844250575e-06,
      "loss": 0.0129,
      "step": 1064740
    },
    {
      "epoch": 1.742503093026453,
      "grad_norm": 0.7009614706039429,
      "learning_rate": 6.5579225502922325e-06,
      "loss": 0.0271,
      "step": 1064760
    },
    {
      "epoch": 1.742535823465106,
      "grad_norm": 0.11749911308288574,
      "learning_rate": 6.557856658078715e-06,
      "loss": 0.0139,
      "step": 1064780
    },
    {
      "epoch": 1.7425685539037594,
      "grad_norm": 0.9731722474098206,
      "learning_rate": 6.557790765865199e-06,
      "loss": 0.0202,
      "step": 1064800
    },
    {
      "epoch": 1.7426012843424128,
      "grad_norm": 0.4195466637611389,
      "learning_rate": 6.557724873651681e-06,
      "loss": 0.0216,
      "step": 1064820
    },
    {
      "epoch": 1.742634014781066,
      "grad_norm": 0.6252444982528687,
      "learning_rate": 6.557658981438164e-06,
      "loss": 0.0197,
      "step": 1064840
    },
    {
      "epoch": 1.7426667452197195,
      "grad_norm": 0.12821613252162933,
      "learning_rate": 6.557593089224646e-06,
      "loss": 0.0191,
      "step": 1064860
    },
    {
      "epoch": 1.7426994756583727,
      "grad_norm": 0.8794275522232056,
      "learning_rate": 6.55752719701113e-06,
      "loss": 0.0198,
      "step": 1064880
    },
    {
      "epoch": 1.742732206097026,
      "grad_norm": 0.619564950466156,
      "learning_rate": 6.5574613047976125e-06,
      "loss": 0.016,
      "step": 1064900
    },
    {
      "epoch": 1.7427649365356794,
      "grad_norm": 0.6142730712890625,
      "learning_rate": 6.557395412584095e-06,
      "loss": 0.0131,
      "step": 1064920
    },
    {
      "epoch": 1.7427976669743328,
      "grad_norm": 0.2006964236497879,
      "learning_rate": 6.557329520370578e-06,
      "loss": 0.0186,
      "step": 1064940
    },
    {
      "epoch": 1.7428303974129862,
      "grad_norm": 0.23291710019111633,
      "learning_rate": 6.557263628157062e-06,
      "loss": 0.0211,
      "step": 1064960
    },
    {
      "epoch": 1.7428631278516393,
      "grad_norm": 0.14116431772708893,
      "learning_rate": 6.557197735943544e-06,
      "loss": 0.0176,
      "step": 1064980
    },
    {
      "epoch": 1.742895858290293,
      "grad_norm": 0.4318355917930603,
      "learning_rate": 6.557131843730027e-06,
      "loss": 0.0267,
      "step": 1065000
    },
    {
      "epoch": 1.742928588728946,
      "grad_norm": 0.5167666077613831,
      "learning_rate": 6.557065951516511e-06,
      "loss": 0.0231,
      "step": 1065020
    },
    {
      "epoch": 1.7429613191675994,
      "grad_norm": 0.5121589303016663,
      "learning_rate": 6.5570000593029925e-06,
      "loss": 0.016,
      "step": 1065040
    },
    {
      "epoch": 1.7429940496062528,
      "grad_norm": 0.24560584127902985,
      "learning_rate": 6.556934167089476e-06,
      "loss": 0.0188,
      "step": 1065060
    },
    {
      "epoch": 1.7430267800449062,
      "grad_norm": 0.2035447359085083,
      "learning_rate": 6.556868274875958e-06,
      "loss": 0.0164,
      "step": 1065080
    },
    {
      "epoch": 1.7430595104835596,
      "grad_norm": 1.5349854230880737,
      "learning_rate": 6.556802382662442e-06,
      "loss": 0.0136,
      "step": 1065100
    },
    {
      "epoch": 1.7430922409222127,
      "grad_norm": 0.14872591197490692,
      "learning_rate": 6.5567364904489235e-06,
      "loss": 0.0139,
      "step": 1065120
    },
    {
      "epoch": 1.7431249713608663,
      "grad_norm": 0.2604150176048279,
      "learning_rate": 6.556670598235407e-06,
      "loss": 0.017,
      "step": 1065140
    },
    {
      "epoch": 1.7431577017995195,
      "grad_norm": 0.1336527019739151,
      "learning_rate": 6.55660470602189e-06,
      "loss": 0.0279,
      "step": 1065160
    },
    {
      "epoch": 1.7431904322381728,
      "grad_norm": 0.5510104894638062,
      "learning_rate": 6.5565388138083726e-06,
      "loss": 0.0165,
      "step": 1065180
    },
    {
      "epoch": 1.7432231626768262,
      "grad_norm": 0.7364296913146973,
      "learning_rate": 6.556472921594855e-06,
      "loss": 0.015,
      "step": 1065200
    },
    {
      "epoch": 1.7432558931154796,
      "grad_norm": 0.462211012840271,
      "learning_rate": 6.556407029381339e-06,
      "loss": 0.0186,
      "step": 1065220
    },
    {
      "epoch": 1.743288623554133,
      "grad_norm": 1.056990385055542,
      "learning_rate": 6.556341137167821e-06,
      "loss": 0.0227,
      "step": 1065240
    },
    {
      "epoch": 1.743321353992786,
      "grad_norm": 1.3462409973144531,
      "learning_rate": 6.556275244954304e-06,
      "loss": 0.0209,
      "step": 1065260
    },
    {
      "epoch": 1.7433540844314397,
      "grad_norm": 0.7468079924583435,
      "learning_rate": 6.556209352740786e-06,
      "loss": 0.0182,
      "step": 1065280
    },
    {
      "epoch": 1.7433868148700928,
      "grad_norm": 0.8395391702651978,
      "learning_rate": 6.55614346052727e-06,
      "loss": 0.0201,
      "step": 1065300
    },
    {
      "epoch": 1.7434195453087462,
      "grad_norm": 0.3823765814304352,
      "learning_rate": 6.5560775683137534e-06,
      "loss": 0.024,
      "step": 1065320
    },
    {
      "epoch": 1.7434522757473996,
      "grad_norm": 0.6940099000930786,
      "learning_rate": 6.556011676100235e-06,
      "loss": 0.0164,
      "step": 1065340
    },
    {
      "epoch": 1.743485006186053,
      "grad_norm": 0.2589208781719208,
      "learning_rate": 6.555945783886719e-06,
      "loss": 0.0233,
      "step": 1065360
    },
    {
      "epoch": 1.7435177366247063,
      "grad_norm": 0.4612658619880676,
      "learning_rate": 6.555879891673202e-06,
      "loss": 0.0223,
      "step": 1065380
    },
    {
      "epoch": 1.7435504670633595,
      "grad_norm": 0.46018123626708984,
      "learning_rate": 6.555813999459684e-06,
      "loss": 0.0172,
      "step": 1065400
    },
    {
      "epoch": 1.743583197502013,
      "grad_norm": 0.5220429301261902,
      "learning_rate": 6.555748107246167e-06,
      "loss": 0.0158,
      "step": 1065420
    },
    {
      "epoch": 1.7436159279406662,
      "grad_norm": 0.4008677899837494,
      "learning_rate": 6.555682215032651e-06,
      "loss": 0.0264,
      "step": 1065440
    },
    {
      "epoch": 1.7436486583793196,
      "grad_norm": 0.9768916368484497,
      "learning_rate": 6.555616322819133e-06,
      "loss": 0.0223,
      "step": 1065460
    },
    {
      "epoch": 1.743681388817973,
      "grad_norm": 0.40137577056884766,
      "learning_rate": 6.555550430605616e-06,
      "loss": 0.0241,
      "step": 1065480
    },
    {
      "epoch": 1.7437141192566261,
      "grad_norm": 0.6008753180503845,
      "learning_rate": 6.555484538392098e-06,
      "loss": 0.0229,
      "step": 1065500
    },
    {
      "epoch": 1.7437468496952797,
      "grad_norm": 0.5318116545677185,
      "learning_rate": 6.555418646178582e-06,
      "loss": 0.0187,
      "step": 1065520
    },
    {
      "epoch": 1.7437795801339329,
      "grad_norm": 0.6819621920585632,
      "learning_rate": 6.5553527539650644e-06,
      "loss": 0.0224,
      "step": 1065540
    },
    {
      "epoch": 1.7438123105725865,
      "grad_norm": 0.7745567560195923,
      "learning_rate": 6.555286861751547e-06,
      "loss": 0.0118,
      "step": 1065560
    },
    {
      "epoch": 1.7438450410112396,
      "grad_norm": 0.4031445384025574,
      "learning_rate": 6.55522096953803e-06,
      "loss": 0.0221,
      "step": 1065580
    },
    {
      "epoch": 1.743877771449893,
      "grad_norm": 0.36348840594291687,
      "learning_rate": 6.5551550773245135e-06,
      "loss": 0.0202,
      "step": 1065600
    },
    {
      "epoch": 1.7439105018885463,
      "grad_norm": 0.9328469038009644,
      "learning_rate": 6.555089185110995e-06,
      "loss": 0.0242,
      "step": 1065620
    },
    {
      "epoch": 1.7439432323271995,
      "grad_norm": 0.08826599270105362,
      "learning_rate": 6.555023292897479e-06,
      "loss": 0.0141,
      "step": 1065640
    },
    {
      "epoch": 1.743975962765853,
      "grad_norm": 0.4509861171245575,
      "learning_rate": 6.5549574006839626e-06,
      "loss": 0.0134,
      "step": 1065660
    },
    {
      "epoch": 1.7440086932045062,
      "grad_norm": 0.6480459570884705,
      "learning_rate": 6.5548915084704445e-06,
      "loss": 0.019,
      "step": 1065680
    },
    {
      "epoch": 1.7440414236431596,
      "grad_norm": 0.5971344113349915,
      "learning_rate": 6.554825616256928e-06,
      "loss": 0.0175,
      "step": 1065700
    },
    {
      "epoch": 1.744074154081813,
      "grad_norm": 0.2572697401046753,
      "learning_rate": 6.55475972404341e-06,
      "loss": 0.015,
      "step": 1065720
    },
    {
      "epoch": 1.7441068845204664,
      "grad_norm": 0.320000022649765,
      "learning_rate": 6.5546938318298935e-06,
      "loss": 0.0125,
      "step": 1065740
    },
    {
      "epoch": 1.7441396149591197,
      "grad_norm": 0.39225131273269653,
      "learning_rate": 6.554627939616376e-06,
      "loss": 0.0148,
      "step": 1065760
    },
    {
      "epoch": 1.7441723453977729,
      "grad_norm": 0.8625128865242004,
      "learning_rate": 6.554562047402859e-06,
      "loss": 0.0184,
      "step": 1065780
    },
    {
      "epoch": 1.7442050758364265,
      "grad_norm": 2.60923171043396,
      "learning_rate": 6.554496155189342e-06,
      "loss": 0.0298,
      "step": 1065800
    },
    {
      "epoch": 1.7442378062750796,
      "grad_norm": 0.5887129902839661,
      "learning_rate": 6.554430262975825e-06,
      "loss": 0.0126,
      "step": 1065820
    },
    {
      "epoch": 1.744270536713733,
      "grad_norm": 0.7376278638839722,
      "learning_rate": 6.554364370762307e-06,
      "loss": 0.0187,
      "step": 1065840
    },
    {
      "epoch": 1.7443032671523864,
      "grad_norm": 0.43067866563796997,
      "learning_rate": 6.554298478548791e-06,
      "loss": 0.0186,
      "step": 1065860
    },
    {
      "epoch": 1.7443359975910397,
      "grad_norm": 0.6514492034912109,
      "learning_rate": 6.554232586335273e-06,
      "loss": 0.0153,
      "step": 1065880
    },
    {
      "epoch": 1.7443687280296931,
      "grad_norm": 0.8977767825126648,
      "learning_rate": 6.554166694121756e-06,
      "loss": 0.0179,
      "step": 1065900
    },
    {
      "epoch": 1.7444014584683463,
      "grad_norm": 0.3119294047355652,
      "learning_rate": 6.554100801908239e-06,
      "loss": 0.0161,
      "step": 1065920
    },
    {
      "epoch": 1.7444341889069999,
      "grad_norm": 0.42264261841773987,
      "learning_rate": 6.554034909694722e-06,
      "loss": 0.0214,
      "step": 1065940
    },
    {
      "epoch": 1.744466919345653,
      "grad_norm": 0.5874788761138916,
      "learning_rate": 6.5539690174812045e-06,
      "loss": 0.0268,
      "step": 1065960
    },
    {
      "epoch": 1.7444996497843064,
      "grad_norm": 1.05386221408844,
      "learning_rate": 6.553903125267688e-06,
      "loss": 0.0301,
      "step": 1065980
    },
    {
      "epoch": 1.7445323802229598,
      "grad_norm": 0.24121052026748657,
      "learning_rate": 6.553837233054171e-06,
      "loss": 0.0205,
      "step": 1066000
    },
    {
      "epoch": 1.7445651106616131,
      "grad_norm": 0.22227522730827332,
      "learning_rate": 6.5537713408406536e-06,
      "loss": 0.0187,
      "step": 1066020
    },
    {
      "epoch": 1.7445978411002665,
      "grad_norm": 2.1941347122192383,
      "learning_rate": 6.553705448627137e-06,
      "loss": 0.0217,
      "step": 1066040
    },
    {
      "epoch": 1.7446305715389197,
      "grad_norm": 1.0983110666275024,
      "learning_rate": 6.553639556413619e-06,
      "loss": 0.0319,
      "step": 1066060
    },
    {
      "epoch": 1.7446633019775732,
      "grad_norm": 0.16180099546909332,
      "learning_rate": 6.553573664200103e-06,
      "loss": 0.0195,
      "step": 1066080
    },
    {
      "epoch": 1.7446960324162264,
      "grad_norm": 0.3474736213684082,
      "learning_rate": 6.5535077719865845e-06,
      "loss": 0.0186,
      "step": 1066100
    },
    {
      "epoch": 1.7447287628548798,
      "grad_norm": 0.332806795835495,
      "learning_rate": 6.553441879773068e-06,
      "loss": 0.0161,
      "step": 1066120
    },
    {
      "epoch": 1.7447614932935331,
      "grad_norm": 0.6067469120025635,
      "learning_rate": 6.55337598755955e-06,
      "loss": 0.0141,
      "step": 1066140
    },
    {
      "epoch": 1.7447942237321865,
      "grad_norm": 0.15038608014583588,
      "learning_rate": 6.553310095346034e-06,
      "loss": 0.0191,
      "step": 1066160
    },
    {
      "epoch": 1.7448269541708399,
      "grad_norm": 0.42910629510879517,
      "learning_rate": 6.553244203132516e-06,
      "loss": 0.0183,
      "step": 1066180
    },
    {
      "epoch": 1.744859684609493,
      "grad_norm": 0.36477112770080566,
      "learning_rate": 6.553178310918999e-06,
      "loss": 0.0163,
      "step": 1066200
    },
    {
      "epoch": 1.7448924150481466,
      "grad_norm": 0.7603682279586792,
      "learning_rate": 6.553112418705482e-06,
      "loss": 0.0261,
      "step": 1066220
    },
    {
      "epoch": 1.7449251454867998,
      "grad_norm": 0.2888416051864624,
      "learning_rate": 6.553046526491965e-06,
      "loss": 0.0251,
      "step": 1066240
    },
    {
      "epoch": 1.7449578759254532,
      "grad_norm": 0.22062133252620697,
      "learning_rate": 6.552980634278447e-06,
      "loss": 0.0259,
      "step": 1066260
    },
    {
      "epoch": 1.7449906063641065,
      "grad_norm": 0.41839808225631714,
      "learning_rate": 6.552914742064931e-06,
      "loss": 0.0168,
      "step": 1066280
    },
    {
      "epoch": 1.7450233368027597,
      "grad_norm": 0.13347378373146057,
      "learning_rate": 6.552848849851413e-06,
      "loss": 0.0221,
      "step": 1066300
    },
    {
      "epoch": 1.7450560672414133,
      "grad_norm": 0.5220465064048767,
      "learning_rate": 6.552782957637896e-06,
      "loss": 0.0169,
      "step": 1066320
    },
    {
      "epoch": 1.7450887976800664,
      "grad_norm": 0.5521258115768433,
      "learning_rate": 6.552717065424379e-06,
      "loss": 0.018,
      "step": 1066340
    },
    {
      "epoch": 1.74512152811872,
      "grad_norm": 0.8472501635551453,
      "learning_rate": 6.552651173210862e-06,
      "loss": 0.0175,
      "step": 1066360
    },
    {
      "epoch": 1.7451542585573732,
      "grad_norm": 1.7589232921600342,
      "learning_rate": 6.5525852809973454e-06,
      "loss": 0.0216,
      "step": 1066380
    },
    {
      "epoch": 1.7451869889960265,
      "grad_norm": 0.6806879043579102,
      "learning_rate": 6.552519388783828e-06,
      "loss": 0.0167,
      "step": 1066400
    },
    {
      "epoch": 1.74521971943468,
      "grad_norm": 0.41250771284103394,
      "learning_rate": 6.552453496570311e-06,
      "loss": 0.0229,
      "step": 1066420
    },
    {
      "epoch": 1.745252449873333,
      "grad_norm": 0.26523280143737793,
      "learning_rate": 6.552387604356794e-06,
      "loss": 0.0144,
      "step": 1066440
    },
    {
      "epoch": 1.7452851803119867,
      "grad_norm": 0.8341912031173706,
      "learning_rate": 6.552321712143277e-06,
      "loss": 0.0217,
      "step": 1066460
    },
    {
      "epoch": 1.7453179107506398,
      "grad_norm": 0.05684712156653404,
      "learning_rate": 6.552255819929759e-06,
      "loss": 0.0212,
      "step": 1066480
    },
    {
      "epoch": 1.7453506411892932,
      "grad_norm": 0.5075603723526001,
      "learning_rate": 6.552189927716243e-06,
      "loss": 0.0204,
      "step": 1066500
    },
    {
      "epoch": 1.7453833716279465,
      "grad_norm": 0.18873165547847748,
      "learning_rate": 6.552124035502725e-06,
      "loss": 0.0143,
      "step": 1066520
    },
    {
      "epoch": 1.7454161020666,
      "grad_norm": 0.39079010486602783,
      "learning_rate": 6.552058143289208e-06,
      "loss": 0.0174,
      "step": 1066540
    },
    {
      "epoch": 1.7454488325052533,
      "grad_norm": 0.5748773813247681,
      "learning_rate": 6.551992251075691e-06,
      "loss": 0.0216,
      "step": 1066560
    },
    {
      "epoch": 1.7454815629439064,
      "grad_norm": 0.8044142127037048,
      "learning_rate": 6.551926358862174e-06,
      "loss": 0.02,
      "step": 1066580
    },
    {
      "epoch": 1.74551429338256,
      "grad_norm": 1.7922900915145874,
      "learning_rate": 6.551860466648656e-06,
      "loss": 0.0202,
      "step": 1066600
    },
    {
      "epoch": 1.7455470238212132,
      "grad_norm": 1.7387696504592896,
      "learning_rate": 6.55179457443514e-06,
      "loss": 0.0164,
      "step": 1066620
    },
    {
      "epoch": 1.7455797542598666,
      "grad_norm": 1.334720492362976,
      "learning_rate": 6.551728682221622e-06,
      "loss": 0.0253,
      "step": 1066640
    },
    {
      "epoch": 1.74561248469852,
      "grad_norm": 0.6497379541397095,
      "learning_rate": 6.5516627900081055e-06,
      "loss": 0.0234,
      "step": 1066660
    },
    {
      "epoch": 1.7456452151371733,
      "grad_norm": 0.7681494355201721,
      "learning_rate": 6.551596897794587e-06,
      "loss": 0.0224,
      "step": 1066680
    },
    {
      "epoch": 1.7456779455758267,
      "grad_norm": 0.6052809953689575,
      "learning_rate": 6.551531005581071e-06,
      "loss": 0.0238,
      "step": 1066700
    },
    {
      "epoch": 1.7457106760144798,
      "grad_norm": 0.370714396238327,
      "learning_rate": 6.5514651133675545e-06,
      "loss": 0.0158,
      "step": 1066720
    },
    {
      "epoch": 1.7457434064531334,
      "grad_norm": 0.5233438014984131,
      "learning_rate": 6.5513992211540364e-06,
      "loss": 0.0143,
      "step": 1066740
    },
    {
      "epoch": 1.7457761368917866,
      "grad_norm": 0.24443049728870392,
      "learning_rate": 6.55133332894052e-06,
      "loss": 0.0222,
      "step": 1066760
    },
    {
      "epoch": 1.74580886733044,
      "grad_norm": 0.7930476069450378,
      "learning_rate": 6.551267436727003e-06,
      "loss": 0.0173,
      "step": 1066780
    },
    {
      "epoch": 1.7458415977690933,
      "grad_norm": 0.5011414885520935,
      "learning_rate": 6.5512015445134855e-06,
      "loss": 0.0196,
      "step": 1066800
    },
    {
      "epoch": 1.7458743282077467,
      "grad_norm": 0.2043413668870926,
      "learning_rate": 6.551135652299968e-06,
      "loss": 0.0176,
      "step": 1066820
    },
    {
      "epoch": 1.7459070586464,
      "grad_norm": 0.15234926342964172,
      "learning_rate": 6.551069760086452e-06,
      "loss": 0.0172,
      "step": 1066840
    },
    {
      "epoch": 1.7459397890850532,
      "grad_norm": 0.8658218383789062,
      "learning_rate": 6.551003867872934e-06,
      "loss": 0.0211,
      "step": 1066860
    },
    {
      "epoch": 1.7459725195237068,
      "grad_norm": 0.6016963720321655,
      "learning_rate": 6.550937975659417e-06,
      "loss": 0.0153,
      "step": 1066880
    },
    {
      "epoch": 1.74600524996236,
      "grad_norm": 0.6914891600608826,
      "learning_rate": 6.550872083445899e-06,
      "loss": 0.0177,
      "step": 1066900
    },
    {
      "epoch": 1.7460379804010133,
      "grad_norm": 0.9230017066001892,
      "learning_rate": 6.550806191232383e-06,
      "loss": 0.0308,
      "step": 1066920
    },
    {
      "epoch": 1.7460707108396667,
      "grad_norm": 0.31193244457244873,
      "learning_rate": 6.550740299018865e-06,
      "loss": 0.0154,
      "step": 1066940
    },
    {
      "epoch": 1.74610344127832,
      "grad_norm": 0.12283606827259064,
      "learning_rate": 6.550674406805348e-06,
      "loss": 0.0231,
      "step": 1066960
    },
    {
      "epoch": 1.7461361717169734,
      "grad_norm": 0.17602798342704773,
      "learning_rate": 6.550608514591831e-06,
      "loss": 0.0218,
      "step": 1066980
    },
    {
      "epoch": 1.7461689021556266,
      "grad_norm": 0.3773980438709259,
      "learning_rate": 6.550542622378315e-06,
      "loss": 0.0135,
      "step": 1067000
    },
    {
      "epoch": 1.7462016325942802,
      "grad_norm": 1.0336335897445679,
      "learning_rate": 6.5504767301647965e-06,
      "loss": 0.0278,
      "step": 1067020
    },
    {
      "epoch": 1.7462343630329333,
      "grad_norm": 0.19654077291488647,
      "learning_rate": 6.55041083795128e-06,
      "loss": 0.0157,
      "step": 1067040
    },
    {
      "epoch": 1.7462670934715867,
      "grad_norm": 0.9226261377334595,
      "learning_rate": 6.550344945737764e-06,
      "loss": 0.018,
      "step": 1067060
    },
    {
      "epoch": 1.74629982391024,
      "grad_norm": 1.8124594688415527,
      "learning_rate": 6.5502790535242456e-06,
      "loss": 0.0152,
      "step": 1067080
    },
    {
      "epoch": 1.7463325543488932,
      "grad_norm": 0.5716363191604614,
      "learning_rate": 6.550213161310729e-06,
      "loss": 0.0191,
      "step": 1067100
    },
    {
      "epoch": 1.7463652847875468,
      "grad_norm": 0.5124779939651489,
      "learning_rate": 6.550147269097211e-06,
      "loss": 0.0232,
      "step": 1067120
    },
    {
      "epoch": 1.7463980152262,
      "grad_norm": 0.726641833782196,
      "learning_rate": 6.550081376883695e-06,
      "loss": 0.0196,
      "step": 1067140
    },
    {
      "epoch": 1.7464307456648533,
      "grad_norm": 0.40003862977027893,
      "learning_rate": 6.5500154846701765e-06,
      "loss": 0.0198,
      "step": 1067160
    },
    {
      "epoch": 1.7464634761035067,
      "grad_norm": 0.6778583526611328,
      "learning_rate": 6.54994959245666e-06,
      "loss": 0.0187,
      "step": 1067180
    },
    {
      "epoch": 1.74649620654216,
      "grad_norm": 0.1946476548910141,
      "learning_rate": 6.549883700243143e-06,
      "loss": 0.0151,
      "step": 1067200
    },
    {
      "epoch": 1.7465289369808135,
      "grad_norm": 0.4852701425552368,
      "learning_rate": 6.549817808029626e-06,
      "loss": 0.0239,
      "step": 1067220
    },
    {
      "epoch": 1.7465616674194666,
      "grad_norm": 0.9785916209220886,
      "learning_rate": 6.549751915816108e-06,
      "loss": 0.0153,
      "step": 1067240
    },
    {
      "epoch": 1.7465943978581202,
      "grad_norm": 0.27688068151474,
      "learning_rate": 6.549686023602592e-06,
      "loss": 0.0209,
      "step": 1067260
    },
    {
      "epoch": 1.7466271282967734,
      "grad_norm": 1.305810570716858,
      "learning_rate": 6.549620131389074e-06,
      "loss": 0.0218,
      "step": 1067280
    },
    {
      "epoch": 1.7466598587354267,
      "grad_norm": 0.2691161632537842,
      "learning_rate": 6.549554239175557e-06,
      "loss": 0.0158,
      "step": 1067300
    },
    {
      "epoch": 1.74669258917408,
      "grad_norm": 0.6210734248161316,
      "learning_rate": 6.549488346962039e-06,
      "loss": 0.0231,
      "step": 1067320
    },
    {
      "epoch": 1.7467253196127335,
      "grad_norm": 0.8468173146247864,
      "learning_rate": 6.549422454748523e-06,
      "loss": 0.0145,
      "step": 1067340
    },
    {
      "epoch": 1.7467580500513868,
      "grad_norm": 0.4065331518650055,
      "learning_rate": 6.549356562535006e-06,
      "loss": 0.0243,
      "step": 1067360
    },
    {
      "epoch": 1.74679078049004,
      "grad_norm": 0.35534799098968506,
      "learning_rate": 6.549290670321488e-06,
      "loss": 0.0148,
      "step": 1067380
    },
    {
      "epoch": 1.7468235109286936,
      "grad_norm": 0.29421529173851013,
      "learning_rate": 6.549224778107971e-06,
      "loss": 0.0293,
      "step": 1067400
    },
    {
      "epoch": 1.7468562413673467,
      "grad_norm": 0.4385652244091034,
      "learning_rate": 6.549158885894455e-06,
      "loss": 0.023,
      "step": 1067420
    },
    {
      "epoch": 1.7468889718060001,
      "grad_norm": 0.5156265497207642,
      "learning_rate": 6.549092993680937e-06,
      "loss": 0.0262,
      "step": 1067440
    },
    {
      "epoch": 1.7469217022446535,
      "grad_norm": 0.7204258441925049,
      "learning_rate": 6.54902710146742e-06,
      "loss": 0.0178,
      "step": 1067460
    },
    {
      "epoch": 1.7469544326833069,
      "grad_norm": 0.744901716709137,
      "learning_rate": 6.548961209253904e-06,
      "loss": 0.0141,
      "step": 1067480
    },
    {
      "epoch": 1.7469871631219602,
      "grad_norm": 0.7275922894477844,
      "learning_rate": 6.548895317040386e-06,
      "loss": 0.016,
      "step": 1067500
    },
    {
      "epoch": 1.7470198935606134,
      "grad_norm": 1.3823623657226562,
      "learning_rate": 6.548829424826869e-06,
      "loss": 0.0222,
      "step": 1067520
    },
    {
      "epoch": 1.747052623999267,
      "grad_norm": 0.7895016074180603,
      "learning_rate": 6.548763532613351e-06,
      "loss": 0.0225,
      "step": 1067540
    },
    {
      "epoch": 1.7470853544379201,
      "grad_norm": 0.6837993264198303,
      "learning_rate": 6.548697640399835e-06,
      "loss": 0.0181,
      "step": 1067560
    },
    {
      "epoch": 1.7471180848765735,
      "grad_norm": 0.273988276720047,
      "learning_rate": 6.5486317481863174e-06,
      "loss": 0.0167,
      "step": 1067580
    },
    {
      "epoch": 1.7471508153152269,
      "grad_norm": 0.9905403256416321,
      "learning_rate": 6.5485658559728e-06,
      "loss": 0.0166,
      "step": 1067600
    },
    {
      "epoch": 1.7471835457538802,
      "grad_norm": 1.8721277713775635,
      "learning_rate": 6.548499963759283e-06,
      "loss": 0.0187,
      "step": 1067620
    },
    {
      "epoch": 1.7472162761925336,
      "grad_norm": 0.6992104649543762,
      "learning_rate": 6.5484340715457665e-06,
      "loss": 0.0206,
      "step": 1067640
    },
    {
      "epoch": 1.7472490066311868,
      "grad_norm": 0.2516044080257416,
      "learning_rate": 6.548368179332248e-06,
      "loss": 0.0147,
      "step": 1067660
    },
    {
      "epoch": 1.7472817370698404,
      "grad_norm": 0.3338566720485687,
      "learning_rate": 6.548302287118732e-06,
      "loss": 0.0244,
      "step": 1067680
    },
    {
      "epoch": 1.7473144675084935,
      "grad_norm": 2.3005495071411133,
      "learning_rate": 6.548236394905214e-06,
      "loss": 0.0187,
      "step": 1067700
    },
    {
      "epoch": 1.7473471979471469,
      "grad_norm": 0.37292975187301636,
      "learning_rate": 6.5481705026916975e-06,
      "loss": 0.0216,
      "step": 1067720
    },
    {
      "epoch": 1.7473799283858003,
      "grad_norm": 0.13800187408924103,
      "learning_rate": 6.54810461047818e-06,
      "loss": 0.0219,
      "step": 1067740
    },
    {
      "epoch": 1.7474126588244534,
      "grad_norm": 0.18241605162620544,
      "learning_rate": 6.548038718264663e-06,
      "loss": 0.0129,
      "step": 1067760
    },
    {
      "epoch": 1.747445389263107,
      "grad_norm": 0.15849579870700836,
      "learning_rate": 6.5479728260511465e-06,
      "loss": 0.0161,
      "step": 1067780
    },
    {
      "epoch": 1.7474781197017601,
      "grad_norm": 0.41792744398117065,
      "learning_rate": 6.547906933837629e-06,
      "loss": 0.0178,
      "step": 1067800
    },
    {
      "epoch": 1.7475108501404137,
      "grad_norm": 0.5232916474342346,
      "learning_rate": 6.547841041624112e-06,
      "loss": 0.0233,
      "step": 1067820
    },
    {
      "epoch": 1.747543580579067,
      "grad_norm": 0.4430013597011566,
      "learning_rate": 6.547775149410595e-06,
      "loss": 0.0142,
      "step": 1067840
    },
    {
      "epoch": 1.7475763110177203,
      "grad_norm": 0.42705467343330383,
      "learning_rate": 6.547709257197078e-06,
      "loss": 0.0244,
      "step": 1067860
    },
    {
      "epoch": 1.7476090414563736,
      "grad_norm": 0.768062174320221,
      "learning_rate": 6.54764336498356e-06,
      "loss": 0.0167,
      "step": 1067880
    },
    {
      "epoch": 1.7476417718950268,
      "grad_norm": 0.4955635368824005,
      "learning_rate": 6.547577472770044e-06,
      "loss": 0.0255,
      "step": 1067900
    },
    {
      "epoch": 1.7476745023336804,
      "grad_norm": 0.31296464800834656,
      "learning_rate": 6.547511580556526e-06,
      "loss": 0.017,
      "step": 1067920
    },
    {
      "epoch": 1.7477072327723335,
      "grad_norm": 0.1860235631465912,
      "learning_rate": 6.547445688343009e-06,
      "loss": 0.016,
      "step": 1067940
    },
    {
      "epoch": 1.747739963210987,
      "grad_norm": 0.9608914852142334,
      "learning_rate": 6.547379796129491e-06,
      "loss": 0.0157,
      "step": 1067960
    },
    {
      "epoch": 1.7477726936496403,
      "grad_norm": 0.4866475760936737,
      "learning_rate": 6.547313903915975e-06,
      "loss": 0.0199,
      "step": 1067980
    },
    {
      "epoch": 1.7478054240882936,
      "grad_norm": 0.9368919730186462,
      "learning_rate": 6.5472480117024575e-06,
      "loss": 0.0372,
      "step": 1068000
    },
    {
      "epoch": 1.747838154526947,
      "grad_norm": 0.28634926676750183,
      "learning_rate": 6.54718211948894e-06,
      "loss": 0.0256,
      "step": 1068020
    },
    {
      "epoch": 1.7478708849656002,
      "grad_norm": 0.48349860310554504,
      "learning_rate": 6.547116227275423e-06,
      "loss": 0.0194,
      "step": 1068040
    },
    {
      "epoch": 1.7479036154042538,
      "grad_norm": 0.6567932367324829,
      "learning_rate": 6.547050335061907e-06,
      "loss": 0.0181,
      "step": 1068060
    },
    {
      "epoch": 1.747936345842907,
      "grad_norm": 0.6805065870285034,
      "learning_rate": 6.5469844428483885e-06,
      "loss": 0.0163,
      "step": 1068080
    },
    {
      "epoch": 1.7479690762815603,
      "grad_norm": 1.3309333324432373,
      "learning_rate": 6.546918550634872e-06,
      "loss": 0.0154,
      "step": 1068100
    },
    {
      "epoch": 1.7480018067202137,
      "grad_norm": 0.16245044767856598,
      "learning_rate": 6.546852658421356e-06,
      "loss": 0.0209,
      "step": 1068120
    },
    {
      "epoch": 1.748034537158867,
      "grad_norm": 0.4558807909488678,
      "learning_rate": 6.5467867662078375e-06,
      "loss": 0.0154,
      "step": 1068140
    },
    {
      "epoch": 1.7480672675975204,
      "grad_norm": 1.005356788635254,
      "learning_rate": 6.546720873994321e-06,
      "loss": 0.0194,
      "step": 1068160
    },
    {
      "epoch": 1.7480999980361736,
      "grad_norm": 0.45528051257133484,
      "learning_rate": 6.546654981780803e-06,
      "loss": 0.0152,
      "step": 1068180
    },
    {
      "epoch": 1.7481327284748271,
      "grad_norm": 0.8953711986541748,
      "learning_rate": 6.546589089567287e-06,
      "loss": 0.0142,
      "step": 1068200
    },
    {
      "epoch": 1.7481654589134803,
      "grad_norm": 1.6121476888656616,
      "learning_rate": 6.546523197353769e-06,
      "loss": 0.017,
      "step": 1068220
    },
    {
      "epoch": 1.7481981893521337,
      "grad_norm": 0.22868259251117706,
      "learning_rate": 6.546457305140252e-06,
      "loss": 0.0225,
      "step": 1068240
    },
    {
      "epoch": 1.748230919790787,
      "grad_norm": 1.0362112522125244,
      "learning_rate": 6.546391412926735e-06,
      "loss": 0.026,
      "step": 1068260
    },
    {
      "epoch": 1.7482636502294404,
      "grad_norm": 1.0131113529205322,
      "learning_rate": 6.546325520713218e-06,
      "loss": 0.0137,
      "step": 1068280
    },
    {
      "epoch": 1.7482963806680938,
      "grad_norm": 0.8238417506217957,
      "learning_rate": 6.5462596284997e-06,
      "loss": 0.0164,
      "step": 1068300
    },
    {
      "epoch": 1.748329111106747,
      "grad_norm": 0.2837066650390625,
      "learning_rate": 6.546193736286184e-06,
      "loss": 0.0182,
      "step": 1068320
    },
    {
      "epoch": 1.7483618415454005,
      "grad_norm": 0.19919860363006592,
      "learning_rate": 6.546127844072666e-06,
      "loss": 0.0191,
      "step": 1068340
    },
    {
      "epoch": 1.7483945719840537,
      "grad_norm": 0.18260030448436737,
      "learning_rate": 6.546061951859149e-06,
      "loss": 0.0223,
      "step": 1068360
    },
    {
      "epoch": 1.748427302422707,
      "grad_norm": 0.5103262662887573,
      "learning_rate": 6.545996059645632e-06,
      "loss": 0.0204,
      "step": 1068380
    },
    {
      "epoch": 1.7484600328613604,
      "grad_norm": 0.4521360397338867,
      "learning_rate": 6.545930167432115e-06,
      "loss": 0.0212,
      "step": 1068400
    },
    {
      "epoch": 1.7484927633000138,
      "grad_norm": 1.3931858539581299,
      "learning_rate": 6.545864275218598e-06,
      "loss": 0.0171,
      "step": 1068420
    },
    {
      "epoch": 1.7485254937386672,
      "grad_norm": 0.6582620143890381,
      "learning_rate": 6.545798383005081e-06,
      "loss": 0.0147,
      "step": 1068440
    },
    {
      "epoch": 1.7485582241773203,
      "grad_norm": 0.688050389289856,
      "learning_rate": 6.545732490791563e-06,
      "loss": 0.0149,
      "step": 1068460
    },
    {
      "epoch": 1.748590954615974,
      "grad_norm": 0.17432467639446259,
      "learning_rate": 6.545666598578047e-06,
      "loss": 0.0175,
      "step": 1068480
    },
    {
      "epoch": 1.748623685054627,
      "grad_norm": 0.2511798143386841,
      "learning_rate": 6.54560070636453e-06,
      "loss": 0.0143,
      "step": 1068500
    },
    {
      "epoch": 1.7486564154932804,
      "grad_norm": 0.44304925203323364,
      "learning_rate": 6.545534814151012e-06,
      "loss": 0.0199,
      "step": 1068520
    },
    {
      "epoch": 1.7486891459319338,
      "grad_norm": 0.6001504063606262,
      "learning_rate": 6.545468921937496e-06,
      "loss": 0.0186,
      "step": 1068540
    },
    {
      "epoch": 1.748721876370587,
      "grad_norm": 0.9183749556541443,
      "learning_rate": 6.545403029723978e-06,
      "loss": 0.0162,
      "step": 1068560
    },
    {
      "epoch": 1.7487546068092406,
      "grad_norm": 1.0116980075836182,
      "learning_rate": 6.545337137510461e-06,
      "loss": 0.0177,
      "step": 1068580
    },
    {
      "epoch": 1.7487873372478937,
      "grad_norm": 0.9387437105178833,
      "learning_rate": 6.545271245296944e-06,
      "loss": 0.0244,
      "step": 1068600
    },
    {
      "epoch": 1.7488200676865473,
      "grad_norm": 0.5139555335044861,
      "learning_rate": 6.545205353083427e-06,
      "loss": 0.0152,
      "step": 1068620
    },
    {
      "epoch": 1.7488527981252004,
      "grad_norm": 0.7128241658210754,
      "learning_rate": 6.545139460869909e-06,
      "loss": 0.0171,
      "step": 1068640
    },
    {
      "epoch": 1.7488855285638538,
      "grad_norm": 0.36791348457336426,
      "learning_rate": 6.545073568656393e-06,
      "loss": 0.019,
      "step": 1068660
    },
    {
      "epoch": 1.7489182590025072,
      "grad_norm": 4.942151069641113,
      "learning_rate": 6.545007676442875e-06,
      "loss": 0.0152,
      "step": 1068680
    },
    {
      "epoch": 1.7489509894411603,
      "grad_norm": 0.6884384751319885,
      "learning_rate": 6.5449417842293585e-06,
      "loss": 0.0138,
      "step": 1068700
    },
    {
      "epoch": 1.748983719879814,
      "grad_norm": 0.7828994393348694,
      "learning_rate": 6.54487589201584e-06,
      "loss": 0.0253,
      "step": 1068720
    },
    {
      "epoch": 1.749016450318467,
      "grad_norm": 0.14734527468681335,
      "learning_rate": 6.544809999802324e-06,
      "loss": 0.0162,
      "step": 1068740
    },
    {
      "epoch": 1.7490491807571205,
      "grad_norm": 0.2747246325016022,
      "learning_rate": 6.544744107588807e-06,
      "loss": 0.018,
      "step": 1068760
    },
    {
      "epoch": 1.7490819111957738,
      "grad_norm": 2.068194627761841,
      "learning_rate": 6.5446782153752894e-06,
      "loss": 0.0243,
      "step": 1068780
    },
    {
      "epoch": 1.7491146416344272,
      "grad_norm": 0.15091639757156372,
      "learning_rate": 6.544612323161772e-06,
      "loss": 0.0194,
      "step": 1068800
    },
    {
      "epoch": 1.7491473720730806,
      "grad_norm": 0.44823434948921204,
      "learning_rate": 6.544546430948256e-06,
      "loss": 0.0178,
      "step": 1068820
    },
    {
      "epoch": 1.7491801025117337,
      "grad_norm": 1.3347264528274536,
      "learning_rate": 6.5444805387347385e-06,
      "loss": 0.016,
      "step": 1068840
    },
    {
      "epoch": 1.7492128329503873,
      "grad_norm": 0.11552223563194275,
      "learning_rate": 6.544414646521221e-06,
      "loss": 0.0136,
      "step": 1068860
    },
    {
      "epoch": 1.7492455633890405,
      "grad_norm": 0.9991870522499084,
      "learning_rate": 6.544348754307705e-06,
      "loss": 0.0259,
      "step": 1068880
    },
    {
      "epoch": 1.7492782938276938,
      "grad_norm": 0.18314695358276367,
      "learning_rate": 6.544282862094187e-06,
      "loss": 0.0168,
      "step": 1068900
    },
    {
      "epoch": 1.7493110242663472,
      "grad_norm": 0.543485164642334,
      "learning_rate": 6.54421696988067e-06,
      "loss": 0.0157,
      "step": 1068920
    },
    {
      "epoch": 1.7493437547050006,
      "grad_norm": 0.1793241947889328,
      "learning_rate": 6.544151077667152e-06,
      "loss": 0.0189,
      "step": 1068940
    },
    {
      "epoch": 1.749376485143654,
      "grad_norm": 0.09647074341773987,
      "learning_rate": 6.544085185453636e-06,
      "loss": 0.0183,
      "step": 1068960
    },
    {
      "epoch": 1.749409215582307,
      "grad_norm": 0.18502302467823029,
      "learning_rate": 6.544019293240118e-06,
      "loss": 0.0197,
      "step": 1068980
    },
    {
      "epoch": 1.7494419460209607,
      "grad_norm": 0.8373228907585144,
      "learning_rate": 6.543953401026601e-06,
      "loss": 0.0142,
      "step": 1069000
    },
    {
      "epoch": 1.7494746764596139,
      "grad_norm": 0.7520425319671631,
      "learning_rate": 6.543887508813084e-06,
      "loss": 0.0279,
      "step": 1069020
    },
    {
      "epoch": 1.7495074068982672,
      "grad_norm": 2.1376266479492188,
      "learning_rate": 6.543821616599567e-06,
      "loss": 0.0211,
      "step": 1069040
    },
    {
      "epoch": 1.7495401373369206,
      "grad_norm": 0.7095142006874084,
      "learning_rate": 6.5437557243860495e-06,
      "loss": 0.0198,
      "step": 1069060
    },
    {
      "epoch": 1.749572867775574,
      "grad_norm": 0.30392172932624817,
      "learning_rate": 6.543689832172533e-06,
      "loss": 0.0147,
      "step": 1069080
    },
    {
      "epoch": 1.7496055982142273,
      "grad_norm": 0.8348641991615295,
      "learning_rate": 6.543623939959015e-06,
      "loss": 0.015,
      "step": 1069100
    },
    {
      "epoch": 1.7496383286528805,
      "grad_norm": 0.4848232567310333,
      "learning_rate": 6.5435580477454986e-06,
      "loss": 0.0287,
      "step": 1069120
    },
    {
      "epoch": 1.749671059091534,
      "grad_norm": 0.5278375148773193,
      "learning_rate": 6.5434921555319805e-06,
      "loss": 0.0247,
      "step": 1069140
    },
    {
      "epoch": 1.7497037895301872,
      "grad_norm": 0.8703275322914124,
      "learning_rate": 6.543426263318464e-06,
      "loss": 0.0152,
      "step": 1069160
    },
    {
      "epoch": 1.7497365199688406,
      "grad_norm": 0.2591869533061981,
      "learning_rate": 6.543360371104948e-06,
      "loss": 0.0175,
      "step": 1069180
    },
    {
      "epoch": 1.749769250407494,
      "grad_norm": 0.3680826723575592,
      "learning_rate": 6.5432944788914295e-06,
      "loss": 0.0211,
      "step": 1069200
    },
    {
      "epoch": 1.7498019808461474,
      "grad_norm": 0.4885849356651306,
      "learning_rate": 6.543228586677913e-06,
      "loss": 0.0113,
      "step": 1069220
    },
    {
      "epoch": 1.7498347112848007,
      "grad_norm": 0.734628438949585,
      "learning_rate": 6.543162694464396e-06,
      "loss": 0.0227,
      "step": 1069240
    },
    {
      "epoch": 1.7498674417234539,
      "grad_norm": 0.2503986358642578,
      "learning_rate": 6.543096802250879e-06,
      "loss": 0.0211,
      "step": 1069260
    },
    {
      "epoch": 1.7499001721621075,
      "grad_norm": 0.7670130729675293,
      "learning_rate": 6.543030910037361e-06,
      "loss": 0.0209,
      "step": 1069280
    },
    {
      "epoch": 1.7499329026007606,
      "grad_norm": 0.28024137020111084,
      "learning_rate": 6.542965017823845e-06,
      "loss": 0.0146,
      "step": 1069300
    },
    {
      "epoch": 1.749965633039414,
      "grad_norm": 0.21462157368659973,
      "learning_rate": 6.542899125610327e-06,
      "loss": 0.0226,
      "step": 1069320
    },
    {
      "epoch": 1.7499983634780674,
      "grad_norm": 0.6693316698074341,
      "learning_rate": 6.54283323339681e-06,
      "loss": 0.019,
      "step": 1069340
    },
    {
      "epoch": 1.7500310939167205,
      "grad_norm": 0.1340951919555664,
      "learning_rate": 6.542767341183292e-06,
      "loss": 0.0279,
      "step": 1069360
    },
    {
      "epoch": 1.750063824355374,
      "grad_norm": 0.791275143623352,
      "learning_rate": 6.542701448969776e-06,
      "loss": 0.0136,
      "step": 1069380
    },
    {
      "epoch": 1.7500965547940273,
      "grad_norm": 0.3575764000415802,
      "learning_rate": 6.542635556756259e-06,
      "loss": 0.0112,
      "step": 1069400
    },
    {
      "epoch": 1.7501292852326809,
      "grad_norm": 0.36609217524528503,
      "learning_rate": 6.542569664542741e-06,
      "loss": 0.0194,
      "step": 1069420
    },
    {
      "epoch": 1.750162015671334,
      "grad_norm": 0.4475347697734833,
      "learning_rate": 6.542503772329224e-06,
      "loss": 0.0202,
      "step": 1069440
    },
    {
      "epoch": 1.7501947461099874,
      "grad_norm": 0.38343295454978943,
      "learning_rate": 6.542437880115708e-06,
      "loss": 0.018,
      "step": 1069460
    },
    {
      "epoch": 1.7502274765486407,
      "grad_norm": 0.4892214238643646,
      "learning_rate": 6.5423719879021896e-06,
      "loss": 0.0303,
      "step": 1069480
    },
    {
      "epoch": 1.750260206987294,
      "grad_norm": 1.9524427652359009,
      "learning_rate": 6.542306095688673e-06,
      "loss": 0.0173,
      "step": 1069500
    },
    {
      "epoch": 1.7502929374259475,
      "grad_norm": 0.7591930031776428,
      "learning_rate": 6.542240203475157e-06,
      "loss": 0.0187,
      "step": 1069520
    },
    {
      "epoch": 1.7503256678646006,
      "grad_norm": 0.2656594216823578,
      "learning_rate": 6.542174311261639e-06,
      "loss": 0.016,
      "step": 1069540
    },
    {
      "epoch": 1.750358398303254,
      "grad_norm": 0.5342859029769897,
      "learning_rate": 6.542108419048122e-06,
      "loss": 0.0159,
      "step": 1069560
    },
    {
      "epoch": 1.7503911287419074,
      "grad_norm": 0.49275878071784973,
      "learning_rate": 6.542042526834604e-06,
      "loss": 0.023,
      "step": 1069580
    },
    {
      "epoch": 1.7504238591805608,
      "grad_norm": 0.7349313497543335,
      "learning_rate": 6.541976634621088e-06,
      "loss": 0.0217,
      "step": 1069600
    },
    {
      "epoch": 1.7504565896192141,
      "grad_norm": 0.45818328857421875,
      "learning_rate": 6.5419107424075704e-06,
      "loss": 0.0247,
      "step": 1069620
    },
    {
      "epoch": 1.7504893200578673,
      "grad_norm": 0.40161198377609253,
      "learning_rate": 6.541844850194053e-06,
      "loss": 0.0197,
      "step": 1069640
    },
    {
      "epoch": 1.7505220504965209,
      "grad_norm": 0.9600669145584106,
      "learning_rate": 6.541778957980536e-06,
      "loss": 0.0235,
      "step": 1069660
    },
    {
      "epoch": 1.750554780935174,
      "grad_norm": 0.20057068765163422,
      "learning_rate": 6.5417130657670195e-06,
      "loss": 0.0159,
      "step": 1069680
    },
    {
      "epoch": 1.7505875113738274,
      "grad_norm": 0.43273207545280457,
      "learning_rate": 6.541647173553501e-06,
      "loss": 0.02,
      "step": 1069700
    },
    {
      "epoch": 1.7506202418124808,
      "grad_norm": 0.5814642906188965,
      "learning_rate": 6.541581281339985e-06,
      "loss": 0.0209,
      "step": 1069720
    },
    {
      "epoch": 1.7506529722511341,
      "grad_norm": 0.11371693015098572,
      "learning_rate": 6.541515389126467e-06,
      "loss": 0.013,
      "step": 1069740
    },
    {
      "epoch": 1.7506857026897875,
      "grad_norm": 0.6535177826881409,
      "learning_rate": 6.5414494969129505e-06,
      "loss": 0.0198,
      "step": 1069760
    },
    {
      "epoch": 1.7507184331284407,
      "grad_norm": 0.42616501450538635,
      "learning_rate": 6.541383604699433e-06,
      "loss": 0.035,
      "step": 1069780
    },
    {
      "epoch": 1.7507511635670943,
      "grad_norm": 0.3441864550113678,
      "learning_rate": 6.541317712485916e-06,
      "loss": 0.0163,
      "step": 1069800
    },
    {
      "epoch": 1.7507838940057474,
      "grad_norm": 0.6648322939872742,
      "learning_rate": 6.541251820272399e-06,
      "loss": 0.029,
      "step": 1069820
    },
    {
      "epoch": 1.7508166244444008,
      "grad_norm": 0.8083367347717285,
      "learning_rate": 6.541185928058882e-06,
      "loss": 0.0212,
      "step": 1069840
    },
    {
      "epoch": 1.7508493548830542,
      "grad_norm": 2.7124440670013428,
      "learning_rate": 6.541120035845364e-06,
      "loss": 0.0253,
      "step": 1069860
    },
    {
      "epoch": 1.7508820853217075,
      "grad_norm": 0.6170055866241455,
      "learning_rate": 6.541054143631848e-06,
      "loss": 0.0206,
      "step": 1069880
    },
    {
      "epoch": 1.750914815760361,
      "grad_norm": 0.2992909252643585,
      "learning_rate": 6.540988251418331e-06,
      "loss": 0.0225,
      "step": 1069900
    },
    {
      "epoch": 1.750947546199014,
      "grad_norm": 0.10291250795125961,
      "learning_rate": 6.540922359204813e-06,
      "loss": 0.0168,
      "step": 1069920
    },
    {
      "epoch": 1.7509802766376676,
      "grad_norm": 0.4744754135608673,
      "learning_rate": 6.540856466991297e-06,
      "loss": 0.0215,
      "step": 1069940
    },
    {
      "epoch": 1.7510130070763208,
      "grad_norm": 0.7025232315063477,
      "learning_rate": 6.540790574777779e-06,
      "loss": 0.0167,
      "step": 1069960
    },
    {
      "epoch": 1.7510457375149742,
      "grad_norm": 0.3359362781047821,
      "learning_rate": 6.540724682564262e-06,
      "loss": 0.0251,
      "step": 1069980
    },
    {
      "epoch": 1.7510784679536275,
      "grad_norm": 0.5294713973999023,
      "learning_rate": 6.540658790350744e-06,
      "loss": 0.0171,
      "step": 1070000
    },
    {
      "epoch": 1.7511111983922807,
      "grad_norm": 0.13856053352355957,
      "learning_rate": 6.540592898137228e-06,
      "loss": 0.0199,
      "step": 1070020
    },
    {
      "epoch": 1.7511439288309343,
      "grad_norm": 0.6761851906776428,
      "learning_rate": 6.5405270059237105e-06,
      "loss": 0.0155,
      "step": 1070040
    },
    {
      "epoch": 1.7511766592695874,
      "grad_norm": 0.388033390045166,
      "learning_rate": 6.540461113710193e-06,
      "loss": 0.0143,
      "step": 1070060
    },
    {
      "epoch": 1.751209389708241,
      "grad_norm": 0.7874389886856079,
      "learning_rate": 6.540395221496676e-06,
      "loss": 0.0183,
      "step": 1070080
    },
    {
      "epoch": 1.7512421201468942,
      "grad_norm": 0.48849356174468994,
      "learning_rate": 6.54032932928316e-06,
      "loss": 0.0143,
      "step": 1070100
    },
    {
      "epoch": 1.7512748505855475,
      "grad_norm": 0.5575945377349854,
      "learning_rate": 6.5402634370696415e-06,
      "loss": 0.0171,
      "step": 1070120
    },
    {
      "epoch": 1.751307581024201,
      "grad_norm": 0.5373278260231018,
      "learning_rate": 6.540197544856125e-06,
      "loss": 0.0127,
      "step": 1070140
    },
    {
      "epoch": 1.751340311462854,
      "grad_norm": 0.7475212216377258,
      "learning_rate": 6.540131652642607e-06,
      "loss": 0.0169,
      "step": 1070160
    },
    {
      "epoch": 1.7513730419015077,
      "grad_norm": 0.9366459250450134,
      "learning_rate": 6.5400657604290905e-06,
      "loss": 0.0203,
      "step": 1070180
    },
    {
      "epoch": 1.7514057723401608,
      "grad_norm": 0.21482045948505402,
      "learning_rate": 6.539999868215573e-06,
      "loss": 0.0184,
      "step": 1070200
    },
    {
      "epoch": 1.7514385027788142,
      "grad_norm": 0.37514230608940125,
      "learning_rate": 6.539933976002056e-06,
      "loss": 0.0199,
      "step": 1070220
    },
    {
      "epoch": 1.7514712332174676,
      "grad_norm": 0.26011642813682556,
      "learning_rate": 6.53986808378854e-06,
      "loss": 0.0192,
      "step": 1070240
    },
    {
      "epoch": 1.751503963656121,
      "grad_norm": 0.555847704410553,
      "learning_rate": 6.539802191575022e-06,
      "loss": 0.0182,
      "step": 1070260
    },
    {
      "epoch": 1.7515366940947743,
      "grad_norm": 0.463248074054718,
      "learning_rate": 6.539736299361505e-06,
      "loss": 0.0147,
      "step": 1070280
    },
    {
      "epoch": 1.7515694245334275,
      "grad_norm": 0.495543897151947,
      "learning_rate": 6.539670407147988e-06,
      "loss": 0.0218,
      "step": 1070300
    },
    {
      "epoch": 1.751602154972081,
      "grad_norm": 0.2439783215522766,
      "learning_rate": 6.539604514934471e-06,
      "loss": 0.0205,
      "step": 1070320
    },
    {
      "epoch": 1.7516348854107342,
      "grad_norm": 0.5293930768966675,
      "learning_rate": 6.539538622720953e-06,
      "loss": 0.0202,
      "step": 1070340
    },
    {
      "epoch": 1.7516676158493876,
      "grad_norm": 0.7059395909309387,
      "learning_rate": 6.539472730507437e-06,
      "loss": 0.0209,
      "step": 1070360
    },
    {
      "epoch": 1.751700346288041,
      "grad_norm": 0.4622969329357147,
      "learning_rate": 6.539406838293919e-06,
      "loss": 0.0166,
      "step": 1070380
    },
    {
      "epoch": 1.7517330767266943,
      "grad_norm": 0.4668121635913849,
      "learning_rate": 6.539340946080402e-06,
      "loss": 0.0239,
      "step": 1070400
    },
    {
      "epoch": 1.7517658071653477,
      "grad_norm": 0.9394646286964417,
      "learning_rate": 6.539275053866885e-06,
      "loss": 0.0144,
      "step": 1070420
    },
    {
      "epoch": 1.7517985376040008,
      "grad_norm": 0.5086357593536377,
      "learning_rate": 6.539209161653368e-06,
      "loss": 0.0217,
      "step": 1070440
    },
    {
      "epoch": 1.7518312680426544,
      "grad_norm": 0.6907349228858948,
      "learning_rate": 6.539143269439851e-06,
      "loss": 0.0206,
      "step": 1070460
    },
    {
      "epoch": 1.7518639984813076,
      "grad_norm": 0.30215781927108765,
      "learning_rate": 6.539077377226334e-06,
      "loss": 0.0199,
      "step": 1070480
    },
    {
      "epoch": 1.751896728919961,
      "grad_norm": 0.4691944420337677,
      "learning_rate": 6.539011485012816e-06,
      "loss": 0.0232,
      "step": 1070500
    },
    {
      "epoch": 1.7519294593586143,
      "grad_norm": 0.19930537045001984,
      "learning_rate": 6.5389455927993e-06,
      "loss": 0.0207,
      "step": 1070520
    },
    {
      "epoch": 1.7519621897972677,
      "grad_norm": 0.17783479392528534,
      "learning_rate": 6.5388797005857816e-06,
      "loss": 0.0203,
      "step": 1070540
    },
    {
      "epoch": 1.751994920235921,
      "grad_norm": 0.50310218334198,
      "learning_rate": 6.538813808372265e-06,
      "loss": 0.0246,
      "step": 1070560
    },
    {
      "epoch": 1.7520276506745742,
      "grad_norm": 0.43112659454345703,
      "learning_rate": 6.538747916158749e-06,
      "loss": 0.0228,
      "step": 1070580
    },
    {
      "epoch": 1.7520603811132278,
      "grad_norm": 0.3407886326313019,
      "learning_rate": 6.538682023945231e-06,
      "loss": 0.0154,
      "step": 1070600
    },
    {
      "epoch": 1.752093111551881,
      "grad_norm": 0.2171960473060608,
      "learning_rate": 6.538616131731714e-06,
      "loss": 0.017,
      "step": 1070620
    },
    {
      "epoch": 1.7521258419905343,
      "grad_norm": 1.4581559896469116,
      "learning_rate": 6.538550239518197e-06,
      "loss": 0.0222,
      "step": 1070640
    },
    {
      "epoch": 1.7521585724291877,
      "grad_norm": 0.45339328050613403,
      "learning_rate": 6.53848434730468e-06,
      "loss": 0.0203,
      "step": 1070660
    },
    {
      "epoch": 1.752191302867841,
      "grad_norm": 0.48824751377105713,
      "learning_rate": 6.5384184550911624e-06,
      "loss": 0.0118,
      "step": 1070680
    },
    {
      "epoch": 1.7522240333064945,
      "grad_norm": 0.48771849274635315,
      "learning_rate": 6.538352562877646e-06,
      "loss": 0.0215,
      "step": 1070700
    },
    {
      "epoch": 1.7522567637451476,
      "grad_norm": 0.3134502172470093,
      "learning_rate": 6.538286670664128e-06,
      "loss": 0.0241,
      "step": 1070720
    },
    {
      "epoch": 1.7522894941838012,
      "grad_norm": 0.7819608449935913,
      "learning_rate": 6.5382207784506115e-06,
      "loss": 0.0172,
      "step": 1070740
    },
    {
      "epoch": 1.7523222246224543,
      "grad_norm": 0.6001984477043152,
      "learning_rate": 6.538154886237093e-06,
      "loss": 0.0161,
      "step": 1070760
    },
    {
      "epoch": 1.7523549550611077,
      "grad_norm": 0.7173269987106323,
      "learning_rate": 6.538088994023577e-06,
      "loss": 0.0178,
      "step": 1070780
    },
    {
      "epoch": 1.752387685499761,
      "grad_norm": 1.079147458076477,
      "learning_rate": 6.538023101810059e-06,
      "loss": 0.0196,
      "step": 1070800
    },
    {
      "epoch": 1.7524204159384142,
      "grad_norm": 0.3743428587913513,
      "learning_rate": 6.5379572095965424e-06,
      "loss": 0.0181,
      "step": 1070820
    },
    {
      "epoch": 1.7524531463770678,
      "grad_norm": 1.7941290140151978,
      "learning_rate": 6.537891317383025e-06,
      "loss": 0.0239,
      "step": 1070840
    },
    {
      "epoch": 1.752485876815721,
      "grad_norm": 0.7489839792251587,
      "learning_rate": 6.537825425169509e-06,
      "loss": 0.0168,
      "step": 1070860
    },
    {
      "epoch": 1.7525186072543746,
      "grad_norm": 0.6119499802589417,
      "learning_rate": 6.537759532955991e-06,
      "loss": 0.021,
      "step": 1070880
    },
    {
      "epoch": 1.7525513376930277,
      "grad_norm": 1.2441622018814087,
      "learning_rate": 6.537693640742474e-06,
      "loss": 0.0241,
      "step": 1070900
    },
    {
      "epoch": 1.752584068131681,
      "grad_norm": 0.3863360583782196,
      "learning_rate": 6.537627748528956e-06,
      "loss": 0.0213,
      "step": 1070920
    },
    {
      "epoch": 1.7526167985703345,
      "grad_norm": 0.24601750075817108,
      "learning_rate": 6.53756185631544e-06,
      "loss": 0.0236,
      "step": 1070940
    },
    {
      "epoch": 1.7526495290089876,
      "grad_norm": 0.6172472834587097,
      "learning_rate": 6.537495964101923e-06,
      "loss": 0.0135,
      "step": 1070960
    },
    {
      "epoch": 1.7526822594476412,
      "grad_norm": 0.7691872119903564,
      "learning_rate": 6.537430071888405e-06,
      "loss": 0.0224,
      "step": 1070980
    },
    {
      "epoch": 1.7527149898862944,
      "grad_norm": 0.5106704235076904,
      "learning_rate": 6.537364179674889e-06,
      "loss": 0.0121,
      "step": 1071000
    },
    {
      "epoch": 1.7527477203249477,
      "grad_norm": 0.43231385946273804,
      "learning_rate": 6.537298287461371e-06,
      "loss": 0.019,
      "step": 1071020
    },
    {
      "epoch": 1.7527804507636011,
      "grad_norm": 0.3369646966457367,
      "learning_rate": 6.537232395247854e-06,
      "loss": 0.0306,
      "step": 1071040
    },
    {
      "epoch": 1.7528131812022545,
      "grad_norm": 0.5138657689094543,
      "learning_rate": 6.537166503034337e-06,
      "loss": 0.0169,
      "step": 1071060
    },
    {
      "epoch": 1.7528459116409079,
      "grad_norm": 1.4067152738571167,
      "learning_rate": 6.53710061082082e-06,
      "loss": 0.0186,
      "step": 1071080
    },
    {
      "epoch": 1.752878642079561,
      "grad_norm": 0.3711644113063812,
      "learning_rate": 6.5370347186073025e-06,
      "loss": 0.0159,
      "step": 1071100
    },
    {
      "epoch": 1.7529113725182146,
      "grad_norm": 0.16116441786289215,
      "learning_rate": 6.536968826393786e-06,
      "loss": 0.0269,
      "step": 1071120
    },
    {
      "epoch": 1.7529441029568678,
      "grad_norm": 0.45395681262016296,
      "learning_rate": 6.536902934180268e-06,
      "loss": 0.0186,
      "step": 1071140
    },
    {
      "epoch": 1.7529768333955211,
      "grad_norm": 0.22388455271720886,
      "learning_rate": 6.5368370419667516e-06,
      "loss": 0.0168,
      "step": 1071160
    },
    {
      "epoch": 1.7530095638341745,
      "grad_norm": 0.6044869422912598,
      "learning_rate": 6.5367711497532335e-06,
      "loss": 0.0186,
      "step": 1071180
    },
    {
      "epoch": 1.7530422942728279,
      "grad_norm": 0.28030455112457275,
      "learning_rate": 6.536705257539717e-06,
      "loss": 0.0172,
      "step": 1071200
    },
    {
      "epoch": 1.7530750247114812,
      "grad_norm": 0.37852492928504944,
      "learning_rate": 6.5366393653262e-06,
      "loss": 0.0163,
      "step": 1071220
    },
    {
      "epoch": 1.7531077551501344,
      "grad_norm": 0.40259161591529846,
      "learning_rate": 6.5365734731126825e-06,
      "loss": 0.0167,
      "step": 1071240
    },
    {
      "epoch": 1.753140485588788,
      "grad_norm": 0.39548152685165405,
      "learning_rate": 6.536507580899165e-06,
      "loss": 0.0189,
      "step": 1071260
    },
    {
      "epoch": 1.7531732160274411,
      "grad_norm": 0.1229148656129837,
      "learning_rate": 6.536441688685649e-06,
      "loss": 0.0211,
      "step": 1071280
    },
    {
      "epoch": 1.7532059464660945,
      "grad_norm": 0.45634689927101135,
      "learning_rate": 6.536375796472132e-06,
      "loss": 0.0161,
      "step": 1071300
    },
    {
      "epoch": 1.7532386769047479,
      "grad_norm": 0.634925127029419,
      "learning_rate": 6.536309904258614e-06,
      "loss": 0.0207,
      "step": 1071320
    },
    {
      "epoch": 1.7532714073434013,
      "grad_norm": 1.9556934833526611,
      "learning_rate": 6.536244012045098e-06,
      "loss": 0.0254,
      "step": 1071340
    },
    {
      "epoch": 1.7533041377820546,
      "grad_norm": 0.32978856563568115,
      "learning_rate": 6.53617811983158e-06,
      "loss": 0.0239,
      "step": 1071360
    },
    {
      "epoch": 1.7533368682207078,
      "grad_norm": 0.7100169062614441,
      "learning_rate": 6.536112227618063e-06,
      "loss": 0.0199,
      "step": 1071380
    },
    {
      "epoch": 1.7533695986593614,
      "grad_norm": 0.3222922384738922,
      "learning_rate": 6.536046335404545e-06,
      "loss": 0.0131,
      "step": 1071400
    },
    {
      "epoch": 1.7534023290980145,
      "grad_norm": 0.8678998947143555,
      "learning_rate": 6.535980443191029e-06,
      "loss": 0.0237,
      "step": 1071420
    },
    {
      "epoch": 1.753435059536668,
      "grad_norm": 0.5586970448493958,
      "learning_rate": 6.535914550977512e-06,
      "loss": 0.0131,
      "step": 1071440
    },
    {
      "epoch": 1.7534677899753213,
      "grad_norm": 0.6389495730400085,
      "learning_rate": 6.535848658763994e-06,
      "loss": 0.0247,
      "step": 1071460
    },
    {
      "epoch": 1.7535005204139746,
      "grad_norm": 0.4805712103843689,
      "learning_rate": 6.535782766550477e-06,
      "loss": 0.0173,
      "step": 1071480
    },
    {
      "epoch": 1.753533250852628,
      "grad_norm": 1.3676073551177979,
      "learning_rate": 6.535716874336961e-06,
      "loss": 0.021,
      "step": 1071500
    },
    {
      "epoch": 1.7535659812912812,
      "grad_norm": 1.213234543800354,
      "learning_rate": 6.5356509821234426e-06,
      "loss": 0.0217,
      "step": 1071520
    },
    {
      "epoch": 1.7535987117299348,
      "grad_norm": 0.9420297741889954,
      "learning_rate": 6.535585089909926e-06,
      "loss": 0.0164,
      "step": 1071540
    },
    {
      "epoch": 1.753631442168588,
      "grad_norm": 8.45591926574707,
      "learning_rate": 6.535519197696408e-06,
      "loss": 0.011,
      "step": 1071560
    },
    {
      "epoch": 1.7536641726072413,
      "grad_norm": 2.268899917602539,
      "learning_rate": 6.535453305482892e-06,
      "loss": 0.0168,
      "step": 1071580
    },
    {
      "epoch": 1.7536969030458947,
      "grad_norm": 0.7952300906181335,
      "learning_rate": 6.535387413269374e-06,
      "loss": 0.0212,
      "step": 1071600
    },
    {
      "epoch": 1.7537296334845478,
      "grad_norm": 0.7742933630943298,
      "learning_rate": 6.535321521055857e-06,
      "loss": 0.0177,
      "step": 1071620
    },
    {
      "epoch": 1.7537623639232014,
      "grad_norm": 0.9391811490058899,
      "learning_rate": 6.535255628842341e-06,
      "loss": 0.0192,
      "step": 1071640
    },
    {
      "epoch": 1.7537950943618545,
      "grad_norm": 0.3478800058364868,
      "learning_rate": 6.5351897366288234e-06,
      "loss": 0.0195,
      "step": 1071660
    },
    {
      "epoch": 1.7538278248005081,
      "grad_norm": 0.7913644909858704,
      "learning_rate": 6.535123844415306e-06,
      "loss": 0.0271,
      "step": 1071680
    },
    {
      "epoch": 1.7538605552391613,
      "grad_norm": 0.18101981282234192,
      "learning_rate": 6.535057952201789e-06,
      "loss": 0.0186,
      "step": 1071700
    },
    {
      "epoch": 1.7538932856778147,
      "grad_norm": 0.3615734875202179,
      "learning_rate": 6.5349920599882725e-06,
      "loss": 0.0184,
      "step": 1071720
    },
    {
      "epoch": 1.753926016116468,
      "grad_norm": 0.4117189943790436,
      "learning_rate": 6.534926167774754e-06,
      "loss": 0.0205,
      "step": 1071740
    },
    {
      "epoch": 1.7539587465551212,
      "grad_norm": 0.5905424356460571,
      "learning_rate": 6.534860275561238e-06,
      "loss": 0.0135,
      "step": 1071760
    },
    {
      "epoch": 1.7539914769937748,
      "grad_norm": 0.8235834240913391,
      "learning_rate": 6.53479438334772e-06,
      "loss": 0.0197,
      "step": 1071780
    },
    {
      "epoch": 1.754024207432428,
      "grad_norm": 0.33032023906707764,
      "learning_rate": 6.5347284911342035e-06,
      "loss": 0.0155,
      "step": 1071800
    },
    {
      "epoch": 1.7540569378710813,
      "grad_norm": 0.7079257965087891,
      "learning_rate": 6.534662598920685e-06,
      "loss": 0.019,
      "step": 1071820
    },
    {
      "epoch": 1.7540896683097347,
      "grad_norm": 1.0475964546203613,
      "learning_rate": 6.534596706707169e-06,
      "loss": 0.0301,
      "step": 1071840
    },
    {
      "epoch": 1.754122398748388,
      "grad_norm": 0.43419691920280457,
      "learning_rate": 6.534530814493652e-06,
      "loss": 0.0164,
      "step": 1071860
    },
    {
      "epoch": 1.7541551291870414,
      "grad_norm": 0.9762334227561951,
      "learning_rate": 6.5344649222801344e-06,
      "loss": 0.0235,
      "step": 1071880
    },
    {
      "epoch": 1.7541878596256946,
      "grad_norm": 0.16717629134655,
      "learning_rate": 6.534399030066617e-06,
      "loss": 0.0178,
      "step": 1071900
    },
    {
      "epoch": 1.7542205900643482,
      "grad_norm": 0.40545186400413513,
      "learning_rate": 6.534333137853101e-06,
      "loss": 0.0196,
      "step": 1071920
    },
    {
      "epoch": 1.7542533205030013,
      "grad_norm": 0.5317044854164124,
      "learning_rate": 6.534267245639583e-06,
      "loss": 0.015,
      "step": 1071940
    },
    {
      "epoch": 1.7542860509416547,
      "grad_norm": 0.513684868812561,
      "learning_rate": 6.534201353426066e-06,
      "loss": 0.0136,
      "step": 1071960
    },
    {
      "epoch": 1.754318781380308,
      "grad_norm": 0.9354561567306519,
      "learning_rate": 6.53413546121255e-06,
      "loss": 0.0178,
      "step": 1071980
    },
    {
      "epoch": 1.7543515118189614,
      "grad_norm": 0.21078087389469147,
      "learning_rate": 6.534069568999032e-06,
      "loss": 0.0209,
      "step": 1072000
    },
    {
      "epoch": 1.7543842422576148,
      "grad_norm": 0.3436591923236847,
      "learning_rate": 6.534003676785515e-06,
      "loss": 0.0239,
      "step": 1072020
    },
    {
      "epoch": 1.754416972696268,
      "grad_norm": 0.3150566816329956,
      "learning_rate": 6.533937784571997e-06,
      "loss": 0.0199,
      "step": 1072040
    },
    {
      "epoch": 1.7544497031349215,
      "grad_norm": 0.15295647084712982,
      "learning_rate": 6.533871892358481e-06,
      "loss": 0.013,
      "step": 1072060
    },
    {
      "epoch": 1.7544824335735747,
      "grad_norm": 1.4685145616531372,
      "learning_rate": 6.5338060001449635e-06,
      "loss": 0.0271,
      "step": 1072080
    },
    {
      "epoch": 1.754515164012228,
      "grad_norm": 0.11188399791717529,
      "learning_rate": 6.533740107931446e-06,
      "loss": 0.0216,
      "step": 1072100
    },
    {
      "epoch": 1.7545478944508814,
      "grad_norm": 0.4237339496612549,
      "learning_rate": 6.533674215717929e-06,
      "loss": 0.0188,
      "step": 1072120
    },
    {
      "epoch": 1.7545806248895348,
      "grad_norm": 0.29856550693511963,
      "learning_rate": 6.533608323504413e-06,
      "loss": 0.0242,
      "step": 1072140
    },
    {
      "epoch": 1.7546133553281882,
      "grad_norm": 0.5900043249130249,
      "learning_rate": 6.5335424312908945e-06,
      "loss": 0.019,
      "step": 1072160
    },
    {
      "epoch": 1.7546460857668413,
      "grad_norm": 0.9390532970428467,
      "learning_rate": 6.533476539077378e-06,
      "loss": 0.0179,
      "step": 1072180
    },
    {
      "epoch": 1.754678816205495,
      "grad_norm": 1.4062610864639282,
      "learning_rate": 6.53341064686386e-06,
      "loss": 0.0143,
      "step": 1072200
    },
    {
      "epoch": 1.754711546644148,
      "grad_norm": 0.637667715549469,
      "learning_rate": 6.5333447546503435e-06,
      "loss": 0.0156,
      "step": 1072220
    },
    {
      "epoch": 1.7547442770828015,
      "grad_norm": 0.8281390070915222,
      "learning_rate": 6.533278862436826e-06,
      "loss": 0.0156,
      "step": 1072240
    },
    {
      "epoch": 1.7547770075214548,
      "grad_norm": 0.30650222301483154,
      "learning_rate": 6.533212970223309e-06,
      "loss": 0.0127,
      "step": 1072260
    },
    {
      "epoch": 1.7548097379601082,
      "grad_norm": 0.3995860815048218,
      "learning_rate": 6.533147078009792e-06,
      "loss": 0.015,
      "step": 1072280
    },
    {
      "epoch": 1.7548424683987616,
      "grad_norm": 0.6187408566474915,
      "learning_rate": 6.533081185796275e-06,
      "loss": 0.0165,
      "step": 1072300
    },
    {
      "epoch": 1.7548751988374147,
      "grad_norm": 0.9854273200035095,
      "learning_rate": 6.533015293582757e-06,
      "loss": 0.0176,
      "step": 1072320
    },
    {
      "epoch": 1.7549079292760683,
      "grad_norm": 0.22278837859630585,
      "learning_rate": 6.532949401369241e-06,
      "loss": 0.0237,
      "step": 1072340
    },
    {
      "epoch": 1.7549406597147215,
      "grad_norm": 0.6785991191864014,
      "learning_rate": 6.532883509155724e-06,
      "loss": 0.0158,
      "step": 1072360
    },
    {
      "epoch": 1.7549733901533748,
      "grad_norm": 0.2611681818962097,
      "learning_rate": 6.532817616942206e-06,
      "loss": 0.0209,
      "step": 1072380
    },
    {
      "epoch": 1.7550061205920282,
      "grad_norm": 0.4459265470504761,
      "learning_rate": 6.53275172472869e-06,
      "loss": 0.0262,
      "step": 1072400
    },
    {
      "epoch": 1.7550388510306814,
      "grad_norm": 1.191934585571289,
      "learning_rate": 6.532685832515172e-06,
      "loss": 0.0195,
      "step": 1072420
    },
    {
      "epoch": 1.755071581469335,
      "grad_norm": 1.2821913957595825,
      "learning_rate": 6.532619940301655e-06,
      "loss": 0.0167,
      "step": 1072440
    },
    {
      "epoch": 1.755104311907988,
      "grad_norm": 0.14190039038658142,
      "learning_rate": 6.532554048088138e-06,
      "loss": 0.0162,
      "step": 1072460
    },
    {
      "epoch": 1.7551370423466417,
      "grad_norm": 2.494291305541992,
      "learning_rate": 6.532488155874621e-06,
      "loss": 0.0348,
      "step": 1072480
    },
    {
      "epoch": 1.7551697727852948,
      "grad_norm": 0.4574229419231415,
      "learning_rate": 6.532422263661104e-06,
      "loss": 0.0163,
      "step": 1072500
    },
    {
      "epoch": 1.7552025032239482,
      "grad_norm": 0.21240520477294922,
      "learning_rate": 6.532356371447587e-06,
      "loss": 0.0149,
      "step": 1072520
    },
    {
      "epoch": 1.7552352336626016,
      "grad_norm": 0.20405420660972595,
      "learning_rate": 6.532290479234069e-06,
      "loss": 0.019,
      "step": 1072540
    },
    {
      "epoch": 1.7552679641012547,
      "grad_norm": 0.19598941504955292,
      "learning_rate": 6.532224587020553e-06,
      "loss": 0.0203,
      "step": 1072560
    },
    {
      "epoch": 1.7553006945399083,
      "grad_norm": 0.6212730407714844,
      "learning_rate": 6.5321586948070346e-06,
      "loss": 0.0186,
      "step": 1072580
    },
    {
      "epoch": 1.7553334249785615,
      "grad_norm": 0.16309238970279694,
      "learning_rate": 6.532092802593518e-06,
      "loss": 0.0148,
      "step": 1072600
    },
    {
      "epoch": 1.7553661554172149,
      "grad_norm": 0.06730709969997406,
      "learning_rate": 6.532026910380001e-06,
      "loss": 0.0171,
      "step": 1072620
    },
    {
      "epoch": 1.7553988858558682,
      "grad_norm": 0.7705890536308289,
      "learning_rate": 6.531961018166484e-06,
      "loss": 0.021,
      "step": 1072640
    },
    {
      "epoch": 1.7554316162945216,
      "grad_norm": 0.29360198974609375,
      "learning_rate": 6.531895125952966e-06,
      "loss": 0.0122,
      "step": 1072660
    },
    {
      "epoch": 1.755464346733175,
      "grad_norm": 0.5672102570533752,
      "learning_rate": 6.53182923373945e-06,
      "loss": 0.0251,
      "step": 1072680
    },
    {
      "epoch": 1.7554970771718281,
      "grad_norm": 0.43269962072372437,
      "learning_rate": 6.531763341525933e-06,
      "loss": 0.022,
      "step": 1072700
    },
    {
      "epoch": 1.7555298076104817,
      "grad_norm": 0.36054930090904236,
      "learning_rate": 6.5316974493124154e-06,
      "loss": 0.0199,
      "step": 1072720
    },
    {
      "epoch": 1.7555625380491349,
      "grad_norm": 0.42175477743148804,
      "learning_rate": 6.531631557098899e-06,
      "loss": 0.0203,
      "step": 1072740
    },
    {
      "epoch": 1.7555952684877882,
      "grad_norm": 1.3208993673324585,
      "learning_rate": 6.531565664885381e-06,
      "loss": 0.0257,
      "step": 1072760
    },
    {
      "epoch": 1.7556279989264416,
      "grad_norm": 1.1929247379302979,
      "learning_rate": 6.5314997726718645e-06,
      "loss": 0.0154,
      "step": 1072780
    },
    {
      "epoch": 1.755660729365095,
      "grad_norm": 0.4605531096458435,
      "learning_rate": 6.531433880458346e-06,
      "loss": 0.0224,
      "step": 1072800
    },
    {
      "epoch": 1.7556934598037484,
      "grad_norm": 1.1192255020141602,
      "learning_rate": 6.53136798824483e-06,
      "loss": 0.019,
      "step": 1072820
    },
    {
      "epoch": 1.7557261902424015,
      "grad_norm": 0.21974298357963562,
      "learning_rate": 6.531302096031312e-06,
      "loss": 0.0241,
      "step": 1072840
    },
    {
      "epoch": 1.755758920681055,
      "grad_norm": 1.4613598585128784,
      "learning_rate": 6.5312362038177955e-06,
      "loss": 0.015,
      "step": 1072860
    },
    {
      "epoch": 1.7557916511197083,
      "grad_norm": 1.197355031967163,
      "learning_rate": 6.531170311604278e-06,
      "loss": 0.0114,
      "step": 1072880
    },
    {
      "epoch": 1.7558243815583616,
      "grad_norm": 0.5832632184028625,
      "learning_rate": 6.531104419390761e-06,
      "loss": 0.0211,
      "step": 1072900
    },
    {
      "epoch": 1.755857111997015,
      "grad_norm": 0.3989487588405609,
      "learning_rate": 6.531038527177244e-06,
      "loss": 0.0162,
      "step": 1072920
    },
    {
      "epoch": 1.7558898424356684,
      "grad_norm": 1.0493310689926147,
      "learning_rate": 6.530972634963727e-06,
      "loss": 0.0265,
      "step": 1072940
    },
    {
      "epoch": 1.7559225728743217,
      "grad_norm": 0.6483852863311768,
      "learning_rate": 6.530906742750209e-06,
      "loss": 0.0161,
      "step": 1072960
    },
    {
      "epoch": 1.755955303312975,
      "grad_norm": 0.3039095401763916,
      "learning_rate": 6.530840850536693e-06,
      "loss": 0.0167,
      "step": 1072980
    },
    {
      "epoch": 1.7559880337516285,
      "grad_norm": 1.9637404680252075,
      "learning_rate": 6.530774958323175e-06,
      "loss": 0.0257,
      "step": 1073000
    },
    {
      "epoch": 1.7560207641902816,
      "grad_norm": 0.5553398132324219,
      "learning_rate": 6.530709066109658e-06,
      "loss": 0.0156,
      "step": 1073020
    },
    {
      "epoch": 1.756053494628935,
      "grad_norm": 0.9921548366546631,
      "learning_rate": 6.530643173896142e-06,
      "loss": 0.0237,
      "step": 1073040
    },
    {
      "epoch": 1.7560862250675884,
      "grad_norm": 0.9324045181274414,
      "learning_rate": 6.530577281682624e-06,
      "loss": 0.016,
      "step": 1073060
    },
    {
      "epoch": 1.7561189555062415,
      "grad_norm": 0.18918152153491974,
      "learning_rate": 6.530511389469107e-06,
      "loss": 0.0154,
      "step": 1073080
    },
    {
      "epoch": 1.7561516859448951,
      "grad_norm": 0.176991805434227,
      "learning_rate": 6.53044549725559e-06,
      "loss": 0.0172,
      "step": 1073100
    },
    {
      "epoch": 1.7561844163835483,
      "grad_norm": 2.0561771392822266,
      "learning_rate": 6.530379605042073e-06,
      "loss": 0.0196,
      "step": 1073120
    },
    {
      "epoch": 1.7562171468222019,
      "grad_norm": 0.3965972065925598,
      "learning_rate": 6.5303137128285555e-06,
      "loss": 0.0192,
      "step": 1073140
    },
    {
      "epoch": 1.756249877260855,
      "grad_norm": 0.14335037767887115,
      "learning_rate": 6.530247820615039e-06,
      "loss": 0.0161,
      "step": 1073160
    },
    {
      "epoch": 1.7562826076995084,
      "grad_norm": 0.3276723623275757,
      "learning_rate": 6.530181928401521e-06,
      "loss": 0.0119,
      "step": 1073180
    },
    {
      "epoch": 1.7563153381381618,
      "grad_norm": 0.6086409091949463,
      "learning_rate": 6.5301160361880046e-06,
      "loss": 0.0237,
      "step": 1073200
    },
    {
      "epoch": 1.756348068576815,
      "grad_norm": 1.6655426025390625,
      "learning_rate": 6.5300501439744865e-06,
      "loss": 0.016,
      "step": 1073220
    },
    {
      "epoch": 1.7563807990154685,
      "grad_norm": 1.5053091049194336,
      "learning_rate": 6.52998425176097e-06,
      "loss": 0.0155,
      "step": 1073240
    },
    {
      "epoch": 1.7564135294541217,
      "grad_norm": 0.5732386708259583,
      "learning_rate": 6.529918359547453e-06,
      "loss": 0.018,
      "step": 1073260
    },
    {
      "epoch": 1.756446259892775,
      "grad_norm": 1.1865192651748657,
      "learning_rate": 6.5298524673339355e-06,
      "loss": 0.0144,
      "step": 1073280
    },
    {
      "epoch": 1.7564789903314284,
      "grad_norm": 0.2201758474111557,
      "learning_rate": 6.529786575120418e-06,
      "loss": 0.0149,
      "step": 1073300
    },
    {
      "epoch": 1.7565117207700818,
      "grad_norm": 0.89736407995224,
      "learning_rate": 6.529720682906902e-06,
      "loss": 0.0225,
      "step": 1073320
    },
    {
      "epoch": 1.7565444512087351,
      "grad_norm": 1.1932400465011597,
      "learning_rate": 6.529654790693384e-06,
      "loss": 0.0188,
      "step": 1073340
    },
    {
      "epoch": 1.7565771816473883,
      "grad_norm": 0.11051277816295624,
      "learning_rate": 6.529588898479867e-06,
      "loss": 0.0131,
      "step": 1073360
    },
    {
      "epoch": 1.756609912086042,
      "grad_norm": 0.32646143436431885,
      "learning_rate": 6.529523006266349e-06,
      "loss": 0.024,
      "step": 1073380
    },
    {
      "epoch": 1.756642642524695,
      "grad_norm": 0.5217735171318054,
      "learning_rate": 6.529457114052833e-06,
      "loss": 0.0176,
      "step": 1073400
    },
    {
      "epoch": 1.7566753729633484,
      "grad_norm": 0.37066948413848877,
      "learning_rate": 6.529391221839316e-06,
      "loss": 0.0167,
      "step": 1073420
    },
    {
      "epoch": 1.7567081034020018,
      "grad_norm": 0.31932374835014343,
      "learning_rate": 6.529325329625798e-06,
      "loss": 0.0225,
      "step": 1073440
    },
    {
      "epoch": 1.7567408338406552,
      "grad_norm": 0.799457311630249,
      "learning_rate": 6.529259437412282e-06,
      "loss": 0.0212,
      "step": 1073460
    },
    {
      "epoch": 1.7567735642793085,
      "grad_norm": 1.0335004329681396,
      "learning_rate": 6.529193545198765e-06,
      "loss": 0.0201,
      "step": 1073480
    },
    {
      "epoch": 1.7568062947179617,
      "grad_norm": 0.2804296910762787,
      "learning_rate": 6.529127652985247e-06,
      "loss": 0.0148,
      "step": 1073500
    },
    {
      "epoch": 1.7568390251566153,
      "grad_norm": 1.2053035497665405,
      "learning_rate": 6.52906176077173e-06,
      "loss": 0.0195,
      "step": 1073520
    },
    {
      "epoch": 1.7568717555952684,
      "grad_norm": 1.9546940326690674,
      "learning_rate": 6.528995868558214e-06,
      "loss": 0.0198,
      "step": 1073540
    },
    {
      "epoch": 1.7569044860339218,
      "grad_norm": 0.36619865894317627,
      "learning_rate": 6.528929976344696e-06,
      "loss": 0.0198,
      "step": 1073560
    },
    {
      "epoch": 1.7569372164725752,
      "grad_norm": 1.1024302244186401,
      "learning_rate": 6.528864084131179e-06,
      "loss": 0.021,
      "step": 1073580
    },
    {
      "epoch": 1.7569699469112285,
      "grad_norm": 1.234963059425354,
      "learning_rate": 6.528798191917661e-06,
      "loss": 0.0158,
      "step": 1073600
    },
    {
      "epoch": 1.757002677349882,
      "grad_norm": 1.612717628479004,
      "learning_rate": 6.528732299704145e-06,
      "loss": 0.018,
      "step": 1073620
    },
    {
      "epoch": 1.757035407788535,
      "grad_norm": 1.3163821697235107,
      "learning_rate": 6.528666407490627e-06,
      "loss": 0.0251,
      "step": 1073640
    },
    {
      "epoch": 1.7570681382271887,
      "grad_norm": 0.5990674495697021,
      "learning_rate": 6.52860051527711e-06,
      "loss": 0.0108,
      "step": 1073660
    },
    {
      "epoch": 1.7571008686658418,
      "grad_norm": 0.12783779203891754,
      "learning_rate": 6.528534623063593e-06,
      "loss": 0.016,
      "step": 1073680
    },
    {
      "epoch": 1.7571335991044952,
      "grad_norm": 0.25962239503860474,
      "learning_rate": 6.5284687308500765e-06,
      "loss": 0.0181,
      "step": 1073700
    },
    {
      "epoch": 1.7571663295431486,
      "grad_norm": 0.5367910265922546,
      "learning_rate": 6.528402838636558e-06,
      "loss": 0.0117,
      "step": 1073720
    },
    {
      "epoch": 1.757199059981802,
      "grad_norm": 0.49932023882865906,
      "learning_rate": 6.528336946423042e-06,
      "loss": 0.0186,
      "step": 1073740
    },
    {
      "epoch": 1.7572317904204553,
      "grad_norm": 0.21318548917770386,
      "learning_rate": 6.5282710542095255e-06,
      "loss": 0.0207,
      "step": 1073760
    },
    {
      "epoch": 1.7572645208591084,
      "grad_norm": 0.46608415246009827,
      "learning_rate": 6.528205161996007e-06,
      "loss": 0.0173,
      "step": 1073780
    },
    {
      "epoch": 1.757297251297762,
      "grad_norm": 0.6838896870613098,
      "learning_rate": 6.528139269782491e-06,
      "loss": 0.0221,
      "step": 1073800
    },
    {
      "epoch": 1.7573299817364152,
      "grad_norm": 0.39621520042419434,
      "learning_rate": 6.528073377568973e-06,
      "loss": 0.0136,
      "step": 1073820
    },
    {
      "epoch": 1.7573627121750686,
      "grad_norm": 0.5025197267532349,
      "learning_rate": 6.5280074853554565e-06,
      "loss": 0.0245,
      "step": 1073840
    },
    {
      "epoch": 1.757395442613722,
      "grad_norm": 0.2920544147491455,
      "learning_rate": 6.527941593141938e-06,
      "loss": 0.0249,
      "step": 1073860
    },
    {
      "epoch": 1.757428173052375,
      "grad_norm": 0.9153175950050354,
      "learning_rate": 6.527875700928422e-06,
      "loss": 0.0188,
      "step": 1073880
    },
    {
      "epoch": 1.7574609034910287,
      "grad_norm": 0.500390887260437,
      "learning_rate": 6.527809808714905e-06,
      "loss": 0.021,
      "step": 1073900
    },
    {
      "epoch": 1.7574936339296818,
      "grad_norm": 0.6169205904006958,
      "learning_rate": 6.5277439165013874e-06,
      "loss": 0.0182,
      "step": 1073920
    },
    {
      "epoch": 1.7575263643683354,
      "grad_norm": 0.20864377915859222,
      "learning_rate": 6.52767802428787e-06,
      "loss": 0.0156,
      "step": 1073940
    },
    {
      "epoch": 1.7575590948069886,
      "grad_norm": 0.9977485537528992,
      "learning_rate": 6.527612132074354e-06,
      "loss": 0.0241,
      "step": 1073960
    },
    {
      "epoch": 1.757591825245642,
      "grad_norm": 1.1807254552841187,
      "learning_rate": 6.527546239860836e-06,
      "loss": 0.0168,
      "step": 1073980
    },
    {
      "epoch": 1.7576245556842953,
      "grad_norm": 0.9407674074172974,
      "learning_rate": 6.527480347647319e-06,
      "loss": 0.0153,
      "step": 1074000
    },
    {
      "epoch": 1.7576572861229485,
      "grad_norm": 3.5217268466949463,
      "learning_rate": 6.527414455433801e-06,
      "loss": 0.0261,
      "step": 1074020
    },
    {
      "epoch": 1.757690016561602,
      "grad_norm": 0.8943654894828796,
      "learning_rate": 6.527348563220285e-06,
      "loss": 0.0183,
      "step": 1074040
    },
    {
      "epoch": 1.7577227470002552,
      "grad_norm": 0.7758376002311707,
      "learning_rate": 6.5272826710067675e-06,
      "loss": 0.0212,
      "step": 1074060
    },
    {
      "epoch": 1.7577554774389086,
      "grad_norm": 0.6394690871238708,
      "learning_rate": 6.52721677879325e-06,
      "loss": 0.0164,
      "step": 1074080
    },
    {
      "epoch": 1.757788207877562,
      "grad_norm": 0.737170934677124,
      "learning_rate": 6.527150886579734e-06,
      "loss": 0.0206,
      "step": 1074100
    },
    {
      "epoch": 1.7578209383162153,
      "grad_norm": 1.0099503993988037,
      "learning_rate": 6.5270849943662165e-06,
      "loss": 0.0228,
      "step": 1074120
    },
    {
      "epoch": 1.7578536687548687,
      "grad_norm": 0.4215497374534607,
      "learning_rate": 6.527019102152699e-06,
      "loss": 0.0139,
      "step": 1074140
    },
    {
      "epoch": 1.7578863991935219,
      "grad_norm": 0.7095352411270142,
      "learning_rate": 6.526953209939182e-06,
      "loss": 0.0209,
      "step": 1074160
    },
    {
      "epoch": 1.7579191296321754,
      "grad_norm": 0.8659757971763611,
      "learning_rate": 6.526887317725666e-06,
      "loss": 0.01,
      "step": 1074180
    },
    {
      "epoch": 1.7579518600708286,
      "grad_norm": 0.4395810067653656,
      "learning_rate": 6.5268214255121475e-06,
      "loss": 0.0229,
      "step": 1074200
    },
    {
      "epoch": 1.757984590509482,
      "grad_norm": 0.8503167629241943,
      "learning_rate": 6.526755533298631e-06,
      "loss": 0.0143,
      "step": 1074220
    },
    {
      "epoch": 1.7580173209481353,
      "grad_norm": 0.272744357585907,
      "learning_rate": 6.526689641085113e-06,
      "loss": 0.0266,
      "step": 1074240
    },
    {
      "epoch": 1.7580500513867887,
      "grad_norm": 0.7141176462173462,
      "learning_rate": 6.5266237488715966e-06,
      "loss": 0.0159,
      "step": 1074260
    },
    {
      "epoch": 1.758082781825442,
      "grad_norm": 0.7142032384872437,
      "learning_rate": 6.526557856658079e-06,
      "loss": 0.0266,
      "step": 1074280
    },
    {
      "epoch": 1.7581155122640952,
      "grad_norm": 0.5631190538406372,
      "learning_rate": 6.526491964444562e-06,
      "loss": 0.0186,
      "step": 1074300
    },
    {
      "epoch": 1.7581482427027488,
      "grad_norm": 0.27797937393188477,
      "learning_rate": 6.526426072231045e-06,
      "loss": 0.0198,
      "step": 1074320
    },
    {
      "epoch": 1.758180973141402,
      "grad_norm": 0.474322646856308,
      "learning_rate": 6.526360180017528e-06,
      "loss": 0.0149,
      "step": 1074340
    },
    {
      "epoch": 1.7582137035800554,
      "grad_norm": 0.2568591237068176,
      "learning_rate": 6.52629428780401e-06,
      "loss": 0.0172,
      "step": 1074360
    },
    {
      "epoch": 1.7582464340187087,
      "grad_norm": 0.2373560070991516,
      "learning_rate": 6.526228395590494e-06,
      "loss": 0.0187,
      "step": 1074380
    },
    {
      "epoch": 1.758279164457362,
      "grad_norm": 1.1499571800231934,
      "learning_rate": 6.526162503376976e-06,
      "loss": 0.0241,
      "step": 1074400
    },
    {
      "epoch": 1.7583118948960155,
      "grad_norm": 0.7774880528450012,
      "learning_rate": 6.526096611163459e-06,
      "loss": 0.0186,
      "step": 1074420
    },
    {
      "epoch": 1.7583446253346686,
      "grad_norm": 0.4830039441585541,
      "learning_rate": 6.526030718949942e-06,
      "loss": 0.0188,
      "step": 1074440
    },
    {
      "epoch": 1.7583773557733222,
      "grad_norm": 0.4500628411769867,
      "learning_rate": 6.525964826736425e-06,
      "loss": 0.02,
      "step": 1074460
    },
    {
      "epoch": 1.7584100862119754,
      "grad_norm": 0.5214088559150696,
      "learning_rate": 6.525898934522908e-06,
      "loss": 0.0165,
      "step": 1074480
    },
    {
      "epoch": 1.7584428166506287,
      "grad_norm": 0.14539562165737152,
      "learning_rate": 6.525833042309391e-06,
      "loss": 0.013,
      "step": 1074500
    },
    {
      "epoch": 1.758475547089282,
      "grad_norm": 0.35898998379707336,
      "learning_rate": 6.525767150095874e-06,
      "loss": 0.0177,
      "step": 1074520
    },
    {
      "epoch": 1.7585082775279355,
      "grad_norm": 0.3221629858016968,
      "learning_rate": 6.525701257882357e-06,
      "loss": 0.0187,
      "step": 1074540
    },
    {
      "epoch": 1.7585410079665889,
      "grad_norm": 0.3255607783794403,
      "learning_rate": 6.52563536566884e-06,
      "loss": 0.0169,
      "step": 1074560
    },
    {
      "epoch": 1.758573738405242,
      "grad_norm": 0.25050482153892517,
      "learning_rate": 6.525569473455322e-06,
      "loss": 0.0245,
      "step": 1074580
    },
    {
      "epoch": 1.7586064688438956,
      "grad_norm": 0.4940466582775116,
      "learning_rate": 6.525503581241806e-06,
      "loss": 0.0175,
      "step": 1074600
    },
    {
      "epoch": 1.7586391992825487,
      "grad_norm": 1.0553436279296875,
      "learning_rate": 6.5254376890282876e-06,
      "loss": 0.0191,
      "step": 1074620
    },
    {
      "epoch": 1.7586719297212021,
      "grad_norm": 0.6391391158103943,
      "learning_rate": 6.525371796814771e-06,
      "loss": 0.0126,
      "step": 1074640
    },
    {
      "epoch": 1.7587046601598555,
      "grad_norm": 0.6068990230560303,
      "learning_rate": 6.525305904601253e-06,
      "loss": 0.0197,
      "step": 1074660
    },
    {
      "epoch": 1.7587373905985086,
      "grad_norm": 0.825599730014801,
      "learning_rate": 6.525240012387737e-06,
      "loss": 0.0176,
      "step": 1074680
    },
    {
      "epoch": 1.7587701210371622,
      "grad_norm": Infinity,
      "learning_rate": 6.525174120174219e-06,
      "loss": 0.0274,
      "step": 1074700
    },
    {
      "epoch": 1.7588028514758154,
      "grad_norm": Infinity,
      "learning_rate": 6.525108227960703e-06,
      "loss": 0.0178,
      "step": 1074720
    },
    {
      "epoch": 1.758835581914469,
      "grad_norm": 0.47040918469429016,
      "learning_rate": 6.525042335747185e-06,
      "loss": 0.0169,
      "step": 1074740
    },
    {
      "epoch": 1.7588683123531221,
      "grad_norm": 0.18627065420150757,
      "learning_rate": 6.5249764435336684e-06,
      "loss": 0.0166,
      "step": 1074760
    },
    {
      "epoch": 1.7589010427917755,
      "grad_norm": 0.593490719795227,
      "learning_rate": 6.52491055132015e-06,
      "loss": 0.017,
      "step": 1074780
    },
    {
      "epoch": 1.7589337732304289,
      "grad_norm": 0.46733036637306213,
      "learning_rate": 6.524844659106634e-06,
      "loss": 0.0229,
      "step": 1074800
    },
    {
      "epoch": 1.758966503669082,
      "grad_norm": 0.2695438265800476,
      "learning_rate": 6.5247787668931175e-06,
      "loss": 0.0235,
      "step": 1074820
    },
    {
      "epoch": 1.7589992341077356,
      "grad_norm": 0.4573979377746582,
      "learning_rate": 6.524712874679599e-06,
      "loss": 0.0176,
      "step": 1074840
    },
    {
      "epoch": 1.7590319645463888,
      "grad_norm": 0.40037107467651367,
      "learning_rate": 6.524646982466083e-06,
      "loss": 0.0212,
      "step": 1074860
    },
    {
      "epoch": 1.7590646949850421,
      "grad_norm": 0.6026121973991394,
      "learning_rate": 6.524581090252565e-06,
      "loss": 0.019,
      "step": 1074880
    },
    {
      "epoch": 1.7590974254236955,
      "grad_norm": 0.521474301815033,
      "learning_rate": 6.5245151980390485e-06,
      "loss": 0.0106,
      "step": 1074900
    },
    {
      "epoch": 1.7591301558623489,
      "grad_norm": 0.4762701988220215,
      "learning_rate": 6.524449305825531e-06,
      "loss": 0.0155,
      "step": 1074920
    },
    {
      "epoch": 1.7591628863010023,
      "grad_norm": 0.17658303678035736,
      "learning_rate": 6.524383413612014e-06,
      "loss": 0.0236,
      "step": 1074940
    },
    {
      "epoch": 1.7591956167396554,
      "grad_norm": 1.4691364765167236,
      "learning_rate": 6.524317521398497e-06,
      "loss": 0.0197,
      "step": 1074960
    },
    {
      "epoch": 1.759228347178309,
      "grad_norm": 0.0434250645339489,
      "learning_rate": 6.52425162918498e-06,
      "loss": 0.014,
      "step": 1074980
    },
    {
      "epoch": 1.7592610776169622,
      "grad_norm": 0.27814218401908875,
      "learning_rate": 6.524185736971462e-06,
      "loss": 0.0163,
      "step": 1075000
    },
    {
      "epoch": 1.7592938080556155,
      "grad_norm": 0.36070579290390015,
      "learning_rate": 6.524119844757946e-06,
      "loss": 0.0207,
      "step": 1075020
    },
    {
      "epoch": 1.759326538494269,
      "grad_norm": 0.10095752775669098,
      "learning_rate": 6.524053952544428e-06,
      "loss": 0.0137,
      "step": 1075040
    },
    {
      "epoch": 1.7593592689329223,
      "grad_norm": 0.8916778564453125,
      "learning_rate": 6.523988060330911e-06,
      "loss": 0.0156,
      "step": 1075060
    },
    {
      "epoch": 1.7593919993715756,
      "grad_norm": 0.5026158094406128,
      "learning_rate": 6.523922168117394e-06,
      "loss": 0.0168,
      "step": 1075080
    },
    {
      "epoch": 1.7594247298102288,
      "grad_norm": 0.4358852505683899,
      "learning_rate": 6.523856275903877e-06,
      "loss": 0.0131,
      "step": 1075100
    },
    {
      "epoch": 1.7594574602488824,
      "grad_norm": 0.47371742129325867,
      "learning_rate": 6.5237903836903594e-06,
      "loss": 0.0219,
      "step": 1075120
    },
    {
      "epoch": 1.7594901906875355,
      "grad_norm": 0.6910841464996338,
      "learning_rate": 6.523724491476843e-06,
      "loss": 0.0194,
      "step": 1075140
    },
    {
      "epoch": 1.759522921126189,
      "grad_norm": 0.7741225361824036,
      "learning_rate": 6.523658599263326e-06,
      "loss": 0.0324,
      "step": 1075160
    },
    {
      "epoch": 1.7595556515648423,
      "grad_norm": 1.149721384048462,
      "learning_rate": 6.5235927070498085e-06,
      "loss": 0.0249,
      "step": 1075180
    },
    {
      "epoch": 1.7595883820034957,
      "grad_norm": 0.4493958353996277,
      "learning_rate": 6.523526814836292e-06,
      "loss": 0.0185,
      "step": 1075200
    },
    {
      "epoch": 1.759621112442149,
      "grad_norm": 0.4189735949039459,
      "learning_rate": 6.523460922622774e-06,
      "loss": 0.0224,
      "step": 1075220
    },
    {
      "epoch": 1.7596538428808022,
      "grad_norm": 0.35039058327674866,
      "learning_rate": 6.523395030409258e-06,
      "loss": 0.0119,
      "step": 1075240
    },
    {
      "epoch": 1.7596865733194558,
      "grad_norm": 0.18353113532066345,
      "learning_rate": 6.5233291381957395e-06,
      "loss": 0.0206,
      "step": 1075260
    },
    {
      "epoch": 1.759719303758109,
      "grad_norm": 0.4865405857563019,
      "learning_rate": 6.523263245982223e-06,
      "loss": 0.0161,
      "step": 1075280
    },
    {
      "epoch": 1.7597520341967623,
      "grad_norm": 0.47450992465019226,
      "learning_rate": 6.523197353768706e-06,
      "loss": 0.0256,
      "step": 1075300
    },
    {
      "epoch": 1.7597847646354157,
      "grad_norm": 0.48056596517562866,
      "learning_rate": 6.5231314615551885e-06,
      "loss": 0.0184,
      "step": 1075320
    },
    {
      "epoch": 1.759817495074069,
      "grad_norm": 0.48994749784469604,
      "learning_rate": 6.523065569341671e-06,
      "loss": 0.0137,
      "step": 1075340
    },
    {
      "epoch": 1.7598502255127224,
      "grad_norm": 0.39445993304252625,
      "learning_rate": 6.522999677128155e-06,
      "loss": 0.0174,
      "step": 1075360
    },
    {
      "epoch": 1.7598829559513756,
      "grad_norm": 1.1865882873535156,
      "learning_rate": 6.522933784914637e-06,
      "loss": 0.0252,
      "step": 1075380
    },
    {
      "epoch": 1.7599156863900292,
      "grad_norm": 0.18071560561656952,
      "learning_rate": 6.52286789270112e-06,
      "loss": 0.0229,
      "step": 1075400
    },
    {
      "epoch": 1.7599484168286823,
      "grad_norm": 0.6489840149879456,
      "learning_rate": 6.522802000487602e-06,
      "loss": 0.0186,
      "step": 1075420
    },
    {
      "epoch": 1.7599811472673357,
      "grad_norm": 0.4158697724342346,
      "learning_rate": 6.522736108274086e-06,
      "loss": 0.0102,
      "step": 1075440
    },
    {
      "epoch": 1.760013877705989,
      "grad_norm": 0.19959788024425507,
      "learning_rate": 6.5226702160605686e-06,
      "loss": 0.0162,
      "step": 1075460
    },
    {
      "epoch": 1.7600466081446422,
      "grad_norm": 0.6265695095062256,
      "learning_rate": 6.522604323847051e-06,
      "loss": 0.0285,
      "step": 1075480
    },
    {
      "epoch": 1.7600793385832958,
      "grad_norm": 0.6016773581504822,
      "learning_rate": 6.522538431633535e-06,
      "loss": 0.0196,
      "step": 1075500
    },
    {
      "epoch": 1.760112069021949,
      "grad_norm": 0.6947320103645325,
      "learning_rate": 6.522472539420018e-06,
      "loss": 0.0204,
      "step": 1075520
    },
    {
      "epoch": 1.7601447994606023,
      "grad_norm": 0.21864581108093262,
      "learning_rate": 6.5224066472065e-06,
      "loss": 0.0253,
      "step": 1075540
    },
    {
      "epoch": 1.7601775298992557,
      "grad_norm": 0.9669100642204285,
      "learning_rate": 6.522340754992983e-06,
      "loss": 0.0169,
      "step": 1075560
    },
    {
      "epoch": 1.760210260337909,
      "grad_norm": 0.3796982169151306,
      "learning_rate": 6.522274862779467e-06,
      "loss": 0.0178,
      "step": 1075580
    },
    {
      "epoch": 1.7602429907765624,
      "grad_norm": 0.8112623691558838,
      "learning_rate": 6.522208970565949e-06,
      "loss": 0.0163,
      "step": 1075600
    },
    {
      "epoch": 1.7602757212152156,
      "grad_norm": 0.4483136236667633,
      "learning_rate": 6.522143078352432e-06,
      "loss": 0.0258,
      "step": 1075620
    },
    {
      "epoch": 1.7603084516538692,
      "grad_norm": 0.6594704985618591,
      "learning_rate": 6.522077186138914e-06,
      "loss": 0.0196,
      "step": 1075640
    },
    {
      "epoch": 1.7603411820925223,
      "grad_norm": 0.34260526299476624,
      "learning_rate": 6.522011293925398e-06,
      "loss": 0.0167,
      "step": 1075660
    },
    {
      "epoch": 1.7603739125311757,
      "grad_norm": 0.36250272393226624,
      "learning_rate": 6.5219454017118795e-06,
      "loss": 0.025,
      "step": 1075680
    },
    {
      "epoch": 1.760406642969829,
      "grad_norm": 0.5765344500541687,
      "learning_rate": 6.521879509498363e-06,
      "loss": 0.0224,
      "step": 1075700
    },
    {
      "epoch": 1.7604393734084824,
      "grad_norm": 0.27053573727607727,
      "learning_rate": 6.521813617284846e-06,
      "loss": 0.0226,
      "step": 1075720
    },
    {
      "epoch": 1.7604721038471358,
      "grad_norm": 0.9503439664840698,
      "learning_rate": 6.521747725071329e-06,
      "loss": 0.0262,
      "step": 1075740
    },
    {
      "epoch": 1.760504834285789,
      "grad_norm": 0.9555392265319824,
      "learning_rate": 6.521681832857811e-06,
      "loss": 0.0195,
      "step": 1075760
    },
    {
      "epoch": 1.7605375647244426,
      "grad_norm": 0.3236158490180969,
      "learning_rate": 6.521615940644295e-06,
      "loss": 0.0181,
      "step": 1075780
    },
    {
      "epoch": 1.7605702951630957,
      "grad_norm": 0.30980488657951355,
      "learning_rate": 6.521550048430777e-06,
      "loss": 0.0215,
      "step": 1075800
    },
    {
      "epoch": 1.760603025601749,
      "grad_norm": 0.3536166548728943,
      "learning_rate": 6.52148415621726e-06,
      "loss": 0.0157,
      "step": 1075820
    },
    {
      "epoch": 1.7606357560404025,
      "grad_norm": 0.393905371427536,
      "learning_rate": 6.521418264003742e-06,
      "loss": 0.0206,
      "step": 1075840
    },
    {
      "epoch": 1.7606684864790558,
      "grad_norm": 0.6576853394508362,
      "learning_rate": 6.521352371790226e-06,
      "loss": 0.0168,
      "step": 1075860
    },
    {
      "epoch": 1.7607012169177092,
      "grad_norm": 0.13964149355888367,
      "learning_rate": 6.5212864795767095e-06,
      "loss": 0.0265,
      "step": 1075880
    },
    {
      "epoch": 1.7607339473563623,
      "grad_norm": 1.1176410913467407,
      "learning_rate": 6.521220587363191e-06,
      "loss": 0.0252,
      "step": 1075900
    },
    {
      "epoch": 1.760766677795016,
      "grad_norm": 0.4678718149662018,
      "learning_rate": 6.521154695149675e-06,
      "loss": 0.0148,
      "step": 1075920
    },
    {
      "epoch": 1.760799408233669,
      "grad_norm": 0.17978405952453613,
      "learning_rate": 6.521088802936158e-06,
      "loss": 0.0163,
      "step": 1075940
    },
    {
      "epoch": 1.7608321386723225,
      "grad_norm": 0.470412939786911,
      "learning_rate": 6.5210229107226404e-06,
      "loss": 0.0219,
      "step": 1075960
    },
    {
      "epoch": 1.7608648691109758,
      "grad_norm": 0.2686084508895874,
      "learning_rate": 6.520957018509123e-06,
      "loss": 0.0227,
      "step": 1075980
    },
    {
      "epoch": 1.7608975995496292,
      "grad_norm": 0.22940681874752045,
      "learning_rate": 6.520891126295607e-06,
      "loss": 0.0164,
      "step": 1076000
    },
    {
      "epoch": 1.7609303299882826,
      "grad_norm": 1.275767207145691,
      "learning_rate": 6.520825234082089e-06,
      "loss": 0.0194,
      "step": 1076020
    },
    {
      "epoch": 1.7609630604269357,
      "grad_norm": 0.861482560634613,
      "learning_rate": 6.520759341868572e-06,
      "loss": 0.0218,
      "step": 1076040
    },
    {
      "epoch": 1.7609957908655893,
      "grad_norm": 0.12265230715274811,
      "learning_rate": 6.520693449655054e-06,
      "loss": 0.0271,
      "step": 1076060
    },
    {
      "epoch": 1.7610285213042425,
      "grad_norm": 0.7090508937835693,
      "learning_rate": 6.520627557441538e-06,
      "loss": 0.0174,
      "step": 1076080
    },
    {
      "epoch": 1.7610612517428959,
      "grad_norm": 0.6491227746009827,
      "learning_rate": 6.5205616652280205e-06,
      "loss": 0.0207,
      "step": 1076100
    },
    {
      "epoch": 1.7610939821815492,
      "grad_norm": 0.39597681164741516,
      "learning_rate": 6.520495773014503e-06,
      "loss": 0.021,
      "step": 1076120
    },
    {
      "epoch": 1.7611267126202024,
      "grad_norm": 3.3694040775299072,
      "learning_rate": 6.520429880800986e-06,
      "loss": 0.0226,
      "step": 1076140
    },
    {
      "epoch": 1.761159443058856,
      "grad_norm": 0.3662439286708832,
      "learning_rate": 6.5203639885874695e-06,
      "loss": 0.0137,
      "step": 1076160
    },
    {
      "epoch": 1.7611921734975091,
      "grad_norm": 0.5191517472267151,
      "learning_rate": 6.5202980963739514e-06,
      "loss": 0.0185,
      "step": 1076180
    },
    {
      "epoch": 1.7612249039361627,
      "grad_norm": 1.7041200399398804,
      "learning_rate": 6.520232204160435e-06,
      "loss": 0.0184,
      "step": 1076200
    },
    {
      "epoch": 1.7612576343748159,
      "grad_norm": 0.5281391143798828,
      "learning_rate": 6.520166311946919e-06,
      "loss": 0.0144,
      "step": 1076220
    },
    {
      "epoch": 1.7612903648134692,
      "grad_norm": 0.2625747323036194,
      "learning_rate": 6.5201004197334005e-06,
      "loss": 0.012,
      "step": 1076240
    },
    {
      "epoch": 1.7613230952521226,
      "grad_norm": 0.7730065584182739,
      "learning_rate": 6.520034527519884e-06,
      "loss": 0.0174,
      "step": 1076260
    },
    {
      "epoch": 1.7613558256907758,
      "grad_norm": 0.5032196640968323,
      "learning_rate": 6.519968635306366e-06,
      "loss": 0.0174,
      "step": 1076280
    },
    {
      "epoch": 1.7613885561294294,
      "grad_norm": 0.43064749240875244,
      "learning_rate": 6.5199027430928496e-06,
      "loss": 0.0179,
      "step": 1076300
    },
    {
      "epoch": 1.7614212865680825,
      "grad_norm": 0.7214627265930176,
      "learning_rate": 6.519836850879332e-06,
      "loss": 0.013,
      "step": 1076320
    },
    {
      "epoch": 1.7614540170067359,
      "grad_norm": 0.40091344714164734,
      "learning_rate": 6.519770958665815e-06,
      "loss": 0.026,
      "step": 1076340
    },
    {
      "epoch": 1.7614867474453892,
      "grad_norm": 0.4527592062950134,
      "learning_rate": 6.519705066452298e-06,
      "loss": 0.0238,
      "step": 1076360
    },
    {
      "epoch": 1.7615194778840426,
      "grad_norm": 0.5954077839851379,
      "learning_rate": 6.519639174238781e-06,
      "loss": 0.0261,
      "step": 1076380
    },
    {
      "epoch": 1.761552208322696,
      "grad_norm": 0.412268728017807,
      "learning_rate": 6.519573282025263e-06,
      "loss": 0.0172,
      "step": 1076400
    },
    {
      "epoch": 1.7615849387613491,
      "grad_norm": 0.6504013538360596,
      "learning_rate": 6.519507389811747e-06,
      "loss": 0.0202,
      "step": 1076420
    },
    {
      "epoch": 1.7616176692000027,
      "grad_norm": 0.3363981544971466,
      "learning_rate": 6.519441497598229e-06,
      "loss": 0.0134,
      "step": 1076440
    },
    {
      "epoch": 1.7616503996386559,
      "grad_norm": 1.0688642263412476,
      "learning_rate": 6.519375605384712e-06,
      "loss": 0.0194,
      "step": 1076460
    },
    {
      "epoch": 1.7616831300773093,
      "grad_norm": 1.1627813577651978,
      "learning_rate": 6.519309713171195e-06,
      "loss": 0.0182,
      "step": 1076480
    },
    {
      "epoch": 1.7617158605159626,
      "grad_norm": 0.4590827524662018,
      "learning_rate": 6.519243820957678e-06,
      "loss": 0.0239,
      "step": 1076500
    },
    {
      "epoch": 1.761748590954616,
      "grad_norm": 1.0406990051269531,
      "learning_rate": 6.5191779287441605e-06,
      "loss": 0.0197,
      "step": 1076520
    },
    {
      "epoch": 1.7617813213932694,
      "grad_norm": 0.7154975533485413,
      "learning_rate": 6.519112036530644e-06,
      "loss": 0.0161,
      "step": 1076540
    },
    {
      "epoch": 1.7618140518319225,
      "grad_norm": 0.2779570519924164,
      "learning_rate": 6.519046144317127e-06,
      "loss": 0.0143,
      "step": 1076560
    },
    {
      "epoch": 1.7618467822705761,
      "grad_norm": 1.1773425340652466,
      "learning_rate": 6.51898025210361e-06,
      "loss": 0.021,
      "step": 1076580
    },
    {
      "epoch": 1.7618795127092293,
      "grad_norm": 1.1521559953689575,
      "learning_rate": 6.518914359890093e-06,
      "loss": 0.0264,
      "step": 1076600
    },
    {
      "epoch": 1.7619122431478826,
      "grad_norm": 1.250996470451355,
      "learning_rate": 6.518848467676575e-06,
      "loss": 0.0223,
      "step": 1076620
    },
    {
      "epoch": 1.761944973586536,
      "grad_norm": 0.45129555463790894,
      "learning_rate": 6.518782575463059e-06,
      "loss": 0.0174,
      "step": 1076640
    },
    {
      "epoch": 1.7619777040251894,
      "grad_norm": 0.9274128079414368,
      "learning_rate": 6.5187166832495406e-06,
      "loss": 0.0213,
      "step": 1076660
    },
    {
      "epoch": 1.7620104344638428,
      "grad_norm": 0.22803668677806854,
      "learning_rate": 6.518650791036024e-06,
      "loss": 0.0179,
      "step": 1076680
    },
    {
      "epoch": 1.762043164902496,
      "grad_norm": 0.42679184675216675,
      "learning_rate": 6.518584898822506e-06,
      "loss": 0.0242,
      "step": 1076700
    },
    {
      "epoch": 1.7620758953411495,
      "grad_norm": 0.3214205801486969,
      "learning_rate": 6.51851900660899e-06,
      "loss": 0.0114,
      "step": 1076720
    },
    {
      "epoch": 1.7621086257798027,
      "grad_norm": 0.09234437346458435,
      "learning_rate": 6.518453114395472e-06,
      "loss": 0.0256,
      "step": 1076740
    },
    {
      "epoch": 1.762141356218456,
      "grad_norm": 0.6787122488021851,
      "learning_rate": 6.518387222181955e-06,
      "loss": 0.0154,
      "step": 1076760
    },
    {
      "epoch": 1.7621740866571094,
      "grad_norm": 0.8598667979240417,
      "learning_rate": 6.518321329968438e-06,
      "loss": 0.0226,
      "step": 1076780
    },
    {
      "epoch": 1.7622068170957628,
      "grad_norm": 0.29894691705703735,
      "learning_rate": 6.5182554377549214e-06,
      "loss": 0.0194,
      "step": 1076800
    },
    {
      "epoch": 1.7622395475344161,
      "grad_norm": 0.5395342111587524,
      "learning_rate": 6.518189545541403e-06,
      "loss": 0.0173,
      "step": 1076820
    },
    {
      "epoch": 1.7622722779730693,
      "grad_norm": 1.166256308555603,
      "learning_rate": 6.518123653327887e-06,
      "loss": 0.0152,
      "step": 1076840
    },
    {
      "epoch": 1.7623050084117229,
      "grad_norm": 0.3774675130844116,
      "learning_rate": 6.518057761114369e-06,
      "loss": 0.0176,
      "step": 1076860
    },
    {
      "epoch": 1.762337738850376,
      "grad_norm": 0.20196425914764404,
      "learning_rate": 6.517991868900852e-06,
      "loss": 0.0205,
      "step": 1076880
    },
    {
      "epoch": 1.7623704692890294,
      "grad_norm": 0.12386281788349152,
      "learning_rate": 6.517925976687335e-06,
      "loss": 0.0098,
      "step": 1076900
    },
    {
      "epoch": 1.7624031997276828,
      "grad_norm": 0.27395278215408325,
      "learning_rate": 6.517860084473818e-06,
      "loss": 0.0205,
      "step": 1076920
    },
    {
      "epoch": 1.762435930166336,
      "grad_norm": 0.26049095392227173,
      "learning_rate": 6.5177941922603015e-06,
      "loss": 0.0183,
      "step": 1076940
    },
    {
      "epoch": 1.7624686606049895,
      "grad_norm": 0.7889097332954407,
      "learning_rate": 6.517728300046784e-06,
      "loss": 0.0199,
      "step": 1076960
    },
    {
      "epoch": 1.7625013910436427,
      "grad_norm": 0.12122822552919388,
      "learning_rate": 6.517662407833267e-06,
      "loss": 0.0183,
      "step": 1076980
    },
    {
      "epoch": 1.7625341214822963,
      "grad_norm": 0.1944596767425537,
      "learning_rate": 6.51759651561975e-06,
      "loss": 0.0218,
      "step": 1077000
    },
    {
      "epoch": 1.7625668519209494,
      "grad_norm": 0.6432259678840637,
      "learning_rate": 6.517530623406233e-06,
      "loss": 0.0251,
      "step": 1077020
    },
    {
      "epoch": 1.7625995823596028,
      "grad_norm": 0.7347744107246399,
      "learning_rate": 6.517464731192715e-06,
      "loss": 0.0236,
      "step": 1077040
    },
    {
      "epoch": 1.7626323127982562,
      "grad_norm": 0.7532027363777161,
      "learning_rate": 6.517398838979199e-06,
      "loss": 0.0301,
      "step": 1077060
    },
    {
      "epoch": 1.7626650432369093,
      "grad_norm": 0.3063147962093353,
      "learning_rate": 6.517332946765681e-06,
      "loss": 0.011,
      "step": 1077080
    },
    {
      "epoch": 1.762697773675563,
      "grad_norm": 1.3883510828018188,
      "learning_rate": 6.517267054552164e-06,
      "loss": 0.0169,
      "step": 1077100
    },
    {
      "epoch": 1.762730504114216,
      "grad_norm": 0.16481642425060272,
      "learning_rate": 6.517201162338647e-06,
      "loss": 0.0201,
      "step": 1077120
    },
    {
      "epoch": 1.7627632345528694,
      "grad_norm": 0.43746861815452576,
      "learning_rate": 6.51713527012513e-06,
      "loss": 0.0247,
      "step": 1077140
    },
    {
      "epoch": 1.7627959649915228,
      "grad_norm": 0.6704441905021667,
      "learning_rate": 6.5170693779116125e-06,
      "loss": 0.0175,
      "step": 1077160
    },
    {
      "epoch": 1.7628286954301762,
      "grad_norm": 0.4711363613605499,
      "learning_rate": 6.517003485698096e-06,
      "loss": 0.0204,
      "step": 1077180
    },
    {
      "epoch": 1.7628614258688295,
      "grad_norm": 0.29427099227905273,
      "learning_rate": 6.516937593484578e-06,
      "loss": 0.0155,
      "step": 1077200
    },
    {
      "epoch": 1.7628941563074827,
      "grad_norm": 0.41916728019714355,
      "learning_rate": 6.5168717012710615e-06,
      "loss": 0.0165,
      "step": 1077220
    },
    {
      "epoch": 1.7629268867461363,
      "grad_norm": 0.21058417856693268,
      "learning_rate": 6.516805809057543e-06,
      "loss": 0.0197,
      "step": 1077240
    },
    {
      "epoch": 1.7629596171847894,
      "grad_norm": 1.0210442543029785,
      "learning_rate": 6.516739916844027e-06,
      "loss": 0.0236,
      "step": 1077260
    },
    {
      "epoch": 1.7629923476234428,
      "grad_norm": 0.9055072069168091,
      "learning_rate": 6.516674024630511e-06,
      "loss": 0.0166,
      "step": 1077280
    },
    {
      "epoch": 1.7630250780620962,
      "grad_norm": 1.3174169063568115,
      "learning_rate": 6.5166081324169925e-06,
      "loss": 0.0259,
      "step": 1077300
    },
    {
      "epoch": 1.7630578085007496,
      "grad_norm": 0.26445281505584717,
      "learning_rate": 6.516542240203476e-06,
      "loss": 0.0247,
      "step": 1077320
    },
    {
      "epoch": 1.763090538939403,
      "grad_norm": 0.6223804950714111,
      "learning_rate": 6.516476347989959e-06,
      "loss": 0.024,
      "step": 1077340
    },
    {
      "epoch": 1.763123269378056,
      "grad_norm": 0.5468877553939819,
      "learning_rate": 6.5164104557764415e-06,
      "loss": 0.0211,
      "step": 1077360
    },
    {
      "epoch": 1.7631559998167097,
      "grad_norm": 0.798897922039032,
      "learning_rate": 6.516344563562924e-06,
      "loss": 0.0193,
      "step": 1077380
    },
    {
      "epoch": 1.7631887302553628,
      "grad_norm": 0.31477978825569153,
      "learning_rate": 6.516278671349408e-06,
      "loss": 0.0141,
      "step": 1077400
    },
    {
      "epoch": 1.7632214606940162,
      "grad_norm": 0.658599853515625,
      "learning_rate": 6.51621277913589e-06,
      "loss": 0.016,
      "step": 1077420
    },
    {
      "epoch": 1.7632541911326696,
      "grad_norm": 0.14995808899402618,
      "learning_rate": 6.516146886922373e-06,
      "loss": 0.0202,
      "step": 1077440
    },
    {
      "epoch": 1.763286921571323,
      "grad_norm": 2.0426807403564453,
      "learning_rate": 6.516080994708855e-06,
      "loss": 0.0177,
      "step": 1077460
    },
    {
      "epoch": 1.7633196520099763,
      "grad_norm": 0.46115612983703613,
      "learning_rate": 6.516015102495339e-06,
      "loss": 0.0185,
      "step": 1077480
    },
    {
      "epoch": 1.7633523824486295,
      "grad_norm": 0.32609328627586365,
      "learning_rate": 6.5159492102818216e-06,
      "loss": 0.0208,
      "step": 1077500
    },
    {
      "epoch": 1.763385112887283,
      "grad_norm": 0.6625633239746094,
      "learning_rate": 6.515883318068304e-06,
      "loss": 0.0142,
      "step": 1077520
    },
    {
      "epoch": 1.7634178433259362,
      "grad_norm": 0.6932738423347473,
      "learning_rate": 6.515817425854787e-06,
      "loss": 0.0151,
      "step": 1077540
    },
    {
      "epoch": 1.7634505737645896,
      "grad_norm": 0.4971519410610199,
      "learning_rate": 6.515751533641271e-06,
      "loss": 0.0228,
      "step": 1077560
    },
    {
      "epoch": 1.763483304203243,
      "grad_norm": 0.21143831312656403,
      "learning_rate": 6.5156856414277525e-06,
      "loss": 0.0183,
      "step": 1077580
    },
    {
      "epoch": 1.7635160346418963,
      "grad_norm": 0.25057369470596313,
      "learning_rate": 6.515619749214236e-06,
      "loss": 0.0261,
      "step": 1077600
    },
    {
      "epoch": 1.7635487650805497,
      "grad_norm": 0.1960577368736267,
      "learning_rate": 6.51555385700072e-06,
      "loss": 0.0207,
      "step": 1077620
    },
    {
      "epoch": 1.7635814955192028,
      "grad_norm": 0.49025973677635193,
      "learning_rate": 6.515487964787202e-06,
      "loss": 0.0136,
      "step": 1077640
    },
    {
      "epoch": 1.7636142259578564,
      "grad_norm": 1.1972142457962036,
      "learning_rate": 6.515422072573685e-06,
      "loss": 0.0214,
      "step": 1077660
    },
    {
      "epoch": 1.7636469563965096,
      "grad_norm": 0.9743058681488037,
      "learning_rate": 6.515356180360167e-06,
      "loss": 0.018,
      "step": 1077680
    },
    {
      "epoch": 1.763679686835163,
      "grad_norm": 0.5543457269668579,
      "learning_rate": 6.515290288146651e-06,
      "loss": 0.0126,
      "step": 1077700
    },
    {
      "epoch": 1.7637124172738163,
      "grad_norm": 0.5793946385383606,
      "learning_rate": 6.5152243959331326e-06,
      "loss": 0.0216,
      "step": 1077720
    },
    {
      "epoch": 1.7637451477124695,
      "grad_norm": 0.46948838233947754,
      "learning_rate": 6.515158503719616e-06,
      "loss": 0.0169,
      "step": 1077740
    },
    {
      "epoch": 1.763777878151123,
      "grad_norm": 0.13189998269081116,
      "learning_rate": 6.515092611506099e-06,
      "loss": 0.0168,
      "step": 1077760
    },
    {
      "epoch": 1.7638106085897762,
      "grad_norm": 0.3004460036754608,
      "learning_rate": 6.515026719292582e-06,
      "loss": 0.0198,
      "step": 1077780
    },
    {
      "epoch": 1.7638433390284298,
      "grad_norm": 0.5976276993751526,
      "learning_rate": 6.514960827079064e-06,
      "loss": 0.0237,
      "step": 1077800
    },
    {
      "epoch": 1.763876069467083,
      "grad_norm": 0.6671267747879028,
      "learning_rate": 6.514894934865548e-06,
      "loss": 0.0201,
      "step": 1077820
    },
    {
      "epoch": 1.7639087999057363,
      "grad_norm": 0.5386610627174377,
      "learning_rate": 6.51482904265203e-06,
      "loss": 0.0175,
      "step": 1077840
    },
    {
      "epoch": 1.7639415303443897,
      "grad_norm": 0.5784636735916138,
      "learning_rate": 6.5147631504385134e-06,
      "loss": 0.014,
      "step": 1077860
    },
    {
      "epoch": 1.7639742607830429,
      "grad_norm": 0.29148364067077637,
      "learning_rate": 6.514697258224995e-06,
      "loss": 0.0132,
      "step": 1077880
    },
    {
      "epoch": 1.7640069912216965,
      "grad_norm": 0.24013051390647888,
      "learning_rate": 6.514631366011479e-06,
      "loss": 0.0119,
      "step": 1077900
    },
    {
      "epoch": 1.7640397216603496,
      "grad_norm": 0.6066713929176331,
      "learning_rate": 6.514565473797962e-06,
      "loss": 0.0172,
      "step": 1077920
    },
    {
      "epoch": 1.764072452099003,
      "grad_norm": 0.3483327329158783,
      "learning_rate": 6.514499581584444e-06,
      "loss": 0.0245,
      "step": 1077940
    },
    {
      "epoch": 1.7641051825376564,
      "grad_norm": 0.506120502948761,
      "learning_rate": 6.514433689370927e-06,
      "loss": 0.0249,
      "step": 1077960
    },
    {
      "epoch": 1.7641379129763097,
      "grad_norm": 1.0166847705841064,
      "learning_rate": 6.514367797157411e-06,
      "loss": 0.0215,
      "step": 1077980
    },
    {
      "epoch": 1.764170643414963,
      "grad_norm": 0.42132508754730225,
      "learning_rate": 6.5143019049438935e-06,
      "loss": 0.0204,
      "step": 1078000
    },
    {
      "epoch": 1.7642033738536163,
      "grad_norm": 0.4069715142250061,
      "learning_rate": 6.514236012730376e-06,
      "loss": 0.0195,
      "step": 1078020
    },
    {
      "epoch": 1.7642361042922698,
      "grad_norm": 0.6810454726219177,
      "learning_rate": 6.51417012051686e-06,
      "loss": 0.0212,
      "step": 1078040
    },
    {
      "epoch": 1.764268834730923,
      "grad_norm": 0.19195511937141418,
      "learning_rate": 6.514104228303342e-06,
      "loss": 0.0178,
      "step": 1078060
    },
    {
      "epoch": 1.7643015651695764,
      "grad_norm": 0.6306500434875488,
      "learning_rate": 6.514038336089825e-06,
      "loss": 0.0224,
      "step": 1078080
    },
    {
      "epoch": 1.7643342956082297,
      "grad_norm": 0.46799346804618835,
      "learning_rate": 6.513972443876307e-06,
      "loss": 0.0219,
      "step": 1078100
    },
    {
      "epoch": 1.7643670260468831,
      "grad_norm": 0.944130003452301,
      "learning_rate": 6.513906551662791e-06,
      "loss": 0.0244,
      "step": 1078120
    },
    {
      "epoch": 1.7643997564855365,
      "grad_norm": 0.7891477942466736,
      "learning_rate": 6.5138406594492735e-06,
      "loss": 0.0305,
      "step": 1078140
    },
    {
      "epoch": 1.7644324869241896,
      "grad_norm": 0.7978854775428772,
      "learning_rate": 6.513774767235756e-06,
      "loss": 0.0223,
      "step": 1078160
    },
    {
      "epoch": 1.7644652173628432,
      "grad_norm": 0.3136962056159973,
      "learning_rate": 6.513708875022239e-06,
      "loss": 0.0144,
      "step": 1078180
    },
    {
      "epoch": 1.7644979478014964,
      "grad_norm": 0.22450341284275055,
      "learning_rate": 6.5136429828087225e-06,
      "loss": 0.0141,
      "step": 1078200
    },
    {
      "epoch": 1.7645306782401498,
      "grad_norm": 0.2009676694869995,
      "learning_rate": 6.5135770905952044e-06,
      "loss": 0.0161,
      "step": 1078220
    },
    {
      "epoch": 1.7645634086788031,
      "grad_norm": 0.4091753661632538,
      "learning_rate": 6.513511198381688e-06,
      "loss": 0.0205,
      "step": 1078240
    },
    {
      "epoch": 1.7645961391174565,
      "grad_norm": 0.980921745300293,
      "learning_rate": 6.51344530616817e-06,
      "loss": 0.0217,
      "step": 1078260
    },
    {
      "epoch": 1.7646288695561099,
      "grad_norm": 0.14495849609375,
      "learning_rate": 6.5133794139546535e-06,
      "loss": 0.0172,
      "step": 1078280
    },
    {
      "epoch": 1.764661599994763,
      "grad_norm": 0.37410661578178406,
      "learning_rate": 6.513313521741136e-06,
      "loss": 0.0187,
      "step": 1078300
    },
    {
      "epoch": 1.7646943304334166,
      "grad_norm": 0.1606287807226181,
      "learning_rate": 6.513247629527619e-06,
      "loss": 0.0206,
      "step": 1078320
    },
    {
      "epoch": 1.7647270608720698,
      "grad_norm": 1.1505223512649536,
      "learning_rate": 6.5131817373141026e-06,
      "loss": 0.0247,
      "step": 1078340
    },
    {
      "epoch": 1.7647597913107231,
      "grad_norm": 0.417785108089447,
      "learning_rate": 6.513115845100585e-06,
      "loss": 0.0207,
      "step": 1078360
    },
    {
      "epoch": 1.7647925217493765,
      "grad_norm": 0.6621679067611694,
      "learning_rate": 6.513049952887068e-06,
      "loss": 0.0195,
      "step": 1078380
    },
    {
      "epoch": 1.7648252521880297,
      "grad_norm": 1.0408868789672852,
      "learning_rate": 6.512984060673551e-06,
      "loss": 0.0128,
      "step": 1078400
    },
    {
      "epoch": 1.7648579826266833,
      "grad_norm": 0.7268038988113403,
      "learning_rate": 6.512918168460034e-06,
      "loss": 0.0163,
      "step": 1078420
    },
    {
      "epoch": 1.7648907130653364,
      "grad_norm": 0.32456496357917786,
      "learning_rate": 6.512852276246516e-06,
      "loss": 0.0199,
      "step": 1078440
    },
    {
      "epoch": 1.76492344350399,
      "grad_norm": 0.7305017113685608,
      "learning_rate": 6.512786384033e-06,
      "loss": 0.0186,
      "step": 1078460
    },
    {
      "epoch": 1.7649561739426431,
      "grad_norm": 0.8368313908576965,
      "learning_rate": 6.512720491819482e-06,
      "loss": 0.0289,
      "step": 1078480
    },
    {
      "epoch": 1.7649889043812965,
      "grad_norm": 0.7257175445556641,
      "learning_rate": 6.512654599605965e-06,
      "loss": 0.0222,
      "step": 1078500
    },
    {
      "epoch": 1.76502163481995,
      "grad_norm": 1.4071625471115112,
      "learning_rate": 6.512588707392447e-06,
      "loss": 0.0125,
      "step": 1078520
    },
    {
      "epoch": 1.765054365258603,
      "grad_norm": 0.3481195271015167,
      "learning_rate": 6.512522815178931e-06,
      "loss": 0.0111,
      "step": 1078540
    },
    {
      "epoch": 1.7650870956972566,
      "grad_norm": 0.20324723422527313,
      "learning_rate": 6.5124569229654136e-06,
      "loss": 0.0159,
      "step": 1078560
    },
    {
      "epoch": 1.7651198261359098,
      "grad_norm": 0.5690408945083618,
      "learning_rate": 6.512391030751896e-06,
      "loss": 0.0234,
      "step": 1078580
    },
    {
      "epoch": 1.7651525565745632,
      "grad_norm": 0.34327012300491333,
      "learning_rate": 6.512325138538379e-06,
      "loss": 0.02,
      "step": 1078600
    },
    {
      "epoch": 1.7651852870132165,
      "grad_norm": 0.7034321427345276,
      "learning_rate": 6.512259246324863e-06,
      "loss": 0.0222,
      "step": 1078620
    },
    {
      "epoch": 1.76521801745187,
      "grad_norm": 0.47415557503700256,
      "learning_rate": 6.5121933541113445e-06,
      "loss": 0.0179,
      "step": 1078640
    },
    {
      "epoch": 1.7652507478905233,
      "grad_norm": 0.9102247357368469,
      "learning_rate": 6.512127461897828e-06,
      "loss": 0.0217,
      "step": 1078660
    },
    {
      "epoch": 1.7652834783291764,
      "grad_norm": 0.12295923382043839,
      "learning_rate": 6.512061569684312e-06,
      "loss": 0.0144,
      "step": 1078680
    },
    {
      "epoch": 1.76531620876783,
      "grad_norm": 0.49794626235961914,
      "learning_rate": 6.511995677470794e-06,
      "loss": 0.0136,
      "step": 1078700
    },
    {
      "epoch": 1.7653489392064832,
      "grad_norm": 1.1969938278198242,
      "learning_rate": 6.511929785257277e-06,
      "loss": 0.0204,
      "step": 1078720
    },
    {
      "epoch": 1.7653816696451365,
      "grad_norm": 1.441739797592163,
      "learning_rate": 6.511863893043759e-06,
      "loss": 0.0254,
      "step": 1078740
    },
    {
      "epoch": 1.76541440008379,
      "grad_norm": 1.2994201183319092,
      "learning_rate": 6.511798000830243e-06,
      "loss": 0.0149,
      "step": 1078760
    },
    {
      "epoch": 1.7654471305224433,
      "grad_norm": 0.5192785263061523,
      "learning_rate": 6.511732108616725e-06,
      "loss": 0.0155,
      "step": 1078780
    },
    {
      "epoch": 1.7654798609610967,
      "grad_norm": 0.13396763801574707,
      "learning_rate": 6.511666216403208e-06,
      "loss": 0.0218,
      "step": 1078800
    },
    {
      "epoch": 1.7655125913997498,
      "grad_norm": 0.3665345311164856,
      "learning_rate": 6.511600324189691e-06,
      "loss": 0.0208,
      "step": 1078820
    },
    {
      "epoch": 1.7655453218384034,
      "grad_norm": 0.18947383761405945,
      "learning_rate": 6.5115344319761745e-06,
      "loss": 0.0236,
      "step": 1078840
    },
    {
      "epoch": 1.7655780522770566,
      "grad_norm": 0.11619818210601807,
      "learning_rate": 6.511468539762656e-06,
      "loss": 0.0116,
      "step": 1078860
    },
    {
      "epoch": 1.76561078271571,
      "grad_norm": 0.7092569470405579,
      "learning_rate": 6.51140264754914e-06,
      "loss": 0.022,
      "step": 1078880
    },
    {
      "epoch": 1.7656435131543633,
      "grad_norm": 0.6992318630218506,
      "learning_rate": 6.511336755335622e-06,
      "loss": 0.021,
      "step": 1078900
    },
    {
      "epoch": 1.7656762435930167,
      "grad_norm": 1.1784281730651855,
      "learning_rate": 6.511270863122105e-06,
      "loss": 0.0171,
      "step": 1078920
    },
    {
      "epoch": 1.76570897403167,
      "grad_norm": 0.6112678647041321,
      "learning_rate": 6.511204970908588e-06,
      "loss": 0.0156,
      "step": 1078940
    },
    {
      "epoch": 1.7657417044703232,
      "grad_norm": 0.3240300416946411,
      "learning_rate": 6.511139078695071e-06,
      "loss": 0.0135,
      "step": 1078960
    },
    {
      "epoch": 1.7657744349089768,
      "grad_norm": 0.10708653181791306,
      "learning_rate": 6.511073186481554e-06,
      "loss": 0.0231,
      "step": 1078980
    },
    {
      "epoch": 1.76580716534763,
      "grad_norm": 0.7168908715248108,
      "learning_rate": 6.511007294268037e-06,
      "loss": 0.0233,
      "step": 1079000
    },
    {
      "epoch": 1.7658398957862833,
      "grad_norm": 0.4341805875301361,
      "learning_rate": 6.51094140205452e-06,
      "loss": 0.0212,
      "step": 1079020
    },
    {
      "epoch": 1.7658726262249367,
      "grad_norm": 0.6600117087364197,
      "learning_rate": 6.510875509841003e-06,
      "loss": 0.017,
      "step": 1079040
    },
    {
      "epoch": 1.76590535666359,
      "grad_norm": 0.16008184850215912,
      "learning_rate": 6.510809617627486e-06,
      "loss": 0.018,
      "step": 1079060
    },
    {
      "epoch": 1.7659380871022434,
      "grad_norm": 0.588275134563446,
      "learning_rate": 6.510743725413968e-06,
      "loss": 0.0165,
      "step": 1079080
    },
    {
      "epoch": 1.7659708175408966,
      "grad_norm": 0.13657720386981964,
      "learning_rate": 6.510677833200452e-06,
      "loss": 0.0229,
      "step": 1079100
    },
    {
      "epoch": 1.7660035479795502,
      "grad_norm": 0.7999234795570374,
      "learning_rate": 6.510611940986934e-06,
      "loss": 0.0191,
      "step": 1079120
    },
    {
      "epoch": 1.7660362784182033,
      "grad_norm": 0.22527606785297394,
      "learning_rate": 6.510546048773417e-06,
      "loss": 0.0098,
      "step": 1079140
    },
    {
      "epoch": 1.7660690088568567,
      "grad_norm": 0.3329204022884369,
      "learning_rate": 6.5104801565599e-06,
      "loss": 0.0202,
      "step": 1079160
    },
    {
      "epoch": 1.76610173929551,
      "grad_norm": 0.9514659643173218,
      "learning_rate": 6.510414264346383e-06,
      "loss": 0.0262,
      "step": 1079180
    },
    {
      "epoch": 1.7661344697341632,
      "grad_norm": 0.6653234362602234,
      "learning_rate": 6.5103483721328655e-06,
      "loss": 0.0225,
      "step": 1079200
    },
    {
      "epoch": 1.7661672001728168,
      "grad_norm": 0.47145596146583557,
      "learning_rate": 6.510282479919349e-06,
      "loss": 0.0159,
      "step": 1079220
    },
    {
      "epoch": 1.76619993061147,
      "grad_norm": 0.3783097565174103,
      "learning_rate": 6.510216587705831e-06,
      "loss": 0.0248,
      "step": 1079240
    },
    {
      "epoch": 1.7662326610501236,
      "grad_norm": 0.3890581429004669,
      "learning_rate": 6.5101506954923145e-06,
      "loss": 0.0198,
      "step": 1079260
    },
    {
      "epoch": 1.7662653914887767,
      "grad_norm": 1.519228219985962,
      "learning_rate": 6.510084803278796e-06,
      "loss": 0.0173,
      "step": 1079280
    },
    {
      "epoch": 1.76629812192743,
      "grad_norm": 1.0553841590881348,
      "learning_rate": 6.51001891106528e-06,
      "loss": 0.0172,
      "step": 1079300
    },
    {
      "epoch": 1.7663308523660834,
      "grad_norm": 0.4319688379764557,
      "learning_rate": 6.509953018851763e-06,
      "loss": 0.0237,
      "step": 1079320
    },
    {
      "epoch": 1.7663635828047366,
      "grad_norm": 0.2976842224597931,
      "learning_rate": 6.5098871266382455e-06,
      "loss": 0.0139,
      "step": 1079340
    },
    {
      "epoch": 1.7663963132433902,
      "grad_norm": 0.5891386270523071,
      "learning_rate": 6.509821234424728e-06,
      "loss": 0.0178,
      "step": 1079360
    },
    {
      "epoch": 1.7664290436820433,
      "grad_norm": 0.19462445378303528,
      "learning_rate": 6.509755342211212e-06,
      "loss": 0.0259,
      "step": 1079380
    },
    {
      "epoch": 1.7664617741206967,
      "grad_norm": 2.6676907539367676,
      "learning_rate": 6.5096894499976946e-06,
      "loss": 0.0202,
      "step": 1079400
    },
    {
      "epoch": 1.76649450455935,
      "grad_norm": 0.30159229040145874,
      "learning_rate": 6.509623557784177e-06,
      "loss": 0.0156,
      "step": 1079420
    },
    {
      "epoch": 1.7665272349980035,
      "grad_norm": 0.12388470023870468,
      "learning_rate": 6.509557665570661e-06,
      "loss": 0.0204,
      "step": 1079440
    },
    {
      "epoch": 1.7665599654366568,
      "grad_norm": 0.1947013884782791,
      "learning_rate": 6.509491773357143e-06,
      "loss": 0.0183,
      "step": 1079460
    },
    {
      "epoch": 1.76659269587531,
      "grad_norm": 1.3313082456588745,
      "learning_rate": 6.509425881143626e-06,
      "loss": 0.0233,
      "step": 1079480
    },
    {
      "epoch": 1.7666254263139636,
      "grad_norm": 1.831875205039978,
      "learning_rate": 6.509359988930108e-06,
      "loss": 0.0206,
      "step": 1079500
    },
    {
      "epoch": 1.7666581567526167,
      "grad_norm": 0.2541351318359375,
      "learning_rate": 6.509294096716592e-06,
      "loss": 0.019,
      "step": 1079520
    },
    {
      "epoch": 1.76669088719127,
      "grad_norm": 1.0736753940582275,
      "learning_rate": 6.509228204503074e-06,
      "loss": 0.0159,
      "step": 1079540
    },
    {
      "epoch": 1.7667236176299235,
      "grad_norm": 0.3555545210838318,
      "learning_rate": 6.509162312289557e-06,
      "loss": 0.0268,
      "step": 1079560
    },
    {
      "epoch": 1.7667563480685768,
      "grad_norm": 0.32792720198631287,
      "learning_rate": 6.50909642007604e-06,
      "loss": 0.0191,
      "step": 1079580
    },
    {
      "epoch": 1.7667890785072302,
      "grad_norm": 1.3400418758392334,
      "learning_rate": 6.509030527862523e-06,
      "loss": 0.0201,
      "step": 1079600
    },
    {
      "epoch": 1.7668218089458834,
      "grad_norm": 0.12670685350894928,
      "learning_rate": 6.5089646356490055e-06,
      "loss": 0.017,
      "step": 1079620
    },
    {
      "epoch": 1.766854539384537,
      "grad_norm": 0.5451666712760925,
      "learning_rate": 6.508898743435489e-06,
      "loss": 0.014,
      "step": 1079640
    },
    {
      "epoch": 1.76688726982319,
      "grad_norm": 0.6594448685646057,
      "learning_rate": 6.508832851221971e-06,
      "loss": 0.0165,
      "step": 1079660
    },
    {
      "epoch": 1.7669200002618435,
      "grad_norm": 0.10360903292894363,
      "learning_rate": 6.508766959008455e-06,
      "loss": 0.0192,
      "step": 1079680
    },
    {
      "epoch": 1.7669527307004969,
      "grad_norm": 0.2844579219818115,
      "learning_rate": 6.5087010667949365e-06,
      "loss": 0.0168,
      "step": 1079700
    },
    {
      "epoch": 1.7669854611391502,
      "grad_norm": 0.36347198486328125,
      "learning_rate": 6.50863517458142e-06,
      "loss": 0.0232,
      "step": 1079720
    },
    {
      "epoch": 1.7670181915778036,
      "grad_norm": 0.18841707706451416,
      "learning_rate": 6.508569282367904e-06,
      "loss": 0.0173,
      "step": 1079740
    },
    {
      "epoch": 1.7670509220164567,
      "grad_norm": 0.6894353628158569,
      "learning_rate": 6.5085033901543856e-06,
      "loss": 0.0143,
      "step": 1079760
    },
    {
      "epoch": 1.7670836524551103,
      "grad_norm": 0.3367556929588318,
      "learning_rate": 6.508437497940869e-06,
      "loss": 0.0216,
      "step": 1079780
    },
    {
      "epoch": 1.7671163828937635,
      "grad_norm": 0.4822196960449219,
      "learning_rate": 6.508371605727352e-06,
      "loss": 0.0155,
      "step": 1079800
    },
    {
      "epoch": 1.7671491133324169,
      "grad_norm": 0.19295960664749146,
      "learning_rate": 6.508305713513835e-06,
      "loss": 0.0129,
      "step": 1079820
    },
    {
      "epoch": 1.7671818437710702,
      "grad_norm": 0.32570889592170715,
      "learning_rate": 6.508239821300317e-06,
      "loss": 0.0225,
      "step": 1079840
    },
    {
      "epoch": 1.7672145742097236,
      "grad_norm": 2.7132935523986816,
      "learning_rate": 6.508173929086801e-06,
      "loss": 0.0176,
      "step": 1079860
    },
    {
      "epoch": 1.767247304648377,
      "grad_norm": 0.2525525689125061,
      "learning_rate": 6.508108036873283e-06,
      "loss": 0.0204,
      "step": 1079880
    },
    {
      "epoch": 1.7672800350870301,
      "grad_norm": 0.3975406885147095,
      "learning_rate": 6.5080421446597664e-06,
      "loss": 0.0204,
      "step": 1079900
    },
    {
      "epoch": 1.7673127655256837,
      "grad_norm": 0.14252430200576782,
      "learning_rate": 6.507976252446248e-06,
      "loss": 0.0217,
      "step": 1079920
    },
    {
      "epoch": 1.7673454959643369,
      "grad_norm": 0.12951426208019257,
      "learning_rate": 6.507910360232732e-06,
      "loss": 0.012,
      "step": 1079940
    },
    {
      "epoch": 1.7673782264029902,
      "grad_norm": 1.251963496208191,
      "learning_rate": 6.507844468019215e-06,
      "loss": 0.0178,
      "step": 1079960
    },
    {
      "epoch": 1.7674109568416436,
      "grad_norm": 0.9074038863182068,
      "learning_rate": 6.507778575805697e-06,
      "loss": 0.0221,
      "step": 1079980
    },
    {
      "epoch": 1.7674436872802968,
      "grad_norm": 0.5154322981834412,
      "learning_rate": 6.50771268359218e-06,
      "loss": 0.0173,
      "step": 1080000
    },
    {
      "epoch": 1.7674764177189504,
      "grad_norm": 1.0936774015426636,
      "learning_rate": 6.507646791378664e-06,
      "loss": 0.0173,
      "step": 1080020
    },
    {
      "epoch": 1.7675091481576035,
      "grad_norm": 0.3020329177379608,
      "learning_rate": 6.507580899165146e-06,
      "loss": 0.0237,
      "step": 1080040
    },
    {
      "epoch": 1.767541878596257,
      "grad_norm": 0.5208077430725098,
      "learning_rate": 6.507515006951629e-06,
      "loss": 0.0227,
      "step": 1080060
    },
    {
      "epoch": 1.7675746090349103,
      "grad_norm": 0.08753327280282974,
      "learning_rate": 6.507449114738113e-06,
      "loss": 0.0287,
      "step": 1080080
    },
    {
      "epoch": 1.7676073394735636,
      "grad_norm": 1.6259928941726685,
      "learning_rate": 6.507383222524595e-06,
      "loss": 0.0206,
      "step": 1080100
    },
    {
      "epoch": 1.767640069912217,
      "grad_norm": 0.6576216220855713,
      "learning_rate": 6.507317330311078e-06,
      "loss": 0.0206,
      "step": 1080120
    },
    {
      "epoch": 1.7676728003508702,
      "grad_norm": 1.8889782428741455,
      "learning_rate": 6.50725143809756e-06,
      "loss": 0.0239,
      "step": 1080140
    },
    {
      "epoch": 1.7677055307895237,
      "grad_norm": 0.6991997361183167,
      "learning_rate": 6.507185545884044e-06,
      "loss": 0.0247,
      "step": 1080160
    },
    {
      "epoch": 1.767738261228177,
      "grad_norm": 0.508293867111206,
      "learning_rate": 6.5071196536705265e-06,
      "loss": 0.0201,
      "step": 1080180
    },
    {
      "epoch": 1.7677709916668303,
      "grad_norm": 0.38152045011520386,
      "learning_rate": 6.507053761457009e-06,
      "loss": 0.0147,
      "step": 1080200
    },
    {
      "epoch": 1.7678037221054836,
      "grad_norm": 0.39263221621513367,
      "learning_rate": 6.506987869243492e-06,
      "loss": 0.0214,
      "step": 1080220
    },
    {
      "epoch": 1.767836452544137,
      "grad_norm": 0.24871762096881866,
      "learning_rate": 6.5069219770299756e-06,
      "loss": 0.0139,
      "step": 1080240
    },
    {
      "epoch": 1.7678691829827904,
      "grad_norm": 0.3686526119709015,
      "learning_rate": 6.5068560848164574e-06,
      "loss": 0.0159,
      "step": 1080260
    },
    {
      "epoch": 1.7679019134214435,
      "grad_norm": 1.212199330329895,
      "learning_rate": 6.506790192602941e-06,
      "loss": 0.0165,
      "step": 1080280
    },
    {
      "epoch": 1.7679346438600971,
      "grad_norm": 0.745207667350769,
      "learning_rate": 6.506724300389423e-06,
      "loss": 0.0174,
      "step": 1080300
    },
    {
      "epoch": 1.7679673742987503,
      "grad_norm": 0.47583767771720886,
      "learning_rate": 6.5066584081759065e-06,
      "loss": 0.0205,
      "step": 1080320
    },
    {
      "epoch": 1.7680001047374037,
      "grad_norm": 0.8932846188545227,
      "learning_rate": 6.506592515962389e-06,
      "loss": 0.0175,
      "step": 1080340
    },
    {
      "epoch": 1.768032835176057,
      "grad_norm": 0.7074446082115173,
      "learning_rate": 6.506526623748872e-06,
      "loss": 0.0221,
      "step": 1080360
    },
    {
      "epoch": 1.7680655656147104,
      "grad_norm": 0.8768642544746399,
      "learning_rate": 6.506460731535355e-06,
      "loss": 0.0241,
      "step": 1080380
    },
    {
      "epoch": 1.7680982960533638,
      "grad_norm": 0.7353348135948181,
      "learning_rate": 6.506394839321838e-06,
      "loss": 0.022,
      "step": 1080400
    },
    {
      "epoch": 1.768131026492017,
      "grad_norm": 0.3349688649177551,
      "learning_rate": 6.50632894710832e-06,
      "loss": 0.0136,
      "step": 1080420
    },
    {
      "epoch": 1.7681637569306705,
      "grad_norm": 0.2596897482872009,
      "learning_rate": 6.506263054894804e-06,
      "loss": 0.0174,
      "step": 1080440
    },
    {
      "epoch": 1.7681964873693237,
      "grad_norm": 1.3974958658218384,
      "learning_rate": 6.506197162681287e-06,
      "loss": 0.0216,
      "step": 1080460
    },
    {
      "epoch": 1.768229217807977,
      "grad_norm": 1.4851081371307373,
      "learning_rate": 6.506131270467769e-06,
      "loss": 0.02,
      "step": 1080480
    },
    {
      "epoch": 1.7682619482466304,
      "grad_norm": 0.05765174329280853,
      "learning_rate": 6.506065378254253e-06,
      "loss": 0.0243,
      "step": 1080500
    },
    {
      "epoch": 1.7682946786852838,
      "grad_norm": 0.33677196502685547,
      "learning_rate": 6.505999486040735e-06,
      "loss": 0.0217,
      "step": 1080520
    },
    {
      "epoch": 1.7683274091239372,
      "grad_norm": 0.3349965810775757,
      "learning_rate": 6.505933593827218e-06,
      "loss": 0.0162,
      "step": 1080540
    },
    {
      "epoch": 1.7683601395625903,
      "grad_norm": 0.16789664328098297,
      "learning_rate": 6.5058677016137e-06,
      "loss": 0.0274,
      "step": 1080560
    },
    {
      "epoch": 1.768392870001244,
      "grad_norm": 0.24120886623859406,
      "learning_rate": 6.505801809400184e-06,
      "loss": 0.0227,
      "step": 1080580
    },
    {
      "epoch": 1.768425600439897,
      "grad_norm": 0.31349310278892517,
      "learning_rate": 6.5057359171866666e-06,
      "loss": 0.0216,
      "step": 1080600
    },
    {
      "epoch": 1.7684583308785504,
      "grad_norm": 0.2999330759048462,
      "learning_rate": 6.505670024973149e-06,
      "loss": 0.0121,
      "step": 1080620
    },
    {
      "epoch": 1.7684910613172038,
      "grad_norm": 0.2636606991291046,
      "learning_rate": 6.505604132759632e-06,
      "loss": 0.0169,
      "step": 1080640
    },
    {
      "epoch": 1.7685237917558572,
      "grad_norm": 0.6624388694763184,
      "learning_rate": 6.505538240546116e-06,
      "loss": 0.0239,
      "step": 1080660
    },
    {
      "epoch": 1.7685565221945105,
      "grad_norm": 0.2531443238258362,
      "learning_rate": 6.5054723483325975e-06,
      "loss": 0.0145,
      "step": 1080680
    },
    {
      "epoch": 1.7685892526331637,
      "grad_norm": 1.4364209175109863,
      "learning_rate": 6.505406456119081e-06,
      "loss": 0.0258,
      "step": 1080700
    },
    {
      "epoch": 1.7686219830718173,
      "grad_norm": 0.7023689150810242,
      "learning_rate": 6.505340563905563e-06,
      "loss": 0.0302,
      "step": 1080720
    },
    {
      "epoch": 1.7686547135104704,
      "grad_norm": 0.31562814116477966,
      "learning_rate": 6.505274671692047e-06,
      "loss": 0.0207,
      "step": 1080740
    },
    {
      "epoch": 1.7686874439491238,
      "grad_norm": 1.9021397829055786,
      "learning_rate": 6.505208779478529e-06,
      "loss": 0.0272,
      "step": 1080760
    },
    {
      "epoch": 1.7687201743877772,
      "grad_norm": 1.2045589685440063,
      "learning_rate": 6.505142887265012e-06,
      "loss": 0.0214,
      "step": 1080780
    },
    {
      "epoch": 1.7687529048264303,
      "grad_norm": 0.4570190906524658,
      "learning_rate": 6.505076995051496e-06,
      "loss": 0.024,
      "step": 1080800
    },
    {
      "epoch": 1.768785635265084,
      "grad_norm": 0.3517965078353882,
      "learning_rate": 6.505011102837978e-06,
      "loss": 0.021,
      "step": 1080820
    },
    {
      "epoch": 1.768818365703737,
      "grad_norm": 0.42127445340156555,
      "learning_rate": 6.504945210624461e-06,
      "loss": 0.0156,
      "step": 1080840
    },
    {
      "epoch": 1.7688510961423907,
      "grad_norm": 1.3269984722137451,
      "learning_rate": 6.504879318410944e-06,
      "loss": 0.02,
      "step": 1080860
    },
    {
      "epoch": 1.7688838265810438,
      "grad_norm": 0.3596147894859314,
      "learning_rate": 6.5048134261974275e-06,
      "loss": 0.0236,
      "step": 1080880
    },
    {
      "epoch": 1.7689165570196972,
      "grad_norm": 0.4521239399909973,
      "learning_rate": 6.504747533983909e-06,
      "loss": 0.0159,
      "step": 1080900
    },
    {
      "epoch": 1.7689492874583506,
      "grad_norm": 1.7874292135238647,
      "learning_rate": 6.504681641770393e-06,
      "loss": 0.0228,
      "step": 1080920
    },
    {
      "epoch": 1.7689820178970037,
      "grad_norm": 0.41983848810195923,
      "learning_rate": 6.504615749556875e-06,
      "loss": 0.0169,
      "step": 1080940
    },
    {
      "epoch": 1.7690147483356573,
      "grad_norm": 0.6221924424171448,
      "learning_rate": 6.504549857343358e-06,
      "loss": 0.0183,
      "step": 1080960
    },
    {
      "epoch": 1.7690474787743105,
      "grad_norm": 1.0519970655441284,
      "learning_rate": 6.504483965129841e-06,
      "loss": 0.0291,
      "step": 1080980
    },
    {
      "epoch": 1.7690802092129638,
      "grad_norm": 1.6934503316879272,
      "learning_rate": 6.504418072916324e-06,
      "loss": 0.0296,
      "step": 1081000
    },
    {
      "epoch": 1.7691129396516172,
      "grad_norm": 0.6704292297363281,
      "learning_rate": 6.504352180702807e-06,
      "loss": 0.0252,
      "step": 1081020
    },
    {
      "epoch": 1.7691456700902706,
      "grad_norm": 1.020216941833496,
      "learning_rate": 6.50428628848929e-06,
      "loss": 0.02,
      "step": 1081040
    },
    {
      "epoch": 1.769178400528924,
      "grad_norm": 0.6739206910133362,
      "learning_rate": 6.504220396275772e-06,
      "loss": 0.0237,
      "step": 1081060
    },
    {
      "epoch": 1.769211130967577,
      "grad_norm": 0.883327841758728,
      "learning_rate": 6.504154504062256e-06,
      "loss": 0.0175,
      "step": 1081080
    },
    {
      "epoch": 1.7692438614062307,
      "grad_norm": 0.5825978517532349,
      "learning_rate": 6.504088611848738e-06,
      "loss": 0.0234,
      "step": 1081100
    },
    {
      "epoch": 1.7692765918448838,
      "grad_norm": 0.13319748640060425,
      "learning_rate": 6.504022719635221e-06,
      "loss": 0.0117,
      "step": 1081120
    },
    {
      "epoch": 1.7693093222835372,
      "grad_norm": 0.11296092718839645,
      "learning_rate": 6.503956827421705e-06,
      "loss": 0.0164,
      "step": 1081140
    },
    {
      "epoch": 1.7693420527221906,
      "grad_norm": 0.3738129436969757,
      "learning_rate": 6.503890935208187e-06,
      "loss": 0.0186,
      "step": 1081160
    },
    {
      "epoch": 1.769374783160844,
      "grad_norm": 0.2607510983943939,
      "learning_rate": 6.50382504299467e-06,
      "loss": 0.0226,
      "step": 1081180
    },
    {
      "epoch": 1.7694075135994973,
      "grad_norm": 0.4055878818035126,
      "learning_rate": 6.503759150781153e-06,
      "loss": 0.0182,
      "step": 1081200
    },
    {
      "epoch": 1.7694402440381505,
      "grad_norm": 0.41167283058166504,
      "learning_rate": 6.503693258567636e-06,
      "loss": 0.0164,
      "step": 1081220
    },
    {
      "epoch": 1.769472974476804,
      "grad_norm": 0.17599205672740936,
      "learning_rate": 6.5036273663541185e-06,
      "loss": 0.0143,
      "step": 1081240
    },
    {
      "epoch": 1.7695057049154572,
      "grad_norm": 0.6372926235198975,
      "learning_rate": 6.503561474140602e-06,
      "loss": 0.0161,
      "step": 1081260
    },
    {
      "epoch": 1.7695384353541106,
      "grad_norm": 0.2579270899295807,
      "learning_rate": 6.503495581927084e-06,
      "loss": 0.0203,
      "step": 1081280
    },
    {
      "epoch": 1.769571165792764,
      "grad_norm": 1.0882513523101807,
      "learning_rate": 6.5034296897135675e-06,
      "loss": 0.0144,
      "step": 1081300
    },
    {
      "epoch": 1.7696038962314173,
      "grad_norm": 1.0609004497528076,
      "learning_rate": 6.5033637975000494e-06,
      "loss": 0.0185,
      "step": 1081320
    },
    {
      "epoch": 1.7696366266700707,
      "grad_norm": 0.4232543408870697,
      "learning_rate": 6.503297905286533e-06,
      "loss": 0.0165,
      "step": 1081340
    },
    {
      "epoch": 1.7696693571087239,
      "grad_norm": 0.5874645113945007,
      "learning_rate": 6.503232013073015e-06,
      "loss": 0.0175,
      "step": 1081360
    },
    {
      "epoch": 1.7697020875473775,
      "grad_norm": 0.9391828179359436,
      "learning_rate": 6.5031661208594985e-06,
      "loss": 0.0121,
      "step": 1081380
    },
    {
      "epoch": 1.7697348179860306,
      "grad_norm": 0.525556743144989,
      "learning_rate": 6.503100228645981e-06,
      "loss": 0.0243,
      "step": 1081400
    },
    {
      "epoch": 1.769767548424684,
      "grad_norm": 1.0348824262619019,
      "learning_rate": 6.503034336432465e-06,
      "loss": 0.018,
      "step": 1081420
    },
    {
      "epoch": 1.7698002788633374,
      "grad_norm": 0.5313556790351868,
      "learning_rate": 6.502968444218947e-06,
      "loss": 0.0128,
      "step": 1081440
    },
    {
      "epoch": 1.7698330093019905,
      "grad_norm": 0.268952876329422,
      "learning_rate": 6.50290255200543e-06,
      "loss": 0.0202,
      "step": 1081460
    },
    {
      "epoch": 1.769865739740644,
      "grad_norm": 0.8199341297149658,
      "learning_rate": 6.502836659791914e-06,
      "loss": 0.0152,
      "step": 1081480
    },
    {
      "epoch": 1.7698984701792972,
      "grad_norm": 0.11843821406364441,
      "learning_rate": 6.502770767578396e-06,
      "loss": 0.0158,
      "step": 1081500
    },
    {
      "epoch": 1.7699312006179508,
      "grad_norm": 0.5573596358299255,
      "learning_rate": 6.502704875364879e-06,
      "loss": 0.0221,
      "step": 1081520
    },
    {
      "epoch": 1.769963931056604,
      "grad_norm": 0.7902140021324158,
      "learning_rate": 6.502638983151361e-06,
      "loss": 0.0242,
      "step": 1081540
    },
    {
      "epoch": 1.7699966614952574,
      "grad_norm": 0.3099101781845093,
      "learning_rate": 6.502573090937845e-06,
      "loss": 0.0192,
      "step": 1081560
    },
    {
      "epoch": 1.7700293919339107,
      "grad_norm": 0.9143524169921875,
      "learning_rate": 6.502507198724327e-06,
      "loss": 0.0156,
      "step": 1081580
    },
    {
      "epoch": 1.7700621223725639,
      "grad_norm": 0.3508029282093048,
      "learning_rate": 6.50244130651081e-06,
      "loss": 0.0221,
      "step": 1081600
    },
    {
      "epoch": 1.7700948528112175,
      "grad_norm": 0.29946452379226685,
      "learning_rate": 6.502375414297293e-06,
      "loss": 0.0171,
      "step": 1081620
    },
    {
      "epoch": 1.7701275832498706,
      "grad_norm": 1.0357749462127686,
      "learning_rate": 6.502309522083776e-06,
      "loss": 0.017,
      "step": 1081640
    },
    {
      "epoch": 1.770160313688524,
      "grad_norm": 0.5772885084152222,
      "learning_rate": 6.5022436298702585e-06,
      "loss": 0.0185,
      "step": 1081660
    },
    {
      "epoch": 1.7701930441271774,
      "grad_norm": 0.6077800989151001,
      "learning_rate": 6.502177737656742e-06,
      "loss": 0.0153,
      "step": 1081680
    },
    {
      "epoch": 1.7702257745658307,
      "grad_norm": 0.12642641365528107,
      "learning_rate": 6.502111845443224e-06,
      "loss": 0.0179,
      "step": 1081700
    },
    {
      "epoch": 1.7702585050044841,
      "grad_norm": 0.4033983647823334,
      "learning_rate": 6.502045953229708e-06,
      "loss": 0.0229,
      "step": 1081720
    },
    {
      "epoch": 1.7702912354431373,
      "grad_norm": 0.6351619958877563,
      "learning_rate": 6.5019800610161895e-06,
      "loss": 0.0129,
      "step": 1081740
    },
    {
      "epoch": 1.7703239658817909,
      "grad_norm": 0.42443761229515076,
      "learning_rate": 6.501914168802673e-06,
      "loss": 0.0194,
      "step": 1081760
    },
    {
      "epoch": 1.770356696320444,
      "grad_norm": 0.4685647487640381,
      "learning_rate": 6.501848276589156e-06,
      "loss": 0.0164,
      "step": 1081780
    },
    {
      "epoch": 1.7703894267590974,
      "grad_norm": 2.0652267932891846,
      "learning_rate": 6.5017823843756386e-06,
      "loss": 0.0161,
      "step": 1081800
    },
    {
      "epoch": 1.7704221571977508,
      "grad_norm": 0.3087950050830841,
      "learning_rate": 6.501716492162121e-06,
      "loss": 0.0211,
      "step": 1081820
    },
    {
      "epoch": 1.7704548876364041,
      "grad_norm": 0.1486351191997528,
      "learning_rate": 6.501650599948605e-06,
      "loss": 0.0175,
      "step": 1081840
    },
    {
      "epoch": 1.7704876180750575,
      "grad_norm": 0.8164526224136353,
      "learning_rate": 6.501584707735088e-06,
      "loss": 0.0225,
      "step": 1081860
    },
    {
      "epoch": 1.7705203485137107,
      "grad_norm": 0.7296937108039856,
      "learning_rate": 6.50151881552157e-06,
      "loss": 0.0151,
      "step": 1081880
    },
    {
      "epoch": 1.7705530789523642,
      "grad_norm": 0.8966114521026611,
      "learning_rate": 6.501452923308054e-06,
      "loss": 0.0127,
      "step": 1081900
    },
    {
      "epoch": 1.7705858093910174,
      "grad_norm": 0.6707199215888977,
      "learning_rate": 6.501387031094536e-06,
      "loss": 0.0148,
      "step": 1081920
    },
    {
      "epoch": 1.7706185398296708,
      "grad_norm": 0.6736094951629639,
      "learning_rate": 6.5013211388810194e-06,
      "loss": 0.0272,
      "step": 1081940
    },
    {
      "epoch": 1.7706512702683241,
      "grad_norm": 0.34416934847831726,
      "learning_rate": 6.501255246667501e-06,
      "loss": 0.028,
      "step": 1081960
    },
    {
      "epoch": 1.7706840007069775,
      "grad_norm": 0.17209599912166595,
      "learning_rate": 6.501189354453985e-06,
      "loss": 0.0137,
      "step": 1081980
    },
    {
      "epoch": 1.7707167311456309,
      "grad_norm": 0.7975106835365295,
      "learning_rate": 6.501123462240468e-06,
      "loss": 0.0221,
      "step": 1082000
    },
    {
      "epoch": 1.770749461584284,
      "grad_norm": 0.38139671087265015,
      "learning_rate": 6.50105757002695e-06,
      "loss": 0.0221,
      "step": 1082020
    },
    {
      "epoch": 1.7707821920229376,
      "grad_norm": 0.1864285171031952,
      "learning_rate": 6.500991677813433e-06,
      "loss": 0.0182,
      "step": 1082040
    },
    {
      "epoch": 1.7708149224615908,
      "grad_norm": 0.1565096378326416,
      "learning_rate": 6.500925785599917e-06,
      "loss": 0.0245,
      "step": 1082060
    },
    {
      "epoch": 1.7708476529002442,
      "grad_norm": 0.7363520860671997,
      "learning_rate": 6.500859893386399e-06,
      "loss": 0.0216,
      "step": 1082080
    },
    {
      "epoch": 1.7708803833388975,
      "grad_norm": 0.3906809389591217,
      "learning_rate": 6.500794001172882e-06,
      "loss": 0.02,
      "step": 1082100
    },
    {
      "epoch": 1.770913113777551,
      "grad_norm": 1.9497984647750854,
      "learning_rate": 6.500728108959364e-06,
      "loss": 0.0299,
      "step": 1082120
    },
    {
      "epoch": 1.7709458442162043,
      "grad_norm": 0.6883776187896729,
      "learning_rate": 6.500662216745848e-06,
      "loss": 0.0203,
      "step": 1082140
    },
    {
      "epoch": 1.7709785746548574,
      "grad_norm": 0.7016273736953735,
      "learning_rate": 6.5005963245323304e-06,
      "loss": 0.0214,
      "step": 1082160
    },
    {
      "epoch": 1.771011305093511,
      "grad_norm": 1.2385010719299316,
      "learning_rate": 6.500530432318813e-06,
      "loss": 0.0171,
      "step": 1082180
    },
    {
      "epoch": 1.7710440355321642,
      "grad_norm": 0.3468243181705475,
      "learning_rate": 6.500464540105297e-06,
      "loss": 0.0248,
      "step": 1082200
    },
    {
      "epoch": 1.7710767659708175,
      "grad_norm": 0.6125131845474243,
      "learning_rate": 6.5003986478917795e-06,
      "loss": 0.0147,
      "step": 1082220
    },
    {
      "epoch": 1.771109496409471,
      "grad_norm": 0.5328645706176758,
      "learning_rate": 6.500332755678262e-06,
      "loss": 0.0234,
      "step": 1082240
    },
    {
      "epoch": 1.771142226848124,
      "grad_norm": 0.48626670241355896,
      "learning_rate": 6.500266863464745e-06,
      "loss": 0.018,
      "step": 1082260
    },
    {
      "epoch": 1.7711749572867777,
      "grad_norm": 0.9148176908493042,
      "learning_rate": 6.5002009712512286e-06,
      "loss": 0.0222,
      "step": 1082280
    },
    {
      "epoch": 1.7712076877254308,
      "grad_norm": 0.3453483283519745,
      "learning_rate": 6.5001350790377104e-06,
      "loss": 0.0187,
      "step": 1082300
    },
    {
      "epoch": 1.7712404181640844,
      "grad_norm": 0.41036033630371094,
      "learning_rate": 6.500069186824194e-06,
      "loss": 0.0173,
      "step": 1082320
    },
    {
      "epoch": 1.7712731486027375,
      "grad_norm": 0.20068304240703583,
      "learning_rate": 6.500003294610676e-06,
      "loss": 0.0185,
      "step": 1082340
    },
    {
      "epoch": 1.771305879041391,
      "grad_norm": 0.3042975962162018,
      "learning_rate": 6.4999374023971595e-06,
      "loss": 0.0256,
      "step": 1082360
    },
    {
      "epoch": 1.7713386094800443,
      "grad_norm": 0.25836730003356934,
      "learning_rate": 6.499871510183641e-06,
      "loss": 0.0159,
      "step": 1082380
    },
    {
      "epoch": 1.7713713399186974,
      "grad_norm": 1.381649136543274,
      "learning_rate": 6.499805617970125e-06,
      "loss": 0.024,
      "step": 1082400
    },
    {
      "epoch": 1.771404070357351,
      "grad_norm": 0.8259224891662598,
      "learning_rate": 6.499739725756608e-06,
      "loss": 0.0189,
      "step": 1082420
    },
    {
      "epoch": 1.7714368007960042,
      "grad_norm": 0.7871452569961548,
      "learning_rate": 6.4996738335430905e-06,
      "loss": 0.0228,
      "step": 1082440
    },
    {
      "epoch": 1.7714695312346576,
      "grad_norm": 0.943943977355957,
      "learning_rate": 6.499607941329573e-06,
      "loss": 0.0243,
      "step": 1082460
    },
    {
      "epoch": 1.771502261673311,
      "grad_norm": 0.4417667090892792,
      "learning_rate": 6.499542049116057e-06,
      "loss": 0.0205,
      "step": 1082480
    },
    {
      "epoch": 1.7715349921119643,
      "grad_norm": 0.5754361748695374,
      "learning_rate": 6.499476156902539e-06,
      "loss": 0.0141,
      "step": 1082500
    },
    {
      "epoch": 1.7715677225506177,
      "grad_norm": 0.4613533616065979,
      "learning_rate": 6.499410264689022e-06,
      "loss": 0.0214,
      "step": 1082520
    },
    {
      "epoch": 1.7716004529892708,
      "grad_norm": 0.16677819192409515,
      "learning_rate": 6.499344372475506e-06,
      "loss": 0.0183,
      "step": 1082540
    },
    {
      "epoch": 1.7716331834279244,
      "grad_norm": 0.1707833707332611,
      "learning_rate": 6.499278480261988e-06,
      "loss": 0.0106,
      "step": 1082560
    },
    {
      "epoch": 1.7716659138665776,
      "grad_norm": 0.9187225699424744,
      "learning_rate": 6.499212588048471e-06,
      "loss": 0.0222,
      "step": 1082580
    },
    {
      "epoch": 1.771698644305231,
      "grad_norm": 0.15010204911231995,
      "learning_rate": 6.499146695834953e-06,
      "loss": 0.0137,
      "step": 1082600
    },
    {
      "epoch": 1.7717313747438843,
      "grad_norm": 0.8053582906723022,
      "learning_rate": 6.499080803621437e-06,
      "loss": 0.0209,
      "step": 1082620
    },
    {
      "epoch": 1.7717641051825377,
      "grad_norm": 0.7112513184547424,
      "learning_rate": 6.4990149114079196e-06,
      "loss": 0.0203,
      "step": 1082640
    },
    {
      "epoch": 1.771796835621191,
      "grad_norm": 0.22568786144256592,
      "learning_rate": 6.498949019194402e-06,
      "loss": 0.0194,
      "step": 1082660
    },
    {
      "epoch": 1.7718295660598442,
      "grad_norm": 0.26442357897758484,
      "learning_rate": 6.498883126980885e-06,
      "loss": 0.018,
      "step": 1082680
    },
    {
      "epoch": 1.7718622964984978,
      "grad_norm": 0.33618220686912537,
      "learning_rate": 6.498817234767369e-06,
      "loss": 0.016,
      "step": 1082700
    },
    {
      "epoch": 1.771895026937151,
      "grad_norm": 0.19986304640769958,
      "learning_rate": 6.4987513425538505e-06,
      "loss": 0.0117,
      "step": 1082720
    },
    {
      "epoch": 1.7719277573758043,
      "grad_norm": 0.3389924168586731,
      "learning_rate": 6.498685450340334e-06,
      "loss": 0.015,
      "step": 1082740
    },
    {
      "epoch": 1.7719604878144577,
      "grad_norm": 0.4388135075569153,
      "learning_rate": 6.498619558126816e-06,
      "loss": 0.0215,
      "step": 1082760
    },
    {
      "epoch": 1.771993218253111,
      "grad_norm": 0.39637845754623413,
      "learning_rate": 6.4985536659133e-06,
      "loss": 0.0191,
      "step": 1082780
    },
    {
      "epoch": 1.7720259486917644,
      "grad_norm": 0.8242387771606445,
      "learning_rate": 6.498487773699782e-06,
      "loss": 0.0134,
      "step": 1082800
    },
    {
      "epoch": 1.7720586791304176,
      "grad_norm": 0.4168587028980255,
      "learning_rate": 6.498421881486265e-06,
      "loss": 0.0164,
      "step": 1082820
    },
    {
      "epoch": 1.7720914095690712,
      "grad_norm": 0.19478823244571686,
      "learning_rate": 6.498355989272748e-06,
      "loss": 0.0221,
      "step": 1082840
    },
    {
      "epoch": 1.7721241400077243,
      "grad_norm": 0.18535733222961426,
      "learning_rate": 6.498290097059231e-06,
      "loss": 0.0178,
      "step": 1082860
    },
    {
      "epoch": 1.7721568704463777,
      "grad_norm": 0.3641115725040436,
      "learning_rate": 6.498224204845713e-06,
      "loss": 0.014,
      "step": 1082880
    },
    {
      "epoch": 1.772189600885031,
      "grad_norm": 0.4523075222969055,
      "learning_rate": 6.498158312632197e-06,
      "loss": 0.0162,
      "step": 1082900
    },
    {
      "epoch": 1.7722223313236845,
      "grad_norm": 0.6606311202049255,
      "learning_rate": 6.4980924204186805e-06,
      "loss": 0.0131,
      "step": 1082920
    },
    {
      "epoch": 1.7722550617623378,
      "grad_norm": 0.25603556632995605,
      "learning_rate": 6.498026528205162e-06,
      "loss": 0.0085,
      "step": 1082940
    },
    {
      "epoch": 1.772287792200991,
      "grad_norm": 0.48941493034362793,
      "learning_rate": 6.497960635991646e-06,
      "loss": 0.0134,
      "step": 1082960
    },
    {
      "epoch": 1.7723205226396446,
      "grad_norm": 0.15912316739559174,
      "learning_rate": 6.497894743778128e-06,
      "loss": 0.0132,
      "step": 1082980
    },
    {
      "epoch": 1.7723532530782977,
      "grad_norm": 0.6984201073646545,
      "learning_rate": 6.497828851564611e-06,
      "loss": 0.0216,
      "step": 1083000
    },
    {
      "epoch": 1.772385983516951,
      "grad_norm": 0.6642324924468994,
      "learning_rate": 6.497762959351094e-06,
      "loss": 0.0219,
      "step": 1083020
    },
    {
      "epoch": 1.7724187139556045,
      "grad_norm": 0.7170011401176453,
      "learning_rate": 6.497697067137577e-06,
      "loss": 0.0164,
      "step": 1083040
    },
    {
      "epoch": 1.7724514443942576,
      "grad_norm": 0.3341938257217407,
      "learning_rate": 6.49763117492406e-06,
      "loss": 0.0246,
      "step": 1083060
    },
    {
      "epoch": 1.7724841748329112,
      "grad_norm": 0.23721730709075928,
      "learning_rate": 6.497565282710543e-06,
      "loss": 0.018,
      "step": 1083080
    },
    {
      "epoch": 1.7725169052715644,
      "grad_norm": 1.754814624786377,
      "learning_rate": 6.497499390497025e-06,
      "loss": 0.0231,
      "step": 1083100
    },
    {
      "epoch": 1.772549635710218,
      "grad_norm": 0.9702261090278625,
      "learning_rate": 6.497433498283509e-06,
      "loss": 0.0151,
      "step": 1083120
    },
    {
      "epoch": 1.772582366148871,
      "grad_norm": 0.4860527515411377,
      "learning_rate": 6.497367606069991e-06,
      "loss": 0.0173,
      "step": 1083140
    },
    {
      "epoch": 1.7726150965875245,
      "grad_norm": 0.25503259897232056,
      "learning_rate": 6.497301713856474e-06,
      "loss": 0.0172,
      "step": 1083160
    },
    {
      "epoch": 1.7726478270261778,
      "grad_norm": 0.23120222985744476,
      "learning_rate": 6.497235821642957e-06,
      "loss": 0.02,
      "step": 1083180
    },
    {
      "epoch": 1.772680557464831,
      "grad_norm": 0.15711447596549988,
      "learning_rate": 6.49716992942944e-06,
      "loss": 0.0166,
      "step": 1083200
    },
    {
      "epoch": 1.7727132879034846,
      "grad_norm": 0.4261963665485382,
      "learning_rate": 6.497104037215922e-06,
      "loss": 0.0164,
      "step": 1083220
    },
    {
      "epoch": 1.7727460183421377,
      "grad_norm": 0.25477758049964905,
      "learning_rate": 6.497038145002406e-06,
      "loss": 0.0149,
      "step": 1083240
    },
    {
      "epoch": 1.7727787487807911,
      "grad_norm": 0.3552296757698059,
      "learning_rate": 6.496972252788889e-06,
      "loss": 0.0165,
      "step": 1083260
    },
    {
      "epoch": 1.7728114792194445,
      "grad_norm": 0.7757576107978821,
      "learning_rate": 6.4969063605753715e-06,
      "loss": 0.0181,
      "step": 1083280
    },
    {
      "epoch": 1.7728442096580979,
      "grad_norm": 1.450682282447815,
      "learning_rate": 6.496840468361855e-06,
      "loss": 0.0204,
      "step": 1083300
    },
    {
      "epoch": 1.7728769400967512,
      "grad_norm": 2.6293246746063232,
      "learning_rate": 6.496774576148337e-06,
      "loss": 0.0271,
      "step": 1083320
    },
    {
      "epoch": 1.7729096705354044,
      "grad_norm": 0.9823920130729675,
      "learning_rate": 6.4967086839348205e-06,
      "loss": 0.0191,
      "step": 1083340
    },
    {
      "epoch": 1.772942400974058,
      "grad_norm": 0.18505750596523285,
      "learning_rate": 6.4966427917213024e-06,
      "loss": 0.0171,
      "step": 1083360
    },
    {
      "epoch": 1.7729751314127111,
      "grad_norm": 0.6527271866798401,
      "learning_rate": 6.496576899507786e-06,
      "loss": 0.0197,
      "step": 1083380
    },
    {
      "epoch": 1.7730078618513645,
      "grad_norm": 0.21858637034893036,
      "learning_rate": 6.496511007294268e-06,
      "loss": 0.0173,
      "step": 1083400
    },
    {
      "epoch": 1.7730405922900179,
      "grad_norm": 0.7178942561149597,
      "learning_rate": 6.4964451150807515e-06,
      "loss": 0.0254,
      "step": 1083420
    },
    {
      "epoch": 1.7730733227286712,
      "grad_norm": 0.25240540504455566,
      "learning_rate": 6.496379222867234e-06,
      "loss": 0.0204,
      "step": 1083440
    },
    {
      "epoch": 1.7731060531673246,
      "grad_norm": 0.22336427867412567,
      "learning_rate": 6.496313330653717e-06,
      "loss": 0.0144,
      "step": 1083460
    },
    {
      "epoch": 1.7731387836059778,
      "grad_norm": 0.6678862571716309,
      "learning_rate": 6.4962474384402e-06,
      "loss": 0.0132,
      "step": 1083480
    },
    {
      "epoch": 1.7731715140446314,
      "grad_norm": 0.41643568873405457,
      "learning_rate": 6.496181546226683e-06,
      "loss": 0.0159,
      "step": 1083500
    },
    {
      "epoch": 1.7732042444832845,
      "grad_norm": 0.5279228687286377,
      "learning_rate": 6.496115654013165e-06,
      "loss": 0.02,
      "step": 1083520
    },
    {
      "epoch": 1.7732369749219379,
      "grad_norm": 1.394950032234192,
      "learning_rate": 6.496049761799649e-06,
      "loss": 0.0237,
      "step": 1083540
    },
    {
      "epoch": 1.7732697053605913,
      "grad_norm": 0.2246108502149582,
      "learning_rate": 6.495983869586131e-06,
      "loss": 0.0195,
      "step": 1083560
    },
    {
      "epoch": 1.7733024357992446,
      "grad_norm": 0.2996329367160797,
      "learning_rate": 6.495917977372614e-06,
      "loss": 0.0179,
      "step": 1083580
    },
    {
      "epoch": 1.773335166237898,
      "grad_norm": 0.145112082362175,
      "learning_rate": 6.495852085159098e-06,
      "loss": 0.0192,
      "step": 1083600
    },
    {
      "epoch": 1.7733678966765511,
      "grad_norm": 0.263531357049942,
      "learning_rate": 6.49578619294558e-06,
      "loss": 0.0201,
      "step": 1083620
    },
    {
      "epoch": 1.7734006271152047,
      "grad_norm": 0.21937164664268494,
      "learning_rate": 6.495720300732063e-06,
      "loss": 0.0223,
      "step": 1083640
    },
    {
      "epoch": 1.773433357553858,
      "grad_norm": 0.5055155158042908,
      "learning_rate": 6.495654408518546e-06,
      "loss": 0.0121,
      "step": 1083660
    },
    {
      "epoch": 1.7734660879925113,
      "grad_norm": 0.37062203884124756,
      "learning_rate": 6.495588516305029e-06,
      "loss": 0.0205,
      "step": 1083680
    },
    {
      "epoch": 1.7734988184311646,
      "grad_norm": 0.556513249874115,
      "learning_rate": 6.4955226240915115e-06,
      "loss": 0.0156,
      "step": 1083700
    },
    {
      "epoch": 1.773531548869818,
      "grad_norm": 0.5760868787765503,
      "learning_rate": 6.495456731877995e-06,
      "loss": 0.012,
      "step": 1083720
    },
    {
      "epoch": 1.7735642793084714,
      "grad_norm": 0.5451653599739075,
      "learning_rate": 6.495390839664477e-06,
      "loss": 0.0251,
      "step": 1083740
    },
    {
      "epoch": 1.7735970097471245,
      "grad_norm": 0.23262853920459747,
      "learning_rate": 6.495324947450961e-06,
      "loss": 0.0118,
      "step": 1083760
    },
    {
      "epoch": 1.7736297401857781,
      "grad_norm": 0.403239905834198,
      "learning_rate": 6.4952590552374425e-06,
      "loss": 0.0159,
      "step": 1083780
    },
    {
      "epoch": 1.7736624706244313,
      "grad_norm": 0.6741772890090942,
      "learning_rate": 6.495193163023926e-06,
      "loss": 0.0185,
      "step": 1083800
    },
    {
      "epoch": 1.7736952010630846,
      "grad_norm": 0.855797529220581,
      "learning_rate": 6.495127270810409e-06,
      "loss": 0.0145,
      "step": 1083820
    },
    {
      "epoch": 1.773727931501738,
      "grad_norm": 0.5489407181739807,
      "learning_rate": 6.4950613785968916e-06,
      "loss": 0.0162,
      "step": 1083840
    },
    {
      "epoch": 1.7737606619403912,
      "grad_norm": 0.1514987200498581,
      "learning_rate": 6.494995486383374e-06,
      "loss": 0.0203,
      "step": 1083860
    },
    {
      "epoch": 1.7737933923790448,
      "grad_norm": 0.45914024114608765,
      "learning_rate": 6.494929594169858e-06,
      "loss": 0.022,
      "step": 1083880
    },
    {
      "epoch": 1.773826122817698,
      "grad_norm": 1.4281641244888306,
      "learning_rate": 6.49486370195634e-06,
      "loss": 0.0202,
      "step": 1083900
    },
    {
      "epoch": 1.7738588532563513,
      "grad_norm": 0.2873246371746063,
      "learning_rate": 6.494797809742823e-06,
      "loss": 0.0269,
      "step": 1083920
    },
    {
      "epoch": 1.7738915836950047,
      "grad_norm": 0.11022564768791199,
      "learning_rate": 6.494731917529305e-06,
      "loss": 0.0109,
      "step": 1083940
    },
    {
      "epoch": 1.773924314133658,
      "grad_norm": 0.22520294785499573,
      "learning_rate": 6.494666025315789e-06,
      "loss": 0.0216,
      "step": 1083960
    },
    {
      "epoch": 1.7739570445723114,
      "grad_norm": 1.09324049949646,
      "learning_rate": 6.4946001331022724e-06,
      "loss": 0.0192,
      "step": 1083980
    },
    {
      "epoch": 1.7739897750109646,
      "grad_norm": 0.996849536895752,
      "learning_rate": 6.494534240888754e-06,
      "loss": 0.0267,
      "step": 1084000
    },
    {
      "epoch": 1.7740225054496181,
      "grad_norm": 0.1360737830400467,
      "learning_rate": 6.494468348675238e-06,
      "loss": 0.0127,
      "step": 1084020
    },
    {
      "epoch": 1.7740552358882713,
      "grad_norm": 0.3472345769405365,
      "learning_rate": 6.494402456461721e-06,
      "loss": 0.0144,
      "step": 1084040
    },
    {
      "epoch": 1.7740879663269247,
      "grad_norm": 1.1433182954788208,
      "learning_rate": 6.494336564248203e-06,
      "loss": 0.0192,
      "step": 1084060
    },
    {
      "epoch": 1.774120696765578,
      "grad_norm": 0.13166426122188568,
      "learning_rate": 6.494270672034686e-06,
      "loss": 0.021,
      "step": 1084080
    },
    {
      "epoch": 1.7741534272042314,
      "grad_norm": 0.28219133615493774,
      "learning_rate": 6.49420477982117e-06,
      "loss": 0.0108,
      "step": 1084100
    },
    {
      "epoch": 1.7741861576428848,
      "grad_norm": 0.3430491089820862,
      "learning_rate": 6.494138887607652e-06,
      "loss": 0.0128,
      "step": 1084120
    },
    {
      "epoch": 1.774218888081538,
      "grad_norm": 0.7791557312011719,
      "learning_rate": 6.494072995394135e-06,
      "loss": 0.0195,
      "step": 1084140
    },
    {
      "epoch": 1.7742516185201915,
      "grad_norm": 0.5637602806091309,
      "learning_rate": 6.494007103180617e-06,
      "loss": 0.0257,
      "step": 1084160
    },
    {
      "epoch": 1.7742843489588447,
      "grad_norm": 1.2012767791748047,
      "learning_rate": 6.493941210967101e-06,
      "loss": 0.018,
      "step": 1084180
    },
    {
      "epoch": 1.774317079397498,
      "grad_norm": 0.9037542939186096,
      "learning_rate": 6.4938753187535834e-06,
      "loss": 0.0224,
      "step": 1084200
    },
    {
      "epoch": 1.7743498098361514,
      "grad_norm": 0.6383032202720642,
      "learning_rate": 6.493809426540066e-06,
      "loss": 0.0165,
      "step": 1084220
    },
    {
      "epoch": 1.7743825402748048,
      "grad_norm": 0.15945349633693695,
      "learning_rate": 6.493743534326549e-06,
      "loss": 0.0321,
      "step": 1084240
    },
    {
      "epoch": 1.7744152707134582,
      "grad_norm": 0.21160666644573212,
      "learning_rate": 6.4936776421130325e-06,
      "loss": 0.0165,
      "step": 1084260
    },
    {
      "epoch": 1.7744480011521113,
      "grad_norm": 0.3470086455345154,
      "learning_rate": 6.493611749899514e-06,
      "loss": 0.0182,
      "step": 1084280
    },
    {
      "epoch": 1.774480731590765,
      "grad_norm": 0.1039067804813385,
      "learning_rate": 6.493545857685998e-06,
      "loss": 0.0149,
      "step": 1084300
    },
    {
      "epoch": 1.774513462029418,
      "grad_norm": 0.91842120885849,
      "learning_rate": 6.4934799654724816e-06,
      "loss": 0.0378,
      "step": 1084320
    },
    {
      "epoch": 1.7745461924680714,
      "grad_norm": 0.24257998168468475,
      "learning_rate": 6.4934140732589635e-06,
      "loss": 0.0184,
      "step": 1084340
    },
    {
      "epoch": 1.7745789229067248,
      "grad_norm": 0.8463786244392395,
      "learning_rate": 6.493348181045447e-06,
      "loss": 0.0248,
      "step": 1084360
    },
    {
      "epoch": 1.7746116533453782,
      "grad_norm": 0.18404337763786316,
      "learning_rate": 6.493282288831929e-06,
      "loss": 0.0165,
      "step": 1084380
    },
    {
      "epoch": 1.7746443837840316,
      "grad_norm": 0.54512619972229,
      "learning_rate": 6.4932163966184125e-06,
      "loss": 0.0198,
      "step": 1084400
    },
    {
      "epoch": 1.7746771142226847,
      "grad_norm": 0.36034268140792847,
      "learning_rate": 6.493150504404894e-06,
      "loss": 0.0172,
      "step": 1084420
    },
    {
      "epoch": 1.7747098446613383,
      "grad_norm": 0.6491565108299255,
      "learning_rate": 6.493084612191378e-06,
      "loss": 0.022,
      "step": 1084440
    },
    {
      "epoch": 1.7747425750999914,
      "grad_norm": 0.40480321645736694,
      "learning_rate": 6.493018719977861e-06,
      "loss": 0.0119,
      "step": 1084460
    },
    {
      "epoch": 1.7747753055386448,
      "grad_norm": 0.19784508645534515,
      "learning_rate": 6.4929528277643435e-06,
      "loss": 0.0237,
      "step": 1084480
    },
    {
      "epoch": 1.7748080359772982,
      "grad_norm": 1.6757779121398926,
      "learning_rate": 6.492886935550826e-06,
      "loss": 0.0231,
      "step": 1084500
    },
    {
      "epoch": 1.7748407664159513,
      "grad_norm": 0.3619738221168518,
      "learning_rate": 6.49282104333731e-06,
      "loss": 0.0159,
      "step": 1084520
    },
    {
      "epoch": 1.774873496854605,
      "grad_norm": 0.3754175901412964,
      "learning_rate": 6.492755151123792e-06,
      "loss": 0.022,
      "step": 1084540
    },
    {
      "epoch": 1.774906227293258,
      "grad_norm": 0.5965369939804077,
      "learning_rate": 6.492689258910275e-06,
      "loss": 0.0181,
      "step": 1084560
    },
    {
      "epoch": 1.7749389577319117,
      "grad_norm": 0.6940889954566956,
      "learning_rate": 6.492623366696757e-06,
      "loss": 0.0161,
      "step": 1084580
    },
    {
      "epoch": 1.7749716881705648,
      "grad_norm": 0.2649361193180084,
      "learning_rate": 6.492557474483241e-06,
      "loss": 0.0188,
      "step": 1084600
    },
    {
      "epoch": 1.7750044186092182,
      "grad_norm": 0.2882135510444641,
      "learning_rate": 6.4924915822697235e-06,
      "loss": 0.0134,
      "step": 1084620
    },
    {
      "epoch": 1.7750371490478716,
      "grad_norm": 0.8028513789176941,
      "learning_rate": 6.492425690056206e-06,
      "loss": 0.0203,
      "step": 1084640
    },
    {
      "epoch": 1.7750698794865247,
      "grad_norm": 0.7255434393882751,
      "learning_rate": 6.49235979784269e-06,
      "loss": 0.0152,
      "step": 1084660
    },
    {
      "epoch": 1.7751026099251783,
      "grad_norm": 0.6466940641403198,
      "learning_rate": 6.4922939056291726e-06,
      "loss": 0.0172,
      "step": 1084680
    },
    {
      "epoch": 1.7751353403638315,
      "grad_norm": 0.5230353474617004,
      "learning_rate": 6.492228013415655e-06,
      "loss": 0.0138,
      "step": 1084700
    },
    {
      "epoch": 1.7751680708024848,
      "grad_norm": 1.730586290359497,
      "learning_rate": 6.492162121202138e-06,
      "loss": 0.0179,
      "step": 1084720
    },
    {
      "epoch": 1.7752008012411382,
      "grad_norm": 0.5009931325912476,
      "learning_rate": 6.492096228988622e-06,
      "loss": 0.016,
      "step": 1084740
    },
    {
      "epoch": 1.7752335316797916,
      "grad_norm": 0.08437546342611313,
      "learning_rate": 6.4920303367751035e-06,
      "loss": 0.0182,
      "step": 1084760
    },
    {
      "epoch": 1.775266262118445,
      "grad_norm": 0.48342281579971313,
      "learning_rate": 6.491964444561587e-06,
      "loss": 0.0274,
      "step": 1084780
    },
    {
      "epoch": 1.775298992557098,
      "grad_norm": 1.1926485300064087,
      "learning_rate": 6.491898552348069e-06,
      "loss": 0.0184,
      "step": 1084800
    },
    {
      "epoch": 1.7753317229957517,
      "grad_norm": 0.32047751545906067,
      "learning_rate": 6.491832660134553e-06,
      "loss": 0.017,
      "step": 1084820
    },
    {
      "epoch": 1.7753644534344049,
      "grad_norm": 0.5571969747543335,
      "learning_rate": 6.491766767921035e-06,
      "loss": 0.0152,
      "step": 1084840
    },
    {
      "epoch": 1.7753971838730582,
      "grad_norm": 0.30587196350097656,
      "learning_rate": 6.491700875707518e-06,
      "loss": 0.0104,
      "step": 1084860
    },
    {
      "epoch": 1.7754299143117116,
      "grad_norm": 0.1705387681722641,
      "learning_rate": 6.491634983494001e-06,
      "loss": 0.0122,
      "step": 1084880
    },
    {
      "epoch": 1.775462644750365,
      "grad_norm": 0.48948830366134644,
      "learning_rate": 6.491569091280484e-06,
      "loss": 0.0236,
      "step": 1084900
    },
    {
      "epoch": 1.7754953751890183,
      "grad_norm": 0.5371986031532288,
      "learning_rate": 6.491503199066966e-06,
      "loss": 0.0256,
      "step": 1084920
    },
    {
      "epoch": 1.7755281056276715,
      "grad_norm": 0.6570989489555359,
      "learning_rate": 6.49143730685345e-06,
      "loss": 0.0226,
      "step": 1084940
    },
    {
      "epoch": 1.775560836066325,
      "grad_norm": 0.8354004621505737,
      "learning_rate": 6.491371414639932e-06,
      "loss": 0.0278,
      "step": 1084960
    },
    {
      "epoch": 1.7755935665049782,
      "grad_norm": 0.9680694341659546,
      "learning_rate": 6.491305522426415e-06,
      "loss": 0.016,
      "step": 1084980
    },
    {
      "epoch": 1.7756262969436316,
      "grad_norm": 1.0157761573791504,
      "learning_rate": 6.491239630212899e-06,
      "loss": 0.0202,
      "step": 1085000
    },
    {
      "epoch": 1.775659027382285,
      "grad_norm": 1.2875734567642212,
      "learning_rate": 6.491173737999381e-06,
      "loss": 0.0221,
      "step": 1085020
    },
    {
      "epoch": 1.7756917578209384,
      "grad_norm": 0.2460382580757141,
      "learning_rate": 6.4911078457858644e-06,
      "loss": 0.0211,
      "step": 1085040
    },
    {
      "epoch": 1.7757244882595917,
      "grad_norm": 1.2023483514785767,
      "learning_rate": 6.491041953572347e-06,
      "loss": 0.0145,
      "step": 1085060
    },
    {
      "epoch": 1.7757572186982449,
      "grad_norm": 3.281874656677246,
      "learning_rate": 6.49097606135883e-06,
      "loss": 0.0154,
      "step": 1085080
    },
    {
      "epoch": 1.7757899491368985,
      "grad_norm": 0.37237390875816345,
      "learning_rate": 6.490910169145313e-06,
      "loss": 0.0316,
      "step": 1085100
    },
    {
      "epoch": 1.7758226795755516,
      "grad_norm": 0.44757235050201416,
      "learning_rate": 6.490844276931796e-06,
      "loss": 0.017,
      "step": 1085120
    },
    {
      "epoch": 1.775855410014205,
      "grad_norm": 0.4031214118003845,
      "learning_rate": 6.490778384718278e-06,
      "loss": 0.0198,
      "step": 1085140
    },
    {
      "epoch": 1.7758881404528584,
      "grad_norm": 0.5358092784881592,
      "learning_rate": 6.490712492504762e-06,
      "loss": 0.0163,
      "step": 1085160
    },
    {
      "epoch": 1.7759208708915117,
      "grad_norm": 0.39410412311553955,
      "learning_rate": 6.490646600291244e-06,
      "loss": 0.0221,
      "step": 1085180
    },
    {
      "epoch": 1.775953601330165,
      "grad_norm": 1.9102874994277954,
      "learning_rate": 6.490580708077727e-06,
      "loss": 0.0165,
      "step": 1085200
    },
    {
      "epoch": 1.7759863317688183,
      "grad_norm": 0.9890233874320984,
      "learning_rate": 6.490514815864209e-06,
      "loss": 0.0171,
      "step": 1085220
    },
    {
      "epoch": 1.7760190622074719,
      "grad_norm": 0.8429034352302551,
      "learning_rate": 6.490448923650693e-06,
      "loss": 0.0179,
      "step": 1085240
    },
    {
      "epoch": 1.776051792646125,
      "grad_norm": 0.8735320568084717,
      "learning_rate": 6.490383031437175e-06,
      "loss": 0.0165,
      "step": 1085260
    },
    {
      "epoch": 1.7760845230847784,
      "grad_norm": 0.5798661708831787,
      "learning_rate": 6.490317139223659e-06,
      "loss": 0.0207,
      "step": 1085280
    },
    {
      "epoch": 1.7761172535234317,
      "grad_norm": 0.13358402252197266,
      "learning_rate": 6.490251247010141e-06,
      "loss": 0.0166,
      "step": 1085300
    },
    {
      "epoch": 1.776149983962085,
      "grad_norm": 0.9753890037536621,
      "learning_rate": 6.4901853547966245e-06,
      "loss": 0.0219,
      "step": 1085320
    },
    {
      "epoch": 1.7761827144007385,
      "grad_norm": 1.766759991645813,
      "learning_rate": 6.490119462583106e-06,
      "loss": 0.0251,
      "step": 1085340
    },
    {
      "epoch": 1.7762154448393916,
      "grad_norm": 0.4957069456577301,
      "learning_rate": 6.49005357036959e-06,
      "loss": 0.0159,
      "step": 1085360
    },
    {
      "epoch": 1.7762481752780452,
      "grad_norm": 0.22838938236236572,
      "learning_rate": 6.4899876781560735e-06,
      "loss": 0.0149,
      "step": 1085380
    },
    {
      "epoch": 1.7762809057166984,
      "grad_norm": 0.4214121699333191,
      "learning_rate": 6.4899217859425554e-06,
      "loss": 0.0196,
      "step": 1085400
    },
    {
      "epoch": 1.7763136361553518,
      "grad_norm": 0.4362333416938782,
      "learning_rate": 6.489855893729039e-06,
      "loss": 0.0176,
      "step": 1085420
    },
    {
      "epoch": 1.7763463665940051,
      "grad_norm": 0.5903715491294861,
      "learning_rate": 6.489790001515521e-06,
      "loss": 0.0167,
      "step": 1085440
    },
    {
      "epoch": 1.7763790970326583,
      "grad_norm": 0.7234898209571838,
      "learning_rate": 6.4897241093020045e-06,
      "loss": 0.0213,
      "step": 1085460
    },
    {
      "epoch": 1.7764118274713119,
      "grad_norm": 0.4683215916156769,
      "learning_rate": 6.489658217088487e-06,
      "loss": 0.0241,
      "step": 1085480
    },
    {
      "epoch": 1.776444557909965,
      "grad_norm": 1.6541271209716797,
      "learning_rate": 6.48959232487497e-06,
      "loss": 0.032,
      "step": 1085500
    },
    {
      "epoch": 1.7764772883486184,
      "grad_norm": 0.2578248977661133,
      "learning_rate": 6.489526432661453e-06,
      "loss": 0.0147,
      "step": 1085520
    },
    {
      "epoch": 1.7765100187872718,
      "grad_norm": 0.12520603835582733,
      "learning_rate": 6.489460540447936e-06,
      "loss": 0.0172,
      "step": 1085540
    },
    {
      "epoch": 1.7765427492259251,
      "grad_norm": 0.3908306360244751,
      "learning_rate": 6.489394648234418e-06,
      "loss": 0.0122,
      "step": 1085560
    },
    {
      "epoch": 1.7765754796645785,
      "grad_norm": 1.9647752046585083,
      "learning_rate": 6.489328756020902e-06,
      "loss": 0.0247,
      "step": 1085580
    },
    {
      "epoch": 1.7766082101032317,
      "grad_norm": 1.6073427200317383,
      "learning_rate": 6.489262863807384e-06,
      "loss": 0.0172,
      "step": 1085600
    },
    {
      "epoch": 1.7766409405418853,
      "grad_norm": 0.31227123737335205,
      "learning_rate": 6.489196971593867e-06,
      "loss": 0.0223,
      "step": 1085620
    },
    {
      "epoch": 1.7766736709805384,
      "grad_norm": 0.31441277265548706,
      "learning_rate": 6.48913107938035e-06,
      "loss": 0.0213,
      "step": 1085640
    },
    {
      "epoch": 1.7767064014191918,
      "grad_norm": 0.4635356664657593,
      "learning_rate": 6.489065187166833e-06,
      "loss": 0.0234,
      "step": 1085660
    },
    {
      "epoch": 1.7767391318578452,
      "grad_norm": 0.19521798193454742,
      "learning_rate": 6.4889992949533155e-06,
      "loss": 0.0155,
      "step": 1085680
    },
    {
      "epoch": 1.7767718622964985,
      "grad_norm": 0.5758662819862366,
      "learning_rate": 6.488933402739799e-06,
      "loss": 0.0229,
      "step": 1085700
    },
    {
      "epoch": 1.776804592735152,
      "grad_norm": 0.2980917692184448,
      "learning_rate": 6.488867510526282e-06,
      "loss": 0.0144,
      "step": 1085720
    },
    {
      "epoch": 1.776837323173805,
      "grad_norm": 0.31976696848869324,
      "learning_rate": 6.4888016183127646e-06,
      "loss": 0.0236,
      "step": 1085740
    },
    {
      "epoch": 1.7768700536124586,
      "grad_norm": 0.5777184963226318,
      "learning_rate": 6.488735726099248e-06,
      "loss": 0.0245,
      "step": 1085760
    },
    {
      "epoch": 1.7769027840511118,
      "grad_norm": 0.8389379382133484,
      "learning_rate": 6.48866983388573e-06,
      "loss": 0.0174,
      "step": 1085780
    },
    {
      "epoch": 1.7769355144897652,
      "grad_norm": 0.3352426588535309,
      "learning_rate": 6.488603941672214e-06,
      "loss": 0.0256,
      "step": 1085800
    },
    {
      "epoch": 1.7769682449284185,
      "grad_norm": 0.6644447445869446,
      "learning_rate": 6.4885380494586955e-06,
      "loss": 0.0139,
      "step": 1085820
    },
    {
      "epoch": 1.777000975367072,
      "grad_norm": 0.31685617566108704,
      "learning_rate": 6.488472157245179e-06,
      "loss": 0.0207,
      "step": 1085840
    },
    {
      "epoch": 1.7770337058057253,
      "grad_norm": 0.9339263439178467,
      "learning_rate": 6.488406265031662e-06,
      "loss": 0.0234,
      "step": 1085860
    },
    {
      "epoch": 1.7770664362443784,
      "grad_norm": 0.7245919704437256,
      "learning_rate": 6.488340372818145e-06,
      "loss": 0.0169,
      "step": 1085880
    },
    {
      "epoch": 1.777099166683032,
      "grad_norm": 1.6175217628479004,
      "learning_rate": 6.488274480604627e-06,
      "loss": 0.0191,
      "step": 1085900
    },
    {
      "epoch": 1.7771318971216852,
      "grad_norm": 0.1904473453760147,
      "learning_rate": 6.488208588391111e-06,
      "loss": 0.0184,
      "step": 1085920
    },
    {
      "epoch": 1.7771646275603386,
      "grad_norm": 0.8122977018356323,
      "learning_rate": 6.488142696177593e-06,
      "loss": 0.0217,
      "step": 1085940
    },
    {
      "epoch": 1.777197357998992,
      "grad_norm": 0.1952771097421646,
      "learning_rate": 6.488076803964076e-06,
      "loss": 0.0195,
      "step": 1085960
    },
    {
      "epoch": 1.7772300884376453,
      "grad_norm": 0.17593573033809662,
      "learning_rate": 6.488010911750558e-06,
      "loss": 0.0187,
      "step": 1085980
    },
    {
      "epoch": 1.7772628188762987,
      "grad_norm": 0.34591174125671387,
      "learning_rate": 6.487945019537042e-06,
      "loss": 0.0256,
      "step": 1086000
    },
    {
      "epoch": 1.7772955493149518,
      "grad_norm": 0.41405758261680603,
      "learning_rate": 6.487879127323525e-06,
      "loss": 0.0161,
      "step": 1086020
    },
    {
      "epoch": 1.7773282797536054,
      "grad_norm": 0.3587181866168976,
      "learning_rate": 6.487813235110007e-06,
      "loss": 0.0129,
      "step": 1086040
    },
    {
      "epoch": 1.7773610101922586,
      "grad_norm": 0.3625010550022125,
      "learning_rate": 6.487747342896491e-06,
      "loss": 0.0164,
      "step": 1086060
    },
    {
      "epoch": 1.777393740630912,
      "grad_norm": 0.9604213237762451,
      "learning_rate": 6.487681450682974e-06,
      "loss": 0.0169,
      "step": 1086080
    },
    {
      "epoch": 1.7774264710695653,
      "grad_norm": 0.21972616016864777,
      "learning_rate": 6.487615558469456e-06,
      "loss": 0.0214,
      "step": 1086100
    },
    {
      "epoch": 1.7774592015082185,
      "grad_norm": 1.0978666543960571,
      "learning_rate": 6.487549666255939e-06,
      "loss": 0.0219,
      "step": 1086120
    },
    {
      "epoch": 1.777491931946872,
      "grad_norm": 0.3361456096172333,
      "learning_rate": 6.487483774042423e-06,
      "loss": 0.0175,
      "step": 1086140
    },
    {
      "epoch": 1.7775246623855252,
      "grad_norm": 0.216270312666893,
      "learning_rate": 6.487417881828905e-06,
      "loss": 0.0134,
      "step": 1086160
    },
    {
      "epoch": 1.7775573928241788,
      "grad_norm": 0.595649242401123,
      "learning_rate": 6.487351989615388e-06,
      "loss": 0.021,
      "step": 1086180
    },
    {
      "epoch": 1.777590123262832,
      "grad_norm": 0.6339609622955322,
      "learning_rate": 6.48728609740187e-06,
      "loss": 0.0217,
      "step": 1086200
    },
    {
      "epoch": 1.7776228537014853,
      "grad_norm": 1.316772222518921,
      "learning_rate": 6.487220205188354e-06,
      "loss": 0.0264,
      "step": 1086220
    },
    {
      "epoch": 1.7776555841401387,
      "grad_norm": 0.17458824813365936,
      "learning_rate": 6.487154312974836e-06,
      "loss": 0.0105,
      "step": 1086240
    },
    {
      "epoch": 1.7776883145787918,
      "grad_norm": 0.6406936645507812,
      "learning_rate": 6.487088420761319e-06,
      "loss": 0.0159,
      "step": 1086260
    },
    {
      "epoch": 1.7777210450174454,
      "grad_norm": 0.16505908966064453,
      "learning_rate": 6.487022528547802e-06,
      "loss": 0.0189,
      "step": 1086280
    },
    {
      "epoch": 1.7777537754560986,
      "grad_norm": 0.5034252405166626,
      "learning_rate": 6.486956636334285e-06,
      "loss": 0.0161,
      "step": 1086300
    },
    {
      "epoch": 1.777786505894752,
      "grad_norm": 0.31869304180145264,
      "learning_rate": 6.486890744120767e-06,
      "loss": 0.0156,
      "step": 1086320
    },
    {
      "epoch": 1.7778192363334053,
      "grad_norm": 0.811743974685669,
      "learning_rate": 6.486824851907251e-06,
      "loss": 0.0165,
      "step": 1086340
    },
    {
      "epoch": 1.7778519667720587,
      "grad_norm": 0.5559048056602478,
      "learning_rate": 6.486758959693733e-06,
      "loss": 0.0141,
      "step": 1086360
    },
    {
      "epoch": 1.777884697210712,
      "grad_norm": 1.654973030090332,
      "learning_rate": 6.4866930674802165e-06,
      "loss": 0.0288,
      "step": 1086380
    },
    {
      "epoch": 1.7779174276493652,
      "grad_norm": 0.33452004194259644,
      "learning_rate": 6.486627175266698e-06,
      "loss": 0.0165,
      "step": 1086400
    },
    {
      "epoch": 1.7779501580880188,
      "grad_norm": 0.33603018522262573,
      "learning_rate": 6.486561283053182e-06,
      "loss": 0.0164,
      "step": 1086420
    },
    {
      "epoch": 1.777982888526672,
      "grad_norm": 0.6034388542175293,
      "learning_rate": 6.4864953908396655e-06,
      "loss": 0.0174,
      "step": 1086440
    },
    {
      "epoch": 1.7780156189653253,
      "grad_norm": 0.8363707661628723,
      "learning_rate": 6.486429498626147e-06,
      "loss": 0.0224,
      "step": 1086460
    },
    {
      "epoch": 1.7780483494039787,
      "grad_norm": 0.8335784077644348,
      "learning_rate": 6.486363606412631e-06,
      "loss": 0.0185,
      "step": 1086480
    },
    {
      "epoch": 1.778081079842632,
      "grad_norm": 0.6217744946479797,
      "learning_rate": 6.486297714199114e-06,
      "loss": 0.0236,
      "step": 1086500
    },
    {
      "epoch": 1.7781138102812855,
      "grad_norm": 0.38821521401405334,
      "learning_rate": 6.4862318219855965e-06,
      "loss": 0.0202,
      "step": 1086520
    },
    {
      "epoch": 1.7781465407199386,
      "grad_norm": 3.462367296218872,
      "learning_rate": 6.486165929772079e-06,
      "loss": 0.0139,
      "step": 1086540
    },
    {
      "epoch": 1.7781792711585922,
      "grad_norm": 0.3669726550579071,
      "learning_rate": 6.486100037558563e-06,
      "loss": 0.0126,
      "step": 1086560
    },
    {
      "epoch": 1.7782120015972454,
      "grad_norm": 0.6797536015510559,
      "learning_rate": 6.486034145345045e-06,
      "loss": 0.0188,
      "step": 1086580
    },
    {
      "epoch": 1.7782447320358987,
      "grad_norm": 1.3405051231384277,
      "learning_rate": 6.485968253131528e-06,
      "loss": 0.0216,
      "step": 1086600
    },
    {
      "epoch": 1.778277462474552,
      "grad_norm": 0.7465121746063232,
      "learning_rate": 6.48590236091801e-06,
      "loss": 0.0174,
      "step": 1086620
    },
    {
      "epoch": 1.7783101929132055,
      "grad_norm": 0.5493785738945007,
      "learning_rate": 6.485836468704494e-06,
      "loss": 0.0128,
      "step": 1086640
    },
    {
      "epoch": 1.7783429233518588,
      "grad_norm": 0.46550852060317993,
      "learning_rate": 6.4857705764909765e-06,
      "loss": 0.0231,
      "step": 1086660
    },
    {
      "epoch": 1.778375653790512,
      "grad_norm": 0.1914365291595459,
      "learning_rate": 6.485704684277459e-06,
      "loss": 0.0151,
      "step": 1086680
    },
    {
      "epoch": 1.7784083842291656,
      "grad_norm": 1.1751818656921387,
      "learning_rate": 6.485638792063942e-06,
      "loss": 0.0158,
      "step": 1086700
    },
    {
      "epoch": 1.7784411146678187,
      "grad_norm": 0.7623216509819031,
      "learning_rate": 6.485572899850426e-06,
      "loss": 0.0245,
      "step": 1086720
    },
    {
      "epoch": 1.778473845106472,
      "grad_norm": 0.3555258512496948,
      "learning_rate": 6.4855070076369075e-06,
      "loss": 0.0138,
      "step": 1086740
    },
    {
      "epoch": 1.7785065755451255,
      "grad_norm": 0.8191147446632385,
      "learning_rate": 6.485441115423391e-06,
      "loss": 0.0206,
      "step": 1086760
    },
    {
      "epoch": 1.7785393059837786,
      "grad_norm": 0.3248541057109833,
      "learning_rate": 6.485375223209875e-06,
      "loss": 0.0117,
      "step": 1086780
    },
    {
      "epoch": 1.7785720364224322,
      "grad_norm": 0.31656789779663086,
      "learning_rate": 6.4853093309963565e-06,
      "loss": 0.0195,
      "step": 1086800
    },
    {
      "epoch": 1.7786047668610854,
      "grad_norm": 0.2606351971626282,
      "learning_rate": 6.48524343878284e-06,
      "loss": 0.0112,
      "step": 1086820
    },
    {
      "epoch": 1.778637497299739,
      "grad_norm": 0.36599650979042053,
      "learning_rate": 6.485177546569322e-06,
      "loss": 0.0162,
      "step": 1086840
    },
    {
      "epoch": 1.7786702277383921,
      "grad_norm": 0.20739050209522247,
      "learning_rate": 6.485111654355806e-06,
      "loss": 0.0164,
      "step": 1086860
    },
    {
      "epoch": 1.7787029581770455,
      "grad_norm": 0.2318689376115799,
      "learning_rate": 6.485045762142288e-06,
      "loss": 0.0201,
      "step": 1086880
    },
    {
      "epoch": 1.7787356886156989,
      "grad_norm": 0.23968200385570526,
      "learning_rate": 6.484979869928771e-06,
      "loss": 0.0219,
      "step": 1086900
    },
    {
      "epoch": 1.778768419054352,
      "grad_norm": 0.2827174961566925,
      "learning_rate": 6.484913977715254e-06,
      "loss": 0.0249,
      "step": 1086920
    },
    {
      "epoch": 1.7788011494930056,
      "grad_norm": 0.7468206882476807,
      "learning_rate": 6.484848085501737e-06,
      "loss": 0.0211,
      "step": 1086940
    },
    {
      "epoch": 1.7788338799316588,
      "grad_norm": 0.594517171382904,
      "learning_rate": 6.484782193288219e-06,
      "loss": 0.0146,
      "step": 1086960
    },
    {
      "epoch": 1.7788666103703121,
      "grad_norm": 0.19587580859661102,
      "learning_rate": 6.484716301074703e-06,
      "loss": 0.0167,
      "step": 1086980
    },
    {
      "epoch": 1.7788993408089655,
      "grad_norm": 0.42000725865364075,
      "learning_rate": 6.484650408861185e-06,
      "loss": 0.0235,
      "step": 1087000
    },
    {
      "epoch": 1.7789320712476189,
      "grad_norm": 0.3103362023830414,
      "learning_rate": 6.484584516647668e-06,
      "loss": 0.019,
      "step": 1087020
    },
    {
      "epoch": 1.7789648016862722,
      "grad_norm": 0.5688890218734741,
      "learning_rate": 6.484518624434151e-06,
      "loss": 0.0179,
      "step": 1087040
    },
    {
      "epoch": 1.7789975321249254,
      "grad_norm": 2.411346435546875,
      "learning_rate": 6.484452732220634e-06,
      "loss": 0.0192,
      "step": 1087060
    },
    {
      "epoch": 1.779030262563579,
      "grad_norm": 0.16488932073116302,
      "learning_rate": 6.484386840007117e-06,
      "loss": 0.0202,
      "step": 1087080
    },
    {
      "epoch": 1.7790629930022321,
      "grad_norm": 0.3831382095813751,
      "learning_rate": 6.4843209477936e-06,
      "loss": 0.0231,
      "step": 1087100
    },
    {
      "epoch": 1.7790957234408855,
      "grad_norm": 0.9000231623649597,
      "learning_rate": 6.484255055580083e-06,
      "loss": 0.0275,
      "step": 1087120
    },
    {
      "epoch": 1.7791284538795389,
      "grad_norm": 0.3967953622341156,
      "learning_rate": 6.484189163366566e-06,
      "loss": 0.0142,
      "step": 1087140
    },
    {
      "epoch": 1.7791611843181923,
      "grad_norm": 0.6927214860916138,
      "learning_rate": 6.484123271153049e-06,
      "loss": 0.0194,
      "step": 1087160
    },
    {
      "epoch": 1.7791939147568456,
      "grad_norm": 0.3809884190559387,
      "learning_rate": 6.484057378939531e-06,
      "loss": 0.0228,
      "step": 1087180
    },
    {
      "epoch": 1.7792266451954988,
      "grad_norm": 1.2115240097045898,
      "learning_rate": 6.483991486726015e-06,
      "loss": 0.0227,
      "step": 1087200
    },
    {
      "epoch": 1.7792593756341524,
      "grad_norm": 0.3604985475540161,
      "learning_rate": 6.483925594512497e-06,
      "loss": 0.0223,
      "step": 1087220
    },
    {
      "epoch": 1.7792921060728055,
      "grad_norm": 1.3044565916061401,
      "learning_rate": 6.48385970229898e-06,
      "loss": 0.0239,
      "step": 1087240
    },
    {
      "epoch": 1.779324836511459,
      "grad_norm": 1.2231202125549316,
      "learning_rate": 6.483793810085462e-06,
      "loss": 0.0239,
      "step": 1087260
    },
    {
      "epoch": 1.7793575669501123,
      "grad_norm": 0.9012696146965027,
      "learning_rate": 6.483727917871946e-06,
      "loss": 0.0199,
      "step": 1087280
    },
    {
      "epoch": 1.7793902973887656,
      "grad_norm": 0.6808004975318909,
      "learning_rate": 6.483662025658428e-06,
      "loss": 0.0168,
      "step": 1087300
    },
    {
      "epoch": 1.779423027827419,
      "grad_norm": 1.464092493057251,
      "learning_rate": 6.483596133444911e-06,
      "loss": 0.0168,
      "step": 1087320
    },
    {
      "epoch": 1.7794557582660722,
      "grad_norm": 0.2712927758693695,
      "learning_rate": 6.483530241231394e-06,
      "loss": 0.0162,
      "step": 1087340
    },
    {
      "epoch": 1.7794884887047258,
      "grad_norm": 0.34478262066841125,
      "learning_rate": 6.4834643490178775e-06,
      "loss": 0.0118,
      "step": 1087360
    },
    {
      "epoch": 1.779521219143379,
      "grad_norm": 0.5851582288742065,
      "learning_rate": 6.483398456804359e-06,
      "loss": 0.0145,
      "step": 1087380
    },
    {
      "epoch": 1.7795539495820323,
      "grad_norm": 1.277019739151001,
      "learning_rate": 6.483332564590843e-06,
      "loss": 0.0213,
      "step": 1087400
    },
    {
      "epoch": 1.7795866800206857,
      "grad_norm": 0.203025221824646,
      "learning_rate": 6.483266672377325e-06,
      "loss": 0.0153,
      "step": 1087420
    },
    {
      "epoch": 1.779619410459339,
      "grad_norm": 1.539697527885437,
      "learning_rate": 6.4832007801638084e-06,
      "loss": 0.016,
      "step": 1087440
    },
    {
      "epoch": 1.7796521408979924,
      "grad_norm": 0.5450804233551025,
      "learning_rate": 6.483134887950291e-06,
      "loss": 0.0177,
      "step": 1087460
    },
    {
      "epoch": 1.7796848713366455,
      "grad_norm": 0.74399334192276,
      "learning_rate": 6.483068995736774e-06,
      "loss": 0.0219,
      "step": 1087480
    },
    {
      "epoch": 1.7797176017752991,
      "grad_norm": 0.3801519572734833,
      "learning_rate": 6.4830031035232575e-06,
      "loss": 0.0146,
      "step": 1087500
    },
    {
      "epoch": 1.7797503322139523,
      "grad_norm": 1.0321664810180664,
      "learning_rate": 6.48293721130974e-06,
      "loss": 0.028,
      "step": 1087520
    },
    {
      "epoch": 1.7797830626526057,
      "grad_norm": 0.8411241173744202,
      "learning_rate": 6.482871319096223e-06,
      "loss": 0.0199,
      "step": 1087540
    },
    {
      "epoch": 1.779815793091259,
      "grad_norm": 0.7393056750297546,
      "learning_rate": 6.482805426882706e-06,
      "loss": 0.0186,
      "step": 1087560
    },
    {
      "epoch": 1.7798485235299122,
      "grad_norm": 0.7292803525924683,
      "learning_rate": 6.482739534669189e-06,
      "loss": 0.0176,
      "step": 1087580
    },
    {
      "epoch": 1.7798812539685658,
      "grad_norm": 0.2875574231147766,
      "learning_rate": 6.482673642455671e-06,
      "loss": 0.0248,
      "step": 1087600
    },
    {
      "epoch": 1.779913984407219,
      "grad_norm": 0.332984983921051,
      "learning_rate": 6.482607750242155e-06,
      "loss": 0.0152,
      "step": 1087620
    },
    {
      "epoch": 1.7799467148458725,
      "grad_norm": 0.6817961931228638,
      "learning_rate": 6.482541858028637e-06,
      "loss": 0.0116,
      "step": 1087640
    },
    {
      "epoch": 1.7799794452845257,
      "grad_norm": 0.8551784157752991,
      "learning_rate": 6.48247596581512e-06,
      "loss": 0.0169,
      "step": 1087660
    },
    {
      "epoch": 1.780012175723179,
      "grad_norm": 0.6879386305809021,
      "learning_rate": 6.482410073601603e-06,
      "loss": 0.0197,
      "step": 1087680
    },
    {
      "epoch": 1.7800449061618324,
      "grad_norm": 0.2646743059158325,
      "learning_rate": 6.482344181388086e-06,
      "loss": 0.0178,
      "step": 1087700
    },
    {
      "epoch": 1.7800776366004856,
      "grad_norm": 0.050325218588113785,
      "learning_rate": 6.4822782891745685e-06,
      "loss": 0.0127,
      "step": 1087720
    },
    {
      "epoch": 1.7801103670391392,
      "grad_norm": 1.2551440000534058,
      "learning_rate": 6.482212396961052e-06,
      "loss": 0.0136,
      "step": 1087740
    },
    {
      "epoch": 1.7801430974777923,
      "grad_norm": 0.6048571467399597,
      "learning_rate": 6.482146504747534e-06,
      "loss": 0.0207,
      "step": 1087760
    },
    {
      "epoch": 1.7801758279164457,
      "grad_norm": 1.2358611822128296,
      "learning_rate": 6.4820806125340176e-06,
      "loss": 0.0298,
      "step": 1087780
    },
    {
      "epoch": 1.780208558355099,
      "grad_norm": 0.593786358833313,
      "learning_rate": 6.4820147203204995e-06,
      "loss": 0.0219,
      "step": 1087800
    },
    {
      "epoch": 1.7802412887937524,
      "grad_norm": 0.141371950507164,
      "learning_rate": 6.481948828106983e-06,
      "loss": 0.0191,
      "step": 1087820
    },
    {
      "epoch": 1.7802740192324058,
      "grad_norm": 0.42725875973701477,
      "learning_rate": 6.481882935893467e-06,
      "loss": 0.0192,
      "step": 1087840
    },
    {
      "epoch": 1.780306749671059,
      "grad_norm": 0.3843095600605011,
      "learning_rate": 6.4818170436799485e-06,
      "loss": 0.0199,
      "step": 1087860
    },
    {
      "epoch": 1.7803394801097125,
      "grad_norm": 0.46068307757377625,
      "learning_rate": 6.481751151466432e-06,
      "loss": 0.0161,
      "step": 1087880
    },
    {
      "epoch": 1.7803722105483657,
      "grad_norm": 0.29219958186149597,
      "learning_rate": 6.481685259252915e-06,
      "loss": 0.0212,
      "step": 1087900
    },
    {
      "epoch": 1.780404940987019,
      "grad_norm": 0.2453460693359375,
      "learning_rate": 6.481619367039398e-06,
      "loss": 0.0167,
      "step": 1087920
    },
    {
      "epoch": 1.7804376714256724,
      "grad_norm": 0.1714816391468048,
      "learning_rate": 6.48155347482588e-06,
      "loss": 0.0273,
      "step": 1087940
    },
    {
      "epoch": 1.7804704018643258,
      "grad_norm": 0.8716426491737366,
      "learning_rate": 6.481487582612364e-06,
      "loss": 0.0117,
      "step": 1087960
    },
    {
      "epoch": 1.7805031323029792,
      "grad_norm": 0.5715981125831604,
      "learning_rate": 6.481421690398846e-06,
      "loss": 0.0134,
      "step": 1087980
    },
    {
      "epoch": 1.7805358627416323,
      "grad_norm": 0.5256865620613098,
      "learning_rate": 6.481355798185329e-06,
      "loss": 0.0203,
      "step": 1088000
    },
    {
      "epoch": 1.780568593180286,
      "grad_norm": 0.3085135519504547,
      "learning_rate": 6.481289905971811e-06,
      "loss": 0.0209,
      "step": 1088020
    },
    {
      "epoch": 1.780601323618939,
      "grad_norm": 0.3366594910621643,
      "learning_rate": 6.481224013758295e-06,
      "loss": 0.0198,
      "step": 1088040
    },
    {
      "epoch": 1.7806340540575925,
      "grad_norm": 0.9742546081542969,
      "learning_rate": 6.481158121544778e-06,
      "loss": 0.0135,
      "step": 1088060
    },
    {
      "epoch": 1.7806667844962458,
      "grad_norm": 0.6310577392578125,
      "learning_rate": 6.48109222933126e-06,
      "loss": 0.0175,
      "step": 1088080
    },
    {
      "epoch": 1.7806995149348992,
      "grad_norm": 0.3934876322746277,
      "learning_rate": 6.481026337117743e-06,
      "loss": 0.0119,
      "step": 1088100
    },
    {
      "epoch": 1.7807322453735526,
      "grad_norm": 0.45832619071006775,
      "learning_rate": 6.480960444904227e-06,
      "loss": 0.0215,
      "step": 1088120
    },
    {
      "epoch": 1.7807649758122057,
      "grad_norm": 0.19017794728279114,
      "learning_rate": 6.4808945526907086e-06,
      "loss": 0.0121,
      "step": 1088140
    },
    {
      "epoch": 1.7807977062508593,
      "grad_norm": 0.5804161429405212,
      "learning_rate": 6.480828660477192e-06,
      "loss": 0.0183,
      "step": 1088160
    },
    {
      "epoch": 1.7808304366895125,
      "grad_norm": 0.7278818488121033,
      "learning_rate": 6.480762768263676e-06,
      "loss": 0.022,
      "step": 1088180
    },
    {
      "epoch": 1.7808631671281658,
      "grad_norm": 0.3470412790775299,
      "learning_rate": 6.480696876050158e-06,
      "loss": 0.0166,
      "step": 1088200
    },
    {
      "epoch": 1.7808958975668192,
      "grad_norm": 0.8929350972175598,
      "learning_rate": 6.480630983836641e-06,
      "loss": 0.0244,
      "step": 1088220
    },
    {
      "epoch": 1.7809286280054726,
      "grad_norm": 1.194097876548767,
      "learning_rate": 6.480565091623123e-06,
      "loss": 0.013,
      "step": 1088240
    },
    {
      "epoch": 1.780961358444126,
      "grad_norm": 0.17493900656700134,
      "learning_rate": 6.480499199409607e-06,
      "loss": 0.0238,
      "step": 1088260
    },
    {
      "epoch": 1.780994088882779,
      "grad_norm": 0.5336042046546936,
      "learning_rate": 6.480433307196089e-06,
      "loss": 0.0185,
      "step": 1088280
    },
    {
      "epoch": 1.7810268193214327,
      "grad_norm": 0.21441563963890076,
      "learning_rate": 6.480367414982572e-06,
      "loss": 0.0221,
      "step": 1088300
    },
    {
      "epoch": 1.7810595497600858,
      "grad_norm": 0.3541359603404999,
      "learning_rate": 6.480301522769055e-06,
      "loss": 0.017,
      "step": 1088320
    },
    {
      "epoch": 1.7810922801987392,
      "grad_norm": 0.10240274667739868,
      "learning_rate": 6.480235630555538e-06,
      "loss": 0.0143,
      "step": 1088340
    },
    {
      "epoch": 1.7811250106373926,
      "grad_norm": 0.4650728404521942,
      "learning_rate": 6.48016973834202e-06,
      "loss": 0.014,
      "step": 1088360
    },
    {
      "epoch": 1.7811577410760457,
      "grad_norm": 0.9067944288253784,
      "learning_rate": 6.480103846128504e-06,
      "loss": 0.0129,
      "step": 1088380
    },
    {
      "epoch": 1.7811904715146993,
      "grad_norm": 0.26993387937545776,
      "learning_rate": 6.480037953914986e-06,
      "loss": 0.0189,
      "step": 1088400
    },
    {
      "epoch": 1.7812232019533525,
      "grad_norm": 0.18777640163898468,
      "learning_rate": 6.4799720617014695e-06,
      "loss": 0.0138,
      "step": 1088420
    },
    {
      "epoch": 1.781255932392006,
      "grad_norm": 0.5105229020118713,
      "learning_rate": 6.479906169487951e-06,
      "loss": 0.0173,
      "step": 1088440
    },
    {
      "epoch": 1.7812886628306592,
      "grad_norm": 0.7989529967308044,
      "learning_rate": 6.479840277274435e-06,
      "loss": 0.0187,
      "step": 1088460
    },
    {
      "epoch": 1.7813213932693126,
      "grad_norm": 0.18042439222335815,
      "learning_rate": 6.479774385060918e-06,
      "loss": 0.0173,
      "step": 1088480
    },
    {
      "epoch": 1.781354123707966,
      "grad_norm": 0.36991050839424133,
      "learning_rate": 6.4797084928474004e-06,
      "loss": 0.0166,
      "step": 1088500
    },
    {
      "epoch": 1.7813868541466191,
      "grad_norm": 0.23529325425624847,
      "learning_rate": 6.479642600633884e-06,
      "loss": 0.0152,
      "step": 1088520
    },
    {
      "epoch": 1.7814195845852727,
      "grad_norm": 0.5091123580932617,
      "learning_rate": 6.479576708420367e-06,
      "loss": 0.0231,
      "step": 1088540
    },
    {
      "epoch": 1.7814523150239259,
      "grad_norm": 1.9326502084732056,
      "learning_rate": 6.4795108162068495e-06,
      "loss": 0.0205,
      "step": 1088560
    },
    {
      "epoch": 1.7814850454625792,
      "grad_norm": 0.4182414710521698,
      "learning_rate": 6.479444923993332e-06,
      "loss": 0.0184,
      "step": 1088580
    },
    {
      "epoch": 1.7815177759012326,
      "grad_norm": 0.48836061358451843,
      "learning_rate": 6.479379031779816e-06,
      "loss": 0.0175,
      "step": 1088600
    },
    {
      "epoch": 1.781550506339886,
      "grad_norm": 0.509629487991333,
      "learning_rate": 6.479313139566298e-06,
      "loss": 0.0225,
      "step": 1088620
    },
    {
      "epoch": 1.7815832367785394,
      "grad_norm": 0.9040552973747253,
      "learning_rate": 6.479247247352781e-06,
      "loss": 0.0165,
      "step": 1088640
    },
    {
      "epoch": 1.7816159672171925,
      "grad_norm": 1.3633639812469482,
      "learning_rate": 6.479181355139263e-06,
      "loss": 0.0203,
      "step": 1088660
    },
    {
      "epoch": 1.781648697655846,
      "grad_norm": 0.22882075607776642,
      "learning_rate": 6.479115462925747e-06,
      "loss": 0.013,
      "step": 1088680
    },
    {
      "epoch": 1.7816814280944993,
      "grad_norm": 0.3026663064956665,
      "learning_rate": 6.4790495707122295e-06,
      "loss": 0.0109,
      "step": 1088700
    },
    {
      "epoch": 1.7817141585331526,
      "grad_norm": 0.13074924051761627,
      "learning_rate": 6.478983678498712e-06,
      "loss": 0.0165,
      "step": 1088720
    },
    {
      "epoch": 1.781746888971806,
      "grad_norm": 0.49753451347351074,
      "learning_rate": 6.478917786285195e-06,
      "loss": 0.0139,
      "step": 1088740
    },
    {
      "epoch": 1.7817796194104594,
      "grad_norm": 0.500738799571991,
      "learning_rate": 6.478851894071679e-06,
      "loss": 0.0223,
      "step": 1088760
    },
    {
      "epoch": 1.7818123498491127,
      "grad_norm": 0.262791246175766,
      "learning_rate": 6.4787860018581605e-06,
      "loss": 0.0245,
      "step": 1088780
    },
    {
      "epoch": 1.781845080287766,
      "grad_norm": 1.3074802160263062,
      "learning_rate": 6.478720109644644e-06,
      "loss": 0.0219,
      "step": 1088800
    },
    {
      "epoch": 1.7818778107264195,
      "grad_norm": 0.9785941243171692,
      "learning_rate": 6.478654217431126e-06,
      "loss": 0.0195,
      "step": 1088820
    },
    {
      "epoch": 1.7819105411650726,
      "grad_norm": 0.5562183260917664,
      "learning_rate": 6.4785883252176095e-06,
      "loss": 0.0125,
      "step": 1088840
    },
    {
      "epoch": 1.781943271603726,
      "grad_norm": 2.5583951473236084,
      "learning_rate": 6.478522433004092e-06,
      "loss": 0.0302,
      "step": 1088860
    },
    {
      "epoch": 1.7819760020423794,
      "grad_norm": 0.24041490256786346,
      "learning_rate": 6.478456540790575e-06,
      "loss": 0.023,
      "step": 1088880
    },
    {
      "epoch": 1.7820087324810328,
      "grad_norm": 1.0697131156921387,
      "learning_rate": 6.478390648577059e-06,
      "loss": 0.0252,
      "step": 1088900
    },
    {
      "epoch": 1.7820414629196861,
      "grad_norm": 0.1916976273059845,
      "learning_rate": 6.478324756363541e-06,
      "loss": 0.0163,
      "step": 1088920
    },
    {
      "epoch": 1.7820741933583393,
      "grad_norm": 0.31108805537223816,
      "learning_rate": 6.478258864150024e-06,
      "loss": 0.0273,
      "step": 1088940
    },
    {
      "epoch": 1.7821069237969929,
      "grad_norm": 0.1591225564479828,
      "learning_rate": 6.478192971936507e-06,
      "loss": 0.0167,
      "step": 1088960
    },
    {
      "epoch": 1.782139654235646,
      "grad_norm": 2.630409002304077,
      "learning_rate": 6.47812707972299e-06,
      "loss": 0.0193,
      "step": 1088980
    },
    {
      "epoch": 1.7821723846742994,
      "grad_norm": 1.2261019945144653,
      "learning_rate": 6.478061187509472e-06,
      "loss": 0.0141,
      "step": 1089000
    },
    {
      "epoch": 1.7822051151129528,
      "grad_norm": 0.57923823595047,
      "learning_rate": 6.477995295295956e-06,
      "loss": 0.0168,
      "step": 1089020
    },
    {
      "epoch": 1.7822378455516061,
      "grad_norm": 0.3005230128765106,
      "learning_rate": 6.477929403082438e-06,
      "loss": 0.0207,
      "step": 1089040
    },
    {
      "epoch": 1.7822705759902595,
      "grad_norm": 0.7520186901092529,
      "learning_rate": 6.477863510868921e-06,
      "loss": 0.0199,
      "step": 1089060
    },
    {
      "epoch": 1.7823033064289127,
      "grad_norm": 0.2899145781993866,
      "learning_rate": 6.477797618655403e-06,
      "loss": 0.0224,
      "step": 1089080
    },
    {
      "epoch": 1.7823360368675663,
      "grad_norm": 0.18493309617042542,
      "learning_rate": 6.477731726441887e-06,
      "loss": 0.0192,
      "step": 1089100
    },
    {
      "epoch": 1.7823687673062194,
      "grad_norm": 0.6607695817947388,
      "learning_rate": 6.47766583422837e-06,
      "loss": 0.0224,
      "step": 1089120
    },
    {
      "epoch": 1.7824014977448728,
      "grad_norm": 0.6355273127555847,
      "learning_rate": 6.477599942014853e-06,
      "loss": 0.0216,
      "step": 1089140
    },
    {
      "epoch": 1.7824342281835261,
      "grad_norm": 0.5196930766105652,
      "learning_rate": 6.477534049801335e-06,
      "loss": 0.0143,
      "step": 1089160
    },
    {
      "epoch": 1.7824669586221793,
      "grad_norm": 0.1676764339208603,
      "learning_rate": 6.477468157587819e-06,
      "loss": 0.0263,
      "step": 1089180
    },
    {
      "epoch": 1.782499689060833,
      "grad_norm": 0.437152624130249,
      "learning_rate": 6.4774022653743006e-06,
      "loss": 0.0222,
      "step": 1089200
    },
    {
      "epoch": 1.782532419499486,
      "grad_norm": 0.3371538817882538,
      "learning_rate": 6.477336373160784e-06,
      "loss": 0.0098,
      "step": 1089220
    },
    {
      "epoch": 1.7825651499381396,
      "grad_norm": 0.4171925485134125,
      "learning_rate": 6.477270480947268e-06,
      "loss": 0.0171,
      "step": 1089240
    },
    {
      "epoch": 1.7825978803767928,
      "grad_norm": 0.3300110399723053,
      "learning_rate": 6.47720458873375e-06,
      "loss": 0.0161,
      "step": 1089260
    },
    {
      "epoch": 1.7826306108154462,
      "grad_norm": 0.4191613495349884,
      "learning_rate": 6.477138696520233e-06,
      "loss": 0.0143,
      "step": 1089280
    },
    {
      "epoch": 1.7826633412540995,
      "grad_norm": 0.5044807195663452,
      "learning_rate": 6.477072804306715e-06,
      "loss": 0.0213,
      "step": 1089300
    },
    {
      "epoch": 1.7826960716927527,
      "grad_norm": 0.22723227739334106,
      "learning_rate": 6.477006912093199e-06,
      "loss": 0.0211,
      "step": 1089320
    },
    {
      "epoch": 1.7827288021314063,
      "grad_norm": 1.053104281425476,
      "learning_rate": 6.4769410198796814e-06,
      "loss": 0.0162,
      "step": 1089340
    },
    {
      "epoch": 1.7827615325700594,
      "grad_norm": 0.36429858207702637,
      "learning_rate": 6.476875127666164e-06,
      "loss": 0.0122,
      "step": 1089360
    },
    {
      "epoch": 1.7827942630087128,
      "grad_norm": 1.3850194215774536,
      "learning_rate": 6.476809235452647e-06,
      "loss": 0.0212,
      "step": 1089380
    },
    {
      "epoch": 1.7828269934473662,
      "grad_norm": 0.11675805598497391,
      "learning_rate": 6.4767433432391305e-06,
      "loss": 0.0208,
      "step": 1089400
    },
    {
      "epoch": 1.7828597238860195,
      "grad_norm": 0.650428295135498,
      "learning_rate": 6.476677451025612e-06,
      "loss": 0.0182,
      "step": 1089420
    },
    {
      "epoch": 1.782892454324673,
      "grad_norm": 0.6416979432106018,
      "learning_rate": 6.476611558812096e-06,
      "loss": 0.024,
      "step": 1089440
    },
    {
      "epoch": 1.782925184763326,
      "grad_norm": 1.10227370262146,
      "learning_rate": 6.476545666598578e-06,
      "loss": 0.0185,
      "step": 1089460
    },
    {
      "epoch": 1.7829579152019797,
      "grad_norm": 0.4903438985347748,
      "learning_rate": 6.4764797743850615e-06,
      "loss": 0.0339,
      "step": 1089480
    },
    {
      "epoch": 1.7829906456406328,
      "grad_norm": 0.879280149936676,
      "learning_rate": 6.476413882171544e-06,
      "loss": 0.0271,
      "step": 1089500
    },
    {
      "epoch": 1.7830233760792862,
      "grad_norm": 0.4821455478668213,
      "learning_rate": 6.476347989958027e-06,
      "loss": 0.0177,
      "step": 1089520
    },
    {
      "epoch": 1.7830561065179396,
      "grad_norm": 0.28479453921318054,
      "learning_rate": 6.47628209774451e-06,
      "loss": 0.0202,
      "step": 1089540
    },
    {
      "epoch": 1.783088836956593,
      "grad_norm": 1.6940484046936035,
      "learning_rate": 6.476216205530993e-06,
      "loss": 0.0215,
      "step": 1089560
    },
    {
      "epoch": 1.7831215673952463,
      "grad_norm": 1.1881123781204224,
      "learning_rate": 6.476150313317476e-06,
      "loss": 0.0219,
      "step": 1089580
    },
    {
      "epoch": 1.7831542978338994,
      "grad_norm": 0.2661999762058258,
      "learning_rate": 6.476084421103959e-06,
      "loss": 0.0151,
      "step": 1089600
    },
    {
      "epoch": 1.783187028272553,
      "grad_norm": 0.2917608320713043,
      "learning_rate": 6.476018528890442e-06,
      "loss": 0.0155,
      "step": 1089620
    },
    {
      "epoch": 1.7832197587112062,
      "grad_norm": 0.6352883577346802,
      "learning_rate": 6.475952636676924e-06,
      "loss": 0.0228,
      "step": 1089640
    },
    {
      "epoch": 1.7832524891498596,
      "grad_norm": 0.28732311725616455,
      "learning_rate": 6.475886744463408e-06,
      "loss": 0.0241,
      "step": 1089660
    },
    {
      "epoch": 1.783285219588513,
      "grad_norm": 0.43239760398864746,
      "learning_rate": 6.47582085224989e-06,
      "loss": 0.014,
      "step": 1089680
    },
    {
      "epoch": 1.7833179500271663,
      "grad_norm": 0.7085054516792297,
      "learning_rate": 6.475754960036373e-06,
      "loss": 0.0239,
      "step": 1089700
    },
    {
      "epoch": 1.7833506804658197,
      "grad_norm": 0.7351522445678711,
      "learning_rate": 6.475689067822856e-06,
      "loss": 0.0159,
      "step": 1089720
    },
    {
      "epoch": 1.7833834109044728,
      "grad_norm": 2.9167230129241943,
      "learning_rate": 6.475623175609339e-06,
      "loss": 0.0247,
      "step": 1089740
    },
    {
      "epoch": 1.7834161413431264,
      "grad_norm": 0.6103605628013611,
      "learning_rate": 6.4755572833958215e-06,
      "loss": 0.0166,
      "step": 1089760
    },
    {
      "epoch": 1.7834488717817796,
      "grad_norm": 0.2621679902076721,
      "learning_rate": 6.475491391182305e-06,
      "loss": 0.0178,
      "step": 1089780
    },
    {
      "epoch": 1.783481602220433,
      "grad_norm": 0.1726199984550476,
      "learning_rate": 6.475425498968787e-06,
      "loss": 0.0177,
      "step": 1089800
    },
    {
      "epoch": 1.7835143326590863,
      "grad_norm": 0.34650176763534546,
      "learning_rate": 6.4753596067552706e-06,
      "loss": 0.0189,
      "step": 1089820
    },
    {
      "epoch": 1.7835470630977395,
      "grad_norm": 0.6239224672317505,
      "learning_rate": 6.4752937145417525e-06,
      "loss": 0.0219,
      "step": 1089840
    },
    {
      "epoch": 1.783579793536393,
      "grad_norm": 0.1838468760251999,
      "learning_rate": 6.475227822328236e-06,
      "loss": 0.0224,
      "step": 1089860
    },
    {
      "epoch": 1.7836125239750462,
      "grad_norm": 0.1994769126176834,
      "learning_rate": 6.475161930114719e-06,
      "loss": 0.019,
      "step": 1089880
    },
    {
      "epoch": 1.7836452544136998,
      "grad_norm": 0.15088161826133728,
      "learning_rate": 6.4750960379012015e-06,
      "loss": 0.0203,
      "step": 1089900
    },
    {
      "epoch": 1.783677984852353,
      "grad_norm": 0.17379175126552582,
      "learning_rate": 6.475030145687684e-06,
      "loss": 0.0164,
      "step": 1089920
    },
    {
      "epoch": 1.7837107152910063,
      "grad_norm": 0.23364649713039398,
      "learning_rate": 6.474964253474168e-06,
      "loss": 0.0189,
      "step": 1089940
    },
    {
      "epoch": 1.7837434457296597,
      "grad_norm": 0.9655233025550842,
      "learning_rate": 6.474898361260651e-06,
      "loss": 0.0195,
      "step": 1089960
    },
    {
      "epoch": 1.7837761761683129,
      "grad_norm": 0.3456270694732666,
      "learning_rate": 6.474832469047133e-06,
      "loss": 0.0261,
      "step": 1089980
    },
    {
      "epoch": 1.7838089066069664,
      "grad_norm": 0.14507147669792175,
      "learning_rate": 6.474766576833617e-06,
      "loss": 0.0148,
      "step": 1090000
    },
    {
      "epoch": 1.7838416370456196,
      "grad_norm": 0.40160608291625977,
      "learning_rate": 6.474700684620099e-06,
      "loss": 0.0118,
      "step": 1090020
    },
    {
      "epoch": 1.783874367484273,
      "grad_norm": 0.36430102586746216,
      "learning_rate": 6.474634792406582e-06,
      "loss": 0.019,
      "step": 1090040
    },
    {
      "epoch": 1.7839070979229263,
      "grad_norm": 2.379796266555786,
      "learning_rate": 6.474568900193064e-06,
      "loss": 0.0217,
      "step": 1090060
    },
    {
      "epoch": 1.7839398283615797,
      "grad_norm": 0.2970697283744812,
      "learning_rate": 6.474503007979548e-06,
      "loss": 0.0116,
      "step": 1090080
    },
    {
      "epoch": 1.783972558800233,
      "grad_norm": 0.1980508714914322,
      "learning_rate": 6.47443711576603e-06,
      "loss": 0.0158,
      "step": 1090100
    },
    {
      "epoch": 1.7840052892388862,
      "grad_norm": 1.0231040716171265,
      "learning_rate": 6.474371223552513e-06,
      "loss": 0.0217,
      "step": 1090120
    },
    {
      "epoch": 1.7840380196775398,
      "grad_norm": 0.7299237251281738,
      "learning_rate": 6.474305331338996e-06,
      "loss": 0.0218,
      "step": 1090140
    },
    {
      "epoch": 1.784070750116193,
      "grad_norm": 0.1768852323293686,
      "learning_rate": 6.474239439125479e-06,
      "loss": 0.0166,
      "step": 1090160
    },
    {
      "epoch": 1.7841034805548464,
      "grad_norm": 0.1690342277288437,
      "learning_rate": 6.474173546911962e-06,
      "loss": 0.0143,
      "step": 1090180
    },
    {
      "epoch": 1.7841362109934997,
      "grad_norm": 1.0164976119995117,
      "learning_rate": 6.474107654698445e-06,
      "loss": 0.0159,
      "step": 1090200
    },
    {
      "epoch": 1.784168941432153,
      "grad_norm": 0.2994702160358429,
      "learning_rate": 6.474041762484927e-06,
      "loss": 0.0174,
      "step": 1090220
    },
    {
      "epoch": 1.7842016718708065,
      "grad_norm": 0.6459841132164001,
      "learning_rate": 6.473975870271411e-06,
      "loss": 0.02,
      "step": 1090240
    },
    {
      "epoch": 1.7842344023094596,
      "grad_norm": 0.5357953906059265,
      "learning_rate": 6.4739099780578925e-06,
      "loss": 0.0215,
      "step": 1090260
    },
    {
      "epoch": 1.7842671327481132,
      "grad_norm": 1.2515578269958496,
      "learning_rate": 6.473844085844376e-06,
      "loss": 0.0229,
      "step": 1090280
    },
    {
      "epoch": 1.7842998631867664,
      "grad_norm": 0.590277373790741,
      "learning_rate": 6.47377819363086e-06,
      "loss": 0.0195,
      "step": 1090300
    },
    {
      "epoch": 1.7843325936254197,
      "grad_norm": 0.8499365448951721,
      "learning_rate": 6.473712301417342e-06,
      "loss": 0.0207,
      "step": 1090320
    },
    {
      "epoch": 1.784365324064073,
      "grad_norm": 0.19625267386436462,
      "learning_rate": 6.473646409203825e-06,
      "loss": 0.0205,
      "step": 1090340
    },
    {
      "epoch": 1.7843980545027265,
      "grad_norm": 1.783392310142517,
      "learning_rate": 6.473580516990308e-06,
      "loss": 0.0175,
      "step": 1090360
    },
    {
      "epoch": 1.7844307849413799,
      "grad_norm": 0.28668877482414246,
      "learning_rate": 6.473514624776791e-06,
      "loss": 0.0245,
      "step": 1090380
    },
    {
      "epoch": 1.784463515380033,
      "grad_norm": 0.30487948656082153,
      "learning_rate": 6.473448732563273e-06,
      "loss": 0.0194,
      "step": 1090400
    },
    {
      "epoch": 1.7844962458186866,
      "grad_norm": 0.4810793697834015,
      "learning_rate": 6.473382840349757e-06,
      "loss": 0.0179,
      "step": 1090420
    },
    {
      "epoch": 1.7845289762573397,
      "grad_norm": 0.2531399726867676,
      "learning_rate": 6.473316948136239e-06,
      "loss": 0.0247,
      "step": 1090440
    },
    {
      "epoch": 1.7845617066959931,
      "grad_norm": 0.7081204652786255,
      "learning_rate": 6.4732510559227225e-06,
      "loss": 0.0179,
      "step": 1090460
    },
    {
      "epoch": 1.7845944371346465,
      "grad_norm": 0.5705333948135376,
      "learning_rate": 6.473185163709204e-06,
      "loss": 0.0152,
      "step": 1090480
    },
    {
      "epoch": 1.7846271675732999,
      "grad_norm": 0.6363921761512756,
      "learning_rate": 6.473119271495688e-06,
      "loss": 0.0291,
      "step": 1090500
    },
    {
      "epoch": 1.7846598980119532,
      "grad_norm": 0.40894097089767456,
      "learning_rate": 6.473053379282171e-06,
      "loss": 0.0148,
      "step": 1090520
    },
    {
      "epoch": 1.7846926284506064,
      "grad_norm": 0.2145862579345703,
      "learning_rate": 6.4729874870686534e-06,
      "loss": 0.0217,
      "step": 1090540
    },
    {
      "epoch": 1.78472535888926,
      "grad_norm": 0.23305481672286987,
      "learning_rate": 6.472921594855136e-06,
      "loss": 0.0236,
      "step": 1090560
    },
    {
      "epoch": 1.7847580893279131,
      "grad_norm": 0.23556165397167206,
      "learning_rate": 6.47285570264162e-06,
      "loss": 0.0122,
      "step": 1090580
    },
    {
      "epoch": 1.7847908197665665,
      "grad_norm": 0.20864075422286987,
      "learning_rate": 6.472789810428102e-06,
      "loss": 0.024,
      "step": 1090600
    },
    {
      "epoch": 1.7848235502052199,
      "grad_norm": 0.5219604969024658,
      "learning_rate": 6.472723918214585e-06,
      "loss": 0.017,
      "step": 1090620
    },
    {
      "epoch": 1.784856280643873,
      "grad_norm": 0.1352386623620987,
      "learning_rate": 6.472658026001069e-06,
      "loss": 0.0189,
      "step": 1090640
    },
    {
      "epoch": 1.7848890110825266,
      "grad_norm": 0.3789691925048828,
      "learning_rate": 6.472592133787551e-06,
      "loss": 0.0151,
      "step": 1090660
    },
    {
      "epoch": 1.7849217415211798,
      "grad_norm": 1.2812573909759521,
      "learning_rate": 6.472526241574034e-06,
      "loss": 0.0238,
      "step": 1090680
    },
    {
      "epoch": 1.7849544719598334,
      "grad_norm": 0.9748283624649048,
      "learning_rate": 6.472460349360516e-06,
      "loss": 0.0088,
      "step": 1090700
    },
    {
      "epoch": 1.7849872023984865,
      "grad_norm": 0.23092158138751984,
      "learning_rate": 6.472394457147e-06,
      "loss": 0.0189,
      "step": 1090720
    },
    {
      "epoch": 1.7850199328371399,
      "grad_norm": 0.7720562815666199,
      "learning_rate": 6.4723285649334825e-06,
      "loss": 0.0287,
      "step": 1090740
    },
    {
      "epoch": 1.7850526632757933,
      "grad_norm": 1.0785603523254395,
      "learning_rate": 6.472262672719965e-06,
      "loss": 0.0194,
      "step": 1090760
    },
    {
      "epoch": 1.7850853937144464,
      "grad_norm": 0.11247210204601288,
      "learning_rate": 6.472196780506448e-06,
      "loss": 0.0249,
      "step": 1090780
    },
    {
      "epoch": 1.7851181241531,
      "grad_norm": 0.45536673069000244,
      "learning_rate": 6.472130888292932e-06,
      "loss": 0.0192,
      "step": 1090800
    },
    {
      "epoch": 1.7851508545917532,
      "grad_norm": 0.4974711537361145,
      "learning_rate": 6.4720649960794135e-06,
      "loss": 0.017,
      "step": 1090820
    },
    {
      "epoch": 1.7851835850304065,
      "grad_norm": 1.081725835800171,
      "learning_rate": 6.471999103865897e-06,
      "loss": 0.0191,
      "step": 1090840
    },
    {
      "epoch": 1.78521631546906,
      "grad_norm": 0.5354293584823608,
      "learning_rate": 6.471933211652379e-06,
      "loss": 0.0155,
      "step": 1090860
    },
    {
      "epoch": 1.7852490459077133,
      "grad_norm": 0.22655004262924194,
      "learning_rate": 6.4718673194388626e-06,
      "loss": 0.019,
      "step": 1090880
    },
    {
      "epoch": 1.7852817763463666,
      "grad_norm": 0.2990414798259735,
      "learning_rate": 6.471801427225345e-06,
      "loss": 0.0128,
      "step": 1090900
    },
    {
      "epoch": 1.7853145067850198,
      "grad_norm": 0.3249817490577698,
      "learning_rate": 6.471735535011828e-06,
      "loss": 0.0202,
      "step": 1090920
    },
    {
      "epoch": 1.7853472372236734,
      "grad_norm": 0.7771356105804443,
      "learning_rate": 6.471669642798311e-06,
      "loss": 0.0204,
      "step": 1090940
    },
    {
      "epoch": 1.7853799676623265,
      "grad_norm": 1.5854965448379517,
      "learning_rate": 6.471603750584794e-06,
      "loss": 0.0167,
      "step": 1090960
    },
    {
      "epoch": 1.78541269810098,
      "grad_norm": 0.4237397313117981,
      "learning_rate": 6.471537858371277e-06,
      "loss": 0.0201,
      "step": 1090980
    },
    {
      "epoch": 1.7854454285396333,
      "grad_norm": 0.3095865845680237,
      "learning_rate": 6.47147196615776e-06,
      "loss": 0.0184,
      "step": 1091000
    },
    {
      "epoch": 1.7854781589782867,
      "grad_norm": 1.0153063535690308,
      "learning_rate": 6.4714060739442434e-06,
      "loss": 0.0144,
      "step": 1091020
    },
    {
      "epoch": 1.78551088941694,
      "grad_norm": 0.36993208527565,
      "learning_rate": 6.471340181730725e-06,
      "loss": 0.0213,
      "step": 1091040
    },
    {
      "epoch": 1.7855436198555932,
      "grad_norm": 0.13784129917621613,
      "learning_rate": 6.471274289517209e-06,
      "loss": 0.0213,
      "step": 1091060
    },
    {
      "epoch": 1.7855763502942468,
      "grad_norm": 0.8471152186393738,
      "learning_rate": 6.471208397303691e-06,
      "loss": 0.0181,
      "step": 1091080
    },
    {
      "epoch": 1.7856090807329,
      "grad_norm": 0.7732781171798706,
      "learning_rate": 6.471142505090174e-06,
      "loss": 0.0244,
      "step": 1091100
    },
    {
      "epoch": 1.7856418111715533,
      "grad_norm": 0.623762845993042,
      "learning_rate": 6.471076612876656e-06,
      "loss": 0.0221,
      "step": 1091120
    },
    {
      "epoch": 1.7856745416102067,
      "grad_norm": 0.6260251402854919,
      "learning_rate": 6.47101072066314e-06,
      "loss": 0.0306,
      "step": 1091140
    },
    {
      "epoch": 1.78570727204886,
      "grad_norm": 0.477496862411499,
      "learning_rate": 6.470944828449623e-06,
      "loss": 0.0242,
      "step": 1091160
    },
    {
      "epoch": 1.7857400024875134,
      "grad_norm": 0.401973158121109,
      "learning_rate": 6.470878936236105e-06,
      "loss": 0.0161,
      "step": 1091180
    },
    {
      "epoch": 1.7857727329261666,
      "grad_norm": 0.22228220105171204,
      "learning_rate": 6.470813044022588e-06,
      "loss": 0.0183,
      "step": 1091200
    },
    {
      "epoch": 1.7858054633648202,
      "grad_norm": 0.8193192481994629,
      "learning_rate": 6.470747151809072e-06,
      "loss": 0.0148,
      "step": 1091220
    },
    {
      "epoch": 1.7858381938034733,
      "grad_norm": 0.2334824800491333,
      "learning_rate": 6.4706812595955536e-06,
      "loss": 0.0182,
      "step": 1091240
    },
    {
      "epoch": 1.7858709242421267,
      "grad_norm": 0.7677262425422668,
      "learning_rate": 6.470615367382037e-06,
      "loss": 0.0165,
      "step": 1091260
    },
    {
      "epoch": 1.78590365468078,
      "grad_norm": 0.1469307839870453,
      "learning_rate": 6.470549475168519e-06,
      "loss": 0.0136,
      "step": 1091280
    },
    {
      "epoch": 1.7859363851194334,
      "grad_norm": 0.5832127928733826,
      "learning_rate": 6.470483582955003e-06,
      "loss": 0.0332,
      "step": 1091300
    },
    {
      "epoch": 1.7859691155580868,
      "grad_norm": 0.17008426785469055,
      "learning_rate": 6.470417690741485e-06,
      "loss": 0.0195,
      "step": 1091320
    },
    {
      "epoch": 1.78600184599674,
      "grad_norm": 0.6047459244728088,
      "learning_rate": 6.470351798527968e-06,
      "loss": 0.0192,
      "step": 1091340
    },
    {
      "epoch": 1.7860345764353935,
      "grad_norm": 0.337417334318161,
      "learning_rate": 6.470285906314452e-06,
      "loss": 0.0184,
      "step": 1091360
    },
    {
      "epoch": 1.7860673068740467,
      "grad_norm": 1.4911370277404785,
      "learning_rate": 6.4702200141009344e-06,
      "loss": 0.0138,
      "step": 1091380
    },
    {
      "epoch": 1.7861000373127,
      "grad_norm": 0.6640719175338745,
      "learning_rate": 6.470154121887417e-06,
      "loss": 0.0286,
      "step": 1091400
    },
    {
      "epoch": 1.7861327677513534,
      "grad_norm": 0.3393530547618866,
      "learning_rate": 6.4700882296739e-06,
      "loss": 0.0151,
      "step": 1091420
    },
    {
      "epoch": 1.7861654981900066,
      "grad_norm": 0.489931583404541,
      "learning_rate": 6.4700223374603835e-06,
      "loss": 0.0136,
      "step": 1091440
    },
    {
      "epoch": 1.7861982286286602,
      "grad_norm": 1.050968050956726,
      "learning_rate": 6.469956445246865e-06,
      "loss": 0.0162,
      "step": 1091460
    },
    {
      "epoch": 1.7862309590673133,
      "grad_norm": 0.5396685600280762,
      "learning_rate": 6.469890553033349e-06,
      "loss": 0.0205,
      "step": 1091480
    },
    {
      "epoch": 1.786263689505967,
      "grad_norm": 1.375799298286438,
      "learning_rate": 6.469824660819831e-06,
      "loss": 0.0192,
      "step": 1091500
    },
    {
      "epoch": 1.78629641994462,
      "grad_norm": 0.293025404214859,
      "learning_rate": 6.4697587686063145e-06,
      "loss": 0.0242,
      "step": 1091520
    },
    {
      "epoch": 1.7863291503832734,
      "grad_norm": 0.6481765508651733,
      "learning_rate": 6.469692876392797e-06,
      "loss": 0.0201,
      "step": 1091540
    },
    {
      "epoch": 1.7863618808219268,
      "grad_norm": 1.0894285440444946,
      "learning_rate": 6.46962698417928e-06,
      "loss": 0.0161,
      "step": 1091560
    },
    {
      "epoch": 1.78639461126058,
      "grad_norm": 0.6139749884605408,
      "learning_rate": 6.469561091965763e-06,
      "loss": 0.0168,
      "step": 1091580
    },
    {
      "epoch": 1.7864273416992336,
      "grad_norm": 0.3955070972442627,
      "learning_rate": 6.469495199752246e-06,
      "loss": 0.0152,
      "step": 1091600
    },
    {
      "epoch": 1.7864600721378867,
      "grad_norm": 0.8701744079589844,
      "learning_rate": 6.469429307538728e-06,
      "loss": 0.0208,
      "step": 1091620
    },
    {
      "epoch": 1.78649280257654,
      "grad_norm": 0.24618542194366455,
      "learning_rate": 6.469363415325212e-06,
      "loss": 0.0156,
      "step": 1091640
    },
    {
      "epoch": 1.7865255330151935,
      "grad_norm": 0.7795299291610718,
      "learning_rate": 6.469297523111694e-06,
      "loss": 0.0169,
      "step": 1091660
    },
    {
      "epoch": 1.7865582634538468,
      "grad_norm": 0.4790685772895813,
      "learning_rate": 6.469231630898177e-06,
      "loss": 0.0185,
      "step": 1091680
    },
    {
      "epoch": 1.7865909938925002,
      "grad_norm": 0.5019998550415039,
      "learning_rate": 6.469165738684661e-06,
      "loss": 0.0159,
      "step": 1091700
    },
    {
      "epoch": 1.7866237243311534,
      "grad_norm": 0.98652184009552,
      "learning_rate": 6.469099846471143e-06,
      "loss": 0.0213,
      "step": 1091720
    },
    {
      "epoch": 1.786656454769807,
      "grad_norm": 0.16649025678634644,
      "learning_rate": 6.469033954257626e-06,
      "loss": 0.0193,
      "step": 1091740
    },
    {
      "epoch": 1.78668918520846,
      "grad_norm": 0.5118898153305054,
      "learning_rate": 6.468968062044109e-06,
      "loss": 0.0132,
      "step": 1091760
    },
    {
      "epoch": 1.7867219156471135,
      "grad_norm": 0.3352794051170349,
      "learning_rate": 6.468902169830592e-06,
      "loss": 0.0149,
      "step": 1091780
    },
    {
      "epoch": 1.7867546460857668,
      "grad_norm": 0.44983288645744324,
      "learning_rate": 6.4688362776170745e-06,
      "loss": 0.0189,
      "step": 1091800
    },
    {
      "epoch": 1.7867873765244202,
      "grad_norm": 0.2627525329589844,
      "learning_rate": 6.468770385403558e-06,
      "loss": 0.014,
      "step": 1091820
    },
    {
      "epoch": 1.7868201069630736,
      "grad_norm": 1.8864436149597168,
      "learning_rate": 6.46870449319004e-06,
      "loss": 0.0163,
      "step": 1091840
    },
    {
      "epoch": 1.7868528374017267,
      "grad_norm": 0.375195175409317,
      "learning_rate": 6.4686386009765236e-06,
      "loss": 0.0176,
      "step": 1091860
    },
    {
      "epoch": 1.7868855678403803,
      "grad_norm": 0.5568264126777649,
      "learning_rate": 6.4685727087630055e-06,
      "loss": 0.0126,
      "step": 1091880
    },
    {
      "epoch": 1.7869182982790335,
      "grad_norm": 0.33196955919265747,
      "learning_rate": 6.468506816549489e-06,
      "loss": 0.0157,
      "step": 1091900
    },
    {
      "epoch": 1.7869510287176869,
      "grad_norm": 0.5022287964820862,
      "learning_rate": 6.468440924335972e-06,
      "loss": 0.018,
      "step": 1091920
    },
    {
      "epoch": 1.7869837591563402,
      "grad_norm": 0.7643707990646362,
      "learning_rate": 6.4683750321224545e-06,
      "loss": 0.0156,
      "step": 1091940
    },
    {
      "epoch": 1.7870164895949936,
      "grad_norm": 1.4662429094314575,
      "learning_rate": 6.468309139908937e-06,
      "loss": 0.0208,
      "step": 1091960
    },
    {
      "epoch": 1.787049220033647,
      "grad_norm": 0.45485660433769226,
      "learning_rate": 6.468243247695421e-06,
      "loss": 0.0176,
      "step": 1091980
    },
    {
      "epoch": 1.7870819504723001,
      "grad_norm": 0.2809015214443207,
      "learning_rate": 6.468177355481903e-06,
      "loss": 0.0168,
      "step": 1092000
    },
    {
      "epoch": 1.7871146809109537,
      "grad_norm": 0.26332345604896545,
      "learning_rate": 6.468111463268386e-06,
      "loss": 0.0159,
      "step": 1092020
    },
    {
      "epoch": 1.7871474113496069,
      "grad_norm": 0.24748218059539795,
      "learning_rate": 6.46804557105487e-06,
      "loss": 0.0111,
      "step": 1092040
    },
    {
      "epoch": 1.7871801417882602,
      "grad_norm": 0.17698043584823608,
      "learning_rate": 6.467979678841352e-06,
      "loss": 0.0254,
      "step": 1092060
    },
    {
      "epoch": 1.7872128722269136,
      "grad_norm": 0.6663296818733215,
      "learning_rate": 6.467913786627835e-06,
      "loss": 0.0336,
      "step": 1092080
    },
    {
      "epoch": 1.787245602665567,
      "grad_norm": 0.3494366705417633,
      "learning_rate": 6.467847894414317e-06,
      "loss": 0.0168,
      "step": 1092100
    },
    {
      "epoch": 1.7872783331042204,
      "grad_norm": 0.6560596823692322,
      "learning_rate": 6.467782002200801e-06,
      "loss": 0.0116,
      "step": 1092120
    },
    {
      "epoch": 1.7873110635428735,
      "grad_norm": 0.2925652265548706,
      "learning_rate": 6.467716109987283e-06,
      "loss": 0.0208,
      "step": 1092140
    },
    {
      "epoch": 1.787343793981527,
      "grad_norm": 0.45114967226982117,
      "learning_rate": 6.467650217773766e-06,
      "loss": 0.0162,
      "step": 1092160
    },
    {
      "epoch": 1.7873765244201802,
      "grad_norm": 0.23285362124443054,
      "learning_rate": 6.467584325560249e-06,
      "loss": 0.0172,
      "step": 1092180
    },
    {
      "epoch": 1.7874092548588336,
      "grad_norm": 0.24843306839466095,
      "learning_rate": 6.467518433346732e-06,
      "loss": 0.0153,
      "step": 1092200
    },
    {
      "epoch": 1.787441985297487,
      "grad_norm": 0.3282153010368347,
      "learning_rate": 6.467452541133215e-06,
      "loss": 0.0187,
      "step": 1092220
    },
    {
      "epoch": 1.7874747157361401,
      "grad_norm": 0.6454399228096008,
      "learning_rate": 6.467386648919698e-06,
      "loss": 0.0163,
      "step": 1092240
    },
    {
      "epoch": 1.7875074461747937,
      "grad_norm": 0.24518488347530365,
      "learning_rate": 6.46732075670618e-06,
      "loss": 0.0205,
      "step": 1092260
    },
    {
      "epoch": 1.7875401766134469,
      "grad_norm": 0.22452746331691742,
      "learning_rate": 6.467254864492664e-06,
      "loss": 0.0153,
      "step": 1092280
    },
    {
      "epoch": 1.7875729070521003,
      "grad_norm": 0.6167881488800049,
      "learning_rate": 6.4671889722791455e-06,
      "loss": 0.0175,
      "step": 1092300
    },
    {
      "epoch": 1.7876056374907536,
      "grad_norm": 0.39583486318588257,
      "learning_rate": 6.467123080065629e-06,
      "loss": 0.0193,
      "step": 1092320
    },
    {
      "epoch": 1.787638367929407,
      "grad_norm": 1.0444592237472534,
      "learning_rate": 6.467057187852112e-06,
      "loss": 0.0219,
      "step": 1092340
    },
    {
      "epoch": 1.7876710983680604,
      "grad_norm": 0.36252573132514954,
      "learning_rate": 6.466991295638595e-06,
      "loss": 0.019,
      "step": 1092360
    },
    {
      "epoch": 1.7877038288067135,
      "grad_norm": 1.617983102798462,
      "learning_rate": 6.466925403425077e-06,
      "loss": 0.0144,
      "step": 1092380
    },
    {
      "epoch": 1.7877365592453671,
      "grad_norm": 3.2819418907165527,
      "learning_rate": 6.466859511211561e-06,
      "loss": 0.0181,
      "step": 1092400
    },
    {
      "epoch": 1.7877692896840203,
      "grad_norm": 0.47023218870162964,
      "learning_rate": 6.466793618998044e-06,
      "loss": 0.0161,
      "step": 1092420
    },
    {
      "epoch": 1.7878020201226736,
      "grad_norm": 0.5759791135787964,
      "learning_rate": 6.466727726784526e-06,
      "loss": 0.0241,
      "step": 1092440
    },
    {
      "epoch": 1.787834750561327,
      "grad_norm": 0.08237090706825256,
      "learning_rate": 6.46666183457101e-06,
      "loss": 0.0148,
      "step": 1092460
    },
    {
      "epoch": 1.7878674809999804,
      "grad_norm": 0.5751988887786865,
      "learning_rate": 6.466595942357492e-06,
      "loss": 0.0165,
      "step": 1092480
    },
    {
      "epoch": 1.7879002114386338,
      "grad_norm": 0.23875105381011963,
      "learning_rate": 6.4665300501439755e-06,
      "loss": 0.0274,
      "step": 1092500
    },
    {
      "epoch": 1.787932941877287,
      "grad_norm": 0.3120787739753723,
      "learning_rate": 6.466464157930457e-06,
      "loss": 0.0178,
      "step": 1092520
    },
    {
      "epoch": 1.7879656723159405,
      "grad_norm": 0.31600338220596313,
      "learning_rate": 6.466398265716941e-06,
      "loss": 0.0174,
      "step": 1092540
    },
    {
      "epoch": 1.7879984027545937,
      "grad_norm": 0.7637401223182678,
      "learning_rate": 6.466332373503424e-06,
      "loss": 0.0194,
      "step": 1092560
    },
    {
      "epoch": 1.788031133193247,
      "grad_norm": 0.30957522988319397,
      "learning_rate": 6.4662664812899064e-06,
      "loss": 0.018,
      "step": 1092580
    },
    {
      "epoch": 1.7880638636319004,
      "grad_norm": 0.4399854242801666,
      "learning_rate": 6.466200589076389e-06,
      "loss": 0.0299,
      "step": 1092600
    },
    {
      "epoch": 1.7880965940705538,
      "grad_norm": 0.6334060430526733,
      "learning_rate": 6.466134696862873e-06,
      "loss": 0.0199,
      "step": 1092620
    },
    {
      "epoch": 1.7881293245092071,
      "grad_norm": 0.6792554259300232,
      "learning_rate": 6.466068804649355e-06,
      "loss": 0.0189,
      "step": 1092640
    },
    {
      "epoch": 1.7881620549478603,
      "grad_norm": 0.5255066752433777,
      "learning_rate": 6.466002912435838e-06,
      "loss": 0.0203,
      "step": 1092660
    },
    {
      "epoch": 1.7881947853865139,
      "grad_norm": 0.29727527499198914,
      "learning_rate": 6.46593702022232e-06,
      "loss": 0.018,
      "step": 1092680
    },
    {
      "epoch": 1.788227515825167,
      "grad_norm": 0.581479549407959,
      "learning_rate": 6.465871128008804e-06,
      "loss": 0.0154,
      "step": 1092700
    },
    {
      "epoch": 1.7882602462638204,
      "grad_norm": 0.14796678721904755,
      "learning_rate": 6.4658052357952865e-06,
      "loss": 0.0303,
      "step": 1092720
    },
    {
      "epoch": 1.7882929767024738,
      "grad_norm": 1.1469284296035767,
      "learning_rate": 6.465739343581769e-06,
      "loss": 0.0182,
      "step": 1092740
    },
    {
      "epoch": 1.7883257071411272,
      "grad_norm": 0.17707459628582,
      "learning_rate": 6.465673451368253e-06,
      "loss": 0.0155,
      "step": 1092760
    },
    {
      "epoch": 1.7883584375797805,
      "grad_norm": 0.0955190360546112,
      "learning_rate": 6.4656075591547355e-06,
      "loss": 0.021,
      "step": 1092780
    },
    {
      "epoch": 1.7883911680184337,
      "grad_norm": 0.6078179478645325,
      "learning_rate": 6.465541666941218e-06,
      "loss": 0.0162,
      "step": 1092800
    },
    {
      "epoch": 1.7884238984570873,
      "grad_norm": 0.6580467224121094,
      "learning_rate": 6.465475774727701e-06,
      "loss": 0.0198,
      "step": 1092820
    },
    {
      "epoch": 1.7884566288957404,
      "grad_norm": 1.669661521911621,
      "learning_rate": 6.465409882514185e-06,
      "loss": 0.0211,
      "step": 1092840
    },
    {
      "epoch": 1.7884893593343938,
      "grad_norm": 0.29968440532684326,
      "learning_rate": 6.4653439903006665e-06,
      "loss": 0.0178,
      "step": 1092860
    },
    {
      "epoch": 1.7885220897730472,
      "grad_norm": 2.295840263366699,
      "learning_rate": 6.46527809808715e-06,
      "loss": 0.0244,
      "step": 1092880
    },
    {
      "epoch": 1.7885548202117003,
      "grad_norm": 0.42907464504241943,
      "learning_rate": 6.465212205873632e-06,
      "loss": 0.0184,
      "step": 1092900
    },
    {
      "epoch": 1.788587550650354,
      "grad_norm": 0.37914004921913147,
      "learning_rate": 6.4651463136601156e-06,
      "loss": 0.016,
      "step": 1092920
    },
    {
      "epoch": 1.788620281089007,
      "grad_norm": 0.3214879035949707,
      "learning_rate": 6.4650804214465975e-06,
      "loss": 0.0185,
      "step": 1092940
    },
    {
      "epoch": 1.7886530115276607,
      "grad_norm": 0.3253955841064453,
      "learning_rate": 6.465014529233081e-06,
      "loss": 0.0166,
      "step": 1092960
    },
    {
      "epoch": 1.7886857419663138,
      "grad_norm": 1.9010318517684937,
      "learning_rate": 6.464948637019564e-06,
      "loss": 0.0188,
      "step": 1092980
    },
    {
      "epoch": 1.7887184724049672,
      "grad_norm": 0.12275553494691849,
      "learning_rate": 6.4648827448060465e-06,
      "loss": 0.0169,
      "step": 1093000
    },
    {
      "epoch": 1.7887512028436205,
      "grad_norm": 0.1446596384048462,
      "learning_rate": 6.464816852592529e-06,
      "loss": 0.0146,
      "step": 1093020
    },
    {
      "epoch": 1.7887839332822737,
      "grad_norm": 0.8003836274147034,
      "learning_rate": 6.464750960379013e-06,
      "loss": 0.0196,
      "step": 1093040
    },
    {
      "epoch": 1.7888166637209273,
      "grad_norm": 0.4186880886554718,
      "learning_rate": 6.464685068165495e-06,
      "loss": 0.0201,
      "step": 1093060
    },
    {
      "epoch": 1.7888493941595804,
      "grad_norm": 0.8466712832450867,
      "learning_rate": 6.464619175951978e-06,
      "loss": 0.0152,
      "step": 1093080
    },
    {
      "epoch": 1.7888821245982338,
      "grad_norm": 0.3510931134223938,
      "learning_rate": 6.464553283738462e-06,
      "loss": 0.0129,
      "step": 1093100
    },
    {
      "epoch": 1.7889148550368872,
      "grad_norm": 0.1920301467180252,
      "learning_rate": 6.464487391524944e-06,
      "loss": 0.022,
      "step": 1093120
    },
    {
      "epoch": 1.7889475854755406,
      "grad_norm": 0.1549382209777832,
      "learning_rate": 6.464421499311427e-06,
      "loss": 0.0122,
      "step": 1093140
    },
    {
      "epoch": 1.788980315914194,
      "grad_norm": 0.47403615713119507,
      "learning_rate": 6.464355607097909e-06,
      "loss": 0.0164,
      "step": 1093160
    },
    {
      "epoch": 1.789013046352847,
      "grad_norm": 0.3416392505168915,
      "learning_rate": 6.464289714884393e-06,
      "loss": 0.0174,
      "step": 1093180
    },
    {
      "epoch": 1.7890457767915007,
      "grad_norm": 0.9661351442337036,
      "learning_rate": 6.464223822670876e-06,
      "loss": 0.0208,
      "step": 1093200
    },
    {
      "epoch": 1.7890785072301538,
      "grad_norm": 0.8759148120880127,
      "learning_rate": 6.464157930457358e-06,
      "loss": 0.0182,
      "step": 1093220
    },
    {
      "epoch": 1.7891112376688072,
      "grad_norm": 0.2742811143398285,
      "learning_rate": 6.464092038243841e-06,
      "loss": 0.0221,
      "step": 1093240
    },
    {
      "epoch": 1.7891439681074606,
      "grad_norm": 0.7719189524650574,
      "learning_rate": 6.464026146030325e-06,
      "loss": 0.0242,
      "step": 1093260
    },
    {
      "epoch": 1.789176698546114,
      "grad_norm": 1.0871473550796509,
      "learning_rate": 6.4639602538168066e-06,
      "loss": 0.019,
      "step": 1093280
    },
    {
      "epoch": 1.7892094289847673,
      "grad_norm": 0.9839940667152405,
      "learning_rate": 6.46389436160329e-06,
      "loss": 0.0188,
      "step": 1093300
    },
    {
      "epoch": 1.7892421594234205,
      "grad_norm": 1.0681527853012085,
      "learning_rate": 6.463828469389772e-06,
      "loss": 0.0183,
      "step": 1093320
    },
    {
      "epoch": 1.789274889862074,
      "grad_norm": 0.14894971251487732,
      "learning_rate": 6.463762577176256e-06,
      "loss": 0.0137,
      "step": 1093340
    },
    {
      "epoch": 1.7893076203007272,
      "grad_norm": 0.2931216061115265,
      "learning_rate": 6.463696684962738e-06,
      "loss": 0.0181,
      "step": 1093360
    },
    {
      "epoch": 1.7893403507393806,
      "grad_norm": 0.26161476969718933,
      "learning_rate": 6.463630792749221e-06,
      "loss": 0.0192,
      "step": 1093380
    },
    {
      "epoch": 1.789373081178034,
      "grad_norm": 0.3496367037296295,
      "learning_rate": 6.463564900535704e-06,
      "loss": 0.0166,
      "step": 1093400
    },
    {
      "epoch": 1.7894058116166873,
      "grad_norm": 0.31944528222084045,
      "learning_rate": 6.4634990083221874e-06,
      "loss": 0.0161,
      "step": 1093420
    },
    {
      "epoch": 1.7894385420553407,
      "grad_norm": 0.39087334275245667,
      "learning_rate": 6.463433116108669e-06,
      "loss": 0.0181,
      "step": 1093440
    },
    {
      "epoch": 1.7894712724939938,
      "grad_norm": 2.7523648738861084,
      "learning_rate": 6.463367223895153e-06,
      "loss": 0.022,
      "step": 1093460
    },
    {
      "epoch": 1.7895040029326474,
      "grad_norm": 0.22541826963424683,
      "learning_rate": 6.4633013316816365e-06,
      "loss": 0.0217,
      "step": 1093480
    },
    {
      "epoch": 1.7895367333713006,
      "grad_norm": 1.2751224040985107,
      "learning_rate": 6.463235439468118e-06,
      "loss": 0.0226,
      "step": 1093500
    },
    {
      "epoch": 1.789569463809954,
      "grad_norm": 2.4354753494262695,
      "learning_rate": 6.463169547254602e-06,
      "loss": 0.0231,
      "step": 1093520
    },
    {
      "epoch": 1.7896021942486073,
      "grad_norm": 1.0375295877456665,
      "learning_rate": 6.463103655041084e-06,
      "loss": 0.0177,
      "step": 1093540
    },
    {
      "epoch": 1.7896349246872607,
      "grad_norm": 0.3158629238605499,
      "learning_rate": 6.4630377628275675e-06,
      "loss": 0.0133,
      "step": 1093560
    },
    {
      "epoch": 1.789667655125914,
      "grad_norm": 0.27832165360450745,
      "learning_rate": 6.46297187061405e-06,
      "loss": 0.0139,
      "step": 1093580
    },
    {
      "epoch": 1.7897003855645672,
      "grad_norm": 0.36441943049430847,
      "learning_rate": 6.462905978400533e-06,
      "loss": 0.0117,
      "step": 1093600
    },
    {
      "epoch": 1.7897331160032208,
      "grad_norm": 0.3137691915035248,
      "learning_rate": 6.462840086187016e-06,
      "loss": 0.0225,
      "step": 1093620
    },
    {
      "epoch": 1.789765846441874,
      "grad_norm": 1.6912497282028198,
      "learning_rate": 6.462774193973499e-06,
      "loss": 0.0187,
      "step": 1093640
    },
    {
      "epoch": 1.7897985768805273,
      "grad_norm": 0.30236154794692993,
      "learning_rate": 6.462708301759981e-06,
      "loss": 0.0175,
      "step": 1093660
    },
    {
      "epoch": 1.7898313073191807,
      "grad_norm": 0.34728196263313293,
      "learning_rate": 6.462642409546465e-06,
      "loss": 0.018,
      "step": 1093680
    },
    {
      "epoch": 1.7898640377578339,
      "grad_norm": 0.4639810621738434,
      "learning_rate": 6.462576517332947e-06,
      "loss": 0.021,
      "step": 1093700
    },
    {
      "epoch": 1.7898967681964875,
      "grad_norm": 0.21573451161384583,
      "learning_rate": 6.46251062511943e-06,
      "loss": 0.0113,
      "step": 1093720
    },
    {
      "epoch": 1.7899294986351406,
      "grad_norm": 0.3063683807849884,
      "learning_rate": 6.462444732905913e-06,
      "loss": 0.0258,
      "step": 1093740
    },
    {
      "epoch": 1.7899622290737942,
      "grad_norm": 0.5522088408470154,
      "learning_rate": 6.462378840692396e-06,
      "loss": 0.0204,
      "step": 1093760
    },
    {
      "epoch": 1.7899949595124474,
      "grad_norm": 0.3812948763370514,
      "learning_rate": 6.4623129484788784e-06,
      "loss": 0.0162,
      "step": 1093780
    },
    {
      "epoch": 1.7900276899511007,
      "grad_norm": 0.3961474597454071,
      "learning_rate": 6.462247056265362e-06,
      "loss": 0.0202,
      "step": 1093800
    },
    {
      "epoch": 1.790060420389754,
      "grad_norm": 0.4102817177772522,
      "learning_rate": 6.462181164051845e-06,
      "loss": 0.0204,
      "step": 1093820
    },
    {
      "epoch": 1.7900931508284073,
      "grad_norm": 0.12953442335128784,
      "learning_rate": 6.4621152718383275e-06,
      "loss": 0.021,
      "step": 1093840
    },
    {
      "epoch": 1.7901258812670608,
      "grad_norm": 0.17870000004768372,
      "learning_rate": 6.462049379624811e-06,
      "loss": 0.0203,
      "step": 1093860
    },
    {
      "epoch": 1.790158611705714,
      "grad_norm": 0.9838453531265259,
      "learning_rate": 6.461983487411293e-06,
      "loss": 0.0144,
      "step": 1093880
    },
    {
      "epoch": 1.7901913421443674,
      "grad_norm": 0.519344687461853,
      "learning_rate": 6.461917595197777e-06,
      "loss": 0.0144,
      "step": 1093900
    },
    {
      "epoch": 1.7902240725830207,
      "grad_norm": 0.15687613189220428,
      "learning_rate": 6.4618517029842585e-06,
      "loss": 0.0217,
      "step": 1093920
    },
    {
      "epoch": 1.7902568030216741,
      "grad_norm": 0.4264429211616516,
      "learning_rate": 6.461785810770742e-06,
      "loss": 0.0173,
      "step": 1093940
    },
    {
      "epoch": 1.7902895334603275,
      "grad_norm": 0.394666850566864,
      "learning_rate": 6.461719918557224e-06,
      "loss": 0.0223,
      "step": 1093960
    },
    {
      "epoch": 1.7903222638989806,
      "grad_norm": 0.5838890671730042,
      "learning_rate": 6.4616540263437075e-06,
      "loss": 0.0151,
      "step": 1093980
    },
    {
      "epoch": 1.7903549943376342,
      "grad_norm": 0.383372962474823,
      "learning_rate": 6.46158813413019e-06,
      "loss": 0.0177,
      "step": 1094000
    },
    {
      "epoch": 1.7903877247762874,
      "grad_norm": 0.9420751333236694,
      "learning_rate": 6.461522241916673e-06,
      "loss": 0.0154,
      "step": 1094020
    },
    {
      "epoch": 1.7904204552149408,
      "grad_norm": 0.11715354025363922,
      "learning_rate": 6.461456349703156e-06,
      "loss": 0.0245,
      "step": 1094040
    },
    {
      "epoch": 1.7904531856535941,
      "grad_norm": 0.19445295631885529,
      "learning_rate": 6.461390457489639e-06,
      "loss": 0.0183,
      "step": 1094060
    },
    {
      "epoch": 1.7904859160922475,
      "grad_norm": 1.0015811920166016,
      "learning_rate": 6.461324565276121e-06,
      "loss": 0.0222,
      "step": 1094080
    },
    {
      "epoch": 1.7905186465309009,
      "grad_norm": 0.1708284616470337,
      "learning_rate": 6.461258673062605e-06,
      "loss": 0.0133,
      "step": 1094100
    },
    {
      "epoch": 1.790551376969554,
      "grad_norm": 0.5478436946868896,
      "learning_rate": 6.461192780849087e-06,
      "loss": 0.0152,
      "step": 1094120
    },
    {
      "epoch": 1.7905841074082076,
      "grad_norm": 0.9797654151916504,
      "learning_rate": 6.46112688863557e-06,
      "loss": 0.0234,
      "step": 1094140
    },
    {
      "epoch": 1.7906168378468608,
      "grad_norm": 0.8405101895332336,
      "learning_rate": 6.461060996422054e-06,
      "loss": 0.0236,
      "step": 1094160
    },
    {
      "epoch": 1.7906495682855141,
      "grad_norm": 0.7064056992530823,
      "learning_rate": 6.460995104208536e-06,
      "loss": 0.0168,
      "step": 1094180
    },
    {
      "epoch": 1.7906822987241675,
      "grad_norm": 0.13180285692214966,
      "learning_rate": 6.460929211995019e-06,
      "loss": 0.0202,
      "step": 1094200
    },
    {
      "epoch": 1.7907150291628209,
      "grad_norm": 0.5134096741676331,
      "learning_rate": 6.460863319781502e-06,
      "loss": 0.0193,
      "step": 1094220
    },
    {
      "epoch": 1.7907477596014743,
      "grad_norm": 0.8078036904335022,
      "learning_rate": 6.460797427567985e-06,
      "loss": 0.0209,
      "step": 1094240
    },
    {
      "epoch": 1.7907804900401274,
      "grad_norm": 0.3606381416320801,
      "learning_rate": 6.460731535354468e-06,
      "loss": 0.021,
      "step": 1094260
    },
    {
      "epoch": 1.790813220478781,
      "grad_norm": 0.17375697195529938,
      "learning_rate": 6.460665643140951e-06,
      "loss": 0.0142,
      "step": 1094280
    },
    {
      "epoch": 1.7908459509174341,
      "grad_norm": 0.6126991510391235,
      "learning_rate": 6.460599750927433e-06,
      "loss": 0.0124,
      "step": 1094300
    },
    {
      "epoch": 1.7908786813560875,
      "grad_norm": 0.6622626781463623,
      "learning_rate": 6.460533858713917e-06,
      "loss": 0.0173,
      "step": 1094320
    },
    {
      "epoch": 1.790911411794741,
      "grad_norm": 0.5972896814346313,
      "learning_rate": 6.4604679665003986e-06,
      "loss": 0.0242,
      "step": 1094340
    },
    {
      "epoch": 1.7909441422333943,
      "grad_norm": 0.6701776385307312,
      "learning_rate": 6.460402074286882e-06,
      "loss": 0.0198,
      "step": 1094360
    },
    {
      "epoch": 1.7909768726720476,
      "grad_norm": 0.09278431534767151,
      "learning_rate": 6.460336182073365e-06,
      "loss": 0.0128,
      "step": 1094380
    },
    {
      "epoch": 1.7910096031107008,
      "grad_norm": 0.4550592005252838,
      "learning_rate": 6.460270289859848e-06,
      "loss": 0.0144,
      "step": 1094400
    },
    {
      "epoch": 1.7910423335493544,
      "grad_norm": 0.8804372549057007,
      "learning_rate": 6.46020439764633e-06,
      "loss": 0.0195,
      "step": 1094420
    },
    {
      "epoch": 1.7910750639880075,
      "grad_norm": 0.5488700866699219,
      "learning_rate": 6.460138505432814e-06,
      "loss": 0.0201,
      "step": 1094440
    },
    {
      "epoch": 1.791107794426661,
      "grad_norm": 0.8080362677574158,
      "learning_rate": 6.460072613219296e-06,
      "loss": 0.0265,
      "step": 1094460
    },
    {
      "epoch": 1.7911405248653143,
      "grad_norm": 0.6417173743247986,
      "learning_rate": 6.460006721005779e-06,
      "loss": 0.0238,
      "step": 1094480
    },
    {
      "epoch": 1.7911732553039674,
      "grad_norm": 1.1234182119369507,
      "learning_rate": 6.459940828792263e-06,
      "loss": 0.0168,
      "step": 1094500
    },
    {
      "epoch": 1.791205985742621,
      "grad_norm": 0.5201354026794434,
      "learning_rate": 6.459874936578745e-06,
      "loss": 0.0164,
      "step": 1094520
    },
    {
      "epoch": 1.7912387161812742,
      "grad_norm": 0.7729844450950623,
      "learning_rate": 6.4598090443652285e-06,
      "loss": 0.0214,
      "step": 1094540
    },
    {
      "epoch": 1.7912714466199278,
      "grad_norm": 0.4216890037059784,
      "learning_rate": 6.45974315215171e-06,
      "loss": 0.0161,
      "step": 1094560
    },
    {
      "epoch": 1.791304177058581,
      "grad_norm": 0.3752012550830841,
      "learning_rate": 6.459677259938194e-06,
      "loss": 0.0325,
      "step": 1094580
    },
    {
      "epoch": 1.7913369074972343,
      "grad_norm": 0.23000985383987427,
      "learning_rate": 6.459611367724677e-06,
      "loss": 0.0205,
      "step": 1094600
    },
    {
      "epoch": 1.7913696379358877,
      "grad_norm": 0.48707646131515503,
      "learning_rate": 6.4595454755111594e-06,
      "loss": 0.0197,
      "step": 1094620
    },
    {
      "epoch": 1.7914023683745408,
      "grad_norm": 0.347255676984787,
      "learning_rate": 6.459479583297642e-06,
      "loss": 0.0218,
      "step": 1094640
    },
    {
      "epoch": 1.7914350988131944,
      "grad_norm": 0.16751059889793396,
      "learning_rate": 6.459413691084126e-06,
      "loss": 0.0202,
      "step": 1094660
    },
    {
      "epoch": 1.7914678292518476,
      "grad_norm": 0.42240485548973083,
      "learning_rate": 6.459347798870608e-06,
      "loss": 0.0219,
      "step": 1094680
    },
    {
      "epoch": 1.791500559690501,
      "grad_norm": 0.30313950777053833,
      "learning_rate": 6.459281906657091e-06,
      "loss": 0.0202,
      "step": 1094700
    },
    {
      "epoch": 1.7915332901291543,
      "grad_norm": 1.2211209535598755,
      "learning_rate": 6.459216014443573e-06,
      "loss": 0.0166,
      "step": 1094720
    },
    {
      "epoch": 1.7915660205678077,
      "grad_norm": 0.27613526582717896,
      "learning_rate": 6.459150122230057e-06,
      "loss": 0.0188,
      "step": 1094740
    },
    {
      "epoch": 1.791598751006461,
      "grad_norm": 1.0826880931854248,
      "learning_rate": 6.4590842300165395e-06,
      "loss": 0.0221,
      "step": 1094760
    },
    {
      "epoch": 1.7916314814451142,
      "grad_norm": 0.30077871680259705,
      "learning_rate": 6.459018337803022e-06,
      "loss": 0.0158,
      "step": 1094780
    },
    {
      "epoch": 1.7916642118837678,
      "grad_norm": 4.031558036804199,
      "learning_rate": 6.458952445589505e-06,
      "loss": 0.0144,
      "step": 1094800
    },
    {
      "epoch": 1.791696942322421,
      "grad_norm": 0.37768223881721497,
      "learning_rate": 6.4588865533759885e-06,
      "loss": 0.0116,
      "step": 1094820
    },
    {
      "epoch": 1.7917296727610743,
      "grad_norm": 0.5085431337356567,
      "learning_rate": 6.4588206611624704e-06,
      "loss": 0.0163,
      "step": 1094840
    },
    {
      "epoch": 1.7917624031997277,
      "grad_norm": 0.1382378339767456,
      "learning_rate": 6.458754768948954e-06,
      "loss": 0.0189,
      "step": 1094860
    },
    {
      "epoch": 1.791795133638381,
      "grad_norm": 10.444278717041016,
      "learning_rate": 6.458688876735438e-06,
      "loss": 0.0256,
      "step": 1094880
    },
    {
      "epoch": 1.7918278640770344,
      "grad_norm": 0.8473899364471436,
      "learning_rate": 6.4586229845219195e-06,
      "loss": 0.0182,
      "step": 1094900
    },
    {
      "epoch": 1.7918605945156876,
      "grad_norm": 1.901545524597168,
      "learning_rate": 6.458557092308403e-06,
      "loss": 0.022,
      "step": 1094920
    },
    {
      "epoch": 1.7918933249543412,
      "grad_norm": 0.2333585023880005,
      "learning_rate": 6.458491200094885e-06,
      "loss": 0.0119,
      "step": 1094940
    },
    {
      "epoch": 1.7919260553929943,
      "grad_norm": 1.5234755277633667,
      "learning_rate": 6.4584253078813686e-06,
      "loss": 0.0267,
      "step": 1094960
    },
    {
      "epoch": 1.7919587858316477,
      "grad_norm": 0.6112486720085144,
      "learning_rate": 6.4583594156678505e-06,
      "loss": 0.0241,
      "step": 1094980
    },
    {
      "epoch": 1.791991516270301,
      "grad_norm": 0.414217472076416,
      "learning_rate": 6.458293523454334e-06,
      "loss": 0.0155,
      "step": 1095000
    },
    {
      "epoch": 1.7920242467089544,
      "grad_norm": 0.09030348807573318,
      "learning_rate": 6.458227631240817e-06,
      "loss": 0.0234,
      "step": 1095020
    },
    {
      "epoch": 1.7920569771476078,
      "grad_norm": 0.9298678040504456,
      "learning_rate": 6.4581617390272995e-06,
      "loss": 0.0146,
      "step": 1095040
    },
    {
      "epoch": 1.792089707586261,
      "grad_norm": 0.2846337854862213,
      "learning_rate": 6.458095846813782e-06,
      "loss": 0.0182,
      "step": 1095060
    },
    {
      "epoch": 1.7921224380249146,
      "grad_norm": 0.5099053978919983,
      "learning_rate": 6.458029954600266e-06,
      "loss": 0.0226,
      "step": 1095080
    },
    {
      "epoch": 1.7921551684635677,
      "grad_norm": 0.4165044128894806,
      "learning_rate": 6.457964062386748e-06,
      "loss": 0.0154,
      "step": 1095100
    },
    {
      "epoch": 1.792187898902221,
      "grad_norm": 0.18049806356430054,
      "learning_rate": 6.457898170173231e-06,
      "loss": 0.0196,
      "step": 1095120
    },
    {
      "epoch": 1.7922206293408744,
      "grad_norm": 1.0705039501190186,
      "learning_rate": 6.457832277959713e-06,
      "loss": 0.0234,
      "step": 1095140
    },
    {
      "epoch": 1.7922533597795276,
      "grad_norm": 0.7480227947235107,
      "learning_rate": 6.457766385746197e-06,
      "loss": 0.0197,
      "step": 1095160
    },
    {
      "epoch": 1.7922860902181812,
      "grad_norm": 0.22305069863796234,
      "learning_rate": 6.4577004935326795e-06,
      "loss": 0.0127,
      "step": 1095180
    },
    {
      "epoch": 1.7923188206568343,
      "grad_norm": 0.36128583550453186,
      "learning_rate": 6.457634601319162e-06,
      "loss": 0.0203,
      "step": 1095200
    },
    {
      "epoch": 1.792351551095488,
      "grad_norm": 0.4822205603122711,
      "learning_rate": 6.457568709105646e-06,
      "loss": 0.0165,
      "step": 1095220
    },
    {
      "epoch": 1.792384281534141,
      "grad_norm": 1.8400371074676514,
      "learning_rate": 6.457502816892129e-06,
      "loss": 0.0226,
      "step": 1095240
    },
    {
      "epoch": 1.7924170119727945,
      "grad_norm": 0.27412569522857666,
      "learning_rate": 6.457436924678611e-06,
      "loss": 0.0235,
      "step": 1095260
    },
    {
      "epoch": 1.7924497424114478,
      "grad_norm": 0.4158352315425873,
      "learning_rate": 6.457371032465094e-06,
      "loss": 0.0202,
      "step": 1095280
    },
    {
      "epoch": 1.792482472850101,
      "grad_norm": 0.45539790391921997,
      "learning_rate": 6.457305140251578e-06,
      "loss": 0.0119,
      "step": 1095300
    },
    {
      "epoch": 1.7925152032887546,
      "grad_norm": 0.6447953581809998,
      "learning_rate": 6.4572392480380596e-06,
      "loss": 0.02,
      "step": 1095320
    },
    {
      "epoch": 1.7925479337274077,
      "grad_norm": 0.4962618052959442,
      "learning_rate": 6.457173355824543e-06,
      "loss": 0.0129,
      "step": 1095340
    },
    {
      "epoch": 1.792580664166061,
      "grad_norm": 0.6229163408279419,
      "learning_rate": 6.457107463611025e-06,
      "loss": 0.015,
      "step": 1095360
    },
    {
      "epoch": 1.7926133946047145,
      "grad_norm": 0.908665120601654,
      "learning_rate": 6.457041571397509e-06,
      "loss": 0.0235,
      "step": 1095380
    },
    {
      "epoch": 1.7926461250433678,
      "grad_norm": 0.9656710624694824,
      "learning_rate": 6.456975679183991e-06,
      "loss": 0.0153,
      "step": 1095400
    },
    {
      "epoch": 1.7926788554820212,
      "grad_norm": 1.996642827987671,
      "learning_rate": 6.456909786970474e-06,
      "loss": 0.0228,
      "step": 1095420
    },
    {
      "epoch": 1.7927115859206744,
      "grad_norm": 0.5199609994888306,
      "learning_rate": 6.456843894756957e-06,
      "loss": 0.0204,
      "step": 1095440
    },
    {
      "epoch": 1.792744316359328,
      "grad_norm": 0.6962102651596069,
      "learning_rate": 6.4567780025434404e-06,
      "loss": 0.0166,
      "step": 1095460
    },
    {
      "epoch": 1.792777046797981,
      "grad_norm": 0.20317420363426208,
      "learning_rate": 6.456712110329922e-06,
      "loss": 0.0215,
      "step": 1095480
    },
    {
      "epoch": 1.7928097772366345,
      "grad_norm": 0.2905804514884949,
      "learning_rate": 6.456646218116406e-06,
      "loss": 0.0175,
      "step": 1095500
    },
    {
      "epoch": 1.7928425076752879,
      "grad_norm": 0.1226668432354927,
      "learning_rate": 6.456580325902888e-06,
      "loss": 0.0232,
      "step": 1095520
    },
    {
      "epoch": 1.7928752381139412,
      "grad_norm": 0.38847827911376953,
      "learning_rate": 6.456514433689371e-06,
      "loss": 0.0228,
      "step": 1095540
    },
    {
      "epoch": 1.7929079685525946,
      "grad_norm": 1.2250728607177734,
      "learning_rate": 6.456448541475855e-06,
      "loss": 0.0178,
      "step": 1095560
    },
    {
      "epoch": 1.7929406989912477,
      "grad_norm": 0.34085792303085327,
      "learning_rate": 6.456382649262337e-06,
      "loss": 0.0235,
      "step": 1095580
    },
    {
      "epoch": 1.7929734294299013,
      "grad_norm": 4.7341742515563965,
      "learning_rate": 6.4563167570488205e-06,
      "loss": 0.0142,
      "step": 1095600
    },
    {
      "epoch": 1.7930061598685545,
      "grad_norm": 0.574135422706604,
      "learning_rate": 6.456250864835303e-06,
      "loss": 0.0216,
      "step": 1095620
    },
    {
      "epoch": 1.7930388903072079,
      "grad_norm": 0.2935539484024048,
      "learning_rate": 6.456184972621786e-06,
      "loss": 0.0145,
      "step": 1095640
    },
    {
      "epoch": 1.7930716207458612,
      "grad_norm": 0.5574111342430115,
      "learning_rate": 6.456119080408269e-06,
      "loss": 0.0152,
      "step": 1095660
    },
    {
      "epoch": 1.7931043511845146,
      "grad_norm": 0.4494907557964325,
      "learning_rate": 6.456053188194752e-06,
      "loss": 0.0173,
      "step": 1095680
    },
    {
      "epoch": 1.793137081623168,
      "grad_norm": 0.39221957325935364,
      "learning_rate": 6.455987295981234e-06,
      "loss": 0.0136,
      "step": 1095700
    },
    {
      "epoch": 1.7931698120618211,
      "grad_norm": 0.15196403861045837,
      "learning_rate": 6.455921403767718e-06,
      "loss": 0.0123,
      "step": 1095720
    },
    {
      "epoch": 1.7932025425004747,
      "grad_norm": 0.19248494505882263,
      "learning_rate": 6.4558555115542e-06,
      "loss": 0.0183,
      "step": 1095740
    },
    {
      "epoch": 1.7932352729391279,
      "grad_norm": 0.1650741696357727,
      "learning_rate": 6.455789619340683e-06,
      "loss": 0.0254,
      "step": 1095760
    },
    {
      "epoch": 1.7932680033777813,
      "grad_norm": 1.1506855487823486,
      "learning_rate": 6.455723727127165e-06,
      "loss": 0.0256,
      "step": 1095780
    },
    {
      "epoch": 1.7933007338164346,
      "grad_norm": 0.8753132820129395,
      "learning_rate": 6.455657834913649e-06,
      "loss": 0.0155,
      "step": 1095800
    },
    {
      "epoch": 1.793333464255088,
      "grad_norm": 1.018924355506897,
      "learning_rate": 6.4555919427001315e-06,
      "loss": 0.0131,
      "step": 1095820
    },
    {
      "epoch": 1.7933661946937414,
      "grad_norm": 0.3111647367477417,
      "learning_rate": 6.455526050486615e-06,
      "loss": 0.0173,
      "step": 1095840
    },
    {
      "epoch": 1.7933989251323945,
      "grad_norm": 0.7767637968063354,
      "learning_rate": 6.455460158273097e-06,
      "loss": 0.0203,
      "step": 1095860
    },
    {
      "epoch": 1.7934316555710481,
      "grad_norm": 0.34466564655303955,
      "learning_rate": 6.4553942660595805e-06,
      "loss": 0.0182,
      "step": 1095880
    },
    {
      "epoch": 1.7934643860097013,
      "grad_norm": 0.25937145948410034,
      "learning_rate": 6.455328373846062e-06,
      "loss": 0.0188,
      "step": 1095900
    },
    {
      "epoch": 1.7934971164483546,
      "grad_norm": 0.055485330522060394,
      "learning_rate": 6.455262481632546e-06,
      "loss": 0.0195,
      "step": 1095920
    },
    {
      "epoch": 1.793529846887008,
      "grad_norm": 0.7833171486854553,
      "learning_rate": 6.45519658941903e-06,
      "loss": 0.0141,
      "step": 1095940
    },
    {
      "epoch": 1.7935625773256612,
      "grad_norm": 0.6705991625785828,
      "learning_rate": 6.4551306972055115e-06,
      "loss": 0.0186,
      "step": 1095960
    },
    {
      "epoch": 1.7935953077643148,
      "grad_norm": 0.2674960792064667,
      "learning_rate": 6.455064804991995e-06,
      "loss": 0.0265,
      "step": 1095980
    },
    {
      "epoch": 1.793628038202968,
      "grad_norm": 0.20246194303035736,
      "learning_rate": 6.454998912778477e-06,
      "loss": 0.0166,
      "step": 1096000
    },
    {
      "epoch": 1.7936607686416215,
      "grad_norm": 0.28401288390159607,
      "learning_rate": 6.4549330205649605e-06,
      "loss": 0.0122,
      "step": 1096020
    },
    {
      "epoch": 1.7936934990802746,
      "grad_norm": 0.4591871500015259,
      "learning_rate": 6.454867128351443e-06,
      "loss": 0.0153,
      "step": 1096040
    },
    {
      "epoch": 1.793726229518928,
      "grad_norm": 0.4981469511985779,
      "learning_rate": 6.454801236137926e-06,
      "loss": 0.0153,
      "step": 1096060
    },
    {
      "epoch": 1.7937589599575814,
      "grad_norm": 2.070758581161499,
      "learning_rate": 6.454735343924409e-06,
      "loss": 0.0208,
      "step": 1096080
    },
    {
      "epoch": 1.7937916903962345,
      "grad_norm": 0.3471578061580658,
      "learning_rate": 6.454669451710892e-06,
      "loss": 0.0136,
      "step": 1096100
    },
    {
      "epoch": 1.7938244208348881,
      "grad_norm": 0.21091626584529877,
      "learning_rate": 6.454603559497374e-06,
      "loss": 0.0139,
      "step": 1096120
    },
    {
      "epoch": 1.7938571512735413,
      "grad_norm": 0.09559576958417892,
      "learning_rate": 6.454537667283858e-06,
      "loss": 0.018,
      "step": 1096140
    },
    {
      "epoch": 1.7938898817121947,
      "grad_norm": 0.9559938311576843,
      "learning_rate": 6.45447177507034e-06,
      "loss": 0.0209,
      "step": 1096160
    },
    {
      "epoch": 1.793922612150848,
      "grad_norm": 0.6470156908035278,
      "learning_rate": 6.454405882856823e-06,
      "loss": 0.0208,
      "step": 1096180
    },
    {
      "epoch": 1.7939553425895014,
      "grad_norm": 0.44505056738853455,
      "learning_rate": 6.454339990643306e-06,
      "loss": 0.0177,
      "step": 1096200
    },
    {
      "epoch": 1.7939880730281548,
      "grad_norm": 0.5671343803405762,
      "learning_rate": 6.454274098429789e-06,
      "loss": 0.0231,
      "step": 1096220
    },
    {
      "epoch": 1.794020803466808,
      "grad_norm": 0.36045485734939575,
      "learning_rate": 6.4542082062162715e-06,
      "loss": 0.0193,
      "step": 1096240
    },
    {
      "epoch": 1.7940535339054615,
      "grad_norm": 0.5098423957824707,
      "learning_rate": 6.454142314002755e-06,
      "loss": 0.0221,
      "step": 1096260
    },
    {
      "epoch": 1.7940862643441147,
      "grad_norm": 0.525494396686554,
      "learning_rate": 6.454076421789238e-06,
      "loss": 0.0203,
      "step": 1096280
    },
    {
      "epoch": 1.794118994782768,
      "grad_norm": 0.2734754979610443,
      "learning_rate": 6.454010529575721e-06,
      "loss": 0.0157,
      "step": 1096300
    },
    {
      "epoch": 1.7941517252214214,
      "grad_norm": 0.5390104055404663,
      "learning_rate": 6.453944637362204e-06,
      "loss": 0.0188,
      "step": 1096320
    },
    {
      "epoch": 1.7941844556600748,
      "grad_norm": 2.420197010040283,
      "learning_rate": 6.453878745148686e-06,
      "loss": 0.0185,
      "step": 1096340
    },
    {
      "epoch": 1.7942171860987282,
      "grad_norm": 0.34779006242752075,
      "learning_rate": 6.45381285293517e-06,
      "loss": 0.0168,
      "step": 1096360
    },
    {
      "epoch": 1.7942499165373813,
      "grad_norm": 1.0615466833114624,
      "learning_rate": 6.4537469607216516e-06,
      "loss": 0.0305,
      "step": 1096380
    },
    {
      "epoch": 1.794282646976035,
      "grad_norm": 0.5582106113433838,
      "learning_rate": 6.453681068508135e-06,
      "loss": 0.0115,
      "step": 1096400
    },
    {
      "epoch": 1.794315377414688,
      "grad_norm": 0.38095399737358093,
      "learning_rate": 6.453615176294618e-06,
      "loss": 0.0157,
      "step": 1096420
    },
    {
      "epoch": 1.7943481078533414,
      "grad_norm": 1.3234986066818237,
      "learning_rate": 6.453549284081101e-06,
      "loss": 0.0169,
      "step": 1096440
    },
    {
      "epoch": 1.7943808382919948,
      "grad_norm": 1.1603314876556396,
      "learning_rate": 6.453483391867583e-06,
      "loss": 0.0213,
      "step": 1096460
    },
    {
      "epoch": 1.7944135687306482,
      "grad_norm": 0.29533565044403076,
      "learning_rate": 6.453417499654067e-06,
      "loss": 0.016,
      "step": 1096480
    },
    {
      "epoch": 1.7944462991693015,
      "grad_norm": 0.3067050576210022,
      "learning_rate": 6.453351607440549e-06,
      "loss": 0.0227,
      "step": 1096500
    },
    {
      "epoch": 1.7944790296079547,
      "grad_norm": 0.47309765219688416,
      "learning_rate": 6.4532857152270324e-06,
      "loss": 0.0203,
      "step": 1096520
    },
    {
      "epoch": 1.7945117600466083,
      "grad_norm": 0.468963086605072,
      "learning_rate": 6.453219823013514e-06,
      "loss": 0.0187,
      "step": 1096540
    },
    {
      "epoch": 1.7945444904852614,
      "grad_norm": 0.5636706352233887,
      "learning_rate": 6.453153930799998e-06,
      "loss": 0.0199,
      "step": 1096560
    },
    {
      "epoch": 1.7945772209239148,
      "grad_norm": 0.22991856932640076,
      "learning_rate": 6.453088038586481e-06,
      "loss": 0.0127,
      "step": 1096580
    },
    {
      "epoch": 1.7946099513625682,
      "grad_norm": 1.4823757410049438,
      "learning_rate": 6.453022146372963e-06,
      "loss": 0.021,
      "step": 1096600
    },
    {
      "epoch": 1.7946426818012216,
      "grad_norm": 0.1512167900800705,
      "learning_rate": 6.452956254159447e-06,
      "loss": 0.0229,
      "step": 1096620
    },
    {
      "epoch": 1.794675412239875,
      "grad_norm": 0.4893207252025604,
      "learning_rate": 6.45289036194593e-06,
      "loss": 0.023,
      "step": 1096640
    },
    {
      "epoch": 1.794708142678528,
      "grad_norm": 1.0169050693511963,
      "learning_rate": 6.4528244697324125e-06,
      "loss": 0.0177,
      "step": 1096660
    },
    {
      "epoch": 1.7947408731171817,
      "grad_norm": 1.532706618309021,
      "learning_rate": 6.452758577518895e-06,
      "loss": 0.0216,
      "step": 1096680
    },
    {
      "epoch": 1.7947736035558348,
      "grad_norm": 0.5531772971153259,
      "learning_rate": 6.452692685305379e-06,
      "loss": 0.022,
      "step": 1096700
    },
    {
      "epoch": 1.7948063339944882,
      "grad_norm": 0.357602596282959,
      "learning_rate": 6.452626793091861e-06,
      "loss": 0.0149,
      "step": 1096720
    },
    {
      "epoch": 1.7948390644331416,
      "grad_norm": 0.26322945952415466,
      "learning_rate": 6.452560900878344e-06,
      "loss": 0.0135,
      "step": 1096740
    },
    {
      "epoch": 1.7948717948717947,
      "grad_norm": 0.2652965486049652,
      "learning_rate": 6.452495008664826e-06,
      "loss": 0.0174,
      "step": 1096760
    },
    {
      "epoch": 1.7949045253104483,
      "grad_norm": 0.6044501662254333,
      "learning_rate": 6.45242911645131e-06,
      "loss": 0.025,
      "step": 1096780
    },
    {
      "epoch": 1.7949372557491015,
      "grad_norm": 0.25429943203926086,
      "learning_rate": 6.452363224237792e-06,
      "loss": 0.0199,
      "step": 1096800
    },
    {
      "epoch": 1.794969986187755,
      "grad_norm": 0.8397541642189026,
      "learning_rate": 6.452297332024275e-06,
      "loss": 0.0225,
      "step": 1096820
    },
    {
      "epoch": 1.7950027166264082,
      "grad_norm": 0.21470612287521362,
      "learning_rate": 6.452231439810758e-06,
      "loss": 0.016,
      "step": 1096840
    },
    {
      "epoch": 1.7950354470650616,
      "grad_norm": 0.7780201435089111,
      "learning_rate": 6.452165547597241e-06,
      "loss": 0.0203,
      "step": 1096860
    },
    {
      "epoch": 1.795068177503715,
      "grad_norm": 0.29969602823257446,
      "learning_rate": 6.4520996553837234e-06,
      "loss": 0.0225,
      "step": 1096880
    },
    {
      "epoch": 1.795100907942368,
      "grad_norm": 0.9035898447036743,
      "learning_rate": 6.452033763170207e-06,
      "loss": 0.0214,
      "step": 1096900
    },
    {
      "epoch": 1.7951336383810217,
      "grad_norm": 0.49104437232017517,
      "learning_rate": 6.451967870956689e-06,
      "loss": 0.0195,
      "step": 1096920
    },
    {
      "epoch": 1.7951663688196748,
      "grad_norm": 0.6101372838020325,
      "learning_rate": 6.4519019787431725e-06,
      "loss": 0.0235,
      "step": 1096940
    },
    {
      "epoch": 1.7951990992583282,
      "grad_norm": 0.625881552696228,
      "learning_rate": 6.451836086529656e-06,
      "loss": 0.0173,
      "step": 1096960
    },
    {
      "epoch": 1.7952318296969816,
      "grad_norm": 0.4632434844970703,
      "learning_rate": 6.451770194316138e-06,
      "loss": 0.0248,
      "step": 1096980
    },
    {
      "epoch": 1.795264560135635,
      "grad_norm": 0.8699290752410889,
      "learning_rate": 6.4517043021026216e-06,
      "loss": 0.0145,
      "step": 1097000
    },
    {
      "epoch": 1.7952972905742883,
      "grad_norm": 0.401753693819046,
      "learning_rate": 6.4516384098891035e-06,
      "loss": 0.0178,
      "step": 1097020
    },
    {
      "epoch": 1.7953300210129415,
      "grad_norm": 0.7742786407470703,
      "learning_rate": 6.451572517675587e-06,
      "loss": 0.02,
      "step": 1097040
    },
    {
      "epoch": 1.795362751451595,
      "grad_norm": 0.315518319606781,
      "learning_rate": 6.45150662546207e-06,
      "loss": 0.0121,
      "step": 1097060
    },
    {
      "epoch": 1.7953954818902482,
      "grad_norm": 1.2579283714294434,
      "learning_rate": 6.4514407332485525e-06,
      "loss": 0.0207,
      "step": 1097080
    },
    {
      "epoch": 1.7954282123289016,
      "grad_norm": 0.8194981217384338,
      "learning_rate": 6.451374841035035e-06,
      "loss": 0.0167,
      "step": 1097100
    },
    {
      "epoch": 1.795460942767555,
      "grad_norm": 0.6854661703109741,
      "learning_rate": 6.451308948821519e-06,
      "loss": 0.0263,
      "step": 1097120
    },
    {
      "epoch": 1.7954936732062083,
      "grad_norm": 0.1746535748243332,
      "learning_rate": 6.451243056608001e-06,
      "loss": 0.0177,
      "step": 1097140
    },
    {
      "epoch": 1.7955264036448617,
      "grad_norm": 1.775499701499939,
      "learning_rate": 6.451177164394484e-06,
      "loss": 0.019,
      "step": 1097160
    },
    {
      "epoch": 1.7955591340835149,
      "grad_norm": 0.9230082035064697,
      "learning_rate": 6.451111272180966e-06,
      "loss": 0.0227,
      "step": 1097180
    },
    {
      "epoch": 1.7955918645221685,
      "grad_norm": 1.0736514329910278,
      "learning_rate": 6.45104537996745e-06,
      "loss": 0.0187,
      "step": 1097200
    },
    {
      "epoch": 1.7956245949608216,
      "grad_norm": 0.1459333449602127,
      "learning_rate": 6.4509794877539326e-06,
      "loss": 0.0233,
      "step": 1097220
    },
    {
      "epoch": 1.795657325399475,
      "grad_norm": 0.038886651396751404,
      "learning_rate": 6.450913595540415e-06,
      "loss": 0.0192,
      "step": 1097240
    },
    {
      "epoch": 1.7956900558381284,
      "grad_norm": 0.4522522985935211,
      "learning_rate": 6.450847703326898e-06,
      "loss": 0.0159,
      "step": 1097260
    },
    {
      "epoch": 1.7957227862767817,
      "grad_norm": 0.11288526654243469,
      "learning_rate": 6.450781811113382e-06,
      "loss": 0.0209,
      "step": 1097280
    },
    {
      "epoch": 1.795755516715435,
      "grad_norm": 0.5333532691001892,
      "learning_rate": 6.4507159188998635e-06,
      "loss": 0.0212,
      "step": 1097300
    },
    {
      "epoch": 1.7957882471540882,
      "grad_norm": 0.5378667712211609,
      "learning_rate": 6.450650026686347e-06,
      "loss": 0.0139,
      "step": 1097320
    },
    {
      "epoch": 1.7958209775927418,
      "grad_norm": 1.0156917572021484,
      "learning_rate": 6.450584134472831e-06,
      "loss": 0.0221,
      "step": 1097340
    },
    {
      "epoch": 1.795853708031395,
      "grad_norm": 0.8473551273345947,
      "learning_rate": 6.450518242259313e-06,
      "loss": 0.0108,
      "step": 1097360
    },
    {
      "epoch": 1.7958864384700484,
      "grad_norm": 0.9539719820022583,
      "learning_rate": 6.450452350045796e-06,
      "loss": 0.0212,
      "step": 1097380
    },
    {
      "epoch": 1.7959191689087017,
      "grad_norm": 0.13612103462219238,
      "learning_rate": 6.450386457832278e-06,
      "loss": 0.0153,
      "step": 1097400
    },
    {
      "epoch": 1.795951899347355,
      "grad_norm": 0.6708200573921204,
      "learning_rate": 6.450320565618762e-06,
      "loss": 0.0158,
      "step": 1097420
    },
    {
      "epoch": 1.7959846297860085,
      "grad_norm": 0.23413273692131042,
      "learning_rate": 6.450254673405244e-06,
      "loss": 0.0189,
      "step": 1097440
    },
    {
      "epoch": 1.7960173602246616,
      "grad_norm": 0.5048810243606567,
      "learning_rate": 6.450188781191727e-06,
      "loss": 0.0181,
      "step": 1097460
    },
    {
      "epoch": 1.7960500906633152,
      "grad_norm": 0.7159578204154968,
      "learning_rate": 6.45012288897821e-06,
      "loss": 0.0197,
      "step": 1097480
    },
    {
      "epoch": 1.7960828211019684,
      "grad_norm": 0.14924483001232147,
      "learning_rate": 6.4500569967646935e-06,
      "loss": 0.0182,
      "step": 1097500
    },
    {
      "epoch": 1.7961155515406217,
      "grad_norm": 1.0156515836715698,
      "learning_rate": 6.449991104551175e-06,
      "loss": 0.0205,
      "step": 1097520
    },
    {
      "epoch": 1.7961482819792751,
      "grad_norm": 0.880463182926178,
      "learning_rate": 6.449925212337659e-06,
      "loss": 0.0154,
      "step": 1097540
    },
    {
      "epoch": 1.7961810124179283,
      "grad_norm": 0.6262348890304565,
      "learning_rate": 6.449859320124141e-06,
      "loss": 0.0213,
      "step": 1097560
    },
    {
      "epoch": 1.7962137428565819,
      "grad_norm": 0.6025868058204651,
      "learning_rate": 6.449793427910624e-06,
      "loss": 0.0191,
      "step": 1097580
    },
    {
      "epoch": 1.796246473295235,
      "grad_norm": 0.31612733006477356,
      "learning_rate": 6.449727535697107e-06,
      "loss": 0.0177,
      "step": 1097600
    },
    {
      "epoch": 1.7962792037338886,
      "grad_norm": 0.4781477749347687,
      "learning_rate": 6.44966164348359e-06,
      "loss": 0.0171,
      "step": 1097620
    },
    {
      "epoch": 1.7963119341725418,
      "grad_norm": 0.421141654253006,
      "learning_rate": 6.449595751270073e-06,
      "loss": 0.023,
      "step": 1097640
    },
    {
      "epoch": 1.7963446646111951,
      "grad_norm": 0.4136960208415985,
      "learning_rate": 6.449529859056556e-06,
      "loss": 0.0233,
      "step": 1097660
    },
    {
      "epoch": 1.7963773950498485,
      "grad_norm": 0.9401471018791199,
      "learning_rate": 6.449463966843039e-06,
      "loss": 0.0119,
      "step": 1097680
    },
    {
      "epoch": 1.7964101254885017,
      "grad_norm": 0.19782356917858124,
      "learning_rate": 6.449398074629522e-06,
      "loss": 0.0236,
      "step": 1097700
    },
    {
      "epoch": 1.7964428559271552,
      "grad_norm": 0.32897570729255676,
      "learning_rate": 6.449332182416005e-06,
      "loss": 0.0156,
      "step": 1097720
    },
    {
      "epoch": 1.7964755863658084,
      "grad_norm": 0.9579097628593445,
      "learning_rate": 6.449266290202487e-06,
      "loss": 0.0141,
      "step": 1097740
    },
    {
      "epoch": 1.7965083168044618,
      "grad_norm": 0.21363465487957,
      "learning_rate": 6.449200397988971e-06,
      "loss": 0.022,
      "step": 1097760
    },
    {
      "epoch": 1.7965410472431151,
      "grad_norm": 0.37478986382484436,
      "learning_rate": 6.449134505775453e-06,
      "loss": 0.0156,
      "step": 1097780
    },
    {
      "epoch": 1.7965737776817685,
      "grad_norm": 0.8032864928245544,
      "learning_rate": 6.449068613561936e-06,
      "loss": 0.0204,
      "step": 1097800
    },
    {
      "epoch": 1.7966065081204219,
      "grad_norm": 0.23782065510749817,
      "learning_rate": 6.449002721348418e-06,
      "loss": 0.0191,
      "step": 1097820
    },
    {
      "epoch": 1.796639238559075,
      "grad_norm": 0.7607043981552124,
      "learning_rate": 6.448936829134902e-06,
      "loss": 0.0178,
      "step": 1097840
    },
    {
      "epoch": 1.7966719689977286,
      "grad_norm": 0.601680338382721,
      "learning_rate": 6.4488709369213845e-06,
      "loss": 0.0171,
      "step": 1097860
    },
    {
      "epoch": 1.7967046994363818,
      "grad_norm": 1.2953118085861206,
      "learning_rate": 6.448805044707867e-06,
      "loss": 0.017,
      "step": 1097880
    },
    {
      "epoch": 1.7967374298750352,
      "grad_norm": 0.6631301641464233,
      "learning_rate": 6.44873915249435e-06,
      "loss": 0.0203,
      "step": 1097900
    },
    {
      "epoch": 1.7967701603136885,
      "grad_norm": 0.44969794154167175,
      "learning_rate": 6.4486732602808335e-06,
      "loss": 0.0181,
      "step": 1097920
    },
    {
      "epoch": 1.796802890752342,
      "grad_norm": 0.5520439147949219,
      "learning_rate": 6.448607368067315e-06,
      "loss": 0.0217,
      "step": 1097940
    },
    {
      "epoch": 1.7968356211909953,
      "grad_norm": 0.24568484723567963,
      "learning_rate": 6.448541475853799e-06,
      "loss": 0.0195,
      "step": 1097960
    },
    {
      "epoch": 1.7968683516296484,
      "grad_norm": 0.1339159905910492,
      "learning_rate": 6.448475583640281e-06,
      "loss": 0.0181,
      "step": 1097980
    },
    {
      "epoch": 1.796901082068302,
      "grad_norm": 0.06384684890508652,
      "learning_rate": 6.4484096914267645e-06,
      "loss": 0.0203,
      "step": 1098000
    },
    {
      "epoch": 1.7969338125069552,
      "grad_norm": 2.4203379154205322,
      "learning_rate": 6.448343799213248e-06,
      "loss": 0.0218,
      "step": 1098020
    },
    {
      "epoch": 1.7969665429456085,
      "grad_norm": 0.1103992611169815,
      "learning_rate": 6.44827790699973e-06,
      "loss": 0.0191,
      "step": 1098040
    },
    {
      "epoch": 1.796999273384262,
      "grad_norm": 1.1209944486618042,
      "learning_rate": 6.4482120147862136e-06,
      "loss": 0.0253,
      "step": 1098060
    },
    {
      "epoch": 1.7970320038229153,
      "grad_norm": 1.0341694355010986,
      "learning_rate": 6.448146122572696e-06,
      "loss": 0.0221,
      "step": 1098080
    },
    {
      "epoch": 1.7970647342615687,
      "grad_norm": 0.9656119346618652,
      "learning_rate": 6.448080230359179e-06,
      "loss": 0.0255,
      "step": 1098100
    },
    {
      "epoch": 1.7970974647002218,
      "grad_norm": 0.5925784111022949,
      "learning_rate": 6.448014338145662e-06,
      "loss": 0.0118,
      "step": 1098120
    },
    {
      "epoch": 1.7971301951388754,
      "grad_norm": 0.2580689787864685,
      "learning_rate": 6.447948445932145e-06,
      "loss": 0.0131,
      "step": 1098140
    },
    {
      "epoch": 1.7971629255775285,
      "grad_norm": 0.4649942219257355,
      "learning_rate": 6.447882553718627e-06,
      "loss": 0.0163,
      "step": 1098160
    },
    {
      "epoch": 1.797195656016182,
      "grad_norm": 0.8053608536720276,
      "learning_rate": 6.447816661505111e-06,
      "loss": 0.0132,
      "step": 1098180
    },
    {
      "epoch": 1.7972283864548353,
      "grad_norm": 0.3526557683944702,
      "learning_rate": 6.447750769291593e-06,
      "loss": 0.0265,
      "step": 1098200
    },
    {
      "epoch": 1.7972611168934884,
      "grad_norm": 0.3274306356906891,
      "learning_rate": 6.447684877078076e-06,
      "loss": 0.0188,
      "step": 1098220
    },
    {
      "epoch": 1.797293847332142,
      "grad_norm": 0.6144737005233765,
      "learning_rate": 6.447618984864559e-06,
      "loss": 0.0252,
      "step": 1098240
    },
    {
      "epoch": 1.7973265777707952,
      "grad_norm": 0.3764154314994812,
      "learning_rate": 6.447553092651042e-06,
      "loss": 0.0223,
      "step": 1098260
    },
    {
      "epoch": 1.7973593082094488,
      "grad_norm": 1.2063846588134766,
      "learning_rate": 6.4474872004375245e-06,
      "loss": 0.0181,
      "step": 1098280
    },
    {
      "epoch": 1.797392038648102,
      "grad_norm": 0.776576817035675,
      "learning_rate": 6.447421308224008e-06,
      "loss": 0.019,
      "step": 1098300
    },
    {
      "epoch": 1.7974247690867553,
      "grad_norm": 0.1473475843667984,
      "learning_rate": 6.44735541601049e-06,
      "loss": 0.0177,
      "step": 1098320
    },
    {
      "epoch": 1.7974574995254087,
      "grad_norm": 1.5787627696990967,
      "learning_rate": 6.447289523796974e-06,
      "loss": 0.0227,
      "step": 1098340
    },
    {
      "epoch": 1.7974902299640618,
      "grad_norm": 0.34772977232933044,
      "learning_rate": 6.4472236315834555e-06,
      "loss": 0.0228,
      "step": 1098360
    },
    {
      "epoch": 1.7975229604027154,
      "grad_norm": 0.3521752953529358,
      "learning_rate": 6.447157739369939e-06,
      "loss": 0.0239,
      "step": 1098380
    },
    {
      "epoch": 1.7975556908413686,
      "grad_norm": 0.8873413801193237,
      "learning_rate": 6.447091847156423e-06,
      "loss": 0.0241,
      "step": 1098400
    },
    {
      "epoch": 1.797588421280022,
      "grad_norm": 0.32602739334106445,
      "learning_rate": 6.4470259549429046e-06,
      "loss": 0.0194,
      "step": 1098420
    },
    {
      "epoch": 1.7976211517186753,
      "grad_norm": 0.8662844300270081,
      "learning_rate": 6.446960062729388e-06,
      "loss": 0.0168,
      "step": 1098440
    },
    {
      "epoch": 1.7976538821573287,
      "grad_norm": 0.7058100700378418,
      "learning_rate": 6.446894170515871e-06,
      "loss": 0.0181,
      "step": 1098460
    },
    {
      "epoch": 1.797686612595982,
      "grad_norm": 0.40123486518859863,
      "learning_rate": 6.446828278302354e-06,
      "loss": 0.0206,
      "step": 1098480
    },
    {
      "epoch": 1.7977193430346352,
      "grad_norm": 0.8147908449172974,
      "learning_rate": 6.446762386088836e-06,
      "loss": 0.0177,
      "step": 1098500
    },
    {
      "epoch": 1.7977520734732888,
      "grad_norm": 0.3888653516769409,
      "learning_rate": 6.44669649387532e-06,
      "loss": 0.0175,
      "step": 1098520
    },
    {
      "epoch": 1.797784803911942,
      "grad_norm": 0.6704077124595642,
      "learning_rate": 6.446630601661802e-06,
      "loss": 0.0193,
      "step": 1098540
    },
    {
      "epoch": 1.7978175343505953,
      "grad_norm": 0.6856595277786255,
      "learning_rate": 6.4465647094482854e-06,
      "loss": 0.0243,
      "step": 1098560
    },
    {
      "epoch": 1.7978502647892487,
      "grad_norm": 0.6612074971199036,
      "learning_rate": 6.446498817234767e-06,
      "loss": 0.0236,
      "step": 1098580
    },
    {
      "epoch": 1.797882995227902,
      "grad_norm": 0.09657387435436249,
      "learning_rate": 6.446432925021251e-06,
      "loss": 0.016,
      "step": 1098600
    },
    {
      "epoch": 1.7979157256665554,
      "grad_norm": 0.7927025556564331,
      "learning_rate": 6.446367032807734e-06,
      "loss": 0.0194,
      "step": 1098620
    },
    {
      "epoch": 1.7979484561052086,
      "grad_norm": 0.8368659615516663,
      "learning_rate": 6.446301140594216e-06,
      "loss": 0.0221,
      "step": 1098640
    },
    {
      "epoch": 1.7979811865438622,
      "grad_norm": 1.6635262966156006,
      "learning_rate": 6.446235248380699e-06,
      "loss": 0.0207,
      "step": 1098660
    },
    {
      "epoch": 1.7980139169825153,
      "grad_norm": 0.8499613404273987,
      "learning_rate": 6.446169356167183e-06,
      "loss": 0.0144,
      "step": 1098680
    },
    {
      "epoch": 1.7980466474211687,
      "grad_norm": 0.37306877970695496,
      "learning_rate": 6.446103463953665e-06,
      "loss": 0.0262,
      "step": 1098700
    },
    {
      "epoch": 1.798079377859822,
      "grad_norm": 0.18293878436088562,
      "learning_rate": 6.446037571740148e-06,
      "loss": 0.0149,
      "step": 1098720
    },
    {
      "epoch": 1.7981121082984755,
      "grad_norm": 1.7474260330200195,
      "learning_rate": 6.445971679526632e-06,
      "loss": 0.0196,
      "step": 1098740
    },
    {
      "epoch": 1.7981448387371288,
      "grad_norm": 0.6365993022918701,
      "learning_rate": 6.445905787313114e-06,
      "loss": 0.0198,
      "step": 1098760
    },
    {
      "epoch": 1.798177569175782,
      "grad_norm": 0.23899786174297333,
      "learning_rate": 6.445839895099597e-06,
      "loss": 0.0172,
      "step": 1098780
    },
    {
      "epoch": 1.7982102996144356,
      "grad_norm": 0.5661676526069641,
      "learning_rate": 6.445774002886079e-06,
      "loss": 0.0221,
      "step": 1098800
    },
    {
      "epoch": 1.7982430300530887,
      "grad_norm": 0.28598690032958984,
      "learning_rate": 6.445708110672563e-06,
      "loss": 0.0171,
      "step": 1098820
    },
    {
      "epoch": 1.798275760491742,
      "grad_norm": 0.6941928267478943,
      "learning_rate": 6.445642218459045e-06,
      "loss": 0.0134,
      "step": 1098840
    },
    {
      "epoch": 1.7983084909303955,
      "grad_norm": 0.2853056788444519,
      "learning_rate": 6.445576326245528e-06,
      "loss": 0.018,
      "step": 1098860
    },
    {
      "epoch": 1.7983412213690488,
      "grad_norm": 0.1296868920326233,
      "learning_rate": 6.445510434032011e-06,
      "loss": 0.0199,
      "step": 1098880
    },
    {
      "epoch": 1.7983739518077022,
      "grad_norm": 0.8370933532714844,
      "learning_rate": 6.445444541818494e-06,
      "loss": 0.0239,
      "step": 1098900
    },
    {
      "epoch": 1.7984066822463554,
      "grad_norm": 0.3397720158100128,
      "learning_rate": 6.4453786496049764e-06,
      "loss": 0.0174,
      "step": 1098920
    },
    {
      "epoch": 1.798439412685009,
      "grad_norm": 0.23037764430046082,
      "learning_rate": 6.44531275739146e-06,
      "loss": 0.0097,
      "step": 1098940
    },
    {
      "epoch": 1.798472143123662,
      "grad_norm": 0.15099762380123138,
      "learning_rate": 6.445246865177942e-06,
      "loss": 0.0125,
      "step": 1098960
    },
    {
      "epoch": 1.7985048735623155,
      "grad_norm": 0.3165169060230255,
      "learning_rate": 6.4451809729644255e-06,
      "loss": 0.0151,
      "step": 1098980
    },
    {
      "epoch": 1.7985376040009688,
      "grad_norm": 2.829402446746826,
      "learning_rate": 6.445115080750907e-06,
      "loss": 0.0297,
      "step": 1099000
    },
    {
      "epoch": 1.798570334439622,
      "grad_norm": 0.47659772634506226,
      "learning_rate": 6.445049188537391e-06,
      "loss": 0.0253,
      "step": 1099020
    },
    {
      "epoch": 1.7986030648782756,
      "grad_norm": 0.09563726931810379,
      "learning_rate": 6.444983296323874e-06,
      "loss": 0.0215,
      "step": 1099040
    },
    {
      "epoch": 1.7986357953169287,
      "grad_norm": 0.5149261355400085,
      "learning_rate": 6.4449174041103565e-06,
      "loss": 0.0139,
      "step": 1099060
    },
    {
      "epoch": 1.7986685257555823,
      "grad_norm": 0.47135114669799805,
      "learning_rate": 6.44485151189684e-06,
      "loss": 0.0205,
      "step": 1099080
    },
    {
      "epoch": 1.7987012561942355,
      "grad_norm": 0.17575609683990479,
      "learning_rate": 6.444785619683323e-06,
      "loss": 0.0122,
      "step": 1099100
    },
    {
      "epoch": 1.7987339866328889,
      "grad_norm": 0.603995680809021,
      "learning_rate": 6.4447197274698055e-06,
      "loss": 0.0192,
      "step": 1099120
    },
    {
      "epoch": 1.7987667170715422,
      "grad_norm": 0.6469171643257141,
      "learning_rate": 6.444653835256288e-06,
      "loss": 0.0151,
      "step": 1099140
    },
    {
      "epoch": 1.7987994475101954,
      "grad_norm": 0.5505967736244202,
      "learning_rate": 6.444587943042772e-06,
      "loss": 0.017,
      "step": 1099160
    },
    {
      "epoch": 1.798832177948849,
      "grad_norm": 0.3819245398044586,
      "learning_rate": 6.444522050829254e-06,
      "loss": 0.0273,
      "step": 1099180
    },
    {
      "epoch": 1.7988649083875021,
      "grad_norm": 0.22031669318675995,
      "learning_rate": 6.444456158615737e-06,
      "loss": 0.0165,
      "step": 1099200
    },
    {
      "epoch": 1.7988976388261555,
      "grad_norm": 0.9161601662635803,
      "learning_rate": 6.444390266402219e-06,
      "loss": 0.0163,
      "step": 1099220
    },
    {
      "epoch": 1.7989303692648089,
      "grad_norm": 0.09357526153326035,
      "learning_rate": 6.444324374188703e-06,
      "loss": 0.0162,
      "step": 1099240
    },
    {
      "epoch": 1.7989630997034622,
      "grad_norm": 0.4539022445678711,
      "learning_rate": 6.4442584819751856e-06,
      "loss": 0.0164,
      "step": 1099260
    },
    {
      "epoch": 1.7989958301421156,
      "grad_norm": 0.10336627066135406,
      "learning_rate": 6.444192589761668e-06,
      "loss": 0.0217,
      "step": 1099280
    },
    {
      "epoch": 1.7990285605807688,
      "grad_norm": 0.1417759209871292,
      "learning_rate": 6.444126697548151e-06,
      "loss": 0.0155,
      "step": 1099300
    },
    {
      "epoch": 1.7990612910194224,
      "grad_norm": 0.2751135528087616,
      "learning_rate": 6.444060805334635e-06,
      "loss": 0.0187,
      "step": 1099320
    },
    {
      "epoch": 1.7990940214580755,
      "grad_norm": 0.9583763480186462,
      "learning_rate": 6.4439949131211165e-06,
      "loss": 0.0188,
      "step": 1099340
    },
    {
      "epoch": 1.7991267518967289,
      "grad_norm": 0.3624230623245239,
      "learning_rate": 6.4439290209076e-06,
      "loss": 0.0111,
      "step": 1099360
    },
    {
      "epoch": 1.7991594823353823,
      "grad_norm": 0.14287015795707703,
      "learning_rate": 6.443863128694082e-06,
      "loss": 0.0162,
      "step": 1099380
    },
    {
      "epoch": 1.7991922127740356,
      "grad_norm": 0.16460265219211578,
      "learning_rate": 6.443797236480566e-06,
      "loss": 0.0231,
      "step": 1099400
    },
    {
      "epoch": 1.799224943212689,
      "grad_norm": 0.7662637829780579,
      "learning_rate": 6.443731344267048e-06,
      "loss": 0.0221,
      "step": 1099420
    },
    {
      "epoch": 1.7992576736513421,
      "grad_norm": 2.066688060760498,
      "learning_rate": 6.443665452053531e-06,
      "loss": 0.0251,
      "step": 1099440
    },
    {
      "epoch": 1.7992904040899957,
      "grad_norm": 0.184168741106987,
      "learning_rate": 6.443599559840015e-06,
      "loss": 0.0201,
      "step": 1099460
    },
    {
      "epoch": 1.799323134528649,
      "grad_norm": 1.3825911283493042,
      "learning_rate": 6.443533667626497e-06,
      "loss": 0.0231,
      "step": 1099480
    },
    {
      "epoch": 1.7993558649673023,
      "grad_norm": 4.842366695404053,
      "learning_rate": 6.44346777541298e-06,
      "loss": 0.0223,
      "step": 1099500
    },
    {
      "epoch": 1.7993885954059556,
      "grad_norm": 0.15392762422561646,
      "learning_rate": 6.443401883199463e-06,
      "loss": 0.0144,
      "step": 1099520
    },
    {
      "epoch": 1.799421325844609,
      "grad_norm": 1.0624018907546997,
      "learning_rate": 6.4433359909859465e-06,
      "loss": 0.0271,
      "step": 1099540
    },
    {
      "epoch": 1.7994540562832624,
      "grad_norm": 0.6170057058334351,
      "learning_rate": 6.443270098772428e-06,
      "loss": 0.0144,
      "step": 1099560
    },
    {
      "epoch": 1.7994867867219155,
      "grad_norm": 0.19178149104118347,
      "learning_rate": 6.443204206558912e-06,
      "loss": 0.0153,
      "step": 1099580
    },
    {
      "epoch": 1.7995195171605691,
      "grad_norm": 0.2609681785106659,
      "learning_rate": 6.443138314345394e-06,
      "loss": 0.0139,
      "step": 1099600
    },
    {
      "epoch": 1.7995522475992223,
      "grad_norm": 1.1911295652389526,
      "learning_rate": 6.443072422131877e-06,
      "loss": 0.0219,
      "step": 1099620
    },
    {
      "epoch": 1.7995849780378756,
      "grad_norm": 0.774768054485321,
      "learning_rate": 6.443006529918359e-06,
      "loss": 0.0234,
      "step": 1099640
    },
    {
      "epoch": 1.799617708476529,
      "grad_norm": 0.30179452896118164,
      "learning_rate": 6.442940637704843e-06,
      "loss": 0.0185,
      "step": 1099660
    },
    {
      "epoch": 1.7996504389151824,
      "grad_norm": 0.20437097549438477,
      "learning_rate": 6.442874745491326e-06,
      "loss": 0.0131,
      "step": 1099680
    },
    {
      "epoch": 1.7996831693538358,
      "grad_norm": 0.3511278033256531,
      "learning_rate": 6.442808853277809e-06,
      "loss": 0.0232,
      "step": 1099700
    },
    {
      "epoch": 1.799715899792489,
      "grad_norm": 0.6764723062515259,
      "learning_rate": 6.442742961064291e-06,
      "loss": 0.0146,
      "step": 1099720
    },
    {
      "epoch": 1.7997486302311425,
      "grad_norm": 3.5341742038726807,
      "learning_rate": 6.442677068850775e-06,
      "loss": 0.0171,
      "step": 1099740
    },
    {
      "epoch": 1.7997813606697957,
      "grad_norm": 1.0527737140655518,
      "learning_rate": 6.442611176637257e-06,
      "loss": 0.0131,
      "step": 1099760
    },
    {
      "epoch": 1.799814091108449,
      "grad_norm": 0.516698956489563,
      "learning_rate": 6.44254528442374e-06,
      "loss": 0.0213,
      "step": 1099780
    },
    {
      "epoch": 1.7998468215471024,
      "grad_norm": 0.3459785580635071,
      "learning_rate": 6.442479392210224e-06,
      "loss": 0.0156,
      "step": 1099800
    },
    {
      "epoch": 1.7998795519857556,
      "grad_norm": 0.43684881925582886,
      "learning_rate": 6.442413499996706e-06,
      "loss": 0.0217,
      "step": 1099820
    },
    {
      "epoch": 1.7999122824244091,
      "grad_norm": 0.7314164042472839,
      "learning_rate": 6.442347607783189e-06,
      "loss": 0.0167,
      "step": 1099840
    },
    {
      "epoch": 1.7999450128630623,
      "grad_norm": 0.7487033009529114,
      "learning_rate": 6.442281715569671e-06,
      "loss": 0.0264,
      "step": 1099860
    },
    {
      "epoch": 1.799977743301716,
      "grad_norm": 0.3575701117515564,
      "learning_rate": 6.442215823356155e-06,
      "loss": 0.021,
      "step": 1099880
    },
    {
      "epoch": 1.800010473740369,
      "grad_norm": 0.3143760859966278,
      "learning_rate": 6.4421499311426375e-06,
      "loss": 0.0205,
      "step": 1099900
    },
    {
      "epoch": 1.8000432041790224,
      "grad_norm": 0.7949225306510925,
      "learning_rate": 6.44208403892912e-06,
      "loss": 0.0209,
      "step": 1099920
    },
    {
      "epoch": 1.8000759346176758,
      "grad_norm": 3.1716504096984863,
      "learning_rate": 6.442018146715603e-06,
      "loss": 0.0126,
      "step": 1099940
    },
    {
      "epoch": 1.800108665056329,
      "grad_norm": 0.19067221879959106,
      "learning_rate": 6.4419522545020865e-06,
      "loss": 0.0162,
      "step": 1099960
    },
    {
      "epoch": 1.8001413954949825,
      "grad_norm": 0.5797996520996094,
      "learning_rate": 6.4418863622885684e-06,
      "loss": 0.017,
      "step": 1099980
    },
    {
      "epoch": 1.8001741259336357,
      "grad_norm": 0.5774976015090942,
      "learning_rate": 6.441820470075052e-06,
      "loss": 0.022,
      "step": 1100000
    },
    {
      "epoch": 1.8001741259336357,
      "eval_loss": 0.009843386709690094,
      "eval_runtime": 6524.2021,
      "eval_samples_per_second": 157.545,
      "eval_steps_per_second": 15.755,
      "eval_sts-dev_pearson_cosine": 0.9771321838128866,
      "eval_sts-dev_spearman_cosine": 0.8905146115839151,
      "step": 1100000
    },
    {
      "epoch": 1.800206856372289,
      "grad_norm": 0.21490631997585297,
      "learning_rate": 6.441754577861534e-06,
      "loss": 0.025,
      "step": 1100020
    },
    {
      "epoch": 1.8002395868109424,
      "grad_norm": 0.22061166167259216,
      "learning_rate": 6.4416886856480175e-06,
      "loss": 0.0118,
      "step": 1100040
    },
    {
      "epoch": 1.8002723172495958,
      "grad_norm": 0.4983935058116913,
      "learning_rate": 6.4416227934345e-06,
      "loss": 0.0138,
      "step": 1100060
    },
    {
      "epoch": 1.8003050476882492,
      "grad_norm": 0.6526800990104675,
      "learning_rate": 6.441556901220983e-06,
      "loss": 0.0196,
      "step": 1100080
    },
    {
      "epoch": 1.8003377781269023,
      "grad_norm": 1.775282382965088,
      "learning_rate": 6.441491009007466e-06,
      "loss": 0.0201,
      "step": 1100100
    },
    {
      "epoch": 1.800370508565556,
      "grad_norm": 0.7000213861465454,
      "learning_rate": 6.441425116793949e-06,
      "loss": 0.0166,
      "step": 1100120
    },
    {
      "epoch": 1.800403239004209,
      "grad_norm": 0.6368481516838074,
      "learning_rate": 6.441359224580432e-06,
      "loss": 0.0248,
      "step": 1100140
    },
    {
      "epoch": 1.8004359694428624,
      "grad_norm": 0.2331884801387787,
      "learning_rate": 6.441293332366915e-06,
      "loss": 0.0158,
      "step": 1100160
    },
    {
      "epoch": 1.8004686998815158,
      "grad_norm": 0.20969794690608978,
      "learning_rate": 6.441227440153398e-06,
      "loss": 0.0202,
      "step": 1100180
    },
    {
      "epoch": 1.8005014303201692,
      "grad_norm": 0.14055033028125763,
      "learning_rate": 6.44116154793988e-06,
      "loss": 0.0177,
      "step": 1100200
    },
    {
      "epoch": 1.8005341607588226,
      "grad_norm": 0.4093545377254486,
      "learning_rate": 6.441095655726364e-06,
      "loss": 0.0144,
      "step": 1100220
    },
    {
      "epoch": 1.8005668911974757,
      "grad_norm": 0.1237960010766983,
      "learning_rate": 6.441029763512846e-06,
      "loss": 0.0225,
      "step": 1100240
    },
    {
      "epoch": 1.8005996216361293,
      "grad_norm": 0.19247014820575714,
      "learning_rate": 6.440963871299329e-06,
      "loss": 0.0169,
      "step": 1100260
    },
    {
      "epoch": 1.8006323520747824,
      "grad_norm": 0.8805488348007202,
      "learning_rate": 6.440897979085812e-06,
      "loss": 0.0196,
      "step": 1100280
    },
    {
      "epoch": 1.8006650825134358,
      "grad_norm": 0.4321160316467285,
      "learning_rate": 6.440832086872295e-06,
      "loss": 0.017,
      "step": 1100300
    },
    {
      "epoch": 1.8006978129520892,
      "grad_norm": 0.1621999740600586,
      "learning_rate": 6.4407661946587775e-06,
      "loss": 0.0202,
      "step": 1100320
    },
    {
      "epoch": 1.8007305433907426,
      "grad_norm": 0.44402432441711426,
      "learning_rate": 6.440700302445261e-06,
      "loss": 0.0156,
      "step": 1100340
    },
    {
      "epoch": 1.800763273829396,
      "grad_norm": 0.39575567841529846,
      "learning_rate": 6.440634410231743e-06,
      "loss": 0.0138,
      "step": 1100360
    },
    {
      "epoch": 1.800796004268049,
      "grad_norm": 0.5368131399154663,
      "learning_rate": 6.440568518018227e-06,
      "loss": 0.0201,
      "step": 1100380
    },
    {
      "epoch": 1.8008287347067027,
      "grad_norm": 0.42178022861480713,
      "learning_rate": 6.4405026258047085e-06,
      "loss": 0.0196,
      "step": 1100400
    },
    {
      "epoch": 1.8008614651453558,
      "grad_norm": 0.3579745292663574,
      "learning_rate": 6.440436733591192e-06,
      "loss": 0.0199,
      "step": 1100420
    },
    {
      "epoch": 1.8008941955840092,
      "grad_norm": 0.4882543087005615,
      "learning_rate": 6.440370841377675e-06,
      "loss": 0.0232,
      "step": 1100440
    },
    {
      "epoch": 1.8009269260226626,
      "grad_norm": 0.6878477334976196,
      "learning_rate": 6.4403049491641576e-06,
      "loss": 0.0178,
      "step": 1100460
    },
    {
      "epoch": 1.800959656461316,
      "grad_norm": 0.3786255419254303,
      "learning_rate": 6.440239056950641e-06,
      "loss": 0.0221,
      "step": 1100480
    },
    {
      "epoch": 1.8009923868999693,
      "grad_norm": 0.20101958513259888,
      "learning_rate": 6.440173164737124e-06,
      "loss": 0.0205,
      "step": 1100500
    },
    {
      "epoch": 1.8010251173386225,
      "grad_norm": 0.11350931227207184,
      "learning_rate": 6.440107272523607e-06,
      "loss": 0.0186,
      "step": 1100520
    },
    {
      "epoch": 1.801057847777276,
      "grad_norm": 0.8876226544380188,
      "learning_rate": 6.440041380310089e-06,
      "loss": 0.0173,
      "step": 1100540
    },
    {
      "epoch": 1.8010905782159292,
      "grad_norm": 0.27751708030700684,
      "learning_rate": 6.439975488096573e-06,
      "loss": 0.0127,
      "step": 1100560
    },
    {
      "epoch": 1.8011233086545826,
      "grad_norm": 0.21793362498283386,
      "learning_rate": 6.439909595883055e-06,
      "loss": 0.0128,
      "step": 1100580
    },
    {
      "epoch": 1.801156039093236,
      "grad_norm": 0.9753024578094482,
      "learning_rate": 6.4398437036695384e-06,
      "loss": 0.0183,
      "step": 1100600
    },
    {
      "epoch": 1.8011887695318891,
      "grad_norm": 1.2092838287353516,
      "learning_rate": 6.43977781145602e-06,
      "loss": 0.0227,
      "step": 1100620
    },
    {
      "epoch": 1.8012214999705427,
      "grad_norm": 0.27635475993156433,
      "learning_rate": 6.439711919242504e-06,
      "loss": 0.0253,
      "step": 1100640
    },
    {
      "epoch": 1.8012542304091959,
      "grad_norm": 0.2948192358016968,
      "learning_rate": 6.439646027028986e-06,
      "loss": 0.0135,
      "step": 1100660
    },
    {
      "epoch": 1.8012869608478492,
      "grad_norm": 0.6112898588180542,
      "learning_rate": 6.439580134815469e-06,
      "loss": 0.0132,
      "step": 1100680
    },
    {
      "epoch": 1.8013196912865026,
      "grad_norm": 0.32930856943130493,
      "learning_rate": 6.439514242601952e-06,
      "loss": 0.0175,
      "step": 1100700
    },
    {
      "epoch": 1.801352421725156,
      "grad_norm": 0.3808259665966034,
      "learning_rate": 6.439448350388435e-06,
      "loss": 0.0208,
      "step": 1100720
    },
    {
      "epoch": 1.8013851521638093,
      "grad_norm": 0.13472437858581543,
      "learning_rate": 6.439382458174918e-06,
      "loss": 0.018,
      "step": 1100740
    },
    {
      "epoch": 1.8014178826024625,
      "grad_norm": 0.8571262955665588,
      "learning_rate": 6.439316565961401e-06,
      "loss": 0.0197,
      "step": 1100760
    },
    {
      "epoch": 1.801450613041116,
      "grad_norm": 1.8340342044830322,
      "learning_rate": 6.439250673747883e-06,
      "loss": 0.0226,
      "step": 1100780
    },
    {
      "epoch": 1.8014833434797692,
      "grad_norm": 0.38939085602760315,
      "learning_rate": 6.439184781534367e-06,
      "loss": 0.0116,
      "step": 1100800
    },
    {
      "epoch": 1.8015160739184226,
      "grad_norm": 0.17693118751049042,
      "learning_rate": 6.439118889320849e-06,
      "loss": 0.0176,
      "step": 1100820
    },
    {
      "epoch": 1.801548804357076,
      "grad_norm": 0.23253695666790009,
      "learning_rate": 6.439052997107332e-06,
      "loss": 0.0162,
      "step": 1100840
    },
    {
      "epoch": 1.8015815347957294,
      "grad_norm": 0.5646161437034607,
      "learning_rate": 6.438987104893816e-06,
      "loss": 0.0144,
      "step": 1100860
    },
    {
      "epoch": 1.8016142652343827,
      "grad_norm": 0.7588680982589722,
      "learning_rate": 6.438921212680298e-06,
      "loss": 0.0121,
      "step": 1100880
    },
    {
      "epoch": 1.8016469956730359,
      "grad_norm": 0.16318157315254211,
      "learning_rate": 6.438855320466781e-06,
      "loss": 0.0209,
      "step": 1100900
    },
    {
      "epoch": 1.8016797261116895,
      "grad_norm": 0.7441313862800598,
      "learning_rate": 6.438789428253264e-06,
      "loss": 0.0193,
      "step": 1100920
    },
    {
      "epoch": 1.8017124565503426,
      "grad_norm": 0.18208849430084229,
      "learning_rate": 6.438723536039747e-06,
      "loss": 0.0154,
      "step": 1100940
    },
    {
      "epoch": 1.801745186988996,
      "grad_norm": 0.13103623688220978,
      "learning_rate": 6.4386576438262295e-06,
      "loss": 0.0192,
      "step": 1100960
    },
    {
      "epoch": 1.8017779174276494,
      "grad_norm": 0.6412267684936523,
      "learning_rate": 6.438591751612713e-06,
      "loss": 0.0195,
      "step": 1100980
    },
    {
      "epoch": 1.8018106478663027,
      "grad_norm": 1.4548368453979492,
      "learning_rate": 6.438525859399195e-06,
      "loss": 0.0119,
      "step": 1101000
    },
    {
      "epoch": 1.8018433783049561,
      "grad_norm": 1.0268136262893677,
      "learning_rate": 6.4384599671856785e-06,
      "loss": 0.0308,
      "step": 1101020
    },
    {
      "epoch": 1.8018761087436093,
      "grad_norm": 0.22978147864341736,
      "learning_rate": 6.43839407497216e-06,
      "loss": 0.0164,
      "step": 1101040
    },
    {
      "epoch": 1.8019088391822629,
      "grad_norm": 0.5170550346374512,
      "learning_rate": 6.438328182758644e-06,
      "loss": 0.0158,
      "step": 1101060
    },
    {
      "epoch": 1.801941569620916,
      "grad_norm": 0.8879603147506714,
      "learning_rate": 6.438262290545127e-06,
      "loss": 0.0212,
      "step": 1101080
    },
    {
      "epoch": 1.8019743000595694,
      "grad_norm": 0.5034281015396118,
      "learning_rate": 6.4381963983316095e-06,
      "loss": 0.0154,
      "step": 1101100
    },
    {
      "epoch": 1.8020070304982228,
      "grad_norm": 0.6011089086532593,
      "learning_rate": 6.438130506118092e-06,
      "loss": 0.0113,
      "step": 1101120
    },
    {
      "epoch": 1.8020397609368761,
      "grad_norm": 0.32259872555732727,
      "learning_rate": 6.438064613904576e-06,
      "loss": 0.0132,
      "step": 1101140
    },
    {
      "epoch": 1.8020724913755295,
      "grad_norm": 0.21150463819503784,
      "learning_rate": 6.437998721691058e-06,
      "loss": 0.0156,
      "step": 1101160
    },
    {
      "epoch": 1.8021052218141826,
      "grad_norm": 0.3329395055770874,
      "learning_rate": 6.437932829477541e-06,
      "loss": 0.0157,
      "step": 1101180
    },
    {
      "epoch": 1.8021379522528362,
      "grad_norm": 0.1477336883544922,
      "learning_rate": 6.437866937264025e-06,
      "loss": 0.0229,
      "step": 1101200
    },
    {
      "epoch": 1.8021706826914894,
      "grad_norm": 0.20162004232406616,
      "learning_rate": 6.437801045050507e-06,
      "loss": 0.0172,
      "step": 1101220
    },
    {
      "epoch": 1.8022034131301428,
      "grad_norm": 0.4986603260040283,
      "learning_rate": 6.43773515283699e-06,
      "loss": 0.0204,
      "step": 1101240
    },
    {
      "epoch": 1.8022361435687961,
      "grad_norm": 0.43770477175712585,
      "learning_rate": 6.437669260623472e-06,
      "loss": 0.0145,
      "step": 1101260
    },
    {
      "epoch": 1.8022688740074493,
      "grad_norm": 0.8582679629325867,
      "learning_rate": 6.437603368409956e-06,
      "loss": 0.0165,
      "step": 1101280
    },
    {
      "epoch": 1.8023016044461029,
      "grad_norm": 0.39237070083618164,
      "learning_rate": 6.4375374761964386e-06,
      "loss": 0.0167,
      "step": 1101300
    },
    {
      "epoch": 1.802334334884756,
      "grad_norm": 0.9416365027427673,
      "learning_rate": 6.437471583982921e-06,
      "loss": 0.0151,
      "step": 1101320
    },
    {
      "epoch": 1.8023670653234096,
      "grad_norm": 0.2571598291397095,
      "learning_rate": 6.437405691769404e-06,
      "loss": 0.0222,
      "step": 1101340
    },
    {
      "epoch": 1.8023997957620628,
      "grad_norm": 1.132981777191162,
      "learning_rate": 6.437339799555888e-06,
      "loss": 0.0205,
      "step": 1101360
    },
    {
      "epoch": 1.8024325262007161,
      "grad_norm": 1.7021217346191406,
      "learning_rate": 6.4372739073423695e-06,
      "loss": 0.0201,
      "step": 1101380
    },
    {
      "epoch": 1.8024652566393695,
      "grad_norm": 0.4438890814781189,
      "learning_rate": 6.437208015128853e-06,
      "loss": 0.0157,
      "step": 1101400
    },
    {
      "epoch": 1.8024979870780227,
      "grad_norm": 1.8189371824264526,
      "learning_rate": 6.437142122915335e-06,
      "loss": 0.0213,
      "step": 1101420
    },
    {
      "epoch": 1.8025307175166763,
      "grad_norm": 1.549032211303711,
      "learning_rate": 6.437076230701819e-06,
      "loss": 0.0185,
      "step": 1101440
    },
    {
      "epoch": 1.8025634479553294,
      "grad_norm": 0.6930055022239685,
      "learning_rate": 6.437010338488301e-06,
      "loss": 0.0174,
      "step": 1101460
    },
    {
      "epoch": 1.8025961783939828,
      "grad_norm": 0.4879258871078491,
      "learning_rate": 6.436944446274784e-06,
      "loss": 0.0239,
      "step": 1101480
    },
    {
      "epoch": 1.8026289088326362,
      "grad_norm": 0.34481582045555115,
      "learning_rate": 6.436878554061267e-06,
      "loss": 0.013,
      "step": 1101500
    },
    {
      "epoch": 1.8026616392712895,
      "grad_norm": 0.24265900254249573,
      "learning_rate": 6.43681266184775e-06,
      "loss": 0.0181,
      "step": 1101520
    },
    {
      "epoch": 1.802694369709943,
      "grad_norm": 0.36082497239112854,
      "learning_rate": 6.436746769634233e-06,
      "loss": 0.0219,
      "step": 1101540
    },
    {
      "epoch": 1.802727100148596,
      "grad_norm": 0.5194503664970398,
      "learning_rate": 6.436680877420716e-06,
      "loss": 0.014,
      "step": 1101560
    },
    {
      "epoch": 1.8027598305872496,
      "grad_norm": 1.1070644855499268,
      "learning_rate": 6.4366149852071995e-06,
      "loss": 0.0268,
      "step": 1101580
    },
    {
      "epoch": 1.8027925610259028,
      "grad_norm": 0.42156359553337097,
      "learning_rate": 6.436549092993681e-06,
      "loss": 0.0163,
      "step": 1101600
    },
    {
      "epoch": 1.8028252914645562,
      "grad_norm": 0.633604109287262,
      "learning_rate": 6.436483200780165e-06,
      "loss": 0.018,
      "step": 1101620
    },
    {
      "epoch": 1.8028580219032095,
      "grad_norm": 0.8351585865020752,
      "learning_rate": 6.436417308566647e-06,
      "loss": 0.0116,
      "step": 1101640
    },
    {
      "epoch": 1.802890752341863,
      "grad_norm": 0.8329461812973022,
      "learning_rate": 6.4363514163531304e-06,
      "loss": 0.0221,
      "step": 1101660
    },
    {
      "epoch": 1.8029234827805163,
      "grad_norm": 0.36570438742637634,
      "learning_rate": 6.436285524139612e-06,
      "loss": 0.0219,
      "step": 1101680
    },
    {
      "epoch": 1.8029562132191694,
      "grad_norm": 0.33261218667030334,
      "learning_rate": 6.436219631926096e-06,
      "loss": 0.0142,
      "step": 1101700
    },
    {
      "epoch": 1.802988943657823,
      "grad_norm": 1.2095177173614502,
      "learning_rate": 6.436153739712579e-06,
      "loss": 0.0234,
      "step": 1101720
    },
    {
      "epoch": 1.8030216740964762,
      "grad_norm": 0.16580136120319366,
      "learning_rate": 6.436087847499061e-06,
      "loss": 0.0146,
      "step": 1101740
    },
    {
      "epoch": 1.8030544045351296,
      "grad_norm": 0.47734737396240234,
      "learning_rate": 6.436021955285544e-06,
      "loss": 0.0138,
      "step": 1101760
    },
    {
      "epoch": 1.803087134973783,
      "grad_norm": 0.18974818289279938,
      "learning_rate": 6.435956063072028e-06,
      "loss": 0.0142,
      "step": 1101780
    },
    {
      "epoch": 1.8031198654124363,
      "grad_norm": 0.8790299892425537,
      "learning_rate": 6.43589017085851e-06,
      "loss": 0.0185,
      "step": 1101800
    },
    {
      "epoch": 1.8031525958510897,
      "grad_norm": 0.4089786410331726,
      "learning_rate": 6.435824278644993e-06,
      "loss": 0.0148,
      "step": 1101820
    },
    {
      "epoch": 1.8031853262897428,
      "grad_norm": 0.5419492125511169,
      "learning_rate": 6.435758386431475e-06,
      "loss": 0.0164,
      "step": 1101840
    },
    {
      "epoch": 1.8032180567283964,
      "grad_norm": 0.37813183665275574,
      "learning_rate": 6.435692494217959e-06,
      "loss": 0.015,
      "step": 1101860
    },
    {
      "epoch": 1.8032507871670496,
      "grad_norm": 0.10790491104125977,
      "learning_rate": 6.435626602004441e-06,
      "loss": 0.0109,
      "step": 1101880
    },
    {
      "epoch": 1.803283517605703,
      "grad_norm": 0.8094467520713806,
      "learning_rate": 6.435560709790924e-06,
      "loss": 0.0175,
      "step": 1101900
    },
    {
      "epoch": 1.8033162480443563,
      "grad_norm": 0.8813210725784302,
      "learning_rate": 6.435494817577408e-06,
      "loss": 0.0155,
      "step": 1101920
    },
    {
      "epoch": 1.8033489784830097,
      "grad_norm": 0.5928633809089661,
      "learning_rate": 6.4354289253638905e-06,
      "loss": 0.0227,
      "step": 1101940
    },
    {
      "epoch": 1.803381708921663,
      "grad_norm": 0.14376768469810486,
      "learning_rate": 6.435363033150373e-06,
      "loss": 0.019,
      "step": 1101960
    },
    {
      "epoch": 1.8034144393603162,
      "grad_norm": 0.5371038317680359,
      "learning_rate": 6.435297140936856e-06,
      "loss": 0.0171,
      "step": 1101980
    },
    {
      "epoch": 1.8034471697989698,
      "grad_norm": 0.331361323595047,
      "learning_rate": 6.4352312487233395e-06,
      "loss": 0.02,
      "step": 1102000
    },
    {
      "epoch": 1.803479900237623,
      "grad_norm": 0.334441214799881,
      "learning_rate": 6.4351653565098214e-06,
      "loss": 0.0158,
      "step": 1102020
    },
    {
      "epoch": 1.8035126306762763,
      "grad_norm": 0.6612862348556519,
      "learning_rate": 6.435099464296305e-06,
      "loss": 0.016,
      "step": 1102040
    },
    {
      "epoch": 1.8035453611149297,
      "grad_norm": 0.160527303814888,
      "learning_rate": 6.435033572082787e-06,
      "loss": 0.0188,
      "step": 1102060
    },
    {
      "epoch": 1.8035780915535828,
      "grad_norm": 0.059774402529001236,
      "learning_rate": 6.4349676798692705e-06,
      "loss": 0.0214,
      "step": 1102080
    },
    {
      "epoch": 1.8036108219922364,
      "grad_norm": 0.6744025349617004,
      "learning_rate": 6.434901787655753e-06,
      "loss": 0.0162,
      "step": 1102100
    },
    {
      "epoch": 1.8036435524308896,
      "grad_norm": 0.8370162844657898,
      "learning_rate": 6.434835895442236e-06,
      "loss": 0.0213,
      "step": 1102120
    },
    {
      "epoch": 1.8036762828695432,
      "grad_norm": 0.32396313548088074,
      "learning_rate": 6.434770003228719e-06,
      "loss": 0.0142,
      "step": 1102140
    },
    {
      "epoch": 1.8037090133081963,
      "grad_norm": 0.22011740505695343,
      "learning_rate": 6.434704111015202e-06,
      "loss": 0.0202,
      "step": 1102160
    },
    {
      "epoch": 1.8037417437468497,
      "grad_norm": 0.3302273154258728,
      "learning_rate": 6.434638218801684e-06,
      "loss": 0.0281,
      "step": 1102180
    },
    {
      "epoch": 1.803774474185503,
      "grad_norm": 0.5255743861198425,
      "learning_rate": 6.434572326588168e-06,
      "loss": 0.0132,
      "step": 1102200
    },
    {
      "epoch": 1.8038072046241562,
      "grad_norm": 0.16897134482860565,
      "learning_rate": 6.43450643437465e-06,
      "loss": 0.0152,
      "step": 1102220
    },
    {
      "epoch": 1.8038399350628098,
      "grad_norm": 0.685448408126831,
      "learning_rate": 6.434440542161133e-06,
      "loss": 0.0191,
      "step": 1102240
    },
    {
      "epoch": 1.803872665501463,
      "grad_norm": 0.8969343304634094,
      "learning_rate": 6.434374649947617e-06,
      "loss": 0.0208,
      "step": 1102260
    },
    {
      "epoch": 1.8039053959401163,
      "grad_norm": 0.4673728942871094,
      "learning_rate": 6.434308757734099e-06,
      "loss": 0.02,
      "step": 1102280
    },
    {
      "epoch": 1.8039381263787697,
      "grad_norm": 0.2841759920120239,
      "learning_rate": 6.434242865520582e-06,
      "loss": 0.0248,
      "step": 1102300
    },
    {
      "epoch": 1.803970856817423,
      "grad_norm": 0.7586455345153809,
      "learning_rate": 6.434176973307065e-06,
      "loss": 0.0178,
      "step": 1102320
    },
    {
      "epoch": 1.8040035872560765,
      "grad_norm": 0.9435626268386841,
      "learning_rate": 6.434111081093548e-06,
      "loss": 0.0207,
      "step": 1102340
    },
    {
      "epoch": 1.8040363176947296,
      "grad_norm": 0.8121697902679443,
      "learning_rate": 6.4340451888800306e-06,
      "loss": 0.0201,
      "step": 1102360
    },
    {
      "epoch": 1.8040690481333832,
      "grad_norm": 0.8869174122810364,
      "learning_rate": 6.433979296666514e-06,
      "loss": 0.0145,
      "step": 1102380
    },
    {
      "epoch": 1.8041017785720364,
      "grad_norm": 0.4864296317100525,
      "learning_rate": 6.433913404452996e-06,
      "loss": 0.0232,
      "step": 1102400
    },
    {
      "epoch": 1.8041345090106897,
      "grad_norm": 0.4104187786579132,
      "learning_rate": 6.43384751223948e-06,
      "loss": 0.0222,
      "step": 1102420
    },
    {
      "epoch": 1.804167239449343,
      "grad_norm": 0.2095903754234314,
      "learning_rate": 6.4337816200259615e-06,
      "loss": 0.0157,
      "step": 1102440
    },
    {
      "epoch": 1.8041999698879965,
      "grad_norm": 0.6904124021530151,
      "learning_rate": 6.433715727812445e-06,
      "loss": 0.0139,
      "step": 1102460
    },
    {
      "epoch": 1.8042327003266498,
      "grad_norm": 1.3660441637039185,
      "learning_rate": 6.433649835598928e-06,
      "loss": 0.0276,
      "step": 1102480
    },
    {
      "epoch": 1.804265430765303,
      "grad_norm": 0.34845998883247375,
      "learning_rate": 6.433583943385411e-06,
      "loss": 0.017,
      "step": 1102500
    },
    {
      "epoch": 1.8042981612039566,
      "grad_norm": 0.5248504281044006,
      "learning_rate": 6.433518051171893e-06,
      "loss": 0.0162,
      "step": 1102520
    },
    {
      "epoch": 1.8043308916426097,
      "grad_norm": 0.4828827381134033,
      "learning_rate": 6.433452158958377e-06,
      "loss": 0.0169,
      "step": 1102540
    },
    {
      "epoch": 1.804363622081263,
      "grad_norm": 0.25746187567710876,
      "learning_rate": 6.433386266744859e-06,
      "loss": 0.0168,
      "step": 1102560
    },
    {
      "epoch": 1.8043963525199165,
      "grad_norm": 0.5975104570388794,
      "learning_rate": 6.433320374531342e-06,
      "loss": 0.019,
      "step": 1102580
    },
    {
      "epoch": 1.8044290829585699,
      "grad_norm": 0.42747262120246887,
      "learning_rate": 6.433254482317826e-06,
      "loss": 0.0263,
      "step": 1102600
    },
    {
      "epoch": 1.8044618133972232,
      "grad_norm": 0.38924500346183777,
      "learning_rate": 6.433188590104308e-06,
      "loss": 0.0182,
      "step": 1102620
    },
    {
      "epoch": 1.8044945438358764,
      "grad_norm": 0.6695871353149414,
      "learning_rate": 6.4331226978907914e-06,
      "loss": 0.0141,
      "step": 1102640
    },
    {
      "epoch": 1.80452727427453,
      "grad_norm": 0.3037739098072052,
      "learning_rate": 6.433056805677273e-06,
      "loss": 0.0232,
      "step": 1102660
    },
    {
      "epoch": 1.8045600047131831,
      "grad_norm": 0.4043181836605072,
      "learning_rate": 6.432990913463757e-06,
      "loss": 0.0203,
      "step": 1102680
    },
    {
      "epoch": 1.8045927351518365,
      "grad_norm": 0.42614972591400146,
      "learning_rate": 6.432925021250239e-06,
      "loss": 0.0176,
      "step": 1102700
    },
    {
      "epoch": 1.8046254655904899,
      "grad_norm": 0.4432375133037567,
      "learning_rate": 6.432859129036722e-06,
      "loss": 0.0135,
      "step": 1102720
    },
    {
      "epoch": 1.8046581960291432,
      "grad_norm": 0.3420044481754303,
      "learning_rate": 6.432793236823205e-06,
      "loss": 0.0186,
      "step": 1102740
    },
    {
      "epoch": 1.8046909264677966,
      "grad_norm": 1.124955654144287,
      "learning_rate": 6.432727344609688e-06,
      "loss": 0.0278,
      "step": 1102760
    },
    {
      "epoch": 1.8047236569064498,
      "grad_norm": 0.41262760758399963,
      "learning_rate": 6.432661452396171e-06,
      "loss": 0.0224,
      "step": 1102780
    },
    {
      "epoch": 1.8047563873451034,
      "grad_norm": 0.29988494515419006,
      "learning_rate": 6.432595560182654e-06,
      "loss": 0.0222,
      "step": 1102800
    },
    {
      "epoch": 1.8047891177837565,
      "grad_norm": 0.39126691222190857,
      "learning_rate": 6.432529667969136e-06,
      "loss": 0.0222,
      "step": 1102820
    },
    {
      "epoch": 1.8048218482224099,
      "grad_norm": 0.523044228553772,
      "learning_rate": 6.43246377575562e-06,
      "loss": 0.0174,
      "step": 1102840
    },
    {
      "epoch": 1.8048545786610632,
      "grad_norm": 0.30360645055770874,
      "learning_rate": 6.432397883542102e-06,
      "loss": 0.019,
      "step": 1102860
    },
    {
      "epoch": 1.8048873090997164,
      "grad_norm": 0.2105039656162262,
      "learning_rate": 6.432331991328585e-06,
      "loss": 0.0186,
      "step": 1102880
    },
    {
      "epoch": 1.80492003953837,
      "grad_norm": 0.13426296412944794,
      "learning_rate": 6.432266099115068e-06,
      "loss": 0.0258,
      "step": 1102900
    },
    {
      "epoch": 1.8049527699770231,
      "grad_norm": 0.9893357157707214,
      "learning_rate": 6.432200206901551e-06,
      "loss": 0.0192,
      "step": 1102920
    },
    {
      "epoch": 1.8049855004156767,
      "grad_norm": 0.32418930530548096,
      "learning_rate": 6.432134314688033e-06,
      "loss": 0.0201,
      "step": 1102940
    },
    {
      "epoch": 1.8050182308543299,
      "grad_norm": 0.41138580441474915,
      "learning_rate": 6.432068422474517e-06,
      "loss": 0.0155,
      "step": 1102960
    },
    {
      "epoch": 1.8050509612929833,
      "grad_norm": 0.6172399520874023,
      "learning_rate": 6.432002530261e-06,
      "loss": 0.0147,
      "step": 1102980
    },
    {
      "epoch": 1.8050836917316366,
      "grad_norm": 0.24903886020183563,
      "learning_rate": 6.4319366380474825e-06,
      "loss": 0.0211,
      "step": 1103000
    },
    {
      "epoch": 1.8051164221702898,
      "grad_norm": 0.20827792584896088,
      "learning_rate": 6.431870745833966e-06,
      "loss": 0.027,
      "step": 1103020
    },
    {
      "epoch": 1.8051491526089434,
      "grad_norm": 0.29998210072517395,
      "learning_rate": 6.431804853620448e-06,
      "loss": 0.0104,
      "step": 1103040
    },
    {
      "epoch": 1.8051818830475965,
      "grad_norm": 0.40267622470855713,
      "learning_rate": 6.4317389614069315e-06,
      "loss": 0.0169,
      "step": 1103060
    },
    {
      "epoch": 1.80521461348625,
      "grad_norm": 0.2973020076751709,
      "learning_rate": 6.431673069193413e-06,
      "loss": 0.0136,
      "step": 1103080
    },
    {
      "epoch": 1.8052473439249033,
      "grad_norm": 0.8782591223716736,
      "learning_rate": 6.431607176979897e-06,
      "loss": 0.0245,
      "step": 1103100
    },
    {
      "epoch": 1.8052800743635566,
      "grad_norm": 0.29134100675582886,
      "learning_rate": 6.43154128476638e-06,
      "loss": 0.0226,
      "step": 1103120
    },
    {
      "epoch": 1.80531280480221,
      "grad_norm": 0.5809491872787476,
      "learning_rate": 6.4314753925528625e-06,
      "loss": 0.0178,
      "step": 1103140
    },
    {
      "epoch": 1.8053455352408632,
      "grad_norm": 0.09830794483423233,
      "learning_rate": 6.431409500339345e-06,
      "loss": 0.0191,
      "step": 1103160
    },
    {
      "epoch": 1.8053782656795168,
      "grad_norm": 0.531363844871521,
      "learning_rate": 6.431343608125829e-06,
      "loss": 0.0143,
      "step": 1103180
    },
    {
      "epoch": 1.80541099611817,
      "grad_norm": 0.5619561672210693,
      "learning_rate": 6.431277715912311e-06,
      "loss": 0.0196,
      "step": 1103200
    },
    {
      "epoch": 1.8054437265568233,
      "grad_norm": 0.39949578046798706,
      "learning_rate": 6.431211823698794e-06,
      "loss": 0.02,
      "step": 1103220
    },
    {
      "epoch": 1.8054764569954767,
      "grad_norm": 0.5014640688896179,
      "learning_rate": 6.431145931485276e-06,
      "loss": 0.0168,
      "step": 1103240
    },
    {
      "epoch": 1.80550918743413,
      "grad_norm": 0.5153996348381042,
      "learning_rate": 6.43108003927176e-06,
      "loss": 0.0155,
      "step": 1103260
    },
    {
      "epoch": 1.8055419178727834,
      "grad_norm": 0.6067171096801758,
      "learning_rate": 6.4310141470582425e-06,
      "loss": 0.017,
      "step": 1103280
    },
    {
      "epoch": 1.8055746483114365,
      "grad_norm": 0.7492713332176208,
      "learning_rate": 6.430948254844725e-06,
      "loss": 0.0181,
      "step": 1103300
    },
    {
      "epoch": 1.8056073787500901,
      "grad_norm": 0.1645120531320572,
      "learning_rate": 6.430882362631209e-06,
      "loss": 0.0233,
      "step": 1103320
    },
    {
      "epoch": 1.8056401091887433,
      "grad_norm": 0.4524906277656555,
      "learning_rate": 6.4308164704176916e-06,
      "loss": 0.0182,
      "step": 1103340
    },
    {
      "epoch": 1.8056728396273967,
      "grad_norm": 0.45693379640579224,
      "learning_rate": 6.430750578204174e-06,
      "loss": 0.0205,
      "step": 1103360
    },
    {
      "epoch": 1.80570557006605,
      "grad_norm": 0.38459715247154236,
      "learning_rate": 6.430684685990657e-06,
      "loss": 0.0217,
      "step": 1103380
    },
    {
      "epoch": 1.8057383005047034,
      "grad_norm": 0.7465302348136902,
      "learning_rate": 6.430618793777141e-06,
      "loss": 0.0177,
      "step": 1103400
    },
    {
      "epoch": 1.8057710309433568,
      "grad_norm": 0.20654162764549255,
      "learning_rate": 6.4305529015636225e-06,
      "loss": 0.0161,
      "step": 1103420
    },
    {
      "epoch": 1.80580376138201,
      "grad_norm": 0.1113346517086029,
      "learning_rate": 6.430487009350106e-06,
      "loss": 0.0156,
      "step": 1103440
    },
    {
      "epoch": 1.8058364918206635,
      "grad_norm": 1.3593581914901733,
      "learning_rate": 6.430421117136588e-06,
      "loss": 0.0167,
      "step": 1103460
    },
    {
      "epoch": 1.8058692222593167,
      "grad_norm": 0.2779286205768585,
      "learning_rate": 6.430355224923072e-06,
      "loss": 0.016,
      "step": 1103480
    },
    {
      "epoch": 1.80590195269797,
      "grad_norm": 0.24199584126472473,
      "learning_rate": 6.4302893327095535e-06,
      "loss": 0.0161,
      "step": 1103500
    },
    {
      "epoch": 1.8059346831366234,
      "grad_norm": 0.44896891713142395,
      "learning_rate": 6.430223440496037e-06,
      "loss": 0.0177,
      "step": 1103520
    },
    {
      "epoch": 1.8059674135752766,
      "grad_norm": 0.22010281682014465,
      "learning_rate": 6.43015754828252e-06,
      "loss": 0.0233,
      "step": 1103540
    },
    {
      "epoch": 1.8060001440139302,
      "grad_norm": 0.44588449597358704,
      "learning_rate": 6.430091656069003e-06,
      "loss": 0.0147,
      "step": 1103560
    },
    {
      "epoch": 1.8060328744525833,
      "grad_norm": 0.5316476821899414,
      "learning_rate": 6.430025763855485e-06,
      "loss": 0.0147,
      "step": 1103580
    },
    {
      "epoch": 1.806065604891237,
      "grad_norm": 0.7322069406509399,
      "learning_rate": 6.429959871641969e-06,
      "loss": 0.0215,
      "step": 1103600
    },
    {
      "epoch": 1.80609833532989,
      "grad_norm": 0.4179753363132477,
      "learning_rate": 6.429893979428451e-06,
      "loss": 0.0217,
      "step": 1103620
    },
    {
      "epoch": 1.8061310657685434,
      "grad_norm": 0.6284239888191223,
      "learning_rate": 6.429828087214934e-06,
      "loss": 0.0189,
      "step": 1103640
    },
    {
      "epoch": 1.8061637962071968,
      "grad_norm": 0.21196357905864716,
      "learning_rate": 6.429762195001418e-06,
      "loss": 0.0238,
      "step": 1103660
    },
    {
      "epoch": 1.80619652664585,
      "grad_norm": 0.19420920312404633,
      "learning_rate": 6.4296963027879e-06,
      "loss": 0.0136,
      "step": 1103680
    },
    {
      "epoch": 1.8062292570845035,
      "grad_norm": 0.29144129157066345,
      "learning_rate": 6.4296304105743834e-06,
      "loss": 0.0236,
      "step": 1103700
    },
    {
      "epoch": 1.8062619875231567,
      "grad_norm": 0.22018666565418243,
      "learning_rate": 6.429564518360865e-06,
      "loss": 0.0141,
      "step": 1103720
    },
    {
      "epoch": 1.80629471796181,
      "grad_norm": 0.4416475296020508,
      "learning_rate": 6.429498626147349e-06,
      "loss": 0.0173,
      "step": 1103740
    },
    {
      "epoch": 1.8063274484004634,
      "grad_norm": 0.1849478781223297,
      "learning_rate": 6.429432733933832e-06,
      "loss": 0.0178,
      "step": 1103760
    },
    {
      "epoch": 1.8063601788391168,
      "grad_norm": 0.9831500053405762,
      "learning_rate": 6.429366841720314e-06,
      "loss": 0.0192,
      "step": 1103780
    },
    {
      "epoch": 1.8063929092777702,
      "grad_norm": 0.6029419302940369,
      "learning_rate": 6.429300949506797e-06,
      "loss": 0.0226,
      "step": 1103800
    },
    {
      "epoch": 1.8064256397164233,
      "grad_norm": 0.8546704649925232,
      "learning_rate": 6.429235057293281e-06,
      "loss": 0.0219,
      "step": 1103820
    },
    {
      "epoch": 1.806458370155077,
      "grad_norm": 0.2989586591720581,
      "learning_rate": 6.429169165079763e-06,
      "loss": 0.0164,
      "step": 1103840
    },
    {
      "epoch": 1.80649110059373,
      "grad_norm": 0.2386823296546936,
      "learning_rate": 6.429103272866246e-06,
      "loss": 0.0207,
      "step": 1103860
    },
    {
      "epoch": 1.8065238310323835,
      "grad_norm": 0.4357602298259735,
      "learning_rate": 6.429037380652728e-06,
      "loss": 0.0153,
      "step": 1103880
    },
    {
      "epoch": 1.8065565614710368,
      "grad_norm": 0.29579001665115356,
      "learning_rate": 6.428971488439212e-06,
      "loss": 0.019,
      "step": 1103900
    },
    {
      "epoch": 1.8065892919096902,
      "grad_norm": 0.5116810202598572,
      "learning_rate": 6.428905596225694e-06,
      "loss": 0.0103,
      "step": 1103920
    },
    {
      "epoch": 1.8066220223483436,
      "grad_norm": 0.211186021566391,
      "learning_rate": 6.428839704012177e-06,
      "loss": 0.0217,
      "step": 1103940
    },
    {
      "epoch": 1.8066547527869967,
      "grad_norm": 0.5601550936698914,
      "learning_rate": 6.42877381179866e-06,
      "loss": 0.0138,
      "step": 1103960
    },
    {
      "epoch": 1.8066874832256503,
      "grad_norm": 0.8706363439559937,
      "learning_rate": 6.4287079195851435e-06,
      "loss": 0.0243,
      "step": 1103980
    },
    {
      "epoch": 1.8067202136643035,
      "grad_norm": 1.4231992959976196,
      "learning_rate": 6.428642027371626e-06,
      "loss": 0.0226,
      "step": 1104000
    },
    {
      "epoch": 1.8067529441029568,
      "grad_norm": 0.6814775466918945,
      "learning_rate": 6.428576135158109e-06,
      "loss": 0.0206,
      "step": 1104020
    },
    {
      "epoch": 1.8067856745416102,
      "grad_norm": 0.9413179755210876,
      "learning_rate": 6.4285102429445925e-06,
      "loss": 0.0229,
      "step": 1104040
    },
    {
      "epoch": 1.8068184049802636,
      "grad_norm": 0.9345667958259583,
      "learning_rate": 6.4284443507310744e-06,
      "loss": 0.0222,
      "step": 1104060
    },
    {
      "epoch": 1.806851135418917,
      "grad_norm": 0.18262970447540283,
      "learning_rate": 6.428378458517558e-06,
      "loss": 0.012,
      "step": 1104080
    },
    {
      "epoch": 1.80688386585757,
      "grad_norm": 0.4019276797771454,
      "learning_rate": 6.42831256630404e-06,
      "loss": 0.0164,
      "step": 1104100
    },
    {
      "epoch": 1.8069165962962237,
      "grad_norm": 0.7190123200416565,
      "learning_rate": 6.4282466740905235e-06,
      "loss": 0.0173,
      "step": 1104120
    },
    {
      "epoch": 1.8069493267348768,
      "grad_norm": 0.9420817494392395,
      "learning_rate": 6.428180781877006e-06,
      "loss": 0.017,
      "step": 1104140
    },
    {
      "epoch": 1.8069820571735302,
      "grad_norm": 1.5915075540542603,
      "learning_rate": 6.428114889663489e-06,
      "loss": 0.0172,
      "step": 1104160
    },
    {
      "epoch": 1.8070147876121836,
      "grad_norm": 0.11698745936155319,
      "learning_rate": 6.428048997449972e-06,
      "loss": 0.0232,
      "step": 1104180
    },
    {
      "epoch": 1.807047518050837,
      "grad_norm": 0.109455406665802,
      "learning_rate": 6.427983105236455e-06,
      "loss": 0.0231,
      "step": 1104200
    },
    {
      "epoch": 1.8070802484894903,
      "grad_norm": 0.3045620024204254,
      "learning_rate": 6.427917213022937e-06,
      "loss": 0.0158,
      "step": 1104220
    },
    {
      "epoch": 1.8071129789281435,
      "grad_norm": 0.2882714867591858,
      "learning_rate": 6.427851320809421e-06,
      "loss": 0.0195,
      "step": 1104240
    },
    {
      "epoch": 1.807145709366797,
      "grad_norm": 0.7181265354156494,
      "learning_rate": 6.427785428595903e-06,
      "loss": 0.0154,
      "step": 1104260
    },
    {
      "epoch": 1.8071784398054502,
      "grad_norm": 0.6563905477523804,
      "learning_rate": 6.427719536382386e-06,
      "loss": 0.0186,
      "step": 1104280
    },
    {
      "epoch": 1.8072111702441036,
      "grad_norm": 0.19085007905960083,
      "learning_rate": 6.427653644168869e-06,
      "loss": 0.0214,
      "step": 1104300
    },
    {
      "epoch": 1.807243900682757,
      "grad_norm": 0.5127153396606445,
      "learning_rate": 6.427587751955352e-06,
      "loss": 0.015,
      "step": 1104320
    },
    {
      "epoch": 1.8072766311214101,
      "grad_norm": 0.6033309102058411,
      "learning_rate": 6.4275218597418345e-06,
      "loss": 0.0174,
      "step": 1104340
    },
    {
      "epoch": 1.8073093615600637,
      "grad_norm": 0.0845509022474289,
      "learning_rate": 6.427455967528318e-06,
      "loss": 0.014,
      "step": 1104360
    },
    {
      "epoch": 1.8073420919987169,
      "grad_norm": 0.3779674470424652,
      "learning_rate": 6.427390075314801e-06,
      "loss": 0.0208,
      "step": 1104380
    },
    {
      "epoch": 1.8073748224373705,
      "grad_norm": 0.187204509973526,
      "learning_rate": 6.4273241831012836e-06,
      "loss": 0.0115,
      "step": 1104400
    },
    {
      "epoch": 1.8074075528760236,
      "grad_norm": 0.4954535961151123,
      "learning_rate": 6.427258290887767e-06,
      "loss": 0.016,
      "step": 1104420
    },
    {
      "epoch": 1.807440283314677,
      "grad_norm": 0.6436496376991272,
      "learning_rate": 6.427192398674249e-06,
      "loss": 0.0175,
      "step": 1104440
    },
    {
      "epoch": 1.8074730137533304,
      "grad_norm": 0.5236903429031372,
      "learning_rate": 6.427126506460733e-06,
      "loss": 0.0241,
      "step": 1104460
    },
    {
      "epoch": 1.8075057441919835,
      "grad_norm": 0.3195773959159851,
      "learning_rate": 6.4270606142472145e-06,
      "loss": 0.0185,
      "step": 1104480
    },
    {
      "epoch": 1.807538474630637,
      "grad_norm": 0.6202885508537292,
      "learning_rate": 6.426994722033698e-06,
      "loss": 0.0166,
      "step": 1104500
    },
    {
      "epoch": 1.8075712050692903,
      "grad_norm": 2.455648422241211,
      "learning_rate": 6.42692882982018e-06,
      "loss": 0.0251,
      "step": 1104520
    },
    {
      "epoch": 1.8076039355079436,
      "grad_norm": 1.3441320657730103,
      "learning_rate": 6.426862937606664e-06,
      "loss": 0.0158,
      "step": 1104540
    },
    {
      "epoch": 1.807636665946597,
      "grad_norm": 0.3951800763607025,
      "learning_rate": 6.426797045393146e-06,
      "loss": 0.0165,
      "step": 1104560
    },
    {
      "epoch": 1.8076693963852504,
      "grad_norm": 0.512955904006958,
      "learning_rate": 6.426731153179629e-06,
      "loss": 0.0115,
      "step": 1104580
    },
    {
      "epoch": 1.8077021268239037,
      "grad_norm": 0.6105929613113403,
      "learning_rate": 6.426665260966112e-06,
      "loss": 0.0108,
      "step": 1104600
    },
    {
      "epoch": 1.807734857262557,
      "grad_norm": 0.30303406715393066,
      "learning_rate": 6.426599368752595e-06,
      "loss": 0.0209,
      "step": 1104620
    },
    {
      "epoch": 1.8077675877012105,
      "grad_norm": 0.8373895287513733,
      "learning_rate": 6.426533476539077e-06,
      "loss": 0.0174,
      "step": 1104640
    },
    {
      "epoch": 1.8078003181398636,
      "grad_norm": 0.25254660844802856,
      "learning_rate": 6.426467584325561e-06,
      "loss": 0.0115,
      "step": 1104660
    },
    {
      "epoch": 1.807833048578517,
      "grad_norm": 0.4326205849647522,
      "learning_rate": 6.426401692112043e-06,
      "loss": 0.0184,
      "step": 1104680
    },
    {
      "epoch": 1.8078657790171704,
      "grad_norm": 0.11467909067869186,
      "learning_rate": 6.426335799898526e-06,
      "loss": 0.0196,
      "step": 1104700
    },
    {
      "epoch": 1.8078985094558238,
      "grad_norm": 0.3612844944000244,
      "learning_rate": 6.42626990768501e-06,
      "loss": 0.0167,
      "step": 1104720
    },
    {
      "epoch": 1.8079312398944771,
      "grad_norm": 0.2716796100139618,
      "learning_rate": 6.426204015471492e-06,
      "loss": 0.0176,
      "step": 1104740
    },
    {
      "epoch": 1.8079639703331303,
      "grad_norm": 0.21515104174613953,
      "learning_rate": 6.426138123257975e-06,
      "loss": 0.016,
      "step": 1104760
    },
    {
      "epoch": 1.8079967007717839,
      "grad_norm": 0.5860588550567627,
      "learning_rate": 6.426072231044458e-06,
      "loss": 0.0197,
      "step": 1104780
    },
    {
      "epoch": 1.808029431210437,
      "grad_norm": 0.6357226371765137,
      "learning_rate": 6.426006338830941e-06,
      "loss": 0.013,
      "step": 1104800
    },
    {
      "epoch": 1.8080621616490904,
      "grad_norm": 0.5682836771011353,
      "learning_rate": 6.425940446617424e-06,
      "loss": 0.0201,
      "step": 1104820
    },
    {
      "epoch": 1.8080948920877438,
      "grad_norm": 0.15929356217384338,
      "learning_rate": 6.425874554403907e-06,
      "loss": 0.0148,
      "step": 1104840
    },
    {
      "epoch": 1.8081276225263971,
      "grad_norm": 0.7084524631500244,
      "learning_rate": 6.425808662190389e-06,
      "loss": 0.017,
      "step": 1104860
    },
    {
      "epoch": 1.8081603529650505,
      "grad_norm": 0.5291875600814819,
      "learning_rate": 6.425742769976873e-06,
      "loss": 0.0116,
      "step": 1104880
    },
    {
      "epoch": 1.8081930834037037,
      "grad_norm": 0.3325171172618866,
      "learning_rate": 6.425676877763355e-06,
      "loss": 0.0145,
      "step": 1104900
    },
    {
      "epoch": 1.8082258138423573,
      "grad_norm": 0.24721767008304596,
      "learning_rate": 6.425610985549838e-06,
      "loss": 0.0186,
      "step": 1104920
    },
    {
      "epoch": 1.8082585442810104,
      "grad_norm": 0.4515775442123413,
      "learning_rate": 6.425545093336321e-06,
      "loss": 0.0111,
      "step": 1104940
    },
    {
      "epoch": 1.8082912747196638,
      "grad_norm": 0.852105438709259,
      "learning_rate": 6.425479201122804e-06,
      "loss": 0.018,
      "step": 1104960
    },
    {
      "epoch": 1.8083240051583171,
      "grad_norm": 1.0951277017593384,
      "learning_rate": 6.425413308909286e-06,
      "loss": 0.0296,
      "step": 1104980
    },
    {
      "epoch": 1.8083567355969705,
      "grad_norm": 0.8852738738059998,
      "learning_rate": 6.42534741669577e-06,
      "loss": 0.0279,
      "step": 1105000
    },
    {
      "epoch": 1.808389466035624,
      "grad_norm": 0.7411404252052307,
      "learning_rate": 6.425281524482252e-06,
      "loss": 0.0191,
      "step": 1105020
    },
    {
      "epoch": 1.808422196474277,
      "grad_norm": 0.8074131011962891,
      "learning_rate": 6.4252156322687355e-06,
      "loss": 0.0153,
      "step": 1105040
    },
    {
      "epoch": 1.8084549269129306,
      "grad_norm": 1.4759929180145264,
      "learning_rate": 6.425149740055219e-06,
      "loss": 0.0164,
      "step": 1105060
    },
    {
      "epoch": 1.8084876573515838,
      "grad_norm": 0.9997683167457581,
      "learning_rate": 6.425083847841701e-06,
      "loss": 0.0273,
      "step": 1105080
    },
    {
      "epoch": 1.8085203877902372,
      "grad_norm": 0.5885451436042786,
      "learning_rate": 6.4250179556281845e-06,
      "loss": 0.0248,
      "step": 1105100
    },
    {
      "epoch": 1.8085531182288905,
      "grad_norm": 0.2622773349285126,
      "learning_rate": 6.4249520634146664e-06,
      "loss": 0.0148,
      "step": 1105120
    },
    {
      "epoch": 1.8085858486675437,
      "grad_norm": 0.1765618622303009,
      "learning_rate": 6.42488617120115e-06,
      "loss": 0.0091,
      "step": 1105140
    },
    {
      "epoch": 1.8086185791061973,
      "grad_norm": 0.7521490454673767,
      "learning_rate": 6.424820278987633e-06,
      "loss": 0.0159,
      "step": 1105160
    },
    {
      "epoch": 1.8086513095448504,
      "grad_norm": 0.25566771626472473,
      "learning_rate": 6.4247543867741155e-06,
      "loss": 0.0181,
      "step": 1105180
    },
    {
      "epoch": 1.808684039983504,
      "grad_norm": 0.9537702798843384,
      "learning_rate": 6.424688494560598e-06,
      "loss": 0.0157,
      "step": 1105200
    },
    {
      "epoch": 1.8087167704221572,
      "grad_norm": 0.1160142570734024,
      "learning_rate": 6.424622602347082e-06,
      "loss": 0.0226,
      "step": 1105220
    },
    {
      "epoch": 1.8087495008608105,
      "grad_norm": 0.34996309876441956,
      "learning_rate": 6.424556710133564e-06,
      "loss": 0.0167,
      "step": 1105240
    },
    {
      "epoch": 1.808782231299464,
      "grad_norm": 0.316530704498291,
      "learning_rate": 6.424490817920047e-06,
      "loss": 0.017,
      "step": 1105260
    },
    {
      "epoch": 1.808814961738117,
      "grad_norm": 0.27666807174682617,
      "learning_rate": 6.424424925706529e-06,
      "loss": 0.0174,
      "step": 1105280
    },
    {
      "epoch": 1.8088476921767707,
      "grad_norm": 0.22003808617591858,
      "learning_rate": 6.424359033493013e-06,
      "loss": 0.0217,
      "step": 1105300
    },
    {
      "epoch": 1.8088804226154238,
      "grad_norm": 0.24189652502536774,
      "learning_rate": 6.4242931412794955e-06,
      "loss": 0.0157,
      "step": 1105320
    },
    {
      "epoch": 1.8089131530540772,
      "grad_norm": 0.23070158064365387,
      "learning_rate": 6.424227249065978e-06,
      "loss": 0.0222,
      "step": 1105340
    },
    {
      "epoch": 1.8089458834927306,
      "grad_norm": 1.0107420682907104,
      "learning_rate": 6.424161356852461e-06,
      "loss": 0.0149,
      "step": 1105360
    },
    {
      "epoch": 1.808978613931384,
      "grad_norm": 0.5109047293663025,
      "learning_rate": 6.424095464638945e-06,
      "loss": 0.0133,
      "step": 1105380
    },
    {
      "epoch": 1.8090113443700373,
      "grad_norm": 0.20792727172374725,
      "learning_rate": 6.4240295724254265e-06,
      "loss": 0.0175,
      "step": 1105400
    },
    {
      "epoch": 1.8090440748086904,
      "grad_norm": 0.26001814007759094,
      "learning_rate": 6.42396368021191e-06,
      "loss": 0.0251,
      "step": 1105420
    },
    {
      "epoch": 1.809076805247344,
      "grad_norm": 0.18880149722099304,
      "learning_rate": 6.423897787998394e-06,
      "loss": 0.0189,
      "step": 1105440
    },
    {
      "epoch": 1.8091095356859972,
      "grad_norm": 0.27630534768104553,
      "learning_rate": 6.4238318957848755e-06,
      "loss": 0.0223,
      "step": 1105460
    },
    {
      "epoch": 1.8091422661246506,
      "grad_norm": 1.6504886150360107,
      "learning_rate": 6.423766003571359e-06,
      "loss": 0.0198,
      "step": 1105480
    },
    {
      "epoch": 1.809174996563304,
      "grad_norm": 0.5688806772232056,
      "learning_rate": 6.423700111357841e-06,
      "loss": 0.0146,
      "step": 1105500
    },
    {
      "epoch": 1.8092077270019573,
      "grad_norm": 1.1221131086349487,
      "learning_rate": 6.423634219144325e-06,
      "loss": 0.0245,
      "step": 1105520
    },
    {
      "epoch": 1.8092404574406107,
      "grad_norm": 0.37764179706573486,
      "learning_rate": 6.4235683269308065e-06,
      "loss": 0.0183,
      "step": 1105540
    },
    {
      "epoch": 1.8092731878792638,
      "grad_norm": 0.6355268955230713,
      "learning_rate": 6.42350243471729e-06,
      "loss": 0.0161,
      "step": 1105560
    },
    {
      "epoch": 1.8093059183179174,
      "grad_norm": 0.6962646842002869,
      "learning_rate": 6.423436542503773e-06,
      "loss": 0.0131,
      "step": 1105580
    },
    {
      "epoch": 1.8093386487565706,
      "grad_norm": 0.5299342274665833,
      "learning_rate": 6.4233706502902556e-06,
      "loss": 0.0166,
      "step": 1105600
    },
    {
      "epoch": 1.809371379195224,
      "grad_norm": 0.5893181562423706,
      "learning_rate": 6.423304758076738e-06,
      "loss": 0.0215,
      "step": 1105620
    },
    {
      "epoch": 1.8094041096338773,
      "grad_norm": 0.24700383841991425,
      "learning_rate": 6.423238865863222e-06,
      "loss": 0.0279,
      "step": 1105640
    },
    {
      "epoch": 1.8094368400725307,
      "grad_norm": 3.871382236480713,
      "learning_rate": 6.423172973649704e-06,
      "loss": 0.0142,
      "step": 1105660
    },
    {
      "epoch": 1.809469570511184,
      "grad_norm": 0.8771786689758301,
      "learning_rate": 6.423107081436187e-06,
      "loss": 0.0214,
      "step": 1105680
    },
    {
      "epoch": 1.8095023009498372,
      "grad_norm": 0.38630011677742004,
      "learning_rate": 6.423041189222669e-06,
      "loss": 0.0151,
      "step": 1105700
    },
    {
      "epoch": 1.8095350313884908,
      "grad_norm": 0.8021940588951111,
      "learning_rate": 6.422975297009153e-06,
      "loss": 0.0244,
      "step": 1105720
    },
    {
      "epoch": 1.809567761827144,
      "grad_norm": 0.4060765504837036,
      "learning_rate": 6.422909404795636e-06,
      "loss": 0.0154,
      "step": 1105740
    },
    {
      "epoch": 1.8096004922657973,
      "grad_norm": 0.2644364833831787,
      "learning_rate": 6.422843512582118e-06,
      "loss": 0.0193,
      "step": 1105760
    },
    {
      "epoch": 1.8096332227044507,
      "grad_norm": 0.5877288579940796,
      "learning_rate": 6.422777620368602e-06,
      "loss": 0.0216,
      "step": 1105780
    },
    {
      "epoch": 1.809665953143104,
      "grad_norm": 0.2035362273454666,
      "learning_rate": 6.422711728155085e-06,
      "loss": 0.0147,
      "step": 1105800
    },
    {
      "epoch": 1.8096986835817575,
      "grad_norm": 0.36737746000289917,
      "learning_rate": 6.422645835941567e-06,
      "loss": 0.0149,
      "step": 1105820
    },
    {
      "epoch": 1.8097314140204106,
      "grad_norm": 0.16948378086090088,
      "learning_rate": 6.42257994372805e-06,
      "loss": 0.0166,
      "step": 1105840
    },
    {
      "epoch": 1.8097641444590642,
      "grad_norm": 0.8008536696434021,
      "learning_rate": 6.422514051514534e-06,
      "loss": 0.0175,
      "step": 1105860
    },
    {
      "epoch": 1.8097968748977173,
      "grad_norm": 0.318036824464798,
      "learning_rate": 6.422448159301016e-06,
      "loss": 0.0157,
      "step": 1105880
    },
    {
      "epoch": 1.8098296053363707,
      "grad_norm": 0.5477235913276672,
      "learning_rate": 6.422382267087499e-06,
      "loss": 0.021,
      "step": 1105900
    },
    {
      "epoch": 1.809862335775024,
      "grad_norm": 0.42022764682769775,
      "learning_rate": 6.422316374873981e-06,
      "loss": 0.016,
      "step": 1105920
    },
    {
      "epoch": 1.8098950662136772,
      "grad_norm": 1.054070234298706,
      "learning_rate": 6.422250482660465e-06,
      "loss": 0.0145,
      "step": 1105940
    },
    {
      "epoch": 1.8099277966523308,
      "grad_norm": 1.1393722295761108,
      "learning_rate": 6.422184590446947e-06,
      "loss": 0.0276,
      "step": 1105960
    },
    {
      "epoch": 1.809960527090984,
      "grad_norm": 0.10986977070569992,
      "learning_rate": 6.42211869823343e-06,
      "loss": 0.0126,
      "step": 1105980
    },
    {
      "epoch": 1.8099932575296376,
      "grad_norm": 0.38993892073631287,
      "learning_rate": 6.422052806019913e-06,
      "loss": 0.0215,
      "step": 1106000
    },
    {
      "epoch": 1.8100259879682907,
      "grad_norm": 0.318765789270401,
      "learning_rate": 6.4219869138063965e-06,
      "loss": 0.0213,
      "step": 1106020
    },
    {
      "epoch": 1.810058718406944,
      "grad_norm": 1.108020305633545,
      "learning_rate": 6.421921021592878e-06,
      "loss": 0.0187,
      "step": 1106040
    },
    {
      "epoch": 1.8100914488455975,
      "grad_norm": 0.69529789686203,
      "learning_rate": 6.421855129379362e-06,
      "loss": 0.0119,
      "step": 1106060
    },
    {
      "epoch": 1.8101241792842506,
      "grad_norm": 0.6715689301490784,
      "learning_rate": 6.421789237165844e-06,
      "loss": 0.0272,
      "step": 1106080
    },
    {
      "epoch": 1.8101569097229042,
      "grad_norm": 0.3017917573451996,
      "learning_rate": 6.4217233449523274e-06,
      "loss": 0.0154,
      "step": 1106100
    },
    {
      "epoch": 1.8101896401615574,
      "grad_norm": 1.660035490989685,
      "learning_rate": 6.421657452738811e-06,
      "loss": 0.0232,
      "step": 1106120
    },
    {
      "epoch": 1.8102223706002107,
      "grad_norm": 0.30788522958755493,
      "learning_rate": 6.421591560525293e-06,
      "loss": 0.023,
      "step": 1106140
    },
    {
      "epoch": 1.8102551010388641,
      "grad_norm": 0.7472070455551147,
      "learning_rate": 6.4215256683117765e-06,
      "loss": 0.0158,
      "step": 1106160
    },
    {
      "epoch": 1.8102878314775175,
      "grad_norm": 0.9554430246353149,
      "learning_rate": 6.421459776098259e-06,
      "loss": 0.0173,
      "step": 1106180
    },
    {
      "epoch": 1.8103205619161709,
      "grad_norm": 0.162626251578331,
      "learning_rate": 6.421393883884742e-06,
      "loss": 0.0195,
      "step": 1106200
    },
    {
      "epoch": 1.810353292354824,
      "grad_norm": 0.6152205467224121,
      "learning_rate": 6.421327991671225e-06,
      "loss": 0.015,
      "step": 1106220
    },
    {
      "epoch": 1.8103860227934776,
      "grad_norm": 1.12082839012146,
      "learning_rate": 6.421262099457708e-06,
      "loss": 0.0165,
      "step": 1106240
    },
    {
      "epoch": 1.8104187532321308,
      "grad_norm": 0.3990562856197357,
      "learning_rate": 6.42119620724419e-06,
      "loss": 0.0155,
      "step": 1106260
    },
    {
      "epoch": 1.8104514836707841,
      "grad_norm": 0.2611802816390991,
      "learning_rate": 6.421130315030674e-06,
      "loss": 0.0171,
      "step": 1106280
    },
    {
      "epoch": 1.8104842141094375,
      "grad_norm": 0.3097025752067566,
      "learning_rate": 6.421064422817156e-06,
      "loss": 0.0129,
      "step": 1106300
    },
    {
      "epoch": 1.8105169445480909,
      "grad_norm": 0.08122555911540985,
      "learning_rate": 6.420998530603639e-06,
      "loss": 0.0355,
      "step": 1106320
    },
    {
      "epoch": 1.8105496749867442,
      "grad_norm": 0.36310142278671265,
      "learning_rate": 6.420932638390122e-06,
      "loss": 0.0177,
      "step": 1106340
    },
    {
      "epoch": 1.8105824054253974,
      "grad_norm": 0.9209704399108887,
      "learning_rate": 6.420866746176605e-06,
      "loss": 0.0148,
      "step": 1106360
    },
    {
      "epoch": 1.810615135864051,
      "grad_norm": 0.8521245718002319,
      "learning_rate": 6.4208008539630875e-06,
      "loss": 0.0216,
      "step": 1106380
    },
    {
      "epoch": 1.8106478663027041,
      "grad_norm": 0.23339053988456726,
      "learning_rate": 6.420734961749571e-06,
      "loss": 0.0211,
      "step": 1106400
    },
    {
      "epoch": 1.8106805967413575,
      "grad_norm": 0.14338836073875427,
      "learning_rate": 6.420669069536053e-06,
      "loss": 0.0202,
      "step": 1106420
    },
    {
      "epoch": 1.8107133271800109,
      "grad_norm": 0.057837799191474915,
      "learning_rate": 6.4206031773225366e-06,
      "loss": 0.0216,
      "step": 1106440
    },
    {
      "epoch": 1.8107460576186643,
      "grad_norm": 0.2713582515716553,
      "learning_rate": 6.42053728510902e-06,
      "loss": 0.0192,
      "step": 1106460
    },
    {
      "epoch": 1.8107787880573176,
      "grad_norm": 0.7766260504722595,
      "learning_rate": 6.420471392895502e-06,
      "loss": 0.0215,
      "step": 1106480
    },
    {
      "epoch": 1.8108115184959708,
      "grad_norm": 1.1209532022476196,
      "learning_rate": 6.420405500681986e-06,
      "loss": 0.0208,
      "step": 1106500
    },
    {
      "epoch": 1.8108442489346244,
      "grad_norm": 0.24005943536758423,
      "learning_rate": 6.4203396084684675e-06,
      "loss": 0.0176,
      "step": 1106520
    },
    {
      "epoch": 1.8108769793732775,
      "grad_norm": 1.0259500741958618,
      "learning_rate": 6.420273716254951e-06,
      "loss": 0.0221,
      "step": 1106540
    },
    {
      "epoch": 1.810909709811931,
      "grad_norm": 0.5967205762863159,
      "learning_rate": 6.420207824041433e-06,
      "loss": 0.0108,
      "step": 1106560
    },
    {
      "epoch": 1.8109424402505843,
      "grad_norm": 0.4805753827095032,
      "learning_rate": 6.420141931827917e-06,
      "loss": 0.0191,
      "step": 1106580
    },
    {
      "epoch": 1.8109751706892374,
      "grad_norm": 0.2862061560153961,
      "learning_rate": 6.420076039614399e-06,
      "loss": 0.0144,
      "step": 1106600
    },
    {
      "epoch": 1.811007901127891,
      "grad_norm": 0.39701130986213684,
      "learning_rate": 6.420010147400882e-06,
      "loss": 0.0178,
      "step": 1106620
    },
    {
      "epoch": 1.8110406315665442,
      "grad_norm": 0.19604578614234924,
      "learning_rate": 6.419944255187365e-06,
      "loss": 0.0185,
      "step": 1106640
    },
    {
      "epoch": 1.8110733620051978,
      "grad_norm": 0.19000723958015442,
      "learning_rate": 6.419878362973848e-06,
      "loss": 0.0227,
      "step": 1106660
    },
    {
      "epoch": 1.811106092443851,
      "grad_norm": 0.5224339962005615,
      "learning_rate": 6.41981247076033e-06,
      "loss": 0.0216,
      "step": 1106680
    },
    {
      "epoch": 1.8111388228825043,
      "grad_norm": 0.21770906448364258,
      "learning_rate": 6.419746578546814e-06,
      "loss": 0.0199,
      "step": 1106700
    },
    {
      "epoch": 1.8111715533211576,
      "grad_norm": 0.3432028293609619,
      "learning_rate": 6.419680686333296e-06,
      "loss": 0.018,
      "step": 1106720
    },
    {
      "epoch": 1.8112042837598108,
      "grad_norm": 0.14493094384670258,
      "learning_rate": 6.419614794119779e-06,
      "loss": 0.0126,
      "step": 1106740
    },
    {
      "epoch": 1.8112370141984644,
      "grad_norm": 0.745695173740387,
      "learning_rate": 6.419548901906262e-06,
      "loss": 0.0298,
      "step": 1106760
    },
    {
      "epoch": 1.8112697446371175,
      "grad_norm": 0.22830705344676971,
      "learning_rate": 6.419483009692745e-06,
      "loss": 0.0186,
      "step": 1106780
    },
    {
      "epoch": 1.811302475075771,
      "grad_norm": 0.13993483781814575,
      "learning_rate": 6.4194171174792276e-06,
      "loss": 0.0154,
      "step": 1106800
    },
    {
      "epoch": 1.8113352055144243,
      "grad_norm": 1.013662338256836,
      "learning_rate": 6.419351225265711e-06,
      "loss": 0.0133,
      "step": 1106820
    },
    {
      "epoch": 1.8113679359530777,
      "grad_norm": 0.44307178258895874,
      "learning_rate": 6.419285333052194e-06,
      "loss": 0.0196,
      "step": 1106840
    },
    {
      "epoch": 1.811400666391731,
      "grad_norm": 0.8459484577178955,
      "learning_rate": 6.419219440838677e-06,
      "loss": 0.0175,
      "step": 1106860
    },
    {
      "epoch": 1.8114333968303842,
      "grad_norm": 0.4286074936389923,
      "learning_rate": 6.41915354862516e-06,
      "loss": 0.0185,
      "step": 1106880
    },
    {
      "epoch": 1.8114661272690378,
      "grad_norm": 0.7887505292892456,
      "learning_rate": 6.419087656411642e-06,
      "loss": 0.0193,
      "step": 1106900
    },
    {
      "epoch": 1.811498857707691,
      "grad_norm": 0.39917150139808655,
      "learning_rate": 6.419021764198126e-06,
      "loss": 0.0126,
      "step": 1106920
    },
    {
      "epoch": 1.8115315881463443,
      "grad_norm": 0.3919423520565033,
      "learning_rate": 6.418955871984608e-06,
      "loss": 0.0259,
      "step": 1106940
    },
    {
      "epoch": 1.8115643185849977,
      "grad_norm": 0.38451695442199707,
      "learning_rate": 6.418889979771091e-06,
      "loss": 0.0235,
      "step": 1106960
    },
    {
      "epoch": 1.811597049023651,
      "grad_norm": 0.06781672686338425,
      "learning_rate": 6.418824087557574e-06,
      "loss": 0.0193,
      "step": 1106980
    },
    {
      "epoch": 1.8116297794623044,
      "grad_norm": 0.14914032816886902,
      "learning_rate": 6.418758195344057e-06,
      "loss": 0.0113,
      "step": 1107000
    },
    {
      "epoch": 1.8116625099009576,
      "grad_norm": 0.7804015874862671,
      "learning_rate": 6.418692303130539e-06,
      "loss": 0.0221,
      "step": 1107020
    },
    {
      "epoch": 1.8116952403396112,
      "grad_norm": 0.12659326195716858,
      "learning_rate": 6.418626410917023e-06,
      "loss": 0.0175,
      "step": 1107040
    },
    {
      "epoch": 1.8117279707782643,
      "grad_norm": 1.12420654296875,
      "learning_rate": 6.418560518703505e-06,
      "loss": 0.018,
      "step": 1107060
    },
    {
      "epoch": 1.8117607012169177,
      "grad_norm": 0.357246994972229,
      "learning_rate": 6.4184946264899885e-06,
      "loss": 0.026,
      "step": 1107080
    },
    {
      "epoch": 1.811793431655571,
      "grad_norm": 0.2282530814409256,
      "learning_rate": 6.41842873427647e-06,
      "loss": 0.0263,
      "step": 1107100
    },
    {
      "epoch": 1.8118261620942244,
      "grad_norm": 0.4826677441596985,
      "learning_rate": 6.418362842062954e-06,
      "loss": 0.0218,
      "step": 1107120
    },
    {
      "epoch": 1.8118588925328778,
      "grad_norm": 0.7083385586738586,
      "learning_rate": 6.418296949849437e-06,
      "loss": 0.0184,
      "step": 1107140
    },
    {
      "epoch": 1.811891622971531,
      "grad_norm": 0.46487462520599365,
      "learning_rate": 6.4182310576359194e-06,
      "loss": 0.0168,
      "step": 1107160
    },
    {
      "epoch": 1.8119243534101845,
      "grad_norm": 0.9414156675338745,
      "learning_rate": 6.418165165422403e-06,
      "loss": 0.0149,
      "step": 1107180
    },
    {
      "epoch": 1.8119570838488377,
      "grad_norm": 0.04490232095122337,
      "learning_rate": 6.418099273208886e-06,
      "loss": 0.0226,
      "step": 1107200
    },
    {
      "epoch": 1.811989814287491,
      "grad_norm": 1.1716502904891968,
      "learning_rate": 6.4180333809953685e-06,
      "loss": 0.0143,
      "step": 1107220
    },
    {
      "epoch": 1.8120225447261444,
      "grad_norm": 0.8476881384849548,
      "learning_rate": 6.417967488781851e-06,
      "loss": 0.0187,
      "step": 1107240
    },
    {
      "epoch": 1.8120552751647978,
      "grad_norm": 0.6253207921981812,
      "learning_rate": 6.417901596568335e-06,
      "loss": 0.0165,
      "step": 1107260
    },
    {
      "epoch": 1.8120880056034512,
      "grad_norm": 0.2203662395477295,
      "learning_rate": 6.417835704354817e-06,
      "loss": 0.0192,
      "step": 1107280
    },
    {
      "epoch": 1.8121207360421043,
      "grad_norm": 5.582848072052002,
      "learning_rate": 6.4177698121413e-06,
      "loss": 0.0258,
      "step": 1107300
    },
    {
      "epoch": 1.812153466480758,
      "grad_norm": 0.7278090715408325,
      "learning_rate": 6.417703919927782e-06,
      "loss": 0.0185,
      "step": 1107320
    },
    {
      "epoch": 1.812186196919411,
      "grad_norm": 1.2100934982299805,
      "learning_rate": 6.417638027714266e-06,
      "loss": 0.0186,
      "step": 1107340
    },
    {
      "epoch": 1.8122189273580644,
      "grad_norm": 0.5720466375350952,
      "learning_rate": 6.417572135500748e-06,
      "loss": 0.02,
      "step": 1107360
    },
    {
      "epoch": 1.8122516577967178,
      "grad_norm": 0.17172881960868835,
      "learning_rate": 6.417506243287231e-06,
      "loss": 0.0162,
      "step": 1107380
    },
    {
      "epoch": 1.812284388235371,
      "grad_norm": 0.2444954365491867,
      "learning_rate": 6.417440351073714e-06,
      "loss": 0.0217,
      "step": 1107400
    },
    {
      "epoch": 1.8123171186740246,
      "grad_norm": 0.5891453623771667,
      "learning_rate": 6.417374458860197e-06,
      "loss": 0.0145,
      "step": 1107420
    },
    {
      "epoch": 1.8123498491126777,
      "grad_norm": 0.7350122928619385,
      "learning_rate": 6.4173085666466795e-06,
      "loss": 0.0202,
      "step": 1107440
    },
    {
      "epoch": 1.8123825795513313,
      "grad_norm": 1.02987539768219,
      "learning_rate": 6.417242674433163e-06,
      "loss": 0.0221,
      "step": 1107460
    },
    {
      "epoch": 1.8124153099899845,
      "grad_norm": 0.738344669342041,
      "learning_rate": 6.417176782219645e-06,
      "loss": 0.0266,
      "step": 1107480
    },
    {
      "epoch": 1.8124480404286378,
      "grad_norm": 0.20345762372016907,
      "learning_rate": 6.4171108900061285e-06,
      "loss": 0.02,
      "step": 1107500
    },
    {
      "epoch": 1.8124807708672912,
      "grad_norm": 0.44719842076301575,
      "learning_rate": 6.417044997792612e-06,
      "loss": 0.0162,
      "step": 1107520
    },
    {
      "epoch": 1.8125135013059444,
      "grad_norm": 0.3631075918674469,
      "learning_rate": 6.416979105579094e-06,
      "loss": 0.0193,
      "step": 1107540
    },
    {
      "epoch": 1.812546231744598,
      "grad_norm": 0.29900336265563965,
      "learning_rate": 6.416913213365578e-06,
      "loss": 0.0206,
      "step": 1107560
    },
    {
      "epoch": 1.812578962183251,
      "grad_norm": 0.32775938510894775,
      "learning_rate": 6.4168473211520595e-06,
      "loss": 0.0159,
      "step": 1107580
    },
    {
      "epoch": 1.8126116926219045,
      "grad_norm": 1.3039311170578003,
      "learning_rate": 6.416781428938543e-06,
      "loss": 0.0172,
      "step": 1107600
    },
    {
      "epoch": 1.8126444230605578,
      "grad_norm": 0.11672397702932358,
      "learning_rate": 6.416715536725026e-06,
      "loss": 0.0102,
      "step": 1107620
    },
    {
      "epoch": 1.8126771534992112,
      "grad_norm": 0.39076051115989685,
      "learning_rate": 6.4166496445115086e-06,
      "loss": 0.0217,
      "step": 1107640
    },
    {
      "epoch": 1.8127098839378646,
      "grad_norm": 0.04796583205461502,
      "learning_rate": 6.416583752297991e-06,
      "loss": 0.0209,
      "step": 1107660
    },
    {
      "epoch": 1.8127426143765177,
      "grad_norm": 1.0994608402252197,
      "learning_rate": 6.416517860084475e-06,
      "loss": 0.0176,
      "step": 1107680
    },
    {
      "epoch": 1.8127753448151713,
      "grad_norm": 1.484925389289856,
      "learning_rate": 6.416451967870957e-06,
      "loss": 0.0224,
      "step": 1107700
    },
    {
      "epoch": 1.8128080752538245,
      "grad_norm": 0.4273141026496887,
      "learning_rate": 6.41638607565744e-06,
      "loss": 0.0205,
      "step": 1107720
    },
    {
      "epoch": 1.8128408056924779,
      "grad_norm": 0.7593836784362793,
      "learning_rate": 6.416320183443922e-06,
      "loss": 0.0202,
      "step": 1107740
    },
    {
      "epoch": 1.8128735361311312,
      "grad_norm": 0.38890770077705383,
      "learning_rate": 6.416254291230406e-06,
      "loss": 0.0223,
      "step": 1107760
    },
    {
      "epoch": 1.8129062665697846,
      "grad_norm": 0.5556579232215881,
      "learning_rate": 6.416188399016889e-06,
      "loss": 0.0239,
      "step": 1107780
    },
    {
      "epoch": 1.812938997008438,
      "grad_norm": 0.10015841573476791,
      "learning_rate": 6.416122506803371e-06,
      "loss": 0.0215,
      "step": 1107800
    },
    {
      "epoch": 1.8129717274470911,
      "grad_norm": 0.20214346051216125,
      "learning_rate": 6.416056614589854e-06,
      "loss": 0.0127,
      "step": 1107820
    },
    {
      "epoch": 1.8130044578857447,
      "grad_norm": 0.560724675655365,
      "learning_rate": 6.415990722376338e-06,
      "loss": 0.0193,
      "step": 1107840
    },
    {
      "epoch": 1.8130371883243979,
      "grad_norm": 0.16768646240234375,
      "learning_rate": 6.4159248301628196e-06,
      "loss": 0.0122,
      "step": 1107860
    },
    {
      "epoch": 1.8130699187630512,
      "grad_norm": 0.1561211794614792,
      "learning_rate": 6.415858937949303e-06,
      "loss": 0.0156,
      "step": 1107880
    },
    {
      "epoch": 1.8131026492017046,
      "grad_norm": 0.8336860537528992,
      "learning_rate": 6.415793045735787e-06,
      "loss": 0.019,
      "step": 1107900
    },
    {
      "epoch": 1.813135379640358,
      "grad_norm": 0.18483541905879974,
      "learning_rate": 6.415727153522269e-06,
      "loss": 0.0159,
      "step": 1107920
    },
    {
      "epoch": 1.8131681100790114,
      "grad_norm": 0.6236808896064758,
      "learning_rate": 6.415661261308752e-06,
      "loss": 0.0207,
      "step": 1107940
    },
    {
      "epoch": 1.8132008405176645,
      "grad_norm": 0.12104719877243042,
      "learning_rate": 6.415595369095234e-06,
      "loss": 0.014,
      "step": 1107960
    },
    {
      "epoch": 1.813233570956318,
      "grad_norm": 0.2806999385356903,
      "learning_rate": 6.415529476881718e-06,
      "loss": 0.0181,
      "step": 1107980
    },
    {
      "epoch": 1.8132663013949712,
      "grad_norm": 1.0733907222747803,
      "learning_rate": 6.4154635846682004e-06,
      "loss": 0.0174,
      "step": 1108000
    },
    {
      "epoch": 1.8132990318336246,
      "grad_norm": 1.042683720588684,
      "learning_rate": 6.415397692454683e-06,
      "loss": 0.0183,
      "step": 1108020
    },
    {
      "epoch": 1.813331762272278,
      "grad_norm": 0.2130802571773529,
      "learning_rate": 6.415331800241166e-06,
      "loss": 0.0143,
      "step": 1108040
    },
    {
      "epoch": 1.8133644927109314,
      "grad_norm": 0.2782067358493805,
      "learning_rate": 6.4152659080276495e-06,
      "loss": 0.0231,
      "step": 1108060
    },
    {
      "epoch": 1.8133972231495847,
      "grad_norm": 0.3471570909023285,
      "learning_rate": 6.415200015814131e-06,
      "loss": 0.0129,
      "step": 1108080
    },
    {
      "epoch": 1.8134299535882379,
      "grad_norm": 0.6241176724433899,
      "learning_rate": 6.415134123600615e-06,
      "loss": 0.0178,
      "step": 1108100
    },
    {
      "epoch": 1.8134626840268915,
      "grad_norm": 1.115480661392212,
      "learning_rate": 6.415068231387097e-06,
      "loss": 0.0286,
      "step": 1108120
    },
    {
      "epoch": 1.8134954144655446,
      "grad_norm": 0.4039360582828522,
      "learning_rate": 6.4150023391735805e-06,
      "loss": 0.0199,
      "step": 1108140
    },
    {
      "epoch": 1.813528144904198,
      "grad_norm": 0.44370463490486145,
      "learning_rate": 6.414936446960063e-06,
      "loss": 0.0228,
      "step": 1108160
    },
    {
      "epoch": 1.8135608753428514,
      "grad_norm": 0.2686612010002136,
      "learning_rate": 6.414870554746546e-06,
      "loss": 0.021,
      "step": 1108180
    },
    {
      "epoch": 1.8135936057815045,
      "grad_norm": 0.8681975603103638,
      "learning_rate": 6.414804662533029e-06,
      "loss": 0.0205,
      "step": 1108200
    },
    {
      "epoch": 1.8136263362201581,
      "grad_norm": 0.33850976824760437,
      "learning_rate": 6.414738770319512e-06,
      "loss": 0.0149,
      "step": 1108220
    },
    {
      "epoch": 1.8136590666588113,
      "grad_norm": 0.23823314905166626,
      "learning_rate": 6.414672878105995e-06,
      "loss": 0.0149,
      "step": 1108240
    },
    {
      "epoch": 1.8136917970974649,
      "grad_norm": 0.3231554329395294,
      "learning_rate": 6.414606985892478e-06,
      "loss": 0.0129,
      "step": 1108260
    },
    {
      "epoch": 1.813724527536118,
      "grad_norm": 1.01300048828125,
      "learning_rate": 6.414541093678961e-06,
      "loss": 0.0157,
      "step": 1108280
    },
    {
      "epoch": 1.8137572579747714,
      "grad_norm": 0.10404536873102188,
      "learning_rate": 6.414475201465443e-06,
      "loss": 0.0167,
      "step": 1108300
    },
    {
      "epoch": 1.8137899884134248,
      "grad_norm": 0.3106445074081421,
      "learning_rate": 6.414409309251927e-06,
      "loss": 0.0126,
      "step": 1108320
    },
    {
      "epoch": 1.813822718852078,
      "grad_norm": 0.2020253688097,
      "learning_rate": 6.414343417038409e-06,
      "loss": 0.017,
      "step": 1108340
    },
    {
      "epoch": 1.8138554492907315,
      "grad_norm": 0.2147563248872757,
      "learning_rate": 6.414277524824892e-06,
      "loss": 0.0173,
      "step": 1108360
    },
    {
      "epoch": 1.8138881797293847,
      "grad_norm": 0.26643577218055725,
      "learning_rate": 6.414211632611374e-06,
      "loss": 0.019,
      "step": 1108380
    },
    {
      "epoch": 1.813920910168038,
      "grad_norm": 0.47262224555015564,
      "learning_rate": 6.414145740397858e-06,
      "loss": 0.0226,
      "step": 1108400
    },
    {
      "epoch": 1.8139536406066914,
      "grad_norm": 0.48773419857025146,
      "learning_rate": 6.4140798481843405e-06,
      "loss": 0.0197,
      "step": 1108420
    },
    {
      "epoch": 1.8139863710453448,
      "grad_norm": 0.27892905473709106,
      "learning_rate": 6.414013955970823e-06,
      "loss": 0.0326,
      "step": 1108440
    },
    {
      "epoch": 1.8140191014839981,
      "grad_norm": 0.5865004062652588,
      "learning_rate": 6.413948063757306e-06,
      "loss": 0.0178,
      "step": 1108460
    },
    {
      "epoch": 1.8140518319226513,
      "grad_norm": 0.3731931746006012,
      "learning_rate": 6.4138821715437896e-06,
      "loss": 0.015,
      "step": 1108480
    },
    {
      "epoch": 1.8140845623613049,
      "grad_norm": 0.257443904876709,
      "learning_rate": 6.4138162793302715e-06,
      "loss": 0.0166,
      "step": 1108500
    },
    {
      "epoch": 1.814117292799958,
      "grad_norm": 1.1746494770050049,
      "learning_rate": 6.413750387116755e-06,
      "loss": 0.0207,
      "step": 1108520
    },
    {
      "epoch": 1.8141500232386114,
      "grad_norm": 0.9419543147087097,
      "learning_rate": 6.413684494903237e-06,
      "loss": 0.018,
      "step": 1108540
    },
    {
      "epoch": 1.8141827536772648,
      "grad_norm": 2.6857075691223145,
      "learning_rate": 6.4136186026897205e-06,
      "loss": 0.0125,
      "step": 1108560
    },
    {
      "epoch": 1.8142154841159182,
      "grad_norm": 0.6501283049583435,
      "learning_rate": 6.413552710476204e-06,
      "loss": 0.0207,
      "step": 1108580
    },
    {
      "epoch": 1.8142482145545715,
      "grad_norm": 0.8341816663742065,
      "learning_rate": 6.413486818262686e-06,
      "loss": 0.0145,
      "step": 1108600
    },
    {
      "epoch": 1.8142809449932247,
      "grad_norm": 1.5126196146011353,
      "learning_rate": 6.41342092604917e-06,
      "loss": 0.0219,
      "step": 1108620
    },
    {
      "epoch": 1.8143136754318783,
      "grad_norm": 0.17068788409233093,
      "learning_rate": 6.413355033835652e-06,
      "loss": 0.0158,
      "step": 1108640
    },
    {
      "epoch": 1.8143464058705314,
      "grad_norm": 0.27242153882980347,
      "learning_rate": 6.413289141622135e-06,
      "loss": 0.0273,
      "step": 1108660
    },
    {
      "epoch": 1.8143791363091848,
      "grad_norm": 0.6977998614311218,
      "learning_rate": 6.413223249408618e-06,
      "loss": 0.0164,
      "step": 1108680
    },
    {
      "epoch": 1.8144118667478382,
      "grad_norm": 0.2872506380081177,
      "learning_rate": 6.413157357195101e-06,
      "loss": 0.0214,
      "step": 1108700
    },
    {
      "epoch": 1.8144445971864915,
      "grad_norm": 0.43520408868789673,
      "learning_rate": 6.413091464981583e-06,
      "loss": 0.0149,
      "step": 1108720
    },
    {
      "epoch": 1.814477327625145,
      "grad_norm": 0.9996972680091858,
      "learning_rate": 6.413025572768067e-06,
      "loss": 0.0179,
      "step": 1108740
    },
    {
      "epoch": 1.814510058063798,
      "grad_norm": 0.2614133358001709,
      "learning_rate": 6.412959680554549e-06,
      "loss": 0.0151,
      "step": 1108760
    },
    {
      "epoch": 1.8145427885024517,
      "grad_norm": 0.6083435416221619,
      "learning_rate": 6.412893788341032e-06,
      "loss": 0.0257,
      "step": 1108780
    },
    {
      "epoch": 1.8145755189411048,
      "grad_norm": 0.3660764694213867,
      "learning_rate": 6.412827896127515e-06,
      "loss": 0.0122,
      "step": 1108800
    },
    {
      "epoch": 1.8146082493797582,
      "grad_norm": 0.4851313531398773,
      "learning_rate": 6.412762003913998e-06,
      "loss": 0.0177,
      "step": 1108820
    },
    {
      "epoch": 1.8146409798184115,
      "grad_norm": 0.5609328150749207,
      "learning_rate": 6.412696111700481e-06,
      "loss": 0.0226,
      "step": 1108840
    },
    {
      "epoch": 1.814673710257065,
      "grad_norm": 0.24469926953315735,
      "learning_rate": 6.412630219486964e-06,
      "loss": 0.0232,
      "step": 1108860
    },
    {
      "epoch": 1.8147064406957183,
      "grad_norm": 0.23624491691589355,
      "learning_rate": 6.412564327273446e-06,
      "loss": 0.0177,
      "step": 1108880
    },
    {
      "epoch": 1.8147391711343714,
      "grad_norm": 0.6038669943809509,
      "learning_rate": 6.41249843505993e-06,
      "loss": 0.0165,
      "step": 1108900
    },
    {
      "epoch": 1.814771901573025,
      "grad_norm": 0.1997743397951126,
      "learning_rate": 6.4124325428464115e-06,
      "loss": 0.0183,
      "step": 1108920
    },
    {
      "epoch": 1.8148046320116782,
      "grad_norm": 0.31676799058914185,
      "learning_rate": 6.412366650632895e-06,
      "loss": 0.0174,
      "step": 1108940
    },
    {
      "epoch": 1.8148373624503316,
      "grad_norm": 0.42343515157699585,
      "learning_rate": 6.412300758419379e-06,
      "loss": 0.011,
      "step": 1108960
    },
    {
      "epoch": 1.814870092888985,
      "grad_norm": 0.47724223136901855,
      "learning_rate": 6.412234866205861e-06,
      "loss": 0.0148,
      "step": 1108980
    },
    {
      "epoch": 1.814902823327638,
      "grad_norm": 0.35968175530433655,
      "learning_rate": 6.412168973992344e-06,
      "loss": 0.018,
      "step": 1109000
    },
    {
      "epoch": 1.8149355537662917,
      "grad_norm": 0.9560927152633667,
      "learning_rate": 6.412103081778827e-06,
      "loss": 0.0207,
      "step": 1109020
    },
    {
      "epoch": 1.8149682842049448,
      "grad_norm": Infinity,
      "learning_rate": 6.41203718956531e-06,
      "loss": 0.0184,
      "step": 1109040
    },
    {
      "epoch": 1.8150010146435982,
      "grad_norm": 0.3329688608646393,
      "learning_rate": 6.411971297351792e-06,
      "loss": 0.0142,
      "step": 1109060
    },
    {
      "epoch": 1.8150337450822516,
      "grad_norm": 1.0862711668014526,
      "learning_rate": 6.411905405138276e-06,
      "loss": 0.0281,
      "step": 1109080
    },
    {
      "epoch": 1.815066475520905,
      "grad_norm": 0.9681622982025146,
      "learning_rate": 6.411839512924758e-06,
      "loss": 0.0192,
      "step": 1109100
    },
    {
      "epoch": 1.8150992059595583,
      "grad_norm": 0.3041098713874817,
      "learning_rate": 6.4117736207112415e-06,
      "loss": 0.0263,
      "step": 1109120
    },
    {
      "epoch": 1.8151319363982115,
      "grad_norm": 1.8922516107559204,
      "learning_rate": 6.411707728497723e-06,
      "loss": 0.0234,
      "step": 1109140
    },
    {
      "epoch": 1.815164666836865,
      "grad_norm": 0.4818447232246399,
      "learning_rate": 6.411641836284207e-06,
      "loss": 0.0139,
      "step": 1109160
    },
    {
      "epoch": 1.8151973972755182,
      "grad_norm": 0.841713011264801,
      "learning_rate": 6.41157594407069e-06,
      "loss": 0.015,
      "step": 1109180
    },
    {
      "epoch": 1.8152301277141716,
      "grad_norm": 0.7003256678581238,
      "learning_rate": 6.4115100518571724e-06,
      "loss": 0.0183,
      "step": 1109200
    },
    {
      "epoch": 1.815262858152825,
      "grad_norm": 0.7663708925247192,
      "learning_rate": 6.411444159643655e-06,
      "loss": 0.0183,
      "step": 1109220
    },
    {
      "epoch": 1.8152955885914783,
      "grad_norm": 0.4793311357498169,
      "learning_rate": 6.411378267430139e-06,
      "loss": 0.0211,
      "step": 1109240
    },
    {
      "epoch": 1.8153283190301317,
      "grad_norm": 3.0884833335876465,
      "learning_rate": 6.411312375216621e-06,
      "loss": 0.0317,
      "step": 1109260
    },
    {
      "epoch": 1.8153610494687848,
      "grad_norm": 0.7231703996658325,
      "learning_rate": 6.411246483003104e-06,
      "loss": 0.0205,
      "step": 1109280
    },
    {
      "epoch": 1.8153937799074384,
      "grad_norm": 0.2273951917886734,
      "learning_rate": 6.411180590789588e-06,
      "loss": 0.0198,
      "step": 1109300
    },
    {
      "epoch": 1.8154265103460916,
      "grad_norm": 1.1374269723892212,
      "learning_rate": 6.41111469857607e-06,
      "loss": 0.0208,
      "step": 1109320
    },
    {
      "epoch": 1.815459240784745,
      "grad_norm": 1.1787251234054565,
      "learning_rate": 6.411048806362553e-06,
      "loss": 0.0164,
      "step": 1109340
    },
    {
      "epoch": 1.8154919712233983,
      "grad_norm": 0.722871720790863,
      "learning_rate": 6.410982914149035e-06,
      "loss": 0.014,
      "step": 1109360
    },
    {
      "epoch": 1.8155247016620517,
      "grad_norm": 0.38707786798477173,
      "learning_rate": 6.410917021935519e-06,
      "loss": 0.019,
      "step": 1109380
    },
    {
      "epoch": 1.815557432100705,
      "grad_norm": 0.616481602191925,
      "learning_rate": 6.410851129722001e-06,
      "loss": 0.0218,
      "step": 1109400
    },
    {
      "epoch": 1.8155901625393582,
      "grad_norm": 0.40543490648269653,
      "learning_rate": 6.410785237508484e-06,
      "loss": 0.0209,
      "step": 1109420
    },
    {
      "epoch": 1.8156228929780118,
      "grad_norm": 0.6322044730186462,
      "learning_rate": 6.410719345294967e-06,
      "loss": 0.0147,
      "step": 1109440
    },
    {
      "epoch": 1.815655623416665,
      "grad_norm": 0.7919152975082397,
      "learning_rate": 6.41065345308145e-06,
      "loss": 0.0195,
      "step": 1109460
    },
    {
      "epoch": 1.8156883538553183,
      "grad_norm": 0.6849753856658936,
      "learning_rate": 6.4105875608679325e-06,
      "loss": 0.0225,
      "step": 1109480
    },
    {
      "epoch": 1.8157210842939717,
      "grad_norm": 1.1914585828781128,
      "learning_rate": 6.410521668654416e-06,
      "loss": 0.0234,
      "step": 1109500
    },
    {
      "epoch": 1.815753814732625,
      "grad_norm": 0.4862535893917084,
      "learning_rate": 6.410455776440898e-06,
      "loss": 0.0231,
      "step": 1109520
    },
    {
      "epoch": 1.8157865451712785,
      "grad_norm": 0.2244807481765747,
      "learning_rate": 6.4103898842273816e-06,
      "loss": 0.0116,
      "step": 1109540
    },
    {
      "epoch": 1.8158192756099316,
      "grad_norm": 0.38160502910614014,
      "learning_rate": 6.4103239920138634e-06,
      "loss": 0.0218,
      "step": 1109560
    },
    {
      "epoch": 1.8158520060485852,
      "grad_norm": 0.49311375617980957,
      "learning_rate": 6.410258099800347e-06,
      "loss": 0.0103,
      "step": 1109580
    },
    {
      "epoch": 1.8158847364872384,
      "grad_norm": 0.24805279076099396,
      "learning_rate": 6.41019220758683e-06,
      "loss": 0.0175,
      "step": 1109600
    },
    {
      "epoch": 1.8159174669258917,
      "grad_norm": 0.14775300025939941,
      "learning_rate": 6.4101263153733125e-06,
      "loss": 0.0185,
      "step": 1109620
    },
    {
      "epoch": 1.815950197364545,
      "grad_norm": 0.6707296967506409,
      "learning_rate": 6.410060423159796e-06,
      "loss": 0.0186,
      "step": 1109640
    },
    {
      "epoch": 1.8159829278031983,
      "grad_norm": 1.0209033489227295,
      "learning_rate": 6.409994530946279e-06,
      "loss": 0.0181,
      "step": 1109660
    },
    {
      "epoch": 1.8160156582418518,
      "grad_norm": 0.12393902242183685,
      "learning_rate": 6.409928638732762e-06,
      "loss": 0.0135,
      "step": 1109680
    },
    {
      "epoch": 1.816048388680505,
      "grad_norm": 0.6830553412437439,
      "learning_rate": 6.409862746519244e-06,
      "loss": 0.0224,
      "step": 1109700
    },
    {
      "epoch": 1.8160811191191586,
      "grad_norm": 0.0626068040728569,
      "learning_rate": 6.409796854305728e-06,
      "loss": 0.0194,
      "step": 1109720
    },
    {
      "epoch": 1.8161138495578117,
      "grad_norm": 1.0500215291976929,
      "learning_rate": 6.40973096209221e-06,
      "loss": 0.0152,
      "step": 1109740
    },
    {
      "epoch": 1.8161465799964651,
      "grad_norm": 0.21542218327522278,
      "learning_rate": 6.409665069878693e-06,
      "loss": 0.0188,
      "step": 1109760
    },
    {
      "epoch": 1.8161793104351185,
      "grad_norm": 0.18006682395935059,
      "learning_rate": 6.409599177665175e-06,
      "loss": 0.0208,
      "step": 1109780
    },
    {
      "epoch": 1.8162120408737716,
      "grad_norm": 1.6947740316390991,
      "learning_rate": 6.409533285451659e-06,
      "loss": 0.0161,
      "step": 1109800
    },
    {
      "epoch": 1.8162447713124252,
      "grad_norm": 0.5487866997718811,
      "learning_rate": 6.409467393238142e-06,
      "loss": 0.0111,
      "step": 1109820
    },
    {
      "epoch": 1.8162775017510784,
      "grad_norm": 0.37542781233787537,
      "learning_rate": 6.409401501024624e-06,
      "loss": 0.0187,
      "step": 1109840
    },
    {
      "epoch": 1.8163102321897318,
      "grad_norm": 0.26996663212776184,
      "learning_rate": 6.409335608811107e-06,
      "loss": 0.0148,
      "step": 1109860
    },
    {
      "epoch": 1.8163429626283851,
      "grad_norm": 0.11731505393981934,
      "learning_rate": 6.409269716597591e-06,
      "loss": 0.0135,
      "step": 1109880
    },
    {
      "epoch": 1.8163756930670385,
      "grad_norm": 0.4241524636745453,
      "learning_rate": 6.4092038243840726e-06,
      "loss": 0.0149,
      "step": 1109900
    },
    {
      "epoch": 1.8164084235056919,
      "grad_norm": 0.31582626700401306,
      "learning_rate": 6.409137932170556e-06,
      "loss": 0.0229,
      "step": 1109920
    },
    {
      "epoch": 1.816441153944345,
      "grad_norm": 0.27954381704330444,
      "learning_rate": 6.409072039957038e-06,
      "loss": 0.02,
      "step": 1109940
    },
    {
      "epoch": 1.8164738843829986,
      "grad_norm": 1.117514729499817,
      "learning_rate": 6.409006147743522e-06,
      "loss": 0.021,
      "step": 1109960
    },
    {
      "epoch": 1.8165066148216518,
      "grad_norm": 0.5044496655464172,
      "learning_rate": 6.408940255530005e-06,
      "loss": 0.0181,
      "step": 1109980
    },
    {
      "epoch": 1.8165393452603051,
      "grad_norm": 0.3117266297340393,
      "learning_rate": 6.408874363316487e-06,
      "loss": 0.0153,
      "step": 1110000
    },
    {
      "epoch": 1.8165720756989585,
      "grad_norm": 0.8378980159759521,
      "learning_rate": 6.408808471102971e-06,
      "loss": 0.0198,
      "step": 1110020
    },
    {
      "epoch": 1.8166048061376119,
      "grad_norm": 0.710619330406189,
      "learning_rate": 6.4087425788894534e-06,
      "loss": 0.0169,
      "step": 1110040
    },
    {
      "epoch": 1.8166375365762653,
      "grad_norm": 0.3145360052585602,
      "learning_rate": 6.408676686675936e-06,
      "loss": 0.0166,
      "step": 1110060
    },
    {
      "epoch": 1.8166702670149184,
      "grad_norm": 0.7190663814544678,
      "learning_rate": 6.408610794462419e-06,
      "loss": 0.0166,
      "step": 1110080
    },
    {
      "epoch": 1.816702997453572,
      "grad_norm": 0.16126325726509094,
      "learning_rate": 6.4085449022489025e-06,
      "loss": 0.0225,
      "step": 1110100
    },
    {
      "epoch": 1.8167357278922251,
      "grad_norm": 0.2982275187969208,
      "learning_rate": 6.408479010035384e-06,
      "loss": 0.0201,
      "step": 1110120
    },
    {
      "epoch": 1.8167684583308785,
      "grad_norm": 0.40228497982025146,
      "learning_rate": 6.408413117821868e-06,
      "loss": 0.015,
      "step": 1110140
    },
    {
      "epoch": 1.816801188769532,
      "grad_norm": 0.509757936000824,
      "learning_rate": 6.40834722560835e-06,
      "loss": 0.0119,
      "step": 1110160
    },
    {
      "epoch": 1.8168339192081853,
      "grad_norm": 0.5522820949554443,
      "learning_rate": 6.4082813333948335e-06,
      "loss": 0.0146,
      "step": 1110180
    },
    {
      "epoch": 1.8168666496468386,
      "grad_norm": 0.6031805872917175,
      "learning_rate": 6.408215441181315e-06,
      "loss": 0.0216,
      "step": 1110200
    },
    {
      "epoch": 1.8168993800854918,
      "grad_norm": 0.34326091408729553,
      "learning_rate": 6.408149548967799e-06,
      "loss": 0.0165,
      "step": 1110220
    },
    {
      "epoch": 1.8169321105241454,
      "grad_norm": 0.24360060691833496,
      "learning_rate": 6.408083656754282e-06,
      "loss": 0.0136,
      "step": 1110240
    },
    {
      "epoch": 1.8169648409627985,
      "grad_norm": 0.7844210267066956,
      "learning_rate": 6.408017764540765e-06,
      "loss": 0.0205,
      "step": 1110260
    },
    {
      "epoch": 1.816997571401452,
      "grad_norm": 0.8280216455459595,
      "learning_rate": 6.407951872327247e-06,
      "loss": 0.0272,
      "step": 1110280
    },
    {
      "epoch": 1.8170303018401053,
      "grad_norm": 0.3684927821159363,
      "learning_rate": 6.407885980113731e-06,
      "loss": 0.0196,
      "step": 1110300
    },
    {
      "epoch": 1.8170630322787586,
      "grad_norm": 1.0795049667358398,
      "learning_rate": 6.407820087900213e-06,
      "loss": 0.0231,
      "step": 1110320
    },
    {
      "epoch": 1.817095762717412,
      "grad_norm": 0.3526551127433777,
      "learning_rate": 6.407754195686696e-06,
      "loss": 0.0106,
      "step": 1110340
    },
    {
      "epoch": 1.8171284931560652,
      "grad_norm": 0.2878381013870239,
      "learning_rate": 6.40768830347318e-06,
      "loss": 0.0188,
      "step": 1110360
    },
    {
      "epoch": 1.8171612235947188,
      "grad_norm": 0.740767240524292,
      "learning_rate": 6.407622411259662e-06,
      "loss": 0.0239,
      "step": 1110380
    },
    {
      "epoch": 1.817193954033372,
      "grad_norm": 0.2972395718097687,
      "learning_rate": 6.407556519046145e-06,
      "loss": 0.024,
      "step": 1110400
    },
    {
      "epoch": 1.8172266844720253,
      "grad_norm": 0.31401580572128296,
      "learning_rate": 6.407490626832627e-06,
      "loss": 0.014,
      "step": 1110420
    },
    {
      "epoch": 1.8172594149106787,
      "grad_norm": 0.8236762285232544,
      "learning_rate": 6.407424734619111e-06,
      "loss": 0.0205,
      "step": 1110440
    },
    {
      "epoch": 1.8172921453493318,
      "grad_norm": 2.062370777130127,
      "learning_rate": 6.4073588424055935e-06,
      "loss": 0.0211,
      "step": 1110460
    },
    {
      "epoch": 1.8173248757879854,
      "grad_norm": 0.35032930970191956,
      "learning_rate": 6.407292950192076e-06,
      "loss": 0.0119,
      "step": 1110480
    },
    {
      "epoch": 1.8173576062266386,
      "grad_norm": 0.2236122041940689,
      "learning_rate": 6.407227057978559e-06,
      "loss": 0.015,
      "step": 1110500
    },
    {
      "epoch": 1.8173903366652921,
      "grad_norm": 0.7490411996841431,
      "learning_rate": 6.407161165765043e-06,
      "loss": 0.0266,
      "step": 1110520
    },
    {
      "epoch": 1.8174230671039453,
      "grad_norm": 0.11613873392343521,
      "learning_rate": 6.4070952735515245e-06,
      "loss": 0.0122,
      "step": 1110540
    },
    {
      "epoch": 1.8174557975425987,
      "grad_norm": 0.2924216389656067,
      "learning_rate": 6.407029381338008e-06,
      "loss": 0.0213,
      "step": 1110560
    },
    {
      "epoch": 1.817488527981252,
      "grad_norm": 0.423782616853714,
      "learning_rate": 6.40696348912449e-06,
      "loss": 0.0282,
      "step": 1110580
    },
    {
      "epoch": 1.8175212584199052,
      "grad_norm": 0.509289562702179,
      "learning_rate": 6.4068975969109735e-06,
      "loss": 0.0242,
      "step": 1110600
    },
    {
      "epoch": 1.8175539888585588,
      "grad_norm": 0.9568563103675842,
      "learning_rate": 6.406831704697456e-06,
      "loss": 0.0174,
      "step": 1110620
    },
    {
      "epoch": 1.817586719297212,
      "grad_norm": 1.786117434501648,
      "learning_rate": 6.406765812483939e-06,
      "loss": 0.0174,
      "step": 1110640
    },
    {
      "epoch": 1.8176194497358653,
      "grad_norm": 0.18772445619106293,
      "learning_rate": 6.406699920270422e-06,
      "loss": 0.0206,
      "step": 1110660
    },
    {
      "epoch": 1.8176521801745187,
      "grad_norm": 0.3099730610847473,
      "learning_rate": 6.406634028056905e-06,
      "loss": 0.0148,
      "step": 1110680
    },
    {
      "epoch": 1.817684910613172,
      "grad_norm": 2.2427597045898438,
      "learning_rate": 6.406568135843388e-06,
      "loss": 0.0149,
      "step": 1110700
    },
    {
      "epoch": 1.8177176410518254,
      "grad_norm": 0.4940619468688965,
      "learning_rate": 6.406502243629871e-06,
      "loss": 0.0161,
      "step": 1110720
    },
    {
      "epoch": 1.8177503714904786,
      "grad_norm": 0.4772208034992218,
      "learning_rate": 6.406436351416354e-06,
      "loss": 0.0213,
      "step": 1110740
    },
    {
      "epoch": 1.8177831019291322,
      "grad_norm": 0.09702685475349426,
      "learning_rate": 6.406370459202836e-06,
      "loss": 0.019,
      "step": 1110760
    },
    {
      "epoch": 1.8178158323677853,
      "grad_norm": 0.5967967510223389,
      "learning_rate": 6.40630456698932e-06,
      "loss": 0.0148,
      "step": 1110780
    },
    {
      "epoch": 1.8178485628064387,
      "grad_norm": 0.902542233467102,
      "learning_rate": 6.406238674775802e-06,
      "loss": 0.0229,
      "step": 1110800
    },
    {
      "epoch": 1.817881293245092,
      "grad_norm": 0.19106410443782806,
      "learning_rate": 6.406172782562285e-06,
      "loss": 0.0273,
      "step": 1110820
    },
    {
      "epoch": 1.8179140236837454,
      "grad_norm": 0.6187683343887329,
      "learning_rate": 6.406106890348768e-06,
      "loss": 0.012,
      "step": 1110840
    },
    {
      "epoch": 1.8179467541223988,
      "grad_norm": 0.173828125,
      "learning_rate": 6.406040998135251e-06,
      "loss": 0.0261,
      "step": 1110860
    },
    {
      "epoch": 1.817979484561052,
      "grad_norm": 0.5379753708839417,
      "learning_rate": 6.405975105921734e-06,
      "loss": 0.0214,
      "step": 1110880
    },
    {
      "epoch": 1.8180122149997056,
      "grad_norm": 0.7861575484275818,
      "learning_rate": 6.405909213708217e-06,
      "loss": 0.02,
      "step": 1110900
    },
    {
      "epoch": 1.8180449454383587,
      "grad_norm": 0.315613329410553,
      "learning_rate": 6.405843321494699e-06,
      "loss": 0.0214,
      "step": 1110920
    },
    {
      "epoch": 1.818077675877012,
      "grad_norm": 0.922447144985199,
      "learning_rate": 6.405777429281183e-06,
      "loss": 0.0204,
      "step": 1110940
    },
    {
      "epoch": 1.8181104063156655,
      "grad_norm": 0.23813888430595398,
      "learning_rate": 6.4057115370676645e-06,
      "loss": 0.019,
      "step": 1110960
    },
    {
      "epoch": 1.8181431367543188,
      "grad_norm": 0.2818020284175873,
      "learning_rate": 6.405645644854148e-06,
      "loss": 0.0141,
      "step": 1110980
    },
    {
      "epoch": 1.8181758671929722,
      "grad_norm": 0.07147659361362457,
      "learning_rate": 6.405579752640631e-06,
      "loss": 0.0296,
      "step": 1111000
    },
    {
      "epoch": 1.8182085976316253,
      "grad_norm": 0.5913657546043396,
      "learning_rate": 6.405513860427114e-06,
      "loss": 0.0212,
      "step": 1111020
    },
    {
      "epoch": 1.818241328070279,
      "grad_norm": 0.4718019366264343,
      "learning_rate": 6.405447968213597e-06,
      "loss": 0.021,
      "step": 1111040
    },
    {
      "epoch": 1.818274058508932,
      "grad_norm": 1.0038107633590698,
      "learning_rate": 6.40538207600008e-06,
      "loss": 0.0229,
      "step": 1111060
    },
    {
      "epoch": 1.8183067889475855,
      "grad_norm": 0.20800626277923584,
      "learning_rate": 6.405316183786563e-06,
      "loss": 0.0189,
      "step": 1111080
    },
    {
      "epoch": 1.8183395193862388,
      "grad_norm": 0.31199586391448975,
      "learning_rate": 6.405250291573045e-06,
      "loss": 0.021,
      "step": 1111100
    },
    {
      "epoch": 1.8183722498248922,
      "grad_norm": 0.27398353815078735,
      "learning_rate": 6.405184399359529e-06,
      "loss": 0.0174,
      "step": 1111120
    },
    {
      "epoch": 1.8184049802635456,
      "grad_norm": 3.3846442699432373,
      "learning_rate": 6.405118507146011e-06,
      "loss": 0.0126,
      "step": 1111140
    },
    {
      "epoch": 1.8184377107021987,
      "grad_norm": 1.8279656171798706,
      "learning_rate": 6.4050526149324945e-06,
      "loss": 0.0131,
      "step": 1111160
    },
    {
      "epoch": 1.8184704411408523,
      "grad_norm": 0.9786363840103149,
      "learning_rate": 6.404986722718976e-06,
      "loss": 0.0202,
      "step": 1111180
    },
    {
      "epoch": 1.8185031715795055,
      "grad_norm": 0.6690781116485596,
      "learning_rate": 6.40492083050546e-06,
      "loss": 0.0224,
      "step": 1111200
    },
    {
      "epoch": 1.8185359020181588,
      "grad_norm": 0.6203265190124512,
      "learning_rate": 6.404854938291942e-06,
      "loss": 0.016,
      "step": 1111220
    },
    {
      "epoch": 1.8185686324568122,
      "grad_norm": 0.8469093441963196,
      "learning_rate": 6.4047890460784254e-06,
      "loss": 0.0172,
      "step": 1111240
    },
    {
      "epoch": 1.8186013628954654,
      "grad_norm": 0.35097038745880127,
      "learning_rate": 6.404723153864908e-06,
      "loss": 0.0175,
      "step": 1111260
    },
    {
      "epoch": 1.818634093334119,
      "grad_norm": 0.4901886284351349,
      "learning_rate": 6.404657261651391e-06,
      "loss": 0.0192,
      "step": 1111280
    },
    {
      "epoch": 1.8186668237727721,
      "grad_norm": 0.32853537797927856,
      "learning_rate": 6.404591369437874e-06,
      "loss": 0.0171,
      "step": 1111300
    },
    {
      "epoch": 1.8186995542114257,
      "grad_norm": 0.22126349806785583,
      "learning_rate": 6.404525477224357e-06,
      "loss": 0.0166,
      "step": 1111320
    },
    {
      "epoch": 1.8187322846500789,
      "grad_norm": 0.6988253593444824,
      "learning_rate": 6.404459585010839e-06,
      "loss": 0.0143,
      "step": 1111340
    },
    {
      "epoch": 1.8187650150887322,
      "grad_norm": 0.5828489065170288,
      "learning_rate": 6.404393692797323e-06,
      "loss": 0.0247,
      "step": 1111360
    },
    {
      "epoch": 1.8187977455273856,
      "grad_norm": 0.38137519359588623,
      "learning_rate": 6.404327800583805e-06,
      "loss": 0.0216,
      "step": 1111380
    },
    {
      "epoch": 1.8188304759660388,
      "grad_norm": 0.5679538249969482,
      "learning_rate": 6.404261908370288e-06,
      "loss": 0.0176,
      "step": 1111400
    },
    {
      "epoch": 1.8188632064046923,
      "grad_norm": 0.4922788441181183,
      "learning_rate": 6.404196016156772e-06,
      "loss": 0.0233,
      "step": 1111420
    },
    {
      "epoch": 1.8188959368433455,
      "grad_norm": 0.4522228240966797,
      "learning_rate": 6.404130123943254e-06,
      "loss": 0.0149,
      "step": 1111440
    },
    {
      "epoch": 1.8189286672819989,
      "grad_norm": 0.08116477727890015,
      "learning_rate": 6.404064231729737e-06,
      "loss": 0.0206,
      "step": 1111460
    },
    {
      "epoch": 1.8189613977206522,
      "grad_norm": 0.7143620252609253,
      "learning_rate": 6.40399833951622e-06,
      "loss": 0.0197,
      "step": 1111480
    },
    {
      "epoch": 1.8189941281593056,
      "grad_norm": 0.27372032403945923,
      "learning_rate": 6.403932447302703e-06,
      "loss": 0.0184,
      "step": 1111500
    },
    {
      "epoch": 1.819026858597959,
      "grad_norm": 0.27209949493408203,
      "learning_rate": 6.4038665550891855e-06,
      "loss": 0.0207,
      "step": 1111520
    },
    {
      "epoch": 1.8190595890366121,
      "grad_norm": 0.18929266929626465,
      "learning_rate": 6.403800662875669e-06,
      "loss": 0.0136,
      "step": 1111540
    },
    {
      "epoch": 1.8190923194752657,
      "grad_norm": 0.8930637240409851,
      "learning_rate": 6.403734770662151e-06,
      "loss": 0.0164,
      "step": 1111560
    },
    {
      "epoch": 1.8191250499139189,
      "grad_norm": 0.08830223232507706,
      "learning_rate": 6.4036688784486346e-06,
      "loss": 0.0167,
      "step": 1111580
    },
    {
      "epoch": 1.8191577803525723,
      "grad_norm": 0.3398912250995636,
      "learning_rate": 6.4036029862351165e-06,
      "loss": 0.0254,
      "step": 1111600
    },
    {
      "epoch": 1.8191905107912256,
      "grad_norm": 0.6424620151519775,
      "learning_rate": 6.4035370940216e-06,
      "loss": 0.02,
      "step": 1111620
    },
    {
      "epoch": 1.819223241229879,
      "grad_norm": 0.4453139007091522,
      "learning_rate": 6.403471201808083e-06,
      "loss": 0.0176,
      "step": 1111640
    },
    {
      "epoch": 1.8192559716685324,
      "grad_norm": 0.2632623016834259,
      "learning_rate": 6.4034053095945655e-06,
      "loss": 0.0164,
      "step": 1111660
    },
    {
      "epoch": 1.8192887021071855,
      "grad_norm": 0.16097167134284973,
      "learning_rate": 6.403339417381048e-06,
      "loss": 0.0119,
      "step": 1111680
    },
    {
      "epoch": 1.8193214325458391,
      "grad_norm": 0.22273510694503784,
      "learning_rate": 6.403273525167532e-06,
      "loss": 0.0211,
      "step": 1111700
    },
    {
      "epoch": 1.8193541629844923,
      "grad_norm": 0.8505346775054932,
      "learning_rate": 6.403207632954014e-06,
      "loss": 0.0262,
      "step": 1111720
    },
    {
      "epoch": 1.8193868934231456,
      "grad_norm": 0.9158464074134827,
      "learning_rate": 6.403141740740497e-06,
      "loss": 0.0237,
      "step": 1111740
    },
    {
      "epoch": 1.819419623861799,
      "grad_norm": 0.2847627103328705,
      "learning_rate": 6.403075848526981e-06,
      "loss": 0.0158,
      "step": 1111760
    },
    {
      "epoch": 1.8194523543004524,
      "grad_norm": 0.20369523763656616,
      "learning_rate": 6.403009956313463e-06,
      "loss": 0.0151,
      "step": 1111780
    },
    {
      "epoch": 1.8194850847391058,
      "grad_norm": 0.13326716423034668,
      "learning_rate": 6.402944064099946e-06,
      "loss": 0.0187,
      "step": 1111800
    },
    {
      "epoch": 1.819517815177759,
      "grad_norm": 0.2869875729084015,
      "learning_rate": 6.402878171886428e-06,
      "loss": 0.0155,
      "step": 1111820
    },
    {
      "epoch": 1.8195505456164125,
      "grad_norm": 0.22981974482536316,
      "learning_rate": 6.402812279672912e-06,
      "loss": 0.0161,
      "step": 1111840
    },
    {
      "epoch": 1.8195832760550656,
      "grad_norm": 0.4532810151576996,
      "learning_rate": 6.402746387459395e-06,
      "loss": 0.0304,
      "step": 1111860
    },
    {
      "epoch": 1.819616006493719,
      "grad_norm": 0.2772581875324249,
      "learning_rate": 6.402680495245877e-06,
      "loss": 0.0194,
      "step": 1111880
    },
    {
      "epoch": 1.8196487369323724,
      "grad_norm": 0.655998945236206,
      "learning_rate": 6.40261460303236e-06,
      "loss": 0.028,
      "step": 1111900
    },
    {
      "epoch": 1.8196814673710255,
      "grad_norm": 0.7737617492675781,
      "learning_rate": 6.402548710818844e-06,
      "loss": 0.0122,
      "step": 1111920
    },
    {
      "epoch": 1.8197141978096791,
      "grad_norm": 2.9496853351593018,
      "learning_rate": 6.4024828186053256e-06,
      "loss": 0.0164,
      "step": 1111940
    },
    {
      "epoch": 1.8197469282483323,
      "grad_norm": 1.4392566680908203,
      "learning_rate": 6.402416926391809e-06,
      "loss": 0.0173,
      "step": 1111960
    },
    {
      "epoch": 1.8197796586869859,
      "grad_norm": 0.18663930892944336,
      "learning_rate": 6.402351034178291e-06,
      "loss": 0.019,
      "step": 1111980
    },
    {
      "epoch": 1.819812389125639,
      "grad_norm": 0.5636159181594849,
      "learning_rate": 6.402285141964775e-06,
      "loss": 0.0145,
      "step": 1112000
    },
    {
      "epoch": 1.8198451195642924,
      "grad_norm": 0.16463881731033325,
      "learning_rate": 6.402219249751257e-06,
      "loss": 0.0146,
      "step": 1112020
    },
    {
      "epoch": 1.8198778500029458,
      "grad_norm": 0.543390154838562,
      "learning_rate": 6.40215335753774e-06,
      "loss": 0.0289,
      "step": 1112040
    },
    {
      "epoch": 1.819910580441599,
      "grad_norm": 0.22964319586753845,
      "learning_rate": 6.402087465324223e-06,
      "loss": 0.0169,
      "step": 1112060
    },
    {
      "epoch": 1.8199433108802525,
      "grad_norm": 0.2466137558221817,
      "learning_rate": 6.4020215731107064e-06,
      "loss": 0.0164,
      "step": 1112080
    },
    {
      "epoch": 1.8199760413189057,
      "grad_norm": 1.7623767852783203,
      "learning_rate": 6.401955680897189e-06,
      "loss": 0.012,
      "step": 1112100
    },
    {
      "epoch": 1.820008771757559,
      "grad_norm": 0.456996351480484,
      "learning_rate": 6.401889788683672e-06,
      "loss": 0.0178,
      "step": 1112120
    },
    {
      "epoch": 1.8200415021962124,
      "grad_norm": 0.6541454792022705,
      "learning_rate": 6.4018238964701555e-06,
      "loss": 0.0192,
      "step": 1112140
    },
    {
      "epoch": 1.8200742326348658,
      "grad_norm": 0.25655895471572876,
      "learning_rate": 6.401758004256637e-06,
      "loss": 0.0161,
      "step": 1112160
    },
    {
      "epoch": 1.8201069630735192,
      "grad_norm": 1.0248886346817017,
      "learning_rate": 6.401692112043121e-06,
      "loss": 0.013,
      "step": 1112180
    },
    {
      "epoch": 1.8201396935121723,
      "grad_norm": 0.6058119535446167,
      "learning_rate": 6.401626219829603e-06,
      "loss": 0.0194,
      "step": 1112200
    },
    {
      "epoch": 1.820172423950826,
      "grad_norm": 0.9850524663925171,
      "learning_rate": 6.4015603276160865e-06,
      "loss": 0.0147,
      "step": 1112220
    },
    {
      "epoch": 1.820205154389479,
      "grad_norm": 0.7611474394798279,
      "learning_rate": 6.401494435402568e-06,
      "loss": 0.0217,
      "step": 1112240
    },
    {
      "epoch": 1.8202378848281324,
      "grad_norm": 0.7452678680419922,
      "learning_rate": 6.401428543189052e-06,
      "loss": 0.0165,
      "step": 1112260
    },
    {
      "epoch": 1.8202706152667858,
      "grad_norm": 1.5413013696670532,
      "learning_rate": 6.401362650975535e-06,
      "loss": 0.021,
      "step": 1112280
    },
    {
      "epoch": 1.8203033457054392,
      "grad_norm": 1.0962032079696655,
      "learning_rate": 6.4012967587620174e-06,
      "loss": 0.0306,
      "step": 1112300
    },
    {
      "epoch": 1.8203360761440925,
      "grad_norm": 0.3871477246284485,
      "learning_rate": 6.4012308665485e-06,
      "loss": 0.0188,
      "step": 1112320
    },
    {
      "epoch": 1.8203688065827457,
      "grad_norm": 0.7213197350502014,
      "learning_rate": 6.401164974334984e-06,
      "loss": 0.0158,
      "step": 1112340
    },
    {
      "epoch": 1.8204015370213993,
      "grad_norm": 0.21123798191547394,
      "learning_rate": 6.401099082121466e-06,
      "loss": 0.02,
      "step": 1112360
    },
    {
      "epoch": 1.8204342674600524,
      "grad_norm": 0.7925052046775818,
      "learning_rate": 6.401033189907949e-06,
      "loss": 0.0178,
      "step": 1112380
    },
    {
      "epoch": 1.8204669978987058,
      "grad_norm": 0.4916006922721863,
      "learning_rate": 6.400967297694431e-06,
      "loss": 0.0164,
      "step": 1112400
    },
    {
      "epoch": 1.8204997283373592,
      "grad_norm": 0.36980774998664856,
      "learning_rate": 6.400901405480915e-06,
      "loss": 0.0236,
      "step": 1112420
    },
    {
      "epoch": 1.8205324587760126,
      "grad_norm": 0.4206002950668335,
      "learning_rate": 6.4008355132673975e-06,
      "loss": 0.0142,
      "step": 1112440
    },
    {
      "epoch": 1.820565189214666,
      "grad_norm": 0.1470135599374771,
      "learning_rate": 6.40076962105388e-06,
      "loss": 0.0179,
      "step": 1112460
    },
    {
      "epoch": 1.820597919653319,
      "grad_norm": 0.9025600552558899,
      "learning_rate": 6.400703728840364e-06,
      "loss": 0.0183,
      "step": 1112480
    },
    {
      "epoch": 1.8206306500919727,
      "grad_norm": 0.6253392100334167,
      "learning_rate": 6.4006378366268465e-06,
      "loss": 0.0192,
      "step": 1112500
    },
    {
      "epoch": 1.8206633805306258,
      "grad_norm": 0.3548843562602997,
      "learning_rate": 6.400571944413329e-06,
      "loss": 0.0112,
      "step": 1112520
    },
    {
      "epoch": 1.8206961109692792,
      "grad_norm": 0.6438056826591492,
      "learning_rate": 6.400506052199812e-06,
      "loss": 0.0155,
      "step": 1112540
    },
    {
      "epoch": 1.8207288414079326,
      "grad_norm": 0.7430167198181152,
      "learning_rate": 6.400440159986296e-06,
      "loss": 0.0209,
      "step": 1112560
    },
    {
      "epoch": 1.820761571846586,
      "grad_norm": 1.2413665056228638,
      "learning_rate": 6.4003742677727775e-06,
      "loss": 0.0177,
      "step": 1112580
    },
    {
      "epoch": 1.8207943022852393,
      "grad_norm": 0.2403201013803482,
      "learning_rate": 6.400308375559261e-06,
      "loss": 0.0206,
      "step": 1112600
    },
    {
      "epoch": 1.8208270327238925,
      "grad_norm": 0.5518162250518799,
      "learning_rate": 6.400242483345743e-06,
      "loss": 0.0154,
      "step": 1112620
    },
    {
      "epoch": 1.820859763162546,
      "grad_norm": 0.23852743208408356,
      "learning_rate": 6.4001765911322265e-06,
      "loss": 0.0137,
      "step": 1112640
    },
    {
      "epoch": 1.8208924936011992,
      "grad_norm": 0.7178024649620056,
      "learning_rate": 6.400110698918709e-06,
      "loss": 0.0154,
      "step": 1112660
    },
    {
      "epoch": 1.8209252240398526,
      "grad_norm": 0.6120633482933044,
      "learning_rate": 6.400044806705192e-06,
      "loss": 0.022,
      "step": 1112680
    },
    {
      "epoch": 1.820957954478506,
      "grad_norm": 0.4147219657897949,
      "learning_rate": 6.399978914491675e-06,
      "loss": 0.0181,
      "step": 1112700
    },
    {
      "epoch": 1.820990684917159,
      "grad_norm": 0.1558106392621994,
      "learning_rate": 6.399913022278158e-06,
      "loss": 0.0231,
      "step": 1112720
    },
    {
      "epoch": 1.8210234153558127,
      "grad_norm": 0.25024697184562683,
      "learning_rate": 6.39984713006464e-06,
      "loss": 0.0226,
      "step": 1112740
    },
    {
      "epoch": 1.8210561457944658,
      "grad_norm": 0.26190757751464844,
      "learning_rate": 6.399781237851124e-06,
      "loss": 0.0253,
      "step": 1112760
    },
    {
      "epoch": 1.8210888762331194,
      "grad_norm": 0.3205980956554413,
      "learning_rate": 6.399715345637606e-06,
      "loss": 0.0195,
      "step": 1112780
    },
    {
      "epoch": 1.8211216066717726,
      "grad_norm": 0.7743537425994873,
      "learning_rate": 6.399649453424089e-06,
      "loss": 0.0205,
      "step": 1112800
    },
    {
      "epoch": 1.821154337110426,
      "grad_norm": 0.5031322836875916,
      "learning_rate": 6.399583561210573e-06,
      "loss": 0.0174,
      "step": 1112820
    },
    {
      "epoch": 1.8211870675490793,
      "grad_norm": 0.6553122401237488,
      "learning_rate": 6.399517668997055e-06,
      "loss": 0.0187,
      "step": 1112840
    },
    {
      "epoch": 1.8212197979877325,
      "grad_norm": 0.6360917687416077,
      "learning_rate": 6.399451776783538e-06,
      "loss": 0.0178,
      "step": 1112860
    },
    {
      "epoch": 1.821252528426386,
      "grad_norm": 0.43812012672424316,
      "learning_rate": 6.399385884570021e-06,
      "loss": 0.0137,
      "step": 1112880
    },
    {
      "epoch": 1.8212852588650392,
      "grad_norm": 0.5958142876625061,
      "learning_rate": 6.399319992356504e-06,
      "loss": 0.0175,
      "step": 1112900
    },
    {
      "epoch": 1.8213179893036926,
      "grad_norm": 1.4352222681045532,
      "learning_rate": 6.399254100142987e-06,
      "loss": 0.0221,
      "step": 1112920
    },
    {
      "epoch": 1.821350719742346,
      "grad_norm": 0.7524920701980591,
      "learning_rate": 6.39918820792947e-06,
      "loss": 0.0146,
      "step": 1112940
    },
    {
      "epoch": 1.8213834501809993,
      "grad_norm": 0.8353212475776672,
      "learning_rate": 6.399122315715952e-06,
      "loss": 0.0175,
      "step": 1112960
    },
    {
      "epoch": 1.8214161806196527,
      "grad_norm": 0.15905584394931793,
      "learning_rate": 6.399056423502436e-06,
      "loss": 0.0217,
      "step": 1112980
    },
    {
      "epoch": 1.8214489110583059,
      "grad_norm": 0.07546458393335342,
      "learning_rate": 6.3989905312889176e-06,
      "loss": 0.0182,
      "step": 1113000
    },
    {
      "epoch": 1.8214816414969595,
      "grad_norm": 0.9331426024436951,
      "learning_rate": 6.398924639075401e-06,
      "loss": 0.0166,
      "step": 1113020
    },
    {
      "epoch": 1.8215143719356126,
      "grad_norm": 0.28040897846221924,
      "learning_rate": 6.398858746861884e-06,
      "loss": 0.0204,
      "step": 1113040
    },
    {
      "epoch": 1.821547102374266,
      "grad_norm": 0.21794186532497406,
      "learning_rate": 6.398792854648367e-06,
      "loss": 0.0169,
      "step": 1113060
    },
    {
      "epoch": 1.8215798328129194,
      "grad_norm": 0.3104378879070282,
      "learning_rate": 6.398726962434849e-06,
      "loss": 0.0175,
      "step": 1113080
    },
    {
      "epoch": 1.8216125632515727,
      "grad_norm": 0.685265839099884,
      "learning_rate": 6.398661070221333e-06,
      "loss": 0.0155,
      "step": 1113100
    },
    {
      "epoch": 1.821645293690226,
      "grad_norm": 0.42705580592155457,
      "learning_rate": 6.398595178007815e-06,
      "loss": 0.0134,
      "step": 1113120
    },
    {
      "epoch": 1.8216780241288792,
      "grad_norm": 0.5680234432220459,
      "learning_rate": 6.3985292857942984e-06,
      "loss": 0.0237,
      "step": 1113140
    },
    {
      "epoch": 1.8217107545675328,
      "grad_norm": 0.24820712208747864,
      "learning_rate": 6.398463393580782e-06,
      "loss": 0.0169,
      "step": 1113160
    },
    {
      "epoch": 1.821743485006186,
      "grad_norm": 0.2847185432910919,
      "learning_rate": 6.398397501367264e-06,
      "loss": 0.0171,
      "step": 1113180
    },
    {
      "epoch": 1.8217762154448394,
      "grad_norm": 1.011441707611084,
      "learning_rate": 6.3983316091537475e-06,
      "loss": 0.0172,
      "step": 1113200
    },
    {
      "epoch": 1.8218089458834927,
      "grad_norm": 2.500633716583252,
      "learning_rate": 6.398265716940229e-06,
      "loss": 0.0145,
      "step": 1113220
    },
    {
      "epoch": 1.821841676322146,
      "grad_norm": 0.30158859491348267,
      "learning_rate": 6.398199824726713e-06,
      "loss": 0.0218,
      "step": 1113240
    },
    {
      "epoch": 1.8218744067607995,
      "grad_norm": 0.23445354402065277,
      "learning_rate": 6.398133932513195e-06,
      "loss": 0.0189,
      "step": 1113260
    },
    {
      "epoch": 1.8219071371994526,
      "grad_norm": 0.3185512125492096,
      "learning_rate": 6.3980680402996785e-06,
      "loss": 0.0171,
      "step": 1113280
    },
    {
      "epoch": 1.8219398676381062,
      "grad_norm": 0.7426723837852478,
      "learning_rate": 6.398002148086161e-06,
      "loss": 0.0163,
      "step": 1113300
    },
    {
      "epoch": 1.8219725980767594,
      "grad_norm": 0.24068205058574677,
      "learning_rate": 6.397936255872644e-06,
      "loss": 0.0143,
      "step": 1113320
    },
    {
      "epoch": 1.8220053285154127,
      "grad_norm": 1.1324928998947144,
      "learning_rate": 6.397870363659127e-06,
      "loss": 0.0225,
      "step": 1113340
    },
    {
      "epoch": 1.8220380589540661,
      "grad_norm": 1.7381556034088135,
      "learning_rate": 6.39780447144561e-06,
      "loss": 0.019,
      "step": 1113360
    },
    {
      "epoch": 1.8220707893927195,
      "grad_norm": 0.2057313472032547,
      "learning_rate": 6.397738579232092e-06,
      "loss": 0.0206,
      "step": 1113380
    },
    {
      "epoch": 1.8221035198313729,
      "grad_norm": 0.9130662083625793,
      "learning_rate": 6.397672687018576e-06,
      "loss": 0.0311,
      "step": 1113400
    },
    {
      "epoch": 1.822136250270026,
      "grad_norm": 0.6804637908935547,
      "learning_rate": 6.397606794805058e-06,
      "loss": 0.0186,
      "step": 1113420
    },
    {
      "epoch": 1.8221689807086796,
      "grad_norm": 1.1374399662017822,
      "learning_rate": 6.397540902591541e-06,
      "loss": 0.013,
      "step": 1113440
    },
    {
      "epoch": 1.8222017111473328,
      "grad_norm": 2.824284315109253,
      "learning_rate": 6.397475010378024e-06,
      "loss": 0.0194,
      "step": 1113460
    },
    {
      "epoch": 1.8222344415859861,
      "grad_norm": 0.8753806948661804,
      "learning_rate": 6.397409118164507e-06,
      "loss": 0.0207,
      "step": 1113480
    },
    {
      "epoch": 1.8222671720246395,
      "grad_norm": 0.5782433748245239,
      "learning_rate": 6.39734322595099e-06,
      "loss": 0.0251,
      "step": 1113500
    },
    {
      "epoch": 1.8222999024632927,
      "grad_norm": 0.44849780201911926,
      "learning_rate": 6.397277333737473e-06,
      "loss": 0.0178,
      "step": 1113520
    },
    {
      "epoch": 1.8223326329019462,
      "grad_norm": 0.8652679324150085,
      "learning_rate": 6.397211441523956e-06,
      "loss": 0.0294,
      "step": 1113540
    },
    {
      "epoch": 1.8223653633405994,
      "grad_norm": 0.8358428478240967,
      "learning_rate": 6.3971455493104385e-06,
      "loss": 0.0188,
      "step": 1113560
    },
    {
      "epoch": 1.822398093779253,
      "grad_norm": 0.2199864238500595,
      "learning_rate": 6.397079657096922e-06,
      "loss": 0.016,
      "step": 1113580
    },
    {
      "epoch": 1.8224308242179061,
      "grad_norm": 0.2893819510936737,
      "learning_rate": 6.397013764883404e-06,
      "loss": 0.019,
      "step": 1113600
    },
    {
      "epoch": 1.8224635546565595,
      "grad_norm": 0.22893162071704865,
      "learning_rate": 6.3969478726698876e-06,
      "loss": 0.022,
      "step": 1113620
    },
    {
      "epoch": 1.8224962850952129,
      "grad_norm": 0.36983436346054077,
      "learning_rate": 6.3968819804563695e-06,
      "loss": 0.0145,
      "step": 1113640
    },
    {
      "epoch": 1.822529015533866,
      "grad_norm": 0.2882780432701111,
      "learning_rate": 6.396816088242853e-06,
      "loss": 0.0133,
      "step": 1113660
    },
    {
      "epoch": 1.8225617459725196,
      "grad_norm": 1.04432213306427,
      "learning_rate": 6.396750196029336e-06,
      "loss": 0.0197,
      "step": 1113680
    },
    {
      "epoch": 1.8225944764111728,
      "grad_norm": 0.6072490811347961,
      "learning_rate": 6.3966843038158185e-06,
      "loss": 0.0197,
      "step": 1113700
    },
    {
      "epoch": 1.8226272068498262,
      "grad_norm": 0.3357737064361572,
      "learning_rate": 6.396618411602301e-06,
      "loss": 0.0146,
      "step": 1113720
    },
    {
      "epoch": 1.8226599372884795,
      "grad_norm": 0.40578100085258484,
      "learning_rate": 6.396552519388785e-06,
      "loss": 0.0223,
      "step": 1113740
    },
    {
      "epoch": 1.822692667727133,
      "grad_norm": 0.2909359037876129,
      "learning_rate": 6.396486627175267e-06,
      "loss": 0.0122,
      "step": 1113760
    },
    {
      "epoch": 1.8227253981657863,
      "grad_norm": 0.24406252801418304,
      "learning_rate": 6.39642073496175e-06,
      "loss": 0.0163,
      "step": 1113780
    },
    {
      "epoch": 1.8227581286044394,
      "grad_norm": 0.22315102815628052,
      "learning_rate": 6.396354842748232e-06,
      "loss": 0.021,
      "step": 1113800
    },
    {
      "epoch": 1.822790859043093,
      "grad_norm": 0.25009819865226746,
      "learning_rate": 6.396288950534716e-06,
      "loss": 0.0162,
      "step": 1113820
    },
    {
      "epoch": 1.8228235894817462,
      "grad_norm": 0.30903664231300354,
      "learning_rate": 6.3962230583211986e-06,
      "loss": 0.015,
      "step": 1113840
    },
    {
      "epoch": 1.8228563199203995,
      "grad_norm": 0.7870553731918335,
      "learning_rate": 6.396157166107681e-06,
      "loss": 0.0171,
      "step": 1113860
    },
    {
      "epoch": 1.822889050359053,
      "grad_norm": 1.0051257610321045,
      "learning_rate": 6.396091273894165e-06,
      "loss": 0.0271,
      "step": 1113880
    },
    {
      "epoch": 1.8229217807977063,
      "grad_norm": 0.3634517192840576,
      "learning_rate": 6.396025381680648e-06,
      "loss": 0.0167,
      "step": 1113900
    },
    {
      "epoch": 1.8229545112363597,
      "grad_norm": 0.5664086937904358,
      "learning_rate": 6.39595948946713e-06,
      "loss": 0.0174,
      "step": 1113920
    },
    {
      "epoch": 1.8229872416750128,
      "grad_norm": 0.09042207896709442,
      "learning_rate": 6.395893597253613e-06,
      "loss": 0.014,
      "step": 1113940
    },
    {
      "epoch": 1.8230199721136664,
      "grad_norm": 1.1464213132858276,
      "learning_rate": 6.395827705040097e-06,
      "loss": 0.0234,
      "step": 1113960
    },
    {
      "epoch": 1.8230527025523195,
      "grad_norm": 0.558925211429596,
      "learning_rate": 6.395761812826579e-06,
      "loss": 0.0233,
      "step": 1113980
    },
    {
      "epoch": 1.823085432990973,
      "grad_norm": 0.6623010635375977,
      "learning_rate": 6.395695920613062e-06,
      "loss": 0.021,
      "step": 1114000
    },
    {
      "epoch": 1.8231181634296263,
      "grad_norm": 0.15746071934700012,
      "learning_rate": 6.395630028399544e-06,
      "loss": 0.0223,
      "step": 1114020
    },
    {
      "epoch": 1.8231508938682797,
      "grad_norm": 0.41811177134513855,
      "learning_rate": 6.395564136186028e-06,
      "loss": 0.0207,
      "step": 1114040
    },
    {
      "epoch": 1.823183624306933,
      "grad_norm": 0.6901924014091492,
      "learning_rate": 6.3954982439725095e-06,
      "loss": 0.0237,
      "step": 1114060
    },
    {
      "epoch": 1.8232163547455862,
      "grad_norm": 0.573336124420166,
      "learning_rate": 6.395432351758993e-06,
      "loss": 0.0172,
      "step": 1114080
    },
    {
      "epoch": 1.8232490851842398,
      "grad_norm": 0.2930520474910736,
      "learning_rate": 6.395366459545476e-06,
      "loss": 0.0231,
      "step": 1114100
    },
    {
      "epoch": 1.823281815622893,
      "grad_norm": 0.44901353120803833,
      "learning_rate": 6.3953005673319594e-06,
      "loss": 0.0164,
      "step": 1114120
    },
    {
      "epoch": 1.8233145460615463,
      "grad_norm": 0.292309433221817,
      "learning_rate": 6.395234675118441e-06,
      "loss": 0.0122,
      "step": 1114140
    },
    {
      "epoch": 1.8233472765001997,
      "grad_norm": 0.31330356001853943,
      "learning_rate": 6.395168782904925e-06,
      "loss": 0.018,
      "step": 1114160
    },
    {
      "epoch": 1.823380006938853,
      "grad_norm": 0.21792291104793549,
      "learning_rate": 6.395102890691407e-06,
      "loss": 0.0185,
      "step": 1114180
    },
    {
      "epoch": 1.8234127373775064,
      "grad_norm": 0.29051998257637024,
      "learning_rate": 6.39503699847789e-06,
      "loss": 0.0155,
      "step": 1114200
    },
    {
      "epoch": 1.8234454678161596,
      "grad_norm": 0.5572921633720398,
      "learning_rate": 6.394971106264374e-06,
      "loss": 0.0174,
      "step": 1114220
    },
    {
      "epoch": 1.8234781982548132,
      "grad_norm": 0.1967092603445053,
      "learning_rate": 6.394905214050856e-06,
      "loss": 0.0183,
      "step": 1114240
    },
    {
      "epoch": 1.8235109286934663,
      "grad_norm": 0.07236640155315399,
      "learning_rate": 6.3948393218373395e-06,
      "loss": 0.0144,
      "step": 1114260
    },
    {
      "epoch": 1.8235436591321197,
      "grad_norm": 0.9686111807823181,
      "learning_rate": 6.394773429623821e-06,
      "loss": 0.0116,
      "step": 1114280
    },
    {
      "epoch": 1.823576389570773,
      "grad_norm": 0.19497662782669067,
      "learning_rate": 6.394707537410305e-06,
      "loss": 0.0182,
      "step": 1114300
    },
    {
      "epoch": 1.8236091200094262,
      "grad_norm": 0.25462087988853455,
      "learning_rate": 6.394641645196788e-06,
      "loss": 0.0185,
      "step": 1114320
    },
    {
      "epoch": 1.8236418504480798,
      "grad_norm": 0.7378532886505127,
      "learning_rate": 6.3945757529832704e-06,
      "loss": 0.0182,
      "step": 1114340
    },
    {
      "epoch": 1.823674580886733,
      "grad_norm": 0.11147244274616241,
      "learning_rate": 6.394509860769753e-06,
      "loss": 0.012,
      "step": 1114360
    },
    {
      "epoch": 1.8237073113253865,
      "grad_norm": 0.0722326785326004,
      "learning_rate": 6.394443968556237e-06,
      "loss": 0.0264,
      "step": 1114380
    },
    {
      "epoch": 1.8237400417640397,
      "grad_norm": 0.3432670831680298,
      "learning_rate": 6.394378076342719e-06,
      "loss": 0.0123,
      "step": 1114400
    },
    {
      "epoch": 1.823772772202693,
      "grad_norm": 0.2226048856973648,
      "learning_rate": 6.394312184129202e-06,
      "loss": 0.0188,
      "step": 1114420
    },
    {
      "epoch": 1.8238055026413464,
      "grad_norm": 0.19502867758274078,
      "learning_rate": 6.394246291915684e-06,
      "loss": 0.0205,
      "step": 1114440
    },
    {
      "epoch": 1.8238382330799996,
      "grad_norm": 0.5878322720527649,
      "learning_rate": 6.394180399702168e-06,
      "loss": 0.015,
      "step": 1114460
    },
    {
      "epoch": 1.8238709635186532,
      "grad_norm": 2.394335985183716,
      "learning_rate": 6.3941145074886505e-06,
      "loss": 0.0183,
      "step": 1114480
    },
    {
      "epoch": 1.8239036939573063,
      "grad_norm": 0.7034022808074951,
      "learning_rate": 6.394048615275133e-06,
      "loss": 0.0247,
      "step": 1114500
    },
    {
      "epoch": 1.8239364243959597,
      "grad_norm": 0.0776498019695282,
      "learning_rate": 6.393982723061616e-06,
      "loss": 0.0143,
      "step": 1114520
    },
    {
      "epoch": 1.823969154834613,
      "grad_norm": 1.5315006971359253,
      "learning_rate": 6.3939168308480995e-06,
      "loss": 0.0173,
      "step": 1114540
    },
    {
      "epoch": 1.8240018852732665,
      "grad_norm": 0.8872703909873962,
      "learning_rate": 6.393850938634582e-06,
      "loss": 0.0177,
      "step": 1114560
    },
    {
      "epoch": 1.8240346157119198,
      "grad_norm": 0.8038986325263977,
      "learning_rate": 6.393785046421065e-06,
      "loss": 0.0242,
      "step": 1114580
    },
    {
      "epoch": 1.824067346150573,
      "grad_norm": 0.502285361289978,
      "learning_rate": 6.393719154207549e-06,
      "loss": 0.0168,
      "step": 1114600
    },
    {
      "epoch": 1.8241000765892266,
      "grad_norm": 0.3414502441883087,
      "learning_rate": 6.3936532619940305e-06,
      "loss": 0.0198,
      "step": 1114620
    },
    {
      "epoch": 1.8241328070278797,
      "grad_norm": 0.09886829555034637,
      "learning_rate": 6.393587369780514e-06,
      "loss": 0.0212,
      "step": 1114640
    },
    {
      "epoch": 1.824165537466533,
      "grad_norm": 0.2738822400569916,
      "learning_rate": 6.393521477566996e-06,
      "loss": 0.021,
      "step": 1114660
    },
    {
      "epoch": 1.8241982679051865,
      "grad_norm": 0.6821513772010803,
      "learning_rate": 6.3934555853534796e-06,
      "loss": 0.0139,
      "step": 1114680
    },
    {
      "epoch": 1.8242309983438398,
      "grad_norm": 0.658334493637085,
      "learning_rate": 6.393389693139962e-06,
      "loss": 0.0153,
      "step": 1114700
    },
    {
      "epoch": 1.8242637287824932,
      "grad_norm": 0.40323224663734436,
      "learning_rate": 6.393323800926445e-06,
      "loss": 0.0259,
      "step": 1114720
    },
    {
      "epoch": 1.8242964592211464,
      "grad_norm": 0.2965840995311737,
      "learning_rate": 6.393257908712928e-06,
      "loss": 0.0174,
      "step": 1114740
    },
    {
      "epoch": 1.8243291896598,
      "grad_norm": 0.980877161026001,
      "learning_rate": 6.393192016499411e-06,
      "loss": 0.0236,
      "step": 1114760
    },
    {
      "epoch": 1.824361920098453,
      "grad_norm": 0.11431454122066498,
      "learning_rate": 6.393126124285893e-06,
      "loss": 0.0253,
      "step": 1114780
    },
    {
      "epoch": 1.8243946505371065,
      "grad_norm": 0.8376885652542114,
      "learning_rate": 6.393060232072377e-06,
      "loss": 0.0251,
      "step": 1114800
    },
    {
      "epoch": 1.8244273809757598,
      "grad_norm": 0.1228434145450592,
      "learning_rate": 6.392994339858859e-06,
      "loss": 0.0258,
      "step": 1114820
    },
    {
      "epoch": 1.8244601114144132,
      "grad_norm": 0.3217501938343048,
      "learning_rate": 6.392928447645342e-06,
      "loss": 0.0178,
      "step": 1114840
    },
    {
      "epoch": 1.8244928418530666,
      "grad_norm": 0.34569844603538513,
      "learning_rate": 6.392862555431825e-06,
      "loss": 0.0197,
      "step": 1114860
    },
    {
      "epoch": 1.8245255722917197,
      "grad_norm": 0.9230211973190308,
      "learning_rate": 6.392796663218308e-06,
      "loss": 0.0144,
      "step": 1114880
    },
    {
      "epoch": 1.8245583027303733,
      "grad_norm": 0.19424517452716827,
      "learning_rate": 6.3927307710047905e-06,
      "loss": 0.0117,
      "step": 1114900
    },
    {
      "epoch": 1.8245910331690265,
      "grad_norm": 0.2839902937412262,
      "learning_rate": 6.392664878791274e-06,
      "loss": 0.0155,
      "step": 1114920
    },
    {
      "epoch": 1.8246237636076799,
      "grad_norm": 0.23915903270244598,
      "learning_rate": 6.392598986577757e-06,
      "loss": 0.0249,
      "step": 1114940
    },
    {
      "epoch": 1.8246564940463332,
      "grad_norm": 0.5472974181175232,
      "learning_rate": 6.39253309436424e-06,
      "loss": 0.0188,
      "step": 1114960
    },
    {
      "epoch": 1.8246892244849864,
      "grad_norm": 0.05514354631304741,
      "learning_rate": 6.392467202150723e-06,
      "loss": 0.0168,
      "step": 1114980
    },
    {
      "epoch": 1.82472195492364,
      "grad_norm": 0.4909612536430359,
      "learning_rate": 6.392401309937205e-06,
      "loss": 0.0171,
      "step": 1115000
    },
    {
      "epoch": 1.8247546853622931,
      "grad_norm": 0.6470691561698914,
      "learning_rate": 6.392335417723689e-06,
      "loss": 0.0212,
      "step": 1115020
    },
    {
      "epoch": 1.8247874158009467,
      "grad_norm": 0.30646058917045593,
      "learning_rate": 6.3922695255101706e-06,
      "loss": 0.0201,
      "step": 1115040
    },
    {
      "epoch": 1.8248201462395999,
      "grad_norm": 1.0899505615234375,
      "learning_rate": 6.392203633296654e-06,
      "loss": 0.0294,
      "step": 1115060
    },
    {
      "epoch": 1.8248528766782532,
      "grad_norm": 0.6172601580619812,
      "learning_rate": 6.392137741083136e-06,
      "loss": 0.0201,
      "step": 1115080
    },
    {
      "epoch": 1.8248856071169066,
      "grad_norm": 0.5137275457382202,
      "learning_rate": 6.39207184886962e-06,
      "loss": 0.0153,
      "step": 1115100
    },
    {
      "epoch": 1.8249183375555598,
      "grad_norm": 2.0217435359954834,
      "learning_rate": 6.392005956656102e-06,
      "loss": 0.0164,
      "step": 1115120
    },
    {
      "epoch": 1.8249510679942134,
      "grad_norm": 0.6186324954032898,
      "learning_rate": 6.391940064442585e-06,
      "loss": 0.0173,
      "step": 1115140
    },
    {
      "epoch": 1.8249837984328665,
      "grad_norm": 0.26087433099746704,
      "learning_rate": 6.391874172229068e-06,
      "loss": 0.0137,
      "step": 1115160
    },
    {
      "epoch": 1.8250165288715199,
      "grad_norm": 0.6237192749977112,
      "learning_rate": 6.3918082800155514e-06,
      "loss": 0.02,
      "step": 1115180
    },
    {
      "epoch": 1.8250492593101733,
      "grad_norm": 0.27603259682655334,
      "learning_rate": 6.391742387802033e-06,
      "loss": 0.0189,
      "step": 1115200
    },
    {
      "epoch": 1.8250819897488266,
      "grad_norm": 0.5386022329330444,
      "learning_rate": 6.391676495588517e-06,
      "loss": 0.0207,
      "step": 1115220
    },
    {
      "epoch": 1.82511472018748,
      "grad_norm": 0.8448696136474609,
      "learning_rate": 6.391610603374999e-06,
      "loss": 0.0172,
      "step": 1115240
    },
    {
      "epoch": 1.8251474506261331,
      "grad_norm": 0.8770344853401184,
      "learning_rate": 6.391544711161482e-06,
      "loss": 0.0226,
      "step": 1115260
    },
    {
      "epoch": 1.8251801810647867,
      "grad_norm": 0.5033779740333557,
      "learning_rate": 6.391478818947966e-06,
      "loss": 0.0211,
      "step": 1115280
    },
    {
      "epoch": 1.82521291150344,
      "grad_norm": 0.4111505150794983,
      "learning_rate": 6.391412926734448e-06,
      "loss": 0.0286,
      "step": 1115300
    },
    {
      "epoch": 1.8252456419420933,
      "grad_norm": 0.4132658541202545,
      "learning_rate": 6.3913470345209315e-06,
      "loss": 0.0182,
      "step": 1115320
    },
    {
      "epoch": 1.8252783723807466,
      "grad_norm": 0.7217640280723572,
      "learning_rate": 6.391281142307414e-06,
      "loss": 0.0099,
      "step": 1115340
    },
    {
      "epoch": 1.8253111028194,
      "grad_norm": 0.6980408430099487,
      "learning_rate": 6.391215250093897e-06,
      "loss": 0.023,
      "step": 1115360
    },
    {
      "epoch": 1.8253438332580534,
      "grad_norm": 1.2704459428787231,
      "learning_rate": 6.39114935788038e-06,
      "loss": 0.0253,
      "step": 1115380
    },
    {
      "epoch": 1.8253765636967065,
      "grad_norm": 0.19373083114624023,
      "learning_rate": 6.391083465666863e-06,
      "loss": 0.0159,
      "step": 1115400
    },
    {
      "epoch": 1.8254092941353601,
      "grad_norm": 1.017976999282837,
      "learning_rate": 6.391017573453345e-06,
      "loss": 0.0206,
      "step": 1115420
    },
    {
      "epoch": 1.8254420245740133,
      "grad_norm": 0.7064029574394226,
      "learning_rate": 6.390951681239829e-06,
      "loss": 0.0183,
      "step": 1115440
    },
    {
      "epoch": 1.8254747550126666,
      "grad_norm": 0.18592099845409393,
      "learning_rate": 6.390885789026311e-06,
      "loss": 0.026,
      "step": 1115460
    },
    {
      "epoch": 1.82550748545132,
      "grad_norm": 1.1613359451293945,
      "learning_rate": 6.390819896812794e-06,
      "loss": 0.0274,
      "step": 1115480
    },
    {
      "epoch": 1.8255402158899734,
      "grad_norm": 0.11408254504203796,
      "learning_rate": 6.390754004599277e-06,
      "loss": 0.0226,
      "step": 1115500
    },
    {
      "epoch": 1.8255729463286268,
      "grad_norm": 0.37947511672973633,
      "learning_rate": 6.39068811238576e-06,
      "loss": 0.0157,
      "step": 1115520
    },
    {
      "epoch": 1.82560567676728,
      "grad_norm": 0.695533037185669,
      "learning_rate": 6.3906222201722424e-06,
      "loss": 0.0197,
      "step": 1115540
    },
    {
      "epoch": 1.8256384072059335,
      "grad_norm": 0.3657677173614502,
      "learning_rate": 6.390556327958726e-06,
      "loss": 0.0227,
      "step": 1115560
    },
    {
      "epoch": 1.8256711376445867,
      "grad_norm": 0.19326433539390564,
      "learning_rate": 6.390490435745208e-06,
      "loss": 0.0183,
      "step": 1115580
    },
    {
      "epoch": 1.82570386808324,
      "grad_norm": 0.5432508587837219,
      "learning_rate": 6.3904245435316915e-06,
      "loss": 0.0171,
      "step": 1115600
    },
    {
      "epoch": 1.8257365985218934,
      "grad_norm": 0.2569287419319153,
      "learning_rate": 6.390358651318175e-06,
      "loss": 0.015,
      "step": 1115620
    },
    {
      "epoch": 1.8257693289605468,
      "grad_norm": 0.5898647904396057,
      "learning_rate": 6.390292759104657e-06,
      "loss": 0.0177,
      "step": 1115640
    },
    {
      "epoch": 1.8258020593992002,
      "grad_norm": 0.1534578800201416,
      "learning_rate": 6.3902268668911406e-06,
      "loss": 0.012,
      "step": 1115660
    },
    {
      "epoch": 1.8258347898378533,
      "grad_norm": 0.5406786799430847,
      "learning_rate": 6.3901609746776225e-06,
      "loss": 0.0258,
      "step": 1115680
    },
    {
      "epoch": 1.825867520276507,
      "grad_norm": 0.1497058868408203,
      "learning_rate": 6.390095082464106e-06,
      "loss": 0.0169,
      "step": 1115700
    },
    {
      "epoch": 1.82590025071516,
      "grad_norm": 0.1271154135465622,
      "learning_rate": 6.390029190250589e-06,
      "loss": 0.0139,
      "step": 1115720
    },
    {
      "epoch": 1.8259329811538134,
      "grad_norm": 0.22055087983608246,
      "learning_rate": 6.3899632980370715e-06,
      "loss": 0.0182,
      "step": 1115740
    },
    {
      "epoch": 1.8259657115924668,
      "grad_norm": 0.35981202125549316,
      "learning_rate": 6.389897405823554e-06,
      "loss": 0.0174,
      "step": 1115760
    },
    {
      "epoch": 1.82599844203112,
      "grad_norm": 0.13306738436222076,
      "learning_rate": 6.389831513610038e-06,
      "loss": 0.0169,
      "step": 1115780
    },
    {
      "epoch": 1.8260311724697735,
      "grad_norm": 0.41316479444503784,
      "learning_rate": 6.38976562139652e-06,
      "loss": 0.0165,
      "step": 1115800
    },
    {
      "epoch": 1.8260639029084267,
      "grad_norm": 0.14545086026191711,
      "learning_rate": 6.389699729183003e-06,
      "loss": 0.0133,
      "step": 1115820
    },
    {
      "epoch": 1.8260966333470803,
      "grad_norm": 0.6638427972793579,
      "learning_rate": 6.389633836969485e-06,
      "loss": 0.0306,
      "step": 1115840
    },
    {
      "epoch": 1.8261293637857334,
      "grad_norm": 0.31980404257774353,
      "learning_rate": 6.389567944755969e-06,
      "loss": 0.0127,
      "step": 1115860
    },
    {
      "epoch": 1.8261620942243868,
      "grad_norm": 3.466015338897705,
      "learning_rate": 6.3895020525424516e-06,
      "loss": 0.0252,
      "step": 1115880
    },
    {
      "epoch": 1.8261948246630402,
      "grad_norm": 2.6494851112365723,
      "learning_rate": 6.389436160328934e-06,
      "loss": 0.0174,
      "step": 1115900
    },
    {
      "epoch": 1.8262275551016933,
      "grad_norm": 1.6765507459640503,
      "learning_rate": 6.389370268115417e-06,
      "loss": 0.0185,
      "step": 1115920
    },
    {
      "epoch": 1.826260285540347,
      "grad_norm": 0.46526727080345154,
      "learning_rate": 6.389304375901901e-06,
      "loss": 0.0144,
      "step": 1115940
    },
    {
      "epoch": 1.826293015979,
      "grad_norm": 1.7286158800125122,
      "learning_rate": 6.389238483688383e-06,
      "loss": 0.0149,
      "step": 1115960
    },
    {
      "epoch": 1.8263257464176534,
      "grad_norm": 0.6138122081756592,
      "learning_rate": 6.389172591474866e-06,
      "loss": 0.0151,
      "step": 1115980
    },
    {
      "epoch": 1.8263584768563068,
      "grad_norm": 0.6217402815818787,
      "learning_rate": 6.38910669926135e-06,
      "loss": 0.016,
      "step": 1116000
    },
    {
      "epoch": 1.8263912072949602,
      "grad_norm": 0.30559754371643066,
      "learning_rate": 6.389040807047832e-06,
      "loss": 0.0224,
      "step": 1116020
    },
    {
      "epoch": 1.8264239377336136,
      "grad_norm": 0.7651811838150024,
      "learning_rate": 6.388974914834315e-06,
      "loss": 0.0164,
      "step": 1116040
    },
    {
      "epoch": 1.8264566681722667,
      "grad_norm": 2.0649354457855225,
      "learning_rate": 6.388909022620797e-06,
      "loss": 0.0224,
      "step": 1116060
    },
    {
      "epoch": 1.8264893986109203,
      "grad_norm": 0.7991088628768921,
      "learning_rate": 6.388843130407281e-06,
      "loss": 0.0219,
      "step": 1116080
    },
    {
      "epoch": 1.8265221290495735,
      "grad_norm": 0.2574867606163025,
      "learning_rate": 6.3887772381937625e-06,
      "loss": 0.0106,
      "step": 1116100
    },
    {
      "epoch": 1.8265548594882268,
      "grad_norm": 0.8821592926979065,
      "learning_rate": 6.388711345980246e-06,
      "loss": 0.0139,
      "step": 1116120
    },
    {
      "epoch": 1.8265875899268802,
      "grad_norm": 0.7746049761772156,
      "learning_rate": 6.388645453766729e-06,
      "loss": 0.019,
      "step": 1116140
    },
    {
      "epoch": 1.8266203203655336,
      "grad_norm": 0.10350126773118973,
      "learning_rate": 6.388579561553212e-06,
      "loss": 0.02,
      "step": 1116160
    },
    {
      "epoch": 1.826653050804187,
      "grad_norm": 0.38885363936424255,
      "learning_rate": 6.388513669339694e-06,
      "loss": 0.0192,
      "step": 1116180
    },
    {
      "epoch": 1.82668578124284,
      "grad_norm": 0.505633533000946,
      "learning_rate": 6.388447777126178e-06,
      "loss": 0.0127,
      "step": 1116200
    },
    {
      "epoch": 1.8267185116814937,
      "grad_norm": 0.36095669865608215,
      "learning_rate": 6.38838188491266e-06,
      "loss": 0.0152,
      "step": 1116220
    },
    {
      "epoch": 1.8267512421201468,
      "grad_norm": 1.0622615814208984,
      "learning_rate": 6.388315992699143e-06,
      "loss": 0.0223,
      "step": 1116240
    },
    {
      "epoch": 1.8267839725588002,
      "grad_norm": 0.5355604290962219,
      "learning_rate": 6.388250100485625e-06,
      "loss": 0.021,
      "step": 1116260
    },
    {
      "epoch": 1.8268167029974536,
      "grad_norm": 0.3563956916332245,
      "learning_rate": 6.388184208272109e-06,
      "loss": 0.0158,
      "step": 1116280
    },
    {
      "epoch": 1.826849433436107,
      "grad_norm": 0.2710445523262024,
      "learning_rate": 6.388118316058592e-06,
      "loss": 0.0226,
      "step": 1116300
    },
    {
      "epoch": 1.8268821638747603,
      "grad_norm": 1.207693099975586,
      "learning_rate": 6.388052423845074e-06,
      "loss": 0.0164,
      "step": 1116320
    },
    {
      "epoch": 1.8269148943134135,
      "grad_norm": 0.2562509775161743,
      "learning_rate": 6.387986531631558e-06,
      "loss": 0.0225,
      "step": 1116340
    },
    {
      "epoch": 1.826947624752067,
      "grad_norm": 0.5218042731285095,
      "learning_rate": 6.387920639418041e-06,
      "loss": 0.031,
      "step": 1116360
    },
    {
      "epoch": 1.8269803551907202,
      "grad_norm": 1.104778528213501,
      "learning_rate": 6.3878547472045234e-06,
      "loss": 0.02,
      "step": 1116380
    },
    {
      "epoch": 1.8270130856293736,
      "grad_norm": 0.8023556470870972,
      "learning_rate": 6.387788854991006e-06,
      "loss": 0.0177,
      "step": 1116400
    },
    {
      "epoch": 1.827045816068027,
      "grad_norm": 0.18425828218460083,
      "learning_rate": 6.38772296277749e-06,
      "loss": 0.0157,
      "step": 1116420
    },
    {
      "epoch": 1.8270785465066803,
      "grad_norm": 1.0608460903167725,
      "learning_rate": 6.387657070563972e-06,
      "loss": 0.0214,
      "step": 1116440
    },
    {
      "epoch": 1.8271112769453337,
      "grad_norm": 2.282764196395874,
      "learning_rate": 6.387591178350455e-06,
      "loss": 0.0217,
      "step": 1116460
    },
    {
      "epoch": 1.8271440073839869,
      "grad_norm": 0.12373141199350357,
      "learning_rate": 6.387525286136937e-06,
      "loss": 0.0168,
      "step": 1116480
    },
    {
      "epoch": 1.8271767378226405,
      "grad_norm": 0.16088557243347168,
      "learning_rate": 6.387459393923421e-06,
      "loss": 0.014,
      "step": 1116500
    },
    {
      "epoch": 1.8272094682612936,
      "grad_norm": 0.7213588953018188,
      "learning_rate": 6.3873935017099035e-06,
      "loss": 0.011,
      "step": 1116520
    },
    {
      "epoch": 1.827242198699947,
      "grad_norm": 0.5428468585014343,
      "learning_rate": 6.387327609496386e-06,
      "loss": 0.0187,
      "step": 1116540
    },
    {
      "epoch": 1.8272749291386003,
      "grad_norm": 0.5214377045631409,
      "learning_rate": 6.387261717282869e-06,
      "loss": 0.0186,
      "step": 1116560
    },
    {
      "epoch": 1.8273076595772535,
      "grad_norm": 0.609736979007721,
      "learning_rate": 6.3871958250693525e-06,
      "loss": 0.0223,
      "step": 1116580
    },
    {
      "epoch": 1.827340390015907,
      "grad_norm": 0.4913659989833832,
      "learning_rate": 6.3871299328558344e-06,
      "loss": 0.0238,
      "step": 1116600
    },
    {
      "epoch": 1.8273731204545602,
      "grad_norm": 0.5192713141441345,
      "learning_rate": 6.387064040642318e-06,
      "loss": 0.0245,
      "step": 1116620
    },
    {
      "epoch": 1.8274058508932138,
      "grad_norm": 0.08379501849412918,
      "learning_rate": 6.3869981484288e-06,
      "loss": 0.0205,
      "step": 1116640
    },
    {
      "epoch": 1.827438581331867,
      "grad_norm": 0.6072304248809814,
      "learning_rate": 6.3869322562152835e-06,
      "loss": 0.0206,
      "step": 1116660
    },
    {
      "epoch": 1.8274713117705204,
      "grad_norm": 0.37119194865226746,
      "learning_rate": 6.386866364001767e-06,
      "loss": 0.0217,
      "step": 1116680
    },
    {
      "epoch": 1.8275040422091737,
      "grad_norm": 0.17466522753238678,
      "learning_rate": 6.386800471788249e-06,
      "loss": 0.016,
      "step": 1116700
    },
    {
      "epoch": 1.8275367726478269,
      "grad_norm": 0.62681645154953,
      "learning_rate": 6.3867345795747326e-06,
      "loss": 0.0194,
      "step": 1116720
    },
    {
      "epoch": 1.8275695030864805,
      "grad_norm": 0.2827449440956116,
      "learning_rate": 6.386668687361215e-06,
      "loss": 0.0181,
      "step": 1116740
    },
    {
      "epoch": 1.8276022335251336,
      "grad_norm": 0.49801087379455566,
      "learning_rate": 6.386602795147698e-06,
      "loss": 0.013,
      "step": 1116760
    },
    {
      "epoch": 1.827634963963787,
      "grad_norm": 0.23288877308368683,
      "learning_rate": 6.386536902934181e-06,
      "loss": 0.0148,
      "step": 1116780
    },
    {
      "epoch": 1.8276676944024404,
      "grad_norm": 0.21321158111095428,
      "learning_rate": 6.386471010720664e-06,
      "loss": 0.0187,
      "step": 1116800
    },
    {
      "epoch": 1.8277004248410937,
      "grad_norm": 0.8972122073173523,
      "learning_rate": 6.386405118507146e-06,
      "loss": 0.0218,
      "step": 1116820
    },
    {
      "epoch": 1.8277331552797471,
      "grad_norm": 1.0982239246368408,
      "learning_rate": 6.38633922629363e-06,
      "loss": 0.0152,
      "step": 1116840
    },
    {
      "epoch": 1.8277658857184003,
      "grad_norm": 1.615718126296997,
      "learning_rate": 6.386273334080112e-06,
      "loss": 0.0125,
      "step": 1116860
    },
    {
      "epoch": 1.8277986161570539,
      "grad_norm": 0.8748733997344971,
      "learning_rate": 6.386207441866595e-06,
      "loss": 0.0156,
      "step": 1116880
    },
    {
      "epoch": 1.827831346595707,
      "grad_norm": 0.971397340297699,
      "learning_rate": 6.386141549653078e-06,
      "loss": 0.0132,
      "step": 1116900
    },
    {
      "epoch": 1.8278640770343604,
      "grad_norm": 0.9810870885848999,
      "learning_rate": 6.386075657439561e-06,
      "loss": 0.0212,
      "step": 1116920
    },
    {
      "epoch": 1.8278968074730138,
      "grad_norm": 0.6702203750610352,
      "learning_rate": 6.3860097652260435e-06,
      "loss": 0.0175,
      "step": 1116940
    },
    {
      "epoch": 1.8279295379116671,
      "grad_norm": 1.547039270401001,
      "learning_rate": 6.385943873012527e-06,
      "loss": 0.0213,
      "step": 1116960
    },
    {
      "epoch": 1.8279622683503205,
      "grad_norm": 0.6667112708091736,
      "learning_rate": 6.385877980799009e-06,
      "loss": 0.0184,
      "step": 1116980
    },
    {
      "epoch": 1.8279949987889736,
      "grad_norm": 1.0704576969146729,
      "learning_rate": 6.385812088585493e-06,
      "loss": 0.0193,
      "step": 1117000
    },
    {
      "epoch": 1.8280277292276272,
      "grad_norm": 0.5761677622795105,
      "learning_rate": 6.385746196371976e-06,
      "loss": 0.0175,
      "step": 1117020
    },
    {
      "epoch": 1.8280604596662804,
      "grad_norm": 0.420283704996109,
      "learning_rate": 6.385680304158458e-06,
      "loss": 0.0151,
      "step": 1117040
    },
    {
      "epoch": 1.8280931901049338,
      "grad_norm": 0.3965984880924225,
      "learning_rate": 6.385614411944942e-06,
      "loss": 0.0162,
      "step": 1117060
    },
    {
      "epoch": 1.8281259205435871,
      "grad_norm": 0.3080204129219055,
      "learning_rate": 6.3855485197314236e-06,
      "loss": 0.0176,
      "step": 1117080
    },
    {
      "epoch": 1.8281586509822405,
      "grad_norm": 0.8344411849975586,
      "learning_rate": 6.385482627517907e-06,
      "loss": 0.0201,
      "step": 1117100
    },
    {
      "epoch": 1.8281913814208939,
      "grad_norm": 0.5795542001724243,
      "learning_rate": 6.385416735304389e-06,
      "loss": 0.0129,
      "step": 1117120
    },
    {
      "epoch": 1.828224111859547,
      "grad_norm": 0.39963439106941223,
      "learning_rate": 6.385350843090873e-06,
      "loss": 0.0197,
      "step": 1117140
    },
    {
      "epoch": 1.8282568422982006,
      "grad_norm": 0.30164656043052673,
      "learning_rate": 6.385284950877355e-06,
      "loss": 0.0204,
      "step": 1117160
    },
    {
      "epoch": 1.8282895727368538,
      "grad_norm": 0.23508882522583008,
      "learning_rate": 6.385219058663838e-06,
      "loss": 0.0262,
      "step": 1117180
    },
    {
      "epoch": 1.8283223031755071,
      "grad_norm": 0.13652649521827698,
      "learning_rate": 6.385153166450321e-06,
      "loss": 0.0172,
      "step": 1117200
    },
    {
      "epoch": 1.8283550336141605,
      "grad_norm": 0.4259376525878906,
      "learning_rate": 6.3850872742368044e-06,
      "loss": 0.0153,
      "step": 1117220
    },
    {
      "epoch": 1.828387764052814,
      "grad_norm": 0.2620936632156372,
      "learning_rate": 6.385021382023286e-06,
      "loss": 0.0202,
      "step": 1117240
    },
    {
      "epoch": 1.8284204944914673,
      "grad_norm": 0.6278592944145203,
      "learning_rate": 6.38495548980977e-06,
      "loss": 0.0144,
      "step": 1117260
    },
    {
      "epoch": 1.8284532249301204,
      "grad_norm": 0.34523752331733704,
      "learning_rate": 6.384889597596252e-06,
      "loss": 0.0185,
      "step": 1117280
    },
    {
      "epoch": 1.828485955368774,
      "grad_norm": 0.5717586874961853,
      "learning_rate": 6.384823705382735e-06,
      "loss": 0.0186,
      "step": 1117300
    },
    {
      "epoch": 1.8285186858074272,
      "grad_norm": 0.22088590264320374,
      "learning_rate": 6.384757813169218e-06,
      "loss": 0.0185,
      "step": 1117320
    },
    {
      "epoch": 1.8285514162460805,
      "grad_norm": 0.11245560646057129,
      "learning_rate": 6.384691920955701e-06,
      "loss": 0.0182,
      "step": 1117340
    },
    {
      "epoch": 1.828584146684734,
      "grad_norm": 0.6288512945175171,
      "learning_rate": 6.384626028742184e-06,
      "loss": 0.0208,
      "step": 1117360
    },
    {
      "epoch": 1.828616877123387,
      "grad_norm": 0.2747337520122528,
      "learning_rate": 6.384560136528667e-06,
      "loss": 0.0164,
      "step": 1117380
    },
    {
      "epoch": 1.8286496075620406,
      "grad_norm": 0.1481507122516632,
      "learning_rate": 6.38449424431515e-06,
      "loss": 0.0196,
      "step": 1117400
    },
    {
      "epoch": 1.8286823380006938,
      "grad_norm": 0.5560766458511353,
      "learning_rate": 6.384428352101633e-06,
      "loss": 0.022,
      "step": 1117420
    },
    {
      "epoch": 1.8287150684393472,
      "grad_norm": 0.7439082860946655,
      "learning_rate": 6.384362459888116e-06,
      "loss": 0.0163,
      "step": 1117440
    },
    {
      "epoch": 1.8287477988780005,
      "grad_norm": 0.7959098815917969,
      "learning_rate": 6.384296567674598e-06,
      "loss": 0.0265,
      "step": 1117460
    },
    {
      "epoch": 1.828780529316654,
      "grad_norm": 0.5791351199150085,
      "learning_rate": 6.384230675461082e-06,
      "loss": 0.0175,
      "step": 1117480
    },
    {
      "epoch": 1.8288132597553073,
      "grad_norm": 0.9548202157020569,
      "learning_rate": 6.384164783247564e-06,
      "loss": 0.0137,
      "step": 1117500
    },
    {
      "epoch": 1.8288459901939604,
      "grad_norm": 0.4903741776943207,
      "learning_rate": 6.384098891034047e-06,
      "loss": 0.0205,
      "step": 1117520
    },
    {
      "epoch": 1.828878720632614,
      "grad_norm": 0.6990675926208496,
      "learning_rate": 6.38403299882053e-06,
      "loss": 0.0179,
      "step": 1117540
    },
    {
      "epoch": 1.8289114510712672,
      "grad_norm": 0.6058134436607361,
      "learning_rate": 6.383967106607013e-06,
      "loss": 0.0185,
      "step": 1117560
    },
    {
      "epoch": 1.8289441815099206,
      "grad_norm": 0.738949716091156,
      "learning_rate": 6.3839012143934954e-06,
      "loss": 0.0222,
      "step": 1117580
    },
    {
      "epoch": 1.828976911948574,
      "grad_norm": 0.12315250188112259,
      "learning_rate": 6.383835322179979e-06,
      "loss": 0.0158,
      "step": 1117600
    },
    {
      "epoch": 1.8290096423872273,
      "grad_norm": 0.8661600351333618,
      "learning_rate": 6.383769429966461e-06,
      "loss": 0.0172,
      "step": 1117620
    },
    {
      "epoch": 1.8290423728258807,
      "grad_norm": 0.8627276420593262,
      "learning_rate": 6.3837035377529445e-06,
      "loss": 0.0123,
      "step": 1117640
    },
    {
      "epoch": 1.8290751032645338,
      "grad_norm": 0.5611705183982849,
      "learning_rate": 6.383637645539426e-06,
      "loss": 0.0153,
      "step": 1117660
    },
    {
      "epoch": 1.8291078337031874,
      "grad_norm": 1.003867745399475,
      "learning_rate": 6.38357175332591e-06,
      "loss": 0.0207,
      "step": 1117680
    },
    {
      "epoch": 1.8291405641418406,
      "grad_norm": 0.46019411087036133,
      "learning_rate": 6.383505861112393e-06,
      "loss": 0.0209,
      "step": 1117700
    },
    {
      "epoch": 1.829173294580494,
      "grad_norm": 0.564659595489502,
      "learning_rate": 6.3834399688988755e-06,
      "loss": 0.0169,
      "step": 1117720
    },
    {
      "epoch": 1.8292060250191473,
      "grad_norm": 0.47713106870651245,
      "learning_rate": 6.383374076685359e-06,
      "loss": 0.0253,
      "step": 1117740
    },
    {
      "epoch": 1.8292387554578007,
      "grad_norm": 2.219832181930542,
      "learning_rate": 6.383308184471842e-06,
      "loss": 0.0247,
      "step": 1117760
    },
    {
      "epoch": 1.829271485896454,
      "grad_norm": 0.48839494585990906,
      "learning_rate": 6.3832422922583245e-06,
      "loss": 0.0172,
      "step": 1117780
    },
    {
      "epoch": 1.8293042163351072,
      "grad_norm": 0.5428368449211121,
      "learning_rate": 6.383176400044807e-06,
      "loss": 0.0206,
      "step": 1117800
    },
    {
      "epoch": 1.8293369467737608,
      "grad_norm": 1.231155276298523,
      "learning_rate": 6.383110507831291e-06,
      "loss": 0.0195,
      "step": 1117820
    },
    {
      "epoch": 1.829369677212414,
      "grad_norm": 0.7218905091285706,
      "learning_rate": 6.383044615617773e-06,
      "loss": 0.0137,
      "step": 1117840
    },
    {
      "epoch": 1.8294024076510673,
      "grad_norm": 0.12072008848190308,
      "learning_rate": 6.382978723404256e-06,
      "loss": 0.0141,
      "step": 1117860
    },
    {
      "epoch": 1.8294351380897207,
      "grad_norm": 0.10903678834438324,
      "learning_rate": 6.382912831190738e-06,
      "loss": 0.0117,
      "step": 1117880
    },
    {
      "epoch": 1.829467868528374,
      "grad_norm": 0.6374529004096985,
      "learning_rate": 6.382846938977222e-06,
      "loss": 0.0128,
      "step": 1117900
    },
    {
      "epoch": 1.8295005989670274,
      "grad_norm": 0.7822726368904114,
      "learning_rate": 6.382781046763704e-06,
      "loss": 0.0184,
      "step": 1117920
    },
    {
      "epoch": 1.8295333294056806,
      "grad_norm": 1.8886288404464722,
      "learning_rate": 6.382715154550187e-06,
      "loss": 0.016,
      "step": 1117940
    },
    {
      "epoch": 1.8295660598443342,
      "grad_norm": 0.5150263905525208,
      "learning_rate": 6.38264926233667e-06,
      "loss": 0.0175,
      "step": 1117960
    },
    {
      "epoch": 1.8295987902829873,
      "grad_norm": 0.544187605381012,
      "learning_rate": 6.382583370123154e-06,
      "loss": 0.0137,
      "step": 1117980
    },
    {
      "epoch": 1.8296315207216407,
      "grad_norm": 0.245242178440094,
      "learning_rate": 6.3825174779096355e-06,
      "loss": 0.0183,
      "step": 1118000
    },
    {
      "epoch": 1.829664251160294,
      "grad_norm": 0.6333398222923279,
      "learning_rate": 6.382451585696119e-06,
      "loss": 0.0188,
      "step": 1118020
    },
    {
      "epoch": 1.8296969815989472,
      "grad_norm": 0.3415493667125702,
      "learning_rate": 6.382385693482601e-06,
      "loss": 0.0149,
      "step": 1118040
    },
    {
      "epoch": 1.8297297120376008,
      "grad_norm": 0.43583977222442627,
      "learning_rate": 6.382319801269085e-06,
      "loss": 0.0211,
      "step": 1118060
    },
    {
      "epoch": 1.829762442476254,
      "grad_norm": 0.25552263855934143,
      "learning_rate": 6.382253909055568e-06,
      "loss": 0.0154,
      "step": 1118080
    },
    {
      "epoch": 1.8297951729149076,
      "grad_norm": 0.14546000957489014,
      "learning_rate": 6.38218801684205e-06,
      "loss": 0.0141,
      "step": 1118100
    },
    {
      "epoch": 1.8298279033535607,
      "grad_norm": 0.6484404802322388,
      "learning_rate": 6.382122124628534e-06,
      "loss": 0.0131,
      "step": 1118120
    },
    {
      "epoch": 1.829860633792214,
      "grad_norm": 0.3649018406867981,
      "learning_rate": 6.3820562324150155e-06,
      "loss": 0.0174,
      "step": 1118140
    },
    {
      "epoch": 1.8298933642308675,
      "grad_norm": 0.44536301493644714,
      "learning_rate": 6.381990340201499e-06,
      "loss": 0.013,
      "step": 1118160
    },
    {
      "epoch": 1.8299260946695206,
      "grad_norm": 0.626133382320404,
      "learning_rate": 6.381924447987982e-06,
      "loss": 0.0151,
      "step": 1118180
    },
    {
      "epoch": 1.8299588251081742,
      "grad_norm": 0.3689190149307251,
      "learning_rate": 6.381858555774465e-06,
      "loss": 0.0187,
      "step": 1118200
    },
    {
      "epoch": 1.8299915555468274,
      "grad_norm": 0.10242778062820435,
      "learning_rate": 6.381792663560947e-06,
      "loss": 0.0214,
      "step": 1118220
    },
    {
      "epoch": 1.8300242859854807,
      "grad_norm": 0.1499902307987213,
      "learning_rate": 6.381726771347431e-06,
      "loss": 0.0137,
      "step": 1118240
    },
    {
      "epoch": 1.830057016424134,
      "grad_norm": 0.33179742097854614,
      "learning_rate": 6.381660879133913e-06,
      "loss": 0.0152,
      "step": 1118260
    },
    {
      "epoch": 1.8300897468627875,
      "grad_norm": 0.14079752564430237,
      "learning_rate": 6.381594986920396e-06,
      "loss": 0.0276,
      "step": 1118280
    },
    {
      "epoch": 1.8301224773014408,
      "grad_norm": 0.3297469913959503,
      "learning_rate": 6.381529094706878e-06,
      "loss": 0.0208,
      "step": 1118300
    },
    {
      "epoch": 1.830155207740094,
      "grad_norm": 0.5467894673347473,
      "learning_rate": 6.381463202493362e-06,
      "loss": 0.0152,
      "step": 1118320
    },
    {
      "epoch": 1.8301879381787476,
      "grad_norm": 0.2649083435535431,
      "learning_rate": 6.381397310279845e-06,
      "loss": 0.0123,
      "step": 1118340
    },
    {
      "epoch": 1.8302206686174007,
      "grad_norm": 0.6966809630393982,
      "learning_rate": 6.381331418066327e-06,
      "loss": 0.0201,
      "step": 1118360
    },
    {
      "epoch": 1.830253399056054,
      "grad_norm": 0.36736035346984863,
      "learning_rate": 6.38126552585281e-06,
      "loss": 0.0185,
      "step": 1118380
    },
    {
      "epoch": 1.8302861294947075,
      "grad_norm": 0.9269031286239624,
      "learning_rate": 6.381199633639294e-06,
      "loss": 0.0103,
      "step": 1118400
    },
    {
      "epoch": 1.8303188599333609,
      "grad_norm": 0.34168219566345215,
      "learning_rate": 6.381133741425776e-06,
      "loss": 0.0198,
      "step": 1118420
    },
    {
      "epoch": 1.8303515903720142,
      "grad_norm": 0.36734539270401,
      "learning_rate": 6.381067849212259e-06,
      "loss": 0.0163,
      "step": 1118440
    },
    {
      "epoch": 1.8303843208106674,
      "grad_norm": 1.1076836585998535,
      "learning_rate": 6.381001956998743e-06,
      "loss": 0.0157,
      "step": 1118460
    },
    {
      "epoch": 1.830417051249321,
      "grad_norm": 0.5544065833091736,
      "learning_rate": 6.380936064785225e-06,
      "loss": 0.0281,
      "step": 1118480
    },
    {
      "epoch": 1.8304497816879741,
      "grad_norm": 1.8767509460449219,
      "learning_rate": 6.380870172571708e-06,
      "loss": 0.0133,
      "step": 1118500
    },
    {
      "epoch": 1.8304825121266275,
      "grad_norm": 0.5045934319496155,
      "learning_rate": 6.38080428035819e-06,
      "loss": 0.0236,
      "step": 1118520
    },
    {
      "epoch": 1.8305152425652809,
      "grad_norm": 0.15686435997486115,
      "learning_rate": 6.380738388144674e-06,
      "loss": 0.0197,
      "step": 1118540
    },
    {
      "epoch": 1.8305479730039342,
      "grad_norm": 0.16062982380390167,
      "learning_rate": 6.3806724959311565e-06,
      "loss": 0.023,
      "step": 1118560
    },
    {
      "epoch": 1.8305807034425876,
      "grad_norm": 0.6016315221786499,
      "learning_rate": 6.380606603717639e-06,
      "loss": 0.0215,
      "step": 1118580
    },
    {
      "epoch": 1.8306134338812408,
      "grad_norm": 0.5582244992256165,
      "learning_rate": 6.380540711504122e-06,
      "loss": 0.0235,
      "step": 1118600
    },
    {
      "epoch": 1.8306461643198944,
      "grad_norm": 0.43126818537712097,
      "learning_rate": 6.3804748192906055e-06,
      "loss": 0.0179,
      "step": 1118620
    },
    {
      "epoch": 1.8306788947585475,
      "grad_norm": 0.4334913194179535,
      "learning_rate": 6.3804089270770874e-06,
      "loss": 0.0179,
      "step": 1118640
    },
    {
      "epoch": 1.8307116251972009,
      "grad_norm": 0.5452485680580139,
      "learning_rate": 6.380343034863571e-06,
      "loss": 0.0215,
      "step": 1118660
    },
    {
      "epoch": 1.8307443556358542,
      "grad_norm": 0.8790184855461121,
      "learning_rate": 6.380277142650053e-06,
      "loss": 0.0368,
      "step": 1118680
    },
    {
      "epoch": 1.8307770860745076,
      "grad_norm": 0.2009686529636383,
      "learning_rate": 6.3802112504365365e-06,
      "loss": 0.0279,
      "step": 1118700
    },
    {
      "epoch": 1.830809816513161,
      "grad_norm": 1.47557532787323,
      "learning_rate": 6.380145358223019e-06,
      "loss": 0.022,
      "step": 1118720
    },
    {
      "epoch": 1.8308425469518141,
      "grad_norm": 0.714841902256012,
      "learning_rate": 6.380079466009502e-06,
      "loss": 0.0239,
      "step": 1118740
    },
    {
      "epoch": 1.8308752773904677,
      "grad_norm": 0.7065799236297607,
      "learning_rate": 6.380013573795985e-06,
      "loss": 0.0181,
      "step": 1118760
    },
    {
      "epoch": 1.8309080078291209,
      "grad_norm": 0.6018840670585632,
      "learning_rate": 6.379947681582468e-06,
      "loss": 0.0132,
      "step": 1118780
    },
    {
      "epoch": 1.8309407382677743,
      "grad_norm": 0.7493141293525696,
      "learning_rate": 6.379881789368951e-06,
      "loss": 0.0166,
      "step": 1118800
    },
    {
      "epoch": 1.8309734687064276,
      "grad_norm": 0.7331552505493164,
      "learning_rate": 6.379815897155434e-06,
      "loss": 0.0162,
      "step": 1118820
    },
    {
      "epoch": 1.8310061991450808,
      "grad_norm": 0.32009726762771606,
      "learning_rate": 6.379750004941917e-06,
      "loss": 0.0197,
      "step": 1118840
    },
    {
      "epoch": 1.8310389295837344,
      "grad_norm": 0.18097756803035736,
      "learning_rate": 6.379684112728399e-06,
      "loss": 0.0156,
      "step": 1118860
    },
    {
      "epoch": 1.8310716600223875,
      "grad_norm": 1.26064133644104,
      "learning_rate": 6.379618220514883e-06,
      "loss": 0.0267,
      "step": 1118880
    },
    {
      "epoch": 1.8311043904610411,
      "grad_norm": 0.49241843819618225,
      "learning_rate": 6.379552328301365e-06,
      "loss": 0.0188,
      "step": 1118900
    },
    {
      "epoch": 1.8311371208996943,
      "grad_norm": 0.30797797441482544,
      "learning_rate": 6.379486436087848e-06,
      "loss": 0.0178,
      "step": 1118920
    },
    {
      "epoch": 1.8311698513383476,
      "grad_norm": 0.5324105024337769,
      "learning_rate": 6.37942054387433e-06,
      "loss": 0.0165,
      "step": 1118940
    },
    {
      "epoch": 1.831202581777001,
      "grad_norm": 1.721256136894226,
      "learning_rate": 6.379354651660814e-06,
      "loss": 0.0121,
      "step": 1118960
    },
    {
      "epoch": 1.8312353122156542,
      "grad_norm": 0.17900222539901733,
      "learning_rate": 6.3792887594472965e-06,
      "loss": 0.0213,
      "step": 1118980
    },
    {
      "epoch": 1.8312680426543078,
      "grad_norm": 0.3800615072250366,
      "learning_rate": 6.379222867233779e-06,
      "loss": 0.0228,
      "step": 1119000
    },
    {
      "epoch": 1.831300773092961,
      "grad_norm": 1.1909465789794922,
      "learning_rate": 6.379156975020262e-06,
      "loss": 0.02,
      "step": 1119020
    },
    {
      "epoch": 1.8313335035316143,
      "grad_norm": 0.6578608155250549,
      "learning_rate": 6.379091082806746e-06,
      "loss": 0.0199,
      "step": 1119040
    },
    {
      "epoch": 1.8313662339702677,
      "grad_norm": 0.30734848976135254,
      "learning_rate": 6.3790251905932275e-06,
      "loss": 0.0155,
      "step": 1119060
    },
    {
      "epoch": 1.831398964408921,
      "grad_norm": 0.6703370213508606,
      "learning_rate": 6.378959298379711e-06,
      "loss": 0.0123,
      "step": 1119080
    },
    {
      "epoch": 1.8314316948475744,
      "grad_norm": 0.07456987351179123,
      "learning_rate": 6.378893406166193e-06,
      "loss": 0.0167,
      "step": 1119100
    },
    {
      "epoch": 1.8314644252862275,
      "grad_norm": 0.6453279256820679,
      "learning_rate": 6.3788275139526766e-06,
      "loss": 0.0141,
      "step": 1119120
    },
    {
      "epoch": 1.8314971557248811,
      "grad_norm": 1.2586166858673096,
      "learning_rate": 6.37876162173916e-06,
      "loss": 0.0152,
      "step": 1119140
    },
    {
      "epoch": 1.8315298861635343,
      "grad_norm": 0.9114313721656799,
      "learning_rate": 6.378695729525642e-06,
      "loss": 0.0202,
      "step": 1119160
    },
    {
      "epoch": 1.8315626166021877,
      "grad_norm": 0.20750251412391663,
      "learning_rate": 6.378629837312126e-06,
      "loss": 0.0208,
      "step": 1119180
    },
    {
      "epoch": 1.831595347040841,
      "grad_norm": 0.3938957154750824,
      "learning_rate": 6.378563945098608e-06,
      "loss": 0.0174,
      "step": 1119200
    },
    {
      "epoch": 1.8316280774794944,
      "grad_norm": 0.5599414110183716,
      "learning_rate": 6.378498052885091e-06,
      "loss": 0.014,
      "step": 1119220
    },
    {
      "epoch": 1.8316608079181478,
      "grad_norm": 0.5104630589485168,
      "learning_rate": 6.378432160671574e-06,
      "loss": 0.0164,
      "step": 1119240
    },
    {
      "epoch": 1.831693538356801,
      "grad_norm": 0.723097026348114,
      "learning_rate": 6.3783662684580574e-06,
      "loss": 0.0191,
      "step": 1119260
    },
    {
      "epoch": 1.8317262687954545,
      "grad_norm": 0.8069360852241516,
      "learning_rate": 6.378300376244539e-06,
      "loss": 0.0175,
      "step": 1119280
    },
    {
      "epoch": 1.8317589992341077,
      "grad_norm": 0.24066667258739471,
      "learning_rate": 6.378234484031023e-06,
      "loss": 0.0228,
      "step": 1119300
    },
    {
      "epoch": 1.831791729672761,
      "grad_norm": 0.36603307723999023,
      "learning_rate": 6.378168591817505e-06,
      "loss": 0.0148,
      "step": 1119320
    },
    {
      "epoch": 1.8318244601114144,
      "grad_norm": 0.15448959171772003,
      "learning_rate": 6.378102699603988e-06,
      "loss": 0.0173,
      "step": 1119340
    },
    {
      "epoch": 1.8318571905500678,
      "grad_norm": 0.2535279393196106,
      "learning_rate": 6.378036807390471e-06,
      "loss": 0.0165,
      "step": 1119360
    },
    {
      "epoch": 1.8318899209887212,
      "grad_norm": 0.6915388107299805,
      "learning_rate": 6.377970915176954e-06,
      "loss": 0.0251,
      "step": 1119380
    },
    {
      "epoch": 1.8319226514273743,
      "grad_norm": 0.20316071808338165,
      "learning_rate": 6.377905022963437e-06,
      "loss": 0.0168,
      "step": 1119400
    },
    {
      "epoch": 1.831955381866028,
      "grad_norm": 0.30130112171173096,
      "learning_rate": 6.37783913074992e-06,
      "loss": 0.0154,
      "step": 1119420
    },
    {
      "epoch": 1.831988112304681,
      "grad_norm": 0.8369547128677368,
      "learning_rate": 6.377773238536402e-06,
      "loss": 0.0153,
      "step": 1119440
    },
    {
      "epoch": 1.8320208427433344,
      "grad_norm": 0.2134028822183609,
      "learning_rate": 6.377707346322886e-06,
      "loss": 0.021,
      "step": 1119460
    },
    {
      "epoch": 1.8320535731819878,
      "grad_norm": 0.3346302807331085,
      "learning_rate": 6.377641454109369e-06,
      "loss": 0.0219,
      "step": 1119480
    },
    {
      "epoch": 1.8320863036206412,
      "grad_norm": 1.322463870048523,
      "learning_rate": 6.377575561895851e-06,
      "loss": 0.0276,
      "step": 1119500
    },
    {
      "epoch": 1.8321190340592945,
      "grad_norm": 0.13405705988407135,
      "learning_rate": 6.377509669682335e-06,
      "loss": 0.0093,
      "step": 1119520
    },
    {
      "epoch": 1.8321517644979477,
      "grad_norm": 0.4916971027851105,
      "learning_rate": 6.377443777468817e-06,
      "loss": 0.0146,
      "step": 1119540
    },
    {
      "epoch": 1.8321844949366013,
      "grad_norm": 0.25649967789649963,
      "learning_rate": 6.3773778852553e-06,
      "loss": 0.0183,
      "step": 1119560
    },
    {
      "epoch": 1.8322172253752544,
      "grad_norm": 0.4089905917644501,
      "learning_rate": 6.377311993041783e-06,
      "loss": 0.0231,
      "step": 1119580
    },
    {
      "epoch": 1.8322499558139078,
      "grad_norm": 0.3765101730823517,
      "learning_rate": 6.377246100828266e-06,
      "loss": 0.0243,
      "step": 1119600
    },
    {
      "epoch": 1.8322826862525612,
      "grad_norm": 0.4482796788215637,
      "learning_rate": 6.3771802086147485e-06,
      "loss": 0.0162,
      "step": 1119620
    },
    {
      "epoch": 1.8323154166912143,
      "grad_norm": 0.4549352526664734,
      "learning_rate": 6.377114316401232e-06,
      "loss": 0.0129,
      "step": 1119640
    },
    {
      "epoch": 1.832348147129868,
      "grad_norm": 1.0492124557495117,
      "learning_rate": 6.377048424187714e-06,
      "loss": 0.0192,
      "step": 1119660
    },
    {
      "epoch": 1.832380877568521,
      "grad_norm": 0.80712890625,
      "learning_rate": 6.3769825319741975e-06,
      "loss": 0.0289,
      "step": 1119680
    },
    {
      "epoch": 1.8324136080071747,
      "grad_norm": 0.8076155185699463,
      "learning_rate": 6.376916639760679e-06,
      "loss": 0.0247,
      "step": 1119700
    },
    {
      "epoch": 1.8324463384458278,
      "grad_norm": 0.4211311340332031,
      "learning_rate": 6.376850747547163e-06,
      "loss": 0.0198,
      "step": 1119720
    },
    {
      "epoch": 1.8324790688844812,
      "grad_norm": 0.26972976326942444,
      "learning_rate": 6.376784855333646e-06,
      "loss": 0.0176,
      "step": 1119740
    },
    {
      "epoch": 1.8325117993231346,
      "grad_norm": 0.6892540454864502,
      "learning_rate": 6.3767189631201285e-06,
      "loss": 0.022,
      "step": 1119760
    },
    {
      "epoch": 1.8325445297617877,
      "grad_norm": 0.11394671350717545,
      "learning_rate": 6.376653070906611e-06,
      "loss": 0.0191,
      "step": 1119780
    },
    {
      "epoch": 1.8325772602004413,
      "grad_norm": 0.13611546158790588,
      "learning_rate": 6.376587178693095e-06,
      "loss": 0.0135,
      "step": 1119800
    },
    {
      "epoch": 1.8326099906390945,
      "grad_norm": 0.25454065203666687,
      "learning_rate": 6.376521286479577e-06,
      "loss": 0.0191,
      "step": 1119820
    },
    {
      "epoch": 1.8326427210777478,
      "grad_norm": 0.0780809074640274,
      "learning_rate": 6.37645539426606e-06,
      "loss": 0.0274,
      "step": 1119840
    },
    {
      "epoch": 1.8326754515164012,
      "grad_norm": 0.2931600511074066,
      "learning_rate": 6.376389502052544e-06,
      "loss": 0.0123,
      "step": 1119860
    },
    {
      "epoch": 1.8327081819550546,
      "grad_norm": 0.5432161092758179,
      "learning_rate": 6.376323609839026e-06,
      "loss": 0.0176,
      "step": 1119880
    },
    {
      "epoch": 1.832740912393708,
      "grad_norm": 0.18051663041114807,
      "learning_rate": 6.376257717625509e-06,
      "loss": 0.0241,
      "step": 1119900
    },
    {
      "epoch": 1.832773642832361,
      "grad_norm": 0.30855146050453186,
      "learning_rate": 6.376191825411991e-06,
      "loss": 0.0207,
      "step": 1119920
    },
    {
      "epoch": 1.8328063732710147,
      "grad_norm": 2.1475026607513428,
      "learning_rate": 6.376125933198475e-06,
      "loss": 0.0136,
      "step": 1119940
    },
    {
      "epoch": 1.8328391037096678,
      "grad_norm": 0.1531592458486557,
      "learning_rate": 6.376060040984957e-06,
      "loss": 0.0146,
      "step": 1119960
    },
    {
      "epoch": 1.8328718341483212,
      "grad_norm": 0.2727610766887665,
      "learning_rate": 6.37599414877144e-06,
      "loss": 0.0173,
      "step": 1119980
    },
    {
      "epoch": 1.8329045645869746,
      "grad_norm": 0.8370193839073181,
      "learning_rate": 6.375928256557923e-06,
      "loss": 0.014,
      "step": 1120000
    },
    {
      "epoch": 1.832937295025628,
      "grad_norm": 0.2618166208267212,
      "learning_rate": 6.375862364344406e-06,
      "loss": 0.0202,
      "step": 1120020
    },
    {
      "epoch": 1.8329700254642813,
      "grad_norm": 0.25874754786491394,
      "learning_rate": 6.3757964721308885e-06,
      "loss": 0.0215,
      "step": 1120040
    },
    {
      "epoch": 1.8330027559029345,
      "grad_norm": 0.8039510846138,
      "learning_rate": 6.375730579917372e-06,
      "loss": 0.0164,
      "step": 1120060
    },
    {
      "epoch": 1.833035486341588,
      "grad_norm": 1.1471874713897705,
      "learning_rate": 6.375664687703854e-06,
      "loss": 0.0278,
      "step": 1120080
    },
    {
      "epoch": 1.8330682167802412,
      "grad_norm": 0.28371137380599976,
      "learning_rate": 6.375598795490338e-06,
      "loss": 0.0218,
      "step": 1120100
    },
    {
      "epoch": 1.8331009472188946,
      "grad_norm": 0.39340734481811523,
      "learning_rate": 6.3755329032768195e-06,
      "loss": 0.0099,
      "step": 1120120
    },
    {
      "epoch": 1.833133677657548,
      "grad_norm": 0.16907137632369995,
      "learning_rate": 6.375467011063303e-06,
      "loss": 0.0155,
      "step": 1120140
    },
    {
      "epoch": 1.8331664080962013,
      "grad_norm": 1.02881920337677,
      "learning_rate": 6.375401118849786e-06,
      "loss": 0.0191,
      "step": 1120160
    },
    {
      "epoch": 1.8331991385348547,
      "grad_norm": 0.9714810848236084,
      "learning_rate": 6.3753352266362686e-06,
      "loss": 0.0164,
      "step": 1120180
    },
    {
      "epoch": 1.8332318689735079,
      "grad_norm": 0.86835777759552,
      "learning_rate": 6.375269334422752e-06,
      "loss": 0.0181,
      "step": 1120200
    },
    {
      "epoch": 1.8332645994121615,
      "grad_norm": 0.22889344394207,
      "learning_rate": 6.375203442209235e-06,
      "loss": 0.0198,
      "step": 1120220
    },
    {
      "epoch": 1.8332973298508146,
      "grad_norm": 0.6516262888908386,
      "learning_rate": 6.375137549995718e-06,
      "loss": 0.0172,
      "step": 1120240
    },
    {
      "epoch": 1.833330060289468,
      "grad_norm": 0.16051073372364044,
      "learning_rate": 6.3750716577822e-06,
      "loss": 0.014,
      "step": 1120260
    },
    {
      "epoch": 1.8333627907281214,
      "grad_norm": 0.46323058009147644,
      "learning_rate": 6.375005765568684e-06,
      "loss": 0.02,
      "step": 1120280
    },
    {
      "epoch": 1.8333955211667745,
      "grad_norm": 0.1802673041820526,
      "learning_rate": 6.374939873355166e-06,
      "loss": 0.0179,
      "step": 1120300
    },
    {
      "epoch": 1.833428251605428,
      "grad_norm": 0.4489186704158783,
      "learning_rate": 6.3748739811416494e-06,
      "loss": 0.017,
      "step": 1120320
    },
    {
      "epoch": 1.8334609820440813,
      "grad_norm": 0.336786150932312,
      "learning_rate": 6.374808088928131e-06,
      "loss": 0.0188,
      "step": 1120340
    },
    {
      "epoch": 1.8334937124827348,
      "grad_norm": 0.3081545829772949,
      "learning_rate": 6.374742196714615e-06,
      "loss": 0.0146,
      "step": 1120360
    },
    {
      "epoch": 1.833526442921388,
      "grad_norm": 0.7575868368148804,
      "learning_rate": 6.374676304501098e-06,
      "loss": 0.021,
      "step": 1120380
    },
    {
      "epoch": 1.8335591733600414,
      "grad_norm": 0.5219550132751465,
      "learning_rate": 6.37461041228758e-06,
      "loss": 0.012,
      "step": 1120400
    },
    {
      "epoch": 1.8335919037986947,
      "grad_norm": 0.242371067404747,
      "learning_rate": 6.374544520074063e-06,
      "loss": 0.0182,
      "step": 1120420
    },
    {
      "epoch": 1.833624634237348,
      "grad_norm": 1.0621299743652344,
      "learning_rate": 6.374478627860547e-06,
      "loss": 0.0189,
      "step": 1120440
    },
    {
      "epoch": 1.8336573646760015,
      "grad_norm": 0.43223193287849426,
      "learning_rate": 6.374412735647029e-06,
      "loss": 0.0183,
      "step": 1120460
    },
    {
      "epoch": 1.8336900951146546,
      "grad_norm": 0.5192256569862366,
      "learning_rate": 6.374346843433512e-06,
      "loss": 0.0243,
      "step": 1120480
    },
    {
      "epoch": 1.833722825553308,
      "grad_norm": 0.2970787286758423,
      "learning_rate": 6.374280951219994e-06,
      "loss": 0.0183,
      "step": 1120500
    },
    {
      "epoch": 1.8337555559919614,
      "grad_norm": 0.803335428237915,
      "learning_rate": 6.374215059006478e-06,
      "loss": 0.0245,
      "step": 1120520
    },
    {
      "epoch": 1.8337882864306148,
      "grad_norm": 0.3493022918701172,
      "learning_rate": 6.374149166792961e-06,
      "loss": 0.0166,
      "step": 1120540
    },
    {
      "epoch": 1.8338210168692681,
      "grad_norm": 0.3002450168132782,
      "learning_rate": 6.374083274579443e-06,
      "loss": 0.0159,
      "step": 1120560
    },
    {
      "epoch": 1.8338537473079213,
      "grad_norm": 0.11964154243469238,
      "learning_rate": 6.374017382365927e-06,
      "loss": 0.0154,
      "step": 1120580
    },
    {
      "epoch": 1.8338864777465749,
      "grad_norm": 1.1109139919281006,
      "learning_rate": 6.3739514901524095e-06,
      "loss": 0.0168,
      "step": 1120600
    },
    {
      "epoch": 1.833919208185228,
      "grad_norm": 0.18962711095809937,
      "learning_rate": 6.373885597938892e-06,
      "loss": 0.0176,
      "step": 1120620
    },
    {
      "epoch": 1.8339519386238814,
      "grad_norm": 0.7858668565750122,
      "learning_rate": 6.373819705725375e-06,
      "loss": 0.0163,
      "step": 1120640
    },
    {
      "epoch": 1.8339846690625348,
      "grad_norm": 0.08446463197469711,
      "learning_rate": 6.3737538135118585e-06,
      "loss": 0.0204,
      "step": 1120660
    },
    {
      "epoch": 1.8340173995011881,
      "grad_norm": 0.21569004654884338,
      "learning_rate": 6.3736879212983404e-06,
      "loss": 0.0207,
      "step": 1120680
    },
    {
      "epoch": 1.8340501299398415,
      "grad_norm": 1.3981292247772217,
      "learning_rate": 6.373622029084824e-06,
      "loss": 0.0177,
      "step": 1120700
    },
    {
      "epoch": 1.8340828603784947,
      "grad_norm": 0.14605681598186493,
      "learning_rate": 6.373556136871306e-06,
      "loss": 0.0209,
      "step": 1120720
    },
    {
      "epoch": 1.8341155908171483,
      "grad_norm": 0.09995521605014801,
      "learning_rate": 6.3734902446577895e-06,
      "loss": 0.0158,
      "step": 1120740
    },
    {
      "epoch": 1.8341483212558014,
      "grad_norm": 0.43478721380233765,
      "learning_rate": 6.373424352444272e-06,
      "loss": 0.0231,
      "step": 1120760
    },
    {
      "epoch": 1.8341810516944548,
      "grad_norm": 0.4832969009876251,
      "learning_rate": 6.373358460230755e-06,
      "loss": 0.0157,
      "step": 1120780
    },
    {
      "epoch": 1.8342137821331082,
      "grad_norm": 0.3899479806423187,
      "learning_rate": 6.373292568017238e-06,
      "loss": 0.018,
      "step": 1120800
    },
    {
      "epoch": 1.8342465125717615,
      "grad_norm": 0.24919112026691437,
      "learning_rate": 6.373226675803721e-06,
      "loss": 0.0135,
      "step": 1120820
    },
    {
      "epoch": 1.834279243010415,
      "grad_norm": 0.3239886462688446,
      "learning_rate": 6.373160783590203e-06,
      "loss": 0.0234,
      "step": 1120840
    },
    {
      "epoch": 1.834311973449068,
      "grad_norm": 0.8276765942573547,
      "learning_rate": 6.373094891376687e-06,
      "loss": 0.0242,
      "step": 1120860
    },
    {
      "epoch": 1.8343447038877216,
      "grad_norm": 0.36201754212379456,
      "learning_rate": 6.373028999163169e-06,
      "loss": 0.0085,
      "step": 1120880
    },
    {
      "epoch": 1.8343774343263748,
      "grad_norm": 1.0457433462142944,
      "learning_rate": 6.372963106949652e-06,
      "loss": 0.019,
      "step": 1120900
    },
    {
      "epoch": 1.8344101647650282,
      "grad_norm": 0.436545729637146,
      "learning_rate": 6.372897214736136e-06,
      "loss": 0.0197,
      "step": 1120920
    },
    {
      "epoch": 1.8344428952036815,
      "grad_norm": 0.481038898229599,
      "learning_rate": 6.372831322522618e-06,
      "loss": 0.015,
      "step": 1120940
    },
    {
      "epoch": 1.834475625642335,
      "grad_norm": 0.27265477180480957,
      "learning_rate": 6.372765430309101e-06,
      "loss": 0.0141,
      "step": 1120960
    },
    {
      "epoch": 1.8345083560809883,
      "grad_norm": 0.5441858768463135,
      "learning_rate": 6.372699538095583e-06,
      "loss": 0.0192,
      "step": 1120980
    },
    {
      "epoch": 1.8345410865196414,
      "grad_norm": 1.6445733308792114,
      "learning_rate": 6.372633645882067e-06,
      "loss": 0.0222,
      "step": 1121000
    },
    {
      "epoch": 1.834573816958295,
      "grad_norm": 0.6484279632568359,
      "learning_rate": 6.3725677536685496e-06,
      "loss": 0.0291,
      "step": 1121020
    },
    {
      "epoch": 1.8346065473969482,
      "grad_norm": 0.394692063331604,
      "learning_rate": 6.372501861455032e-06,
      "loss": 0.0171,
      "step": 1121040
    },
    {
      "epoch": 1.8346392778356015,
      "grad_norm": 0.5727494955062866,
      "learning_rate": 6.372435969241515e-06,
      "loss": 0.0146,
      "step": 1121060
    },
    {
      "epoch": 1.834672008274255,
      "grad_norm": 0.48519811034202576,
      "learning_rate": 6.372370077027999e-06,
      "loss": 0.0251,
      "step": 1121080
    },
    {
      "epoch": 1.834704738712908,
      "grad_norm": 0.4061174988746643,
      "learning_rate": 6.3723041848144805e-06,
      "loss": 0.0152,
      "step": 1121100
    },
    {
      "epoch": 1.8347374691515617,
      "grad_norm": 0.2328050434589386,
      "learning_rate": 6.372238292600964e-06,
      "loss": 0.0179,
      "step": 1121120
    },
    {
      "epoch": 1.8347701995902148,
      "grad_norm": 1.0604373216629028,
      "learning_rate": 6.372172400387446e-06,
      "loss": 0.0162,
      "step": 1121140
    },
    {
      "epoch": 1.8348029300288684,
      "grad_norm": 0.16871681809425354,
      "learning_rate": 6.37210650817393e-06,
      "loss": 0.0166,
      "step": 1121160
    },
    {
      "epoch": 1.8348356604675216,
      "grad_norm": 1.0554198026657104,
      "learning_rate": 6.372040615960412e-06,
      "loss": 0.0195,
      "step": 1121180
    },
    {
      "epoch": 1.834868390906175,
      "grad_norm": 0.25312376022338867,
      "learning_rate": 6.371974723746895e-06,
      "loss": 0.016,
      "step": 1121200
    },
    {
      "epoch": 1.8349011213448283,
      "grad_norm": 0.2218884527683258,
      "learning_rate": 6.371908831533378e-06,
      "loss": 0.0243,
      "step": 1121220
    },
    {
      "epoch": 1.8349338517834815,
      "grad_norm": 0.6394246220588684,
      "learning_rate": 6.371842939319861e-06,
      "loss": 0.0239,
      "step": 1121240
    },
    {
      "epoch": 1.834966582222135,
      "grad_norm": 1.1600282192230225,
      "learning_rate": 6.371777047106344e-06,
      "loss": 0.0221,
      "step": 1121260
    },
    {
      "epoch": 1.8349993126607882,
      "grad_norm": 0.38891905546188354,
      "learning_rate": 6.371711154892827e-06,
      "loss": 0.0194,
      "step": 1121280
    },
    {
      "epoch": 1.8350320430994416,
      "grad_norm": 0.5963310599327087,
      "learning_rate": 6.3716452626793105e-06,
      "loss": 0.0248,
      "step": 1121300
    },
    {
      "epoch": 1.835064773538095,
      "grad_norm": 0.29656872153282166,
      "learning_rate": 6.371579370465792e-06,
      "loss": 0.0185,
      "step": 1121320
    },
    {
      "epoch": 1.8350975039767483,
      "grad_norm": 0.6047425866127014,
      "learning_rate": 6.371513478252276e-06,
      "loss": 0.015,
      "step": 1121340
    },
    {
      "epoch": 1.8351302344154017,
      "grad_norm": 0.21960194408893585,
      "learning_rate": 6.371447586038758e-06,
      "loss": 0.0212,
      "step": 1121360
    },
    {
      "epoch": 1.8351629648540548,
      "grad_norm": 0.6486682891845703,
      "learning_rate": 6.371381693825241e-06,
      "loss": 0.0107,
      "step": 1121380
    },
    {
      "epoch": 1.8351956952927084,
      "grad_norm": 0.5996423363685608,
      "learning_rate": 6.371315801611724e-06,
      "loss": 0.0156,
      "step": 1121400
    },
    {
      "epoch": 1.8352284257313616,
      "grad_norm": 0.4942001700401306,
      "learning_rate": 6.371249909398207e-06,
      "loss": 0.013,
      "step": 1121420
    },
    {
      "epoch": 1.835261156170015,
      "grad_norm": 0.7936222553253174,
      "learning_rate": 6.37118401718469e-06,
      "loss": 0.0259,
      "step": 1121440
    },
    {
      "epoch": 1.8352938866086683,
      "grad_norm": 1.807953119277954,
      "learning_rate": 6.371118124971173e-06,
      "loss": 0.0206,
      "step": 1121460
    },
    {
      "epoch": 1.8353266170473217,
      "grad_norm": 0.2734530568122864,
      "learning_rate": 6.371052232757655e-06,
      "loss": 0.0153,
      "step": 1121480
    },
    {
      "epoch": 1.835359347485975,
      "grad_norm": 1.1580078601837158,
      "learning_rate": 6.370986340544139e-06,
      "loss": 0.0227,
      "step": 1121500
    },
    {
      "epoch": 1.8353920779246282,
      "grad_norm": 0.9536592960357666,
      "learning_rate": 6.370920448330621e-06,
      "loss": 0.0222,
      "step": 1121520
    },
    {
      "epoch": 1.8354248083632818,
      "grad_norm": 0.9250829219818115,
      "learning_rate": 6.370854556117104e-06,
      "loss": 0.0221,
      "step": 1121540
    },
    {
      "epoch": 1.835457538801935,
      "grad_norm": 0.13393399119377136,
      "learning_rate": 6.370788663903587e-06,
      "loss": 0.0092,
      "step": 1121560
    },
    {
      "epoch": 1.8354902692405883,
      "grad_norm": 0.8735666871070862,
      "learning_rate": 6.37072277169007e-06,
      "loss": 0.0207,
      "step": 1121580
    },
    {
      "epoch": 1.8355229996792417,
      "grad_norm": 0.4301600158214569,
      "learning_rate": 6.370656879476553e-06,
      "loss": 0.0189,
      "step": 1121600
    },
    {
      "epoch": 1.835555730117895,
      "grad_norm": 0.20475932955741882,
      "learning_rate": 6.370590987263036e-06,
      "loss": 0.0217,
      "step": 1121620
    },
    {
      "epoch": 1.8355884605565485,
      "grad_norm": 0.21034693717956543,
      "learning_rate": 6.370525095049519e-06,
      "loss": 0.0277,
      "step": 1121640
    },
    {
      "epoch": 1.8356211909952016,
      "grad_norm": 0.20197239518165588,
      "learning_rate": 6.3704592028360015e-06,
      "loss": 0.014,
      "step": 1121660
    },
    {
      "epoch": 1.8356539214338552,
      "grad_norm": 0.33224594593048096,
      "learning_rate": 6.370393310622485e-06,
      "loss": 0.0183,
      "step": 1121680
    },
    {
      "epoch": 1.8356866518725083,
      "grad_norm": 0.3060159385204315,
      "learning_rate": 6.370327418408967e-06,
      "loss": 0.0141,
      "step": 1121700
    },
    {
      "epoch": 1.8357193823111617,
      "grad_norm": 0.17326036095619202,
      "learning_rate": 6.3702615261954505e-06,
      "loss": 0.0148,
      "step": 1121720
    },
    {
      "epoch": 1.835752112749815,
      "grad_norm": 0.33811211585998535,
      "learning_rate": 6.370195633981932e-06,
      "loss": 0.0164,
      "step": 1121740
    },
    {
      "epoch": 1.8357848431884685,
      "grad_norm": 0.13467738032341003,
      "learning_rate": 6.370129741768416e-06,
      "loss": 0.0122,
      "step": 1121760
    },
    {
      "epoch": 1.8358175736271218,
      "grad_norm": 0.5489765405654907,
      "learning_rate": 6.370063849554898e-06,
      "loss": 0.0177,
      "step": 1121780
    },
    {
      "epoch": 1.835850304065775,
      "grad_norm": 0.480781614780426,
      "learning_rate": 6.3699979573413815e-06,
      "loss": 0.0219,
      "step": 1121800
    },
    {
      "epoch": 1.8358830345044286,
      "grad_norm": 0.20341874659061432,
      "learning_rate": 6.369932065127864e-06,
      "loss": 0.0099,
      "step": 1121820
    },
    {
      "epoch": 1.8359157649430817,
      "grad_norm": 0.48437315225601196,
      "learning_rate": 6.369866172914348e-06,
      "loss": 0.0135,
      "step": 1121840
    },
    {
      "epoch": 1.835948495381735,
      "grad_norm": 0.5861943960189819,
      "learning_rate": 6.36980028070083e-06,
      "loss": 0.017,
      "step": 1121860
    },
    {
      "epoch": 1.8359812258203885,
      "grad_norm": 0.5280289053916931,
      "learning_rate": 6.369734388487313e-06,
      "loss": 0.0204,
      "step": 1121880
    },
    {
      "epoch": 1.8360139562590416,
      "grad_norm": 0.28902560472488403,
      "learning_rate": 6.369668496273795e-06,
      "loss": 0.0217,
      "step": 1121900
    },
    {
      "epoch": 1.8360466866976952,
      "grad_norm": 0.8567232489585876,
      "learning_rate": 6.369602604060279e-06,
      "loss": 0.0196,
      "step": 1121920
    },
    {
      "epoch": 1.8360794171363484,
      "grad_norm": 0.49438056349754333,
      "learning_rate": 6.369536711846762e-06,
      "loss": 0.0243,
      "step": 1121940
    },
    {
      "epoch": 1.836112147575002,
      "grad_norm": 0.4397905170917511,
      "learning_rate": 6.369470819633244e-06,
      "loss": 0.0203,
      "step": 1121960
    },
    {
      "epoch": 1.8361448780136551,
      "grad_norm": 0.5251007080078125,
      "learning_rate": 6.369404927419728e-06,
      "loss": 0.0188,
      "step": 1121980
    },
    {
      "epoch": 1.8361776084523085,
      "grad_norm": 0.13524624705314636,
      "learning_rate": 6.36933903520621e-06,
      "loss": 0.0265,
      "step": 1122000
    },
    {
      "epoch": 1.8362103388909619,
      "grad_norm": 0.31542181968688965,
      "learning_rate": 6.369273142992693e-06,
      "loss": 0.0155,
      "step": 1122020
    },
    {
      "epoch": 1.836243069329615,
      "grad_norm": 0.285663366317749,
      "learning_rate": 6.369207250779176e-06,
      "loss": 0.0121,
      "step": 1122040
    },
    {
      "epoch": 1.8362757997682686,
      "grad_norm": 0.6223841309547424,
      "learning_rate": 6.369141358565659e-06,
      "loss": 0.0207,
      "step": 1122060
    },
    {
      "epoch": 1.8363085302069218,
      "grad_norm": 0.28848135471343994,
      "learning_rate": 6.3690754663521415e-06,
      "loss": 0.0171,
      "step": 1122080
    },
    {
      "epoch": 1.8363412606455751,
      "grad_norm": 0.2842372953891754,
      "learning_rate": 6.369009574138625e-06,
      "loss": 0.0154,
      "step": 1122100
    },
    {
      "epoch": 1.8363739910842285,
      "grad_norm": 0.7682511210441589,
      "learning_rate": 6.368943681925107e-06,
      "loss": 0.0187,
      "step": 1122120
    },
    {
      "epoch": 1.8364067215228819,
      "grad_norm": 0.377620130777359,
      "learning_rate": 6.368877789711591e-06,
      "loss": 0.0166,
      "step": 1122140
    },
    {
      "epoch": 1.8364394519615352,
      "grad_norm": 1.2335381507873535,
      "learning_rate": 6.3688118974980725e-06,
      "loss": 0.0181,
      "step": 1122160
    },
    {
      "epoch": 1.8364721824001884,
      "grad_norm": 1.2422465085983276,
      "learning_rate": 6.368746005284556e-06,
      "loss": 0.0202,
      "step": 1122180
    },
    {
      "epoch": 1.836504912838842,
      "grad_norm": 1.5337550640106201,
      "learning_rate": 6.368680113071039e-06,
      "loss": 0.0191,
      "step": 1122200
    },
    {
      "epoch": 1.8365376432774951,
      "grad_norm": 2.321478843688965,
      "learning_rate": 6.3686142208575216e-06,
      "loss": 0.021,
      "step": 1122220
    },
    {
      "epoch": 1.8365703737161485,
      "grad_norm": 0.6167763471603394,
      "learning_rate": 6.368548328644004e-06,
      "loss": 0.0243,
      "step": 1122240
    },
    {
      "epoch": 1.8366031041548019,
      "grad_norm": 0.7505013346672058,
      "learning_rate": 6.368482436430488e-06,
      "loss": 0.0136,
      "step": 1122260
    },
    {
      "epoch": 1.8366358345934553,
      "grad_norm": 0.4931045174598694,
      "learning_rate": 6.36841654421697e-06,
      "loss": 0.0212,
      "step": 1122280
    },
    {
      "epoch": 1.8366685650321086,
      "grad_norm": 0.34633079171180725,
      "learning_rate": 6.368350652003453e-06,
      "loss": 0.0215,
      "step": 1122300
    },
    {
      "epoch": 1.8367012954707618,
      "grad_norm": 0.3267675042152405,
      "learning_rate": 6.368284759789937e-06,
      "loss": 0.0116,
      "step": 1122320
    },
    {
      "epoch": 1.8367340259094154,
      "grad_norm": 0.23608775436878204,
      "learning_rate": 6.368218867576419e-06,
      "loss": 0.0127,
      "step": 1122340
    },
    {
      "epoch": 1.8367667563480685,
      "grad_norm": 0.8700177669525146,
      "learning_rate": 6.3681529753629024e-06,
      "loss": 0.0196,
      "step": 1122360
    },
    {
      "epoch": 1.836799486786722,
      "grad_norm": 0.287320613861084,
      "learning_rate": 6.368087083149384e-06,
      "loss": 0.0201,
      "step": 1122380
    },
    {
      "epoch": 1.8368322172253753,
      "grad_norm": 0.761745035648346,
      "learning_rate": 6.368021190935868e-06,
      "loss": 0.0322,
      "step": 1122400
    },
    {
      "epoch": 1.8368649476640286,
      "grad_norm": 0.4299437403678894,
      "learning_rate": 6.367955298722351e-06,
      "loss": 0.0213,
      "step": 1122420
    },
    {
      "epoch": 1.836897678102682,
      "grad_norm": 0.3221159875392914,
      "learning_rate": 6.367889406508833e-06,
      "loss": 0.0202,
      "step": 1122440
    },
    {
      "epoch": 1.8369304085413352,
      "grad_norm": 0.09332064539194107,
      "learning_rate": 6.367823514295316e-06,
      "loss": 0.0222,
      "step": 1122460
    },
    {
      "epoch": 1.8369631389799888,
      "grad_norm": 0.4958477020263672,
      "learning_rate": 6.3677576220818e-06,
      "loss": 0.0146,
      "step": 1122480
    },
    {
      "epoch": 1.836995869418642,
      "grad_norm": 0.16767363250255585,
      "learning_rate": 6.367691729868282e-06,
      "loss": 0.0157,
      "step": 1122500
    },
    {
      "epoch": 1.8370285998572953,
      "grad_norm": 0.10537364333868027,
      "learning_rate": 6.367625837654765e-06,
      "loss": 0.0196,
      "step": 1122520
    },
    {
      "epoch": 1.8370613302959486,
      "grad_norm": 0.48501935601234436,
      "learning_rate": 6.367559945441247e-06,
      "loss": 0.0233,
      "step": 1122540
    },
    {
      "epoch": 1.837094060734602,
      "grad_norm": 0.19235077500343323,
      "learning_rate": 6.367494053227731e-06,
      "loss": 0.0205,
      "step": 1122560
    },
    {
      "epoch": 1.8371267911732554,
      "grad_norm": 0.9036149978637695,
      "learning_rate": 6.367428161014213e-06,
      "loss": 0.0142,
      "step": 1122580
    },
    {
      "epoch": 1.8371595216119085,
      "grad_norm": 0.4205692410469055,
      "learning_rate": 6.367362268800696e-06,
      "loss": 0.0191,
      "step": 1122600
    },
    {
      "epoch": 1.8371922520505621,
      "grad_norm": 0.39429906010627747,
      "learning_rate": 6.367296376587179e-06,
      "loss": 0.0198,
      "step": 1122620
    },
    {
      "epoch": 1.8372249824892153,
      "grad_norm": 0.6174315214157104,
      "learning_rate": 6.3672304843736625e-06,
      "loss": 0.0175,
      "step": 1122640
    },
    {
      "epoch": 1.8372577129278687,
      "grad_norm": 1.0909243822097778,
      "learning_rate": 6.367164592160145e-06,
      "loss": 0.0244,
      "step": 1122660
    },
    {
      "epoch": 1.837290443366522,
      "grad_norm": 0.8261104822158813,
      "learning_rate": 6.367098699946628e-06,
      "loss": 0.018,
      "step": 1122680
    },
    {
      "epoch": 1.8373231738051752,
      "grad_norm": 0.3509311079978943,
      "learning_rate": 6.3670328077331116e-06,
      "loss": 0.0129,
      "step": 1122700
    },
    {
      "epoch": 1.8373559042438288,
      "grad_norm": 0.8742679357528687,
      "learning_rate": 6.3669669155195934e-06,
      "loss": 0.0174,
      "step": 1122720
    },
    {
      "epoch": 1.837388634682482,
      "grad_norm": 0.12213675677776337,
      "learning_rate": 6.366901023306077e-06,
      "loss": 0.0211,
      "step": 1122740
    },
    {
      "epoch": 1.8374213651211355,
      "grad_norm": 0.8618712425231934,
      "learning_rate": 6.366835131092559e-06,
      "loss": 0.0158,
      "step": 1122760
    },
    {
      "epoch": 1.8374540955597887,
      "grad_norm": 0.2329440414905548,
      "learning_rate": 6.3667692388790425e-06,
      "loss": 0.0226,
      "step": 1122780
    },
    {
      "epoch": 1.837486825998442,
      "grad_norm": 0.7449320554733276,
      "learning_rate": 6.366703346665524e-06,
      "loss": 0.0239,
      "step": 1122800
    },
    {
      "epoch": 1.8375195564370954,
      "grad_norm": 0.8935085535049438,
      "learning_rate": 6.366637454452008e-06,
      "loss": 0.022,
      "step": 1122820
    },
    {
      "epoch": 1.8375522868757486,
      "grad_norm": 0.40860143303871155,
      "learning_rate": 6.366571562238491e-06,
      "loss": 0.0189,
      "step": 1122840
    },
    {
      "epoch": 1.8375850173144022,
      "grad_norm": 1.4551992416381836,
      "learning_rate": 6.3665056700249735e-06,
      "loss": 0.024,
      "step": 1122860
    },
    {
      "epoch": 1.8376177477530553,
      "grad_norm": 3.3820226192474365,
      "learning_rate": 6.366439777811456e-06,
      "loss": 0.0222,
      "step": 1122880
    },
    {
      "epoch": 1.8376504781917087,
      "grad_norm": 0.3464028537273407,
      "learning_rate": 6.36637388559794e-06,
      "loss": 0.0145,
      "step": 1122900
    },
    {
      "epoch": 1.837683208630362,
      "grad_norm": 0.9971398711204529,
      "learning_rate": 6.366307993384422e-06,
      "loss": 0.0184,
      "step": 1122920
    },
    {
      "epoch": 1.8377159390690154,
      "grad_norm": 0.9114465713500977,
      "learning_rate": 6.366242101170905e-06,
      "loss": 0.0211,
      "step": 1122940
    },
    {
      "epoch": 1.8377486695076688,
      "grad_norm": 0.9273506999015808,
      "learning_rate": 6.366176208957387e-06,
      "loss": 0.0193,
      "step": 1122960
    },
    {
      "epoch": 1.837781399946322,
      "grad_norm": 1.0401378870010376,
      "learning_rate": 6.366110316743871e-06,
      "loss": 0.022,
      "step": 1122980
    },
    {
      "epoch": 1.8378141303849755,
      "grad_norm": 0.12457062304019928,
      "learning_rate": 6.366044424530354e-06,
      "loss": 0.0189,
      "step": 1123000
    },
    {
      "epoch": 1.8378468608236287,
      "grad_norm": 0.44326505064964294,
      "learning_rate": 6.365978532316836e-06,
      "loss": 0.0329,
      "step": 1123020
    },
    {
      "epoch": 1.837879591262282,
      "grad_norm": 0.35942742228507996,
      "learning_rate": 6.36591264010332e-06,
      "loss": 0.0141,
      "step": 1123040
    },
    {
      "epoch": 1.8379123217009354,
      "grad_norm": 0.6357188820838928,
      "learning_rate": 6.3658467478898026e-06,
      "loss": 0.0169,
      "step": 1123060
    },
    {
      "epoch": 1.8379450521395888,
      "grad_norm": 0.32038581371307373,
      "learning_rate": 6.365780855676285e-06,
      "loss": 0.0162,
      "step": 1123080
    },
    {
      "epoch": 1.8379777825782422,
      "grad_norm": 0.5254125595092773,
      "learning_rate": 6.365714963462768e-06,
      "loss": 0.0222,
      "step": 1123100
    },
    {
      "epoch": 1.8380105130168953,
      "grad_norm": 0.26245585083961487,
      "learning_rate": 6.365649071249252e-06,
      "loss": 0.017,
      "step": 1123120
    },
    {
      "epoch": 1.838043243455549,
      "grad_norm": 0.21592441201210022,
      "learning_rate": 6.3655831790357335e-06,
      "loss": 0.0173,
      "step": 1123140
    },
    {
      "epoch": 1.838075973894202,
      "grad_norm": 0.15684032440185547,
      "learning_rate": 6.365517286822217e-06,
      "loss": 0.0109,
      "step": 1123160
    },
    {
      "epoch": 1.8381087043328554,
      "grad_norm": 0.4385019540786743,
      "learning_rate": 6.365451394608699e-06,
      "loss": 0.0197,
      "step": 1123180
    },
    {
      "epoch": 1.8381414347715088,
      "grad_norm": 0.35887086391448975,
      "learning_rate": 6.365385502395183e-06,
      "loss": 0.0198,
      "step": 1123200
    },
    {
      "epoch": 1.8381741652101622,
      "grad_norm": 0.2826603949069977,
      "learning_rate": 6.365319610181665e-06,
      "loss": 0.013,
      "step": 1123220
    },
    {
      "epoch": 1.8382068956488156,
      "grad_norm": 0.5210837125778198,
      "learning_rate": 6.365253717968148e-06,
      "loss": 0.0169,
      "step": 1123240
    },
    {
      "epoch": 1.8382396260874687,
      "grad_norm": 0.4867599606513977,
      "learning_rate": 6.365187825754631e-06,
      "loss": 0.021,
      "step": 1123260
    },
    {
      "epoch": 1.8382723565261223,
      "grad_norm": 0.4744592607021332,
      "learning_rate": 6.365121933541114e-06,
      "loss": 0.0169,
      "step": 1123280
    },
    {
      "epoch": 1.8383050869647755,
      "grad_norm": 0.3190115988254547,
      "learning_rate": 6.365056041327596e-06,
      "loss": 0.0234,
      "step": 1123300
    },
    {
      "epoch": 1.8383378174034288,
      "grad_norm": 0.4786371886730194,
      "learning_rate": 6.36499014911408e-06,
      "loss": 0.0215,
      "step": 1123320
    },
    {
      "epoch": 1.8383705478420822,
      "grad_norm": 0.06594112515449524,
      "learning_rate": 6.364924256900562e-06,
      "loss": 0.0116,
      "step": 1123340
    },
    {
      "epoch": 1.8384032782807354,
      "grad_norm": 0.7362751960754395,
      "learning_rate": 6.364858364687045e-06,
      "loss": 0.0163,
      "step": 1123360
    },
    {
      "epoch": 1.838436008719389,
      "grad_norm": 0.17161764204502106,
      "learning_rate": 6.364792472473529e-06,
      "loss": 0.0192,
      "step": 1123380
    },
    {
      "epoch": 1.838468739158042,
      "grad_norm": 0.28376340866088867,
      "learning_rate": 6.364726580260011e-06,
      "loss": 0.0124,
      "step": 1123400
    },
    {
      "epoch": 1.8385014695966957,
      "grad_norm": 0.6419649720191956,
      "learning_rate": 6.364660688046494e-06,
      "loss": 0.018,
      "step": 1123420
    },
    {
      "epoch": 1.8385342000353488,
      "grad_norm": 1.3025286197662354,
      "learning_rate": 6.364594795832977e-06,
      "loss": 0.0207,
      "step": 1123440
    },
    {
      "epoch": 1.8385669304740022,
      "grad_norm": 0.9824765920639038,
      "learning_rate": 6.36452890361946e-06,
      "loss": 0.0194,
      "step": 1123460
    },
    {
      "epoch": 1.8385996609126556,
      "grad_norm": 3.613884210586548,
      "learning_rate": 6.364463011405943e-06,
      "loss": 0.0259,
      "step": 1123480
    },
    {
      "epoch": 1.8386323913513087,
      "grad_norm": 0.7174533605575562,
      "learning_rate": 6.364397119192426e-06,
      "loss": 0.0162,
      "step": 1123500
    },
    {
      "epoch": 1.8386651217899623,
      "grad_norm": 1.5494152307510376,
      "learning_rate": 6.364331226978908e-06,
      "loss": 0.018,
      "step": 1123520
    },
    {
      "epoch": 1.8386978522286155,
      "grad_norm": 0.16595356166362762,
      "learning_rate": 6.364265334765392e-06,
      "loss": 0.0229,
      "step": 1123540
    },
    {
      "epoch": 1.8387305826672689,
      "grad_norm": 0.28096917271614075,
      "learning_rate": 6.364199442551874e-06,
      "loss": 0.0209,
      "step": 1123560
    },
    {
      "epoch": 1.8387633131059222,
      "grad_norm": 0.411830335855484,
      "learning_rate": 6.364133550338357e-06,
      "loss": 0.019,
      "step": 1123580
    },
    {
      "epoch": 1.8387960435445756,
      "grad_norm": 0.3328993022441864,
      "learning_rate": 6.36406765812484e-06,
      "loss": 0.0111,
      "step": 1123600
    },
    {
      "epoch": 1.838828773983229,
      "grad_norm": 0.5989677309989929,
      "learning_rate": 6.364001765911323e-06,
      "loss": 0.0234,
      "step": 1123620
    },
    {
      "epoch": 1.8388615044218821,
      "grad_norm": 0.21922001242637634,
      "learning_rate": 6.363935873697805e-06,
      "loss": 0.0197,
      "step": 1123640
    },
    {
      "epoch": 1.8388942348605357,
      "grad_norm": 0.42040735483169556,
      "learning_rate": 6.363869981484289e-06,
      "loss": 0.0151,
      "step": 1123660
    },
    {
      "epoch": 1.8389269652991889,
      "grad_norm": 1.7210278511047363,
      "learning_rate": 6.363804089270771e-06,
      "loss": 0.0231,
      "step": 1123680
    },
    {
      "epoch": 1.8389596957378422,
      "grad_norm": 1.6501933336257935,
      "learning_rate": 6.3637381970572545e-06,
      "loss": 0.0186,
      "step": 1123700
    },
    {
      "epoch": 1.8389924261764956,
      "grad_norm": 0.3901364803314209,
      "learning_rate": 6.363672304843738e-06,
      "loss": 0.0159,
      "step": 1123720
    },
    {
      "epoch": 1.839025156615149,
      "grad_norm": 1.1067864894866943,
      "learning_rate": 6.36360641263022e-06,
      "loss": 0.0202,
      "step": 1123740
    },
    {
      "epoch": 1.8390578870538024,
      "grad_norm": 0.24041323363780975,
      "learning_rate": 6.3635405204167035e-06,
      "loss": 0.0283,
      "step": 1123760
    },
    {
      "epoch": 1.8390906174924555,
      "grad_norm": 0.8198737502098083,
      "learning_rate": 6.3634746282031854e-06,
      "loss": 0.0328,
      "step": 1123780
    },
    {
      "epoch": 1.839123347931109,
      "grad_norm": 0.1340271234512329,
      "learning_rate": 6.363408735989669e-06,
      "loss": 0.0274,
      "step": 1123800
    },
    {
      "epoch": 1.8391560783697622,
      "grad_norm": 0.7412592172622681,
      "learning_rate": 6.363342843776151e-06,
      "loss": 0.0172,
      "step": 1123820
    },
    {
      "epoch": 1.8391888088084156,
      "grad_norm": 0.3279831111431122,
      "learning_rate": 6.3632769515626345e-06,
      "loss": 0.0153,
      "step": 1123840
    },
    {
      "epoch": 1.839221539247069,
      "grad_norm": 0.1693340241909027,
      "learning_rate": 6.363211059349117e-06,
      "loss": 0.0145,
      "step": 1123860
    },
    {
      "epoch": 1.8392542696857224,
      "grad_norm": 0.37438786029815674,
      "learning_rate": 6.3631451671356e-06,
      "loss": 0.0207,
      "step": 1123880
    },
    {
      "epoch": 1.8392870001243757,
      "grad_norm": 0.24397434294223785,
      "learning_rate": 6.363079274922083e-06,
      "loss": 0.0126,
      "step": 1123900
    },
    {
      "epoch": 1.8393197305630289,
      "grad_norm": 0.15384891629219055,
      "learning_rate": 6.363013382708566e-06,
      "loss": 0.0207,
      "step": 1123920
    },
    {
      "epoch": 1.8393524610016825,
      "grad_norm": 1.5225032567977905,
      "learning_rate": 6.362947490495048e-06,
      "loss": 0.0156,
      "step": 1123940
    },
    {
      "epoch": 1.8393851914403356,
      "grad_norm": 0.5088614821434021,
      "learning_rate": 6.362881598281532e-06,
      "loss": 0.0259,
      "step": 1123960
    },
    {
      "epoch": 1.839417921878989,
      "grad_norm": 0.5458850264549255,
      "learning_rate": 6.362815706068014e-06,
      "loss": 0.0177,
      "step": 1123980
    },
    {
      "epoch": 1.8394506523176424,
      "grad_norm": 1.0211589336395264,
      "learning_rate": 6.362749813854497e-06,
      "loss": 0.018,
      "step": 1124000
    },
    {
      "epoch": 1.8394833827562957,
      "grad_norm": 1.0971852540969849,
      "learning_rate": 6.36268392164098e-06,
      "loss": 0.0189,
      "step": 1124020
    },
    {
      "epoch": 1.8395161131949491,
      "grad_norm": 0.8093122243881226,
      "learning_rate": 6.362618029427463e-06,
      "loss": 0.0232,
      "step": 1124040
    },
    {
      "epoch": 1.8395488436336023,
      "grad_norm": 0.284225195646286,
      "learning_rate": 6.362552137213946e-06,
      "loss": 0.0184,
      "step": 1124060
    },
    {
      "epoch": 1.8395815740722559,
      "grad_norm": 0.717263400554657,
      "learning_rate": 6.362486245000429e-06,
      "loss": 0.0188,
      "step": 1124080
    },
    {
      "epoch": 1.839614304510909,
      "grad_norm": 0.2609972357749939,
      "learning_rate": 6.362420352786912e-06,
      "loss": 0.0229,
      "step": 1124100
    },
    {
      "epoch": 1.8396470349495624,
      "grad_norm": 0.4374949634075165,
      "learning_rate": 6.3623544605733945e-06,
      "loss": 0.0219,
      "step": 1124120
    },
    {
      "epoch": 1.8396797653882158,
      "grad_norm": 0.49361032247543335,
      "learning_rate": 6.362288568359878e-06,
      "loss": 0.0182,
      "step": 1124140
    },
    {
      "epoch": 1.839712495826869,
      "grad_norm": 0.5222461819648743,
      "learning_rate": 6.36222267614636e-06,
      "loss": 0.0138,
      "step": 1124160
    },
    {
      "epoch": 1.8397452262655225,
      "grad_norm": 1.5436251163482666,
      "learning_rate": 6.362156783932844e-06,
      "loss": 0.0223,
      "step": 1124180
    },
    {
      "epoch": 1.8397779567041757,
      "grad_norm": 0.972933292388916,
      "learning_rate": 6.3620908917193255e-06,
      "loss": 0.0153,
      "step": 1124200
    },
    {
      "epoch": 1.8398106871428292,
      "grad_norm": 0.1453145295381546,
      "learning_rate": 6.362024999505809e-06,
      "loss": 0.0139,
      "step": 1124220
    },
    {
      "epoch": 1.8398434175814824,
      "grad_norm": 0.1798238903284073,
      "learning_rate": 6.361959107292292e-06,
      "loss": 0.023,
      "step": 1124240
    },
    {
      "epoch": 1.8398761480201358,
      "grad_norm": 0.7692621350288391,
      "learning_rate": 6.3618932150787746e-06,
      "loss": 0.0176,
      "step": 1124260
    },
    {
      "epoch": 1.8399088784587891,
      "grad_norm": 0.6913931369781494,
      "learning_rate": 6.361827322865257e-06,
      "loss": 0.0182,
      "step": 1124280
    },
    {
      "epoch": 1.8399416088974423,
      "grad_norm": 0.5604162812232971,
      "learning_rate": 6.361761430651741e-06,
      "loss": 0.015,
      "step": 1124300
    },
    {
      "epoch": 1.8399743393360959,
      "grad_norm": 0.49680984020233154,
      "learning_rate": 6.361695538438223e-06,
      "loss": 0.0125,
      "step": 1124320
    },
    {
      "epoch": 1.840007069774749,
      "grad_norm": 0.8295053243637085,
      "learning_rate": 6.361629646224706e-06,
      "loss": 0.021,
      "step": 1124340
    },
    {
      "epoch": 1.8400398002134024,
      "grad_norm": 1.4072014093399048,
      "learning_rate": 6.361563754011188e-06,
      "loss": 0.0296,
      "step": 1124360
    },
    {
      "epoch": 1.8400725306520558,
      "grad_norm": 0.32504263520240784,
      "learning_rate": 6.361497861797672e-06,
      "loss": 0.0186,
      "step": 1124380
    },
    {
      "epoch": 1.8401052610907092,
      "grad_norm": 0.3438125252723694,
      "learning_rate": 6.361431969584155e-06,
      "loss": 0.0121,
      "step": 1124400
    },
    {
      "epoch": 1.8401379915293625,
      "grad_norm": 0.12188352644443512,
      "learning_rate": 6.361366077370637e-06,
      "loss": 0.0222,
      "step": 1124420
    },
    {
      "epoch": 1.8401707219680157,
      "grad_norm": 1.0043470859527588,
      "learning_rate": 6.361300185157121e-06,
      "loss": 0.0182,
      "step": 1124440
    },
    {
      "epoch": 1.8402034524066693,
      "grad_norm": 0.6317722201347351,
      "learning_rate": 6.361234292943604e-06,
      "loss": 0.0226,
      "step": 1124460
    },
    {
      "epoch": 1.8402361828453224,
      "grad_norm": 0.2329174429178238,
      "learning_rate": 6.361168400730086e-06,
      "loss": 0.0135,
      "step": 1124480
    },
    {
      "epoch": 1.8402689132839758,
      "grad_norm": 0.20087844133377075,
      "learning_rate": 6.361102508516569e-06,
      "loss": 0.0243,
      "step": 1124500
    },
    {
      "epoch": 1.8403016437226292,
      "grad_norm": 0.2548564672470093,
      "learning_rate": 6.361036616303053e-06,
      "loss": 0.0126,
      "step": 1124520
    },
    {
      "epoch": 1.8403343741612825,
      "grad_norm": 0.36475151777267456,
      "learning_rate": 6.360970724089535e-06,
      "loss": 0.0125,
      "step": 1124540
    },
    {
      "epoch": 1.840367104599936,
      "grad_norm": 0.47940394282341003,
      "learning_rate": 6.360904831876018e-06,
      "loss": 0.0153,
      "step": 1124560
    },
    {
      "epoch": 1.840399835038589,
      "grad_norm": 0.35575130581855774,
      "learning_rate": 6.3608389396625e-06,
      "loss": 0.0171,
      "step": 1124580
    },
    {
      "epoch": 1.8404325654772427,
      "grad_norm": 0.808945894241333,
      "learning_rate": 6.360773047448984e-06,
      "loss": 0.0213,
      "step": 1124600
    },
    {
      "epoch": 1.8404652959158958,
      "grad_norm": 0.2688796818256378,
      "learning_rate": 6.3607071552354664e-06,
      "loss": 0.0119,
      "step": 1124620
    },
    {
      "epoch": 1.8404980263545492,
      "grad_norm": 0.6413858532905579,
      "learning_rate": 6.360641263021949e-06,
      "loss": 0.0187,
      "step": 1124640
    },
    {
      "epoch": 1.8405307567932025,
      "grad_norm": 0.8181702494621277,
      "learning_rate": 6.360575370808432e-06,
      "loss": 0.0197,
      "step": 1124660
    },
    {
      "epoch": 1.840563487231856,
      "grad_norm": 0.7531448006629944,
      "learning_rate": 6.3605094785949155e-06,
      "loss": 0.0208,
      "step": 1124680
    },
    {
      "epoch": 1.8405962176705093,
      "grad_norm": 0.6679644584655762,
      "learning_rate": 6.360443586381397e-06,
      "loss": 0.0179,
      "step": 1124700
    },
    {
      "epoch": 1.8406289481091624,
      "grad_norm": 0.3093319237232208,
      "learning_rate": 6.360377694167881e-06,
      "loss": 0.0171,
      "step": 1124720
    },
    {
      "epoch": 1.840661678547816,
      "grad_norm": 0.4451931118965149,
      "learning_rate": 6.360311801954363e-06,
      "loss": 0.0207,
      "step": 1124740
    },
    {
      "epoch": 1.8406944089864692,
      "grad_norm": 0.4753856956958771,
      "learning_rate": 6.3602459097408465e-06,
      "loss": 0.0123,
      "step": 1124760
    },
    {
      "epoch": 1.8407271394251226,
      "grad_norm": 0.9538988471031189,
      "learning_rate": 6.36018001752733e-06,
      "loss": 0.0155,
      "step": 1124780
    },
    {
      "epoch": 1.840759869863776,
      "grad_norm": 1.0625025033950806,
      "learning_rate": 6.360114125313812e-06,
      "loss": 0.0146,
      "step": 1124800
    },
    {
      "epoch": 1.8407926003024293,
      "grad_norm": 0.12659457325935364,
      "learning_rate": 6.3600482331002955e-06,
      "loss": 0.0212,
      "step": 1124820
    },
    {
      "epoch": 1.8408253307410827,
      "grad_norm": 0.226856529712677,
      "learning_rate": 6.359982340886777e-06,
      "loss": 0.015,
      "step": 1124840
    },
    {
      "epoch": 1.8408580611797358,
      "grad_norm": 0.3591075539588928,
      "learning_rate": 6.359916448673261e-06,
      "loss": 0.0237,
      "step": 1124860
    },
    {
      "epoch": 1.8408907916183894,
      "grad_norm": 0.3684171736240387,
      "learning_rate": 6.359850556459744e-06,
      "loss": 0.0153,
      "step": 1124880
    },
    {
      "epoch": 1.8409235220570426,
      "grad_norm": 0.5535577535629272,
      "learning_rate": 6.3597846642462265e-06,
      "loss": 0.0206,
      "step": 1124900
    },
    {
      "epoch": 1.840956252495696,
      "grad_norm": 0.2995968461036682,
      "learning_rate": 6.359718772032709e-06,
      "loss": 0.013,
      "step": 1124920
    },
    {
      "epoch": 1.8409889829343493,
      "grad_norm": 0.35577499866485596,
      "learning_rate": 6.359652879819193e-06,
      "loss": 0.0147,
      "step": 1124940
    },
    {
      "epoch": 1.8410217133730025,
      "grad_norm": 0.16138619184494019,
      "learning_rate": 6.359586987605675e-06,
      "loss": 0.0127,
      "step": 1124960
    },
    {
      "epoch": 1.841054443811656,
      "grad_norm": 1.2183970212936401,
      "learning_rate": 6.359521095392158e-06,
      "loss": 0.0309,
      "step": 1124980
    },
    {
      "epoch": 1.8410871742503092,
      "grad_norm": 0.2658027112483978,
      "learning_rate": 6.35945520317864e-06,
      "loss": 0.0176,
      "step": 1125000
    },
    {
      "epoch": 1.8411199046889628,
      "grad_norm": 0.27944374084472656,
      "learning_rate": 6.359389310965124e-06,
      "loss": 0.0165,
      "step": 1125020
    },
    {
      "epoch": 1.841152635127616,
      "grad_norm": 0.09998274594545364,
      "learning_rate": 6.3593234187516065e-06,
      "loss": 0.0144,
      "step": 1125040
    },
    {
      "epoch": 1.8411853655662693,
      "grad_norm": 1.44172203540802,
      "learning_rate": 6.359257526538089e-06,
      "loss": 0.0199,
      "step": 1125060
    },
    {
      "epoch": 1.8412180960049227,
      "grad_norm": 0.8648189306259155,
      "learning_rate": 6.359191634324572e-06,
      "loss": 0.0213,
      "step": 1125080
    },
    {
      "epoch": 1.8412508264435758,
      "grad_norm": 9.892463684082031,
      "learning_rate": 6.3591257421110556e-06,
      "loss": 0.0224,
      "step": 1125100
    },
    {
      "epoch": 1.8412835568822294,
      "grad_norm": 0.18319039046764374,
      "learning_rate": 6.359059849897538e-06,
      "loss": 0.0186,
      "step": 1125120
    },
    {
      "epoch": 1.8413162873208826,
      "grad_norm": 2.366323709487915,
      "learning_rate": 6.358993957684021e-06,
      "loss": 0.0186,
      "step": 1125140
    },
    {
      "epoch": 1.841349017759536,
      "grad_norm": 0.6023594737052917,
      "learning_rate": 6.358928065470505e-06,
      "loss": 0.0173,
      "step": 1125160
    },
    {
      "epoch": 1.8413817481981893,
      "grad_norm": 0.37134140729904175,
      "learning_rate": 6.3588621732569865e-06,
      "loss": 0.0132,
      "step": 1125180
    },
    {
      "epoch": 1.8414144786368427,
      "grad_norm": 1.1534581184387207,
      "learning_rate": 6.35879628104347e-06,
      "loss": 0.0173,
      "step": 1125200
    },
    {
      "epoch": 1.841447209075496,
      "grad_norm": 0.7383776307106018,
      "learning_rate": 6.358730388829952e-06,
      "loss": 0.0139,
      "step": 1125220
    },
    {
      "epoch": 1.8414799395141492,
      "grad_norm": 0.2048228234052658,
      "learning_rate": 6.358664496616436e-06,
      "loss": 0.0109,
      "step": 1125240
    },
    {
      "epoch": 1.8415126699528028,
      "grad_norm": 0.3560652434825897,
      "learning_rate": 6.358598604402918e-06,
      "loss": 0.0176,
      "step": 1125260
    },
    {
      "epoch": 1.841545400391456,
      "grad_norm": 0.36755305528640747,
      "learning_rate": 6.358532712189401e-06,
      "loss": 0.0212,
      "step": 1125280
    },
    {
      "epoch": 1.8415781308301093,
      "grad_norm": 0.3863239884376526,
      "learning_rate": 6.358466819975884e-06,
      "loss": 0.0284,
      "step": 1125300
    },
    {
      "epoch": 1.8416108612687627,
      "grad_norm": 0.7223076820373535,
      "learning_rate": 6.358400927762367e-06,
      "loss": 0.0197,
      "step": 1125320
    },
    {
      "epoch": 1.841643591707416,
      "grad_norm": 0.5429407358169556,
      "learning_rate": 6.358335035548849e-06,
      "loss": 0.0183,
      "step": 1125340
    },
    {
      "epoch": 1.8416763221460695,
      "grad_norm": 0.37349849939346313,
      "learning_rate": 6.358269143335333e-06,
      "loss": 0.0179,
      "step": 1125360
    },
    {
      "epoch": 1.8417090525847226,
      "grad_norm": 0.3105604946613312,
      "learning_rate": 6.358203251121815e-06,
      "loss": 0.0191,
      "step": 1125380
    },
    {
      "epoch": 1.8417417830233762,
      "grad_norm": 0.5576801896095276,
      "learning_rate": 6.358137358908298e-06,
      "loss": 0.0256,
      "step": 1125400
    },
    {
      "epoch": 1.8417745134620294,
      "grad_norm": 0.15934017300605774,
      "learning_rate": 6.358071466694781e-06,
      "loss": 0.0116,
      "step": 1125420
    },
    {
      "epoch": 1.8418072439006827,
      "grad_norm": 0.6242040991783142,
      "learning_rate": 6.358005574481264e-06,
      "loss": 0.0141,
      "step": 1125440
    },
    {
      "epoch": 1.841839974339336,
      "grad_norm": 0.7364638447761536,
      "learning_rate": 6.3579396822677474e-06,
      "loss": 0.0187,
      "step": 1125460
    },
    {
      "epoch": 1.8418727047779895,
      "grad_norm": 0.16578485071659088,
      "learning_rate": 6.35787379005423e-06,
      "loss": 0.0152,
      "step": 1125480
    },
    {
      "epoch": 1.8419054352166429,
      "grad_norm": 0.08999555557966232,
      "learning_rate": 6.357807897840713e-06,
      "loss": 0.0251,
      "step": 1125500
    },
    {
      "epoch": 1.841938165655296,
      "grad_norm": 1.6579042673110962,
      "learning_rate": 6.357742005627196e-06,
      "loss": 0.0229,
      "step": 1125520
    },
    {
      "epoch": 1.8419708960939496,
      "grad_norm": 0.4261038601398468,
      "learning_rate": 6.357676113413679e-06,
      "loss": 0.0178,
      "step": 1125540
    },
    {
      "epoch": 1.8420036265326027,
      "grad_norm": 0.6334424614906311,
      "learning_rate": 6.357610221200161e-06,
      "loss": 0.0191,
      "step": 1125560
    },
    {
      "epoch": 1.8420363569712561,
      "grad_norm": 0.639700710773468,
      "learning_rate": 6.357544328986645e-06,
      "loss": 0.0186,
      "step": 1125580
    },
    {
      "epoch": 1.8420690874099095,
      "grad_norm": 0.13578729331493378,
      "learning_rate": 6.357478436773127e-06,
      "loss": 0.0169,
      "step": 1125600
    },
    {
      "epoch": 1.8421018178485629,
      "grad_norm": 0.6351635456085205,
      "learning_rate": 6.35741254455961e-06,
      "loss": 0.0272,
      "step": 1125620
    },
    {
      "epoch": 1.8421345482872162,
      "grad_norm": 0.2538360059261322,
      "learning_rate": 6.357346652346092e-06,
      "loss": 0.0143,
      "step": 1125640
    },
    {
      "epoch": 1.8421672787258694,
      "grad_norm": 0.3220733404159546,
      "learning_rate": 6.357280760132576e-06,
      "loss": 0.0172,
      "step": 1125660
    },
    {
      "epoch": 1.842200009164523,
      "grad_norm": 0.23608025908470154,
      "learning_rate": 6.357214867919058e-06,
      "loss": 0.0198,
      "step": 1125680
    },
    {
      "epoch": 1.8422327396031761,
      "grad_norm": 0.29547256231307983,
      "learning_rate": 6.357148975705541e-06,
      "loss": 0.0139,
      "step": 1125700
    },
    {
      "epoch": 1.8422654700418295,
      "grad_norm": 0.1309342384338379,
      "learning_rate": 6.357083083492024e-06,
      "loss": 0.0224,
      "step": 1125720
    },
    {
      "epoch": 1.8422982004804829,
      "grad_norm": 0.32071349024772644,
      "learning_rate": 6.3570171912785075e-06,
      "loss": 0.0147,
      "step": 1125740
    },
    {
      "epoch": 1.842330930919136,
      "grad_norm": 0.6333125829696655,
      "learning_rate": 6.356951299064989e-06,
      "loss": 0.0141,
      "step": 1125760
    },
    {
      "epoch": 1.8423636613577896,
      "grad_norm": 1.5304646492004395,
      "learning_rate": 6.356885406851473e-06,
      "loss": 0.022,
      "step": 1125780
    },
    {
      "epoch": 1.8423963917964428,
      "grad_norm": 0.8261861801147461,
      "learning_rate": 6.356819514637955e-06,
      "loss": 0.0307,
      "step": 1125800
    },
    {
      "epoch": 1.8424291222350961,
      "grad_norm": 0.5454663634300232,
      "learning_rate": 6.3567536224244384e-06,
      "loss": 0.0161,
      "step": 1125820
    },
    {
      "epoch": 1.8424618526737495,
      "grad_norm": 0.3247072398662567,
      "learning_rate": 6.356687730210922e-06,
      "loss": 0.0159,
      "step": 1125840
    },
    {
      "epoch": 1.8424945831124029,
      "grad_norm": 0.36180055141448975,
      "learning_rate": 6.356621837997404e-06,
      "loss": 0.0222,
      "step": 1125860
    },
    {
      "epoch": 1.8425273135510563,
      "grad_norm": 0.2676675319671631,
      "learning_rate": 6.3565559457838875e-06,
      "loss": 0.0152,
      "step": 1125880
    },
    {
      "epoch": 1.8425600439897094,
      "grad_norm": 0.8015318512916565,
      "learning_rate": 6.35649005357037e-06,
      "loss": 0.0156,
      "step": 1125900
    },
    {
      "epoch": 1.842592774428363,
      "grad_norm": 2.284196138381958,
      "learning_rate": 6.356424161356853e-06,
      "loss": 0.0224,
      "step": 1125920
    },
    {
      "epoch": 1.8426255048670162,
      "grad_norm": 0.2831026017665863,
      "learning_rate": 6.356358269143336e-06,
      "loss": 0.0221,
      "step": 1125940
    },
    {
      "epoch": 1.8426582353056695,
      "grad_norm": 0.4511353671550751,
      "learning_rate": 6.356292376929819e-06,
      "loss": 0.0145,
      "step": 1125960
    },
    {
      "epoch": 1.842690965744323,
      "grad_norm": 0.3841712474822998,
      "learning_rate": 6.356226484716301e-06,
      "loss": 0.0164,
      "step": 1125980
    },
    {
      "epoch": 1.8427236961829763,
      "grad_norm": 0.4319272041320801,
      "learning_rate": 6.356160592502785e-06,
      "loss": 0.016,
      "step": 1126000
    },
    {
      "epoch": 1.8427564266216296,
      "grad_norm": 0.8544762134552002,
      "learning_rate": 6.356094700289267e-06,
      "loss": 0.0183,
      "step": 1126020
    },
    {
      "epoch": 1.8427891570602828,
      "grad_norm": 1.2562737464904785,
      "learning_rate": 6.35602880807575e-06,
      "loss": 0.0204,
      "step": 1126040
    },
    {
      "epoch": 1.8428218874989364,
      "grad_norm": 0.6525930762290955,
      "learning_rate": 6.355962915862233e-06,
      "loss": 0.0165,
      "step": 1126060
    },
    {
      "epoch": 1.8428546179375895,
      "grad_norm": 0.35212212800979614,
      "learning_rate": 6.355897023648716e-06,
      "loss": 0.0156,
      "step": 1126080
    },
    {
      "epoch": 1.842887348376243,
      "grad_norm": 1.3737915754318237,
      "learning_rate": 6.3558311314351985e-06,
      "loss": 0.0143,
      "step": 1126100
    },
    {
      "epoch": 1.8429200788148963,
      "grad_norm": 0.13833047449588776,
      "learning_rate": 6.355765239221682e-06,
      "loss": 0.0234,
      "step": 1126120
    },
    {
      "epoch": 1.8429528092535497,
      "grad_norm": 0.37293800711631775,
      "learning_rate": 6.355699347008164e-06,
      "loss": 0.0197,
      "step": 1126140
    },
    {
      "epoch": 1.842985539692203,
      "grad_norm": 0.6668845415115356,
      "learning_rate": 6.3556334547946476e-06,
      "loss": 0.0193,
      "step": 1126160
    },
    {
      "epoch": 1.8430182701308562,
      "grad_norm": 0.1536962240934372,
      "learning_rate": 6.355567562581131e-06,
      "loss": 0.0187,
      "step": 1126180
    },
    {
      "epoch": 1.8430510005695098,
      "grad_norm": 0.510478138923645,
      "learning_rate": 6.355501670367613e-06,
      "loss": 0.0156,
      "step": 1126200
    },
    {
      "epoch": 1.843083731008163,
      "grad_norm": 0.5393177270889282,
      "learning_rate": 6.355435778154097e-06,
      "loss": 0.0127,
      "step": 1126220
    },
    {
      "epoch": 1.8431164614468163,
      "grad_norm": 0.17385783791542053,
      "learning_rate": 6.3553698859405785e-06,
      "loss": 0.0191,
      "step": 1126240
    },
    {
      "epoch": 1.8431491918854697,
      "grad_norm": 0.3829915225505829,
      "learning_rate": 6.355303993727062e-06,
      "loss": 0.0188,
      "step": 1126260
    },
    {
      "epoch": 1.843181922324123,
      "grad_norm": 0.920933187007904,
      "learning_rate": 6.355238101513545e-06,
      "loss": 0.0204,
      "step": 1126280
    },
    {
      "epoch": 1.8432146527627764,
      "grad_norm": 1.6389703750610352,
      "learning_rate": 6.3551722093000276e-06,
      "loss": 0.0181,
      "step": 1126300
    },
    {
      "epoch": 1.8432473832014296,
      "grad_norm": 0.2623762786388397,
      "learning_rate": 6.35510631708651e-06,
      "loss": 0.0184,
      "step": 1126320
    },
    {
      "epoch": 1.8432801136400832,
      "grad_norm": 1.159096598625183,
      "learning_rate": 6.355040424872994e-06,
      "loss": 0.0191,
      "step": 1126340
    },
    {
      "epoch": 1.8433128440787363,
      "grad_norm": 1.019763708114624,
      "learning_rate": 6.354974532659476e-06,
      "loss": 0.021,
      "step": 1126360
    },
    {
      "epoch": 1.8433455745173897,
      "grad_norm": 1.9072511196136475,
      "learning_rate": 6.354908640445959e-06,
      "loss": 0.0258,
      "step": 1126380
    },
    {
      "epoch": 1.843378304956043,
      "grad_norm": 0.7039114832878113,
      "learning_rate": 6.354842748232441e-06,
      "loss": 0.0309,
      "step": 1126400
    },
    {
      "epoch": 1.8434110353946962,
      "grad_norm": 0.46013057231903076,
      "learning_rate": 6.354776856018925e-06,
      "loss": 0.0218,
      "step": 1126420
    },
    {
      "epoch": 1.8434437658333498,
      "grad_norm": 0.5958336591720581,
      "learning_rate": 6.354710963805408e-06,
      "loss": 0.0164,
      "step": 1126440
    },
    {
      "epoch": 1.843476496272003,
      "grad_norm": 0.46019768714904785,
      "learning_rate": 6.35464507159189e-06,
      "loss": 0.0118,
      "step": 1126460
    },
    {
      "epoch": 1.8435092267106565,
      "grad_norm": 0.9351934790611267,
      "learning_rate": 6.354579179378373e-06,
      "loss": 0.0116,
      "step": 1126480
    },
    {
      "epoch": 1.8435419571493097,
      "grad_norm": 0.6211774349212646,
      "learning_rate": 6.354513287164857e-06,
      "loss": 0.0176,
      "step": 1126500
    },
    {
      "epoch": 1.843574687587963,
      "grad_norm": 0.6794266104698181,
      "learning_rate": 6.354447394951339e-06,
      "loss": 0.0268,
      "step": 1126520
    },
    {
      "epoch": 1.8436074180266164,
      "grad_norm": 0.2795623242855072,
      "learning_rate": 6.354381502737822e-06,
      "loss": 0.0218,
      "step": 1126540
    },
    {
      "epoch": 1.8436401484652696,
      "grad_norm": 0.6888389587402344,
      "learning_rate": 6.354315610524306e-06,
      "loss": 0.0152,
      "step": 1126560
    },
    {
      "epoch": 1.8436728789039232,
      "grad_norm": 0.6197541356086731,
      "learning_rate": 6.354249718310788e-06,
      "loss": 0.0164,
      "step": 1126580
    },
    {
      "epoch": 1.8437056093425763,
      "grad_norm": 1.154025673866272,
      "learning_rate": 6.354183826097271e-06,
      "loss": 0.0151,
      "step": 1126600
    },
    {
      "epoch": 1.8437383397812297,
      "grad_norm": 0.3626880645751953,
      "learning_rate": 6.354117933883753e-06,
      "loss": 0.0183,
      "step": 1126620
    },
    {
      "epoch": 1.843771070219883,
      "grad_norm": 0.222255140542984,
      "learning_rate": 6.354052041670237e-06,
      "loss": 0.0229,
      "step": 1126640
    },
    {
      "epoch": 1.8438038006585364,
      "grad_norm": 0.2793389856815338,
      "learning_rate": 6.353986149456719e-06,
      "loss": 0.0169,
      "step": 1126660
    },
    {
      "epoch": 1.8438365310971898,
      "grad_norm": 0.2865140736103058,
      "learning_rate": 6.353920257243202e-06,
      "loss": 0.017,
      "step": 1126680
    },
    {
      "epoch": 1.843869261535843,
      "grad_norm": 0.18262724578380585,
      "learning_rate": 6.353854365029685e-06,
      "loss": 0.02,
      "step": 1126700
    },
    {
      "epoch": 1.8439019919744966,
      "grad_norm": 0.2960678040981293,
      "learning_rate": 6.353788472816168e-06,
      "loss": 0.022,
      "step": 1126720
    },
    {
      "epoch": 1.8439347224131497,
      "grad_norm": 0.25633421540260315,
      "learning_rate": 6.35372258060265e-06,
      "loss": 0.0164,
      "step": 1126740
    },
    {
      "epoch": 1.843967452851803,
      "grad_norm": 0.63663250207901,
      "learning_rate": 6.353656688389134e-06,
      "loss": 0.0201,
      "step": 1126760
    },
    {
      "epoch": 1.8440001832904565,
      "grad_norm": 0.14584308862686157,
      "learning_rate": 6.353590796175616e-06,
      "loss": 0.0177,
      "step": 1126780
    },
    {
      "epoch": 1.8440329137291098,
      "grad_norm": 0.9509120583534241,
      "learning_rate": 6.3535249039620995e-06,
      "loss": 0.0225,
      "step": 1126800
    },
    {
      "epoch": 1.8440656441677632,
      "grad_norm": 0.5118891000747681,
      "learning_rate": 6.353459011748581e-06,
      "loss": 0.0215,
      "step": 1126820
    },
    {
      "epoch": 1.8440983746064163,
      "grad_norm": 0.1891907900571823,
      "learning_rate": 6.353393119535065e-06,
      "loss": 0.0271,
      "step": 1126840
    },
    {
      "epoch": 1.84413110504507,
      "grad_norm": 0.40415698289871216,
      "learning_rate": 6.353327227321548e-06,
      "loss": 0.0159,
      "step": 1126860
    },
    {
      "epoch": 1.844163835483723,
      "grad_norm": 0.7903435230255127,
      "learning_rate": 6.35326133510803e-06,
      "loss": 0.0183,
      "step": 1126880
    },
    {
      "epoch": 1.8441965659223765,
      "grad_norm": 0.8183502554893494,
      "learning_rate": 6.353195442894514e-06,
      "loss": 0.0152,
      "step": 1126900
    },
    {
      "epoch": 1.8442292963610298,
      "grad_norm": 0.45334747433662415,
      "learning_rate": 6.353129550680997e-06,
      "loss": 0.0204,
      "step": 1126920
    },
    {
      "epoch": 1.8442620267996832,
      "grad_norm": 0.2785263955593109,
      "learning_rate": 6.3530636584674795e-06,
      "loss": 0.0182,
      "step": 1126940
    },
    {
      "epoch": 1.8442947572383366,
      "grad_norm": 0.18877241015434265,
      "learning_rate": 6.352997766253962e-06,
      "loss": 0.0285,
      "step": 1126960
    },
    {
      "epoch": 1.8443274876769897,
      "grad_norm": 0.6430699825286865,
      "learning_rate": 6.352931874040446e-06,
      "loss": 0.0151,
      "step": 1126980
    },
    {
      "epoch": 1.8443602181156433,
      "grad_norm": 0.7553769946098328,
      "learning_rate": 6.352865981826928e-06,
      "loss": 0.026,
      "step": 1127000
    },
    {
      "epoch": 1.8443929485542965,
      "grad_norm": 0.9658340811729431,
      "learning_rate": 6.352800089613411e-06,
      "loss": 0.0284,
      "step": 1127020
    },
    {
      "epoch": 1.8444256789929498,
      "grad_norm": 0.44773396849632263,
      "learning_rate": 6.352734197399893e-06,
      "loss": 0.0332,
      "step": 1127040
    },
    {
      "epoch": 1.8444584094316032,
      "grad_norm": 0.15142366290092468,
      "learning_rate": 6.352668305186377e-06,
      "loss": 0.0103,
      "step": 1127060
    },
    {
      "epoch": 1.8444911398702566,
      "grad_norm": 0.5005709528923035,
      "learning_rate": 6.3526024129728595e-06,
      "loss": 0.0197,
      "step": 1127080
    },
    {
      "epoch": 1.84452387030891,
      "grad_norm": 0.4283616244792938,
      "learning_rate": 6.352536520759342e-06,
      "loss": 0.017,
      "step": 1127100
    },
    {
      "epoch": 1.8445566007475631,
      "grad_norm": 2.2510616779327393,
      "learning_rate": 6.352470628545825e-06,
      "loss": 0.016,
      "step": 1127120
    },
    {
      "epoch": 1.8445893311862167,
      "grad_norm": 0.22516568005084991,
      "learning_rate": 6.3524047363323086e-06,
      "loss": 0.0149,
      "step": 1127140
    },
    {
      "epoch": 1.8446220616248699,
      "grad_norm": 0.34059637784957886,
      "learning_rate": 6.3523388441187905e-06,
      "loss": 0.0163,
      "step": 1127160
    },
    {
      "epoch": 1.8446547920635232,
      "grad_norm": 0.10130835324525833,
      "learning_rate": 6.352272951905274e-06,
      "loss": 0.0213,
      "step": 1127180
    },
    {
      "epoch": 1.8446875225021766,
      "grad_norm": 0.3334343135356903,
      "learning_rate": 6.352207059691756e-06,
      "loss": 0.0189,
      "step": 1127200
    },
    {
      "epoch": 1.8447202529408298,
      "grad_norm": 0.3737940788269043,
      "learning_rate": 6.3521411674782395e-06,
      "loss": 0.0252,
      "step": 1127220
    },
    {
      "epoch": 1.8447529833794833,
      "grad_norm": 0.4093090295791626,
      "learning_rate": 6.352075275264723e-06,
      "loss": 0.0243,
      "step": 1127240
    },
    {
      "epoch": 1.8447857138181365,
      "grad_norm": 0.20492023229599,
      "learning_rate": 6.352009383051205e-06,
      "loss": 0.0148,
      "step": 1127260
    },
    {
      "epoch": 1.84481844425679,
      "grad_norm": 0.31001755595207214,
      "learning_rate": 6.351943490837689e-06,
      "loss": 0.0125,
      "step": 1127280
    },
    {
      "epoch": 1.8448511746954432,
      "grad_norm": 0.333913654088974,
      "learning_rate": 6.351877598624171e-06,
      "loss": 0.0166,
      "step": 1127300
    },
    {
      "epoch": 1.8448839051340966,
      "grad_norm": 0.20844107866287231,
      "learning_rate": 6.351811706410654e-06,
      "loss": 0.0177,
      "step": 1127320
    },
    {
      "epoch": 1.84491663557275,
      "grad_norm": 0.5075238943099976,
      "learning_rate": 6.351745814197137e-06,
      "loss": 0.0171,
      "step": 1127340
    },
    {
      "epoch": 1.8449493660114031,
      "grad_norm": 0.29158926010131836,
      "learning_rate": 6.35167992198362e-06,
      "loss": 0.0201,
      "step": 1127360
    },
    {
      "epoch": 1.8449820964500567,
      "grad_norm": 0.9231884479522705,
      "learning_rate": 6.351614029770102e-06,
      "loss": 0.0216,
      "step": 1127380
    },
    {
      "epoch": 1.8450148268887099,
      "grad_norm": 0.969211220741272,
      "learning_rate": 6.351548137556586e-06,
      "loss": 0.0175,
      "step": 1127400
    },
    {
      "epoch": 1.8450475573273633,
      "grad_norm": 0.11757856607437134,
      "learning_rate": 6.351482245343068e-06,
      "loss": 0.0146,
      "step": 1127420
    },
    {
      "epoch": 1.8450802877660166,
      "grad_norm": 0.7371532917022705,
      "learning_rate": 6.351416353129551e-06,
      "loss": 0.0223,
      "step": 1127440
    },
    {
      "epoch": 1.84511301820467,
      "grad_norm": 0.7243651151657104,
      "learning_rate": 6.351350460916034e-06,
      "loss": 0.0221,
      "step": 1127460
    },
    {
      "epoch": 1.8451457486433234,
      "grad_norm": 0.4767763912677765,
      "learning_rate": 6.351284568702517e-06,
      "loss": 0.0181,
      "step": 1127480
    },
    {
      "epoch": 1.8451784790819765,
      "grad_norm": 0.5638799667358398,
      "learning_rate": 6.351218676489e-06,
      "loss": 0.0206,
      "step": 1127500
    },
    {
      "epoch": 1.8452112095206301,
      "grad_norm": 0.6503459215164185,
      "learning_rate": 6.351152784275483e-06,
      "loss": 0.0297,
      "step": 1127520
    },
    {
      "epoch": 1.8452439399592833,
      "grad_norm": 0.12123314291238785,
      "learning_rate": 6.351086892061965e-06,
      "loss": 0.0172,
      "step": 1127540
    },
    {
      "epoch": 1.8452766703979366,
      "grad_norm": 0.6889773011207581,
      "learning_rate": 6.351020999848449e-06,
      "loss": 0.0165,
      "step": 1127560
    },
    {
      "epoch": 1.84530940083659,
      "grad_norm": 0.3065430819988251,
      "learning_rate": 6.350955107634932e-06,
      "loss": 0.0211,
      "step": 1127580
    },
    {
      "epoch": 1.8453421312752434,
      "grad_norm": 0.5355775952339172,
      "learning_rate": 6.350889215421414e-06,
      "loss": 0.019,
      "step": 1127600
    },
    {
      "epoch": 1.8453748617138968,
      "grad_norm": 0.43108779191970825,
      "learning_rate": 6.350823323207898e-06,
      "loss": 0.0149,
      "step": 1127620
    },
    {
      "epoch": 1.84540759215255,
      "grad_norm": 1.1179835796356201,
      "learning_rate": 6.35075743099438e-06,
      "loss": 0.017,
      "step": 1127640
    },
    {
      "epoch": 1.8454403225912035,
      "grad_norm": 0.8957136273384094,
      "learning_rate": 6.350691538780863e-06,
      "loss": 0.0158,
      "step": 1127660
    },
    {
      "epoch": 1.8454730530298566,
      "grad_norm": 0.6027213335037231,
      "learning_rate": 6.350625646567345e-06,
      "loss": 0.0174,
      "step": 1127680
    },
    {
      "epoch": 1.84550578346851,
      "grad_norm": 0.5631486177444458,
      "learning_rate": 6.350559754353829e-06,
      "loss": 0.0197,
      "step": 1127700
    },
    {
      "epoch": 1.8455385139071634,
      "grad_norm": 0.42702925205230713,
      "learning_rate": 6.350493862140311e-06,
      "loss": 0.0216,
      "step": 1127720
    },
    {
      "epoch": 1.8455712443458168,
      "grad_norm": 0.13053251802921295,
      "learning_rate": 6.350427969926794e-06,
      "loss": 0.0201,
      "step": 1127740
    },
    {
      "epoch": 1.8456039747844701,
      "grad_norm": 0.35294637084007263,
      "learning_rate": 6.350362077713277e-06,
      "loss": 0.0277,
      "step": 1127760
    },
    {
      "epoch": 1.8456367052231233,
      "grad_norm": 0.05896277353167534,
      "learning_rate": 6.3502961854997605e-06,
      "loss": 0.0231,
      "step": 1127780
    },
    {
      "epoch": 1.8456694356617769,
      "grad_norm": 0.3633142411708832,
      "learning_rate": 6.350230293286242e-06,
      "loss": 0.0208,
      "step": 1127800
    },
    {
      "epoch": 1.84570216610043,
      "grad_norm": 0.26793748140335083,
      "learning_rate": 6.350164401072726e-06,
      "loss": 0.0187,
      "step": 1127820
    },
    {
      "epoch": 1.8457348965390834,
      "grad_norm": 0.4963407516479492,
      "learning_rate": 6.350098508859208e-06,
      "loss": 0.0269,
      "step": 1127840
    },
    {
      "epoch": 1.8457676269777368,
      "grad_norm": 0.5675594806671143,
      "learning_rate": 6.3500326166456914e-06,
      "loss": 0.0143,
      "step": 1127860
    },
    {
      "epoch": 1.8458003574163901,
      "grad_norm": 0.3512640595436096,
      "learning_rate": 6.349966724432174e-06,
      "loss": 0.0125,
      "step": 1127880
    },
    {
      "epoch": 1.8458330878550435,
      "grad_norm": 0.5519614219665527,
      "learning_rate": 6.349900832218657e-06,
      "loss": 0.0257,
      "step": 1127900
    },
    {
      "epoch": 1.8458658182936967,
      "grad_norm": 0.45416852831840515,
      "learning_rate": 6.34983494000514e-06,
      "loss": 0.0163,
      "step": 1127920
    },
    {
      "epoch": 1.8458985487323503,
      "grad_norm": 0.263997346162796,
      "learning_rate": 6.349769047791623e-06,
      "loss": 0.0218,
      "step": 1127940
    },
    {
      "epoch": 1.8459312791710034,
      "grad_norm": 1.479206919670105,
      "learning_rate": 6.349703155578106e-06,
      "loss": 0.0157,
      "step": 1127960
    },
    {
      "epoch": 1.8459640096096568,
      "grad_norm": 0.40957382321357727,
      "learning_rate": 6.349637263364589e-06,
      "loss": 0.0155,
      "step": 1127980
    },
    {
      "epoch": 1.8459967400483102,
      "grad_norm": 0.3965773284435272,
      "learning_rate": 6.349571371151072e-06,
      "loss": 0.0234,
      "step": 1128000
    },
    {
      "epoch": 1.8460294704869633,
      "grad_norm": 0.5715372562408447,
      "learning_rate": 6.349505478937554e-06,
      "loss": 0.0191,
      "step": 1128020
    },
    {
      "epoch": 1.846062200925617,
      "grad_norm": 0.8431943655014038,
      "learning_rate": 6.349439586724038e-06,
      "loss": 0.0173,
      "step": 1128040
    },
    {
      "epoch": 1.84609493136427,
      "grad_norm": 0.13920088112354279,
      "learning_rate": 6.34937369451052e-06,
      "loss": 0.0197,
      "step": 1128060
    },
    {
      "epoch": 1.8461276618029236,
      "grad_norm": 0.320361852645874,
      "learning_rate": 6.349307802297003e-06,
      "loss": 0.0132,
      "step": 1128080
    },
    {
      "epoch": 1.8461603922415768,
      "grad_norm": 0.244658425450325,
      "learning_rate": 6.349241910083486e-06,
      "loss": 0.0103,
      "step": 1128100
    },
    {
      "epoch": 1.8461931226802302,
      "grad_norm": 0.783551812171936,
      "learning_rate": 6.349176017869969e-06,
      "loss": 0.0174,
      "step": 1128120
    },
    {
      "epoch": 1.8462258531188835,
      "grad_norm": 0.0662456750869751,
      "learning_rate": 6.3491101256564515e-06,
      "loss": 0.0151,
      "step": 1128140
    },
    {
      "epoch": 1.8462585835575367,
      "grad_norm": 0.5511236786842346,
      "learning_rate": 6.349044233442935e-06,
      "loss": 0.0199,
      "step": 1128160
    },
    {
      "epoch": 1.8462913139961903,
      "grad_norm": 0.4265594780445099,
      "learning_rate": 6.348978341229417e-06,
      "loss": 0.0177,
      "step": 1128180
    },
    {
      "epoch": 1.8463240444348434,
      "grad_norm": 0.6260258555412292,
      "learning_rate": 6.3489124490159006e-06,
      "loss": 0.0152,
      "step": 1128200
    },
    {
      "epoch": 1.8463567748734968,
      "grad_norm": 0.4338463544845581,
      "learning_rate": 6.3488465568023824e-06,
      "loss": 0.012,
      "step": 1128220
    },
    {
      "epoch": 1.8463895053121502,
      "grad_norm": 0.3564163148403168,
      "learning_rate": 6.348780664588866e-06,
      "loss": 0.0141,
      "step": 1128240
    },
    {
      "epoch": 1.8464222357508036,
      "grad_norm": 0.2998788356781006,
      "learning_rate": 6.348714772375349e-06,
      "loss": 0.0143,
      "step": 1128260
    },
    {
      "epoch": 1.846454966189457,
      "grad_norm": 0.500360369682312,
      "learning_rate": 6.3486488801618315e-06,
      "loss": 0.0201,
      "step": 1128280
    },
    {
      "epoch": 1.84648769662811,
      "grad_norm": 0.6827187538146973,
      "learning_rate": 6.348582987948315e-06,
      "loss": 0.0153,
      "step": 1128300
    },
    {
      "epoch": 1.8465204270667637,
      "grad_norm": 0.27014267444610596,
      "learning_rate": 6.348517095734798e-06,
      "loss": 0.0244,
      "step": 1128320
    },
    {
      "epoch": 1.8465531575054168,
      "grad_norm": 0.46972376108169556,
      "learning_rate": 6.348451203521281e-06,
      "loss": 0.0175,
      "step": 1128340
    },
    {
      "epoch": 1.8465858879440702,
      "grad_norm": 0.6633522510528564,
      "learning_rate": 6.348385311307763e-06,
      "loss": 0.0229,
      "step": 1128360
    },
    {
      "epoch": 1.8466186183827236,
      "grad_norm": 0.2122717946767807,
      "learning_rate": 6.348319419094247e-06,
      "loss": 0.022,
      "step": 1128380
    },
    {
      "epoch": 1.846651348821377,
      "grad_norm": 0.8224278688430786,
      "learning_rate": 6.348253526880729e-06,
      "loss": 0.0192,
      "step": 1128400
    },
    {
      "epoch": 1.8466840792600303,
      "grad_norm": 0.152159184217453,
      "learning_rate": 6.348187634667212e-06,
      "loss": 0.0103,
      "step": 1128420
    },
    {
      "epoch": 1.8467168096986835,
      "grad_norm": 0.1797337830066681,
      "learning_rate": 6.348121742453694e-06,
      "loss": 0.0137,
      "step": 1128440
    },
    {
      "epoch": 1.846749540137337,
      "grad_norm": 0.1499527245759964,
      "learning_rate": 6.348055850240178e-06,
      "loss": 0.0164,
      "step": 1128460
    },
    {
      "epoch": 1.8467822705759902,
      "grad_norm": 0.502491295337677,
      "learning_rate": 6.34798995802666e-06,
      "loss": 0.0132,
      "step": 1128480
    },
    {
      "epoch": 1.8468150010146436,
      "grad_norm": 0.5103595852851868,
      "learning_rate": 6.347924065813143e-06,
      "loss": 0.0227,
      "step": 1128500
    },
    {
      "epoch": 1.846847731453297,
      "grad_norm": 0.20371459424495697,
      "learning_rate": 6.347858173599626e-06,
      "loss": 0.0117,
      "step": 1128520
    },
    {
      "epoch": 1.8468804618919503,
      "grad_norm": 1.3620333671569824,
      "learning_rate": 6.34779228138611e-06,
      "loss": 0.0197,
      "step": 1128540
    },
    {
      "epoch": 1.8469131923306037,
      "grad_norm": 0.7558073997497559,
      "learning_rate": 6.3477263891725916e-06,
      "loss": 0.017,
      "step": 1128560
    },
    {
      "epoch": 1.8469459227692568,
      "grad_norm": 0.259480744600296,
      "learning_rate": 6.347660496959075e-06,
      "loss": 0.018,
      "step": 1128580
    },
    {
      "epoch": 1.8469786532079104,
      "grad_norm": 0.27514728903770447,
      "learning_rate": 6.347594604745557e-06,
      "loss": 0.0164,
      "step": 1128600
    },
    {
      "epoch": 1.8470113836465636,
      "grad_norm": 0.3427131772041321,
      "learning_rate": 6.347528712532041e-06,
      "loss": 0.0167,
      "step": 1128620
    },
    {
      "epoch": 1.847044114085217,
      "grad_norm": 1.1024636030197144,
      "learning_rate": 6.347462820318524e-06,
      "loss": 0.0129,
      "step": 1128640
    },
    {
      "epoch": 1.8470768445238703,
      "grad_norm": 0.4219341278076172,
      "learning_rate": 6.347396928105006e-06,
      "loss": 0.0148,
      "step": 1128660
    },
    {
      "epoch": 1.8471095749625235,
      "grad_norm": 0.4722602665424347,
      "learning_rate": 6.34733103589149e-06,
      "loss": 0.0118,
      "step": 1128680
    },
    {
      "epoch": 1.847142305401177,
      "grad_norm": 0.2399647980928421,
      "learning_rate": 6.347265143677972e-06,
      "loss": 0.0111,
      "step": 1128700
    },
    {
      "epoch": 1.8471750358398302,
      "grad_norm": 0.6207873821258545,
      "learning_rate": 6.347199251464455e-06,
      "loss": 0.0241,
      "step": 1128720
    },
    {
      "epoch": 1.8472077662784838,
      "grad_norm": 0.5616385340690613,
      "learning_rate": 6.347133359250938e-06,
      "loss": 0.0172,
      "step": 1128740
    },
    {
      "epoch": 1.847240496717137,
      "grad_norm": 0.4695170521736145,
      "learning_rate": 6.347067467037421e-06,
      "loss": 0.0216,
      "step": 1128760
    },
    {
      "epoch": 1.8472732271557903,
      "grad_norm": 0.6088758707046509,
      "learning_rate": 6.347001574823903e-06,
      "loss": 0.0238,
      "step": 1128780
    },
    {
      "epoch": 1.8473059575944437,
      "grad_norm": 0.362389475107193,
      "learning_rate": 6.346935682610387e-06,
      "loss": 0.0175,
      "step": 1128800
    },
    {
      "epoch": 1.8473386880330969,
      "grad_norm": 0.9386892318725586,
      "learning_rate": 6.346869790396869e-06,
      "loss": 0.0232,
      "step": 1128820
    },
    {
      "epoch": 1.8473714184717505,
      "grad_norm": 0.4854724407196045,
      "learning_rate": 6.3468038981833525e-06,
      "loss": 0.0143,
      "step": 1128840
    },
    {
      "epoch": 1.8474041489104036,
      "grad_norm": 0.34999921917915344,
      "learning_rate": 6.346738005969834e-06,
      "loss": 0.0221,
      "step": 1128860
    },
    {
      "epoch": 1.847436879349057,
      "grad_norm": 0.8970498442649841,
      "learning_rate": 6.346672113756318e-06,
      "loss": 0.0217,
      "step": 1128880
    },
    {
      "epoch": 1.8474696097877104,
      "grad_norm": 0.38365447521209717,
      "learning_rate": 6.346606221542801e-06,
      "loss": 0.0162,
      "step": 1128900
    },
    {
      "epoch": 1.8475023402263637,
      "grad_norm": 0.1802351474761963,
      "learning_rate": 6.346540329329283e-06,
      "loss": 0.0203,
      "step": 1128920
    },
    {
      "epoch": 1.847535070665017,
      "grad_norm": 0.3556418716907501,
      "learning_rate": 6.346474437115766e-06,
      "loss": 0.0162,
      "step": 1128940
    },
    {
      "epoch": 1.8475678011036702,
      "grad_norm": 1.9220225811004639,
      "learning_rate": 6.34640854490225e-06,
      "loss": 0.0201,
      "step": 1128960
    },
    {
      "epoch": 1.8476005315423238,
      "grad_norm": 2.9400854110717773,
      "learning_rate": 6.3463426526887325e-06,
      "loss": 0.0208,
      "step": 1128980
    },
    {
      "epoch": 1.847633261980977,
      "grad_norm": 0.3347599506378174,
      "learning_rate": 6.346276760475215e-06,
      "loss": 0.022,
      "step": 1129000
    },
    {
      "epoch": 1.8476659924196304,
      "grad_norm": 1.3229703903198242,
      "learning_rate": 6.346210868261699e-06,
      "loss": 0.0159,
      "step": 1129020
    },
    {
      "epoch": 1.8476987228582837,
      "grad_norm": 1.2293736934661865,
      "learning_rate": 6.346144976048181e-06,
      "loss": 0.0191,
      "step": 1129040
    },
    {
      "epoch": 1.847731453296937,
      "grad_norm": 0.4123327136039734,
      "learning_rate": 6.346079083834664e-06,
      "loss": 0.0125,
      "step": 1129060
    },
    {
      "epoch": 1.8477641837355905,
      "grad_norm": 2.804957628250122,
      "learning_rate": 6.346013191621146e-06,
      "loss": 0.0191,
      "step": 1129080
    },
    {
      "epoch": 1.8477969141742436,
      "grad_norm": 0.48290178179740906,
      "learning_rate": 6.34594729940763e-06,
      "loss": 0.0219,
      "step": 1129100
    },
    {
      "epoch": 1.8478296446128972,
      "grad_norm": 0.10073177516460419,
      "learning_rate": 6.3458814071941125e-06,
      "loss": 0.0149,
      "step": 1129120
    },
    {
      "epoch": 1.8478623750515504,
      "grad_norm": 0.6310199499130249,
      "learning_rate": 6.345815514980595e-06,
      "loss": 0.0145,
      "step": 1129140
    },
    {
      "epoch": 1.8478951054902037,
      "grad_norm": 1.300831913948059,
      "learning_rate": 6.345749622767078e-06,
      "loss": 0.0271,
      "step": 1129160
    },
    {
      "epoch": 1.8479278359288571,
      "grad_norm": 0.20090699195861816,
      "learning_rate": 6.345683730553562e-06,
      "loss": 0.0149,
      "step": 1129180
    },
    {
      "epoch": 1.8479605663675105,
      "grad_norm": 0.7472286820411682,
      "learning_rate": 6.3456178383400435e-06,
      "loss": 0.016,
      "step": 1129200
    },
    {
      "epoch": 1.8479932968061639,
      "grad_norm": 0.11396234482526779,
      "learning_rate": 6.345551946126527e-06,
      "loss": 0.0172,
      "step": 1129220
    },
    {
      "epoch": 1.848026027244817,
      "grad_norm": 0.18861640989780426,
      "learning_rate": 6.345486053913009e-06,
      "loss": 0.0149,
      "step": 1129240
    },
    {
      "epoch": 1.8480587576834706,
      "grad_norm": 0.22813507914543152,
      "learning_rate": 6.3454201616994925e-06,
      "loss": 0.0244,
      "step": 1129260
    },
    {
      "epoch": 1.8480914881221238,
      "grad_norm": 0.8071107268333435,
      "learning_rate": 6.345354269485975e-06,
      "loss": 0.0147,
      "step": 1129280
    },
    {
      "epoch": 1.8481242185607771,
      "grad_norm": 0.43345603346824646,
      "learning_rate": 6.345288377272458e-06,
      "loss": 0.0179,
      "step": 1129300
    },
    {
      "epoch": 1.8481569489994305,
      "grad_norm": 0.6378272771835327,
      "learning_rate": 6.345222485058941e-06,
      "loss": 0.0159,
      "step": 1129320
    },
    {
      "epoch": 1.8481896794380839,
      "grad_norm": 0.33311089873313904,
      "learning_rate": 6.345156592845424e-06,
      "loss": 0.0119,
      "step": 1129340
    },
    {
      "epoch": 1.8482224098767372,
      "grad_norm": 0.6251155734062195,
      "learning_rate": 6.345090700631907e-06,
      "loss": 0.0235,
      "step": 1129360
    },
    {
      "epoch": 1.8482551403153904,
      "grad_norm": 0.09445575624704361,
      "learning_rate": 6.34502480841839e-06,
      "loss": 0.021,
      "step": 1129380
    },
    {
      "epoch": 1.848287870754044,
      "grad_norm": 6.214787483215332,
      "learning_rate": 6.344958916204873e-06,
      "loss": 0.0192,
      "step": 1129400
    },
    {
      "epoch": 1.8483206011926971,
      "grad_norm": 0.23849917948246002,
      "learning_rate": 6.344893023991355e-06,
      "loss": 0.0199,
      "step": 1129420
    },
    {
      "epoch": 1.8483533316313505,
      "grad_norm": 0.6502779126167297,
      "learning_rate": 6.344827131777839e-06,
      "loss": 0.0247,
      "step": 1129440
    },
    {
      "epoch": 1.8483860620700039,
      "grad_norm": 0.11741700768470764,
      "learning_rate": 6.344761239564321e-06,
      "loss": 0.0161,
      "step": 1129460
    },
    {
      "epoch": 1.848418792508657,
      "grad_norm": 0.12444252520799637,
      "learning_rate": 6.344695347350804e-06,
      "loss": 0.0206,
      "step": 1129480
    },
    {
      "epoch": 1.8484515229473106,
      "grad_norm": 0.20577788352966309,
      "learning_rate": 6.344629455137286e-06,
      "loss": 0.0194,
      "step": 1129500
    },
    {
      "epoch": 1.8484842533859638,
      "grad_norm": 0.3626347482204437,
      "learning_rate": 6.34456356292377e-06,
      "loss": 0.0195,
      "step": 1129520
    },
    {
      "epoch": 1.8485169838246174,
      "grad_norm": 0.9267938733100891,
      "learning_rate": 6.344497670710253e-06,
      "loss": 0.0162,
      "step": 1129540
    },
    {
      "epoch": 1.8485497142632705,
      "grad_norm": 0.5459095239639282,
      "learning_rate": 6.344431778496735e-06,
      "loss": 0.016,
      "step": 1129560
    },
    {
      "epoch": 1.848582444701924,
      "grad_norm": 0.25281822681427,
      "learning_rate": 6.344365886283218e-06,
      "loss": 0.0143,
      "step": 1129580
    },
    {
      "epoch": 1.8486151751405773,
      "grad_norm": 0.5899405479431152,
      "learning_rate": 6.344299994069702e-06,
      "loss": 0.0132,
      "step": 1129600
    },
    {
      "epoch": 1.8486479055792304,
      "grad_norm": 0.6508946418762207,
      "learning_rate": 6.3442341018561835e-06,
      "loss": 0.016,
      "step": 1129620
    },
    {
      "epoch": 1.848680636017884,
      "grad_norm": 0.9193522334098816,
      "learning_rate": 6.344168209642667e-06,
      "loss": 0.0236,
      "step": 1129640
    },
    {
      "epoch": 1.8487133664565372,
      "grad_norm": 0.15414254367351532,
      "learning_rate": 6.344102317429149e-06,
      "loss": 0.0135,
      "step": 1129660
    },
    {
      "epoch": 1.8487460968951905,
      "grad_norm": 0.5845109224319458,
      "learning_rate": 6.344036425215633e-06,
      "loss": 0.0187,
      "step": 1129680
    },
    {
      "epoch": 1.848778827333844,
      "grad_norm": 0.9329329133033752,
      "learning_rate": 6.343970533002116e-06,
      "loss": 0.0212,
      "step": 1129700
    },
    {
      "epoch": 1.8488115577724973,
      "grad_norm": 0.629692554473877,
      "learning_rate": 6.343904640788598e-06,
      "loss": 0.017,
      "step": 1129720
    },
    {
      "epoch": 1.8488442882111507,
      "grad_norm": 0.7287863492965698,
      "learning_rate": 6.343838748575082e-06,
      "loss": 0.021,
      "step": 1129740
    },
    {
      "epoch": 1.8488770186498038,
      "grad_norm": 0.45682865381240845,
      "learning_rate": 6.343772856361564e-06,
      "loss": 0.0152,
      "step": 1129760
    },
    {
      "epoch": 1.8489097490884574,
      "grad_norm": 0.2392173856496811,
      "learning_rate": 6.343706964148047e-06,
      "loss": 0.0229,
      "step": 1129780
    },
    {
      "epoch": 1.8489424795271105,
      "grad_norm": 1.4406259059906006,
      "learning_rate": 6.34364107193453e-06,
      "loss": 0.0202,
      "step": 1129800
    },
    {
      "epoch": 1.848975209965764,
      "grad_norm": 1.1600695848464966,
      "learning_rate": 6.3435751797210135e-06,
      "loss": 0.0149,
      "step": 1129820
    },
    {
      "epoch": 1.8490079404044173,
      "grad_norm": 0.1748546063899994,
      "learning_rate": 6.343509287507495e-06,
      "loss": 0.0234,
      "step": 1129840
    },
    {
      "epoch": 1.8490406708430707,
      "grad_norm": 0.2186349630355835,
      "learning_rate": 6.343443395293979e-06,
      "loss": 0.0202,
      "step": 1129860
    },
    {
      "epoch": 1.849073401281724,
      "grad_norm": 1.405814290046692,
      "learning_rate": 6.343377503080461e-06,
      "loss": 0.0186,
      "step": 1129880
    },
    {
      "epoch": 1.8491061317203772,
      "grad_norm": 0.6853727698326111,
      "learning_rate": 6.3433116108669444e-06,
      "loss": 0.0156,
      "step": 1129900
    },
    {
      "epoch": 1.8491388621590308,
      "grad_norm": 0.2646333873271942,
      "learning_rate": 6.343245718653427e-06,
      "loss": 0.0152,
      "step": 1129920
    },
    {
      "epoch": 1.849171592597684,
      "grad_norm": 0.5928282141685486,
      "learning_rate": 6.34317982643991e-06,
      "loss": 0.0192,
      "step": 1129940
    },
    {
      "epoch": 1.8492043230363373,
      "grad_norm": 0.29699113965034485,
      "learning_rate": 6.343113934226393e-06,
      "loss": 0.0124,
      "step": 1129960
    },
    {
      "epoch": 1.8492370534749907,
      "grad_norm": 0.05777781084179878,
      "learning_rate": 6.343048042012876e-06,
      "loss": 0.0261,
      "step": 1129980
    },
    {
      "epoch": 1.849269783913644,
      "grad_norm": 0.9234185814857483,
      "learning_rate": 6.342982149799358e-06,
      "loss": 0.0239,
      "step": 1130000
    },
    {
      "epoch": 1.8493025143522974,
      "grad_norm": 0.23368243873119354,
      "learning_rate": 6.342916257585842e-06,
      "loss": 0.0189,
      "step": 1130020
    },
    {
      "epoch": 1.8493352447909506,
      "grad_norm": 0.888648271560669,
      "learning_rate": 6.342850365372325e-06,
      "loss": 0.0152,
      "step": 1130040
    },
    {
      "epoch": 1.8493679752296042,
      "grad_norm": 0.8617005348205566,
      "learning_rate": 6.342784473158807e-06,
      "loss": 0.0216,
      "step": 1130060
    },
    {
      "epoch": 1.8494007056682573,
      "grad_norm": 0.7143313884735107,
      "learning_rate": 6.342718580945291e-06,
      "loss": 0.0193,
      "step": 1130080
    },
    {
      "epoch": 1.8494334361069107,
      "grad_norm": 0.5342406630516052,
      "learning_rate": 6.342652688731773e-06,
      "loss": 0.0144,
      "step": 1130100
    },
    {
      "epoch": 1.849466166545564,
      "grad_norm": 1.0259774923324585,
      "learning_rate": 6.342586796518256e-06,
      "loss": 0.0221,
      "step": 1130120
    },
    {
      "epoch": 1.8494988969842174,
      "grad_norm": 0.6228278875350952,
      "learning_rate": 6.342520904304739e-06,
      "loss": 0.0177,
      "step": 1130140
    },
    {
      "epoch": 1.8495316274228708,
      "grad_norm": 0.31185564398765564,
      "learning_rate": 6.342455012091222e-06,
      "loss": 0.0115,
      "step": 1130160
    },
    {
      "epoch": 1.849564357861524,
      "grad_norm": 0.22770752012729645,
      "learning_rate": 6.3423891198777045e-06,
      "loss": 0.0172,
      "step": 1130180
    },
    {
      "epoch": 1.8495970883001775,
      "grad_norm": 0.19208678603172302,
      "learning_rate": 6.342323227664188e-06,
      "loss": 0.019,
      "step": 1130200
    },
    {
      "epoch": 1.8496298187388307,
      "grad_norm": 0.7387849688529968,
      "learning_rate": 6.34225733545067e-06,
      "loss": 0.0196,
      "step": 1130220
    },
    {
      "epoch": 1.849662549177484,
      "grad_norm": 1.0493870973587036,
      "learning_rate": 6.3421914432371536e-06,
      "loss": 0.0169,
      "step": 1130240
    },
    {
      "epoch": 1.8496952796161374,
      "grad_norm": 0.2071496546268463,
      "learning_rate": 6.3421255510236355e-06,
      "loss": 0.0215,
      "step": 1130260
    },
    {
      "epoch": 1.8497280100547906,
      "grad_norm": 0.4533250331878662,
      "learning_rate": 6.342059658810119e-06,
      "loss": 0.0126,
      "step": 1130280
    },
    {
      "epoch": 1.8497607404934442,
      "grad_norm": 0.3509043753147125,
      "learning_rate": 6.341993766596602e-06,
      "loss": 0.0149,
      "step": 1130300
    },
    {
      "epoch": 1.8497934709320973,
      "grad_norm": 0.33306267857551575,
      "learning_rate": 6.3419278743830845e-06,
      "loss": 0.0122,
      "step": 1130320
    },
    {
      "epoch": 1.849826201370751,
      "grad_norm": 0.13521462678909302,
      "learning_rate": 6.341861982169567e-06,
      "loss": 0.0195,
      "step": 1130340
    },
    {
      "epoch": 1.849858931809404,
      "grad_norm": 0.3211122751235962,
      "learning_rate": 6.341796089956051e-06,
      "loss": 0.0105,
      "step": 1130360
    },
    {
      "epoch": 1.8498916622480575,
      "grad_norm": 0.22800812125205994,
      "learning_rate": 6.341730197742533e-06,
      "loss": 0.0146,
      "step": 1130380
    },
    {
      "epoch": 1.8499243926867108,
      "grad_norm": 0.9378607273101807,
      "learning_rate": 6.341664305529016e-06,
      "loss": 0.0162,
      "step": 1130400
    },
    {
      "epoch": 1.849957123125364,
      "grad_norm": 0.2628033459186554,
      "learning_rate": 6.3415984133155e-06,
      "loss": 0.0245,
      "step": 1130420
    },
    {
      "epoch": 1.8499898535640176,
      "grad_norm": 0.7200304269790649,
      "learning_rate": 6.341532521101982e-06,
      "loss": 0.0167,
      "step": 1130440
    },
    {
      "epoch": 1.8500225840026707,
      "grad_norm": 0.4633418917655945,
      "learning_rate": 6.341466628888465e-06,
      "loss": 0.014,
      "step": 1130460
    },
    {
      "epoch": 1.850055314441324,
      "grad_norm": 0.8546851873397827,
      "learning_rate": 6.341400736674947e-06,
      "loss": 0.0201,
      "step": 1130480
    },
    {
      "epoch": 1.8500880448799775,
      "grad_norm": 0.574467122554779,
      "learning_rate": 6.341334844461431e-06,
      "loss": 0.0147,
      "step": 1130500
    },
    {
      "epoch": 1.8501207753186308,
      "grad_norm": 0.5289615392684937,
      "learning_rate": 6.341268952247913e-06,
      "loss": 0.0196,
      "step": 1130520
    },
    {
      "epoch": 1.8501535057572842,
      "grad_norm": 0.5025378465652466,
      "learning_rate": 6.341203060034396e-06,
      "loss": 0.0165,
      "step": 1130540
    },
    {
      "epoch": 1.8501862361959374,
      "grad_norm": 0.20163953304290771,
      "learning_rate": 6.341137167820879e-06,
      "loss": 0.019,
      "step": 1130560
    },
    {
      "epoch": 1.850218966634591,
      "grad_norm": 0.3653167486190796,
      "learning_rate": 6.341071275607362e-06,
      "loss": 0.018,
      "step": 1130580
    },
    {
      "epoch": 1.850251697073244,
      "grad_norm": 0.3387921452522278,
      "learning_rate": 6.3410053833938446e-06,
      "loss": 0.0178,
      "step": 1130600
    },
    {
      "epoch": 1.8502844275118975,
      "grad_norm": 0.5371093153953552,
      "learning_rate": 6.340939491180328e-06,
      "loss": 0.0209,
      "step": 1130620
    },
    {
      "epoch": 1.8503171579505509,
      "grad_norm": 0.1941695660352707,
      "learning_rate": 6.34087359896681e-06,
      "loss": 0.0198,
      "step": 1130640
    },
    {
      "epoch": 1.8503498883892042,
      "grad_norm": 0.12276505678892136,
      "learning_rate": 6.340807706753294e-06,
      "loss": 0.0194,
      "step": 1130660
    },
    {
      "epoch": 1.8503826188278576,
      "grad_norm": 0.6288204193115234,
      "learning_rate": 6.3407418145397755e-06,
      "loss": 0.0219,
      "step": 1130680
    },
    {
      "epoch": 1.8504153492665107,
      "grad_norm": 0.2854563295841217,
      "learning_rate": 6.340675922326259e-06,
      "loss": 0.0207,
      "step": 1130700
    },
    {
      "epoch": 1.8504480797051643,
      "grad_norm": 0.8257024884223938,
      "learning_rate": 6.340610030112742e-06,
      "loss": 0.0175,
      "step": 1130720
    },
    {
      "epoch": 1.8504808101438175,
      "grad_norm": 0.702170729637146,
      "learning_rate": 6.340544137899225e-06,
      "loss": 0.018,
      "step": 1130740
    },
    {
      "epoch": 1.8505135405824709,
      "grad_norm": 0.3209228813648224,
      "learning_rate": 6.340478245685708e-06,
      "loss": 0.0142,
      "step": 1130760
    },
    {
      "epoch": 1.8505462710211242,
      "grad_norm": 0.30261293053627014,
      "learning_rate": 6.340412353472191e-06,
      "loss": 0.0098,
      "step": 1130780
    },
    {
      "epoch": 1.8505790014597776,
      "grad_norm": 0.27649685740470886,
      "learning_rate": 6.340346461258674e-06,
      "loss": 0.0163,
      "step": 1130800
    },
    {
      "epoch": 1.850611731898431,
      "grad_norm": 0.84636390209198,
      "learning_rate": 6.340280569045156e-06,
      "loss": 0.0173,
      "step": 1130820
    },
    {
      "epoch": 1.8506444623370841,
      "grad_norm": 0.17949433624744415,
      "learning_rate": 6.34021467683164e-06,
      "loss": 0.0224,
      "step": 1130840
    },
    {
      "epoch": 1.8506771927757377,
      "grad_norm": 0.1536211371421814,
      "learning_rate": 6.340148784618122e-06,
      "loss": 0.0218,
      "step": 1130860
    },
    {
      "epoch": 1.8507099232143909,
      "grad_norm": 0.6847289800643921,
      "learning_rate": 6.3400828924046055e-06,
      "loss": 0.0209,
      "step": 1130880
    },
    {
      "epoch": 1.8507426536530442,
      "grad_norm": 0.4966823160648346,
      "learning_rate": 6.340017000191087e-06,
      "loss": 0.0144,
      "step": 1130900
    },
    {
      "epoch": 1.8507753840916976,
      "grad_norm": 0.524534285068512,
      "learning_rate": 6.339951107977571e-06,
      "loss": 0.0146,
      "step": 1130920
    },
    {
      "epoch": 1.850808114530351,
      "grad_norm": 0.3715192675590515,
      "learning_rate": 6.339885215764054e-06,
      "loss": 0.0223,
      "step": 1130940
    },
    {
      "epoch": 1.8508408449690044,
      "grad_norm": 0.8288831114768982,
      "learning_rate": 6.3398193235505364e-06,
      "loss": 0.0174,
      "step": 1130960
    },
    {
      "epoch": 1.8508735754076575,
      "grad_norm": 0.2383943349123001,
      "learning_rate": 6.339753431337019e-06,
      "loss": 0.0106,
      "step": 1130980
    },
    {
      "epoch": 1.850906305846311,
      "grad_norm": 1.2633875608444214,
      "learning_rate": 6.339687539123503e-06,
      "loss": 0.019,
      "step": 1131000
    },
    {
      "epoch": 1.8509390362849643,
      "grad_norm": 0.29779207706451416,
      "learning_rate": 6.339621646909985e-06,
      "loss": 0.0153,
      "step": 1131020
    },
    {
      "epoch": 1.8509717667236176,
      "grad_norm": 0.50479656457901,
      "learning_rate": 6.339555754696468e-06,
      "loss": 0.0188,
      "step": 1131040
    },
    {
      "epoch": 1.851004497162271,
      "grad_norm": 0.6985387206077576,
      "learning_rate": 6.33948986248295e-06,
      "loss": 0.02,
      "step": 1131060
    },
    {
      "epoch": 1.8510372276009242,
      "grad_norm": 1.0447899103164673,
      "learning_rate": 6.339423970269434e-06,
      "loss": 0.0156,
      "step": 1131080
    },
    {
      "epoch": 1.8510699580395777,
      "grad_norm": 0.7800400853157043,
      "learning_rate": 6.339358078055917e-06,
      "loss": 0.0259,
      "step": 1131100
    },
    {
      "epoch": 1.851102688478231,
      "grad_norm": 0.8987565040588379,
      "learning_rate": 6.339292185842399e-06,
      "loss": 0.0212,
      "step": 1131120
    },
    {
      "epoch": 1.8511354189168845,
      "grad_norm": 0.3892028033733368,
      "learning_rate": 6.339226293628883e-06,
      "loss": 0.0183,
      "step": 1131140
    },
    {
      "epoch": 1.8511681493555376,
      "grad_norm": 0.7441497445106506,
      "learning_rate": 6.3391604014153655e-06,
      "loss": 0.0155,
      "step": 1131160
    },
    {
      "epoch": 1.851200879794191,
      "grad_norm": 0.21333402395248413,
      "learning_rate": 6.339094509201848e-06,
      "loss": 0.0174,
      "step": 1131180
    },
    {
      "epoch": 1.8512336102328444,
      "grad_norm": 0.05510648712515831,
      "learning_rate": 6.339028616988331e-06,
      "loss": 0.0202,
      "step": 1131200
    },
    {
      "epoch": 1.8512663406714975,
      "grad_norm": 0.1905769407749176,
      "learning_rate": 6.338962724774815e-06,
      "loss": 0.0119,
      "step": 1131220
    },
    {
      "epoch": 1.8512990711101511,
      "grad_norm": 0.7376692295074463,
      "learning_rate": 6.3388968325612965e-06,
      "loss": 0.0203,
      "step": 1131240
    },
    {
      "epoch": 1.8513318015488043,
      "grad_norm": 0.5198673605918884,
      "learning_rate": 6.33883094034778e-06,
      "loss": 0.0152,
      "step": 1131260
    },
    {
      "epoch": 1.8513645319874577,
      "grad_norm": 0.6158515810966492,
      "learning_rate": 6.338765048134262e-06,
      "loss": 0.013,
      "step": 1131280
    },
    {
      "epoch": 1.851397262426111,
      "grad_norm": 0.25732752680778503,
      "learning_rate": 6.3386991559207455e-06,
      "loss": 0.0208,
      "step": 1131300
    },
    {
      "epoch": 1.8514299928647644,
      "grad_norm": 0.4108847975730896,
      "learning_rate": 6.338633263707228e-06,
      "loss": 0.0182,
      "step": 1131320
    },
    {
      "epoch": 1.8514627233034178,
      "grad_norm": 0.12378191202878952,
      "learning_rate": 6.338567371493711e-06,
      "loss": 0.0292,
      "step": 1131340
    },
    {
      "epoch": 1.851495453742071,
      "grad_norm": 0.17138153314590454,
      "learning_rate": 6.338501479280194e-06,
      "loss": 0.0139,
      "step": 1131360
    },
    {
      "epoch": 1.8515281841807245,
      "grad_norm": 0.16821224987506866,
      "learning_rate": 6.338435587066677e-06,
      "loss": 0.0197,
      "step": 1131380
    },
    {
      "epoch": 1.8515609146193777,
      "grad_norm": 0.8926171064376831,
      "learning_rate": 6.338369694853159e-06,
      "loss": 0.0378,
      "step": 1131400
    },
    {
      "epoch": 1.851593645058031,
      "grad_norm": 0.46923530101776123,
      "learning_rate": 6.338303802639643e-06,
      "loss": 0.0134,
      "step": 1131420
    },
    {
      "epoch": 1.8516263754966844,
      "grad_norm": 0.9001820087432861,
      "learning_rate": 6.338237910426126e-06,
      "loss": 0.0182,
      "step": 1131440
    },
    {
      "epoch": 1.8516591059353378,
      "grad_norm": 0.3059546947479248,
      "learning_rate": 6.338172018212608e-06,
      "loss": 0.0104,
      "step": 1131460
    },
    {
      "epoch": 1.8516918363739912,
      "grad_norm": 0.5173782110214233,
      "learning_rate": 6.338106125999092e-06,
      "loss": 0.0166,
      "step": 1131480
    },
    {
      "epoch": 1.8517245668126443,
      "grad_norm": 0.2638733386993408,
      "learning_rate": 6.338040233785574e-06,
      "loss": 0.0179,
      "step": 1131500
    },
    {
      "epoch": 1.851757297251298,
      "grad_norm": 0.39390626549720764,
      "learning_rate": 6.337974341572057e-06,
      "loss": 0.0189,
      "step": 1131520
    },
    {
      "epoch": 1.851790027689951,
      "grad_norm": 0.7460911870002747,
      "learning_rate": 6.337908449358539e-06,
      "loss": 0.0178,
      "step": 1131540
    },
    {
      "epoch": 1.8518227581286044,
      "grad_norm": 0.675254225730896,
      "learning_rate": 6.337842557145023e-06,
      "loss": 0.0226,
      "step": 1131560
    },
    {
      "epoch": 1.8518554885672578,
      "grad_norm": 0.4119325578212738,
      "learning_rate": 6.337776664931506e-06,
      "loss": 0.021,
      "step": 1131580
    },
    {
      "epoch": 1.8518882190059112,
      "grad_norm": 0.26093193888664246,
      "learning_rate": 6.337710772717988e-06,
      "loss": 0.0158,
      "step": 1131600
    },
    {
      "epoch": 1.8519209494445645,
      "grad_norm": 0.7210449576377869,
      "learning_rate": 6.337644880504471e-06,
      "loss": 0.013,
      "step": 1131620
    },
    {
      "epoch": 1.8519536798832177,
      "grad_norm": 0.5659429430961609,
      "learning_rate": 6.337578988290955e-06,
      "loss": 0.0119,
      "step": 1131640
    },
    {
      "epoch": 1.8519864103218713,
      "grad_norm": 0.18153773248195648,
      "learning_rate": 6.3375130960774366e-06,
      "loss": 0.0227,
      "step": 1131660
    },
    {
      "epoch": 1.8520191407605244,
      "grad_norm": 0.8154892325401306,
      "learning_rate": 6.33744720386392e-06,
      "loss": 0.0198,
      "step": 1131680
    },
    {
      "epoch": 1.8520518711991778,
      "grad_norm": 0.10742669552564621,
      "learning_rate": 6.337381311650402e-06,
      "loss": 0.0194,
      "step": 1131700
    },
    {
      "epoch": 1.8520846016378312,
      "grad_norm": 0.8678341507911682,
      "learning_rate": 6.337315419436886e-06,
      "loss": 0.0146,
      "step": 1131720
    },
    {
      "epoch": 1.8521173320764843,
      "grad_norm": 0.2743055522441864,
      "learning_rate": 6.337249527223368e-06,
      "loss": 0.0123,
      "step": 1131740
    },
    {
      "epoch": 1.852150062515138,
      "grad_norm": 0.23679475486278534,
      "learning_rate": 6.337183635009851e-06,
      "loss": 0.019,
      "step": 1131760
    },
    {
      "epoch": 1.852182792953791,
      "grad_norm": 1.2820146083831787,
      "learning_rate": 6.337117742796334e-06,
      "loss": 0.013,
      "step": 1131780
    },
    {
      "epoch": 1.8522155233924447,
      "grad_norm": 0.3543722927570343,
      "learning_rate": 6.3370518505828174e-06,
      "loss": 0.0162,
      "step": 1131800
    },
    {
      "epoch": 1.8522482538310978,
      "grad_norm": 0.9355912208557129,
      "learning_rate": 6.3369859583693e-06,
      "loss": 0.0235,
      "step": 1131820
    },
    {
      "epoch": 1.8522809842697512,
      "grad_norm": 0.7592072486877441,
      "learning_rate": 6.336920066155783e-06,
      "loss": 0.0176,
      "step": 1131840
    },
    {
      "epoch": 1.8523137147084046,
      "grad_norm": 0.2767462730407715,
      "learning_rate": 6.3368541739422665e-06,
      "loss": 0.0199,
      "step": 1131860
    },
    {
      "epoch": 1.8523464451470577,
      "grad_norm": 1.4704142808914185,
      "learning_rate": 6.336788281728748e-06,
      "loss": 0.013,
      "step": 1131880
    },
    {
      "epoch": 1.8523791755857113,
      "grad_norm": 0.20237690210342407,
      "learning_rate": 6.336722389515232e-06,
      "loss": 0.02,
      "step": 1131900
    },
    {
      "epoch": 1.8524119060243645,
      "grad_norm": 1.0599159002304077,
      "learning_rate": 6.336656497301714e-06,
      "loss": 0.0201,
      "step": 1131920
    },
    {
      "epoch": 1.8524446364630178,
      "grad_norm": 0.6175066232681274,
      "learning_rate": 6.3365906050881975e-06,
      "loss": 0.0234,
      "step": 1131940
    },
    {
      "epoch": 1.8524773669016712,
      "grad_norm": 0.7364146709442139,
      "learning_rate": 6.33652471287468e-06,
      "loss": 0.0217,
      "step": 1131960
    },
    {
      "epoch": 1.8525100973403246,
      "grad_norm": 0.5089961886405945,
      "learning_rate": 6.336458820661163e-06,
      "loss": 0.0227,
      "step": 1131980
    },
    {
      "epoch": 1.852542827778978,
      "grad_norm": 0.610695481300354,
      "learning_rate": 6.336392928447646e-06,
      "loss": 0.0202,
      "step": 1132000
    },
    {
      "epoch": 1.852575558217631,
      "grad_norm": 0.36293768882751465,
      "learning_rate": 6.336327036234129e-06,
      "loss": 0.0141,
      "step": 1132020
    },
    {
      "epoch": 1.8526082886562847,
      "grad_norm": 0.3245810568332672,
      "learning_rate": 6.336261144020611e-06,
      "loss": 0.0143,
      "step": 1132040
    },
    {
      "epoch": 1.8526410190949378,
      "grad_norm": 0.7039110064506531,
      "learning_rate": 6.336195251807095e-06,
      "loss": 0.0152,
      "step": 1132060
    },
    {
      "epoch": 1.8526737495335912,
      "grad_norm": 0.3529643714427948,
      "learning_rate": 6.336129359593577e-06,
      "loss": 0.0131,
      "step": 1132080
    },
    {
      "epoch": 1.8527064799722446,
      "grad_norm": 0.30800026655197144,
      "learning_rate": 6.33606346738006e-06,
      "loss": 0.0174,
      "step": 1132100
    },
    {
      "epoch": 1.852739210410898,
      "grad_norm": 0.3027467131614685,
      "learning_rate": 6.335997575166543e-06,
      "loss": 0.0194,
      "step": 1132120
    },
    {
      "epoch": 1.8527719408495513,
      "grad_norm": 0.3997223377227783,
      "learning_rate": 6.335931682953026e-06,
      "loss": 0.0175,
      "step": 1132140
    },
    {
      "epoch": 1.8528046712882045,
      "grad_norm": 0.8505234122276306,
      "learning_rate": 6.335865790739509e-06,
      "loss": 0.0176,
      "step": 1132160
    },
    {
      "epoch": 1.852837401726858,
      "grad_norm": 0.2554413974285126,
      "learning_rate": 6.335799898525992e-06,
      "loss": 0.0161,
      "step": 1132180
    },
    {
      "epoch": 1.8528701321655112,
      "grad_norm": 0.3343108594417572,
      "learning_rate": 6.335734006312475e-06,
      "loss": 0.023,
      "step": 1132200
    },
    {
      "epoch": 1.8529028626041646,
      "grad_norm": 0.6527360677719116,
      "learning_rate": 6.3356681140989575e-06,
      "loss": 0.0176,
      "step": 1132220
    },
    {
      "epoch": 1.852935593042818,
      "grad_norm": 0.6286351084709167,
      "learning_rate": 6.335602221885441e-06,
      "loss": 0.0145,
      "step": 1132240
    },
    {
      "epoch": 1.8529683234814713,
      "grad_norm": 0.7496854066848755,
      "learning_rate": 6.335536329671923e-06,
      "loss": 0.0177,
      "step": 1132260
    },
    {
      "epoch": 1.8530010539201247,
      "grad_norm": 0.39042410254478455,
      "learning_rate": 6.3354704374584066e-06,
      "loss": 0.0187,
      "step": 1132280
    },
    {
      "epoch": 1.8530337843587779,
      "grad_norm": 0.3772192597389221,
      "learning_rate": 6.3354045452448885e-06,
      "loss": 0.0199,
      "step": 1132300
    },
    {
      "epoch": 1.8530665147974315,
      "grad_norm": 0.10908275097608566,
      "learning_rate": 6.335338653031372e-06,
      "loss": 0.0154,
      "step": 1132320
    },
    {
      "epoch": 1.8530992452360846,
      "grad_norm": 0.24733524024486542,
      "learning_rate": 6.335272760817854e-06,
      "loss": 0.0106,
      "step": 1132340
    },
    {
      "epoch": 1.853131975674738,
      "grad_norm": 0.6949149966239929,
      "learning_rate": 6.3352068686043375e-06,
      "loss": 0.0143,
      "step": 1132360
    },
    {
      "epoch": 1.8531647061133913,
      "grad_norm": 0.3426445424556732,
      "learning_rate": 6.33514097639082e-06,
      "loss": 0.0144,
      "step": 1132380
    },
    {
      "epoch": 1.8531974365520447,
      "grad_norm": 0.9155674576759338,
      "learning_rate": 6.335075084177304e-06,
      "loss": 0.0165,
      "step": 1132400
    },
    {
      "epoch": 1.853230166990698,
      "grad_norm": 0.20005258917808533,
      "learning_rate": 6.335009191963786e-06,
      "loss": 0.0157,
      "step": 1132420
    },
    {
      "epoch": 1.8532628974293512,
      "grad_norm": 0.07706261426210403,
      "learning_rate": 6.334943299750269e-06,
      "loss": 0.0279,
      "step": 1132440
    },
    {
      "epoch": 1.8532956278680048,
      "grad_norm": 0.5128179788589478,
      "learning_rate": 6.334877407536751e-06,
      "loss": 0.0208,
      "step": 1132460
    },
    {
      "epoch": 1.853328358306658,
      "grad_norm": 0.1814817488193512,
      "learning_rate": 6.334811515323235e-06,
      "loss": 0.0212,
      "step": 1132480
    },
    {
      "epoch": 1.8533610887453114,
      "grad_norm": 0.2710743844509125,
      "learning_rate": 6.334745623109718e-06,
      "loss": 0.0247,
      "step": 1132500
    },
    {
      "epoch": 1.8533938191839647,
      "grad_norm": 0.9207590818405151,
      "learning_rate": 6.3346797308962e-06,
      "loss": 0.0199,
      "step": 1132520
    },
    {
      "epoch": 1.8534265496226179,
      "grad_norm": 0.39159095287323,
      "learning_rate": 6.334613838682684e-06,
      "loss": 0.0252,
      "step": 1132540
    },
    {
      "epoch": 1.8534592800612715,
      "grad_norm": 0.29926225543022156,
      "learning_rate": 6.334547946469166e-06,
      "loss": 0.0145,
      "step": 1132560
    },
    {
      "epoch": 1.8534920104999246,
      "grad_norm": 0.20512093603610992,
      "learning_rate": 6.334482054255649e-06,
      "loss": 0.0183,
      "step": 1132580
    },
    {
      "epoch": 1.8535247409385782,
      "grad_norm": 0.7902443408966064,
      "learning_rate": 6.334416162042132e-06,
      "loss": 0.0151,
      "step": 1132600
    },
    {
      "epoch": 1.8535574713772314,
      "grad_norm": 0.3124261200428009,
      "learning_rate": 6.334350269828615e-06,
      "loss": 0.0182,
      "step": 1132620
    },
    {
      "epoch": 1.8535902018158847,
      "grad_norm": 0.23301635682582855,
      "learning_rate": 6.334284377615098e-06,
      "loss": 0.0183,
      "step": 1132640
    },
    {
      "epoch": 1.8536229322545381,
      "grad_norm": 0.26528269052505493,
      "learning_rate": 6.334218485401581e-06,
      "loss": 0.0188,
      "step": 1132660
    },
    {
      "epoch": 1.8536556626931913,
      "grad_norm": 0.23459866642951965,
      "learning_rate": 6.334152593188063e-06,
      "loss": 0.0172,
      "step": 1132680
    },
    {
      "epoch": 1.8536883931318449,
      "grad_norm": 0.4688413739204407,
      "learning_rate": 6.334086700974547e-06,
      "loss": 0.0245,
      "step": 1132700
    },
    {
      "epoch": 1.853721123570498,
      "grad_norm": 0.3665996193885803,
      "learning_rate": 6.3340208087610285e-06,
      "loss": 0.0152,
      "step": 1132720
    },
    {
      "epoch": 1.8537538540091514,
      "grad_norm": 0.6705021262168884,
      "learning_rate": 6.333954916547512e-06,
      "loss": 0.0195,
      "step": 1132740
    },
    {
      "epoch": 1.8537865844478048,
      "grad_norm": 0.4109605550765991,
      "learning_rate": 6.333889024333995e-06,
      "loss": 0.0121,
      "step": 1132760
    },
    {
      "epoch": 1.8538193148864581,
      "grad_norm": 0.1788579672574997,
      "learning_rate": 6.333823132120478e-06,
      "loss": 0.0131,
      "step": 1132780
    },
    {
      "epoch": 1.8538520453251115,
      "grad_norm": 0.18229393661022186,
      "learning_rate": 6.33375723990696e-06,
      "loss": 0.0196,
      "step": 1132800
    },
    {
      "epoch": 1.8538847757637646,
      "grad_norm": 0.5796471238136292,
      "learning_rate": 6.333691347693444e-06,
      "loss": 0.0212,
      "step": 1132820
    },
    {
      "epoch": 1.8539175062024182,
      "grad_norm": 0.2834290564060211,
      "learning_rate": 6.333625455479926e-06,
      "loss": 0.0134,
      "step": 1132840
    },
    {
      "epoch": 1.8539502366410714,
      "grad_norm": 0.3503863513469696,
      "learning_rate": 6.333559563266409e-06,
      "loss": 0.0158,
      "step": 1132860
    },
    {
      "epoch": 1.8539829670797248,
      "grad_norm": 0.23100584745407104,
      "learning_rate": 6.333493671052893e-06,
      "loss": 0.0166,
      "step": 1132880
    },
    {
      "epoch": 1.8540156975183781,
      "grad_norm": 0.2323499470949173,
      "learning_rate": 6.333427778839375e-06,
      "loss": 0.0186,
      "step": 1132900
    },
    {
      "epoch": 1.8540484279570315,
      "grad_norm": 0.3290567100048065,
      "learning_rate": 6.3333618866258585e-06,
      "loss": 0.0192,
      "step": 1132920
    },
    {
      "epoch": 1.8540811583956849,
      "grad_norm": 0.39583820104599,
      "learning_rate": 6.33329599441234e-06,
      "loss": 0.0142,
      "step": 1132940
    },
    {
      "epoch": 1.854113888834338,
      "grad_norm": 0.221237912774086,
      "learning_rate": 6.333230102198824e-06,
      "loss": 0.0094,
      "step": 1132960
    },
    {
      "epoch": 1.8541466192729916,
      "grad_norm": 0.4053466320037842,
      "learning_rate": 6.333164209985307e-06,
      "loss": 0.0162,
      "step": 1132980
    },
    {
      "epoch": 1.8541793497116448,
      "grad_norm": 0.4836473762989044,
      "learning_rate": 6.3330983177717894e-06,
      "loss": 0.0225,
      "step": 1133000
    },
    {
      "epoch": 1.8542120801502981,
      "grad_norm": 0.23846444487571716,
      "learning_rate": 6.333032425558272e-06,
      "loss": 0.0157,
      "step": 1133020
    },
    {
      "epoch": 1.8542448105889515,
      "grad_norm": 0.87366783618927,
      "learning_rate": 6.332966533344756e-06,
      "loss": 0.0159,
      "step": 1133040
    },
    {
      "epoch": 1.854277541027605,
      "grad_norm": 1.3764002323150635,
      "learning_rate": 6.332900641131238e-06,
      "loss": 0.0175,
      "step": 1133060
    },
    {
      "epoch": 1.8543102714662583,
      "grad_norm": 0.41762417554855347,
      "learning_rate": 6.332834748917721e-06,
      "loss": 0.0172,
      "step": 1133080
    },
    {
      "epoch": 1.8543430019049114,
      "grad_norm": 0.48206502199172974,
      "learning_rate": 6.332768856704203e-06,
      "loss": 0.0224,
      "step": 1133100
    },
    {
      "epoch": 1.854375732343565,
      "grad_norm": 0.10804613679647446,
      "learning_rate": 6.332702964490687e-06,
      "loss": 0.0132,
      "step": 1133120
    },
    {
      "epoch": 1.8544084627822182,
      "grad_norm": 3.123772621154785,
      "learning_rate": 6.3326370722771695e-06,
      "loss": 0.0164,
      "step": 1133140
    },
    {
      "epoch": 1.8544411932208715,
      "grad_norm": 0.3236117362976074,
      "learning_rate": 6.332571180063652e-06,
      "loss": 0.0195,
      "step": 1133160
    },
    {
      "epoch": 1.854473923659525,
      "grad_norm": 1.702924132347107,
      "learning_rate": 6.332505287850135e-06,
      "loss": 0.0236,
      "step": 1133180
    },
    {
      "epoch": 1.8545066540981783,
      "grad_norm": 0.39646807312965393,
      "learning_rate": 6.3324393956366185e-06,
      "loss": 0.0179,
      "step": 1133200
    },
    {
      "epoch": 1.8545393845368316,
      "grad_norm": 0.47101032733917236,
      "learning_rate": 6.332373503423101e-06,
      "loss": 0.0212,
      "step": 1133220
    },
    {
      "epoch": 1.8545721149754848,
      "grad_norm": 0.1383814662694931,
      "learning_rate": 6.332307611209584e-06,
      "loss": 0.0129,
      "step": 1133240
    },
    {
      "epoch": 1.8546048454141384,
      "grad_norm": 0.5420436859130859,
      "learning_rate": 6.332241718996068e-06,
      "loss": 0.017,
      "step": 1133260
    },
    {
      "epoch": 1.8546375758527915,
      "grad_norm": 0.535454511642456,
      "learning_rate": 6.3321758267825495e-06,
      "loss": 0.0179,
      "step": 1133280
    },
    {
      "epoch": 1.854670306291445,
      "grad_norm": 0.13669228553771973,
      "learning_rate": 6.332109934569033e-06,
      "loss": 0.0189,
      "step": 1133300
    },
    {
      "epoch": 1.8547030367300983,
      "grad_norm": 0.9305263757705688,
      "learning_rate": 6.332044042355515e-06,
      "loss": 0.0148,
      "step": 1133320
    },
    {
      "epoch": 1.8547357671687514,
      "grad_norm": 0.13475759327411652,
      "learning_rate": 6.3319781501419986e-06,
      "loss": 0.0224,
      "step": 1133340
    },
    {
      "epoch": 1.854768497607405,
      "grad_norm": 0.3345103859901428,
      "learning_rate": 6.3319122579284804e-06,
      "loss": 0.0123,
      "step": 1133360
    },
    {
      "epoch": 1.8548012280460582,
      "grad_norm": 0.32453617453575134,
      "learning_rate": 6.331846365714964e-06,
      "loss": 0.0178,
      "step": 1133380
    },
    {
      "epoch": 1.8548339584847118,
      "grad_norm": 0.6605687141418457,
      "learning_rate": 6.331780473501447e-06,
      "loss": 0.018,
      "step": 1133400
    },
    {
      "epoch": 1.854866688923365,
      "grad_norm": 0.5420171618461609,
      "learning_rate": 6.3317145812879295e-06,
      "loss": 0.0151,
      "step": 1133420
    },
    {
      "epoch": 1.8548994193620183,
      "grad_norm": 0.6320105791091919,
      "learning_rate": 6.331648689074412e-06,
      "loss": 0.0295,
      "step": 1133440
    },
    {
      "epoch": 1.8549321498006717,
      "grad_norm": 1.4015394449234009,
      "learning_rate": 6.331582796860896e-06,
      "loss": 0.0191,
      "step": 1133460
    },
    {
      "epoch": 1.8549648802393248,
      "grad_norm": 0.2505245804786682,
      "learning_rate": 6.331516904647378e-06,
      "loss": 0.0186,
      "step": 1133480
    },
    {
      "epoch": 1.8549976106779784,
      "grad_norm": 0.5916975140571594,
      "learning_rate": 6.331451012433861e-06,
      "loss": 0.014,
      "step": 1133500
    },
    {
      "epoch": 1.8550303411166316,
      "grad_norm": 0.20941562950611115,
      "learning_rate": 6.331385120220343e-06,
      "loss": 0.0164,
      "step": 1133520
    },
    {
      "epoch": 1.855063071555285,
      "grad_norm": 0.5657663345336914,
      "learning_rate": 6.331319228006827e-06,
      "loss": 0.0163,
      "step": 1133540
    },
    {
      "epoch": 1.8550958019939383,
      "grad_norm": 0.25777485966682434,
      "learning_rate": 6.33125333579331e-06,
      "loss": 0.0117,
      "step": 1133560
    },
    {
      "epoch": 1.8551285324325917,
      "grad_norm": 0.5116323232650757,
      "learning_rate": 6.331187443579792e-06,
      "loss": 0.0188,
      "step": 1133580
    },
    {
      "epoch": 1.855161262871245,
      "grad_norm": 0.11876428127288818,
      "learning_rate": 6.331121551366276e-06,
      "loss": 0.0176,
      "step": 1133600
    },
    {
      "epoch": 1.8551939933098982,
      "grad_norm": 0.4194844365119934,
      "learning_rate": 6.331055659152759e-06,
      "loss": 0.0153,
      "step": 1133620
    },
    {
      "epoch": 1.8552267237485518,
      "grad_norm": 0.3626892566680908,
      "learning_rate": 6.330989766939241e-06,
      "loss": 0.013,
      "step": 1133640
    },
    {
      "epoch": 1.855259454187205,
      "grad_norm": 0.25380998849868774,
      "learning_rate": 6.330923874725724e-06,
      "loss": 0.0167,
      "step": 1133660
    },
    {
      "epoch": 1.8552921846258583,
      "grad_norm": 0.668476939201355,
      "learning_rate": 6.330857982512208e-06,
      "loss": 0.0179,
      "step": 1133680
    },
    {
      "epoch": 1.8553249150645117,
      "grad_norm": 0.49282488226890564,
      "learning_rate": 6.3307920902986896e-06,
      "loss": 0.0166,
      "step": 1133700
    },
    {
      "epoch": 1.855357645503165,
      "grad_norm": 0.8348429799079895,
      "learning_rate": 6.330726198085173e-06,
      "loss": 0.0174,
      "step": 1133720
    },
    {
      "epoch": 1.8553903759418184,
      "grad_norm": 0.13706521689891815,
      "learning_rate": 6.330660305871655e-06,
      "loss": 0.0161,
      "step": 1133740
    },
    {
      "epoch": 1.8554231063804716,
      "grad_norm": 0.3450086712837219,
      "learning_rate": 6.330594413658139e-06,
      "loss": 0.0219,
      "step": 1133760
    },
    {
      "epoch": 1.8554558368191252,
      "grad_norm": 0.39521530270576477,
      "learning_rate": 6.330528521444621e-06,
      "loss": 0.0215,
      "step": 1133780
    },
    {
      "epoch": 1.8554885672577783,
      "grad_norm": 0.4679190516471863,
      "learning_rate": 6.330462629231104e-06,
      "loss": 0.0194,
      "step": 1133800
    },
    {
      "epoch": 1.8555212976964317,
      "grad_norm": 0.31467393040657043,
      "learning_rate": 6.330396737017587e-06,
      "loss": 0.0168,
      "step": 1133820
    },
    {
      "epoch": 1.855554028135085,
      "grad_norm": 0.14826591312885284,
      "learning_rate": 6.3303308448040704e-06,
      "loss": 0.0092,
      "step": 1133840
    },
    {
      "epoch": 1.8555867585737384,
      "grad_norm": 0.4792678952217102,
      "learning_rate": 6.330264952590552e-06,
      "loss": 0.0252,
      "step": 1133860
    },
    {
      "epoch": 1.8556194890123918,
      "grad_norm": 0.35501131415367126,
      "learning_rate": 6.330199060377036e-06,
      "loss": 0.0148,
      "step": 1133880
    },
    {
      "epoch": 1.855652219451045,
      "grad_norm": 0.21453255414962769,
      "learning_rate": 6.330133168163518e-06,
      "loss": 0.014,
      "step": 1133900
    },
    {
      "epoch": 1.8556849498896986,
      "grad_norm": 0.31455859541893005,
      "learning_rate": 6.330067275950001e-06,
      "loss": 0.0213,
      "step": 1133920
    },
    {
      "epoch": 1.8557176803283517,
      "grad_norm": 0.9077568054199219,
      "learning_rate": 6.330001383736485e-06,
      "loss": 0.0212,
      "step": 1133940
    },
    {
      "epoch": 1.855750410767005,
      "grad_norm": 0.7715252637863159,
      "learning_rate": 6.329935491522967e-06,
      "loss": 0.0286,
      "step": 1133960
    },
    {
      "epoch": 1.8557831412056585,
      "grad_norm": 0.23157177865505219,
      "learning_rate": 6.3298695993094505e-06,
      "loss": 0.0168,
      "step": 1133980
    },
    {
      "epoch": 1.8558158716443118,
      "grad_norm": 0.6778032183647156,
      "learning_rate": 6.329803707095933e-06,
      "loss": 0.0174,
      "step": 1134000
    },
    {
      "epoch": 1.8558486020829652,
      "grad_norm": 0.2618899345397949,
      "learning_rate": 6.329737814882416e-06,
      "loss": 0.0174,
      "step": 1134020
    },
    {
      "epoch": 1.8558813325216184,
      "grad_norm": 0.19780921936035156,
      "learning_rate": 6.329671922668899e-06,
      "loss": 0.0183,
      "step": 1134040
    },
    {
      "epoch": 1.855914062960272,
      "grad_norm": 0.7876753807067871,
      "learning_rate": 6.329606030455382e-06,
      "loss": 0.0234,
      "step": 1134060
    },
    {
      "epoch": 1.855946793398925,
      "grad_norm": 0.743047297000885,
      "learning_rate": 6.329540138241864e-06,
      "loss": 0.0155,
      "step": 1134080
    },
    {
      "epoch": 1.8559795238375785,
      "grad_norm": 0.22548004984855652,
      "learning_rate": 6.329474246028348e-06,
      "loss": 0.0148,
      "step": 1134100
    },
    {
      "epoch": 1.8560122542762318,
      "grad_norm": 0.6849916577339172,
      "learning_rate": 6.32940835381483e-06,
      "loss": 0.0225,
      "step": 1134120
    },
    {
      "epoch": 1.856044984714885,
      "grad_norm": 0.531255841255188,
      "learning_rate": 6.329342461601313e-06,
      "loss": 0.019,
      "step": 1134140
    },
    {
      "epoch": 1.8560777151535386,
      "grad_norm": 0.6008460521697998,
      "learning_rate": 6.329276569387796e-06,
      "loss": 0.0229,
      "step": 1134160
    },
    {
      "epoch": 1.8561104455921917,
      "grad_norm": 0.18030065298080444,
      "learning_rate": 6.329210677174279e-06,
      "loss": 0.011,
      "step": 1134180
    },
    {
      "epoch": 1.856143176030845,
      "grad_norm": 0.32691001892089844,
      "learning_rate": 6.3291447849607614e-06,
      "loss": 0.0235,
      "step": 1134200
    },
    {
      "epoch": 1.8561759064694985,
      "grad_norm": 0.9909719824790955,
      "learning_rate": 6.329078892747245e-06,
      "loss": 0.0165,
      "step": 1134220
    },
    {
      "epoch": 1.8562086369081519,
      "grad_norm": 0.8156806230545044,
      "learning_rate": 6.329013000533727e-06,
      "loss": 0.0205,
      "step": 1134240
    },
    {
      "epoch": 1.8562413673468052,
      "grad_norm": 0.8221979737281799,
      "learning_rate": 6.3289471083202105e-06,
      "loss": 0.0242,
      "step": 1134260
    },
    {
      "epoch": 1.8562740977854584,
      "grad_norm": 0.09924633800983429,
      "learning_rate": 6.328881216106694e-06,
      "loss": 0.0154,
      "step": 1134280
    },
    {
      "epoch": 1.856306828224112,
      "grad_norm": 0.9234494566917419,
      "learning_rate": 6.328815323893176e-06,
      "loss": 0.0166,
      "step": 1134300
    },
    {
      "epoch": 1.8563395586627651,
      "grad_norm": 0.6888952255249023,
      "learning_rate": 6.32874943167966e-06,
      "loss": 0.0267,
      "step": 1134320
    },
    {
      "epoch": 1.8563722891014185,
      "grad_norm": 2.0592448711395264,
      "learning_rate": 6.3286835394661415e-06,
      "loss": 0.0248,
      "step": 1134340
    },
    {
      "epoch": 1.8564050195400719,
      "grad_norm": 0.7526177763938904,
      "learning_rate": 6.328617647252625e-06,
      "loss": 0.0138,
      "step": 1134360
    },
    {
      "epoch": 1.8564377499787252,
      "grad_norm": 0.6024923920631409,
      "learning_rate": 6.328551755039107e-06,
      "loss": 0.0215,
      "step": 1134380
    },
    {
      "epoch": 1.8564704804173786,
      "grad_norm": 1.3691213130950928,
      "learning_rate": 6.3284858628255905e-06,
      "loss": 0.0246,
      "step": 1134400
    },
    {
      "epoch": 1.8565032108560318,
      "grad_norm": 0.2818780839443207,
      "learning_rate": 6.328419970612073e-06,
      "loss": 0.0181,
      "step": 1134420
    },
    {
      "epoch": 1.8565359412946854,
      "grad_norm": 0.2727467715740204,
      "learning_rate": 6.328354078398556e-06,
      "loss": 0.0151,
      "step": 1134440
    },
    {
      "epoch": 1.8565686717333385,
      "grad_norm": 0.34656623005867004,
      "learning_rate": 6.328288186185039e-06,
      "loss": 0.01,
      "step": 1134460
    },
    {
      "epoch": 1.8566014021719919,
      "grad_norm": 0.1629878133535385,
      "learning_rate": 6.328222293971522e-06,
      "loss": 0.0127,
      "step": 1134480
    },
    {
      "epoch": 1.8566341326106452,
      "grad_norm": 1.318163275718689,
      "learning_rate": 6.328156401758004e-06,
      "loss": 0.0208,
      "step": 1134500
    },
    {
      "epoch": 1.8566668630492986,
      "grad_norm": 1.2582738399505615,
      "learning_rate": 6.328090509544488e-06,
      "loss": 0.0182,
      "step": 1134520
    },
    {
      "epoch": 1.856699593487952,
      "grad_norm": 3.581127882003784,
      "learning_rate": 6.32802461733097e-06,
      "loss": 0.0189,
      "step": 1134540
    },
    {
      "epoch": 1.8567323239266051,
      "grad_norm": 0.2648564875125885,
      "learning_rate": 6.327958725117453e-06,
      "loss": 0.0168,
      "step": 1134560
    },
    {
      "epoch": 1.8567650543652587,
      "grad_norm": 0.6020193696022034,
      "learning_rate": 6.327892832903936e-06,
      "loss": 0.0186,
      "step": 1134580
    },
    {
      "epoch": 1.8567977848039119,
      "grad_norm": 0.5296335816383362,
      "learning_rate": 6.327826940690419e-06,
      "loss": 0.0183,
      "step": 1134600
    },
    {
      "epoch": 1.8568305152425653,
      "grad_norm": 0.223464697599411,
      "learning_rate": 6.327761048476902e-06,
      "loss": 0.0138,
      "step": 1134620
    },
    {
      "epoch": 1.8568632456812186,
      "grad_norm": 0.6121490597724915,
      "learning_rate": 6.327695156263385e-06,
      "loss": 0.0247,
      "step": 1134640
    },
    {
      "epoch": 1.856895976119872,
      "grad_norm": 0.7433351278305054,
      "learning_rate": 6.327629264049868e-06,
      "loss": 0.0167,
      "step": 1134660
    },
    {
      "epoch": 1.8569287065585254,
      "grad_norm": 0.7119141221046448,
      "learning_rate": 6.327563371836351e-06,
      "loss": 0.0112,
      "step": 1134680
    },
    {
      "epoch": 1.8569614369971785,
      "grad_norm": 0.45614248514175415,
      "learning_rate": 6.327497479622834e-06,
      "loss": 0.0231,
      "step": 1134700
    },
    {
      "epoch": 1.8569941674358321,
      "grad_norm": 0.3902340829372406,
      "learning_rate": 6.327431587409316e-06,
      "loss": 0.0121,
      "step": 1134720
    },
    {
      "epoch": 1.8570268978744853,
      "grad_norm": 0.7426854968070984,
      "learning_rate": 6.3273656951958e-06,
      "loss": 0.0174,
      "step": 1134740
    },
    {
      "epoch": 1.8570596283131386,
      "grad_norm": 0.1492130607366562,
      "learning_rate": 6.3272998029822815e-06,
      "loss": 0.0215,
      "step": 1134760
    },
    {
      "epoch": 1.857092358751792,
      "grad_norm": 0.12238913774490356,
      "learning_rate": 6.327233910768765e-06,
      "loss": 0.0217,
      "step": 1134780
    },
    {
      "epoch": 1.8571250891904452,
      "grad_norm": 1.1149412393569946,
      "learning_rate": 6.327168018555248e-06,
      "loss": 0.019,
      "step": 1134800
    },
    {
      "epoch": 1.8571578196290988,
      "grad_norm": 0.38582730293273926,
      "learning_rate": 6.327102126341731e-06,
      "loss": 0.0144,
      "step": 1134820
    },
    {
      "epoch": 1.857190550067752,
      "grad_norm": 0.7853809595108032,
      "learning_rate": 6.327036234128213e-06,
      "loss": 0.0237,
      "step": 1134840
    },
    {
      "epoch": 1.8572232805064055,
      "grad_norm": 0.6913800239562988,
      "learning_rate": 6.326970341914697e-06,
      "loss": 0.0152,
      "step": 1134860
    },
    {
      "epoch": 1.8572560109450587,
      "grad_norm": 0.11475466191768646,
      "learning_rate": 6.326904449701179e-06,
      "loss": 0.0242,
      "step": 1134880
    },
    {
      "epoch": 1.857288741383712,
      "grad_norm": 0.9058498740196228,
      "learning_rate": 6.326838557487662e-06,
      "loss": 0.0173,
      "step": 1134900
    },
    {
      "epoch": 1.8573214718223654,
      "grad_norm": 0.21315182745456696,
      "learning_rate": 6.326772665274144e-06,
      "loss": 0.0154,
      "step": 1134920
    },
    {
      "epoch": 1.8573542022610185,
      "grad_norm": 0.45579618215560913,
      "learning_rate": 6.326706773060628e-06,
      "loss": 0.0159,
      "step": 1134940
    },
    {
      "epoch": 1.8573869326996721,
      "grad_norm": 0.3502861261367798,
      "learning_rate": 6.3266408808471115e-06,
      "loss": 0.0162,
      "step": 1134960
    },
    {
      "epoch": 1.8574196631383253,
      "grad_norm": 1.3137156963348389,
      "learning_rate": 6.326574988633593e-06,
      "loss": 0.0233,
      "step": 1134980
    },
    {
      "epoch": 1.8574523935769787,
      "grad_norm": 0.7401471138000488,
      "learning_rate": 6.326509096420077e-06,
      "loss": 0.0173,
      "step": 1135000
    },
    {
      "epoch": 1.857485124015632,
      "grad_norm": 0.6676747798919678,
      "learning_rate": 6.32644320420656e-06,
      "loss": 0.018,
      "step": 1135020
    },
    {
      "epoch": 1.8575178544542854,
      "grad_norm": 0.47883033752441406,
      "learning_rate": 6.3263773119930424e-06,
      "loss": 0.0234,
      "step": 1135040
    },
    {
      "epoch": 1.8575505848929388,
      "grad_norm": 0.5957788825035095,
      "learning_rate": 6.326311419779525e-06,
      "loss": 0.0186,
      "step": 1135060
    },
    {
      "epoch": 1.857583315331592,
      "grad_norm": 0.2623966634273529,
      "learning_rate": 6.326245527566009e-06,
      "loss": 0.0137,
      "step": 1135080
    },
    {
      "epoch": 1.8576160457702455,
      "grad_norm": 0.26717060804367065,
      "learning_rate": 6.326179635352491e-06,
      "loss": 0.0177,
      "step": 1135100
    },
    {
      "epoch": 1.8576487762088987,
      "grad_norm": 0.6964609026908875,
      "learning_rate": 6.326113743138974e-06,
      "loss": 0.0166,
      "step": 1135120
    },
    {
      "epoch": 1.857681506647552,
      "grad_norm": 0.6890629529953003,
      "learning_rate": 6.326047850925456e-06,
      "loss": 0.0209,
      "step": 1135140
    },
    {
      "epoch": 1.8577142370862054,
      "grad_norm": 0.24266570806503296,
      "learning_rate": 6.32598195871194e-06,
      "loss": 0.0161,
      "step": 1135160
    },
    {
      "epoch": 1.8577469675248588,
      "grad_norm": 0.2105146199464798,
      "learning_rate": 6.3259160664984225e-06,
      "loss": 0.027,
      "step": 1135180
    },
    {
      "epoch": 1.8577796979635122,
      "grad_norm": 0.27826163172721863,
      "learning_rate": 6.325850174284905e-06,
      "loss": 0.0179,
      "step": 1135200
    },
    {
      "epoch": 1.8578124284021653,
      "grad_norm": 0.24445229768753052,
      "learning_rate": 6.325784282071388e-06,
      "loss": 0.0156,
      "step": 1135220
    },
    {
      "epoch": 1.857845158840819,
      "grad_norm": 0.7096765041351318,
      "learning_rate": 6.3257183898578715e-06,
      "loss": 0.0221,
      "step": 1135240
    },
    {
      "epoch": 1.857877889279472,
      "grad_norm": 1.4419976472854614,
      "learning_rate": 6.3256524976443534e-06,
      "loss": 0.0217,
      "step": 1135260
    },
    {
      "epoch": 1.8579106197181254,
      "grad_norm": 0.3013812005519867,
      "learning_rate": 6.325586605430837e-06,
      "loss": 0.0112,
      "step": 1135280
    },
    {
      "epoch": 1.8579433501567788,
      "grad_norm": 0.22831496596336365,
      "learning_rate": 6.325520713217319e-06,
      "loss": 0.013,
      "step": 1135300
    },
    {
      "epoch": 1.8579760805954322,
      "grad_norm": 0.4000263214111328,
      "learning_rate": 6.3254548210038025e-06,
      "loss": 0.0104,
      "step": 1135320
    },
    {
      "epoch": 1.8580088110340856,
      "grad_norm": 0.19716215133666992,
      "learning_rate": 6.325388928790286e-06,
      "loss": 0.0143,
      "step": 1135340
    },
    {
      "epoch": 1.8580415414727387,
      "grad_norm": 0.4806451201438904,
      "learning_rate": 6.325323036576768e-06,
      "loss": 0.023,
      "step": 1135360
    },
    {
      "epoch": 1.8580742719113923,
      "grad_norm": 0.5500198602676392,
      "learning_rate": 6.3252571443632516e-06,
      "loss": 0.0168,
      "step": 1135380
    },
    {
      "epoch": 1.8581070023500454,
      "grad_norm": 0.8666127920150757,
      "learning_rate": 6.3251912521497335e-06,
      "loss": 0.0242,
      "step": 1135400
    },
    {
      "epoch": 1.8581397327886988,
      "grad_norm": 0.36650508642196655,
      "learning_rate": 6.325125359936217e-06,
      "loss": 0.0192,
      "step": 1135420
    },
    {
      "epoch": 1.8581724632273522,
      "grad_norm": 0.10861703753471375,
      "learning_rate": 6.3250594677227e-06,
      "loss": 0.018,
      "step": 1135440
    },
    {
      "epoch": 1.8582051936660056,
      "grad_norm": 0.3714628219604492,
      "learning_rate": 6.3249935755091825e-06,
      "loss": 0.012,
      "step": 1135460
    },
    {
      "epoch": 1.858237924104659,
      "grad_norm": 1.3634600639343262,
      "learning_rate": 6.324927683295665e-06,
      "loss": 0.0208,
      "step": 1135480
    },
    {
      "epoch": 1.858270654543312,
      "grad_norm": 0.20122148096561432,
      "learning_rate": 6.324861791082149e-06,
      "loss": 0.0177,
      "step": 1135500
    },
    {
      "epoch": 1.8583033849819657,
      "grad_norm": 0.5002301931381226,
      "learning_rate": 6.324795898868631e-06,
      "loss": 0.0156,
      "step": 1135520
    },
    {
      "epoch": 1.8583361154206188,
      "grad_norm": 0.6735402941703796,
      "learning_rate": 6.324730006655114e-06,
      "loss": 0.0233,
      "step": 1135540
    },
    {
      "epoch": 1.8583688458592722,
      "grad_norm": 0.262781023979187,
      "learning_rate": 6.324664114441596e-06,
      "loss": 0.019,
      "step": 1135560
    },
    {
      "epoch": 1.8584015762979256,
      "grad_norm": 0.5003625750541687,
      "learning_rate": 6.32459822222808e-06,
      "loss": 0.0166,
      "step": 1135580
    },
    {
      "epoch": 1.8584343067365787,
      "grad_norm": 0.3080238699913025,
      "learning_rate": 6.3245323300145625e-06,
      "loss": 0.0171,
      "step": 1135600
    },
    {
      "epoch": 1.8584670371752323,
      "grad_norm": 0.3532218337059021,
      "learning_rate": 6.324466437801045e-06,
      "loss": 0.0198,
      "step": 1135620
    },
    {
      "epoch": 1.8584997676138855,
      "grad_norm": 0.9285757541656494,
      "learning_rate": 6.324400545587528e-06,
      "loss": 0.0393,
      "step": 1135640
    },
    {
      "epoch": 1.858532498052539,
      "grad_norm": 0.07582336664199829,
      "learning_rate": 6.324334653374012e-06,
      "loss": 0.0143,
      "step": 1135660
    },
    {
      "epoch": 1.8585652284911922,
      "grad_norm": 0.16476339101791382,
      "learning_rate": 6.324268761160494e-06,
      "loss": 0.0154,
      "step": 1135680
    },
    {
      "epoch": 1.8585979589298456,
      "grad_norm": 0.5919853448867798,
      "learning_rate": 6.324202868946977e-06,
      "loss": 0.0192,
      "step": 1135700
    },
    {
      "epoch": 1.858630689368499,
      "grad_norm": 1.1480683088302612,
      "learning_rate": 6.324136976733461e-06,
      "loss": 0.0239,
      "step": 1135720
    },
    {
      "epoch": 1.858663419807152,
      "grad_norm": 0.6006003022193909,
      "learning_rate": 6.3240710845199426e-06,
      "loss": 0.0151,
      "step": 1135740
    },
    {
      "epoch": 1.8586961502458057,
      "grad_norm": 0.02796434424817562,
      "learning_rate": 6.324005192306426e-06,
      "loss": 0.0177,
      "step": 1135760
    },
    {
      "epoch": 1.8587288806844589,
      "grad_norm": 0.46170756220817566,
      "learning_rate": 6.323939300092908e-06,
      "loss": 0.0235,
      "step": 1135780
    },
    {
      "epoch": 1.8587616111231122,
      "grad_norm": 0.07290145754814148,
      "learning_rate": 6.323873407879392e-06,
      "loss": 0.0171,
      "step": 1135800
    },
    {
      "epoch": 1.8587943415617656,
      "grad_norm": 0.12944750487804413,
      "learning_rate": 6.323807515665874e-06,
      "loss": 0.0177,
      "step": 1135820
    },
    {
      "epoch": 1.858827072000419,
      "grad_norm": 0.6623649001121521,
      "learning_rate": 6.323741623452357e-06,
      "loss": 0.0137,
      "step": 1135840
    },
    {
      "epoch": 1.8588598024390723,
      "grad_norm": 0.9052219390869141,
      "learning_rate": 6.32367573123884e-06,
      "loss": 0.0193,
      "step": 1135860
    },
    {
      "epoch": 1.8588925328777255,
      "grad_norm": 0.626771092414856,
      "learning_rate": 6.3236098390253234e-06,
      "loss": 0.02,
      "step": 1135880
    },
    {
      "epoch": 1.858925263316379,
      "grad_norm": 0.22588388621807098,
      "learning_rate": 6.323543946811805e-06,
      "loss": 0.0156,
      "step": 1135900
    },
    {
      "epoch": 1.8589579937550322,
      "grad_norm": 0.24226275086402893,
      "learning_rate": 6.323478054598289e-06,
      "loss": 0.0219,
      "step": 1135920
    },
    {
      "epoch": 1.8589907241936856,
      "grad_norm": 0.748078465461731,
      "learning_rate": 6.323412162384771e-06,
      "loss": 0.0178,
      "step": 1135940
    },
    {
      "epoch": 1.859023454632339,
      "grad_norm": 0.16057918965816498,
      "learning_rate": 6.323346270171254e-06,
      "loss": 0.024,
      "step": 1135960
    },
    {
      "epoch": 1.8590561850709924,
      "grad_norm": 0.26831021904945374,
      "learning_rate": 6.323280377957737e-06,
      "loss": 0.0211,
      "step": 1135980
    },
    {
      "epoch": 1.8590889155096457,
      "grad_norm": 0.2808127999305725,
      "learning_rate": 6.32321448574422e-06,
      "loss": 0.0149,
      "step": 1136000
    },
    {
      "epoch": 1.8591216459482989,
      "grad_norm": 0.5506753921508789,
      "learning_rate": 6.3231485935307035e-06,
      "loss": 0.019,
      "step": 1136020
    },
    {
      "epoch": 1.8591543763869525,
      "grad_norm": 0.30647045373916626,
      "learning_rate": 6.323082701317186e-06,
      "loss": 0.0262,
      "step": 1136040
    },
    {
      "epoch": 1.8591871068256056,
      "grad_norm": 0.3594066798686981,
      "learning_rate": 6.323016809103669e-06,
      "loss": 0.0172,
      "step": 1136060
    },
    {
      "epoch": 1.859219837264259,
      "grad_norm": 0.3649281859397888,
      "learning_rate": 6.322950916890152e-06,
      "loss": 0.0214,
      "step": 1136080
    },
    {
      "epoch": 1.8592525677029124,
      "grad_norm": 0.35137319564819336,
      "learning_rate": 6.322885024676635e-06,
      "loss": 0.0164,
      "step": 1136100
    },
    {
      "epoch": 1.8592852981415657,
      "grad_norm": 0.8142462372779846,
      "learning_rate": 6.322819132463117e-06,
      "loss": 0.0183,
      "step": 1136120
    },
    {
      "epoch": 1.859318028580219,
      "grad_norm": 0.28809595108032227,
      "learning_rate": 6.322753240249601e-06,
      "loss": 0.0149,
      "step": 1136140
    },
    {
      "epoch": 1.8593507590188723,
      "grad_norm": 0.4512844681739807,
      "learning_rate": 6.322687348036083e-06,
      "loss": 0.0205,
      "step": 1136160
    },
    {
      "epoch": 1.8593834894575259,
      "grad_norm": 0.4487161338329315,
      "learning_rate": 6.322621455822566e-06,
      "loss": 0.0147,
      "step": 1136180
    },
    {
      "epoch": 1.859416219896179,
      "grad_norm": 0.34562206268310547,
      "learning_rate": 6.322555563609048e-06,
      "loss": 0.0176,
      "step": 1136200
    },
    {
      "epoch": 1.8594489503348324,
      "grad_norm": 0.3180064857006073,
      "learning_rate": 6.322489671395532e-06,
      "loss": 0.0221,
      "step": 1136220
    },
    {
      "epoch": 1.8594816807734857,
      "grad_norm": 0.18474635481834412,
      "learning_rate": 6.3224237791820145e-06,
      "loss": 0.0133,
      "step": 1136240
    },
    {
      "epoch": 1.8595144112121391,
      "grad_norm": 0.2506653666496277,
      "learning_rate": 6.322357886968498e-06,
      "loss": 0.0264,
      "step": 1136260
    },
    {
      "epoch": 1.8595471416507925,
      "grad_norm": 1.051835060119629,
      "learning_rate": 6.32229199475498e-06,
      "loss": 0.0143,
      "step": 1136280
    },
    {
      "epoch": 1.8595798720894456,
      "grad_norm": 0.4511646628379822,
      "learning_rate": 6.3222261025414635e-06,
      "loss": 0.0171,
      "step": 1136300
    },
    {
      "epoch": 1.8596126025280992,
      "grad_norm": 0.2850286066532135,
      "learning_rate": 6.322160210327945e-06,
      "loss": 0.0178,
      "step": 1136320
    },
    {
      "epoch": 1.8596453329667524,
      "grad_norm": 0.12637339532375336,
      "learning_rate": 6.322094318114429e-06,
      "loss": 0.0146,
      "step": 1136340
    },
    {
      "epoch": 1.8596780634054058,
      "grad_norm": 0.5705124139785767,
      "learning_rate": 6.322028425900911e-06,
      "loss": 0.0226,
      "step": 1136360
    },
    {
      "epoch": 1.8597107938440591,
      "grad_norm": 0.34175267815589905,
      "learning_rate": 6.3219625336873945e-06,
      "loss": 0.0162,
      "step": 1136380
    },
    {
      "epoch": 1.8597435242827123,
      "grad_norm": 0.689476490020752,
      "learning_rate": 6.321896641473878e-06,
      "loss": 0.0159,
      "step": 1136400
    },
    {
      "epoch": 1.8597762547213659,
      "grad_norm": 0.6936295032501221,
      "learning_rate": 6.32183074926036e-06,
      "loss": 0.0226,
      "step": 1136420
    },
    {
      "epoch": 1.859808985160019,
      "grad_norm": 0.6187572479248047,
      "learning_rate": 6.3217648570468435e-06,
      "loss": 0.0162,
      "step": 1136440
    },
    {
      "epoch": 1.8598417155986726,
      "grad_norm": 0.38238516449928284,
      "learning_rate": 6.321698964833326e-06,
      "loss": 0.0118,
      "step": 1136460
    },
    {
      "epoch": 1.8598744460373258,
      "grad_norm": 0.1839015781879425,
      "learning_rate": 6.321633072619809e-06,
      "loss": 0.014,
      "step": 1136480
    },
    {
      "epoch": 1.8599071764759791,
      "grad_norm": 0.5639309287071228,
      "learning_rate": 6.321567180406292e-06,
      "loss": 0.0153,
      "step": 1136500
    },
    {
      "epoch": 1.8599399069146325,
      "grad_norm": 0.12655651569366455,
      "learning_rate": 6.321501288192775e-06,
      "loss": 0.0191,
      "step": 1136520
    },
    {
      "epoch": 1.8599726373532857,
      "grad_norm": 0.2723272740840912,
      "learning_rate": 6.321435395979257e-06,
      "loss": 0.0233,
      "step": 1136540
    },
    {
      "epoch": 1.8600053677919393,
      "grad_norm": 0.6141886711120605,
      "learning_rate": 6.321369503765741e-06,
      "loss": 0.012,
      "step": 1136560
    },
    {
      "epoch": 1.8600380982305924,
      "grad_norm": 0.41604408621788025,
      "learning_rate": 6.321303611552223e-06,
      "loss": 0.0112,
      "step": 1136580
    },
    {
      "epoch": 1.8600708286692458,
      "grad_norm": 0.35302454233169556,
      "learning_rate": 6.321237719338706e-06,
      "loss": 0.0173,
      "step": 1136600
    },
    {
      "epoch": 1.8601035591078992,
      "grad_norm": 0.6441168785095215,
      "learning_rate": 6.321171827125189e-06,
      "loss": 0.0123,
      "step": 1136620
    },
    {
      "epoch": 1.8601362895465525,
      "grad_norm": 1.5694218873977661,
      "learning_rate": 6.321105934911672e-06,
      "loss": 0.0194,
      "step": 1136640
    },
    {
      "epoch": 1.860169019985206,
      "grad_norm": 0.3017588257789612,
      "learning_rate": 6.3210400426981545e-06,
      "loss": 0.0155,
      "step": 1136660
    },
    {
      "epoch": 1.860201750423859,
      "grad_norm": 0.18308518826961517,
      "learning_rate": 6.320974150484638e-06,
      "loss": 0.0197,
      "step": 1136680
    },
    {
      "epoch": 1.8602344808625126,
      "grad_norm": 0.3104345500469208,
      "learning_rate": 6.32090825827112e-06,
      "loss": 0.0188,
      "step": 1136700
    },
    {
      "epoch": 1.8602672113011658,
      "grad_norm": 0.7690802216529846,
      "learning_rate": 6.320842366057604e-06,
      "loss": 0.0169,
      "step": 1136720
    },
    {
      "epoch": 1.8602999417398192,
      "grad_norm": 0.36779752373695374,
      "learning_rate": 6.320776473844087e-06,
      "loss": 0.02,
      "step": 1136740
    },
    {
      "epoch": 1.8603326721784725,
      "grad_norm": 0.7761862277984619,
      "learning_rate": 6.320710581630569e-06,
      "loss": 0.0222,
      "step": 1136760
    },
    {
      "epoch": 1.860365402617126,
      "grad_norm": 0.6033392548561096,
      "learning_rate": 6.320644689417053e-06,
      "loss": 0.0139,
      "step": 1136780
    },
    {
      "epoch": 1.8603981330557793,
      "grad_norm": 0.16207058727741241,
      "learning_rate": 6.3205787972035346e-06,
      "loss": 0.0169,
      "step": 1136800
    },
    {
      "epoch": 1.8604308634944324,
      "grad_norm": 0.9799814820289612,
      "learning_rate": 6.320512904990018e-06,
      "loss": 0.0171,
      "step": 1136820
    },
    {
      "epoch": 1.860463593933086,
      "grad_norm": 0.4532831907272339,
      "learning_rate": 6.320447012776501e-06,
      "loss": 0.0219,
      "step": 1136840
    },
    {
      "epoch": 1.8604963243717392,
      "grad_norm": 1.3028825521469116,
      "learning_rate": 6.320381120562984e-06,
      "loss": 0.0166,
      "step": 1136860
    },
    {
      "epoch": 1.8605290548103925,
      "grad_norm": 0.5260552167892456,
      "learning_rate": 6.320315228349466e-06,
      "loss": 0.023,
      "step": 1136880
    },
    {
      "epoch": 1.860561785249046,
      "grad_norm": 0.1818661093711853,
      "learning_rate": 6.32024933613595e-06,
      "loss": 0.0173,
      "step": 1136900
    },
    {
      "epoch": 1.8605945156876993,
      "grad_norm": 0.8427072763442993,
      "learning_rate": 6.320183443922432e-06,
      "loss": 0.0229,
      "step": 1136920
    },
    {
      "epoch": 1.8606272461263527,
      "grad_norm": 0.3852250277996063,
      "learning_rate": 6.3201175517089154e-06,
      "loss": 0.0149,
      "step": 1136940
    },
    {
      "epoch": 1.8606599765650058,
      "grad_norm": 0.7688574194908142,
      "learning_rate": 6.320051659495397e-06,
      "loss": 0.0183,
      "step": 1136960
    },
    {
      "epoch": 1.8606927070036594,
      "grad_norm": 0.35210126638412476,
      "learning_rate": 6.319985767281881e-06,
      "loss": 0.0237,
      "step": 1136980
    },
    {
      "epoch": 1.8607254374423126,
      "grad_norm": 0.47362056374549866,
      "learning_rate": 6.319919875068364e-06,
      "loss": 0.0155,
      "step": 1137000
    },
    {
      "epoch": 1.860758167880966,
      "grad_norm": 0.35525551438331604,
      "learning_rate": 6.319853982854846e-06,
      "loss": 0.018,
      "step": 1137020
    },
    {
      "epoch": 1.8607908983196193,
      "grad_norm": 0.9801177978515625,
      "learning_rate": 6.319788090641329e-06,
      "loss": 0.0172,
      "step": 1137040
    },
    {
      "epoch": 1.8608236287582725,
      "grad_norm": 0.2536649703979492,
      "learning_rate": 6.319722198427813e-06,
      "loss": 0.024,
      "step": 1137060
    },
    {
      "epoch": 1.860856359196926,
      "grad_norm": 0.22014065086841583,
      "learning_rate": 6.3196563062142954e-06,
      "loss": 0.015,
      "step": 1137080
    },
    {
      "epoch": 1.8608890896355792,
      "grad_norm": 0.3061465919017792,
      "learning_rate": 6.319590414000778e-06,
      "loss": 0.0155,
      "step": 1137100
    },
    {
      "epoch": 1.8609218200742328,
      "grad_norm": 0.40234798192977905,
      "learning_rate": 6.319524521787262e-06,
      "loss": 0.0121,
      "step": 1137120
    },
    {
      "epoch": 1.860954550512886,
      "grad_norm": 0.20429207384586334,
      "learning_rate": 6.319458629573744e-06,
      "loss": 0.0269,
      "step": 1137140
    },
    {
      "epoch": 1.8609872809515393,
      "grad_norm": 1.2531925439834595,
      "learning_rate": 6.319392737360227e-06,
      "loss": 0.0234,
      "step": 1137160
    },
    {
      "epoch": 1.8610200113901927,
      "grad_norm": 0.8847643733024597,
      "learning_rate": 6.319326845146709e-06,
      "loss": 0.0167,
      "step": 1137180
    },
    {
      "epoch": 1.8610527418288458,
      "grad_norm": 0.5155724883079529,
      "learning_rate": 6.319260952933193e-06,
      "loss": 0.0189,
      "step": 1137200
    },
    {
      "epoch": 1.8610854722674994,
      "grad_norm": 0.4740283489227295,
      "learning_rate": 6.319195060719675e-06,
      "loss": 0.0125,
      "step": 1137220
    },
    {
      "epoch": 1.8611182027061526,
      "grad_norm": 0.6417014002799988,
      "learning_rate": 6.319129168506158e-06,
      "loss": 0.0162,
      "step": 1137240
    },
    {
      "epoch": 1.861150933144806,
      "grad_norm": 0.5491817593574524,
      "learning_rate": 6.319063276292641e-06,
      "loss": 0.0257,
      "step": 1137260
    },
    {
      "epoch": 1.8611836635834593,
      "grad_norm": 0.35996899008750916,
      "learning_rate": 6.318997384079124e-06,
      "loss": 0.0188,
      "step": 1137280
    },
    {
      "epoch": 1.8612163940221127,
      "grad_norm": 0.7536325454711914,
      "learning_rate": 6.3189314918656064e-06,
      "loss": 0.0238,
      "step": 1137300
    },
    {
      "epoch": 1.861249124460766,
      "grad_norm": 0.3831758201122284,
      "learning_rate": 6.31886559965209e-06,
      "loss": 0.0193,
      "step": 1137320
    },
    {
      "epoch": 1.8612818548994192,
      "grad_norm": 0.2050720602273941,
      "learning_rate": 6.318799707438572e-06,
      "loss": 0.0153,
      "step": 1137340
    },
    {
      "epoch": 1.8613145853380728,
      "grad_norm": 0.10093008726835251,
      "learning_rate": 6.3187338152250555e-06,
      "loss": 0.0144,
      "step": 1137360
    },
    {
      "epoch": 1.861347315776726,
      "grad_norm": 0.5273529291152954,
      "learning_rate": 6.318667923011537e-06,
      "loss": 0.019,
      "step": 1137380
    },
    {
      "epoch": 1.8613800462153793,
      "grad_norm": 0.45077112317085266,
      "learning_rate": 6.318602030798021e-06,
      "loss": 0.0153,
      "step": 1137400
    },
    {
      "epoch": 1.8614127766540327,
      "grad_norm": 5.185321807861328,
      "learning_rate": 6.318536138584504e-06,
      "loss": 0.0209,
      "step": 1137420
    },
    {
      "epoch": 1.861445507092686,
      "grad_norm": 0.9939810633659363,
      "learning_rate": 6.3184702463709865e-06,
      "loss": 0.0151,
      "step": 1137440
    },
    {
      "epoch": 1.8614782375313395,
      "grad_norm": 0.5365689992904663,
      "learning_rate": 6.31840435415747e-06,
      "loss": 0.0199,
      "step": 1137460
    },
    {
      "epoch": 1.8615109679699926,
      "grad_norm": 0.72511887550354,
      "learning_rate": 6.318338461943953e-06,
      "loss": 0.0244,
      "step": 1137480
    },
    {
      "epoch": 1.8615436984086462,
      "grad_norm": 0.4802757501602173,
      "learning_rate": 6.3182725697304355e-06,
      "loss": 0.0188,
      "step": 1137500
    },
    {
      "epoch": 1.8615764288472993,
      "grad_norm": 0.12518982589244843,
      "learning_rate": 6.318206677516918e-06,
      "loss": 0.0195,
      "step": 1137520
    },
    {
      "epoch": 1.8616091592859527,
      "grad_norm": 0.6334208846092224,
      "learning_rate": 6.318140785303402e-06,
      "loss": 0.0205,
      "step": 1137540
    },
    {
      "epoch": 1.861641889724606,
      "grad_norm": 0.18651048839092255,
      "learning_rate": 6.318074893089884e-06,
      "loss": 0.017,
      "step": 1137560
    },
    {
      "epoch": 1.8616746201632595,
      "grad_norm": 0.37447866797447205,
      "learning_rate": 6.318009000876367e-06,
      "loss": 0.0222,
      "step": 1137580
    },
    {
      "epoch": 1.8617073506019128,
      "grad_norm": 0.7200194001197815,
      "learning_rate": 6.317943108662849e-06,
      "loss": 0.0261,
      "step": 1137600
    },
    {
      "epoch": 1.861740081040566,
      "grad_norm": 0.15494942665100098,
      "learning_rate": 6.317877216449333e-06,
      "loss": 0.0155,
      "step": 1137620
    },
    {
      "epoch": 1.8617728114792196,
      "grad_norm": 0.10490193963050842,
      "learning_rate": 6.3178113242358156e-06,
      "loss": 0.0228,
      "step": 1137640
    },
    {
      "epoch": 1.8618055419178727,
      "grad_norm": 0.23414789140224457,
      "learning_rate": 6.317745432022298e-06,
      "loss": 0.0213,
      "step": 1137660
    },
    {
      "epoch": 1.861838272356526,
      "grad_norm": 0.36169758439064026,
      "learning_rate": 6.317679539808781e-06,
      "loss": 0.0174,
      "step": 1137680
    },
    {
      "epoch": 1.8618710027951795,
      "grad_norm": 1.0497039556503296,
      "learning_rate": 6.317613647595265e-06,
      "loss": 0.0191,
      "step": 1137700
    },
    {
      "epoch": 1.8619037332338328,
      "grad_norm": 0.5492817163467407,
      "learning_rate": 6.3175477553817465e-06,
      "loss": 0.0155,
      "step": 1137720
    },
    {
      "epoch": 1.8619364636724862,
      "grad_norm": 0.2892853915691376,
      "learning_rate": 6.31748186316823e-06,
      "loss": 0.0125,
      "step": 1137740
    },
    {
      "epoch": 1.8619691941111394,
      "grad_norm": 0.191398024559021,
      "learning_rate": 6.317415970954712e-06,
      "loss": 0.0125,
      "step": 1137760
    },
    {
      "epoch": 1.862001924549793,
      "grad_norm": 0.8040120005607605,
      "learning_rate": 6.3173500787411956e-06,
      "loss": 0.0224,
      "step": 1137780
    },
    {
      "epoch": 1.8620346549884461,
      "grad_norm": 0.28374573588371277,
      "learning_rate": 6.317284186527679e-06,
      "loss": 0.0105,
      "step": 1137800
    },
    {
      "epoch": 1.8620673854270995,
      "grad_norm": 0.47952646017074585,
      "learning_rate": 6.317218294314161e-06,
      "loss": 0.0221,
      "step": 1137820
    },
    {
      "epoch": 1.8621001158657529,
      "grad_norm": 0.9344043731689453,
      "learning_rate": 6.317152402100645e-06,
      "loss": 0.0232,
      "step": 1137840
    },
    {
      "epoch": 1.862132846304406,
      "grad_norm": 0.1930144876241684,
      "learning_rate": 6.317086509887127e-06,
      "loss": 0.0207,
      "step": 1137860
    },
    {
      "epoch": 1.8621655767430596,
      "grad_norm": 0.38778606057167053,
      "learning_rate": 6.31702061767361e-06,
      "loss": 0.0169,
      "step": 1137880
    },
    {
      "epoch": 1.8621983071817128,
      "grad_norm": 0.3075445890426636,
      "learning_rate": 6.316954725460093e-06,
      "loss": 0.0209,
      "step": 1137900
    },
    {
      "epoch": 1.8622310376203663,
      "grad_norm": 1.2179224491119385,
      "learning_rate": 6.3168888332465764e-06,
      "loss": 0.0181,
      "step": 1137920
    },
    {
      "epoch": 1.8622637680590195,
      "grad_norm": 0.6479854583740234,
      "learning_rate": 6.316822941033058e-06,
      "loss": 0.0287,
      "step": 1137940
    },
    {
      "epoch": 1.8622964984976729,
      "grad_norm": 0.26342132687568665,
      "learning_rate": 6.316757048819542e-06,
      "loss": 0.0192,
      "step": 1137960
    },
    {
      "epoch": 1.8623292289363262,
      "grad_norm": 0.45218080282211304,
      "learning_rate": 6.316691156606024e-06,
      "loss": 0.0166,
      "step": 1137980
    },
    {
      "epoch": 1.8623619593749794,
      "grad_norm": 0.3599207401275635,
      "learning_rate": 6.316625264392507e-06,
      "loss": 0.0179,
      "step": 1138000
    },
    {
      "epoch": 1.862394689813633,
      "grad_norm": 0.6624833941459656,
      "learning_rate": 6.31655937217899e-06,
      "loss": 0.0165,
      "step": 1138020
    },
    {
      "epoch": 1.8624274202522861,
      "grad_norm": 0.25009602308273315,
      "learning_rate": 6.316493479965473e-06,
      "loss": 0.0142,
      "step": 1138040
    },
    {
      "epoch": 1.8624601506909395,
      "grad_norm": 0.1491600126028061,
      "learning_rate": 6.316427587751956e-06,
      "loss": 0.0153,
      "step": 1138060
    },
    {
      "epoch": 1.8624928811295929,
      "grad_norm": 0.8297709822654724,
      "learning_rate": 6.316361695538439e-06,
      "loss": 0.0211,
      "step": 1138080
    },
    {
      "epoch": 1.8625256115682463,
      "grad_norm": 0.4599815011024475,
      "learning_rate": 6.316295803324921e-06,
      "loss": 0.0198,
      "step": 1138100
    },
    {
      "epoch": 1.8625583420068996,
      "grad_norm": 0.27150821685791016,
      "learning_rate": 6.316229911111405e-06,
      "loss": 0.0155,
      "step": 1138120
    },
    {
      "epoch": 1.8625910724455528,
      "grad_norm": 0.28137776255607605,
      "learning_rate": 6.316164018897888e-06,
      "loss": 0.017,
      "step": 1138140
    },
    {
      "epoch": 1.8626238028842064,
      "grad_norm": 0.192669078707695,
      "learning_rate": 6.31609812668437e-06,
      "loss": 0.0237,
      "step": 1138160
    },
    {
      "epoch": 1.8626565333228595,
      "grad_norm": 0.3126618564128876,
      "learning_rate": 6.316032234470854e-06,
      "loss": 0.019,
      "step": 1138180
    },
    {
      "epoch": 1.862689263761513,
      "grad_norm": 0.35951921343803406,
      "learning_rate": 6.315966342257336e-06,
      "loss": 0.013,
      "step": 1138200
    },
    {
      "epoch": 1.8627219942001663,
      "grad_norm": 0.5196416974067688,
      "learning_rate": 6.315900450043819e-06,
      "loss": 0.0246,
      "step": 1138220
    },
    {
      "epoch": 1.8627547246388196,
      "grad_norm": 0.36829692125320435,
      "learning_rate": 6.315834557830301e-06,
      "loss": 0.0159,
      "step": 1138240
    },
    {
      "epoch": 1.862787455077473,
      "grad_norm": 0.34965917468070984,
      "learning_rate": 6.315768665616785e-06,
      "loss": 0.0205,
      "step": 1138260
    },
    {
      "epoch": 1.8628201855161262,
      "grad_norm": 0.20062929391860962,
      "learning_rate": 6.3157027734032675e-06,
      "loss": 0.0164,
      "step": 1138280
    },
    {
      "epoch": 1.8628529159547798,
      "grad_norm": 0.5827299952507019,
      "learning_rate": 6.31563688118975e-06,
      "loss": 0.0146,
      "step": 1138300
    },
    {
      "epoch": 1.862885646393433,
      "grad_norm": 0.6578881740570068,
      "learning_rate": 6.315570988976233e-06,
      "loss": 0.0265,
      "step": 1138320
    },
    {
      "epoch": 1.8629183768320863,
      "grad_norm": 0.22405341267585754,
      "learning_rate": 6.3155050967627165e-06,
      "loss": 0.0245,
      "step": 1138340
    },
    {
      "epoch": 1.8629511072707396,
      "grad_norm": 0.7702292203903198,
      "learning_rate": 6.315439204549198e-06,
      "loss": 0.0153,
      "step": 1138360
    },
    {
      "epoch": 1.862983837709393,
      "grad_norm": 0.40047696232795715,
      "learning_rate": 6.315373312335682e-06,
      "loss": 0.0134,
      "step": 1138380
    },
    {
      "epoch": 1.8630165681480464,
      "grad_norm": 0.4316353499889374,
      "learning_rate": 6.315307420122164e-06,
      "loss": 0.018,
      "step": 1138400
    },
    {
      "epoch": 1.8630492985866995,
      "grad_norm": 0.08742248266935349,
      "learning_rate": 6.3152415279086475e-06,
      "loss": 0.018,
      "step": 1138420
    },
    {
      "epoch": 1.8630820290253531,
      "grad_norm": 0.1978759467601776,
      "learning_rate": 6.31517563569513e-06,
      "loss": 0.0165,
      "step": 1138440
    },
    {
      "epoch": 1.8631147594640063,
      "grad_norm": 3.6655707359313965,
      "learning_rate": 6.315109743481613e-06,
      "loss": 0.0154,
      "step": 1138460
    },
    {
      "epoch": 1.8631474899026597,
      "grad_norm": 2.4027278423309326,
      "learning_rate": 6.3150438512680965e-06,
      "loss": 0.0237,
      "step": 1138480
    },
    {
      "epoch": 1.863180220341313,
      "grad_norm": 0.1711883842945099,
      "learning_rate": 6.314977959054579e-06,
      "loss": 0.0155,
      "step": 1138500
    },
    {
      "epoch": 1.8632129507799664,
      "grad_norm": 0.7006945610046387,
      "learning_rate": 6.314912066841062e-06,
      "loss": 0.0199,
      "step": 1138520
    },
    {
      "epoch": 1.8632456812186198,
      "grad_norm": 1.4362667798995972,
      "learning_rate": 6.314846174627545e-06,
      "loss": 0.013,
      "step": 1138540
    },
    {
      "epoch": 1.863278411657273,
      "grad_norm": 0.2072780430316925,
      "learning_rate": 6.314780282414028e-06,
      "loss": 0.0218,
      "step": 1138560
    },
    {
      "epoch": 1.8633111420959265,
      "grad_norm": 0.4409460723400116,
      "learning_rate": 6.31471439020051e-06,
      "loss": 0.0121,
      "step": 1138580
    },
    {
      "epoch": 1.8633438725345797,
      "grad_norm": 0.10515176504850388,
      "learning_rate": 6.314648497986994e-06,
      "loss": 0.0241,
      "step": 1138600
    },
    {
      "epoch": 1.863376602973233,
      "grad_norm": 0.289449006319046,
      "learning_rate": 6.314582605773476e-06,
      "loss": 0.0133,
      "step": 1138620
    },
    {
      "epoch": 1.8634093334118864,
      "grad_norm": 0.09205953776836395,
      "learning_rate": 6.314516713559959e-06,
      "loss": 0.0153,
      "step": 1138640
    },
    {
      "epoch": 1.8634420638505396,
      "grad_norm": 0.41478022933006287,
      "learning_rate": 6.314450821346442e-06,
      "loss": 0.0201,
      "step": 1138660
    },
    {
      "epoch": 1.8634747942891932,
      "grad_norm": 0.4118416905403137,
      "learning_rate": 6.314384929132925e-06,
      "loss": 0.0202,
      "step": 1138680
    },
    {
      "epoch": 1.8635075247278463,
      "grad_norm": 0.4361851215362549,
      "learning_rate": 6.3143190369194075e-06,
      "loss": 0.0198,
      "step": 1138700
    },
    {
      "epoch": 1.8635402551665,
      "grad_norm": 1.0609050989151,
      "learning_rate": 6.314253144705891e-06,
      "loss": 0.0144,
      "step": 1138720
    },
    {
      "epoch": 1.863572985605153,
      "grad_norm": 0.3907408118247986,
      "learning_rate": 6.314187252492373e-06,
      "loss": 0.0193,
      "step": 1138740
    },
    {
      "epoch": 1.8636057160438064,
      "grad_norm": 0.27893003821372986,
      "learning_rate": 6.314121360278857e-06,
      "loss": 0.0175,
      "step": 1138760
    },
    {
      "epoch": 1.8636384464824598,
      "grad_norm": 0.5210493803024292,
      "learning_rate": 6.3140554680653385e-06,
      "loss": 0.018,
      "step": 1138780
    },
    {
      "epoch": 1.863671176921113,
      "grad_norm": 0.5222463011741638,
      "learning_rate": 6.313989575851822e-06,
      "loss": 0.014,
      "step": 1138800
    },
    {
      "epoch": 1.8637039073597665,
      "grad_norm": 0.1706005185842514,
      "learning_rate": 6.313923683638305e-06,
      "loss": 0.0207,
      "step": 1138820
    },
    {
      "epoch": 1.8637366377984197,
      "grad_norm": 0.5713078379631042,
      "learning_rate": 6.3138577914247876e-06,
      "loss": 0.0193,
      "step": 1138840
    },
    {
      "epoch": 1.863769368237073,
      "grad_norm": 0.2876167595386505,
      "learning_rate": 6.313791899211271e-06,
      "loss": 0.0127,
      "step": 1138860
    },
    {
      "epoch": 1.8638020986757264,
      "grad_norm": 0.5817264914512634,
      "learning_rate": 6.313726006997754e-06,
      "loss": 0.02,
      "step": 1138880
    },
    {
      "epoch": 1.8638348291143798,
      "grad_norm": 0.8921112418174744,
      "learning_rate": 6.313660114784237e-06,
      "loss": 0.0196,
      "step": 1138900
    },
    {
      "epoch": 1.8638675595530332,
      "grad_norm": 0.28696170449256897,
      "learning_rate": 6.313594222570719e-06,
      "loss": 0.0125,
      "step": 1138920
    },
    {
      "epoch": 1.8639002899916863,
      "grad_norm": 0.8726131916046143,
      "learning_rate": 6.313528330357203e-06,
      "loss": 0.0254,
      "step": 1138940
    },
    {
      "epoch": 1.86393302043034,
      "grad_norm": 0.31646332144737244,
      "learning_rate": 6.313462438143685e-06,
      "loss": 0.0208,
      "step": 1138960
    },
    {
      "epoch": 1.863965750868993,
      "grad_norm": 0.1961243897676468,
      "learning_rate": 6.3133965459301684e-06,
      "loss": 0.0204,
      "step": 1138980
    },
    {
      "epoch": 1.8639984813076464,
      "grad_norm": 0.7143728137016296,
      "learning_rate": 6.31333065371665e-06,
      "loss": 0.0175,
      "step": 1139000
    },
    {
      "epoch": 1.8640312117462998,
      "grad_norm": 0.8888664245605469,
      "learning_rate": 6.313264761503134e-06,
      "loss": 0.0181,
      "step": 1139020
    },
    {
      "epoch": 1.8640639421849532,
      "grad_norm": 0.16206486523151398,
      "learning_rate": 6.313198869289617e-06,
      "loss": 0.0176,
      "step": 1139040
    },
    {
      "epoch": 1.8640966726236066,
      "grad_norm": 0.587454080581665,
      "learning_rate": 6.313132977076099e-06,
      "loss": 0.0206,
      "step": 1139060
    },
    {
      "epoch": 1.8641294030622597,
      "grad_norm": 0.2041981816291809,
      "learning_rate": 6.313067084862582e-06,
      "loss": 0.0096,
      "step": 1139080
    },
    {
      "epoch": 1.8641621335009133,
      "grad_norm": 0.4344428479671478,
      "learning_rate": 6.313001192649066e-06,
      "loss": 0.0188,
      "step": 1139100
    },
    {
      "epoch": 1.8641948639395665,
      "grad_norm": 0.16634616255760193,
      "learning_rate": 6.312935300435548e-06,
      "loss": 0.0259,
      "step": 1139120
    },
    {
      "epoch": 1.8642275943782198,
      "grad_norm": 0.42924609780311584,
      "learning_rate": 6.312869408222031e-06,
      "loss": 0.019,
      "step": 1139140
    },
    {
      "epoch": 1.8642603248168732,
      "grad_norm": 0.05588391050696373,
      "learning_rate": 6.312803516008513e-06,
      "loss": 0.016,
      "step": 1139160
    },
    {
      "epoch": 1.8642930552555266,
      "grad_norm": 1.621233344078064,
      "learning_rate": 6.312737623794997e-06,
      "loss": 0.0191,
      "step": 1139180
    },
    {
      "epoch": 1.86432578569418,
      "grad_norm": 0.8345561623573303,
      "learning_rate": 6.31267173158148e-06,
      "loss": 0.0222,
      "step": 1139200
    },
    {
      "epoch": 1.864358516132833,
      "grad_norm": 0.5300208330154419,
      "learning_rate": 6.312605839367962e-06,
      "loss": 0.021,
      "step": 1139220
    },
    {
      "epoch": 1.8643912465714867,
      "grad_norm": 0.11252531409263611,
      "learning_rate": 6.312539947154446e-06,
      "loss": 0.0183,
      "step": 1139240
    },
    {
      "epoch": 1.8644239770101398,
      "grad_norm": 0.7216994762420654,
      "learning_rate": 6.312474054940928e-06,
      "loss": 0.0202,
      "step": 1139260
    },
    {
      "epoch": 1.8644567074487932,
      "grad_norm": 1.024848461151123,
      "learning_rate": 6.312408162727411e-06,
      "loss": 0.0214,
      "step": 1139280
    },
    {
      "epoch": 1.8644894378874466,
      "grad_norm": 1.5500996112823486,
      "learning_rate": 6.312342270513894e-06,
      "loss": 0.0105,
      "step": 1139300
    },
    {
      "epoch": 1.8645221683261,
      "grad_norm": 0.7526657581329346,
      "learning_rate": 6.312276378300377e-06,
      "loss": 0.0174,
      "step": 1139320
    },
    {
      "epoch": 1.8645548987647533,
      "grad_norm": 0.1775466799736023,
      "learning_rate": 6.3122104860868594e-06,
      "loss": 0.0135,
      "step": 1139340
    },
    {
      "epoch": 1.8645876292034065,
      "grad_norm": 0.22136516869068146,
      "learning_rate": 6.312144593873343e-06,
      "loss": 0.0173,
      "step": 1139360
    },
    {
      "epoch": 1.86462035964206,
      "grad_norm": 0.6689574718475342,
      "learning_rate": 6.312078701659825e-06,
      "loss": 0.0169,
      "step": 1139380
    },
    {
      "epoch": 1.8646530900807132,
      "grad_norm": 0.37891292572021484,
      "learning_rate": 6.3120128094463085e-06,
      "loss": 0.0141,
      "step": 1139400
    },
    {
      "epoch": 1.8646858205193666,
      "grad_norm": 0.3071783781051636,
      "learning_rate": 6.31194691723279e-06,
      "loss": 0.0159,
      "step": 1139420
    },
    {
      "epoch": 1.86471855095802,
      "grad_norm": 0.19354958832263947,
      "learning_rate": 6.311881025019274e-06,
      "loss": 0.0222,
      "step": 1139440
    },
    {
      "epoch": 1.8647512813966731,
      "grad_norm": 0.5376856327056885,
      "learning_rate": 6.311815132805757e-06,
      "loss": 0.0163,
      "step": 1139460
    },
    {
      "epoch": 1.8647840118353267,
      "grad_norm": 0.7500982284545898,
      "learning_rate": 6.3117492405922395e-06,
      "loss": 0.0187,
      "step": 1139480
    },
    {
      "epoch": 1.8648167422739799,
      "grad_norm": 0.4942333996295929,
      "learning_rate": 6.311683348378722e-06,
      "loss": 0.0227,
      "step": 1139500
    },
    {
      "epoch": 1.8648494727126335,
      "grad_norm": 0.6035692095756531,
      "learning_rate": 6.311617456165206e-06,
      "loss": 0.0113,
      "step": 1139520
    },
    {
      "epoch": 1.8648822031512866,
      "grad_norm": 1.4916634559631348,
      "learning_rate": 6.3115515639516885e-06,
      "loss": 0.0253,
      "step": 1139540
    },
    {
      "epoch": 1.86491493358994,
      "grad_norm": 0.4764200448989868,
      "learning_rate": 6.311485671738171e-06,
      "loss": 0.0179,
      "step": 1139560
    },
    {
      "epoch": 1.8649476640285934,
      "grad_norm": 0.1789758801460266,
      "learning_rate": 6.311419779524655e-06,
      "loss": 0.0164,
      "step": 1139580
    },
    {
      "epoch": 1.8649803944672465,
      "grad_norm": 0.3478621542453766,
      "learning_rate": 6.311353887311137e-06,
      "loss": 0.0216,
      "step": 1139600
    },
    {
      "epoch": 1.8650131249059,
      "grad_norm": 0.6179441809654236,
      "learning_rate": 6.31128799509762e-06,
      "loss": 0.0149,
      "step": 1139620
    },
    {
      "epoch": 1.8650458553445532,
      "grad_norm": 0.37540316581726074,
      "learning_rate": 6.311222102884102e-06,
      "loss": 0.0185,
      "step": 1139640
    },
    {
      "epoch": 1.8650785857832066,
      "grad_norm": 0.33304882049560547,
      "learning_rate": 6.311156210670586e-06,
      "loss": 0.013,
      "step": 1139660
    },
    {
      "epoch": 1.86511131622186,
      "grad_norm": 0.5819520950317383,
      "learning_rate": 6.3110903184570686e-06,
      "loss": 0.021,
      "step": 1139680
    },
    {
      "epoch": 1.8651440466605134,
      "grad_norm": 0.2618837356567383,
      "learning_rate": 6.311024426243551e-06,
      "loss": 0.0157,
      "step": 1139700
    },
    {
      "epoch": 1.8651767770991667,
      "grad_norm": 0.3563506603240967,
      "learning_rate": 6.310958534030034e-06,
      "loss": 0.0147,
      "step": 1139720
    },
    {
      "epoch": 1.8652095075378199,
      "grad_norm": 0.39949285984039307,
      "learning_rate": 6.310892641816518e-06,
      "loss": 0.0206,
      "step": 1139740
    },
    {
      "epoch": 1.8652422379764735,
      "grad_norm": 0.21157823503017426,
      "learning_rate": 6.3108267496029995e-06,
      "loss": 0.0179,
      "step": 1139760
    },
    {
      "epoch": 1.8652749684151266,
      "grad_norm": 0.12083515524864197,
      "learning_rate": 6.310760857389483e-06,
      "loss": 0.0181,
      "step": 1139780
    },
    {
      "epoch": 1.86530769885378,
      "grad_norm": 0.2190946489572525,
      "learning_rate": 6.310694965175965e-06,
      "loss": 0.023,
      "step": 1139800
    },
    {
      "epoch": 1.8653404292924334,
      "grad_norm": 0.16097861528396606,
      "learning_rate": 6.310629072962449e-06,
      "loss": 0.0196,
      "step": 1139820
    },
    {
      "epoch": 1.8653731597310867,
      "grad_norm": 0.15997068583965302,
      "learning_rate": 6.310563180748931e-06,
      "loss": 0.0168,
      "step": 1139840
    },
    {
      "epoch": 1.8654058901697401,
      "grad_norm": 0.20428387820720673,
      "learning_rate": 6.310497288535414e-06,
      "loss": 0.0142,
      "step": 1139860
    },
    {
      "epoch": 1.8654386206083933,
      "grad_norm": 0.5269314646720886,
      "learning_rate": 6.310431396321897e-06,
      "loss": 0.0177,
      "step": 1139880
    },
    {
      "epoch": 1.8654713510470469,
      "grad_norm": 4.499660015106201,
      "learning_rate": 6.31036550410838e-06,
      "loss": 0.0202,
      "step": 1139900
    },
    {
      "epoch": 1.8655040814857,
      "grad_norm": 0.5150066614151001,
      "learning_rate": 6.310299611894863e-06,
      "loss": 0.0173,
      "step": 1139920
    },
    {
      "epoch": 1.8655368119243534,
      "grad_norm": 0.17890110611915588,
      "learning_rate": 6.310233719681346e-06,
      "loss": 0.0148,
      "step": 1139940
    },
    {
      "epoch": 1.8655695423630068,
      "grad_norm": 0.5684406161308289,
      "learning_rate": 6.3101678274678295e-06,
      "loss": 0.0165,
      "step": 1139960
    },
    {
      "epoch": 1.8656022728016601,
      "grad_norm": 0.43033602833747864,
      "learning_rate": 6.310101935254311e-06,
      "loss": 0.0161,
      "step": 1139980
    },
    {
      "epoch": 1.8656350032403135,
      "grad_norm": 0.7777265310287476,
      "learning_rate": 6.310036043040795e-06,
      "loss": 0.0171,
      "step": 1140000
    },
    {
      "epoch": 1.8656677336789667,
      "grad_norm": 0.228446364402771,
      "learning_rate": 6.309970150827277e-06,
      "loss": 0.0166,
      "step": 1140020
    },
    {
      "epoch": 1.8657004641176202,
      "grad_norm": 1.2310740947723389,
      "learning_rate": 6.30990425861376e-06,
      "loss": 0.0264,
      "step": 1140040
    },
    {
      "epoch": 1.8657331945562734,
      "grad_norm": 0.5036394596099854,
      "learning_rate": 6.309838366400242e-06,
      "loss": 0.0166,
      "step": 1140060
    },
    {
      "epoch": 1.8657659249949268,
      "grad_norm": 0.07030466198921204,
      "learning_rate": 6.309772474186726e-06,
      "loss": 0.0211,
      "step": 1140080
    },
    {
      "epoch": 1.8657986554335801,
      "grad_norm": 0.21353940665721893,
      "learning_rate": 6.309706581973209e-06,
      "loss": 0.0175,
      "step": 1140100
    },
    {
      "epoch": 1.8658313858722333,
      "grad_norm": 0.21279411017894745,
      "learning_rate": 6.309640689759691e-06,
      "loss": 0.0185,
      "step": 1140120
    },
    {
      "epoch": 1.8658641163108869,
      "grad_norm": 1.6561018228530884,
      "learning_rate": 6.309574797546174e-06,
      "loss": 0.0212,
      "step": 1140140
    },
    {
      "epoch": 1.86589684674954,
      "grad_norm": 0.07246503233909607,
      "learning_rate": 6.309508905332658e-06,
      "loss": 0.0219,
      "step": 1140160
    },
    {
      "epoch": 1.8659295771881936,
      "grad_norm": 0.16389411687850952,
      "learning_rate": 6.30944301311914e-06,
      "loss": 0.017,
      "step": 1140180
    },
    {
      "epoch": 1.8659623076268468,
      "grad_norm": 0.1360631287097931,
      "learning_rate": 6.309377120905623e-06,
      "loss": 0.0159,
      "step": 1140200
    },
    {
      "epoch": 1.8659950380655002,
      "grad_norm": 0.7624844312667847,
      "learning_rate": 6.309311228692105e-06,
      "loss": 0.0167,
      "step": 1140220
    },
    {
      "epoch": 1.8660277685041535,
      "grad_norm": 0.9553595185279846,
      "learning_rate": 6.309245336478589e-06,
      "loss": 0.0193,
      "step": 1140240
    },
    {
      "epoch": 1.8660604989428067,
      "grad_norm": 0.8728338479995728,
      "learning_rate": 6.309179444265072e-06,
      "loss": 0.015,
      "step": 1140260
    },
    {
      "epoch": 1.8660932293814603,
      "grad_norm": 0.16768673062324524,
      "learning_rate": 6.309113552051554e-06,
      "loss": 0.0106,
      "step": 1140280
    },
    {
      "epoch": 1.8661259598201134,
      "grad_norm": 0.6733577847480774,
      "learning_rate": 6.309047659838038e-06,
      "loss": 0.0204,
      "step": 1140300
    },
    {
      "epoch": 1.8661586902587668,
      "grad_norm": 0.5659214854240417,
      "learning_rate": 6.3089817676245205e-06,
      "loss": 0.0173,
      "step": 1140320
    },
    {
      "epoch": 1.8661914206974202,
      "grad_norm": 0.822138786315918,
      "learning_rate": 6.308915875411003e-06,
      "loss": 0.0169,
      "step": 1140340
    },
    {
      "epoch": 1.8662241511360735,
      "grad_norm": 1.369480013847351,
      "learning_rate": 6.308849983197486e-06,
      "loss": 0.0173,
      "step": 1140360
    },
    {
      "epoch": 1.866256881574727,
      "grad_norm": 0.19721277058124542,
      "learning_rate": 6.3087840909839695e-06,
      "loss": 0.0182,
      "step": 1140380
    },
    {
      "epoch": 1.86628961201338,
      "grad_norm": 0.4605312645435333,
      "learning_rate": 6.308718198770451e-06,
      "loss": 0.0224,
      "step": 1140400
    },
    {
      "epoch": 1.8663223424520337,
      "grad_norm": 0.2702418565750122,
      "learning_rate": 6.308652306556935e-06,
      "loss": 0.0141,
      "step": 1140420
    },
    {
      "epoch": 1.8663550728906868,
      "grad_norm": 0.3436829447746277,
      "learning_rate": 6.308586414343417e-06,
      "loss": 0.0225,
      "step": 1140440
    },
    {
      "epoch": 1.8663878033293402,
      "grad_norm": 0.639838457107544,
      "learning_rate": 6.3085205221299005e-06,
      "loss": 0.0182,
      "step": 1140460
    },
    {
      "epoch": 1.8664205337679936,
      "grad_norm": 0.6534569263458252,
      "learning_rate": 6.308454629916383e-06,
      "loss": 0.0147,
      "step": 1140480
    },
    {
      "epoch": 1.866453264206647,
      "grad_norm": 0.3559359014034271,
      "learning_rate": 6.308388737702866e-06,
      "loss": 0.0181,
      "step": 1140500
    },
    {
      "epoch": 1.8664859946453003,
      "grad_norm": 0.3612261414527893,
      "learning_rate": 6.308322845489349e-06,
      "loss": 0.0212,
      "step": 1140520
    },
    {
      "epoch": 1.8665187250839534,
      "grad_norm": 0.16977450251579285,
      "learning_rate": 6.308256953275832e-06,
      "loss": 0.0121,
      "step": 1140540
    },
    {
      "epoch": 1.866551455522607,
      "grad_norm": 0.5086151957511902,
      "learning_rate": 6.308191061062314e-06,
      "loss": 0.0195,
      "step": 1140560
    },
    {
      "epoch": 1.8665841859612602,
      "grad_norm": 0.4013789892196655,
      "learning_rate": 6.308125168848798e-06,
      "loss": 0.0107,
      "step": 1140580
    },
    {
      "epoch": 1.8666169163999136,
      "grad_norm": 0.43805816769599915,
      "learning_rate": 6.308059276635281e-06,
      "loss": 0.0212,
      "step": 1140600
    },
    {
      "epoch": 1.866649646838567,
      "grad_norm": 0.18087808787822723,
      "learning_rate": 6.307993384421763e-06,
      "loss": 0.0172,
      "step": 1140620
    },
    {
      "epoch": 1.8666823772772203,
      "grad_norm": 0.18058688938617706,
      "learning_rate": 6.307927492208247e-06,
      "loss": 0.0192,
      "step": 1140640
    },
    {
      "epoch": 1.8667151077158737,
      "grad_norm": 0.42857280373573303,
      "learning_rate": 6.307861599994729e-06,
      "loss": 0.0167,
      "step": 1140660
    },
    {
      "epoch": 1.8667478381545268,
      "grad_norm": 1.2550691366195679,
      "learning_rate": 6.307795707781212e-06,
      "loss": 0.0202,
      "step": 1140680
    },
    {
      "epoch": 1.8667805685931804,
      "grad_norm": 0.339786559343338,
      "learning_rate": 6.307729815567695e-06,
      "loss": 0.0228,
      "step": 1140700
    },
    {
      "epoch": 1.8668132990318336,
      "grad_norm": 0.3384561240673065,
      "learning_rate": 6.307663923354178e-06,
      "loss": 0.0254,
      "step": 1140720
    },
    {
      "epoch": 1.866846029470487,
      "grad_norm": 1.322550654411316,
      "learning_rate": 6.3075980311406605e-06,
      "loss": 0.0261,
      "step": 1140740
    },
    {
      "epoch": 1.8668787599091403,
      "grad_norm": 0.11238816380500793,
      "learning_rate": 6.307532138927144e-06,
      "loss": 0.0152,
      "step": 1140760
    },
    {
      "epoch": 1.8669114903477937,
      "grad_norm": 0.5587042570114136,
      "learning_rate": 6.307466246713626e-06,
      "loss": 0.0154,
      "step": 1140780
    },
    {
      "epoch": 1.866944220786447,
      "grad_norm": 0.9442281126976013,
      "learning_rate": 6.30740035450011e-06,
      "loss": 0.0183,
      "step": 1140800
    },
    {
      "epoch": 1.8669769512251002,
      "grad_norm": 2.649578332901001,
      "learning_rate": 6.3073344622865915e-06,
      "loss": 0.0291,
      "step": 1140820
    },
    {
      "epoch": 1.8670096816637538,
      "grad_norm": 0.5738612413406372,
      "learning_rate": 6.307268570073075e-06,
      "loss": 0.014,
      "step": 1140840
    },
    {
      "epoch": 1.867042412102407,
      "grad_norm": 0.33752015233039856,
      "learning_rate": 6.307202677859558e-06,
      "loss": 0.0156,
      "step": 1140860
    },
    {
      "epoch": 1.8670751425410603,
      "grad_norm": 0.19579117000102997,
      "learning_rate": 6.3071367856460406e-06,
      "loss": 0.0185,
      "step": 1140880
    },
    {
      "epoch": 1.8671078729797137,
      "grad_norm": 0.6439324617385864,
      "learning_rate": 6.307070893432523e-06,
      "loss": 0.0195,
      "step": 1140900
    },
    {
      "epoch": 1.8671406034183669,
      "grad_norm": 0.15289436280727386,
      "learning_rate": 6.307005001219007e-06,
      "loss": 0.0206,
      "step": 1140920
    },
    {
      "epoch": 1.8671733338570204,
      "grad_norm": 0.0973779484629631,
      "learning_rate": 6.30693910900549e-06,
      "loss": 0.0212,
      "step": 1140940
    },
    {
      "epoch": 1.8672060642956736,
      "grad_norm": 0.2505043148994446,
      "learning_rate": 6.306873216791972e-06,
      "loss": 0.0222,
      "step": 1140960
    },
    {
      "epoch": 1.8672387947343272,
      "grad_norm": 0.45529019832611084,
      "learning_rate": 6.306807324578456e-06,
      "loss": 0.0183,
      "step": 1140980
    },
    {
      "epoch": 1.8672715251729803,
      "grad_norm": 0.3874034285545349,
      "learning_rate": 6.306741432364938e-06,
      "loss": 0.0236,
      "step": 1141000
    },
    {
      "epoch": 1.8673042556116337,
      "grad_norm": 1.4454971551895142,
      "learning_rate": 6.3066755401514214e-06,
      "loss": 0.0183,
      "step": 1141020
    },
    {
      "epoch": 1.867336986050287,
      "grad_norm": 1.2549107074737549,
      "learning_rate": 6.306609647937903e-06,
      "loss": 0.0143,
      "step": 1141040
    },
    {
      "epoch": 1.8673697164889402,
      "grad_norm": 0.7887166738510132,
      "learning_rate": 6.306543755724387e-06,
      "loss": 0.0176,
      "step": 1141060
    },
    {
      "epoch": 1.8674024469275938,
      "grad_norm": 1.246569037437439,
      "learning_rate": 6.306477863510869e-06,
      "loss": 0.0195,
      "step": 1141080
    },
    {
      "epoch": 1.867435177366247,
      "grad_norm": 0.30064982175827026,
      "learning_rate": 6.306411971297352e-06,
      "loss": 0.0149,
      "step": 1141100
    },
    {
      "epoch": 1.8674679078049004,
      "grad_norm": 0.09765764325857162,
      "learning_rate": 6.306346079083835e-06,
      "loss": 0.0204,
      "step": 1141120
    },
    {
      "epoch": 1.8675006382435537,
      "grad_norm": 0.34598907828330994,
      "learning_rate": 6.306280186870318e-06,
      "loss": 0.0208,
      "step": 1141140
    },
    {
      "epoch": 1.867533368682207,
      "grad_norm": 0.7181887030601501,
      "learning_rate": 6.306214294656801e-06,
      "loss": 0.0252,
      "step": 1141160
    },
    {
      "epoch": 1.8675660991208605,
      "grad_norm": 0.14369578659534454,
      "learning_rate": 6.306148402443284e-06,
      "loss": 0.0167,
      "step": 1141180
    },
    {
      "epoch": 1.8675988295595136,
      "grad_norm": 0.31800636649131775,
      "learning_rate": 6.306082510229766e-06,
      "loss": 0.0183,
      "step": 1141200
    },
    {
      "epoch": 1.8676315599981672,
      "grad_norm": 1.3897724151611328,
      "learning_rate": 6.30601661801625e-06,
      "loss": 0.0199,
      "step": 1141220
    },
    {
      "epoch": 1.8676642904368204,
      "grad_norm": 0.5074985027313232,
      "learning_rate": 6.3059507258027316e-06,
      "loss": 0.0138,
      "step": 1141240
    },
    {
      "epoch": 1.8676970208754737,
      "grad_norm": 0.5667464137077332,
      "learning_rate": 6.305884833589215e-06,
      "loss": 0.0168,
      "step": 1141260
    },
    {
      "epoch": 1.867729751314127,
      "grad_norm": 1.149925708770752,
      "learning_rate": 6.305818941375698e-06,
      "loss": 0.0188,
      "step": 1141280
    },
    {
      "epoch": 1.8677624817527805,
      "grad_norm": 0.1785319596529007,
      "learning_rate": 6.305753049162181e-06,
      "loss": 0.022,
      "step": 1141300
    },
    {
      "epoch": 1.8677952121914339,
      "grad_norm": 0.8217202425003052,
      "learning_rate": 6.305687156948664e-06,
      "loss": 0.0185,
      "step": 1141320
    },
    {
      "epoch": 1.867827942630087,
      "grad_norm": 0.3543740510940552,
      "learning_rate": 6.305621264735147e-06,
      "loss": 0.0187,
      "step": 1141340
    },
    {
      "epoch": 1.8678606730687406,
      "grad_norm": 0.47469601035118103,
      "learning_rate": 6.30555537252163e-06,
      "loss": 0.0268,
      "step": 1141360
    },
    {
      "epoch": 1.8678934035073937,
      "grad_norm": 0.39469006657600403,
      "learning_rate": 6.3054894803081124e-06,
      "loss": 0.0164,
      "step": 1141380
    },
    {
      "epoch": 1.8679261339460471,
      "grad_norm": 0.44776007533073425,
      "learning_rate": 6.305423588094596e-06,
      "loss": 0.0218,
      "step": 1141400
    },
    {
      "epoch": 1.8679588643847005,
      "grad_norm": 1.6202900409698486,
      "learning_rate": 6.305357695881078e-06,
      "loss": 0.0186,
      "step": 1141420
    },
    {
      "epoch": 1.8679915948233539,
      "grad_norm": 1.160841703414917,
      "learning_rate": 6.3052918036675615e-06,
      "loss": 0.0147,
      "step": 1141440
    },
    {
      "epoch": 1.8680243252620072,
      "grad_norm": 0.4208744764328003,
      "learning_rate": 6.305225911454043e-06,
      "loss": 0.0149,
      "step": 1141460
    },
    {
      "epoch": 1.8680570557006604,
      "grad_norm": 1.300370693206787,
      "learning_rate": 6.305160019240527e-06,
      "loss": 0.0212,
      "step": 1141480
    },
    {
      "epoch": 1.868089786139314,
      "grad_norm": 0.07055005431175232,
      "learning_rate": 6.30509412702701e-06,
      "loss": 0.0158,
      "step": 1141500
    },
    {
      "epoch": 1.8681225165779671,
      "grad_norm": 0.4521191418170929,
      "learning_rate": 6.3050282348134925e-06,
      "loss": 0.0156,
      "step": 1141520
    },
    {
      "epoch": 1.8681552470166205,
      "grad_norm": 0.6564018726348877,
      "learning_rate": 6.304962342599975e-06,
      "loss": 0.0162,
      "step": 1141540
    },
    {
      "epoch": 1.8681879774552739,
      "grad_norm": 0.4082643985748291,
      "learning_rate": 6.304896450386459e-06,
      "loss": 0.0182,
      "step": 1141560
    },
    {
      "epoch": 1.8682207078939272,
      "grad_norm": 0.4310528039932251,
      "learning_rate": 6.304830558172941e-06,
      "loss": 0.0183,
      "step": 1141580
    },
    {
      "epoch": 1.8682534383325806,
      "grad_norm": 1.249965786933899,
      "learning_rate": 6.304764665959424e-06,
      "loss": 0.0239,
      "step": 1141600
    },
    {
      "epoch": 1.8682861687712338,
      "grad_norm": 0.1951891928911209,
      "learning_rate": 6.304698773745906e-06,
      "loss": 0.021,
      "step": 1141620
    },
    {
      "epoch": 1.8683188992098874,
      "grad_norm": 0.4657130539417267,
      "learning_rate": 6.30463288153239e-06,
      "loss": 0.0118,
      "step": 1141640
    },
    {
      "epoch": 1.8683516296485405,
      "grad_norm": 0.13445855677127838,
      "learning_rate": 6.304566989318873e-06,
      "loss": 0.0162,
      "step": 1141660
    },
    {
      "epoch": 1.8683843600871939,
      "grad_norm": 0.39919304847717285,
      "learning_rate": 6.304501097105355e-06,
      "loss": 0.0239,
      "step": 1141680
    },
    {
      "epoch": 1.8684170905258473,
      "grad_norm": 0.6328827142715454,
      "learning_rate": 6.304435204891839e-06,
      "loss": 0.022,
      "step": 1141700
    },
    {
      "epoch": 1.8684498209645004,
      "grad_norm": 0.4972802996635437,
      "learning_rate": 6.3043693126783216e-06,
      "loss": 0.019,
      "step": 1141720
    },
    {
      "epoch": 1.868482551403154,
      "grad_norm": 0.2995541989803314,
      "learning_rate": 6.304303420464804e-06,
      "loss": 0.0173,
      "step": 1141740
    },
    {
      "epoch": 1.8685152818418072,
      "grad_norm": 0.22976012527942657,
      "learning_rate": 6.304237528251287e-06,
      "loss": 0.0154,
      "step": 1141760
    },
    {
      "epoch": 1.8685480122804607,
      "grad_norm": 0.5387496948242188,
      "learning_rate": 6.304171636037771e-06,
      "loss": 0.0186,
      "step": 1141780
    },
    {
      "epoch": 1.868580742719114,
      "grad_norm": 0.3497759699821472,
      "learning_rate": 6.3041057438242525e-06,
      "loss": 0.0182,
      "step": 1141800
    },
    {
      "epoch": 1.8686134731577673,
      "grad_norm": 0.586974024772644,
      "learning_rate": 6.304039851610736e-06,
      "loss": 0.016,
      "step": 1141820
    },
    {
      "epoch": 1.8686462035964206,
      "grad_norm": 0.7820054888725281,
      "learning_rate": 6.303973959397218e-06,
      "loss": 0.0199,
      "step": 1141840
    },
    {
      "epoch": 1.8686789340350738,
      "grad_norm": 0.3902328908443451,
      "learning_rate": 6.303908067183702e-06,
      "loss": 0.0221,
      "step": 1141860
    },
    {
      "epoch": 1.8687116644737274,
      "grad_norm": 0.13174377381801605,
      "learning_rate": 6.303842174970184e-06,
      "loss": 0.0179,
      "step": 1141880
    },
    {
      "epoch": 1.8687443949123805,
      "grad_norm": 0.959986686706543,
      "learning_rate": 6.303776282756667e-06,
      "loss": 0.0225,
      "step": 1141900
    },
    {
      "epoch": 1.868777125351034,
      "grad_norm": 0.28717041015625,
      "learning_rate": 6.30371039054315e-06,
      "loss": 0.0142,
      "step": 1141920
    },
    {
      "epoch": 1.8688098557896873,
      "grad_norm": 0.3370295464992523,
      "learning_rate": 6.303644498329633e-06,
      "loss": 0.0211,
      "step": 1141940
    },
    {
      "epoch": 1.8688425862283407,
      "grad_norm": 0.2998299300670624,
      "learning_rate": 6.303578606116115e-06,
      "loss": 0.0116,
      "step": 1141960
    },
    {
      "epoch": 1.868875316666994,
      "grad_norm": 0.5061600208282471,
      "learning_rate": 6.303512713902599e-06,
      "loss": 0.0155,
      "step": 1141980
    },
    {
      "epoch": 1.8689080471056472,
      "grad_norm": 0.2951081395149231,
      "learning_rate": 6.3034468216890825e-06,
      "loss": 0.0163,
      "step": 1142000
    },
    {
      "epoch": 1.8689407775443008,
      "grad_norm": 0.5660786032676697,
      "learning_rate": 6.303380929475564e-06,
      "loss": 0.026,
      "step": 1142020
    },
    {
      "epoch": 1.868973507982954,
      "grad_norm": 0.41773316264152527,
      "learning_rate": 6.303315037262048e-06,
      "loss": 0.0171,
      "step": 1142040
    },
    {
      "epoch": 1.8690062384216073,
      "grad_norm": 0.061658915132284164,
      "learning_rate": 6.30324914504853e-06,
      "loss": 0.0202,
      "step": 1142060
    },
    {
      "epoch": 1.8690389688602607,
      "grad_norm": 0.6806156039237976,
      "learning_rate": 6.303183252835013e-06,
      "loss": 0.0195,
      "step": 1142080
    },
    {
      "epoch": 1.869071699298914,
      "grad_norm": 0.9451518654823303,
      "learning_rate": 6.303117360621495e-06,
      "loss": 0.0153,
      "step": 1142100
    },
    {
      "epoch": 1.8691044297375674,
      "grad_norm": 0.9246883392333984,
      "learning_rate": 6.303051468407979e-06,
      "loss": 0.015,
      "step": 1142120
    },
    {
      "epoch": 1.8691371601762206,
      "grad_norm": 0.4511067867279053,
      "learning_rate": 6.302985576194462e-06,
      "loss": 0.0163,
      "step": 1142140
    },
    {
      "epoch": 1.8691698906148742,
      "grad_norm": 0.5710530281066895,
      "learning_rate": 6.302919683980944e-06,
      "loss": 0.0179,
      "step": 1142160
    },
    {
      "epoch": 1.8692026210535273,
      "grad_norm": 0.36157169938087463,
      "learning_rate": 6.302853791767427e-06,
      "loss": 0.0165,
      "step": 1142180
    },
    {
      "epoch": 1.8692353514921807,
      "grad_norm": 0.22799503803253174,
      "learning_rate": 6.302787899553911e-06,
      "loss": 0.0151,
      "step": 1142200
    },
    {
      "epoch": 1.869268081930834,
      "grad_norm": 2.710750102996826,
      "learning_rate": 6.302722007340393e-06,
      "loss": 0.0177,
      "step": 1142220
    },
    {
      "epoch": 1.8693008123694874,
      "grad_norm": 1.0735440254211426,
      "learning_rate": 6.302656115126876e-06,
      "loss": 0.0197,
      "step": 1142240
    },
    {
      "epoch": 1.8693335428081408,
      "grad_norm": 0.0859566330909729,
      "learning_rate": 6.302590222913358e-06,
      "loss": 0.0183,
      "step": 1142260
    },
    {
      "epoch": 1.869366273246794,
      "grad_norm": 0.49888524413108826,
      "learning_rate": 6.302524330699842e-06,
      "loss": 0.0167,
      "step": 1142280
    },
    {
      "epoch": 1.8693990036854475,
      "grad_norm": 0.5298977494239807,
      "learning_rate": 6.302458438486324e-06,
      "loss": 0.0217,
      "step": 1142300
    },
    {
      "epoch": 1.8694317341241007,
      "grad_norm": 0.7754972577095032,
      "learning_rate": 6.302392546272807e-06,
      "loss": 0.0146,
      "step": 1142320
    },
    {
      "epoch": 1.869464464562754,
      "grad_norm": 1.3136398792266846,
      "learning_rate": 6.30232665405929e-06,
      "loss": 0.0236,
      "step": 1142340
    },
    {
      "epoch": 1.8694971950014074,
      "grad_norm": 0.23033151030540466,
      "learning_rate": 6.3022607618457735e-06,
      "loss": 0.0204,
      "step": 1142360
    },
    {
      "epoch": 1.8695299254400608,
      "grad_norm": 0.4031422734260559,
      "learning_rate": 6.302194869632256e-06,
      "loss": 0.0183,
      "step": 1142380
    },
    {
      "epoch": 1.8695626558787142,
      "grad_norm": 0.2404564619064331,
      "learning_rate": 6.302128977418739e-06,
      "loss": 0.0243,
      "step": 1142400
    },
    {
      "epoch": 1.8695953863173673,
      "grad_norm": 0.11749883741140366,
      "learning_rate": 6.3020630852052225e-06,
      "loss": 0.0123,
      "step": 1142420
    },
    {
      "epoch": 1.869628116756021,
      "grad_norm": 1.0435764789581299,
      "learning_rate": 6.3019971929917044e-06,
      "loss": 0.0126,
      "step": 1142440
    },
    {
      "epoch": 1.869660847194674,
      "grad_norm": 0.2539884150028229,
      "learning_rate": 6.301931300778188e-06,
      "loss": 0.0142,
      "step": 1142460
    },
    {
      "epoch": 1.8696935776333274,
      "grad_norm": 0.4905218183994293,
      "learning_rate": 6.30186540856467e-06,
      "loss": 0.0136,
      "step": 1142480
    },
    {
      "epoch": 1.8697263080719808,
      "grad_norm": 0.1621161699295044,
      "learning_rate": 6.3017995163511535e-06,
      "loss": 0.0178,
      "step": 1142500
    },
    {
      "epoch": 1.869759038510634,
      "grad_norm": 1.2413651943206787,
      "learning_rate": 6.301733624137636e-06,
      "loss": 0.0143,
      "step": 1142520
    },
    {
      "epoch": 1.8697917689492876,
      "grad_norm": 1.0189430713653564,
      "learning_rate": 6.301667731924119e-06,
      "loss": 0.0162,
      "step": 1142540
    },
    {
      "epoch": 1.8698244993879407,
      "grad_norm": 0.8005030155181885,
      "learning_rate": 6.301601839710602e-06,
      "loss": 0.0195,
      "step": 1142560
    },
    {
      "epoch": 1.869857229826594,
      "grad_norm": 0.40425801277160645,
      "learning_rate": 6.301535947497085e-06,
      "loss": 0.0162,
      "step": 1142580
    },
    {
      "epoch": 1.8698899602652475,
      "grad_norm": 0.1600314974784851,
      "learning_rate": 6.301470055283567e-06,
      "loss": 0.0173,
      "step": 1142600
    },
    {
      "epoch": 1.8699226907039008,
      "grad_norm": 0.7718504667282104,
      "learning_rate": 6.301404163070051e-06,
      "loss": 0.0172,
      "step": 1142620
    },
    {
      "epoch": 1.8699554211425542,
      "grad_norm": 0.21475620567798615,
      "learning_rate": 6.301338270856533e-06,
      "loss": 0.0177,
      "step": 1142640
    },
    {
      "epoch": 1.8699881515812073,
      "grad_norm": 0.09576455503702164,
      "learning_rate": 6.301272378643016e-06,
      "loss": 0.0223,
      "step": 1142660
    },
    {
      "epoch": 1.870020882019861,
      "grad_norm": 1.3599371910095215,
      "learning_rate": 6.301206486429499e-06,
      "loss": 0.0188,
      "step": 1142680
    },
    {
      "epoch": 1.870053612458514,
      "grad_norm": 0.35426315665245056,
      "learning_rate": 6.301140594215982e-06,
      "loss": 0.0147,
      "step": 1142700
    },
    {
      "epoch": 1.8700863428971675,
      "grad_norm": 0.7198019623756409,
      "learning_rate": 6.301074702002465e-06,
      "loss": 0.0227,
      "step": 1142720
    },
    {
      "epoch": 1.8701190733358208,
      "grad_norm": 0.3658158779144287,
      "learning_rate": 6.301008809788948e-06,
      "loss": 0.0123,
      "step": 1142740
    },
    {
      "epoch": 1.8701518037744742,
      "grad_norm": 0.2949077785015106,
      "learning_rate": 6.300942917575431e-06,
      "loss": 0.0148,
      "step": 1142760
    },
    {
      "epoch": 1.8701845342131276,
      "grad_norm": 0.5675217509269714,
      "learning_rate": 6.3008770253619135e-06,
      "loss": 0.0174,
      "step": 1142780
    },
    {
      "epoch": 1.8702172646517807,
      "grad_norm": 1.106633186340332,
      "learning_rate": 6.300811133148397e-06,
      "loss": 0.0259,
      "step": 1142800
    },
    {
      "epoch": 1.8702499950904343,
      "grad_norm": 0.1005418449640274,
      "learning_rate": 6.300745240934879e-06,
      "loss": 0.02,
      "step": 1142820
    },
    {
      "epoch": 1.8702827255290875,
      "grad_norm": 0.7477877140045166,
      "learning_rate": 6.300679348721363e-06,
      "loss": 0.0222,
      "step": 1142840
    },
    {
      "epoch": 1.8703154559677408,
      "grad_norm": 0.31523436307907104,
      "learning_rate": 6.3006134565078445e-06,
      "loss": 0.0173,
      "step": 1142860
    },
    {
      "epoch": 1.8703481864063942,
      "grad_norm": 0.67498779296875,
      "learning_rate": 6.300547564294328e-06,
      "loss": 0.0206,
      "step": 1142880
    },
    {
      "epoch": 1.8703809168450476,
      "grad_norm": 1.7297987937927246,
      "learning_rate": 6.30048167208081e-06,
      "loss": 0.018,
      "step": 1142900
    },
    {
      "epoch": 1.870413647283701,
      "grad_norm": 0.1649620085954666,
      "learning_rate": 6.3004157798672936e-06,
      "loss": 0.023,
      "step": 1142920
    },
    {
      "epoch": 1.8704463777223541,
      "grad_norm": 6.154623031616211,
      "learning_rate": 6.300349887653776e-06,
      "loss": 0.0212,
      "step": 1142940
    },
    {
      "epoch": 1.8704791081610077,
      "grad_norm": 0.34554001688957214,
      "learning_rate": 6.30028399544026e-06,
      "loss": 0.0187,
      "step": 1142960
    },
    {
      "epoch": 1.8705118385996609,
      "grad_norm": 0.10769679397344589,
      "learning_rate": 6.300218103226742e-06,
      "loss": 0.0214,
      "step": 1142980
    },
    {
      "epoch": 1.8705445690383142,
      "grad_norm": 0.17894209921360016,
      "learning_rate": 6.300152211013225e-06,
      "loss": 0.0145,
      "step": 1143000
    },
    {
      "epoch": 1.8705772994769676,
      "grad_norm": 0.4038892090320587,
      "learning_rate": 6.300086318799707e-06,
      "loss": 0.0177,
      "step": 1143020
    },
    {
      "epoch": 1.870610029915621,
      "grad_norm": 0.5086791515350342,
      "learning_rate": 6.300020426586191e-06,
      "loss": 0.0225,
      "step": 1143040
    },
    {
      "epoch": 1.8706427603542743,
      "grad_norm": 0.5126818418502808,
      "learning_rate": 6.2999545343726744e-06,
      "loss": 0.0274,
      "step": 1143060
    },
    {
      "epoch": 1.8706754907929275,
      "grad_norm": 3.182387351989746,
      "learning_rate": 6.299888642159156e-06,
      "loss": 0.0237,
      "step": 1143080
    },
    {
      "epoch": 1.870708221231581,
      "grad_norm": 0.2489672601222992,
      "learning_rate": 6.29982274994564e-06,
      "loss": 0.0112,
      "step": 1143100
    },
    {
      "epoch": 1.8707409516702342,
      "grad_norm": 0.5544744729995728,
      "learning_rate": 6.299756857732122e-06,
      "loss": 0.0156,
      "step": 1143120
    },
    {
      "epoch": 1.8707736821088876,
      "grad_norm": 0.46830910444259644,
      "learning_rate": 6.299690965518605e-06,
      "loss": 0.0164,
      "step": 1143140
    },
    {
      "epoch": 1.870806412547541,
      "grad_norm": 0.305947870016098,
      "learning_rate": 6.299625073305088e-06,
      "loss": 0.0202,
      "step": 1143160
    },
    {
      "epoch": 1.8708391429861941,
      "grad_norm": 0.27086496353149414,
      "learning_rate": 6.299559181091571e-06,
      "loss": 0.0107,
      "step": 1143180
    },
    {
      "epoch": 1.8708718734248477,
      "grad_norm": 0.8650925755500793,
      "learning_rate": 6.299493288878054e-06,
      "loss": 0.0188,
      "step": 1143200
    },
    {
      "epoch": 1.8709046038635009,
      "grad_norm": 0.41082602739334106,
      "learning_rate": 6.299427396664537e-06,
      "loss": 0.0155,
      "step": 1143220
    },
    {
      "epoch": 1.8709373343021545,
      "grad_norm": 0.6816286444664001,
      "learning_rate": 6.299361504451019e-06,
      "loss": 0.0181,
      "step": 1143240
    },
    {
      "epoch": 1.8709700647408076,
      "grad_norm": 0.25035738945007324,
      "learning_rate": 6.299295612237503e-06,
      "loss": 0.0179,
      "step": 1143260
    },
    {
      "epoch": 1.871002795179461,
      "grad_norm": 0.3640196919441223,
      "learning_rate": 6.299229720023985e-06,
      "loss": 0.0137,
      "step": 1143280
    },
    {
      "epoch": 1.8710355256181144,
      "grad_norm": 0.6607719659805298,
      "learning_rate": 6.299163827810468e-06,
      "loss": 0.0231,
      "step": 1143300
    },
    {
      "epoch": 1.8710682560567675,
      "grad_norm": 0.6687928438186646,
      "learning_rate": 6.299097935596951e-06,
      "loss": 0.0136,
      "step": 1143320
    },
    {
      "epoch": 1.8711009864954211,
      "grad_norm": 1.5314949750900269,
      "learning_rate": 6.299032043383434e-06,
      "loss": 0.0226,
      "step": 1143340
    },
    {
      "epoch": 1.8711337169340743,
      "grad_norm": 1.605881929397583,
      "learning_rate": 6.298966151169916e-06,
      "loss": 0.0141,
      "step": 1143360
    },
    {
      "epoch": 1.8711664473727276,
      "grad_norm": 0.5972421765327454,
      "learning_rate": 6.2989002589564e-06,
      "loss": 0.0224,
      "step": 1143380
    },
    {
      "epoch": 1.871199177811381,
      "grad_norm": 0.26771777868270874,
      "learning_rate": 6.298834366742882e-06,
      "loss": 0.0128,
      "step": 1143400
    },
    {
      "epoch": 1.8712319082500344,
      "grad_norm": 0.4675431549549103,
      "learning_rate": 6.2987684745293655e-06,
      "loss": 0.0156,
      "step": 1143420
    },
    {
      "epoch": 1.8712646386886878,
      "grad_norm": 0.09460168331861496,
      "learning_rate": 6.298702582315849e-06,
      "loss": 0.015,
      "step": 1143440
    },
    {
      "epoch": 1.871297369127341,
      "grad_norm": 0.502137303352356,
      "learning_rate": 6.298636690102331e-06,
      "loss": 0.0181,
      "step": 1143460
    },
    {
      "epoch": 1.8713300995659945,
      "grad_norm": 0.35273653268814087,
      "learning_rate": 6.2985707978888145e-06,
      "loss": 0.0167,
      "step": 1143480
    },
    {
      "epoch": 1.8713628300046476,
      "grad_norm": 0.7775339484214783,
      "learning_rate": 6.298504905675296e-06,
      "loss": 0.0183,
      "step": 1143500
    },
    {
      "epoch": 1.871395560443301,
      "grad_norm": 0.2238665074110031,
      "learning_rate": 6.29843901346178e-06,
      "loss": 0.0195,
      "step": 1143520
    },
    {
      "epoch": 1.8714282908819544,
      "grad_norm": 0.10167837142944336,
      "learning_rate": 6.298373121248263e-06,
      "loss": 0.0207,
      "step": 1143540
    },
    {
      "epoch": 1.8714610213206078,
      "grad_norm": 0.4960744380950928,
      "learning_rate": 6.2983072290347455e-06,
      "loss": 0.0206,
      "step": 1143560
    },
    {
      "epoch": 1.8714937517592611,
      "grad_norm": 0.5337275862693787,
      "learning_rate": 6.298241336821228e-06,
      "loss": 0.017,
      "step": 1143580
    },
    {
      "epoch": 1.8715264821979143,
      "grad_norm": 0.5909790992736816,
      "learning_rate": 6.298175444607712e-06,
      "loss": 0.0223,
      "step": 1143600
    },
    {
      "epoch": 1.8715592126365679,
      "grad_norm": 0.3820892870426178,
      "learning_rate": 6.298109552394194e-06,
      "loss": 0.0211,
      "step": 1143620
    },
    {
      "epoch": 1.871591943075221,
      "grad_norm": 0.8546322584152222,
      "learning_rate": 6.298043660180677e-06,
      "loss": 0.0217,
      "step": 1143640
    },
    {
      "epoch": 1.8716246735138744,
      "grad_norm": 0.5764012932777405,
      "learning_rate": 6.297977767967159e-06,
      "loss": 0.0191,
      "step": 1143660
    },
    {
      "epoch": 1.8716574039525278,
      "grad_norm": 0.2584337294101715,
      "learning_rate": 6.297911875753643e-06,
      "loss": 0.0199,
      "step": 1143680
    },
    {
      "epoch": 1.8716901343911811,
      "grad_norm": 0.48496657609939575,
      "learning_rate": 6.2978459835401255e-06,
      "loss": 0.0247,
      "step": 1143700
    },
    {
      "epoch": 1.8717228648298345,
      "grad_norm": 0.32061630487442017,
      "learning_rate": 6.297780091326608e-06,
      "loss": 0.0183,
      "step": 1143720
    },
    {
      "epoch": 1.8717555952684877,
      "grad_norm": 0.4749927222728729,
      "learning_rate": 6.297714199113091e-06,
      "loss": 0.0237,
      "step": 1143740
    },
    {
      "epoch": 1.8717883257071413,
      "grad_norm": 1.2192480564117432,
      "learning_rate": 6.2976483068995746e-06,
      "loss": 0.0175,
      "step": 1143760
    },
    {
      "epoch": 1.8718210561457944,
      "grad_norm": 0.4046013057231903,
      "learning_rate": 6.297582414686057e-06,
      "loss": 0.0138,
      "step": 1143780
    },
    {
      "epoch": 1.8718537865844478,
      "grad_norm": 0.44255533814430237,
      "learning_rate": 6.29751652247254e-06,
      "loss": 0.0202,
      "step": 1143800
    },
    {
      "epoch": 1.8718865170231012,
      "grad_norm": 0.6569175720214844,
      "learning_rate": 6.297450630259024e-06,
      "loss": 0.0168,
      "step": 1143820
    },
    {
      "epoch": 1.8719192474617545,
      "grad_norm": 0.20026390254497528,
      "learning_rate": 6.2973847380455055e-06,
      "loss": 0.0116,
      "step": 1143840
    },
    {
      "epoch": 1.871951977900408,
      "grad_norm": 0.4832703471183777,
      "learning_rate": 6.297318845831989e-06,
      "loss": 0.0235,
      "step": 1143860
    },
    {
      "epoch": 1.871984708339061,
      "grad_norm": 1.934012532234192,
      "learning_rate": 6.297252953618471e-06,
      "loss": 0.0238,
      "step": 1143880
    },
    {
      "epoch": 1.8720174387777146,
      "grad_norm": 1.5162701606750488,
      "learning_rate": 6.297187061404955e-06,
      "loss": 0.0265,
      "step": 1143900
    },
    {
      "epoch": 1.8720501692163678,
      "grad_norm": 0.11290588974952698,
      "learning_rate": 6.2971211691914365e-06,
      "loss": 0.014,
      "step": 1143920
    },
    {
      "epoch": 1.8720828996550212,
      "grad_norm": 0.7197077870368958,
      "learning_rate": 6.29705527697792e-06,
      "loss": 0.015,
      "step": 1143940
    },
    {
      "epoch": 1.8721156300936745,
      "grad_norm": 0.8042939901351929,
      "learning_rate": 6.296989384764403e-06,
      "loss": 0.0156,
      "step": 1143960
    },
    {
      "epoch": 1.8721483605323277,
      "grad_norm": 0.28871074318885803,
      "learning_rate": 6.2969234925508856e-06,
      "loss": 0.0139,
      "step": 1143980
    },
    {
      "epoch": 1.8721810909709813,
      "grad_norm": 0.998839259147644,
      "learning_rate": 6.296857600337368e-06,
      "loss": 0.0212,
      "step": 1144000
    },
    {
      "epoch": 1.8722138214096344,
      "grad_norm": 0.32650402188301086,
      "learning_rate": 6.296791708123852e-06,
      "loss": 0.0194,
      "step": 1144020
    },
    {
      "epoch": 1.872246551848288,
      "grad_norm": 0.5354228615760803,
      "learning_rate": 6.296725815910334e-06,
      "loss": 0.0153,
      "step": 1144040
    },
    {
      "epoch": 1.8722792822869412,
      "grad_norm": 0.2971358001232147,
      "learning_rate": 6.296659923696817e-06,
      "loss": 0.0194,
      "step": 1144060
    },
    {
      "epoch": 1.8723120127255946,
      "grad_norm": 0.36154767870903015,
      "learning_rate": 6.296594031483299e-06,
      "loss": 0.0162,
      "step": 1144080
    },
    {
      "epoch": 1.872344743164248,
      "grad_norm": 0.2324003130197525,
      "learning_rate": 6.296528139269783e-06,
      "loss": 0.0207,
      "step": 1144100
    },
    {
      "epoch": 1.872377473602901,
      "grad_norm": 0.3919982612133026,
      "learning_rate": 6.2964622470562664e-06,
      "loss": 0.025,
      "step": 1144120
    },
    {
      "epoch": 1.8724102040415547,
      "grad_norm": 0.45334500074386597,
      "learning_rate": 6.296396354842748e-06,
      "loss": 0.0215,
      "step": 1144140
    },
    {
      "epoch": 1.8724429344802078,
      "grad_norm": 0.5158001184463501,
      "learning_rate": 6.296330462629232e-06,
      "loss": 0.0222,
      "step": 1144160
    },
    {
      "epoch": 1.8724756649188612,
      "grad_norm": 0.16637417674064636,
      "learning_rate": 6.296264570415715e-06,
      "loss": 0.0116,
      "step": 1144180
    },
    {
      "epoch": 1.8725083953575146,
      "grad_norm": 1.272314190864563,
      "learning_rate": 6.296198678202197e-06,
      "loss": 0.0212,
      "step": 1144200
    },
    {
      "epoch": 1.872541125796168,
      "grad_norm": 0.3177635967731476,
      "learning_rate": 6.29613278598868e-06,
      "loss": 0.0202,
      "step": 1144220
    },
    {
      "epoch": 1.8725738562348213,
      "grad_norm": 0.20392821729183197,
      "learning_rate": 6.296066893775164e-06,
      "loss": 0.0148,
      "step": 1144240
    },
    {
      "epoch": 1.8726065866734745,
      "grad_norm": 0.4323917329311371,
      "learning_rate": 6.296001001561646e-06,
      "loss": 0.0177,
      "step": 1144260
    },
    {
      "epoch": 1.872639317112128,
      "grad_norm": 0.43262162804603577,
      "learning_rate": 6.295935109348129e-06,
      "loss": 0.0181,
      "step": 1144280
    },
    {
      "epoch": 1.8726720475507812,
      "grad_norm": 0.4810652434825897,
      "learning_rate": 6.295869217134611e-06,
      "loss": 0.0185,
      "step": 1144300
    },
    {
      "epoch": 1.8727047779894346,
      "grad_norm": 0.11605436354875565,
      "learning_rate": 6.295803324921095e-06,
      "loss": 0.0103,
      "step": 1144320
    },
    {
      "epoch": 1.872737508428088,
      "grad_norm": 1.0023351907730103,
      "learning_rate": 6.295737432707577e-06,
      "loss": 0.0281,
      "step": 1144340
    },
    {
      "epoch": 1.8727702388667413,
      "grad_norm": 0.48751893639564514,
      "learning_rate": 6.29567154049406e-06,
      "loss": 0.0187,
      "step": 1144360
    },
    {
      "epoch": 1.8728029693053947,
      "grad_norm": 0.6404011249542236,
      "learning_rate": 6.295605648280543e-06,
      "loss": 0.0137,
      "step": 1144380
    },
    {
      "epoch": 1.8728356997440478,
      "grad_norm": 0.28639402985572815,
      "learning_rate": 6.2955397560670265e-06,
      "loss": 0.0223,
      "step": 1144400
    },
    {
      "epoch": 1.8728684301827014,
      "grad_norm": 0.7338860630989075,
      "learning_rate": 6.295473863853508e-06,
      "loss": 0.0172,
      "step": 1144420
    },
    {
      "epoch": 1.8729011606213546,
      "grad_norm": 0.35735175013542175,
      "learning_rate": 6.295407971639992e-06,
      "loss": 0.0219,
      "step": 1144440
    },
    {
      "epoch": 1.872933891060008,
      "grad_norm": 0.9787489771842957,
      "learning_rate": 6.2953420794264755e-06,
      "loss": 0.0178,
      "step": 1144460
    },
    {
      "epoch": 1.8729666214986613,
      "grad_norm": 1.0862255096435547,
      "learning_rate": 6.2952761872129574e-06,
      "loss": 0.0168,
      "step": 1144480
    },
    {
      "epoch": 1.8729993519373147,
      "grad_norm": 0.23474937677383423,
      "learning_rate": 6.295210294999441e-06,
      "loss": 0.0262,
      "step": 1144500
    },
    {
      "epoch": 1.873032082375968,
      "grad_norm": 0.29373040795326233,
      "learning_rate": 6.295144402785923e-06,
      "loss": 0.0109,
      "step": 1144520
    },
    {
      "epoch": 1.8730648128146212,
      "grad_norm": 0.5401219725608826,
      "learning_rate": 6.2950785105724065e-06,
      "loss": 0.0182,
      "step": 1144540
    },
    {
      "epoch": 1.8730975432532748,
      "grad_norm": 0.12939108908176422,
      "learning_rate": 6.295012618358889e-06,
      "loss": 0.0162,
      "step": 1144560
    },
    {
      "epoch": 1.873130273691928,
      "grad_norm": 0.5850638151168823,
      "learning_rate": 6.294946726145372e-06,
      "loss": 0.0217,
      "step": 1144580
    },
    {
      "epoch": 1.8731630041305813,
      "grad_norm": 0.4323398470878601,
      "learning_rate": 6.294880833931855e-06,
      "loss": 0.0213,
      "step": 1144600
    },
    {
      "epoch": 1.8731957345692347,
      "grad_norm": 0.20130407810211182,
      "learning_rate": 6.294814941718338e-06,
      "loss": 0.0133,
      "step": 1144620
    },
    {
      "epoch": 1.873228465007888,
      "grad_norm": 0.23187263309955597,
      "learning_rate": 6.29474904950482e-06,
      "loss": 0.023,
      "step": 1144640
    },
    {
      "epoch": 1.8732611954465415,
      "grad_norm": 0.9983370304107666,
      "learning_rate": 6.294683157291304e-06,
      "loss": 0.0223,
      "step": 1144660
    },
    {
      "epoch": 1.8732939258851946,
      "grad_norm": 0.4038349688053131,
      "learning_rate": 6.294617265077786e-06,
      "loss": 0.0209,
      "step": 1144680
    },
    {
      "epoch": 1.8733266563238482,
      "grad_norm": 0.07856721431016922,
      "learning_rate": 6.294551372864269e-06,
      "loss": 0.0175,
      "step": 1144700
    },
    {
      "epoch": 1.8733593867625014,
      "grad_norm": 0.45103171467781067,
      "learning_rate": 6.294485480650752e-06,
      "loss": 0.0181,
      "step": 1144720
    },
    {
      "epoch": 1.8733921172011547,
      "grad_norm": 0.5361839532852173,
      "learning_rate": 6.294419588437235e-06,
      "loss": 0.0129,
      "step": 1144740
    },
    {
      "epoch": 1.873424847639808,
      "grad_norm": 0.3392989933490753,
      "learning_rate": 6.2943536962237175e-06,
      "loss": 0.0145,
      "step": 1144760
    },
    {
      "epoch": 1.8734575780784612,
      "grad_norm": 0.8707777261734009,
      "learning_rate": 6.294287804010201e-06,
      "loss": 0.0271,
      "step": 1144780
    },
    {
      "epoch": 1.8734903085171148,
      "grad_norm": 0.11324287950992584,
      "learning_rate": 6.294221911796683e-06,
      "loss": 0.0229,
      "step": 1144800
    },
    {
      "epoch": 1.873523038955768,
      "grad_norm": 0.10824689269065857,
      "learning_rate": 6.2941560195831666e-06,
      "loss": 0.0128,
      "step": 1144820
    },
    {
      "epoch": 1.8735557693944216,
      "grad_norm": 0.6249271035194397,
      "learning_rate": 6.29409012736965e-06,
      "loss": 0.0175,
      "step": 1144840
    },
    {
      "epoch": 1.8735884998330747,
      "grad_norm": 0.22030065953731537,
      "learning_rate": 6.294024235156132e-06,
      "loss": 0.0126,
      "step": 1144860
    },
    {
      "epoch": 1.873621230271728,
      "grad_norm": 0.5347476601600647,
      "learning_rate": 6.293958342942616e-06,
      "loss": 0.0232,
      "step": 1144880
    },
    {
      "epoch": 1.8736539607103815,
      "grad_norm": 0.605692446231842,
      "learning_rate": 6.2938924507290975e-06,
      "loss": 0.0262,
      "step": 1144900
    },
    {
      "epoch": 1.8736866911490346,
      "grad_norm": 1.4334349632263184,
      "learning_rate": 6.293826558515581e-06,
      "loss": 0.0265,
      "step": 1144920
    },
    {
      "epoch": 1.8737194215876882,
      "grad_norm": 0.7575347423553467,
      "learning_rate": 6.293760666302063e-06,
      "loss": 0.0231,
      "step": 1144940
    },
    {
      "epoch": 1.8737521520263414,
      "grad_norm": 0.425824910402298,
      "learning_rate": 6.293694774088547e-06,
      "loss": 0.011,
      "step": 1144960
    },
    {
      "epoch": 1.8737848824649947,
      "grad_norm": 0.4612722098827362,
      "learning_rate": 6.293628881875029e-06,
      "loss": 0.0167,
      "step": 1144980
    },
    {
      "epoch": 1.8738176129036481,
      "grad_norm": 0.36864057183265686,
      "learning_rate": 6.293562989661512e-06,
      "loss": 0.0158,
      "step": 1145000
    },
    {
      "epoch": 1.8738503433423015,
      "grad_norm": 2.8263134956359863,
      "learning_rate": 6.293497097447995e-06,
      "loss": 0.021,
      "step": 1145020
    },
    {
      "epoch": 1.8738830737809549,
      "grad_norm": 0.3107863962650299,
      "learning_rate": 6.293431205234478e-06,
      "loss": 0.0166,
      "step": 1145040
    },
    {
      "epoch": 1.873915804219608,
      "grad_norm": 0.5214697122573853,
      "learning_rate": 6.29336531302096e-06,
      "loss": 0.0215,
      "step": 1145060
    },
    {
      "epoch": 1.8739485346582616,
      "grad_norm": 0.7960860133171082,
      "learning_rate": 6.293299420807444e-06,
      "loss": 0.0185,
      "step": 1145080
    },
    {
      "epoch": 1.8739812650969148,
      "grad_norm": 0.9539991617202759,
      "learning_rate": 6.293233528593926e-06,
      "loss": 0.0168,
      "step": 1145100
    },
    {
      "epoch": 1.8740139955355681,
      "grad_norm": 0.5380651950836182,
      "learning_rate": 6.293167636380409e-06,
      "loss": 0.0191,
      "step": 1145120
    },
    {
      "epoch": 1.8740467259742215,
      "grad_norm": 0.40737712383270264,
      "learning_rate": 6.293101744166892e-06,
      "loss": 0.0116,
      "step": 1145140
    },
    {
      "epoch": 1.8740794564128749,
      "grad_norm": 0.7955002784729004,
      "learning_rate": 6.293035851953375e-06,
      "loss": 0.023,
      "step": 1145160
    },
    {
      "epoch": 1.8741121868515283,
      "grad_norm": 0.7941142320632935,
      "learning_rate": 6.292969959739858e-06,
      "loss": 0.0189,
      "step": 1145180
    },
    {
      "epoch": 1.8741449172901814,
      "grad_norm": 0.47927388548851013,
      "learning_rate": 6.292904067526341e-06,
      "loss": 0.0201,
      "step": 1145200
    },
    {
      "epoch": 1.874177647728835,
      "grad_norm": 0.9834155440330505,
      "learning_rate": 6.292838175312824e-06,
      "loss": 0.0248,
      "step": 1145220
    },
    {
      "epoch": 1.8742103781674881,
      "grad_norm": 0.16960616409778595,
      "learning_rate": 6.292772283099307e-06,
      "loss": 0.0142,
      "step": 1145240
    },
    {
      "epoch": 1.8742431086061415,
      "grad_norm": 0.21392226219177246,
      "learning_rate": 6.29270639088579e-06,
      "loss": 0.017,
      "step": 1145260
    },
    {
      "epoch": 1.874275839044795,
      "grad_norm": 0.562274158000946,
      "learning_rate": 6.292640498672272e-06,
      "loss": 0.0198,
      "step": 1145280
    },
    {
      "epoch": 1.8743085694834483,
      "grad_norm": 0.4131830036640167,
      "learning_rate": 6.292574606458756e-06,
      "loss": 0.0125,
      "step": 1145300
    },
    {
      "epoch": 1.8743412999221016,
      "grad_norm": 0.2931644022464752,
      "learning_rate": 6.292508714245238e-06,
      "loss": 0.012,
      "step": 1145320
    },
    {
      "epoch": 1.8743740303607548,
      "grad_norm": 0.6420799493789673,
      "learning_rate": 6.292442822031721e-06,
      "loss": 0.0221,
      "step": 1145340
    },
    {
      "epoch": 1.8744067607994084,
      "grad_norm": 0.5047335028648376,
      "learning_rate": 6.292376929818204e-06,
      "loss": 0.0156,
      "step": 1145360
    },
    {
      "epoch": 1.8744394912380615,
      "grad_norm": 0.532338559627533,
      "learning_rate": 6.292311037604687e-06,
      "loss": 0.0165,
      "step": 1145380
    },
    {
      "epoch": 1.874472221676715,
      "grad_norm": 0.2460012584924698,
      "learning_rate": 6.292245145391169e-06,
      "loss": 0.0182,
      "step": 1145400
    },
    {
      "epoch": 1.8745049521153683,
      "grad_norm": 0.07522410154342651,
      "learning_rate": 6.292179253177653e-06,
      "loss": 0.0182,
      "step": 1145420
    },
    {
      "epoch": 1.8745376825540214,
      "grad_norm": 0.39446982741355896,
      "learning_rate": 6.292113360964135e-06,
      "loss": 0.02,
      "step": 1145440
    },
    {
      "epoch": 1.874570412992675,
      "grad_norm": 0.34705856442451477,
      "learning_rate": 6.2920474687506185e-06,
      "loss": 0.0143,
      "step": 1145460
    },
    {
      "epoch": 1.8746031434313282,
      "grad_norm": 0.4736258089542389,
      "learning_rate": 6.2919815765371e-06,
      "loss": 0.019,
      "step": 1145480
    },
    {
      "epoch": 1.8746358738699818,
      "grad_norm": 1.247044324874878,
      "learning_rate": 6.291915684323584e-06,
      "loss": 0.0175,
      "step": 1145500
    },
    {
      "epoch": 1.874668604308635,
      "grad_norm": 1.5600112676620483,
      "learning_rate": 6.2918497921100675e-06,
      "loss": 0.0154,
      "step": 1145520
    },
    {
      "epoch": 1.8747013347472883,
      "grad_norm": 0.1859605461359024,
      "learning_rate": 6.291783899896549e-06,
      "loss": 0.0203,
      "step": 1145540
    },
    {
      "epoch": 1.8747340651859417,
      "grad_norm": 0.7327584624290466,
      "learning_rate": 6.291718007683033e-06,
      "loss": 0.0135,
      "step": 1145560
    },
    {
      "epoch": 1.8747667956245948,
      "grad_norm": 0.843838632106781,
      "learning_rate": 6.291652115469516e-06,
      "loss": 0.019,
      "step": 1145580
    },
    {
      "epoch": 1.8747995260632484,
      "grad_norm": 0.08981788158416748,
      "learning_rate": 6.2915862232559985e-06,
      "loss": 0.0124,
      "step": 1145600
    },
    {
      "epoch": 1.8748322565019016,
      "grad_norm": 0.7745525240898132,
      "learning_rate": 6.291520331042481e-06,
      "loss": 0.0173,
      "step": 1145620
    },
    {
      "epoch": 1.874864986940555,
      "grad_norm": 1.9796112775802612,
      "learning_rate": 6.291454438828965e-06,
      "loss": 0.0149,
      "step": 1145640
    },
    {
      "epoch": 1.8748977173792083,
      "grad_norm": 0.3241024315357208,
      "learning_rate": 6.291388546615447e-06,
      "loss": 0.0167,
      "step": 1145660
    },
    {
      "epoch": 1.8749304478178617,
      "grad_norm": 0.9427024126052856,
      "learning_rate": 6.29132265440193e-06,
      "loss": 0.0108,
      "step": 1145680
    },
    {
      "epoch": 1.874963178256515,
      "grad_norm": 0.8250539302825928,
      "learning_rate": 6.291256762188412e-06,
      "loss": 0.0163,
      "step": 1145700
    },
    {
      "epoch": 1.8749959086951682,
      "grad_norm": 0.529267430305481,
      "learning_rate": 6.291190869974896e-06,
      "loss": 0.0197,
      "step": 1145720
    },
    {
      "epoch": 1.8750286391338218,
      "grad_norm": 0.39361339807510376,
      "learning_rate": 6.2911249777613785e-06,
      "loss": 0.015,
      "step": 1145740
    },
    {
      "epoch": 1.875061369572475,
      "grad_norm": 0.2475953847169876,
      "learning_rate": 6.291059085547861e-06,
      "loss": 0.0113,
      "step": 1145760
    },
    {
      "epoch": 1.8750941000111283,
      "grad_norm": 0.3274156451225281,
      "learning_rate": 6.290993193334344e-06,
      "loss": 0.0166,
      "step": 1145780
    },
    {
      "epoch": 1.8751268304497817,
      "grad_norm": 0.45802342891693115,
      "learning_rate": 6.290927301120828e-06,
      "loss": 0.0239,
      "step": 1145800
    },
    {
      "epoch": 1.875159560888435,
      "grad_norm": 0.14462822675704956,
      "learning_rate": 6.2908614089073095e-06,
      "loss": 0.015,
      "step": 1145820
    },
    {
      "epoch": 1.8751922913270884,
      "grad_norm": 0.24504627287387848,
      "learning_rate": 6.290795516693793e-06,
      "loss": 0.0137,
      "step": 1145840
    },
    {
      "epoch": 1.8752250217657416,
      "grad_norm": 1.7065788507461548,
      "learning_rate": 6.290729624480275e-06,
      "loss": 0.0176,
      "step": 1145860
    },
    {
      "epoch": 1.8752577522043952,
      "grad_norm": 0.6300073266029358,
      "learning_rate": 6.2906637322667585e-06,
      "loss": 0.0155,
      "step": 1145880
    },
    {
      "epoch": 1.8752904826430483,
      "grad_norm": 0.1555902361869812,
      "learning_rate": 6.290597840053242e-06,
      "loss": 0.0204,
      "step": 1145900
    },
    {
      "epoch": 1.8753232130817017,
      "grad_norm": 0.21859505772590637,
      "learning_rate": 6.290531947839724e-06,
      "loss": 0.0156,
      "step": 1145920
    },
    {
      "epoch": 1.875355943520355,
      "grad_norm": 0.9606056809425354,
      "learning_rate": 6.290466055626208e-06,
      "loss": 0.0235,
      "step": 1145940
    },
    {
      "epoch": 1.8753886739590084,
      "grad_norm": 0.16968578100204468,
      "learning_rate": 6.2904001634126895e-06,
      "loss": 0.0213,
      "step": 1145960
    },
    {
      "epoch": 1.8754214043976618,
      "grad_norm": 1.4445104598999023,
      "learning_rate": 6.290334271199173e-06,
      "loss": 0.0161,
      "step": 1145980
    },
    {
      "epoch": 1.875454134836315,
      "grad_norm": 0.2145698517560959,
      "learning_rate": 6.290268378985656e-06,
      "loss": 0.0163,
      "step": 1146000
    },
    {
      "epoch": 1.8754868652749686,
      "grad_norm": 0.37429141998291016,
      "learning_rate": 6.2902024867721386e-06,
      "loss": 0.0127,
      "step": 1146020
    },
    {
      "epoch": 1.8755195957136217,
      "grad_norm": 0.43162763118743896,
      "learning_rate": 6.290136594558621e-06,
      "loss": 0.0151,
      "step": 1146040
    },
    {
      "epoch": 1.875552326152275,
      "grad_norm": 0.45867520570755005,
      "learning_rate": 6.290070702345105e-06,
      "loss": 0.026,
      "step": 1146060
    },
    {
      "epoch": 1.8755850565909284,
      "grad_norm": 0.7479612827301025,
      "learning_rate": 6.290004810131587e-06,
      "loss": 0.0214,
      "step": 1146080
    },
    {
      "epoch": 1.8756177870295818,
      "grad_norm": 0.6709712147712708,
      "learning_rate": 6.28993891791807e-06,
      "loss": 0.0132,
      "step": 1146100
    },
    {
      "epoch": 1.8756505174682352,
      "grad_norm": 0.1819687932729721,
      "learning_rate": 6.289873025704552e-06,
      "loss": 0.0207,
      "step": 1146120
    },
    {
      "epoch": 1.8756832479068883,
      "grad_norm": 0.2978507876396179,
      "learning_rate": 6.289807133491036e-06,
      "loss": 0.0236,
      "step": 1146140
    },
    {
      "epoch": 1.875715978345542,
      "grad_norm": 0.14391054213047028,
      "learning_rate": 6.289741241277519e-06,
      "loss": 0.0175,
      "step": 1146160
    },
    {
      "epoch": 1.875748708784195,
      "grad_norm": 0.1898806393146515,
      "learning_rate": 6.289675349064001e-06,
      "loss": 0.0155,
      "step": 1146180
    },
    {
      "epoch": 1.8757814392228485,
      "grad_norm": 0.42481184005737305,
      "learning_rate": 6.289609456850484e-06,
      "loss": 0.0167,
      "step": 1146200
    },
    {
      "epoch": 1.8758141696615018,
      "grad_norm": 0.1405922919511795,
      "learning_rate": 6.289543564636968e-06,
      "loss": 0.0181,
      "step": 1146220
    },
    {
      "epoch": 1.875846900100155,
      "grad_norm": 1.2587536573410034,
      "learning_rate": 6.28947767242345e-06,
      "loss": 0.0144,
      "step": 1146240
    },
    {
      "epoch": 1.8758796305388086,
      "grad_norm": 0.6398008465766907,
      "learning_rate": 6.289411780209933e-06,
      "loss": 0.0186,
      "step": 1146260
    },
    {
      "epoch": 1.8759123609774617,
      "grad_norm": 1.7415443658828735,
      "learning_rate": 6.289345887996417e-06,
      "loss": 0.0164,
      "step": 1146280
    },
    {
      "epoch": 1.8759450914161153,
      "grad_norm": 0.3151538074016571,
      "learning_rate": 6.289279995782899e-06,
      "loss": 0.016,
      "step": 1146300
    },
    {
      "epoch": 1.8759778218547685,
      "grad_norm": 0.13206028938293457,
      "learning_rate": 6.289214103569382e-06,
      "loss": 0.0233,
      "step": 1146320
    },
    {
      "epoch": 1.8760105522934218,
      "grad_norm": 0.7732785940170288,
      "learning_rate": 6.289148211355864e-06,
      "loss": 0.0194,
      "step": 1146340
    },
    {
      "epoch": 1.8760432827320752,
      "grad_norm": 0.6891800165176392,
      "learning_rate": 6.289082319142348e-06,
      "loss": 0.022,
      "step": 1146360
    },
    {
      "epoch": 1.8760760131707284,
      "grad_norm": 0.15981051325798035,
      "learning_rate": 6.28901642692883e-06,
      "loss": 0.0152,
      "step": 1146380
    },
    {
      "epoch": 1.876108743609382,
      "grad_norm": 0.9392929673194885,
      "learning_rate": 6.288950534715313e-06,
      "loss": 0.0185,
      "step": 1146400
    },
    {
      "epoch": 1.876141474048035,
      "grad_norm": 0.756428599357605,
      "learning_rate": 6.288884642501796e-06,
      "loss": 0.0244,
      "step": 1146420
    },
    {
      "epoch": 1.8761742044866885,
      "grad_norm": 0.676628589630127,
      "learning_rate": 6.2888187502882795e-06,
      "loss": 0.0177,
      "step": 1146440
    },
    {
      "epoch": 1.8762069349253419,
      "grad_norm": 0.31111884117126465,
      "learning_rate": 6.288752858074761e-06,
      "loss": 0.0191,
      "step": 1146460
    },
    {
      "epoch": 1.8762396653639952,
      "grad_norm": 0.3566665053367615,
      "learning_rate": 6.288686965861245e-06,
      "loss": 0.0178,
      "step": 1146480
    },
    {
      "epoch": 1.8762723958026486,
      "grad_norm": 0.24070985615253448,
      "learning_rate": 6.288621073647727e-06,
      "loss": 0.0261,
      "step": 1146500
    },
    {
      "epoch": 1.8763051262413017,
      "grad_norm": 0.09570860117673874,
      "learning_rate": 6.2885551814342104e-06,
      "loss": 0.0159,
      "step": 1146520
    },
    {
      "epoch": 1.8763378566799553,
      "grad_norm": 1.258252739906311,
      "learning_rate": 6.288489289220693e-06,
      "loss": 0.0157,
      "step": 1146540
    },
    {
      "epoch": 1.8763705871186085,
      "grad_norm": 0.4619346261024475,
      "learning_rate": 6.288423397007176e-06,
      "loss": 0.0177,
      "step": 1146560
    },
    {
      "epoch": 1.8764033175572619,
      "grad_norm": 0.5152677893638611,
      "learning_rate": 6.2883575047936595e-06,
      "loss": 0.0166,
      "step": 1146580
    },
    {
      "epoch": 1.8764360479959152,
      "grad_norm": 1.0849415063858032,
      "learning_rate": 6.288291612580142e-06,
      "loss": 0.0198,
      "step": 1146600
    },
    {
      "epoch": 1.8764687784345686,
      "grad_norm": 0.41708341240882874,
      "learning_rate": 6.288225720366625e-06,
      "loss": 0.0155,
      "step": 1146620
    },
    {
      "epoch": 1.876501508873222,
      "grad_norm": 0.1336982697248459,
      "learning_rate": 6.288159828153108e-06,
      "loss": 0.0121,
      "step": 1146640
    },
    {
      "epoch": 1.8765342393118751,
      "grad_norm": 1.0478876829147339,
      "learning_rate": 6.288093935939591e-06,
      "loss": 0.0161,
      "step": 1146660
    },
    {
      "epoch": 1.8765669697505287,
      "grad_norm": 0.5079134106636047,
      "learning_rate": 6.288028043726073e-06,
      "loss": 0.017,
      "step": 1146680
    },
    {
      "epoch": 1.8765997001891819,
      "grad_norm": 0.9538344740867615,
      "learning_rate": 6.287962151512557e-06,
      "loss": 0.0192,
      "step": 1146700
    },
    {
      "epoch": 1.8766324306278352,
      "grad_norm": 0.3853222131729126,
      "learning_rate": 6.287896259299039e-06,
      "loss": 0.0235,
      "step": 1146720
    },
    {
      "epoch": 1.8766651610664886,
      "grad_norm": 0.4574277400970459,
      "learning_rate": 6.287830367085522e-06,
      "loss": 0.0174,
      "step": 1146740
    },
    {
      "epoch": 1.876697891505142,
      "grad_norm": 0.8441387414932251,
      "learning_rate": 6.287764474872004e-06,
      "loss": 0.0251,
      "step": 1146760
    },
    {
      "epoch": 1.8767306219437954,
      "grad_norm": 0.596982479095459,
      "learning_rate": 6.287698582658488e-06,
      "loss": 0.0179,
      "step": 1146780
    },
    {
      "epoch": 1.8767633523824485,
      "grad_norm": 0.5330756902694702,
      "learning_rate": 6.2876326904449705e-06,
      "loss": 0.0193,
      "step": 1146800
    },
    {
      "epoch": 1.876796082821102,
      "grad_norm": 0.6354882717132568,
      "learning_rate": 6.287566798231454e-06,
      "loss": 0.0208,
      "step": 1146820
    },
    {
      "epoch": 1.8768288132597553,
      "grad_norm": 0.33193454146385193,
      "learning_rate": 6.287500906017936e-06,
      "loss": 0.0162,
      "step": 1146840
    },
    {
      "epoch": 1.8768615436984086,
      "grad_norm": 0.38698631525039673,
      "learning_rate": 6.2874350138044196e-06,
      "loss": 0.0184,
      "step": 1146860
    },
    {
      "epoch": 1.876894274137062,
      "grad_norm": 0.8970388770103455,
      "learning_rate": 6.2873691215909015e-06,
      "loss": 0.0133,
      "step": 1146880
    },
    {
      "epoch": 1.8769270045757154,
      "grad_norm": 0.4242820143699646,
      "learning_rate": 6.287303229377385e-06,
      "loss": 0.0204,
      "step": 1146900
    },
    {
      "epoch": 1.8769597350143687,
      "grad_norm": 0.164667546749115,
      "learning_rate": 6.287237337163869e-06,
      "loss": 0.0146,
      "step": 1146920
    },
    {
      "epoch": 1.876992465453022,
      "grad_norm": 1.0484704971313477,
      "learning_rate": 6.2871714449503505e-06,
      "loss": 0.0133,
      "step": 1146940
    },
    {
      "epoch": 1.8770251958916755,
      "grad_norm": 0.40571579337120056,
      "learning_rate": 6.287105552736834e-06,
      "loss": 0.0157,
      "step": 1146960
    },
    {
      "epoch": 1.8770579263303286,
      "grad_norm": 0.4053013324737549,
      "learning_rate": 6.287039660523316e-06,
      "loss": 0.014,
      "step": 1146980
    },
    {
      "epoch": 1.877090656768982,
      "grad_norm": 1.0605812072753906,
      "learning_rate": 6.2869737683098e-06,
      "loss": 0.0235,
      "step": 1147000
    },
    {
      "epoch": 1.8771233872076354,
      "grad_norm": 0.3958902955055237,
      "learning_rate": 6.286907876096282e-06,
      "loss": 0.0108,
      "step": 1147020
    },
    {
      "epoch": 1.8771561176462885,
      "grad_norm": 0.20693697035312653,
      "learning_rate": 6.286841983882765e-06,
      "loss": 0.0117,
      "step": 1147040
    },
    {
      "epoch": 1.8771888480849421,
      "grad_norm": 0.6534655094146729,
      "learning_rate": 6.286776091669248e-06,
      "loss": 0.0161,
      "step": 1147060
    },
    {
      "epoch": 1.8772215785235953,
      "grad_norm": 0.6292014122009277,
      "learning_rate": 6.286710199455731e-06,
      "loss": 0.0242,
      "step": 1147080
    },
    {
      "epoch": 1.8772543089622489,
      "grad_norm": 0.3039934039115906,
      "learning_rate": 6.286644307242213e-06,
      "loss": 0.0196,
      "step": 1147100
    },
    {
      "epoch": 1.877287039400902,
      "grad_norm": 0.5054822564125061,
      "learning_rate": 6.286578415028697e-06,
      "loss": 0.0186,
      "step": 1147120
    },
    {
      "epoch": 1.8773197698395554,
      "grad_norm": 0.6829516887664795,
      "learning_rate": 6.286512522815179e-06,
      "loss": 0.0153,
      "step": 1147140
    },
    {
      "epoch": 1.8773525002782088,
      "grad_norm": 1.1009215116500854,
      "learning_rate": 6.286446630601662e-06,
      "loss": 0.0145,
      "step": 1147160
    },
    {
      "epoch": 1.877385230716862,
      "grad_norm": 0.07190585136413574,
      "learning_rate": 6.286380738388145e-06,
      "loss": 0.016,
      "step": 1147180
    },
    {
      "epoch": 1.8774179611555155,
      "grad_norm": 0.8437473177909851,
      "learning_rate": 6.286314846174628e-06,
      "loss": 0.0229,
      "step": 1147200
    },
    {
      "epoch": 1.8774506915941687,
      "grad_norm": 0.6343148350715637,
      "learning_rate": 6.2862489539611106e-06,
      "loss": 0.0201,
      "step": 1147220
    },
    {
      "epoch": 1.877483422032822,
      "grad_norm": 0.29286620020866394,
      "learning_rate": 6.286183061747594e-06,
      "loss": 0.0165,
      "step": 1147240
    },
    {
      "epoch": 1.8775161524714754,
      "grad_norm": 0.5280492901802063,
      "learning_rate": 6.286117169534076e-06,
      "loss": 0.0211,
      "step": 1147260
    },
    {
      "epoch": 1.8775488829101288,
      "grad_norm": 0.40292152762413025,
      "learning_rate": 6.28605127732056e-06,
      "loss": 0.032,
      "step": 1147280
    },
    {
      "epoch": 1.8775816133487822,
      "grad_norm": 0.5260620713233948,
      "learning_rate": 6.285985385107043e-06,
      "loss": 0.0172,
      "step": 1147300
    },
    {
      "epoch": 1.8776143437874353,
      "grad_norm": 0.9525927901268005,
      "learning_rate": 6.285919492893525e-06,
      "loss": 0.0222,
      "step": 1147320
    },
    {
      "epoch": 1.877647074226089,
      "grad_norm": 0.23928561806678772,
      "learning_rate": 6.285853600680009e-06,
      "loss": 0.0119,
      "step": 1147340
    },
    {
      "epoch": 1.877679804664742,
      "grad_norm": 0.3440377414226532,
      "learning_rate": 6.285787708466491e-06,
      "loss": 0.0141,
      "step": 1147360
    },
    {
      "epoch": 1.8777125351033954,
      "grad_norm": 0.5574147701263428,
      "learning_rate": 6.285721816252974e-06,
      "loss": 0.0179,
      "step": 1147380
    },
    {
      "epoch": 1.8777452655420488,
      "grad_norm": 0.2601659595966339,
      "learning_rate": 6.285655924039457e-06,
      "loss": 0.013,
      "step": 1147400
    },
    {
      "epoch": 1.8777779959807022,
      "grad_norm": 0.097145676612854,
      "learning_rate": 6.28559003182594e-06,
      "loss": 0.0134,
      "step": 1147420
    },
    {
      "epoch": 1.8778107264193555,
      "grad_norm": 0.5130653977394104,
      "learning_rate": 6.285524139612422e-06,
      "loss": 0.0159,
      "step": 1147440
    },
    {
      "epoch": 1.8778434568580087,
      "grad_norm": 0.0756397396326065,
      "learning_rate": 6.285458247398906e-06,
      "loss": 0.0143,
      "step": 1147460
    },
    {
      "epoch": 1.8778761872966623,
      "grad_norm": 1.523710012435913,
      "learning_rate": 6.285392355185388e-06,
      "loss": 0.0172,
      "step": 1147480
    },
    {
      "epoch": 1.8779089177353154,
      "grad_norm": 0.32193538546562195,
      "learning_rate": 6.2853264629718715e-06,
      "loss": 0.0196,
      "step": 1147500
    },
    {
      "epoch": 1.8779416481739688,
      "grad_norm": 0.24900366365909576,
      "learning_rate": 6.285260570758353e-06,
      "loss": 0.0186,
      "step": 1147520
    },
    {
      "epoch": 1.8779743786126222,
      "grad_norm": 0.16962699592113495,
      "learning_rate": 6.285194678544837e-06,
      "loss": 0.0193,
      "step": 1147540
    },
    {
      "epoch": 1.8780071090512755,
      "grad_norm": 0.21070483326911926,
      "learning_rate": 6.28512878633132e-06,
      "loss": 0.0184,
      "step": 1147560
    },
    {
      "epoch": 1.878039839489929,
      "grad_norm": 0.9521340727806091,
      "learning_rate": 6.2850628941178024e-06,
      "loss": 0.0154,
      "step": 1147580
    },
    {
      "epoch": 1.878072569928582,
      "grad_norm": 0.3504694998264313,
      "learning_rate": 6.284997001904285e-06,
      "loss": 0.023,
      "step": 1147600
    },
    {
      "epoch": 1.8781053003672357,
      "grad_norm": 0.3652156591415405,
      "learning_rate": 6.284931109690769e-06,
      "loss": 0.0108,
      "step": 1147620
    },
    {
      "epoch": 1.8781380308058888,
      "grad_norm": 1.4614489078521729,
      "learning_rate": 6.2848652174772515e-06,
      "loss": 0.0138,
      "step": 1147640
    },
    {
      "epoch": 1.8781707612445422,
      "grad_norm": 0.3470252752304077,
      "learning_rate": 6.284799325263734e-06,
      "loss": 0.0199,
      "step": 1147660
    },
    {
      "epoch": 1.8782034916831956,
      "grad_norm": 0.6591466665267944,
      "learning_rate": 6.284733433050218e-06,
      "loss": 0.0244,
      "step": 1147680
    },
    {
      "epoch": 1.878236222121849,
      "grad_norm": 0.7178213000297546,
      "learning_rate": 6.2846675408367e-06,
      "loss": 0.0249,
      "step": 1147700
    },
    {
      "epoch": 1.8782689525605023,
      "grad_norm": 0.6785619258880615,
      "learning_rate": 6.284601648623183e-06,
      "loss": 0.0216,
      "step": 1147720
    },
    {
      "epoch": 1.8783016829991555,
      "grad_norm": 0.40371257066726685,
      "learning_rate": 6.284535756409665e-06,
      "loss": 0.0246,
      "step": 1147740
    },
    {
      "epoch": 1.878334413437809,
      "grad_norm": 0.7217395901679993,
      "learning_rate": 6.284469864196149e-06,
      "loss": 0.0174,
      "step": 1147760
    },
    {
      "epoch": 1.8783671438764622,
      "grad_norm": 0.4938059151172638,
      "learning_rate": 6.284403971982631e-06,
      "loss": 0.0271,
      "step": 1147780
    },
    {
      "epoch": 1.8783998743151156,
      "grad_norm": 0.19362780451774597,
      "learning_rate": 6.284338079769114e-06,
      "loss": 0.0154,
      "step": 1147800
    },
    {
      "epoch": 1.878432604753769,
      "grad_norm": 1.4160548448562622,
      "learning_rate": 6.284272187555597e-06,
      "loss": 0.0175,
      "step": 1147820
    },
    {
      "epoch": 1.878465335192422,
      "grad_norm": 0.44900327920913696,
      "learning_rate": 6.28420629534208e-06,
      "loss": 0.0161,
      "step": 1147840
    },
    {
      "epoch": 1.8784980656310757,
      "grad_norm": 0.4523589015007019,
      "learning_rate": 6.2841404031285625e-06,
      "loss": 0.0168,
      "step": 1147860
    },
    {
      "epoch": 1.8785307960697288,
      "grad_norm": 0.07374873012304306,
      "learning_rate": 6.284074510915046e-06,
      "loss": 0.0168,
      "step": 1147880
    },
    {
      "epoch": 1.8785635265083824,
      "grad_norm": 0.7860811948776245,
      "learning_rate": 6.284008618701528e-06,
      "loss": 0.0245,
      "step": 1147900
    },
    {
      "epoch": 1.8785962569470356,
      "grad_norm": 0.18880490958690643,
      "learning_rate": 6.2839427264880115e-06,
      "loss": 0.0186,
      "step": 1147920
    },
    {
      "epoch": 1.878628987385689,
      "grad_norm": 0.2354419082403183,
      "learning_rate": 6.2838768342744934e-06,
      "loss": 0.0107,
      "step": 1147940
    },
    {
      "epoch": 1.8786617178243423,
      "grad_norm": 0.4180662930011749,
      "learning_rate": 6.283810942060977e-06,
      "loss": 0.0187,
      "step": 1147960
    },
    {
      "epoch": 1.8786944482629955,
      "grad_norm": 0.47433316707611084,
      "learning_rate": 6.283745049847461e-06,
      "loss": 0.0156,
      "step": 1147980
    },
    {
      "epoch": 1.878727178701649,
      "grad_norm": 0.09957718104124069,
      "learning_rate": 6.2836791576339425e-06,
      "loss": 0.0196,
      "step": 1148000
    },
    {
      "epoch": 1.8787599091403022,
      "grad_norm": 0.6954643130302429,
      "learning_rate": 6.283613265420426e-06,
      "loss": 0.0191,
      "step": 1148020
    },
    {
      "epoch": 1.8787926395789556,
      "grad_norm": 0.8570370674133301,
      "learning_rate": 6.283547373206909e-06,
      "loss": 0.0209,
      "step": 1148040
    },
    {
      "epoch": 1.878825370017609,
      "grad_norm": 0.693276047706604,
      "learning_rate": 6.2834814809933916e-06,
      "loss": 0.0133,
      "step": 1148060
    },
    {
      "epoch": 1.8788581004562623,
      "grad_norm": 0.34281423687934875,
      "learning_rate": 6.283415588779874e-06,
      "loss": 0.0184,
      "step": 1148080
    },
    {
      "epoch": 1.8788908308949157,
      "grad_norm": 0.1479414701461792,
      "learning_rate": 6.283349696566358e-06,
      "loss": 0.0132,
      "step": 1148100
    },
    {
      "epoch": 1.8789235613335689,
      "grad_norm": 0.27791067957878113,
      "learning_rate": 6.28328380435284e-06,
      "loss": 0.0174,
      "step": 1148120
    },
    {
      "epoch": 1.8789562917722225,
      "grad_norm": 0.6157552003860474,
      "learning_rate": 6.283217912139323e-06,
      "loss": 0.0141,
      "step": 1148140
    },
    {
      "epoch": 1.8789890222108756,
      "grad_norm": 0.480898916721344,
      "learning_rate": 6.283152019925805e-06,
      "loss": 0.0151,
      "step": 1148160
    },
    {
      "epoch": 1.879021752649529,
      "grad_norm": 0.5539448261260986,
      "learning_rate": 6.283086127712289e-06,
      "loss": 0.0166,
      "step": 1148180
    },
    {
      "epoch": 1.8790544830881823,
      "grad_norm": 0.2196241170167923,
      "learning_rate": 6.283020235498772e-06,
      "loss": 0.0178,
      "step": 1148200
    },
    {
      "epoch": 1.8790872135268357,
      "grad_norm": 1.0042885541915894,
      "learning_rate": 6.282954343285254e-06,
      "loss": 0.016,
      "step": 1148220
    },
    {
      "epoch": 1.879119943965489,
      "grad_norm": 0.4532898962497711,
      "learning_rate": 6.282888451071737e-06,
      "loss": 0.0169,
      "step": 1148240
    },
    {
      "epoch": 1.8791526744041422,
      "grad_norm": 3.2678656578063965,
      "learning_rate": 6.282822558858221e-06,
      "loss": 0.019,
      "step": 1148260
    },
    {
      "epoch": 1.8791854048427958,
      "grad_norm": 0.5174042582511902,
      "learning_rate": 6.2827566666447026e-06,
      "loss": 0.0167,
      "step": 1148280
    },
    {
      "epoch": 1.879218135281449,
      "grad_norm": 0.398104190826416,
      "learning_rate": 6.282690774431186e-06,
      "loss": 0.0183,
      "step": 1148300
    },
    {
      "epoch": 1.8792508657201024,
      "grad_norm": 0.26300352811813354,
      "learning_rate": 6.282624882217668e-06,
      "loss": 0.0125,
      "step": 1148320
    },
    {
      "epoch": 1.8792835961587557,
      "grad_norm": 0.3662451505661011,
      "learning_rate": 6.282558990004152e-06,
      "loss": 0.0218,
      "step": 1148340
    },
    {
      "epoch": 1.879316326597409,
      "grad_norm": 0.690717875957489,
      "learning_rate": 6.282493097790635e-06,
      "loss": 0.018,
      "step": 1148360
    },
    {
      "epoch": 1.8793490570360625,
      "grad_norm": 0.5704402327537537,
      "learning_rate": 6.282427205577117e-06,
      "loss": 0.0147,
      "step": 1148380
    },
    {
      "epoch": 1.8793817874747156,
      "grad_norm": 0.3109276592731476,
      "learning_rate": 6.282361313363601e-06,
      "loss": 0.0211,
      "step": 1148400
    },
    {
      "epoch": 1.8794145179133692,
      "grad_norm": 0.28661713004112244,
      "learning_rate": 6.2822954211500834e-06,
      "loss": 0.017,
      "step": 1148420
    },
    {
      "epoch": 1.8794472483520224,
      "grad_norm": 0.16749395430088043,
      "learning_rate": 6.282229528936566e-06,
      "loss": 0.0205,
      "step": 1148440
    },
    {
      "epoch": 1.8794799787906757,
      "grad_norm": 0.2972489595413208,
      "learning_rate": 6.282163636723049e-06,
      "loss": 0.0171,
      "step": 1148460
    },
    {
      "epoch": 1.8795127092293291,
      "grad_norm": 0.9943625926971436,
      "learning_rate": 6.2820977445095325e-06,
      "loss": 0.0187,
      "step": 1148480
    },
    {
      "epoch": 1.8795454396679823,
      "grad_norm": 0.6278508901596069,
      "learning_rate": 6.282031852296014e-06,
      "loss": 0.0176,
      "step": 1148500
    },
    {
      "epoch": 1.8795781701066359,
      "grad_norm": 0.46498700976371765,
      "learning_rate": 6.281965960082498e-06,
      "loss": 0.0233,
      "step": 1148520
    },
    {
      "epoch": 1.879610900545289,
      "grad_norm": 0.8045922517776489,
      "learning_rate": 6.28190006786898e-06,
      "loss": 0.0163,
      "step": 1148540
    },
    {
      "epoch": 1.8796436309839426,
      "grad_norm": 0.5316991806030273,
      "learning_rate": 6.2818341756554634e-06,
      "loss": 0.0204,
      "step": 1148560
    },
    {
      "epoch": 1.8796763614225958,
      "grad_norm": 1.2390222549438477,
      "learning_rate": 6.281768283441946e-06,
      "loss": 0.024,
      "step": 1148580
    },
    {
      "epoch": 1.8797090918612491,
      "grad_norm": 0.5227403044700623,
      "learning_rate": 6.281702391228429e-06,
      "loss": 0.0116,
      "step": 1148600
    },
    {
      "epoch": 1.8797418222999025,
      "grad_norm": 0.19984203577041626,
      "learning_rate": 6.281636499014912e-06,
      "loss": 0.0129,
      "step": 1148620
    },
    {
      "epoch": 1.8797745527385556,
      "grad_norm": 0.7447508573532104,
      "learning_rate": 6.281570606801395e-06,
      "loss": 0.0208,
      "step": 1148640
    },
    {
      "epoch": 1.8798072831772092,
      "grad_norm": 0.8427842855453491,
      "learning_rate": 6.281504714587877e-06,
      "loss": 0.0147,
      "step": 1148660
    },
    {
      "epoch": 1.8798400136158624,
      "grad_norm": 0.4789901077747345,
      "learning_rate": 6.281438822374361e-06,
      "loss": 0.0094,
      "step": 1148680
    },
    {
      "epoch": 1.8798727440545158,
      "grad_norm": 1.2523778676986694,
      "learning_rate": 6.281372930160844e-06,
      "loss": 0.0231,
      "step": 1148700
    },
    {
      "epoch": 1.8799054744931691,
      "grad_norm": 0.7925753593444824,
      "learning_rate": 6.281307037947326e-06,
      "loss": 0.0197,
      "step": 1148720
    },
    {
      "epoch": 1.8799382049318225,
      "grad_norm": 0.4030037522315979,
      "learning_rate": 6.28124114573381e-06,
      "loss": 0.0156,
      "step": 1148740
    },
    {
      "epoch": 1.8799709353704759,
      "grad_norm": 0.5109418034553528,
      "learning_rate": 6.281175253520292e-06,
      "loss": 0.0208,
      "step": 1148760
    },
    {
      "epoch": 1.880003665809129,
      "grad_norm": 0.062269262969493866,
      "learning_rate": 6.281109361306775e-06,
      "loss": 0.0182,
      "step": 1148780
    },
    {
      "epoch": 1.8800363962477826,
      "grad_norm": 0.21851859986782074,
      "learning_rate": 6.281043469093257e-06,
      "loss": 0.0177,
      "step": 1148800
    },
    {
      "epoch": 1.8800691266864358,
      "grad_norm": 0.23716871440410614,
      "learning_rate": 6.280977576879741e-06,
      "loss": 0.0102,
      "step": 1148820
    },
    {
      "epoch": 1.8801018571250891,
      "grad_norm": 0.7174387574195862,
      "learning_rate": 6.2809116846662235e-06,
      "loss": 0.0132,
      "step": 1148840
    },
    {
      "epoch": 1.8801345875637425,
      "grad_norm": 3.080130100250244,
      "learning_rate": 6.280845792452706e-06,
      "loss": 0.0188,
      "step": 1148860
    },
    {
      "epoch": 1.880167318002396,
      "grad_norm": 0.3515080213546753,
      "learning_rate": 6.280779900239189e-06,
      "loss": 0.0222,
      "step": 1148880
    },
    {
      "epoch": 1.8802000484410493,
      "grad_norm": 0.37829673290252686,
      "learning_rate": 6.2807140080256726e-06,
      "loss": 0.0227,
      "step": 1148900
    },
    {
      "epoch": 1.8802327788797024,
      "grad_norm": 0.6336743831634521,
      "learning_rate": 6.2806481158121545e-06,
      "loss": 0.0199,
      "step": 1148920
    },
    {
      "epoch": 1.880265509318356,
      "grad_norm": 0.2760067582130432,
      "learning_rate": 6.280582223598638e-06,
      "loss": 0.0225,
      "step": 1148940
    },
    {
      "epoch": 1.8802982397570092,
      "grad_norm": 0.49222126603126526,
      "learning_rate": 6.28051633138512e-06,
      "loss": 0.02,
      "step": 1148960
    },
    {
      "epoch": 1.8803309701956625,
      "grad_norm": 0.3042909801006317,
      "learning_rate": 6.2804504391716035e-06,
      "loss": 0.0147,
      "step": 1148980
    },
    {
      "epoch": 1.880363700634316,
      "grad_norm": 0.7746831178665161,
      "learning_rate": 6.280384546958086e-06,
      "loss": 0.0204,
      "step": 1149000
    },
    {
      "epoch": 1.8803964310729693,
      "grad_norm": 1.4305152893066406,
      "learning_rate": 6.280318654744569e-06,
      "loss": 0.0258,
      "step": 1149020
    },
    {
      "epoch": 1.8804291615116226,
      "grad_norm": 0.8587619066238403,
      "learning_rate": 6.280252762531053e-06,
      "loss": 0.0155,
      "step": 1149040
    },
    {
      "epoch": 1.8804618919502758,
      "grad_norm": 0.2493012398481369,
      "learning_rate": 6.280186870317535e-06,
      "loss": 0.0285,
      "step": 1149060
    },
    {
      "epoch": 1.8804946223889294,
      "grad_norm": 2.147644281387329,
      "learning_rate": 6.280120978104018e-06,
      "loss": 0.0259,
      "step": 1149080
    },
    {
      "epoch": 1.8805273528275825,
      "grad_norm": 0.9027650952339172,
      "learning_rate": 6.280055085890501e-06,
      "loss": 0.0163,
      "step": 1149100
    },
    {
      "epoch": 1.880560083266236,
      "grad_norm": 0.5125093460083008,
      "learning_rate": 6.279989193676984e-06,
      "loss": 0.0151,
      "step": 1149120
    },
    {
      "epoch": 1.8805928137048893,
      "grad_norm": 0.4155441224575043,
      "learning_rate": 6.279923301463466e-06,
      "loss": 0.0215,
      "step": 1149140
    },
    {
      "epoch": 1.8806255441435427,
      "grad_norm": 0.16552753746509552,
      "learning_rate": 6.27985740924995e-06,
      "loss": 0.0133,
      "step": 1149160
    },
    {
      "epoch": 1.880658274582196,
      "grad_norm": 0.22809861600399017,
      "learning_rate": 6.279791517036432e-06,
      "loss": 0.0203,
      "step": 1149180
    },
    {
      "epoch": 1.8806910050208492,
      "grad_norm": 0.2676461935043335,
      "learning_rate": 6.279725624822915e-06,
      "loss": 0.0152,
      "step": 1149200
    },
    {
      "epoch": 1.8807237354595028,
      "grad_norm": 0.47577425837516785,
      "learning_rate": 6.279659732609398e-06,
      "loss": 0.0182,
      "step": 1149220
    },
    {
      "epoch": 1.880756465898156,
      "grad_norm": 0.11969175189733505,
      "learning_rate": 6.279593840395881e-06,
      "loss": 0.0199,
      "step": 1149240
    },
    {
      "epoch": 1.8807891963368093,
      "grad_norm": 0.18738868832588196,
      "learning_rate": 6.2795279481823636e-06,
      "loss": 0.0119,
      "step": 1149260
    },
    {
      "epoch": 1.8808219267754627,
      "grad_norm": 0.6930506229400635,
      "learning_rate": 6.279462055968847e-06,
      "loss": 0.0258,
      "step": 1149280
    },
    {
      "epoch": 1.8808546572141158,
      "grad_norm": 0.20420606434345245,
      "learning_rate": 6.279396163755329e-06,
      "loss": 0.018,
      "step": 1149300
    },
    {
      "epoch": 1.8808873876527694,
      "grad_norm": 1.03245210647583,
      "learning_rate": 6.279330271541813e-06,
      "loss": 0.0206,
      "step": 1149320
    },
    {
      "epoch": 1.8809201180914226,
      "grad_norm": 0.37940192222595215,
      "learning_rate": 6.2792643793282945e-06,
      "loss": 0.0191,
      "step": 1149340
    },
    {
      "epoch": 1.8809528485300762,
      "grad_norm": 0.34206146001815796,
      "learning_rate": 6.279198487114778e-06,
      "loss": 0.0192,
      "step": 1149360
    },
    {
      "epoch": 1.8809855789687293,
      "grad_norm": 0.11649257689714432,
      "learning_rate": 6.279132594901261e-06,
      "loss": 0.0164,
      "step": 1149380
    },
    {
      "epoch": 1.8810183094073827,
      "grad_norm": 0.35423219203948975,
      "learning_rate": 6.279066702687744e-06,
      "loss": 0.0221,
      "step": 1149400
    },
    {
      "epoch": 1.881051039846036,
      "grad_norm": 0.44427263736724854,
      "learning_rate": 6.279000810474227e-06,
      "loss": 0.0136,
      "step": 1149420
    },
    {
      "epoch": 1.8810837702846892,
      "grad_norm": 0.5762843489646912,
      "learning_rate": 6.27893491826071e-06,
      "loss": 0.0148,
      "step": 1149440
    },
    {
      "epoch": 1.8811165007233428,
      "grad_norm": 0.48803281784057617,
      "learning_rate": 6.278869026047193e-06,
      "loss": 0.0208,
      "step": 1149460
    },
    {
      "epoch": 1.881149231161996,
      "grad_norm": 0.28398337960243225,
      "learning_rate": 6.278803133833675e-06,
      "loss": 0.0199,
      "step": 1149480
    },
    {
      "epoch": 1.8811819616006493,
      "grad_norm": 0.3447229862213135,
      "learning_rate": 6.278737241620159e-06,
      "loss": 0.0169,
      "step": 1149500
    },
    {
      "epoch": 1.8812146920393027,
      "grad_norm": 0.3209206163883209,
      "learning_rate": 6.278671349406641e-06,
      "loss": 0.0111,
      "step": 1149520
    },
    {
      "epoch": 1.881247422477956,
      "grad_norm": 0.7973915934562683,
      "learning_rate": 6.2786054571931245e-06,
      "loss": 0.0257,
      "step": 1149540
    },
    {
      "epoch": 1.8812801529166094,
      "grad_norm": 0.6270477771759033,
      "learning_rate": 6.278539564979606e-06,
      "loss": 0.0154,
      "step": 1149560
    },
    {
      "epoch": 1.8813128833552626,
      "grad_norm": 0.3290243446826935,
      "learning_rate": 6.27847367276609e-06,
      "loss": 0.0177,
      "step": 1149580
    },
    {
      "epoch": 1.8813456137939162,
      "grad_norm": 0.9637965559959412,
      "learning_rate": 6.278407780552573e-06,
      "loss": 0.0168,
      "step": 1149600
    },
    {
      "epoch": 1.8813783442325693,
      "grad_norm": 0.40914052724838257,
      "learning_rate": 6.2783418883390554e-06,
      "loss": 0.0145,
      "step": 1149620
    },
    {
      "epoch": 1.8814110746712227,
      "grad_norm": 0.636634349822998,
      "learning_rate": 6.278275996125538e-06,
      "loss": 0.0159,
      "step": 1149640
    },
    {
      "epoch": 1.881443805109876,
      "grad_norm": 0.1586546003818512,
      "learning_rate": 6.278210103912022e-06,
      "loss": 0.0225,
      "step": 1149660
    },
    {
      "epoch": 1.8814765355485294,
      "grad_norm": 0.2285909652709961,
      "learning_rate": 6.278144211698504e-06,
      "loss": 0.0128,
      "step": 1149680
    },
    {
      "epoch": 1.8815092659871828,
      "grad_norm": 0.15645352005958557,
      "learning_rate": 6.278078319484987e-06,
      "loss": 0.0164,
      "step": 1149700
    },
    {
      "epoch": 1.881541996425836,
      "grad_norm": 0.17018534243106842,
      "learning_rate": 6.278012427271469e-06,
      "loss": 0.0255,
      "step": 1149720
    },
    {
      "epoch": 1.8815747268644896,
      "grad_norm": 0.23454166948795319,
      "learning_rate": 6.277946535057953e-06,
      "loss": 0.012,
      "step": 1149740
    },
    {
      "epoch": 1.8816074573031427,
      "grad_norm": 0.5628791451454163,
      "learning_rate": 6.277880642844436e-06,
      "loss": 0.0135,
      "step": 1149760
    },
    {
      "epoch": 1.881640187741796,
      "grad_norm": 0.6356195211410522,
      "learning_rate": 6.277814750630918e-06,
      "loss": 0.0194,
      "step": 1149780
    },
    {
      "epoch": 1.8816729181804495,
      "grad_norm": 0.7349265813827515,
      "learning_rate": 6.277748858417402e-06,
      "loss": 0.0181,
      "step": 1149800
    },
    {
      "epoch": 1.8817056486191028,
      "grad_norm": 1.838011384010315,
      "learning_rate": 6.277682966203884e-06,
      "loss": 0.0162,
      "step": 1149820
    },
    {
      "epoch": 1.8817383790577562,
      "grad_norm": 0.5077320337295532,
      "learning_rate": 6.277617073990367e-06,
      "loss": 0.0265,
      "step": 1149840
    },
    {
      "epoch": 1.8817711094964094,
      "grad_norm": 0.42847388982772827,
      "learning_rate": 6.27755118177685e-06,
      "loss": 0.017,
      "step": 1149860
    },
    {
      "epoch": 1.881803839935063,
      "grad_norm": 0.2628294825553894,
      "learning_rate": 6.277485289563333e-06,
      "loss": 0.0143,
      "step": 1149880
    },
    {
      "epoch": 1.881836570373716,
      "grad_norm": 0.4999120235443115,
      "learning_rate": 6.2774193973498155e-06,
      "loss": 0.0171,
      "step": 1149900
    },
    {
      "epoch": 1.8818693008123695,
      "grad_norm": 0.30429428815841675,
      "learning_rate": 6.277353505136299e-06,
      "loss": 0.0184,
      "step": 1149920
    },
    {
      "epoch": 1.8819020312510228,
      "grad_norm": 0.12390180677175522,
      "learning_rate": 6.277287612922781e-06,
      "loss": 0.0136,
      "step": 1149940
    },
    {
      "epoch": 1.8819347616896762,
      "grad_norm": 0.0973985567688942,
      "learning_rate": 6.2772217207092645e-06,
      "loss": 0.0143,
      "step": 1149960
    },
    {
      "epoch": 1.8819674921283296,
      "grad_norm": 0.36644724011421204,
      "learning_rate": 6.2771558284957464e-06,
      "loss": 0.0228,
      "step": 1149980
    },
    {
      "epoch": 1.8820002225669827,
      "grad_norm": 1.1871250867843628,
      "learning_rate": 6.27708993628223e-06,
      "loss": 0.0216,
      "step": 1150000
    },
    {
      "epoch": 1.8820002225669827,
      "eval_loss": 0.009559311904013157,
      "eval_runtime": 6523.453,
      "eval_samples_per_second": 157.563,
      "eval_steps_per_second": 15.756,
      "eval_sts-dev_pearson_cosine": 0.9777464781685921,
      "eval_sts-dev_spearman_cosine": 0.8906822172833087,
      "step": 1150000
    },
    {
      "epoch": 1.8820329530056363,
      "grad_norm": 0.76866614818573,
      "learning_rate": 6.277024044068713e-06,
      "loss": 0.02,
      "step": 1150020
    },
    {
      "epoch": 1.8820656834442895,
      "grad_norm": 0.1840580850839615,
      "learning_rate": 6.2769581518551955e-06,
      "loss": 0.0124,
      "step": 1150040
    },
    {
      "epoch": 1.8820984138829429,
      "grad_norm": 0.5117810368537903,
      "learning_rate": 6.276892259641678e-06,
      "loss": 0.0187,
      "step": 1150060
    },
    {
      "epoch": 1.8821311443215962,
      "grad_norm": 0.21624889969825745,
      "learning_rate": 6.276826367428162e-06,
      "loss": 0.0151,
      "step": 1150080
    },
    {
      "epoch": 1.8821638747602494,
      "grad_norm": 0.6266183853149414,
      "learning_rate": 6.2767604752146446e-06,
      "loss": 0.0148,
      "step": 1150100
    },
    {
      "epoch": 1.882196605198903,
      "grad_norm": 0.8496038913726807,
      "learning_rate": 6.276694583001127e-06,
      "loss": 0.0173,
      "step": 1150120
    },
    {
      "epoch": 1.8822293356375561,
      "grad_norm": 0.37670430541038513,
      "learning_rate": 6.276628690787611e-06,
      "loss": 0.0143,
      "step": 1150140
    },
    {
      "epoch": 1.8822620660762097,
      "grad_norm": 0.2959997057914734,
      "learning_rate": 6.276562798574093e-06,
      "loss": 0.0232,
      "step": 1150160
    },
    {
      "epoch": 1.8822947965148629,
      "grad_norm": 0.29947707056999207,
      "learning_rate": 6.276496906360576e-06,
      "loss": 0.017,
      "step": 1150180
    },
    {
      "epoch": 1.8823275269535162,
      "grad_norm": 0.957044780254364,
      "learning_rate": 6.276431014147058e-06,
      "loss": 0.0181,
      "step": 1150200
    },
    {
      "epoch": 1.8823602573921696,
      "grad_norm": 1.2914228439331055,
      "learning_rate": 6.276365121933542e-06,
      "loss": 0.0244,
      "step": 1150220
    },
    {
      "epoch": 1.8823929878308228,
      "grad_norm": 0.4265097379684448,
      "learning_rate": 6.276299229720025e-06,
      "loss": 0.0177,
      "step": 1150240
    },
    {
      "epoch": 1.8824257182694764,
      "grad_norm": 0.2689517140388489,
      "learning_rate": 6.276233337506507e-06,
      "loss": 0.0179,
      "step": 1150260
    },
    {
      "epoch": 1.8824584487081295,
      "grad_norm": 1.3094189167022705,
      "learning_rate": 6.27616744529299e-06,
      "loss": 0.0218,
      "step": 1150280
    },
    {
      "epoch": 1.8824911791467829,
      "grad_norm": 0.2810347080230713,
      "learning_rate": 6.276101553079474e-06,
      "loss": 0.0216,
      "step": 1150300
    },
    {
      "epoch": 1.8825239095854363,
      "grad_norm": 0.4101108908653259,
      "learning_rate": 6.2760356608659556e-06,
      "loss": 0.0132,
      "step": 1150320
    },
    {
      "epoch": 1.8825566400240896,
      "grad_norm": 0.6229338049888611,
      "learning_rate": 6.275969768652439e-06,
      "loss": 0.0141,
      "step": 1150340
    },
    {
      "epoch": 1.882589370462743,
      "grad_norm": 0.7605021595954895,
      "learning_rate": 6.275903876438921e-06,
      "loss": 0.0212,
      "step": 1150360
    },
    {
      "epoch": 1.8826221009013961,
      "grad_norm": 0.15242283046245575,
      "learning_rate": 6.275837984225405e-06,
      "loss": 0.0142,
      "step": 1150380
    },
    {
      "epoch": 1.8826548313400497,
      "grad_norm": 1.1633336544036865,
      "learning_rate": 6.275772092011887e-06,
      "loss": 0.0161,
      "step": 1150400
    },
    {
      "epoch": 1.882687561778703,
      "grad_norm": 0.676398754119873,
      "learning_rate": 6.27570619979837e-06,
      "loss": 0.0133,
      "step": 1150420
    },
    {
      "epoch": 1.8827202922173563,
      "grad_norm": 0.5140600800514221,
      "learning_rate": 6.275640307584854e-06,
      "loss": 0.0214,
      "step": 1150440
    },
    {
      "epoch": 1.8827530226560096,
      "grad_norm": 0.4080166518688202,
      "learning_rate": 6.2755744153713364e-06,
      "loss": 0.0188,
      "step": 1150460
    },
    {
      "epoch": 1.882785753094663,
      "grad_norm": 0.3585638701915741,
      "learning_rate": 6.275508523157819e-06,
      "loss": 0.0103,
      "step": 1150480
    },
    {
      "epoch": 1.8828184835333164,
      "grad_norm": 1.523138403892517,
      "learning_rate": 6.275442630944302e-06,
      "loss": 0.0162,
      "step": 1150500
    },
    {
      "epoch": 1.8828512139719695,
      "grad_norm": 2.1973588466644287,
      "learning_rate": 6.2753767387307855e-06,
      "loss": 0.0164,
      "step": 1150520
    },
    {
      "epoch": 1.8828839444106231,
      "grad_norm": 0.29811564087867737,
      "learning_rate": 6.275310846517267e-06,
      "loss": 0.0185,
      "step": 1150540
    },
    {
      "epoch": 1.8829166748492763,
      "grad_norm": 0.23340781033039093,
      "learning_rate": 6.275244954303751e-06,
      "loss": 0.0114,
      "step": 1150560
    },
    {
      "epoch": 1.8829494052879296,
      "grad_norm": 0.935481607913971,
      "learning_rate": 6.275179062090233e-06,
      "loss": 0.0166,
      "step": 1150580
    },
    {
      "epoch": 1.882982135726583,
      "grad_norm": 0.15601365268230438,
      "learning_rate": 6.2751131698767165e-06,
      "loss": 0.0135,
      "step": 1150600
    },
    {
      "epoch": 1.8830148661652364,
      "grad_norm": 0.3608028292655945,
      "learning_rate": 6.275047277663198e-06,
      "loss": 0.0184,
      "step": 1150620
    },
    {
      "epoch": 1.8830475966038898,
      "grad_norm": 0.1961613893508911,
      "learning_rate": 6.274981385449682e-06,
      "loss": 0.0134,
      "step": 1150640
    },
    {
      "epoch": 1.883080327042543,
      "grad_norm": 0.17691512405872345,
      "learning_rate": 6.274915493236165e-06,
      "loss": 0.0152,
      "step": 1150660
    },
    {
      "epoch": 1.8831130574811965,
      "grad_norm": 0.16661903262138367,
      "learning_rate": 6.274849601022648e-06,
      "loss": 0.0163,
      "step": 1150680
    },
    {
      "epoch": 1.8831457879198497,
      "grad_norm": 0.6291240453720093,
      "learning_rate": 6.27478370880913e-06,
      "loss": 0.0119,
      "step": 1150700
    },
    {
      "epoch": 1.883178518358503,
      "grad_norm": 0.5641211867332458,
      "learning_rate": 6.274717816595614e-06,
      "loss": 0.0178,
      "step": 1150720
    },
    {
      "epoch": 1.8832112487971564,
      "grad_norm": 1.079920768737793,
      "learning_rate": 6.274651924382096e-06,
      "loss": 0.0116,
      "step": 1150740
    },
    {
      "epoch": 1.8832439792358098,
      "grad_norm": 0.48616617918014526,
      "learning_rate": 6.274586032168579e-06,
      "loss": 0.02,
      "step": 1150760
    },
    {
      "epoch": 1.8832767096744631,
      "grad_norm": 0.988806426525116,
      "learning_rate": 6.274520139955061e-06,
      "loss": 0.0187,
      "step": 1150780
    },
    {
      "epoch": 1.8833094401131163,
      "grad_norm": 0.15516391396522522,
      "learning_rate": 6.274454247741545e-06,
      "loss": 0.0211,
      "step": 1150800
    },
    {
      "epoch": 1.88334217055177,
      "grad_norm": 0.2548491358757019,
      "learning_rate": 6.274388355528028e-06,
      "loss": 0.0149,
      "step": 1150820
    },
    {
      "epoch": 1.883374900990423,
      "grad_norm": 0.21682842075824738,
      "learning_rate": 6.27432246331451e-06,
      "loss": 0.0141,
      "step": 1150840
    },
    {
      "epoch": 1.8834076314290764,
      "grad_norm": 1.0756707191467285,
      "learning_rate": 6.274256571100994e-06,
      "loss": 0.0126,
      "step": 1150860
    },
    {
      "epoch": 1.8834403618677298,
      "grad_norm": 0.5129273533821106,
      "learning_rate": 6.2741906788874765e-06,
      "loss": 0.0157,
      "step": 1150880
    },
    {
      "epoch": 1.883473092306383,
      "grad_norm": 0.2961779832839966,
      "learning_rate": 6.274124786673959e-06,
      "loss": 0.0138,
      "step": 1150900
    },
    {
      "epoch": 1.8835058227450365,
      "grad_norm": 0.6401380896568298,
      "learning_rate": 6.274058894460442e-06,
      "loss": 0.0136,
      "step": 1150920
    },
    {
      "epoch": 1.8835385531836897,
      "grad_norm": 0.44159916043281555,
      "learning_rate": 6.2739930022469256e-06,
      "loss": 0.023,
      "step": 1150940
    },
    {
      "epoch": 1.8835712836223433,
      "grad_norm": 0.3087885081768036,
      "learning_rate": 6.2739271100334075e-06,
      "loss": 0.0186,
      "step": 1150960
    },
    {
      "epoch": 1.8836040140609964,
      "grad_norm": 0.25301897525787354,
      "learning_rate": 6.273861217819891e-06,
      "loss": 0.0188,
      "step": 1150980
    },
    {
      "epoch": 1.8836367444996498,
      "grad_norm": 0.48530974984169006,
      "learning_rate": 6.273795325606373e-06,
      "loss": 0.0224,
      "step": 1151000
    },
    {
      "epoch": 1.8836694749383032,
      "grad_norm": 1.102758526802063,
      "learning_rate": 6.2737294333928565e-06,
      "loss": 0.0228,
      "step": 1151020
    },
    {
      "epoch": 1.8837022053769563,
      "grad_norm": 0.34519073367118835,
      "learning_rate": 6.273663541179339e-06,
      "loss": 0.0181,
      "step": 1151040
    },
    {
      "epoch": 1.88373493581561,
      "grad_norm": 0.5859336853027344,
      "learning_rate": 6.273597648965822e-06,
      "loss": 0.0148,
      "step": 1151060
    },
    {
      "epoch": 1.883767666254263,
      "grad_norm": 0.679962694644928,
      "learning_rate": 6.273531756752305e-06,
      "loss": 0.0168,
      "step": 1151080
    },
    {
      "epoch": 1.8838003966929164,
      "grad_norm": 1.0064797401428223,
      "learning_rate": 6.273465864538788e-06,
      "loss": 0.0208,
      "step": 1151100
    },
    {
      "epoch": 1.8838331271315698,
      "grad_norm": 0.7889117002487183,
      "learning_rate": 6.27339997232527e-06,
      "loss": 0.0123,
      "step": 1151120
    },
    {
      "epoch": 1.8838658575702232,
      "grad_norm": 0.33939123153686523,
      "learning_rate": 6.273334080111754e-06,
      "loss": 0.0135,
      "step": 1151140
    },
    {
      "epoch": 1.8838985880088766,
      "grad_norm": 0.3897686302661896,
      "learning_rate": 6.273268187898237e-06,
      "loss": 0.0158,
      "step": 1151160
    },
    {
      "epoch": 1.8839313184475297,
      "grad_norm": 0.9404065608978271,
      "learning_rate": 6.273202295684719e-06,
      "loss": 0.0144,
      "step": 1151180
    },
    {
      "epoch": 1.8839640488861833,
      "grad_norm": 0.753288984298706,
      "learning_rate": 6.273136403471203e-06,
      "loss": 0.0223,
      "step": 1151200
    },
    {
      "epoch": 1.8839967793248364,
      "grad_norm": 0.1589227020740509,
      "learning_rate": 6.273070511257685e-06,
      "loss": 0.0171,
      "step": 1151220
    },
    {
      "epoch": 1.8840295097634898,
      "grad_norm": 0.7819592356681824,
      "learning_rate": 6.273004619044168e-06,
      "loss": 0.019,
      "step": 1151240
    },
    {
      "epoch": 1.8840622402021432,
      "grad_norm": 0.215071901679039,
      "learning_rate": 6.272938726830651e-06,
      "loss": 0.0205,
      "step": 1151260
    },
    {
      "epoch": 1.8840949706407966,
      "grad_norm": 0.2774921953678131,
      "learning_rate": 6.272872834617134e-06,
      "loss": 0.0296,
      "step": 1151280
    },
    {
      "epoch": 1.88412770107945,
      "grad_norm": 0.18154987692832947,
      "learning_rate": 6.272806942403617e-06,
      "loss": 0.0183,
      "step": 1151300
    },
    {
      "epoch": 1.884160431518103,
      "grad_norm": 0.07007184624671936,
      "learning_rate": 6.2727410501901e-06,
      "loss": 0.0184,
      "step": 1151320
    },
    {
      "epoch": 1.8841931619567567,
      "grad_norm": 0.38287702202796936,
      "learning_rate": 6.272675157976582e-06,
      "loss": 0.014,
      "step": 1151340
    },
    {
      "epoch": 1.8842258923954098,
      "grad_norm": 0.2461584508419037,
      "learning_rate": 6.272609265763066e-06,
      "loss": 0.0133,
      "step": 1151360
    },
    {
      "epoch": 1.8842586228340632,
      "grad_norm": 0.4621369540691376,
      "learning_rate": 6.2725433735495475e-06,
      "loss": 0.0136,
      "step": 1151380
    },
    {
      "epoch": 1.8842913532727166,
      "grad_norm": 0.309409499168396,
      "learning_rate": 6.272477481336031e-06,
      "loss": 0.0157,
      "step": 1151400
    },
    {
      "epoch": 1.88432408371137,
      "grad_norm": 0.5883254408836365,
      "learning_rate": 6.272411589122514e-06,
      "loss": 0.0208,
      "step": 1151420
    },
    {
      "epoch": 1.8843568141500233,
      "grad_norm": 0.15201601386070251,
      "learning_rate": 6.272345696908997e-06,
      "loss": 0.0163,
      "step": 1151440
    },
    {
      "epoch": 1.8843895445886765,
      "grad_norm": 1.1479642391204834,
      "learning_rate": 6.272279804695479e-06,
      "loss": 0.0197,
      "step": 1151460
    },
    {
      "epoch": 1.88442227502733,
      "grad_norm": 0.34071239829063416,
      "learning_rate": 6.272213912481963e-06,
      "loss": 0.019,
      "step": 1151480
    },
    {
      "epoch": 1.8844550054659832,
      "grad_norm": 0.16303329169750214,
      "learning_rate": 6.272148020268446e-06,
      "loss": 0.0155,
      "step": 1151500
    },
    {
      "epoch": 1.8844877359046366,
      "grad_norm": 0.17785249650478363,
      "learning_rate": 6.272082128054928e-06,
      "loss": 0.0138,
      "step": 1151520
    },
    {
      "epoch": 1.88452046634329,
      "grad_norm": 0.17931176722049713,
      "learning_rate": 6.272016235841412e-06,
      "loss": 0.0142,
      "step": 1151540
    },
    {
      "epoch": 1.884553196781943,
      "grad_norm": 0.7552043199539185,
      "learning_rate": 6.271950343627894e-06,
      "loss": 0.017,
      "step": 1151560
    },
    {
      "epoch": 1.8845859272205967,
      "grad_norm": 0.9621965289115906,
      "learning_rate": 6.2718844514143775e-06,
      "loss": 0.0148,
      "step": 1151580
    },
    {
      "epoch": 1.8846186576592499,
      "grad_norm": 0.47599056363105774,
      "learning_rate": 6.271818559200859e-06,
      "loss": 0.0188,
      "step": 1151600
    },
    {
      "epoch": 1.8846513880979034,
      "grad_norm": 0.3328804671764374,
      "learning_rate": 6.271752666987343e-06,
      "loss": 0.0144,
      "step": 1151620
    },
    {
      "epoch": 1.8846841185365566,
      "grad_norm": 0.3193765878677368,
      "learning_rate": 6.271686774773825e-06,
      "loss": 0.0131,
      "step": 1151640
    },
    {
      "epoch": 1.88471684897521,
      "grad_norm": 1.9828606843948364,
      "learning_rate": 6.2716208825603084e-06,
      "loss": 0.0162,
      "step": 1151660
    },
    {
      "epoch": 1.8847495794138633,
      "grad_norm": 0.37265893816947937,
      "learning_rate": 6.271554990346791e-06,
      "loss": 0.0151,
      "step": 1151680
    },
    {
      "epoch": 1.8847823098525165,
      "grad_norm": 1.703888177871704,
      "learning_rate": 6.271489098133274e-06,
      "loss": 0.0185,
      "step": 1151700
    },
    {
      "epoch": 1.88481504029117,
      "grad_norm": 0.21402013301849365,
      "learning_rate": 6.271423205919757e-06,
      "loss": 0.0182,
      "step": 1151720
    },
    {
      "epoch": 1.8848477707298232,
      "grad_norm": 0.5852549076080322,
      "learning_rate": 6.27135731370624e-06,
      "loss": 0.0233,
      "step": 1151740
    },
    {
      "epoch": 1.8848805011684766,
      "grad_norm": 0.13466601073741913,
      "learning_rate": 6.271291421492722e-06,
      "loss": 0.0226,
      "step": 1151760
    },
    {
      "epoch": 1.88491323160713,
      "grad_norm": 0.59032142162323,
      "learning_rate": 6.271225529279206e-06,
      "loss": 0.0186,
      "step": 1151780
    },
    {
      "epoch": 1.8849459620457834,
      "grad_norm": 0.38885897397994995,
      "learning_rate": 6.271159637065688e-06,
      "loss": 0.0137,
      "step": 1151800
    },
    {
      "epoch": 1.8849786924844367,
      "grad_norm": 0.30300673842430115,
      "learning_rate": 6.271093744852171e-06,
      "loss": 0.0256,
      "step": 1151820
    },
    {
      "epoch": 1.8850114229230899,
      "grad_norm": 0.23058944940567017,
      "learning_rate": 6.271027852638654e-06,
      "loss": 0.0172,
      "step": 1151840
    },
    {
      "epoch": 1.8850441533617435,
      "grad_norm": 0.4233560562133789,
      "learning_rate": 6.270961960425137e-06,
      "loss": 0.0197,
      "step": 1151860
    },
    {
      "epoch": 1.8850768838003966,
      "grad_norm": 0.3324681520462036,
      "learning_rate": 6.27089606821162e-06,
      "loss": 0.0171,
      "step": 1151880
    },
    {
      "epoch": 1.88510961423905,
      "grad_norm": 0.12874732911586761,
      "learning_rate": 6.270830175998103e-06,
      "loss": 0.0174,
      "step": 1151900
    },
    {
      "epoch": 1.8851423446777034,
      "grad_norm": 0.47005313634872437,
      "learning_rate": 6.270764283784586e-06,
      "loss": 0.0191,
      "step": 1151920
    },
    {
      "epoch": 1.8851750751163567,
      "grad_norm": 0.36906301975250244,
      "learning_rate": 6.2706983915710685e-06,
      "loss": 0.0172,
      "step": 1151940
    },
    {
      "epoch": 1.88520780555501,
      "grad_norm": 0.24914096295833588,
      "learning_rate": 6.270632499357552e-06,
      "loss": 0.0142,
      "step": 1151960
    },
    {
      "epoch": 1.8852405359936633,
      "grad_norm": 0.752495288848877,
      "learning_rate": 6.270566607144034e-06,
      "loss": 0.0181,
      "step": 1151980
    },
    {
      "epoch": 1.8852732664323169,
      "grad_norm": 0.24547411501407623,
      "learning_rate": 6.2705007149305176e-06,
      "loss": 0.0175,
      "step": 1152000
    },
    {
      "epoch": 1.88530599687097,
      "grad_norm": 0.44573983550071716,
      "learning_rate": 6.2704348227169994e-06,
      "loss": 0.0159,
      "step": 1152020
    },
    {
      "epoch": 1.8853387273096234,
      "grad_norm": 0.5690205693244934,
      "learning_rate": 6.270368930503483e-06,
      "loss": 0.0251,
      "step": 1152040
    },
    {
      "epoch": 1.8853714577482767,
      "grad_norm": 0.09458911418914795,
      "learning_rate": 6.270303038289966e-06,
      "loss": 0.024,
      "step": 1152060
    },
    {
      "epoch": 1.8854041881869301,
      "grad_norm": 0.39475536346435547,
      "learning_rate": 6.2702371460764485e-06,
      "loss": 0.0242,
      "step": 1152080
    },
    {
      "epoch": 1.8854369186255835,
      "grad_norm": 1.0466506481170654,
      "learning_rate": 6.270171253862931e-06,
      "loss": 0.0238,
      "step": 1152100
    },
    {
      "epoch": 1.8854696490642366,
      "grad_norm": 0.3109866976737976,
      "learning_rate": 6.270105361649415e-06,
      "loss": 0.0186,
      "step": 1152120
    },
    {
      "epoch": 1.8855023795028902,
      "grad_norm": 0.23593223094940186,
      "learning_rate": 6.270039469435897e-06,
      "loss": 0.0194,
      "step": 1152140
    },
    {
      "epoch": 1.8855351099415434,
      "grad_norm": 1.3143229484558105,
      "learning_rate": 6.26997357722238e-06,
      "loss": 0.0204,
      "step": 1152160
    },
    {
      "epoch": 1.8855678403801968,
      "grad_norm": 0.18720676004886627,
      "learning_rate": 6.269907685008862e-06,
      "loss": 0.0161,
      "step": 1152180
    },
    {
      "epoch": 1.8856005708188501,
      "grad_norm": 0.39461687207221985,
      "learning_rate": 6.269841792795346e-06,
      "loss": 0.0182,
      "step": 1152200
    },
    {
      "epoch": 1.8856333012575035,
      "grad_norm": 0.7252928018569946,
      "learning_rate": 6.269775900581829e-06,
      "loss": 0.0175,
      "step": 1152220
    },
    {
      "epoch": 1.8856660316961569,
      "grad_norm": 0.5067145824432373,
      "learning_rate": 6.269710008368311e-06,
      "loss": 0.0173,
      "step": 1152240
    },
    {
      "epoch": 1.88569876213481,
      "grad_norm": 0.6480017304420471,
      "learning_rate": 6.269644116154795e-06,
      "loss": 0.0093,
      "step": 1152260
    },
    {
      "epoch": 1.8857314925734636,
      "grad_norm": 0.5855037569999695,
      "learning_rate": 6.269578223941278e-06,
      "loss": 0.0232,
      "step": 1152280
    },
    {
      "epoch": 1.8857642230121168,
      "grad_norm": 0.18336427211761475,
      "learning_rate": 6.26951233172776e-06,
      "loss": 0.0223,
      "step": 1152300
    },
    {
      "epoch": 1.8857969534507701,
      "grad_norm": 1.0822534561157227,
      "learning_rate": 6.269446439514243e-06,
      "loss": 0.02,
      "step": 1152320
    },
    {
      "epoch": 1.8858296838894235,
      "grad_norm": 0.4470082223415375,
      "learning_rate": 6.269380547300727e-06,
      "loss": 0.0246,
      "step": 1152340
    },
    {
      "epoch": 1.8858624143280767,
      "grad_norm": 0.47552523016929626,
      "learning_rate": 6.2693146550872086e-06,
      "loss": 0.0169,
      "step": 1152360
    },
    {
      "epoch": 1.8858951447667303,
      "grad_norm": 0.5843846201896667,
      "learning_rate": 6.269248762873692e-06,
      "loss": 0.0195,
      "step": 1152380
    },
    {
      "epoch": 1.8859278752053834,
      "grad_norm": 0.3950897753238678,
      "learning_rate": 6.269182870660174e-06,
      "loss": 0.0204,
      "step": 1152400
    },
    {
      "epoch": 1.885960605644037,
      "grad_norm": 0.1210474893450737,
      "learning_rate": 6.269116978446658e-06,
      "loss": 0.0212,
      "step": 1152420
    },
    {
      "epoch": 1.8859933360826902,
      "grad_norm": 0.2215481996536255,
      "learning_rate": 6.26905108623314e-06,
      "loss": 0.0116,
      "step": 1152440
    },
    {
      "epoch": 1.8860260665213435,
      "grad_norm": 0.463429719209671,
      "learning_rate": 6.268985194019623e-06,
      "loss": 0.0221,
      "step": 1152460
    },
    {
      "epoch": 1.886058796959997,
      "grad_norm": 0.986270546913147,
      "learning_rate": 6.268919301806106e-06,
      "loss": 0.0223,
      "step": 1152480
    },
    {
      "epoch": 1.88609152739865,
      "grad_norm": 0.8363441228866577,
      "learning_rate": 6.2688534095925894e-06,
      "loss": 0.0201,
      "step": 1152500
    },
    {
      "epoch": 1.8861242578373036,
      "grad_norm": 1.028745412826538,
      "learning_rate": 6.268787517379071e-06,
      "loss": 0.0164,
      "step": 1152520
    },
    {
      "epoch": 1.8861569882759568,
      "grad_norm": 0.1049952358007431,
      "learning_rate": 6.268721625165555e-06,
      "loss": 0.018,
      "step": 1152540
    },
    {
      "epoch": 1.8861897187146102,
      "grad_norm": 0.857552170753479,
      "learning_rate": 6.2686557329520385e-06,
      "loss": 0.015,
      "step": 1152560
    },
    {
      "epoch": 1.8862224491532635,
      "grad_norm": 1.0144710540771484,
      "learning_rate": 6.26858984073852e-06,
      "loss": 0.0248,
      "step": 1152580
    },
    {
      "epoch": 1.886255179591917,
      "grad_norm": 0.7277079820632935,
      "learning_rate": 6.268523948525004e-06,
      "loss": 0.0187,
      "step": 1152600
    },
    {
      "epoch": 1.8862879100305703,
      "grad_norm": 0.5072827935218811,
      "learning_rate": 6.268458056311486e-06,
      "loss": 0.0233,
      "step": 1152620
    },
    {
      "epoch": 1.8863206404692234,
      "grad_norm": 0.29358971118927,
      "learning_rate": 6.2683921640979695e-06,
      "loss": 0.0106,
      "step": 1152640
    },
    {
      "epoch": 1.886353370907877,
      "grad_norm": 0.409767746925354,
      "learning_rate": 6.268326271884451e-06,
      "loss": 0.0203,
      "step": 1152660
    },
    {
      "epoch": 1.8863861013465302,
      "grad_norm": 0.9973176121711731,
      "learning_rate": 6.268260379670935e-06,
      "loss": 0.0141,
      "step": 1152680
    },
    {
      "epoch": 1.8864188317851835,
      "grad_norm": 0.29386046528816223,
      "learning_rate": 6.268194487457418e-06,
      "loss": 0.0203,
      "step": 1152700
    },
    {
      "epoch": 1.886451562223837,
      "grad_norm": 0.31938791275024414,
      "learning_rate": 6.2681285952439e-06,
      "loss": 0.0191,
      "step": 1152720
    },
    {
      "epoch": 1.8864842926624903,
      "grad_norm": 0.9724239110946655,
      "learning_rate": 6.268062703030383e-06,
      "loss": 0.0122,
      "step": 1152740
    },
    {
      "epoch": 1.8865170231011437,
      "grad_norm": 0.23722365498542786,
      "learning_rate": 6.267996810816867e-06,
      "loss": 0.0113,
      "step": 1152760
    },
    {
      "epoch": 1.8865497535397968,
      "grad_norm": 0.3633381724357605,
      "learning_rate": 6.267930918603349e-06,
      "loss": 0.0228,
      "step": 1152780
    },
    {
      "epoch": 1.8865824839784504,
      "grad_norm": 0.07584179192781448,
      "learning_rate": 6.267865026389832e-06,
      "loss": 0.0126,
      "step": 1152800
    },
    {
      "epoch": 1.8866152144171036,
      "grad_norm": 0.5172999501228333,
      "learning_rate": 6.267799134176314e-06,
      "loss": 0.0163,
      "step": 1152820
    },
    {
      "epoch": 1.886647944855757,
      "grad_norm": 1.3578078746795654,
      "learning_rate": 6.267733241962798e-06,
      "loss": 0.0184,
      "step": 1152840
    },
    {
      "epoch": 1.8866806752944103,
      "grad_norm": 0.28646019101142883,
      "learning_rate": 6.2676673497492804e-06,
      "loss": 0.0267,
      "step": 1152860
    },
    {
      "epoch": 1.8867134057330637,
      "grad_norm": 0.4840790331363678,
      "learning_rate": 6.267601457535763e-06,
      "loss": 0.0225,
      "step": 1152880
    },
    {
      "epoch": 1.886746136171717,
      "grad_norm": 0.32626599073410034,
      "learning_rate": 6.267535565322246e-06,
      "loss": 0.0187,
      "step": 1152900
    },
    {
      "epoch": 1.8867788666103702,
      "grad_norm": 0.644191563129425,
      "learning_rate": 6.2674696731087295e-06,
      "loss": 0.017,
      "step": 1152920
    },
    {
      "epoch": 1.8868115970490238,
      "grad_norm": 0.47707730531692505,
      "learning_rate": 6.267403780895212e-06,
      "loss": 0.0137,
      "step": 1152940
    },
    {
      "epoch": 1.886844327487677,
      "grad_norm": 0.06976316124200821,
      "learning_rate": 6.267337888681695e-06,
      "loss": 0.0174,
      "step": 1152960
    },
    {
      "epoch": 1.8868770579263303,
      "grad_norm": 0.42926815152168274,
      "learning_rate": 6.267271996468179e-06,
      "loss": 0.0153,
      "step": 1152980
    },
    {
      "epoch": 1.8869097883649837,
      "grad_norm": 0.5455487370491028,
      "learning_rate": 6.2672061042546605e-06,
      "loss": 0.025,
      "step": 1153000
    },
    {
      "epoch": 1.886942518803637,
      "grad_norm": 0.488206148147583,
      "learning_rate": 6.267140212041144e-06,
      "loss": 0.0211,
      "step": 1153020
    },
    {
      "epoch": 1.8869752492422904,
      "grad_norm": 0.2433115690946579,
      "learning_rate": 6.267074319827626e-06,
      "loss": 0.0242,
      "step": 1153040
    },
    {
      "epoch": 1.8870079796809436,
      "grad_norm": 0.5771623849868774,
      "learning_rate": 6.2670084276141095e-06,
      "loss": 0.0269,
      "step": 1153060
    },
    {
      "epoch": 1.8870407101195972,
      "grad_norm": 0.5862498879432678,
      "learning_rate": 6.266942535400592e-06,
      "loss": 0.0199,
      "step": 1153080
    },
    {
      "epoch": 1.8870734405582503,
      "grad_norm": 0.3617279827594757,
      "learning_rate": 6.266876643187075e-06,
      "loss": 0.0199,
      "step": 1153100
    },
    {
      "epoch": 1.8871061709969037,
      "grad_norm": 1.11594557762146,
      "learning_rate": 6.266810750973558e-06,
      "loss": 0.0157,
      "step": 1153120
    },
    {
      "epoch": 1.887138901435557,
      "grad_norm": 0.19782909750938416,
      "learning_rate": 6.266744858760041e-06,
      "loss": 0.0232,
      "step": 1153140
    },
    {
      "epoch": 1.8871716318742102,
      "grad_norm": 0.3767778277397156,
      "learning_rate": 6.266678966546523e-06,
      "loss": 0.0184,
      "step": 1153160
    },
    {
      "epoch": 1.8872043623128638,
      "grad_norm": 0.3869069516658783,
      "learning_rate": 6.266613074333007e-06,
      "loss": 0.0132,
      "step": 1153180
    },
    {
      "epoch": 1.887237092751517,
      "grad_norm": 0.6167915463447571,
      "learning_rate": 6.266547182119489e-06,
      "loss": 0.0253,
      "step": 1153200
    },
    {
      "epoch": 1.8872698231901706,
      "grad_norm": 0.4559275805950165,
      "learning_rate": 6.266481289905972e-06,
      "loss": 0.0175,
      "step": 1153220
    },
    {
      "epoch": 1.8873025536288237,
      "grad_norm": 1.2755991220474243,
      "learning_rate": 6.266415397692455e-06,
      "loss": 0.0201,
      "step": 1153240
    },
    {
      "epoch": 1.887335284067477,
      "grad_norm": 0.2718317210674286,
      "learning_rate": 6.266349505478938e-06,
      "loss": 0.0261,
      "step": 1153260
    },
    {
      "epoch": 1.8873680145061305,
      "grad_norm": 0.5036317706108093,
      "learning_rate": 6.266283613265421e-06,
      "loss": 0.0177,
      "step": 1153280
    },
    {
      "epoch": 1.8874007449447836,
      "grad_norm": 0.19203445315361023,
      "learning_rate": 6.266217721051904e-06,
      "loss": 0.0179,
      "step": 1153300
    },
    {
      "epoch": 1.8874334753834372,
      "grad_norm": 0.8359750509262085,
      "learning_rate": 6.266151828838387e-06,
      "loss": 0.0184,
      "step": 1153320
    },
    {
      "epoch": 1.8874662058220903,
      "grad_norm": 0.2056509256362915,
      "learning_rate": 6.26608593662487e-06,
      "loss": 0.0135,
      "step": 1153340
    },
    {
      "epoch": 1.8874989362607437,
      "grad_norm": 0.6801028251647949,
      "learning_rate": 6.266020044411353e-06,
      "loss": 0.0217,
      "step": 1153360
    },
    {
      "epoch": 1.887531666699397,
      "grad_norm": 2.065805435180664,
      "learning_rate": 6.265954152197835e-06,
      "loss": 0.013,
      "step": 1153380
    },
    {
      "epoch": 1.8875643971380505,
      "grad_norm": 0.20098213851451874,
      "learning_rate": 6.265888259984319e-06,
      "loss": 0.0128,
      "step": 1153400
    },
    {
      "epoch": 1.8875971275767038,
      "grad_norm": 1.0154914855957031,
      "learning_rate": 6.2658223677708005e-06,
      "loss": 0.0258,
      "step": 1153420
    },
    {
      "epoch": 1.887629858015357,
      "grad_norm": 0.7019158601760864,
      "learning_rate": 6.265756475557284e-06,
      "loss": 0.0179,
      "step": 1153440
    },
    {
      "epoch": 1.8876625884540106,
      "grad_norm": 0.7409683465957642,
      "learning_rate": 6.265690583343767e-06,
      "loss": 0.0188,
      "step": 1153460
    },
    {
      "epoch": 1.8876953188926637,
      "grad_norm": 0.46087342500686646,
      "learning_rate": 6.26562469113025e-06,
      "loss": 0.0144,
      "step": 1153480
    },
    {
      "epoch": 1.887728049331317,
      "grad_norm": 0.42310526967048645,
      "learning_rate": 6.265558798916732e-06,
      "loss": 0.0194,
      "step": 1153500
    },
    {
      "epoch": 1.8877607797699705,
      "grad_norm": 3.1446738243103027,
      "learning_rate": 6.265492906703216e-06,
      "loss": 0.0219,
      "step": 1153520
    },
    {
      "epoch": 1.8877935102086238,
      "grad_norm": 0.30173417925834656,
      "learning_rate": 6.265427014489698e-06,
      "loss": 0.0157,
      "step": 1153540
    },
    {
      "epoch": 1.8878262406472772,
      "grad_norm": 0.45596566796302795,
      "learning_rate": 6.265361122276181e-06,
      "loss": 0.0211,
      "step": 1153560
    },
    {
      "epoch": 1.8878589710859304,
      "grad_norm": 0.3435550332069397,
      "learning_rate": 6.265295230062663e-06,
      "loss": 0.0183,
      "step": 1153580
    },
    {
      "epoch": 1.887891701524584,
      "grad_norm": 0.5121763944625854,
      "learning_rate": 6.265229337849147e-06,
      "loss": 0.0123,
      "step": 1153600
    },
    {
      "epoch": 1.8879244319632371,
      "grad_norm": 0.25240498781204224,
      "learning_rate": 6.2651634456356305e-06,
      "loss": 0.0175,
      "step": 1153620
    },
    {
      "epoch": 1.8879571624018905,
      "grad_norm": 0.18538986146450043,
      "learning_rate": 6.265097553422112e-06,
      "loss": 0.0167,
      "step": 1153640
    },
    {
      "epoch": 1.8879898928405439,
      "grad_norm": 0.21055789291858673,
      "learning_rate": 6.265031661208596e-06,
      "loss": 0.0176,
      "step": 1153660
    },
    {
      "epoch": 1.8880226232791972,
      "grad_norm": 0.40034061670303345,
      "learning_rate": 6.264965768995078e-06,
      "loss": 0.0182,
      "step": 1153680
    },
    {
      "epoch": 1.8880553537178506,
      "grad_norm": 0.5327939391136169,
      "learning_rate": 6.2648998767815614e-06,
      "loss": 0.0273,
      "step": 1153700
    },
    {
      "epoch": 1.8880880841565038,
      "grad_norm": 1.1914006471633911,
      "learning_rate": 6.264833984568044e-06,
      "loss": 0.0203,
      "step": 1153720
    },
    {
      "epoch": 1.8881208145951573,
      "grad_norm": 0.37849363684654236,
      "learning_rate": 6.264768092354527e-06,
      "loss": 0.0151,
      "step": 1153740
    },
    {
      "epoch": 1.8881535450338105,
      "grad_norm": 0.8648372292518616,
      "learning_rate": 6.26470220014101e-06,
      "loss": 0.0143,
      "step": 1153760
    },
    {
      "epoch": 1.8881862754724639,
      "grad_norm": 0.7413357496261597,
      "learning_rate": 6.264636307927493e-06,
      "loss": 0.0216,
      "step": 1153780
    },
    {
      "epoch": 1.8882190059111172,
      "grad_norm": 0.34153759479522705,
      "learning_rate": 6.264570415713975e-06,
      "loss": 0.0171,
      "step": 1153800
    },
    {
      "epoch": 1.8882517363497704,
      "grad_norm": 0.31462106108665466,
      "learning_rate": 6.264504523500459e-06,
      "loss": 0.0141,
      "step": 1153820
    },
    {
      "epoch": 1.888284466788424,
      "grad_norm": 0.4392186999320984,
      "learning_rate": 6.264438631286941e-06,
      "loss": 0.0111,
      "step": 1153840
    },
    {
      "epoch": 1.8883171972270771,
      "grad_norm": 0.3189586102962494,
      "learning_rate": 6.264372739073424e-06,
      "loss": 0.0149,
      "step": 1153860
    },
    {
      "epoch": 1.8883499276657307,
      "grad_norm": 0.5989952087402344,
      "learning_rate": 6.264306846859907e-06,
      "loss": 0.0162,
      "step": 1153880
    },
    {
      "epoch": 1.8883826581043839,
      "grad_norm": 0.48490071296691895,
      "learning_rate": 6.26424095464639e-06,
      "loss": 0.0152,
      "step": 1153900
    },
    {
      "epoch": 1.8884153885430373,
      "grad_norm": 0.4108978509902954,
      "learning_rate": 6.2641750624328724e-06,
      "loss": 0.0147,
      "step": 1153920
    },
    {
      "epoch": 1.8884481189816906,
      "grad_norm": 0.17884516716003418,
      "learning_rate": 6.264109170219356e-06,
      "loss": 0.024,
      "step": 1153940
    },
    {
      "epoch": 1.8884808494203438,
      "grad_norm": 0.3565120995044708,
      "learning_rate": 6.264043278005839e-06,
      "loss": 0.0217,
      "step": 1153960
    },
    {
      "epoch": 1.8885135798589974,
      "grad_norm": 0.6004021763801575,
      "learning_rate": 6.2639773857923215e-06,
      "loss": 0.0175,
      "step": 1153980
    },
    {
      "epoch": 1.8885463102976505,
      "grad_norm": 0.31432488560676575,
      "learning_rate": 6.263911493578805e-06,
      "loss": 0.017,
      "step": 1154000
    },
    {
      "epoch": 1.888579040736304,
      "grad_norm": 0.10346971452236176,
      "learning_rate": 6.263845601365287e-06,
      "loss": 0.0162,
      "step": 1154020
    },
    {
      "epoch": 1.8886117711749573,
      "grad_norm": 0.5209211111068726,
      "learning_rate": 6.2637797091517706e-06,
      "loss": 0.0181,
      "step": 1154040
    },
    {
      "epoch": 1.8886445016136106,
      "grad_norm": 1.0339339971542358,
      "learning_rate": 6.2637138169382525e-06,
      "loss": 0.0199,
      "step": 1154060
    },
    {
      "epoch": 1.888677232052264,
      "grad_norm": 0.45873117446899414,
      "learning_rate": 6.263647924724736e-06,
      "loss": 0.0173,
      "step": 1154080
    },
    {
      "epoch": 1.8887099624909172,
      "grad_norm": 0.30009281635284424,
      "learning_rate": 6.263582032511219e-06,
      "loss": 0.0218,
      "step": 1154100
    },
    {
      "epoch": 1.8887426929295708,
      "grad_norm": 0.24810625612735748,
      "learning_rate": 6.2635161402977015e-06,
      "loss": 0.0203,
      "step": 1154120
    },
    {
      "epoch": 1.888775423368224,
      "grad_norm": 0.24054023623466492,
      "learning_rate": 6.263450248084184e-06,
      "loss": 0.0192,
      "step": 1154140
    },
    {
      "epoch": 1.8888081538068773,
      "grad_norm": 0.27924516797065735,
      "learning_rate": 6.263384355870668e-06,
      "loss": 0.0119,
      "step": 1154160
    },
    {
      "epoch": 1.8888408842455306,
      "grad_norm": 0.5438651442527771,
      "learning_rate": 6.26331846365715e-06,
      "loss": 0.0212,
      "step": 1154180
    },
    {
      "epoch": 1.888873614684184,
      "grad_norm": 2.1472556591033936,
      "learning_rate": 6.263252571443633e-06,
      "loss": 0.0133,
      "step": 1154200
    },
    {
      "epoch": 1.8889063451228374,
      "grad_norm": 0.49491238594055176,
      "learning_rate": 6.263186679230115e-06,
      "loss": 0.0159,
      "step": 1154220
    },
    {
      "epoch": 1.8889390755614905,
      "grad_norm": 0.3968311846256256,
      "learning_rate": 6.263120787016599e-06,
      "loss": 0.0239,
      "step": 1154240
    },
    {
      "epoch": 1.8889718060001441,
      "grad_norm": 0.238965705037117,
      "learning_rate": 6.2630548948030815e-06,
      "loss": 0.0284,
      "step": 1154260
    },
    {
      "epoch": 1.8890045364387973,
      "grad_norm": 0.08589620888233185,
      "learning_rate": 6.262989002589564e-06,
      "loss": 0.0139,
      "step": 1154280
    },
    {
      "epoch": 1.8890372668774507,
      "grad_norm": 0.2540755569934845,
      "learning_rate": 6.262923110376047e-06,
      "loss": 0.0198,
      "step": 1154300
    },
    {
      "epoch": 1.889069997316104,
      "grad_norm": 0.45121145248413086,
      "learning_rate": 6.262857218162531e-06,
      "loss": 0.0162,
      "step": 1154320
    },
    {
      "epoch": 1.8891027277547574,
      "grad_norm": 0.8857627511024475,
      "learning_rate": 6.262791325949013e-06,
      "loss": 0.0276,
      "step": 1154340
    },
    {
      "epoch": 1.8891354581934108,
      "grad_norm": 0.33759456872940063,
      "learning_rate": 6.262725433735496e-06,
      "loss": 0.024,
      "step": 1154360
    },
    {
      "epoch": 1.889168188632064,
      "grad_norm": 0.27505365014076233,
      "learning_rate": 6.26265954152198e-06,
      "loss": 0.0156,
      "step": 1154380
    },
    {
      "epoch": 1.8892009190707175,
      "grad_norm": 0.26439592242240906,
      "learning_rate": 6.2625936493084616e-06,
      "loss": 0.0104,
      "step": 1154400
    },
    {
      "epoch": 1.8892336495093707,
      "grad_norm": 0.6303254961967468,
      "learning_rate": 6.262527757094945e-06,
      "loss": 0.0206,
      "step": 1154420
    },
    {
      "epoch": 1.889266379948024,
      "grad_norm": 0.4824520945549011,
      "learning_rate": 6.262461864881427e-06,
      "loss": 0.0134,
      "step": 1154440
    },
    {
      "epoch": 1.8892991103866774,
      "grad_norm": 0.814542293548584,
      "learning_rate": 6.262395972667911e-06,
      "loss": 0.0191,
      "step": 1154460
    },
    {
      "epoch": 1.8893318408253308,
      "grad_norm": 0.4482482671737671,
      "learning_rate": 6.2623300804543925e-06,
      "loss": 0.0172,
      "step": 1154480
    },
    {
      "epoch": 1.8893645712639842,
      "grad_norm": 0.41151779890060425,
      "learning_rate": 6.262264188240876e-06,
      "loss": 0.021,
      "step": 1154500
    },
    {
      "epoch": 1.8893973017026373,
      "grad_norm": 0.15555177628993988,
      "learning_rate": 6.262198296027359e-06,
      "loss": 0.023,
      "step": 1154520
    },
    {
      "epoch": 1.889430032141291,
      "grad_norm": 0.35732513666152954,
      "learning_rate": 6.262132403813842e-06,
      "loss": 0.0149,
      "step": 1154540
    },
    {
      "epoch": 1.889462762579944,
      "grad_norm": 0.15148873627185822,
      "learning_rate": 6.262066511600324e-06,
      "loss": 0.0143,
      "step": 1154560
    },
    {
      "epoch": 1.8894954930185974,
      "grad_norm": 0.18905510008335114,
      "learning_rate": 6.262000619386808e-06,
      "loss": 0.0138,
      "step": 1154580
    },
    {
      "epoch": 1.8895282234572508,
      "grad_norm": 0.3985458016395569,
      "learning_rate": 6.26193472717329e-06,
      "loss": 0.0192,
      "step": 1154600
    },
    {
      "epoch": 1.889560953895904,
      "grad_norm": 0.2878226637840271,
      "learning_rate": 6.261868834959773e-06,
      "loss": 0.0211,
      "step": 1154620
    },
    {
      "epoch": 1.8895936843345575,
      "grad_norm": 1.0185887813568115,
      "learning_rate": 6.261802942746255e-06,
      "loss": 0.0184,
      "step": 1154640
    },
    {
      "epoch": 1.8896264147732107,
      "grad_norm": 0.1547032743692398,
      "learning_rate": 6.261737050532739e-06,
      "loss": 0.016,
      "step": 1154660
    },
    {
      "epoch": 1.8896591452118643,
      "grad_norm": 0.2794601619243622,
      "learning_rate": 6.2616711583192225e-06,
      "loss": 0.0251,
      "step": 1154680
    },
    {
      "epoch": 1.8896918756505174,
      "grad_norm": 0.3804500102996826,
      "learning_rate": 6.261605266105704e-06,
      "loss": 0.0174,
      "step": 1154700
    },
    {
      "epoch": 1.8897246060891708,
      "grad_norm": 0.8682355880737305,
      "learning_rate": 6.261539373892188e-06,
      "loss": 0.0183,
      "step": 1154720
    },
    {
      "epoch": 1.8897573365278242,
      "grad_norm": 0.17142371833324432,
      "learning_rate": 6.261473481678671e-06,
      "loss": 0.0087,
      "step": 1154740
    },
    {
      "epoch": 1.8897900669664773,
      "grad_norm": 0.194579616189003,
      "learning_rate": 6.2614075894651534e-06,
      "loss": 0.0149,
      "step": 1154760
    },
    {
      "epoch": 1.889822797405131,
      "grad_norm": 0.16220030188560486,
      "learning_rate": 6.261341697251636e-06,
      "loss": 0.0233,
      "step": 1154780
    },
    {
      "epoch": 1.889855527843784,
      "grad_norm": 0.1625346541404724,
      "learning_rate": 6.26127580503812e-06,
      "loss": 0.0217,
      "step": 1154800
    },
    {
      "epoch": 1.8898882582824374,
      "grad_norm": 0.3735980689525604,
      "learning_rate": 6.261209912824602e-06,
      "loss": 0.0227,
      "step": 1154820
    },
    {
      "epoch": 1.8899209887210908,
      "grad_norm": 0.9418748617172241,
      "learning_rate": 6.261144020611085e-06,
      "loss": 0.0213,
      "step": 1154840
    },
    {
      "epoch": 1.8899537191597442,
      "grad_norm": 0.1942141205072403,
      "learning_rate": 6.261078128397567e-06,
      "loss": 0.0246,
      "step": 1154860
    },
    {
      "epoch": 1.8899864495983976,
      "grad_norm": 0.570896565914154,
      "learning_rate": 6.261012236184051e-06,
      "loss": 0.019,
      "step": 1154880
    },
    {
      "epoch": 1.8900191800370507,
      "grad_norm": 0.5479193329811096,
      "learning_rate": 6.2609463439705335e-06,
      "loss": 0.0151,
      "step": 1154900
    },
    {
      "epoch": 1.8900519104757043,
      "grad_norm": 0.5400863885879517,
      "learning_rate": 6.260880451757016e-06,
      "loss": 0.0184,
      "step": 1154920
    },
    {
      "epoch": 1.8900846409143575,
      "grad_norm": 0.13343599438667297,
      "learning_rate": 6.260814559543499e-06,
      "loss": 0.0174,
      "step": 1154940
    },
    {
      "epoch": 1.8901173713530108,
      "grad_norm": 0.22512508928775787,
      "learning_rate": 6.2607486673299825e-06,
      "loss": 0.0168,
      "step": 1154960
    },
    {
      "epoch": 1.8901501017916642,
      "grad_norm": 0.6110854148864746,
      "learning_rate": 6.260682775116464e-06,
      "loss": 0.0152,
      "step": 1154980
    },
    {
      "epoch": 1.8901828322303176,
      "grad_norm": 0.49349093437194824,
      "learning_rate": 6.260616882902948e-06,
      "loss": 0.0149,
      "step": 1155000
    },
    {
      "epoch": 1.890215562668971,
      "grad_norm": 0.9037660956382751,
      "learning_rate": 6.260550990689432e-06,
      "loss": 0.0165,
      "step": 1155020
    },
    {
      "epoch": 1.890248293107624,
      "grad_norm": 0.5333676338195801,
      "learning_rate": 6.2604850984759135e-06,
      "loss": 0.021,
      "step": 1155040
    },
    {
      "epoch": 1.8902810235462777,
      "grad_norm": 0.24798040091991425,
      "learning_rate": 6.260419206262397e-06,
      "loss": 0.0138,
      "step": 1155060
    },
    {
      "epoch": 1.8903137539849308,
      "grad_norm": 0.6260000467300415,
      "learning_rate": 6.260353314048879e-06,
      "loss": 0.0138,
      "step": 1155080
    },
    {
      "epoch": 1.8903464844235842,
      "grad_norm": 1.2392255067825317,
      "learning_rate": 6.2602874218353625e-06,
      "loss": 0.013,
      "step": 1155100
    },
    {
      "epoch": 1.8903792148622376,
      "grad_norm": 0.1486547589302063,
      "learning_rate": 6.260221529621845e-06,
      "loss": 0.0256,
      "step": 1155120
    },
    {
      "epoch": 1.890411945300891,
      "grad_norm": 4.751842021942139,
      "learning_rate": 6.260155637408328e-06,
      "loss": 0.0287,
      "step": 1155140
    },
    {
      "epoch": 1.8904446757395443,
      "grad_norm": 0.42264753580093384,
      "learning_rate": 6.260089745194811e-06,
      "loss": 0.0151,
      "step": 1155160
    },
    {
      "epoch": 1.8904774061781975,
      "grad_norm": 0.47034668922424316,
      "learning_rate": 6.260023852981294e-06,
      "loss": 0.0182,
      "step": 1155180
    },
    {
      "epoch": 1.890510136616851,
      "grad_norm": 0.3625103235244751,
      "learning_rate": 6.259957960767776e-06,
      "loss": 0.0133,
      "step": 1155200
    },
    {
      "epoch": 1.8905428670555042,
      "grad_norm": 0.4883310794830322,
      "learning_rate": 6.25989206855426e-06,
      "loss": 0.0177,
      "step": 1155220
    },
    {
      "epoch": 1.8905755974941576,
      "grad_norm": 0.5791193842887878,
      "learning_rate": 6.259826176340742e-06,
      "loss": 0.0202,
      "step": 1155240
    },
    {
      "epoch": 1.890608327932811,
      "grad_norm": 0.1735217422246933,
      "learning_rate": 6.259760284127225e-06,
      "loss": 0.0199,
      "step": 1155260
    },
    {
      "epoch": 1.8906410583714643,
      "grad_norm": 1.62888765335083,
      "learning_rate": 6.259694391913708e-06,
      "loss": 0.0121,
      "step": 1155280
    },
    {
      "epoch": 1.8906737888101177,
      "grad_norm": 0.4015054702758789,
      "learning_rate": 6.259628499700191e-06,
      "loss": 0.019,
      "step": 1155300
    },
    {
      "epoch": 1.8907065192487709,
      "grad_norm": 0.6060751080513,
      "learning_rate": 6.2595626074866735e-06,
      "loss": 0.0193,
      "step": 1155320
    },
    {
      "epoch": 1.8907392496874245,
      "grad_norm": 0.4940422773361206,
      "learning_rate": 6.259496715273157e-06,
      "loss": 0.0195,
      "step": 1155340
    },
    {
      "epoch": 1.8907719801260776,
      "grad_norm": 0.45458462834358215,
      "learning_rate": 6.259430823059639e-06,
      "loss": 0.021,
      "step": 1155360
    },
    {
      "epoch": 1.890804710564731,
      "grad_norm": 0.6071348786354065,
      "learning_rate": 6.259364930846123e-06,
      "loss": 0.0271,
      "step": 1155380
    },
    {
      "epoch": 1.8908374410033844,
      "grad_norm": 0.880194365978241,
      "learning_rate": 6.259299038632606e-06,
      "loss": 0.0147,
      "step": 1155400
    },
    {
      "epoch": 1.8908701714420375,
      "grad_norm": 1.2204011678695679,
      "learning_rate": 6.259233146419088e-06,
      "loss": 0.0154,
      "step": 1155420
    },
    {
      "epoch": 1.890902901880691,
      "grad_norm": 1.1073522567749023,
      "learning_rate": 6.259167254205572e-06,
      "loss": 0.0157,
      "step": 1155440
    },
    {
      "epoch": 1.8909356323193443,
      "grad_norm": 0.15413306653499603,
      "learning_rate": 6.2591013619920536e-06,
      "loss": 0.0187,
      "step": 1155460
    },
    {
      "epoch": 1.8909683627579978,
      "grad_norm": 1.4428547620773315,
      "learning_rate": 6.259035469778537e-06,
      "loss": 0.017,
      "step": 1155480
    },
    {
      "epoch": 1.891001093196651,
      "grad_norm": 0.4588276445865631,
      "learning_rate": 6.258969577565019e-06,
      "loss": 0.0146,
      "step": 1155500
    },
    {
      "epoch": 1.8910338236353044,
      "grad_norm": 0.37352168560028076,
      "learning_rate": 6.258903685351503e-06,
      "loss": 0.0134,
      "step": 1155520
    },
    {
      "epoch": 1.8910665540739577,
      "grad_norm": 1.085517406463623,
      "learning_rate": 6.258837793137985e-06,
      "loss": 0.0221,
      "step": 1155540
    },
    {
      "epoch": 1.891099284512611,
      "grad_norm": 2.3676671981811523,
      "learning_rate": 6.258771900924468e-06,
      "loss": 0.0227,
      "step": 1155560
    },
    {
      "epoch": 1.8911320149512645,
      "grad_norm": 0.6690233945846558,
      "learning_rate": 6.258706008710951e-06,
      "loss": 0.0186,
      "step": 1155580
    },
    {
      "epoch": 1.8911647453899176,
      "grad_norm": 0.566375732421875,
      "learning_rate": 6.2586401164974344e-06,
      "loss": 0.0175,
      "step": 1155600
    },
    {
      "epoch": 1.891197475828571,
      "grad_norm": 0.2876449227333069,
      "learning_rate": 6.258574224283916e-06,
      "loss": 0.015,
      "step": 1155620
    },
    {
      "epoch": 1.8912302062672244,
      "grad_norm": 4.5120320320129395,
      "learning_rate": 6.2585083320704e-06,
      "loss": 0.0151,
      "step": 1155640
    },
    {
      "epoch": 1.8912629367058778,
      "grad_norm": 0.8061317205429077,
      "learning_rate": 6.258442439856882e-06,
      "loss": 0.0296,
      "step": 1155660
    },
    {
      "epoch": 1.8912956671445311,
      "grad_norm": 0.62591952085495,
      "learning_rate": 6.258376547643365e-06,
      "loss": 0.0158,
      "step": 1155680
    },
    {
      "epoch": 1.8913283975831843,
      "grad_norm": 0.3163983225822449,
      "learning_rate": 6.258310655429848e-06,
      "loss": 0.0098,
      "step": 1155700
    },
    {
      "epoch": 1.8913611280218379,
      "grad_norm": 0.5049968957901001,
      "learning_rate": 6.258244763216331e-06,
      "loss": 0.018,
      "step": 1155720
    },
    {
      "epoch": 1.891393858460491,
      "grad_norm": 0.18789689242839813,
      "learning_rate": 6.2581788710028145e-06,
      "loss": 0.0122,
      "step": 1155740
    },
    {
      "epoch": 1.8914265888991444,
      "grad_norm": 0.9156715273857117,
      "learning_rate": 6.258112978789297e-06,
      "loss": 0.0229,
      "step": 1155760
    },
    {
      "epoch": 1.8914593193377978,
      "grad_norm": 1.5149435997009277,
      "learning_rate": 6.25804708657578e-06,
      "loss": 0.0161,
      "step": 1155780
    },
    {
      "epoch": 1.8914920497764511,
      "grad_norm": 0.31622084975242615,
      "learning_rate": 6.257981194362263e-06,
      "loss": 0.0171,
      "step": 1155800
    },
    {
      "epoch": 1.8915247802151045,
      "grad_norm": 0.4782186448574066,
      "learning_rate": 6.257915302148746e-06,
      "loss": 0.0192,
      "step": 1155820
    },
    {
      "epoch": 1.8915575106537577,
      "grad_norm": 2.832359790802002,
      "learning_rate": 6.257849409935228e-06,
      "loss": 0.0219,
      "step": 1155840
    },
    {
      "epoch": 1.8915902410924113,
      "grad_norm": 0.5183905959129333,
      "learning_rate": 6.257783517721712e-06,
      "loss": 0.0256,
      "step": 1155860
    },
    {
      "epoch": 1.8916229715310644,
      "grad_norm": 0.3844769299030304,
      "learning_rate": 6.257717625508194e-06,
      "loss": 0.0204,
      "step": 1155880
    },
    {
      "epoch": 1.8916557019697178,
      "grad_norm": 0.5508729815483093,
      "learning_rate": 6.257651733294677e-06,
      "loss": 0.024,
      "step": 1155900
    },
    {
      "epoch": 1.8916884324083711,
      "grad_norm": 0.3749440610408783,
      "learning_rate": 6.25758584108116e-06,
      "loss": 0.0175,
      "step": 1155920
    },
    {
      "epoch": 1.8917211628470245,
      "grad_norm": 0.6401680111885071,
      "learning_rate": 6.257519948867643e-06,
      "loss": 0.0179,
      "step": 1155940
    },
    {
      "epoch": 1.891753893285678,
      "grad_norm": 0.5362768173217773,
      "learning_rate": 6.2574540566541254e-06,
      "loss": 0.0192,
      "step": 1155960
    },
    {
      "epoch": 1.891786623724331,
      "grad_norm": 0.2929396629333496,
      "learning_rate": 6.257388164440609e-06,
      "loss": 0.0288,
      "step": 1155980
    },
    {
      "epoch": 1.8918193541629846,
      "grad_norm": 0.4207005500793457,
      "learning_rate": 6.257322272227091e-06,
      "loss": 0.0146,
      "step": 1156000
    },
    {
      "epoch": 1.8918520846016378,
      "grad_norm": 0.9419838190078735,
      "learning_rate": 6.2572563800135745e-06,
      "loss": 0.0229,
      "step": 1156020
    },
    {
      "epoch": 1.8918848150402912,
      "grad_norm": 0.5942354798316956,
      "learning_rate": 6.257190487800056e-06,
      "loss": 0.012,
      "step": 1156040
    },
    {
      "epoch": 1.8919175454789445,
      "grad_norm": 0.5696319341659546,
      "learning_rate": 6.25712459558654e-06,
      "loss": 0.0169,
      "step": 1156060
    },
    {
      "epoch": 1.891950275917598,
      "grad_norm": 0.4965215027332306,
      "learning_rate": 6.2570587033730236e-06,
      "loss": 0.0141,
      "step": 1156080
    },
    {
      "epoch": 1.8919830063562513,
      "grad_norm": 0.15489426255226135,
      "learning_rate": 6.2569928111595055e-06,
      "loss": 0.0119,
      "step": 1156100
    },
    {
      "epoch": 1.8920157367949044,
      "grad_norm": 0.3526003956794739,
      "learning_rate": 6.256926918945989e-06,
      "loss": 0.0114,
      "step": 1156120
    },
    {
      "epoch": 1.892048467233558,
      "grad_norm": 0.3135021924972534,
      "learning_rate": 6.256861026732472e-06,
      "loss": 0.0148,
      "step": 1156140
    },
    {
      "epoch": 1.8920811976722112,
      "grad_norm": 0.23062022030353546,
      "learning_rate": 6.2567951345189545e-06,
      "loss": 0.0188,
      "step": 1156160
    },
    {
      "epoch": 1.8921139281108645,
      "grad_norm": 0.33994796872138977,
      "learning_rate": 6.256729242305437e-06,
      "loss": 0.0155,
      "step": 1156180
    },
    {
      "epoch": 1.892146658549518,
      "grad_norm": 0.21209204196929932,
      "learning_rate": 6.256663350091921e-06,
      "loss": 0.0213,
      "step": 1156200
    },
    {
      "epoch": 1.892179388988171,
      "grad_norm": 0.4203052818775177,
      "learning_rate": 6.256597457878403e-06,
      "loss": 0.0141,
      "step": 1156220
    },
    {
      "epoch": 1.8922121194268247,
      "grad_norm": 0.97026526927948,
      "learning_rate": 6.256531565664886e-06,
      "loss": 0.0204,
      "step": 1156240
    },
    {
      "epoch": 1.8922448498654778,
      "grad_norm": 0.31517499685287476,
      "learning_rate": 6.256465673451368e-06,
      "loss": 0.0156,
      "step": 1156260
    },
    {
      "epoch": 1.8922775803041314,
      "grad_norm": 0.4359944760799408,
      "learning_rate": 6.256399781237852e-06,
      "loss": 0.0122,
      "step": 1156280
    },
    {
      "epoch": 1.8923103107427846,
      "grad_norm": 0.53692227602005,
      "learning_rate": 6.2563338890243346e-06,
      "loss": 0.0185,
      "step": 1156300
    },
    {
      "epoch": 1.892343041181438,
      "grad_norm": 0.5812587141990662,
      "learning_rate": 6.256267996810817e-06,
      "loss": 0.0211,
      "step": 1156320
    },
    {
      "epoch": 1.8923757716200913,
      "grad_norm": 0.266343355178833,
      "learning_rate": 6.2562021045973e-06,
      "loss": 0.0168,
      "step": 1156340
    },
    {
      "epoch": 1.8924085020587444,
      "grad_norm": 0.6054789423942566,
      "learning_rate": 6.256136212383784e-06,
      "loss": 0.0233,
      "step": 1156360
    },
    {
      "epoch": 1.892441232497398,
      "grad_norm": 0.6958197355270386,
      "learning_rate": 6.2560703201702655e-06,
      "loss": 0.0158,
      "step": 1156380
    },
    {
      "epoch": 1.8924739629360512,
      "grad_norm": 0.5198823809623718,
      "learning_rate": 6.256004427956749e-06,
      "loss": 0.0188,
      "step": 1156400
    },
    {
      "epoch": 1.8925066933747046,
      "grad_norm": 0.378694623708725,
      "learning_rate": 6.255938535743233e-06,
      "loss": 0.0157,
      "step": 1156420
    },
    {
      "epoch": 1.892539423813358,
      "grad_norm": 0.8071741461753845,
      "learning_rate": 6.255872643529715e-06,
      "loss": 0.017,
      "step": 1156440
    },
    {
      "epoch": 1.8925721542520113,
      "grad_norm": 0.6912499666213989,
      "learning_rate": 6.255806751316198e-06,
      "loss": 0.0172,
      "step": 1156460
    },
    {
      "epoch": 1.8926048846906647,
      "grad_norm": 0.49602770805358887,
      "learning_rate": 6.25574085910268e-06,
      "loss": 0.0189,
      "step": 1156480
    },
    {
      "epoch": 1.8926376151293178,
      "grad_norm": 0.3625819683074951,
      "learning_rate": 6.255674966889164e-06,
      "loss": 0.0115,
      "step": 1156500
    },
    {
      "epoch": 1.8926703455679714,
      "grad_norm": 0.8868457674980164,
      "learning_rate": 6.2556090746756455e-06,
      "loss": 0.0198,
      "step": 1156520
    },
    {
      "epoch": 1.8927030760066246,
      "grad_norm": 0.43225404620170593,
      "learning_rate": 6.255543182462129e-06,
      "loss": 0.011,
      "step": 1156540
    },
    {
      "epoch": 1.892735806445278,
      "grad_norm": 0.2818307876586914,
      "learning_rate": 6.255477290248612e-06,
      "loss": 0.0162,
      "step": 1156560
    },
    {
      "epoch": 1.8927685368839313,
      "grad_norm": 0.08007368445396423,
      "learning_rate": 6.255411398035095e-06,
      "loss": 0.0178,
      "step": 1156580
    },
    {
      "epoch": 1.8928012673225847,
      "grad_norm": 0.4950878918170929,
      "learning_rate": 6.255345505821577e-06,
      "loss": 0.0178,
      "step": 1156600
    },
    {
      "epoch": 1.892833997761238,
      "grad_norm": 0.5918560028076172,
      "learning_rate": 6.255279613608061e-06,
      "loss": 0.0221,
      "step": 1156620
    },
    {
      "epoch": 1.8928667281998912,
      "grad_norm": 0.3422313332557678,
      "learning_rate": 6.255213721394543e-06,
      "loss": 0.0198,
      "step": 1156640
    },
    {
      "epoch": 1.8928994586385448,
      "grad_norm": 0.5429394841194153,
      "learning_rate": 6.255147829181026e-06,
      "loss": 0.0214,
      "step": 1156660
    },
    {
      "epoch": 1.892932189077198,
      "grad_norm": 0.7272819876670837,
      "learning_rate": 6.255081936967508e-06,
      "loss": 0.02,
      "step": 1156680
    },
    {
      "epoch": 1.8929649195158513,
      "grad_norm": 0.4305099844932556,
      "learning_rate": 6.255016044753992e-06,
      "loss": 0.0157,
      "step": 1156700
    },
    {
      "epoch": 1.8929976499545047,
      "grad_norm": 1.3195104598999023,
      "learning_rate": 6.254950152540475e-06,
      "loss": 0.0143,
      "step": 1156720
    },
    {
      "epoch": 1.893030380393158,
      "grad_norm": 0.422445148229599,
      "learning_rate": 6.254884260326957e-06,
      "loss": 0.0244,
      "step": 1156740
    },
    {
      "epoch": 1.8930631108318114,
      "grad_norm": 0.6708666086196899,
      "learning_rate": 6.25481836811344e-06,
      "loss": 0.0204,
      "step": 1156760
    },
    {
      "epoch": 1.8930958412704646,
      "grad_norm": 0.4176090955734253,
      "learning_rate": 6.254752475899924e-06,
      "loss": 0.02,
      "step": 1156780
    },
    {
      "epoch": 1.8931285717091182,
      "grad_norm": 0.09656882286071777,
      "learning_rate": 6.2546865836864064e-06,
      "loss": 0.0176,
      "step": 1156800
    },
    {
      "epoch": 1.8931613021477713,
      "grad_norm": 0.3873593211174011,
      "learning_rate": 6.254620691472889e-06,
      "loss": 0.0161,
      "step": 1156820
    },
    {
      "epoch": 1.8931940325864247,
      "grad_norm": 0.5131728053092957,
      "learning_rate": 6.254554799259373e-06,
      "loss": 0.0183,
      "step": 1156840
    },
    {
      "epoch": 1.893226763025078,
      "grad_norm": 0.5925673842430115,
      "learning_rate": 6.254488907045855e-06,
      "loss": 0.0195,
      "step": 1156860
    },
    {
      "epoch": 1.8932594934637312,
      "grad_norm": 0.303857684135437,
      "learning_rate": 6.254423014832338e-06,
      "loss": 0.0207,
      "step": 1156880
    },
    {
      "epoch": 1.8932922239023848,
      "grad_norm": 1.057851791381836,
      "learning_rate": 6.25435712261882e-06,
      "loss": 0.0172,
      "step": 1156900
    },
    {
      "epoch": 1.893324954341038,
      "grad_norm": 0.17200474441051483,
      "learning_rate": 6.254291230405304e-06,
      "loss": 0.0151,
      "step": 1156920
    },
    {
      "epoch": 1.8933576847796916,
      "grad_norm": 1.2697882652282715,
      "learning_rate": 6.2542253381917865e-06,
      "loss": 0.0202,
      "step": 1156940
    },
    {
      "epoch": 1.8933904152183447,
      "grad_norm": 0.25138434767723083,
      "learning_rate": 6.254159445978269e-06,
      "loss": 0.0142,
      "step": 1156960
    },
    {
      "epoch": 1.893423145656998,
      "grad_norm": 1.247768759727478,
      "learning_rate": 6.254093553764752e-06,
      "loss": 0.0218,
      "step": 1156980
    },
    {
      "epoch": 1.8934558760956515,
      "grad_norm": 0.7508002519607544,
      "learning_rate": 6.2540276615512355e-06,
      "loss": 0.0176,
      "step": 1157000
    },
    {
      "epoch": 1.8934886065343046,
      "grad_norm": 0.3579261600971222,
      "learning_rate": 6.253961769337717e-06,
      "loss": 0.0211,
      "step": 1157020
    },
    {
      "epoch": 1.8935213369729582,
      "grad_norm": 0.7589316964149475,
      "learning_rate": 6.253895877124201e-06,
      "loss": 0.0212,
      "step": 1157040
    },
    {
      "epoch": 1.8935540674116114,
      "grad_norm": 0.7989194989204407,
      "learning_rate": 6.253829984910683e-06,
      "loss": 0.0179,
      "step": 1157060
    },
    {
      "epoch": 1.8935867978502647,
      "grad_norm": 1.1839768886566162,
      "learning_rate": 6.2537640926971665e-06,
      "loss": 0.011,
      "step": 1157080
    },
    {
      "epoch": 1.893619528288918,
      "grad_norm": 1.8877639770507812,
      "learning_rate": 6.253698200483649e-06,
      "loss": 0.0205,
      "step": 1157100
    },
    {
      "epoch": 1.8936522587275715,
      "grad_norm": 0.5137959122657776,
      "learning_rate": 6.253632308270132e-06,
      "loss": 0.0117,
      "step": 1157120
    },
    {
      "epoch": 1.8936849891662249,
      "grad_norm": 0.22314396500587463,
      "learning_rate": 6.2535664160566156e-06,
      "loss": 0.0166,
      "step": 1157140
    },
    {
      "epoch": 1.893717719604878,
      "grad_norm": 0.6100656390190125,
      "learning_rate": 6.253500523843098e-06,
      "loss": 0.0228,
      "step": 1157160
    },
    {
      "epoch": 1.8937504500435316,
      "grad_norm": 0.21617627143859863,
      "learning_rate": 6.253434631629581e-06,
      "loss": 0.0244,
      "step": 1157180
    },
    {
      "epoch": 1.8937831804821847,
      "grad_norm": 0.09215912222862244,
      "learning_rate": 6.253368739416064e-06,
      "loss": 0.0226,
      "step": 1157200
    },
    {
      "epoch": 1.8938159109208381,
      "grad_norm": 0.5419250130653381,
      "learning_rate": 6.253302847202547e-06,
      "loss": 0.0299,
      "step": 1157220
    },
    {
      "epoch": 1.8938486413594915,
      "grad_norm": 0.247541144490242,
      "learning_rate": 6.253236954989029e-06,
      "loss": 0.0154,
      "step": 1157240
    },
    {
      "epoch": 1.8938813717981449,
      "grad_norm": 0.3269718289375305,
      "learning_rate": 6.253171062775513e-06,
      "loss": 0.0187,
      "step": 1157260
    },
    {
      "epoch": 1.8939141022367982,
      "grad_norm": 0.6281632781028748,
      "learning_rate": 6.253105170561995e-06,
      "loss": 0.0206,
      "step": 1157280
    },
    {
      "epoch": 1.8939468326754514,
      "grad_norm": 0.47469523549079895,
      "learning_rate": 6.253039278348478e-06,
      "loss": 0.0243,
      "step": 1157300
    },
    {
      "epoch": 1.893979563114105,
      "grad_norm": 6.437277317047119,
      "learning_rate": 6.25297338613496e-06,
      "loss": 0.0164,
      "step": 1157320
    },
    {
      "epoch": 1.8940122935527581,
      "grad_norm": 0.3015189468860626,
      "learning_rate": 6.252907493921444e-06,
      "loss": 0.0153,
      "step": 1157340
    },
    {
      "epoch": 1.8940450239914115,
      "grad_norm": 0.6291047930717468,
      "learning_rate": 6.2528416017079265e-06,
      "loss": 0.0163,
      "step": 1157360
    },
    {
      "epoch": 1.8940777544300649,
      "grad_norm": 0.5740943551063538,
      "learning_rate": 6.25277570949441e-06,
      "loss": 0.0143,
      "step": 1157380
    },
    {
      "epoch": 1.8941104848687182,
      "grad_norm": 0.30866822600364685,
      "learning_rate": 6.252709817280892e-06,
      "loss": 0.0214,
      "step": 1157400
    },
    {
      "epoch": 1.8941432153073716,
      "grad_norm": 0.6155827641487122,
      "learning_rate": 6.252643925067376e-06,
      "loss": 0.0207,
      "step": 1157420
    },
    {
      "epoch": 1.8941759457460248,
      "grad_norm": 0.13592979311943054,
      "learning_rate": 6.2525780328538575e-06,
      "loss": 0.0138,
      "step": 1157440
    },
    {
      "epoch": 1.8942086761846784,
      "grad_norm": 1.1675945520401,
      "learning_rate": 6.252512140640341e-06,
      "loss": 0.023,
      "step": 1157460
    },
    {
      "epoch": 1.8942414066233315,
      "grad_norm": 3.0841658115386963,
      "learning_rate": 6.252446248426825e-06,
      "loss": 0.0196,
      "step": 1157480
    },
    {
      "epoch": 1.8942741370619849,
      "grad_norm": 0.35709694027900696,
      "learning_rate": 6.2523803562133066e-06,
      "loss": 0.0153,
      "step": 1157500
    },
    {
      "epoch": 1.8943068675006383,
      "grad_norm": 0.6037652492523193,
      "learning_rate": 6.25231446399979e-06,
      "loss": 0.013,
      "step": 1157520
    },
    {
      "epoch": 1.8943395979392916,
      "grad_norm": 0.4964018166065216,
      "learning_rate": 6.252248571786272e-06,
      "loss": 0.0241,
      "step": 1157540
    },
    {
      "epoch": 1.894372328377945,
      "grad_norm": 0.4752524495124817,
      "learning_rate": 6.252182679572756e-06,
      "loss": 0.0153,
      "step": 1157560
    },
    {
      "epoch": 1.8944050588165982,
      "grad_norm": 0.17494571208953857,
      "learning_rate": 6.252116787359238e-06,
      "loss": 0.0096,
      "step": 1157580
    },
    {
      "epoch": 1.8944377892552517,
      "grad_norm": 0.2712777256965637,
      "learning_rate": 6.252050895145721e-06,
      "loss": 0.0198,
      "step": 1157600
    },
    {
      "epoch": 1.894470519693905,
      "grad_norm": 0.1881493777036667,
      "learning_rate": 6.251985002932204e-06,
      "loss": 0.0195,
      "step": 1157620
    },
    {
      "epoch": 1.8945032501325583,
      "grad_norm": 0.5237241983413696,
      "learning_rate": 6.2519191107186874e-06,
      "loss": 0.0167,
      "step": 1157640
    },
    {
      "epoch": 1.8945359805712116,
      "grad_norm": 0.11331923305988312,
      "learning_rate": 6.251853218505169e-06,
      "loss": 0.0198,
      "step": 1157660
    },
    {
      "epoch": 1.8945687110098648,
      "grad_norm": 0.1347646713256836,
      "learning_rate": 6.251787326291653e-06,
      "loss": 0.0131,
      "step": 1157680
    },
    {
      "epoch": 1.8946014414485184,
      "grad_norm": 0.9497708082199097,
      "learning_rate": 6.251721434078135e-06,
      "loss": 0.0147,
      "step": 1157700
    },
    {
      "epoch": 1.8946341718871715,
      "grad_norm": 0.3464199900627136,
      "learning_rate": 6.251655541864618e-06,
      "loss": 0.0141,
      "step": 1157720
    },
    {
      "epoch": 1.8946669023258251,
      "grad_norm": 1.1574002504348755,
      "learning_rate": 6.251589649651101e-06,
      "loss": 0.0279,
      "step": 1157740
    },
    {
      "epoch": 1.8946996327644783,
      "grad_norm": 0.2470456063747406,
      "learning_rate": 6.251523757437584e-06,
      "loss": 0.0327,
      "step": 1157760
    },
    {
      "epoch": 1.8947323632031317,
      "grad_norm": 1.8888826370239258,
      "learning_rate": 6.251457865224067e-06,
      "loss": 0.0215,
      "step": 1157780
    },
    {
      "epoch": 1.894765093641785,
      "grad_norm": 0.9311997890472412,
      "learning_rate": 6.25139197301055e-06,
      "loss": 0.0153,
      "step": 1157800
    },
    {
      "epoch": 1.8947978240804382,
      "grad_norm": 0.3912915885448456,
      "learning_rate": 6.251326080797032e-06,
      "loss": 0.0133,
      "step": 1157820
    },
    {
      "epoch": 1.8948305545190918,
      "grad_norm": 0.5442639589309692,
      "learning_rate": 6.251260188583516e-06,
      "loss": 0.0133,
      "step": 1157840
    },
    {
      "epoch": 1.894863284957745,
      "grad_norm": 0.5470080971717834,
      "learning_rate": 6.251194296369999e-06,
      "loss": 0.0195,
      "step": 1157860
    },
    {
      "epoch": 1.8948960153963983,
      "grad_norm": 0.1840100884437561,
      "learning_rate": 6.251128404156481e-06,
      "loss": 0.016,
      "step": 1157880
    },
    {
      "epoch": 1.8949287458350517,
      "grad_norm": 0.4247871935367584,
      "learning_rate": 6.251062511942965e-06,
      "loss": 0.0219,
      "step": 1157900
    },
    {
      "epoch": 1.894961476273705,
      "grad_norm": 0.40641212463378906,
      "learning_rate": 6.250996619729447e-06,
      "loss": 0.0194,
      "step": 1157920
    },
    {
      "epoch": 1.8949942067123584,
      "grad_norm": 0.3024898171424866,
      "learning_rate": 6.25093072751593e-06,
      "loss": 0.0199,
      "step": 1157940
    },
    {
      "epoch": 1.8950269371510116,
      "grad_norm": 0.24771393835544586,
      "learning_rate": 6.250864835302413e-06,
      "loss": 0.0153,
      "step": 1157960
    },
    {
      "epoch": 1.8950596675896652,
      "grad_norm": 0.5582504272460938,
      "learning_rate": 6.250798943088896e-06,
      "loss": 0.0169,
      "step": 1157980
    },
    {
      "epoch": 1.8950923980283183,
      "grad_norm": 0.32270970940589905,
      "learning_rate": 6.2507330508753784e-06,
      "loss": 0.0153,
      "step": 1158000
    },
    {
      "epoch": 1.8951251284669717,
      "grad_norm": 0.3425365686416626,
      "learning_rate": 6.250667158661862e-06,
      "loss": 0.0192,
      "step": 1158020
    },
    {
      "epoch": 1.895157858905625,
      "grad_norm": 0.6590294241905212,
      "learning_rate": 6.250601266448344e-06,
      "loss": 0.0136,
      "step": 1158040
    },
    {
      "epoch": 1.8951905893442784,
      "grad_norm": 0.3457053303718567,
      "learning_rate": 6.2505353742348275e-06,
      "loss": 0.02,
      "step": 1158060
    },
    {
      "epoch": 1.8952233197829318,
      "grad_norm": 0.3492625057697296,
      "learning_rate": 6.250469482021309e-06,
      "loss": 0.0209,
      "step": 1158080
    },
    {
      "epoch": 1.895256050221585,
      "grad_norm": 0.5048966407775879,
      "learning_rate": 6.250403589807793e-06,
      "loss": 0.0156,
      "step": 1158100
    },
    {
      "epoch": 1.8952887806602385,
      "grad_norm": 0.6714798808097839,
      "learning_rate": 6.250337697594276e-06,
      "loss": 0.0156,
      "step": 1158120
    },
    {
      "epoch": 1.8953215110988917,
      "grad_norm": 0.23530593514442444,
      "learning_rate": 6.2502718053807585e-06,
      "loss": 0.0135,
      "step": 1158140
    },
    {
      "epoch": 1.895354241537545,
      "grad_norm": 0.2923585772514343,
      "learning_rate": 6.250205913167241e-06,
      "loss": 0.0138,
      "step": 1158160
    },
    {
      "epoch": 1.8953869719761984,
      "grad_norm": 0.4973889887332916,
      "learning_rate": 6.250140020953725e-06,
      "loss": 0.0158,
      "step": 1158180
    },
    {
      "epoch": 1.8954197024148518,
      "grad_norm": 0.34399956464767456,
      "learning_rate": 6.2500741287402075e-06,
      "loss": 0.0225,
      "step": 1158200
    },
    {
      "epoch": 1.8954524328535052,
      "grad_norm": 0.8854667544364929,
      "learning_rate": 6.25000823652669e-06,
      "loss": 0.0179,
      "step": 1158220
    },
    {
      "epoch": 1.8954851632921583,
      "grad_norm": 0.31785422563552856,
      "learning_rate": 6.249942344313174e-06,
      "loss": 0.0163,
      "step": 1158240
    },
    {
      "epoch": 1.895517893730812,
      "grad_norm": 0.5216412544250488,
      "learning_rate": 6.249876452099656e-06,
      "loss": 0.0242,
      "step": 1158260
    },
    {
      "epoch": 1.895550624169465,
      "grad_norm": 0.29064473509788513,
      "learning_rate": 6.249810559886139e-06,
      "loss": 0.0173,
      "step": 1158280
    },
    {
      "epoch": 1.8955833546081184,
      "grad_norm": 0.44310280680656433,
      "learning_rate": 6.249744667672621e-06,
      "loss": 0.0185,
      "step": 1158300
    },
    {
      "epoch": 1.8956160850467718,
      "grad_norm": 0.4069691002368927,
      "learning_rate": 6.249678775459105e-06,
      "loss": 0.0138,
      "step": 1158320
    },
    {
      "epoch": 1.8956488154854252,
      "grad_norm": 0.4802132248878479,
      "learning_rate": 6.249612883245587e-06,
      "loss": 0.0143,
      "step": 1158340
    },
    {
      "epoch": 1.8956815459240786,
      "grad_norm": 0.11017850041389465,
      "learning_rate": 6.24954699103207e-06,
      "loss": 0.0168,
      "step": 1158360
    },
    {
      "epoch": 1.8957142763627317,
      "grad_norm": 0.6854580640792847,
      "learning_rate": 6.249481098818553e-06,
      "loss": 0.0227,
      "step": 1158380
    },
    {
      "epoch": 1.8957470068013853,
      "grad_norm": 0.272350937128067,
      "learning_rate": 6.249415206605036e-06,
      "loss": 0.017,
      "step": 1158400
    },
    {
      "epoch": 1.8957797372400385,
      "grad_norm": 0.23002906143665314,
      "learning_rate": 6.2493493143915185e-06,
      "loss": 0.0182,
      "step": 1158420
    },
    {
      "epoch": 1.8958124676786918,
      "grad_norm": 0.7503336071968079,
      "learning_rate": 6.249283422178002e-06,
      "loss": 0.0186,
      "step": 1158440
    },
    {
      "epoch": 1.8958451981173452,
      "grad_norm": 0.48595356941223145,
      "learning_rate": 6.249217529964484e-06,
      "loss": 0.0284,
      "step": 1158460
    },
    {
      "epoch": 1.8958779285559983,
      "grad_norm": 0.3249861001968384,
      "learning_rate": 6.249151637750968e-06,
      "loss": 0.0187,
      "step": 1158480
    },
    {
      "epoch": 1.895910658994652,
      "grad_norm": 0.21024547517299652,
      "learning_rate": 6.2490857455374495e-06,
      "loss": 0.0219,
      "step": 1158500
    },
    {
      "epoch": 1.895943389433305,
      "grad_norm": 0.5207704901695251,
      "learning_rate": 6.249019853323933e-06,
      "loss": 0.024,
      "step": 1158520
    },
    {
      "epoch": 1.8959761198719587,
      "grad_norm": 0.31773582100868225,
      "learning_rate": 6.248953961110417e-06,
      "loss": 0.0167,
      "step": 1158540
    },
    {
      "epoch": 1.8960088503106118,
      "grad_norm": 2.2705233097076416,
      "learning_rate": 6.2488880688968985e-06,
      "loss": 0.0241,
      "step": 1158560
    },
    {
      "epoch": 1.8960415807492652,
      "grad_norm": 0.36919987201690674,
      "learning_rate": 6.248822176683382e-06,
      "loss": 0.0102,
      "step": 1158580
    },
    {
      "epoch": 1.8960743111879186,
      "grad_norm": 0.29843440651893616,
      "learning_rate": 6.248756284469865e-06,
      "loss": 0.0128,
      "step": 1158600
    },
    {
      "epoch": 1.8961070416265717,
      "grad_norm": 0.21860051155090332,
      "learning_rate": 6.248690392256348e-06,
      "loss": 0.0159,
      "step": 1158620
    },
    {
      "epoch": 1.8961397720652253,
      "grad_norm": 0.2635859251022339,
      "learning_rate": 6.24862450004283e-06,
      "loss": 0.0181,
      "step": 1158640
    },
    {
      "epoch": 1.8961725025038785,
      "grad_norm": 0.5730980634689331,
      "learning_rate": 6.248558607829314e-06,
      "loss": 0.0222,
      "step": 1158660
    },
    {
      "epoch": 1.8962052329425318,
      "grad_norm": 0.510726809501648,
      "learning_rate": 6.248492715615796e-06,
      "loss": 0.0175,
      "step": 1158680
    },
    {
      "epoch": 1.8962379633811852,
      "grad_norm": 0.655885636806488,
      "learning_rate": 6.248426823402279e-06,
      "loss": 0.0241,
      "step": 1158700
    },
    {
      "epoch": 1.8962706938198386,
      "grad_norm": 0.9255350828170776,
      "learning_rate": 6.248360931188761e-06,
      "loss": 0.0151,
      "step": 1158720
    },
    {
      "epoch": 1.896303424258492,
      "grad_norm": 0.2027822732925415,
      "learning_rate": 6.248295038975245e-06,
      "loss": 0.0158,
      "step": 1158740
    },
    {
      "epoch": 1.8963361546971451,
      "grad_norm": 1.077616810798645,
      "learning_rate": 6.248229146761728e-06,
      "loss": 0.0203,
      "step": 1158760
    },
    {
      "epoch": 1.8963688851357987,
      "grad_norm": 0.4575687348842621,
      "learning_rate": 6.24816325454821e-06,
      "loss": 0.0181,
      "step": 1158780
    },
    {
      "epoch": 1.8964016155744519,
      "grad_norm": 0.9081813097000122,
      "learning_rate": 6.248097362334693e-06,
      "loss": 0.0189,
      "step": 1158800
    },
    {
      "epoch": 1.8964343460131052,
      "grad_norm": 0.12418209761381149,
      "learning_rate": 6.248031470121177e-06,
      "loss": 0.0151,
      "step": 1158820
    },
    {
      "epoch": 1.8964670764517586,
      "grad_norm": 0.977793276309967,
      "learning_rate": 6.247965577907659e-06,
      "loss": 0.0155,
      "step": 1158840
    },
    {
      "epoch": 1.896499806890412,
      "grad_norm": 0.7582606077194214,
      "learning_rate": 6.247899685694142e-06,
      "loss": 0.0167,
      "step": 1158860
    },
    {
      "epoch": 1.8965325373290653,
      "grad_norm": 0.37791335582733154,
      "learning_rate": 6.247833793480624e-06,
      "loss": 0.0116,
      "step": 1158880
    },
    {
      "epoch": 1.8965652677677185,
      "grad_norm": 0.6757276058197021,
      "learning_rate": 6.247767901267108e-06,
      "loss": 0.0266,
      "step": 1158900
    },
    {
      "epoch": 1.896597998206372,
      "grad_norm": 1.0022417306900024,
      "learning_rate": 6.247702009053591e-06,
      "loss": 0.0193,
      "step": 1158920
    },
    {
      "epoch": 1.8966307286450252,
      "grad_norm": 0.5599479079246521,
      "learning_rate": 6.247636116840073e-06,
      "loss": 0.0144,
      "step": 1158940
    },
    {
      "epoch": 1.8966634590836786,
      "grad_norm": 0.5646977424621582,
      "learning_rate": 6.247570224626557e-06,
      "loss": 0.0157,
      "step": 1158960
    },
    {
      "epoch": 1.896696189522332,
      "grad_norm": 0.9541404843330383,
      "learning_rate": 6.2475043324130395e-06,
      "loss": 0.0222,
      "step": 1158980
    },
    {
      "epoch": 1.8967289199609854,
      "grad_norm": 0.024078989401459694,
      "learning_rate": 6.247438440199522e-06,
      "loss": 0.0126,
      "step": 1159000
    },
    {
      "epoch": 1.8967616503996387,
      "grad_norm": 0.457489013671875,
      "learning_rate": 6.247372547986005e-06,
      "loss": 0.0212,
      "step": 1159020
    },
    {
      "epoch": 1.8967943808382919,
      "grad_norm": 0.507459282875061,
      "learning_rate": 6.2473066557724885e-06,
      "loss": 0.0157,
      "step": 1159040
    },
    {
      "epoch": 1.8968271112769455,
      "grad_norm": 0.3718658685684204,
      "learning_rate": 6.2472407635589704e-06,
      "loss": 0.0299,
      "step": 1159060
    },
    {
      "epoch": 1.8968598417155986,
      "grad_norm": 0.46365484595298767,
      "learning_rate": 6.247174871345454e-06,
      "loss": 0.0191,
      "step": 1159080
    },
    {
      "epoch": 1.896892572154252,
      "grad_norm": 0.1410478800535202,
      "learning_rate": 6.247108979131936e-06,
      "loss": 0.0133,
      "step": 1159100
    },
    {
      "epoch": 1.8969253025929054,
      "grad_norm": 0.37552258372306824,
      "learning_rate": 6.2470430869184195e-06,
      "loss": 0.0152,
      "step": 1159120
    },
    {
      "epoch": 1.8969580330315587,
      "grad_norm": 0.5596728920936584,
      "learning_rate": 6.246977194704902e-06,
      "loss": 0.015,
      "step": 1159140
    },
    {
      "epoch": 1.8969907634702121,
      "grad_norm": 0.6471738815307617,
      "learning_rate": 6.246911302491385e-06,
      "loss": 0.0111,
      "step": 1159160
    },
    {
      "epoch": 1.8970234939088653,
      "grad_norm": 0.476001501083374,
      "learning_rate": 6.246845410277868e-06,
      "loss": 0.0204,
      "step": 1159180
    },
    {
      "epoch": 1.8970562243475189,
      "grad_norm": 0.8408863544464111,
      "learning_rate": 6.246779518064351e-06,
      "loss": 0.0142,
      "step": 1159200
    },
    {
      "epoch": 1.897088954786172,
      "grad_norm": 0.47598299384117126,
      "learning_rate": 6.246713625850833e-06,
      "loss": 0.0144,
      "step": 1159220
    },
    {
      "epoch": 1.8971216852248254,
      "grad_norm": 0.6172435283660889,
      "learning_rate": 6.246647733637317e-06,
      "loss": 0.0256,
      "step": 1159240
    },
    {
      "epoch": 1.8971544156634788,
      "grad_norm": 0.15494786202907562,
      "learning_rate": 6.2465818414238e-06,
      "loss": 0.0164,
      "step": 1159260
    },
    {
      "epoch": 1.897187146102132,
      "grad_norm": 0.9272703528404236,
      "learning_rate": 6.246515949210282e-06,
      "loss": 0.0211,
      "step": 1159280
    },
    {
      "epoch": 1.8972198765407855,
      "grad_norm": 0.3145148754119873,
      "learning_rate": 6.246450056996766e-06,
      "loss": 0.0168,
      "step": 1159300
    },
    {
      "epoch": 1.8972526069794386,
      "grad_norm": 0.3716907501220703,
      "learning_rate": 6.246384164783248e-06,
      "loss": 0.0145,
      "step": 1159320
    },
    {
      "epoch": 1.8972853374180922,
      "grad_norm": 0.08504412323236465,
      "learning_rate": 6.246318272569731e-06,
      "loss": 0.0196,
      "step": 1159340
    },
    {
      "epoch": 1.8973180678567454,
      "grad_norm": 0.3157848119735718,
      "learning_rate": 6.246252380356213e-06,
      "loss": 0.0154,
      "step": 1159360
    },
    {
      "epoch": 1.8973507982953988,
      "grad_norm": 0.24307626485824585,
      "learning_rate": 6.246186488142697e-06,
      "loss": 0.0134,
      "step": 1159380
    },
    {
      "epoch": 1.8973835287340521,
      "grad_norm": 0.4021334648132324,
      "learning_rate": 6.2461205959291795e-06,
      "loss": 0.0153,
      "step": 1159400
    },
    {
      "epoch": 1.8974162591727053,
      "grad_norm": 0.388090580701828,
      "learning_rate": 6.246054703715662e-06,
      "loss": 0.0145,
      "step": 1159420
    },
    {
      "epoch": 1.8974489896113589,
      "grad_norm": 0.40543076395988464,
      "learning_rate": 6.245988811502145e-06,
      "loss": 0.0166,
      "step": 1159440
    },
    {
      "epoch": 1.897481720050012,
      "grad_norm": 0.4068426787853241,
      "learning_rate": 6.245922919288629e-06,
      "loss": 0.0236,
      "step": 1159460
    },
    {
      "epoch": 1.8975144504886654,
      "grad_norm": 0.120845727622509,
      "learning_rate": 6.2458570270751105e-06,
      "loss": 0.0153,
      "step": 1159480
    },
    {
      "epoch": 1.8975471809273188,
      "grad_norm": 0.36366328597068787,
      "learning_rate": 6.245791134861594e-06,
      "loss": 0.0211,
      "step": 1159500
    },
    {
      "epoch": 1.8975799113659721,
      "grad_norm": 0.31466543674468994,
      "learning_rate": 6.245725242648076e-06,
      "loss": 0.0215,
      "step": 1159520
    },
    {
      "epoch": 1.8976126418046255,
      "grad_norm": 0.2188796103000641,
      "learning_rate": 6.2456593504345596e-06,
      "loss": 0.0146,
      "step": 1159540
    },
    {
      "epoch": 1.8976453722432787,
      "grad_norm": 0.17771075665950775,
      "learning_rate": 6.245593458221042e-06,
      "loss": 0.0234,
      "step": 1159560
    },
    {
      "epoch": 1.8976781026819323,
      "grad_norm": 0.3471408486366272,
      "learning_rate": 6.245527566007525e-06,
      "loss": 0.0128,
      "step": 1159580
    },
    {
      "epoch": 1.8977108331205854,
      "grad_norm": 0.5906445980072021,
      "learning_rate": 6.245461673794009e-06,
      "loss": 0.023,
      "step": 1159600
    },
    {
      "epoch": 1.8977435635592388,
      "grad_norm": 1.3005402088165283,
      "learning_rate": 6.245395781580491e-06,
      "loss": 0.0234,
      "step": 1159620
    },
    {
      "epoch": 1.8977762939978922,
      "grad_norm": 0.28919997811317444,
      "learning_rate": 6.245329889366974e-06,
      "loss": 0.0188,
      "step": 1159640
    },
    {
      "epoch": 1.8978090244365455,
      "grad_norm": 0.44301965832710266,
      "learning_rate": 6.245263997153457e-06,
      "loss": 0.0132,
      "step": 1159660
    },
    {
      "epoch": 1.897841754875199,
      "grad_norm": 0.05057656764984131,
      "learning_rate": 6.2451981049399404e-06,
      "loss": 0.0151,
      "step": 1159680
    },
    {
      "epoch": 1.897874485313852,
      "grad_norm": 0.23884254693984985,
      "learning_rate": 6.245132212726422e-06,
      "loss": 0.0135,
      "step": 1159700
    },
    {
      "epoch": 1.8979072157525056,
      "grad_norm": 0.2277986705303192,
      "learning_rate": 6.245066320512906e-06,
      "loss": 0.0166,
      "step": 1159720
    },
    {
      "epoch": 1.8979399461911588,
      "grad_norm": 0.6189702749252319,
      "learning_rate": 6.245000428299388e-06,
      "loss": 0.0218,
      "step": 1159740
    },
    {
      "epoch": 1.8979726766298122,
      "grad_norm": 0.7686262130737305,
      "learning_rate": 6.244934536085871e-06,
      "loss": 0.0221,
      "step": 1159760
    },
    {
      "epoch": 1.8980054070684655,
      "grad_norm": 0.591694176197052,
      "learning_rate": 6.244868643872354e-06,
      "loss": 0.0156,
      "step": 1159780
    },
    {
      "epoch": 1.898038137507119,
      "grad_norm": 1.4016863107681274,
      "learning_rate": 6.244802751658837e-06,
      "loss": 0.022,
      "step": 1159800
    },
    {
      "epoch": 1.8980708679457723,
      "grad_norm": 0.5679614543914795,
      "learning_rate": 6.24473685944532e-06,
      "loss": 0.0193,
      "step": 1159820
    },
    {
      "epoch": 1.8981035983844254,
      "grad_norm": 0.48466289043426514,
      "learning_rate": 6.244670967231803e-06,
      "loss": 0.0193,
      "step": 1159840
    },
    {
      "epoch": 1.898136328823079,
      "grad_norm": 0.08261648565530777,
      "learning_rate": 6.244605075018285e-06,
      "loss": 0.0219,
      "step": 1159860
    },
    {
      "epoch": 1.8981690592617322,
      "grad_norm": 1.2726789712905884,
      "learning_rate": 6.244539182804769e-06,
      "loss": 0.0215,
      "step": 1159880
    },
    {
      "epoch": 1.8982017897003856,
      "grad_norm": 0.19607369601726532,
      "learning_rate": 6.244473290591251e-06,
      "loss": 0.0221,
      "step": 1159900
    },
    {
      "epoch": 1.898234520139039,
      "grad_norm": 0.3116376996040344,
      "learning_rate": 6.244407398377734e-06,
      "loss": 0.0173,
      "step": 1159920
    },
    {
      "epoch": 1.898267250577692,
      "grad_norm": 0.18731586635112762,
      "learning_rate": 6.244341506164218e-06,
      "loss": 0.0163,
      "step": 1159940
    },
    {
      "epoch": 1.8982999810163457,
      "grad_norm": 0.5288774967193604,
      "learning_rate": 6.2442756139507e-06,
      "loss": 0.022,
      "step": 1159960
    },
    {
      "epoch": 1.8983327114549988,
      "grad_norm": 0.19689571857452393,
      "learning_rate": 6.244209721737183e-06,
      "loss": 0.0227,
      "step": 1159980
    },
    {
      "epoch": 1.8983654418936524,
      "grad_norm": 0.691356897354126,
      "learning_rate": 6.244143829523666e-06,
      "loss": 0.0181,
      "step": 1160000
    },
    {
      "epoch": 1.8983981723323056,
      "grad_norm": 0.9439455270767212,
      "learning_rate": 6.244077937310149e-06,
      "loss": 0.0301,
      "step": 1160020
    },
    {
      "epoch": 1.898430902770959,
      "grad_norm": 0.2601735591888428,
      "learning_rate": 6.2440120450966315e-06,
      "loss": 0.0161,
      "step": 1160040
    },
    {
      "epoch": 1.8984636332096123,
      "grad_norm": 1.994575023651123,
      "learning_rate": 6.243946152883115e-06,
      "loss": 0.0164,
      "step": 1160060
    },
    {
      "epoch": 1.8984963636482655,
      "grad_norm": 0.07624143362045288,
      "learning_rate": 6.243880260669597e-06,
      "loss": 0.0198,
      "step": 1160080
    },
    {
      "epoch": 1.898529094086919,
      "grad_norm": 0.2300814539194107,
      "learning_rate": 6.2438143684560805e-06,
      "loss": 0.014,
      "step": 1160100
    },
    {
      "epoch": 1.8985618245255722,
      "grad_norm": 0.17663326859474182,
      "learning_rate": 6.243748476242562e-06,
      "loss": 0.0134,
      "step": 1160120
    },
    {
      "epoch": 1.8985945549642256,
      "grad_norm": 0.51876300573349,
      "learning_rate": 6.243682584029046e-06,
      "loss": 0.0204,
      "step": 1160140
    },
    {
      "epoch": 1.898627285402879,
      "grad_norm": 0.5250255465507507,
      "learning_rate": 6.243616691815529e-06,
      "loss": 0.02,
      "step": 1160160
    },
    {
      "epoch": 1.8986600158415323,
      "grad_norm": 1.1074340343475342,
      "learning_rate": 6.2435507996020115e-06,
      "loss": 0.0198,
      "step": 1160180
    },
    {
      "epoch": 1.8986927462801857,
      "grad_norm": 0.43801262974739075,
      "learning_rate": 6.243484907388494e-06,
      "loss": 0.0171,
      "step": 1160200
    },
    {
      "epoch": 1.8987254767188388,
      "grad_norm": 0.2937208116054535,
      "learning_rate": 6.243419015174978e-06,
      "loss": 0.0123,
      "step": 1160220
    },
    {
      "epoch": 1.8987582071574924,
      "grad_norm": 2.6066789627075195,
      "learning_rate": 6.24335312296146e-06,
      "loss": 0.0221,
      "step": 1160240
    },
    {
      "epoch": 1.8987909375961456,
      "grad_norm": 0.2472202181816101,
      "learning_rate": 6.243287230747943e-06,
      "loss": 0.0147,
      "step": 1160260
    },
    {
      "epoch": 1.898823668034799,
      "grad_norm": 0.812405526638031,
      "learning_rate": 6.243221338534425e-06,
      "loss": 0.0201,
      "step": 1160280
    },
    {
      "epoch": 1.8988563984734523,
      "grad_norm": 0.3257495164871216,
      "learning_rate": 6.243155446320909e-06,
      "loss": 0.0143,
      "step": 1160300
    },
    {
      "epoch": 1.8988891289121057,
      "grad_norm": 0.42303532361984253,
      "learning_rate": 6.243089554107392e-06,
      "loss": 0.0219,
      "step": 1160320
    },
    {
      "epoch": 1.898921859350759,
      "grad_norm": 0.6628577709197998,
      "learning_rate": 6.243023661893874e-06,
      "loss": 0.0223,
      "step": 1160340
    },
    {
      "epoch": 1.8989545897894122,
      "grad_norm": 0.41580551862716675,
      "learning_rate": 6.242957769680358e-06,
      "loss": 0.0149,
      "step": 1160360
    },
    {
      "epoch": 1.8989873202280658,
      "grad_norm": 0.832891047000885,
      "learning_rate": 6.24289187746684e-06,
      "loss": 0.0166,
      "step": 1160380
    },
    {
      "epoch": 1.899020050666719,
      "grad_norm": 0.5855984091758728,
      "learning_rate": 6.242825985253323e-06,
      "loss": 0.0176,
      "step": 1160400
    },
    {
      "epoch": 1.8990527811053723,
      "grad_norm": 0.5402636528015137,
      "learning_rate": 6.242760093039806e-06,
      "loss": 0.0206,
      "step": 1160420
    },
    {
      "epoch": 1.8990855115440257,
      "grad_norm": 0.5006890296936035,
      "learning_rate": 6.242694200826289e-06,
      "loss": 0.0141,
      "step": 1160440
    },
    {
      "epoch": 1.899118241982679,
      "grad_norm": 0.4220468997955322,
      "learning_rate": 6.2426283086127715e-06,
      "loss": 0.0218,
      "step": 1160460
    },
    {
      "epoch": 1.8991509724213325,
      "grad_norm": 0.2697876989841461,
      "learning_rate": 6.242562416399255e-06,
      "loss": 0.0108,
      "step": 1160480
    },
    {
      "epoch": 1.8991837028599856,
      "grad_norm": 0.24910426139831543,
      "learning_rate": 6.242496524185737e-06,
      "loss": 0.0186,
      "step": 1160500
    },
    {
      "epoch": 1.8992164332986392,
      "grad_norm": 0.3609997630119324,
      "learning_rate": 6.242430631972221e-06,
      "loss": 0.0154,
      "step": 1160520
    },
    {
      "epoch": 1.8992491637372924,
      "grad_norm": 0.32141709327697754,
      "learning_rate": 6.2423647397587025e-06,
      "loss": 0.016,
      "step": 1160540
    },
    {
      "epoch": 1.8992818941759457,
      "grad_norm": 0.5010538101196289,
      "learning_rate": 6.242298847545186e-06,
      "loss": 0.0173,
      "step": 1160560
    },
    {
      "epoch": 1.899314624614599,
      "grad_norm": 0.4878595769405365,
      "learning_rate": 6.242232955331669e-06,
      "loss": 0.0215,
      "step": 1160580
    },
    {
      "epoch": 1.8993473550532525,
      "grad_norm": 0.37288162112236023,
      "learning_rate": 6.2421670631181516e-06,
      "loss": 0.0167,
      "step": 1160600
    },
    {
      "epoch": 1.8993800854919058,
      "grad_norm": 4.948544502258301,
      "learning_rate": 6.242101170904634e-06,
      "loss": 0.0165,
      "step": 1160620
    },
    {
      "epoch": 1.899412815930559,
      "grad_norm": 0.6100314259529114,
      "learning_rate": 6.242035278691118e-06,
      "loss": 0.0173,
      "step": 1160640
    },
    {
      "epoch": 1.8994455463692126,
      "grad_norm": 0.40197381377220154,
      "learning_rate": 6.241969386477601e-06,
      "loss": 0.0228,
      "step": 1160660
    },
    {
      "epoch": 1.8994782768078657,
      "grad_norm": 0.40569278597831726,
      "learning_rate": 6.241903494264083e-06,
      "loss": 0.0157,
      "step": 1160680
    },
    {
      "epoch": 1.8995110072465191,
      "grad_norm": 1.161953091621399,
      "learning_rate": 6.241837602050567e-06,
      "loss": 0.0264,
      "step": 1160700
    },
    {
      "epoch": 1.8995437376851725,
      "grad_norm": 0.7678607106208801,
      "learning_rate": 6.241771709837049e-06,
      "loss": 0.0232,
      "step": 1160720
    },
    {
      "epoch": 1.8995764681238256,
      "grad_norm": 0.2810589075088501,
      "learning_rate": 6.241705817623532e-06,
      "loss": 0.0227,
      "step": 1160740
    },
    {
      "epoch": 1.8996091985624792,
      "grad_norm": 0.19808033108711243,
      "learning_rate": 6.241639925410014e-06,
      "loss": 0.0276,
      "step": 1160760
    },
    {
      "epoch": 1.8996419290011324,
      "grad_norm": 0.8552348613739014,
      "learning_rate": 6.241574033196498e-06,
      "loss": 0.0233,
      "step": 1160780
    },
    {
      "epoch": 1.899674659439786,
      "grad_norm": 0.3425116539001465,
      "learning_rate": 6.241508140982981e-06,
      "loss": 0.0141,
      "step": 1160800
    },
    {
      "epoch": 1.8997073898784391,
      "grad_norm": 0.979252815246582,
      "learning_rate": 6.241442248769463e-06,
      "loss": 0.0159,
      "step": 1160820
    },
    {
      "epoch": 1.8997401203170925,
      "grad_norm": 0.4656102955341339,
      "learning_rate": 6.241376356555946e-06,
      "loss": 0.017,
      "step": 1160840
    },
    {
      "epoch": 1.8997728507557459,
      "grad_norm": 1.044960856437683,
      "learning_rate": 6.24131046434243e-06,
      "loss": 0.0175,
      "step": 1160860
    },
    {
      "epoch": 1.899805581194399,
      "grad_norm": 0.10040435940027237,
      "learning_rate": 6.241244572128912e-06,
      "loss": 0.0183,
      "step": 1160880
    },
    {
      "epoch": 1.8998383116330526,
      "grad_norm": 0.44176626205444336,
      "learning_rate": 6.241178679915395e-06,
      "loss": 0.0158,
      "step": 1160900
    },
    {
      "epoch": 1.8998710420717058,
      "grad_norm": 0.17881344258785248,
      "learning_rate": 6.241112787701877e-06,
      "loss": 0.0209,
      "step": 1160920
    },
    {
      "epoch": 1.8999037725103591,
      "grad_norm": 0.6627097129821777,
      "learning_rate": 6.241046895488361e-06,
      "loss": 0.015,
      "step": 1160940
    },
    {
      "epoch": 1.8999365029490125,
      "grad_norm": 0.14674830436706543,
      "learning_rate": 6.240981003274843e-06,
      "loss": 0.0122,
      "step": 1160960
    },
    {
      "epoch": 1.8999692333876659,
      "grad_norm": 0.17057901620864868,
      "learning_rate": 6.240915111061326e-06,
      "loss": 0.0166,
      "step": 1160980
    },
    {
      "epoch": 1.9000019638263193,
      "grad_norm": 0.558413565158844,
      "learning_rate": 6.24084921884781e-06,
      "loss": 0.0165,
      "step": 1161000
    },
    {
      "epoch": 1.9000346942649724,
      "grad_norm": 0.214688241481781,
      "learning_rate": 6.2407833266342925e-06,
      "loss": 0.0238,
      "step": 1161020
    },
    {
      "epoch": 1.900067424703626,
      "grad_norm": 0.2573685646057129,
      "learning_rate": 6.240717434420775e-06,
      "loss": 0.0213,
      "step": 1161040
    },
    {
      "epoch": 1.9001001551422791,
      "grad_norm": 0.1698221117258072,
      "learning_rate": 6.240651542207258e-06,
      "loss": 0.018,
      "step": 1161060
    },
    {
      "epoch": 1.9001328855809325,
      "grad_norm": 0.3510405123233795,
      "learning_rate": 6.2405856499937415e-06,
      "loss": 0.0168,
      "step": 1161080
    },
    {
      "epoch": 1.900165616019586,
      "grad_norm": 0.7677997946739197,
      "learning_rate": 6.2405197577802234e-06,
      "loss": 0.0178,
      "step": 1161100
    },
    {
      "epoch": 1.9001983464582393,
      "grad_norm": 0.32901808619499207,
      "learning_rate": 6.240453865566707e-06,
      "loss": 0.0201,
      "step": 1161120
    },
    {
      "epoch": 1.9002310768968926,
      "grad_norm": 0.31521061062812805,
      "learning_rate": 6.240387973353189e-06,
      "loss": 0.017,
      "step": 1161140
    },
    {
      "epoch": 1.9002638073355458,
      "grad_norm": 0.06569277495145798,
      "learning_rate": 6.2403220811396725e-06,
      "loss": 0.0112,
      "step": 1161160
    },
    {
      "epoch": 1.9002965377741994,
      "grad_norm": 0.20595911145210266,
      "learning_rate": 6.240256188926154e-06,
      "loss": 0.0198,
      "step": 1161180
    },
    {
      "epoch": 1.9003292682128525,
      "grad_norm": 0.7857295274734497,
      "learning_rate": 6.240190296712638e-06,
      "loss": 0.0174,
      "step": 1161200
    },
    {
      "epoch": 1.900361998651506,
      "grad_norm": 0.3214811086654663,
      "learning_rate": 6.240124404499121e-06,
      "loss": 0.0204,
      "step": 1161220
    },
    {
      "epoch": 1.9003947290901593,
      "grad_norm": 0.7548895478248596,
      "learning_rate": 6.240058512285604e-06,
      "loss": 0.021,
      "step": 1161240
    },
    {
      "epoch": 1.9004274595288126,
      "grad_norm": 0.22778885066509247,
      "learning_rate": 6.239992620072086e-06,
      "loss": 0.0212,
      "step": 1161260
    },
    {
      "epoch": 1.900460189967466,
      "grad_norm": 0.8614256978034973,
      "learning_rate": 6.23992672785857e-06,
      "loss": 0.0168,
      "step": 1161280
    },
    {
      "epoch": 1.9004929204061192,
      "grad_norm": 0.36639204621315,
      "learning_rate": 6.239860835645052e-06,
      "loss": 0.0202,
      "step": 1161300
    },
    {
      "epoch": 1.9005256508447728,
      "grad_norm": 0.7046723365783691,
      "learning_rate": 6.239794943431535e-06,
      "loss": 0.0146,
      "step": 1161320
    },
    {
      "epoch": 1.900558381283426,
      "grad_norm": 0.07107146084308624,
      "learning_rate": 6.239729051218017e-06,
      "loss": 0.017,
      "step": 1161340
    },
    {
      "epoch": 1.9005911117220793,
      "grad_norm": 0.5326068997383118,
      "learning_rate": 6.239663159004501e-06,
      "loss": 0.0279,
      "step": 1161360
    },
    {
      "epoch": 1.9006238421607327,
      "grad_norm": 0.15570680797100067,
      "learning_rate": 6.239597266790984e-06,
      "loss": 0.0171,
      "step": 1161380
    },
    {
      "epoch": 1.900656572599386,
      "grad_norm": 0.16027691960334778,
      "learning_rate": 6.239531374577466e-06,
      "loss": 0.0121,
      "step": 1161400
    },
    {
      "epoch": 1.9006893030380394,
      "grad_norm": 0.17652259767055511,
      "learning_rate": 6.23946548236395e-06,
      "loss": 0.0162,
      "step": 1161420
    },
    {
      "epoch": 1.9007220334766926,
      "grad_norm": 0.7493352890014648,
      "learning_rate": 6.2393995901504325e-06,
      "loss": 0.0178,
      "step": 1161440
    },
    {
      "epoch": 1.9007547639153461,
      "grad_norm": 1.507521390914917,
      "learning_rate": 6.239333697936915e-06,
      "loss": 0.016,
      "step": 1161460
    },
    {
      "epoch": 1.9007874943539993,
      "grad_norm": 0.6236284375190735,
      "learning_rate": 6.239267805723398e-06,
      "loss": 0.0126,
      "step": 1161480
    },
    {
      "epoch": 1.9008202247926527,
      "grad_norm": 0.07143531739711761,
      "learning_rate": 6.239201913509882e-06,
      "loss": 0.0233,
      "step": 1161500
    },
    {
      "epoch": 1.900852955231306,
      "grad_norm": 0.15286242961883545,
      "learning_rate": 6.2391360212963635e-06,
      "loss": 0.0187,
      "step": 1161520
    },
    {
      "epoch": 1.9008856856699592,
      "grad_norm": 0.671672523021698,
      "learning_rate": 6.239070129082847e-06,
      "loss": 0.0152,
      "step": 1161540
    },
    {
      "epoch": 1.9009184161086128,
      "grad_norm": 0.4611900746822357,
      "learning_rate": 6.239004236869329e-06,
      "loss": 0.0191,
      "step": 1161560
    },
    {
      "epoch": 1.900951146547266,
      "grad_norm": 0.7613705992698669,
      "learning_rate": 6.2389383446558126e-06,
      "loss": 0.0209,
      "step": 1161580
    },
    {
      "epoch": 1.9009838769859195,
      "grad_norm": 0.3410620391368866,
      "learning_rate": 6.238872452442295e-06,
      "loss": 0.0209,
      "step": 1161600
    },
    {
      "epoch": 1.9010166074245727,
      "grad_norm": 0.08912954479455948,
      "learning_rate": 6.238806560228778e-06,
      "loss": 0.0092,
      "step": 1161620
    },
    {
      "epoch": 1.901049337863226,
      "grad_norm": 0.5621810555458069,
      "learning_rate": 6.238740668015261e-06,
      "loss": 0.017,
      "step": 1161640
    },
    {
      "epoch": 1.9010820683018794,
      "grad_norm": 0.3065285086631775,
      "learning_rate": 6.238674775801744e-06,
      "loss": 0.0167,
      "step": 1161660
    },
    {
      "epoch": 1.9011147987405326,
      "grad_norm": 0.8126353621482849,
      "learning_rate": 6.238608883588226e-06,
      "loss": 0.0179,
      "step": 1161680
    },
    {
      "epoch": 1.9011475291791862,
      "grad_norm": 1.53802490234375,
      "learning_rate": 6.23854299137471e-06,
      "loss": 0.0165,
      "step": 1161700
    },
    {
      "epoch": 1.9011802596178393,
      "grad_norm": 0.29231977462768555,
      "learning_rate": 6.2384770991611934e-06,
      "loss": 0.018,
      "step": 1161720
    },
    {
      "epoch": 1.9012129900564927,
      "grad_norm": 0.4172387421131134,
      "learning_rate": 6.238411206947675e-06,
      "loss": 0.018,
      "step": 1161740
    },
    {
      "epoch": 1.901245720495146,
      "grad_norm": 0.11021462082862854,
      "learning_rate": 6.238345314734159e-06,
      "loss": 0.0108,
      "step": 1161760
    },
    {
      "epoch": 1.9012784509337994,
      "grad_norm": 0.3739188313484192,
      "learning_rate": 6.238279422520641e-06,
      "loss": 0.0162,
      "step": 1161780
    },
    {
      "epoch": 1.9013111813724528,
      "grad_norm": 0.6342058181762695,
      "learning_rate": 6.238213530307124e-06,
      "loss": 0.0187,
      "step": 1161800
    },
    {
      "epoch": 1.901343911811106,
      "grad_norm": 0.49515628814697266,
      "learning_rate": 6.238147638093607e-06,
      "loss": 0.0245,
      "step": 1161820
    },
    {
      "epoch": 1.9013766422497596,
      "grad_norm": 0.20309148728847504,
      "learning_rate": 6.23808174588009e-06,
      "loss": 0.0213,
      "step": 1161840
    },
    {
      "epoch": 1.9014093726884127,
      "grad_norm": 0.7294250130653381,
      "learning_rate": 6.238015853666573e-06,
      "loss": 0.0198,
      "step": 1161860
    },
    {
      "epoch": 1.901442103127066,
      "grad_norm": 0.38422858715057373,
      "learning_rate": 6.237949961453056e-06,
      "loss": 0.018,
      "step": 1161880
    },
    {
      "epoch": 1.9014748335657194,
      "grad_norm": 0.34060266613960266,
      "learning_rate": 6.237884069239538e-06,
      "loss": 0.0201,
      "step": 1161900
    },
    {
      "epoch": 1.9015075640043728,
      "grad_norm": 0.187262162566185,
      "learning_rate": 6.237818177026022e-06,
      "loss": 0.0201,
      "step": 1161920
    },
    {
      "epoch": 1.9015402944430262,
      "grad_norm": 0.35390162467956543,
      "learning_rate": 6.237752284812504e-06,
      "loss": 0.0198,
      "step": 1161940
    },
    {
      "epoch": 1.9015730248816793,
      "grad_norm": 0.3515874147415161,
      "learning_rate": 6.237686392598987e-06,
      "loss": 0.0126,
      "step": 1161960
    },
    {
      "epoch": 1.901605755320333,
      "grad_norm": 1.9582594633102417,
      "learning_rate": 6.23762050038547e-06,
      "loss": 0.0208,
      "step": 1161980
    },
    {
      "epoch": 1.901638485758986,
      "grad_norm": 0.3962560296058655,
      "learning_rate": 6.237554608171953e-06,
      "loss": 0.017,
      "step": 1162000
    },
    {
      "epoch": 1.9016712161976395,
      "grad_norm": 0.2790556848049164,
      "learning_rate": 6.237488715958435e-06,
      "loss": 0.0251,
      "step": 1162020
    },
    {
      "epoch": 1.9017039466362928,
      "grad_norm": 0.2555127441883087,
      "learning_rate": 6.237422823744919e-06,
      "loss": 0.0193,
      "step": 1162040
    },
    {
      "epoch": 1.9017366770749462,
      "grad_norm": 0.6495326161384583,
      "learning_rate": 6.237356931531402e-06,
      "loss": 0.0186,
      "step": 1162060
    },
    {
      "epoch": 1.9017694075135996,
      "grad_norm": 0.9961349368095398,
      "learning_rate": 6.2372910393178845e-06,
      "loss": 0.0164,
      "step": 1162080
    },
    {
      "epoch": 1.9018021379522527,
      "grad_norm": 0.3100060820579529,
      "learning_rate": 6.237225147104368e-06,
      "loss": 0.0271,
      "step": 1162100
    },
    {
      "epoch": 1.9018348683909063,
      "grad_norm": 0.7817510366439819,
      "learning_rate": 6.23715925489085e-06,
      "loss": 0.0221,
      "step": 1162120
    },
    {
      "epoch": 1.9018675988295595,
      "grad_norm": 1.1836719512939453,
      "learning_rate": 6.2370933626773335e-06,
      "loss": 0.0106,
      "step": 1162140
    },
    {
      "epoch": 1.9019003292682128,
      "grad_norm": 0.6416207551956177,
      "learning_rate": 6.237027470463815e-06,
      "loss": 0.0155,
      "step": 1162160
    },
    {
      "epoch": 1.9019330597068662,
      "grad_norm": 0.4726584553718567,
      "learning_rate": 6.236961578250299e-06,
      "loss": 0.017,
      "step": 1162180
    },
    {
      "epoch": 1.9019657901455194,
      "grad_norm": 0.21611085534095764,
      "learning_rate": 6.236895686036781e-06,
      "loss": 0.0146,
      "step": 1162200
    },
    {
      "epoch": 1.901998520584173,
      "grad_norm": 0.219527006149292,
      "learning_rate": 6.2368297938232645e-06,
      "loss": 0.02,
      "step": 1162220
    },
    {
      "epoch": 1.902031251022826,
      "grad_norm": 0.09781519323587418,
      "learning_rate": 6.236763901609747e-06,
      "loss": 0.0169,
      "step": 1162240
    },
    {
      "epoch": 1.9020639814614797,
      "grad_norm": 0.7258156538009644,
      "learning_rate": 6.23669800939623e-06,
      "loss": 0.0214,
      "step": 1162260
    },
    {
      "epoch": 1.9020967119001329,
      "grad_norm": 2.3072400093078613,
      "learning_rate": 6.236632117182713e-06,
      "loss": 0.0234,
      "step": 1162280
    },
    {
      "epoch": 1.9021294423387862,
      "grad_norm": 0.42757827043533325,
      "learning_rate": 6.236566224969196e-06,
      "loss": 0.0212,
      "step": 1162300
    },
    {
      "epoch": 1.9021621727774396,
      "grad_norm": 1.7431750297546387,
      "learning_rate": 6.236500332755678e-06,
      "loss": 0.0172,
      "step": 1162320
    },
    {
      "epoch": 1.9021949032160927,
      "grad_norm": 0.2823954224586487,
      "learning_rate": 6.236434440542162e-06,
      "loss": 0.0261,
      "step": 1162340
    },
    {
      "epoch": 1.9022276336547463,
      "grad_norm": 0.2964261472225189,
      "learning_rate": 6.236368548328644e-06,
      "loss": 0.012,
      "step": 1162360
    },
    {
      "epoch": 1.9022603640933995,
      "grad_norm": 0.4284028708934784,
      "learning_rate": 6.236302656115127e-06,
      "loss": 0.0161,
      "step": 1162380
    },
    {
      "epoch": 1.9022930945320529,
      "grad_norm": 0.38868746161460876,
      "learning_rate": 6.23623676390161e-06,
      "loss": 0.0198,
      "step": 1162400
    },
    {
      "epoch": 1.9023258249707062,
      "grad_norm": 0.7719491124153137,
      "learning_rate": 6.236170871688093e-06,
      "loss": 0.019,
      "step": 1162420
    },
    {
      "epoch": 1.9023585554093596,
      "grad_norm": 1.1943806409835815,
      "learning_rate": 6.236104979474576e-06,
      "loss": 0.0125,
      "step": 1162440
    },
    {
      "epoch": 1.902391285848013,
      "grad_norm": 0.7973197102546692,
      "learning_rate": 6.236039087261059e-06,
      "loss": 0.0218,
      "step": 1162460
    },
    {
      "epoch": 1.9024240162866661,
      "grad_norm": 0.5362218022346497,
      "learning_rate": 6.235973195047542e-06,
      "loss": 0.0192,
      "step": 1162480
    },
    {
      "epoch": 1.9024567467253197,
      "grad_norm": 0.4069729447364807,
      "learning_rate": 6.2359073028340245e-06,
      "loss": 0.0138,
      "step": 1162500
    },
    {
      "epoch": 1.9024894771639729,
      "grad_norm": 0.6361430287361145,
      "learning_rate": 6.235841410620508e-06,
      "loss": 0.0162,
      "step": 1162520
    },
    {
      "epoch": 1.9025222076026262,
      "grad_norm": 1.0216528177261353,
      "learning_rate": 6.23577551840699e-06,
      "loss": 0.0193,
      "step": 1162540
    },
    {
      "epoch": 1.9025549380412796,
      "grad_norm": 0.4967467486858368,
      "learning_rate": 6.235709626193474e-06,
      "loss": 0.0223,
      "step": 1162560
    },
    {
      "epoch": 1.902587668479933,
      "grad_norm": 0.5191435217857361,
      "learning_rate": 6.2356437339799555e-06,
      "loss": 0.0173,
      "step": 1162580
    },
    {
      "epoch": 1.9026203989185864,
      "grad_norm": 0.27386370301246643,
      "learning_rate": 6.235577841766439e-06,
      "loss": 0.0172,
      "step": 1162600
    },
    {
      "epoch": 1.9026531293572395,
      "grad_norm": 0.6091684103012085,
      "learning_rate": 6.235511949552922e-06,
      "loss": 0.0153,
      "step": 1162620
    },
    {
      "epoch": 1.902685859795893,
      "grad_norm": 0.5127497911453247,
      "learning_rate": 6.2354460573394046e-06,
      "loss": 0.0215,
      "step": 1162640
    },
    {
      "epoch": 1.9027185902345463,
      "grad_norm": 0.9800795316696167,
      "learning_rate": 6.235380165125887e-06,
      "loss": 0.0205,
      "step": 1162660
    },
    {
      "epoch": 1.9027513206731996,
      "grad_norm": 0.8454222083091736,
      "learning_rate": 6.235314272912371e-06,
      "loss": 0.0137,
      "step": 1162680
    },
    {
      "epoch": 1.902784051111853,
      "grad_norm": 0.8122621178627014,
      "learning_rate": 6.235248380698853e-06,
      "loss": 0.0112,
      "step": 1162700
    },
    {
      "epoch": 1.9028167815505064,
      "grad_norm": 0.3933570981025696,
      "learning_rate": 6.235182488485336e-06,
      "loss": 0.0149,
      "step": 1162720
    },
    {
      "epoch": 1.9028495119891597,
      "grad_norm": 0.31208106875419617,
      "learning_rate": 6.235116596271818e-06,
      "loss": 0.0201,
      "step": 1162740
    },
    {
      "epoch": 1.902882242427813,
      "grad_norm": 1.1730875968933105,
      "learning_rate": 6.235050704058302e-06,
      "loss": 0.0203,
      "step": 1162760
    },
    {
      "epoch": 1.9029149728664665,
      "grad_norm": 0.5803682804107666,
      "learning_rate": 6.2349848118447854e-06,
      "loss": 0.0208,
      "step": 1162780
    },
    {
      "epoch": 1.9029477033051196,
      "grad_norm": 0.35698020458221436,
      "learning_rate": 6.234918919631267e-06,
      "loss": 0.0165,
      "step": 1162800
    },
    {
      "epoch": 1.902980433743773,
      "grad_norm": 0.23114919662475586,
      "learning_rate": 6.234853027417751e-06,
      "loss": 0.0178,
      "step": 1162820
    },
    {
      "epoch": 1.9030131641824264,
      "grad_norm": 1.347574234008789,
      "learning_rate": 6.234787135204234e-06,
      "loss": 0.0222,
      "step": 1162840
    },
    {
      "epoch": 1.9030458946210798,
      "grad_norm": 0.1858442723751068,
      "learning_rate": 6.234721242990716e-06,
      "loss": 0.0183,
      "step": 1162860
    },
    {
      "epoch": 1.9030786250597331,
      "grad_norm": 0.5722434520721436,
      "learning_rate": 6.234655350777199e-06,
      "loss": 0.0184,
      "step": 1162880
    },
    {
      "epoch": 1.9031113554983863,
      "grad_norm": 0.5680496096611023,
      "learning_rate": 6.234589458563683e-06,
      "loss": 0.0172,
      "step": 1162900
    },
    {
      "epoch": 1.9031440859370399,
      "grad_norm": 0.23324652016162872,
      "learning_rate": 6.234523566350165e-06,
      "loss": 0.0144,
      "step": 1162920
    },
    {
      "epoch": 1.903176816375693,
      "grad_norm": 0.5893736481666565,
      "learning_rate": 6.234457674136648e-06,
      "loss": 0.0197,
      "step": 1162940
    },
    {
      "epoch": 1.9032095468143464,
      "grad_norm": 0.5480705499649048,
      "learning_rate": 6.23439178192313e-06,
      "loss": 0.0173,
      "step": 1162960
    },
    {
      "epoch": 1.9032422772529998,
      "grad_norm": 0.0832490473985672,
      "learning_rate": 6.234325889709614e-06,
      "loss": 0.0178,
      "step": 1162980
    },
    {
      "epoch": 1.903275007691653,
      "grad_norm": 0.2730269134044647,
      "learning_rate": 6.234259997496096e-06,
      "loss": 0.0242,
      "step": 1163000
    },
    {
      "epoch": 1.9033077381303065,
      "grad_norm": 0.9036020040512085,
      "learning_rate": 6.234194105282579e-06,
      "loss": 0.0182,
      "step": 1163020
    },
    {
      "epoch": 1.9033404685689597,
      "grad_norm": 0.2082626223564148,
      "learning_rate": 6.234128213069062e-06,
      "loss": 0.0175,
      "step": 1163040
    },
    {
      "epoch": 1.9033731990076133,
      "grad_norm": 1.2935757637023926,
      "learning_rate": 6.2340623208555455e-06,
      "loss": 0.0142,
      "step": 1163060
    },
    {
      "epoch": 1.9034059294462664,
      "grad_norm": 0.40688905119895935,
      "learning_rate": 6.233996428642027e-06,
      "loss": 0.0255,
      "step": 1163080
    },
    {
      "epoch": 1.9034386598849198,
      "grad_norm": 0.29991140961647034,
      "learning_rate": 6.233930536428511e-06,
      "loss": 0.0252,
      "step": 1163100
    },
    {
      "epoch": 1.9034713903235732,
      "grad_norm": 0.1678546816110611,
      "learning_rate": 6.2338646442149945e-06,
      "loss": 0.0142,
      "step": 1163120
    },
    {
      "epoch": 1.9035041207622263,
      "grad_norm": 0.13733027875423431,
      "learning_rate": 6.2337987520014764e-06,
      "loss": 0.0244,
      "step": 1163140
    },
    {
      "epoch": 1.90353685120088,
      "grad_norm": 0.4872058629989624,
      "learning_rate": 6.23373285978796e-06,
      "loss": 0.0197,
      "step": 1163160
    },
    {
      "epoch": 1.903569581639533,
      "grad_norm": 0.27107754349708557,
      "learning_rate": 6.233666967574442e-06,
      "loss": 0.0168,
      "step": 1163180
    },
    {
      "epoch": 1.9036023120781864,
      "grad_norm": 0.4095955193042755,
      "learning_rate": 6.2336010753609255e-06,
      "loss": 0.0169,
      "step": 1163200
    },
    {
      "epoch": 1.9036350425168398,
      "grad_norm": 0.42126238346099854,
      "learning_rate": 6.233535183147407e-06,
      "loss": 0.0223,
      "step": 1163220
    },
    {
      "epoch": 1.9036677729554932,
      "grad_norm": 0.5513380765914917,
      "learning_rate": 6.233469290933891e-06,
      "loss": 0.0108,
      "step": 1163240
    },
    {
      "epoch": 1.9037005033941465,
      "grad_norm": 0.2614559233188629,
      "learning_rate": 6.233403398720374e-06,
      "loss": 0.0231,
      "step": 1163260
    },
    {
      "epoch": 1.9037332338327997,
      "grad_norm": 0.22966575622558594,
      "learning_rate": 6.2333375065068565e-06,
      "loss": 0.0158,
      "step": 1163280
    },
    {
      "epoch": 1.9037659642714533,
      "grad_norm": 0.453717440366745,
      "learning_rate": 6.233271614293339e-06,
      "loss": 0.015,
      "step": 1163300
    },
    {
      "epoch": 1.9037986947101064,
      "grad_norm": 0.45028969645500183,
      "learning_rate": 6.233205722079823e-06,
      "loss": 0.0183,
      "step": 1163320
    },
    {
      "epoch": 1.9038314251487598,
      "grad_norm": 0.19545964896678925,
      "learning_rate": 6.233139829866305e-06,
      "loss": 0.0116,
      "step": 1163340
    },
    {
      "epoch": 1.9038641555874132,
      "grad_norm": 0.30520445108413696,
      "learning_rate": 6.233073937652788e-06,
      "loss": 0.0192,
      "step": 1163360
    },
    {
      "epoch": 1.9038968860260665,
      "grad_norm": 0.5183236598968506,
      "learning_rate": 6.23300804543927e-06,
      "loss": 0.0199,
      "step": 1163380
    },
    {
      "epoch": 1.90392961646472,
      "grad_norm": 0.27578166127204895,
      "learning_rate": 6.232942153225754e-06,
      "loss": 0.0191,
      "step": 1163400
    },
    {
      "epoch": 1.903962346903373,
      "grad_norm": 0.7416081428527832,
      "learning_rate": 6.2328762610122365e-06,
      "loss": 0.0213,
      "step": 1163420
    },
    {
      "epoch": 1.9039950773420267,
      "grad_norm": 0.346310555934906,
      "learning_rate": 6.232810368798719e-06,
      "loss": 0.0253,
      "step": 1163440
    },
    {
      "epoch": 1.9040278077806798,
      "grad_norm": 0.1643659472465515,
      "learning_rate": 6.232744476585203e-06,
      "loss": 0.0168,
      "step": 1163460
    },
    {
      "epoch": 1.9040605382193332,
      "grad_norm": 0.49563315510749817,
      "learning_rate": 6.2326785843716856e-06,
      "loss": 0.0121,
      "step": 1163480
    },
    {
      "epoch": 1.9040932686579866,
      "grad_norm": 1.4490145444869995,
      "learning_rate": 6.232612692158168e-06,
      "loss": 0.0153,
      "step": 1163500
    },
    {
      "epoch": 1.90412599909664,
      "grad_norm": 0.6677651405334473,
      "learning_rate": 6.232546799944651e-06,
      "loss": 0.0223,
      "step": 1163520
    },
    {
      "epoch": 1.9041587295352933,
      "grad_norm": 0.3576979339122772,
      "learning_rate": 6.232480907731135e-06,
      "loss": 0.0158,
      "step": 1163540
    },
    {
      "epoch": 1.9041914599739465,
      "grad_norm": 0.49746328592300415,
      "learning_rate": 6.2324150155176165e-06,
      "loss": 0.017,
      "step": 1163560
    },
    {
      "epoch": 1.9042241904126,
      "grad_norm": 0.6462671160697937,
      "learning_rate": 6.2323491233041e-06,
      "loss": 0.0169,
      "step": 1163580
    },
    {
      "epoch": 1.9042569208512532,
      "grad_norm": 0.31572768092155457,
      "learning_rate": 6.232283231090582e-06,
      "loss": 0.0116,
      "step": 1163600
    },
    {
      "epoch": 1.9042896512899066,
      "grad_norm": 0.24790212512016296,
      "learning_rate": 6.232217338877066e-06,
      "loss": 0.0184,
      "step": 1163620
    },
    {
      "epoch": 1.90432238172856,
      "grad_norm": 0.6727238893508911,
      "learning_rate": 6.232151446663548e-06,
      "loss": 0.0198,
      "step": 1163640
    },
    {
      "epoch": 1.9043551121672133,
      "grad_norm": 0.8113672137260437,
      "learning_rate": 6.232085554450031e-06,
      "loss": 0.0179,
      "step": 1163660
    },
    {
      "epoch": 1.9043878426058667,
      "grad_norm": 0.44990357756614685,
      "learning_rate": 6.232019662236514e-06,
      "loss": 0.0102,
      "step": 1163680
    },
    {
      "epoch": 1.9044205730445198,
      "grad_norm": 0.4344366192817688,
      "learning_rate": 6.231953770022997e-06,
      "loss": 0.0174,
      "step": 1163700
    },
    {
      "epoch": 1.9044533034831734,
      "grad_norm": 0.40487128496170044,
      "learning_rate": 6.231887877809479e-06,
      "loss": 0.0188,
      "step": 1163720
    },
    {
      "epoch": 1.9044860339218266,
      "grad_norm": 0.4236890971660614,
      "learning_rate": 6.231821985595963e-06,
      "loss": 0.0173,
      "step": 1163740
    },
    {
      "epoch": 1.90451876436048,
      "grad_norm": 0.671714186668396,
      "learning_rate": 6.231756093382445e-06,
      "loss": 0.0199,
      "step": 1163760
    },
    {
      "epoch": 1.9045514947991333,
      "grad_norm": 0.8204960823059082,
      "learning_rate": 6.231690201168928e-06,
      "loss": 0.0167,
      "step": 1163780
    },
    {
      "epoch": 1.9045842252377865,
      "grad_norm": 0.1670190542936325,
      "learning_rate": 6.231624308955411e-06,
      "loss": 0.0215,
      "step": 1163800
    },
    {
      "epoch": 1.90461695567644,
      "grad_norm": 1.373823642730713,
      "learning_rate": 6.231558416741894e-06,
      "loss": 0.0121,
      "step": 1163820
    },
    {
      "epoch": 1.9046496861150932,
      "grad_norm": 0.31763702630996704,
      "learning_rate": 6.231492524528377e-06,
      "loss": 0.0258,
      "step": 1163840
    },
    {
      "epoch": 1.9046824165537468,
      "grad_norm": 0.8484976291656494,
      "learning_rate": 6.23142663231486e-06,
      "loss": 0.0132,
      "step": 1163860
    },
    {
      "epoch": 1.9047151469924,
      "grad_norm": 0.43890243768692017,
      "learning_rate": 6.231360740101343e-06,
      "loss": 0.0173,
      "step": 1163880
    },
    {
      "epoch": 1.9047478774310533,
      "grad_norm": 0.9802352786064148,
      "learning_rate": 6.231294847887826e-06,
      "loss": 0.0194,
      "step": 1163900
    },
    {
      "epoch": 1.9047806078697067,
      "grad_norm": 2.235966920852661,
      "learning_rate": 6.231228955674309e-06,
      "loss": 0.0267,
      "step": 1163920
    },
    {
      "epoch": 1.9048133383083599,
      "grad_norm": 0.21621045470237732,
      "learning_rate": 6.231163063460791e-06,
      "loss": 0.0259,
      "step": 1163940
    },
    {
      "epoch": 1.9048460687470135,
      "grad_norm": 0.34570834040641785,
      "learning_rate": 6.231097171247275e-06,
      "loss": 0.0137,
      "step": 1163960
    },
    {
      "epoch": 1.9048787991856666,
      "grad_norm": 0.28872641921043396,
      "learning_rate": 6.231031279033757e-06,
      "loss": 0.0175,
      "step": 1163980
    },
    {
      "epoch": 1.90491152962432,
      "grad_norm": 1.8508754968643188,
      "learning_rate": 6.23096538682024e-06,
      "loss": 0.0176,
      "step": 1164000
    },
    {
      "epoch": 1.9049442600629733,
      "grad_norm": 0.267307311296463,
      "learning_rate": 6.230899494606723e-06,
      "loss": 0.0157,
      "step": 1164020
    },
    {
      "epoch": 1.9049769905016267,
      "grad_norm": 0.5516441464424133,
      "learning_rate": 6.230833602393206e-06,
      "loss": 0.0135,
      "step": 1164040
    },
    {
      "epoch": 1.90500972094028,
      "grad_norm": 0.756425142288208,
      "learning_rate": 6.230767710179688e-06,
      "loss": 0.0231,
      "step": 1164060
    },
    {
      "epoch": 1.9050424513789332,
      "grad_norm": 0.6025910973548889,
      "learning_rate": 6.230701817966172e-06,
      "loss": 0.0156,
      "step": 1164080
    },
    {
      "epoch": 1.9050751818175868,
      "grad_norm": 0.6882736682891846,
      "learning_rate": 6.230635925752654e-06,
      "loss": 0.0196,
      "step": 1164100
    },
    {
      "epoch": 1.90510791225624,
      "grad_norm": 0.25234904885292053,
      "learning_rate": 6.2305700335391375e-06,
      "loss": 0.0174,
      "step": 1164120
    },
    {
      "epoch": 1.9051406426948934,
      "grad_norm": 0.2295428067445755,
      "learning_rate": 6.230504141325619e-06,
      "loss": 0.015,
      "step": 1164140
    },
    {
      "epoch": 1.9051733731335467,
      "grad_norm": 0.9000756144523621,
      "learning_rate": 6.230438249112103e-06,
      "loss": 0.0207,
      "step": 1164160
    },
    {
      "epoch": 1.9052061035722,
      "grad_norm": 0.31247782707214355,
      "learning_rate": 6.2303723568985865e-06,
      "loss": 0.0159,
      "step": 1164180
    },
    {
      "epoch": 1.9052388340108535,
      "grad_norm": 0.6258864402770996,
      "learning_rate": 6.230306464685068e-06,
      "loss": 0.0206,
      "step": 1164200
    },
    {
      "epoch": 1.9052715644495066,
      "grad_norm": 0.23528432846069336,
      "learning_rate": 6.230240572471552e-06,
      "loss": 0.0226,
      "step": 1164220
    },
    {
      "epoch": 1.9053042948881602,
      "grad_norm": 0.5931859612464905,
      "learning_rate": 6.230174680258034e-06,
      "loss": 0.0175,
      "step": 1164240
    },
    {
      "epoch": 1.9053370253268134,
      "grad_norm": 0.6442784667015076,
      "learning_rate": 6.2301087880445175e-06,
      "loss": 0.0217,
      "step": 1164260
    },
    {
      "epoch": 1.9053697557654667,
      "grad_norm": 0.8623798489570618,
      "learning_rate": 6.230042895831e-06,
      "loss": 0.0184,
      "step": 1164280
    },
    {
      "epoch": 1.9054024862041201,
      "grad_norm": 0.17792212963104248,
      "learning_rate": 6.229977003617483e-06,
      "loss": 0.0185,
      "step": 1164300
    },
    {
      "epoch": 1.9054352166427735,
      "grad_norm": 0.08094802498817444,
      "learning_rate": 6.229911111403966e-06,
      "loss": 0.0157,
      "step": 1164320
    },
    {
      "epoch": 1.9054679470814269,
      "grad_norm": 0.5625694990158081,
      "learning_rate": 6.229845219190449e-06,
      "loss": 0.0171,
      "step": 1164340
    },
    {
      "epoch": 1.90550067752008,
      "grad_norm": 0.6628824472427368,
      "learning_rate": 6.229779326976931e-06,
      "loss": 0.0123,
      "step": 1164360
    },
    {
      "epoch": 1.9055334079587336,
      "grad_norm": 0.8254974484443665,
      "learning_rate": 6.229713434763415e-06,
      "loss": 0.0185,
      "step": 1164380
    },
    {
      "epoch": 1.9055661383973868,
      "grad_norm": 0.7847411632537842,
      "learning_rate": 6.229647542549897e-06,
      "loss": 0.0176,
      "step": 1164400
    },
    {
      "epoch": 1.9055988688360401,
      "grad_norm": 0.2950703501701355,
      "learning_rate": 6.22958165033638e-06,
      "loss": 0.0196,
      "step": 1164420
    },
    {
      "epoch": 1.9056315992746935,
      "grad_norm": 0.19300419092178345,
      "learning_rate": 6.229515758122863e-06,
      "loss": 0.0114,
      "step": 1164440
    },
    {
      "epoch": 1.9056643297133469,
      "grad_norm": 0.6846320033073425,
      "learning_rate": 6.229449865909346e-06,
      "loss": 0.0266,
      "step": 1164460
    },
    {
      "epoch": 1.9056970601520002,
      "grad_norm": 0.4264664649963379,
      "learning_rate": 6.2293839736958285e-06,
      "loss": 0.015,
      "step": 1164480
    },
    {
      "epoch": 1.9057297905906534,
      "grad_norm": 0.2374279797077179,
      "learning_rate": 6.229318081482312e-06,
      "loss": 0.0153,
      "step": 1164500
    },
    {
      "epoch": 1.905762521029307,
      "grad_norm": 0.422932505607605,
      "learning_rate": 6.229252189268795e-06,
      "loss": 0.0201,
      "step": 1164520
    },
    {
      "epoch": 1.9057952514679601,
      "grad_norm": 0.16990168392658234,
      "learning_rate": 6.2291862970552775e-06,
      "loss": 0.0237,
      "step": 1164540
    },
    {
      "epoch": 1.9058279819066135,
      "grad_norm": 0.6960234642028809,
      "learning_rate": 6.229120404841761e-06,
      "loss": 0.0124,
      "step": 1164560
    },
    {
      "epoch": 1.9058607123452669,
      "grad_norm": 0.16266898810863495,
      "learning_rate": 6.229054512628243e-06,
      "loss": 0.0127,
      "step": 1164580
    },
    {
      "epoch": 1.90589344278392,
      "grad_norm": 1.1669834852218628,
      "learning_rate": 6.228988620414727e-06,
      "loss": 0.0225,
      "step": 1164600
    },
    {
      "epoch": 1.9059261732225736,
      "grad_norm": 0.4587067663669586,
      "learning_rate": 6.2289227282012085e-06,
      "loss": 0.0115,
      "step": 1164620
    },
    {
      "epoch": 1.9059589036612268,
      "grad_norm": 0.26304784417152405,
      "learning_rate": 6.228856835987692e-06,
      "loss": 0.0191,
      "step": 1164640
    },
    {
      "epoch": 1.9059916340998804,
      "grad_norm": 0.7282434701919556,
      "learning_rate": 6.228790943774175e-06,
      "loss": 0.0202,
      "step": 1164660
    },
    {
      "epoch": 1.9060243645385335,
      "grad_norm": 1.2619562149047852,
      "learning_rate": 6.2287250515606576e-06,
      "loss": 0.0159,
      "step": 1164680
    },
    {
      "epoch": 1.906057094977187,
      "grad_norm": 0.34714066982269287,
      "learning_rate": 6.22865915934714e-06,
      "loss": 0.0168,
      "step": 1164700
    },
    {
      "epoch": 1.9060898254158403,
      "grad_norm": 0.1269901692867279,
      "learning_rate": 6.228593267133624e-06,
      "loss": 0.0149,
      "step": 1164720
    },
    {
      "epoch": 1.9061225558544934,
      "grad_norm": 0.48460301756858826,
      "learning_rate": 6.228527374920106e-06,
      "loss": 0.0209,
      "step": 1164740
    },
    {
      "epoch": 1.906155286293147,
      "grad_norm": 0.6457757353782654,
      "learning_rate": 6.228461482706589e-06,
      "loss": 0.0195,
      "step": 1164760
    },
    {
      "epoch": 1.9061880167318002,
      "grad_norm": 0.2567424476146698,
      "learning_rate": 6.228395590493071e-06,
      "loss": 0.0271,
      "step": 1164780
    },
    {
      "epoch": 1.9062207471704535,
      "grad_norm": 0.384285569190979,
      "learning_rate": 6.228329698279555e-06,
      "loss": 0.0266,
      "step": 1164800
    },
    {
      "epoch": 1.906253477609107,
      "grad_norm": 0.498374342918396,
      "learning_rate": 6.228263806066038e-06,
      "loss": 0.0255,
      "step": 1164820
    },
    {
      "epoch": 1.9062862080477603,
      "grad_norm": 0.07861974835395813,
      "learning_rate": 6.22819791385252e-06,
      "loss": 0.0232,
      "step": 1164840
    },
    {
      "epoch": 1.9063189384864136,
      "grad_norm": 0.29395413398742676,
      "learning_rate": 6.228132021639003e-06,
      "loss": 0.012,
      "step": 1164860
    },
    {
      "epoch": 1.9063516689250668,
      "grad_norm": 0.8846017122268677,
      "learning_rate": 6.228066129425487e-06,
      "loss": 0.0186,
      "step": 1164880
    },
    {
      "epoch": 1.9063843993637204,
      "grad_norm": 0.28762495517730713,
      "learning_rate": 6.228000237211969e-06,
      "loss": 0.0134,
      "step": 1164900
    },
    {
      "epoch": 1.9064171298023735,
      "grad_norm": 0.29420608282089233,
      "learning_rate": 6.227934344998452e-06,
      "loss": 0.0148,
      "step": 1164920
    },
    {
      "epoch": 1.906449860241027,
      "grad_norm": 0.4815188944339752,
      "learning_rate": 6.227868452784936e-06,
      "loss": 0.018,
      "step": 1164940
    },
    {
      "epoch": 1.9064825906796803,
      "grad_norm": 1.3566277027130127,
      "learning_rate": 6.227802560571418e-06,
      "loss": 0.0205,
      "step": 1164960
    },
    {
      "epoch": 1.9065153211183337,
      "grad_norm": 0.3525697588920593,
      "learning_rate": 6.227736668357901e-06,
      "loss": 0.0203,
      "step": 1164980
    },
    {
      "epoch": 1.906548051556987,
      "grad_norm": 0.11999731510877609,
      "learning_rate": 6.227670776144383e-06,
      "loss": 0.0112,
      "step": 1165000
    },
    {
      "epoch": 1.9065807819956402,
      "grad_norm": 0.4239851236343384,
      "learning_rate": 6.227604883930867e-06,
      "loss": 0.0146,
      "step": 1165020
    },
    {
      "epoch": 1.9066135124342938,
      "grad_norm": 0.2161725014448166,
      "learning_rate": 6.2275389917173486e-06,
      "loss": 0.0158,
      "step": 1165040
    },
    {
      "epoch": 1.906646242872947,
      "grad_norm": 0.9849335551261902,
      "learning_rate": 6.227473099503832e-06,
      "loss": 0.0267,
      "step": 1165060
    },
    {
      "epoch": 1.9066789733116003,
      "grad_norm": 0.8341761231422424,
      "learning_rate": 6.227407207290315e-06,
      "loss": 0.0233,
      "step": 1165080
    },
    {
      "epoch": 1.9067117037502537,
      "grad_norm": 0.595670223236084,
      "learning_rate": 6.2273413150767985e-06,
      "loss": 0.0178,
      "step": 1165100
    },
    {
      "epoch": 1.906744434188907,
      "grad_norm": 0.36393630504608154,
      "learning_rate": 6.22727542286328e-06,
      "loss": 0.0157,
      "step": 1165120
    },
    {
      "epoch": 1.9067771646275604,
      "grad_norm": 0.21780000627040863,
      "learning_rate": 6.227209530649764e-06,
      "loss": 0.0196,
      "step": 1165140
    },
    {
      "epoch": 1.9068098950662136,
      "grad_norm": 1.291218638420105,
      "learning_rate": 6.227143638436246e-06,
      "loss": 0.0298,
      "step": 1165160
    },
    {
      "epoch": 1.9068426255048672,
      "grad_norm": 0.39752522110939026,
      "learning_rate": 6.2270777462227294e-06,
      "loss": 0.0188,
      "step": 1165180
    },
    {
      "epoch": 1.9068753559435203,
      "grad_norm": 0.3699570596218109,
      "learning_rate": 6.227011854009211e-06,
      "loss": 0.0142,
      "step": 1165200
    },
    {
      "epoch": 1.9069080863821737,
      "grad_norm": 0.28237900137901306,
      "learning_rate": 6.226945961795695e-06,
      "loss": 0.0105,
      "step": 1165220
    },
    {
      "epoch": 1.906940816820827,
      "grad_norm": 0.12740372121334076,
      "learning_rate": 6.2268800695821785e-06,
      "loss": 0.0292,
      "step": 1165240
    },
    {
      "epoch": 1.9069735472594802,
      "grad_norm": 0.15048561990261078,
      "learning_rate": 6.22681417736866e-06,
      "loss": 0.0148,
      "step": 1165260
    },
    {
      "epoch": 1.9070062776981338,
      "grad_norm": 0.08975138515233994,
      "learning_rate": 6.226748285155144e-06,
      "loss": 0.0246,
      "step": 1165280
    },
    {
      "epoch": 1.907039008136787,
      "grad_norm": 0.6967723369598389,
      "learning_rate": 6.226682392941627e-06,
      "loss": 0.0185,
      "step": 1165300
    },
    {
      "epoch": 1.9070717385754405,
      "grad_norm": 0.8392429351806641,
      "learning_rate": 6.2266165007281095e-06,
      "loss": 0.0209,
      "step": 1165320
    },
    {
      "epoch": 1.9071044690140937,
      "grad_norm": 0.6237812042236328,
      "learning_rate": 6.226550608514592e-06,
      "loss": 0.0228,
      "step": 1165340
    },
    {
      "epoch": 1.907137199452747,
      "grad_norm": 0.3069559335708618,
      "learning_rate": 6.226484716301076e-06,
      "loss": 0.0186,
      "step": 1165360
    },
    {
      "epoch": 1.9071699298914004,
      "grad_norm": 0.07199045270681381,
      "learning_rate": 6.226418824087558e-06,
      "loss": 0.0183,
      "step": 1165380
    },
    {
      "epoch": 1.9072026603300536,
      "grad_norm": 0.3819485604763031,
      "learning_rate": 6.226352931874041e-06,
      "loss": 0.0219,
      "step": 1165400
    },
    {
      "epoch": 1.9072353907687072,
      "grad_norm": 0.21768750250339508,
      "learning_rate": 6.226287039660523e-06,
      "loss": 0.0129,
      "step": 1165420
    },
    {
      "epoch": 1.9072681212073603,
      "grad_norm": 0.12333453446626663,
      "learning_rate": 6.226221147447007e-06,
      "loss": 0.0165,
      "step": 1165440
    },
    {
      "epoch": 1.9073008516460137,
      "grad_norm": 1.1225368976593018,
      "learning_rate": 6.2261552552334895e-06,
      "loss": 0.0183,
      "step": 1165460
    },
    {
      "epoch": 1.907333582084667,
      "grad_norm": 0.3326178789138794,
      "learning_rate": 6.226089363019972e-06,
      "loss": 0.0112,
      "step": 1165480
    },
    {
      "epoch": 1.9073663125233205,
      "grad_norm": 1.9961498975753784,
      "learning_rate": 6.226023470806455e-06,
      "loss": 0.0253,
      "step": 1165500
    },
    {
      "epoch": 1.9073990429619738,
      "grad_norm": 0.18002763390541077,
      "learning_rate": 6.2259575785929386e-06,
      "loss": 0.0189,
      "step": 1165520
    },
    {
      "epoch": 1.907431773400627,
      "grad_norm": 0.8437187671661377,
      "learning_rate": 6.2258916863794205e-06,
      "loss": 0.021,
      "step": 1165540
    },
    {
      "epoch": 1.9074645038392806,
      "grad_norm": 0.8417794704437256,
      "learning_rate": 6.225825794165904e-06,
      "loss": 0.0183,
      "step": 1165560
    },
    {
      "epoch": 1.9074972342779337,
      "grad_norm": 0.646509051322937,
      "learning_rate": 6.225759901952388e-06,
      "loss": 0.02,
      "step": 1165580
    },
    {
      "epoch": 1.907529964716587,
      "grad_norm": 0.6511125564575195,
      "learning_rate": 6.2256940097388695e-06,
      "loss": 0.0239,
      "step": 1165600
    },
    {
      "epoch": 1.9075626951552405,
      "grad_norm": 0.5356690883636475,
      "learning_rate": 6.225628117525353e-06,
      "loss": 0.0182,
      "step": 1165620
    },
    {
      "epoch": 1.9075954255938938,
      "grad_norm": 0.465823233127594,
      "learning_rate": 6.225562225311835e-06,
      "loss": 0.0151,
      "step": 1165640
    },
    {
      "epoch": 1.9076281560325472,
      "grad_norm": 0.3363974988460541,
      "learning_rate": 6.225496333098319e-06,
      "loss": 0.0173,
      "step": 1165660
    },
    {
      "epoch": 1.9076608864712004,
      "grad_norm": 1.025048851966858,
      "learning_rate": 6.225430440884801e-06,
      "loss": 0.0259,
      "step": 1165680
    },
    {
      "epoch": 1.907693616909854,
      "grad_norm": 0.3602522015571594,
      "learning_rate": 6.225364548671284e-06,
      "loss": 0.0261,
      "step": 1165700
    },
    {
      "epoch": 1.907726347348507,
      "grad_norm": 0.7884449362754822,
      "learning_rate": 6.225298656457767e-06,
      "loss": 0.0108,
      "step": 1165720
    },
    {
      "epoch": 1.9077590777871605,
      "grad_norm": 0.390024870634079,
      "learning_rate": 6.22523276424425e-06,
      "loss": 0.0237,
      "step": 1165740
    },
    {
      "epoch": 1.9077918082258138,
      "grad_norm": 0.2020816057920456,
      "learning_rate": 6.225166872030732e-06,
      "loss": 0.0178,
      "step": 1165760
    },
    {
      "epoch": 1.9078245386644672,
      "grad_norm": 0.1657896190881729,
      "learning_rate": 6.225100979817216e-06,
      "loss": 0.0166,
      "step": 1165780
    },
    {
      "epoch": 1.9078572691031206,
      "grad_norm": 0.3595277667045593,
      "learning_rate": 6.225035087603698e-06,
      "loss": 0.0142,
      "step": 1165800
    },
    {
      "epoch": 1.9078899995417737,
      "grad_norm": 1.64327871799469,
      "learning_rate": 6.224969195390181e-06,
      "loss": 0.0204,
      "step": 1165820
    },
    {
      "epoch": 1.9079227299804273,
      "grad_norm": 0.8700327277183533,
      "learning_rate": 6.224903303176664e-06,
      "loss": 0.0161,
      "step": 1165840
    },
    {
      "epoch": 1.9079554604190805,
      "grad_norm": 0.31158649921417236,
      "learning_rate": 6.224837410963147e-06,
      "loss": 0.0239,
      "step": 1165860
    },
    {
      "epoch": 1.9079881908577339,
      "grad_norm": 0.6151852607727051,
      "learning_rate": 6.2247715187496296e-06,
      "loss": 0.0162,
      "step": 1165880
    },
    {
      "epoch": 1.9080209212963872,
      "grad_norm": 0.599060595035553,
      "learning_rate": 6.224705626536113e-06,
      "loss": 0.0157,
      "step": 1165900
    },
    {
      "epoch": 1.9080536517350406,
      "grad_norm": 1.3338295221328735,
      "learning_rate": 6.224639734322596e-06,
      "loss": 0.0214,
      "step": 1165920
    },
    {
      "epoch": 1.908086382173694,
      "grad_norm": 0.09841805696487427,
      "learning_rate": 6.224573842109079e-06,
      "loss": 0.0102,
      "step": 1165940
    },
    {
      "epoch": 1.9081191126123471,
      "grad_norm": 0.4869949221611023,
      "learning_rate": 6.224507949895562e-06,
      "loss": 0.0191,
      "step": 1165960
    },
    {
      "epoch": 1.9081518430510007,
      "grad_norm": 0.1545339673757553,
      "learning_rate": 6.224442057682044e-06,
      "loss": 0.012,
      "step": 1165980
    },
    {
      "epoch": 1.9081845734896539,
      "grad_norm": 0.41821035742759705,
      "learning_rate": 6.224376165468528e-06,
      "loss": 0.0126,
      "step": 1166000
    },
    {
      "epoch": 1.9082173039283072,
      "grad_norm": 0.5092307329177856,
      "learning_rate": 6.22431027325501e-06,
      "loss": 0.0243,
      "step": 1166020
    },
    {
      "epoch": 1.9082500343669606,
      "grad_norm": 0.29148128628730774,
      "learning_rate": 6.224244381041493e-06,
      "loss": 0.0143,
      "step": 1166040
    },
    {
      "epoch": 1.9082827648056138,
      "grad_norm": 0.6242397427558899,
      "learning_rate": 6.224178488827975e-06,
      "loss": 0.0303,
      "step": 1166060
    },
    {
      "epoch": 1.9083154952442674,
      "grad_norm": 0.1558566689491272,
      "learning_rate": 6.224112596614459e-06,
      "loss": 0.0133,
      "step": 1166080
    },
    {
      "epoch": 1.9083482256829205,
      "grad_norm": 0.4391942620277405,
      "learning_rate": 6.224046704400941e-06,
      "loss": 0.0154,
      "step": 1166100
    },
    {
      "epoch": 1.908380956121574,
      "grad_norm": 0.17862601578235626,
      "learning_rate": 6.223980812187424e-06,
      "loss": 0.0179,
      "step": 1166120
    },
    {
      "epoch": 1.9084136865602273,
      "grad_norm": 0.10222796350717545,
      "learning_rate": 6.223914919973907e-06,
      "loss": 0.0183,
      "step": 1166140
    },
    {
      "epoch": 1.9084464169988806,
      "grad_norm": 0.37234050035476685,
      "learning_rate": 6.2238490277603905e-06,
      "loss": 0.013,
      "step": 1166160
    },
    {
      "epoch": 1.908479147437534,
      "grad_norm": 0.21259823441505432,
      "learning_rate": 6.223783135546872e-06,
      "loss": 0.0184,
      "step": 1166180
    },
    {
      "epoch": 1.9085118778761871,
      "grad_norm": 0.2655580937862396,
      "learning_rate": 6.223717243333356e-06,
      "loss": 0.0273,
      "step": 1166200
    },
    {
      "epoch": 1.9085446083148407,
      "grad_norm": 0.20533473789691925,
      "learning_rate": 6.223651351119838e-06,
      "loss": 0.0115,
      "step": 1166220
    },
    {
      "epoch": 1.908577338753494,
      "grad_norm": 0.498954176902771,
      "learning_rate": 6.2235854589063214e-06,
      "loss": 0.0188,
      "step": 1166240
    },
    {
      "epoch": 1.9086100691921473,
      "grad_norm": 0.4594799280166626,
      "learning_rate": 6.223519566692804e-06,
      "loss": 0.0151,
      "step": 1166260
    },
    {
      "epoch": 1.9086427996308006,
      "grad_norm": 0.20963847637176514,
      "learning_rate": 6.223453674479287e-06,
      "loss": 0.0149,
      "step": 1166280
    },
    {
      "epoch": 1.908675530069454,
      "grad_norm": 0.3465617895126343,
      "learning_rate": 6.2233877822657705e-06,
      "loss": 0.0183,
      "step": 1166300
    },
    {
      "epoch": 1.9087082605081074,
      "grad_norm": 0.32620686292648315,
      "learning_rate": 6.223321890052253e-06,
      "loss": 0.0303,
      "step": 1166320
    },
    {
      "epoch": 1.9087409909467605,
      "grad_norm": 0.30523040890693665,
      "learning_rate": 6.223255997838736e-06,
      "loss": 0.0204,
      "step": 1166340
    },
    {
      "epoch": 1.9087737213854141,
      "grad_norm": 0.6548872590065002,
      "learning_rate": 6.223190105625219e-06,
      "loss": 0.0121,
      "step": 1166360
    },
    {
      "epoch": 1.9088064518240673,
      "grad_norm": 0.22665852308273315,
      "learning_rate": 6.223124213411702e-06,
      "loss": 0.0196,
      "step": 1166380
    },
    {
      "epoch": 1.9088391822627206,
      "grad_norm": 0.3311305344104767,
      "learning_rate": 6.223058321198184e-06,
      "loss": 0.0185,
      "step": 1166400
    },
    {
      "epoch": 1.908871912701374,
      "grad_norm": 0.3871249556541443,
      "learning_rate": 6.222992428984668e-06,
      "loss": 0.0115,
      "step": 1166420
    },
    {
      "epoch": 1.9089046431400274,
      "grad_norm": 0.6457870602607727,
      "learning_rate": 6.22292653677115e-06,
      "loss": 0.0238,
      "step": 1166440
    },
    {
      "epoch": 1.9089373735786808,
      "grad_norm": 0.9378104209899902,
      "learning_rate": 6.222860644557633e-06,
      "loss": 0.0133,
      "step": 1166460
    },
    {
      "epoch": 1.908970104017334,
      "grad_norm": 0.5285190343856812,
      "learning_rate": 6.222794752344116e-06,
      "loss": 0.0148,
      "step": 1166480
    },
    {
      "epoch": 1.9090028344559875,
      "grad_norm": 0.4730784595012665,
      "learning_rate": 6.222728860130599e-06,
      "loss": 0.0165,
      "step": 1166500
    },
    {
      "epoch": 1.9090355648946407,
      "grad_norm": 0.14252406358718872,
      "learning_rate": 6.2226629679170815e-06,
      "loss": 0.0184,
      "step": 1166520
    },
    {
      "epoch": 1.909068295333294,
      "grad_norm": 0.34353333711624146,
      "learning_rate": 6.222597075703565e-06,
      "loss": 0.0157,
      "step": 1166540
    },
    {
      "epoch": 1.9091010257719474,
      "grad_norm": 0.5783889889717102,
      "learning_rate": 6.222531183490047e-06,
      "loss": 0.0223,
      "step": 1166560
    },
    {
      "epoch": 1.9091337562106008,
      "grad_norm": 0.6754776239395142,
      "learning_rate": 6.2224652912765305e-06,
      "loss": 0.0155,
      "step": 1166580
    },
    {
      "epoch": 1.9091664866492541,
      "grad_norm": 0.572729766368866,
      "learning_rate": 6.2223993990630124e-06,
      "loss": 0.0159,
      "step": 1166600
    },
    {
      "epoch": 1.9091992170879073,
      "grad_norm": 1.9419738054275513,
      "learning_rate": 6.222333506849496e-06,
      "loss": 0.0166,
      "step": 1166620
    },
    {
      "epoch": 1.909231947526561,
      "grad_norm": 0.29145678877830505,
      "learning_rate": 6.22226761463598e-06,
      "loss": 0.02,
      "step": 1166640
    },
    {
      "epoch": 1.909264677965214,
      "grad_norm": 0.2882395088672638,
      "learning_rate": 6.2222017224224615e-06,
      "loss": 0.0181,
      "step": 1166660
    },
    {
      "epoch": 1.9092974084038674,
      "grad_norm": 0.4329383373260498,
      "learning_rate": 6.222135830208945e-06,
      "loss": 0.0197,
      "step": 1166680
    },
    {
      "epoch": 1.9093301388425208,
      "grad_norm": 0.38025203347206116,
      "learning_rate": 6.222069937995428e-06,
      "loss": 0.0196,
      "step": 1166700
    },
    {
      "epoch": 1.9093628692811742,
      "grad_norm": 0.6064794659614563,
      "learning_rate": 6.2220040457819106e-06,
      "loss": 0.0155,
      "step": 1166720
    },
    {
      "epoch": 1.9093955997198275,
      "grad_norm": 1.3568309545516968,
      "learning_rate": 6.221938153568393e-06,
      "loss": 0.014,
      "step": 1166740
    },
    {
      "epoch": 1.9094283301584807,
      "grad_norm": 0.24987956881523132,
      "learning_rate": 6.221872261354877e-06,
      "loss": 0.017,
      "step": 1166760
    },
    {
      "epoch": 1.9094610605971343,
      "grad_norm": 0.704403817653656,
      "learning_rate": 6.221806369141359e-06,
      "loss": 0.0166,
      "step": 1166780
    },
    {
      "epoch": 1.9094937910357874,
      "grad_norm": 0.4280863404273987,
      "learning_rate": 6.221740476927842e-06,
      "loss": 0.0224,
      "step": 1166800
    },
    {
      "epoch": 1.9095265214744408,
      "grad_norm": 0.32414206862449646,
      "learning_rate": 6.221674584714324e-06,
      "loss": 0.0181,
      "step": 1166820
    },
    {
      "epoch": 1.9095592519130942,
      "grad_norm": 1.092344880104065,
      "learning_rate": 6.221608692500808e-06,
      "loss": 0.0229,
      "step": 1166840
    },
    {
      "epoch": 1.9095919823517473,
      "grad_norm": 0.708516538143158,
      "learning_rate": 6.221542800287291e-06,
      "loss": 0.0186,
      "step": 1166860
    },
    {
      "epoch": 1.909624712790401,
      "grad_norm": 0.19579388201236725,
      "learning_rate": 6.221476908073773e-06,
      "loss": 0.0228,
      "step": 1166880
    },
    {
      "epoch": 1.909657443229054,
      "grad_norm": 0.32268863916397095,
      "learning_rate": 6.221411015860256e-06,
      "loss": 0.0133,
      "step": 1166900
    },
    {
      "epoch": 1.9096901736677077,
      "grad_norm": 0.7249774932861328,
      "learning_rate": 6.22134512364674e-06,
      "loss": 0.0178,
      "step": 1166920
    },
    {
      "epoch": 1.9097229041063608,
      "grad_norm": 0.3296845257282257,
      "learning_rate": 6.2212792314332216e-06,
      "loss": 0.0188,
      "step": 1166940
    },
    {
      "epoch": 1.9097556345450142,
      "grad_norm": 0.535762369632721,
      "learning_rate": 6.221213339219705e-06,
      "loss": 0.0164,
      "step": 1166960
    },
    {
      "epoch": 1.9097883649836676,
      "grad_norm": 0.29606229066848755,
      "learning_rate": 6.221147447006189e-06,
      "loss": 0.0173,
      "step": 1166980
    },
    {
      "epoch": 1.9098210954223207,
      "grad_norm": 1.8493409156799316,
      "learning_rate": 6.221081554792671e-06,
      "loss": 0.0163,
      "step": 1167000
    },
    {
      "epoch": 1.9098538258609743,
      "grad_norm": 0.6236498355865479,
      "learning_rate": 6.221015662579154e-06,
      "loss": 0.015,
      "step": 1167020
    },
    {
      "epoch": 1.9098865562996274,
      "grad_norm": 0.2943587601184845,
      "learning_rate": 6.220949770365636e-06,
      "loss": 0.0155,
      "step": 1167040
    },
    {
      "epoch": 1.9099192867382808,
      "grad_norm": 0.20016048848628998,
      "learning_rate": 6.22088387815212e-06,
      "loss": 0.0216,
      "step": 1167060
    },
    {
      "epoch": 1.9099520171769342,
      "grad_norm": 0.7896040678024292,
      "learning_rate": 6.220817985938602e-06,
      "loss": 0.014,
      "step": 1167080
    },
    {
      "epoch": 1.9099847476155876,
      "grad_norm": 0.3315821886062622,
      "learning_rate": 6.220752093725085e-06,
      "loss": 0.0153,
      "step": 1167100
    },
    {
      "epoch": 1.910017478054241,
      "grad_norm": 0.461535781621933,
      "learning_rate": 6.220686201511568e-06,
      "loss": 0.0197,
      "step": 1167120
    },
    {
      "epoch": 1.910050208492894,
      "grad_norm": 0.26666268706321716,
      "learning_rate": 6.220620309298051e-06,
      "loss": 0.0212,
      "step": 1167140
    },
    {
      "epoch": 1.9100829389315477,
      "grad_norm": 0.3444162905216217,
      "learning_rate": 6.220554417084533e-06,
      "loss": 0.0146,
      "step": 1167160
    },
    {
      "epoch": 1.9101156693702008,
      "grad_norm": 0.3216453492641449,
      "learning_rate": 6.220488524871017e-06,
      "loss": 0.0149,
      "step": 1167180
    },
    {
      "epoch": 1.9101483998088542,
      "grad_norm": 0.4756704568862915,
      "learning_rate": 6.220422632657499e-06,
      "loss": 0.0289,
      "step": 1167200
    },
    {
      "epoch": 1.9101811302475076,
      "grad_norm": 0.27589693665504456,
      "learning_rate": 6.2203567404439825e-06,
      "loss": 0.0142,
      "step": 1167220
    },
    {
      "epoch": 1.910213860686161,
      "grad_norm": 1.0916672945022583,
      "learning_rate": 6.220290848230464e-06,
      "loss": 0.0146,
      "step": 1167240
    },
    {
      "epoch": 1.9102465911248143,
      "grad_norm": 0.6390326619148254,
      "learning_rate": 6.220224956016948e-06,
      "loss": 0.016,
      "step": 1167260
    },
    {
      "epoch": 1.9102793215634675,
      "grad_norm": 0.42756959795951843,
      "learning_rate": 6.220159063803431e-06,
      "loss": 0.0189,
      "step": 1167280
    },
    {
      "epoch": 1.910312052002121,
      "grad_norm": 0.995966911315918,
      "learning_rate": 6.220093171589913e-06,
      "loss": 0.0239,
      "step": 1167300
    },
    {
      "epoch": 1.9103447824407742,
      "grad_norm": 0.4112532138824463,
      "learning_rate": 6.220027279376396e-06,
      "loss": 0.0216,
      "step": 1167320
    },
    {
      "epoch": 1.9103775128794276,
      "grad_norm": 1.642332911491394,
      "learning_rate": 6.21996138716288e-06,
      "loss": 0.0141,
      "step": 1167340
    },
    {
      "epoch": 1.910410243318081,
      "grad_norm": 0.27304378151893616,
      "learning_rate": 6.2198954949493625e-06,
      "loss": 0.0168,
      "step": 1167360
    },
    {
      "epoch": 1.9104429737567343,
      "grad_norm": 0.624807596206665,
      "learning_rate": 6.219829602735845e-06,
      "loss": 0.0152,
      "step": 1167380
    },
    {
      "epoch": 1.9104757041953877,
      "grad_norm": 1.6069917678833008,
      "learning_rate": 6.219763710522329e-06,
      "loss": 0.0174,
      "step": 1167400
    },
    {
      "epoch": 1.9105084346340409,
      "grad_norm": 0.4300846755504608,
      "learning_rate": 6.219697818308811e-06,
      "loss": 0.0257,
      "step": 1167420
    },
    {
      "epoch": 1.9105411650726944,
      "grad_norm": 0.4377792179584503,
      "learning_rate": 6.219631926095294e-06,
      "loss": 0.0196,
      "step": 1167440
    },
    {
      "epoch": 1.9105738955113476,
      "grad_norm": 0.28812310099601746,
      "learning_rate": 6.219566033881776e-06,
      "loss": 0.0211,
      "step": 1167460
    },
    {
      "epoch": 1.910606625950001,
      "grad_norm": 0.17341306805610657,
      "learning_rate": 6.21950014166826e-06,
      "loss": 0.0157,
      "step": 1167480
    },
    {
      "epoch": 1.9106393563886543,
      "grad_norm": 0.06482290476560593,
      "learning_rate": 6.2194342494547425e-06,
      "loss": 0.0196,
      "step": 1167500
    },
    {
      "epoch": 1.9106720868273077,
      "grad_norm": 0.1659747064113617,
      "learning_rate": 6.219368357241225e-06,
      "loss": 0.0133,
      "step": 1167520
    },
    {
      "epoch": 1.910704817265961,
      "grad_norm": 1.0510833263397217,
      "learning_rate": 6.219302465027708e-06,
      "loss": 0.0179,
      "step": 1167540
    },
    {
      "epoch": 1.9107375477046142,
      "grad_norm": 0.12284775823354721,
      "learning_rate": 6.2192365728141916e-06,
      "loss": 0.0172,
      "step": 1167560
    },
    {
      "epoch": 1.9107702781432678,
      "grad_norm": 0.47791051864624023,
      "learning_rate": 6.2191706806006735e-06,
      "loss": 0.021,
      "step": 1167580
    },
    {
      "epoch": 1.910803008581921,
      "grad_norm": 0.6708905696868896,
      "learning_rate": 6.219104788387157e-06,
      "loss": 0.0155,
      "step": 1167600
    },
    {
      "epoch": 1.9108357390205744,
      "grad_norm": 0.34637874364852905,
      "learning_rate": 6.219038896173639e-06,
      "loss": 0.0163,
      "step": 1167620
    },
    {
      "epoch": 1.9108684694592277,
      "grad_norm": 0.35289689898490906,
      "learning_rate": 6.2189730039601225e-06,
      "loss": 0.0216,
      "step": 1167640
    },
    {
      "epoch": 1.9109011998978809,
      "grad_norm": 1.0090909004211426,
      "learning_rate": 6.218907111746605e-06,
      "loss": 0.0183,
      "step": 1167660
    },
    {
      "epoch": 1.9109339303365345,
      "grad_norm": 1.34893798828125,
      "learning_rate": 6.218841219533088e-06,
      "loss": 0.014,
      "step": 1167680
    },
    {
      "epoch": 1.9109666607751876,
      "grad_norm": 0.9768165946006775,
      "learning_rate": 6.218775327319572e-06,
      "loss": 0.0161,
      "step": 1167700
    },
    {
      "epoch": 1.9109993912138412,
      "grad_norm": 0.33475008606910706,
      "learning_rate": 6.218709435106054e-06,
      "loss": 0.0155,
      "step": 1167720
    },
    {
      "epoch": 1.9110321216524944,
      "grad_norm": 0.2884295582771301,
      "learning_rate": 6.218643542892537e-06,
      "loss": 0.0158,
      "step": 1167740
    },
    {
      "epoch": 1.9110648520911477,
      "grad_norm": 0.30097684264183044,
      "learning_rate": 6.21857765067902e-06,
      "loss": 0.0267,
      "step": 1167760
    },
    {
      "epoch": 1.911097582529801,
      "grad_norm": 0.35416561365127563,
      "learning_rate": 6.218511758465503e-06,
      "loss": 0.0189,
      "step": 1167780
    },
    {
      "epoch": 1.9111303129684543,
      "grad_norm": 0.4194091558456421,
      "learning_rate": 6.218445866251985e-06,
      "loss": 0.0132,
      "step": 1167800
    },
    {
      "epoch": 1.9111630434071079,
      "grad_norm": 1.599410891532898,
      "learning_rate": 6.218379974038469e-06,
      "loss": 0.0134,
      "step": 1167820
    },
    {
      "epoch": 1.911195773845761,
      "grad_norm": 0.5739635825157166,
      "learning_rate": 6.218314081824951e-06,
      "loss": 0.0249,
      "step": 1167840
    },
    {
      "epoch": 1.9112285042844144,
      "grad_norm": 0.33642300963401794,
      "learning_rate": 6.218248189611434e-06,
      "loss": 0.0201,
      "step": 1167860
    },
    {
      "epoch": 1.9112612347230677,
      "grad_norm": 0.8940622806549072,
      "learning_rate": 6.218182297397917e-06,
      "loss": 0.0192,
      "step": 1167880
    },
    {
      "epoch": 1.9112939651617211,
      "grad_norm": 0.42251744866371155,
      "learning_rate": 6.2181164051844e-06,
      "loss": 0.019,
      "step": 1167900
    },
    {
      "epoch": 1.9113266956003745,
      "grad_norm": 1.177253246307373,
      "learning_rate": 6.218050512970883e-06,
      "loss": 0.0194,
      "step": 1167920
    },
    {
      "epoch": 1.9113594260390276,
      "grad_norm": 0.7114242911338806,
      "learning_rate": 6.217984620757366e-06,
      "loss": 0.0266,
      "step": 1167940
    },
    {
      "epoch": 1.9113921564776812,
      "grad_norm": 0.41429224610328674,
      "learning_rate": 6.217918728543848e-06,
      "loss": 0.0177,
      "step": 1167960
    },
    {
      "epoch": 1.9114248869163344,
      "grad_norm": 0.5765770077705383,
      "learning_rate": 6.217852836330332e-06,
      "loss": 0.0221,
      "step": 1167980
    },
    {
      "epoch": 1.9114576173549878,
      "grad_norm": 0.46609824895858765,
      "learning_rate": 6.2177869441168135e-06,
      "loss": 0.0126,
      "step": 1168000
    },
    {
      "epoch": 1.9114903477936411,
      "grad_norm": 0.8137971758842468,
      "learning_rate": 6.217721051903297e-06,
      "loss": 0.0137,
      "step": 1168020
    },
    {
      "epoch": 1.9115230782322945,
      "grad_norm": 0.8071662783622742,
      "learning_rate": 6.217655159689781e-06,
      "loss": 0.0158,
      "step": 1168040
    },
    {
      "epoch": 1.9115558086709479,
      "grad_norm": 0.5200061798095703,
      "learning_rate": 6.217589267476263e-06,
      "loss": 0.0197,
      "step": 1168060
    },
    {
      "epoch": 1.911588539109601,
      "grad_norm": 0.5947363972663879,
      "learning_rate": 6.217523375262746e-06,
      "loss": 0.0207,
      "step": 1168080
    },
    {
      "epoch": 1.9116212695482546,
      "grad_norm": 0.31641727685928345,
      "learning_rate": 6.217457483049228e-06,
      "loss": 0.0201,
      "step": 1168100
    },
    {
      "epoch": 1.9116539999869078,
      "grad_norm": 0.05783914029598236,
      "learning_rate": 6.217391590835712e-06,
      "loss": 0.016,
      "step": 1168120
    },
    {
      "epoch": 1.9116867304255611,
      "grad_norm": 0.106133833527565,
      "learning_rate": 6.217325698622194e-06,
      "loss": 0.0148,
      "step": 1168140
    },
    {
      "epoch": 1.9117194608642145,
      "grad_norm": 0.4824451208114624,
      "learning_rate": 6.217259806408677e-06,
      "loss": 0.0142,
      "step": 1168160
    },
    {
      "epoch": 1.9117521913028679,
      "grad_norm": 0.6428446769714355,
      "learning_rate": 6.21719391419516e-06,
      "loss": 0.0116,
      "step": 1168180
    },
    {
      "epoch": 1.9117849217415213,
      "grad_norm": 0.11631190776824951,
      "learning_rate": 6.2171280219816435e-06,
      "loss": 0.0144,
      "step": 1168200
    },
    {
      "epoch": 1.9118176521801744,
      "grad_norm": 1.3118549585342407,
      "learning_rate": 6.217062129768125e-06,
      "loss": 0.0164,
      "step": 1168220
    },
    {
      "epoch": 1.911850382618828,
      "grad_norm": 0.8617379069328308,
      "learning_rate": 6.216996237554609e-06,
      "loss": 0.0215,
      "step": 1168240
    },
    {
      "epoch": 1.9118831130574812,
      "grad_norm": 0.3099927604198456,
      "learning_rate": 6.216930345341091e-06,
      "loss": 0.0123,
      "step": 1168260
    },
    {
      "epoch": 1.9119158434961345,
      "grad_norm": 0.4673231542110443,
      "learning_rate": 6.2168644531275744e-06,
      "loss": 0.0165,
      "step": 1168280
    },
    {
      "epoch": 1.911948573934788,
      "grad_norm": 0.9345015287399292,
      "learning_rate": 6.216798560914057e-06,
      "loss": 0.0143,
      "step": 1168300
    },
    {
      "epoch": 1.911981304373441,
      "grad_norm": 0.26692262291908264,
      "learning_rate": 6.21673266870054e-06,
      "loss": 0.0166,
      "step": 1168320
    },
    {
      "epoch": 1.9120140348120946,
      "grad_norm": 1.1594343185424805,
      "learning_rate": 6.216666776487023e-06,
      "loss": 0.0223,
      "step": 1168340
    },
    {
      "epoch": 1.9120467652507478,
      "grad_norm": 0.36972010135650635,
      "learning_rate": 6.216600884273506e-06,
      "loss": 0.0135,
      "step": 1168360
    },
    {
      "epoch": 1.9120794956894014,
      "grad_norm": 0.16438910365104675,
      "learning_rate": 6.216534992059988e-06,
      "loss": 0.0171,
      "step": 1168380
    },
    {
      "epoch": 1.9121122261280545,
      "grad_norm": 0.3515860438346863,
      "learning_rate": 6.216469099846472e-06,
      "loss": 0.0127,
      "step": 1168400
    },
    {
      "epoch": 1.912144956566708,
      "grad_norm": 0.4386546313762665,
      "learning_rate": 6.216403207632955e-06,
      "loss": 0.0124,
      "step": 1168420
    },
    {
      "epoch": 1.9121776870053613,
      "grad_norm": 0.5450066328048706,
      "learning_rate": 6.216337315419437e-06,
      "loss": 0.0272,
      "step": 1168440
    },
    {
      "epoch": 1.9122104174440144,
      "grad_norm": 0.3062523305416107,
      "learning_rate": 6.216271423205921e-06,
      "loss": 0.0124,
      "step": 1168460
    },
    {
      "epoch": 1.912243147882668,
      "grad_norm": 0.30681997537612915,
      "learning_rate": 6.216205530992403e-06,
      "loss": 0.0136,
      "step": 1168480
    },
    {
      "epoch": 1.9122758783213212,
      "grad_norm": 0.6103343963623047,
      "learning_rate": 6.216139638778886e-06,
      "loss": 0.0234,
      "step": 1168500
    },
    {
      "epoch": 1.9123086087599745,
      "grad_norm": 0.7761383056640625,
      "learning_rate": 6.216073746565369e-06,
      "loss": 0.0184,
      "step": 1168520
    },
    {
      "epoch": 1.912341339198628,
      "grad_norm": 0.6977013945579529,
      "learning_rate": 6.216007854351852e-06,
      "loss": 0.0216,
      "step": 1168540
    },
    {
      "epoch": 1.9123740696372813,
      "grad_norm": 0.33494827151298523,
      "learning_rate": 6.2159419621383345e-06,
      "loss": 0.0137,
      "step": 1168560
    },
    {
      "epoch": 1.9124068000759347,
      "grad_norm": 0.32780930399894714,
      "learning_rate": 6.215876069924818e-06,
      "loss": 0.0119,
      "step": 1168580
    },
    {
      "epoch": 1.9124395305145878,
      "grad_norm": 1.3564835786819458,
      "learning_rate": 6.2158101777113e-06,
      "loss": 0.0212,
      "step": 1168600
    },
    {
      "epoch": 1.9124722609532414,
      "grad_norm": 1.3307455778121948,
      "learning_rate": 6.2157442854977836e-06,
      "loss": 0.0198,
      "step": 1168620
    },
    {
      "epoch": 1.9125049913918946,
      "grad_norm": 0.22412370145320892,
      "learning_rate": 6.2156783932842654e-06,
      "loss": 0.0234,
      "step": 1168640
    },
    {
      "epoch": 1.912537721830548,
      "grad_norm": 0.4488433301448822,
      "learning_rate": 6.215612501070749e-06,
      "loss": 0.0156,
      "step": 1168660
    },
    {
      "epoch": 1.9125704522692013,
      "grad_norm": 0.236540749669075,
      "learning_rate": 6.215546608857232e-06,
      "loss": 0.0171,
      "step": 1168680
    },
    {
      "epoch": 1.9126031827078547,
      "grad_norm": 0.4965631365776062,
      "learning_rate": 6.2154807166437145e-06,
      "loss": 0.0186,
      "step": 1168700
    },
    {
      "epoch": 1.912635913146508,
      "grad_norm": 0.8672568202018738,
      "learning_rate": 6.215414824430197e-06,
      "loss": 0.0268,
      "step": 1168720
    },
    {
      "epoch": 1.9126686435851612,
      "grad_norm": 0.8855812549591064,
      "learning_rate": 6.215348932216681e-06,
      "loss": 0.0164,
      "step": 1168740
    },
    {
      "epoch": 1.9127013740238148,
      "grad_norm": 1.2073028087615967,
      "learning_rate": 6.215283040003164e-06,
      "loss": 0.0151,
      "step": 1168760
    },
    {
      "epoch": 1.912734104462468,
      "grad_norm": 0.14839762449264526,
      "learning_rate": 6.215217147789646e-06,
      "loss": 0.0102,
      "step": 1168780
    },
    {
      "epoch": 1.9127668349011213,
      "grad_norm": 0.22066499292850494,
      "learning_rate": 6.21515125557613e-06,
      "loss": 0.0208,
      "step": 1168800
    },
    {
      "epoch": 1.9127995653397747,
      "grad_norm": 0.42219245433807373,
      "learning_rate": 6.215085363362612e-06,
      "loss": 0.015,
      "step": 1168820
    },
    {
      "epoch": 1.912832295778428,
      "grad_norm": 0.3565543293952942,
      "learning_rate": 6.215019471149095e-06,
      "loss": 0.0268,
      "step": 1168840
    },
    {
      "epoch": 1.9128650262170814,
      "grad_norm": 0.8825567960739136,
      "learning_rate": 6.214953578935577e-06,
      "loss": 0.0179,
      "step": 1168860
    },
    {
      "epoch": 1.9128977566557346,
      "grad_norm": 0.17363159358501434,
      "learning_rate": 6.214887686722061e-06,
      "loss": 0.0141,
      "step": 1168880
    },
    {
      "epoch": 1.9129304870943882,
      "grad_norm": 0.8189854621887207,
      "learning_rate": 6.214821794508543e-06,
      "loss": 0.0206,
      "step": 1168900
    },
    {
      "epoch": 1.9129632175330413,
      "grad_norm": 0.7483174800872803,
      "learning_rate": 6.214755902295026e-06,
      "loss": 0.0169,
      "step": 1168920
    },
    {
      "epoch": 1.9129959479716947,
      "grad_norm": 0.7346873879432678,
      "learning_rate": 6.214690010081509e-06,
      "loss": 0.0209,
      "step": 1168940
    },
    {
      "epoch": 1.913028678410348,
      "grad_norm": 1.2362711429595947,
      "learning_rate": 6.214624117867993e-06,
      "loss": 0.0147,
      "step": 1168960
    },
    {
      "epoch": 1.9130614088490014,
      "grad_norm": 0.4692712724208832,
      "learning_rate": 6.2145582256544746e-06,
      "loss": 0.0135,
      "step": 1168980
    },
    {
      "epoch": 1.9130941392876548,
      "grad_norm": 0.7391197681427002,
      "learning_rate": 6.214492333440958e-06,
      "loss": 0.0125,
      "step": 1169000
    },
    {
      "epoch": 1.913126869726308,
      "grad_norm": 0.613580048084259,
      "learning_rate": 6.21442644122744e-06,
      "loss": 0.0158,
      "step": 1169020
    },
    {
      "epoch": 1.9131596001649616,
      "grad_norm": 1.456422209739685,
      "learning_rate": 6.214360549013924e-06,
      "loss": 0.015,
      "step": 1169040
    },
    {
      "epoch": 1.9131923306036147,
      "grad_norm": 1.631118893623352,
      "learning_rate": 6.2142946568004055e-06,
      "loss": 0.0165,
      "step": 1169060
    },
    {
      "epoch": 1.913225061042268,
      "grad_norm": 0.21698947250843048,
      "learning_rate": 6.214228764586889e-06,
      "loss": 0.0186,
      "step": 1169080
    },
    {
      "epoch": 1.9132577914809215,
      "grad_norm": 0.1545814424753189,
      "learning_rate": 6.214162872373373e-06,
      "loss": 0.0152,
      "step": 1169100
    },
    {
      "epoch": 1.9132905219195746,
      "grad_norm": 0.9859092831611633,
      "learning_rate": 6.214096980159855e-06,
      "loss": 0.0135,
      "step": 1169120
    },
    {
      "epoch": 1.9133232523582282,
      "grad_norm": 0.5229673981666565,
      "learning_rate": 6.214031087946338e-06,
      "loss": 0.016,
      "step": 1169140
    },
    {
      "epoch": 1.9133559827968813,
      "grad_norm": 0.4884006679058075,
      "learning_rate": 6.213965195732821e-06,
      "loss": 0.0198,
      "step": 1169160
    },
    {
      "epoch": 1.913388713235535,
      "grad_norm": 0.33286789059638977,
      "learning_rate": 6.213899303519304e-06,
      "loss": 0.0168,
      "step": 1169180
    },
    {
      "epoch": 1.913421443674188,
      "grad_norm": 0.5779086947441101,
      "learning_rate": 6.213833411305786e-06,
      "loss": 0.017,
      "step": 1169200
    },
    {
      "epoch": 1.9134541741128415,
      "grad_norm": 0.8493179678916931,
      "learning_rate": 6.21376751909227e-06,
      "loss": 0.0199,
      "step": 1169220
    },
    {
      "epoch": 1.9134869045514948,
      "grad_norm": 1.0633010864257812,
      "learning_rate": 6.213701626878752e-06,
      "loss": 0.0229,
      "step": 1169240
    },
    {
      "epoch": 1.913519634990148,
      "grad_norm": 0.20464499294757843,
      "learning_rate": 6.2136357346652355e-06,
      "loss": 0.018,
      "step": 1169260
    },
    {
      "epoch": 1.9135523654288016,
      "grad_norm": 0.5999630093574524,
      "learning_rate": 6.213569842451717e-06,
      "loss": 0.0152,
      "step": 1169280
    },
    {
      "epoch": 1.9135850958674547,
      "grad_norm": 0.7853408455848694,
      "learning_rate": 6.213503950238201e-06,
      "loss": 0.0191,
      "step": 1169300
    },
    {
      "epoch": 1.913617826306108,
      "grad_norm": 0.45927688479423523,
      "learning_rate": 6.213438058024684e-06,
      "loss": 0.02,
      "step": 1169320
    },
    {
      "epoch": 1.9136505567447615,
      "grad_norm": 0.30983439087867737,
      "learning_rate": 6.213372165811166e-06,
      "loss": 0.0177,
      "step": 1169340
    },
    {
      "epoch": 1.9136832871834148,
      "grad_norm": 0.8485368490219116,
      "learning_rate": 6.213306273597649e-06,
      "loss": 0.0155,
      "step": 1169360
    },
    {
      "epoch": 1.9137160176220682,
      "grad_norm": 0.5758755207061768,
      "learning_rate": 6.213240381384133e-06,
      "loss": 0.0135,
      "step": 1169380
    },
    {
      "epoch": 1.9137487480607214,
      "grad_norm": 2.5213675498962402,
      "learning_rate": 6.213174489170615e-06,
      "loss": 0.0214,
      "step": 1169400
    },
    {
      "epoch": 1.913781478499375,
      "grad_norm": 0.15567058324813843,
      "learning_rate": 6.213108596957098e-06,
      "loss": 0.0135,
      "step": 1169420
    },
    {
      "epoch": 1.9138142089380281,
      "grad_norm": 0.7058715224266052,
      "learning_rate": 6.213042704743582e-06,
      "loss": 0.0189,
      "step": 1169440
    },
    {
      "epoch": 1.9138469393766815,
      "grad_norm": 1.0369961261749268,
      "learning_rate": 6.212976812530064e-06,
      "loss": 0.0199,
      "step": 1169460
    },
    {
      "epoch": 1.9138796698153349,
      "grad_norm": 0.1729850023984909,
      "learning_rate": 6.212910920316547e-06,
      "loss": 0.0155,
      "step": 1169480
    },
    {
      "epoch": 1.9139124002539882,
      "grad_norm": 0.3622948229312897,
      "learning_rate": 6.212845028103029e-06,
      "loss": 0.0154,
      "step": 1169500
    },
    {
      "epoch": 1.9139451306926416,
      "grad_norm": 0.20129939913749695,
      "learning_rate": 6.212779135889513e-06,
      "loss": 0.0152,
      "step": 1169520
    },
    {
      "epoch": 1.9139778611312948,
      "grad_norm": 0.4358789622783661,
      "learning_rate": 6.2127132436759955e-06,
      "loss": 0.0165,
      "step": 1169540
    },
    {
      "epoch": 1.9140105915699483,
      "grad_norm": 0.6556224226951599,
      "learning_rate": 6.212647351462478e-06,
      "loss": 0.0195,
      "step": 1169560
    },
    {
      "epoch": 1.9140433220086015,
      "grad_norm": 0.5704535841941833,
      "learning_rate": 6.212581459248961e-06,
      "loss": 0.0124,
      "step": 1169580
    },
    {
      "epoch": 1.9140760524472549,
      "grad_norm": 0.22317613661289215,
      "learning_rate": 6.2125155670354446e-06,
      "loss": 0.0276,
      "step": 1169600
    },
    {
      "epoch": 1.9141087828859082,
      "grad_norm": 0.3502785265445709,
      "learning_rate": 6.2124496748219265e-06,
      "loss": 0.0179,
      "step": 1169620
    },
    {
      "epoch": 1.9141415133245616,
      "grad_norm": 0.3777785003185272,
      "learning_rate": 6.21238378260841e-06,
      "loss": 0.0142,
      "step": 1169640
    },
    {
      "epoch": 1.914174243763215,
      "grad_norm": 0.19627556204795837,
      "learning_rate": 6.212317890394892e-06,
      "loss": 0.0178,
      "step": 1169660
    },
    {
      "epoch": 1.9142069742018681,
      "grad_norm": 1.3125388622283936,
      "learning_rate": 6.2122519981813755e-06,
      "loss": 0.0197,
      "step": 1169680
    },
    {
      "epoch": 1.9142397046405217,
      "grad_norm": 0.2735789716243744,
      "learning_rate": 6.212186105967858e-06,
      "loss": 0.0138,
      "step": 1169700
    },
    {
      "epoch": 1.9142724350791749,
      "grad_norm": 4.659542083740234,
      "learning_rate": 6.212120213754341e-06,
      "loss": 0.0186,
      "step": 1169720
    },
    {
      "epoch": 1.9143051655178283,
      "grad_norm": 0.2693977355957031,
      "learning_rate": 6.212054321540824e-06,
      "loss": 0.0176,
      "step": 1169740
    },
    {
      "epoch": 1.9143378959564816,
      "grad_norm": 0.1566847711801529,
      "learning_rate": 6.211988429327307e-06,
      "loss": 0.017,
      "step": 1169760
    },
    {
      "epoch": 1.914370626395135,
      "grad_norm": 1.3771313428878784,
      "learning_rate": 6.211922537113789e-06,
      "loss": 0.0217,
      "step": 1169780
    },
    {
      "epoch": 1.9144033568337884,
      "grad_norm": 1.2207387685775757,
      "learning_rate": 6.211856644900273e-06,
      "loss": 0.0247,
      "step": 1169800
    },
    {
      "epoch": 1.9144360872724415,
      "grad_norm": 0.31960365176200867,
      "learning_rate": 6.211790752686756e-06,
      "loss": 0.0139,
      "step": 1169820
    },
    {
      "epoch": 1.9144688177110951,
      "grad_norm": 0.3357732594013214,
      "learning_rate": 6.211724860473238e-06,
      "loss": 0.0209,
      "step": 1169840
    },
    {
      "epoch": 1.9145015481497483,
      "grad_norm": 0.05934036523103714,
      "learning_rate": 6.211658968259722e-06,
      "loss": 0.0157,
      "step": 1169860
    },
    {
      "epoch": 1.9145342785884016,
      "grad_norm": 0.3265909254550934,
      "learning_rate": 6.211593076046204e-06,
      "loss": 0.0151,
      "step": 1169880
    },
    {
      "epoch": 1.914567009027055,
      "grad_norm": 0.3757561445236206,
      "learning_rate": 6.211527183832687e-06,
      "loss": 0.0221,
      "step": 1169900
    },
    {
      "epoch": 1.9145997394657082,
      "grad_norm": 0.22841496765613556,
      "learning_rate": 6.211461291619169e-06,
      "loss": 0.0187,
      "step": 1169920
    },
    {
      "epoch": 1.9146324699043618,
      "grad_norm": 0.1770954430103302,
      "learning_rate": 6.211395399405653e-06,
      "loss": 0.0132,
      "step": 1169940
    },
    {
      "epoch": 1.914665200343015,
      "grad_norm": 0.22768811881542206,
      "learning_rate": 6.211329507192136e-06,
      "loss": 0.0137,
      "step": 1169960
    },
    {
      "epoch": 1.9146979307816685,
      "grad_norm": 1.0744903087615967,
      "learning_rate": 6.211263614978618e-06,
      "loss": 0.0144,
      "step": 1169980
    },
    {
      "epoch": 1.9147306612203217,
      "grad_norm": 0.6928357481956482,
      "learning_rate": 6.211197722765101e-06,
      "loss": 0.0237,
      "step": 1170000
    },
    {
      "epoch": 1.914763391658975,
      "grad_norm": 0.3719543218612671,
      "learning_rate": 6.211131830551585e-06,
      "loss": 0.0181,
      "step": 1170020
    },
    {
      "epoch": 1.9147961220976284,
      "grad_norm": 0.5226643681526184,
      "learning_rate": 6.2110659383380665e-06,
      "loss": 0.0157,
      "step": 1170040
    },
    {
      "epoch": 1.9148288525362815,
      "grad_norm": 0.21154071390628815,
      "learning_rate": 6.21100004612455e-06,
      "loss": 0.0269,
      "step": 1170060
    },
    {
      "epoch": 1.9148615829749351,
      "grad_norm": 0.3007914423942566,
      "learning_rate": 6.210934153911032e-06,
      "loss": 0.0174,
      "step": 1170080
    },
    {
      "epoch": 1.9148943134135883,
      "grad_norm": 1.251392126083374,
      "learning_rate": 6.210868261697516e-06,
      "loss": 0.0104,
      "step": 1170100
    },
    {
      "epoch": 1.9149270438522417,
      "grad_norm": 0.8807639479637146,
      "learning_rate": 6.210802369483998e-06,
      "loss": 0.0174,
      "step": 1170120
    },
    {
      "epoch": 1.914959774290895,
      "grad_norm": 0.291281521320343,
      "learning_rate": 6.210736477270481e-06,
      "loss": 0.0173,
      "step": 1170140
    },
    {
      "epoch": 1.9149925047295484,
      "grad_norm": 0.291109174489975,
      "learning_rate": 6.210670585056965e-06,
      "loss": 0.0256,
      "step": 1170160
    },
    {
      "epoch": 1.9150252351682018,
      "grad_norm": 1.0385417938232422,
      "learning_rate": 6.210604692843447e-06,
      "loss": 0.0201,
      "step": 1170180
    },
    {
      "epoch": 1.915057965606855,
      "grad_norm": 0.32121267914772034,
      "learning_rate": 6.21053880062993e-06,
      "loss": 0.0194,
      "step": 1170200
    },
    {
      "epoch": 1.9150906960455085,
      "grad_norm": 2.660337448120117,
      "learning_rate": 6.210472908416413e-06,
      "loss": 0.0186,
      "step": 1170220
    },
    {
      "epoch": 1.9151234264841617,
      "grad_norm": 1.1367579698562622,
      "learning_rate": 6.2104070162028965e-06,
      "loss": 0.0193,
      "step": 1170240
    },
    {
      "epoch": 1.915156156922815,
      "grad_norm": 0.5568230748176575,
      "learning_rate": 6.210341123989378e-06,
      "loss": 0.0215,
      "step": 1170260
    },
    {
      "epoch": 1.9151888873614684,
      "grad_norm": 0.2188839316368103,
      "learning_rate": 6.210275231775862e-06,
      "loss": 0.0228,
      "step": 1170280
    },
    {
      "epoch": 1.9152216178001218,
      "grad_norm": 0.32484567165374756,
      "learning_rate": 6.210209339562344e-06,
      "loss": 0.0181,
      "step": 1170300
    },
    {
      "epoch": 1.9152543482387752,
      "grad_norm": 0.2709622383117676,
      "learning_rate": 6.2101434473488274e-06,
      "loss": 0.0131,
      "step": 1170320
    },
    {
      "epoch": 1.9152870786774283,
      "grad_norm": 0.4175589978694916,
      "learning_rate": 6.21007755513531e-06,
      "loss": 0.0241,
      "step": 1170340
    },
    {
      "epoch": 1.915319809116082,
      "grad_norm": 0.4199846684932709,
      "learning_rate": 6.210011662921793e-06,
      "loss": 0.0143,
      "step": 1170360
    },
    {
      "epoch": 1.915352539554735,
      "grad_norm": 0.08532427251338959,
      "learning_rate": 6.209945770708276e-06,
      "loss": 0.024,
      "step": 1170380
    },
    {
      "epoch": 1.9153852699933884,
      "grad_norm": 0.43398305773735046,
      "learning_rate": 6.209879878494759e-06,
      "loss": 0.0139,
      "step": 1170400
    },
    {
      "epoch": 1.9154180004320418,
      "grad_norm": 1.8950519561767578,
      "learning_rate": 6.209813986281241e-06,
      "loss": 0.0163,
      "step": 1170420
    },
    {
      "epoch": 1.9154507308706952,
      "grad_norm": 0.1183951273560524,
      "learning_rate": 6.209748094067725e-06,
      "loss": 0.018,
      "step": 1170440
    },
    {
      "epoch": 1.9154834613093485,
      "grad_norm": 0.9567676782608032,
      "learning_rate": 6.209682201854207e-06,
      "loss": 0.0228,
      "step": 1170460
    },
    {
      "epoch": 1.9155161917480017,
      "grad_norm": 0.908344566822052,
      "learning_rate": 6.20961630964069e-06,
      "loss": 0.0152,
      "step": 1170480
    },
    {
      "epoch": 1.9155489221866553,
      "grad_norm": 0.3873794674873352,
      "learning_rate": 6.209550417427174e-06,
      "loss": 0.0186,
      "step": 1170500
    },
    {
      "epoch": 1.9155816526253084,
      "grad_norm": 0.25078824162483215,
      "learning_rate": 6.209484525213656e-06,
      "loss": 0.0139,
      "step": 1170520
    },
    {
      "epoch": 1.9156143830639618,
      "grad_norm": 0.4398415684700012,
      "learning_rate": 6.209418633000139e-06,
      "loss": 0.0237,
      "step": 1170540
    },
    {
      "epoch": 1.9156471135026152,
      "grad_norm": 0.28067803382873535,
      "learning_rate": 6.209352740786622e-06,
      "loss": 0.0112,
      "step": 1170560
    },
    {
      "epoch": 1.9156798439412683,
      "grad_norm": 0.5299820303916931,
      "learning_rate": 6.209286848573105e-06,
      "loss": 0.0136,
      "step": 1170580
    },
    {
      "epoch": 1.915712574379922,
      "grad_norm": 0.454074501991272,
      "learning_rate": 6.2092209563595875e-06,
      "loss": 0.0233,
      "step": 1170600
    },
    {
      "epoch": 1.915745304818575,
      "grad_norm": 0.3205726444721222,
      "learning_rate": 6.209155064146071e-06,
      "loss": 0.0126,
      "step": 1170620
    },
    {
      "epoch": 1.9157780352572287,
      "grad_norm": 0.33859509229660034,
      "learning_rate": 6.209089171932553e-06,
      "loss": 0.0233,
      "step": 1170640
    },
    {
      "epoch": 1.9158107656958818,
      "grad_norm": 1.6689529418945312,
      "learning_rate": 6.2090232797190366e-06,
      "loss": 0.0215,
      "step": 1170660
    },
    {
      "epoch": 1.9158434961345352,
      "grad_norm": 3.0355143547058105,
      "learning_rate": 6.2089573875055185e-06,
      "loss": 0.0203,
      "step": 1170680
    },
    {
      "epoch": 1.9158762265731886,
      "grad_norm": 0.4452727735042572,
      "learning_rate": 6.208891495292002e-06,
      "loss": 0.0209,
      "step": 1170700
    },
    {
      "epoch": 1.9159089570118417,
      "grad_norm": 0.6632367372512817,
      "learning_rate": 6.208825603078485e-06,
      "loss": 0.0156,
      "step": 1170720
    },
    {
      "epoch": 1.9159416874504953,
      "grad_norm": 0.19141167402267456,
      "learning_rate": 6.2087597108649675e-06,
      "loss": 0.0209,
      "step": 1170740
    },
    {
      "epoch": 1.9159744178891485,
      "grad_norm": 0.23163051903247833,
      "learning_rate": 6.20869381865145e-06,
      "loss": 0.0209,
      "step": 1170760
    },
    {
      "epoch": 1.9160071483278018,
      "grad_norm": 0.20768120884895325,
      "learning_rate": 6.208627926437934e-06,
      "loss": 0.0164,
      "step": 1170780
    },
    {
      "epoch": 1.9160398787664552,
      "grad_norm": 0.30388370156288147,
      "learning_rate": 6.208562034224416e-06,
      "loss": 0.0167,
      "step": 1170800
    },
    {
      "epoch": 1.9160726092051086,
      "grad_norm": 2.2886013984680176,
      "learning_rate": 6.208496142010899e-06,
      "loss": 0.0191,
      "step": 1170820
    },
    {
      "epoch": 1.916105339643762,
      "grad_norm": 0.6671006679534912,
      "learning_rate": 6.208430249797381e-06,
      "loss": 0.0146,
      "step": 1170840
    },
    {
      "epoch": 1.916138070082415,
      "grad_norm": 0.5682473182678223,
      "learning_rate": 6.208364357583865e-06,
      "loss": 0.0146,
      "step": 1170860
    },
    {
      "epoch": 1.9161708005210687,
      "grad_norm": 0.5771946310997009,
      "learning_rate": 6.208298465370348e-06,
      "loss": 0.0182,
      "step": 1170880
    },
    {
      "epoch": 1.9162035309597218,
      "grad_norm": 0.05556125193834305,
      "learning_rate": 6.20823257315683e-06,
      "loss": 0.0158,
      "step": 1170900
    },
    {
      "epoch": 1.9162362613983752,
      "grad_norm": 0.33480069041252136,
      "learning_rate": 6.208166680943314e-06,
      "loss": 0.0213,
      "step": 1170920
    },
    {
      "epoch": 1.9162689918370286,
      "grad_norm": 1.2271738052368164,
      "learning_rate": 6.208100788729796e-06,
      "loss": 0.0171,
      "step": 1170940
    },
    {
      "epoch": 1.916301722275682,
      "grad_norm": 0.2663578689098358,
      "learning_rate": 6.208034896516279e-06,
      "loss": 0.0197,
      "step": 1170960
    },
    {
      "epoch": 1.9163344527143353,
      "grad_norm": 0.554434597492218,
      "learning_rate": 6.207969004302762e-06,
      "loss": 0.0167,
      "step": 1170980
    },
    {
      "epoch": 1.9163671831529885,
      "grad_norm": 0.9399669766426086,
      "learning_rate": 6.207903112089245e-06,
      "loss": 0.0254,
      "step": 1171000
    },
    {
      "epoch": 1.916399913591642,
      "grad_norm": 0.20316021144390106,
      "learning_rate": 6.2078372198757276e-06,
      "loss": 0.0265,
      "step": 1171020
    },
    {
      "epoch": 1.9164326440302952,
      "grad_norm": 1.100721836090088,
      "learning_rate": 6.207771327662211e-06,
      "loss": 0.0208,
      "step": 1171040
    },
    {
      "epoch": 1.9164653744689486,
      "grad_norm": 0.3458475172519684,
      "learning_rate": 6.207705435448693e-06,
      "loss": 0.0179,
      "step": 1171060
    },
    {
      "epoch": 1.916498104907602,
      "grad_norm": 0.41284841299057007,
      "learning_rate": 6.207639543235177e-06,
      "loss": 0.0147,
      "step": 1171080
    },
    {
      "epoch": 1.9165308353462553,
      "grad_norm": 0.30076539516448975,
      "learning_rate": 6.2075736510216585e-06,
      "loss": 0.0246,
      "step": 1171100
    },
    {
      "epoch": 1.9165635657849087,
      "grad_norm": 0.6816052198410034,
      "learning_rate": 6.207507758808142e-06,
      "loss": 0.0188,
      "step": 1171120
    },
    {
      "epoch": 1.9165962962235619,
      "grad_norm": 0.7795361876487732,
      "learning_rate": 6.207441866594625e-06,
      "loss": 0.0131,
      "step": 1171140
    },
    {
      "epoch": 1.9166290266622155,
      "grad_norm": 0.6881524920463562,
      "learning_rate": 6.207375974381108e-06,
      "loss": 0.019,
      "step": 1171160
    },
    {
      "epoch": 1.9166617571008686,
      "grad_norm": 0.42870238423347473,
      "learning_rate": 6.20731008216759e-06,
      "loss": 0.0219,
      "step": 1171180
    },
    {
      "epoch": 1.916694487539522,
      "grad_norm": 0.398480623960495,
      "learning_rate": 6.207244189954074e-06,
      "loss": 0.0185,
      "step": 1171200
    },
    {
      "epoch": 1.9167272179781754,
      "grad_norm": 0.49890267848968506,
      "learning_rate": 6.207178297740557e-06,
      "loss": 0.0209,
      "step": 1171220
    },
    {
      "epoch": 1.9167599484168287,
      "grad_norm": 0.21129503846168518,
      "learning_rate": 6.207112405527039e-06,
      "loss": 0.0208,
      "step": 1171240
    },
    {
      "epoch": 1.916792678855482,
      "grad_norm": 0.8866633176803589,
      "learning_rate": 6.207046513313523e-06,
      "loss": 0.0173,
      "step": 1171260
    },
    {
      "epoch": 1.9168254092941353,
      "grad_norm": 0.7367944717407227,
      "learning_rate": 6.206980621100005e-06,
      "loss": 0.0189,
      "step": 1171280
    },
    {
      "epoch": 1.9168581397327888,
      "grad_norm": 0.7834066152572632,
      "learning_rate": 6.2069147288864885e-06,
      "loss": 0.0139,
      "step": 1171300
    },
    {
      "epoch": 1.916890870171442,
      "grad_norm": 0.5150384306907654,
      "learning_rate": 6.20684883667297e-06,
      "loss": 0.0135,
      "step": 1171320
    },
    {
      "epoch": 1.9169236006100954,
      "grad_norm": 0.11026352643966675,
      "learning_rate": 6.206782944459454e-06,
      "loss": 0.0182,
      "step": 1171340
    },
    {
      "epoch": 1.9169563310487487,
      "grad_norm": 0.11727358400821686,
      "learning_rate": 6.206717052245937e-06,
      "loss": 0.0138,
      "step": 1171360
    },
    {
      "epoch": 1.916989061487402,
      "grad_norm": 0.4158204197883606,
      "learning_rate": 6.2066511600324194e-06,
      "loss": 0.0111,
      "step": 1171380
    },
    {
      "epoch": 1.9170217919260555,
      "grad_norm": 0.17242185771465302,
      "learning_rate": 6.206585267818902e-06,
      "loss": 0.02,
      "step": 1171400
    },
    {
      "epoch": 1.9170545223647086,
      "grad_norm": 0.422028511762619,
      "learning_rate": 6.206519375605386e-06,
      "loss": 0.0135,
      "step": 1171420
    },
    {
      "epoch": 1.9170872528033622,
      "grad_norm": 0.9841755032539368,
      "learning_rate": 6.206453483391868e-06,
      "loss": 0.0278,
      "step": 1171440
    },
    {
      "epoch": 1.9171199832420154,
      "grad_norm": 0.4547162652015686,
      "learning_rate": 6.206387591178351e-06,
      "loss": 0.0253,
      "step": 1171460
    },
    {
      "epoch": 1.9171527136806688,
      "grad_norm": 0.8242934942245483,
      "learning_rate": 6.206321698964833e-06,
      "loss": 0.0208,
      "step": 1171480
    },
    {
      "epoch": 1.9171854441193221,
      "grad_norm": 0.09882485866546631,
      "learning_rate": 6.206255806751317e-06,
      "loss": 0.0155,
      "step": 1171500
    },
    {
      "epoch": 1.9172181745579753,
      "grad_norm": 0.5669483542442322,
      "learning_rate": 6.2061899145377995e-06,
      "loss": 0.013,
      "step": 1171520
    },
    {
      "epoch": 1.9172509049966289,
      "grad_norm": 1.1267788410186768,
      "learning_rate": 6.206124022324282e-06,
      "loss": 0.018,
      "step": 1171540
    },
    {
      "epoch": 1.917283635435282,
      "grad_norm": 0.5545942187309265,
      "learning_rate": 6.206058130110766e-06,
      "loss": 0.0168,
      "step": 1171560
    },
    {
      "epoch": 1.9173163658739354,
      "grad_norm": 0.9252411127090454,
      "learning_rate": 6.2059922378972485e-06,
      "loss": 0.0196,
      "step": 1171580
    },
    {
      "epoch": 1.9173490963125888,
      "grad_norm": 0.1477295458316803,
      "learning_rate": 6.205926345683731e-06,
      "loss": 0.016,
      "step": 1171600
    },
    {
      "epoch": 1.9173818267512421,
      "grad_norm": 0.3580765724182129,
      "learning_rate": 6.205860453470214e-06,
      "loss": 0.0195,
      "step": 1171620
    },
    {
      "epoch": 1.9174145571898955,
      "grad_norm": 0.39704036712646484,
      "learning_rate": 6.205794561256698e-06,
      "loss": 0.0235,
      "step": 1171640
    },
    {
      "epoch": 1.9174472876285487,
      "grad_norm": 0.7421284317970276,
      "learning_rate": 6.2057286690431795e-06,
      "loss": 0.0141,
      "step": 1171660
    },
    {
      "epoch": 1.9174800180672023,
      "grad_norm": 0.8270124197006226,
      "learning_rate": 6.205662776829663e-06,
      "loss": 0.0347,
      "step": 1171680
    },
    {
      "epoch": 1.9175127485058554,
      "grad_norm": 0.3048393726348877,
      "learning_rate": 6.205596884616145e-06,
      "loss": 0.0192,
      "step": 1171700
    },
    {
      "epoch": 1.9175454789445088,
      "grad_norm": 0.5829189419746399,
      "learning_rate": 6.2055309924026285e-06,
      "loss": 0.0153,
      "step": 1171720
    },
    {
      "epoch": 1.9175782093831621,
      "grad_norm": 1.3560312986373901,
      "learning_rate": 6.2054651001891104e-06,
      "loss": 0.026,
      "step": 1171740
    },
    {
      "epoch": 1.9176109398218155,
      "grad_norm": 0.25006818771362305,
      "learning_rate": 6.205399207975594e-06,
      "loss": 0.0154,
      "step": 1171760
    },
    {
      "epoch": 1.917643670260469,
      "grad_norm": 5.5402750968933105,
      "learning_rate": 6.205333315762077e-06,
      "loss": 0.0173,
      "step": 1171780
    },
    {
      "epoch": 1.917676400699122,
      "grad_norm": 0.7232057452201843,
      "learning_rate": 6.20526742354856e-06,
      "loss": 0.0223,
      "step": 1171800
    },
    {
      "epoch": 1.9177091311377756,
      "grad_norm": 0.43608835339546204,
      "learning_rate": 6.205201531335042e-06,
      "loss": 0.0176,
      "step": 1171820
    },
    {
      "epoch": 1.9177418615764288,
      "grad_norm": 0.5054790377616882,
      "learning_rate": 6.205135639121526e-06,
      "loss": 0.0175,
      "step": 1171840
    },
    {
      "epoch": 1.9177745920150822,
      "grad_norm": 0.5209124684333801,
      "learning_rate": 6.205069746908008e-06,
      "loss": 0.0171,
      "step": 1171860
    },
    {
      "epoch": 1.9178073224537355,
      "grad_norm": 0.273455411195755,
      "learning_rate": 6.205003854694491e-06,
      "loss": 0.0182,
      "step": 1171880
    },
    {
      "epoch": 1.917840052892389,
      "grad_norm": 0.3900625705718994,
      "learning_rate": 6.204937962480975e-06,
      "loss": 0.0173,
      "step": 1171900
    },
    {
      "epoch": 1.9178727833310423,
      "grad_norm": 0.2060225009918213,
      "learning_rate": 6.204872070267457e-06,
      "loss": 0.0165,
      "step": 1171920
    },
    {
      "epoch": 1.9179055137696954,
      "grad_norm": 0.4154377281665802,
      "learning_rate": 6.20480617805394e-06,
      "loss": 0.0208,
      "step": 1171940
    },
    {
      "epoch": 1.917938244208349,
      "grad_norm": 0.18024590611457825,
      "learning_rate": 6.204740285840422e-06,
      "loss": 0.0199,
      "step": 1171960
    },
    {
      "epoch": 1.9179709746470022,
      "grad_norm": 0.8060873746871948,
      "learning_rate": 6.204674393626906e-06,
      "loss": 0.0164,
      "step": 1171980
    },
    {
      "epoch": 1.9180037050856555,
      "grad_norm": 0.36419591307640076,
      "learning_rate": 6.204608501413389e-06,
      "loss": 0.0136,
      "step": 1172000
    },
    {
      "epoch": 1.918036435524309,
      "grad_norm": 0.1212707981467247,
      "learning_rate": 6.204542609199871e-06,
      "loss": 0.0129,
      "step": 1172020
    },
    {
      "epoch": 1.9180691659629623,
      "grad_norm": 0.30411839485168457,
      "learning_rate": 6.204476716986354e-06,
      "loss": 0.0145,
      "step": 1172040
    },
    {
      "epoch": 1.9181018964016157,
      "grad_norm": 0.7983999848365784,
      "learning_rate": 6.204410824772838e-06,
      "loss": 0.012,
      "step": 1172060
    },
    {
      "epoch": 1.9181346268402688,
      "grad_norm": 1.408235788345337,
      "learning_rate": 6.2043449325593196e-06,
      "loss": 0.0179,
      "step": 1172080
    },
    {
      "epoch": 1.9181673572789224,
      "grad_norm": 0.38863351941108704,
      "learning_rate": 6.204279040345803e-06,
      "loss": 0.0134,
      "step": 1172100
    },
    {
      "epoch": 1.9182000877175756,
      "grad_norm": 0.8641635775566101,
      "learning_rate": 6.204213148132285e-06,
      "loss": 0.0238,
      "step": 1172120
    },
    {
      "epoch": 1.918232818156229,
      "grad_norm": 0.2371530383825302,
      "learning_rate": 6.204147255918769e-06,
      "loss": 0.0191,
      "step": 1172140
    },
    {
      "epoch": 1.9182655485948823,
      "grad_norm": 0.1908724159002304,
      "learning_rate": 6.204081363705251e-06,
      "loss": 0.0141,
      "step": 1172160
    },
    {
      "epoch": 1.9182982790335354,
      "grad_norm": 0.7892295122146606,
      "learning_rate": 6.204015471491734e-06,
      "loss": 0.0161,
      "step": 1172180
    },
    {
      "epoch": 1.918331009472189,
      "grad_norm": 1.2261155843734741,
      "learning_rate": 6.203949579278217e-06,
      "loss": 0.021,
      "step": 1172200
    },
    {
      "epoch": 1.9183637399108422,
      "grad_norm": 0.7545529007911682,
      "learning_rate": 6.2038836870647e-06,
      "loss": 0.0195,
      "step": 1172220
    },
    {
      "epoch": 1.9183964703494958,
      "grad_norm": 0.5338444113731384,
      "learning_rate": 6.203817794851182e-06,
      "loss": 0.0262,
      "step": 1172240
    },
    {
      "epoch": 1.918429200788149,
      "grad_norm": 0.30127909779548645,
      "learning_rate": 6.203751902637666e-06,
      "loss": 0.0095,
      "step": 1172260
    },
    {
      "epoch": 1.9184619312268023,
      "grad_norm": 0.39795804023742676,
      "learning_rate": 6.2036860104241495e-06,
      "loss": 0.0195,
      "step": 1172280
    },
    {
      "epoch": 1.9184946616654557,
      "grad_norm": 0.6692842841148376,
      "learning_rate": 6.203620118210631e-06,
      "loss": 0.0165,
      "step": 1172300
    },
    {
      "epoch": 1.9185273921041088,
      "grad_norm": 0.8268699049949646,
      "learning_rate": 6.203554225997115e-06,
      "loss": 0.0186,
      "step": 1172320
    },
    {
      "epoch": 1.9185601225427624,
      "grad_norm": 0.10929805785417557,
      "learning_rate": 6.203488333783597e-06,
      "loss": 0.0101,
      "step": 1172340
    },
    {
      "epoch": 1.9185928529814156,
      "grad_norm": 0.5582870841026306,
      "learning_rate": 6.2034224415700804e-06,
      "loss": 0.0195,
      "step": 1172360
    },
    {
      "epoch": 1.918625583420069,
      "grad_norm": 0.5715454816818237,
      "learning_rate": 6.203356549356563e-06,
      "loss": 0.0193,
      "step": 1172380
    },
    {
      "epoch": 1.9186583138587223,
      "grad_norm": 0.20365504920482635,
      "learning_rate": 6.203290657143046e-06,
      "loss": 0.0164,
      "step": 1172400
    },
    {
      "epoch": 1.9186910442973757,
      "grad_norm": 0.530270516872406,
      "learning_rate": 6.203224764929529e-06,
      "loss": 0.0177,
      "step": 1172420
    },
    {
      "epoch": 1.918723774736029,
      "grad_norm": 0.3462257981300354,
      "learning_rate": 6.203158872716012e-06,
      "loss": 0.0203,
      "step": 1172440
    },
    {
      "epoch": 1.9187565051746822,
      "grad_norm": 1.3449677228927612,
      "learning_rate": 6.203092980502494e-06,
      "loss": 0.0246,
      "step": 1172460
    },
    {
      "epoch": 1.9187892356133358,
      "grad_norm": 0.30863332748413086,
      "learning_rate": 6.203027088288978e-06,
      "loss": 0.02,
      "step": 1172480
    },
    {
      "epoch": 1.918821966051989,
      "grad_norm": 0.23051336407661438,
      "learning_rate": 6.20296119607546e-06,
      "loss": 0.014,
      "step": 1172500
    },
    {
      "epoch": 1.9188546964906423,
      "grad_norm": 1.3414254188537598,
      "learning_rate": 6.202895303861943e-06,
      "loss": 0.0168,
      "step": 1172520
    },
    {
      "epoch": 1.9188874269292957,
      "grad_norm": 0.547179639339447,
      "learning_rate": 6.202829411648426e-06,
      "loss": 0.0192,
      "step": 1172540
    },
    {
      "epoch": 1.918920157367949,
      "grad_norm": 0.1734132468700409,
      "learning_rate": 6.202763519434909e-06,
      "loss": 0.0266,
      "step": 1172560
    },
    {
      "epoch": 1.9189528878066024,
      "grad_norm": 0.6978381872177124,
      "learning_rate": 6.2026976272213914e-06,
      "loss": 0.0157,
      "step": 1172580
    },
    {
      "epoch": 1.9189856182452556,
      "grad_norm": 0.4380435049533844,
      "learning_rate": 6.202631735007875e-06,
      "loss": 0.0184,
      "step": 1172600
    },
    {
      "epoch": 1.9190183486839092,
      "grad_norm": 0.1275489628314972,
      "learning_rate": 6.202565842794358e-06,
      "loss": 0.0158,
      "step": 1172620
    },
    {
      "epoch": 1.9190510791225623,
      "grad_norm": 0.21508967876434326,
      "learning_rate": 6.2024999505808405e-06,
      "loss": 0.022,
      "step": 1172640
    },
    {
      "epoch": 1.9190838095612157,
      "grad_norm": 0.9623483419418335,
      "learning_rate": 6.202434058367324e-06,
      "loss": 0.0157,
      "step": 1172660
    },
    {
      "epoch": 1.919116539999869,
      "grad_norm": 0.250944048166275,
      "learning_rate": 6.202368166153806e-06,
      "loss": 0.0168,
      "step": 1172680
    },
    {
      "epoch": 1.9191492704385225,
      "grad_norm": 0.30868807435035706,
      "learning_rate": 6.2023022739402896e-06,
      "loss": 0.0211,
      "step": 1172700
    },
    {
      "epoch": 1.9191820008771758,
      "grad_norm": 0.8886248469352722,
      "learning_rate": 6.2022363817267715e-06,
      "loss": 0.0171,
      "step": 1172720
    },
    {
      "epoch": 1.919214731315829,
      "grad_norm": 0.35203129053115845,
      "learning_rate": 6.202170489513255e-06,
      "loss": 0.0248,
      "step": 1172740
    },
    {
      "epoch": 1.9192474617544826,
      "grad_norm": 0.22969123721122742,
      "learning_rate": 6.202104597299737e-06,
      "loss": 0.0165,
      "step": 1172760
    },
    {
      "epoch": 1.9192801921931357,
      "grad_norm": 0.21604235470294952,
      "learning_rate": 6.2020387050862205e-06,
      "loss": 0.0162,
      "step": 1172780
    },
    {
      "epoch": 1.919312922631789,
      "grad_norm": 0.5752337574958801,
      "learning_rate": 6.201972812872703e-06,
      "loss": 0.0233,
      "step": 1172800
    },
    {
      "epoch": 1.9193456530704425,
      "grad_norm": 0.526721715927124,
      "learning_rate": 6.201906920659186e-06,
      "loss": 0.0247,
      "step": 1172820
    },
    {
      "epoch": 1.9193783835090958,
      "grad_norm": 0.27192622423171997,
      "learning_rate": 6.201841028445669e-06,
      "loss": 0.0169,
      "step": 1172840
    },
    {
      "epoch": 1.9194111139477492,
      "grad_norm": 0.808947741985321,
      "learning_rate": 6.201775136232152e-06,
      "loss": 0.0135,
      "step": 1172860
    },
    {
      "epoch": 1.9194438443864024,
      "grad_norm": 0.08088278025388718,
      "learning_rate": 6.201709244018634e-06,
      "loss": 0.0173,
      "step": 1172880
    },
    {
      "epoch": 1.919476574825056,
      "grad_norm": 0.10485924035310745,
      "learning_rate": 6.201643351805118e-06,
      "loss": 0.0159,
      "step": 1172900
    },
    {
      "epoch": 1.919509305263709,
      "grad_norm": 0.4698185324668884,
      "learning_rate": 6.2015774595916e-06,
      "loss": 0.0184,
      "step": 1172920
    },
    {
      "epoch": 1.9195420357023625,
      "grad_norm": 0.20814408361911774,
      "learning_rate": 6.201511567378083e-06,
      "loss": 0.0195,
      "step": 1172940
    },
    {
      "epoch": 1.9195747661410159,
      "grad_norm": 0.2824134826660156,
      "learning_rate": 6.201445675164567e-06,
      "loss": 0.0144,
      "step": 1172960
    },
    {
      "epoch": 1.919607496579669,
      "grad_norm": 3.5348544120788574,
      "learning_rate": 6.201379782951049e-06,
      "loss": 0.0221,
      "step": 1172980
    },
    {
      "epoch": 1.9196402270183226,
      "grad_norm": 0.5248426795005798,
      "learning_rate": 6.201313890737532e-06,
      "loss": 0.0141,
      "step": 1173000
    },
    {
      "epoch": 1.9196729574569757,
      "grad_norm": 0.7310869097709656,
      "learning_rate": 6.201247998524015e-06,
      "loss": 0.019,
      "step": 1173020
    },
    {
      "epoch": 1.9197056878956293,
      "grad_norm": 0.6612933278083801,
      "learning_rate": 6.201182106310498e-06,
      "loss": 0.0196,
      "step": 1173040
    },
    {
      "epoch": 1.9197384183342825,
      "grad_norm": 0.5745638012886047,
      "learning_rate": 6.2011162140969806e-06,
      "loss": 0.019,
      "step": 1173060
    },
    {
      "epoch": 1.9197711487729359,
      "grad_norm": 0.2296898365020752,
      "learning_rate": 6.201050321883464e-06,
      "loss": 0.0093,
      "step": 1173080
    },
    {
      "epoch": 1.9198038792115892,
      "grad_norm": 1.2052230834960938,
      "learning_rate": 6.200984429669946e-06,
      "loss": 0.0202,
      "step": 1173100
    },
    {
      "epoch": 1.9198366096502424,
      "grad_norm": 0.5204453468322754,
      "learning_rate": 6.20091853745643e-06,
      "loss": 0.0158,
      "step": 1173120
    },
    {
      "epoch": 1.919869340088896,
      "grad_norm": 0.12912006676197052,
      "learning_rate": 6.2008526452429115e-06,
      "loss": 0.0188,
      "step": 1173140
    },
    {
      "epoch": 1.9199020705275491,
      "grad_norm": 0.35828617215156555,
      "learning_rate": 6.200786753029395e-06,
      "loss": 0.0151,
      "step": 1173160
    },
    {
      "epoch": 1.9199348009662025,
      "grad_norm": 1.1103429794311523,
      "learning_rate": 6.200720860815878e-06,
      "loss": 0.0177,
      "step": 1173180
    },
    {
      "epoch": 1.9199675314048559,
      "grad_norm": 0.7052242755889893,
      "learning_rate": 6.200654968602361e-06,
      "loss": 0.0194,
      "step": 1173200
    },
    {
      "epoch": 1.9200002618435092,
      "grad_norm": 1.3399986028671265,
      "learning_rate": 6.200589076388843e-06,
      "loss": 0.0164,
      "step": 1173220
    },
    {
      "epoch": 1.9200329922821626,
      "grad_norm": 0.18357859551906586,
      "learning_rate": 6.200523184175327e-06,
      "loss": 0.0181,
      "step": 1173240
    },
    {
      "epoch": 1.9200657227208158,
      "grad_norm": 0.9692514538764954,
      "learning_rate": 6.200457291961809e-06,
      "loss": 0.014,
      "step": 1173260
    },
    {
      "epoch": 1.9200984531594694,
      "grad_norm": 1.2207940816879272,
      "learning_rate": 6.200391399748292e-06,
      "loss": 0.0198,
      "step": 1173280
    },
    {
      "epoch": 1.9201311835981225,
      "grad_norm": 0.16673238575458527,
      "learning_rate": 6.200325507534774e-06,
      "loss": 0.0115,
      "step": 1173300
    },
    {
      "epoch": 1.9201639140367759,
      "grad_norm": 0.5209946632385254,
      "learning_rate": 6.200259615321258e-06,
      "loss": 0.0149,
      "step": 1173320
    },
    {
      "epoch": 1.9201966444754293,
      "grad_norm": 0.7774590849876404,
      "learning_rate": 6.2001937231077415e-06,
      "loss": 0.0168,
      "step": 1173340
    },
    {
      "epoch": 1.9202293749140826,
      "grad_norm": 0.3417460322380066,
      "learning_rate": 6.200127830894223e-06,
      "loss": 0.0153,
      "step": 1173360
    },
    {
      "epoch": 1.920262105352736,
      "grad_norm": 0.17883910238742828,
      "learning_rate": 6.200061938680707e-06,
      "loss": 0.0122,
      "step": 1173380
    },
    {
      "epoch": 1.9202948357913892,
      "grad_norm": 0.4981323480606079,
      "learning_rate": 6.19999604646719e-06,
      "loss": 0.0191,
      "step": 1173400
    },
    {
      "epoch": 1.9203275662300427,
      "grad_norm": 0.3215441405773163,
      "learning_rate": 6.1999301542536724e-06,
      "loss": 0.0227,
      "step": 1173420
    },
    {
      "epoch": 1.920360296668696,
      "grad_norm": 0.47387203574180603,
      "learning_rate": 6.199864262040155e-06,
      "loss": 0.0194,
      "step": 1173440
    },
    {
      "epoch": 1.9203930271073493,
      "grad_norm": 0.21500267088413239,
      "learning_rate": 6.199798369826639e-06,
      "loss": 0.0142,
      "step": 1173460
    },
    {
      "epoch": 1.9204257575460026,
      "grad_norm": 0.5264351963996887,
      "learning_rate": 6.199732477613121e-06,
      "loss": 0.0153,
      "step": 1173480
    },
    {
      "epoch": 1.920458487984656,
      "grad_norm": 0.886906087398529,
      "learning_rate": 6.199666585399604e-06,
      "loss": 0.0269,
      "step": 1173500
    },
    {
      "epoch": 1.9204912184233094,
      "grad_norm": 3.1111793518066406,
      "learning_rate": 6.199600693186086e-06,
      "loss": 0.0161,
      "step": 1173520
    },
    {
      "epoch": 1.9205239488619625,
      "grad_norm": 0.34867221117019653,
      "learning_rate": 6.19953480097257e-06,
      "loss": 0.0134,
      "step": 1173540
    },
    {
      "epoch": 1.9205566793006161,
      "grad_norm": 0.21900007128715515,
      "learning_rate": 6.1994689087590525e-06,
      "loss": 0.016,
      "step": 1173560
    },
    {
      "epoch": 1.9205894097392693,
      "grad_norm": 0.06632498651742935,
      "learning_rate": 6.199403016545535e-06,
      "loss": 0.0315,
      "step": 1173580
    },
    {
      "epoch": 1.9206221401779227,
      "grad_norm": 0.47418755292892456,
      "learning_rate": 6.199337124332018e-06,
      "loss": 0.0196,
      "step": 1173600
    },
    {
      "epoch": 1.920654870616576,
      "grad_norm": 0.4646722674369812,
      "learning_rate": 6.1992712321185015e-06,
      "loss": 0.0172,
      "step": 1173620
    },
    {
      "epoch": 1.9206876010552292,
      "grad_norm": 0.6121295690536499,
      "learning_rate": 6.199205339904983e-06,
      "loss": 0.0153,
      "step": 1173640
    },
    {
      "epoch": 1.9207203314938828,
      "grad_norm": 0.8956001400947571,
      "learning_rate": 6.199139447691467e-06,
      "loss": 0.0165,
      "step": 1173660
    },
    {
      "epoch": 1.920753061932536,
      "grad_norm": 0.34286943078041077,
      "learning_rate": 6.199073555477951e-06,
      "loss": 0.0162,
      "step": 1173680
    },
    {
      "epoch": 1.9207857923711895,
      "grad_norm": 0.3175290822982788,
      "learning_rate": 6.1990076632644325e-06,
      "loss": 0.0172,
      "step": 1173700
    },
    {
      "epoch": 1.9208185228098427,
      "grad_norm": 0.6327998042106628,
      "learning_rate": 6.198941771050916e-06,
      "loss": 0.0179,
      "step": 1173720
    },
    {
      "epoch": 1.920851253248496,
      "grad_norm": 0.4167935848236084,
      "learning_rate": 6.198875878837398e-06,
      "loss": 0.0224,
      "step": 1173740
    },
    {
      "epoch": 1.9208839836871494,
      "grad_norm": 1.437276005744934,
      "learning_rate": 6.1988099866238815e-06,
      "loss": 0.0151,
      "step": 1173760
    },
    {
      "epoch": 1.9209167141258026,
      "grad_norm": 0.5802576541900635,
      "learning_rate": 6.1987440944103634e-06,
      "loss": 0.0144,
      "step": 1173780
    },
    {
      "epoch": 1.9209494445644562,
      "grad_norm": 0.4079318642616272,
      "learning_rate": 6.198678202196847e-06,
      "loss": 0.0156,
      "step": 1173800
    },
    {
      "epoch": 1.9209821750031093,
      "grad_norm": 1.4770147800445557,
      "learning_rate": 6.19861230998333e-06,
      "loss": 0.0184,
      "step": 1173820
    },
    {
      "epoch": 1.9210149054417627,
      "grad_norm": 0.2893565595149994,
      "learning_rate": 6.1985464177698125e-06,
      "loss": 0.0237,
      "step": 1173840
    },
    {
      "epoch": 1.921047635880416,
      "grad_norm": 0.7094515562057495,
      "learning_rate": 6.198480525556295e-06,
      "loss": 0.0195,
      "step": 1173860
    },
    {
      "epoch": 1.9210803663190694,
      "grad_norm": 0.13411788642406464,
      "learning_rate": 6.198414633342779e-06,
      "loss": 0.019,
      "step": 1173880
    },
    {
      "epoch": 1.9211130967577228,
      "grad_norm": 0.6231836080551147,
      "learning_rate": 6.198348741129261e-06,
      "loss": 0.0215,
      "step": 1173900
    },
    {
      "epoch": 1.921145827196376,
      "grad_norm": 0.5665280818939209,
      "learning_rate": 6.198282848915744e-06,
      "loss": 0.0152,
      "step": 1173920
    },
    {
      "epoch": 1.9211785576350295,
      "grad_norm": 0.9120964407920837,
      "learning_rate": 6.198216956702226e-06,
      "loss": 0.0161,
      "step": 1173940
    },
    {
      "epoch": 1.9212112880736827,
      "grad_norm": 1.0935673713684082,
      "learning_rate": 6.19815106448871e-06,
      "loss": 0.0131,
      "step": 1173960
    },
    {
      "epoch": 1.921244018512336,
      "grad_norm": 0.4475114643573761,
      "learning_rate": 6.1980851722751925e-06,
      "loss": 0.0152,
      "step": 1173980
    },
    {
      "epoch": 1.9212767489509894,
      "grad_norm": 0.26930713653564453,
      "learning_rate": 6.198019280061675e-06,
      "loss": 0.0118,
      "step": 1174000
    },
    {
      "epoch": 1.9213094793896428,
      "grad_norm": 0.7585636377334595,
      "learning_rate": 6.197953387848159e-06,
      "loss": 0.0167,
      "step": 1174020
    },
    {
      "epoch": 1.9213422098282962,
      "grad_norm": 1.1045994758605957,
      "learning_rate": 6.197887495634642e-06,
      "loss": 0.0169,
      "step": 1174040
    },
    {
      "epoch": 1.9213749402669493,
      "grad_norm": 0.2974470257759094,
      "learning_rate": 6.197821603421124e-06,
      "loss": 0.0185,
      "step": 1174060
    },
    {
      "epoch": 1.921407670705603,
      "grad_norm": 0.6412075757980347,
      "learning_rate": 6.197755711207607e-06,
      "loss": 0.0171,
      "step": 1174080
    },
    {
      "epoch": 1.921440401144256,
      "grad_norm": 0.1410323679447174,
      "learning_rate": 6.197689818994091e-06,
      "loss": 0.0224,
      "step": 1174100
    },
    {
      "epoch": 1.9214731315829094,
      "grad_norm": 1.1289260387420654,
      "learning_rate": 6.1976239267805726e-06,
      "loss": 0.0177,
      "step": 1174120
    },
    {
      "epoch": 1.9215058620215628,
      "grad_norm": 0.6584008932113647,
      "learning_rate": 6.197558034567056e-06,
      "loss": 0.0162,
      "step": 1174140
    },
    {
      "epoch": 1.9215385924602162,
      "grad_norm": 0.22900225222110748,
      "learning_rate": 6.197492142353538e-06,
      "loss": 0.0147,
      "step": 1174160
    },
    {
      "epoch": 1.9215713228988696,
      "grad_norm": 0.4630782902240753,
      "learning_rate": 6.197426250140022e-06,
      "loss": 0.0167,
      "step": 1174180
    },
    {
      "epoch": 1.9216040533375227,
      "grad_norm": 0.12958727777004242,
      "learning_rate": 6.197360357926504e-06,
      "loss": 0.0159,
      "step": 1174200
    },
    {
      "epoch": 1.9216367837761763,
      "grad_norm": 0.27277880907058716,
      "learning_rate": 6.197294465712987e-06,
      "loss": 0.026,
      "step": 1174220
    },
    {
      "epoch": 1.9216695142148295,
      "grad_norm": 0.4115653336048126,
      "learning_rate": 6.19722857349947e-06,
      "loss": 0.023,
      "step": 1174240
    },
    {
      "epoch": 1.9217022446534828,
      "grad_norm": 1.985809087753296,
      "learning_rate": 6.1971626812859534e-06,
      "loss": 0.0181,
      "step": 1174260
    },
    {
      "epoch": 1.9217349750921362,
      "grad_norm": 0.3799986243247986,
      "learning_rate": 6.197096789072435e-06,
      "loss": 0.0214,
      "step": 1174280
    },
    {
      "epoch": 1.9217677055307896,
      "grad_norm": 0.7407832741737366,
      "learning_rate": 6.197030896858919e-06,
      "loss": 0.0177,
      "step": 1174300
    },
    {
      "epoch": 1.921800435969443,
      "grad_norm": NaN,
      "learning_rate": 6.196965004645401e-06,
      "loss": 0.0206,
      "step": 1174320
    },
    {
      "epoch": 1.921833166408096,
      "grad_norm": 0.7797203660011292,
      "learning_rate": 6.196899112431884e-06,
      "loss": 0.0187,
      "step": 1174340
    },
    {
      "epoch": 1.9218658968467497,
      "grad_norm": 0.43864625692367554,
      "learning_rate": 6.196833220218367e-06,
      "loss": 0.0219,
      "step": 1174360
    },
    {
      "epoch": 1.9218986272854028,
      "grad_norm": 0.07033918797969818,
      "learning_rate": 6.19676732800485e-06,
      "loss": 0.0172,
      "step": 1174380
    },
    {
      "epoch": 1.9219313577240562,
      "grad_norm": 0.07647446542978287,
      "learning_rate": 6.1967014357913335e-06,
      "loss": 0.0101,
      "step": 1174400
    },
    {
      "epoch": 1.9219640881627096,
      "grad_norm": 2.026405096054077,
      "learning_rate": 6.196635543577816e-06,
      "loss": 0.0217,
      "step": 1174420
    },
    {
      "epoch": 1.9219968186013627,
      "grad_norm": 0.926337718963623,
      "learning_rate": 6.196569651364299e-06,
      "loss": 0.0146,
      "step": 1174440
    },
    {
      "epoch": 1.9220295490400163,
      "grad_norm": 1.1027382612228394,
      "learning_rate": 6.196503759150782e-06,
      "loss": 0.0203,
      "step": 1174460
    },
    {
      "epoch": 1.9220622794786695,
      "grad_norm": 0.1502033919095993,
      "learning_rate": 6.196437866937265e-06,
      "loss": 0.0153,
      "step": 1174480
    },
    {
      "epoch": 1.922095009917323,
      "grad_norm": 0.3343471586704254,
      "learning_rate": 6.196371974723747e-06,
      "loss": 0.0224,
      "step": 1174500
    },
    {
      "epoch": 1.9221277403559762,
      "grad_norm": 1.318940281867981,
      "learning_rate": 6.196306082510231e-06,
      "loss": 0.0191,
      "step": 1174520
    },
    {
      "epoch": 1.9221604707946296,
      "grad_norm": 0.6861463189125061,
      "learning_rate": 6.196240190296713e-06,
      "loss": 0.0165,
      "step": 1174540
    },
    {
      "epoch": 1.922193201233283,
      "grad_norm": 0.7591241002082825,
      "learning_rate": 6.196174298083196e-06,
      "loss": 0.0127,
      "step": 1174560
    },
    {
      "epoch": 1.9222259316719361,
      "grad_norm": 0.2647910416126251,
      "learning_rate": 6.196108405869679e-06,
      "loss": 0.0182,
      "step": 1174580
    },
    {
      "epoch": 1.9222586621105897,
      "grad_norm": 0.47078344225883484,
      "learning_rate": 6.196042513656162e-06,
      "loss": 0.0159,
      "step": 1174600
    },
    {
      "epoch": 1.9222913925492429,
      "grad_norm": 0.5922239422798157,
      "learning_rate": 6.1959766214426444e-06,
      "loss": 0.0116,
      "step": 1174620
    },
    {
      "epoch": 1.9223241229878962,
      "grad_norm": 0.338610976934433,
      "learning_rate": 6.195910729229128e-06,
      "loss": 0.0247,
      "step": 1174640
    },
    {
      "epoch": 1.9223568534265496,
      "grad_norm": 0.18806488811969757,
      "learning_rate": 6.19584483701561e-06,
      "loss": 0.0188,
      "step": 1174660
    },
    {
      "epoch": 1.922389583865203,
      "grad_norm": 0.19770418107509613,
      "learning_rate": 6.1957789448020935e-06,
      "loss": 0.0253,
      "step": 1174680
    },
    {
      "epoch": 1.9224223143038563,
      "grad_norm": 0.5628039836883545,
      "learning_rate": 6.195713052588575e-06,
      "loss": 0.0208,
      "step": 1174700
    },
    {
      "epoch": 1.9224550447425095,
      "grad_norm": 0.8757843375205994,
      "learning_rate": 6.195647160375059e-06,
      "loss": 0.0174,
      "step": 1174720
    },
    {
      "epoch": 1.922487775181163,
      "grad_norm": 0.35568082332611084,
      "learning_rate": 6.1955812681615426e-06,
      "loss": 0.019,
      "step": 1174740
    },
    {
      "epoch": 1.9225205056198162,
      "grad_norm": 0.3899941146373749,
      "learning_rate": 6.1955153759480245e-06,
      "loss": 0.0101,
      "step": 1174760
    },
    {
      "epoch": 1.9225532360584696,
      "grad_norm": 0.8012840747833252,
      "learning_rate": 6.195449483734508e-06,
      "loss": 0.0228,
      "step": 1174780
    },
    {
      "epoch": 1.922585966497123,
      "grad_norm": 1.4272191524505615,
      "learning_rate": 6.19538359152099e-06,
      "loss": 0.0184,
      "step": 1174800
    },
    {
      "epoch": 1.9226186969357764,
      "grad_norm": 0.587421715259552,
      "learning_rate": 6.1953176993074735e-06,
      "loss": 0.0151,
      "step": 1174820
    },
    {
      "epoch": 1.9226514273744297,
      "grad_norm": 0.19243791699409485,
      "learning_rate": 6.195251807093956e-06,
      "loss": 0.018,
      "step": 1174840
    },
    {
      "epoch": 1.9226841578130829,
      "grad_norm": 0.3906590938568115,
      "learning_rate": 6.195185914880439e-06,
      "loss": 0.0131,
      "step": 1174860
    },
    {
      "epoch": 1.9227168882517365,
      "grad_norm": 0.23280926048755646,
      "learning_rate": 6.195120022666922e-06,
      "loss": 0.0209,
      "step": 1174880
    },
    {
      "epoch": 1.9227496186903896,
      "grad_norm": 0.11339014023542404,
      "learning_rate": 6.195054130453405e-06,
      "loss": 0.0186,
      "step": 1174900
    },
    {
      "epoch": 1.922782349129043,
      "grad_norm": 0.5772890448570251,
      "learning_rate": 6.194988238239887e-06,
      "loss": 0.0182,
      "step": 1174920
    },
    {
      "epoch": 1.9228150795676964,
      "grad_norm": 0.6267529129981995,
      "learning_rate": 6.194922346026371e-06,
      "loss": 0.0169,
      "step": 1174940
    },
    {
      "epoch": 1.9228478100063497,
      "grad_norm": 11.832441329956055,
      "learning_rate": 6.194856453812853e-06,
      "loss": 0.0235,
      "step": 1174960
    },
    {
      "epoch": 1.9228805404450031,
      "grad_norm": 0.5805454254150391,
      "learning_rate": 6.194790561599336e-06,
      "loss": 0.02,
      "step": 1174980
    },
    {
      "epoch": 1.9229132708836563,
      "grad_norm": 0.4596659243106842,
      "learning_rate": 6.194724669385819e-06,
      "loss": 0.0125,
      "step": 1175000
    },
    {
      "epoch": 1.9229460013223099,
      "grad_norm": 0.11135595291852951,
      "learning_rate": 6.194658777172302e-06,
      "loss": 0.0193,
      "step": 1175020
    },
    {
      "epoch": 1.922978731760963,
      "grad_norm": 0.24998115003108978,
      "learning_rate": 6.1945928849587845e-06,
      "loss": 0.0158,
      "step": 1175040
    },
    {
      "epoch": 1.9230114621996164,
      "grad_norm": 0.5184581875801086,
      "learning_rate": 6.194526992745268e-06,
      "loss": 0.0196,
      "step": 1175060
    },
    {
      "epoch": 1.9230441926382698,
      "grad_norm": 0.5358587503433228,
      "learning_rate": 6.194461100531751e-06,
      "loss": 0.0163,
      "step": 1175080
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 0.5642287135124207,
      "learning_rate": 6.194395208318234e-06,
      "loss": 0.0133,
      "step": 1175100
    },
    {
      "epoch": 1.9231096535155765,
      "grad_norm": 0.21833425760269165,
      "learning_rate": 6.194329316104717e-06,
      "loss": 0.0103,
      "step": 1175120
    },
    {
      "epoch": 1.9231423839542297,
      "grad_norm": 0.38676533102989197,
      "learning_rate": 6.194263423891199e-06,
      "loss": 0.0187,
      "step": 1175140
    },
    {
      "epoch": 1.9231751143928832,
      "grad_norm": 0.5756140351295471,
      "learning_rate": 6.194197531677683e-06,
      "loss": 0.0128,
      "step": 1175160
    },
    {
      "epoch": 1.9232078448315364,
      "grad_norm": 0.19145402312278748,
      "learning_rate": 6.1941316394641645e-06,
      "loss": 0.017,
      "step": 1175180
    },
    {
      "epoch": 1.9232405752701898,
      "grad_norm": 1.4958308935165405,
      "learning_rate": 6.194065747250648e-06,
      "loss": 0.0151,
      "step": 1175200
    },
    {
      "epoch": 1.9232733057088431,
      "grad_norm": 0.7049445509910583,
      "learning_rate": 6.193999855037131e-06,
      "loss": 0.0231,
      "step": 1175220
    },
    {
      "epoch": 1.9233060361474963,
      "grad_norm": 0.3254826068878174,
      "learning_rate": 6.193933962823614e-06,
      "loss": 0.0158,
      "step": 1175240
    },
    {
      "epoch": 1.9233387665861499,
      "grad_norm": 0.7130314111709595,
      "learning_rate": 6.193868070610096e-06,
      "loss": 0.0135,
      "step": 1175260
    },
    {
      "epoch": 1.923371497024803,
      "grad_norm": 0.1580880880355835,
      "learning_rate": 6.19380217839658e-06,
      "loss": 0.0238,
      "step": 1175280
    },
    {
      "epoch": 1.9234042274634566,
      "grad_norm": 0.06748313456773758,
      "learning_rate": 6.193736286183062e-06,
      "loss": 0.0181,
      "step": 1175300
    },
    {
      "epoch": 1.9234369579021098,
      "grad_norm": 0.7474624514579773,
      "learning_rate": 6.193670393969545e-06,
      "loss": 0.0169,
      "step": 1175320
    },
    {
      "epoch": 1.9234696883407632,
      "grad_norm": 0.16854876279830933,
      "learning_rate": 6.193604501756027e-06,
      "loss": 0.0188,
      "step": 1175340
    },
    {
      "epoch": 1.9235024187794165,
      "grad_norm": 0.6436309218406677,
      "learning_rate": 6.193538609542511e-06,
      "loss": 0.0202,
      "step": 1175360
    },
    {
      "epoch": 1.9235351492180697,
      "grad_norm": 0.20134933292865753,
      "learning_rate": 6.193472717328994e-06,
      "loss": 0.0159,
      "step": 1175380
    },
    {
      "epoch": 1.9235678796567233,
      "grad_norm": 0.08626328408718109,
      "learning_rate": 6.193406825115476e-06,
      "loss": 0.0164,
      "step": 1175400
    },
    {
      "epoch": 1.9236006100953764,
      "grad_norm": 0.49856919050216675,
      "learning_rate": 6.19334093290196e-06,
      "loss": 0.0205,
      "step": 1175420
    },
    {
      "epoch": 1.9236333405340298,
      "grad_norm": 0.5382188558578491,
      "learning_rate": 6.193275040688443e-06,
      "loss": 0.013,
      "step": 1175440
    },
    {
      "epoch": 1.9236660709726832,
      "grad_norm": 0.53914475440979,
      "learning_rate": 6.1932091484749254e-06,
      "loss": 0.0152,
      "step": 1175460
    },
    {
      "epoch": 1.9236988014113365,
      "grad_norm": 0.2371441125869751,
      "learning_rate": 6.193143256261408e-06,
      "loss": 0.0158,
      "step": 1175480
    },
    {
      "epoch": 1.92373153184999,
      "grad_norm": 0.7994690537452698,
      "learning_rate": 6.193077364047892e-06,
      "loss": 0.0168,
      "step": 1175500
    },
    {
      "epoch": 1.923764262288643,
      "grad_norm": 0.5277066826820374,
      "learning_rate": 6.193011471834374e-06,
      "loss": 0.0157,
      "step": 1175520
    },
    {
      "epoch": 1.9237969927272967,
      "grad_norm": 0.4954223334789276,
      "learning_rate": 6.192945579620857e-06,
      "loss": 0.0204,
      "step": 1175540
    },
    {
      "epoch": 1.9238297231659498,
      "grad_norm": 0.10214978456497192,
      "learning_rate": 6.192879687407339e-06,
      "loss": 0.0174,
      "step": 1175560
    },
    {
      "epoch": 1.9238624536046032,
      "grad_norm": 0.17216230928897858,
      "learning_rate": 6.192813795193823e-06,
      "loss": 0.0197,
      "step": 1175580
    },
    {
      "epoch": 1.9238951840432565,
      "grad_norm": 0.8742263913154602,
      "learning_rate": 6.192747902980305e-06,
      "loss": 0.0227,
      "step": 1175600
    },
    {
      "epoch": 1.92392791448191,
      "grad_norm": 0.657849907875061,
      "learning_rate": 6.192682010766788e-06,
      "loss": 0.0259,
      "step": 1175620
    },
    {
      "epoch": 1.9239606449205633,
      "grad_norm": 0.659954309463501,
      "learning_rate": 6.192616118553271e-06,
      "loss": 0.0142,
      "step": 1175640
    },
    {
      "epoch": 1.9239933753592164,
      "grad_norm": 0.47597557306289673,
      "learning_rate": 6.1925502263397545e-06,
      "loss": 0.0186,
      "step": 1175660
    },
    {
      "epoch": 1.92402610579787,
      "grad_norm": 0.5265977382659912,
      "learning_rate": 6.192484334126236e-06,
      "loss": 0.024,
      "step": 1175680
    },
    {
      "epoch": 1.9240588362365232,
      "grad_norm": 0.6545737385749817,
      "learning_rate": 6.19241844191272e-06,
      "loss": 0.0217,
      "step": 1175700
    },
    {
      "epoch": 1.9240915666751766,
      "grad_norm": 0.7290865182876587,
      "learning_rate": 6.192352549699202e-06,
      "loss": 0.0318,
      "step": 1175720
    },
    {
      "epoch": 1.92412429711383,
      "grad_norm": 0.4396509826183319,
      "learning_rate": 6.1922866574856855e-06,
      "loss": 0.0189,
      "step": 1175740
    },
    {
      "epoch": 1.9241570275524833,
      "grad_norm": 0.8475087285041809,
      "learning_rate": 6.192220765272167e-06,
      "loss": 0.0195,
      "step": 1175760
    },
    {
      "epoch": 1.9241897579911367,
      "grad_norm": 0.6012837290763855,
      "learning_rate": 6.192154873058651e-06,
      "loss": 0.0189,
      "step": 1175780
    },
    {
      "epoch": 1.9242224884297898,
      "grad_norm": 1.0971726179122925,
      "learning_rate": 6.1920889808451346e-06,
      "loss": 0.024,
      "step": 1175800
    },
    {
      "epoch": 1.9242552188684434,
      "grad_norm": 0.2645726203918457,
      "learning_rate": 6.1920230886316164e-06,
      "loss": 0.0192,
      "step": 1175820
    },
    {
      "epoch": 1.9242879493070966,
      "grad_norm": 0.39653733372688293,
      "learning_rate": 6.1919571964181e-06,
      "loss": 0.0125,
      "step": 1175840
    },
    {
      "epoch": 1.92432067974575,
      "grad_norm": 0.6229570508003235,
      "learning_rate": 6.191891304204583e-06,
      "loss": 0.0197,
      "step": 1175860
    },
    {
      "epoch": 1.9243534101844033,
      "grad_norm": 0.6160222291946411,
      "learning_rate": 6.1918254119910655e-06,
      "loss": 0.0194,
      "step": 1175880
    },
    {
      "epoch": 1.9243861406230567,
      "grad_norm": 0.8036445379257202,
      "learning_rate": 6.191759519777548e-06,
      "loss": 0.0208,
      "step": 1175900
    },
    {
      "epoch": 1.92441887106171,
      "grad_norm": 1.0744558572769165,
      "learning_rate": 6.191693627564032e-06,
      "loss": 0.0225,
      "step": 1175920
    },
    {
      "epoch": 1.9244516015003632,
      "grad_norm": 0.3224155306816101,
      "learning_rate": 6.191627735350514e-06,
      "loss": 0.0256,
      "step": 1175940
    },
    {
      "epoch": 1.9244843319390168,
      "grad_norm": 0.13063476979732513,
      "learning_rate": 6.191561843136997e-06,
      "loss": 0.0139,
      "step": 1175960
    },
    {
      "epoch": 1.92451706237767,
      "grad_norm": 0.7826693654060364,
      "learning_rate": 6.191495950923479e-06,
      "loss": 0.0176,
      "step": 1175980
    },
    {
      "epoch": 1.9245497928163233,
      "grad_norm": 2.6172263622283936,
      "learning_rate": 6.191430058709963e-06,
      "loss": 0.0177,
      "step": 1176000
    },
    {
      "epoch": 1.9245825232549767,
      "grad_norm": 0.13824526965618134,
      "learning_rate": 6.1913641664964455e-06,
      "loss": 0.0184,
      "step": 1176020
    },
    {
      "epoch": 1.9246152536936298,
      "grad_norm": 0.722912073135376,
      "learning_rate": 6.191298274282928e-06,
      "loss": 0.0212,
      "step": 1176040
    },
    {
      "epoch": 1.9246479841322834,
      "grad_norm": 0.4749395251274109,
      "learning_rate": 6.191232382069411e-06,
      "loss": 0.0136,
      "step": 1176060
    },
    {
      "epoch": 1.9246807145709366,
      "grad_norm": 0.8443830609321594,
      "learning_rate": 6.191166489855895e-06,
      "loss": 0.0129,
      "step": 1176080
    },
    {
      "epoch": 1.9247134450095902,
      "grad_norm": 0.8664548993110657,
      "learning_rate": 6.1911005976423765e-06,
      "loss": 0.0137,
      "step": 1176100
    },
    {
      "epoch": 1.9247461754482433,
      "grad_norm": 0.984176516532898,
      "learning_rate": 6.19103470542886e-06,
      "loss": 0.0174,
      "step": 1176120
    },
    {
      "epoch": 1.9247789058868967,
      "grad_norm": 0.2807570695877075,
      "learning_rate": 6.190968813215344e-06,
      "loss": 0.0163,
      "step": 1176140
    },
    {
      "epoch": 1.92481163632555,
      "grad_norm": 0.1260749250650406,
      "learning_rate": 6.1909029210018256e-06,
      "loss": 0.0219,
      "step": 1176160
    },
    {
      "epoch": 1.9248443667642032,
      "grad_norm": 0.587425172328949,
      "learning_rate": 6.190837028788309e-06,
      "loss": 0.0149,
      "step": 1176180
    },
    {
      "epoch": 1.9248770972028568,
      "grad_norm": 0.3395393490791321,
      "learning_rate": 6.190771136574791e-06,
      "loss": 0.0114,
      "step": 1176200
    },
    {
      "epoch": 1.92490982764151,
      "grad_norm": 0.06103481352329254,
      "learning_rate": 6.190705244361275e-06,
      "loss": 0.014,
      "step": 1176220
    },
    {
      "epoch": 1.9249425580801633,
      "grad_norm": 0.16782230138778687,
      "learning_rate": 6.190639352147757e-06,
      "loss": 0.0286,
      "step": 1176240
    },
    {
      "epoch": 1.9249752885188167,
      "grad_norm": 0.3290693759918213,
      "learning_rate": 6.19057345993424e-06,
      "loss": 0.016,
      "step": 1176260
    },
    {
      "epoch": 1.92500801895747,
      "grad_norm": 0.7376677989959717,
      "learning_rate": 6.190507567720723e-06,
      "loss": 0.0137,
      "step": 1176280
    },
    {
      "epoch": 1.9250407493961235,
      "grad_norm": 0.7287271618843079,
      "learning_rate": 6.1904416755072064e-06,
      "loss": 0.0144,
      "step": 1176300
    },
    {
      "epoch": 1.9250734798347766,
      "grad_norm": 0.4572169780731201,
      "learning_rate": 6.190375783293688e-06,
      "loss": 0.0126,
      "step": 1176320
    },
    {
      "epoch": 1.9251062102734302,
      "grad_norm": 0.8913576006889343,
      "learning_rate": 6.190309891080172e-06,
      "loss": 0.0237,
      "step": 1176340
    },
    {
      "epoch": 1.9251389407120834,
      "grad_norm": 0.7899566292762756,
      "learning_rate": 6.190243998866654e-06,
      "loss": 0.0252,
      "step": 1176360
    },
    {
      "epoch": 1.9251716711507367,
      "grad_norm": 0.6975441575050354,
      "learning_rate": 6.190178106653137e-06,
      "loss": 0.0255,
      "step": 1176380
    },
    {
      "epoch": 1.92520440158939,
      "grad_norm": 0.425021767616272,
      "learning_rate": 6.19011221443962e-06,
      "loss": 0.0106,
      "step": 1176400
    },
    {
      "epoch": 1.9252371320280435,
      "grad_norm": 0.310639351606369,
      "learning_rate": 6.190046322226103e-06,
      "loss": 0.0155,
      "step": 1176420
    },
    {
      "epoch": 1.9252698624666968,
      "grad_norm": 0.3119034171104431,
      "learning_rate": 6.189980430012586e-06,
      "loss": 0.0214,
      "step": 1176440
    },
    {
      "epoch": 1.92530259290535,
      "grad_norm": 0.15921539068222046,
      "learning_rate": 6.189914537799069e-06,
      "loss": 0.0189,
      "step": 1176460
    },
    {
      "epoch": 1.9253353233440036,
      "grad_norm": 0.4077121317386627,
      "learning_rate": 6.189848645585552e-06,
      "loss": 0.0154,
      "step": 1176480
    },
    {
      "epoch": 1.9253680537826567,
      "grad_norm": 0.17587390542030334,
      "learning_rate": 6.189782753372035e-06,
      "loss": 0.0189,
      "step": 1176500
    },
    {
      "epoch": 1.9254007842213101,
      "grad_norm": 0.36155739426612854,
      "learning_rate": 6.189716861158518e-06,
      "loss": 0.018,
      "step": 1176520
    },
    {
      "epoch": 1.9254335146599635,
      "grad_norm": 0.5824143886566162,
      "learning_rate": 6.189650968945e-06,
      "loss": 0.0157,
      "step": 1176540
    },
    {
      "epoch": 1.9254662450986169,
      "grad_norm": 0.8165348172187805,
      "learning_rate": 6.189585076731484e-06,
      "loss": 0.0281,
      "step": 1176560
    },
    {
      "epoch": 1.9254989755372702,
      "grad_norm": 0.49474596977233887,
      "learning_rate": 6.189519184517966e-06,
      "loss": 0.0169,
      "step": 1176580
    },
    {
      "epoch": 1.9255317059759234,
      "grad_norm": 0.3014857769012451,
      "learning_rate": 6.189453292304449e-06,
      "loss": 0.0226,
      "step": 1176600
    },
    {
      "epoch": 1.925564436414577,
      "grad_norm": 0.3692074120044708,
      "learning_rate": 6.189387400090931e-06,
      "loss": 0.0206,
      "step": 1176620
    },
    {
      "epoch": 1.9255971668532301,
      "grad_norm": 0.29486486315727234,
      "learning_rate": 6.189321507877415e-06,
      "loss": 0.0154,
      "step": 1176640
    },
    {
      "epoch": 1.9256298972918835,
      "grad_norm": 0.4623313546180725,
      "learning_rate": 6.1892556156638974e-06,
      "loss": 0.0249,
      "step": 1176660
    },
    {
      "epoch": 1.9256626277305369,
      "grad_norm": 0.27487435936927795,
      "learning_rate": 6.18918972345038e-06,
      "loss": 0.0165,
      "step": 1176680
    },
    {
      "epoch": 1.92569535816919,
      "grad_norm": 0.6044173836708069,
      "learning_rate": 6.189123831236863e-06,
      "loss": 0.0191,
      "step": 1176700
    },
    {
      "epoch": 1.9257280886078436,
      "grad_norm": 1.4111354351043701,
      "learning_rate": 6.1890579390233465e-06,
      "loss": 0.0151,
      "step": 1176720
    },
    {
      "epoch": 1.9257608190464968,
      "grad_norm": 0.3812899887561798,
      "learning_rate": 6.188992046809828e-06,
      "loss": 0.019,
      "step": 1176740
    },
    {
      "epoch": 1.9257935494851504,
      "grad_norm": 0.3028161823749542,
      "learning_rate": 6.188926154596312e-06,
      "loss": 0.0182,
      "step": 1176760
    },
    {
      "epoch": 1.9258262799238035,
      "grad_norm": 0.13107310235500336,
      "learning_rate": 6.188860262382794e-06,
      "loss": 0.0104,
      "step": 1176780
    },
    {
      "epoch": 1.9258590103624569,
      "grad_norm": 0.517767608165741,
      "learning_rate": 6.1887943701692775e-06,
      "loss": 0.0154,
      "step": 1176800
    },
    {
      "epoch": 1.9258917408011103,
      "grad_norm": 0.4210808575153351,
      "learning_rate": 6.18872847795576e-06,
      "loss": 0.0245,
      "step": 1176820
    },
    {
      "epoch": 1.9259244712397634,
      "grad_norm": 0.8777729272842407,
      "learning_rate": 6.188662585742243e-06,
      "loss": 0.0192,
      "step": 1176840
    },
    {
      "epoch": 1.925957201678417,
      "grad_norm": 0.05173513665795326,
      "learning_rate": 6.1885966935287265e-06,
      "loss": 0.0226,
      "step": 1176860
    },
    {
      "epoch": 1.9259899321170701,
      "grad_norm": 0.5495457053184509,
      "learning_rate": 6.188530801315209e-06,
      "loss": 0.015,
      "step": 1176880
    },
    {
      "epoch": 1.9260226625557235,
      "grad_norm": 0.1269521564245224,
      "learning_rate": 6.188464909101692e-06,
      "loss": 0.0168,
      "step": 1176900
    },
    {
      "epoch": 1.926055392994377,
      "grad_norm": 0.3555279076099396,
      "learning_rate": 6.188399016888175e-06,
      "loss": 0.0215,
      "step": 1176920
    },
    {
      "epoch": 1.9260881234330303,
      "grad_norm": 0.29950010776519775,
      "learning_rate": 6.188333124674658e-06,
      "loss": 0.0156,
      "step": 1176940
    },
    {
      "epoch": 1.9261208538716836,
      "grad_norm": 0.37288063764572144,
      "learning_rate": 6.18826723246114e-06,
      "loss": 0.0134,
      "step": 1176960
    },
    {
      "epoch": 1.9261535843103368,
      "grad_norm": 1.0125771760940552,
      "learning_rate": 6.188201340247624e-06,
      "loss": 0.0155,
      "step": 1176980
    },
    {
      "epoch": 1.9261863147489904,
      "grad_norm": 0.17445573210716248,
      "learning_rate": 6.188135448034106e-06,
      "loss": 0.0137,
      "step": 1177000
    },
    {
      "epoch": 1.9262190451876435,
      "grad_norm": 0.5284199714660645,
      "learning_rate": 6.188069555820589e-06,
      "loss": 0.0246,
      "step": 1177020
    },
    {
      "epoch": 1.926251775626297,
      "grad_norm": 1.517220139503479,
      "learning_rate": 6.188003663607072e-06,
      "loss": 0.0196,
      "step": 1177040
    },
    {
      "epoch": 1.9262845060649503,
      "grad_norm": 0.6820918321609497,
      "learning_rate": 6.187937771393555e-06,
      "loss": 0.0122,
      "step": 1177060
    },
    {
      "epoch": 1.9263172365036036,
      "grad_norm": 0.25962090492248535,
      "learning_rate": 6.1878718791800375e-06,
      "loss": 0.0145,
      "step": 1177080
    },
    {
      "epoch": 1.926349966942257,
      "grad_norm": 0.42263415455818176,
      "learning_rate": 6.187805986966521e-06,
      "loss": 0.0142,
      "step": 1177100
    },
    {
      "epoch": 1.9263826973809102,
      "grad_norm": 1.557779312133789,
      "learning_rate": 6.187740094753003e-06,
      "loss": 0.022,
      "step": 1177120
    },
    {
      "epoch": 1.9264154278195638,
      "grad_norm": 0.9237872362136841,
      "learning_rate": 6.187674202539487e-06,
      "loss": 0.021,
      "step": 1177140
    },
    {
      "epoch": 1.926448158258217,
      "grad_norm": 0.6460991501808167,
      "learning_rate": 6.1876083103259685e-06,
      "loss": 0.0192,
      "step": 1177160
    },
    {
      "epoch": 1.9264808886968703,
      "grad_norm": 0.47214657068252563,
      "learning_rate": 6.187542418112452e-06,
      "loss": 0.0197,
      "step": 1177180
    },
    {
      "epoch": 1.9265136191355237,
      "grad_norm": 0.35280540585517883,
      "learning_rate": 6.187476525898936e-06,
      "loss": 0.0198,
      "step": 1177200
    },
    {
      "epoch": 1.926546349574177,
      "grad_norm": 0.7843561768531799,
      "learning_rate": 6.1874106336854175e-06,
      "loss": 0.0169,
      "step": 1177220
    },
    {
      "epoch": 1.9265790800128304,
      "grad_norm": 0.317889004945755,
      "learning_rate": 6.187344741471901e-06,
      "loss": 0.0187,
      "step": 1177240
    },
    {
      "epoch": 1.9266118104514836,
      "grad_norm": 0.24797889590263367,
      "learning_rate": 6.187278849258384e-06,
      "loss": 0.0112,
      "step": 1177260
    },
    {
      "epoch": 1.9266445408901371,
      "grad_norm": 0.548126757144928,
      "learning_rate": 6.187212957044867e-06,
      "loss": 0.0162,
      "step": 1177280
    },
    {
      "epoch": 1.9266772713287903,
      "grad_norm": 0.321405827999115,
      "learning_rate": 6.187147064831349e-06,
      "loss": 0.0199,
      "step": 1177300
    },
    {
      "epoch": 1.9267100017674437,
      "grad_norm": 0.24495506286621094,
      "learning_rate": 6.187081172617833e-06,
      "loss": 0.0137,
      "step": 1177320
    },
    {
      "epoch": 1.926742732206097,
      "grad_norm": 0.12729674577713013,
      "learning_rate": 6.187015280404315e-06,
      "loss": 0.014,
      "step": 1177340
    },
    {
      "epoch": 1.9267754626447504,
      "grad_norm": 0.9899797439575195,
      "learning_rate": 6.186949388190798e-06,
      "loss": 0.0189,
      "step": 1177360
    },
    {
      "epoch": 1.9268081930834038,
      "grad_norm": 0.20959807932376862,
      "learning_rate": 6.18688349597728e-06,
      "loss": 0.0178,
      "step": 1177380
    },
    {
      "epoch": 1.926840923522057,
      "grad_norm": 0.13786138594150543,
      "learning_rate": 6.186817603763764e-06,
      "loss": 0.0181,
      "step": 1177400
    },
    {
      "epoch": 1.9268736539607105,
      "grad_norm": 1.1969584226608276,
      "learning_rate": 6.186751711550247e-06,
      "loss": 0.0149,
      "step": 1177420
    },
    {
      "epoch": 1.9269063843993637,
      "grad_norm": 0.198025643825531,
      "learning_rate": 6.186685819336729e-06,
      "loss": 0.0148,
      "step": 1177440
    },
    {
      "epoch": 1.926939114838017,
      "grad_norm": 0.31146982312202454,
      "learning_rate": 6.186619927123212e-06,
      "loss": 0.0171,
      "step": 1177460
    },
    {
      "epoch": 1.9269718452766704,
      "grad_norm": 1.0828660726547241,
      "learning_rate": 6.186554034909696e-06,
      "loss": 0.0188,
      "step": 1177480
    },
    {
      "epoch": 1.9270045757153236,
      "grad_norm": 0.6558162569999695,
      "learning_rate": 6.186488142696178e-06,
      "loss": 0.0135,
      "step": 1177500
    },
    {
      "epoch": 1.9270373061539772,
      "grad_norm": 0.8398255705833435,
      "learning_rate": 6.186422250482661e-06,
      "loss": 0.0207,
      "step": 1177520
    },
    {
      "epoch": 1.9270700365926303,
      "grad_norm": 0.4681355357170105,
      "learning_rate": 6.186356358269145e-06,
      "loss": 0.017,
      "step": 1177540
    },
    {
      "epoch": 1.927102767031284,
      "grad_norm": 0.38062047958374023,
      "learning_rate": 6.186290466055627e-06,
      "loss": 0.0155,
      "step": 1177560
    },
    {
      "epoch": 1.927135497469937,
      "grad_norm": 0.46522632241249084,
      "learning_rate": 6.18622457384211e-06,
      "loss": 0.0174,
      "step": 1177580
    },
    {
      "epoch": 1.9271682279085904,
      "grad_norm": 0.6360587477684021,
      "learning_rate": 6.186158681628592e-06,
      "loss": 0.019,
      "step": 1177600
    },
    {
      "epoch": 1.9272009583472438,
      "grad_norm": 0.7454711198806763,
      "learning_rate": 6.186092789415076e-06,
      "loss": 0.0157,
      "step": 1177620
    },
    {
      "epoch": 1.927233688785897,
      "grad_norm": 0.5226321220397949,
      "learning_rate": 6.186026897201558e-06,
      "loss": 0.0192,
      "step": 1177640
    },
    {
      "epoch": 1.9272664192245506,
      "grad_norm": 0.44223538041114807,
      "learning_rate": 6.185961004988041e-06,
      "loss": 0.0192,
      "step": 1177660
    },
    {
      "epoch": 1.9272991496632037,
      "grad_norm": 0.4608800411224365,
      "learning_rate": 6.185895112774524e-06,
      "loss": 0.0172,
      "step": 1177680
    },
    {
      "epoch": 1.927331880101857,
      "grad_norm": 0.9606110453605652,
      "learning_rate": 6.185829220561007e-06,
      "loss": 0.0191,
      "step": 1177700
    },
    {
      "epoch": 1.9273646105405104,
      "grad_norm": 0.6969595551490784,
      "learning_rate": 6.1857633283474894e-06,
      "loss": 0.0192,
      "step": 1177720
    },
    {
      "epoch": 1.9273973409791638,
      "grad_norm": 0.26365816593170166,
      "learning_rate": 6.185697436133973e-06,
      "loss": 0.018,
      "step": 1177740
    },
    {
      "epoch": 1.9274300714178172,
      "grad_norm": 1.5764570236206055,
      "learning_rate": 6.185631543920455e-06,
      "loss": 0.0119,
      "step": 1177760
    },
    {
      "epoch": 1.9274628018564703,
      "grad_norm": 0.2701754868030548,
      "learning_rate": 6.1855656517069385e-06,
      "loss": 0.0186,
      "step": 1177780
    },
    {
      "epoch": 1.927495532295124,
      "grad_norm": 0.29145315289497375,
      "learning_rate": 6.18549975949342e-06,
      "loss": 0.014,
      "step": 1177800
    },
    {
      "epoch": 1.927528262733777,
      "grad_norm": 0.45424792170524597,
      "learning_rate": 6.185433867279904e-06,
      "loss": 0.0149,
      "step": 1177820
    },
    {
      "epoch": 1.9275609931724305,
      "grad_norm": 0.14745351672172546,
      "learning_rate": 6.185367975066387e-06,
      "loss": 0.025,
      "step": 1177840
    },
    {
      "epoch": 1.9275937236110838,
      "grad_norm": 0.9340405464172363,
      "learning_rate": 6.1853020828528695e-06,
      "loss": 0.0166,
      "step": 1177860
    },
    {
      "epoch": 1.9276264540497372,
      "grad_norm": 0.6259831786155701,
      "learning_rate": 6.185236190639352e-06,
      "loss": 0.0238,
      "step": 1177880
    },
    {
      "epoch": 1.9276591844883906,
      "grad_norm": 0.47909656167030334,
      "learning_rate": 6.185170298425836e-06,
      "loss": 0.0215,
      "step": 1177900
    },
    {
      "epoch": 1.9276919149270437,
      "grad_norm": 0.5570106506347656,
      "learning_rate": 6.1851044062123185e-06,
      "loss": 0.0242,
      "step": 1177920
    },
    {
      "epoch": 1.9277246453656973,
      "grad_norm": 0.5566302537918091,
      "learning_rate": 6.185038513998801e-06,
      "loss": 0.024,
      "step": 1177940
    },
    {
      "epoch": 1.9277573758043505,
      "grad_norm": 0.32596343755722046,
      "learning_rate": 6.184972621785285e-06,
      "loss": 0.0194,
      "step": 1177960
    },
    {
      "epoch": 1.9277901062430038,
      "grad_norm": 0.13569369912147522,
      "learning_rate": 6.184906729571767e-06,
      "loss": 0.0112,
      "step": 1177980
    },
    {
      "epoch": 1.9278228366816572,
      "grad_norm": 0.10473199188709259,
      "learning_rate": 6.18484083735825e-06,
      "loss": 0.0084,
      "step": 1178000
    },
    {
      "epoch": 1.9278555671203106,
      "grad_norm": 0.40051454305648804,
      "learning_rate": 6.184774945144732e-06,
      "loss": 0.017,
      "step": 1178020
    },
    {
      "epoch": 1.927888297558964,
      "grad_norm": 0.5402420163154602,
      "learning_rate": 6.184709052931216e-06,
      "loss": 0.0247,
      "step": 1178040
    },
    {
      "epoch": 1.927921027997617,
      "grad_norm": 0.3481731116771698,
      "learning_rate": 6.1846431607176985e-06,
      "loss": 0.0119,
      "step": 1178060
    },
    {
      "epoch": 1.9279537584362707,
      "grad_norm": 0.29697972536087036,
      "learning_rate": 6.184577268504181e-06,
      "loss": 0.0108,
      "step": 1178080
    },
    {
      "epoch": 1.9279864888749239,
      "grad_norm": 0.6030595302581787,
      "learning_rate": 6.184511376290664e-06,
      "loss": 0.0229,
      "step": 1178100
    },
    {
      "epoch": 1.9280192193135772,
      "grad_norm": 1.2103726863861084,
      "learning_rate": 6.184445484077148e-06,
      "loss": 0.0195,
      "step": 1178120
    },
    {
      "epoch": 1.9280519497522306,
      "grad_norm": 0.8128750324249268,
      "learning_rate": 6.1843795918636295e-06,
      "loss": 0.0154,
      "step": 1178140
    },
    {
      "epoch": 1.928084680190884,
      "grad_norm": 0.10286743938922882,
      "learning_rate": 6.184313699650113e-06,
      "loss": 0.0201,
      "step": 1178160
    },
    {
      "epoch": 1.9281174106295373,
      "grad_norm": 0.42172518372535706,
      "learning_rate": 6.184247807436595e-06,
      "loss": 0.0208,
      "step": 1178180
    },
    {
      "epoch": 1.9281501410681905,
      "grad_norm": 0.31886646151542664,
      "learning_rate": 6.1841819152230786e-06,
      "loss": 0.0178,
      "step": 1178200
    },
    {
      "epoch": 1.928182871506844,
      "grad_norm": 0.2091672122478485,
      "learning_rate": 6.184116023009561e-06,
      "loss": 0.022,
      "step": 1178220
    },
    {
      "epoch": 1.9282156019454972,
      "grad_norm": 0.0868421196937561,
      "learning_rate": 6.184050130796044e-06,
      "loss": 0.0162,
      "step": 1178240
    },
    {
      "epoch": 1.9282483323841506,
      "grad_norm": 0.5473302006721497,
      "learning_rate": 6.183984238582528e-06,
      "loss": 0.0217,
      "step": 1178260
    },
    {
      "epoch": 1.928281062822804,
      "grad_norm": 1.1878509521484375,
      "learning_rate": 6.18391834636901e-06,
      "loss": 0.0207,
      "step": 1178280
    },
    {
      "epoch": 1.9283137932614571,
      "grad_norm": 0.83994460105896,
      "learning_rate": 6.183852454155493e-06,
      "loss": 0.0162,
      "step": 1178300
    },
    {
      "epoch": 1.9283465237001107,
      "grad_norm": 0.11921072751283646,
      "learning_rate": 6.183786561941976e-06,
      "loss": 0.0187,
      "step": 1178320
    },
    {
      "epoch": 1.9283792541387639,
      "grad_norm": 0.29086464643478394,
      "learning_rate": 6.1837206697284594e-06,
      "loss": 0.0214,
      "step": 1178340
    },
    {
      "epoch": 1.9284119845774175,
      "grad_norm": 0.3563746511936188,
      "learning_rate": 6.183654777514941e-06,
      "loss": 0.0191,
      "step": 1178360
    },
    {
      "epoch": 1.9284447150160706,
      "grad_norm": 0.0498453713953495,
      "learning_rate": 6.183588885301425e-06,
      "loss": 0.0117,
      "step": 1178380
    },
    {
      "epoch": 1.928477445454724,
      "grad_norm": 1.5714900493621826,
      "learning_rate": 6.183522993087907e-06,
      "loss": 0.0238,
      "step": 1178400
    },
    {
      "epoch": 1.9285101758933774,
      "grad_norm": 0.7276965975761414,
      "learning_rate": 6.18345710087439e-06,
      "loss": 0.0177,
      "step": 1178420
    },
    {
      "epoch": 1.9285429063320305,
      "grad_norm": 0.2394944727420807,
      "learning_rate": 6.183391208660873e-06,
      "loss": 0.0219,
      "step": 1178440
    },
    {
      "epoch": 1.928575636770684,
      "grad_norm": 0.33906131982803345,
      "learning_rate": 6.183325316447356e-06,
      "loss": 0.0197,
      "step": 1178460
    },
    {
      "epoch": 1.9286083672093373,
      "grad_norm": 0.8693894743919373,
      "learning_rate": 6.183259424233839e-06,
      "loss": 0.018,
      "step": 1178480
    },
    {
      "epoch": 1.9286410976479906,
      "grad_norm": 0.6807934641838074,
      "learning_rate": 6.183193532020322e-06,
      "loss": 0.0123,
      "step": 1178500
    },
    {
      "epoch": 1.928673828086644,
      "grad_norm": 0.8381405472755432,
      "learning_rate": 6.183127639806804e-06,
      "loss": 0.0182,
      "step": 1178520
    },
    {
      "epoch": 1.9287065585252974,
      "grad_norm": 0.4097614288330078,
      "learning_rate": 6.183061747593288e-06,
      "loss": 0.0135,
      "step": 1178540
    },
    {
      "epoch": 1.9287392889639507,
      "grad_norm": 0.3977421224117279,
      "learning_rate": 6.18299585537977e-06,
      "loss": 0.0147,
      "step": 1178560
    },
    {
      "epoch": 1.928772019402604,
      "grad_norm": 0.2736292779445648,
      "learning_rate": 6.182929963166253e-06,
      "loss": 0.0176,
      "step": 1178580
    },
    {
      "epoch": 1.9288047498412575,
      "grad_norm": 1.2896054983139038,
      "learning_rate": 6.182864070952737e-06,
      "loss": 0.0172,
      "step": 1178600
    },
    {
      "epoch": 1.9288374802799106,
      "grad_norm": 0.16500389575958252,
      "learning_rate": 6.182798178739219e-06,
      "loss": 0.0254,
      "step": 1178620
    },
    {
      "epoch": 1.928870210718564,
      "grad_norm": 0.18670186400413513,
      "learning_rate": 6.182732286525702e-06,
      "loss": 0.0165,
      "step": 1178640
    },
    {
      "epoch": 1.9289029411572174,
      "grad_norm": 1.0693613290786743,
      "learning_rate": 6.182666394312184e-06,
      "loss": 0.0162,
      "step": 1178660
    },
    {
      "epoch": 1.9289356715958708,
      "grad_norm": 0.24123762547969818,
      "learning_rate": 6.182600502098668e-06,
      "loss": 0.0198,
      "step": 1178680
    },
    {
      "epoch": 1.9289684020345241,
      "grad_norm": 0.315247505903244,
      "learning_rate": 6.1825346098851505e-06,
      "loss": 0.0242,
      "step": 1178700
    },
    {
      "epoch": 1.9290011324731773,
      "grad_norm": 0.3271655738353729,
      "learning_rate": 6.182468717671633e-06,
      "loss": 0.0177,
      "step": 1178720
    },
    {
      "epoch": 1.9290338629118309,
      "grad_norm": 0.4673844575881958,
      "learning_rate": 6.182402825458116e-06,
      "loss": 0.0216,
      "step": 1178740
    },
    {
      "epoch": 1.929066593350484,
      "grad_norm": 0.5075463056564331,
      "learning_rate": 6.1823369332445995e-06,
      "loss": 0.0139,
      "step": 1178760
    },
    {
      "epoch": 1.9290993237891374,
      "grad_norm": 0.17082445323467255,
      "learning_rate": 6.182271041031081e-06,
      "loss": 0.0206,
      "step": 1178780
    },
    {
      "epoch": 1.9291320542277908,
      "grad_norm": 0.1909579634666443,
      "learning_rate": 6.182205148817565e-06,
      "loss": 0.0168,
      "step": 1178800
    },
    {
      "epoch": 1.9291647846664441,
      "grad_norm": 0.21242518723011017,
      "learning_rate": 6.182139256604047e-06,
      "loss": 0.0161,
      "step": 1178820
    },
    {
      "epoch": 1.9291975151050975,
      "grad_norm": 0.29392626881599426,
      "learning_rate": 6.1820733643905305e-06,
      "loss": 0.0168,
      "step": 1178840
    },
    {
      "epoch": 1.9292302455437507,
      "grad_norm": 0.3119919002056122,
      "learning_rate": 6.182007472177013e-06,
      "loss": 0.0146,
      "step": 1178860
    },
    {
      "epoch": 1.9292629759824043,
      "grad_norm": 0.33466774225234985,
      "learning_rate": 6.181941579963496e-06,
      "loss": 0.0192,
      "step": 1178880
    },
    {
      "epoch": 1.9292957064210574,
      "grad_norm": 1.7364708185195923,
      "learning_rate": 6.181875687749979e-06,
      "loss": 0.0151,
      "step": 1178900
    },
    {
      "epoch": 1.9293284368597108,
      "grad_norm": 0.8569269776344299,
      "learning_rate": 6.181809795536462e-06,
      "loss": 0.0173,
      "step": 1178920
    },
    {
      "epoch": 1.9293611672983642,
      "grad_norm": 0.29581981897354126,
      "learning_rate": 6.181743903322945e-06,
      "loss": 0.0137,
      "step": 1178940
    },
    {
      "epoch": 1.9293938977370175,
      "grad_norm": 0.15781475603580475,
      "learning_rate": 6.181678011109428e-06,
      "loss": 0.0178,
      "step": 1178960
    },
    {
      "epoch": 1.929426628175671,
      "grad_norm": 1.464615821838379,
      "learning_rate": 6.181612118895911e-06,
      "loss": 0.0197,
      "step": 1178980
    },
    {
      "epoch": 1.929459358614324,
      "grad_norm": 0.23805750906467438,
      "learning_rate": 6.181546226682393e-06,
      "loss": 0.0173,
      "step": 1179000
    },
    {
      "epoch": 1.9294920890529776,
      "grad_norm": 0.13985158503055573,
      "learning_rate": 6.181480334468877e-06,
      "loss": 0.0168,
      "step": 1179020
    },
    {
      "epoch": 1.9295248194916308,
      "grad_norm": 0.10917747765779495,
      "learning_rate": 6.181414442255359e-06,
      "loss": 0.0176,
      "step": 1179040
    },
    {
      "epoch": 1.9295575499302842,
      "grad_norm": 1.303660273551941,
      "learning_rate": 6.181348550041842e-06,
      "loss": 0.0185,
      "step": 1179060
    },
    {
      "epoch": 1.9295902803689375,
      "grad_norm": 0.9975578784942627,
      "learning_rate": 6.181282657828325e-06,
      "loss": 0.019,
      "step": 1179080
    },
    {
      "epoch": 1.9296230108075907,
      "grad_norm": 0.25999757647514343,
      "learning_rate": 6.181216765614808e-06,
      "loss": 0.0218,
      "step": 1179100
    },
    {
      "epoch": 1.9296557412462443,
      "grad_norm": 0.18865029513835907,
      "learning_rate": 6.1811508734012905e-06,
      "loss": 0.0114,
      "step": 1179120
    },
    {
      "epoch": 1.9296884716848974,
      "grad_norm": 0.6553061008453369,
      "learning_rate": 6.181084981187774e-06,
      "loss": 0.0201,
      "step": 1179140
    },
    {
      "epoch": 1.9297212021235508,
      "grad_norm": 0.7301578521728516,
      "learning_rate": 6.181019088974256e-06,
      "loss": 0.0254,
      "step": 1179160
    },
    {
      "epoch": 1.9297539325622042,
      "grad_norm": 0.5426439046859741,
      "learning_rate": 6.18095319676074e-06,
      "loss": 0.0167,
      "step": 1179180
    },
    {
      "epoch": 1.9297866630008575,
      "grad_norm": 0.2630927860736847,
      "learning_rate": 6.1808873045472215e-06,
      "loss": 0.0258,
      "step": 1179200
    },
    {
      "epoch": 1.929819393439511,
      "grad_norm": 0.3018735945224762,
      "learning_rate": 6.180821412333705e-06,
      "loss": 0.0138,
      "step": 1179220
    },
    {
      "epoch": 1.929852123878164,
      "grad_norm": 1.4594460725784302,
      "learning_rate": 6.180755520120188e-06,
      "loss": 0.0217,
      "step": 1179240
    },
    {
      "epoch": 1.9298848543168177,
      "grad_norm": 0.19234471023082733,
      "learning_rate": 6.1806896279066706e-06,
      "loss": 0.0192,
      "step": 1179260
    },
    {
      "epoch": 1.9299175847554708,
      "grad_norm": 0.42817947268486023,
      "learning_rate": 6.180623735693153e-06,
      "loss": 0.014,
      "step": 1179280
    },
    {
      "epoch": 1.9299503151941242,
      "grad_norm": 0.27773088216781616,
      "learning_rate": 6.180557843479637e-06,
      "loss": 0.0154,
      "step": 1179300
    },
    {
      "epoch": 1.9299830456327776,
      "grad_norm": 0.28659531474113464,
      "learning_rate": 6.18049195126612e-06,
      "loss": 0.0179,
      "step": 1179320
    },
    {
      "epoch": 1.930015776071431,
      "grad_norm": 1.5592390298843384,
      "learning_rate": 6.180426059052602e-06,
      "loss": 0.0185,
      "step": 1179340
    },
    {
      "epoch": 1.9300485065100843,
      "grad_norm": 0.6858028769493103,
      "learning_rate": 6.180360166839086e-06,
      "loss": 0.0156,
      "step": 1179360
    },
    {
      "epoch": 1.9300812369487375,
      "grad_norm": 0.20405659079551697,
      "learning_rate": 6.180294274625568e-06,
      "loss": 0.0151,
      "step": 1179380
    },
    {
      "epoch": 1.930113967387391,
      "grad_norm": 0.4282260835170746,
      "learning_rate": 6.1802283824120514e-06,
      "loss": 0.0161,
      "step": 1179400
    },
    {
      "epoch": 1.9301466978260442,
      "grad_norm": 0.3046967089176178,
      "learning_rate": 6.180162490198533e-06,
      "loss": 0.0161,
      "step": 1179420
    },
    {
      "epoch": 1.9301794282646976,
      "grad_norm": 0.43336448073387146,
      "learning_rate": 6.180096597985017e-06,
      "loss": 0.0161,
      "step": 1179440
    },
    {
      "epoch": 1.930212158703351,
      "grad_norm": 0.4584848880767822,
      "learning_rate": 6.180030705771499e-06,
      "loss": 0.011,
      "step": 1179460
    },
    {
      "epoch": 1.9302448891420043,
      "grad_norm": 0.24710194766521454,
      "learning_rate": 6.179964813557982e-06,
      "loss": 0.0157,
      "step": 1179480
    },
    {
      "epoch": 1.9302776195806577,
      "grad_norm": 0.30921486020088196,
      "learning_rate": 6.179898921344465e-06,
      "loss": 0.0203,
      "step": 1179500
    },
    {
      "epoch": 1.9303103500193108,
      "grad_norm": 0.4358559548854828,
      "learning_rate": 6.179833029130949e-06,
      "loss": 0.0158,
      "step": 1179520
    },
    {
      "epoch": 1.9303430804579644,
      "grad_norm": 0.24961824715137482,
      "learning_rate": 6.179767136917431e-06,
      "loss": 0.0102,
      "step": 1179540
    },
    {
      "epoch": 1.9303758108966176,
      "grad_norm": 0.5150519609451294,
      "learning_rate": 6.179701244703914e-06,
      "loss": 0.0209,
      "step": 1179560
    },
    {
      "epoch": 1.930408541335271,
      "grad_norm": 0.40647467970848083,
      "learning_rate": 6.179635352490396e-06,
      "loss": 0.0178,
      "step": 1179580
    },
    {
      "epoch": 1.9304412717739243,
      "grad_norm": 0.048893678933382034,
      "learning_rate": 6.17956946027688e-06,
      "loss": 0.015,
      "step": 1179600
    },
    {
      "epoch": 1.9304740022125777,
      "grad_norm": 0.10274206846952438,
      "learning_rate": 6.1795035680633616e-06,
      "loss": 0.0141,
      "step": 1179620
    },
    {
      "epoch": 1.930506732651231,
      "grad_norm": 0.9203630685806274,
      "learning_rate": 6.179437675849845e-06,
      "loss": 0.014,
      "step": 1179640
    },
    {
      "epoch": 1.9305394630898842,
      "grad_norm": 0.9280462265014648,
      "learning_rate": 6.179371783636329e-06,
      "loss": 0.0148,
      "step": 1179660
    },
    {
      "epoch": 1.9305721935285378,
      "grad_norm": 0.2926648259162903,
      "learning_rate": 6.179305891422811e-06,
      "loss": 0.0141,
      "step": 1179680
    },
    {
      "epoch": 1.930604923967191,
      "grad_norm": 0.7193924784660339,
      "learning_rate": 6.179239999209294e-06,
      "loss": 0.015,
      "step": 1179700
    },
    {
      "epoch": 1.9306376544058443,
      "grad_norm": 0.5109297633171082,
      "learning_rate": 6.179174106995777e-06,
      "loss": 0.0208,
      "step": 1179720
    },
    {
      "epoch": 1.9306703848444977,
      "grad_norm": 0.9551330208778381,
      "learning_rate": 6.17910821478226e-06,
      "loss": 0.0179,
      "step": 1179740
    },
    {
      "epoch": 1.9307031152831509,
      "grad_norm": 0.6705327033996582,
      "learning_rate": 6.1790423225687424e-06,
      "loss": 0.0145,
      "step": 1179760
    },
    {
      "epoch": 1.9307358457218045,
      "grad_norm": 0.15819784998893738,
      "learning_rate": 6.178976430355226e-06,
      "loss": 0.0166,
      "step": 1179780
    },
    {
      "epoch": 1.9307685761604576,
      "grad_norm": 0.45668989419937134,
      "learning_rate": 6.178910538141708e-06,
      "loss": 0.0214,
      "step": 1179800
    },
    {
      "epoch": 1.9308013065991112,
      "grad_norm": 0.5543688535690308,
      "learning_rate": 6.1788446459281915e-06,
      "loss": 0.0187,
      "step": 1179820
    },
    {
      "epoch": 1.9308340370377644,
      "grad_norm": 0.35680437088012695,
      "learning_rate": 6.178778753714673e-06,
      "loss": 0.0206,
      "step": 1179840
    },
    {
      "epoch": 1.9308667674764177,
      "grad_norm": 0.5516273379325867,
      "learning_rate": 6.178712861501157e-06,
      "loss": 0.0136,
      "step": 1179860
    },
    {
      "epoch": 1.930899497915071,
      "grad_norm": 0.10568378865718842,
      "learning_rate": 6.17864696928764e-06,
      "loss": 0.0146,
      "step": 1179880
    },
    {
      "epoch": 1.9309322283537242,
      "grad_norm": 0.19135819375514984,
      "learning_rate": 6.1785810770741225e-06,
      "loss": 0.0166,
      "step": 1179900
    },
    {
      "epoch": 1.9309649587923778,
      "grad_norm": 0.724152684211731,
      "learning_rate": 6.178515184860605e-06,
      "loss": 0.019,
      "step": 1179920
    },
    {
      "epoch": 1.930997689231031,
      "grad_norm": 0.1314862221479416,
      "learning_rate": 6.178449292647089e-06,
      "loss": 0.0208,
      "step": 1179940
    },
    {
      "epoch": 1.9310304196696844,
      "grad_norm": 0.4339136779308319,
      "learning_rate": 6.178383400433571e-06,
      "loss": 0.0229,
      "step": 1179960
    },
    {
      "epoch": 1.9310631501083377,
      "grad_norm": 0.5119196772575378,
      "learning_rate": 6.178317508220054e-06,
      "loss": 0.014,
      "step": 1179980
    },
    {
      "epoch": 1.931095880546991,
      "grad_norm": 0.4800625145435333,
      "learning_rate": 6.178251616006538e-06,
      "loss": 0.0183,
      "step": 1180000
    },
    {
      "epoch": 1.9311286109856445,
      "grad_norm": 1.0029174089431763,
      "learning_rate": 6.17818572379302e-06,
      "loss": 0.0201,
      "step": 1180020
    },
    {
      "epoch": 1.9311613414242976,
      "grad_norm": 0.23325490951538086,
      "learning_rate": 6.178119831579503e-06,
      "loss": 0.0315,
      "step": 1180040
    },
    {
      "epoch": 1.9311940718629512,
      "grad_norm": 0.3227483928203583,
      "learning_rate": 6.178053939365985e-06,
      "loss": 0.0114,
      "step": 1180060
    },
    {
      "epoch": 1.9312268023016044,
      "grad_norm": 0.8149076104164124,
      "learning_rate": 6.177988047152469e-06,
      "loss": 0.0204,
      "step": 1180080
    },
    {
      "epoch": 1.9312595327402577,
      "grad_norm": 0.4755561053752899,
      "learning_rate": 6.1779221549389516e-06,
      "loss": 0.0221,
      "step": 1180100
    },
    {
      "epoch": 1.9312922631789111,
      "grad_norm": 0.16631747782230377,
      "learning_rate": 6.177856262725434e-06,
      "loss": 0.0199,
      "step": 1180120
    },
    {
      "epoch": 1.9313249936175645,
      "grad_norm": 3.198204517364502,
      "learning_rate": 6.177790370511917e-06,
      "loss": 0.0182,
      "step": 1180140
    },
    {
      "epoch": 1.9313577240562179,
      "grad_norm": 0.3841516077518463,
      "learning_rate": 6.177724478298401e-06,
      "loss": 0.0173,
      "step": 1180160
    },
    {
      "epoch": 1.931390454494871,
      "grad_norm": 0.15652461349964142,
      "learning_rate": 6.1776585860848825e-06,
      "loss": 0.0113,
      "step": 1180180
    },
    {
      "epoch": 1.9314231849335246,
      "grad_norm": 0.33226269483566284,
      "learning_rate": 6.177592693871366e-06,
      "loss": 0.02,
      "step": 1180200
    },
    {
      "epoch": 1.9314559153721778,
      "grad_norm": 0.943046510219574,
      "learning_rate": 6.177526801657848e-06,
      "loss": 0.0147,
      "step": 1180220
    },
    {
      "epoch": 1.9314886458108311,
      "grad_norm": 0.12890177965164185,
      "learning_rate": 6.177460909444332e-06,
      "loss": 0.0127,
      "step": 1180240
    },
    {
      "epoch": 1.9315213762494845,
      "grad_norm": 1.8583751916885376,
      "learning_rate": 6.177395017230814e-06,
      "loss": 0.0193,
      "step": 1180260
    },
    {
      "epoch": 1.9315541066881379,
      "grad_norm": 0.5528055429458618,
      "learning_rate": 6.177329125017297e-06,
      "loss": 0.0197,
      "step": 1180280
    },
    {
      "epoch": 1.9315868371267912,
      "grad_norm": 0.4404050409793854,
      "learning_rate": 6.17726323280378e-06,
      "loss": 0.0129,
      "step": 1180300
    },
    {
      "epoch": 1.9316195675654444,
      "grad_norm": 0.25633588433265686,
      "learning_rate": 6.177197340590263e-06,
      "loss": 0.0171,
      "step": 1180320
    },
    {
      "epoch": 1.931652298004098,
      "grad_norm": 0.5271217823028564,
      "learning_rate": 6.177131448376745e-06,
      "loss": 0.0129,
      "step": 1180340
    },
    {
      "epoch": 1.9316850284427511,
      "grad_norm": 0.6798845529556274,
      "learning_rate": 6.177065556163229e-06,
      "loss": 0.0188,
      "step": 1180360
    },
    {
      "epoch": 1.9317177588814045,
      "grad_norm": 0.2791571021080017,
      "learning_rate": 6.1769996639497124e-06,
      "loss": 0.0131,
      "step": 1180380
    },
    {
      "epoch": 1.9317504893200579,
      "grad_norm": 0.1839778870344162,
      "learning_rate": 6.176933771736194e-06,
      "loss": 0.0253,
      "step": 1180400
    },
    {
      "epoch": 1.9317832197587113,
      "grad_norm": 0.5753489136695862,
      "learning_rate": 6.176867879522678e-06,
      "loss": 0.0232,
      "step": 1180420
    },
    {
      "epoch": 1.9318159501973646,
      "grad_norm": 1.272382140159607,
      "learning_rate": 6.17680198730916e-06,
      "loss": 0.0205,
      "step": 1180440
    },
    {
      "epoch": 1.9318486806360178,
      "grad_norm": 0.23128503561019897,
      "learning_rate": 6.176736095095643e-06,
      "loss": 0.0172,
      "step": 1180460
    },
    {
      "epoch": 1.9318814110746714,
      "grad_norm": 0.391144335269928,
      "learning_rate": 6.176670202882125e-06,
      "loss": 0.0133,
      "step": 1180480
    },
    {
      "epoch": 1.9319141415133245,
      "grad_norm": 0.12603415548801422,
      "learning_rate": 6.176604310668609e-06,
      "loss": 0.0258,
      "step": 1180500
    },
    {
      "epoch": 1.931946871951978,
      "grad_norm": 0.47863760590553284,
      "learning_rate": 6.176538418455092e-06,
      "loss": 0.0271,
      "step": 1180520
    },
    {
      "epoch": 1.9319796023906313,
      "grad_norm": 0.928748369216919,
      "learning_rate": 6.176472526241574e-06,
      "loss": 0.0169,
      "step": 1180540
    },
    {
      "epoch": 1.9320123328292844,
      "grad_norm": 0.41048383712768555,
      "learning_rate": 6.176406634028057e-06,
      "loss": 0.016,
      "step": 1180560
    },
    {
      "epoch": 1.932045063267938,
      "grad_norm": 1.9978752136230469,
      "learning_rate": 6.176340741814541e-06,
      "loss": 0.0166,
      "step": 1180580
    },
    {
      "epoch": 1.9320777937065912,
      "grad_norm": 0.5237518548965454,
      "learning_rate": 6.176274849601023e-06,
      "loss": 0.0112,
      "step": 1180600
    },
    {
      "epoch": 1.9321105241452448,
      "grad_norm": 0.24608922004699707,
      "learning_rate": 6.176208957387506e-06,
      "loss": 0.0254,
      "step": 1180620
    },
    {
      "epoch": 1.932143254583898,
      "grad_norm": 0.8317824006080627,
      "learning_rate": 6.176143065173988e-06,
      "loss": 0.0116,
      "step": 1180640
    },
    {
      "epoch": 1.9321759850225513,
      "grad_norm": 0.03690829128026962,
      "learning_rate": 6.176077172960472e-06,
      "loss": 0.0193,
      "step": 1180660
    },
    {
      "epoch": 1.9322087154612047,
      "grad_norm": 0.8659048676490784,
      "learning_rate": 6.176011280746954e-06,
      "loss": 0.0123,
      "step": 1180680
    },
    {
      "epoch": 1.9322414458998578,
      "grad_norm": 0.584632396697998,
      "learning_rate": 6.175945388533437e-06,
      "loss": 0.0197,
      "step": 1180700
    },
    {
      "epoch": 1.9322741763385114,
      "grad_norm": 0.7758060097694397,
      "learning_rate": 6.175879496319921e-06,
      "loss": 0.0165,
      "step": 1180720
    },
    {
      "epoch": 1.9323069067771645,
      "grad_norm": 0.1796998381614685,
      "learning_rate": 6.1758136041064035e-06,
      "loss": 0.0166,
      "step": 1180740
    },
    {
      "epoch": 1.932339637215818,
      "grad_norm": 0.6404821872711182,
      "learning_rate": 6.175747711892886e-06,
      "loss": 0.0139,
      "step": 1180760
    },
    {
      "epoch": 1.9323723676544713,
      "grad_norm": 0.23008452355861664,
      "learning_rate": 6.175681819679369e-06,
      "loss": 0.015,
      "step": 1180780
    },
    {
      "epoch": 1.9324050980931247,
      "grad_norm": 0.2263394296169281,
      "learning_rate": 6.1756159274658525e-06,
      "loss": 0.0227,
      "step": 1180800
    },
    {
      "epoch": 1.932437828531778,
      "grad_norm": 0.2541083097457886,
      "learning_rate": 6.175550035252334e-06,
      "loss": 0.0154,
      "step": 1180820
    },
    {
      "epoch": 1.9324705589704312,
      "grad_norm": 0.45092394948005676,
      "learning_rate": 6.175484143038818e-06,
      "loss": 0.0291,
      "step": 1180840
    },
    {
      "epoch": 1.9325032894090848,
      "grad_norm": 0.3620590567588806,
      "learning_rate": 6.1754182508253e-06,
      "loss": 0.0202,
      "step": 1180860
    },
    {
      "epoch": 1.932536019847738,
      "grad_norm": 0.34885743260383606,
      "learning_rate": 6.1753523586117835e-06,
      "loss": 0.0176,
      "step": 1180880
    },
    {
      "epoch": 1.9325687502863913,
      "grad_norm": 0.48274698853492737,
      "learning_rate": 6.175286466398266e-06,
      "loss": 0.0126,
      "step": 1180900
    },
    {
      "epoch": 1.9326014807250447,
      "grad_norm": 0.8852288722991943,
      "learning_rate": 6.175220574184749e-06,
      "loss": 0.0212,
      "step": 1180920
    },
    {
      "epoch": 1.932634211163698,
      "grad_norm": 0.11299602687358856,
      "learning_rate": 6.175154681971232e-06,
      "loss": 0.0276,
      "step": 1180940
    },
    {
      "epoch": 1.9326669416023514,
      "grad_norm": 0.49556660652160645,
      "learning_rate": 6.175088789757715e-06,
      "loss": 0.0208,
      "step": 1180960
    },
    {
      "epoch": 1.9326996720410046,
      "grad_norm": 0.3447534143924713,
      "learning_rate": 6.175022897544197e-06,
      "loss": 0.0124,
      "step": 1180980
    },
    {
      "epoch": 1.9327324024796582,
      "grad_norm": 0.20750996470451355,
      "learning_rate": 6.174957005330681e-06,
      "loss": 0.0143,
      "step": 1181000
    },
    {
      "epoch": 1.9327651329183113,
      "grad_norm": 0.1558723747730255,
      "learning_rate": 6.174891113117163e-06,
      "loss": 0.0171,
      "step": 1181020
    },
    {
      "epoch": 1.9327978633569647,
      "grad_norm": 1.0506633520126343,
      "learning_rate": 6.174825220903646e-06,
      "loss": 0.0181,
      "step": 1181040
    },
    {
      "epoch": 1.932830593795618,
      "grad_norm": 0.15366177260875702,
      "learning_rate": 6.17475932869013e-06,
      "loss": 0.015,
      "step": 1181060
    },
    {
      "epoch": 1.9328633242342714,
      "grad_norm": 0.10520706325769424,
      "learning_rate": 6.174693436476612e-06,
      "loss": 0.0175,
      "step": 1181080
    },
    {
      "epoch": 1.9328960546729248,
      "grad_norm": 0.44472068548202515,
      "learning_rate": 6.174627544263095e-06,
      "loss": 0.0133,
      "step": 1181100
    },
    {
      "epoch": 1.932928785111578,
      "grad_norm": 0.4482309818267822,
      "learning_rate": 6.174561652049578e-06,
      "loss": 0.0133,
      "step": 1181120
    },
    {
      "epoch": 1.9329615155502315,
      "grad_norm": 0.44348663091659546,
      "learning_rate": 6.174495759836061e-06,
      "loss": 0.0132,
      "step": 1181140
    },
    {
      "epoch": 1.9329942459888847,
      "grad_norm": 0.4648560583591461,
      "learning_rate": 6.1744298676225435e-06,
      "loss": 0.013,
      "step": 1181160
    },
    {
      "epoch": 1.933026976427538,
      "grad_norm": 0.4377332031726837,
      "learning_rate": 6.174363975409027e-06,
      "loss": 0.015,
      "step": 1181180
    },
    {
      "epoch": 1.9330597068661914,
      "grad_norm": 0.2314945012331009,
      "learning_rate": 6.174298083195509e-06,
      "loss": 0.0111,
      "step": 1181200
    },
    {
      "epoch": 1.9330924373048448,
      "grad_norm": 8.313278198242188,
      "learning_rate": 6.174232190981993e-06,
      "loss": 0.0158,
      "step": 1181220
    },
    {
      "epoch": 1.9331251677434982,
      "grad_norm": 0.4734036326408386,
      "learning_rate": 6.1741662987684745e-06,
      "loss": 0.0201,
      "step": 1181240
    },
    {
      "epoch": 1.9331578981821513,
      "grad_norm": 0.35959452390670776,
      "learning_rate": 6.174100406554958e-06,
      "loss": 0.0134,
      "step": 1181260
    },
    {
      "epoch": 1.933190628620805,
      "grad_norm": 0.5232834815979004,
      "learning_rate": 6.174034514341441e-06,
      "loss": 0.0161,
      "step": 1181280
    },
    {
      "epoch": 1.933223359059458,
      "grad_norm": 0.6764764785766602,
      "learning_rate": 6.1739686221279236e-06,
      "loss": 0.0173,
      "step": 1181300
    },
    {
      "epoch": 1.9332560894981115,
      "grad_norm": 0.44032448530197144,
      "learning_rate": 6.173902729914406e-06,
      "loss": 0.0222,
      "step": 1181320
    },
    {
      "epoch": 1.9332888199367648,
      "grad_norm": 0.525972843170166,
      "learning_rate": 6.17383683770089e-06,
      "loss": 0.0171,
      "step": 1181340
    },
    {
      "epoch": 1.933321550375418,
      "grad_norm": 0.37655186653137207,
      "learning_rate": 6.173770945487372e-06,
      "loss": 0.0194,
      "step": 1181360
    },
    {
      "epoch": 1.9333542808140716,
      "grad_norm": 0.2092793732881546,
      "learning_rate": 6.173705053273855e-06,
      "loss": 0.0182,
      "step": 1181380
    },
    {
      "epoch": 1.9333870112527247,
      "grad_norm": 0.2726861536502838,
      "learning_rate": 6.173639161060339e-06,
      "loss": 0.0202,
      "step": 1181400
    },
    {
      "epoch": 1.9334197416913783,
      "grad_norm": 0.5859913229942322,
      "learning_rate": 6.173573268846821e-06,
      "loss": 0.0204,
      "step": 1181420
    },
    {
      "epoch": 1.9334524721300315,
      "grad_norm": 0.6762101054191589,
      "learning_rate": 6.1735073766333044e-06,
      "loss": 0.0158,
      "step": 1181440
    },
    {
      "epoch": 1.9334852025686848,
      "grad_norm": 0.6545898914337158,
      "learning_rate": 6.173441484419786e-06,
      "loss": 0.0209,
      "step": 1181460
    },
    {
      "epoch": 1.9335179330073382,
      "grad_norm": 0.24496407806873322,
      "learning_rate": 6.17337559220627e-06,
      "loss": 0.0174,
      "step": 1181480
    },
    {
      "epoch": 1.9335506634459914,
      "grad_norm": 0.23300932347774506,
      "learning_rate": 6.173309699992752e-06,
      "loss": 0.0225,
      "step": 1181500
    },
    {
      "epoch": 1.933583393884645,
      "grad_norm": 0.3314153254032135,
      "learning_rate": 6.173243807779235e-06,
      "loss": 0.0196,
      "step": 1181520
    },
    {
      "epoch": 1.933616124323298,
      "grad_norm": 0.4144588112831116,
      "learning_rate": 6.173177915565718e-06,
      "loss": 0.0158,
      "step": 1181540
    },
    {
      "epoch": 1.9336488547619515,
      "grad_norm": 0.061438266187906265,
      "learning_rate": 6.173112023352201e-06,
      "loss": 0.019,
      "step": 1181560
    },
    {
      "epoch": 1.9336815852006048,
      "grad_norm": 0.37908869981765747,
      "learning_rate": 6.173046131138684e-06,
      "loss": 0.0146,
      "step": 1181580
    },
    {
      "epoch": 1.9337143156392582,
      "grad_norm": 0.6646749973297119,
      "learning_rate": 6.172980238925167e-06,
      "loss": 0.0168,
      "step": 1181600
    },
    {
      "epoch": 1.9337470460779116,
      "grad_norm": 0.7402974367141724,
      "learning_rate": 6.172914346711649e-06,
      "loss": 0.0176,
      "step": 1181620
    },
    {
      "epoch": 1.9337797765165647,
      "grad_norm": 0.35242289304733276,
      "learning_rate": 6.172848454498133e-06,
      "loss": 0.0173,
      "step": 1181640
    },
    {
      "epoch": 1.9338125069552183,
      "grad_norm": 0.9385631680488586,
      "learning_rate": 6.1727825622846146e-06,
      "loss": 0.0181,
      "step": 1181660
    },
    {
      "epoch": 1.9338452373938715,
      "grad_norm": 0.16803568601608276,
      "learning_rate": 6.172716670071098e-06,
      "loss": 0.0166,
      "step": 1181680
    },
    {
      "epoch": 1.9338779678325249,
      "grad_norm": 0.2877926230430603,
      "learning_rate": 6.172650777857581e-06,
      "loss": 0.0143,
      "step": 1181700
    },
    {
      "epoch": 1.9339106982711782,
      "grad_norm": 0.1360790729522705,
      "learning_rate": 6.172584885644064e-06,
      "loss": 0.0222,
      "step": 1181720
    },
    {
      "epoch": 1.9339434287098316,
      "grad_norm": 0.2161446064710617,
      "learning_rate": 6.172518993430546e-06,
      "loss": 0.0186,
      "step": 1181740
    },
    {
      "epoch": 1.933976159148485,
      "grad_norm": 0.5601730942726135,
      "learning_rate": 6.17245310121703e-06,
      "loss": 0.0151,
      "step": 1181760
    },
    {
      "epoch": 1.9340088895871381,
      "grad_norm": 0.23268276453018188,
      "learning_rate": 6.172387209003513e-06,
      "loss": 0.014,
      "step": 1181780
    },
    {
      "epoch": 1.9340416200257917,
      "grad_norm": 0.18181365728378296,
      "learning_rate": 6.1723213167899954e-06,
      "loss": 0.0224,
      "step": 1181800
    },
    {
      "epoch": 1.9340743504644449,
      "grad_norm": 2.206482172012329,
      "learning_rate": 6.172255424576479e-06,
      "loss": 0.0239,
      "step": 1181820
    },
    {
      "epoch": 1.9341070809030982,
      "grad_norm": 0.4658108651638031,
      "learning_rate": 6.172189532362961e-06,
      "loss": 0.0227,
      "step": 1181840
    },
    {
      "epoch": 1.9341398113417516,
      "grad_norm": 0.5876531004905701,
      "learning_rate": 6.1721236401494445e-06,
      "loss": 0.0207,
      "step": 1181860
    },
    {
      "epoch": 1.934172541780405,
      "grad_norm": 0.7869080901145935,
      "learning_rate": 6.172057747935926e-06,
      "loss": 0.0214,
      "step": 1181880
    },
    {
      "epoch": 1.9342052722190584,
      "grad_norm": 0.2170787751674652,
      "learning_rate": 6.17199185572241e-06,
      "loss": 0.0164,
      "step": 1181900
    },
    {
      "epoch": 1.9342380026577115,
      "grad_norm": 0.22330184280872345,
      "learning_rate": 6.171925963508893e-06,
      "loss": 0.0213,
      "step": 1181920
    },
    {
      "epoch": 1.934270733096365,
      "grad_norm": 0.19031625986099243,
      "learning_rate": 6.1718600712953755e-06,
      "loss": 0.0217,
      "step": 1181940
    },
    {
      "epoch": 1.9343034635350183,
      "grad_norm": 1.2066373825073242,
      "learning_rate": 6.171794179081858e-06,
      "loss": 0.0197,
      "step": 1181960
    },
    {
      "epoch": 1.9343361939736716,
      "grad_norm": 0.3926049470901489,
      "learning_rate": 6.171728286868342e-06,
      "loss": 0.0134,
      "step": 1181980
    },
    {
      "epoch": 1.934368924412325,
      "grad_norm": 0.3488517999649048,
      "learning_rate": 6.171662394654824e-06,
      "loss": 0.017,
      "step": 1182000
    },
    {
      "epoch": 1.9344016548509781,
      "grad_norm": 0.5331538319587708,
      "learning_rate": 6.171596502441307e-06,
      "loss": 0.0126,
      "step": 1182020
    },
    {
      "epoch": 1.9344343852896317,
      "grad_norm": 0.4245838224887848,
      "learning_rate": 6.171530610227789e-06,
      "loss": 0.0136,
      "step": 1182040
    },
    {
      "epoch": 1.934467115728285,
      "grad_norm": 0.541017472743988,
      "learning_rate": 6.171464718014273e-06,
      "loss": 0.0241,
      "step": 1182060
    },
    {
      "epoch": 1.9344998461669385,
      "grad_norm": 0.14924129843711853,
      "learning_rate": 6.1713988258007555e-06,
      "loss": 0.012,
      "step": 1182080
    },
    {
      "epoch": 1.9345325766055916,
      "grad_norm": 0.13309623301029205,
      "learning_rate": 6.171332933587238e-06,
      "loss": 0.0179,
      "step": 1182100
    },
    {
      "epoch": 1.934565307044245,
      "grad_norm": 0.5919020771980286,
      "learning_rate": 6.171267041373722e-06,
      "loss": 0.016,
      "step": 1182120
    },
    {
      "epoch": 1.9345980374828984,
      "grad_norm": 0.5060020089149475,
      "learning_rate": 6.1712011491602046e-06,
      "loss": 0.0217,
      "step": 1182140
    },
    {
      "epoch": 1.9346307679215515,
      "grad_norm": 0.5847771763801575,
      "learning_rate": 6.171135256946687e-06,
      "loss": 0.0201,
      "step": 1182160
    },
    {
      "epoch": 1.9346634983602051,
      "grad_norm": 1.6775308847427368,
      "learning_rate": 6.17106936473317e-06,
      "loss": 0.0266,
      "step": 1182180
    },
    {
      "epoch": 1.9346962287988583,
      "grad_norm": 2.4535956382751465,
      "learning_rate": 6.171003472519654e-06,
      "loss": 0.03,
      "step": 1182200
    },
    {
      "epoch": 1.9347289592375116,
      "grad_norm": 0.2429661899805069,
      "learning_rate": 6.1709375803061355e-06,
      "loss": 0.0159,
      "step": 1182220
    },
    {
      "epoch": 1.934761689676165,
      "grad_norm": 0.5122093558311462,
      "learning_rate": 6.170871688092619e-06,
      "loss": 0.0166,
      "step": 1182240
    },
    {
      "epoch": 1.9347944201148184,
      "grad_norm": 0.4844667911529541,
      "learning_rate": 6.170805795879101e-06,
      "loss": 0.0192,
      "step": 1182260
    },
    {
      "epoch": 1.9348271505534718,
      "grad_norm": 0.41769397258758545,
      "learning_rate": 6.170739903665585e-06,
      "loss": 0.0199,
      "step": 1182280
    },
    {
      "epoch": 1.934859880992125,
      "grad_norm": 0.28542232513427734,
      "learning_rate": 6.170674011452067e-06,
      "loss": 0.0152,
      "step": 1182300
    },
    {
      "epoch": 1.9348926114307785,
      "grad_norm": 0.4597155749797821,
      "learning_rate": 6.17060811923855e-06,
      "loss": 0.0165,
      "step": 1182320
    },
    {
      "epoch": 1.9349253418694317,
      "grad_norm": 0.6401404142379761,
      "learning_rate": 6.170542227025033e-06,
      "loss": 0.0195,
      "step": 1182340
    },
    {
      "epoch": 1.934958072308085,
      "grad_norm": 0.30794018507003784,
      "learning_rate": 6.170476334811516e-06,
      "loss": 0.0256,
      "step": 1182360
    },
    {
      "epoch": 1.9349908027467384,
      "grad_norm": 0.47830095887184143,
      "learning_rate": 6.170410442597998e-06,
      "loss": 0.0172,
      "step": 1182380
    },
    {
      "epoch": 1.9350235331853918,
      "grad_norm": 1.0682342052459717,
      "learning_rate": 6.170344550384482e-06,
      "loss": 0.0265,
      "step": 1182400
    },
    {
      "epoch": 1.9350562636240451,
      "grad_norm": 0.3116765022277832,
      "learning_rate": 6.170278658170964e-06,
      "loss": 0.0194,
      "step": 1182420
    },
    {
      "epoch": 1.9350889940626983,
      "grad_norm": 0.6616901755332947,
      "learning_rate": 6.170212765957447e-06,
      "loss": 0.0159,
      "step": 1182440
    },
    {
      "epoch": 1.935121724501352,
      "grad_norm": 0.4248964786529541,
      "learning_rate": 6.170146873743931e-06,
      "loss": 0.0187,
      "step": 1182460
    },
    {
      "epoch": 1.935154454940005,
      "grad_norm": 1.0963190793991089,
      "learning_rate": 6.170080981530413e-06,
      "loss": 0.0204,
      "step": 1182480
    },
    {
      "epoch": 1.9351871853786584,
      "grad_norm": 0.584613561630249,
      "learning_rate": 6.170015089316896e-06,
      "loss": 0.0159,
      "step": 1182500
    },
    {
      "epoch": 1.9352199158173118,
      "grad_norm": 0.3040786385536194,
      "learning_rate": 6.169949197103378e-06,
      "loss": 0.019,
      "step": 1182520
    },
    {
      "epoch": 1.9352526462559652,
      "grad_norm": 0.4430800974369049,
      "learning_rate": 6.169883304889862e-06,
      "loss": 0.0185,
      "step": 1182540
    },
    {
      "epoch": 1.9352853766946185,
      "grad_norm": 0.23681391775608063,
      "learning_rate": 6.169817412676345e-06,
      "loss": 0.0134,
      "step": 1182560
    },
    {
      "epoch": 1.9353181071332717,
      "grad_norm": 0.6395648717880249,
      "learning_rate": 6.169751520462827e-06,
      "loss": 0.0203,
      "step": 1182580
    },
    {
      "epoch": 1.9353508375719253,
      "grad_norm": 0.5276119112968445,
      "learning_rate": 6.16968562824931e-06,
      "loss": 0.0135,
      "step": 1182600
    },
    {
      "epoch": 1.9353835680105784,
      "grad_norm": 0.3240238130092621,
      "learning_rate": 6.169619736035794e-06,
      "loss": 0.02,
      "step": 1182620
    },
    {
      "epoch": 1.9354162984492318,
      "grad_norm": 0.518675684928894,
      "learning_rate": 6.169553843822276e-06,
      "loss": 0.013,
      "step": 1182640
    },
    {
      "epoch": 1.9354490288878852,
      "grad_norm": 0.5364424586296082,
      "learning_rate": 6.169487951608759e-06,
      "loss": 0.0164,
      "step": 1182660
    },
    {
      "epoch": 1.9354817593265385,
      "grad_norm": 1.015006184577942,
      "learning_rate": 6.169422059395241e-06,
      "loss": 0.0239,
      "step": 1182680
    },
    {
      "epoch": 1.935514489765192,
      "grad_norm": 0.320613294839859,
      "learning_rate": 6.169356167181725e-06,
      "loss": 0.0204,
      "step": 1182700
    },
    {
      "epoch": 1.935547220203845,
      "grad_norm": 0.19036045670509338,
      "learning_rate": 6.169290274968207e-06,
      "loss": 0.013,
      "step": 1182720
    },
    {
      "epoch": 1.9355799506424987,
      "grad_norm": 0.5586531162261963,
      "learning_rate": 6.16922438275469e-06,
      "loss": 0.0268,
      "step": 1182740
    },
    {
      "epoch": 1.9356126810811518,
      "grad_norm": 0.31552109122276306,
      "learning_rate": 6.169158490541173e-06,
      "loss": 0.0236,
      "step": 1182760
    },
    {
      "epoch": 1.9356454115198052,
      "grad_norm": 1.0002162456512451,
      "learning_rate": 6.1690925983276565e-06,
      "loss": 0.0226,
      "step": 1182780
    },
    {
      "epoch": 1.9356781419584586,
      "grad_norm": 0.7673907279968262,
      "learning_rate": 6.169026706114138e-06,
      "loss": 0.0156,
      "step": 1182800
    },
    {
      "epoch": 1.9357108723971117,
      "grad_norm": 0.2469455897808075,
      "learning_rate": 6.168960813900622e-06,
      "loss": 0.0193,
      "step": 1182820
    },
    {
      "epoch": 1.9357436028357653,
      "grad_norm": 0.49052107334136963,
      "learning_rate": 6.1688949216871055e-06,
      "loss": 0.0208,
      "step": 1182840
    },
    {
      "epoch": 1.9357763332744184,
      "grad_norm": 0.4710697531700134,
      "learning_rate": 6.1688290294735874e-06,
      "loss": 0.0238,
      "step": 1182860
    },
    {
      "epoch": 1.935809063713072,
      "grad_norm": 0.4160258173942566,
      "learning_rate": 6.168763137260071e-06,
      "loss": 0.0134,
      "step": 1182880
    },
    {
      "epoch": 1.9358417941517252,
      "grad_norm": 0.22345273196697235,
      "learning_rate": 6.168697245046553e-06,
      "loss": 0.0189,
      "step": 1182900
    },
    {
      "epoch": 1.9358745245903786,
      "grad_norm": 0.6607335209846497,
      "learning_rate": 6.1686313528330365e-06,
      "loss": 0.0143,
      "step": 1182920
    },
    {
      "epoch": 1.935907255029032,
      "grad_norm": 0.11385612934827805,
      "learning_rate": 6.168565460619519e-06,
      "loss": 0.0161,
      "step": 1182940
    },
    {
      "epoch": 1.935939985467685,
      "grad_norm": 0.32053208351135254,
      "learning_rate": 6.168499568406002e-06,
      "loss": 0.0163,
      "step": 1182960
    },
    {
      "epoch": 1.9359727159063387,
      "grad_norm": 0.37377700209617615,
      "learning_rate": 6.168433676192485e-06,
      "loss": 0.0146,
      "step": 1182980
    },
    {
      "epoch": 1.9360054463449918,
      "grad_norm": 0.3351670801639557,
      "learning_rate": 6.168367783978968e-06,
      "loss": 0.022,
      "step": 1183000
    },
    {
      "epoch": 1.9360381767836452,
      "grad_norm": 0.1644597053527832,
      "learning_rate": 6.16830189176545e-06,
      "loss": 0.0142,
      "step": 1183020
    },
    {
      "epoch": 1.9360709072222986,
      "grad_norm": 0.21722431480884552,
      "learning_rate": 6.168235999551934e-06,
      "loss": 0.0176,
      "step": 1183040
    },
    {
      "epoch": 1.936103637660952,
      "grad_norm": 0.6298348307609558,
      "learning_rate": 6.168170107338416e-06,
      "loss": 0.0223,
      "step": 1183060
    },
    {
      "epoch": 1.9361363680996053,
      "grad_norm": 1.2572778463363647,
      "learning_rate": 6.168104215124899e-06,
      "loss": 0.0197,
      "step": 1183080
    },
    {
      "epoch": 1.9361690985382585,
      "grad_norm": 0.37131667137145996,
      "learning_rate": 6.168038322911382e-06,
      "loss": 0.018,
      "step": 1183100
    },
    {
      "epoch": 1.936201828976912,
      "grad_norm": 0.540148138999939,
      "learning_rate": 6.167972430697865e-06,
      "loss": 0.0157,
      "step": 1183120
    },
    {
      "epoch": 1.9362345594155652,
      "grad_norm": 1.0241179466247559,
      "learning_rate": 6.1679065384843475e-06,
      "loss": 0.015,
      "step": 1183140
    },
    {
      "epoch": 1.9362672898542186,
      "grad_norm": 0.6573527455329895,
      "learning_rate": 6.167840646270831e-06,
      "loss": 0.015,
      "step": 1183160
    },
    {
      "epoch": 1.936300020292872,
      "grad_norm": 0.35872164368629456,
      "learning_rate": 6.167774754057314e-06,
      "loss": 0.0174,
      "step": 1183180
    },
    {
      "epoch": 1.9363327507315253,
      "grad_norm": 0.5093996524810791,
      "learning_rate": 6.1677088618437965e-06,
      "loss": 0.0109,
      "step": 1183200
    },
    {
      "epoch": 1.9363654811701787,
      "grad_norm": 0.23259589076042175,
      "learning_rate": 6.16764296963028e-06,
      "loss": 0.0225,
      "step": 1183220
    },
    {
      "epoch": 1.9363982116088319,
      "grad_norm": 0.31768569350242615,
      "learning_rate": 6.167577077416762e-06,
      "loss": 0.0158,
      "step": 1183240
    },
    {
      "epoch": 1.9364309420474854,
      "grad_norm": 0.5790935158729553,
      "learning_rate": 6.167511185203246e-06,
      "loss": 0.0147,
      "step": 1183260
    },
    {
      "epoch": 1.9364636724861386,
      "grad_norm": 0.39979252219200134,
      "learning_rate": 6.1674452929897275e-06,
      "loss": 0.0233,
      "step": 1183280
    },
    {
      "epoch": 1.936496402924792,
      "grad_norm": 0.2963767349720001,
      "learning_rate": 6.167379400776211e-06,
      "loss": 0.0151,
      "step": 1183300
    },
    {
      "epoch": 1.9365291333634453,
      "grad_norm": 0.9265122413635254,
      "learning_rate": 6.167313508562693e-06,
      "loss": 0.0137,
      "step": 1183320
    },
    {
      "epoch": 1.9365618638020987,
      "grad_norm": 1.0981916189193726,
      "learning_rate": 6.1672476163491766e-06,
      "loss": 0.012,
      "step": 1183340
    },
    {
      "epoch": 1.936594594240752,
      "grad_norm": 0.8926990032196045,
      "learning_rate": 6.167181724135659e-06,
      "loss": 0.0149,
      "step": 1183360
    },
    {
      "epoch": 1.9366273246794052,
      "grad_norm": 0.13142606616020203,
      "learning_rate": 6.167115831922143e-06,
      "loss": 0.0159,
      "step": 1183380
    },
    {
      "epoch": 1.9366600551180588,
      "grad_norm": 0.8764538168907166,
      "learning_rate": 6.167049939708625e-06,
      "loss": 0.0229,
      "step": 1183400
    },
    {
      "epoch": 1.936692785556712,
      "grad_norm": 0.08476453274488449,
      "learning_rate": 6.166984047495108e-06,
      "loss": 0.0169,
      "step": 1183420
    },
    {
      "epoch": 1.9367255159953654,
      "grad_norm": 0.5857225656509399,
      "learning_rate": 6.16691815528159e-06,
      "loss": 0.0167,
      "step": 1183440
    },
    {
      "epoch": 1.9367582464340187,
      "grad_norm": 0.5501680970191956,
      "learning_rate": 6.166852263068074e-06,
      "loss": 0.0283,
      "step": 1183460
    },
    {
      "epoch": 1.936790976872672,
      "grad_norm": 0.23431794345378876,
      "learning_rate": 6.166786370854556e-06,
      "loss": 0.0204,
      "step": 1183480
    },
    {
      "epoch": 1.9368237073113255,
      "grad_norm": 0.6000322699546814,
      "learning_rate": 6.166720478641039e-06,
      "loss": 0.0201,
      "step": 1183500
    },
    {
      "epoch": 1.9368564377499786,
      "grad_norm": 0.3939738869667053,
      "learning_rate": 6.166654586427523e-06,
      "loss": 0.0142,
      "step": 1183520
    },
    {
      "epoch": 1.9368891681886322,
      "grad_norm": 0.7486414313316345,
      "learning_rate": 6.166588694214005e-06,
      "loss": 0.0182,
      "step": 1183540
    },
    {
      "epoch": 1.9369218986272854,
      "grad_norm": 0.6466900110244751,
      "learning_rate": 6.166522802000488e-06,
      "loss": 0.017,
      "step": 1183560
    },
    {
      "epoch": 1.9369546290659387,
      "grad_norm": 0.10270557552576065,
      "learning_rate": 6.166456909786971e-06,
      "loss": 0.0212,
      "step": 1183580
    },
    {
      "epoch": 1.936987359504592,
      "grad_norm": 1.0131558179855347,
      "learning_rate": 6.166391017573454e-06,
      "loss": 0.0204,
      "step": 1183600
    },
    {
      "epoch": 1.9370200899432453,
      "grad_norm": 0.1719244122505188,
      "learning_rate": 6.166325125359937e-06,
      "loss": 0.0151,
      "step": 1183620
    },
    {
      "epoch": 1.9370528203818989,
      "grad_norm": 1.1387345790863037,
      "learning_rate": 6.16625923314642e-06,
      "loss": 0.0159,
      "step": 1183640
    },
    {
      "epoch": 1.937085550820552,
      "grad_norm": 1.5968912839889526,
      "learning_rate": 6.166193340932902e-06,
      "loss": 0.0142,
      "step": 1183660
    },
    {
      "epoch": 1.9371182812592056,
      "grad_norm": 0.8072847723960876,
      "learning_rate": 6.166127448719386e-06,
      "loss": 0.018,
      "step": 1183680
    },
    {
      "epoch": 1.9371510116978587,
      "grad_norm": 0.1912131905555725,
      "learning_rate": 6.166061556505868e-06,
      "loss": 0.0181,
      "step": 1183700
    },
    {
      "epoch": 1.9371837421365121,
      "grad_norm": 1.362961769104004,
      "learning_rate": 6.165995664292351e-06,
      "loss": 0.0181,
      "step": 1183720
    },
    {
      "epoch": 1.9372164725751655,
      "grad_norm": 0.14847172796726227,
      "learning_rate": 6.165929772078834e-06,
      "loss": 0.0223,
      "step": 1183740
    },
    {
      "epoch": 1.9372492030138186,
      "grad_norm": 0.5888856053352356,
      "learning_rate": 6.165863879865317e-06,
      "loss": 0.0157,
      "step": 1183760
    },
    {
      "epoch": 1.9372819334524722,
      "grad_norm": 0.15178868174552917,
      "learning_rate": 6.165797987651799e-06,
      "loss": 0.0192,
      "step": 1183780
    },
    {
      "epoch": 1.9373146638911254,
      "grad_norm": 0.3045480251312256,
      "learning_rate": 6.165732095438283e-06,
      "loss": 0.0131,
      "step": 1183800
    },
    {
      "epoch": 1.9373473943297788,
      "grad_norm": 0.6404573917388916,
      "learning_rate": 6.165666203224765e-06,
      "loss": 0.0169,
      "step": 1183820
    },
    {
      "epoch": 1.9373801247684321,
      "grad_norm": 0.680022656917572,
      "learning_rate": 6.1656003110112484e-06,
      "loss": 0.0207,
      "step": 1183840
    },
    {
      "epoch": 1.9374128552070855,
      "grad_norm": 0.2742939889431,
      "learning_rate": 6.16553441879773e-06,
      "loss": 0.0201,
      "step": 1183860
    },
    {
      "epoch": 1.9374455856457389,
      "grad_norm": 0.22552390396595,
      "learning_rate": 6.165468526584214e-06,
      "loss": 0.0156,
      "step": 1183880
    },
    {
      "epoch": 1.937478316084392,
      "grad_norm": 0.25912734866142273,
      "learning_rate": 6.1654026343706975e-06,
      "loss": 0.0153,
      "step": 1183900
    },
    {
      "epoch": 1.9375110465230456,
      "grad_norm": 1.4898316860198975,
      "learning_rate": 6.165336742157179e-06,
      "loss": 0.0176,
      "step": 1183920
    },
    {
      "epoch": 1.9375437769616988,
      "grad_norm": 0.0918356403708458,
      "learning_rate": 6.165270849943663e-06,
      "loss": 0.0148,
      "step": 1183940
    },
    {
      "epoch": 1.9375765074003521,
      "grad_norm": 0.4417559802532196,
      "learning_rate": 6.165204957730146e-06,
      "loss": 0.0207,
      "step": 1183960
    },
    {
      "epoch": 1.9376092378390055,
      "grad_norm": 0.4088655114173889,
      "learning_rate": 6.1651390655166285e-06,
      "loss": 0.0138,
      "step": 1183980
    },
    {
      "epoch": 1.9376419682776589,
      "grad_norm": 1.1890933513641357,
      "learning_rate": 6.165073173303111e-06,
      "loss": 0.0126,
      "step": 1184000
    },
    {
      "epoch": 1.9376746987163123,
      "grad_norm": 0.9937520027160645,
      "learning_rate": 6.165007281089595e-06,
      "loss": 0.0176,
      "step": 1184020
    },
    {
      "epoch": 1.9377074291549654,
      "grad_norm": 0.611049234867096,
      "learning_rate": 6.164941388876077e-06,
      "loss": 0.0152,
      "step": 1184040
    },
    {
      "epoch": 1.937740159593619,
      "grad_norm": 1.062583565711975,
      "learning_rate": 6.16487549666256e-06,
      "loss": 0.0154,
      "step": 1184060
    },
    {
      "epoch": 1.9377728900322722,
      "grad_norm": 0.3986816108226776,
      "learning_rate": 6.164809604449042e-06,
      "loss": 0.0108,
      "step": 1184080
    },
    {
      "epoch": 1.9378056204709255,
      "grad_norm": 0.4257306158542633,
      "learning_rate": 6.164743712235526e-06,
      "loss": 0.0155,
      "step": 1184100
    },
    {
      "epoch": 1.937838350909579,
      "grad_norm": 0.29082250595092773,
      "learning_rate": 6.1646778200220085e-06,
      "loss": 0.0152,
      "step": 1184120
    },
    {
      "epoch": 1.9378710813482323,
      "grad_norm": 0.6523345112800598,
      "learning_rate": 6.164611927808491e-06,
      "loss": 0.0156,
      "step": 1184140
    },
    {
      "epoch": 1.9379038117868856,
      "grad_norm": 0.6455606818199158,
      "learning_rate": 6.164546035594974e-06,
      "loss": 0.0171,
      "step": 1184160
    },
    {
      "epoch": 1.9379365422255388,
      "grad_norm": 0.36127254366874695,
      "learning_rate": 6.1644801433814576e-06,
      "loss": 0.013,
      "step": 1184180
    },
    {
      "epoch": 1.9379692726641924,
      "grad_norm": 0.4164157807826996,
      "learning_rate": 6.1644142511679395e-06,
      "loss": 0.016,
      "step": 1184200
    },
    {
      "epoch": 1.9380020031028455,
      "grad_norm": 0.34875360131263733,
      "learning_rate": 6.164348358954423e-06,
      "loss": 0.0174,
      "step": 1184220
    },
    {
      "epoch": 1.938034733541499,
      "grad_norm": 0.4179980754852295,
      "learning_rate": 6.164282466740907e-06,
      "loss": 0.0163,
      "step": 1184240
    },
    {
      "epoch": 1.9380674639801523,
      "grad_norm": 0.4607302248477936,
      "learning_rate": 6.1642165745273885e-06,
      "loss": 0.0222,
      "step": 1184260
    },
    {
      "epoch": 1.9381001944188057,
      "grad_norm": 0.5821235775947571,
      "learning_rate": 6.164150682313872e-06,
      "loss": 0.0188,
      "step": 1184280
    },
    {
      "epoch": 1.938132924857459,
      "grad_norm": 0.34747251868247986,
      "learning_rate": 6.164084790100354e-06,
      "loss": 0.0247,
      "step": 1184300
    },
    {
      "epoch": 1.9381656552961122,
      "grad_norm": 0.314529150724411,
      "learning_rate": 6.164018897886838e-06,
      "loss": 0.0261,
      "step": 1184320
    },
    {
      "epoch": 1.9381983857347658,
      "grad_norm": 0.33217543363571167,
      "learning_rate": 6.1639530056733195e-06,
      "loss": 0.0134,
      "step": 1184340
    },
    {
      "epoch": 1.938231116173419,
      "grad_norm": 0.7017529606819153,
      "learning_rate": 6.163887113459803e-06,
      "loss": 0.0105,
      "step": 1184360
    },
    {
      "epoch": 1.9382638466120723,
      "grad_norm": 0.0752471461892128,
      "learning_rate": 6.163821221246286e-06,
      "loss": 0.0162,
      "step": 1184380
    },
    {
      "epoch": 1.9382965770507257,
      "grad_norm": 0.47353750467300415,
      "learning_rate": 6.1637553290327685e-06,
      "loss": 0.0142,
      "step": 1184400
    },
    {
      "epoch": 1.9383293074893788,
      "grad_norm": 0.19382435083389282,
      "learning_rate": 6.163689436819251e-06,
      "loss": 0.0147,
      "step": 1184420
    },
    {
      "epoch": 1.9383620379280324,
      "grad_norm": 0.4240899682044983,
      "learning_rate": 6.163623544605735e-06,
      "loss": 0.0153,
      "step": 1184440
    },
    {
      "epoch": 1.9383947683666856,
      "grad_norm": 0.3459709584712982,
      "learning_rate": 6.163557652392217e-06,
      "loss": 0.0218,
      "step": 1184460
    },
    {
      "epoch": 1.9384274988053392,
      "grad_norm": 0.6027637720108032,
      "learning_rate": 6.1634917601787e-06,
      "loss": 0.0166,
      "step": 1184480
    },
    {
      "epoch": 1.9384602292439923,
      "grad_norm": 1.1563301086425781,
      "learning_rate": 6.163425867965182e-06,
      "loss": 0.0255,
      "step": 1184500
    },
    {
      "epoch": 1.9384929596826457,
      "grad_norm": 0.8448167443275452,
      "learning_rate": 6.163359975751666e-06,
      "loss": 0.0172,
      "step": 1184520
    },
    {
      "epoch": 1.938525690121299,
      "grad_norm": 0.7189684510231018,
      "learning_rate": 6.1632940835381486e-06,
      "loss": 0.0182,
      "step": 1184540
    },
    {
      "epoch": 1.9385584205599522,
      "grad_norm": 0.4709768295288086,
      "learning_rate": 6.163228191324631e-06,
      "loss": 0.0112,
      "step": 1184560
    },
    {
      "epoch": 1.9385911509986058,
      "grad_norm": 0.17394515872001648,
      "learning_rate": 6.163162299111115e-06,
      "loss": 0.017,
      "step": 1184580
    },
    {
      "epoch": 1.938623881437259,
      "grad_norm": 0.42377322912216187,
      "learning_rate": 6.163096406897598e-06,
      "loss": 0.0191,
      "step": 1184600
    },
    {
      "epoch": 1.9386566118759123,
      "grad_norm": 1.047025442123413,
      "learning_rate": 6.16303051468408e-06,
      "loss": 0.0151,
      "step": 1184620
    },
    {
      "epoch": 1.9386893423145657,
      "grad_norm": 0.11884592473506927,
      "learning_rate": 6.162964622470563e-06,
      "loss": 0.0191,
      "step": 1184640
    },
    {
      "epoch": 1.938722072753219,
      "grad_norm": 0.37283191084861755,
      "learning_rate": 6.162898730257047e-06,
      "loss": 0.0151,
      "step": 1184660
    },
    {
      "epoch": 1.9387548031918724,
      "grad_norm": 0.07021906971931458,
      "learning_rate": 6.162832838043529e-06,
      "loss": 0.0149,
      "step": 1184680
    },
    {
      "epoch": 1.9387875336305256,
      "grad_norm": 0.7204676866531372,
      "learning_rate": 6.162766945830012e-06,
      "loss": 0.026,
      "step": 1184700
    },
    {
      "epoch": 1.9388202640691792,
      "grad_norm": 0.22011901438236237,
      "learning_rate": 6.162701053616494e-06,
      "loss": 0.0194,
      "step": 1184720
    },
    {
      "epoch": 1.9388529945078323,
      "grad_norm": 0.5417916774749756,
      "learning_rate": 6.162635161402978e-06,
      "loss": 0.0196,
      "step": 1184740
    },
    {
      "epoch": 1.9388857249464857,
      "grad_norm": 1.2545703649520874,
      "learning_rate": 6.16256926918946e-06,
      "loss": 0.0152,
      "step": 1184760
    },
    {
      "epoch": 1.938918455385139,
      "grad_norm": 0.5622147917747498,
      "learning_rate": 6.162503376975943e-06,
      "loss": 0.0183,
      "step": 1184780
    },
    {
      "epoch": 1.9389511858237924,
      "grad_norm": 1.400380253791809,
      "learning_rate": 6.162437484762426e-06,
      "loss": 0.019,
      "step": 1184800
    },
    {
      "epoch": 1.9389839162624458,
      "grad_norm": 0.25467976927757263,
      "learning_rate": 6.1623715925489095e-06,
      "loss": 0.0158,
      "step": 1184820
    },
    {
      "epoch": 1.939016646701099,
      "grad_norm": 0.13593630492687225,
      "learning_rate": 6.162305700335391e-06,
      "loss": 0.017,
      "step": 1184840
    },
    {
      "epoch": 1.9390493771397526,
      "grad_norm": 0.2839645743370056,
      "learning_rate": 6.162239808121875e-06,
      "loss": 0.0214,
      "step": 1184860
    },
    {
      "epoch": 1.9390821075784057,
      "grad_norm": 0.4080740511417389,
      "learning_rate": 6.162173915908357e-06,
      "loss": 0.02,
      "step": 1184880
    },
    {
      "epoch": 1.939114838017059,
      "grad_norm": 0.6302212476730347,
      "learning_rate": 6.1621080236948404e-06,
      "loss": 0.0126,
      "step": 1184900
    },
    {
      "epoch": 1.9391475684557125,
      "grad_norm": 1.1184039115905762,
      "learning_rate": 6.162042131481324e-06,
      "loss": 0.0233,
      "step": 1184920
    },
    {
      "epoch": 1.9391802988943658,
      "grad_norm": 0.20389088988304138,
      "learning_rate": 6.161976239267806e-06,
      "loss": 0.0176,
      "step": 1184940
    },
    {
      "epoch": 1.9392130293330192,
      "grad_norm": 0.2581993639469147,
      "learning_rate": 6.1619103470542895e-06,
      "loss": 0.0151,
      "step": 1184960
    },
    {
      "epoch": 1.9392457597716724,
      "grad_norm": 0.2796626091003418,
      "learning_rate": 6.161844454840772e-06,
      "loss": 0.0245,
      "step": 1184980
    },
    {
      "epoch": 1.939278490210326,
      "grad_norm": 0.6461237668991089,
      "learning_rate": 6.161778562627255e-06,
      "loss": 0.0172,
      "step": 1185000
    },
    {
      "epoch": 1.939311220648979,
      "grad_norm": 0.8821778297424316,
      "learning_rate": 6.161712670413738e-06,
      "loss": 0.0184,
      "step": 1185020
    },
    {
      "epoch": 1.9393439510876325,
      "grad_norm": 1.3511290550231934,
      "learning_rate": 6.161646778200221e-06,
      "loss": 0.0238,
      "step": 1185040
    },
    {
      "epoch": 1.9393766815262858,
      "grad_norm": 0.17431114614009857,
      "learning_rate": 6.161580885986703e-06,
      "loss": 0.0133,
      "step": 1185060
    },
    {
      "epoch": 1.939409411964939,
      "grad_norm": 0.503028929233551,
      "learning_rate": 6.161514993773187e-06,
      "loss": 0.0183,
      "step": 1185080
    },
    {
      "epoch": 1.9394421424035926,
      "grad_norm": 0.27626538276672363,
      "learning_rate": 6.161449101559669e-06,
      "loss": 0.0135,
      "step": 1185100
    },
    {
      "epoch": 1.9394748728422457,
      "grad_norm": 0.6007818579673767,
      "learning_rate": 6.161383209346152e-06,
      "loss": 0.0175,
      "step": 1185120
    },
    {
      "epoch": 1.9395076032808993,
      "grad_norm": 0.09664703160524368,
      "learning_rate": 6.161317317132635e-06,
      "loss": 0.0156,
      "step": 1185140
    },
    {
      "epoch": 1.9395403337195525,
      "grad_norm": 0.47722306847572327,
      "learning_rate": 6.161251424919118e-06,
      "loss": 0.0173,
      "step": 1185160
    },
    {
      "epoch": 1.9395730641582059,
      "grad_norm": 0.3637859523296356,
      "learning_rate": 6.1611855327056005e-06,
      "loss": 0.023,
      "step": 1185180
    },
    {
      "epoch": 1.9396057945968592,
      "grad_norm": 0.7988002300262451,
      "learning_rate": 6.161119640492084e-06,
      "loss": 0.0191,
      "step": 1185200
    },
    {
      "epoch": 1.9396385250355124,
      "grad_norm": 0.26378947496414185,
      "learning_rate": 6.161053748278566e-06,
      "loss": 0.0134,
      "step": 1185220
    },
    {
      "epoch": 1.939671255474166,
      "grad_norm": 0.29956191778182983,
      "learning_rate": 6.1609878560650495e-06,
      "loss": 0.0105,
      "step": 1185240
    },
    {
      "epoch": 1.9397039859128191,
      "grad_norm": 0.6411019563674927,
      "learning_rate": 6.1609219638515314e-06,
      "loss": 0.0098,
      "step": 1185260
    },
    {
      "epoch": 1.9397367163514725,
      "grad_norm": 0.18236534297466278,
      "learning_rate": 6.160856071638015e-06,
      "loss": 0.0216,
      "step": 1185280
    },
    {
      "epoch": 1.9397694467901259,
      "grad_norm": 0.4248165786266327,
      "learning_rate": 6.160790179424499e-06,
      "loss": 0.014,
      "step": 1185300
    },
    {
      "epoch": 1.9398021772287792,
      "grad_norm": 0.329495906829834,
      "learning_rate": 6.1607242872109805e-06,
      "loss": 0.0184,
      "step": 1185320
    },
    {
      "epoch": 1.9398349076674326,
      "grad_norm": 0.09259431809186935,
      "learning_rate": 6.160658394997464e-06,
      "loss": 0.0167,
      "step": 1185340
    },
    {
      "epoch": 1.9398676381060858,
      "grad_norm": 0.31124550104141235,
      "learning_rate": 6.160592502783946e-06,
      "loss": 0.0163,
      "step": 1185360
    },
    {
      "epoch": 1.9399003685447394,
      "grad_norm": 0.1600823700428009,
      "learning_rate": 6.1605266105704296e-06,
      "loss": 0.0199,
      "step": 1185380
    },
    {
      "epoch": 1.9399330989833925,
      "grad_norm": 1.3732610940933228,
      "learning_rate": 6.160460718356912e-06,
      "loss": 0.0165,
      "step": 1185400
    },
    {
      "epoch": 1.9399658294220459,
      "grad_norm": 1.1540796756744385,
      "learning_rate": 6.160394826143395e-06,
      "loss": 0.0299,
      "step": 1185420
    },
    {
      "epoch": 1.9399985598606992,
      "grad_norm": 0.40756741166114807,
      "learning_rate": 6.160328933929878e-06,
      "loss": 0.0184,
      "step": 1185440
    },
    {
      "epoch": 1.9400312902993526,
      "grad_norm": 0.39442020654678345,
      "learning_rate": 6.160263041716361e-06,
      "loss": 0.0229,
      "step": 1185460
    },
    {
      "epoch": 1.940064020738006,
      "grad_norm": 0.30198386311531067,
      "learning_rate": 6.160197149502843e-06,
      "loss": 0.0159,
      "step": 1185480
    },
    {
      "epoch": 1.9400967511766591,
      "grad_norm": 0.1632038652896881,
      "learning_rate": 6.160131257289327e-06,
      "loss": 0.0159,
      "step": 1185500
    },
    {
      "epoch": 1.9401294816153127,
      "grad_norm": 1.2648074626922607,
      "learning_rate": 6.160065365075809e-06,
      "loss": 0.0236,
      "step": 1185520
    },
    {
      "epoch": 1.9401622120539659,
      "grad_norm": 0.19196467101573944,
      "learning_rate": 6.159999472862292e-06,
      "loss": 0.0265,
      "step": 1185540
    },
    {
      "epoch": 1.9401949424926193,
      "grad_norm": 0.49702441692352295,
      "learning_rate": 6.159933580648775e-06,
      "loss": 0.0186,
      "step": 1185560
    },
    {
      "epoch": 1.9402276729312726,
      "grad_norm": 0.539146363735199,
      "learning_rate": 6.159867688435258e-06,
      "loss": 0.0161,
      "step": 1185580
    },
    {
      "epoch": 1.940260403369926,
      "grad_norm": 0.3671247661113739,
      "learning_rate": 6.1598017962217406e-06,
      "loss": 0.0179,
      "step": 1185600
    },
    {
      "epoch": 1.9402931338085794,
      "grad_norm": 0.22710168361663818,
      "learning_rate": 6.159735904008224e-06,
      "loss": 0.0193,
      "step": 1185620
    },
    {
      "epoch": 1.9403258642472325,
      "grad_norm": 0.42185312509536743,
      "learning_rate": 6.159670011794707e-06,
      "loss": 0.0174,
      "step": 1185640
    },
    {
      "epoch": 1.9403585946858861,
      "grad_norm": 0.5348814129829407,
      "learning_rate": 6.15960411958119e-06,
      "loss": 0.0164,
      "step": 1185660
    },
    {
      "epoch": 1.9403913251245393,
      "grad_norm": 0.25312259793281555,
      "learning_rate": 6.159538227367673e-06,
      "loss": 0.0185,
      "step": 1185680
    },
    {
      "epoch": 1.9404240555631926,
      "grad_norm": 0.48894819617271423,
      "learning_rate": 6.159472335154155e-06,
      "loss": 0.0165,
      "step": 1185700
    },
    {
      "epoch": 1.940456786001846,
      "grad_norm": 0.5408350229263306,
      "learning_rate": 6.159406442940639e-06,
      "loss": 0.0187,
      "step": 1185720
    },
    {
      "epoch": 1.9404895164404994,
      "grad_norm": 0.356777548789978,
      "learning_rate": 6.159340550727121e-06,
      "loss": 0.0134,
      "step": 1185740
    },
    {
      "epoch": 1.9405222468791528,
      "grad_norm": 0.5601022839546204,
      "learning_rate": 6.159274658513604e-06,
      "loss": 0.019,
      "step": 1185760
    },
    {
      "epoch": 1.940554977317806,
      "grad_norm": 0.7033481001853943,
      "learning_rate": 6.159208766300087e-06,
      "loss": 0.0191,
      "step": 1185780
    },
    {
      "epoch": 1.9405877077564595,
      "grad_norm": 4.455079555511475,
      "learning_rate": 6.15914287408657e-06,
      "loss": 0.0164,
      "step": 1185800
    },
    {
      "epoch": 1.9406204381951127,
      "grad_norm": 0.662824809551239,
      "learning_rate": 6.159076981873052e-06,
      "loss": 0.0234,
      "step": 1185820
    },
    {
      "epoch": 1.940653168633766,
      "grad_norm": 0.1608111560344696,
      "learning_rate": 6.159011089659536e-06,
      "loss": 0.0206,
      "step": 1185840
    },
    {
      "epoch": 1.9406858990724194,
      "grad_norm": 0.29967573285102844,
      "learning_rate": 6.158945197446018e-06,
      "loss": 0.0262,
      "step": 1185860
    },
    {
      "epoch": 1.9407186295110725,
      "grad_norm": 0.42090195417404175,
      "learning_rate": 6.1588793052325015e-06,
      "loss": 0.0253,
      "step": 1185880
    },
    {
      "epoch": 1.9407513599497261,
      "grad_norm": 0.41025471687316895,
      "learning_rate": 6.158813413018983e-06,
      "loss": 0.0225,
      "step": 1185900
    },
    {
      "epoch": 1.9407840903883793,
      "grad_norm": 0.31819847226142883,
      "learning_rate": 6.158747520805467e-06,
      "loss": 0.0167,
      "step": 1185920
    },
    {
      "epoch": 1.9408168208270329,
      "grad_norm": 0.31767481565475464,
      "learning_rate": 6.15868162859195e-06,
      "loss": 0.0188,
      "step": 1185940
    },
    {
      "epoch": 1.940849551265686,
      "grad_norm": 0.5411015152931213,
      "learning_rate": 6.158615736378432e-06,
      "loss": 0.0182,
      "step": 1185960
    },
    {
      "epoch": 1.9408822817043394,
      "grad_norm": 0.5001444220542908,
      "learning_rate": 6.158549844164916e-06,
      "loss": 0.0125,
      "step": 1185980
    },
    {
      "epoch": 1.9409150121429928,
      "grad_norm": 0.3735179603099823,
      "learning_rate": 6.158483951951399e-06,
      "loss": 0.0197,
      "step": 1186000
    },
    {
      "epoch": 1.940947742581646,
      "grad_norm": 0.8855879902839661,
      "learning_rate": 6.1584180597378815e-06,
      "loss": 0.0232,
      "step": 1186020
    },
    {
      "epoch": 1.9409804730202995,
      "grad_norm": 0.6756439208984375,
      "learning_rate": 6.158352167524364e-06,
      "loss": 0.0154,
      "step": 1186040
    },
    {
      "epoch": 1.9410132034589527,
      "grad_norm": 0.5737373232841492,
      "learning_rate": 6.158286275310848e-06,
      "loss": 0.031,
      "step": 1186060
    },
    {
      "epoch": 1.941045933897606,
      "grad_norm": 0.2436845749616623,
      "learning_rate": 6.15822038309733e-06,
      "loss": 0.0168,
      "step": 1186080
    },
    {
      "epoch": 1.9410786643362594,
      "grad_norm": 0.5741407871246338,
      "learning_rate": 6.158154490883813e-06,
      "loss": 0.0145,
      "step": 1186100
    },
    {
      "epoch": 1.9411113947749128,
      "grad_norm": 0.2767319679260254,
      "learning_rate": 6.158088598670295e-06,
      "loss": 0.01,
      "step": 1186120
    },
    {
      "epoch": 1.9411441252135662,
      "grad_norm": 1.108783483505249,
      "learning_rate": 6.158022706456779e-06,
      "loss": 0.0178,
      "step": 1186140
    },
    {
      "epoch": 1.9411768556522193,
      "grad_norm": 0.7208028435707092,
      "learning_rate": 6.1579568142432615e-06,
      "loss": 0.0236,
      "step": 1186160
    },
    {
      "epoch": 1.941209586090873,
      "grad_norm": 1.1226178407669067,
      "learning_rate": 6.157890922029744e-06,
      "loss": 0.0157,
      "step": 1186180
    },
    {
      "epoch": 1.941242316529526,
      "grad_norm": 0.6717082262039185,
      "learning_rate": 6.157825029816227e-06,
      "loss": 0.0199,
      "step": 1186200
    },
    {
      "epoch": 1.9412750469681794,
      "grad_norm": 0.3133256137371063,
      "learning_rate": 6.1577591376027106e-06,
      "loss": 0.0179,
      "step": 1186220
    },
    {
      "epoch": 1.9413077774068328,
      "grad_norm": 0.8705429434776306,
      "learning_rate": 6.1576932453891925e-06,
      "loss": 0.0153,
      "step": 1186240
    },
    {
      "epoch": 1.9413405078454862,
      "grad_norm": 0.1723535656929016,
      "learning_rate": 6.157627353175676e-06,
      "loss": 0.0154,
      "step": 1186260
    },
    {
      "epoch": 1.9413732382841395,
      "grad_norm": 1.178517460823059,
      "learning_rate": 6.157561460962158e-06,
      "loss": 0.0209,
      "step": 1186280
    },
    {
      "epoch": 1.9414059687227927,
      "grad_norm": 0.239347442984581,
      "learning_rate": 6.1574955687486415e-06,
      "loss": 0.0176,
      "step": 1186300
    },
    {
      "epoch": 1.9414386991614463,
      "grad_norm": 0.34959641098976135,
      "learning_rate": 6.1574296765351234e-06,
      "loss": 0.0174,
      "step": 1186320
    },
    {
      "epoch": 1.9414714296000994,
      "grad_norm": 0.5879173874855042,
      "learning_rate": 6.157363784321607e-06,
      "loss": 0.0196,
      "step": 1186340
    },
    {
      "epoch": 1.9415041600387528,
      "grad_norm": 0.2069445550441742,
      "learning_rate": 6.157297892108091e-06,
      "loss": 0.0128,
      "step": 1186360
    },
    {
      "epoch": 1.9415368904774062,
      "grad_norm": 0.2462855726480484,
      "learning_rate": 6.1572319998945725e-06,
      "loss": 0.0226,
      "step": 1186380
    },
    {
      "epoch": 1.9415696209160596,
      "grad_norm": 0.5705112814903259,
      "learning_rate": 6.157166107681056e-06,
      "loss": 0.0178,
      "step": 1186400
    },
    {
      "epoch": 1.941602351354713,
      "grad_norm": 0.7207019925117493,
      "learning_rate": 6.157100215467539e-06,
      "loss": 0.0208,
      "step": 1186420
    },
    {
      "epoch": 1.941635081793366,
      "grad_norm": 1.13584566116333,
      "learning_rate": 6.1570343232540216e-06,
      "loss": 0.0272,
      "step": 1186440
    },
    {
      "epoch": 1.9416678122320197,
      "grad_norm": 0.9399576783180237,
      "learning_rate": 6.156968431040504e-06,
      "loss": 0.0199,
      "step": 1186460
    },
    {
      "epoch": 1.9417005426706728,
      "grad_norm": 0.09122173488140106,
      "learning_rate": 6.156902538826988e-06,
      "loss": 0.0199,
      "step": 1186480
    },
    {
      "epoch": 1.9417332731093262,
      "grad_norm": 9.418170928955078,
      "learning_rate": 6.15683664661347e-06,
      "loss": 0.0245,
      "step": 1186500
    },
    {
      "epoch": 1.9417660035479796,
      "grad_norm": 0.7471350431442261,
      "learning_rate": 6.156770754399953e-06,
      "loss": 0.0151,
      "step": 1186520
    },
    {
      "epoch": 1.941798733986633,
      "grad_norm": 0.23624657094478607,
      "learning_rate": 6.156704862186435e-06,
      "loss": 0.0234,
      "step": 1186540
    },
    {
      "epoch": 1.9418314644252863,
      "grad_norm": 0.25228023529052734,
      "learning_rate": 6.156638969972919e-06,
      "loss": 0.0156,
      "step": 1186560
    },
    {
      "epoch": 1.9418641948639395,
      "grad_norm": 0.41726937890052795,
      "learning_rate": 6.156573077759402e-06,
      "loss": 0.0191,
      "step": 1186580
    },
    {
      "epoch": 1.941896925302593,
      "grad_norm": 0.3463769257068634,
      "learning_rate": 6.156507185545884e-06,
      "loss": 0.0209,
      "step": 1186600
    },
    {
      "epoch": 1.9419296557412462,
      "grad_norm": 0.21634800732135773,
      "learning_rate": 6.156441293332367e-06,
      "loss": 0.0226,
      "step": 1186620
    },
    {
      "epoch": 1.9419623861798996,
      "grad_norm": 0.3319777548313141,
      "learning_rate": 6.156375401118851e-06,
      "loss": 0.0166,
      "step": 1186640
    },
    {
      "epoch": 1.941995116618553,
      "grad_norm": 0.6090202331542969,
      "learning_rate": 6.1563095089053325e-06,
      "loss": 0.0184,
      "step": 1186660
    },
    {
      "epoch": 1.942027847057206,
      "grad_norm": 0.3369324207305908,
      "learning_rate": 6.156243616691816e-06,
      "loss": 0.0184,
      "step": 1186680
    },
    {
      "epoch": 1.9420605774958597,
      "grad_norm": 0.27873972058296204,
      "learning_rate": 6.1561777244783e-06,
      "loss": 0.0196,
      "step": 1186700
    },
    {
      "epoch": 1.9420933079345128,
      "grad_norm": 0.2728244960308075,
      "learning_rate": 6.156111832264782e-06,
      "loss": 0.0162,
      "step": 1186720
    },
    {
      "epoch": 1.9421260383731664,
      "grad_norm": 0.3629525601863861,
      "learning_rate": 6.156045940051265e-06,
      "loss": 0.016,
      "step": 1186740
    },
    {
      "epoch": 1.9421587688118196,
      "grad_norm": 0.32723498344421387,
      "learning_rate": 6.155980047837747e-06,
      "loss": 0.0218,
      "step": 1186760
    },
    {
      "epoch": 1.942191499250473,
      "grad_norm": 0.5515305399894714,
      "learning_rate": 6.155914155624231e-06,
      "loss": 0.0136,
      "step": 1186780
    },
    {
      "epoch": 1.9422242296891263,
      "grad_norm": 0.11739274859428406,
      "learning_rate": 6.155848263410713e-06,
      "loss": 0.0124,
      "step": 1186800
    },
    {
      "epoch": 1.9422569601277795,
      "grad_norm": 0.6008366942405701,
      "learning_rate": 6.155782371197196e-06,
      "loss": 0.0207,
      "step": 1186820
    },
    {
      "epoch": 1.942289690566433,
      "grad_norm": 1.090875267982483,
      "learning_rate": 6.155716478983679e-06,
      "loss": 0.0143,
      "step": 1186840
    },
    {
      "epoch": 1.9423224210050862,
      "grad_norm": 0.12155141681432724,
      "learning_rate": 6.1556505867701625e-06,
      "loss": 0.0163,
      "step": 1186860
    },
    {
      "epoch": 1.9423551514437396,
      "grad_norm": 0.1930067241191864,
      "learning_rate": 6.155584694556644e-06,
      "loss": 0.02,
      "step": 1186880
    },
    {
      "epoch": 1.942387881882393,
      "grad_norm": 0.34802910685539246,
      "learning_rate": 6.155518802343128e-06,
      "loss": 0.0158,
      "step": 1186900
    },
    {
      "epoch": 1.9424206123210463,
      "grad_norm": 1.3444392681121826,
      "learning_rate": 6.15545291012961e-06,
      "loss": 0.0192,
      "step": 1186920
    },
    {
      "epoch": 1.9424533427596997,
      "grad_norm": 0.3082311451435089,
      "learning_rate": 6.1553870179160934e-06,
      "loss": 0.0158,
      "step": 1186940
    },
    {
      "epoch": 1.9424860731983529,
      "grad_norm": 0.8133317828178406,
      "learning_rate": 6.155321125702576e-06,
      "loss": 0.017,
      "step": 1186960
    },
    {
      "epoch": 1.9425188036370065,
      "grad_norm": 0.3424284756183624,
      "learning_rate": 6.155255233489059e-06,
      "loss": 0.0162,
      "step": 1186980
    },
    {
      "epoch": 1.9425515340756596,
      "grad_norm": 0.4464610517024994,
      "learning_rate": 6.155189341275542e-06,
      "loss": 0.0235,
      "step": 1187000
    },
    {
      "epoch": 1.942584264514313,
      "grad_norm": 0.4987713396549225,
      "learning_rate": 6.155123449062025e-06,
      "loss": 0.0183,
      "step": 1187020
    },
    {
      "epoch": 1.9426169949529664,
      "grad_norm": 0.33204126358032227,
      "learning_rate": 6.155057556848508e-06,
      "loss": 0.0147,
      "step": 1187040
    },
    {
      "epoch": 1.9426497253916197,
      "grad_norm": 0.5946652293205261,
      "learning_rate": 6.154991664634991e-06,
      "loss": 0.0198,
      "step": 1187060
    },
    {
      "epoch": 1.942682455830273,
      "grad_norm": 0.32916441559791565,
      "learning_rate": 6.154925772421474e-06,
      "loss": 0.0158,
      "step": 1187080
    },
    {
      "epoch": 1.9427151862689263,
      "grad_norm": 0.49220648407936096,
      "learning_rate": 6.154859880207956e-06,
      "loss": 0.0121,
      "step": 1187100
    },
    {
      "epoch": 1.9427479167075798,
      "grad_norm": 0.7326292991638184,
      "learning_rate": 6.15479398799444e-06,
      "loss": 0.0135,
      "step": 1187120
    },
    {
      "epoch": 1.942780647146233,
      "grad_norm": 0.7594352960586548,
      "learning_rate": 6.154728095780922e-06,
      "loss": 0.0228,
      "step": 1187140
    },
    {
      "epoch": 1.9428133775848864,
      "grad_norm": 0.15741777420043945,
      "learning_rate": 6.154662203567405e-06,
      "loss": 0.0191,
      "step": 1187160
    },
    {
      "epoch": 1.9428461080235397,
      "grad_norm": 0.6811975240707397,
      "learning_rate": 6.154596311353887e-06,
      "loss": 0.02,
      "step": 1187180
    },
    {
      "epoch": 1.9428788384621931,
      "grad_norm": 0.41419893503189087,
      "learning_rate": 6.154530419140371e-06,
      "loss": 0.0111,
      "step": 1187200
    },
    {
      "epoch": 1.9429115689008465,
      "grad_norm": 0.49654147028923035,
      "learning_rate": 6.1544645269268535e-06,
      "loss": 0.0152,
      "step": 1187220
    },
    {
      "epoch": 1.9429442993394996,
      "grad_norm": 0.6006001234054565,
      "learning_rate": 6.154398634713336e-06,
      "loss": 0.015,
      "step": 1187240
    },
    {
      "epoch": 1.9429770297781532,
      "grad_norm": 0.1184171661734581,
      "learning_rate": 6.154332742499819e-06,
      "loss": 0.0161,
      "step": 1187260
    },
    {
      "epoch": 1.9430097602168064,
      "grad_norm": 0.5793466567993164,
      "learning_rate": 6.1542668502863026e-06,
      "loss": 0.017,
      "step": 1187280
    },
    {
      "epoch": 1.9430424906554598,
      "grad_norm": 1.1016167402267456,
      "learning_rate": 6.1542009580727844e-06,
      "loss": 0.0179,
      "step": 1187300
    },
    {
      "epoch": 1.9430752210941131,
      "grad_norm": 0.46862202882766724,
      "learning_rate": 6.154135065859268e-06,
      "loss": 0.019,
      "step": 1187320
    },
    {
      "epoch": 1.9431079515327665,
      "grad_norm": 1.1449294090270996,
      "learning_rate": 6.15406917364575e-06,
      "loss": 0.015,
      "step": 1187340
    },
    {
      "epoch": 1.9431406819714199,
      "grad_norm": 0.36767056584358215,
      "learning_rate": 6.1540032814322335e-06,
      "loss": 0.0119,
      "step": 1187360
    },
    {
      "epoch": 1.943173412410073,
      "grad_norm": 0.6372286677360535,
      "learning_rate": 6.153937389218716e-06,
      "loss": 0.016,
      "step": 1187380
    },
    {
      "epoch": 1.9432061428487266,
      "grad_norm": 0.22557705640792847,
      "learning_rate": 6.153871497005199e-06,
      "loss": 0.0176,
      "step": 1187400
    },
    {
      "epoch": 1.9432388732873798,
      "grad_norm": 0.20037272572517395,
      "learning_rate": 6.153805604791683e-06,
      "loss": 0.0146,
      "step": 1187420
    },
    {
      "epoch": 1.9432716037260331,
      "grad_norm": 0.36260879039764404,
      "learning_rate": 6.153739712578165e-06,
      "loss": 0.0122,
      "step": 1187440
    },
    {
      "epoch": 1.9433043341646865,
      "grad_norm": 0.19753260910511017,
      "learning_rate": 6.153673820364648e-06,
      "loss": 0.0179,
      "step": 1187460
    },
    {
      "epoch": 1.9433370646033397,
      "grad_norm": 0.6349703669548035,
      "learning_rate": 6.153607928151131e-06,
      "loss": 0.0178,
      "step": 1187480
    },
    {
      "epoch": 1.9433697950419933,
      "grad_norm": 0.23709701001644135,
      "learning_rate": 6.153542035937614e-06,
      "loss": 0.019,
      "step": 1187500
    },
    {
      "epoch": 1.9434025254806464,
      "grad_norm": 0.9037704467773438,
      "learning_rate": 6.153476143724096e-06,
      "loss": 0.0249,
      "step": 1187520
    },
    {
      "epoch": 1.9434352559192998,
      "grad_norm": 0.17919094860553741,
      "learning_rate": 6.15341025151058e-06,
      "loss": 0.0112,
      "step": 1187540
    },
    {
      "epoch": 1.9434679863579531,
      "grad_norm": 0.5067839026451111,
      "learning_rate": 6.153344359297062e-06,
      "loss": 0.0181,
      "step": 1187560
    },
    {
      "epoch": 1.9435007167966065,
      "grad_norm": 0.1414307951927185,
      "learning_rate": 6.153278467083545e-06,
      "loss": 0.0135,
      "step": 1187580
    },
    {
      "epoch": 1.94353344723526,
      "grad_norm": 0.8649210333824158,
      "learning_rate": 6.153212574870028e-06,
      "loss": 0.0196,
      "step": 1187600
    },
    {
      "epoch": 1.943566177673913,
      "grad_norm": 0.564998984336853,
      "learning_rate": 6.153146682656511e-06,
      "loss": 0.0118,
      "step": 1187620
    },
    {
      "epoch": 1.9435989081125666,
      "grad_norm": 1.1206227540969849,
      "learning_rate": 6.1530807904429936e-06,
      "loss": 0.0145,
      "step": 1187640
    },
    {
      "epoch": 1.9436316385512198,
      "grad_norm": 0.1764192283153534,
      "learning_rate": 6.153014898229477e-06,
      "loss": 0.0208,
      "step": 1187660
    },
    {
      "epoch": 1.9436643689898732,
      "grad_norm": 0.5379799604415894,
      "learning_rate": 6.152949006015959e-06,
      "loss": 0.0137,
      "step": 1187680
    },
    {
      "epoch": 1.9436970994285265,
      "grad_norm": 0.6859302520751953,
      "learning_rate": 6.152883113802443e-06,
      "loss": 0.0183,
      "step": 1187700
    },
    {
      "epoch": 1.94372982986718,
      "grad_norm": 0.7215505838394165,
      "learning_rate": 6.1528172215889245e-06,
      "loss": 0.0178,
      "step": 1187720
    },
    {
      "epoch": 1.9437625603058333,
      "grad_norm": 0.4413178563117981,
      "learning_rate": 6.152751329375408e-06,
      "loss": 0.0262,
      "step": 1187740
    },
    {
      "epoch": 1.9437952907444864,
      "grad_norm": 0.16428376734256744,
      "learning_rate": 6.152685437161892e-06,
      "loss": 0.0151,
      "step": 1187760
    },
    {
      "epoch": 1.94382802118314,
      "grad_norm": 0.09275931119918823,
      "learning_rate": 6.152619544948374e-06,
      "loss": 0.0161,
      "step": 1187780
    },
    {
      "epoch": 1.9438607516217932,
      "grad_norm": 0.35174861550331116,
      "learning_rate": 6.152553652734857e-06,
      "loss": 0.0149,
      "step": 1187800
    },
    {
      "epoch": 1.9438934820604465,
      "grad_norm": 0.8220487833023071,
      "learning_rate": 6.15248776052134e-06,
      "loss": 0.0251,
      "step": 1187820
    },
    {
      "epoch": 1.9439262124991,
      "grad_norm": 0.24454419314861298,
      "learning_rate": 6.152421868307823e-06,
      "loss": 0.0274,
      "step": 1187840
    },
    {
      "epoch": 1.9439589429377533,
      "grad_norm": 0.5115363001823425,
      "learning_rate": 6.152355976094305e-06,
      "loss": 0.0216,
      "step": 1187860
    },
    {
      "epoch": 1.9439916733764067,
      "grad_norm": 0.9845454692840576,
      "learning_rate": 6.152290083880789e-06,
      "loss": 0.0169,
      "step": 1187880
    },
    {
      "epoch": 1.9440244038150598,
      "grad_norm": 1.0876814126968384,
      "learning_rate": 6.152224191667271e-06,
      "loss": 0.021,
      "step": 1187900
    },
    {
      "epoch": 1.9440571342537134,
      "grad_norm": 0.13118593394756317,
      "learning_rate": 6.1521582994537545e-06,
      "loss": 0.0133,
      "step": 1187920
    },
    {
      "epoch": 1.9440898646923666,
      "grad_norm": 0.6082883477210999,
      "learning_rate": 6.152092407240236e-06,
      "loss": 0.0142,
      "step": 1187940
    },
    {
      "epoch": 1.94412259513102,
      "grad_norm": 0.621317982673645,
      "learning_rate": 6.15202651502672e-06,
      "loss": 0.0173,
      "step": 1187960
    },
    {
      "epoch": 1.9441553255696733,
      "grad_norm": 1.204040288925171,
      "learning_rate": 6.151960622813203e-06,
      "loss": 0.018,
      "step": 1187980
    },
    {
      "epoch": 1.9441880560083267,
      "grad_norm": 0.40717050433158875,
      "learning_rate": 6.151894730599685e-06,
      "loss": 0.0188,
      "step": 1188000
    },
    {
      "epoch": 1.94422078644698,
      "grad_norm": 0.5671408772468567,
      "learning_rate": 6.151828838386168e-06,
      "loss": 0.0204,
      "step": 1188020
    },
    {
      "epoch": 1.9442535168856332,
      "grad_norm": 0.37724339962005615,
      "learning_rate": 6.151762946172652e-06,
      "loss": 0.013,
      "step": 1188040
    },
    {
      "epoch": 1.9442862473242868,
      "grad_norm": 0.42187488079071045,
      "learning_rate": 6.151697053959134e-06,
      "loss": 0.0193,
      "step": 1188060
    },
    {
      "epoch": 1.94431897776294,
      "grad_norm": 0.15012185275554657,
      "learning_rate": 6.151631161745617e-06,
      "loss": 0.0188,
      "step": 1188080
    },
    {
      "epoch": 1.9443517082015933,
      "grad_norm": 0.18512824177742004,
      "learning_rate": 6.151565269532101e-06,
      "loss": 0.0167,
      "step": 1188100
    },
    {
      "epoch": 1.9443844386402467,
      "grad_norm": 0.4056662917137146,
      "learning_rate": 6.151499377318583e-06,
      "loss": 0.0142,
      "step": 1188120
    },
    {
      "epoch": 1.9444171690788998,
      "grad_norm": 0.48774704337120056,
      "learning_rate": 6.151433485105066e-06,
      "loss": 0.0235,
      "step": 1188140
    },
    {
      "epoch": 1.9444498995175534,
      "grad_norm": 0.5023360848426819,
      "learning_rate": 6.151367592891548e-06,
      "loss": 0.02,
      "step": 1188160
    },
    {
      "epoch": 1.9444826299562066,
      "grad_norm": 0.16000698506832123,
      "learning_rate": 6.151301700678032e-06,
      "loss": 0.0174,
      "step": 1188180
    },
    {
      "epoch": 1.9445153603948602,
      "grad_norm": 1.2379262447357178,
      "learning_rate": 6.151235808464514e-06,
      "loss": 0.0209,
      "step": 1188200
    },
    {
      "epoch": 1.9445480908335133,
      "grad_norm": 0.41636529564857483,
      "learning_rate": 6.151169916250997e-06,
      "loss": 0.0183,
      "step": 1188220
    },
    {
      "epoch": 1.9445808212721667,
      "grad_norm": 0.7448819875717163,
      "learning_rate": 6.15110402403748e-06,
      "loss": 0.0131,
      "step": 1188240
    },
    {
      "epoch": 1.94461355171082,
      "grad_norm": 2.9488656520843506,
      "learning_rate": 6.151038131823963e-06,
      "loss": 0.0132,
      "step": 1188260
    },
    {
      "epoch": 1.9446462821494732,
      "grad_norm": 0.33202648162841797,
      "learning_rate": 6.1509722396104455e-06,
      "loss": 0.019,
      "step": 1188280
    },
    {
      "epoch": 1.9446790125881268,
      "grad_norm": 0.4896058440208435,
      "learning_rate": 6.150906347396929e-06,
      "loss": 0.0208,
      "step": 1188300
    },
    {
      "epoch": 1.94471174302678,
      "grad_norm": 0.2762054204940796,
      "learning_rate": 6.150840455183411e-06,
      "loss": 0.0169,
      "step": 1188320
    },
    {
      "epoch": 1.9447444734654333,
      "grad_norm": 1.1865078210830688,
      "learning_rate": 6.1507745629698945e-06,
      "loss": 0.0134,
      "step": 1188340
    },
    {
      "epoch": 1.9447772039040867,
      "grad_norm": 0.8346880078315735,
      "learning_rate": 6.1507086707563764e-06,
      "loss": 0.0099,
      "step": 1188360
    },
    {
      "epoch": 1.94480993434274,
      "grad_norm": 0.2933332622051239,
      "learning_rate": 6.15064277854286e-06,
      "loss": 0.0132,
      "step": 1188380
    },
    {
      "epoch": 1.9448426647813934,
      "grad_norm": 0.5641545057296753,
      "learning_rate": 6.150576886329343e-06,
      "loss": 0.0204,
      "step": 1188400
    },
    {
      "epoch": 1.9448753952200466,
      "grad_norm": 1.0030252933502197,
      "learning_rate": 6.1505109941158255e-06,
      "loss": 0.0142,
      "step": 1188420
    },
    {
      "epoch": 1.9449081256587002,
      "grad_norm": 0.3541596233844757,
      "learning_rate": 6.150445101902309e-06,
      "loss": 0.016,
      "step": 1188440
    },
    {
      "epoch": 1.9449408560973533,
      "grad_norm": 0.4279702603816986,
      "learning_rate": 6.150379209688792e-06,
      "loss": 0.0125,
      "step": 1188460
    },
    {
      "epoch": 1.9449735865360067,
      "grad_norm": 0.1590193212032318,
      "learning_rate": 6.1503133174752746e-06,
      "loss": 0.0185,
      "step": 1188480
    },
    {
      "epoch": 1.94500631697466,
      "grad_norm": 0.33689963817596436,
      "learning_rate": 6.150247425261757e-06,
      "loss": 0.0257,
      "step": 1188500
    },
    {
      "epoch": 1.9450390474133135,
      "grad_norm": 0.5346510410308838,
      "learning_rate": 6.150181533048241e-06,
      "loss": 0.0251,
      "step": 1188520
    },
    {
      "epoch": 1.9450717778519668,
      "grad_norm": 0.16328831017017365,
      "learning_rate": 6.150115640834723e-06,
      "loss": 0.0187,
      "step": 1188540
    },
    {
      "epoch": 1.94510450829062,
      "grad_norm": 0.13928848505020142,
      "learning_rate": 6.150049748621206e-06,
      "loss": 0.0195,
      "step": 1188560
    },
    {
      "epoch": 1.9451372387292736,
      "grad_norm": 0.6772793531417847,
      "learning_rate": 6.149983856407688e-06,
      "loss": 0.0155,
      "step": 1188580
    },
    {
      "epoch": 1.9451699691679267,
      "grad_norm": 0.22736886143684387,
      "learning_rate": 6.149917964194172e-06,
      "loss": 0.0151,
      "step": 1188600
    },
    {
      "epoch": 1.94520269960658,
      "grad_norm": 0.8007376790046692,
      "learning_rate": 6.149852071980655e-06,
      "loss": 0.0134,
      "step": 1188620
    },
    {
      "epoch": 1.9452354300452335,
      "grad_norm": 0.9127843379974365,
      "learning_rate": 6.149786179767137e-06,
      "loss": 0.0111,
      "step": 1188640
    },
    {
      "epoch": 1.9452681604838868,
      "grad_norm": 0.3591286838054657,
      "learning_rate": 6.14972028755362e-06,
      "loss": 0.0266,
      "step": 1188660
    },
    {
      "epoch": 1.9453008909225402,
      "grad_norm": 0.5261914134025574,
      "learning_rate": 6.149654395340104e-06,
      "loss": 0.0159,
      "step": 1188680
    },
    {
      "epoch": 1.9453336213611934,
      "grad_norm": 0.6272909641265869,
      "learning_rate": 6.1495885031265855e-06,
      "loss": 0.0174,
      "step": 1188700
    },
    {
      "epoch": 1.945366351799847,
      "grad_norm": 0.4514826834201813,
      "learning_rate": 6.149522610913069e-06,
      "loss": 0.0171,
      "step": 1188720
    },
    {
      "epoch": 1.9453990822385,
      "grad_norm": 0.6929365396499634,
      "learning_rate": 6.149456718699551e-06,
      "loss": 0.0104,
      "step": 1188740
    },
    {
      "epoch": 1.9454318126771535,
      "grad_norm": 0.2003566026687622,
      "learning_rate": 6.149390826486035e-06,
      "loss": 0.0161,
      "step": 1188760
    },
    {
      "epoch": 1.9454645431158069,
      "grad_norm": 0.43012556433677673,
      "learning_rate": 6.149324934272517e-06,
      "loss": 0.0278,
      "step": 1188780
    },
    {
      "epoch": 1.9454972735544602,
      "grad_norm": 0.3892520070075989,
      "learning_rate": 6.149259042059e-06,
      "loss": 0.0171,
      "step": 1188800
    },
    {
      "epoch": 1.9455300039931136,
      "grad_norm": 0.19585250318050385,
      "learning_rate": 6.149193149845484e-06,
      "loss": 0.0189,
      "step": 1188820
    },
    {
      "epoch": 1.9455627344317667,
      "grad_norm": 0.5042254328727722,
      "learning_rate": 6.149127257631966e-06,
      "loss": 0.0169,
      "step": 1188840
    },
    {
      "epoch": 1.9455954648704203,
      "grad_norm": 0.6554737687110901,
      "learning_rate": 6.149061365418449e-06,
      "loss": 0.0174,
      "step": 1188860
    },
    {
      "epoch": 1.9456281953090735,
      "grad_norm": 0.4458169639110565,
      "learning_rate": 6.148995473204932e-06,
      "loss": 0.0276,
      "step": 1188880
    },
    {
      "epoch": 1.9456609257477269,
      "grad_norm": 0.8000440001487732,
      "learning_rate": 6.1489295809914155e-06,
      "loss": 0.0146,
      "step": 1188900
    },
    {
      "epoch": 1.9456936561863802,
      "grad_norm": 0.2715734541416168,
      "learning_rate": 6.148863688777897e-06,
      "loss": 0.02,
      "step": 1188920
    },
    {
      "epoch": 1.9457263866250334,
      "grad_norm": 0.729193389415741,
      "learning_rate": 6.148797796564381e-06,
      "loss": 0.0197,
      "step": 1188940
    },
    {
      "epoch": 1.945759117063687,
      "grad_norm": 1.26985502243042,
      "learning_rate": 6.148731904350863e-06,
      "loss": 0.0207,
      "step": 1188960
    },
    {
      "epoch": 1.9457918475023401,
      "grad_norm": 0.2982251048088074,
      "learning_rate": 6.1486660121373464e-06,
      "loss": 0.0148,
      "step": 1188980
    },
    {
      "epoch": 1.9458245779409937,
      "grad_norm": 0.39067456126213074,
      "learning_rate": 6.148600119923829e-06,
      "loss": 0.0113,
      "step": 1189000
    },
    {
      "epoch": 1.9458573083796469,
      "grad_norm": 2.0718698501586914,
      "learning_rate": 6.148534227710312e-06,
      "loss": 0.0203,
      "step": 1189020
    },
    {
      "epoch": 1.9458900388183002,
      "grad_norm": 0.28798967599868774,
      "learning_rate": 6.148468335496795e-06,
      "loss": 0.0166,
      "step": 1189040
    },
    {
      "epoch": 1.9459227692569536,
      "grad_norm": 0.24392516911029816,
      "learning_rate": 6.148402443283278e-06,
      "loss": 0.0129,
      "step": 1189060
    },
    {
      "epoch": 1.9459554996956068,
      "grad_norm": 0.6381853818893433,
      "learning_rate": 6.14833655106976e-06,
      "loss": 0.0216,
      "step": 1189080
    },
    {
      "epoch": 1.9459882301342604,
      "grad_norm": 0.31824588775634766,
      "learning_rate": 6.148270658856244e-06,
      "loss": 0.0167,
      "step": 1189100
    },
    {
      "epoch": 1.9460209605729135,
      "grad_norm": 0.5401257276535034,
      "learning_rate": 6.148204766642726e-06,
      "loss": 0.016,
      "step": 1189120
    },
    {
      "epoch": 1.9460536910115669,
      "grad_norm": 0.2732606530189514,
      "learning_rate": 6.148138874429209e-06,
      "loss": 0.01,
      "step": 1189140
    },
    {
      "epoch": 1.9460864214502203,
      "grad_norm": 0.3745046854019165,
      "learning_rate": 6.148072982215693e-06,
      "loss": 0.0163,
      "step": 1189160
    },
    {
      "epoch": 1.9461191518888736,
      "grad_norm": 2.7338497638702393,
      "learning_rate": 6.148007090002175e-06,
      "loss": 0.0141,
      "step": 1189180
    },
    {
      "epoch": 1.946151882327527,
      "grad_norm": 2.3483991622924805,
      "learning_rate": 6.147941197788658e-06,
      "loss": 0.032,
      "step": 1189200
    },
    {
      "epoch": 1.9461846127661802,
      "grad_norm": 0.2606027126312256,
      "learning_rate": 6.14787530557514e-06,
      "loss": 0.0176,
      "step": 1189220
    },
    {
      "epoch": 1.9462173432048337,
      "grad_norm": 1.2286163568496704,
      "learning_rate": 6.147809413361624e-06,
      "loss": 0.0205,
      "step": 1189240
    },
    {
      "epoch": 1.946250073643487,
      "grad_norm": 0.44793373346328735,
      "learning_rate": 6.1477435211481065e-06,
      "loss": 0.0212,
      "step": 1189260
    },
    {
      "epoch": 1.9462828040821403,
      "grad_norm": 1.0428946018218994,
      "learning_rate": 6.147677628934589e-06,
      "loss": 0.0178,
      "step": 1189280
    },
    {
      "epoch": 1.9463155345207936,
      "grad_norm": 0.19572816789150238,
      "learning_rate": 6.147611736721072e-06,
      "loss": 0.0166,
      "step": 1189300
    },
    {
      "epoch": 1.946348264959447,
      "grad_norm": 0.29176995158195496,
      "learning_rate": 6.1475458445075556e-06,
      "loss": 0.021,
      "step": 1189320
    },
    {
      "epoch": 1.9463809953981004,
      "grad_norm": 2.1948931217193604,
      "learning_rate": 6.1474799522940375e-06,
      "loss": 0.0247,
      "step": 1189340
    },
    {
      "epoch": 1.9464137258367535,
      "grad_norm": 4.294912815093994,
      "learning_rate": 6.147414060080521e-06,
      "loss": 0.02,
      "step": 1189360
    },
    {
      "epoch": 1.9464464562754071,
      "grad_norm": 0.9524936676025391,
      "learning_rate": 6.147348167867003e-06,
      "loss": 0.0189,
      "step": 1189380
    },
    {
      "epoch": 1.9464791867140603,
      "grad_norm": 0.7917273640632629,
      "learning_rate": 6.1472822756534865e-06,
      "loss": 0.0162,
      "step": 1189400
    },
    {
      "epoch": 1.9465119171527137,
      "grad_norm": 0.06212732568383217,
      "learning_rate": 6.147216383439969e-06,
      "loss": 0.0128,
      "step": 1189420
    },
    {
      "epoch": 1.946544647591367,
      "grad_norm": 0.24208536744117737,
      "learning_rate": 6.147150491226452e-06,
      "loss": 0.0205,
      "step": 1189440
    },
    {
      "epoch": 1.9465773780300204,
      "grad_norm": 0.40109527111053467,
      "learning_rate": 6.147084599012935e-06,
      "loss": 0.0112,
      "step": 1189460
    },
    {
      "epoch": 1.9466101084686738,
      "grad_norm": 0.4664739966392517,
      "learning_rate": 6.147018706799418e-06,
      "loss": 0.0163,
      "step": 1189480
    },
    {
      "epoch": 1.946642838907327,
      "grad_norm": 0.39501824975013733,
      "learning_rate": 6.146952814585901e-06,
      "loss": 0.0156,
      "step": 1189500
    },
    {
      "epoch": 1.9466755693459805,
      "grad_norm": 0.29709798097610474,
      "learning_rate": 6.146886922372384e-06,
      "loss": 0.0175,
      "step": 1189520
    },
    {
      "epoch": 1.9467082997846337,
      "grad_norm": 7.528409957885742,
      "learning_rate": 6.146821030158867e-06,
      "loss": 0.0157,
      "step": 1189540
    },
    {
      "epoch": 1.946741030223287,
      "grad_norm": 0.17135848104953766,
      "learning_rate": 6.146755137945349e-06,
      "loss": 0.019,
      "step": 1189560
    },
    {
      "epoch": 1.9467737606619404,
      "grad_norm": 0.3782110810279846,
      "learning_rate": 6.146689245731833e-06,
      "loss": 0.017,
      "step": 1189580
    },
    {
      "epoch": 1.9468064911005938,
      "grad_norm": 0.3671071529388428,
      "learning_rate": 6.146623353518315e-06,
      "loss": 0.0208,
      "step": 1189600
    },
    {
      "epoch": 1.9468392215392472,
      "grad_norm": 0.34102359414100647,
      "learning_rate": 6.146557461304798e-06,
      "loss": 0.0227,
      "step": 1189620
    },
    {
      "epoch": 1.9468719519779003,
      "grad_norm": 0.2211250513792038,
      "learning_rate": 6.146491569091281e-06,
      "loss": 0.0127,
      "step": 1189640
    },
    {
      "epoch": 1.946904682416554,
      "grad_norm": 0.6766705513000488,
      "learning_rate": 6.146425676877764e-06,
      "loss": 0.0182,
      "step": 1189660
    },
    {
      "epoch": 1.946937412855207,
      "grad_norm": 0.2948632538318634,
      "learning_rate": 6.1463597846642466e-06,
      "loss": 0.0174,
      "step": 1189680
    },
    {
      "epoch": 1.9469701432938604,
      "grad_norm": 0.2895349860191345,
      "learning_rate": 6.14629389245073e-06,
      "loss": 0.0192,
      "step": 1189700
    },
    {
      "epoch": 1.9470028737325138,
      "grad_norm": 0.432241827249527,
      "learning_rate": 6.146228000237212e-06,
      "loss": 0.0105,
      "step": 1189720
    },
    {
      "epoch": 1.947035604171167,
      "grad_norm": 0.7164552807807922,
      "learning_rate": 6.146162108023696e-06,
      "loss": 0.0214,
      "step": 1189740
    },
    {
      "epoch": 1.9470683346098205,
      "grad_norm": 0.3731224238872528,
      "learning_rate": 6.1460962158101775e-06,
      "loss": 0.0152,
      "step": 1189760
    },
    {
      "epoch": 1.9471010650484737,
      "grad_norm": 0.20758657157421112,
      "learning_rate": 6.146030323596661e-06,
      "loss": 0.0154,
      "step": 1189780
    },
    {
      "epoch": 1.9471337954871273,
      "grad_norm": 0.35465702414512634,
      "learning_rate": 6.145964431383144e-06,
      "loss": 0.0128,
      "step": 1189800
    },
    {
      "epoch": 1.9471665259257804,
      "grad_norm": 2.5533957481384277,
      "learning_rate": 6.145898539169627e-06,
      "loss": 0.0139,
      "step": 1189820
    },
    {
      "epoch": 1.9471992563644338,
      "grad_norm": 0.2744123637676239,
      "learning_rate": 6.145832646956109e-06,
      "loss": 0.0192,
      "step": 1189840
    },
    {
      "epoch": 1.9472319868030872,
      "grad_norm": 0.868251383304596,
      "learning_rate": 6.145766754742593e-06,
      "loss": 0.0162,
      "step": 1189860
    },
    {
      "epoch": 1.9472647172417403,
      "grad_norm": 0.07004250586032867,
      "learning_rate": 6.145700862529076e-06,
      "loss": 0.0201,
      "step": 1189880
    },
    {
      "epoch": 1.947297447680394,
      "grad_norm": 0.2564760446548462,
      "learning_rate": 6.145634970315558e-06,
      "loss": 0.0171,
      "step": 1189900
    },
    {
      "epoch": 1.947330178119047,
      "grad_norm": 1.5520710945129395,
      "learning_rate": 6.145569078102042e-06,
      "loss": 0.0276,
      "step": 1189920
    },
    {
      "epoch": 1.9473629085577004,
      "grad_norm": 0.2995353639125824,
      "learning_rate": 6.145503185888524e-06,
      "loss": 0.0163,
      "step": 1189940
    },
    {
      "epoch": 1.9473956389963538,
      "grad_norm": 0.6884639859199524,
      "learning_rate": 6.1454372936750075e-06,
      "loss": 0.0178,
      "step": 1189960
    },
    {
      "epoch": 1.9474283694350072,
      "grad_norm": 0.8559292554855347,
      "learning_rate": 6.145371401461489e-06,
      "loss": 0.0169,
      "step": 1189980
    },
    {
      "epoch": 1.9474610998736606,
      "grad_norm": 0.48381373286247253,
      "learning_rate": 6.145305509247973e-06,
      "loss": 0.0157,
      "step": 1190000
    },
    {
      "epoch": 1.9474938303123137,
      "grad_norm": 0.321198433637619,
      "learning_rate": 6.145239617034455e-06,
      "loss": 0.0156,
      "step": 1190020
    },
    {
      "epoch": 1.9475265607509673,
      "grad_norm": 0.29883450269699097,
      "learning_rate": 6.1451737248209384e-06,
      "loss": 0.0193,
      "step": 1190040
    },
    {
      "epoch": 1.9475592911896205,
      "grad_norm": 1.0808500051498413,
      "learning_rate": 6.145107832607421e-06,
      "loss": 0.0164,
      "step": 1190060
    },
    {
      "epoch": 1.9475920216282738,
      "grad_norm": 0.23275816440582275,
      "learning_rate": 6.145041940393905e-06,
      "loss": 0.0167,
      "step": 1190080
    },
    {
      "epoch": 1.9476247520669272,
      "grad_norm": 0.9950770735740662,
      "learning_rate": 6.144976048180387e-06,
      "loss": 0.0166,
      "step": 1190100
    },
    {
      "epoch": 1.9476574825055806,
      "grad_norm": 0.8692297339439392,
      "learning_rate": 6.14491015596687e-06,
      "loss": 0.0141,
      "step": 1190120
    },
    {
      "epoch": 1.947690212944234,
      "grad_norm": 0.5341479778289795,
      "learning_rate": 6.144844263753352e-06,
      "loss": 0.0189,
      "step": 1190140
    },
    {
      "epoch": 1.947722943382887,
      "grad_norm": 0.9057266712188721,
      "learning_rate": 6.144778371539836e-06,
      "loss": 0.0133,
      "step": 1190160
    },
    {
      "epoch": 1.9477556738215407,
      "grad_norm": 0.8427404165267944,
      "learning_rate": 6.144712479326318e-06,
      "loss": 0.0204,
      "step": 1190180
    },
    {
      "epoch": 1.9477884042601938,
      "grad_norm": 0.4986029863357544,
      "learning_rate": 6.144646587112801e-06,
      "loss": 0.0144,
      "step": 1190200
    },
    {
      "epoch": 1.9478211346988472,
      "grad_norm": 0.7410159111022949,
      "learning_rate": 6.144580694899285e-06,
      "loss": 0.0167,
      "step": 1190220
    },
    {
      "epoch": 1.9478538651375006,
      "grad_norm": 0.17861749231815338,
      "learning_rate": 6.144514802685767e-06,
      "loss": 0.0201,
      "step": 1190240
    },
    {
      "epoch": 1.947886595576154,
      "grad_norm": 0.5965777635574341,
      "learning_rate": 6.14444891047225e-06,
      "loss": 0.0126,
      "step": 1190260
    },
    {
      "epoch": 1.9479193260148073,
      "grad_norm": 0.11705714464187622,
      "learning_rate": 6.144383018258733e-06,
      "loss": 0.0218,
      "step": 1190280
    },
    {
      "epoch": 1.9479520564534605,
      "grad_norm": 0.24964627623558044,
      "learning_rate": 6.144317126045216e-06,
      "loss": 0.015,
      "step": 1190300
    },
    {
      "epoch": 1.947984786892114,
      "grad_norm": 0.49772346019744873,
      "learning_rate": 6.1442512338316985e-06,
      "loss": 0.0188,
      "step": 1190320
    },
    {
      "epoch": 1.9480175173307672,
      "grad_norm": 0.4421791434288025,
      "learning_rate": 6.144185341618182e-06,
      "loss": 0.0226,
      "step": 1190340
    },
    {
      "epoch": 1.9480502477694206,
      "grad_norm": 0.0995636060833931,
      "learning_rate": 6.144119449404664e-06,
      "loss": 0.0173,
      "step": 1190360
    },
    {
      "epoch": 1.948082978208074,
      "grad_norm": 0.6348304152488708,
      "learning_rate": 6.1440535571911475e-06,
      "loss": 0.0245,
      "step": 1190380
    },
    {
      "epoch": 1.9481157086467271,
      "grad_norm": 0.15253479778766632,
      "learning_rate": 6.1439876649776294e-06,
      "loss": 0.0181,
      "step": 1190400
    },
    {
      "epoch": 1.9481484390853807,
      "grad_norm": 0.671875536441803,
      "learning_rate": 6.143921772764113e-06,
      "loss": 0.0256,
      "step": 1190420
    },
    {
      "epoch": 1.9481811695240339,
      "grad_norm": 0.4269099533557892,
      "learning_rate": 6.143855880550596e-06,
      "loss": 0.0167,
      "step": 1190440
    },
    {
      "epoch": 1.9482138999626875,
      "grad_norm": 0.47697481513023376,
      "learning_rate": 6.1437899883370785e-06,
      "loss": 0.0193,
      "step": 1190460
    },
    {
      "epoch": 1.9482466304013406,
      "grad_norm": 0.4943183362483978,
      "learning_rate": 6.143724096123561e-06,
      "loss": 0.0156,
      "step": 1190480
    },
    {
      "epoch": 1.948279360839994,
      "grad_norm": 3.9533469676971436,
      "learning_rate": 6.143658203910045e-06,
      "loss": 0.0231,
      "step": 1190500
    },
    {
      "epoch": 1.9483120912786474,
      "grad_norm": 0.2586480975151062,
      "learning_rate": 6.143592311696527e-06,
      "loss": 0.0173,
      "step": 1190520
    },
    {
      "epoch": 1.9483448217173005,
      "grad_norm": 1.2720327377319336,
      "learning_rate": 6.14352641948301e-06,
      "loss": 0.0245,
      "step": 1190540
    },
    {
      "epoch": 1.948377552155954,
      "grad_norm": 0.42643871903419495,
      "learning_rate": 6.143460527269494e-06,
      "loss": 0.014,
      "step": 1190560
    },
    {
      "epoch": 1.9484102825946072,
      "grad_norm": 0.6565696597099304,
      "learning_rate": 6.143394635055976e-06,
      "loss": 0.0158,
      "step": 1190580
    },
    {
      "epoch": 1.9484430130332606,
      "grad_norm": 0.9454774856567383,
      "learning_rate": 6.143328742842459e-06,
      "loss": 0.029,
      "step": 1190600
    },
    {
      "epoch": 1.948475743471914,
      "grad_norm": 0.4442048966884613,
      "learning_rate": 6.143262850628941e-06,
      "loss": 0.0171,
      "step": 1190620
    },
    {
      "epoch": 1.9485084739105674,
      "grad_norm": 0.7619981169700623,
      "learning_rate": 6.143196958415425e-06,
      "loss": 0.0214,
      "step": 1190640
    },
    {
      "epoch": 1.9485412043492207,
      "grad_norm": 0.457402765750885,
      "learning_rate": 6.143131066201908e-06,
      "loss": 0.0208,
      "step": 1190660
    },
    {
      "epoch": 1.9485739347878739,
      "grad_norm": 0.6203154921531677,
      "learning_rate": 6.14306517398839e-06,
      "loss": 0.0151,
      "step": 1190680
    },
    {
      "epoch": 1.9486066652265275,
      "grad_norm": 0.758479118347168,
      "learning_rate": 6.142999281774873e-06,
      "loss": 0.0168,
      "step": 1190700
    },
    {
      "epoch": 1.9486393956651806,
      "grad_norm": 0.7943676710128784,
      "learning_rate": 6.142933389561357e-06,
      "loss": 0.0121,
      "step": 1190720
    },
    {
      "epoch": 1.948672126103834,
      "grad_norm": 0.38722190260887146,
      "learning_rate": 6.1428674973478386e-06,
      "loss": 0.0108,
      "step": 1190740
    },
    {
      "epoch": 1.9487048565424874,
      "grad_norm": 0.2714528739452362,
      "learning_rate": 6.142801605134322e-06,
      "loss": 0.0254,
      "step": 1190760
    },
    {
      "epoch": 1.9487375869811407,
      "grad_norm": 0.21301257610321045,
      "learning_rate": 6.142735712920804e-06,
      "loss": 0.0169,
      "step": 1190780
    },
    {
      "epoch": 1.9487703174197941,
      "grad_norm": 0.6093232035636902,
      "learning_rate": 6.142669820707288e-06,
      "loss": 0.0125,
      "step": 1190800
    },
    {
      "epoch": 1.9488030478584473,
      "grad_norm": 0.6388498544692993,
      "learning_rate": 6.14260392849377e-06,
      "loss": 0.0167,
      "step": 1190820
    },
    {
      "epoch": 1.9488357782971009,
      "grad_norm": 2.09081768989563,
      "learning_rate": 6.142538036280253e-06,
      "loss": 0.0222,
      "step": 1190840
    },
    {
      "epoch": 1.948868508735754,
      "grad_norm": 0.7366392612457275,
      "learning_rate": 6.142472144066736e-06,
      "loss": 0.0152,
      "step": 1190860
    },
    {
      "epoch": 1.9489012391744074,
      "grad_norm": 0.6134086847305298,
      "learning_rate": 6.1424062518532194e-06,
      "loss": 0.0177,
      "step": 1190880
    },
    {
      "epoch": 1.9489339696130608,
      "grad_norm": 0.1407318115234375,
      "learning_rate": 6.142340359639702e-06,
      "loss": 0.0181,
      "step": 1190900
    },
    {
      "epoch": 1.9489667000517141,
      "grad_norm": 1.9647890329360962,
      "learning_rate": 6.142274467426185e-06,
      "loss": 0.0113,
      "step": 1190920
    },
    {
      "epoch": 1.9489994304903675,
      "grad_norm": 0.7883482575416565,
      "learning_rate": 6.1422085752126685e-06,
      "loss": 0.0139,
      "step": 1190940
    },
    {
      "epoch": 1.9490321609290207,
      "grad_norm": 0.2247551530599594,
      "learning_rate": 6.14214268299915e-06,
      "loss": 0.0168,
      "step": 1190960
    },
    {
      "epoch": 1.9490648913676742,
      "grad_norm": 0.8138907551765442,
      "learning_rate": 6.142076790785634e-06,
      "loss": 0.0242,
      "step": 1190980
    },
    {
      "epoch": 1.9490976218063274,
      "grad_norm": 0.3880050480365753,
      "learning_rate": 6.142010898572116e-06,
      "loss": 0.0202,
      "step": 1191000
    },
    {
      "epoch": 1.9491303522449808,
      "grad_norm": 0.22822348773479462,
      "learning_rate": 6.1419450063585995e-06,
      "loss": 0.0186,
      "step": 1191020
    },
    {
      "epoch": 1.9491630826836341,
      "grad_norm": 0.2534317374229431,
      "learning_rate": 6.141879114145081e-06,
      "loss": 0.0138,
      "step": 1191040
    },
    {
      "epoch": 1.9491958131222875,
      "grad_norm": 0.46416187286376953,
      "learning_rate": 6.141813221931565e-06,
      "loss": 0.0183,
      "step": 1191060
    },
    {
      "epoch": 1.9492285435609409,
      "grad_norm": 0.09122330695390701,
      "learning_rate": 6.141747329718048e-06,
      "loss": 0.0231,
      "step": 1191080
    },
    {
      "epoch": 1.949261273999594,
      "grad_norm": 0.40555471181869507,
      "learning_rate": 6.14168143750453e-06,
      "loss": 0.0178,
      "step": 1191100
    },
    {
      "epoch": 1.9492940044382476,
      "grad_norm": 0.627442479133606,
      "learning_rate": 6.141615545291013e-06,
      "loss": 0.0207,
      "step": 1191120
    },
    {
      "epoch": 1.9493267348769008,
      "grad_norm": 0.7397102117538452,
      "learning_rate": 6.141549653077497e-06,
      "loss": 0.0229,
      "step": 1191140
    },
    {
      "epoch": 1.9493594653155542,
      "grad_norm": 0.09485360234975815,
      "learning_rate": 6.141483760863979e-06,
      "loss": 0.0103,
      "step": 1191160
    },
    {
      "epoch": 1.9493921957542075,
      "grad_norm": 1.1556931734085083,
      "learning_rate": 6.141417868650462e-06,
      "loss": 0.0142,
      "step": 1191180
    },
    {
      "epoch": 1.9494249261928607,
      "grad_norm": 0.4938666522502899,
      "learning_rate": 6.141351976436944e-06,
      "loss": 0.0197,
      "step": 1191200
    },
    {
      "epoch": 1.9494576566315143,
      "grad_norm": 0.22891055047512054,
      "learning_rate": 6.141286084223428e-06,
      "loss": 0.0198,
      "step": 1191220
    },
    {
      "epoch": 1.9494903870701674,
      "grad_norm": 1.2100528478622437,
      "learning_rate": 6.1412201920099104e-06,
      "loss": 0.019,
      "step": 1191240
    },
    {
      "epoch": 1.949523117508821,
      "grad_norm": 0.08577574789524078,
      "learning_rate": 6.141154299796393e-06,
      "loss": 0.0152,
      "step": 1191260
    },
    {
      "epoch": 1.9495558479474742,
      "grad_norm": 0.354885071516037,
      "learning_rate": 6.141088407582877e-06,
      "loss": 0.0127,
      "step": 1191280
    },
    {
      "epoch": 1.9495885783861275,
      "grad_norm": 0.23683544993400574,
      "learning_rate": 6.1410225153693595e-06,
      "loss": 0.0179,
      "step": 1191300
    },
    {
      "epoch": 1.949621308824781,
      "grad_norm": 0.9889534711837769,
      "learning_rate": 6.140956623155842e-06,
      "loss": 0.0181,
      "step": 1191320
    },
    {
      "epoch": 1.949654039263434,
      "grad_norm": 0.2933772802352905,
      "learning_rate": 6.140890730942325e-06,
      "loss": 0.0156,
      "step": 1191340
    },
    {
      "epoch": 1.9496867697020877,
      "grad_norm": 0.24377214908599854,
      "learning_rate": 6.1408248387288086e-06,
      "loss": 0.0202,
      "step": 1191360
    },
    {
      "epoch": 1.9497195001407408,
      "grad_norm": 0.21295395493507385,
      "learning_rate": 6.1407589465152905e-06,
      "loss": 0.0148,
      "step": 1191380
    },
    {
      "epoch": 1.9497522305793942,
      "grad_norm": 0.09418991208076477,
      "learning_rate": 6.140693054301774e-06,
      "loss": 0.0144,
      "step": 1191400
    },
    {
      "epoch": 1.9497849610180475,
      "grad_norm": 0.6636608839035034,
      "learning_rate": 6.140627162088256e-06,
      "loss": 0.0159,
      "step": 1191420
    },
    {
      "epoch": 1.949817691456701,
      "grad_norm": 0.2989365756511688,
      "learning_rate": 6.1405612698747395e-06,
      "loss": 0.0209,
      "step": 1191440
    },
    {
      "epoch": 1.9498504218953543,
      "grad_norm": 0.542559802532196,
      "learning_rate": 6.140495377661222e-06,
      "loss": 0.0226,
      "step": 1191460
    },
    {
      "epoch": 1.9498831523340074,
      "grad_norm": 0.3215559124946594,
      "learning_rate": 6.140429485447705e-06,
      "loss": 0.0192,
      "step": 1191480
    },
    {
      "epoch": 1.949915882772661,
      "grad_norm": 0.3906424641609192,
      "learning_rate": 6.140363593234188e-06,
      "loss": 0.0121,
      "step": 1191500
    },
    {
      "epoch": 1.9499486132113142,
      "grad_norm": 0.5316583514213562,
      "learning_rate": 6.140297701020671e-06,
      "loss": 0.0158,
      "step": 1191520
    },
    {
      "epoch": 1.9499813436499676,
      "grad_norm": 0.5468546748161316,
      "learning_rate": 6.140231808807153e-06,
      "loss": 0.0208,
      "step": 1191540
    },
    {
      "epoch": 1.950014074088621,
      "grad_norm": 0.16949787735939026,
      "learning_rate": 6.140165916593637e-06,
      "loss": 0.0125,
      "step": 1191560
    },
    {
      "epoch": 1.9500468045272743,
      "grad_norm": 0.3947286307811737,
      "learning_rate": 6.140100024380119e-06,
      "loss": 0.016,
      "step": 1191580
    },
    {
      "epoch": 1.9500795349659277,
      "grad_norm": 0.6676463484764099,
      "learning_rate": 6.140034132166602e-06,
      "loss": 0.0211,
      "step": 1191600
    },
    {
      "epoch": 1.9501122654045808,
      "grad_norm": 0.5079681873321533,
      "learning_rate": 6.139968239953086e-06,
      "loss": 0.0197,
      "step": 1191620
    },
    {
      "epoch": 1.9501449958432344,
      "grad_norm": 1.5977957248687744,
      "learning_rate": 6.139902347739568e-06,
      "loss": 0.0228,
      "step": 1191640
    },
    {
      "epoch": 1.9501777262818876,
      "grad_norm": 0.4908826947212219,
      "learning_rate": 6.139836455526051e-06,
      "loss": 0.017,
      "step": 1191660
    },
    {
      "epoch": 1.950210456720541,
      "grad_norm": 1.4847537279129028,
      "learning_rate": 6.139770563312534e-06,
      "loss": 0.019,
      "step": 1191680
    },
    {
      "epoch": 1.9502431871591943,
      "grad_norm": 0.5428668856620789,
      "learning_rate": 6.139704671099017e-06,
      "loss": 0.0128,
      "step": 1191700
    },
    {
      "epoch": 1.9502759175978477,
      "grad_norm": 0.26957786083221436,
      "learning_rate": 6.1396387788855e-06,
      "loss": 0.019,
      "step": 1191720
    },
    {
      "epoch": 1.950308648036501,
      "grad_norm": 0.7068458795547485,
      "learning_rate": 6.139572886671983e-06,
      "loss": 0.0216,
      "step": 1191740
    },
    {
      "epoch": 1.9503413784751542,
      "grad_norm": 0.2676162123680115,
      "learning_rate": 6.139506994458465e-06,
      "loss": 0.0106,
      "step": 1191760
    },
    {
      "epoch": 1.9503741089138078,
      "grad_norm": 0.566352367401123,
      "learning_rate": 6.139441102244949e-06,
      "loss": 0.0102,
      "step": 1191780
    },
    {
      "epoch": 1.950406839352461,
      "grad_norm": 0.2338067889213562,
      "learning_rate": 6.1393752100314305e-06,
      "loss": 0.0145,
      "step": 1191800
    },
    {
      "epoch": 1.9504395697911143,
      "grad_norm": 0.16208569705486298,
      "learning_rate": 6.139309317817914e-06,
      "loss": 0.0131,
      "step": 1191820
    },
    {
      "epoch": 1.9504723002297677,
      "grad_norm": 0.2529852092266083,
      "learning_rate": 6.139243425604397e-06,
      "loss": 0.0113,
      "step": 1191840
    },
    {
      "epoch": 1.950505030668421,
      "grad_norm": 7.658806800842285,
      "learning_rate": 6.13917753339088e-06,
      "loss": 0.0174,
      "step": 1191860
    },
    {
      "epoch": 1.9505377611070744,
      "grad_norm": 0.7308605313301086,
      "learning_rate": 6.139111641177362e-06,
      "loss": 0.0191,
      "step": 1191880
    },
    {
      "epoch": 1.9505704915457276,
      "grad_norm": 0.1770053654909134,
      "learning_rate": 6.139045748963846e-06,
      "loss": 0.0141,
      "step": 1191900
    },
    {
      "epoch": 1.9506032219843812,
      "grad_norm": 0.637886643409729,
      "learning_rate": 6.138979856750328e-06,
      "loss": 0.0203,
      "step": 1191920
    },
    {
      "epoch": 1.9506359524230343,
      "grad_norm": 0.35322806239128113,
      "learning_rate": 6.138913964536811e-06,
      "loss": 0.0187,
      "step": 1191940
    },
    {
      "epoch": 1.9506686828616877,
      "grad_norm": 0.34106531739234924,
      "learning_rate": 6.138848072323295e-06,
      "loss": 0.011,
      "step": 1191960
    },
    {
      "epoch": 1.950701413300341,
      "grad_norm": 0.13888196647167206,
      "learning_rate": 6.138782180109777e-06,
      "loss": 0.0203,
      "step": 1191980
    },
    {
      "epoch": 1.9507341437389942,
      "grad_norm": 0.4980379343032837,
      "learning_rate": 6.1387162878962605e-06,
      "loss": 0.0297,
      "step": 1192000
    },
    {
      "epoch": 1.9507668741776478,
      "grad_norm": 0.43938541412353516,
      "learning_rate": 6.138650395682742e-06,
      "loss": 0.0125,
      "step": 1192020
    },
    {
      "epoch": 1.950799604616301,
      "grad_norm": 0.31413784623146057,
      "learning_rate": 6.138584503469226e-06,
      "loss": 0.0132,
      "step": 1192040
    },
    {
      "epoch": 1.9508323350549546,
      "grad_norm": 0.7117068767547607,
      "learning_rate": 6.138518611255708e-06,
      "loss": 0.0223,
      "step": 1192060
    },
    {
      "epoch": 1.9508650654936077,
      "grad_norm": 0.4844360947608948,
      "learning_rate": 6.1384527190421914e-06,
      "loss": 0.0109,
      "step": 1192080
    },
    {
      "epoch": 1.950897795932261,
      "grad_norm": 0.5366451740264893,
      "learning_rate": 6.138386826828674e-06,
      "loss": 0.018,
      "step": 1192100
    },
    {
      "epoch": 1.9509305263709145,
      "grad_norm": 0.21466006338596344,
      "learning_rate": 6.138320934615157e-06,
      "loss": 0.0152,
      "step": 1192120
    },
    {
      "epoch": 1.9509632568095676,
      "grad_norm": 0.24123011529445648,
      "learning_rate": 6.13825504240164e-06,
      "loss": 0.0202,
      "step": 1192140
    },
    {
      "epoch": 1.9509959872482212,
      "grad_norm": 0.17240038514137268,
      "learning_rate": 6.138189150188123e-06,
      "loss": 0.0185,
      "step": 1192160
    },
    {
      "epoch": 1.9510287176868744,
      "grad_norm": 0.4735281765460968,
      "learning_rate": 6.138123257974605e-06,
      "loss": 0.021,
      "step": 1192180
    },
    {
      "epoch": 1.9510614481255277,
      "grad_norm": 0.13338404893875122,
      "learning_rate": 6.138057365761089e-06,
      "loss": 0.0161,
      "step": 1192200
    },
    {
      "epoch": 1.951094178564181,
      "grad_norm": 0.6365656852722168,
      "learning_rate": 6.137991473547571e-06,
      "loss": 0.0278,
      "step": 1192220
    },
    {
      "epoch": 1.9511269090028345,
      "grad_norm": 0.25473520159721375,
      "learning_rate": 6.137925581334054e-06,
      "loss": 0.0148,
      "step": 1192240
    },
    {
      "epoch": 1.9511596394414878,
      "grad_norm": 0.4134707450866699,
      "learning_rate": 6.137859689120537e-06,
      "loss": 0.0179,
      "step": 1192260
    },
    {
      "epoch": 1.951192369880141,
      "grad_norm": 0.46205562353134155,
      "learning_rate": 6.13779379690702e-06,
      "loss": 0.0152,
      "step": 1192280
    },
    {
      "epoch": 1.9512251003187946,
      "grad_norm": 1.0401464700698853,
      "learning_rate": 6.137727904693502e-06,
      "loss": 0.0279,
      "step": 1192300
    },
    {
      "epoch": 1.9512578307574477,
      "grad_norm": 0.3651329576969147,
      "learning_rate": 6.137662012479986e-06,
      "loss": 0.0234,
      "step": 1192320
    },
    {
      "epoch": 1.9512905611961011,
      "grad_norm": 1.1022206544876099,
      "learning_rate": 6.137596120266469e-06,
      "loss": 0.0211,
      "step": 1192340
    },
    {
      "epoch": 1.9513232916347545,
      "grad_norm": 0.10158554464578629,
      "learning_rate": 6.1375302280529515e-06,
      "loss": 0.016,
      "step": 1192360
    },
    {
      "epoch": 1.9513560220734079,
      "grad_norm": 1.2429924011230469,
      "learning_rate": 6.137464335839435e-06,
      "loss": 0.0167,
      "step": 1192380
    },
    {
      "epoch": 1.9513887525120612,
      "grad_norm": 0.38614094257354736,
      "learning_rate": 6.137398443625917e-06,
      "loss": 0.0198,
      "step": 1192400
    },
    {
      "epoch": 1.9514214829507144,
      "grad_norm": 0.23427583277225494,
      "learning_rate": 6.1373325514124006e-06,
      "loss": 0.0173,
      "step": 1192420
    },
    {
      "epoch": 1.951454213389368,
      "grad_norm": 0.7403954267501831,
      "learning_rate": 6.1372666591988824e-06,
      "loss": 0.0165,
      "step": 1192440
    },
    {
      "epoch": 1.9514869438280211,
      "grad_norm": 1.038651704788208,
      "learning_rate": 6.137200766985366e-06,
      "loss": 0.0111,
      "step": 1192460
    },
    {
      "epoch": 1.9515196742666745,
      "grad_norm": 0.10111349076032639,
      "learning_rate": 6.137134874771849e-06,
      "loss": 0.0262,
      "step": 1192480
    },
    {
      "epoch": 1.9515524047053279,
      "grad_norm": 1.073591709136963,
      "learning_rate": 6.1370689825583315e-06,
      "loss": 0.0168,
      "step": 1192500
    },
    {
      "epoch": 1.9515851351439812,
      "grad_norm": 0.09471403807401657,
      "learning_rate": 6.137003090344814e-06,
      "loss": 0.0145,
      "step": 1192520
    },
    {
      "epoch": 1.9516178655826346,
      "grad_norm": 0.15963095426559448,
      "learning_rate": 6.136937198131298e-06,
      "loss": 0.0149,
      "step": 1192540
    },
    {
      "epoch": 1.9516505960212878,
      "grad_norm": 0.7547267079353333,
      "learning_rate": 6.13687130591778e-06,
      "loss": 0.0245,
      "step": 1192560
    },
    {
      "epoch": 1.9516833264599414,
      "grad_norm": 0.798106849193573,
      "learning_rate": 6.136805413704263e-06,
      "loss": 0.0258,
      "step": 1192580
    },
    {
      "epoch": 1.9517160568985945,
      "grad_norm": 0.598590612411499,
      "learning_rate": 6.136739521490745e-06,
      "loss": 0.019,
      "step": 1192600
    },
    {
      "epoch": 1.9517487873372479,
      "grad_norm": 0.6457776427268982,
      "learning_rate": 6.136673629277229e-06,
      "loss": 0.0198,
      "step": 1192620
    },
    {
      "epoch": 1.9517815177759013,
      "grad_norm": 0.21686220169067383,
      "learning_rate": 6.1366077370637115e-06,
      "loss": 0.0127,
      "step": 1192640
    },
    {
      "epoch": 1.9518142482145546,
      "grad_norm": 0.43651941418647766,
      "learning_rate": 6.136541844850194e-06,
      "loss": 0.0188,
      "step": 1192660
    },
    {
      "epoch": 1.951846978653208,
      "grad_norm": 0.24954816699028015,
      "learning_rate": 6.136475952636678e-06,
      "loss": 0.0152,
      "step": 1192680
    },
    {
      "epoch": 1.9518797090918611,
      "grad_norm": 0.08586031198501587,
      "learning_rate": 6.136410060423161e-06,
      "loss": 0.0205,
      "step": 1192700
    },
    {
      "epoch": 1.9519124395305147,
      "grad_norm": 1.8095111846923828,
      "learning_rate": 6.136344168209643e-06,
      "loss": 0.0163,
      "step": 1192720
    },
    {
      "epoch": 1.951945169969168,
      "grad_norm": 0.40595731139183044,
      "learning_rate": 6.136278275996126e-06,
      "loss": 0.0148,
      "step": 1192740
    },
    {
      "epoch": 1.9519779004078213,
      "grad_norm": 0.22619278728961945,
      "learning_rate": 6.13621238378261e-06,
      "loss": 0.0181,
      "step": 1192760
    },
    {
      "epoch": 1.9520106308464746,
      "grad_norm": 0.539539098739624,
      "learning_rate": 6.1361464915690916e-06,
      "loss": 0.0224,
      "step": 1192780
    },
    {
      "epoch": 1.9520433612851278,
      "grad_norm": 0.5065821409225464,
      "learning_rate": 6.136080599355575e-06,
      "loss": 0.0153,
      "step": 1192800
    },
    {
      "epoch": 1.9520760917237814,
      "grad_norm": 0.3137078285217285,
      "learning_rate": 6.136014707142057e-06,
      "loss": 0.015,
      "step": 1192820
    },
    {
      "epoch": 1.9521088221624345,
      "grad_norm": 0.2101370394229889,
      "learning_rate": 6.135948814928541e-06,
      "loss": 0.017,
      "step": 1192840
    },
    {
      "epoch": 1.9521415526010881,
      "grad_norm": 0.077244833111763,
      "learning_rate": 6.135882922715023e-06,
      "loss": 0.0243,
      "step": 1192860
    },
    {
      "epoch": 1.9521742830397413,
      "grad_norm": 0.8786972761154175,
      "learning_rate": 6.135817030501506e-06,
      "loss": 0.0177,
      "step": 1192880
    },
    {
      "epoch": 1.9522070134783946,
      "grad_norm": 0.07760487496852875,
      "learning_rate": 6.135751138287989e-06,
      "loss": 0.0244,
      "step": 1192900
    },
    {
      "epoch": 1.952239743917048,
      "grad_norm": 0.0795121043920517,
      "learning_rate": 6.1356852460744724e-06,
      "loss": 0.0174,
      "step": 1192920
    },
    {
      "epoch": 1.9522724743557012,
      "grad_norm": 0.9239093065261841,
      "learning_rate": 6.135619353860954e-06,
      "loss": 0.0205,
      "step": 1192940
    },
    {
      "epoch": 1.9523052047943548,
      "grad_norm": 0.13077977299690247,
      "learning_rate": 6.135553461647438e-06,
      "loss": 0.0162,
      "step": 1192960
    },
    {
      "epoch": 1.952337935233008,
      "grad_norm": 0.1334000676870346,
      "learning_rate": 6.13548756943392e-06,
      "loss": 0.0214,
      "step": 1192980
    },
    {
      "epoch": 1.9523706656716613,
      "grad_norm": 0.13303396105766296,
      "learning_rate": 6.135421677220403e-06,
      "loss": 0.0184,
      "step": 1193000
    },
    {
      "epoch": 1.9524033961103147,
      "grad_norm": 1.0177366733551025,
      "learning_rate": 6.135355785006887e-06,
      "loss": 0.0151,
      "step": 1193020
    },
    {
      "epoch": 1.952436126548968,
      "grad_norm": 0.13790662586688995,
      "learning_rate": 6.135289892793369e-06,
      "loss": 0.0142,
      "step": 1193040
    },
    {
      "epoch": 1.9524688569876214,
      "grad_norm": 0.44867655634880066,
      "learning_rate": 6.1352240005798525e-06,
      "loss": 0.022,
      "step": 1193060
    },
    {
      "epoch": 1.9525015874262746,
      "grad_norm": 0.8557742834091187,
      "learning_rate": 6.135158108366334e-06,
      "loss": 0.0193,
      "step": 1193080
    },
    {
      "epoch": 1.9525343178649281,
      "grad_norm": 0.14380373060703278,
      "learning_rate": 6.135092216152818e-06,
      "loss": 0.0229,
      "step": 1193100
    },
    {
      "epoch": 1.9525670483035813,
      "grad_norm": 0.517127513885498,
      "learning_rate": 6.135026323939301e-06,
      "loss": 0.0204,
      "step": 1193120
    },
    {
      "epoch": 1.9525997787422347,
      "grad_norm": 0.7556421160697937,
      "learning_rate": 6.134960431725783e-06,
      "loss": 0.0269,
      "step": 1193140
    },
    {
      "epoch": 1.952632509180888,
      "grad_norm": 0.5264706611633301,
      "learning_rate": 6.134894539512266e-06,
      "loss": 0.0184,
      "step": 1193160
    },
    {
      "epoch": 1.9526652396195414,
      "grad_norm": 0.7931070923805237,
      "learning_rate": 6.13482864729875e-06,
      "loss": 0.0201,
      "step": 1193180
    },
    {
      "epoch": 1.9526979700581948,
      "grad_norm": 1.5342204570770264,
      "learning_rate": 6.134762755085232e-06,
      "loss": 0.0211,
      "step": 1193200
    },
    {
      "epoch": 1.952730700496848,
      "grad_norm": 0.29022952914237976,
      "learning_rate": 6.134696862871715e-06,
      "loss": 0.0182,
      "step": 1193220
    },
    {
      "epoch": 1.9527634309355015,
      "grad_norm": 0.7652283906936646,
      "learning_rate": 6.134630970658197e-06,
      "loss": 0.0225,
      "step": 1193240
    },
    {
      "epoch": 1.9527961613741547,
      "grad_norm": 0.6404750943183899,
      "learning_rate": 6.134565078444681e-06,
      "loss": 0.0169,
      "step": 1193260
    },
    {
      "epoch": 1.952828891812808,
      "grad_norm": 0.5072709918022156,
      "learning_rate": 6.1344991862311634e-06,
      "loss": 0.0218,
      "step": 1193280
    },
    {
      "epoch": 1.9528616222514614,
      "grad_norm": 1.4745064973831177,
      "learning_rate": 6.134433294017646e-06,
      "loss": 0.0181,
      "step": 1193300
    },
    {
      "epoch": 1.9528943526901148,
      "grad_norm": 0.2381965070962906,
      "learning_rate": 6.134367401804129e-06,
      "loss": 0.018,
      "step": 1193320
    },
    {
      "epoch": 1.9529270831287682,
      "grad_norm": 1.0605485439300537,
      "learning_rate": 6.1343015095906125e-06,
      "loss": 0.0185,
      "step": 1193340
    },
    {
      "epoch": 1.9529598135674213,
      "grad_norm": 0.36782148480415344,
      "learning_rate": 6.134235617377094e-06,
      "loss": 0.0182,
      "step": 1193360
    },
    {
      "epoch": 1.952992544006075,
      "grad_norm": 0.4558364152908325,
      "learning_rate": 6.134169725163578e-06,
      "loss": 0.0177,
      "step": 1193380
    },
    {
      "epoch": 1.953025274444728,
      "grad_norm": 0.6576103568077087,
      "learning_rate": 6.1341038329500616e-06,
      "loss": 0.0134,
      "step": 1193400
    },
    {
      "epoch": 1.9530580048833814,
      "grad_norm": 0.6725014448165894,
      "learning_rate": 6.1340379407365435e-06,
      "loss": 0.0143,
      "step": 1193420
    },
    {
      "epoch": 1.9530907353220348,
      "grad_norm": 0.16049031913280487,
      "learning_rate": 6.133972048523027e-06,
      "loss": 0.0135,
      "step": 1193440
    },
    {
      "epoch": 1.953123465760688,
      "grad_norm": 0.32878756523132324,
      "learning_rate": 6.133906156309509e-06,
      "loss": 0.0196,
      "step": 1193460
    },
    {
      "epoch": 1.9531561961993416,
      "grad_norm": 0.6773079633712769,
      "learning_rate": 6.1338402640959925e-06,
      "loss": 0.0257,
      "step": 1193480
    },
    {
      "epoch": 1.9531889266379947,
      "grad_norm": 0.3010435402393341,
      "learning_rate": 6.133774371882475e-06,
      "loss": 0.0157,
      "step": 1193500
    },
    {
      "epoch": 1.9532216570766483,
      "grad_norm": 0.43836215138435364,
      "learning_rate": 6.133708479668958e-06,
      "loss": 0.0144,
      "step": 1193520
    },
    {
      "epoch": 1.9532543875153014,
      "grad_norm": 0.37183985114097595,
      "learning_rate": 6.133642587455441e-06,
      "loss": 0.0126,
      "step": 1193540
    },
    {
      "epoch": 1.9532871179539548,
      "grad_norm": 0.22494786977767944,
      "learning_rate": 6.133576695241924e-06,
      "loss": 0.0127,
      "step": 1193560
    },
    {
      "epoch": 1.9533198483926082,
      "grad_norm": 0.339328408241272,
      "learning_rate": 6.133510803028406e-06,
      "loss": 0.0269,
      "step": 1193580
    },
    {
      "epoch": 1.9533525788312613,
      "grad_norm": 1.055712342262268,
      "learning_rate": 6.13344491081489e-06,
      "loss": 0.0198,
      "step": 1193600
    },
    {
      "epoch": 1.953385309269915,
      "grad_norm": 0.7092655897140503,
      "learning_rate": 6.133379018601372e-06,
      "loss": 0.0166,
      "step": 1193620
    },
    {
      "epoch": 1.953418039708568,
      "grad_norm": 0.7407519221305847,
      "learning_rate": 6.133313126387855e-06,
      "loss": 0.0151,
      "step": 1193640
    },
    {
      "epoch": 1.9534507701472215,
      "grad_norm": 0.4102766811847687,
      "learning_rate": 6.133247234174338e-06,
      "loss": 0.0178,
      "step": 1193660
    },
    {
      "epoch": 1.9534835005858748,
      "grad_norm": 0.6492825746536255,
      "learning_rate": 6.133181341960821e-06,
      "loss": 0.0252,
      "step": 1193680
    },
    {
      "epoch": 1.9535162310245282,
      "grad_norm": 0.8236875534057617,
      "learning_rate": 6.1331154497473035e-06,
      "loss": 0.0202,
      "step": 1193700
    },
    {
      "epoch": 1.9535489614631816,
      "grad_norm": 0.30203405022621155,
      "learning_rate": 6.133049557533787e-06,
      "loss": 0.0118,
      "step": 1193720
    },
    {
      "epoch": 1.9535816919018347,
      "grad_norm": 0.7943184971809387,
      "learning_rate": 6.13298366532027e-06,
      "loss": 0.0174,
      "step": 1193740
    },
    {
      "epoch": 1.9536144223404883,
      "grad_norm": 4.859832286834717,
      "learning_rate": 6.132917773106753e-06,
      "loss": 0.0173,
      "step": 1193760
    },
    {
      "epoch": 1.9536471527791415,
      "grad_norm": 0.743225634098053,
      "learning_rate": 6.132851880893236e-06,
      "loss": 0.015,
      "step": 1193780
    },
    {
      "epoch": 1.9536798832177948,
      "grad_norm": 0.35877564549446106,
      "learning_rate": 6.132785988679718e-06,
      "loss": 0.0106,
      "step": 1193800
    },
    {
      "epoch": 1.9537126136564482,
      "grad_norm": 0.1781625747680664,
      "learning_rate": 6.132720096466202e-06,
      "loss": 0.0198,
      "step": 1193820
    },
    {
      "epoch": 1.9537453440951016,
      "grad_norm": 0.697056233882904,
      "learning_rate": 6.1326542042526835e-06,
      "loss": 0.0246,
      "step": 1193840
    },
    {
      "epoch": 1.953778074533755,
      "grad_norm": 0.50569748878479,
      "learning_rate": 6.132588312039167e-06,
      "loss": 0.0197,
      "step": 1193860
    },
    {
      "epoch": 1.953810804972408,
      "grad_norm": 0.4178701341152191,
      "learning_rate": 6.132522419825649e-06,
      "loss": 0.0161,
      "step": 1193880
    },
    {
      "epoch": 1.9538435354110617,
      "grad_norm": 0.8989404439926147,
      "learning_rate": 6.132456527612133e-06,
      "loss": 0.0232,
      "step": 1193900
    },
    {
      "epoch": 1.9538762658497149,
      "grad_norm": 0.1820651888847351,
      "learning_rate": 6.132390635398615e-06,
      "loss": 0.0214,
      "step": 1193920
    },
    {
      "epoch": 1.9539089962883682,
      "grad_norm": 0.31671756505966187,
      "learning_rate": 6.132324743185099e-06,
      "loss": 0.0209,
      "step": 1193940
    },
    {
      "epoch": 1.9539417267270216,
      "grad_norm": 0.26415178179740906,
      "learning_rate": 6.132258850971581e-06,
      "loss": 0.0171,
      "step": 1193960
    },
    {
      "epoch": 1.953974457165675,
      "grad_norm": 0.7893696427345276,
      "learning_rate": 6.132192958758064e-06,
      "loss": 0.0153,
      "step": 1193980
    },
    {
      "epoch": 1.9540071876043283,
      "grad_norm": 0.4462808072566986,
      "learning_rate": 6.132127066544546e-06,
      "loss": 0.0247,
      "step": 1194000
    },
    {
      "epoch": 1.9540399180429815,
      "grad_norm": 1.2116854190826416,
      "learning_rate": 6.13206117433103e-06,
      "loss": 0.0192,
      "step": 1194020
    },
    {
      "epoch": 1.954072648481635,
      "grad_norm": 0.2779828608036041,
      "learning_rate": 6.131995282117512e-06,
      "loss": 0.0198,
      "step": 1194040
    },
    {
      "epoch": 1.9541053789202882,
      "grad_norm": 1.5673032999038696,
      "learning_rate": 6.131929389903995e-06,
      "loss": 0.0142,
      "step": 1194060
    },
    {
      "epoch": 1.9541381093589416,
      "grad_norm": 0.38580620288848877,
      "learning_rate": 6.131863497690479e-06,
      "loss": 0.0179,
      "step": 1194080
    },
    {
      "epoch": 1.954170839797595,
      "grad_norm": 0.5580668449401855,
      "learning_rate": 6.131797605476961e-06,
      "loss": 0.0188,
      "step": 1194100
    },
    {
      "epoch": 1.9542035702362484,
      "grad_norm": 1.3922396898269653,
      "learning_rate": 6.1317317132634444e-06,
      "loss": 0.0152,
      "step": 1194120
    },
    {
      "epoch": 1.9542363006749017,
      "grad_norm": 0.5365352034568787,
      "learning_rate": 6.131665821049927e-06,
      "loss": 0.0222,
      "step": 1194140
    },
    {
      "epoch": 1.9542690311135549,
      "grad_norm": 0.0964912474155426,
      "learning_rate": 6.13159992883641e-06,
      "loss": 0.0136,
      "step": 1194160
    },
    {
      "epoch": 1.9543017615522085,
      "grad_norm": 0.12602977454662323,
      "learning_rate": 6.131534036622893e-06,
      "loss": 0.023,
      "step": 1194180
    },
    {
      "epoch": 1.9543344919908616,
      "grad_norm": 1.1573148965835571,
      "learning_rate": 6.131468144409376e-06,
      "loss": 0.023,
      "step": 1194200
    },
    {
      "epoch": 1.954367222429515,
      "grad_norm": 1.0811177492141724,
      "learning_rate": 6.131402252195858e-06,
      "loss": 0.0129,
      "step": 1194220
    },
    {
      "epoch": 1.9543999528681684,
      "grad_norm": 0.7122905850410461,
      "learning_rate": 6.131336359982342e-06,
      "loss": 0.0219,
      "step": 1194240
    },
    {
      "epoch": 1.9544326833068215,
      "grad_norm": 0.4645248353481293,
      "learning_rate": 6.131270467768824e-06,
      "loss": 0.0171,
      "step": 1194260
    },
    {
      "epoch": 1.954465413745475,
      "grad_norm": 0.38681453466415405,
      "learning_rate": 6.131204575555307e-06,
      "loss": 0.0158,
      "step": 1194280
    },
    {
      "epoch": 1.9544981441841283,
      "grad_norm": 0.7247009873390198,
      "learning_rate": 6.13113868334179e-06,
      "loss": 0.0149,
      "step": 1194300
    },
    {
      "epoch": 1.9545308746227819,
      "grad_norm": 0.29801496863365173,
      "learning_rate": 6.131072791128273e-06,
      "loss": 0.018,
      "step": 1194320
    },
    {
      "epoch": 1.954563605061435,
      "grad_norm": 0.23919177055358887,
      "learning_rate": 6.1310068989147554e-06,
      "loss": 0.0178,
      "step": 1194340
    },
    {
      "epoch": 1.9545963355000884,
      "grad_norm": 0.6867833137512207,
      "learning_rate": 6.130941006701239e-06,
      "loss": 0.0172,
      "step": 1194360
    },
    {
      "epoch": 1.9546290659387417,
      "grad_norm": 0.6790853142738342,
      "learning_rate": 6.130875114487721e-06,
      "loss": 0.0178,
      "step": 1194380
    },
    {
      "epoch": 1.954661796377395,
      "grad_norm": 0.21121637523174286,
      "learning_rate": 6.1308092222742045e-06,
      "loss": 0.021,
      "step": 1194400
    },
    {
      "epoch": 1.9546945268160485,
      "grad_norm": 0.2082248032093048,
      "learning_rate": 6.130743330060688e-06,
      "loss": 0.0123,
      "step": 1194420
    },
    {
      "epoch": 1.9547272572547016,
      "grad_norm": 0.27492430806159973,
      "learning_rate": 6.13067743784717e-06,
      "loss": 0.0214,
      "step": 1194440
    },
    {
      "epoch": 1.954759987693355,
      "grad_norm": 0.09574352949857712,
      "learning_rate": 6.1306115456336536e-06,
      "loss": 0.0205,
      "step": 1194460
    },
    {
      "epoch": 1.9547927181320084,
      "grad_norm": 0.22741705179214478,
      "learning_rate": 6.1305456534201355e-06,
      "loss": 0.0176,
      "step": 1194480
    },
    {
      "epoch": 1.9548254485706618,
      "grad_norm": 0.2991965413093567,
      "learning_rate": 6.130479761206619e-06,
      "loss": 0.0246,
      "step": 1194500
    },
    {
      "epoch": 1.9548581790093151,
      "grad_norm": 0.124540314078331,
      "learning_rate": 6.130413868993102e-06,
      "loss": 0.0219,
      "step": 1194520
    },
    {
      "epoch": 1.9548909094479683,
      "grad_norm": 0.46504977345466614,
      "learning_rate": 6.1303479767795845e-06,
      "loss": 0.0108,
      "step": 1194540
    },
    {
      "epoch": 1.9549236398866219,
      "grad_norm": 0.0627337247133255,
      "learning_rate": 6.130282084566067e-06,
      "loss": 0.015,
      "step": 1194560
    },
    {
      "epoch": 1.954956370325275,
      "grad_norm": 0.571733295917511,
      "learning_rate": 6.130216192352551e-06,
      "loss": 0.0155,
      "step": 1194580
    },
    {
      "epoch": 1.9549891007639284,
      "grad_norm": 2.6241719722747803,
      "learning_rate": 6.130150300139033e-06,
      "loss": 0.0177,
      "step": 1194600
    },
    {
      "epoch": 1.9550218312025818,
      "grad_norm": 0.27674680948257446,
      "learning_rate": 6.130084407925516e-06,
      "loss": 0.0132,
      "step": 1194620
    },
    {
      "epoch": 1.9550545616412351,
      "grad_norm": 0.5004231929779053,
      "learning_rate": 6.130018515711998e-06,
      "loss": 0.0138,
      "step": 1194640
    },
    {
      "epoch": 1.9550872920798885,
      "grad_norm": 0.41409897804260254,
      "learning_rate": 6.129952623498482e-06,
      "loss": 0.0203,
      "step": 1194660
    },
    {
      "epoch": 1.9551200225185417,
      "grad_norm": 0.13625694811344147,
      "learning_rate": 6.1298867312849645e-06,
      "loss": 0.0167,
      "step": 1194680
    },
    {
      "epoch": 1.9551527529571953,
      "grad_norm": 0.27874556183815,
      "learning_rate": 6.129820839071447e-06,
      "loss": 0.0132,
      "step": 1194700
    },
    {
      "epoch": 1.9551854833958484,
      "grad_norm": 0.32382476329803467,
      "learning_rate": 6.12975494685793e-06,
      "loss": 0.0148,
      "step": 1194720
    },
    {
      "epoch": 1.9552182138345018,
      "grad_norm": 0.40899625420570374,
      "learning_rate": 6.129689054644414e-06,
      "loss": 0.0139,
      "step": 1194740
    },
    {
      "epoch": 1.9552509442731552,
      "grad_norm": 0.3043922185897827,
      "learning_rate": 6.1296231624308955e-06,
      "loss": 0.0181,
      "step": 1194760
    },
    {
      "epoch": 1.9552836747118085,
      "grad_norm": 0.6788812279701233,
      "learning_rate": 6.129557270217379e-06,
      "loss": 0.0155,
      "step": 1194780
    },
    {
      "epoch": 1.955316405150462,
      "grad_norm": 1.407356858253479,
      "learning_rate": 6.129491378003863e-06,
      "loss": 0.0128,
      "step": 1194800
    },
    {
      "epoch": 1.955349135589115,
      "grad_norm": 0.33646348118782043,
      "learning_rate": 6.1294254857903446e-06,
      "loss": 0.0277,
      "step": 1194820
    },
    {
      "epoch": 1.9553818660277686,
      "grad_norm": 0.1650344729423523,
      "learning_rate": 6.129359593576828e-06,
      "loss": 0.0163,
      "step": 1194840
    },
    {
      "epoch": 1.9554145964664218,
      "grad_norm": 0.20043940842151642,
      "learning_rate": 6.12929370136331e-06,
      "loss": 0.0177,
      "step": 1194860
    },
    {
      "epoch": 1.9554473269050752,
      "grad_norm": 0.5649013519287109,
      "learning_rate": 6.129227809149794e-06,
      "loss": 0.0175,
      "step": 1194880
    },
    {
      "epoch": 1.9554800573437285,
      "grad_norm": 0.17788013815879822,
      "learning_rate": 6.1291619169362755e-06,
      "loss": 0.0206,
      "step": 1194900
    },
    {
      "epoch": 1.955512787782382,
      "grad_norm": 0.23439203202724457,
      "learning_rate": 6.129096024722759e-06,
      "loss": 0.0123,
      "step": 1194920
    },
    {
      "epoch": 1.9555455182210353,
      "grad_norm": 0.4962097704410553,
      "learning_rate": 6.129030132509242e-06,
      "loss": 0.0141,
      "step": 1194940
    },
    {
      "epoch": 1.9555782486596884,
      "grad_norm": 0.6484092473983765,
      "learning_rate": 6.128964240295725e-06,
      "loss": 0.0156,
      "step": 1194960
    },
    {
      "epoch": 1.955610979098342,
      "grad_norm": 0.18946313858032227,
      "learning_rate": 6.128898348082207e-06,
      "loss": 0.0115,
      "step": 1194980
    },
    {
      "epoch": 1.9556437095369952,
      "grad_norm": 0.2375999242067337,
      "learning_rate": 6.128832455868691e-06,
      "loss": 0.017,
      "step": 1195000
    },
    {
      "epoch": 1.9556764399756486,
      "grad_norm": 0.5460687875747681,
      "learning_rate": 6.128766563655173e-06,
      "loss": 0.0187,
      "step": 1195020
    },
    {
      "epoch": 1.955709170414302,
      "grad_norm": 0.6702134013175964,
      "learning_rate": 6.128700671441656e-06,
      "loss": 0.0178,
      "step": 1195040
    },
    {
      "epoch": 1.955741900852955,
      "grad_norm": 0.2059040367603302,
      "learning_rate": 6.128634779228138e-06,
      "loss": 0.0156,
      "step": 1195060
    },
    {
      "epoch": 1.9557746312916087,
      "grad_norm": 0.5819776654243469,
      "learning_rate": 6.128568887014622e-06,
      "loss": 0.0214,
      "step": 1195080
    },
    {
      "epoch": 1.9558073617302618,
      "grad_norm": 0.28168684244155884,
      "learning_rate": 6.128502994801105e-06,
      "loss": 0.0127,
      "step": 1195100
    },
    {
      "epoch": 1.9558400921689154,
      "grad_norm": 0.22738859057426453,
      "learning_rate": 6.128437102587587e-06,
      "loss": 0.0169,
      "step": 1195120
    },
    {
      "epoch": 1.9558728226075686,
      "grad_norm": 1.4048147201538086,
      "learning_rate": 6.128371210374071e-06,
      "loss": 0.018,
      "step": 1195140
    },
    {
      "epoch": 1.955905553046222,
      "grad_norm": 0.5917647480964661,
      "learning_rate": 6.128305318160554e-06,
      "loss": 0.0152,
      "step": 1195160
    },
    {
      "epoch": 1.9559382834848753,
      "grad_norm": 0.42629432678222656,
      "learning_rate": 6.1282394259470364e-06,
      "loss": 0.0269,
      "step": 1195180
    },
    {
      "epoch": 1.9559710139235285,
      "grad_norm": 0.5177538394927979,
      "learning_rate": 6.128173533733519e-06,
      "loss": 0.0196,
      "step": 1195200
    },
    {
      "epoch": 1.956003744362182,
      "grad_norm": 1.0938953161239624,
      "learning_rate": 6.128107641520003e-06,
      "loss": 0.0219,
      "step": 1195220
    },
    {
      "epoch": 1.9560364748008352,
      "grad_norm": 0.4094216227531433,
      "learning_rate": 6.128041749306485e-06,
      "loss": 0.0283,
      "step": 1195240
    },
    {
      "epoch": 1.9560692052394886,
      "grad_norm": 0.38526132702827454,
      "learning_rate": 6.127975857092968e-06,
      "loss": 0.0186,
      "step": 1195260
    },
    {
      "epoch": 1.956101935678142,
      "grad_norm": 0.7845700979232788,
      "learning_rate": 6.12790996487945e-06,
      "loss": 0.0189,
      "step": 1195280
    },
    {
      "epoch": 1.9561346661167953,
      "grad_norm": 0.17494598031044006,
      "learning_rate": 6.127844072665934e-06,
      "loss": 0.018,
      "step": 1195300
    },
    {
      "epoch": 1.9561673965554487,
      "grad_norm": 0.2939433157444,
      "learning_rate": 6.1277781804524164e-06,
      "loss": 0.025,
      "step": 1195320
    },
    {
      "epoch": 1.9562001269941018,
      "grad_norm": 0.11155368387699127,
      "learning_rate": 6.127712288238899e-06,
      "loss": 0.0218,
      "step": 1195340
    },
    {
      "epoch": 1.9562328574327554,
      "grad_norm": 0.3399003744125366,
      "learning_rate": 6.127646396025382e-06,
      "loss": 0.0186,
      "step": 1195360
    },
    {
      "epoch": 1.9562655878714086,
      "grad_norm": 0.18348290026187897,
      "learning_rate": 6.1275805038118655e-06,
      "loss": 0.0208,
      "step": 1195380
    },
    {
      "epoch": 1.956298318310062,
      "grad_norm": 0.11277785152196884,
      "learning_rate": 6.127514611598347e-06,
      "loss": 0.017,
      "step": 1195400
    },
    {
      "epoch": 1.9563310487487153,
      "grad_norm": 0.20031628012657166,
      "learning_rate": 6.127448719384831e-06,
      "loss": 0.0122,
      "step": 1195420
    },
    {
      "epoch": 1.9563637791873687,
      "grad_norm": 1.4479714632034302,
      "learning_rate": 6.127382827171313e-06,
      "loss": 0.0176,
      "step": 1195440
    },
    {
      "epoch": 1.956396509626022,
      "grad_norm": 0.0938359797000885,
      "learning_rate": 6.1273169349577965e-06,
      "loss": 0.0147,
      "step": 1195460
    },
    {
      "epoch": 1.9564292400646752,
      "grad_norm": 0.3393042981624603,
      "learning_rate": 6.12725104274428e-06,
      "loss": 0.0159,
      "step": 1195480
    },
    {
      "epoch": 1.9564619705033288,
      "grad_norm": 0.48487958312034607,
      "learning_rate": 6.127185150530762e-06,
      "loss": 0.0165,
      "step": 1195500
    },
    {
      "epoch": 1.956494700941982,
      "grad_norm": 0.3072853684425354,
      "learning_rate": 6.1271192583172455e-06,
      "loss": 0.0284,
      "step": 1195520
    },
    {
      "epoch": 1.9565274313806353,
      "grad_norm": 0.3192421793937683,
      "learning_rate": 6.127053366103728e-06,
      "loss": 0.0159,
      "step": 1195540
    },
    {
      "epoch": 1.9565601618192887,
      "grad_norm": 1.6140429973602295,
      "learning_rate": 6.126987473890211e-06,
      "loss": 0.0171,
      "step": 1195560
    },
    {
      "epoch": 1.956592892257942,
      "grad_norm": 0.3078593611717224,
      "learning_rate": 6.126921581676694e-06,
      "loss": 0.0143,
      "step": 1195580
    },
    {
      "epoch": 1.9566256226965955,
      "grad_norm": 0.27194124460220337,
      "learning_rate": 6.126855689463177e-06,
      "loss": 0.0137,
      "step": 1195600
    },
    {
      "epoch": 1.9566583531352486,
      "grad_norm": 0.6350882053375244,
      "learning_rate": 6.126789797249659e-06,
      "loss": 0.0228,
      "step": 1195620
    },
    {
      "epoch": 1.9566910835739022,
      "grad_norm": 0.3187059760093689,
      "learning_rate": 6.126723905036143e-06,
      "loss": 0.0139,
      "step": 1195640
    },
    {
      "epoch": 1.9567238140125554,
      "grad_norm": 0.5424032807350159,
      "learning_rate": 6.126658012822625e-06,
      "loss": 0.0148,
      "step": 1195660
    },
    {
      "epoch": 1.9567565444512087,
      "grad_norm": 0.6885177493095398,
      "learning_rate": 6.126592120609108e-06,
      "loss": 0.0287,
      "step": 1195680
    },
    {
      "epoch": 1.956789274889862,
      "grad_norm": 0.17476186156272888,
      "learning_rate": 6.126526228395591e-06,
      "loss": 0.0185,
      "step": 1195700
    },
    {
      "epoch": 1.9568220053285155,
      "grad_norm": 0.45460912585258484,
      "learning_rate": 6.126460336182074e-06,
      "loss": 0.0241,
      "step": 1195720
    },
    {
      "epoch": 1.9568547357671688,
      "grad_norm": 0.20179757475852966,
      "learning_rate": 6.1263944439685565e-06,
      "loss": 0.0184,
      "step": 1195740
    },
    {
      "epoch": 1.956887466205822,
      "grad_norm": 0.6297143697738647,
      "learning_rate": 6.12632855175504e-06,
      "loss": 0.0111,
      "step": 1195760
    },
    {
      "epoch": 1.9569201966444756,
      "grad_norm": 0.3762264549732208,
      "learning_rate": 6.126262659541522e-06,
      "loss": 0.0211,
      "step": 1195780
    },
    {
      "epoch": 1.9569529270831287,
      "grad_norm": 1.2725204229354858,
      "learning_rate": 6.126196767328006e-06,
      "loss": 0.0208,
      "step": 1195800
    },
    {
      "epoch": 1.956985657521782,
      "grad_norm": 0.8002179861068726,
      "learning_rate": 6.1261308751144875e-06,
      "loss": 0.016,
      "step": 1195820
    },
    {
      "epoch": 1.9570183879604355,
      "grad_norm": 0.4418313503265381,
      "learning_rate": 6.126064982900971e-06,
      "loss": 0.0162,
      "step": 1195840
    },
    {
      "epoch": 1.9570511183990886,
      "grad_norm": 0.5950918793678284,
      "learning_rate": 6.125999090687455e-06,
      "loss": 0.0162,
      "step": 1195860
    },
    {
      "epoch": 1.9570838488377422,
      "grad_norm": 0.2816290557384491,
      "learning_rate": 6.1259331984739366e-06,
      "loss": 0.0149,
      "step": 1195880
    },
    {
      "epoch": 1.9571165792763954,
      "grad_norm": 0.34828025102615356,
      "learning_rate": 6.12586730626042e-06,
      "loss": 0.0211,
      "step": 1195900
    },
    {
      "epoch": 1.9571493097150487,
      "grad_norm": 0.3885686993598938,
      "learning_rate": 6.125801414046902e-06,
      "loss": 0.015,
      "step": 1195920
    },
    {
      "epoch": 1.9571820401537021,
      "grad_norm": 1.2403364181518555,
      "learning_rate": 6.125735521833386e-06,
      "loss": 0.0187,
      "step": 1195940
    },
    {
      "epoch": 1.9572147705923555,
      "grad_norm": 0.9105014204978943,
      "learning_rate": 6.125669629619868e-06,
      "loss": 0.0128,
      "step": 1195960
    },
    {
      "epoch": 1.9572475010310089,
      "grad_norm": 1.1837024688720703,
      "learning_rate": 6.125603737406351e-06,
      "loss": 0.0176,
      "step": 1195980
    },
    {
      "epoch": 1.957280231469662,
      "grad_norm": 0.10779908299446106,
      "learning_rate": 6.125537845192834e-06,
      "loss": 0.0193,
      "step": 1196000
    },
    {
      "epoch": 1.9573129619083156,
      "grad_norm": 0.30289599299430847,
      "learning_rate": 6.125471952979317e-06,
      "loss": 0.0106,
      "step": 1196020
    },
    {
      "epoch": 1.9573456923469688,
      "grad_norm": 1.7284409999847412,
      "learning_rate": 6.125406060765799e-06,
      "loss": 0.028,
      "step": 1196040
    },
    {
      "epoch": 1.9573784227856221,
      "grad_norm": 0.5367129445075989,
      "learning_rate": 6.125340168552283e-06,
      "loss": 0.013,
      "step": 1196060
    },
    {
      "epoch": 1.9574111532242755,
      "grad_norm": 0.5275208353996277,
      "learning_rate": 6.125274276338765e-06,
      "loss": 0.0274,
      "step": 1196080
    },
    {
      "epoch": 1.9574438836629289,
      "grad_norm": 0.13900673389434814,
      "learning_rate": 6.125208384125248e-06,
      "loss": 0.0203,
      "step": 1196100
    },
    {
      "epoch": 1.9574766141015822,
      "grad_norm": 0.5908636450767517,
      "learning_rate": 6.125142491911731e-06,
      "loss": 0.0202,
      "step": 1196120
    },
    {
      "epoch": 1.9575093445402354,
      "grad_norm": 2.573331594467163,
      "learning_rate": 6.125076599698214e-06,
      "loss": 0.0172,
      "step": 1196140
    },
    {
      "epoch": 1.957542074978889,
      "grad_norm": 0.9272427558898926,
      "learning_rate": 6.125010707484697e-06,
      "loss": 0.0241,
      "step": 1196160
    },
    {
      "epoch": 1.9575748054175421,
      "grad_norm": 0.15252196788787842,
      "learning_rate": 6.12494481527118e-06,
      "loss": 0.0134,
      "step": 1196180
    },
    {
      "epoch": 1.9576075358561955,
      "grad_norm": 0.3170059323310852,
      "learning_rate": 6.124878923057663e-06,
      "loss": 0.014,
      "step": 1196200
    },
    {
      "epoch": 1.9576402662948489,
      "grad_norm": 0.5030290484428406,
      "learning_rate": 6.124813030844146e-06,
      "loss": 0.0218,
      "step": 1196220
    },
    {
      "epoch": 1.9576729967335023,
      "grad_norm": 0.6904460191726685,
      "learning_rate": 6.124747138630629e-06,
      "loss": 0.0152,
      "step": 1196240
    },
    {
      "epoch": 1.9577057271721556,
      "grad_norm": 0.5310131907463074,
      "learning_rate": 6.124681246417111e-06,
      "loss": 0.0222,
      "step": 1196260
    },
    {
      "epoch": 1.9577384576108088,
      "grad_norm": 0.6101818680763245,
      "learning_rate": 6.124615354203595e-06,
      "loss": 0.0211,
      "step": 1196280
    },
    {
      "epoch": 1.9577711880494624,
      "grad_norm": 0.39614835381507874,
      "learning_rate": 6.124549461990077e-06,
      "loss": 0.0153,
      "step": 1196300
    },
    {
      "epoch": 1.9578039184881155,
      "grad_norm": 0.9453635215759277,
      "learning_rate": 6.12448356977656e-06,
      "loss": 0.0215,
      "step": 1196320
    },
    {
      "epoch": 1.957836648926769,
      "grad_norm": 0.3652271032333374,
      "learning_rate": 6.124417677563043e-06,
      "loss": 0.0132,
      "step": 1196340
    },
    {
      "epoch": 1.9578693793654223,
      "grad_norm": 0.26192837953567505,
      "learning_rate": 6.124351785349526e-06,
      "loss": 0.0232,
      "step": 1196360
    },
    {
      "epoch": 1.9579021098040756,
      "grad_norm": 0.5410754084587097,
      "learning_rate": 6.1242858931360084e-06,
      "loss": 0.0159,
      "step": 1196380
    },
    {
      "epoch": 1.957934840242729,
      "grad_norm": 0.6534922122955322,
      "learning_rate": 6.124220000922492e-06,
      "loss": 0.0154,
      "step": 1196400
    },
    {
      "epoch": 1.9579675706813822,
      "grad_norm": 0.5951136350631714,
      "learning_rate": 6.124154108708974e-06,
      "loss": 0.016,
      "step": 1196420
    },
    {
      "epoch": 1.9580003011200358,
      "grad_norm": 0.25000593066215515,
      "learning_rate": 6.1240882164954575e-06,
      "loss": 0.0211,
      "step": 1196440
    },
    {
      "epoch": 1.958033031558689,
      "grad_norm": 0.21329693496227264,
      "learning_rate": 6.124022324281939e-06,
      "loss": 0.0181,
      "step": 1196460
    },
    {
      "epoch": 1.9580657619973423,
      "grad_norm": 0.25018852949142456,
      "learning_rate": 6.123956432068423e-06,
      "loss": 0.018,
      "step": 1196480
    },
    {
      "epoch": 1.9580984924359957,
      "grad_norm": 0.13030679523944855,
      "learning_rate": 6.123890539854906e-06,
      "loss": 0.0179,
      "step": 1196500
    },
    {
      "epoch": 1.9581312228746488,
      "grad_norm": 0.4512559771537781,
      "learning_rate": 6.1238246476413885e-06,
      "loss": 0.0132,
      "step": 1196520
    },
    {
      "epoch": 1.9581639533133024,
      "grad_norm": 0.1830422580242157,
      "learning_rate": 6.123758755427872e-06,
      "loss": 0.0215,
      "step": 1196540
    },
    {
      "epoch": 1.9581966837519555,
      "grad_norm": 0.5239883661270142,
      "learning_rate": 6.123692863214355e-06,
      "loss": 0.0155,
      "step": 1196560
    },
    {
      "epoch": 1.9582294141906091,
      "grad_norm": 0.23746493458747864,
      "learning_rate": 6.1236269710008375e-06,
      "loss": 0.0123,
      "step": 1196580
    },
    {
      "epoch": 1.9582621446292623,
      "grad_norm": 0.7212526202201843,
      "learning_rate": 6.12356107878732e-06,
      "loss": 0.0208,
      "step": 1196600
    },
    {
      "epoch": 1.9582948750679157,
      "grad_norm": 0.9132381081581116,
      "learning_rate": 6.123495186573804e-06,
      "loss": 0.02,
      "step": 1196620
    },
    {
      "epoch": 1.958327605506569,
      "grad_norm": 1.1439861059188843,
      "learning_rate": 6.123429294360286e-06,
      "loss": 0.0253,
      "step": 1196640
    },
    {
      "epoch": 1.9583603359452222,
      "grad_norm": 0.3181154429912567,
      "learning_rate": 6.123363402146769e-06,
      "loss": 0.0207,
      "step": 1196660
    },
    {
      "epoch": 1.9583930663838758,
      "grad_norm": 0.6473036408424377,
      "learning_rate": 6.123297509933251e-06,
      "loss": 0.0187,
      "step": 1196680
    },
    {
      "epoch": 1.958425796822529,
      "grad_norm": 0.8791866302490234,
      "learning_rate": 6.123231617719735e-06,
      "loss": 0.025,
      "step": 1196700
    },
    {
      "epoch": 1.9584585272611823,
      "grad_norm": 0.46099260449409485,
      "learning_rate": 6.1231657255062175e-06,
      "loss": 0.0171,
      "step": 1196720
    },
    {
      "epoch": 1.9584912576998357,
      "grad_norm": 0.5362161993980408,
      "learning_rate": 6.1230998332927e-06,
      "loss": 0.021,
      "step": 1196740
    },
    {
      "epoch": 1.958523988138489,
      "grad_norm": 0.5520269870758057,
      "learning_rate": 6.123033941079183e-06,
      "loss": 0.0185,
      "step": 1196760
    },
    {
      "epoch": 1.9585567185771424,
      "grad_norm": 1.2596522569656372,
      "learning_rate": 6.122968048865667e-06,
      "loss": 0.0154,
      "step": 1196780
    },
    {
      "epoch": 1.9585894490157956,
      "grad_norm": 0.19525951147079468,
      "learning_rate": 6.1229021566521485e-06,
      "loss": 0.0173,
      "step": 1196800
    },
    {
      "epoch": 1.9586221794544492,
      "grad_norm": 0.49533382058143616,
      "learning_rate": 6.122836264438632e-06,
      "loss": 0.0116,
      "step": 1196820
    },
    {
      "epoch": 1.9586549098931023,
      "grad_norm": 0.12262652814388275,
      "learning_rate": 6.122770372225114e-06,
      "loss": 0.0157,
      "step": 1196840
    },
    {
      "epoch": 1.9586876403317557,
      "grad_norm": 0.4969538450241089,
      "learning_rate": 6.1227044800115976e-06,
      "loss": 0.0148,
      "step": 1196860
    },
    {
      "epoch": 1.958720370770409,
      "grad_norm": 0.6986263394355774,
      "learning_rate": 6.1226385877980795e-06,
      "loss": 0.0156,
      "step": 1196880
    },
    {
      "epoch": 1.9587531012090624,
      "grad_norm": 0.6885212063789368,
      "learning_rate": 6.122572695584563e-06,
      "loss": 0.0175,
      "step": 1196900
    },
    {
      "epoch": 1.9587858316477158,
      "grad_norm": 0.6084768772125244,
      "learning_rate": 6.122506803371047e-06,
      "loss": 0.0195,
      "step": 1196920
    },
    {
      "epoch": 1.958818562086369,
      "grad_norm": 0.37819576263427734,
      "learning_rate": 6.1224409111575285e-06,
      "loss": 0.0192,
      "step": 1196940
    },
    {
      "epoch": 1.9588512925250225,
      "grad_norm": 0.6704740524291992,
      "learning_rate": 6.122375018944012e-06,
      "loss": 0.0149,
      "step": 1196960
    },
    {
      "epoch": 1.9588840229636757,
      "grad_norm": 0.43209725618362427,
      "learning_rate": 6.122309126730495e-06,
      "loss": 0.013,
      "step": 1196980
    },
    {
      "epoch": 1.958916753402329,
      "grad_norm": 0.34688010811805725,
      "learning_rate": 6.122243234516978e-06,
      "loss": 0.021,
      "step": 1197000
    },
    {
      "epoch": 1.9589494838409824,
      "grad_norm": 0.2665746510028839,
      "learning_rate": 6.12217734230346e-06,
      "loss": 0.0141,
      "step": 1197020
    },
    {
      "epoch": 1.9589822142796358,
      "grad_norm": 0.34013187885284424,
      "learning_rate": 6.122111450089944e-06,
      "loss": 0.0239,
      "step": 1197040
    },
    {
      "epoch": 1.9590149447182892,
      "grad_norm": 0.98310786485672,
      "learning_rate": 6.122045557876426e-06,
      "loss": 0.0118,
      "step": 1197060
    },
    {
      "epoch": 1.9590476751569423,
      "grad_norm": 0.41862884163856506,
      "learning_rate": 6.121979665662909e-06,
      "loss": 0.0181,
      "step": 1197080
    },
    {
      "epoch": 1.959080405595596,
      "grad_norm": 0.72506183385849,
      "learning_rate": 6.121913773449391e-06,
      "loss": 0.0229,
      "step": 1197100
    },
    {
      "epoch": 1.959113136034249,
      "grad_norm": 0.23430675268173218,
      "learning_rate": 6.121847881235875e-06,
      "loss": 0.012,
      "step": 1197120
    },
    {
      "epoch": 1.9591458664729025,
      "grad_norm": 0.3767216205596924,
      "learning_rate": 6.121781989022358e-06,
      "loss": 0.0169,
      "step": 1197140
    },
    {
      "epoch": 1.9591785969115558,
      "grad_norm": 0.3065696060657501,
      "learning_rate": 6.12171609680884e-06,
      "loss": 0.0162,
      "step": 1197160
    },
    {
      "epoch": 1.9592113273502092,
      "grad_norm": 0.33517947793006897,
      "learning_rate": 6.121650204595323e-06,
      "loss": 0.0168,
      "step": 1197180
    },
    {
      "epoch": 1.9592440577888626,
      "grad_norm": 1.3976503610610962,
      "learning_rate": 6.121584312381807e-06,
      "loss": 0.0163,
      "step": 1197200
    },
    {
      "epoch": 1.9592767882275157,
      "grad_norm": 0.28176894783973694,
      "learning_rate": 6.121518420168289e-06,
      "loss": 0.0167,
      "step": 1197220
    },
    {
      "epoch": 1.9593095186661693,
      "grad_norm": 0.46928635239601135,
      "learning_rate": 6.121452527954772e-06,
      "loss": 0.014,
      "step": 1197240
    },
    {
      "epoch": 1.9593422491048225,
      "grad_norm": 0.6037676930427551,
      "learning_rate": 6.121386635741256e-06,
      "loss": 0.0174,
      "step": 1197260
    },
    {
      "epoch": 1.9593749795434758,
      "grad_norm": 0.17446652054786682,
      "learning_rate": 6.121320743527738e-06,
      "loss": 0.0172,
      "step": 1197280
    },
    {
      "epoch": 1.9594077099821292,
      "grad_norm": 0.5793338418006897,
      "learning_rate": 6.121254851314221e-06,
      "loss": 0.0204,
      "step": 1197300
    },
    {
      "epoch": 1.9594404404207824,
      "grad_norm": 3.4901933670043945,
      "learning_rate": 6.121188959100703e-06,
      "loss": 0.0177,
      "step": 1197320
    },
    {
      "epoch": 1.959473170859436,
      "grad_norm": 1.6654361486434937,
      "learning_rate": 6.121123066887187e-06,
      "loss": 0.0199,
      "step": 1197340
    },
    {
      "epoch": 1.959505901298089,
      "grad_norm": 0.7553606629371643,
      "learning_rate": 6.1210571746736695e-06,
      "loss": 0.0239,
      "step": 1197360
    },
    {
      "epoch": 1.9595386317367427,
      "grad_norm": 0.23210686445236206,
      "learning_rate": 6.120991282460152e-06,
      "loss": 0.0202,
      "step": 1197380
    },
    {
      "epoch": 1.9595713621753958,
      "grad_norm": 0.8244020342826843,
      "learning_rate": 6.120925390246635e-06,
      "loss": 0.0122,
      "step": 1197400
    },
    {
      "epoch": 1.9596040926140492,
      "grad_norm": 0.6209969520568848,
      "learning_rate": 6.1208594980331185e-06,
      "loss": 0.0154,
      "step": 1197420
    },
    {
      "epoch": 1.9596368230527026,
      "grad_norm": 0.23499074578285217,
      "learning_rate": 6.1207936058196e-06,
      "loss": 0.0143,
      "step": 1197440
    },
    {
      "epoch": 1.9596695534913557,
      "grad_norm": 0.6859135031700134,
      "learning_rate": 6.120727713606084e-06,
      "loss": 0.0134,
      "step": 1197460
    },
    {
      "epoch": 1.9597022839300093,
      "grad_norm": 0.7325655817985535,
      "learning_rate": 6.120661821392566e-06,
      "loss": 0.0186,
      "step": 1197480
    },
    {
      "epoch": 1.9597350143686625,
      "grad_norm": 0.48399773240089417,
      "learning_rate": 6.1205959291790495e-06,
      "loss": 0.0121,
      "step": 1197500
    },
    {
      "epoch": 1.9597677448073159,
      "grad_norm": 1.0877430438995361,
      "learning_rate": 6.120530036965532e-06,
      "loss": 0.0215,
      "step": 1197520
    },
    {
      "epoch": 1.9598004752459692,
      "grad_norm": 1.233222246170044,
      "learning_rate": 6.120464144752015e-06,
      "loss": 0.0158,
      "step": 1197540
    },
    {
      "epoch": 1.9598332056846226,
      "grad_norm": 0.3495780825614929,
      "learning_rate": 6.120398252538498e-06,
      "loss": 0.0168,
      "step": 1197560
    },
    {
      "epoch": 1.959865936123276,
      "grad_norm": 0.6680465340614319,
      "learning_rate": 6.120332360324981e-06,
      "loss": 0.0249,
      "step": 1197580
    },
    {
      "epoch": 1.9598986665619291,
      "grad_norm": 0.49275079369544983,
      "learning_rate": 6.120266468111464e-06,
      "loss": 0.0254,
      "step": 1197600
    },
    {
      "epoch": 1.9599313970005827,
      "grad_norm": 0.36211076378822327,
      "learning_rate": 6.120200575897947e-06,
      "loss": 0.0215,
      "step": 1197620
    },
    {
      "epoch": 1.9599641274392359,
      "grad_norm": 0.8539656400680542,
      "learning_rate": 6.12013468368443e-06,
      "loss": 0.0154,
      "step": 1197640
    },
    {
      "epoch": 1.9599968578778892,
      "grad_norm": 0.2825673520565033,
      "learning_rate": 6.120068791470912e-06,
      "loss": 0.0142,
      "step": 1197660
    },
    {
      "epoch": 1.9600295883165426,
      "grad_norm": 0.1005435436964035,
      "learning_rate": 6.120002899257396e-06,
      "loss": 0.0225,
      "step": 1197680
    },
    {
      "epoch": 1.960062318755196,
      "grad_norm": 7.3367815017700195,
      "learning_rate": 6.119937007043878e-06,
      "loss": 0.0128,
      "step": 1197700
    },
    {
      "epoch": 1.9600950491938494,
      "grad_norm": 0.6181791424751282,
      "learning_rate": 6.119871114830361e-06,
      "loss": 0.0182,
      "step": 1197720
    },
    {
      "epoch": 1.9601277796325025,
      "grad_norm": 0.9207283854484558,
      "learning_rate": 6.119805222616843e-06,
      "loss": 0.019,
      "step": 1197740
    },
    {
      "epoch": 1.960160510071156,
      "grad_norm": 0.6177857518196106,
      "learning_rate": 6.119739330403327e-06,
      "loss": 0.0193,
      "step": 1197760
    },
    {
      "epoch": 1.9601932405098093,
      "grad_norm": 0.21742364764213562,
      "learning_rate": 6.1196734381898095e-06,
      "loss": 0.0222,
      "step": 1197780
    },
    {
      "epoch": 1.9602259709484626,
      "grad_norm": 0.21940264105796814,
      "learning_rate": 6.119607545976293e-06,
      "loss": 0.0208,
      "step": 1197800
    },
    {
      "epoch": 1.960258701387116,
      "grad_norm": 0.22564642131328583,
      "learning_rate": 6.119541653762775e-06,
      "loss": 0.0153,
      "step": 1197820
    },
    {
      "epoch": 1.9602914318257694,
      "grad_norm": 0.2775866985321045,
      "learning_rate": 6.119475761549259e-06,
      "loss": 0.014,
      "step": 1197840
    },
    {
      "epoch": 1.9603241622644227,
      "grad_norm": 0.2546486556529999,
      "learning_rate": 6.1194098693357405e-06,
      "loss": 0.018,
      "step": 1197860
    },
    {
      "epoch": 1.960356892703076,
      "grad_norm": 0.28907325863838196,
      "learning_rate": 6.119343977122224e-06,
      "loss": 0.0193,
      "step": 1197880
    },
    {
      "epoch": 1.9603896231417295,
      "grad_norm": 0.7566267251968384,
      "learning_rate": 6.119278084908706e-06,
      "loss": 0.0193,
      "step": 1197900
    },
    {
      "epoch": 1.9604223535803826,
      "grad_norm": 0.40463417768478394,
      "learning_rate": 6.1192121926951896e-06,
      "loss": 0.0225,
      "step": 1197920
    },
    {
      "epoch": 1.960455084019036,
      "grad_norm": 0.9790937900543213,
      "learning_rate": 6.119146300481673e-06,
      "loss": 0.0258,
      "step": 1197940
    },
    {
      "epoch": 1.9604878144576894,
      "grad_norm": 1.2273212671279907,
      "learning_rate": 6.119080408268155e-06,
      "loss": 0.0172,
      "step": 1197960
    },
    {
      "epoch": 1.9605205448963428,
      "grad_norm": 0.47951540350914,
      "learning_rate": 6.119014516054639e-06,
      "loss": 0.0153,
      "step": 1197980
    },
    {
      "epoch": 1.9605532753349961,
      "grad_norm": 0.8303915858268738,
      "learning_rate": 6.118948623841121e-06,
      "loss": 0.0133,
      "step": 1198000
    },
    {
      "epoch": 1.9605860057736493,
      "grad_norm": 0.18075154721736908,
      "learning_rate": 6.118882731627604e-06,
      "loss": 0.0142,
      "step": 1198020
    },
    {
      "epoch": 1.9606187362123029,
      "grad_norm": 0.5394690632820129,
      "learning_rate": 6.118816839414087e-06,
      "loss": 0.0181,
      "step": 1198040
    },
    {
      "epoch": 1.960651466650956,
      "grad_norm": 0.19103744626045227,
      "learning_rate": 6.1187509472005704e-06,
      "loss": 0.0138,
      "step": 1198060
    },
    {
      "epoch": 1.9606841970896094,
      "grad_norm": 0.7694354057312012,
      "learning_rate": 6.118685054987052e-06,
      "loss": 0.023,
      "step": 1198080
    },
    {
      "epoch": 1.9607169275282628,
      "grad_norm": 0.5598762631416321,
      "learning_rate": 6.118619162773536e-06,
      "loss": 0.0192,
      "step": 1198100
    },
    {
      "epoch": 1.960749657966916,
      "grad_norm": 2.9938223361968994,
      "learning_rate": 6.118553270560018e-06,
      "loss": 0.0199,
      "step": 1198120
    },
    {
      "epoch": 1.9607823884055695,
      "grad_norm": 0.059906501322984695,
      "learning_rate": 6.118487378346501e-06,
      "loss": 0.0145,
      "step": 1198140
    },
    {
      "epoch": 1.9608151188442227,
      "grad_norm": 0.5990522503852844,
      "learning_rate": 6.118421486132984e-06,
      "loss": 0.0218,
      "step": 1198160
    },
    {
      "epoch": 1.9608478492828763,
      "grad_norm": 1.6576931476593018,
      "learning_rate": 6.118355593919467e-06,
      "loss": 0.0228,
      "step": 1198180
    },
    {
      "epoch": 1.9608805797215294,
      "grad_norm": 0.48095637559890747,
      "learning_rate": 6.11828970170595e-06,
      "loss": 0.0125,
      "step": 1198200
    },
    {
      "epoch": 1.9609133101601828,
      "grad_norm": 0.4984898567199707,
      "learning_rate": 6.118223809492433e-06,
      "loss": 0.0148,
      "step": 1198220
    },
    {
      "epoch": 1.9609460405988361,
      "grad_norm": 0.7201111912727356,
      "learning_rate": 6.118157917278915e-06,
      "loss": 0.0217,
      "step": 1198240
    },
    {
      "epoch": 1.9609787710374893,
      "grad_norm": 0.12334028631448746,
      "learning_rate": 6.118092025065399e-06,
      "loss": 0.0136,
      "step": 1198260
    },
    {
      "epoch": 1.961011501476143,
      "grad_norm": 0.8429381251335144,
      "learning_rate": 6.1180261328518806e-06,
      "loss": 0.0182,
      "step": 1198280
    },
    {
      "epoch": 1.961044231914796,
      "grad_norm": 0.4378378391265869,
      "learning_rate": 6.117960240638364e-06,
      "loss": 0.0206,
      "step": 1198300
    },
    {
      "epoch": 1.9610769623534494,
      "grad_norm": 0.28554248809814453,
      "learning_rate": 6.117894348424848e-06,
      "loss": 0.0228,
      "step": 1198320
    },
    {
      "epoch": 1.9611096927921028,
      "grad_norm": 0.9407123923301697,
      "learning_rate": 6.11782845621133e-06,
      "loss": 0.0251,
      "step": 1198340
    },
    {
      "epoch": 1.9611424232307562,
      "grad_norm": 0.3139934241771698,
      "learning_rate": 6.117762563997813e-06,
      "loss": 0.0112,
      "step": 1198360
    },
    {
      "epoch": 1.9611751536694095,
      "grad_norm": 0.2792436182498932,
      "learning_rate": 6.117696671784296e-06,
      "loss": 0.0171,
      "step": 1198380
    },
    {
      "epoch": 1.9612078841080627,
      "grad_norm": 0.40761852264404297,
      "learning_rate": 6.117630779570779e-06,
      "loss": 0.0158,
      "step": 1198400
    },
    {
      "epoch": 1.9612406145467163,
      "grad_norm": 0.44389933347702026,
      "learning_rate": 6.1175648873572614e-06,
      "loss": 0.0201,
      "step": 1198420
    },
    {
      "epoch": 1.9612733449853694,
      "grad_norm": 0.5770900249481201,
      "learning_rate": 6.117498995143745e-06,
      "loss": 0.0236,
      "step": 1198440
    },
    {
      "epoch": 1.9613060754240228,
      "grad_norm": 0.18995890021324158,
      "learning_rate": 6.117433102930227e-06,
      "loss": 0.0193,
      "step": 1198460
    },
    {
      "epoch": 1.9613388058626762,
      "grad_norm": 0.17559827864170074,
      "learning_rate": 6.1173672107167105e-06,
      "loss": 0.0157,
      "step": 1198480
    },
    {
      "epoch": 1.9613715363013295,
      "grad_norm": 0.4253300130367279,
      "learning_rate": 6.117301318503192e-06,
      "loss": 0.02,
      "step": 1198500
    },
    {
      "epoch": 1.961404266739983,
      "grad_norm": 0.321000874042511,
      "learning_rate": 6.117235426289676e-06,
      "loss": 0.0241,
      "step": 1198520
    },
    {
      "epoch": 1.961436997178636,
      "grad_norm": 0.4421299397945404,
      "learning_rate": 6.117169534076159e-06,
      "loss": 0.0205,
      "step": 1198540
    },
    {
      "epoch": 1.9614697276172897,
      "grad_norm": 0.5147904753684998,
      "learning_rate": 6.1171036418626415e-06,
      "loss": 0.0142,
      "step": 1198560
    },
    {
      "epoch": 1.9615024580559428,
      "grad_norm": 0.5515759587287903,
      "learning_rate": 6.117037749649124e-06,
      "loss": 0.0158,
      "step": 1198580
    },
    {
      "epoch": 1.9615351884945962,
      "grad_norm": 1.2885243892669678,
      "learning_rate": 6.116971857435608e-06,
      "loss": 0.014,
      "step": 1198600
    },
    {
      "epoch": 1.9615679189332496,
      "grad_norm": 0.32815316319465637,
      "learning_rate": 6.11690596522209e-06,
      "loss": 0.0186,
      "step": 1198620
    },
    {
      "epoch": 1.961600649371903,
      "grad_norm": 0.11782585829496384,
      "learning_rate": 6.116840073008573e-06,
      "loss": 0.0146,
      "step": 1198640
    },
    {
      "epoch": 1.9616333798105563,
      "grad_norm": 0.15678024291992188,
      "learning_rate": 6.116774180795057e-06,
      "loss": 0.014,
      "step": 1198660
    },
    {
      "epoch": 1.9616661102492094,
      "grad_norm": 0.47111430764198303,
      "learning_rate": 6.116708288581539e-06,
      "loss": 0.0192,
      "step": 1198680
    },
    {
      "epoch": 1.961698840687863,
      "grad_norm": 0.44729456305503845,
      "learning_rate": 6.116642396368022e-06,
      "loss": 0.0217,
      "step": 1198700
    },
    {
      "epoch": 1.9617315711265162,
      "grad_norm": 0.8142495155334473,
      "learning_rate": 6.116576504154504e-06,
      "loss": 0.0167,
      "step": 1198720
    },
    {
      "epoch": 1.9617643015651696,
      "grad_norm": 0.8280009031295776,
      "learning_rate": 6.116510611940988e-06,
      "loss": 0.0241,
      "step": 1198740
    },
    {
      "epoch": 1.961797032003823,
      "grad_norm": 2.038464307785034,
      "learning_rate": 6.11644471972747e-06,
      "loss": 0.0193,
      "step": 1198760
    },
    {
      "epoch": 1.961829762442476,
      "grad_norm": 0.19417378306388855,
      "learning_rate": 6.116378827513953e-06,
      "loss": 0.0201,
      "step": 1198780
    },
    {
      "epoch": 1.9618624928811297,
      "grad_norm": 0.44965460896492004,
      "learning_rate": 6.116312935300436e-06,
      "loss": 0.0134,
      "step": 1198800
    },
    {
      "epoch": 1.9618952233197828,
      "grad_norm": 0.26891282200813293,
      "learning_rate": 6.116247043086919e-06,
      "loss": 0.0213,
      "step": 1198820
    },
    {
      "epoch": 1.9619279537584364,
      "grad_norm": 0.5866296887397766,
      "learning_rate": 6.1161811508734015e-06,
      "loss": 0.0246,
      "step": 1198840
    },
    {
      "epoch": 1.9619606841970896,
      "grad_norm": 0.44086718559265137,
      "learning_rate": 6.116115258659885e-06,
      "loss": 0.0179,
      "step": 1198860
    },
    {
      "epoch": 1.961993414635743,
      "grad_norm": 0.4539427161216736,
      "learning_rate": 6.116049366446367e-06,
      "loss": 0.0158,
      "step": 1198880
    },
    {
      "epoch": 1.9620261450743963,
      "grad_norm": 0.25178858637809753,
      "learning_rate": 6.115983474232851e-06,
      "loss": 0.0162,
      "step": 1198900
    },
    {
      "epoch": 1.9620588755130495,
      "grad_norm": 1.5327783823013306,
      "learning_rate": 6.1159175820193325e-06,
      "loss": 0.0259,
      "step": 1198920
    },
    {
      "epoch": 1.962091605951703,
      "grad_norm": 0.7754977345466614,
      "learning_rate": 6.115851689805816e-06,
      "loss": 0.0224,
      "step": 1198940
    },
    {
      "epoch": 1.9621243363903562,
      "grad_norm": 1.0904903411865234,
      "learning_rate": 6.115785797592299e-06,
      "loss": 0.0242,
      "step": 1198960
    },
    {
      "epoch": 1.9621570668290096,
      "grad_norm": 0.1936054527759552,
      "learning_rate": 6.1157199053787815e-06,
      "loss": 0.0162,
      "step": 1198980
    },
    {
      "epoch": 1.962189797267663,
      "grad_norm": 0.5311370491981506,
      "learning_rate": 6.115654013165265e-06,
      "loss": 0.0208,
      "step": 1199000
    },
    {
      "epoch": 1.9622225277063163,
      "grad_norm": 0.9497743844985962,
      "learning_rate": 6.115588120951748e-06,
      "loss": 0.0179,
      "step": 1199020
    },
    {
      "epoch": 1.9622552581449697,
      "grad_norm": 0.18101496994495392,
      "learning_rate": 6.115522228738231e-06,
      "loss": 0.0183,
      "step": 1199040
    },
    {
      "epoch": 1.9622879885836229,
      "grad_norm": 0.743924617767334,
      "learning_rate": 6.115456336524713e-06,
      "loss": 0.0188,
      "step": 1199060
    },
    {
      "epoch": 1.9623207190222764,
      "grad_norm": 0.39552024006843567,
      "learning_rate": 6.115390444311197e-06,
      "loss": 0.0169,
      "step": 1199080
    },
    {
      "epoch": 1.9623534494609296,
      "grad_norm": 0.3571617007255554,
      "learning_rate": 6.115324552097679e-06,
      "loss": 0.0204,
      "step": 1199100
    },
    {
      "epoch": 1.962386179899583,
      "grad_norm": 0.603813886642456,
      "learning_rate": 6.115258659884162e-06,
      "loss": 0.0129,
      "step": 1199120
    },
    {
      "epoch": 1.9624189103382363,
      "grad_norm": 0.21201765537261963,
      "learning_rate": 6.115192767670644e-06,
      "loss": 0.0142,
      "step": 1199140
    },
    {
      "epoch": 1.9624516407768897,
      "grad_norm": 0.24450059235095978,
      "learning_rate": 6.115126875457128e-06,
      "loss": 0.0215,
      "step": 1199160
    },
    {
      "epoch": 1.962484371215543,
      "grad_norm": 1.5247373580932617,
      "learning_rate": 6.115060983243611e-06,
      "loss": 0.0162,
      "step": 1199180
    },
    {
      "epoch": 1.9625171016541962,
      "grad_norm": 0.4343429505825043,
      "learning_rate": 6.114995091030093e-06,
      "loss": 0.0182,
      "step": 1199200
    },
    {
      "epoch": 1.9625498320928498,
      "grad_norm": 1.451878547668457,
      "learning_rate": 6.114929198816576e-06,
      "loss": 0.0128,
      "step": 1199220
    },
    {
      "epoch": 1.962582562531503,
      "grad_norm": 0.5369346737861633,
      "learning_rate": 6.11486330660306e-06,
      "loss": 0.0102,
      "step": 1199240
    },
    {
      "epoch": 1.9626152929701564,
      "grad_norm": 0.10925296694040298,
      "learning_rate": 6.114797414389542e-06,
      "loss": 0.0188,
      "step": 1199260
    },
    {
      "epoch": 1.9626480234088097,
      "grad_norm": 0.2012617588043213,
      "learning_rate": 6.114731522176025e-06,
      "loss": 0.0181,
      "step": 1199280
    },
    {
      "epoch": 1.962680753847463,
      "grad_norm": 0.2334458976984024,
      "learning_rate": 6.114665629962507e-06,
      "loss": 0.0222,
      "step": 1199300
    },
    {
      "epoch": 1.9627134842861165,
      "grad_norm": 0.2629830241203308,
      "learning_rate": 6.114599737748991e-06,
      "loss": 0.0181,
      "step": 1199320
    },
    {
      "epoch": 1.9627462147247696,
      "grad_norm": 2.4727413654327393,
      "learning_rate": 6.114533845535473e-06,
      "loss": 0.0162,
      "step": 1199340
    },
    {
      "epoch": 1.9627789451634232,
      "grad_norm": 0.2881321310997009,
      "learning_rate": 6.114467953321956e-06,
      "loss": 0.0138,
      "step": 1199360
    },
    {
      "epoch": 1.9628116756020764,
      "grad_norm": 0.993804395198822,
      "learning_rate": 6.11440206110844e-06,
      "loss": 0.0219,
      "step": 1199380
    },
    {
      "epoch": 1.9628444060407297,
      "grad_norm": 0.5585334897041321,
      "learning_rate": 6.1143361688949225e-06,
      "loss": 0.0205,
      "step": 1199400
    },
    {
      "epoch": 1.9628771364793831,
      "grad_norm": 0.39157047867774963,
      "learning_rate": 6.114270276681405e-06,
      "loss": 0.0232,
      "step": 1199420
    },
    {
      "epoch": 1.9629098669180365,
      "grad_norm": 0.5430716872215271,
      "learning_rate": 6.114204384467888e-06,
      "loss": 0.0125,
      "step": 1199440
    },
    {
      "epoch": 1.9629425973566899,
      "grad_norm": 0.9487070441246033,
      "learning_rate": 6.1141384922543715e-06,
      "loss": 0.0166,
      "step": 1199460
    },
    {
      "epoch": 1.962975327795343,
      "grad_norm": 0.1854390650987625,
      "learning_rate": 6.114072600040853e-06,
      "loss": 0.0185,
      "step": 1199480
    },
    {
      "epoch": 1.9630080582339966,
      "grad_norm": 0.6410268545150757,
      "learning_rate": 6.114006707827337e-06,
      "loss": 0.0163,
      "step": 1199500
    },
    {
      "epoch": 1.9630407886726498,
      "grad_norm": 1.0869005918502808,
      "learning_rate": 6.113940815613819e-06,
      "loss": 0.0165,
      "step": 1199520
    },
    {
      "epoch": 1.9630735191113031,
      "grad_norm": 0.22131465375423431,
      "learning_rate": 6.1138749234003025e-06,
      "loss": 0.0192,
      "step": 1199540
    },
    {
      "epoch": 1.9631062495499565,
      "grad_norm": 0.3676617741584778,
      "learning_rate": 6.113809031186785e-06,
      "loss": 0.0186,
      "step": 1199560
    },
    {
      "epoch": 1.9631389799886096,
      "grad_norm": 0.5658361911773682,
      "learning_rate": 6.113743138973268e-06,
      "loss": 0.0202,
      "step": 1199580
    },
    {
      "epoch": 1.9631717104272632,
      "grad_norm": 0.549550473690033,
      "learning_rate": 6.113677246759751e-06,
      "loss": 0.0172,
      "step": 1199600
    },
    {
      "epoch": 1.9632044408659164,
      "grad_norm": 0.5979169607162476,
      "learning_rate": 6.113611354546234e-06,
      "loss": 0.0141,
      "step": 1199620
    },
    {
      "epoch": 1.96323717130457,
      "grad_norm": 0.4873744249343872,
      "learning_rate": 6.113545462332716e-06,
      "loss": 0.0199,
      "step": 1199640
    },
    {
      "epoch": 1.9632699017432231,
      "grad_norm": 0.7043904662132263,
      "learning_rate": 6.1134795701192e-06,
      "loss": 0.0208,
      "step": 1199660
    },
    {
      "epoch": 1.9633026321818765,
      "grad_norm": 0.5011950731277466,
      "learning_rate": 6.113413677905682e-06,
      "loss": 0.0149,
      "step": 1199680
    },
    {
      "epoch": 1.9633353626205299,
      "grad_norm": 0.49608567357063293,
      "learning_rate": 6.113347785692165e-06,
      "loss": 0.0151,
      "step": 1199700
    },
    {
      "epoch": 1.963368093059183,
      "grad_norm": 0.41980814933776855,
      "learning_rate": 6.113281893478649e-06,
      "loss": 0.0221,
      "step": 1199720
    },
    {
      "epoch": 1.9634008234978366,
      "grad_norm": 0.3717850148677826,
      "learning_rate": 6.113216001265131e-06,
      "loss": 0.0255,
      "step": 1199740
    },
    {
      "epoch": 1.9634335539364898,
      "grad_norm": 0.10369203984737396,
      "learning_rate": 6.113150109051614e-06,
      "loss": 0.0253,
      "step": 1199760
    },
    {
      "epoch": 1.9634662843751431,
      "grad_norm": 0.1830897331237793,
      "learning_rate": 6.113084216838096e-06,
      "loss": 0.0241,
      "step": 1199780
    },
    {
      "epoch": 1.9634990148137965,
      "grad_norm": 0.6425437331199646,
      "learning_rate": 6.11301832462458e-06,
      "loss": 0.0198,
      "step": 1199800
    },
    {
      "epoch": 1.96353174525245,
      "grad_norm": 0.346988707780838,
      "learning_rate": 6.1129524324110625e-06,
      "loss": 0.0257,
      "step": 1199820
    },
    {
      "epoch": 1.9635644756911033,
      "grad_norm": 0.5370664596557617,
      "learning_rate": 6.112886540197545e-06,
      "loss": 0.0139,
      "step": 1199840
    },
    {
      "epoch": 1.9635972061297564,
      "grad_norm": 0.2029714435338974,
      "learning_rate": 6.112820647984028e-06,
      "loss": 0.0189,
      "step": 1199860
    },
    {
      "epoch": 1.96362993656841,
      "grad_norm": 0.09711949527263641,
      "learning_rate": 6.112754755770512e-06,
      "loss": 0.018,
      "step": 1199880
    },
    {
      "epoch": 1.9636626670070632,
      "grad_norm": 0.683719277381897,
      "learning_rate": 6.1126888635569935e-06,
      "loss": 0.0244,
      "step": 1199900
    },
    {
      "epoch": 1.9636953974457165,
      "grad_norm": 0.6455564498901367,
      "learning_rate": 6.112622971343477e-06,
      "loss": 0.0261,
      "step": 1199920
    },
    {
      "epoch": 1.96372812788437,
      "grad_norm": 0.43538764119148254,
      "learning_rate": 6.112557079129959e-06,
      "loss": 0.0138,
      "step": 1199940
    },
    {
      "epoch": 1.9637608583230233,
      "grad_norm": 0.9148781299591064,
      "learning_rate": 6.1124911869164426e-06,
      "loss": 0.0157,
      "step": 1199960
    },
    {
      "epoch": 1.9637935887616766,
      "grad_norm": 0.5985924005508423,
      "learning_rate": 6.112425294702925e-06,
      "loss": 0.0169,
      "step": 1199980
    },
    {
      "epoch": 1.9638263192003298,
      "grad_norm": 0.42808422446250916,
      "learning_rate": 6.112359402489408e-06,
      "loss": 0.0206,
      "step": 1200000
    },
    {
      "epoch": 1.9638263192003298,
      "eval_loss": 0.009273970499634743,
      "eval_runtime": 6509.5941,
      "eval_samples_per_second": 157.899,
      "eval_steps_per_second": 15.79,
      "eval_sts-dev_pearson_cosine": 0.978515031221234,
      "eval_sts-dev_spearman_cosine": 0.8913727347989711,
      "step": 1200000
    },
    {
      "epoch": 1.9638590496389834,
      "grad_norm": 1.0884371995925903,
      "learning_rate": 6.112293510275891e-06,
      "loss": 0.0178,
      "step": 1200020
    },
    {
      "epoch": 1.9638917800776365,
      "grad_norm": 0.20996668934822083,
      "learning_rate": 6.112227618062374e-06,
      "loss": 0.0228,
      "step": 1200040
    },
    {
      "epoch": 1.96392451051629,
      "grad_norm": 1.6968610286712646,
      "learning_rate": 6.112161725848857e-06,
      "loss": 0.0119,
      "step": 1200060
    },
    {
      "epoch": 1.9639572409549433,
      "grad_norm": 0.20458005368709564,
      "learning_rate": 6.11209583363534e-06,
      "loss": 0.0234,
      "step": 1200080
    },
    {
      "epoch": 1.9639899713935967,
      "grad_norm": 0.17619973421096802,
      "learning_rate": 6.1120299414218234e-06,
      "loss": 0.017,
      "step": 1200100
    },
    {
      "epoch": 1.96402270183225,
      "grad_norm": 1.0104479789733887,
      "learning_rate": 6.111964049208305e-06,
      "loss": 0.0199,
      "step": 1200120
    },
    {
      "epoch": 1.9640554322709032,
      "grad_norm": 0.7332583665847778,
      "learning_rate": 6.111898156994789e-06,
      "loss": 0.0164,
      "step": 1200140
    },
    {
      "epoch": 1.9640881627095568,
      "grad_norm": 0.6537703275680542,
      "learning_rate": 6.111832264781271e-06,
      "loss": 0.0167,
      "step": 1200160
    },
    {
      "epoch": 1.96412089314821,
      "grad_norm": 0.875645637512207,
      "learning_rate": 6.111766372567754e-06,
      "loss": 0.0136,
      "step": 1200180
    },
    {
      "epoch": 1.9641536235868633,
      "grad_norm": 0.934483528137207,
      "learning_rate": 6.111700480354237e-06,
      "loss": 0.0173,
      "step": 1200200
    },
    {
      "epoch": 1.9641863540255167,
      "grad_norm": 0.2728157341480255,
      "learning_rate": 6.11163458814072e-06,
      "loss": 0.019,
      "step": 1200220
    },
    {
      "epoch": 1.96421908446417,
      "grad_norm": 0.4291536509990692,
      "learning_rate": 6.111568695927203e-06,
      "loss": 0.0133,
      "step": 1200240
    },
    {
      "epoch": 1.9642518149028234,
      "grad_norm": 0.45750999450683594,
      "learning_rate": 6.111502803713686e-06,
      "loss": 0.0201,
      "step": 1200260
    },
    {
      "epoch": 1.9642845453414766,
      "grad_norm": 0.8466161489486694,
      "learning_rate": 6.111436911500168e-06,
      "loss": 0.0163,
      "step": 1200280
    },
    {
      "epoch": 1.9643172757801302,
      "grad_norm": 0.6160518527030945,
      "learning_rate": 6.111371019286652e-06,
      "loss": 0.02,
      "step": 1200300
    },
    {
      "epoch": 1.9643500062187833,
      "grad_norm": 0.5731765627861023,
      "learning_rate": 6.1113051270731336e-06,
      "loss": 0.0133,
      "step": 1200320
    },
    {
      "epoch": 1.9643827366574367,
      "grad_norm": 0.10450895875692368,
      "learning_rate": 6.111239234859617e-06,
      "loss": 0.0161,
      "step": 1200340
    },
    {
      "epoch": 1.96441546709609,
      "grad_norm": 0.8272095918655396,
      "learning_rate": 6.1111733426461e-06,
      "loss": 0.0219,
      "step": 1200360
    },
    {
      "epoch": 1.9644481975347432,
      "grad_norm": 0.37155357003211975,
      "learning_rate": 6.111107450432583e-06,
      "loss": 0.0216,
      "step": 1200380
    },
    {
      "epoch": 1.9644809279733968,
      "grad_norm": 0.18837352097034454,
      "learning_rate": 6.111041558219066e-06,
      "loss": 0.0166,
      "step": 1200400
    },
    {
      "epoch": 1.96451365841205,
      "grad_norm": 1.0218825340270996,
      "learning_rate": 6.110975666005549e-06,
      "loss": 0.0232,
      "step": 1200420
    },
    {
      "epoch": 1.9645463888507035,
      "grad_norm": 2.328695774078369,
      "learning_rate": 6.110909773792032e-06,
      "loss": 0.0207,
      "step": 1200440
    },
    {
      "epoch": 1.9645791192893567,
      "grad_norm": 0.6734186410903931,
      "learning_rate": 6.1108438815785144e-06,
      "loss": 0.0145,
      "step": 1200460
    },
    {
      "epoch": 1.96461184972801,
      "grad_norm": 0.32950738072395325,
      "learning_rate": 6.110777989364998e-06,
      "loss": 0.0125,
      "step": 1200480
    },
    {
      "epoch": 1.9646445801666634,
      "grad_norm": 0.2386421114206314,
      "learning_rate": 6.11071209715148e-06,
      "loss": 0.0134,
      "step": 1200500
    },
    {
      "epoch": 1.9646773106053166,
      "grad_norm": 0.33223089575767517,
      "learning_rate": 6.1106462049379635e-06,
      "loss": 0.0136,
      "step": 1200520
    },
    {
      "epoch": 1.9647100410439702,
      "grad_norm": 0.3724604547023773,
      "learning_rate": 6.110580312724445e-06,
      "loss": 0.019,
      "step": 1200540
    },
    {
      "epoch": 1.9647427714826233,
      "grad_norm": 0.6132773160934448,
      "learning_rate": 6.110514420510929e-06,
      "loss": 0.0122,
      "step": 1200560
    },
    {
      "epoch": 1.9647755019212767,
      "grad_norm": 0.1723281294107437,
      "learning_rate": 6.110448528297412e-06,
      "loss": 0.0153,
      "step": 1200580
    },
    {
      "epoch": 1.96480823235993,
      "grad_norm": 0.636348307132721,
      "learning_rate": 6.1103826360838945e-06,
      "loss": 0.0189,
      "step": 1200600
    },
    {
      "epoch": 1.9648409627985834,
      "grad_norm": 1.1742054224014282,
      "learning_rate": 6.110316743870377e-06,
      "loss": 0.0176,
      "step": 1200620
    },
    {
      "epoch": 1.9648736932372368,
      "grad_norm": 0.4391030967235565,
      "learning_rate": 6.110250851656861e-06,
      "loss": 0.0206,
      "step": 1200640
    },
    {
      "epoch": 1.96490642367589,
      "grad_norm": 0.07152906805276871,
      "learning_rate": 6.110184959443343e-06,
      "loss": 0.0137,
      "step": 1200660
    },
    {
      "epoch": 1.9649391541145436,
      "grad_norm": 0.06028838828206062,
      "learning_rate": 6.110119067229826e-06,
      "loss": 0.0208,
      "step": 1200680
    },
    {
      "epoch": 1.9649718845531967,
      "grad_norm": 0.5939804911613464,
      "learning_rate": 6.110053175016308e-06,
      "loss": 0.024,
      "step": 1200700
    },
    {
      "epoch": 1.96500461499185,
      "grad_norm": 0.7266172766685486,
      "learning_rate": 6.109987282802792e-06,
      "loss": 0.0225,
      "step": 1200720
    },
    {
      "epoch": 1.9650373454305035,
      "grad_norm": 0.4252418279647827,
      "learning_rate": 6.109921390589274e-06,
      "loss": 0.0227,
      "step": 1200740
    },
    {
      "epoch": 1.9650700758691568,
      "grad_norm": 0.2391752004623413,
      "learning_rate": 6.109855498375757e-06,
      "loss": 0.0208,
      "step": 1200760
    },
    {
      "epoch": 1.9651028063078102,
      "grad_norm": 0.7611262202262878,
      "learning_rate": 6.109789606162241e-06,
      "loss": 0.0246,
      "step": 1200780
    },
    {
      "epoch": 1.9651355367464634,
      "grad_norm": 0.9499941468238831,
      "learning_rate": 6.109723713948723e-06,
      "loss": 0.0171,
      "step": 1200800
    },
    {
      "epoch": 1.965168267185117,
      "grad_norm": 0.48122847080230713,
      "learning_rate": 6.109657821735206e-06,
      "loss": 0.0163,
      "step": 1200820
    },
    {
      "epoch": 1.96520099762377,
      "grad_norm": 1.1052988767623901,
      "learning_rate": 6.109591929521689e-06,
      "loss": 0.0195,
      "step": 1200840
    },
    {
      "epoch": 1.9652337280624235,
      "grad_norm": 0.33682748675346375,
      "learning_rate": 6.109526037308172e-06,
      "loss": 0.0209,
      "step": 1200860
    },
    {
      "epoch": 1.9652664585010768,
      "grad_norm": 0.26005321741104126,
      "learning_rate": 6.1094601450946545e-06,
      "loss": 0.0196,
      "step": 1200880
    },
    {
      "epoch": 1.9652991889397302,
      "grad_norm": 1.2241308689117432,
      "learning_rate": 6.109394252881138e-06,
      "loss": 0.0203,
      "step": 1200900
    },
    {
      "epoch": 1.9653319193783836,
      "grad_norm": 0.3678514361381531,
      "learning_rate": 6.10932836066762e-06,
      "loss": 0.0148,
      "step": 1200920
    },
    {
      "epoch": 1.9653646498170367,
      "grad_norm": 0.3723576068878174,
      "learning_rate": 6.109262468454104e-06,
      "loss": 0.0211,
      "step": 1200940
    },
    {
      "epoch": 1.9653973802556903,
      "grad_norm": 0.5089500546455383,
      "learning_rate": 6.1091965762405855e-06,
      "loss": 0.0178,
      "step": 1200960
    },
    {
      "epoch": 1.9654301106943435,
      "grad_norm": 0.7188481688499451,
      "learning_rate": 6.109130684027069e-06,
      "loss": 0.0148,
      "step": 1200980
    },
    {
      "epoch": 1.9654628411329969,
      "grad_norm": 0.5308364629745483,
      "learning_rate": 6.109064791813552e-06,
      "loss": 0.0165,
      "step": 1201000
    },
    {
      "epoch": 1.9654955715716502,
      "grad_norm": 1.4618406295776367,
      "learning_rate": 6.1089988996000345e-06,
      "loss": 0.023,
      "step": 1201020
    },
    {
      "epoch": 1.9655283020103036,
      "grad_norm": 0.37937265634536743,
      "learning_rate": 6.108933007386517e-06,
      "loss": 0.0226,
      "step": 1201040
    },
    {
      "epoch": 1.965561032448957,
      "grad_norm": 1.165787935256958,
      "learning_rate": 6.108867115173001e-06,
      "loss": 0.0203,
      "step": 1201060
    },
    {
      "epoch": 1.9655937628876101,
      "grad_norm": 0.20635339617729187,
      "learning_rate": 6.108801222959483e-06,
      "loss": 0.0297,
      "step": 1201080
    },
    {
      "epoch": 1.9656264933262637,
      "grad_norm": 0.5972411036491394,
      "learning_rate": 6.108735330745966e-06,
      "loss": 0.0197,
      "step": 1201100
    },
    {
      "epoch": 1.9656592237649169,
      "grad_norm": 0.8089554309844971,
      "learning_rate": 6.10866943853245e-06,
      "loss": 0.0212,
      "step": 1201120
    },
    {
      "epoch": 1.9656919542035702,
      "grad_norm": 0.500662088394165,
      "learning_rate": 6.108603546318932e-06,
      "loss": 0.023,
      "step": 1201140
    },
    {
      "epoch": 1.9657246846422236,
      "grad_norm": 0.42287325859069824,
      "learning_rate": 6.108537654105415e-06,
      "loss": 0.0118,
      "step": 1201160
    },
    {
      "epoch": 1.9657574150808768,
      "grad_norm": 0.31616082787513733,
      "learning_rate": 6.108471761891897e-06,
      "loss": 0.024,
      "step": 1201180
    },
    {
      "epoch": 1.9657901455195304,
      "grad_norm": 0.22088102996349335,
      "learning_rate": 6.108405869678381e-06,
      "loss": 0.0113,
      "step": 1201200
    },
    {
      "epoch": 1.9658228759581835,
      "grad_norm": 0.3230409026145935,
      "learning_rate": 6.108339977464864e-06,
      "loss": 0.0115,
      "step": 1201220
    },
    {
      "epoch": 1.965855606396837,
      "grad_norm": 0.858190655708313,
      "learning_rate": 6.108274085251346e-06,
      "loss": 0.0231,
      "step": 1201240
    },
    {
      "epoch": 1.9658883368354902,
      "grad_norm": 0.11964454501867294,
      "learning_rate": 6.108208193037829e-06,
      "loss": 0.0123,
      "step": 1201260
    },
    {
      "epoch": 1.9659210672741436,
      "grad_norm": 0.4645513892173767,
      "learning_rate": 6.108142300824313e-06,
      "loss": 0.0279,
      "step": 1201280
    },
    {
      "epoch": 1.965953797712797,
      "grad_norm": 0.09970088303089142,
      "learning_rate": 6.108076408610795e-06,
      "loss": 0.0175,
      "step": 1201300
    },
    {
      "epoch": 1.9659865281514501,
      "grad_norm": 0.4142252504825592,
      "learning_rate": 6.108010516397278e-06,
      "loss": 0.0124,
      "step": 1201320
    },
    {
      "epoch": 1.9660192585901037,
      "grad_norm": 0.5447568893432617,
      "learning_rate": 6.10794462418376e-06,
      "loss": 0.0187,
      "step": 1201340
    },
    {
      "epoch": 1.9660519890287569,
      "grad_norm": 0.2969893217086792,
      "learning_rate": 6.107878731970244e-06,
      "loss": 0.0136,
      "step": 1201360
    },
    {
      "epoch": 1.9660847194674103,
      "grad_norm": 0.3269023597240448,
      "learning_rate": 6.107812839756726e-06,
      "loss": 0.0138,
      "step": 1201380
    },
    {
      "epoch": 1.9661174499060636,
      "grad_norm": 0.670044481754303,
      "learning_rate": 6.107746947543209e-06,
      "loss": 0.0171,
      "step": 1201400
    },
    {
      "epoch": 1.966150180344717,
      "grad_norm": 0.17505255341529846,
      "learning_rate": 6.107681055329692e-06,
      "loss": 0.0219,
      "step": 1201420
    },
    {
      "epoch": 1.9661829107833704,
      "grad_norm": 0.5176382064819336,
      "learning_rate": 6.1076151631161755e-06,
      "loss": 0.0157,
      "step": 1201440
    },
    {
      "epoch": 1.9662156412220235,
      "grad_norm": 0.829892098903656,
      "learning_rate": 6.107549270902658e-06,
      "loss": 0.0201,
      "step": 1201460
    },
    {
      "epoch": 1.9662483716606771,
      "grad_norm": 1.0119346380233765,
      "learning_rate": 6.107483378689141e-06,
      "loss": 0.0241,
      "step": 1201480
    },
    {
      "epoch": 1.9662811020993303,
      "grad_norm": 0.5019885301589966,
      "learning_rate": 6.1074174864756245e-06,
      "loss": 0.0198,
      "step": 1201500
    },
    {
      "epoch": 1.9663138325379836,
      "grad_norm": 0.6826300024986267,
      "learning_rate": 6.1073515942621064e-06,
      "loss": 0.0165,
      "step": 1201520
    },
    {
      "epoch": 1.966346562976637,
      "grad_norm": 0.36410143971443176,
      "learning_rate": 6.10728570204859e-06,
      "loss": 0.0127,
      "step": 1201540
    },
    {
      "epoch": 1.9663792934152904,
      "grad_norm": 0.25981444120407104,
      "learning_rate": 6.107219809835072e-06,
      "loss": 0.0249,
      "step": 1201560
    },
    {
      "epoch": 1.9664120238539438,
      "grad_norm": 1.6341129541397095,
      "learning_rate": 6.1071539176215555e-06,
      "loss": 0.0165,
      "step": 1201580
    },
    {
      "epoch": 1.966444754292597,
      "grad_norm": 0.5997673869132996,
      "learning_rate": 6.107088025408037e-06,
      "loss": 0.0201,
      "step": 1201600
    },
    {
      "epoch": 1.9664774847312505,
      "grad_norm": 0.2535148561000824,
      "learning_rate": 6.107022133194521e-06,
      "loss": 0.0253,
      "step": 1201620
    },
    {
      "epoch": 1.9665102151699037,
      "grad_norm": 0.45119595527648926,
      "learning_rate": 6.106956240981004e-06,
      "loss": 0.0192,
      "step": 1201640
    },
    {
      "epoch": 1.966542945608557,
      "grad_norm": 0.17866910994052887,
      "learning_rate": 6.1068903487674865e-06,
      "loss": 0.0192,
      "step": 1201660
    },
    {
      "epoch": 1.9665756760472104,
      "grad_norm": 0.8441129922866821,
      "learning_rate": 6.106824456553969e-06,
      "loss": 0.0178,
      "step": 1201680
    },
    {
      "epoch": 1.9666084064858638,
      "grad_norm": 0.8238416314125061,
      "learning_rate": 6.106758564340453e-06,
      "loss": 0.0173,
      "step": 1201700
    },
    {
      "epoch": 1.9666411369245171,
      "grad_norm": 0.9085993766784668,
      "learning_rate": 6.106692672126935e-06,
      "loss": 0.0161,
      "step": 1201720
    },
    {
      "epoch": 1.9666738673631703,
      "grad_norm": 0.6968375444412231,
      "learning_rate": 6.106626779913418e-06,
      "loss": 0.0157,
      "step": 1201740
    },
    {
      "epoch": 1.9667065978018239,
      "grad_norm": 0.33065521717071533,
      "learning_rate": 6.1065608876999e-06,
      "loss": 0.0206,
      "step": 1201760
    },
    {
      "epoch": 1.966739328240477,
      "grad_norm": 0.42716893553733826,
      "learning_rate": 6.106494995486384e-06,
      "loss": 0.0246,
      "step": 1201780
    },
    {
      "epoch": 1.9667720586791304,
      "grad_norm": 0.3528917729854584,
      "learning_rate": 6.1064291032728665e-06,
      "loss": 0.0123,
      "step": 1201800
    },
    {
      "epoch": 1.9668047891177838,
      "grad_norm": 0.5359640717506409,
      "learning_rate": 6.106363211059349e-06,
      "loss": 0.013,
      "step": 1201820
    },
    {
      "epoch": 1.966837519556437,
      "grad_norm": 0.5328733325004578,
      "learning_rate": 6.106297318845833e-06,
      "loss": 0.019,
      "step": 1201840
    },
    {
      "epoch": 1.9668702499950905,
      "grad_norm": 0.40396735072135925,
      "learning_rate": 6.1062314266323155e-06,
      "loss": 0.0183,
      "step": 1201860
    },
    {
      "epoch": 1.9669029804337437,
      "grad_norm": 0.4254080653190613,
      "learning_rate": 6.106165534418798e-06,
      "loss": 0.0147,
      "step": 1201880
    },
    {
      "epoch": 1.9669357108723973,
      "grad_norm": 0.41219303011894226,
      "learning_rate": 6.106099642205281e-06,
      "loss": 0.0131,
      "step": 1201900
    },
    {
      "epoch": 1.9669684413110504,
      "grad_norm": 0.4377391040325165,
      "learning_rate": 6.106033749991765e-06,
      "loss": 0.0194,
      "step": 1201920
    },
    {
      "epoch": 1.9670011717497038,
      "grad_norm": 0.7450125217437744,
      "learning_rate": 6.1059678577782465e-06,
      "loss": 0.0152,
      "step": 1201940
    },
    {
      "epoch": 1.9670339021883572,
      "grad_norm": 0.8167262673377991,
      "learning_rate": 6.10590196556473e-06,
      "loss": 0.0203,
      "step": 1201960
    },
    {
      "epoch": 1.9670666326270103,
      "grad_norm": 0.46147650480270386,
      "learning_rate": 6.105836073351212e-06,
      "loss": 0.0247,
      "step": 1201980
    },
    {
      "epoch": 1.967099363065664,
      "grad_norm": 1.2391042709350586,
      "learning_rate": 6.1057701811376956e-06,
      "loss": 0.0161,
      "step": 1202000
    },
    {
      "epoch": 1.967132093504317,
      "grad_norm": 1.094992756843567,
      "learning_rate": 6.105704288924178e-06,
      "loss": 0.0283,
      "step": 1202020
    },
    {
      "epoch": 1.9671648239429704,
      "grad_norm": 0.1414717733860016,
      "learning_rate": 6.105638396710661e-06,
      "loss": 0.0159,
      "step": 1202040
    },
    {
      "epoch": 1.9671975543816238,
      "grad_norm": 0.4388522207736969,
      "learning_rate": 6.105572504497144e-06,
      "loss": 0.0268,
      "step": 1202060
    },
    {
      "epoch": 1.9672302848202772,
      "grad_norm": 0.9056524634361267,
      "learning_rate": 6.105506612283627e-06,
      "loss": 0.0173,
      "step": 1202080
    },
    {
      "epoch": 1.9672630152589305,
      "grad_norm": 0.8727253675460815,
      "learning_rate": 6.105440720070109e-06,
      "loss": 0.0203,
      "step": 1202100
    },
    {
      "epoch": 1.9672957456975837,
      "grad_norm": 0.18943506479263306,
      "learning_rate": 6.105374827856593e-06,
      "loss": 0.0156,
      "step": 1202120
    },
    {
      "epoch": 1.9673284761362373,
      "grad_norm": 0.5148642063140869,
      "learning_rate": 6.105308935643075e-06,
      "loss": 0.0205,
      "step": 1202140
    },
    {
      "epoch": 1.9673612065748904,
      "grad_norm": 0.29236507415771484,
      "learning_rate": 6.105243043429558e-06,
      "loss": 0.0197,
      "step": 1202160
    },
    {
      "epoch": 1.9673939370135438,
      "grad_norm": 0.13296489417552948,
      "learning_rate": 6.105177151216042e-06,
      "loss": 0.0133,
      "step": 1202180
    },
    {
      "epoch": 1.9674266674521972,
      "grad_norm": 0.542712390422821,
      "learning_rate": 6.105111259002524e-06,
      "loss": 0.0168,
      "step": 1202200
    },
    {
      "epoch": 1.9674593978908506,
      "grad_norm": 0.06686697900295258,
      "learning_rate": 6.105045366789007e-06,
      "loss": 0.0161,
      "step": 1202220
    },
    {
      "epoch": 1.967492128329504,
      "grad_norm": 0.42558667063713074,
      "learning_rate": 6.10497947457549e-06,
      "loss": 0.0164,
      "step": 1202240
    },
    {
      "epoch": 1.967524858768157,
      "grad_norm": 0.1570553332567215,
      "learning_rate": 6.104913582361973e-06,
      "loss": 0.0125,
      "step": 1202260
    },
    {
      "epoch": 1.9675575892068107,
      "grad_norm": 0.27691659331321716,
      "learning_rate": 6.104847690148456e-06,
      "loss": 0.0169,
      "step": 1202280
    },
    {
      "epoch": 1.9675903196454638,
      "grad_norm": 0.29777073860168457,
      "learning_rate": 6.104781797934939e-06,
      "loss": 0.0123,
      "step": 1202300
    },
    {
      "epoch": 1.9676230500841172,
      "grad_norm": 3.3546292781829834,
      "learning_rate": 6.104715905721421e-06,
      "loss": 0.0165,
      "step": 1202320
    },
    {
      "epoch": 1.9676557805227706,
      "grad_norm": 0.6325172185897827,
      "learning_rate": 6.104650013507905e-06,
      "loss": 0.0181,
      "step": 1202340
    },
    {
      "epoch": 1.967688510961424,
      "grad_norm": 0.5138806104660034,
      "learning_rate": 6.104584121294387e-06,
      "loss": 0.0265,
      "step": 1202360
    },
    {
      "epoch": 1.9677212414000773,
      "grad_norm": 0.2647039294242859,
      "learning_rate": 6.10451822908087e-06,
      "loss": 0.0132,
      "step": 1202380
    },
    {
      "epoch": 1.9677539718387305,
      "grad_norm": 0.23932242393493652,
      "learning_rate": 6.104452336867353e-06,
      "loss": 0.0191,
      "step": 1202400
    },
    {
      "epoch": 1.967786702277384,
      "grad_norm": 0.32060447335243225,
      "learning_rate": 6.104386444653836e-06,
      "loss": 0.016,
      "step": 1202420
    },
    {
      "epoch": 1.9678194327160372,
      "grad_norm": 0.7713907957077026,
      "learning_rate": 6.104320552440318e-06,
      "loss": 0.0168,
      "step": 1202440
    },
    {
      "epoch": 1.9678521631546906,
      "grad_norm": 0.17847026884555817,
      "learning_rate": 6.104254660226802e-06,
      "loss": 0.0215,
      "step": 1202460
    },
    {
      "epoch": 1.967884893593344,
      "grad_norm": 0.10684507340192795,
      "learning_rate": 6.104188768013284e-06,
      "loss": 0.0122,
      "step": 1202480
    },
    {
      "epoch": 1.9679176240319973,
      "grad_norm": 0.28569021821022034,
      "learning_rate": 6.1041228757997675e-06,
      "loss": 0.0191,
      "step": 1202500
    },
    {
      "epoch": 1.9679503544706507,
      "grad_norm": 0.2827296257019043,
      "learning_rate": 6.104056983586251e-06,
      "loss": 0.0193,
      "step": 1202520
    },
    {
      "epoch": 1.9679830849093038,
      "grad_norm": 0.5878398418426514,
      "learning_rate": 6.103991091372733e-06,
      "loss": 0.0167,
      "step": 1202540
    },
    {
      "epoch": 1.9680158153479574,
      "grad_norm": 0.20914842188358307,
      "learning_rate": 6.1039251991592165e-06,
      "loss": 0.0127,
      "step": 1202560
    },
    {
      "epoch": 1.9680485457866106,
      "grad_norm": 0.2386854887008667,
      "learning_rate": 6.103859306945698e-06,
      "loss": 0.0119,
      "step": 1202580
    },
    {
      "epoch": 1.968081276225264,
      "grad_norm": 0.5005977153778076,
      "learning_rate": 6.103793414732182e-06,
      "loss": 0.0236,
      "step": 1202600
    },
    {
      "epoch": 1.9681140066639173,
      "grad_norm": 0.5421041250228882,
      "learning_rate": 6.103727522518664e-06,
      "loss": 0.0142,
      "step": 1202620
    },
    {
      "epoch": 1.9681467371025705,
      "grad_norm": 0.47120243310928345,
      "learning_rate": 6.1036616303051475e-06,
      "loss": 0.0142,
      "step": 1202640
    },
    {
      "epoch": 1.968179467541224,
      "grad_norm": 0.19663579761981964,
      "learning_rate": 6.10359573809163e-06,
      "loss": 0.0139,
      "step": 1202660
    },
    {
      "epoch": 1.9682121979798772,
      "grad_norm": 0.4279816448688507,
      "learning_rate": 6.103529845878113e-06,
      "loss": 0.0163,
      "step": 1202680
    },
    {
      "epoch": 1.9682449284185308,
      "grad_norm": 0.24474452435970306,
      "learning_rate": 6.103463953664596e-06,
      "loss": 0.0153,
      "step": 1202700
    },
    {
      "epoch": 1.968277658857184,
      "grad_norm": 0.18725956976413727,
      "learning_rate": 6.103398061451079e-06,
      "loss": 0.0129,
      "step": 1202720
    },
    {
      "epoch": 1.9683103892958373,
      "grad_norm": 1.5379778146743774,
      "learning_rate": 6.103332169237561e-06,
      "loss": 0.0178,
      "step": 1202740
    },
    {
      "epoch": 1.9683431197344907,
      "grad_norm": 0.3697604537010193,
      "learning_rate": 6.103266277024045e-06,
      "loss": 0.0157,
      "step": 1202760
    },
    {
      "epoch": 1.9683758501731439,
      "grad_norm": 0.38162097334861755,
      "learning_rate": 6.103200384810527e-06,
      "loss": 0.0146,
      "step": 1202780
    },
    {
      "epoch": 1.9684085806117975,
      "grad_norm": 3.053316116333008,
      "learning_rate": 6.10313449259701e-06,
      "loss": 0.0197,
      "step": 1202800
    },
    {
      "epoch": 1.9684413110504506,
      "grad_norm": 0.4813607335090637,
      "learning_rate": 6.103068600383493e-06,
      "loss": 0.0153,
      "step": 1202820
    },
    {
      "epoch": 1.968474041489104,
      "grad_norm": 0.7338922619819641,
      "learning_rate": 6.103002708169976e-06,
      "loss": 0.0167,
      "step": 1202840
    },
    {
      "epoch": 1.9685067719277574,
      "grad_norm": 0.36976340413093567,
      "learning_rate": 6.1029368159564585e-06,
      "loss": 0.0224,
      "step": 1202860
    },
    {
      "epoch": 1.9685395023664107,
      "grad_norm": 0.9429665207862854,
      "learning_rate": 6.102870923742942e-06,
      "loss": 0.0164,
      "step": 1202880
    },
    {
      "epoch": 1.968572232805064,
      "grad_norm": 0.9816731214523315,
      "learning_rate": 6.102805031529425e-06,
      "loss": 0.0176,
      "step": 1202900
    },
    {
      "epoch": 1.9686049632437173,
      "grad_norm": 0.44095349311828613,
      "learning_rate": 6.1027391393159075e-06,
      "loss": 0.0207,
      "step": 1202920
    },
    {
      "epoch": 1.9686376936823708,
      "grad_norm": 0.10045704245567322,
      "learning_rate": 6.102673247102391e-06,
      "loss": 0.02,
      "step": 1202940
    },
    {
      "epoch": 1.968670424121024,
      "grad_norm": 11.907007217407227,
      "learning_rate": 6.102607354888873e-06,
      "loss": 0.0123,
      "step": 1202960
    },
    {
      "epoch": 1.9687031545596774,
      "grad_norm": 0.20045188069343567,
      "learning_rate": 6.102541462675357e-06,
      "loss": 0.0186,
      "step": 1202980
    },
    {
      "epoch": 1.9687358849983307,
      "grad_norm": 0.48919573426246643,
      "learning_rate": 6.1024755704618385e-06,
      "loss": 0.0087,
      "step": 1203000
    },
    {
      "epoch": 1.9687686154369841,
      "grad_norm": 0.048362746834754944,
      "learning_rate": 6.102409678248322e-06,
      "loss": 0.0204,
      "step": 1203020
    },
    {
      "epoch": 1.9688013458756375,
      "grad_norm": 0.2685486674308777,
      "learning_rate": 6.102343786034805e-06,
      "loss": 0.0142,
      "step": 1203040
    },
    {
      "epoch": 1.9688340763142906,
      "grad_norm": 0.4754716455936432,
      "learning_rate": 6.1022778938212876e-06,
      "loss": 0.0186,
      "step": 1203060
    },
    {
      "epoch": 1.9688668067529442,
      "grad_norm": 0.5001397132873535,
      "learning_rate": 6.10221200160777e-06,
      "loss": 0.0163,
      "step": 1203080
    },
    {
      "epoch": 1.9688995371915974,
      "grad_norm": 0.39170530438423157,
      "learning_rate": 6.102146109394254e-06,
      "loss": 0.0256,
      "step": 1203100
    },
    {
      "epoch": 1.9689322676302508,
      "grad_norm": 0.6976650357246399,
      "learning_rate": 6.102080217180736e-06,
      "loss": 0.0241,
      "step": 1203120
    },
    {
      "epoch": 1.9689649980689041,
      "grad_norm": 0.4719323515892029,
      "learning_rate": 6.102014324967219e-06,
      "loss": 0.0226,
      "step": 1203140
    },
    {
      "epoch": 1.9689977285075575,
      "grad_norm": 0.3865649104118347,
      "learning_rate": 6.101948432753701e-06,
      "loss": 0.0164,
      "step": 1203160
    },
    {
      "epoch": 1.9690304589462109,
      "grad_norm": 1.1506943702697754,
      "learning_rate": 6.101882540540185e-06,
      "loss": 0.0169,
      "step": 1203180
    },
    {
      "epoch": 1.969063189384864,
      "grad_norm": 0.6885486841201782,
      "learning_rate": 6.101816648326668e-06,
      "loss": 0.0233,
      "step": 1203200
    },
    {
      "epoch": 1.9690959198235176,
      "grad_norm": 0.2752250134944916,
      "learning_rate": 6.10175075611315e-06,
      "loss": 0.0087,
      "step": 1203220
    },
    {
      "epoch": 1.9691286502621708,
      "grad_norm": 0.12184492498636246,
      "learning_rate": 6.101684863899634e-06,
      "loss": 0.0145,
      "step": 1203240
    },
    {
      "epoch": 1.9691613807008241,
      "grad_norm": 0.4446180462837219,
      "learning_rate": 6.101618971686117e-06,
      "loss": 0.0122,
      "step": 1203260
    },
    {
      "epoch": 1.9691941111394775,
      "grad_norm": 0.3864479660987854,
      "learning_rate": 6.101553079472599e-06,
      "loss": 0.0232,
      "step": 1203280
    },
    {
      "epoch": 1.9692268415781309,
      "grad_norm": 0.8661083579063416,
      "learning_rate": 6.101487187259082e-06,
      "loss": 0.014,
      "step": 1203300
    },
    {
      "epoch": 1.9692595720167843,
      "grad_norm": 0.3836638927459717,
      "learning_rate": 6.101421295045566e-06,
      "loss": 0.0312,
      "step": 1203320
    },
    {
      "epoch": 1.9692923024554374,
      "grad_norm": 0.23147141933441162,
      "learning_rate": 6.101355402832048e-06,
      "loss": 0.0175,
      "step": 1203340
    },
    {
      "epoch": 1.969325032894091,
      "grad_norm": 0.2888532876968384,
      "learning_rate": 6.101289510618531e-06,
      "loss": 0.0145,
      "step": 1203360
    },
    {
      "epoch": 1.9693577633327441,
      "grad_norm": 0.9999822974205017,
      "learning_rate": 6.101223618405013e-06,
      "loss": 0.0207,
      "step": 1203380
    },
    {
      "epoch": 1.9693904937713975,
      "grad_norm": 1.982193112373352,
      "learning_rate": 6.101157726191497e-06,
      "loss": 0.0152,
      "step": 1203400
    },
    {
      "epoch": 1.969423224210051,
      "grad_norm": 0.7943559288978577,
      "learning_rate": 6.101091833977979e-06,
      "loss": 0.0149,
      "step": 1203420
    },
    {
      "epoch": 1.969455954648704,
      "grad_norm": 0.3715842366218567,
      "learning_rate": 6.101025941764462e-06,
      "loss": 0.0143,
      "step": 1203440
    },
    {
      "epoch": 1.9694886850873576,
      "grad_norm": 0.08217936009168625,
      "learning_rate": 6.100960049550945e-06,
      "loss": 0.0141,
      "step": 1203460
    },
    {
      "epoch": 1.9695214155260108,
      "grad_norm": 0.30412641167640686,
      "learning_rate": 6.1008941573374285e-06,
      "loss": 0.0152,
      "step": 1203480
    },
    {
      "epoch": 1.9695541459646644,
      "grad_norm": 0.467634916305542,
      "learning_rate": 6.10082826512391e-06,
      "loss": 0.0199,
      "step": 1203500
    },
    {
      "epoch": 1.9695868764033175,
      "grad_norm": 3.2741687297821045,
      "learning_rate": 6.100762372910394e-06,
      "loss": 0.0217,
      "step": 1203520
    },
    {
      "epoch": 1.969619606841971,
      "grad_norm": 0.43901705741882324,
      "learning_rate": 6.100696480696876e-06,
      "loss": 0.015,
      "step": 1203540
    },
    {
      "epoch": 1.9696523372806243,
      "grad_norm": 0.1922110915184021,
      "learning_rate": 6.1006305884833594e-06,
      "loss": 0.0149,
      "step": 1203560
    },
    {
      "epoch": 1.9696850677192774,
      "grad_norm": 0.5980851650238037,
      "learning_rate": 6.100564696269843e-06,
      "loss": 0.0171,
      "step": 1203580
    },
    {
      "epoch": 1.969717798157931,
      "grad_norm": 0.7409059405326843,
      "learning_rate": 6.100498804056325e-06,
      "loss": 0.0257,
      "step": 1203600
    },
    {
      "epoch": 1.9697505285965842,
      "grad_norm": 0.1662445217370987,
      "learning_rate": 6.1004329118428085e-06,
      "loss": 0.0232,
      "step": 1203620
    },
    {
      "epoch": 1.9697832590352375,
      "grad_norm": 0.5287325382232666,
      "learning_rate": 6.10036701962929e-06,
      "loss": 0.0124,
      "step": 1203640
    },
    {
      "epoch": 1.969815989473891,
      "grad_norm": 0.6566404104232788,
      "learning_rate": 6.100301127415774e-06,
      "loss": 0.0199,
      "step": 1203660
    },
    {
      "epoch": 1.9698487199125443,
      "grad_norm": 0.8463941216468811,
      "learning_rate": 6.100235235202257e-06,
      "loss": 0.0144,
      "step": 1203680
    },
    {
      "epoch": 1.9698814503511977,
      "grad_norm": 0.20714649558067322,
      "learning_rate": 6.1001693429887395e-06,
      "loss": 0.014,
      "step": 1203700
    },
    {
      "epoch": 1.9699141807898508,
      "grad_norm": 0.375511109828949,
      "learning_rate": 6.100103450775222e-06,
      "loss": 0.0172,
      "step": 1203720
    },
    {
      "epoch": 1.9699469112285044,
      "grad_norm": 0.10109061002731323,
      "learning_rate": 6.100037558561706e-06,
      "loss": 0.0177,
      "step": 1203740
    },
    {
      "epoch": 1.9699796416671576,
      "grad_norm": 1.1840800046920776,
      "learning_rate": 6.099971666348188e-06,
      "loss": 0.0167,
      "step": 1203760
    },
    {
      "epoch": 1.970012372105811,
      "grad_norm": 0.05626258626580238,
      "learning_rate": 6.099905774134671e-06,
      "loss": 0.0237,
      "step": 1203780
    },
    {
      "epoch": 1.9700451025444643,
      "grad_norm": 0.28862348198890686,
      "learning_rate": 6.099839881921153e-06,
      "loss": 0.0214,
      "step": 1203800
    },
    {
      "epoch": 1.9700778329831177,
      "grad_norm": 0.2463182955980301,
      "learning_rate": 6.099773989707637e-06,
      "loss": 0.0144,
      "step": 1203820
    },
    {
      "epoch": 1.970110563421771,
      "grad_norm": 0.15127691626548767,
      "learning_rate": 6.0997080974941195e-06,
      "loss": 0.0196,
      "step": 1203840
    },
    {
      "epoch": 1.9701432938604242,
      "grad_norm": 0.20020060241222382,
      "learning_rate": 6.099642205280602e-06,
      "loss": 0.0195,
      "step": 1203860
    },
    {
      "epoch": 1.9701760242990778,
      "grad_norm": 0.4277474880218506,
      "learning_rate": 6.099576313067085e-06,
      "loss": 0.0174,
      "step": 1203880
    },
    {
      "epoch": 1.970208754737731,
      "grad_norm": 0.6407997012138367,
      "learning_rate": 6.0995104208535686e-06,
      "loss": 0.0208,
      "step": 1203900
    },
    {
      "epoch": 1.9702414851763843,
      "grad_norm": 0.768787682056427,
      "learning_rate": 6.099444528640051e-06,
      "loss": 0.0207,
      "step": 1203920
    },
    {
      "epoch": 1.9702742156150377,
      "grad_norm": 0.5952809453010559,
      "learning_rate": 6.099378636426534e-06,
      "loss": 0.0171,
      "step": 1203940
    },
    {
      "epoch": 1.970306946053691,
      "grad_norm": 0.8369495868682861,
      "learning_rate": 6.099312744213018e-06,
      "loss": 0.0162,
      "step": 1203960
    },
    {
      "epoch": 1.9703396764923444,
      "grad_norm": 0.9983772039413452,
      "learning_rate": 6.0992468519994995e-06,
      "loss": 0.0152,
      "step": 1203980
    },
    {
      "epoch": 1.9703724069309976,
      "grad_norm": 0.4790143072605133,
      "learning_rate": 6.099180959785983e-06,
      "loss": 0.0183,
      "step": 1204000
    },
    {
      "epoch": 1.9704051373696512,
      "grad_norm": 0.22102408111095428,
      "learning_rate": 6.099115067572465e-06,
      "loss": 0.0145,
      "step": 1204020
    },
    {
      "epoch": 1.9704378678083043,
      "grad_norm": 1.082785725593567,
      "learning_rate": 6.099049175358949e-06,
      "loss": 0.0176,
      "step": 1204040
    },
    {
      "epoch": 1.9704705982469577,
      "grad_norm": 0.306217223405838,
      "learning_rate": 6.098983283145431e-06,
      "loss": 0.0168,
      "step": 1204060
    },
    {
      "epoch": 1.970503328685611,
      "grad_norm": 0.0807596817612648,
      "learning_rate": 6.098917390931914e-06,
      "loss": 0.0192,
      "step": 1204080
    },
    {
      "epoch": 1.9705360591242644,
      "grad_norm": 0.28804007172584534,
      "learning_rate": 6.098851498718397e-06,
      "loss": 0.017,
      "step": 1204100
    },
    {
      "epoch": 1.9705687895629178,
      "grad_norm": 1.200848937034607,
      "learning_rate": 6.09878560650488e-06,
      "loss": 0.0165,
      "step": 1204120
    },
    {
      "epoch": 1.970601520001571,
      "grad_norm": 0.4700186550617218,
      "learning_rate": 6.098719714291362e-06,
      "loss": 0.0149,
      "step": 1204140
    },
    {
      "epoch": 1.9706342504402246,
      "grad_norm": 0.33970150351524353,
      "learning_rate": 6.098653822077846e-06,
      "loss": 0.0228,
      "step": 1204160
    },
    {
      "epoch": 1.9706669808788777,
      "grad_norm": 0.5186737179756165,
      "learning_rate": 6.098587929864328e-06,
      "loss": 0.0165,
      "step": 1204180
    },
    {
      "epoch": 1.970699711317531,
      "grad_norm": 0.3411327004432678,
      "learning_rate": 6.098522037650811e-06,
      "loss": 0.0151,
      "step": 1204200
    },
    {
      "epoch": 1.9707324417561844,
      "grad_norm": 0.46868741512298584,
      "learning_rate": 6.098456145437294e-06,
      "loss": 0.0134,
      "step": 1204220
    },
    {
      "epoch": 1.9707651721948376,
      "grad_norm": 0.4345417320728302,
      "learning_rate": 6.098390253223777e-06,
      "loss": 0.0177,
      "step": 1204240
    },
    {
      "epoch": 1.9707979026334912,
      "grad_norm": 0.563885509967804,
      "learning_rate": 6.0983243610102596e-06,
      "loss": 0.0161,
      "step": 1204260
    },
    {
      "epoch": 1.9708306330721443,
      "grad_norm": 0.25679317116737366,
      "learning_rate": 6.098258468796743e-06,
      "loss": 0.0175,
      "step": 1204280
    },
    {
      "epoch": 1.9708633635107977,
      "grad_norm": 0.2571674883365631,
      "learning_rate": 6.098192576583226e-06,
      "loss": 0.0171,
      "step": 1204300
    },
    {
      "epoch": 1.970896093949451,
      "grad_norm": 0.34207725524902344,
      "learning_rate": 6.098126684369709e-06,
      "loss": 0.017,
      "step": 1204320
    },
    {
      "epoch": 1.9709288243881045,
      "grad_norm": 0.06328844279050827,
      "learning_rate": 6.098060792156192e-06,
      "loss": 0.0133,
      "step": 1204340
    },
    {
      "epoch": 1.9709615548267578,
      "grad_norm": 0.30292758345603943,
      "learning_rate": 6.097994899942674e-06,
      "loss": 0.0137,
      "step": 1204360
    },
    {
      "epoch": 1.970994285265411,
      "grad_norm": 0.17643575370311737,
      "learning_rate": 6.097929007729158e-06,
      "loss": 0.0153,
      "step": 1204380
    },
    {
      "epoch": 1.9710270157040646,
      "grad_norm": 0.1555255800485611,
      "learning_rate": 6.09786311551564e-06,
      "loss": 0.0228,
      "step": 1204400
    },
    {
      "epoch": 1.9710597461427177,
      "grad_norm": 0.4411309063434601,
      "learning_rate": 6.097797223302123e-06,
      "loss": 0.0187,
      "step": 1204420
    },
    {
      "epoch": 1.971092476581371,
      "grad_norm": 0.6670335531234741,
      "learning_rate": 6.097731331088605e-06,
      "loss": 0.0207,
      "step": 1204440
    },
    {
      "epoch": 1.9711252070200245,
      "grad_norm": 0.4380806088447571,
      "learning_rate": 6.097665438875089e-06,
      "loss": 0.0153,
      "step": 1204460
    },
    {
      "epoch": 1.9711579374586778,
      "grad_norm": 0.14159579575061798,
      "learning_rate": 6.097599546661571e-06,
      "loss": 0.0174,
      "step": 1204480
    },
    {
      "epoch": 1.9711906678973312,
      "grad_norm": 0.39640820026397705,
      "learning_rate": 6.097533654448055e-06,
      "loss": 0.021,
      "step": 1204500
    },
    {
      "epoch": 1.9712233983359844,
      "grad_norm": 0.752052903175354,
      "learning_rate": 6.097467762234537e-06,
      "loss": 0.0146,
      "step": 1204520
    },
    {
      "epoch": 1.971256128774638,
      "grad_norm": 0.3829789161682129,
      "learning_rate": 6.0974018700210205e-06,
      "loss": 0.0216,
      "step": 1204540
    },
    {
      "epoch": 1.9712888592132911,
      "grad_norm": 1.5959993600845337,
      "learning_rate": 6.097335977807502e-06,
      "loss": 0.0222,
      "step": 1204560
    },
    {
      "epoch": 1.9713215896519445,
      "grad_norm": 0.2952096462249756,
      "learning_rate": 6.097270085593986e-06,
      "loss": 0.0208,
      "step": 1204580
    },
    {
      "epoch": 1.9713543200905979,
      "grad_norm": 0.344247967004776,
      "learning_rate": 6.097204193380468e-06,
      "loss": 0.0182,
      "step": 1204600
    },
    {
      "epoch": 1.9713870505292512,
      "grad_norm": 0.3301486670970917,
      "learning_rate": 6.097138301166951e-06,
      "loss": 0.0174,
      "step": 1204620
    },
    {
      "epoch": 1.9714197809679046,
      "grad_norm": 1.5512580871582031,
      "learning_rate": 6.097072408953435e-06,
      "loss": 0.0187,
      "step": 1204640
    },
    {
      "epoch": 1.9714525114065578,
      "grad_norm": 0.28275391459465027,
      "learning_rate": 6.097006516739917e-06,
      "loss": 0.0108,
      "step": 1204660
    },
    {
      "epoch": 1.9714852418452113,
      "grad_norm": 0.5275321006774902,
      "learning_rate": 6.0969406245264005e-06,
      "loss": 0.0206,
      "step": 1204680
    },
    {
      "epoch": 1.9715179722838645,
      "grad_norm": 0.8032481670379639,
      "learning_rate": 6.096874732312883e-06,
      "loss": 0.0153,
      "step": 1204700
    },
    {
      "epoch": 1.9715507027225179,
      "grad_norm": 0.37086114287376404,
      "learning_rate": 6.096808840099366e-06,
      "loss": 0.0129,
      "step": 1204720
    },
    {
      "epoch": 1.9715834331611712,
      "grad_norm": 0.6360819339752197,
      "learning_rate": 6.096742947885849e-06,
      "loss": 0.0205,
      "step": 1204740
    },
    {
      "epoch": 1.9716161635998246,
      "grad_norm": 0.08476116508245468,
      "learning_rate": 6.096677055672332e-06,
      "loss": 0.0165,
      "step": 1204760
    },
    {
      "epoch": 1.971648894038478,
      "grad_norm": 0.3338923454284668,
      "learning_rate": 6.096611163458814e-06,
      "loss": 0.0247,
      "step": 1204780
    },
    {
      "epoch": 1.9716816244771311,
      "grad_norm": 0.3472759425640106,
      "learning_rate": 6.096545271245298e-06,
      "loss": 0.0159,
      "step": 1204800
    },
    {
      "epoch": 1.9717143549157847,
      "grad_norm": 0.15337613224983215,
      "learning_rate": 6.09647937903178e-06,
      "loss": 0.0211,
      "step": 1204820
    },
    {
      "epoch": 1.9717470853544379,
      "grad_norm": 0.2533790171146393,
      "learning_rate": 6.096413486818263e-06,
      "loss": 0.0178,
      "step": 1204840
    },
    {
      "epoch": 1.9717798157930913,
      "grad_norm": 0.2667476236820221,
      "learning_rate": 6.096347594604746e-06,
      "loss": 0.0149,
      "step": 1204860
    },
    {
      "epoch": 1.9718125462317446,
      "grad_norm": 0.7241207361221313,
      "learning_rate": 6.096281702391229e-06,
      "loss": 0.018,
      "step": 1204880
    },
    {
      "epoch": 1.9718452766703978,
      "grad_norm": 0.45229172706604004,
      "learning_rate": 6.0962158101777115e-06,
      "loss": 0.016,
      "step": 1204900
    },
    {
      "epoch": 1.9718780071090514,
      "grad_norm": 0.5277020931243896,
      "learning_rate": 6.096149917964195e-06,
      "loss": 0.0149,
      "step": 1204920
    },
    {
      "epoch": 1.9719107375477045,
      "grad_norm": 0.18484936654567719,
      "learning_rate": 6.096084025750677e-06,
      "loss": 0.0118,
      "step": 1204940
    },
    {
      "epoch": 1.9719434679863581,
      "grad_norm": 0.19586895406246185,
      "learning_rate": 6.0960181335371605e-06,
      "loss": 0.0131,
      "step": 1204960
    },
    {
      "epoch": 1.9719761984250113,
      "grad_norm": 0.41049519181251526,
      "learning_rate": 6.095952241323644e-06,
      "loss": 0.0155,
      "step": 1204980
    },
    {
      "epoch": 1.9720089288636646,
      "grad_norm": 0.5036017894744873,
      "learning_rate": 6.095886349110126e-06,
      "loss": 0.0152,
      "step": 1205000
    },
    {
      "epoch": 1.972041659302318,
      "grad_norm": 0.787796676158905,
      "learning_rate": 6.09582045689661e-06,
      "loss": 0.0157,
      "step": 1205020
    },
    {
      "epoch": 1.9720743897409712,
      "grad_norm": 1.1950582265853882,
      "learning_rate": 6.0957545646830915e-06,
      "loss": 0.0221,
      "step": 1205040
    },
    {
      "epoch": 1.9721071201796248,
      "grad_norm": 0.4955724775791168,
      "learning_rate": 6.095688672469575e-06,
      "loss": 0.0175,
      "step": 1205060
    },
    {
      "epoch": 1.972139850618278,
      "grad_norm": 1.4013054370880127,
      "learning_rate": 6.095622780256058e-06,
      "loss": 0.0229,
      "step": 1205080
    },
    {
      "epoch": 1.9721725810569313,
      "grad_norm": 1.0503438711166382,
      "learning_rate": 6.0955568880425406e-06,
      "loss": 0.0141,
      "step": 1205100
    },
    {
      "epoch": 1.9722053114955846,
      "grad_norm": 0.32996708154678345,
      "learning_rate": 6.095490995829023e-06,
      "loss": 0.0163,
      "step": 1205120
    },
    {
      "epoch": 1.972238041934238,
      "grad_norm": 0.615044116973877,
      "learning_rate": 6.095425103615507e-06,
      "loss": 0.0165,
      "step": 1205140
    },
    {
      "epoch": 1.9722707723728914,
      "grad_norm": 0.13507044315338135,
      "learning_rate": 6.095359211401989e-06,
      "loss": 0.0234,
      "step": 1205160
    },
    {
      "epoch": 1.9723035028115445,
      "grad_norm": 1.1054996252059937,
      "learning_rate": 6.095293319188472e-06,
      "loss": 0.0155,
      "step": 1205180
    },
    {
      "epoch": 1.9723362332501981,
      "grad_norm": 0.9941684603691101,
      "learning_rate": 6.095227426974954e-06,
      "loss": 0.0192,
      "step": 1205200
    },
    {
      "epoch": 1.9723689636888513,
      "grad_norm": 0.4074769616127014,
      "learning_rate": 6.095161534761438e-06,
      "loss": 0.0189,
      "step": 1205220
    },
    {
      "epoch": 1.9724016941275047,
      "grad_norm": 0.219121515750885,
      "learning_rate": 6.095095642547921e-06,
      "loss": 0.0204,
      "step": 1205240
    },
    {
      "epoch": 1.972434424566158,
      "grad_norm": 0.7271326780319214,
      "learning_rate": 6.095029750334403e-06,
      "loss": 0.0226,
      "step": 1205260
    },
    {
      "epoch": 1.9724671550048114,
      "grad_norm": 0.6900116801261902,
      "learning_rate": 6.094963858120886e-06,
      "loss": 0.0211,
      "step": 1205280
    },
    {
      "epoch": 1.9724998854434648,
      "grad_norm": 0.1467154324054718,
      "learning_rate": 6.09489796590737e-06,
      "loss": 0.0215,
      "step": 1205300
    },
    {
      "epoch": 1.972532615882118,
      "grad_norm": 0.26971495151519775,
      "learning_rate": 6.0948320736938515e-06,
      "loss": 0.0272,
      "step": 1205320
    },
    {
      "epoch": 1.9725653463207715,
      "grad_norm": 0.14980386197566986,
      "learning_rate": 6.094766181480335e-06,
      "loss": 0.0198,
      "step": 1205340
    },
    {
      "epoch": 1.9725980767594247,
      "grad_norm": 1.629472255706787,
      "learning_rate": 6.094700289266819e-06,
      "loss": 0.0221,
      "step": 1205360
    },
    {
      "epoch": 1.972630807198078,
      "grad_norm": 0.5251955389976501,
      "learning_rate": 6.094634397053301e-06,
      "loss": 0.0202,
      "step": 1205380
    },
    {
      "epoch": 1.9726635376367314,
      "grad_norm": 0.3491191864013672,
      "learning_rate": 6.094568504839784e-06,
      "loss": 0.0143,
      "step": 1205400
    },
    {
      "epoch": 1.9726962680753848,
      "grad_norm": 0.3477752208709717,
      "learning_rate": 6.094502612626266e-06,
      "loss": 0.0154,
      "step": 1205420
    },
    {
      "epoch": 1.9727289985140382,
      "grad_norm": 0.38486048579216003,
      "learning_rate": 6.09443672041275e-06,
      "loss": 0.0242,
      "step": 1205440
    },
    {
      "epoch": 1.9727617289526913,
      "grad_norm": 0.1946040689945221,
      "learning_rate": 6.0943708281992316e-06,
      "loss": 0.0133,
      "step": 1205460
    },
    {
      "epoch": 1.972794459391345,
      "grad_norm": 0.7716406583786011,
      "learning_rate": 6.094304935985715e-06,
      "loss": 0.0259,
      "step": 1205480
    },
    {
      "epoch": 1.972827189829998,
      "grad_norm": 0.15859197080135345,
      "learning_rate": 6.094239043772198e-06,
      "loss": 0.0179,
      "step": 1205500
    },
    {
      "epoch": 1.9728599202686514,
      "grad_norm": 0.09220904111862183,
      "learning_rate": 6.094173151558681e-06,
      "loss": 0.0154,
      "step": 1205520
    },
    {
      "epoch": 1.9728926507073048,
      "grad_norm": 0.2051192969083786,
      "learning_rate": 6.094107259345163e-06,
      "loss": 0.0084,
      "step": 1205540
    },
    {
      "epoch": 1.9729253811459582,
      "grad_norm": 0.7755353450775146,
      "learning_rate": 6.094041367131647e-06,
      "loss": 0.0175,
      "step": 1205560
    },
    {
      "epoch": 1.9729581115846115,
      "grad_norm": 0.1804649382829666,
      "learning_rate": 6.093975474918129e-06,
      "loss": 0.0193,
      "step": 1205580
    },
    {
      "epoch": 1.9729908420232647,
      "grad_norm": 0.17186003923416138,
      "learning_rate": 6.0939095827046124e-06,
      "loss": 0.0124,
      "step": 1205600
    },
    {
      "epoch": 1.9730235724619183,
      "grad_norm": 0.8705087304115295,
      "learning_rate": 6.093843690491094e-06,
      "loss": 0.0144,
      "step": 1205620
    },
    {
      "epoch": 1.9730563029005714,
      "grad_norm": 0.5191030502319336,
      "learning_rate": 6.093777798277578e-06,
      "loss": 0.0209,
      "step": 1205640
    },
    {
      "epoch": 1.9730890333392248,
      "grad_norm": 0.22614550590515137,
      "learning_rate": 6.093711906064061e-06,
      "loss": 0.028,
      "step": 1205660
    },
    {
      "epoch": 1.9731217637778782,
      "grad_norm": 0.25547710061073303,
      "learning_rate": 6.093646013850543e-06,
      "loss": 0.0101,
      "step": 1205680
    },
    {
      "epoch": 1.9731544942165313,
      "grad_norm": 0.8315914869308472,
      "learning_rate": 6.093580121637027e-06,
      "loss": 0.021,
      "step": 1205700
    },
    {
      "epoch": 1.973187224655185,
      "grad_norm": 0.475301057100296,
      "learning_rate": 6.09351422942351e-06,
      "loss": 0.0153,
      "step": 1205720
    },
    {
      "epoch": 1.973219955093838,
      "grad_norm": 0.868192195892334,
      "learning_rate": 6.0934483372099925e-06,
      "loss": 0.0146,
      "step": 1205740
    },
    {
      "epoch": 1.9732526855324917,
      "grad_norm": 0.38514840602874756,
      "learning_rate": 6.093382444996475e-06,
      "loss": 0.0152,
      "step": 1205760
    },
    {
      "epoch": 1.9732854159711448,
      "grad_norm": 0.8511325120925903,
      "learning_rate": 6.093316552782959e-06,
      "loss": 0.0178,
      "step": 1205780
    },
    {
      "epoch": 1.9733181464097982,
      "grad_norm": 0.35583895444869995,
      "learning_rate": 6.093250660569441e-06,
      "loss": 0.0209,
      "step": 1205800
    },
    {
      "epoch": 1.9733508768484516,
      "grad_norm": 0.9196544289588928,
      "learning_rate": 6.093184768355924e-06,
      "loss": 0.0198,
      "step": 1205820
    },
    {
      "epoch": 1.9733836072871047,
      "grad_norm": 0.466421514749527,
      "learning_rate": 6.093118876142406e-06,
      "loss": 0.0142,
      "step": 1205840
    },
    {
      "epoch": 1.9734163377257583,
      "grad_norm": 1.1418205499649048,
      "learning_rate": 6.09305298392889e-06,
      "loss": 0.021,
      "step": 1205860
    },
    {
      "epoch": 1.9734490681644115,
      "grad_norm": 0.27937832474708557,
      "learning_rate": 6.0929870917153725e-06,
      "loss": 0.0187,
      "step": 1205880
    },
    {
      "epoch": 1.9734817986030648,
      "grad_norm": 1.480393409729004,
      "learning_rate": 6.092921199501855e-06,
      "loss": 0.0248,
      "step": 1205900
    },
    {
      "epoch": 1.9735145290417182,
      "grad_norm": 0.5824968218803406,
      "learning_rate": 6.092855307288338e-06,
      "loss": 0.0196,
      "step": 1205920
    },
    {
      "epoch": 1.9735472594803716,
      "grad_norm": 1.5373454093933105,
      "learning_rate": 6.0927894150748216e-06,
      "loss": 0.0202,
      "step": 1205940
    },
    {
      "epoch": 1.973579989919025,
      "grad_norm": 0.49987709522247314,
      "learning_rate": 6.0927235228613035e-06,
      "loss": 0.0193,
      "step": 1205960
    },
    {
      "epoch": 1.973612720357678,
      "grad_norm": 0.4506491422653198,
      "learning_rate": 6.092657630647787e-06,
      "loss": 0.02,
      "step": 1205980
    },
    {
      "epoch": 1.9736454507963317,
      "grad_norm": 0.12672650814056396,
      "learning_rate": 6.092591738434269e-06,
      "loss": 0.0268,
      "step": 1206000
    },
    {
      "epoch": 1.9736781812349848,
      "grad_norm": 0.19459551572799683,
      "learning_rate": 6.0925258462207525e-06,
      "loss": 0.0148,
      "step": 1206020
    },
    {
      "epoch": 1.9737109116736382,
      "grad_norm": 0.3830467462539673,
      "learning_rate": 6.092459954007236e-06,
      "loss": 0.0134,
      "step": 1206040
    },
    {
      "epoch": 1.9737436421122916,
      "grad_norm": 0.1356426626443863,
      "learning_rate": 6.092394061793718e-06,
      "loss": 0.0165,
      "step": 1206060
    },
    {
      "epoch": 1.973776372550945,
      "grad_norm": 1.2396306991577148,
      "learning_rate": 6.092328169580202e-06,
      "loss": 0.0249,
      "step": 1206080
    },
    {
      "epoch": 1.9738091029895983,
      "grad_norm": 0.6748862862586975,
      "learning_rate": 6.092262277366684e-06,
      "loss": 0.0148,
      "step": 1206100
    },
    {
      "epoch": 1.9738418334282515,
      "grad_norm": 1.6760727167129517,
      "learning_rate": 6.092196385153167e-06,
      "loss": 0.0198,
      "step": 1206120
    },
    {
      "epoch": 1.973874563866905,
      "grad_norm": 0.3894394338130951,
      "learning_rate": 6.09213049293965e-06,
      "loss": 0.0154,
      "step": 1206140
    },
    {
      "epoch": 1.9739072943055582,
      "grad_norm": 0.42802444100379944,
      "learning_rate": 6.092064600726133e-06,
      "loss": 0.0164,
      "step": 1206160
    },
    {
      "epoch": 1.9739400247442116,
      "grad_norm": 0.20690889656543732,
      "learning_rate": 6.091998708512615e-06,
      "loss": 0.0207,
      "step": 1206180
    },
    {
      "epoch": 1.973972755182865,
      "grad_norm": 0.24965427815914154,
      "learning_rate": 6.091932816299099e-06,
      "loss": 0.0193,
      "step": 1206200
    },
    {
      "epoch": 1.9740054856215183,
      "grad_norm": 1.4286558628082275,
      "learning_rate": 6.091866924085581e-06,
      "loss": 0.0211,
      "step": 1206220
    },
    {
      "epoch": 1.9740382160601717,
      "grad_norm": 1.8973281383514404,
      "learning_rate": 6.091801031872064e-06,
      "loss": 0.0229,
      "step": 1206240
    },
    {
      "epoch": 1.9740709464988249,
      "grad_norm": 1.2402222156524658,
      "learning_rate": 6.091735139658547e-06,
      "loss": 0.0141,
      "step": 1206260
    },
    {
      "epoch": 1.9741036769374785,
      "grad_norm": 0.2339826226234436,
      "learning_rate": 6.09166924744503e-06,
      "loss": 0.0213,
      "step": 1206280
    },
    {
      "epoch": 1.9741364073761316,
      "grad_norm": 0.54219651222229,
      "learning_rate": 6.0916033552315126e-06,
      "loss": 0.0162,
      "step": 1206300
    },
    {
      "epoch": 1.974169137814785,
      "grad_norm": 0.3509218096733093,
      "learning_rate": 6.091537463017996e-06,
      "loss": 0.0147,
      "step": 1206320
    },
    {
      "epoch": 1.9742018682534384,
      "grad_norm": 1.1180888414382935,
      "learning_rate": 6.091471570804478e-06,
      "loss": 0.0213,
      "step": 1206340
    },
    {
      "epoch": 1.9742345986920917,
      "grad_norm": 0.4167158603668213,
      "learning_rate": 6.091405678590962e-06,
      "loss": 0.0199,
      "step": 1206360
    },
    {
      "epoch": 1.974267329130745,
      "grad_norm": 0.2769125699996948,
      "learning_rate": 6.091339786377445e-06,
      "loss": 0.0136,
      "step": 1206380
    },
    {
      "epoch": 1.9743000595693982,
      "grad_norm": 0.14561626315116882,
      "learning_rate": 6.091273894163927e-06,
      "loss": 0.0155,
      "step": 1206400
    },
    {
      "epoch": 1.9743327900080518,
      "grad_norm": 0.8396089673042297,
      "learning_rate": 6.091208001950411e-06,
      "loss": 0.0147,
      "step": 1206420
    },
    {
      "epoch": 1.974365520446705,
      "grad_norm": 0.7220427989959717,
      "learning_rate": 6.091142109736893e-06,
      "loss": 0.0193,
      "step": 1206440
    },
    {
      "epoch": 1.9743982508853584,
      "grad_norm": 0.8920984268188477,
      "learning_rate": 6.091076217523376e-06,
      "loss": 0.0171,
      "step": 1206460
    },
    {
      "epoch": 1.9744309813240117,
      "grad_norm": 0.14697816967964172,
      "learning_rate": 6.091010325309858e-06,
      "loss": 0.0146,
      "step": 1206480
    },
    {
      "epoch": 1.9744637117626649,
      "grad_norm": 2.532904863357544,
      "learning_rate": 6.090944433096342e-06,
      "loss": 0.013,
      "step": 1206500
    },
    {
      "epoch": 1.9744964422013185,
      "grad_norm": 0.5989012122154236,
      "learning_rate": 6.090878540882824e-06,
      "loss": 0.0125,
      "step": 1206520
    },
    {
      "epoch": 1.9745291726399716,
      "grad_norm": 0.08268211036920547,
      "learning_rate": 6.090812648669307e-06,
      "loss": 0.0199,
      "step": 1206540
    },
    {
      "epoch": 1.9745619030786252,
      "grad_norm": 0.6424031853675842,
      "learning_rate": 6.09074675645579e-06,
      "loss": 0.0177,
      "step": 1206560
    },
    {
      "epoch": 1.9745946335172784,
      "grad_norm": 0.24791105091571808,
      "learning_rate": 6.0906808642422735e-06,
      "loss": 0.0183,
      "step": 1206580
    },
    {
      "epoch": 1.9746273639559317,
      "grad_norm": 0.4907476007938385,
      "learning_rate": 6.090614972028755e-06,
      "loss": 0.0186,
      "step": 1206600
    },
    {
      "epoch": 1.9746600943945851,
      "grad_norm": 0.23690010607242584,
      "learning_rate": 6.090549079815239e-06,
      "loss": 0.0188,
      "step": 1206620
    },
    {
      "epoch": 1.9746928248332383,
      "grad_norm": 0.22238846123218536,
      "learning_rate": 6.090483187601721e-06,
      "loss": 0.0071,
      "step": 1206640
    },
    {
      "epoch": 1.9747255552718919,
      "grad_norm": 0.26432740688323975,
      "learning_rate": 6.0904172953882044e-06,
      "loss": 0.0132,
      "step": 1206660
    },
    {
      "epoch": 1.974758285710545,
      "grad_norm": 0.09246516972780228,
      "learning_rate": 6.090351403174687e-06,
      "loss": 0.0143,
      "step": 1206680
    },
    {
      "epoch": 1.9747910161491984,
      "grad_norm": 1.1272743940353394,
      "learning_rate": 6.09028551096117e-06,
      "loss": 0.021,
      "step": 1206700
    },
    {
      "epoch": 1.9748237465878518,
      "grad_norm": 0.31171560287475586,
      "learning_rate": 6.090219618747653e-06,
      "loss": 0.021,
      "step": 1206720
    },
    {
      "epoch": 1.9748564770265051,
      "grad_norm": 0.4865439236164093,
      "learning_rate": 6.090153726534136e-06,
      "loss": 0.0121,
      "step": 1206740
    },
    {
      "epoch": 1.9748892074651585,
      "grad_norm": 0.4016694128513336,
      "learning_rate": 6.090087834320619e-06,
      "loss": 0.0115,
      "step": 1206760
    },
    {
      "epoch": 1.9749219379038117,
      "grad_norm": 0.7796391248703003,
      "learning_rate": 6.090021942107102e-06,
      "loss": 0.0201,
      "step": 1206780
    },
    {
      "epoch": 1.9749546683424652,
      "grad_norm": 0.1530037224292755,
      "learning_rate": 6.089956049893585e-06,
      "loss": 0.0217,
      "step": 1206800
    },
    {
      "epoch": 1.9749873987811184,
      "grad_norm": 0.4141261875629425,
      "learning_rate": 6.089890157680067e-06,
      "loss": 0.0209,
      "step": 1206820
    },
    {
      "epoch": 1.9750201292197718,
      "grad_norm": 0.2961360216140747,
      "learning_rate": 6.089824265466551e-06,
      "loss": 0.0117,
      "step": 1206840
    },
    {
      "epoch": 1.9750528596584251,
      "grad_norm": 0.8980838656425476,
      "learning_rate": 6.089758373253033e-06,
      "loss": 0.0215,
      "step": 1206860
    },
    {
      "epoch": 1.9750855900970785,
      "grad_norm": 0.4671536684036255,
      "learning_rate": 6.089692481039516e-06,
      "loss": 0.021,
      "step": 1206880
    },
    {
      "epoch": 1.9751183205357319,
      "grad_norm": 0.5274918675422668,
      "learning_rate": 6.089626588825999e-06,
      "loss": 0.015,
      "step": 1206900
    },
    {
      "epoch": 1.975151050974385,
      "grad_norm": 0.45978376269340515,
      "learning_rate": 6.089560696612482e-06,
      "loss": 0.0159,
      "step": 1206920
    },
    {
      "epoch": 1.9751837814130386,
      "grad_norm": 0.3786662220954895,
      "learning_rate": 6.0894948043989645e-06,
      "loss": 0.0162,
      "step": 1206940
    },
    {
      "epoch": 1.9752165118516918,
      "grad_norm": 0.45419424772262573,
      "learning_rate": 6.089428912185448e-06,
      "loss": 0.0166,
      "step": 1206960
    },
    {
      "epoch": 1.9752492422903452,
      "grad_norm": 0.4487418830394745,
      "learning_rate": 6.08936301997193e-06,
      "loss": 0.0201,
      "step": 1206980
    },
    {
      "epoch": 1.9752819727289985,
      "grad_norm": 0.3821128308773041,
      "learning_rate": 6.0892971277584135e-06,
      "loss": 0.0211,
      "step": 1207000
    },
    {
      "epoch": 1.975314703167652,
      "grad_norm": 0.06458302587270737,
      "learning_rate": 6.0892312355448954e-06,
      "loss": 0.0174,
      "step": 1207020
    },
    {
      "epoch": 1.9753474336063053,
      "grad_norm": 2.18496036529541,
      "learning_rate": 6.089165343331379e-06,
      "loss": 0.0259,
      "step": 1207040
    },
    {
      "epoch": 1.9753801640449584,
      "grad_norm": 0.2503114342689514,
      "learning_rate": 6.089099451117862e-06,
      "loss": 0.0195,
      "step": 1207060
    },
    {
      "epoch": 1.975412894483612,
      "grad_norm": 0.9814637899398804,
      "learning_rate": 6.0890335589043445e-06,
      "loss": 0.0206,
      "step": 1207080
    },
    {
      "epoch": 1.9754456249222652,
      "grad_norm": 0.1922677606344223,
      "learning_rate": 6.088967666690828e-06,
      "loss": 0.0187,
      "step": 1207100
    },
    {
      "epoch": 1.9754783553609185,
      "grad_norm": 0.3749106526374817,
      "learning_rate": 6.088901774477311e-06,
      "loss": 0.0245,
      "step": 1207120
    },
    {
      "epoch": 1.975511085799572,
      "grad_norm": 0.35319289565086365,
      "learning_rate": 6.0888358822637936e-06,
      "loss": 0.016,
      "step": 1207140
    },
    {
      "epoch": 1.975543816238225,
      "grad_norm": 1.2226572036743164,
      "learning_rate": 6.088769990050276e-06,
      "loss": 0.0258,
      "step": 1207160
    },
    {
      "epoch": 1.9755765466768787,
      "grad_norm": 0.12859654426574707,
      "learning_rate": 6.08870409783676e-06,
      "loss": 0.0214,
      "step": 1207180
    },
    {
      "epoch": 1.9756092771155318,
      "grad_norm": 0.5419642329216003,
      "learning_rate": 6.088638205623242e-06,
      "loss": 0.0175,
      "step": 1207200
    },
    {
      "epoch": 1.9756420075541854,
      "grad_norm": 0.32203274965286255,
      "learning_rate": 6.088572313409725e-06,
      "loss": 0.0176,
      "step": 1207220
    },
    {
      "epoch": 1.9756747379928385,
      "grad_norm": 1.3157708644866943,
      "learning_rate": 6.088506421196207e-06,
      "loss": 0.0237,
      "step": 1207240
    },
    {
      "epoch": 1.975707468431492,
      "grad_norm": 0.8324166536331177,
      "learning_rate": 6.088440528982691e-06,
      "loss": 0.0235,
      "step": 1207260
    },
    {
      "epoch": 1.9757401988701453,
      "grad_norm": 0.811507523059845,
      "learning_rate": 6.088374636769174e-06,
      "loss": 0.0144,
      "step": 1207280
    },
    {
      "epoch": 1.9757729293087984,
      "grad_norm": 1.1743909120559692,
      "learning_rate": 6.088308744555656e-06,
      "loss": 0.0205,
      "step": 1207300
    },
    {
      "epoch": 1.975805659747452,
      "grad_norm": 0.143295019865036,
      "learning_rate": 6.088242852342139e-06,
      "loss": 0.0237,
      "step": 1207320
    },
    {
      "epoch": 1.9758383901861052,
      "grad_norm": 0.8162484169006348,
      "learning_rate": 6.088176960128623e-06,
      "loss": 0.0117,
      "step": 1207340
    },
    {
      "epoch": 1.9758711206247586,
      "grad_norm": 0.5196589231491089,
      "learning_rate": 6.0881110679151046e-06,
      "loss": 0.0208,
      "step": 1207360
    },
    {
      "epoch": 1.975903851063412,
      "grad_norm": 0.2686326205730438,
      "learning_rate": 6.088045175701588e-06,
      "loss": 0.027,
      "step": 1207380
    },
    {
      "epoch": 1.9759365815020653,
      "grad_norm": 0.3220987617969513,
      "learning_rate": 6.08797928348807e-06,
      "loss": 0.016,
      "step": 1207400
    },
    {
      "epoch": 1.9759693119407187,
      "grad_norm": 0.1605958789587021,
      "learning_rate": 6.087913391274554e-06,
      "loss": 0.0132,
      "step": 1207420
    },
    {
      "epoch": 1.9760020423793718,
      "grad_norm": 0.5073971748352051,
      "learning_rate": 6.087847499061037e-06,
      "loss": 0.0182,
      "step": 1207440
    },
    {
      "epoch": 1.9760347728180254,
      "grad_norm": 0.5840228796005249,
      "learning_rate": 6.087781606847519e-06,
      "loss": 0.0114,
      "step": 1207460
    },
    {
      "epoch": 1.9760675032566786,
      "grad_norm": 1.1003589630126953,
      "learning_rate": 6.087715714634003e-06,
      "loss": 0.0195,
      "step": 1207480
    },
    {
      "epoch": 1.976100233695332,
      "grad_norm": 0.9311810731887817,
      "learning_rate": 6.0876498224204846e-06,
      "loss": 0.0173,
      "step": 1207500
    },
    {
      "epoch": 1.9761329641339853,
      "grad_norm": 0.50699383020401,
      "learning_rate": 6.087583930206968e-06,
      "loss": 0.0167,
      "step": 1207520
    },
    {
      "epoch": 1.9761656945726387,
      "grad_norm": 1.790584683418274,
      "learning_rate": 6.087518037993451e-06,
      "loss": 0.0213,
      "step": 1207540
    },
    {
      "epoch": 1.976198425011292,
      "grad_norm": 0.5742546916007996,
      "learning_rate": 6.087452145779934e-06,
      "loss": 0.0169,
      "step": 1207560
    },
    {
      "epoch": 1.9762311554499452,
      "grad_norm": 0.1673971712589264,
      "learning_rate": 6.087386253566416e-06,
      "loss": 0.018,
      "step": 1207580
    },
    {
      "epoch": 1.9762638858885988,
      "grad_norm": 0.7575511336326599,
      "learning_rate": 6.0873203613529e-06,
      "loss": 0.0164,
      "step": 1207600
    },
    {
      "epoch": 1.976296616327252,
      "grad_norm": 0.3507532477378845,
      "learning_rate": 6.087254469139382e-06,
      "loss": 0.021,
      "step": 1207620
    },
    {
      "epoch": 1.9763293467659053,
      "grad_norm": 0.4811858534812927,
      "learning_rate": 6.0871885769258654e-06,
      "loss": 0.0117,
      "step": 1207640
    },
    {
      "epoch": 1.9763620772045587,
      "grad_norm": 0.10433550924062729,
      "learning_rate": 6.087122684712347e-06,
      "loss": 0.0227,
      "step": 1207660
    },
    {
      "epoch": 1.976394807643212,
      "grad_norm": 0.15226326882839203,
      "learning_rate": 6.087056792498831e-06,
      "loss": 0.0143,
      "step": 1207680
    },
    {
      "epoch": 1.9764275380818654,
      "grad_norm": 1.3056511878967285,
      "learning_rate": 6.086990900285314e-06,
      "loss": 0.0174,
      "step": 1207700
    },
    {
      "epoch": 1.9764602685205186,
      "grad_norm": 0.26441603899002075,
      "learning_rate": 6.086925008071796e-06,
      "loss": 0.0215,
      "step": 1207720
    },
    {
      "epoch": 1.9764929989591722,
      "grad_norm": 0.353537380695343,
      "learning_rate": 6.086859115858279e-06,
      "loss": 0.0197,
      "step": 1207740
    },
    {
      "epoch": 1.9765257293978253,
      "grad_norm": 0.25535285472869873,
      "learning_rate": 6.086793223644763e-06,
      "loss": 0.0254,
      "step": 1207760
    },
    {
      "epoch": 1.9765584598364787,
      "grad_norm": 0.5437219738960266,
      "learning_rate": 6.086727331431245e-06,
      "loss": 0.0184,
      "step": 1207780
    },
    {
      "epoch": 1.976591190275132,
      "grad_norm": 0.2107408344745636,
      "learning_rate": 6.086661439217728e-06,
      "loss": 0.0135,
      "step": 1207800
    },
    {
      "epoch": 1.9766239207137855,
      "grad_norm": 1.6626551151275635,
      "learning_rate": 6.086595547004212e-06,
      "loss": 0.0212,
      "step": 1207820
    },
    {
      "epoch": 1.9766566511524388,
      "grad_norm": 0.7298615574836731,
      "learning_rate": 6.086529654790694e-06,
      "loss": 0.0215,
      "step": 1207840
    },
    {
      "epoch": 1.976689381591092,
      "grad_norm": 0.4680449664592743,
      "learning_rate": 6.086463762577177e-06,
      "loss": 0.0165,
      "step": 1207860
    },
    {
      "epoch": 1.9767221120297456,
      "grad_norm": 1.5707440376281738,
      "learning_rate": 6.086397870363659e-06,
      "loss": 0.0183,
      "step": 1207880
    },
    {
      "epoch": 1.9767548424683987,
      "grad_norm": 1.8831504583358765,
      "learning_rate": 6.086331978150143e-06,
      "loss": 0.0193,
      "step": 1207900
    },
    {
      "epoch": 1.976787572907052,
      "grad_norm": 0.5872549414634705,
      "learning_rate": 6.0862660859366255e-06,
      "loss": 0.018,
      "step": 1207920
    },
    {
      "epoch": 1.9768203033457055,
      "grad_norm": 0.46310317516326904,
      "learning_rate": 6.086200193723108e-06,
      "loss": 0.0183,
      "step": 1207940
    },
    {
      "epoch": 1.9768530337843586,
      "grad_norm": 0.459350049495697,
      "learning_rate": 6.086134301509591e-06,
      "loss": 0.0185,
      "step": 1207960
    },
    {
      "epoch": 1.9768857642230122,
      "grad_norm": 0.9472044706344604,
      "learning_rate": 6.0860684092960746e-06,
      "loss": 0.016,
      "step": 1207980
    },
    {
      "epoch": 1.9769184946616654,
      "grad_norm": 0.6789266467094421,
      "learning_rate": 6.0860025170825565e-06,
      "loss": 0.0198,
      "step": 1208000
    },
    {
      "epoch": 1.976951225100319,
      "grad_norm": 0.12525632977485657,
      "learning_rate": 6.08593662486904e-06,
      "loss": 0.0187,
      "step": 1208020
    },
    {
      "epoch": 1.976983955538972,
      "grad_norm": 0.36065006256103516,
      "learning_rate": 6.085870732655522e-06,
      "loss": 0.0121,
      "step": 1208040
    },
    {
      "epoch": 1.9770166859776255,
      "grad_norm": 0.47367528080940247,
      "learning_rate": 6.0858048404420055e-06,
      "loss": 0.0207,
      "step": 1208060
    },
    {
      "epoch": 1.9770494164162788,
      "grad_norm": 0.30572664737701416,
      "learning_rate": 6.085738948228488e-06,
      "loss": 0.0139,
      "step": 1208080
    },
    {
      "epoch": 1.977082146854932,
      "grad_norm": 0.7373164296150208,
      "learning_rate": 6.085673056014971e-06,
      "loss": 0.0192,
      "step": 1208100
    },
    {
      "epoch": 1.9771148772935856,
      "grad_norm": 0.44864422082901,
      "learning_rate": 6.085607163801454e-06,
      "loss": 0.016,
      "step": 1208120
    },
    {
      "epoch": 1.9771476077322387,
      "grad_norm": 0.34044599533081055,
      "learning_rate": 6.085541271587937e-06,
      "loss": 0.0185,
      "step": 1208140
    },
    {
      "epoch": 1.9771803381708921,
      "grad_norm": 0.46345508098602295,
      "learning_rate": 6.08547537937442e-06,
      "loss": 0.0208,
      "step": 1208160
    },
    {
      "epoch": 1.9772130686095455,
      "grad_norm": 0.14453381299972534,
      "learning_rate": 6.085409487160903e-06,
      "loss": 0.0198,
      "step": 1208180
    },
    {
      "epoch": 1.9772457990481989,
      "grad_norm": 0.45915672183036804,
      "learning_rate": 6.085343594947386e-06,
      "loss": 0.0157,
      "step": 1208200
    },
    {
      "epoch": 1.9772785294868522,
      "grad_norm": 0.6875948309898376,
      "learning_rate": 6.085277702733868e-06,
      "loss": 0.0169,
      "step": 1208220
    },
    {
      "epoch": 1.9773112599255054,
      "grad_norm": 0.6675159931182861,
      "learning_rate": 6.085211810520352e-06,
      "loss": 0.029,
      "step": 1208240
    },
    {
      "epoch": 1.977343990364159,
      "grad_norm": 0.48780107498168945,
      "learning_rate": 6.085145918306834e-06,
      "loss": 0.0162,
      "step": 1208260
    },
    {
      "epoch": 1.9773767208028121,
      "grad_norm": 0.484324187040329,
      "learning_rate": 6.085080026093317e-06,
      "loss": 0.0198,
      "step": 1208280
    },
    {
      "epoch": 1.9774094512414655,
      "grad_norm": 0.1867380291223526,
      "learning_rate": 6.085014133879799e-06,
      "loss": 0.0176,
      "step": 1208300
    },
    {
      "epoch": 1.9774421816801189,
      "grad_norm": 0.5356417298316956,
      "learning_rate": 6.084948241666283e-06,
      "loss": 0.0172,
      "step": 1208320
    },
    {
      "epoch": 1.9774749121187722,
      "grad_norm": 0.6536238193511963,
      "learning_rate": 6.0848823494527656e-06,
      "loss": 0.0197,
      "step": 1208340
    },
    {
      "epoch": 1.9775076425574256,
      "grad_norm": 0.28958505392074585,
      "learning_rate": 6.084816457239249e-06,
      "loss": 0.0198,
      "step": 1208360
    },
    {
      "epoch": 1.9775403729960788,
      "grad_norm": 0.4111438989639282,
      "learning_rate": 6.084750565025731e-06,
      "loss": 0.018,
      "step": 1208380
    },
    {
      "epoch": 1.9775731034347324,
      "grad_norm": 0.27896228432655334,
      "learning_rate": 6.084684672812215e-06,
      "loss": 0.0174,
      "step": 1208400
    },
    {
      "epoch": 1.9776058338733855,
      "grad_norm": 0.41856649518013,
      "learning_rate": 6.0846187805986965e-06,
      "loss": 0.0193,
      "step": 1208420
    },
    {
      "epoch": 1.9776385643120389,
      "grad_norm": 0.1899849772453308,
      "learning_rate": 6.08455288838518e-06,
      "loss": 0.0121,
      "step": 1208440
    },
    {
      "epoch": 1.9776712947506923,
      "grad_norm": 1.3915631771087646,
      "learning_rate": 6.084486996171662e-06,
      "loss": 0.0194,
      "step": 1208460
    },
    {
      "epoch": 1.9777040251893456,
      "grad_norm": 0.20951910316944122,
      "learning_rate": 6.084421103958146e-06,
      "loss": 0.0165,
      "step": 1208480
    },
    {
      "epoch": 1.977736755627999,
      "grad_norm": 0.4490838348865509,
      "learning_rate": 6.084355211744629e-06,
      "loss": 0.0116,
      "step": 1208500
    },
    {
      "epoch": 1.9777694860666521,
      "grad_norm": 0.27297550439834595,
      "learning_rate": 6.084289319531111e-06,
      "loss": 0.011,
      "step": 1208520
    },
    {
      "epoch": 1.9778022165053057,
      "grad_norm": 0.38183102011680603,
      "learning_rate": 6.084223427317595e-06,
      "loss": 0.0119,
      "step": 1208540
    },
    {
      "epoch": 1.977834946943959,
      "grad_norm": 0.7260420918464661,
      "learning_rate": 6.084157535104077e-06,
      "loss": 0.0177,
      "step": 1208560
    },
    {
      "epoch": 1.9778676773826123,
      "grad_norm": 0.7139224410057068,
      "learning_rate": 6.08409164289056e-06,
      "loss": 0.0217,
      "step": 1208580
    },
    {
      "epoch": 1.9779004078212656,
      "grad_norm": 0.2480204850435257,
      "learning_rate": 6.084025750677043e-06,
      "loss": 0.0176,
      "step": 1208600
    },
    {
      "epoch": 1.977933138259919,
      "grad_norm": 0.9603475332260132,
      "learning_rate": 6.0839598584635265e-06,
      "loss": 0.0223,
      "step": 1208620
    },
    {
      "epoch": 1.9779658686985724,
      "grad_norm": 0.316386878490448,
      "learning_rate": 6.083893966250008e-06,
      "loss": 0.0174,
      "step": 1208640
    },
    {
      "epoch": 1.9779985991372255,
      "grad_norm": 0.25165417790412903,
      "learning_rate": 6.083828074036492e-06,
      "loss": 0.0146,
      "step": 1208660
    },
    {
      "epoch": 1.9780313295758791,
      "grad_norm": 0.2511078417301178,
      "learning_rate": 6.083762181822974e-06,
      "loss": 0.0243,
      "step": 1208680
    },
    {
      "epoch": 1.9780640600145323,
      "grad_norm": 0.6129595637321472,
      "learning_rate": 6.0836962896094574e-06,
      "loss": 0.0241,
      "step": 1208700
    },
    {
      "epoch": 1.9780967904531856,
      "grad_norm": 0.4289632737636566,
      "learning_rate": 6.08363039739594e-06,
      "loss": 0.0208,
      "step": 1208720
    },
    {
      "epoch": 1.978129520891839,
      "grad_norm": 0.4147227108478546,
      "learning_rate": 6.083564505182423e-06,
      "loss": 0.0203,
      "step": 1208740
    },
    {
      "epoch": 1.9781622513304922,
      "grad_norm": 1.7876595258712769,
      "learning_rate": 6.083498612968906e-06,
      "loss": 0.0164,
      "step": 1208760
    },
    {
      "epoch": 1.9781949817691458,
      "grad_norm": 0.7125131487846375,
      "learning_rate": 6.083432720755389e-06,
      "loss": 0.0289,
      "step": 1208780
    },
    {
      "epoch": 1.978227712207799,
      "grad_norm": 0.24765174090862274,
      "learning_rate": 6.083366828541871e-06,
      "loss": 0.0175,
      "step": 1208800
    },
    {
      "epoch": 1.9782604426464525,
      "grad_norm": 0.48279860615730286,
      "learning_rate": 6.083300936328355e-06,
      "loss": 0.0243,
      "step": 1208820
    },
    {
      "epoch": 1.9782931730851057,
      "grad_norm": 1.906781554222107,
      "learning_rate": 6.083235044114837e-06,
      "loss": 0.0182,
      "step": 1208840
    },
    {
      "epoch": 1.978325903523759,
      "grad_norm": 0.5122391581535339,
      "learning_rate": 6.08316915190132e-06,
      "loss": 0.0167,
      "step": 1208860
    },
    {
      "epoch": 1.9783586339624124,
      "grad_norm": 0.2256811261177063,
      "learning_rate": 6.083103259687804e-06,
      "loss": 0.0179,
      "step": 1208880
    },
    {
      "epoch": 1.9783913644010656,
      "grad_norm": 0.07418715953826904,
      "learning_rate": 6.083037367474286e-06,
      "loss": 0.0087,
      "step": 1208900
    },
    {
      "epoch": 1.9784240948397191,
      "grad_norm": 0.2132929116487503,
      "learning_rate": 6.082971475260769e-06,
      "loss": 0.0165,
      "step": 1208920
    },
    {
      "epoch": 1.9784568252783723,
      "grad_norm": 0.3284364938735962,
      "learning_rate": 6.082905583047252e-06,
      "loss": 0.0184,
      "step": 1208940
    },
    {
      "epoch": 1.9784895557170257,
      "grad_norm": 0.421237975358963,
      "learning_rate": 6.082839690833735e-06,
      "loss": 0.0219,
      "step": 1208960
    },
    {
      "epoch": 1.978522286155679,
      "grad_norm": 0.5701918601989746,
      "learning_rate": 6.0827737986202175e-06,
      "loss": 0.0163,
      "step": 1208980
    },
    {
      "epoch": 1.9785550165943324,
      "grad_norm": 0.5555758476257324,
      "learning_rate": 6.082707906406701e-06,
      "loss": 0.0172,
      "step": 1209000
    },
    {
      "epoch": 1.9785877470329858,
      "grad_norm": 1.2329472303390503,
      "learning_rate": 6.082642014193183e-06,
      "loss": 0.0137,
      "step": 1209020
    },
    {
      "epoch": 1.978620477471639,
      "grad_norm": 0.38941413164138794,
      "learning_rate": 6.0825761219796665e-06,
      "loss": 0.0132,
      "step": 1209040
    },
    {
      "epoch": 1.9786532079102925,
      "grad_norm": 2.50170636177063,
      "learning_rate": 6.0825102297661484e-06,
      "loss": 0.0172,
      "step": 1209060
    },
    {
      "epoch": 1.9786859383489457,
      "grad_norm": 0.19540278613567352,
      "learning_rate": 6.082444337552632e-06,
      "loss": 0.0177,
      "step": 1209080
    },
    {
      "epoch": 1.978718668787599,
      "grad_norm": 0.37715253233909607,
      "learning_rate": 6.082378445339115e-06,
      "loss": 0.0191,
      "step": 1209100
    },
    {
      "epoch": 1.9787513992262524,
      "grad_norm": 0.6770355701446533,
      "learning_rate": 6.0823125531255975e-06,
      "loss": 0.0136,
      "step": 1209120
    },
    {
      "epoch": 1.9787841296649058,
      "grad_norm": 0.2519169747829437,
      "learning_rate": 6.08224666091208e-06,
      "loss": 0.0245,
      "step": 1209140
    },
    {
      "epoch": 1.9788168601035592,
      "grad_norm": 0.6614425778388977,
      "learning_rate": 6.082180768698564e-06,
      "loss": 0.0159,
      "step": 1209160
    },
    {
      "epoch": 1.9788495905422123,
      "grad_norm": 0.9012529253959656,
      "learning_rate": 6.082114876485046e-06,
      "loss": 0.0169,
      "step": 1209180
    },
    {
      "epoch": 1.978882320980866,
      "grad_norm": 1.1257119178771973,
      "learning_rate": 6.082048984271529e-06,
      "loss": 0.0153,
      "step": 1209200
    },
    {
      "epoch": 1.978915051419519,
      "grad_norm": 0.6183540225028992,
      "learning_rate": 6.081983092058013e-06,
      "loss": 0.0215,
      "step": 1209220
    },
    {
      "epoch": 1.9789477818581724,
      "grad_norm": 0.21148741245269775,
      "learning_rate": 6.081917199844495e-06,
      "loss": 0.0156,
      "step": 1209240
    },
    {
      "epoch": 1.9789805122968258,
      "grad_norm": 0.47455140948295593,
      "learning_rate": 6.081851307630978e-06,
      "loss": 0.019,
      "step": 1209260
    },
    {
      "epoch": 1.9790132427354792,
      "grad_norm": 0.16117209196090698,
      "learning_rate": 6.08178541541746e-06,
      "loss": 0.0199,
      "step": 1209280
    },
    {
      "epoch": 1.9790459731741326,
      "grad_norm": 0.4654582440853119,
      "learning_rate": 6.081719523203944e-06,
      "loss": 0.0247,
      "step": 1209300
    },
    {
      "epoch": 1.9790787036127857,
      "grad_norm": 0.2528958320617676,
      "learning_rate": 6.081653630990426e-06,
      "loss": 0.0165,
      "step": 1209320
    },
    {
      "epoch": 1.9791114340514393,
      "grad_norm": 0.5427772402763367,
      "learning_rate": 6.081587738776909e-06,
      "loss": 0.0229,
      "step": 1209340
    },
    {
      "epoch": 1.9791441644900925,
      "grad_norm": 0.08751970529556274,
      "learning_rate": 6.081521846563392e-06,
      "loss": 0.0205,
      "step": 1209360
    },
    {
      "epoch": 1.9791768949287458,
      "grad_norm": 0.4347095191478729,
      "learning_rate": 6.081455954349875e-06,
      "loss": 0.0193,
      "step": 1209380
    },
    {
      "epoch": 1.9792096253673992,
      "grad_norm": 0.7204911112785339,
      "learning_rate": 6.0813900621363576e-06,
      "loss": 0.014,
      "step": 1209400
    },
    {
      "epoch": 1.9792423558060526,
      "grad_norm": 0.34571972489356995,
      "learning_rate": 6.081324169922841e-06,
      "loss": 0.0205,
      "step": 1209420
    },
    {
      "epoch": 1.979275086244706,
      "grad_norm": 1.8128782510757446,
      "learning_rate": 6.081258277709323e-06,
      "loss": 0.0258,
      "step": 1209440
    },
    {
      "epoch": 1.979307816683359,
      "grad_norm": 0.1411302387714386,
      "learning_rate": 6.081192385495807e-06,
      "loss": 0.0125,
      "step": 1209460
    },
    {
      "epoch": 1.9793405471220127,
      "grad_norm": 0.2356332242488861,
      "learning_rate": 6.0811264932822885e-06,
      "loss": 0.0196,
      "step": 1209480
    },
    {
      "epoch": 1.9793732775606658,
      "grad_norm": 0.5721228122711182,
      "learning_rate": 6.081060601068772e-06,
      "loss": 0.0142,
      "step": 1209500
    },
    {
      "epoch": 1.9794060079993192,
      "grad_norm": 0.38397905230522156,
      "learning_rate": 6.080994708855255e-06,
      "loss": 0.0209,
      "step": 1209520
    },
    {
      "epoch": 1.9794387384379726,
      "grad_norm": 0.8403357863426208,
      "learning_rate": 6.080928816641738e-06,
      "loss": 0.0253,
      "step": 1209540
    },
    {
      "epoch": 1.9794714688766257,
      "grad_norm": 0.4595414400100708,
      "learning_rate": 6.080862924428221e-06,
      "loss": 0.0199,
      "step": 1209560
    },
    {
      "epoch": 1.9795041993152793,
      "grad_norm": 0.6717397570610046,
      "learning_rate": 6.080797032214704e-06,
      "loss": 0.0188,
      "step": 1209580
    },
    {
      "epoch": 1.9795369297539325,
      "grad_norm": 0.6075974702835083,
      "learning_rate": 6.080731140001187e-06,
      "loss": 0.027,
      "step": 1209600
    },
    {
      "epoch": 1.979569660192586,
      "grad_norm": 0.5759485363960266,
      "learning_rate": 6.080665247787669e-06,
      "loss": 0.0207,
      "step": 1209620
    },
    {
      "epoch": 1.9796023906312392,
      "grad_norm": 0.39245519042015076,
      "learning_rate": 6.080599355574153e-06,
      "loss": 0.0233,
      "step": 1209640
    },
    {
      "epoch": 1.9796351210698926,
      "grad_norm": 0.2928712069988251,
      "learning_rate": 6.080533463360635e-06,
      "loss": 0.0213,
      "step": 1209660
    },
    {
      "epoch": 1.979667851508546,
      "grad_norm": 0.2695496082305908,
      "learning_rate": 6.0804675711471185e-06,
      "loss": 0.0221,
      "step": 1209680
    },
    {
      "epoch": 1.9797005819471991,
      "grad_norm": 1.0680477619171143,
      "learning_rate": 6.0804016789336e-06,
      "loss": 0.0204,
      "step": 1209700
    },
    {
      "epoch": 1.9797333123858527,
      "grad_norm": 0.7599075436592102,
      "learning_rate": 6.080335786720084e-06,
      "loss": 0.0205,
      "step": 1209720
    },
    {
      "epoch": 1.9797660428245059,
      "grad_norm": 0.7781155705451965,
      "learning_rate": 6.080269894506567e-06,
      "loss": 0.0175,
      "step": 1209740
    },
    {
      "epoch": 1.9797987732631592,
      "grad_norm": 0.4316166341304779,
      "learning_rate": 6.080204002293049e-06,
      "loss": 0.0143,
      "step": 1209760
    },
    {
      "epoch": 1.9798315037018126,
      "grad_norm": 0.4279356598854065,
      "learning_rate": 6.080138110079532e-06,
      "loss": 0.0152,
      "step": 1209780
    },
    {
      "epoch": 1.979864234140466,
      "grad_norm": 0.076382577419281,
      "learning_rate": 6.080072217866016e-06,
      "loss": 0.0196,
      "step": 1209800
    },
    {
      "epoch": 1.9798969645791193,
      "grad_norm": 0.5827566981315613,
      "learning_rate": 6.080006325652498e-06,
      "loss": 0.019,
      "step": 1209820
    },
    {
      "epoch": 1.9799296950177725,
      "grad_norm": 0.09487981349229813,
      "learning_rate": 6.079940433438981e-06,
      "loss": 0.0216,
      "step": 1209840
    },
    {
      "epoch": 1.979962425456426,
      "grad_norm": 1.3072113990783691,
      "learning_rate": 6.079874541225463e-06,
      "loss": 0.0192,
      "step": 1209860
    },
    {
      "epoch": 1.9799951558950792,
      "grad_norm": 0.08990258723497391,
      "learning_rate": 6.079808649011947e-06,
      "loss": 0.0184,
      "step": 1209880
    },
    {
      "epoch": 1.9800278863337326,
      "grad_norm": 0.28170162439346313,
      "learning_rate": 6.07974275679843e-06,
      "loss": 0.017,
      "step": 1209900
    },
    {
      "epoch": 1.980060616772386,
      "grad_norm": 1.2998418807983398,
      "learning_rate": 6.079676864584912e-06,
      "loss": 0.021,
      "step": 1209920
    },
    {
      "epoch": 1.9800933472110394,
      "grad_norm": 0.16241708397865295,
      "learning_rate": 6.079610972371396e-06,
      "loss": 0.0216,
      "step": 1209940
    },
    {
      "epoch": 1.9801260776496927,
      "grad_norm": 0.28409722447395325,
      "learning_rate": 6.0795450801578785e-06,
      "loss": 0.0099,
      "step": 1209960
    },
    {
      "epoch": 1.9801588080883459,
      "grad_norm": 0.75894695520401,
      "learning_rate": 6.079479187944361e-06,
      "loss": 0.0204,
      "step": 1209980
    },
    {
      "epoch": 1.9801915385269995,
      "grad_norm": 0.39313027262687683,
      "learning_rate": 6.079413295730844e-06,
      "loss": 0.0228,
      "step": 1210000
    },
    {
      "epoch": 1.9802242689656526,
      "grad_norm": 0.426608145236969,
      "learning_rate": 6.0793474035173276e-06,
      "loss": 0.0185,
      "step": 1210020
    },
    {
      "epoch": 1.980256999404306,
      "grad_norm": 0.20050789415836334,
      "learning_rate": 6.0792815113038095e-06,
      "loss": 0.0158,
      "step": 1210040
    },
    {
      "epoch": 1.9802897298429594,
      "grad_norm": 0.8414369225502014,
      "learning_rate": 6.079215619090293e-06,
      "loss": 0.0143,
      "step": 1210060
    },
    {
      "epoch": 1.9803224602816127,
      "grad_norm": 0.40261462330818176,
      "learning_rate": 6.079149726876775e-06,
      "loss": 0.0142,
      "step": 1210080
    },
    {
      "epoch": 1.9803551907202661,
      "grad_norm": 0.658133327960968,
      "learning_rate": 6.0790838346632585e-06,
      "loss": 0.0123,
      "step": 1210100
    },
    {
      "epoch": 1.9803879211589193,
      "grad_norm": 0.5292583107948303,
      "learning_rate": 6.079017942449741e-06,
      "loss": 0.012,
      "step": 1210120
    },
    {
      "epoch": 1.9804206515975729,
      "grad_norm": 0.3446936309337616,
      "learning_rate": 6.078952050236224e-06,
      "loss": 0.0181,
      "step": 1210140
    },
    {
      "epoch": 1.980453382036226,
      "grad_norm": 0.5061699748039246,
      "learning_rate": 6.078886158022707e-06,
      "loss": 0.0158,
      "step": 1210160
    },
    {
      "epoch": 1.9804861124748794,
      "grad_norm": 0.5961516499519348,
      "learning_rate": 6.07882026580919e-06,
      "loss": 0.0145,
      "step": 1210180
    },
    {
      "epoch": 1.9805188429135328,
      "grad_norm": 0.25847509503364563,
      "learning_rate": 6.078754373595672e-06,
      "loss": 0.018,
      "step": 1210200
    },
    {
      "epoch": 1.980551573352186,
      "grad_norm": 0.5583288073539734,
      "learning_rate": 6.078688481382156e-06,
      "loss": 0.0172,
      "step": 1210220
    },
    {
      "epoch": 1.9805843037908395,
      "grad_norm": 0.9702983498573303,
      "learning_rate": 6.078622589168638e-06,
      "loss": 0.02,
      "step": 1210240
    },
    {
      "epoch": 1.9806170342294926,
      "grad_norm": 0.6062489151954651,
      "learning_rate": 6.078556696955121e-06,
      "loss": 0.0182,
      "step": 1210260
    },
    {
      "epoch": 1.9806497646681462,
      "grad_norm": 0.26003098487854004,
      "learning_rate": 6.078490804741605e-06,
      "loss": 0.0184,
      "step": 1210280
    },
    {
      "epoch": 1.9806824951067994,
      "grad_norm": 0.36518436670303345,
      "learning_rate": 6.078424912528087e-06,
      "loss": 0.0204,
      "step": 1210300
    },
    {
      "epoch": 1.9807152255454528,
      "grad_norm": 0.27793005108833313,
      "learning_rate": 6.07835902031457e-06,
      "loss": 0.0113,
      "step": 1210320
    },
    {
      "epoch": 1.9807479559841061,
      "grad_norm": 0.37314072251319885,
      "learning_rate": 6.078293128101052e-06,
      "loss": 0.0135,
      "step": 1210340
    },
    {
      "epoch": 1.9807806864227593,
      "grad_norm": 0.7633456587791443,
      "learning_rate": 6.078227235887536e-06,
      "loss": 0.0188,
      "step": 1210360
    },
    {
      "epoch": 1.9808134168614129,
      "grad_norm": 0.3628600239753723,
      "learning_rate": 6.078161343674019e-06,
      "loss": 0.0228,
      "step": 1210380
    },
    {
      "epoch": 1.980846147300066,
      "grad_norm": 0.20869281888008118,
      "learning_rate": 6.078095451460501e-06,
      "loss": 0.0122,
      "step": 1210400
    },
    {
      "epoch": 1.9808788777387194,
      "grad_norm": 0.5369300842285156,
      "learning_rate": 6.078029559246984e-06,
      "loss": 0.018,
      "step": 1210420
    },
    {
      "epoch": 1.9809116081773728,
      "grad_norm": 0.9030117988586426,
      "learning_rate": 6.077963667033468e-06,
      "loss": 0.0135,
      "step": 1210440
    },
    {
      "epoch": 1.9809443386160261,
      "grad_norm": 0.28532055020332336,
      "learning_rate": 6.0778977748199495e-06,
      "loss": 0.0111,
      "step": 1210460
    },
    {
      "epoch": 1.9809770690546795,
      "grad_norm": 0.7732106447219849,
      "learning_rate": 6.077831882606433e-06,
      "loss": 0.0219,
      "step": 1210480
    },
    {
      "epoch": 1.9810097994933327,
      "grad_norm": 0.6873599290847778,
      "learning_rate": 6.077765990392915e-06,
      "loss": 0.014,
      "step": 1210500
    },
    {
      "epoch": 1.9810425299319863,
      "grad_norm": 0.8641045689582825,
      "learning_rate": 6.077700098179399e-06,
      "loss": 0.0135,
      "step": 1210520
    },
    {
      "epoch": 1.9810752603706394,
      "grad_norm": 0.5058709383010864,
      "learning_rate": 6.077634205965881e-06,
      "loss": 0.0165,
      "step": 1210540
    },
    {
      "epoch": 1.9811079908092928,
      "grad_norm": 0.10950645804405212,
      "learning_rate": 6.077568313752364e-06,
      "loss": 0.015,
      "step": 1210560
    },
    {
      "epoch": 1.9811407212479462,
      "grad_norm": 0.4394698143005371,
      "learning_rate": 6.077502421538847e-06,
      "loss": 0.0113,
      "step": 1210580
    },
    {
      "epoch": 1.9811734516865995,
      "grad_norm": 1.0219800472259521,
      "learning_rate": 6.07743652932533e-06,
      "loss": 0.0154,
      "step": 1210600
    },
    {
      "epoch": 1.981206182125253,
      "grad_norm": 0.3471620976924896,
      "learning_rate": 6.077370637111813e-06,
      "loss": 0.0219,
      "step": 1210620
    },
    {
      "epoch": 1.981238912563906,
      "grad_norm": 1.2771166563034058,
      "learning_rate": 6.077304744898296e-06,
      "loss": 0.0171,
      "step": 1210640
    },
    {
      "epoch": 1.9812716430025596,
      "grad_norm": 0.23632942140102386,
      "learning_rate": 6.0772388526847795e-06,
      "loss": 0.0152,
      "step": 1210660
    },
    {
      "epoch": 1.9813043734412128,
      "grad_norm": 0.3153890371322632,
      "learning_rate": 6.077172960471261e-06,
      "loss": 0.0204,
      "step": 1210680
    },
    {
      "epoch": 1.9813371038798662,
      "grad_norm": 0.29089781641960144,
      "learning_rate": 6.077107068257745e-06,
      "loss": 0.0151,
      "step": 1210700
    },
    {
      "epoch": 1.9813698343185195,
      "grad_norm": 0.43885356187820435,
      "learning_rate": 6.077041176044227e-06,
      "loss": 0.0171,
      "step": 1210720
    },
    {
      "epoch": 1.981402564757173,
      "grad_norm": 0.27203795313835144,
      "learning_rate": 6.0769752838307104e-06,
      "loss": 0.0171,
      "step": 1210740
    },
    {
      "epoch": 1.9814352951958263,
      "grad_norm": 0.44839614629745483,
      "learning_rate": 6.076909391617193e-06,
      "loss": 0.0214,
      "step": 1210760
    },
    {
      "epoch": 1.9814680256344794,
      "grad_norm": 0.38120967149734497,
      "learning_rate": 6.076843499403676e-06,
      "loss": 0.0161,
      "step": 1210780
    },
    {
      "epoch": 1.981500756073133,
      "grad_norm": 0.14479394257068634,
      "learning_rate": 6.076777607190159e-06,
      "loss": 0.0165,
      "step": 1210800
    },
    {
      "epoch": 1.9815334865117862,
      "grad_norm": 0.7124380469322205,
      "learning_rate": 6.076711714976642e-06,
      "loss": 0.0261,
      "step": 1210820
    },
    {
      "epoch": 1.9815662169504396,
      "grad_norm": 1.3335061073303223,
      "learning_rate": 6.076645822763124e-06,
      "loss": 0.027,
      "step": 1210840
    },
    {
      "epoch": 1.981598947389093,
      "grad_norm": 0.9080885648727417,
      "learning_rate": 6.076579930549608e-06,
      "loss": 0.0258,
      "step": 1210860
    },
    {
      "epoch": 1.9816316778277463,
      "grad_norm": 0.576209306716919,
      "learning_rate": 6.07651403833609e-06,
      "loss": 0.0157,
      "step": 1210880
    },
    {
      "epoch": 1.9816644082663997,
      "grad_norm": 0.276289701461792,
      "learning_rate": 6.076448146122573e-06,
      "loss": 0.0202,
      "step": 1210900
    },
    {
      "epoch": 1.9816971387050528,
      "grad_norm": 0.29734230041503906,
      "learning_rate": 6.076382253909056e-06,
      "loss": 0.0243,
      "step": 1210920
    },
    {
      "epoch": 1.9817298691437064,
      "grad_norm": 0.5323484539985657,
      "learning_rate": 6.076316361695539e-06,
      "loss": 0.0214,
      "step": 1210940
    },
    {
      "epoch": 1.9817625995823596,
      "grad_norm": 0.8109753131866455,
      "learning_rate": 6.076250469482022e-06,
      "loss": 0.0117,
      "step": 1210960
    },
    {
      "epoch": 1.981795330021013,
      "grad_norm": 0.7282178997993469,
      "learning_rate": 6.076184577268505e-06,
      "loss": 0.017,
      "step": 1210980
    },
    {
      "epoch": 1.9818280604596663,
      "grad_norm": 0.2733805775642395,
      "learning_rate": 6.076118685054988e-06,
      "loss": 0.0202,
      "step": 1211000
    },
    {
      "epoch": 1.9818607908983195,
      "grad_norm": 0.12867608666419983,
      "learning_rate": 6.0760527928414705e-06,
      "loss": 0.0105,
      "step": 1211020
    },
    {
      "epoch": 1.981893521336973,
      "grad_norm": 0.7402322292327881,
      "learning_rate": 6.075986900627954e-06,
      "loss": 0.0246,
      "step": 1211040
    },
    {
      "epoch": 1.9819262517756262,
      "grad_norm": 0.13143962621688843,
      "learning_rate": 6.075921008414436e-06,
      "loss": 0.015,
      "step": 1211060
    },
    {
      "epoch": 1.9819589822142798,
      "grad_norm": 0.7990437746047974,
      "learning_rate": 6.0758551162009196e-06,
      "loss": 0.0189,
      "step": 1211080
    },
    {
      "epoch": 1.981991712652933,
      "grad_norm": 0.41520562767982483,
      "learning_rate": 6.0757892239874014e-06,
      "loss": 0.0175,
      "step": 1211100
    },
    {
      "epoch": 1.9820244430915863,
      "grad_norm": 1.040459394454956,
      "learning_rate": 6.075723331773885e-06,
      "loss": 0.0154,
      "step": 1211120
    },
    {
      "epoch": 1.9820571735302397,
      "grad_norm": 0.4451155960559845,
      "learning_rate": 6.075657439560368e-06,
      "loss": 0.014,
      "step": 1211140
    },
    {
      "epoch": 1.9820899039688928,
      "grad_norm": 0.5743030905723572,
      "learning_rate": 6.0755915473468505e-06,
      "loss": 0.0116,
      "step": 1211160
    },
    {
      "epoch": 1.9821226344075464,
      "grad_norm": 0.880357563495636,
      "learning_rate": 6.075525655133333e-06,
      "loss": 0.0161,
      "step": 1211180
    },
    {
      "epoch": 1.9821553648461996,
      "grad_norm": 0.9685976505279541,
      "learning_rate": 6.075459762919817e-06,
      "loss": 0.0143,
      "step": 1211200
    },
    {
      "epoch": 1.982188095284853,
      "grad_norm": 1.5235110521316528,
      "learning_rate": 6.075393870706299e-06,
      "loss": 0.0263,
      "step": 1211220
    },
    {
      "epoch": 1.9822208257235063,
      "grad_norm": 0.6420862078666687,
      "learning_rate": 6.075327978492782e-06,
      "loss": 0.0147,
      "step": 1211240
    },
    {
      "epoch": 1.9822535561621597,
      "grad_norm": 1.675978183746338,
      "learning_rate": 6.075262086279264e-06,
      "loss": 0.0217,
      "step": 1211260
    },
    {
      "epoch": 1.982286286600813,
      "grad_norm": 0.8503998517990112,
      "learning_rate": 6.075196194065748e-06,
      "loss": 0.0218,
      "step": 1211280
    },
    {
      "epoch": 1.9823190170394662,
      "grad_norm": 0.045960549265146255,
      "learning_rate": 6.07513030185223e-06,
      "loss": 0.0157,
      "step": 1211300
    },
    {
      "epoch": 1.9823517474781198,
      "grad_norm": 0.9130628108978271,
      "learning_rate": 6.075064409638713e-06,
      "loss": 0.0257,
      "step": 1211320
    },
    {
      "epoch": 1.982384477916773,
      "grad_norm": 0.16435977816581726,
      "learning_rate": 6.074998517425197e-06,
      "loss": 0.0181,
      "step": 1211340
    },
    {
      "epoch": 1.9824172083554263,
      "grad_norm": 0.4240801930427551,
      "learning_rate": 6.074932625211679e-06,
      "loss": 0.0141,
      "step": 1211360
    },
    {
      "epoch": 1.9824499387940797,
      "grad_norm": 0.38532260060310364,
      "learning_rate": 6.074866732998162e-06,
      "loss": 0.0142,
      "step": 1211380
    },
    {
      "epoch": 1.982482669232733,
      "grad_norm": 0.24844641983509064,
      "learning_rate": 6.074800840784645e-06,
      "loss": 0.0178,
      "step": 1211400
    },
    {
      "epoch": 1.9825153996713865,
      "grad_norm": 0.844636082649231,
      "learning_rate": 6.074734948571128e-06,
      "loss": 0.0159,
      "step": 1211420
    },
    {
      "epoch": 1.9825481301100396,
      "grad_norm": 0.29244017601013184,
      "learning_rate": 6.0746690563576106e-06,
      "loss": 0.02,
      "step": 1211440
    },
    {
      "epoch": 1.9825808605486932,
      "grad_norm": 0.5589752793312073,
      "learning_rate": 6.074603164144094e-06,
      "loss": 0.02,
      "step": 1211460
    },
    {
      "epoch": 1.9826135909873464,
      "grad_norm": 0.3001020848751068,
      "learning_rate": 6.074537271930576e-06,
      "loss": 0.0155,
      "step": 1211480
    },
    {
      "epoch": 1.9826463214259997,
      "grad_norm": 0.14820222556591034,
      "learning_rate": 6.07447137971706e-06,
      "loss": 0.0175,
      "step": 1211500
    },
    {
      "epoch": 1.982679051864653,
      "grad_norm": 0.4522832930088043,
      "learning_rate": 6.0744054875035415e-06,
      "loss": 0.0276,
      "step": 1211520
    },
    {
      "epoch": 1.9827117823033065,
      "grad_norm": 0.11047659814357758,
      "learning_rate": 6.074339595290025e-06,
      "loss": 0.0098,
      "step": 1211540
    },
    {
      "epoch": 1.9827445127419598,
      "grad_norm": 1.806832194328308,
      "learning_rate": 6.074273703076508e-06,
      "loss": 0.0141,
      "step": 1211560
    },
    {
      "epoch": 1.982777243180613,
      "grad_norm": 0.19608083367347717,
      "learning_rate": 6.074207810862991e-06,
      "loss": 0.0112,
      "step": 1211580
    },
    {
      "epoch": 1.9828099736192666,
      "grad_norm": 0.4021643102169037,
      "learning_rate": 6.074141918649473e-06,
      "loss": 0.0221,
      "step": 1211600
    },
    {
      "epoch": 1.9828427040579197,
      "grad_norm": 0.6777587532997131,
      "learning_rate": 6.074076026435957e-06,
      "loss": 0.0164,
      "step": 1211620
    },
    {
      "epoch": 1.982875434496573,
      "grad_norm": 1.399693489074707,
      "learning_rate": 6.074010134222439e-06,
      "loss": 0.0212,
      "step": 1211640
    },
    {
      "epoch": 1.9829081649352265,
      "grad_norm": 0.16744232177734375,
      "learning_rate": 6.073944242008922e-06,
      "loss": 0.0155,
      "step": 1211660
    },
    {
      "epoch": 1.9829408953738799,
      "grad_norm": 0.8862906694412231,
      "learning_rate": 6.073878349795406e-06,
      "loss": 0.0245,
      "step": 1211680
    },
    {
      "epoch": 1.9829736258125332,
      "grad_norm": 0.050185978412628174,
      "learning_rate": 6.073812457581888e-06,
      "loss": 0.0127,
      "step": 1211700
    },
    {
      "epoch": 1.9830063562511864,
      "grad_norm": 0.4330303966999054,
      "learning_rate": 6.0737465653683715e-06,
      "loss": 0.0208,
      "step": 1211720
    },
    {
      "epoch": 1.98303908668984,
      "grad_norm": 0.4900483191013336,
      "learning_rate": 6.073680673154853e-06,
      "loss": 0.0155,
      "step": 1211740
    },
    {
      "epoch": 1.9830718171284931,
      "grad_norm": 0.3848011791706085,
      "learning_rate": 6.073614780941337e-06,
      "loss": 0.0199,
      "step": 1211760
    },
    {
      "epoch": 1.9831045475671465,
      "grad_norm": 0.6140270233154297,
      "learning_rate": 6.07354888872782e-06,
      "loss": 0.0163,
      "step": 1211780
    },
    {
      "epoch": 1.9831372780057999,
      "grad_norm": 0.1441098153591156,
      "learning_rate": 6.073482996514302e-06,
      "loss": 0.0143,
      "step": 1211800
    },
    {
      "epoch": 1.983170008444453,
      "grad_norm": 0.7533811926841736,
      "learning_rate": 6.073417104300785e-06,
      "loss": 0.0267,
      "step": 1211820
    },
    {
      "epoch": 1.9832027388831066,
      "grad_norm": 0.1872750073671341,
      "learning_rate": 6.073351212087269e-06,
      "loss": 0.0213,
      "step": 1211840
    },
    {
      "epoch": 1.9832354693217598,
      "grad_norm": 0.67721027135849,
      "learning_rate": 6.073285319873751e-06,
      "loss": 0.0145,
      "step": 1211860
    },
    {
      "epoch": 1.9832681997604134,
      "grad_norm": 0.1671876311302185,
      "learning_rate": 6.073219427660234e-06,
      "loss": 0.0154,
      "step": 1211880
    },
    {
      "epoch": 1.9833009301990665,
      "grad_norm": 0.35070788860321045,
      "learning_rate": 6.073153535446716e-06,
      "loss": 0.0181,
      "step": 1211900
    },
    {
      "epoch": 1.9833336606377199,
      "grad_norm": 0.8091742992401123,
      "learning_rate": 6.0730876432332e-06,
      "loss": 0.0145,
      "step": 1211920
    },
    {
      "epoch": 1.9833663910763732,
      "grad_norm": 0.24311748147010803,
      "learning_rate": 6.0730217510196824e-06,
      "loss": 0.0243,
      "step": 1211940
    },
    {
      "epoch": 1.9833991215150264,
      "grad_norm": 0.2673688232898712,
      "learning_rate": 6.072955858806165e-06,
      "loss": 0.0126,
      "step": 1211960
    },
    {
      "epoch": 1.98343185195368,
      "grad_norm": 0.5344060659408569,
      "learning_rate": 6.072889966592648e-06,
      "loss": 0.0173,
      "step": 1211980
    },
    {
      "epoch": 1.9834645823923331,
      "grad_norm": 0.12007725983858109,
      "learning_rate": 6.0728240743791315e-06,
      "loss": 0.0192,
      "step": 1212000
    },
    {
      "epoch": 1.9834973128309865,
      "grad_norm": 0.8205261826515198,
      "learning_rate": 6.072758182165614e-06,
      "loss": 0.0195,
      "step": 1212020
    },
    {
      "epoch": 1.9835300432696399,
      "grad_norm": 3.9590699672698975,
      "learning_rate": 6.072692289952097e-06,
      "loss": 0.0193,
      "step": 1212040
    },
    {
      "epoch": 1.9835627737082933,
      "grad_norm": 0.30565011501312256,
      "learning_rate": 6.072626397738581e-06,
      "loss": 0.0186,
      "step": 1212060
    },
    {
      "epoch": 1.9835955041469466,
      "grad_norm": 0.7342587113380432,
      "learning_rate": 6.0725605055250625e-06,
      "loss": 0.0184,
      "step": 1212080
    },
    {
      "epoch": 1.9836282345855998,
      "grad_norm": 0.5370945334434509,
      "learning_rate": 6.072494613311546e-06,
      "loss": 0.0139,
      "step": 1212100
    },
    {
      "epoch": 1.9836609650242534,
      "grad_norm": 0.3091007471084595,
      "learning_rate": 6.072428721098028e-06,
      "loss": 0.0188,
      "step": 1212120
    },
    {
      "epoch": 1.9836936954629065,
      "grad_norm": 0.30543777346611023,
      "learning_rate": 6.0723628288845115e-06,
      "loss": 0.0197,
      "step": 1212140
    },
    {
      "epoch": 1.98372642590156,
      "grad_norm": 0.536614179611206,
      "learning_rate": 6.0722969366709934e-06,
      "loss": 0.0215,
      "step": 1212160
    },
    {
      "epoch": 1.9837591563402133,
      "grad_norm": 0.3070943057537079,
      "learning_rate": 6.072231044457477e-06,
      "loss": 0.0189,
      "step": 1212180
    },
    {
      "epoch": 1.9837918867788666,
      "grad_norm": 0.8485475182533264,
      "learning_rate": 6.07216515224396e-06,
      "loss": 0.0145,
      "step": 1212200
    },
    {
      "epoch": 1.98382461721752,
      "grad_norm": 1.0108084678649902,
      "learning_rate": 6.072099260030443e-06,
      "loss": 0.0214,
      "step": 1212220
    },
    {
      "epoch": 1.9838573476561732,
      "grad_norm": 0.11863292008638382,
      "learning_rate": 6.072033367816925e-06,
      "loss": 0.0217,
      "step": 1212240
    },
    {
      "epoch": 1.9838900780948268,
      "grad_norm": 0.4632977247238159,
      "learning_rate": 6.071967475603409e-06,
      "loss": 0.0211,
      "step": 1212260
    },
    {
      "epoch": 1.98392280853348,
      "grad_norm": 0.8171110153198242,
      "learning_rate": 6.071901583389891e-06,
      "loss": 0.0121,
      "step": 1212280
    },
    {
      "epoch": 1.9839555389721333,
      "grad_norm": 0.5046077966690063,
      "learning_rate": 6.071835691176374e-06,
      "loss": 0.0161,
      "step": 1212300
    },
    {
      "epoch": 1.9839882694107867,
      "grad_norm": 0.35499805212020874,
      "learning_rate": 6.071769798962856e-06,
      "loss": 0.0154,
      "step": 1212320
    },
    {
      "epoch": 1.98402099984944,
      "grad_norm": 0.5385482311248779,
      "learning_rate": 6.07170390674934e-06,
      "loss": 0.0169,
      "step": 1212340
    },
    {
      "epoch": 1.9840537302880934,
      "grad_norm": 0.3106951415538788,
      "learning_rate": 6.0716380145358225e-06,
      "loss": 0.0202,
      "step": 1212360
    },
    {
      "epoch": 1.9840864607267465,
      "grad_norm": 0.22096621990203857,
      "learning_rate": 6.071572122322305e-06,
      "loss": 0.0196,
      "step": 1212380
    },
    {
      "epoch": 1.9841191911654001,
      "grad_norm": 0.21774448454380035,
      "learning_rate": 6.071506230108789e-06,
      "loss": 0.0139,
      "step": 1212400
    },
    {
      "epoch": 1.9841519216040533,
      "grad_norm": 1.705299973487854,
      "learning_rate": 6.071440337895272e-06,
      "loss": 0.0173,
      "step": 1212420
    },
    {
      "epoch": 1.9841846520427067,
      "grad_norm": 0.32697007060050964,
      "learning_rate": 6.071374445681754e-06,
      "loss": 0.0149,
      "step": 1212440
    },
    {
      "epoch": 1.98421738248136,
      "grad_norm": 0.45901432633399963,
      "learning_rate": 6.071308553468237e-06,
      "loss": 0.0154,
      "step": 1212460
    },
    {
      "epoch": 1.9842501129200134,
      "grad_norm": 3.558511257171631,
      "learning_rate": 6.071242661254721e-06,
      "loss": 0.0189,
      "step": 1212480
    },
    {
      "epoch": 1.9842828433586668,
      "grad_norm": 0.33248060941696167,
      "learning_rate": 6.0711767690412025e-06,
      "loss": 0.0219,
      "step": 1212500
    },
    {
      "epoch": 1.98431557379732,
      "grad_norm": 0.7555317878723145,
      "learning_rate": 6.071110876827686e-06,
      "loss": 0.017,
      "step": 1212520
    },
    {
      "epoch": 1.9843483042359735,
      "grad_norm": 0.14963595569133759,
      "learning_rate": 6.071044984614168e-06,
      "loss": 0.0152,
      "step": 1212540
    },
    {
      "epoch": 1.9843810346746267,
      "grad_norm": 0.22816354036331177,
      "learning_rate": 6.070979092400652e-06,
      "loss": 0.0111,
      "step": 1212560
    },
    {
      "epoch": 1.98441376511328,
      "grad_norm": 0.945950448513031,
      "learning_rate": 6.070913200187134e-06,
      "loss": 0.0187,
      "step": 1212580
    },
    {
      "epoch": 1.9844464955519334,
      "grad_norm": 0.2289992868900299,
      "learning_rate": 6.070847307973617e-06,
      "loss": 0.0145,
      "step": 1212600
    },
    {
      "epoch": 1.9844792259905866,
      "grad_norm": 0.44959214329719543,
      "learning_rate": 6.0707814157601e-06,
      "loss": 0.0164,
      "step": 1212620
    },
    {
      "epoch": 1.9845119564292402,
      "grad_norm": 0.2430230677127838,
      "learning_rate": 6.070715523546583e-06,
      "loss": 0.0197,
      "step": 1212640
    },
    {
      "epoch": 1.9845446868678933,
      "grad_norm": 0.23630006611347198,
      "learning_rate": 6.070649631333065e-06,
      "loss": 0.0181,
      "step": 1212660
    },
    {
      "epoch": 1.9845774173065467,
      "grad_norm": 0.2957075834274292,
      "learning_rate": 6.070583739119549e-06,
      "loss": 0.0117,
      "step": 1212680
    },
    {
      "epoch": 1.9846101477452,
      "grad_norm": 0.36760103702545166,
      "learning_rate": 6.070517846906031e-06,
      "loss": 0.0152,
      "step": 1212700
    },
    {
      "epoch": 1.9846428781838534,
      "grad_norm": 1.1602269411087036,
      "learning_rate": 6.070451954692514e-06,
      "loss": 0.016,
      "step": 1212720
    },
    {
      "epoch": 1.9846756086225068,
      "grad_norm": 0.4692925810813904,
      "learning_rate": 6.070386062478998e-06,
      "loss": 0.0145,
      "step": 1212740
    },
    {
      "epoch": 1.98470833906116,
      "grad_norm": 0.3186277151107788,
      "learning_rate": 6.07032017026548e-06,
      "loss": 0.0142,
      "step": 1212760
    },
    {
      "epoch": 1.9847410694998135,
      "grad_norm": 0.7916399836540222,
      "learning_rate": 6.0702542780519634e-06,
      "loss": 0.0118,
      "step": 1212780
    },
    {
      "epoch": 1.9847737999384667,
      "grad_norm": 0.1831713765859604,
      "learning_rate": 6.070188385838446e-06,
      "loss": 0.0153,
      "step": 1212800
    },
    {
      "epoch": 1.98480653037712,
      "grad_norm": 0.4337488114833832,
      "learning_rate": 6.070122493624929e-06,
      "loss": 0.0152,
      "step": 1212820
    },
    {
      "epoch": 1.9848392608157734,
      "grad_norm": 0.14997778832912445,
      "learning_rate": 6.070056601411412e-06,
      "loss": 0.0109,
      "step": 1212840
    },
    {
      "epoch": 1.9848719912544268,
      "grad_norm": 0.3186628818511963,
      "learning_rate": 6.069990709197895e-06,
      "loss": 0.0175,
      "step": 1212860
    },
    {
      "epoch": 1.9849047216930802,
      "grad_norm": 0.48696938157081604,
      "learning_rate": 6.069924816984377e-06,
      "loss": 0.0169,
      "step": 1212880
    },
    {
      "epoch": 1.9849374521317333,
      "grad_norm": 0.5812217593193054,
      "learning_rate": 6.069858924770861e-06,
      "loss": 0.0231,
      "step": 1212900
    },
    {
      "epoch": 1.984970182570387,
      "grad_norm": 0.5771549940109253,
      "learning_rate": 6.069793032557343e-06,
      "loss": 0.0166,
      "step": 1212920
    },
    {
      "epoch": 1.98500291300904,
      "grad_norm": 0.540196418762207,
      "learning_rate": 6.069727140343826e-06,
      "loss": 0.0166,
      "step": 1212940
    },
    {
      "epoch": 1.9850356434476935,
      "grad_norm": 0.04665188491344452,
      "learning_rate": 6.069661248130309e-06,
      "loss": 0.0143,
      "step": 1212960
    },
    {
      "epoch": 1.9850683738863468,
      "grad_norm": 1.1195456981658936,
      "learning_rate": 6.069595355916792e-06,
      "loss": 0.0187,
      "step": 1212980
    },
    {
      "epoch": 1.9851011043250002,
      "grad_norm": 1.6381704807281494,
      "learning_rate": 6.0695294637032744e-06,
      "loss": 0.0177,
      "step": 1213000
    },
    {
      "epoch": 1.9851338347636536,
      "grad_norm": 0.8381497859954834,
      "learning_rate": 6.069463571489758e-06,
      "loss": 0.024,
      "step": 1213020
    },
    {
      "epoch": 1.9851665652023067,
      "grad_norm": 1.3615020513534546,
      "learning_rate": 6.06939767927624e-06,
      "loss": 0.0128,
      "step": 1213040
    },
    {
      "epoch": 1.9851992956409603,
      "grad_norm": 1.1307810544967651,
      "learning_rate": 6.0693317870627235e-06,
      "loss": 0.0183,
      "step": 1213060
    },
    {
      "epoch": 1.9852320260796135,
      "grad_norm": 0.169737309217453,
      "learning_rate": 6.069265894849207e-06,
      "loss": 0.0138,
      "step": 1213080
    },
    {
      "epoch": 1.9852647565182668,
      "grad_norm": 1.4417834281921387,
      "learning_rate": 6.069200002635689e-06,
      "loss": 0.0272,
      "step": 1213100
    },
    {
      "epoch": 1.9852974869569202,
      "grad_norm": 0.2519344091415405,
      "learning_rate": 6.0691341104221726e-06,
      "loss": 0.0126,
      "step": 1213120
    },
    {
      "epoch": 1.9853302173955736,
      "grad_norm": 0.3874015510082245,
      "learning_rate": 6.0690682182086545e-06,
      "loss": 0.0164,
      "step": 1213140
    },
    {
      "epoch": 1.985362947834227,
      "grad_norm": 0.0918193832039833,
      "learning_rate": 6.069002325995138e-06,
      "loss": 0.0247,
      "step": 1213160
    },
    {
      "epoch": 1.98539567827288,
      "grad_norm": 1.290917992591858,
      "learning_rate": 6.06893643378162e-06,
      "loss": 0.0181,
      "step": 1213180
    },
    {
      "epoch": 1.9854284087115337,
      "grad_norm": 0.5440295934677124,
      "learning_rate": 6.0688705415681035e-06,
      "loss": 0.016,
      "step": 1213200
    },
    {
      "epoch": 1.9854611391501868,
      "grad_norm": 0.2713179588317871,
      "learning_rate": 6.068804649354586e-06,
      "loss": 0.0139,
      "step": 1213220
    },
    {
      "epoch": 1.9854938695888402,
      "grad_norm": 0.6928325891494751,
      "learning_rate": 6.068738757141069e-06,
      "loss": 0.0202,
      "step": 1213240
    },
    {
      "epoch": 1.9855266000274936,
      "grad_norm": 0.6444659233093262,
      "learning_rate": 6.068672864927552e-06,
      "loss": 0.0195,
      "step": 1213260
    },
    {
      "epoch": 1.9855593304661467,
      "grad_norm": 0.7076519131660461,
      "learning_rate": 6.068606972714035e-06,
      "loss": 0.0223,
      "step": 1213280
    },
    {
      "epoch": 1.9855920609048003,
      "grad_norm": 0.4995426833629608,
      "learning_rate": 6.068541080500517e-06,
      "loss": 0.0193,
      "step": 1213300
    },
    {
      "epoch": 1.9856247913434535,
      "grad_norm": 0.8320302963256836,
      "learning_rate": 6.068475188287001e-06,
      "loss": 0.0198,
      "step": 1213320
    },
    {
      "epoch": 1.985657521782107,
      "grad_norm": 1.1512041091918945,
      "learning_rate": 6.068409296073483e-06,
      "loss": 0.0209,
      "step": 1213340
    },
    {
      "epoch": 1.9856902522207602,
      "grad_norm": 0.22934062778949738,
      "learning_rate": 6.068343403859966e-06,
      "loss": 0.0193,
      "step": 1213360
    },
    {
      "epoch": 1.9857229826594136,
      "grad_norm": 0.23069675266742706,
      "learning_rate": 6.068277511646449e-06,
      "loss": 0.0183,
      "step": 1213380
    },
    {
      "epoch": 1.985755713098067,
      "grad_norm": 0.2894457280635834,
      "learning_rate": 6.068211619432932e-06,
      "loss": 0.0143,
      "step": 1213400
    },
    {
      "epoch": 1.9857884435367201,
      "grad_norm": 0.2642157971858978,
      "learning_rate": 6.068145727219415e-06,
      "loss": 0.0189,
      "step": 1213420
    },
    {
      "epoch": 1.9858211739753737,
      "grad_norm": 0.16690772771835327,
      "learning_rate": 6.068079835005898e-06,
      "loss": 0.0198,
      "step": 1213440
    },
    {
      "epoch": 1.9858539044140269,
      "grad_norm": 0.5837579369544983,
      "learning_rate": 6.068013942792381e-06,
      "loss": 0.0201,
      "step": 1213460
    },
    {
      "epoch": 1.9858866348526802,
      "grad_norm": 0.48803967237472534,
      "learning_rate": 6.0679480505788636e-06,
      "loss": 0.017,
      "step": 1213480
    },
    {
      "epoch": 1.9859193652913336,
      "grad_norm": 1.8550053834915161,
      "learning_rate": 6.067882158365347e-06,
      "loss": 0.0182,
      "step": 1213500
    },
    {
      "epoch": 1.985952095729987,
      "grad_norm": 0.9564182758331299,
      "learning_rate": 6.067816266151829e-06,
      "loss": 0.0191,
      "step": 1213520
    },
    {
      "epoch": 1.9859848261686404,
      "grad_norm": 0.44977280497550964,
      "learning_rate": 6.067750373938313e-06,
      "loss": 0.0119,
      "step": 1213540
    },
    {
      "epoch": 1.9860175566072935,
      "grad_norm": 0.8364108800888062,
      "learning_rate": 6.0676844817247945e-06,
      "loss": 0.0154,
      "step": 1213560
    },
    {
      "epoch": 1.986050287045947,
      "grad_norm": 0.16440840065479279,
      "learning_rate": 6.067618589511278e-06,
      "loss": 0.0117,
      "step": 1213580
    },
    {
      "epoch": 1.9860830174846003,
      "grad_norm": 0.24665163457393646,
      "learning_rate": 6.067552697297761e-06,
      "loss": 0.0146,
      "step": 1213600
    },
    {
      "epoch": 1.9861157479232536,
      "grad_norm": 0.45056217908859253,
      "learning_rate": 6.067486805084244e-06,
      "loss": 0.0161,
      "step": 1213620
    },
    {
      "epoch": 1.986148478361907,
      "grad_norm": 0.41570958495140076,
      "learning_rate": 6.067420912870726e-06,
      "loss": 0.0151,
      "step": 1213640
    },
    {
      "epoch": 1.9861812088005604,
      "grad_norm": 1.2498058080673218,
      "learning_rate": 6.06735502065721e-06,
      "loss": 0.013,
      "step": 1213660
    },
    {
      "epoch": 1.9862139392392137,
      "grad_norm": 0.14243245124816895,
      "learning_rate": 6.067289128443692e-06,
      "loss": 0.0133,
      "step": 1213680
    },
    {
      "epoch": 1.986246669677867,
      "grad_norm": 0.29828956723213196,
      "learning_rate": 6.067223236230175e-06,
      "loss": 0.0151,
      "step": 1213700
    },
    {
      "epoch": 1.9862794001165205,
      "grad_norm": 0.3380107581615448,
      "learning_rate": 6.067157344016657e-06,
      "loss": 0.0105,
      "step": 1213720
    },
    {
      "epoch": 1.9863121305551736,
      "grad_norm": 1.4334251880645752,
      "learning_rate": 6.067091451803141e-06,
      "loss": 0.022,
      "step": 1213740
    },
    {
      "epoch": 1.986344860993827,
      "grad_norm": 0.9574874043464661,
      "learning_rate": 6.067025559589624e-06,
      "loss": 0.0178,
      "step": 1213760
    },
    {
      "epoch": 1.9863775914324804,
      "grad_norm": 0.2574261724948883,
      "learning_rate": 6.066959667376106e-06,
      "loss": 0.0145,
      "step": 1213780
    },
    {
      "epoch": 1.9864103218711338,
      "grad_norm": 0.4936383366584778,
      "learning_rate": 6.06689377516259e-06,
      "loss": 0.0209,
      "step": 1213800
    },
    {
      "epoch": 1.9864430523097871,
      "grad_norm": 0.430703341960907,
      "learning_rate": 6.066827882949073e-06,
      "loss": 0.0128,
      "step": 1213820
    },
    {
      "epoch": 1.9864757827484403,
      "grad_norm": 0.21554596722126007,
      "learning_rate": 6.0667619907355554e-06,
      "loss": 0.0142,
      "step": 1213840
    },
    {
      "epoch": 1.9865085131870939,
      "grad_norm": 0.4925844073295593,
      "learning_rate": 6.066696098522038e-06,
      "loss": 0.0154,
      "step": 1213860
    },
    {
      "epoch": 1.986541243625747,
      "grad_norm": 0.46802717447280884,
      "learning_rate": 6.066630206308522e-06,
      "loss": 0.0192,
      "step": 1213880
    },
    {
      "epoch": 1.9865739740644004,
      "grad_norm": 0.365915447473526,
      "learning_rate": 6.066564314095004e-06,
      "loss": 0.0178,
      "step": 1213900
    },
    {
      "epoch": 1.9866067045030538,
      "grad_norm": 0.6161880493164062,
      "learning_rate": 6.066498421881487e-06,
      "loss": 0.0164,
      "step": 1213920
    },
    {
      "epoch": 1.9866394349417071,
      "grad_norm": 0.749961256980896,
      "learning_rate": 6.066432529667969e-06,
      "loss": 0.0155,
      "step": 1213940
    },
    {
      "epoch": 1.9866721653803605,
      "grad_norm": 0.5266364216804504,
      "learning_rate": 6.066366637454453e-06,
      "loss": 0.02,
      "step": 1213960
    },
    {
      "epoch": 1.9867048958190137,
      "grad_norm": 0.5083633065223694,
      "learning_rate": 6.0663007452409355e-06,
      "loss": 0.016,
      "step": 1213980
    },
    {
      "epoch": 1.9867376262576673,
      "grad_norm": 0.30259495973587036,
      "learning_rate": 6.066234853027418e-06,
      "loss": 0.0196,
      "step": 1214000
    },
    {
      "epoch": 1.9867703566963204,
      "grad_norm": 0.5682714581489563,
      "learning_rate": 6.066168960813901e-06,
      "loss": 0.0194,
      "step": 1214020
    },
    {
      "epoch": 1.9868030871349738,
      "grad_norm": 0.26797720789909363,
      "learning_rate": 6.0661030686003845e-06,
      "loss": 0.0138,
      "step": 1214040
    },
    {
      "epoch": 1.9868358175736271,
      "grad_norm": 0.4382428526878357,
      "learning_rate": 6.066037176386866e-06,
      "loss": 0.0124,
      "step": 1214060
    },
    {
      "epoch": 1.9868685480122803,
      "grad_norm": 0.13838863372802734,
      "learning_rate": 6.06597128417335e-06,
      "loss": 0.0143,
      "step": 1214080
    },
    {
      "epoch": 1.986901278450934,
      "grad_norm": 0.1900537759065628,
      "learning_rate": 6.065905391959832e-06,
      "loss": 0.0189,
      "step": 1214100
    },
    {
      "epoch": 1.986934008889587,
      "grad_norm": 0.2177884578704834,
      "learning_rate": 6.0658394997463155e-06,
      "loss": 0.0207,
      "step": 1214120
    },
    {
      "epoch": 1.9869667393282406,
      "grad_norm": 0.095762237906456,
      "learning_rate": 6.065773607532799e-06,
      "loss": 0.0131,
      "step": 1214140
    },
    {
      "epoch": 1.9869994697668938,
      "grad_norm": 0.37674757838249207,
      "learning_rate": 6.065707715319281e-06,
      "loss": 0.013,
      "step": 1214160
    },
    {
      "epoch": 1.9870322002055472,
      "grad_norm": 0.39535126090049744,
      "learning_rate": 6.0656418231057645e-06,
      "loss": 0.0183,
      "step": 1214180
    },
    {
      "epoch": 1.9870649306442005,
      "grad_norm": 0.39949265122413635,
      "learning_rate": 6.0655759308922464e-06,
      "loss": 0.0125,
      "step": 1214200
    },
    {
      "epoch": 1.9870976610828537,
      "grad_norm": 0.42123281955718994,
      "learning_rate": 6.06551003867873e-06,
      "loss": 0.0168,
      "step": 1214220
    },
    {
      "epoch": 1.9871303915215073,
      "grad_norm": 0.20318128168582916,
      "learning_rate": 6.065444146465213e-06,
      "loss": 0.0144,
      "step": 1214240
    },
    {
      "epoch": 1.9871631219601604,
      "grad_norm": 0.8184121251106262,
      "learning_rate": 6.0653782542516955e-06,
      "loss": 0.0166,
      "step": 1214260
    },
    {
      "epoch": 1.9871958523988138,
      "grad_norm": 0.1753690391778946,
      "learning_rate": 6.065312362038178e-06,
      "loss": 0.0172,
      "step": 1214280
    },
    {
      "epoch": 1.9872285828374672,
      "grad_norm": 1.3208459615707397,
      "learning_rate": 6.065246469824662e-06,
      "loss": 0.0292,
      "step": 1214300
    },
    {
      "epoch": 1.9872613132761205,
      "grad_norm": 0.4548667371273041,
      "learning_rate": 6.065180577611144e-06,
      "loss": 0.0205,
      "step": 1214320
    },
    {
      "epoch": 1.987294043714774,
      "grad_norm": 0.2556447386741638,
      "learning_rate": 6.065114685397627e-06,
      "loss": 0.0182,
      "step": 1214340
    },
    {
      "epoch": 1.987326774153427,
      "grad_norm": 0.18513363599777222,
      "learning_rate": 6.065048793184109e-06,
      "loss": 0.0232,
      "step": 1214360
    },
    {
      "epoch": 1.9873595045920807,
      "grad_norm": 0.7878296375274658,
      "learning_rate": 6.064982900970593e-06,
      "loss": 0.0098,
      "step": 1214380
    },
    {
      "epoch": 1.9873922350307338,
      "grad_norm": 0.11243537068367004,
      "learning_rate": 6.0649170087570755e-06,
      "loss": 0.0163,
      "step": 1214400
    },
    {
      "epoch": 1.9874249654693872,
      "grad_norm": 0.47056224942207336,
      "learning_rate": 6.064851116543558e-06,
      "loss": 0.0163,
      "step": 1214420
    },
    {
      "epoch": 1.9874576959080406,
      "grad_norm": 0.8439534902572632,
      "learning_rate": 6.064785224330041e-06,
      "loss": 0.0168,
      "step": 1214440
    },
    {
      "epoch": 1.987490426346694,
      "grad_norm": 0.7001416683197021,
      "learning_rate": 6.064719332116525e-06,
      "loss": 0.0223,
      "step": 1214460
    },
    {
      "epoch": 1.9875231567853473,
      "grad_norm": 0.26004987955093384,
      "learning_rate": 6.064653439903007e-06,
      "loss": 0.0136,
      "step": 1214480
    },
    {
      "epoch": 1.9875558872240005,
      "grad_norm": 0.8443286418914795,
      "learning_rate": 6.06458754768949e-06,
      "loss": 0.0217,
      "step": 1214500
    },
    {
      "epoch": 1.987588617662654,
      "grad_norm": 0.8295469284057617,
      "learning_rate": 6.064521655475974e-06,
      "loss": 0.0247,
      "step": 1214520
    },
    {
      "epoch": 1.9876213481013072,
      "grad_norm": 0.31940871477127075,
      "learning_rate": 6.0644557632624556e-06,
      "loss": 0.0176,
      "step": 1214540
    },
    {
      "epoch": 1.9876540785399606,
      "grad_norm": 0.5579445958137512,
      "learning_rate": 6.064389871048939e-06,
      "loss": 0.0221,
      "step": 1214560
    },
    {
      "epoch": 1.987686808978614,
      "grad_norm": 0.35394710302352905,
      "learning_rate": 6.064323978835421e-06,
      "loss": 0.0179,
      "step": 1214580
    },
    {
      "epoch": 1.9877195394172673,
      "grad_norm": 0.2195742428302765,
      "learning_rate": 6.064258086621905e-06,
      "loss": 0.0244,
      "step": 1214600
    },
    {
      "epoch": 1.9877522698559207,
      "grad_norm": 1.4452292919158936,
      "learning_rate": 6.064192194408387e-06,
      "loss": 0.0133,
      "step": 1214620
    },
    {
      "epoch": 1.9877850002945738,
      "grad_norm": 0.29553088545799255,
      "learning_rate": 6.06412630219487e-06,
      "loss": 0.012,
      "step": 1214640
    },
    {
      "epoch": 1.9878177307332274,
      "grad_norm": 0.30468788743019104,
      "learning_rate": 6.064060409981353e-06,
      "loss": 0.018,
      "step": 1214660
    },
    {
      "epoch": 1.9878504611718806,
      "grad_norm": 0.5823758244514465,
      "learning_rate": 6.0639945177678364e-06,
      "loss": 0.0188,
      "step": 1214680
    },
    {
      "epoch": 1.987883191610534,
      "grad_norm": 0.3057352602481842,
      "learning_rate": 6.063928625554318e-06,
      "loss": 0.0169,
      "step": 1214700
    },
    {
      "epoch": 1.9879159220491873,
      "grad_norm": 0.591259777545929,
      "learning_rate": 6.063862733340802e-06,
      "loss": 0.0144,
      "step": 1214720
    },
    {
      "epoch": 1.9879486524878407,
      "grad_norm": 0.3228696882724762,
      "learning_rate": 6.063796841127284e-06,
      "loss": 0.0232,
      "step": 1214740
    },
    {
      "epoch": 1.987981382926494,
      "grad_norm": 0.8068032264709473,
      "learning_rate": 6.063730948913767e-06,
      "loss": 0.0173,
      "step": 1214760
    },
    {
      "epoch": 1.9880141133651472,
      "grad_norm": 1.3728324174880981,
      "learning_rate": 6.06366505670025e-06,
      "loss": 0.0137,
      "step": 1214780
    },
    {
      "epoch": 1.9880468438038008,
      "grad_norm": 0.20568472146987915,
      "learning_rate": 6.063599164486733e-06,
      "loss": 0.024,
      "step": 1214800
    },
    {
      "epoch": 1.988079574242454,
      "grad_norm": 0.40565672516822815,
      "learning_rate": 6.063533272273216e-06,
      "loss": 0.0169,
      "step": 1214820
    },
    {
      "epoch": 1.9881123046811073,
      "grad_norm": 0.22840949892997742,
      "learning_rate": 6.063467380059699e-06,
      "loss": 0.0101,
      "step": 1214840
    },
    {
      "epoch": 1.9881450351197607,
      "grad_norm": 0.1519235372543335,
      "learning_rate": 6.063401487846182e-06,
      "loss": 0.0146,
      "step": 1214860
    },
    {
      "epoch": 1.9881777655584139,
      "grad_norm": 0.4929291009902954,
      "learning_rate": 6.063335595632665e-06,
      "loss": 0.0149,
      "step": 1214880
    },
    {
      "epoch": 1.9882104959970675,
      "grad_norm": 0.4804888367652893,
      "learning_rate": 6.063269703419148e-06,
      "loss": 0.0105,
      "step": 1214900
    },
    {
      "epoch": 1.9882432264357206,
      "grad_norm": 0.20193535089492798,
      "learning_rate": 6.06320381120563e-06,
      "loss": 0.0203,
      "step": 1214920
    },
    {
      "epoch": 1.9882759568743742,
      "grad_norm": 0.496224045753479,
      "learning_rate": 6.063137918992114e-06,
      "loss": 0.0195,
      "step": 1214940
    },
    {
      "epoch": 1.9883086873130273,
      "grad_norm": 0.5283045172691345,
      "learning_rate": 6.063072026778596e-06,
      "loss": 0.0157,
      "step": 1214960
    },
    {
      "epoch": 1.9883414177516807,
      "grad_norm": 0.6018334031105042,
      "learning_rate": 6.063006134565079e-06,
      "loss": 0.0193,
      "step": 1214980
    },
    {
      "epoch": 1.988374148190334,
      "grad_norm": 1.025017261505127,
      "learning_rate": 6.062940242351562e-06,
      "loss": 0.0116,
      "step": 1215000
    },
    {
      "epoch": 1.9884068786289872,
      "grad_norm": 0.5867024064064026,
      "learning_rate": 6.062874350138045e-06,
      "loss": 0.0169,
      "step": 1215020
    },
    {
      "epoch": 1.9884396090676408,
      "grad_norm": 0.1250821352005005,
      "learning_rate": 6.0628084579245274e-06,
      "loss": 0.0129,
      "step": 1215040
    },
    {
      "epoch": 1.988472339506294,
      "grad_norm": 0.1443849802017212,
      "learning_rate": 6.062742565711011e-06,
      "loss": 0.0183,
      "step": 1215060
    },
    {
      "epoch": 1.9885050699449474,
      "grad_norm": 0.632340133190155,
      "learning_rate": 6.062676673497493e-06,
      "loss": 0.0155,
      "step": 1215080
    },
    {
      "epoch": 1.9885378003836007,
      "grad_norm": 0.7833899855613708,
      "learning_rate": 6.0626107812839765e-06,
      "loss": 0.0144,
      "step": 1215100
    },
    {
      "epoch": 1.988570530822254,
      "grad_norm": 0.40246886014938354,
      "learning_rate": 6.062544889070458e-06,
      "loss": 0.0161,
      "step": 1215120
    },
    {
      "epoch": 1.9886032612609075,
      "grad_norm": 0.5386398434638977,
      "learning_rate": 6.062478996856942e-06,
      "loss": 0.0204,
      "step": 1215140
    },
    {
      "epoch": 1.9886359916995606,
      "grad_norm": 0.5307239294052124,
      "learning_rate": 6.062413104643424e-06,
      "loss": 0.017,
      "step": 1215160
    },
    {
      "epoch": 1.9886687221382142,
      "grad_norm": 1.124887228012085,
      "learning_rate": 6.0623472124299075e-06,
      "loss": 0.0162,
      "step": 1215180
    },
    {
      "epoch": 1.9887014525768674,
      "grad_norm": 0.20930202305316925,
      "learning_rate": 6.062281320216391e-06,
      "loss": 0.0183,
      "step": 1215200
    },
    {
      "epoch": 1.9887341830155207,
      "grad_norm": 0.21569594740867615,
      "learning_rate": 6.062215428002873e-06,
      "loss": 0.0161,
      "step": 1215220
    },
    {
      "epoch": 1.9887669134541741,
      "grad_norm": 1.098976731300354,
      "learning_rate": 6.0621495357893565e-06,
      "loss": 0.0166,
      "step": 1215240
    },
    {
      "epoch": 1.9887996438928275,
      "grad_norm": 1.002909779548645,
      "learning_rate": 6.062083643575839e-06,
      "loss": 0.0127,
      "step": 1215260
    },
    {
      "epoch": 1.9888323743314809,
      "grad_norm": 0.20913325250148773,
      "learning_rate": 6.062017751362322e-06,
      "loss": 0.0152,
      "step": 1215280
    },
    {
      "epoch": 1.988865104770134,
      "grad_norm": 1.1478524208068848,
      "learning_rate": 6.061951859148805e-06,
      "loss": 0.0248,
      "step": 1215300
    },
    {
      "epoch": 1.9888978352087876,
      "grad_norm": 0.1568993479013443,
      "learning_rate": 6.061885966935288e-06,
      "loss": 0.0225,
      "step": 1215320
    },
    {
      "epoch": 1.9889305656474408,
      "grad_norm": 0.7052934169769287,
      "learning_rate": 6.06182007472177e-06,
      "loss": 0.0244,
      "step": 1215340
    },
    {
      "epoch": 1.9889632960860941,
      "grad_norm": 1.5272291898727417,
      "learning_rate": 6.061754182508254e-06,
      "loss": 0.0239,
      "step": 1215360
    },
    {
      "epoch": 1.9889960265247475,
      "grad_norm": 0.2280822992324829,
      "learning_rate": 6.061688290294736e-06,
      "loss": 0.0166,
      "step": 1215380
    },
    {
      "epoch": 1.9890287569634009,
      "grad_norm": 0.2715909779071808,
      "learning_rate": 6.061622398081219e-06,
      "loss": 0.0174,
      "step": 1215400
    },
    {
      "epoch": 1.9890614874020542,
      "grad_norm": 0.49381181597709656,
      "learning_rate": 6.061556505867702e-06,
      "loss": 0.0197,
      "step": 1215420
    },
    {
      "epoch": 1.9890942178407074,
      "grad_norm": 0.5035249590873718,
      "learning_rate": 6.061490613654185e-06,
      "loss": 0.0215,
      "step": 1215440
    },
    {
      "epoch": 1.989126948279361,
      "grad_norm": 0.28693893551826477,
      "learning_rate": 6.0614247214406675e-06,
      "loss": 0.0172,
      "step": 1215460
    },
    {
      "epoch": 1.9891596787180141,
      "grad_norm": 0.3354356586933136,
      "learning_rate": 6.061358829227151e-06,
      "loss": 0.0126,
      "step": 1215480
    },
    {
      "epoch": 1.9891924091566675,
      "grad_norm": 0.1455007791519165,
      "learning_rate": 6.061292937013633e-06,
      "loss": 0.0108,
      "step": 1215500
    },
    {
      "epoch": 1.9892251395953209,
      "grad_norm": 0.282722145318985,
      "learning_rate": 6.061227044800117e-06,
      "loss": 0.0198,
      "step": 1215520
    },
    {
      "epoch": 1.989257870033974,
      "grad_norm": 0.49884942173957825,
      "learning_rate": 6.0611611525866e-06,
      "loss": 0.0187,
      "step": 1215540
    },
    {
      "epoch": 1.9892906004726276,
      "grad_norm": 0.1522771716117859,
      "learning_rate": 6.061095260373082e-06,
      "loss": 0.0213,
      "step": 1215560
    },
    {
      "epoch": 1.9893233309112808,
      "grad_norm": 0.14404521882534027,
      "learning_rate": 6.061029368159566e-06,
      "loss": 0.0164,
      "step": 1215580
    },
    {
      "epoch": 1.9893560613499344,
      "grad_norm": 0.8713765144348145,
      "learning_rate": 6.0609634759460475e-06,
      "loss": 0.0193,
      "step": 1215600
    },
    {
      "epoch": 1.9893887917885875,
      "grad_norm": 0.5829263925552368,
      "learning_rate": 6.060897583732531e-06,
      "loss": 0.0115,
      "step": 1215620
    },
    {
      "epoch": 1.989421522227241,
      "grad_norm": 0.3722195029258728,
      "learning_rate": 6.060831691519014e-06,
      "loss": 0.0155,
      "step": 1215640
    },
    {
      "epoch": 1.9894542526658943,
      "grad_norm": 0.23453190922737122,
      "learning_rate": 6.060765799305497e-06,
      "loss": 0.0227,
      "step": 1215660
    },
    {
      "epoch": 1.9894869831045474,
      "grad_norm": 0.43822425603866577,
      "learning_rate": 6.060699907091979e-06,
      "loss": 0.0163,
      "step": 1215680
    },
    {
      "epoch": 1.989519713543201,
      "grad_norm": 0.3265737295150757,
      "learning_rate": 6.060634014878463e-06,
      "loss": 0.0244,
      "step": 1215700
    },
    {
      "epoch": 1.9895524439818542,
      "grad_norm": 0.32992953062057495,
      "learning_rate": 6.060568122664945e-06,
      "loss": 0.0168,
      "step": 1215720
    },
    {
      "epoch": 1.9895851744205075,
      "grad_norm": 1.2518166303634644,
      "learning_rate": 6.060502230451428e-06,
      "loss": 0.0194,
      "step": 1215740
    },
    {
      "epoch": 1.989617904859161,
      "grad_norm": 0.4180157482624054,
      "learning_rate": 6.06043633823791e-06,
      "loss": 0.0161,
      "step": 1215760
    },
    {
      "epoch": 1.9896506352978143,
      "grad_norm": 0.2057245373725891,
      "learning_rate": 6.060370446024394e-06,
      "loss": 0.0177,
      "step": 1215780
    },
    {
      "epoch": 1.9896833657364676,
      "grad_norm": 0.922878623008728,
      "learning_rate": 6.060304553810877e-06,
      "loss": 0.0123,
      "step": 1215800
    },
    {
      "epoch": 1.9897160961751208,
      "grad_norm": 0.13710126280784607,
      "learning_rate": 6.060238661597359e-06,
      "loss": 0.0156,
      "step": 1215820
    },
    {
      "epoch": 1.9897488266137744,
      "grad_norm": 0.23840785026550293,
      "learning_rate": 6.060172769383842e-06,
      "loss": 0.021,
      "step": 1215840
    },
    {
      "epoch": 1.9897815570524275,
      "grad_norm": 0.1644304394721985,
      "learning_rate": 6.060106877170326e-06,
      "loss": 0.0199,
      "step": 1215860
    },
    {
      "epoch": 1.989814287491081,
      "grad_norm": 0.559351921081543,
      "learning_rate": 6.0600409849568084e-06,
      "loss": 0.0146,
      "step": 1215880
    },
    {
      "epoch": 1.9898470179297343,
      "grad_norm": 0.2990571856498718,
      "learning_rate": 6.059975092743291e-06,
      "loss": 0.0194,
      "step": 1215900
    },
    {
      "epoch": 1.9898797483683877,
      "grad_norm": 0.22838778793811798,
      "learning_rate": 6.059909200529775e-06,
      "loss": 0.0148,
      "step": 1215920
    },
    {
      "epoch": 1.989912478807041,
      "grad_norm": 0.37803658843040466,
      "learning_rate": 6.059843308316257e-06,
      "loss": 0.0188,
      "step": 1215940
    },
    {
      "epoch": 1.9899452092456942,
      "grad_norm": 0.2949877679347992,
      "learning_rate": 6.05977741610274e-06,
      "loss": 0.0146,
      "step": 1215960
    },
    {
      "epoch": 1.9899779396843478,
      "grad_norm": 0.7311155200004578,
      "learning_rate": 6.059711523889222e-06,
      "loss": 0.0202,
      "step": 1215980
    },
    {
      "epoch": 1.990010670123001,
      "grad_norm": 0.8490710258483887,
      "learning_rate": 6.059645631675706e-06,
      "loss": 0.023,
      "step": 1216000
    },
    {
      "epoch": 1.9900434005616543,
      "grad_norm": 0.8620944619178772,
      "learning_rate": 6.059579739462188e-06,
      "loss": 0.0257,
      "step": 1216020
    },
    {
      "epoch": 1.9900761310003077,
      "grad_norm": 0.9118128418922424,
      "learning_rate": 6.059513847248671e-06,
      "loss": 0.0223,
      "step": 1216040
    },
    {
      "epoch": 1.990108861438961,
      "grad_norm": 0.39895936846733093,
      "learning_rate": 6.059447955035154e-06,
      "loss": 0.0237,
      "step": 1216060
    },
    {
      "epoch": 1.9901415918776144,
      "grad_norm": 0.06086093187332153,
      "learning_rate": 6.059382062821637e-06,
      "loss": 0.0135,
      "step": 1216080
    },
    {
      "epoch": 1.9901743223162676,
      "grad_norm": 0.2905970811843872,
      "learning_rate": 6.059316170608119e-06,
      "loss": 0.0255,
      "step": 1216100
    },
    {
      "epoch": 1.9902070527549212,
      "grad_norm": 0.33648017048835754,
      "learning_rate": 6.059250278394603e-06,
      "loss": 0.0188,
      "step": 1216120
    },
    {
      "epoch": 1.9902397831935743,
      "grad_norm": 0.8790313601493835,
      "learning_rate": 6.059184386181085e-06,
      "loss": 0.0176,
      "step": 1216140
    },
    {
      "epoch": 1.9902725136322277,
      "grad_norm": 0.4130072295665741,
      "learning_rate": 6.0591184939675685e-06,
      "loss": 0.0193,
      "step": 1216160
    },
    {
      "epoch": 1.990305244070881,
      "grad_norm": 0.27710577845573425,
      "learning_rate": 6.05905260175405e-06,
      "loss": 0.016,
      "step": 1216180
    },
    {
      "epoch": 1.9903379745095344,
      "grad_norm": 0.2289367914199829,
      "learning_rate": 6.058986709540534e-06,
      "loss": 0.0215,
      "step": 1216200
    },
    {
      "epoch": 1.9903707049481878,
      "grad_norm": 0.6246472001075745,
      "learning_rate": 6.058920817327017e-06,
      "loss": 0.0142,
      "step": 1216220
    },
    {
      "epoch": 1.990403435386841,
      "grad_norm": 0.2695809006690979,
      "learning_rate": 6.0588549251134994e-06,
      "loss": 0.0216,
      "step": 1216240
    },
    {
      "epoch": 1.9904361658254945,
      "grad_norm": 0.10887542366981506,
      "learning_rate": 6.058789032899983e-06,
      "loss": 0.0124,
      "step": 1216260
    },
    {
      "epoch": 1.9904688962641477,
      "grad_norm": 0.4155386984348297,
      "learning_rate": 6.058723140686466e-06,
      "loss": 0.0259,
      "step": 1216280
    },
    {
      "epoch": 1.990501626702801,
      "grad_norm": 0.929247260093689,
      "learning_rate": 6.0586572484729485e-06,
      "loss": 0.0209,
      "step": 1216300
    },
    {
      "epoch": 1.9905343571414544,
      "grad_norm": 0.5731057524681091,
      "learning_rate": 6.058591356259431e-06,
      "loss": 0.0184,
      "step": 1216320
    },
    {
      "epoch": 1.9905670875801076,
      "grad_norm": 0.10269375890493393,
      "learning_rate": 6.058525464045915e-06,
      "loss": 0.0162,
      "step": 1216340
    },
    {
      "epoch": 1.9905998180187612,
      "grad_norm": 0.5226993560791016,
      "learning_rate": 6.058459571832397e-06,
      "loss": 0.0167,
      "step": 1216360
    },
    {
      "epoch": 1.9906325484574143,
      "grad_norm": 1.0056816339492798,
      "learning_rate": 6.05839367961888e-06,
      "loss": 0.0217,
      "step": 1216380
    },
    {
      "epoch": 1.990665278896068,
      "grad_norm": 0.18882262706756592,
      "learning_rate": 6.058327787405362e-06,
      "loss": 0.0187,
      "step": 1216400
    },
    {
      "epoch": 1.990698009334721,
      "grad_norm": 0.18562625348567963,
      "learning_rate": 6.058261895191846e-06,
      "loss": 0.0149,
      "step": 1216420
    },
    {
      "epoch": 1.9907307397733744,
      "grad_norm": 0.38279664516448975,
      "learning_rate": 6.0581960029783285e-06,
      "loss": 0.0202,
      "step": 1216440
    },
    {
      "epoch": 1.9907634702120278,
      "grad_norm": 1.1142057180404663,
      "learning_rate": 6.058130110764811e-06,
      "loss": 0.0249,
      "step": 1216460
    },
    {
      "epoch": 1.990796200650681,
      "grad_norm": 0.12978588044643402,
      "learning_rate": 6.058064218551294e-06,
      "loss": 0.0134,
      "step": 1216480
    },
    {
      "epoch": 1.9908289310893346,
      "grad_norm": 0.32627812027931213,
      "learning_rate": 6.057998326337778e-06,
      "loss": 0.0152,
      "step": 1216500
    },
    {
      "epoch": 1.9908616615279877,
      "grad_norm": 0.2730811536312103,
      "learning_rate": 6.0579324341242595e-06,
      "loss": 0.0185,
      "step": 1216520
    },
    {
      "epoch": 1.990894391966641,
      "grad_norm": 0.14033390581607819,
      "learning_rate": 6.057866541910743e-06,
      "loss": 0.0182,
      "step": 1216540
    },
    {
      "epoch": 1.9909271224052945,
      "grad_norm": 0.360479474067688,
      "learning_rate": 6.057800649697225e-06,
      "loss": 0.0116,
      "step": 1216560
    },
    {
      "epoch": 1.9909598528439478,
      "grad_norm": 0.19051072001457214,
      "learning_rate": 6.0577347574837086e-06,
      "loss": 0.018,
      "step": 1216580
    },
    {
      "epoch": 1.9909925832826012,
      "grad_norm": 0.9794908761978149,
      "learning_rate": 6.057668865270192e-06,
      "loss": 0.0153,
      "step": 1216600
    },
    {
      "epoch": 1.9910253137212544,
      "grad_norm": 0.20583131909370422,
      "learning_rate": 6.057602973056674e-06,
      "loss": 0.02,
      "step": 1216620
    },
    {
      "epoch": 1.991058044159908,
      "grad_norm": 0.7554397583007812,
      "learning_rate": 6.057537080843158e-06,
      "loss": 0.011,
      "step": 1216640
    },
    {
      "epoch": 1.991090774598561,
      "grad_norm": 0.4071684777736664,
      "learning_rate": 6.05747118862964e-06,
      "loss": 0.0193,
      "step": 1216660
    },
    {
      "epoch": 1.9911235050372145,
      "grad_norm": 0.04818614199757576,
      "learning_rate": 6.057405296416123e-06,
      "loss": 0.0191,
      "step": 1216680
    },
    {
      "epoch": 1.9911562354758678,
      "grad_norm": 0.7769530415534973,
      "learning_rate": 6.057339404202606e-06,
      "loss": 0.0156,
      "step": 1216700
    },
    {
      "epoch": 1.9911889659145212,
      "grad_norm": 1.400247573852539,
      "learning_rate": 6.0572735119890894e-06,
      "loss": 0.0199,
      "step": 1216720
    },
    {
      "epoch": 1.9912216963531746,
      "grad_norm": 0.5429450869560242,
      "learning_rate": 6.057207619775571e-06,
      "loss": 0.0173,
      "step": 1216740
    },
    {
      "epoch": 1.9912544267918277,
      "grad_norm": 0.18752345442771912,
      "learning_rate": 6.057141727562055e-06,
      "loss": 0.0162,
      "step": 1216760
    },
    {
      "epoch": 1.9912871572304813,
      "grad_norm": 0.9349015951156616,
      "learning_rate": 6.057075835348537e-06,
      "loss": 0.0172,
      "step": 1216780
    },
    {
      "epoch": 1.9913198876691345,
      "grad_norm": 0.15228326618671417,
      "learning_rate": 6.05700994313502e-06,
      "loss": 0.0165,
      "step": 1216800
    },
    {
      "epoch": 1.9913526181077879,
      "grad_norm": 0.46415671706199646,
      "learning_rate": 6.056944050921503e-06,
      "loss": 0.0209,
      "step": 1216820
    },
    {
      "epoch": 1.9913853485464412,
      "grad_norm": 0.5855443477630615,
      "learning_rate": 6.056878158707986e-06,
      "loss": 0.0141,
      "step": 1216840
    },
    {
      "epoch": 1.9914180789850946,
      "grad_norm": 0.7044895887374878,
      "learning_rate": 6.056812266494469e-06,
      "loss": 0.0164,
      "step": 1216860
    },
    {
      "epoch": 1.991450809423748,
      "grad_norm": 0.743021547794342,
      "learning_rate": 6.056746374280952e-06,
      "loss": 0.0173,
      "step": 1216880
    },
    {
      "epoch": 1.9914835398624011,
      "grad_norm": 0.9675588607788086,
      "learning_rate": 6.056680482067434e-06,
      "loss": 0.0136,
      "step": 1216900
    },
    {
      "epoch": 1.9915162703010547,
      "grad_norm": 1.2780359983444214,
      "learning_rate": 6.056614589853918e-06,
      "loss": 0.0144,
      "step": 1216920
    },
    {
      "epoch": 1.9915490007397079,
      "grad_norm": 0.8557407855987549,
      "learning_rate": 6.056548697640401e-06,
      "loss": 0.0167,
      "step": 1216940
    },
    {
      "epoch": 1.9915817311783612,
      "grad_norm": 0.6253563165664673,
      "learning_rate": 6.056482805426883e-06,
      "loss": 0.0223,
      "step": 1216960
    },
    {
      "epoch": 1.9916144616170146,
      "grad_norm": 0.33322787284851074,
      "learning_rate": 6.056416913213367e-06,
      "loss": 0.0189,
      "step": 1216980
    },
    {
      "epoch": 1.991647192055668,
      "grad_norm": 0.15891912579536438,
      "learning_rate": 6.056351020999849e-06,
      "loss": 0.0139,
      "step": 1217000
    },
    {
      "epoch": 1.9916799224943214,
      "grad_norm": 0.4016041159629822,
      "learning_rate": 6.056285128786332e-06,
      "loss": 0.0183,
      "step": 1217020
    },
    {
      "epoch": 1.9917126529329745,
      "grad_norm": 0.3544335961341858,
      "learning_rate": 6.056219236572814e-06,
      "loss": 0.02,
      "step": 1217040
    },
    {
      "epoch": 1.991745383371628,
      "grad_norm": 0.7245391607284546,
      "learning_rate": 6.056153344359298e-06,
      "loss": 0.0212,
      "step": 1217060
    },
    {
      "epoch": 1.9917781138102812,
      "grad_norm": 0.19788885116577148,
      "learning_rate": 6.0560874521457804e-06,
      "loss": 0.0145,
      "step": 1217080
    },
    {
      "epoch": 1.9918108442489346,
      "grad_norm": 0.10714882612228394,
      "learning_rate": 6.056021559932263e-06,
      "loss": 0.0155,
      "step": 1217100
    },
    {
      "epoch": 1.991843574687588,
      "grad_norm": 0.5271053314208984,
      "learning_rate": 6.055955667718746e-06,
      "loss": 0.0155,
      "step": 1217120
    },
    {
      "epoch": 1.9918763051262411,
      "grad_norm": 0.3461697995662689,
      "learning_rate": 6.0558897755052295e-06,
      "loss": 0.0136,
      "step": 1217140
    },
    {
      "epoch": 1.9919090355648947,
      "grad_norm": 0.5544165372848511,
      "learning_rate": 6.055823883291711e-06,
      "loss": 0.0214,
      "step": 1217160
    },
    {
      "epoch": 1.9919417660035479,
      "grad_norm": 0.6665958166122437,
      "learning_rate": 6.055757991078195e-06,
      "loss": 0.0197,
      "step": 1217180
    },
    {
      "epoch": 1.9919744964422015,
      "grad_norm": 0.22960855066776276,
      "learning_rate": 6.055692098864677e-06,
      "loss": 0.013,
      "step": 1217200
    },
    {
      "epoch": 1.9920072268808546,
      "grad_norm": 0.5076107978820801,
      "learning_rate": 6.0556262066511605e-06,
      "loss": 0.0135,
      "step": 1217220
    },
    {
      "epoch": 1.992039957319508,
      "grad_norm": 0.24089300632476807,
      "learning_rate": 6.055560314437643e-06,
      "loss": 0.0166,
      "step": 1217240
    },
    {
      "epoch": 1.9920726877581614,
      "grad_norm": 0.6790108680725098,
      "learning_rate": 6.055494422224126e-06,
      "loss": 0.0123,
      "step": 1217260
    },
    {
      "epoch": 1.9921054181968145,
      "grad_norm": 0.14072415232658386,
      "learning_rate": 6.055428530010609e-06,
      "loss": 0.0115,
      "step": 1217280
    },
    {
      "epoch": 1.9921381486354681,
      "grad_norm": 0.22130900621414185,
      "learning_rate": 6.055362637797092e-06,
      "loss": 0.016,
      "step": 1217300
    },
    {
      "epoch": 1.9921708790741213,
      "grad_norm": 1.1469005346298218,
      "learning_rate": 6.055296745583575e-06,
      "loss": 0.0159,
      "step": 1217320
    },
    {
      "epoch": 1.9922036095127746,
      "grad_norm": 0.7357633113861084,
      "learning_rate": 6.055230853370058e-06,
      "loss": 0.024,
      "step": 1217340
    },
    {
      "epoch": 1.992236339951428,
      "grad_norm": 1.0440126657485962,
      "learning_rate": 6.055164961156541e-06,
      "loss": 0.028,
      "step": 1217360
    },
    {
      "epoch": 1.9922690703900814,
      "grad_norm": 1.0560519695281982,
      "learning_rate": 6.055099068943023e-06,
      "loss": 0.0168,
      "step": 1217380
    },
    {
      "epoch": 1.9923018008287348,
      "grad_norm": 0.16400106251239777,
      "learning_rate": 6.055033176729507e-06,
      "loss": 0.0134,
      "step": 1217400
    },
    {
      "epoch": 1.992334531267388,
      "grad_norm": 0.0654766857624054,
      "learning_rate": 6.054967284515989e-06,
      "loss": 0.0174,
      "step": 1217420
    },
    {
      "epoch": 1.9923672617060415,
      "grad_norm": 0.5814550518989563,
      "learning_rate": 6.054901392302472e-06,
      "loss": 0.0146,
      "step": 1217440
    },
    {
      "epoch": 1.9923999921446947,
      "grad_norm": 0.3230140805244446,
      "learning_rate": 6.054835500088955e-06,
      "loss": 0.0186,
      "step": 1217460
    },
    {
      "epoch": 1.992432722583348,
      "grad_norm": 0.6122151613235474,
      "learning_rate": 6.054769607875438e-06,
      "loss": 0.0182,
      "step": 1217480
    },
    {
      "epoch": 1.9924654530220014,
      "grad_norm": 0.6635233163833618,
      "learning_rate": 6.0547037156619205e-06,
      "loss": 0.0164,
      "step": 1217500
    },
    {
      "epoch": 1.9924981834606548,
      "grad_norm": 0.15140333771705627,
      "learning_rate": 6.054637823448404e-06,
      "loss": 0.0205,
      "step": 1217520
    },
    {
      "epoch": 1.9925309138993081,
      "grad_norm": 0.43154534697532654,
      "learning_rate": 6.054571931234886e-06,
      "loss": 0.023,
      "step": 1217540
    },
    {
      "epoch": 1.9925636443379613,
      "grad_norm": 0.24449484050273895,
      "learning_rate": 6.05450603902137e-06,
      "loss": 0.0193,
      "step": 1217560
    },
    {
      "epoch": 1.9925963747766149,
      "grad_norm": 0.40821316838264465,
      "learning_rate": 6.0544401468078515e-06,
      "loss": 0.0147,
      "step": 1217580
    },
    {
      "epoch": 1.992629105215268,
      "grad_norm": 0.3257198631763458,
      "learning_rate": 6.054374254594335e-06,
      "loss": 0.0149,
      "step": 1217600
    },
    {
      "epoch": 1.9926618356539214,
      "grad_norm": 0.3543647229671478,
      "learning_rate": 6.054308362380818e-06,
      "loss": 0.0182,
      "step": 1217620
    },
    {
      "epoch": 1.9926945660925748,
      "grad_norm": 1.007920503616333,
      "learning_rate": 6.0542424701673005e-06,
      "loss": 0.0206,
      "step": 1217640
    },
    {
      "epoch": 1.9927272965312282,
      "grad_norm": 0.9432977437973022,
      "learning_rate": 6.054176577953784e-06,
      "loss": 0.0197,
      "step": 1217660
    },
    {
      "epoch": 1.9927600269698815,
      "grad_norm": 0.9401953816413879,
      "learning_rate": 6.054110685740267e-06,
      "loss": 0.0201,
      "step": 1217680
    },
    {
      "epoch": 1.9927927574085347,
      "grad_norm": 0.4142706096172333,
      "learning_rate": 6.05404479352675e-06,
      "loss": 0.0176,
      "step": 1217700
    },
    {
      "epoch": 1.9928254878471883,
      "grad_norm": 0.9032691121101379,
      "learning_rate": 6.053978901313232e-06,
      "loss": 0.0299,
      "step": 1217720
    },
    {
      "epoch": 1.9928582182858414,
      "grad_norm": 0.14576414227485657,
      "learning_rate": 6.053913009099716e-06,
      "loss": 0.0123,
      "step": 1217740
    },
    {
      "epoch": 1.9928909487244948,
      "grad_norm": 0.23349925875663757,
      "learning_rate": 6.053847116886198e-06,
      "loss": 0.0213,
      "step": 1217760
    },
    {
      "epoch": 1.9929236791631482,
      "grad_norm": 0.18778501451015472,
      "learning_rate": 6.053781224672681e-06,
      "loss": 0.0121,
      "step": 1217780
    },
    {
      "epoch": 1.9929564096018015,
      "grad_norm": 0.509118378162384,
      "learning_rate": 6.053715332459163e-06,
      "loss": 0.0197,
      "step": 1217800
    },
    {
      "epoch": 1.992989140040455,
      "grad_norm": 0.18206122517585754,
      "learning_rate": 6.053649440245647e-06,
      "loss": 0.0237,
      "step": 1217820
    },
    {
      "epoch": 1.993021870479108,
      "grad_norm": 0.31695666909217834,
      "learning_rate": 6.05358354803213e-06,
      "loss": 0.0157,
      "step": 1217840
    },
    {
      "epoch": 1.9930546009177617,
      "grad_norm": 1.1148197650909424,
      "learning_rate": 6.053517655818612e-06,
      "loss": 0.0184,
      "step": 1217860
    },
    {
      "epoch": 1.9930873313564148,
      "grad_norm": 0.5024538040161133,
      "learning_rate": 6.053451763605095e-06,
      "loss": 0.0162,
      "step": 1217880
    },
    {
      "epoch": 1.9931200617950682,
      "grad_norm": 0.38114801049232483,
      "learning_rate": 6.053385871391579e-06,
      "loss": 0.0161,
      "step": 1217900
    },
    {
      "epoch": 1.9931527922337215,
      "grad_norm": 0.2510794699192047,
      "learning_rate": 6.053319979178061e-06,
      "loss": 0.0165,
      "step": 1217920
    },
    {
      "epoch": 1.9931855226723747,
      "grad_norm": 0.3085204064846039,
      "learning_rate": 6.053254086964544e-06,
      "loss": 0.0168,
      "step": 1217940
    },
    {
      "epoch": 1.9932182531110283,
      "grad_norm": 0.274110347032547,
      "learning_rate": 6.053188194751026e-06,
      "loss": 0.0118,
      "step": 1217960
    },
    {
      "epoch": 1.9932509835496814,
      "grad_norm": 0.3779723644256592,
      "learning_rate": 6.05312230253751e-06,
      "loss": 0.0267,
      "step": 1217980
    },
    {
      "epoch": 1.993283713988335,
      "grad_norm": 0.2845969796180725,
      "learning_rate": 6.053056410323993e-06,
      "loss": 0.02,
      "step": 1218000
    },
    {
      "epoch": 1.9933164444269882,
      "grad_norm": 0.5842177271842957,
      "learning_rate": 6.052990518110475e-06,
      "loss": 0.0196,
      "step": 1218020
    },
    {
      "epoch": 1.9933491748656416,
      "grad_norm": 0.43682730197906494,
      "learning_rate": 6.052924625896959e-06,
      "loss": 0.0128,
      "step": 1218040
    },
    {
      "epoch": 1.993381905304295,
      "grad_norm": 0.6543427109718323,
      "learning_rate": 6.052858733683441e-06,
      "loss": 0.0162,
      "step": 1218060
    },
    {
      "epoch": 1.993414635742948,
      "grad_norm": 0.15839847922325134,
      "learning_rate": 6.052792841469924e-06,
      "loss": 0.0201,
      "step": 1218080
    },
    {
      "epoch": 1.9934473661816017,
      "grad_norm": 1.1925890445709229,
      "learning_rate": 6.052726949256407e-06,
      "loss": 0.0159,
      "step": 1218100
    },
    {
      "epoch": 1.9934800966202548,
      "grad_norm": 0.34156620502471924,
      "learning_rate": 6.05266105704289e-06,
      "loss": 0.0229,
      "step": 1218120
    },
    {
      "epoch": 1.9935128270589082,
      "grad_norm": 0.14363127946853638,
      "learning_rate": 6.0525951648293724e-06,
      "loss": 0.0171,
      "step": 1218140
    },
    {
      "epoch": 1.9935455574975616,
      "grad_norm": 0.8292443156242371,
      "learning_rate": 6.052529272615856e-06,
      "loss": 0.028,
      "step": 1218160
    },
    {
      "epoch": 1.993578287936215,
      "grad_norm": 0.17011581361293793,
      "learning_rate": 6.052463380402338e-06,
      "loss": 0.0149,
      "step": 1218180
    },
    {
      "epoch": 1.9936110183748683,
      "grad_norm": 0.3053581118583679,
      "learning_rate": 6.0523974881888215e-06,
      "loss": 0.0193,
      "step": 1218200
    },
    {
      "epoch": 1.9936437488135215,
      "grad_norm": 0.227203831076622,
      "learning_rate": 6.052331595975303e-06,
      "loss": 0.0096,
      "step": 1218220
    },
    {
      "epoch": 1.993676479252175,
      "grad_norm": 0.46762409806251526,
      "learning_rate": 6.052265703761787e-06,
      "loss": 0.0145,
      "step": 1218240
    },
    {
      "epoch": 1.9937092096908282,
      "grad_norm": 0.3955402374267578,
      "learning_rate": 6.05219981154827e-06,
      "loss": 0.0148,
      "step": 1218260
    },
    {
      "epoch": 1.9937419401294816,
      "grad_norm": 0.2212449163198471,
      "learning_rate": 6.0521339193347524e-06,
      "loss": 0.018,
      "step": 1218280
    },
    {
      "epoch": 1.993774670568135,
      "grad_norm": 0.18780560791492462,
      "learning_rate": 6.052068027121235e-06,
      "loss": 0.019,
      "step": 1218300
    },
    {
      "epoch": 1.9938074010067883,
      "grad_norm": 0.2412256896495819,
      "learning_rate": 6.052002134907719e-06,
      "loss": 0.0241,
      "step": 1218320
    },
    {
      "epoch": 1.9938401314454417,
      "grad_norm": 2.195396900177002,
      "learning_rate": 6.051936242694201e-06,
      "loss": 0.0167,
      "step": 1218340
    },
    {
      "epoch": 1.9938728618840948,
      "grad_norm": 0.11257744580507278,
      "learning_rate": 6.051870350480684e-06,
      "loss": 0.0162,
      "step": 1218360
    },
    {
      "epoch": 1.9939055923227484,
      "grad_norm": 0.2103738635778427,
      "learning_rate": 6.051804458267168e-06,
      "loss": 0.0144,
      "step": 1218380
    },
    {
      "epoch": 1.9939383227614016,
      "grad_norm": 0.36589911580085754,
      "learning_rate": 6.05173856605365e-06,
      "loss": 0.0124,
      "step": 1218400
    },
    {
      "epoch": 1.993971053200055,
      "grad_norm": 0.9162418842315674,
      "learning_rate": 6.051672673840133e-06,
      "loss": 0.0186,
      "step": 1218420
    },
    {
      "epoch": 1.9940037836387083,
      "grad_norm": 0.5412121415138245,
      "learning_rate": 6.051606781626615e-06,
      "loss": 0.0184,
      "step": 1218440
    },
    {
      "epoch": 1.9940365140773617,
      "grad_norm": 0.20610368251800537,
      "learning_rate": 6.051540889413099e-06,
      "loss": 0.0183,
      "step": 1218460
    },
    {
      "epoch": 1.994069244516015,
      "grad_norm": 1.405515432357788,
      "learning_rate": 6.0514749971995815e-06,
      "loss": 0.0118,
      "step": 1218480
    },
    {
      "epoch": 1.9941019749546682,
      "grad_norm": 1.2677613496780396,
      "learning_rate": 6.051409104986064e-06,
      "loss": 0.0181,
      "step": 1218500
    },
    {
      "epoch": 1.9941347053933218,
      "grad_norm": 0.7661594152450562,
      "learning_rate": 6.051343212772547e-06,
      "loss": 0.0176,
      "step": 1218520
    },
    {
      "epoch": 1.994167435831975,
      "grad_norm": 0.46060439944267273,
      "learning_rate": 6.051277320559031e-06,
      "loss": 0.0132,
      "step": 1218540
    },
    {
      "epoch": 1.9942001662706283,
      "grad_norm": 0.5191060304641724,
      "learning_rate": 6.0512114283455125e-06,
      "loss": 0.0154,
      "step": 1218560
    },
    {
      "epoch": 1.9942328967092817,
      "grad_norm": 0.4143610894680023,
      "learning_rate": 6.051145536131996e-06,
      "loss": 0.0278,
      "step": 1218580
    },
    {
      "epoch": 1.9942656271479349,
      "grad_norm": 0.28468242287635803,
      "learning_rate": 6.051079643918478e-06,
      "loss": 0.0144,
      "step": 1218600
    },
    {
      "epoch": 1.9942983575865885,
      "grad_norm": 0.8152039051055908,
      "learning_rate": 6.0510137517049616e-06,
      "loss": 0.0253,
      "step": 1218620
    },
    {
      "epoch": 1.9943310880252416,
      "grad_norm": 0.8065894246101379,
      "learning_rate": 6.050947859491444e-06,
      "loss": 0.0257,
      "step": 1218640
    },
    {
      "epoch": 1.9943638184638952,
      "grad_norm": 0.6572986841201782,
      "learning_rate": 6.050881967277927e-06,
      "loss": 0.0162,
      "step": 1218660
    },
    {
      "epoch": 1.9943965489025484,
      "grad_norm": 0.1255277842283249,
      "learning_rate": 6.05081607506441e-06,
      "loss": 0.0223,
      "step": 1218680
    },
    {
      "epoch": 1.9944292793412017,
      "grad_norm": 3.5095221996307373,
      "learning_rate": 6.050750182850893e-06,
      "loss": 0.0347,
      "step": 1218700
    },
    {
      "epoch": 1.994462009779855,
      "grad_norm": 0.798675537109375,
      "learning_rate": 6.050684290637376e-06,
      "loss": 0.0147,
      "step": 1218720
    },
    {
      "epoch": 1.9944947402185083,
      "grad_norm": 0.4299972355365753,
      "learning_rate": 6.050618398423859e-06,
      "loss": 0.0179,
      "step": 1218740
    },
    {
      "epoch": 1.9945274706571618,
      "grad_norm": 0.7557530403137207,
      "learning_rate": 6.0505525062103424e-06,
      "loss": 0.0159,
      "step": 1218760
    },
    {
      "epoch": 1.994560201095815,
      "grad_norm": 0.3587953448295593,
      "learning_rate": 6.050486613996824e-06,
      "loss": 0.0114,
      "step": 1218780
    },
    {
      "epoch": 1.9945929315344684,
      "grad_norm": 0.06506697833538055,
      "learning_rate": 6.050420721783308e-06,
      "loss": 0.0257,
      "step": 1218800
    },
    {
      "epoch": 1.9946256619731217,
      "grad_norm": 0.16683682799339294,
      "learning_rate": 6.05035482956979e-06,
      "loss": 0.0115,
      "step": 1218820
    },
    {
      "epoch": 1.9946583924117751,
      "grad_norm": 0.44135287404060364,
      "learning_rate": 6.050288937356273e-06,
      "loss": 0.0238,
      "step": 1218840
    },
    {
      "epoch": 1.9946911228504285,
      "grad_norm": 0.48730403184890747,
      "learning_rate": 6.050223045142755e-06,
      "loss": 0.0163,
      "step": 1218860
    },
    {
      "epoch": 1.9947238532890816,
      "grad_norm": 0.43445897102355957,
      "learning_rate": 6.050157152929239e-06,
      "loss": 0.0218,
      "step": 1218880
    },
    {
      "epoch": 1.9947565837277352,
      "grad_norm": 0.4024829864501953,
      "learning_rate": 6.050091260715722e-06,
      "loss": 0.0154,
      "step": 1218900
    },
    {
      "epoch": 1.9947893141663884,
      "grad_norm": 0.7289885878562927,
      "learning_rate": 6.050025368502205e-06,
      "loss": 0.022,
      "step": 1218920
    },
    {
      "epoch": 1.9948220446050418,
      "grad_norm": 0.7489954829216003,
      "learning_rate": 6.049959476288687e-06,
      "loss": 0.0282,
      "step": 1218940
    },
    {
      "epoch": 1.9948547750436951,
      "grad_norm": 0.2959364950656891,
      "learning_rate": 6.049893584075171e-06,
      "loss": 0.0185,
      "step": 1218960
    },
    {
      "epoch": 1.9948875054823485,
      "grad_norm": 0.19074352085590363,
      "learning_rate": 6.0498276918616526e-06,
      "loss": 0.0093,
      "step": 1218980
    },
    {
      "epoch": 1.9949202359210019,
      "grad_norm": 0.8304041028022766,
      "learning_rate": 6.049761799648136e-06,
      "loss": 0.0196,
      "step": 1219000
    },
    {
      "epoch": 1.994952966359655,
      "grad_norm": 0.626952052116394,
      "learning_rate": 6.049695907434618e-06,
      "loss": 0.0163,
      "step": 1219020
    },
    {
      "epoch": 1.9949856967983086,
      "grad_norm": 0.7245553135871887,
      "learning_rate": 6.049630015221102e-06,
      "loss": 0.017,
      "step": 1219040
    },
    {
      "epoch": 1.9950184272369618,
      "grad_norm": 0.1526959389448166,
      "learning_rate": 6.049564123007585e-06,
      "loss": 0.0188,
      "step": 1219060
    },
    {
      "epoch": 1.9950511576756151,
      "grad_norm": 0.07267403602600098,
      "learning_rate": 6.049498230794067e-06,
      "loss": 0.0159,
      "step": 1219080
    },
    {
      "epoch": 1.9950838881142685,
      "grad_norm": 0.18501687049865723,
      "learning_rate": 6.049432338580551e-06,
      "loss": 0.0161,
      "step": 1219100
    },
    {
      "epoch": 1.9951166185529219,
      "grad_norm": 0.25125792622566223,
      "learning_rate": 6.0493664463670334e-06,
      "loss": 0.0164,
      "step": 1219120
    },
    {
      "epoch": 1.9951493489915753,
      "grad_norm": 0.3768564462661743,
      "learning_rate": 6.049300554153516e-06,
      "loss": 0.0137,
      "step": 1219140
    },
    {
      "epoch": 1.9951820794302284,
      "grad_norm": 0.36557793617248535,
      "learning_rate": 6.049234661939999e-06,
      "loss": 0.0144,
      "step": 1219160
    },
    {
      "epoch": 1.995214809868882,
      "grad_norm": 0.2040427029132843,
      "learning_rate": 6.0491687697264825e-06,
      "loss": 0.0258,
      "step": 1219180
    },
    {
      "epoch": 1.9952475403075352,
      "grad_norm": 0.3423623740673065,
      "learning_rate": 6.049102877512964e-06,
      "loss": 0.021,
      "step": 1219200
    },
    {
      "epoch": 1.9952802707461885,
      "grad_norm": 0.8251057267189026,
      "learning_rate": 6.049036985299448e-06,
      "loss": 0.0162,
      "step": 1219220
    },
    {
      "epoch": 1.995313001184842,
      "grad_norm": 1.5342203378677368,
      "learning_rate": 6.04897109308593e-06,
      "loss": 0.0141,
      "step": 1219240
    },
    {
      "epoch": 1.9953457316234953,
      "grad_norm": 0.09806805104017258,
      "learning_rate": 6.0489052008724135e-06,
      "loss": 0.0187,
      "step": 1219260
    },
    {
      "epoch": 1.9953784620621486,
      "grad_norm": 0.47144174575805664,
      "learning_rate": 6.048839308658896e-06,
      "loss": 0.0212,
      "step": 1219280
    },
    {
      "epoch": 1.9954111925008018,
      "grad_norm": 0.989436686038971,
      "learning_rate": 6.048773416445379e-06,
      "loss": 0.0164,
      "step": 1219300
    },
    {
      "epoch": 1.9954439229394554,
      "grad_norm": 0.47404131293296814,
      "learning_rate": 6.048707524231862e-06,
      "loss": 0.0218,
      "step": 1219320
    },
    {
      "epoch": 1.9954766533781085,
      "grad_norm": 1.0320684909820557,
      "learning_rate": 6.048641632018345e-06,
      "loss": 0.0171,
      "step": 1219340
    },
    {
      "epoch": 1.995509383816762,
      "grad_norm": 0.29350847005844116,
      "learning_rate": 6.048575739804827e-06,
      "loss": 0.0204,
      "step": 1219360
    },
    {
      "epoch": 1.9955421142554153,
      "grad_norm": 0.32513976097106934,
      "learning_rate": 6.048509847591311e-06,
      "loss": 0.0166,
      "step": 1219380
    },
    {
      "epoch": 1.9955748446940684,
      "grad_norm": 0.3499203622341156,
      "learning_rate": 6.048443955377794e-06,
      "loss": 0.0087,
      "step": 1219400
    },
    {
      "epoch": 1.995607575132722,
      "grad_norm": 0.16453827917575836,
      "learning_rate": 6.048378063164276e-06,
      "loss": 0.0122,
      "step": 1219420
    },
    {
      "epoch": 1.9956403055713752,
      "grad_norm": 0.8237523436546326,
      "learning_rate": 6.04831217095076e-06,
      "loss": 0.0194,
      "step": 1219440
    },
    {
      "epoch": 1.9956730360100288,
      "grad_norm": 0.43757709860801697,
      "learning_rate": 6.048246278737242e-06,
      "loss": 0.0154,
      "step": 1219460
    },
    {
      "epoch": 1.995705766448682,
      "grad_norm": 0.9383922219276428,
      "learning_rate": 6.048180386523725e-06,
      "loss": 0.0206,
      "step": 1219480
    },
    {
      "epoch": 1.9957384968873353,
      "grad_norm": 1.7432427406311035,
      "learning_rate": 6.048114494310208e-06,
      "loss": 0.0217,
      "step": 1219500
    },
    {
      "epoch": 1.9957712273259887,
      "grad_norm": 0.18429821729660034,
      "learning_rate": 6.048048602096691e-06,
      "loss": 0.0216,
      "step": 1219520
    },
    {
      "epoch": 1.9958039577646418,
      "grad_norm": 0.3458104729652405,
      "learning_rate": 6.0479827098831735e-06,
      "loss": 0.0146,
      "step": 1219540
    },
    {
      "epoch": 1.9958366882032954,
      "grad_norm": 0.17333653569221497,
      "learning_rate": 6.047916817669657e-06,
      "loss": 0.0214,
      "step": 1219560
    },
    {
      "epoch": 1.9958694186419486,
      "grad_norm": 0.5470845699310303,
      "learning_rate": 6.047850925456139e-06,
      "loss": 0.0149,
      "step": 1219580
    },
    {
      "epoch": 1.995902149080602,
      "grad_norm": 0.43035462498664856,
      "learning_rate": 6.047785033242623e-06,
      "loss": 0.014,
      "step": 1219600
    },
    {
      "epoch": 1.9959348795192553,
      "grad_norm": 0.23511618375778198,
      "learning_rate": 6.0477191410291045e-06,
      "loss": 0.0202,
      "step": 1219620
    },
    {
      "epoch": 1.9959676099579087,
      "grad_norm": 1.406805157661438,
      "learning_rate": 6.047653248815588e-06,
      "loss": 0.021,
      "step": 1219640
    },
    {
      "epoch": 1.996000340396562,
      "grad_norm": 0.37471991777420044,
      "learning_rate": 6.047587356602071e-06,
      "loss": 0.0108,
      "step": 1219660
    },
    {
      "epoch": 1.9960330708352152,
      "grad_norm": 0.3895833194255829,
      "learning_rate": 6.0475214643885535e-06,
      "loss": 0.0173,
      "step": 1219680
    },
    {
      "epoch": 1.9960658012738688,
      "grad_norm": 0.5864596962928772,
      "learning_rate": 6.047455572175036e-06,
      "loss": 0.0138,
      "step": 1219700
    },
    {
      "epoch": 1.996098531712522,
      "grad_norm": 0.8479230403900146,
      "learning_rate": 6.04738967996152e-06,
      "loss": 0.0114,
      "step": 1219720
    },
    {
      "epoch": 1.9961312621511753,
      "grad_norm": 0.42655205726623535,
      "learning_rate": 6.047323787748002e-06,
      "loss": 0.0158,
      "step": 1219740
    },
    {
      "epoch": 1.9961639925898287,
      "grad_norm": 0.421209454536438,
      "learning_rate": 6.047257895534485e-06,
      "loss": 0.0156,
      "step": 1219760
    },
    {
      "epoch": 1.996196723028482,
      "grad_norm": 0.5963245034217834,
      "learning_rate": 6.047192003320969e-06,
      "loss": 0.0136,
      "step": 1219780
    },
    {
      "epoch": 1.9962294534671354,
      "grad_norm": 0.3650436997413635,
      "learning_rate": 6.047126111107451e-06,
      "loss": 0.0143,
      "step": 1219800
    },
    {
      "epoch": 1.9962621839057886,
      "grad_norm": 0.41440844535827637,
      "learning_rate": 6.047060218893934e-06,
      "loss": 0.0236,
      "step": 1219820
    },
    {
      "epoch": 1.9962949143444422,
      "grad_norm": 0.3203829228878021,
      "learning_rate": 6.046994326680416e-06,
      "loss": 0.017,
      "step": 1219840
    },
    {
      "epoch": 1.9963276447830953,
      "grad_norm": 0.6470485925674438,
      "learning_rate": 6.0469284344669e-06,
      "loss": 0.0181,
      "step": 1219860
    },
    {
      "epoch": 1.9963603752217487,
      "grad_norm": 0.48219388723373413,
      "learning_rate": 6.046862542253382e-06,
      "loss": 0.0228,
      "step": 1219880
    },
    {
      "epoch": 1.996393105660402,
      "grad_norm": 0.09594869613647461,
      "learning_rate": 6.046796650039865e-06,
      "loss": 0.0107,
      "step": 1219900
    },
    {
      "epoch": 1.9964258360990554,
      "grad_norm": 0.8989588618278503,
      "learning_rate": 6.046730757826348e-06,
      "loss": 0.0155,
      "step": 1219920
    },
    {
      "epoch": 1.9964585665377088,
      "grad_norm": 0.315464586019516,
      "learning_rate": 6.046664865612831e-06,
      "loss": 0.0152,
      "step": 1219940
    },
    {
      "epoch": 1.996491296976362,
      "grad_norm": 0.3769330084323883,
      "learning_rate": 6.046598973399314e-06,
      "loss": 0.0202,
      "step": 1219960
    },
    {
      "epoch": 1.9965240274150156,
      "grad_norm": 0.7450719475746155,
      "learning_rate": 6.046533081185797e-06,
      "loss": 0.0194,
      "step": 1219980
    },
    {
      "epoch": 1.9965567578536687,
      "grad_norm": 0.6583200693130493,
      "learning_rate": 6.046467188972279e-06,
      "loss": 0.0173,
      "step": 1220000
    },
    {
      "epoch": 1.996589488292322,
      "grad_norm": 0.5308414101600647,
      "learning_rate": 6.046401296758763e-06,
      "loss": 0.0167,
      "step": 1220020
    },
    {
      "epoch": 1.9966222187309755,
      "grad_norm": 0.6769556403160095,
      "learning_rate": 6.0463354045452446e-06,
      "loss": 0.0198,
      "step": 1220040
    },
    {
      "epoch": 1.9966549491696288,
      "grad_norm": 0.6821449995040894,
      "learning_rate": 6.046269512331728e-06,
      "loss": 0.0137,
      "step": 1220060
    },
    {
      "epoch": 1.9966876796082822,
      "grad_norm": 1.2705875635147095,
      "learning_rate": 6.046203620118211e-06,
      "loss": 0.0141,
      "step": 1220080
    },
    {
      "epoch": 1.9967204100469353,
      "grad_norm": 0.3246798813343048,
      "learning_rate": 6.046137727904694e-06,
      "loss": 0.0147,
      "step": 1220100
    },
    {
      "epoch": 1.996753140485589,
      "grad_norm": 0.2780791223049164,
      "learning_rate": 6.046071835691177e-06,
      "loss": 0.0206,
      "step": 1220120
    },
    {
      "epoch": 1.996785870924242,
      "grad_norm": 0.08759503811597824,
      "learning_rate": 6.04600594347766e-06,
      "loss": 0.0236,
      "step": 1220140
    },
    {
      "epoch": 1.9968186013628955,
      "grad_norm": 0.44570404291152954,
      "learning_rate": 6.045940051264143e-06,
      "loss": 0.0137,
      "step": 1220160
    },
    {
      "epoch": 1.9968513318015488,
      "grad_norm": 0.46758556365966797,
      "learning_rate": 6.0458741590506254e-06,
      "loss": 0.0145,
      "step": 1220180
    },
    {
      "epoch": 1.996884062240202,
      "grad_norm": 0.30859482288360596,
      "learning_rate": 6.045808266837109e-06,
      "loss": 0.0197,
      "step": 1220200
    },
    {
      "epoch": 1.9969167926788556,
      "grad_norm": 0.157881498336792,
      "learning_rate": 6.045742374623591e-06,
      "loss": 0.0213,
      "step": 1220220
    },
    {
      "epoch": 1.9969495231175087,
      "grad_norm": 0.4179787039756775,
      "learning_rate": 6.0456764824100745e-06,
      "loss": 0.021,
      "step": 1220240
    },
    {
      "epoch": 1.9969822535561623,
      "grad_norm": 0.18045401573181152,
      "learning_rate": 6.045610590196556e-06,
      "loss": 0.0205,
      "step": 1220260
    },
    {
      "epoch": 1.9970149839948155,
      "grad_norm": 1.0805385112762451,
      "learning_rate": 6.04554469798304e-06,
      "loss": 0.0145,
      "step": 1220280
    },
    {
      "epoch": 1.9970477144334688,
      "grad_norm": 0.3883034884929657,
      "learning_rate": 6.045478805769523e-06,
      "loss": 0.0204,
      "step": 1220300
    },
    {
      "epoch": 1.9970804448721222,
      "grad_norm": 0.5568737387657166,
      "learning_rate": 6.0454129135560055e-06,
      "loss": 0.0131,
      "step": 1220320
    },
    {
      "epoch": 1.9971131753107754,
      "grad_norm": 1.055076003074646,
      "learning_rate": 6.045347021342488e-06,
      "loss": 0.0219,
      "step": 1220340
    },
    {
      "epoch": 1.997145905749429,
      "grad_norm": 3.0513837337493896,
      "learning_rate": 6.045281129128972e-06,
      "loss": 0.0215,
      "step": 1220360
    },
    {
      "epoch": 1.9971786361880821,
      "grad_norm": 0.44256922602653503,
      "learning_rate": 6.045215236915454e-06,
      "loss": 0.0251,
      "step": 1220380
    },
    {
      "epoch": 1.9972113666267355,
      "grad_norm": 0.3335118293762207,
      "learning_rate": 6.045149344701937e-06,
      "loss": 0.017,
      "step": 1220400
    },
    {
      "epoch": 1.9972440970653889,
      "grad_norm": 0.28831571340560913,
      "learning_rate": 6.045083452488419e-06,
      "loss": 0.0116,
      "step": 1220420
    },
    {
      "epoch": 1.9972768275040422,
      "grad_norm": 0.8536890149116516,
      "learning_rate": 6.045017560274903e-06,
      "loss": 0.0137,
      "step": 1220440
    },
    {
      "epoch": 1.9973095579426956,
      "grad_norm": 0.24396447837352753,
      "learning_rate": 6.044951668061386e-06,
      "loss": 0.0175,
      "step": 1220460
    },
    {
      "epoch": 1.9973422883813488,
      "grad_norm": 0.12217283248901367,
      "learning_rate": 6.044885775847868e-06,
      "loss": 0.0158,
      "step": 1220480
    },
    {
      "epoch": 1.9973750188200023,
      "grad_norm": 0.3728529214859009,
      "learning_rate": 6.044819883634352e-06,
      "loss": 0.0161,
      "step": 1220500
    },
    {
      "epoch": 1.9974077492586555,
      "grad_norm": 0.47954052686691284,
      "learning_rate": 6.0447539914208345e-06,
      "loss": 0.0136,
      "step": 1220520
    },
    {
      "epoch": 1.9974404796973089,
      "grad_norm": 0.15624240040779114,
      "learning_rate": 6.044688099207317e-06,
      "loss": 0.0152,
      "step": 1220540
    },
    {
      "epoch": 1.9974732101359622,
      "grad_norm": 1.0496826171875,
      "learning_rate": 6.0446222069938e-06,
      "loss": 0.0185,
      "step": 1220560
    },
    {
      "epoch": 1.9975059405746156,
      "grad_norm": 0.3196271061897278,
      "learning_rate": 6.044556314780284e-06,
      "loss": 0.0171,
      "step": 1220580
    },
    {
      "epoch": 1.997538671013269,
      "grad_norm": 0.38798004388809204,
      "learning_rate": 6.0444904225667655e-06,
      "loss": 0.0158,
      "step": 1220600
    },
    {
      "epoch": 1.9975714014519221,
      "grad_norm": 0.24772560596466064,
      "learning_rate": 6.044424530353249e-06,
      "loss": 0.0135,
      "step": 1220620
    },
    {
      "epoch": 1.9976041318905757,
      "grad_norm": 1.6349340677261353,
      "learning_rate": 6.044358638139731e-06,
      "loss": 0.0179,
      "step": 1220640
    },
    {
      "epoch": 1.9976368623292289,
      "grad_norm": 0.5980433225631714,
      "learning_rate": 6.0442927459262146e-06,
      "loss": 0.0149,
      "step": 1220660
    },
    {
      "epoch": 1.9976695927678823,
      "grad_norm": 0.524573564529419,
      "learning_rate": 6.044226853712697e-06,
      "loss": 0.0204,
      "step": 1220680
    },
    {
      "epoch": 1.9977023232065356,
      "grad_norm": 1.046020269393921,
      "learning_rate": 6.04416096149918e-06,
      "loss": 0.015,
      "step": 1220700
    },
    {
      "epoch": 1.997735053645189,
      "grad_norm": 0.1428350955247879,
      "learning_rate": 6.044095069285663e-06,
      "loss": 0.0137,
      "step": 1220720
    },
    {
      "epoch": 1.9977677840838424,
      "grad_norm": 0.36052897572517395,
      "learning_rate": 6.044029177072146e-06,
      "loss": 0.0163,
      "step": 1220740
    },
    {
      "epoch": 1.9978005145224955,
      "grad_norm": 0.38634559512138367,
      "learning_rate": 6.043963284858628e-06,
      "loss": 0.0206,
      "step": 1220760
    },
    {
      "epoch": 1.9978332449611491,
      "grad_norm": 0.12081744521856308,
      "learning_rate": 6.043897392645112e-06,
      "loss": 0.0225,
      "step": 1220780
    },
    {
      "epoch": 1.9978659753998023,
      "grad_norm": 0.548491358757019,
      "learning_rate": 6.043831500431594e-06,
      "loss": 0.0152,
      "step": 1220800
    },
    {
      "epoch": 1.9978987058384556,
      "grad_norm": 0.12061324715614319,
      "learning_rate": 6.043765608218077e-06,
      "loss": 0.0184,
      "step": 1220820
    },
    {
      "epoch": 1.997931436277109,
      "grad_norm": 0.3775814175605774,
      "learning_rate": 6.043699716004561e-06,
      "loss": 0.0142,
      "step": 1220840
    },
    {
      "epoch": 1.9979641667157624,
      "grad_norm": 0.7340020537376404,
      "learning_rate": 6.043633823791043e-06,
      "loss": 0.0123,
      "step": 1220860
    },
    {
      "epoch": 1.9979968971544158,
      "grad_norm": 0.9833443760871887,
      "learning_rate": 6.043567931577526e-06,
      "loss": 0.0188,
      "step": 1220880
    },
    {
      "epoch": 1.998029627593069,
      "grad_norm": 1.1292331218719482,
      "learning_rate": 6.043502039364008e-06,
      "loss": 0.0208,
      "step": 1220900
    },
    {
      "epoch": 1.9980623580317225,
      "grad_norm": 0.17249053716659546,
      "learning_rate": 6.043436147150492e-06,
      "loss": 0.021,
      "step": 1220920
    },
    {
      "epoch": 1.9980950884703756,
      "grad_norm": 0.5671520233154297,
      "learning_rate": 6.043370254936975e-06,
      "loss": 0.0231,
      "step": 1220940
    },
    {
      "epoch": 1.998127818909029,
      "grad_norm": 0.6536344885826111,
      "learning_rate": 6.043304362723457e-06,
      "loss": 0.0228,
      "step": 1220960
    },
    {
      "epoch": 1.9981605493476824,
      "grad_norm": 0.6076210141181946,
      "learning_rate": 6.04323847050994e-06,
      "loss": 0.0173,
      "step": 1220980
    },
    {
      "epoch": 1.9981932797863355,
      "grad_norm": 0.2436821609735489,
      "learning_rate": 6.043172578296424e-06,
      "loss": 0.0159,
      "step": 1221000
    },
    {
      "epoch": 1.9982260102249891,
      "grad_norm": 0.6133850812911987,
      "learning_rate": 6.043106686082906e-06,
      "loss": 0.0221,
      "step": 1221020
    },
    {
      "epoch": 1.9982587406636423,
      "grad_norm": 0.5318171977996826,
      "learning_rate": 6.043040793869389e-06,
      "loss": 0.0153,
      "step": 1221040
    },
    {
      "epoch": 1.9982914711022957,
      "grad_norm": 0.5255840420722961,
      "learning_rate": 6.042974901655871e-06,
      "loss": 0.0155,
      "step": 1221060
    },
    {
      "epoch": 1.998324201540949,
      "grad_norm": 0.48918232321739197,
      "learning_rate": 6.042909009442355e-06,
      "loss": 0.0179,
      "step": 1221080
    },
    {
      "epoch": 1.9983569319796024,
      "grad_norm": 0.43034788966178894,
      "learning_rate": 6.042843117228837e-06,
      "loss": 0.0139,
      "step": 1221100
    },
    {
      "epoch": 1.9983896624182558,
      "grad_norm": 0.32903510332107544,
      "learning_rate": 6.04277722501532e-06,
      "loss": 0.0178,
      "step": 1221120
    },
    {
      "epoch": 1.998422392856909,
      "grad_norm": 0.3855810761451721,
      "learning_rate": 6.042711332801803e-06,
      "loss": 0.0193,
      "step": 1221140
    },
    {
      "epoch": 1.9984551232955625,
      "grad_norm": 0.5105814337730408,
      "learning_rate": 6.0426454405882865e-06,
      "loss": 0.0166,
      "step": 1221160
    },
    {
      "epoch": 1.9984878537342157,
      "grad_norm": 0.4794941842556,
      "learning_rate": 6.042579548374769e-06,
      "loss": 0.0189,
      "step": 1221180
    },
    {
      "epoch": 1.998520584172869,
      "grad_norm": 0.5993584394454956,
      "learning_rate": 6.042513656161252e-06,
      "loss": 0.0132,
      "step": 1221200
    },
    {
      "epoch": 1.9985533146115224,
      "grad_norm": 0.14617234468460083,
      "learning_rate": 6.0424477639477355e-06,
      "loss": 0.0111,
      "step": 1221220
    },
    {
      "epoch": 1.9985860450501758,
      "grad_norm": 0.2990714907646179,
      "learning_rate": 6.042381871734217e-06,
      "loss": 0.0155,
      "step": 1221240
    },
    {
      "epoch": 1.9986187754888292,
      "grad_norm": 0.6807911992073059,
      "learning_rate": 6.042315979520701e-06,
      "loss": 0.0122,
      "step": 1221260
    },
    {
      "epoch": 1.9986515059274823,
      "grad_norm": 0.42949748039245605,
      "learning_rate": 6.042250087307183e-06,
      "loss": 0.0185,
      "step": 1221280
    },
    {
      "epoch": 1.998684236366136,
      "grad_norm": 0.19586612284183502,
      "learning_rate": 6.0421841950936665e-06,
      "loss": 0.0218,
      "step": 1221300
    },
    {
      "epoch": 1.998716966804789,
      "grad_norm": 0.5960497260093689,
      "learning_rate": 6.042118302880149e-06,
      "loss": 0.0128,
      "step": 1221320
    },
    {
      "epoch": 1.9987496972434424,
      "grad_norm": 0.2841164469718933,
      "learning_rate": 6.042052410666632e-06,
      "loss": 0.021,
      "step": 1221340
    },
    {
      "epoch": 1.9987824276820958,
      "grad_norm": 0.4725308418273926,
      "learning_rate": 6.041986518453115e-06,
      "loss": 0.0169,
      "step": 1221360
    },
    {
      "epoch": 1.9988151581207492,
      "grad_norm": 0.920054018497467,
      "learning_rate": 6.041920626239598e-06,
      "loss": 0.0103,
      "step": 1221380
    },
    {
      "epoch": 1.9988478885594025,
      "grad_norm": 1.2862567901611328,
      "learning_rate": 6.04185473402608e-06,
      "loss": 0.0261,
      "step": 1221400
    },
    {
      "epoch": 1.9988806189980557,
      "grad_norm": 0.6551181674003601,
      "learning_rate": 6.041788841812564e-06,
      "loss": 0.0139,
      "step": 1221420
    },
    {
      "epoch": 1.9989133494367093,
      "grad_norm": 0.22327886521816254,
      "learning_rate": 6.041722949599046e-06,
      "loss": 0.0161,
      "step": 1221440
    },
    {
      "epoch": 1.9989460798753624,
      "grad_norm": 0.16968096792697906,
      "learning_rate": 6.041657057385529e-06,
      "loss": 0.0155,
      "step": 1221460
    },
    {
      "epoch": 1.9989788103140158,
      "grad_norm": 0.48331668972969055,
      "learning_rate": 6.041591165172012e-06,
      "loss": 0.0203,
      "step": 1221480
    },
    {
      "epoch": 1.9990115407526692,
      "grad_norm": 1.8808242082595825,
      "learning_rate": 6.041525272958495e-06,
      "loss": 0.02,
      "step": 1221500
    },
    {
      "epoch": 1.9990442711913226,
      "grad_norm": 0.22385545074939728,
      "learning_rate": 6.041459380744978e-06,
      "loss": 0.0151,
      "step": 1221520
    },
    {
      "epoch": 1.999077001629976,
      "grad_norm": 0.2528085708618164,
      "learning_rate": 6.041393488531461e-06,
      "loss": 0.0148,
      "step": 1221540
    },
    {
      "epoch": 1.999109732068629,
      "grad_norm": 0.10612094402313232,
      "learning_rate": 6.041327596317944e-06,
      "loss": 0.0145,
      "step": 1221560
    },
    {
      "epoch": 1.9991424625072827,
      "grad_norm": 0.4818928837776184,
      "learning_rate": 6.0412617041044265e-06,
      "loss": 0.0178,
      "step": 1221580
    },
    {
      "epoch": 1.9991751929459358,
      "grad_norm": 0.2336539775133133,
      "learning_rate": 6.04119581189091e-06,
      "loss": 0.0159,
      "step": 1221600
    },
    {
      "epoch": 1.9992079233845892,
      "grad_norm": 0.787284791469574,
      "learning_rate": 6.041129919677392e-06,
      "loss": 0.019,
      "step": 1221620
    },
    {
      "epoch": 1.9992406538232426,
      "grad_norm": 0.6896514296531677,
      "learning_rate": 6.041064027463876e-06,
      "loss": 0.0182,
      "step": 1221640
    },
    {
      "epoch": 1.9992733842618957,
      "grad_norm": 0.7981201410293579,
      "learning_rate": 6.0409981352503575e-06,
      "loss": 0.02,
      "step": 1221660
    },
    {
      "epoch": 1.9993061147005493,
      "grad_norm": 0.7490298748016357,
      "learning_rate": 6.040932243036841e-06,
      "loss": 0.0202,
      "step": 1221680
    },
    {
      "epoch": 1.9993388451392025,
      "grad_norm": 0.7269903421401978,
      "learning_rate": 6.040866350823324e-06,
      "loss": 0.0159,
      "step": 1221700
    },
    {
      "epoch": 1.999371575577856,
      "grad_norm": 0.3490962088108063,
      "learning_rate": 6.0408004586098066e-06,
      "loss": 0.0231,
      "step": 1221720
    },
    {
      "epoch": 1.9994043060165092,
      "grad_norm": 1.8154840469360352,
      "learning_rate": 6.040734566396289e-06,
      "loss": 0.018,
      "step": 1221740
    },
    {
      "epoch": 1.9994370364551626,
      "grad_norm": 0.2334931343793869,
      "learning_rate": 6.040668674182773e-06,
      "loss": 0.0165,
      "step": 1221760
    },
    {
      "epoch": 1.999469766893816,
      "grad_norm": 0.5931709408760071,
      "learning_rate": 6.040602781969255e-06,
      "loss": 0.0172,
      "step": 1221780
    },
    {
      "epoch": 1.999502497332469,
      "grad_norm": 0.8916232585906982,
      "learning_rate": 6.040536889755738e-06,
      "loss": 0.0145,
      "step": 1221800
    },
    {
      "epoch": 1.9995352277711227,
      "grad_norm": 0.12938901782035828,
      "learning_rate": 6.04047099754222e-06,
      "loss": 0.0152,
      "step": 1221820
    },
    {
      "epoch": 1.9995679582097758,
      "grad_norm": 0.7847040295600891,
      "learning_rate": 6.040405105328704e-06,
      "loss": 0.0282,
      "step": 1221840
    },
    {
      "epoch": 1.9996006886484292,
      "grad_norm": 0.9343494176864624,
      "learning_rate": 6.040339213115186e-06,
      "loss": 0.0145,
      "step": 1221860
    },
    {
      "epoch": 1.9996334190870826,
      "grad_norm": 0.1733705699443817,
      "learning_rate": 6.040273320901669e-06,
      "loss": 0.0141,
      "step": 1221880
    },
    {
      "epoch": 1.999666149525736,
      "grad_norm": 0.4056204855442047,
      "learning_rate": 6.040207428688153e-06,
      "loss": 0.0146,
      "step": 1221900
    },
    {
      "epoch": 1.9996988799643893,
      "grad_norm": 0.0734487920999527,
      "learning_rate": 6.040141536474635e-06,
      "loss": 0.0142,
      "step": 1221920
    },
    {
      "epoch": 1.9997316104030425,
      "grad_norm": 1.0434656143188477,
      "learning_rate": 6.040075644261118e-06,
      "loss": 0.023,
      "step": 1221940
    },
    {
      "epoch": 1.999764340841696,
      "grad_norm": 0.4384162127971649,
      "learning_rate": 6.040009752047601e-06,
      "loss": 0.0178,
      "step": 1221960
    },
    {
      "epoch": 1.9997970712803492,
      "grad_norm": 2.092710256576538,
      "learning_rate": 6.039943859834084e-06,
      "loss": 0.017,
      "step": 1221980
    },
    {
      "epoch": 1.9998298017190026,
      "grad_norm": 1.05023992061615,
      "learning_rate": 6.039877967620567e-06,
      "loss": 0.0216,
      "step": 1222000
    },
    {
      "epoch": 1.999862532157656,
      "grad_norm": 2.070911407470703,
      "learning_rate": 6.03981207540705e-06,
      "loss": 0.0299,
      "step": 1222020
    },
    {
      "epoch": 1.9998952625963093,
      "grad_norm": 0.5774217247962952,
      "learning_rate": 6.039746183193532e-06,
      "loss": 0.0232,
      "step": 1222040
    },
    {
      "epoch": 1.9999279930349627,
      "grad_norm": 0.25706782937049866,
      "learning_rate": 6.039680290980016e-06,
      "loss": 0.0187,
      "step": 1222060
    },
    {
      "epoch": 1.9999607234736159,
      "grad_norm": 0.6966260671615601,
      "learning_rate": 6.0396143987664976e-06,
      "loss": 0.0212,
      "step": 1222080
    },
    {
      "epoch": 1.9999934539122695,
      "grad_norm": 0.6016024947166443,
      "learning_rate": 6.039548506552981e-06,
      "loss": 0.0157,
      "step": 1222100
    },
    {
      "epoch": 2.0000261843509226,
      "grad_norm": 0.46261513233184814,
      "learning_rate": 6.039482614339464e-06,
      "loss": 0.0142,
      "step": 1222120
    },
    {
      "epoch": 2.000058914789576,
      "grad_norm": 0.6420915126800537,
      "learning_rate": 6.039416722125947e-06,
      "loss": 0.0114,
      "step": 1222140
    },
    {
      "epoch": 2.0000916452282294,
      "grad_norm": 0.3742426931858063,
      "learning_rate": 6.039350829912429e-06,
      "loss": 0.0116,
      "step": 1222160
    },
    {
      "epoch": 2.0001243756668825,
      "grad_norm": 1.9032474756240845,
      "learning_rate": 6.039284937698913e-06,
      "loss": 0.0202,
      "step": 1222180
    },
    {
      "epoch": 2.000157106105536,
      "grad_norm": 0.3608556091785431,
      "learning_rate": 6.039219045485395e-06,
      "loss": 0.0148,
      "step": 1222200
    },
    {
      "epoch": 2.0001898365441892,
      "grad_norm": 0.33974727988243103,
      "learning_rate": 6.0391531532718784e-06,
      "loss": 0.0144,
      "step": 1222220
    },
    {
      "epoch": 2.000222566982843,
      "grad_norm": 0.30228391289711,
      "learning_rate": 6.039087261058362e-06,
      "loss": 0.0113,
      "step": 1222240
    },
    {
      "epoch": 2.000255297421496,
      "grad_norm": 0.7427421808242798,
      "learning_rate": 6.039021368844844e-06,
      "loss": 0.0164,
      "step": 1222260
    },
    {
      "epoch": 2.0002880278601496,
      "grad_norm": 0.42415767908096313,
      "learning_rate": 6.0389554766313275e-06,
      "loss": 0.0096,
      "step": 1222280
    },
    {
      "epoch": 2.0003207582988027,
      "grad_norm": 0.3209080994129181,
      "learning_rate": 6.038889584417809e-06,
      "loss": 0.015,
      "step": 1222300
    },
    {
      "epoch": 2.000353488737456,
      "grad_norm": 0.4825674891471863,
      "learning_rate": 6.038823692204293e-06,
      "loss": 0.017,
      "step": 1222320
    },
    {
      "epoch": 2.0003862191761095,
      "grad_norm": 0.10433461517095566,
      "learning_rate": 6.038757799990776e-06,
      "loss": 0.0143,
      "step": 1222340
    },
    {
      "epoch": 2.0004189496147626,
      "grad_norm": 0.5343055725097656,
      "learning_rate": 6.0386919077772585e-06,
      "loss": 0.0146,
      "step": 1222360
    },
    {
      "epoch": 2.0004516800534162,
      "grad_norm": 0.4428478479385376,
      "learning_rate": 6.038626015563741e-06,
      "loss": 0.012,
      "step": 1222380
    },
    {
      "epoch": 2.0004844104920694,
      "grad_norm": 0.42904338240623474,
      "learning_rate": 6.038560123350225e-06,
      "loss": 0.0125,
      "step": 1222400
    },
    {
      "epoch": 2.000517140930723,
      "grad_norm": 0.28810471296310425,
      "learning_rate": 6.038494231136707e-06,
      "loss": 0.0134,
      "step": 1222420
    },
    {
      "epoch": 2.000549871369376,
      "grad_norm": 0.19675394892692566,
      "learning_rate": 6.03842833892319e-06,
      "loss": 0.0146,
      "step": 1222440
    },
    {
      "epoch": 2.0005826018080293,
      "grad_norm": 0.363599568605423,
      "learning_rate": 6.038362446709672e-06,
      "loss": 0.0163,
      "step": 1222460
    },
    {
      "epoch": 2.000615332246683,
      "grad_norm": 0.1636093109846115,
      "learning_rate": 6.038296554496156e-06,
      "loss": 0.0115,
      "step": 1222480
    },
    {
      "epoch": 2.000648062685336,
      "grad_norm": 1.7464433908462524,
      "learning_rate": 6.0382306622826385e-06,
      "loss": 0.0212,
      "step": 1222500
    },
    {
      "epoch": 2.0006807931239896,
      "grad_norm": 0.3467298746109009,
      "learning_rate": 6.038164770069121e-06,
      "loss": 0.0139,
      "step": 1222520
    },
    {
      "epoch": 2.0007135235626428,
      "grad_norm": 0.6284914016723633,
      "learning_rate": 6.038098877855604e-06,
      "loss": 0.0111,
      "step": 1222540
    },
    {
      "epoch": 2.000746254001296,
      "grad_norm": 0.14663487672805786,
      "learning_rate": 6.0380329856420876e-06,
      "loss": 0.0162,
      "step": 1222560
    },
    {
      "epoch": 2.0007789844399495,
      "grad_norm": 0.1157480776309967,
      "learning_rate": 6.03796709342857e-06,
      "loss": 0.0138,
      "step": 1222580
    },
    {
      "epoch": 2.0008117148786027,
      "grad_norm": 0.4134036600589752,
      "learning_rate": 6.037901201215053e-06,
      "loss": 0.0229,
      "step": 1222600
    },
    {
      "epoch": 2.0008444453172562,
      "grad_norm": 0.4217250645160675,
      "learning_rate": 6.037835309001537e-06,
      "loss": 0.0134,
      "step": 1222620
    },
    {
      "epoch": 2.0008771757559094,
      "grad_norm": 0.7350265979766846,
      "learning_rate": 6.0377694167880185e-06,
      "loss": 0.0146,
      "step": 1222640
    },
    {
      "epoch": 2.000909906194563,
      "grad_norm": 0.44440847635269165,
      "learning_rate": 6.037703524574502e-06,
      "loss": 0.0204,
      "step": 1222660
    },
    {
      "epoch": 2.000942636633216,
      "grad_norm": 0.35073864459991455,
      "learning_rate": 6.037637632360984e-06,
      "loss": 0.0104,
      "step": 1222680
    },
    {
      "epoch": 2.0009753670718693,
      "grad_norm": 0.19003157317638397,
      "learning_rate": 6.037571740147468e-06,
      "loss": 0.0164,
      "step": 1222700
    },
    {
      "epoch": 2.001008097510523,
      "grad_norm": 0.7343645095825195,
      "learning_rate": 6.0375058479339495e-06,
      "loss": 0.0125,
      "step": 1222720
    },
    {
      "epoch": 2.001040827949176,
      "grad_norm": 0.08225496113300323,
      "learning_rate": 6.037439955720433e-06,
      "loss": 0.0193,
      "step": 1222740
    },
    {
      "epoch": 2.0010735583878296,
      "grad_norm": 0.269543319940567,
      "learning_rate": 6.037374063506916e-06,
      "loss": 0.0233,
      "step": 1222760
    },
    {
      "epoch": 2.001106288826483,
      "grad_norm": 0.5356580018997192,
      "learning_rate": 6.037308171293399e-06,
      "loss": 0.0121,
      "step": 1222780
    },
    {
      "epoch": 2.0011390192651364,
      "grad_norm": 0.5022740364074707,
      "learning_rate": 6.037242279079881e-06,
      "loss": 0.0114,
      "step": 1222800
    },
    {
      "epoch": 2.0011717497037895,
      "grad_norm": 0.07905887067317963,
      "learning_rate": 6.037176386866365e-06,
      "loss": 0.0239,
      "step": 1222820
    },
    {
      "epoch": 2.0012044801424427,
      "grad_norm": 0.6061772108078003,
      "learning_rate": 6.037110494652847e-06,
      "loss": 0.0196,
      "step": 1222840
    },
    {
      "epoch": 2.0012372105810963,
      "grad_norm": 0.12583769857883453,
      "learning_rate": 6.03704460243933e-06,
      "loss": 0.0095,
      "step": 1222860
    },
    {
      "epoch": 2.0012699410197494,
      "grad_norm": 0.8288244009017944,
      "learning_rate": 6.036978710225812e-06,
      "loss": 0.0125,
      "step": 1222880
    },
    {
      "epoch": 2.001302671458403,
      "grad_norm": 0.2520681917667389,
      "learning_rate": 6.036912818012296e-06,
      "loss": 0.015,
      "step": 1222900
    },
    {
      "epoch": 2.001335401897056,
      "grad_norm": 0.3760837912559509,
      "learning_rate": 6.036846925798779e-06,
      "loss": 0.0113,
      "step": 1222920
    },
    {
      "epoch": 2.0013681323357098,
      "grad_norm": 0.12990738451480865,
      "learning_rate": 6.036781033585261e-06,
      "loss": 0.0196,
      "step": 1222940
    },
    {
      "epoch": 2.001400862774363,
      "grad_norm": 0.38598737120628357,
      "learning_rate": 6.036715141371745e-06,
      "loss": 0.0202,
      "step": 1222960
    },
    {
      "epoch": 2.001433593213016,
      "grad_norm": 0.16683949530124664,
      "learning_rate": 6.036649249158228e-06,
      "loss": 0.0094,
      "step": 1222980
    },
    {
      "epoch": 2.0014663236516697,
      "grad_norm": 0.606605589389801,
      "learning_rate": 6.03658335694471e-06,
      "loss": 0.0205,
      "step": 1223000
    },
    {
      "epoch": 2.001499054090323,
      "grad_norm": 0.23206590116024017,
      "learning_rate": 6.036517464731193e-06,
      "loss": 0.0147,
      "step": 1223020
    },
    {
      "epoch": 2.0015317845289764,
      "grad_norm": 0.6856670379638672,
      "learning_rate": 6.036451572517677e-06,
      "loss": 0.0121,
      "step": 1223040
    },
    {
      "epoch": 2.0015645149676295,
      "grad_norm": 0.08734673261642456,
      "learning_rate": 6.036385680304159e-06,
      "loss": 0.019,
      "step": 1223060
    },
    {
      "epoch": 2.001597245406283,
      "grad_norm": 0.36591285467147827,
      "learning_rate": 6.036319788090642e-06,
      "loss": 0.0147,
      "step": 1223080
    },
    {
      "epoch": 2.0016299758449363,
      "grad_norm": 0.4703797996044159,
      "learning_rate": 6.036253895877124e-06,
      "loss": 0.0136,
      "step": 1223100
    },
    {
      "epoch": 2.0016627062835894,
      "grad_norm": 0.33879098296165466,
      "learning_rate": 6.036188003663608e-06,
      "loss": 0.0157,
      "step": 1223120
    },
    {
      "epoch": 2.001695436722243,
      "grad_norm": 0.7510295510292053,
      "learning_rate": 6.03612211145009e-06,
      "loss": 0.0154,
      "step": 1223140
    },
    {
      "epoch": 2.001728167160896,
      "grad_norm": 0.5948439836502075,
      "learning_rate": 6.036056219236573e-06,
      "loss": 0.0149,
      "step": 1223160
    },
    {
      "epoch": 2.00176089759955,
      "grad_norm": 0.211724653840065,
      "learning_rate": 6.035990327023056e-06,
      "loss": 0.0212,
      "step": 1223180
    },
    {
      "epoch": 2.001793628038203,
      "grad_norm": 0.3598863184452057,
      "learning_rate": 6.0359244348095395e-06,
      "loss": 0.0155,
      "step": 1223200
    },
    {
      "epoch": 2.0018263584768565,
      "grad_norm": 0.307302862405777,
      "learning_rate": 6.035858542596021e-06,
      "loss": 0.0166,
      "step": 1223220
    },
    {
      "epoch": 2.0018590889155097,
      "grad_norm": 0.14907874166965485,
      "learning_rate": 6.035792650382505e-06,
      "loss": 0.0223,
      "step": 1223240
    },
    {
      "epoch": 2.001891819354163,
      "grad_norm": 0.7788983583450317,
      "learning_rate": 6.035726758168987e-06,
      "loss": 0.0105,
      "step": 1223260
    },
    {
      "epoch": 2.0019245497928164,
      "grad_norm": 1.3116649389266968,
      "learning_rate": 6.03566086595547e-06,
      "loss": 0.0112,
      "step": 1223280
    },
    {
      "epoch": 2.0019572802314696,
      "grad_norm": 0.4640100300312042,
      "learning_rate": 6.035594973741954e-06,
      "loss": 0.0173,
      "step": 1223300
    },
    {
      "epoch": 2.001990010670123,
      "grad_norm": 0.3995456099510193,
      "learning_rate": 6.035529081528436e-06,
      "loss": 0.0152,
      "step": 1223320
    },
    {
      "epoch": 2.0020227411087763,
      "grad_norm": 0.5691125988960266,
      "learning_rate": 6.0354631893149195e-06,
      "loss": 0.0159,
      "step": 1223340
    },
    {
      "epoch": 2.0020554715474295,
      "grad_norm": 0.29915308952331543,
      "learning_rate": 6.035397297101402e-06,
      "loss": 0.016,
      "step": 1223360
    },
    {
      "epoch": 2.002088201986083,
      "grad_norm": 0.6727917194366455,
      "learning_rate": 6.035331404887885e-06,
      "loss": 0.0173,
      "step": 1223380
    },
    {
      "epoch": 2.002120932424736,
      "grad_norm": 0.4286048710346222,
      "learning_rate": 6.035265512674368e-06,
      "loss": 0.0206,
      "step": 1223400
    },
    {
      "epoch": 2.00215366286339,
      "grad_norm": 0.4537118375301361,
      "learning_rate": 6.035199620460851e-06,
      "loss": 0.0194,
      "step": 1223420
    },
    {
      "epoch": 2.002186393302043,
      "grad_norm": 0.6973970532417297,
      "learning_rate": 6.035133728247333e-06,
      "loss": 0.0093,
      "step": 1223440
    },
    {
      "epoch": 2.0022191237406965,
      "grad_norm": 0.2813688814640045,
      "learning_rate": 6.035067836033817e-06,
      "loss": 0.0147,
      "step": 1223460
    },
    {
      "epoch": 2.0022518541793497,
      "grad_norm": 0.09425246715545654,
      "learning_rate": 6.035001943820299e-06,
      "loss": 0.0146,
      "step": 1223480
    },
    {
      "epoch": 2.002284584618003,
      "grad_norm": 0.19873006641864777,
      "learning_rate": 6.034936051606782e-06,
      "loss": 0.0158,
      "step": 1223500
    },
    {
      "epoch": 2.0023173150566564,
      "grad_norm": 0.3937681317329407,
      "learning_rate": 6.034870159393265e-06,
      "loss": 0.0151,
      "step": 1223520
    },
    {
      "epoch": 2.0023500454953096,
      "grad_norm": 0.1774405837059021,
      "learning_rate": 6.034804267179748e-06,
      "loss": 0.0116,
      "step": 1223540
    },
    {
      "epoch": 2.002382775933963,
      "grad_norm": 1.123980164527893,
      "learning_rate": 6.0347383749662305e-06,
      "loss": 0.0155,
      "step": 1223560
    },
    {
      "epoch": 2.0024155063726163,
      "grad_norm": 0.1961057335138321,
      "learning_rate": 6.034672482752714e-06,
      "loss": 0.0124,
      "step": 1223580
    },
    {
      "epoch": 2.00244823681127,
      "grad_norm": 0.1501932591199875,
      "learning_rate": 6.034606590539196e-06,
      "loss": 0.0102,
      "step": 1223600
    },
    {
      "epoch": 2.002480967249923,
      "grad_norm": 0.2925497889518738,
      "learning_rate": 6.0345406983256795e-06,
      "loss": 0.0127,
      "step": 1223620
    },
    {
      "epoch": 2.0025136976885762,
      "grad_norm": 0.5770562887191772,
      "learning_rate": 6.034474806112163e-06,
      "loss": 0.0186,
      "step": 1223640
    },
    {
      "epoch": 2.00254642812723,
      "grad_norm": 0.3978562653064728,
      "learning_rate": 6.034408913898645e-06,
      "loss": 0.0179,
      "step": 1223660
    },
    {
      "epoch": 2.002579158565883,
      "grad_norm": 0.6234185099601746,
      "learning_rate": 6.034343021685129e-06,
      "loss": 0.0166,
      "step": 1223680
    },
    {
      "epoch": 2.0026118890045366,
      "grad_norm": 0.2592056393623352,
      "learning_rate": 6.0342771294716105e-06,
      "loss": 0.0164,
      "step": 1223700
    },
    {
      "epoch": 2.0026446194431897,
      "grad_norm": 0.2583523094654083,
      "learning_rate": 6.034211237258094e-06,
      "loss": 0.0094,
      "step": 1223720
    },
    {
      "epoch": 2.0026773498818433,
      "grad_norm": 0.3865657448768616,
      "learning_rate": 6.034145345044576e-06,
      "loss": 0.0183,
      "step": 1223740
    },
    {
      "epoch": 2.0027100803204965,
      "grad_norm": 0.3729238510131836,
      "learning_rate": 6.0340794528310596e-06,
      "loss": 0.016,
      "step": 1223760
    },
    {
      "epoch": 2.0027428107591496,
      "grad_norm": 0.18918947875499725,
      "learning_rate": 6.034013560617542e-06,
      "loss": 0.0213,
      "step": 1223780
    },
    {
      "epoch": 2.002775541197803,
      "grad_norm": 1.2851531505584717,
      "learning_rate": 6.033947668404025e-06,
      "loss": 0.0156,
      "step": 1223800
    },
    {
      "epoch": 2.0028082716364564,
      "grad_norm": 0.3999408781528473,
      "learning_rate": 6.033881776190508e-06,
      "loss": 0.0123,
      "step": 1223820
    },
    {
      "epoch": 2.00284100207511,
      "grad_norm": 0.27212920784950256,
      "learning_rate": 6.033815883976991e-06,
      "loss": 0.0131,
      "step": 1223840
    },
    {
      "epoch": 2.002873732513763,
      "grad_norm": 0.9892444610595703,
      "learning_rate": 6.033749991763473e-06,
      "loss": 0.0178,
      "step": 1223860
    },
    {
      "epoch": 2.0029064629524167,
      "grad_norm": 0.1927911788225174,
      "learning_rate": 6.033684099549957e-06,
      "loss": 0.0134,
      "step": 1223880
    },
    {
      "epoch": 2.00293919339107,
      "grad_norm": 0.15286123752593994,
      "learning_rate": 6.033618207336439e-06,
      "loss": 0.0151,
      "step": 1223900
    },
    {
      "epoch": 2.002971923829723,
      "grad_norm": 0.6917442679405212,
      "learning_rate": 6.033552315122922e-06,
      "loss": 0.0182,
      "step": 1223920
    },
    {
      "epoch": 2.0030046542683766,
      "grad_norm": 0.306000292301178,
      "learning_rate": 6.033486422909405e-06,
      "loss": 0.0143,
      "step": 1223940
    },
    {
      "epoch": 2.0030373847070297,
      "grad_norm": 0.23558183014392853,
      "learning_rate": 6.033420530695888e-06,
      "loss": 0.0125,
      "step": 1223960
    },
    {
      "epoch": 2.0030701151456833,
      "grad_norm": 0.18709984421730042,
      "learning_rate": 6.033354638482371e-06,
      "loss": 0.0188,
      "step": 1223980
    },
    {
      "epoch": 2.0031028455843365,
      "grad_norm": 1.1096508502960205,
      "learning_rate": 6.033288746268854e-06,
      "loss": 0.0149,
      "step": 1224000
    },
    {
      "epoch": 2.0031355760229896,
      "grad_norm": 0.3055257201194763,
      "learning_rate": 6.033222854055337e-06,
      "loss": 0.0103,
      "step": 1224020
    },
    {
      "epoch": 2.0031683064616432,
      "grad_norm": 0.3358205258846283,
      "learning_rate": 6.03315696184182e-06,
      "loss": 0.0162,
      "step": 1224040
    },
    {
      "epoch": 2.0032010369002964,
      "grad_norm": 0.3334810733795166,
      "learning_rate": 6.033091069628303e-06,
      "loss": 0.0154,
      "step": 1224060
    },
    {
      "epoch": 2.00323376733895,
      "grad_norm": 0.5398038029670715,
      "learning_rate": 6.033025177414785e-06,
      "loss": 0.0151,
      "step": 1224080
    },
    {
      "epoch": 2.003266497777603,
      "grad_norm": 0.37814876437187195,
      "learning_rate": 6.032959285201269e-06,
      "loss": 0.015,
      "step": 1224100
    },
    {
      "epoch": 2.0032992282162567,
      "grad_norm": 0.11899370700120926,
      "learning_rate": 6.0328933929877506e-06,
      "loss": 0.0151,
      "step": 1224120
    },
    {
      "epoch": 2.00333195865491,
      "grad_norm": 0.19737985730171204,
      "learning_rate": 6.032827500774234e-06,
      "loss": 0.0167,
      "step": 1224140
    },
    {
      "epoch": 2.003364689093563,
      "grad_norm": 0.9064337015151978,
      "learning_rate": 6.032761608560717e-06,
      "loss": 0.0119,
      "step": 1224160
    },
    {
      "epoch": 2.0033974195322166,
      "grad_norm": 0.36714398860931396,
      "learning_rate": 6.0326957163472e-06,
      "loss": 0.0154,
      "step": 1224180
    },
    {
      "epoch": 2.0034301499708698,
      "grad_norm": 0.22481222450733185,
      "learning_rate": 6.032629824133682e-06,
      "loss": 0.0131,
      "step": 1224200
    },
    {
      "epoch": 2.0034628804095234,
      "grad_norm": 0.12433046102523804,
      "learning_rate": 6.032563931920166e-06,
      "loss": 0.014,
      "step": 1224220
    },
    {
      "epoch": 2.0034956108481765,
      "grad_norm": 1.3446455001831055,
      "learning_rate": 6.032498039706648e-06,
      "loss": 0.0153,
      "step": 1224240
    },
    {
      "epoch": 2.00352834128683,
      "grad_norm": 0.3564808964729309,
      "learning_rate": 6.0324321474931314e-06,
      "loss": 0.0125,
      "step": 1224260
    },
    {
      "epoch": 2.0035610717254833,
      "grad_norm": 0.4257739782333374,
      "learning_rate": 6.032366255279613e-06,
      "loss": 0.0152,
      "step": 1224280
    },
    {
      "epoch": 2.0035938021641364,
      "grad_norm": 0.42427459359169006,
      "learning_rate": 6.032300363066097e-06,
      "loss": 0.0147,
      "step": 1224300
    },
    {
      "epoch": 2.00362653260279,
      "grad_norm": 0.42568260431289673,
      "learning_rate": 6.03223447085258e-06,
      "loss": 0.0171,
      "step": 1224320
    },
    {
      "epoch": 2.003659263041443,
      "grad_norm": 0.2820573151111603,
      "learning_rate": 6.032168578639062e-06,
      "loss": 0.0136,
      "step": 1224340
    },
    {
      "epoch": 2.0036919934800967,
      "grad_norm": 0.34438902139663696,
      "learning_rate": 6.032102686425546e-06,
      "loss": 0.0168,
      "step": 1224360
    },
    {
      "epoch": 2.00372472391875,
      "grad_norm": 1.1806752681732178,
      "learning_rate": 6.032036794212029e-06,
      "loss": 0.0185,
      "step": 1224380
    },
    {
      "epoch": 2.0037574543574035,
      "grad_norm": 1.3949447870254517,
      "learning_rate": 6.0319709019985115e-06,
      "loss": 0.0193,
      "step": 1224400
    },
    {
      "epoch": 2.0037901847960566,
      "grad_norm": 0.5282091498374939,
      "learning_rate": 6.031905009784994e-06,
      "loss": 0.0172,
      "step": 1224420
    },
    {
      "epoch": 2.00382291523471,
      "grad_norm": 0.29281482100486755,
      "learning_rate": 6.031839117571478e-06,
      "loss": 0.0119,
      "step": 1224440
    },
    {
      "epoch": 2.0038556456733634,
      "grad_norm": 0.5765005946159363,
      "learning_rate": 6.03177322535796e-06,
      "loss": 0.0191,
      "step": 1224460
    },
    {
      "epoch": 2.0038883761120165,
      "grad_norm": 0.4111303985118866,
      "learning_rate": 6.031707333144443e-06,
      "loss": 0.0126,
      "step": 1224480
    },
    {
      "epoch": 2.00392110655067,
      "grad_norm": 0.361747682094574,
      "learning_rate": 6.031641440930925e-06,
      "loss": 0.0148,
      "step": 1224500
    },
    {
      "epoch": 2.0039538369893233,
      "grad_norm": 0.6557356119155884,
      "learning_rate": 6.031575548717409e-06,
      "loss": 0.0172,
      "step": 1224520
    },
    {
      "epoch": 2.003986567427977,
      "grad_norm": 0.31935274600982666,
      "learning_rate": 6.0315096565038915e-06,
      "loss": 0.0124,
      "step": 1224540
    },
    {
      "epoch": 2.00401929786663,
      "grad_norm": 0.16592995822429657,
      "learning_rate": 6.031443764290374e-06,
      "loss": 0.0165,
      "step": 1224560
    },
    {
      "epoch": 2.004052028305283,
      "grad_norm": 0.13522844016551971,
      "learning_rate": 6.031377872076857e-06,
      "loss": 0.0163,
      "step": 1224580
    },
    {
      "epoch": 2.0040847587439368,
      "grad_norm": 0.34304678440093994,
      "learning_rate": 6.0313119798633406e-06,
      "loss": 0.0134,
      "step": 1224600
    },
    {
      "epoch": 2.00411748918259,
      "grad_norm": 0.5185806751251221,
      "learning_rate": 6.0312460876498225e-06,
      "loss": 0.0211,
      "step": 1224620
    },
    {
      "epoch": 2.0041502196212435,
      "grad_norm": 0.3593970537185669,
      "learning_rate": 6.031180195436306e-06,
      "loss": 0.0147,
      "step": 1224640
    },
    {
      "epoch": 2.0041829500598967,
      "grad_norm": 0.2895148694515228,
      "learning_rate": 6.031114303222788e-06,
      "loss": 0.0122,
      "step": 1224660
    },
    {
      "epoch": 2.0042156804985503,
      "grad_norm": 0.547143280506134,
      "learning_rate": 6.0310484110092715e-06,
      "loss": 0.0179,
      "step": 1224680
    },
    {
      "epoch": 2.0042484109372034,
      "grad_norm": 0.14402033388614655,
      "learning_rate": 6.030982518795755e-06,
      "loss": 0.0247,
      "step": 1224700
    },
    {
      "epoch": 2.0042811413758566,
      "grad_norm": 0.2409418225288391,
      "learning_rate": 6.030916626582237e-06,
      "loss": 0.016,
      "step": 1224720
    },
    {
      "epoch": 2.00431387181451,
      "grad_norm": 0.325206458568573,
      "learning_rate": 6.030850734368721e-06,
      "loss": 0.0129,
      "step": 1224740
    },
    {
      "epoch": 2.0043466022531633,
      "grad_norm": 0.0930337980389595,
      "learning_rate": 6.0307848421552025e-06,
      "loss": 0.0176,
      "step": 1224760
    },
    {
      "epoch": 2.004379332691817,
      "grad_norm": 0.7575500011444092,
      "learning_rate": 6.030718949941686e-06,
      "loss": 0.0126,
      "step": 1224780
    },
    {
      "epoch": 2.00441206313047,
      "grad_norm": 0.2888745367527008,
      "learning_rate": 6.030653057728169e-06,
      "loss": 0.0154,
      "step": 1224800
    },
    {
      "epoch": 2.004444793569123,
      "grad_norm": 0.2608526051044464,
      "learning_rate": 6.0305871655146515e-06,
      "loss": 0.0155,
      "step": 1224820
    },
    {
      "epoch": 2.004477524007777,
      "grad_norm": 0.128440260887146,
      "learning_rate": 6.030521273301134e-06,
      "loss": 0.0145,
      "step": 1224840
    },
    {
      "epoch": 2.00451025444643,
      "grad_norm": 0.7810267806053162,
      "learning_rate": 6.030455381087618e-06,
      "loss": 0.0139,
      "step": 1224860
    },
    {
      "epoch": 2.0045429848850835,
      "grad_norm": 0.6104456186294556,
      "learning_rate": 6.0303894888741e-06,
      "loss": 0.0157,
      "step": 1224880
    },
    {
      "epoch": 2.0045757153237367,
      "grad_norm": 0.3370600640773773,
      "learning_rate": 6.030323596660583e-06,
      "loss": 0.0169,
      "step": 1224900
    },
    {
      "epoch": 2.0046084457623903,
      "grad_norm": 0.7432395219802856,
      "learning_rate": 6.030257704447065e-06,
      "loss": 0.0113,
      "step": 1224920
    },
    {
      "epoch": 2.0046411762010434,
      "grad_norm": 1.4268128871917725,
      "learning_rate": 6.030191812233549e-06,
      "loss": 0.0114,
      "step": 1224940
    },
    {
      "epoch": 2.0046739066396966,
      "grad_norm": 1.6463608741760254,
      "learning_rate": 6.0301259200200316e-06,
      "loss": 0.0178,
      "step": 1224960
    },
    {
      "epoch": 2.00470663707835,
      "grad_norm": 0.3586215674877167,
      "learning_rate": 6.030060027806514e-06,
      "loss": 0.0135,
      "step": 1224980
    },
    {
      "epoch": 2.0047393675170033,
      "grad_norm": 0.9420548677444458,
      "learning_rate": 6.029994135592997e-06,
      "loss": 0.0128,
      "step": 1225000
    },
    {
      "epoch": 2.004772097955657,
      "grad_norm": 1.0036046504974365,
      "learning_rate": 6.029928243379481e-06,
      "loss": 0.0157,
      "step": 1225020
    },
    {
      "epoch": 2.00480482839431,
      "grad_norm": 0.34650856256484985,
      "learning_rate": 6.029862351165963e-06,
      "loss": 0.0177,
      "step": 1225040
    },
    {
      "epoch": 2.0048375588329637,
      "grad_norm": 0.4367949664592743,
      "learning_rate": 6.029796458952446e-06,
      "loss": 0.0155,
      "step": 1225060
    },
    {
      "epoch": 2.004870289271617,
      "grad_norm": 0.835675835609436,
      "learning_rate": 6.02973056673893e-06,
      "loss": 0.0185,
      "step": 1225080
    },
    {
      "epoch": 2.00490301971027,
      "grad_norm": 0.6347092390060425,
      "learning_rate": 6.029664674525412e-06,
      "loss": 0.0205,
      "step": 1225100
    },
    {
      "epoch": 2.0049357501489236,
      "grad_norm": 0.5197484493255615,
      "learning_rate": 6.029598782311895e-06,
      "loss": 0.0146,
      "step": 1225120
    },
    {
      "epoch": 2.0049684805875767,
      "grad_norm": 0.4958436191082001,
      "learning_rate": 6.029532890098377e-06,
      "loss": 0.0186,
      "step": 1225140
    },
    {
      "epoch": 2.0050012110262303,
      "grad_norm": 1.5000941753387451,
      "learning_rate": 6.029466997884861e-06,
      "loss": 0.0204,
      "step": 1225160
    },
    {
      "epoch": 2.0050339414648835,
      "grad_norm": 0.22164440155029297,
      "learning_rate": 6.029401105671343e-06,
      "loss": 0.017,
      "step": 1225180
    },
    {
      "epoch": 2.005066671903537,
      "grad_norm": 0.11170785129070282,
      "learning_rate": 6.029335213457826e-06,
      "loss": 0.0098,
      "step": 1225200
    },
    {
      "epoch": 2.00509940234219,
      "grad_norm": 0.42375028133392334,
      "learning_rate": 6.029269321244309e-06,
      "loss": 0.0094,
      "step": 1225220
    },
    {
      "epoch": 2.0051321327808433,
      "grad_norm": 0.320573091506958,
      "learning_rate": 6.0292034290307925e-06,
      "loss": 0.0125,
      "step": 1225240
    },
    {
      "epoch": 2.005164863219497,
      "grad_norm": 0.29028087854385376,
      "learning_rate": 6.029137536817274e-06,
      "loss": 0.0123,
      "step": 1225260
    },
    {
      "epoch": 2.00519759365815,
      "grad_norm": 0.7586820721626282,
      "learning_rate": 6.029071644603758e-06,
      "loss": 0.0141,
      "step": 1225280
    },
    {
      "epoch": 2.0052303240968037,
      "grad_norm": 0.4761984348297119,
      "learning_rate": 6.02900575239024e-06,
      "loss": 0.0134,
      "step": 1225300
    },
    {
      "epoch": 2.005263054535457,
      "grad_norm": 0.41342636942863464,
      "learning_rate": 6.0289398601767234e-06,
      "loss": 0.0111,
      "step": 1225320
    },
    {
      "epoch": 2.0052957849741104,
      "grad_norm": 0.4549380838871002,
      "learning_rate": 6.028873967963206e-06,
      "loss": 0.0182,
      "step": 1225340
    },
    {
      "epoch": 2.0053285154127636,
      "grad_norm": 0.31658369302749634,
      "learning_rate": 6.028808075749689e-06,
      "loss": 0.0133,
      "step": 1225360
    },
    {
      "epoch": 2.0053612458514167,
      "grad_norm": 0.6386006474494934,
      "learning_rate": 6.0287421835361725e-06,
      "loss": 0.0124,
      "step": 1225380
    },
    {
      "epoch": 2.0053939762900703,
      "grad_norm": 0.6681435108184814,
      "learning_rate": 6.028676291322655e-06,
      "loss": 0.0124,
      "step": 1225400
    },
    {
      "epoch": 2.0054267067287235,
      "grad_norm": 0.22234134376049042,
      "learning_rate": 6.028610399109138e-06,
      "loss": 0.0135,
      "step": 1225420
    },
    {
      "epoch": 2.005459437167377,
      "grad_norm": 0.897249162197113,
      "learning_rate": 6.028544506895621e-06,
      "loss": 0.0135,
      "step": 1225440
    },
    {
      "epoch": 2.00549216760603,
      "grad_norm": 0.30176156759262085,
      "learning_rate": 6.028478614682104e-06,
      "loss": 0.0175,
      "step": 1225460
    },
    {
      "epoch": 2.005524898044684,
      "grad_norm": 0.33785995841026306,
      "learning_rate": 6.028412722468586e-06,
      "loss": 0.0208,
      "step": 1225480
    },
    {
      "epoch": 2.005557628483337,
      "grad_norm": 0.4990294873714447,
      "learning_rate": 6.02834683025507e-06,
      "loss": 0.0217,
      "step": 1225500
    },
    {
      "epoch": 2.00559035892199,
      "grad_norm": 0.04889480024576187,
      "learning_rate": 6.028280938041552e-06,
      "loss": 0.015,
      "step": 1225520
    },
    {
      "epoch": 2.0056230893606437,
      "grad_norm": 0.8908076882362366,
      "learning_rate": 6.028215045828035e-06,
      "loss": 0.017,
      "step": 1225540
    },
    {
      "epoch": 2.005655819799297,
      "grad_norm": 0.4500870704650879,
      "learning_rate": 6.028149153614518e-06,
      "loss": 0.017,
      "step": 1225560
    },
    {
      "epoch": 2.0056885502379505,
      "grad_norm": 0.7261946201324463,
      "learning_rate": 6.028083261401001e-06,
      "loss": 0.0179,
      "step": 1225580
    },
    {
      "epoch": 2.0057212806766036,
      "grad_norm": 0.38045427203178406,
      "learning_rate": 6.0280173691874835e-06,
      "loss": 0.0143,
      "step": 1225600
    },
    {
      "epoch": 2.0057540111152568,
      "grad_norm": 0.5050378441810608,
      "learning_rate": 6.027951476973967e-06,
      "loss": 0.0155,
      "step": 1225620
    },
    {
      "epoch": 2.0057867415539103,
      "grad_norm": 0.1585230827331543,
      "learning_rate": 6.027885584760449e-06,
      "loss": 0.013,
      "step": 1225640
    },
    {
      "epoch": 2.0058194719925635,
      "grad_norm": 1.4250991344451904,
      "learning_rate": 6.0278196925469325e-06,
      "loss": 0.0235,
      "step": 1225660
    },
    {
      "epoch": 2.005852202431217,
      "grad_norm": 1.2090065479278564,
      "learning_rate": 6.0277538003334144e-06,
      "loss": 0.018,
      "step": 1225680
    },
    {
      "epoch": 2.0058849328698702,
      "grad_norm": 0.9258989095687866,
      "learning_rate": 6.027687908119898e-06,
      "loss": 0.018,
      "step": 1225700
    },
    {
      "epoch": 2.005917663308524,
      "grad_norm": 0.640485405921936,
      "learning_rate": 6.02762201590638e-06,
      "loss": 0.017,
      "step": 1225720
    },
    {
      "epoch": 2.005950393747177,
      "grad_norm": 0.4550510048866272,
      "learning_rate": 6.0275561236928635e-06,
      "loss": 0.0158,
      "step": 1225740
    },
    {
      "epoch": 2.00598312418583,
      "grad_norm": 0.20348413288593292,
      "learning_rate": 6.027490231479347e-06,
      "loss": 0.0159,
      "step": 1225760
    },
    {
      "epoch": 2.0060158546244837,
      "grad_norm": 0.29171910881996155,
      "learning_rate": 6.027424339265829e-06,
      "loss": 0.0157,
      "step": 1225780
    },
    {
      "epoch": 2.006048585063137,
      "grad_norm": 0.1345764547586441,
      "learning_rate": 6.0273584470523126e-06,
      "loss": 0.0111,
      "step": 1225800
    },
    {
      "epoch": 2.0060813155017905,
      "grad_norm": 0.5123981237411499,
      "learning_rate": 6.027292554838795e-06,
      "loss": 0.0151,
      "step": 1225820
    },
    {
      "epoch": 2.0061140459404436,
      "grad_norm": 0.1631348878145218,
      "learning_rate": 6.027226662625278e-06,
      "loss": 0.0147,
      "step": 1225840
    },
    {
      "epoch": 2.006146776379097,
      "grad_norm": 0.44906964898109436,
      "learning_rate": 6.027160770411761e-06,
      "loss": 0.0144,
      "step": 1225860
    },
    {
      "epoch": 2.0061795068177504,
      "grad_norm": 0.3565795123577118,
      "learning_rate": 6.027094878198244e-06,
      "loss": 0.0131,
      "step": 1225880
    },
    {
      "epoch": 2.0062122372564035,
      "grad_norm": 0.42132100462913513,
      "learning_rate": 6.027028985984726e-06,
      "loss": 0.0166,
      "step": 1225900
    },
    {
      "epoch": 2.006244967695057,
      "grad_norm": 4.843565940856934,
      "learning_rate": 6.02696309377121e-06,
      "loss": 0.0145,
      "step": 1225920
    },
    {
      "epoch": 2.0062776981337103,
      "grad_norm": 0.8066726326942444,
      "learning_rate": 6.026897201557692e-06,
      "loss": 0.016,
      "step": 1225940
    },
    {
      "epoch": 2.006310428572364,
      "grad_norm": 0.1736837476491928,
      "learning_rate": 6.026831309344175e-06,
      "loss": 0.0203,
      "step": 1225960
    },
    {
      "epoch": 2.006343159011017,
      "grad_norm": 0.08927775174379349,
      "learning_rate": 6.026765417130658e-06,
      "loss": 0.0191,
      "step": 1225980
    },
    {
      "epoch": 2.0063758894496706,
      "grad_norm": 0.12042590975761414,
      "learning_rate": 6.026699524917141e-06,
      "loss": 0.0148,
      "step": 1226000
    },
    {
      "epoch": 2.0064086198883238,
      "grad_norm": 0.7868331670761108,
      "learning_rate": 6.0266336327036236e-06,
      "loss": 0.0202,
      "step": 1226020
    },
    {
      "epoch": 2.006441350326977,
      "grad_norm": 0.11015242338180542,
      "learning_rate": 6.026567740490107e-06,
      "loss": 0.0116,
      "step": 1226040
    },
    {
      "epoch": 2.0064740807656305,
      "grad_norm": 0.28327158093452454,
      "learning_rate": 6.026501848276589e-06,
      "loss": 0.0212,
      "step": 1226060
    },
    {
      "epoch": 2.0065068112042836,
      "grad_norm": 0.4505389928817749,
      "learning_rate": 6.026435956063073e-06,
      "loss": 0.0178,
      "step": 1226080
    },
    {
      "epoch": 2.0065395416429372,
      "grad_norm": 0.29324960708618164,
      "learning_rate": 6.026370063849556e-06,
      "loss": 0.0145,
      "step": 1226100
    },
    {
      "epoch": 2.0065722720815904,
      "grad_norm": 0.8851131796836853,
      "learning_rate": 6.026304171636038e-06,
      "loss": 0.0182,
      "step": 1226120
    },
    {
      "epoch": 2.006605002520244,
      "grad_norm": 0.10438241809606552,
      "learning_rate": 6.026238279422522e-06,
      "loss": 0.0133,
      "step": 1226140
    },
    {
      "epoch": 2.006637732958897,
      "grad_norm": 0.7228714227676392,
      "learning_rate": 6.026172387209004e-06,
      "loss": 0.0138,
      "step": 1226160
    },
    {
      "epoch": 2.0066704633975503,
      "grad_norm": 0.5343668460845947,
      "learning_rate": 6.026106494995487e-06,
      "loss": 0.015,
      "step": 1226180
    },
    {
      "epoch": 2.006703193836204,
      "grad_norm": 1.6776576042175293,
      "learning_rate": 6.02604060278197e-06,
      "loss": 0.0141,
      "step": 1226200
    },
    {
      "epoch": 2.006735924274857,
      "grad_norm": 0.8957772850990295,
      "learning_rate": 6.025974710568453e-06,
      "loss": 0.0184,
      "step": 1226220
    },
    {
      "epoch": 2.0067686547135106,
      "grad_norm": 0.35636335611343384,
      "learning_rate": 6.025908818354935e-06,
      "loss": 0.0158,
      "step": 1226240
    },
    {
      "epoch": 2.0068013851521638,
      "grad_norm": 0.33725327253341675,
      "learning_rate": 6.025842926141419e-06,
      "loss": 0.0173,
      "step": 1226260
    },
    {
      "epoch": 2.0068341155908174,
      "grad_norm": 0.40521183609962463,
      "learning_rate": 6.025777033927901e-06,
      "loss": 0.0133,
      "step": 1226280
    },
    {
      "epoch": 2.0068668460294705,
      "grad_norm": 0.25685635209083557,
      "learning_rate": 6.0257111417143845e-06,
      "loss": 0.0201,
      "step": 1226300
    },
    {
      "epoch": 2.0068995764681237,
      "grad_norm": 0.10107776522636414,
      "learning_rate": 6.025645249500866e-06,
      "loss": 0.0183,
      "step": 1226320
    },
    {
      "epoch": 2.0069323069067773,
      "grad_norm": 0.22162564098834991,
      "learning_rate": 6.02557935728735e-06,
      "loss": 0.0196,
      "step": 1226340
    },
    {
      "epoch": 2.0069650373454304,
      "grad_norm": 0.7190473079681396,
      "learning_rate": 6.025513465073833e-06,
      "loss": 0.0136,
      "step": 1226360
    },
    {
      "epoch": 2.006997767784084,
      "grad_norm": 0.48370516300201416,
      "learning_rate": 6.025447572860315e-06,
      "loss": 0.0158,
      "step": 1226380
    },
    {
      "epoch": 2.007030498222737,
      "grad_norm": 1.708345651626587,
      "learning_rate": 6.025381680646798e-06,
      "loss": 0.0176,
      "step": 1226400
    },
    {
      "epoch": 2.0070632286613903,
      "grad_norm": 0.7422227263450623,
      "learning_rate": 6.025315788433282e-06,
      "loss": 0.0139,
      "step": 1226420
    },
    {
      "epoch": 2.007095959100044,
      "grad_norm": 0.22206421196460724,
      "learning_rate": 6.0252498962197645e-06,
      "loss": 0.0182,
      "step": 1226440
    },
    {
      "epoch": 2.007128689538697,
      "grad_norm": 0.3149913549423218,
      "learning_rate": 6.025184004006247e-06,
      "loss": 0.0189,
      "step": 1226460
    },
    {
      "epoch": 2.0071614199773506,
      "grad_norm": 0.5215367078781128,
      "learning_rate": 6.025118111792731e-06,
      "loss": 0.0159,
      "step": 1226480
    },
    {
      "epoch": 2.007194150416004,
      "grad_norm": 0.3534851372241974,
      "learning_rate": 6.025052219579213e-06,
      "loss": 0.0098,
      "step": 1226500
    },
    {
      "epoch": 2.0072268808546574,
      "grad_norm": 0.4388202130794525,
      "learning_rate": 6.024986327365696e-06,
      "loss": 0.0154,
      "step": 1226520
    },
    {
      "epoch": 2.0072596112933105,
      "grad_norm": 0.27293646335601807,
      "learning_rate": 6.024920435152178e-06,
      "loss": 0.0167,
      "step": 1226540
    },
    {
      "epoch": 2.0072923417319637,
      "grad_norm": 0.11456544697284698,
      "learning_rate": 6.024854542938662e-06,
      "loss": 0.0134,
      "step": 1226560
    },
    {
      "epoch": 2.0073250721706173,
      "grad_norm": 0.34513676166534424,
      "learning_rate": 6.024788650725144e-06,
      "loss": 0.0169,
      "step": 1226580
    },
    {
      "epoch": 2.0073578026092704,
      "grad_norm": 0.6129868626594543,
      "learning_rate": 6.024722758511627e-06,
      "loss": 0.0133,
      "step": 1226600
    },
    {
      "epoch": 2.007390533047924,
      "grad_norm": 0.22114846110343933,
      "learning_rate": 6.02465686629811e-06,
      "loss": 0.0178,
      "step": 1226620
    },
    {
      "epoch": 2.007423263486577,
      "grad_norm": 0.2351343035697937,
      "learning_rate": 6.0245909740845936e-06,
      "loss": 0.011,
      "step": 1226640
    },
    {
      "epoch": 2.0074559939252308,
      "grad_norm": 0.44653457403182983,
      "learning_rate": 6.0245250818710755e-06,
      "loss": 0.0188,
      "step": 1226660
    },
    {
      "epoch": 2.007488724363884,
      "grad_norm": 0.9111863970756531,
      "learning_rate": 6.024459189657559e-06,
      "loss": 0.014,
      "step": 1226680
    },
    {
      "epoch": 2.007521454802537,
      "grad_norm": 0.8126004338264465,
      "learning_rate": 6.024393297444041e-06,
      "loss": 0.0188,
      "step": 1226700
    },
    {
      "epoch": 2.0075541852411907,
      "grad_norm": 0.5750139951705933,
      "learning_rate": 6.0243274052305245e-06,
      "loss": 0.0154,
      "step": 1226720
    },
    {
      "epoch": 2.007586915679844,
      "grad_norm": 0.5663276314735413,
      "learning_rate": 6.024261513017006e-06,
      "loss": 0.0138,
      "step": 1226740
    },
    {
      "epoch": 2.0076196461184974,
      "grad_norm": 0.13646550476551056,
      "learning_rate": 6.02419562080349e-06,
      "loss": 0.0122,
      "step": 1226760
    },
    {
      "epoch": 2.0076523765571506,
      "grad_norm": 0.3816526532173157,
      "learning_rate": 6.024129728589973e-06,
      "loss": 0.0167,
      "step": 1226780
    },
    {
      "epoch": 2.007685106995804,
      "grad_norm": 0.11315029114484787,
      "learning_rate": 6.0240638363764555e-06,
      "loss": 0.0195,
      "step": 1226800
    },
    {
      "epoch": 2.0077178374344573,
      "grad_norm": 0.20862318575382233,
      "learning_rate": 6.023997944162939e-06,
      "loss": 0.0204,
      "step": 1226820
    },
    {
      "epoch": 2.0077505678731105,
      "grad_norm": 0.6030262112617493,
      "learning_rate": 6.023932051949422e-06,
      "loss": 0.0217,
      "step": 1226840
    },
    {
      "epoch": 2.007783298311764,
      "grad_norm": 0.506980299949646,
      "learning_rate": 6.0238661597359046e-06,
      "loss": 0.0148,
      "step": 1226860
    },
    {
      "epoch": 2.007816028750417,
      "grad_norm": 0.13923439383506775,
      "learning_rate": 6.023800267522387e-06,
      "loss": 0.0125,
      "step": 1226880
    },
    {
      "epoch": 2.007848759189071,
      "grad_norm": 0.28736236691474915,
      "learning_rate": 6.023734375308871e-06,
      "loss": 0.0125,
      "step": 1226900
    },
    {
      "epoch": 2.007881489627724,
      "grad_norm": 0.27063313126564026,
      "learning_rate": 6.023668483095353e-06,
      "loss": 0.0164,
      "step": 1226920
    },
    {
      "epoch": 2.0079142200663775,
      "grad_norm": 1.1793149709701538,
      "learning_rate": 6.023602590881836e-06,
      "loss": 0.0133,
      "step": 1226940
    },
    {
      "epoch": 2.0079469505050307,
      "grad_norm": 0.6027597784996033,
      "learning_rate": 6.023536698668318e-06,
      "loss": 0.0139,
      "step": 1226960
    },
    {
      "epoch": 2.007979680943684,
      "grad_norm": 0.5471397638320923,
      "learning_rate": 6.023470806454802e-06,
      "loss": 0.017,
      "step": 1226980
    },
    {
      "epoch": 2.0080124113823374,
      "grad_norm": 0.7631794810295105,
      "learning_rate": 6.023404914241285e-06,
      "loss": 0.0131,
      "step": 1227000
    },
    {
      "epoch": 2.0080451418209906,
      "grad_norm": 0.6124439835548401,
      "learning_rate": 6.023339022027767e-06,
      "loss": 0.0217,
      "step": 1227020
    },
    {
      "epoch": 2.008077872259644,
      "grad_norm": 0.33868977427482605,
      "learning_rate": 6.02327312981425e-06,
      "loss": 0.0096,
      "step": 1227040
    },
    {
      "epoch": 2.0081106026982973,
      "grad_norm": 0.17941662669181824,
      "learning_rate": 6.023207237600734e-06,
      "loss": 0.018,
      "step": 1227060
    },
    {
      "epoch": 2.0081433331369505,
      "grad_norm": 0.5319119095802307,
      "learning_rate": 6.0231413453872155e-06,
      "loss": 0.0183,
      "step": 1227080
    },
    {
      "epoch": 2.008176063575604,
      "grad_norm": 0.1985539346933365,
      "learning_rate": 6.023075453173699e-06,
      "loss": 0.0109,
      "step": 1227100
    },
    {
      "epoch": 2.0082087940142572,
      "grad_norm": 0.40152469277381897,
      "learning_rate": 6.023009560960181e-06,
      "loss": 0.0151,
      "step": 1227120
    },
    {
      "epoch": 2.008241524452911,
      "grad_norm": 0.5865761637687683,
      "learning_rate": 6.022943668746665e-06,
      "loss": 0.0227,
      "step": 1227140
    },
    {
      "epoch": 2.008274254891564,
      "grad_norm": 0.3732672333717346,
      "learning_rate": 6.022877776533148e-06,
      "loss": 0.0144,
      "step": 1227160
    },
    {
      "epoch": 2.0083069853302176,
      "grad_norm": 0.6981611251831055,
      "learning_rate": 6.02281188431963e-06,
      "loss": 0.0141,
      "step": 1227180
    },
    {
      "epoch": 2.0083397157688707,
      "grad_norm": 0.2607765793800354,
      "learning_rate": 6.022745992106114e-06,
      "loss": 0.0186,
      "step": 1227200
    },
    {
      "epoch": 2.008372446207524,
      "grad_norm": 0.28749480843544006,
      "learning_rate": 6.022680099892596e-06,
      "loss": 0.0122,
      "step": 1227220
    },
    {
      "epoch": 2.0084051766461775,
      "grad_norm": 0.5915899276733398,
      "learning_rate": 6.022614207679079e-06,
      "loss": 0.015,
      "step": 1227240
    },
    {
      "epoch": 2.0084379070848306,
      "grad_norm": 0.6874311566352844,
      "learning_rate": 6.022548315465562e-06,
      "loss": 0.0114,
      "step": 1227260
    },
    {
      "epoch": 2.008470637523484,
      "grad_norm": 0.17127883434295654,
      "learning_rate": 6.0224824232520455e-06,
      "loss": 0.0153,
      "step": 1227280
    },
    {
      "epoch": 2.0085033679621374,
      "grad_norm": 0.184360533952713,
      "learning_rate": 6.022416531038527e-06,
      "loss": 0.0146,
      "step": 1227300
    },
    {
      "epoch": 2.008536098400791,
      "grad_norm": 0.3835940361022949,
      "learning_rate": 6.022350638825011e-06,
      "loss": 0.0141,
      "step": 1227320
    },
    {
      "epoch": 2.008568828839444,
      "grad_norm": 0.18124021589756012,
      "learning_rate": 6.022284746611493e-06,
      "loss": 0.0139,
      "step": 1227340
    },
    {
      "epoch": 2.0086015592780972,
      "grad_norm": 0.38519981503486633,
      "learning_rate": 6.0222188543979764e-06,
      "loss": 0.016,
      "step": 1227360
    },
    {
      "epoch": 2.008634289716751,
      "grad_norm": 0.5505515933036804,
      "learning_rate": 6.022152962184459e-06,
      "loss": 0.0179,
      "step": 1227380
    },
    {
      "epoch": 2.008667020155404,
      "grad_norm": 0.3959536850452423,
      "learning_rate": 6.022087069970942e-06,
      "loss": 0.018,
      "step": 1227400
    },
    {
      "epoch": 2.0086997505940576,
      "grad_norm": 0.8054025769233704,
      "learning_rate": 6.022021177757425e-06,
      "loss": 0.0219,
      "step": 1227420
    },
    {
      "epoch": 2.0087324810327107,
      "grad_norm": 0.8313052654266357,
      "learning_rate": 6.021955285543908e-06,
      "loss": 0.0208,
      "step": 1227440
    },
    {
      "epoch": 2.0087652114713643,
      "grad_norm": 0.6899093985557556,
      "learning_rate": 6.02188939333039e-06,
      "loss": 0.0149,
      "step": 1227460
    },
    {
      "epoch": 2.0087979419100175,
      "grad_norm": 0.2021646648645401,
      "learning_rate": 6.021823501116874e-06,
      "loss": 0.0171,
      "step": 1227480
    },
    {
      "epoch": 2.0088306723486706,
      "grad_norm": 0.5724133849143982,
      "learning_rate": 6.021757608903357e-06,
      "loss": 0.0231,
      "step": 1227500
    },
    {
      "epoch": 2.0088634027873242,
      "grad_norm": 0.3736586570739746,
      "learning_rate": 6.021691716689839e-06,
      "loss": 0.016,
      "step": 1227520
    },
    {
      "epoch": 2.0088961332259774,
      "grad_norm": 0.3242764472961426,
      "learning_rate": 6.021625824476323e-06,
      "loss": 0.0125,
      "step": 1227540
    },
    {
      "epoch": 2.008928863664631,
      "grad_norm": 1.6148048639297485,
      "learning_rate": 6.021559932262805e-06,
      "loss": 0.0194,
      "step": 1227560
    },
    {
      "epoch": 2.008961594103284,
      "grad_norm": 0.2780245244503021,
      "learning_rate": 6.021494040049288e-06,
      "loss": 0.0145,
      "step": 1227580
    },
    {
      "epoch": 2.0089943245419377,
      "grad_norm": 0.72661954164505,
      "learning_rate": 6.02142814783577e-06,
      "loss": 0.025,
      "step": 1227600
    },
    {
      "epoch": 2.009027054980591,
      "grad_norm": 0.5595473647117615,
      "learning_rate": 6.021362255622254e-06,
      "loss": 0.0152,
      "step": 1227620
    },
    {
      "epoch": 2.009059785419244,
      "grad_norm": 0.34915265440940857,
      "learning_rate": 6.0212963634087365e-06,
      "loss": 0.0161,
      "step": 1227640
    },
    {
      "epoch": 2.0090925158578976,
      "grad_norm": 0.1005864217877388,
      "learning_rate": 6.021230471195219e-06,
      "loss": 0.0111,
      "step": 1227660
    },
    {
      "epoch": 2.0091252462965508,
      "grad_norm": 0.1641608476638794,
      "learning_rate": 6.021164578981702e-06,
      "loss": 0.0228,
      "step": 1227680
    },
    {
      "epoch": 2.0091579767352044,
      "grad_norm": 0.17256316542625427,
      "learning_rate": 6.0210986867681856e-06,
      "loss": 0.0104,
      "step": 1227700
    },
    {
      "epoch": 2.0091907071738575,
      "grad_norm": 0.5059282779693604,
      "learning_rate": 6.0210327945546674e-06,
      "loss": 0.0143,
      "step": 1227720
    },
    {
      "epoch": 2.009223437612511,
      "grad_norm": 0.07336322963237762,
      "learning_rate": 6.020966902341151e-06,
      "loss": 0.0106,
      "step": 1227740
    },
    {
      "epoch": 2.0092561680511642,
      "grad_norm": 0.5106291174888611,
      "learning_rate": 6.020901010127633e-06,
      "loss": 0.0176,
      "step": 1227760
    },
    {
      "epoch": 2.0092888984898174,
      "grad_norm": 0.5228101015090942,
      "learning_rate": 6.0208351179141165e-06,
      "loss": 0.0125,
      "step": 1227780
    },
    {
      "epoch": 2.009321628928471,
      "grad_norm": 0.4201902747154236,
      "learning_rate": 6.020769225700599e-06,
      "loss": 0.015,
      "step": 1227800
    },
    {
      "epoch": 2.009354359367124,
      "grad_norm": 0.6402708292007446,
      "learning_rate": 6.020703333487082e-06,
      "loss": 0.0169,
      "step": 1227820
    },
    {
      "epoch": 2.0093870898057777,
      "grad_norm": 0.2144860178232193,
      "learning_rate": 6.020637441273565e-06,
      "loss": 0.0153,
      "step": 1227840
    },
    {
      "epoch": 2.009419820244431,
      "grad_norm": 0.28281140327453613,
      "learning_rate": 6.020571549060048e-06,
      "loss": 0.0152,
      "step": 1227860
    },
    {
      "epoch": 2.009452550683084,
      "grad_norm": 0.2847467362880707,
      "learning_rate": 6.020505656846531e-06,
      "loss": 0.0186,
      "step": 1227880
    },
    {
      "epoch": 2.0094852811217376,
      "grad_norm": 0.2752729058265686,
      "learning_rate": 6.020439764633014e-06,
      "loss": 0.0145,
      "step": 1227900
    },
    {
      "epoch": 2.009518011560391,
      "grad_norm": 0.21045979857444763,
      "learning_rate": 6.020373872419497e-06,
      "loss": 0.0153,
      "step": 1227920
    },
    {
      "epoch": 2.0095507419990444,
      "grad_norm": 0.3643869161605835,
      "learning_rate": 6.020307980205979e-06,
      "loss": 0.0183,
      "step": 1227940
    },
    {
      "epoch": 2.0095834724376975,
      "grad_norm": 0.528791606426239,
      "learning_rate": 6.020242087992463e-06,
      "loss": 0.0135,
      "step": 1227960
    },
    {
      "epoch": 2.009616202876351,
      "grad_norm": 0.3969956934452057,
      "learning_rate": 6.020176195778945e-06,
      "loss": 0.0109,
      "step": 1227980
    },
    {
      "epoch": 2.0096489333150043,
      "grad_norm": 0.21057327091693878,
      "learning_rate": 6.020110303565428e-06,
      "loss": 0.0136,
      "step": 1228000
    },
    {
      "epoch": 2.0096816637536574,
      "grad_norm": 0.22055858373641968,
      "learning_rate": 6.020044411351911e-06,
      "loss": 0.0142,
      "step": 1228020
    },
    {
      "epoch": 2.009714394192311,
      "grad_norm": 0.18366892635822296,
      "learning_rate": 6.019978519138394e-06,
      "loss": 0.0139,
      "step": 1228040
    },
    {
      "epoch": 2.009747124630964,
      "grad_norm": 0.2834092378616333,
      "learning_rate": 6.0199126269248766e-06,
      "loss": 0.0153,
      "step": 1228060
    },
    {
      "epoch": 2.0097798550696178,
      "grad_norm": 0.16997632384300232,
      "learning_rate": 6.01984673471136e-06,
      "loss": 0.0127,
      "step": 1228080
    },
    {
      "epoch": 2.009812585508271,
      "grad_norm": 0.23048098385334015,
      "learning_rate": 6.019780842497842e-06,
      "loss": 0.0132,
      "step": 1228100
    },
    {
      "epoch": 2.0098453159469245,
      "grad_norm": 0.538585901260376,
      "learning_rate": 6.019714950284326e-06,
      "loss": 0.0168,
      "step": 1228120
    },
    {
      "epoch": 2.0098780463855777,
      "grad_norm": 0.1480533927679062,
      "learning_rate": 6.0196490580708075e-06,
      "loss": 0.0136,
      "step": 1228140
    },
    {
      "epoch": 2.009910776824231,
      "grad_norm": 0.2592417299747467,
      "learning_rate": 6.019583165857291e-06,
      "loss": 0.0198,
      "step": 1228160
    },
    {
      "epoch": 2.0099435072628844,
      "grad_norm": 0.2945173382759094,
      "learning_rate": 6.019517273643774e-06,
      "loss": 0.0127,
      "step": 1228180
    },
    {
      "epoch": 2.0099762377015375,
      "grad_norm": 0.56862473487854,
      "learning_rate": 6.019451381430257e-06,
      "loss": 0.0127,
      "step": 1228200
    },
    {
      "epoch": 2.010008968140191,
      "grad_norm": 0.627441942691803,
      "learning_rate": 6.01938548921674e-06,
      "loss": 0.0202,
      "step": 1228220
    },
    {
      "epoch": 2.0100416985788443,
      "grad_norm": 1.5727810859680176,
      "learning_rate": 6.019319597003223e-06,
      "loss": 0.0198,
      "step": 1228240
    },
    {
      "epoch": 2.010074429017498,
      "grad_norm": 0.7241994738578796,
      "learning_rate": 6.019253704789706e-06,
      "loss": 0.0128,
      "step": 1228260
    },
    {
      "epoch": 2.010107159456151,
      "grad_norm": 0.23546025156974792,
      "learning_rate": 6.019187812576188e-06,
      "loss": 0.0247,
      "step": 1228280
    },
    {
      "epoch": 2.010139889894804,
      "grad_norm": 0.6171746253967285,
      "learning_rate": 6.019121920362672e-06,
      "loss": 0.0164,
      "step": 1228300
    },
    {
      "epoch": 2.010172620333458,
      "grad_norm": 0.27656692266464233,
      "learning_rate": 6.019056028149154e-06,
      "loss": 0.0136,
      "step": 1228320
    },
    {
      "epoch": 2.010205350772111,
      "grad_norm": 0.6284191012382507,
      "learning_rate": 6.0189901359356375e-06,
      "loss": 0.012,
      "step": 1228340
    },
    {
      "epoch": 2.0102380812107645,
      "grad_norm": 0.4007972776889801,
      "learning_rate": 6.018924243722119e-06,
      "loss": 0.0126,
      "step": 1228360
    },
    {
      "epoch": 2.0102708116494177,
      "grad_norm": 0.8676570057868958,
      "learning_rate": 6.018858351508603e-06,
      "loss": 0.0187,
      "step": 1228380
    },
    {
      "epoch": 2.0103035420880713,
      "grad_norm": 0.46902427077293396,
      "learning_rate": 6.018792459295086e-06,
      "loss": 0.0187,
      "step": 1228400
    },
    {
      "epoch": 2.0103362725267244,
      "grad_norm": 1.2282706499099731,
      "learning_rate": 6.018726567081568e-06,
      "loss": 0.015,
      "step": 1228420
    },
    {
      "epoch": 2.0103690029653776,
      "grad_norm": 0.26079702377319336,
      "learning_rate": 6.018660674868051e-06,
      "loss": 0.0138,
      "step": 1228440
    },
    {
      "epoch": 2.010401733404031,
      "grad_norm": 0.6148037910461426,
      "learning_rate": 6.018594782654535e-06,
      "loss": 0.0149,
      "step": 1228460
    },
    {
      "epoch": 2.0104344638426843,
      "grad_norm": 0.3273760974407196,
      "learning_rate": 6.018528890441017e-06,
      "loss": 0.017,
      "step": 1228480
    },
    {
      "epoch": 2.010467194281338,
      "grad_norm": 0.3995833098888397,
      "learning_rate": 6.0184629982275e-06,
      "loss": 0.0165,
      "step": 1228500
    },
    {
      "epoch": 2.010499924719991,
      "grad_norm": 0.4089573621749878,
      "learning_rate": 6.018397106013982e-06,
      "loss": 0.0149,
      "step": 1228520
    },
    {
      "epoch": 2.0105326551586447,
      "grad_norm": 1.463512897491455,
      "learning_rate": 6.018331213800466e-06,
      "loss": 0.0173,
      "step": 1228540
    },
    {
      "epoch": 2.010565385597298,
      "grad_norm": 0.3688684403896332,
      "learning_rate": 6.018265321586949e-06,
      "loss": 0.0168,
      "step": 1228560
    },
    {
      "epoch": 2.010598116035951,
      "grad_norm": 0.294544517993927,
      "learning_rate": 6.018199429373431e-06,
      "loss": 0.0106,
      "step": 1228580
    },
    {
      "epoch": 2.0106308464746045,
      "grad_norm": 0.6928373575210571,
      "learning_rate": 6.018133537159915e-06,
      "loss": 0.0168,
      "step": 1228600
    },
    {
      "epoch": 2.0106635769132577,
      "grad_norm": 0.995782196521759,
      "learning_rate": 6.018067644946397e-06,
      "loss": 0.0172,
      "step": 1228620
    },
    {
      "epoch": 2.0106963073519113,
      "grad_norm": 0.7701922655105591,
      "learning_rate": 6.01800175273288e-06,
      "loss": 0.0128,
      "step": 1228640
    },
    {
      "epoch": 2.0107290377905644,
      "grad_norm": 0.4876612722873688,
      "learning_rate": 6.017935860519363e-06,
      "loss": 0.0165,
      "step": 1228660
    },
    {
      "epoch": 2.0107617682292176,
      "grad_norm": 0.22796709835529327,
      "learning_rate": 6.017869968305846e-06,
      "loss": 0.0124,
      "step": 1228680
    },
    {
      "epoch": 2.010794498667871,
      "grad_norm": 0.48609983921051025,
      "learning_rate": 6.0178040760923285e-06,
      "loss": 0.0115,
      "step": 1228700
    },
    {
      "epoch": 2.0108272291065243,
      "grad_norm": 0.267837256193161,
      "learning_rate": 6.017738183878812e-06,
      "loss": 0.0148,
      "step": 1228720
    },
    {
      "epoch": 2.010859959545178,
      "grad_norm": 0.8123835921287537,
      "learning_rate": 6.017672291665294e-06,
      "loss": 0.017,
      "step": 1228740
    },
    {
      "epoch": 2.010892689983831,
      "grad_norm": 0.2403428852558136,
      "learning_rate": 6.0176063994517775e-06,
      "loss": 0.0158,
      "step": 1228760
    },
    {
      "epoch": 2.0109254204224847,
      "grad_norm": 0.48340195417404175,
      "learning_rate": 6.0175405072382594e-06,
      "loss": 0.0145,
      "step": 1228780
    },
    {
      "epoch": 2.010958150861138,
      "grad_norm": 0.3096259832382202,
      "learning_rate": 6.017474615024743e-06,
      "loss": 0.0168,
      "step": 1228800
    },
    {
      "epoch": 2.010990881299791,
      "grad_norm": 0.6101552844047546,
      "learning_rate": 6.017408722811226e-06,
      "loss": 0.0165,
      "step": 1228820
    },
    {
      "epoch": 2.0110236117384446,
      "grad_norm": 0.3506346046924591,
      "learning_rate": 6.0173428305977085e-06,
      "loss": 0.0199,
      "step": 1228840
    },
    {
      "epoch": 2.0110563421770977,
      "grad_norm": 0.2500694990158081,
      "learning_rate": 6.017276938384191e-06,
      "loss": 0.0135,
      "step": 1228860
    },
    {
      "epoch": 2.0110890726157513,
      "grad_norm": 0.47772416472435,
      "learning_rate": 6.017211046170675e-06,
      "loss": 0.0203,
      "step": 1228880
    },
    {
      "epoch": 2.0111218030544045,
      "grad_norm": 0.9318038821220398,
      "learning_rate": 6.0171451539571576e-06,
      "loss": 0.0202,
      "step": 1228900
    },
    {
      "epoch": 2.011154533493058,
      "grad_norm": 0.32566502690315247,
      "learning_rate": 6.01707926174364e-06,
      "loss": 0.014,
      "step": 1228920
    },
    {
      "epoch": 2.011187263931711,
      "grad_norm": 0.33823153376579285,
      "learning_rate": 6.017013369530124e-06,
      "loss": 0.0199,
      "step": 1228940
    },
    {
      "epoch": 2.0112199943703644,
      "grad_norm": 0.35257354378700256,
      "learning_rate": 6.016947477316606e-06,
      "loss": 0.0127,
      "step": 1228960
    },
    {
      "epoch": 2.011252724809018,
      "grad_norm": 0.1019245833158493,
      "learning_rate": 6.016881585103089e-06,
      "loss": 0.019,
      "step": 1228980
    },
    {
      "epoch": 2.011285455247671,
      "grad_norm": 0.7403084635734558,
      "learning_rate": 6.016815692889571e-06,
      "loss": 0.0126,
      "step": 1229000
    },
    {
      "epoch": 2.0113181856863247,
      "grad_norm": 0.1647595763206482,
      "learning_rate": 6.016749800676055e-06,
      "loss": 0.0168,
      "step": 1229020
    },
    {
      "epoch": 2.011350916124978,
      "grad_norm": 0.21389593183994293,
      "learning_rate": 6.016683908462538e-06,
      "loss": 0.0174,
      "step": 1229040
    },
    {
      "epoch": 2.0113836465636314,
      "grad_norm": 0.09593231230974197,
      "learning_rate": 6.01661801624902e-06,
      "loss": 0.0125,
      "step": 1229060
    },
    {
      "epoch": 2.0114163770022846,
      "grad_norm": 0.37656474113464355,
      "learning_rate": 6.016552124035503e-06,
      "loss": 0.017,
      "step": 1229080
    },
    {
      "epoch": 2.0114491074409377,
      "grad_norm": 0.30689114332199097,
      "learning_rate": 6.016486231821987e-06,
      "loss": 0.017,
      "step": 1229100
    },
    {
      "epoch": 2.0114818378795913,
      "grad_norm": 0.697894811630249,
      "learning_rate": 6.0164203396084685e-06,
      "loss": 0.0115,
      "step": 1229120
    },
    {
      "epoch": 2.0115145683182445,
      "grad_norm": 1.3737610578536987,
      "learning_rate": 6.016354447394952e-06,
      "loss": 0.0144,
      "step": 1229140
    },
    {
      "epoch": 2.011547298756898,
      "grad_norm": 0.16345638036727905,
      "learning_rate": 6.016288555181434e-06,
      "loss": 0.0203,
      "step": 1229160
    },
    {
      "epoch": 2.0115800291955512,
      "grad_norm": 0.1636669784784317,
      "learning_rate": 6.016222662967918e-06,
      "loss": 0.022,
      "step": 1229180
    },
    {
      "epoch": 2.011612759634205,
      "grad_norm": 0.49639764428138733,
      "learning_rate": 6.0161567707544e-06,
      "loss": 0.0155,
      "step": 1229200
    },
    {
      "epoch": 2.011645490072858,
      "grad_norm": 0.9031341075897217,
      "learning_rate": 6.016090878540883e-06,
      "loss": 0.0129,
      "step": 1229220
    },
    {
      "epoch": 2.011678220511511,
      "grad_norm": 0.3523007929325104,
      "learning_rate": 6.016024986327366e-06,
      "loss": 0.0136,
      "step": 1229240
    },
    {
      "epoch": 2.0117109509501647,
      "grad_norm": 0.5156408548355103,
      "learning_rate": 6.015959094113849e-06,
      "loss": 0.0218,
      "step": 1229260
    },
    {
      "epoch": 2.011743681388818,
      "grad_norm": 0.3624066710472107,
      "learning_rate": 6.015893201900332e-06,
      "loss": 0.0162,
      "step": 1229280
    },
    {
      "epoch": 2.0117764118274715,
      "grad_norm": 0.16356045007705688,
      "learning_rate": 6.015827309686815e-06,
      "loss": 0.015,
      "step": 1229300
    },
    {
      "epoch": 2.0118091422661246,
      "grad_norm": 0.6963512897491455,
      "learning_rate": 6.0157614174732985e-06,
      "loss": 0.0194,
      "step": 1229320
    },
    {
      "epoch": 2.011841872704778,
      "grad_norm": 0.7665498852729797,
      "learning_rate": 6.01569552525978e-06,
      "loss": 0.018,
      "step": 1229340
    },
    {
      "epoch": 2.0118746031434314,
      "grad_norm": 0.4353765547275543,
      "learning_rate": 6.015629633046264e-06,
      "loss": 0.0225,
      "step": 1229360
    },
    {
      "epoch": 2.0119073335820845,
      "grad_norm": 0.3266426622867584,
      "learning_rate": 6.015563740832746e-06,
      "loss": 0.0162,
      "step": 1229380
    },
    {
      "epoch": 2.011940064020738,
      "grad_norm": 0.25038719177246094,
      "learning_rate": 6.0154978486192294e-06,
      "loss": 0.0144,
      "step": 1229400
    },
    {
      "epoch": 2.0119727944593913,
      "grad_norm": 0.5223196148872375,
      "learning_rate": 6.015431956405712e-06,
      "loss": 0.0148,
      "step": 1229420
    },
    {
      "epoch": 2.012005524898045,
      "grad_norm": 1.0062073469161987,
      "learning_rate": 6.015366064192195e-06,
      "loss": 0.0162,
      "step": 1229440
    },
    {
      "epoch": 2.012038255336698,
      "grad_norm": 0.9172870516777039,
      "learning_rate": 6.015300171978678e-06,
      "loss": 0.0147,
      "step": 1229460
    },
    {
      "epoch": 2.012070985775351,
      "grad_norm": 0.19565041363239288,
      "learning_rate": 6.015234279765161e-06,
      "loss": 0.0214,
      "step": 1229480
    },
    {
      "epoch": 2.0121037162140047,
      "grad_norm": 0.32593366503715515,
      "learning_rate": 6.015168387551643e-06,
      "loss": 0.0158,
      "step": 1229500
    },
    {
      "epoch": 2.012136446652658,
      "grad_norm": 0.4206654727458954,
      "learning_rate": 6.015102495338127e-06,
      "loss": 0.0154,
      "step": 1229520
    },
    {
      "epoch": 2.0121691770913115,
      "grad_norm": 0.3271133005619049,
      "learning_rate": 6.015036603124609e-06,
      "loss": 0.0141,
      "step": 1229540
    },
    {
      "epoch": 2.0122019075299646,
      "grad_norm": 2.015634536743164,
      "learning_rate": 6.014970710911092e-06,
      "loss": 0.0137,
      "step": 1229560
    },
    {
      "epoch": 2.0122346379686182,
      "grad_norm": 0.38400498032569885,
      "learning_rate": 6.014904818697574e-06,
      "loss": 0.0114,
      "step": 1229580
    },
    {
      "epoch": 2.0122673684072714,
      "grad_norm": 0.3837226629257202,
      "learning_rate": 6.014838926484058e-06,
      "loss": 0.017,
      "step": 1229600
    },
    {
      "epoch": 2.0123000988459245,
      "grad_norm": 0.7515317797660828,
      "learning_rate": 6.014773034270541e-06,
      "loss": 0.0169,
      "step": 1229620
    },
    {
      "epoch": 2.012332829284578,
      "grad_norm": 0.15420536696910858,
      "learning_rate": 6.014707142057023e-06,
      "loss": 0.0147,
      "step": 1229640
    },
    {
      "epoch": 2.0123655597232313,
      "grad_norm": 0.2847747206687927,
      "learning_rate": 6.014641249843507e-06,
      "loss": 0.0154,
      "step": 1229660
    },
    {
      "epoch": 2.012398290161885,
      "grad_norm": 0.2970518469810486,
      "learning_rate": 6.0145753576299895e-06,
      "loss": 0.017,
      "step": 1229680
    },
    {
      "epoch": 2.012431020600538,
      "grad_norm": 0.8205089569091797,
      "learning_rate": 6.014509465416472e-06,
      "loss": 0.0155,
      "step": 1229700
    },
    {
      "epoch": 2.0124637510391916,
      "grad_norm": 1.129616618156433,
      "learning_rate": 6.014443573202955e-06,
      "loss": 0.022,
      "step": 1229720
    },
    {
      "epoch": 2.0124964814778448,
      "grad_norm": 0.7987738847732544,
      "learning_rate": 6.0143776809894386e-06,
      "loss": 0.0181,
      "step": 1229740
    },
    {
      "epoch": 2.012529211916498,
      "grad_norm": 0.21664811670780182,
      "learning_rate": 6.0143117887759204e-06,
      "loss": 0.0117,
      "step": 1229760
    },
    {
      "epoch": 2.0125619423551515,
      "grad_norm": 0.2703593969345093,
      "learning_rate": 6.014245896562404e-06,
      "loss": 0.0115,
      "step": 1229780
    },
    {
      "epoch": 2.0125946727938047,
      "grad_norm": 0.30623412132263184,
      "learning_rate": 6.014180004348886e-06,
      "loss": 0.0135,
      "step": 1229800
    },
    {
      "epoch": 2.0126274032324583,
      "grad_norm": 0.6014374494552612,
      "learning_rate": 6.0141141121353695e-06,
      "loss": 0.0166,
      "step": 1229820
    },
    {
      "epoch": 2.0126601336711114,
      "grad_norm": 0.40570563077926636,
      "learning_rate": 6.014048219921852e-06,
      "loss": 0.0143,
      "step": 1229840
    },
    {
      "epoch": 2.012692864109765,
      "grad_norm": 0.23101846873760223,
      "learning_rate": 6.013982327708335e-06,
      "loss": 0.016,
      "step": 1229860
    },
    {
      "epoch": 2.012725594548418,
      "grad_norm": 0.17891699075698853,
      "learning_rate": 6.013916435494818e-06,
      "loss": 0.0129,
      "step": 1229880
    },
    {
      "epoch": 2.0127583249870713,
      "grad_norm": 0.3675610423088074,
      "learning_rate": 6.013850543281301e-06,
      "loss": 0.0155,
      "step": 1229900
    },
    {
      "epoch": 2.012791055425725,
      "grad_norm": 0.33460867404937744,
      "learning_rate": 6.013784651067783e-06,
      "loss": 0.0105,
      "step": 1229920
    },
    {
      "epoch": 2.012823785864378,
      "grad_norm": 0.39828044176101685,
      "learning_rate": 6.013718758854267e-06,
      "loss": 0.0165,
      "step": 1229940
    },
    {
      "epoch": 2.0128565163030316,
      "grad_norm": 0.3999267816543579,
      "learning_rate": 6.01365286664075e-06,
      "loss": 0.0176,
      "step": 1229960
    },
    {
      "epoch": 2.012889246741685,
      "grad_norm": 0.6254801154136658,
      "learning_rate": 6.013586974427232e-06,
      "loss": 0.0168,
      "step": 1229980
    },
    {
      "epoch": 2.0129219771803384,
      "grad_norm": 0.48288023471832275,
      "learning_rate": 6.013521082213716e-06,
      "loss": 0.0201,
      "step": 1230000
    },
    {
      "epoch": 2.0129547076189915,
      "grad_norm": 0.2873140573501587,
      "learning_rate": 6.013455190000198e-06,
      "loss": 0.0136,
      "step": 1230020
    },
    {
      "epoch": 2.0129874380576447,
      "grad_norm": 0.24411508440971375,
      "learning_rate": 6.013389297786681e-06,
      "loss": 0.0124,
      "step": 1230040
    },
    {
      "epoch": 2.0130201684962983,
      "grad_norm": 1.1932756900787354,
      "learning_rate": 6.013323405573164e-06,
      "loss": 0.0222,
      "step": 1230060
    },
    {
      "epoch": 2.0130528989349514,
      "grad_norm": 0.2521553635597229,
      "learning_rate": 6.013257513359647e-06,
      "loss": 0.0176,
      "step": 1230080
    },
    {
      "epoch": 2.013085629373605,
      "grad_norm": 0.49830570816993713,
      "learning_rate": 6.0131916211461296e-06,
      "loss": 0.0154,
      "step": 1230100
    },
    {
      "epoch": 2.013118359812258,
      "grad_norm": 1.6218414306640625,
      "learning_rate": 6.013125728932613e-06,
      "loss": 0.0152,
      "step": 1230120
    },
    {
      "epoch": 2.0131510902509113,
      "grad_norm": 0.7928922176361084,
      "learning_rate": 6.013059836719095e-06,
      "loss": 0.0144,
      "step": 1230140
    },
    {
      "epoch": 2.013183820689565,
      "grad_norm": 0.17647278308868408,
      "learning_rate": 6.012993944505579e-06,
      "loss": 0.0132,
      "step": 1230160
    },
    {
      "epoch": 2.013216551128218,
      "grad_norm": 0.6069343686103821,
      "learning_rate": 6.0129280522920605e-06,
      "loss": 0.0255,
      "step": 1230180
    },
    {
      "epoch": 2.0132492815668717,
      "grad_norm": 1.0310708284378052,
      "learning_rate": 6.012862160078544e-06,
      "loss": 0.0173,
      "step": 1230200
    },
    {
      "epoch": 2.013282012005525,
      "grad_norm": 0.1698404997587204,
      "learning_rate": 6.012796267865027e-06,
      "loss": 0.0118,
      "step": 1230220
    },
    {
      "epoch": 2.0133147424441784,
      "grad_norm": 0.3175817131996155,
      "learning_rate": 6.01273037565151e-06,
      "loss": 0.0168,
      "step": 1230240
    },
    {
      "epoch": 2.0133474728828316,
      "grad_norm": 0.30745917558670044,
      "learning_rate": 6.012664483437992e-06,
      "loss": 0.0105,
      "step": 1230260
    },
    {
      "epoch": 2.0133802033214847,
      "grad_norm": 0.19062215089797974,
      "learning_rate": 6.012598591224476e-06,
      "loss": 0.0193,
      "step": 1230280
    },
    {
      "epoch": 2.0134129337601383,
      "grad_norm": 0.8868634104728699,
      "learning_rate": 6.012532699010958e-06,
      "loss": 0.0153,
      "step": 1230300
    },
    {
      "epoch": 2.0134456641987915,
      "grad_norm": 1.0464553833007812,
      "learning_rate": 6.012466806797441e-06,
      "loss": 0.0151,
      "step": 1230320
    },
    {
      "epoch": 2.013478394637445,
      "grad_norm": 0.3229498565196991,
      "learning_rate": 6.012400914583925e-06,
      "loss": 0.0167,
      "step": 1230340
    },
    {
      "epoch": 2.013511125076098,
      "grad_norm": 0.2775716483592987,
      "learning_rate": 6.012335022370407e-06,
      "loss": 0.0156,
      "step": 1230360
    },
    {
      "epoch": 2.013543855514752,
      "grad_norm": 0.21499836444854736,
      "learning_rate": 6.0122691301568905e-06,
      "loss": 0.0246,
      "step": 1230380
    },
    {
      "epoch": 2.013576585953405,
      "grad_norm": 0.12607185542583466,
      "learning_rate": 6.012203237943372e-06,
      "loss": 0.0202,
      "step": 1230400
    },
    {
      "epoch": 2.013609316392058,
      "grad_norm": 0.21135668456554413,
      "learning_rate": 6.012137345729856e-06,
      "loss": 0.0177,
      "step": 1230420
    },
    {
      "epoch": 2.0136420468307117,
      "grad_norm": 0.4203006327152252,
      "learning_rate": 6.012071453516338e-06,
      "loss": 0.0152,
      "step": 1230440
    },
    {
      "epoch": 2.013674777269365,
      "grad_norm": 0.3362666368484497,
      "learning_rate": 6.012005561302821e-06,
      "loss": 0.0092,
      "step": 1230460
    },
    {
      "epoch": 2.0137075077080184,
      "grad_norm": 0.16229447722434998,
      "learning_rate": 6.011939669089304e-06,
      "loss": 0.01,
      "step": 1230480
    },
    {
      "epoch": 2.0137402381466716,
      "grad_norm": 0.6998298168182373,
      "learning_rate": 6.011873776875788e-06,
      "loss": 0.0135,
      "step": 1230500
    },
    {
      "epoch": 2.013772968585325,
      "grad_norm": 0.3510427176952362,
      "learning_rate": 6.01180788466227e-06,
      "loss": 0.0232,
      "step": 1230520
    },
    {
      "epoch": 2.0138056990239783,
      "grad_norm": 0.26323002576828003,
      "learning_rate": 6.011741992448753e-06,
      "loss": 0.0163,
      "step": 1230540
    },
    {
      "epoch": 2.0138384294626315,
      "grad_norm": 0.20648254454135895,
      "learning_rate": 6.011676100235235e-06,
      "loss": 0.0105,
      "step": 1230560
    },
    {
      "epoch": 2.013871159901285,
      "grad_norm": 0.4105761647224426,
      "learning_rate": 6.011610208021719e-06,
      "loss": 0.0174,
      "step": 1230580
    },
    {
      "epoch": 2.013903890339938,
      "grad_norm": 0.44955864548683167,
      "learning_rate": 6.011544315808201e-06,
      "loss": 0.0137,
      "step": 1230600
    },
    {
      "epoch": 2.013936620778592,
      "grad_norm": 1.0667301416397095,
      "learning_rate": 6.011478423594684e-06,
      "loss": 0.0179,
      "step": 1230620
    },
    {
      "epoch": 2.013969351217245,
      "grad_norm": 0.28736624121665955,
      "learning_rate": 6.011412531381167e-06,
      "loss": 0.0162,
      "step": 1230640
    },
    {
      "epoch": 2.0140020816558986,
      "grad_norm": 0.3001146912574768,
      "learning_rate": 6.01134663916765e-06,
      "loss": 0.015,
      "step": 1230660
    },
    {
      "epoch": 2.0140348120945517,
      "grad_norm": 0.20520269870758057,
      "learning_rate": 6.011280746954133e-06,
      "loss": 0.017,
      "step": 1230680
    },
    {
      "epoch": 2.014067542533205,
      "grad_norm": 0.39491701126098633,
      "learning_rate": 6.011214854740616e-06,
      "loss": 0.015,
      "step": 1230700
    },
    {
      "epoch": 2.0141002729718585,
      "grad_norm": 0.23503702878952026,
      "learning_rate": 6.011148962527099e-06,
      "loss": 0.0159,
      "step": 1230720
    },
    {
      "epoch": 2.0141330034105116,
      "grad_norm": 0.6864945292472839,
      "learning_rate": 6.0110830703135815e-06,
      "loss": 0.0093,
      "step": 1230740
    },
    {
      "epoch": 2.014165733849165,
      "grad_norm": 0.13552019000053406,
      "learning_rate": 6.011017178100065e-06,
      "loss": 0.0097,
      "step": 1230760
    },
    {
      "epoch": 2.0141984642878183,
      "grad_norm": 0.5812126994132996,
      "learning_rate": 6.010951285886547e-06,
      "loss": 0.0153,
      "step": 1230780
    },
    {
      "epoch": 2.014231194726472,
      "grad_norm": 0.23854927718639374,
      "learning_rate": 6.0108853936730305e-06,
      "loss": 0.0126,
      "step": 1230800
    },
    {
      "epoch": 2.014263925165125,
      "grad_norm": 1.0127841234207153,
      "learning_rate": 6.0108195014595124e-06,
      "loss": 0.0144,
      "step": 1230820
    },
    {
      "epoch": 2.0142966556037782,
      "grad_norm": 0.5793346166610718,
      "learning_rate": 6.010753609245996e-06,
      "loss": 0.0154,
      "step": 1230840
    },
    {
      "epoch": 2.014329386042432,
      "grad_norm": 0.2673802971839905,
      "learning_rate": 6.010687717032479e-06,
      "loss": 0.0189,
      "step": 1230860
    },
    {
      "epoch": 2.014362116481085,
      "grad_norm": 0.3463672995567322,
      "learning_rate": 6.0106218248189615e-06,
      "loss": 0.017,
      "step": 1230880
    },
    {
      "epoch": 2.0143948469197386,
      "grad_norm": 0.2223578542470932,
      "learning_rate": 6.010555932605444e-06,
      "loss": 0.016,
      "step": 1230900
    },
    {
      "epoch": 2.0144275773583917,
      "grad_norm": 0.6189950704574585,
      "learning_rate": 6.010490040391928e-06,
      "loss": 0.0184,
      "step": 1230920
    },
    {
      "epoch": 2.014460307797045,
      "grad_norm": 0.7663252353668213,
      "learning_rate": 6.01042414817841e-06,
      "loss": 0.0204,
      "step": 1230940
    },
    {
      "epoch": 2.0144930382356985,
      "grad_norm": 0.05836857482790947,
      "learning_rate": 6.010358255964893e-06,
      "loss": 0.022,
      "step": 1230960
    },
    {
      "epoch": 2.0145257686743516,
      "grad_norm": 0.5752989053726196,
      "learning_rate": 6.010292363751375e-06,
      "loss": 0.019,
      "step": 1230980
    },
    {
      "epoch": 2.014558499113005,
      "grad_norm": 0.8211491703987122,
      "learning_rate": 6.010226471537859e-06,
      "loss": 0.0161,
      "step": 1231000
    },
    {
      "epoch": 2.0145912295516584,
      "grad_norm": 0.28636354207992554,
      "learning_rate": 6.010160579324342e-06,
      "loss": 0.0115,
      "step": 1231020
    },
    {
      "epoch": 2.014623959990312,
      "grad_norm": 0.3797803521156311,
      "learning_rate": 6.010094687110824e-06,
      "loss": 0.0168,
      "step": 1231040
    },
    {
      "epoch": 2.014656690428965,
      "grad_norm": 0.9437550902366638,
      "learning_rate": 6.010028794897308e-06,
      "loss": 0.0157,
      "step": 1231060
    },
    {
      "epoch": 2.0146894208676183,
      "grad_norm": 0.18629688024520874,
      "learning_rate": 6.009962902683791e-06,
      "loss": 0.0158,
      "step": 1231080
    },
    {
      "epoch": 2.014722151306272,
      "grad_norm": 0.11917988210916519,
      "learning_rate": 6.009897010470273e-06,
      "loss": 0.0171,
      "step": 1231100
    },
    {
      "epoch": 2.014754881744925,
      "grad_norm": 0.4190135598182678,
      "learning_rate": 6.009831118256756e-06,
      "loss": 0.0137,
      "step": 1231120
    },
    {
      "epoch": 2.0147876121835786,
      "grad_norm": 0.47166502475738525,
      "learning_rate": 6.00976522604324e-06,
      "loss": 0.0192,
      "step": 1231140
    },
    {
      "epoch": 2.0148203426222318,
      "grad_norm": 0.4276927709579468,
      "learning_rate": 6.0096993338297215e-06,
      "loss": 0.0192,
      "step": 1231160
    },
    {
      "epoch": 2.0148530730608853,
      "grad_norm": 0.28469499945640564,
      "learning_rate": 6.009633441616205e-06,
      "loss": 0.0184,
      "step": 1231180
    },
    {
      "epoch": 2.0148858034995385,
      "grad_norm": 0.5810707807540894,
      "learning_rate": 6.009567549402687e-06,
      "loss": 0.0205,
      "step": 1231200
    },
    {
      "epoch": 2.0149185339381916,
      "grad_norm": 0.10297194868326187,
      "learning_rate": 6.009501657189171e-06,
      "loss": 0.007,
      "step": 1231220
    },
    {
      "epoch": 2.0149512643768452,
      "grad_norm": 0.11404094099998474,
      "learning_rate": 6.009435764975653e-06,
      "loss": 0.0166,
      "step": 1231240
    },
    {
      "epoch": 2.0149839948154984,
      "grad_norm": 1.157752275466919,
      "learning_rate": 6.009369872762136e-06,
      "loss": 0.0172,
      "step": 1231260
    },
    {
      "epoch": 2.015016725254152,
      "grad_norm": 1.022855520248413,
      "learning_rate": 6.009303980548619e-06,
      "loss": 0.0146,
      "step": 1231280
    },
    {
      "epoch": 2.015049455692805,
      "grad_norm": 0.5645750164985657,
      "learning_rate": 6.009238088335102e-06,
      "loss": 0.0164,
      "step": 1231300
    },
    {
      "epoch": 2.0150821861314587,
      "grad_norm": 0.09838953614234924,
      "learning_rate": 6.009172196121584e-06,
      "loss": 0.0171,
      "step": 1231320
    },
    {
      "epoch": 2.015114916570112,
      "grad_norm": 1.3664003610610962,
      "learning_rate": 6.009106303908068e-06,
      "loss": 0.015,
      "step": 1231340
    },
    {
      "epoch": 2.015147647008765,
      "grad_norm": 0.9764280319213867,
      "learning_rate": 6.0090404116945515e-06,
      "loss": 0.0126,
      "step": 1231360
    },
    {
      "epoch": 2.0151803774474186,
      "grad_norm": 0.3540259301662445,
      "learning_rate": 6.008974519481033e-06,
      "loss": 0.0161,
      "step": 1231380
    },
    {
      "epoch": 2.0152131078860718,
      "grad_norm": 0.9279976487159729,
      "learning_rate": 6.008908627267517e-06,
      "loss": 0.0182,
      "step": 1231400
    },
    {
      "epoch": 2.0152458383247254,
      "grad_norm": 1.4544639587402344,
      "learning_rate": 6.008842735053999e-06,
      "loss": 0.0135,
      "step": 1231420
    },
    {
      "epoch": 2.0152785687633785,
      "grad_norm": 0.7829217314720154,
      "learning_rate": 6.0087768428404824e-06,
      "loss": 0.0161,
      "step": 1231440
    },
    {
      "epoch": 2.015311299202032,
      "grad_norm": 0.45614513754844666,
      "learning_rate": 6.008710950626964e-06,
      "loss": 0.0146,
      "step": 1231460
    },
    {
      "epoch": 2.0153440296406853,
      "grad_norm": 0.28147661685943604,
      "learning_rate": 6.008645058413448e-06,
      "loss": 0.0174,
      "step": 1231480
    },
    {
      "epoch": 2.0153767600793384,
      "grad_norm": 0.46296167373657227,
      "learning_rate": 6.008579166199931e-06,
      "loss": 0.0165,
      "step": 1231500
    },
    {
      "epoch": 2.015409490517992,
      "grad_norm": 0.1073315292596817,
      "learning_rate": 6.008513273986413e-06,
      "loss": 0.0167,
      "step": 1231520
    },
    {
      "epoch": 2.015442220956645,
      "grad_norm": 0.2461983561515808,
      "learning_rate": 6.008447381772896e-06,
      "loss": 0.0122,
      "step": 1231540
    },
    {
      "epoch": 2.0154749513952988,
      "grad_norm": 0.674346387386322,
      "learning_rate": 6.00838148955938e-06,
      "loss": 0.0216,
      "step": 1231560
    },
    {
      "epoch": 2.015507681833952,
      "grad_norm": 0.3511197566986084,
      "learning_rate": 6.008315597345862e-06,
      "loss": 0.0148,
      "step": 1231580
    },
    {
      "epoch": 2.0155404122726055,
      "grad_norm": 0.3955395519733429,
      "learning_rate": 6.008249705132345e-06,
      "loss": 0.0138,
      "step": 1231600
    },
    {
      "epoch": 2.0155731427112586,
      "grad_norm": 0.3869914710521698,
      "learning_rate": 6.008183812918827e-06,
      "loss": 0.0152,
      "step": 1231620
    },
    {
      "epoch": 2.015605873149912,
      "grad_norm": 0.4863099455833435,
      "learning_rate": 6.008117920705311e-06,
      "loss": 0.0221,
      "step": 1231640
    },
    {
      "epoch": 2.0156386035885654,
      "grad_norm": 0.28324395418167114,
      "learning_rate": 6.0080520284917934e-06,
      "loss": 0.0206,
      "step": 1231660
    },
    {
      "epoch": 2.0156713340272185,
      "grad_norm": 0.13252733647823334,
      "learning_rate": 6.007986136278276e-06,
      "loss": 0.0107,
      "step": 1231680
    },
    {
      "epoch": 2.015704064465872,
      "grad_norm": 0.705245852470398,
      "learning_rate": 6.007920244064759e-06,
      "loss": 0.013,
      "step": 1231700
    },
    {
      "epoch": 2.0157367949045253,
      "grad_norm": 0.3556064963340759,
      "learning_rate": 6.0078543518512425e-06,
      "loss": 0.0129,
      "step": 1231720
    },
    {
      "epoch": 2.0157695253431784,
      "grad_norm": 0.09621118009090424,
      "learning_rate": 6.007788459637725e-06,
      "loss": 0.0156,
      "step": 1231740
    },
    {
      "epoch": 2.015802255781832,
      "grad_norm": 0.10637826472520828,
      "learning_rate": 6.007722567424208e-06,
      "loss": 0.0172,
      "step": 1231760
    },
    {
      "epoch": 2.015834986220485,
      "grad_norm": 0.8219373822212219,
      "learning_rate": 6.0076566752106916e-06,
      "loss": 0.0125,
      "step": 1231780
    },
    {
      "epoch": 2.0158677166591388,
      "grad_norm": 0.2777763903141022,
      "learning_rate": 6.0075907829971735e-06,
      "loss": 0.0146,
      "step": 1231800
    },
    {
      "epoch": 2.015900447097792,
      "grad_norm": 0.42828384041786194,
      "learning_rate": 6.007524890783657e-06,
      "loss": 0.0199,
      "step": 1231820
    },
    {
      "epoch": 2.0159331775364455,
      "grad_norm": 0.3432545065879822,
      "learning_rate": 6.007458998570139e-06,
      "loss": 0.0262,
      "step": 1231840
    },
    {
      "epoch": 2.0159659079750987,
      "grad_norm": 0.22266118228435516,
      "learning_rate": 6.0073931063566225e-06,
      "loss": 0.0136,
      "step": 1231860
    },
    {
      "epoch": 2.015998638413752,
      "grad_norm": 0.5740404725074768,
      "learning_rate": 6.007327214143105e-06,
      "loss": 0.0146,
      "step": 1231880
    },
    {
      "epoch": 2.0160313688524054,
      "grad_norm": 0.3896329998970032,
      "learning_rate": 6.007261321929588e-06,
      "loss": 0.0165,
      "step": 1231900
    },
    {
      "epoch": 2.0160640992910586,
      "grad_norm": 0.9042608141899109,
      "learning_rate": 6.007195429716071e-06,
      "loss": 0.016,
      "step": 1231920
    },
    {
      "epoch": 2.016096829729712,
      "grad_norm": 0.35471269488334656,
      "learning_rate": 6.007129537502554e-06,
      "loss": 0.0094,
      "step": 1231940
    },
    {
      "epoch": 2.0161295601683653,
      "grad_norm": 0.45682957768440247,
      "learning_rate": 6.007063645289036e-06,
      "loss": 0.0145,
      "step": 1231960
    },
    {
      "epoch": 2.016162290607019,
      "grad_norm": 0.24417421221733093,
      "learning_rate": 6.00699775307552e-06,
      "loss": 0.011,
      "step": 1231980
    },
    {
      "epoch": 2.016195021045672,
      "grad_norm": 0.23219698667526245,
      "learning_rate": 6.006931860862002e-06,
      "loss": 0.0152,
      "step": 1232000
    },
    {
      "epoch": 2.016227751484325,
      "grad_norm": 0.4027675986289978,
      "learning_rate": 6.006865968648485e-06,
      "loss": 0.0159,
      "step": 1232020
    },
    {
      "epoch": 2.016260481922979,
      "grad_norm": 0.6116904616355896,
      "learning_rate": 6.006800076434968e-06,
      "loss": 0.0155,
      "step": 1232040
    },
    {
      "epoch": 2.016293212361632,
      "grad_norm": 2.4466981887817383,
      "learning_rate": 6.006734184221451e-06,
      "loss": 0.0195,
      "step": 1232060
    },
    {
      "epoch": 2.0163259428002855,
      "grad_norm": 0.4174900949001312,
      "learning_rate": 6.006668292007934e-06,
      "loss": 0.0106,
      "step": 1232080
    },
    {
      "epoch": 2.0163586732389387,
      "grad_norm": 0.35618045926094055,
      "learning_rate": 6.006602399794417e-06,
      "loss": 0.0234,
      "step": 1232100
    },
    {
      "epoch": 2.0163914036775923,
      "grad_norm": 0.7670769095420837,
      "learning_rate": 6.0065365075809e-06,
      "loss": 0.0138,
      "step": 1232120
    },
    {
      "epoch": 2.0164241341162454,
      "grad_norm": 0.4723186492919922,
      "learning_rate": 6.0064706153673826e-06,
      "loss": 0.0185,
      "step": 1232140
    },
    {
      "epoch": 2.0164568645548986,
      "grad_norm": 0.16866618394851685,
      "learning_rate": 6.006404723153866e-06,
      "loss": 0.013,
      "step": 1232160
    },
    {
      "epoch": 2.016489594993552,
      "grad_norm": 0.5684670209884644,
      "learning_rate": 6.006338830940348e-06,
      "loss": 0.0117,
      "step": 1232180
    },
    {
      "epoch": 2.0165223254322053,
      "grad_norm": 0.9168916344642639,
      "learning_rate": 6.006272938726832e-06,
      "loss": 0.0114,
      "step": 1232200
    },
    {
      "epoch": 2.016555055870859,
      "grad_norm": 0.22269567847251892,
      "learning_rate": 6.0062070465133135e-06,
      "loss": 0.0181,
      "step": 1232220
    },
    {
      "epoch": 2.016587786309512,
      "grad_norm": 0.5149476528167725,
      "learning_rate": 6.006141154299797e-06,
      "loss": 0.0217,
      "step": 1232240
    },
    {
      "epoch": 2.0166205167481657,
      "grad_norm": 0.5501821041107178,
      "learning_rate": 6.00607526208628e-06,
      "loss": 0.0158,
      "step": 1232260
    },
    {
      "epoch": 2.016653247186819,
      "grad_norm": 0.086996890604496,
      "learning_rate": 6.006009369872763e-06,
      "loss": 0.0118,
      "step": 1232280
    },
    {
      "epoch": 2.016685977625472,
      "grad_norm": 0.805593729019165,
      "learning_rate": 6.005943477659245e-06,
      "loss": 0.0175,
      "step": 1232300
    },
    {
      "epoch": 2.0167187080641256,
      "grad_norm": 0.07162650674581528,
      "learning_rate": 6.005877585445729e-06,
      "loss": 0.0146,
      "step": 1232320
    },
    {
      "epoch": 2.0167514385027787,
      "grad_norm": 0.13340435922145844,
      "learning_rate": 6.005811693232211e-06,
      "loss": 0.0143,
      "step": 1232340
    },
    {
      "epoch": 2.0167841689414323,
      "grad_norm": 0.9150121212005615,
      "learning_rate": 6.005745801018694e-06,
      "loss": 0.018,
      "step": 1232360
    },
    {
      "epoch": 2.0168168993800855,
      "grad_norm": 0.25360336899757385,
      "learning_rate": 6.005679908805176e-06,
      "loss": 0.0156,
      "step": 1232380
    },
    {
      "epoch": 2.016849629818739,
      "grad_norm": 0.7614660263061523,
      "learning_rate": 6.00561401659166e-06,
      "loss": 0.0186,
      "step": 1232400
    },
    {
      "epoch": 2.016882360257392,
      "grad_norm": 0.0976380705833435,
      "learning_rate": 6.0055481243781435e-06,
      "loss": 0.0105,
      "step": 1232420
    },
    {
      "epoch": 2.0169150906960454,
      "grad_norm": 0.2407504767179489,
      "learning_rate": 6.005482232164625e-06,
      "loss": 0.0208,
      "step": 1232440
    },
    {
      "epoch": 2.016947821134699,
      "grad_norm": 4.993256568908691,
      "learning_rate": 6.005416339951109e-06,
      "loss": 0.0124,
      "step": 1232460
    },
    {
      "epoch": 2.016980551573352,
      "grad_norm": 0.34219205379486084,
      "learning_rate": 6.005350447737591e-06,
      "loss": 0.0194,
      "step": 1232480
    },
    {
      "epoch": 2.0170132820120057,
      "grad_norm": 0.6315392255783081,
      "learning_rate": 6.0052845555240744e-06,
      "loss": 0.0148,
      "step": 1232500
    },
    {
      "epoch": 2.017046012450659,
      "grad_norm": 0.16551785171031952,
      "learning_rate": 6.005218663310557e-06,
      "loss": 0.0133,
      "step": 1232520
    },
    {
      "epoch": 2.017078742889312,
      "grad_norm": 0.20808973908424377,
      "learning_rate": 6.00515277109704e-06,
      "loss": 0.0105,
      "step": 1232540
    },
    {
      "epoch": 2.0171114733279656,
      "grad_norm": 0.1502329707145691,
      "learning_rate": 6.005086878883523e-06,
      "loss": 0.0135,
      "step": 1232560
    },
    {
      "epoch": 2.0171442037666187,
      "grad_norm": 1.7864354848861694,
      "learning_rate": 6.005020986670006e-06,
      "loss": 0.0121,
      "step": 1232580
    },
    {
      "epoch": 2.0171769342052723,
      "grad_norm": 0.3302551209926605,
      "learning_rate": 6.004955094456488e-06,
      "loss": 0.0127,
      "step": 1232600
    },
    {
      "epoch": 2.0172096646439255,
      "grad_norm": 0.3535151779651642,
      "learning_rate": 6.004889202242972e-06,
      "loss": 0.0103,
      "step": 1232620
    },
    {
      "epoch": 2.017242395082579,
      "grad_norm": 0.24573595821857452,
      "learning_rate": 6.004823310029454e-06,
      "loss": 0.0172,
      "step": 1232640
    },
    {
      "epoch": 2.0172751255212322,
      "grad_norm": 0.13839127123355865,
      "learning_rate": 6.004757417815937e-06,
      "loss": 0.0105,
      "step": 1232660
    },
    {
      "epoch": 2.0173078559598854,
      "grad_norm": 0.41285911202430725,
      "learning_rate": 6.00469152560242e-06,
      "loss": 0.0154,
      "step": 1232680
    },
    {
      "epoch": 2.017340586398539,
      "grad_norm": 0.3766365647315979,
      "learning_rate": 6.004625633388903e-06,
      "loss": 0.0159,
      "step": 1232700
    },
    {
      "epoch": 2.017373316837192,
      "grad_norm": 0.677595853805542,
      "learning_rate": 6.004559741175385e-06,
      "loss": 0.0174,
      "step": 1232720
    },
    {
      "epoch": 2.0174060472758457,
      "grad_norm": 0.20854344964027405,
      "learning_rate": 6.004493848961869e-06,
      "loss": 0.0133,
      "step": 1232740
    },
    {
      "epoch": 2.017438777714499,
      "grad_norm": 0.34410977363586426,
      "learning_rate": 6.004427956748351e-06,
      "loss": 0.0158,
      "step": 1232760
    },
    {
      "epoch": 2.0174715081531525,
      "grad_norm": 0.38025277853012085,
      "learning_rate": 6.0043620645348345e-06,
      "loss": 0.0122,
      "step": 1232780
    },
    {
      "epoch": 2.0175042385918056,
      "grad_norm": 0.14622516930103302,
      "learning_rate": 6.004296172321318e-06,
      "loss": 0.012,
      "step": 1232800
    },
    {
      "epoch": 2.0175369690304588,
      "grad_norm": 0.32593244314193726,
      "learning_rate": 6.0042302801078e-06,
      "loss": 0.0117,
      "step": 1232820
    },
    {
      "epoch": 2.0175696994691124,
      "grad_norm": 0.4833448827266693,
      "learning_rate": 6.0041643878942835e-06,
      "loss": 0.0142,
      "step": 1232840
    },
    {
      "epoch": 2.0176024299077655,
      "grad_norm": 0.44699299335479736,
      "learning_rate": 6.0040984956807654e-06,
      "loss": 0.0149,
      "step": 1232860
    },
    {
      "epoch": 2.017635160346419,
      "grad_norm": 0.31868353486061096,
      "learning_rate": 6.004032603467249e-06,
      "loss": 0.0167,
      "step": 1232880
    },
    {
      "epoch": 2.0176678907850722,
      "grad_norm": 0.7261435389518738,
      "learning_rate": 6.003966711253732e-06,
      "loss": 0.0107,
      "step": 1232900
    },
    {
      "epoch": 2.017700621223726,
      "grad_norm": 0.30088409781455994,
      "learning_rate": 6.0039008190402145e-06,
      "loss": 0.0178,
      "step": 1232920
    },
    {
      "epoch": 2.017733351662379,
      "grad_norm": 0.6939902901649475,
      "learning_rate": 6.003834926826697e-06,
      "loss": 0.0179,
      "step": 1232940
    },
    {
      "epoch": 2.017766082101032,
      "grad_norm": 0.11820153146982193,
      "learning_rate": 6.003769034613181e-06,
      "loss": 0.0169,
      "step": 1232960
    },
    {
      "epoch": 2.0177988125396857,
      "grad_norm": 0.45165422558784485,
      "learning_rate": 6.003703142399663e-06,
      "loss": 0.0148,
      "step": 1232980
    },
    {
      "epoch": 2.017831542978339,
      "grad_norm": 0.10509856045246124,
      "learning_rate": 6.003637250186146e-06,
      "loss": 0.0135,
      "step": 1233000
    },
    {
      "epoch": 2.0178642734169925,
      "grad_norm": 0.4184247851371765,
      "learning_rate": 6.003571357972628e-06,
      "loss": 0.0169,
      "step": 1233020
    },
    {
      "epoch": 2.0178970038556456,
      "grad_norm": 0.08616049587726593,
      "learning_rate": 6.003505465759112e-06,
      "loss": 0.0161,
      "step": 1233040
    },
    {
      "epoch": 2.0179297342942992,
      "grad_norm": 0.47442755103111267,
      "learning_rate": 6.0034395735455945e-06,
      "loss": 0.0161,
      "step": 1233060
    },
    {
      "epoch": 2.0179624647329524,
      "grad_norm": 0.29846876859664917,
      "learning_rate": 6.003373681332077e-06,
      "loss": 0.017,
      "step": 1233080
    },
    {
      "epoch": 2.0179951951716055,
      "grad_norm": 0.6363793015480042,
      "learning_rate": 6.00330778911856e-06,
      "loss": 0.0136,
      "step": 1233100
    },
    {
      "epoch": 2.018027925610259,
      "grad_norm": 0.29730695486068726,
      "learning_rate": 6.003241896905044e-06,
      "loss": 0.0157,
      "step": 1233120
    },
    {
      "epoch": 2.0180606560489123,
      "grad_norm": 0.32073429226875305,
      "learning_rate": 6.003176004691526e-06,
      "loss": 0.0168,
      "step": 1233140
    },
    {
      "epoch": 2.018093386487566,
      "grad_norm": 0.3824899196624756,
      "learning_rate": 6.003110112478009e-06,
      "loss": 0.0146,
      "step": 1233160
    },
    {
      "epoch": 2.018126116926219,
      "grad_norm": 0.4571356475353241,
      "learning_rate": 6.003044220264493e-06,
      "loss": 0.01,
      "step": 1233180
    },
    {
      "epoch": 2.018158847364872,
      "grad_norm": 4.444809436798096,
      "learning_rate": 6.0029783280509746e-06,
      "loss": 0.019,
      "step": 1233200
    },
    {
      "epoch": 2.0181915778035258,
      "grad_norm": 0.09264741092920303,
      "learning_rate": 6.002912435837458e-06,
      "loss": 0.0195,
      "step": 1233220
    },
    {
      "epoch": 2.018224308242179,
      "grad_norm": 0.1800021082162857,
      "learning_rate": 6.00284654362394e-06,
      "loss": 0.0159,
      "step": 1233240
    },
    {
      "epoch": 2.0182570386808325,
      "grad_norm": 0.3652082085609436,
      "learning_rate": 6.002780651410424e-06,
      "loss": 0.0151,
      "step": 1233260
    },
    {
      "epoch": 2.0182897691194857,
      "grad_norm": 0.6774224638938904,
      "learning_rate": 6.002714759196906e-06,
      "loss": 0.0133,
      "step": 1233280
    },
    {
      "epoch": 2.0183224995581392,
      "grad_norm": 0.16118209064006805,
      "learning_rate": 6.002648866983389e-06,
      "loss": 0.0113,
      "step": 1233300
    },
    {
      "epoch": 2.0183552299967924,
      "grad_norm": 0.3097885549068451,
      "learning_rate": 6.002582974769872e-06,
      "loss": 0.0149,
      "step": 1233320
    },
    {
      "epoch": 2.0183879604354455,
      "grad_norm": 0.23219265043735504,
      "learning_rate": 6.0025170825563554e-06,
      "loss": 0.0183,
      "step": 1233340
    },
    {
      "epoch": 2.018420690874099,
      "grad_norm": 0.3974376916885376,
      "learning_rate": 6.002451190342837e-06,
      "loss": 0.0092,
      "step": 1233360
    },
    {
      "epoch": 2.0184534213127523,
      "grad_norm": 0.18946775794029236,
      "learning_rate": 6.002385298129321e-06,
      "loss": 0.0114,
      "step": 1233380
    },
    {
      "epoch": 2.018486151751406,
      "grad_norm": 0.21606655418872833,
      "learning_rate": 6.002319405915803e-06,
      "loss": 0.0188,
      "step": 1233400
    },
    {
      "epoch": 2.018518882190059,
      "grad_norm": 0.3691960275173187,
      "learning_rate": 6.002253513702286e-06,
      "loss": 0.0128,
      "step": 1233420
    },
    {
      "epoch": 2.0185516126287126,
      "grad_norm": 0.6379034519195557,
      "learning_rate": 6.002187621488768e-06,
      "loss": 0.0134,
      "step": 1233440
    },
    {
      "epoch": 2.018584343067366,
      "grad_norm": 0.2052142322063446,
      "learning_rate": 6.002121729275252e-06,
      "loss": 0.0114,
      "step": 1233460
    },
    {
      "epoch": 2.018617073506019,
      "grad_norm": 0.7238366007804871,
      "learning_rate": 6.0020558370617355e-06,
      "loss": 0.0179,
      "step": 1233480
    },
    {
      "epoch": 2.0186498039446725,
      "grad_norm": 0.14134643971920013,
      "learning_rate": 6.001989944848217e-06,
      "loss": 0.0137,
      "step": 1233500
    },
    {
      "epoch": 2.0186825343833257,
      "grad_norm": 0.5206947922706604,
      "learning_rate": 6.001924052634701e-06,
      "loss": 0.0146,
      "step": 1233520
    },
    {
      "epoch": 2.0187152648219793,
      "grad_norm": 0.51474928855896,
      "learning_rate": 6.001858160421184e-06,
      "loss": 0.0183,
      "step": 1233540
    },
    {
      "epoch": 2.0187479952606324,
      "grad_norm": 0.3235952854156494,
      "learning_rate": 6.001792268207666e-06,
      "loss": 0.0139,
      "step": 1233560
    },
    {
      "epoch": 2.018780725699286,
      "grad_norm": 0.26297760009765625,
      "learning_rate": 6.001726375994149e-06,
      "loss": 0.014,
      "step": 1233580
    },
    {
      "epoch": 2.018813456137939,
      "grad_norm": 0.22749552130699158,
      "learning_rate": 6.001660483780633e-06,
      "loss": 0.0226,
      "step": 1233600
    },
    {
      "epoch": 2.0188461865765923,
      "grad_norm": 0.09126956015825272,
      "learning_rate": 6.001594591567115e-06,
      "loss": 0.0087,
      "step": 1233620
    },
    {
      "epoch": 2.018878917015246,
      "grad_norm": 0.27272525429725647,
      "learning_rate": 6.001528699353598e-06,
      "loss": 0.0187,
      "step": 1233640
    },
    {
      "epoch": 2.018911647453899,
      "grad_norm": 0.11799035221338272,
      "learning_rate": 6.00146280714008e-06,
      "loss": 0.0138,
      "step": 1233660
    },
    {
      "epoch": 2.0189443778925527,
      "grad_norm": 0.09756962954998016,
      "learning_rate": 6.001396914926564e-06,
      "loss": 0.0162,
      "step": 1233680
    },
    {
      "epoch": 2.018977108331206,
      "grad_norm": 0.4684720039367676,
      "learning_rate": 6.0013310227130464e-06,
      "loss": 0.0139,
      "step": 1233700
    },
    {
      "epoch": 2.0190098387698594,
      "grad_norm": 0.6610854864120483,
      "learning_rate": 6.001265130499529e-06,
      "loss": 0.0173,
      "step": 1233720
    },
    {
      "epoch": 2.0190425692085125,
      "grad_norm": 0.28385013341903687,
      "learning_rate": 6.001199238286012e-06,
      "loss": 0.0184,
      "step": 1233740
    },
    {
      "epoch": 2.0190752996471657,
      "grad_norm": 0.40518292784690857,
      "learning_rate": 6.0011333460724955e-06,
      "loss": 0.0128,
      "step": 1233760
    },
    {
      "epoch": 2.0191080300858193,
      "grad_norm": 0.05307827144861221,
      "learning_rate": 6.001067453858977e-06,
      "loss": 0.0185,
      "step": 1233780
    },
    {
      "epoch": 2.0191407605244724,
      "grad_norm": 0.25878387689590454,
      "learning_rate": 6.001001561645461e-06,
      "loss": 0.0165,
      "step": 1233800
    },
    {
      "epoch": 2.019173490963126,
      "grad_norm": 0.6820731163024902,
      "learning_rate": 6.000935669431943e-06,
      "loss": 0.0206,
      "step": 1233820
    },
    {
      "epoch": 2.019206221401779,
      "grad_norm": 0.7917588353157043,
      "learning_rate": 6.0008697772184265e-06,
      "loss": 0.0177,
      "step": 1233840
    },
    {
      "epoch": 2.019238951840433,
      "grad_norm": 1.0349088907241821,
      "learning_rate": 6.00080388500491e-06,
      "loss": 0.0154,
      "step": 1233860
    },
    {
      "epoch": 2.019271682279086,
      "grad_norm": 0.3150671720504761,
      "learning_rate": 6.000737992791392e-06,
      "loss": 0.0168,
      "step": 1233880
    },
    {
      "epoch": 2.019304412717739,
      "grad_norm": 0.24620692431926727,
      "learning_rate": 6.0006721005778755e-06,
      "loss": 0.0091,
      "step": 1233900
    },
    {
      "epoch": 2.0193371431563927,
      "grad_norm": 0.36210617423057556,
      "learning_rate": 6.000606208364358e-06,
      "loss": 0.015,
      "step": 1233920
    },
    {
      "epoch": 2.019369873595046,
      "grad_norm": 0.8785460591316223,
      "learning_rate": 6.000540316150841e-06,
      "loss": 0.0161,
      "step": 1233940
    },
    {
      "epoch": 2.0194026040336994,
      "grad_norm": 0.6047940254211426,
      "learning_rate": 6.000474423937324e-06,
      "loss": 0.0209,
      "step": 1233960
    },
    {
      "epoch": 2.0194353344723526,
      "grad_norm": 0.5693150758743286,
      "learning_rate": 6.000408531723807e-06,
      "loss": 0.019,
      "step": 1233980
    },
    {
      "epoch": 2.0194680649110057,
      "grad_norm": 0.19066154956817627,
      "learning_rate": 6.000342639510289e-06,
      "loss": 0.0084,
      "step": 1234000
    },
    {
      "epoch": 2.0195007953496593,
      "grad_norm": 0.2021198570728302,
      "learning_rate": 6.000276747296773e-06,
      "loss": 0.0218,
      "step": 1234020
    },
    {
      "epoch": 2.0195335257883125,
      "grad_norm": 0.19393578171730042,
      "learning_rate": 6.000210855083255e-06,
      "loss": 0.0145,
      "step": 1234040
    },
    {
      "epoch": 2.019566256226966,
      "grad_norm": 1.031660556793213,
      "learning_rate": 6.000144962869738e-06,
      "loss": 0.0142,
      "step": 1234060
    },
    {
      "epoch": 2.019598986665619,
      "grad_norm": 0.3944435715675354,
      "learning_rate": 6.000079070656221e-06,
      "loss": 0.019,
      "step": 1234080
    },
    {
      "epoch": 2.019631717104273,
      "grad_norm": 0.6177664399147034,
      "learning_rate": 6.000013178442704e-06,
      "loss": 0.0128,
      "step": 1234100
    },
    {
      "epoch": 2.019664447542926,
      "grad_norm": 0.2789304256439209,
      "learning_rate": 5.9999472862291865e-06,
      "loss": 0.0156,
      "step": 1234120
    },
    {
      "epoch": 2.019697177981579,
      "grad_norm": 1.0444248914718628,
      "learning_rate": 5.99988139401567e-06,
      "loss": 0.0146,
      "step": 1234140
    },
    {
      "epoch": 2.0197299084202327,
      "grad_norm": 0.46891018748283386,
      "learning_rate": 5.999815501802152e-06,
      "loss": 0.0151,
      "step": 1234160
    },
    {
      "epoch": 2.019762638858886,
      "grad_norm": 0.10393763333559036,
      "learning_rate": 5.999749609588636e-06,
      "loss": 0.0151,
      "step": 1234180
    },
    {
      "epoch": 2.0197953692975394,
      "grad_norm": 0.6370909810066223,
      "learning_rate": 5.999683717375119e-06,
      "loss": 0.0139,
      "step": 1234200
    },
    {
      "epoch": 2.0198280997361926,
      "grad_norm": 0.20457710325717926,
      "learning_rate": 5.999617825161601e-06,
      "loss": 0.0198,
      "step": 1234220
    },
    {
      "epoch": 2.019860830174846,
      "grad_norm": 0.16324123740196228,
      "learning_rate": 5.999551932948085e-06,
      "loss": 0.0159,
      "step": 1234240
    },
    {
      "epoch": 2.0198935606134993,
      "grad_norm": 0.4393872022628784,
      "learning_rate": 5.9994860407345665e-06,
      "loss": 0.0124,
      "step": 1234260
    },
    {
      "epoch": 2.0199262910521525,
      "grad_norm": 0.47870779037475586,
      "learning_rate": 5.99942014852105e-06,
      "loss": 0.0248,
      "step": 1234280
    },
    {
      "epoch": 2.019959021490806,
      "grad_norm": 0.5021301507949829,
      "learning_rate": 5.999354256307532e-06,
      "loss": 0.0118,
      "step": 1234300
    },
    {
      "epoch": 2.0199917519294592,
      "grad_norm": 0.7147089838981628,
      "learning_rate": 5.999288364094016e-06,
      "loss": 0.0202,
      "step": 1234320
    },
    {
      "epoch": 2.020024482368113,
      "grad_norm": 0.4570344388484955,
      "learning_rate": 5.999222471880498e-06,
      "loss": 0.0136,
      "step": 1234340
    },
    {
      "epoch": 2.020057212806766,
      "grad_norm": 0.31819289922714233,
      "learning_rate": 5.999156579666981e-06,
      "loss": 0.0153,
      "step": 1234360
    },
    {
      "epoch": 2.0200899432454196,
      "grad_norm": 0.41204404830932617,
      "learning_rate": 5.999090687453464e-06,
      "loss": 0.0147,
      "step": 1234380
    },
    {
      "epoch": 2.0201226736840727,
      "grad_norm": 0.32386231422424316,
      "learning_rate": 5.999024795239947e-06,
      "loss": 0.0155,
      "step": 1234400
    },
    {
      "epoch": 2.020155404122726,
      "grad_norm": 0.7935214042663574,
      "learning_rate": 5.998958903026429e-06,
      "loss": 0.0161,
      "step": 1234420
    },
    {
      "epoch": 2.0201881345613795,
      "grad_norm": 0.36141231656074524,
      "learning_rate": 5.998893010812913e-06,
      "loss": 0.013,
      "step": 1234440
    },
    {
      "epoch": 2.0202208650000326,
      "grad_norm": 0.14598925411701202,
      "learning_rate": 5.998827118599395e-06,
      "loss": 0.0178,
      "step": 1234460
    },
    {
      "epoch": 2.020253595438686,
      "grad_norm": 0.6219468116760254,
      "learning_rate": 5.998761226385878e-06,
      "loss": 0.0173,
      "step": 1234480
    },
    {
      "epoch": 2.0202863258773394,
      "grad_norm": 0.4889845550060272,
      "learning_rate": 5.998695334172361e-06,
      "loss": 0.013,
      "step": 1234500
    },
    {
      "epoch": 2.020319056315993,
      "grad_norm": 0.5161170959472656,
      "learning_rate": 5.998629441958844e-06,
      "loss": 0.0191,
      "step": 1234520
    },
    {
      "epoch": 2.020351786754646,
      "grad_norm": 0.8223476409912109,
      "learning_rate": 5.9985635497453274e-06,
      "loss": 0.011,
      "step": 1234540
    },
    {
      "epoch": 2.0203845171932993,
      "grad_norm": 0.9487639665603638,
      "learning_rate": 5.99849765753181e-06,
      "loss": 0.0171,
      "step": 1234560
    },
    {
      "epoch": 2.020417247631953,
      "grad_norm": 0.47731927037239075,
      "learning_rate": 5.998431765318293e-06,
      "loss": 0.0085,
      "step": 1234580
    },
    {
      "epoch": 2.020449978070606,
      "grad_norm": 0.8223890066146851,
      "learning_rate": 5.998365873104776e-06,
      "loss": 0.0155,
      "step": 1234600
    },
    {
      "epoch": 2.0204827085092596,
      "grad_norm": 0.47182729840278625,
      "learning_rate": 5.998299980891259e-06,
      "loss": 0.0133,
      "step": 1234620
    },
    {
      "epoch": 2.0205154389479127,
      "grad_norm": 0.12763623893260956,
      "learning_rate": 5.998234088677741e-06,
      "loss": 0.0226,
      "step": 1234640
    },
    {
      "epoch": 2.020548169386566,
      "grad_norm": 0.337029367685318,
      "learning_rate": 5.998168196464225e-06,
      "loss": 0.0152,
      "step": 1234660
    },
    {
      "epoch": 2.0205808998252195,
      "grad_norm": 0.2213725745677948,
      "learning_rate": 5.998102304250707e-06,
      "loss": 0.0115,
      "step": 1234680
    },
    {
      "epoch": 2.0206136302638726,
      "grad_norm": 0.2912807762622833,
      "learning_rate": 5.99803641203719e-06,
      "loss": 0.0338,
      "step": 1234700
    },
    {
      "epoch": 2.0206463607025262,
      "grad_norm": 0.1369168758392334,
      "learning_rate": 5.997970519823673e-06,
      "loss": 0.0124,
      "step": 1234720
    },
    {
      "epoch": 2.0206790911411794,
      "grad_norm": 0.30508580803871155,
      "learning_rate": 5.997904627610156e-06,
      "loss": 0.0141,
      "step": 1234740
    },
    {
      "epoch": 2.020711821579833,
      "grad_norm": 1.4379743337631226,
      "learning_rate": 5.997838735396638e-06,
      "loss": 0.0205,
      "step": 1234760
    },
    {
      "epoch": 2.020744552018486,
      "grad_norm": 0.47566312551498413,
      "learning_rate": 5.997772843183122e-06,
      "loss": 0.0134,
      "step": 1234780
    },
    {
      "epoch": 2.0207772824571393,
      "grad_norm": 0.1536950021982193,
      "learning_rate": 5.997706950969604e-06,
      "loss": 0.0175,
      "step": 1234800
    },
    {
      "epoch": 2.020810012895793,
      "grad_norm": 0.12988974153995514,
      "learning_rate": 5.9976410587560875e-06,
      "loss": 0.0195,
      "step": 1234820
    },
    {
      "epoch": 2.020842743334446,
      "grad_norm": 0.11055750399827957,
      "learning_rate": 5.997575166542569e-06,
      "loss": 0.0139,
      "step": 1234840
    },
    {
      "epoch": 2.0208754737730996,
      "grad_norm": 0.9893559217453003,
      "learning_rate": 5.997509274329053e-06,
      "loss": 0.0185,
      "step": 1234860
    },
    {
      "epoch": 2.0209082042117528,
      "grad_norm": 0.31533101201057434,
      "learning_rate": 5.9974433821155366e-06,
      "loss": 0.0168,
      "step": 1234880
    },
    {
      "epoch": 2.0209409346504064,
      "grad_norm": 0.3669735789299011,
      "learning_rate": 5.9973774899020184e-06,
      "loss": 0.0144,
      "step": 1234900
    },
    {
      "epoch": 2.0209736650890595,
      "grad_norm": 0.2508839964866638,
      "learning_rate": 5.997311597688502e-06,
      "loss": 0.0147,
      "step": 1234920
    },
    {
      "epoch": 2.0210063955277127,
      "grad_norm": 0.22989363968372345,
      "learning_rate": 5.997245705474985e-06,
      "loss": 0.0152,
      "step": 1234940
    },
    {
      "epoch": 2.0210391259663663,
      "grad_norm": 0.12451327592134476,
      "learning_rate": 5.9971798132614675e-06,
      "loss": 0.013,
      "step": 1234960
    },
    {
      "epoch": 2.0210718564050194,
      "grad_norm": 0.09372711926698685,
      "learning_rate": 5.99711392104795e-06,
      "loss": 0.0098,
      "step": 1234980
    },
    {
      "epoch": 2.021104586843673,
      "grad_norm": 0.8432716727256775,
      "learning_rate": 5.997048028834434e-06,
      "loss": 0.0204,
      "step": 1235000
    },
    {
      "epoch": 2.021137317282326,
      "grad_norm": 0.38558661937713623,
      "learning_rate": 5.996982136620916e-06,
      "loss": 0.0112,
      "step": 1235020
    },
    {
      "epoch": 2.0211700477209797,
      "grad_norm": 0.4217047393321991,
      "learning_rate": 5.996916244407399e-06,
      "loss": 0.0131,
      "step": 1235040
    },
    {
      "epoch": 2.021202778159633,
      "grad_norm": 0.5294826030731201,
      "learning_rate": 5.996850352193881e-06,
      "loss": 0.0125,
      "step": 1235060
    },
    {
      "epoch": 2.021235508598286,
      "grad_norm": 0.2919671833515167,
      "learning_rate": 5.996784459980365e-06,
      "loss": 0.0154,
      "step": 1235080
    },
    {
      "epoch": 2.0212682390369396,
      "grad_norm": 0.5646304488182068,
      "learning_rate": 5.9967185677668475e-06,
      "loss": 0.0097,
      "step": 1235100
    },
    {
      "epoch": 2.021300969475593,
      "grad_norm": 0.7449913620948792,
      "learning_rate": 5.99665267555333e-06,
      "loss": 0.0171,
      "step": 1235120
    },
    {
      "epoch": 2.0213336999142464,
      "grad_norm": 1.000484585762024,
      "learning_rate": 5.996586783339813e-06,
      "loss": 0.013,
      "step": 1235140
    },
    {
      "epoch": 2.0213664303528995,
      "grad_norm": 0.2823479473590851,
      "learning_rate": 5.996520891126297e-06,
      "loss": 0.0133,
      "step": 1235160
    },
    {
      "epoch": 2.021399160791553,
      "grad_norm": 0.23714281618595123,
      "learning_rate": 5.9964549989127785e-06,
      "loss": 0.0148,
      "step": 1235180
    },
    {
      "epoch": 2.0214318912302063,
      "grad_norm": 0.33640384674072266,
      "learning_rate": 5.996389106699262e-06,
      "loss": 0.0095,
      "step": 1235200
    },
    {
      "epoch": 2.0214646216688594,
      "grad_norm": 0.7775502800941467,
      "learning_rate": 5.996323214485744e-06,
      "loss": 0.0165,
      "step": 1235220
    },
    {
      "epoch": 2.021497352107513,
      "grad_norm": 1.4733959436416626,
      "learning_rate": 5.9962573222722276e-06,
      "loss": 0.02,
      "step": 1235240
    },
    {
      "epoch": 2.021530082546166,
      "grad_norm": 0.24104171991348267,
      "learning_rate": 5.996191430058711e-06,
      "loss": 0.0121,
      "step": 1235260
    },
    {
      "epoch": 2.0215628129848198,
      "grad_norm": 0.5156314373016357,
      "learning_rate": 5.996125537845193e-06,
      "loss": 0.0085,
      "step": 1235280
    },
    {
      "epoch": 2.021595543423473,
      "grad_norm": 0.09698706120252609,
      "learning_rate": 5.996059645631677e-06,
      "loss": 0.0098,
      "step": 1235300
    },
    {
      "epoch": 2.0216282738621265,
      "grad_norm": 0.8490419387817383,
      "learning_rate": 5.9959937534181585e-06,
      "loss": 0.0125,
      "step": 1235320
    },
    {
      "epoch": 2.0216610043007797,
      "grad_norm": 1.705865740776062,
      "learning_rate": 5.995927861204642e-06,
      "loss": 0.0183,
      "step": 1235340
    },
    {
      "epoch": 2.021693734739433,
      "grad_norm": 0.18228253722190857,
      "learning_rate": 5.995861968991125e-06,
      "loss": 0.0142,
      "step": 1235360
    },
    {
      "epoch": 2.0217264651780864,
      "grad_norm": 0.2918420732021332,
      "learning_rate": 5.995796076777608e-06,
      "loss": 0.0138,
      "step": 1235380
    },
    {
      "epoch": 2.0217591956167396,
      "grad_norm": 0.6741676926612854,
      "learning_rate": 5.99573018456409e-06,
      "loss": 0.0173,
      "step": 1235400
    },
    {
      "epoch": 2.021791926055393,
      "grad_norm": 0.36434659361839294,
      "learning_rate": 5.995664292350574e-06,
      "loss": 0.0153,
      "step": 1235420
    },
    {
      "epoch": 2.0218246564940463,
      "grad_norm": 0.48395469784736633,
      "learning_rate": 5.995598400137056e-06,
      "loss": 0.0137,
      "step": 1235440
    },
    {
      "epoch": 2.0218573869326995,
      "grad_norm": 0.3363417088985443,
      "learning_rate": 5.995532507923539e-06,
      "loss": 0.0153,
      "step": 1235460
    },
    {
      "epoch": 2.021890117371353,
      "grad_norm": 0.3529159128665924,
      "learning_rate": 5.995466615710021e-06,
      "loss": 0.0126,
      "step": 1235480
    },
    {
      "epoch": 2.021922847810006,
      "grad_norm": 0.8515915274620056,
      "learning_rate": 5.995400723496505e-06,
      "loss": 0.019,
      "step": 1235500
    },
    {
      "epoch": 2.02195557824866,
      "grad_norm": 0.30461055040359497,
      "learning_rate": 5.995334831282988e-06,
      "loss": 0.0116,
      "step": 1235520
    },
    {
      "epoch": 2.021988308687313,
      "grad_norm": 0.10097700357437134,
      "learning_rate": 5.99526893906947e-06,
      "loss": 0.0125,
      "step": 1235540
    },
    {
      "epoch": 2.0220210391259665,
      "grad_norm": 0.11380551010370255,
      "learning_rate": 5.995203046855953e-06,
      "loss": 0.0127,
      "step": 1235560
    },
    {
      "epoch": 2.0220537695646197,
      "grad_norm": 0.9073804616928101,
      "learning_rate": 5.995137154642437e-06,
      "loss": 0.0182,
      "step": 1235580
    },
    {
      "epoch": 2.022086500003273,
      "grad_norm": 0.16224586963653564,
      "learning_rate": 5.995071262428919e-06,
      "loss": 0.0201,
      "step": 1235600
    },
    {
      "epoch": 2.0221192304419264,
      "grad_norm": 0.21046695113182068,
      "learning_rate": 5.995005370215402e-06,
      "loss": 0.0156,
      "step": 1235620
    },
    {
      "epoch": 2.0221519608805796,
      "grad_norm": 0.21884708106517792,
      "learning_rate": 5.994939478001886e-06,
      "loss": 0.0161,
      "step": 1235640
    },
    {
      "epoch": 2.022184691319233,
      "grad_norm": 0.3639829158782959,
      "learning_rate": 5.994873585788368e-06,
      "loss": 0.0135,
      "step": 1235660
    },
    {
      "epoch": 2.0222174217578863,
      "grad_norm": 0.1926104575395584,
      "learning_rate": 5.994807693574851e-06,
      "loss": 0.0131,
      "step": 1235680
    },
    {
      "epoch": 2.02225015219654,
      "grad_norm": 0.6613208055496216,
      "learning_rate": 5.994741801361333e-06,
      "loss": 0.0128,
      "step": 1235700
    },
    {
      "epoch": 2.022282882635193,
      "grad_norm": 0.3598824441432953,
      "learning_rate": 5.994675909147817e-06,
      "loss": 0.0146,
      "step": 1235720
    },
    {
      "epoch": 2.022315613073846,
      "grad_norm": 0.8305769562721252,
      "learning_rate": 5.9946100169342994e-06,
      "loss": 0.0168,
      "step": 1235740
    },
    {
      "epoch": 2.0223483435125,
      "grad_norm": 0.6236466765403748,
      "learning_rate": 5.994544124720782e-06,
      "loss": 0.013,
      "step": 1235760
    },
    {
      "epoch": 2.022381073951153,
      "grad_norm": 0.30900925397872925,
      "learning_rate": 5.994478232507265e-06,
      "loss": 0.0107,
      "step": 1235780
    },
    {
      "epoch": 2.0224138043898066,
      "grad_norm": 1.1501245498657227,
      "learning_rate": 5.9944123402937485e-06,
      "loss": 0.0188,
      "step": 1235800
    },
    {
      "epoch": 2.0224465348284597,
      "grad_norm": 0.15473610162734985,
      "learning_rate": 5.99434644808023e-06,
      "loss": 0.0199,
      "step": 1235820
    },
    {
      "epoch": 2.0224792652671133,
      "grad_norm": 1.223767638206482,
      "learning_rate": 5.994280555866714e-06,
      "loss": 0.0144,
      "step": 1235840
    },
    {
      "epoch": 2.0225119957057665,
      "grad_norm": 1.5414689779281616,
      "learning_rate": 5.994214663653196e-06,
      "loss": 0.0162,
      "step": 1235860
    },
    {
      "epoch": 2.0225447261444196,
      "grad_norm": 0.4845559298992157,
      "learning_rate": 5.9941487714396795e-06,
      "loss": 0.0104,
      "step": 1235880
    },
    {
      "epoch": 2.022577456583073,
      "grad_norm": 0.404329776763916,
      "learning_rate": 5.994082879226162e-06,
      "loss": 0.0104,
      "step": 1235900
    },
    {
      "epoch": 2.0226101870217263,
      "grad_norm": 0.29556792974472046,
      "learning_rate": 5.994016987012645e-06,
      "loss": 0.0144,
      "step": 1235920
    },
    {
      "epoch": 2.02264291746038,
      "grad_norm": 1.375569462776184,
      "learning_rate": 5.9939510947991285e-06,
      "loss": 0.0127,
      "step": 1235940
    },
    {
      "epoch": 2.022675647899033,
      "grad_norm": 0.330940306186676,
      "learning_rate": 5.993885202585611e-06,
      "loss": 0.0153,
      "step": 1235960
    },
    {
      "epoch": 2.0227083783376867,
      "grad_norm": 0.25158727169036865,
      "learning_rate": 5.993819310372094e-06,
      "loss": 0.0168,
      "step": 1235980
    },
    {
      "epoch": 2.02274110877634,
      "grad_norm": 0.34532850980758667,
      "learning_rate": 5.993753418158577e-06,
      "loss": 0.0148,
      "step": 1236000
    },
    {
      "epoch": 2.022773839214993,
      "grad_norm": 0.2849656641483307,
      "learning_rate": 5.99368752594506e-06,
      "loss": 0.0192,
      "step": 1236020
    },
    {
      "epoch": 2.0228065696536466,
      "grad_norm": 0.24157613515853882,
      "learning_rate": 5.993621633731542e-06,
      "loss": 0.0233,
      "step": 1236040
    },
    {
      "epoch": 2.0228393000922997,
      "grad_norm": 0.24245138466358185,
      "learning_rate": 5.993555741518026e-06,
      "loss": 0.0144,
      "step": 1236060
    },
    {
      "epoch": 2.0228720305309533,
      "grad_norm": 0.753939151763916,
      "learning_rate": 5.993489849304508e-06,
      "loss": 0.0115,
      "step": 1236080
    },
    {
      "epoch": 2.0229047609696065,
      "grad_norm": 0.6710662245750427,
      "learning_rate": 5.993423957090991e-06,
      "loss": 0.0101,
      "step": 1236100
    },
    {
      "epoch": 2.02293749140826,
      "grad_norm": 1.040502905845642,
      "learning_rate": 5.993358064877474e-06,
      "loss": 0.0139,
      "step": 1236120
    },
    {
      "epoch": 2.022970221846913,
      "grad_norm": 0.33175384998321533,
      "learning_rate": 5.993292172663957e-06,
      "loss": 0.0164,
      "step": 1236140
    },
    {
      "epoch": 2.0230029522855664,
      "grad_norm": 0.4951630234718323,
      "learning_rate": 5.9932262804504395e-06,
      "loss": 0.0207,
      "step": 1236160
    },
    {
      "epoch": 2.02303568272422,
      "grad_norm": 0.4459571838378906,
      "learning_rate": 5.993160388236923e-06,
      "loss": 0.0135,
      "step": 1236180
    },
    {
      "epoch": 2.023068413162873,
      "grad_norm": 0.7224324345588684,
      "learning_rate": 5.993094496023405e-06,
      "loss": 0.0189,
      "step": 1236200
    },
    {
      "epoch": 2.0231011436015267,
      "grad_norm": 0.06966996192932129,
      "learning_rate": 5.993028603809889e-06,
      "loss": 0.014,
      "step": 1236220
    },
    {
      "epoch": 2.02313387404018,
      "grad_norm": 0.4985744059085846,
      "learning_rate": 5.9929627115963705e-06,
      "loss": 0.0197,
      "step": 1236240
    },
    {
      "epoch": 2.023166604478833,
      "grad_norm": 1.0898404121398926,
      "learning_rate": 5.992896819382854e-06,
      "loss": 0.0189,
      "step": 1236260
    },
    {
      "epoch": 2.0231993349174866,
      "grad_norm": 0.7103421092033386,
      "learning_rate": 5.992830927169336e-06,
      "loss": 0.0211,
      "step": 1236280
    },
    {
      "epoch": 2.0232320653561398,
      "grad_norm": 0.4129970669746399,
      "learning_rate": 5.9927650349558195e-06,
      "loss": 0.0127,
      "step": 1236300
    },
    {
      "epoch": 2.0232647957947933,
      "grad_norm": 0.2872585952281952,
      "learning_rate": 5.992699142742303e-06,
      "loss": 0.0175,
      "step": 1236320
    },
    {
      "epoch": 2.0232975262334465,
      "grad_norm": 0.17028196156024933,
      "learning_rate": 5.992633250528785e-06,
      "loss": 0.0167,
      "step": 1236340
    },
    {
      "epoch": 2.0233302566721,
      "grad_norm": 0.5620770454406738,
      "learning_rate": 5.992567358315269e-06,
      "loss": 0.0127,
      "step": 1236360
    },
    {
      "epoch": 2.0233629871107532,
      "grad_norm": 0.4002014398574829,
      "learning_rate": 5.992501466101751e-06,
      "loss": 0.015,
      "step": 1236380
    },
    {
      "epoch": 2.0233957175494064,
      "grad_norm": 0.5086123943328857,
      "learning_rate": 5.992435573888234e-06,
      "loss": 0.0121,
      "step": 1236400
    },
    {
      "epoch": 2.02342844798806,
      "grad_norm": 0.5547686219215393,
      "learning_rate": 5.992369681674717e-06,
      "loss": 0.0121,
      "step": 1236420
    },
    {
      "epoch": 2.023461178426713,
      "grad_norm": 0.507397472858429,
      "learning_rate": 5.9923037894612e-06,
      "loss": 0.0175,
      "step": 1236440
    },
    {
      "epoch": 2.0234939088653667,
      "grad_norm": 0.23454758524894714,
      "learning_rate": 5.992237897247682e-06,
      "loss": 0.0145,
      "step": 1236460
    },
    {
      "epoch": 2.02352663930402,
      "grad_norm": 0.15696075558662415,
      "learning_rate": 5.992172005034166e-06,
      "loss": 0.0135,
      "step": 1236480
    },
    {
      "epoch": 2.0235593697426735,
      "grad_norm": 0.2281370460987091,
      "learning_rate": 5.992106112820648e-06,
      "loss": 0.0163,
      "step": 1236500
    },
    {
      "epoch": 2.0235921001813266,
      "grad_norm": 0.6179180145263672,
      "learning_rate": 5.992040220607131e-06,
      "loss": 0.0151,
      "step": 1236520
    },
    {
      "epoch": 2.0236248306199798,
      "grad_norm": 0.9336678385734558,
      "learning_rate": 5.991974328393614e-06,
      "loss": 0.0168,
      "step": 1236540
    },
    {
      "epoch": 2.0236575610586334,
      "grad_norm": 0.2641502916812897,
      "learning_rate": 5.991908436180097e-06,
      "loss": 0.0173,
      "step": 1236560
    },
    {
      "epoch": 2.0236902914972865,
      "grad_norm": 0.1827910989522934,
      "learning_rate": 5.99184254396658e-06,
      "loss": 0.0207,
      "step": 1236580
    },
    {
      "epoch": 2.02372302193594,
      "grad_norm": 0.6127064824104309,
      "learning_rate": 5.991776651753063e-06,
      "loss": 0.0159,
      "step": 1236600
    },
    {
      "epoch": 2.0237557523745933,
      "grad_norm": 0.39768069982528687,
      "learning_rate": 5.991710759539545e-06,
      "loss": 0.0142,
      "step": 1236620
    },
    {
      "epoch": 2.023788482813247,
      "grad_norm": 0.22973549365997314,
      "learning_rate": 5.991644867326029e-06,
      "loss": 0.016,
      "step": 1236640
    },
    {
      "epoch": 2.0238212132519,
      "grad_norm": 0.5253748297691345,
      "learning_rate": 5.991578975112512e-06,
      "loss": 0.0144,
      "step": 1236660
    },
    {
      "epoch": 2.023853943690553,
      "grad_norm": 0.32207271456718445,
      "learning_rate": 5.991513082898994e-06,
      "loss": 0.0163,
      "step": 1236680
    },
    {
      "epoch": 2.0238866741292068,
      "grad_norm": 0.5149385929107666,
      "learning_rate": 5.991447190685478e-06,
      "loss": 0.0139,
      "step": 1236700
    },
    {
      "epoch": 2.02391940456786,
      "grad_norm": 0.7669569253921509,
      "learning_rate": 5.99138129847196e-06,
      "loss": 0.01,
      "step": 1236720
    },
    {
      "epoch": 2.0239521350065135,
      "grad_norm": 0.31602683663368225,
      "learning_rate": 5.991315406258443e-06,
      "loss": 0.0126,
      "step": 1236740
    },
    {
      "epoch": 2.0239848654451666,
      "grad_norm": 0.2813914716243744,
      "learning_rate": 5.991249514044926e-06,
      "loss": 0.0207,
      "step": 1236760
    },
    {
      "epoch": 2.0240175958838202,
      "grad_norm": 0.3811483085155487,
      "learning_rate": 5.991183621831409e-06,
      "loss": 0.0141,
      "step": 1236780
    },
    {
      "epoch": 2.0240503263224734,
      "grad_norm": 0.24097543954849243,
      "learning_rate": 5.9911177296178914e-06,
      "loss": 0.0132,
      "step": 1236800
    },
    {
      "epoch": 2.0240830567611265,
      "grad_norm": 0.7060279250144958,
      "learning_rate": 5.991051837404375e-06,
      "loss": 0.0152,
      "step": 1236820
    },
    {
      "epoch": 2.02411578719978,
      "grad_norm": 0.5330749750137329,
      "learning_rate": 5.990985945190857e-06,
      "loss": 0.0165,
      "step": 1236840
    },
    {
      "epoch": 2.0241485176384333,
      "grad_norm": 0.7870370745658875,
      "learning_rate": 5.9909200529773405e-06,
      "loss": 0.02,
      "step": 1236860
    },
    {
      "epoch": 2.024181248077087,
      "grad_norm": 0.4451194107532501,
      "learning_rate": 5.990854160763822e-06,
      "loss": 0.0138,
      "step": 1236880
    },
    {
      "epoch": 2.02421397851574,
      "grad_norm": 0.12533605098724365,
      "learning_rate": 5.990788268550306e-06,
      "loss": 0.0237,
      "step": 1236900
    },
    {
      "epoch": 2.0242467089543936,
      "grad_norm": 2.341719388961792,
      "learning_rate": 5.990722376336789e-06,
      "loss": 0.016,
      "step": 1236920
    },
    {
      "epoch": 2.0242794393930468,
      "grad_norm": 0.13463981449604034,
      "learning_rate": 5.9906564841232715e-06,
      "loss": 0.0164,
      "step": 1236940
    },
    {
      "epoch": 2.0243121698317,
      "grad_norm": 0.37867555022239685,
      "learning_rate": 5.990590591909754e-06,
      "loss": 0.0147,
      "step": 1236960
    },
    {
      "epoch": 2.0243449002703535,
      "grad_norm": 0.2083074301481247,
      "learning_rate": 5.990524699696238e-06,
      "loss": 0.0108,
      "step": 1236980
    },
    {
      "epoch": 2.0243776307090067,
      "grad_norm": 0.525518000125885,
      "learning_rate": 5.9904588074827205e-06,
      "loss": 0.0175,
      "step": 1237000
    },
    {
      "epoch": 2.0244103611476603,
      "grad_norm": 0.2447350025177002,
      "learning_rate": 5.990392915269203e-06,
      "loss": 0.0181,
      "step": 1237020
    },
    {
      "epoch": 2.0244430915863134,
      "grad_norm": 0.09644372016191483,
      "learning_rate": 5.990327023055687e-06,
      "loss": 0.017,
      "step": 1237040
    },
    {
      "epoch": 2.0244758220249666,
      "grad_norm": 0.31207284331321716,
      "learning_rate": 5.990261130842169e-06,
      "loss": 0.0228,
      "step": 1237060
    },
    {
      "epoch": 2.02450855246362,
      "grad_norm": 0.454723060131073,
      "learning_rate": 5.990195238628652e-06,
      "loss": 0.013,
      "step": 1237080
    },
    {
      "epoch": 2.0245412829022733,
      "grad_norm": 0.15687993168830872,
      "learning_rate": 5.990129346415134e-06,
      "loss": 0.0174,
      "step": 1237100
    },
    {
      "epoch": 2.024574013340927,
      "grad_norm": 0.7297454476356506,
      "learning_rate": 5.990063454201618e-06,
      "loss": 0.0146,
      "step": 1237120
    },
    {
      "epoch": 2.02460674377958,
      "grad_norm": 0.601052463054657,
      "learning_rate": 5.9899975619881e-06,
      "loss": 0.0128,
      "step": 1237140
    },
    {
      "epoch": 2.0246394742182336,
      "grad_norm": 0.43274354934692383,
      "learning_rate": 5.989931669774583e-06,
      "loss": 0.0088,
      "step": 1237160
    },
    {
      "epoch": 2.024672204656887,
      "grad_norm": 0.377274751663208,
      "learning_rate": 5.989865777561066e-06,
      "loss": 0.0169,
      "step": 1237180
    },
    {
      "epoch": 2.02470493509554,
      "grad_norm": 0.44965970516204834,
      "learning_rate": 5.98979988534755e-06,
      "loss": 0.0222,
      "step": 1237200
    },
    {
      "epoch": 2.0247376655341935,
      "grad_norm": 0.2966216802597046,
      "learning_rate": 5.9897339931340315e-06,
      "loss": 0.0115,
      "step": 1237220
    },
    {
      "epoch": 2.0247703959728467,
      "grad_norm": 1.0944613218307495,
      "learning_rate": 5.989668100920515e-06,
      "loss": 0.0138,
      "step": 1237240
    },
    {
      "epoch": 2.0248031264115003,
      "grad_norm": 1.076137661933899,
      "learning_rate": 5.989602208706997e-06,
      "loss": 0.0156,
      "step": 1237260
    },
    {
      "epoch": 2.0248358568501534,
      "grad_norm": 1.1554877758026123,
      "learning_rate": 5.9895363164934806e-06,
      "loss": 0.0197,
      "step": 1237280
    },
    {
      "epoch": 2.024868587288807,
      "grad_norm": 0.12566235661506653,
      "learning_rate": 5.9894704242799625e-06,
      "loss": 0.0165,
      "step": 1237300
    },
    {
      "epoch": 2.02490131772746,
      "grad_norm": 0.18399707973003387,
      "learning_rate": 5.989404532066446e-06,
      "loss": 0.0129,
      "step": 1237320
    },
    {
      "epoch": 2.0249340481661133,
      "grad_norm": 0.7671129703521729,
      "learning_rate": 5.989338639852929e-06,
      "loss": 0.0192,
      "step": 1237340
    },
    {
      "epoch": 2.024966778604767,
      "grad_norm": 0.33271291851997375,
      "learning_rate": 5.9892727476394115e-06,
      "loss": 0.0194,
      "step": 1237360
    },
    {
      "epoch": 2.02499950904342,
      "grad_norm": 0.34752756357192993,
      "learning_rate": 5.989206855425895e-06,
      "loss": 0.0163,
      "step": 1237380
    },
    {
      "epoch": 2.0250322394820737,
      "grad_norm": 0.48094692826271057,
      "learning_rate": 5.989140963212378e-06,
      "loss": 0.0146,
      "step": 1237400
    },
    {
      "epoch": 2.025064969920727,
      "grad_norm": 0.38076743483543396,
      "learning_rate": 5.989075070998861e-06,
      "loss": 0.0137,
      "step": 1237420
    },
    {
      "epoch": 2.0250977003593804,
      "grad_norm": 0.37984907627105713,
      "learning_rate": 5.989009178785343e-06,
      "loss": 0.0172,
      "step": 1237440
    },
    {
      "epoch": 2.0251304307980336,
      "grad_norm": 0.04174391180276871,
      "learning_rate": 5.988943286571827e-06,
      "loss": 0.0122,
      "step": 1237460
    },
    {
      "epoch": 2.0251631612366867,
      "grad_norm": 0.48099932074546814,
      "learning_rate": 5.988877394358309e-06,
      "loss": 0.0113,
      "step": 1237480
    },
    {
      "epoch": 2.0251958916753403,
      "grad_norm": 0.3423764705657959,
      "learning_rate": 5.988811502144792e-06,
      "loss": 0.0174,
      "step": 1237500
    },
    {
      "epoch": 2.0252286221139935,
      "grad_norm": 0.25597426295280457,
      "learning_rate": 5.988745609931274e-06,
      "loss": 0.0166,
      "step": 1237520
    },
    {
      "epoch": 2.025261352552647,
      "grad_norm": 0.25369131565093994,
      "learning_rate": 5.988679717717758e-06,
      "loss": 0.0145,
      "step": 1237540
    },
    {
      "epoch": 2.0252940829913,
      "grad_norm": 0.14259223639965057,
      "learning_rate": 5.988613825504241e-06,
      "loss": 0.0192,
      "step": 1237560
    },
    {
      "epoch": 2.025326813429954,
      "grad_norm": 0.49224165081977844,
      "learning_rate": 5.988547933290723e-06,
      "loss": 0.0157,
      "step": 1237580
    },
    {
      "epoch": 2.025359543868607,
      "grad_norm": 1.3496122360229492,
      "learning_rate": 5.988482041077206e-06,
      "loss": 0.0134,
      "step": 1237600
    },
    {
      "epoch": 2.02539227430726,
      "grad_norm": 0.42571166157722473,
      "learning_rate": 5.98841614886369e-06,
      "loss": 0.0153,
      "step": 1237620
    },
    {
      "epoch": 2.0254250047459137,
      "grad_norm": 1.0600112676620483,
      "learning_rate": 5.988350256650172e-06,
      "loss": 0.0146,
      "step": 1237640
    },
    {
      "epoch": 2.025457735184567,
      "grad_norm": 0.3204306960105896,
      "learning_rate": 5.988284364436655e-06,
      "loss": 0.0101,
      "step": 1237660
    },
    {
      "epoch": 2.0254904656232204,
      "grad_norm": 1.1940855979919434,
      "learning_rate": 5.988218472223137e-06,
      "loss": 0.0266,
      "step": 1237680
    },
    {
      "epoch": 2.0255231960618736,
      "grad_norm": 1.136523723602295,
      "learning_rate": 5.988152580009621e-06,
      "loss": 0.018,
      "step": 1237700
    },
    {
      "epoch": 2.0255559265005267,
      "grad_norm": 0.21068432927131653,
      "learning_rate": 5.988086687796104e-06,
      "loss": 0.0154,
      "step": 1237720
    },
    {
      "epoch": 2.0255886569391803,
      "grad_norm": 0.15784822404384613,
      "learning_rate": 5.988020795582586e-06,
      "loss": 0.0114,
      "step": 1237740
    },
    {
      "epoch": 2.0256213873778335,
      "grad_norm": 0.4181455373764038,
      "learning_rate": 5.98795490336907e-06,
      "loss": 0.022,
      "step": 1237760
    },
    {
      "epoch": 2.025654117816487,
      "grad_norm": 1.5346451997756958,
      "learning_rate": 5.9878890111555525e-06,
      "loss": 0.0126,
      "step": 1237780
    },
    {
      "epoch": 2.0256868482551402,
      "grad_norm": 0.47596806287765503,
      "learning_rate": 5.987823118942035e-06,
      "loss": 0.0152,
      "step": 1237800
    },
    {
      "epoch": 2.025719578693794,
      "grad_norm": 0.17470349371433258,
      "learning_rate": 5.987757226728518e-06,
      "loss": 0.0117,
      "step": 1237820
    },
    {
      "epoch": 2.025752309132447,
      "grad_norm": 0.6254397034645081,
      "learning_rate": 5.9876913345150015e-06,
      "loss": 0.0217,
      "step": 1237840
    },
    {
      "epoch": 2.0257850395711,
      "grad_norm": 0.8541942834854126,
      "learning_rate": 5.987625442301483e-06,
      "loss": 0.0134,
      "step": 1237860
    },
    {
      "epoch": 2.0258177700097537,
      "grad_norm": 0.2609936594963074,
      "learning_rate": 5.987559550087967e-06,
      "loss": 0.0126,
      "step": 1237880
    },
    {
      "epoch": 2.025850500448407,
      "grad_norm": 0.6755371689796448,
      "learning_rate": 5.987493657874449e-06,
      "loss": 0.0235,
      "step": 1237900
    },
    {
      "epoch": 2.0258832308870605,
      "grad_norm": 0.2963601350784302,
      "learning_rate": 5.9874277656609325e-06,
      "loss": 0.0139,
      "step": 1237920
    },
    {
      "epoch": 2.0259159613257136,
      "grad_norm": 0.957904040813446,
      "learning_rate": 5.987361873447415e-06,
      "loss": 0.0145,
      "step": 1237940
    },
    {
      "epoch": 2.025948691764367,
      "grad_norm": 0.41165077686309814,
      "learning_rate": 5.987295981233898e-06,
      "loss": 0.0119,
      "step": 1237960
    },
    {
      "epoch": 2.0259814222030204,
      "grad_norm": 1.1796478033065796,
      "learning_rate": 5.987230089020381e-06,
      "loss": 0.0252,
      "step": 1237980
    },
    {
      "epoch": 2.0260141526416735,
      "grad_norm": 0.46368134021759033,
      "learning_rate": 5.987164196806864e-06,
      "loss": 0.0152,
      "step": 1238000
    },
    {
      "epoch": 2.026046883080327,
      "grad_norm": 0.6803056001663208,
      "learning_rate": 5.987098304593346e-06,
      "loss": 0.0176,
      "step": 1238020
    },
    {
      "epoch": 2.0260796135189802,
      "grad_norm": 0.39192065596580505,
      "learning_rate": 5.98703241237983e-06,
      "loss": 0.0137,
      "step": 1238040
    },
    {
      "epoch": 2.026112343957634,
      "grad_norm": 0.23693381249904633,
      "learning_rate": 5.986966520166313e-06,
      "loss": 0.014,
      "step": 1238060
    },
    {
      "epoch": 2.026145074396287,
      "grad_norm": 0.30670756101608276,
      "learning_rate": 5.986900627952795e-06,
      "loss": 0.0142,
      "step": 1238080
    },
    {
      "epoch": 2.0261778048349406,
      "grad_norm": 0.16242781281471252,
      "learning_rate": 5.986834735739279e-06,
      "loss": 0.01,
      "step": 1238100
    },
    {
      "epoch": 2.0262105352735937,
      "grad_norm": 0.0809980034828186,
      "learning_rate": 5.986768843525761e-06,
      "loss": 0.0104,
      "step": 1238120
    },
    {
      "epoch": 2.026243265712247,
      "grad_norm": 0.35921311378479004,
      "learning_rate": 5.986702951312244e-06,
      "loss": 0.0196,
      "step": 1238140
    },
    {
      "epoch": 2.0262759961509005,
      "grad_norm": 0.4241028130054474,
      "learning_rate": 5.986637059098726e-06,
      "loss": 0.0187,
      "step": 1238160
    },
    {
      "epoch": 2.0263087265895536,
      "grad_norm": 1.0446430444717407,
      "learning_rate": 5.98657116688521e-06,
      "loss": 0.0144,
      "step": 1238180
    },
    {
      "epoch": 2.0263414570282072,
      "grad_norm": 0.29055914282798767,
      "learning_rate": 5.9865052746716925e-06,
      "loss": 0.0173,
      "step": 1238200
    },
    {
      "epoch": 2.0263741874668604,
      "grad_norm": 0.401926726102829,
      "learning_rate": 5.986439382458175e-06,
      "loss": 0.0159,
      "step": 1238220
    },
    {
      "epoch": 2.026406917905514,
      "grad_norm": 0.12877947092056274,
      "learning_rate": 5.986373490244658e-06,
      "loss": 0.014,
      "step": 1238240
    },
    {
      "epoch": 2.026439648344167,
      "grad_norm": 0.6418298482894897,
      "learning_rate": 5.986307598031142e-06,
      "loss": 0.0177,
      "step": 1238260
    },
    {
      "epoch": 2.0264723787828203,
      "grad_norm": 0.20104297995567322,
      "learning_rate": 5.9862417058176235e-06,
      "loss": 0.0127,
      "step": 1238280
    },
    {
      "epoch": 2.026505109221474,
      "grad_norm": 0.5373315811157227,
      "learning_rate": 5.986175813604107e-06,
      "loss": 0.015,
      "step": 1238300
    },
    {
      "epoch": 2.026537839660127,
      "grad_norm": 0.5867630243301392,
      "learning_rate": 5.986109921390589e-06,
      "loss": 0.0191,
      "step": 1238320
    },
    {
      "epoch": 2.0265705700987806,
      "grad_norm": 0.7079774141311646,
      "learning_rate": 5.9860440291770726e-06,
      "loss": 0.018,
      "step": 1238340
    },
    {
      "epoch": 2.0266033005374338,
      "grad_norm": 0.26704519987106323,
      "learning_rate": 5.985978136963555e-06,
      "loss": 0.0141,
      "step": 1238360
    },
    {
      "epoch": 2.0266360309760874,
      "grad_norm": 1.167641043663025,
      "learning_rate": 5.985912244750038e-06,
      "loss": 0.0252,
      "step": 1238380
    },
    {
      "epoch": 2.0266687614147405,
      "grad_norm": 0.3700031340122223,
      "learning_rate": 5.985846352536522e-06,
      "loss": 0.0098,
      "step": 1238400
    },
    {
      "epoch": 2.0267014918533937,
      "grad_norm": 0.3504524528980255,
      "learning_rate": 5.985780460323004e-06,
      "loss": 0.0153,
      "step": 1238420
    },
    {
      "epoch": 2.0267342222920472,
      "grad_norm": 0.5995878577232361,
      "learning_rate": 5.985714568109487e-06,
      "loss": 0.0149,
      "step": 1238440
    },
    {
      "epoch": 2.0267669527307004,
      "grad_norm": 0.34654492139816284,
      "learning_rate": 5.98564867589597e-06,
      "loss": 0.011,
      "step": 1238460
    },
    {
      "epoch": 2.026799683169354,
      "grad_norm": 0.39318785071372986,
      "learning_rate": 5.9855827836824534e-06,
      "loss": 0.0145,
      "step": 1238480
    },
    {
      "epoch": 2.026832413608007,
      "grad_norm": 0.4410366714000702,
      "learning_rate": 5.985516891468935e-06,
      "loss": 0.0185,
      "step": 1238500
    },
    {
      "epoch": 2.0268651440466603,
      "grad_norm": 0.2933109700679779,
      "learning_rate": 5.985450999255419e-06,
      "loss": 0.0155,
      "step": 1238520
    },
    {
      "epoch": 2.026897874485314,
      "grad_norm": 0.14264708757400513,
      "learning_rate": 5.985385107041901e-06,
      "loss": 0.0123,
      "step": 1238540
    },
    {
      "epoch": 2.026930604923967,
      "grad_norm": 0.3301349878311157,
      "learning_rate": 5.985319214828384e-06,
      "loss": 0.0117,
      "step": 1238560
    },
    {
      "epoch": 2.0269633353626206,
      "grad_norm": 0.13735385239124298,
      "learning_rate": 5.985253322614867e-06,
      "loss": 0.0158,
      "step": 1238580
    },
    {
      "epoch": 2.026996065801274,
      "grad_norm": 0.18232814967632294,
      "learning_rate": 5.98518743040135e-06,
      "loss": 0.0161,
      "step": 1238600
    },
    {
      "epoch": 2.0270287962399274,
      "grad_norm": 0.741494357585907,
      "learning_rate": 5.985121538187833e-06,
      "loss": 0.0196,
      "step": 1238620
    },
    {
      "epoch": 2.0270615266785805,
      "grad_norm": 0.4165719747543335,
      "learning_rate": 5.985055645974316e-06,
      "loss": 0.0086,
      "step": 1238640
    },
    {
      "epoch": 2.0270942571172337,
      "grad_norm": 0.3079662024974823,
      "learning_rate": 5.984989753760798e-06,
      "loss": 0.0191,
      "step": 1238660
    },
    {
      "epoch": 2.0271269875558873,
      "grad_norm": 0.3834080696105957,
      "learning_rate": 5.984923861547282e-06,
      "loss": 0.0136,
      "step": 1238680
    },
    {
      "epoch": 2.0271597179945404,
      "grad_norm": 0.4409547746181488,
      "learning_rate": 5.9848579693337636e-06,
      "loss": 0.0186,
      "step": 1238700
    },
    {
      "epoch": 2.027192448433194,
      "grad_norm": 0.9706099629402161,
      "learning_rate": 5.984792077120247e-06,
      "loss": 0.01,
      "step": 1238720
    },
    {
      "epoch": 2.027225178871847,
      "grad_norm": 0.10187634080648422,
      "learning_rate": 5.98472618490673e-06,
      "loss": 0.0175,
      "step": 1238740
    },
    {
      "epoch": 2.0272579093105008,
      "grad_norm": 0.15948480367660522,
      "learning_rate": 5.984660292693213e-06,
      "loss": 0.0217,
      "step": 1238760
    },
    {
      "epoch": 2.027290639749154,
      "grad_norm": 1.0391850471496582,
      "learning_rate": 5.984594400479696e-06,
      "loss": 0.0141,
      "step": 1238780
    },
    {
      "epoch": 2.027323370187807,
      "grad_norm": 0.33845922350883484,
      "learning_rate": 5.984528508266179e-06,
      "loss": 0.0251,
      "step": 1238800
    },
    {
      "epoch": 2.0273561006264607,
      "grad_norm": 0.9092894196510315,
      "learning_rate": 5.984462616052662e-06,
      "loss": 0.0187,
      "step": 1238820
    },
    {
      "epoch": 2.027388831065114,
      "grad_norm": 0.3270352780818939,
      "learning_rate": 5.9843967238391444e-06,
      "loss": 0.0118,
      "step": 1238840
    },
    {
      "epoch": 2.0274215615037674,
      "grad_norm": 0.7431052923202515,
      "learning_rate": 5.984330831625628e-06,
      "loss": 0.0134,
      "step": 1238860
    },
    {
      "epoch": 2.0274542919424206,
      "grad_norm": 0.30113986134529114,
      "learning_rate": 5.98426493941211e-06,
      "loss": 0.0146,
      "step": 1238880
    },
    {
      "epoch": 2.027487022381074,
      "grad_norm": 0.5132097005844116,
      "learning_rate": 5.9841990471985935e-06,
      "loss": 0.0191,
      "step": 1238900
    },
    {
      "epoch": 2.0275197528197273,
      "grad_norm": 0.6074469685554504,
      "learning_rate": 5.984133154985075e-06,
      "loss": 0.0187,
      "step": 1238920
    },
    {
      "epoch": 2.0275524832583804,
      "grad_norm": 2.3568313121795654,
      "learning_rate": 5.984067262771559e-06,
      "loss": 0.0162,
      "step": 1238940
    },
    {
      "epoch": 2.027585213697034,
      "grad_norm": 0.7131777405738831,
      "learning_rate": 5.984001370558042e-06,
      "loss": 0.0102,
      "step": 1238960
    },
    {
      "epoch": 2.027617944135687,
      "grad_norm": 0.19052451848983765,
      "learning_rate": 5.9839354783445245e-06,
      "loss": 0.0178,
      "step": 1238980
    },
    {
      "epoch": 2.027650674574341,
      "grad_norm": 0.7814857363700867,
      "learning_rate": 5.983869586131007e-06,
      "loss": 0.0176,
      "step": 1239000
    },
    {
      "epoch": 2.027683405012994,
      "grad_norm": 2.2023396492004395,
      "learning_rate": 5.983803693917491e-06,
      "loss": 0.0165,
      "step": 1239020
    },
    {
      "epoch": 2.0277161354516475,
      "grad_norm": 0.7273065447807312,
      "learning_rate": 5.983737801703973e-06,
      "loss": 0.0146,
      "step": 1239040
    },
    {
      "epoch": 2.0277488658903007,
      "grad_norm": 0.7044718265533447,
      "learning_rate": 5.983671909490456e-06,
      "loss": 0.02,
      "step": 1239060
    },
    {
      "epoch": 2.027781596328954,
      "grad_norm": 0.200691357254982,
      "learning_rate": 5.983606017276938e-06,
      "loss": 0.0144,
      "step": 1239080
    },
    {
      "epoch": 2.0278143267676074,
      "grad_norm": 0.20437473058700562,
      "learning_rate": 5.983540125063422e-06,
      "loss": 0.0168,
      "step": 1239100
    },
    {
      "epoch": 2.0278470572062606,
      "grad_norm": 0.2930379807949066,
      "learning_rate": 5.983474232849905e-06,
      "loss": 0.0156,
      "step": 1239120
    },
    {
      "epoch": 2.027879787644914,
      "grad_norm": 0.12760654091835022,
      "learning_rate": 5.983408340636387e-06,
      "loss": 0.0186,
      "step": 1239140
    },
    {
      "epoch": 2.0279125180835673,
      "grad_norm": 0.6189785599708557,
      "learning_rate": 5.983342448422871e-06,
      "loss": 0.0181,
      "step": 1239160
    },
    {
      "epoch": 2.027945248522221,
      "grad_norm": 0.38245314359664917,
      "learning_rate": 5.983276556209353e-06,
      "loss": 0.0123,
      "step": 1239180
    },
    {
      "epoch": 2.027977978960874,
      "grad_norm": 0.11494486033916473,
      "learning_rate": 5.983210663995836e-06,
      "loss": 0.0132,
      "step": 1239200
    },
    {
      "epoch": 2.028010709399527,
      "grad_norm": 0.6268255114555359,
      "learning_rate": 5.983144771782319e-06,
      "loss": 0.0113,
      "step": 1239220
    },
    {
      "epoch": 2.028043439838181,
      "grad_norm": 0.15462079644203186,
      "learning_rate": 5.983078879568802e-06,
      "loss": 0.0228,
      "step": 1239240
    },
    {
      "epoch": 2.028076170276834,
      "grad_norm": 0.2901468575000763,
      "learning_rate": 5.9830129873552845e-06,
      "loss": 0.018,
      "step": 1239260
    },
    {
      "epoch": 2.0281089007154876,
      "grad_norm": 0.42125192284584045,
      "learning_rate": 5.982947095141768e-06,
      "loss": 0.0209,
      "step": 1239280
    },
    {
      "epoch": 2.0281416311541407,
      "grad_norm": 0.12116511911153793,
      "learning_rate": 5.98288120292825e-06,
      "loss": 0.0203,
      "step": 1239300
    },
    {
      "epoch": 2.028174361592794,
      "grad_norm": 0.14160653948783875,
      "learning_rate": 5.9828153107147336e-06,
      "loss": 0.0158,
      "step": 1239320
    },
    {
      "epoch": 2.0282070920314474,
      "grad_norm": 0.1189131811261177,
      "learning_rate": 5.9827494185012155e-06,
      "loss": 0.009,
      "step": 1239340
    },
    {
      "epoch": 2.0282398224701006,
      "grad_norm": 0.4241265058517456,
      "learning_rate": 5.982683526287699e-06,
      "loss": 0.015,
      "step": 1239360
    },
    {
      "epoch": 2.028272552908754,
      "grad_norm": 0.874970018863678,
      "learning_rate": 5.982617634074182e-06,
      "loss": 0.0181,
      "step": 1239380
    },
    {
      "epoch": 2.0283052833474073,
      "grad_norm": 0.2672041058540344,
      "learning_rate": 5.9825517418606645e-06,
      "loss": 0.0108,
      "step": 1239400
    },
    {
      "epoch": 2.028338013786061,
      "grad_norm": 0.18832211196422577,
      "learning_rate": 5.982485849647147e-06,
      "loss": 0.0226,
      "step": 1239420
    },
    {
      "epoch": 2.028370744224714,
      "grad_norm": 0.2991626560688019,
      "learning_rate": 5.982419957433631e-06,
      "loss": 0.0125,
      "step": 1239440
    },
    {
      "epoch": 2.0284034746633672,
      "grad_norm": 1.1363557577133179,
      "learning_rate": 5.982354065220114e-06,
      "loss": 0.0274,
      "step": 1239460
    },
    {
      "epoch": 2.028436205102021,
      "grad_norm": 0.852711021900177,
      "learning_rate": 5.982288173006596e-06,
      "loss": 0.0163,
      "step": 1239480
    },
    {
      "epoch": 2.028468935540674,
      "grad_norm": 0.1932985484600067,
      "learning_rate": 5.98222228079308e-06,
      "loss": 0.0112,
      "step": 1239500
    },
    {
      "epoch": 2.0285016659793276,
      "grad_norm": 0.07332294434309006,
      "learning_rate": 5.982156388579562e-06,
      "loss": 0.0112,
      "step": 1239520
    },
    {
      "epoch": 2.0285343964179807,
      "grad_norm": 0.4062882959842682,
      "learning_rate": 5.982090496366045e-06,
      "loss": 0.0112,
      "step": 1239540
    },
    {
      "epoch": 2.0285671268566343,
      "grad_norm": 0.5768901705741882,
      "learning_rate": 5.982024604152527e-06,
      "loss": 0.0172,
      "step": 1239560
    },
    {
      "epoch": 2.0285998572952875,
      "grad_norm": 0.5811227560043335,
      "learning_rate": 5.981958711939011e-06,
      "loss": 0.0172,
      "step": 1239580
    },
    {
      "epoch": 2.0286325877339406,
      "grad_norm": 0.5301954746246338,
      "learning_rate": 5.981892819725494e-06,
      "loss": 0.0117,
      "step": 1239600
    },
    {
      "epoch": 2.028665318172594,
      "grad_norm": 0.14266401529312134,
      "learning_rate": 5.981826927511976e-06,
      "loss": 0.012,
      "step": 1239620
    },
    {
      "epoch": 2.0286980486112474,
      "grad_norm": 0.3585716485977173,
      "learning_rate": 5.981761035298459e-06,
      "loss": 0.0098,
      "step": 1239640
    },
    {
      "epoch": 2.028730779049901,
      "grad_norm": 0.2839304506778717,
      "learning_rate": 5.981695143084943e-06,
      "loss": 0.0209,
      "step": 1239660
    },
    {
      "epoch": 2.028763509488554,
      "grad_norm": 0.4480327367782593,
      "learning_rate": 5.981629250871425e-06,
      "loss": 0.0157,
      "step": 1239680
    },
    {
      "epoch": 2.0287962399272077,
      "grad_norm": 0.27262407541275024,
      "learning_rate": 5.981563358657908e-06,
      "loss": 0.015,
      "step": 1239700
    },
    {
      "epoch": 2.028828970365861,
      "grad_norm": 0.4236587882041931,
      "learning_rate": 5.98149746644439e-06,
      "loss": 0.0142,
      "step": 1239720
    },
    {
      "epoch": 2.028861700804514,
      "grad_norm": 0.6913493275642395,
      "learning_rate": 5.981431574230874e-06,
      "loss": 0.0154,
      "step": 1239740
    },
    {
      "epoch": 2.0288944312431676,
      "grad_norm": 0.42692670226097107,
      "learning_rate": 5.981365682017356e-06,
      "loss": 0.0114,
      "step": 1239760
    },
    {
      "epoch": 2.0289271616818207,
      "grad_norm": 0.19329936802387238,
      "learning_rate": 5.981299789803839e-06,
      "loss": 0.0134,
      "step": 1239780
    },
    {
      "epoch": 2.0289598921204743,
      "grad_norm": 0.3874245584011078,
      "learning_rate": 5.981233897590322e-06,
      "loss": 0.0164,
      "step": 1239800
    },
    {
      "epoch": 2.0289926225591275,
      "grad_norm": 0.4486857056617737,
      "learning_rate": 5.9811680053768055e-06,
      "loss": 0.0127,
      "step": 1239820
    },
    {
      "epoch": 2.029025352997781,
      "grad_norm": 0.5004224181175232,
      "learning_rate": 5.981102113163288e-06,
      "loss": 0.0271,
      "step": 1239840
    },
    {
      "epoch": 2.0290580834364342,
      "grad_norm": 0.5705287456512451,
      "learning_rate": 5.981036220949771e-06,
      "loss": 0.0152,
      "step": 1239860
    },
    {
      "epoch": 2.0290908138750874,
      "grad_norm": 0.41009700298309326,
      "learning_rate": 5.9809703287362545e-06,
      "loss": 0.0126,
      "step": 1239880
    },
    {
      "epoch": 2.029123544313741,
      "grad_norm": 0.32447749376296997,
      "learning_rate": 5.980904436522736e-06,
      "loss": 0.015,
      "step": 1239900
    },
    {
      "epoch": 2.029156274752394,
      "grad_norm": 1.1445691585540771,
      "learning_rate": 5.98083854430922e-06,
      "loss": 0.0246,
      "step": 1239920
    },
    {
      "epoch": 2.0291890051910477,
      "grad_norm": 0.3002316653728485,
      "learning_rate": 5.980772652095702e-06,
      "loss": 0.0145,
      "step": 1239940
    },
    {
      "epoch": 2.029221735629701,
      "grad_norm": 0.5543040633201599,
      "learning_rate": 5.9807067598821855e-06,
      "loss": 0.0172,
      "step": 1239960
    },
    {
      "epoch": 2.0292544660683545,
      "grad_norm": 0.5052641034126282,
      "learning_rate": 5.980640867668668e-06,
      "loss": 0.0217,
      "step": 1239980
    },
    {
      "epoch": 2.0292871965070076,
      "grad_norm": 0.14793144166469574,
      "learning_rate": 5.980574975455151e-06,
      "loss": 0.0214,
      "step": 1240000
    },
    {
      "epoch": 2.0293199269456608,
      "grad_norm": 0.1386486440896988,
      "learning_rate": 5.980509083241634e-06,
      "loss": 0.0131,
      "step": 1240020
    },
    {
      "epoch": 2.0293526573843144,
      "grad_norm": 1.0243362188339233,
      "learning_rate": 5.980443191028117e-06,
      "loss": 0.0136,
      "step": 1240040
    },
    {
      "epoch": 2.0293853878229675,
      "grad_norm": 0.6930661201477051,
      "learning_rate": 5.980377298814599e-06,
      "loss": 0.02,
      "step": 1240060
    },
    {
      "epoch": 2.029418118261621,
      "grad_norm": 0.5985390543937683,
      "learning_rate": 5.980311406601083e-06,
      "loss": 0.0179,
      "step": 1240080
    },
    {
      "epoch": 2.0294508487002743,
      "grad_norm": 0.6361006498336792,
      "learning_rate": 5.980245514387565e-06,
      "loss": 0.0187,
      "step": 1240100
    },
    {
      "epoch": 2.0294835791389274,
      "grad_norm": 0.09679803252220154,
      "learning_rate": 5.980179622174048e-06,
      "loss": 0.0109,
      "step": 1240120
    },
    {
      "epoch": 2.029516309577581,
      "grad_norm": 0.26598843932151794,
      "learning_rate": 5.98011372996053e-06,
      "loss": 0.0101,
      "step": 1240140
    },
    {
      "epoch": 2.029549040016234,
      "grad_norm": 0.5242035984992981,
      "learning_rate": 5.980047837747014e-06,
      "loss": 0.0154,
      "step": 1240160
    },
    {
      "epoch": 2.0295817704548877,
      "grad_norm": 0.4199558198451996,
      "learning_rate": 5.979981945533497e-06,
      "loss": 0.0143,
      "step": 1240180
    },
    {
      "epoch": 2.029614500893541,
      "grad_norm": 0.30018502473831177,
      "learning_rate": 5.979916053319979e-06,
      "loss": 0.0137,
      "step": 1240200
    },
    {
      "epoch": 2.0296472313321945,
      "grad_norm": 0.6489844918251038,
      "learning_rate": 5.979850161106463e-06,
      "loss": 0.0182,
      "step": 1240220
    },
    {
      "epoch": 2.0296799617708476,
      "grad_norm": 0.2930867671966553,
      "learning_rate": 5.9797842688929455e-06,
      "loss": 0.0113,
      "step": 1240240
    },
    {
      "epoch": 2.029712692209501,
      "grad_norm": 0.8333543539047241,
      "learning_rate": 5.979718376679428e-06,
      "loss": 0.0125,
      "step": 1240260
    },
    {
      "epoch": 2.0297454226481544,
      "grad_norm": 0.1925104409456253,
      "learning_rate": 5.979652484465911e-06,
      "loss": 0.0152,
      "step": 1240280
    },
    {
      "epoch": 2.0297781530868075,
      "grad_norm": 0.5332989692687988,
      "learning_rate": 5.979586592252395e-06,
      "loss": 0.0114,
      "step": 1240300
    },
    {
      "epoch": 2.029810883525461,
      "grad_norm": 0.1888989955186844,
      "learning_rate": 5.9795207000388765e-06,
      "loss": 0.011,
      "step": 1240320
    },
    {
      "epoch": 2.0298436139641143,
      "grad_norm": 0.4966486990451813,
      "learning_rate": 5.97945480782536e-06,
      "loss": 0.0168,
      "step": 1240340
    },
    {
      "epoch": 2.029876344402768,
      "grad_norm": 0.7474010586738586,
      "learning_rate": 5.979388915611842e-06,
      "loss": 0.0165,
      "step": 1240360
    },
    {
      "epoch": 2.029909074841421,
      "grad_norm": 0.3108775019645691,
      "learning_rate": 5.9793230233983256e-06,
      "loss": 0.011,
      "step": 1240380
    },
    {
      "epoch": 2.029941805280074,
      "grad_norm": 0.29606392979621887,
      "learning_rate": 5.979257131184808e-06,
      "loss": 0.0153,
      "step": 1240400
    },
    {
      "epoch": 2.0299745357187278,
      "grad_norm": 0.9409095644950867,
      "learning_rate": 5.979191238971291e-06,
      "loss": 0.0211,
      "step": 1240420
    },
    {
      "epoch": 2.030007266157381,
      "grad_norm": 0.43800806999206543,
      "learning_rate": 5.979125346757774e-06,
      "loss": 0.0166,
      "step": 1240440
    },
    {
      "epoch": 2.0300399965960345,
      "grad_norm": 0.3031744360923767,
      "learning_rate": 5.979059454544257e-06,
      "loss": 0.0149,
      "step": 1240460
    },
    {
      "epoch": 2.0300727270346877,
      "grad_norm": 0.9015055298805237,
      "learning_rate": 5.978993562330739e-06,
      "loss": 0.0223,
      "step": 1240480
    },
    {
      "epoch": 2.0301054574733413,
      "grad_norm": 0.5639482736587524,
      "learning_rate": 5.978927670117223e-06,
      "loss": 0.0156,
      "step": 1240500
    },
    {
      "epoch": 2.0301381879119944,
      "grad_norm": 0.3282049000263214,
      "learning_rate": 5.9788617779037064e-06,
      "loss": 0.0155,
      "step": 1240520
    },
    {
      "epoch": 2.0301709183506476,
      "grad_norm": 1.1656479835510254,
      "learning_rate": 5.978795885690188e-06,
      "loss": 0.0193,
      "step": 1240540
    },
    {
      "epoch": 2.030203648789301,
      "grad_norm": 0.2649669051170349,
      "learning_rate": 5.978729993476672e-06,
      "loss": 0.0244,
      "step": 1240560
    },
    {
      "epoch": 2.0302363792279543,
      "grad_norm": 0.6920097470283508,
      "learning_rate": 5.978664101263154e-06,
      "loss": 0.0095,
      "step": 1240580
    },
    {
      "epoch": 2.030269109666608,
      "grad_norm": 0.22003917396068573,
      "learning_rate": 5.978598209049637e-06,
      "loss": 0.0157,
      "step": 1240600
    },
    {
      "epoch": 2.030301840105261,
      "grad_norm": 0.2781710922718048,
      "learning_rate": 5.97853231683612e-06,
      "loss": 0.0215,
      "step": 1240620
    },
    {
      "epoch": 2.0303345705439146,
      "grad_norm": 1.2818150520324707,
      "learning_rate": 5.978466424622603e-06,
      "loss": 0.0108,
      "step": 1240640
    },
    {
      "epoch": 2.030367300982568,
      "grad_norm": 0.13922113180160522,
      "learning_rate": 5.978400532409086e-06,
      "loss": 0.0158,
      "step": 1240660
    },
    {
      "epoch": 2.030400031421221,
      "grad_norm": 0.731086254119873,
      "learning_rate": 5.978334640195569e-06,
      "loss": 0.0181,
      "step": 1240680
    },
    {
      "epoch": 2.0304327618598745,
      "grad_norm": 0.4263356924057007,
      "learning_rate": 5.978268747982051e-06,
      "loss": 0.0151,
      "step": 1240700
    },
    {
      "epoch": 2.0304654922985277,
      "grad_norm": 0.18291960656642914,
      "learning_rate": 5.978202855768535e-06,
      "loss": 0.0104,
      "step": 1240720
    },
    {
      "epoch": 2.0304982227371813,
      "grad_norm": 0.34283769130706787,
      "learning_rate": 5.9781369635550166e-06,
      "loss": 0.019,
      "step": 1240740
    },
    {
      "epoch": 2.0305309531758344,
      "grad_norm": 0.9881521463394165,
      "learning_rate": 5.9780710713415e-06,
      "loss": 0.0156,
      "step": 1240760
    },
    {
      "epoch": 2.0305636836144876,
      "grad_norm": 0.5123565196990967,
      "learning_rate": 5.978005179127983e-06,
      "loss": 0.0201,
      "step": 1240780
    },
    {
      "epoch": 2.030596414053141,
      "grad_norm": 0.13828763365745544,
      "learning_rate": 5.977939286914466e-06,
      "loss": 0.0117,
      "step": 1240800
    },
    {
      "epoch": 2.0306291444917943,
      "grad_norm": 0.18999144434928894,
      "learning_rate": 5.977873394700948e-06,
      "loss": 0.017,
      "step": 1240820
    },
    {
      "epoch": 2.030661874930448,
      "grad_norm": 0.5309728980064392,
      "learning_rate": 5.977807502487432e-06,
      "loss": 0.0224,
      "step": 1240840
    },
    {
      "epoch": 2.030694605369101,
      "grad_norm": 0.23694652318954468,
      "learning_rate": 5.977741610273915e-06,
      "loss": 0.009,
      "step": 1240860
    },
    {
      "epoch": 2.0307273358077547,
      "grad_norm": 0.25205183029174805,
      "learning_rate": 5.9776757180603974e-06,
      "loss": 0.0185,
      "step": 1240880
    },
    {
      "epoch": 2.030760066246408,
      "grad_norm": 0.2738724648952484,
      "learning_rate": 5.977609825846881e-06,
      "loss": 0.0162,
      "step": 1240900
    },
    {
      "epoch": 2.030792796685061,
      "grad_norm": 0.16934487223625183,
      "learning_rate": 5.977543933633363e-06,
      "loss": 0.0148,
      "step": 1240920
    },
    {
      "epoch": 2.0308255271237146,
      "grad_norm": 0.09665308892726898,
      "learning_rate": 5.9774780414198465e-06,
      "loss": 0.0134,
      "step": 1240940
    },
    {
      "epoch": 2.0308582575623677,
      "grad_norm": 0.5933234095573425,
      "learning_rate": 5.977412149206328e-06,
      "loss": 0.0167,
      "step": 1240960
    },
    {
      "epoch": 2.0308909880010213,
      "grad_norm": 0.3524552583694458,
      "learning_rate": 5.977346256992812e-06,
      "loss": 0.0157,
      "step": 1240980
    },
    {
      "epoch": 2.0309237184396745,
      "grad_norm": 0.6578848958015442,
      "learning_rate": 5.977280364779294e-06,
      "loss": 0.0122,
      "step": 1241000
    },
    {
      "epoch": 2.030956448878328,
      "grad_norm": 1.2698885202407837,
      "learning_rate": 5.9772144725657775e-06,
      "loss": 0.0153,
      "step": 1241020
    },
    {
      "epoch": 2.030989179316981,
      "grad_norm": 0.8519920706748962,
      "learning_rate": 5.97714858035226e-06,
      "loss": 0.0141,
      "step": 1241040
    },
    {
      "epoch": 2.0310219097556343,
      "grad_norm": 0.8385887742042542,
      "learning_rate": 5.977082688138744e-06,
      "loss": 0.0117,
      "step": 1241060
    },
    {
      "epoch": 2.031054640194288,
      "grad_norm": 0.5338603258132935,
      "learning_rate": 5.977016795925226e-06,
      "loss": 0.0178,
      "step": 1241080
    },
    {
      "epoch": 2.031087370632941,
      "grad_norm": 1.0270304679870605,
      "learning_rate": 5.976950903711709e-06,
      "loss": 0.0167,
      "step": 1241100
    },
    {
      "epoch": 2.0311201010715947,
      "grad_norm": 0.06368318945169449,
      "learning_rate": 5.976885011498191e-06,
      "loss": 0.0131,
      "step": 1241120
    },
    {
      "epoch": 2.031152831510248,
      "grad_norm": 0.30783525109291077,
      "learning_rate": 5.976819119284675e-06,
      "loss": 0.0144,
      "step": 1241140
    },
    {
      "epoch": 2.0311855619489014,
      "grad_norm": 0.2196546345949173,
      "learning_rate": 5.976753227071157e-06,
      "loss": 0.0147,
      "step": 1241160
    },
    {
      "epoch": 2.0312182923875546,
      "grad_norm": 0.6932557821273804,
      "learning_rate": 5.97668733485764e-06,
      "loss": 0.0096,
      "step": 1241180
    },
    {
      "epoch": 2.0312510228262077,
      "grad_norm": 0.15280947089195251,
      "learning_rate": 5.976621442644123e-06,
      "loss": 0.0155,
      "step": 1241200
    },
    {
      "epoch": 2.0312837532648613,
      "grad_norm": 2.061366558074951,
      "learning_rate": 5.976555550430606e-06,
      "loss": 0.0155,
      "step": 1241220
    },
    {
      "epoch": 2.0313164837035145,
      "grad_norm": 0.3935152590274811,
      "learning_rate": 5.976489658217089e-06,
      "loss": 0.0148,
      "step": 1241240
    },
    {
      "epoch": 2.031349214142168,
      "grad_norm": 0.3681006133556366,
      "learning_rate": 5.976423766003572e-06,
      "loss": 0.015,
      "step": 1241260
    },
    {
      "epoch": 2.031381944580821,
      "grad_norm": 0.36472731828689575,
      "learning_rate": 5.976357873790055e-06,
      "loss": 0.0182,
      "step": 1241280
    },
    {
      "epoch": 2.031414675019475,
      "grad_norm": 0.26398757100105286,
      "learning_rate": 5.9762919815765375e-06,
      "loss": 0.0131,
      "step": 1241300
    },
    {
      "epoch": 2.031447405458128,
      "grad_norm": 1.6317517757415771,
      "learning_rate": 5.976226089363021e-06,
      "loss": 0.0197,
      "step": 1241320
    },
    {
      "epoch": 2.031480135896781,
      "grad_norm": 0.34153833985328674,
      "learning_rate": 5.976160197149503e-06,
      "loss": 0.0166,
      "step": 1241340
    },
    {
      "epoch": 2.0315128663354347,
      "grad_norm": 0.7132506966590881,
      "learning_rate": 5.976094304935987e-06,
      "loss": 0.0103,
      "step": 1241360
    },
    {
      "epoch": 2.031545596774088,
      "grad_norm": 0.8683800101280212,
      "learning_rate": 5.9760284127224685e-06,
      "loss": 0.0129,
      "step": 1241380
    },
    {
      "epoch": 2.0315783272127415,
      "grad_norm": 1.0661083459854126,
      "learning_rate": 5.975962520508952e-06,
      "loss": 0.0179,
      "step": 1241400
    },
    {
      "epoch": 2.0316110576513946,
      "grad_norm": 0.27769365906715393,
      "learning_rate": 5.975896628295435e-06,
      "loss": 0.0172,
      "step": 1241420
    },
    {
      "epoch": 2.031643788090048,
      "grad_norm": 1.0231455564498901,
      "learning_rate": 5.9758307360819175e-06,
      "loss": 0.0134,
      "step": 1241440
    },
    {
      "epoch": 2.0316765185287013,
      "grad_norm": 0.41681116819381714,
      "learning_rate": 5.9757648438684e-06,
      "loss": 0.0146,
      "step": 1241460
    },
    {
      "epoch": 2.0317092489673545,
      "grad_norm": 0.29379263520240784,
      "learning_rate": 5.975698951654884e-06,
      "loss": 0.017,
      "step": 1241480
    },
    {
      "epoch": 2.031741979406008,
      "grad_norm": 0.22977468371391296,
      "learning_rate": 5.975633059441366e-06,
      "loss": 0.0159,
      "step": 1241500
    },
    {
      "epoch": 2.0317747098446612,
      "grad_norm": 1.082741618156433,
      "learning_rate": 5.975567167227849e-06,
      "loss": 0.0153,
      "step": 1241520
    },
    {
      "epoch": 2.031807440283315,
      "grad_norm": 0.8633774518966675,
      "learning_rate": 5.975501275014331e-06,
      "loss": 0.0156,
      "step": 1241540
    },
    {
      "epoch": 2.031840170721968,
      "grad_norm": 0.2161901295185089,
      "learning_rate": 5.975435382800815e-06,
      "loss": 0.014,
      "step": 1241560
    },
    {
      "epoch": 2.031872901160621,
      "grad_norm": 0.5712186098098755,
      "learning_rate": 5.975369490587298e-06,
      "loss": 0.0144,
      "step": 1241580
    },
    {
      "epoch": 2.0319056315992747,
      "grad_norm": 0.24934464693069458,
      "learning_rate": 5.97530359837378e-06,
      "loss": 0.0117,
      "step": 1241600
    },
    {
      "epoch": 2.031938362037928,
      "grad_norm": 0.3203413486480713,
      "learning_rate": 5.975237706160264e-06,
      "loss": 0.0177,
      "step": 1241620
    },
    {
      "epoch": 2.0319710924765815,
      "grad_norm": 0.5671863555908203,
      "learning_rate": 5.975171813946747e-06,
      "loss": 0.0162,
      "step": 1241640
    },
    {
      "epoch": 2.0320038229152346,
      "grad_norm": 0.5595580339431763,
      "learning_rate": 5.975105921733229e-06,
      "loss": 0.0154,
      "step": 1241660
    },
    {
      "epoch": 2.032036553353888,
      "grad_norm": 0.34716638922691345,
      "learning_rate": 5.975040029519712e-06,
      "loss": 0.0167,
      "step": 1241680
    },
    {
      "epoch": 2.0320692837925414,
      "grad_norm": 0.20968003571033478,
      "learning_rate": 5.974974137306196e-06,
      "loss": 0.0193,
      "step": 1241700
    },
    {
      "epoch": 2.0321020142311945,
      "grad_norm": 0.6177575588226318,
      "learning_rate": 5.974908245092678e-06,
      "loss": 0.0129,
      "step": 1241720
    },
    {
      "epoch": 2.032134744669848,
      "grad_norm": 0.3508053421974182,
      "learning_rate": 5.974842352879161e-06,
      "loss": 0.0148,
      "step": 1241740
    },
    {
      "epoch": 2.0321674751085013,
      "grad_norm": 0.5109310746192932,
      "learning_rate": 5.974776460665643e-06,
      "loss": 0.0149,
      "step": 1241760
    },
    {
      "epoch": 2.032200205547155,
      "grad_norm": 0.14943437278270721,
      "learning_rate": 5.974710568452127e-06,
      "loss": 0.0105,
      "step": 1241780
    },
    {
      "epoch": 2.032232935985808,
      "grad_norm": 0.2855619788169861,
      "learning_rate": 5.974644676238609e-06,
      "loss": 0.0152,
      "step": 1241800
    },
    {
      "epoch": 2.0322656664244616,
      "grad_norm": 0.8652759790420532,
      "learning_rate": 5.974578784025092e-06,
      "loss": 0.0183,
      "step": 1241820
    },
    {
      "epoch": 2.0322983968631148,
      "grad_norm": 0.1761285364627838,
      "learning_rate": 5.974512891811575e-06,
      "loss": 0.0184,
      "step": 1241840
    },
    {
      "epoch": 2.032331127301768,
      "grad_norm": 1.0070067644119263,
      "learning_rate": 5.9744469995980585e-06,
      "loss": 0.0138,
      "step": 1241860
    },
    {
      "epoch": 2.0323638577404215,
      "grad_norm": 0.468073308467865,
      "learning_rate": 5.97438110738454e-06,
      "loss": 0.0163,
      "step": 1241880
    },
    {
      "epoch": 2.0323965881790746,
      "grad_norm": 0.3157528042793274,
      "learning_rate": 5.974315215171024e-06,
      "loss": 0.0188,
      "step": 1241900
    },
    {
      "epoch": 2.0324293186177282,
      "grad_norm": 0.22613847255706787,
      "learning_rate": 5.9742493229575075e-06,
      "loss": 0.0148,
      "step": 1241920
    },
    {
      "epoch": 2.0324620490563814,
      "grad_norm": 0.555965781211853,
      "learning_rate": 5.974183430743989e-06,
      "loss": 0.0229,
      "step": 1241940
    },
    {
      "epoch": 2.032494779495035,
      "grad_norm": 0.46125322580337524,
      "learning_rate": 5.974117538530473e-06,
      "loss": 0.0124,
      "step": 1241960
    },
    {
      "epoch": 2.032527509933688,
      "grad_norm": 0.4790400266647339,
      "learning_rate": 5.974051646316955e-06,
      "loss": 0.0171,
      "step": 1241980
    },
    {
      "epoch": 2.0325602403723413,
      "grad_norm": 0.39216068387031555,
      "learning_rate": 5.9739857541034385e-06,
      "loss": 0.0107,
      "step": 1242000
    },
    {
      "epoch": 2.032592970810995,
      "grad_norm": 0.050306472927331924,
      "learning_rate": 5.97391986188992e-06,
      "loss": 0.0104,
      "step": 1242020
    },
    {
      "epoch": 2.032625701249648,
      "grad_norm": 0.3303150236606598,
      "learning_rate": 5.973853969676404e-06,
      "loss": 0.0142,
      "step": 1242040
    },
    {
      "epoch": 2.0326584316883016,
      "grad_norm": 0.5723065137863159,
      "learning_rate": 5.973788077462887e-06,
      "loss": 0.012,
      "step": 1242060
    },
    {
      "epoch": 2.0326911621269548,
      "grad_norm": 0.6283974647521973,
      "learning_rate": 5.9737221852493694e-06,
      "loss": 0.0178,
      "step": 1242080
    },
    {
      "epoch": 2.0327238925656084,
      "grad_norm": 0.6348394751548767,
      "learning_rate": 5.973656293035852e-06,
      "loss": 0.0203,
      "step": 1242100
    },
    {
      "epoch": 2.0327566230042615,
      "grad_norm": 0.10227332264184952,
      "learning_rate": 5.973590400822336e-06,
      "loss": 0.0089,
      "step": 1242120
    },
    {
      "epoch": 2.0327893534429147,
      "grad_norm": 0.4377627670764923,
      "learning_rate": 5.973524508608818e-06,
      "loss": 0.0134,
      "step": 1242140
    },
    {
      "epoch": 2.0328220838815683,
      "grad_norm": 0.5751863718032837,
      "learning_rate": 5.973458616395301e-06,
      "loss": 0.0189,
      "step": 1242160
    },
    {
      "epoch": 2.0328548143202214,
      "grad_norm": 0.32727551460266113,
      "learning_rate": 5.973392724181783e-06,
      "loss": 0.016,
      "step": 1242180
    },
    {
      "epoch": 2.032887544758875,
      "grad_norm": 0.5893256068229675,
      "learning_rate": 5.973326831968267e-06,
      "loss": 0.0112,
      "step": 1242200
    },
    {
      "epoch": 2.032920275197528,
      "grad_norm": 0.5837976932525635,
      "learning_rate": 5.9732609397547495e-06,
      "loss": 0.0083,
      "step": 1242220
    },
    {
      "epoch": 2.0329530056361818,
      "grad_norm": 1.1365939378738403,
      "learning_rate": 5.973195047541232e-06,
      "loss": 0.0184,
      "step": 1242240
    },
    {
      "epoch": 2.032985736074835,
      "grad_norm": 0.14870001375675201,
      "learning_rate": 5.973129155327715e-06,
      "loss": 0.0129,
      "step": 1242260
    },
    {
      "epoch": 2.033018466513488,
      "grad_norm": 0.07274990528821945,
      "learning_rate": 5.9730632631141985e-06,
      "loss": 0.0136,
      "step": 1242280
    },
    {
      "epoch": 2.0330511969521416,
      "grad_norm": 0.3729233741760254,
      "learning_rate": 5.972997370900681e-06,
      "loss": 0.0149,
      "step": 1242300
    },
    {
      "epoch": 2.033083927390795,
      "grad_norm": 0.21458648145198822,
      "learning_rate": 5.972931478687164e-06,
      "loss": 0.0105,
      "step": 1242320
    },
    {
      "epoch": 2.0331166578294484,
      "grad_norm": 0.10811354219913483,
      "learning_rate": 5.972865586473648e-06,
      "loss": 0.011,
      "step": 1242340
    },
    {
      "epoch": 2.0331493882681015,
      "grad_norm": 0.28654539585113525,
      "learning_rate": 5.9727996942601295e-06,
      "loss": 0.0176,
      "step": 1242360
    },
    {
      "epoch": 2.0331821187067547,
      "grad_norm": 0.2260456085205078,
      "learning_rate": 5.972733802046613e-06,
      "loss": 0.0202,
      "step": 1242380
    },
    {
      "epoch": 2.0332148491454083,
      "grad_norm": 0.3569198250770569,
      "learning_rate": 5.972667909833095e-06,
      "loss": 0.0162,
      "step": 1242400
    },
    {
      "epoch": 2.0332475795840614,
      "grad_norm": 0.09502144157886505,
      "learning_rate": 5.9726020176195786e-06,
      "loss": 0.0146,
      "step": 1242420
    },
    {
      "epoch": 2.033280310022715,
      "grad_norm": 0.5478466153144836,
      "learning_rate": 5.972536125406061e-06,
      "loss": 0.0178,
      "step": 1242440
    },
    {
      "epoch": 2.033313040461368,
      "grad_norm": 0.3844836950302124,
      "learning_rate": 5.972470233192544e-06,
      "loss": 0.0166,
      "step": 1242460
    },
    {
      "epoch": 2.0333457709000218,
      "grad_norm": 0.24598540365695953,
      "learning_rate": 5.972404340979027e-06,
      "loss": 0.0131,
      "step": 1242480
    },
    {
      "epoch": 2.033378501338675,
      "grad_norm": 0.701869010925293,
      "learning_rate": 5.97233844876551e-06,
      "loss": 0.0155,
      "step": 1242500
    },
    {
      "epoch": 2.033411231777328,
      "grad_norm": 1.3572553396224976,
      "learning_rate": 5.972272556551992e-06,
      "loss": 0.0181,
      "step": 1242520
    },
    {
      "epoch": 2.0334439622159817,
      "grad_norm": 0.08520597219467163,
      "learning_rate": 5.972206664338476e-06,
      "loss": 0.0164,
      "step": 1242540
    },
    {
      "epoch": 2.033476692654635,
      "grad_norm": 0.4339863955974579,
      "learning_rate": 5.972140772124958e-06,
      "loss": 0.0162,
      "step": 1242560
    },
    {
      "epoch": 2.0335094230932884,
      "grad_norm": 0.5659146904945374,
      "learning_rate": 5.972074879911441e-06,
      "loss": 0.0152,
      "step": 1242580
    },
    {
      "epoch": 2.0335421535319416,
      "grad_norm": 0.2017727494239807,
      "learning_rate": 5.972008987697924e-06,
      "loss": 0.014,
      "step": 1242600
    },
    {
      "epoch": 2.033574883970595,
      "grad_norm": 0.49494948983192444,
      "learning_rate": 5.971943095484407e-06,
      "loss": 0.025,
      "step": 1242620
    },
    {
      "epoch": 2.0336076144092483,
      "grad_norm": 0.444487601518631,
      "learning_rate": 5.97187720327089e-06,
      "loss": 0.0171,
      "step": 1242640
    },
    {
      "epoch": 2.0336403448479015,
      "grad_norm": 0.16778120398521423,
      "learning_rate": 5.971811311057373e-06,
      "loss": 0.0141,
      "step": 1242660
    },
    {
      "epoch": 2.033673075286555,
      "grad_norm": 3.050111770629883,
      "learning_rate": 5.971745418843856e-06,
      "loss": 0.014,
      "step": 1242680
    },
    {
      "epoch": 2.033705805725208,
      "grad_norm": 0.29576173424720764,
      "learning_rate": 5.971679526630339e-06,
      "loss": 0.0126,
      "step": 1242700
    },
    {
      "epoch": 2.033738536163862,
      "grad_norm": 0.3219883441925049,
      "learning_rate": 5.971613634416822e-06,
      "loss": 0.0192,
      "step": 1242720
    },
    {
      "epoch": 2.033771266602515,
      "grad_norm": 0.5553197860717773,
      "learning_rate": 5.971547742203304e-06,
      "loss": 0.0089,
      "step": 1242740
    },
    {
      "epoch": 2.0338039970411685,
      "grad_norm": 0.21244369447231293,
      "learning_rate": 5.971481849989788e-06,
      "loss": 0.0128,
      "step": 1242760
    },
    {
      "epoch": 2.0338367274798217,
      "grad_norm": 0.30913132429122925,
      "learning_rate": 5.9714159577762696e-06,
      "loss": 0.0207,
      "step": 1242780
    },
    {
      "epoch": 2.033869457918475,
      "grad_norm": 0.2832082509994507,
      "learning_rate": 5.971350065562753e-06,
      "loss": 0.0168,
      "step": 1242800
    },
    {
      "epoch": 2.0339021883571284,
      "grad_norm": 0.28660154342651367,
      "learning_rate": 5.971284173349236e-06,
      "loss": 0.0155,
      "step": 1242820
    },
    {
      "epoch": 2.0339349187957816,
      "grad_norm": 0.46702879667282104,
      "learning_rate": 5.971218281135719e-06,
      "loss": 0.0164,
      "step": 1242840
    },
    {
      "epoch": 2.033967649234435,
      "grad_norm": 0.2985213100910187,
      "learning_rate": 5.971152388922201e-06,
      "loss": 0.017,
      "step": 1242860
    },
    {
      "epoch": 2.0340003796730883,
      "grad_norm": 0.34295663237571716,
      "learning_rate": 5.971086496708685e-06,
      "loss": 0.0175,
      "step": 1242880
    },
    {
      "epoch": 2.034033110111742,
      "grad_norm": 1.3304728269577026,
      "learning_rate": 5.971020604495167e-06,
      "loss": 0.0165,
      "step": 1242900
    },
    {
      "epoch": 2.034065840550395,
      "grad_norm": 0.8749656081199646,
      "learning_rate": 5.9709547122816504e-06,
      "loss": 0.0236,
      "step": 1242920
    },
    {
      "epoch": 2.0340985709890482,
      "grad_norm": 0.32816368341445923,
      "learning_rate": 5.970888820068132e-06,
      "loss": 0.0105,
      "step": 1242940
    },
    {
      "epoch": 2.034131301427702,
      "grad_norm": 0.4528380334377289,
      "learning_rate": 5.970822927854616e-06,
      "loss": 0.0138,
      "step": 1242960
    },
    {
      "epoch": 2.034164031866355,
      "grad_norm": 0.19218376278877258,
      "learning_rate": 5.9707570356410995e-06,
      "loss": 0.013,
      "step": 1242980
    },
    {
      "epoch": 2.0341967623050086,
      "grad_norm": 0.3496999740600586,
      "learning_rate": 5.970691143427581e-06,
      "loss": 0.0152,
      "step": 1243000
    },
    {
      "epoch": 2.0342294927436617,
      "grad_norm": 0.12431452423334122,
      "learning_rate": 5.970625251214065e-06,
      "loss": 0.0215,
      "step": 1243020
    },
    {
      "epoch": 2.0342622231823153,
      "grad_norm": 0.08252933621406555,
      "learning_rate": 5.970559359000547e-06,
      "loss": 0.0103,
      "step": 1243040
    },
    {
      "epoch": 2.0342949536209685,
      "grad_norm": 0.6192725300788879,
      "learning_rate": 5.9704934667870305e-06,
      "loss": 0.011,
      "step": 1243060
    },
    {
      "epoch": 2.0343276840596216,
      "grad_norm": 0.22542330622673035,
      "learning_rate": 5.970427574573513e-06,
      "loss": 0.0111,
      "step": 1243080
    },
    {
      "epoch": 2.034360414498275,
      "grad_norm": 0.20026828348636627,
      "learning_rate": 5.970361682359996e-06,
      "loss": 0.0155,
      "step": 1243100
    },
    {
      "epoch": 2.0343931449369284,
      "grad_norm": 0.29110056161880493,
      "learning_rate": 5.970295790146479e-06,
      "loss": 0.0238,
      "step": 1243120
    },
    {
      "epoch": 2.034425875375582,
      "grad_norm": 0.43292000889778137,
      "learning_rate": 5.970229897932962e-06,
      "loss": 0.0117,
      "step": 1243140
    },
    {
      "epoch": 2.034458605814235,
      "grad_norm": 0.7639350891113281,
      "learning_rate": 5.970164005719444e-06,
      "loss": 0.0143,
      "step": 1243160
    },
    {
      "epoch": 2.0344913362528882,
      "grad_norm": 0.30072489380836487,
      "learning_rate": 5.970098113505928e-06,
      "loss": 0.0147,
      "step": 1243180
    },
    {
      "epoch": 2.034524066691542,
      "grad_norm": 0.413174569606781,
      "learning_rate": 5.97003222129241e-06,
      "loss": 0.0142,
      "step": 1243200
    },
    {
      "epoch": 2.034556797130195,
      "grad_norm": 0.3587666451931,
      "learning_rate": 5.969966329078893e-06,
      "loss": 0.0183,
      "step": 1243220
    },
    {
      "epoch": 2.0345895275688486,
      "grad_norm": 0.45953091979026794,
      "learning_rate": 5.969900436865376e-06,
      "loss": 0.0173,
      "step": 1243240
    },
    {
      "epoch": 2.0346222580075017,
      "grad_norm": 0.451264351606369,
      "learning_rate": 5.969834544651859e-06,
      "loss": 0.0143,
      "step": 1243260
    },
    {
      "epoch": 2.0346549884461553,
      "grad_norm": 0.4108612537384033,
      "learning_rate": 5.9697686524383415e-06,
      "loss": 0.0158,
      "step": 1243280
    },
    {
      "epoch": 2.0346877188848085,
      "grad_norm": 0.399051308631897,
      "learning_rate": 5.969702760224825e-06,
      "loss": 0.0138,
      "step": 1243300
    },
    {
      "epoch": 2.0347204493234616,
      "grad_norm": 0.2001616656780243,
      "learning_rate": 5.969636868011307e-06,
      "loss": 0.0127,
      "step": 1243320
    },
    {
      "epoch": 2.0347531797621152,
      "grad_norm": 0.5686421394348145,
      "learning_rate": 5.9695709757977905e-06,
      "loss": 0.0269,
      "step": 1243340
    },
    {
      "epoch": 2.0347859102007684,
      "grad_norm": 0.37283843755722046,
      "learning_rate": 5.969505083584274e-06,
      "loss": 0.0164,
      "step": 1243360
    },
    {
      "epoch": 2.034818640639422,
      "grad_norm": 0.14542685449123383,
      "learning_rate": 5.969439191370756e-06,
      "loss": 0.0084,
      "step": 1243380
    },
    {
      "epoch": 2.034851371078075,
      "grad_norm": 0.3017805218696594,
      "learning_rate": 5.96937329915724e-06,
      "loss": 0.0103,
      "step": 1243400
    },
    {
      "epoch": 2.0348841015167287,
      "grad_norm": 0.8831616640090942,
      "learning_rate": 5.9693074069437215e-06,
      "loss": 0.011,
      "step": 1243420
    },
    {
      "epoch": 2.034916831955382,
      "grad_norm": 0.28899717330932617,
      "learning_rate": 5.969241514730205e-06,
      "loss": 0.0219,
      "step": 1243440
    },
    {
      "epoch": 2.034949562394035,
      "grad_norm": 0.9931879639625549,
      "learning_rate": 5.969175622516688e-06,
      "loss": 0.0168,
      "step": 1243460
    },
    {
      "epoch": 2.0349822928326886,
      "grad_norm": 0.301232248544693,
      "learning_rate": 5.9691097303031705e-06,
      "loss": 0.0126,
      "step": 1243480
    },
    {
      "epoch": 2.0350150232713418,
      "grad_norm": 0.5873624682426453,
      "learning_rate": 5.969043838089653e-06,
      "loss": 0.0231,
      "step": 1243500
    },
    {
      "epoch": 2.0350477537099954,
      "grad_norm": 1.3088316917419434,
      "learning_rate": 5.968977945876137e-06,
      "loss": 0.0189,
      "step": 1243520
    },
    {
      "epoch": 2.0350804841486485,
      "grad_norm": 0.36607134342193604,
      "learning_rate": 5.968912053662619e-06,
      "loss": 0.0181,
      "step": 1243540
    },
    {
      "epoch": 2.035113214587302,
      "grad_norm": 0.31835830211639404,
      "learning_rate": 5.968846161449102e-06,
      "loss": 0.0131,
      "step": 1243560
    },
    {
      "epoch": 2.0351459450259552,
      "grad_norm": 0.35251757502555847,
      "learning_rate": 5.968780269235584e-06,
      "loss": 0.0173,
      "step": 1243580
    },
    {
      "epoch": 2.0351786754646084,
      "grad_norm": 0.24925602972507477,
      "learning_rate": 5.968714377022068e-06,
      "loss": 0.0164,
      "step": 1243600
    },
    {
      "epoch": 2.035211405903262,
      "grad_norm": 0.43378376960754395,
      "learning_rate": 5.9686484848085506e-06,
      "loss": 0.0192,
      "step": 1243620
    },
    {
      "epoch": 2.035244136341915,
      "grad_norm": 0.32681164145469666,
      "learning_rate": 5.968582592595033e-06,
      "loss": 0.0127,
      "step": 1243640
    },
    {
      "epoch": 2.0352768667805687,
      "grad_norm": 0.38307827711105347,
      "learning_rate": 5.968516700381516e-06,
      "loss": 0.012,
      "step": 1243660
    },
    {
      "epoch": 2.035309597219222,
      "grad_norm": 0.18110598623752594,
      "learning_rate": 5.968450808168e-06,
      "loss": 0.011,
      "step": 1243680
    },
    {
      "epoch": 2.0353423276578755,
      "grad_norm": 0.8388378620147705,
      "learning_rate": 5.968384915954482e-06,
      "loss": 0.0166,
      "step": 1243700
    },
    {
      "epoch": 2.0353750580965286,
      "grad_norm": 0.357105553150177,
      "learning_rate": 5.968319023740965e-06,
      "loss": 0.0145,
      "step": 1243720
    },
    {
      "epoch": 2.035407788535182,
      "grad_norm": 0.18869931995868683,
      "learning_rate": 5.968253131527449e-06,
      "loss": 0.0133,
      "step": 1243740
    },
    {
      "epoch": 2.0354405189738354,
      "grad_norm": 0.29579782485961914,
      "learning_rate": 5.968187239313931e-06,
      "loss": 0.0156,
      "step": 1243760
    },
    {
      "epoch": 2.0354732494124885,
      "grad_norm": 0.36865049600601196,
      "learning_rate": 5.968121347100414e-06,
      "loss": 0.0214,
      "step": 1243780
    },
    {
      "epoch": 2.035505979851142,
      "grad_norm": 0.4589443504810333,
      "learning_rate": 5.968055454886896e-06,
      "loss": 0.0162,
      "step": 1243800
    },
    {
      "epoch": 2.0355387102897953,
      "grad_norm": 0.4327318072319031,
      "learning_rate": 5.96798956267338e-06,
      "loss": 0.0116,
      "step": 1243820
    },
    {
      "epoch": 2.0355714407284484,
      "grad_norm": 0.3272298574447632,
      "learning_rate": 5.967923670459862e-06,
      "loss": 0.0196,
      "step": 1243840
    },
    {
      "epoch": 2.035604171167102,
      "grad_norm": 0.33129894733428955,
      "learning_rate": 5.967857778246345e-06,
      "loss": 0.0139,
      "step": 1243860
    },
    {
      "epoch": 2.035636901605755,
      "grad_norm": 0.3184226155281067,
      "learning_rate": 5.967791886032828e-06,
      "loss": 0.0169,
      "step": 1243880
    },
    {
      "epoch": 2.0356696320444088,
      "grad_norm": 0.37651538848876953,
      "learning_rate": 5.9677259938193115e-06,
      "loss": 0.0179,
      "step": 1243900
    },
    {
      "epoch": 2.035702362483062,
      "grad_norm": 0.4714495539665222,
      "learning_rate": 5.967660101605793e-06,
      "loss": 0.0127,
      "step": 1243920
    },
    {
      "epoch": 2.0357350929217155,
      "grad_norm": 0.08513117581605911,
      "learning_rate": 5.967594209392277e-06,
      "loss": 0.0127,
      "step": 1243940
    },
    {
      "epoch": 2.0357678233603687,
      "grad_norm": 0.3300558626651764,
      "learning_rate": 5.967528317178759e-06,
      "loss": 0.0131,
      "step": 1243960
    },
    {
      "epoch": 2.035800553799022,
      "grad_norm": 0.29903796315193176,
      "learning_rate": 5.9674624249652424e-06,
      "loss": 0.017,
      "step": 1243980
    },
    {
      "epoch": 2.0358332842376754,
      "grad_norm": 0.5212671160697937,
      "learning_rate": 5.967396532751724e-06,
      "loss": 0.0155,
      "step": 1244000
    },
    {
      "epoch": 2.0358660146763286,
      "grad_norm": 0.21163822710514069,
      "learning_rate": 5.967330640538208e-06,
      "loss": 0.0147,
      "step": 1244020
    },
    {
      "epoch": 2.035898745114982,
      "grad_norm": 0.15812312066555023,
      "learning_rate": 5.9672647483246915e-06,
      "loss": 0.0094,
      "step": 1244040
    },
    {
      "epoch": 2.0359314755536353,
      "grad_norm": 0.6269547939300537,
      "learning_rate": 5.967198856111173e-06,
      "loss": 0.0116,
      "step": 1244060
    },
    {
      "epoch": 2.035964205992289,
      "grad_norm": 0.4979712963104248,
      "learning_rate": 5.967132963897657e-06,
      "loss": 0.0151,
      "step": 1244080
    },
    {
      "epoch": 2.035996936430942,
      "grad_norm": 1.1789346933364868,
      "learning_rate": 5.96706707168414e-06,
      "loss": 0.0203,
      "step": 1244100
    },
    {
      "epoch": 2.036029666869595,
      "grad_norm": 0.8346746563911438,
      "learning_rate": 5.9670011794706225e-06,
      "loss": 0.0211,
      "step": 1244120
    },
    {
      "epoch": 2.036062397308249,
      "grad_norm": 0.11406832188367844,
      "learning_rate": 5.966935287257105e-06,
      "loss": 0.024,
      "step": 1244140
    },
    {
      "epoch": 2.036095127746902,
      "grad_norm": 1.375872254371643,
      "learning_rate": 5.966869395043589e-06,
      "loss": 0.0154,
      "step": 1244160
    },
    {
      "epoch": 2.0361278581855555,
      "grad_norm": 0.3497241139411926,
      "learning_rate": 5.966803502830071e-06,
      "loss": 0.023,
      "step": 1244180
    },
    {
      "epoch": 2.0361605886242087,
      "grad_norm": 0.5530561208724976,
      "learning_rate": 5.966737610616554e-06,
      "loss": 0.0135,
      "step": 1244200
    },
    {
      "epoch": 2.0361933190628623,
      "grad_norm": 0.15687306225299835,
      "learning_rate": 5.966671718403036e-06,
      "loss": 0.0201,
      "step": 1244220
    },
    {
      "epoch": 2.0362260495015154,
      "grad_norm": 0.6251398921012878,
      "learning_rate": 5.96660582618952e-06,
      "loss": 0.0135,
      "step": 1244240
    },
    {
      "epoch": 2.0362587799401686,
      "grad_norm": 0.304759681224823,
      "learning_rate": 5.9665399339760025e-06,
      "loss": 0.0182,
      "step": 1244260
    },
    {
      "epoch": 2.036291510378822,
      "grad_norm": 0.3422929346561432,
      "learning_rate": 5.966474041762485e-06,
      "loss": 0.0147,
      "step": 1244280
    },
    {
      "epoch": 2.0363242408174753,
      "grad_norm": 0.447849303483963,
      "learning_rate": 5.966408149548968e-06,
      "loss": 0.0102,
      "step": 1244300
    },
    {
      "epoch": 2.036356971256129,
      "grad_norm": 0.21355146169662476,
      "learning_rate": 5.9663422573354515e-06,
      "loss": 0.014,
      "step": 1244320
    },
    {
      "epoch": 2.036389701694782,
      "grad_norm": 0.6014355421066284,
      "learning_rate": 5.9662763651219334e-06,
      "loss": 0.0147,
      "step": 1244340
    },
    {
      "epoch": 2.0364224321334357,
      "grad_norm": 0.3362591862678528,
      "learning_rate": 5.966210472908417e-06,
      "loss": 0.0174,
      "step": 1244360
    },
    {
      "epoch": 2.036455162572089,
      "grad_norm": 0.42609351873397827,
      "learning_rate": 5.966144580694901e-06,
      "loss": 0.0125,
      "step": 1244380
    },
    {
      "epoch": 2.036487893010742,
      "grad_norm": 0.17734165489673615,
      "learning_rate": 5.9660786884813825e-06,
      "loss": 0.0141,
      "step": 1244400
    },
    {
      "epoch": 2.0365206234493956,
      "grad_norm": 0.147409588098526,
      "learning_rate": 5.966012796267866e-06,
      "loss": 0.0132,
      "step": 1244420
    },
    {
      "epoch": 2.0365533538880487,
      "grad_norm": 0.1657486855983734,
      "learning_rate": 5.965946904054348e-06,
      "loss": 0.0102,
      "step": 1244440
    },
    {
      "epoch": 2.0365860843267023,
      "grad_norm": 0.2168082296848297,
      "learning_rate": 5.9658810118408316e-06,
      "loss": 0.0139,
      "step": 1244460
    },
    {
      "epoch": 2.0366188147653554,
      "grad_norm": 0.17887215316295624,
      "learning_rate": 5.965815119627314e-06,
      "loss": 0.0137,
      "step": 1244480
    },
    {
      "epoch": 2.036651545204009,
      "grad_norm": 0.2510282099246979,
      "learning_rate": 5.965749227413797e-06,
      "loss": 0.0214,
      "step": 1244500
    },
    {
      "epoch": 2.036684275642662,
      "grad_norm": 0.31394967436790466,
      "learning_rate": 5.96568333520028e-06,
      "loss": 0.015,
      "step": 1244520
    },
    {
      "epoch": 2.0367170060813153,
      "grad_norm": 0.9026044607162476,
      "learning_rate": 5.965617442986763e-06,
      "loss": 0.0202,
      "step": 1244540
    },
    {
      "epoch": 2.036749736519969,
      "grad_norm": 0.6359677910804749,
      "learning_rate": 5.965551550773245e-06,
      "loss": 0.0136,
      "step": 1244560
    },
    {
      "epoch": 2.036782466958622,
      "grad_norm": 0.32033076882362366,
      "learning_rate": 5.965485658559729e-06,
      "loss": 0.0154,
      "step": 1244580
    },
    {
      "epoch": 2.0368151973972757,
      "grad_norm": 0.3778574466705322,
      "learning_rate": 5.965419766346211e-06,
      "loss": 0.0185,
      "step": 1244600
    },
    {
      "epoch": 2.036847927835929,
      "grad_norm": 0.45365482568740845,
      "learning_rate": 5.965353874132694e-06,
      "loss": 0.0206,
      "step": 1244620
    },
    {
      "epoch": 2.036880658274582,
      "grad_norm": 4.942691802978516,
      "learning_rate": 5.965287981919177e-06,
      "loss": 0.0158,
      "step": 1244640
    },
    {
      "epoch": 2.0369133887132356,
      "grad_norm": 0.2638346552848816,
      "learning_rate": 5.96522208970566e-06,
      "loss": 0.018,
      "step": 1244660
    },
    {
      "epoch": 2.0369461191518887,
      "grad_norm": 0.45387986302375793,
      "learning_rate": 5.9651561974921426e-06,
      "loss": 0.0137,
      "step": 1244680
    },
    {
      "epoch": 2.0369788495905423,
      "grad_norm": 0.4110826551914215,
      "learning_rate": 5.965090305278626e-06,
      "loss": 0.0154,
      "step": 1244700
    },
    {
      "epoch": 2.0370115800291955,
      "grad_norm": 1.8175137042999268,
      "learning_rate": 5.965024413065108e-06,
      "loss": 0.0155,
      "step": 1244720
    },
    {
      "epoch": 2.037044310467849,
      "grad_norm": 0.21046660840511322,
      "learning_rate": 5.964958520851592e-06,
      "loss": 0.0194,
      "step": 1244740
    },
    {
      "epoch": 2.037077040906502,
      "grad_norm": 0.13183115422725677,
      "learning_rate": 5.964892628638075e-06,
      "loss": 0.0169,
      "step": 1244760
    },
    {
      "epoch": 2.0371097713451554,
      "grad_norm": 0.06100320816040039,
      "learning_rate": 5.964826736424557e-06,
      "loss": 0.0148,
      "step": 1244780
    },
    {
      "epoch": 2.037142501783809,
      "grad_norm": 0.24850988388061523,
      "learning_rate": 5.964760844211041e-06,
      "loss": 0.0151,
      "step": 1244800
    },
    {
      "epoch": 2.037175232222462,
      "grad_norm": 1.0670990943908691,
      "learning_rate": 5.964694951997523e-06,
      "loss": 0.0169,
      "step": 1244820
    },
    {
      "epoch": 2.0372079626611157,
      "grad_norm": 1.0137883424758911,
      "learning_rate": 5.964629059784006e-06,
      "loss": 0.0238,
      "step": 1244840
    },
    {
      "epoch": 2.037240693099769,
      "grad_norm": 0.486127108335495,
      "learning_rate": 5.964563167570488e-06,
      "loss": 0.0186,
      "step": 1244860
    },
    {
      "epoch": 2.0372734235384224,
      "grad_norm": 0.39710694551467896,
      "learning_rate": 5.964497275356972e-06,
      "loss": 0.0108,
      "step": 1244880
    },
    {
      "epoch": 2.0373061539770756,
      "grad_norm": 0.8694505095481873,
      "learning_rate": 5.964431383143454e-06,
      "loss": 0.0178,
      "step": 1244900
    },
    {
      "epoch": 2.0373388844157287,
      "grad_norm": 0.5781644582748413,
      "learning_rate": 5.964365490929938e-06,
      "loss": 0.0134,
      "step": 1244920
    },
    {
      "epoch": 2.0373716148543823,
      "grad_norm": 1.2966722249984741,
      "learning_rate": 5.96429959871642e-06,
      "loss": 0.0174,
      "step": 1244940
    },
    {
      "epoch": 2.0374043452930355,
      "grad_norm": 0.353704035282135,
      "learning_rate": 5.9642337065029035e-06,
      "loss": 0.0151,
      "step": 1244960
    },
    {
      "epoch": 2.037437075731689,
      "grad_norm": 0.2995012700557709,
      "learning_rate": 5.964167814289385e-06,
      "loss": 0.0152,
      "step": 1244980
    },
    {
      "epoch": 2.0374698061703422,
      "grad_norm": 0.8415022492408752,
      "learning_rate": 5.964101922075869e-06,
      "loss": 0.0159,
      "step": 1245000
    },
    {
      "epoch": 2.037502536608996,
      "grad_norm": 0.0764986202120781,
      "learning_rate": 5.964036029862351e-06,
      "loss": 0.0132,
      "step": 1245020
    },
    {
      "epoch": 2.037535267047649,
      "grad_norm": 0.5794179439544678,
      "learning_rate": 5.963970137648834e-06,
      "loss": 0.0155,
      "step": 1245040
    },
    {
      "epoch": 2.037567997486302,
      "grad_norm": 0.2793252170085907,
      "learning_rate": 5.963904245435317e-06,
      "loss": 0.0128,
      "step": 1245060
    },
    {
      "epoch": 2.0376007279249557,
      "grad_norm": 0.6166189908981323,
      "learning_rate": 5.9638383532218e-06,
      "loss": 0.0113,
      "step": 1245080
    },
    {
      "epoch": 2.037633458363609,
      "grad_norm": 0.13185694813728333,
      "learning_rate": 5.9637724610082835e-06,
      "loss": 0.0151,
      "step": 1245100
    },
    {
      "epoch": 2.0376661888022625,
      "grad_norm": 0.4019289016723633,
      "learning_rate": 5.963706568794766e-06,
      "loss": 0.0158,
      "step": 1245120
    },
    {
      "epoch": 2.0376989192409156,
      "grad_norm": 0.21319052577018738,
      "learning_rate": 5.963640676581249e-06,
      "loss": 0.0171,
      "step": 1245140
    },
    {
      "epoch": 2.037731649679569,
      "grad_norm": 0.8930636644363403,
      "learning_rate": 5.963574784367732e-06,
      "loss": 0.0235,
      "step": 1245160
    },
    {
      "epoch": 2.0377643801182224,
      "grad_norm": 0.16179995238780975,
      "learning_rate": 5.963508892154215e-06,
      "loss": 0.0202,
      "step": 1245180
    },
    {
      "epoch": 2.0377971105568755,
      "grad_norm": 0.6057363748550415,
      "learning_rate": 5.963442999940697e-06,
      "loss": 0.0141,
      "step": 1245200
    },
    {
      "epoch": 2.037829840995529,
      "grad_norm": 0.3897676467895508,
      "learning_rate": 5.963377107727181e-06,
      "loss": 0.0211,
      "step": 1245220
    },
    {
      "epoch": 2.0378625714341823,
      "grad_norm": 0.5867478847503662,
      "learning_rate": 5.963311215513663e-06,
      "loss": 0.0128,
      "step": 1245240
    },
    {
      "epoch": 2.037895301872836,
      "grad_norm": 0.9497931599617004,
      "learning_rate": 5.963245323300146e-06,
      "loss": 0.0158,
      "step": 1245260
    },
    {
      "epoch": 2.037928032311489,
      "grad_norm": 0.1972932517528534,
      "learning_rate": 5.963179431086629e-06,
      "loss": 0.0185,
      "step": 1245280
    },
    {
      "epoch": 2.0379607627501426,
      "grad_norm": 0.855202853679657,
      "learning_rate": 5.963113538873112e-06,
      "loss": 0.0158,
      "step": 1245300
    },
    {
      "epoch": 2.0379934931887957,
      "grad_norm": 0.5830962061882019,
      "learning_rate": 5.9630476466595945e-06,
      "loss": 0.0128,
      "step": 1245320
    },
    {
      "epoch": 2.038026223627449,
      "grad_norm": 0.22137941420078278,
      "learning_rate": 5.962981754446078e-06,
      "loss": 0.0148,
      "step": 1245340
    },
    {
      "epoch": 2.0380589540661025,
      "grad_norm": 0.24125145375728607,
      "learning_rate": 5.96291586223256e-06,
      "loss": 0.0122,
      "step": 1245360
    },
    {
      "epoch": 2.0380916845047556,
      "grad_norm": 0.5193182229995728,
      "learning_rate": 5.9628499700190435e-06,
      "loss": 0.0194,
      "step": 1245380
    },
    {
      "epoch": 2.0381244149434092,
      "grad_norm": 0.8789782524108887,
      "learning_rate": 5.962784077805525e-06,
      "loss": 0.0178,
      "step": 1245400
    },
    {
      "epoch": 2.0381571453820624,
      "grad_norm": 0.16166603565216064,
      "learning_rate": 5.962718185592009e-06,
      "loss": 0.0134,
      "step": 1245420
    },
    {
      "epoch": 2.0381898758207155,
      "grad_norm": 0.19606724381446838,
      "learning_rate": 5.962652293378493e-06,
      "loss": 0.0115,
      "step": 1245440
    },
    {
      "epoch": 2.038222606259369,
      "grad_norm": 0.4049055278301239,
      "learning_rate": 5.9625864011649745e-06,
      "loss": 0.0128,
      "step": 1245460
    },
    {
      "epoch": 2.0382553366980223,
      "grad_norm": 0.6724303960800171,
      "learning_rate": 5.962520508951458e-06,
      "loss": 0.0186,
      "step": 1245480
    },
    {
      "epoch": 2.038288067136676,
      "grad_norm": 0.1186528131365776,
      "learning_rate": 5.962454616737941e-06,
      "loss": 0.0147,
      "step": 1245500
    },
    {
      "epoch": 2.038320797575329,
      "grad_norm": 0.3944203555583954,
      "learning_rate": 5.9623887245244236e-06,
      "loss": 0.0179,
      "step": 1245520
    },
    {
      "epoch": 2.0383535280139826,
      "grad_norm": 0.22736382484436035,
      "learning_rate": 5.962322832310906e-06,
      "loss": 0.016,
      "step": 1245540
    },
    {
      "epoch": 2.0383862584526358,
      "grad_norm": 0.20792870223522186,
      "learning_rate": 5.96225694009739e-06,
      "loss": 0.0138,
      "step": 1245560
    },
    {
      "epoch": 2.038418988891289,
      "grad_norm": 0.5183387398719788,
      "learning_rate": 5.962191047883872e-06,
      "loss": 0.0208,
      "step": 1245580
    },
    {
      "epoch": 2.0384517193299425,
      "grad_norm": 0.33836066722869873,
      "learning_rate": 5.962125155670355e-06,
      "loss": 0.0102,
      "step": 1245600
    },
    {
      "epoch": 2.0384844497685957,
      "grad_norm": 0.5202897787094116,
      "learning_rate": 5.962059263456837e-06,
      "loss": 0.0126,
      "step": 1245620
    },
    {
      "epoch": 2.0385171802072493,
      "grad_norm": 0.4517885148525238,
      "learning_rate": 5.961993371243321e-06,
      "loss": 0.0213,
      "step": 1245640
    },
    {
      "epoch": 2.0385499106459024,
      "grad_norm": 0.8077364563941956,
      "learning_rate": 5.961927479029804e-06,
      "loss": 0.0162,
      "step": 1245660
    },
    {
      "epoch": 2.038582641084556,
      "grad_norm": 0.1615992784500122,
      "learning_rate": 5.961861586816286e-06,
      "loss": 0.0125,
      "step": 1245680
    },
    {
      "epoch": 2.038615371523209,
      "grad_norm": 0.12591972947120667,
      "learning_rate": 5.961795694602769e-06,
      "loss": 0.0115,
      "step": 1245700
    },
    {
      "epoch": 2.0386481019618623,
      "grad_norm": 0.435395210981369,
      "learning_rate": 5.961729802389253e-06,
      "loss": 0.0145,
      "step": 1245720
    },
    {
      "epoch": 2.038680832400516,
      "grad_norm": 1.593663215637207,
      "learning_rate": 5.9616639101757345e-06,
      "loss": 0.0126,
      "step": 1245740
    },
    {
      "epoch": 2.038713562839169,
      "grad_norm": 0.2723643183708191,
      "learning_rate": 5.961598017962218e-06,
      "loss": 0.0116,
      "step": 1245760
    },
    {
      "epoch": 2.0387462932778226,
      "grad_norm": 0.6170637011528015,
      "learning_rate": 5.9615321257487e-06,
      "loss": 0.0134,
      "step": 1245780
    },
    {
      "epoch": 2.038779023716476,
      "grad_norm": 0.4406881034374237,
      "learning_rate": 5.961466233535184e-06,
      "loss": 0.0103,
      "step": 1245800
    },
    {
      "epoch": 2.0388117541551294,
      "grad_norm": 0.11964322626590729,
      "learning_rate": 5.961400341321667e-06,
      "loss": 0.0131,
      "step": 1245820
    },
    {
      "epoch": 2.0388444845937825,
      "grad_norm": 1.1057462692260742,
      "learning_rate": 5.961334449108149e-06,
      "loss": 0.0107,
      "step": 1245840
    },
    {
      "epoch": 2.0388772150324357,
      "grad_norm": 0.401705265045166,
      "learning_rate": 5.961268556894633e-06,
      "loss": 0.0203,
      "step": 1245860
    },
    {
      "epoch": 2.0389099454710893,
      "grad_norm": 0.4685921370983124,
      "learning_rate": 5.9612026646811146e-06,
      "loss": 0.0144,
      "step": 1245880
    },
    {
      "epoch": 2.0389426759097424,
      "grad_norm": 1.0083669424057007,
      "learning_rate": 5.961136772467598e-06,
      "loss": 0.0186,
      "step": 1245900
    },
    {
      "epoch": 2.038975406348396,
      "grad_norm": 0.6239112615585327,
      "learning_rate": 5.961070880254081e-06,
      "loss": 0.0155,
      "step": 1245920
    },
    {
      "epoch": 2.039008136787049,
      "grad_norm": 0.4911993145942688,
      "learning_rate": 5.961004988040564e-06,
      "loss": 0.0109,
      "step": 1245940
    },
    {
      "epoch": 2.0390408672257028,
      "grad_norm": 0.2298196703195572,
      "learning_rate": 5.960939095827046e-06,
      "loss": 0.0159,
      "step": 1245960
    },
    {
      "epoch": 2.039073597664356,
      "grad_norm": 0.3350191116333008,
      "learning_rate": 5.96087320361353e-06,
      "loss": 0.012,
      "step": 1245980
    },
    {
      "epoch": 2.039106328103009,
      "grad_norm": 0.43268194794654846,
      "learning_rate": 5.960807311400012e-06,
      "loss": 0.0194,
      "step": 1246000
    },
    {
      "epoch": 2.0391390585416627,
      "grad_norm": 0.6609897613525391,
      "learning_rate": 5.9607414191864954e-06,
      "loss": 0.0171,
      "step": 1246020
    },
    {
      "epoch": 2.039171788980316,
      "grad_norm": 0.4120202958583832,
      "learning_rate": 5.960675526972977e-06,
      "loss": 0.0153,
      "step": 1246040
    },
    {
      "epoch": 2.0392045194189694,
      "grad_norm": 0.30834928154945374,
      "learning_rate": 5.960609634759461e-06,
      "loss": 0.0192,
      "step": 1246060
    },
    {
      "epoch": 2.0392372498576226,
      "grad_norm": 0.3181644380092621,
      "learning_rate": 5.960543742545944e-06,
      "loss": 0.0101,
      "step": 1246080
    },
    {
      "epoch": 2.039269980296276,
      "grad_norm": 0.9053604006767273,
      "learning_rate": 5.960477850332426e-06,
      "loss": 0.0123,
      "step": 1246100
    },
    {
      "epoch": 2.0393027107349293,
      "grad_norm": 0.07122514396905899,
      "learning_rate": 5.960411958118909e-06,
      "loss": 0.0168,
      "step": 1246120
    },
    {
      "epoch": 2.0393354411735825,
      "grad_norm": 0.28673192858695984,
      "learning_rate": 5.960346065905393e-06,
      "loss": 0.0138,
      "step": 1246140
    },
    {
      "epoch": 2.039368171612236,
      "grad_norm": 1.078078269958496,
      "learning_rate": 5.9602801736918755e-06,
      "loss": 0.0169,
      "step": 1246160
    },
    {
      "epoch": 2.039400902050889,
      "grad_norm": 0.28088006377220154,
      "learning_rate": 5.960214281478358e-06,
      "loss": 0.0065,
      "step": 1246180
    },
    {
      "epoch": 2.039433632489543,
      "grad_norm": 0.42080652713775635,
      "learning_rate": 5.960148389264842e-06,
      "loss": 0.0179,
      "step": 1246200
    },
    {
      "epoch": 2.039466362928196,
      "grad_norm": 0.3152966797351837,
      "learning_rate": 5.960082497051324e-06,
      "loss": 0.0206,
      "step": 1246220
    },
    {
      "epoch": 2.039499093366849,
      "grad_norm": 0.14181697368621826,
      "learning_rate": 5.960016604837807e-06,
      "loss": 0.0143,
      "step": 1246240
    },
    {
      "epoch": 2.0395318238055027,
      "grad_norm": 0.6012980341911316,
      "learning_rate": 5.959950712624289e-06,
      "loss": 0.0137,
      "step": 1246260
    },
    {
      "epoch": 2.039564554244156,
      "grad_norm": 1.1508077383041382,
      "learning_rate": 5.959884820410773e-06,
      "loss": 0.02,
      "step": 1246280
    },
    {
      "epoch": 2.0395972846828094,
      "grad_norm": 0.3912926912307739,
      "learning_rate": 5.9598189281972555e-06,
      "loss": 0.0107,
      "step": 1246300
    },
    {
      "epoch": 2.0396300151214626,
      "grad_norm": 0.92354816198349,
      "learning_rate": 5.959753035983738e-06,
      "loss": 0.0149,
      "step": 1246320
    },
    {
      "epoch": 2.039662745560116,
      "grad_norm": 0.19975869357585907,
      "learning_rate": 5.959687143770221e-06,
      "loss": 0.0208,
      "step": 1246340
    },
    {
      "epoch": 2.0396954759987693,
      "grad_norm": 0.1087682694196701,
      "learning_rate": 5.9596212515567046e-06,
      "loss": 0.0122,
      "step": 1246360
    },
    {
      "epoch": 2.0397282064374225,
      "grad_norm": 0.8595012426376343,
      "learning_rate": 5.9595553593431864e-06,
      "loss": 0.0122,
      "step": 1246380
    },
    {
      "epoch": 2.039760936876076,
      "grad_norm": 0.21092484891414642,
      "learning_rate": 5.95948946712967e-06,
      "loss": 0.0196,
      "step": 1246400
    },
    {
      "epoch": 2.039793667314729,
      "grad_norm": 0.8450123071670532,
      "learning_rate": 5.959423574916152e-06,
      "loss": 0.0137,
      "step": 1246420
    },
    {
      "epoch": 2.039826397753383,
      "grad_norm": 0.5212904810905457,
      "learning_rate": 5.9593576827026355e-06,
      "loss": 0.0127,
      "step": 1246440
    },
    {
      "epoch": 2.039859128192036,
      "grad_norm": 0.22832739353179932,
      "learning_rate": 5.959291790489118e-06,
      "loss": 0.0216,
      "step": 1246460
    },
    {
      "epoch": 2.0398918586306896,
      "grad_norm": 0.37757408618927,
      "learning_rate": 5.959225898275601e-06,
      "loss": 0.0146,
      "step": 1246480
    },
    {
      "epoch": 2.0399245890693427,
      "grad_norm": 0.8261735439300537,
      "learning_rate": 5.959160006062085e-06,
      "loss": 0.0165,
      "step": 1246500
    },
    {
      "epoch": 2.039957319507996,
      "grad_norm": 0.37300530076026917,
      "learning_rate": 5.959094113848567e-06,
      "loss": 0.0171,
      "step": 1246520
    },
    {
      "epoch": 2.0399900499466495,
      "grad_norm": 1.0482243299484253,
      "learning_rate": 5.95902822163505e-06,
      "loss": 0.0195,
      "step": 1246540
    },
    {
      "epoch": 2.0400227803853026,
      "grad_norm": 0.44514650106430054,
      "learning_rate": 5.958962329421533e-06,
      "loss": 0.0203,
      "step": 1246560
    },
    {
      "epoch": 2.040055510823956,
      "grad_norm": 0.1883542686700821,
      "learning_rate": 5.958896437208016e-06,
      "loss": 0.0208,
      "step": 1246580
    },
    {
      "epoch": 2.0400882412626093,
      "grad_norm": 0.3237421214580536,
      "learning_rate": 5.958830544994498e-06,
      "loss": 0.0122,
      "step": 1246600
    },
    {
      "epoch": 2.040120971701263,
      "grad_norm": 0.23215658962726593,
      "learning_rate": 5.958764652780982e-06,
      "loss": 0.023,
      "step": 1246620
    },
    {
      "epoch": 2.040153702139916,
      "grad_norm": 0.236667662858963,
      "learning_rate": 5.958698760567464e-06,
      "loss": 0.0194,
      "step": 1246640
    },
    {
      "epoch": 2.0401864325785692,
      "grad_norm": 0.3666260242462158,
      "learning_rate": 5.958632868353947e-06,
      "loss": 0.0134,
      "step": 1246660
    },
    {
      "epoch": 2.040219163017223,
      "grad_norm": 0.2735687494277954,
      "learning_rate": 5.95856697614043e-06,
      "loss": 0.0203,
      "step": 1246680
    },
    {
      "epoch": 2.040251893455876,
      "grad_norm": 0.4196928143501282,
      "learning_rate": 5.958501083926913e-06,
      "loss": 0.0129,
      "step": 1246700
    },
    {
      "epoch": 2.0402846238945296,
      "grad_norm": 0.32580357789993286,
      "learning_rate": 5.9584351917133956e-06,
      "loss": 0.0168,
      "step": 1246720
    },
    {
      "epoch": 2.0403173543331827,
      "grad_norm": 0.12821495532989502,
      "learning_rate": 5.958369299499879e-06,
      "loss": 0.0115,
      "step": 1246740
    },
    {
      "epoch": 2.0403500847718363,
      "grad_norm": 2.681145429611206,
      "learning_rate": 5.958303407286361e-06,
      "loss": 0.0166,
      "step": 1246760
    },
    {
      "epoch": 2.0403828152104895,
      "grad_norm": 0.7600249648094177,
      "learning_rate": 5.958237515072845e-06,
      "loss": 0.0233,
      "step": 1246780
    },
    {
      "epoch": 2.0404155456491426,
      "grad_norm": 0.0749993547797203,
      "learning_rate": 5.9581716228593265e-06,
      "loss": 0.0142,
      "step": 1246800
    },
    {
      "epoch": 2.040448276087796,
      "grad_norm": 0.7848251461982727,
      "learning_rate": 5.95810573064581e-06,
      "loss": 0.0192,
      "step": 1246820
    },
    {
      "epoch": 2.0404810065264494,
      "grad_norm": 0.24574220180511475,
      "learning_rate": 5.958039838432292e-06,
      "loss": 0.0167,
      "step": 1246840
    },
    {
      "epoch": 2.040513736965103,
      "grad_norm": 0.34321361780166626,
      "learning_rate": 5.957973946218776e-06,
      "loss": 0.0198,
      "step": 1246860
    },
    {
      "epoch": 2.040546467403756,
      "grad_norm": 0.29231762886047363,
      "learning_rate": 5.957908054005259e-06,
      "loss": 0.0188,
      "step": 1246880
    },
    {
      "epoch": 2.0405791978424093,
      "grad_norm": 0.18720436096191406,
      "learning_rate": 5.957842161791741e-06,
      "loss": 0.0168,
      "step": 1246900
    },
    {
      "epoch": 2.040611928281063,
      "grad_norm": 0.7851740121841431,
      "learning_rate": 5.957776269578225e-06,
      "loss": 0.0144,
      "step": 1246920
    },
    {
      "epoch": 2.040644658719716,
      "grad_norm": 0.4280641973018646,
      "learning_rate": 5.957710377364707e-06,
      "loss": 0.0148,
      "step": 1246940
    },
    {
      "epoch": 2.0406773891583696,
      "grad_norm": 0.10959452390670776,
      "learning_rate": 5.95764448515119e-06,
      "loss": 0.0121,
      "step": 1246960
    },
    {
      "epoch": 2.0407101195970228,
      "grad_norm": 3.8181633949279785,
      "learning_rate": 5.957578592937673e-06,
      "loss": 0.0154,
      "step": 1246980
    },
    {
      "epoch": 2.0407428500356763,
      "grad_norm": 0.27302777767181396,
      "learning_rate": 5.9575127007241565e-06,
      "loss": 0.0223,
      "step": 1247000
    },
    {
      "epoch": 2.0407755804743295,
      "grad_norm": 0.7692083120346069,
      "learning_rate": 5.957446808510638e-06,
      "loss": 0.0205,
      "step": 1247020
    },
    {
      "epoch": 2.0408083109129826,
      "grad_norm": 0.6946594715118408,
      "learning_rate": 5.957380916297122e-06,
      "loss": 0.0162,
      "step": 1247040
    },
    {
      "epoch": 2.0408410413516362,
      "grad_norm": 0.6372349262237549,
      "learning_rate": 5.957315024083604e-06,
      "loss": 0.0112,
      "step": 1247060
    },
    {
      "epoch": 2.0408737717902894,
      "grad_norm": 0.6847140192985535,
      "learning_rate": 5.957249131870087e-06,
      "loss": 0.0159,
      "step": 1247080
    },
    {
      "epoch": 2.040906502228943,
      "grad_norm": 0.2606847882270813,
      "learning_rate": 5.95718323965657e-06,
      "loss": 0.0088,
      "step": 1247100
    },
    {
      "epoch": 2.040939232667596,
      "grad_norm": 0.21162889897823334,
      "learning_rate": 5.957117347443053e-06,
      "loss": 0.0205,
      "step": 1247120
    },
    {
      "epoch": 2.0409719631062497,
      "grad_norm": 0.261601060628891,
      "learning_rate": 5.957051455229536e-06,
      "loss": 0.021,
      "step": 1247140
    },
    {
      "epoch": 2.041004693544903,
      "grad_norm": 0.05861455202102661,
      "learning_rate": 5.956985563016019e-06,
      "loss": 0.0191,
      "step": 1247160
    },
    {
      "epoch": 2.041037423983556,
      "grad_norm": 0.5030899047851562,
      "learning_rate": 5.956919670802501e-06,
      "loss": 0.018,
      "step": 1247180
    },
    {
      "epoch": 2.0410701544222096,
      "grad_norm": 0.5353884100914001,
      "learning_rate": 5.956853778588985e-06,
      "loss": 0.0183,
      "step": 1247200
    },
    {
      "epoch": 2.0411028848608628,
      "grad_norm": 0.6916801333427429,
      "learning_rate": 5.956787886375468e-06,
      "loss": 0.0193,
      "step": 1247220
    },
    {
      "epoch": 2.0411356152995164,
      "grad_norm": 0.1497783064842224,
      "learning_rate": 5.95672199416195e-06,
      "loss": 0.0174,
      "step": 1247240
    },
    {
      "epoch": 2.0411683457381695,
      "grad_norm": 0.478426456451416,
      "learning_rate": 5.956656101948434e-06,
      "loss": 0.0144,
      "step": 1247260
    },
    {
      "epoch": 2.041201076176823,
      "grad_norm": 0.415833443403244,
      "learning_rate": 5.956590209734916e-06,
      "loss": 0.019,
      "step": 1247280
    },
    {
      "epoch": 2.0412338066154763,
      "grad_norm": 0.14430023729801178,
      "learning_rate": 5.956524317521399e-06,
      "loss": 0.0184,
      "step": 1247300
    },
    {
      "epoch": 2.0412665370541294,
      "grad_norm": 0.20159423351287842,
      "learning_rate": 5.956458425307882e-06,
      "loss": 0.0167,
      "step": 1247320
    },
    {
      "epoch": 2.041299267492783,
      "grad_norm": 0.15941175818443298,
      "learning_rate": 5.956392533094365e-06,
      "loss": 0.0111,
      "step": 1247340
    },
    {
      "epoch": 2.041331997931436,
      "grad_norm": 0.5110167264938354,
      "learning_rate": 5.9563266408808475e-06,
      "loss": 0.0093,
      "step": 1247360
    },
    {
      "epoch": 2.0413647283700898,
      "grad_norm": 0.6722974181175232,
      "learning_rate": 5.956260748667331e-06,
      "loss": 0.0142,
      "step": 1247380
    },
    {
      "epoch": 2.041397458808743,
      "grad_norm": 0.1741665005683899,
      "learning_rate": 5.956194856453813e-06,
      "loss": 0.0192,
      "step": 1247400
    },
    {
      "epoch": 2.0414301892473965,
      "grad_norm": 0.2963144779205322,
      "learning_rate": 5.9561289642402965e-06,
      "loss": 0.0108,
      "step": 1247420
    },
    {
      "epoch": 2.0414629196860496,
      "grad_norm": 0.27649274468421936,
      "learning_rate": 5.9560630720267784e-06,
      "loss": 0.0201,
      "step": 1247440
    },
    {
      "epoch": 2.041495650124703,
      "grad_norm": 0.1974305957555771,
      "learning_rate": 5.955997179813262e-06,
      "loss": 0.0226,
      "step": 1247460
    },
    {
      "epoch": 2.0415283805633564,
      "grad_norm": 0.1259101927280426,
      "learning_rate": 5.955931287599745e-06,
      "loss": 0.0148,
      "step": 1247480
    },
    {
      "epoch": 2.0415611110020095,
      "grad_norm": 0.3745546340942383,
      "learning_rate": 5.9558653953862275e-06,
      "loss": 0.0189,
      "step": 1247500
    },
    {
      "epoch": 2.041593841440663,
      "grad_norm": 0.2391103357076645,
      "learning_rate": 5.95579950317271e-06,
      "loss": 0.0106,
      "step": 1247520
    },
    {
      "epoch": 2.0416265718793163,
      "grad_norm": 0.31545084714889526,
      "learning_rate": 5.955733610959194e-06,
      "loss": 0.0146,
      "step": 1247540
    },
    {
      "epoch": 2.04165930231797,
      "grad_norm": 0.3439311981201172,
      "learning_rate": 5.9556677187456766e-06,
      "loss": 0.0162,
      "step": 1247560
    },
    {
      "epoch": 2.041692032756623,
      "grad_norm": 0.4041936993598938,
      "learning_rate": 5.955601826532159e-06,
      "loss": 0.0129,
      "step": 1247580
    },
    {
      "epoch": 2.041724763195276,
      "grad_norm": 0.30643200874328613,
      "learning_rate": 5.955535934318643e-06,
      "loss": 0.0136,
      "step": 1247600
    },
    {
      "epoch": 2.0417574936339298,
      "grad_norm": 0.15539608895778656,
      "learning_rate": 5.955470042105125e-06,
      "loss": 0.0133,
      "step": 1247620
    },
    {
      "epoch": 2.041790224072583,
      "grad_norm": 0.7421714067459106,
      "learning_rate": 5.955404149891608e-06,
      "loss": 0.0182,
      "step": 1247640
    },
    {
      "epoch": 2.0418229545112365,
      "grad_norm": 0.3838181793689728,
      "learning_rate": 5.95533825767809e-06,
      "loss": 0.0128,
      "step": 1247660
    },
    {
      "epoch": 2.0418556849498897,
      "grad_norm": 0.3231995403766632,
      "learning_rate": 5.955272365464574e-06,
      "loss": 0.023,
      "step": 1247680
    },
    {
      "epoch": 2.041888415388543,
      "grad_norm": 0.3113024830818176,
      "learning_rate": 5.955206473251057e-06,
      "loss": 0.0148,
      "step": 1247700
    },
    {
      "epoch": 2.0419211458271964,
      "grad_norm": 0.23574429750442505,
      "learning_rate": 5.955140581037539e-06,
      "loss": 0.0153,
      "step": 1247720
    },
    {
      "epoch": 2.0419538762658496,
      "grad_norm": 1.1628342866897583,
      "learning_rate": 5.955074688824022e-06,
      "loss": 0.0261,
      "step": 1247740
    },
    {
      "epoch": 2.041986606704503,
      "grad_norm": 0.548324465751648,
      "learning_rate": 5.955008796610506e-06,
      "loss": 0.0168,
      "step": 1247760
    },
    {
      "epoch": 2.0420193371431563,
      "grad_norm": 0.23899151384830475,
      "learning_rate": 5.9549429043969875e-06,
      "loss": 0.0084,
      "step": 1247780
    },
    {
      "epoch": 2.04205206758181,
      "grad_norm": 0.3237060308456421,
      "learning_rate": 5.954877012183471e-06,
      "loss": 0.0199,
      "step": 1247800
    },
    {
      "epoch": 2.042084798020463,
      "grad_norm": 1.1634137630462646,
      "learning_rate": 5.954811119969953e-06,
      "loss": 0.0191,
      "step": 1247820
    },
    {
      "epoch": 2.042117528459116,
      "grad_norm": 0.761322557926178,
      "learning_rate": 5.954745227756437e-06,
      "loss": 0.0213,
      "step": 1247840
    },
    {
      "epoch": 2.04215025889777,
      "grad_norm": 0.3848573565483093,
      "learning_rate": 5.9546793355429185e-06,
      "loss": 0.0102,
      "step": 1247860
    },
    {
      "epoch": 2.042182989336423,
      "grad_norm": 1.3713356256484985,
      "learning_rate": 5.954613443329402e-06,
      "loss": 0.0166,
      "step": 1247880
    },
    {
      "epoch": 2.0422157197750765,
      "grad_norm": 0.3301560878753662,
      "learning_rate": 5.954547551115886e-06,
      "loss": 0.0147,
      "step": 1247900
    },
    {
      "epoch": 2.0422484502137297,
      "grad_norm": 0.7945975661277771,
      "learning_rate": 5.9544816589023676e-06,
      "loss": 0.0188,
      "step": 1247920
    },
    {
      "epoch": 2.0422811806523833,
      "grad_norm": 0.17335151135921478,
      "learning_rate": 5.954415766688851e-06,
      "loss": 0.0212,
      "step": 1247940
    },
    {
      "epoch": 2.0423139110910364,
      "grad_norm": 0.30536967515945435,
      "learning_rate": 5.954349874475334e-06,
      "loss": 0.0122,
      "step": 1247960
    },
    {
      "epoch": 2.0423466415296896,
      "grad_norm": 1.3765621185302734,
      "learning_rate": 5.954283982261817e-06,
      "loss": 0.0114,
      "step": 1247980
    },
    {
      "epoch": 2.042379371968343,
      "grad_norm": 0.31195518374443054,
      "learning_rate": 5.954218090048299e-06,
      "loss": 0.0178,
      "step": 1248000
    },
    {
      "epoch": 2.0424121024069963,
      "grad_norm": 0.4628739058971405,
      "learning_rate": 5.954152197834783e-06,
      "loss": 0.0202,
      "step": 1248020
    },
    {
      "epoch": 2.04244483284565,
      "grad_norm": 0.4455881118774414,
      "learning_rate": 5.954086305621265e-06,
      "loss": 0.0151,
      "step": 1248040
    },
    {
      "epoch": 2.042477563284303,
      "grad_norm": 1.4237678050994873,
      "learning_rate": 5.9540204134077484e-06,
      "loss": 0.0186,
      "step": 1248060
    },
    {
      "epoch": 2.0425102937229567,
      "grad_norm": 0.9137765765190125,
      "learning_rate": 5.95395452119423e-06,
      "loss": 0.0141,
      "step": 1248080
    },
    {
      "epoch": 2.04254302416161,
      "grad_norm": 0.7492512464523315,
      "learning_rate": 5.953888628980714e-06,
      "loss": 0.0206,
      "step": 1248100
    },
    {
      "epoch": 2.042575754600263,
      "grad_norm": 0.09862065315246582,
      "learning_rate": 5.953822736767197e-06,
      "loss": 0.0187,
      "step": 1248120
    },
    {
      "epoch": 2.0426084850389166,
      "grad_norm": 0.6518423557281494,
      "learning_rate": 5.953756844553679e-06,
      "loss": 0.012,
      "step": 1248140
    },
    {
      "epoch": 2.0426412154775697,
      "grad_norm": 0.2350812405347824,
      "learning_rate": 5.953690952340162e-06,
      "loss": 0.0128,
      "step": 1248160
    },
    {
      "epoch": 2.0426739459162233,
      "grad_norm": 0.3833848536014557,
      "learning_rate": 5.953625060126646e-06,
      "loss": 0.0216,
      "step": 1248180
    },
    {
      "epoch": 2.0427066763548765,
      "grad_norm": 0.437299519777298,
      "learning_rate": 5.953559167913128e-06,
      "loss": 0.0135,
      "step": 1248200
    },
    {
      "epoch": 2.04273940679353,
      "grad_norm": 1.8536369800567627,
      "learning_rate": 5.953493275699611e-06,
      "loss": 0.0174,
      "step": 1248220
    },
    {
      "epoch": 2.042772137232183,
      "grad_norm": 0.25288125872612,
      "learning_rate": 5.953427383486093e-06,
      "loss": 0.0246,
      "step": 1248240
    },
    {
      "epoch": 2.0428048676708364,
      "grad_norm": 0.49584200978279114,
      "learning_rate": 5.953361491272577e-06,
      "loss": 0.0128,
      "step": 1248260
    },
    {
      "epoch": 2.04283759810949,
      "grad_norm": 0.18220047652721405,
      "learning_rate": 5.95329559905906e-06,
      "loss": 0.022,
      "step": 1248280
    },
    {
      "epoch": 2.042870328548143,
      "grad_norm": 0.5144750475883484,
      "learning_rate": 5.953229706845542e-06,
      "loss": 0.0117,
      "step": 1248300
    },
    {
      "epoch": 2.0429030589867967,
      "grad_norm": 0.09326963871717453,
      "learning_rate": 5.953163814632026e-06,
      "loss": 0.0183,
      "step": 1248320
    },
    {
      "epoch": 2.04293578942545,
      "grad_norm": 2.229400396347046,
      "learning_rate": 5.9530979224185085e-06,
      "loss": 0.0182,
      "step": 1248340
    },
    {
      "epoch": 2.042968519864103,
      "grad_norm": 0.5646975636482239,
      "learning_rate": 5.953032030204991e-06,
      "loss": 0.0158,
      "step": 1248360
    },
    {
      "epoch": 2.0430012503027566,
      "grad_norm": 0.20569436252117157,
      "learning_rate": 5.952966137991474e-06,
      "loss": 0.0113,
      "step": 1248380
    },
    {
      "epoch": 2.0430339807414097,
      "grad_norm": 0.24702005088329315,
      "learning_rate": 5.9529002457779576e-06,
      "loss": 0.0188,
      "step": 1248400
    },
    {
      "epoch": 2.0430667111800633,
      "grad_norm": 0.2834119200706482,
      "learning_rate": 5.9528343535644395e-06,
      "loss": 0.0204,
      "step": 1248420
    },
    {
      "epoch": 2.0430994416187165,
      "grad_norm": 0.8432003855705261,
      "learning_rate": 5.952768461350923e-06,
      "loss": 0.0164,
      "step": 1248440
    },
    {
      "epoch": 2.04313217205737,
      "grad_norm": 0.2206581085920334,
      "learning_rate": 5.952702569137405e-06,
      "loss": 0.0137,
      "step": 1248460
    },
    {
      "epoch": 2.0431649024960232,
      "grad_norm": 0.46839484572410583,
      "learning_rate": 5.9526366769238885e-06,
      "loss": 0.0169,
      "step": 1248480
    },
    {
      "epoch": 2.0431976329346764,
      "grad_norm": 0.39143243432044983,
      "learning_rate": 5.952570784710371e-06,
      "loss": 0.0248,
      "step": 1248500
    },
    {
      "epoch": 2.04323036337333,
      "grad_norm": 0.6694062352180481,
      "learning_rate": 5.952504892496854e-06,
      "loss": 0.0167,
      "step": 1248520
    },
    {
      "epoch": 2.043263093811983,
      "grad_norm": 0.14763537049293518,
      "learning_rate": 5.952439000283337e-06,
      "loss": 0.0113,
      "step": 1248540
    },
    {
      "epoch": 2.0432958242506367,
      "grad_norm": 0.5098245739936829,
      "learning_rate": 5.95237310806982e-06,
      "loss": 0.0106,
      "step": 1248560
    },
    {
      "epoch": 2.04332855468929,
      "grad_norm": 1.1947513818740845,
      "learning_rate": 5.952307215856302e-06,
      "loss": 0.017,
      "step": 1248580
    },
    {
      "epoch": 2.0433612851279435,
      "grad_norm": 0.37611350417137146,
      "learning_rate": 5.952241323642786e-06,
      "loss": 0.0188,
      "step": 1248600
    },
    {
      "epoch": 2.0433940155665966,
      "grad_norm": 0.22330063581466675,
      "learning_rate": 5.952175431429269e-06,
      "loss": 0.0153,
      "step": 1248620
    },
    {
      "epoch": 2.0434267460052498,
      "grad_norm": 0.29029157757759094,
      "learning_rate": 5.952109539215751e-06,
      "loss": 0.0148,
      "step": 1248640
    },
    {
      "epoch": 2.0434594764439034,
      "grad_norm": 0.20708943903446198,
      "learning_rate": 5.952043647002235e-06,
      "loss": 0.0151,
      "step": 1248660
    },
    {
      "epoch": 2.0434922068825565,
      "grad_norm": 0.20396621525287628,
      "learning_rate": 5.951977754788717e-06,
      "loss": 0.0119,
      "step": 1248680
    },
    {
      "epoch": 2.04352493732121,
      "grad_norm": 0.41701701283454895,
      "learning_rate": 5.9519118625752e-06,
      "loss": 0.0154,
      "step": 1248700
    },
    {
      "epoch": 2.0435576677598633,
      "grad_norm": 0.6480998992919922,
      "learning_rate": 5.951845970361682e-06,
      "loss": 0.0149,
      "step": 1248720
    },
    {
      "epoch": 2.043590398198517,
      "grad_norm": 0.2416963130235672,
      "learning_rate": 5.951780078148166e-06,
      "loss": 0.0118,
      "step": 1248740
    },
    {
      "epoch": 2.04362312863717,
      "grad_norm": 0.2770722508430481,
      "learning_rate": 5.9517141859346486e-06,
      "loss": 0.0216,
      "step": 1248760
    },
    {
      "epoch": 2.043655859075823,
      "grad_norm": 0.48194658756256104,
      "learning_rate": 5.951648293721131e-06,
      "loss": 0.0126,
      "step": 1248780
    },
    {
      "epoch": 2.0436885895144767,
      "grad_norm": 1.2909737825393677,
      "learning_rate": 5.951582401507614e-06,
      "loss": 0.0131,
      "step": 1248800
    },
    {
      "epoch": 2.04372131995313,
      "grad_norm": 0.4785730242729187,
      "learning_rate": 5.951516509294098e-06,
      "loss": 0.0201,
      "step": 1248820
    },
    {
      "epoch": 2.0437540503917835,
      "grad_norm": 0.5162619948387146,
      "learning_rate": 5.9514506170805795e-06,
      "loss": 0.0245,
      "step": 1248840
    },
    {
      "epoch": 2.0437867808304366,
      "grad_norm": 0.6212587356567383,
      "learning_rate": 5.951384724867063e-06,
      "loss": 0.0183,
      "step": 1248860
    },
    {
      "epoch": 2.0438195112690902,
      "grad_norm": 0.2958259880542755,
      "learning_rate": 5.951318832653545e-06,
      "loss": 0.0104,
      "step": 1248880
    },
    {
      "epoch": 2.0438522417077434,
      "grad_norm": 0.38171347975730896,
      "learning_rate": 5.951252940440029e-06,
      "loss": 0.0102,
      "step": 1248900
    },
    {
      "epoch": 2.0438849721463965,
      "grad_norm": 0.8615254759788513,
      "learning_rate": 5.951187048226511e-06,
      "loss": 0.0131,
      "step": 1248920
    },
    {
      "epoch": 2.04391770258505,
      "grad_norm": 0.2504204213619232,
      "learning_rate": 5.951121156012994e-06,
      "loss": 0.0085,
      "step": 1248940
    },
    {
      "epoch": 2.0439504330237033,
      "grad_norm": 0.7170212268829346,
      "learning_rate": 5.951055263799478e-06,
      "loss": 0.0141,
      "step": 1248960
    },
    {
      "epoch": 2.043983163462357,
      "grad_norm": 0.28201887011528015,
      "learning_rate": 5.95098937158596e-06,
      "loss": 0.0262,
      "step": 1248980
    },
    {
      "epoch": 2.04401589390101,
      "grad_norm": 0.6317047476768494,
      "learning_rate": 5.950923479372443e-06,
      "loss": 0.0181,
      "step": 1249000
    },
    {
      "epoch": 2.0440486243396636,
      "grad_norm": 0.5293687582015991,
      "learning_rate": 5.950857587158926e-06,
      "loss": 0.013,
      "step": 1249020
    },
    {
      "epoch": 2.0440813547783168,
      "grad_norm": 0.2838505208492279,
      "learning_rate": 5.9507916949454095e-06,
      "loss": 0.0152,
      "step": 1249040
    },
    {
      "epoch": 2.04411408521697,
      "grad_norm": 0.21702252328395844,
      "learning_rate": 5.950725802731891e-06,
      "loss": 0.0131,
      "step": 1249060
    },
    {
      "epoch": 2.0441468156556235,
      "grad_norm": 0.5534514784812927,
      "learning_rate": 5.950659910518375e-06,
      "loss": 0.0188,
      "step": 1249080
    },
    {
      "epoch": 2.0441795460942767,
      "grad_norm": 1.5283533334732056,
      "learning_rate": 5.950594018304857e-06,
      "loss": 0.0134,
      "step": 1249100
    },
    {
      "epoch": 2.0442122765329303,
      "grad_norm": 0.35880765318870544,
      "learning_rate": 5.9505281260913404e-06,
      "loss": 0.0195,
      "step": 1249120
    },
    {
      "epoch": 2.0442450069715834,
      "grad_norm": 0.29539424180984497,
      "learning_rate": 5.950462233877823e-06,
      "loss": 0.012,
      "step": 1249140
    },
    {
      "epoch": 2.044277737410237,
      "grad_norm": 0.26566964387893677,
      "learning_rate": 5.950396341664306e-06,
      "loss": 0.0134,
      "step": 1249160
    },
    {
      "epoch": 2.04431046784889,
      "grad_norm": 0.24898728728294373,
      "learning_rate": 5.950330449450789e-06,
      "loss": 0.0102,
      "step": 1249180
    },
    {
      "epoch": 2.0443431982875433,
      "grad_norm": 0.2907848656177521,
      "learning_rate": 5.950264557237272e-06,
      "loss": 0.0138,
      "step": 1249200
    },
    {
      "epoch": 2.044375928726197,
      "grad_norm": 1.3114049434661865,
      "learning_rate": 5.950198665023754e-06,
      "loss": 0.0118,
      "step": 1249220
    },
    {
      "epoch": 2.04440865916485,
      "grad_norm": 0.14948920905590057,
      "learning_rate": 5.950132772810238e-06,
      "loss": 0.019,
      "step": 1249240
    },
    {
      "epoch": 2.0444413896035036,
      "grad_norm": 0.18993650376796722,
      "learning_rate": 5.95006688059672e-06,
      "loss": 0.0163,
      "step": 1249260
    },
    {
      "epoch": 2.044474120042157,
      "grad_norm": 0.31961098313331604,
      "learning_rate": 5.950000988383203e-06,
      "loss": 0.0144,
      "step": 1249280
    },
    {
      "epoch": 2.04450685048081,
      "grad_norm": 0.6707789301872253,
      "learning_rate": 5.949935096169686e-06,
      "loss": 0.0177,
      "step": 1249300
    },
    {
      "epoch": 2.0445395809194635,
      "grad_norm": 0.29880857467651367,
      "learning_rate": 5.949869203956169e-06,
      "loss": 0.018,
      "step": 1249320
    },
    {
      "epoch": 2.0445723113581167,
      "grad_norm": 0.3245321810245514,
      "learning_rate": 5.949803311742652e-06,
      "loss": 0.0106,
      "step": 1249340
    },
    {
      "epoch": 2.0446050417967703,
      "grad_norm": 0.1671445220708847,
      "learning_rate": 5.949737419529135e-06,
      "loss": 0.0184,
      "step": 1249360
    },
    {
      "epoch": 2.0446377722354234,
      "grad_norm": 0.41032543778419495,
      "learning_rate": 5.949671527315618e-06,
      "loss": 0.0214,
      "step": 1249380
    },
    {
      "epoch": 2.044670502674077,
      "grad_norm": 0.44046494364738464,
      "learning_rate": 5.9496056351021005e-06,
      "loss": 0.0187,
      "step": 1249400
    },
    {
      "epoch": 2.04470323311273,
      "grad_norm": 0.027196792885661125,
      "learning_rate": 5.949539742888584e-06,
      "loss": 0.0134,
      "step": 1249420
    },
    {
      "epoch": 2.0447359635513833,
      "grad_norm": 0.6644960641860962,
      "learning_rate": 5.949473850675066e-06,
      "loss": 0.016,
      "step": 1249440
    },
    {
      "epoch": 2.044768693990037,
      "grad_norm": 0.5355489253997803,
      "learning_rate": 5.9494079584615495e-06,
      "loss": 0.0139,
      "step": 1249460
    },
    {
      "epoch": 2.04480142442869,
      "grad_norm": 0.39128366112709045,
      "learning_rate": 5.9493420662480314e-06,
      "loss": 0.0104,
      "step": 1249480
    },
    {
      "epoch": 2.0448341548673437,
      "grad_norm": 0.43355458974838257,
      "learning_rate": 5.949276174034515e-06,
      "loss": 0.0167,
      "step": 1249500
    },
    {
      "epoch": 2.044866885305997,
      "grad_norm": 0.21475106477737427,
      "learning_rate": 5.949210281820998e-06,
      "loss": 0.0183,
      "step": 1249520
    },
    {
      "epoch": 2.0448996157446504,
      "grad_norm": 0.20382100343704224,
      "learning_rate": 5.9491443896074805e-06,
      "loss": 0.0177,
      "step": 1249540
    },
    {
      "epoch": 2.0449323461833036,
      "grad_norm": 0.2533693015575409,
      "learning_rate": 5.949078497393963e-06,
      "loss": 0.017,
      "step": 1249560
    },
    {
      "epoch": 2.0449650766219567,
      "grad_norm": 0.1710207164287567,
      "learning_rate": 5.949012605180447e-06,
      "loss": 0.0137,
      "step": 1249580
    },
    {
      "epoch": 2.0449978070606103,
      "grad_norm": 0.0666850358247757,
      "learning_rate": 5.948946712966929e-06,
      "loss": 0.0121,
      "step": 1249600
    },
    {
      "epoch": 2.0450305374992634,
      "grad_norm": 0.26829612255096436,
      "learning_rate": 5.948880820753412e-06,
      "loss": 0.0136,
      "step": 1249620
    },
    {
      "epoch": 2.045063267937917,
      "grad_norm": 0.23958882689476013,
      "learning_rate": 5.948814928539894e-06,
      "loss": 0.0226,
      "step": 1249640
    },
    {
      "epoch": 2.04509599837657,
      "grad_norm": 0.42440441250801086,
      "learning_rate": 5.948749036326378e-06,
      "loss": 0.013,
      "step": 1249660
    },
    {
      "epoch": 2.045128728815224,
      "grad_norm": 0.3478694558143616,
      "learning_rate": 5.948683144112861e-06,
      "loss": 0.01,
      "step": 1249680
    },
    {
      "epoch": 2.045161459253877,
      "grad_norm": 0.4204288125038147,
      "learning_rate": 5.948617251899343e-06,
      "loss": 0.0121,
      "step": 1249700
    },
    {
      "epoch": 2.04519418969253,
      "grad_norm": 0.3073505163192749,
      "learning_rate": 5.948551359685827e-06,
      "loss": 0.0218,
      "step": 1249720
    },
    {
      "epoch": 2.0452269201311837,
      "grad_norm": 0.43878036737442017,
      "learning_rate": 5.948485467472309e-06,
      "loss": 0.0136,
      "step": 1249740
    },
    {
      "epoch": 2.045259650569837,
      "grad_norm": 0.12227392941713333,
      "learning_rate": 5.948419575258792e-06,
      "loss": 0.0196,
      "step": 1249760
    },
    {
      "epoch": 2.0452923810084904,
      "grad_norm": 1.0711590051651,
      "learning_rate": 5.948353683045275e-06,
      "loss": 0.0226,
      "step": 1249780
    },
    {
      "epoch": 2.0453251114471436,
      "grad_norm": 0.519009530544281,
      "learning_rate": 5.948287790831758e-06,
      "loss": 0.0285,
      "step": 1249800
    },
    {
      "epoch": 2.045357841885797,
      "grad_norm": 0.12740592658519745,
      "learning_rate": 5.9482218986182406e-06,
      "loss": 0.0128,
      "step": 1249820
    },
    {
      "epoch": 2.0453905723244503,
      "grad_norm": 0.19982346892356873,
      "learning_rate": 5.948156006404724e-06,
      "loss": 0.0165,
      "step": 1249840
    },
    {
      "epoch": 2.0454233027631035,
      "grad_norm": 0.40072938799858093,
      "learning_rate": 5.948090114191206e-06,
      "loss": 0.0164,
      "step": 1249860
    },
    {
      "epoch": 2.045456033201757,
      "grad_norm": 0.7561532855033875,
      "learning_rate": 5.94802422197769e-06,
      "loss": 0.012,
      "step": 1249880
    },
    {
      "epoch": 2.04548876364041,
      "grad_norm": 0.969847559928894,
      "learning_rate": 5.9479583297641715e-06,
      "loss": 0.0225,
      "step": 1249900
    },
    {
      "epoch": 2.045521494079064,
      "grad_norm": 0.5211102962493896,
      "learning_rate": 5.947892437550655e-06,
      "loss": 0.0153,
      "step": 1249920
    },
    {
      "epoch": 2.045554224517717,
      "grad_norm": 0.4642140865325928,
      "learning_rate": 5.947826545337138e-06,
      "loss": 0.0163,
      "step": 1249940
    },
    {
      "epoch": 2.04558695495637,
      "grad_norm": 0.4062453806400299,
      "learning_rate": 5.947760653123621e-06,
      "loss": 0.0144,
      "step": 1249960
    },
    {
      "epoch": 2.0456196853950237,
      "grad_norm": 0.16497178375720978,
      "learning_rate": 5.947694760910103e-06,
      "loss": 0.0129,
      "step": 1249980
    },
    {
      "epoch": 2.045652415833677,
      "grad_norm": 0.42169901728630066,
      "learning_rate": 5.947628868696587e-06,
      "loss": 0.0118,
      "step": 1250000
    },
    {
      "epoch": 2.045652415833677,
      "eval_loss": 0.009147759526968002,
      "eval_runtime": 6518.8286,
      "eval_samples_per_second": 157.675,
      "eval_steps_per_second": 15.768,
      "eval_sts-dev_pearson_cosine": 0.9787419127924516,
      "eval_sts-dev_spearman_cosine": 0.8915347940164415,
      "step": 1250000
    },
    {
      "epoch": 2.0456851462723304,
      "grad_norm": 0.32302892208099365,
      "learning_rate": 5.94756297648307e-06,
      "loss": 0.0229,
      "step": 1250020
    },
    {
      "epoch": 2.0457178767109836,
      "grad_norm": 2.8558506965637207,
      "learning_rate": 5.947497084269552e-06,
      "loss": 0.0164,
      "step": 1250040
    },
    {
      "epoch": 2.045750607149637,
      "grad_norm": 0.971304714679718,
      "learning_rate": 5.947431192056036e-06,
      "loss": 0.0177,
      "step": 1250060
    },
    {
      "epoch": 2.0457833375882903,
      "grad_norm": 0.5541622042655945,
      "learning_rate": 5.947365299842518e-06,
      "loss": 0.0144,
      "step": 1250080
    },
    {
      "epoch": 2.0458160680269435,
      "grad_norm": 0.7863869071006775,
      "learning_rate": 5.9472994076290014e-06,
      "loss": 0.0151,
      "step": 1250100
    },
    {
      "epoch": 2.045848798465597,
      "grad_norm": 0.10948201268911362,
      "learning_rate": 5.947233515415483e-06,
      "loss": 0.0111,
      "step": 1250120
    },
    {
      "epoch": 2.0458815289042502,
      "grad_norm": 0.2607559859752655,
      "learning_rate": 5.947167623201967e-06,
      "loss": 0.0177,
      "step": 1250140
    },
    {
      "epoch": 2.045914259342904,
      "grad_norm": 0.514607846736908,
      "learning_rate": 5.94710173098845e-06,
      "loss": 0.0178,
      "step": 1250160
    },
    {
      "epoch": 2.045946989781557,
      "grad_norm": 0.41258537769317627,
      "learning_rate": 5.947035838774932e-06,
      "loss": 0.0123,
      "step": 1250180
    },
    {
      "epoch": 2.0459797202202106,
      "grad_norm": 0.22408582270145416,
      "learning_rate": 5.946969946561415e-06,
      "loss": 0.0148,
      "step": 1250200
    },
    {
      "epoch": 2.0460124506588637,
      "grad_norm": 0.11030520498752594,
      "learning_rate": 5.946904054347899e-06,
      "loss": 0.014,
      "step": 1250220
    },
    {
      "epoch": 2.046045181097517,
      "grad_norm": 0.5710016489028931,
      "learning_rate": 5.946838162134381e-06,
      "loss": 0.0188,
      "step": 1250240
    },
    {
      "epoch": 2.0460779115361705,
      "grad_norm": 0.2708614468574524,
      "learning_rate": 5.946772269920864e-06,
      "loss": 0.0148,
      "step": 1250260
    },
    {
      "epoch": 2.0461106419748236,
      "grad_norm": 0.2815699577331543,
      "learning_rate": 5.946706377707346e-06,
      "loss": 0.0167,
      "step": 1250280
    },
    {
      "epoch": 2.046143372413477,
      "grad_norm": 0.2035132646560669,
      "learning_rate": 5.94664048549383e-06,
      "loss": 0.0164,
      "step": 1250300
    },
    {
      "epoch": 2.0461761028521304,
      "grad_norm": 1.285306692123413,
      "learning_rate": 5.9465745932803124e-06,
      "loss": 0.0137,
      "step": 1250320
    },
    {
      "epoch": 2.046208833290784,
      "grad_norm": 0.12807850539684296,
      "learning_rate": 5.946508701066795e-06,
      "loss": 0.012,
      "step": 1250340
    },
    {
      "epoch": 2.046241563729437,
      "grad_norm": 0.5708789825439453,
      "learning_rate": 5.946442808853279e-06,
      "loss": 0.0211,
      "step": 1250360
    },
    {
      "epoch": 2.0462742941680903,
      "grad_norm": 0.4764057695865631,
      "learning_rate": 5.9463769166397615e-06,
      "loss": 0.015,
      "step": 1250380
    },
    {
      "epoch": 2.046307024606744,
      "grad_norm": 0.3203290104866028,
      "learning_rate": 5.946311024426244e-06,
      "loss": 0.0145,
      "step": 1250400
    },
    {
      "epoch": 2.046339755045397,
      "grad_norm": 0.530704140663147,
      "learning_rate": 5.946245132212727e-06,
      "loss": 0.0198,
      "step": 1250420
    },
    {
      "epoch": 2.0463724854840506,
      "grad_norm": 0.5508776903152466,
      "learning_rate": 5.9461792399992106e-06,
      "loss": 0.011,
      "step": 1250440
    },
    {
      "epoch": 2.0464052159227037,
      "grad_norm": 0.26161083579063416,
      "learning_rate": 5.9461133477856925e-06,
      "loss": 0.0159,
      "step": 1250460
    },
    {
      "epoch": 2.0464379463613573,
      "grad_norm": 0.41862717270851135,
      "learning_rate": 5.946047455572176e-06,
      "loss": 0.0119,
      "step": 1250480
    },
    {
      "epoch": 2.0464706768000105,
      "grad_norm": 0.18624120950698853,
      "learning_rate": 5.945981563358658e-06,
      "loss": 0.0163,
      "step": 1250500
    },
    {
      "epoch": 2.0465034072386636,
      "grad_norm": 0.18218016624450684,
      "learning_rate": 5.9459156711451415e-06,
      "loss": 0.0182,
      "step": 1250520
    },
    {
      "epoch": 2.0465361376773172,
      "grad_norm": 0.35446977615356445,
      "learning_rate": 5.945849778931624e-06,
      "loss": 0.0119,
      "step": 1250540
    },
    {
      "epoch": 2.0465688681159704,
      "grad_norm": 0.5838800072669983,
      "learning_rate": 5.945783886718107e-06,
      "loss": 0.017,
      "step": 1250560
    },
    {
      "epoch": 2.046601598554624,
      "grad_norm": 0.242461159825325,
      "learning_rate": 5.94571799450459e-06,
      "loss": 0.0093,
      "step": 1250580
    },
    {
      "epoch": 2.046634328993277,
      "grad_norm": 1.2007718086242676,
      "learning_rate": 5.945652102291073e-06,
      "loss": 0.0191,
      "step": 1250600
    },
    {
      "epoch": 2.0466670594319307,
      "grad_norm": 0.15301506221294403,
      "learning_rate": 5.945586210077555e-06,
      "loss": 0.0145,
      "step": 1250620
    },
    {
      "epoch": 2.046699789870584,
      "grad_norm": 0.36484500765800476,
      "learning_rate": 5.945520317864039e-06,
      "loss": 0.012,
      "step": 1250640
    },
    {
      "epoch": 2.046732520309237,
      "grad_norm": 0.13093408942222595,
      "learning_rate": 5.945454425650521e-06,
      "loss": 0.013,
      "step": 1250660
    },
    {
      "epoch": 2.0467652507478906,
      "grad_norm": 0.4115165174007416,
      "learning_rate": 5.945388533437004e-06,
      "loss": 0.0216,
      "step": 1250680
    },
    {
      "epoch": 2.0467979811865438,
      "grad_norm": 0.20006412267684937,
      "learning_rate": 5.945322641223486e-06,
      "loss": 0.0161,
      "step": 1250700
    },
    {
      "epoch": 2.0468307116251974,
      "grad_norm": 0.3578372597694397,
      "learning_rate": 5.94525674900997e-06,
      "loss": 0.0129,
      "step": 1250720
    },
    {
      "epoch": 2.0468634420638505,
      "grad_norm": 0.37600401043891907,
      "learning_rate": 5.945190856796453e-06,
      "loss": 0.0126,
      "step": 1250740
    },
    {
      "epoch": 2.0468961725025037,
      "grad_norm": 0.11693120002746582,
      "learning_rate": 5.945124964582935e-06,
      "loss": 0.0145,
      "step": 1250760
    },
    {
      "epoch": 2.0469289029411573,
      "grad_norm": 0.7731480002403259,
      "learning_rate": 5.945059072369419e-06,
      "loss": 0.0177,
      "step": 1250780
    },
    {
      "epoch": 2.0469616333798104,
      "grad_norm": 0.7651594877243042,
      "learning_rate": 5.9449931801559016e-06,
      "loss": 0.016,
      "step": 1250800
    },
    {
      "epoch": 2.046994363818464,
      "grad_norm": 0.22224995493888855,
      "learning_rate": 5.944927287942384e-06,
      "loss": 0.0188,
      "step": 1250820
    },
    {
      "epoch": 2.047027094257117,
      "grad_norm": 0.9305557608604431,
      "learning_rate": 5.944861395728867e-06,
      "loss": 0.0193,
      "step": 1250840
    },
    {
      "epoch": 2.0470598246957707,
      "grad_norm": 0.48822852969169617,
      "learning_rate": 5.944795503515351e-06,
      "loss": 0.0187,
      "step": 1250860
    },
    {
      "epoch": 2.047092555134424,
      "grad_norm": 0.7801545262336731,
      "learning_rate": 5.9447296113018325e-06,
      "loss": 0.0135,
      "step": 1250880
    },
    {
      "epoch": 2.047125285573077,
      "grad_norm": 0.840059220790863,
      "learning_rate": 5.944663719088316e-06,
      "loss": 0.0142,
      "step": 1250900
    },
    {
      "epoch": 2.0471580160117306,
      "grad_norm": 0.2654978930950165,
      "learning_rate": 5.944597826874798e-06,
      "loss": 0.0174,
      "step": 1250920
    },
    {
      "epoch": 2.047190746450384,
      "grad_norm": 0.9938622713088989,
      "learning_rate": 5.944531934661282e-06,
      "loss": 0.0186,
      "step": 1250940
    },
    {
      "epoch": 2.0472234768890374,
      "grad_norm": 0.6333704590797424,
      "learning_rate": 5.944466042447764e-06,
      "loss": 0.0155,
      "step": 1250960
    },
    {
      "epoch": 2.0472562073276905,
      "grad_norm": 1.3482580184936523,
      "learning_rate": 5.944400150234247e-06,
      "loss": 0.0148,
      "step": 1250980
    },
    {
      "epoch": 2.047288937766344,
      "grad_norm": 0.15474997460842133,
      "learning_rate": 5.94433425802073e-06,
      "loss": 0.0134,
      "step": 1251000
    },
    {
      "epoch": 2.0473216682049973,
      "grad_norm": 0.4840237498283386,
      "learning_rate": 5.944268365807213e-06,
      "loss": 0.0199,
      "step": 1251020
    },
    {
      "epoch": 2.0473543986436504,
      "grad_norm": 0.19936010241508484,
      "learning_rate": 5.944202473593695e-06,
      "loss": 0.0167,
      "step": 1251040
    },
    {
      "epoch": 2.047387129082304,
      "grad_norm": 0.7726275324821472,
      "learning_rate": 5.944136581380179e-06,
      "loss": 0.0092,
      "step": 1251060
    },
    {
      "epoch": 2.047419859520957,
      "grad_norm": 0.6958503723144531,
      "learning_rate": 5.9440706891666625e-06,
      "loss": 0.0123,
      "step": 1251080
    },
    {
      "epoch": 2.0474525899596108,
      "grad_norm": 1.0176085233688354,
      "learning_rate": 5.944004796953144e-06,
      "loss": 0.0128,
      "step": 1251100
    },
    {
      "epoch": 2.047485320398264,
      "grad_norm": 0.28949329257011414,
      "learning_rate": 5.943938904739628e-06,
      "loss": 0.0099,
      "step": 1251120
    },
    {
      "epoch": 2.0475180508369175,
      "grad_norm": 0.19866929948329926,
      "learning_rate": 5.94387301252611e-06,
      "loss": 0.0109,
      "step": 1251140
    },
    {
      "epoch": 2.0475507812755707,
      "grad_norm": 0.801717221736908,
      "learning_rate": 5.9438071203125934e-06,
      "loss": 0.0132,
      "step": 1251160
    },
    {
      "epoch": 2.047583511714224,
      "grad_norm": 1.1147133111953735,
      "learning_rate": 5.943741228099076e-06,
      "loss": 0.0188,
      "step": 1251180
    },
    {
      "epoch": 2.0476162421528774,
      "grad_norm": 0.9124207496643066,
      "learning_rate": 5.943675335885559e-06,
      "loss": 0.0175,
      "step": 1251200
    },
    {
      "epoch": 2.0476489725915306,
      "grad_norm": 0.6367093920707703,
      "learning_rate": 5.943609443672042e-06,
      "loss": 0.0137,
      "step": 1251220
    },
    {
      "epoch": 2.047681703030184,
      "grad_norm": 0.5040225982666016,
      "learning_rate": 5.943543551458525e-06,
      "loss": 0.0262,
      "step": 1251240
    },
    {
      "epoch": 2.0477144334688373,
      "grad_norm": 0.3720220923423767,
      "learning_rate": 5.943477659245007e-06,
      "loss": 0.0117,
      "step": 1251260
    },
    {
      "epoch": 2.047747163907491,
      "grad_norm": 0.6971437931060791,
      "learning_rate": 5.943411767031491e-06,
      "loss": 0.0172,
      "step": 1251280
    },
    {
      "epoch": 2.047779894346144,
      "grad_norm": 0.3781246840953827,
      "learning_rate": 5.943345874817973e-06,
      "loss": 0.015,
      "step": 1251300
    },
    {
      "epoch": 2.047812624784797,
      "grad_norm": 0.43759971857070923,
      "learning_rate": 5.943279982604456e-06,
      "loss": 0.0183,
      "step": 1251320
    },
    {
      "epoch": 2.047845355223451,
      "grad_norm": 0.4636390805244446,
      "learning_rate": 5.943214090390939e-06,
      "loss": 0.013,
      "step": 1251340
    },
    {
      "epoch": 2.047878085662104,
      "grad_norm": 0.6917898058891296,
      "learning_rate": 5.943148198177422e-06,
      "loss": 0.0114,
      "step": 1251360
    },
    {
      "epoch": 2.0479108161007575,
      "grad_norm": 0.86521977186203,
      "learning_rate": 5.943082305963904e-06,
      "loss": 0.0171,
      "step": 1251380
    },
    {
      "epoch": 2.0479435465394107,
      "grad_norm": 0.8267899751663208,
      "learning_rate": 5.943016413750388e-06,
      "loss": 0.0161,
      "step": 1251400
    },
    {
      "epoch": 2.047976276978064,
      "grad_norm": 0.07815535366535187,
      "learning_rate": 5.942950521536871e-06,
      "loss": 0.0239,
      "step": 1251420
    },
    {
      "epoch": 2.0480090074167174,
      "grad_norm": 0.9514543414115906,
      "learning_rate": 5.9428846293233535e-06,
      "loss": 0.0173,
      "step": 1251440
    },
    {
      "epoch": 2.0480417378553706,
      "grad_norm": 0.5591170787811279,
      "learning_rate": 5.942818737109837e-06,
      "loss": 0.0181,
      "step": 1251460
    },
    {
      "epoch": 2.048074468294024,
      "grad_norm": 0.5794780850410461,
      "learning_rate": 5.942752844896319e-06,
      "loss": 0.0126,
      "step": 1251480
    },
    {
      "epoch": 2.0481071987326773,
      "grad_norm": 0.13024069368839264,
      "learning_rate": 5.9426869526828025e-06,
      "loss": 0.0168,
      "step": 1251500
    },
    {
      "epoch": 2.048139929171331,
      "grad_norm": 0.6641352772712708,
      "learning_rate": 5.9426210604692844e-06,
      "loss": 0.0178,
      "step": 1251520
    },
    {
      "epoch": 2.048172659609984,
      "grad_norm": 1.1353404521942139,
      "learning_rate": 5.942555168255768e-06,
      "loss": 0.0179,
      "step": 1251540
    },
    {
      "epoch": 2.048205390048637,
      "grad_norm": 0.7119787931442261,
      "learning_rate": 5.94248927604225e-06,
      "loss": 0.0186,
      "step": 1251560
    },
    {
      "epoch": 2.048238120487291,
      "grad_norm": 0.20476150512695312,
      "learning_rate": 5.9424233838287335e-06,
      "loss": 0.0132,
      "step": 1251580
    },
    {
      "epoch": 2.048270850925944,
      "grad_norm": 0.325113445520401,
      "learning_rate": 5.942357491615216e-06,
      "loss": 0.0113,
      "step": 1251600
    },
    {
      "epoch": 2.0483035813645976,
      "grad_norm": 0.3858295977115631,
      "learning_rate": 5.9422915994017e-06,
      "loss": 0.0134,
      "step": 1251620
    },
    {
      "epoch": 2.0483363118032507,
      "grad_norm": 0.32645776867866516,
      "learning_rate": 5.942225707188182e-06,
      "loss": 0.0182,
      "step": 1251640
    },
    {
      "epoch": 2.0483690422419043,
      "grad_norm": 0.8491397500038147,
      "learning_rate": 5.942159814974665e-06,
      "loss": 0.0185,
      "step": 1251660
    },
    {
      "epoch": 2.0484017726805575,
      "grad_norm": 0.17905496060848236,
      "learning_rate": 5.942093922761147e-06,
      "loss": 0.0193,
      "step": 1251680
    },
    {
      "epoch": 2.0484345031192106,
      "grad_norm": 0.1231912299990654,
      "learning_rate": 5.942028030547631e-06,
      "loss": 0.0111,
      "step": 1251700
    },
    {
      "epoch": 2.048467233557864,
      "grad_norm": 0.18280163407325745,
      "learning_rate": 5.941962138334113e-06,
      "loss": 0.0148,
      "step": 1251720
    },
    {
      "epoch": 2.0484999639965173,
      "grad_norm": 0.28382381796836853,
      "learning_rate": 5.941896246120596e-06,
      "loss": 0.0168,
      "step": 1251740
    },
    {
      "epoch": 2.048532694435171,
      "grad_norm": 0.3048538565635681,
      "learning_rate": 5.941830353907079e-06,
      "loss": 0.0213,
      "step": 1251760
    },
    {
      "epoch": 2.048565424873824,
      "grad_norm": 0.08060872554779053,
      "learning_rate": 5.941764461693562e-06,
      "loss": 0.0162,
      "step": 1251780
    },
    {
      "epoch": 2.0485981553124777,
      "grad_norm": 0.4983912408351898,
      "learning_rate": 5.941698569480045e-06,
      "loss": 0.0157,
      "step": 1251800
    },
    {
      "epoch": 2.048630885751131,
      "grad_norm": 0.2359742522239685,
      "learning_rate": 5.941632677266528e-06,
      "loss": 0.0155,
      "step": 1251820
    },
    {
      "epoch": 2.048663616189784,
      "grad_norm": 0.11440461128950119,
      "learning_rate": 5.941566785053011e-06,
      "loss": 0.0139,
      "step": 1251840
    },
    {
      "epoch": 2.0486963466284376,
      "grad_norm": 0.20746949315071106,
      "learning_rate": 5.9415008928394936e-06,
      "loss": 0.012,
      "step": 1251860
    },
    {
      "epoch": 2.0487290770670907,
      "grad_norm": 0.3273763954639435,
      "learning_rate": 5.941435000625977e-06,
      "loss": 0.0151,
      "step": 1251880
    },
    {
      "epoch": 2.0487618075057443,
      "grad_norm": 0.11893542110919952,
      "learning_rate": 5.941369108412459e-06,
      "loss": 0.0152,
      "step": 1251900
    },
    {
      "epoch": 2.0487945379443975,
      "grad_norm": 0.5418125987052917,
      "learning_rate": 5.941303216198943e-06,
      "loss": 0.0206,
      "step": 1251920
    },
    {
      "epoch": 2.048827268383051,
      "grad_norm": 0.5994380116462708,
      "learning_rate": 5.9412373239854245e-06,
      "loss": 0.0134,
      "step": 1251940
    },
    {
      "epoch": 2.048859998821704,
      "grad_norm": 0.28059279918670654,
      "learning_rate": 5.941171431771908e-06,
      "loss": 0.0163,
      "step": 1251960
    },
    {
      "epoch": 2.0488927292603574,
      "grad_norm": 0.3728993237018585,
      "learning_rate": 5.941105539558391e-06,
      "loss": 0.0183,
      "step": 1251980
    },
    {
      "epoch": 2.048925459699011,
      "grad_norm": 0.9647617340087891,
      "learning_rate": 5.941039647344874e-06,
      "loss": 0.0175,
      "step": 1252000
    },
    {
      "epoch": 2.048958190137664,
      "grad_norm": 0.3065885901451111,
      "learning_rate": 5.940973755131356e-06,
      "loss": 0.0083,
      "step": 1252020
    },
    {
      "epoch": 2.0489909205763177,
      "grad_norm": 0.2756812572479248,
      "learning_rate": 5.94090786291784e-06,
      "loss": 0.0171,
      "step": 1252040
    },
    {
      "epoch": 2.049023651014971,
      "grad_norm": 0.3460175395011902,
      "learning_rate": 5.940841970704322e-06,
      "loss": 0.0137,
      "step": 1252060
    },
    {
      "epoch": 2.0490563814536245,
      "grad_norm": 0.4911571443080902,
      "learning_rate": 5.940776078490805e-06,
      "loss": 0.0215,
      "step": 1252080
    },
    {
      "epoch": 2.0490891118922776,
      "grad_norm": 0.11345088481903076,
      "learning_rate": 5.940710186277287e-06,
      "loss": 0.0189,
      "step": 1252100
    },
    {
      "epoch": 2.0491218423309308,
      "grad_norm": 0.45597174763679504,
      "learning_rate": 5.940644294063771e-06,
      "loss": 0.014,
      "step": 1252120
    },
    {
      "epoch": 2.0491545727695843,
      "grad_norm": 0.4541805386543274,
      "learning_rate": 5.9405784018502545e-06,
      "loss": 0.0178,
      "step": 1252140
    },
    {
      "epoch": 2.0491873032082375,
      "grad_norm": 0.22336503863334656,
      "learning_rate": 5.940512509636736e-06,
      "loss": 0.0098,
      "step": 1252160
    },
    {
      "epoch": 2.049220033646891,
      "grad_norm": 0.14156179130077362,
      "learning_rate": 5.94044661742322e-06,
      "loss": 0.017,
      "step": 1252180
    },
    {
      "epoch": 2.0492527640855442,
      "grad_norm": 0.3996278941631317,
      "learning_rate": 5.940380725209703e-06,
      "loss": 0.0129,
      "step": 1252200
    },
    {
      "epoch": 2.049285494524198,
      "grad_norm": 0.2922651469707489,
      "learning_rate": 5.940314832996185e-06,
      "loss": 0.0177,
      "step": 1252220
    },
    {
      "epoch": 2.049318224962851,
      "grad_norm": 0.47868815064430237,
      "learning_rate": 5.940248940782668e-06,
      "loss": 0.0121,
      "step": 1252240
    },
    {
      "epoch": 2.049350955401504,
      "grad_norm": 0.7987003326416016,
      "learning_rate": 5.940183048569152e-06,
      "loss": 0.0154,
      "step": 1252260
    },
    {
      "epoch": 2.0493836858401577,
      "grad_norm": 0.3549881875514984,
      "learning_rate": 5.940117156355634e-06,
      "loss": 0.0139,
      "step": 1252280
    },
    {
      "epoch": 2.049416416278811,
      "grad_norm": 0.31964823603630066,
      "learning_rate": 5.940051264142117e-06,
      "loss": 0.0108,
      "step": 1252300
    },
    {
      "epoch": 2.0494491467174645,
      "grad_norm": 0.5939409136772156,
      "learning_rate": 5.939985371928599e-06,
      "loss": 0.0159,
      "step": 1252320
    },
    {
      "epoch": 2.0494818771561176,
      "grad_norm": 0.6758173704147339,
      "learning_rate": 5.939919479715083e-06,
      "loss": 0.0159,
      "step": 1252340
    },
    {
      "epoch": 2.0495146075947708,
      "grad_norm": 0.9263565540313721,
      "learning_rate": 5.9398535875015654e-06,
      "loss": 0.0217,
      "step": 1252360
    },
    {
      "epoch": 2.0495473380334244,
      "grad_norm": 1.0982636213302612,
      "learning_rate": 5.939787695288048e-06,
      "loss": 0.0179,
      "step": 1252380
    },
    {
      "epoch": 2.0495800684720775,
      "grad_norm": 0.36078333854675293,
      "learning_rate": 5.939721803074531e-06,
      "loss": 0.0208,
      "step": 1252400
    },
    {
      "epoch": 2.049612798910731,
      "grad_norm": 0.6693476438522339,
      "learning_rate": 5.9396559108610145e-06,
      "loss": 0.0165,
      "step": 1252420
    },
    {
      "epoch": 2.0496455293493843,
      "grad_norm": 0.6386140584945679,
      "learning_rate": 5.939590018647496e-06,
      "loss": 0.0126,
      "step": 1252440
    },
    {
      "epoch": 2.049678259788038,
      "grad_norm": 0.25480636954307556,
      "learning_rate": 5.93952412643398e-06,
      "loss": 0.0129,
      "step": 1252460
    },
    {
      "epoch": 2.049710990226691,
      "grad_norm": 0.37436923384666443,
      "learning_rate": 5.9394582342204636e-06,
      "loss": 0.0153,
      "step": 1252480
    },
    {
      "epoch": 2.049743720665344,
      "grad_norm": 0.2558697760105133,
      "learning_rate": 5.9393923420069455e-06,
      "loss": 0.0148,
      "step": 1252500
    },
    {
      "epoch": 2.0497764511039978,
      "grad_norm": 0.20172357559204102,
      "learning_rate": 5.939326449793429e-06,
      "loss": 0.0101,
      "step": 1252520
    },
    {
      "epoch": 2.049809181542651,
      "grad_norm": 0.3523763418197632,
      "learning_rate": 5.939260557579911e-06,
      "loss": 0.0217,
      "step": 1252540
    },
    {
      "epoch": 2.0498419119813045,
      "grad_norm": 0.42988133430480957,
      "learning_rate": 5.9391946653663945e-06,
      "loss": 0.0156,
      "step": 1252560
    },
    {
      "epoch": 2.0498746424199576,
      "grad_norm": 0.62088942527771,
      "learning_rate": 5.9391287731528764e-06,
      "loss": 0.0136,
      "step": 1252580
    },
    {
      "epoch": 2.0499073728586112,
      "grad_norm": 0.21403062343597412,
      "learning_rate": 5.93906288093936e-06,
      "loss": 0.014,
      "step": 1252600
    },
    {
      "epoch": 2.0499401032972644,
      "grad_norm": 0.21924620866775513,
      "learning_rate": 5.938996988725843e-06,
      "loss": 0.0109,
      "step": 1252620
    },
    {
      "epoch": 2.0499728337359175,
      "grad_norm": 0.28984639048576355,
      "learning_rate": 5.9389310965123255e-06,
      "loss": 0.0169,
      "step": 1252640
    },
    {
      "epoch": 2.050005564174571,
      "grad_norm": 0.45107200741767883,
      "learning_rate": 5.938865204298808e-06,
      "loss": 0.0224,
      "step": 1252660
    },
    {
      "epoch": 2.0500382946132243,
      "grad_norm": 0.5284817814826965,
      "learning_rate": 5.938799312085292e-06,
      "loss": 0.0164,
      "step": 1252680
    },
    {
      "epoch": 2.050071025051878,
      "grad_norm": 0.22201180458068848,
      "learning_rate": 5.938733419871774e-06,
      "loss": 0.0148,
      "step": 1252700
    },
    {
      "epoch": 2.050103755490531,
      "grad_norm": 1.1307035684585571,
      "learning_rate": 5.938667527658257e-06,
      "loss": 0.0151,
      "step": 1252720
    },
    {
      "epoch": 2.0501364859291846,
      "grad_norm": 0.3136841654777527,
      "learning_rate": 5.938601635444739e-06,
      "loss": 0.0161,
      "step": 1252740
    },
    {
      "epoch": 2.0501692163678378,
      "grad_norm": 0.42342838644981384,
      "learning_rate": 5.938535743231223e-06,
      "loss": 0.0122,
      "step": 1252760
    },
    {
      "epoch": 2.050201946806491,
      "grad_norm": 0.4496663808822632,
      "learning_rate": 5.9384698510177055e-06,
      "loss": 0.0231,
      "step": 1252780
    },
    {
      "epoch": 2.0502346772451445,
      "grad_norm": 0.4079674482345581,
      "learning_rate": 5.938403958804188e-06,
      "loss": 0.0138,
      "step": 1252800
    },
    {
      "epoch": 2.0502674076837977,
      "grad_norm": 0.27585285902023315,
      "learning_rate": 5.938338066590671e-06,
      "loss": 0.0163,
      "step": 1252820
    },
    {
      "epoch": 2.0503001381224513,
      "grad_norm": 0.10784254968166351,
      "learning_rate": 5.938272174377155e-06,
      "loss": 0.014,
      "step": 1252840
    },
    {
      "epoch": 2.0503328685611044,
      "grad_norm": 0.47495344281196594,
      "learning_rate": 5.938206282163637e-06,
      "loss": 0.0159,
      "step": 1252860
    },
    {
      "epoch": 2.050365598999758,
      "grad_norm": 0.13802461326122284,
      "learning_rate": 5.93814038995012e-06,
      "loss": 0.0174,
      "step": 1252880
    },
    {
      "epoch": 2.050398329438411,
      "grad_norm": 0.28785568475723267,
      "learning_rate": 5.938074497736604e-06,
      "loss": 0.012,
      "step": 1252900
    },
    {
      "epoch": 2.0504310598770643,
      "grad_norm": 0.441623330116272,
      "learning_rate": 5.9380086055230855e-06,
      "loss": 0.0223,
      "step": 1252920
    },
    {
      "epoch": 2.050463790315718,
      "grad_norm": 0.5276170969009399,
      "learning_rate": 5.937942713309569e-06,
      "loss": 0.0118,
      "step": 1252940
    },
    {
      "epoch": 2.050496520754371,
      "grad_norm": 0.12724460661411285,
      "learning_rate": 5.937876821096051e-06,
      "loss": 0.0177,
      "step": 1252960
    },
    {
      "epoch": 2.0505292511930246,
      "grad_norm": 0.22625359892845154,
      "learning_rate": 5.937810928882535e-06,
      "loss": 0.0182,
      "step": 1252980
    },
    {
      "epoch": 2.050561981631678,
      "grad_norm": 0.2132522314786911,
      "learning_rate": 5.937745036669017e-06,
      "loss": 0.0138,
      "step": 1253000
    },
    {
      "epoch": 2.050594712070331,
      "grad_norm": 0.3747483789920807,
      "learning_rate": 5.9376791444555e-06,
      "loss": 0.0173,
      "step": 1253020
    },
    {
      "epoch": 2.0506274425089845,
      "grad_norm": 0.4993869960308075,
      "learning_rate": 5.937613252241983e-06,
      "loss": 0.009,
      "step": 1253040
    },
    {
      "epoch": 2.0506601729476377,
      "grad_norm": 0.1340111643075943,
      "learning_rate": 5.937547360028466e-06,
      "loss": 0.0134,
      "step": 1253060
    },
    {
      "epoch": 2.0506929033862913,
      "grad_norm": 0.15742774307727814,
      "learning_rate": 5.937481467814948e-06,
      "loss": 0.0109,
      "step": 1253080
    },
    {
      "epoch": 2.0507256338249444,
      "grad_norm": 0.3087681233882904,
      "learning_rate": 5.937415575601432e-06,
      "loss": 0.0184,
      "step": 1253100
    },
    {
      "epoch": 2.050758364263598,
      "grad_norm": 0.6402403116226196,
      "learning_rate": 5.937349683387914e-06,
      "loss": 0.0111,
      "step": 1253120
    },
    {
      "epoch": 2.050791094702251,
      "grad_norm": 0.9965559244155884,
      "learning_rate": 5.937283791174397e-06,
      "loss": 0.0247,
      "step": 1253140
    },
    {
      "epoch": 2.0508238251409043,
      "grad_norm": 0.08825018256902695,
      "learning_rate": 5.93721789896088e-06,
      "loss": 0.0201,
      "step": 1253160
    },
    {
      "epoch": 2.050856555579558,
      "grad_norm": 0.151127427816391,
      "learning_rate": 5.937152006747363e-06,
      "loss": 0.0156,
      "step": 1253180
    },
    {
      "epoch": 2.050889286018211,
      "grad_norm": 0.3580835461616516,
      "learning_rate": 5.9370861145338464e-06,
      "loss": 0.022,
      "step": 1253200
    },
    {
      "epoch": 2.0509220164568647,
      "grad_norm": 0.23024262487888336,
      "learning_rate": 5.937020222320329e-06,
      "loss": 0.0118,
      "step": 1253220
    },
    {
      "epoch": 2.050954746895518,
      "grad_norm": 0.43387773633003235,
      "learning_rate": 5.936954330106812e-06,
      "loss": 0.0121,
      "step": 1253240
    },
    {
      "epoch": 2.0509874773341714,
      "grad_norm": 0.3410053253173828,
      "learning_rate": 5.936888437893295e-06,
      "loss": 0.0194,
      "step": 1253260
    },
    {
      "epoch": 2.0510202077728246,
      "grad_norm": 0.458735853433609,
      "learning_rate": 5.936822545679778e-06,
      "loss": 0.0176,
      "step": 1253280
    },
    {
      "epoch": 2.0510529382114777,
      "grad_norm": 0.29065781831741333,
      "learning_rate": 5.93675665346626e-06,
      "loss": 0.0117,
      "step": 1253300
    },
    {
      "epoch": 2.0510856686501313,
      "grad_norm": 0.7829903960227966,
      "learning_rate": 5.936690761252744e-06,
      "loss": 0.0149,
      "step": 1253320
    },
    {
      "epoch": 2.0511183990887845,
      "grad_norm": 0.8374248147010803,
      "learning_rate": 5.936624869039226e-06,
      "loss": 0.0137,
      "step": 1253340
    },
    {
      "epoch": 2.051151129527438,
      "grad_norm": 0.36222243309020996,
      "learning_rate": 5.936558976825709e-06,
      "loss": 0.0125,
      "step": 1253360
    },
    {
      "epoch": 2.051183859966091,
      "grad_norm": 0.5566585659980774,
      "learning_rate": 5.936493084612192e-06,
      "loss": 0.0144,
      "step": 1253380
    },
    {
      "epoch": 2.051216590404745,
      "grad_norm": 3.6791279315948486,
      "learning_rate": 5.936427192398675e-06,
      "loss": 0.0213,
      "step": 1253400
    },
    {
      "epoch": 2.051249320843398,
      "grad_norm": 0.22667987644672394,
      "learning_rate": 5.936361300185157e-06,
      "loss": 0.0139,
      "step": 1253420
    },
    {
      "epoch": 2.051282051282051,
      "grad_norm": 0.5470727682113647,
      "learning_rate": 5.936295407971641e-06,
      "loss": 0.0139,
      "step": 1253440
    },
    {
      "epoch": 2.0513147817207047,
      "grad_norm": 0.8487567901611328,
      "learning_rate": 5.936229515758123e-06,
      "loss": 0.0127,
      "step": 1253460
    },
    {
      "epoch": 2.051347512159358,
      "grad_norm": 0.3770935833454132,
      "learning_rate": 5.9361636235446065e-06,
      "loss": 0.0148,
      "step": 1253480
    },
    {
      "epoch": 2.0513802425980114,
      "grad_norm": 0.7974528670310974,
      "learning_rate": 5.936097731331088e-06,
      "loss": 0.0126,
      "step": 1253500
    },
    {
      "epoch": 2.0514129730366646,
      "grad_norm": 0.4909573197364807,
      "learning_rate": 5.936031839117572e-06,
      "loss": 0.0123,
      "step": 1253520
    },
    {
      "epoch": 2.051445703475318,
      "grad_norm": 0.3094121813774109,
      "learning_rate": 5.9359659469040556e-06,
      "loss": 0.0185,
      "step": 1253540
    },
    {
      "epoch": 2.0514784339139713,
      "grad_norm": 0.41767099499702454,
      "learning_rate": 5.9359000546905374e-06,
      "loss": 0.0212,
      "step": 1253560
    },
    {
      "epoch": 2.0515111643526245,
      "grad_norm": 0.28699350357055664,
      "learning_rate": 5.935834162477021e-06,
      "loss": 0.0156,
      "step": 1253580
    },
    {
      "epoch": 2.051543894791278,
      "grad_norm": 0.472708523273468,
      "learning_rate": 5.935768270263503e-06,
      "loss": 0.0147,
      "step": 1253600
    },
    {
      "epoch": 2.0515766252299312,
      "grad_norm": 0.16639918088912964,
      "learning_rate": 5.9357023780499865e-06,
      "loss": 0.0143,
      "step": 1253620
    },
    {
      "epoch": 2.051609355668585,
      "grad_norm": 0.09267012029886246,
      "learning_rate": 5.935636485836469e-06,
      "loss": 0.0197,
      "step": 1253640
    },
    {
      "epoch": 2.051642086107238,
      "grad_norm": 1.024372935295105,
      "learning_rate": 5.935570593622952e-06,
      "loss": 0.0199,
      "step": 1253660
    },
    {
      "epoch": 2.0516748165458916,
      "grad_norm": 0.24302726984024048,
      "learning_rate": 5.935504701409435e-06,
      "loss": 0.0164,
      "step": 1253680
    },
    {
      "epoch": 2.0517075469845447,
      "grad_norm": 0.3965175449848175,
      "learning_rate": 5.935438809195918e-06,
      "loss": 0.0117,
      "step": 1253700
    },
    {
      "epoch": 2.051740277423198,
      "grad_norm": 0.2525547742843628,
      "learning_rate": 5.9353729169824e-06,
      "loss": 0.0202,
      "step": 1253720
    },
    {
      "epoch": 2.0517730078618515,
      "grad_norm": 0.40920814871788025,
      "learning_rate": 5.935307024768884e-06,
      "loss": 0.0148,
      "step": 1253740
    },
    {
      "epoch": 2.0518057383005046,
      "grad_norm": 0.7913705110549927,
      "learning_rate": 5.935241132555366e-06,
      "loss": 0.0143,
      "step": 1253760
    },
    {
      "epoch": 2.051838468739158,
      "grad_norm": 0.3240625560283661,
      "learning_rate": 5.935175240341849e-06,
      "loss": 0.0151,
      "step": 1253780
    },
    {
      "epoch": 2.0518711991778114,
      "grad_norm": 0.4036976993083954,
      "learning_rate": 5.935109348128332e-06,
      "loss": 0.0156,
      "step": 1253800
    },
    {
      "epoch": 2.0519039296164645,
      "grad_norm": 0.40823227167129517,
      "learning_rate": 5.935043455914815e-06,
      "loss": 0.0161,
      "step": 1253820
    },
    {
      "epoch": 2.051936660055118,
      "grad_norm": 0.28546395897865295,
      "learning_rate": 5.9349775637012975e-06,
      "loss": 0.0153,
      "step": 1253840
    },
    {
      "epoch": 2.0519693904937713,
      "grad_norm": 1.038973331451416,
      "learning_rate": 5.934911671487781e-06,
      "loss": 0.0164,
      "step": 1253860
    },
    {
      "epoch": 2.052002120932425,
      "grad_norm": 0.124630406498909,
      "learning_rate": 5.934845779274264e-06,
      "loss": 0.0145,
      "step": 1253880
    },
    {
      "epoch": 2.052034851371078,
      "grad_norm": 0.056631769984960556,
      "learning_rate": 5.9347798870607466e-06,
      "loss": 0.0152,
      "step": 1253900
    },
    {
      "epoch": 2.0520675818097316,
      "grad_norm": 0.6600392460823059,
      "learning_rate": 5.93471399484723e-06,
      "loss": 0.016,
      "step": 1253920
    },
    {
      "epoch": 2.0521003122483847,
      "grad_norm": 0.4501936733722687,
      "learning_rate": 5.934648102633712e-06,
      "loss": 0.0143,
      "step": 1253940
    },
    {
      "epoch": 2.052133042687038,
      "grad_norm": 0.5059641003608704,
      "learning_rate": 5.934582210420196e-06,
      "loss": 0.0112,
      "step": 1253960
    },
    {
      "epoch": 2.0521657731256915,
      "grad_norm": 0.2876676023006439,
      "learning_rate": 5.9345163182066775e-06,
      "loss": 0.0196,
      "step": 1253980
    },
    {
      "epoch": 2.0521985035643446,
      "grad_norm": 0.6909398436546326,
      "learning_rate": 5.934450425993161e-06,
      "loss": 0.0117,
      "step": 1254000
    },
    {
      "epoch": 2.0522312340029982,
      "grad_norm": 0.4431419372558594,
      "learning_rate": 5.934384533779644e-06,
      "loss": 0.0223,
      "step": 1254020
    },
    {
      "epoch": 2.0522639644416514,
      "grad_norm": 0.16200411319732666,
      "learning_rate": 5.934318641566127e-06,
      "loss": 0.0156,
      "step": 1254040
    },
    {
      "epoch": 2.052296694880305,
      "grad_norm": 0.38817474246025085,
      "learning_rate": 5.934252749352609e-06,
      "loss": 0.0146,
      "step": 1254060
    },
    {
      "epoch": 2.052329425318958,
      "grad_norm": 0.6406174302101135,
      "learning_rate": 5.934186857139093e-06,
      "loss": 0.0123,
      "step": 1254080
    },
    {
      "epoch": 2.0523621557576113,
      "grad_norm": 0.22225728631019592,
      "learning_rate": 5.934120964925575e-06,
      "loss": 0.0171,
      "step": 1254100
    },
    {
      "epoch": 2.052394886196265,
      "grad_norm": 0.065528005361557,
      "learning_rate": 5.934055072712058e-06,
      "loss": 0.0199,
      "step": 1254120
    },
    {
      "epoch": 2.052427616634918,
      "grad_norm": 0.392702579498291,
      "learning_rate": 5.93398918049854e-06,
      "loss": 0.0196,
      "step": 1254140
    },
    {
      "epoch": 2.0524603470735716,
      "grad_norm": 0.170392706990242,
      "learning_rate": 5.933923288285024e-06,
      "loss": 0.0158,
      "step": 1254160
    },
    {
      "epoch": 2.0524930775122248,
      "grad_norm": 0.7618127465248108,
      "learning_rate": 5.933857396071507e-06,
      "loss": 0.0194,
      "step": 1254180
    },
    {
      "epoch": 2.0525258079508784,
      "grad_norm": 0.5374180674552917,
      "learning_rate": 5.933791503857989e-06,
      "loss": 0.0189,
      "step": 1254200
    },
    {
      "epoch": 2.0525585383895315,
      "grad_norm": 0.09475719928741455,
      "learning_rate": 5.933725611644472e-06,
      "loss": 0.0195,
      "step": 1254220
    },
    {
      "epoch": 2.0525912688281847,
      "grad_norm": 0.16992761194705963,
      "learning_rate": 5.933659719430956e-06,
      "loss": 0.0124,
      "step": 1254240
    },
    {
      "epoch": 2.0526239992668383,
      "grad_norm": 0.3438144326210022,
      "learning_rate": 5.933593827217438e-06,
      "loss": 0.0128,
      "step": 1254260
    },
    {
      "epoch": 2.0526567297054914,
      "grad_norm": 0.25624167919158936,
      "learning_rate": 5.933527935003921e-06,
      "loss": 0.0233,
      "step": 1254280
    },
    {
      "epoch": 2.052689460144145,
      "grad_norm": 0.31033065915107727,
      "learning_rate": 5.933462042790405e-06,
      "loss": 0.0157,
      "step": 1254300
    },
    {
      "epoch": 2.052722190582798,
      "grad_norm": 0.2914467751979828,
      "learning_rate": 5.933396150576887e-06,
      "loss": 0.015,
      "step": 1254320
    },
    {
      "epoch": 2.0527549210214517,
      "grad_norm": 0.4955756366252899,
      "learning_rate": 5.93333025836337e-06,
      "loss": 0.0193,
      "step": 1254340
    },
    {
      "epoch": 2.052787651460105,
      "grad_norm": 0.7232423424720764,
      "learning_rate": 5.933264366149852e-06,
      "loss": 0.0155,
      "step": 1254360
    },
    {
      "epoch": 2.052820381898758,
      "grad_norm": 0.4755859971046448,
      "learning_rate": 5.933198473936336e-06,
      "loss": 0.0176,
      "step": 1254380
    },
    {
      "epoch": 2.0528531123374116,
      "grad_norm": 0.48646676540374756,
      "learning_rate": 5.9331325817228184e-06,
      "loss": 0.0106,
      "step": 1254400
    },
    {
      "epoch": 2.052885842776065,
      "grad_norm": 0.31080424785614014,
      "learning_rate": 5.933066689509301e-06,
      "loss": 0.0188,
      "step": 1254420
    },
    {
      "epoch": 2.0529185732147184,
      "grad_norm": 0.25776994228363037,
      "learning_rate": 5.933000797295784e-06,
      "loss": 0.0106,
      "step": 1254440
    },
    {
      "epoch": 2.0529513036533715,
      "grad_norm": 0.5856603384017944,
      "learning_rate": 5.9329349050822675e-06,
      "loss": 0.0222,
      "step": 1254460
    },
    {
      "epoch": 2.0529840340920247,
      "grad_norm": 1.00700843334198,
      "learning_rate": 5.932869012868749e-06,
      "loss": 0.0259,
      "step": 1254480
    },
    {
      "epoch": 2.0530167645306783,
      "grad_norm": 0.5047051906585693,
      "learning_rate": 5.932803120655233e-06,
      "loss": 0.0126,
      "step": 1254500
    },
    {
      "epoch": 2.0530494949693314,
      "grad_norm": 0.37281638383865356,
      "learning_rate": 5.932737228441715e-06,
      "loss": 0.0144,
      "step": 1254520
    },
    {
      "epoch": 2.053082225407985,
      "grad_norm": 0.24720753729343414,
      "learning_rate": 5.9326713362281985e-06,
      "loss": 0.0094,
      "step": 1254540
    },
    {
      "epoch": 2.053114955846638,
      "grad_norm": 0.8242179155349731,
      "learning_rate": 5.93260544401468e-06,
      "loss": 0.0144,
      "step": 1254560
    },
    {
      "epoch": 2.0531476862852918,
      "grad_norm": 0.22035422921180725,
      "learning_rate": 5.932539551801164e-06,
      "loss": 0.0127,
      "step": 1254580
    },
    {
      "epoch": 2.053180416723945,
      "grad_norm": 1.6652488708496094,
      "learning_rate": 5.9324736595876475e-06,
      "loss": 0.0202,
      "step": 1254600
    },
    {
      "epoch": 2.053213147162598,
      "grad_norm": 0.8207335472106934,
      "learning_rate": 5.9324077673741294e-06,
      "loss": 0.0196,
      "step": 1254620
    },
    {
      "epoch": 2.0532458776012517,
      "grad_norm": 0.10292167216539383,
      "learning_rate": 5.932341875160613e-06,
      "loss": 0.0203,
      "step": 1254640
    },
    {
      "epoch": 2.053278608039905,
      "grad_norm": 0.20528237521648407,
      "learning_rate": 5.932275982947096e-06,
      "loss": 0.0161,
      "step": 1254660
    },
    {
      "epoch": 2.0533113384785584,
      "grad_norm": 0.44055530428886414,
      "learning_rate": 5.9322100907335785e-06,
      "loss": 0.0171,
      "step": 1254680
    },
    {
      "epoch": 2.0533440689172116,
      "grad_norm": 0.5420222282409668,
      "learning_rate": 5.932144198520061e-06,
      "loss": 0.0185,
      "step": 1254700
    },
    {
      "epoch": 2.053376799355865,
      "grad_norm": 0.5977020263671875,
      "learning_rate": 5.932078306306545e-06,
      "loss": 0.01,
      "step": 1254720
    },
    {
      "epoch": 2.0534095297945183,
      "grad_norm": 0.8572003841400146,
      "learning_rate": 5.932012414093027e-06,
      "loss": 0.0151,
      "step": 1254740
    },
    {
      "epoch": 2.0534422602331714,
      "grad_norm": 0.30029526352882385,
      "learning_rate": 5.93194652187951e-06,
      "loss": 0.0155,
      "step": 1254760
    },
    {
      "epoch": 2.053474990671825,
      "grad_norm": 0.7179191708564758,
      "learning_rate": 5.931880629665992e-06,
      "loss": 0.0127,
      "step": 1254780
    },
    {
      "epoch": 2.053507721110478,
      "grad_norm": 0.39278385043144226,
      "learning_rate": 5.931814737452476e-06,
      "loss": 0.0141,
      "step": 1254800
    },
    {
      "epoch": 2.053540451549132,
      "grad_norm": 0.4920722246170044,
      "learning_rate": 5.9317488452389585e-06,
      "loss": 0.016,
      "step": 1254820
    },
    {
      "epoch": 2.053573181987785,
      "grad_norm": 0.9129524827003479,
      "learning_rate": 5.931682953025441e-06,
      "loss": 0.0103,
      "step": 1254840
    },
    {
      "epoch": 2.0536059124264385,
      "grad_norm": 1.168825387954712,
      "learning_rate": 5.931617060811924e-06,
      "loss": 0.0151,
      "step": 1254860
    },
    {
      "epoch": 2.0536386428650917,
      "grad_norm": 0.29172393679618835,
      "learning_rate": 5.931551168598408e-06,
      "loss": 0.0201,
      "step": 1254880
    },
    {
      "epoch": 2.053671373303745,
      "grad_norm": 0.5065069198608398,
      "learning_rate": 5.9314852763848895e-06,
      "loss": 0.014,
      "step": 1254900
    },
    {
      "epoch": 2.0537041037423984,
      "grad_norm": 0.8305818438529968,
      "learning_rate": 5.931419384171373e-06,
      "loss": 0.0236,
      "step": 1254920
    },
    {
      "epoch": 2.0537368341810516,
      "grad_norm": 1.6357147693634033,
      "learning_rate": 5.931353491957857e-06,
      "loss": 0.0127,
      "step": 1254940
    },
    {
      "epoch": 2.053769564619705,
      "grad_norm": 0.5121824741363525,
      "learning_rate": 5.9312875997443385e-06,
      "loss": 0.0139,
      "step": 1254960
    },
    {
      "epoch": 2.0538022950583583,
      "grad_norm": 0.6365246772766113,
      "learning_rate": 5.931221707530822e-06,
      "loss": 0.0202,
      "step": 1254980
    },
    {
      "epoch": 2.053835025497012,
      "grad_norm": 0.5517683029174805,
      "learning_rate": 5.931155815317304e-06,
      "loss": 0.0147,
      "step": 1255000
    },
    {
      "epoch": 2.053867755935665,
      "grad_norm": 0.09573537111282349,
      "learning_rate": 5.931089923103788e-06,
      "loss": 0.0136,
      "step": 1255020
    },
    {
      "epoch": 2.053900486374318,
      "grad_norm": 0.21404142677783966,
      "learning_rate": 5.93102403089027e-06,
      "loss": 0.0132,
      "step": 1255040
    },
    {
      "epoch": 2.053933216812972,
      "grad_norm": 0.39235720038414,
      "learning_rate": 5.930958138676753e-06,
      "loss": 0.0169,
      "step": 1255060
    },
    {
      "epoch": 2.053965947251625,
      "grad_norm": 0.6375845074653625,
      "learning_rate": 5.930892246463236e-06,
      "loss": 0.0119,
      "step": 1255080
    },
    {
      "epoch": 2.0539986776902786,
      "grad_norm": 0.5358296632766724,
      "learning_rate": 5.930826354249719e-06,
      "loss": 0.0145,
      "step": 1255100
    },
    {
      "epoch": 2.0540314081289317,
      "grad_norm": 0.24527545273303986,
      "learning_rate": 5.930760462036201e-06,
      "loss": 0.0141,
      "step": 1255120
    },
    {
      "epoch": 2.0540641385675853,
      "grad_norm": 0.9848695993423462,
      "learning_rate": 5.930694569822685e-06,
      "loss": 0.0215,
      "step": 1255140
    },
    {
      "epoch": 2.0540968690062384,
      "grad_norm": 0.16175681352615356,
      "learning_rate": 5.930628677609167e-06,
      "loss": 0.0133,
      "step": 1255160
    },
    {
      "epoch": 2.0541295994448916,
      "grad_norm": 0.36983945965766907,
      "learning_rate": 5.93056278539565e-06,
      "loss": 0.0139,
      "step": 1255180
    },
    {
      "epoch": 2.054162329883545,
      "grad_norm": 0.17871025204658508,
      "learning_rate": 5.930496893182133e-06,
      "loss": 0.0092,
      "step": 1255200
    },
    {
      "epoch": 2.0541950603221983,
      "grad_norm": 0.7630621194839478,
      "learning_rate": 5.930431000968616e-06,
      "loss": 0.019,
      "step": 1255220
    },
    {
      "epoch": 2.054227790760852,
      "grad_norm": 0.18814589083194733,
      "learning_rate": 5.930365108755099e-06,
      "loss": 0.0172,
      "step": 1255240
    },
    {
      "epoch": 2.054260521199505,
      "grad_norm": 0.9092550873756409,
      "learning_rate": 5.930299216541582e-06,
      "loss": 0.0216,
      "step": 1255260
    },
    {
      "epoch": 2.0542932516381587,
      "grad_norm": 0.5267077684402466,
      "learning_rate": 5.930233324328064e-06,
      "loss": 0.0114,
      "step": 1255280
    },
    {
      "epoch": 2.054325982076812,
      "grad_norm": 1.1670091152191162,
      "learning_rate": 5.930167432114548e-06,
      "loss": 0.0114,
      "step": 1255300
    },
    {
      "epoch": 2.054358712515465,
      "grad_norm": 0.1632380187511444,
      "learning_rate": 5.930101539901031e-06,
      "loss": 0.014,
      "step": 1255320
    },
    {
      "epoch": 2.0543914429541186,
      "grad_norm": 0.2312956154346466,
      "learning_rate": 5.930035647687513e-06,
      "loss": 0.0138,
      "step": 1255340
    },
    {
      "epoch": 2.0544241733927717,
      "grad_norm": 0.14157593250274658,
      "learning_rate": 5.929969755473997e-06,
      "loss": 0.021,
      "step": 1255360
    },
    {
      "epoch": 2.0544569038314253,
      "grad_norm": 0.44654718041419983,
      "learning_rate": 5.929903863260479e-06,
      "loss": 0.0164,
      "step": 1255380
    },
    {
      "epoch": 2.0544896342700785,
      "grad_norm": 0.34486740827560425,
      "learning_rate": 5.929837971046962e-06,
      "loss": 0.0181,
      "step": 1255400
    },
    {
      "epoch": 2.0545223647087316,
      "grad_norm": 1.2851288318634033,
      "learning_rate": 5.929772078833444e-06,
      "loss": 0.0135,
      "step": 1255420
    },
    {
      "epoch": 2.054555095147385,
      "grad_norm": 0.23158088326454163,
      "learning_rate": 5.929706186619928e-06,
      "loss": 0.0247,
      "step": 1255440
    },
    {
      "epoch": 2.0545878255860384,
      "grad_norm": 0.25674349069595337,
      "learning_rate": 5.9296402944064104e-06,
      "loss": 0.0181,
      "step": 1255460
    },
    {
      "epoch": 2.054620556024692,
      "grad_norm": 0.25342318415641785,
      "learning_rate": 5.929574402192894e-06,
      "loss": 0.0149,
      "step": 1255480
    },
    {
      "epoch": 2.054653286463345,
      "grad_norm": 0.5579944849014282,
      "learning_rate": 5.929508509979376e-06,
      "loss": 0.018,
      "step": 1255500
    },
    {
      "epoch": 2.0546860169019987,
      "grad_norm": 0.1357971429824829,
      "learning_rate": 5.9294426177658595e-06,
      "loss": 0.0116,
      "step": 1255520
    },
    {
      "epoch": 2.054718747340652,
      "grad_norm": 0.2777235209941864,
      "learning_rate": 5.929376725552341e-06,
      "loss": 0.0182,
      "step": 1255540
    },
    {
      "epoch": 2.054751477779305,
      "grad_norm": 0.27314406633377075,
      "learning_rate": 5.929310833338825e-06,
      "loss": 0.0123,
      "step": 1255560
    },
    {
      "epoch": 2.0547842082179586,
      "grad_norm": 0.12331034988164902,
      "learning_rate": 5.929244941125307e-06,
      "loss": 0.011,
      "step": 1255580
    },
    {
      "epoch": 2.0548169386566117,
      "grad_norm": 0.22563984990119934,
      "learning_rate": 5.9291790489117905e-06,
      "loss": 0.0201,
      "step": 1255600
    },
    {
      "epoch": 2.0548496690952653,
      "grad_norm": 0.08638324588537216,
      "learning_rate": 5.929113156698273e-06,
      "loss": 0.0106,
      "step": 1255620
    },
    {
      "epoch": 2.0548823995339185,
      "grad_norm": 0.058209873735904694,
      "learning_rate": 5.929047264484756e-06,
      "loss": 0.0139,
      "step": 1255640
    },
    {
      "epoch": 2.054915129972572,
      "grad_norm": 0.24200323224067688,
      "learning_rate": 5.9289813722712395e-06,
      "loss": 0.0131,
      "step": 1255660
    },
    {
      "epoch": 2.0549478604112252,
      "grad_norm": 1.8949609994888306,
      "learning_rate": 5.928915480057722e-06,
      "loss": 0.0193,
      "step": 1255680
    },
    {
      "epoch": 2.0549805908498784,
      "grad_norm": 0.5821830034255981,
      "learning_rate": 5.928849587844205e-06,
      "loss": 0.0141,
      "step": 1255700
    },
    {
      "epoch": 2.055013321288532,
      "grad_norm": 0.23200802505016327,
      "learning_rate": 5.928783695630688e-06,
      "loss": 0.0155,
      "step": 1255720
    },
    {
      "epoch": 2.055046051727185,
      "grad_norm": 0.11498697102069855,
      "learning_rate": 5.928717803417171e-06,
      "loss": 0.0148,
      "step": 1255740
    },
    {
      "epoch": 2.0550787821658387,
      "grad_norm": 0.39424964785575867,
      "learning_rate": 5.928651911203653e-06,
      "loss": 0.014,
      "step": 1255760
    },
    {
      "epoch": 2.055111512604492,
      "grad_norm": 0.2777469754219055,
      "learning_rate": 5.928586018990137e-06,
      "loss": 0.0218,
      "step": 1255780
    },
    {
      "epoch": 2.0551442430431455,
      "grad_norm": 0.13352102041244507,
      "learning_rate": 5.928520126776619e-06,
      "loss": 0.0111,
      "step": 1255800
    },
    {
      "epoch": 2.0551769734817986,
      "grad_norm": 0.7267667651176453,
      "learning_rate": 5.928454234563102e-06,
      "loss": 0.013,
      "step": 1255820
    },
    {
      "epoch": 2.0552097039204518,
      "grad_norm": 0.3784678280353546,
      "learning_rate": 5.928388342349585e-06,
      "loss": 0.0168,
      "step": 1255840
    },
    {
      "epoch": 2.0552424343591054,
      "grad_norm": 0.2678450345993042,
      "learning_rate": 5.928322450136068e-06,
      "loss": 0.0158,
      "step": 1255860
    },
    {
      "epoch": 2.0552751647977585,
      "grad_norm": 0.31505289673805237,
      "learning_rate": 5.9282565579225505e-06,
      "loss": 0.0158,
      "step": 1255880
    },
    {
      "epoch": 2.055307895236412,
      "grad_norm": 0.40140801668167114,
      "learning_rate": 5.928190665709034e-06,
      "loss": 0.0207,
      "step": 1255900
    },
    {
      "epoch": 2.0553406256750653,
      "grad_norm": 0.5164756774902344,
      "learning_rate": 5.928124773495516e-06,
      "loss": 0.0152,
      "step": 1255920
    },
    {
      "epoch": 2.055373356113719,
      "grad_norm": 0.5431020259857178,
      "learning_rate": 5.9280588812819996e-06,
      "loss": 0.0156,
      "step": 1255940
    },
    {
      "epoch": 2.055406086552372,
      "grad_norm": 0.32112324237823486,
      "learning_rate": 5.9279929890684815e-06,
      "loss": 0.0131,
      "step": 1255960
    },
    {
      "epoch": 2.055438816991025,
      "grad_norm": 0.09193364530801773,
      "learning_rate": 5.927927096854965e-06,
      "loss": 0.0132,
      "step": 1255980
    },
    {
      "epoch": 2.0554715474296787,
      "grad_norm": 0.3218279778957367,
      "learning_rate": 5.927861204641449e-06,
      "loss": 0.0102,
      "step": 1256000
    },
    {
      "epoch": 2.055504277868332,
      "grad_norm": 0.3805563747882843,
      "learning_rate": 5.9277953124279305e-06,
      "loss": 0.0138,
      "step": 1256020
    },
    {
      "epoch": 2.0555370083069855,
      "grad_norm": 0.4958518445491791,
      "learning_rate": 5.927729420214414e-06,
      "loss": 0.0159,
      "step": 1256040
    },
    {
      "epoch": 2.0555697387456386,
      "grad_norm": 0.2515522241592407,
      "learning_rate": 5.927663528000897e-06,
      "loss": 0.0132,
      "step": 1256060
    },
    {
      "epoch": 2.055602469184292,
      "grad_norm": 0.40809422731399536,
      "learning_rate": 5.92759763578738e-06,
      "loss": 0.016,
      "step": 1256080
    },
    {
      "epoch": 2.0556351996229454,
      "grad_norm": 0.07956118136644363,
      "learning_rate": 5.927531743573862e-06,
      "loss": 0.0177,
      "step": 1256100
    },
    {
      "epoch": 2.0556679300615985,
      "grad_norm": 0.7354145050048828,
      "learning_rate": 5.927465851360346e-06,
      "loss": 0.0167,
      "step": 1256120
    },
    {
      "epoch": 2.055700660500252,
      "grad_norm": 0.6713957190513611,
      "learning_rate": 5.927399959146828e-06,
      "loss": 0.0165,
      "step": 1256140
    },
    {
      "epoch": 2.0557333909389053,
      "grad_norm": 0.30500927567481995,
      "learning_rate": 5.927334066933311e-06,
      "loss": 0.0159,
      "step": 1256160
    },
    {
      "epoch": 2.055766121377559,
      "grad_norm": 1.0542936325073242,
      "learning_rate": 5.927268174719793e-06,
      "loss": 0.0147,
      "step": 1256180
    },
    {
      "epoch": 2.055798851816212,
      "grad_norm": 0.6797611117362976,
      "learning_rate": 5.927202282506277e-06,
      "loss": 0.0123,
      "step": 1256200
    },
    {
      "epoch": 2.055831582254865,
      "grad_norm": 1.365211844444275,
      "learning_rate": 5.92713639029276e-06,
      "loss": 0.0133,
      "step": 1256220
    },
    {
      "epoch": 2.0558643126935188,
      "grad_norm": 1.2031793594360352,
      "learning_rate": 5.927070498079242e-06,
      "loss": 0.0167,
      "step": 1256240
    },
    {
      "epoch": 2.055897043132172,
      "grad_norm": 0.24292945861816406,
      "learning_rate": 5.927004605865725e-06,
      "loss": 0.0144,
      "step": 1256260
    },
    {
      "epoch": 2.0559297735708255,
      "grad_norm": 0.2030797302722931,
      "learning_rate": 5.926938713652209e-06,
      "loss": 0.0198,
      "step": 1256280
    },
    {
      "epoch": 2.0559625040094787,
      "grad_norm": 0.5319060683250427,
      "learning_rate": 5.926872821438691e-06,
      "loss": 0.0131,
      "step": 1256300
    },
    {
      "epoch": 2.0559952344481323,
      "grad_norm": 0.3332524597644806,
      "learning_rate": 5.926806929225174e-06,
      "loss": 0.0112,
      "step": 1256320
    },
    {
      "epoch": 2.0560279648867854,
      "grad_norm": 0.5132928490638733,
      "learning_rate": 5.926741037011658e-06,
      "loss": 0.0147,
      "step": 1256340
    },
    {
      "epoch": 2.0560606953254386,
      "grad_norm": 0.21318209171295166,
      "learning_rate": 5.92667514479814e-06,
      "loss": 0.0222,
      "step": 1256360
    },
    {
      "epoch": 2.056093425764092,
      "grad_norm": 0.4022095799446106,
      "learning_rate": 5.926609252584623e-06,
      "loss": 0.0156,
      "step": 1256380
    },
    {
      "epoch": 2.0561261562027453,
      "grad_norm": 1.0664061307907104,
      "learning_rate": 5.926543360371105e-06,
      "loss": 0.0138,
      "step": 1256400
    },
    {
      "epoch": 2.056158886641399,
      "grad_norm": 0.5990619659423828,
      "learning_rate": 5.926477468157589e-06,
      "loss": 0.0236,
      "step": 1256420
    },
    {
      "epoch": 2.056191617080052,
      "grad_norm": 0.10479923337697983,
      "learning_rate": 5.926411575944071e-06,
      "loss": 0.021,
      "step": 1256440
    },
    {
      "epoch": 2.0562243475187056,
      "grad_norm": 0.3426300287246704,
      "learning_rate": 5.926345683730554e-06,
      "loss": 0.0112,
      "step": 1256460
    },
    {
      "epoch": 2.056257077957359,
      "grad_norm": 0.3250516951084137,
      "learning_rate": 5.926279791517037e-06,
      "loss": 0.0182,
      "step": 1256480
    },
    {
      "epoch": 2.056289808396012,
      "grad_norm": 0.8365347385406494,
      "learning_rate": 5.92621389930352e-06,
      "loss": 0.0157,
      "step": 1256500
    },
    {
      "epoch": 2.0563225388346655,
      "grad_norm": 0.27891039848327637,
      "learning_rate": 5.926148007090002e-06,
      "loss": 0.017,
      "step": 1256520
    },
    {
      "epoch": 2.0563552692733187,
      "grad_norm": 0.967034637928009,
      "learning_rate": 5.926082114876486e-06,
      "loss": 0.0246,
      "step": 1256540
    },
    {
      "epoch": 2.0563879997119723,
      "grad_norm": 1.2574143409729004,
      "learning_rate": 5.926016222662968e-06,
      "loss": 0.0159,
      "step": 1256560
    },
    {
      "epoch": 2.0564207301506254,
      "grad_norm": 0.5242470502853394,
      "learning_rate": 5.9259503304494515e-06,
      "loss": 0.0147,
      "step": 1256580
    },
    {
      "epoch": 2.056453460589279,
      "grad_norm": 0.40704742074012756,
      "learning_rate": 5.925884438235933e-06,
      "loss": 0.0135,
      "step": 1256600
    },
    {
      "epoch": 2.056486191027932,
      "grad_norm": 0.3353787362575531,
      "learning_rate": 5.925818546022417e-06,
      "loss": 0.0088,
      "step": 1256620
    },
    {
      "epoch": 2.0565189214665853,
      "grad_norm": 0.08273182809352875,
      "learning_rate": 5.9257526538089e-06,
      "loss": 0.0132,
      "step": 1256640
    },
    {
      "epoch": 2.056551651905239,
      "grad_norm": 0.6116198301315308,
      "learning_rate": 5.9256867615953824e-06,
      "loss": 0.0139,
      "step": 1256660
    },
    {
      "epoch": 2.056584382343892,
      "grad_norm": 0.29209959506988525,
      "learning_rate": 5.925620869381865e-06,
      "loss": 0.0132,
      "step": 1256680
    },
    {
      "epoch": 2.0566171127825457,
      "grad_norm": 0.46395358443260193,
      "learning_rate": 5.925554977168349e-06,
      "loss": 0.0111,
      "step": 1256700
    },
    {
      "epoch": 2.056649843221199,
      "grad_norm": 0.35211342573165894,
      "learning_rate": 5.9254890849548315e-06,
      "loss": 0.0183,
      "step": 1256720
    },
    {
      "epoch": 2.0566825736598524,
      "grad_norm": 0.3932459056377411,
      "learning_rate": 5.925423192741314e-06,
      "loss": 0.0105,
      "step": 1256740
    },
    {
      "epoch": 2.0567153040985056,
      "grad_norm": 0.3820023238658905,
      "learning_rate": 5.925357300527798e-06,
      "loss": 0.0183,
      "step": 1256760
    },
    {
      "epoch": 2.0567480345371587,
      "grad_norm": 0.18748024106025696,
      "learning_rate": 5.92529140831428e-06,
      "loss": 0.0147,
      "step": 1256780
    },
    {
      "epoch": 2.0567807649758123,
      "grad_norm": 1.454582929611206,
      "learning_rate": 5.925225516100763e-06,
      "loss": 0.0118,
      "step": 1256800
    },
    {
      "epoch": 2.0568134954144655,
      "grad_norm": 0.09912767261266708,
      "learning_rate": 5.925159623887245e-06,
      "loss": 0.0167,
      "step": 1256820
    },
    {
      "epoch": 2.056846225853119,
      "grad_norm": 0.5317914485931396,
      "learning_rate": 5.925093731673729e-06,
      "loss": 0.0188,
      "step": 1256840
    },
    {
      "epoch": 2.056878956291772,
      "grad_norm": 0.22994062304496765,
      "learning_rate": 5.9250278394602115e-06,
      "loss": 0.0145,
      "step": 1256860
    },
    {
      "epoch": 2.0569116867304253,
      "grad_norm": 0.4394329786300659,
      "learning_rate": 5.924961947246694e-06,
      "loss": 0.0141,
      "step": 1256880
    },
    {
      "epoch": 2.056944417169079,
      "grad_norm": 0.558525562286377,
      "learning_rate": 5.924896055033177e-06,
      "loss": 0.015,
      "step": 1256900
    },
    {
      "epoch": 2.056977147607732,
      "grad_norm": 0.5327529311180115,
      "learning_rate": 5.924830162819661e-06,
      "loss": 0.0185,
      "step": 1256920
    },
    {
      "epoch": 2.0570098780463857,
      "grad_norm": 0.5998110175132751,
      "learning_rate": 5.9247642706061425e-06,
      "loss": 0.0101,
      "step": 1256940
    },
    {
      "epoch": 2.057042608485039,
      "grad_norm": 0.1604335755109787,
      "learning_rate": 5.924698378392626e-06,
      "loss": 0.0172,
      "step": 1256960
    },
    {
      "epoch": 2.0570753389236924,
      "grad_norm": 0.692383348941803,
      "learning_rate": 5.924632486179108e-06,
      "loss": 0.0168,
      "step": 1256980
    },
    {
      "epoch": 2.0571080693623456,
      "grad_norm": 0.32863834500312805,
      "learning_rate": 5.9245665939655916e-06,
      "loss": 0.0145,
      "step": 1257000
    },
    {
      "epoch": 2.0571407998009987,
      "grad_norm": 0.16350053250789642,
      "learning_rate": 5.924500701752074e-06,
      "loss": 0.0139,
      "step": 1257020
    },
    {
      "epoch": 2.0571735302396523,
      "grad_norm": 0.3840469419956207,
      "learning_rate": 5.924434809538557e-06,
      "loss": 0.0145,
      "step": 1257040
    },
    {
      "epoch": 2.0572062606783055,
      "grad_norm": 0.24930815398693085,
      "learning_rate": 5.924368917325041e-06,
      "loss": 0.013,
      "step": 1257060
    },
    {
      "epoch": 2.057238991116959,
      "grad_norm": 0.10012669861316681,
      "learning_rate": 5.924303025111523e-06,
      "loss": 0.0216,
      "step": 1257080
    },
    {
      "epoch": 2.057271721555612,
      "grad_norm": 0.045805297791957855,
      "learning_rate": 5.924237132898006e-06,
      "loss": 0.0136,
      "step": 1257100
    },
    {
      "epoch": 2.057304451994266,
      "grad_norm": 1.7570135593414307,
      "learning_rate": 5.924171240684489e-06,
      "loss": 0.0129,
      "step": 1257120
    },
    {
      "epoch": 2.057337182432919,
      "grad_norm": 0.7833275198936462,
      "learning_rate": 5.9241053484709724e-06,
      "loss": 0.0132,
      "step": 1257140
    },
    {
      "epoch": 2.057369912871572,
      "grad_norm": 0.07748425751924515,
      "learning_rate": 5.924039456257454e-06,
      "loss": 0.0157,
      "step": 1257160
    },
    {
      "epoch": 2.0574026433102257,
      "grad_norm": 0.37407466769218445,
      "learning_rate": 5.923973564043938e-06,
      "loss": 0.0161,
      "step": 1257180
    },
    {
      "epoch": 2.057435373748879,
      "grad_norm": 0.34455063939094543,
      "learning_rate": 5.92390767183042e-06,
      "loss": 0.0163,
      "step": 1257200
    },
    {
      "epoch": 2.0574681041875325,
      "grad_norm": 0.35223791003227234,
      "learning_rate": 5.923841779616903e-06,
      "loss": 0.0201,
      "step": 1257220
    },
    {
      "epoch": 2.0575008346261856,
      "grad_norm": 0.40862196683883667,
      "learning_rate": 5.923775887403386e-06,
      "loss": 0.0166,
      "step": 1257240
    },
    {
      "epoch": 2.057533565064839,
      "grad_norm": 0.11338289082050323,
      "learning_rate": 5.923709995189869e-06,
      "loss": 0.0277,
      "step": 1257260
    },
    {
      "epoch": 2.0575662955034923,
      "grad_norm": 0.4208969473838806,
      "learning_rate": 5.923644102976352e-06,
      "loss": 0.011,
      "step": 1257280
    },
    {
      "epoch": 2.0575990259421455,
      "grad_norm": 0.8247734308242798,
      "learning_rate": 5.923578210762835e-06,
      "loss": 0.0156,
      "step": 1257300
    },
    {
      "epoch": 2.057631756380799,
      "grad_norm": 0.39349332451820374,
      "learning_rate": 5.923512318549317e-06,
      "loss": 0.0201,
      "step": 1257320
    },
    {
      "epoch": 2.0576644868194522,
      "grad_norm": 0.3749634921550751,
      "learning_rate": 5.923446426335801e-06,
      "loss": 0.0128,
      "step": 1257340
    },
    {
      "epoch": 2.057697217258106,
      "grad_norm": 0.5697197914123535,
      "learning_rate": 5.9233805341222826e-06,
      "loss": 0.0116,
      "step": 1257360
    },
    {
      "epoch": 2.057729947696759,
      "grad_norm": 0.721770703792572,
      "learning_rate": 5.923314641908766e-06,
      "loss": 0.0172,
      "step": 1257380
    },
    {
      "epoch": 2.0577626781354126,
      "grad_norm": 0.3345049023628235,
      "learning_rate": 5.92324874969525e-06,
      "loss": 0.0197,
      "step": 1257400
    },
    {
      "epoch": 2.0577954085740657,
      "grad_norm": 0.284107506275177,
      "learning_rate": 5.923182857481732e-06,
      "loss": 0.0157,
      "step": 1257420
    },
    {
      "epoch": 2.057828139012719,
      "grad_norm": 0.40148255228996277,
      "learning_rate": 5.923116965268215e-06,
      "loss": 0.017,
      "step": 1257440
    },
    {
      "epoch": 2.0578608694513725,
      "grad_norm": 0.7070177793502808,
      "learning_rate": 5.923051073054697e-06,
      "loss": 0.0162,
      "step": 1257460
    },
    {
      "epoch": 2.0578935998900256,
      "grad_norm": 0.42868030071258545,
      "learning_rate": 5.922985180841181e-06,
      "loss": 0.0157,
      "step": 1257480
    },
    {
      "epoch": 2.057926330328679,
      "grad_norm": 0.5638749599456787,
      "learning_rate": 5.9229192886276634e-06,
      "loss": 0.0155,
      "step": 1257500
    },
    {
      "epoch": 2.0579590607673324,
      "grad_norm": 0.26918134093284607,
      "learning_rate": 5.922853396414146e-06,
      "loss": 0.0153,
      "step": 1257520
    },
    {
      "epoch": 2.0579917912059855,
      "grad_norm": 0.5932509899139404,
      "learning_rate": 5.922787504200629e-06,
      "loss": 0.0166,
      "step": 1257540
    },
    {
      "epoch": 2.058024521644639,
      "grad_norm": 0.21172276139259338,
      "learning_rate": 5.9227216119871125e-06,
      "loss": 0.0158,
      "step": 1257560
    },
    {
      "epoch": 2.0580572520832923,
      "grad_norm": 0.521722137928009,
      "learning_rate": 5.922655719773594e-06,
      "loss": 0.0136,
      "step": 1257580
    },
    {
      "epoch": 2.058089982521946,
      "grad_norm": 0.655432403087616,
      "learning_rate": 5.922589827560078e-06,
      "loss": 0.013,
      "step": 1257600
    },
    {
      "epoch": 2.058122712960599,
      "grad_norm": 0.3606075048446655,
      "learning_rate": 5.92252393534656e-06,
      "loss": 0.0103,
      "step": 1257620
    },
    {
      "epoch": 2.0581554433992526,
      "grad_norm": 1.0673530101776123,
      "learning_rate": 5.9224580431330435e-06,
      "loss": 0.0166,
      "step": 1257640
    },
    {
      "epoch": 2.0581881738379058,
      "grad_norm": 0.2104293406009674,
      "learning_rate": 5.922392150919526e-06,
      "loss": 0.0148,
      "step": 1257660
    },
    {
      "epoch": 2.058220904276559,
      "grad_norm": 0.22633226215839386,
      "learning_rate": 5.922326258706009e-06,
      "loss": 0.0125,
      "step": 1257680
    },
    {
      "epoch": 2.0582536347152125,
      "grad_norm": 0.6788973808288574,
      "learning_rate": 5.922260366492492e-06,
      "loss": 0.0146,
      "step": 1257700
    },
    {
      "epoch": 2.0582863651538656,
      "grad_norm": 0.08164467662572861,
      "learning_rate": 5.922194474278975e-06,
      "loss": 0.0185,
      "step": 1257720
    },
    {
      "epoch": 2.0583190955925192,
      "grad_norm": 1.5632160902023315,
      "learning_rate": 5.922128582065457e-06,
      "loss": 0.0138,
      "step": 1257740
    },
    {
      "epoch": 2.0583518260311724,
      "grad_norm": 0.9071317911148071,
      "learning_rate": 5.922062689851941e-06,
      "loss": 0.01,
      "step": 1257760
    },
    {
      "epoch": 2.058384556469826,
      "grad_norm": 0.24416190385818481,
      "learning_rate": 5.921996797638424e-06,
      "loss": 0.0176,
      "step": 1257780
    },
    {
      "epoch": 2.058417286908479,
      "grad_norm": 1.0068235397338867,
      "learning_rate": 5.921930905424906e-06,
      "loss": 0.013,
      "step": 1257800
    },
    {
      "epoch": 2.0584500173471323,
      "grad_norm": 0.802034318447113,
      "learning_rate": 5.92186501321139e-06,
      "loss": 0.0162,
      "step": 1257820
    },
    {
      "epoch": 2.058482747785786,
      "grad_norm": 0.071227066218853,
      "learning_rate": 5.921799120997872e-06,
      "loss": 0.0191,
      "step": 1257840
    },
    {
      "epoch": 2.058515478224439,
      "grad_norm": 0.5129415988922119,
      "learning_rate": 5.921733228784355e-06,
      "loss": 0.0116,
      "step": 1257860
    },
    {
      "epoch": 2.0585482086630926,
      "grad_norm": 0.5568025708198547,
      "learning_rate": 5.921667336570838e-06,
      "loss": 0.0196,
      "step": 1257880
    },
    {
      "epoch": 2.0585809391017458,
      "grad_norm": 0.6257389187812805,
      "learning_rate": 5.921601444357321e-06,
      "loss": 0.0126,
      "step": 1257900
    },
    {
      "epoch": 2.0586136695403994,
      "grad_norm": 0.2777508497238159,
      "learning_rate": 5.9215355521438035e-06,
      "loss": 0.0161,
      "step": 1257920
    },
    {
      "epoch": 2.0586463999790525,
      "grad_norm": 0.4011111259460449,
      "learning_rate": 5.921469659930287e-06,
      "loss": 0.0165,
      "step": 1257940
    },
    {
      "epoch": 2.0586791304177057,
      "grad_norm": 0.22581548988819122,
      "learning_rate": 5.921403767716769e-06,
      "loss": 0.0235,
      "step": 1257960
    },
    {
      "epoch": 2.0587118608563593,
      "grad_norm": 0.33261838555336,
      "learning_rate": 5.921337875503253e-06,
      "loss": 0.0153,
      "step": 1257980
    },
    {
      "epoch": 2.0587445912950124,
      "grad_norm": 0.924447774887085,
      "learning_rate": 5.9212719832897345e-06,
      "loss": 0.0186,
      "step": 1258000
    },
    {
      "epoch": 2.058777321733666,
      "grad_norm": 0.6421093940734863,
      "learning_rate": 5.921206091076218e-06,
      "loss": 0.011,
      "step": 1258020
    },
    {
      "epoch": 2.058810052172319,
      "grad_norm": 0.2467164248228073,
      "learning_rate": 5.921140198862701e-06,
      "loss": 0.0124,
      "step": 1258040
    },
    {
      "epoch": 2.0588427826109728,
      "grad_norm": 0.13805441558361053,
      "learning_rate": 5.9210743066491835e-06,
      "loss": 0.0104,
      "step": 1258060
    },
    {
      "epoch": 2.058875513049626,
      "grad_norm": 0.3418658971786499,
      "learning_rate": 5.921008414435666e-06,
      "loss": 0.0136,
      "step": 1258080
    },
    {
      "epoch": 2.058908243488279,
      "grad_norm": 0.6860360503196716,
      "learning_rate": 5.92094252222215e-06,
      "loss": 0.013,
      "step": 1258100
    },
    {
      "epoch": 2.0589409739269326,
      "grad_norm": 1.0615923404693604,
      "learning_rate": 5.920876630008633e-06,
      "loss": 0.0088,
      "step": 1258120
    },
    {
      "epoch": 2.058973704365586,
      "grad_norm": 0.606822669506073,
      "learning_rate": 5.920810737795115e-06,
      "loss": 0.0197,
      "step": 1258140
    },
    {
      "epoch": 2.0590064348042394,
      "grad_norm": 0.6350306272506714,
      "learning_rate": 5.920744845581599e-06,
      "loss": 0.017,
      "step": 1258160
    },
    {
      "epoch": 2.0590391652428925,
      "grad_norm": 0.2518256902694702,
      "learning_rate": 5.920678953368081e-06,
      "loss": 0.0192,
      "step": 1258180
    },
    {
      "epoch": 2.059071895681546,
      "grad_norm": 0.17134283483028412,
      "learning_rate": 5.920613061154564e-06,
      "loss": 0.0214,
      "step": 1258200
    },
    {
      "epoch": 2.0591046261201993,
      "grad_norm": 0.3369670510292053,
      "learning_rate": 5.920547168941046e-06,
      "loss": 0.0156,
      "step": 1258220
    },
    {
      "epoch": 2.0591373565588524,
      "grad_norm": 0.9589271545410156,
      "learning_rate": 5.92048127672753e-06,
      "loss": 0.014,
      "step": 1258240
    },
    {
      "epoch": 2.059170086997506,
      "grad_norm": 0.05158272013068199,
      "learning_rate": 5.920415384514013e-06,
      "loss": 0.0158,
      "step": 1258260
    },
    {
      "epoch": 2.059202817436159,
      "grad_norm": 0.2360823154449463,
      "learning_rate": 5.920349492300495e-06,
      "loss": 0.013,
      "step": 1258280
    },
    {
      "epoch": 2.0592355478748128,
      "grad_norm": 0.2780032455921173,
      "learning_rate": 5.920283600086978e-06,
      "loss": 0.0102,
      "step": 1258300
    },
    {
      "epoch": 2.059268278313466,
      "grad_norm": 0.6849608421325684,
      "learning_rate": 5.920217707873462e-06,
      "loss": 0.0149,
      "step": 1258320
    },
    {
      "epoch": 2.059301008752119,
      "grad_norm": 0.37284478545188904,
      "learning_rate": 5.920151815659944e-06,
      "loss": 0.0115,
      "step": 1258340
    },
    {
      "epoch": 2.0593337391907727,
      "grad_norm": 0.1992989033460617,
      "learning_rate": 5.920085923446427e-06,
      "loss": 0.0116,
      "step": 1258360
    },
    {
      "epoch": 2.059366469629426,
      "grad_norm": 0.7743533253669739,
      "learning_rate": 5.920020031232909e-06,
      "loss": 0.0144,
      "step": 1258380
    },
    {
      "epoch": 2.0593992000680794,
      "grad_norm": 0.06100492179393768,
      "learning_rate": 5.919954139019393e-06,
      "loss": 0.0222,
      "step": 1258400
    },
    {
      "epoch": 2.0594319305067326,
      "grad_norm": 0.6132407188415527,
      "learning_rate": 5.9198882468058745e-06,
      "loss": 0.0141,
      "step": 1258420
    },
    {
      "epoch": 2.059464660945386,
      "grad_norm": 0.19264400005340576,
      "learning_rate": 5.919822354592358e-06,
      "loss": 0.0103,
      "step": 1258440
    },
    {
      "epoch": 2.0594973913840393,
      "grad_norm": 0.40977340936660767,
      "learning_rate": 5.919756462378842e-06,
      "loss": 0.0187,
      "step": 1258460
    },
    {
      "epoch": 2.0595301218226925,
      "grad_norm": 0.178412526845932,
      "learning_rate": 5.919690570165324e-06,
      "loss": 0.0126,
      "step": 1258480
    },
    {
      "epoch": 2.059562852261346,
      "grad_norm": 0.3433507978916168,
      "learning_rate": 5.919624677951807e-06,
      "loss": 0.0143,
      "step": 1258500
    },
    {
      "epoch": 2.059595582699999,
      "grad_norm": 1.2299575805664062,
      "learning_rate": 5.91955878573829e-06,
      "loss": 0.016,
      "step": 1258520
    },
    {
      "epoch": 2.059628313138653,
      "grad_norm": 0.45806896686553955,
      "learning_rate": 5.919492893524773e-06,
      "loss": 0.0118,
      "step": 1258540
    },
    {
      "epoch": 2.059661043577306,
      "grad_norm": 1.6780674457550049,
      "learning_rate": 5.919427001311255e-06,
      "loss": 0.0111,
      "step": 1258560
    },
    {
      "epoch": 2.0596937740159595,
      "grad_norm": 0.46623557806015015,
      "learning_rate": 5.919361109097739e-06,
      "loss": 0.0229,
      "step": 1258580
    },
    {
      "epoch": 2.0597265044546127,
      "grad_norm": 0.6260027289390564,
      "learning_rate": 5.919295216884221e-06,
      "loss": 0.0176,
      "step": 1258600
    },
    {
      "epoch": 2.059759234893266,
      "grad_norm": 0.18225060403347015,
      "learning_rate": 5.9192293246707045e-06,
      "loss": 0.0143,
      "step": 1258620
    },
    {
      "epoch": 2.0597919653319194,
      "grad_norm": 0.33533090353012085,
      "learning_rate": 5.919163432457186e-06,
      "loss": 0.0118,
      "step": 1258640
    },
    {
      "epoch": 2.0598246957705726,
      "grad_norm": 0.45895126461982727,
      "learning_rate": 5.91909754024367e-06,
      "loss": 0.0156,
      "step": 1258660
    },
    {
      "epoch": 2.059857426209226,
      "grad_norm": 0.29110652208328247,
      "learning_rate": 5.919031648030153e-06,
      "loss": 0.0242,
      "step": 1258680
    },
    {
      "epoch": 2.0598901566478793,
      "grad_norm": 0.5527951121330261,
      "learning_rate": 5.9189657558166354e-06,
      "loss": 0.0258,
      "step": 1258700
    },
    {
      "epoch": 2.059922887086533,
      "grad_norm": 0.300981342792511,
      "learning_rate": 5.918899863603118e-06,
      "loss": 0.016,
      "step": 1258720
    },
    {
      "epoch": 2.059955617525186,
      "grad_norm": 0.252898246049881,
      "learning_rate": 5.918833971389602e-06,
      "loss": 0.0154,
      "step": 1258740
    },
    {
      "epoch": 2.0599883479638392,
      "grad_norm": 0.6904799938201904,
      "learning_rate": 5.918768079176084e-06,
      "loss": 0.019,
      "step": 1258760
    },
    {
      "epoch": 2.060021078402493,
      "grad_norm": 0.5911924839019775,
      "learning_rate": 5.918702186962567e-06,
      "loss": 0.0122,
      "step": 1258780
    },
    {
      "epoch": 2.060053808841146,
      "grad_norm": 0.6348331570625305,
      "learning_rate": 5.918636294749049e-06,
      "loss": 0.0143,
      "step": 1258800
    },
    {
      "epoch": 2.0600865392797996,
      "grad_norm": 0.49846625328063965,
      "learning_rate": 5.918570402535533e-06,
      "loss": 0.012,
      "step": 1258820
    },
    {
      "epoch": 2.0601192697184527,
      "grad_norm": 0.2974993586540222,
      "learning_rate": 5.918504510322016e-06,
      "loss": 0.0164,
      "step": 1258840
    },
    {
      "epoch": 2.0601520001571063,
      "grad_norm": 0.7820159792900085,
      "learning_rate": 5.918438618108498e-06,
      "loss": 0.0112,
      "step": 1258860
    },
    {
      "epoch": 2.0601847305957595,
      "grad_norm": 0.5602841377258301,
      "learning_rate": 5.918372725894982e-06,
      "loss": 0.0211,
      "step": 1258880
    },
    {
      "epoch": 2.0602174610344126,
      "grad_norm": 0.29000961780548096,
      "learning_rate": 5.9183068336814645e-06,
      "loss": 0.0167,
      "step": 1258900
    },
    {
      "epoch": 2.060250191473066,
      "grad_norm": 0.1987239420413971,
      "learning_rate": 5.918240941467947e-06,
      "loss": 0.0114,
      "step": 1258920
    },
    {
      "epoch": 2.0602829219117194,
      "grad_norm": 0.4056050777435303,
      "learning_rate": 5.91817504925443e-06,
      "loss": 0.0125,
      "step": 1258940
    },
    {
      "epoch": 2.060315652350373,
      "grad_norm": 0.32118624448776245,
      "learning_rate": 5.918109157040914e-06,
      "loss": 0.0101,
      "step": 1258960
    },
    {
      "epoch": 2.060348382789026,
      "grad_norm": 0.29826703667640686,
      "learning_rate": 5.9180432648273955e-06,
      "loss": 0.0142,
      "step": 1258980
    },
    {
      "epoch": 2.0603811132276797,
      "grad_norm": 0.2325015664100647,
      "learning_rate": 5.917977372613879e-06,
      "loss": 0.0208,
      "step": 1259000
    },
    {
      "epoch": 2.060413843666333,
      "grad_norm": 2.222987651824951,
      "learning_rate": 5.917911480400361e-06,
      "loss": 0.0141,
      "step": 1259020
    },
    {
      "epoch": 2.060446574104986,
      "grad_norm": 0.3583284914493561,
      "learning_rate": 5.9178455881868446e-06,
      "loss": 0.0124,
      "step": 1259040
    },
    {
      "epoch": 2.0604793045436396,
      "grad_norm": 0.8186753988265991,
      "learning_rate": 5.917779695973327e-06,
      "loss": 0.0167,
      "step": 1259060
    },
    {
      "epoch": 2.0605120349822927,
      "grad_norm": 0.5644949674606323,
      "learning_rate": 5.91771380375981e-06,
      "loss": 0.0141,
      "step": 1259080
    },
    {
      "epoch": 2.0605447654209463,
      "grad_norm": 0.07185786217451096,
      "learning_rate": 5.917647911546293e-06,
      "loss": 0.0151,
      "step": 1259100
    },
    {
      "epoch": 2.0605774958595995,
      "grad_norm": 0.21055129170417786,
      "learning_rate": 5.917582019332776e-06,
      "loss": 0.0122,
      "step": 1259120
    },
    {
      "epoch": 2.0606102262982526,
      "grad_norm": 0.2836180627346039,
      "learning_rate": 5.917516127119258e-06,
      "loss": 0.0127,
      "step": 1259140
    },
    {
      "epoch": 2.0606429567369062,
      "grad_norm": 0.12012821435928345,
      "learning_rate": 5.917450234905742e-06,
      "loss": 0.0144,
      "step": 1259160
    },
    {
      "epoch": 2.0606756871755594,
      "grad_norm": 0.2793768048286438,
      "learning_rate": 5.9173843426922254e-06,
      "loss": 0.0126,
      "step": 1259180
    },
    {
      "epoch": 2.060708417614213,
      "grad_norm": 0.2547706365585327,
      "learning_rate": 5.917318450478707e-06,
      "loss": 0.0099,
      "step": 1259200
    },
    {
      "epoch": 2.060741148052866,
      "grad_norm": 0.1759066879749298,
      "learning_rate": 5.917252558265191e-06,
      "loss": 0.0233,
      "step": 1259220
    },
    {
      "epoch": 2.0607738784915197,
      "grad_norm": 0.1946224570274353,
      "learning_rate": 5.917186666051673e-06,
      "loss": 0.0175,
      "step": 1259240
    },
    {
      "epoch": 2.060806608930173,
      "grad_norm": 1.0479954481124878,
      "learning_rate": 5.917120773838156e-06,
      "loss": 0.0194,
      "step": 1259260
    },
    {
      "epoch": 2.060839339368826,
      "grad_norm": 0.3668401837348938,
      "learning_rate": 5.917054881624638e-06,
      "loss": 0.014,
      "step": 1259280
    },
    {
      "epoch": 2.0608720698074796,
      "grad_norm": 0.1181778758764267,
      "learning_rate": 5.916988989411122e-06,
      "loss": 0.0172,
      "step": 1259300
    },
    {
      "epoch": 2.0609048002461328,
      "grad_norm": 0.3570052981376648,
      "learning_rate": 5.916923097197605e-06,
      "loss": 0.0199,
      "step": 1259320
    },
    {
      "epoch": 2.0609375306847864,
      "grad_norm": 0.09775148332118988,
      "learning_rate": 5.916857204984088e-06,
      "loss": 0.0162,
      "step": 1259340
    },
    {
      "epoch": 2.0609702611234395,
      "grad_norm": 0.2048756331205368,
      "learning_rate": 5.91679131277057e-06,
      "loss": 0.0113,
      "step": 1259360
    },
    {
      "epoch": 2.061002991562093,
      "grad_norm": 0.8230814337730408,
      "learning_rate": 5.916725420557054e-06,
      "loss": 0.0157,
      "step": 1259380
    },
    {
      "epoch": 2.0610357220007463,
      "grad_norm": 0.6358044147491455,
      "learning_rate": 5.9166595283435356e-06,
      "loss": 0.0127,
      "step": 1259400
    },
    {
      "epoch": 2.0610684524393994,
      "grad_norm": 0.3541237413883209,
      "learning_rate": 5.916593636130019e-06,
      "loss": 0.0155,
      "step": 1259420
    },
    {
      "epoch": 2.061101182878053,
      "grad_norm": 0.2689657509326935,
      "learning_rate": 5.916527743916501e-06,
      "loss": 0.0088,
      "step": 1259440
    },
    {
      "epoch": 2.061133913316706,
      "grad_norm": 0.39060890674591064,
      "learning_rate": 5.916461851702985e-06,
      "loss": 0.0183,
      "step": 1259460
    },
    {
      "epoch": 2.0611666437553597,
      "grad_norm": 0.6133207082748413,
      "learning_rate": 5.916395959489467e-06,
      "loss": 0.0153,
      "step": 1259480
    },
    {
      "epoch": 2.061199374194013,
      "grad_norm": 0.24639268219470978,
      "learning_rate": 5.91633006727595e-06,
      "loss": 0.0111,
      "step": 1259500
    },
    {
      "epoch": 2.0612321046326665,
      "grad_norm": 0.5091846585273743,
      "learning_rate": 5.916264175062434e-06,
      "loss": 0.0096,
      "step": 1259520
    },
    {
      "epoch": 2.0612648350713196,
      "grad_norm": 0.23891493678092957,
      "learning_rate": 5.9161982828489164e-06,
      "loss": 0.0144,
      "step": 1259540
    },
    {
      "epoch": 2.061297565509973,
      "grad_norm": 0.8102996945381165,
      "learning_rate": 5.916132390635399e-06,
      "loss": 0.0173,
      "step": 1259560
    },
    {
      "epoch": 2.0613302959486264,
      "grad_norm": 0.5789848566055298,
      "learning_rate": 5.916066498421882e-06,
      "loss": 0.0147,
      "step": 1259580
    },
    {
      "epoch": 2.0613630263872795,
      "grad_norm": 0.41444993019104004,
      "learning_rate": 5.9160006062083655e-06,
      "loss": 0.0086,
      "step": 1259600
    },
    {
      "epoch": 2.061395756825933,
      "grad_norm": 0.6504030823707581,
      "learning_rate": 5.915934713994847e-06,
      "loss": 0.0101,
      "step": 1259620
    },
    {
      "epoch": 2.0614284872645863,
      "grad_norm": 0.5345372557640076,
      "learning_rate": 5.915868821781331e-06,
      "loss": 0.0131,
      "step": 1259640
    },
    {
      "epoch": 2.06146121770324,
      "grad_norm": 0.5385655164718628,
      "learning_rate": 5.915802929567813e-06,
      "loss": 0.0152,
      "step": 1259660
    },
    {
      "epoch": 2.061493948141893,
      "grad_norm": 0.4824974834918976,
      "learning_rate": 5.9157370373542965e-06,
      "loss": 0.0177,
      "step": 1259680
    },
    {
      "epoch": 2.061526678580546,
      "grad_norm": 0.2180391550064087,
      "learning_rate": 5.915671145140779e-06,
      "loss": 0.0141,
      "step": 1259700
    },
    {
      "epoch": 2.0615594090191998,
      "grad_norm": 0.7468906044960022,
      "learning_rate": 5.915605252927262e-06,
      "loss": 0.0178,
      "step": 1259720
    },
    {
      "epoch": 2.061592139457853,
      "grad_norm": 0.6355908513069153,
      "learning_rate": 5.915539360713745e-06,
      "loss": 0.0253,
      "step": 1259740
    },
    {
      "epoch": 2.0616248698965065,
      "grad_norm": 0.29637807607650757,
      "learning_rate": 5.915473468500228e-06,
      "loss": 0.0086,
      "step": 1259760
    },
    {
      "epoch": 2.0616576003351597,
      "grad_norm": 0.9551133513450623,
      "learning_rate": 5.91540757628671e-06,
      "loss": 0.0167,
      "step": 1259780
    },
    {
      "epoch": 2.0616903307738133,
      "grad_norm": 0.5933718085289001,
      "learning_rate": 5.915341684073194e-06,
      "loss": 0.0118,
      "step": 1259800
    },
    {
      "epoch": 2.0617230612124664,
      "grad_norm": 0.22997058928012848,
      "learning_rate": 5.915275791859676e-06,
      "loss": 0.0086,
      "step": 1259820
    },
    {
      "epoch": 2.0617557916511196,
      "grad_norm": 0.4394185245037079,
      "learning_rate": 5.915209899646159e-06,
      "loss": 0.0185,
      "step": 1259840
    },
    {
      "epoch": 2.061788522089773,
      "grad_norm": 1.0915558338165283,
      "learning_rate": 5.915144007432643e-06,
      "loss": 0.0172,
      "step": 1259860
    },
    {
      "epoch": 2.0618212525284263,
      "grad_norm": 0.7709481120109558,
      "learning_rate": 5.915078115219125e-06,
      "loss": 0.012,
      "step": 1259880
    },
    {
      "epoch": 2.06185398296708,
      "grad_norm": 0.16909605264663696,
      "learning_rate": 5.915012223005608e-06,
      "loss": 0.0197,
      "step": 1259900
    },
    {
      "epoch": 2.061886713405733,
      "grad_norm": 0.40771812200546265,
      "learning_rate": 5.914946330792091e-06,
      "loss": 0.0107,
      "step": 1259920
    },
    {
      "epoch": 2.061919443844386,
      "grad_norm": 0.10801748931407928,
      "learning_rate": 5.914880438578574e-06,
      "loss": 0.0116,
      "step": 1259940
    },
    {
      "epoch": 2.06195217428304,
      "grad_norm": 0.4734109938144684,
      "learning_rate": 5.9148145463650565e-06,
      "loss": 0.0121,
      "step": 1259960
    },
    {
      "epoch": 2.061984904721693,
      "grad_norm": 0.36286279559135437,
      "learning_rate": 5.91474865415154e-06,
      "loss": 0.0183,
      "step": 1259980
    },
    {
      "epoch": 2.0620176351603465,
      "grad_norm": 0.740999698638916,
      "learning_rate": 5.914682761938022e-06,
      "loss": 0.0129,
      "step": 1260000
    },
    {
      "epoch": 2.0620503655989997,
      "grad_norm": 0.4852653741836548,
      "learning_rate": 5.914616869724506e-06,
      "loss": 0.0154,
      "step": 1260020
    },
    {
      "epoch": 2.0620830960376533,
      "grad_norm": 0.6943451762199402,
      "learning_rate": 5.9145509775109875e-06,
      "loss": 0.0123,
      "step": 1260040
    },
    {
      "epoch": 2.0621158264763064,
      "grad_norm": 0.9933421015739441,
      "learning_rate": 5.914485085297471e-06,
      "loss": 0.0172,
      "step": 1260060
    },
    {
      "epoch": 2.0621485569149596,
      "grad_norm": 0.259084552526474,
      "learning_rate": 5.914419193083954e-06,
      "loss": 0.0166,
      "step": 1260080
    },
    {
      "epoch": 2.062181287353613,
      "grad_norm": 0.6249744296073914,
      "learning_rate": 5.9143533008704365e-06,
      "loss": 0.0179,
      "step": 1260100
    },
    {
      "epoch": 2.0622140177922663,
      "grad_norm": 0.42695942521095276,
      "learning_rate": 5.914287408656919e-06,
      "loss": 0.02,
      "step": 1260120
    },
    {
      "epoch": 2.06224674823092,
      "grad_norm": 0.3688710629940033,
      "learning_rate": 5.914221516443403e-06,
      "loss": 0.0164,
      "step": 1260140
    },
    {
      "epoch": 2.062279478669573,
      "grad_norm": 0.31459033489227295,
      "learning_rate": 5.914155624229885e-06,
      "loss": 0.0123,
      "step": 1260160
    },
    {
      "epoch": 2.0623122091082267,
      "grad_norm": 0.07910413295030594,
      "learning_rate": 5.914089732016368e-06,
      "loss": 0.0134,
      "step": 1260180
    },
    {
      "epoch": 2.06234493954688,
      "grad_norm": 0.22864340245723724,
      "learning_rate": 5.91402383980285e-06,
      "loss": 0.0133,
      "step": 1260200
    },
    {
      "epoch": 2.062377669985533,
      "grad_norm": 0.2606038451194763,
      "learning_rate": 5.913957947589334e-06,
      "loss": 0.0201,
      "step": 1260220
    },
    {
      "epoch": 2.0624104004241866,
      "grad_norm": 0.23030520975589752,
      "learning_rate": 5.913892055375817e-06,
      "loss": 0.0113,
      "step": 1260240
    },
    {
      "epoch": 2.0624431308628397,
      "grad_norm": 0.16157963871955872,
      "learning_rate": 5.913826163162299e-06,
      "loss": 0.0167,
      "step": 1260260
    },
    {
      "epoch": 2.0624758613014933,
      "grad_norm": 0.3751291036605835,
      "learning_rate": 5.913760270948783e-06,
      "loss": 0.0178,
      "step": 1260280
    },
    {
      "epoch": 2.0625085917401464,
      "grad_norm": 0.4227786362171173,
      "learning_rate": 5.913694378735265e-06,
      "loss": 0.0147,
      "step": 1260300
    },
    {
      "epoch": 2.0625413221788,
      "grad_norm": 0.8697580695152283,
      "learning_rate": 5.913628486521748e-06,
      "loss": 0.0162,
      "step": 1260320
    },
    {
      "epoch": 2.062574052617453,
      "grad_norm": 0.1333591639995575,
      "learning_rate": 5.913562594308231e-06,
      "loss": 0.014,
      "step": 1260340
    },
    {
      "epoch": 2.0626067830561063,
      "grad_norm": 0.17670771479606628,
      "learning_rate": 5.913496702094714e-06,
      "loss": 0.0159,
      "step": 1260360
    },
    {
      "epoch": 2.06263951349476,
      "grad_norm": 0.38135936856269836,
      "learning_rate": 5.913430809881197e-06,
      "loss": 0.0203,
      "step": 1260380
    },
    {
      "epoch": 2.062672243933413,
      "grad_norm": 0.284152626991272,
      "learning_rate": 5.91336491766768e-06,
      "loss": 0.0136,
      "step": 1260400
    },
    {
      "epoch": 2.0627049743720667,
      "grad_norm": 0.33039313554763794,
      "learning_rate": 5.913299025454162e-06,
      "loss": 0.0136,
      "step": 1260420
    },
    {
      "epoch": 2.06273770481072,
      "grad_norm": 0.25908568501472473,
      "learning_rate": 5.913233133240646e-06,
      "loss": 0.0173,
      "step": 1260440
    },
    {
      "epoch": 2.0627704352493734,
      "grad_norm": 0.1751425713300705,
      "learning_rate": 5.9131672410271276e-06,
      "loss": 0.0187,
      "step": 1260460
    },
    {
      "epoch": 2.0628031656880266,
      "grad_norm": 0.9015548229217529,
      "learning_rate": 5.913101348813611e-06,
      "loss": 0.0219,
      "step": 1260480
    },
    {
      "epoch": 2.0628358961266797,
      "grad_norm": 0.7985263466835022,
      "learning_rate": 5.913035456600094e-06,
      "loss": 0.0271,
      "step": 1260500
    },
    {
      "epoch": 2.0628686265653333,
      "grad_norm": 0.49937212467193604,
      "learning_rate": 5.912969564386577e-06,
      "loss": 0.0254,
      "step": 1260520
    },
    {
      "epoch": 2.0629013570039865,
      "grad_norm": 0.22559726238250732,
      "learning_rate": 5.912903672173059e-06,
      "loss": 0.011,
      "step": 1260540
    },
    {
      "epoch": 2.06293408744264,
      "grad_norm": 0.15785253047943115,
      "learning_rate": 5.912837779959543e-06,
      "loss": 0.0103,
      "step": 1260560
    },
    {
      "epoch": 2.062966817881293,
      "grad_norm": 0.5207380056381226,
      "learning_rate": 5.912771887746026e-06,
      "loss": 0.015,
      "step": 1260580
    },
    {
      "epoch": 2.0629995483199464,
      "grad_norm": 0.4417259097099304,
      "learning_rate": 5.9127059955325084e-06,
      "loss": 0.0182,
      "step": 1260600
    },
    {
      "epoch": 2.0630322787586,
      "grad_norm": 0.3386457562446594,
      "learning_rate": 5.912640103318992e-06,
      "loss": 0.0131,
      "step": 1260620
    },
    {
      "epoch": 2.063065009197253,
      "grad_norm": 0.3881080746650696,
      "learning_rate": 5.912574211105474e-06,
      "loss": 0.0116,
      "step": 1260640
    },
    {
      "epoch": 2.0630977396359067,
      "grad_norm": 0.28346091508865356,
      "learning_rate": 5.9125083188919575e-06,
      "loss": 0.0129,
      "step": 1260660
    },
    {
      "epoch": 2.06313047007456,
      "grad_norm": 0.1372646540403366,
      "learning_rate": 5.912442426678439e-06,
      "loss": 0.0206,
      "step": 1260680
    },
    {
      "epoch": 2.0631632005132134,
      "grad_norm": 4.704452991485596,
      "learning_rate": 5.912376534464923e-06,
      "loss": 0.0111,
      "step": 1260700
    },
    {
      "epoch": 2.0631959309518666,
      "grad_norm": 0.19559699296951294,
      "learning_rate": 5.912310642251406e-06,
      "loss": 0.0142,
      "step": 1260720
    },
    {
      "epoch": 2.0632286613905197,
      "grad_norm": 1.0274827480316162,
      "learning_rate": 5.9122447500378885e-06,
      "loss": 0.0271,
      "step": 1260740
    },
    {
      "epoch": 2.0632613918291733,
      "grad_norm": 0.026669682934880257,
      "learning_rate": 5.912178857824371e-06,
      "loss": 0.0136,
      "step": 1260760
    },
    {
      "epoch": 2.0632941222678265,
      "grad_norm": 1.6509674787521362,
      "learning_rate": 5.912112965610855e-06,
      "loss": 0.0156,
      "step": 1260780
    },
    {
      "epoch": 2.06332685270648,
      "grad_norm": 0.653828501701355,
      "learning_rate": 5.912047073397337e-06,
      "loss": 0.0138,
      "step": 1260800
    },
    {
      "epoch": 2.0633595831451332,
      "grad_norm": 0.7716449499130249,
      "learning_rate": 5.91198118118382e-06,
      "loss": 0.0211,
      "step": 1260820
    },
    {
      "epoch": 2.063392313583787,
      "grad_norm": 0.5279168486595154,
      "learning_rate": 5.911915288970302e-06,
      "loss": 0.0154,
      "step": 1260840
    },
    {
      "epoch": 2.06342504402244,
      "grad_norm": 0.7946502566337585,
      "learning_rate": 5.911849396756786e-06,
      "loss": 0.0185,
      "step": 1260860
    },
    {
      "epoch": 2.063457774461093,
      "grad_norm": 0.2517947554588318,
      "learning_rate": 5.9117835045432685e-06,
      "loss": 0.0155,
      "step": 1260880
    },
    {
      "epoch": 2.0634905048997467,
      "grad_norm": 0.9660060405731201,
      "learning_rate": 5.911717612329751e-06,
      "loss": 0.0187,
      "step": 1260900
    },
    {
      "epoch": 2.0635232353384,
      "grad_norm": 0.3378617763519287,
      "learning_rate": 5.911651720116235e-06,
      "loss": 0.0105,
      "step": 1260920
    },
    {
      "epoch": 2.0635559657770535,
      "grad_norm": 1.1503695249557495,
      "learning_rate": 5.9115858279027175e-06,
      "loss": 0.0204,
      "step": 1260940
    },
    {
      "epoch": 2.0635886962157066,
      "grad_norm": 0.3218473196029663,
      "learning_rate": 5.9115199356892e-06,
      "loss": 0.0131,
      "step": 1260960
    },
    {
      "epoch": 2.06362142665436,
      "grad_norm": 0.3802134692668915,
      "learning_rate": 5.911454043475683e-06,
      "loss": 0.0177,
      "step": 1260980
    },
    {
      "epoch": 2.0636541570930134,
      "grad_norm": 0.3908452093601227,
      "learning_rate": 5.911388151262167e-06,
      "loss": 0.0157,
      "step": 1261000
    },
    {
      "epoch": 2.0636868875316665,
      "grad_norm": 0.4367847442626953,
      "learning_rate": 5.9113222590486485e-06,
      "loss": 0.0159,
      "step": 1261020
    },
    {
      "epoch": 2.06371961797032,
      "grad_norm": 0.4197269380092621,
      "learning_rate": 5.911256366835132e-06,
      "loss": 0.0172,
      "step": 1261040
    },
    {
      "epoch": 2.0637523484089733,
      "grad_norm": 0.4220500588417053,
      "learning_rate": 5.911190474621614e-06,
      "loss": 0.0148,
      "step": 1261060
    },
    {
      "epoch": 2.063785078847627,
      "grad_norm": 0.18157035112380981,
      "learning_rate": 5.9111245824080976e-06,
      "loss": 0.0171,
      "step": 1261080
    },
    {
      "epoch": 2.06381780928628,
      "grad_norm": 0.36995723843574524,
      "learning_rate": 5.91105869019458e-06,
      "loss": 0.0222,
      "step": 1261100
    },
    {
      "epoch": 2.0638505397249336,
      "grad_norm": 0.46997377276420593,
      "learning_rate": 5.910992797981063e-06,
      "loss": 0.0132,
      "step": 1261120
    },
    {
      "epoch": 2.0638832701635867,
      "grad_norm": 0.5859987139701843,
      "learning_rate": 5.910926905767546e-06,
      "loss": 0.0131,
      "step": 1261140
    },
    {
      "epoch": 2.06391600060224,
      "grad_norm": 0.24635590612888336,
      "learning_rate": 5.910861013554029e-06,
      "loss": 0.0112,
      "step": 1261160
    },
    {
      "epoch": 2.0639487310408935,
      "grad_norm": 0.21319642663002014,
      "learning_rate": 5.910795121340511e-06,
      "loss": 0.0126,
      "step": 1261180
    },
    {
      "epoch": 2.0639814614795466,
      "grad_norm": 0.3157708942890167,
      "learning_rate": 5.910729229126995e-06,
      "loss": 0.0147,
      "step": 1261200
    },
    {
      "epoch": 2.0640141919182002,
      "grad_norm": 0.649732768535614,
      "learning_rate": 5.910663336913477e-06,
      "loss": 0.0105,
      "step": 1261220
    },
    {
      "epoch": 2.0640469223568534,
      "grad_norm": 0.5762081742286682,
      "learning_rate": 5.91059744469996e-06,
      "loss": 0.0195,
      "step": 1261240
    },
    {
      "epoch": 2.064079652795507,
      "grad_norm": 0.24333837628364563,
      "learning_rate": 5.910531552486442e-06,
      "loss": 0.0175,
      "step": 1261260
    },
    {
      "epoch": 2.06411238323416,
      "grad_norm": 0.3767060935497284,
      "learning_rate": 5.910465660272926e-06,
      "loss": 0.0131,
      "step": 1261280
    },
    {
      "epoch": 2.0641451136728133,
      "grad_norm": 0.3452301025390625,
      "learning_rate": 5.910399768059409e-06,
      "loss": 0.0221,
      "step": 1261300
    },
    {
      "epoch": 2.064177844111467,
      "grad_norm": 0.38222038745880127,
      "learning_rate": 5.910333875845891e-06,
      "loss": 0.0117,
      "step": 1261320
    },
    {
      "epoch": 2.06421057455012,
      "grad_norm": 0.22786937654018402,
      "learning_rate": 5.910267983632375e-06,
      "loss": 0.0201,
      "step": 1261340
    },
    {
      "epoch": 2.0642433049887736,
      "grad_norm": 0.16594989597797394,
      "learning_rate": 5.910202091418858e-06,
      "loss": 0.0224,
      "step": 1261360
    },
    {
      "epoch": 2.0642760354274268,
      "grad_norm": 0.23390115797519684,
      "learning_rate": 5.91013619920534e-06,
      "loss": 0.0179,
      "step": 1261380
    },
    {
      "epoch": 2.0643087658660804,
      "grad_norm": 0.25941017270088196,
      "learning_rate": 5.910070306991823e-06,
      "loss": 0.0233,
      "step": 1261400
    },
    {
      "epoch": 2.0643414963047335,
      "grad_norm": 0.1438380777835846,
      "learning_rate": 5.910004414778307e-06,
      "loss": 0.011,
      "step": 1261420
    },
    {
      "epoch": 2.0643742267433867,
      "grad_norm": 0.19073542952537537,
      "learning_rate": 5.909938522564789e-06,
      "loss": 0.0207,
      "step": 1261440
    },
    {
      "epoch": 2.0644069571820403,
      "grad_norm": 0.361872136592865,
      "learning_rate": 5.909872630351272e-06,
      "loss": 0.0182,
      "step": 1261460
    },
    {
      "epoch": 2.0644396876206934,
      "grad_norm": 0.2223035842180252,
      "learning_rate": 5.909806738137754e-06,
      "loss": 0.0145,
      "step": 1261480
    },
    {
      "epoch": 2.064472418059347,
      "grad_norm": 0.6794270873069763,
      "learning_rate": 5.909740845924238e-06,
      "loss": 0.0181,
      "step": 1261500
    },
    {
      "epoch": 2.064505148498,
      "grad_norm": 0.6577178835868835,
      "learning_rate": 5.90967495371072e-06,
      "loss": 0.0139,
      "step": 1261520
    },
    {
      "epoch": 2.0645378789366533,
      "grad_norm": 0.34758076071739197,
      "learning_rate": 5.909609061497203e-06,
      "loss": 0.0152,
      "step": 1261540
    },
    {
      "epoch": 2.064570609375307,
      "grad_norm": 0.4505471885204315,
      "learning_rate": 5.909543169283686e-06,
      "loss": 0.0142,
      "step": 1261560
    },
    {
      "epoch": 2.06460333981396,
      "grad_norm": 0.2802346348762512,
      "learning_rate": 5.9094772770701694e-06,
      "loss": 0.0173,
      "step": 1261580
    },
    {
      "epoch": 2.0646360702526136,
      "grad_norm": 0.2398192286491394,
      "learning_rate": 5.909411384856651e-06,
      "loss": 0.0121,
      "step": 1261600
    },
    {
      "epoch": 2.064668800691267,
      "grad_norm": 0.2372775375843048,
      "learning_rate": 5.909345492643135e-06,
      "loss": 0.0159,
      "step": 1261620
    },
    {
      "epoch": 2.0647015311299204,
      "grad_norm": 0.20646454393863678,
      "learning_rate": 5.9092796004296185e-06,
      "loss": 0.0126,
      "step": 1261640
    },
    {
      "epoch": 2.0647342615685735,
      "grad_norm": 0.25885874032974243,
      "learning_rate": 5.9092137082161e-06,
      "loss": 0.0092,
      "step": 1261660
    },
    {
      "epoch": 2.0647669920072267,
      "grad_norm": 0.44021913409233093,
      "learning_rate": 5.909147816002584e-06,
      "loss": 0.0103,
      "step": 1261680
    },
    {
      "epoch": 2.0647997224458803,
      "grad_norm": 0.4901185631752014,
      "learning_rate": 5.909081923789066e-06,
      "loss": 0.0104,
      "step": 1261700
    },
    {
      "epoch": 2.0648324528845334,
      "grad_norm": 0.36665257811546326,
      "learning_rate": 5.9090160315755495e-06,
      "loss": 0.0179,
      "step": 1261720
    },
    {
      "epoch": 2.064865183323187,
      "grad_norm": 0.9998645186424255,
      "learning_rate": 5.908950139362032e-06,
      "loss": 0.018,
      "step": 1261740
    },
    {
      "epoch": 2.06489791376184,
      "grad_norm": 0.7180317044258118,
      "learning_rate": 5.908884247148515e-06,
      "loss": 0.017,
      "step": 1261760
    },
    {
      "epoch": 2.0649306442004938,
      "grad_norm": 0.15091538429260254,
      "learning_rate": 5.908818354934998e-06,
      "loss": 0.0162,
      "step": 1261780
    },
    {
      "epoch": 2.064963374639147,
      "grad_norm": 0.4215344786643982,
      "learning_rate": 5.908752462721481e-06,
      "loss": 0.0136,
      "step": 1261800
    },
    {
      "epoch": 2.0649961050778,
      "grad_norm": 0.29254457354545593,
      "learning_rate": 5.908686570507963e-06,
      "loss": 0.0131,
      "step": 1261820
    },
    {
      "epoch": 2.0650288355164537,
      "grad_norm": 0.27688294649124146,
      "learning_rate": 5.908620678294447e-06,
      "loss": 0.0126,
      "step": 1261840
    },
    {
      "epoch": 2.065061565955107,
      "grad_norm": 0.8711870908737183,
      "learning_rate": 5.908554786080929e-06,
      "loss": 0.0195,
      "step": 1261860
    },
    {
      "epoch": 2.0650942963937604,
      "grad_norm": 0.2569940984249115,
      "learning_rate": 5.908488893867412e-06,
      "loss": 0.013,
      "step": 1261880
    },
    {
      "epoch": 2.0651270268324136,
      "grad_norm": 0.3423492908477783,
      "learning_rate": 5.908423001653895e-06,
      "loss": 0.0174,
      "step": 1261900
    },
    {
      "epoch": 2.065159757271067,
      "grad_norm": 0.1009989082813263,
      "learning_rate": 5.908357109440378e-06,
      "loss": 0.0173,
      "step": 1261920
    },
    {
      "epoch": 2.0651924877097203,
      "grad_norm": 0.6768958568572998,
      "learning_rate": 5.9082912172268605e-06,
      "loss": 0.0156,
      "step": 1261940
    },
    {
      "epoch": 2.0652252181483735,
      "grad_norm": 0.2716637849807739,
      "learning_rate": 5.908225325013344e-06,
      "loss": 0.0157,
      "step": 1261960
    },
    {
      "epoch": 2.065257948587027,
      "grad_norm": 0.11074912548065186,
      "learning_rate": 5.908159432799827e-06,
      "loss": 0.0092,
      "step": 1261980
    },
    {
      "epoch": 2.06529067902568,
      "grad_norm": 0.40440961718559265,
      "learning_rate": 5.9080935405863095e-06,
      "loss": 0.0119,
      "step": 1262000
    },
    {
      "epoch": 2.065323409464334,
      "grad_norm": 0.783618152141571,
      "learning_rate": 5.908027648372793e-06,
      "loss": 0.0202,
      "step": 1262020
    },
    {
      "epoch": 2.065356139902987,
      "grad_norm": 0.11560001969337463,
      "learning_rate": 5.907961756159275e-06,
      "loss": 0.0144,
      "step": 1262040
    },
    {
      "epoch": 2.06538887034164,
      "grad_norm": 0.061833880841732025,
      "learning_rate": 5.907895863945759e-06,
      "loss": 0.0149,
      "step": 1262060
    },
    {
      "epoch": 2.0654216007802937,
      "grad_norm": 0.7216488718986511,
      "learning_rate": 5.9078299717322405e-06,
      "loss": 0.0118,
      "step": 1262080
    },
    {
      "epoch": 2.065454331218947,
      "grad_norm": 0.47765490412712097,
      "learning_rate": 5.907764079518724e-06,
      "loss": 0.0214,
      "step": 1262100
    },
    {
      "epoch": 2.0654870616576004,
      "grad_norm": 0.24838165938854218,
      "learning_rate": 5.907698187305207e-06,
      "loss": 0.0161,
      "step": 1262120
    },
    {
      "epoch": 2.0655197920962536,
      "grad_norm": 0.5880303978919983,
      "learning_rate": 5.9076322950916896e-06,
      "loss": 0.0156,
      "step": 1262140
    },
    {
      "epoch": 2.065552522534907,
      "grad_norm": 0.2721147835254669,
      "learning_rate": 5.907566402878172e-06,
      "loss": 0.02,
      "step": 1262160
    },
    {
      "epoch": 2.0655852529735603,
      "grad_norm": 0.21562828123569489,
      "learning_rate": 5.907500510664656e-06,
      "loss": 0.0107,
      "step": 1262180
    },
    {
      "epoch": 2.0656179834122135,
      "grad_norm": 0.337830513715744,
      "learning_rate": 5.907434618451138e-06,
      "loss": 0.0212,
      "step": 1262200
    },
    {
      "epoch": 2.065650713850867,
      "grad_norm": 0.3518410921096802,
      "learning_rate": 5.907368726237621e-06,
      "loss": 0.0198,
      "step": 1262220
    },
    {
      "epoch": 2.06568344428952,
      "grad_norm": 0.43077409267425537,
      "learning_rate": 5.907302834024103e-06,
      "loss": 0.0112,
      "step": 1262240
    },
    {
      "epoch": 2.065716174728174,
      "grad_norm": 0.19875599443912506,
      "learning_rate": 5.907236941810587e-06,
      "loss": 0.0128,
      "step": 1262260
    },
    {
      "epoch": 2.065748905166827,
      "grad_norm": 0.36368846893310547,
      "learning_rate": 5.907171049597069e-06,
      "loss": 0.0125,
      "step": 1262280
    },
    {
      "epoch": 2.0657816356054806,
      "grad_norm": 0.20092588663101196,
      "learning_rate": 5.907105157383552e-06,
      "loss": 0.0128,
      "step": 1262300
    },
    {
      "epoch": 2.0658143660441337,
      "grad_norm": 0.7088173627853394,
      "learning_rate": 5.907039265170035e-06,
      "loss": 0.0165,
      "step": 1262320
    },
    {
      "epoch": 2.065847096482787,
      "grad_norm": 0.3210143446922302,
      "learning_rate": 5.906973372956518e-06,
      "loss": 0.0136,
      "step": 1262340
    },
    {
      "epoch": 2.0658798269214405,
      "grad_norm": 0.17877471446990967,
      "learning_rate": 5.906907480743001e-06,
      "loss": 0.0142,
      "step": 1262360
    },
    {
      "epoch": 2.0659125573600936,
      "grad_norm": 0.26982706785202026,
      "learning_rate": 5.906841588529484e-06,
      "loss": 0.0158,
      "step": 1262380
    },
    {
      "epoch": 2.065945287798747,
      "grad_norm": 0.29630687832832336,
      "learning_rate": 5.906775696315967e-06,
      "loss": 0.0122,
      "step": 1262400
    },
    {
      "epoch": 2.0659780182374003,
      "grad_norm": 0.31801262497901917,
      "learning_rate": 5.90670980410245e-06,
      "loss": 0.0155,
      "step": 1262420
    },
    {
      "epoch": 2.066010748676054,
      "grad_norm": 0.43658387660980225,
      "learning_rate": 5.906643911888933e-06,
      "loss": 0.0205,
      "step": 1262440
    },
    {
      "epoch": 2.066043479114707,
      "grad_norm": 0.33239132165908813,
      "learning_rate": 5.906578019675415e-06,
      "loss": 0.011,
      "step": 1262460
    },
    {
      "epoch": 2.0660762095533602,
      "grad_norm": 0.2973577082157135,
      "learning_rate": 5.906512127461899e-06,
      "loss": 0.0156,
      "step": 1262480
    },
    {
      "epoch": 2.066108939992014,
      "grad_norm": 0.23123236000537872,
      "learning_rate": 5.9064462352483806e-06,
      "loss": 0.0155,
      "step": 1262500
    },
    {
      "epoch": 2.066141670430667,
      "grad_norm": 0.17076249420642853,
      "learning_rate": 5.906380343034864e-06,
      "loss": 0.0134,
      "step": 1262520
    },
    {
      "epoch": 2.0661744008693206,
      "grad_norm": 0.2450091391801834,
      "learning_rate": 5.906314450821347e-06,
      "loss": 0.0128,
      "step": 1262540
    },
    {
      "epoch": 2.0662071313079737,
      "grad_norm": 0.26443564891815186,
      "learning_rate": 5.90624855860783e-06,
      "loss": 0.0139,
      "step": 1262560
    },
    {
      "epoch": 2.0662398617466273,
      "grad_norm": 0.3117598593235016,
      "learning_rate": 5.906182666394312e-06,
      "loss": 0.0163,
      "step": 1262580
    },
    {
      "epoch": 2.0662725921852805,
      "grad_norm": 0.7356307506561279,
      "learning_rate": 5.906116774180796e-06,
      "loss": 0.0108,
      "step": 1262600
    },
    {
      "epoch": 2.0663053226239336,
      "grad_norm": 0.11593350023031235,
      "learning_rate": 5.906050881967278e-06,
      "loss": 0.017,
      "step": 1262620
    },
    {
      "epoch": 2.066338053062587,
      "grad_norm": 0.2127639353275299,
      "learning_rate": 5.9059849897537614e-06,
      "loss": 0.0119,
      "step": 1262640
    },
    {
      "epoch": 2.0663707835012404,
      "grad_norm": 0.5430331230163574,
      "learning_rate": 5.905919097540243e-06,
      "loss": 0.0095,
      "step": 1262660
    },
    {
      "epoch": 2.066403513939894,
      "grad_norm": 0.45116478204727173,
      "learning_rate": 5.905853205326727e-06,
      "loss": 0.0136,
      "step": 1262680
    },
    {
      "epoch": 2.066436244378547,
      "grad_norm": 0.20547756552696228,
      "learning_rate": 5.9057873131132105e-06,
      "loss": 0.0151,
      "step": 1262700
    },
    {
      "epoch": 2.0664689748172007,
      "grad_norm": 0.5629606246948242,
      "learning_rate": 5.905721420899692e-06,
      "loss": 0.0163,
      "step": 1262720
    },
    {
      "epoch": 2.066501705255854,
      "grad_norm": 0.37496039271354675,
      "learning_rate": 5.905655528686176e-06,
      "loss": 0.0145,
      "step": 1262740
    },
    {
      "epoch": 2.066534435694507,
      "grad_norm": 0.5363118648529053,
      "learning_rate": 5.905589636472659e-06,
      "loss": 0.0162,
      "step": 1262760
    },
    {
      "epoch": 2.0665671661331606,
      "grad_norm": 0.2524981200695038,
      "learning_rate": 5.9055237442591415e-06,
      "loss": 0.0117,
      "step": 1262780
    },
    {
      "epoch": 2.0665998965718138,
      "grad_norm": 0.3291642367839813,
      "learning_rate": 5.905457852045624e-06,
      "loss": 0.0187,
      "step": 1262800
    },
    {
      "epoch": 2.0666326270104673,
      "grad_norm": 0.6963868737220764,
      "learning_rate": 5.905391959832108e-06,
      "loss": 0.0211,
      "step": 1262820
    },
    {
      "epoch": 2.0666653574491205,
      "grad_norm": 0.17282070219516754,
      "learning_rate": 5.90532606761859e-06,
      "loss": 0.0221,
      "step": 1262840
    },
    {
      "epoch": 2.066698087887774,
      "grad_norm": 0.32636815309524536,
      "learning_rate": 5.905260175405073e-06,
      "loss": 0.0169,
      "step": 1262860
    },
    {
      "epoch": 2.0667308183264272,
      "grad_norm": 0.14091162383556366,
      "learning_rate": 5.905194283191555e-06,
      "loss": 0.018,
      "step": 1262880
    },
    {
      "epoch": 2.0667635487650804,
      "grad_norm": 0.29158633947372437,
      "learning_rate": 5.905128390978039e-06,
      "loss": 0.0186,
      "step": 1262900
    },
    {
      "epoch": 2.066796279203734,
      "grad_norm": 0.7420214414596558,
      "learning_rate": 5.9050624987645215e-06,
      "loss": 0.0203,
      "step": 1262920
    },
    {
      "epoch": 2.066829009642387,
      "grad_norm": 0.11448673903942108,
      "learning_rate": 5.904996606551004e-06,
      "loss": 0.0237,
      "step": 1262940
    },
    {
      "epoch": 2.0668617400810407,
      "grad_norm": 0.2957523465156555,
      "learning_rate": 5.904930714337487e-06,
      "loss": 0.0174,
      "step": 1262960
    },
    {
      "epoch": 2.066894470519694,
      "grad_norm": 0.3722967207431793,
      "learning_rate": 5.9048648221239705e-06,
      "loss": 0.0172,
      "step": 1262980
    },
    {
      "epoch": 2.066927200958347,
      "grad_norm": 0.22890590131282806,
      "learning_rate": 5.9047989299104524e-06,
      "loss": 0.0116,
      "step": 1263000
    },
    {
      "epoch": 2.0669599313970006,
      "grad_norm": 0.08608365058898926,
      "learning_rate": 5.904733037696936e-06,
      "loss": 0.0116,
      "step": 1263020
    },
    {
      "epoch": 2.0669926618356538,
      "grad_norm": 0.056639257818460464,
      "learning_rate": 5.90466714548342e-06,
      "loss": 0.0146,
      "step": 1263040
    },
    {
      "epoch": 2.0670253922743074,
      "grad_norm": 0.5409194231033325,
      "learning_rate": 5.9046012532699015e-06,
      "loss": 0.0119,
      "step": 1263060
    },
    {
      "epoch": 2.0670581227129605,
      "grad_norm": 1.0574527978897095,
      "learning_rate": 5.904535361056385e-06,
      "loss": 0.0163,
      "step": 1263080
    },
    {
      "epoch": 2.067090853151614,
      "grad_norm": 0.2331146001815796,
      "learning_rate": 5.904469468842867e-06,
      "loss": 0.0123,
      "step": 1263100
    },
    {
      "epoch": 2.0671235835902673,
      "grad_norm": 0.3913576602935791,
      "learning_rate": 5.9044035766293506e-06,
      "loss": 0.0134,
      "step": 1263120
    },
    {
      "epoch": 2.0671563140289204,
      "grad_norm": 0.2568730413913727,
      "learning_rate": 5.9043376844158325e-06,
      "loss": 0.009,
      "step": 1263140
    },
    {
      "epoch": 2.067189044467574,
      "grad_norm": 0.3988041877746582,
      "learning_rate": 5.904271792202316e-06,
      "loss": 0.019,
      "step": 1263160
    },
    {
      "epoch": 2.067221774906227,
      "grad_norm": 0.6087730526924133,
      "learning_rate": 5.904205899988799e-06,
      "loss": 0.0171,
      "step": 1263180
    },
    {
      "epoch": 2.0672545053448808,
      "grad_norm": 0.17372450232505798,
      "learning_rate": 5.9041400077752815e-06,
      "loss": 0.0149,
      "step": 1263200
    },
    {
      "epoch": 2.067287235783534,
      "grad_norm": 0.25472500920295715,
      "learning_rate": 5.904074115561764e-06,
      "loss": 0.019,
      "step": 1263220
    },
    {
      "epoch": 2.0673199662221875,
      "grad_norm": 0.2898350656032562,
      "learning_rate": 5.904008223348248e-06,
      "loss": 0.0132,
      "step": 1263240
    },
    {
      "epoch": 2.0673526966608406,
      "grad_norm": 0.5751218795776367,
      "learning_rate": 5.90394233113473e-06,
      "loss": 0.0142,
      "step": 1263260
    },
    {
      "epoch": 2.067385427099494,
      "grad_norm": 0.3282999098300934,
      "learning_rate": 5.903876438921213e-06,
      "loss": 0.0142,
      "step": 1263280
    },
    {
      "epoch": 2.0674181575381474,
      "grad_norm": 0.22751541435718536,
      "learning_rate": 5.903810546707695e-06,
      "loss": 0.014,
      "step": 1263300
    },
    {
      "epoch": 2.0674508879768005,
      "grad_norm": 0.22485290467739105,
      "learning_rate": 5.903744654494179e-06,
      "loss": 0.0173,
      "step": 1263320
    },
    {
      "epoch": 2.067483618415454,
      "grad_norm": 0.392324835062027,
      "learning_rate": 5.9036787622806616e-06,
      "loss": 0.0226,
      "step": 1263340
    },
    {
      "epoch": 2.0675163488541073,
      "grad_norm": 0.48208364844322205,
      "learning_rate": 5.903612870067144e-06,
      "loss": 0.0161,
      "step": 1263360
    },
    {
      "epoch": 2.067549079292761,
      "grad_norm": 0.49639350175857544,
      "learning_rate": 5.903546977853628e-06,
      "loss": 0.022,
      "step": 1263380
    },
    {
      "epoch": 2.067581809731414,
      "grad_norm": 0.6231526732444763,
      "learning_rate": 5.903481085640111e-06,
      "loss": 0.0173,
      "step": 1263400
    },
    {
      "epoch": 2.067614540170067,
      "grad_norm": 0.1846393644809723,
      "learning_rate": 5.903415193426593e-06,
      "loss": 0.0132,
      "step": 1263420
    },
    {
      "epoch": 2.0676472706087208,
      "grad_norm": 0.2564711272716522,
      "learning_rate": 5.903349301213076e-06,
      "loss": 0.0103,
      "step": 1263440
    },
    {
      "epoch": 2.067680001047374,
      "grad_norm": 0.5733503699302673,
      "learning_rate": 5.90328340899956e-06,
      "loss": 0.0122,
      "step": 1263460
    },
    {
      "epoch": 2.0677127314860275,
      "grad_norm": 0.7542171478271484,
      "learning_rate": 5.903217516786042e-06,
      "loss": 0.0124,
      "step": 1263480
    },
    {
      "epoch": 2.0677454619246807,
      "grad_norm": 0.6406157612800598,
      "learning_rate": 5.903151624572525e-06,
      "loss": 0.0189,
      "step": 1263500
    },
    {
      "epoch": 2.0677781923633343,
      "grad_norm": 0.3465728461742401,
      "learning_rate": 5.903085732359007e-06,
      "loss": 0.0124,
      "step": 1263520
    },
    {
      "epoch": 2.0678109228019874,
      "grad_norm": 0.6693457365036011,
      "learning_rate": 5.903019840145491e-06,
      "loss": 0.0189,
      "step": 1263540
    },
    {
      "epoch": 2.0678436532406406,
      "grad_norm": 0.5960520505905151,
      "learning_rate": 5.902953947931973e-06,
      "loss": 0.0118,
      "step": 1263560
    },
    {
      "epoch": 2.067876383679294,
      "grad_norm": 0.1883256882429123,
      "learning_rate": 5.902888055718456e-06,
      "loss": 0.0122,
      "step": 1263580
    },
    {
      "epoch": 2.0679091141179473,
      "grad_norm": 0.6190673112869263,
      "learning_rate": 5.902822163504939e-06,
      "loss": 0.0131,
      "step": 1263600
    },
    {
      "epoch": 2.067941844556601,
      "grad_norm": 0.37090474367141724,
      "learning_rate": 5.9027562712914225e-06,
      "loss": 0.0165,
      "step": 1263620
    },
    {
      "epoch": 2.067974574995254,
      "grad_norm": 0.6239194869995117,
      "learning_rate": 5.902690379077904e-06,
      "loss": 0.0216,
      "step": 1263640
    },
    {
      "epoch": 2.068007305433907,
      "grad_norm": 0.11724399030208588,
      "learning_rate": 5.902624486864388e-06,
      "loss": 0.0133,
      "step": 1263660
    },
    {
      "epoch": 2.068040035872561,
      "grad_norm": 0.5163922905921936,
      "learning_rate": 5.90255859465087e-06,
      "loss": 0.0137,
      "step": 1263680
    },
    {
      "epoch": 2.068072766311214,
      "grad_norm": 0.7824707627296448,
      "learning_rate": 5.902492702437353e-06,
      "loss": 0.0182,
      "step": 1263700
    },
    {
      "epoch": 2.0681054967498675,
      "grad_norm": 0.21492382884025574,
      "learning_rate": 5.902426810223836e-06,
      "loss": 0.0134,
      "step": 1263720
    },
    {
      "epoch": 2.0681382271885207,
      "grad_norm": 0.1383630782365799,
      "learning_rate": 5.902360918010319e-06,
      "loss": 0.0137,
      "step": 1263740
    },
    {
      "epoch": 2.0681709576271743,
      "grad_norm": 0.3994556963443756,
      "learning_rate": 5.9022950257968025e-06,
      "loss": 0.0164,
      "step": 1263760
    },
    {
      "epoch": 2.0682036880658274,
      "grad_norm": 0.5085848569869995,
      "learning_rate": 5.902229133583285e-06,
      "loss": 0.0187,
      "step": 1263780
    },
    {
      "epoch": 2.0682364185044806,
      "grad_norm": 0.28659042716026306,
      "learning_rate": 5.902163241369768e-06,
      "loss": 0.0188,
      "step": 1263800
    },
    {
      "epoch": 2.068269148943134,
      "grad_norm": 0.5471269488334656,
      "learning_rate": 5.902097349156251e-06,
      "loss": 0.02,
      "step": 1263820
    },
    {
      "epoch": 2.0683018793817873,
      "grad_norm": 0.18796983361244202,
      "learning_rate": 5.902031456942734e-06,
      "loss": 0.0202,
      "step": 1263840
    },
    {
      "epoch": 2.068334609820441,
      "grad_norm": 0.7980011701583862,
      "learning_rate": 5.901965564729216e-06,
      "loss": 0.0122,
      "step": 1263860
    },
    {
      "epoch": 2.068367340259094,
      "grad_norm": 0.42397770285606384,
      "learning_rate": 5.9018996725157e-06,
      "loss": 0.0208,
      "step": 1263880
    },
    {
      "epoch": 2.0684000706977477,
      "grad_norm": 0.22597837448120117,
      "learning_rate": 5.901833780302182e-06,
      "loss": 0.0173,
      "step": 1263900
    },
    {
      "epoch": 2.068432801136401,
      "grad_norm": 0.18105976283550262,
      "learning_rate": 5.901767888088665e-06,
      "loss": 0.0131,
      "step": 1263920
    },
    {
      "epoch": 2.068465531575054,
      "grad_norm": 0.09912984073162079,
      "learning_rate": 5.901701995875148e-06,
      "loss": 0.0099,
      "step": 1263940
    },
    {
      "epoch": 2.0684982620137076,
      "grad_norm": 0.40871357917785645,
      "learning_rate": 5.901636103661631e-06,
      "loss": 0.014,
      "step": 1263960
    },
    {
      "epoch": 2.0685309924523607,
      "grad_norm": 0.6540709137916565,
      "learning_rate": 5.9015702114481135e-06,
      "loss": 0.0131,
      "step": 1263980
    },
    {
      "epoch": 2.0685637228910143,
      "grad_norm": 0.3659415543079376,
      "learning_rate": 5.901504319234597e-06,
      "loss": 0.0155,
      "step": 1264000
    },
    {
      "epoch": 2.0685964533296675,
      "grad_norm": 0.2517094314098358,
      "learning_rate": 5.901438427021079e-06,
      "loss": 0.0153,
      "step": 1264020
    },
    {
      "epoch": 2.068629183768321,
      "grad_norm": 0.5635469555854797,
      "learning_rate": 5.9013725348075625e-06,
      "loss": 0.014,
      "step": 1264040
    },
    {
      "epoch": 2.068661914206974,
      "grad_norm": 0.7723129987716675,
      "learning_rate": 5.9013066425940444e-06,
      "loss": 0.0132,
      "step": 1264060
    },
    {
      "epoch": 2.0686946446456274,
      "grad_norm": 0.29296422004699707,
      "learning_rate": 5.901240750380528e-06,
      "loss": 0.0161,
      "step": 1264080
    },
    {
      "epoch": 2.068727375084281,
      "grad_norm": 0.44690078496932983,
      "learning_rate": 5.901174858167012e-06,
      "loss": 0.0203,
      "step": 1264100
    },
    {
      "epoch": 2.068760105522934,
      "grad_norm": 1.2227327823638916,
      "learning_rate": 5.9011089659534935e-06,
      "loss": 0.0177,
      "step": 1264120
    },
    {
      "epoch": 2.0687928359615877,
      "grad_norm": 0.26353365182876587,
      "learning_rate": 5.901043073739977e-06,
      "loss": 0.0202,
      "step": 1264140
    },
    {
      "epoch": 2.068825566400241,
      "grad_norm": 0.39431047439575195,
      "learning_rate": 5.900977181526459e-06,
      "loss": 0.0141,
      "step": 1264160
    },
    {
      "epoch": 2.0688582968388944,
      "grad_norm": 0.9872729182243347,
      "learning_rate": 5.9009112893129426e-06,
      "loss": 0.0221,
      "step": 1264180
    },
    {
      "epoch": 2.0688910272775476,
      "grad_norm": 0.105010025203228,
      "learning_rate": 5.900845397099425e-06,
      "loss": 0.0135,
      "step": 1264200
    },
    {
      "epoch": 2.0689237577162007,
      "grad_norm": 0.5155497789382935,
      "learning_rate": 5.900779504885908e-06,
      "loss": 0.0152,
      "step": 1264220
    },
    {
      "epoch": 2.0689564881548543,
      "grad_norm": 0.1547337919473648,
      "learning_rate": 5.900713612672391e-06,
      "loss": 0.019,
      "step": 1264240
    },
    {
      "epoch": 2.0689892185935075,
      "grad_norm": 0.7400718927383423,
      "learning_rate": 5.900647720458874e-06,
      "loss": 0.0168,
      "step": 1264260
    },
    {
      "epoch": 2.069021949032161,
      "grad_norm": 0.3388049900531769,
      "learning_rate": 5.900581828245356e-06,
      "loss": 0.0179,
      "step": 1264280
    },
    {
      "epoch": 2.0690546794708142,
      "grad_norm": 0.7816637754440308,
      "learning_rate": 5.90051593603184e-06,
      "loss": 0.0203,
      "step": 1264300
    },
    {
      "epoch": 2.069087409909468,
      "grad_norm": 1.0618717670440674,
      "learning_rate": 5.900450043818322e-06,
      "loss": 0.016,
      "step": 1264320
    },
    {
      "epoch": 2.069120140348121,
      "grad_norm": 0.15752677619457245,
      "learning_rate": 5.900384151604805e-06,
      "loss": 0.0144,
      "step": 1264340
    },
    {
      "epoch": 2.069152870786774,
      "grad_norm": 0.1354730725288391,
      "learning_rate": 5.900318259391288e-06,
      "loss": 0.0163,
      "step": 1264360
    },
    {
      "epoch": 2.0691856012254277,
      "grad_norm": 0.16534021496772766,
      "learning_rate": 5.900252367177771e-06,
      "loss": 0.0128,
      "step": 1264380
    },
    {
      "epoch": 2.069218331664081,
      "grad_norm": 0.2898993194103241,
      "learning_rate": 5.9001864749642535e-06,
      "loss": 0.0165,
      "step": 1264400
    },
    {
      "epoch": 2.0692510621027345,
      "grad_norm": 0.6743903160095215,
      "learning_rate": 5.900120582750737e-06,
      "loss": 0.0118,
      "step": 1264420
    },
    {
      "epoch": 2.0692837925413876,
      "grad_norm": 1.3648613691329956,
      "learning_rate": 5.90005469053722e-06,
      "loss": 0.0187,
      "step": 1264440
    },
    {
      "epoch": 2.069316522980041,
      "grad_norm": 0.5570787787437439,
      "learning_rate": 5.899988798323703e-06,
      "loss": 0.0147,
      "step": 1264460
    },
    {
      "epoch": 2.0693492534186944,
      "grad_norm": 0.286922425031662,
      "learning_rate": 5.899922906110186e-06,
      "loss": 0.0138,
      "step": 1264480
    },
    {
      "epoch": 2.0693819838573475,
      "grad_norm": 0.42871588468551636,
      "learning_rate": 5.899857013896668e-06,
      "loss": 0.0217,
      "step": 1264500
    },
    {
      "epoch": 2.069414714296001,
      "grad_norm": 0.3142182230949402,
      "learning_rate": 5.899791121683152e-06,
      "loss": 0.018,
      "step": 1264520
    },
    {
      "epoch": 2.0694474447346543,
      "grad_norm": 0.2934945523738861,
      "learning_rate": 5.8997252294696336e-06,
      "loss": 0.0157,
      "step": 1264540
    },
    {
      "epoch": 2.069480175173308,
      "grad_norm": 0.21551713347434998,
      "learning_rate": 5.899659337256117e-06,
      "loss": 0.0144,
      "step": 1264560
    },
    {
      "epoch": 2.069512905611961,
      "grad_norm": 0.6705841422080994,
      "learning_rate": 5.8995934450426e-06,
      "loss": 0.0114,
      "step": 1264580
    },
    {
      "epoch": 2.069545636050614,
      "grad_norm": 0.5547145009040833,
      "learning_rate": 5.899527552829083e-06,
      "loss": 0.0172,
      "step": 1264600
    },
    {
      "epoch": 2.0695783664892677,
      "grad_norm": 1.5649815797805786,
      "learning_rate": 5.899461660615565e-06,
      "loss": 0.0157,
      "step": 1264620
    },
    {
      "epoch": 2.069611096927921,
      "grad_norm": 0.17100059986114502,
      "learning_rate": 5.899395768402049e-06,
      "loss": 0.011,
      "step": 1264640
    },
    {
      "epoch": 2.0696438273665745,
      "grad_norm": 0.7012231349945068,
      "learning_rate": 5.899329876188531e-06,
      "loss": 0.0201,
      "step": 1264660
    },
    {
      "epoch": 2.0696765578052276,
      "grad_norm": 1.503028392791748,
      "learning_rate": 5.8992639839750144e-06,
      "loss": 0.0174,
      "step": 1264680
    },
    {
      "epoch": 2.0697092882438812,
      "grad_norm": 0.47097957134246826,
      "learning_rate": 5.899198091761496e-06,
      "loss": 0.015,
      "step": 1264700
    },
    {
      "epoch": 2.0697420186825344,
      "grad_norm": 0.41800302267074585,
      "learning_rate": 5.89913219954798e-06,
      "loss": 0.0085,
      "step": 1264720
    },
    {
      "epoch": 2.0697747491211875,
      "grad_norm": 0.34734073281288147,
      "learning_rate": 5.899066307334463e-06,
      "loss": 0.0124,
      "step": 1264740
    },
    {
      "epoch": 2.069807479559841,
      "grad_norm": 0.3017890453338623,
      "learning_rate": 5.899000415120945e-06,
      "loss": 0.0152,
      "step": 1264760
    },
    {
      "epoch": 2.0698402099984943,
      "grad_norm": 0.35063669085502625,
      "learning_rate": 5.898934522907428e-06,
      "loss": 0.0185,
      "step": 1264780
    },
    {
      "epoch": 2.069872940437148,
      "grad_norm": 1.2755779027938843,
      "learning_rate": 5.898868630693912e-06,
      "loss": 0.014,
      "step": 1264800
    },
    {
      "epoch": 2.069905670875801,
      "grad_norm": 0.42108434438705444,
      "learning_rate": 5.8988027384803945e-06,
      "loss": 0.0173,
      "step": 1264820
    },
    {
      "epoch": 2.0699384013144546,
      "grad_norm": 0.35312479734420776,
      "learning_rate": 5.898736846266877e-06,
      "loss": 0.0101,
      "step": 1264840
    },
    {
      "epoch": 2.0699711317531078,
      "grad_norm": 0.23085948824882507,
      "learning_rate": 5.898670954053361e-06,
      "loss": 0.0128,
      "step": 1264860
    },
    {
      "epoch": 2.070003862191761,
      "grad_norm": 0.48884743452072144,
      "learning_rate": 5.898605061839843e-06,
      "loss": 0.013,
      "step": 1264880
    },
    {
      "epoch": 2.0700365926304145,
      "grad_norm": 0.3271235227584839,
      "learning_rate": 5.898539169626326e-06,
      "loss": 0.0124,
      "step": 1264900
    },
    {
      "epoch": 2.0700693230690677,
      "grad_norm": 0.12125017493963242,
      "learning_rate": 5.898473277412808e-06,
      "loss": 0.0168,
      "step": 1264920
    },
    {
      "epoch": 2.0701020535077213,
      "grad_norm": 0.2124047577381134,
      "learning_rate": 5.898407385199292e-06,
      "loss": 0.012,
      "step": 1264940
    },
    {
      "epoch": 2.0701347839463744,
      "grad_norm": 0.720635712146759,
      "learning_rate": 5.8983414929857745e-06,
      "loss": 0.0122,
      "step": 1264960
    },
    {
      "epoch": 2.070167514385028,
      "grad_norm": 0.2739417850971222,
      "learning_rate": 5.898275600772257e-06,
      "loss": 0.0158,
      "step": 1264980
    },
    {
      "epoch": 2.070200244823681,
      "grad_norm": 0.4659585952758789,
      "learning_rate": 5.89820970855874e-06,
      "loss": 0.0133,
      "step": 1265000
    },
    {
      "epoch": 2.0702329752623343,
      "grad_norm": 0.21870258450508118,
      "learning_rate": 5.8981438163452236e-06,
      "loss": 0.0125,
      "step": 1265020
    },
    {
      "epoch": 2.070265705700988,
      "grad_norm": 0.637238085269928,
      "learning_rate": 5.8980779241317054e-06,
      "loss": 0.0163,
      "step": 1265040
    },
    {
      "epoch": 2.070298436139641,
      "grad_norm": 2.05743145942688,
      "learning_rate": 5.898012031918189e-06,
      "loss": 0.0161,
      "step": 1265060
    },
    {
      "epoch": 2.0703311665782946,
      "grad_norm": 0.8151994347572327,
      "learning_rate": 5.897946139704671e-06,
      "loss": 0.0176,
      "step": 1265080
    },
    {
      "epoch": 2.070363897016948,
      "grad_norm": 0.3689037263393402,
      "learning_rate": 5.8978802474911545e-06,
      "loss": 0.0164,
      "step": 1265100
    },
    {
      "epoch": 2.070396627455601,
      "grad_norm": 0.3028539717197418,
      "learning_rate": 5.897814355277636e-06,
      "loss": 0.0152,
      "step": 1265120
    },
    {
      "epoch": 2.0704293578942545,
      "grad_norm": 0.9571475386619568,
      "learning_rate": 5.89774846306412e-06,
      "loss": 0.0147,
      "step": 1265140
    },
    {
      "epoch": 2.0704620883329077,
      "grad_norm": 0.2483578324317932,
      "learning_rate": 5.897682570850604e-06,
      "loss": 0.0127,
      "step": 1265160
    },
    {
      "epoch": 2.0704948187715613,
      "grad_norm": 0.17413204908370972,
      "learning_rate": 5.8976166786370855e-06,
      "loss": 0.0159,
      "step": 1265180
    },
    {
      "epoch": 2.0705275492102144,
      "grad_norm": 0.46753236651420593,
      "learning_rate": 5.897550786423569e-06,
      "loss": 0.011,
      "step": 1265200
    },
    {
      "epoch": 2.070560279648868,
      "grad_norm": 0.07233187556266785,
      "learning_rate": 5.897484894210052e-06,
      "loss": 0.0146,
      "step": 1265220
    },
    {
      "epoch": 2.070593010087521,
      "grad_norm": 0.2343512773513794,
      "learning_rate": 5.8974190019965345e-06,
      "loss": 0.0206,
      "step": 1265240
    },
    {
      "epoch": 2.0706257405261743,
      "grad_norm": 0.7677455544471741,
      "learning_rate": 5.897353109783017e-06,
      "loss": 0.014,
      "step": 1265260
    },
    {
      "epoch": 2.070658470964828,
      "grad_norm": 0.25378572940826416,
      "learning_rate": 5.897287217569501e-06,
      "loss": 0.0124,
      "step": 1265280
    },
    {
      "epoch": 2.070691201403481,
      "grad_norm": 0.23807509243488312,
      "learning_rate": 5.897221325355983e-06,
      "loss": 0.018,
      "step": 1265300
    },
    {
      "epoch": 2.0707239318421347,
      "grad_norm": 1.8685246706008911,
      "learning_rate": 5.897155433142466e-06,
      "loss": 0.0194,
      "step": 1265320
    },
    {
      "epoch": 2.070756662280788,
      "grad_norm": 0.7276660203933716,
      "learning_rate": 5.897089540928948e-06,
      "loss": 0.0148,
      "step": 1265340
    },
    {
      "epoch": 2.0707893927194414,
      "grad_norm": 0.5742767453193665,
      "learning_rate": 5.897023648715432e-06,
      "loss": 0.0186,
      "step": 1265360
    },
    {
      "epoch": 2.0708221231580946,
      "grad_norm": 0.2177276611328125,
      "learning_rate": 5.8969577565019146e-06,
      "loss": 0.0138,
      "step": 1265380
    },
    {
      "epoch": 2.0708548535967477,
      "grad_norm": 0.10290876775979996,
      "learning_rate": 5.896891864288397e-06,
      "loss": 0.0166,
      "step": 1265400
    },
    {
      "epoch": 2.0708875840354013,
      "grad_norm": 1.28392493724823,
      "learning_rate": 5.89682597207488e-06,
      "loss": 0.0145,
      "step": 1265420
    },
    {
      "epoch": 2.0709203144740544,
      "grad_norm": 1.5902565717697144,
      "learning_rate": 5.896760079861364e-06,
      "loss": 0.0124,
      "step": 1265440
    },
    {
      "epoch": 2.070953044912708,
      "grad_norm": 0.66343092918396,
      "learning_rate": 5.8966941876478455e-06,
      "loss": 0.0208,
      "step": 1265460
    },
    {
      "epoch": 2.070985775351361,
      "grad_norm": 0.5648139119148254,
      "learning_rate": 5.896628295434329e-06,
      "loss": 0.0231,
      "step": 1265480
    },
    {
      "epoch": 2.071018505790015,
      "grad_norm": 0.6321541666984558,
      "learning_rate": 5.896562403220813e-06,
      "loss": 0.0113,
      "step": 1265500
    },
    {
      "epoch": 2.071051236228668,
      "grad_norm": 0.6213366389274597,
      "learning_rate": 5.896496511007295e-06,
      "loss": 0.0146,
      "step": 1265520
    },
    {
      "epoch": 2.071083966667321,
      "grad_norm": 0.059975072741508484,
      "learning_rate": 5.896430618793778e-06,
      "loss": 0.0129,
      "step": 1265540
    },
    {
      "epoch": 2.0711166971059747,
      "grad_norm": 0.5047231912612915,
      "learning_rate": 5.89636472658026e-06,
      "loss": 0.0156,
      "step": 1265560
    },
    {
      "epoch": 2.071149427544628,
      "grad_norm": 0.14738447964191437,
      "learning_rate": 5.896298834366744e-06,
      "loss": 0.0121,
      "step": 1265580
    },
    {
      "epoch": 2.0711821579832814,
      "grad_norm": 0.6282199621200562,
      "learning_rate": 5.896232942153226e-06,
      "loss": 0.0185,
      "step": 1265600
    },
    {
      "epoch": 2.0712148884219346,
      "grad_norm": 0.25477513670921326,
      "learning_rate": 5.896167049939709e-06,
      "loss": 0.0162,
      "step": 1265620
    },
    {
      "epoch": 2.071247618860588,
      "grad_norm": 0.6259574294090271,
      "learning_rate": 5.896101157726192e-06,
      "loss": 0.0103,
      "step": 1265640
    },
    {
      "epoch": 2.0712803492992413,
      "grad_norm": 1.2422521114349365,
      "learning_rate": 5.8960352655126755e-06,
      "loss": 0.0146,
      "step": 1265660
    },
    {
      "epoch": 2.0713130797378945,
      "grad_norm": 0.8188248872756958,
      "learning_rate": 5.895969373299157e-06,
      "loss": 0.0088,
      "step": 1265680
    },
    {
      "epoch": 2.071345810176548,
      "grad_norm": 0.3563201129436493,
      "learning_rate": 5.895903481085641e-06,
      "loss": 0.0201,
      "step": 1265700
    },
    {
      "epoch": 2.071378540615201,
      "grad_norm": 0.2940026819705963,
      "learning_rate": 5.895837588872123e-06,
      "loss": 0.023,
      "step": 1265720
    },
    {
      "epoch": 2.071411271053855,
      "grad_norm": 0.5157951712608337,
      "learning_rate": 5.895771696658606e-06,
      "loss": 0.0136,
      "step": 1265740
    },
    {
      "epoch": 2.071444001492508,
      "grad_norm": 0.5273736715316772,
      "learning_rate": 5.895705804445089e-06,
      "loss": 0.0182,
      "step": 1265760
    },
    {
      "epoch": 2.0714767319311616,
      "grad_norm": 0.0671037808060646,
      "learning_rate": 5.895639912231572e-06,
      "loss": 0.0132,
      "step": 1265780
    },
    {
      "epoch": 2.0715094623698147,
      "grad_norm": 0.6676426529884338,
      "learning_rate": 5.895574020018055e-06,
      "loss": 0.0142,
      "step": 1265800
    },
    {
      "epoch": 2.071542192808468,
      "grad_norm": 0.640528678894043,
      "learning_rate": 5.895508127804538e-06,
      "loss": 0.0187,
      "step": 1265820
    },
    {
      "epoch": 2.0715749232471214,
      "grad_norm": 0.24831989407539368,
      "learning_rate": 5.895442235591021e-06,
      "loss": 0.0132,
      "step": 1265840
    },
    {
      "epoch": 2.0716076536857746,
      "grad_norm": 0.5551360845565796,
      "learning_rate": 5.895376343377504e-06,
      "loss": 0.0148,
      "step": 1265860
    },
    {
      "epoch": 2.071640384124428,
      "grad_norm": 0.5463539361953735,
      "learning_rate": 5.895310451163987e-06,
      "loss": 0.0152,
      "step": 1265880
    },
    {
      "epoch": 2.0716731145630813,
      "grad_norm": 0.46362683176994324,
      "learning_rate": 5.895244558950469e-06,
      "loss": 0.0166,
      "step": 1265900
    },
    {
      "epoch": 2.071705845001735,
      "grad_norm": 0.23752707242965698,
      "learning_rate": 5.895178666736953e-06,
      "loss": 0.0163,
      "step": 1265920
    },
    {
      "epoch": 2.071738575440388,
      "grad_norm": 0.1796707659959793,
      "learning_rate": 5.895112774523435e-06,
      "loss": 0.0186,
      "step": 1265940
    },
    {
      "epoch": 2.0717713058790412,
      "grad_norm": 0.27810895442962646,
      "learning_rate": 5.895046882309918e-06,
      "loss": 0.0137,
      "step": 1265960
    },
    {
      "epoch": 2.071804036317695,
      "grad_norm": 0.1731833666563034,
      "learning_rate": 5.8949809900964e-06,
      "loss": 0.0142,
      "step": 1265980
    },
    {
      "epoch": 2.071836766756348,
      "grad_norm": 0.628287136554718,
      "learning_rate": 5.894915097882884e-06,
      "loss": 0.0108,
      "step": 1266000
    },
    {
      "epoch": 2.0718694971950016,
      "grad_norm": 0.5462999939918518,
      "learning_rate": 5.8948492056693665e-06,
      "loss": 0.0138,
      "step": 1266020
    },
    {
      "epoch": 2.0719022276336547,
      "grad_norm": 0.7339611649513245,
      "learning_rate": 5.89478331345585e-06,
      "loss": 0.0116,
      "step": 1266040
    },
    {
      "epoch": 2.071934958072308,
      "grad_norm": 0.34845465421676636,
      "learning_rate": 5.894717421242332e-06,
      "loss": 0.016,
      "step": 1266060
    },
    {
      "epoch": 2.0719676885109615,
      "grad_norm": 0.8980509638786316,
      "learning_rate": 5.8946515290288155e-06,
      "loss": 0.0116,
      "step": 1266080
    },
    {
      "epoch": 2.0720004189496146,
      "grad_norm": 0.6617476344108582,
      "learning_rate": 5.8945856368152974e-06,
      "loss": 0.0218,
      "step": 1266100
    },
    {
      "epoch": 2.072033149388268,
      "grad_norm": 0.15767113864421844,
      "learning_rate": 5.894519744601781e-06,
      "loss": 0.0148,
      "step": 1266120
    },
    {
      "epoch": 2.0720658798269214,
      "grad_norm": 0.3254111707210541,
      "learning_rate": 5.894453852388263e-06,
      "loss": 0.0122,
      "step": 1266140
    },
    {
      "epoch": 2.072098610265575,
      "grad_norm": 0.525529146194458,
      "learning_rate": 5.8943879601747465e-06,
      "loss": 0.0174,
      "step": 1266160
    },
    {
      "epoch": 2.072131340704228,
      "grad_norm": 0.1165710985660553,
      "learning_rate": 5.894322067961229e-06,
      "loss": 0.0202,
      "step": 1266180
    },
    {
      "epoch": 2.0721640711428813,
      "grad_norm": 0.6301875114440918,
      "learning_rate": 5.894256175747712e-06,
      "loss": 0.0129,
      "step": 1266200
    },
    {
      "epoch": 2.072196801581535,
      "grad_norm": 1.626501202583313,
      "learning_rate": 5.8941902835341956e-06,
      "loss": 0.0174,
      "step": 1266220
    },
    {
      "epoch": 2.072229532020188,
      "grad_norm": 0.4890284240245819,
      "learning_rate": 5.894124391320678e-06,
      "loss": 0.0219,
      "step": 1266240
    },
    {
      "epoch": 2.0722622624588416,
      "grad_norm": 0.26814281940460205,
      "learning_rate": 5.894058499107161e-06,
      "loss": 0.012,
      "step": 1266260
    },
    {
      "epoch": 2.0722949928974947,
      "grad_norm": 0.17463454604148865,
      "learning_rate": 5.893992606893644e-06,
      "loss": 0.0152,
      "step": 1266280
    },
    {
      "epoch": 2.0723277233361483,
      "grad_norm": 0.9630952477455139,
      "learning_rate": 5.893926714680127e-06,
      "loss": 0.0186,
      "step": 1266300
    },
    {
      "epoch": 2.0723604537748015,
      "grad_norm": 0.11892406642436981,
      "learning_rate": 5.893860822466609e-06,
      "loss": 0.0093,
      "step": 1266320
    },
    {
      "epoch": 2.0723931842134546,
      "grad_norm": 0.2388458102941513,
      "learning_rate": 5.893794930253093e-06,
      "loss": 0.0106,
      "step": 1266340
    },
    {
      "epoch": 2.0724259146521082,
      "grad_norm": 0.3781990706920624,
      "learning_rate": 5.893729038039575e-06,
      "loss": 0.0143,
      "step": 1266360
    },
    {
      "epoch": 2.0724586450907614,
      "grad_norm": 0.18588784337043762,
      "learning_rate": 5.893663145826058e-06,
      "loss": 0.0179,
      "step": 1266380
    },
    {
      "epoch": 2.072491375529415,
      "grad_norm": 0.49939632415771484,
      "learning_rate": 5.893597253612541e-06,
      "loss": 0.0192,
      "step": 1266400
    },
    {
      "epoch": 2.072524105968068,
      "grad_norm": 0.24041494727134705,
      "learning_rate": 5.893531361399024e-06,
      "loss": 0.0113,
      "step": 1266420
    },
    {
      "epoch": 2.0725568364067217,
      "grad_norm": 0.5227180123329163,
      "learning_rate": 5.8934654691855065e-06,
      "loss": 0.0139,
      "step": 1266440
    },
    {
      "epoch": 2.072589566845375,
      "grad_norm": 0.356905460357666,
      "learning_rate": 5.89339957697199e-06,
      "loss": 0.0184,
      "step": 1266460
    },
    {
      "epoch": 2.072622297284028,
      "grad_norm": 0.24640582501888275,
      "learning_rate": 5.893333684758472e-06,
      "loss": 0.0165,
      "step": 1266480
    },
    {
      "epoch": 2.0726550277226816,
      "grad_norm": 0.6870121359825134,
      "learning_rate": 5.893267792544956e-06,
      "loss": 0.0182,
      "step": 1266500
    },
    {
      "epoch": 2.0726877581613348,
      "grad_norm": 0.03884682431817055,
      "learning_rate": 5.8932019003314375e-06,
      "loss": 0.0114,
      "step": 1266520
    },
    {
      "epoch": 2.0727204885999884,
      "grad_norm": 0.22993507981300354,
      "learning_rate": 5.893136008117921e-06,
      "loss": 0.016,
      "step": 1266540
    },
    {
      "epoch": 2.0727532190386415,
      "grad_norm": 0.4622410535812378,
      "learning_rate": 5.893070115904405e-06,
      "loss": 0.0111,
      "step": 1266560
    },
    {
      "epoch": 2.072785949477295,
      "grad_norm": 0.298749178647995,
      "learning_rate": 5.8930042236908866e-06,
      "loss": 0.0128,
      "step": 1266580
    },
    {
      "epoch": 2.0728186799159483,
      "grad_norm": 0.29482120275497437,
      "learning_rate": 5.89293833147737e-06,
      "loss": 0.0191,
      "step": 1266600
    },
    {
      "epoch": 2.0728514103546014,
      "grad_norm": 0.7307564616203308,
      "learning_rate": 5.892872439263853e-06,
      "loss": 0.0238,
      "step": 1266620
    },
    {
      "epoch": 2.072884140793255,
      "grad_norm": 0.5633337497711182,
      "learning_rate": 5.892806547050336e-06,
      "loss": 0.0211,
      "step": 1266640
    },
    {
      "epoch": 2.072916871231908,
      "grad_norm": 0.4157956838607788,
      "learning_rate": 5.892740654836818e-06,
      "loss": 0.0166,
      "step": 1266660
    },
    {
      "epoch": 2.0729496016705617,
      "grad_norm": 1.8057035207748413,
      "learning_rate": 5.892674762623302e-06,
      "loss": 0.0153,
      "step": 1266680
    },
    {
      "epoch": 2.072982332109215,
      "grad_norm": 0.37191200256347656,
      "learning_rate": 5.892608870409784e-06,
      "loss": 0.0122,
      "step": 1266700
    },
    {
      "epoch": 2.073015062547868,
      "grad_norm": 0.6857902407646179,
      "learning_rate": 5.8925429781962674e-06,
      "loss": 0.0165,
      "step": 1266720
    },
    {
      "epoch": 2.0730477929865216,
      "grad_norm": 1.1263821125030518,
      "learning_rate": 5.892477085982749e-06,
      "loss": 0.0125,
      "step": 1266740
    },
    {
      "epoch": 2.073080523425175,
      "grad_norm": 0.44919246435165405,
      "learning_rate": 5.892411193769233e-06,
      "loss": 0.0118,
      "step": 1266760
    },
    {
      "epoch": 2.0731132538638284,
      "grad_norm": 0.17591021955013275,
      "learning_rate": 5.892345301555716e-06,
      "loss": 0.0129,
      "step": 1266780
    },
    {
      "epoch": 2.0731459843024815,
      "grad_norm": 0.3758145570755005,
      "learning_rate": 5.892279409342198e-06,
      "loss": 0.0172,
      "step": 1266800
    },
    {
      "epoch": 2.073178714741135,
      "grad_norm": 0.4464138150215149,
      "learning_rate": 5.892213517128681e-06,
      "loss": 0.0153,
      "step": 1266820
    },
    {
      "epoch": 2.0732114451797883,
      "grad_norm": 0.755117654800415,
      "learning_rate": 5.892147624915165e-06,
      "loss": 0.0177,
      "step": 1266840
    },
    {
      "epoch": 2.0732441756184414,
      "grad_norm": 0.15418820083141327,
      "learning_rate": 5.892081732701647e-06,
      "loss": 0.0191,
      "step": 1266860
    },
    {
      "epoch": 2.073276906057095,
      "grad_norm": 0.9884068965911865,
      "learning_rate": 5.89201584048813e-06,
      "loss": 0.0163,
      "step": 1266880
    },
    {
      "epoch": 2.073309636495748,
      "grad_norm": 0.30789756774902344,
      "learning_rate": 5.891949948274614e-06,
      "loss": 0.0119,
      "step": 1266900
    },
    {
      "epoch": 2.0733423669344018,
      "grad_norm": 0.28857532143592834,
      "learning_rate": 5.891884056061096e-06,
      "loss": 0.0275,
      "step": 1266920
    },
    {
      "epoch": 2.073375097373055,
      "grad_norm": 0.3306395709514618,
      "learning_rate": 5.891818163847579e-06,
      "loss": 0.019,
      "step": 1266940
    },
    {
      "epoch": 2.0734078278117085,
      "grad_norm": 0.29909855127334595,
      "learning_rate": 5.891752271634061e-06,
      "loss": 0.0188,
      "step": 1266960
    },
    {
      "epoch": 2.0734405582503617,
      "grad_norm": 0.12307381629943848,
      "learning_rate": 5.891686379420545e-06,
      "loss": 0.014,
      "step": 1266980
    },
    {
      "epoch": 2.073473288689015,
      "grad_norm": 0.2673490345478058,
      "learning_rate": 5.891620487207027e-06,
      "loss": 0.0154,
      "step": 1267000
    },
    {
      "epoch": 2.0735060191276684,
      "grad_norm": 0.1634579300880432,
      "learning_rate": 5.89155459499351e-06,
      "loss": 0.0157,
      "step": 1267020
    },
    {
      "epoch": 2.0735387495663216,
      "grad_norm": 0.6575281023979187,
      "learning_rate": 5.891488702779993e-06,
      "loss": 0.0153,
      "step": 1267040
    },
    {
      "epoch": 2.073571480004975,
      "grad_norm": 0.4275946617126465,
      "learning_rate": 5.891422810566476e-06,
      "loss": 0.0173,
      "step": 1267060
    },
    {
      "epoch": 2.0736042104436283,
      "grad_norm": 0.20032410323619843,
      "learning_rate": 5.8913569183529585e-06,
      "loss": 0.0189,
      "step": 1267080
    },
    {
      "epoch": 2.073636940882282,
      "grad_norm": 0.9453483819961548,
      "learning_rate": 5.891291026139442e-06,
      "loss": 0.0169,
      "step": 1267100
    },
    {
      "epoch": 2.073669671320935,
      "grad_norm": 0.12418531626462936,
      "learning_rate": 5.891225133925924e-06,
      "loss": 0.0177,
      "step": 1267120
    },
    {
      "epoch": 2.073702401759588,
      "grad_norm": 0.1833738535642624,
      "learning_rate": 5.8911592417124075e-06,
      "loss": 0.0126,
      "step": 1267140
    },
    {
      "epoch": 2.073735132198242,
      "grad_norm": 0.5513131618499756,
      "learning_rate": 5.891093349498889e-06,
      "loss": 0.0194,
      "step": 1267160
    },
    {
      "epoch": 2.073767862636895,
      "grad_norm": 0.1623661071062088,
      "learning_rate": 5.891027457285373e-06,
      "loss": 0.0209,
      "step": 1267180
    },
    {
      "epoch": 2.0738005930755485,
      "grad_norm": 0.28791624307632446,
      "learning_rate": 5.890961565071856e-06,
      "loss": 0.0159,
      "step": 1267200
    },
    {
      "epoch": 2.0738333235142017,
      "grad_norm": 0.2560577988624573,
      "learning_rate": 5.8908956728583385e-06,
      "loss": 0.0145,
      "step": 1267220
    },
    {
      "epoch": 2.0738660539528553,
      "grad_norm": 0.682975172996521,
      "learning_rate": 5.890829780644821e-06,
      "loss": 0.0122,
      "step": 1267240
    },
    {
      "epoch": 2.0738987843915084,
      "grad_norm": 0.31316423416137695,
      "learning_rate": 5.890763888431305e-06,
      "loss": 0.0198,
      "step": 1267260
    },
    {
      "epoch": 2.0739315148301616,
      "grad_norm": 0.3626226484775543,
      "learning_rate": 5.8906979962177875e-06,
      "loss": 0.0149,
      "step": 1267280
    },
    {
      "epoch": 2.073964245268815,
      "grad_norm": 1.2856820821762085,
      "learning_rate": 5.89063210400427e-06,
      "loss": 0.014,
      "step": 1267300
    },
    {
      "epoch": 2.0739969757074683,
      "grad_norm": 0.3592831790447235,
      "learning_rate": 5.890566211790754e-06,
      "loss": 0.0141,
      "step": 1267320
    },
    {
      "epoch": 2.074029706146122,
      "grad_norm": 0.26845845580101013,
      "learning_rate": 5.890500319577236e-06,
      "loss": 0.0174,
      "step": 1267340
    },
    {
      "epoch": 2.074062436584775,
      "grad_norm": 0.2977999448776245,
      "learning_rate": 5.890434427363719e-06,
      "loss": 0.016,
      "step": 1267360
    },
    {
      "epoch": 2.0740951670234287,
      "grad_norm": 0.3925235867500305,
      "learning_rate": 5.890368535150201e-06,
      "loss": 0.0214,
      "step": 1267380
    },
    {
      "epoch": 2.074127897462082,
      "grad_norm": 0.2678758203983307,
      "learning_rate": 5.890302642936685e-06,
      "loss": 0.0209,
      "step": 1267400
    },
    {
      "epoch": 2.074160627900735,
      "grad_norm": 0.2934238910675049,
      "learning_rate": 5.8902367507231676e-06,
      "loss": 0.0158,
      "step": 1267420
    },
    {
      "epoch": 2.0741933583393886,
      "grad_norm": 0.23181495070457458,
      "learning_rate": 5.89017085850965e-06,
      "loss": 0.0116,
      "step": 1267440
    },
    {
      "epoch": 2.0742260887780417,
      "grad_norm": 0.4357321858406067,
      "learning_rate": 5.890104966296133e-06,
      "loss": 0.0151,
      "step": 1267460
    },
    {
      "epoch": 2.0742588192166953,
      "grad_norm": 0.1018831878900528,
      "learning_rate": 5.890039074082617e-06,
      "loss": 0.0111,
      "step": 1267480
    },
    {
      "epoch": 2.0742915496553485,
      "grad_norm": 1.292055368423462,
      "learning_rate": 5.8899731818690985e-06,
      "loss": 0.0157,
      "step": 1267500
    },
    {
      "epoch": 2.074324280094002,
      "grad_norm": 0.8695569634437561,
      "learning_rate": 5.889907289655582e-06,
      "loss": 0.0209,
      "step": 1267520
    },
    {
      "epoch": 2.074357010532655,
      "grad_norm": 0.5833091139793396,
      "learning_rate": 5.889841397442064e-06,
      "loss": 0.0124,
      "step": 1267540
    },
    {
      "epoch": 2.0743897409713083,
      "grad_norm": 0.6250423789024353,
      "learning_rate": 5.889775505228548e-06,
      "loss": 0.0185,
      "step": 1267560
    },
    {
      "epoch": 2.074422471409962,
      "grad_norm": 0.834372878074646,
      "learning_rate": 5.88970961301503e-06,
      "loss": 0.0165,
      "step": 1267580
    },
    {
      "epoch": 2.074455201848615,
      "grad_norm": 0.13956224918365479,
      "learning_rate": 5.889643720801513e-06,
      "loss": 0.0133,
      "step": 1267600
    },
    {
      "epoch": 2.0744879322872687,
      "grad_norm": 0.5480040311813354,
      "learning_rate": 5.889577828587997e-06,
      "loss": 0.0201,
      "step": 1267620
    },
    {
      "epoch": 2.074520662725922,
      "grad_norm": 0.8146522045135498,
      "learning_rate": 5.889511936374479e-06,
      "loss": 0.013,
      "step": 1267640
    },
    {
      "epoch": 2.074553393164575,
      "grad_norm": 0.5174240469932556,
      "learning_rate": 5.889446044160962e-06,
      "loss": 0.0137,
      "step": 1267660
    },
    {
      "epoch": 2.0745861236032286,
      "grad_norm": 0.3724718987941742,
      "learning_rate": 5.889380151947445e-06,
      "loss": 0.0173,
      "step": 1267680
    },
    {
      "epoch": 2.0746188540418817,
      "grad_norm": 0.09910835325717926,
      "learning_rate": 5.8893142597339285e-06,
      "loss": 0.0155,
      "step": 1267700
    },
    {
      "epoch": 2.0746515844805353,
      "grad_norm": 0.4533272385597229,
      "learning_rate": 5.88924836752041e-06,
      "loss": 0.0192,
      "step": 1267720
    },
    {
      "epoch": 2.0746843149191885,
      "grad_norm": 0.9639134407043457,
      "learning_rate": 5.889182475306894e-06,
      "loss": 0.0183,
      "step": 1267740
    },
    {
      "epoch": 2.074717045357842,
      "grad_norm": 0.055681418627500534,
      "learning_rate": 5.889116583093376e-06,
      "loss": 0.0131,
      "step": 1267760
    },
    {
      "epoch": 2.074749775796495,
      "grad_norm": 0.19813133776187897,
      "learning_rate": 5.8890506908798594e-06,
      "loss": 0.0155,
      "step": 1267780
    },
    {
      "epoch": 2.0747825062351484,
      "grad_norm": 0.8417033553123474,
      "learning_rate": 5.888984798666342e-06,
      "loss": 0.0195,
      "step": 1267800
    },
    {
      "epoch": 2.074815236673802,
      "grad_norm": 0.6120944023132324,
      "learning_rate": 5.888918906452825e-06,
      "loss": 0.0225,
      "step": 1267820
    },
    {
      "epoch": 2.074847967112455,
      "grad_norm": 0.23845729231834412,
      "learning_rate": 5.888853014239308e-06,
      "loss": 0.011,
      "step": 1267840
    },
    {
      "epoch": 2.0748806975511087,
      "grad_norm": 0.36987072229385376,
      "learning_rate": 5.888787122025791e-06,
      "loss": 0.0229,
      "step": 1267860
    },
    {
      "epoch": 2.074913427989762,
      "grad_norm": 0.09879715740680695,
      "learning_rate": 5.888721229812273e-06,
      "loss": 0.014,
      "step": 1267880
    },
    {
      "epoch": 2.0749461584284155,
      "grad_norm": 0.21959485113620758,
      "learning_rate": 5.888655337598757e-06,
      "loss": 0.0184,
      "step": 1267900
    },
    {
      "epoch": 2.0749788888670686,
      "grad_norm": 0.678915798664093,
      "learning_rate": 5.888589445385239e-06,
      "loss": 0.0168,
      "step": 1267920
    },
    {
      "epoch": 2.0750116193057218,
      "grad_norm": 0.21566829085350037,
      "learning_rate": 5.888523553171722e-06,
      "loss": 0.0102,
      "step": 1267940
    },
    {
      "epoch": 2.0750443497443753,
      "grad_norm": 0.4859229326248169,
      "learning_rate": 5.888457660958206e-06,
      "loss": 0.0157,
      "step": 1267960
    },
    {
      "epoch": 2.0750770801830285,
      "grad_norm": 0.5410310626029968,
      "learning_rate": 5.888391768744688e-06,
      "loss": 0.0133,
      "step": 1267980
    },
    {
      "epoch": 2.075109810621682,
      "grad_norm": 0.5035894513130188,
      "learning_rate": 5.888325876531171e-06,
      "loss": 0.0132,
      "step": 1268000
    },
    {
      "epoch": 2.0751425410603352,
      "grad_norm": 0.32561740279197693,
      "learning_rate": 5.888259984317653e-06,
      "loss": 0.0141,
      "step": 1268020
    },
    {
      "epoch": 2.075175271498989,
      "grad_norm": 0.4060159921646118,
      "learning_rate": 5.888194092104137e-06,
      "loss": 0.0142,
      "step": 1268040
    },
    {
      "epoch": 2.075208001937642,
      "grad_norm": 0.20663389563560486,
      "learning_rate": 5.8881281998906195e-06,
      "loss": 0.0157,
      "step": 1268060
    },
    {
      "epoch": 2.075240732376295,
      "grad_norm": 0.14096125960350037,
      "learning_rate": 5.888062307677102e-06,
      "loss": 0.0112,
      "step": 1268080
    },
    {
      "epoch": 2.0752734628149487,
      "grad_norm": 0.28762078285217285,
      "learning_rate": 5.887996415463585e-06,
      "loss": 0.0154,
      "step": 1268100
    },
    {
      "epoch": 2.075306193253602,
      "grad_norm": 0.5131163597106934,
      "learning_rate": 5.8879305232500685e-06,
      "loss": 0.0153,
      "step": 1268120
    },
    {
      "epoch": 2.0753389236922555,
      "grad_norm": 0.8876287937164307,
      "learning_rate": 5.8878646310365504e-06,
      "loss": 0.0178,
      "step": 1268140
    },
    {
      "epoch": 2.0753716541309086,
      "grad_norm": 0.2537694275379181,
      "learning_rate": 5.887798738823034e-06,
      "loss": 0.0145,
      "step": 1268160
    },
    {
      "epoch": 2.0754043845695618,
      "grad_norm": 0.34028860926628113,
      "learning_rate": 5.887732846609516e-06,
      "loss": 0.016,
      "step": 1268180
    },
    {
      "epoch": 2.0754371150082154,
      "grad_norm": 0.3106266260147095,
      "learning_rate": 5.8876669543959995e-06,
      "loss": 0.0165,
      "step": 1268200
    },
    {
      "epoch": 2.0754698454468685,
      "grad_norm": 0.24140450358390808,
      "learning_rate": 5.887601062182482e-06,
      "loss": 0.0144,
      "step": 1268220
    },
    {
      "epoch": 2.075502575885522,
      "grad_norm": 0.5339218974113464,
      "learning_rate": 5.887535169968965e-06,
      "loss": 0.013,
      "step": 1268240
    },
    {
      "epoch": 2.0755353063241753,
      "grad_norm": 0.6119009852409363,
      "learning_rate": 5.887469277755448e-06,
      "loss": 0.0164,
      "step": 1268260
    },
    {
      "epoch": 2.075568036762829,
      "grad_norm": 0.42591559886932373,
      "learning_rate": 5.887403385541931e-06,
      "loss": 0.0117,
      "step": 1268280
    },
    {
      "epoch": 2.075600767201482,
      "grad_norm": 0.4720887839794159,
      "learning_rate": 5.887337493328413e-06,
      "loss": 0.0198,
      "step": 1268300
    },
    {
      "epoch": 2.075633497640135,
      "grad_norm": 0.08534635603427887,
      "learning_rate": 5.887271601114897e-06,
      "loss": 0.0135,
      "step": 1268320
    },
    {
      "epoch": 2.0756662280787888,
      "grad_norm": 0.52302086353302,
      "learning_rate": 5.88720570890138e-06,
      "loss": 0.0172,
      "step": 1268340
    },
    {
      "epoch": 2.075698958517442,
      "grad_norm": 0.43591323494911194,
      "learning_rate": 5.887139816687862e-06,
      "loss": 0.015,
      "step": 1268360
    },
    {
      "epoch": 2.0757316889560955,
      "grad_norm": 0.08175238966941833,
      "learning_rate": 5.887073924474346e-06,
      "loss": 0.0152,
      "step": 1268380
    },
    {
      "epoch": 2.0757644193947486,
      "grad_norm": 0.09380646049976349,
      "learning_rate": 5.887008032260828e-06,
      "loss": 0.0235,
      "step": 1268400
    },
    {
      "epoch": 2.0757971498334022,
      "grad_norm": 0.5344701409339905,
      "learning_rate": 5.886942140047311e-06,
      "loss": 0.0176,
      "step": 1268420
    },
    {
      "epoch": 2.0758298802720554,
      "grad_norm": 0.5720546841621399,
      "learning_rate": 5.886876247833794e-06,
      "loss": 0.0169,
      "step": 1268440
    },
    {
      "epoch": 2.0758626107107085,
      "grad_norm": 0.7302452325820923,
      "learning_rate": 5.886810355620277e-06,
      "loss": 0.0127,
      "step": 1268460
    },
    {
      "epoch": 2.075895341149362,
      "grad_norm": 0.49803438782691956,
      "learning_rate": 5.8867444634067596e-06,
      "loss": 0.02,
      "step": 1268480
    },
    {
      "epoch": 2.0759280715880153,
      "grad_norm": 0.41826605796813965,
      "learning_rate": 5.886678571193243e-06,
      "loss": 0.0165,
      "step": 1268500
    },
    {
      "epoch": 2.075960802026669,
      "grad_norm": 0.2375878244638443,
      "learning_rate": 5.886612678979725e-06,
      "loss": 0.0154,
      "step": 1268520
    },
    {
      "epoch": 2.075993532465322,
      "grad_norm": 1.3130887746810913,
      "learning_rate": 5.886546786766209e-06,
      "loss": 0.0148,
      "step": 1268540
    },
    {
      "epoch": 2.0760262629039756,
      "grad_norm": 0.1649232804775238,
      "learning_rate": 5.8864808945526905e-06,
      "loss": 0.0159,
      "step": 1268560
    },
    {
      "epoch": 2.0760589933426288,
      "grad_norm": 0.18812525272369385,
      "learning_rate": 5.886415002339174e-06,
      "loss": 0.0129,
      "step": 1268580
    },
    {
      "epoch": 2.076091723781282,
      "grad_norm": 0.4081180691719055,
      "learning_rate": 5.886349110125657e-06,
      "loss": 0.0133,
      "step": 1268600
    },
    {
      "epoch": 2.0761244542199355,
      "grad_norm": 0.37336045503616333,
      "learning_rate": 5.88628321791214e-06,
      "loss": 0.0135,
      "step": 1268620
    },
    {
      "epoch": 2.0761571846585887,
      "grad_norm": 0.14226186275482178,
      "learning_rate": 5.886217325698622e-06,
      "loss": 0.0146,
      "step": 1268640
    },
    {
      "epoch": 2.0761899150972423,
      "grad_norm": 1.6991724967956543,
      "learning_rate": 5.886151433485106e-06,
      "loss": 0.0172,
      "step": 1268660
    },
    {
      "epoch": 2.0762226455358954,
      "grad_norm": 0.39436957240104675,
      "learning_rate": 5.886085541271589e-06,
      "loss": 0.0148,
      "step": 1268680
    },
    {
      "epoch": 2.076255375974549,
      "grad_norm": 0.49679622054100037,
      "learning_rate": 5.886019649058071e-06,
      "loss": 0.0139,
      "step": 1268700
    },
    {
      "epoch": 2.076288106413202,
      "grad_norm": 0.6497257351875305,
      "learning_rate": 5.885953756844555e-06,
      "loss": 0.0233,
      "step": 1268720
    },
    {
      "epoch": 2.0763208368518553,
      "grad_norm": 0.29387548565864563,
      "learning_rate": 5.885887864631037e-06,
      "loss": 0.0209,
      "step": 1268740
    },
    {
      "epoch": 2.076353567290509,
      "grad_norm": 0.43248802423477173,
      "learning_rate": 5.8858219724175205e-06,
      "loss": 0.011,
      "step": 1268760
    },
    {
      "epoch": 2.076386297729162,
      "grad_norm": 0.2915958762168884,
      "learning_rate": 5.885756080204002e-06,
      "loss": 0.0164,
      "step": 1268780
    },
    {
      "epoch": 2.0764190281678157,
      "grad_norm": 0.21396225690841675,
      "learning_rate": 5.885690187990486e-06,
      "loss": 0.0179,
      "step": 1268800
    },
    {
      "epoch": 2.076451758606469,
      "grad_norm": 1.0834088325500488,
      "learning_rate": 5.885624295776969e-06,
      "loss": 0.0182,
      "step": 1268820
    },
    {
      "epoch": 2.0764844890451224,
      "grad_norm": 0.3892298638820648,
      "learning_rate": 5.885558403563451e-06,
      "loss": 0.0143,
      "step": 1268840
    },
    {
      "epoch": 2.0765172194837755,
      "grad_norm": 0.2234143614768982,
      "learning_rate": 5.885492511349934e-06,
      "loss": 0.0127,
      "step": 1268860
    },
    {
      "epoch": 2.0765499499224287,
      "grad_norm": 0.24000397324562073,
      "learning_rate": 5.885426619136418e-06,
      "loss": 0.0164,
      "step": 1268880
    },
    {
      "epoch": 2.0765826803610823,
      "grad_norm": 0.35183185338974,
      "learning_rate": 5.8853607269229e-06,
      "loss": 0.013,
      "step": 1268900
    },
    {
      "epoch": 2.0766154107997354,
      "grad_norm": 0.5456929802894592,
      "learning_rate": 5.885294834709383e-06,
      "loss": 0.0181,
      "step": 1268920
    },
    {
      "epoch": 2.076648141238389,
      "grad_norm": 0.5557169914245605,
      "learning_rate": 5.885228942495865e-06,
      "loss": 0.0123,
      "step": 1268940
    },
    {
      "epoch": 2.076680871677042,
      "grad_norm": 0.7544150352478027,
      "learning_rate": 5.885163050282349e-06,
      "loss": 0.0132,
      "step": 1268960
    },
    {
      "epoch": 2.076713602115696,
      "grad_norm": 0.4323524236679077,
      "learning_rate": 5.885097158068831e-06,
      "loss": 0.0186,
      "step": 1268980
    },
    {
      "epoch": 2.076746332554349,
      "grad_norm": 0.15466710925102234,
      "learning_rate": 5.885031265855314e-06,
      "loss": 0.0114,
      "step": 1269000
    },
    {
      "epoch": 2.076779062993002,
      "grad_norm": 0.31947124004364014,
      "learning_rate": 5.884965373641798e-06,
      "loss": 0.0159,
      "step": 1269020
    },
    {
      "epoch": 2.0768117934316557,
      "grad_norm": 0.48551154136657715,
      "learning_rate": 5.88489948142828e-06,
      "loss": 0.015,
      "step": 1269040
    },
    {
      "epoch": 2.076844523870309,
      "grad_norm": 0.5689305663108826,
      "learning_rate": 5.884833589214763e-06,
      "loss": 0.0092,
      "step": 1269060
    },
    {
      "epoch": 2.0768772543089624,
      "grad_norm": 0.6115872859954834,
      "learning_rate": 5.884767697001246e-06,
      "loss": 0.0132,
      "step": 1269080
    },
    {
      "epoch": 2.0769099847476156,
      "grad_norm": 0.7627265453338623,
      "learning_rate": 5.884701804787729e-06,
      "loss": 0.0152,
      "step": 1269100
    },
    {
      "epoch": 2.0769427151862687,
      "grad_norm": 0.25801339745521545,
      "learning_rate": 5.8846359125742115e-06,
      "loss": 0.0139,
      "step": 1269120
    },
    {
      "epoch": 2.0769754456249223,
      "grad_norm": 0.16192282736301422,
      "learning_rate": 5.884570020360695e-06,
      "loss": 0.0177,
      "step": 1269140
    },
    {
      "epoch": 2.0770081760635755,
      "grad_norm": 0.5075100064277649,
      "learning_rate": 5.884504128147177e-06,
      "loss": 0.0256,
      "step": 1269160
    },
    {
      "epoch": 2.077040906502229,
      "grad_norm": 0.20976398885250092,
      "learning_rate": 5.8844382359336605e-06,
      "loss": 0.0169,
      "step": 1269180
    },
    {
      "epoch": 2.077073636940882,
      "grad_norm": 0.48934170603752136,
      "learning_rate": 5.884372343720142e-06,
      "loss": 0.0141,
      "step": 1269200
    },
    {
      "epoch": 2.077106367379536,
      "grad_norm": 1.4441477060317993,
      "learning_rate": 5.884306451506626e-06,
      "loss": 0.0097,
      "step": 1269220
    },
    {
      "epoch": 2.077139097818189,
      "grad_norm": 0.6991980075836182,
      "learning_rate": 5.884240559293109e-06,
      "loss": 0.0146,
      "step": 1269240
    },
    {
      "epoch": 2.077171828256842,
      "grad_norm": 1.2845525741577148,
      "learning_rate": 5.8841746670795915e-06,
      "loss": 0.0178,
      "step": 1269260
    },
    {
      "epoch": 2.0772045586954957,
      "grad_norm": 0.4979982078075409,
      "learning_rate": 5.884108774866074e-06,
      "loss": 0.0115,
      "step": 1269280
    },
    {
      "epoch": 2.077237289134149,
      "grad_norm": 0.9979101419448853,
      "learning_rate": 5.884042882652558e-06,
      "loss": 0.013,
      "step": 1269300
    },
    {
      "epoch": 2.0772700195728024,
      "grad_norm": 0.1376536786556244,
      "learning_rate": 5.88397699043904e-06,
      "loss": 0.0135,
      "step": 1269320
    },
    {
      "epoch": 2.0773027500114556,
      "grad_norm": 0.16295722126960754,
      "learning_rate": 5.883911098225523e-06,
      "loss": 0.0134,
      "step": 1269340
    },
    {
      "epoch": 2.077335480450109,
      "grad_norm": 1.1317938566207886,
      "learning_rate": 5.883845206012007e-06,
      "loss": 0.0103,
      "step": 1269360
    },
    {
      "epoch": 2.0773682108887623,
      "grad_norm": 0.4294582009315491,
      "learning_rate": 5.883779313798489e-06,
      "loss": 0.0141,
      "step": 1269380
    },
    {
      "epoch": 2.0774009413274155,
      "grad_norm": 0.3086003363132477,
      "learning_rate": 5.883713421584972e-06,
      "loss": 0.0168,
      "step": 1269400
    },
    {
      "epoch": 2.077433671766069,
      "grad_norm": 1.201600432395935,
      "learning_rate": 5.883647529371454e-06,
      "loss": 0.0172,
      "step": 1269420
    },
    {
      "epoch": 2.0774664022047222,
      "grad_norm": 0.22808562219142914,
      "learning_rate": 5.883581637157938e-06,
      "loss": 0.0107,
      "step": 1269440
    },
    {
      "epoch": 2.077499132643376,
      "grad_norm": 1.0017565488815308,
      "learning_rate": 5.883515744944421e-06,
      "loss": 0.0263,
      "step": 1269460
    },
    {
      "epoch": 2.077531863082029,
      "grad_norm": 0.2619403004646301,
      "learning_rate": 5.883449852730903e-06,
      "loss": 0.0133,
      "step": 1269480
    },
    {
      "epoch": 2.0775645935206826,
      "grad_norm": 0.5314230918884277,
      "learning_rate": 5.883383960517386e-06,
      "loss": 0.0217,
      "step": 1269500
    },
    {
      "epoch": 2.0775973239593357,
      "grad_norm": 0.8486616611480713,
      "learning_rate": 5.88331806830387e-06,
      "loss": 0.0141,
      "step": 1269520
    },
    {
      "epoch": 2.077630054397989,
      "grad_norm": 0.2597557306289673,
      "learning_rate": 5.8832521760903515e-06,
      "loss": 0.0144,
      "step": 1269540
    },
    {
      "epoch": 2.0776627848366425,
      "grad_norm": 0.27562594413757324,
      "learning_rate": 5.883186283876835e-06,
      "loss": 0.0231,
      "step": 1269560
    },
    {
      "epoch": 2.0776955152752956,
      "grad_norm": 0.33900517225265503,
      "learning_rate": 5.883120391663317e-06,
      "loss": 0.0178,
      "step": 1269580
    },
    {
      "epoch": 2.077728245713949,
      "grad_norm": 0.2455882579088211,
      "learning_rate": 5.883054499449801e-06,
      "loss": 0.0177,
      "step": 1269600
    },
    {
      "epoch": 2.0777609761526024,
      "grad_norm": 0.26687008142471313,
      "learning_rate": 5.882988607236283e-06,
      "loss": 0.03,
      "step": 1269620
    },
    {
      "epoch": 2.077793706591256,
      "grad_norm": 0.3775613307952881,
      "learning_rate": 5.882922715022766e-06,
      "loss": 0.0219,
      "step": 1269640
    },
    {
      "epoch": 2.077826437029909,
      "grad_norm": 0.32745257019996643,
      "learning_rate": 5.882856822809249e-06,
      "loss": 0.017,
      "step": 1269660
    },
    {
      "epoch": 2.0778591674685623,
      "grad_norm": 0.07469603419303894,
      "learning_rate": 5.882790930595732e-06,
      "loss": 0.0186,
      "step": 1269680
    },
    {
      "epoch": 2.077891897907216,
      "grad_norm": 0.2860296368598938,
      "learning_rate": 5.882725038382214e-06,
      "loss": 0.0191,
      "step": 1269700
    },
    {
      "epoch": 2.077924628345869,
      "grad_norm": 0.33206236362457275,
      "learning_rate": 5.882659146168698e-06,
      "loss": 0.0182,
      "step": 1269720
    },
    {
      "epoch": 2.0779573587845226,
      "grad_norm": 0.15910162031650543,
      "learning_rate": 5.8825932539551815e-06,
      "loss": 0.0154,
      "step": 1269740
    },
    {
      "epoch": 2.0779900892231757,
      "grad_norm": 0.3777291476726532,
      "learning_rate": 5.882527361741663e-06,
      "loss": 0.0175,
      "step": 1269760
    },
    {
      "epoch": 2.078022819661829,
      "grad_norm": 0.7980352640151978,
      "learning_rate": 5.882461469528147e-06,
      "loss": 0.0147,
      "step": 1269780
    },
    {
      "epoch": 2.0780555501004825,
      "grad_norm": 0.226759135723114,
      "learning_rate": 5.882395577314629e-06,
      "loss": 0.0219,
      "step": 1269800
    },
    {
      "epoch": 2.0780882805391356,
      "grad_norm": 1.3358445167541504,
      "learning_rate": 5.8823296851011124e-06,
      "loss": 0.0109,
      "step": 1269820
    },
    {
      "epoch": 2.0781210109777892,
      "grad_norm": 0.44451797008514404,
      "learning_rate": 5.882263792887594e-06,
      "loss": 0.0136,
      "step": 1269840
    },
    {
      "epoch": 2.0781537414164424,
      "grad_norm": 0.549156665802002,
      "learning_rate": 5.882197900674078e-06,
      "loss": 0.0204,
      "step": 1269860
    },
    {
      "epoch": 2.078186471855096,
      "grad_norm": 0.32477453351020813,
      "learning_rate": 5.882132008460561e-06,
      "loss": 0.0092,
      "step": 1269880
    },
    {
      "epoch": 2.078219202293749,
      "grad_norm": 0.1392771303653717,
      "learning_rate": 5.882066116247044e-06,
      "loss": 0.0212,
      "step": 1269900
    },
    {
      "epoch": 2.0782519327324023,
      "grad_norm": 0.29659244418144226,
      "learning_rate": 5.882000224033526e-06,
      "loss": 0.0253,
      "step": 1269920
    },
    {
      "epoch": 2.078284663171056,
      "grad_norm": 0.08708538115024567,
      "learning_rate": 5.88193433182001e-06,
      "loss": 0.0232,
      "step": 1269940
    },
    {
      "epoch": 2.078317393609709,
      "grad_norm": 0.11234257370233536,
      "learning_rate": 5.881868439606492e-06,
      "loss": 0.0133,
      "step": 1269960
    },
    {
      "epoch": 2.0783501240483626,
      "grad_norm": 1.433021068572998,
      "learning_rate": 5.881802547392975e-06,
      "loss": 0.016,
      "step": 1269980
    },
    {
      "epoch": 2.0783828544870158,
      "grad_norm": 0.3691418468952179,
      "learning_rate": 5.881736655179457e-06,
      "loss": 0.0124,
      "step": 1270000
    },
    {
      "epoch": 2.0784155849256694,
      "grad_norm": 0.320552259683609,
      "learning_rate": 5.881670762965941e-06,
      "loss": 0.0158,
      "step": 1270020
    },
    {
      "epoch": 2.0784483153643225,
      "grad_norm": 0.6660665273666382,
      "learning_rate": 5.881604870752423e-06,
      "loss": 0.0206,
      "step": 1270040
    },
    {
      "epoch": 2.0784810458029757,
      "grad_norm": 0.4795547127723694,
      "learning_rate": 5.881538978538906e-06,
      "loss": 0.0214,
      "step": 1270060
    },
    {
      "epoch": 2.0785137762416293,
      "grad_norm": 0.4332650899887085,
      "learning_rate": 5.88147308632539e-06,
      "loss": 0.0118,
      "step": 1270080
    },
    {
      "epoch": 2.0785465066802824,
      "grad_norm": 0.6768900752067566,
      "learning_rate": 5.8814071941118725e-06,
      "loss": 0.012,
      "step": 1270100
    },
    {
      "epoch": 2.078579237118936,
      "grad_norm": 0.11369124799966812,
      "learning_rate": 5.881341301898355e-06,
      "loss": 0.0221,
      "step": 1270120
    },
    {
      "epoch": 2.078611967557589,
      "grad_norm": 0.39146682620048523,
      "learning_rate": 5.881275409684838e-06,
      "loss": 0.0137,
      "step": 1270140
    },
    {
      "epoch": 2.0786446979962427,
      "grad_norm": 0.24266719818115234,
      "learning_rate": 5.8812095174713216e-06,
      "loss": 0.0148,
      "step": 1270160
    },
    {
      "epoch": 2.078677428434896,
      "grad_norm": 0.4304139018058777,
      "learning_rate": 5.8811436252578034e-06,
      "loss": 0.0101,
      "step": 1270180
    },
    {
      "epoch": 2.078710158873549,
      "grad_norm": 0.3640158474445343,
      "learning_rate": 5.881077733044287e-06,
      "loss": 0.0149,
      "step": 1270200
    },
    {
      "epoch": 2.0787428893122026,
      "grad_norm": 0.6516813039779663,
      "learning_rate": 5.881011840830769e-06,
      "loss": 0.0216,
      "step": 1270220
    },
    {
      "epoch": 2.078775619750856,
      "grad_norm": 0.32313817739486694,
      "learning_rate": 5.8809459486172525e-06,
      "loss": 0.0206,
      "step": 1270240
    },
    {
      "epoch": 2.0788083501895094,
      "grad_norm": 0.1699371486902237,
      "learning_rate": 5.880880056403735e-06,
      "loss": 0.0137,
      "step": 1270260
    },
    {
      "epoch": 2.0788410806281625,
      "grad_norm": 0.4596517086029053,
      "learning_rate": 5.880814164190218e-06,
      "loss": 0.013,
      "step": 1270280
    },
    {
      "epoch": 2.078873811066816,
      "grad_norm": 0.374564528465271,
      "learning_rate": 5.880748271976701e-06,
      "loss": 0.0196,
      "step": 1270300
    },
    {
      "epoch": 2.0789065415054693,
      "grad_norm": 0.3403134346008301,
      "learning_rate": 5.880682379763184e-06,
      "loss": 0.0092,
      "step": 1270320
    },
    {
      "epoch": 2.0789392719441224,
      "grad_norm": 0.365629106760025,
      "learning_rate": 5.880616487549666e-06,
      "loss": 0.0134,
      "step": 1270340
    },
    {
      "epoch": 2.078972002382776,
      "grad_norm": 0.09414565563201904,
      "learning_rate": 5.88055059533615e-06,
      "loss": 0.0117,
      "step": 1270360
    },
    {
      "epoch": 2.079004732821429,
      "grad_norm": 0.5637511014938354,
      "learning_rate": 5.880484703122632e-06,
      "loss": 0.0173,
      "step": 1270380
    },
    {
      "epoch": 2.0790374632600828,
      "grad_norm": 0.09386350959539413,
      "learning_rate": 5.880418810909115e-06,
      "loss": 0.0108,
      "step": 1270400
    },
    {
      "epoch": 2.079070193698736,
      "grad_norm": 0.46475285291671753,
      "learning_rate": 5.880352918695599e-06,
      "loss": 0.0128,
      "step": 1270420
    },
    {
      "epoch": 2.0791029241373895,
      "grad_norm": 0.3017965853214264,
      "learning_rate": 5.880287026482081e-06,
      "loss": 0.0176,
      "step": 1270440
    },
    {
      "epoch": 2.0791356545760427,
      "grad_norm": 1.1337244510650635,
      "learning_rate": 5.880221134268564e-06,
      "loss": 0.0158,
      "step": 1270460
    },
    {
      "epoch": 2.079168385014696,
      "grad_norm": 0.16947536170482635,
      "learning_rate": 5.880155242055047e-06,
      "loss": 0.0156,
      "step": 1270480
    },
    {
      "epoch": 2.0792011154533494,
      "grad_norm": 1.1565738916397095,
      "learning_rate": 5.88008934984153e-06,
      "loss": 0.0139,
      "step": 1270500
    },
    {
      "epoch": 2.0792338458920026,
      "grad_norm": 0.8743517994880676,
      "learning_rate": 5.8800234576280126e-06,
      "loss": 0.0155,
      "step": 1270520
    },
    {
      "epoch": 2.079266576330656,
      "grad_norm": 0.4325578808784485,
      "learning_rate": 5.879957565414496e-06,
      "loss": 0.0149,
      "step": 1270540
    },
    {
      "epoch": 2.0792993067693093,
      "grad_norm": 0.7475730180740356,
      "learning_rate": 5.879891673200978e-06,
      "loss": 0.0249,
      "step": 1270560
    },
    {
      "epoch": 2.0793320372079624,
      "grad_norm": 0.31353506445884705,
      "learning_rate": 5.879825780987462e-06,
      "loss": 0.0255,
      "step": 1270580
    },
    {
      "epoch": 2.079364767646616,
      "grad_norm": 0.4380848705768585,
      "learning_rate": 5.8797598887739435e-06,
      "loss": 0.0174,
      "step": 1270600
    },
    {
      "epoch": 2.079397498085269,
      "grad_norm": 0.5695896148681641,
      "learning_rate": 5.879693996560427e-06,
      "loss": 0.0209,
      "step": 1270620
    },
    {
      "epoch": 2.079430228523923,
      "grad_norm": 0.4119536876678467,
      "learning_rate": 5.87962810434691e-06,
      "loss": 0.0207,
      "step": 1270640
    },
    {
      "epoch": 2.079462958962576,
      "grad_norm": 0.4225195646286011,
      "learning_rate": 5.879562212133393e-06,
      "loss": 0.0138,
      "step": 1270660
    },
    {
      "epoch": 2.0794956894012295,
      "grad_norm": 0.16765305399894714,
      "learning_rate": 5.879496319919875e-06,
      "loss": 0.0143,
      "step": 1270680
    },
    {
      "epoch": 2.0795284198398827,
      "grad_norm": 5.343898296356201,
      "learning_rate": 5.879430427706359e-06,
      "loss": 0.0112,
      "step": 1270700
    },
    {
      "epoch": 2.079561150278536,
      "grad_norm": 0.4062824547290802,
      "learning_rate": 5.879364535492841e-06,
      "loss": 0.0135,
      "step": 1270720
    },
    {
      "epoch": 2.0795938807171894,
      "grad_norm": 0.2986993193626404,
      "learning_rate": 5.879298643279324e-06,
      "loss": 0.0119,
      "step": 1270740
    },
    {
      "epoch": 2.0796266111558426,
      "grad_norm": 0.45502403378486633,
      "learning_rate": 5.879232751065806e-06,
      "loss": 0.0187,
      "step": 1270760
    },
    {
      "epoch": 2.079659341594496,
      "grad_norm": 0.8309971690177917,
      "learning_rate": 5.87916685885229e-06,
      "loss": 0.0146,
      "step": 1270780
    },
    {
      "epoch": 2.0796920720331493,
      "grad_norm": 1.2060341835021973,
      "learning_rate": 5.8791009666387735e-06,
      "loss": 0.0126,
      "step": 1270800
    },
    {
      "epoch": 2.079724802471803,
      "grad_norm": 0.3121439516544342,
      "learning_rate": 5.879035074425255e-06,
      "loss": 0.0134,
      "step": 1270820
    },
    {
      "epoch": 2.079757532910456,
      "grad_norm": 0.2244342565536499,
      "learning_rate": 5.878969182211739e-06,
      "loss": 0.0088,
      "step": 1270840
    },
    {
      "epoch": 2.079790263349109,
      "grad_norm": 0.2390783131122589,
      "learning_rate": 5.878903289998221e-06,
      "loss": 0.0107,
      "step": 1270860
    },
    {
      "epoch": 2.079822993787763,
      "grad_norm": 0.29197952151298523,
      "learning_rate": 5.878837397784704e-06,
      "loss": 0.0189,
      "step": 1270880
    },
    {
      "epoch": 2.079855724226416,
      "grad_norm": 0.4388693869113922,
      "learning_rate": 5.878771505571187e-06,
      "loss": 0.0126,
      "step": 1270900
    },
    {
      "epoch": 2.0798884546650696,
      "grad_norm": 0.40918126702308655,
      "learning_rate": 5.87870561335767e-06,
      "loss": 0.014,
      "step": 1270920
    },
    {
      "epoch": 2.0799211851037227,
      "grad_norm": 1.097288727760315,
      "learning_rate": 5.878639721144153e-06,
      "loss": 0.0079,
      "step": 1270940
    },
    {
      "epoch": 2.0799539155423763,
      "grad_norm": 0.349250853061676,
      "learning_rate": 5.878573828930636e-06,
      "loss": 0.0118,
      "step": 1270960
    },
    {
      "epoch": 2.0799866459810294,
      "grad_norm": 0.6258921027183533,
      "learning_rate": 5.878507936717118e-06,
      "loss": 0.0149,
      "step": 1270980
    },
    {
      "epoch": 2.0800193764196826,
      "grad_norm": 0.5963773727416992,
      "learning_rate": 5.878442044503602e-06,
      "loss": 0.0219,
      "step": 1271000
    },
    {
      "epoch": 2.080052106858336,
      "grad_norm": 0.2384188324213028,
      "learning_rate": 5.878376152290084e-06,
      "loss": 0.0145,
      "step": 1271020
    },
    {
      "epoch": 2.0800848372969893,
      "grad_norm": 0.21509885787963867,
      "learning_rate": 5.878310260076567e-06,
      "loss": 0.0126,
      "step": 1271040
    },
    {
      "epoch": 2.080117567735643,
      "grad_norm": 0.3008512556552887,
      "learning_rate": 5.87824436786305e-06,
      "loss": 0.0111,
      "step": 1271060
    },
    {
      "epoch": 2.080150298174296,
      "grad_norm": 1.067076325416565,
      "learning_rate": 5.878178475649533e-06,
      "loss": 0.0148,
      "step": 1271080
    },
    {
      "epoch": 2.0801830286129497,
      "grad_norm": 0.41275715827941895,
      "learning_rate": 5.878112583436015e-06,
      "loss": 0.0138,
      "step": 1271100
    },
    {
      "epoch": 2.080215759051603,
      "grad_norm": 0.5892618894577026,
      "learning_rate": 5.878046691222499e-06,
      "loss": 0.0167,
      "step": 1271120
    },
    {
      "epoch": 2.080248489490256,
      "grad_norm": 0.4081532657146454,
      "learning_rate": 5.877980799008982e-06,
      "loss": 0.0133,
      "step": 1271140
    },
    {
      "epoch": 2.0802812199289096,
      "grad_norm": 0.1415533572435379,
      "learning_rate": 5.8779149067954645e-06,
      "loss": 0.0192,
      "step": 1271160
    },
    {
      "epoch": 2.0803139503675627,
      "grad_norm": 0.5207169651985168,
      "learning_rate": 5.877849014581948e-06,
      "loss": 0.0172,
      "step": 1271180
    },
    {
      "epoch": 2.0803466808062163,
      "grad_norm": 1.3533086776733398,
      "learning_rate": 5.87778312236843e-06,
      "loss": 0.0134,
      "step": 1271200
    },
    {
      "epoch": 2.0803794112448695,
      "grad_norm": 0.26518115401268005,
      "learning_rate": 5.8777172301549135e-06,
      "loss": 0.0143,
      "step": 1271220
    },
    {
      "epoch": 2.0804121416835226,
      "grad_norm": 0.2351987510919571,
      "learning_rate": 5.8776513379413954e-06,
      "loss": 0.0163,
      "step": 1271240
    },
    {
      "epoch": 2.080444872122176,
      "grad_norm": 0.23748865723609924,
      "learning_rate": 5.877585445727879e-06,
      "loss": 0.013,
      "step": 1271260
    },
    {
      "epoch": 2.0804776025608294,
      "grad_norm": 1.5659189224243164,
      "learning_rate": 5.877519553514362e-06,
      "loss": 0.0147,
      "step": 1271280
    },
    {
      "epoch": 2.080510332999483,
      "grad_norm": 0.6009044051170349,
      "learning_rate": 5.8774536613008445e-06,
      "loss": 0.0123,
      "step": 1271300
    },
    {
      "epoch": 2.080543063438136,
      "grad_norm": 0.1578764170408249,
      "learning_rate": 5.877387769087327e-06,
      "loss": 0.0132,
      "step": 1271320
    },
    {
      "epoch": 2.0805757938767897,
      "grad_norm": 0.6028613448143005,
      "learning_rate": 5.877321876873811e-06,
      "loss": 0.0175,
      "step": 1271340
    },
    {
      "epoch": 2.080608524315443,
      "grad_norm": 0.34317874908447266,
      "learning_rate": 5.877255984660293e-06,
      "loss": 0.0184,
      "step": 1271360
    },
    {
      "epoch": 2.080641254754096,
      "grad_norm": 0.3009040355682373,
      "learning_rate": 5.877190092446776e-06,
      "loss": 0.0146,
      "step": 1271380
    },
    {
      "epoch": 2.0806739851927496,
      "grad_norm": 0.31659945845603943,
      "learning_rate": 5.877124200233258e-06,
      "loss": 0.0131,
      "step": 1271400
    },
    {
      "epoch": 2.0807067156314027,
      "grad_norm": 0.1760968416929245,
      "learning_rate": 5.877058308019742e-06,
      "loss": 0.0142,
      "step": 1271420
    },
    {
      "epoch": 2.0807394460700563,
      "grad_norm": 0.37513479590415955,
      "learning_rate": 5.8769924158062245e-06,
      "loss": 0.0169,
      "step": 1271440
    },
    {
      "epoch": 2.0807721765087095,
      "grad_norm": 0.6024917364120483,
      "learning_rate": 5.876926523592707e-06,
      "loss": 0.0268,
      "step": 1271460
    },
    {
      "epoch": 2.080804906947363,
      "grad_norm": 0.46994146704673767,
      "learning_rate": 5.876860631379191e-06,
      "loss": 0.0097,
      "step": 1271480
    },
    {
      "epoch": 2.0808376373860162,
      "grad_norm": 0.2824377119541168,
      "learning_rate": 5.876794739165674e-06,
      "loss": 0.0256,
      "step": 1271500
    },
    {
      "epoch": 2.0808703678246694,
      "grad_norm": 0.1081845611333847,
      "learning_rate": 5.876728846952156e-06,
      "loss": 0.0124,
      "step": 1271520
    },
    {
      "epoch": 2.080903098263323,
      "grad_norm": 0.3138224482536316,
      "learning_rate": 5.876662954738639e-06,
      "loss": 0.0114,
      "step": 1271540
    },
    {
      "epoch": 2.080935828701976,
      "grad_norm": 0.17423444986343384,
      "learning_rate": 5.876597062525123e-06,
      "loss": 0.0197,
      "step": 1271560
    },
    {
      "epoch": 2.0809685591406297,
      "grad_norm": 0.623486340045929,
      "learning_rate": 5.8765311703116045e-06,
      "loss": 0.017,
      "step": 1271580
    },
    {
      "epoch": 2.081001289579283,
      "grad_norm": 0.7397286891937256,
      "learning_rate": 5.876465278098088e-06,
      "loss": 0.0194,
      "step": 1271600
    },
    {
      "epoch": 2.0810340200179365,
      "grad_norm": 0.3638230562210083,
      "learning_rate": 5.87639938588457e-06,
      "loss": 0.0161,
      "step": 1271620
    },
    {
      "epoch": 2.0810667504565896,
      "grad_norm": 0.1727394312620163,
      "learning_rate": 5.876333493671054e-06,
      "loss": 0.0158,
      "step": 1271640
    },
    {
      "epoch": 2.0810994808952428,
      "grad_norm": 0.688970685005188,
      "learning_rate": 5.876267601457536e-06,
      "loss": 0.013,
      "step": 1271660
    },
    {
      "epoch": 2.0811322113338964,
      "grad_norm": 0.2867339849472046,
      "learning_rate": 5.876201709244019e-06,
      "loss": 0.0086,
      "step": 1271680
    },
    {
      "epoch": 2.0811649417725495,
      "grad_norm": 0.12113578617572784,
      "learning_rate": 5.876135817030502e-06,
      "loss": 0.0136,
      "step": 1271700
    },
    {
      "epoch": 2.081197672211203,
      "grad_norm": 0.5597019791603088,
      "learning_rate": 5.876069924816985e-06,
      "loss": 0.0111,
      "step": 1271720
    },
    {
      "epoch": 2.0812304026498563,
      "grad_norm": 0.31873631477355957,
      "learning_rate": 5.876004032603467e-06,
      "loss": 0.0194,
      "step": 1271740
    },
    {
      "epoch": 2.08126313308851,
      "grad_norm": 0.22262261807918549,
      "learning_rate": 5.875938140389951e-06,
      "loss": 0.0154,
      "step": 1271760
    },
    {
      "epoch": 2.081295863527163,
      "grad_norm": 0.3356204628944397,
      "learning_rate": 5.875872248176433e-06,
      "loss": 0.0135,
      "step": 1271780
    },
    {
      "epoch": 2.081328593965816,
      "grad_norm": 0.7069727778434753,
      "learning_rate": 5.875806355962916e-06,
      "loss": 0.0212,
      "step": 1271800
    },
    {
      "epoch": 2.0813613244044697,
      "grad_norm": 0.8973389863967896,
      "learning_rate": 5.875740463749398e-06,
      "loss": 0.0192,
      "step": 1271820
    },
    {
      "epoch": 2.081394054843123,
      "grad_norm": 0.11304078251123428,
      "learning_rate": 5.875674571535882e-06,
      "loss": 0.0121,
      "step": 1271840
    },
    {
      "epoch": 2.0814267852817765,
      "grad_norm": 0.17317278683185577,
      "learning_rate": 5.8756086793223654e-06,
      "loss": 0.02,
      "step": 1271860
    },
    {
      "epoch": 2.0814595157204296,
      "grad_norm": 0.20125649869441986,
      "learning_rate": 5.875542787108847e-06,
      "loss": 0.0093,
      "step": 1271880
    },
    {
      "epoch": 2.0814922461590832,
      "grad_norm": 0.4184880554676056,
      "learning_rate": 5.875476894895331e-06,
      "loss": 0.0152,
      "step": 1271900
    },
    {
      "epoch": 2.0815249765977364,
      "grad_norm": 0.8780501484870911,
      "learning_rate": 5.875411002681814e-06,
      "loss": 0.02,
      "step": 1271920
    },
    {
      "epoch": 2.0815577070363895,
      "grad_norm": 0.7588076591491699,
      "learning_rate": 5.875345110468296e-06,
      "loss": 0.0136,
      "step": 1271940
    },
    {
      "epoch": 2.081590437475043,
      "grad_norm": 0.2173757404088974,
      "learning_rate": 5.875279218254779e-06,
      "loss": 0.0159,
      "step": 1271960
    },
    {
      "epoch": 2.0816231679136963,
      "grad_norm": 0.4895494282245636,
      "learning_rate": 5.875213326041263e-06,
      "loss": 0.0187,
      "step": 1271980
    },
    {
      "epoch": 2.08165589835235,
      "grad_norm": 0.569172203540802,
      "learning_rate": 5.875147433827745e-06,
      "loss": 0.0092,
      "step": 1272000
    },
    {
      "epoch": 2.081688628791003,
      "grad_norm": 0.47636252641677856,
      "learning_rate": 5.875081541614228e-06,
      "loss": 0.0143,
      "step": 1272020
    },
    {
      "epoch": 2.0817213592296566,
      "grad_norm": 3.4149270057678223,
      "learning_rate": 5.87501564940071e-06,
      "loss": 0.0136,
      "step": 1272040
    },
    {
      "epoch": 2.0817540896683098,
      "grad_norm": 0.8416305184364319,
      "learning_rate": 5.874949757187194e-06,
      "loss": 0.0153,
      "step": 1272060
    },
    {
      "epoch": 2.081786820106963,
      "grad_norm": 0.6427119970321655,
      "learning_rate": 5.8748838649736764e-06,
      "loss": 0.0146,
      "step": 1272080
    },
    {
      "epoch": 2.0818195505456165,
      "grad_norm": 0.3899322748184204,
      "learning_rate": 5.874817972760159e-06,
      "loss": 0.0151,
      "step": 1272100
    },
    {
      "epoch": 2.0818522809842697,
      "grad_norm": 0.6568718552589417,
      "learning_rate": 5.874752080546642e-06,
      "loss": 0.02,
      "step": 1272120
    },
    {
      "epoch": 2.0818850114229233,
      "grad_norm": 0.642978847026825,
      "learning_rate": 5.8746861883331255e-06,
      "loss": 0.0136,
      "step": 1272140
    },
    {
      "epoch": 2.0819177418615764,
      "grad_norm": 2.04130220413208,
      "learning_rate": 5.874620296119607e-06,
      "loss": 0.0158,
      "step": 1272160
    },
    {
      "epoch": 2.0819504723002296,
      "grad_norm": 0.629607617855072,
      "learning_rate": 5.874554403906091e-06,
      "loss": 0.0098,
      "step": 1272180
    },
    {
      "epoch": 2.081983202738883,
      "grad_norm": 0.4310755729675293,
      "learning_rate": 5.8744885116925746e-06,
      "loss": 0.0225,
      "step": 1272200
    },
    {
      "epoch": 2.0820159331775363,
      "grad_norm": 0.22634990513324738,
      "learning_rate": 5.8744226194790565e-06,
      "loss": 0.015,
      "step": 1272220
    },
    {
      "epoch": 2.08204866361619,
      "grad_norm": 0.40136075019836426,
      "learning_rate": 5.87435672726554e-06,
      "loss": 0.0176,
      "step": 1272240
    },
    {
      "epoch": 2.082081394054843,
      "grad_norm": 0.26495110988616943,
      "learning_rate": 5.874290835052022e-06,
      "loss": 0.014,
      "step": 1272260
    },
    {
      "epoch": 2.0821141244934966,
      "grad_norm": 0.5453442335128784,
      "learning_rate": 5.8742249428385055e-06,
      "loss": 0.0206,
      "step": 1272280
    },
    {
      "epoch": 2.08214685493215,
      "grad_norm": 0.3704959452152252,
      "learning_rate": 5.874159050624988e-06,
      "loss": 0.0195,
      "step": 1272300
    },
    {
      "epoch": 2.082179585370803,
      "grad_norm": 0.191331148147583,
      "learning_rate": 5.874093158411471e-06,
      "loss": 0.0157,
      "step": 1272320
    },
    {
      "epoch": 2.0822123158094565,
      "grad_norm": 0.17542214691638947,
      "learning_rate": 5.874027266197954e-06,
      "loss": 0.0185,
      "step": 1272340
    },
    {
      "epoch": 2.0822450462481097,
      "grad_norm": 0.3733395040035248,
      "learning_rate": 5.873961373984437e-06,
      "loss": 0.0128,
      "step": 1272360
    },
    {
      "epoch": 2.0822777766867633,
      "grad_norm": 0.399154931306839,
      "learning_rate": 5.873895481770919e-06,
      "loss": 0.0189,
      "step": 1272380
    },
    {
      "epoch": 2.0823105071254164,
      "grad_norm": 0.203847736120224,
      "learning_rate": 5.873829589557403e-06,
      "loss": 0.0188,
      "step": 1272400
    },
    {
      "epoch": 2.08234323756407,
      "grad_norm": 0.3038366436958313,
      "learning_rate": 5.873763697343885e-06,
      "loss": 0.0172,
      "step": 1272420
    },
    {
      "epoch": 2.082375968002723,
      "grad_norm": 0.23624609410762787,
      "learning_rate": 5.873697805130368e-06,
      "loss": 0.0227,
      "step": 1272440
    },
    {
      "epoch": 2.0824086984413763,
      "grad_norm": 0.3583722710609436,
      "learning_rate": 5.873631912916851e-06,
      "loss": 0.0133,
      "step": 1272460
    },
    {
      "epoch": 2.08244142888003,
      "grad_norm": 0.4993163049221039,
      "learning_rate": 5.873566020703334e-06,
      "loss": 0.0158,
      "step": 1272480
    },
    {
      "epoch": 2.082474159318683,
      "grad_norm": 0.233440101146698,
      "learning_rate": 5.8735001284898165e-06,
      "loss": 0.0219,
      "step": 1272500
    },
    {
      "epoch": 2.0825068897573367,
      "grad_norm": 0.5329412817955017,
      "learning_rate": 5.8734342362763e-06,
      "loss": 0.012,
      "step": 1272520
    },
    {
      "epoch": 2.08253962019599,
      "grad_norm": 0.0960853323340416,
      "learning_rate": 5.873368344062783e-06,
      "loss": 0.0119,
      "step": 1272540
    },
    {
      "epoch": 2.0825723506346434,
      "grad_norm": 2.38824462890625,
      "learning_rate": 5.8733024518492656e-06,
      "loss": 0.0157,
      "step": 1272560
    },
    {
      "epoch": 2.0826050810732966,
      "grad_norm": 0.2529453635215759,
      "learning_rate": 5.873236559635749e-06,
      "loss": 0.0189,
      "step": 1272580
    },
    {
      "epoch": 2.0826378115119497,
      "grad_norm": 0.2333686500787735,
      "learning_rate": 5.873170667422231e-06,
      "loss": 0.0196,
      "step": 1272600
    },
    {
      "epoch": 2.0826705419506033,
      "grad_norm": 1.5494863986968994,
      "learning_rate": 5.873104775208715e-06,
      "loss": 0.0131,
      "step": 1272620
    },
    {
      "epoch": 2.0827032723892565,
      "grad_norm": 0.9802841544151306,
      "learning_rate": 5.8730388829951965e-06,
      "loss": 0.0164,
      "step": 1272640
    },
    {
      "epoch": 2.08273600282791,
      "grad_norm": 0.852356493473053,
      "learning_rate": 5.87297299078168e-06,
      "loss": 0.0121,
      "step": 1272660
    },
    {
      "epoch": 2.082768733266563,
      "grad_norm": 0.30089083313941956,
      "learning_rate": 5.872907098568163e-06,
      "loss": 0.0165,
      "step": 1272680
    },
    {
      "epoch": 2.0828014637052163,
      "grad_norm": 0.46452468633651733,
      "learning_rate": 5.872841206354646e-06,
      "loss": 0.0172,
      "step": 1272700
    },
    {
      "epoch": 2.08283419414387,
      "grad_norm": 0.5478603839874268,
      "learning_rate": 5.872775314141128e-06,
      "loss": 0.0154,
      "step": 1272720
    },
    {
      "epoch": 2.082866924582523,
      "grad_norm": 0.3778577446937561,
      "learning_rate": 5.872709421927612e-06,
      "loss": 0.0097,
      "step": 1272740
    },
    {
      "epoch": 2.0828996550211767,
      "grad_norm": 0.16011174023151398,
      "learning_rate": 5.872643529714094e-06,
      "loss": 0.013,
      "step": 1272760
    },
    {
      "epoch": 2.08293238545983,
      "grad_norm": 0.3693788945674896,
      "learning_rate": 5.872577637500577e-06,
      "loss": 0.0185,
      "step": 1272780
    },
    {
      "epoch": 2.0829651158984834,
      "grad_norm": 0.23928619921207428,
      "learning_rate": 5.872511745287059e-06,
      "loss": 0.0143,
      "step": 1272800
    },
    {
      "epoch": 2.0829978463371366,
      "grad_norm": 0.19353654980659485,
      "learning_rate": 5.872445853073543e-06,
      "loss": 0.0148,
      "step": 1272820
    },
    {
      "epoch": 2.0830305767757897,
      "grad_norm": 0.3834710717201233,
      "learning_rate": 5.872379960860025e-06,
      "loss": 0.0159,
      "step": 1272840
    },
    {
      "epoch": 2.0830633072144433,
      "grad_norm": 0.3508032262325287,
      "learning_rate": 5.872314068646508e-06,
      "loss": 0.02,
      "step": 1272860
    },
    {
      "epoch": 2.0830960376530965,
      "grad_norm": 0.5381293892860413,
      "learning_rate": 5.872248176432992e-06,
      "loss": 0.02,
      "step": 1272880
    },
    {
      "epoch": 2.08312876809175,
      "grad_norm": 0.34466752409935,
      "learning_rate": 5.872182284219474e-06,
      "loss": 0.0115,
      "step": 1272900
    },
    {
      "epoch": 2.083161498530403,
      "grad_norm": 0.4443053901195526,
      "learning_rate": 5.8721163920059574e-06,
      "loss": 0.0158,
      "step": 1272920
    },
    {
      "epoch": 2.083194228969057,
      "grad_norm": 0.2951015532016754,
      "learning_rate": 5.87205049979244e-06,
      "loss": 0.0098,
      "step": 1272940
    },
    {
      "epoch": 2.08322695940771,
      "grad_norm": 0.4597080647945404,
      "learning_rate": 5.871984607578923e-06,
      "loss": 0.0142,
      "step": 1272960
    },
    {
      "epoch": 2.083259689846363,
      "grad_norm": 0.06298022717237473,
      "learning_rate": 5.871918715365406e-06,
      "loss": 0.0179,
      "step": 1272980
    },
    {
      "epoch": 2.0832924202850167,
      "grad_norm": 0.6176520586013794,
      "learning_rate": 5.871852823151889e-06,
      "loss": 0.0182,
      "step": 1273000
    },
    {
      "epoch": 2.08332515072367,
      "grad_norm": 0.5401847958564758,
      "learning_rate": 5.871786930938371e-06,
      "loss": 0.0147,
      "step": 1273020
    },
    {
      "epoch": 2.0833578811623235,
      "grad_norm": 0.5770294070243835,
      "learning_rate": 5.871721038724855e-06,
      "loss": 0.0159,
      "step": 1273040
    },
    {
      "epoch": 2.0833906116009766,
      "grad_norm": 0.45266789197921753,
      "learning_rate": 5.871655146511337e-06,
      "loss": 0.0145,
      "step": 1273060
    },
    {
      "epoch": 2.08342334203963,
      "grad_norm": 0.18087805807590485,
      "learning_rate": 5.87158925429782e-06,
      "loss": 0.0122,
      "step": 1273080
    },
    {
      "epoch": 2.0834560724782833,
      "grad_norm": 0.9709349274635315,
      "learning_rate": 5.871523362084303e-06,
      "loss": 0.0136,
      "step": 1273100
    },
    {
      "epoch": 2.0834888029169365,
      "grad_norm": 0.16774480044841766,
      "learning_rate": 5.871457469870786e-06,
      "loss": 0.0156,
      "step": 1273120
    },
    {
      "epoch": 2.08352153335559,
      "grad_norm": 0.3908711373806,
      "learning_rate": 5.871391577657268e-06,
      "loss": 0.0112,
      "step": 1273140
    },
    {
      "epoch": 2.0835542637942432,
      "grad_norm": 0.06131025403738022,
      "learning_rate": 5.871325685443752e-06,
      "loss": 0.0111,
      "step": 1273160
    },
    {
      "epoch": 2.083586994232897,
      "grad_norm": 0.34106385707855225,
      "learning_rate": 5.871259793230234e-06,
      "loss": 0.0126,
      "step": 1273180
    },
    {
      "epoch": 2.08361972467155,
      "grad_norm": 0.2661469280719757,
      "learning_rate": 5.8711939010167175e-06,
      "loss": 0.0132,
      "step": 1273200
    },
    {
      "epoch": 2.0836524551102036,
      "grad_norm": 0.8330362439155579,
      "learning_rate": 5.871128008803199e-06,
      "loss": 0.0142,
      "step": 1273220
    },
    {
      "epoch": 2.0836851855488567,
      "grad_norm": 0.28441134095191956,
      "learning_rate": 5.871062116589683e-06,
      "loss": 0.0117,
      "step": 1273240
    },
    {
      "epoch": 2.08371791598751,
      "grad_norm": 0.1826183795928955,
      "learning_rate": 5.8709962243761665e-06,
      "loss": 0.0109,
      "step": 1273260
    },
    {
      "epoch": 2.0837506464261635,
      "grad_norm": 0.41712796688079834,
      "learning_rate": 5.8709303321626484e-06,
      "loss": 0.014,
      "step": 1273280
    },
    {
      "epoch": 2.0837833768648166,
      "grad_norm": 2.099019765853882,
      "learning_rate": 5.870864439949132e-06,
      "loss": 0.0239,
      "step": 1273300
    },
    {
      "epoch": 2.08381610730347,
      "grad_norm": 0.5260615944862366,
      "learning_rate": 5.870798547735615e-06,
      "loss": 0.0134,
      "step": 1273320
    },
    {
      "epoch": 2.0838488377421234,
      "grad_norm": 0.06781920790672302,
      "learning_rate": 5.8707326555220975e-06,
      "loss": 0.0126,
      "step": 1273340
    },
    {
      "epoch": 2.083881568180777,
      "grad_norm": 0.5562795400619507,
      "learning_rate": 5.87066676330858e-06,
      "loss": 0.0102,
      "step": 1273360
    },
    {
      "epoch": 2.08391429861943,
      "grad_norm": 0.17334707081317902,
      "learning_rate": 5.870600871095064e-06,
      "loss": 0.0133,
      "step": 1273380
    },
    {
      "epoch": 2.0839470290580833,
      "grad_norm": 0.6162567734718323,
      "learning_rate": 5.870534978881546e-06,
      "loss": 0.0225,
      "step": 1273400
    },
    {
      "epoch": 2.083979759496737,
      "grad_norm": 0.31298044323921204,
      "learning_rate": 5.870469086668029e-06,
      "loss": 0.0177,
      "step": 1273420
    },
    {
      "epoch": 2.08401248993539,
      "grad_norm": 0.20705744624137878,
      "learning_rate": 5.870403194454511e-06,
      "loss": 0.016,
      "step": 1273440
    },
    {
      "epoch": 2.0840452203740436,
      "grad_norm": 0.30644944310188293,
      "learning_rate": 5.870337302240995e-06,
      "loss": 0.0112,
      "step": 1273460
    },
    {
      "epoch": 2.0840779508126968,
      "grad_norm": 0.4256211519241333,
      "learning_rate": 5.8702714100274775e-06,
      "loss": 0.0102,
      "step": 1273480
    },
    {
      "epoch": 2.0841106812513503,
      "grad_norm": 0.6523357033729553,
      "learning_rate": 5.87020551781396e-06,
      "loss": 0.0194,
      "step": 1273500
    },
    {
      "epoch": 2.0841434116900035,
      "grad_norm": 0.9455183148384094,
      "learning_rate": 5.870139625600443e-06,
      "loss": 0.0125,
      "step": 1273520
    },
    {
      "epoch": 2.0841761421286567,
      "grad_norm": 0.2977595329284668,
      "learning_rate": 5.870073733386927e-06,
      "loss": 0.0202,
      "step": 1273540
    },
    {
      "epoch": 2.0842088725673102,
      "grad_norm": 0.28534996509552,
      "learning_rate": 5.8700078411734085e-06,
      "loss": 0.0176,
      "step": 1273560
    },
    {
      "epoch": 2.0842416030059634,
      "grad_norm": 0.6770907640457153,
      "learning_rate": 5.869941948959892e-06,
      "loss": 0.0106,
      "step": 1273580
    },
    {
      "epoch": 2.084274333444617,
      "grad_norm": 0.06587433815002441,
      "learning_rate": 5.869876056746376e-06,
      "loss": 0.0164,
      "step": 1273600
    },
    {
      "epoch": 2.08430706388327,
      "grad_norm": 0.13165102899074554,
      "learning_rate": 5.8698101645328576e-06,
      "loss": 0.0161,
      "step": 1273620
    },
    {
      "epoch": 2.0843397943219233,
      "grad_norm": 0.22172926366329193,
      "learning_rate": 5.869744272319341e-06,
      "loss": 0.017,
      "step": 1273640
    },
    {
      "epoch": 2.084372524760577,
      "grad_norm": 0.2612038552761078,
      "learning_rate": 5.869678380105823e-06,
      "loss": 0.0172,
      "step": 1273660
    },
    {
      "epoch": 2.08440525519923,
      "grad_norm": 0.8373080492019653,
      "learning_rate": 5.869612487892307e-06,
      "loss": 0.013,
      "step": 1273680
    },
    {
      "epoch": 2.0844379856378836,
      "grad_norm": 0.7096604704856873,
      "learning_rate": 5.8695465956787885e-06,
      "loss": 0.0146,
      "step": 1273700
    },
    {
      "epoch": 2.084470716076537,
      "grad_norm": 0.17886614799499512,
      "learning_rate": 5.869480703465272e-06,
      "loss": 0.0191,
      "step": 1273720
    },
    {
      "epoch": 2.0845034465151904,
      "grad_norm": 0.39005011320114136,
      "learning_rate": 5.869414811251755e-06,
      "loss": 0.0225,
      "step": 1273740
    },
    {
      "epoch": 2.0845361769538435,
      "grad_norm": 0.34301093220710754,
      "learning_rate": 5.869348919038238e-06,
      "loss": 0.0162,
      "step": 1273760
    },
    {
      "epoch": 2.0845689073924967,
      "grad_norm": 0.3238925337791443,
      "learning_rate": 5.86928302682472e-06,
      "loss": 0.0187,
      "step": 1273780
    },
    {
      "epoch": 2.0846016378311503,
      "grad_norm": 0.6256493926048279,
      "learning_rate": 5.869217134611204e-06,
      "loss": 0.0132,
      "step": 1273800
    },
    {
      "epoch": 2.0846343682698034,
      "grad_norm": 0.11491473764181137,
      "learning_rate": 5.869151242397686e-06,
      "loss": 0.0071,
      "step": 1273820
    },
    {
      "epoch": 2.084667098708457,
      "grad_norm": 0.11994671076536179,
      "learning_rate": 5.869085350184169e-06,
      "loss": 0.0095,
      "step": 1273840
    },
    {
      "epoch": 2.08469982914711,
      "grad_norm": 0.6991182565689087,
      "learning_rate": 5.869019457970651e-06,
      "loss": 0.0138,
      "step": 1273860
    },
    {
      "epoch": 2.0847325595857638,
      "grad_norm": 0.28721702098846436,
      "learning_rate": 5.868953565757135e-06,
      "loss": 0.0234,
      "step": 1273880
    },
    {
      "epoch": 2.084765290024417,
      "grad_norm": 0.20083048939704895,
      "learning_rate": 5.868887673543618e-06,
      "loss": 0.0105,
      "step": 1273900
    },
    {
      "epoch": 2.08479802046307,
      "grad_norm": 0.3493310809135437,
      "learning_rate": 5.8688217813301e-06,
      "loss": 0.0126,
      "step": 1273920
    },
    {
      "epoch": 2.0848307509017237,
      "grad_norm": 0.6065242290496826,
      "learning_rate": 5.868755889116584e-06,
      "loss": 0.0158,
      "step": 1273940
    },
    {
      "epoch": 2.084863481340377,
      "grad_norm": 0.12164711207151413,
      "learning_rate": 5.868689996903067e-06,
      "loss": 0.0196,
      "step": 1273960
    },
    {
      "epoch": 2.0848962117790304,
      "grad_norm": 0.37477025389671326,
      "learning_rate": 5.868624104689549e-06,
      "loss": 0.0177,
      "step": 1273980
    },
    {
      "epoch": 2.0849289422176835,
      "grad_norm": 0.33808720111846924,
      "learning_rate": 5.868558212476032e-06,
      "loss": 0.0142,
      "step": 1274000
    },
    {
      "epoch": 2.084961672656337,
      "grad_norm": 0.5314503312110901,
      "learning_rate": 5.868492320262516e-06,
      "loss": 0.0181,
      "step": 1274020
    },
    {
      "epoch": 2.0849944030949903,
      "grad_norm": 0.4666456878185272,
      "learning_rate": 5.868426428048998e-06,
      "loss": 0.0148,
      "step": 1274040
    },
    {
      "epoch": 2.0850271335336434,
      "grad_norm": 0.09782123565673828,
      "learning_rate": 5.868360535835481e-06,
      "loss": 0.012,
      "step": 1274060
    },
    {
      "epoch": 2.085059863972297,
      "grad_norm": 0.2321586310863495,
      "learning_rate": 5.868294643621963e-06,
      "loss": 0.0108,
      "step": 1274080
    },
    {
      "epoch": 2.08509259441095,
      "grad_norm": 0.10626627504825592,
      "learning_rate": 5.868228751408447e-06,
      "loss": 0.014,
      "step": 1274100
    },
    {
      "epoch": 2.085125324849604,
      "grad_norm": 0.15664230287075043,
      "learning_rate": 5.8681628591949294e-06,
      "loss": 0.0206,
      "step": 1274120
    },
    {
      "epoch": 2.085158055288257,
      "grad_norm": 0.8938397765159607,
      "learning_rate": 5.868096966981412e-06,
      "loss": 0.0182,
      "step": 1274140
    },
    {
      "epoch": 2.0851907857269105,
      "grad_norm": 0.8666871190071106,
      "learning_rate": 5.868031074767895e-06,
      "loss": 0.0186,
      "step": 1274160
    },
    {
      "epoch": 2.0852235161655637,
      "grad_norm": 1.6498937606811523,
      "learning_rate": 5.8679651825543785e-06,
      "loss": 0.0172,
      "step": 1274180
    },
    {
      "epoch": 2.085256246604217,
      "grad_norm": 0.3583109378814697,
      "learning_rate": 5.86789929034086e-06,
      "loss": 0.0211,
      "step": 1274200
    },
    {
      "epoch": 2.0852889770428704,
      "grad_norm": 0.7332667708396912,
      "learning_rate": 5.867833398127344e-06,
      "loss": 0.0145,
      "step": 1274220
    },
    {
      "epoch": 2.0853217074815236,
      "grad_norm": 0.3515462577342987,
      "learning_rate": 5.867767505913826e-06,
      "loss": 0.0137,
      "step": 1274240
    },
    {
      "epoch": 2.085354437920177,
      "grad_norm": 0.33905065059661865,
      "learning_rate": 5.8677016137003095e-06,
      "loss": 0.0116,
      "step": 1274260
    },
    {
      "epoch": 2.0853871683588303,
      "grad_norm": 0.18433262407779694,
      "learning_rate": 5.867635721486792e-06,
      "loss": 0.0141,
      "step": 1274280
    },
    {
      "epoch": 2.0854198987974835,
      "grad_norm": 0.25821730494499207,
      "learning_rate": 5.867569829273275e-06,
      "loss": 0.0116,
      "step": 1274300
    },
    {
      "epoch": 2.085452629236137,
      "grad_norm": 0.17599651217460632,
      "learning_rate": 5.8675039370597585e-06,
      "loss": 0.0129,
      "step": 1274320
    },
    {
      "epoch": 2.08548535967479,
      "grad_norm": 0.506546139717102,
      "learning_rate": 5.867438044846241e-06,
      "loss": 0.0131,
      "step": 1274340
    },
    {
      "epoch": 2.085518090113444,
      "grad_norm": 1.5196069478988647,
      "learning_rate": 5.867372152632724e-06,
      "loss": 0.0206,
      "step": 1274360
    },
    {
      "epoch": 2.085550820552097,
      "grad_norm": 1.1809314489364624,
      "learning_rate": 5.867306260419207e-06,
      "loss": 0.0217,
      "step": 1274380
    },
    {
      "epoch": 2.0855835509907505,
      "grad_norm": 0.19510558247566223,
      "learning_rate": 5.86724036820569e-06,
      "loss": 0.0155,
      "step": 1274400
    },
    {
      "epoch": 2.0856162814294037,
      "grad_norm": 0.2441401183605194,
      "learning_rate": 5.867174475992172e-06,
      "loss": 0.0159,
      "step": 1274420
    },
    {
      "epoch": 2.085649011868057,
      "grad_norm": 0.5358350276947021,
      "learning_rate": 5.867108583778656e-06,
      "loss": 0.0111,
      "step": 1274440
    },
    {
      "epoch": 2.0856817423067104,
      "grad_norm": 0.6089823246002197,
      "learning_rate": 5.867042691565138e-06,
      "loss": 0.012,
      "step": 1274460
    },
    {
      "epoch": 2.0857144727453636,
      "grad_norm": 0.4391784369945526,
      "learning_rate": 5.866976799351621e-06,
      "loss": 0.0171,
      "step": 1274480
    },
    {
      "epoch": 2.085747203184017,
      "grad_norm": 0.39902955293655396,
      "learning_rate": 5.866910907138104e-06,
      "loss": 0.0165,
      "step": 1274500
    },
    {
      "epoch": 2.0857799336226703,
      "grad_norm": 0.6047927141189575,
      "learning_rate": 5.866845014924587e-06,
      "loss": 0.0143,
      "step": 1274520
    },
    {
      "epoch": 2.085812664061324,
      "grad_norm": 0.32239580154418945,
      "learning_rate": 5.8667791227110695e-06,
      "loss": 0.0115,
      "step": 1274540
    },
    {
      "epoch": 2.085845394499977,
      "grad_norm": 0.14581207931041718,
      "learning_rate": 5.866713230497553e-06,
      "loss": 0.016,
      "step": 1274560
    },
    {
      "epoch": 2.0858781249386302,
      "grad_norm": 0.2533189058303833,
      "learning_rate": 5.866647338284035e-06,
      "loss": 0.019,
      "step": 1274580
    },
    {
      "epoch": 2.085910855377284,
      "grad_norm": 0.2040114849805832,
      "learning_rate": 5.8665814460705186e-06,
      "loss": 0.0139,
      "step": 1274600
    },
    {
      "epoch": 2.085943585815937,
      "grad_norm": 0.6095834970474243,
      "learning_rate": 5.8665155538570005e-06,
      "loss": 0.0188,
      "step": 1274620
    },
    {
      "epoch": 2.0859763162545906,
      "grad_norm": 0.2769196033477783,
      "learning_rate": 5.866449661643484e-06,
      "loss": 0.0166,
      "step": 1274640
    },
    {
      "epoch": 2.0860090466932437,
      "grad_norm": 0.49005597829818726,
      "learning_rate": 5.866383769429968e-06,
      "loss": 0.0192,
      "step": 1274660
    },
    {
      "epoch": 2.0860417771318973,
      "grad_norm": 0.19451472163200378,
      "learning_rate": 5.8663178772164495e-06,
      "loss": 0.0115,
      "step": 1274680
    },
    {
      "epoch": 2.0860745075705505,
      "grad_norm": 0.41341543197631836,
      "learning_rate": 5.866251985002933e-06,
      "loss": 0.0145,
      "step": 1274700
    },
    {
      "epoch": 2.0861072380092036,
      "grad_norm": 0.27260375022888184,
      "learning_rate": 5.866186092789415e-06,
      "loss": 0.0197,
      "step": 1274720
    },
    {
      "epoch": 2.086139968447857,
      "grad_norm": 0.4302944540977478,
      "learning_rate": 5.866120200575899e-06,
      "loss": 0.0142,
      "step": 1274740
    },
    {
      "epoch": 2.0861726988865104,
      "grad_norm": 1.1513737440109253,
      "learning_rate": 5.866054308362381e-06,
      "loss": 0.0151,
      "step": 1274760
    },
    {
      "epoch": 2.086205429325164,
      "grad_norm": 1.6742486953735352,
      "learning_rate": 5.865988416148864e-06,
      "loss": 0.0198,
      "step": 1274780
    },
    {
      "epoch": 2.086238159763817,
      "grad_norm": 0.3241840898990631,
      "learning_rate": 5.865922523935347e-06,
      "loss": 0.0171,
      "step": 1274800
    },
    {
      "epoch": 2.0862708902024707,
      "grad_norm": 0.626352846622467,
      "learning_rate": 5.86585663172183e-06,
      "loss": 0.0169,
      "step": 1274820
    },
    {
      "epoch": 2.086303620641124,
      "grad_norm": 0.32825884222984314,
      "learning_rate": 5.865790739508312e-06,
      "loss": 0.0226,
      "step": 1274840
    },
    {
      "epoch": 2.086336351079777,
      "grad_norm": 0.8555274605751038,
      "learning_rate": 5.865724847294796e-06,
      "loss": 0.0144,
      "step": 1274860
    },
    {
      "epoch": 2.0863690815184306,
      "grad_norm": 0.22444617748260498,
      "learning_rate": 5.865658955081278e-06,
      "loss": 0.0175,
      "step": 1274880
    },
    {
      "epoch": 2.0864018119570837,
      "grad_norm": 0.49455612897872925,
      "learning_rate": 5.865593062867761e-06,
      "loss": 0.0174,
      "step": 1274900
    },
    {
      "epoch": 2.0864345423957373,
      "grad_norm": 0.22759994864463806,
      "learning_rate": 5.865527170654244e-06,
      "loss": 0.0212,
      "step": 1274920
    },
    {
      "epoch": 2.0864672728343905,
      "grad_norm": 0.7137231230735779,
      "learning_rate": 5.865461278440727e-06,
      "loss": 0.0183,
      "step": 1274940
    },
    {
      "epoch": 2.086500003273044,
      "grad_norm": 1.021710753440857,
      "learning_rate": 5.86539538622721e-06,
      "loss": 0.014,
      "step": 1274960
    },
    {
      "epoch": 2.0865327337116972,
      "grad_norm": 0.17339035868644714,
      "learning_rate": 5.865329494013693e-06,
      "loss": 0.0162,
      "step": 1274980
    },
    {
      "epoch": 2.0865654641503504,
      "grad_norm": 0.662142813205719,
      "learning_rate": 5.865263601800176e-06,
      "loss": 0.0147,
      "step": 1275000
    },
    {
      "epoch": 2.086598194589004,
      "grad_norm": 0.2153409719467163,
      "learning_rate": 5.865197709586659e-06,
      "loss": 0.0214,
      "step": 1275020
    },
    {
      "epoch": 2.086630925027657,
      "grad_norm": 0.28827187418937683,
      "learning_rate": 5.865131817373142e-06,
      "loss": 0.0272,
      "step": 1275040
    },
    {
      "epoch": 2.0866636554663107,
      "grad_norm": 0.1796947717666626,
      "learning_rate": 5.865065925159624e-06,
      "loss": 0.0213,
      "step": 1275060
    },
    {
      "epoch": 2.086696385904964,
      "grad_norm": 0.09366734325885773,
      "learning_rate": 5.865000032946108e-06,
      "loss": 0.0137,
      "step": 1275080
    },
    {
      "epoch": 2.0867291163436175,
      "grad_norm": 0.3930699825286865,
      "learning_rate": 5.86493414073259e-06,
      "loss": 0.0227,
      "step": 1275100
    },
    {
      "epoch": 2.0867618467822706,
      "grad_norm": 0.3081634044647217,
      "learning_rate": 5.864868248519073e-06,
      "loss": 0.0165,
      "step": 1275120
    },
    {
      "epoch": 2.0867945772209238,
      "grad_norm": 0.9105503559112549,
      "learning_rate": 5.864802356305556e-06,
      "loss": 0.0171,
      "step": 1275140
    },
    {
      "epoch": 2.0868273076595774,
      "grad_norm": 0.9662047624588013,
      "learning_rate": 5.864736464092039e-06,
      "loss": 0.0152,
      "step": 1275160
    },
    {
      "epoch": 2.0868600380982305,
      "grad_norm": 0.12497483938932419,
      "learning_rate": 5.864670571878521e-06,
      "loss": 0.0169,
      "step": 1275180
    },
    {
      "epoch": 2.086892768536884,
      "grad_norm": 1.003597378730774,
      "learning_rate": 5.864604679665005e-06,
      "loss": 0.0144,
      "step": 1275200
    },
    {
      "epoch": 2.0869254989755373,
      "grad_norm": 0.3630266487598419,
      "learning_rate": 5.864538787451487e-06,
      "loss": 0.0151,
      "step": 1275220
    },
    {
      "epoch": 2.0869582294141904,
      "grad_norm": 0.2969370186328888,
      "learning_rate": 5.8644728952379705e-06,
      "loss": 0.0156,
      "step": 1275240
    },
    {
      "epoch": 2.086990959852844,
      "grad_norm": 0.16748934984207153,
      "learning_rate": 5.864407003024452e-06,
      "loss": 0.0127,
      "step": 1275260
    },
    {
      "epoch": 2.087023690291497,
      "grad_norm": 0.36102595925331116,
      "learning_rate": 5.864341110810936e-06,
      "loss": 0.0165,
      "step": 1275280
    },
    {
      "epoch": 2.0870564207301507,
      "grad_norm": 0.4165143370628357,
      "learning_rate": 5.864275218597419e-06,
      "loss": 0.0121,
      "step": 1275300
    },
    {
      "epoch": 2.087089151168804,
      "grad_norm": 0.10087034851312637,
      "learning_rate": 5.8642093263839014e-06,
      "loss": 0.0191,
      "step": 1275320
    },
    {
      "epoch": 2.0871218816074575,
      "grad_norm": 1.1892023086547852,
      "learning_rate": 5.864143434170385e-06,
      "loss": 0.015,
      "step": 1275340
    },
    {
      "epoch": 2.0871546120461106,
      "grad_norm": 0.6615235805511475,
      "learning_rate": 5.864077541956868e-06,
      "loss": 0.0181,
      "step": 1275360
    },
    {
      "epoch": 2.087187342484764,
      "grad_norm": 0.5424935817718506,
      "learning_rate": 5.8640116497433505e-06,
      "loss": 0.0116,
      "step": 1275380
    },
    {
      "epoch": 2.0872200729234174,
      "grad_norm": 0.21107110381126404,
      "learning_rate": 5.863945757529833e-06,
      "loss": 0.015,
      "step": 1275400
    },
    {
      "epoch": 2.0872528033620705,
      "grad_norm": 0.316650390625,
      "learning_rate": 5.863879865316317e-06,
      "loss": 0.0183,
      "step": 1275420
    },
    {
      "epoch": 2.087285533800724,
      "grad_norm": 0.2704281210899353,
      "learning_rate": 5.863813973102799e-06,
      "loss": 0.0104,
      "step": 1275440
    },
    {
      "epoch": 2.0873182642393773,
      "grad_norm": 0.2970523536205292,
      "learning_rate": 5.863748080889282e-06,
      "loss": 0.0155,
      "step": 1275460
    },
    {
      "epoch": 2.087350994678031,
      "grad_norm": 0.6850955486297607,
      "learning_rate": 5.863682188675764e-06,
      "loss": 0.0171,
      "step": 1275480
    },
    {
      "epoch": 2.087383725116684,
      "grad_norm": 0.27237483859062195,
      "learning_rate": 5.863616296462248e-06,
      "loss": 0.0168,
      "step": 1275500
    },
    {
      "epoch": 2.087416455555337,
      "grad_norm": 0.27510181069374084,
      "learning_rate": 5.8635504042487305e-06,
      "loss": 0.0102,
      "step": 1275520
    },
    {
      "epoch": 2.0874491859939908,
      "grad_norm": 0.6169009208679199,
      "learning_rate": 5.863484512035213e-06,
      "loss": 0.0232,
      "step": 1275540
    },
    {
      "epoch": 2.087481916432644,
      "grad_norm": 1.3702826499938965,
      "learning_rate": 5.863418619821696e-06,
      "loss": 0.0182,
      "step": 1275560
    },
    {
      "epoch": 2.0875146468712975,
      "grad_norm": 0.2509567141532898,
      "learning_rate": 5.86335272760818e-06,
      "loss": 0.0208,
      "step": 1275580
    },
    {
      "epoch": 2.0875473773099507,
      "grad_norm": 1.039642572402954,
      "learning_rate": 5.8632868353946615e-06,
      "loss": 0.0186,
      "step": 1275600
    },
    {
      "epoch": 2.0875801077486043,
      "grad_norm": 0.5751798748970032,
      "learning_rate": 5.863220943181145e-06,
      "loss": 0.0214,
      "step": 1275620
    },
    {
      "epoch": 2.0876128381872574,
      "grad_norm": 0.15101750195026398,
      "learning_rate": 5.863155050967627e-06,
      "loss": 0.0199,
      "step": 1275640
    },
    {
      "epoch": 2.0876455686259106,
      "grad_norm": 0.3427312970161438,
      "learning_rate": 5.8630891587541106e-06,
      "loss": 0.0166,
      "step": 1275660
    },
    {
      "epoch": 2.087678299064564,
      "grad_norm": 0.23890982568264008,
      "learning_rate": 5.8630232665405925e-06,
      "loss": 0.0178,
      "step": 1275680
    },
    {
      "epoch": 2.0877110295032173,
      "grad_norm": 0.37612631916999817,
      "learning_rate": 5.862957374327076e-06,
      "loss": 0.0178,
      "step": 1275700
    },
    {
      "epoch": 2.087743759941871,
      "grad_norm": 0.4413585662841797,
      "learning_rate": 5.86289148211356e-06,
      "loss": 0.0126,
      "step": 1275720
    },
    {
      "epoch": 2.087776490380524,
      "grad_norm": 0.4491742253303528,
      "learning_rate": 5.8628255899000415e-06,
      "loss": 0.0202,
      "step": 1275740
    },
    {
      "epoch": 2.087809220819177,
      "grad_norm": 0.2013969123363495,
      "learning_rate": 5.862759697686525e-06,
      "loss": 0.0157,
      "step": 1275760
    },
    {
      "epoch": 2.087841951257831,
      "grad_norm": 1.1382683515548706,
      "learning_rate": 5.862693805473008e-06,
      "loss": 0.0122,
      "step": 1275780
    },
    {
      "epoch": 2.087874681696484,
      "grad_norm": 0.3088255524635315,
      "learning_rate": 5.862627913259491e-06,
      "loss": 0.0115,
      "step": 1275800
    },
    {
      "epoch": 2.0879074121351375,
      "grad_norm": 0.3997189998626709,
      "learning_rate": 5.862562021045973e-06,
      "loss": 0.0107,
      "step": 1275820
    },
    {
      "epoch": 2.0879401425737907,
      "grad_norm": 0.22179661691188812,
      "learning_rate": 5.862496128832457e-06,
      "loss": 0.0155,
      "step": 1275840
    },
    {
      "epoch": 2.0879728730124443,
      "grad_norm": 0.3546818494796753,
      "learning_rate": 5.862430236618939e-06,
      "loss": 0.0225,
      "step": 1275860
    },
    {
      "epoch": 2.0880056034510974,
      "grad_norm": 0.050631921738386154,
      "learning_rate": 5.862364344405422e-06,
      "loss": 0.0139,
      "step": 1275880
    },
    {
      "epoch": 2.0880383338897506,
      "grad_norm": 0.12504881620407104,
      "learning_rate": 5.862298452191904e-06,
      "loss": 0.0163,
      "step": 1275900
    },
    {
      "epoch": 2.088071064328404,
      "grad_norm": 0.23611515760421753,
      "learning_rate": 5.862232559978388e-06,
      "loss": 0.013,
      "step": 1275920
    },
    {
      "epoch": 2.0881037947670573,
      "grad_norm": 0.40164244174957275,
      "learning_rate": 5.862166667764871e-06,
      "loss": 0.0247,
      "step": 1275940
    },
    {
      "epoch": 2.088136525205711,
      "grad_norm": 0.15320022404193878,
      "learning_rate": 5.862100775551353e-06,
      "loss": 0.0117,
      "step": 1275960
    },
    {
      "epoch": 2.088169255644364,
      "grad_norm": 0.21781182289123535,
      "learning_rate": 5.862034883337836e-06,
      "loss": 0.0196,
      "step": 1275980
    },
    {
      "epoch": 2.0882019860830177,
      "grad_norm": 0.5216600894927979,
      "learning_rate": 5.86196899112432e-06,
      "loss": 0.0088,
      "step": 1276000
    },
    {
      "epoch": 2.088234716521671,
      "grad_norm": 0.2562744617462158,
      "learning_rate": 5.8619030989108016e-06,
      "loss": 0.0152,
      "step": 1276020
    },
    {
      "epoch": 2.088267446960324,
      "grad_norm": 0.17471151053905487,
      "learning_rate": 5.861837206697285e-06,
      "loss": 0.0129,
      "step": 1276040
    },
    {
      "epoch": 2.0883001773989776,
      "grad_norm": 0.12411905825138092,
      "learning_rate": 5.861771314483769e-06,
      "loss": 0.0211,
      "step": 1276060
    },
    {
      "epoch": 2.0883329078376307,
      "grad_norm": 0.3510993421077728,
      "learning_rate": 5.861705422270251e-06,
      "loss": 0.0132,
      "step": 1276080
    },
    {
      "epoch": 2.0883656382762843,
      "grad_norm": 0.21640850603580475,
      "learning_rate": 5.861639530056734e-06,
      "loss": 0.0157,
      "step": 1276100
    },
    {
      "epoch": 2.0883983687149374,
      "grad_norm": 0.5901086926460266,
      "learning_rate": 5.861573637843216e-06,
      "loss": 0.0247,
      "step": 1276120
    },
    {
      "epoch": 2.088431099153591,
      "grad_norm": 0.9176366329193115,
      "learning_rate": 5.8615077456297e-06,
      "loss": 0.0177,
      "step": 1276140
    },
    {
      "epoch": 2.088463829592244,
      "grad_norm": 0.18358181416988373,
      "learning_rate": 5.8614418534161824e-06,
      "loss": 0.0134,
      "step": 1276160
    },
    {
      "epoch": 2.0884965600308973,
      "grad_norm": 0.23783470690250397,
      "learning_rate": 5.861375961202665e-06,
      "loss": 0.0151,
      "step": 1276180
    },
    {
      "epoch": 2.088529290469551,
      "grad_norm": 0.7788260579109192,
      "learning_rate": 5.861310068989148e-06,
      "loss": 0.0135,
      "step": 1276200
    },
    {
      "epoch": 2.088562020908204,
      "grad_norm": 0.15636026859283447,
      "learning_rate": 5.8612441767756315e-06,
      "loss": 0.0153,
      "step": 1276220
    },
    {
      "epoch": 2.0885947513468577,
      "grad_norm": 0.4625892639160156,
      "learning_rate": 5.861178284562113e-06,
      "loss": 0.0187,
      "step": 1276240
    },
    {
      "epoch": 2.088627481785511,
      "grad_norm": 0.8252003192901611,
      "learning_rate": 5.861112392348597e-06,
      "loss": 0.0136,
      "step": 1276260
    },
    {
      "epoch": 2.0886602122241644,
      "grad_norm": 0.24364925920963287,
      "learning_rate": 5.861046500135079e-06,
      "loss": 0.0201,
      "step": 1276280
    },
    {
      "epoch": 2.0886929426628176,
      "grad_norm": 0.21157903969287872,
      "learning_rate": 5.8609806079215625e-06,
      "loss": 0.0133,
      "step": 1276300
    },
    {
      "epoch": 2.0887256731014707,
      "grad_norm": 0.41184133291244507,
      "learning_rate": 5.860914715708045e-06,
      "loss": 0.0165,
      "step": 1276320
    },
    {
      "epoch": 2.0887584035401243,
      "grad_norm": 0.8760010004043579,
      "learning_rate": 5.860848823494528e-06,
      "loss": 0.0211,
      "step": 1276340
    },
    {
      "epoch": 2.0887911339787775,
      "grad_norm": 0.13378746807575226,
      "learning_rate": 5.860782931281011e-06,
      "loss": 0.0109,
      "step": 1276360
    },
    {
      "epoch": 2.088823864417431,
      "grad_norm": 0.4875287413597107,
      "learning_rate": 5.860717039067494e-06,
      "loss": 0.0145,
      "step": 1276380
    },
    {
      "epoch": 2.088856594856084,
      "grad_norm": 0.3471238613128662,
      "learning_rate": 5.860651146853977e-06,
      "loss": 0.0147,
      "step": 1276400
    },
    {
      "epoch": 2.088889325294738,
      "grad_norm": 0.498054176568985,
      "learning_rate": 5.86058525464046e-06,
      "loss": 0.0195,
      "step": 1276420
    },
    {
      "epoch": 2.088922055733391,
      "grad_norm": 1.1370776891708374,
      "learning_rate": 5.860519362426943e-06,
      "loss": 0.016,
      "step": 1276440
    },
    {
      "epoch": 2.088954786172044,
      "grad_norm": 0.35500970482826233,
      "learning_rate": 5.860453470213425e-06,
      "loss": 0.0194,
      "step": 1276460
    },
    {
      "epoch": 2.0889875166106977,
      "grad_norm": 0.42467400431632996,
      "learning_rate": 5.860387577999909e-06,
      "loss": 0.017,
      "step": 1276480
    },
    {
      "epoch": 2.089020247049351,
      "grad_norm": 0.7156652212142944,
      "learning_rate": 5.860321685786391e-06,
      "loss": 0.0117,
      "step": 1276500
    },
    {
      "epoch": 2.0890529774880044,
      "grad_norm": 0.2844316065311432,
      "learning_rate": 5.860255793572874e-06,
      "loss": 0.0145,
      "step": 1276520
    },
    {
      "epoch": 2.0890857079266576,
      "grad_norm": 0.8317618370056152,
      "learning_rate": 5.860189901359357e-06,
      "loss": 0.0138,
      "step": 1276540
    },
    {
      "epoch": 2.089118438365311,
      "grad_norm": 0.13901519775390625,
      "learning_rate": 5.86012400914584e-06,
      "loss": 0.0137,
      "step": 1276560
    },
    {
      "epoch": 2.0891511688039643,
      "grad_norm": 0.38893744349479675,
      "learning_rate": 5.8600581169323225e-06,
      "loss": 0.0101,
      "step": 1276580
    },
    {
      "epoch": 2.0891838992426175,
      "grad_norm": 0.26821839809417725,
      "learning_rate": 5.859992224718806e-06,
      "loss": 0.0163,
      "step": 1276600
    },
    {
      "epoch": 2.089216629681271,
      "grad_norm": 0.414202481508255,
      "learning_rate": 5.859926332505288e-06,
      "loss": 0.0163,
      "step": 1276620
    },
    {
      "epoch": 2.0892493601199242,
      "grad_norm": 0.46928495168685913,
      "learning_rate": 5.859860440291772e-06,
      "loss": 0.0175,
      "step": 1276640
    },
    {
      "epoch": 2.089282090558578,
      "grad_norm": 0.11613795161247253,
      "learning_rate": 5.8597945480782535e-06,
      "loss": 0.0104,
      "step": 1276660
    },
    {
      "epoch": 2.089314820997231,
      "grad_norm": 0.4964672029018402,
      "learning_rate": 5.859728655864737e-06,
      "loss": 0.0145,
      "step": 1276680
    },
    {
      "epoch": 2.089347551435884,
      "grad_norm": 0.4720194637775421,
      "learning_rate": 5.859662763651219e-06,
      "loss": 0.0136,
      "step": 1276700
    },
    {
      "epoch": 2.0893802818745377,
      "grad_norm": 0.19126519560813904,
      "learning_rate": 5.8595968714377025e-06,
      "loss": 0.0132,
      "step": 1276720
    },
    {
      "epoch": 2.089413012313191,
      "grad_norm": 0.45908135175704956,
      "learning_rate": 5.859530979224185e-06,
      "loss": 0.0113,
      "step": 1276740
    },
    {
      "epoch": 2.0894457427518445,
      "grad_norm": 0.14496979117393494,
      "learning_rate": 5.859465087010668e-06,
      "loss": 0.0147,
      "step": 1276760
    },
    {
      "epoch": 2.0894784731904976,
      "grad_norm": 0.2989839017391205,
      "learning_rate": 5.859399194797152e-06,
      "loss": 0.0105,
      "step": 1276780
    },
    {
      "epoch": 2.089511203629151,
      "grad_norm": 0.13561731576919556,
      "learning_rate": 5.859333302583634e-06,
      "loss": 0.0178,
      "step": 1276800
    },
    {
      "epoch": 2.0895439340678044,
      "grad_norm": 0.4728669226169586,
      "learning_rate": 5.859267410370117e-06,
      "loss": 0.0172,
      "step": 1276820
    },
    {
      "epoch": 2.0895766645064575,
      "grad_norm": 0.43569886684417725,
      "learning_rate": 5.8592015181566e-06,
      "loss": 0.0103,
      "step": 1276840
    },
    {
      "epoch": 2.089609394945111,
      "grad_norm": 0.08261094242334366,
      "learning_rate": 5.859135625943083e-06,
      "loss": 0.0135,
      "step": 1276860
    },
    {
      "epoch": 2.0896421253837643,
      "grad_norm": 0.3903954029083252,
      "learning_rate": 5.859069733729565e-06,
      "loss": 0.0153,
      "step": 1276880
    },
    {
      "epoch": 2.089674855822418,
      "grad_norm": 0.17918992042541504,
      "learning_rate": 5.859003841516049e-06,
      "loss": 0.0136,
      "step": 1276900
    },
    {
      "epoch": 2.089707586261071,
      "grad_norm": 0.22984398901462555,
      "learning_rate": 5.858937949302531e-06,
      "loss": 0.0192,
      "step": 1276920
    },
    {
      "epoch": 2.0897403166997246,
      "grad_norm": 0.549484372138977,
      "learning_rate": 5.858872057089014e-06,
      "loss": 0.024,
      "step": 1276940
    },
    {
      "epoch": 2.0897730471383777,
      "grad_norm": 0.48878607153892517,
      "learning_rate": 5.858806164875497e-06,
      "loss": 0.0141,
      "step": 1276960
    },
    {
      "epoch": 2.089805777577031,
      "grad_norm": 0.6314589381217957,
      "learning_rate": 5.85874027266198e-06,
      "loss": 0.0115,
      "step": 1276980
    },
    {
      "epoch": 2.0898385080156845,
      "grad_norm": 0.5667577981948853,
      "learning_rate": 5.858674380448463e-06,
      "loss": 0.0173,
      "step": 1277000
    },
    {
      "epoch": 2.0898712384543376,
      "grad_norm": 0.8574506044387817,
      "learning_rate": 5.858608488234946e-06,
      "loss": 0.018,
      "step": 1277020
    },
    {
      "epoch": 2.0899039688929912,
      "grad_norm": 0.18149887025356293,
      "learning_rate": 5.858542596021428e-06,
      "loss": 0.0138,
      "step": 1277040
    },
    {
      "epoch": 2.0899366993316444,
      "grad_norm": 0.2904159724712372,
      "learning_rate": 5.858476703807912e-06,
      "loss": 0.0175,
      "step": 1277060
    },
    {
      "epoch": 2.089969429770298,
      "grad_norm": 0.32905063033103943,
      "learning_rate": 5.8584108115943935e-06,
      "loss": 0.0153,
      "step": 1277080
    },
    {
      "epoch": 2.090002160208951,
      "grad_norm": 0.6299390196800232,
      "learning_rate": 5.858344919380877e-06,
      "loss": 0.0155,
      "step": 1277100
    },
    {
      "epoch": 2.0900348906476043,
      "grad_norm": 0.8249020576477051,
      "learning_rate": 5.858279027167361e-06,
      "loss": 0.0117,
      "step": 1277120
    },
    {
      "epoch": 2.090067621086258,
      "grad_norm": 0.4677492678165436,
      "learning_rate": 5.858213134953843e-06,
      "loss": 0.0205,
      "step": 1277140
    },
    {
      "epoch": 2.090100351524911,
      "grad_norm": 0.5504429340362549,
      "learning_rate": 5.858147242740326e-06,
      "loss": 0.0181,
      "step": 1277160
    },
    {
      "epoch": 2.0901330819635646,
      "grad_norm": 0.4623733460903168,
      "learning_rate": 5.858081350526809e-06,
      "loss": 0.0118,
      "step": 1277180
    },
    {
      "epoch": 2.0901658124022178,
      "grad_norm": 0.29534420371055603,
      "learning_rate": 5.858015458313292e-06,
      "loss": 0.0106,
      "step": 1277200
    },
    {
      "epoch": 2.0901985428408714,
      "grad_norm": 0.27878162264823914,
      "learning_rate": 5.857949566099774e-06,
      "loss": 0.0139,
      "step": 1277220
    },
    {
      "epoch": 2.0902312732795245,
      "grad_norm": 0.36740007996559143,
      "learning_rate": 5.857883673886258e-06,
      "loss": 0.0165,
      "step": 1277240
    },
    {
      "epoch": 2.0902640037181777,
      "grad_norm": 0.6695159673690796,
      "learning_rate": 5.85781778167274e-06,
      "loss": 0.015,
      "step": 1277260
    },
    {
      "epoch": 2.0902967341568313,
      "grad_norm": 0.3576164245605469,
      "learning_rate": 5.8577518894592235e-06,
      "loss": 0.0107,
      "step": 1277280
    },
    {
      "epoch": 2.0903294645954844,
      "grad_norm": 0.7010087966918945,
      "learning_rate": 5.857685997245705e-06,
      "loss": 0.0251,
      "step": 1277300
    },
    {
      "epoch": 2.090362195034138,
      "grad_norm": 0.4288761615753174,
      "learning_rate": 5.857620105032189e-06,
      "loss": 0.0157,
      "step": 1277320
    },
    {
      "epoch": 2.090394925472791,
      "grad_norm": 0.41375067830085754,
      "learning_rate": 5.857554212818672e-06,
      "loss": 0.0138,
      "step": 1277340
    },
    {
      "epoch": 2.0904276559114443,
      "grad_norm": 0.4771285057067871,
      "learning_rate": 5.8574883206051544e-06,
      "loss": 0.013,
      "step": 1277360
    },
    {
      "epoch": 2.090460386350098,
      "grad_norm": 0.0976470336318016,
      "learning_rate": 5.857422428391637e-06,
      "loss": 0.0141,
      "step": 1277380
    },
    {
      "epoch": 2.090493116788751,
      "grad_norm": 0.8926419019699097,
      "learning_rate": 5.857356536178121e-06,
      "loss": 0.0142,
      "step": 1277400
    },
    {
      "epoch": 2.0905258472274046,
      "grad_norm": 0.8334395885467529,
      "learning_rate": 5.857290643964603e-06,
      "loss": 0.0159,
      "step": 1277420
    },
    {
      "epoch": 2.090558577666058,
      "grad_norm": 0.43557992577552795,
      "learning_rate": 5.857224751751086e-06,
      "loss": 0.0173,
      "step": 1277440
    },
    {
      "epoch": 2.0905913081047114,
      "grad_norm": 0.8334431648254395,
      "learning_rate": 5.85715885953757e-06,
      "loss": 0.0113,
      "step": 1277460
    },
    {
      "epoch": 2.0906240385433645,
      "grad_norm": 0.39552322030067444,
      "learning_rate": 5.857092967324052e-06,
      "loss": 0.0093,
      "step": 1277480
    },
    {
      "epoch": 2.0906567689820177,
      "grad_norm": 0.7892201542854309,
      "learning_rate": 5.857027075110535e-06,
      "loss": 0.0182,
      "step": 1277500
    },
    {
      "epoch": 2.0906894994206713,
      "grad_norm": 0.5377392172813416,
      "learning_rate": 5.856961182897017e-06,
      "loss": 0.0181,
      "step": 1277520
    },
    {
      "epoch": 2.0907222298593244,
      "grad_norm": 0.6231152415275574,
      "learning_rate": 5.856895290683501e-06,
      "loss": 0.0122,
      "step": 1277540
    },
    {
      "epoch": 2.090754960297978,
      "grad_norm": 0.09285975247621536,
      "learning_rate": 5.856829398469983e-06,
      "loss": 0.0176,
      "step": 1277560
    },
    {
      "epoch": 2.090787690736631,
      "grad_norm": 0.5660173892974854,
      "learning_rate": 5.856763506256466e-06,
      "loss": 0.0137,
      "step": 1277580
    },
    {
      "epoch": 2.0908204211752848,
      "grad_norm": 0.22370894253253937,
      "learning_rate": 5.856697614042949e-06,
      "loss": 0.013,
      "step": 1277600
    },
    {
      "epoch": 2.090853151613938,
      "grad_norm": 0.6753221750259399,
      "learning_rate": 5.856631721829433e-06,
      "loss": 0.0207,
      "step": 1277620
    },
    {
      "epoch": 2.090885882052591,
      "grad_norm": 0.578253984451294,
      "learning_rate": 5.8565658296159145e-06,
      "loss": 0.0152,
      "step": 1277640
    },
    {
      "epoch": 2.0909186124912447,
      "grad_norm": 0.40024030208587646,
      "learning_rate": 5.856499937402398e-06,
      "loss": 0.015,
      "step": 1277660
    },
    {
      "epoch": 2.090951342929898,
      "grad_norm": 0.452983558177948,
      "learning_rate": 5.85643404518888e-06,
      "loss": 0.0152,
      "step": 1277680
    },
    {
      "epoch": 2.0909840733685514,
      "grad_norm": 0.22565826773643494,
      "learning_rate": 5.8563681529753636e-06,
      "loss": 0.0142,
      "step": 1277700
    },
    {
      "epoch": 2.0910168038072046,
      "grad_norm": 0.3165441155433655,
      "learning_rate": 5.8563022607618455e-06,
      "loss": 0.0194,
      "step": 1277720
    },
    {
      "epoch": 2.091049534245858,
      "grad_norm": 0.35354265570640564,
      "learning_rate": 5.856236368548329e-06,
      "loss": 0.0127,
      "step": 1277740
    },
    {
      "epoch": 2.0910822646845113,
      "grad_norm": 0.25727251172065735,
      "learning_rate": 5.856170476334812e-06,
      "loss": 0.0125,
      "step": 1277760
    },
    {
      "epoch": 2.0911149951231645,
      "grad_norm": 0.7817932963371277,
      "learning_rate": 5.8561045841212945e-06,
      "loss": 0.0162,
      "step": 1277780
    },
    {
      "epoch": 2.091147725561818,
      "grad_norm": 0.310452938079834,
      "learning_rate": 5.856038691907777e-06,
      "loss": 0.0124,
      "step": 1277800
    },
    {
      "epoch": 2.091180456000471,
      "grad_norm": 0.35328811407089233,
      "learning_rate": 5.855972799694261e-06,
      "loss": 0.0171,
      "step": 1277820
    },
    {
      "epoch": 2.091213186439125,
      "grad_norm": 0.0662730410695076,
      "learning_rate": 5.855906907480744e-06,
      "loss": 0.016,
      "step": 1277840
    },
    {
      "epoch": 2.091245916877778,
      "grad_norm": 0.8259824514389038,
      "learning_rate": 5.855841015267226e-06,
      "loss": 0.015,
      "step": 1277860
    },
    {
      "epoch": 2.0912786473164315,
      "grad_norm": 0.47456660866737366,
      "learning_rate": 5.85577512305371e-06,
      "loss": 0.0128,
      "step": 1277880
    },
    {
      "epoch": 2.0913113777550847,
      "grad_norm": 0.23103360831737518,
      "learning_rate": 5.855709230840192e-06,
      "loss": 0.0143,
      "step": 1277900
    },
    {
      "epoch": 2.091344108193738,
      "grad_norm": 0.0922010987997055,
      "learning_rate": 5.855643338626675e-06,
      "loss": 0.0169,
      "step": 1277920
    },
    {
      "epoch": 2.0913768386323914,
      "grad_norm": 0.11236997693777084,
      "learning_rate": 5.855577446413157e-06,
      "loss": 0.0122,
      "step": 1277940
    },
    {
      "epoch": 2.0914095690710446,
      "grad_norm": 0.7694177031517029,
      "learning_rate": 5.855511554199641e-06,
      "loss": 0.0158,
      "step": 1277960
    },
    {
      "epoch": 2.091442299509698,
      "grad_norm": 0.3475803732872009,
      "learning_rate": 5.855445661986124e-06,
      "loss": 0.0149,
      "step": 1277980
    },
    {
      "epoch": 2.0914750299483513,
      "grad_norm": 1.0089797973632812,
      "learning_rate": 5.855379769772606e-06,
      "loss": 0.0172,
      "step": 1278000
    },
    {
      "epoch": 2.091507760387005,
      "grad_norm": 0.06874024122953415,
      "learning_rate": 5.855313877559089e-06,
      "loss": 0.011,
      "step": 1278020
    },
    {
      "epoch": 2.091540490825658,
      "grad_norm": 0.25515612959861755,
      "learning_rate": 5.855247985345573e-06,
      "loss": 0.0095,
      "step": 1278040
    },
    {
      "epoch": 2.091573221264311,
      "grad_norm": 0.2627376616001129,
      "learning_rate": 5.8551820931320546e-06,
      "loss": 0.0194,
      "step": 1278060
    },
    {
      "epoch": 2.091605951702965,
      "grad_norm": 0.2246808409690857,
      "learning_rate": 5.855116200918538e-06,
      "loss": 0.0153,
      "step": 1278080
    },
    {
      "epoch": 2.091638682141618,
      "grad_norm": 0.3068552315235138,
      "learning_rate": 5.85505030870502e-06,
      "loss": 0.0153,
      "step": 1278100
    },
    {
      "epoch": 2.0916714125802716,
      "grad_norm": 0.3063625991344452,
      "learning_rate": 5.854984416491504e-06,
      "loss": 0.0227,
      "step": 1278120
    },
    {
      "epoch": 2.0917041430189247,
      "grad_norm": 0.9052583575248718,
      "learning_rate": 5.854918524277986e-06,
      "loss": 0.0172,
      "step": 1278140
    },
    {
      "epoch": 2.0917368734575783,
      "grad_norm": 0.3908105492591858,
      "learning_rate": 5.854852632064469e-06,
      "loss": 0.0186,
      "step": 1278160
    },
    {
      "epoch": 2.0917696038962315,
      "grad_norm": 0.33049675822257996,
      "learning_rate": 5.854786739850953e-06,
      "loss": 0.0178,
      "step": 1278180
    },
    {
      "epoch": 2.0918023343348846,
      "grad_norm": 0.4721290171146393,
      "learning_rate": 5.8547208476374354e-06,
      "loss": 0.0114,
      "step": 1278200
    },
    {
      "epoch": 2.091835064773538,
      "grad_norm": 0.5796418786048889,
      "learning_rate": 5.854654955423918e-06,
      "loss": 0.0208,
      "step": 1278220
    },
    {
      "epoch": 2.0918677952121913,
      "grad_norm": 0.23151493072509766,
      "learning_rate": 5.854589063210401e-06,
      "loss": 0.0117,
      "step": 1278240
    },
    {
      "epoch": 2.091900525650845,
      "grad_norm": 0.1645645797252655,
      "learning_rate": 5.8545231709968845e-06,
      "loss": 0.0126,
      "step": 1278260
    },
    {
      "epoch": 2.091933256089498,
      "grad_norm": 0.2562015652656555,
      "learning_rate": 5.854457278783366e-06,
      "loss": 0.019,
      "step": 1278280
    },
    {
      "epoch": 2.0919659865281512,
      "grad_norm": 0.6164131760597229,
      "learning_rate": 5.85439138656985e-06,
      "loss": 0.0177,
      "step": 1278300
    },
    {
      "epoch": 2.091998716966805,
      "grad_norm": 0.17136940360069275,
      "learning_rate": 5.854325494356332e-06,
      "loss": 0.0241,
      "step": 1278320
    },
    {
      "epoch": 2.092031447405458,
      "grad_norm": 0.6352617144584656,
      "learning_rate": 5.8542596021428155e-06,
      "loss": 0.0121,
      "step": 1278340
    },
    {
      "epoch": 2.0920641778441116,
      "grad_norm": 0.35119399428367615,
      "learning_rate": 5.854193709929298e-06,
      "loss": 0.0122,
      "step": 1278360
    },
    {
      "epoch": 2.0920969082827647,
      "grad_norm": 0.16186916828155518,
      "learning_rate": 5.854127817715781e-06,
      "loss": 0.0207,
      "step": 1278380
    },
    {
      "epoch": 2.0921296387214183,
      "grad_norm": 0.29677608609199524,
      "learning_rate": 5.854061925502264e-06,
      "loss": 0.0172,
      "step": 1278400
    },
    {
      "epoch": 2.0921623691600715,
      "grad_norm": 0.5830256938934326,
      "learning_rate": 5.853996033288747e-06,
      "loss": 0.0192,
      "step": 1278420
    },
    {
      "epoch": 2.0921950995987246,
      "grad_norm": 0.2592092454433441,
      "learning_rate": 5.853930141075229e-06,
      "loss": 0.0155,
      "step": 1278440
    },
    {
      "epoch": 2.0922278300373782,
      "grad_norm": 0.7953392267227173,
      "learning_rate": 5.853864248861713e-06,
      "loss": 0.0167,
      "step": 1278460
    },
    {
      "epoch": 2.0922605604760314,
      "grad_norm": 1.0605065822601318,
      "learning_rate": 5.853798356648195e-06,
      "loss": 0.0102,
      "step": 1278480
    },
    {
      "epoch": 2.092293290914685,
      "grad_norm": 1.0752800703048706,
      "learning_rate": 5.853732464434678e-06,
      "loss": 0.0136,
      "step": 1278500
    },
    {
      "epoch": 2.092326021353338,
      "grad_norm": 0.5178866982460022,
      "learning_rate": 5.853666572221162e-06,
      "loss": 0.0113,
      "step": 1278520
    },
    {
      "epoch": 2.0923587517919917,
      "grad_norm": 0.46435222029685974,
      "learning_rate": 5.853600680007644e-06,
      "loss": 0.012,
      "step": 1278540
    },
    {
      "epoch": 2.092391482230645,
      "grad_norm": 0.3304893672466278,
      "learning_rate": 5.853534787794127e-06,
      "loss": 0.0185,
      "step": 1278560
    },
    {
      "epoch": 2.092424212669298,
      "grad_norm": 0.2987731397151947,
      "learning_rate": 5.853468895580609e-06,
      "loss": 0.0202,
      "step": 1278580
    },
    {
      "epoch": 2.0924569431079516,
      "grad_norm": 0.4820297658443451,
      "learning_rate": 5.853403003367093e-06,
      "loss": 0.0133,
      "step": 1278600
    },
    {
      "epoch": 2.0924896735466048,
      "grad_norm": 0.7371720671653748,
      "learning_rate": 5.8533371111535755e-06,
      "loss": 0.0137,
      "step": 1278620
    },
    {
      "epoch": 2.0925224039852584,
      "grad_norm": 0.24773503839969635,
      "learning_rate": 5.853271218940058e-06,
      "loss": 0.0172,
      "step": 1278640
    },
    {
      "epoch": 2.0925551344239115,
      "grad_norm": 0.1390390694141388,
      "learning_rate": 5.853205326726541e-06,
      "loss": 0.0138,
      "step": 1278660
    },
    {
      "epoch": 2.092587864862565,
      "grad_norm": 0.19287443161010742,
      "learning_rate": 5.853139434513025e-06,
      "loss": 0.0173,
      "step": 1278680
    },
    {
      "epoch": 2.0926205953012182,
      "grad_norm": 0.33815711736679077,
      "learning_rate": 5.8530735422995065e-06,
      "loss": 0.013,
      "step": 1278700
    },
    {
      "epoch": 2.0926533257398714,
      "grad_norm": 0.6087151169776917,
      "learning_rate": 5.85300765008599e-06,
      "loss": 0.0113,
      "step": 1278720
    },
    {
      "epoch": 2.092686056178525,
      "grad_norm": 0.6969049572944641,
      "learning_rate": 5.852941757872472e-06,
      "loss": 0.0112,
      "step": 1278740
    },
    {
      "epoch": 2.092718786617178,
      "grad_norm": 0.5254943370819092,
      "learning_rate": 5.8528758656589555e-06,
      "loss": 0.0266,
      "step": 1278760
    },
    {
      "epoch": 2.0927515170558317,
      "grad_norm": 0.8023701310157776,
      "learning_rate": 5.852809973445438e-06,
      "loss": 0.0189,
      "step": 1278780
    },
    {
      "epoch": 2.092784247494485,
      "grad_norm": 0.13257183134555817,
      "learning_rate": 5.852744081231921e-06,
      "loss": 0.0163,
      "step": 1278800
    },
    {
      "epoch": 2.092816977933138,
      "grad_norm": 0.8879503011703491,
      "learning_rate": 5.852678189018404e-06,
      "loss": 0.0176,
      "step": 1278820
    },
    {
      "epoch": 2.0928497083717916,
      "grad_norm": 0.14042159914970398,
      "learning_rate": 5.852612296804887e-06,
      "loss": 0.0177,
      "step": 1278840
    },
    {
      "epoch": 2.092882438810445,
      "grad_norm": 0.687794029712677,
      "learning_rate": 5.85254640459137e-06,
      "loss": 0.0117,
      "step": 1278860
    },
    {
      "epoch": 2.0929151692490984,
      "grad_norm": 0.26455122232437134,
      "learning_rate": 5.852480512377853e-06,
      "loss": 0.0133,
      "step": 1278880
    },
    {
      "epoch": 2.0929478996877515,
      "grad_norm": 0.3139112591743469,
      "learning_rate": 5.852414620164336e-06,
      "loss": 0.0127,
      "step": 1278900
    },
    {
      "epoch": 2.092980630126405,
      "grad_norm": 0.32595574855804443,
      "learning_rate": 5.852348727950818e-06,
      "loss": 0.0131,
      "step": 1278920
    },
    {
      "epoch": 2.0930133605650583,
      "grad_norm": 0.7721909880638123,
      "learning_rate": 5.852282835737302e-06,
      "loss": 0.0165,
      "step": 1278940
    },
    {
      "epoch": 2.0930460910037114,
      "grad_norm": 0.1627339869737625,
      "learning_rate": 5.852216943523784e-06,
      "loss": 0.0111,
      "step": 1278960
    },
    {
      "epoch": 2.093078821442365,
      "grad_norm": 0.17669600248336792,
      "learning_rate": 5.852151051310267e-06,
      "loss": 0.0186,
      "step": 1278980
    },
    {
      "epoch": 2.093111551881018,
      "grad_norm": 0.41453057527542114,
      "learning_rate": 5.85208515909675e-06,
      "loss": 0.0147,
      "step": 1279000
    },
    {
      "epoch": 2.0931442823196718,
      "grad_norm": 0.26254722476005554,
      "learning_rate": 5.852019266883233e-06,
      "loss": 0.0206,
      "step": 1279020
    },
    {
      "epoch": 2.093177012758325,
      "grad_norm": 0.2655547261238098,
      "learning_rate": 5.851953374669716e-06,
      "loss": 0.0108,
      "step": 1279040
    },
    {
      "epoch": 2.0932097431969785,
      "grad_norm": 1.2669470310211182,
      "learning_rate": 5.851887482456199e-06,
      "loss": 0.018,
      "step": 1279060
    },
    {
      "epoch": 2.0932424736356317,
      "grad_norm": 0.3759709596633911,
      "learning_rate": 5.851821590242681e-06,
      "loss": 0.0241,
      "step": 1279080
    },
    {
      "epoch": 2.093275204074285,
      "grad_norm": 0.5056554675102234,
      "learning_rate": 5.851755698029165e-06,
      "loss": 0.0107,
      "step": 1279100
    },
    {
      "epoch": 2.0933079345129384,
      "grad_norm": 1.0610014200210571,
      "learning_rate": 5.8516898058156466e-06,
      "loss": 0.0116,
      "step": 1279120
    },
    {
      "epoch": 2.0933406649515915,
      "grad_norm": 0.24152696132659912,
      "learning_rate": 5.85162391360213e-06,
      "loss": 0.0136,
      "step": 1279140
    },
    {
      "epoch": 2.093373395390245,
      "grad_norm": 0.22836364805698395,
      "learning_rate": 5.851558021388613e-06,
      "loss": 0.015,
      "step": 1279160
    },
    {
      "epoch": 2.0934061258288983,
      "grad_norm": 1.439452052116394,
      "learning_rate": 5.851492129175096e-06,
      "loss": 0.0194,
      "step": 1279180
    },
    {
      "epoch": 2.093438856267552,
      "grad_norm": 0.41394686698913574,
      "learning_rate": 5.851426236961578e-06,
      "loss": 0.0152,
      "step": 1279200
    },
    {
      "epoch": 2.093471586706205,
      "grad_norm": 0.4006846845149994,
      "learning_rate": 5.851360344748062e-06,
      "loss": 0.0145,
      "step": 1279220
    },
    {
      "epoch": 2.093504317144858,
      "grad_norm": 0.4611445963382721,
      "learning_rate": 5.851294452534545e-06,
      "loss": 0.0198,
      "step": 1279240
    },
    {
      "epoch": 2.093537047583512,
      "grad_norm": 0.9413618445396423,
      "learning_rate": 5.8512285603210274e-06,
      "loss": 0.0166,
      "step": 1279260
    },
    {
      "epoch": 2.093569778022165,
      "grad_norm": 0.2512039840221405,
      "learning_rate": 5.851162668107511e-06,
      "loss": 0.0172,
      "step": 1279280
    },
    {
      "epoch": 2.0936025084608185,
      "grad_norm": 0.35454419255256653,
      "learning_rate": 5.851096775893993e-06,
      "loss": 0.0138,
      "step": 1279300
    },
    {
      "epoch": 2.0936352388994717,
      "grad_norm": 0.2921863794326782,
      "learning_rate": 5.8510308836804765e-06,
      "loss": 0.0205,
      "step": 1279320
    },
    {
      "epoch": 2.0936679693381253,
      "grad_norm": 0.12360572069883347,
      "learning_rate": 5.850964991466958e-06,
      "loss": 0.0177,
      "step": 1279340
    },
    {
      "epoch": 2.0937006997767784,
      "grad_norm": 1.108794927597046,
      "learning_rate": 5.850899099253442e-06,
      "loss": 0.0211,
      "step": 1279360
    },
    {
      "epoch": 2.0937334302154316,
      "grad_norm": 0.35608869791030884,
      "learning_rate": 5.850833207039925e-06,
      "loss": 0.0153,
      "step": 1279380
    },
    {
      "epoch": 2.093766160654085,
      "grad_norm": 0.6939681172370911,
      "learning_rate": 5.8507673148264075e-06,
      "loss": 0.0174,
      "step": 1279400
    },
    {
      "epoch": 2.0937988910927383,
      "grad_norm": 1.1155143976211548,
      "learning_rate": 5.85070142261289e-06,
      "loss": 0.0192,
      "step": 1279420
    },
    {
      "epoch": 2.093831621531392,
      "grad_norm": 0.3907727599143982,
      "learning_rate": 5.850635530399374e-06,
      "loss": 0.017,
      "step": 1279440
    },
    {
      "epoch": 2.093864351970045,
      "grad_norm": 0.4736407399177551,
      "learning_rate": 5.850569638185856e-06,
      "loss": 0.016,
      "step": 1279460
    },
    {
      "epoch": 2.0938970824086987,
      "grad_norm": 0.13400574028491974,
      "learning_rate": 5.850503745972339e-06,
      "loss": 0.0106,
      "step": 1279480
    },
    {
      "epoch": 2.093929812847352,
      "grad_norm": 0.35867416858673096,
      "learning_rate": 5.850437853758821e-06,
      "loss": 0.0125,
      "step": 1279500
    },
    {
      "epoch": 2.093962543286005,
      "grad_norm": 0.19013716280460358,
      "learning_rate": 5.850371961545305e-06,
      "loss": 0.0152,
      "step": 1279520
    },
    {
      "epoch": 2.0939952737246585,
      "grad_norm": 0.2789213955402374,
      "learning_rate": 5.850306069331787e-06,
      "loss": 0.0105,
      "step": 1279540
    },
    {
      "epoch": 2.0940280041633117,
      "grad_norm": 0.33509594202041626,
      "learning_rate": 5.85024017711827e-06,
      "loss": 0.0095,
      "step": 1279560
    },
    {
      "epoch": 2.0940607346019653,
      "grad_norm": 0.10696141421794891,
      "learning_rate": 5.850174284904754e-06,
      "loss": 0.0137,
      "step": 1279580
    },
    {
      "epoch": 2.0940934650406184,
      "grad_norm": 0.311597615480423,
      "learning_rate": 5.850108392691236e-06,
      "loss": 0.0131,
      "step": 1279600
    },
    {
      "epoch": 2.094126195479272,
      "grad_norm": 0.19107984006404877,
      "learning_rate": 5.850042500477719e-06,
      "loss": 0.0138,
      "step": 1279620
    },
    {
      "epoch": 2.094158925917925,
      "grad_norm": 0.6105471849441528,
      "learning_rate": 5.849976608264202e-06,
      "loss": 0.0194,
      "step": 1279640
    },
    {
      "epoch": 2.0941916563565783,
      "grad_norm": 0.35415005683898926,
      "learning_rate": 5.849910716050685e-06,
      "loss": 0.0159,
      "step": 1279660
    },
    {
      "epoch": 2.094224386795232,
      "grad_norm": 0.3832007348537445,
      "learning_rate": 5.8498448238371675e-06,
      "loss": 0.0199,
      "step": 1279680
    },
    {
      "epoch": 2.094257117233885,
      "grad_norm": 0.4034128785133362,
      "learning_rate": 5.849778931623651e-06,
      "loss": 0.0142,
      "step": 1279700
    },
    {
      "epoch": 2.0942898476725387,
      "grad_norm": 0.0599425844848156,
      "learning_rate": 5.849713039410133e-06,
      "loss": 0.0104,
      "step": 1279720
    },
    {
      "epoch": 2.094322578111192,
      "grad_norm": 0.30815717577934265,
      "learning_rate": 5.8496471471966166e-06,
      "loss": 0.0124,
      "step": 1279740
    },
    {
      "epoch": 2.094355308549845,
      "grad_norm": 0.26980140805244446,
      "learning_rate": 5.8495812549830985e-06,
      "loss": 0.0142,
      "step": 1279760
    },
    {
      "epoch": 2.0943880389884986,
      "grad_norm": 0.16367898881435394,
      "learning_rate": 5.849515362769582e-06,
      "loss": 0.0227,
      "step": 1279780
    },
    {
      "epoch": 2.0944207694271517,
      "grad_norm": 0.5408062934875488,
      "learning_rate": 5.849449470556065e-06,
      "loss": 0.0103,
      "step": 1279800
    },
    {
      "epoch": 2.0944534998658053,
      "grad_norm": 0.7252596616744995,
      "learning_rate": 5.8493835783425475e-06,
      "loss": 0.0144,
      "step": 1279820
    },
    {
      "epoch": 2.0944862303044585,
      "grad_norm": 0.5946559906005859,
      "learning_rate": 5.84931768612903e-06,
      "loss": 0.0152,
      "step": 1279840
    },
    {
      "epoch": 2.094518960743112,
      "grad_norm": 0.30115094780921936,
      "learning_rate": 5.849251793915514e-06,
      "loss": 0.0102,
      "step": 1279860
    },
    {
      "epoch": 2.094551691181765,
      "grad_norm": 0.492715448141098,
      "learning_rate": 5.849185901701996e-06,
      "loss": 0.0131,
      "step": 1279880
    },
    {
      "epoch": 2.0945844216204184,
      "grad_norm": 0.6697655916213989,
      "learning_rate": 5.849120009488479e-06,
      "loss": 0.0174,
      "step": 1279900
    },
    {
      "epoch": 2.094617152059072,
      "grad_norm": 1.7988101243972778,
      "learning_rate": 5.849054117274963e-06,
      "loss": 0.0141,
      "step": 1279920
    },
    {
      "epoch": 2.094649882497725,
      "grad_norm": 0.4680447578430176,
      "learning_rate": 5.848988225061445e-06,
      "loss": 0.015,
      "step": 1279940
    },
    {
      "epoch": 2.0946826129363787,
      "grad_norm": 1.01096773147583,
      "learning_rate": 5.848922332847928e-06,
      "loss": 0.0178,
      "step": 1279960
    },
    {
      "epoch": 2.094715343375032,
      "grad_norm": 0.5472850203514099,
      "learning_rate": 5.84885644063441e-06,
      "loss": 0.0151,
      "step": 1279980
    },
    {
      "epoch": 2.0947480738136854,
      "grad_norm": 1.0370291471481323,
      "learning_rate": 5.848790548420894e-06,
      "loss": 0.0208,
      "step": 1280000
    },
    {
      "epoch": 2.0947808042523386,
      "grad_norm": 0.08739273995161057,
      "learning_rate": 5.848724656207377e-06,
      "loss": 0.0164,
      "step": 1280020
    },
    {
      "epoch": 2.0948135346909917,
      "grad_norm": 0.11271288990974426,
      "learning_rate": 5.848658763993859e-06,
      "loss": 0.0171,
      "step": 1280040
    },
    {
      "epoch": 2.0948462651296453,
      "grad_norm": 0.3713012635707855,
      "learning_rate": 5.848592871780342e-06,
      "loss": 0.0167,
      "step": 1280060
    },
    {
      "epoch": 2.0948789955682985,
      "grad_norm": 3.14945650100708,
      "learning_rate": 5.848526979566826e-06,
      "loss": 0.0131,
      "step": 1280080
    },
    {
      "epoch": 2.094911726006952,
      "grad_norm": 1.3593429327011108,
      "learning_rate": 5.848461087353308e-06,
      "loss": 0.0125,
      "step": 1280100
    },
    {
      "epoch": 2.0949444564456052,
      "grad_norm": 0.5470647811889648,
      "learning_rate": 5.848395195139791e-06,
      "loss": 0.0165,
      "step": 1280120
    },
    {
      "epoch": 2.094977186884259,
      "grad_norm": 0.5526545643806458,
      "learning_rate": 5.848329302926273e-06,
      "loss": 0.0149,
      "step": 1280140
    },
    {
      "epoch": 2.095009917322912,
      "grad_norm": 0.24241329729557037,
      "learning_rate": 5.848263410712757e-06,
      "loss": 0.0206,
      "step": 1280160
    },
    {
      "epoch": 2.095042647761565,
      "grad_norm": 0.17812319099903107,
      "learning_rate": 5.848197518499239e-06,
      "loss": 0.0169,
      "step": 1280180
    },
    {
      "epoch": 2.0950753782002187,
      "grad_norm": 0.2887306213378906,
      "learning_rate": 5.848131626285722e-06,
      "loss": 0.0111,
      "step": 1280200
    },
    {
      "epoch": 2.095108108638872,
      "grad_norm": 0.3347800076007843,
      "learning_rate": 5.848065734072205e-06,
      "loss": 0.0134,
      "step": 1280220
    },
    {
      "epoch": 2.0951408390775255,
      "grad_norm": 0.3395681381225586,
      "learning_rate": 5.8479998418586885e-06,
      "loss": 0.0149,
      "step": 1280240
    },
    {
      "epoch": 2.0951735695161786,
      "grad_norm": 0.5588940382003784,
      "learning_rate": 5.84793394964517e-06,
      "loss": 0.0164,
      "step": 1280260
    },
    {
      "epoch": 2.095206299954832,
      "grad_norm": 0.6468480825424194,
      "learning_rate": 5.847868057431654e-06,
      "loss": 0.0235,
      "step": 1280280
    },
    {
      "epoch": 2.0952390303934854,
      "grad_norm": 0.6360471248626709,
      "learning_rate": 5.8478021652181375e-06,
      "loss": 0.0122,
      "step": 1280300
    },
    {
      "epoch": 2.0952717608321385,
      "grad_norm": 0.6590889096260071,
      "learning_rate": 5.847736273004619e-06,
      "loss": 0.0083,
      "step": 1280320
    },
    {
      "epoch": 2.095304491270792,
      "grad_norm": 0.4031951129436493,
      "learning_rate": 5.847670380791103e-06,
      "loss": 0.0113,
      "step": 1280340
    },
    {
      "epoch": 2.0953372217094453,
      "grad_norm": 0.24555902183055878,
      "learning_rate": 5.847604488577585e-06,
      "loss": 0.0095,
      "step": 1280360
    },
    {
      "epoch": 2.095369952148099,
      "grad_norm": 0.5501464009284973,
      "learning_rate": 5.8475385963640685e-06,
      "loss": 0.012,
      "step": 1280380
    },
    {
      "epoch": 2.095402682586752,
      "grad_norm": 0.3884207010269165,
      "learning_rate": 5.847472704150551e-06,
      "loss": 0.0109,
      "step": 1280400
    },
    {
      "epoch": 2.095435413025405,
      "grad_norm": 0.32235682010650635,
      "learning_rate": 5.847406811937034e-06,
      "loss": 0.0171,
      "step": 1280420
    },
    {
      "epoch": 2.0954681434640587,
      "grad_norm": 0.7762330770492554,
      "learning_rate": 5.847340919723517e-06,
      "loss": 0.0201,
      "step": 1280440
    },
    {
      "epoch": 2.095500873902712,
      "grad_norm": 0.13006360828876495,
      "learning_rate": 5.84727502751e-06,
      "loss": 0.0242,
      "step": 1280460
    },
    {
      "epoch": 2.0955336043413655,
      "grad_norm": 0.4080360531806946,
      "learning_rate": 5.847209135296482e-06,
      "loss": 0.0152,
      "step": 1280480
    },
    {
      "epoch": 2.0955663347800186,
      "grad_norm": 0.4704311192035675,
      "learning_rate": 5.847143243082966e-06,
      "loss": 0.0083,
      "step": 1280500
    },
    {
      "epoch": 2.0955990652186722,
      "grad_norm": 0.503556489944458,
      "learning_rate": 5.847077350869448e-06,
      "loss": 0.0097,
      "step": 1280520
    },
    {
      "epoch": 2.0956317956573254,
      "grad_norm": 0.9491416811943054,
      "learning_rate": 5.847011458655931e-06,
      "loss": 0.0209,
      "step": 1280540
    },
    {
      "epoch": 2.0956645260959785,
      "grad_norm": 0.42166492342948914,
      "learning_rate": 5.846945566442413e-06,
      "loss": 0.0262,
      "step": 1280560
    },
    {
      "epoch": 2.095697256534632,
      "grad_norm": 0.20369882881641388,
      "learning_rate": 5.846879674228897e-06,
      "loss": 0.0219,
      "step": 1280580
    },
    {
      "epoch": 2.0957299869732853,
      "grad_norm": 0.7626477479934692,
      "learning_rate": 5.8468137820153795e-06,
      "loss": 0.0188,
      "step": 1280600
    },
    {
      "epoch": 2.095762717411939,
      "grad_norm": 0.16841964423656464,
      "learning_rate": 5.846747889801862e-06,
      "loss": 0.0089,
      "step": 1280620
    },
    {
      "epoch": 2.095795447850592,
      "grad_norm": 1.764356255531311,
      "learning_rate": 5.846681997588346e-06,
      "loss": 0.0149,
      "step": 1280640
    },
    {
      "epoch": 2.0958281782892456,
      "grad_norm": 0.37983980774879456,
      "learning_rate": 5.8466161053748285e-06,
      "loss": 0.0114,
      "step": 1280660
    },
    {
      "epoch": 2.0958609087278988,
      "grad_norm": 0.12170802056789398,
      "learning_rate": 5.846550213161311e-06,
      "loss": 0.0105,
      "step": 1280680
    },
    {
      "epoch": 2.095893639166552,
      "grad_norm": 1.220708966255188,
      "learning_rate": 5.846484320947794e-06,
      "loss": 0.0194,
      "step": 1280700
    },
    {
      "epoch": 2.0959263696052055,
      "grad_norm": 0.967644453048706,
      "learning_rate": 5.846418428734278e-06,
      "loss": 0.0189,
      "step": 1280720
    },
    {
      "epoch": 2.0959591000438587,
      "grad_norm": 0.1696614772081375,
      "learning_rate": 5.8463525365207595e-06,
      "loss": 0.0168,
      "step": 1280740
    },
    {
      "epoch": 2.0959918304825123,
      "grad_norm": 0.18435430526733398,
      "learning_rate": 5.846286644307243e-06,
      "loss": 0.0157,
      "step": 1280760
    },
    {
      "epoch": 2.0960245609211654,
      "grad_norm": 0.4155798554420471,
      "learning_rate": 5.846220752093725e-06,
      "loss": 0.0166,
      "step": 1280780
    },
    {
      "epoch": 2.096057291359819,
      "grad_norm": 0.19393275678157806,
      "learning_rate": 5.8461548598802086e-06,
      "loss": 0.0118,
      "step": 1280800
    },
    {
      "epoch": 2.096090021798472,
      "grad_norm": 0.18251727521419525,
      "learning_rate": 5.846088967666691e-06,
      "loss": 0.0092,
      "step": 1280820
    },
    {
      "epoch": 2.0961227522371253,
      "grad_norm": 0.6320841908454895,
      "learning_rate": 5.846023075453174e-06,
      "loss": 0.0206,
      "step": 1280840
    },
    {
      "epoch": 2.096155482675779,
      "grad_norm": 0.5271221399307251,
      "learning_rate": 5.845957183239657e-06,
      "loss": 0.0166,
      "step": 1280860
    },
    {
      "epoch": 2.096188213114432,
      "grad_norm": 0.4148256182670593,
      "learning_rate": 5.84589129102614e-06,
      "loss": 0.0137,
      "step": 1280880
    },
    {
      "epoch": 2.0962209435530856,
      "grad_norm": 0.13225625455379486,
      "learning_rate": 5.845825398812622e-06,
      "loss": 0.0169,
      "step": 1280900
    },
    {
      "epoch": 2.096253673991739,
      "grad_norm": 0.12255514413118362,
      "learning_rate": 5.845759506599106e-06,
      "loss": 0.0109,
      "step": 1280920
    },
    {
      "epoch": 2.0962864044303924,
      "grad_norm": 0.8617509603500366,
      "learning_rate": 5.845693614385588e-06,
      "loss": 0.0194,
      "step": 1280940
    },
    {
      "epoch": 2.0963191348690455,
      "grad_norm": 0.5138267278671265,
      "learning_rate": 5.845627722172071e-06,
      "loss": 0.0181,
      "step": 1280960
    },
    {
      "epoch": 2.0963518653076987,
      "grad_norm": 1.1798073053359985,
      "learning_rate": 5.845561829958555e-06,
      "loss": 0.0191,
      "step": 1280980
    },
    {
      "epoch": 2.0963845957463523,
      "grad_norm": 0.3213731646537781,
      "learning_rate": 5.845495937745037e-06,
      "loss": 0.014,
      "step": 1281000
    },
    {
      "epoch": 2.0964173261850054,
      "grad_norm": 0.24612879753112793,
      "learning_rate": 5.84543004553152e-06,
      "loss": 0.0144,
      "step": 1281020
    },
    {
      "epoch": 2.096450056623659,
      "grad_norm": 0.28792139887809753,
      "learning_rate": 5.845364153318003e-06,
      "loss": 0.0157,
      "step": 1281040
    },
    {
      "epoch": 2.096482787062312,
      "grad_norm": 0.7584178447723389,
      "learning_rate": 5.845298261104486e-06,
      "loss": 0.019,
      "step": 1281060
    },
    {
      "epoch": 2.0965155175009658,
      "grad_norm": 3.2957382202148438,
      "learning_rate": 5.845232368890969e-06,
      "loss": 0.0216,
      "step": 1281080
    },
    {
      "epoch": 2.096548247939619,
      "grad_norm": 0.349090576171875,
      "learning_rate": 5.845166476677452e-06,
      "loss": 0.0122,
      "step": 1281100
    },
    {
      "epoch": 2.096580978378272,
      "grad_norm": 0.3599803149700165,
      "learning_rate": 5.845100584463934e-06,
      "loss": 0.0218,
      "step": 1281120
    },
    {
      "epoch": 2.0966137088169257,
      "grad_norm": 0.765198290348053,
      "learning_rate": 5.845034692250418e-06,
      "loss": 0.0147,
      "step": 1281140
    },
    {
      "epoch": 2.096646439255579,
      "grad_norm": 0.2609591782093048,
      "learning_rate": 5.8449688000368996e-06,
      "loss": 0.0223,
      "step": 1281160
    },
    {
      "epoch": 2.0966791696942324,
      "grad_norm": 0.20893491804599762,
      "learning_rate": 5.844902907823383e-06,
      "loss": 0.0213,
      "step": 1281180
    },
    {
      "epoch": 2.0967119001328856,
      "grad_norm": 0.21982841193675995,
      "learning_rate": 5.844837015609866e-06,
      "loss": 0.0168,
      "step": 1281200
    },
    {
      "epoch": 2.096744630571539,
      "grad_norm": 1.0908972024917603,
      "learning_rate": 5.844771123396349e-06,
      "loss": 0.0191,
      "step": 1281220
    },
    {
      "epoch": 2.0967773610101923,
      "grad_norm": 0.304188072681427,
      "learning_rate": 5.844705231182831e-06,
      "loss": 0.0164,
      "step": 1281240
    },
    {
      "epoch": 2.0968100914488454,
      "grad_norm": 0.09952419251203537,
      "learning_rate": 5.844639338969315e-06,
      "loss": 0.0135,
      "step": 1281260
    },
    {
      "epoch": 2.096842821887499,
      "grad_norm": 0.30966895818710327,
      "learning_rate": 5.844573446755797e-06,
      "loss": 0.0122,
      "step": 1281280
    },
    {
      "epoch": 2.096875552326152,
      "grad_norm": 0.3907039165496826,
      "learning_rate": 5.8445075545422804e-06,
      "loss": 0.0219,
      "step": 1281300
    },
    {
      "epoch": 2.096908282764806,
      "grad_norm": 2.413008689880371,
      "learning_rate": 5.844441662328764e-06,
      "loss": 0.0162,
      "step": 1281320
    },
    {
      "epoch": 2.096941013203459,
      "grad_norm": 0.7113139629364014,
      "learning_rate": 5.844375770115246e-06,
      "loss": 0.0136,
      "step": 1281340
    },
    {
      "epoch": 2.096973743642112,
      "grad_norm": 0.36467882990837097,
      "learning_rate": 5.8443098779017295e-06,
      "loss": 0.0112,
      "step": 1281360
    },
    {
      "epoch": 2.0970064740807657,
      "grad_norm": 0.904594898223877,
      "learning_rate": 5.844243985688211e-06,
      "loss": 0.0272,
      "step": 1281380
    },
    {
      "epoch": 2.097039204519419,
      "grad_norm": 0.15539637207984924,
      "learning_rate": 5.844178093474695e-06,
      "loss": 0.0132,
      "step": 1281400
    },
    {
      "epoch": 2.0970719349580724,
      "grad_norm": 0.3388305902481079,
      "learning_rate": 5.844112201261177e-06,
      "loss": 0.0153,
      "step": 1281420
    },
    {
      "epoch": 2.0971046653967256,
      "grad_norm": 0.1940455287694931,
      "learning_rate": 5.8440463090476605e-06,
      "loss": 0.0245,
      "step": 1281440
    },
    {
      "epoch": 2.097137395835379,
      "grad_norm": 0.43792304396629333,
      "learning_rate": 5.843980416834143e-06,
      "loss": 0.0188,
      "step": 1281460
    },
    {
      "epoch": 2.0971701262740323,
      "grad_norm": 0.247920423746109,
      "learning_rate": 5.843914524620626e-06,
      "loss": 0.0178,
      "step": 1281480
    },
    {
      "epoch": 2.0972028567126855,
      "grad_norm": 0.13423599302768707,
      "learning_rate": 5.843848632407109e-06,
      "loss": 0.0166,
      "step": 1281500
    },
    {
      "epoch": 2.097235587151339,
      "grad_norm": 0.23609678447246552,
      "learning_rate": 5.843782740193592e-06,
      "loss": 0.0152,
      "step": 1281520
    },
    {
      "epoch": 2.097268317589992,
      "grad_norm": 1.5647978782653809,
      "learning_rate": 5.843716847980074e-06,
      "loss": 0.0213,
      "step": 1281540
    },
    {
      "epoch": 2.097301048028646,
      "grad_norm": 0.2601146399974823,
      "learning_rate": 5.843650955766558e-06,
      "loss": 0.0177,
      "step": 1281560
    },
    {
      "epoch": 2.097333778467299,
      "grad_norm": 0.2534206509590149,
      "learning_rate": 5.84358506355304e-06,
      "loss": 0.0129,
      "step": 1281580
    },
    {
      "epoch": 2.0973665089059526,
      "grad_norm": 0.4196096658706665,
      "learning_rate": 5.843519171339523e-06,
      "loss": 0.0162,
      "step": 1281600
    },
    {
      "epoch": 2.0973992393446057,
      "grad_norm": 1.0012973546981812,
      "learning_rate": 5.843453279126006e-06,
      "loss": 0.0154,
      "step": 1281620
    },
    {
      "epoch": 2.097431969783259,
      "grad_norm": 0.4245547652244568,
      "learning_rate": 5.843387386912489e-06,
      "loss": 0.0185,
      "step": 1281640
    },
    {
      "epoch": 2.0974647002219124,
      "grad_norm": 0.9703473448753357,
      "learning_rate": 5.8433214946989714e-06,
      "loss": 0.0192,
      "step": 1281660
    },
    {
      "epoch": 2.0974974306605656,
      "grad_norm": 0.16095952689647675,
      "learning_rate": 5.843255602485455e-06,
      "loss": 0.0107,
      "step": 1281680
    },
    {
      "epoch": 2.097530161099219,
      "grad_norm": 0.3264109492301941,
      "learning_rate": 5.843189710271938e-06,
      "loss": 0.0166,
      "step": 1281700
    },
    {
      "epoch": 2.0975628915378723,
      "grad_norm": 0.29536113142967224,
      "learning_rate": 5.8431238180584205e-06,
      "loss": 0.0171,
      "step": 1281720
    },
    {
      "epoch": 2.097595621976526,
      "grad_norm": 0.13884592056274414,
      "learning_rate": 5.843057925844904e-06,
      "loss": 0.0196,
      "step": 1281740
    },
    {
      "epoch": 2.097628352415179,
      "grad_norm": 0.2657023072242737,
      "learning_rate": 5.842992033631386e-06,
      "loss": 0.0148,
      "step": 1281760
    },
    {
      "epoch": 2.0976610828538322,
      "grad_norm": 0.17893341183662415,
      "learning_rate": 5.84292614141787e-06,
      "loss": 0.0144,
      "step": 1281780
    },
    {
      "epoch": 2.097693813292486,
      "grad_norm": 0.8145624995231628,
      "learning_rate": 5.8428602492043515e-06,
      "loss": 0.0198,
      "step": 1281800
    },
    {
      "epoch": 2.097726543731139,
      "grad_norm": 0.06750303506851196,
      "learning_rate": 5.842794356990835e-06,
      "loss": 0.016,
      "step": 1281820
    },
    {
      "epoch": 2.0977592741697926,
      "grad_norm": 0.46828269958496094,
      "learning_rate": 5.842728464777318e-06,
      "loss": 0.0115,
      "step": 1281840
    },
    {
      "epoch": 2.0977920046084457,
      "grad_norm": 1.2668745517730713,
      "learning_rate": 5.8426625725638005e-06,
      "loss": 0.0129,
      "step": 1281860
    },
    {
      "epoch": 2.097824735047099,
      "grad_norm": 0.1824829876422882,
      "learning_rate": 5.842596680350283e-06,
      "loss": 0.014,
      "step": 1281880
    },
    {
      "epoch": 2.0978574654857525,
      "grad_norm": 0.5151596665382385,
      "learning_rate": 5.842530788136767e-06,
      "loss": 0.0156,
      "step": 1281900
    },
    {
      "epoch": 2.0978901959244056,
      "grad_norm": 0.6508374214172363,
      "learning_rate": 5.842464895923249e-06,
      "loss": 0.0135,
      "step": 1281920
    },
    {
      "epoch": 2.097922926363059,
      "grad_norm": 3.1900837421417236,
      "learning_rate": 5.842399003709732e-06,
      "loss": 0.0178,
      "step": 1281940
    },
    {
      "epoch": 2.0979556568017124,
      "grad_norm": 0.4874792993068695,
      "learning_rate": 5.842333111496214e-06,
      "loss": 0.0102,
      "step": 1281960
    },
    {
      "epoch": 2.097988387240366,
      "grad_norm": 0.4610595405101776,
      "learning_rate": 5.842267219282698e-06,
      "loss": 0.013,
      "step": 1281980
    },
    {
      "epoch": 2.098021117679019,
      "grad_norm": 0.6799443960189819,
      "learning_rate": 5.8422013270691806e-06,
      "loss": 0.0153,
      "step": 1282000
    },
    {
      "epoch": 2.0980538481176723,
      "grad_norm": 0.3645196259021759,
      "learning_rate": 5.842135434855663e-06,
      "loss": 0.0175,
      "step": 1282020
    },
    {
      "epoch": 2.098086578556326,
      "grad_norm": 0.36116692423820496,
      "learning_rate": 5.842069542642147e-06,
      "loss": 0.0111,
      "step": 1282040
    },
    {
      "epoch": 2.098119308994979,
      "grad_norm": 0.14638158679008484,
      "learning_rate": 5.84200365042863e-06,
      "loss": 0.0149,
      "step": 1282060
    },
    {
      "epoch": 2.0981520394336326,
      "grad_norm": 0.5262466669082642,
      "learning_rate": 5.841937758215112e-06,
      "loss": 0.0266,
      "step": 1282080
    },
    {
      "epoch": 2.0981847698722857,
      "grad_norm": 1.1300817728042603,
      "learning_rate": 5.841871866001595e-06,
      "loss": 0.0159,
      "step": 1282100
    },
    {
      "epoch": 2.0982175003109393,
      "grad_norm": 0.3758663535118103,
      "learning_rate": 5.841805973788079e-06,
      "loss": 0.0097,
      "step": 1282120
    },
    {
      "epoch": 2.0982502307495925,
      "grad_norm": 0.3001082241535187,
      "learning_rate": 5.841740081574561e-06,
      "loss": 0.0127,
      "step": 1282140
    },
    {
      "epoch": 2.0982829611882456,
      "grad_norm": 0.7774723172187805,
      "learning_rate": 5.841674189361044e-06,
      "loss": 0.0175,
      "step": 1282160
    },
    {
      "epoch": 2.0983156916268992,
      "grad_norm": 0.7670891880989075,
      "learning_rate": 5.841608297147526e-06,
      "loss": 0.0179,
      "step": 1282180
    },
    {
      "epoch": 2.0983484220655524,
      "grad_norm": 0.47008761763572693,
      "learning_rate": 5.84154240493401e-06,
      "loss": 0.015,
      "step": 1282200
    },
    {
      "epoch": 2.098381152504206,
      "grad_norm": 0.15841400623321533,
      "learning_rate": 5.841476512720492e-06,
      "loss": 0.0125,
      "step": 1282220
    },
    {
      "epoch": 2.098413882942859,
      "grad_norm": 0.33844947814941406,
      "learning_rate": 5.841410620506975e-06,
      "loss": 0.0119,
      "step": 1282240
    },
    {
      "epoch": 2.0984466133815127,
      "grad_norm": 0.19960123300552368,
      "learning_rate": 5.841344728293458e-06,
      "loss": 0.0169,
      "step": 1282260
    },
    {
      "epoch": 2.098479343820166,
      "grad_norm": 0.3617088794708252,
      "learning_rate": 5.8412788360799415e-06,
      "loss": 0.0162,
      "step": 1282280
    },
    {
      "epoch": 2.098512074258819,
      "grad_norm": 0.2603089213371277,
      "learning_rate": 5.841212943866423e-06,
      "loss": 0.0204,
      "step": 1282300
    },
    {
      "epoch": 2.0985448046974726,
      "grad_norm": 0.5779818296432495,
      "learning_rate": 5.841147051652907e-06,
      "loss": 0.0158,
      "step": 1282320
    },
    {
      "epoch": 2.0985775351361258,
      "grad_norm": 0.38971978425979614,
      "learning_rate": 5.841081159439389e-06,
      "loss": 0.0115,
      "step": 1282340
    },
    {
      "epoch": 2.0986102655747794,
      "grad_norm": 0.5379992127418518,
      "learning_rate": 5.841015267225872e-06,
      "loss": 0.0139,
      "step": 1282360
    },
    {
      "epoch": 2.0986429960134325,
      "grad_norm": 0.43058982491493225,
      "learning_rate": 5.840949375012356e-06,
      "loss": 0.0154,
      "step": 1282380
    },
    {
      "epoch": 2.098675726452086,
      "grad_norm": 0.4593513607978821,
      "learning_rate": 5.840883482798838e-06,
      "loss": 0.0175,
      "step": 1282400
    },
    {
      "epoch": 2.0987084568907393,
      "grad_norm": 0.2744831144809723,
      "learning_rate": 5.8408175905853215e-06,
      "loss": 0.0126,
      "step": 1282420
    },
    {
      "epoch": 2.0987411873293924,
      "grad_norm": 0.2869335114955902,
      "learning_rate": 5.840751698371803e-06,
      "loss": 0.012,
      "step": 1282440
    },
    {
      "epoch": 2.098773917768046,
      "grad_norm": 0.6553679704666138,
      "learning_rate": 5.840685806158287e-06,
      "loss": 0.0196,
      "step": 1282460
    },
    {
      "epoch": 2.098806648206699,
      "grad_norm": 0.2671160399913788,
      "learning_rate": 5.84061991394477e-06,
      "loss": 0.0104,
      "step": 1282480
    },
    {
      "epoch": 2.0988393786453527,
      "grad_norm": 0.3645583987236023,
      "learning_rate": 5.8405540217312524e-06,
      "loss": 0.0138,
      "step": 1282500
    },
    {
      "epoch": 2.098872109084006,
      "grad_norm": 0.6164963245391846,
      "learning_rate": 5.840488129517735e-06,
      "loss": 0.0136,
      "step": 1282520
    },
    {
      "epoch": 2.0989048395226595,
      "grad_norm": 0.3190648853778839,
      "learning_rate": 5.840422237304219e-06,
      "loss": 0.0161,
      "step": 1282540
    },
    {
      "epoch": 2.0989375699613126,
      "grad_norm": 0.8060276508331299,
      "learning_rate": 5.840356345090701e-06,
      "loss": 0.015,
      "step": 1282560
    },
    {
      "epoch": 2.098970300399966,
      "grad_norm": 0.5876575112342834,
      "learning_rate": 5.840290452877184e-06,
      "loss": 0.0125,
      "step": 1282580
    },
    {
      "epoch": 2.0990030308386194,
      "grad_norm": 0.2920762598514557,
      "learning_rate": 5.840224560663666e-06,
      "loss": 0.0149,
      "step": 1282600
    },
    {
      "epoch": 2.0990357612772725,
      "grad_norm": 0.16915108263492584,
      "learning_rate": 5.84015866845015e-06,
      "loss": 0.0128,
      "step": 1282620
    },
    {
      "epoch": 2.099068491715926,
      "grad_norm": 0.4345186650753021,
      "learning_rate": 5.8400927762366325e-06,
      "loss": 0.0187,
      "step": 1282640
    },
    {
      "epoch": 2.0991012221545793,
      "grad_norm": 3.0248610973358154,
      "learning_rate": 5.840026884023115e-06,
      "loss": 0.0108,
      "step": 1282660
    },
    {
      "epoch": 2.099133952593233,
      "grad_norm": 0.21695876121520996,
      "learning_rate": 5.839960991809598e-06,
      "loss": 0.0166,
      "step": 1282680
    },
    {
      "epoch": 2.099166683031886,
      "grad_norm": 0.3257972002029419,
      "learning_rate": 5.8398950995960815e-06,
      "loss": 0.0148,
      "step": 1282700
    },
    {
      "epoch": 2.099199413470539,
      "grad_norm": 0.5242557525634766,
      "learning_rate": 5.8398292073825634e-06,
      "loss": 0.0195,
      "step": 1282720
    },
    {
      "epoch": 2.0992321439091928,
      "grad_norm": 0.6806203126907349,
      "learning_rate": 5.839763315169047e-06,
      "loss": 0.0216,
      "step": 1282740
    },
    {
      "epoch": 2.099264874347846,
      "grad_norm": 0.5929314494132996,
      "learning_rate": 5.839697422955531e-06,
      "loss": 0.0158,
      "step": 1282760
    },
    {
      "epoch": 2.0992976047864995,
      "grad_norm": 0.4812953472137451,
      "learning_rate": 5.8396315307420125e-06,
      "loss": 0.0135,
      "step": 1282780
    },
    {
      "epoch": 2.0993303352251527,
      "grad_norm": 0.5848081111907959,
      "learning_rate": 5.839565638528496e-06,
      "loss": 0.0165,
      "step": 1282800
    },
    {
      "epoch": 2.099363065663806,
      "grad_norm": 0.8207463622093201,
      "learning_rate": 5.839499746314978e-06,
      "loss": 0.0154,
      "step": 1282820
    },
    {
      "epoch": 2.0993957961024594,
      "grad_norm": 0.5712576508522034,
      "learning_rate": 5.8394338541014616e-06,
      "loss": 0.015,
      "step": 1282840
    },
    {
      "epoch": 2.0994285265411126,
      "grad_norm": 0.3771750330924988,
      "learning_rate": 5.839367961887944e-06,
      "loss": 0.0114,
      "step": 1282860
    },
    {
      "epoch": 2.099461256979766,
      "grad_norm": 0.2834882438182831,
      "learning_rate": 5.839302069674427e-06,
      "loss": 0.0104,
      "step": 1282880
    },
    {
      "epoch": 2.0994939874184193,
      "grad_norm": 0.46504107117652893,
      "learning_rate": 5.83923617746091e-06,
      "loss": 0.0193,
      "step": 1282900
    },
    {
      "epoch": 2.099526717857073,
      "grad_norm": 0.0948694497346878,
      "learning_rate": 5.839170285247393e-06,
      "loss": 0.0184,
      "step": 1282920
    },
    {
      "epoch": 2.099559448295726,
      "grad_norm": 0.13855183124542236,
      "learning_rate": 5.839104393033875e-06,
      "loss": 0.0137,
      "step": 1282940
    },
    {
      "epoch": 2.099592178734379,
      "grad_norm": 0.1611974686384201,
      "learning_rate": 5.839038500820359e-06,
      "loss": 0.0115,
      "step": 1282960
    },
    {
      "epoch": 2.099624909173033,
      "grad_norm": 0.3952575922012329,
      "learning_rate": 5.838972608606841e-06,
      "loss": 0.0149,
      "step": 1282980
    },
    {
      "epoch": 2.099657639611686,
      "grad_norm": 0.2637668550014496,
      "learning_rate": 5.838906716393324e-06,
      "loss": 0.0143,
      "step": 1283000
    },
    {
      "epoch": 2.0996903700503395,
      "grad_norm": 0.2981475591659546,
      "learning_rate": 5.838840824179807e-06,
      "loss": 0.0102,
      "step": 1283020
    },
    {
      "epoch": 2.0997231004889927,
      "grad_norm": 0.3200468122959137,
      "learning_rate": 5.83877493196629e-06,
      "loss": 0.012,
      "step": 1283040
    },
    {
      "epoch": 2.0997558309276463,
      "grad_norm": 0.2754521369934082,
      "learning_rate": 5.8387090397527725e-06,
      "loss": 0.0146,
      "step": 1283060
    },
    {
      "epoch": 2.0997885613662994,
      "grad_norm": 0.48967427015304565,
      "learning_rate": 5.838643147539256e-06,
      "loss": 0.0155,
      "step": 1283080
    },
    {
      "epoch": 2.0998212918049526,
      "grad_norm": 0.37351351976394653,
      "learning_rate": 5.838577255325739e-06,
      "loss": 0.0134,
      "step": 1283100
    },
    {
      "epoch": 2.099854022243606,
      "grad_norm": 0.4602939188480377,
      "learning_rate": 5.838511363112222e-06,
      "loss": 0.0184,
      "step": 1283120
    },
    {
      "epoch": 2.0998867526822593,
      "grad_norm": 0.15342435240745544,
      "learning_rate": 5.838445470898705e-06,
      "loss": 0.0102,
      "step": 1283140
    },
    {
      "epoch": 2.099919483120913,
      "grad_norm": 0.5099835395812988,
      "learning_rate": 5.838379578685187e-06,
      "loss": 0.0139,
      "step": 1283160
    },
    {
      "epoch": 2.099952213559566,
      "grad_norm": 0.35076475143432617,
      "learning_rate": 5.838313686471671e-06,
      "loss": 0.0115,
      "step": 1283180
    },
    {
      "epoch": 2.0999849439982197,
      "grad_norm": 0.15268895030021667,
      "learning_rate": 5.8382477942581526e-06,
      "loss": 0.0131,
      "step": 1283200
    },
    {
      "epoch": 2.100017674436873,
      "grad_norm": 0.3344354033470154,
      "learning_rate": 5.838181902044636e-06,
      "loss": 0.0176,
      "step": 1283220
    },
    {
      "epoch": 2.100050404875526,
      "grad_norm": 0.4604966640472412,
      "learning_rate": 5.838116009831119e-06,
      "loss": 0.0118,
      "step": 1283240
    },
    {
      "epoch": 2.1000831353141796,
      "grad_norm": 0.3325490951538086,
      "learning_rate": 5.838050117617602e-06,
      "loss": 0.0145,
      "step": 1283260
    },
    {
      "epoch": 2.1001158657528327,
      "grad_norm": 0.2860950529575348,
      "learning_rate": 5.837984225404084e-06,
      "loss": 0.0178,
      "step": 1283280
    },
    {
      "epoch": 2.1001485961914863,
      "grad_norm": 0.421575129032135,
      "learning_rate": 5.837918333190568e-06,
      "loss": 0.0222,
      "step": 1283300
    },
    {
      "epoch": 2.1001813266301395,
      "grad_norm": 0.2627791464328766,
      "learning_rate": 5.83785244097705e-06,
      "loss": 0.0107,
      "step": 1283320
    },
    {
      "epoch": 2.100214057068793,
      "grad_norm": 0.4784098267555237,
      "learning_rate": 5.8377865487635334e-06,
      "loss": 0.0193,
      "step": 1283340
    },
    {
      "epoch": 2.100246787507446,
      "grad_norm": 0.7174339890480042,
      "learning_rate": 5.837720656550015e-06,
      "loss": 0.0163,
      "step": 1283360
    },
    {
      "epoch": 2.1002795179460994,
      "grad_norm": 0.18363657593727112,
      "learning_rate": 5.837654764336499e-06,
      "loss": 0.0098,
      "step": 1283380
    },
    {
      "epoch": 2.100312248384753,
      "grad_norm": 0.12207824736833572,
      "learning_rate": 5.837588872122981e-06,
      "loss": 0.0193,
      "step": 1283400
    },
    {
      "epoch": 2.100344978823406,
      "grad_norm": 0.26991143822669983,
      "learning_rate": 5.837522979909464e-06,
      "loss": 0.012,
      "step": 1283420
    },
    {
      "epoch": 2.1003777092620597,
      "grad_norm": 0.39565980434417725,
      "learning_rate": 5.837457087695948e-06,
      "loss": 0.0157,
      "step": 1283440
    },
    {
      "epoch": 2.100410439700713,
      "grad_norm": 0.5741304755210876,
      "learning_rate": 5.83739119548243e-06,
      "loss": 0.023,
      "step": 1283460
    },
    {
      "epoch": 2.100443170139366,
      "grad_norm": 0.7476688027381897,
      "learning_rate": 5.8373253032689135e-06,
      "loss": 0.0215,
      "step": 1283480
    },
    {
      "epoch": 2.1004759005780196,
      "grad_norm": 0.830471396446228,
      "learning_rate": 5.837259411055396e-06,
      "loss": 0.0165,
      "step": 1283500
    },
    {
      "epoch": 2.1005086310166727,
      "grad_norm": 0.4171725809574127,
      "learning_rate": 5.837193518841879e-06,
      "loss": 0.0141,
      "step": 1283520
    },
    {
      "epoch": 2.1005413614553263,
      "grad_norm": 0.42571699619293213,
      "learning_rate": 5.837127626628362e-06,
      "loss": 0.011,
      "step": 1283540
    },
    {
      "epoch": 2.1005740918939795,
      "grad_norm": 0.5270038843154907,
      "learning_rate": 5.837061734414845e-06,
      "loss": 0.0184,
      "step": 1283560
    },
    {
      "epoch": 2.100606822332633,
      "grad_norm": 0.6497362852096558,
      "learning_rate": 5.836995842201327e-06,
      "loss": 0.0175,
      "step": 1283580
    },
    {
      "epoch": 2.1006395527712862,
      "grad_norm": 0.4557695984840393,
      "learning_rate": 5.836929949987811e-06,
      "loss": 0.0143,
      "step": 1283600
    },
    {
      "epoch": 2.1006722832099394,
      "grad_norm": 0.6065984964370728,
      "learning_rate": 5.836864057774293e-06,
      "loss": 0.0173,
      "step": 1283620
    },
    {
      "epoch": 2.100705013648593,
      "grad_norm": 0.41103461384773254,
      "learning_rate": 5.836798165560776e-06,
      "loss": 0.0162,
      "step": 1283640
    },
    {
      "epoch": 2.100737744087246,
      "grad_norm": 0.4529021680355072,
      "learning_rate": 5.836732273347259e-06,
      "loss": 0.0164,
      "step": 1283660
    },
    {
      "epoch": 2.1007704745258997,
      "grad_norm": 0.2074766904115677,
      "learning_rate": 5.836666381133742e-06,
      "loss": 0.0102,
      "step": 1283680
    },
    {
      "epoch": 2.100803204964553,
      "grad_norm": 0.9042035937309265,
      "learning_rate": 5.8366004889202245e-06,
      "loss": 0.0181,
      "step": 1283700
    },
    {
      "epoch": 2.1008359354032065,
      "grad_norm": 0.273821622133255,
      "learning_rate": 5.836534596706708e-06,
      "loss": 0.0173,
      "step": 1283720
    },
    {
      "epoch": 2.1008686658418596,
      "grad_norm": 0.5445049405097961,
      "learning_rate": 5.83646870449319e-06,
      "loss": 0.0124,
      "step": 1283740
    },
    {
      "epoch": 2.1009013962805128,
      "grad_norm": 0.8150294423103333,
      "learning_rate": 5.8364028122796735e-06,
      "loss": 0.016,
      "step": 1283760
    },
    {
      "epoch": 2.1009341267191664,
      "grad_norm": 0.5317993760108948,
      "learning_rate": 5.836336920066155e-06,
      "loss": 0.0144,
      "step": 1283780
    },
    {
      "epoch": 2.1009668571578195,
      "grad_norm": 0.3781833052635193,
      "learning_rate": 5.836271027852639e-06,
      "loss": 0.016,
      "step": 1283800
    },
    {
      "epoch": 2.100999587596473,
      "grad_norm": 0.3004339933395386,
      "learning_rate": 5.836205135639123e-06,
      "loss": 0.0143,
      "step": 1283820
    },
    {
      "epoch": 2.1010323180351262,
      "grad_norm": 0.1628837138414383,
      "learning_rate": 5.8361392434256045e-06,
      "loss": 0.0177,
      "step": 1283840
    },
    {
      "epoch": 2.10106504847378,
      "grad_norm": 0.4697889983654022,
      "learning_rate": 5.836073351212088e-06,
      "loss": 0.0204,
      "step": 1283860
    },
    {
      "epoch": 2.101097778912433,
      "grad_norm": 0.22526223957538605,
      "learning_rate": 5.836007458998571e-06,
      "loss": 0.0122,
      "step": 1283880
    },
    {
      "epoch": 2.101130509351086,
      "grad_norm": 0.07074479013681412,
      "learning_rate": 5.8359415667850535e-06,
      "loss": 0.0137,
      "step": 1283900
    },
    {
      "epoch": 2.1011632397897397,
      "grad_norm": 0.2096770703792572,
      "learning_rate": 5.835875674571536e-06,
      "loss": 0.0173,
      "step": 1283920
    },
    {
      "epoch": 2.101195970228393,
      "grad_norm": 0.4190637767314911,
      "learning_rate": 5.83580978235802e-06,
      "loss": 0.0181,
      "step": 1283940
    },
    {
      "epoch": 2.1012287006670465,
      "grad_norm": 0.6930222511291504,
      "learning_rate": 5.835743890144502e-06,
      "loss": 0.0148,
      "step": 1283960
    },
    {
      "epoch": 2.1012614311056996,
      "grad_norm": 0.17752760648727417,
      "learning_rate": 5.835677997930985e-06,
      "loss": 0.0181,
      "step": 1283980
    },
    {
      "epoch": 2.1012941615443532,
      "grad_norm": 0.21177643537521362,
      "learning_rate": 5.835612105717467e-06,
      "loss": 0.0159,
      "step": 1284000
    },
    {
      "epoch": 2.1013268919830064,
      "grad_norm": 0.18160344660282135,
      "learning_rate": 5.835546213503951e-06,
      "loss": 0.0133,
      "step": 1284020
    },
    {
      "epoch": 2.1013596224216595,
      "grad_norm": 4.381909370422363,
      "learning_rate": 5.8354803212904336e-06,
      "loss": 0.0161,
      "step": 1284040
    },
    {
      "epoch": 2.101392352860313,
      "grad_norm": 0.20411473512649536,
      "learning_rate": 5.835414429076916e-06,
      "loss": 0.0179,
      "step": 1284060
    },
    {
      "epoch": 2.1014250832989663,
      "grad_norm": 0.32366952300071716,
      "learning_rate": 5.835348536863399e-06,
      "loss": 0.0174,
      "step": 1284080
    },
    {
      "epoch": 2.10145781373762,
      "grad_norm": 0.6278426647186279,
      "learning_rate": 5.835282644649883e-06,
      "loss": 0.0192,
      "step": 1284100
    },
    {
      "epoch": 2.101490544176273,
      "grad_norm": Infinity,
      "learning_rate": 5.8352167524363645e-06,
      "loss": 0.0183,
      "step": 1284120
    },
    {
      "epoch": 2.1015232746149266,
      "grad_norm": 0.17497484385967255,
      "learning_rate": 5.835150860222848e-06,
      "loss": 0.0201,
      "step": 1284140
    },
    {
      "epoch": 2.1015560050535798,
      "grad_norm": 0.20105226337909698,
      "learning_rate": 5.835084968009332e-06,
      "loss": 0.0193,
      "step": 1284160
    },
    {
      "epoch": 2.101588735492233,
      "grad_norm": 0.8327932953834534,
      "learning_rate": 5.835019075795814e-06,
      "loss": 0.0188,
      "step": 1284180
    },
    {
      "epoch": 2.1016214659308865,
      "grad_norm": 0.3302505314350128,
      "learning_rate": 5.834953183582297e-06,
      "loss": 0.0157,
      "step": 1284200
    },
    {
      "epoch": 2.1016541963695397,
      "grad_norm": 0.1439913511276245,
      "learning_rate": 5.834887291368779e-06,
      "loss": 0.0175,
      "step": 1284220
    },
    {
      "epoch": 2.1016869268081932,
      "grad_norm": 0.2802972197532654,
      "learning_rate": 5.834821399155263e-06,
      "loss": 0.0137,
      "step": 1284240
    },
    {
      "epoch": 2.1017196572468464,
      "grad_norm": 0.47161149978637695,
      "learning_rate": 5.8347555069417446e-06,
      "loss": 0.0155,
      "step": 1284260
    },
    {
      "epoch": 2.1017523876855,
      "grad_norm": 0.6605032086372375,
      "learning_rate": 5.834689614728228e-06,
      "loss": 0.0153,
      "step": 1284280
    },
    {
      "epoch": 2.101785118124153,
      "grad_norm": 2.916027069091797,
      "learning_rate": 5.834623722514711e-06,
      "loss": 0.0251,
      "step": 1284300
    },
    {
      "epoch": 2.1018178485628063,
      "grad_norm": 0.22646450996398926,
      "learning_rate": 5.8345578303011945e-06,
      "loss": 0.0147,
      "step": 1284320
    },
    {
      "epoch": 2.10185057900146,
      "grad_norm": 0.057468682527542114,
      "learning_rate": 5.834491938087676e-06,
      "loss": 0.0165,
      "step": 1284340
    },
    {
      "epoch": 2.101883309440113,
      "grad_norm": 0.12522655725479126,
      "learning_rate": 5.83442604587416e-06,
      "loss": 0.0139,
      "step": 1284360
    },
    {
      "epoch": 2.1019160398787666,
      "grad_norm": 0.15652599930763245,
      "learning_rate": 5.834360153660642e-06,
      "loss": 0.0156,
      "step": 1284380
    },
    {
      "epoch": 2.10194877031742,
      "grad_norm": 0.4494170844554901,
      "learning_rate": 5.8342942614471254e-06,
      "loss": 0.012,
      "step": 1284400
    },
    {
      "epoch": 2.101981500756073,
      "grad_norm": 1.1783796548843384,
      "learning_rate": 5.834228369233607e-06,
      "loss": 0.019,
      "step": 1284420
    },
    {
      "epoch": 2.1020142311947265,
      "grad_norm": 0.22085845470428467,
      "learning_rate": 5.834162477020091e-06,
      "loss": 0.0105,
      "step": 1284440
    },
    {
      "epoch": 2.1020469616333797,
      "grad_norm": 1.3904318809509277,
      "learning_rate": 5.834096584806574e-06,
      "loss": 0.0182,
      "step": 1284460
    },
    {
      "epoch": 2.1020796920720333,
      "grad_norm": 2.0724103450775146,
      "learning_rate": 5.834030692593056e-06,
      "loss": 0.016,
      "step": 1284480
    },
    {
      "epoch": 2.1021124225106864,
      "grad_norm": 0.5760585069656372,
      "learning_rate": 5.83396480037954e-06,
      "loss": 0.0165,
      "step": 1284500
    },
    {
      "epoch": 2.10214515294934,
      "grad_norm": 0.33422431349754333,
      "learning_rate": 5.833898908166023e-06,
      "loss": 0.022,
      "step": 1284520
    },
    {
      "epoch": 2.102177883387993,
      "grad_norm": 1.0797878503799438,
      "learning_rate": 5.8338330159525054e-06,
      "loss": 0.0199,
      "step": 1284540
    },
    {
      "epoch": 2.1022106138266463,
      "grad_norm": 0.11461687833070755,
      "learning_rate": 5.833767123738988e-06,
      "loss": 0.0152,
      "step": 1284560
    },
    {
      "epoch": 2.1022433442653,
      "grad_norm": 1.1819392442703247,
      "learning_rate": 5.833701231525472e-06,
      "loss": 0.016,
      "step": 1284580
    },
    {
      "epoch": 2.102276074703953,
      "grad_norm": 0.15033674240112305,
      "learning_rate": 5.833635339311954e-06,
      "loss": 0.0199,
      "step": 1284600
    },
    {
      "epoch": 2.1023088051426067,
      "grad_norm": 0.3106253445148468,
      "learning_rate": 5.833569447098437e-06,
      "loss": 0.0128,
      "step": 1284620
    },
    {
      "epoch": 2.10234153558126,
      "grad_norm": 0.5812092423439026,
      "learning_rate": 5.833503554884919e-06,
      "loss": 0.018,
      "step": 1284640
    },
    {
      "epoch": 2.1023742660199134,
      "grad_norm": 0.2026299685239792,
      "learning_rate": 5.833437662671403e-06,
      "loss": 0.02,
      "step": 1284660
    },
    {
      "epoch": 2.1024069964585665,
      "grad_norm": 1.9739611148834229,
      "learning_rate": 5.8333717704578855e-06,
      "loss": 0.0148,
      "step": 1284680
    },
    {
      "epoch": 2.1024397268972197,
      "grad_norm": 0.16926081478595734,
      "learning_rate": 5.833305878244368e-06,
      "loss": 0.0137,
      "step": 1284700
    },
    {
      "epoch": 2.1024724573358733,
      "grad_norm": 0.5846485495567322,
      "learning_rate": 5.833239986030851e-06,
      "loss": 0.0136,
      "step": 1284720
    },
    {
      "epoch": 2.1025051877745264,
      "grad_norm": 0.5768111944198608,
      "learning_rate": 5.8331740938173345e-06,
      "loss": 0.0142,
      "step": 1284740
    },
    {
      "epoch": 2.10253791821318,
      "grad_norm": 0.10925935208797455,
      "learning_rate": 5.8331082016038164e-06,
      "loss": 0.0171,
      "step": 1284760
    },
    {
      "epoch": 2.102570648651833,
      "grad_norm": 0.16125941276550293,
      "learning_rate": 5.8330423093903e-06,
      "loss": 0.0127,
      "step": 1284780
    },
    {
      "epoch": 2.102603379090487,
      "grad_norm": 0.7294092178344727,
      "learning_rate": 5.832976417176782e-06,
      "loss": 0.0158,
      "step": 1284800
    },
    {
      "epoch": 2.10263610952914,
      "grad_norm": 0.2691476345062256,
      "learning_rate": 5.8329105249632655e-06,
      "loss": 0.0185,
      "step": 1284820
    },
    {
      "epoch": 2.102668839967793,
      "grad_norm": 0.49673381447792053,
      "learning_rate": 5.832844632749749e-06,
      "loss": 0.0154,
      "step": 1284840
    },
    {
      "epoch": 2.1027015704064467,
      "grad_norm": 0.4332250654697418,
      "learning_rate": 5.832778740536231e-06,
      "loss": 0.0258,
      "step": 1284860
    },
    {
      "epoch": 2.1027343008451,
      "grad_norm": 0.3704347014427185,
      "learning_rate": 5.8327128483227146e-06,
      "loss": 0.0119,
      "step": 1284880
    },
    {
      "epoch": 2.1027670312837534,
      "grad_norm": 0.26402825117111206,
      "learning_rate": 5.832646956109197e-06,
      "loss": 0.0143,
      "step": 1284900
    },
    {
      "epoch": 2.1027997617224066,
      "grad_norm": 0.47722136974334717,
      "learning_rate": 5.83258106389568e-06,
      "loss": 0.017,
      "step": 1284920
    },
    {
      "epoch": 2.1028324921610597,
      "grad_norm": 0.12754958868026733,
      "learning_rate": 5.832515171682163e-06,
      "loss": 0.0156,
      "step": 1284940
    },
    {
      "epoch": 2.1028652225997133,
      "grad_norm": 0.2859133780002594,
      "learning_rate": 5.832449279468646e-06,
      "loss": 0.0201,
      "step": 1284960
    },
    {
      "epoch": 2.1028979530383665,
      "grad_norm": 0.43513035774230957,
      "learning_rate": 5.832383387255128e-06,
      "loss": 0.0194,
      "step": 1284980
    },
    {
      "epoch": 2.10293068347702,
      "grad_norm": 0.24364866316318512,
      "learning_rate": 5.832317495041612e-06,
      "loss": 0.0139,
      "step": 1285000
    },
    {
      "epoch": 2.102963413915673,
      "grad_norm": 0.8566864132881165,
      "learning_rate": 5.832251602828094e-06,
      "loss": 0.0222,
      "step": 1285020
    },
    {
      "epoch": 2.102996144354327,
      "grad_norm": 0.22215746343135834,
      "learning_rate": 5.832185710614577e-06,
      "loss": 0.018,
      "step": 1285040
    },
    {
      "epoch": 2.10302887479298,
      "grad_norm": 0.41873687505722046,
      "learning_rate": 5.83211981840106e-06,
      "loss": 0.0157,
      "step": 1285060
    },
    {
      "epoch": 2.103061605231633,
      "grad_norm": 0.7824949026107788,
      "learning_rate": 5.832053926187543e-06,
      "loss": 0.0183,
      "step": 1285080
    },
    {
      "epoch": 2.1030943356702867,
      "grad_norm": 0.13580329716205597,
      "learning_rate": 5.8319880339740256e-06,
      "loss": 0.0135,
      "step": 1285100
    },
    {
      "epoch": 2.10312706610894,
      "grad_norm": 0.47080162167549133,
      "learning_rate": 5.831922141760509e-06,
      "loss": 0.016,
      "step": 1285120
    },
    {
      "epoch": 2.1031597965475934,
      "grad_norm": 1.796204924583435,
      "learning_rate": 5.831856249546991e-06,
      "loss": 0.0165,
      "step": 1285140
    },
    {
      "epoch": 2.1031925269862466,
      "grad_norm": 0.5997351408004761,
      "learning_rate": 5.831790357333475e-06,
      "loss": 0.0152,
      "step": 1285160
    },
    {
      "epoch": 2.1032252574249,
      "grad_norm": 0.3464522063732147,
      "learning_rate": 5.8317244651199565e-06,
      "loss": 0.0149,
      "step": 1285180
    },
    {
      "epoch": 2.1032579878635533,
      "grad_norm": 0.4251589775085449,
      "learning_rate": 5.83165857290644e-06,
      "loss": 0.0193,
      "step": 1285200
    },
    {
      "epoch": 2.1032907183022065,
      "grad_norm": 0.8038738369941711,
      "learning_rate": 5.831592680692924e-06,
      "loss": 0.0102,
      "step": 1285220
    },
    {
      "epoch": 2.10332344874086,
      "grad_norm": 0.3932332992553711,
      "learning_rate": 5.8315267884794056e-06,
      "loss": 0.0189,
      "step": 1285240
    },
    {
      "epoch": 2.1033561791795132,
      "grad_norm": 0.20412485301494598,
      "learning_rate": 5.831460896265889e-06,
      "loss": 0.0211,
      "step": 1285260
    },
    {
      "epoch": 2.103388909618167,
      "grad_norm": 1.0101286172866821,
      "learning_rate": 5.831395004052371e-06,
      "loss": 0.0185,
      "step": 1285280
    },
    {
      "epoch": 2.10342164005682,
      "grad_norm": 1.614515781402588,
      "learning_rate": 5.831329111838855e-06,
      "loss": 0.0177,
      "step": 1285300
    },
    {
      "epoch": 2.1034543704954736,
      "grad_norm": 0.21963298320770264,
      "learning_rate": 5.831263219625337e-06,
      "loss": 0.0226,
      "step": 1285320
    },
    {
      "epoch": 2.1034871009341267,
      "grad_norm": 0.15399442613124847,
      "learning_rate": 5.83119732741182e-06,
      "loss": 0.0173,
      "step": 1285340
    },
    {
      "epoch": 2.10351983137278,
      "grad_norm": 0.13736611604690552,
      "learning_rate": 5.831131435198303e-06,
      "loss": 0.0159,
      "step": 1285360
    },
    {
      "epoch": 2.1035525618114335,
      "grad_norm": 0.41555148363113403,
      "learning_rate": 5.8310655429847864e-06,
      "loss": 0.0206,
      "step": 1285380
    },
    {
      "epoch": 2.1035852922500866,
      "grad_norm": 0.63962322473526,
      "learning_rate": 5.830999650771268e-06,
      "loss": 0.0176,
      "step": 1285400
    },
    {
      "epoch": 2.10361802268874,
      "grad_norm": 0.6987993717193604,
      "learning_rate": 5.830933758557752e-06,
      "loss": 0.0133,
      "step": 1285420
    },
    {
      "epoch": 2.1036507531273934,
      "grad_norm": 0.18966546654701233,
      "learning_rate": 5.830867866344234e-06,
      "loss": 0.0136,
      "step": 1285440
    },
    {
      "epoch": 2.103683483566047,
      "grad_norm": 1.021276831626892,
      "learning_rate": 5.830801974130717e-06,
      "loss": 0.0168,
      "step": 1285460
    },
    {
      "epoch": 2.1037162140047,
      "grad_norm": 3.559708595275879,
      "learning_rate": 5.8307360819172e-06,
      "loss": 0.0323,
      "step": 1285480
    },
    {
      "epoch": 2.1037489444433533,
      "grad_norm": 0.5349791049957275,
      "learning_rate": 5.830670189703683e-06,
      "loss": 0.0202,
      "step": 1285500
    },
    {
      "epoch": 2.103781674882007,
      "grad_norm": 0.8756594657897949,
      "learning_rate": 5.830604297490166e-06,
      "loss": 0.0202,
      "step": 1285520
    },
    {
      "epoch": 2.10381440532066,
      "grad_norm": 0.3111042082309723,
      "learning_rate": 5.830538405276649e-06,
      "loss": 0.0161,
      "step": 1285540
    },
    {
      "epoch": 2.1038471357593136,
      "grad_norm": 0.2191125452518463,
      "learning_rate": 5.830472513063132e-06,
      "loss": 0.0147,
      "step": 1285560
    },
    {
      "epoch": 2.1038798661979667,
      "grad_norm": 0.24268192052841187,
      "learning_rate": 5.830406620849615e-06,
      "loss": 0.0124,
      "step": 1285580
    },
    {
      "epoch": 2.1039125966366203,
      "grad_norm": 0.2682915925979614,
      "learning_rate": 5.830340728636098e-06,
      "loss": 0.0104,
      "step": 1285600
    },
    {
      "epoch": 2.1039453270752735,
      "grad_norm": 0.0421418733894825,
      "learning_rate": 5.83027483642258e-06,
      "loss": 0.0222,
      "step": 1285620
    },
    {
      "epoch": 2.1039780575139266,
      "grad_norm": 0.5115531086921692,
      "learning_rate": 5.830208944209064e-06,
      "loss": 0.0164,
      "step": 1285640
    },
    {
      "epoch": 2.1040107879525802,
      "grad_norm": 0.490966796875,
      "learning_rate": 5.830143051995546e-06,
      "loss": 0.0173,
      "step": 1285660
    },
    {
      "epoch": 2.1040435183912334,
      "grad_norm": 0.3794567286968231,
      "learning_rate": 5.830077159782029e-06,
      "loss": 0.0145,
      "step": 1285680
    },
    {
      "epoch": 2.104076248829887,
      "grad_norm": 0.25244927406311035,
      "learning_rate": 5.830011267568512e-06,
      "loss": 0.0198,
      "step": 1285700
    },
    {
      "epoch": 2.10410897926854,
      "grad_norm": 0.16283559799194336,
      "learning_rate": 5.829945375354995e-06,
      "loss": 0.0117,
      "step": 1285720
    },
    {
      "epoch": 2.1041417097071937,
      "grad_norm": 1.209273099899292,
      "learning_rate": 5.8298794831414775e-06,
      "loss": 0.0171,
      "step": 1285740
    },
    {
      "epoch": 2.104174440145847,
      "grad_norm": 0.9053694605827332,
      "learning_rate": 5.829813590927961e-06,
      "loss": 0.0217,
      "step": 1285760
    },
    {
      "epoch": 2.1042071705845,
      "grad_norm": 0.49105679988861084,
      "learning_rate": 5.829747698714443e-06,
      "loss": 0.015,
      "step": 1285780
    },
    {
      "epoch": 2.1042399010231536,
      "grad_norm": 0.07876310497522354,
      "learning_rate": 5.8296818065009265e-06,
      "loss": 0.0151,
      "step": 1285800
    },
    {
      "epoch": 2.1042726314618068,
      "grad_norm": 0.3753175437450409,
      "learning_rate": 5.829615914287408e-06,
      "loss": 0.0152,
      "step": 1285820
    },
    {
      "epoch": 2.1043053619004604,
      "grad_norm": 0.08257712423801422,
      "learning_rate": 5.829550022073892e-06,
      "loss": 0.0112,
      "step": 1285840
    },
    {
      "epoch": 2.1043380923391135,
      "grad_norm": 0.2680864632129669,
      "learning_rate": 5.829484129860375e-06,
      "loss": 0.012,
      "step": 1285860
    },
    {
      "epoch": 2.1043708227777667,
      "grad_norm": 0.7960622310638428,
      "learning_rate": 5.8294182376468575e-06,
      "loss": 0.0126,
      "step": 1285880
    },
    {
      "epoch": 2.1044035532164203,
      "grad_norm": 1.2020831108093262,
      "learning_rate": 5.829352345433341e-06,
      "loss": 0.0178,
      "step": 1285900
    },
    {
      "epoch": 2.1044362836550734,
      "grad_norm": 0.30702778697013855,
      "learning_rate": 5.829286453219824e-06,
      "loss": 0.0127,
      "step": 1285920
    },
    {
      "epoch": 2.104469014093727,
      "grad_norm": 0.7477400898933411,
      "learning_rate": 5.8292205610063065e-06,
      "loss": 0.0184,
      "step": 1285940
    },
    {
      "epoch": 2.10450174453238,
      "grad_norm": 1.2571645975112915,
      "learning_rate": 5.829154668792789e-06,
      "loss": 0.0186,
      "step": 1285960
    },
    {
      "epoch": 2.1045344749710337,
      "grad_norm": 0.604170024394989,
      "learning_rate": 5.829088776579273e-06,
      "loss": 0.0175,
      "step": 1285980
    },
    {
      "epoch": 2.104567205409687,
      "grad_norm": 0.3147056996822357,
      "learning_rate": 5.829022884365755e-06,
      "loss": 0.0147,
      "step": 1286000
    },
    {
      "epoch": 2.10459993584834,
      "grad_norm": 0.11435417085886002,
      "learning_rate": 5.828956992152238e-06,
      "loss": 0.0168,
      "step": 1286020
    },
    {
      "epoch": 2.1046326662869936,
      "grad_norm": 0.26558342576026917,
      "learning_rate": 5.82889109993872e-06,
      "loss": 0.0099,
      "step": 1286040
    },
    {
      "epoch": 2.104665396725647,
      "grad_norm": 0.38179701566696167,
      "learning_rate": 5.828825207725204e-06,
      "loss": 0.0136,
      "step": 1286060
    },
    {
      "epoch": 2.1046981271643004,
      "grad_norm": 0.36325839161872864,
      "learning_rate": 5.8287593155116866e-06,
      "loss": 0.0154,
      "step": 1286080
    },
    {
      "epoch": 2.1047308576029535,
      "grad_norm": 0.5197582840919495,
      "learning_rate": 5.828693423298169e-06,
      "loss": 0.0153,
      "step": 1286100
    },
    {
      "epoch": 2.104763588041607,
      "grad_norm": 0.5200254321098328,
      "learning_rate": 5.828627531084652e-06,
      "loss": 0.0144,
      "step": 1286120
    },
    {
      "epoch": 2.1047963184802603,
      "grad_norm": 0.37194961309432983,
      "learning_rate": 5.828561638871136e-06,
      "loss": 0.0122,
      "step": 1286140
    },
    {
      "epoch": 2.1048290489189134,
      "grad_norm": 0.26432371139526367,
      "learning_rate": 5.8284957466576175e-06,
      "loss": 0.0163,
      "step": 1286160
    },
    {
      "epoch": 2.104861779357567,
      "grad_norm": 0.3692302703857422,
      "learning_rate": 5.828429854444101e-06,
      "loss": 0.0136,
      "step": 1286180
    },
    {
      "epoch": 2.10489450979622,
      "grad_norm": 0.8661428689956665,
      "learning_rate": 5.828363962230583e-06,
      "loss": 0.0134,
      "step": 1286200
    },
    {
      "epoch": 2.1049272402348738,
      "grad_norm": 0.4224189221858978,
      "learning_rate": 5.828298070017067e-06,
      "loss": 0.0203,
      "step": 1286220
    },
    {
      "epoch": 2.104959970673527,
      "grad_norm": 0.4034229815006256,
      "learning_rate": 5.8282321778035485e-06,
      "loss": 0.0155,
      "step": 1286240
    },
    {
      "epoch": 2.1049927011121805,
      "grad_norm": 0.17759016156196594,
      "learning_rate": 5.828166285590032e-06,
      "loss": 0.02,
      "step": 1286260
    },
    {
      "epoch": 2.1050254315508337,
      "grad_norm": 0.23196955025196075,
      "learning_rate": 5.828100393376516e-06,
      "loss": 0.0145,
      "step": 1286280
    },
    {
      "epoch": 2.105058161989487,
      "grad_norm": 0.1806069165468216,
      "learning_rate": 5.8280345011629976e-06,
      "loss": 0.0139,
      "step": 1286300
    },
    {
      "epoch": 2.1050908924281404,
      "grad_norm": 0.23306192457675934,
      "learning_rate": 5.827968608949481e-06,
      "loss": 0.0252,
      "step": 1286320
    },
    {
      "epoch": 2.1051236228667936,
      "grad_norm": 0.34025293588638306,
      "learning_rate": 5.827902716735964e-06,
      "loss": 0.0122,
      "step": 1286340
    },
    {
      "epoch": 2.105156353305447,
      "grad_norm": 0.17502595484256744,
      "learning_rate": 5.827836824522447e-06,
      "loss": 0.0131,
      "step": 1286360
    },
    {
      "epoch": 2.1051890837441003,
      "grad_norm": 0.3059183359146118,
      "learning_rate": 5.827770932308929e-06,
      "loss": 0.0158,
      "step": 1286380
    },
    {
      "epoch": 2.105221814182754,
      "grad_norm": 0.589297890663147,
      "learning_rate": 5.827705040095413e-06,
      "loss": 0.0135,
      "step": 1286400
    },
    {
      "epoch": 2.105254544621407,
      "grad_norm": 0.15580861270427704,
      "learning_rate": 5.827639147881895e-06,
      "loss": 0.0205,
      "step": 1286420
    },
    {
      "epoch": 2.10528727506006,
      "grad_norm": 0.3599512577056885,
      "learning_rate": 5.8275732556683784e-06,
      "loss": 0.0199,
      "step": 1286440
    },
    {
      "epoch": 2.105320005498714,
      "grad_norm": 0.2663929760456085,
      "learning_rate": 5.82750736345486e-06,
      "loss": 0.0191,
      "step": 1286460
    },
    {
      "epoch": 2.105352735937367,
      "grad_norm": 1.3632895946502686,
      "learning_rate": 5.827441471241344e-06,
      "loss": 0.0151,
      "step": 1286480
    },
    {
      "epoch": 2.1053854663760205,
      "grad_norm": 0.1937473565340042,
      "learning_rate": 5.827375579027827e-06,
      "loss": 0.0135,
      "step": 1286500
    },
    {
      "epoch": 2.1054181968146737,
      "grad_norm": 0.4870234429836273,
      "learning_rate": 5.827309686814309e-06,
      "loss": 0.0171,
      "step": 1286520
    },
    {
      "epoch": 2.105450927253327,
      "grad_norm": 0.3772750198841095,
      "learning_rate": 5.827243794600792e-06,
      "loss": 0.0142,
      "step": 1286540
    },
    {
      "epoch": 2.1054836576919804,
      "grad_norm": 0.947192907333374,
      "learning_rate": 5.827177902387276e-06,
      "loss": 0.0115,
      "step": 1286560
    },
    {
      "epoch": 2.1055163881306336,
      "grad_norm": 0.2777178883552551,
      "learning_rate": 5.827112010173758e-06,
      "loss": 0.0203,
      "step": 1286580
    },
    {
      "epoch": 2.105549118569287,
      "grad_norm": 0.1661522090435028,
      "learning_rate": 5.827046117960241e-06,
      "loss": 0.0102,
      "step": 1286600
    },
    {
      "epoch": 2.1055818490079403,
      "grad_norm": 0.5223851799964905,
      "learning_rate": 5.826980225746725e-06,
      "loss": 0.0143,
      "step": 1286620
    },
    {
      "epoch": 2.105614579446594,
      "grad_norm": 0.8640297651290894,
      "learning_rate": 5.826914333533207e-06,
      "loss": 0.0153,
      "step": 1286640
    },
    {
      "epoch": 2.105647309885247,
      "grad_norm": 0.5837185978889465,
      "learning_rate": 5.82684844131969e-06,
      "loss": 0.0144,
      "step": 1286660
    },
    {
      "epoch": 2.1056800403239,
      "grad_norm": 0.567203938961029,
      "learning_rate": 5.826782549106172e-06,
      "loss": 0.0151,
      "step": 1286680
    },
    {
      "epoch": 2.105712770762554,
      "grad_norm": 0.29622071981430054,
      "learning_rate": 5.826716656892656e-06,
      "loss": 0.0148,
      "step": 1286700
    },
    {
      "epoch": 2.105745501201207,
      "grad_norm": 0.34373533725738525,
      "learning_rate": 5.8266507646791385e-06,
      "loss": 0.0144,
      "step": 1286720
    },
    {
      "epoch": 2.1057782316398606,
      "grad_norm": 0.7960655093193054,
      "learning_rate": 5.826584872465621e-06,
      "loss": 0.019,
      "step": 1286740
    },
    {
      "epoch": 2.1058109620785137,
      "grad_norm": 0.4074352979660034,
      "learning_rate": 5.826518980252104e-06,
      "loss": 0.0243,
      "step": 1286760
    },
    {
      "epoch": 2.1058436925171673,
      "grad_norm": 0.2153722494840622,
      "learning_rate": 5.8264530880385875e-06,
      "loss": 0.0157,
      "step": 1286780
    },
    {
      "epoch": 2.1058764229558204,
      "grad_norm": 0.4170900881290436,
      "learning_rate": 5.8263871958250694e-06,
      "loss": 0.0125,
      "step": 1286800
    },
    {
      "epoch": 2.1059091533944736,
      "grad_norm": 1.5674364566802979,
      "learning_rate": 5.826321303611553e-06,
      "loss": 0.015,
      "step": 1286820
    },
    {
      "epoch": 2.105941883833127,
      "grad_norm": 0.10353586077690125,
      "learning_rate": 5.826255411398035e-06,
      "loss": 0.024,
      "step": 1286840
    },
    {
      "epoch": 2.1059746142717803,
      "grad_norm": 0.3913315534591675,
      "learning_rate": 5.8261895191845185e-06,
      "loss": 0.0177,
      "step": 1286860
    },
    {
      "epoch": 2.106007344710434,
      "grad_norm": 0.4874746799468994,
      "learning_rate": 5.826123626971001e-06,
      "loss": 0.0177,
      "step": 1286880
    },
    {
      "epoch": 2.106040075149087,
      "grad_norm": 0.39451250433921814,
      "learning_rate": 5.826057734757484e-06,
      "loss": 0.0119,
      "step": 1286900
    },
    {
      "epoch": 2.1060728055877407,
      "grad_norm": 0.3017738461494446,
      "learning_rate": 5.825991842543967e-06,
      "loss": 0.0124,
      "step": 1286920
    },
    {
      "epoch": 2.106105536026394,
      "grad_norm": 0.7238309383392334,
      "learning_rate": 5.82592595033045e-06,
      "loss": 0.0132,
      "step": 1286940
    },
    {
      "epoch": 2.106138266465047,
      "grad_norm": 0.3129381835460663,
      "learning_rate": 5.825860058116933e-06,
      "loss": 0.0153,
      "step": 1286960
    },
    {
      "epoch": 2.1061709969037006,
      "grad_norm": 0.43646055459976196,
      "learning_rate": 5.825794165903416e-06,
      "loss": 0.0116,
      "step": 1286980
    },
    {
      "epoch": 2.1062037273423537,
      "grad_norm": 0.1556576043367386,
      "learning_rate": 5.825728273689899e-06,
      "loss": 0.0171,
      "step": 1287000
    },
    {
      "epoch": 2.1062364577810073,
      "grad_norm": 0.20055614411830902,
      "learning_rate": 5.825662381476381e-06,
      "loss": 0.0191,
      "step": 1287020
    },
    {
      "epoch": 2.1062691882196605,
      "grad_norm": 0.7328333258628845,
      "learning_rate": 5.825596489262865e-06,
      "loss": 0.009,
      "step": 1287040
    },
    {
      "epoch": 2.106301918658314,
      "grad_norm": 0.14555580914020538,
      "learning_rate": 5.825530597049347e-06,
      "loss": 0.014,
      "step": 1287060
    },
    {
      "epoch": 2.106334649096967,
      "grad_norm": 0.7508498430252075,
      "learning_rate": 5.82546470483583e-06,
      "loss": 0.0158,
      "step": 1287080
    },
    {
      "epoch": 2.1063673795356204,
      "grad_norm": 0.34441494941711426,
      "learning_rate": 5.825398812622313e-06,
      "loss": 0.0152,
      "step": 1287100
    },
    {
      "epoch": 2.106400109974274,
      "grad_norm": 1.1258624792099,
      "learning_rate": 5.825332920408796e-06,
      "loss": 0.0156,
      "step": 1287120
    },
    {
      "epoch": 2.106432840412927,
      "grad_norm": 0.7494853138923645,
      "learning_rate": 5.8252670281952786e-06,
      "loss": 0.0152,
      "step": 1287140
    },
    {
      "epoch": 2.1064655708515807,
      "grad_norm": 0.7123311161994934,
      "learning_rate": 5.825201135981762e-06,
      "loss": 0.0136,
      "step": 1287160
    },
    {
      "epoch": 2.106498301290234,
      "grad_norm": 0.21709544956684113,
      "learning_rate": 5.825135243768244e-06,
      "loss": 0.0176,
      "step": 1287180
    },
    {
      "epoch": 2.1065310317288874,
      "grad_norm": 0.4231346547603607,
      "learning_rate": 5.825069351554728e-06,
      "loss": 0.0179,
      "step": 1287200
    },
    {
      "epoch": 2.1065637621675406,
      "grad_norm": 0.39684736728668213,
      "learning_rate": 5.8250034593412095e-06,
      "loss": 0.0171,
      "step": 1287220
    },
    {
      "epoch": 2.1065964926061937,
      "grad_norm": 0.6134167909622192,
      "learning_rate": 5.824937567127693e-06,
      "loss": 0.015,
      "step": 1287240
    },
    {
      "epoch": 2.1066292230448473,
      "grad_norm": 0.18572698533535004,
      "learning_rate": 5.824871674914175e-06,
      "loss": 0.0189,
      "step": 1287260
    },
    {
      "epoch": 2.1066619534835005,
      "grad_norm": 0.28599411249160767,
      "learning_rate": 5.824805782700659e-06,
      "loss": 0.0175,
      "step": 1287280
    },
    {
      "epoch": 2.106694683922154,
      "grad_norm": 0.48092028498649597,
      "learning_rate": 5.824739890487141e-06,
      "loss": 0.0213,
      "step": 1287300
    },
    {
      "epoch": 2.1067274143608072,
      "grad_norm": 0.13658106327056885,
      "learning_rate": 5.824673998273624e-06,
      "loss": 0.0113,
      "step": 1287320
    },
    {
      "epoch": 2.1067601447994604,
      "grad_norm": 0.0483047254383564,
      "learning_rate": 5.824608106060108e-06,
      "loss": 0.011,
      "step": 1287340
    },
    {
      "epoch": 2.106792875238114,
      "grad_norm": 0.6651447415351868,
      "learning_rate": 5.82454221384659e-06,
      "loss": 0.02,
      "step": 1287360
    },
    {
      "epoch": 2.106825605676767,
      "grad_norm": 0.37000733613967896,
      "learning_rate": 5.824476321633073e-06,
      "loss": 0.0129,
      "step": 1287380
    },
    {
      "epoch": 2.1068583361154207,
      "grad_norm": 0.34826016426086426,
      "learning_rate": 5.824410429419556e-06,
      "loss": 0.0226,
      "step": 1287400
    },
    {
      "epoch": 2.106891066554074,
      "grad_norm": 0.08332014083862305,
      "learning_rate": 5.8243445372060395e-06,
      "loss": 0.014,
      "step": 1287420
    },
    {
      "epoch": 2.1069237969927275,
      "grad_norm": 0.5627363920211792,
      "learning_rate": 5.824278644992521e-06,
      "loss": 0.0116,
      "step": 1287440
    },
    {
      "epoch": 2.1069565274313806,
      "grad_norm": 0.19416356086730957,
      "learning_rate": 5.824212752779005e-06,
      "loss": 0.0138,
      "step": 1287460
    },
    {
      "epoch": 2.1069892578700338,
      "grad_norm": 0.2602332830429077,
      "learning_rate": 5.824146860565487e-06,
      "loss": 0.0153,
      "step": 1287480
    },
    {
      "epoch": 2.1070219883086874,
      "grad_norm": 0.6144285798072815,
      "learning_rate": 5.82408096835197e-06,
      "loss": 0.0184,
      "step": 1287500
    },
    {
      "epoch": 2.1070547187473405,
      "grad_norm": 0.6329834461212158,
      "learning_rate": 5.824015076138453e-06,
      "loss": 0.0136,
      "step": 1287520
    },
    {
      "epoch": 2.107087449185994,
      "grad_norm": 0.7216256856918335,
      "learning_rate": 5.823949183924936e-06,
      "loss": 0.0173,
      "step": 1287540
    },
    {
      "epoch": 2.1071201796246473,
      "grad_norm": 0.5281270146369934,
      "learning_rate": 5.823883291711419e-06,
      "loss": 0.0179,
      "step": 1287560
    },
    {
      "epoch": 2.107152910063301,
      "grad_norm": 0.4183557331562042,
      "learning_rate": 5.823817399497902e-06,
      "loss": 0.015,
      "step": 1287580
    },
    {
      "epoch": 2.107185640501954,
      "grad_norm": 0.2926202118396759,
      "learning_rate": 5.823751507284384e-06,
      "loss": 0.0097,
      "step": 1287600
    },
    {
      "epoch": 2.107218370940607,
      "grad_norm": 0.6796712279319763,
      "learning_rate": 5.823685615070868e-06,
      "loss": 0.0207,
      "step": 1287620
    },
    {
      "epoch": 2.1072511013792607,
      "grad_norm": 0.40083229541778564,
      "learning_rate": 5.82361972285735e-06,
      "loss": 0.0124,
      "step": 1287640
    },
    {
      "epoch": 2.107283831817914,
      "grad_norm": 0.12580466270446777,
      "learning_rate": 5.823553830643833e-06,
      "loss": 0.0131,
      "step": 1287660
    },
    {
      "epoch": 2.1073165622565675,
      "grad_norm": 0.9753881096839905,
      "learning_rate": 5.823487938430317e-06,
      "loss": 0.0122,
      "step": 1287680
    },
    {
      "epoch": 2.1073492926952206,
      "grad_norm": 0.6927027702331543,
      "learning_rate": 5.823422046216799e-06,
      "loss": 0.0165,
      "step": 1287700
    },
    {
      "epoch": 2.1073820231338742,
      "grad_norm": 0.4516713619232178,
      "learning_rate": 5.823356154003282e-06,
      "loss": 0.0179,
      "step": 1287720
    },
    {
      "epoch": 2.1074147535725274,
      "grad_norm": 0.5023267269134521,
      "learning_rate": 5.823290261789765e-06,
      "loss": 0.0235,
      "step": 1287740
    },
    {
      "epoch": 2.1074474840111805,
      "grad_norm": 0.16009610891342163,
      "learning_rate": 5.823224369576248e-06,
      "loss": 0.0111,
      "step": 1287760
    },
    {
      "epoch": 2.107480214449834,
      "grad_norm": 0.12856927514076233,
      "learning_rate": 5.8231584773627305e-06,
      "loss": 0.0128,
      "step": 1287780
    },
    {
      "epoch": 2.1075129448884873,
      "grad_norm": 0.11949322372674942,
      "learning_rate": 5.823092585149214e-06,
      "loss": 0.0184,
      "step": 1287800
    },
    {
      "epoch": 2.107545675327141,
      "grad_norm": 0.4704146385192871,
      "learning_rate": 5.823026692935696e-06,
      "loss": 0.0153,
      "step": 1287820
    },
    {
      "epoch": 2.107578405765794,
      "grad_norm": 0.23980151116847992,
      "learning_rate": 5.8229608007221795e-06,
      "loss": 0.0154,
      "step": 1287840
    },
    {
      "epoch": 2.1076111362044476,
      "grad_norm": 0.28049010038375854,
      "learning_rate": 5.822894908508661e-06,
      "loss": 0.015,
      "step": 1287860
    },
    {
      "epoch": 2.1076438666431008,
      "grad_norm": 0.5420886278152466,
      "learning_rate": 5.822829016295145e-06,
      "loss": 0.0123,
      "step": 1287880
    },
    {
      "epoch": 2.107676597081754,
      "grad_norm": 0.3286980092525482,
      "learning_rate": 5.822763124081628e-06,
      "loss": 0.0159,
      "step": 1287900
    },
    {
      "epoch": 2.1077093275204075,
      "grad_norm": 0.2652670741081238,
      "learning_rate": 5.8226972318681105e-06,
      "loss": 0.0155,
      "step": 1287920
    },
    {
      "epoch": 2.1077420579590607,
      "grad_norm": 0.16279619932174683,
      "learning_rate": 5.822631339654593e-06,
      "loss": 0.018,
      "step": 1287940
    },
    {
      "epoch": 2.1077747883977143,
      "grad_norm": 0.28446510434150696,
      "learning_rate": 5.822565447441077e-06,
      "loss": 0.0108,
      "step": 1287960
    },
    {
      "epoch": 2.1078075188363674,
      "grad_norm": 1.1312323808670044,
      "learning_rate": 5.822499555227559e-06,
      "loss": 0.011,
      "step": 1287980
    },
    {
      "epoch": 2.1078402492750206,
      "grad_norm": 0.43172523379325867,
      "learning_rate": 5.822433663014042e-06,
      "loss": 0.014,
      "step": 1288000
    },
    {
      "epoch": 2.107872979713674,
      "grad_norm": 0.3752140700817108,
      "learning_rate": 5.822367770800526e-06,
      "loss": 0.0145,
      "step": 1288020
    },
    {
      "epoch": 2.1079057101523273,
      "grad_norm": 0.45086830854415894,
      "learning_rate": 5.822301878587008e-06,
      "loss": 0.0153,
      "step": 1288040
    },
    {
      "epoch": 2.107938440590981,
      "grad_norm": 0.13812877237796783,
      "learning_rate": 5.822235986373491e-06,
      "loss": 0.0203,
      "step": 1288060
    },
    {
      "epoch": 2.107971171029634,
      "grad_norm": 0.5459376573562622,
      "learning_rate": 5.822170094159973e-06,
      "loss": 0.016,
      "step": 1288080
    },
    {
      "epoch": 2.1080039014682876,
      "grad_norm": 0.21458213031291962,
      "learning_rate": 5.822104201946457e-06,
      "loss": 0.0192,
      "step": 1288100
    },
    {
      "epoch": 2.108036631906941,
      "grad_norm": 0.5830003023147583,
      "learning_rate": 5.822038309732939e-06,
      "loss": 0.0138,
      "step": 1288120
    },
    {
      "epoch": 2.108069362345594,
      "grad_norm": 0.5848473906517029,
      "learning_rate": 5.821972417519422e-06,
      "loss": 0.015,
      "step": 1288140
    },
    {
      "epoch": 2.1081020927842475,
      "grad_norm": 0.3673214912414551,
      "learning_rate": 5.821906525305905e-06,
      "loss": 0.0099,
      "step": 1288160
    },
    {
      "epoch": 2.1081348232229007,
      "grad_norm": 0.20882076025009155,
      "learning_rate": 5.821840633092389e-06,
      "loss": 0.0147,
      "step": 1288180
    },
    {
      "epoch": 2.1081675536615543,
      "grad_norm": 0.5449816584587097,
      "learning_rate": 5.8217747408788705e-06,
      "loss": 0.0218,
      "step": 1288200
    },
    {
      "epoch": 2.1082002841002074,
      "grad_norm": 0.16951829195022583,
      "learning_rate": 5.821708848665354e-06,
      "loss": 0.0168,
      "step": 1288220
    },
    {
      "epoch": 2.108233014538861,
      "grad_norm": 0.67322838306427,
      "learning_rate": 5.821642956451836e-06,
      "loss": 0.0123,
      "step": 1288240
    },
    {
      "epoch": 2.108265744977514,
      "grad_norm": 0.9874262809753418,
      "learning_rate": 5.82157706423832e-06,
      "loss": 0.0227,
      "step": 1288260
    },
    {
      "epoch": 2.1082984754161673,
      "grad_norm": 0.09464070945978165,
      "learning_rate": 5.8215111720248015e-06,
      "loss": 0.023,
      "step": 1288280
    },
    {
      "epoch": 2.108331205854821,
      "grad_norm": 1.1887598037719727,
      "learning_rate": 5.821445279811285e-06,
      "loss": 0.0194,
      "step": 1288300
    },
    {
      "epoch": 2.108363936293474,
      "grad_norm": 0.13524624705314636,
      "learning_rate": 5.821379387597768e-06,
      "loss": 0.0161,
      "step": 1288320
    },
    {
      "epoch": 2.1083966667321277,
      "grad_norm": 0.4141078293323517,
      "learning_rate": 5.8213134953842506e-06,
      "loss": 0.0155,
      "step": 1288340
    },
    {
      "epoch": 2.108429397170781,
      "grad_norm": 0.3246254622936249,
      "learning_rate": 5.821247603170734e-06,
      "loss": 0.0125,
      "step": 1288360
    },
    {
      "epoch": 2.1084621276094344,
      "grad_norm": 0.29341956973075867,
      "learning_rate": 5.821181710957217e-06,
      "loss": 0.0178,
      "step": 1288380
    },
    {
      "epoch": 2.1084948580480876,
      "grad_norm": 0.43870359659194946,
      "learning_rate": 5.8211158187437e-06,
      "loss": 0.0197,
      "step": 1288400
    },
    {
      "epoch": 2.1085275884867407,
      "grad_norm": 0.9408564567565918,
      "learning_rate": 5.821049926530182e-06,
      "loss": 0.0183,
      "step": 1288420
    },
    {
      "epoch": 2.1085603189253943,
      "grad_norm": 0.2202371060848236,
      "learning_rate": 5.820984034316666e-06,
      "loss": 0.0119,
      "step": 1288440
    },
    {
      "epoch": 2.1085930493640475,
      "grad_norm": 0.552619457244873,
      "learning_rate": 5.820918142103148e-06,
      "loss": 0.0118,
      "step": 1288460
    },
    {
      "epoch": 2.108625779802701,
      "grad_norm": 0.3158070147037506,
      "learning_rate": 5.8208522498896314e-06,
      "loss": 0.0143,
      "step": 1288480
    },
    {
      "epoch": 2.108658510241354,
      "grad_norm": 0.6016395092010498,
      "learning_rate": 5.820786357676113e-06,
      "loss": 0.017,
      "step": 1288500
    },
    {
      "epoch": 2.108691240680008,
      "grad_norm": 0.3920900821685791,
      "learning_rate": 5.820720465462597e-06,
      "loss": 0.0165,
      "step": 1288520
    },
    {
      "epoch": 2.108723971118661,
      "grad_norm": 0.5060225129127502,
      "learning_rate": 5.82065457324908e-06,
      "loss": 0.0139,
      "step": 1288540
    },
    {
      "epoch": 2.108756701557314,
      "grad_norm": 0.3463969826698303,
      "learning_rate": 5.820588681035562e-06,
      "loss": 0.0164,
      "step": 1288560
    },
    {
      "epoch": 2.1087894319959677,
      "grad_norm": 0.245114266872406,
      "learning_rate": 5.820522788822045e-06,
      "loss": 0.0092,
      "step": 1288580
    },
    {
      "epoch": 2.108822162434621,
      "grad_norm": 0.44493749737739563,
      "learning_rate": 5.820456896608529e-06,
      "loss": 0.013,
      "step": 1288600
    },
    {
      "epoch": 2.1088548928732744,
      "grad_norm": 0.49789926409721375,
      "learning_rate": 5.820391004395011e-06,
      "loss": 0.0197,
      "step": 1288620
    },
    {
      "epoch": 2.1088876233119276,
      "grad_norm": 0.7274608612060547,
      "learning_rate": 5.820325112181494e-06,
      "loss": 0.0141,
      "step": 1288640
    },
    {
      "epoch": 2.108920353750581,
      "grad_norm": 0.4334161579608917,
      "learning_rate": 5.820259219967976e-06,
      "loss": 0.022,
      "step": 1288660
    },
    {
      "epoch": 2.1089530841892343,
      "grad_norm": 0.3383912444114685,
      "learning_rate": 5.82019332775446e-06,
      "loss": 0.011,
      "step": 1288680
    },
    {
      "epoch": 2.1089858146278875,
      "grad_norm": 0.5951297283172607,
      "learning_rate": 5.820127435540942e-06,
      "loss": 0.0209,
      "step": 1288700
    },
    {
      "epoch": 2.109018545066541,
      "grad_norm": 0.5430933833122253,
      "learning_rate": 5.820061543327425e-06,
      "loss": 0.0168,
      "step": 1288720
    },
    {
      "epoch": 2.1090512755051942,
      "grad_norm": 0.2427472472190857,
      "learning_rate": 5.819995651113909e-06,
      "loss": 0.0237,
      "step": 1288740
    },
    {
      "epoch": 2.109084005943848,
      "grad_norm": 0.5359477400779724,
      "learning_rate": 5.8199297589003915e-06,
      "loss": 0.0174,
      "step": 1288760
    },
    {
      "epoch": 2.109116736382501,
      "grad_norm": 0.18386343121528625,
      "learning_rate": 5.819863866686874e-06,
      "loss": 0.0153,
      "step": 1288780
    },
    {
      "epoch": 2.1091494668211546,
      "grad_norm": 0.34743648767471313,
      "learning_rate": 5.819797974473357e-06,
      "loss": 0.0154,
      "step": 1288800
    },
    {
      "epoch": 2.1091821972598077,
      "grad_norm": 0.5130820274353027,
      "learning_rate": 5.8197320822598406e-06,
      "loss": 0.0113,
      "step": 1288820
    },
    {
      "epoch": 2.109214927698461,
      "grad_norm": 0.6615220904350281,
      "learning_rate": 5.8196661900463224e-06,
      "loss": 0.0188,
      "step": 1288840
    },
    {
      "epoch": 2.1092476581371145,
      "grad_norm": 0.4789227545261383,
      "learning_rate": 5.819600297832806e-06,
      "loss": 0.0136,
      "step": 1288860
    },
    {
      "epoch": 2.1092803885757676,
      "grad_norm": 0.34844478964805603,
      "learning_rate": 5.819534405619288e-06,
      "loss": 0.0132,
      "step": 1288880
    },
    {
      "epoch": 2.109313119014421,
      "grad_norm": 0.22898384928703308,
      "learning_rate": 5.8194685134057715e-06,
      "loss": 0.0103,
      "step": 1288900
    },
    {
      "epoch": 2.1093458494530744,
      "grad_norm": 0.1215178593993187,
      "learning_rate": 5.819402621192254e-06,
      "loss": 0.0142,
      "step": 1288920
    },
    {
      "epoch": 2.1093785798917275,
      "grad_norm": 0.46796074509620667,
      "learning_rate": 5.819336728978737e-06,
      "loss": 0.012,
      "step": 1288940
    },
    {
      "epoch": 2.109411310330381,
      "grad_norm": 0.31627216935157776,
      "learning_rate": 5.81927083676522e-06,
      "loss": 0.012,
      "step": 1288960
    },
    {
      "epoch": 2.1094440407690342,
      "grad_norm": 1.038404107093811,
      "learning_rate": 5.819204944551703e-06,
      "loss": 0.0153,
      "step": 1288980
    },
    {
      "epoch": 2.109476771207688,
      "grad_norm": 0.7350748777389526,
      "learning_rate": 5.819139052338185e-06,
      "loss": 0.0178,
      "step": 1289000
    },
    {
      "epoch": 2.109509501646341,
      "grad_norm": 0.8021010756492615,
      "learning_rate": 5.819073160124669e-06,
      "loss": 0.019,
      "step": 1289020
    },
    {
      "epoch": 2.1095422320849946,
      "grad_norm": 0.34468892216682434,
      "learning_rate": 5.819007267911151e-06,
      "loss": 0.014,
      "step": 1289040
    },
    {
      "epoch": 2.1095749625236477,
      "grad_norm": 0.303504079580307,
      "learning_rate": 5.818941375697634e-06,
      "loss": 0.016,
      "step": 1289060
    },
    {
      "epoch": 2.109607692962301,
      "grad_norm": 0.9725534915924072,
      "learning_rate": 5.818875483484118e-06,
      "loss": 0.0175,
      "step": 1289080
    },
    {
      "epoch": 2.1096404234009545,
      "grad_norm": 0.12007691711187363,
      "learning_rate": 5.8188095912706e-06,
      "loss": 0.017,
      "step": 1289100
    },
    {
      "epoch": 2.1096731538396076,
      "grad_norm": 0.14906233549118042,
      "learning_rate": 5.818743699057083e-06,
      "loss": 0.0166,
      "step": 1289120
    },
    {
      "epoch": 2.1097058842782612,
      "grad_norm": 0.1113562136888504,
      "learning_rate": 5.818677806843565e-06,
      "loss": 0.0148,
      "step": 1289140
    },
    {
      "epoch": 2.1097386147169144,
      "grad_norm": 0.30165526270866394,
      "learning_rate": 5.818611914630049e-06,
      "loss": 0.0213,
      "step": 1289160
    },
    {
      "epoch": 2.109771345155568,
      "grad_norm": 0.44610559940338135,
      "learning_rate": 5.8185460224165316e-06,
      "loss": 0.0202,
      "step": 1289180
    },
    {
      "epoch": 2.109804075594221,
      "grad_norm": 0.5024926066398621,
      "learning_rate": 5.818480130203014e-06,
      "loss": 0.0167,
      "step": 1289200
    },
    {
      "epoch": 2.1098368060328743,
      "grad_norm": 0.3105550706386566,
      "learning_rate": 5.818414237989497e-06,
      "loss": 0.0155,
      "step": 1289220
    },
    {
      "epoch": 2.109869536471528,
      "grad_norm": 0.3577927350997925,
      "learning_rate": 5.818348345775981e-06,
      "loss": 0.016,
      "step": 1289240
    },
    {
      "epoch": 2.109902266910181,
      "grad_norm": 0.2822585999965668,
      "learning_rate": 5.8182824535624625e-06,
      "loss": 0.0182,
      "step": 1289260
    },
    {
      "epoch": 2.1099349973488346,
      "grad_norm": 0.8878886103630066,
      "learning_rate": 5.818216561348946e-06,
      "loss": 0.0112,
      "step": 1289280
    },
    {
      "epoch": 2.1099677277874878,
      "grad_norm": 0.07378142327070236,
      "learning_rate": 5.818150669135428e-06,
      "loss": 0.0177,
      "step": 1289300
    },
    {
      "epoch": 2.1100004582261414,
      "grad_norm": 0.3275459110736847,
      "learning_rate": 5.818084776921912e-06,
      "loss": 0.0145,
      "step": 1289320
    },
    {
      "epoch": 2.1100331886647945,
      "grad_norm": 0.5798177719116211,
      "learning_rate": 5.818018884708394e-06,
      "loss": 0.0169,
      "step": 1289340
    },
    {
      "epoch": 2.1100659191034477,
      "grad_norm": 0.4705217778682709,
      "learning_rate": 5.817952992494877e-06,
      "loss": 0.0139,
      "step": 1289360
    },
    {
      "epoch": 2.1100986495421012,
      "grad_norm": 0.2245531529188156,
      "learning_rate": 5.81788710028136e-06,
      "loss": 0.0102,
      "step": 1289380
    },
    {
      "epoch": 2.1101313799807544,
      "grad_norm": 0.1807967722415924,
      "learning_rate": 5.817821208067843e-06,
      "loss": 0.017,
      "step": 1289400
    },
    {
      "epoch": 2.110164110419408,
      "grad_norm": 1.0147085189819336,
      "learning_rate": 5.817755315854326e-06,
      "loss": 0.017,
      "step": 1289420
    },
    {
      "epoch": 2.110196840858061,
      "grad_norm": 0.44774287939071655,
      "learning_rate": 5.817689423640809e-06,
      "loss": 0.0132,
      "step": 1289440
    },
    {
      "epoch": 2.1102295712967143,
      "grad_norm": 0.40754517912864685,
      "learning_rate": 5.8176235314272925e-06,
      "loss": 0.0185,
      "step": 1289460
    },
    {
      "epoch": 2.110262301735368,
      "grad_norm": 0.07560837268829346,
      "learning_rate": 5.817557639213774e-06,
      "loss": 0.013,
      "step": 1289480
    },
    {
      "epoch": 2.110295032174021,
      "grad_norm": 0.31441301107406616,
      "learning_rate": 5.817491747000258e-06,
      "loss": 0.0089,
      "step": 1289500
    },
    {
      "epoch": 2.1103277626126746,
      "grad_norm": 0.3369591236114502,
      "learning_rate": 5.81742585478674e-06,
      "loss": 0.014,
      "step": 1289520
    },
    {
      "epoch": 2.110360493051328,
      "grad_norm": 1.6042429208755493,
      "learning_rate": 5.817359962573223e-06,
      "loss": 0.0152,
      "step": 1289540
    },
    {
      "epoch": 2.1103932234899814,
      "grad_norm": 0.22691656649112701,
      "learning_rate": 5.817294070359706e-06,
      "loss": 0.0113,
      "step": 1289560
    },
    {
      "epoch": 2.1104259539286345,
      "grad_norm": 0.11293310672044754,
      "learning_rate": 5.817228178146189e-06,
      "loss": 0.0138,
      "step": 1289580
    },
    {
      "epoch": 2.1104586843672877,
      "grad_norm": 0.4006633162498474,
      "learning_rate": 5.817162285932672e-06,
      "loss": 0.0183,
      "step": 1289600
    },
    {
      "epoch": 2.1104914148059413,
      "grad_norm": 0.3253595530986786,
      "learning_rate": 5.817096393719155e-06,
      "loss": 0.0119,
      "step": 1289620
    },
    {
      "epoch": 2.1105241452445944,
      "grad_norm": 0.3446275591850281,
      "learning_rate": 5.817030501505637e-06,
      "loss": 0.0177,
      "step": 1289640
    },
    {
      "epoch": 2.110556875683248,
      "grad_norm": 0.10050390660762787,
      "learning_rate": 5.816964609292121e-06,
      "loss": 0.0131,
      "step": 1289660
    },
    {
      "epoch": 2.110589606121901,
      "grad_norm": 0.5779277682304382,
      "learning_rate": 5.816898717078603e-06,
      "loss": 0.014,
      "step": 1289680
    },
    {
      "epoch": 2.1106223365605548,
      "grad_norm": 0.38147345185279846,
      "learning_rate": 5.816832824865086e-06,
      "loss": 0.0109,
      "step": 1289700
    },
    {
      "epoch": 2.110655066999208,
      "grad_norm": 0.4620705544948578,
      "learning_rate": 5.816766932651569e-06,
      "loss": 0.0114,
      "step": 1289720
    },
    {
      "epoch": 2.110687797437861,
      "grad_norm": 0.2958680987358093,
      "learning_rate": 5.816701040438052e-06,
      "loss": 0.0156,
      "step": 1289740
    },
    {
      "epoch": 2.1107205278765147,
      "grad_norm": 0.27325713634490967,
      "learning_rate": 5.816635148224534e-06,
      "loss": 0.0154,
      "step": 1289760
    },
    {
      "epoch": 2.110753258315168,
      "grad_norm": 0.41144317388534546,
      "learning_rate": 5.816569256011018e-06,
      "loss": 0.0178,
      "step": 1289780
    },
    {
      "epoch": 2.1107859887538214,
      "grad_norm": 0.5126878023147583,
      "learning_rate": 5.816503363797501e-06,
      "loss": 0.0161,
      "step": 1289800
    },
    {
      "epoch": 2.1108187191924745,
      "grad_norm": 1.5710721015930176,
      "learning_rate": 5.8164374715839835e-06,
      "loss": 0.0183,
      "step": 1289820
    },
    {
      "epoch": 2.110851449631128,
      "grad_norm": 0.15515269339084625,
      "learning_rate": 5.816371579370467e-06,
      "loss": 0.0164,
      "step": 1289840
    },
    {
      "epoch": 2.1108841800697813,
      "grad_norm": 1.156656265258789,
      "learning_rate": 5.816305687156949e-06,
      "loss": 0.0191,
      "step": 1289860
    },
    {
      "epoch": 2.1109169105084344,
      "grad_norm": 0.5959163308143616,
      "learning_rate": 5.8162397949434325e-06,
      "loss": 0.0119,
      "step": 1289880
    },
    {
      "epoch": 2.110949640947088,
      "grad_norm": 0.39357250928878784,
      "learning_rate": 5.8161739027299144e-06,
      "loss": 0.0114,
      "step": 1289900
    },
    {
      "epoch": 2.110982371385741,
      "grad_norm": 0.8172804713249207,
      "learning_rate": 5.816108010516398e-06,
      "loss": 0.0206,
      "step": 1289920
    },
    {
      "epoch": 2.111015101824395,
      "grad_norm": 0.23014529049396515,
      "learning_rate": 5.816042118302881e-06,
      "loss": 0.0159,
      "step": 1289940
    },
    {
      "epoch": 2.111047832263048,
      "grad_norm": 0.4935416579246521,
      "learning_rate": 5.8159762260893635e-06,
      "loss": 0.013,
      "step": 1289960
    },
    {
      "epoch": 2.1110805627017015,
      "grad_norm": 0.8275995254516602,
      "learning_rate": 5.815910333875846e-06,
      "loss": 0.015,
      "step": 1289980
    },
    {
      "epoch": 2.1111132931403547,
      "grad_norm": 0.5803492069244385,
      "learning_rate": 5.81584444166233e-06,
      "loss": 0.0135,
      "step": 1290000
    },
    {
      "epoch": 2.111146023579008,
      "grad_norm": 0.3146657645702362,
      "learning_rate": 5.815778549448812e-06,
      "loss": 0.0188,
      "step": 1290020
    },
    {
      "epoch": 2.1111787540176614,
      "grad_norm": 1.4745208024978638,
      "learning_rate": 5.815712657235295e-06,
      "loss": 0.0154,
      "step": 1290040
    },
    {
      "epoch": 2.1112114844563146,
      "grad_norm": 0.8500905632972717,
      "learning_rate": 5.815646765021777e-06,
      "loss": 0.0193,
      "step": 1290060
    },
    {
      "epoch": 2.111244214894968,
      "grad_norm": 0.6993629932403564,
      "learning_rate": 5.815580872808261e-06,
      "loss": 0.0206,
      "step": 1290080
    },
    {
      "epoch": 2.1112769453336213,
      "grad_norm": 0.45078280568122864,
      "learning_rate": 5.815514980594743e-06,
      "loss": 0.0241,
      "step": 1290100
    },
    {
      "epoch": 2.111309675772275,
      "grad_norm": 0.10058066993951797,
      "learning_rate": 5.815449088381226e-06,
      "loss": 0.0109,
      "step": 1290120
    },
    {
      "epoch": 2.111342406210928,
      "grad_norm": 0.4222628176212311,
      "learning_rate": 5.81538319616771e-06,
      "loss": 0.0152,
      "step": 1290140
    },
    {
      "epoch": 2.111375136649581,
      "grad_norm": 1.2709901332855225,
      "learning_rate": 5.815317303954192e-06,
      "loss": 0.0172,
      "step": 1290160
    },
    {
      "epoch": 2.111407867088235,
      "grad_norm": 0.2559920847415924,
      "learning_rate": 5.815251411740675e-06,
      "loss": 0.014,
      "step": 1290180
    },
    {
      "epoch": 2.111440597526888,
      "grad_norm": 1.736710548400879,
      "learning_rate": 5.815185519527158e-06,
      "loss": 0.0157,
      "step": 1290200
    },
    {
      "epoch": 2.1114733279655415,
      "grad_norm": 0.28766098618507385,
      "learning_rate": 5.815119627313641e-06,
      "loss": 0.0148,
      "step": 1290220
    },
    {
      "epoch": 2.1115060584041947,
      "grad_norm": 0.669324517250061,
      "learning_rate": 5.8150537351001235e-06,
      "loss": 0.0115,
      "step": 1290240
    },
    {
      "epoch": 2.1115387888428483,
      "grad_norm": 0.15664079785346985,
      "learning_rate": 5.814987842886607e-06,
      "loss": 0.0111,
      "step": 1290260
    },
    {
      "epoch": 2.1115715192815014,
      "grad_norm": 0.897211492061615,
      "learning_rate": 5.814921950673089e-06,
      "loss": 0.0196,
      "step": 1290280
    },
    {
      "epoch": 2.1116042497201546,
      "grad_norm": 0.2974346876144409,
      "learning_rate": 5.814856058459573e-06,
      "loss": 0.0119,
      "step": 1290300
    },
    {
      "epoch": 2.111636980158808,
      "grad_norm": 0.5195271372795105,
      "learning_rate": 5.8147901662460545e-06,
      "loss": 0.0106,
      "step": 1290320
    },
    {
      "epoch": 2.1116697105974613,
      "grad_norm": 0.5900437831878662,
      "learning_rate": 5.814724274032538e-06,
      "loss": 0.015,
      "step": 1290340
    },
    {
      "epoch": 2.111702441036115,
      "grad_norm": 0.2929018437862396,
      "learning_rate": 5.814658381819021e-06,
      "loss": 0.011,
      "step": 1290360
    },
    {
      "epoch": 2.111735171474768,
      "grad_norm": 3.8746964931488037,
      "learning_rate": 5.8145924896055036e-06,
      "loss": 0.0211,
      "step": 1290380
    },
    {
      "epoch": 2.1117679019134212,
      "grad_norm": 0.283882737159729,
      "learning_rate": 5.814526597391986e-06,
      "loss": 0.0108,
      "step": 1290400
    },
    {
      "epoch": 2.111800632352075,
      "grad_norm": 0.3478933870792389,
      "learning_rate": 5.81446070517847e-06,
      "loss": 0.0114,
      "step": 1290420
    },
    {
      "epoch": 2.111833362790728,
      "grad_norm": 0.12044482678174973,
      "learning_rate": 5.814394812964952e-06,
      "loss": 0.0164,
      "step": 1290440
    },
    {
      "epoch": 2.1118660932293816,
      "grad_norm": 0.6135503053665161,
      "learning_rate": 5.814328920751435e-06,
      "loss": 0.0214,
      "step": 1290460
    },
    {
      "epoch": 2.1118988236680347,
      "grad_norm": 1.9792475700378418,
      "learning_rate": 5.814263028537919e-06,
      "loss": 0.0213,
      "step": 1290480
    },
    {
      "epoch": 2.1119315541066883,
      "grad_norm": 0.8401224613189697,
      "learning_rate": 5.814197136324401e-06,
      "loss": 0.0159,
      "step": 1290500
    },
    {
      "epoch": 2.1119642845453415,
      "grad_norm": 0.6109873652458191,
      "learning_rate": 5.8141312441108844e-06,
      "loss": 0.0187,
      "step": 1290520
    },
    {
      "epoch": 2.1119970149839946,
      "grad_norm": 0.19962453842163086,
      "learning_rate": 5.814065351897366e-06,
      "loss": 0.0105,
      "step": 1290540
    },
    {
      "epoch": 2.112029745422648,
      "grad_norm": 0.22596211731433868,
      "learning_rate": 5.81399945968385e-06,
      "loss": 0.012,
      "step": 1290560
    },
    {
      "epoch": 2.1120624758613014,
      "grad_norm": 0.3085874617099762,
      "learning_rate": 5.813933567470333e-06,
      "loss": 0.0112,
      "step": 1290580
    },
    {
      "epoch": 2.112095206299955,
      "grad_norm": 0.7895617485046387,
      "learning_rate": 5.813867675256815e-06,
      "loss": 0.0189,
      "step": 1290600
    },
    {
      "epoch": 2.112127936738608,
      "grad_norm": 0.9885371923446655,
      "learning_rate": 5.813801783043298e-06,
      "loss": 0.017,
      "step": 1290620
    },
    {
      "epoch": 2.1121606671772617,
      "grad_norm": 0.28704118728637695,
      "learning_rate": 5.813735890829782e-06,
      "loss": 0.0202,
      "step": 1290640
    },
    {
      "epoch": 2.112193397615915,
      "grad_norm": 0.288160502910614,
      "learning_rate": 5.813669998616264e-06,
      "loss": 0.0112,
      "step": 1290660
    },
    {
      "epoch": 2.112226128054568,
      "grad_norm": 0.4958813786506653,
      "learning_rate": 5.813604106402747e-06,
      "loss": 0.0201,
      "step": 1290680
    },
    {
      "epoch": 2.1122588584932216,
      "grad_norm": 0.959256112575531,
      "learning_rate": 5.813538214189229e-06,
      "loss": 0.0202,
      "step": 1290700
    },
    {
      "epoch": 2.1122915889318747,
      "grad_norm": 1.0011439323425293,
      "learning_rate": 5.813472321975713e-06,
      "loss": 0.0151,
      "step": 1290720
    },
    {
      "epoch": 2.1123243193705283,
      "grad_norm": 0.44908609986305237,
      "learning_rate": 5.8134064297621954e-06,
      "loss": 0.0182,
      "step": 1290740
    },
    {
      "epoch": 2.1123570498091815,
      "grad_norm": 0.38363057374954224,
      "learning_rate": 5.813340537548678e-06,
      "loss": 0.0232,
      "step": 1290760
    },
    {
      "epoch": 2.112389780247835,
      "grad_norm": 0.41571077704429626,
      "learning_rate": 5.813274645335161e-06,
      "loss": 0.0127,
      "step": 1290780
    },
    {
      "epoch": 2.1124225106864882,
      "grad_norm": 0.3102762699127197,
      "learning_rate": 5.8132087531216445e-06,
      "loss": 0.0228,
      "step": 1290800
    },
    {
      "epoch": 2.1124552411251414,
      "grad_norm": 0.17101651430130005,
      "learning_rate": 5.813142860908127e-06,
      "loss": 0.016,
      "step": 1290820
    },
    {
      "epoch": 2.112487971563795,
      "grad_norm": 2.5309770107269287,
      "learning_rate": 5.81307696869461e-06,
      "loss": 0.0206,
      "step": 1290840
    },
    {
      "epoch": 2.112520702002448,
      "grad_norm": 0.7255275845527649,
      "learning_rate": 5.8130110764810936e-06,
      "loss": 0.0201,
      "step": 1290860
    },
    {
      "epoch": 2.1125534324411017,
      "grad_norm": 0.1885182112455368,
      "learning_rate": 5.8129451842675755e-06,
      "loss": 0.0167,
      "step": 1290880
    },
    {
      "epoch": 2.112586162879755,
      "grad_norm": 0.43650397658348083,
      "learning_rate": 5.812879292054059e-06,
      "loss": 0.0151,
      "step": 1290900
    },
    {
      "epoch": 2.1126188933184085,
      "grad_norm": 0.2260497361421585,
      "learning_rate": 5.812813399840541e-06,
      "loss": 0.018,
      "step": 1290920
    },
    {
      "epoch": 2.1126516237570616,
      "grad_norm": 0.21033529937267303,
      "learning_rate": 5.8127475076270245e-06,
      "loss": 0.0184,
      "step": 1290940
    },
    {
      "epoch": 2.1126843541957148,
      "grad_norm": 0.5853577256202698,
      "learning_rate": 5.812681615413507e-06,
      "loss": 0.0187,
      "step": 1290960
    },
    {
      "epoch": 2.1127170846343684,
      "grad_norm": 0.23424586653709412,
      "learning_rate": 5.81261572319999e-06,
      "loss": 0.0154,
      "step": 1290980
    },
    {
      "epoch": 2.1127498150730215,
      "grad_norm": 0.28784215450286865,
      "learning_rate": 5.812549830986473e-06,
      "loss": 0.0171,
      "step": 1291000
    },
    {
      "epoch": 2.112782545511675,
      "grad_norm": 0.42976126074790955,
      "learning_rate": 5.812483938772956e-06,
      "loss": 0.0127,
      "step": 1291020
    },
    {
      "epoch": 2.1128152759503283,
      "grad_norm": 0.07950997352600098,
      "learning_rate": 5.812418046559438e-06,
      "loss": 0.0108,
      "step": 1291040
    },
    {
      "epoch": 2.1128480063889814,
      "grad_norm": 0.1556856781244278,
      "learning_rate": 5.812352154345922e-06,
      "loss": 0.0161,
      "step": 1291060
    },
    {
      "epoch": 2.112880736827635,
      "grad_norm": 0.3088330626487732,
      "learning_rate": 5.812286262132404e-06,
      "loss": 0.0165,
      "step": 1291080
    },
    {
      "epoch": 2.112913467266288,
      "grad_norm": 0.7376281023025513,
      "learning_rate": 5.812220369918887e-06,
      "loss": 0.0167,
      "step": 1291100
    },
    {
      "epoch": 2.1129461977049417,
      "grad_norm": 3.0591864585876465,
      "learning_rate": 5.812154477705369e-06,
      "loss": 0.0189,
      "step": 1291120
    },
    {
      "epoch": 2.112978928143595,
      "grad_norm": 0.3028040826320648,
      "learning_rate": 5.812088585491853e-06,
      "loss": 0.0264,
      "step": 1291140
    },
    {
      "epoch": 2.1130116585822485,
      "grad_norm": 0.7930364608764648,
      "learning_rate": 5.8120226932783355e-06,
      "loss": 0.0226,
      "step": 1291160
    },
    {
      "epoch": 2.1130443890209016,
      "grad_norm": 0.8867306709289551,
      "learning_rate": 5.811956801064818e-06,
      "loss": 0.0244,
      "step": 1291180
    },
    {
      "epoch": 2.113077119459555,
      "grad_norm": 0.4476996958255768,
      "learning_rate": 5.811890908851302e-06,
      "loss": 0.0226,
      "step": 1291200
    },
    {
      "epoch": 2.1131098498982084,
      "grad_norm": 0.12293574213981628,
      "learning_rate": 5.8118250166377846e-06,
      "loss": 0.0202,
      "step": 1291220
    },
    {
      "epoch": 2.1131425803368615,
      "grad_norm": 0.13010349869728088,
      "learning_rate": 5.811759124424267e-06,
      "loss": 0.009,
      "step": 1291240
    },
    {
      "epoch": 2.113175310775515,
      "grad_norm": 1.5108169317245483,
      "learning_rate": 5.81169323221075e-06,
      "loss": 0.0176,
      "step": 1291260
    },
    {
      "epoch": 2.1132080412141683,
      "grad_norm": 0.3501250743865967,
      "learning_rate": 5.811627339997234e-06,
      "loss": 0.0171,
      "step": 1291280
    },
    {
      "epoch": 2.113240771652822,
      "grad_norm": 0.17045286297798157,
      "learning_rate": 5.8115614477837155e-06,
      "loss": 0.0181,
      "step": 1291300
    },
    {
      "epoch": 2.113273502091475,
      "grad_norm": 0.274849534034729,
      "learning_rate": 5.811495555570199e-06,
      "loss": 0.0223,
      "step": 1291320
    },
    {
      "epoch": 2.113306232530128,
      "grad_norm": 0.16104070842266083,
      "learning_rate": 5.811429663356681e-06,
      "loss": 0.0145,
      "step": 1291340
    },
    {
      "epoch": 2.1133389629687818,
      "grad_norm": 0.2803962230682373,
      "learning_rate": 5.811363771143165e-06,
      "loss": 0.0129,
      "step": 1291360
    },
    {
      "epoch": 2.113371693407435,
      "grad_norm": 0.38343000411987305,
      "learning_rate": 5.811297878929647e-06,
      "loss": 0.0167,
      "step": 1291380
    },
    {
      "epoch": 2.1134044238460885,
      "grad_norm": 0.16605618596076965,
      "learning_rate": 5.81123198671613e-06,
      "loss": 0.0176,
      "step": 1291400
    },
    {
      "epoch": 2.1134371542847417,
      "grad_norm": 0.9812600612640381,
      "learning_rate": 5.811166094502613e-06,
      "loss": 0.0174,
      "step": 1291420
    },
    {
      "epoch": 2.1134698847233953,
      "grad_norm": 0.372590035200119,
      "learning_rate": 5.811100202289096e-06,
      "loss": 0.0145,
      "step": 1291440
    },
    {
      "epoch": 2.1135026151620484,
      "grad_norm": 0.37651628255844116,
      "learning_rate": 5.811034310075578e-06,
      "loss": 0.0205,
      "step": 1291460
    },
    {
      "epoch": 2.1135353456007016,
      "grad_norm": 0.42678454518318176,
      "learning_rate": 5.810968417862062e-06,
      "loss": 0.0138,
      "step": 1291480
    },
    {
      "epoch": 2.113568076039355,
      "grad_norm": 0.12056875228881836,
      "learning_rate": 5.810902525648544e-06,
      "loss": 0.015,
      "step": 1291500
    },
    {
      "epoch": 2.1136008064780083,
      "grad_norm": 0.4036300480365753,
      "learning_rate": 5.810836633435027e-06,
      "loss": 0.0179,
      "step": 1291520
    },
    {
      "epoch": 2.113633536916662,
      "grad_norm": 0.59332275390625,
      "learning_rate": 5.810770741221511e-06,
      "loss": 0.0173,
      "step": 1291540
    },
    {
      "epoch": 2.113666267355315,
      "grad_norm": 0.26897329092025757,
      "learning_rate": 5.810704849007993e-06,
      "loss": 0.0137,
      "step": 1291560
    },
    {
      "epoch": 2.1136989977939686,
      "grad_norm": 0.3415311574935913,
      "learning_rate": 5.8106389567944764e-06,
      "loss": 0.0157,
      "step": 1291580
    },
    {
      "epoch": 2.113731728232622,
      "grad_norm": 0.16693167388439178,
      "learning_rate": 5.810573064580959e-06,
      "loss": 0.0149,
      "step": 1291600
    },
    {
      "epoch": 2.113764458671275,
      "grad_norm": 0.3047572672367096,
      "learning_rate": 5.810507172367442e-06,
      "loss": 0.0141,
      "step": 1291620
    },
    {
      "epoch": 2.1137971891099285,
      "grad_norm": 0.6034418940544128,
      "learning_rate": 5.810441280153925e-06,
      "loss": 0.0162,
      "step": 1291640
    },
    {
      "epoch": 2.1138299195485817,
      "grad_norm": 0.14090707898139954,
      "learning_rate": 5.810375387940408e-06,
      "loss": 0.0116,
      "step": 1291660
    },
    {
      "epoch": 2.1138626499872353,
      "grad_norm": 1.1123552322387695,
      "learning_rate": 5.81030949572689e-06,
      "loss": 0.0154,
      "step": 1291680
    },
    {
      "epoch": 2.1138953804258884,
      "grad_norm": 0.47154533863067627,
      "learning_rate": 5.810243603513374e-06,
      "loss": 0.0155,
      "step": 1291700
    },
    {
      "epoch": 2.113928110864542,
      "grad_norm": 0.3074318468570709,
      "learning_rate": 5.810177711299856e-06,
      "loss": 0.0152,
      "step": 1291720
    },
    {
      "epoch": 2.113960841303195,
      "grad_norm": 0.3629479706287384,
      "learning_rate": 5.810111819086339e-06,
      "loss": 0.0204,
      "step": 1291740
    },
    {
      "epoch": 2.1139935717418483,
      "grad_norm": 0.16872653365135193,
      "learning_rate": 5.810045926872822e-06,
      "loss": 0.0148,
      "step": 1291760
    },
    {
      "epoch": 2.114026302180502,
      "grad_norm": 1.1918039321899414,
      "learning_rate": 5.809980034659305e-06,
      "loss": 0.0203,
      "step": 1291780
    },
    {
      "epoch": 2.114059032619155,
      "grad_norm": 0.5681297779083252,
      "learning_rate": 5.809914142445787e-06,
      "loss": 0.017,
      "step": 1291800
    },
    {
      "epoch": 2.1140917630578087,
      "grad_norm": 0.24494792520999908,
      "learning_rate": 5.809848250232271e-06,
      "loss": 0.0128,
      "step": 1291820
    },
    {
      "epoch": 2.114124493496462,
      "grad_norm": 0.4376257359981537,
      "learning_rate": 5.809782358018753e-06,
      "loss": 0.0179,
      "step": 1291840
    },
    {
      "epoch": 2.1141572239351154,
      "grad_norm": 0.20594556629657745,
      "learning_rate": 5.8097164658052365e-06,
      "loss": 0.0185,
      "step": 1291860
    },
    {
      "epoch": 2.1141899543737686,
      "grad_norm": 0.05825231596827507,
      "learning_rate": 5.80965057359172e-06,
      "loss": 0.0151,
      "step": 1291880
    },
    {
      "epoch": 2.1142226848124217,
      "grad_norm": 1.0163218975067139,
      "learning_rate": 5.809584681378202e-06,
      "loss": 0.0127,
      "step": 1291900
    },
    {
      "epoch": 2.1142554152510753,
      "grad_norm": 0.7581601738929749,
      "learning_rate": 5.8095187891646855e-06,
      "loss": 0.0168,
      "step": 1291920
    },
    {
      "epoch": 2.1142881456897284,
      "grad_norm": 1.1238317489624023,
      "learning_rate": 5.8094528969511674e-06,
      "loss": 0.0187,
      "step": 1291940
    },
    {
      "epoch": 2.114320876128382,
      "grad_norm": 1.5917155742645264,
      "learning_rate": 5.809387004737651e-06,
      "loss": 0.014,
      "step": 1291960
    },
    {
      "epoch": 2.114353606567035,
      "grad_norm": 1.1422929763793945,
      "learning_rate": 5.809321112524133e-06,
      "loss": 0.0134,
      "step": 1291980
    },
    {
      "epoch": 2.1143863370056883,
      "grad_norm": 0.44946447014808655,
      "learning_rate": 5.8092552203106165e-06,
      "loss": 0.0095,
      "step": 1292000
    },
    {
      "epoch": 2.114419067444342,
      "grad_norm": 0.1539187729358673,
      "learning_rate": 5.809189328097099e-06,
      "loss": 0.0115,
      "step": 1292020
    },
    {
      "epoch": 2.114451797882995,
      "grad_norm": 0.48151740431785583,
      "learning_rate": 5.809123435883583e-06,
      "loss": 0.0163,
      "step": 1292040
    },
    {
      "epoch": 2.1144845283216487,
      "grad_norm": 0.688575804233551,
      "learning_rate": 5.809057543670065e-06,
      "loss": 0.0184,
      "step": 1292060
    },
    {
      "epoch": 2.114517258760302,
      "grad_norm": 0.6668922305107117,
      "learning_rate": 5.808991651456548e-06,
      "loss": 0.0177,
      "step": 1292080
    },
    {
      "epoch": 2.1145499891989554,
      "grad_norm": 0.575898289680481,
      "learning_rate": 5.80892575924303e-06,
      "loss": 0.0141,
      "step": 1292100
    },
    {
      "epoch": 2.1145827196376086,
      "grad_norm": 0.8416980504989624,
      "learning_rate": 5.808859867029514e-06,
      "loss": 0.02,
      "step": 1292120
    },
    {
      "epoch": 2.1146154500762617,
      "grad_norm": 0.5822237730026245,
      "learning_rate": 5.808793974815996e-06,
      "loss": 0.016,
      "step": 1292140
    },
    {
      "epoch": 2.1146481805149153,
      "grad_norm": 0.3849637508392334,
      "learning_rate": 5.808728082602479e-06,
      "loss": 0.0177,
      "step": 1292160
    },
    {
      "epoch": 2.1146809109535685,
      "grad_norm": 0.5719863176345825,
      "learning_rate": 5.808662190388962e-06,
      "loss": 0.0112,
      "step": 1292180
    },
    {
      "epoch": 2.114713641392222,
      "grad_norm": 1.7453705072402954,
      "learning_rate": 5.808596298175445e-06,
      "loss": 0.0107,
      "step": 1292200
    },
    {
      "epoch": 2.114746371830875,
      "grad_norm": 0.1630864143371582,
      "learning_rate": 5.8085304059619275e-06,
      "loss": 0.0146,
      "step": 1292220
    },
    {
      "epoch": 2.114779102269529,
      "grad_norm": 0.2220526486635208,
      "learning_rate": 5.808464513748411e-06,
      "loss": 0.0177,
      "step": 1292240
    },
    {
      "epoch": 2.114811832708182,
      "grad_norm": 0.025438552722334862,
      "learning_rate": 5.808398621534894e-06,
      "loss": 0.0137,
      "step": 1292260
    },
    {
      "epoch": 2.114844563146835,
      "grad_norm": 0.3606412410736084,
      "learning_rate": 5.8083327293213766e-06,
      "loss": 0.013,
      "step": 1292280
    },
    {
      "epoch": 2.1148772935854887,
      "grad_norm": 0.6177385449409485,
      "learning_rate": 5.80826683710786e-06,
      "loss": 0.0201,
      "step": 1292300
    },
    {
      "epoch": 2.114910024024142,
      "grad_norm": 0.297614723443985,
      "learning_rate": 5.808200944894342e-06,
      "loss": 0.0126,
      "step": 1292320
    },
    {
      "epoch": 2.1149427544627954,
      "grad_norm": 0.4781494438648224,
      "learning_rate": 5.808135052680826e-06,
      "loss": 0.0137,
      "step": 1292340
    },
    {
      "epoch": 2.1149754849014486,
      "grad_norm": 0.2355908900499344,
      "learning_rate": 5.8080691604673075e-06,
      "loss": 0.0146,
      "step": 1292360
    },
    {
      "epoch": 2.115008215340102,
      "grad_norm": 0.6987974643707275,
      "learning_rate": 5.808003268253791e-06,
      "loss": 0.0164,
      "step": 1292380
    },
    {
      "epoch": 2.1150409457787553,
      "grad_norm": 0.1768662929534912,
      "learning_rate": 5.807937376040274e-06,
      "loss": 0.0175,
      "step": 1292400
    },
    {
      "epoch": 2.1150736762174085,
      "grad_norm": 0.27434104681015015,
      "learning_rate": 5.807871483826757e-06,
      "loss": 0.0197,
      "step": 1292420
    },
    {
      "epoch": 2.115106406656062,
      "grad_norm": 0.19563153386116028,
      "learning_rate": 5.807805591613239e-06,
      "loss": 0.0134,
      "step": 1292440
    },
    {
      "epoch": 2.1151391370947152,
      "grad_norm": 0.14742803573608398,
      "learning_rate": 5.807739699399723e-06,
      "loss": 0.0124,
      "step": 1292460
    },
    {
      "epoch": 2.115171867533369,
      "grad_norm": 0.3519590497016907,
      "learning_rate": 5.807673807186205e-06,
      "loss": 0.0135,
      "step": 1292480
    },
    {
      "epoch": 2.115204597972022,
      "grad_norm": 0.2859559655189514,
      "learning_rate": 5.807607914972688e-06,
      "loss": 0.0148,
      "step": 1292500
    },
    {
      "epoch": 2.115237328410675,
      "grad_norm": 0.18918342888355255,
      "learning_rate": 5.80754202275917e-06,
      "loss": 0.0091,
      "step": 1292520
    },
    {
      "epoch": 2.1152700588493287,
      "grad_norm": 0.1313597410917282,
      "learning_rate": 5.807476130545654e-06,
      "loss": 0.0115,
      "step": 1292540
    },
    {
      "epoch": 2.115302789287982,
      "grad_norm": 0.2590266168117523,
      "learning_rate": 5.807410238332137e-06,
      "loss": 0.0114,
      "step": 1292560
    },
    {
      "epoch": 2.1153355197266355,
      "grad_norm": 0.15129996836185455,
      "learning_rate": 5.807344346118619e-06,
      "loss": 0.012,
      "step": 1292580
    },
    {
      "epoch": 2.1153682501652886,
      "grad_norm": 0.2077106237411499,
      "learning_rate": 5.807278453905103e-06,
      "loss": 0.0146,
      "step": 1292600
    },
    {
      "epoch": 2.115400980603942,
      "grad_norm": 0.20128275454044342,
      "learning_rate": 5.807212561691586e-06,
      "loss": 0.0185,
      "step": 1292620
    },
    {
      "epoch": 2.1154337110425954,
      "grad_norm": 0.5036754012107849,
      "learning_rate": 5.807146669478068e-06,
      "loss": 0.0142,
      "step": 1292640
    },
    {
      "epoch": 2.1154664414812485,
      "grad_norm": 0.45290741324424744,
      "learning_rate": 5.807080777264551e-06,
      "loss": 0.0127,
      "step": 1292660
    },
    {
      "epoch": 2.115499171919902,
      "grad_norm": 0.8322227597236633,
      "learning_rate": 5.807014885051035e-06,
      "loss": 0.0137,
      "step": 1292680
    },
    {
      "epoch": 2.1155319023585553,
      "grad_norm": 0.5944197773933411,
      "learning_rate": 5.806948992837517e-06,
      "loss": 0.0189,
      "step": 1292700
    },
    {
      "epoch": 2.115564632797209,
      "grad_norm": 0.6305582523345947,
      "learning_rate": 5.806883100624e-06,
      "loss": 0.0159,
      "step": 1292720
    },
    {
      "epoch": 2.115597363235862,
      "grad_norm": 0.14621993899345398,
      "learning_rate": 5.806817208410482e-06,
      "loss": 0.0159,
      "step": 1292740
    },
    {
      "epoch": 2.1156300936745156,
      "grad_norm": 0.40281566977500916,
      "learning_rate": 5.806751316196966e-06,
      "loss": 0.0202,
      "step": 1292760
    },
    {
      "epoch": 2.1156628241131687,
      "grad_norm": 1.30411696434021,
      "learning_rate": 5.8066854239834484e-06,
      "loss": 0.0163,
      "step": 1292780
    },
    {
      "epoch": 2.115695554551822,
      "grad_norm": 0.15808750689029694,
      "learning_rate": 5.806619531769931e-06,
      "loss": 0.0174,
      "step": 1292800
    },
    {
      "epoch": 2.1157282849904755,
      "grad_norm": 0.42764198780059814,
      "learning_rate": 5.806553639556414e-06,
      "loss": 0.0209,
      "step": 1292820
    },
    {
      "epoch": 2.1157610154291286,
      "grad_norm": 0.16711437702178955,
      "learning_rate": 5.8064877473428975e-06,
      "loss": 0.0129,
      "step": 1292840
    },
    {
      "epoch": 2.1157937458677822,
      "grad_norm": 0.915612518787384,
      "learning_rate": 5.806421855129379e-06,
      "loss": 0.0186,
      "step": 1292860
    },
    {
      "epoch": 2.1158264763064354,
      "grad_norm": 0.5683258175849915,
      "learning_rate": 5.806355962915863e-06,
      "loss": 0.0135,
      "step": 1292880
    },
    {
      "epoch": 2.115859206745089,
      "grad_norm": 0.26064005494117737,
      "learning_rate": 5.806290070702345e-06,
      "loss": 0.0202,
      "step": 1292900
    },
    {
      "epoch": 2.115891937183742,
      "grad_norm": 0.5208185315132141,
      "learning_rate": 5.8062241784888285e-06,
      "loss": 0.0124,
      "step": 1292920
    },
    {
      "epoch": 2.1159246676223953,
      "grad_norm": 0.1629403829574585,
      "learning_rate": 5.806158286275312e-06,
      "loss": 0.0124,
      "step": 1292940
    },
    {
      "epoch": 2.115957398061049,
      "grad_norm": 0.09679025411605835,
      "learning_rate": 5.806092394061794e-06,
      "loss": 0.0141,
      "step": 1292960
    },
    {
      "epoch": 2.115990128499702,
      "grad_norm": 0.3300912380218506,
      "learning_rate": 5.8060265018482775e-06,
      "loss": 0.0113,
      "step": 1292980
    },
    {
      "epoch": 2.1160228589383556,
      "grad_norm": 0.1947028785943985,
      "learning_rate": 5.805960609634759e-06,
      "loss": 0.0205,
      "step": 1293000
    },
    {
      "epoch": 2.1160555893770088,
      "grad_norm": 0.9306360483169556,
      "learning_rate": 5.805894717421243e-06,
      "loss": 0.0142,
      "step": 1293020
    },
    {
      "epoch": 2.1160883198156624,
      "grad_norm": 0.09607306867837906,
      "learning_rate": 5.805828825207726e-06,
      "loss": 0.0231,
      "step": 1293040
    },
    {
      "epoch": 2.1161210502543155,
      "grad_norm": 0.3550342619419098,
      "learning_rate": 5.8057629329942085e-06,
      "loss": 0.0145,
      "step": 1293060
    },
    {
      "epoch": 2.1161537806929687,
      "grad_norm": 0.2820577919483185,
      "learning_rate": 5.805697040780691e-06,
      "loss": 0.0118,
      "step": 1293080
    },
    {
      "epoch": 2.1161865111316223,
      "grad_norm": 0.16561393439769745,
      "learning_rate": 5.805631148567175e-06,
      "loss": 0.017,
      "step": 1293100
    },
    {
      "epoch": 2.1162192415702754,
      "grad_norm": 0.36439716815948486,
      "learning_rate": 5.805565256353657e-06,
      "loss": 0.0124,
      "step": 1293120
    },
    {
      "epoch": 2.116251972008929,
      "grad_norm": 0.16673576831817627,
      "learning_rate": 5.80549936414014e-06,
      "loss": 0.0136,
      "step": 1293140
    },
    {
      "epoch": 2.116284702447582,
      "grad_norm": 0.264436811208725,
      "learning_rate": 5.805433471926622e-06,
      "loss": 0.0142,
      "step": 1293160
    },
    {
      "epoch": 2.1163174328862357,
      "grad_norm": 0.7863836288452148,
      "learning_rate": 5.805367579713106e-06,
      "loss": 0.0141,
      "step": 1293180
    },
    {
      "epoch": 2.116350163324889,
      "grad_norm": 0.874355673789978,
      "learning_rate": 5.8053016874995885e-06,
      "loss": 0.0198,
      "step": 1293200
    },
    {
      "epoch": 2.116382893763542,
      "grad_norm": 0.32900920510292053,
      "learning_rate": 5.805235795286071e-06,
      "loss": 0.0141,
      "step": 1293220
    },
    {
      "epoch": 2.1164156242021956,
      "grad_norm": 0.5350109934806824,
      "learning_rate": 5.805169903072554e-06,
      "loss": 0.017,
      "step": 1293240
    },
    {
      "epoch": 2.116448354640849,
      "grad_norm": 0.49135059118270874,
      "learning_rate": 5.805104010859038e-06,
      "loss": 0.0098,
      "step": 1293260
    },
    {
      "epoch": 2.1164810850795024,
      "grad_norm": 6.594492435455322,
      "learning_rate": 5.8050381186455195e-06,
      "loss": 0.0221,
      "step": 1293280
    },
    {
      "epoch": 2.1165138155181555,
      "grad_norm": 0.13075123727321625,
      "learning_rate": 5.804972226432003e-06,
      "loss": 0.0172,
      "step": 1293300
    },
    {
      "epoch": 2.116546545956809,
      "grad_norm": 0.21495941281318665,
      "learning_rate": 5.804906334218487e-06,
      "loss": 0.0176,
      "step": 1293320
    },
    {
      "epoch": 2.1165792763954623,
      "grad_norm": 0.16216839849948883,
      "learning_rate": 5.8048404420049685e-06,
      "loss": 0.0226,
      "step": 1293340
    },
    {
      "epoch": 2.1166120068341154,
      "grad_norm": 0.24322983622550964,
      "learning_rate": 5.804774549791452e-06,
      "loss": 0.0159,
      "step": 1293360
    },
    {
      "epoch": 2.116644737272769,
      "grad_norm": 0.5459551215171814,
      "learning_rate": 5.804708657577934e-06,
      "loss": 0.0169,
      "step": 1293380
    },
    {
      "epoch": 2.116677467711422,
      "grad_norm": 0.15394341945648193,
      "learning_rate": 5.804642765364418e-06,
      "loss": 0.0127,
      "step": 1293400
    },
    {
      "epoch": 2.1167101981500758,
      "grad_norm": 0.11070899665355682,
      "learning_rate": 5.8045768731509e-06,
      "loss": 0.0099,
      "step": 1293420
    },
    {
      "epoch": 2.116742928588729,
      "grad_norm": 0.4018873870372772,
      "learning_rate": 5.804510980937383e-06,
      "loss": 0.0123,
      "step": 1293440
    },
    {
      "epoch": 2.116775659027382,
      "grad_norm": 0.2593134343624115,
      "learning_rate": 5.804445088723866e-06,
      "loss": 0.0195,
      "step": 1293460
    },
    {
      "epoch": 2.1168083894660357,
      "grad_norm": 0.840948760509491,
      "learning_rate": 5.804379196510349e-06,
      "loss": 0.017,
      "step": 1293480
    },
    {
      "epoch": 2.116841119904689,
      "grad_norm": 0.5760303139686584,
      "learning_rate": 5.804313304296831e-06,
      "loss": 0.0183,
      "step": 1293500
    },
    {
      "epoch": 2.1168738503433424,
      "grad_norm": 0.5940840244293213,
      "learning_rate": 5.804247412083315e-06,
      "loss": 0.0148,
      "step": 1293520
    },
    {
      "epoch": 2.1169065807819956,
      "grad_norm": 0.6418155431747437,
      "learning_rate": 5.804181519869797e-06,
      "loss": 0.0118,
      "step": 1293540
    },
    {
      "epoch": 2.116939311220649,
      "grad_norm": 0.30271580815315247,
      "learning_rate": 5.80411562765628e-06,
      "loss": 0.0165,
      "step": 1293560
    },
    {
      "epoch": 2.1169720416593023,
      "grad_norm": 0.6989172101020813,
      "learning_rate": 5.804049735442763e-06,
      "loss": 0.018,
      "step": 1293580
    },
    {
      "epoch": 2.1170047720979555,
      "grad_norm": 0.34692224860191345,
      "learning_rate": 5.803983843229246e-06,
      "loss": 0.0157,
      "step": 1293600
    },
    {
      "epoch": 2.117037502536609,
      "grad_norm": 0.20007683336734772,
      "learning_rate": 5.803917951015729e-06,
      "loss": 0.0106,
      "step": 1293620
    },
    {
      "epoch": 2.117070232975262,
      "grad_norm": 0.858599066734314,
      "learning_rate": 5.803852058802212e-06,
      "loss": 0.0183,
      "step": 1293640
    },
    {
      "epoch": 2.117102963413916,
      "grad_norm": 0.189004048705101,
      "learning_rate": 5.803786166588695e-06,
      "loss": 0.0172,
      "step": 1293660
    },
    {
      "epoch": 2.117135693852569,
      "grad_norm": 0.11129444092512131,
      "learning_rate": 5.803720274375178e-06,
      "loss": 0.0145,
      "step": 1293680
    },
    {
      "epoch": 2.1171684242912225,
      "grad_norm": 0.3203144669532776,
      "learning_rate": 5.803654382161661e-06,
      "loss": 0.0112,
      "step": 1293700
    },
    {
      "epoch": 2.1172011547298757,
      "grad_norm": 0.1955340951681137,
      "learning_rate": 5.803588489948143e-06,
      "loss": 0.0157,
      "step": 1293720
    },
    {
      "epoch": 2.117233885168529,
      "grad_norm": 0.31266069412231445,
      "learning_rate": 5.803522597734627e-06,
      "loss": 0.0105,
      "step": 1293740
    },
    {
      "epoch": 2.1172666156071824,
      "grad_norm": 0.37262392044067383,
      "learning_rate": 5.803456705521109e-06,
      "loss": 0.0154,
      "step": 1293760
    },
    {
      "epoch": 2.1172993460458356,
      "grad_norm": 0.22771333158016205,
      "learning_rate": 5.803390813307592e-06,
      "loss": 0.0154,
      "step": 1293780
    },
    {
      "epoch": 2.117332076484489,
      "grad_norm": 0.18781441450119019,
      "learning_rate": 5.803324921094075e-06,
      "loss": 0.0163,
      "step": 1293800
    },
    {
      "epoch": 2.1173648069231423,
      "grad_norm": 0.4975645840167999,
      "learning_rate": 5.803259028880558e-06,
      "loss": 0.0138,
      "step": 1293820
    },
    {
      "epoch": 2.117397537361796,
      "grad_norm": 0.11503149569034576,
      "learning_rate": 5.80319313666704e-06,
      "loss": 0.0171,
      "step": 1293840
    },
    {
      "epoch": 2.117430267800449,
      "grad_norm": 0.7930850386619568,
      "learning_rate": 5.803127244453524e-06,
      "loss": 0.0153,
      "step": 1293860
    },
    {
      "epoch": 2.1174629982391022,
      "grad_norm": 0.5501804947853088,
      "learning_rate": 5.803061352240006e-06,
      "loss": 0.0127,
      "step": 1293880
    },
    {
      "epoch": 2.117495728677756,
      "grad_norm": 0.3895646333694458,
      "learning_rate": 5.8029954600264895e-06,
      "loss": 0.0146,
      "step": 1293900
    },
    {
      "epoch": 2.117528459116409,
      "grad_norm": 0.5694573521614075,
      "learning_rate": 5.802929567812971e-06,
      "loss": 0.0141,
      "step": 1293920
    },
    {
      "epoch": 2.1175611895550626,
      "grad_norm": 0.40219929814338684,
      "learning_rate": 5.802863675599455e-06,
      "loss": 0.0189,
      "step": 1293940
    },
    {
      "epoch": 2.1175939199937157,
      "grad_norm": 0.45579591393470764,
      "learning_rate": 5.802797783385937e-06,
      "loss": 0.0196,
      "step": 1293960
    },
    {
      "epoch": 2.1176266504323693,
      "grad_norm": 0.38518664240837097,
      "learning_rate": 5.8027318911724204e-06,
      "loss": 0.0182,
      "step": 1293980
    },
    {
      "epoch": 2.1176593808710225,
      "grad_norm": 0.3100559115409851,
      "learning_rate": 5.802665998958904e-06,
      "loss": 0.0113,
      "step": 1294000
    },
    {
      "epoch": 2.1176921113096756,
      "grad_norm": 0.1014404371380806,
      "learning_rate": 5.802600106745386e-06,
      "loss": 0.0122,
      "step": 1294020
    },
    {
      "epoch": 2.117724841748329,
      "grad_norm": 0.14294196665287018,
      "learning_rate": 5.8025342145318695e-06,
      "loss": 0.0121,
      "step": 1294040
    },
    {
      "epoch": 2.1177575721869824,
      "grad_norm": 0.3791349530220032,
      "learning_rate": 5.802468322318352e-06,
      "loss": 0.0181,
      "step": 1294060
    },
    {
      "epoch": 2.117790302625636,
      "grad_norm": 0.45127710700035095,
      "learning_rate": 5.802402430104835e-06,
      "loss": 0.0183,
      "step": 1294080
    },
    {
      "epoch": 2.117823033064289,
      "grad_norm": 0.6443899273872375,
      "learning_rate": 5.802336537891318e-06,
      "loss": 0.0139,
      "step": 1294100
    },
    {
      "epoch": 2.1178557635029422,
      "grad_norm": 0.4288937747478485,
      "learning_rate": 5.802270645677801e-06,
      "loss": 0.0166,
      "step": 1294120
    },
    {
      "epoch": 2.117888493941596,
      "grad_norm": 0.052056584507226944,
      "learning_rate": 5.802204753464283e-06,
      "loss": 0.0095,
      "step": 1294140
    },
    {
      "epoch": 2.117921224380249,
      "grad_norm": 0.44167131185531616,
      "learning_rate": 5.802138861250767e-06,
      "loss": 0.0215,
      "step": 1294160
    },
    {
      "epoch": 2.1179539548189026,
      "grad_norm": 0.3643637001514435,
      "learning_rate": 5.802072969037249e-06,
      "loss": 0.0127,
      "step": 1294180
    },
    {
      "epoch": 2.1179866852575557,
      "grad_norm": 0.42239460349082947,
      "learning_rate": 5.802007076823732e-06,
      "loss": 0.0183,
      "step": 1294200
    },
    {
      "epoch": 2.1180194156962093,
      "grad_norm": 0.17567597329616547,
      "learning_rate": 5.801941184610215e-06,
      "loss": 0.0142,
      "step": 1294220
    },
    {
      "epoch": 2.1180521461348625,
      "grad_norm": 0.37599170207977295,
      "learning_rate": 5.801875292396698e-06,
      "loss": 0.0203,
      "step": 1294240
    },
    {
      "epoch": 2.1180848765735156,
      "grad_norm": 0.5858767628669739,
      "learning_rate": 5.8018094001831805e-06,
      "loss": 0.0102,
      "step": 1294260
    },
    {
      "epoch": 2.1181176070121692,
      "grad_norm": 0.40088599920272827,
      "learning_rate": 5.801743507969664e-06,
      "loss": 0.0127,
      "step": 1294280
    },
    {
      "epoch": 2.1181503374508224,
      "grad_norm": 0.6498580574989319,
      "learning_rate": 5.801677615756146e-06,
      "loss": 0.012,
      "step": 1294300
    },
    {
      "epoch": 2.118183067889476,
      "grad_norm": 0.05917865410447121,
      "learning_rate": 5.8016117235426296e-06,
      "loss": 0.0167,
      "step": 1294320
    },
    {
      "epoch": 2.118215798328129,
      "grad_norm": 0.6461125016212463,
      "learning_rate": 5.801545831329113e-06,
      "loss": 0.0154,
      "step": 1294340
    },
    {
      "epoch": 2.1182485287667827,
      "grad_norm": 0.9961016178131104,
      "learning_rate": 5.801479939115595e-06,
      "loss": 0.0215,
      "step": 1294360
    },
    {
      "epoch": 2.118281259205436,
      "grad_norm": 0.2846134603023529,
      "learning_rate": 5.801414046902079e-06,
      "loss": 0.0133,
      "step": 1294380
    },
    {
      "epoch": 2.118313989644089,
      "grad_norm": 0.14825375378131866,
      "learning_rate": 5.8013481546885605e-06,
      "loss": 0.0161,
      "step": 1294400
    },
    {
      "epoch": 2.1183467200827426,
      "grad_norm": 1.0119600296020508,
      "learning_rate": 5.801282262475044e-06,
      "loss": 0.0118,
      "step": 1294420
    },
    {
      "epoch": 2.1183794505213958,
      "grad_norm": 0.614482045173645,
      "learning_rate": 5.801216370261527e-06,
      "loss": 0.0191,
      "step": 1294440
    },
    {
      "epoch": 2.1184121809600494,
      "grad_norm": 0.2164284586906433,
      "learning_rate": 5.80115047804801e-06,
      "loss": 0.018,
      "step": 1294460
    },
    {
      "epoch": 2.1184449113987025,
      "grad_norm": 0.5128603577613831,
      "learning_rate": 5.801084585834492e-06,
      "loss": 0.0182,
      "step": 1294480
    },
    {
      "epoch": 2.118477641837356,
      "grad_norm": 0.7636723518371582,
      "learning_rate": 5.801018693620976e-06,
      "loss": 0.0147,
      "step": 1294500
    },
    {
      "epoch": 2.1185103722760092,
      "grad_norm": 0.17788372933864594,
      "learning_rate": 5.800952801407458e-06,
      "loss": 0.01,
      "step": 1294520
    },
    {
      "epoch": 2.1185431027146624,
      "grad_norm": 0.05461202189326286,
      "learning_rate": 5.800886909193941e-06,
      "loss": 0.0261,
      "step": 1294540
    },
    {
      "epoch": 2.118575833153316,
      "grad_norm": 0.47543609142303467,
      "learning_rate": 5.800821016980423e-06,
      "loss": 0.0124,
      "step": 1294560
    },
    {
      "epoch": 2.118608563591969,
      "grad_norm": 0.7066730856895447,
      "learning_rate": 5.800755124766907e-06,
      "loss": 0.0162,
      "step": 1294580
    },
    {
      "epoch": 2.1186412940306227,
      "grad_norm": 0.1609947830438614,
      "learning_rate": 5.80068923255339e-06,
      "loss": 0.0112,
      "step": 1294600
    },
    {
      "epoch": 2.118674024469276,
      "grad_norm": 0.5099826455116272,
      "learning_rate": 5.800623340339872e-06,
      "loss": 0.0193,
      "step": 1294620
    },
    {
      "epoch": 2.1187067549079295,
      "grad_norm": 0.9043972492218018,
      "learning_rate": 5.800557448126355e-06,
      "loss": 0.024,
      "step": 1294640
    },
    {
      "epoch": 2.1187394853465826,
      "grad_norm": 0.5294408798217773,
      "learning_rate": 5.800491555912839e-06,
      "loss": 0.0115,
      "step": 1294660
    },
    {
      "epoch": 2.118772215785236,
      "grad_norm": 0.8559409379959106,
      "learning_rate": 5.8004256636993206e-06,
      "loss": 0.0196,
      "step": 1294680
    },
    {
      "epoch": 2.1188049462238894,
      "grad_norm": 0.49918437004089355,
      "learning_rate": 5.800359771485804e-06,
      "loss": 0.0123,
      "step": 1294700
    },
    {
      "epoch": 2.1188376766625425,
      "grad_norm": 0.2741530239582062,
      "learning_rate": 5.800293879272288e-06,
      "loss": 0.0145,
      "step": 1294720
    },
    {
      "epoch": 2.118870407101196,
      "grad_norm": 0.3268303871154785,
      "learning_rate": 5.80022798705877e-06,
      "loss": 0.0133,
      "step": 1294740
    },
    {
      "epoch": 2.1189031375398493,
      "grad_norm": 0.31016069650650024,
      "learning_rate": 5.800162094845253e-06,
      "loss": 0.017,
      "step": 1294760
    },
    {
      "epoch": 2.118935867978503,
      "grad_norm": 0.1787939965724945,
      "learning_rate": 5.800096202631735e-06,
      "loss": 0.0154,
      "step": 1294780
    },
    {
      "epoch": 2.118968598417156,
      "grad_norm": 0.1786622703075409,
      "learning_rate": 5.800030310418219e-06,
      "loss": 0.0146,
      "step": 1294800
    },
    {
      "epoch": 2.119001328855809,
      "grad_norm": 1.5700019598007202,
      "learning_rate": 5.7999644182047014e-06,
      "loss": 0.0125,
      "step": 1294820
    },
    {
      "epoch": 2.1190340592944628,
      "grad_norm": 0.35929107666015625,
      "learning_rate": 5.799898525991184e-06,
      "loss": 0.0133,
      "step": 1294840
    },
    {
      "epoch": 2.119066789733116,
      "grad_norm": 0.27471351623535156,
      "learning_rate": 5.799832633777667e-06,
      "loss": 0.0146,
      "step": 1294860
    },
    {
      "epoch": 2.1190995201717695,
      "grad_norm": 0.15884701907634735,
      "learning_rate": 5.7997667415641505e-06,
      "loss": 0.0098,
      "step": 1294880
    },
    {
      "epoch": 2.1191322506104227,
      "grad_norm": 0.9327389001846313,
      "learning_rate": 5.799700849350632e-06,
      "loss": 0.0203,
      "step": 1294900
    },
    {
      "epoch": 2.1191649810490762,
      "grad_norm": 0.680820882320404,
      "learning_rate": 5.799634957137116e-06,
      "loss": 0.0163,
      "step": 1294920
    },
    {
      "epoch": 2.1191977114877294,
      "grad_norm": 0.6975698471069336,
      "learning_rate": 5.799569064923598e-06,
      "loss": 0.0143,
      "step": 1294940
    },
    {
      "epoch": 2.1192304419263825,
      "grad_norm": 0.29843926429748535,
      "learning_rate": 5.7995031727100815e-06,
      "loss": 0.024,
      "step": 1294960
    },
    {
      "epoch": 2.119263172365036,
      "grad_norm": 0.39783164858818054,
      "learning_rate": 5.799437280496563e-06,
      "loss": 0.0161,
      "step": 1294980
    },
    {
      "epoch": 2.1192959028036893,
      "grad_norm": 0.34484681487083435,
      "learning_rate": 5.799371388283047e-06,
      "loss": 0.0141,
      "step": 1295000
    },
    {
      "epoch": 2.119328633242343,
      "grad_norm": 0.14051999151706696,
      "learning_rate": 5.79930549606953e-06,
      "loss": 0.0173,
      "step": 1295020
    },
    {
      "epoch": 2.119361363680996,
      "grad_norm": 0.24658533930778503,
      "learning_rate": 5.7992396038560124e-06,
      "loss": 0.0155,
      "step": 1295040
    },
    {
      "epoch": 2.119394094119649,
      "grad_norm": 0.21060825884342194,
      "learning_rate": 5.799173711642496e-06,
      "loss": 0.0097,
      "step": 1295060
    },
    {
      "epoch": 2.119426824558303,
      "grad_norm": 0.2538151741027832,
      "learning_rate": 5.799107819428979e-06,
      "loss": 0.0226,
      "step": 1295080
    },
    {
      "epoch": 2.119459554996956,
      "grad_norm": 0.41555845737457275,
      "learning_rate": 5.7990419272154615e-06,
      "loss": 0.0131,
      "step": 1295100
    },
    {
      "epoch": 2.1194922854356095,
      "grad_norm": 0.7987060546875,
      "learning_rate": 5.798976035001944e-06,
      "loss": 0.0143,
      "step": 1295120
    },
    {
      "epoch": 2.1195250158742627,
      "grad_norm": 0.4421292245388031,
      "learning_rate": 5.798910142788428e-06,
      "loss": 0.0186,
      "step": 1295140
    },
    {
      "epoch": 2.1195577463129163,
      "grad_norm": 0.1173410564661026,
      "learning_rate": 5.79884425057491e-06,
      "loss": 0.0144,
      "step": 1295160
    },
    {
      "epoch": 2.1195904767515694,
      "grad_norm": 0.19773687422275543,
      "learning_rate": 5.798778358361393e-06,
      "loss": 0.0129,
      "step": 1295180
    },
    {
      "epoch": 2.1196232071902226,
      "grad_norm": 0.20679476857185364,
      "learning_rate": 5.798712466147875e-06,
      "loss": 0.0131,
      "step": 1295200
    },
    {
      "epoch": 2.119655937628876,
      "grad_norm": 0.277609258890152,
      "learning_rate": 5.798646573934359e-06,
      "loss": 0.0127,
      "step": 1295220
    },
    {
      "epoch": 2.1196886680675293,
      "grad_norm": 0.5403964519500732,
      "learning_rate": 5.7985806817208415e-06,
      "loss": 0.0132,
      "step": 1295240
    },
    {
      "epoch": 2.119721398506183,
      "grad_norm": 0.379517138004303,
      "learning_rate": 5.798514789507324e-06,
      "loss": 0.012,
      "step": 1295260
    },
    {
      "epoch": 2.119754128944836,
      "grad_norm": 0.644273579120636,
      "learning_rate": 5.798448897293807e-06,
      "loss": 0.0184,
      "step": 1295280
    },
    {
      "epoch": 2.1197868593834897,
      "grad_norm": 2.130108594894409,
      "learning_rate": 5.798383005080291e-06,
      "loss": 0.0144,
      "step": 1295300
    },
    {
      "epoch": 2.119819589822143,
      "grad_norm": 0.34465813636779785,
      "learning_rate": 5.7983171128667725e-06,
      "loss": 0.0118,
      "step": 1295320
    },
    {
      "epoch": 2.119852320260796,
      "grad_norm": 0.5159718990325928,
      "learning_rate": 5.798251220653256e-06,
      "loss": 0.012,
      "step": 1295340
    },
    {
      "epoch": 2.1198850506994495,
      "grad_norm": 0.321102112531662,
      "learning_rate": 5.798185328439738e-06,
      "loss": 0.0103,
      "step": 1295360
    },
    {
      "epoch": 2.1199177811381027,
      "grad_norm": 0.19272196292877197,
      "learning_rate": 5.7981194362262215e-06,
      "loss": 0.0153,
      "step": 1295380
    },
    {
      "epoch": 2.1199505115767563,
      "grad_norm": 0.4293804466724396,
      "learning_rate": 5.798053544012705e-06,
      "loss": 0.0188,
      "step": 1295400
    },
    {
      "epoch": 2.1199832420154094,
      "grad_norm": 0.20634828507900238,
      "learning_rate": 5.797987651799187e-06,
      "loss": 0.0113,
      "step": 1295420
    },
    {
      "epoch": 2.120015972454063,
      "grad_norm": 0.14308282732963562,
      "learning_rate": 5.797921759585671e-06,
      "loss": 0.0208,
      "step": 1295440
    },
    {
      "epoch": 2.120048702892716,
      "grad_norm": 0.3426920771598816,
      "learning_rate": 5.797855867372153e-06,
      "loss": 0.0123,
      "step": 1295460
    },
    {
      "epoch": 2.1200814333313693,
      "grad_norm": 0.3289177715778351,
      "learning_rate": 5.797789975158636e-06,
      "loss": 0.0188,
      "step": 1295480
    },
    {
      "epoch": 2.120114163770023,
      "grad_norm": 0.29883164167404175,
      "learning_rate": 5.797724082945119e-06,
      "loss": 0.0137,
      "step": 1295500
    },
    {
      "epoch": 2.120146894208676,
      "grad_norm": 0.30895379185676575,
      "learning_rate": 5.797658190731602e-06,
      "loss": 0.0201,
      "step": 1295520
    },
    {
      "epoch": 2.1201796246473297,
      "grad_norm": 0.8246134519577026,
      "learning_rate": 5.797592298518084e-06,
      "loss": 0.0206,
      "step": 1295540
    },
    {
      "epoch": 2.120212355085983,
      "grad_norm": 0.3644401431083679,
      "learning_rate": 5.797526406304568e-06,
      "loss": 0.0161,
      "step": 1295560
    },
    {
      "epoch": 2.120245085524636,
      "grad_norm": 0.2589513957500458,
      "learning_rate": 5.79746051409105e-06,
      "loss": 0.019,
      "step": 1295580
    },
    {
      "epoch": 2.1202778159632896,
      "grad_norm": 0.3343591094017029,
      "learning_rate": 5.797394621877533e-06,
      "loss": 0.0121,
      "step": 1295600
    },
    {
      "epoch": 2.1203105464019427,
      "grad_norm": 0.18423248827457428,
      "learning_rate": 5.797328729664016e-06,
      "loss": 0.0197,
      "step": 1295620
    },
    {
      "epoch": 2.1203432768405963,
      "grad_norm": 0.14412619173526764,
      "learning_rate": 5.797262837450499e-06,
      "loss": 0.0165,
      "step": 1295640
    },
    {
      "epoch": 2.1203760072792495,
      "grad_norm": 0.5872067213058472,
      "learning_rate": 5.797196945236982e-06,
      "loss": 0.0227,
      "step": 1295660
    },
    {
      "epoch": 2.120408737717903,
      "grad_norm": 0.6323556303977966,
      "learning_rate": 5.797131053023465e-06,
      "loss": 0.0136,
      "step": 1295680
    },
    {
      "epoch": 2.120441468156556,
      "grad_norm": 0.25490567088127136,
      "learning_rate": 5.797065160809947e-06,
      "loss": 0.0114,
      "step": 1295700
    },
    {
      "epoch": 2.1204741985952094,
      "grad_norm": 0.3188205063343048,
      "learning_rate": 5.796999268596431e-06,
      "loss": 0.0168,
      "step": 1295720
    },
    {
      "epoch": 2.120506929033863,
      "grad_norm": 0.20427335798740387,
      "learning_rate": 5.7969333763829126e-06,
      "loss": 0.0105,
      "step": 1295740
    },
    {
      "epoch": 2.120539659472516,
      "grad_norm": 0.20198176801204681,
      "learning_rate": 5.796867484169396e-06,
      "loss": 0.0156,
      "step": 1295760
    },
    {
      "epoch": 2.1205723899111697,
      "grad_norm": 3.1982452869415283,
      "learning_rate": 5.79680159195588e-06,
      "loss": 0.0188,
      "step": 1295780
    },
    {
      "epoch": 2.120605120349823,
      "grad_norm": 0.7564956545829773,
      "learning_rate": 5.796735699742362e-06,
      "loss": 0.017,
      "step": 1295800
    },
    {
      "epoch": 2.1206378507884764,
      "grad_norm": 0.12729975581169128,
      "learning_rate": 5.796669807528845e-06,
      "loss": 0.0156,
      "step": 1295820
    },
    {
      "epoch": 2.1206705812271296,
      "grad_norm": 0.22207561135292053,
      "learning_rate": 5.796603915315327e-06,
      "loss": 0.0136,
      "step": 1295840
    },
    {
      "epoch": 2.1207033116657827,
      "grad_norm": 0.07481483370065689,
      "learning_rate": 5.796538023101811e-06,
      "loss": 0.0156,
      "step": 1295860
    },
    {
      "epoch": 2.1207360421044363,
      "grad_norm": 0.2210986316204071,
      "learning_rate": 5.7964721308882934e-06,
      "loss": 0.0194,
      "step": 1295880
    },
    {
      "epoch": 2.1207687725430895,
      "grad_norm": 0.5487368702888489,
      "learning_rate": 5.796406238674776e-06,
      "loss": 0.0199,
      "step": 1295900
    },
    {
      "epoch": 2.120801502981743,
      "grad_norm": 0.5372664928436279,
      "learning_rate": 5.796340346461259e-06,
      "loss": 0.015,
      "step": 1295920
    },
    {
      "epoch": 2.1208342334203962,
      "grad_norm": 0.15055455267429352,
      "learning_rate": 5.7962744542477425e-06,
      "loss": 0.0186,
      "step": 1295940
    },
    {
      "epoch": 2.12086696385905,
      "grad_norm": 0.3149656057357788,
      "learning_rate": 5.796208562034224e-06,
      "loss": 0.0114,
      "step": 1295960
    },
    {
      "epoch": 2.120899694297703,
      "grad_norm": 1.0664987564086914,
      "learning_rate": 5.796142669820708e-06,
      "loss": 0.0233,
      "step": 1295980
    },
    {
      "epoch": 2.120932424736356,
      "grad_norm": 3.157038927078247,
      "learning_rate": 5.79607677760719e-06,
      "loss": 0.0204,
      "step": 1296000
    },
    {
      "epoch": 2.1209651551750097,
      "grad_norm": 0.2627311944961548,
      "learning_rate": 5.7960108853936734e-06,
      "loss": 0.0186,
      "step": 1296020
    },
    {
      "epoch": 2.120997885613663,
      "grad_norm": 0.14427457749843597,
      "learning_rate": 5.795944993180156e-06,
      "loss": 0.0151,
      "step": 1296040
    },
    {
      "epoch": 2.1210306160523165,
      "grad_norm": 0.4244939982891083,
      "learning_rate": 5.795879100966639e-06,
      "loss": 0.0131,
      "step": 1296060
    },
    {
      "epoch": 2.1210633464909696,
      "grad_norm": 0.4368011951446533,
      "learning_rate": 5.795813208753122e-06,
      "loss": 0.0113,
      "step": 1296080
    },
    {
      "epoch": 2.121096076929623,
      "grad_norm": 0.6444697380065918,
      "learning_rate": 5.795747316539605e-06,
      "loss": 0.017,
      "step": 1296100
    },
    {
      "epoch": 2.1211288073682764,
      "grad_norm": 0.45407599210739136,
      "learning_rate": 5.795681424326088e-06,
      "loss": 0.0185,
      "step": 1296120
    },
    {
      "epoch": 2.1211615378069295,
      "grad_norm": 0.3108495771884918,
      "learning_rate": 5.795615532112571e-06,
      "loss": 0.016,
      "step": 1296140
    },
    {
      "epoch": 2.121194268245583,
      "grad_norm": 0.3506857752799988,
      "learning_rate": 5.795549639899054e-06,
      "loss": 0.0125,
      "step": 1296160
    },
    {
      "epoch": 2.1212269986842363,
      "grad_norm": 0.4391704797744751,
      "learning_rate": 5.795483747685536e-06,
      "loss": 0.0229,
      "step": 1296180
    },
    {
      "epoch": 2.12125972912289,
      "grad_norm": 0.9044251441955566,
      "learning_rate": 5.79541785547202e-06,
      "loss": 0.0185,
      "step": 1296200
    },
    {
      "epoch": 2.121292459561543,
      "grad_norm": 0.207466259598732,
      "learning_rate": 5.795351963258502e-06,
      "loss": 0.0164,
      "step": 1296220
    },
    {
      "epoch": 2.1213251900001966,
      "grad_norm": 0.5310385823249817,
      "learning_rate": 5.795286071044985e-06,
      "loss": 0.0157,
      "step": 1296240
    },
    {
      "epoch": 2.1213579204388497,
      "grad_norm": 0.43122872710227966,
      "learning_rate": 5.795220178831468e-06,
      "loss": 0.0241,
      "step": 1296260
    },
    {
      "epoch": 2.121390650877503,
      "grad_norm": 0.2933095395565033,
      "learning_rate": 5.795154286617951e-06,
      "loss": 0.0143,
      "step": 1296280
    },
    {
      "epoch": 2.1214233813161565,
      "grad_norm": 0.6412030458450317,
      "learning_rate": 5.7950883944044335e-06,
      "loss": 0.0276,
      "step": 1296300
    },
    {
      "epoch": 2.1214561117548096,
      "grad_norm": 0.2939961552619934,
      "learning_rate": 5.795022502190917e-06,
      "loss": 0.0118,
      "step": 1296320
    },
    {
      "epoch": 2.1214888421934632,
      "grad_norm": 1.1361314058303833,
      "learning_rate": 5.794956609977399e-06,
      "loss": 0.0185,
      "step": 1296340
    },
    {
      "epoch": 2.1215215726321164,
      "grad_norm": 0.20092126727104187,
      "learning_rate": 5.7948907177638826e-06,
      "loss": 0.0141,
      "step": 1296360
    },
    {
      "epoch": 2.12155430307077,
      "grad_norm": 0.19865231215953827,
      "learning_rate": 5.7948248255503645e-06,
      "loss": 0.0177,
      "step": 1296380
    },
    {
      "epoch": 2.121587033509423,
      "grad_norm": 0.4274463951587677,
      "learning_rate": 5.794758933336848e-06,
      "loss": 0.0144,
      "step": 1296400
    },
    {
      "epoch": 2.1216197639480763,
      "grad_norm": 0.25602081418037415,
      "learning_rate": 5.794693041123331e-06,
      "loss": 0.0183,
      "step": 1296420
    },
    {
      "epoch": 2.12165249438673,
      "grad_norm": 0.1481846123933792,
      "learning_rate": 5.7946271489098135e-06,
      "loss": 0.0073,
      "step": 1296440
    },
    {
      "epoch": 2.121685224825383,
      "grad_norm": 0.18209724128246307,
      "learning_rate": 5.794561256696297e-06,
      "loss": 0.0152,
      "step": 1296460
    },
    {
      "epoch": 2.1217179552640366,
      "grad_norm": 0.17366854846477509,
      "learning_rate": 5.79449536448278e-06,
      "loss": 0.0104,
      "step": 1296480
    },
    {
      "epoch": 2.1217506857026898,
      "grad_norm": 0.7094668745994568,
      "learning_rate": 5.794429472269263e-06,
      "loss": 0.0196,
      "step": 1296500
    },
    {
      "epoch": 2.121783416141343,
      "grad_norm": 0.30575159192085266,
      "learning_rate": 5.794363580055745e-06,
      "loss": 0.0181,
      "step": 1296520
    },
    {
      "epoch": 2.1218161465799965,
      "grad_norm": 0.9564988017082214,
      "learning_rate": 5.794297687842229e-06,
      "loss": 0.0206,
      "step": 1296540
    },
    {
      "epoch": 2.1218488770186497,
      "grad_norm": 0.720731258392334,
      "learning_rate": 5.794231795628711e-06,
      "loss": 0.0187,
      "step": 1296560
    },
    {
      "epoch": 2.1218816074573033,
      "grad_norm": 0.43819692730903625,
      "learning_rate": 5.794165903415194e-06,
      "loss": 0.0182,
      "step": 1296580
    },
    {
      "epoch": 2.1219143378959564,
      "grad_norm": 0.3407963216304779,
      "learning_rate": 5.794100011201676e-06,
      "loss": 0.02,
      "step": 1296600
    },
    {
      "epoch": 2.12194706833461,
      "grad_norm": 0.4306418001651764,
      "learning_rate": 5.79403411898816e-06,
      "loss": 0.0143,
      "step": 1296620
    },
    {
      "epoch": 2.121979798773263,
      "grad_norm": 0.12452090531587601,
      "learning_rate": 5.793968226774643e-06,
      "loss": 0.0136,
      "step": 1296640
    },
    {
      "epoch": 2.1220125292119163,
      "grad_norm": 0.3600223660469055,
      "learning_rate": 5.793902334561125e-06,
      "loss": 0.0115,
      "step": 1296660
    },
    {
      "epoch": 2.12204525965057,
      "grad_norm": 0.3875063359737396,
      "learning_rate": 5.793836442347608e-06,
      "loss": 0.0203,
      "step": 1296680
    },
    {
      "epoch": 2.122077990089223,
      "grad_norm": 1.0619162321090698,
      "learning_rate": 5.793770550134092e-06,
      "loss": 0.0215,
      "step": 1296700
    },
    {
      "epoch": 2.1221107205278766,
      "grad_norm": 0.8177586793899536,
      "learning_rate": 5.7937046579205736e-06,
      "loss": 0.0127,
      "step": 1296720
    },
    {
      "epoch": 2.12214345096653,
      "grad_norm": 0.5919057130813599,
      "learning_rate": 5.793638765707057e-06,
      "loss": 0.0108,
      "step": 1296740
    },
    {
      "epoch": 2.1221761814051834,
      "grad_norm": 0.4350222051143646,
      "learning_rate": 5.793572873493539e-06,
      "loss": 0.0124,
      "step": 1296760
    },
    {
      "epoch": 2.1222089118438365,
      "grad_norm": 0.2597027122974396,
      "learning_rate": 5.793506981280023e-06,
      "loss": 0.0112,
      "step": 1296780
    },
    {
      "epoch": 2.1222416422824897,
      "grad_norm": 0.06036552041769028,
      "learning_rate": 5.7934410890665045e-06,
      "loss": 0.01,
      "step": 1296800
    },
    {
      "epoch": 2.1222743727211433,
      "grad_norm": 1.1267545223236084,
      "learning_rate": 5.793375196852988e-06,
      "loss": 0.0187,
      "step": 1296820
    },
    {
      "epoch": 2.1223071031597964,
      "grad_norm": 0.9416176080703735,
      "learning_rate": 5.793309304639472e-06,
      "loss": 0.0172,
      "step": 1296840
    },
    {
      "epoch": 2.12233983359845,
      "grad_norm": 0.33396926522254944,
      "learning_rate": 5.793243412425954e-06,
      "loss": 0.0158,
      "step": 1296860
    },
    {
      "epoch": 2.122372564037103,
      "grad_norm": 4.854748249053955,
      "learning_rate": 5.793177520212437e-06,
      "loss": 0.0259,
      "step": 1296880
    },
    {
      "epoch": 2.1224052944757568,
      "grad_norm": 0.9947794079780579,
      "learning_rate": 5.79311162799892e-06,
      "loss": 0.0167,
      "step": 1296900
    },
    {
      "epoch": 2.12243802491441,
      "grad_norm": 0.846689760684967,
      "learning_rate": 5.793045735785403e-06,
      "loss": 0.0173,
      "step": 1296920
    },
    {
      "epoch": 2.122470755353063,
      "grad_norm": 0.3570675253868103,
      "learning_rate": 5.792979843571885e-06,
      "loss": 0.0095,
      "step": 1296940
    },
    {
      "epoch": 2.1225034857917167,
      "grad_norm": 0.2522583305835724,
      "learning_rate": 5.792913951358369e-06,
      "loss": 0.018,
      "step": 1296960
    },
    {
      "epoch": 2.12253621623037,
      "grad_norm": 0.19254812598228455,
      "learning_rate": 5.792848059144851e-06,
      "loss": 0.0118,
      "step": 1296980
    },
    {
      "epoch": 2.1225689466690234,
      "grad_norm": 0.3644101321697235,
      "learning_rate": 5.7927821669313345e-06,
      "loss": 0.0159,
      "step": 1297000
    },
    {
      "epoch": 2.1226016771076766,
      "grad_norm": 0.23244169354438782,
      "learning_rate": 5.792716274717816e-06,
      "loss": 0.0144,
      "step": 1297020
    },
    {
      "epoch": 2.12263440754633,
      "grad_norm": 0.10796941071748734,
      "learning_rate": 5.7926503825043e-06,
      "loss": 0.0179,
      "step": 1297040
    },
    {
      "epoch": 2.1226671379849833,
      "grad_norm": 0.47611987590789795,
      "learning_rate": 5.792584490290783e-06,
      "loss": 0.017,
      "step": 1297060
    },
    {
      "epoch": 2.1226998684236364,
      "grad_norm": 0.36520999670028687,
      "learning_rate": 5.7925185980772654e-06,
      "loss": 0.0109,
      "step": 1297080
    },
    {
      "epoch": 2.12273259886229,
      "grad_norm": 0.2874879539012909,
      "learning_rate": 5.792452705863748e-06,
      "loss": 0.0131,
      "step": 1297100
    },
    {
      "epoch": 2.122765329300943,
      "grad_norm": 0.3851926624774933,
      "learning_rate": 5.792386813650232e-06,
      "loss": 0.0179,
      "step": 1297120
    },
    {
      "epoch": 2.122798059739597,
      "grad_norm": 0.33031991124153137,
      "learning_rate": 5.792320921436714e-06,
      "loss": 0.0119,
      "step": 1297140
    },
    {
      "epoch": 2.12283079017825,
      "grad_norm": 0.26992547512054443,
      "learning_rate": 5.792255029223197e-06,
      "loss": 0.012,
      "step": 1297160
    },
    {
      "epoch": 2.122863520616903,
      "grad_norm": 0.4116886854171753,
      "learning_rate": 5.792189137009681e-06,
      "loss": 0.0189,
      "step": 1297180
    },
    {
      "epoch": 2.1228962510555567,
      "grad_norm": 0.4297367334365845,
      "learning_rate": 5.792123244796163e-06,
      "loss": 0.0187,
      "step": 1297200
    },
    {
      "epoch": 2.12292898149421,
      "grad_norm": 0.468641996383667,
      "learning_rate": 5.792057352582646e-06,
      "loss": 0.0155,
      "step": 1297220
    },
    {
      "epoch": 2.1229617119328634,
      "grad_norm": 0.381693571805954,
      "learning_rate": 5.791991460369128e-06,
      "loss": 0.0097,
      "step": 1297240
    },
    {
      "epoch": 2.1229944423715166,
      "grad_norm": 2.296571731567383,
      "learning_rate": 5.791925568155612e-06,
      "loss": 0.0207,
      "step": 1297260
    },
    {
      "epoch": 2.12302717281017,
      "grad_norm": 0.3167305886745453,
      "learning_rate": 5.7918596759420945e-06,
      "loss": 0.0083,
      "step": 1297280
    },
    {
      "epoch": 2.1230599032488233,
      "grad_norm": 0.4088380038738251,
      "learning_rate": 5.791793783728577e-06,
      "loss": 0.0162,
      "step": 1297300
    },
    {
      "epoch": 2.1230926336874765,
      "grad_norm": 0.12165003269910812,
      "learning_rate": 5.79172789151506e-06,
      "loss": 0.0168,
      "step": 1297320
    },
    {
      "epoch": 2.12312536412613,
      "grad_norm": 0.15261174738407135,
      "learning_rate": 5.791661999301544e-06,
      "loss": 0.0164,
      "step": 1297340
    },
    {
      "epoch": 2.123158094564783,
      "grad_norm": 0.2511264979839325,
      "learning_rate": 5.7915961070880255e-06,
      "loss": 0.0159,
      "step": 1297360
    },
    {
      "epoch": 2.123190825003437,
      "grad_norm": 1.0449979305267334,
      "learning_rate": 5.791530214874509e-06,
      "loss": 0.0159,
      "step": 1297380
    },
    {
      "epoch": 2.12322355544209,
      "grad_norm": 0.5254407525062561,
      "learning_rate": 5.791464322660991e-06,
      "loss": 0.0188,
      "step": 1297400
    },
    {
      "epoch": 2.1232562858807436,
      "grad_norm": 0.17418792843818665,
      "learning_rate": 5.7913984304474745e-06,
      "loss": 0.0113,
      "step": 1297420
    },
    {
      "epoch": 2.1232890163193967,
      "grad_norm": 0.2274457812309265,
      "learning_rate": 5.791332538233957e-06,
      "loss": 0.0163,
      "step": 1297440
    },
    {
      "epoch": 2.12332174675805,
      "grad_norm": 0.5662262439727783,
      "learning_rate": 5.79126664602044e-06,
      "loss": 0.0088,
      "step": 1297460
    },
    {
      "epoch": 2.1233544771967034,
      "grad_norm": 0.19210238754749298,
      "learning_rate": 5.791200753806923e-06,
      "loss": 0.0154,
      "step": 1297480
    },
    {
      "epoch": 2.1233872076353566,
      "grad_norm": 0.3525875508785248,
      "learning_rate": 5.791134861593406e-06,
      "loss": 0.0134,
      "step": 1297500
    },
    {
      "epoch": 2.12341993807401,
      "grad_norm": 0.12648259103298187,
      "learning_rate": 5.791068969379889e-06,
      "loss": 0.0108,
      "step": 1297520
    },
    {
      "epoch": 2.1234526685126633,
      "grad_norm": 0.41871580481529236,
      "learning_rate": 5.791003077166372e-06,
      "loss": 0.0161,
      "step": 1297540
    },
    {
      "epoch": 2.123485398951317,
      "grad_norm": 0.4264625012874603,
      "learning_rate": 5.790937184952855e-06,
      "loss": 0.0117,
      "step": 1297560
    },
    {
      "epoch": 2.12351812938997,
      "grad_norm": 0.6893793344497681,
      "learning_rate": 5.790871292739337e-06,
      "loss": 0.0135,
      "step": 1297580
    },
    {
      "epoch": 2.1235508598286232,
      "grad_norm": 0.3035075068473816,
      "learning_rate": 5.790805400525821e-06,
      "loss": 0.0187,
      "step": 1297600
    },
    {
      "epoch": 2.123583590267277,
      "grad_norm": 0.16735048592090607,
      "learning_rate": 5.790739508312303e-06,
      "loss": 0.0142,
      "step": 1297620
    },
    {
      "epoch": 2.12361632070593,
      "grad_norm": 0.820012629032135,
      "learning_rate": 5.790673616098786e-06,
      "loss": 0.0195,
      "step": 1297640
    },
    {
      "epoch": 2.1236490511445836,
      "grad_norm": 0.44765371084213257,
      "learning_rate": 5.790607723885269e-06,
      "loss": 0.0149,
      "step": 1297660
    },
    {
      "epoch": 2.1236817815832367,
      "grad_norm": 0.3649488687515259,
      "learning_rate": 5.790541831671752e-06,
      "loss": 0.0183,
      "step": 1297680
    },
    {
      "epoch": 2.1237145120218903,
      "grad_norm": 0.5051325559616089,
      "learning_rate": 5.790475939458235e-06,
      "loss": 0.0148,
      "step": 1297700
    },
    {
      "epoch": 2.1237472424605435,
      "grad_norm": 1.2135118246078491,
      "learning_rate": 5.790410047244718e-06,
      "loss": 0.021,
      "step": 1297720
    },
    {
      "epoch": 2.1237799728991966,
      "grad_norm": 0.12449587881565094,
      "learning_rate": 5.7903441550312e-06,
      "loss": 0.0138,
      "step": 1297740
    },
    {
      "epoch": 2.12381270333785,
      "grad_norm": 0.6161314249038696,
      "learning_rate": 5.790278262817684e-06,
      "loss": 0.0167,
      "step": 1297760
    },
    {
      "epoch": 2.1238454337765034,
      "grad_norm": 0.7564598917961121,
      "learning_rate": 5.7902123706041656e-06,
      "loss": 0.0142,
      "step": 1297780
    },
    {
      "epoch": 2.123878164215157,
      "grad_norm": 0.4933221638202667,
      "learning_rate": 5.790146478390649e-06,
      "loss": 0.0125,
      "step": 1297800
    },
    {
      "epoch": 2.12391089465381,
      "grad_norm": 0.5900371670722961,
      "learning_rate": 5.790080586177131e-06,
      "loss": 0.023,
      "step": 1297820
    },
    {
      "epoch": 2.1239436250924637,
      "grad_norm": 0.2630419433116913,
      "learning_rate": 5.790014693963615e-06,
      "loss": 0.0108,
      "step": 1297840
    },
    {
      "epoch": 2.123976355531117,
      "grad_norm": 0.2800525724887848,
      "learning_rate": 5.789948801750098e-06,
      "loss": 0.0124,
      "step": 1297860
    },
    {
      "epoch": 2.12400908596977,
      "grad_norm": 0.20817334949970245,
      "learning_rate": 5.78988290953658e-06,
      "loss": 0.011,
      "step": 1297880
    },
    {
      "epoch": 2.1240418164084236,
      "grad_norm": 0.15077313780784607,
      "learning_rate": 5.789817017323064e-06,
      "loss": 0.0193,
      "step": 1297900
    },
    {
      "epoch": 2.1240745468470767,
      "grad_norm": 0.5742990374565125,
      "learning_rate": 5.7897511251095464e-06,
      "loss": 0.0168,
      "step": 1297920
    },
    {
      "epoch": 2.1241072772857303,
      "grad_norm": 0.5260546207427979,
      "learning_rate": 5.789685232896029e-06,
      "loss": 0.0147,
      "step": 1297940
    },
    {
      "epoch": 2.1241400077243835,
      "grad_norm": 0.2529595196247101,
      "learning_rate": 5.789619340682512e-06,
      "loss": 0.011,
      "step": 1297960
    },
    {
      "epoch": 2.124172738163037,
      "grad_norm": 0.3701455593109131,
      "learning_rate": 5.7895534484689955e-06,
      "loss": 0.0139,
      "step": 1297980
    },
    {
      "epoch": 2.1242054686016902,
      "grad_norm": 0.26454323530197144,
      "learning_rate": 5.789487556255477e-06,
      "loss": 0.0141,
      "step": 1298000
    },
    {
      "epoch": 2.1242381990403434,
      "grad_norm": 0.758162796497345,
      "learning_rate": 5.789421664041961e-06,
      "loss": 0.0177,
      "step": 1298020
    },
    {
      "epoch": 2.124270929478997,
      "grad_norm": 0.2471880465745926,
      "learning_rate": 5.789355771828443e-06,
      "loss": 0.019,
      "step": 1298040
    },
    {
      "epoch": 2.12430365991765,
      "grad_norm": 0.3134263753890991,
      "learning_rate": 5.7892898796149265e-06,
      "loss": 0.0123,
      "step": 1298060
    },
    {
      "epoch": 2.1243363903563037,
      "grad_norm": 0.33409735560417175,
      "learning_rate": 5.789223987401409e-06,
      "loss": 0.0151,
      "step": 1298080
    },
    {
      "epoch": 2.124369120794957,
      "grad_norm": 0.46906453371047974,
      "learning_rate": 5.789158095187892e-06,
      "loss": 0.0169,
      "step": 1298100
    },
    {
      "epoch": 2.12440185123361,
      "grad_norm": 0.46594882011413574,
      "learning_rate": 5.789092202974375e-06,
      "loss": 0.0195,
      "step": 1298120
    },
    {
      "epoch": 2.1244345816722636,
      "grad_norm": 0.4279572069644928,
      "learning_rate": 5.789026310760858e-06,
      "loss": 0.0158,
      "step": 1298140
    },
    {
      "epoch": 2.1244673121109168,
      "grad_norm": 0.3372337222099304,
      "learning_rate": 5.78896041854734e-06,
      "loss": 0.0113,
      "step": 1298160
    },
    {
      "epoch": 2.1245000425495704,
      "grad_norm": 0.49553927779197693,
      "learning_rate": 5.788894526333824e-06,
      "loss": 0.021,
      "step": 1298180
    },
    {
      "epoch": 2.1245327729882235,
      "grad_norm": 0.5187840461730957,
      "learning_rate": 5.788828634120306e-06,
      "loss": 0.0115,
      "step": 1298200
    },
    {
      "epoch": 2.124565503426877,
      "grad_norm": 0.7912929654121399,
      "learning_rate": 5.788762741906789e-06,
      "loss": 0.0107,
      "step": 1298220
    },
    {
      "epoch": 2.1245982338655303,
      "grad_norm": 0.6727259755134583,
      "learning_rate": 5.788696849693273e-06,
      "loss": 0.0161,
      "step": 1298240
    },
    {
      "epoch": 2.1246309643041834,
      "grad_norm": 0.45170411467552185,
      "learning_rate": 5.788630957479755e-06,
      "loss": 0.0176,
      "step": 1298260
    },
    {
      "epoch": 2.124663694742837,
      "grad_norm": 0.055441658943891525,
      "learning_rate": 5.788565065266238e-06,
      "loss": 0.013,
      "step": 1298280
    },
    {
      "epoch": 2.12469642518149,
      "grad_norm": 0.4686184823513031,
      "learning_rate": 5.788499173052721e-06,
      "loss": 0.0225,
      "step": 1298300
    },
    {
      "epoch": 2.1247291556201438,
      "grad_norm": 0.44505226612091064,
      "learning_rate": 5.788433280839204e-06,
      "loss": 0.0187,
      "step": 1298320
    },
    {
      "epoch": 2.124761886058797,
      "grad_norm": 0.11083100736141205,
      "learning_rate": 5.7883673886256865e-06,
      "loss": 0.0149,
      "step": 1298340
    },
    {
      "epoch": 2.1247946164974505,
      "grad_norm": 0.6731840968132019,
      "learning_rate": 5.78830149641217e-06,
      "loss": 0.0136,
      "step": 1298360
    },
    {
      "epoch": 2.1248273469361036,
      "grad_norm": 1.4020556211471558,
      "learning_rate": 5.788235604198652e-06,
      "loss": 0.0153,
      "step": 1298380
    },
    {
      "epoch": 2.124860077374757,
      "grad_norm": 0.6300158500671387,
      "learning_rate": 5.7881697119851356e-06,
      "loss": 0.0136,
      "step": 1298400
    },
    {
      "epoch": 2.1248928078134104,
      "grad_norm": 0.8486549854278564,
      "learning_rate": 5.7881038197716175e-06,
      "loss": 0.0147,
      "step": 1298420
    },
    {
      "epoch": 2.1249255382520635,
      "grad_norm": 0.5166927576065063,
      "learning_rate": 5.788037927558101e-06,
      "loss": 0.0168,
      "step": 1298440
    },
    {
      "epoch": 2.124958268690717,
      "grad_norm": 0.1547466218471527,
      "learning_rate": 5.787972035344584e-06,
      "loss": 0.0203,
      "step": 1298460
    },
    {
      "epoch": 2.1249909991293703,
      "grad_norm": 0.15156975388526917,
      "learning_rate": 5.7879061431310665e-06,
      "loss": 0.014,
      "step": 1298480
    },
    {
      "epoch": 2.125023729568024,
      "grad_norm": 0.9429241418838501,
      "learning_rate": 5.787840250917549e-06,
      "loss": 0.0176,
      "step": 1298500
    },
    {
      "epoch": 2.125056460006677,
      "grad_norm": 0.47268587350845337,
      "learning_rate": 5.787774358704033e-06,
      "loss": 0.0109,
      "step": 1298520
    },
    {
      "epoch": 2.12508919044533,
      "grad_norm": 0.7333455681800842,
      "learning_rate": 5.787708466490515e-06,
      "loss": 0.0135,
      "step": 1298540
    },
    {
      "epoch": 2.1251219208839838,
      "grad_norm": 0.22032330930233002,
      "learning_rate": 5.787642574276998e-06,
      "loss": 0.0178,
      "step": 1298560
    },
    {
      "epoch": 2.125154651322637,
      "grad_norm": 0.5486740469932556,
      "learning_rate": 5.787576682063482e-06,
      "loss": 0.0193,
      "step": 1298580
    },
    {
      "epoch": 2.1251873817612905,
      "grad_norm": 0.3610532879829407,
      "learning_rate": 5.787510789849964e-06,
      "loss": 0.0223,
      "step": 1298600
    },
    {
      "epoch": 2.1252201121999437,
      "grad_norm": 0.4189929962158203,
      "learning_rate": 5.787444897636447e-06,
      "loss": 0.0166,
      "step": 1298620
    },
    {
      "epoch": 2.125252842638597,
      "grad_norm": 0.7437980771064758,
      "learning_rate": 5.787379005422929e-06,
      "loss": 0.0185,
      "step": 1298640
    },
    {
      "epoch": 2.1252855730772504,
      "grad_norm": 0.2869666516780853,
      "learning_rate": 5.787313113209413e-06,
      "loss": 0.0155,
      "step": 1298660
    },
    {
      "epoch": 2.1253183035159036,
      "grad_norm": 0.5770702958106995,
      "learning_rate": 5.787247220995895e-06,
      "loss": 0.0165,
      "step": 1298680
    },
    {
      "epoch": 2.125351033954557,
      "grad_norm": 0.5270495414733887,
      "learning_rate": 5.787181328782378e-06,
      "loss": 0.017,
      "step": 1298700
    },
    {
      "epoch": 2.1253837643932103,
      "grad_norm": 0.1991075724363327,
      "learning_rate": 5.787115436568861e-06,
      "loss": 0.0146,
      "step": 1298720
    },
    {
      "epoch": 2.125416494831864,
      "grad_norm": 1.2491252422332764,
      "learning_rate": 5.787049544355345e-06,
      "loss": 0.0165,
      "step": 1298740
    },
    {
      "epoch": 2.125449225270517,
      "grad_norm": 0.5824252367019653,
      "learning_rate": 5.786983652141827e-06,
      "loss": 0.0115,
      "step": 1298760
    },
    {
      "epoch": 2.12548195570917,
      "grad_norm": 0.5848690271377563,
      "learning_rate": 5.78691775992831e-06,
      "loss": 0.0177,
      "step": 1298780
    },
    {
      "epoch": 2.125514686147824,
      "grad_norm": 0.5636059641838074,
      "learning_rate": 5.786851867714792e-06,
      "loss": 0.0093,
      "step": 1298800
    },
    {
      "epoch": 2.125547416586477,
      "grad_norm": 0.18025153875350952,
      "learning_rate": 5.786785975501276e-06,
      "loss": 0.0132,
      "step": 1298820
    },
    {
      "epoch": 2.1255801470251305,
      "grad_norm": 0.4591575860977173,
      "learning_rate": 5.7867200832877575e-06,
      "loss": 0.0193,
      "step": 1298840
    },
    {
      "epoch": 2.1256128774637837,
      "grad_norm": 0.23777242004871368,
      "learning_rate": 5.786654191074241e-06,
      "loss": 0.0157,
      "step": 1298860
    },
    {
      "epoch": 2.1256456079024373,
      "grad_norm": 0.22840362787246704,
      "learning_rate": 5.786588298860724e-06,
      "loss": 0.0106,
      "step": 1298880
    },
    {
      "epoch": 2.1256783383410904,
      "grad_norm": 0.23846383392810822,
      "learning_rate": 5.786522406647207e-06,
      "loss": 0.0139,
      "step": 1298900
    },
    {
      "epoch": 2.1257110687797436,
      "grad_norm": 0.5092726945877075,
      "learning_rate": 5.78645651443369e-06,
      "loss": 0.0123,
      "step": 1298920
    },
    {
      "epoch": 2.125743799218397,
      "grad_norm": 0.49751976132392883,
      "learning_rate": 5.786390622220173e-06,
      "loss": 0.0198,
      "step": 1298940
    },
    {
      "epoch": 2.1257765296570503,
      "grad_norm": 0.2496836632490158,
      "learning_rate": 5.786324730006656e-06,
      "loss": 0.0159,
      "step": 1298960
    },
    {
      "epoch": 2.125809260095704,
      "grad_norm": 0.8828000426292419,
      "learning_rate": 5.786258837793138e-06,
      "loss": 0.0158,
      "step": 1298980
    },
    {
      "epoch": 2.125841990534357,
      "grad_norm": 0.4141869843006134,
      "learning_rate": 5.786192945579622e-06,
      "loss": 0.0175,
      "step": 1299000
    },
    {
      "epoch": 2.1258747209730107,
      "grad_norm": 1.0129902362823486,
      "learning_rate": 5.786127053366104e-06,
      "loss": 0.0206,
      "step": 1299020
    },
    {
      "epoch": 2.125907451411664,
      "grad_norm": 0.7495970129966736,
      "learning_rate": 5.7860611611525875e-06,
      "loss": 0.0202,
      "step": 1299040
    },
    {
      "epoch": 2.125940181850317,
      "grad_norm": 0.3178877532482147,
      "learning_rate": 5.785995268939069e-06,
      "loss": 0.0179,
      "step": 1299060
    },
    {
      "epoch": 2.1259729122889706,
      "grad_norm": 0.9659902453422546,
      "learning_rate": 5.785929376725553e-06,
      "loss": 0.0213,
      "step": 1299080
    },
    {
      "epoch": 2.1260056427276237,
      "grad_norm": 0.0885399580001831,
      "learning_rate": 5.785863484512036e-06,
      "loss": 0.0113,
      "step": 1299100
    },
    {
      "epoch": 2.1260383731662773,
      "grad_norm": 0.41578295826911926,
      "learning_rate": 5.7857975922985184e-06,
      "loss": 0.0173,
      "step": 1299120
    },
    {
      "epoch": 2.1260711036049305,
      "grad_norm": 0.2971956431865692,
      "learning_rate": 5.785731700085001e-06,
      "loss": 0.0121,
      "step": 1299140
    },
    {
      "epoch": 2.126103834043584,
      "grad_norm": 0.3297360837459564,
      "learning_rate": 5.785665807871485e-06,
      "loss": 0.0123,
      "step": 1299160
    },
    {
      "epoch": 2.126136564482237,
      "grad_norm": 0.36490121483802795,
      "learning_rate": 5.785599915657967e-06,
      "loss": 0.0116,
      "step": 1299180
    },
    {
      "epoch": 2.1261692949208904,
      "grad_norm": 0.09099278599023819,
      "learning_rate": 5.78553402344445e-06,
      "loss": 0.0103,
      "step": 1299200
    },
    {
      "epoch": 2.126202025359544,
      "grad_norm": 0.8423053026199341,
      "learning_rate": 5.785468131230932e-06,
      "loss": 0.0188,
      "step": 1299220
    },
    {
      "epoch": 2.126234755798197,
      "grad_norm": 0.8147395849227905,
      "learning_rate": 5.785402239017416e-06,
      "loss": 0.0177,
      "step": 1299240
    },
    {
      "epoch": 2.1262674862368507,
      "grad_norm": 0.26759153604507446,
      "learning_rate": 5.7853363468038985e-06,
      "loss": 0.0219,
      "step": 1299260
    },
    {
      "epoch": 2.126300216675504,
      "grad_norm": 0.15358570218086243,
      "learning_rate": 5.785270454590381e-06,
      "loss": 0.0201,
      "step": 1299280
    },
    {
      "epoch": 2.1263329471141574,
      "grad_norm": 0.22595255076885223,
      "learning_rate": 5.785204562376865e-06,
      "loss": 0.0109,
      "step": 1299300
    },
    {
      "epoch": 2.1263656775528106,
      "grad_norm": 0.3722662329673767,
      "learning_rate": 5.7851386701633475e-06,
      "loss": 0.0192,
      "step": 1299320
    },
    {
      "epoch": 2.1263984079914637,
      "grad_norm": 0.8829509019851685,
      "learning_rate": 5.78507277794983e-06,
      "loss": 0.0201,
      "step": 1299340
    },
    {
      "epoch": 2.1264311384301173,
      "grad_norm": 0.2708407938480377,
      "learning_rate": 5.785006885736313e-06,
      "loss": 0.0166,
      "step": 1299360
    },
    {
      "epoch": 2.1264638688687705,
      "grad_norm": 0.3651697635650635,
      "learning_rate": 5.784940993522797e-06,
      "loss": 0.0206,
      "step": 1299380
    },
    {
      "epoch": 2.126496599307424,
      "grad_norm": 0.6035492420196533,
      "learning_rate": 5.7848751013092785e-06,
      "loss": 0.0185,
      "step": 1299400
    },
    {
      "epoch": 2.1265293297460772,
      "grad_norm": 0.13668319582939148,
      "learning_rate": 5.784809209095762e-06,
      "loss": 0.0126,
      "step": 1299420
    },
    {
      "epoch": 2.126562060184731,
      "grad_norm": 0.350269079208374,
      "learning_rate": 5.784743316882244e-06,
      "loss": 0.0132,
      "step": 1299440
    },
    {
      "epoch": 2.126594790623384,
      "grad_norm": 0.5270125269889832,
      "learning_rate": 5.7846774246687276e-06,
      "loss": 0.0131,
      "step": 1299460
    },
    {
      "epoch": 2.126627521062037,
      "grad_norm": 0.5980057120323181,
      "learning_rate": 5.78461153245521e-06,
      "loss": 0.0171,
      "step": 1299480
    },
    {
      "epoch": 2.1266602515006907,
      "grad_norm": 0.4199708104133606,
      "learning_rate": 5.784545640241693e-06,
      "loss": 0.0112,
      "step": 1299500
    },
    {
      "epoch": 2.126692981939344,
      "grad_norm": 0.6602480411529541,
      "learning_rate": 5.784479748028176e-06,
      "loss": 0.0119,
      "step": 1299520
    },
    {
      "epoch": 2.1267257123779975,
      "grad_norm": 0.3104352355003357,
      "learning_rate": 5.784413855814659e-06,
      "loss": 0.0179,
      "step": 1299540
    },
    {
      "epoch": 2.1267584428166506,
      "grad_norm": 0.5604773759841919,
      "learning_rate": 5.784347963601141e-06,
      "loss": 0.0162,
      "step": 1299560
    },
    {
      "epoch": 2.126791173255304,
      "grad_norm": 0.2911410629749298,
      "learning_rate": 5.784282071387625e-06,
      "loss": 0.0179,
      "step": 1299580
    },
    {
      "epoch": 2.1268239036939574,
      "grad_norm": 0.3293593227863312,
      "learning_rate": 5.784216179174107e-06,
      "loss": 0.0164,
      "step": 1299600
    },
    {
      "epoch": 2.1268566341326105,
      "grad_norm": 1.8836673498153687,
      "learning_rate": 5.78415028696059e-06,
      "loss": 0.0192,
      "step": 1299620
    },
    {
      "epoch": 2.126889364571264,
      "grad_norm": 0.41820138692855835,
      "learning_rate": 5.784084394747074e-06,
      "loss": 0.0164,
      "step": 1299640
    },
    {
      "epoch": 2.1269220950099172,
      "grad_norm": 0.17357489466667175,
      "learning_rate": 5.784018502533556e-06,
      "loss": 0.0114,
      "step": 1299660
    },
    {
      "epoch": 2.126954825448571,
      "grad_norm": 0.761085569858551,
      "learning_rate": 5.783952610320039e-06,
      "loss": 0.0102,
      "step": 1299680
    },
    {
      "epoch": 2.126987555887224,
      "grad_norm": 0.3054454028606415,
      "learning_rate": 5.783886718106521e-06,
      "loss": 0.0123,
      "step": 1299700
    },
    {
      "epoch": 2.127020286325877,
      "grad_norm": 0.3937973380088806,
      "learning_rate": 5.783820825893005e-06,
      "loss": 0.0152,
      "step": 1299720
    },
    {
      "epoch": 2.1270530167645307,
      "grad_norm": 0.576602041721344,
      "learning_rate": 5.783754933679488e-06,
      "loss": 0.0122,
      "step": 1299740
    },
    {
      "epoch": 2.127085747203184,
      "grad_norm": 0.16824182868003845,
      "learning_rate": 5.78368904146597e-06,
      "loss": 0.0139,
      "step": 1299760
    },
    {
      "epoch": 2.1271184776418375,
      "grad_norm": 0.8485438227653503,
      "learning_rate": 5.783623149252453e-06,
      "loss": 0.0187,
      "step": 1299780
    },
    {
      "epoch": 2.1271512080804906,
      "grad_norm": 0.4916994571685791,
      "learning_rate": 5.783557257038937e-06,
      "loss": 0.0118,
      "step": 1299800
    },
    {
      "epoch": 2.1271839385191442,
      "grad_norm": 0.4699534475803375,
      "learning_rate": 5.7834913648254186e-06,
      "loss": 0.0115,
      "step": 1299820
    },
    {
      "epoch": 2.1272166689577974,
      "grad_norm": 0.5481802225112915,
      "learning_rate": 5.783425472611902e-06,
      "loss": 0.0192,
      "step": 1299840
    },
    {
      "epoch": 2.1272493993964505,
      "grad_norm": 0.6471333503723145,
      "learning_rate": 5.783359580398384e-06,
      "loss": 0.0154,
      "step": 1299860
    },
    {
      "epoch": 2.127282129835104,
      "grad_norm": 0.5810503363609314,
      "learning_rate": 5.783293688184868e-06,
      "loss": 0.0133,
      "step": 1299880
    },
    {
      "epoch": 2.1273148602737573,
      "grad_norm": 0.7305225729942322,
      "learning_rate": 5.78322779597135e-06,
      "loss": 0.0184,
      "step": 1299900
    },
    {
      "epoch": 2.127347590712411,
      "grad_norm": 9.884319305419922,
      "learning_rate": 5.783161903757833e-06,
      "loss": 0.0232,
      "step": 1299920
    },
    {
      "epoch": 2.127380321151064,
      "grad_norm": 0.904991626739502,
      "learning_rate": 5.783096011544316e-06,
      "loss": 0.0162,
      "step": 1299940
    },
    {
      "epoch": 2.1274130515897176,
      "grad_norm": 0.13886694610118866,
      "learning_rate": 5.7830301193307994e-06,
      "loss": 0.016,
      "step": 1299960
    },
    {
      "epoch": 2.1274457820283708,
      "grad_norm": 0.7446156740188599,
      "learning_rate": 5.782964227117282e-06,
      "loss": 0.0162,
      "step": 1299980
    },
    {
      "epoch": 2.127478512467024,
      "grad_norm": 0.9535378217697144,
      "learning_rate": 5.782898334903765e-06,
      "loss": 0.0195,
      "step": 1300000
    },
    {
      "epoch": 2.127478512467024,
      "eval_loss": 0.008936060592532158,
      "eval_runtime": 6501.2567,
      "eval_samples_per_second": 158.101,
      "eval_steps_per_second": 15.81,
      "eval_sts-dev_pearson_cosine": 0.9793430019902463,
      "eval_sts-dev_spearman_cosine": 0.8920088065029455,
      "step": 1300000
    },
    {
      "epoch": 2.1275112429056775,
      "grad_norm": 0.25327372550964355,
      "learning_rate": 5.7828324426902485e-06,
      "loss": 0.0096,
      "step": 1300020
    },
    {
      "epoch": 2.1275439733443307,
      "grad_norm": 0.7380669713020325,
      "learning_rate": 5.78276655047673e-06,
      "loss": 0.0208,
      "step": 1300040
    },
    {
      "epoch": 2.1275767037829842,
      "grad_norm": 0.44387543201446533,
      "learning_rate": 5.782700658263214e-06,
      "loss": 0.0246,
      "step": 1300060
    },
    {
      "epoch": 2.1276094342216374,
      "grad_norm": 0.48335859179496765,
      "learning_rate": 5.782634766049696e-06,
      "loss": 0.0122,
      "step": 1300080
    },
    {
      "epoch": 2.1276421646602905,
      "grad_norm": 0.5971223711967468,
      "learning_rate": 5.7825688738361795e-06,
      "loss": 0.0155,
      "step": 1300100
    },
    {
      "epoch": 2.127674895098944,
      "grad_norm": 0.23145563900470734,
      "learning_rate": 5.782502981622662e-06,
      "loss": 0.0149,
      "step": 1300120
    },
    {
      "epoch": 2.1277076255375973,
      "grad_norm": 0.6506761312484741,
      "learning_rate": 5.782437089409145e-06,
      "loss": 0.0213,
      "step": 1300140
    },
    {
      "epoch": 2.127740355976251,
      "grad_norm": 0.29648056626319885,
      "learning_rate": 5.782371197195628e-06,
      "loss": 0.0156,
      "step": 1300160
    },
    {
      "epoch": 2.127773086414904,
      "grad_norm": 0.1188092976808548,
      "learning_rate": 5.782305304982111e-06,
      "loss": 0.0205,
      "step": 1300180
    },
    {
      "epoch": 2.1278058168535576,
      "grad_norm": 0.40353214740753174,
      "learning_rate": 5.782239412768593e-06,
      "loss": 0.0156,
      "step": 1300200
    },
    {
      "epoch": 2.127838547292211,
      "grad_norm": 0.21586741507053375,
      "learning_rate": 5.782173520555077e-06,
      "loss": 0.0151,
      "step": 1300220
    },
    {
      "epoch": 2.127871277730864,
      "grad_norm": 1.1301133632659912,
      "learning_rate": 5.782107628341559e-06,
      "loss": 0.0189,
      "step": 1300240
    },
    {
      "epoch": 2.1279040081695175,
      "grad_norm": 0.18811237812042236,
      "learning_rate": 5.782041736128042e-06,
      "loss": 0.0129,
      "step": 1300260
    },
    {
      "epoch": 2.1279367386081707,
      "grad_norm": 0.2394329458475113,
      "learning_rate": 5.781975843914525e-06,
      "loss": 0.0153,
      "step": 1300280
    },
    {
      "epoch": 2.1279694690468243,
      "grad_norm": 0.5012458562850952,
      "learning_rate": 5.781909951701008e-06,
      "loss": 0.0145,
      "step": 1300300
    },
    {
      "epoch": 2.1280021994854774,
      "grad_norm": 0.12890063226222992,
      "learning_rate": 5.781844059487491e-06,
      "loss": 0.016,
      "step": 1300320
    },
    {
      "epoch": 2.128034929924131,
      "grad_norm": 0.2017989307641983,
      "learning_rate": 5.781778167273974e-06,
      "loss": 0.0206,
      "step": 1300340
    },
    {
      "epoch": 2.128067660362784,
      "grad_norm": 0.17791232466697693,
      "learning_rate": 5.781712275060457e-06,
      "loss": 0.0216,
      "step": 1300360
    },
    {
      "epoch": 2.1281003908014373,
      "grad_norm": 0.8068310022354126,
      "learning_rate": 5.7816463828469395e-06,
      "loss": 0.0184,
      "step": 1300380
    },
    {
      "epoch": 2.128133121240091,
      "grad_norm": 0.33435875177383423,
      "learning_rate": 5.781580490633423e-06,
      "loss": 0.0165,
      "step": 1300400
    },
    {
      "epoch": 2.128165851678744,
      "grad_norm": 0.990987241268158,
      "learning_rate": 5.781514598419905e-06,
      "loss": 0.0131,
      "step": 1300420
    },
    {
      "epoch": 2.1281985821173977,
      "grad_norm": 0.1664436161518097,
      "learning_rate": 5.781448706206389e-06,
      "loss": 0.0112,
      "step": 1300440
    },
    {
      "epoch": 2.128231312556051,
      "grad_norm": 0.3845311999320984,
      "learning_rate": 5.7813828139928705e-06,
      "loss": 0.0153,
      "step": 1300460
    },
    {
      "epoch": 2.1282640429947044,
      "grad_norm": 0.3389948308467865,
      "learning_rate": 5.781316921779354e-06,
      "loss": 0.0228,
      "step": 1300480
    },
    {
      "epoch": 2.1282967734333575,
      "grad_norm": 0.9894856214523315,
      "learning_rate": 5.781251029565837e-06,
      "loss": 0.019,
      "step": 1300500
    },
    {
      "epoch": 2.1283295038720107,
      "grad_norm": 0.45851150155067444,
      "learning_rate": 5.7811851373523195e-06,
      "loss": 0.016,
      "step": 1300520
    },
    {
      "epoch": 2.1283622343106643,
      "grad_norm": 0.40680962800979614,
      "learning_rate": 5.781119245138802e-06,
      "loss": 0.0132,
      "step": 1300540
    },
    {
      "epoch": 2.1283949647493174,
      "grad_norm": 0.12416741251945496,
      "learning_rate": 5.781053352925286e-06,
      "loss": 0.0195,
      "step": 1300560
    },
    {
      "epoch": 2.128427695187971,
      "grad_norm": 0.2070760726928711,
      "learning_rate": 5.780987460711768e-06,
      "loss": 0.0119,
      "step": 1300580
    },
    {
      "epoch": 2.128460425626624,
      "grad_norm": 0.16163359582424164,
      "learning_rate": 5.780921568498251e-06,
      "loss": 0.0122,
      "step": 1300600
    },
    {
      "epoch": 2.128493156065278,
      "grad_norm": 0.33177581429481506,
      "learning_rate": 5.780855676284733e-06,
      "loss": 0.0147,
      "step": 1300620
    },
    {
      "epoch": 2.128525886503931,
      "grad_norm": 1.0968382358551025,
      "learning_rate": 5.780789784071217e-06,
      "loss": 0.0139,
      "step": 1300640
    },
    {
      "epoch": 2.128558616942584,
      "grad_norm": 0.40309247374534607,
      "learning_rate": 5.780723891857699e-06,
      "loss": 0.0156,
      "step": 1300660
    },
    {
      "epoch": 2.1285913473812377,
      "grad_norm": 0.5295210480690002,
      "learning_rate": 5.780657999644182e-06,
      "loss": 0.0115,
      "step": 1300680
    },
    {
      "epoch": 2.128624077819891,
      "grad_norm": 0.10304933041334152,
      "learning_rate": 5.780592107430666e-06,
      "loss": 0.0184,
      "step": 1300700
    },
    {
      "epoch": 2.1286568082585444,
      "grad_norm": 0.7767062783241272,
      "learning_rate": 5.780526215217148e-06,
      "loss": 0.018,
      "step": 1300720
    },
    {
      "epoch": 2.1286895386971976,
      "grad_norm": 0.43244925141334534,
      "learning_rate": 5.780460323003631e-06,
      "loss": 0.0165,
      "step": 1300740
    },
    {
      "epoch": 2.128722269135851,
      "grad_norm": 0.21811707317829132,
      "learning_rate": 5.780394430790114e-06,
      "loss": 0.0146,
      "step": 1300760
    },
    {
      "epoch": 2.1287549995745043,
      "grad_norm": 0.33152133226394653,
      "learning_rate": 5.780328538576597e-06,
      "loss": 0.0119,
      "step": 1300780
    },
    {
      "epoch": 2.1287877300131575,
      "grad_norm": 0.2982523739337921,
      "learning_rate": 5.78026264636308e-06,
      "loss": 0.0216,
      "step": 1300800
    },
    {
      "epoch": 2.128820460451811,
      "grad_norm": 0.43942898511886597,
      "learning_rate": 5.780196754149563e-06,
      "loss": 0.0135,
      "step": 1300820
    },
    {
      "epoch": 2.128853190890464,
      "grad_norm": 0.22643154859542847,
      "learning_rate": 5.780130861936045e-06,
      "loss": 0.0117,
      "step": 1300840
    },
    {
      "epoch": 2.128885921329118,
      "grad_norm": 0.49167442321777344,
      "learning_rate": 5.780064969722529e-06,
      "loss": 0.015,
      "step": 1300860
    },
    {
      "epoch": 2.128918651767771,
      "grad_norm": 0.18104979395866394,
      "learning_rate": 5.7799990775090105e-06,
      "loss": 0.0141,
      "step": 1300880
    },
    {
      "epoch": 2.1289513822064245,
      "grad_norm": 0.7548452019691467,
      "learning_rate": 5.779933185295494e-06,
      "loss": 0.0246,
      "step": 1300900
    },
    {
      "epoch": 2.1289841126450777,
      "grad_norm": 0.5919963121414185,
      "learning_rate": 5.779867293081977e-06,
      "loss": 0.009,
      "step": 1300920
    },
    {
      "epoch": 2.129016843083731,
      "grad_norm": 0.22361710667610168,
      "learning_rate": 5.77980140086846e-06,
      "loss": 0.0173,
      "step": 1300940
    },
    {
      "epoch": 2.1290495735223844,
      "grad_norm": 0.1911855936050415,
      "learning_rate": 5.779735508654942e-06,
      "loss": 0.0108,
      "step": 1300960
    },
    {
      "epoch": 2.1290823039610376,
      "grad_norm": 0.09477808326482773,
      "learning_rate": 5.779669616441426e-06,
      "loss": 0.0189,
      "step": 1300980
    },
    {
      "epoch": 2.129115034399691,
      "grad_norm": 0.7434277534484863,
      "learning_rate": 5.779603724227908e-06,
      "loss": 0.0211,
      "step": 1301000
    },
    {
      "epoch": 2.1291477648383443,
      "grad_norm": 0.3739240765571594,
      "learning_rate": 5.779537832014391e-06,
      "loss": 0.0168,
      "step": 1301020
    },
    {
      "epoch": 2.129180495276998,
      "grad_norm": 0.40039142966270447,
      "learning_rate": 5.779471939800875e-06,
      "loss": 0.0135,
      "step": 1301040
    },
    {
      "epoch": 2.129213225715651,
      "grad_norm": 0.24980014562606812,
      "learning_rate": 5.779406047587357e-06,
      "loss": 0.0216,
      "step": 1301060
    },
    {
      "epoch": 2.1292459561543042,
      "grad_norm": 0.41049787402153015,
      "learning_rate": 5.7793401553738405e-06,
      "loss": 0.0178,
      "step": 1301080
    },
    {
      "epoch": 2.129278686592958,
      "grad_norm": 0.8412185907363892,
      "learning_rate": 5.779274263160322e-06,
      "loss": 0.0186,
      "step": 1301100
    },
    {
      "epoch": 2.129311417031611,
      "grad_norm": 0.6405956149101257,
      "learning_rate": 5.779208370946806e-06,
      "loss": 0.0184,
      "step": 1301120
    },
    {
      "epoch": 2.1293441474702646,
      "grad_norm": 0.2556079626083374,
      "learning_rate": 5.779142478733289e-06,
      "loss": 0.0181,
      "step": 1301140
    },
    {
      "epoch": 2.1293768779089177,
      "grad_norm": 0.1544664204120636,
      "learning_rate": 5.7790765865197714e-06,
      "loss": 0.0145,
      "step": 1301160
    },
    {
      "epoch": 2.129409608347571,
      "grad_norm": 0.3357139229774475,
      "learning_rate": 5.779010694306254e-06,
      "loss": 0.014,
      "step": 1301180
    },
    {
      "epoch": 2.1294423387862245,
      "grad_norm": 0.08303020894527435,
      "learning_rate": 5.778944802092738e-06,
      "loss": 0.0106,
      "step": 1301200
    },
    {
      "epoch": 2.1294750692248776,
      "grad_norm": 0.2072499394416809,
      "learning_rate": 5.77887890987922e-06,
      "loss": 0.0199,
      "step": 1301220
    },
    {
      "epoch": 2.129507799663531,
      "grad_norm": 0.35069242119789124,
      "learning_rate": 5.778813017665703e-06,
      "loss": 0.0115,
      "step": 1301240
    },
    {
      "epoch": 2.1295405301021844,
      "grad_norm": 0.5583047270774841,
      "learning_rate": 5.778747125452185e-06,
      "loss": 0.0144,
      "step": 1301260
    },
    {
      "epoch": 2.129573260540838,
      "grad_norm": 0.40431344509124756,
      "learning_rate": 5.778681233238669e-06,
      "loss": 0.0137,
      "step": 1301280
    },
    {
      "epoch": 2.129605990979491,
      "grad_norm": 0.34233906865119934,
      "learning_rate": 5.7786153410251515e-06,
      "loss": 0.0155,
      "step": 1301300
    },
    {
      "epoch": 2.1296387214181443,
      "grad_norm": 0.262332558631897,
      "learning_rate": 5.778549448811634e-06,
      "loss": 0.0113,
      "step": 1301320
    },
    {
      "epoch": 2.129671451856798,
      "grad_norm": 0.19895467162132263,
      "learning_rate": 5.778483556598117e-06,
      "loss": 0.0091,
      "step": 1301340
    },
    {
      "epoch": 2.129704182295451,
      "grad_norm": 0.20036636292934418,
      "learning_rate": 5.7784176643846005e-06,
      "loss": 0.0153,
      "step": 1301360
    },
    {
      "epoch": 2.1297369127341046,
      "grad_norm": 0.3922054171562195,
      "learning_rate": 5.778351772171083e-06,
      "loss": 0.0205,
      "step": 1301380
    },
    {
      "epoch": 2.1297696431727577,
      "grad_norm": 0.34090614318847656,
      "learning_rate": 5.778285879957566e-06,
      "loss": 0.0156,
      "step": 1301400
    },
    {
      "epoch": 2.1298023736114113,
      "grad_norm": 0.8037063479423523,
      "learning_rate": 5.77821998774405e-06,
      "loss": 0.0155,
      "step": 1301420
    },
    {
      "epoch": 2.1298351040500645,
      "grad_norm": 1.3441643714904785,
      "learning_rate": 5.7781540955305315e-06,
      "loss": 0.0124,
      "step": 1301440
    },
    {
      "epoch": 2.1298678344887176,
      "grad_norm": 0.2869017422199249,
      "learning_rate": 5.778088203317015e-06,
      "loss": 0.015,
      "step": 1301460
    },
    {
      "epoch": 2.1299005649273712,
      "grad_norm": 0.3809782564640045,
      "learning_rate": 5.778022311103497e-06,
      "loss": 0.0193,
      "step": 1301480
    },
    {
      "epoch": 2.1299332953660244,
      "grad_norm": 0.30183541774749756,
      "learning_rate": 5.7779564188899806e-06,
      "loss": 0.0143,
      "step": 1301500
    },
    {
      "epoch": 2.129966025804678,
      "grad_norm": 0.42247074842453003,
      "learning_rate": 5.777890526676463e-06,
      "loss": 0.0118,
      "step": 1301520
    },
    {
      "epoch": 2.129998756243331,
      "grad_norm": 0.328948438167572,
      "learning_rate": 5.777824634462946e-06,
      "loss": 0.0177,
      "step": 1301540
    },
    {
      "epoch": 2.1300314866819843,
      "grad_norm": 0.7142980098724365,
      "learning_rate": 5.777758742249429e-06,
      "loss": 0.0165,
      "step": 1301560
    },
    {
      "epoch": 2.130064217120638,
      "grad_norm": 0.16196474432945251,
      "learning_rate": 5.777692850035912e-06,
      "loss": 0.0102,
      "step": 1301580
    },
    {
      "epoch": 2.130096947559291,
      "grad_norm": 0.5270000696182251,
      "learning_rate": 5.777626957822394e-06,
      "loss": 0.0123,
      "step": 1301600
    },
    {
      "epoch": 2.1301296779979446,
      "grad_norm": 0.626660168170929,
      "learning_rate": 5.777561065608878e-06,
      "loss": 0.0201,
      "step": 1301620
    },
    {
      "epoch": 2.1301624084365978,
      "grad_norm": 1.8881248235702515,
      "learning_rate": 5.77749517339536e-06,
      "loss": 0.0249,
      "step": 1301640
    },
    {
      "epoch": 2.1301951388752514,
      "grad_norm": 0.2784261405467987,
      "learning_rate": 5.777429281181843e-06,
      "loss": 0.0119,
      "step": 1301660
    },
    {
      "epoch": 2.1302278693139045,
      "grad_norm": 0.3495729863643646,
      "learning_rate": 5.777363388968325e-06,
      "loss": 0.0158,
      "step": 1301680
    },
    {
      "epoch": 2.1302605997525577,
      "grad_norm": 0.43944644927978516,
      "learning_rate": 5.777297496754809e-06,
      "loss": 0.019,
      "step": 1301700
    },
    {
      "epoch": 2.1302933301912113,
      "grad_norm": 0.09977477043867111,
      "learning_rate": 5.7772316045412915e-06,
      "loss": 0.0095,
      "step": 1301720
    },
    {
      "epoch": 2.1303260606298644,
      "grad_norm": 0.8188148736953735,
      "learning_rate": 5.777165712327774e-06,
      "loss": 0.0193,
      "step": 1301740
    },
    {
      "epoch": 2.130358791068518,
      "grad_norm": 1.9983686208724976,
      "learning_rate": 5.777099820114258e-06,
      "loss": 0.0123,
      "step": 1301760
    },
    {
      "epoch": 2.130391521507171,
      "grad_norm": 0.7872936129570007,
      "learning_rate": 5.777033927900741e-06,
      "loss": 0.0113,
      "step": 1301780
    },
    {
      "epoch": 2.1304242519458247,
      "grad_norm": 0.5813397169113159,
      "learning_rate": 5.776968035687223e-06,
      "loss": 0.017,
      "step": 1301800
    },
    {
      "epoch": 2.130456982384478,
      "grad_norm": 0.27060970664024353,
      "learning_rate": 5.776902143473706e-06,
      "loss": 0.0185,
      "step": 1301820
    },
    {
      "epoch": 2.130489712823131,
      "grad_norm": 0.7931925058364868,
      "learning_rate": 5.77683625126019e-06,
      "loss": 0.0158,
      "step": 1301840
    },
    {
      "epoch": 2.1305224432617846,
      "grad_norm": 0.5804522633552551,
      "learning_rate": 5.7767703590466716e-06,
      "loss": 0.0136,
      "step": 1301860
    },
    {
      "epoch": 2.130555173700438,
      "grad_norm": 0.8844549655914307,
      "learning_rate": 5.776704466833155e-06,
      "loss": 0.0133,
      "step": 1301880
    },
    {
      "epoch": 2.1305879041390914,
      "grad_norm": 0.5034372210502625,
      "learning_rate": 5.776638574619637e-06,
      "loss": 0.0158,
      "step": 1301900
    },
    {
      "epoch": 2.1306206345777445,
      "grad_norm": 0.2467082142829895,
      "learning_rate": 5.776572682406121e-06,
      "loss": 0.0248,
      "step": 1301920
    },
    {
      "epoch": 2.130653365016398,
      "grad_norm": 0.5986190438270569,
      "learning_rate": 5.776506790192603e-06,
      "loss": 0.0255,
      "step": 1301940
    },
    {
      "epoch": 2.1306860954550513,
      "grad_norm": 0.7222789525985718,
      "learning_rate": 5.776440897979086e-06,
      "loss": 0.0173,
      "step": 1301960
    },
    {
      "epoch": 2.1307188258937044,
      "grad_norm": 0.17388512194156647,
      "learning_rate": 5.776375005765569e-06,
      "loss": 0.0107,
      "step": 1301980
    },
    {
      "epoch": 2.130751556332358,
      "grad_norm": 0.6497924327850342,
      "learning_rate": 5.7763091135520524e-06,
      "loss": 0.0139,
      "step": 1302000
    },
    {
      "epoch": 2.130784286771011,
      "grad_norm": 0.26532936096191406,
      "learning_rate": 5.776243221338534e-06,
      "loss": 0.019,
      "step": 1302020
    },
    {
      "epoch": 2.1308170172096648,
      "grad_norm": 0.7287935614585876,
      "learning_rate": 5.776177329125018e-06,
      "loss": 0.0137,
      "step": 1302040
    },
    {
      "epoch": 2.130849747648318,
      "grad_norm": 0.7036986351013184,
      "learning_rate": 5.7761114369115e-06,
      "loss": 0.0158,
      "step": 1302060
    },
    {
      "epoch": 2.1308824780869715,
      "grad_norm": 0.1816667765378952,
      "learning_rate": 5.776045544697983e-06,
      "loss": 0.0139,
      "step": 1302080
    },
    {
      "epoch": 2.1309152085256247,
      "grad_norm": 0.36217474937438965,
      "learning_rate": 5.775979652484467e-06,
      "loss": 0.0241,
      "step": 1302100
    },
    {
      "epoch": 2.130947938964278,
      "grad_norm": 0.3056015074253082,
      "learning_rate": 5.775913760270949e-06,
      "loss": 0.009,
      "step": 1302120
    },
    {
      "epoch": 2.1309806694029314,
      "grad_norm": 0.2284715622663498,
      "learning_rate": 5.7758478680574325e-06,
      "loss": 0.0123,
      "step": 1302140
    },
    {
      "epoch": 2.1310133998415846,
      "grad_norm": 0.20675694942474365,
      "learning_rate": 5.775781975843915e-06,
      "loss": 0.0142,
      "step": 1302160
    },
    {
      "epoch": 2.131046130280238,
      "grad_norm": 0.6173413991928101,
      "learning_rate": 5.775716083630398e-06,
      "loss": 0.0131,
      "step": 1302180
    },
    {
      "epoch": 2.1310788607188913,
      "grad_norm": 0.47644874453544617,
      "learning_rate": 5.775650191416881e-06,
      "loss": 0.0127,
      "step": 1302200
    },
    {
      "epoch": 2.131111591157545,
      "grad_norm": 0.2551860213279724,
      "learning_rate": 5.775584299203364e-06,
      "loss": 0.0102,
      "step": 1302220
    },
    {
      "epoch": 2.131144321596198,
      "grad_norm": 0.15765473246574402,
      "learning_rate": 5.775518406989846e-06,
      "loss": 0.017,
      "step": 1302240
    },
    {
      "epoch": 2.131177052034851,
      "grad_norm": 0.2978101074695587,
      "learning_rate": 5.77545251477633e-06,
      "loss": 0.0134,
      "step": 1302260
    },
    {
      "epoch": 2.131209782473505,
      "grad_norm": 0.3411961793899536,
      "learning_rate": 5.775386622562812e-06,
      "loss": 0.0149,
      "step": 1302280
    },
    {
      "epoch": 2.131242512912158,
      "grad_norm": 0.5599525570869446,
      "learning_rate": 5.775320730349295e-06,
      "loss": 0.0204,
      "step": 1302300
    },
    {
      "epoch": 2.1312752433508115,
      "grad_norm": 0.21916882693767548,
      "learning_rate": 5.775254838135778e-06,
      "loss": 0.0212,
      "step": 1302320
    },
    {
      "epoch": 2.1313079737894647,
      "grad_norm": 0.2353992462158203,
      "learning_rate": 5.775188945922261e-06,
      "loss": 0.0142,
      "step": 1302340
    },
    {
      "epoch": 2.1313407042281183,
      "grad_norm": 0.5045098066329956,
      "learning_rate": 5.7751230537087435e-06,
      "loss": 0.015,
      "step": 1302360
    },
    {
      "epoch": 2.1313734346667714,
      "grad_norm": 0.3521212041378021,
      "learning_rate": 5.775057161495227e-06,
      "loss": 0.0126,
      "step": 1302380
    },
    {
      "epoch": 2.1314061651054246,
      "grad_norm": 0.6345489025115967,
      "learning_rate": 5.774991269281709e-06,
      "loss": 0.0152,
      "step": 1302400
    },
    {
      "epoch": 2.131438895544078,
      "grad_norm": 0.05846511200070381,
      "learning_rate": 5.7749253770681925e-06,
      "loss": 0.0131,
      "step": 1302420
    },
    {
      "epoch": 2.1314716259827313,
      "grad_norm": 0.17332059144973755,
      "learning_rate": 5.774859484854676e-06,
      "loss": 0.0122,
      "step": 1302440
    },
    {
      "epoch": 2.131504356421385,
      "grad_norm": 0.47253522276878357,
      "learning_rate": 5.774793592641158e-06,
      "loss": 0.0134,
      "step": 1302460
    },
    {
      "epoch": 2.131537086860038,
      "grad_norm": 0.5734177827835083,
      "learning_rate": 5.774727700427642e-06,
      "loss": 0.0191,
      "step": 1302480
    },
    {
      "epoch": 2.1315698172986917,
      "grad_norm": 1.2001001834869385,
      "learning_rate": 5.7746618082141235e-06,
      "loss": 0.0228,
      "step": 1302500
    },
    {
      "epoch": 2.131602547737345,
      "grad_norm": 0.19785165786743164,
      "learning_rate": 5.774595916000607e-06,
      "loss": 0.013,
      "step": 1302520
    },
    {
      "epoch": 2.131635278175998,
      "grad_norm": 0.30081015825271606,
      "learning_rate": 5.774530023787089e-06,
      "loss": 0.0117,
      "step": 1302540
    },
    {
      "epoch": 2.1316680086146516,
      "grad_norm": 1.1800148487091064,
      "learning_rate": 5.7744641315735725e-06,
      "loss": 0.0177,
      "step": 1302560
    },
    {
      "epoch": 2.1317007390533047,
      "grad_norm": 0.16097454726696014,
      "learning_rate": 5.774398239360055e-06,
      "loss": 0.0167,
      "step": 1302580
    },
    {
      "epoch": 2.1317334694919583,
      "grad_norm": 0.30024874210357666,
      "learning_rate": 5.774332347146539e-06,
      "loss": 0.013,
      "step": 1302600
    },
    {
      "epoch": 2.1317661999306114,
      "grad_norm": 0.4641878008842468,
      "learning_rate": 5.774266454933021e-06,
      "loss": 0.0099,
      "step": 1302620
    },
    {
      "epoch": 2.131798930369265,
      "grad_norm": 0.40405967831611633,
      "learning_rate": 5.774200562719504e-06,
      "loss": 0.0146,
      "step": 1302640
    },
    {
      "epoch": 2.131831660807918,
      "grad_norm": 0.40327706933021545,
      "learning_rate": 5.774134670505986e-06,
      "loss": 0.0187,
      "step": 1302660
    },
    {
      "epoch": 2.1318643912465713,
      "grad_norm": 0.08505981415510178,
      "learning_rate": 5.77406877829247e-06,
      "loss": 0.0134,
      "step": 1302680
    },
    {
      "epoch": 2.131897121685225,
      "grad_norm": 0.35027244687080383,
      "learning_rate": 5.774002886078952e-06,
      "loss": 0.0221,
      "step": 1302700
    },
    {
      "epoch": 2.131929852123878,
      "grad_norm": 0.5478779077529907,
      "learning_rate": 5.773936993865435e-06,
      "loss": 0.0187,
      "step": 1302720
    },
    {
      "epoch": 2.1319625825625317,
      "grad_norm": 0.543669581413269,
      "learning_rate": 5.773871101651918e-06,
      "loss": 0.0158,
      "step": 1302740
    },
    {
      "epoch": 2.131995313001185,
      "grad_norm": 0.42779088020324707,
      "learning_rate": 5.773805209438401e-06,
      "loss": 0.021,
      "step": 1302760
    },
    {
      "epoch": 2.132028043439838,
      "grad_norm": 0.166296124458313,
      "learning_rate": 5.7737393172248835e-06,
      "loss": 0.011,
      "step": 1302780
    },
    {
      "epoch": 2.1320607738784916,
      "grad_norm": 0.44803762435913086,
      "learning_rate": 5.773673425011367e-06,
      "loss": 0.0137,
      "step": 1302800
    },
    {
      "epoch": 2.1320935043171447,
      "grad_norm": 0.4051154851913452,
      "learning_rate": 5.77360753279785e-06,
      "loss": 0.0184,
      "step": 1302820
    },
    {
      "epoch": 2.1321262347557983,
      "grad_norm": 0.10191604495048523,
      "learning_rate": 5.773541640584333e-06,
      "loss": 0.0141,
      "step": 1302840
    },
    {
      "epoch": 2.1321589651944515,
      "grad_norm": 0.23490393161773682,
      "learning_rate": 5.773475748370816e-06,
      "loss": 0.0118,
      "step": 1302860
    },
    {
      "epoch": 2.132191695633105,
      "grad_norm": 0.4917135536670685,
      "learning_rate": 5.773409856157298e-06,
      "loss": 0.0217,
      "step": 1302880
    },
    {
      "epoch": 2.132224426071758,
      "grad_norm": 0.23334313929080963,
      "learning_rate": 5.773343963943782e-06,
      "loss": 0.0144,
      "step": 1302900
    },
    {
      "epoch": 2.1322571565104114,
      "grad_norm": 0.597537636756897,
      "learning_rate": 5.7732780717302636e-06,
      "loss": 0.0151,
      "step": 1302920
    },
    {
      "epoch": 2.132289886949065,
      "grad_norm": 0.956721305847168,
      "learning_rate": 5.773212179516747e-06,
      "loss": 0.0161,
      "step": 1302940
    },
    {
      "epoch": 2.132322617387718,
      "grad_norm": 0.3124100863933563,
      "learning_rate": 5.77314628730323e-06,
      "loss": 0.0151,
      "step": 1302960
    },
    {
      "epoch": 2.1323553478263717,
      "grad_norm": 0.9618709087371826,
      "learning_rate": 5.773080395089713e-06,
      "loss": 0.0128,
      "step": 1302980
    },
    {
      "epoch": 2.132388078265025,
      "grad_norm": 1.5488357543945312,
      "learning_rate": 5.773014502876195e-06,
      "loss": 0.0185,
      "step": 1303000
    },
    {
      "epoch": 2.1324208087036784,
      "grad_norm": 0.42769381403923035,
      "learning_rate": 5.772948610662679e-06,
      "loss": 0.0164,
      "step": 1303020
    },
    {
      "epoch": 2.1324535391423316,
      "grad_norm": 0.36656445264816284,
      "learning_rate": 5.772882718449161e-06,
      "loss": 0.0177,
      "step": 1303040
    },
    {
      "epoch": 2.1324862695809848,
      "grad_norm": 0.6717453002929688,
      "learning_rate": 5.7728168262356444e-06,
      "loss": 0.0196,
      "step": 1303060
    },
    {
      "epoch": 2.1325190000196383,
      "grad_norm": 0.376516729593277,
      "learning_rate": 5.772750934022126e-06,
      "loss": 0.0132,
      "step": 1303080
    },
    {
      "epoch": 2.1325517304582915,
      "grad_norm": 0.734383225440979,
      "learning_rate": 5.77268504180861e-06,
      "loss": 0.0175,
      "step": 1303100
    },
    {
      "epoch": 2.132584460896945,
      "grad_norm": 0.1289716362953186,
      "learning_rate": 5.772619149595093e-06,
      "loss": 0.0156,
      "step": 1303120
    },
    {
      "epoch": 2.1326171913355982,
      "grad_norm": 0.17803137004375458,
      "learning_rate": 5.772553257381575e-06,
      "loss": 0.0146,
      "step": 1303140
    },
    {
      "epoch": 2.1326499217742514,
      "grad_norm": 1.1295132637023926,
      "learning_rate": 5.772487365168059e-06,
      "loss": 0.0211,
      "step": 1303160
    },
    {
      "epoch": 2.132682652212905,
      "grad_norm": 0.3763013184070587,
      "learning_rate": 5.772421472954542e-06,
      "loss": 0.0148,
      "step": 1303180
    },
    {
      "epoch": 2.132715382651558,
      "grad_norm": 0.5135363936424255,
      "learning_rate": 5.7723555807410245e-06,
      "loss": 0.0163,
      "step": 1303200
    },
    {
      "epoch": 2.1327481130902117,
      "grad_norm": 0.3870260715484619,
      "learning_rate": 5.772289688527507e-06,
      "loss": 0.0204,
      "step": 1303220
    },
    {
      "epoch": 2.132780843528865,
      "grad_norm": 0.43124058842658997,
      "learning_rate": 5.772223796313991e-06,
      "loss": 0.0144,
      "step": 1303240
    },
    {
      "epoch": 2.1328135739675185,
      "grad_norm": 0.3936557173728943,
      "learning_rate": 5.772157904100473e-06,
      "loss": 0.0232,
      "step": 1303260
    },
    {
      "epoch": 2.1328463044061716,
      "grad_norm": 1.1715490818023682,
      "learning_rate": 5.772092011886956e-06,
      "loss": 0.0174,
      "step": 1303280
    },
    {
      "epoch": 2.1328790348448248,
      "grad_norm": 0.309237003326416,
      "learning_rate": 5.772026119673438e-06,
      "loss": 0.0205,
      "step": 1303300
    },
    {
      "epoch": 2.1329117652834784,
      "grad_norm": 0.2975907623767853,
      "learning_rate": 5.771960227459922e-06,
      "loss": 0.0231,
      "step": 1303320
    },
    {
      "epoch": 2.1329444957221315,
      "grad_norm": 0.35200101137161255,
      "learning_rate": 5.7718943352464045e-06,
      "loss": 0.0171,
      "step": 1303340
    },
    {
      "epoch": 2.132977226160785,
      "grad_norm": 0.31442657113075256,
      "learning_rate": 5.771828443032887e-06,
      "loss": 0.0141,
      "step": 1303360
    },
    {
      "epoch": 2.1330099565994383,
      "grad_norm": 0.12858110666275024,
      "learning_rate": 5.77176255081937e-06,
      "loss": 0.0156,
      "step": 1303380
    },
    {
      "epoch": 2.133042687038092,
      "grad_norm": 0.5917746424674988,
      "learning_rate": 5.7716966586058535e-06,
      "loss": 0.0154,
      "step": 1303400
    },
    {
      "epoch": 2.133075417476745,
      "grad_norm": 0.2414560765028,
      "learning_rate": 5.7716307663923354e-06,
      "loss": 0.0214,
      "step": 1303420
    },
    {
      "epoch": 2.133108147915398,
      "grad_norm": 0.24793654680252075,
      "learning_rate": 5.771564874178819e-06,
      "loss": 0.0156,
      "step": 1303440
    },
    {
      "epoch": 2.1331408783540518,
      "grad_norm": 0.9801508188247681,
      "learning_rate": 5.771498981965301e-06,
      "loss": 0.0186,
      "step": 1303460
    },
    {
      "epoch": 2.133173608792705,
      "grad_norm": 0.22454236447811127,
      "learning_rate": 5.7714330897517845e-06,
      "loss": 0.0162,
      "step": 1303480
    },
    {
      "epoch": 2.1332063392313585,
      "grad_norm": 0.7427497506141663,
      "learning_rate": 5.771367197538268e-06,
      "loss": 0.0123,
      "step": 1303500
    },
    {
      "epoch": 2.1332390696700116,
      "grad_norm": 0.5721169114112854,
      "learning_rate": 5.77130130532475e-06,
      "loss": 0.0128,
      "step": 1303520
    },
    {
      "epoch": 2.1332718001086652,
      "grad_norm": 0.23355016112327576,
      "learning_rate": 5.7712354131112336e-06,
      "loss": 0.0134,
      "step": 1303540
    },
    {
      "epoch": 2.1333045305473184,
      "grad_norm": 0.31035059690475464,
      "learning_rate": 5.7711695208977155e-06,
      "loss": 0.019,
      "step": 1303560
    },
    {
      "epoch": 2.1333372609859715,
      "grad_norm": 0.3964326083660126,
      "learning_rate": 5.771103628684199e-06,
      "loss": 0.0125,
      "step": 1303580
    },
    {
      "epoch": 2.133369991424625,
      "grad_norm": 0.6383313536643982,
      "learning_rate": 5.771037736470682e-06,
      "loss": 0.0197,
      "step": 1303600
    },
    {
      "epoch": 2.1334027218632783,
      "grad_norm": 0.6663451790809631,
      "learning_rate": 5.7709718442571645e-06,
      "loss": 0.0076,
      "step": 1303620
    },
    {
      "epoch": 2.133435452301932,
      "grad_norm": 0.46926942467689514,
      "learning_rate": 5.770905952043647e-06,
      "loss": 0.0173,
      "step": 1303640
    },
    {
      "epoch": 2.133468182740585,
      "grad_norm": 0.7137031555175781,
      "learning_rate": 5.770840059830131e-06,
      "loss": 0.0141,
      "step": 1303660
    },
    {
      "epoch": 2.1335009131792386,
      "grad_norm": 0.34355464577674866,
      "learning_rate": 5.770774167616613e-06,
      "loss": 0.0111,
      "step": 1303680
    },
    {
      "epoch": 2.1335336436178918,
      "grad_norm": 0.8475366234779358,
      "learning_rate": 5.770708275403096e-06,
      "loss": 0.0248,
      "step": 1303700
    },
    {
      "epoch": 2.133566374056545,
      "grad_norm": 0.5285901427268982,
      "learning_rate": 5.770642383189578e-06,
      "loss": 0.0117,
      "step": 1303720
    },
    {
      "epoch": 2.1335991044951985,
      "grad_norm": 0.19742339849472046,
      "learning_rate": 5.770576490976062e-06,
      "loss": 0.0138,
      "step": 1303740
    },
    {
      "epoch": 2.1336318349338517,
      "grad_norm": 0.23025669157505035,
      "learning_rate": 5.7705105987625446e-06,
      "loss": 0.0159,
      "step": 1303760
    },
    {
      "epoch": 2.1336645653725053,
      "grad_norm": 0.12090368568897247,
      "learning_rate": 5.770444706549027e-06,
      "loss": 0.0154,
      "step": 1303780
    },
    {
      "epoch": 2.1336972958111584,
      "grad_norm": 0.5374104976654053,
      "learning_rate": 5.77037881433551e-06,
      "loss": 0.0127,
      "step": 1303800
    },
    {
      "epoch": 2.133730026249812,
      "grad_norm": 0.39471665024757385,
      "learning_rate": 5.770312922121994e-06,
      "loss": 0.0214,
      "step": 1303820
    },
    {
      "epoch": 2.133762756688465,
      "grad_norm": 0.2673848271369934,
      "learning_rate": 5.770247029908476e-06,
      "loss": 0.0148,
      "step": 1303840
    },
    {
      "epoch": 2.1337954871271183,
      "grad_norm": 0.46634727716445923,
      "learning_rate": 5.770181137694959e-06,
      "loss": 0.0134,
      "step": 1303860
    },
    {
      "epoch": 2.133828217565772,
      "grad_norm": 0.10632817447185516,
      "learning_rate": 5.770115245481443e-06,
      "loss": 0.0146,
      "step": 1303880
    },
    {
      "epoch": 2.133860948004425,
      "grad_norm": 0.530414342880249,
      "learning_rate": 5.770049353267925e-06,
      "loss": 0.0138,
      "step": 1303900
    },
    {
      "epoch": 2.1338936784430786,
      "grad_norm": 0.45084214210510254,
      "learning_rate": 5.769983461054408e-06,
      "loss": 0.0105,
      "step": 1303920
    },
    {
      "epoch": 2.133926408881732,
      "grad_norm": 0.22030282020568848,
      "learning_rate": 5.76991756884089e-06,
      "loss": 0.0143,
      "step": 1303940
    },
    {
      "epoch": 2.1339591393203854,
      "grad_norm": 0.13881021738052368,
      "learning_rate": 5.769851676627374e-06,
      "loss": 0.0092,
      "step": 1303960
    },
    {
      "epoch": 2.1339918697590385,
      "grad_norm": 0.58006352186203,
      "learning_rate": 5.769785784413856e-06,
      "loss": 0.0117,
      "step": 1303980
    },
    {
      "epoch": 2.1340246001976917,
      "grad_norm": 0.16320960223674774,
      "learning_rate": 5.769719892200339e-06,
      "loss": 0.0111,
      "step": 1304000
    },
    {
      "epoch": 2.1340573306363453,
      "grad_norm": 1.5623431205749512,
      "learning_rate": 5.769653999986822e-06,
      "loss": 0.0114,
      "step": 1304020
    },
    {
      "epoch": 2.1340900610749984,
      "grad_norm": 0.6192184090614319,
      "learning_rate": 5.7695881077733055e-06,
      "loss": 0.0159,
      "step": 1304040
    },
    {
      "epoch": 2.134122791513652,
      "grad_norm": 0.32564833760261536,
      "learning_rate": 5.769522215559787e-06,
      "loss": 0.0164,
      "step": 1304060
    },
    {
      "epoch": 2.134155521952305,
      "grad_norm": 0.2502697706222534,
      "learning_rate": 5.769456323346271e-06,
      "loss": 0.0121,
      "step": 1304080
    },
    {
      "epoch": 2.1341882523909588,
      "grad_norm": 0.9445239901542664,
      "learning_rate": 5.769390431132753e-06,
      "loss": 0.0176,
      "step": 1304100
    },
    {
      "epoch": 2.134220982829612,
      "grad_norm": 0.32398030161857605,
      "learning_rate": 5.769324538919236e-06,
      "loss": 0.0224,
      "step": 1304120
    },
    {
      "epoch": 2.134253713268265,
      "grad_norm": 0.27472320199012756,
      "learning_rate": 5.769258646705719e-06,
      "loss": 0.0166,
      "step": 1304140
    },
    {
      "epoch": 2.1342864437069187,
      "grad_norm": 1.0279262065887451,
      "learning_rate": 5.769192754492202e-06,
      "loss": 0.0149,
      "step": 1304160
    },
    {
      "epoch": 2.134319174145572,
      "grad_norm": 0.5990570783615112,
      "learning_rate": 5.769126862278685e-06,
      "loss": 0.0148,
      "step": 1304180
    },
    {
      "epoch": 2.1343519045842254,
      "grad_norm": 0.23770074546337128,
      "learning_rate": 5.769060970065168e-06,
      "loss": 0.0175,
      "step": 1304200
    },
    {
      "epoch": 2.1343846350228786,
      "grad_norm": 0.12280141562223434,
      "learning_rate": 5.768995077851651e-06,
      "loss": 0.0128,
      "step": 1304220
    },
    {
      "epoch": 2.1344173654615317,
      "grad_norm": 0.2628910541534424,
      "learning_rate": 5.768929185638134e-06,
      "loss": 0.0098,
      "step": 1304240
    },
    {
      "epoch": 2.1344500959001853,
      "grad_norm": 2.4763402938842773,
      "learning_rate": 5.768863293424617e-06,
      "loss": 0.0194,
      "step": 1304260
    },
    {
      "epoch": 2.1344828263388385,
      "grad_norm": 0.4791675806045532,
      "learning_rate": 5.768797401211099e-06,
      "loss": 0.0186,
      "step": 1304280
    },
    {
      "epoch": 2.134515556777492,
      "grad_norm": 0.6133443713188171,
      "learning_rate": 5.768731508997583e-06,
      "loss": 0.023,
      "step": 1304300
    },
    {
      "epoch": 2.134548287216145,
      "grad_norm": 0.6431449055671692,
      "learning_rate": 5.768665616784065e-06,
      "loss": 0.0123,
      "step": 1304320
    },
    {
      "epoch": 2.134581017654799,
      "grad_norm": 0.20021361112594604,
      "learning_rate": 5.768599724570548e-06,
      "loss": 0.0132,
      "step": 1304340
    },
    {
      "epoch": 2.134613748093452,
      "grad_norm": 0.24975275993347168,
      "learning_rate": 5.768533832357031e-06,
      "loss": 0.0154,
      "step": 1304360
    },
    {
      "epoch": 2.134646478532105,
      "grad_norm": 1.0448541641235352,
      "learning_rate": 5.768467940143514e-06,
      "loss": 0.0137,
      "step": 1304380
    },
    {
      "epoch": 2.1346792089707587,
      "grad_norm": 0.39636483788490295,
      "learning_rate": 5.7684020479299965e-06,
      "loss": 0.0142,
      "step": 1304400
    },
    {
      "epoch": 2.134711939409412,
      "grad_norm": 0.6915350556373596,
      "learning_rate": 5.76833615571648e-06,
      "loss": 0.0195,
      "step": 1304420
    },
    {
      "epoch": 2.1347446698480654,
      "grad_norm": 1.298008680343628,
      "learning_rate": 5.768270263502962e-06,
      "loss": 0.0174,
      "step": 1304440
    },
    {
      "epoch": 2.1347774002867186,
      "grad_norm": 0.46428436040878296,
      "learning_rate": 5.7682043712894455e-06,
      "loss": 0.0239,
      "step": 1304460
    },
    {
      "epoch": 2.134810130725372,
      "grad_norm": 0.36792492866516113,
      "learning_rate": 5.768138479075927e-06,
      "loss": 0.0119,
      "step": 1304480
    },
    {
      "epoch": 2.1348428611640253,
      "grad_norm": 0.7203490734100342,
      "learning_rate": 5.768072586862411e-06,
      "loss": 0.018,
      "step": 1304500
    },
    {
      "epoch": 2.1348755916026785,
      "grad_norm": 0.5810405611991882,
      "learning_rate": 5.768006694648893e-06,
      "loss": 0.019,
      "step": 1304520
    },
    {
      "epoch": 2.134908322041332,
      "grad_norm": 0.7655107378959656,
      "learning_rate": 5.7679408024353765e-06,
      "loss": 0.0116,
      "step": 1304540
    },
    {
      "epoch": 2.1349410524799852,
      "grad_norm": 0.27054741978645325,
      "learning_rate": 5.76787491022186e-06,
      "loss": 0.0116,
      "step": 1304560
    },
    {
      "epoch": 2.134973782918639,
      "grad_norm": 0.8209131956100464,
      "learning_rate": 5.767809018008342e-06,
      "loss": 0.0136,
      "step": 1304580
    },
    {
      "epoch": 2.135006513357292,
      "grad_norm": 0.37091851234436035,
      "learning_rate": 5.7677431257948256e-06,
      "loss": 0.0141,
      "step": 1304600
    },
    {
      "epoch": 2.135039243795945,
      "grad_norm": 1.7039861679077148,
      "learning_rate": 5.767677233581308e-06,
      "loss": 0.0206,
      "step": 1304620
    },
    {
      "epoch": 2.1350719742345987,
      "grad_norm": 1.2482236623764038,
      "learning_rate": 5.767611341367791e-06,
      "loss": 0.0204,
      "step": 1304640
    },
    {
      "epoch": 2.135104704673252,
      "grad_norm": 0.45980367064476013,
      "learning_rate": 5.767545449154274e-06,
      "loss": 0.0101,
      "step": 1304660
    },
    {
      "epoch": 2.1351374351119055,
      "grad_norm": 0.4891163110733032,
      "learning_rate": 5.767479556940757e-06,
      "loss": 0.0143,
      "step": 1304680
    },
    {
      "epoch": 2.1351701655505586,
      "grad_norm": 0.7418471574783325,
      "learning_rate": 5.767413664727239e-06,
      "loss": 0.018,
      "step": 1304700
    },
    {
      "epoch": 2.135202895989212,
      "grad_norm": 0.24808381497859955,
      "learning_rate": 5.767347772513723e-06,
      "loss": 0.0141,
      "step": 1304720
    },
    {
      "epoch": 2.1352356264278654,
      "grad_norm": 0.3560050129890442,
      "learning_rate": 5.767281880300205e-06,
      "loss": 0.0112,
      "step": 1304740
    },
    {
      "epoch": 2.1352683568665185,
      "grad_norm": 0.21634754538536072,
      "learning_rate": 5.767215988086688e-06,
      "loss": 0.0217,
      "step": 1304760
    },
    {
      "epoch": 2.135301087305172,
      "grad_norm": 0.7930874824523926,
      "learning_rate": 5.767150095873171e-06,
      "loss": 0.0218,
      "step": 1304780
    },
    {
      "epoch": 2.1353338177438252,
      "grad_norm": 0.6814293265342712,
      "learning_rate": 5.767084203659654e-06,
      "loss": 0.0115,
      "step": 1304800
    },
    {
      "epoch": 2.135366548182479,
      "grad_norm": 0.2747440040111542,
      "learning_rate": 5.7670183114461365e-06,
      "loss": 0.0181,
      "step": 1304820
    },
    {
      "epoch": 2.135399278621132,
      "grad_norm": 0.16852232813835144,
      "learning_rate": 5.76695241923262e-06,
      "loss": 0.015,
      "step": 1304840
    },
    {
      "epoch": 2.1354320090597856,
      "grad_norm": 0.19952133297920227,
      "learning_rate": 5.766886527019102e-06,
      "loss": 0.0158,
      "step": 1304860
    },
    {
      "epoch": 2.1354647394984387,
      "grad_norm": 0.8906107544898987,
      "learning_rate": 5.766820634805586e-06,
      "loss": 0.0126,
      "step": 1304880
    },
    {
      "epoch": 2.135497469937092,
      "grad_norm": 0.7094390988349915,
      "learning_rate": 5.766754742592069e-06,
      "loss": 0.0116,
      "step": 1304900
    },
    {
      "epoch": 2.1355302003757455,
      "grad_norm": 2.857670545578003,
      "learning_rate": 5.766688850378551e-06,
      "loss": 0.018,
      "step": 1304920
    },
    {
      "epoch": 2.1355629308143986,
      "grad_norm": 0.1818741112947464,
      "learning_rate": 5.766622958165035e-06,
      "loss": 0.017,
      "step": 1304940
    },
    {
      "epoch": 2.1355956612530522,
      "grad_norm": 0.2438226193189621,
      "learning_rate": 5.7665570659515166e-06,
      "loss": 0.017,
      "step": 1304960
    },
    {
      "epoch": 2.1356283916917054,
      "grad_norm": 0.22579362988471985,
      "learning_rate": 5.766491173738e-06,
      "loss": 0.0169,
      "step": 1304980
    },
    {
      "epoch": 2.135661122130359,
      "grad_norm": 0.5064919590950012,
      "learning_rate": 5.766425281524483e-06,
      "loss": 0.0168,
      "step": 1305000
    },
    {
      "epoch": 2.135693852569012,
      "grad_norm": 0.19596804678440094,
      "learning_rate": 5.766359389310966e-06,
      "loss": 0.0093,
      "step": 1305020
    },
    {
      "epoch": 2.1357265830076653,
      "grad_norm": 0.25631704926490784,
      "learning_rate": 5.766293497097448e-06,
      "loss": 0.0162,
      "step": 1305040
    },
    {
      "epoch": 2.135759313446319,
      "grad_norm": 0.384517639875412,
      "learning_rate": 5.766227604883932e-06,
      "loss": 0.0136,
      "step": 1305060
    },
    {
      "epoch": 2.135792043884972,
      "grad_norm": 0.16339896619319916,
      "learning_rate": 5.766161712670414e-06,
      "loss": 0.0144,
      "step": 1305080
    },
    {
      "epoch": 2.1358247743236256,
      "grad_norm": 0.20799869298934937,
      "learning_rate": 5.7660958204568974e-06,
      "loss": 0.019,
      "step": 1305100
    },
    {
      "epoch": 2.1358575047622788,
      "grad_norm": 0.3054085373878479,
      "learning_rate": 5.766029928243379e-06,
      "loss": 0.0105,
      "step": 1305120
    },
    {
      "epoch": 2.1358902352009324,
      "grad_norm": 0.5281103253364563,
      "learning_rate": 5.765964036029863e-06,
      "loss": 0.0112,
      "step": 1305140
    },
    {
      "epoch": 2.1359229656395855,
      "grad_norm": 0.6563336253166199,
      "learning_rate": 5.765898143816346e-06,
      "loss": 0.0116,
      "step": 1305160
    },
    {
      "epoch": 2.1359556960782387,
      "grad_norm": 1.13350248336792,
      "learning_rate": 5.765832251602828e-06,
      "loss": 0.0136,
      "step": 1305180
    },
    {
      "epoch": 2.1359884265168922,
      "grad_norm": 0.05096348747611046,
      "learning_rate": 5.765766359389311e-06,
      "loss": 0.0165,
      "step": 1305200
    },
    {
      "epoch": 2.1360211569555454,
      "grad_norm": 0.2651490569114685,
      "learning_rate": 5.765700467175795e-06,
      "loss": 0.0195,
      "step": 1305220
    },
    {
      "epoch": 2.136053887394199,
      "grad_norm": 2.6974315643310547,
      "learning_rate": 5.765634574962277e-06,
      "loss": 0.0133,
      "step": 1305240
    },
    {
      "epoch": 2.136086617832852,
      "grad_norm": 0.39660266041755676,
      "learning_rate": 5.76556868274876e-06,
      "loss": 0.0191,
      "step": 1305260
    },
    {
      "epoch": 2.1361193482715057,
      "grad_norm": 0.35523080825805664,
      "learning_rate": 5.765502790535244e-06,
      "loss": 0.0224,
      "step": 1305280
    },
    {
      "epoch": 2.136152078710159,
      "grad_norm": 0.1636875718832016,
      "learning_rate": 5.765436898321726e-06,
      "loss": 0.0212,
      "step": 1305300
    },
    {
      "epoch": 2.136184809148812,
      "grad_norm": 0.6878158450126648,
      "learning_rate": 5.765371006108209e-06,
      "loss": 0.017,
      "step": 1305320
    },
    {
      "epoch": 2.1362175395874656,
      "grad_norm": 0.20912691950798035,
      "learning_rate": 5.765305113894691e-06,
      "loss": 0.0144,
      "step": 1305340
    },
    {
      "epoch": 2.136250270026119,
      "grad_norm": 0.5362838506698608,
      "learning_rate": 5.765239221681175e-06,
      "loss": 0.0115,
      "step": 1305360
    },
    {
      "epoch": 2.1362830004647724,
      "grad_norm": 0.38197553157806396,
      "learning_rate": 5.7651733294676575e-06,
      "loss": 0.0156,
      "step": 1305380
    },
    {
      "epoch": 2.1363157309034255,
      "grad_norm": 0.5233713984489441,
      "learning_rate": 5.76510743725414e-06,
      "loss": 0.0109,
      "step": 1305400
    },
    {
      "epoch": 2.136348461342079,
      "grad_norm": 0.12746970355510712,
      "learning_rate": 5.765041545040623e-06,
      "loss": 0.0129,
      "step": 1305420
    },
    {
      "epoch": 2.1363811917807323,
      "grad_norm": 0.8849735260009766,
      "learning_rate": 5.7649756528271066e-06,
      "loss": 0.019,
      "step": 1305440
    },
    {
      "epoch": 2.1364139222193854,
      "grad_norm": 0.51540207862854,
      "learning_rate": 5.7649097606135884e-06,
      "loss": 0.0165,
      "step": 1305460
    },
    {
      "epoch": 2.136446652658039,
      "grad_norm": 0.33453693985939026,
      "learning_rate": 5.764843868400072e-06,
      "loss": 0.0165,
      "step": 1305480
    },
    {
      "epoch": 2.136479383096692,
      "grad_norm": 0.07807131856679916,
      "learning_rate": 5.764777976186554e-06,
      "loss": 0.0162,
      "step": 1305500
    },
    {
      "epoch": 2.1365121135353458,
      "grad_norm": 0.527976930141449,
      "learning_rate": 5.7647120839730375e-06,
      "loss": 0.0201,
      "step": 1305520
    },
    {
      "epoch": 2.136544843973999,
      "grad_norm": 0.47130537033081055,
      "learning_rate": 5.764646191759519e-06,
      "loss": 0.0116,
      "step": 1305540
    },
    {
      "epoch": 2.1365775744126525,
      "grad_norm": 0.34728261828422546,
      "learning_rate": 5.764580299546003e-06,
      "loss": 0.0156,
      "step": 1305560
    },
    {
      "epoch": 2.1366103048513057,
      "grad_norm": 1.976413607597351,
      "learning_rate": 5.764514407332486e-06,
      "loss": 0.0253,
      "step": 1305580
    },
    {
      "epoch": 2.136643035289959,
      "grad_norm": 0.3444668650627136,
      "learning_rate": 5.7644485151189685e-06,
      "loss": 0.0199,
      "step": 1305600
    },
    {
      "epoch": 2.1366757657286124,
      "grad_norm": 0.3632987141609192,
      "learning_rate": 5.764382622905452e-06,
      "loss": 0.0129,
      "step": 1305620
    },
    {
      "epoch": 2.1367084961672655,
      "grad_norm": 0.9221106767654419,
      "learning_rate": 5.764316730691935e-06,
      "loss": 0.025,
      "step": 1305640
    },
    {
      "epoch": 2.136741226605919,
      "grad_norm": 1.5383172035217285,
      "learning_rate": 5.7642508384784175e-06,
      "loss": 0.0262,
      "step": 1305660
    },
    {
      "epoch": 2.1367739570445723,
      "grad_norm": 0.2358246147632599,
      "learning_rate": 5.7641849462649e-06,
      "loss": 0.0173,
      "step": 1305680
    },
    {
      "epoch": 2.1368066874832254,
      "grad_norm": 0.4246130585670471,
      "learning_rate": 5.764119054051384e-06,
      "loss": 0.0194,
      "step": 1305700
    },
    {
      "epoch": 2.136839417921879,
      "grad_norm": 1.2300931215286255,
      "learning_rate": 5.764053161837866e-06,
      "loss": 0.0126,
      "step": 1305720
    },
    {
      "epoch": 2.136872148360532,
      "grad_norm": 0.4237968921661377,
      "learning_rate": 5.763987269624349e-06,
      "loss": 0.0177,
      "step": 1305740
    },
    {
      "epoch": 2.136904878799186,
      "grad_norm": 0.11278951913118362,
      "learning_rate": 5.763921377410831e-06,
      "loss": 0.017,
      "step": 1305760
    },
    {
      "epoch": 2.136937609237839,
      "grad_norm": 0.39670929312705994,
      "learning_rate": 5.763855485197315e-06,
      "loss": 0.0125,
      "step": 1305780
    },
    {
      "epoch": 2.1369703396764925,
      "grad_norm": 0.5901723504066467,
      "learning_rate": 5.7637895929837976e-06,
      "loss": 0.0159,
      "step": 1305800
    },
    {
      "epoch": 2.1370030701151457,
      "grad_norm": 0.1696692258119583,
      "learning_rate": 5.76372370077028e-06,
      "loss": 0.015,
      "step": 1305820
    },
    {
      "epoch": 2.137035800553799,
      "grad_norm": 0.26605719327926636,
      "learning_rate": 5.763657808556763e-06,
      "loss": 0.0152,
      "step": 1305840
    },
    {
      "epoch": 2.1370685309924524,
      "grad_norm": 0.9609614610671997,
      "learning_rate": 5.763591916343247e-06,
      "loss": 0.0143,
      "step": 1305860
    },
    {
      "epoch": 2.1371012614311056,
      "grad_norm": 0.3296293318271637,
      "learning_rate": 5.7635260241297285e-06,
      "loss": 0.0192,
      "step": 1305880
    },
    {
      "epoch": 2.137133991869759,
      "grad_norm": 0.2045186161994934,
      "learning_rate": 5.763460131916212e-06,
      "loss": 0.0175,
      "step": 1305900
    },
    {
      "epoch": 2.1371667223084123,
      "grad_norm": 0.707571268081665,
      "learning_rate": 5.763394239702694e-06,
      "loss": 0.0165,
      "step": 1305920
    },
    {
      "epoch": 2.137199452747066,
      "grad_norm": 0.8789988160133362,
      "learning_rate": 5.763328347489178e-06,
      "loss": 0.0102,
      "step": 1305940
    },
    {
      "epoch": 2.137232183185719,
      "grad_norm": 0.12156959623098373,
      "learning_rate": 5.763262455275661e-06,
      "loss": 0.0152,
      "step": 1305960
    },
    {
      "epoch": 2.137264913624372,
      "grad_norm": 0.8437167406082153,
      "learning_rate": 5.763196563062143e-06,
      "loss": 0.0226,
      "step": 1305980
    },
    {
      "epoch": 2.137297644063026,
      "grad_norm": 0.13203968107700348,
      "learning_rate": 5.763130670848627e-06,
      "loss": 0.0148,
      "step": 1306000
    },
    {
      "epoch": 2.137330374501679,
      "grad_norm": 0.4100082814693451,
      "learning_rate": 5.763064778635109e-06,
      "loss": 0.0178,
      "step": 1306020
    },
    {
      "epoch": 2.1373631049403325,
      "grad_norm": 0.10339134186506271,
      "learning_rate": 5.762998886421592e-06,
      "loss": 0.0111,
      "step": 1306040
    },
    {
      "epoch": 2.1373958353789857,
      "grad_norm": 0.24901586771011353,
      "learning_rate": 5.762932994208075e-06,
      "loss": 0.0136,
      "step": 1306060
    },
    {
      "epoch": 2.1374285658176393,
      "grad_norm": 0.641426682472229,
      "learning_rate": 5.7628671019945585e-06,
      "loss": 0.018,
      "step": 1306080
    },
    {
      "epoch": 2.1374612962562924,
      "grad_norm": 0.12160392105579376,
      "learning_rate": 5.76280120978104e-06,
      "loss": 0.0112,
      "step": 1306100
    },
    {
      "epoch": 2.1374940266949456,
      "grad_norm": 0.34502536058425903,
      "learning_rate": 5.762735317567524e-06,
      "loss": 0.0136,
      "step": 1306120
    },
    {
      "epoch": 2.137526757133599,
      "grad_norm": 0.27790525555610657,
      "learning_rate": 5.762669425354006e-06,
      "loss": 0.0268,
      "step": 1306140
    },
    {
      "epoch": 2.1375594875722523,
      "grad_norm": 0.25750264525413513,
      "learning_rate": 5.762603533140489e-06,
      "loss": 0.0186,
      "step": 1306160
    },
    {
      "epoch": 2.137592218010906,
      "grad_norm": 0.4689196050167084,
      "learning_rate": 5.762537640926972e-06,
      "loss": 0.0143,
      "step": 1306180
    },
    {
      "epoch": 2.137624948449559,
      "grad_norm": 0.34809210896492004,
      "learning_rate": 5.762471748713455e-06,
      "loss": 0.0183,
      "step": 1306200
    },
    {
      "epoch": 2.1376576788882122,
      "grad_norm": 0.3021494448184967,
      "learning_rate": 5.762405856499938e-06,
      "loss": 0.0177,
      "step": 1306220
    },
    {
      "epoch": 2.137690409326866,
      "grad_norm": 0.6906636357307434,
      "learning_rate": 5.762339964286421e-06,
      "loss": 0.0137,
      "step": 1306240
    },
    {
      "epoch": 2.137723139765519,
      "grad_norm": 0.26833802461624146,
      "learning_rate": 5.762274072072903e-06,
      "loss": 0.0146,
      "step": 1306260
    },
    {
      "epoch": 2.1377558702041726,
      "grad_norm": 0.6383671760559082,
      "learning_rate": 5.762208179859387e-06,
      "loss": 0.0067,
      "step": 1306280
    },
    {
      "epoch": 2.1377886006428257,
      "grad_norm": 0.27581170201301575,
      "learning_rate": 5.762142287645869e-06,
      "loss": 0.0151,
      "step": 1306300
    },
    {
      "epoch": 2.1378213310814793,
      "grad_norm": 0.14164289832115173,
      "learning_rate": 5.762076395432352e-06,
      "loss": 0.0163,
      "step": 1306320
    },
    {
      "epoch": 2.1378540615201325,
      "grad_norm": 0.8371899127960205,
      "learning_rate": 5.762010503218836e-06,
      "loss": 0.0152,
      "step": 1306340
    },
    {
      "epoch": 2.1378867919587856,
      "grad_norm": 0.49132129549980164,
      "learning_rate": 5.761944611005318e-06,
      "loss": 0.0164,
      "step": 1306360
    },
    {
      "epoch": 2.137919522397439,
      "grad_norm": 0.5086714029312134,
      "learning_rate": 5.761878718791801e-06,
      "loss": 0.011,
      "step": 1306380
    },
    {
      "epoch": 2.1379522528360924,
      "grad_norm": 0.4429710805416107,
      "learning_rate": 5.761812826578283e-06,
      "loss": 0.0183,
      "step": 1306400
    },
    {
      "epoch": 2.137984983274746,
      "grad_norm": 0.4312897026538849,
      "learning_rate": 5.761746934364767e-06,
      "loss": 0.0148,
      "step": 1306420
    },
    {
      "epoch": 2.138017713713399,
      "grad_norm": 0.2825433909893036,
      "learning_rate": 5.7616810421512495e-06,
      "loss": 0.0115,
      "step": 1306440
    },
    {
      "epoch": 2.1380504441520527,
      "grad_norm": 0.32580432295799255,
      "learning_rate": 5.761615149937733e-06,
      "loss": 0.0146,
      "step": 1306460
    },
    {
      "epoch": 2.138083174590706,
      "grad_norm": 0.2003174126148224,
      "learning_rate": 5.761549257724215e-06,
      "loss": 0.0159,
      "step": 1306480
    },
    {
      "epoch": 2.138115905029359,
      "grad_norm": 0.12391209602355957,
      "learning_rate": 5.7614833655106985e-06,
      "loss": 0.0198,
      "step": 1306500
    },
    {
      "epoch": 2.1381486354680126,
      "grad_norm": 0.9684229493141174,
      "learning_rate": 5.7614174732971804e-06,
      "loss": 0.0169,
      "step": 1306520
    },
    {
      "epoch": 2.1381813659066657,
      "grad_norm": 0.4641031324863434,
      "learning_rate": 5.761351581083664e-06,
      "loss": 0.0127,
      "step": 1306540
    },
    {
      "epoch": 2.1382140963453193,
      "grad_norm": 0.23128286004066467,
      "learning_rate": 5.761285688870146e-06,
      "loss": 0.0135,
      "step": 1306560
    },
    {
      "epoch": 2.1382468267839725,
      "grad_norm": 0.2505243122577667,
      "learning_rate": 5.7612197966566295e-06,
      "loss": 0.0146,
      "step": 1306580
    },
    {
      "epoch": 2.138279557222626,
      "grad_norm": 0.6566988229751587,
      "learning_rate": 5.761153904443112e-06,
      "loss": 0.0191,
      "step": 1306600
    },
    {
      "epoch": 2.1383122876612792,
      "grad_norm": 0.326850950717926,
      "learning_rate": 5.761088012229595e-06,
      "loss": 0.0158,
      "step": 1306620
    },
    {
      "epoch": 2.1383450180999324,
      "grad_norm": 0.046389851719141006,
      "learning_rate": 5.761022120016078e-06,
      "loss": 0.0138,
      "step": 1306640
    },
    {
      "epoch": 2.138377748538586,
      "grad_norm": 0.1096515953540802,
      "learning_rate": 5.760956227802561e-06,
      "loss": 0.012,
      "step": 1306660
    },
    {
      "epoch": 2.138410478977239,
      "grad_norm": 0.23453430831432343,
      "learning_rate": 5.760890335589044e-06,
      "loss": 0.0177,
      "step": 1306680
    },
    {
      "epoch": 2.1384432094158927,
      "grad_norm": 0.5844324231147766,
      "learning_rate": 5.760824443375527e-06,
      "loss": 0.014,
      "step": 1306700
    },
    {
      "epoch": 2.138475939854546,
      "grad_norm": 0.17499928176403046,
      "learning_rate": 5.76075855116201e-06,
      "loss": 0.0102,
      "step": 1306720
    },
    {
      "epoch": 2.1385086702931995,
      "grad_norm": 0.7177808880805969,
      "learning_rate": 5.760692658948492e-06,
      "loss": 0.0138,
      "step": 1306740
    },
    {
      "epoch": 2.1385414007318526,
      "grad_norm": 0.4013340175151825,
      "learning_rate": 5.760626766734976e-06,
      "loss": 0.0149,
      "step": 1306760
    },
    {
      "epoch": 2.1385741311705058,
      "grad_norm": 0.4954681694507599,
      "learning_rate": 5.760560874521458e-06,
      "loss": 0.0131,
      "step": 1306780
    },
    {
      "epoch": 2.1386068616091594,
      "grad_norm": 0.48076364398002625,
      "learning_rate": 5.760494982307941e-06,
      "loss": 0.0153,
      "step": 1306800
    },
    {
      "epoch": 2.1386395920478125,
      "grad_norm": 0.18706682324409485,
      "learning_rate": 5.760429090094424e-06,
      "loss": 0.0159,
      "step": 1306820
    },
    {
      "epoch": 2.138672322486466,
      "grad_norm": 0.38944709300994873,
      "learning_rate": 5.760363197880907e-06,
      "loss": 0.0182,
      "step": 1306840
    },
    {
      "epoch": 2.1387050529251193,
      "grad_norm": 0.6603525876998901,
      "learning_rate": 5.7602973056673895e-06,
      "loss": 0.0242,
      "step": 1306860
    },
    {
      "epoch": 2.138737783363773,
      "grad_norm": 0.4542962312698364,
      "learning_rate": 5.760231413453873e-06,
      "loss": 0.0149,
      "step": 1306880
    },
    {
      "epoch": 2.138770513802426,
      "grad_norm": 0.5804305076599121,
      "learning_rate": 5.760165521240355e-06,
      "loss": 0.0099,
      "step": 1306900
    },
    {
      "epoch": 2.138803244241079,
      "grad_norm": 0.4506947100162506,
      "learning_rate": 5.760099629026839e-06,
      "loss": 0.018,
      "step": 1306920
    },
    {
      "epoch": 2.1388359746797327,
      "grad_norm": 1.003258228302002,
      "learning_rate": 5.7600337368133205e-06,
      "loss": 0.0166,
      "step": 1306940
    },
    {
      "epoch": 2.138868705118386,
      "grad_norm": 0.7480130195617676,
      "learning_rate": 5.759967844599804e-06,
      "loss": 0.0263,
      "step": 1306960
    },
    {
      "epoch": 2.1389014355570395,
      "grad_norm": 0.4178318679332733,
      "learning_rate": 5.759901952386287e-06,
      "loss": 0.0138,
      "step": 1306980
    },
    {
      "epoch": 2.1389341659956926,
      "grad_norm": 0.11712563782930374,
      "learning_rate": 5.7598360601727696e-06,
      "loss": 0.013,
      "step": 1307000
    },
    {
      "epoch": 2.1389668964343462,
      "grad_norm": 0.7042529582977295,
      "learning_rate": 5.759770167959253e-06,
      "loss": 0.0154,
      "step": 1307020
    },
    {
      "epoch": 2.1389996268729994,
      "grad_norm": 0.4942961633205414,
      "learning_rate": 5.759704275745736e-06,
      "loss": 0.0167,
      "step": 1307040
    },
    {
      "epoch": 2.1390323573116525,
      "grad_norm": 0.6974030137062073,
      "learning_rate": 5.759638383532219e-06,
      "loss": 0.0171,
      "step": 1307060
    },
    {
      "epoch": 2.139065087750306,
      "grad_norm": 0.2296101301908493,
      "learning_rate": 5.759572491318701e-06,
      "loss": 0.016,
      "step": 1307080
    },
    {
      "epoch": 2.1390978181889593,
      "grad_norm": 0.7413593530654907,
      "learning_rate": 5.759506599105185e-06,
      "loss": 0.0199,
      "step": 1307100
    },
    {
      "epoch": 2.139130548627613,
      "grad_norm": 0.5882266759872437,
      "learning_rate": 5.759440706891667e-06,
      "loss": 0.0142,
      "step": 1307120
    },
    {
      "epoch": 2.139163279066266,
      "grad_norm": 0.6283961534500122,
      "learning_rate": 5.7593748146781504e-06,
      "loss": 0.0229,
      "step": 1307140
    },
    {
      "epoch": 2.1391960095049196,
      "grad_norm": 0.9541521072387695,
      "learning_rate": 5.759308922464632e-06,
      "loss": 0.0164,
      "step": 1307160
    },
    {
      "epoch": 2.1392287399435728,
      "grad_norm": 0.34890156984329224,
      "learning_rate": 5.759243030251116e-06,
      "loss": 0.0227,
      "step": 1307180
    },
    {
      "epoch": 2.139261470382226,
      "grad_norm": 0.2854681611061096,
      "learning_rate": 5.759177138037599e-06,
      "loss": 0.0163,
      "step": 1307200
    },
    {
      "epoch": 2.1392942008208795,
      "grad_norm": 0.6991150379180908,
      "learning_rate": 5.759111245824081e-06,
      "loss": 0.0215,
      "step": 1307220
    },
    {
      "epoch": 2.1393269312595327,
      "grad_norm": 0.6178586483001709,
      "learning_rate": 5.759045353610564e-06,
      "loss": 0.0171,
      "step": 1307240
    },
    {
      "epoch": 2.1393596616981863,
      "grad_norm": 0.31312716007232666,
      "learning_rate": 5.758979461397048e-06,
      "loss": 0.0089,
      "step": 1307260
    },
    {
      "epoch": 2.1393923921368394,
      "grad_norm": 0.2643066644668579,
      "learning_rate": 5.75891356918353e-06,
      "loss": 0.0157,
      "step": 1307280
    },
    {
      "epoch": 2.1394251225754926,
      "grad_norm": 0.6433922052383423,
      "learning_rate": 5.758847676970013e-06,
      "loss": 0.0166,
      "step": 1307300
    },
    {
      "epoch": 2.139457853014146,
      "grad_norm": 0.31924277544021606,
      "learning_rate": 5.758781784756495e-06,
      "loss": 0.0159,
      "step": 1307320
    },
    {
      "epoch": 2.1394905834527993,
      "grad_norm": 0.47836196422576904,
      "learning_rate": 5.758715892542979e-06,
      "loss": 0.0129,
      "step": 1307340
    },
    {
      "epoch": 2.139523313891453,
      "grad_norm": 0.09480064362287521,
      "learning_rate": 5.758650000329462e-06,
      "loss": 0.0137,
      "step": 1307360
    },
    {
      "epoch": 2.139556044330106,
      "grad_norm": 0.7971357703208923,
      "learning_rate": 5.758584108115944e-06,
      "loss": 0.0222,
      "step": 1307380
    },
    {
      "epoch": 2.1395887747687596,
      "grad_norm": 0.6563971638679504,
      "learning_rate": 5.758518215902428e-06,
      "loss": 0.0109,
      "step": 1307400
    },
    {
      "epoch": 2.139621505207413,
      "grad_norm": 0.2770630121231079,
      "learning_rate": 5.75845232368891e-06,
      "loss": 0.012,
      "step": 1307420
    },
    {
      "epoch": 2.139654235646066,
      "grad_norm": 0.18329380452632904,
      "learning_rate": 5.758386431475393e-06,
      "loss": 0.0189,
      "step": 1307440
    },
    {
      "epoch": 2.1396869660847195,
      "grad_norm": 0.38303789496421814,
      "learning_rate": 5.758320539261876e-06,
      "loss": 0.0186,
      "step": 1307460
    },
    {
      "epoch": 2.1397196965233727,
      "grad_norm": 0.34173378348350525,
      "learning_rate": 5.758254647048359e-06,
      "loss": 0.0085,
      "step": 1307480
    },
    {
      "epoch": 2.1397524269620263,
      "grad_norm": 0.251961350440979,
      "learning_rate": 5.7581887548348415e-06,
      "loss": 0.0193,
      "step": 1307500
    },
    {
      "epoch": 2.1397851574006794,
      "grad_norm": 0.27221575379371643,
      "learning_rate": 5.758122862621325e-06,
      "loss": 0.0162,
      "step": 1307520
    },
    {
      "epoch": 2.139817887839333,
      "grad_norm": 0.4759027063846588,
      "learning_rate": 5.758056970407807e-06,
      "loss": 0.0126,
      "step": 1307540
    },
    {
      "epoch": 2.139850618277986,
      "grad_norm": 0.20020505785942078,
      "learning_rate": 5.7579910781942905e-06,
      "loss": 0.0166,
      "step": 1307560
    },
    {
      "epoch": 2.1398833487166393,
      "grad_norm": 0.27865102887153625,
      "learning_rate": 5.757925185980772e-06,
      "loss": 0.0194,
      "step": 1307580
    },
    {
      "epoch": 2.139916079155293,
      "grad_norm": 0.25625166296958923,
      "learning_rate": 5.757859293767256e-06,
      "loss": 0.0166,
      "step": 1307600
    },
    {
      "epoch": 2.139948809593946,
      "grad_norm": 0.38021427392959595,
      "learning_rate": 5.757793401553739e-06,
      "loss": 0.0143,
      "step": 1307620
    },
    {
      "epoch": 2.1399815400325997,
      "grad_norm": 0.7649210691452026,
      "learning_rate": 5.7577275093402215e-06,
      "loss": 0.0121,
      "step": 1307640
    },
    {
      "epoch": 2.140014270471253,
      "grad_norm": 0.17336207628250122,
      "learning_rate": 5.757661617126704e-06,
      "loss": 0.0121,
      "step": 1307660
    },
    {
      "epoch": 2.140047000909906,
      "grad_norm": 0.629244327545166,
      "learning_rate": 5.757595724913188e-06,
      "loss": 0.0163,
      "step": 1307680
    },
    {
      "epoch": 2.1400797313485596,
      "grad_norm": 0.9093495607376099,
      "learning_rate": 5.75752983269967e-06,
      "loss": 0.0164,
      "step": 1307700
    },
    {
      "epoch": 2.1401124617872127,
      "grad_norm": 0.525360107421875,
      "learning_rate": 5.757463940486153e-06,
      "loss": 0.0187,
      "step": 1307720
    },
    {
      "epoch": 2.1401451922258663,
      "grad_norm": 0.6669579744338989,
      "learning_rate": 5.757398048272637e-06,
      "loss": 0.0218,
      "step": 1307740
    },
    {
      "epoch": 2.1401779226645194,
      "grad_norm": 0.5658550262451172,
      "learning_rate": 5.757332156059119e-06,
      "loss": 0.0191,
      "step": 1307760
    },
    {
      "epoch": 2.140210653103173,
      "grad_norm": 0.2214420884847641,
      "learning_rate": 5.757266263845602e-06,
      "loss": 0.0154,
      "step": 1307780
    },
    {
      "epoch": 2.140243383541826,
      "grad_norm": 0.08370033651590347,
      "learning_rate": 5.757200371632084e-06,
      "loss": 0.0084,
      "step": 1307800
    },
    {
      "epoch": 2.1402761139804793,
      "grad_norm": 0.30521318316459656,
      "learning_rate": 5.757134479418568e-06,
      "loss": 0.0139,
      "step": 1307820
    },
    {
      "epoch": 2.140308844419133,
      "grad_norm": 0.6339123845100403,
      "learning_rate": 5.7570685872050506e-06,
      "loss": 0.0191,
      "step": 1307840
    },
    {
      "epoch": 2.140341574857786,
      "grad_norm": 0.2819381356239319,
      "learning_rate": 5.757002694991533e-06,
      "loss": 0.0133,
      "step": 1307860
    },
    {
      "epoch": 2.1403743052964397,
      "grad_norm": 0.3687607944011688,
      "learning_rate": 5.756936802778016e-06,
      "loss": 0.0189,
      "step": 1307880
    },
    {
      "epoch": 2.140407035735093,
      "grad_norm": 0.06276807188987732,
      "learning_rate": 5.7568709105645e-06,
      "loss": 0.0137,
      "step": 1307900
    },
    {
      "epoch": 2.1404397661737464,
      "grad_norm": 0.7732345461845398,
      "learning_rate": 5.7568050183509815e-06,
      "loss": 0.0116,
      "step": 1307920
    },
    {
      "epoch": 2.1404724966123996,
      "grad_norm": 0.4734377861022949,
      "learning_rate": 5.756739126137465e-06,
      "loss": 0.0179,
      "step": 1307940
    },
    {
      "epoch": 2.1405052270510527,
      "grad_norm": 0.2275611162185669,
      "learning_rate": 5.756673233923947e-06,
      "loss": 0.0121,
      "step": 1307960
    },
    {
      "epoch": 2.1405379574897063,
      "grad_norm": 0.21817442774772644,
      "learning_rate": 5.756607341710431e-06,
      "loss": 0.0226,
      "step": 1307980
    },
    {
      "epoch": 2.1405706879283595,
      "grad_norm": 0.19550195336341858,
      "learning_rate": 5.756541449496913e-06,
      "loss": 0.0138,
      "step": 1308000
    },
    {
      "epoch": 2.140603418367013,
      "grad_norm": 0.11968974024057388,
      "learning_rate": 5.756475557283396e-06,
      "loss": 0.0132,
      "step": 1308020
    },
    {
      "epoch": 2.140636148805666,
      "grad_norm": 0.21661938726902008,
      "learning_rate": 5.756409665069879e-06,
      "loss": 0.0169,
      "step": 1308040
    },
    {
      "epoch": 2.14066887924432,
      "grad_norm": 0.7803189158439636,
      "learning_rate": 5.756343772856362e-06,
      "loss": 0.0203,
      "step": 1308060
    },
    {
      "epoch": 2.140701609682973,
      "grad_norm": 0.6523493528366089,
      "learning_rate": 5.756277880642845e-06,
      "loss": 0.0137,
      "step": 1308080
    },
    {
      "epoch": 2.140734340121626,
      "grad_norm": 0.25807633996009827,
      "learning_rate": 5.756211988429328e-06,
      "loss": 0.0126,
      "step": 1308100
    },
    {
      "epoch": 2.1407670705602797,
      "grad_norm": 0.0612964853644371,
      "learning_rate": 5.7561460962158115e-06,
      "loss": 0.0188,
      "step": 1308120
    },
    {
      "epoch": 2.140799800998933,
      "grad_norm": 0.25826898217201233,
      "learning_rate": 5.756080204002293e-06,
      "loss": 0.0188,
      "step": 1308140
    },
    {
      "epoch": 2.1408325314375865,
      "grad_norm": 0.2872034013271332,
      "learning_rate": 5.756014311788777e-06,
      "loss": 0.0182,
      "step": 1308160
    },
    {
      "epoch": 2.1408652618762396,
      "grad_norm": 0.17541679739952087,
      "learning_rate": 5.755948419575259e-06,
      "loss": 0.014,
      "step": 1308180
    },
    {
      "epoch": 2.140897992314893,
      "grad_norm": 0.19512075185775757,
      "learning_rate": 5.755882527361742e-06,
      "loss": 0.0111,
      "step": 1308200
    },
    {
      "epoch": 2.1409307227535463,
      "grad_norm": 0.22264234721660614,
      "learning_rate": 5.755816635148225e-06,
      "loss": 0.0111,
      "step": 1308220
    },
    {
      "epoch": 2.1409634531921995,
      "grad_norm": 0.37117013335227966,
      "learning_rate": 5.755750742934708e-06,
      "loss": 0.0119,
      "step": 1308240
    },
    {
      "epoch": 2.140996183630853,
      "grad_norm": 0.646541953086853,
      "learning_rate": 5.755684850721191e-06,
      "loss": 0.0169,
      "step": 1308260
    },
    {
      "epoch": 2.1410289140695062,
      "grad_norm": 0.3518902361392975,
      "learning_rate": 5.755618958507674e-06,
      "loss": 0.0141,
      "step": 1308280
    },
    {
      "epoch": 2.14106164450816,
      "grad_norm": 0.14000341296195984,
      "learning_rate": 5.755553066294156e-06,
      "loss": 0.016,
      "step": 1308300
    },
    {
      "epoch": 2.141094374946813,
      "grad_norm": 0.17131295800209045,
      "learning_rate": 5.75548717408064e-06,
      "loss": 0.0163,
      "step": 1308320
    },
    {
      "epoch": 2.1411271053854666,
      "grad_norm": 0.5816973447799683,
      "learning_rate": 5.755421281867122e-06,
      "loss": 0.0158,
      "step": 1308340
    },
    {
      "epoch": 2.1411598358241197,
      "grad_norm": 0.11140581220388412,
      "learning_rate": 5.755355389653605e-06,
      "loss": 0.0147,
      "step": 1308360
    },
    {
      "epoch": 2.141192566262773,
      "grad_norm": 0.26973363757133484,
      "learning_rate": 5.755289497440087e-06,
      "loss": 0.0206,
      "step": 1308380
    },
    {
      "epoch": 2.1412252967014265,
      "grad_norm": 0.5224891901016235,
      "learning_rate": 5.755223605226571e-06,
      "loss": 0.0142,
      "step": 1308400
    },
    {
      "epoch": 2.1412580271400796,
      "grad_norm": 0.12095153331756592,
      "learning_rate": 5.755157713013054e-06,
      "loss": 0.0168,
      "step": 1308420
    },
    {
      "epoch": 2.141290757578733,
      "grad_norm": 0.321408212184906,
      "learning_rate": 5.755091820799536e-06,
      "loss": 0.0103,
      "step": 1308440
    },
    {
      "epoch": 2.1413234880173864,
      "grad_norm": 0.19771483540534973,
      "learning_rate": 5.75502592858602e-06,
      "loss": 0.0101,
      "step": 1308460
    },
    {
      "epoch": 2.14135621845604,
      "grad_norm": 0.5175141096115112,
      "learning_rate": 5.7549600363725025e-06,
      "loss": 0.017,
      "step": 1308480
    },
    {
      "epoch": 2.141388948894693,
      "grad_norm": 0.41561293601989746,
      "learning_rate": 5.754894144158985e-06,
      "loss": 0.016,
      "step": 1308500
    },
    {
      "epoch": 2.1414216793333463,
      "grad_norm": 0.12159507721662521,
      "learning_rate": 5.754828251945468e-06,
      "loss": 0.0163,
      "step": 1308520
    },
    {
      "epoch": 2.141454409772,
      "grad_norm": 0.43260687589645386,
      "learning_rate": 5.7547623597319515e-06,
      "loss": 0.0086,
      "step": 1308540
    },
    {
      "epoch": 2.141487140210653,
      "grad_norm": 1.217383623123169,
      "learning_rate": 5.7546964675184334e-06,
      "loss": 0.0178,
      "step": 1308560
    },
    {
      "epoch": 2.1415198706493066,
      "grad_norm": 0.9634737372398376,
      "learning_rate": 5.754630575304917e-06,
      "loss": 0.0144,
      "step": 1308580
    },
    {
      "epoch": 2.1415526010879598,
      "grad_norm": 0.8894502520561218,
      "learning_rate": 5.754564683091399e-06,
      "loss": 0.012,
      "step": 1308600
    },
    {
      "epoch": 2.1415853315266133,
      "grad_norm": 0.45203980803489685,
      "learning_rate": 5.7544987908778825e-06,
      "loss": 0.0153,
      "step": 1308620
    },
    {
      "epoch": 2.1416180619652665,
      "grad_norm": 0.6517942547798157,
      "learning_rate": 5.754432898664365e-06,
      "loss": 0.0147,
      "step": 1308640
    },
    {
      "epoch": 2.1416507924039196,
      "grad_norm": 0.47811996936798096,
      "learning_rate": 5.754367006450848e-06,
      "loss": 0.0151,
      "step": 1308660
    },
    {
      "epoch": 2.1416835228425732,
      "grad_norm": 0.5468310713768005,
      "learning_rate": 5.754301114237331e-06,
      "loss": 0.0198,
      "step": 1308680
    },
    {
      "epoch": 2.1417162532812264,
      "grad_norm": 0.9272260665893555,
      "learning_rate": 5.754235222023814e-06,
      "loss": 0.0189,
      "step": 1308700
    },
    {
      "epoch": 2.14174898371988,
      "grad_norm": 0.86539626121521,
      "learning_rate": 5.754169329810296e-06,
      "loss": 0.0153,
      "step": 1308720
    },
    {
      "epoch": 2.141781714158533,
      "grad_norm": 0.31346607208251953,
      "learning_rate": 5.75410343759678e-06,
      "loss": 0.0171,
      "step": 1308740
    },
    {
      "epoch": 2.1418144445971863,
      "grad_norm": 0.3554244935512543,
      "learning_rate": 5.754037545383262e-06,
      "loss": 0.0161,
      "step": 1308760
    },
    {
      "epoch": 2.14184717503584,
      "grad_norm": 0.5287081599235535,
      "learning_rate": 5.753971653169745e-06,
      "loss": 0.0139,
      "step": 1308780
    },
    {
      "epoch": 2.141879905474493,
      "grad_norm": 0.4774385988712311,
      "learning_rate": 5.753905760956229e-06,
      "loss": 0.0142,
      "step": 1308800
    },
    {
      "epoch": 2.1419126359131466,
      "grad_norm": 0.2367410957813263,
      "learning_rate": 5.753839868742711e-06,
      "loss": 0.0123,
      "step": 1308820
    },
    {
      "epoch": 2.1419453663517998,
      "grad_norm": 0.21108393371105194,
      "learning_rate": 5.753773976529194e-06,
      "loss": 0.0117,
      "step": 1308840
    },
    {
      "epoch": 2.1419780967904534,
      "grad_norm": 0.38696593046188354,
      "learning_rate": 5.753708084315677e-06,
      "loss": 0.0199,
      "step": 1308860
    },
    {
      "epoch": 2.1420108272291065,
      "grad_norm": 0.1710319072008133,
      "learning_rate": 5.75364219210216e-06,
      "loss": 0.0156,
      "step": 1308880
    },
    {
      "epoch": 2.1420435576677597,
      "grad_norm": 0.5222174525260925,
      "learning_rate": 5.7535762998886425e-06,
      "loss": 0.0138,
      "step": 1308900
    },
    {
      "epoch": 2.1420762881064133,
      "grad_norm": 0.9659665822982788,
      "learning_rate": 5.753510407675126e-06,
      "loss": 0.0152,
      "step": 1308920
    },
    {
      "epoch": 2.1421090185450664,
      "grad_norm": 0.5436053276062012,
      "learning_rate": 5.753444515461608e-06,
      "loss": 0.0255,
      "step": 1308940
    },
    {
      "epoch": 2.14214174898372,
      "grad_norm": 0.2674097716808319,
      "learning_rate": 5.753378623248092e-06,
      "loss": 0.0161,
      "step": 1308960
    },
    {
      "epoch": 2.142174479422373,
      "grad_norm": 0.32308459281921387,
      "learning_rate": 5.7533127310345735e-06,
      "loss": 0.0152,
      "step": 1308980
    },
    {
      "epoch": 2.1422072098610268,
      "grad_norm": 0.4995831549167633,
      "learning_rate": 5.753246838821057e-06,
      "loss": 0.0128,
      "step": 1309000
    },
    {
      "epoch": 2.14223994029968,
      "grad_norm": 0.47004029154777527,
      "learning_rate": 5.75318094660754e-06,
      "loss": 0.0174,
      "step": 1309020
    },
    {
      "epoch": 2.142272670738333,
      "grad_norm": 0.4215191602706909,
      "learning_rate": 5.7531150543940226e-06,
      "loss": 0.0191,
      "step": 1309040
    },
    {
      "epoch": 2.1423054011769866,
      "grad_norm": 0.7574543356895447,
      "learning_rate": 5.753049162180505e-06,
      "loss": 0.0162,
      "step": 1309060
    },
    {
      "epoch": 2.14233813161564,
      "grad_norm": 0.5605077147483826,
      "learning_rate": 5.752983269966989e-06,
      "loss": 0.0169,
      "step": 1309080
    },
    {
      "epoch": 2.1423708620542934,
      "grad_norm": 0.2738758623600006,
      "learning_rate": 5.752917377753471e-06,
      "loss": 0.0082,
      "step": 1309100
    },
    {
      "epoch": 2.1424035924929465,
      "grad_norm": 1.0094884634017944,
      "learning_rate": 5.752851485539954e-06,
      "loss": 0.0158,
      "step": 1309120
    },
    {
      "epoch": 2.1424363229316,
      "grad_norm": 0.3349541425704956,
      "learning_rate": 5.752785593326438e-06,
      "loss": 0.0169,
      "step": 1309140
    },
    {
      "epoch": 2.1424690533702533,
      "grad_norm": 0.30693790316581726,
      "learning_rate": 5.75271970111292e-06,
      "loss": 0.0127,
      "step": 1309160
    },
    {
      "epoch": 2.1425017838089064,
      "grad_norm": 0.2265588790178299,
      "learning_rate": 5.7526538088994034e-06,
      "loss": 0.0133,
      "step": 1309180
    },
    {
      "epoch": 2.14253451424756,
      "grad_norm": 0.6464706659317017,
      "learning_rate": 5.752587916685885e-06,
      "loss": 0.01,
      "step": 1309200
    },
    {
      "epoch": 2.142567244686213,
      "grad_norm": 0.49945685267448425,
      "learning_rate": 5.752522024472369e-06,
      "loss": 0.0128,
      "step": 1309220
    },
    {
      "epoch": 2.1425999751248668,
      "grad_norm": 0.43130090832710266,
      "learning_rate": 5.752456132258852e-06,
      "loss": 0.0181,
      "step": 1309240
    },
    {
      "epoch": 2.14263270556352,
      "grad_norm": 0.33063048124313354,
      "learning_rate": 5.752390240045334e-06,
      "loss": 0.0133,
      "step": 1309260
    },
    {
      "epoch": 2.142665436002173,
      "grad_norm": 0.15324729681015015,
      "learning_rate": 5.752324347831817e-06,
      "loss": 0.0129,
      "step": 1309280
    },
    {
      "epoch": 2.1426981664408267,
      "grad_norm": 0.23325058817863464,
      "learning_rate": 5.752258455618301e-06,
      "loss": 0.0106,
      "step": 1309300
    },
    {
      "epoch": 2.14273089687948,
      "grad_norm": 0.8766013383865356,
      "learning_rate": 5.752192563404783e-06,
      "loss": 0.0189,
      "step": 1309320
    },
    {
      "epoch": 2.1427636273181334,
      "grad_norm": 0.12963761389255524,
      "learning_rate": 5.752126671191266e-06,
      "loss": 0.0142,
      "step": 1309340
    },
    {
      "epoch": 2.1427963577567866,
      "grad_norm": 0.43382617831230164,
      "learning_rate": 5.752060778977748e-06,
      "loss": 0.0187,
      "step": 1309360
    },
    {
      "epoch": 2.14282908819544,
      "grad_norm": 0.24293988943099976,
      "learning_rate": 5.751994886764232e-06,
      "loss": 0.0111,
      "step": 1309380
    },
    {
      "epoch": 2.1428618186340933,
      "grad_norm": 1.2432899475097656,
      "learning_rate": 5.751928994550714e-06,
      "loss": 0.0155,
      "step": 1309400
    },
    {
      "epoch": 2.1428945490727465,
      "grad_norm": 0.2315966635942459,
      "learning_rate": 5.751863102337197e-06,
      "loss": 0.0113,
      "step": 1309420
    },
    {
      "epoch": 2.1429272795114,
      "grad_norm": 0.19022327661514282,
      "learning_rate": 5.75179721012368e-06,
      "loss": 0.0137,
      "step": 1309440
    },
    {
      "epoch": 2.142960009950053,
      "grad_norm": 0.672635555267334,
      "learning_rate": 5.751731317910163e-06,
      "loss": 0.0167,
      "step": 1309460
    },
    {
      "epoch": 2.142992740388707,
      "grad_norm": 0.22561122477054596,
      "learning_rate": 5.751665425696646e-06,
      "loss": 0.0153,
      "step": 1309480
    },
    {
      "epoch": 2.14302547082736,
      "grad_norm": 0.4861282706260681,
      "learning_rate": 5.751599533483129e-06,
      "loss": 0.0184,
      "step": 1309500
    },
    {
      "epoch": 2.1430582012660135,
      "grad_norm": 0.38850152492523193,
      "learning_rate": 5.751533641269612e-06,
      "loss": 0.0092,
      "step": 1309520
    },
    {
      "epoch": 2.1430909317046667,
      "grad_norm": 0.4312444031238556,
      "learning_rate": 5.7514677490560945e-06,
      "loss": 0.0129,
      "step": 1309540
    },
    {
      "epoch": 2.14312366214332,
      "grad_norm": 0.4533952474594116,
      "learning_rate": 5.751401856842578e-06,
      "loss": 0.0169,
      "step": 1309560
    },
    {
      "epoch": 2.1431563925819734,
      "grad_norm": 0.4027802050113678,
      "learning_rate": 5.75133596462906e-06,
      "loss": 0.0156,
      "step": 1309580
    },
    {
      "epoch": 2.1431891230206266,
      "grad_norm": 0.8314727544784546,
      "learning_rate": 5.7512700724155435e-06,
      "loss": 0.0173,
      "step": 1309600
    },
    {
      "epoch": 2.14322185345928,
      "grad_norm": 0.32069921493530273,
      "learning_rate": 5.751204180202025e-06,
      "loss": 0.0145,
      "step": 1309620
    },
    {
      "epoch": 2.1432545838979333,
      "grad_norm": 0.4672497808933258,
      "learning_rate": 5.751138287988509e-06,
      "loss": 0.0151,
      "step": 1309640
    },
    {
      "epoch": 2.143287314336587,
      "grad_norm": 0.33710581064224243,
      "learning_rate": 5.751072395774992e-06,
      "loss": 0.0178,
      "step": 1309660
    },
    {
      "epoch": 2.14332004477524,
      "grad_norm": 0.26842913031578064,
      "learning_rate": 5.7510065035614745e-06,
      "loss": 0.0156,
      "step": 1309680
    },
    {
      "epoch": 2.1433527752138932,
      "grad_norm": 0.09656292200088501,
      "learning_rate": 5.750940611347957e-06,
      "loss": 0.0107,
      "step": 1309700
    },
    {
      "epoch": 2.143385505652547,
      "grad_norm": 0.45604997873306274,
      "learning_rate": 5.750874719134441e-06,
      "loss": 0.0142,
      "step": 1309720
    },
    {
      "epoch": 2.1434182360912,
      "grad_norm": 0.3132067918777466,
      "learning_rate": 5.750808826920923e-06,
      "loss": 0.0182,
      "step": 1309740
    },
    {
      "epoch": 2.1434509665298536,
      "grad_norm": 0.19127148389816284,
      "learning_rate": 5.750742934707406e-06,
      "loss": 0.0102,
      "step": 1309760
    },
    {
      "epoch": 2.1434836969685067,
      "grad_norm": 0.5323165655136108,
      "learning_rate": 5.750677042493888e-06,
      "loss": 0.0221,
      "step": 1309780
    },
    {
      "epoch": 2.1435164274071603,
      "grad_norm": 0.5590400695800781,
      "learning_rate": 5.750611150280372e-06,
      "loss": 0.0153,
      "step": 1309800
    },
    {
      "epoch": 2.1435491578458135,
      "grad_norm": 0.6703494191169739,
      "learning_rate": 5.750545258066855e-06,
      "loss": 0.0147,
      "step": 1309820
    },
    {
      "epoch": 2.1435818882844666,
      "grad_norm": 0.2267547845840454,
      "learning_rate": 5.750479365853337e-06,
      "loss": 0.0214,
      "step": 1309840
    },
    {
      "epoch": 2.14361461872312,
      "grad_norm": 0.638745129108429,
      "learning_rate": 5.750413473639821e-06,
      "loss": 0.015,
      "step": 1309860
    },
    {
      "epoch": 2.1436473491617734,
      "grad_norm": 0.35383141040802,
      "learning_rate": 5.7503475814263036e-06,
      "loss": 0.014,
      "step": 1309880
    },
    {
      "epoch": 2.143680079600427,
      "grad_norm": 0.535507082939148,
      "learning_rate": 5.750281689212786e-06,
      "loss": 0.0203,
      "step": 1309900
    },
    {
      "epoch": 2.14371281003908,
      "grad_norm": 0.5625340938568115,
      "learning_rate": 5.750215796999269e-06,
      "loss": 0.0156,
      "step": 1309920
    },
    {
      "epoch": 2.1437455404777337,
      "grad_norm": 0.3700307309627533,
      "learning_rate": 5.750149904785753e-06,
      "loss": 0.0171,
      "step": 1309940
    },
    {
      "epoch": 2.143778270916387,
      "grad_norm": 0.21214981377124786,
      "learning_rate": 5.7500840125722345e-06,
      "loss": 0.0112,
      "step": 1309960
    },
    {
      "epoch": 2.14381100135504,
      "grad_norm": 0.5538150072097778,
      "learning_rate": 5.750018120358718e-06,
      "loss": 0.02,
      "step": 1309980
    },
    {
      "epoch": 2.1438437317936936,
      "grad_norm": 0.5780795216560364,
      "learning_rate": 5.7499522281452e-06,
      "loss": 0.0177,
      "step": 1310000
    },
    {
      "epoch": 2.1438764622323467,
      "grad_norm": 0.5302063226699829,
      "learning_rate": 5.749886335931684e-06,
      "loss": 0.0133,
      "step": 1310020
    },
    {
      "epoch": 2.1439091926710003,
      "grad_norm": 0.5341659784317017,
      "learning_rate": 5.749820443718166e-06,
      "loss": 0.0229,
      "step": 1310040
    },
    {
      "epoch": 2.1439419231096535,
      "grad_norm": 0.7780174612998962,
      "learning_rate": 5.749754551504649e-06,
      "loss": 0.0172,
      "step": 1310060
    },
    {
      "epoch": 2.143974653548307,
      "grad_norm": 0.19778753817081451,
      "learning_rate": 5.749688659291132e-06,
      "loss": 0.0106,
      "step": 1310080
    },
    {
      "epoch": 2.1440073839869602,
      "grad_norm": 0.23837246000766754,
      "learning_rate": 5.749622767077615e-06,
      "loss": 0.0132,
      "step": 1310100
    },
    {
      "epoch": 2.1440401144256134,
      "grad_norm": 0.1868777573108673,
      "learning_rate": 5.749556874864097e-06,
      "loss": 0.0136,
      "step": 1310120
    },
    {
      "epoch": 2.144072844864267,
      "grad_norm": 0.5574442744255066,
      "learning_rate": 5.749490982650581e-06,
      "loss": 0.0215,
      "step": 1310140
    },
    {
      "epoch": 2.14410557530292,
      "grad_norm": 0.7938120365142822,
      "learning_rate": 5.749425090437063e-06,
      "loss": 0.0104,
      "step": 1310160
    },
    {
      "epoch": 2.1441383057415737,
      "grad_norm": 0.4130261540412903,
      "learning_rate": 5.749359198223546e-06,
      "loss": 0.0134,
      "step": 1310180
    },
    {
      "epoch": 2.144171036180227,
      "grad_norm": 0.4670218229293823,
      "learning_rate": 5.74929330601003e-06,
      "loss": 0.0174,
      "step": 1310200
    },
    {
      "epoch": 2.1442037666188805,
      "grad_norm": 0.14861126244068146,
      "learning_rate": 5.749227413796512e-06,
      "loss": 0.0116,
      "step": 1310220
    },
    {
      "epoch": 2.1442364970575336,
      "grad_norm": 0.36711907386779785,
      "learning_rate": 5.7491615215829954e-06,
      "loss": 0.0123,
      "step": 1310240
    },
    {
      "epoch": 2.1442692274961868,
      "grad_norm": 0.26515650749206543,
      "learning_rate": 5.749095629369477e-06,
      "loss": 0.0078,
      "step": 1310260
    },
    {
      "epoch": 2.1443019579348404,
      "grad_norm": 0.3404996991157532,
      "learning_rate": 5.749029737155961e-06,
      "loss": 0.0257,
      "step": 1310280
    },
    {
      "epoch": 2.1443346883734935,
      "grad_norm": 1.3168694972991943,
      "learning_rate": 5.748963844942444e-06,
      "loss": 0.021,
      "step": 1310300
    },
    {
      "epoch": 2.144367418812147,
      "grad_norm": 0.5586379170417786,
      "learning_rate": 5.748897952728926e-06,
      "loss": 0.0137,
      "step": 1310320
    },
    {
      "epoch": 2.1444001492508002,
      "grad_norm": 0.4698692262172699,
      "learning_rate": 5.748832060515409e-06,
      "loss": 0.0184,
      "step": 1310340
    },
    {
      "epoch": 2.1444328796894534,
      "grad_norm": 0.2870047092437744,
      "learning_rate": 5.748766168301893e-06,
      "loss": 0.0188,
      "step": 1310360
    },
    {
      "epoch": 2.144465610128107,
      "grad_norm": 0.18911586701869965,
      "learning_rate": 5.748700276088375e-06,
      "loss": 0.0229,
      "step": 1310380
    },
    {
      "epoch": 2.14449834056676,
      "grad_norm": 0.9675302505493164,
      "learning_rate": 5.748634383874858e-06,
      "loss": 0.0148,
      "step": 1310400
    },
    {
      "epoch": 2.1445310710054137,
      "grad_norm": 0.6056883931159973,
      "learning_rate": 5.74856849166134e-06,
      "loss": 0.0205,
      "step": 1310420
    },
    {
      "epoch": 2.144563801444067,
      "grad_norm": 0.18350404500961304,
      "learning_rate": 5.748502599447824e-06,
      "loss": 0.0127,
      "step": 1310440
    },
    {
      "epoch": 2.1445965318827205,
      "grad_norm": 0.16508899629116058,
      "learning_rate": 5.748436707234306e-06,
      "loss": 0.0118,
      "step": 1310460
    },
    {
      "epoch": 2.1446292623213736,
      "grad_norm": 0.1084696501493454,
      "learning_rate": 5.748370815020789e-06,
      "loss": 0.013,
      "step": 1310480
    },
    {
      "epoch": 2.144661992760027,
      "grad_norm": 0.7065348625183105,
      "learning_rate": 5.748304922807272e-06,
      "loss": 0.0138,
      "step": 1310500
    },
    {
      "epoch": 2.1446947231986804,
      "grad_norm": 0.6233435273170471,
      "learning_rate": 5.7482390305937555e-06,
      "loss": 0.0136,
      "step": 1310520
    },
    {
      "epoch": 2.1447274536373335,
      "grad_norm": 0.2791685461997986,
      "learning_rate": 5.748173138380238e-06,
      "loss": 0.0165,
      "step": 1310540
    },
    {
      "epoch": 2.144760184075987,
      "grad_norm": 0.4006813168525696,
      "learning_rate": 5.748107246166721e-06,
      "loss": 0.0168,
      "step": 1310560
    },
    {
      "epoch": 2.1447929145146403,
      "grad_norm": 0.3457831144332886,
      "learning_rate": 5.7480413539532045e-06,
      "loss": 0.022,
      "step": 1310580
    },
    {
      "epoch": 2.144825644953294,
      "grad_norm": 0.4413224160671234,
      "learning_rate": 5.7479754617396864e-06,
      "loss": 0.0151,
      "step": 1310600
    },
    {
      "epoch": 2.144858375391947,
      "grad_norm": 0.2777727544307709,
      "learning_rate": 5.74790956952617e-06,
      "loss": 0.0113,
      "step": 1310620
    },
    {
      "epoch": 2.1448911058306,
      "grad_norm": 1.1473817825317383,
      "learning_rate": 5.747843677312652e-06,
      "loss": 0.0192,
      "step": 1310640
    },
    {
      "epoch": 2.1449238362692538,
      "grad_norm": 0.3918900489807129,
      "learning_rate": 5.7477777850991355e-06,
      "loss": 0.0138,
      "step": 1310660
    },
    {
      "epoch": 2.144956566707907,
      "grad_norm": 0.17476266622543335,
      "learning_rate": 5.747711892885618e-06,
      "loss": 0.0189,
      "step": 1310680
    },
    {
      "epoch": 2.1449892971465605,
      "grad_norm": 0.9082863330841064,
      "learning_rate": 5.747646000672101e-06,
      "loss": 0.0177,
      "step": 1310700
    },
    {
      "epoch": 2.1450220275852137,
      "grad_norm": 0.3643912374973297,
      "learning_rate": 5.747580108458584e-06,
      "loss": 0.0178,
      "step": 1310720
    },
    {
      "epoch": 2.145054758023867,
      "grad_norm": 0.5778929591178894,
      "learning_rate": 5.747514216245067e-06,
      "loss": 0.0127,
      "step": 1310740
    },
    {
      "epoch": 2.1450874884625204,
      "grad_norm": 0.5386998057365417,
      "learning_rate": 5.747448324031549e-06,
      "loss": 0.0149,
      "step": 1310760
    },
    {
      "epoch": 2.1451202189011735,
      "grad_norm": 0.4549970328807831,
      "learning_rate": 5.747382431818033e-06,
      "loss": 0.0192,
      "step": 1310780
    },
    {
      "epoch": 2.145152949339827,
      "grad_norm": 0.9363592267036438,
      "learning_rate": 5.747316539604515e-06,
      "loss": 0.0146,
      "step": 1310800
    },
    {
      "epoch": 2.1451856797784803,
      "grad_norm": 0.12312345951795578,
      "learning_rate": 5.747250647390998e-06,
      "loss": 0.0138,
      "step": 1310820
    },
    {
      "epoch": 2.145218410217134,
      "grad_norm": 0.23072515428066254,
      "learning_rate": 5.747184755177481e-06,
      "loss": 0.0202,
      "step": 1310840
    },
    {
      "epoch": 2.145251140655787,
      "grad_norm": 0.17997992038726807,
      "learning_rate": 5.747118862963964e-06,
      "loss": 0.0082,
      "step": 1310860
    },
    {
      "epoch": 2.14528387109444,
      "grad_norm": 0.4720556437969208,
      "learning_rate": 5.747052970750447e-06,
      "loss": 0.0131,
      "step": 1310880
    },
    {
      "epoch": 2.145316601533094,
      "grad_norm": 0.27781689167022705,
      "learning_rate": 5.74698707853693e-06,
      "loss": 0.0125,
      "step": 1310900
    },
    {
      "epoch": 2.145349331971747,
      "grad_norm": 1.1186678409576416,
      "learning_rate": 5.746921186323413e-06,
      "loss": 0.0172,
      "step": 1310920
    },
    {
      "epoch": 2.1453820624104005,
      "grad_norm": 0.803220808506012,
      "learning_rate": 5.7468552941098956e-06,
      "loss": 0.0115,
      "step": 1310940
    },
    {
      "epoch": 2.1454147928490537,
      "grad_norm": 0.1859627515077591,
      "learning_rate": 5.746789401896379e-06,
      "loss": 0.0155,
      "step": 1310960
    },
    {
      "epoch": 2.1454475232877073,
      "grad_norm": 0.16481530666351318,
      "learning_rate": 5.746723509682861e-06,
      "loss": 0.0164,
      "step": 1310980
    },
    {
      "epoch": 2.1454802537263604,
      "grad_norm": 0.06772520393133163,
      "learning_rate": 5.746657617469345e-06,
      "loss": 0.0134,
      "step": 1311000
    },
    {
      "epoch": 2.1455129841650136,
      "grad_norm": 0.607226550579071,
      "learning_rate": 5.7465917252558265e-06,
      "loss": 0.0117,
      "step": 1311020
    },
    {
      "epoch": 2.145545714603667,
      "grad_norm": 0.34207454323768616,
      "learning_rate": 5.74652583304231e-06,
      "loss": 0.0215,
      "step": 1311040
    },
    {
      "epoch": 2.1455784450423203,
      "grad_norm": 0.33733323216438293,
      "learning_rate": 5.746459940828793e-06,
      "loss": 0.0094,
      "step": 1311060
    },
    {
      "epoch": 2.145611175480974,
      "grad_norm": 0.4385003447532654,
      "learning_rate": 5.746394048615276e-06,
      "loss": 0.0175,
      "step": 1311080
    },
    {
      "epoch": 2.145643905919627,
      "grad_norm": 0.6412337422370911,
      "learning_rate": 5.746328156401758e-06,
      "loss": 0.0165,
      "step": 1311100
    },
    {
      "epoch": 2.1456766363582807,
      "grad_norm": 1.1586940288543701,
      "learning_rate": 5.746262264188242e-06,
      "loss": 0.0145,
      "step": 1311120
    },
    {
      "epoch": 2.145709366796934,
      "grad_norm": 0.45812925696372986,
      "learning_rate": 5.746196371974724e-06,
      "loss": 0.0181,
      "step": 1311140
    },
    {
      "epoch": 2.145742097235587,
      "grad_norm": 0.4464406967163086,
      "learning_rate": 5.746130479761207e-06,
      "loss": 0.0126,
      "step": 1311160
    },
    {
      "epoch": 2.1457748276742405,
      "grad_norm": 0.1497819572687149,
      "learning_rate": 5.746064587547689e-06,
      "loss": 0.0157,
      "step": 1311180
    },
    {
      "epoch": 2.1458075581128937,
      "grad_norm": 0.8294777870178223,
      "learning_rate": 5.745998695334173e-06,
      "loss": 0.0158,
      "step": 1311200
    },
    {
      "epoch": 2.1458402885515473,
      "grad_norm": 0.7682169079780579,
      "learning_rate": 5.745932803120655e-06,
      "loss": 0.0186,
      "step": 1311220
    },
    {
      "epoch": 2.1458730189902004,
      "grad_norm": 0.6446323394775391,
      "learning_rate": 5.745866910907138e-06,
      "loss": 0.0141,
      "step": 1311240
    },
    {
      "epoch": 2.145905749428854,
      "grad_norm": 0.8157978057861328,
      "learning_rate": 5.745801018693622e-06,
      "loss": 0.0146,
      "step": 1311260
    },
    {
      "epoch": 2.145938479867507,
      "grad_norm": 0.20863616466522217,
      "learning_rate": 5.745735126480104e-06,
      "loss": 0.0161,
      "step": 1311280
    },
    {
      "epoch": 2.1459712103061603,
      "grad_norm": 0.3081868290901184,
      "learning_rate": 5.745669234266587e-06,
      "loss": 0.016,
      "step": 1311300
    },
    {
      "epoch": 2.146003940744814,
      "grad_norm": 1.066034197807312,
      "learning_rate": 5.74560334205307e-06,
      "loss": 0.0176,
      "step": 1311320
    },
    {
      "epoch": 2.146036671183467,
      "grad_norm": 1.2309294939041138,
      "learning_rate": 5.745537449839553e-06,
      "loss": 0.0196,
      "step": 1311340
    },
    {
      "epoch": 2.1460694016221207,
      "grad_norm": 0.8489307165145874,
      "learning_rate": 5.745471557626036e-06,
      "loss": 0.0114,
      "step": 1311360
    },
    {
      "epoch": 2.146102132060774,
      "grad_norm": 0.2744810879230499,
      "learning_rate": 5.745405665412519e-06,
      "loss": 0.0147,
      "step": 1311380
    },
    {
      "epoch": 2.1461348624994274,
      "grad_norm": 0.8797107934951782,
      "learning_rate": 5.745339773199001e-06,
      "loss": 0.0132,
      "step": 1311400
    },
    {
      "epoch": 2.1461675929380806,
      "grad_norm": 0.37073877453804016,
      "learning_rate": 5.745273880985485e-06,
      "loss": 0.0137,
      "step": 1311420
    },
    {
      "epoch": 2.1462003233767337,
      "grad_norm": 0.4675949811935425,
      "learning_rate": 5.745207988771967e-06,
      "loss": 0.0089,
      "step": 1311440
    },
    {
      "epoch": 2.1462330538153873,
      "grad_norm": 0.12468640506267548,
      "learning_rate": 5.74514209655845e-06,
      "loss": 0.0137,
      "step": 1311460
    },
    {
      "epoch": 2.1462657842540405,
      "grad_norm": 0.34910741448402405,
      "learning_rate": 5.745076204344933e-06,
      "loss": 0.0159,
      "step": 1311480
    },
    {
      "epoch": 2.146298514692694,
      "grad_norm": 0.5250424742698669,
      "learning_rate": 5.745010312131416e-06,
      "loss": 0.0101,
      "step": 1311500
    },
    {
      "epoch": 2.146331245131347,
      "grad_norm": 0.7933216691017151,
      "learning_rate": 5.744944419917898e-06,
      "loss": 0.0151,
      "step": 1311520
    },
    {
      "epoch": 2.146363975570001,
      "grad_norm": 0.2728944420814514,
      "learning_rate": 5.744878527704382e-06,
      "loss": 0.0122,
      "step": 1311540
    },
    {
      "epoch": 2.146396706008654,
      "grad_norm": 9.076544761657715,
      "learning_rate": 5.744812635490864e-06,
      "loss": 0.0216,
      "step": 1311560
    },
    {
      "epoch": 2.146429436447307,
      "grad_norm": 0.4630219042301178,
      "learning_rate": 5.7447467432773475e-06,
      "loss": 0.0146,
      "step": 1311580
    },
    {
      "epoch": 2.1464621668859607,
      "grad_norm": 0.3435187339782715,
      "learning_rate": 5.744680851063831e-06,
      "loss": 0.0171,
      "step": 1311600
    },
    {
      "epoch": 2.146494897324614,
      "grad_norm": 0.31767287850379944,
      "learning_rate": 5.744614958850313e-06,
      "loss": 0.0128,
      "step": 1311620
    },
    {
      "epoch": 2.1465276277632674,
      "grad_norm": 0.6088739633560181,
      "learning_rate": 5.7445490666367965e-06,
      "loss": 0.0158,
      "step": 1311640
    },
    {
      "epoch": 2.1465603582019206,
      "grad_norm": 0.09917116910219193,
      "learning_rate": 5.744483174423278e-06,
      "loss": 0.008,
      "step": 1311660
    },
    {
      "epoch": 2.146593088640574,
      "grad_norm": 0.24629336595535278,
      "learning_rate": 5.744417282209762e-06,
      "loss": 0.0179,
      "step": 1311680
    },
    {
      "epoch": 2.1466258190792273,
      "grad_norm": 0.6002818942070007,
      "learning_rate": 5.744351389996245e-06,
      "loss": 0.0167,
      "step": 1311700
    },
    {
      "epoch": 2.1466585495178805,
      "grad_norm": 0.3566613793373108,
      "learning_rate": 5.7442854977827275e-06,
      "loss": 0.0082,
      "step": 1311720
    },
    {
      "epoch": 2.146691279956534,
      "grad_norm": 0.9226092100143433,
      "learning_rate": 5.74421960556921e-06,
      "loss": 0.0131,
      "step": 1311740
    },
    {
      "epoch": 2.1467240103951872,
      "grad_norm": 0.32318273186683655,
      "learning_rate": 5.744153713355694e-06,
      "loss": 0.0147,
      "step": 1311760
    },
    {
      "epoch": 2.146756740833841,
      "grad_norm": 0.20379389822483063,
      "learning_rate": 5.744087821142176e-06,
      "loss": 0.0101,
      "step": 1311780
    },
    {
      "epoch": 2.146789471272494,
      "grad_norm": 0.10768302530050278,
      "learning_rate": 5.744021928928659e-06,
      "loss": 0.0128,
      "step": 1311800
    },
    {
      "epoch": 2.146822201711147,
      "grad_norm": 0.20751072466373444,
      "learning_rate": 5.743956036715141e-06,
      "loss": 0.0176,
      "step": 1311820
    },
    {
      "epoch": 2.1468549321498007,
      "grad_norm": 0.381073921918869,
      "learning_rate": 5.743890144501625e-06,
      "loss": 0.0155,
      "step": 1311840
    },
    {
      "epoch": 2.146887662588454,
      "grad_norm": 0.642974317073822,
      "learning_rate": 5.7438242522881075e-06,
      "loss": 0.0204,
      "step": 1311860
    },
    {
      "epoch": 2.1469203930271075,
      "grad_norm": 0.5873262286186218,
      "learning_rate": 5.74375836007459e-06,
      "loss": 0.0219,
      "step": 1311880
    },
    {
      "epoch": 2.1469531234657606,
      "grad_norm": 0.28709861636161804,
      "learning_rate": 5.743692467861073e-06,
      "loss": 0.0162,
      "step": 1311900
    },
    {
      "epoch": 2.146985853904414,
      "grad_norm": 0.15887461602687836,
      "learning_rate": 5.743626575647557e-06,
      "loss": 0.0212,
      "step": 1311920
    },
    {
      "epoch": 2.1470185843430674,
      "grad_norm": 0.42313507199287415,
      "learning_rate": 5.743560683434039e-06,
      "loss": 0.0155,
      "step": 1311940
    },
    {
      "epoch": 2.1470513147817205,
      "grad_norm": 0.07497143000364304,
      "learning_rate": 5.743494791220522e-06,
      "loss": 0.0119,
      "step": 1311960
    },
    {
      "epoch": 2.147084045220374,
      "grad_norm": 0.3180839419364929,
      "learning_rate": 5.743428899007006e-06,
      "loss": 0.0134,
      "step": 1311980
    },
    {
      "epoch": 2.1471167756590273,
      "grad_norm": 0.3573109209537506,
      "learning_rate": 5.7433630067934875e-06,
      "loss": 0.0193,
      "step": 1312000
    },
    {
      "epoch": 2.147149506097681,
      "grad_norm": 0.3013692796230316,
      "learning_rate": 5.743297114579971e-06,
      "loss": 0.0153,
      "step": 1312020
    },
    {
      "epoch": 2.147182236536334,
      "grad_norm": 0.4648226797580719,
      "learning_rate": 5.743231222366453e-06,
      "loss": 0.0174,
      "step": 1312040
    },
    {
      "epoch": 2.1472149669749876,
      "grad_norm": 0.7695502042770386,
      "learning_rate": 5.743165330152937e-06,
      "loss": 0.0198,
      "step": 1312060
    },
    {
      "epoch": 2.1472476974136407,
      "grad_norm": 1.013550877571106,
      "learning_rate": 5.743099437939419e-06,
      "loss": 0.017,
      "step": 1312080
    },
    {
      "epoch": 2.147280427852294,
      "grad_norm": 0.2768312692642212,
      "learning_rate": 5.743033545725902e-06,
      "loss": 0.0146,
      "step": 1312100
    },
    {
      "epoch": 2.1473131582909475,
      "grad_norm": 0.5413457155227661,
      "learning_rate": 5.742967653512385e-06,
      "loss": 0.0207,
      "step": 1312120
    },
    {
      "epoch": 2.1473458887296006,
      "grad_norm": 0.2583858072757721,
      "learning_rate": 5.742901761298868e-06,
      "loss": 0.0148,
      "step": 1312140
    },
    {
      "epoch": 2.1473786191682542,
      "grad_norm": 0.16701249778270721,
      "learning_rate": 5.74283586908535e-06,
      "loss": 0.017,
      "step": 1312160
    },
    {
      "epoch": 2.1474113496069074,
      "grad_norm": 0.44108739495277405,
      "learning_rate": 5.742769976871834e-06,
      "loss": 0.0125,
      "step": 1312180
    },
    {
      "epoch": 2.147444080045561,
      "grad_norm": 0.9372695684432983,
      "learning_rate": 5.742704084658316e-06,
      "loss": 0.0206,
      "step": 1312200
    },
    {
      "epoch": 2.147476810484214,
      "grad_norm": 0.35157227516174316,
      "learning_rate": 5.742638192444799e-06,
      "loss": 0.0207,
      "step": 1312220
    },
    {
      "epoch": 2.1475095409228673,
      "grad_norm": 0.8656920194625854,
      "learning_rate": 5.742572300231281e-06,
      "loss": 0.0139,
      "step": 1312240
    },
    {
      "epoch": 2.147542271361521,
      "grad_norm": 3.0229287147521973,
      "learning_rate": 5.742506408017765e-06,
      "loss": 0.0195,
      "step": 1312260
    },
    {
      "epoch": 2.147575001800174,
      "grad_norm": 0.6178147196769714,
      "learning_rate": 5.742440515804248e-06,
      "loss": 0.0146,
      "step": 1312280
    },
    {
      "epoch": 2.1476077322388276,
      "grad_norm": 0.8131837844848633,
      "learning_rate": 5.74237462359073e-06,
      "loss": 0.0108,
      "step": 1312300
    },
    {
      "epoch": 2.1476404626774808,
      "grad_norm": 0.3300257921218872,
      "learning_rate": 5.742308731377214e-06,
      "loss": 0.0165,
      "step": 1312320
    },
    {
      "epoch": 2.147673193116134,
      "grad_norm": 0.19278527796268463,
      "learning_rate": 5.742242839163697e-06,
      "loss": 0.0184,
      "step": 1312340
    },
    {
      "epoch": 2.1477059235547875,
      "grad_norm": 0.2931482195854187,
      "learning_rate": 5.742176946950179e-06,
      "loss": 0.0143,
      "step": 1312360
    },
    {
      "epoch": 2.1477386539934407,
      "grad_norm": 0.4854619801044464,
      "learning_rate": 5.742111054736662e-06,
      "loss": 0.0167,
      "step": 1312380
    },
    {
      "epoch": 2.1477713844320943,
      "grad_norm": 0.283488929271698,
      "learning_rate": 5.742045162523146e-06,
      "loss": 0.0173,
      "step": 1312400
    },
    {
      "epoch": 2.1478041148707474,
      "grad_norm": 0.18852850794792175,
      "learning_rate": 5.741979270309628e-06,
      "loss": 0.0151,
      "step": 1312420
    },
    {
      "epoch": 2.147836845309401,
      "grad_norm": 0.458534300327301,
      "learning_rate": 5.741913378096111e-06,
      "loss": 0.0151,
      "step": 1312440
    },
    {
      "epoch": 2.147869575748054,
      "grad_norm": 1.0191385746002197,
      "learning_rate": 5.741847485882593e-06,
      "loss": 0.0138,
      "step": 1312460
    },
    {
      "epoch": 2.1479023061867073,
      "grad_norm": 0.522024393081665,
      "learning_rate": 5.741781593669077e-06,
      "loss": 0.0115,
      "step": 1312480
    },
    {
      "epoch": 2.147935036625361,
      "grad_norm": 0.4999074339866638,
      "learning_rate": 5.741715701455559e-06,
      "loss": 0.0164,
      "step": 1312500
    },
    {
      "epoch": 2.147967767064014,
      "grad_norm": 0.544041097164154,
      "learning_rate": 5.741649809242042e-06,
      "loss": 0.0154,
      "step": 1312520
    },
    {
      "epoch": 2.1480004975026676,
      "grad_norm": 0.2871648371219635,
      "learning_rate": 5.741583917028525e-06,
      "loss": 0.0133,
      "step": 1312540
    },
    {
      "epoch": 2.148033227941321,
      "grad_norm": 0.31117719411849976,
      "learning_rate": 5.7415180248150085e-06,
      "loss": 0.0175,
      "step": 1312560
    },
    {
      "epoch": 2.1480659583799744,
      "grad_norm": 0.945306658744812,
      "learning_rate": 5.74145213260149e-06,
      "loss": 0.017,
      "step": 1312580
    },
    {
      "epoch": 2.1480986888186275,
      "grad_norm": 0.23872984945774078,
      "learning_rate": 5.741386240387974e-06,
      "loss": 0.0139,
      "step": 1312600
    },
    {
      "epoch": 2.1481314192572807,
      "grad_norm": 0.20726200938224792,
      "learning_rate": 5.741320348174456e-06,
      "loss": 0.0153,
      "step": 1312620
    },
    {
      "epoch": 2.1481641496959343,
      "grad_norm": 0.8829839825630188,
      "learning_rate": 5.7412544559609394e-06,
      "loss": 0.0185,
      "step": 1312640
    },
    {
      "epoch": 2.1481968801345874,
      "grad_norm": 0.1849212944507599,
      "learning_rate": 5.741188563747423e-06,
      "loss": 0.0123,
      "step": 1312660
    },
    {
      "epoch": 2.148229610573241,
      "grad_norm": 0.06587118655443192,
      "learning_rate": 5.741122671533905e-06,
      "loss": 0.0152,
      "step": 1312680
    },
    {
      "epoch": 2.148262341011894,
      "grad_norm": 0.30217453837394714,
      "learning_rate": 5.7410567793203885e-06,
      "loss": 0.0188,
      "step": 1312700
    },
    {
      "epoch": 2.1482950714505478,
      "grad_norm": 0.4100334048271179,
      "learning_rate": 5.740990887106871e-06,
      "loss": 0.0125,
      "step": 1312720
    },
    {
      "epoch": 2.148327801889201,
      "grad_norm": 0.6611517071723938,
      "learning_rate": 5.740924994893354e-06,
      "loss": 0.0104,
      "step": 1312740
    },
    {
      "epoch": 2.148360532327854,
      "grad_norm": 0.49688175320625305,
      "learning_rate": 5.740859102679837e-06,
      "loss": 0.0156,
      "step": 1312760
    },
    {
      "epoch": 2.1483932627665077,
      "grad_norm": 0.8489413857460022,
      "learning_rate": 5.74079321046632e-06,
      "loss": 0.0123,
      "step": 1312780
    },
    {
      "epoch": 2.148425993205161,
      "grad_norm": 0.786551296710968,
      "learning_rate": 5.740727318252802e-06,
      "loss": 0.0187,
      "step": 1312800
    },
    {
      "epoch": 2.1484587236438144,
      "grad_norm": 0.35287216305732727,
      "learning_rate": 5.740661426039286e-06,
      "loss": 0.016,
      "step": 1312820
    },
    {
      "epoch": 2.1484914540824676,
      "grad_norm": 0.19465570151805878,
      "learning_rate": 5.740595533825768e-06,
      "loss": 0.0147,
      "step": 1312840
    },
    {
      "epoch": 2.148524184521121,
      "grad_norm": 0.40492182970046997,
      "learning_rate": 5.740529641612251e-06,
      "loss": 0.0191,
      "step": 1312860
    },
    {
      "epoch": 2.1485569149597743,
      "grad_norm": 0.5732591152191162,
      "learning_rate": 5.740463749398734e-06,
      "loss": 0.0114,
      "step": 1312880
    },
    {
      "epoch": 2.1485896453984275,
      "grad_norm": 0.13989228010177612,
      "learning_rate": 5.740397857185217e-06,
      "loss": 0.0233,
      "step": 1312900
    },
    {
      "epoch": 2.148622375837081,
      "grad_norm": 0.274608314037323,
      "learning_rate": 5.7403319649716995e-06,
      "loss": 0.0195,
      "step": 1312920
    },
    {
      "epoch": 2.148655106275734,
      "grad_norm": 0.34530386328697205,
      "learning_rate": 5.740266072758183e-06,
      "loss": 0.0114,
      "step": 1312940
    },
    {
      "epoch": 2.148687836714388,
      "grad_norm": 0.13673126697540283,
      "learning_rate": 5.740200180544665e-06,
      "loss": 0.0109,
      "step": 1312960
    },
    {
      "epoch": 2.148720567153041,
      "grad_norm": 0.0910680964589119,
      "learning_rate": 5.7401342883311486e-06,
      "loss": 0.0131,
      "step": 1312980
    },
    {
      "epoch": 2.1487532975916945,
      "grad_norm": 0.2553459405899048,
      "learning_rate": 5.740068396117632e-06,
      "loss": 0.0238,
      "step": 1313000
    },
    {
      "epoch": 2.1487860280303477,
      "grad_norm": 0.2923966348171234,
      "learning_rate": 5.740002503904114e-06,
      "loss": 0.0108,
      "step": 1313020
    },
    {
      "epoch": 2.148818758469001,
      "grad_norm": 0.6207497715950012,
      "learning_rate": 5.739936611690598e-06,
      "loss": 0.0212,
      "step": 1313040
    },
    {
      "epoch": 2.1488514889076544,
      "grad_norm": 1.2129557132720947,
      "learning_rate": 5.7398707194770795e-06,
      "loss": 0.0167,
      "step": 1313060
    },
    {
      "epoch": 2.1488842193463076,
      "grad_norm": 0.6411247849464417,
      "learning_rate": 5.739804827263563e-06,
      "loss": 0.0211,
      "step": 1313080
    },
    {
      "epoch": 2.148916949784961,
      "grad_norm": 0.6172341704368591,
      "learning_rate": 5.739738935050045e-06,
      "loss": 0.0177,
      "step": 1313100
    },
    {
      "epoch": 2.1489496802236143,
      "grad_norm": 1.073972463607788,
      "learning_rate": 5.739673042836529e-06,
      "loss": 0.0146,
      "step": 1313120
    },
    {
      "epoch": 2.148982410662268,
      "grad_norm": 0.32019710540771484,
      "learning_rate": 5.739607150623011e-06,
      "loss": 0.0094,
      "step": 1313140
    },
    {
      "epoch": 2.149015141100921,
      "grad_norm": 0.8836556673049927,
      "learning_rate": 5.739541258409495e-06,
      "loss": 0.0117,
      "step": 1313160
    },
    {
      "epoch": 2.149047871539574,
      "grad_norm": 0.47443491220474243,
      "learning_rate": 5.739475366195977e-06,
      "loss": 0.013,
      "step": 1313180
    },
    {
      "epoch": 2.149080601978228,
      "grad_norm": 0.2743556797504425,
      "learning_rate": 5.73940947398246e-06,
      "loss": 0.0141,
      "step": 1313200
    },
    {
      "epoch": 2.149113332416881,
      "grad_norm": 0.4532332420349121,
      "learning_rate": 5.739343581768942e-06,
      "loss": 0.0149,
      "step": 1313220
    },
    {
      "epoch": 2.1491460628555346,
      "grad_norm": 0.07107335329055786,
      "learning_rate": 5.739277689555426e-06,
      "loss": 0.0105,
      "step": 1313240
    },
    {
      "epoch": 2.1491787932941877,
      "grad_norm": 0.8244021534919739,
      "learning_rate": 5.739211797341908e-06,
      "loss": 0.0184,
      "step": 1313260
    },
    {
      "epoch": 2.1492115237328413,
      "grad_norm": 0.17062121629714966,
      "learning_rate": 5.739145905128391e-06,
      "loss": 0.0114,
      "step": 1313280
    },
    {
      "epoch": 2.1492442541714945,
      "grad_norm": 0.2911413013935089,
      "learning_rate": 5.739080012914874e-06,
      "loss": 0.0153,
      "step": 1313300
    },
    {
      "epoch": 2.1492769846101476,
      "grad_norm": 0.348111093044281,
      "learning_rate": 5.739014120701357e-06,
      "loss": 0.0147,
      "step": 1313320
    },
    {
      "epoch": 2.149309715048801,
      "grad_norm": 1.3273224830627441,
      "learning_rate": 5.73894822848784e-06,
      "loss": 0.0106,
      "step": 1313340
    },
    {
      "epoch": 2.1493424454874543,
      "grad_norm": 0.09676194936037064,
      "learning_rate": 5.738882336274323e-06,
      "loss": 0.0137,
      "step": 1313360
    },
    {
      "epoch": 2.149375175926108,
      "grad_norm": 0.23839905858039856,
      "learning_rate": 5.738816444060806e-06,
      "loss": 0.0127,
      "step": 1313380
    },
    {
      "epoch": 2.149407906364761,
      "grad_norm": 0.5244786143302917,
      "learning_rate": 5.738750551847289e-06,
      "loss": 0.0145,
      "step": 1313400
    },
    {
      "epoch": 2.1494406368034142,
      "grad_norm": 1.555166244506836,
      "learning_rate": 5.738684659633772e-06,
      "loss": 0.0171,
      "step": 1313420
    },
    {
      "epoch": 2.149473367242068,
      "grad_norm": 0.28599315881729126,
      "learning_rate": 5.738618767420254e-06,
      "loss": 0.0172,
      "step": 1313440
    },
    {
      "epoch": 2.149506097680721,
      "grad_norm": 0.670996904373169,
      "learning_rate": 5.738552875206738e-06,
      "loss": 0.012,
      "step": 1313460
    },
    {
      "epoch": 2.1495388281193746,
      "grad_norm": 0.48505592346191406,
      "learning_rate": 5.73848698299322e-06,
      "loss": 0.017,
      "step": 1313480
    },
    {
      "epoch": 2.1495715585580277,
      "grad_norm": 0.9028276801109314,
      "learning_rate": 5.738421090779703e-06,
      "loss": 0.0193,
      "step": 1313500
    },
    {
      "epoch": 2.1496042889966813,
      "grad_norm": 0.704468309879303,
      "learning_rate": 5.738355198566186e-06,
      "loss": 0.0114,
      "step": 1313520
    },
    {
      "epoch": 2.1496370194353345,
      "grad_norm": 0.5073936581611633,
      "learning_rate": 5.738289306352669e-06,
      "loss": 0.0148,
      "step": 1313540
    },
    {
      "epoch": 2.1496697498739876,
      "grad_norm": 0.3914918005466461,
      "learning_rate": 5.738223414139151e-06,
      "loss": 0.01,
      "step": 1313560
    },
    {
      "epoch": 2.149702480312641,
      "grad_norm": 0.35166382789611816,
      "learning_rate": 5.738157521925635e-06,
      "loss": 0.0137,
      "step": 1313580
    },
    {
      "epoch": 2.1497352107512944,
      "grad_norm": 0.17691408097743988,
      "learning_rate": 5.738091629712117e-06,
      "loss": 0.0208,
      "step": 1313600
    },
    {
      "epoch": 2.149767941189948,
      "grad_norm": 0.299483984708786,
      "learning_rate": 5.7380257374986005e-06,
      "loss": 0.0186,
      "step": 1313620
    },
    {
      "epoch": 2.149800671628601,
      "grad_norm": 0.32585063576698303,
      "learning_rate": 5.737959845285082e-06,
      "loss": 0.0127,
      "step": 1313640
    },
    {
      "epoch": 2.1498334020672547,
      "grad_norm": 0.4614412188529968,
      "learning_rate": 5.737893953071566e-06,
      "loss": 0.0121,
      "step": 1313660
    },
    {
      "epoch": 2.149866132505908,
      "grad_norm": 1.7302181720733643,
      "learning_rate": 5.737828060858049e-06,
      "loss": 0.0268,
      "step": 1313680
    },
    {
      "epoch": 2.149898862944561,
      "grad_norm": 0.12704259157180786,
      "learning_rate": 5.7377621686445314e-06,
      "loss": 0.0215,
      "step": 1313700
    },
    {
      "epoch": 2.1499315933832146,
      "grad_norm": 0.2712045907974243,
      "learning_rate": 5.737696276431015e-06,
      "loss": 0.0216,
      "step": 1313720
    },
    {
      "epoch": 2.1499643238218678,
      "grad_norm": 0.4596969187259674,
      "learning_rate": 5.737630384217498e-06,
      "loss": 0.0123,
      "step": 1313740
    },
    {
      "epoch": 2.1499970542605213,
      "grad_norm": 0.20725595951080322,
      "learning_rate": 5.7375644920039805e-06,
      "loss": 0.019,
      "step": 1313760
    },
    {
      "epoch": 2.1500297846991745,
      "grad_norm": 0.8122002482414246,
      "learning_rate": 5.737498599790463e-06,
      "loss": 0.0112,
      "step": 1313780
    },
    {
      "epoch": 2.1500625151378276,
      "grad_norm": 1.186871886253357,
      "learning_rate": 5.737432707576947e-06,
      "loss": 0.0152,
      "step": 1313800
    },
    {
      "epoch": 2.1500952455764812,
      "grad_norm": 0.17026999592781067,
      "learning_rate": 5.737366815363429e-06,
      "loss": 0.0153,
      "step": 1313820
    },
    {
      "epoch": 2.1501279760151344,
      "grad_norm": 0.2159990668296814,
      "learning_rate": 5.737300923149912e-06,
      "loss": 0.0178,
      "step": 1313840
    },
    {
      "epoch": 2.150160706453788,
      "grad_norm": 0.5432013273239136,
      "learning_rate": 5.737235030936394e-06,
      "loss": 0.0135,
      "step": 1313860
    },
    {
      "epoch": 2.150193436892441,
      "grad_norm": 0.17981359362602234,
      "learning_rate": 5.737169138722878e-06,
      "loss": 0.0074,
      "step": 1313880
    },
    {
      "epoch": 2.1502261673310947,
      "grad_norm": 0.6515620350837708,
      "learning_rate": 5.7371032465093605e-06,
      "loss": 0.0251,
      "step": 1313900
    },
    {
      "epoch": 2.150258897769748,
      "grad_norm": 0.4849376976490021,
      "learning_rate": 5.737037354295843e-06,
      "loss": 0.0156,
      "step": 1313920
    },
    {
      "epoch": 2.150291628208401,
      "grad_norm": 0.4638359248638153,
      "learning_rate": 5.736971462082326e-06,
      "loss": 0.0178,
      "step": 1313940
    },
    {
      "epoch": 2.1503243586470546,
      "grad_norm": 0.7544708251953125,
      "learning_rate": 5.73690556986881e-06,
      "loss": 0.0165,
      "step": 1313960
    },
    {
      "epoch": 2.1503570890857078,
      "grad_norm": 0.2170255035161972,
      "learning_rate": 5.7368396776552915e-06,
      "loss": 0.0116,
      "step": 1313980
    },
    {
      "epoch": 2.1503898195243614,
      "grad_norm": 0.07344411313533783,
      "learning_rate": 5.736773785441775e-06,
      "loss": 0.0105,
      "step": 1314000
    },
    {
      "epoch": 2.1504225499630145,
      "grad_norm": 0.22371211647987366,
      "learning_rate": 5.736707893228257e-06,
      "loss": 0.0217,
      "step": 1314020
    },
    {
      "epoch": 2.150455280401668,
      "grad_norm": 0.3668917715549469,
      "learning_rate": 5.7366420010147405e-06,
      "loss": 0.0233,
      "step": 1314040
    },
    {
      "epoch": 2.1504880108403213,
      "grad_norm": 0.6962115168571472,
      "learning_rate": 5.736576108801224e-06,
      "loss": 0.0202,
      "step": 1314060
    },
    {
      "epoch": 2.1505207412789744,
      "grad_norm": 2.051647901535034,
      "learning_rate": 5.736510216587706e-06,
      "loss": 0.028,
      "step": 1314080
    },
    {
      "epoch": 2.150553471717628,
      "grad_norm": 0.10205468535423279,
      "learning_rate": 5.73644432437419e-06,
      "loss": 0.0117,
      "step": 1314100
    },
    {
      "epoch": 2.150586202156281,
      "grad_norm": 0.25714316964149475,
      "learning_rate": 5.7363784321606715e-06,
      "loss": 0.0112,
      "step": 1314120
    },
    {
      "epoch": 2.1506189325949348,
      "grad_norm": 0.5879353284835815,
      "learning_rate": 5.736312539947155e-06,
      "loss": 0.018,
      "step": 1314140
    },
    {
      "epoch": 2.150651663033588,
      "grad_norm": 0.6384409070014954,
      "learning_rate": 5.736246647733638e-06,
      "loss": 0.0146,
      "step": 1314160
    },
    {
      "epoch": 2.1506843934722415,
      "grad_norm": 0.4562442898750305,
      "learning_rate": 5.7361807555201206e-06,
      "loss": 0.0159,
      "step": 1314180
    },
    {
      "epoch": 2.1507171239108946,
      "grad_norm": 0.861068069934845,
      "learning_rate": 5.736114863306603e-06,
      "loss": 0.013,
      "step": 1314200
    },
    {
      "epoch": 2.150749854349548,
      "grad_norm": 0.6805538535118103,
      "learning_rate": 5.736048971093087e-06,
      "loss": 0.0239,
      "step": 1314220
    },
    {
      "epoch": 2.1507825847882014,
      "grad_norm": 0.19072780013084412,
      "learning_rate": 5.735983078879569e-06,
      "loss": 0.0122,
      "step": 1314240
    },
    {
      "epoch": 2.1508153152268545,
      "grad_norm": 0.19657014310359955,
      "learning_rate": 5.735917186666052e-06,
      "loss": 0.0201,
      "step": 1314260
    },
    {
      "epoch": 2.150848045665508,
      "grad_norm": 0.43957164883613586,
      "learning_rate": 5.735851294452534e-06,
      "loss": 0.0122,
      "step": 1314280
    },
    {
      "epoch": 2.1508807761041613,
      "grad_norm": 0.6872301697731018,
      "learning_rate": 5.735785402239018e-06,
      "loss": 0.0179,
      "step": 1314300
    },
    {
      "epoch": 2.150913506542815,
      "grad_norm": 0.15684576332569122,
      "learning_rate": 5.735719510025501e-06,
      "loss": 0.0211,
      "step": 1314320
    },
    {
      "epoch": 2.150946236981468,
      "grad_norm": 0.4295005202293396,
      "learning_rate": 5.735653617811983e-06,
      "loss": 0.0156,
      "step": 1314340
    },
    {
      "epoch": 2.150978967420121,
      "grad_norm": 0.4043669104576111,
      "learning_rate": 5.735587725598466e-06,
      "loss": 0.0169,
      "step": 1314360
    },
    {
      "epoch": 2.1510116978587748,
      "grad_norm": 0.3010895252227783,
      "learning_rate": 5.73552183338495e-06,
      "loss": 0.0158,
      "step": 1314380
    },
    {
      "epoch": 2.151044428297428,
      "grad_norm": 0.44359466433525085,
      "learning_rate": 5.735455941171432e-06,
      "loss": 0.0137,
      "step": 1314400
    },
    {
      "epoch": 2.1510771587360815,
      "grad_norm": 0.6278292536735535,
      "learning_rate": 5.735390048957915e-06,
      "loss": 0.0142,
      "step": 1314420
    },
    {
      "epoch": 2.1511098891747347,
      "grad_norm": 0.31492429971694946,
      "learning_rate": 5.735324156744399e-06,
      "loss": 0.0168,
      "step": 1314440
    },
    {
      "epoch": 2.1511426196133883,
      "grad_norm": 0.056538764387369156,
      "learning_rate": 5.735258264530881e-06,
      "loss": 0.0138,
      "step": 1314460
    },
    {
      "epoch": 2.1511753500520414,
      "grad_norm": 0.2972430884838104,
      "learning_rate": 5.735192372317364e-06,
      "loss": 0.0205,
      "step": 1314480
    },
    {
      "epoch": 2.1512080804906946,
      "grad_norm": 0.3482760190963745,
      "learning_rate": 5.735126480103846e-06,
      "loss": 0.0085,
      "step": 1314500
    },
    {
      "epoch": 2.151240810929348,
      "grad_norm": 0.5470001101493835,
      "learning_rate": 5.73506058789033e-06,
      "loss": 0.0148,
      "step": 1314520
    },
    {
      "epoch": 2.1512735413680013,
      "grad_norm": 0.19576363265514374,
      "learning_rate": 5.7349946956768124e-06,
      "loss": 0.0116,
      "step": 1314540
    },
    {
      "epoch": 2.151306271806655,
      "grad_norm": 0.2465810924768448,
      "learning_rate": 5.734928803463295e-06,
      "loss": 0.0179,
      "step": 1314560
    },
    {
      "epoch": 2.151339002245308,
      "grad_norm": 0.5716599822044373,
      "learning_rate": 5.734862911249778e-06,
      "loss": 0.019,
      "step": 1314580
    },
    {
      "epoch": 2.1513717326839616,
      "grad_norm": 0.1831965446472168,
      "learning_rate": 5.7347970190362615e-06,
      "loss": 0.0152,
      "step": 1314600
    },
    {
      "epoch": 2.151404463122615,
      "grad_norm": 0.28349199891090393,
      "learning_rate": 5.734731126822743e-06,
      "loss": 0.0089,
      "step": 1314620
    },
    {
      "epoch": 2.151437193561268,
      "grad_norm": 0.7685844898223877,
      "learning_rate": 5.734665234609227e-06,
      "loss": 0.0204,
      "step": 1314640
    },
    {
      "epoch": 2.1514699239999215,
      "grad_norm": 0.8317069411277771,
      "learning_rate": 5.734599342395709e-06,
      "loss": 0.0113,
      "step": 1314660
    },
    {
      "epoch": 2.1515026544385747,
      "grad_norm": 0.13700498640537262,
      "learning_rate": 5.7345334501821925e-06,
      "loss": 0.0122,
      "step": 1314680
    },
    {
      "epoch": 2.1515353848772283,
      "grad_norm": 0.43112483620643616,
      "learning_rate": 5.734467557968675e-06,
      "loss": 0.0228,
      "step": 1314700
    },
    {
      "epoch": 2.1515681153158814,
      "grad_norm": 0.29164019227027893,
      "learning_rate": 5.734401665755158e-06,
      "loss": 0.0225,
      "step": 1314720
    },
    {
      "epoch": 2.151600845754535,
      "grad_norm": 1.018944501876831,
      "learning_rate": 5.734335773541641e-06,
      "loss": 0.0137,
      "step": 1314740
    },
    {
      "epoch": 2.151633576193188,
      "grad_norm": 0.3633095622062683,
      "learning_rate": 5.734269881328124e-06,
      "loss": 0.01,
      "step": 1314760
    },
    {
      "epoch": 2.1516663066318413,
      "grad_norm": 0.1641349345445633,
      "learning_rate": 5.734203989114607e-06,
      "loss": 0.0149,
      "step": 1314780
    },
    {
      "epoch": 2.151699037070495,
      "grad_norm": 0.3935246169567108,
      "learning_rate": 5.73413809690109e-06,
      "loss": 0.0163,
      "step": 1314800
    },
    {
      "epoch": 2.151731767509148,
      "grad_norm": 0.7023709416389465,
      "learning_rate": 5.734072204687573e-06,
      "loss": 0.0197,
      "step": 1314820
    },
    {
      "epoch": 2.1517644979478017,
      "grad_norm": 0.19022570550441742,
      "learning_rate": 5.734006312474055e-06,
      "loss": 0.0117,
      "step": 1314840
    },
    {
      "epoch": 2.151797228386455,
      "grad_norm": 0.2884824275970459,
      "learning_rate": 5.733940420260539e-06,
      "loss": 0.0111,
      "step": 1314860
    },
    {
      "epoch": 2.151829958825108,
      "grad_norm": 0.34790119528770447,
      "learning_rate": 5.733874528047021e-06,
      "loss": 0.0153,
      "step": 1314880
    },
    {
      "epoch": 2.1518626892637616,
      "grad_norm": 0.38915160298347473,
      "learning_rate": 5.733808635833504e-06,
      "loss": 0.0242,
      "step": 1314900
    },
    {
      "epoch": 2.1518954197024147,
      "grad_norm": 0.22237548232078552,
      "learning_rate": 5.733742743619987e-06,
      "loss": 0.0169,
      "step": 1314920
    },
    {
      "epoch": 2.1519281501410683,
      "grad_norm": 0.5214099884033203,
      "learning_rate": 5.73367685140647e-06,
      "loss": 0.0124,
      "step": 1314940
    },
    {
      "epoch": 2.1519608805797215,
      "grad_norm": 0.3620156943798065,
      "learning_rate": 5.7336109591929525e-06,
      "loss": 0.0156,
      "step": 1314960
    },
    {
      "epoch": 2.151993611018375,
      "grad_norm": 0.11104132980108261,
      "learning_rate": 5.733545066979436e-06,
      "loss": 0.0166,
      "step": 1314980
    },
    {
      "epoch": 2.152026341457028,
      "grad_norm": 0.6573753952980042,
      "learning_rate": 5.733479174765918e-06,
      "loss": 0.0178,
      "step": 1315000
    },
    {
      "epoch": 2.1520590718956814,
      "grad_norm": 0.394199937582016,
      "learning_rate": 5.7334132825524016e-06,
      "loss": 0.0153,
      "step": 1315020
    },
    {
      "epoch": 2.152091802334335,
      "grad_norm": 0.25212764739990234,
      "learning_rate": 5.7333473903388835e-06,
      "loss": 0.0177,
      "step": 1315040
    },
    {
      "epoch": 2.152124532772988,
      "grad_norm": 0.6481263637542725,
      "learning_rate": 5.733281498125367e-06,
      "loss": 0.0178,
      "step": 1315060
    },
    {
      "epoch": 2.1521572632116417,
      "grad_norm": 0.36398807168006897,
      "learning_rate": 5.733215605911849e-06,
      "loss": 0.0135,
      "step": 1315080
    },
    {
      "epoch": 2.152189993650295,
      "grad_norm": 0.4773907959461212,
      "learning_rate": 5.7331497136983325e-06,
      "loss": 0.0123,
      "step": 1315100
    },
    {
      "epoch": 2.1522227240889484,
      "grad_norm": 0.14465145766735077,
      "learning_rate": 5.733083821484816e-06,
      "loss": 0.0119,
      "step": 1315120
    },
    {
      "epoch": 2.1522554545276016,
      "grad_norm": 0.5147281289100647,
      "learning_rate": 5.733017929271298e-06,
      "loss": 0.0214,
      "step": 1315140
    },
    {
      "epoch": 2.1522881849662547,
      "grad_norm": 0.7683839797973633,
      "learning_rate": 5.732952037057782e-06,
      "loss": 0.0148,
      "step": 1315160
    },
    {
      "epoch": 2.1523209154049083,
      "grad_norm": 0.5720378160476685,
      "learning_rate": 5.732886144844264e-06,
      "loss": 0.0167,
      "step": 1315180
    },
    {
      "epoch": 2.1523536458435615,
      "grad_norm": 0.7457721829414368,
      "learning_rate": 5.732820252630747e-06,
      "loss": 0.0126,
      "step": 1315200
    },
    {
      "epoch": 2.152386376282215,
      "grad_norm": 0.9482258558273315,
      "learning_rate": 5.73275436041723e-06,
      "loss": 0.0136,
      "step": 1315220
    },
    {
      "epoch": 2.1524191067208682,
      "grad_norm": 0.3069721460342407,
      "learning_rate": 5.732688468203713e-06,
      "loss": 0.0169,
      "step": 1315240
    },
    {
      "epoch": 2.152451837159522,
      "grad_norm": 0.4225168228149414,
      "learning_rate": 5.732622575990195e-06,
      "loss": 0.014,
      "step": 1315260
    },
    {
      "epoch": 2.152484567598175,
      "grad_norm": 0.18910247087478638,
      "learning_rate": 5.732556683776679e-06,
      "loss": 0.019,
      "step": 1315280
    },
    {
      "epoch": 2.152517298036828,
      "grad_norm": 0.31287717819213867,
      "learning_rate": 5.732490791563161e-06,
      "loss": 0.0105,
      "step": 1315300
    },
    {
      "epoch": 2.1525500284754817,
      "grad_norm": 0.1873173862695694,
      "learning_rate": 5.732424899349644e-06,
      "loss": 0.0114,
      "step": 1315320
    },
    {
      "epoch": 2.152582758914135,
      "grad_norm": 0.20823028683662415,
      "learning_rate": 5.732359007136127e-06,
      "loss": 0.0201,
      "step": 1315340
    },
    {
      "epoch": 2.1526154893527885,
      "grad_norm": 0.45717307925224304,
      "learning_rate": 5.73229311492261e-06,
      "loss": 0.0154,
      "step": 1315360
    },
    {
      "epoch": 2.1526482197914416,
      "grad_norm": 0.07272004336118698,
      "learning_rate": 5.732227222709093e-06,
      "loss": 0.0115,
      "step": 1315380
    },
    {
      "epoch": 2.1526809502300948,
      "grad_norm": 0.16113312542438507,
      "learning_rate": 5.732161330495576e-06,
      "loss": 0.0152,
      "step": 1315400
    },
    {
      "epoch": 2.1527136806687484,
      "grad_norm": 1.3241522312164307,
      "learning_rate": 5.732095438282058e-06,
      "loss": 0.0141,
      "step": 1315420
    },
    {
      "epoch": 2.1527464111074015,
      "grad_norm": 0.24806560575962067,
      "learning_rate": 5.732029546068542e-06,
      "loss": 0.0238,
      "step": 1315440
    },
    {
      "epoch": 2.152779141546055,
      "grad_norm": 0.4515254497528076,
      "learning_rate": 5.731963653855025e-06,
      "loss": 0.0206,
      "step": 1315460
    },
    {
      "epoch": 2.1528118719847082,
      "grad_norm": 0.671228289604187,
      "learning_rate": 5.731897761641507e-06,
      "loss": 0.0169,
      "step": 1315480
    },
    {
      "epoch": 2.152844602423362,
      "grad_norm": 0.1080220490694046,
      "learning_rate": 5.731831869427991e-06,
      "loss": 0.0229,
      "step": 1315500
    },
    {
      "epoch": 2.152877332862015,
      "grad_norm": 0.5864347219467163,
      "learning_rate": 5.731765977214473e-06,
      "loss": 0.0162,
      "step": 1315520
    },
    {
      "epoch": 2.152910063300668,
      "grad_norm": 0.8966747522354126,
      "learning_rate": 5.731700085000956e-06,
      "loss": 0.0107,
      "step": 1315540
    },
    {
      "epoch": 2.1529427937393217,
      "grad_norm": 0.2639678716659546,
      "learning_rate": 5.731634192787439e-06,
      "loss": 0.0136,
      "step": 1315560
    },
    {
      "epoch": 2.152975524177975,
      "grad_norm": 0.8101807832717896,
      "learning_rate": 5.731568300573922e-06,
      "loss": 0.0158,
      "step": 1315580
    },
    {
      "epoch": 2.1530082546166285,
      "grad_norm": 0.26183202862739563,
      "learning_rate": 5.731502408360404e-06,
      "loss": 0.0115,
      "step": 1315600
    },
    {
      "epoch": 2.1530409850552816,
      "grad_norm": 0.2450380027294159,
      "learning_rate": 5.731436516146888e-06,
      "loss": 0.0119,
      "step": 1315620
    },
    {
      "epoch": 2.1530737154939352,
      "grad_norm": 1.1849771738052368,
      "learning_rate": 5.73137062393337e-06,
      "loss": 0.0149,
      "step": 1315640
    },
    {
      "epoch": 2.1531064459325884,
      "grad_norm": 0.6794313192367554,
      "learning_rate": 5.7313047317198535e-06,
      "loss": 0.0183,
      "step": 1315660
    },
    {
      "epoch": 2.1531391763712415,
      "grad_norm": 1.8563095331192017,
      "learning_rate": 5.731238839506335e-06,
      "loss": 0.012,
      "step": 1315680
    },
    {
      "epoch": 2.153171906809895,
      "grad_norm": 0.08271761238574982,
      "learning_rate": 5.731172947292819e-06,
      "loss": 0.0184,
      "step": 1315700
    },
    {
      "epoch": 2.1532046372485483,
      "grad_norm": 0.16980735957622528,
      "learning_rate": 5.731107055079302e-06,
      "loss": 0.01,
      "step": 1315720
    },
    {
      "epoch": 2.153237367687202,
      "grad_norm": 0.49045515060424805,
      "learning_rate": 5.7310411628657844e-06,
      "loss": 0.019,
      "step": 1315740
    },
    {
      "epoch": 2.153270098125855,
      "grad_norm": 0.4204515218734741,
      "learning_rate": 5.730975270652267e-06,
      "loss": 0.0126,
      "step": 1315760
    },
    {
      "epoch": 2.1533028285645086,
      "grad_norm": 0.31366655230522156,
      "learning_rate": 5.730909378438751e-06,
      "loss": 0.0115,
      "step": 1315780
    },
    {
      "epoch": 2.1533355590031618,
      "grad_norm": 5.41359281539917,
      "learning_rate": 5.7308434862252335e-06,
      "loss": 0.0187,
      "step": 1315800
    },
    {
      "epoch": 2.153368289441815,
      "grad_norm": 0.323190301656723,
      "learning_rate": 5.730777594011716e-06,
      "loss": 0.0146,
      "step": 1315820
    },
    {
      "epoch": 2.1534010198804685,
      "grad_norm": 0.34111106395721436,
      "learning_rate": 5.7307117017982e-06,
      "loss": 0.0202,
      "step": 1315840
    },
    {
      "epoch": 2.1534337503191217,
      "grad_norm": 0.23942485451698303,
      "learning_rate": 5.730645809584682e-06,
      "loss": 0.0151,
      "step": 1315860
    },
    {
      "epoch": 2.1534664807577752,
      "grad_norm": 0.39512720704078674,
      "learning_rate": 5.730579917371165e-06,
      "loss": 0.0178,
      "step": 1315880
    },
    {
      "epoch": 2.1534992111964284,
      "grad_norm": 0.37294772267341614,
      "learning_rate": 5.730514025157647e-06,
      "loss": 0.0164,
      "step": 1315900
    },
    {
      "epoch": 2.153531941635082,
      "grad_norm": 0.16150563955307007,
      "learning_rate": 5.730448132944131e-06,
      "loss": 0.0188,
      "step": 1315920
    },
    {
      "epoch": 2.153564672073735,
      "grad_norm": 1.6339246034622192,
      "learning_rate": 5.7303822407306135e-06,
      "loss": 0.0156,
      "step": 1315940
    },
    {
      "epoch": 2.1535974025123883,
      "grad_norm": 0.4379226565361023,
      "learning_rate": 5.730316348517096e-06,
      "loss": 0.0175,
      "step": 1315960
    },
    {
      "epoch": 2.153630132951042,
      "grad_norm": 0.33093956112861633,
      "learning_rate": 5.730250456303579e-06,
      "loss": 0.018,
      "step": 1315980
    },
    {
      "epoch": 2.153662863389695,
      "grad_norm": 0.8049787878990173,
      "learning_rate": 5.730184564090063e-06,
      "loss": 0.0119,
      "step": 1316000
    },
    {
      "epoch": 2.1536955938283486,
      "grad_norm": 0.9565327167510986,
      "learning_rate": 5.7301186718765445e-06,
      "loss": 0.0187,
      "step": 1316020
    },
    {
      "epoch": 2.153728324267002,
      "grad_norm": 0.39986151456832886,
      "learning_rate": 5.730052779663028e-06,
      "loss": 0.0174,
      "step": 1316040
    },
    {
      "epoch": 2.1537610547056554,
      "grad_norm": 0.7148399353027344,
      "learning_rate": 5.72998688744951e-06,
      "loss": 0.0138,
      "step": 1316060
    },
    {
      "epoch": 2.1537937851443085,
      "grad_norm": 0.21069423854351044,
      "learning_rate": 5.7299209952359936e-06,
      "loss": 0.0204,
      "step": 1316080
    },
    {
      "epoch": 2.1538265155829617,
      "grad_norm": 1.1451573371887207,
      "learning_rate": 5.7298551030224754e-06,
      "loss": 0.017,
      "step": 1316100
    },
    {
      "epoch": 2.1538592460216153,
      "grad_norm": 0.08908919245004654,
      "learning_rate": 5.729789210808959e-06,
      "loss": 0.0105,
      "step": 1316120
    },
    {
      "epoch": 2.1538919764602684,
      "grad_norm": 0.7688488960266113,
      "learning_rate": 5.729723318595442e-06,
      "loss": 0.018,
      "step": 1316140
    },
    {
      "epoch": 2.153924706898922,
      "grad_norm": 0.16310332715511322,
      "learning_rate": 5.7296574263819245e-06,
      "loss": 0.0196,
      "step": 1316160
    },
    {
      "epoch": 2.153957437337575,
      "grad_norm": 0.5116111040115356,
      "learning_rate": 5.729591534168408e-06,
      "loss": 0.0135,
      "step": 1316180
    },
    {
      "epoch": 2.1539901677762288,
      "grad_norm": 0.4580164849758148,
      "learning_rate": 5.729525641954891e-06,
      "loss": 0.0162,
      "step": 1316200
    },
    {
      "epoch": 2.154022898214882,
      "grad_norm": 0.5244160294532776,
      "learning_rate": 5.729459749741374e-06,
      "loss": 0.02,
      "step": 1316220
    },
    {
      "epoch": 2.154055628653535,
      "grad_norm": 0.17294882237911224,
      "learning_rate": 5.729393857527856e-06,
      "loss": 0.0132,
      "step": 1316240
    },
    {
      "epoch": 2.1540883590921887,
      "grad_norm": 0.29357174038887024,
      "learning_rate": 5.72932796531434e-06,
      "loss": 0.0127,
      "step": 1316260
    },
    {
      "epoch": 2.154121089530842,
      "grad_norm": 0.8346022963523865,
      "learning_rate": 5.729262073100822e-06,
      "loss": 0.0184,
      "step": 1316280
    },
    {
      "epoch": 2.1541538199694954,
      "grad_norm": 0.5501466989517212,
      "learning_rate": 5.729196180887305e-06,
      "loss": 0.011,
      "step": 1316300
    },
    {
      "epoch": 2.1541865504081485,
      "grad_norm": 3.482935667037964,
      "learning_rate": 5.729130288673787e-06,
      "loss": 0.0155,
      "step": 1316320
    },
    {
      "epoch": 2.154219280846802,
      "grad_norm": 0.5696696043014526,
      "learning_rate": 5.729064396460271e-06,
      "loss": 0.0203,
      "step": 1316340
    },
    {
      "epoch": 2.1542520112854553,
      "grad_norm": 0.2436477690935135,
      "learning_rate": 5.728998504246754e-06,
      "loss": 0.018,
      "step": 1316360
    },
    {
      "epoch": 2.1542847417241084,
      "grad_norm": 0.6656197309494019,
      "learning_rate": 5.728932612033236e-06,
      "loss": 0.0193,
      "step": 1316380
    },
    {
      "epoch": 2.154317472162762,
      "grad_norm": 0.08727932721376419,
      "learning_rate": 5.728866719819719e-06,
      "loss": 0.0155,
      "step": 1316400
    },
    {
      "epoch": 2.154350202601415,
      "grad_norm": 0.24039778113365173,
      "learning_rate": 5.728800827606203e-06,
      "loss": 0.0152,
      "step": 1316420
    },
    {
      "epoch": 2.154382933040069,
      "grad_norm": 0.14562125504016876,
      "learning_rate": 5.7287349353926846e-06,
      "loss": 0.0191,
      "step": 1316440
    },
    {
      "epoch": 2.154415663478722,
      "grad_norm": 0.39423054456710815,
      "learning_rate": 5.728669043179168e-06,
      "loss": 0.0171,
      "step": 1316460
    },
    {
      "epoch": 2.154448393917375,
      "grad_norm": 0.6423882842063904,
      "learning_rate": 5.72860315096565e-06,
      "loss": 0.0196,
      "step": 1316480
    },
    {
      "epoch": 2.1544811243560287,
      "grad_norm": 0.3682512640953064,
      "learning_rate": 5.728537258752134e-06,
      "loss": 0.0212,
      "step": 1316500
    },
    {
      "epoch": 2.154513854794682,
      "grad_norm": 0.2616015374660492,
      "learning_rate": 5.728471366538617e-06,
      "loss": 0.0158,
      "step": 1316520
    },
    {
      "epoch": 2.1545465852333354,
      "grad_norm": 0.39872604608535767,
      "learning_rate": 5.728405474325099e-06,
      "loss": 0.0118,
      "step": 1316540
    },
    {
      "epoch": 2.1545793156719886,
      "grad_norm": 0.6646620035171509,
      "learning_rate": 5.728339582111583e-06,
      "loss": 0.0144,
      "step": 1316560
    },
    {
      "epoch": 2.154612046110642,
      "grad_norm": 0.22931577265262604,
      "learning_rate": 5.7282736898980654e-06,
      "loss": 0.0206,
      "step": 1316580
    },
    {
      "epoch": 2.1546447765492953,
      "grad_norm": 0.32686847448349,
      "learning_rate": 5.728207797684548e-06,
      "loss": 0.0162,
      "step": 1316600
    },
    {
      "epoch": 2.1546775069879485,
      "grad_norm": 1.0535531044006348,
      "learning_rate": 5.728141905471031e-06,
      "loss": 0.0162,
      "step": 1316620
    },
    {
      "epoch": 2.154710237426602,
      "grad_norm": 0.9209883213043213,
      "learning_rate": 5.7280760132575145e-06,
      "loss": 0.0134,
      "step": 1316640
    },
    {
      "epoch": 2.154742967865255,
      "grad_norm": 0.19018477201461792,
      "learning_rate": 5.728010121043996e-06,
      "loss": 0.0204,
      "step": 1316660
    },
    {
      "epoch": 2.154775698303909,
      "grad_norm": 0.2098332643508911,
      "learning_rate": 5.72794422883048e-06,
      "loss": 0.0134,
      "step": 1316680
    },
    {
      "epoch": 2.154808428742562,
      "grad_norm": 0.507344126701355,
      "learning_rate": 5.727878336616962e-06,
      "loss": 0.0084,
      "step": 1316700
    },
    {
      "epoch": 2.1548411591812155,
      "grad_norm": 0.7173441052436829,
      "learning_rate": 5.7278124444034455e-06,
      "loss": 0.0215,
      "step": 1316720
    },
    {
      "epoch": 2.1548738896198687,
      "grad_norm": 0.17954197525978088,
      "learning_rate": 5.727746552189928e-06,
      "loss": 0.0167,
      "step": 1316740
    },
    {
      "epoch": 2.154906620058522,
      "grad_norm": 0.2998899519443512,
      "learning_rate": 5.727680659976411e-06,
      "loss": 0.0204,
      "step": 1316760
    },
    {
      "epoch": 2.1549393504971754,
      "grad_norm": 0.31526991724967957,
      "learning_rate": 5.727614767762894e-06,
      "loss": 0.013,
      "step": 1316780
    },
    {
      "epoch": 2.1549720809358286,
      "grad_norm": 0.14759215712547302,
      "learning_rate": 5.727548875549377e-06,
      "loss": 0.0119,
      "step": 1316800
    },
    {
      "epoch": 2.155004811374482,
      "grad_norm": 0.30436912178993225,
      "learning_rate": 5.727482983335859e-06,
      "loss": 0.0155,
      "step": 1316820
    },
    {
      "epoch": 2.1550375418131353,
      "grad_norm": 0.29759639501571655,
      "learning_rate": 5.727417091122343e-06,
      "loss": 0.0175,
      "step": 1316840
    },
    {
      "epoch": 2.1550702722517885,
      "grad_norm": 0.2114570438861847,
      "learning_rate": 5.727351198908826e-06,
      "loss": 0.0158,
      "step": 1316860
    },
    {
      "epoch": 2.155103002690442,
      "grad_norm": 0.37447163462638855,
      "learning_rate": 5.727285306695308e-06,
      "loss": 0.0216,
      "step": 1316880
    },
    {
      "epoch": 2.1551357331290952,
      "grad_norm": 0.5275938510894775,
      "learning_rate": 5.727219414481792e-06,
      "loss": 0.0178,
      "step": 1316900
    },
    {
      "epoch": 2.155168463567749,
      "grad_norm": 0.18465553224086761,
      "learning_rate": 5.727153522268274e-06,
      "loss": 0.0202,
      "step": 1316920
    },
    {
      "epoch": 2.155201194006402,
      "grad_norm": 1.0033010244369507,
      "learning_rate": 5.727087630054757e-06,
      "loss": 0.0203,
      "step": 1316940
    },
    {
      "epoch": 2.1552339244450556,
      "grad_norm": 0.46061697602272034,
      "learning_rate": 5.727021737841239e-06,
      "loss": 0.0142,
      "step": 1316960
    },
    {
      "epoch": 2.1552666548837087,
      "grad_norm": 0.5143731236457825,
      "learning_rate": 5.726955845627723e-06,
      "loss": 0.0131,
      "step": 1316980
    },
    {
      "epoch": 2.155299385322362,
      "grad_norm": 0.8255656361579895,
      "learning_rate": 5.7268899534142055e-06,
      "loss": 0.0123,
      "step": 1317000
    },
    {
      "epoch": 2.1553321157610155,
      "grad_norm": 0.7111091613769531,
      "learning_rate": 5.726824061200689e-06,
      "loss": 0.0178,
      "step": 1317020
    },
    {
      "epoch": 2.1553648461996686,
      "grad_norm": 0.19485023617744446,
      "learning_rate": 5.726758168987171e-06,
      "loss": 0.0127,
      "step": 1317040
    },
    {
      "epoch": 2.155397576638322,
      "grad_norm": 0.3248186409473419,
      "learning_rate": 5.7266922767736546e-06,
      "loss": 0.0141,
      "step": 1317060
    },
    {
      "epoch": 2.1554303070769754,
      "grad_norm": 1.1846258640289307,
      "learning_rate": 5.7266263845601365e-06,
      "loss": 0.0227,
      "step": 1317080
    },
    {
      "epoch": 2.155463037515629,
      "grad_norm": 0.14368022978305817,
      "learning_rate": 5.72656049234662e-06,
      "loss": 0.0143,
      "step": 1317100
    },
    {
      "epoch": 2.155495767954282,
      "grad_norm": 1.0883171558380127,
      "learning_rate": 5.726494600133102e-06,
      "loss": 0.0194,
      "step": 1317120
    },
    {
      "epoch": 2.1555284983929353,
      "grad_norm": 0.5974018573760986,
      "learning_rate": 5.7264287079195855e-06,
      "loss": 0.0256,
      "step": 1317140
    },
    {
      "epoch": 2.155561228831589,
      "grad_norm": 0.7958795428276062,
      "learning_rate": 5.726362815706068e-06,
      "loss": 0.0155,
      "step": 1317160
    },
    {
      "epoch": 2.155593959270242,
      "grad_norm": 0.6957255005836487,
      "learning_rate": 5.726296923492551e-06,
      "loss": 0.0119,
      "step": 1317180
    },
    {
      "epoch": 2.1556266897088956,
      "grad_norm": 0.25358277559280396,
      "learning_rate": 5.726231031279034e-06,
      "loss": 0.0226,
      "step": 1317200
    },
    {
      "epoch": 2.1556594201475487,
      "grad_norm": 0.36413058638572693,
      "learning_rate": 5.726165139065517e-06,
      "loss": 0.0139,
      "step": 1317220
    },
    {
      "epoch": 2.1556921505862023,
      "grad_norm": 0.3858657479286194,
      "learning_rate": 5.726099246852e-06,
      "loss": 0.013,
      "step": 1317240
    },
    {
      "epoch": 2.1557248810248555,
      "grad_norm": 1.0594440698623657,
      "learning_rate": 5.726033354638483e-06,
      "loss": 0.0148,
      "step": 1317260
    },
    {
      "epoch": 2.1557576114635086,
      "grad_norm": 0.4550602436065674,
      "learning_rate": 5.725967462424966e-06,
      "loss": 0.0151,
      "step": 1317280
    },
    {
      "epoch": 2.1557903419021622,
      "grad_norm": 0.5455581545829773,
      "learning_rate": 5.725901570211448e-06,
      "loss": 0.0233,
      "step": 1317300
    },
    {
      "epoch": 2.1558230723408154,
      "grad_norm": 0.3740648031234741,
      "learning_rate": 5.725835677997932e-06,
      "loss": 0.015,
      "step": 1317320
    },
    {
      "epoch": 2.155855802779469,
      "grad_norm": 0.228313148021698,
      "learning_rate": 5.725769785784414e-06,
      "loss": 0.0145,
      "step": 1317340
    },
    {
      "epoch": 2.155888533218122,
      "grad_norm": 0.428803414106369,
      "learning_rate": 5.725703893570897e-06,
      "loss": 0.02,
      "step": 1317360
    },
    {
      "epoch": 2.1559212636567757,
      "grad_norm": 0.0803631991147995,
      "learning_rate": 5.72563800135738e-06,
      "loss": 0.0138,
      "step": 1317380
    },
    {
      "epoch": 2.155953994095429,
      "grad_norm": 0.321656733751297,
      "learning_rate": 5.725572109143863e-06,
      "loss": 0.013,
      "step": 1317400
    },
    {
      "epoch": 2.155986724534082,
      "grad_norm": 0.19688139855861664,
      "learning_rate": 5.725506216930346e-06,
      "loss": 0.0146,
      "step": 1317420
    },
    {
      "epoch": 2.1560194549727356,
      "grad_norm": 0.1440482884645462,
      "learning_rate": 5.725440324716829e-06,
      "loss": 0.0178,
      "step": 1317440
    },
    {
      "epoch": 2.1560521854113888,
      "grad_norm": 0.20962953567504883,
      "learning_rate": 5.725374432503311e-06,
      "loss": 0.0228,
      "step": 1317460
    },
    {
      "epoch": 2.1560849158500424,
      "grad_norm": 0.5272400975227356,
      "learning_rate": 5.725308540289795e-06,
      "loss": 0.0221,
      "step": 1317480
    },
    {
      "epoch": 2.1561176462886955,
      "grad_norm": 0.60736083984375,
      "learning_rate": 5.7252426480762765e-06,
      "loss": 0.0183,
      "step": 1317500
    },
    {
      "epoch": 2.156150376727349,
      "grad_norm": 0.42248865962028503,
      "learning_rate": 5.72517675586276e-06,
      "loss": 0.0211,
      "step": 1317520
    },
    {
      "epoch": 2.1561831071660023,
      "grad_norm": 0.6443073749542236,
      "learning_rate": 5.725110863649243e-06,
      "loss": 0.0164,
      "step": 1317540
    },
    {
      "epoch": 2.1562158376046554,
      "grad_norm": 0.39246851205825806,
      "learning_rate": 5.725044971435726e-06,
      "loss": 0.0174,
      "step": 1317560
    },
    {
      "epoch": 2.156248568043309,
      "grad_norm": 0.7208799719810486,
      "learning_rate": 5.724979079222209e-06,
      "loss": 0.016,
      "step": 1317580
    },
    {
      "epoch": 2.156281298481962,
      "grad_norm": 0.6487494111061096,
      "learning_rate": 5.724913187008692e-06,
      "loss": 0.0131,
      "step": 1317600
    },
    {
      "epoch": 2.1563140289206157,
      "grad_norm": 0.9395598769187927,
      "learning_rate": 5.724847294795175e-06,
      "loss": 0.0166,
      "step": 1317620
    },
    {
      "epoch": 2.156346759359269,
      "grad_norm": 0.5570892691612244,
      "learning_rate": 5.724781402581657e-06,
      "loss": 0.0174,
      "step": 1317640
    },
    {
      "epoch": 2.1563794897979225,
      "grad_norm": 0.2753811478614807,
      "learning_rate": 5.724715510368141e-06,
      "loss": 0.0232,
      "step": 1317660
    },
    {
      "epoch": 2.1564122202365756,
      "grad_norm": 0.15486808121204376,
      "learning_rate": 5.724649618154623e-06,
      "loss": 0.0143,
      "step": 1317680
    },
    {
      "epoch": 2.156444950675229,
      "grad_norm": 0.0367133654654026,
      "learning_rate": 5.7245837259411065e-06,
      "loss": 0.0242,
      "step": 1317700
    },
    {
      "epoch": 2.1564776811138824,
      "grad_norm": 0.3429866433143616,
      "learning_rate": 5.724517833727588e-06,
      "loss": 0.0089,
      "step": 1317720
    },
    {
      "epoch": 2.1565104115525355,
      "grad_norm": 0.17090396583080292,
      "learning_rate": 5.724451941514072e-06,
      "loss": 0.0163,
      "step": 1317740
    },
    {
      "epoch": 2.156543141991189,
      "grad_norm": 0.30921411514282227,
      "learning_rate": 5.724386049300555e-06,
      "loss": 0.0134,
      "step": 1317760
    },
    {
      "epoch": 2.1565758724298423,
      "grad_norm": 0.5213375687599182,
      "learning_rate": 5.7243201570870374e-06,
      "loss": 0.0163,
      "step": 1317780
    },
    {
      "epoch": 2.156608602868496,
      "grad_norm": 0.3800210654735565,
      "learning_rate": 5.72425426487352e-06,
      "loss": 0.0116,
      "step": 1317800
    },
    {
      "epoch": 2.156641333307149,
      "grad_norm": 0.24266858398914337,
      "learning_rate": 5.724188372660004e-06,
      "loss": 0.0202,
      "step": 1317820
    },
    {
      "epoch": 2.156674063745802,
      "grad_norm": 0.18407408893108368,
      "learning_rate": 5.724122480446486e-06,
      "loss": 0.0185,
      "step": 1317840
    },
    {
      "epoch": 2.1567067941844558,
      "grad_norm": 0.5041178464889526,
      "learning_rate": 5.724056588232969e-06,
      "loss": 0.0145,
      "step": 1317860
    },
    {
      "epoch": 2.156739524623109,
      "grad_norm": 0.29452717304229736,
      "learning_rate": 5.723990696019451e-06,
      "loss": 0.0158,
      "step": 1317880
    },
    {
      "epoch": 2.1567722550617625,
      "grad_norm": 0.4899645447731018,
      "learning_rate": 5.723924803805935e-06,
      "loss": 0.0158,
      "step": 1317900
    },
    {
      "epoch": 2.1568049855004157,
      "grad_norm": 0.5116767287254333,
      "learning_rate": 5.723858911592418e-06,
      "loss": 0.0199,
      "step": 1317920
    },
    {
      "epoch": 2.156837715939069,
      "grad_norm": 0.16105042397975922,
      "learning_rate": 5.7237930193789e-06,
      "loss": 0.0129,
      "step": 1317940
    },
    {
      "epoch": 2.1568704463777224,
      "grad_norm": 0.16549143195152283,
      "learning_rate": 5.723727127165384e-06,
      "loss": 0.0141,
      "step": 1317960
    },
    {
      "epoch": 2.1569031768163756,
      "grad_norm": 0.5671926140785217,
      "learning_rate": 5.723661234951866e-06,
      "loss": 0.0176,
      "step": 1317980
    },
    {
      "epoch": 2.156935907255029,
      "grad_norm": 0.2552752196788788,
      "learning_rate": 5.723595342738349e-06,
      "loss": 0.0143,
      "step": 1318000
    },
    {
      "epoch": 2.1569686376936823,
      "grad_norm": 0.30588576197624207,
      "learning_rate": 5.723529450524832e-06,
      "loss": 0.0184,
      "step": 1318020
    },
    {
      "epoch": 2.157001368132336,
      "grad_norm": 0.6440169811248779,
      "learning_rate": 5.723463558311315e-06,
      "loss": 0.0151,
      "step": 1318040
    },
    {
      "epoch": 2.157034098570989,
      "grad_norm": 0.35179561376571655,
      "learning_rate": 5.7233976660977975e-06,
      "loss": 0.015,
      "step": 1318060
    },
    {
      "epoch": 2.157066829009642,
      "grad_norm": 0.8731173872947693,
      "learning_rate": 5.723331773884281e-06,
      "loss": 0.0176,
      "step": 1318080
    },
    {
      "epoch": 2.157099559448296,
      "grad_norm": 2.5430524349212646,
      "learning_rate": 5.723265881670763e-06,
      "loss": 0.0085,
      "step": 1318100
    },
    {
      "epoch": 2.157132289886949,
      "grad_norm": 0.39803266525268555,
      "learning_rate": 5.7231999894572466e-06,
      "loss": 0.0199,
      "step": 1318120
    },
    {
      "epoch": 2.1571650203256025,
      "grad_norm": 0.320589154958725,
      "learning_rate": 5.7231340972437285e-06,
      "loss": 0.0134,
      "step": 1318140
    },
    {
      "epoch": 2.1571977507642557,
      "grad_norm": 0.3549056053161621,
      "learning_rate": 5.723068205030212e-06,
      "loss": 0.0153,
      "step": 1318160
    },
    {
      "epoch": 2.1572304812029093,
      "grad_norm": 0.40336066484451294,
      "learning_rate": 5.723002312816695e-06,
      "loss": 0.0155,
      "step": 1318180
    },
    {
      "epoch": 2.1572632116415624,
      "grad_norm": 0.31805869936943054,
      "learning_rate": 5.7229364206031775e-06,
      "loss": 0.015,
      "step": 1318200
    },
    {
      "epoch": 2.1572959420802156,
      "grad_norm": 0.12170039117336273,
      "learning_rate": 5.72287052838966e-06,
      "loss": 0.0112,
      "step": 1318220
    },
    {
      "epoch": 2.157328672518869,
      "grad_norm": 0.11061513423919678,
      "learning_rate": 5.722804636176144e-06,
      "loss": 0.0104,
      "step": 1318240
    },
    {
      "epoch": 2.1573614029575223,
      "grad_norm": 0.3093593120574951,
      "learning_rate": 5.722738743962626e-06,
      "loss": 0.0146,
      "step": 1318260
    },
    {
      "epoch": 2.157394133396176,
      "grad_norm": 0.6490226984024048,
      "learning_rate": 5.722672851749109e-06,
      "loss": 0.0105,
      "step": 1318280
    },
    {
      "epoch": 2.157426863834829,
      "grad_norm": 0.30621984601020813,
      "learning_rate": 5.722606959535593e-06,
      "loss": 0.015,
      "step": 1318300
    },
    {
      "epoch": 2.157459594273482,
      "grad_norm": 0.16856937110424042,
      "learning_rate": 5.722541067322075e-06,
      "loss": 0.0151,
      "step": 1318320
    },
    {
      "epoch": 2.157492324712136,
      "grad_norm": 0.5125588774681091,
      "learning_rate": 5.722475175108558e-06,
      "loss": 0.0219,
      "step": 1318340
    },
    {
      "epoch": 2.157525055150789,
      "grad_norm": 0.3926694691181183,
      "learning_rate": 5.72240928289504e-06,
      "loss": 0.0194,
      "step": 1318360
    },
    {
      "epoch": 2.1575577855894426,
      "grad_norm": 0.049572303891181946,
      "learning_rate": 5.722343390681524e-06,
      "loss": 0.0113,
      "step": 1318380
    },
    {
      "epoch": 2.1575905160280957,
      "grad_norm": 0.38185879588127136,
      "learning_rate": 5.722277498468007e-06,
      "loss": 0.0129,
      "step": 1318400
    },
    {
      "epoch": 2.1576232464667493,
      "grad_norm": 0.25526922941207886,
      "learning_rate": 5.722211606254489e-06,
      "loss": 0.0183,
      "step": 1318420
    },
    {
      "epoch": 2.1576559769054025,
      "grad_norm": 0.6374501585960388,
      "learning_rate": 5.722145714040972e-06,
      "loss": 0.0108,
      "step": 1318440
    },
    {
      "epoch": 2.1576887073440556,
      "grad_norm": 0.4317544400691986,
      "learning_rate": 5.722079821827456e-06,
      "loss": 0.0165,
      "step": 1318460
    },
    {
      "epoch": 2.157721437782709,
      "grad_norm": 0.1890413612127304,
      "learning_rate": 5.7220139296139376e-06,
      "loss": 0.0142,
      "step": 1318480
    },
    {
      "epoch": 2.1577541682213623,
      "grad_norm": 1.3175833225250244,
      "learning_rate": 5.721948037400421e-06,
      "loss": 0.0136,
      "step": 1318500
    },
    {
      "epoch": 2.157786898660016,
      "grad_norm": 0.16595637798309326,
      "learning_rate": 5.721882145186903e-06,
      "loss": 0.0102,
      "step": 1318520
    },
    {
      "epoch": 2.157819629098669,
      "grad_norm": 0.3091585338115692,
      "learning_rate": 5.721816252973387e-06,
      "loss": 0.0106,
      "step": 1318540
    },
    {
      "epoch": 2.1578523595373227,
      "grad_norm": 0.34002625942230225,
      "learning_rate": 5.721750360759869e-06,
      "loss": 0.0161,
      "step": 1318560
    },
    {
      "epoch": 2.157885089975976,
      "grad_norm": 0.18558059632778168,
      "learning_rate": 5.721684468546352e-06,
      "loss": 0.0123,
      "step": 1318580
    },
    {
      "epoch": 2.157917820414629,
      "grad_norm": 0.5264418125152588,
      "learning_rate": 5.721618576332835e-06,
      "loss": 0.0169,
      "step": 1318600
    },
    {
      "epoch": 2.1579505508532826,
      "grad_norm": 0.33040374517440796,
      "learning_rate": 5.7215526841193184e-06,
      "loss": 0.0211,
      "step": 1318620
    },
    {
      "epoch": 2.1579832812919357,
      "grad_norm": 0.20226623117923737,
      "learning_rate": 5.721486791905801e-06,
      "loss": 0.0136,
      "step": 1318640
    },
    {
      "epoch": 2.1580160117305893,
      "grad_norm": 0.40123775601387024,
      "learning_rate": 5.721420899692284e-06,
      "loss": 0.0154,
      "step": 1318660
    },
    {
      "epoch": 2.1580487421692425,
      "grad_norm": 0.7588226795196533,
      "learning_rate": 5.7213550074787675e-06,
      "loss": 0.0169,
      "step": 1318680
    },
    {
      "epoch": 2.158081472607896,
      "grad_norm": 0.7421074509620667,
      "learning_rate": 5.721289115265249e-06,
      "loss": 0.0147,
      "step": 1318700
    },
    {
      "epoch": 2.158114203046549,
      "grad_norm": 0.2723652124404907,
      "learning_rate": 5.721223223051733e-06,
      "loss": 0.019,
      "step": 1318720
    },
    {
      "epoch": 2.1581469334852024,
      "grad_norm": 0.03837917745113373,
      "learning_rate": 5.721157330838215e-06,
      "loss": 0.0184,
      "step": 1318740
    },
    {
      "epoch": 2.158179663923856,
      "grad_norm": 0.2165706604719162,
      "learning_rate": 5.7210914386246985e-06,
      "loss": 0.0166,
      "step": 1318760
    },
    {
      "epoch": 2.158212394362509,
      "grad_norm": 0.6719838380813599,
      "learning_rate": 5.721025546411181e-06,
      "loss": 0.016,
      "step": 1318780
    },
    {
      "epoch": 2.1582451248011627,
      "grad_norm": 1.3315547704696655,
      "learning_rate": 5.720959654197664e-06,
      "loss": 0.0105,
      "step": 1318800
    },
    {
      "epoch": 2.158277855239816,
      "grad_norm": 0.36820393800735474,
      "learning_rate": 5.720893761984147e-06,
      "loss": 0.0121,
      "step": 1318820
    },
    {
      "epoch": 2.1583105856784695,
      "grad_norm": 0.3656826615333557,
      "learning_rate": 5.72082786977063e-06,
      "loss": 0.0113,
      "step": 1318840
    },
    {
      "epoch": 2.1583433161171226,
      "grad_norm": 1.0787866115570068,
      "learning_rate": 5.720761977557112e-06,
      "loss": 0.0196,
      "step": 1318860
    },
    {
      "epoch": 2.1583760465557758,
      "grad_norm": 0.6131641268730164,
      "learning_rate": 5.720696085343596e-06,
      "loss": 0.0113,
      "step": 1318880
    },
    {
      "epoch": 2.1584087769944293,
      "grad_norm": 1.73527193069458,
      "learning_rate": 5.720630193130078e-06,
      "loss": 0.0137,
      "step": 1318900
    },
    {
      "epoch": 2.1584415074330825,
      "grad_norm": 0.4048265218734741,
      "learning_rate": 5.720564300916561e-06,
      "loss": 0.0166,
      "step": 1318920
    },
    {
      "epoch": 2.158474237871736,
      "grad_norm": 0.21574309468269348,
      "learning_rate": 5.720498408703043e-06,
      "loss": 0.0232,
      "step": 1318940
    },
    {
      "epoch": 2.1585069683103892,
      "grad_norm": 0.9578713774681091,
      "learning_rate": 5.720432516489527e-06,
      "loss": 0.0178,
      "step": 1318960
    },
    {
      "epoch": 2.158539698749043,
      "grad_norm": 0.3779797852039337,
      "learning_rate": 5.72036662427601e-06,
      "loss": 0.0114,
      "step": 1318980
    },
    {
      "epoch": 2.158572429187696,
      "grad_norm": 0.24848578870296478,
      "learning_rate": 5.720300732062492e-06,
      "loss": 0.0117,
      "step": 1319000
    },
    {
      "epoch": 2.158605159626349,
      "grad_norm": 0.6192147731781006,
      "learning_rate": 5.720234839848976e-06,
      "loss": 0.0161,
      "step": 1319020
    },
    {
      "epoch": 2.1586378900650027,
      "grad_norm": 0.10556136071681976,
      "learning_rate": 5.7201689476354585e-06,
      "loss": 0.0181,
      "step": 1319040
    },
    {
      "epoch": 2.158670620503656,
      "grad_norm": 1.34808349609375,
      "learning_rate": 5.720103055421941e-06,
      "loss": 0.0142,
      "step": 1319060
    },
    {
      "epoch": 2.1587033509423095,
      "grad_norm": 0.3173694312572479,
      "learning_rate": 5.720037163208424e-06,
      "loss": 0.0185,
      "step": 1319080
    },
    {
      "epoch": 2.1587360813809626,
      "grad_norm": 0.20374508202075958,
      "learning_rate": 5.719971270994908e-06,
      "loss": 0.0095,
      "step": 1319100
    },
    {
      "epoch": 2.158768811819616,
      "grad_norm": 0.2623792290687561,
      "learning_rate": 5.7199053787813895e-06,
      "loss": 0.0142,
      "step": 1319120
    },
    {
      "epoch": 2.1588015422582694,
      "grad_norm": 1.9322352409362793,
      "learning_rate": 5.719839486567873e-06,
      "loss": 0.0174,
      "step": 1319140
    },
    {
      "epoch": 2.1588342726969225,
      "grad_norm": 0.12254270166158676,
      "learning_rate": 5.719773594354355e-06,
      "loss": 0.0164,
      "step": 1319160
    },
    {
      "epoch": 2.158867003135576,
      "grad_norm": 0.8906382918357849,
      "learning_rate": 5.7197077021408385e-06,
      "loss": 0.0164,
      "step": 1319180
    },
    {
      "epoch": 2.1588997335742293,
      "grad_norm": 0.09078753739595413,
      "learning_rate": 5.719641809927321e-06,
      "loss": 0.0193,
      "step": 1319200
    },
    {
      "epoch": 2.158932464012883,
      "grad_norm": 1.0385453701019287,
      "learning_rate": 5.719575917713804e-06,
      "loss": 0.0185,
      "step": 1319220
    },
    {
      "epoch": 2.158965194451536,
      "grad_norm": 0.3113798201084137,
      "learning_rate": 5.719510025500287e-06,
      "loss": 0.0142,
      "step": 1319240
    },
    {
      "epoch": 2.1589979248901896,
      "grad_norm": 0.6732360124588013,
      "learning_rate": 5.71944413328677e-06,
      "loss": 0.0123,
      "step": 1319260
    },
    {
      "epoch": 2.1590306553288428,
      "grad_norm": 0.18036240339279175,
      "learning_rate": 5.719378241073252e-06,
      "loss": 0.0162,
      "step": 1319280
    },
    {
      "epoch": 2.159063385767496,
      "grad_norm": 0.7557802796363831,
      "learning_rate": 5.719312348859736e-06,
      "loss": 0.0157,
      "step": 1319300
    },
    {
      "epoch": 2.1590961162061495,
      "grad_norm": 0.22126798331737518,
      "learning_rate": 5.719246456646219e-06,
      "loss": 0.0172,
      "step": 1319320
    },
    {
      "epoch": 2.1591288466448026,
      "grad_norm": 0.6194804906845093,
      "learning_rate": 5.719180564432701e-06,
      "loss": 0.0154,
      "step": 1319340
    },
    {
      "epoch": 2.1591615770834562,
      "grad_norm": 0.19789117574691772,
      "learning_rate": 5.719114672219185e-06,
      "loss": 0.0144,
      "step": 1319360
    },
    {
      "epoch": 2.1591943075221094,
      "grad_norm": 0.6295045614242554,
      "learning_rate": 5.719048780005667e-06,
      "loss": 0.0129,
      "step": 1319380
    },
    {
      "epoch": 2.159227037960763,
      "grad_norm": 0.15467385947704315,
      "learning_rate": 5.71898288779215e-06,
      "loss": 0.0128,
      "step": 1319400
    },
    {
      "epoch": 2.159259768399416,
      "grad_norm": 0.40685099363327026,
      "learning_rate": 5.718916995578633e-06,
      "loss": 0.0161,
      "step": 1319420
    },
    {
      "epoch": 2.1592924988380693,
      "grad_norm": 0.547914981842041,
      "learning_rate": 5.718851103365116e-06,
      "loss": 0.0165,
      "step": 1319440
    },
    {
      "epoch": 2.159325229276723,
      "grad_norm": 0.31622886657714844,
      "learning_rate": 5.718785211151599e-06,
      "loss": 0.0177,
      "step": 1319460
    },
    {
      "epoch": 2.159357959715376,
      "grad_norm": 0.5317757725715637,
      "learning_rate": 5.718719318938082e-06,
      "loss": 0.02,
      "step": 1319480
    },
    {
      "epoch": 2.1593906901540296,
      "grad_norm": 1.127048134803772,
      "learning_rate": 5.718653426724564e-06,
      "loss": 0.0164,
      "step": 1319500
    },
    {
      "epoch": 2.1594234205926828,
      "grad_norm": 0.2738083302974701,
      "learning_rate": 5.718587534511048e-06,
      "loss": 0.0134,
      "step": 1319520
    },
    {
      "epoch": 2.159456151031336,
      "grad_norm": 0.13759534060955048,
      "learning_rate": 5.7185216422975296e-06,
      "loss": 0.0156,
      "step": 1319540
    },
    {
      "epoch": 2.1594888814699895,
      "grad_norm": 0.2913632392883301,
      "learning_rate": 5.718455750084013e-06,
      "loss": 0.0188,
      "step": 1319560
    },
    {
      "epoch": 2.1595216119086427,
      "grad_norm": 0.23195047676563263,
      "learning_rate": 5.718389857870496e-06,
      "loss": 0.0163,
      "step": 1319580
    },
    {
      "epoch": 2.1595543423472963,
      "grad_norm": 0.4912042021751404,
      "learning_rate": 5.718323965656979e-06,
      "loss": 0.0095,
      "step": 1319600
    },
    {
      "epoch": 2.1595870727859494,
      "grad_norm": 0.24934732913970947,
      "learning_rate": 5.718258073443461e-06,
      "loss": 0.0158,
      "step": 1319620
    },
    {
      "epoch": 2.159619803224603,
      "grad_norm": 0.47200101613998413,
      "learning_rate": 5.718192181229945e-06,
      "loss": 0.0153,
      "step": 1319640
    },
    {
      "epoch": 2.159652533663256,
      "grad_norm": 0.27366775274276733,
      "learning_rate": 5.718126289016427e-06,
      "loss": 0.018,
      "step": 1319660
    },
    {
      "epoch": 2.1596852641019093,
      "grad_norm": 0.2915855646133423,
      "learning_rate": 5.7180603968029104e-06,
      "loss": 0.0154,
      "step": 1319680
    },
    {
      "epoch": 2.159717994540563,
      "grad_norm": 0.46377095580101013,
      "learning_rate": 5.717994504589394e-06,
      "loss": 0.0141,
      "step": 1319700
    },
    {
      "epoch": 2.159750724979216,
      "grad_norm": 0.22810034453868866,
      "learning_rate": 5.717928612375876e-06,
      "loss": 0.011,
      "step": 1319720
    },
    {
      "epoch": 2.1597834554178696,
      "grad_norm": 0.2886365056037903,
      "learning_rate": 5.7178627201623595e-06,
      "loss": 0.0146,
      "step": 1319740
    },
    {
      "epoch": 2.159816185856523,
      "grad_norm": 0.33779120445251465,
      "learning_rate": 5.717796827948841e-06,
      "loss": 0.0205,
      "step": 1319760
    },
    {
      "epoch": 2.1598489162951764,
      "grad_norm": 0.4062507450580597,
      "learning_rate": 5.717730935735325e-06,
      "loss": 0.0164,
      "step": 1319780
    },
    {
      "epoch": 2.1598816467338295,
      "grad_norm": 0.7381793260574341,
      "learning_rate": 5.717665043521808e-06,
      "loss": 0.0155,
      "step": 1319800
    },
    {
      "epoch": 2.1599143771724827,
      "grad_norm": 0.44212374091148376,
      "learning_rate": 5.7175991513082904e-06,
      "loss": 0.0127,
      "step": 1319820
    },
    {
      "epoch": 2.1599471076111363,
      "grad_norm": 0.2897246778011322,
      "learning_rate": 5.717533259094773e-06,
      "loss": 0.0166,
      "step": 1319840
    },
    {
      "epoch": 2.1599798380497894,
      "grad_norm": 1.148760437965393,
      "learning_rate": 5.717467366881257e-06,
      "loss": 0.0175,
      "step": 1319860
    },
    {
      "epoch": 2.160012568488443,
      "grad_norm": 0.2016209214925766,
      "learning_rate": 5.717401474667739e-06,
      "loss": 0.0109,
      "step": 1319880
    },
    {
      "epoch": 2.160045298927096,
      "grad_norm": 0.8590571880340576,
      "learning_rate": 5.717335582454222e-06,
      "loss": 0.0235,
      "step": 1319900
    },
    {
      "epoch": 2.1600780293657493,
      "grad_norm": 0.4039938747882843,
      "learning_rate": 5.717269690240704e-06,
      "loss": 0.0157,
      "step": 1319920
    },
    {
      "epoch": 2.160110759804403,
      "grad_norm": 0.29887500405311584,
      "learning_rate": 5.717203798027188e-06,
      "loss": 0.0155,
      "step": 1319940
    },
    {
      "epoch": 2.160143490243056,
      "grad_norm": 0.752366304397583,
      "learning_rate": 5.71713790581367e-06,
      "loss": 0.0131,
      "step": 1319960
    },
    {
      "epoch": 2.1601762206817097,
      "grad_norm": 0.7934715151786804,
      "learning_rate": 5.717072013600153e-06,
      "loss": 0.0156,
      "step": 1319980
    },
    {
      "epoch": 2.160208951120363,
      "grad_norm": 0.29846638441085815,
      "learning_rate": 5.717006121386636e-06,
      "loss": 0.011,
      "step": 1320000
    },
    {
      "epoch": 2.1602416815590164,
      "grad_norm": 0.6371685862541199,
      "learning_rate": 5.716940229173119e-06,
      "loss": 0.0165,
      "step": 1320020
    },
    {
      "epoch": 2.1602744119976696,
      "grad_norm": 0.5960047841072083,
      "learning_rate": 5.716874336959602e-06,
      "loss": 0.0191,
      "step": 1320040
    },
    {
      "epoch": 2.1603071424363227,
      "grad_norm": 0.8648347854614258,
      "learning_rate": 5.716808444746085e-06,
      "loss": 0.0144,
      "step": 1320060
    },
    {
      "epoch": 2.1603398728749763,
      "grad_norm": 0.37482210993766785,
      "learning_rate": 5.716742552532568e-06,
      "loss": 0.0145,
      "step": 1320080
    },
    {
      "epoch": 2.1603726033136295,
      "grad_norm": 0.4012235999107361,
      "learning_rate": 5.7166766603190505e-06,
      "loss": 0.0144,
      "step": 1320100
    },
    {
      "epoch": 2.160405333752283,
      "grad_norm": 0.19683146476745605,
      "learning_rate": 5.716610768105534e-06,
      "loss": 0.0159,
      "step": 1320120
    },
    {
      "epoch": 2.160438064190936,
      "grad_norm": 0.3398295044898987,
      "learning_rate": 5.716544875892016e-06,
      "loss": 0.0151,
      "step": 1320140
    },
    {
      "epoch": 2.16047079462959,
      "grad_norm": 0.17673462629318237,
      "learning_rate": 5.7164789836784996e-06,
      "loss": 0.0141,
      "step": 1320160
    },
    {
      "epoch": 2.160503525068243,
      "grad_norm": 0.5593520402908325,
      "learning_rate": 5.7164130914649815e-06,
      "loss": 0.0175,
      "step": 1320180
    },
    {
      "epoch": 2.160536255506896,
      "grad_norm": 0.11101070791482925,
      "learning_rate": 5.716347199251465e-06,
      "loss": 0.0127,
      "step": 1320200
    },
    {
      "epoch": 2.1605689859455497,
      "grad_norm": 0.475846529006958,
      "learning_rate": 5.716281307037948e-06,
      "loss": 0.0118,
      "step": 1320220
    },
    {
      "epoch": 2.160601716384203,
      "grad_norm": 0.6096836924552917,
      "learning_rate": 5.7162154148244305e-06,
      "loss": 0.0149,
      "step": 1320240
    },
    {
      "epoch": 2.1606344468228564,
      "grad_norm": 0.17855346202850342,
      "learning_rate": 5.716149522610913e-06,
      "loss": 0.0187,
      "step": 1320260
    },
    {
      "epoch": 2.1606671772615096,
      "grad_norm": 0.27425071597099304,
      "learning_rate": 5.716083630397397e-06,
      "loss": 0.0182,
      "step": 1320280
    },
    {
      "epoch": 2.160699907700163,
      "grad_norm": 1.1211774349212646,
      "learning_rate": 5.716017738183879e-06,
      "loss": 0.015,
      "step": 1320300
    },
    {
      "epoch": 2.1607326381388163,
      "grad_norm": 0.27413007616996765,
      "learning_rate": 5.715951845970362e-06,
      "loss": 0.0122,
      "step": 1320320
    },
    {
      "epoch": 2.1607653685774695,
      "grad_norm": 0.37035897374153137,
      "learning_rate": 5.715885953756844e-06,
      "loss": 0.0161,
      "step": 1320340
    },
    {
      "epoch": 2.160798099016123,
      "grad_norm": 0.3930203914642334,
      "learning_rate": 5.715820061543328e-06,
      "loss": 0.0106,
      "step": 1320360
    },
    {
      "epoch": 2.1608308294547762,
      "grad_norm": 0.651012659072876,
      "learning_rate": 5.715754169329811e-06,
      "loss": 0.0172,
      "step": 1320380
    },
    {
      "epoch": 2.16086355989343,
      "grad_norm": 0.7447363138198853,
      "learning_rate": 5.715688277116293e-06,
      "loss": 0.02,
      "step": 1320400
    },
    {
      "epoch": 2.160896290332083,
      "grad_norm": 0.5738345384597778,
      "learning_rate": 5.715622384902777e-06,
      "loss": 0.0177,
      "step": 1320420
    },
    {
      "epoch": 2.1609290207707366,
      "grad_norm": 0.2953193187713623,
      "learning_rate": 5.71555649268926e-06,
      "loss": 0.0095,
      "step": 1320440
    },
    {
      "epoch": 2.1609617512093897,
      "grad_norm": 0.3410923182964325,
      "learning_rate": 5.715490600475742e-06,
      "loss": 0.0235,
      "step": 1320460
    },
    {
      "epoch": 2.160994481648043,
      "grad_norm": 0.6749030947685242,
      "learning_rate": 5.715424708262225e-06,
      "loss": 0.0172,
      "step": 1320480
    },
    {
      "epoch": 2.1610272120866965,
      "grad_norm": 0.22222915291786194,
      "learning_rate": 5.715358816048709e-06,
      "loss": 0.0172,
      "step": 1320500
    },
    {
      "epoch": 2.1610599425253496,
      "grad_norm": 0.33977413177490234,
      "learning_rate": 5.7152929238351906e-06,
      "loss": 0.0158,
      "step": 1320520
    },
    {
      "epoch": 2.161092672964003,
      "grad_norm": 1.3844983577728271,
      "learning_rate": 5.715227031621674e-06,
      "loss": 0.0138,
      "step": 1320540
    },
    {
      "epoch": 2.1611254034026564,
      "grad_norm": 0.253070592880249,
      "learning_rate": 5.715161139408156e-06,
      "loss": 0.0099,
      "step": 1320560
    },
    {
      "epoch": 2.16115813384131,
      "grad_norm": 0.32279494404792786,
      "learning_rate": 5.71509524719464e-06,
      "loss": 0.0133,
      "step": 1320580
    },
    {
      "epoch": 2.161190864279963,
      "grad_norm": 0.2468893676996231,
      "learning_rate": 5.715029354981122e-06,
      "loss": 0.0179,
      "step": 1320600
    },
    {
      "epoch": 2.1612235947186162,
      "grad_norm": 0.2099705934524536,
      "learning_rate": 5.714963462767605e-06,
      "loss": 0.0103,
      "step": 1320620
    },
    {
      "epoch": 2.16125632515727,
      "grad_norm": 0.32518133521080017,
      "learning_rate": 5.714897570554088e-06,
      "loss": 0.0179,
      "step": 1320640
    },
    {
      "epoch": 2.161289055595923,
      "grad_norm": 0.4274117648601532,
      "learning_rate": 5.7148316783405714e-06,
      "loss": 0.0157,
      "step": 1320660
    },
    {
      "epoch": 2.1613217860345766,
      "grad_norm": 0.4265035390853882,
      "learning_rate": 5.714765786127053e-06,
      "loss": 0.0208,
      "step": 1320680
    },
    {
      "epoch": 2.1613545164732297,
      "grad_norm": 0.4584127366542816,
      "learning_rate": 5.714699893913537e-06,
      "loss": 0.0145,
      "step": 1320700
    },
    {
      "epoch": 2.1613872469118833,
      "grad_norm": 0.3651898503303528,
      "learning_rate": 5.714634001700019e-06,
      "loss": 0.018,
      "step": 1320720
    },
    {
      "epoch": 2.1614199773505365,
      "grad_norm": 0.1991012841463089,
      "learning_rate": 5.714568109486502e-06,
      "loss": 0.0154,
      "step": 1320740
    },
    {
      "epoch": 2.1614527077891896,
      "grad_norm": 0.4944406747817993,
      "learning_rate": 5.714502217272986e-06,
      "loss": 0.0093,
      "step": 1320760
    },
    {
      "epoch": 2.1614854382278432,
      "grad_norm": 0.6265043020248413,
      "learning_rate": 5.714436325059468e-06,
      "loss": 0.0273,
      "step": 1320780
    },
    {
      "epoch": 2.1615181686664964,
      "grad_norm": 0.4624302387237549,
      "learning_rate": 5.7143704328459515e-06,
      "loss": 0.0229,
      "step": 1320800
    },
    {
      "epoch": 2.16155089910515,
      "grad_norm": 0.18675562739372253,
      "learning_rate": 5.714304540632433e-06,
      "loss": 0.012,
      "step": 1320820
    },
    {
      "epoch": 2.161583629543803,
      "grad_norm": 0.1255921721458435,
      "learning_rate": 5.714238648418917e-06,
      "loss": 0.0123,
      "step": 1320840
    },
    {
      "epoch": 2.1616163599824567,
      "grad_norm": 0.8755189776420593,
      "learning_rate": 5.7141727562054e-06,
      "loss": 0.0206,
      "step": 1320860
    },
    {
      "epoch": 2.16164909042111,
      "grad_norm": 1.1098718643188477,
      "learning_rate": 5.714106863991883e-06,
      "loss": 0.0112,
      "step": 1320880
    },
    {
      "epoch": 2.161681820859763,
      "grad_norm": 0.668857991695404,
      "learning_rate": 5.714040971778365e-06,
      "loss": 0.0159,
      "step": 1320900
    },
    {
      "epoch": 2.1617145512984166,
      "grad_norm": 0.745074450969696,
      "learning_rate": 5.713975079564849e-06,
      "loss": 0.0144,
      "step": 1320920
    },
    {
      "epoch": 2.1617472817370698,
      "grad_norm": 1.0338801145553589,
      "learning_rate": 5.713909187351331e-06,
      "loss": 0.0149,
      "step": 1320940
    },
    {
      "epoch": 2.1617800121757234,
      "grad_norm": 0.8137932419776917,
      "learning_rate": 5.713843295137814e-06,
      "loss": 0.0214,
      "step": 1320960
    },
    {
      "epoch": 2.1618127426143765,
      "grad_norm": 0.17925497889518738,
      "learning_rate": 5.713777402924296e-06,
      "loss": 0.0148,
      "step": 1320980
    },
    {
      "epoch": 2.1618454730530297,
      "grad_norm": 0.203740194439888,
      "learning_rate": 5.71371151071078e-06,
      "loss": 0.0137,
      "step": 1321000
    },
    {
      "epoch": 2.1618782034916832,
      "grad_norm": 1.0263781547546387,
      "learning_rate": 5.7136456184972625e-06,
      "loss": 0.0134,
      "step": 1321020
    },
    {
      "epoch": 2.1619109339303364,
      "grad_norm": 0.14851804077625275,
      "learning_rate": 5.713579726283745e-06,
      "loss": 0.0152,
      "step": 1321040
    },
    {
      "epoch": 2.16194366436899,
      "grad_norm": 0.5936050415039062,
      "learning_rate": 5.713513834070228e-06,
      "loss": 0.0145,
      "step": 1321060
    },
    {
      "epoch": 2.161976394807643,
      "grad_norm": 0.6851836442947388,
      "learning_rate": 5.7134479418567115e-06,
      "loss": 0.0187,
      "step": 1321080
    },
    {
      "epoch": 2.1620091252462967,
      "grad_norm": 0.4074803292751312,
      "learning_rate": 5.713382049643194e-06,
      "loss": 0.0147,
      "step": 1321100
    },
    {
      "epoch": 2.16204185568495,
      "grad_norm": 0.47697991132736206,
      "learning_rate": 5.713316157429677e-06,
      "loss": 0.0146,
      "step": 1321120
    },
    {
      "epoch": 2.162074586123603,
      "grad_norm": 1.0514247417449951,
      "learning_rate": 5.713250265216161e-06,
      "loss": 0.0155,
      "step": 1321140
    },
    {
      "epoch": 2.1621073165622566,
      "grad_norm": 0.119422048330307,
      "learning_rate": 5.7131843730026425e-06,
      "loss": 0.0149,
      "step": 1321160
    },
    {
      "epoch": 2.16214004700091,
      "grad_norm": 0.17299504578113556,
      "learning_rate": 5.713118480789126e-06,
      "loss": 0.0152,
      "step": 1321180
    },
    {
      "epoch": 2.1621727774395634,
      "grad_norm": 0.679943323135376,
      "learning_rate": 5.713052588575608e-06,
      "loss": 0.023,
      "step": 1321200
    },
    {
      "epoch": 2.1622055078782165,
      "grad_norm": 0.21631130576133728,
      "learning_rate": 5.7129866963620915e-06,
      "loss": 0.0188,
      "step": 1321220
    },
    {
      "epoch": 2.16223823831687,
      "grad_norm": 0.1978319138288498,
      "learning_rate": 5.712920804148574e-06,
      "loss": 0.0164,
      "step": 1321240
    },
    {
      "epoch": 2.1622709687555233,
      "grad_norm": 0.7573733329772949,
      "learning_rate": 5.712854911935057e-06,
      "loss": 0.0199,
      "step": 1321260
    },
    {
      "epoch": 2.1623036991941764,
      "grad_norm": 1.4803699254989624,
      "learning_rate": 5.71278901972154e-06,
      "loss": 0.0144,
      "step": 1321280
    },
    {
      "epoch": 2.16233642963283,
      "grad_norm": 0.12420814484357834,
      "learning_rate": 5.712723127508023e-06,
      "loss": 0.0158,
      "step": 1321300
    },
    {
      "epoch": 2.162369160071483,
      "grad_norm": 0.1244373470544815,
      "learning_rate": 5.712657235294505e-06,
      "loss": 0.0179,
      "step": 1321320
    },
    {
      "epoch": 2.1624018905101368,
      "grad_norm": 0.18893694877624512,
      "learning_rate": 5.712591343080989e-06,
      "loss": 0.0097,
      "step": 1321340
    },
    {
      "epoch": 2.16243462094879,
      "grad_norm": 0.10422658920288086,
      "learning_rate": 5.712525450867471e-06,
      "loss": 0.0193,
      "step": 1321360
    },
    {
      "epoch": 2.162467351387443,
      "grad_norm": 0.3286350667476654,
      "learning_rate": 5.712459558653954e-06,
      "loss": 0.0176,
      "step": 1321380
    },
    {
      "epoch": 2.1625000818260967,
      "grad_norm": 0.6377371549606323,
      "learning_rate": 5.712393666440437e-06,
      "loss": 0.0122,
      "step": 1321400
    },
    {
      "epoch": 2.16253281226475,
      "grad_norm": 0.41737836599349976,
      "learning_rate": 5.71232777422692e-06,
      "loss": 0.0141,
      "step": 1321420
    },
    {
      "epoch": 2.1625655427034034,
      "grad_norm": 0.1355935037136078,
      "learning_rate": 5.712261882013403e-06,
      "loss": 0.0165,
      "step": 1321440
    },
    {
      "epoch": 2.1625982731420565,
      "grad_norm": 0.0612182691693306,
      "learning_rate": 5.712195989799886e-06,
      "loss": 0.0116,
      "step": 1321460
    },
    {
      "epoch": 2.16263100358071,
      "grad_norm": 0.2058006078004837,
      "learning_rate": 5.712130097586369e-06,
      "loss": 0.0206,
      "step": 1321480
    },
    {
      "epoch": 2.1626637340193633,
      "grad_norm": 0.5823566317558289,
      "learning_rate": 5.712064205372852e-06,
      "loss": 0.0217,
      "step": 1321500
    },
    {
      "epoch": 2.1626964644580164,
      "grad_norm": 0.5363790988922119,
      "learning_rate": 5.711998313159335e-06,
      "loss": 0.0099,
      "step": 1321520
    },
    {
      "epoch": 2.16272919489667,
      "grad_norm": 0.0863996371626854,
      "learning_rate": 5.711932420945817e-06,
      "loss": 0.0103,
      "step": 1321540
    },
    {
      "epoch": 2.162761925335323,
      "grad_norm": 0.2257714718580246,
      "learning_rate": 5.711866528732301e-06,
      "loss": 0.0163,
      "step": 1321560
    },
    {
      "epoch": 2.162794655773977,
      "grad_norm": 0.5379834771156311,
      "learning_rate": 5.7118006365187826e-06,
      "loss": 0.0169,
      "step": 1321580
    },
    {
      "epoch": 2.16282738621263,
      "grad_norm": 0.3016480803489685,
      "learning_rate": 5.711734744305266e-06,
      "loss": 0.0218,
      "step": 1321600
    },
    {
      "epoch": 2.1628601166512835,
      "grad_norm": 4.312579154968262,
      "learning_rate": 5.711668852091749e-06,
      "loss": 0.0197,
      "step": 1321620
    },
    {
      "epoch": 2.1628928470899367,
      "grad_norm": 0.34171658754348755,
      "learning_rate": 5.711602959878232e-06,
      "loss": 0.0178,
      "step": 1321640
    },
    {
      "epoch": 2.16292557752859,
      "grad_norm": 0.40109875798225403,
      "learning_rate": 5.711537067664714e-06,
      "loss": 0.0148,
      "step": 1321660
    },
    {
      "epoch": 2.1629583079672434,
      "grad_norm": 0.3837067484855652,
      "learning_rate": 5.711471175451198e-06,
      "loss": 0.0155,
      "step": 1321680
    },
    {
      "epoch": 2.1629910384058966,
      "grad_norm": 0.18704012036323547,
      "learning_rate": 5.71140528323768e-06,
      "loss": 0.0104,
      "step": 1321700
    },
    {
      "epoch": 2.16302376884455,
      "grad_norm": 0.2405172735452652,
      "learning_rate": 5.7113393910241634e-06,
      "loss": 0.0145,
      "step": 1321720
    },
    {
      "epoch": 2.1630564992832033,
      "grad_norm": 0.6681890487670898,
      "learning_rate": 5.711273498810645e-06,
      "loss": 0.0178,
      "step": 1321740
    },
    {
      "epoch": 2.163089229721857,
      "grad_norm": 0.5687565207481384,
      "learning_rate": 5.711207606597129e-06,
      "loss": 0.0167,
      "step": 1321760
    },
    {
      "epoch": 2.16312196016051,
      "grad_norm": 0.19227007031440735,
      "learning_rate": 5.711141714383611e-06,
      "loss": 0.0193,
      "step": 1321780
    },
    {
      "epoch": 2.163154690599163,
      "grad_norm": 0.1576787829399109,
      "learning_rate": 5.711075822170094e-06,
      "loss": 0.0135,
      "step": 1321800
    },
    {
      "epoch": 2.163187421037817,
      "grad_norm": 0.11427413672208786,
      "learning_rate": 5.711009929956578e-06,
      "loss": 0.0154,
      "step": 1321820
    },
    {
      "epoch": 2.16322015147647,
      "grad_norm": 0.9969318509101868,
      "learning_rate": 5.71094403774306e-06,
      "loss": 0.0134,
      "step": 1321840
    },
    {
      "epoch": 2.1632528819151235,
      "grad_norm": 0.39130353927612305,
      "learning_rate": 5.7108781455295435e-06,
      "loss": 0.0164,
      "step": 1321860
    },
    {
      "epoch": 2.1632856123537767,
      "grad_norm": 0.22033512592315674,
      "learning_rate": 5.710812253316026e-06,
      "loss": 0.0181,
      "step": 1321880
    },
    {
      "epoch": 2.1633183427924303,
      "grad_norm": 0.16698481142520905,
      "learning_rate": 5.710746361102509e-06,
      "loss": 0.021,
      "step": 1321900
    },
    {
      "epoch": 2.1633510732310834,
      "grad_norm": 0.18161016702651978,
      "learning_rate": 5.710680468888992e-06,
      "loss": 0.0163,
      "step": 1321920
    },
    {
      "epoch": 2.1633838036697366,
      "grad_norm": 0.5270965695381165,
      "learning_rate": 5.710614576675475e-06,
      "loss": 0.0159,
      "step": 1321940
    },
    {
      "epoch": 2.16341653410839,
      "grad_norm": 0.20476171374320984,
      "learning_rate": 5.710548684461957e-06,
      "loss": 0.025,
      "step": 1321960
    },
    {
      "epoch": 2.1634492645470433,
      "grad_norm": 0.4503622353076935,
      "learning_rate": 5.710482792248441e-06,
      "loss": 0.0194,
      "step": 1321980
    },
    {
      "epoch": 2.163481994985697,
      "grad_norm": 0.2154957354068756,
      "learning_rate": 5.710416900034923e-06,
      "loss": 0.0132,
      "step": 1322000
    },
    {
      "epoch": 2.16351472542435,
      "grad_norm": 0.5921198725700378,
      "learning_rate": 5.710351007821406e-06,
      "loss": 0.0166,
      "step": 1322020
    },
    {
      "epoch": 2.1635474558630037,
      "grad_norm": 0.3959764838218689,
      "learning_rate": 5.710285115607889e-06,
      "loss": 0.0132,
      "step": 1322040
    },
    {
      "epoch": 2.163580186301657,
      "grad_norm": 0.53889000415802,
      "learning_rate": 5.710219223394372e-06,
      "loss": 0.0207,
      "step": 1322060
    },
    {
      "epoch": 2.16361291674031,
      "grad_norm": 0.32047685980796814,
      "learning_rate": 5.7101533311808544e-06,
      "loss": 0.0162,
      "step": 1322080
    },
    {
      "epoch": 2.1636456471789636,
      "grad_norm": 0.9721362590789795,
      "learning_rate": 5.710087438967338e-06,
      "loss": 0.0171,
      "step": 1322100
    },
    {
      "epoch": 2.1636783776176167,
      "grad_norm": 0.3616172969341278,
      "learning_rate": 5.71002154675382e-06,
      "loss": 0.0235,
      "step": 1322120
    },
    {
      "epoch": 2.1637111080562703,
      "grad_norm": 0.4403129518032074,
      "learning_rate": 5.7099556545403035e-06,
      "loss": 0.0149,
      "step": 1322140
    },
    {
      "epoch": 2.1637438384949235,
      "grad_norm": 0.5329681634902954,
      "learning_rate": 5.709889762326787e-06,
      "loss": 0.0289,
      "step": 1322160
    },
    {
      "epoch": 2.163776568933577,
      "grad_norm": 0.3569294512271881,
      "learning_rate": 5.709823870113269e-06,
      "loss": 0.0148,
      "step": 1322180
    },
    {
      "epoch": 2.16380929937223,
      "grad_norm": 0.554063618183136,
      "learning_rate": 5.7097579778997526e-06,
      "loss": 0.0113,
      "step": 1322200
    },
    {
      "epoch": 2.1638420298108834,
      "grad_norm": 0.42785438895225525,
      "learning_rate": 5.7096920856862345e-06,
      "loss": 0.0183,
      "step": 1322220
    },
    {
      "epoch": 2.163874760249537,
      "grad_norm": 0.24869531393051147,
      "learning_rate": 5.709626193472718e-06,
      "loss": 0.0231,
      "step": 1322240
    },
    {
      "epoch": 2.16390749068819,
      "grad_norm": 0.06698722392320633,
      "learning_rate": 5.709560301259201e-06,
      "loss": 0.0124,
      "step": 1322260
    },
    {
      "epoch": 2.1639402211268437,
      "grad_norm": 0.1167905405163765,
      "learning_rate": 5.7094944090456835e-06,
      "loss": 0.0122,
      "step": 1322280
    },
    {
      "epoch": 2.163972951565497,
      "grad_norm": 0.6512224078178406,
      "learning_rate": 5.709428516832166e-06,
      "loss": 0.0201,
      "step": 1322300
    },
    {
      "epoch": 2.1640056820041504,
      "grad_norm": null,
      "learning_rate": 5.70936262461865e-06,
      "loss": 0.0211,
      "step": 1322320
    },
    {
      "epoch": 2.1640384124428036,
      "grad_norm": 0.9181391000747681,
      "learning_rate": 5.709296732405132e-06,
      "loss": 0.0202,
      "step": 1322340
    },
    {
      "epoch": 2.1640711428814567,
      "grad_norm": 2.863297700881958,
      "learning_rate": 5.709230840191615e-06,
      "loss": 0.0232,
      "step": 1322360
    },
    {
      "epoch": 2.1641038733201103,
      "grad_norm": 0.43429845571517944,
      "learning_rate": 5.709164947978097e-06,
      "loss": 0.0156,
      "step": 1322380
    },
    {
      "epoch": 2.1641366037587635,
      "grad_norm": 0.5195708274841309,
      "learning_rate": 5.709099055764581e-06,
      "loss": 0.0134,
      "step": 1322400
    },
    {
      "epoch": 2.164169334197417,
      "grad_norm": 1.5186065435409546,
      "learning_rate": 5.7090331635510636e-06,
      "loss": 0.0166,
      "step": 1322420
    },
    {
      "epoch": 2.1642020646360702,
      "grad_norm": 0.06386669725179672,
      "learning_rate": 5.708967271337546e-06,
      "loss": 0.0226,
      "step": 1322440
    },
    {
      "epoch": 2.164234795074724,
      "grad_norm": 1.0709563493728638,
      "learning_rate": 5.708901379124029e-06,
      "loss": 0.0184,
      "step": 1322460
    },
    {
      "epoch": 2.164267525513377,
      "grad_norm": 0.1323433667421341,
      "learning_rate": 5.708835486910513e-06,
      "loss": 0.0138,
      "step": 1322480
    },
    {
      "epoch": 2.16430025595203,
      "grad_norm": 0.1295936554670334,
      "learning_rate": 5.708769594696995e-06,
      "loss": 0.0154,
      "step": 1322500
    },
    {
      "epoch": 2.1643329863906837,
      "grad_norm": 0.6340157389640808,
      "learning_rate": 5.708703702483478e-06,
      "loss": 0.0194,
      "step": 1322520
    },
    {
      "epoch": 2.164365716829337,
      "grad_norm": 5.455763339996338,
      "learning_rate": 5.708637810269962e-06,
      "loss": 0.0234,
      "step": 1322540
    },
    {
      "epoch": 2.1643984472679905,
      "grad_norm": 0.22809340059757233,
      "learning_rate": 5.708571918056444e-06,
      "loss": 0.0192,
      "step": 1322560
    },
    {
      "epoch": 2.1644311777066436,
      "grad_norm": 0.45773208141326904,
      "learning_rate": 5.708506025842927e-06,
      "loss": 0.0154,
      "step": 1322580
    },
    {
      "epoch": 2.1644639081452968,
      "grad_norm": 0.7276167869567871,
      "learning_rate": 5.708440133629409e-06,
      "loss": 0.0186,
      "step": 1322600
    },
    {
      "epoch": 2.1644966385839504,
      "grad_norm": 0.40035879611968994,
      "learning_rate": 5.708374241415893e-06,
      "loss": 0.0141,
      "step": 1322620
    },
    {
      "epoch": 2.1645293690226035,
      "grad_norm": 0.47070416808128357,
      "learning_rate": 5.708308349202375e-06,
      "loss": 0.0146,
      "step": 1322640
    },
    {
      "epoch": 2.164562099461257,
      "grad_norm": 0.5836315155029297,
      "learning_rate": 5.708242456988858e-06,
      "loss": 0.0172,
      "step": 1322660
    },
    {
      "epoch": 2.1645948298999103,
      "grad_norm": 0.48862355947494507,
      "learning_rate": 5.708176564775341e-06,
      "loss": 0.013,
      "step": 1322680
    },
    {
      "epoch": 2.164627560338564,
      "grad_norm": 0.2888578474521637,
      "learning_rate": 5.7081106725618245e-06,
      "loss": 0.0126,
      "step": 1322700
    },
    {
      "epoch": 2.164660290777217,
      "grad_norm": 0.4058047831058502,
      "learning_rate": 5.708044780348306e-06,
      "loss": 0.0116,
      "step": 1322720
    },
    {
      "epoch": 2.16469302121587,
      "grad_norm": 0.28967157006263733,
      "learning_rate": 5.70797888813479e-06,
      "loss": 0.02,
      "step": 1322740
    },
    {
      "epoch": 2.1647257516545237,
      "grad_norm": 0.13588422536849976,
      "learning_rate": 5.707912995921272e-06,
      "loss": 0.009,
      "step": 1322760
    },
    {
      "epoch": 2.164758482093177,
      "grad_norm": 0.1396559774875641,
      "learning_rate": 5.707847103707755e-06,
      "loss": 0.0144,
      "step": 1322780
    },
    {
      "epoch": 2.1647912125318305,
      "grad_norm": 0.8722813129425049,
      "learning_rate": 5.707781211494237e-06,
      "loss": 0.0131,
      "step": 1322800
    },
    {
      "epoch": 2.1648239429704836,
      "grad_norm": 0.25953400135040283,
      "learning_rate": 5.707715319280721e-06,
      "loss": 0.0155,
      "step": 1322820
    },
    {
      "epoch": 2.1648566734091372,
      "grad_norm": 0.7747437357902527,
      "learning_rate": 5.7076494270672045e-06,
      "loss": 0.0125,
      "step": 1322840
    },
    {
      "epoch": 2.1648894038477904,
      "grad_norm": 0.5042629241943359,
      "learning_rate": 5.707583534853686e-06,
      "loss": 0.0176,
      "step": 1322860
    },
    {
      "epoch": 2.1649221342864435,
      "grad_norm": 0.25571209192276,
      "learning_rate": 5.70751764264017e-06,
      "loss": 0.0165,
      "step": 1322880
    },
    {
      "epoch": 2.164954864725097,
      "grad_norm": 0.1658882051706314,
      "learning_rate": 5.707451750426653e-06,
      "loss": 0.0184,
      "step": 1322900
    },
    {
      "epoch": 2.1649875951637503,
      "grad_norm": 0.5733320713043213,
      "learning_rate": 5.7073858582131354e-06,
      "loss": 0.0105,
      "step": 1322920
    },
    {
      "epoch": 2.165020325602404,
      "grad_norm": 0.5425865650177002,
      "learning_rate": 5.707319965999618e-06,
      "loss": 0.0144,
      "step": 1322940
    },
    {
      "epoch": 2.165053056041057,
      "grad_norm": 0.20756487548351288,
      "learning_rate": 5.707254073786102e-06,
      "loss": 0.0178,
      "step": 1322960
    },
    {
      "epoch": 2.16508578647971,
      "grad_norm": 0.08782167732715607,
      "learning_rate": 5.707188181572584e-06,
      "loss": 0.0195,
      "step": 1322980
    },
    {
      "epoch": 2.1651185169183638,
      "grad_norm": 0.20335079729557037,
      "learning_rate": 5.707122289359067e-06,
      "loss": 0.0151,
      "step": 1323000
    },
    {
      "epoch": 2.165151247357017,
      "grad_norm": 1.0372164249420166,
      "learning_rate": 5.707056397145549e-06,
      "loss": 0.0195,
      "step": 1323020
    },
    {
      "epoch": 2.1651839777956705,
      "grad_norm": 0.35002437233924866,
      "learning_rate": 5.706990504932033e-06,
      "loss": 0.0082,
      "step": 1323040
    },
    {
      "epoch": 2.1652167082343237,
      "grad_norm": 1.1405376195907593,
      "learning_rate": 5.7069246127185155e-06,
      "loss": 0.0176,
      "step": 1323060
    },
    {
      "epoch": 2.1652494386729773,
      "grad_norm": 0.5609134435653687,
      "learning_rate": 5.706858720504998e-06,
      "loss": 0.0179,
      "step": 1323080
    },
    {
      "epoch": 2.1652821691116304,
      "grad_norm": 1.1294087171554565,
      "learning_rate": 5.706792828291481e-06,
      "loss": 0.0158,
      "step": 1323100
    },
    {
      "epoch": 2.1653148995502836,
      "grad_norm": 0.21627067029476166,
      "learning_rate": 5.7067269360779645e-06,
      "loss": 0.0199,
      "step": 1323120
    },
    {
      "epoch": 2.165347629988937,
      "grad_norm": 0.08928878605365753,
      "learning_rate": 5.706661043864446e-06,
      "loss": 0.0139,
      "step": 1323140
    },
    {
      "epoch": 2.1653803604275903,
      "grad_norm": 0.4003138244152069,
      "learning_rate": 5.70659515165093e-06,
      "loss": 0.0107,
      "step": 1323160
    },
    {
      "epoch": 2.165413090866244,
      "grad_norm": 0.11312645673751831,
      "learning_rate": 5.706529259437412e-06,
      "loss": 0.016,
      "step": 1323180
    },
    {
      "epoch": 2.165445821304897,
      "grad_norm": 0.33134642243385315,
      "learning_rate": 5.7064633672238955e-06,
      "loss": 0.0101,
      "step": 1323200
    },
    {
      "epoch": 2.1654785517435506,
      "grad_norm": 0.6945124268531799,
      "learning_rate": 5.706397475010379e-06,
      "loss": 0.0167,
      "step": 1323220
    },
    {
      "epoch": 2.165511282182204,
      "grad_norm": 0.2958563268184662,
      "learning_rate": 5.706331582796861e-06,
      "loss": 0.0173,
      "step": 1323240
    },
    {
      "epoch": 2.165544012620857,
      "grad_norm": 0.5034312009811401,
      "learning_rate": 5.7062656905833446e-06,
      "loss": 0.0156,
      "step": 1323260
    },
    {
      "epoch": 2.1655767430595105,
      "grad_norm": 0.655390739440918,
      "learning_rate": 5.706199798369827e-06,
      "loss": 0.0133,
      "step": 1323280
    },
    {
      "epoch": 2.1656094734981637,
      "grad_norm": 0.8213622570037842,
      "learning_rate": 5.70613390615631e-06,
      "loss": 0.0123,
      "step": 1323300
    },
    {
      "epoch": 2.1656422039368173,
      "grad_norm": 0.25897806882858276,
      "learning_rate": 5.706068013942793e-06,
      "loss": 0.0185,
      "step": 1323320
    },
    {
      "epoch": 2.1656749343754704,
      "grad_norm": 0.5782040357589722,
      "learning_rate": 5.706002121729276e-06,
      "loss": 0.0157,
      "step": 1323340
    },
    {
      "epoch": 2.165707664814124,
      "grad_norm": 1.299582839012146,
      "learning_rate": 5.705936229515758e-06,
      "loss": 0.0177,
      "step": 1323360
    },
    {
      "epoch": 2.165740395252777,
      "grad_norm": 0.27143558859825134,
      "learning_rate": 5.705870337302242e-06,
      "loss": 0.0146,
      "step": 1323380
    },
    {
      "epoch": 2.1657731256914303,
      "grad_norm": 0.6498801708221436,
      "learning_rate": 5.705804445088724e-06,
      "loss": 0.0136,
      "step": 1323400
    },
    {
      "epoch": 2.165805856130084,
      "grad_norm": 0.08742067217826843,
      "learning_rate": 5.705738552875207e-06,
      "loss": 0.0148,
      "step": 1323420
    },
    {
      "epoch": 2.165838586568737,
      "grad_norm": 0.3680802285671234,
      "learning_rate": 5.70567266066169e-06,
      "loss": 0.0132,
      "step": 1323440
    },
    {
      "epoch": 2.1658713170073907,
      "grad_norm": 0.43775445222854614,
      "learning_rate": 5.705606768448173e-06,
      "loss": 0.0082,
      "step": 1323460
    },
    {
      "epoch": 2.165904047446044,
      "grad_norm": 0.40791717171669006,
      "learning_rate": 5.7055408762346555e-06,
      "loss": 0.0134,
      "step": 1323480
    },
    {
      "epoch": 2.1659367778846974,
      "grad_norm": 0.3410939872264862,
      "learning_rate": 5.705474984021139e-06,
      "loss": 0.0206,
      "step": 1323500
    },
    {
      "epoch": 2.1659695083233506,
      "grad_norm": 0.9026713967323303,
      "learning_rate": 5.705409091807621e-06,
      "loss": 0.0148,
      "step": 1323520
    },
    {
      "epoch": 2.1660022387620037,
      "grad_norm": 0.5944965481758118,
      "learning_rate": 5.705343199594105e-06,
      "loss": 0.0147,
      "step": 1323540
    },
    {
      "epoch": 2.1660349692006573,
      "grad_norm": 0.2230096012353897,
      "learning_rate": 5.705277307380588e-06,
      "loss": 0.0164,
      "step": 1323560
    },
    {
      "epoch": 2.1660676996393105,
      "grad_norm": 0.23558883368968964,
      "learning_rate": 5.70521141516707e-06,
      "loss": 0.0139,
      "step": 1323580
    },
    {
      "epoch": 2.166100430077964,
      "grad_norm": 0.3358411490917206,
      "learning_rate": 5.705145522953554e-06,
      "loss": 0.0149,
      "step": 1323600
    },
    {
      "epoch": 2.166133160516617,
      "grad_norm": 0.2823491394519806,
      "learning_rate": 5.7050796307400356e-06,
      "loss": 0.015,
      "step": 1323620
    },
    {
      "epoch": 2.166165890955271,
      "grad_norm": 0.5057506561279297,
      "learning_rate": 5.705013738526519e-06,
      "loss": 0.012,
      "step": 1323640
    },
    {
      "epoch": 2.166198621393924,
      "grad_norm": 0.18653710186481476,
      "learning_rate": 5.704947846313002e-06,
      "loss": 0.0156,
      "step": 1323660
    },
    {
      "epoch": 2.166231351832577,
      "grad_norm": 0.2260182797908783,
      "learning_rate": 5.704881954099485e-06,
      "loss": 0.0065,
      "step": 1323680
    },
    {
      "epoch": 2.1662640822712307,
      "grad_norm": 0.6253498792648315,
      "learning_rate": 5.704816061885967e-06,
      "loss": 0.0173,
      "step": 1323700
    },
    {
      "epoch": 2.166296812709884,
      "grad_norm": 0.21225158870220184,
      "learning_rate": 5.704750169672451e-06,
      "loss": 0.0262,
      "step": 1323720
    },
    {
      "epoch": 2.1663295431485374,
      "grad_norm": 0.2795647084712982,
      "learning_rate": 5.704684277458933e-06,
      "loss": 0.0238,
      "step": 1323740
    },
    {
      "epoch": 2.1663622735871906,
      "grad_norm": 0.328828364610672,
      "learning_rate": 5.7046183852454164e-06,
      "loss": 0.0116,
      "step": 1323760
    },
    {
      "epoch": 2.166395004025844,
      "grad_norm": 0.48747745156288147,
      "learning_rate": 5.704552493031898e-06,
      "loss": 0.0172,
      "step": 1323780
    },
    {
      "epoch": 2.1664277344644973,
      "grad_norm": 0.2791498899459839,
      "learning_rate": 5.704486600818382e-06,
      "loss": 0.0143,
      "step": 1323800
    },
    {
      "epoch": 2.1664604649031505,
      "grad_norm": 0.568917989730835,
      "learning_rate": 5.704420708604864e-06,
      "loss": 0.0163,
      "step": 1323820
    },
    {
      "epoch": 2.166493195341804,
      "grad_norm": 0.5272648930549622,
      "learning_rate": 5.704354816391347e-06,
      "loss": 0.0174,
      "step": 1323840
    },
    {
      "epoch": 2.166525925780457,
      "grad_norm": 0.26096054911613464,
      "learning_rate": 5.70428892417783e-06,
      "loss": 0.0163,
      "step": 1323860
    },
    {
      "epoch": 2.166558656219111,
      "grad_norm": 0.6758603453636169,
      "learning_rate": 5.704223031964313e-06,
      "loss": 0.0192,
      "step": 1323880
    },
    {
      "epoch": 2.166591386657764,
      "grad_norm": 0.18043966591358185,
      "learning_rate": 5.7041571397507965e-06,
      "loss": 0.0151,
      "step": 1323900
    },
    {
      "epoch": 2.1666241170964176,
      "grad_norm": 0.8332268595695496,
      "learning_rate": 5.704091247537279e-06,
      "loss": 0.0168,
      "step": 1323920
    },
    {
      "epoch": 2.1666568475350707,
      "grad_norm": 0.7530092597007751,
      "learning_rate": 5.704025355323762e-06,
      "loss": 0.0228,
      "step": 1323940
    },
    {
      "epoch": 2.166689577973724,
      "grad_norm": 0.26892727613449097,
      "learning_rate": 5.703959463110245e-06,
      "loss": 0.0174,
      "step": 1323960
    },
    {
      "epoch": 2.1667223084123775,
      "grad_norm": 0.8895686864852905,
      "learning_rate": 5.703893570896728e-06,
      "loss": 0.0208,
      "step": 1323980
    },
    {
      "epoch": 2.1667550388510306,
      "grad_norm": 0.10329531878232956,
      "learning_rate": 5.70382767868321e-06,
      "loss": 0.0052,
      "step": 1324000
    },
    {
      "epoch": 2.166787769289684,
      "grad_norm": 0.6557782292366028,
      "learning_rate": 5.703761786469694e-06,
      "loss": 0.011,
      "step": 1324020
    },
    {
      "epoch": 2.1668204997283373,
      "grad_norm": 0.14835581183433533,
      "learning_rate": 5.703695894256176e-06,
      "loss": 0.0147,
      "step": 1324040
    },
    {
      "epoch": 2.1668532301669905,
      "grad_norm": 0.3175789713859558,
      "learning_rate": 5.703630002042659e-06,
      "loss": 0.0116,
      "step": 1324060
    },
    {
      "epoch": 2.166885960605644,
      "grad_norm": 0.23679597675800323,
      "learning_rate": 5.703564109829142e-06,
      "loss": 0.0116,
      "step": 1324080
    },
    {
      "epoch": 2.1669186910442972,
      "grad_norm": 0.16541306674480438,
      "learning_rate": 5.703498217615625e-06,
      "loss": 0.017,
      "step": 1324100
    },
    {
      "epoch": 2.166951421482951,
      "grad_norm": 0.7128258347511292,
      "learning_rate": 5.7034323254021074e-06,
      "loss": 0.0194,
      "step": 1324120
    },
    {
      "epoch": 2.166984151921604,
      "grad_norm": 0.25464534759521484,
      "learning_rate": 5.703366433188591e-06,
      "loss": 0.0098,
      "step": 1324140
    },
    {
      "epoch": 2.1670168823602576,
      "grad_norm": 0.06158164516091347,
      "learning_rate": 5.703300540975073e-06,
      "loss": 0.0144,
      "step": 1324160
    },
    {
      "epoch": 2.1670496127989107,
      "grad_norm": 0.21774883568286896,
      "learning_rate": 5.7032346487615565e-06,
      "loss": 0.013,
      "step": 1324180
    },
    {
      "epoch": 2.167082343237564,
      "grad_norm": 0.5789103507995605,
      "learning_rate": 5.703168756548038e-06,
      "loss": 0.0227,
      "step": 1324200
    },
    {
      "epoch": 2.1671150736762175,
      "grad_norm": 0.45193642377853394,
      "learning_rate": 5.703102864334522e-06,
      "loss": 0.0188,
      "step": 1324220
    },
    {
      "epoch": 2.1671478041148706,
      "grad_norm": 0.19247008860111237,
      "learning_rate": 5.703036972121005e-06,
      "loss": 0.0138,
      "step": 1324240
    },
    {
      "epoch": 2.167180534553524,
      "grad_norm": 0.18062222003936768,
      "learning_rate": 5.7029710799074875e-06,
      "loss": 0.0142,
      "step": 1324260
    },
    {
      "epoch": 2.1672132649921774,
      "grad_norm": 0.3020291328430176,
      "learning_rate": 5.702905187693971e-06,
      "loss": 0.0146,
      "step": 1324280
    },
    {
      "epoch": 2.167245995430831,
      "grad_norm": 0.06252359598875046,
      "learning_rate": 5.702839295480454e-06,
      "loss": 0.0209,
      "step": 1324300
    },
    {
      "epoch": 2.167278725869484,
      "grad_norm": 0.7918741703033447,
      "learning_rate": 5.7027734032669365e-06,
      "loss": 0.0193,
      "step": 1324320
    },
    {
      "epoch": 2.1673114563081373,
      "grad_norm": 0.39230436086654663,
      "learning_rate": 5.702707511053419e-06,
      "loss": 0.0147,
      "step": 1324340
    },
    {
      "epoch": 2.167344186746791,
      "grad_norm": 0.6161749362945557,
      "learning_rate": 5.702641618839903e-06,
      "loss": 0.0146,
      "step": 1324360
    },
    {
      "epoch": 2.167376917185444,
      "grad_norm": 0.20608842372894287,
      "learning_rate": 5.702575726626385e-06,
      "loss": 0.0137,
      "step": 1324380
    },
    {
      "epoch": 2.1674096476240976,
      "grad_norm": 0.39993709325790405,
      "learning_rate": 5.702509834412868e-06,
      "loss": 0.0159,
      "step": 1324400
    },
    {
      "epoch": 2.1674423780627508,
      "grad_norm": 0.28569695353507996,
      "learning_rate": 5.70244394219935e-06,
      "loss": 0.0131,
      "step": 1324420
    },
    {
      "epoch": 2.167475108501404,
      "grad_norm": 0.8368738293647766,
      "learning_rate": 5.702378049985834e-06,
      "loss": 0.0213,
      "step": 1324440
    },
    {
      "epoch": 2.1675078389400575,
      "grad_norm": 0.4600287079811096,
      "learning_rate": 5.7023121577723166e-06,
      "loss": 0.0243,
      "step": 1324460
    },
    {
      "epoch": 2.1675405693787106,
      "grad_norm": 0.11017969995737076,
      "learning_rate": 5.702246265558799e-06,
      "loss": 0.0099,
      "step": 1324480
    },
    {
      "epoch": 2.1675732998173642,
      "grad_norm": 0.27427583932876587,
      "learning_rate": 5.702180373345282e-06,
      "loss": 0.0136,
      "step": 1324500
    },
    {
      "epoch": 2.1676060302560174,
      "grad_norm": 0.48138993978500366,
      "learning_rate": 5.702114481131766e-06,
      "loss": 0.0168,
      "step": 1324520
    },
    {
      "epoch": 2.167638760694671,
      "grad_norm": 0.5351414680480957,
      "learning_rate": 5.7020485889182475e-06,
      "loss": 0.0149,
      "step": 1324540
    },
    {
      "epoch": 2.167671491133324,
      "grad_norm": 0.5269273519515991,
      "learning_rate": 5.701982696704731e-06,
      "loss": 0.0139,
      "step": 1324560
    },
    {
      "epoch": 2.1677042215719773,
      "grad_norm": 0.23649421334266663,
      "learning_rate": 5.701916804491213e-06,
      "loss": 0.0139,
      "step": 1324580
    },
    {
      "epoch": 2.167736952010631,
      "grad_norm": 1.0586049556732178,
      "learning_rate": 5.701850912277697e-06,
      "loss": 0.0181,
      "step": 1324600
    },
    {
      "epoch": 2.167769682449284,
      "grad_norm": 0.8400673270225525,
      "learning_rate": 5.70178502006418e-06,
      "loss": 0.0194,
      "step": 1324620
    },
    {
      "epoch": 2.1678024128879376,
      "grad_norm": 0.167375847697258,
      "learning_rate": 5.701719127850662e-06,
      "loss": 0.0179,
      "step": 1324640
    },
    {
      "epoch": 2.1678351433265908,
      "grad_norm": 0.5506005883216858,
      "learning_rate": 5.701653235637146e-06,
      "loss": 0.0197,
      "step": 1324660
    },
    {
      "epoch": 2.1678678737652444,
      "grad_norm": 0.5087480545043945,
      "learning_rate": 5.7015873434236275e-06,
      "loss": 0.0091,
      "step": 1324680
    },
    {
      "epoch": 2.1679006042038975,
      "grad_norm": 0.31712108850479126,
      "learning_rate": 5.701521451210111e-06,
      "loss": 0.0155,
      "step": 1324700
    },
    {
      "epoch": 2.1679333346425507,
      "grad_norm": 0.904229998588562,
      "learning_rate": 5.701455558996594e-06,
      "loss": 0.0238,
      "step": 1324720
    },
    {
      "epoch": 2.1679660650812043,
      "grad_norm": 0.5734831094741821,
      "learning_rate": 5.7013896667830775e-06,
      "loss": 0.019,
      "step": 1324740
    },
    {
      "epoch": 2.1679987955198574,
      "grad_norm": 0.38389644026756287,
      "learning_rate": 5.701323774569559e-06,
      "loss": 0.0139,
      "step": 1324760
    },
    {
      "epoch": 2.168031525958511,
      "grad_norm": 0.7599779367446899,
      "learning_rate": 5.701257882356043e-06,
      "loss": 0.0243,
      "step": 1324780
    },
    {
      "epoch": 2.168064256397164,
      "grad_norm": 0.2969951033592224,
      "learning_rate": 5.701191990142525e-06,
      "loss": 0.0141,
      "step": 1324800
    },
    {
      "epoch": 2.1680969868358178,
      "grad_norm": 0.619465172290802,
      "learning_rate": 5.701126097929008e-06,
      "loss": 0.0148,
      "step": 1324820
    },
    {
      "epoch": 2.168129717274471,
      "grad_norm": 0.3716815412044525,
      "learning_rate": 5.70106020571549e-06,
      "loss": 0.0131,
      "step": 1324840
    },
    {
      "epoch": 2.168162447713124,
      "grad_norm": 0.8745118379592896,
      "learning_rate": 5.700994313501974e-06,
      "loss": 0.014,
      "step": 1324860
    },
    {
      "epoch": 2.1681951781517776,
      "grad_norm": 0.24454517662525177,
      "learning_rate": 5.700928421288457e-06,
      "loss": 0.0122,
      "step": 1324880
    },
    {
      "epoch": 2.168227908590431,
      "grad_norm": 0.17412415146827698,
      "learning_rate": 5.700862529074939e-06,
      "loss": 0.0178,
      "step": 1324900
    },
    {
      "epoch": 2.1682606390290844,
      "grad_norm": 0.09434038400650024,
      "learning_rate": 5.700796636861422e-06,
      "loss": 0.0209,
      "step": 1324920
    },
    {
      "epoch": 2.1682933694677375,
      "grad_norm": 0.40008917450904846,
      "learning_rate": 5.700730744647906e-06,
      "loss": 0.0143,
      "step": 1324940
    },
    {
      "epoch": 2.168326099906391,
      "grad_norm": 0.2934400141239166,
      "learning_rate": 5.7006648524343884e-06,
      "loss": 0.0144,
      "step": 1324960
    },
    {
      "epoch": 2.1683588303450443,
      "grad_norm": 0.4510083794593811,
      "learning_rate": 5.700598960220871e-06,
      "loss": 0.0154,
      "step": 1324980
    },
    {
      "epoch": 2.1683915607836974,
      "grad_norm": 0.16647787392139435,
      "learning_rate": 5.700533068007355e-06,
      "loss": 0.0178,
      "step": 1325000
    },
    {
      "epoch": 2.168424291222351,
      "grad_norm": 0.6134485602378845,
      "learning_rate": 5.700467175793837e-06,
      "loss": 0.0202,
      "step": 1325020
    },
    {
      "epoch": 2.168457021661004,
      "grad_norm": 0.7948659658432007,
      "learning_rate": 5.70040128358032e-06,
      "loss": 0.0169,
      "step": 1325040
    },
    {
      "epoch": 2.1684897520996578,
      "grad_norm": 0.24375036358833313,
      "learning_rate": 5.700335391366802e-06,
      "loss": 0.0144,
      "step": 1325060
    },
    {
      "epoch": 2.168522482538311,
      "grad_norm": 0.18988581001758575,
      "learning_rate": 5.700269499153286e-06,
      "loss": 0.0193,
      "step": 1325080
    },
    {
      "epoch": 2.1685552129769645,
      "grad_norm": 0.12588800489902496,
      "learning_rate": 5.7002036069397685e-06,
      "loss": 0.015,
      "step": 1325100
    },
    {
      "epoch": 2.1685879434156177,
      "grad_norm": 0.28997087478637695,
      "learning_rate": 5.700137714726251e-06,
      "loss": 0.0219,
      "step": 1325120
    },
    {
      "epoch": 2.168620673854271,
      "grad_norm": 0.24244405329227448,
      "learning_rate": 5.700071822512734e-06,
      "loss": 0.0151,
      "step": 1325140
    },
    {
      "epoch": 2.1686534042929244,
      "grad_norm": 0.4147343635559082,
      "learning_rate": 5.7000059302992175e-06,
      "loss": 0.0173,
      "step": 1325160
    },
    {
      "epoch": 2.1686861347315776,
      "grad_norm": 0.07903670519590378,
      "learning_rate": 5.6999400380856994e-06,
      "loss": 0.0195,
      "step": 1325180
    },
    {
      "epoch": 2.168718865170231,
      "grad_norm": 0.11455892026424408,
      "learning_rate": 5.699874145872183e-06,
      "loss": 0.0231,
      "step": 1325200
    },
    {
      "epoch": 2.1687515956088843,
      "grad_norm": 0.25745031237602234,
      "learning_rate": 5.699808253658665e-06,
      "loss": 0.0159,
      "step": 1325220
    },
    {
      "epoch": 2.168784326047538,
      "grad_norm": 0.2733393907546997,
      "learning_rate": 5.6997423614451485e-06,
      "loss": 0.0162,
      "step": 1325240
    },
    {
      "epoch": 2.168817056486191,
      "grad_norm": 0.7071741819381714,
      "learning_rate": 5.699676469231631e-06,
      "loss": 0.0164,
      "step": 1325260
    },
    {
      "epoch": 2.168849786924844,
      "grad_norm": 0.3069336712360382,
      "learning_rate": 5.699610577018114e-06,
      "loss": 0.0164,
      "step": 1325280
    },
    {
      "epoch": 2.168882517363498,
      "grad_norm": 0.12109015136957169,
      "learning_rate": 5.6995446848045976e-06,
      "loss": 0.0133,
      "step": 1325300
    },
    {
      "epoch": 2.168915247802151,
      "grad_norm": 0.5642997026443481,
      "learning_rate": 5.69947879259108e-06,
      "loss": 0.0168,
      "step": 1325320
    },
    {
      "epoch": 2.1689479782408045,
      "grad_norm": 0.5188980102539062,
      "learning_rate": 5.699412900377563e-06,
      "loss": 0.0261,
      "step": 1325340
    },
    {
      "epoch": 2.1689807086794577,
      "grad_norm": 0.26304250955581665,
      "learning_rate": 5.699347008164046e-06,
      "loss": 0.0133,
      "step": 1325360
    },
    {
      "epoch": 2.1690134391181113,
      "grad_norm": 1.129151701927185,
      "learning_rate": 5.699281115950529e-06,
      "loss": 0.0193,
      "step": 1325380
    },
    {
      "epoch": 2.1690461695567644,
      "grad_norm": 0.6965059638023376,
      "learning_rate": 5.699215223737011e-06,
      "loss": 0.0197,
      "step": 1325400
    },
    {
      "epoch": 2.1690788999954176,
      "grad_norm": 0.9678268432617188,
      "learning_rate": 5.699149331523495e-06,
      "loss": 0.0128,
      "step": 1325420
    },
    {
      "epoch": 2.169111630434071,
      "grad_norm": 0.1880490481853485,
      "learning_rate": 5.699083439309977e-06,
      "loss": 0.0151,
      "step": 1325440
    },
    {
      "epoch": 2.1691443608727243,
      "grad_norm": 0.426999568939209,
      "learning_rate": 5.69901754709646e-06,
      "loss": 0.0145,
      "step": 1325460
    },
    {
      "epoch": 2.169177091311378,
      "grad_norm": 0.1726336032152176,
      "learning_rate": 5.698951654882943e-06,
      "loss": 0.0151,
      "step": 1325480
    },
    {
      "epoch": 2.169209821750031,
      "grad_norm": 0.34096071124076843,
      "learning_rate": 5.698885762669426e-06,
      "loss": 0.0146,
      "step": 1325500
    },
    {
      "epoch": 2.1692425521886842,
      "grad_norm": 0.3153815269470215,
      "learning_rate": 5.6988198704559085e-06,
      "loss": 0.0181,
      "step": 1325520
    },
    {
      "epoch": 2.169275282627338,
      "grad_norm": 0.29149654507637024,
      "learning_rate": 5.698753978242392e-06,
      "loss": 0.0172,
      "step": 1325540
    },
    {
      "epoch": 2.169308013065991,
      "grad_norm": 0.25591525435447693,
      "learning_rate": 5.698688086028874e-06,
      "loss": 0.0184,
      "step": 1325560
    },
    {
      "epoch": 2.1693407435046446,
      "grad_norm": 0.4290756285190582,
      "learning_rate": 5.698622193815358e-06,
      "loss": 0.017,
      "step": 1325580
    },
    {
      "epoch": 2.1693734739432977,
      "grad_norm": 1.6052154302597046,
      "learning_rate": 5.6985563016018395e-06,
      "loss": 0.0217,
      "step": 1325600
    },
    {
      "epoch": 2.1694062043819513,
      "grad_norm": 0.5315671563148499,
      "learning_rate": 5.698490409388323e-06,
      "loss": 0.0138,
      "step": 1325620
    },
    {
      "epoch": 2.1694389348206045,
      "grad_norm": 0.42252683639526367,
      "learning_rate": 5.698424517174805e-06,
      "loss": 0.0161,
      "step": 1325640
    },
    {
      "epoch": 2.1694716652592576,
      "grad_norm": 0.18437467515468597,
      "learning_rate": 5.6983586249612886e-06,
      "loss": 0.0129,
      "step": 1325660
    },
    {
      "epoch": 2.169504395697911,
      "grad_norm": 0.4802335202693939,
      "learning_rate": 5.698292732747772e-06,
      "loss": 0.0202,
      "step": 1325680
    },
    {
      "epoch": 2.1695371261365644,
      "grad_norm": 0.3189757168292999,
      "learning_rate": 5.698226840534254e-06,
      "loss": 0.0202,
      "step": 1325700
    },
    {
      "epoch": 2.169569856575218,
      "grad_norm": 1.3355467319488525,
      "learning_rate": 5.698160948320738e-06,
      "loss": 0.0139,
      "step": 1325720
    },
    {
      "epoch": 2.169602587013871,
      "grad_norm": 0.6798632740974426,
      "learning_rate": 5.69809505610722e-06,
      "loss": 0.0133,
      "step": 1325740
    },
    {
      "epoch": 2.1696353174525247,
      "grad_norm": 0.22415083646774292,
      "learning_rate": 5.698029163893703e-06,
      "loss": 0.0148,
      "step": 1325760
    },
    {
      "epoch": 2.169668047891178,
      "grad_norm": 0.5725387334823608,
      "learning_rate": 5.697963271680186e-06,
      "loss": 0.0148,
      "step": 1325780
    },
    {
      "epoch": 2.169700778329831,
      "grad_norm": 0.24477559328079224,
      "learning_rate": 5.6978973794666694e-06,
      "loss": 0.0196,
      "step": 1325800
    },
    {
      "epoch": 2.1697335087684846,
      "grad_norm": 1.23810613155365,
      "learning_rate": 5.697831487253151e-06,
      "loss": 0.0178,
      "step": 1325820
    },
    {
      "epoch": 2.1697662392071377,
      "grad_norm": 0.08049826323986053,
      "learning_rate": 5.697765595039635e-06,
      "loss": 0.0207,
      "step": 1325840
    },
    {
      "epoch": 2.1697989696457913,
      "grad_norm": 0.29215723276138306,
      "learning_rate": 5.697699702826117e-06,
      "loss": 0.0212,
      "step": 1325860
    },
    {
      "epoch": 2.1698317000844445,
      "grad_norm": 0.25791025161743164,
      "learning_rate": 5.6976338106126e-06,
      "loss": 0.0119,
      "step": 1325880
    },
    {
      "epoch": 2.169864430523098,
      "grad_norm": 0.14903749525547028,
      "learning_rate": 5.697567918399083e-06,
      "loss": 0.0158,
      "step": 1325900
    },
    {
      "epoch": 2.1698971609617512,
      "grad_norm": 0.3028877079486847,
      "learning_rate": 5.697502026185566e-06,
      "loss": 0.0118,
      "step": 1325920
    },
    {
      "epoch": 2.1699298914004044,
      "grad_norm": 0.25795161724090576,
      "learning_rate": 5.697436133972049e-06,
      "loss": 0.012,
      "step": 1325940
    },
    {
      "epoch": 2.169962621839058,
      "grad_norm": 0.8985280990600586,
      "learning_rate": 5.697370241758532e-06,
      "loss": 0.0144,
      "step": 1325960
    },
    {
      "epoch": 2.169995352277711,
      "grad_norm": 0.32965439558029175,
      "learning_rate": 5.697304349545014e-06,
      "loss": 0.0141,
      "step": 1325980
    },
    {
      "epoch": 2.1700280827163647,
      "grad_norm": 0.5773815512657166,
      "learning_rate": 5.697238457331498e-06,
      "loss": 0.012,
      "step": 1326000
    },
    {
      "epoch": 2.170060813155018,
      "grad_norm": 0.7908269762992859,
      "learning_rate": 5.697172565117981e-06,
      "loss": 0.0108,
      "step": 1326020
    },
    {
      "epoch": 2.170093543593671,
      "grad_norm": 0.12668843567371368,
      "learning_rate": 5.697106672904463e-06,
      "loss": 0.0157,
      "step": 1326040
    },
    {
      "epoch": 2.1701262740323246,
      "grad_norm": 0.3454694151878357,
      "learning_rate": 5.697040780690947e-06,
      "loss": 0.015,
      "step": 1326060
    },
    {
      "epoch": 2.1701590044709778,
      "grad_norm": 0.2622019648551941,
      "learning_rate": 5.696974888477429e-06,
      "loss": 0.0158,
      "step": 1326080
    },
    {
      "epoch": 2.1701917349096314,
      "grad_norm": 0.5453599095344543,
      "learning_rate": 5.696908996263912e-06,
      "loss": 0.0167,
      "step": 1326100
    },
    {
      "epoch": 2.1702244653482845,
      "grad_norm": 0.5300134420394897,
      "learning_rate": 5.696843104050395e-06,
      "loss": 0.0184,
      "step": 1326120
    },
    {
      "epoch": 2.170257195786938,
      "grad_norm": 0.2363169938325882,
      "learning_rate": 5.696777211836878e-06,
      "loss": 0.0146,
      "step": 1326140
    },
    {
      "epoch": 2.1702899262255912,
      "grad_norm": 0.7643986940383911,
      "learning_rate": 5.6967113196233605e-06,
      "loss": 0.0218,
      "step": 1326160
    },
    {
      "epoch": 2.1703226566642444,
      "grad_norm": 0.6224385499954224,
      "learning_rate": 5.696645427409844e-06,
      "loss": 0.0141,
      "step": 1326180
    },
    {
      "epoch": 2.170355387102898,
      "grad_norm": 0.340761661529541,
      "learning_rate": 5.696579535196326e-06,
      "loss": 0.0197,
      "step": 1326200
    },
    {
      "epoch": 2.170388117541551,
      "grad_norm": 0.6832563877105713,
      "learning_rate": 5.6965136429828095e-06,
      "loss": 0.0182,
      "step": 1326220
    },
    {
      "epoch": 2.1704208479802047,
      "grad_norm": 0.2994234561920166,
      "learning_rate": 5.696447750769291e-06,
      "loss": 0.0123,
      "step": 1326240
    },
    {
      "epoch": 2.170453578418858,
      "grad_norm": 0.5519474148750305,
      "learning_rate": 5.696381858555775e-06,
      "loss": 0.0113,
      "step": 1326260
    },
    {
      "epoch": 2.1704863088575115,
      "grad_norm": 0.36043933033943176,
      "learning_rate": 5.696315966342258e-06,
      "loss": 0.011,
      "step": 1326280
    },
    {
      "epoch": 2.1705190392961646,
      "grad_norm": 0.29992765188217163,
      "learning_rate": 5.6962500741287405e-06,
      "loss": 0.0129,
      "step": 1326300
    },
    {
      "epoch": 2.170551769734818,
      "grad_norm": 0.23621635138988495,
      "learning_rate": 5.696184181915223e-06,
      "loss": 0.0122,
      "step": 1326320
    },
    {
      "epoch": 2.1705845001734714,
      "grad_norm": 0.37689903378486633,
      "learning_rate": 5.696118289701707e-06,
      "loss": 0.013,
      "step": 1326340
    },
    {
      "epoch": 2.1706172306121245,
      "grad_norm": 0.40045467019081116,
      "learning_rate": 5.6960523974881895e-06,
      "loss": 0.0112,
      "step": 1326360
    },
    {
      "epoch": 2.170649961050778,
      "grad_norm": 0.3103291988372803,
      "learning_rate": 5.695986505274672e-06,
      "loss": 0.016,
      "step": 1326380
    },
    {
      "epoch": 2.1706826914894313,
      "grad_norm": 0.6383582949638367,
      "learning_rate": 5.695920613061156e-06,
      "loss": 0.0131,
      "step": 1326400
    },
    {
      "epoch": 2.170715421928085,
      "grad_norm": 4.195016860961914,
      "learning_rate": 5.695854720847638e-06,
      "loss": 0.0121,
      "step": 1326420
    },
    {
      "epoch": 2.170748152366738,
      "grad_norm": 0.42977675795555115,
      "learning_rate": 5.695788828634121e-06,
      "loss": 0.0177,
      "step": 1326440
    },
    {
      "epoch": 2.170780882805391,
      "grad_norm": 0.7893981337547302,
      "learning_rate": 5.695722936420603e-06,
      "loss": 0.0273,
      "step": 1326460
    },
    {
      "epoch": 2.1708136132440448,
      "grad_norm": 0.09844555705785751,
      "learning_rate": 5.695657044207087e-06,
      "loss": 0.0138,
      "step": 1326480
    },
    {
      "epoch": 2.170846343682698,
      "grad_norm": 0.22608143091201782,
      "learning_rate": 5.6955911519935696e-06,
      "loss": 0.0159,
      "step": 1326500
    },
    {
      "epoch": 2.1708790741213515,
      "grad_norm": 0.4713801145553589,
      "learning_rate": 5.695525259780052e-06,
      "loss": 0.019,
      "step": 1326520
    },
    {
      "epoch": 2.1709118045600047,
      "grad_norm": 0.2702462077140808,
      "learning_rate": 5.695459367566535e-06,
      "loss": 0.0174,
      "step": 1326540
    },
    {
      "epoch": 2.1709445349986582,
      "grad_norm": 0.12154048681259155,
      "learning_rate": 5.695393475353019e-06,
      "loss": 0.0119,
      "step": 1326560
    },
    {
      "epoch": 2.1709772654373114,
      "grad_norm": 0.2750023901462555,
      "learning_rate": 5.6953275831395005e-06,
      "loss": 0.0206,
      "step": 1326580
    },
    {
      "epoch": 2.1710099958759645,
      "grad_norm": 0.5414884686470032,
      "learning_rate": 5.695261690925984e-06,
      "loss": 0.0143,
      "step": 1326600
    },
    {
      "epoch": 2.171042726314618,
      "grad_norm": 0.2997131943702698,
      "learning_rate": 5.695195798712466e-06,
      "loss": 0.0141,
      "step": 1326620
    },
    {
      "epoch": 2.1710754567532713,
      "grad_norm": 0.25442054867744446,
      "learning_rate": 5.69512990649895e-06,
      "loss": 0.0149,
      "step": 1326640
    },
    {
      "epoch": 2.171108187191925,
      "grad_norm": 0.3083752989768982,
      "learning_rate": 5.6950640142854315e-06,
      "loss": 0.0169,
      "step": 1326660
    },
    {
      "epoch": 2.171140917630578,
      "grad_norm": 0.343131422996521,
      "learning_rate": 5.694998122071915e-06,
      "loss": 0.0172,
      "step": 1326680
    },
    {
      "epoch": 2.1711736480692316,
      "grad_norm": 0.2219579815864563,
      "learning_rate": 5.694932229858398e-06,
      "loss": 0.0172,
      "step": 1326700
    },
    {
      "epoch": 2.171206378507885,
      "grad_norm": 0.22210918366909027,
      "learning_rate": 5.6948663376448806e-06,
      "loss": 0.0182,
      "step": 1326720
    },
    {
      "epoch": 2.171239108946538,
      "grad_norm": 0.5092679262161255,
      "learning_rate": 5.694800445431364e-06,
      "loss": 0.0166,
      "step": 1326740
    },
    {
      "epoch": 2.1712718393851915,
      "grad_norm": 1.635668158531189,
      "learning_rate": 5.694734553217847e-06,
      "loss": 0.0146,
      "step": 1326760
    },
    {
      "epoch": 2.1713045698238447,
      "grad_norm": 0.1725378781557083,
      "learning_rate": 5.69466866100433e-06,
      "loss": 0.0121,
      "step": 1326780
    },
    {
      "epoch": 2.1713373002624983,
      "grad_norm": 0.7349360585212708,
      "learning_rate": 5.694602768790812e-06,
      "loss": 0.0151,
      "step": 1326800
    },
    {
      "epoch": 2.1713700307011514,
      "grad_norm": 0.543971061706543,
      "learning_rate": 5.694536876577296e-06,
      "loss": 0.0106,
      "step": 1326820
    },
    {
      "epoch": 2.171402761139805,
      "grad_norm": 0.3326508700847626,
      "learning_rate": 5.694470984363778e-06,
      "loss": 0.0116,
      "step": 1326840
    },
    {
      "epoch": 2.171435491578458,
      "grad_norm": 0.9844455718994141,
      "learning_rate": 5.6944050921502614e-06,
      "loss": 0.0214,
      "step": 1326860
    },
    {
      "epoch": 2.1714682220171113,
      "grad_norm": 0.49218031764030457,
      "learning_rate": 5.694339199936743e-06,
      "loss": 0.0187,
      "step": 1326880
    },
    {
      "epoch": 2.171500952455765,
      "grad_norm": 0.5128735899925232,
      "learning_rate": 5.694273307723227e-06,
      "loss": 0.0089,
      "step": 1326900
    },
    {
      "epoch": 2.171533682894418,
      "grad_norm": 2.789515733718872,
      "learning_rate": 5.69420741550971e-06,
      "loss": 0.0177,
      "step": 1326920
    },
    {
      "epoch": 2.1715664133330717,
      "grad_norm": 0.9897506237030029,
      "learning_rate": 5.694141523296192e-06,
      "loss": 0.0158,
      "step": 1326940
    },
    {
      "epoch": 2.171599143771725,
      "grad_norm": 0.11087106168270111,
      "learning_rate": 5.694075631082675e-06,
      "loss": 0.0152,
      "step": 1326960
    },
    {
      "epoch": 2.1716318742103784,
      "grad_norm": 0.2793220579624176,
      "learning_rate": 5.694009738869159e-06,
      "loss": 0.015,
      "step": 1326980
    },
    {
      "epoch": 2.1716646046490315,
      "grad_norm": 0.20786629617214203,
      "learning_rate": 5.693943846655641e-06,
      "loss": 0.0176,
      "step": 1327000
    },
    {
      "epoch": 2.1716973350876847,
      "grad_norm": 0.6254385709762573,
      "learning_rate": 5.693877954442124e-06,
      "loss": 0.0196,
      "step": 1327020
    },
    {
      "epoch": 2.1717300655263383,
      "grad_norm": 0.1999015361070633,
      "learning_rate": 5.693812062228606e-06,
      "loss": 0.0211,
      "step": 1327040
    },
    {
      "epoch": 2.1717627959649914,
      "grad_norm": 0.20606230199337006,
      "learning_rate": 5.69374617001509e-06,
      "loss": 0.0172,
      "step": 1327060
    },
    {
      "epoch": 2.171795526403645,
      "grad_norm": 0.37009352445602417,
      "learning_rate": 5.693680277801573e-06,
      "loss": 0.0141,
      "step": 1327080
    },
    {
      "epoch": 2.171828256842298,
      "grad_norm": 0.5544552206993103,
      "learning_rate": 5.693614385588055e-06,
      "loss": 0.0139,
      "step": 1327100
    },
    {
      "epoch": 2.1718609872809513,
      "grad_norm": 0.34375646710395813,
      "learning_rate": 5.693548493374539e-06,
      "loss": 0.0128,
      "step": 1327120
    },
    {
      "epoch": 2.171893717719605,
      "grad_norm": 0.20086951553821564,
      "learning_rate": 5.6934826011610215e-06,
      "loss": 0.0137,
      "step": 1327140
    },
    {
      "epoch": 2.171926448158258,
      "grad_norm": 0.19258996844291687,
      "learning_rate": 5.693416708947504e-06,
      "loss": 0.0148,
      "step": 1327160
    },
    {
      "epoch": 2.1719591785969117,
      "grad_norm": 0.13531582057476044,
      "learning_rate": 5.693350816733987e-06,
      "loss": 0.016,
      "step": 1327180
    },
    {
      "epoch": 2.171991909035565,
      "grad_norm": 0.5580869317054749,
      "learning_rate": 5.6932849245204705e-06,
      "loss": 0.0133,
      "step": 1327200
    },
    {
      "epoch": 2.1720246394742184,
      "grad_norm": 0.40462687611579895,
      "learning_rate": 5.6932190323069524e-06,
      "loss": 0.0243,
      "step": 1327220
    },
    {
      "epoch": 2.1720573699128716,
      "grad_norm": 4.02058219909668,
      "learning_rate": 5.693153140093436e-06,
      "loss": 0.0187,
      "step": 1327240
    },
    {
      "epoch": 2.1720901003515247,
      "grad_norm": 0.43818676471710205,
      "learning_rate": 5.693087247879918e-06,
      "loss": 0.017,
      "step": 1327260
    },
    {
      "epoch": 2.1721228307901783,
      "grad_norm": 0.34755080938339233,
      "learning_rate": 5.6930213556664015e-06,
      "loss": 0.0134,
      "step": 1327280
    },
    {
      "epoch": 2.1721555612288315,
      "grad_norm": 0.5373131632804871,
      "learning_rate": 5.692955463452884e-06,
      "loss": 0.0165,
      "step": 1327300
    },
    {
      "epoch": 2.172188291667485,
      "grad_norm": 0.20998159050941467,
      "learning_rate": 5.692889571239367e-06,
      "loss": 0.0134,
      "step": 1327320
    },
    {
      "epoch": 2.172221022106138,
      "grad_norm": 0.34824615716934204,
      "learning_rate": 5.69282367902585e-06,
      "loss": 0.0171,
      "step": 1327340
    },
    {
      "epoch": 2.172253752544792,
      "grad_norm": 0.5843951106071472,
      "learning_rate": 5.692757786812333e-06,
      "loss": 0.0143,
      "step": 1327360
    },
    {
      "epoch": 2.172286482983445,
      "grad_norm": 0.4121200442314148,
      "learning_rate": 5.692691894598815e-06,
      "loss": 0.0087,
      "step": 1327380
    },
    {
      "epoch": 2.172319213422098,
      "grad_norm": 0.7366591095924377,
      "learning_rate": 5.692626002385299e-06,
      "loss": 0.0253,
      "step": 1327400
    },
    {
      "epoch": 2.1723519438607517,
      "grad_norm": 0.5342825055122375,
      "learning_rate": 5.692560110171782e-06,
      "loss": 0.0114,
      "step": 1327420
    },
    {
      "epoch": 2.172384674299405,
      "grad_norm": 0.17637400329113007,
      "learning_rate": 5.692494217958264e-06,
      "loss": 0.0186,
      "step": 1327440
    },
    {
      "epoch": 2.1724174047380584,
      "grad_norm": 0.620277464389801,
      "learning_rate": 5.692428325744748e-06,
      "loss": 0.0131,
      "step": 1327460
    },
    {
      "epoch": 2.1724501351767116,
      "grad_norm": 0.22247470915317535,
      "learning_rate": 5.69236243353123e-06,
      "loss": 0.0124,
      "step": 1327480
    },
    {
      "epoch": 2.1724828656153647,
      "grad_norm": 1.0191640853881836,
      "learning_rate": 5.692296541317713e-06,
      "loss": 0.0117,
      "step": 1327500
    },
    {
      "epoch": 2.1725155960540183,
      "grad_norm": 0.8990387916564941,
      "learning_rate": 5.692230649104195e-06,
      "loss": 0.0172,
      "step": 1327520
    },
    {
      "epoch": 2.1725483264926715,
      "grad_norm": 0.2777477502822876,
      "learning_rate": 5.692164756890679e-06,
      "loss": 0.0145,
      "step": 1327540
    },
    {
      "epoch": 2.172581056931325,
      "grad_norm": 0.4352329671382904,
      "learning_rate": 5.6920988646771616e-06,
      "loss": 0.0102,
      "step": 1327560
    },
    {
      "epoch": 2.1726137873699782,
      "grad_norm": 0.4920933246612549,
      "learning_rate": 5.692032972463645e-06,
      "loss": 0.0127,
      "step": 1327580
    },
    {
      "epoch": 2.172646517808632,
      "grad_norm": 0.9720668196678162,
      "learning_rate": 5.691967080250127e-06,
      "loss": 0.0192,
      "step": 1327600
    },
    {
      "epoch": 2.172679248247285,
      "grad_norm": 0.20791497826576233,
      "learning_rate": 5.691901188036611e-06,
      "loss": 0.0122,
      "step": 1327620
    },
    {
      "epoch": 2.172711978685938,
      "grad_norm": 0.5841691493988037,
      "learning_rate": 5.6918352958230925e-06,
      "loss": 0.0121,
      "step": 1327640
    },
    {
      "epoch": 2.1727447091245917,
      "grad_norm": 0.38125208020210266,
      "learning_rate": 5.691769403609576e-06,
      "loss": 0.0107,
      "step": 1327660
    },
    {
      "epoch": 2.172777439563245,
      "grad_norm": 0.7863775491714478,
      "learning_rate": 5.691703511396058e-06,
      "loss": 0.0128,
      "step": 1327680
    },
    {
      "epoch": 2.1728101700018985,
      "grad_norm": 0.46624502539634705,
      "learning_rate": 5.691637619182542e-06,
      "loss": 0.0128,
      "step": 1327700
    },
    {
      "epoch": 2.1728429004405516,
      "grad_norm": 0.7666349411010742,
      "learning_rate": 5.691571726969024e-06,
      "loss": 0.0195,
      "step": 1327720
    },
    {
      "epoch": 2.172875630879205,
      "grad_norm": 0.5943374037742615,
      "learning_rate": 5.691505834755507e-06,
      "loss": 0.0147,
      "step": 1327740
    },
    {
      "epoch": 2.1729083613178584,
      "grad_norm": 0.1767295002937317,
      "learning_rate": 5.69143994254199e-06,
      "loss": 0.0115,
      "step": 1327760
    },
    {
      "epoch": 2.1729410917565115,
      "grad_norm": 0.6236580014228821,
      "learning_rate": 5.691374050328473e-06,
      "loss": 0.0181,
      "step": 1327780
    },
    {
      "epoch": 2.172973822195165,
      "grad_norm": 0.8295320868492126,
      "learning_rate": 5.691308158114956e-06,
      "loss": 0.0165,
      "step": 1327800
    },
    {
      "epoch": 2.1730065526338183,
      "grad_norm": 0.24886929988861084,
      "learning_rate": 5.691242265901439e-06,
      "loss": 0.0116,
      "step": 1327820
    },
    {
      "epoch": 2.173039283072472,
      "grad_norm": 0.4645235240459442,
      "learning_rate": 5.6911763736879225e-06,
      "loss": 0.0131,
      "step": 1327840
    },
    {
      "epoch": 2.173072013511125,
      "grad_norm": 0.21308410167694092,
      "learning_rate": 5.691110481474404e-06,
      "loss": 0.0191,
      "step": 1327860
    },
    {
      "epoch": 2.1731047439497786,
      "grad_norm": 1.0080866813659668,
      "learning_rate": 5.691044589260888e-06,
      "loss": 0.0188,
      "step": 1327880
    },
    {
      "epoch": 2.1731374743884317,
      "grad_norm": 0.5198795199394226,
      "learning_rate": 5.69097869704737e-06,
      "loss": 0.0166,
      "step": 1327900
    },
    {
      "epoch": 2.173170204827085,
      "grad_norm": 0.3690437972545624,
      "learning_rate": 5.690912804833853e-06,
      "loss": 0.0203,
      "step": 1327920
    },
    {
      "epoch": 2.1732029352657385,
      "grad_norm": 0.18176642060279846,
      "learning_rate": 5.690846912620336e-06,
      "loss": 0.019,
      "step": 1327940
    },
    {
      "epoch": 2.1732356657043916,
      "grad_norm": 0.3736634850502014,
      "learning_rate": 5.690781020406819e-06,
      "loss": 0.017,
      "step": 1327960
    },
    {
      "epoch": 2.1732683961430452,
      "grad_norm": 0.15743717551231384,
      "learning_rate": 5.690715128193302e-06,
      "loss": 0.0195,
      "step": 1327980
    },
    {
      "epoch": 2.1733011265816984,
      "grad_norm": 0.48419737815856934,
      "learning_rate": 5.690649235979785e-06,
      "loss": 0.0249,
      "step": 1328000
    },
    {
      "epoch": 2.173333857020352,
      "grad_norm": 0.06354448199272156,
      "learning_rate": 5.690583343766267e-06,
      "loss": 0.0119,
      "step": 1328020
    },
    {
      "epoch": 2.173366587459005,
      "grad_norm": 0.3079112470149994,
      "learning_rate": 5.690517451552751e-06,
      "loss": 0.0114,
      "step": 1328040
    },
    {
      "epoch": 2.1733993178976583,
      "grad_norm": 0.25774872303009033,
      "learning_rate": 5.690451559339233e-06,
      "loss": 0.0209,
      "step": 1328060
    },
    {
      "epoch": 2.173432048336312,
      "grad_norm": 0.9763782024383545,
      "learning_rate": 5.690385667125716e-06,
      "loss": 0.0196,
      "step": 1328080
    },
    {
      "epoch": 2.173464778774965,
      "grad_norm": 0.09256105870008469,
      "learning_rate": 5.690319774912199e-06,
      "loss": 0.018,
      "step": 1328100
    },
    {
      "epoch": 2.1734975092136186,
      "grad_norm": 0.2860771715641022,
      "learning_rate": 5.690253882698682e-06,
      "loss": 0.0101,
      "step": 1328120
    },
    {
      "epoch": 2.1735302396522718,
      "grad_norm": 0.052902113646268845,
      "learning_rate": 5.690187990485165e-06,
      "loss": 0.0157,
      "step": 1328140
    },
    {
      "epoch": 2.1735629700909254,
      "grad_norm": 0.2612348794937134,
      "learning_rate": 5.690122098271648e-06,
      "loss": 0.0095,
      "step": 1328160
    },
    {
      "epoch": 2.1735957005295785,
      "grad_norm": 1.2432633638381958,
      "learning_rate": 5.690056206058131e-06,
      "loss": 0.0244,
      "step": 1328180
    },
    {
      "epoch": 2.1736284309682317,
      "grad_norm": 0.2103407084941864,
      "learning_rate": 5.6899903138446135e-06,
      "loss": 0.0156,
      "step": 1328200
    },
    {
      "epoch": 2.1736611614068853,
      "grad_norm": 1.274784803390503,
      "learning_rate": 5.689924421631097e-06,
      "loss": 0.016,
      "step": 1328220
    },
    {
      "epoch": 2.1736938918455384,
      "grad_norm": 0.5222572088241577,
      "learning_rate": 5.689858529417579e-06,
      "loss": 0.015,
      "step": 1328240
    },
    {
      "epoch": 2.173726622284192,
      "grad_norm": 0.35083121061325073,
      "learning_rate": 5.6897926372040625e-06,
      "loss": 0.0154,
      "step": 1328260
    },
    {
      "epoch": 2.173759352722845,
      "grad_norm": 0.4684097468852997,
      "learning_rate": 5.689726744990544e-06,
      "loss": 0.0104,
      "step": 1328280
    },
    {
      "epoch": 2.1737920831614987,
      "grad_norm": 0.16042278707027435,
      "learning_rate": 5.689660852777028e-06,
      "loss": 0.0189,
      "step": 1328300
    },
    {
      "epoch": 2.173824813600152,
      "grad_norm": 0.7419427037239075,
      "learning_rate": 5.689594960563511e-06,
      "loss": 0.0186,
      "step": 1328320
    },
    {
      "epoch": 2.173857544038805,
      "grad_norm": 0.5757818818092346,
      "learning_rate": 5.6895290683499935e-06,
      "loss": 0.0176,
      "step": 1328340
    },
    {
      "epoch": 2.1738902744774586,
      "grad_norm": 0.3502543270587921,
      "learning_rate": 5.689463176136476e-06,
      "loss": 0.016,
      "step": 1328360
    },
    {
      "epoch": 2.173923004916112,
      "grad_norm": 0.3668680489063263,
      "learning_rate": 5.68939728392296e-06,
      "loss": 0.0106,
      "step": 1328380
    },
    {
      "epoch": 2.1739557353547654,
      "grad_norm": 0.8018535375595093,
      "learning_rate": 5.689331391709442e-06,
      "loss": 0.0195,
      "step": 1328400
    },
    {
      "epoch": 2.1739884657934185,
      "grad_norm": 0.27085691690444946,
      "learning_rate": 5.689265499495925e-06,
      "loss": 0.0111,
      "step": 1328420
    },
    {
      "epoch": 2.174021196232072,
      "grad_norm": 0.5617386102676392,
      "learning_rate": 5.689199607282407e-06,
      "loss": 0.0177,
      "step": 1328440
    },
    {
      "epoch": 2.1740539266707253,
      "grad_norm": 0.3666088283061981,
      "learning_rate": 5.689133715068891e-06,
      "loss": 0.0227,
      "step": 1328460
    },
    {
      "epoch": 2.1740866571093784,
      "grad_norm": 0.16604653000831604,
      "learning_rate": 5.689067822855374e-06,
      "loss": 0.0143,
      "step": 1328480
    },
    {
      "epoch": 2.174119387548032,
      "grad_norm": 0.2924368679523468,
      "learning_rate": 5.689001930641856e-06,
      "loss": 0.0183,
      "step": 1328500
    },
    {
      "epoch": 2.174152117986685,
      "grad_norm": 0.6549804210662842,
      "learning_rate": 5.68893603842834e-06,
      "loss": 0.0204,
      "step": 1328520
    },
    {
      "epoch": 2.1741848484253388,
      "grad_norm": 0.20278562605381012,
      "learning_rate": 5.688870146214822e-06,
      "loss": 0.0128,
      "step": 1328540
    },
    {
      "epoch": 2.174217578863992,
      "grad_norm": 0.39255446195602417,
      "learning_rate": 5.688804254001305e-06,
      "loss": 0.0134,
      "step": 1328560
    },
    {
      "epoch": 2.174250309302645,
      "grad_norm": 0.26592040061950684,
      "learning_rate": 5.688738361787788e-06,
      "loss": 0.0163,
      "step": 1328580
    },
    {
      "epoch": 2.1742830397412987,
      "grad_norm": 0.6811174750328064,
      "learning_rate": 5.688672469574271e-06,
      "loss": 0.0101,
      "step": 1328600
    },
    {
      "epoch": 2.174315770179952,
      "grad_norm": 0.24264909327030182,
      "learning_rate": 5.6886065773607535e-06,
      "loss": 0.0169,
      "step": 1328620
    },
    {
      "epoch": 2.1743485006186054,
      "grad_norm": 0.3677297830581665,
      "learning_rate": 5.688540685147237e-06,
      "loss": 0.0177,
      "step": 1328640
    },
    {
      "epoch": 2.1743812310572586,
      "grad_norm": 0.3047887682914734,
      "learning_rate": 5.688474792933719e-06,
      "loss": 0.014,
      "step": 1328660
    },
    {
      "epoch": 2.174413961495912,
      "grad_norm": 0.4554242193698883,
      "learning_rate": 5.688408900720203e-06,
      "loss": 0.016,
      "step": 1328680
    },
    {
      "epoch": 2.1744466919345653,
      "grad_norm": 0.09227655827999115,
      "learning_rate": 5.6883430085066845e-06,
      "loss": 0.0121,
      "step": 1328700
    },
    {
      "epoch": 2.1744794223732185,
      "grad_norm": 0.5047332644462585,
      "learning_rate": 5.688277116293168e-06,
      "loss": 0.0176,
      "step": 1328720
    },
    {
      "epoch": 2.174512152811872,
      "grad_norm": 0.15876996517181396,
      "learning_rate": 5.688211224079651e-06,
      "loss": 0.0135,
      "step": 1328740
    },
    {
      "epoch": 2.174544883250525,
      "grad_norm": 0.728115439414978,
      "learning_rate": 5.6881453318661336e-06,
      "loss": 0.0179,
      "step": 1328760
    },
    {
      "epoch": 2.174577613689179,
      "grad_norm": 0.31599539518356323,
      "learning_rate": 5.688079439652616e-06,
      "loss": 0.009,
      "step": 1328780
    },
    {
      "epoch": 2.174610344127832,
      "grad_norm": 0.3841109275817871,
      "learning_rate": 5.6880135474391e-06,
      "loss": 0.0169,
      "step": 1328800
    },
    {
      "epoch": 2.1746430745664855,
      "grad_norm": 0.5416826605796814,
      "learning_rate": 5.687947655225583e-06,
      "loss": 0.014,
      "step": 1328820
    },
    {
      "epoch": 2.1746758050051387,
      "grad_norm": 0.256369411945343,
      "learning_rate": 5.687881763012065e-06,
      "loss": 0.0223,
      "step": 1328840
    },
    {
      "epoch": 2.174708535443792,
      "grad_norm": 0.13385099172592163,
      "learning_rate": 5.687815870798549e-06,
      "loss": 0.0176,
      "step": 1328860
    },
    {
      "epoch": 2.1747412658824454,
      "grad_norm": 0.0968298390507698,
      "learning_rate": 5.687749978585031e-06,
      "loss": 0.011,
      "step": 1328880
    },
    {
      "epoch": 2.1747739963210986,
      "grad_norm": 0.5314339399337769,
      "learning_rate": 5.6876840863715144e-06,
      "loss": 0.0122,
      "step": 1328900
    },
    {
      "epoch": 2.174806726759752,
      "grad_norm": 0.4450176954269409,
      "learning_rate": 5.687618194157996e-06,
      "loss": 0.0129,
      "step": 1328920
    },
    {
      "epoch": 2.1748394571984053,
      "grad_norm": 0.6628265380859375,
      "learning_rate": 5.68755230194448e-06,
      "loss": 0.0154,
      "step": 1328940
    },
    {
      "epoch": 2.174872187637059,
      "grad_norm": 0.2969464957714081,
      "learning_rate": 5.687486409730963e-06,
      "loss": 0.0183,
      "step": 1328960
    },
    {
      "epoch": 2.174904918075712,
      "grad_norm": 0.8600325584411621,
      "learning_rate": 5.687420517517445e-06,
      "loss": 0.0153,
      "step": 1328980
    },
    {
      "epoch": 2.174937648514365,
      "grad_norm": 0.2261817753314972,
      "learning_rate": 5.687354625303928e-06,
      "loss": 0.0133,
      "step": 1329000
    },
    {
      "epoch": 2.174970378953019,
      "grad_norm": 1.019640326499939,
      "learning_rate": 5.687288733090412e-06,
      "loss": 0.0106,
      "step": 1329020
    },
    {
      "epoch": 2.175003109391672,
      "grad_norm": 0.5347065329551697,
      "learning_rate": 5.687222840876894e-06,
      "loss": 0.0224,
      "step": 1329040
    },
    {
      "epoch": 2.1750358398303256,
      "grad_norm": 0.5205026865005493,
      "learning_rate": 5.687156948663377e-06,
      "loss": 0.0116,
      "step": 1329060
    },
    {
      "epoch": 2.1750685702689787,
      "grad_norm": 0.2494039535522461,
      "learning_rate": 5.687091056449859e-06,
      "loss": 0.0128,
      "step": 1329080
    },
    {
      "epoch": 2.175101300707632,
      "grad_norm": 0.32205837965011597,
      "learning_rate": 5.687025164236343e-06,
      "loss": 0.0134,
      "step": 1329100
    },
    {
      "epoch": 2.1751340311462855,
      "grad_norm": 0.12345994263887405,
      "learning_rate": 5.686959272022825e-06,
      "loss": 0.0096,
      "step": 1329120
    },
    {
      "epoch": 2.1751667615849386,
      "grad_norm": 0.4550727903842926,
      "learning_rate": 5.686893379809308e-06,
      "loss": 0.0247,
      "step": 1329140
    },
    {
      "epoch": 2.175199492023592,
      "grad_norm": 1.064712643623352,
      "learning_rate": 5.686827487595791e-06,
      "loss": 0.0154,
      "step": 1329160
    },
    {
      "epoch": 2.1752322224622453,
      "grad_norm": 0.19721265137195587,
      "learning_rate": 5.6867615953822745e-06,
      "loss": 0.0199,
      "step": 1329180
    },
    {
      "epoch": 2.175264952900899,
      "grad_norm": 0.3234291672706604,
      "learning_rate": 5.686695703168757e-06,
      "loss": 0.0123,
      "step": 1329200
    },
    {
      "epoch": 2.175297683339552,
      "grad_norm": 0.3443688154220581,
      "learning_rate": 5.68662981095524e-06,
      "loss": 0.0117,
      "step": 1329220
    },
    {
      "epoch": 2.1753304137782052,
      "grad_norm": 0.5225221514701843,
      "learning_rate": 5.6865639187417235e-06,
      "loss": 0.0151,
      "step": 1329240
    },
    {
      "epoch": 2.175363144216859,
      "grad_norm": 0.6191372275352478,
      "learning_rate": 5.6864980265282054e-06,
      "loss": 0.0093,
      "step": 1329260
    },
    {
      "epoch": 2.175395874655512,
      "grad_norm": 1.636698842048645,
      "learning_rate": 5.686432134314689e-06,
      "loss": 0.0171,
      "step": 1329280
    },
    {
      "epoch": 2.1754286050941656,
      "grad_norm": 0.19275280833244324,
      "learning_rate": 5.686366242101171e-06,
      "loss": 0.0172,
      "step": 1329300
    },
    {
      "epoch": 2.1754613355328187,
      "grad_norm": 0.8591490983963013,
      "learning_rate": 5.6863003498876545e-06,
      "loss": 0.026,
      "step": 1329320
    },
    {
      "epoch": 2.1754940659714723,
      "grad_norm": 0.42178699374198914,
      "learning_rate": 5.686234457674137e-06,
      "loss": 0.0174,
      "step": 1329340
    },
    {
      "epoch": 2.1755267964101255,
      "grad_norm": 0.2385650873184204,
      "learning_rate": 5.68616856546062e-06,
      "loss": 0.0154,
      "step": 1329360
    },
    {
      "epoch": 2.1755595268487786,
      "grad_norm": 0.5612003803253174,
      "learning_rate": 5.686102673247103e-06,
      "loss": 0.0179,
      "step": 1329380
    },
    {
      "epoch": 2.175592257287432,
      "grad_norm": 0.34851083159446716,
      "learning_rate": 5.686036781033586e-06,
      "loss": 0.017,
      "step": 1329400
    },
    {
      "epoch": 2.1756249877260854,
      "grad_norm": 0.377987802028656,
      "learning_rate": 5.685970888820068e-06,
      "loss": 0.0116,
      "step": 1329420
    },
    {
      "epoch": 2.175657718164739,
      "grad_norm": 0.18448004126548767,
      "learning_rate": 5.685904996606552e-06,
      "loss": 0.0191,
      "step": 1329440
    },
    {
      "epoch": 2.175690448603392,
      "grad_norm": 0.29050642251968384,
      "learning_rate": 5.685839104393034e-06,
      "loss": 0.0181,
      "step": 1329460
    },
    {
      "epoch": 2.1757231790420457,
      "grad_norm": 0.2729404866695404,
      "learning_rate": 5.685773212179517e-06,
      "loss": 0.0132,
      "step": 1329480
    },
    {
      "epoch": 2.175755909480699,
      "grad_norm": 0.9808539152145386,
      "learning_rate": 5.685707319965999e-06,
      "loss": 0.0138,
      "step": 1329500
    },
    {
      "epoch": 2.175788639919352,
      "grad_norm": 0.7445239424705505,
      "learning_rate": 5.685641427752483e-06,
      "loss": 0.0187,
      "step": 1329520
    },
    {
      "epoch": 2.1758213703580056,
      "grad_norm": 0.9780582785606384,
      "learning_rate": 5.685575535538966e-06,
      "loss": 0.0198,
      "step": 1329540
    },
    {
      "epoch": 2.1758541007966588,
      "grad_norm": 0.35180285573005676,
      "learning_rate": 5.685509643325448e-06,
      "loss": 0.0152,
      "step": 1329560
    },
    {
      "epoch": 2.1758868312353123,
      "grad_norm": 0.25821688771247864,
      "learning_rate": 5.685443751111932e-06,
      "loss": 0.0167,
      "step": 1329580
    },
    {
      "epoch": 2.1759195616739655,
      "grad_norm": 1.2462685108184814,
      "learning_rate": 5.6853778588984146e-06,
      "loss": 0.0142,
      "step": 1329600
    },
    {
      "epoch": 2.175952292112619,
      "grad_norm": 0.1097029447555542,
      "learning_rate": 5.685311966684897e-06,
      "loss": 0.0208,
      "step": 1329620
    },
    {
      "epoch": 2.1759850225512722,
      "grad_norm": 0.46124640107154846,
      "learning_rate": 5.68524607447138e-06,
      "loss": 0.0147,
      "step": 1329640
    },
    {
      "epoch": 2.1760177529899254,
      "grad_norm": 0.19007548689842224,
      "learning_rate": 5.685180182257864e-06,
      "loss": 0.021,
      "step": 1329660
    },
    {
      "epoch": 2.176050483428579,
      "grad_norm": 0.37484270334243774,
      "learning_rate": 5.6851142900443455e-06,
      "loss": 0.0133,
      "step": 1329680
    },
    {
      "epoch": 2.176083213867232,
      "grad_norm": 0.5567807555198669,
      "learning_rate": 5.685048397830829e-06,
      "loss": 0.0099,
      "step": 1329700
    },
    {
      "epoch": 2.1761159443058857,
      "grad_norm": 0.2444053590297699,
      "learning_rate": 5.684982505617311e-06,
      "loss": 0.0142,
      "step": 1329720
    },
    {
      "epoch": 2.176148674744539,
      "grad_norm": 0.12975677847862244,
      "learning_rate": 5.684916613403795e-06,
      "loss": 0.0145,
      "step": 1329740
    },
    {
      "epoch": 2.1761814051831925,
      "grad_norm": 0.63620924949646,
      "learning_rate": 5.684850721190277e-06,
      "loss": 0.0191,
      "step": 1329760
    },
    {
      "epoch": 2.1762141356218456,
      "grad_norm": 0.21029433608055115,
      "learning_rate": 5.68478482897676e-06,
      "loss": 0.0118,
      "step": 1329780
    },
    {
      "epoch": 2.1762468660604988,
      "grad_norm": 0.054254885762929916,
      "learning_rate": 5.684718936763243e-06,
      "loss": 0.0153,
      "step": 1329800
    },
    {
      "epoch": 2.1762795964991524,
      "grad_norm": 0.23206284642219543,
      "learning_rate": 5.684653044549726e-06,
      "loss": 0.0168,
      "step": 1329820
    },
    {
      "epoch": 2.1763123269378055,
      "grad_norm": 0.19941669702529907,
      "learning_rate": 5.684587152336208e-06,
      "loss": 0.0151,
      "step": 1329840
    },
    {
      "epoch": 2.176345057376459,
      "grad_norm": 0.7589437961578369,
      "learning_rate": 5.684521260122692e-06,
      "loss": 0.0212,
      "step": 1329860
    },
    {
      "epoch": 2.1763777878151123,
      "grad_norm": 1.0614176988601685,
      "learning_rate": 5.6844553679091755e-06,
      "loss": 0.0147,
      "step": 1329880
    },
    {
      "epoch": 2.176410518253766,
      "grad_norm": 4.487590312957764,
      "learning_rate": 5.684389475695657e-06,
      "loss": 0.0142,
      "step": 1329900
    },
    {
      "epoch": 2.176443248692419,
      "grad_norm": 0.4520929455757141,
      "learning_rate": 5.684323583482141e-06,
      "loss": 0.0161,
      "step": 1329920
    },
    {
      "epoch": 2.176475979131072,
      "grad_norm": 0.31048089265823364,
      "learning_rate": 5.684257691268623e-06,
      "loss": 0.0123,
      "step": 1329940
    },
    {
      "epoch": 2.1765087095697258,
      "grad_norm": 0.20612174272537231,
      "learning_rate": 5.684191799055106e-06,
      "loss": 0.0109,
      "step": 1329960
    },
    {
      "epoch": 2.176541440008379,
      "grad_norm": 0.330849826335907,
      "learning_rate": 5.684125906841589e-06,
      "loss": 0.0108,
      "step": 1329980
    },
    {
      "epoch": 2.1765741704470325,
      "grad_norm": 0.5236029028892517,
      "learning_rate": 5.684060014628072e-06,
      "loss": 0.0197,
      "step": 1330000
    },
    {
      "epoch": 2.1766069008856856,
      "grad_norm": 0.3065238893032074,
      "learning_rate": 5.683994122414555e-06,
      "loss": 0.0104,
      "step": 1330020
    },
    {
      "epoch": 2.1766396313243392,
      "grad_norm": 0.09019969403743744,
      "learning_rate": 5.683928230201038e-06,
      "loss": 0.0133,
      "step": 1330040
    },
    {
      "epoch": 2.1766723617629924,
      "grad_norm": 0.16246642172336578,
      "learning_rate": 5.68386233798752e-06,
      "loss": 0.0227,
      "step": 1330060
    },
    {
      "epoch": 2.1767050922016455,
      "grad_norm": 0.4080375134944916,
      "learning_rate": 5.683796445774004e-06,
      "loss": 0.0149,
      "step": 1330080
    },
    {
      "epoch": 2.176737822640299,
      "grad_norm": 0.37008732557296753,
      "learning_rate": 5.683730553560486e-06,
      "loss": 0.0126,
      "step": 1330100
    },
    {
      "epoch": 2.1767705530789523,
      "grad_norm": 0.2639676630496979,
      "learning_rate": 5.683664661346969e-06,
      "loss": 0.0121,
      "step": 1330120
    },
    {
      "epoch": 2.176803283517606,
      "grad_norm": 0.13858948647975922,
      "learning_rate": 5.683598769133452e-06,
      "loss": 0.0208,
      "step": 1330140
    },
    {
      "epoch": 2.176836013956259,
      "grad_norm": 0.14559274911880493,
      "learning_rate": 5.683532876919935e-06,
      "loss": 0.0131,
      "step": 1330160
    },
    {
      "epoch": 2.176868744394912,
      "grad_norm": 0.48573482036590576,
      "learning_rate": 5.683466984706417e-06,
      "loss": 0.0173,
      "step": 1330180
    },
    {
      "epoch": 2.1769014748335658,
      "grad_norm": 0.2833077907562256,
      "learning_rate": 5.683401092492901e-06,
      "loss": 0.0128,
      "step": 1330200
    },
    {
      "epoch": 2.176934205272219,
      "grad_norm": 0.6433382034301758,
      "learning_rate": 5.683335200279383e-06,
      "loss": 0.0197,
      "step": 1330220
    },
    {
      "epoch": 2.1769669357108725,
      "grad_norm": 0.12337446957826614,
      "learning_rate": 5.6832693080658665e-06,
      "loss": 0.0139,
      "step": 1330240
    },
    {
      "epoch": 2.1769996661495257,
      "grad_norm": 0.7144228219985962,
      "learning_rate": 5.68320341585235e-06,
      "loss": 0.0135,
      "step": 1330260
    },
    {
      "epoch": 2.1770323965881793,
      "grad_norm": 0.6675435304641724,
      "learning_rate": 5.683137523638832e-06,
      "loss": 0.0183,
      "step": 1330280
    },
    {
      "epoch": 2.1770651270268324,
      "grad_norm": 0.17414604127407074,
      "learning_rate": 5.6830716314253155e-06,
      "loss": 0.0161,
      "step": 1330300
    },
    {
      "epoch": 2.1770978574654856,
      "grad_norm": 0.17597444355487823,
      "learning_rate": 5.6830057392117974e-06,
      "loss": 0.0125,
      "step": 1330320
    },
    {
      "epoch": 2.177130587904139,
      "grad_norm": 0.10777200758457184,
      "learning_rate": 5.682939846998281e-06,
      "loss": 0.015,
      "step": 1330340
    },
    {
      "epoch": 2.1771633183427923,
      "grad_norm": 0.3321976661682129,
      "learning_rate": 5.682873954784764e-06,
      "loss": 0.0155,
      "step": 1330360
    },
    {
      "epoch": 2.177196048781446,
      "grad_norm": 0.9729655981063843,
      "learning_rate": 5.6828080625712465e-06,
      "loss": 0.014,
      "step": 1330380
    },
    {
      "epoch": 2.177228779220099,
      "grad_norm": 0.3090944290161133,
      "learning_rate": 5.682742170357729e-06,
      "loss": 0.0178,
      "step": 1330400
    },
    {
      "epoch": 2.1772615096587526,
      "grad_norm": 0.29912129044532776,
      "learning_rate": 5.682676278144213e-06,
      "loss": 0.0162,
      "step": 1330420
    },
    {
      "epoch": 2.177294240097406,
      "grad_norm": 0.2842567563056946,
      "learning_rate": 5.682610385930695e-06,
      "loss": 0.0104,
      "step": 1330440
    },
    {
      "epoch": 2.177326970536059,
      "grad_norm": 0.1778683215379715,
      "learning_rate": 5.682544493717178e-06,
      "loss": 0.0108,
      "step": 1330460
    },
    {
      "epoch": 2.1773597009747125,
      "grad_norm": 0.31359103322029114,
      "learning_rate": 5.68247860150366e-06,
      "loss": 0.0141,
      "step": 1330480
    },
    {
      "epoch": 2.1773924314133657,
      "grad_norm": 0.1570407599210739,
      "learning_rate": 5.682412709290144e-06,
      "loss": 0.0161,
      "step": 1330500
    },
    {
      "epoch": 2.1774251618520193,
      "grad_norm": 1.3178720474243164,
      "learning_rate": 5.682346817076626e-06,
      "loss": 0.014,
      "step": 1330520
    },
    {
      "epoch": 2.1774578922906724,
      "grad_norm": 0.46966615319252014,
      "learning_rate": 5.682280924863109e-06,
      "loss": 0.0176,
      "step": 1330540
    },
    {
      "epoch": 2.1774906227293256,
      "grad_norm": 0.9947913885116577,
      "learning_rate": 5.682215032649592e-06,
      "loss": 0.0207,
      "step": 1330560
    },
    {
      "epoch": 2.177523353167979,
      "grad_norm": 0.17809391021728516,
      "learning_rate": 5.682149140436075e-06,
      "loss": 0.0152,
      "step": 1330580
    },
    {
      "epoch": 2.1775560836066323,
      "grad_norm": 0.4825802743434906,
      "learning_rate": 5.682083248222558e-06,
      "loss": 0.0214,
      "step": 1330600
    },
    {
      "epoch": 2.177588814045286,
      "grad_norm": 0.12941737473011017,
      "learning_rate": 5.682017356009041e-06,
      "loss": 0.0169,
      "step": 1330620
    },
    {
      "epoch": 2.177621544483939,
      "grad_norm": 0.16639076173305511,
      "learning_rate": 5.681951463795524e-06,
      "loss": 0.0141,
      "step": 1330640
    },
    {
      "epoch": 2.1776542749225927,
      "grad_norm": 0.23757466673851013,
      "learning_rate": 5.6818855715820065e-06,
      "loss": 0.0157,
      "step": 1330660
    },
    {
      "epoch": 2.177687005361246,
      "grad_norm": 0.12241194397211075,
      "learning_rate": 5.68181967936849e-06,
      "loss": 0.0196,
      "step": 1330680
    },
    {
      "epoch": 2.177719735799899,
      "grad_norm": 0.2557619512081146,
      "learning_rate": 5.681753787154972e-06,
      "loss": 0.0121,
      "step": 1330700
    },
    {
      "epoch": 2.1777524662385526,
      "grad_norm": 0.1669003665447235,
      "learning_rate": 5.681687894941456e-06,
      "loss": 0.0103,
      "step": 1330720
    },
    {
      "epoch": 2.1777851966772057,
      "grad_norm": 0.2966757118701935,
      "learning_rate": 5.6816220027279375e-06,
      "loss": 0.0101,
      "step": 1330740
    },
    {
      "epoch": 2.1778179271158593,
      "grad_norm": 0.46493929624557495,
      "learning_rate": 5.681556110514421e-06,
      "loss": 0.0196,
      "step": 1330760
    },
    {
      "epoch": 2.1778506575545125,
      "grad_norm": 0.6649301648139954,
      "learning_rate": 5.681490218300904e-06,
      "loss": 0.0148,
      "step": 1330780
    },
    {
      "epoch": 2.177883387993166,
      "grad_norm": 0.5861864686012268,
      "learning_rate": 5.6814243260873866e-06,
      "loss": 0.0187,
      "step": 1330800
    },
    {
      "epoch": 2.177916118431819,
      "grad_norm": 0.23447155952453613,
      "learning_rate": 5.681358433873869e-06,
      "loss": 0.0116,
      "step": 1330820
    },
    {
      "epoch": 2.1779488488704724,
      "grad_norm": 0.3314298987388611,
      "learning_rate": 5.681292541660353e-06,
      "loss": 0.0133,
      "step": 1330840
    },
    {
      "epoch": 2.177981579309126,
      "grad_norm": 0.17092713713645935,
      "learning_rate": 5.681226649446835e-06,
      "loss": 0.0157,
      "step": 1330860
    },
    {
      "epoch": 2.178014309747779,
      "grad_norm": 0.5200029015541077,
      "learning_rate": 5.681160757233318e-06,
      "loss": 0.0158,
      "step": 1330880
    },
    {
      "epoch": 2.1780470401864327,
      "grad_norm": 0.07922223210334778,
      "learning_rate": 5.6810948650198e-06,
      "loss": 0.019,
      "step": 1330900
    },
    {
      "epoch": 2.178079770625086,
      "grad_norm": 0.409371018409729,
      "learning_rate": 5.681028972806284e-06,
      "loss": 0.0142,
      "step": 1330920
    },
    {
      "epoch": 2.1781125010637394,
      "grad_norm": 0.392344206571579,
      "learning_rate": 5.6809630805927674e-06,
      "loss": 0.0197,
      "step": 1330940
    },
    {
      "epoch": 2.1781452315023926,
      "grad_norm": 0.5160627961158752,
      "learning_rate": 5.680897188379249e-06,
      "loss": 0.0164,
      "step": 1330960
    },
    {
      "epoch": 2.1781779619410457,
      "grad_norm": 0.234125018119812,
      "learning_rate": 5.680831296165733e-06,
      "loss": 0.0107,
      "step": 1330980
    },
    {
      "epoch": 2.1782106923796993,
      "grad_norm": 0.42392343282699585,
      "learning_rate": 5.680765403952216e-06,
      "loss": 0.0139,
      "step": 1331000
    },
    {
      "epoch": 2.1782434228183525,
      "grad_norm": 0.23743970692157745,
      "learning_rate": 5.680699511738698e-06,
      "loss": 0.0126,
      "step": 1331020
    },
    {
      "epoch": 2.178276153257006,
      "grad_norm": 0.19465738534927368,
      "learning_rate": 5.680633619525181e-06,
      "loss": 0.0178,
      "step": 1331040
    },
    {
      "epoch": 2.1783088836956592,
      "grad_norm": 0.3920208513736725,
      "learning_rate": 5.680567727311665e-06,
      "loss": 0.0164,
      "step": 1331060
    },
    {
      "epoch": 2.178341614134313,
      "grad_norm": 0.3013618290424347,
      "learning_rate": 5.680501835098147e-06,
      "loss": 0.0116,
      "step": 1331080
    },
    {
      "epoch": 2.178374344572966,
      "grad_norm": 1.162274956703186,
      "learning_rate": 5.68043594288463e-06,
      "loss": 0.0165,
      "step": 1331100
    },
    {
      "epoch": 2.178407075011619,
      "grad_norm": 0.24288421869277954,
      "learning_rate": 5.680370050671112e-06,
      "loss": 0.0201,
      "step": 1331120
    },
    {
      "epoch": 2.1784398054502727,
      "grad_norm": 0.16157829761505127,
      "learning_rate": 5.680304158457596e-06,
      "loss": 0.0186,
      "step": 1331140
    },
    {
      "epoch": 2.178472535888926,
      "grad_norm": 0.6285778284072876,
      "learning_rate": 5.6802382662440784e-06,
      "loss": 0.0177,
      "step": 1331160
    },
    {
      "epoch": 2.1785052663275795,
      "grad_norm": 0.125014528632164,
      "learning_rate": 5.680172374030561e-06,
      "loss": 0.0134,
      "step": 1331180
    },
    {
      "epoch": 2.1785379967662326,
      "grad_norm": 0.16903650760650635,
      "learning_rate": 5.680106481817044e-06,
      "loss": 0.0206,
      "step": 1331200
    },
    {
      "epoch": 2.178570727204886,
      "grad_norm": 0.29428890347480774,
      "learning_rate": 5.6800405896035275e-06,
      "loss": 0.0196,
      "step": 1331220
    },
    {
      "epoch": 2.1786034576435394,
      "grad_norm": 0.8250319361686707,
      "learning_rate": 5.679974697390009e-06,
      "loss": 0.0259,
      "step": 1331240
    },
    {
      "epoch": 2.1786361880821925,
      "grad_norm": 0.25415536761283875,
      "learning_rate": 5.679908805176493e-06,
      "loss": 0.0134,
      "step": 1331260
    },
    {
      "epoch": 2.178668918520846,
      "grad_norm": 0.18522097170352936,
      "learning_rate": 5.679842912962975e-06,
      "loss": 0.0103,
      "step": 1331280
    },
    {
      "epoch": 2.1787016489594992,
      "grad_norm": 0.2592892348766327,
      "learning_rate": 5.6797770207494584e-06,
      "loss": 0.0148,
      "step": 1331300
    },
    {
      "epoch": 2.178734379398153,
      "grad_norm": 0.4662263095378876,
      "learning_rate": 5.679711128535942e-06,
      "loss": 0.0169,
      "step": 1331320
    },
    {
      "epoch": 2.178767109836806,
      "grad_norm": 0.7539892792701721,
      "learning_rate": 5.679645236322424e-06,
      "loss": 0.0131,
      "step": 1331340
    },
    {
      "epoch": 2.1787998402754596,
      "grad_norm": 0.4652576744556427,
      "learning_rate": 5.6795793441089075e-06,
      "loss": 0.0145,
      "step": 1331360
    },
    {
      "epoch": 2.1788325707141127,
      "grad_norm": 0.23282559216022491,
      "learning_rate": 5.679513451895389e-06,
      "loss": 0.0177,
      "step": 1331380
    },
    {
      "epoch": 2.178865301152766,
      "grad_norm": 0.31341180205345154,
      "learning_rate": 5.679447559681873e-06,
      "loss": 0.0122,
      "step": 1331400
    },
    {
      "epoch": 2.1788980315914195,
      "grad_norm": 0.571274995803833,
      "learning_rate": 5.679381667468356e-06,
      "loss": 0.0157,
      "step": 1331420
    },
    {
      "epoch": 2.1789307620300726,
      "grad_norm": 0.5692926645278931,
      "learning_rate": 5.679315775254839e-06,
      "loss": 0.0089,
      "step": 1331440
    },
    {
      "epoch": 2.1789634924687262,
      "grad_norm": 0.1470450907945633,
      "learning_rate": 5.679249883041321e-06,
      "loss": 0.0152,
      "step": 1331460
    },
    {
      "epoch": 2.1789962229073794,
      "grad_norm": 1.2746434211730957,
      "learning_rate": 5.679183990827805e-06,
      "loss": 0.0168,
      "step": 1331480
    },
    {
      "epoch": 2.179028953346033,
      "grad_norm": 0.7243528366088867,
      "learning_rate": 5.679118098614287e-06,
      "loss": 0.022,
      "step": 1331500
    },
    {
      "epoch": 2.179061683784686,
      "grad_norm": 0.5650370717048645,
      "learning_rate": 5.67905220640077e-06,
      "loss": 0.0155,
      "step": 1331520
    },
    {
      "epoch": 2.1790944142233393,
      "grad_norm": 1.6624109745025635,
      "learning_rate": 5.678986314187252e-06,
      "loss": 0.0131,
      "step": 1331540
    },
    {
      "epoch": 2.179127144661993,
      "grad_norm": 1.629568338394165,
      "learning_rate": 5.678920421973736e-06,
      "loss": 0.013,
      "step": 1331560
    },
    {
      "epoch": 2.179159875100646,
      "grad_norm": 0.3491288423538208,
      "learning_rate": 5.6788545297602185e-06,
      "loss": 0.0124,
      "step": 1331580
    },
    {
      "epoch": 2.1791926055392996,
      "grad_norm": 0.31584271788597107,
      "learning_rate": 5.678788637546701e-06,
      "loss": 0.0117,
      "step": 1331600
    },
    {
      "epoch": 2.1792253359779528,
      "grad_norm": 0.4189375340938568,
      "learning_rate": 5.678722745333184e-06,
      "loss": 0.0162,
      "step": 1331620
    },
    {
      "epoch": 2.179258066416606,
      "grad_norm": 0.6450182795524597,
      "learning_rate": 5.6786568531196676e-06,
      "loss": 0.0166,
      "step": 1331640
    },
    {
      "epoch": 2.1792907968552595,
      "grad_norm": 0.4344135820865631,
      "learning_rate": 5.67859096090615e-06,
      "loss": 0.0115,
      "step": 1331660
    },
    {
      "epoch": 2.1793235272939127,
      "grad_norm": 0.3675246834754944,
      "learning_rate": 5.678525068692633e-06,
      "loss": 0.0176,
      "step": 1331680
    },
    {
      "epoch": 2.1793562577325662,
      "grad_norm": 0.49596765637397766,
      "learning_rate": 5.678459176479117e-06,
      "loss": 0.019,
      "step": 1331700
    },
    {
      "epoch": 2.1793889881712194,
      "grad_norm": 0.3696490526199341,
      "learning_rate": 5.6783932842655985e-06,
      "loss": 0.0158,
      "step": 1331720
    },
    {
      "epoch": 2.179421718609873,
      "grad_norm": 0.2161947786808014,
      "learning_rate": 5.678327392052082e-06,
      "loss": 0.0178,
      "step": 1331740
    },
    {
      "epoch": 2.179454449048526,
      "grad_norm": 0.3520359694957733,
      "learning_rate": 5.678261499838564e-06,
      "loss": 0.0135,
      "step": 1331760
    },
    {
      "epoch": 2.1794871794871793,
      "grad_norm": 0.39449718594551086,
      "learning_rate": 5.678195607625048e-06,
      "loss": 0.016,
      "step": 1331780
    },
    {
      "epoch": 2.179519909925833,
      "grad_norm": 0.376348614692688,
      "learning_rate": 5.67812971541153e-06,
      "loss": 0.0091,
      "step": 1331800
    },
    {
      "epoch": 2.179552640364486,
      "grad_norm": 1.0099750757217407,
      "learning_rate": 5.678063823198013e-06,
      "loss": 0.0128,
      "step": 1331820
    },
    {
      "epoch": 2.1795853708031396,
      "grad_norm": 0.1289958506822586,
      "learning_rate": 5.677997930984496e-06,
      "loss": 0.0214,
      "step": 1331840
    },
    {
      "epoch": 2.179618101241793,
      "grad_norm": 0.3468288779258728,
      "learning_rate": 5.677932038770979e-06,
      "loss": 0.0125,
      "step": 1331860
    },
    {
      "epoch": 2.1796508316804464,
      "grad_norm": 1.0105136632919312,
      "learning_rate": 5.677866146557461e-06,
      "loss": 0.0152,
      "step": 1331880
    },
    {
      "epoch": 2.1796835621190995,
      "grad_norm": 0.7902092933654785,
      "learning_rate": 5.677800254343945e-06,
      "loss": 0.0127,
      "step": 1331900
    },
    {
      "epoch": 2.1797162925577527,
      "grad_norm": 1.092939853668213,
      "learning_rate": 5.677734362130427e-06,
      "loss": 0.0142,
      "step": 1331920
    },
    {
      "epoch": 2.1797490229964063,
      "grad_norm": 0.4650302529335022,
      "learning_rate": 5.67766846991691e-06,
      "loss": 0.0171,
      "step": 1331940
    },
    {
      "epoch": 2.1797817534350594,
      "grad_norm": 0.3727513551712036,
      "learning_rate": 5.677602577703393e-06,
      "loss": 0.0125,
      "step": 1331960
    },
    {
      "epoch": 2.179814483873713,
      "grad_norm": 0.49451521039009094,
      "learning_rate": 5.677536685489876e-06,
      "loss": 0.0197,
      "step": 1331980
    },
    {
      "epoch": 2.179847214312366,
      "grad_norm": 0.12091516703367233,
      "learning_rate": 5.677470793276359e-06,
      "loss": 0.0208,
      "step": 1332000
    },
    {
      "epoch": 2.1798799447510198,
      "grad_norm": 0.7961663603782654,
      "learning_rate": 5.677404901062842e-06,
      "loss": 0.0212,
      "step": 1332020
    },
    {
      "epoch": 2.179912675189673,
      "grad_norm": 0.15372927486896515,
      "learning_rate": 5.677339008849325e-06,
      "loss": 0.0192,
      "step": 1332040
    },
    {
      "epoch": 2.179945405628326,
      "grad_norm": 0.2590019404888153,
      "learning_rate": 5.677273116635808e-06,
      "loss": 0.0151,
      "step": 1332060
    },
    {
      "epoch": 2.1799781360669797,
      "grad_norm": 0.44228485226631165,
      "learning_rate": 5.677207224422291e-06,
      "loss": 0.0107,
      "step": 1332080
    },
    {
      "epoch": 2.180010866505633,
      "grad_norm": 0.1637241542339325,
      "learning_rate": 5.677141332208773e-06,
      "loss": 0.0162,
      "step": 1332100
    },
    {
      "epoch": 2.1800435969442864,
      "grad_norm": 0.4687139391899109,
      "learning_rate": 5.677075439995257e-06,
      "loss": 0.0205,
      "step": 1332120
    },
    {
      "epoch": 2.1800763273829395,
      "grad_norm": 0.48999157547950745,
      "learning_rate": 5.677009547781739e-06,
      "loss": 0.0161,
      "step": 1332140
    },
    {
      "epoch": 2.1801090578215927,
      "grad_norm": 0.12385427206754684,
      "learning_rate": 5.676943655568222e-06,
      "loss": 0.0165,
      "step": 1332160
    },
    {
      "epoch": 2.1801417882602463,
      "grad_norm": 0.20715269446372986,
      "learning_rate": 5.676877763354705e-06,
      "loss": 0.0205,
      "step": 1332180
    },
    {
      "epoch": 2.1801745186988994,
      "grad_norm": 0.2643589675426483,
      "learning_rate": 5.676811871141188e-06,
      "loss": 0.0129,
      "step": 1332200
    },
    {
      "epoch": 2.180207249137553,
      "grad_norm": 0.2870669960975647,
      "learning_rate": 5.67674597892767e-06,
      "loss": 0.0157,
      "step": 1332220
    },
    {
      "epoch": 2.180239979576206,
      "grad_norm": 0.501987874507904,
      "learning_rate": 5.676680086714154e-06,
      "loss": 0.0155,
      "step": 1332240
    },
    {
      "epoch": 2.18027271001486,
      "grad_norm": 0.3381851613521576,
      "learning_rate": 5.676614194500636e-06,
      "loss": 0.0128,
      "step": 1332260
    },
    {
      "epoch": 2.180305440453513,
      "grad_norm": 0.27913403511047363,
      "learning_rate": 5.6765483022871195e-06,
      "loss": 0.0126,
      "step": 1332280
    },
    {
      "epoch": 2.180338170892166,
      "grad_norm": 0.0976739227771759,
      "learning_rate": 5.676482410073601e-06,
      "loss": 0.015,
      "step": 1332300
    },
    {
      "epoch": 2.1803709013308197,
      "grad_norm": 0.21508163213729858,
      "learning_rate": 5.676416517860085e-06,
      "loss": 0.0133,
      "step": 1332320
    },
    {
      "epoch": 2.180403631769473,
      "grad_norm": 0.34308335185050964,
      "learning_rate": 5.6763506256465685e-06,
      "loss": 0.0171,
      "step": 1332340
    },
    {
      "epoch": 2.1804363622081264,
      "grad_norm": 0.22341428697109222,
      "learning_rate": 5.6762847334330504e-06,
      "loss": 0.0114,
      "step": 1332360
    },
    {
      "epoch": 2.1804690926467796,
      "grad_norm": 0.30174821615219116,
      "learning_rate": 5.676218841219534e-06,
      "loss": 0.0153,
      "step": 1332380
    },
    {
      "epoch": 2.180501823085433,
      "grad_norm": 0.16986685991287231,
      "learning_rate": 5.676152949006016e-06,
      "loss": 0.0169,
      "step": 1332400
    },
    {
      "epoch": 2.1805345535240863,
      "grad_norm": 0.48329275846481323,
      "learning_rate": 5.6760870567924995e-06,
      "loss": 0.0158,
      "step": 1332420
    },
    {
      "epoch": 2.1805672839627395,
      "grad_norm": 0.6784951090812683,
      "learning_rate": 5.676021164578982e-06,
      "loss": 0.0187,
      "step": 1332440
    },
    {
      "epoch": 2.180600014401393,
      "grad_norm": 0.3196708559989929,
      "learning_rate": 5.675955272365465e-06,
      "loss": 0.0134,
      "step": 1332460
    },
    {
      "epoch": 2.180632744840046,
      "grad_norm": 0.5508933067321777,
      "learning_rate": 5.675889380151948e-06,
      "loss": 0.0165,
      "step": 1332480
    },
    {
      "epoch": 2.1806654752787,
      "grad_norm": 1.1196914911270142,
      "learning_rate": 5.675823487938431e-06,
      "loss": 0.0209,
      "step": 1332500
    },
    {
      "epoch": 2.180698205717353,
      "grad_norm": 0.2286382019519806,
      "learning_rate": 5.675757595724913e-06,
      "loss": 0.0191,
      "step": 1332520
    },
    {
      "epoch": 2.1807309361560065,
      "grad_norm": 0.17406271398067474,
      "learning_rate": 5.675691703511397e-06,
      "loss": 0.0084,
      "step": 1332540
    },
    {
      "epoch": 2.1807636665946597,
      "grad_norm": 0.492194801568985,
      "learning_rate": 5.675625811297879e-06,
      "loss": 0.0124,
      "step": 1332560
    },
    {
      "epoch": 2.180796397033313,
      "grad_norm": 0.38775813579559326,
      "learning_rate": 5.675559919084362e-06,
      "loss": 0.0198,
      "step": 1332580
    },
    {
      "epoch": 2.1808291274719664,
      "grad_norm": 0.36336004734039307,
      "learning_rate": 5.675494026870845e-06,
      "loss": 0.0144,
      "step": 1332600
    },
    {
      "epoch": 2.1808618579106196,
      "grad_norm": 0.7522982954978943,
      "learning_rate": 5.675428134657328e-06,
      "loss": 0.013,
      "step": 1332620
    },
    {
      "epoch": 2.180894588349273,
      "grad_norm": 0.22960221767425537,
      "learning_rate": 5.6753622424438105e-06,
      "loss": 0.022,
      "step": 1332640
    },
    {
      "epoch": 2.1809273187879263,
      "grad_norm": 0.7819602489471436,
      "learning_rate": 5.675296350230294e-06,
      "loss": 0.0219,
      "step": 1332660
    },
    {
      "epoch": 2.18096004922658,
      "grad_norm": 0.13025307655334473,
      "learning_rate": 5.675230458016776e-06,
      "loss": 0.0163,
      "step": 1332680
    },
    {
      "epoch": 2.180992779665233,
      "grad_norm": 0.4515482783317566,
      "learning_rate": 5.6751645658032595e-06,
      "loss": 0.0161,
      "step": 1332700
    },
    {
      "epoch": 2.1810255101038862,
      "grad_norm": 0.14402174949645996,
      "learning_rate": 5.675098673589743e-06,
      "loss": 0.0154,
      "step": 1332720
    },
    {
      "epoch": 2.18105824054254,
      "grad_norm": 0.6515021324157715,
      "learning_rate": 5.675032781376225e-06,
      "loss": 0.0161,
      "step": 1332740
    },
    {
      "epoch": 2.181090970981193,
      "grad_norm": 0.44210293889045715,
      "learning_rate": 5.674966889162709e-06,
      "loss": 0.0142,
      "step": 1332760
    },
    {
      "epoch": 2.1811237014198466,
      "grad_norm": 0.3750678300857544,
      "learning_rate": 5.6749009969491905e-06,
      "loss": 0.0136,
      "step": 1332780
    },
    {
      "epoch": 2.1811564318584997,
      "grad_norm": 0.4953550100326538,
      "learning_rate": 5.674835104735674e-06,
      "loss": 0.0112,
      "step": 1332800
    },
    {
      "epoch": 2.1811891622971533,
      "grad_norm": 0.9593545198440552,
      "learning_rate": 5.674769212522157e-06,
      "loss": 0.0135,
      "step": 1332820
    },
    {
      "epoch": 2.1812218927358065,
      "grad_norm": 1.1400444507598877,
      "learning_rate": 5.6747033203086396e-06,
      "loss": 0.0213,
      "step": 1332840
    },
    {
      "epoch": 2.1812546231744596,
      "grad_norm": 0.31378892064094543,
      "learning_rate": 5.674637428095122e-06,
      "loss": 0.0143,
      "step": 1332860
    },
    {
      "epoch": 2.181287353613113,
      "grad_norm": 0.5551381707191467,
      "learning_rate": 5.674571535881606e-06,
      "loss": 0.0151,
      "step": 1332880
    },
    {
      "epoch": 2.1813200840517664,
      "grad_norm": 0.37504714727401733,
      "learning_rate": 5.674505643668088e-06,
      "loss": 0.0098,
      "step": 1332900
    },
    {
      "epoch": 2.18135281449042,
      "grad_norm": 0.1857069879770279,
      "learning_rate": 5.674439751454571e-06,
      "loss": 0.0133,
      "step": 1332920
    },
    {
      "epoch": 2.181385544929073,
      "grad_norm": 0.3798220455646515,
      "learning_rate": 5.674373859241053e-06,
      "loss": 0.0122,
      "step": 1332940
    },
    {
      "epoch": 2.1814182753677267,
      "grad_norm": 1.2011252641677856,
      "learning_rate": 5.674307967027537e-06,
      "loss": 0.016,
      "step": 1332960
    },
    {
      "epoch": 2.18145100580638,
      "grad_norm": 0.23732779920101166,
      "learning_rate": 5.67424207481402e-06,
      "loss": 0.0122,
      "step": 1332980
    },
    {
      "epoch": 2.181483736245033,
      "grad_norm": 0.3910464644432068,
      "learning_rate": 5.674176182600502e-06,
      "loss": 0.0145,
      "step": 1333000
    },
    {
      "epoch": 2.1815164666836866,
      "grad_norm": 2.0545756816864014,
      "learning_rate": 5.674110290386985e-06,
      "loss": 0.02,
      "step": 1333020
    },
    {
      "epoch": 2.1815491971223397,
      "grad_norm": 0.3109869658946991,
      "learning_rate": 5.674044398173469e-06,
      "loss": 0.0135,
      "step": 1333040
    },
    {
      "epoch": 2.1815819275609933,
      "grad_norm": 0.50481116771698,
      "learning_rate": 5.673978505959951e-06,
      "loss": 0.0165,
      "step": 1333060
    },
    {
      "epoch": 2.1816146579996465,
      "grad_norm": 0.35640859603881836,
      "learning_rate": 5.673912613746434e-06,
      "loss": 0.0191,
      "step": 1333080
    },
    {
      "epoch": 2.1816473884383,
      "grad_norm": 0.6762171983718872,
      "learning_rate": 5.673846721532918e-06,
      "loss": 0.0225,
      "step": 1333100
    },
    {
      "epoch": 2.1816801188769532,
      "grad_norm": 0.30537205934524536,
      "learning_rate": 5.6737808293194e-06,
      "loss": 0.0163,
      "step": 1333120
    },
    {
      "epoch": 2.1817128493156064,
      "grad_norm": 0.07417364418506622,
      "learning_rate": 5.673714937105883e-06,
      "loss": 0.0089,
      "step": 1333140
    },
    {
      "epoch": 2.18174557975426,
      "grad_norm": 0.47398462891578674,
      "learning_rate": 5.673649044892365e-06,
      "loss": 0.0146,
      "step": 1333160
    },
    {
      "epoch": 2.181778310192913,
      "grad_norm": 0.16934572160243988,
      "learning_rate": 5.673583152678849e-06,
      "loss": 0.014,
      "step": 1333180
    },
    {
      "epoch": 2.1818110406315667,
      "grad_norm": 1.5622061491012573,
      "learning_rate": 5.6735172604653314e-06,
      "loss": 0.012,
      "step": 1333200
    },
    {
      "epoch": 2.18184377107022,
      "grad_norm": 0.9188854694366455,
      "learning_rate": 5.673451368251814e-06,
      "loss": 0.0198,
      "step": 1333220
    },
    {
      "epoch": 2.181876501508873,
      "grad_norm": 0.48178088665008545,
      "learning_rate": 5.673385476038297e-06,
      "loss": 0.0146,
      "step": 1333240
    },
    {
      "epoch": 2.1819092319475266,
      "grad_norm": 0.15384680032730103,
      "learning_rate": 5.6733195838247805e-06,
      "loss": 0.013,
      "step": 1333260
    },
    {
      "epoch": 2.1819419623861798,
      "grad_norm": 0.28332915902137756,
      "learning_rate": 5.673253691611262e-06,
      "loss": 0.0146,
      "step": 1333280
    },
    {
      "epoch": 2.1819746928248334,
      "grad_norm": 0.4513983726501465,
      "learning_rate": 5.673187799397746e-06,
      "loss": 0.0117,
      "step": 1333300
    },
    {
      "epoch": 2.1820074232634865,
      "grad_norm": 0.20608235895633698,
      "learning_rate": 5.673121907184228e-06,
      "loss": 0.0195,
      "step": 1333320
    },
    {
      "epoch": 2.18204015370214,
      "grad_norm": 0.18739329278469086,
      "learning_rate": 5.6730560149707115e-06,
      "loss": 0.0134,
      "step": 1333340
    },
    {
      "epoch": 2.1820728841407933,
      "grad_norm": 0.827720046043396,
      "learning_rate": 5.672990122757193e-06,
      "loss": 0.0195,
      "step": 1333360
    },
    {
      "epoch": 2.1821056145794464,
      "grad_norm": 0.3260183334350586,
      "learning_rate": 5.672924230543677e-06,
      "loss": 0.0144,
      "step": 1333380
    },
    {
      "epoch": 2.1821383450181,
      "grad_norm": 0.81373530626297,
      "learning_rate": 5.6728583383301605e-06,
      "loss": 0.0138,
      "step": 1333400
    },
    {
      "epoch": 2.182171075456753,
      "grad_norm": 0.34666287899017334,
      "learning_rate": 5.672792446116642e-06,
      "loss": 0.0214,
      "step": 1333420
    },
    {
      "epoch": 2.1822038058954067,
      "grad_norm": 0.4266321063041687,
      "learning_rate": 5.672726553903126e-06,
      "loss": 0.0172,
      "step": 1333440
    },
    {
      "epoch": 2.18223653633406,
      "grad_norm": 0.3157639801502228,
      "learning_rate": 5.672660661689609e-06,
      "loss": 0.0151,
      "step": 1333460
    },
    {
      "epoch": 2.1822692667727135,
      "grad_norm": 0.7948887944221497,
      "learning_rate": 5.6725947694760915e-06,
      "loss": 0.0217,
      "step": 1333480
    },
    {
      "epoch": 2.1823019972113666,
      "grad_norm": 0.1362563818693161,
      "learning_rate": 5.672528877262574e-06,
      "loss": 0.0167,
      "step": 1333500
    },
    {
      "epoch": 2.18233472765002,
      "grad_norm": 0.2976808249950409,
      "learning_rate": 5.672462985049058e-06,
      "loss": 0.0151,
      "step": 1333520
    },
    {
      "epoch": 2.1823674580886734,
      "grad_norm": 0.6035668849945068,
      "learning_rate": 5.67239709283554e-06,
      "loss": 0.0176,
      "step": 1333540
    },
    {
      "epoch": 2.1824001885273265,
      "grad_norm": 1.0973063707351685,
      "learning_rate": 5.672331200622023e-06,
      "loss": 0.0175,
      "step": 1333560
    },
    {
      "epoch": 2.18243291896598,
      "grad_norm": 0.4243534505367279,
      "learning_rate": 5.672265308408505e-06,
      "loss": 0.0153,
      "step": 1333580
    },
    {
      "epoch": 2.1824656494046333,
      "grad_norm": 0.42460349202156067,
      "learning_rate": 5.672199416194989e-06,
      "loss": 0.0183,
      "step": 1333600
    },
    {
      "epoch": 2.1824983798432864,
      "grad_norm": 0.6428725123405457,
      "learning_rate": 5.6721335239814715e-06,
      "loss": 0.0174,
      "step": 1333620
    },
    {
      "epoch": 2.18253111028194,
      "grad_norm": 0.6377354860305786,
      "learning_rate": 5.672067631767954e-06,
      "loss": 0.0235,
      "step": 1333640
    },
    {
      "epoch": 2.182563840720593,
      "grad_norm": 0.4796518087387085,
      "learning_rate": 5.672001739554437e-06,
      "loss": 0.0133,
      "step": 1333660
    },
    {
      "epoch": 2.1825965711592468,
      "grad_norm": 0.4275106191635132,
      "learning_rate": 5.6719358473409206e-06,
      "loss": 0.013,
      "step": 1333680
    },
    {
      "epoch": 2.1826293015979,
      "grad_norm": 0.25917723774909973,
      "learning_rate": 5.6718699551274025e-06,
      "loss": 0.0144,
      "step": 1333700
    },
    {
      "epoch": 2.1826620320365535,
      "grad_norm": 0.17355717718601227,
      "learning_rate": 5.671804062913886e-06,
      "loss": 0.0154,
      "step": 1333720
    },
    {
      "epoch": 2.1826947624752067,
      "grad_norm": 0.15901494026184082,
      "learning_rate": 5.671738170700368e-06,
      "loss": 0.013,
      "step": 1333740
    },
    {
      "epoch": 2.18272749291386,
      "grad_norm": 0.6681451797485352,
      "learning_rate": 5.6716722784868515e-06,
      "loss": 0.0166,
      "step": 1333760
    },
    {
      "epoch": 2.1827602233525134,
      "grad_norm": 0.17270562052726746,
      "learning_rate": 5.671606386273335e-06,
      "loss": 0.0159,
      "step": 1333780
    },
    {
      "epoch": 2.1827929537911666,
      "grad_norm": 2.0022506713867188,
      "learning_rate": 5.671540494059817e-06,
      "loss": 0.018,
      "step": 1333800
    },
    {
      "epoch": 2.18282568422982,
      "grad_norm": 0.14124612510204315,
      "learning_rate": 5.671474601846301e-06,
      "loss": 0.0155,
      "step": 1333820
    },
    {
      "epoch": 2.1828584146684733,
      "grad_norm": 0.5668801069259644,
      "learning_rate": 5.671408709632783e-06,
      "loss": 0.021,
      "step": 1333840
    },
    {
      "epoch": 2.182891145107127,
      "grad_norm": 1.0824650526046753,
      "learning_rate": 5.671342817419266e-06,
      "loss": 0.0158,
      "step": 1333860
    },
    {
      "epoch": 2.18292387554578,
      "grad_norm": 0.8562352657318115,
      "learning_rate": 5.671276925205749e-06,
      "loss": 0.0121,
      "step": 1333880
    },
    {
      "epoch": 2.182956605984433,
      "grad_norm": 0.1661464124917984,
      "learning_rate": 5.671211032992232e-06,
      "loss": 0.0137,
      "step": 1333900
    },
    {
      "epoch": 2.182989336423087,
      "grad_norm": 0.5031859874725342,
      "learning_rate": 5.671145140778714e-06,
      "loss": 0.0173,
      "step": 1333920
    },
    {
      "epoch": 2.18302206686174,
      "grad_norm": 0.917618989944458,
      "learning_rate": 5.671079248565198e-06,
      "loss": 0.0145,
      "step": 1333940
    },
    {
      "epoch": 2.1830547973003935,
      "grad_norm": 0.5730989575386047,
      "learning_rate": 5.67101335635168e-06,
      "loss": 0.0128,
      "step": 1333960
    },
    {
      "epoch": 2.1830875277390467,
      "grad_norm": 0.47190144658088684,
      "learning_rate": 5.670947464138163e-06,
      "loss": 0.0127,
      "step": 1333980
    },
    {
      "epoch": 2.1831202581777003,
      "grad_norm": 0.19998487830162048,
      "learning_rate": 5.670881571924646e-06,
      "loss": 0.015,
      "step": 1334000
    },
    {
      "epoch": 2.1831529886163534,
      "grad_norm": 0.22980663180351257,
      "learning_rate": 5.670815679711129e-06,
      "loss": 0.0211,
      "step": 1334020
    },
    {
      "epoch": 2.1831857190550066,
      "grad_norm": 0.38854697346687317,
      "learning_rate": 5.670749787497612e-06,
      "loss": 0.014,
      "step": 1334040
    },
    {
      "epoch": 2.18321844949366,
      "grad_norm": 0.16139763593673706,
      "learning_rate": 5.670683895284095e-06,
      "loss": 0.0117,
      "step": 1334060
    },
    {
      "epoch": 2.1832511799323133,
      "grad_norm": 0.07405098527669907,
      "learning_rate": 5.670618003070577e-06,
      "loss": 0.0171,
      "step": 1334080
    },
    {
      "epoch": 2.183283910370967,
      "grad_norm": 2.4489667415618896,
      "learning_rate": 5.670552110857061e-06,
      "loss": 0.0204,
      "step": 1334100
    },
    {
      "epoch": 2.18331664080962,
      "grad_norm": 0.27717775106430054,
      "learning_rate": 5.670486218643544e-06,
      "loss": 0.016,
      "step": 1334120
    },
    {
      "epoch": 2.1833493712482737,
      "grad_norm": 0.3864515423774719,
      "learning_rate": 5.670420326430026e-06,
      "loss": 0.0147,
      "step": 1334140
    },
    {
      "epoch": 2.183382101686927,
      "grad_norm": 0.603082001209259,
      "learning_rate": 5.67035443421651e-06,
      "loss": 0.0169,
      "step": 1334160
    },
    {
      "epoch": 2.18341483212558,
      "grad_norm": 0.45172974467277527,
      "learning_rate": 5.670288542002992e-06,
      "loss": 0.0145,
      "step": 1334180
    },
    {
      "epoch": 2.1834475625642336,
      "grad_norm": 0.4981478452682495,
      "learning_rate": 5.670222649789475e-06,
      "loss": 0.0198,
      "step": 1334200
    },
    {
      "epoch": 2.1834802930028867,
      "grad_norm": 0.2567075192928314,
      "learning_rate": 5.670156757575958e-06,
      "loss": 0.0122,
      "step": 1334220
    },
    {
      "epoch": 2.1835130234415403,
      "grad_norm": 0.7331639528274536,
      "learning_rate": 5.670090865362441e-06,
      "loss": 0.0122,
      "step": 1334240
    },
    {
      "epoch": 2.1835457538801935,
      "grad_norm": 0.4731041193008423,
      "learning_rate": 5.670024973148923e-06,
      "loss": 0.0119,
      "step": 1334260
    },
    {
      "epoch": 2.183578484318847,
      "grad_norm": 0.5254541039466858,
      "learning_rate": 5.669959080935407e-06,
      "loss": 0.0148,
      "step": 1334280
    },
    {
      "epoch": 2.1836112147575,
      "grad_norm": 0.201736181974411,
      "learning_rate": 5.669893188721889e-06,
      "loss": 0.0227,
      "step": 1334300
    },
    {
      "epoch": 2.1836439451961533,
      "grad_norm": 0.5228548049926758,
      "learning_rate": 5.6698272965083725e-06,
      "loss": 0.0139,
      "step": 1334320
    },
    {
      "epoch": 2.183676675634807,
      "grad_norm": 0.29555365443229675,
      "learning_rate": 5.669761404294854e-06,
      "loss": 0.0223,
      "step": 1334340
    },
    {
      "epoch": 2.18370940607346,
      "grad_norm": 0.8524959087371826,
      "learning_rate": 5.669695512081338e-06,
      "loss": 0.0142,
      "step": 1334360
    },
    {
      "epoch": 2.1837421365121137,
      "grad_norm": 0.6316091418266296,
      "learning_rate": 5.66962961986782e-06,
      "loss": 0.0119,
      "step": 1334380
    },
    {
      "epoch": 2.183774866950767,
      "grad_norm": 0.648398220539093,
      "learning_rate": 5.6695637276543034e-06,
      "loss": 0.0178,
      "step": 1334400
    },
    {
      "epoch": 2.1838075973894204,
      "grad_norm": 0.3207649886608124,
      "learning_rate": 5.669497835440786e-06,
      "loss": 0.0151,
      "step": 1334420
    },
    {
      "epoch": 2.1838403278280736,
      "grad_norm": 0.46172982454299927,
      "learning_rate": 5.669431943227269e-06,
      "loss": 0.0205,
      "step": 1334440
    },
    {
      "epoch": 2.1838730582667267,
      "grad_norm": 0.7619794011116028,
      "learning_rate": 5.6693660510137525e-06,
      "loss": 0.02,
      "step": 1334460
    },
    {
      "epoch": 2.1839057887053803,
      "grad_norm": 0.7967247366905212,
      "learning_rate": 5.669300158800235e-06,
      "loss": 0.017,
      "step": 1334480
    },
    {
      "epoch": 2.1839385191440335,
      "grad_norm": 0.9535180926322937,
      "learning_rate": 5.669234266586718e-06,
      "loss": 0.0128,
      "step": 1334500
    },
    {
      "epoch": 2.183971249582687,
      "grad_norm": 0.40964221954345703,
      "learning_rate": 5.669168374373201e-06,
      "loss": 0.0189,
      "step": 1334520
    },
    {
      "epoch": 2.18400398002134,
      "grad_norm": 0.4099183678627014,
      "learning_rate": 5.669102482159684e-06,
      "loss": 0.0179,
      "step": 1334540
    },
    {
      "epoch": 2.184036710459994,
      "grad_norm": 0.14522849023342133,
      "learning_rate": 5.669036589946166e-06,
      "loss": 0.0094,
      "step": 1334560
    },
    {
      "epoch": 2.184069440898647,
      "grad_norm": 0.3127368986606598,
      "learning_rate": 5.66897069773265e-06,
      "loss": 0.0169,
      "step": 1334580
    },
    {
      "epoch": 2.1841021713373,
      "grad_norm": 1.305806279182434,
      "learning_rate": 5.668904805519132e-06,
      "loss": 0.0175,
      "step": 1334600
    },
    {
      "epoch": 2.1841349017759537,
      "grad_norm": 0.4354831576347351,
      "learning_rate": 5.668838913305615e-06,
      "loss": 0.0263,
      "step": 1334620
    },
    {
      "epoch": 2.184167632214607,
      "grad_norm": 0.4184253513813019,
      "learning_rate": 5.668773021092098e-06,
      "loss": 0.0199,
      "step": 1334640
    },
    {
      "epoch": 2.1842003626532605,
      "grad_norm": 0.2968248426914215,
      "learning_rate": 5.668707128878581e-06,
      "loss": 0.0186,
      "step": 1334660
    },
    {
      "epoch": 2.1842330930919136,
      "grad_norm": 0.7408181428909302,
      "learning_rate": 5.6686412366650635e-06,
      "loss": 0.0133,
      "step": 1334680
    },
    {
      "epoch": 2.1842658235305668,
      "grad_norm": 0.9308582544326782,
      "learning_rate": 5.668575344451547e-06,
      "loss": 0.0156,
      "step": 1334700
    },
    {
      "epoch": 2.1842985539692203,
      "grad_norm": 0.24292385578155518,
      "learning_rate": 5.668509452238029e-06,
      "loss": 0.0119,
      "step": 1334720
    },
    {
      "epoch": 2.1843312844078735,
      "grad_norm": 0.3984944224357605,
      "learning_rate": 5.6684435600245126e-06,
      "loss": 0.0177,
      "step": 1334740
    },
    {
      "epoch": 2.184364014846527,
      "grad_norm": 0.3508831560611725,
      "learning_rate": 5.6683776678109944e-06,
      "loss": 0.0162,
      "step": 1334760
    },
    {
      "epoch": 2.1843967452851802,
      "grad_norm": 0.24650436639785767,
      "learning_rate": 5.668311775597478e-06,
      "loss": 0.0173,
      "step": 1334780
    },
    {
      "epoch": 2.184429475723834,
      "grad_norm": 0.30796006321907043,
      "learning_rate": 5.668245883383962e-06,
      "loss": 0.0157,
      "step": 1334800
    },
    {
      "epoch": 2.184462206162487,
      "grad_norm": 0.3393845558166504,
      "learning_rate": 5.6681799911704435e-06,
      "loss": 0.0098,
      "step": 1334820
    },
    {
      "epoch": 2.18449493660114,
      "grad_norm": 0.6945096850395203,
      "learning_rate": 5.668114098956927e-06,
      "loss": 0.0175,
      "step": 1334840
    },
    {
      "epoch": 2.1845276670397937,
      "grad_norm": 0.9952411651611328,
      "learning_rate": 5.66804820674341e-06,
      "loss": 0.0186,
      "step": 1334860
    },
    {
      "epoch": 2.184560397478447,
      "grad_norm": 0.3988382816314697,
      "learning_rate": 5.667982314529893e-06,
      "loss": 0.0151,
      "step": 1334880
    },
    {
      "epoch": 2.1845931279171005,
      "grad_norm": 0.37524330615997314,
      "learning_rate": 5.667916422316375e-06,
      "loss": 0.015,
      "step": 1334900
    },
    {
      "epoch": 2.1846258583557536,
      "grad_norm": 0.12272274494171143,
      "learning_rate": 5.667850530102859e-06,
      "loss": 0.0097,
      "step": 1334920
    },
    {
      "epoch": 2.184658588794407,
      "grad_norm": 1.1487095355987549,
      "learning_rate": 5.667784637889341e-06,
      "loss": 0.0207,
      "step": 1334940
    },
    {
      "epoch": 2.1846913192330604,
      "grad_norm": 0.13215325772762299,
      "learning_rate": 5.667718745675824e-06,
      "loss": 0.0173,
      "step": 1334960
    },
    {
      "epoch": 2.1847240496717135,
      "grad_norm": 0.5804656147956848,
      "learning_rate": 5.667652853462306e-06,
      "loss": 0.019,
      "step": 1334980
    },
    {
      "epoch": 2.184756780110367,
      "grad_norm": 0.27884772419929504,
      "learning_rate": 5.66758696124879e-06,
      "loss": 0.0141,
      "step": 1335000
    },
    {
      "epoch": 2.1847895105490203,
      "grad_norm": 0.2015271931886673,
      "learning_rate": 5.667521069035273e-06,
      "loss": 0.0181,
      "step": 1335020
    },
    {
      "epoch": 2.184822240987674,
      "grad_norm": 0.992073118686676,
      "learning_rate": 5.667455176821755e-06,
      "loss": 0.0138,
      "step": 1335040
    },
    {
      "epoch": 2.184854971426327,
      "grad_norm": 0.10741037130355835,
      "learning_rate": 5.667389284608238e-06,
      "loss": 0.0103,
      "step": 1335060
    },
    {
      "epoch": 2.1848877018649806,
      "grad_norm": 0.23405463993549347,
      "learning_rate": 5.667323392394722e-06,
      "loss": 0.0101,
      "step": 1335080
    },
    {
      "epoch": 2.1849204323036338,
      "grad_norm": 0.16758990287780762,
      "learning_rate": 5.6672575001812036e-06,
      "loss": 0.0118,
      "step": 1335100
    },
    {
      "epoch": 2.184953162742287,
      "grad_norm": 0.487688273191452,
      "learning_rate": 5.667191607967687e-06,
      "loss": 0.0237,
      "step": 1335120
    },
    {
      "epoch": 2.1849858931809405,
      "grad_norm": 0.36927783489227295,
      "learning_rate": 5.667125715754169e-06,
      "loss": 0.0134,
      "step": 1335140
    },
    {
      "epoch": 2.1850186236195936,
      "grad_norm": 0.27041247487068176,
      "learning_rate": 5.667059823540653e-06,
      "loss": 0.0136,
      "step": 1335160
    },
    {
      "epoch": 2.1850513540582472,
      "grad_norm": 0.21645915508270264,
      "learning_rate": 5.666993931327136e-06,
      "loss": 0.0104,
      "step": 1335180
    },
    {
      "epoch": 2.1850840844969004,
      "grad_norm": 0.34528791904449463,
      "learning_rate": 5.666928039113618e-06,
      "loss": 0.0156,
      "step": 1335200
    },
    {
      "epoch": 2.1851168149355535,
      "grad_norm": 0.2531857192516327,
      "learning_rate": 5.666862146900102e-06,
      "loss": 0.0156,
      "step": 1335220
    },
    {
      "epoch": 2.185149545374207,
      "grad_norm": 0.26028427481651306,
      "learning_rate": 5.666796254686584e-06,
      "loss": 0.0124,
      "step": 1335240
    },
    {
      "epoch": 2.1851822758128603,
      "grad_norm": 0.5261929035186768,
      "learning_rate": 5.666730362473067e-06,
      "loss": 0.0153,
      "step": 1335260
    },
    {
      "epoch": 2.185215006251514,
      "grad_norm": 0.7616615295410156,
      "learning_rate": 5.66666447025955e-06,
      "loss": 0.0094,
      "step": 1335280
    },
    {
      "epoch": 2.185247736690167,
      "grad_norm": 1.1715086698532104,
      "learning_rate": 5.6665985780460335e-06,
      "loss": 0.0165,
      "step": 1335300
    },
    {
      "epoch": 2.1852804671288206,
      "grad_norm": 0.27036917209625244,
      "learning_rate": 5.666532685832515e-06,
      "loss": 0.0174,
      "step": 1335320
    },
    {
      "epoch": 2.1853131975674738,
      "grad_norm": 0.3204117715358734,
      "learning_rate": 5.666466793618999e-06,
      "loss": 0.0142,
      "step": 1335340
    },
    {
      "epoch": 2.185345928006127,
      "grad_norm": 0.20063607394695282,
      "learning_rate": 5.666400901405481e-06,
      "loss": 0.0122,
      "step": 1335360
    },
    {
      "epoch": 2.1853786584447805,
      "grad_norm": 0.13725170493125916,
      "learning_rate": 5.6663350091919645e-06,
      "loss": 0.0203,
      "step": 1335380
    },
    {
      "epoch": 2.1854113888834337,
      "grad_norm": 0.2292586714029312,
      "learning_rate": 5.666269116978446e-06,
      "loss": 0.0113,
      "step": 1335400
    },
    {
      "epoch": 2.1854441193220873,
      "grad_norm": 0.7685241103172302,
      "learning_rate": 5.66620322476493e-06,
      "loss": 0.012,
      "step": 1335420
    },
    {
      "epoch": 2.1854768497607404,
      "grad_norm": 0.6585786938667297,
      "learning_rate": 5.666137332551413e-06,
      "loss": 0.0201,
      "step": 1335440
    },
    {
      "epoch": 2.185509580199394,
      "grad_norm": 0.2154691368341446,
      "learning_rate": 5.666071440337895e-06,
      "loss": 0.014,
      "step": 1335460
    },
    {
      "epoch": 2.185542310638047,
      "grad_norm": 0.7673133611679077,
      "learning_rate": 5.666005548124378e-06,
      "loss": 0.0169,
      "step": 1335480
    },
    {
      "epoch": 2.1855750410767003,
      "grad_norm": 0.37514162063598633,
      "learning_rate": 5.665939655910862e-06,
      "loss": 0.0137,
      "step": 1335500
    },
    {
      "epoch": 2.185607771515354,
      "grad_norm": 0.8001506924629211,
      "learning_rate": 5.6658737636973445e-06,
      "loss": 0.0176,
      "step": 1335520
    },
    {
      "epoch": 2.185640501954007,
      "grad_norm": 0.41884341835975647,
      "learning_rate": 5.665807871483827e-06,
      "loss": 0.013,
      "step": 1335540
    },
    {
      "epoch": 2.1856732323926606,
      "grad_norm": 0.22099873423576355,
      "learning_rate": 5.665741979270311e-06,
      "loss": 0.0151,
      "step": 1335560
    },
    {
      "epoch": 2.185705962831314,
      "grad_norm": 0.20292304456233978,
      "learning_rate": 5.665676087056793e-06,
      "loss": 0.0167,
      "step": 1335580
    },
    {
      "epoch": 2.1857386932699674,
      "grad_norm": 0.6564374566078186,
      "learning_rate": 5.665610194843276e-06,
      "loss": 0.0119,
      "step": 1335600
    },
    {
      "epoch": 2.1857714237086205,
      "grad_norm": 0.493415504693985,
      "learning_rate": 5.665544302629758e-06,
      "loss": 0.0134,
      "step": 1335620
    },
    {
      "epoch": 2.1858041541472737,
      "grad_norm": 0.07485406845808029,
      "learning_rate": 5.665478410416242e-06,
      "loss": 0.0129,
      "step": 1335640
    },
    {
      "epoch": 2.1858368845859273,
      "grad_norm": 0.44382911920547485,
      "learning_rate": 5.6654125182027245e-06,
      "loss": 0.0117,
      "step": 1335660
    },
    {
      "epoch": 2.1858696150245804,
      "grad_norm": 1.0412909984588623,
      "learning_rate": 5.665346625989207e-06,
      "loss": 0.0175,
      "step": 1335680
    },
    {
      "epoch": 2.185902345463234,
      "grad_norm": 0.26001355051994324,
      "learning_rate": 5.66528073377569e-06,
      "loss": 0.0116,
      "step": 1335700
    },
    {
      "epoch": 2.185935075901887,
      "grad_norm": 0.09357985854148865,
      "learning_rate": 5.665214841562174e-06,
      "loss": 0.0224,
      "step": 1335720
    },
    {
      "epoch": 2.1859678063405408,
      "grad_norm": 0.5152556300163269,
      "learning_rate": 5.6651489493486555e-06,
      "loss": 0.0175,
      "step": 1335740
    },
    {
      "epoch": 2.186000536779194,
      "grad_norm": 0.13127224147319794,
      "learning_rate": 5.665083057135139e-06,
      "loss": 0.0172,
      "step": 1335760
    },
    {
      "epoch": 2.186033267217847,
      "grad_norm": 0.6522169709205627,
      "learning_rate": 5.665017164921621e-06,
      "loss": 0.0168,
      "step": 1335780
    },
    {
      "epoch": 2.1860659976565007,
      "grad_norm": 0.18852652609348297,
      "learning_rate": 5.6649512727081045e-06,
      "loss": 0.0157,
      "step": 1335800
    },
    {
      "epoch": 2.186098728095154,
      "grad_norm": 0.38568219542503357,
      "learning_rate": 5.664885380494587e-06,
      "loss": 0.0166,
      "step": 1335820
    },
    {
      "epoch": 2.1861314585338074,
      "grad_norm": 0.158170685172081,
      "learning_rate": 5.66481948828107e-06,
      "loss": 0.0109,
      "step": 1335840
    },
    {
      "epoch": 2.1861641889724606,
      "grad_norm": 0.644835889339447,
      "learning_rate": 5.664753596067554e-06,
      "loss": 0.0187,
      "step": 1335860
    },
    {
      "epoch": 2.186196919411114,
      "grad_norm": 0.5672538876533508,
      "learning_rate": 5.664687703854036e-06,
      "loss": 0.0137,
      "step": 1335880
    },
    {
      "epoch": 2.1862296498497673,
      "grad_norm": 0.20521430671215057,
      "learning_rate": 5.664621811640519e-06,
      "loss": 0.0117,
      "step": 1335900
    },
    {
      "epoch": 2.1862623802884205,
      "grad_norm": 0.4332469403743744,
      "learning_rate": 5.664555919427002e-06,
      "loss": 0.0139,
      "step": 1335920
    },
    {
      "epoch": 2.186295110727074,
      "grad_norm": 0.33764567971229553,
      "learning_rate": 5.664490027213485e-06,
      "loss": 0.0173,
      "step": 1335940
    },
    {
      "epoch": 2.186327841165727,
      "grad_norm": 0.2962592840194702,
      "learning_rate": 5.664424134999967e-06,
      "loss": 0.0192,
      "step": 1335960
    },
    {
      "epoch": 2.186360571604381,
      "grad_norm": 0.43191754817962646,
      "learning_rate": 5.664358242786451e-06,
      "loss": 0.0136,
      "step": 1335980
    },
    {
      "epoch": 2.186393302043034,
      "grad_norm": 0.8182538747787476,
      "learning_rate": 5.664292350572933e-06,
      "loss": 0.0125,
      "step": 1336000
    },
    {
      "epoch": 2.1864260324816875,
      "grad_norm": 0.16803769767284393,
      "learning_rate": 5.664226458359416e-06,
      "loss": 0.0133,
      "step": 1336020
    },
    {
      "epoch": 2.1864587629203407,
      "grad_norm": 0.4485023617744446,
      "learning_rate": 5.664160566145899e-06,
      "loss": 0.0147,
      "step": 1336040
    },
    {
      "epoch": 2.186491493358994,
      "grad_norm": 0.3843836784362793,
      "learning_rate": 5.664094673932382e-06,
      "loss": 0.0155,
      "step": 1336060
    },
    {
      "epoch": 2.1865242237976474,
      "grad_norm": 0.23177729547023773,
      "learning_rate": 5.664028781718865e-06,
      "loss": 0.0147,
      "step": 1336080
    },
    {
      "epoch": 2.1865569542363006,
      "grad_norm": 0.6593648195266724,
      "learning_rate": 5.663962889505348e-06,
      "loss": 0.0161,
      "step": 1336100
    },
    {
      "epoch": 2.186589684674954,
      "grad_norm": 0.08359071612358093,
      "learning_rate": 5.66389699729183e-06,
      "loss": 0.0182,
      "step": 1336120
    },
    {
      "epoch": 2.1866224151136073,
      "grad_norm": 0.6867831945419312,
      "learning_rate": 5.663831105078314e-06,
      "loss": 0.016,
      "step": 1336140
    },
    {
      "epoch": 2.186655145552261,
      "grad_norm": 0.8118113279342651,
      "learning_rate": 5.6637652128647955e-06,
      "loss": 0.0165,
      "step": 1336160
    },
    {
      "epoch": 2.186687875990914,
      "grad_norm": 2.279534339904785,
      "learning_rate": 5.663699320651279e-06,
      "loss": 0.0209,
      "step": 1336180
    },
    {
      "epoch": 2.1867206064295672,
      "grad_norm": 0.2667577564716339,
      "learning_rate": 5.663633428437761e-06,
      "loss": 0.0192,
      "step": 1336200
    },
    {
      "epoch": 2.186753336868221,
      "grad_norm": 0.5926585793495178,
      "learning_rate": 5.663567536224245e-06,
      "loss": 0.0091,
      "step": 1336220
    },
    {
      "epoch": 2.186786067306874,
      "grad_norm": 0.32141271233558655,
      "learning_rate": 5.663501644010728e-06,
      "loss": 0.0215,
      "step": 1336240
    },
    {
      "epoch": 2.1868187977455276,
      "grad_norm": 2.472435235977173,
      "learning_rate": 5.66343575179721e-06,
      "loss": 0.0155,
      "step": 1336260
    },
    {
      "epoch": 2.1868515281841807,
      "grad_norm": 0.47726911306381226,
      "learning_rate": 5.663369859583694e-06,
      "loss": 0.012,
      "step": 1336280
    },
    {
      "epoch": 2.186884258622834,
      "grad_norm": 0.5035203695297241,
      "learning_rate": 5.663303967370176e-06,
      "loss": 0.0126,
      "step": 1336300
    },
    {
      "epoch": 2.1869169890614875,
      "grad_norm": 0.3499840199947357,
      "learning_rate": 5.663238075156659e-06,
      "loss": 0.0196,
      "step": 1336320
    },
    {
      "epoch": 2.1869497195001406,
      "grad_norm": 0.3234676718711853,
      "learning_rate": 5.663172182943142e-06,
      "loss": 0.0157,
      "step": 1336340
    },
    {
      "epoch": 2.186982449938794,
      "grad_norm": 0.4340295195579529,
      "learning_rate": 5.6631062907296255e-06,
      "loss": 0.0118,
      "step": 1336360
    },
    {
      "epoch": 2.1870151803774474,
      "grad_norm": 0.5674936771392822,
      "learning_rate": 5.663040398516107e-06,
      "loss": 0.0179,
      "step": 1336380
    },
    {
      "epoch": 2.187047910816101,
      "grad_norm": 0.32532063126564026,
      "learning_rate": 5.662974506302591e-06,
      "loss": 0.0103,
      "step": 1336400
    },
    {
      "epoch": 2.187080641254754,
      "grad_norm": 1.1263681650161743,
      "learning_rate": 5.662908614089073e-06,
      "loss": 0.0212,
      "step": 1336420
    },
    {
      "epoch": 2.1871133716934072,
      "grad_norm": 0.24065741896629333,
      "learning_rate": 5.6628427218755564e-06,
      "loss": 0.0149,
      "step": 1336440
    },
    {
      "epoch": 2.187146102132061,
      "grad_norm": 0.24908575415611267,
      "learning_rate": 5.662776829662039e-06,
      "loss": 0.0124,
      "step": 1336460
    },
    {
      "epoch": 2.187178832570714,
      "grad_norm": 0.33525654673576355,
      "learning_rate": 5.662710937448522e-06,
      "loss": 0.0241,
      "step": 1336480
    },
    {
      "epoch": 2.1872115630093676,
      "grad_norm": 0.42028477787971497,
      "learning_rate": 5.662645045235005e-06,
      "loss": 0.0105,
      "step": 1336500
    },
    {
      "epoch": 2.1872442934480207,
      "grad_norm": 0.2912288308143616,
      "learning_rate": 5.662579153021488e-06,
      "loss": 0.0108,
      "step": 1336520
    },
    {
      "epoch": 2.1872770238866743,
      "grad_norm": 0.19932179152965546,
      "learning_rate": 5.66251326080797e-06,
      "loss": 0.0103,
      "step": 1336540
    },
    {
      "epoch": 2.1873097543253275,
      "grad_norm": 0.1751582771539688,
      "learning_rate": 5.662447368594454e-06,
      "loss": 0.0106,
      "step": 1336560
    },
    {
      "epoch": 2.1873424847639806,
      "grad_norm": 0.27558207511901855,
      "learning_rate": 5.662381476380937e-06,
      "loss": 0.0153,
      "step": 1336580
    },
    {
      "epoch": 2.1873752152026342,
      "grad_norm": 1.0344154834747314,
      "learning_rate": 5.662315584167419e-06,
      "loss": 0.0137,
      "step": 1336600
    },
    {
      "epoch": 2.1874079456412874,
      "grad_norm": 0.5591707229614258,
      "learning_rate": 5.662249691953903e-06,
      "loss": 0.0192,
      "step": 1336620
    },
    {
      "epoch": 2.187440676079941,
      "grad_norm": 0.2538909316062927,
      "learning_rate": 5.662183799740385e-06,
      "loss": 0.0134,
      "step": 1336640
    },
    {
      "epoch": 2.187473406518594,
      "grad_norm": 0.2100047618150711,
      "learning_rate": 5.662117907526868e-06,
      "loss": 0.0095,
      "step": 1336660
    },
    {
      "epoch": 2.1875061369572473,
      "grad_norm": 0.23407971858978271,
      "learning_rate": 5.662052015313351e-06,
      "loss": 0.0161,
      "step": 1336680
    },
    {
      "epoch": 2.187538867395901,
      "grad_norm": 0.4056796431541443,
      "learning_rate": 5.661986123099834e-06,
      "loss": 0.0174,
      "step": 1336700
    },
    {
      "epoch": 2.187571597834554,
      "grad_norm": 0.516657292842865,
      "learning_rate": 5.6619202308863165e-06,
      "loss": 0.016,
      "step": 1336720
    },
    {
      "epoch": 2.1876043282732076,
      "grad_norm": 0.3070017993450165,
      "learning_rate": 5.6618543386728e-06,
      "loss": 0.0137,
      "step": 1336740
    },
    {
      "epoch": 2.1876370587118608,
      "grad_norm": 0.3387932777404785,
      "learning_rate": 5.661788446459282e-06,
      "loss": 0.0198,
      "step": 1336760
    },
    {
      "epoch": 2.1876697891505144,
      "grad_norm": 0.38706016540527344,
      "learning_rate": 5.6617225542457656e-06,
      "loss": 0.0156,
      "step": 1336780
    },
    {
      "epoch": 2.1877025195891675,
      "grad_norm": 0.8171955347061157,
      "learning_rate": 5.6616566620322475e-06,
      "loss": 0.0174,
      "step": 1336800
    },
    {
      "epoch": 2.1877352500278207,
      "grad_norm": 0.21517230570316315,
      "learning_rate": 5.661590769818731e-06,
      "loss": 0.0108,
      "step": 1336820
    },
    {
      "epoch": 2.1877679804664742,
      "grad_norm": 0.3147672116756439,
      "learning_rate": 5.661524877605214e-06,
      "loss": 0.0154,
      "step": 1336840
    },
    {
      "epoch": 2.1878007109051274,
      "grad_norm": 0.18611697852611542,
      "learning_rate": 5.6614589853916965e-06,
      "loss": 0.0156,
      "step": 1336860
    },
    {
      "epoch": 2.187833441343781,
      "grad_norm": 0.34472399950027466,
      "learning_rate": 5.661393093178179e-06,
      "loss": 0.0132,
      "step": 1336880
    },
    {
      "epoch": 2.187866171782434,
      "grad_norm": 0.46595892310142517,
      "learning_rate": 5.661327200964663e-06,
      "loss": 0.0197,
      "step": 1336900
    },
    {
      "epoch": 2.1878989022210877,
      "grad_norm": 0.585220217704773,
      "learning_rate": 5.661261308751146e-06,
      "loss": 0.0084,
      "step": 1336920
    },
    {
      "epoch": 2.187931632659741,
      "grad_norm": 0.6165740489959717,
      "learning_rate": 5.661195416537628e-06,
      "loss": 0.0165,
      "step": 1336940
    },
    {
      "epoch": 2.187964363098394,
      "grad_norm": 0.6543769240379333,
      "learning_rate": 5.661129524324112e-06,
      "loss": 0.0159,
      "step": 1336960
    },
    {
      "epoch": 2.1879970935370476,
      "grad_norm": 0.5318013429641724,
      "learning_rate": 5.661063632110594e-06,
      "loss": 0.0133,
      "step": 1336980
    },
    {
      "epoch": 2.188029823975701,
      "grad_norm": 0.23202379047870636,
      "learning_rate": 5.660997739897077e-06,
      "loss": 0.0105,
      "step": 1337000
    },
    {
      "epoch": 2.1880625544143544,
      "grad_norm": 0.37712809443473816,
      "learning_rate": 5.660931847683559e-06,
      "loss": 0.014,
      "step": 1337020
    },
    {
      "epoch": 2.1880952848530075,
      "grad_norm": 1.1831780672073364,
      "learning_rate": 5.660865955470043e-06,
      "loss": 0.0141,
      "step": 1337040
    },
    {
      "epoch": 2.188128015291661,
      "grad_norm": 1.011756181716919,
      "learning_rate": 5.660800063256526e-06,
      "loss": 0.0142,
      "step": 1337060
    },
    {
      "epoch": 2.1881607457303143,
      "grad_norm": 0.5866830348968506,
      "learning_rate": 5.660734171043008e-06,
      "loss": 0.0142,
      "step": 1337080
    },
    {
      "epoch": 2.1881934761689674,
      "grad_norm": 0.13726262748241425,
      "learning_rate": 5.660668278829491e-06,
      "loss": 0.0187,
      "step": 1337100
    },
    {
      "epoch": 2.188226206607621,
      "grad_norm": 0.6692605018615723,
      "learning_rate": 5.660602386615975e-06,
      "loss": 0.0109,
      "step": 1337120
    },
    {
      "epoch": 2.188258937046274,
      "grad_norm": 0.32359668612480164,
      "learning_rate": 5.6605364944024566e-06,
      "loss": 0.0169,
      "step": 1337140
    },
    {
      "epoch": 2.1882916674849278,
      "grad_norm": 0.4906937777996063,
      "learning_rate": 5.66047060218894e-06,
      "loss": 0.0129,
      "step": 1337160
    },
    {
      "epoch": 2.188324397923581,
      "grad_norm": 0.5500484108924866,
      "learning_rate": 5.660404709975422e-06,
      "loss": 0.0122,
      "step": 1337180
    },
    {
      "epoch": 2.1883571283622345,
      "grad_norm": 0.9996048212051392,
      "learning_rate": 5.660338817761906e-06,
      "loss": 0.0162,
      "step": 1337200
    },
    {
      "epoch": 2.1883898588008877,
      "grad_norm": 0.2409706860780716,
      "learning_rate": 5.6602729255483875e-06,
      "loss": 0.0146,
      "step": 1337220
    },
    {
      "epoch": 2.188422589239541,
      "grad_norm": 0.5675445199012756,
      "learning_rate": 5.660207033334871e-06,
      "loss": 0.0112,
      "step": 1337240
    },
    {
      "epoch": 2.1884553196781944,
      "grad_norm": 0.22404789924621582,
      "learning_rate": 5.660141141121354e-06,
      "loss": 0.0228,
      "step": 1337260
    },
    {
      "epoch": 2.1884880501168475,
      "grad_norm": 0.3629802167415619,
      "learning_rate": 5.660075248907837e-06,
      "loss": 0.0134,
      "step": 1337280
    },
    {
      "epoch": 2.188520780555501,
      "grad_norm": 1.23225736618042,
      "learning_rate": 5.66000935669432e-06,
      "loss": 0.0165,
      "step": 1337300
    },
    {
      "epoch": 2.1885535109941543,
      "grad_norm": 0.17112131416797638,
      "learning_rate": 5.659943464480803e-06,
      "loss": 0.0202,
      "step": 1337320
    },
    {
      "epoch": 2.188586241432808,
      "grad_norm": 0.2276453822851181,
      "learning_rate": 5.659877572267286e-06,
      "loss": 0.02,
      "step": 1337340
    },
    {
      "epoch": 2.188618971871461,
      "grad_norm": 0.9824379682540894,
      "learning_rate": 5.659811680053768e-06,
      "loss": 0.0227,
      "step": 1337360
    },
    {
      "epoch": 2.188651702310114,
      "grad_norm": 1.1623804569244385,
      "learning_rate": 5.659745787840252e-06,
      "loss": 0.0172,
      "step": 1337380
    },
    {
      "epoch": 2.188684432748768,
      "grad_norm": 0.27959269285202026,
      "learning_rate": 5.659679895626734e-06,
      "loss": 0.0254,
      "step": 1337400
    },
    {
      "epoch": 2.188717163187421,
      "grad_norm": 0.28681516647338867,
      "learning_rate": 5.6596140034132175e-06,
      "loss": 0.0146,
      "step": 1337420
    },
    {
      "epoch": 2.1887498936260745,
      "grad_norm": 0.5796325206756592,
      "learning_rate": 5.659548111199699e-06,
      "loss": 0.0152,
      "step": 1337440
    },
    {
      "epoch": 2.1887826240647277,
      "grad_norm": 0.6089777946472168,
      "learning_rate": 5.659482218986183e-06,
      "loss": 0.0186,
      "step": 1337460
    },
    {
      "epoch": 2.1888153545033813,
      "grad_norm": 0.41150766611099243,
      "learning_rate": 5.659416326772666e-06,
      "loss": 0.0206,
      "step": 1337480
    },
    {
      "epoch": 2.1888480849420344,
      "grad_norm": 0.1355457901954651,
      "learning_rate": 5.6593504345591484e-06,
      "loss": 0.014,
      "step": 1337500
    },
    {
      "epoch": 2.1888808153806876,
      "grad_norm": 0.4798141121864319,
      "learning_rate": 5.659284542345631e-06,
      "loss": 0.0105,
      "step": 1337520
    },
    {
      "epoch": 2.188913545819341,
      "grad_norm": 0.11791888624429703,
      "learning_rate": 5.659218650132115e-06,
      "loss": 0.0162,
      "step": 1337540
    },
    {
      "epoch": 2.1889462762579943,
      "grad_norm": 0.5785394310951233,
      "learning_rate": 5.659152757918597e-06,
      "loss": 0.0192,
      "step": 1337560
    },
    {
      "epoch": 2.188979006696648,
      "grad_norm": 0.41952091455459595,
      "learning_rate": 5.65908686570508e-06,
      "loss": 0.0117,
      "step": 1337580
    },
    {
      "epoch": 2.189011737135301,
      "grad_norm": 0.6927782297134399,
      "learning_rate": 5.659020973491562e-06,
      "loss": 0.0115,
      "step": 1337600
    },
    {
      "epoch": 2.1890444675739547,
      "grad_norm": 0.3984123170375824,
      "learning_rate": 5.658955081278046e-06,
      "loss": 0.0145,
      "step": 1337620
    },
    {
      "epoch": 2.189077198012608,
      "grad_norm": 0.12119568884372711,
      "learning_rate": 5.658889189064529e-06,
      "loss": 0.0103,
      "step": 1337640
    },
    {
      "epoch": 2.189109928451261,
      "grad_norm": 0.07053819298744202,
      "learning_rate": 5.658823296851011e-06,
      "loss": 0.0175,
      "step": 1337660
    },
    {
      "epoch": 2.1891426588899146,
      "grad_norm": 0.7305217385292053,
      "learning_rate": 5.658757404637495e-06,
      "loss": 0.013,
      "step": 1337680
    },
    {
      "epoch": 2.1891753893285677,
      "grad_norm": 0.24550966918468475,
      "learning_rate": 5.6586915124239775e-06,
      "loss": 0.011,
      "step": 1337700
    },
    {
      "epoch": 2.1892081197672213,
      "grad_norm": 0.08934447914361954,
      "learning_rate": 5.65862562021046e-06,
      "loss": 0.0111,
      "step": 1337720
    },
    {
      "epoch": 2.1892408502058744,
      "grad_norm": 0.23475155234336853,
      "learning_rate": 5.658559727996943e-06,
      "loss": 0.0102,
      "step": 1337740
    },
    {
      "epoch": 2.1892735806445276,
      "grad_norm": 0.8505998253822327,
      "learning_rate": 5.658493835783427e-06,
      "loss": 0.0156,
      "step": 1337760
    },
    {
      "epoch": 2.189306311083181,
      "grad_norm": 0.4674544036388397,
      "learning_rate": 5.6584279435699085e-06,
      "loss": 0.0148,
      "step": 1337780
    },
    {
      "epoch": 2.1893390415218343,
      "grad_norm": 0.14107933640480042,
      "learning_rate": 5.658362051356392e-06,
      "loss": 0.0139,
      "step": 1337800
    },
    {
      "epoch": 2.189371771960488,
      "grad_norm": 0.3216443657875061,
      "learning_rate": 5.658296159142874e-06,
      "loss": 0.0095,
      "step": 1337820
    },
    {
      "epoch": 2.189404502399141,
      "grad_norm": 1.0422487258911133,
      "learning_rate": 5.6582302669293575e-06,
      "loss": 0.0145,
      "step": 1337840
    },
    {
      "epoch": 2.1894372328377947,
      "grad_norm": 0.5380738973617554,
      "learning_rate": 5.65816437471584e-06,
      "loss": 0.0137,
      "step": 1337860
    },
    {
      "epoch": 2.189469963276448,
      "grad_norm": 0.42045527696609497,
      "learning_rate": 5.658098482502323e-06,
      "loss": 0.0186,
      "step": 1337880
    },
    {
      "epoch": 2.189502693715101,
      "grad_norm": 0.2420472502708435,
      "learning_rate": 5.658032590288806e-06,
      "loss": 0.0206,
      "step": 1337900
    },
    {
      "epoch": 2.1895354241537546,
      "grad_norm": 0.3975987732410431,
      "learning_rate": 5.657966698075289e-06,
      "loss": 0.0163,
      "step": 1337920
    },
    {
      "epoch": 2.1895681545924077,
      "grad_norm": 0.31177476048469543,
      "learning_rate": 5.657900805861771e-06,
      "loss": 0.0208,
      "step": 1337940
    },
    {
      "epoch": 2.1896008850310613,
      "grad_norm": 0.9124137759208679,
      "learning_rate": 5.657834913648255e-06,
      "loss": 0.0159,
      "step": 1337960
    },
    {
      "epoch": 2.1896336154697145,
      "grad_norm": 0.24422124028205872,
      "learning_rate": 5.657769021434738e-06,
      "loss": 0.0118,
      "step": 1337980
    },
    {
      "epoch": 2.189666345908368,
      "grad_norm": 0.6483469009399414,
      "learning_rate": 5.65770312922122e-06,
      "loss": 0.0189,
      "step": 1338000
    },
    {
      "epoch": 2.189699076347021,
      "grad_norm": 0.846571683883667,
      "learning_rate": 5.657637237007704e-06,
      "loss": 0.0217,
      "step": 1338020
    },
    {
      "epoch": 2.1897318067856744,
      "grad_norm": 0.2754119336605072,
      "learning_rate": 5.657571344794186e-06,
      "loss": 0.0208,
      "step": 1338040
    },
    {
      "epoch": 2.189764537224328,
      "grad_norm": 0.3244211971759796,
      "learning_rate": 5.657505452580669e-06,
      "loss": 0.0131,
      "step": 1338060
    },
    {
      "epoch": 2.189797267662981,
      "grad_norm": 0.5431974530220032,
      "learning_rate": 5.657439560367152e-06,
      "loss": 0.0226,
      "step": 1338080
    },
    {
      "epoch": 2.1898299981016347,
      "grad_norm": 0.3884317874908447,
      "learning_rate": 5.657373668153635e-06,
      "loss": 0.0125,
      "step": 1338100
    },
    {
      "epoch": 2.189862728540288,
      "grad_norm": 0.7309342622756958,
      "learning_rate": 5.657307775940118e-06,
      "loss": 0.0165,
      "step": 1338120
    },
    {
      "epoch": 2.189895458978941,
      "grad_norm": 0.13103531301021576,
      "learning_rate": 5.657241883726601e-06,
      "loss": 0.0166,
      "step": 1338140
    },
    {
      "epoch": 2.1899281894175946,
      "grad_norm": 1.2905164957046509,
      "learning_rate": 5.657175991513083e-06,
      "loss": 0.0185,
      "step": 1338160
    },
    {
      "epoch": 2.1899609198562477,
      "grad_norm": 0.1960654854774475,
      "learning_rate": 5.657110099299567e-06,
      "loss": 0.0114,
      "step": 1338180
    },
    {
      "epoch": 2.1899936502949013,
      "grad_norm": 0.36829954385757446,
      "learning_rate": 5.6570442070860486e-06,
      "loss": 0.0112,
      "step": 1338200
    },
    {
      "epoch": 2.1900263807335545,
      "grad_norm": 0.24394896626472473,
      "learning_rate": 5.656978314872532e-06,
      "loss": 0.0144,
      "step": 1338220
    },
    {
      "epoch": 2.190059111172208,
      "grad_norm": 1.5846583843231201,
      "learning_rate": 5.656912422659014e-06,
      "loss": 0.0169,
      "step": 1338240
    },
    {
      "epoch": 2.1900918416108612,
      "grad_norm": 0.929076611995697,
      "learning_rate": 5.656846530445498e-06,
      "loss": 0.0206,
      "step": 1338260
    },
    {
      "epoch": 2.1901245720495144,
      "grad_norm": 0.48967868089675903,
      "learning_rate": 5.65678063823198e-06,
      "loss": 0.01,
      "step": 1338280
    },
    {
      "epoch": 2.190157302488168,
      "grad_norm": 0.6245354413986206,
      "learning_rate": 5.656714746018463e-06,
      "loss": 0.0145,
      "step": 1338300
    },
    {
      "epoch": 2.190190032926821,
      "grad_norm": 0.2834293246269226,
      "learning_rate": 5.656648853804947e-06,
      "loss": 0.0133,
      "step": 1338320
    },
    {
      "epoch": 2.1902227633654747,
      "grad_norm": 0.7784899473190308,
      "learning_rate": 5.6565829615914294e-06,
      "loss": 0.0175,
      "step": 1338340
    },
    {
      "epoch": 2.190255493804128,
      "grad_norm": 0.5273106098175049,
      "learning_rate": 5.656517069377912e-06,
      "loss": 0.0161,
      "step": 1338360
    },
    {
      "epoch": 2.1902882242427815,
      "grad_norm": 0.4430558383464813,
      "learning_rate": 5.656451177164395e-06,
      "loss": 0.012,
      "step": 1338380
    },
    {
      "epoch": 2.1903209546814346,
      "grad_norm": 1.5017387866973877,
      "learning_rate": 5.6563852849508785e-06,
      "loss": 0.0095,
      "step": 1338400
    },
    {
      "epoch": 2.1903536851200878,
      "grad_norm": 0.9436896443367004,
      "learning_rate": 5.65631939273736e-06,
      "loss": 0.018,
      "step": 1338420
    },
    {
      "epoch": 2.1903864155587414,
      "grad_norm": 0.5667445659637451,
      "learning_rate": 5.656253500523844e-06,
      "loss": 0.0152,
      "step": 1338440
    },
    {
      "epoch": 2.1904191459973945,
      "grad_norm": 0.19877813756465912,
      "learning_rate": 5.656187608310326e-06,
      "loss": 0.0209,
      "step": 1338460
    },
    {
      "epoch": 2.190451876436048,
      "grad_norm": 0.6448522210121155,
      "learning_rate": 5.6561217160968095e-06,
      "loss": 0.0114,
      "step": 1338480
    },
    {
      "epoch": 2.1904846068747013,
      "grad_norm": 0.4077344238758087,
      "learning_rate": 5.656055823883292e-06,
      "loss": 0.0083,
      "step": 1338500
    },
    {
      "epoch": 2.190517337313355,
      "grad_norm": 0.12287982553243637,
      "learning_rate": 5.655989931669775e-06,
      "loss": 0.0106,
      "step": 1338520
    },
    {
      "epoch": 2.190550067752008,
      "grad_norm": 0.5114967226982117,
      "learning_rate": 5.655924039456258e-06,
      "loss": 0.0158,
      "step": 1338540
    },
    {
      "epoch": 2.190582798190661,
      "grad_norm": 0.3898511230945587,
      "learning_rate": 5.655858147242741e-06,
      "loss": 0.0147,
      "step": 1338560
    },
    {
      "epoch": 2.1906155286293147,
      "grad_norm": 0.2811282277107239,
      "learning_rate": 5.655792255029223e-06,
      "loss": 0.0159,
      "step": 1338580
    },
    {
      "epoch": 2.190648259067968,
      "grad_norm": 0.36738383769989014,
      "learning_rate": 5.655726362815707e-06,
      "loss": 0.0166,
      "step": 1338600
    },
    {
      "epoch": 2.1906809895066215,
      "grad_norm": 0.34236788749694824,
      "learning_rate": 5.655660470602189e-06,
      "loss": 0.0195,
      "step": 1338620
    },
    {
      "epoch": 2.1907137199452746,
      "grad_norm": 0.16447852551937103,
      "learning_rate": 5.655594578388672e-06,
      "loss": 0.0145,
      "step": 1338640
    },
    {
      "epoch": 2.1907464503839282,
      "grad_norm": 0.2996658384799957,
      "learning_rate": 5.655528686175155e-06,
      "loss": 0.0111,
      "step": 1338660
    },
    {
      "epoch": 2.1907791808225814,
      "grad_norm": 0.45092853903770447,
      "learning_rate": 5.655462793961638e-06,
      "loss": 0.021,
      "step": 1338680
    },
    {
      "epoch": 2.1908119112612345,
      "grad_norm": 0.42953377962112427,
      "learning_rate": 5.655396901748121e-06,
      "loss": 0.016,
      "step": 1338700
    },
    {
      "epoch": 2.190844641699888,
      "grad_norm": 0.5773082375526428,
      "learning_rate": 5.655331009534604e-06,
      "loss": 0.0199,
      "step": 1338720
    },
    {
      "epoch": 2.1908773721385413,
      "grad_norm": 1.2402600049972534,
      "learning_rate": 5.655265117321087e-06,
      "loss": 0.0172,
      "step": 1338740
    },
    {
      "epoch": 2.190910102577195,
      "grad_norm": 0.4164789617061615,
      "learning_rate": 5.6551992251075695e-06,
      "loss": 0.0202,
      "step": 1338760
    },
    {
      "epoch": 2.190942833015848,
      "grad_norm": 0.3110887110233307,
      "learning_rate": 5.655133332894053e-06,
      "loss": 0.0194,
      "step": 1338780
    },
    {
      "epoch": 2.1909755634545016,
      "grad_norm": 0.37697526812553406,
      "learning_rate": 5.655067440680535e-06,
      "loss": 0.0144,
      "step": 1338800
    },
    {
      "epoch": 2.1910082938931548,
      "grad_norm": 0.263190358877182,
      "learning_rate": 5.6550015484670186e-06,
      "loss": 0.013,
      "step": 1338820
    },
    {
      "epoch": 2.191041024331808,
      "grad_norm": 0.362827867269516,
      "learning_rate": 5.6549356562535005e-06,
      "loss": 0.0142,
      "step": 1338840
    },
    {
      "epoch": 2.1910737547704615,
      "grad_norm": 0.08452801406383514,
      "learning_rate": 5.654869764039984e-06,
      "loss": 0.0102,
      "step": 1338860
    },
    {
      "epoch": 2.1911064852091147,
      "grad_norm": 0.6053277850151062,
      "learning_rate": 5.654803871826467e-06,
      "loss": 0.0189,
      "step": 1338880
    },
    {
      "epoch": 2.1911392156477683,
      "grad_norm": 0.5689296126365662,
      "learning_rate": 5.6547379796129495e-06,
      "loss": 0.0117,
      "step": 1338900
    },
    {
      "epoch": 2.1911719460864214,
      "grad_norm": 0.3532562255859375,
      "learning_rate": 5.654672087399432e-06,
      "loss": 0.0138,
      "step": 1338920
    },
    {
      "epoch": 2.191204676525075,
      "grad_norm": 0.2887343466281891,
      "learning_rate": 5.654606195185916e-06,
      "loss": 0.0119,
      "step": 1338940
    },
    {
      "epoch": 2.191237406963728,
      "grad_norm": 0.3220875859260559,
      "learning_rate": 5.654540302972398e-06,
      "loss": 0.0171,
      "step": 1338960
    },
    {
      "epoch": 2.1912701374023813,
      "grad_norm": 0.3787021338939667,
      "learning_rate": 5.654474410758881e-06,
      "loss": 0.017,
      "step": 1338980
    },
    {
      "epoch": 2.191302867841035,
      "grad_norm": 0.2542188763618469,
      "learning_rate": 5.654408518545363e-06,
      "loss": 0.0146,
      "step": 1339000
    },
    {
      "epoch": 2.191335598279688,
      "grad_norm": 0.4529350697994232,
      "learning_rate": 5.654342626331847e-06,
      "loss": 0.0157,
      "step": 1339020
    },
    {
      "epoch": 2.1913683287183416,
      "grad_norm": 0.29855743050575256,
      "learning_rate": 5.65427673411833e-06,
      "loss": 0.0215,
      "step": 1339040
    },
    {
      "epoch": 2.191401059156995,
      "grad_norm": 0.12327343970537186,
      "learning_rate": 5.654210841904812e-06,
      "loss": 0.0131,
      "step": 1339060
    },
    {
      "epoch": 2.1914337895956484,
      "grad_norm": 0.10211341083049774,
      "learning_rate": 5.654144949691296e-06,
      "loss": 0.0204,
      "step": 1339080
    },
    {
      "epoch": 2.1914665200343015,
      "grad_norm": 0.15321876108646393,
      "learning_rate": 5.654079057477778e-06,
      "loss": 0.0117,
      "step": 1339100
    },
    {
      "epoch": 2.1914992504729547,
      "grad_norm": 0.18434277176856995,
      "learning_rate": 5.654013165264261e-06,
      "loss": 0.0128,
      "step": 1339120
    },
    {
      "epoch": 2.1915319809116083,
      "grad_norm": 0.07764078676700592,
      "learning_rate": 5.653947273050744e-06,
      "loss": 0.0152,
      "step": 1339140
    },
    {
      "epoch": 2.1915647113502614,
      "grad_norm": 0.5273014903068542,
      "learning_rate": 5.653881380837228e-06,
      "loss": 0.0127,
      "step": 1339160
    },
    {
      "epoch": 2.191597441788915,
      "grad_norm": 0.1978052407503128,
      "learning_rate": 5.65381548862371e-06,
      "loss": 0.0165,
      "step": 1339180
    },
    {
      "epoch": 2.191630172227568,
      "grad_norm": 0.6979369521141052,
      "learning_rate": 5.653749596410193e-06,
      "loss": 0.016,
      "step": 1339200
    },
    {
      "epoch": 2.1916629026662218,
      "grad_norm": 1.6805789470672607,
      "learning_rate": 5.653683704196675e-06,
      "loss": 0.0119,
      "step": 1339220
    },
    {
      "epoch": 2.191695633104875,
      "grad_norm": 0.9114364981651306,
      "learning_rate": 5.653617811983159e-06,
      "loss": 0.0153,
      "step": 1339240
    },
    {
      "epoch": 2.191728363543528,
      "grad_norm": 0.32373297214508057,
      "learning_rate": 5.6535519197696405e-06,
      "loss": 0.0126,
      "step": 1339260
    },
    {
      "epoch": 2.1917610939821817,
      "grad_norm": 0.44839218258857727,
      "learning_rate": 5.653486027556124e-06,
      "loss": 0.0165,
      "step": 1339280
    },
    {
      "epoch": 2.191793824420835,
      "grad_norm": 0.2119792401790619,
      "learning_rate": 5.653420135342607e-06,
      "loss": 0.0136,
      "step": 1339300
    },
    {
      "epoch": 2.1918265548594884,
      "grad_norm": 0.14480705559253693,
      "learning_rate": 5.65335424312909e-06,
      "loss": 0.0147,
      "step": 1339320
    },
    {
      "epoch": 2.1918592852981416,
      "grad_norm": 0.2616114616394043,
      "learning_rate": 5.653288350915572e-06,
      "loss": 0.0177,
      "step": 1339340
    },
    {
      "epoch": 2.1918920157367947,
      "grad_norm": 1.8457891941070557,
      "learning_rate": 5.653222458702056e-06,
      "loss": 0.0128,
      "step": 1339360
    },
    {
      "epoch": 2.1919247461754483,
      "grad_norm": 0.3708559274673462,
      "learning_rate": 5.653156566488539e-06,
      "loss": 0.0167,
      "step": 1339380
    },
    {
      "epoch": 2.1919574766141015,
      "grad_norm": 0.7986422777175903,
      "learning_rate": 5.653090674275021e-06,
      "loss": 0.0165,
      "step": 1339400
    },
    {
      "epoch": 2.191990207052755,
      "grad_norm": 0.36836087703704834,
      "learning_rate": 5.653024782061505e-06,
      "loss": 0.0197,
      "step": 1339420
    },
    {
      "epoch": 2.192022937491408,
      "grad_norm": 0.9715401530265808,
      "learning_rate": 5.652958889847987e-06,
      "loss": 0.0123,
      "step": 1339440
    },
    {
      "epoch": 2.192055667930062,
      "grad_norm": 0.23043479025363922,
      "learning_rate": 5.6528929976344705e-06,
      "loss": 0.0149,
      "step": 1339460
    },
    {
      "epoch": 2.192088398368715,
      "grad_norm": 1.132634162902832,
      "learning_rate": 5.652827105420952e-06,
      "loss": 0.0199,
      "step": 1339480
    },
    {
      "epoch": 2.192121128807368,
      "grad_norm": 1.1231950521469116,
      "learning_rate": 5.652761213207436e-06,
      "loss": 0.0095,
      "step": 1339500
    },
    {
      "epoch": 2.1921538592460217,
      "grad_norm": 0.5401144027709961,
      "learning_rate": 5.652695320993919e-06,
      "loss": 0.0141,
      "step": 1339520
    },
    {
      "epoch": 2.192186589684675,
      "grad_norm": 0.14855557680130005,
      "learning_rate": 5.6526294287804014e-06,
      "loss": 0.0102,
      "step": 1339540
    },
    {
      "epoch": 2.1922193201233284,
      "grad_norm": 0.6874971985816956,
      "learning_rate": 5.652563536566884e-06,
      "loss": 0.013,
      "step": 1339560
    },
    {
      "epoch": 2.1922520505619816,
      "grad_norm": 1.1966804265975952,
      "learning_rate": 5.652497644353368e-06,
      "loss": 0.0169,
      "step": 1339580
    },
    {
      "epoch": 2.192284781000635,
      "grad_norm": 0.20626869797706604,
      "learning_rate": 5.65243175213985e-06,
      "loss": 0.0175,
      "step": 1339600
    },
    {
      "epoch": 2.1923175114392883,
      "grad_norm": 0.74680495262146,
      "learning_rate": 5.652365859926333e-06,
      "loss": 0.0149,
      "step": 1339620
    },
    {
      "epoch": 2.1923502418779415,
      "grad_norm": 0.6749428510665894,
      "learning_rate": 5.652299967712815e-06,
      "loss": 0.0193,
      "step": 1339640
    },
    {
      "epoch": 2.192382972316595,
      "grad_norm": 0.1916889250278473,
      "learning_rate": 5.652234075499299e-06,
      "loss": 0.0162,
      "step": 1339660
    },
    {
      "epoch": 2.192415702755248,
      "grad_norm": 0.5875375270843506,
      "learning_rate": 5.6521681832857815e-06,
      "loss": 0.0184,
      "step": 1339680
    },
    {
      "epoch": 2.192448433193902,
      "grad_norm": 0.2334984689950943,
      "learning_rate": 5.652102291072264e-06,
      "loss": 0.0223,
      "step": 1339700
    },
    {
      "epoch": 2.192481163632555,
      "grad_norm": 0.37391889095306396,
      "learning_rate": 5.652036398858747e-06,
      "loss": 0.0129,
      "step": 1339720
    },
    {
      "epoch": 2.192513894071208,
      "grad_norm": 0.25358203053474426,
      "learning_rate": 5.6519705066452305e-06,
      "loss": 0.0191,
      "step": 1339740
    },
    {
      "epoch": 2.1925466245098617,
      "grad_norm": 0.38533151149749756,
      "learning_rate": 5.651904614431713e-06,
      "loss": 0.0143,
      "step": 1339760
    },
    {
      "epoch": 2.192579354948515,
      "grad_norm": 0.40730521082878113,
      "learning_rate": 5.651838722218196e-06,
      "loss": 0.0134,
      "step": 1339780
    },
    {
      "epoch": 2.1926120853871685,
      "grad_norm": 0.33165499567985535,
      "learning_rate": 5.65177283000468e-06,
      "loss": 0.0137,
      "step": 1339800
    },
    {
      "epoch": 2.1926448158258216,
      "grad_norm": 0.14050908386707306,
      "learning_rate": 5.6517069377911615e-06,
      "loss": 0.0167,
      "step": 1339820
    },
    {
      "epoch": 2.192677546264475,
      "grad_norm": 0.32871147990226746,
      "learning_rate": 5.651641045577645e-06,
      "loss": 0.0104,
      "step": 1339840
    },
    {
      "epoch": 2.1927102767031283,
      "grad_norm": 1.0521655082702637,
      "learning_rate": 5.651575153364127e-06,
      "loss": 0.0157,
      "step": 1339860
    },
    {
      "epoch": 2.1927430071417815,
      "grad_norm": 0.08736581355333328,
      "learning_rate": 5.6515092611506106e-06,
      "loss": 0.0182,
      "step": 1339880
    },
    {
      "epoch": 2.192775737580435,
      "grad_norm": 0.6314774751663208,
      "learning_rate": 5.651443368937093e-06,
      "loss": 0.0116,
      "step": 1339900
    },
    {
      "epoch": 2.1928084680190882,
      "grad_norm": 0.5781523585319519,
      "learning_rate": 5.651377476723576e-06,
      "loss": 0.018,
      "step": 1339920
    },
    {
      "epoch": 2.192841198457742,
      "grad_norm": 0.33989498019218445,
      "learning_rate": 5.651311584510059e-06,
      "loss": 0.0192,
      "step": 1339940
    },
    {
      "epoch": 2.192873928896395,
      "grad_norm": 1.3586041927337646,
      "learning_rate": 5.651245692296542e-06,
      "loss": 0.0172,
      "step": 1339960
    },
    {
      "epoch": 2.1929066593350486,
      "grad_norm": 0.2867638170719147,
      "learning_rate": 5.651179800083024e-06,
      "loss": 0.0177,
      "step": 1339980
    },
    {
      "epoch": 2.1929393897737017,
      "grad_norm": 0.21196003258228302,
      "learning_rate": 5.651113907869508e-06,
      "loss": 0.0147,
      "step": 1340000
    },
    {
      "epoch": 2.192972120212355,
      "grad_norm": 0.12071225047111511,
      "learning_rate": 5.65104801565599e-06,
      "loss": 0.0161,
      "step": 1340020
    },
    {
      "epoch": 2.1930048506510085,
      "grad_norm": 0.28984159231185913,
      "learning_rate": 5.650982123442473e-06,
      "loss": 0.0183,
      "step": 1340040
    },
    {
      "epoch": 2.1930375810896616,
      "grad_norm": 0.8117527365684509,
      "learning_rate": 5.650916231228955e-06,
      "loss": 0.0146,
      "step": 1340060
    },
    {
      "epoch": 2.193070311528315,
      "grad_norm": 0.9552132487297058,
      "learning_rate": 5.650850339015439e-06,
      "loss": 0.0112,
      "step": 1340080
    },
    {
      "epoch": 2.1931030419669684,
      "grad_norm": 0.367378294467926,
      "learning_rate": 5.650784446801922e-06,
      "loss": 0.0199,
      "step": 1340100
    },
    {
      "epoch": 2.193135772405622,
      "grad_norm": 0.15031243860721588,
      "learning_rate": 5.650718554588404e-06,
      "loss": 0.0102,
      "step": 1340120
    },
    {
      "epoch": 2.193168502844275,
      "grad_norm": 0.20892763137817383,
      "learning_rate": 5.650652662374888e-06,
      "loss": 0.0134,
      "step": 1340140
    },
    {
      "epoch": 2.1932012332829283,
      "grad_norm": 1.4546878337860107,
      "learning_rate": 5.650586770161371e-06,
      "loss": 0.01,
      "step": 1340160
    },
    {
      "epoch": 2.193233963721582,
      "grad_norm": 0.6134517788887024,
      "learning_rate": 5.650520877947853e-06,
      "loss": 0.0195,
      "step": 1340180
    },
    {
      "epoch": 2.193266694160235,
      "grad_norm": 0.30961787700653076,
      "learning_rate": 5.650454985734336e-06,
      "loss": 0.0109,
      "step": 1340200
    },
    {
      "epoch": 2.1932994245988886,
      "grad_norm": 0.6312559843063354,
      "learning_rate": 5.65038909352082e-06,
      "loss": 0.0146,
      "step": 1340220
    },
    {
      "epoch": 2.1933321550375418,
      "grad_norm": 0.16911861300468445,
      "learning_rate": 5.6503232013073016e-06,
      "loss": 0.0138,
      "step": 1340240
    },
    {
      "epoch": 2.1933648854761953,
      "grad_norm": 0.9456941485404968,
      "learning_rate": 5.650257309093785e-06,
      "loss": 0.0113,
      "step": 1340260
    },
    {
      "epoch": 2.1933976159148485,
      "grad_norm": 0.1626662313938141,
      "learning_rate": 5.650191416880267e-06,
      "loss": 0.0147,
      "step": 1340280
    },
    {
      "epoch": 2.1934303463535016,
      "grad_norm": 0.4957112669944763,
      "learning_rate": 5.650125524666751e-06,
      "loss": 0.0138,
      "step": 1340300
    },
    {
      "epoch": 2.1934630767921552,
      "grad_norm": 0.8423811197280884,
      "learning_rate": 5.650059632453233e-06,
      "loss": 0.0136,
      "step": 1340320
    },
    {
      "epoch": 2.1934958072308084,
      "grad_norm": 0.4031495749950409,
      "learning_rate": 5.649993740239716e-06,
      "loss": 0.0124,
      "step": 1340340
    },
    {
      "epoch": 2.193528537669462,
      "grad_norm": 0.35284024477005005,
      "learning_rate": 5.649927848026199e-06,
      "loss": 0.0164,
      "step": 1340360
    },
    {
      "epoch": 2.193561268108115,
      "grad_norm": 0.3369750678539276,
      "learning_rate": 5.6498619558126824e-06,
      "loss": 0.0138,
      "step": 1340380
    },
    {
      "epoch": 2.1935939985467687,
      "grad_norm": 0.509175717830658,
      "learning_rate": 5.649796063599164e-06,
      "loss": 0.0261,
      "step": 1340400
    },
    {
      "epoch": 2.193626728985422,
      "grad_norm": 0.4158511757850647,
      "learning_rate": 5.649730171385648e-06,
      "loss": 0.0151,
      "step": 1340420
    },
    {
      "epoch": 2.193659459424075,
      "grad_norm": 0.31493905186653137,
      "learning_rate": 5.6496642791721315e-06,
      "loss": 0.0142,
      "step": 1340440
    },
    {
      "epoch": 2.1936921898627286,
      "grad_norm": 0.9058353900909424,
      "learning_rate": 5.649598386958613e-06,
      "loss": 0.0126,
      "step": 1340460
    },
    {
      "epoch": 2.1937249203013818,
      "grad_norm": 0.5146298408508301,
      "learning_rate": 5.649532494745097e-06,
      "loss": 0.0159,
      "step": 1340480
    },
    {
      "epoch": 2.1937576507400354,
      "grad_norm": 1.7127902507781982,
      "learning_rate": 5.649466602531579e-06,
      "loss": 0.0129,
      "step": 1340500
    },
    {
      "epoch": 2.1937903811786885,
      "grad_norm": 0.9666693806648254,
      "learning_rate": 5.6494007103180625e-06,
      "loss": 0.0164,
      "step": 1340520
    },
    {
      "epoch": 2.193823111617342,
      "grad_norm": 0.3711962401866913,
      "learning_rate": 5.649334818104545e-06,
      "loss": 0.0129,
      "step": 1340540
    },
    {
      "epoch": 2.1938558420559953,
      "grad_norm": 0.43480241298675537,
      "learning_rate": 5.649268925891028e-06,
      "loss": 0.0186,
      "step": 1340560
    },
    {
      "epoch": 2.1938885724946484,
      "grad_norm": 0.5108687877655029,
      "learning_rate": 5.649203033677511e-06,
      "loss": 0.0148,
      "step": 1340580
    },
    {
      "epoch": 2.193921302933302,
      "grad_norm": 0.6608577370643616,
      "learning_rate": 5.649137141463994e-06,
      "loss": 0.0111,
      "step": 1340600
    },
    {
      "epoch": 2.193954033371955,
      "grad_norm": 0.41812101006507874,
      "learning_rate": 5.649071249250476e-06,
      "loss": 0.0161,
      "step": 1340620
    },
    {
      "epoch": 2.1939867638106088,
      "grad_norm": 0.46771669387817383,
      "learning_rate": 5.64900535703696e-06,
      "loss": 0.0144,
      "step": 1340640
    },
    {
      "epoch": 2.194019494249262,
      "grad_norm": 0.21895615756511688,
      "learning_rate": 5.648939464823442e-06,
      "loss": 0.0093,
      "step": 1340660
    },
    {
      "epoch": 2.1940522246879155,
      "grad_norm": 2.6563544273376465,
      "learning_rate": 5.648873572609925e-06,
      "loss": 0.0169,
      "step": 1340680
    },
    {
      "epoch": 2.1940849551265686,
      "grad_norm": 0.8559916615486145,
      "learning_rate": 5.648807680396408e-06,
      "loss": 0.0221,
      "step": 1340700
    },
    {
      "epoch": 2.194117685565222,
      "grad_norm": 0.17614440619945526,
      "learning_rate": 5.648741788182891e-06,
      "loss": 0.0112,
      "step": 1340720
    },
    {
      "epoch": 2.1941504160038754,
      "grad_norm": 0.21509084105491638,
      "learning_rate": 5.6486758959693734e-06,
      "loss": 0.0154,
      "step": 1340740
    },
    {
      "epoch": 2.1941831464425285,
      "grad_norm": 0.5171774625778198,
      "learning_rate": 5.648610003755857e-06,
      "loss": 0.02,
      "step": 1340760
    },
    {
      "epoch": 2.194215876881182,
      "grad_norm": 0.4536225199699402,
      "learning_rate": 5.64854411154234e-06,
      "loss": 0.0124,
      "step": 1340780
    },
    {
      "epoch": 2.1942486073198353,
      "grad_norm": 0.0475572869181633,
      "learning_rate": 5.6484782193288225e-06,
      "loss": 0.0134,
      "step": 1340800
    },
    {
      "epoch": 2.1942813377584884,
      "grad_norm": 0.28512051701545715,
      "learning_rate": 5.648412327115306e-06,
      "loss": 0.012,
      "step": 1340820
    },
    {
      "epoch": 2.194314068197142,
      "grad_norm": 0.6475895643234253,
      "learning_rate": 5.648346434901788e-06,
      "loss": 0.0177,
      "step": 1340840
    },
    {
      "epoch": 2.194346798635795,
      "grad_norm": 0.2712198495864868,
      "learning_rate": 5.6482805426882716e-06,
      "loss": 0.0122,
      "step": 1340860
    },
    {
      "epoch": 2.1943795290744488,
      "grad_norm": 1.360346794128418,
      "learning_rate": 5.6482146504747535e-06,
      "loss": 0.0251,
      "step": 1340880
    },
    {
      "epoch": 2.194412259513102,
      "grad_norm": 0.2128538191318512,
      "learning_rate": 5.648148758261237e-06,
      "loss": 0.0173,
      "step": 1340900
    },
    {
      "epoch": 2.1944449899517555,
      "grad_norm": 0.7228176593780518,
      "learning_rate": 5.64808286604772e-06,
      "loss": 0.0129,
      "step": 1340920
    },
    {
      "epoch": 2.1944777203904087,
      "grad_norm": 0.7648135423660278,
      "learning_rate": 5.6480169738342025e-06,
      "loss": 0.014,
      "step": 1340940
    },
    {
      "epoch": 2.194510450829062,
      "grad_norm": 0.4432433843612671,
      "learning_rate": 5.647951081620685e-06,
      "loss": 0.0116,
      "step": 1340960
    },
    {
      "epoch": 2.1945431812677154,
      "grad_norm": 1.004770040512085,
      "learning_rate": 5.647885189407169e-06,
      "loss": 0.0188,
      "step": 1340980
    },
    {
      "epoch": 2.1945759117063686,
      "grad_norm": 0.2521687150001526,
      "learning_rate": 5.647819297193651e-06,
      "loss": 0.0208,
      "step": 1341000
    },
    {
      "epoch": 2.194608642145022,
      "grad_norm": 0.48158225417137146,
      "learning_rate": 5.647753404980134e-06,
      "loss": 0.0144,
      "step": 1341020
    },
    {
      "epoch": 2.1946413725836753,
      "grad_norm": 0.18436719477176666,
      "learning_rate": 5.647687512766616e-06,
      "loss": 0.0127,
      "step": 1341040
    },
    {
      "epoch": 2.194674103022329,
      "grad_norm": 0.344044953584671,
      "learning_rate": 5.6476216205531e-06,
      "loss": 0.0106,
      "step": 1341060
    },
    {
      "epoch": 2.194706833460982,
      "grad_norm": 0.256082683801651,
      "learning_rate": 5.647555728339582e-06,
      "loss": 0.0153,
      "step": 1341080
    },
    {
      "epoch": 2.194739563899635,
      "grad_norm": 0.15813048183918,
      "learning_rate": 5.647489836126065e-06,
      "loss": 0.0099,
      "step": 1341100
    },
    {
      "epoch": 2.194772294338289,
      "grad_norm": 0.3491765260696411,
      "learning_rate": 5.647423943912548e-06,
      "loss": 0.0154,
      "step": 1341120
    },
    {
      "epoch": 2.194805024776942,
      "grad_norm": 0.7035876512527466,
      "learning_rate": 5.647358051699031e-06,
      "loss": 0.0135,
      "step": 1341140
    },
    {
      "epoch": 2.1948377552155955,
      "grad_norm": 0.18010075390338898,
      "learning_rate": 5.647292159485514e-06,
      "loss": 0.0164,
      "step": 1341160
    },
    {
      "epoch": 2.1948704856542487,
      "grad_norm": 0.21497753262519836,
      "learning_rate": 5.647226267271997e-06,
      "loss": 0.0173,
      "step": 1341180
    },
    {
      "epoch": 2.194903216092902,
      "grad_norm": 0.43461504578590393,
      "learning_rate": 5.64716037505848e-06,
      "loss": 0.0122,
      "step": 1341200
    },
    {
      "epoch": 2.1949359465315554,
      "grad_norm": 0.19125506281852722,
      "learning_rate": 5.647094482844963e-06,
      "loss": 0.0148,
      "step": 1341220
    },
    {
      "epoch": 2.1949686769702086,
      "grad_norm": 0.7947219014167786,
      "learning_rate": 5.647028590631446e-06,
      "loss": 0.0156,
      "step": 1341240
    },
    {
      "epoch": 2.195001407408862,
      "grad_norm": 0.18371738493442535,
      "learning_rate": 5.646962698417928e-06,
      "loss": 0.0141,
      "step": 1341260
    },
    {
      "epoch": 2.1950341378475153,
      "grad_norm": 1.3077586889266968,
      "learning_rate": 5.646896806204412e-06,
      "loss": 0.0216,
      "step": 1341280
    },
    {
      "epoch": 2.195066868286169,
      "grad_norm": 0.39037811756134033,
      "learning_rate": 5.6468309139908935e-06,
      "loss": 0.0163,
      "step": 1341300
    },
    {
      "epoch": 2.195099598724822,
      "grad_norm": 0.16822472214698792,
      "learning_rate": 5.646765021777377e-06,
      "loss": 0.0171,
      "step": 1341320
    },
    {
      "epoch": 2.1951323291634752,
      "grad_norm": 0.37847772240638733,
      "learning_rate": 5.64669912956386e-06,
      "loss": 0.0122,
      "step": 1341340
    },
    {
      "epoch": 2.195165059602129,
      "grad_norm": 0.7387564778327942,
      "learning_rate": 5.646633237350343e-06,
      "loss": 0.0133,
      "step": 1341360
    },
    {
      "epoch": 2.195197790040782,
      "grad_norm": 0.7911357879638672,
      "learning_rate": 5.646567345136825e-06,
      "loss": 0.0179,
      "step": 1341380
    },
    {
      "epoch": 2.1952305204794356,
      "grad_norm": 0.9950219392776489,
      "learning_rate": 5.646501452923309e-06,
      "loss": 0.0152,
      "step": 1341400
    },
    {
      "epoch": 2.1952632509180887,
      "grad_norm": 0.25097277760505676,
      "learning_rate": 5.646435560709791e-06,
      "loss": 0.0169,
      "step": 1341420
    },
    {
      "epoch": 2.1952959813567423,
      "grad_norm": 0.2700345516204834,
      "learning_rate": 5.646369668496274e-06,
      "loss": 0.0143,
      "step": 1341440
    },
    {
      "epoch": 2.1953287117953955,
      "grad_norm": 0.2705685794353485,
      "learning_rate": 5.646303776282756e-06,
      "loss": 0.017,
      "step": 1341460
    },
    {
      "epoch": 2.1953614422340486,
      "grad_norm": 0.5874972343444824,
      "learning_rate": 5.64623788406924e-06,
      "loss": 0.0186,
      "step": 1341480
    },
    {
      "epoch": 2.195394172672702,
      "grad_norm": 0.44520625472068787,
      "learning_rate": 5.6461719918557235e-06,
      "loss": 0.0147,
      "step": 1341500
    },
    {
      "epoch": 2.1954269031113554,
      "grad_norm": 0.0963483527302742,
      "learning_rate": 5.646106099642205e-06,
      "loss": 0.0187,
      "step": 1341520
    },
    {
      "epoch": 2.195459633550009,
      "grad_norm": 0.1601199060678482,
      "learning_rate": 5.646040207428689e-06,
      "loss": 0.0183,
      "step": 1341540
    },
    {
      "epoch": 2.195492363988662,
      "grad_norm": 1.2433091402053833,
      "learning_rate": 5.645974315215172e-06,
      "loss": 0.0212,
      "step": 1341560
    },
    {
      "epoch": 2.1955250944273157,
      "grad_norm": 0.41791924834251404,
      "learning_rate": 5.6459084230016544e-06,
      "loss": 0.0139,
      "step": 1341580
    },
    {
      "epoch": 2.195557824865969,
      "grad_norm": 1.2280489206314087,
      "learning_rate": 5.645842530788137e-06,
      "loss": 0.0105,
      "step": 1341600
    },
    {
      "epoch": 2.195590555304622,
      "grad_norm": 0.33200448751449585,
      "learning_rate": 5.645776638574621e-06,
      "loss": 0.0202,
      "step": 1341620
    },
    {
      "epoch": 2.1956232857432756,
      "grad_norm": 0.6321806907653809,
      "learning_rate": 5.645710746361103e-06,
      "loss": 0.0156,
      "step": 1341640
    },
    {
      "epoch": 2.1956560161819287,
      "grad_norm": 0.1420036405324936,
      "learning_rate": 5.645644854147586e-06,
      "loss": 0.0183,
      "step": 1341660
    },
    {
      "epoch": 2.1956887466205823,
      "grad_norm": 0.17557530105113983,
      "learning_rate": 5.645578961934068e-06,
      "loss": 0.0138,
      "step": 1341680
    },
    {
      "epoch": 2.1957214770592355,
      "grad_norm": 0.20513829588890076,
      "learning_rate": 5.645513069720552e-06,
      "loss": 0.0119,
      "step": 1341700
    },
    {
      "epoch": 2.195754207497889,
      "grad_norm": 1.1761703491210938,
      "learning_rate": 5.6454471775070345e-06,
      "loss": 0.0185,
      "step": 1341720
    },
    {
      "epoch": 2.1957869379365422,
      "grad_norm": 1.101677656173706,
      "learning_rate": 5.645381285293517e-06,
      "loss": 0.0178,
      "step": 1341740
    },
    {
      "epoch": 2.1958196683751954,
      "grad_norm": 0.5301182270050049,
      "learning_rate": 5.64531539308e-06,
      "loss": 0.0125,
      "step": 1341760
    },
    {
      "epoch": 2.195852398813849,
      "grad_norm": 0.21593648195266724,
      "learning_rate": 5.6452495008664835e-06,
      "loss": 0.0144,
      "step": 1341780
    },
    {
      "epoch": 2.195885129252502,
      "grad_norm": 0.3233393132686615,
      "learning_rate": 5.6451836086529654e-06,
      "loss": 0.0125,
      "step": 1341800
    },
    {
      "epoch": 2.1959178596911557,
      "grad_norm": 0.2738126218318939,
      "learning_rate": 5.645117716439449e-06,
      "loss": 0.0139,
      "step": 1341820
    },
    {
      "epoch": 2.195950590129809,
      "grad_norm": 0.30823180079460144,
      "learning_rate": 5.645051824225933e-06,
      "loss": 0.0197,
      "step": 1341840
    },
    {
      "epoch": 2.1959833205684625,
      "grad_norm": 0.36600902676582336,
      "learning_rate": 5.6449859320124145e-06,
      "loss": 0.012,
      "step": 1341860
    },
    {
      "epoch": 2.1960160510071156,
      "grad_norm": 0.7667341828346252,
      "learning_rate": 5.644920039798898e-06,
      "loss": 0.0155,
      "step": 1341880
    },
    {
      "epoch": 2.1960487814457688,
      "grad_norm": 0.29571229219436646,
      "learning_rate": 5.64485414758538e-06,
      "loss": 0.0151,
      "step": 1341900
    },
    {
      "epoch": 2.1960815118844224,
      "grad_norm": 0.36449429392814636,
      "learning_rate": 5.6447882553718636e-06,
      "loss": 0.0112,
      "step": 1341920
    },
    {
      "epoch": 2.1961142423230755,
      "grad_norm": 0.9129580855369568,
      "learning_rate": 5.644722363158346e-06,
      "loss": 0.0103,
      "step": 1341940
    },
    {
      "epoch": 2.196146972761729,
      "grad_norm": 0.3381003141403198,
      "learning_rate": 5.644656470944829e-06,
      "loss": 0.0178,
      "step": 1341960
    },
    {
      "epoch": 2.1961797032003822,
      "grad_norm": 0.6094207167625427,
      "learning_rate": 5.644590578731312e-06,
      "loss": 0.017,
      "step": 1341980
    },
    {
      "epoch": 2.196212433639036,
      "grad_norm": 0.39948928356170654,
      "learning_rate": 5.644524686517795e-06,
      "loss": 0.0199,
      "step": 1342000
    },
    {
      "epoch": 2.196245164077689,
      "grad_norm": 1.0578669309616089,
      "learning_rate": 5.644458794304277e-06,
      "loss": 0.0219,
      "step": 1342020
    },
    {
      "epoch": 2.196277894516342,
      "grad_norm": 0.3725486099720001,
      "learning_rate": 5.644392902090761e-06,
      "loss": 0.0108,
      "step": 1342040
    },
    {
      "epoch": 2.1963106249549957,
      "grad_norm": 0.2460155338048935,
      "learning_rate": 5.644327009877243e-06,
      "loss": 0.0116,
      "step": 1342060
    },
    {
      "epoch": 2.196343355393649,
      "grad_norm": 0.4763241112232208,
      "learning_rate": 5.644261117663726e-06,
      "loss": 0.0153,
      "step": 1342080
    },
    {
      "epoch": 2.1963760858323025,
      "grad_norm": 0.2284816950559616,
      "learning_rate": 5.644195225450208e-06,
      "loss": 0.0131,
      "step": 1342100
    },
    {
      "epoch": 2.1964088162709556,
      "grad_norm": 0.09959890693426132,
      "learning_rate": 5.644129333236692e-06,
      "loss": 0.01,
      "step": 1342120
    },
    {
      "epoch": 2.1964415467096092,
      "grad_norm": 0.5697330832481384,
      "learning_rate": 5.6440634410231745e-06,
      "loss": 0.0207,
      "step": 1342140
    },
    {
      "epoch": 2.1964742771482624,
      "grad_norm": 0.6258646249771118,
      "learning_rate": 5.643997548809657e-06,
      "loss": 0.0152,
      "step": 1342160
    },
    {
      "epoch": 2.1965070075869155,
      "grad_norm": 0.654413104057312,
      "learning_rate": 5.64393165659614e-06,
      "loss": 0.0131,
      "step": 1342180
    },
    {
      "epoch": 2.196539738025569,
      "grad_norm": 0.8908706307411194,
      "learning_rate": 5.643865764382624e-06,
      "loss": 0.0148,
      "step": 1342200
    },
    {
      "epoch": 2.1965724684642223,
      "grad_norm": 0.16099382936954498,
      "learning_rate": 5.643799872169106e-06,
      "loss": 0.0135,
      "step": 1342220
    },
    {
      "epoch": 2.196605198902876,
      "grad_norm": 0.673677384853363,
      "learning_rate": 5.643733979955589e-06,
      "loss": 0.018,
      "step": 1342240
    },
    {
      "epoch": 2.196637929341529,
      "grad_norm": 0.41597452759742737,
      "learning_rate": 5.643668087742073e-06,
      "loss": 0.0164,
      "step": 1342260
    },
    {
      "epoch": 2.196670659780182,
      "grad_norm": 0.49332791566848755,
      "learning_rate": 5.6436021955285546e-06,
      "loss": 0.0174,
      "step": 1342280
    },
    {
      "epoch": 2.1967033902188358,
      "grad_norm": 0.17817339301109314,
      "learning_rate": 5.643536303315038e-06,
      "loss": 0.0143,
      "step": 1342300
    },
    {
      "epoch": 2.196736120657489,
      "grad_norm": 0.7303518652915955,
      "learning_rate": 5.64347041110152e-06,
      "loss": 0.0249,
      "step": 1342320
    },
    {
      "epoch": 2.1967688510961425,
      "grad_norm": 0.40031784772872925,
      "learning_rate": 5.643404518888004e-06,
      "loss": 0.0221,
      "step": 1342340
    },
    {
      "epoch": 2.1968015815347957,
      "grad_norm": 0.2964209318161011,
      "learning_rate": 5.643338626674486e-06,
      "loss": 0.0163,
      "step": 1342360
    },
    {
      "epoch": 2.1968343119734492,
      "grad_norm": 0.19086498022079468,
      "learning_rate": 5.643272734460969e-06,
      "loss": 0.0115,
      "step": 1342380
    },
    {
      "epoch": 2.1968670424121024,
      "grad_norm": 0.15016621351242065,
      "learning_rate": 5.643206842247452e-06,
      "loss": 0.0202,
      "step": 1342400
    },
    {
      "epoch": 2.1968997728507556,
      "grad_norm": 0.3504992127418518,
      "learning_rate": 5.6431409500339354e-06,
      "loss": 0.0194,
      "step": 1342420
    },
    {
      "epoch": 2.196932503289409,
      "grad_norm": 0.26018330454826355,
      "learning_rate": 5.643075057820417e-06,
      "loss": 0.0171,
      "step": 1342440
    },
    {
      "epoch": 2.1969652337280623,
      "grad_norm": 0.3790872395038605,
      "learning_rate": 5.643009165606901e-06,
      "loss": 0.0126,
      "step": 1342460
    },
    {
      "epoch": 2.196997964166716,
      "grad_norm": 0.3156169652938843,
      "learning_rate": 5.642943273393383e-06,
      "loss": 0.0135,
      "step": 1342480
    },
    {
      "epoch": 2.197030694605369,
      "grad_norm": 0.3016720116138458,
      "learning_rate": 5.642877381179866e-06,
      "loss": 0.0125,
      "step": 1342500
    },
    {
      "epoch": 2.1970634250440226,
      "grad_norm": 0.21293027698993683,
      "learning_rate": 5.642811488966349e-06,
      "loss": 0.0148,
      "step": 1342520
    },
    {
      "epoch": 2.197096155482676,
      "grad_norm": 0.3445820212364197,
      "learning_rate": 5.642745596752832e-06,
      "loss": 0.0241,
      "step": 1342540
    },
    {
      "epoch": 2.197128885921329,
      "grad_norm": 0.3630574345588684,
      "learning_rate": 5.6426797045393155e-06,
      "loss": 0.0127,
      "step": 1342560
    },
    {
      "epoch": 2.1971616163599825,
      "grad_norm": 0.4568313658237457,
      "learning_rate": 5.642613812325798e-06,
      "loss": 0.0212,
      "step": 1342580
    },
    {
      "epoch": 2.1971943467986357,
      "grad_norm": 0.41365164518356323,
      "learning_rate": 5.642547920112281e-06,
      "loss": 0.0141,
      "step": 1342600
    },
    {
      "epoch": 2.1972270772372893,
      "grad_norm": 0.6321372389793396,
      "learning_rate": 5.642482027898764e-06,
      "loss": 0.0089,
      "step": 1342620
    },
    {
      "epoch": 2.1972598076759424,
      "grad_norm": 0.38338062167167664,
      "learning_rate": 5.642416135685247e-06,
      "loss": 0.0177,
      "step": 1342640
    },
    {
      "epoch": 2.197292538114596,
      "grad_norm": 0.9116237163543701,
      "learning_rate": 5.642350243471729e-06,
      "loss": 0.0218,
      "step": 1342660
    },
    {
      "epoch": 2.197325268553249,
      "grad_norm": 0.6605955362319946,
      "learning_rate": 5.642284351258213e-06,
      "loss": 0.0145,
      "step": 1342680
    },
    {
      "epoch": 2.1973579989919023,
      "grad_norm": 0.29903799295425415,
      "learning_rate": 5.642218459044695e-06,
      "loss": 0.0163,
      "step": 1342700
    },
    {
      "epoch": 2.197390729430556,
      "grad_norm": 0.2782547175884247,
      "learning_rate": 5.642152566831178e-06,
      "loss": 0.0162,
      "step": 1342720
    },
    {
      "epoch": 2.197423459869209,
      "grad_norm": 0.2588191330432892,
      "learning_rate": 5.642086674617661e-06,
      "loss": 0.017,
      "step": 1342740
    },
    {
      "epoch": 2.1974561903078627,
      "grad_norm": 0.5581550002098083,
      "learning_rate": 5.642020782404144e-06,
      "loss": 0.0159,
      "step": 1342760
    },
    {
      "epoch": 2.197488920746516,
      "grad_norm": 0.29956862330436707,
      "learning_rate": 5.6419548901906264e-06,
      "loss": 0.0153,
      "step": 1342780
    },
    {
      "epoch": 2.197521651185169,
      "grad_norm": 0.20775209367275238,
      "learning_rate": 5.64188899797711e-06,
      "loss": 0.0134,
      "step": 1342800
    },
    {
      "epoch": 2.1975543816238226,
      "grad_norm": 0.3934074938297272,
      "learning_rate": 5.641823105763592e-06,
      "loss": 0.0136,
      "step": 1342820
    },
    {
      "epoch": 2.1975871120624757,
      "grad_norm": 2.092007875442505,
      "learning_rate": 5.6417572135500755e-06,
      "loss": 0.0138,
      "step": 1342840
    },
    {
      "epoch": 2.1976198425011293,
      "grad_norm": 0.14078113436698914,
      "learning_rate": 5.641691321336557e-06,
      "loss": 0.0148,
      "step": 1342860
    },
    {
      "epoch": 2.1976525729397824,
      "grad_norm": 0.2919853627681732,
      "learning_rate": 5.641625429123041e-06,
      "loss": 0.0139,
      "step": 1342880
    },
    {
      "epoch": 2.197685303378436,
      "grad_norm": 0.7609093189239502,
      "learning_rate": 5.641559536909525e-06,
      "loss": 0.0217,
      "step": 1342900
    },
    {
      "epoch": 2.197718033817089,
      "grad_norm": 0.21491123735904694,
      "learning_rate": 5.6414936446960065e-06,
      "loss": 0.0148,
      "step": 1342920
    },
    {
      "epoch": 2.1977507642557423,
      "grad_norm": 1.419918417930603,
      "learning_rate": 5.64142775248249e-06,
      "loss": 0.0133,
      "step": 1342940
    },
    {
      "epoch": 2.197783494694396,
      "grad_norm": 0.03954360634088516,
      "learning_rate": 5.641361860268972e-06,
      "loss": 0.0094,
      "step": 1342960
    },
    {
      "epoch": 2.197816225133049,
      "grad_norm": 0.3655620813369751,
      "learning_rate": 5.6412959680554555e-06,
      "loss": 0.0186,
      "step": 1342980
    },
    {
      "epoch": 2.1978489555717027,
      "grad_norm": 0.0844016969203949,
      "learning_rate": 5.641230075841938e-06,
      "loss": 0.0162,
      "step": 1343000
    },
    {
      "epoch": 2.197881686010356,
      "grad_norm": 0.3620555102825165,
      "learning_rate": 5.641164183628421e-06,
      "loss": 0.0259,
      "step": 1343020
    },
    {
      "epoch": 2.1979144164490094,
      "grad_norm": 0.5994052886962891,
      "learning_rate": 5.641098291414904e-06,
      "loss": 0.0148,
      "step": 1343040
    },
    {
      "epoch": 2.1979471468876626,
      "grad_norm": 0.6592683792114258,
      "learning_rate": 5.641032399201387e-06,
      "loss": 0.0103,
      "step": 1343060
    },
    {
      "epoch": 2.1979798773263157,
      "grad_norm": 0.2840866148471832,
      "learning_rate": 5.640966506987869e-06,
      "loss": 0.0132,
      "step": 1343080
    },
    {
      "epoch": 2.1980126077649693,
      "grad_norm": 0.4586881697177887,
      "learning_rate": 5.640900614774353e-06,
      "loss": 0.0138,
      "step": 1343100
    },
    {
      "epoch": 2.1980453382036225,
      "grad_norm": 0.23298904299736023,
      "learning_rate": 5.640834722560835e-06,
      "loss": 0.0181,
      "step": 1343120
    },
    {
      "epoch": 2.198078068642276,
      "grad_norm": 0.425693541765213,
      "learning_rate": 5.640768830347318e-06,
      "loss": 0.0115,
      "step": 1343140
    },
    {
      "epoch": 2.198110799080929,
      "grad_norm": 0.2562859356403351,
      "learning_rate": 5.640702938133801e-06,
      "loss": 0.0171,
      "step": 1343160
    },
    {
      "epoch": 2.198143529519583,
      "grad_norm": 0.3246111571788788,
      "learning_rate": 5.640637045920284e-06,
      "loss": 0.0168,
      "step": 1343180
    },
    {
      "epoch": 2.198176259958236,
      "grad_norm": 0.17016513645648956,
      "learning_rate": 5.6405711537067665e-06,
      "loss": 0.0109,
      "step": 1343200
    },
    {
      "epoch": 2.198208990396889,
      "grad_norm": 0.3321998119354248,
      "learning_rate": 5.64050526149325e-06,
      "loss": 0.0217,
      "step": 1343220
    },
    {
      "epoch": 2.1982417208355427,
      "grad_norm": 0.345129132270813,
      "learning_rate": 5.640439369279732e-06,
      "loss": 0.0168,
      "step": 1343240
    },
    {
      "epoch": 2.198274451274196,
      "grad_norm": 1.4462379217147827,
      "learning_rate": 5.640373477066216e-06,
      "loss": 0.0247,
      "step": 1343260
    },
    {
      "epoch": 2.1983071817128494,
      "grad_norm": 0.25365862250328064,
      "learning_rate": 5.640307584852699e-06,
      "loss": 0.0211,
      "step": 1343280
    },
    {
      "epoch": 2.1983399121515026,
      "grad_norm": 0.2872392237186432,
      "learning_rate": 5.640241692639181e-06,
      "loss": 0.0116,
      "step": 1343300
    },
    {
      "epoch": 2.198372642590156,
      "grad_norm": 0.4626656770706177,
      "learning_rate": 5.640175800425665e-06,
      "loss": 0.0177,
      "step": 1343320
    },
    {
      "epoch": 2.1984053730288093,
      "grad_norm": 1.004503607749939,
      "learning_rate": 5.6401099082121466e-06,
      "loss": 0.0124,
      "step": 1343340
    },
    {
      "epoch": 2.1984381034674625,
      "grad_norm": 0.26418337225914,
      "learning_rate": 5.64004401599863e-06,
      "loss": 0.0164,
      "step": 1343360
    },
    {
      "epoch": 2.198470833906116,
      "grad_norm": 0.21177345514297485,
      "learning_rate": 5.639978123785113e-06,
      "loss": 0.0134,
      "step": 1343380
    },
    {
      "epoch": 2.1985035643447692,
      "grad_norm": 0.6345236301422119,
      "learning_rate": 5.639912231571596e-06,
      "loss": 0.0135,
      "step": 1343400
    },
    {
      "epoch": 2.198536294783423,
      "grad_norm": 0.23031091690063477,
      "learning_rate": 5.639846339358078e-06,
      "loss": 0.0102,
      "step": 1343420
    },
    {
      "epoch": 2.198569025222076,
      "grad_norm": 0.21431593596935272,
      "learning_rate": 5.639780447144562e-06,
      "loss": 0.0114,
      "step": 1343440
    },
    {
      "epoch": 2.1986017556607296,
      "grad_norm": 1.0892252922058105,
      "learning_rate": 5.639714554931044e-06,
      "loss": 0.0195,
      "step": 1343460
    },
    {
      "epoch": 2.1986344860993827,
      "grad_norm": 0.23044103384017944,
      "learning_rate": 5.639648662717527e-06,
      "loss": 0.0087,
      "step": 1343480
    },
    {
      "epoch": 2.198667216538036,
      "grad_norm": 0.36176857352256775,
      "learning_rate": 5.639582770504009e-06,
      "loss": 0.01,
      "step": 1343500
    },
    {
      "epoch": 2.1986999469766895,
      "grad_norm": 0.2464163601398468,
      "learning_rate": 5.639516878290493e-06,
      "loss": 0.0119,
      "step": 1343520
    },
    {
      "epoch": 2.1987326774153426,
      "grad_norm": 0.18792691826820374,
      "learning_rate": 5.639450986076976e-06,
      "loss": 0.0148,
      "step": 1343540
    },
    {
      "epoch": 2.198765407853996,
      "grad_norm": 0.38575759530067444,
      "learning_rate": 5.639385093863458e-06,
      "loss": 0.0185,
      "step": 1343560
    },
    {
      "epoch": 2.1987981382926494,
      "grad_norm": 0.18596972525119781,
      "learning_rate": 5.639319201649941e-06,
      "loss": 0.0149,
      "step": 1343580
    },
    {
      "epoch": 2.198830868731303,
      "grad_norm": 0.6153697967529297,
      "learning_rate": 5.639253309436425e-06,
      "loss": 0.0157,
      "step": 1343600
    },
    {
      "epoch": 2.198863599169956,
      "grad_norm": 0.2113116830587387,
      "learning_rate": 5.6391874172229074e-06,
      "loss": 0.0211,
      "step": 1343620
    },
    {
      "epoch": 2.1988963296086093,
      "grad_norm": 0.43902388215065,
      "learning_rate": 5.63912152500939e-06,
      "loss": 0.0164,
      "step": 1343640
    },
    {
      "epoch": 2.198929060047263,
      "grad_norm": 0.4203189015388489,
      "learning_rate": 5.639055632795874e-06,
      "loss": 0.0169,
      "step": 1343660
    },
    {
      "epoch": 2.198961790485916,
      "grad_norm": 0.5200994610786438,
      "learning_rate": 5.638989740582356e-06,
      "loss": 0.0172,
      "step": 1343680
    },
    {
      "epoch": 2.1989945209245696,
      "grad_norm": 0.25494441390037537,
      "learning_rate": 5.638923848368839e-06,
      "loss": 0.0109,
      "step": 1343700
    },
    {
      "epoch": 2.1990272513632227,
      "grad_norm": 1.3016985654830933,
      "learning_rate": 5.638857956155321e-06,
      "loss": 0.0255,
      "step": 1343720
    },
    {
      "epoch": 2.1990599818018763,
      "grad_norm": 0.45991700887680054,
      "learning_rate": 5.638792063941805e-06,
      "loss": 0.02,
      "step": 1343740
    },
    {
      "epoch": 2.1990927122405295,
      "grad_norm": 0.2745664417743683,
      "learning_rate": 5.6387261717282875e-06,
      "loss": 0.0151,
      "step": 1343760
    },
    {
      "epoch": 2.1991254426791826,
      "grad_norm": 0.324629008769989,
      "learning_rate": 5.63866027951477e-06,
      "loss": 0.0213,
      "step": 1343780
    },
    {
      "epoch": 2.1991581731178362,
      "grad_norm": 0.19614742696285248,
      "learning_rate": 5.638594387301253e-06,
      "loss": 0.0247,
      "step": 1343800
    },
    {
      "epoch": 2.1991909035564894,
      "grad_norm": 0.4900328516960144,
      "learning_rate": 5.6385284950877365e-06,
      "loss": 0.0183,
      "step": 1343820
    },
    {
      "epoch": 2.199223633995143,
      "grad_norm": 0.2787037789821625,
      "learning_rate": 5.6384626028742184e-06,
      "loss": 0.019,
      "step": 1343840
    },
    {
      "epoch": 2.199256364433796,
      "grad_norm": 0.24810408055782318,
      "learning_rate": 5.638396710660702e-06,
      "loss": 0.0154,
      "step": 1343860
    },
    {
      "epoch": 2.1992890948724493,
      "grad_norm": 0.6315301656723022,
      "learning_rate": 5.638330818447184e-06,
      "loss": 0.016,
      "step": 1343880
    },
    {
      "epoch": 2.199321825311103,
      "grad_norm": 0.6207274794578552,
      "learning_rate": 5.6382649262336675e-06,
      "loss": 0.0169,
      "step": 1343900
    },
    {
      "epoch": 2.199354555749756,
      "grad_norm": 0.8586488366127014,
      "learning_rate": 5.638199034020149e-06,
      "loss": 0.0123,
      "step": 1343920
    },
    {
      "epoch": 2.1993872861884096,
      "grad_norm": 0.2520168721675873,
      "learning_rate": 5.638133141806633e-06,
      "loss": 0.0118,
      "step": 1343940
    },
    {
      "epoch": 2.1994200166270628,
      "grad_norm": 0.5776855945587158,
      "learning_rate": 5.6380672495931166e-06,
      "loss": 0.0155,
      "step": 1343960
    },
    {
      "epoch": 2.1994527470657164,
      "grad_norm": 0.5537322759628296,
      "learning_rate": 5.6380013573795985e-06,
      "loss": 0.0148,
      "step": 1343980
    },
    {
      "epoch": 2.1994854775043695,
      "grad_norm": 0.6744436621665955,
      "learning_rate": 5.637935465166082e-06,
      "loss": 0.0128,
      "step": 1344000
    },
    {
      "epoch": 2.1995182079430227,
      "grad_norm": 0.2589836120605469,
      "learning_rate": 5.637869572952565e-06,
      "loss": 0.0135,
      "step": 1344020
    },
    {
      "epoch": 2.1995509383816763,
      "grad_norm": 0.06586384773254395,
      "learning_rate": 5.6378036807390475e-06,
      "loss": 0.0121,
      "step": 1344040
    },
    {
      "epoch": 2.1995836688203294,
      "grad_norm": 0.32863110303878784,
      "learning_rate": 5.63773778852553e-06,
      "loss": 0.0135,
      "step": 1344060
    },
    {
      "epoch": 2.199616399258983,
      "grad_norm": 0.5833021998405457,
      "learning_rate": 5.637671896312014e-06,
      "loss": 0.0148,
      "step": 1344080
    },
    {
      "epoch": 2.199649129697636,
      "grad_norm": 0.23021277785301208,
      "learning_rate": 5.637606004098496e-06,
      "loss": 0.0145,
      "step": 1344100
    },
    {
      "epoch": 2.1996818601362897,
      "grad_norm": 8.34653091430664,
      "learning_rate": 5.637540111884979e-06,
      "loss": 0.016,
      "step": 1344120
    },
    {
      "epoch": 2.199714590574943,
      "grad_norm": 0.14394180476665497,
      "learning_rate": 5.637474219671461e-06,
      "loss": 0.0104,
      "step": 1344140
    },
    {
      "epoch": 2.199747321013596,
      "grad_norm": 3.5585968494415283,
      "learning_rate": 5.637408327457945e-06,
      "loss": 0.0194,
      "step": 1344160
    },
    {
      "epoch": 2.1997800514522496,
      "grad_norm": 0.45513299107551575,
      "learning_rate": 5.6373424352444275e-06,
      "loss": 0.0182,
      "step": 1344180
    },
    {
      "epoch": 2.199812781890903,
      "grad_norm": 0.7400169968605042,
      "learning_rate": 5.63727654303091e-06,
      "loss": 0.0149,
      "step": 1344200
    },
    {
      "epoch": 2.1998455123295564,
      "grad_norm": 0.4696972370147705,
      "learning_rate": 5.637210650817393e-06,
      "loss": 0.0158,
      "step": 1344220
    },
    {
      "epoch": 2.1998782427682095,
      "grad_norm": 0.8856619596481323,
      "learning_rate": 5.637144758603877e-06,
      "loss": 0.0213,
      "step": 1344240
    },
    {
      "epoch": 2.1999109732068627,
      "grad_norm": 0.34200531244277954,
      "learning_rate": 5.6370788663903585e-06,
      "loss": 0.018,
      "step": 1344260
    },
    {
      "epoch": 2.1999437036455163,
      "grad_norm": 0.08067992329597473,
      "learning_rate": 5.637012974176842e-06,
      "loss": 0.0099,
      "step": 1344280
    },
    {
      "epoch": 2.1999764340841694,
      "grad_norm": 0.3977128565311432,
      "learning_rate": 5.636947081963326e-06,
      "loss": 0.0162,
      "step": 1344300
    },
    {
      "epoch": 2.200009164522823,
      "grad_norm": 0.4508129060268402,
      "learning_rate": 5.6368811897498076e-06,
      "loss": 0.0083,
      "step": 1344320
    },
    {
      "epoch": 2.200041894961476,
      "grad_norm": 3.9340403079986572,
      "learning_rate": 5.636815297536291e-06,
      "loss": 0.0162,
      "step": 1344340
    },
    {
      "epoch": 2.2000746254001298,
      "grad_norm": 0.17416058480739594,
      "learning_rate": 5.636749405322773e-06,
      "loss": 0.012,
      "step": 1344360
    },
    {
      "epoch": 2.200107355838783,
      "grad_norm": 0.2122243195772171,
      "learning_rate": 5.636683513109257e-06,
      "loss": 0.0157,
      "step": 1344380
    },
    {
      "epoch": 2.200140086277436,
      "grad_norm": 0.07004833221435547,
      "learning_rate": 5.636617620895739e-06,
      "loss": 0.0164,
      "step": 1344400
    },
    {
      "epoch": 2.2001728167160897,
      "grad_norm": 0.30632463097572327,
      "learning_rate": 5.636551728682222e-06,
      "loss": 0.0169,
      "step": 1344420
    },
    {
      "epoch": 2.200205547154743,
      "grad_norm": 0.18931658565998077,
      "learning_rate": 5.636485836468705e-06,
      "loss": 0.0138,
      "step": 1344440
    },
    {
      "epoch": 2.2002382775933964,
      "grad_norm": 0.12824083864688873,
      "learning_rate": 5.6364199442551884e-06,
      "loss": 0.0094,
      "step": 1344460
    },
    {
      "epoch": 2.2002710080320496,
      "grad_norm": 0.3510048985481262,
      "learning_rate": 5.63635405204167e-06,
      "loss": 0.0153,
      "step": 1344480
    },
    {
      "epoch": 2.200303738470703,
      "grad_norm": 0.6007193326950073,
      "learning_rate": 5.636288159828154e-06,
      "loss": 0.0163,
      "step": 1344500
    },
    {
      "epoch": 2.2003364689093563,
      "grad_norm": 1.0976433753967285,
      "learning_rate": 5.636222267614636e-06,
      "loss": 0.0186,
      "step": 1344520
    },
    {
      "epoch": 2.2003691993480095,
      "grad_norm": 1.0696122646331787,
      "learning_rate": 5.636156375401119e-06,
      "loss": 0.022,
      "step": 1344540
    },
    {
      "epoch": 2.200401929786663,
      "grad_norm": 0.060385484248399734,
      "learning_rate": 5.636090483187602e-06,
      "loss": 0.0131,
      "step": 1344560
    },
    {
      "epoch": 2.200434660225316,
      "grad_norm": 0.906928539276123,
      "learning_rate": 5.636024590974085e-06,
      "loss": 0.02,
      "step": 1344580
    },
    {
      "epoch": 2.20046739066397,
      "grad_norm": 0.46941667795181274,
      "learning_rate": 5.635958698760568e-06,
      "loss": 0.0183,
      "step": 1344600
    },
    {
      "epoch": 2.200500121102623,
      "grad_norm": 0.5670634508132935,
      "learning_rate": 5.635892806547051e-06,
      "loss": 0.0124,
      "step": 1344620
    },
    {
      "epoch": 2.2005328515412765,
      "grad_norm": 0.5260559916496277,
      "learning_rate": 5.635826914333533e-06,
      "loss": 0.0129,
      "step": 1344640
    },
    {
      "epoch": 2.2005655819799297,
      "grad_norm": 0.3463934659957886,
      "learning_rate": 5.635761022120017e-06,
      "loss": 0.0173,
      "step": 1344660
    },
    {
      "epoch": 2.200598312418583,
      "grad_norm": 0.5645074844360352,
      "learning_rate": 5.6356951299065e-06,
      "loss": 0.0114,
      "step": 1344680
    },
    {
      "epoch": 2.2006310428572364,
      "grad_norm": 0.909999668598175,
      "learning_rate": 5.635629237692982e-06,
      "loss": 0.0179,
      "step": 1344700
    },
    {
      "epoch": 2.2006637732958896,
      "grad_norm": 0.16267147660255432,
      "learning_rate": 5.635563345479466e-06,
      "loss": 0.0189,
      "step": 1344720
    },
    {
      "epoch": 2.200696503734543,
      "grad_norm": 0.2202923744916916,
      "learning_rate": 5.635497453265948e-06,
      "loss": 0.0221,
      "step": 1344740
    },
    {
      "epoch": 2.2007292341731963,
      "grad_norm": 0.548829972743988,
      "learning_rate": 5.635431561052431e-06,
      "loss": 0.0139,
      "step": 1344760
    },
    {
      "epoch": 2.20076196461185,
      "grad_norm": 0.7583168148994446,
      "learning_rate": 5.635365668838914e-06,
      "loss": 0.0132,
      "step": 1344780
    },
    {
      "epoch": 2.200794695050503,
      "grad_norm": 0.47501665353775024,
      "learning_rate": 5.635299776625397e-06,
      "loss": 0.0167,
      "step": 1344800
    },
    {
      "epoch": 2.200827425489156,
      "grad_norm": 0.38745325803756714,
      "learning_rate": 5.6352338844118795e-06,
      "loss": 0.011,
      "step": 1344820
    },
    {
      "epoch": 2.20086015592781,
      "grad_norm": 0.4383170008659363,
      "learning_rate": 5.635167992198363e-06,
      "loss": 0.0136,
      "step": 1344840
    },
    {
      "epoch": 2.200892886366463,
      "grad_norm": 0.24018873274326324,
      "learning_rate": 5.635102099984845e-06,
      "loss": 0.0151,
      "step": 1344860
    },
    {
      "epoch": 2.2009256168051166,
      "grad_norm": 0.6031432151794434,
      "learning_rate": 5.6350362077713285e-06,
      "loss": 0.0179,
      "step": 1344880
    },
    {
      "epoch": 2.2009583472437697,
      "grad_norm": 0.4356158971786499,
      "learning_rate": 5.63497031555781e-06,
      "loss": 0.0128,
      "step": 1344900
    },
    {
      "epoch": 2.2009910776824233,
      "grad_norm": 0.6237486004829407,
      "learning_rate": 5.634904423344294e-06,
      "loss": 0.0158,
      "step": 1344920
    },
    {
      "epoch": 2.2010238081210765,
      "grad_norm": 0.18247053027153015,
      "learning_rate": 5.634838531130776e-06,
      "loss": 0.0148,
      "step": 1344940
    },
    {
      "epoch": 2.2010565385597296,
      "grad_norm": 0.7394537329673767,
      "learning_rate": 5.6347726389172595e-06,
      "loss": 0.0153,
      "step": 1344960
    },
    {
      "epoch": 2.201089268998383,
      "grad_norm": 1.106209635734558,
      "learning_rate": 5.634706746703742e-06,
      "loss": 0.0148,
      "step": 1344980
    },
    {
      "epoch": 2.2011219994370363,
      "grad_norm": 0.3160686790943146,
      "learning_rate": 5.634640854490225e-06,
      "loss": 0.0131,
      "step": 1345000
    },
    {
      "epoch": 2.20115472987569,
      "grad_norm": 1.8104068040847778,
      "learning_rate": 5.6345749622767085e-06,
      "loss": 0.0136,
      "step": 1345020
    },
    {
      "epoch": 2.201187460314343,
      "grad_norm": 0.5169870257377625,
      "learning_rate": 5.634509070063191e-06,
      "loss": 0.0119,
      "step": 1345040
    },
    {
      "epoch": 2.2012201907529967,
      "grad_norm": 0.5173537731170654,
      "learning_rate": 5.634443177849674e-06,
      "loss": 0.011,
      "step": 1345060
    },
    {
      "epoch": 2.20125292119165,
      "grad_norm": 0.7683069705963135,
      "learning_rate": 5.634377285636157e-06,
      "loss": 0.0178,
      "step": 1345080
    },
    {
      "epoch": 2.201285651630303,
      "grad_norm": 0.7278982996940613,
      "learning_rate": 5.63431139342264e-06,
      "loss": 0.0121,
      "step": 1345100
    },
    {
      "epoch": 2.2013183820689566,
      "grad_norm": 0.6045294404029846,
      "learning_rate": 5.634245501209122e-06,
      "loss": 0.0192,
      "step": 1345120
    },
    {
      "epoch": 2.2013511125076097,
      "grad_norm": 0.43454694747924805,
      "learning_rate": 5.634179608995606e-06,
      "loss": 0.012,
      "step": 1345140
    },
    {
      "epoch": 2.2013838429462633,
      "grad_norm": 0.4640267789363861,
      "learning_rate": 5.634113716782088e-06,
      "loss": 0.0207,
      "step": 1345160
    },
    {
      "epoch": 2.2014165733849165,
      "grad_norm": 0.39775681495666504,
      "learning_rate": 5.634047824568571e-06,
      "loss": 0.0176,
      "step": 1345180
    },
    {
      "epoch": 2.20144930382357,
      "grad_norm": 0.3085218369960785,
      "learning_rate": 5.633981932355054e-06,
      "loss": 0.0136,
      "step": 1345200
    },
    {
      "epoch": 2.201482034262223,
      "grad_norm": 0.23353604972362518,
      "learning_rate": 5.633916040141537e-06,
      "loss": 0.0161,
      "step": 1345220
    },
    {
      "epoch": 2.2015147647008764,
      "grad_norm": 0.5194063782691956,
      "learning_rate": 5.6338501479280195e-06,
      "loss": 0.0204,
      "step": 1345240
    },
    {
      "epoch": 2.20154749513953,
      "grad_norm": 1.0092737674713135,
      "learning_rate": 5.633784255714503e-06,
      "loss": 0.0166,
      "step": 1345260
    },
    {
      "epoch": 2.201580225578183,
      "grad_norm": 0.17862355709075928,
      "learning_rate": 5.633718363500985e-06,
      "loss": 0.0144,
      "step": 1345280
    },
    {
      "epoch": 2.2016129560168367,
      "grad_norm": 0.8721067309379578,
      "learning_rate": 5.633652471287469e-06,
      "loss": 0.0135,
      "step": 1345300
    },
    {
      "epoch": 2.20164568645549,
      "grad_norm": 0.1662190556526184,
      "learning_rate": 5.6335865790739505e-06,
      "loss": 0.0187,
      "step": 1345320
    },
    {
      "epoch": 2.201678416894143,
      "grad_norm": 0.19526393711566925,
      "learning_rate": 5.633520686860434e-06,
      "loss": 0.009,
      "step": 1345340
    },
    {
      "epoch": 2.2017111473327966,
      "grad_norm": 0.9095696806907654,
      "learning_rate": 5.633454794646918e-06,
      "loss": 0.0131,
      "step": 1345360
    },
    {
      "epoch": 2.2017438777714498,
      "grad_norm": 0.34562352299690247,
      "learning_rate": 5.6333889024333996e-06,
      "loss": 0.0132,
      "step": 1345380
    },
    {
      "epoch": 2.2017766082101033,
      "grad_norm": 0.24776174128055573,
      "learning_rate": 5.633323010219883e-06,
      "loss": 0.02,
      "step": 1345400
    },
    {
      "epoch": 2.2018093386487565,
      "grad_norm": 0.40517276525497437,
      "learning_rate": 5.633257118006366e-06,
      "loss": 0.021,
      "step": 1345420
    },
    {
      "epoch": 2.20184206908741,
      "grad_norm": 0.228709414601326,
      "learning_rate": 5.633191225792849e-06,
      "loss": 0.0204,
      "step": 1345440
    },
    {
      "epoch": 2.2018747995260632,
      "grad_norm": 0.42861267924308777,
      "learning_rate": 5.633125333579331e-06,
      "loss": 0.0096,
      "step": 1345460
    },
    {
      "epoch": 2.2019075299647164,
      "grad_norm": 0.33095142245292664,
      "learning_rate": 5.633059441365815e-06,
      "loss": 0.0215,
      "step": 1345480
    },
    {
      "epoch": 2.20194026040337,
      "grad_norm": 0.12160899490118027,
      "learning_rate": 5.632993549152297e-06,
      "loss": 0.0134,
      "step": 1345500
    },
    {
      "epoch": 2.201972990842023,
      "grad_norm": 0.2682493329048157,
      "learning_rate": 5.6329276569387804e-06,
      "loss": 0.0207,
      "step": 1345520
    },
    {
      "epoch": 2.2020057212806767,
      "grad_norm": 1.1774672269821167,
      "learning_rate": 5.632861764725262e-06,
      "loss": 0.0208,
      "step": 1345540
    },
    {
      "epoch": 2.20203845171933,
      "grad_norm": 0.18007469177246094,
      "learning_rate": 5.632795872511746e-06,
      "loss": 0.0149,
      "step": 1345560
    },
    {
      "epoch": 2.2020711821579835,
      "grad_norm": 0.7548971176147461,
      "learning_rate": 5.632729980298229e-06,
      "loss": 0.0172,
      "step": 1345580
    },
    {
      "epoch": 2.2021039125966366,
      "grad_norm": 0.2711765468120575,
      "learning_rate": 5.632664088084711e-06,
      "loss": 0.0121,
      "step": 1345600
    },
    {
      "epoch": 2.2021366430352898,
      "grad_norm": 1.9004108905792236,
      "learning_rate": 5.632598195871194e-06,
      "loss": 0.022,
      "step": 1345620
    },
    {
      "epoch": 2.2021693734739434,
      "grad_norm": 0.2008272260427475,
      "learning_rate": 5.632532303657678e-06,
      "loss": 0.0129,
      "step": 1345640
    },
    {
      "epoch": 2.2022021039125965,
      "grad_norm": 0.3257216513156891,
      "learning_rate": 5.63246641144416e-06,
      "loss": 0.0111,
      "step": 1345660
    },
    {
      "epoch": 2.20223483435125,
      "grad_norm": 0.1661464124917984,
      "learning_rate": 5.632400519230643e-06,
      "loss": 0.0193,
      "step": 1345680
    },
    {
      "epoch": 2.2022675647899033,
      "grad_norm": 0.29232874512672424,
      "learning_rate": 5.632334627017125e-06,
      "loss": 0.0145,
      "step": 1345700
    },
    {
      "epoch": 2.202300295228557,
      "grad_norm": 0.7885254621505737,
      "learning_rate": 5.632268734803609e-06,
      "loss": 0.0152,
      "step": 1345720
    },
    {
      "epoch": 2.20233302566721,
      "grad_norm": 0.6306767463684082,
      "learning_rate": 5.632202842590092e-06,
      "loss": 0.0151,
      "step": 1345740
    },
    {
      "epoch": 2.202365756105863,
      "grad_norm": 0.8357328176498413,
      "learning_rate": 5.632136950376574e-06,
      "loss": 0.0175,
      "step": 1345760
    },
    {
      "epoch": 2.2023984865445168,
      "grad_norm": 0.25632670521736145,
      "learning_rate": 5.632071058163058e-06,
      "loss": 0.0112,
      "step": 1345780
    },
    {
      "epoch": 2.20243121698317,
      "grad_norm": 0.725437581539154,
      "learning_rate": 5.63200516594954e-06,
      "loss": 0.0157,
      "step": 1345800
    },
    {
      "epoch": 2.2024639474218235,
      "grad_norm": 0.9173357486724854,
      "learning_rate": 5.631939273736023e-06,
      "loss": 0.021,
      "step": 1345820
    },
    {
      "epoch": 2.2024966778604766,
      "grad_norm": 0.45518845319747925,
      "learning_rate": 5.631873381522506e-06,
      "loss": 0.0163,
      "step": 1345840
    },
    {
      "epoch": 2.20252940829913,
      "grad_norm": 0.8608065247535706,
      "learning_rate": 5.6318074893089895e-06,
      "loss": 0.0158,
      "step": 1345860
    },
    {
      "epoch": 2.2025621387377834,
      "grad_norm": 0.6452254056930542,
      "learning_rate": 5.6317415970954714e-06,
      "loss": 0.0202,
      "step": 1345880
    },
    {
      "epoch": 2.2025948691764365,
      "grad_norm": 0.17460042238235474,
      "learning_rate": 5.631675704881955e-06,
      "loss": 0.0087,
      "step": 1345900
    },
    {
      "epoch": 2.20262759961509,
      "grad_norm": 0.321902334690094,
      "learning_rate": 5.631609812668437e-06,
      "loss": 0.0091,
      "step": 1345920
    },
    {
      "epoch": 2.2026603300537433,
      "grad_norm": 0.2951798737049103,
      "learning_rate": 5.6315439204549205e-06,
      "loss": 0.0119,
      "step": 1345940
    },
    {
      "epoch": 2.202693060492397,
      "grad_norm": 0.19428372383117676,
      "learning_rate": 5.631478028241402e-06,
      "loss": 0.0131,
      "step": 1345960
    },
    {
      "epoch": 2.20272579093105,
      "grad_norm": 1.4186756610870361,
      "learning_rate": 5.631412136027886e-06,
      "loss": 0.0181,
      "step": 1345980
    },
    {
      "epoch": 2.202758521369703,
      "grad_norm": 0.4648212492465973,
      "learning_rate": 5.631346243814369e-06,
      "loss": 0.0145,
      "step": 1346000
    },
    {
      "epoch": 2.2027912518083568,
      "grad_norm": 0.5462164282798767,
      "learning_rate": 5.6312803516008515e-06,
      "loss": 0.0152,
      "step": 1346020
    },
    {
      "epoch": 2.20282398224701,
      "grad_norm": 0.2145712673664093,
      "learning_rate": 5.631214459387334e-06,
      "loss": 0.0171,
      "step": 1346040
    },
    {
      "epoch": 2.2028567126856635,
      "grad_norm": 0.9677932262420654,
      "learning_rate": 5.631148567173818e-06,
      "loss": 0.0112,
      "step": 1346060
    },
    {
      "epoch": 2.2028894431243167,
      "grad_norm": 0.28654226660728455,
      "learning_rate": 5.6310826749603005e-06,
      "loss": 0.0171,
      "step": 1346080
    },
    {
      "epoch": 2.2029221735629703,
      "grad_norm": 0.36041444540023804,
      "learning_rate": 5.631016782746783e-06,
      "loss": 0.0126,
      "step": 1346100
    },
    {
      "epoch": 2.2029549040016234,
      "grad_norm": 0.5582606792449951,
      "learning_rate": 5.630950890533267e-06,
      "loss": 0.0114,
      "step": 1346120
    },
    {
      "epoch": 2.2029876344402766,
      "grad_norm": 0.5985734462738037,
      "learning_rate": 5.630884998319749e-06,
      "loss": 0.0128,
      "step": 1346140
    },
    {
      "epoch": 2.20302036487893,
      "grad_norm": 0.19228839874267578,
      "learning_rate": 5.630819106106232e-06,
      "loss": 0.0137,
      "step": 1346160
    },
    {
      "epoch": 2.2030530953175833,
      "grad_norm": 0.40010690689086914,
      "learning_rate": 5.630753213892714e-06,
      "loss": 0.0196,
      "step": 1346180
    },
    {
      "epoch": 2.203085825756237,
      "grad_norm": 0.49440130591392517,
      "learning_rate": 5.630687321679198e-06,
      "loss": 0.0124,
      "step": 1346200
    },
    {
      "epoch": 2.20311855619489,
      "grad_norm": 0.7228491306304932,
      "learning_rate": 5.6306214294656806e-06,
      "loss": 0.0213,
      "step": 1346220
    },
    {
      "epoch": 2.2031512866335436,
      "grad_norm": 0.46515271067619324,
      "learning_rate": 5.630555537252163e-06,
      "loss": 0.0138,
      "step": 1346240
    },
    {
      "epoch": 2.203184017072197,
      "grad_norm": 0.22861649096012115,
      "learning_rate": 5.630489645038646e-06,
      "loss": 0.0278,
      "step": 1346260
    },
    {
      "epoch": 2.20321674751085,
      "grad_norm": 0.20005245506763458,
      "learning_rate": 5.63042375282513e-06,
      "loss": 0.012,
      "step": 1346280
    },
    {
      "epoch": 2.2032494779495035,
      "grad_norm": 0.20431901514530182,
      "learning_rate": 5.6303578606116115e-06,
      "loss": 0.0192,
      "step": 1346300
    },
    {
      "epoch": 2.2032822083881567,
      "grad_norm": 1.9665443897247314,
      "learning_rate": 5.630291968398095e-06,
      "loss": 0.0126,
      "step": 1346320
    },
    {
      "epoch": 2.2033149388268103,
      "grad_norm": 0.3643241226673126,
      "learning_rate": 5.630226076184577e-06,
      "loss": 0.0162,
      "step": 1346340
    },
    {
      "epoch": 2.2033476692654634,
      "grad_norm": 1.1802843809127808,
      "learning_rate": 5.630160183971061e-06,
      "loss": 0.0141,
      "step": 1346360
    },
    {
      "epoch": 2.203380399704117,
      "grad_norm": 1.4844352006912231,
      "learning_rate": 5.630094291757543e-06,
      "loss": 0.014,
      "step": 1346380
    },
    {
      "epoch": 2.20341313014277,
      "grad_norm": 0.406211256980896,
      "learning_rate": 5.630028399544026e-06,
      "loss": 0.0162,
      "step": 1346400
    },
    {
      "epoch": 2.2034458605814233,
      "grad_norm": 0.2742987871170044,
      "learning_rate": 5.62996250733051e-06,
      "loss": 0.0103,
      "step": 1346420
    },
    {
      "epoch": 2.203478591020077,
      "grad_norm": 1.7685434818267822,
      "learning_rate": 5.629896615116992e-06,
      "loss": 0.0111,
      "step": 1346440
    },
    {
      "epoch": 2.20351132145873,
      "grad_norm": 0.4716164171695709,
      "learning_rate": 5.629830722903475e-06,
      "loss": 0.02,
      "step": 1346460
    },
    {
      "epoch": 2.2035440518973837,
      "grad_norm": 0.5750280618667603,
      "learning_rate": 5.629764830689958e-06,
      "loss": 0.0224,
      "step": 1346480
    },
    {
      "epoch": 2.203576782336037,
      "grad_norm": 0.7998190522193909,
      "learning_rate": 5.6296989384764415e-06,
      "loss": 0.0216,
      "step": 1346500
    },
    {
      "epoch": 2.2036095127746904,
      "grad_norm": 0.32042816281318665,
      "learning_rate": 5.629633046262923e-06,
      "loss": 0.0224,
      "step": 1346520
    },
    {
      "epoch": 2.2036422432133436,
      "grad_norm": 0.3197430372238159,
      "learning_rate": 5.629567154049407e-06,
      "loss": 0.0107,
      "step": 1346540
    },
    {
      "epoch": 2.2036749736519967,
      "grad_norm": 0.5049970149993896,
      "learning_rate": 5.629501261835889e-06,
      "loss": 0.0169,
      "step": 1346560
    },
    {
      "epoch": 2.2037077040906503,
      "grad_norm": 1.3900585174560547,
      "learning_rate": 5.629435369622372e-06,
      "loss": 0.0198,
      "step": 1346580
    },
    {
      "epoch": 2.2037404345293035,
      "grad_norm": 0.29475030303001404,
      "learning_rate": 5.629369477408855e-06,
      "loss": 0.0173,
      "step": 1346600
    },
    {
      "epoch": 2.203773164967957,
      "grad_norm": 1.027984380722046,
      "learning_rate": 5.629303585195338e-06,
      "loss": 0.0097,
      "step": 1346620
    },
    {
      "epoch": 2.20380589540661,
      "grad_norm": 0.8245428800582886,
      "learning_rate": 5.629237692981821e-06,
      "loss": 0.0221,
      "step": 1346640
    },
    {
      "epoch": 2.203838625845264,
      "grad_norm": 0.5850580930709839,
      "learning_rate": 5.629171800768304e-06,
      "loss": 0.0156,
      "step": 1346660
    },
    {
      "epoch": 2.203871356283917,
      "grad_norm": 0.7671531438827515,
      "learning_rate": 5.629105908554786e-06,
      "loss": 0.0095,
      "step": 1346680
    },
    {
      "epoch": 2.20390408672257,
      "grad_norm": 0.12970393896102905,
      "learning_rate": 5.62904001634127e-06,
      "loss": 0.0128,
      "step": 1346700
    },
    {
      "epoch": 2.2039368171612237,
      "grad_norm": 0.5218087434768677,
      "learning_rate": 5.628974124127752e-06,
      "loss": 0.0168,
      "step": 1346720
    },
    {
      "epoch": 2.203969547599877,
      "grad_norm": 0.48707136511802673,
      "learning_rate": 5.628908231914235e-06,
      "loss": 0.0242,
      "step": 1346740
    },
    {
      "epoch": 2.2040022780385304,
      "grad_norm": 0.4663017690181732,
      "learning_rate": 5.628842339700717e-06,
      "loss": 0.0158,
      "step": 1346760
    },
    {
      "epoch": 2.2040350084771836,
      "grad_norm": 0.2763878107070923,
      "learning_rate": 5.628776447487201e-06,
      "loss": 0.0157,
      "step": 1346780
    },
    {
      "epoch": 2.204067738915837,
      "grad_norm": 1.957139492034912,
      "learning_rate": 5.628710555273684e-06,
      "loss": 0.0271,
      "step": 1346800
    },
    {
      "epoch": 2.2041004693544903,
      "grad_norm": 0.46771687269210815,
      "learning_rate": 5.628644663060166e-06,
      "loss": 0.0175,
      "step": 1346820
    },
    {
      "epoch": 2.2041331997931435,
      "grad_norm": 0.29267793893814087,
      "learning_rate": 5.62857877084665e-06,
      "loss": 0.0128,
      "step": 1346840
    },
    {
      "epoch": 2.204165930231797,
      "grad_norm": 0.7687223553657532,
      "learning_rate": 5.6285128786331325e-06,
      "loss": 0.0093,
      "step": 1346860
    },
    {
      "epoch": 2.2041986606704502,
      "grad_norm": 0.2438555806875229,
      "learning_rate": 5.628446986419615e-06,
      "loss": 0.0159,
      "step": 1346880
    },
    {
      "epoch": 2.204231391109104,
      "grad_norm": 0.4390581548213959,
      "learning_rate": 5.628381094206098e-06,
      "loss": 0.0319,
      "step": 1346900
    },
    {
      "epoch": 2.204264121547757,
      "grad_norm": 1.2227015495300293,
      "learning_rate": 5.6283152019925815e-06,
      "loss": 0.0188,
      "step": 1346920
    },
    {
      "epoch": 2.20429685198641,
      "grad_norm": 0.5944119691848755,
      "learning_rate": 5.628249309779063e-06,
      "loss": 0.0191,
      "step": 1346940
    },
    {
      "epoch": 2.2043295824250637,
      "grad_norm": 0.4474581182003021,
      "learning_rate": 5.628183417565547e-06,
      "loss": 0.0152,
      "step": 1346960
    },
    {
      "epoch": 2.204362312863717,
      "grad_norm": 0.5609045624732971,
      "learning_rate": 5.628117525352029e-06,
      "loss": 0.0215,
      "step": 1346980
    },
    {
      "epoch": 2.2043950433023705,
      "grad_norm": 0.20162779092788696,
      "learning_rate": 5.6280516331385125e-06,
      "loss": 0.011,
      "step": 1347000
    },
    {
      "epoch": 2.2044277737410236,
      "grad_norm": 0.7328234910964966,
      "learning_rate": 5.627985740924995e-06,
      "loss": 0.0127,
      "step": 1347020
    },
    {
      "epoch": 2.204460504179677,
      "grad_norm": 0.2817176580429077,
      "learning_rate": 5.627919848711478e-06,
      "loss": 0.0206,
      "step": 1347040
    },
    {
      "epoch": 2.2044932346183304,
      "grad_norm": 0.0843626856803894,
      "learning_rate": 5.627853956497961e-06,
      "loss": 0.0179,
      "step": 1347060
    },
    {
      "epoch": 2.2045259650569835,
      "grad_norm": 0.4330056607723236,
      "learning_rate": 5.627788064284444e-06,
      "loss": 0.0165,
      "step": 1347080
    },
    {
      "epoch": 2.204558695495637,
      "grad_norm": 0.2250124216079712,
      "learning_rate": 5.627722172070926e-06,
      "loss": 0.0135,
      "step": 1347100
    },
    {
      "epoch": 2.2045914259342902,
      "grad_norm": 0.4767301678657532,
      "learning_rate": 5.62765627985741e-06,
      "loss": 0.0139,
      "step": 1347120
    },
    {
      "epoch": 2.204624156372944,
      "grad_norm": 0.2233608365058899,
      "learning_rate": 5.627590387643893e-06,
      "loss": 0.0121,
      "step": 1347140
    },
    {
      "epoch": 2.204656886811597,
      "grad_norm": 0.3638246953487396,
      "learning_rate": 5.627524495430375e-06,
      "loss": 0.0134,
      "step": 1347160
    },
    {
      "epoch": 2.2046896172502506,
      "grad_norm": 0.13026519119739532,
      "learning_rate": 5.627458603216859e-06,
      "loss": 0.0173,
      "step": 1347180
    },
    {
      "epoch": 2.2047223476889037,
      "grad_norm": 0.27994468808174133,
      "learning_rate": 5.627392711003341e-06,
      "loss": 0.0135,
      "step": 1347200
    },
    {
      "epoch": 2.204755078127557,
      "grad_norm": 0.971919059753418,
      "learning_rate": 5.627326818789824e-06,
      "loss": 0.0198,
      "step": 1347220
    },
    {
      "epoch": 2.2047878085662105,
      "grad_norm": 0.257781058549881,
      "learning_rate": 5.627260926576307e-06,
      "loss": 0.016,
      "step": 1347240
    },
    {
      "epoch": 2.2048205390048636,
      "grad_norm": 0.9047436118125916,
      "learning_rate": 5.62719503436279e-06,
      "loss": 0.0202,
      "step": 1347260
    },
    {
      "epoch": 2.2048532694435172,
      "grad_norm": 0.7311161160469055,
      "learning_rate": 5.6271291421492725e-06,
      "loss": 0.0216,
      "step": 1347280
    },
    {
      "epoch": 2.2048859998821704,
      "grad_norm": 0.9040127396583557,
      "learning_rate": 5.627063249935756e-06,
      "loss": 0.0131,
      "step": 1347300
    },
    {
      "epoch": 2.2049187303208235,
      "grad_norm": 0.36899057030677795,
      "learning_rate": 5.626997357722238e-06,
      "loss": 0.0169,
      "step": 1347320
    },
    {
      "epoch": 2.204951460759477,
      "grad_norm": 0.12810860574245453,
      "learning_rate": 5.626931465508722e-06,
      "loss": 0.0129,
      "step": 1347340
    },
    {
      "epoch": 2.2049841911981303,
      "grad_norm": 0.7796847820281982,
      "learning_rate": 5.6268655732952035e-06,
      "loss": 0.0229,
      "step": 1347360
    },
    {
      "epoch": 2.205016921636784,
      "grad_norm": 0.16877159476280212,
      "learning_rate": 5.626799681081687e-06,
      "loss": 0.0124,
      "step": 1347380
    },
    {
      "epoch": 2.205049652075437,
      "grad_norm": 0.17795193195343018,
      "learning_rate": 5.62673378886817e-06,
      "loss": 0.0156,
      "step": 1347400
    },
    {
      "epoch": 2.2050823825140906,
      "grad_norm": 0.2567320466041565,
      "learning_rate": 5.6266678966546526e-06,
      "loss": 0.0156,
      "step": 1347420
    },
    {
      "epoch": 2.2051151129527438,
      "grad_norm": 0.7400280237197876,
      "learning_rate": 5.626602004441135e-06,
      "loss": 0.0153,
      "step": 1347440
    },
    {
      "epoch": 2.205147843391397,
      "grad_norm": 0.3638295829296112,
      "learning_rate": 5.626536112227619e-06,
      "loss": 0.0122,
      "step": 1347460
    },
    {
      "epoch": 2.2051805738300505,
      "grad_norm": 0.21356201171875,
      "learning_rate": 5.626470220014102e-06,
      "loss": 0.0182,
      "step": 1347480
    },
    {
      "epoch": 2.2052133042687037,
      "grad_norm": 0.3883710205554962,
      "learning_rate": 5.626404327800584e-06,
      "loss": 0.0137,
      "step": 1347500
    },
    {
      "epoch": 2.2052460347073572,
      "grad_norm": 0.16109316051006317,
      "learning_rate": 5.626338435587068e-06,
      "loss": 0.015,
      "step": 1347520
    },
    {
      "epoch": 2.2052787651460104,
      "grad_norm": 0.6634988784790039,
      "learning_rate": 5.62627254337355e-06,
      "loss": 0.0146,
      "step": 1347540
    },
    {
      "epoch": 2.205311495584664,
      "grad_norm": 0.31814008951187134,
      "learning_rate": 5.6262066511600334e-06,
      "loss": 0.0128,
      "step": 1347560
    },
    {
      "epoch": 2.205344226023317,
      "grad_norm": 0.526323139667511,
      "learning_rate": 5.626140758946515e-06,
      "loss": 0.02,
      "step": 1347580
    },
    {
      "epoch": 2.2053769564619703,
      "grad_norm": 0.41046467423439026,
      "learning_rate": 5.626074866732999e-06,
      "loss": 0.014,
      "step": 1347600
    },
    {
      "epoch": 2.205409686900624,
      "grad_norm": 0.435040682554245,
      "learning_rate": 5.626008974519482e-06,
      "loss": 0.0128,
      "step": 1347620
    },
    {
      "epoch": 2.205442417339277,
      "grad_norm": 1.5977109670639038,
      "learning_rate": 5.625943082305964e-06,
      "loss": 0.0183,
      "step": 1347640
    },
    {
      "epoch": 2.2054751477779306,
      "grad_norm": 0.3543858826160431,
      "learning_rate": 5.625877190092447e-06,
      "loss": 0.0173,
      "step": 1347660
    },
    {
      "epoch": 2.205507878216584,
      "grad_norm": 0.7180927991867065,
      "learning_rate": 5.625811297878931e-06,
      "loss": 0.0181,
      "step": 1347680
    },
    {
      "epoch": 2.2055406086552374,
      "grad_norm": 0.37655404210090637,
      "learning_rate": 5.625745405665413e-06,
      "loss": 0.0186,
      "step": 1347700
    },
    {
      "epoch": 2.2055733390938905,
      "grad_norm": 0.1314062625169754,
      "learning_rate": 5.625679513451896e-06,
      "loss": 0.015,
      "step": 1347720
    },
    {
      "epoch": 2.2056060695325437,
      "grad_norm": 0.7727311253547668,
      "learning_rate": 5.625613621238378e-06,
      "loss": 0.0164,
      "step": 1347740
    },
    {
      "epoch": 2.2056387999711973,
      "grad_norm": 0.3872085511684418,
      "learning_rate": 5.625547729024862e-06,
      "loss": 0.02,
      "step": 1347760
    },
    {
      "epoch": 2.2056715304098504,
      "grad_norm": 0.1475871205329895,
      "learning_rate": 5.6254818368113436e-06,
      "loss": 0.0174,
      "step": 1347780
    },
    {
      "epoch": 2.205704260848504,
      "grad_norm": 0.4951139986515045,
      "learning_rate": 5.625415944597827e-06,
      "loss": 0.0105,
      "step": 1347800
    },
    {
      "epoch": 2.205736991287157,
      "grad_norm": 0.9866026043891907,
      "learning_rate": 5.625350052384311e-06,
      "loss": 0.0197,
      "step": 1347820
    },
    {
      "epoch": 2.2057697217258108,
      "grad_norm": 0.13357898592948914,
      "learning_rate": 5.625284160170793e-06,
      "loss": 0.0106,
      "step": 1347840
    },
    {
      "epoch": 2.205802452164464,
      "grad_norm": 1.0549256801605225,
      "learning_rate": 5.625218267957276e-06,
      "loss": 0.0123,
      "step": 1347860
    },
    {
      "epoch": 2.205835182603117,
      "grad_norm": 0.3213358223438263,
      "learning_rate": 5.625152375743759e-06,
      "loss": 0.0134,
      "step": 1347880
    },
    {
      "epoch": 2.2058679130417707,
      "grad_norm": 0.18385621905326843,
      "learning_rate": 5.625086483530242e-06,
      "loss": 0.01,
      "step": 1347900
    },
    {
      "epoch": 2.205900643480424,
      "grad_norm": 0.2374849170446396,
      "learning_rate": 5.6250205913167244e-06,
      "loss": 0.0231,
      "step": 1347920
    },
    {
      "epoch": 2.2059333739190774,
      "grad_norm": 0.7033186554908752,
      "learning_rate": 5.624954699103208e-06,
      "loss": 0.0175,
      "step": 1347940
    },
    {
      "epoch": 2.2059661043577306,
      "grad_norm": 0.981603741645813,
      "learning_rate": 5.62488880688969e-06,
      "loss": 0.019,
      "step": 1347960
    },
    {
      "epoch": 2.205998834796384,
      "grad_norm": 0.41022226214408875,
      "learning_rate": 5.6248229146761735e-06,
      "loss": 0.0144,
      "step": 1347980
    },
    {
      "epoch": 2.2060315652350373,
      "grad_norm": 2.759946346282959,
      "learning_rate": 5.624757022462655e-06,
      "loss": 0.0134,
      "step": 1348000
    },
    {
      "epoch": 2.2060642956736904,
      "grad_norm": 0.6043640375137329,
      "learning_rate": 5.624691130249139e-06,
      "loss": 0.0163,
      "step": 1348020
    },
    {
      "epoch": 2.206097026112344,
      "grad_norm": 0.43132227659225464,
      "learning_rate": 5.624625238035622e-06,
      "loss": 0.0121,
      "step": 1348040
    },
    {
      "epoch": 2.206129756550997,
      "grad_norm": 0.09822528809309006,
      "learning_rate": 5.6245593458221045e-06,
      "loss": 0.0147,
      "step": 1348060
    },
    {
      "epoch": 2.206162486989651,
      "grad_norm": 0.6881596446037292,
      "learning_rate": 5.624493453608587e-06,
      "loss": 0.0173,
      "step": 1348080
    },
    {
      "epoch": 2.206195217428304,
      "grad_norm": 0.5282930731773376,
      "learning_rate": 5.624427561395071e-06,
      "loss": 0.0131,
      "step": 1348100
    },
    {
      "epoch": 2.2062279478669575,
      "grad_norm": 0.8181815147399902,
      "learning_rate": 5.624361669181553e-06,
      "loss": 0.013,
      "step": 1348120
    },
    {
      "epoch": 2.2062606783056107,
      "grad_norm": 0.8612548112869263,
      "learning_rate": 5.624295776968036e-06,
      "loss": 0.0232,
      "step": 1348140
    },
    {
      "epoch": 2.206293408744264,
      "grad_norm": 0.2082960307598114,
      "learning_rate": 5.624229884754518e-06,
      "loss": 0.0137,
      "step": 1348160
    },
    {
      "epoch": 2.2063261391829174,
      "grad_norm": 0.27873438596725464,
      "learning_rate": 5.624163992541002e-06,
      "loss": 0.01,
      "step": 1348180
    },
    {
      "epoch": 2.2063588696215706,
      "grad_norm": 0.2556218206882477,
      "learning_rate": 5.624098100327485e-06,
      "loss": 0.0215,
      "step": 1348200
    },
    {
      "epoch": 2.206391600060224,
      "grad_norm": 0.3040136694908142,
      "learning_rate": 5.624032208113967e-06,
      "loss": 0.0126,
      "step": 1348220
    },
    {
      "epoch": 2.2064243304988773,
      "grad_norm": 0.6550622582435608,
      "learning_rate": 5.623966315900451e-06,
      "loss": 0.0127,
      "step": 1348240
    },
    {
      "epoch": 2.206457060937531,
      "grad_norm": 0.32337188720703125,
      "learning_rate": 5.6239004236869336e-06,
      "loss": 0.0135,
      "step": 1348260
    },
    {
      "epoch": 2.206489791376184,
      "grad_norm": 0.3123384416103363,
      "learning_rate": 5.623834531473416e-06,
      "loss": 0.0114,
      "step": 1348280
    },
    {
      "epoch": 2.206522521814837,
      "grad_norm": 0.3459862172603607,
      "learning_rate": 5.623768639259899e-06,
      "loss": 0.0158,
      "step": 1348300
    },
    {
      "epoch": 2.206555252253491,
      "grad_norm": 0.29570722579956055,
      "learning_rate": 5.623702747046383e-06,
      "loss": 0.0148,
      "step": 1348320
    },
    {
      "epoch": 2.206587982692144,
      "grad_norm": 0.21325254440307617,
      "learning_rate": 5.6236368548328645e-06,
      "loss": 0.0209,
      "step": 1348340
    },
    {
      "epoch": 2.2066207131307976,
      "grad_norm": 0.5274171233177185,
      "learning_rate": 5.623570962619348e-06,
      "loss": 0.0114,
      "step": 1348360
    },
    {
      "epoch": 2.2066534435694507,
      "grad_norm": 0.3995135724544525,
      "learning_rate": 5.62350507040583e-06,
      "loss": 0.0187,
      "step": 1348380
    },
    {
      "epoch": 2.206686174008104,
      "grad_norm": 0.5873685479164124,
      "learning_rate": 5.623439178192314e-06,
      "loss": 0.0172,
      "step": 1348400
    },
    {
      "epoch": 2.2067189044467574,
      "grad_norm": 0.674816906452179,
      "learning_rate": 5.623373285978796e-06,
      "loss": 0.0151,
      "step": 1348420
    },
    {
      "epoch": 2.2067516348854106,
      "grad_norm": 0.5784310102462769,
      "learning_rate": 5.623307393765279e-06,
      "loss": 0.0177,
      "step": 1348440
    },
    {
      "epoch": 2.206784365324064,
      "grad_norm": 0.631598711013794,
      "learning_rate": 5.623241501551762e-06,
      "loss": 0.0204,
      "step": 1348460
    },
    {
      "epoch": 2.2068170957627173,
      "grad_norm": 0.39984744787216187,
      "learning_rate": 5.623175609338245e-06,
      "loss": 0.0115,
      "step": 1348480
    },
    {
      "epoch": 2.206849826201371,
      "grad_norm": 0.21459728479385376,
      "learning_rate": 5.623109717124727e-06,
      "loss": 0.014,
      "step": 1348500
    },
    {
      "epoch": 2.206882556640024,
      "grad_norm": 0.5694526433944702,
      "learning_rate": 5.623043824911211e-06,
      "loss": 0.0188,
      "step": 1348520
    },
    {
      "epoch": 2.2069152870786772,
      "grad_norm": 0.3972633481025696,
      "learning_rate": 5.6229779326976945e-06,
      "loss": 0.0176,
      "step": 1348540
    },
    {
      "epoch": 2.206948017517331,
      "grad_norm": 0.47897350788116455,
      "learning_rate": 5.622912040484176e-06,
      "loss": 0.0093,
      "step": 1348560
    },
    {
      "epoch": 2.206980747955984,
      "grad_norm": 0.5110124349594116,
      "learning_rate": 5.62284614827066e-06,
      "loss": 0.0118,
      "step": 1348580
    },
    {
      "epoch": 2.2070134783946376,
      "grad_norm": 0.5396524667739868,
      "learning_rate": 5.622780256057142e-06,
      "loss": 0.0157,
      "step": 1348600
    },
    {
      "epoch": 2.2070462088332907,
      "grad_norm": 0.39705008268356323,
      "learning_rate": 5.622714363843625e-06,
      "loss": 0.016,
      "step": 1348620
    },
    {
      "epoch": 2.2070789392719443,
      "grad_norm": 0.40600869059562683,
      "learning_rate": 5.622648471630108e-06,
      "loss": 0.0121,
      "step": 1348640
    },
    {
      "epoch": 2.2071116697105975,
      "grad_norm": 0.1851401925086975,
      "learning_rate": 5.622582579416591e-06,
      "loss": 0.0137,
      "step": 1348660
    },
    {
      "epoch": 2.2071444001492506,
      "grad_norm": 3.5680010318756104,
      "learning_rate": 5.622516687203074e-06,
      "loss": 0.0166,
      "step": 1348680
    },
    {
      "epoch": 2.207177130587904,
      "grad_norm": 0.2987377345561981,
      "learning_rate": 5.622450794989557e-06,
      "loss": 0.013,
      "step": 1348700
    },
    {
      "epoch": 2.2072098610265574,
      "grad_norm": 0.20145902037620544,
      "learning_rate": 5.622384902776039e-06,
      "loss": 0.0157,
      "step": 1348720
    },
    {
      "epoch": 2.207242591465211,
      "grad_norm": 0.26594477891921997,
      "learning_rate": 5.622319010562523e-06,
      "loss": 0.0176,
      "step": 1348740
    },
    {
      "epoch": 2.207275321903864,
      "grad_norm": 0.5243160724639893,
      "learning_rate": 5.622253118349005e-06,
      "loss": 0.0092,
      "step": 1348760
    },
    {
      "epoch": 2.2073080523425177,
      "grad_norm": 0.9980476498603821,
      "learning_rate": 5.622187226135488e-06,
      "loss": 0.018,
      "step": 1348780
    },
    {
      "epoch": 2.207340782781171,
      "grad_norm": 0.48676085472106934,
      "learning_rate": 5.62212133392197e-06,
      "loss": 0.0147,
      "step": 1348800
    },
    {
      "epoch": 2.207373513219824,
      "grad_norm": 0.2025085836648941,
      "learning_rate": 5.622055441708454e-06,
      "loss": 0.011,
      "step": 1348820
    },
    {
      "epoch": 2.2074062436584776,
      "grad_norm": 0.6668345332145691,
      "learning_rate": 5.621989549494936e-06,
      "loss": 0.0164,
      "step": 1348840
    },
    {
      "epoch": 2.2074389740971307,
      "grad_norm": 0.45172885060310364,
      "learning_rate": 5.621923657281419e-06,
      "loss": 0.0173,
      "step": 1348860
    },
    {
      "epoch": 2.2074717045357843,
      "grad_norm": 0.448322594165802,
      "learning_rate": 5.621857765067903e-06,
      "loss": 0.018,
      "step": 1348880
    },
    {
      "epoch": 2.2075044349744375,
      "grad_norm": 0.5168439745903015,
      "learning_rate": 5.6217918728543855e-06,
      "loss": 0.02,
      "step": 1348900
    },
    {
      "epoch": 2.2075371654130906,
      "grad_norm": 0.31346169114112854,
      "learning_rate": 5.621725980640868e-06,
      "loss": 0.017,
      "step": 1348920
    },
    {
      "epoch": 2.2075698958517442,
      "grad_norm": 1.0340503454208374,
      "learning_rate": 5.621660088427351e-06,
      "loss": 0.0182,
      "step": 1348940
    },
    {
      "epoch": 2.2076026262903974,
      "grad_norm": 0.12695910036563873,
      "learning_rate": 5.6215941962138345e-06,
      "loss": 0.0105,
      "step": 1348960
    },
    {
      "epoch": 2.207635356729051,
      "grad_norm": 0.5117692351341248,
      "learning_rate": 5.6215283040003164e-06,
      "loss": 0.0231,
      "step": 1348980
    },
    {
      "epoch": 2.207668087167704,
      "grad_norm": 0.2646714448928833,
      "learning_rate": 5.6214624117868e-06,
      "loss": 0.0165,
      "step": 1349000
    },
    {
      "epoch": 2.2077008176063577,
      "grad_norm": 0.3760642111301422,
      "learning_rate": 5.621396519573282e-06,
      "loss": 0.0132,
      "step": 1349020
    },
    {
      "epoch": 2.207733548045011,
      "grad_norm": 0.345439076423645,
      "learning_rate": 5.6213306273597655e-06,
      "loss": 0.0154,
      "step": 1349040
    },
    {
      "epoch": 2.207766278483664,
      "grad_norm": 2.521533727645874,
      "learning_rate": 5.621264735146248e-06,
      "loss": 0.0111,
      "step": 1349060
    },
    {
      "epoch": 2.2077990089223176,
      "grad_norm": 0.09164164215326309,
      "learning_rate": 5.621198842932731e-06,
      "loss": 0.0189,
      "step": 1349080
    },
    {
      "epoch": 2.2078317393609708,
      "grad_norm": 0.49223417043685913,
      "learning_rate": 5.621132950719214e-06,
      "loss": 0.0183,
      "step": 1349100
    },
    {
      "epoch": 2.2078644697996244,
      "grad_norm": 0.48307186365127563,
      "learning_rate": 5.621067058505697e-06,
      "loss": 0.0176,
      "step": 1349120
    },
    {
      "epoch": 2.2078972002382775,
      "grad_norm": 0.16181981563568115,
      "learning_rate": 5.621001166292179e-06,
      "loss": 0.0166,
      "step": 1349140
    },
    {
      "epoch": 2.207929930676931,
      "grad_norm": 0.12295647710561752,
      "learning_rate": 5.620935274078663e-06,
      "loss": 0.0144,
      "step": 1349160
    },
    {
      "epoch": 2.2079626611155843,
      "grad_norm": 0.6992310285568237,
      "learning_rate": 5.620869381865145e-06,
      "loss": 0.0202,
      "step": 1349180
    },
    {
      "epoch": 2.2079953915542374,
      "grad_norm": 0.3727330267429352,
      "learning_rate": 5.620803489651628e-06,
      "loss": 0.0138,
      "step": 1349200
    },
    {
      "epoch": 2.208028121992891,
      "grad_norm": 0.8398269414901733,
      "learning_rate": 5.620737597438111e-06,
      "loss": 0.0108,
      "step": 1349220
    },
    {
      "epoch": 2.208060852431544,
      "grad_norm": 0.5363968014717102,
      "learning_rate": 5.620671705224594e-06,
      "loss": 0.0184,
      "step": 1349240
    },
    {
      "epoch": 2.2080935828701977,
      "grad_norm": 0.17566053569316864,
      "learning_rate": 5.620605813011077e-06,
      "loss": 0.0148,
      "step": 1349260
    },
    {
      "epoch": 2.208126313308851,
      "grad_norm": 0.3422573506832123,
      "learning_rate": 5.62053992079756e-06,
      "loss": 0.0141,
      "step": 1349280
    },
    {
      "epoch": 2.2081590437475045,
      "grad_norm": 0.2559514045715332,
      "learning_rate": 5.620474028584043e-06,
      "loss": 0.0243,
      "step": 1349300
    },
    {
      "epoch": 2.2081917741861576,
      "grad_norm": 0.3806743323802948,
      "learning_rate": 5.6204081363705255e-06,
      "loss": 0.0105,
      "step": 1349320
    },
    {
      "epoch": 2.208224504624811,
      "grad_norm": 0.7660770416259766,
      "learning_rate": 5.620342244157009e-06,
      "loss": 0.018,
      "step": 1349340
    },
    {
      "epoch": 2.2082572350634644,
      "grad_norm": 0.2475157529115677,
      "learning_rate": 5.620276351943491e-06,
      "loss": 0.0109,
      "step": 1349360
    },
    {
      "epoch": 2.2082899655021175,
      "grad_norm": 0.3812706172466278,
      "learning_rate": 5.620210459729975e-06,
      "loss": 0.0196,
      "step": 1349380
    },
    {
      "epoch": 2.208322695940771,
      "grad_norm": 0.06727252155542374,
      "learning_rate": 5.6201445675164565e-06,
      "loss": 0.017,
      "step": 1349400
    },
    {
      "epoch": 2.2083554263794243,
      "grad_norm": 0.36114534735679626,
      "learning_rate": 5.62007867530294e-06,
      "loss": 0.0157,
      "step": 1349420
    },
    {
      "epoch": 2.208388156818078,
      "grad_norm": 0.187544047832489,
      "learning_rate": 5.620012783089423e-06,
      "loss": 0.0109,
      "step": 1349440
    },
    {
      "epoch": 2.208420887256731,
      "grad_norm": 0.22654187679290771,
      "learning_rate": 5.6199468908759056e-06,
      "loss": 0.0121,
      "step": 1349460
    },
    {
      "epoch": 2.208453617695384,
      "grad_norm": 0.5394557118415833,
      "learning_rate": 5.619880998662388e-06,
      "loss": 0.0213,
      "step": 1349480
    },
    {
      "epoch": 2.2084863481340378,
      "grad_norm": 0.15152564644813538,
      "learning_rate": 5.619815106448872e-06,
      "loss": 0.0122,
      "step": 1349500
    },
    {
      "epoch": 2.208519078572691,
      "grad_norm": 0.2490130364894867,
      "learning_rate": 5.619749214235354e-06,
      "loss": 0.0116,
      "step": 1349520
    },
    {
      "epoch": 2.2085518090113445,
      "grad_norm": 0.30831679701805115,
      "learning_rate": 5.619683322021837e-06,
      "loss": 0.0115,
      "step": 1349540
    },
    {
      "epoch": 2.2085845394499977,
      "grad_norm": 0.39903709292411804,
      "learning_rate": 5.619617429808319e-06,
      "loss": 0.0176,
      "step": 1349560
    },
    {
      "epoch": 2.2086172698886513,
      "grad_norm": 0.9923421740531921,
      "learning_rate": 5.619551537594803e-06,
      "loss": 0.0162,
      "step": 1349580
    },
    {
      "epoch": 2.2086500003273044,
      "grad_norm": 0.7105570435523987,
      "learning_rate": 5.6194856453812864e-06,
      "loss": 0.0168,
      "step": 1349600
    },
    {
      "epoch": 2.2086827307659576,
      "grad_norm": 0.38371288776397705,
      "learning_rate": 5.619419753167768e-06,
      "loss": 0.014,
      "step": 1349620
    },
    {
      "epoch": 2.208715461204611,
      "grad_norm": 0.24534782767295837,
      "learning_rate": 5.619353860954252e-06,
      "loss": 0.0159,
      "step": 1349640
    },
    {
      "epoch": 2.2087481916432643,
      "grad_norm": 0.41183704137802124,
      "learning_rate": 5.619287968740734e-06,
      "loss": 0.0168,
      "step": 1349660
    },
    {
      "epoch": 2.208780922081918,
      "grad_norm": 0.17043490707874298,
      "learning_rate": 5.619222076527217e-06,
      "loss": 0.0163,
      "step": 1349680
    },
    {
      "epoch": 2.208813652520571,
      "grad_norm": 0.11123218387365341,
      "learning_rate": 5.6191561843137e-06,
      "loss": 0.0211,
      "step": 1349700
    },
    {
      "epoch": 2.2088463829592246,
      "grad_norm": 0.2947668433189392,
      "learning_rate": 5.619090292100184e-06,
      "loss": 0.0136,
      "step": 1349720
    },
    {
      "epoch": 2.208879113397878,
      "grad_norm": 0.863464891910553,
      "learning_rate": 5.619024399886666e-06,
      "loss": 0.0127,
      "step": 1349740
    },
    {
      "epoch": 2.208911843836531,
      "grad_norm": 0.24767464399337769,
      "learning_rate": 5.618958507673149e-06,
      "loss": 0.0108,
      "step": 1349760
    },
    {
      "epoch": 2.2089445742751845,
      "grad_norm": 0.9786871075630188,
      "learning_rate": 5.618892615459631e-06,
      "loss": 0.0256,
      "step": 1349780
    },
    {
      "epoch": 2.2089773047138377,
      "grad_norm": 0.4428198039531708,
      "learning_rate": 5.618826723246115e-06,
      "loss": 0.0206,
      "step": 1349800
    },
    {
      "epoch": 2.2090100351524913,
      "grad_norm": 0.26956984400749207,
      "learning_rate": 5.618760831032597e-06,
      "loss": 0.0159,
      "step": 1349820
    },
    {
      "epoch": 2.2090427655911444,
      "grad_norm": 0.15759557485580444,
      "learning_rate": 5.61869493881908e-06,
      "loss": 0.0198,
      "step": 1349840
    },
    {
      "epoch": 2.209075496029798,
      "grad_norm": 0.15023837983608246,
      "learning_rate": 5.618629046605563e-06,
      "loss": 0.0113,
      "step": 1349860
    },
    {
      "epoch": 2.209108226468451,
      "grad_norm": 0.2576434910297394,
      "learning_rate": 5.618563154392046e-06,
      "loss": 0.0193,
      "step": 1349880
    },
    {
      "epoch": 2.2091409569071043,
      "grad_norm": 0.41505569219589233,
      "learning_rate": 5.618497262178528e-06,
      "loss": 0.017,
      "step": 1349900
    },
    {
      "epoch": 2.209173687345758,
      "grad_norm": 0.3130723237991333,
      "learning_rate": 5.618431369965012e-06,
      "loss": 0.0183,
      "step": 1349920
    },
    {
      "epoch": 2.209206417784411,
      "grad_norm": 1.460110068321228,
      "learning_rate": 5.618365477751495e-06,
      "loss": 0.0142,
      "step": 1349940
    },
    {
      "epoch": 2.2092391482230647,
      "grad_norm": 0.7867856025695801,
      "learning_rate": 5.6182995855379775e-06,
      "loss": 0.0203,
      "step": 1349960
    },
    {
      "epoch": 2.209271878661718,
      "grad_norm": 0.9001195430755615,
      "learning_rate": 5.618233693324461e-06,
      "loss": 0.0178,
      "step": 1349980
    },
    {
      "epoch": 2.209304609100371,
      "grad_norm": 0.42675039172172546,
      "learning_rate": 5.618167801110943e-06,
      "loss": 0.0176,
      "step": 1350000
    },
    {
      "epoch": 2.209304609100371,
      "eval_loss": 0.008711420930922031,
      "eval_runtime": 6519.4197,
      "eval_samples_per_second": 157.661,
      "eval_steps_per_second": 15.766,
      "eval_sts-dev_pearson_cosine": 0.9798955725651651,
      "eval_sts-dev_spearman_cosine": 0.8922051275586015,
      "step": 1350000
    },
    {
      "epoch": 2.2093373395390246,
      "grad_norm": 0.5978598594665527,
      "learning_rate": 5.6181019088974265e-06,
      "loss": 0.0173,
      "step": 1350020
    },
    {
      "epoch": 2.2093700699776777,
      "grad_norm": 0.7454606294631958,
      "learning_rate": 5.618036016683908e-06,
      "loss": 0.0181,
      "step": 1350040
    },
    {
      "epoch": 2.2094028004163313,
      "grad_norm": 0.3060842454433441,
      "learning_rate": 5.617970124470392e-06,
      "loss": 0.0149,
      "step": 1350060
    },
    {
      "epoch": 2.2094355308549845,
      "grad_norm": 0.5645883679389954,
      "learning_rate": 5.617904232256875e-06,
      "loss": 0.0122,
      "step": 1350080
    },
    {
      "epoch": 2.209468261293638,
      "grad_norm": 3.329871654510498,
      "learning_rate": 5.6178383400433575e-06,
      "loss": 0.0161,
      "step": 1350100
    },
    {
      "epoch": 2.209500991732291,
      "grad_norm": 0.38678714632987976,
      "learning_rate": 5.61777244782984e-06,
      "loss": 0.0171,
      "step": 1350120
    },
    {
      "epoch": 2.2095337221709443,
      "grad_norm": 0.32337069511413574,
      "learning_rate": 5.617706555616324e-06,
      "loss": 0.0129,
      "step": 1350140
    },
    {
      "epoch": 2.209566452609598,
      "grad_norm": 0.13873116672039032,
      "learning_rate": 5.617640663402806e-06,
      "loss": 0.0196,
      "step": 1350160
    },
    {
      "epoch": 2.209599183048251,
      "grad_norm": 0.20085586607456207,
      "learning_rate": 5.617574771189289e-06,
      "loss": 0.0132,
      "step": 1350180
    },
    {
      "epoch": 2.2096319134869047,
      "grad_norm": 0.4106159806251526,
      "learning_rate": 5.617508878975771e-06,
      "loss": 0.0149,
      "step": 1350200
    },
    {
      "epoch": 2.209664643925558,
      "grad_norm": 1.293776273727417,
      "learning_rate": 5.617442986762255e-06,
      "loss": 0.0173,
      "step": 1350220
    },
    {
      "epoch": 2.2096973743642114,
      "grad_norm": 1.121341586112976,
      "learning_rate": 5.6173770945487375e-06,
      "loss": 0.0164,
      "step": 1350240
    },
    {
      "epoch": 2.2097301048028646,
      "grad_norm": 0.1742187738418579,
      "learning_rate": 5.61731120233522e-06,
      "loss": 0.0164,
      "step": 1350260
    },
    {
      "epoch": 2.2097628352415177,
      "grad_norm": 0.15463940799236298,
      "learning_rate": 5.617245310121704e-06,
      "loss": 0.0164,
      "step": 1350280
    },
    {
      "epoch": 2.2097955656801713,
      "grad_norm": 0.8660688400268555,
      "learning_rate": 5.6171794179081866e-06,
      "loss": 0.0144,
      "step": 1350300
    },
    {
      "epoch": 2.2098282961188245,
      "grad_norm": 0.6900210380554199,
      "learning_rate": 5.617113525694669e-06,
      "loss": 0.0147,
      "step": 1350320
    },
    {
      "epoch": 2.209861026557478,
      "grad_norm": 0.3504208028316498,
      "learning_rate": 5.617047633481152e-06,
      "loss": 0.0174,
      "step": 1350340
    },
    {
      "epoch": 2.209893756996131,
      "grad_norm": 0.5108287930488586,
      "learning_rate": 5.616981741267636e-06,
      "loss": 0.0184,
      "step": 1350360
    },
    {
      "epoch": 2.2099264874347844,
      "grad_norm": 0.12054844945669174,
      "learning_rate": 5.6169158490541175e-06,
      "loss": 0.015,
      "step": 1350380
    },
    {
      "epoch": 2.209959217873438,
      "grad_norm": 0.7505712509155273,
      "learning_rate": 5.616849956840601e-06,
      "loss": 0.021,
      "step": 1350400
    },
    {
      "epoch": 2.209991948312091,
      "grad_norm": 1.1181972026824951,
      "learning_rate": 5.616784064627083e-06,
      "loss": 0.0194,
      "step": 1350420
    },
    {
      "epoch": 2.2100246787507447,
      "grad_norm": 0.2514640986919403,
      "learning_rate": 5.616718172413567e-06,
      "loss": 0.0201,
      "step": 1350440
    },
    {
      "epoch": 2.210057409189398,
      "grad_norm": 0.35526716709136963,
      "learning_rate": 5.616652280200049e-06,
      "loss": 0.0162,
      "step": 1350460
    },
    {
      "epoch": 2.2100901396280515,
      "grad_norm": 0.29620468616485596,
      "learning_rate": 5.616586387986532e-06,
      "loss": 0.015,
      "step": 1350480
    },
    {
      "epoch": 2.2101228700667046,
      "grad_norm": 0.3681027889251709,
      "learning_rate": 5.616520495773015e-06,
      "loss": 0.0146,
      "step": 1350500
    },
    {
      "epoch": 2.2101556005053578,
      "grad_norm": 0.19193604588508606,
      "learning_rate": 5.616454603559498e-06,
      "loss": 0.0147,
      "step": 1350520
    },
    {
      "epoch": 2.2101883309440113,
      "grad_norm": 0.4951099455356598,
      "learning_rate": 5.61638871134598e-06,
      "loss": 0.0221,
      "step": 1350540
    },
    {
      "epoch": 2.2102210613826645,
      "grad_norm": 1.2934634685516357,
      "learning_rate": 5.616322819132464e-06,
      "loss": 0.0139,
      "step": 1350560
    },
    {
      "epoch": 2.210253791821318,
      "grad_norm": 0.20442605018615723,
      "learning_rate": 5.616256926918946e-06,
      "loss": 0.0127,
      "step": 1350580
    },
    {
      "epoch": 2.2102865222599712,
      "grad_norm": 0.24324548244476318,
      "learning_rate": 5.616191034705429e-06,
      "loss": 0.0156,
      "step": 1350600
    },
    {
      "epoch": 2.210319252698625,
      "grad_norm": 0.7580259442329407,
      "learning_rate": 5.616125142491911e-06,
      "loss": 0.0276,
      "step": 1350620
    },
    {
      "epoch": 2.210351983137278,
      "grad_norm": 0.8740265965461731,
      "learning_rate": 5.616059250278395e-06,
      "loss": 0.0219,
      "step": 1350640
    },
    {
      "epoch": 2.210384713575931,
      "grad_norm": 0.08821942657232285,
      "learning_rate": 5.6159933580648784e-06,
      "loss": 0.0123,
      "step": 1350660
    },
    {
      "epoch": 2.2104174440145847,
      "grad_norm": 0.4359751343727112,
      "learning_rate": 5.61592746585136e-06,
      "loss": 0.0156,
      "step": 1350680
    },
    {
      "epoch": 2.210450174453238,
      "grad_norm": 0.27565479278564453,
      "learning_rate": 5.615861573637844e-06,
      "loss": 0.013,
      "step": 1350700
    },
    {
      "epoch": 2.2104829048918915,
      "grad_norm": 0.16018038988113403,
      "learning_rate": 5.615795681424327e-06,
      "loss": 0.0154,
      "step": 1350720
    },
    {
      "epoch": 2.2105156353305446,
      "grad_norm": 0.4470857083797455,
      "learning_rate": 5.615729789210809e-06,
      "loss": 0.0211,
      "step": 1350740
    },
    {
      "epoch": 2.210548365769198,
      "grad_norm": 0.7468850016593933,
      "learning_rate": 5.615663896997292e-06,
      "loss": 0.0121,
      "step": 1350760
    },
    {
      "epoch": 2.2105810962078514,
      "grad_norm": 1.2591253519058228,
      "learning_rate": 5.615598004783776e-06,
      "loss": 0.0158,
      "step": 1350780
    },
    {
      "epoch": 2.2106138266465045,
      "grad_norm": 0.2058676779270172,
      "learning_rate": 5.615532112570258e-06,
      "loss": 0.014,
      "step": 1350800
    },
    {
      "epoch": 2.210646557085158,
      "grad_norm": 0.15108929574489594,
      "learning_rate": 5.615466220356741e-06,
      "loss": 0.0109,
      "step": 1350820
    },
    {
      "epoch": 2.2106792875238113,
      "grad_norm": 0.7675727009773254,
      "learning_rate": 5.615400328143223e-06,
      "loss": 0.0196,
      "step": 1350840
    },
    {
      "epoch": 2.210712017962465,
      "grad_norm": 0.2770492732524872,
      "learning_rate": 5.615334435929707e-06,
      "loss": 0.0158,
      "step": 1350860
    },
    {
      "epoch": 2.210744748401118,
      "grad_norm": 0.11273257434368134,
      "learning_rate": 5.615268543716189e-06,
      "loss": 0.0134,
      "step": 1350880
    },
    {
      "epoch": 2.2107774788397716,
      "grad_norm": 0.6970357298851013,
      "learning_rate": 5.615202651502672e-06,
      "loss": 0.0132,
      "step": 1350900
    },
    {
      "epoch": 2.2108102092784248,
      "grad_norm": 0.20197051763534546,
      "learning_rate": 5.615136759289155e-06,
      "loss": 0.0232,
      "step": 1350920
    },
    {
      "epoch": 2.210842939717078,
      "grad_norm": 0.18046624958515167,
      "learning_rate": 5.6150708670756385e-06,
      "loss": 0.0115,
      "step": 1350940
    },
    {
      "epoch": 2.2108756701557315,
      "grad_norm": 0.35540327429771423,
      "learning_rate": 5.61500497486212e-06,
      "loss": 0.0201,
      "step": 1350960
    },
    {
      "epoch": 2.2109084005943846,
      "grad_norm": 0.8234679698944092,
      "learning_rate": 5.614939082648604e-06,
      "loss": 0.0177,
      "step": 1350980
    },
    {
      "epoch": 2.2109411310330382,
      "grad_norm": 0.23486192524433136,
      "learning_rate": 5.6148731904350875e-06,
      "loss": 0.0128,
      "step": 1351000
    },
    {
      "epoch": 2.2109738614716914,
      "grad_norm": 0.47386616468429565,
      "learning_rate": 5.6148072982215694e-06,
      "loss": 0.0198,
      "step": 1351020
    },
    {
      "epoch": 2.211006591910345,
      "grad_norm": 0.8096685409545898,
      "learning_rate": 5.614741406008053e-06,
      "loss": 0.022,
      "step": 1351040
    },
    {
      "epoch": 2.211039322348998,
      "grad_norm": 0.08154453337192535,
      "learning_rate": 5.614675513794535e-06,
      "loss": 0.0116,
      "step": 1351060
    },
    {
      "epoch": 2.2110720527876513,
      "grad_norm": 0.5433362722396851,
      "learning_rate": 5.6146096215810185e-06,
      "loss": 0.0126,
      "step": 1351080
    },
    {
      "epoch": 2.211104783226305,
      "grad_norm": 1.0717483758926392,
      "learning_rate": 5.614543729367501e-06,
      "loss": 0.0186,
      "step": 1351100
    },
    {
      "epoch": 2.211137513664958,
      "grad_norm": 0.17456583678722382,
      "learning_rate": 5.614477837153984e-06,
      "loss": 0.0182,
      "step": 1351120
    },
    {
      "epoch": 2.2111702441036116,
      "grad_norm": 0.16296856105327606,
      "learning_rate": 5.614411944940467e-06,
      "loss": 0.0144,
      "step": 1351140
    },
    {
      "epoch": 2.2112029745422648,
      "grad_norm": 0.6962382197380066,
      "learning_rate": 5.61434605272695e-06,
      "loss": 0.0116,
      "step": 1351160
    },
    {
      "epoch": 2.2112357049809184,
      "grad_norm": 0.2512143552303314,
      "learning_rate": 5.614280160513432e-06,
      "loss": 0.0154,
      "step": 1351180
    },
    {
      "epoch": 2.2112684354195715,
      "grad_norm": 0.41134461760520935,
      "learning_rate": 5.614214268299916e-06,
      "loss": 0.0136,
      "step": 1351200
    },
    {
      "epoch": 2.2113011658582247,
      "grad_norm": 1.3672734498977661,
      "learning_rate": 5.614148376086398e-06,
      "loss": 0.0214,
      "step": 1351220
    },
    {
      "epoch": 2.2113338962968783,
      "grad_norm": 0.16298626363277435,
      "learning_rate": 5.614082483872881e-06,
      "loss": 0.0126,
      "step": 1351240
    },
    {
      "epoch": 2.2113666267355314,
      "grad_norm": 0.23074223101139069,
      "learning_rate": 5.614016591659364e-06,
      "loss": 0.0162,
      "step": 1351260
    },
    {
      "epoch": 2.211399357174185,
      "grad_norm": 0.635819137096405,
      "learning_rate": 5.613950699445847e-06,
      "loss": 0.0166,
      "step": 1351280
    },
    {
      "epoch": 2.211432087612838,
      "grad_norm": 0.6744778156280518,
      "learning_rate": 5.6138848072323295e-06,
      "loss": 0.0154,
      "step": 1351300
    },
    {
      "epoch": 2.2114648180514918,
      "grad_norm": 0.7106536626815796,
      "learning_rate": 5.613818915018813e-06,
      "loss": 0.0207,
      "step": 1351320
    },
    {
      "epoch": 2.211497548490145,
      "grad_norm": 0.24325965344905853,
      "learning_rate": 5.613753022805296e-06,
      "loss": 0.0096,
      "step": 1351340
    },
    {
      "epoch": 2.211530278928798,
      "grad_norm": 0.3745497167110443,
      "learning_rate": 5.6136871305917786e-06,
      "loss": 0.0189,
      "step": 1351360
    },
    {
      "epoch": 2.2115630093674516,
      "grad_norm": 0.16193833947181702,
      "learning_rate": 5.613621238378262e-06,
      "loss": 0.0223,
      "step": 1351380
    },
    {
      "epoch": 2.211595739806105,
      "grad_norm": 0.8399078249931335,
      "learning_rate": 5.613555346164744e-06,
      "loss": 0.0163,
      "step": 1351400
    },
    {
      "epoch": 2.2116284702447584,
      "grad_norm": 0.1540134698152542,
      "learning_rate": 5.613489453951228e-06,
      "loss": 0.0127,
      "step": 1351420
    },
    {
      "epoch": 2.2116612006834115,
      "grad_norm": 0.4605025351047516,
      "learning_rate": 5.6134235617377095e-06,
      "loss": 0.0149,
      "step": 1351440
    },
    {
      "epoch": 2.2116939311220647,
      "grad_norm": 0.1441657543182373,
      "learning_rate": 5.613357669524193e-06,
      "loss": 0.0115,
      "step": 1351460
    },
    {
      "epoch": 2.2117266615607183,
      "grad_norm": 0.33176928758621216,
      "learning_rate": 5.613291777310676e-06,
      "loss": 0.0098,
      "step": 1351480
    },
    {
      "epoch": 2.2117593919993714,
      "grad_norm": 0.11507032066583633,
      "learning_rate": 5.613225885097159e-06,
      "loss": 0.0122,
      "step": 1351500
    },
    {
      "epoch": 2.211792122438025,
      "grad_norm": 0.427722692489624,
      "learning_rate": 5.613159992883641e-06,
      "loss": 0.0127,
      "step": 1351520
    },
    {
      "epoch": 2.211824852876678,
      "grad_norm": 0.2775091230869293,
      "learning_rate": 5.613094100670125e-06,
      "loss": 0.0213,
      "step": 1351540
    },
    {
      "epoch": 2.2118575833153318,
      "grad_norm": 0.18205948173999786,
      "learning_rate": 5.613028208456607e-06,
      "loss": 0.0126,
      "step": 1351560
    },
    {
      "epoch": 2.211890313753985,
      "grad_norm": 0.34736168384552,
      "learning_rate": 5.61296231624309e-06,
      "loss": 0.0123,
      "step": 1351580
    },
    {
      "epoch": 2.211923044192638,
      "grad_norm": 0.20992054045200348,
      "learning_rate": 5.612896424029572e-06,
      "loss": 0.0127,
      "step": 1351600
    },
    {
      "epoch": 2.2119557746312917,
      "grad_norm": 0.1620560586452484,
      "learning_rate": 5.612830531816056e-06,
      "loss": 0.0205,
      "step": 1351620
    },
    {
      "epoch": 2.211988505069945,
      "grad_norm": 0.6447846293449402,
      "learning_rate": 5.612764639602538e-06,
      "loss": 0.0145,
      "step": 1351640
    },
    {
      "epoch": 2.2120212355085984,
      "grad_norm": 2.8425819873809814,
      "learning_rate": 5.612698747389021e-06,
      "loss": 0.0155,
      "step": 1351660
    },
    {
      "epoch": 2.2120539659472516,
      "grad_norm": 0.6361258625984192,
      "learning_rate": 5.612632855175504e-06,
      "loss": 0.0208,
      "step": 1351680
    },
    {
      "epoch": 2.212086696385905,
      "grad_norm": 0.044068701565265656,
      "learning_rate": 5.612566962961987e-06,
      "loss": 0.016,
      "step": 1351700
    },
    {
      "epoch": 2.2121194268245583,
      "grad_norm": 0.7747425436973572,
      "learning_rate": 5.61250107074847e-06,
      "loss": 0.0175,
      "step": 1351720
    },
    {
      "epoch": 2.2121521572632115,
      "grad_norm": 0.6831636428833008,
      "learning_rate": 5.612435178534953e-06,
      "loss": 0.0119,
      "step": 1351740
    },
    {
      "epoch": 2.212184887701865,
      "grad_norm": 0.35751858353614807,
      "learning_rate": 5.612369286321436e-06,
      "loss": 0.0165,
      "step": 1351760
    },
    {
      "epoch": 2.212217618140518,
      "grad_norm": 0.18415100872516632,
      "learning_rate": 5.612303394107919e-06,
      "loss": 0.0114,
      "step": 1351780
    },
    {
      "epoch": 2.212250348579172,
      "grad_norm": 0.2698657810688019,
      "learning_rate": 5.612237501894402e-06,
      "loss": 0.0133,
      "step": 1351800
    },
    {
      "epoch": 2.212283079017825,
      "grad_norm": 1.084783673286438,
      "learning_rate": 5.612171609680884e-06,
      "loss": 0.0147,
      "step": 1351820
    },
    {
      "epoch": 2.2123158094564785,
      "grad_norm": 0.3265080749988556,
      "learning_rate": 5.612105717467368e-06,
      "loss": 0.0157,
      "step": 1351840
    },
    {
      "epoch": 2.2123485398951317,
      "grad_norm": 0.9258166551589966,
      "learning_rate": 5.61203982525385e-06,
      "loss": 0.0172,
      "step": 1351860
    },
    {
      "epoch": 2.212381270333785,
      "grad_norm": 0.19460628926753998,
      "learning_rate": 5.611973933040333e-06,
      "loss": 0.0089,
      "step": 1351880
    },
    {
      "epoch": 2.2124140007724384,
      "grad_norm": 0.4905584156513214,
      "learning_rate": 5.611908040826816e-06,
      "loss": 0.0142,
      "step": 1351900
    },
    {
      "epoch": 2.2124467312110916,
      "grad_norm": 0.09359870851039886,
      "learning_rate": 5.611842148613299e-06,
      "loss": 0.0136,
      "step": 1351920
    },
    {
      "epoch": 2.212479461649745,
      "grad_norm": 0.8987916707992554,
      "learning_rate": 5.611776256399781e-06,
      "loss": 0.0137,
      "step": 1351940
    },
    {
      "epoch": 2.2125121920883983,
      "grad_norm": 0.09278561174869537,
      "learning_rate": 5.611710364186265e-06,
      "loss": 0.022,
      "step": 1351960
    },
    {
      "epoch": 2.2125449225270515,
      "grad_norm": 0.8510063290596008,
      "learning_rate": 5.611644471972747e-06,
      "loss": 0.0176,
      "step": 1351980
    },
    {
      "epoch": 2.212577652965705,
      "grad_norm": 0.14879831671714783,
      "learning_rate": 5.6115785797592305e-06,
      "loss": 0.0203,
      "step": 1352000
    },
    {
      "epoch": 2.2126103834043582,
      "grad_norm": 0.68430495262146,
      "learning_rate": 5.611512687545712e-06,
      "loss": 0.0165,
      "step": 1352020
    },
    {
      "epoch": 2.212643113843012,
      "grad_norm": 0.188155397772789,
      "learning_rate": 5.611446795332196e-06,
      "loss": 0.0167,
      "step": 1352040
    },
    {
      "epoch": 2.212675844281665,
      "grad_norm": 0.7250909209251404,
      "learning_rate": 5.6113809031186795e-06,
      "loss": 0.011,
      "step": 1352060
    },
    {
      "epoch": 2.2127085747203186,
      "grad_norm": 0.40946856141090393,
      "learning_rate": 5.611315010905161e-06,
      "loss": 0.0171,
      "step": 1352080
    },
    {
      "epoch": 2.2127413051589717,
      "grad_norm": 0.16111503541469574,
      "learning_rate": 5.611249118691645e-06,
      "loss": 0.0172,
      "step": 1352100
    },
    {
      "epoch": 2.212774035597625,
      "grad_norm": 0.15696106851100922,
      "learning_rate": 5.611183226478128e-06,
      "loss": 0.0168,
      "step": 1352120
    },
    {
      "epoch": 2.2128067660362785,
      "grad_norm": 0.2629440426826477,
      "learning_rate": 5.6111173342646105e-06,
      "loss": 0.0169,
      "step": 1352140
    },
    {
      "epoch": 2.2128394964749316,
      "grad_norm": 0.1261179894208908,
      "learning_rate": 5.611051442051093e-06,
      "loss": 0.0143,
      "step": 1352160
    },
    {
      "epoch": 2.212872226913585,
      "grad_norm": 1.856376051902771,
      "learning_rate": 5.610985549837577e-06,
      "loss": 0.013,
      "step": 1352180
    },
    {
      "epoch": 2.2129049573522384,
      "grad_norm": 0.7976521849632263,
      "learning_rate": 5.610919657624059e-06,
      "loss": 0.0127,
      "step": 1352200
    },
    {
      "epoch": 2.212937687790892,
      "grad_norm": 1.9300388097763062,
      "learning_rate": 5.610853765410542e-06,
      "loss": 0.0146,
      "step": 1352220
    },
    {
      "epoch": 2.212970418229545,
      "grad_norm": 0.3503418266773224,
      "learning_rate": 5.610787873197024e-06,
      "loss": 0.0163,
      "step": 1352240
    },
    {
      "epoch": 2.2130031486681982,
      "grad_norm": 0.28713780641555786,
      "learning_rate": 5.610721980983508e-06,
      "loss": 0.0165,
      "step": 1352260
    },
    {
      "epoch": 2.213035879106852,
      "grad_norm": 0.12222728133201599,
      "learning_rate": 5.6106560887699905e-06,
      "loss": 0.0157,
      "step": 1352280
    },
    {
      "epoch": 2.213068609545505,
      "grad_norm": 0.37206998467445374,
      "learning_rate": 5.610590196556473e-06,
      "loss": 0.0198,
      "step": 1352300
    },
    {
      "epoch": 2.2131013399841586,
      "grad_norm": 0.2722577154636383,
      "learning_rate": 5.610524304342956e-06,
      "loss": 0.0108,
      "step": 1352320
    },
    {
      "epoch": 2.2131340704228117,
      "grad_norm": 0.6351805329322815,
      "learning_rate": 5.6104584121294396e-06,
      "loss": 0.012,
      "step": 1352340
    },
    {
      "epoch": 2.2131668008614653,
      "grad_norm": 0.142999529838562,
      "learning_rate": 5.6103925199159215e-06,
      "loss": 0.016,
      "step": 1352360
    },
    {
      "epoch": 2.2131995313001185,
      "grad_norm": 0.5742718577384949,
      "learning_rate": 5.610326627702405e-06,
      "loss": 0.0181,
      "step": 1352380
    },
    {
      "epoch": 2.2132322617387716,
      "grad_norm": 3.5830798149108887,
      "learning_rate": 5.610260735488889e-06,
      "loss": 0.0142,
      "step": 1352400
    },
    {
      "epoch": 2.2132649921774252,
      "grad_norm": 0.22319447994232178,
      "learning_rate": 5.6101948432753705e-06,
      "loss": 0.0209,
      "step": 1352420
    },
    {
      "epoch": 2.2132977226160784,
      "grad_norm": 0.20276254415512085,
      "learning_rate": 5.610128951061854e-06,
      "loss": 0.0122,
      "step": 1352440
    },
    {
      "epoch": 2.213330453054732,
      "grad_norm": 0.3104642927646637,
      "learning_rate": 5.610063058848336e-06,
      "loss": 0.016,
      "step": 1352460
    },
    {
      "epoch": 2.213363183493385,
      "grad_norm": 0.26208803057670593,
      "learning_rate": 5.60999716663482e-06,
      "loss": 0.0187,
      "step": 1352480
    },
    {
      "epoch": 2.2133959139320387,
      "grad_norm": 0.32298606634140015,
      "learning_rate": 5.609931274421302e-06,
      "loss": 0.014,
      "step": 1352500
    },
    {
      "epoch": 2.213428644370692,
      "grad_norm": 0.6968132853507996,
      "learning_rate": 5.609865382207785e-06,
      "loss": 0.0215,
      "step": 1352520
    },
    {
      "epoch": 2.213461374809345,
      "grad_norm": 1.3947439193725586,
      "learning_rate": 5.609799489994268e-06,
      "loss": 0.019,
      "step": 1352540
    },
    {
      "epoch": 2.2134941052479986,
      "grad_norm": 0.4749079942703247,
      "learning_rate": 5.609733597780751e-06,
      "loss": 0.0207,
      "step": 1352560
    },
    {
      "epoch": 2.2135268356866518,
      "grad_norm": 0.20110471546649933,
      "learning_rate": 5.609667705567233e-06,
      "loss": 0.0211,
      "step": 1352580
    },
    {
      "epoch": 2.2135595661253054,
      "grad_norm": 0.4264463186264038,
      "learning_rate": 5.609601813353717e-06,
      "loss": 0.0168,
      "step": 1352600
    },
    {
      "epoch": 2.2135922965639585,
      "grad_norm": 0.35970890522003174,
      "learning_rate": 5.609535921140199e-06,
      "loss": 0.0127,
      "step": 1352620
    },
    {
      "epoch": 2.213625027002612,
      "grad_norm": 0.19972524046897888,
      "learning_rate": 5.609470028926682e-06,
      "loss": 0.0186,
      "step": 1352640
    },
    {
      "epoch": 2.2136577574412653,
      "grad_norm": 0.15104860067367554,
      "learning_rate": 5.609404136713164e-06,
      "loss": 0.0185,
      "step": 1352660
    },
    {
      "epoch": 2.2136904878799184,
      "grad_norm": 0.22644515335559845,
      "learning_rate": 5.609338244499648e-06,
      "loss": 0.0126,
      "step": 1352680
    },
    {
      "epoch": 2.213723218318572,
      "grad_norm": 0.27959945797920227,
      "learning_rate": 5.609272352286131e-06,
      "loss": 0.0166,
      "step": 1352700
    },
    {
      "epoch": 2.213755948757225,
      "grad_norm": 0.26005521416664124,
      "learning_rate": 5.609206460072613e-06,
      "loss": 0.0155,
      "step": 1352720
    },
    {
      "epoch": 2.2137886791958787,
      "grad_norm": 0.5840843319892883,
      "learning_rate": 5.609140567859096e-06,
      "loss": 0.0131,
      "step": 1352740
    },
    {
      "epoch": 2.213821409634532,
      "grad_norm": 0.4708133935928345,
      "learning_rate": 5.60907467564558e-06,
      "loss": 0.0119,
      "step": 1352760
    },
    {
      "epoch": 2.2138541400731855,
      "grad_norm": 0.17485114932060242,
      "learning_rate": 5.609008783432062e-06,
      "loss": 0.0123,
      "step": 1352780
    },
    {
      "epoch": 2.2138868705118386,
      "grad_norm": 0.20642399787902832,
      "learning_rate": 5.608942891218545e-06,
      "loss": 0.0103,
      "step": 1352800
    },
    {
      "epoch": 2.213919600950492,
      "grad_norm": 0.420637845993042,
      "learning_rate": 5.608876999005029e-06,
      "loss": 0.0217,
      "step": 1352820
    },
    {
      "epoch": 2.2139523313891454,
      "grad_norm": 0.6434850096702576,
      "learning_rate": 5.608811106791511e-06,
      "loss": 0.0188,
      "step": 1352840
    },
    {
      "epoch": 2.2139850618277985,
      "grad_norm": 0.2994674742221832,
      "learning_rate": 5.608745214577994e-06,
      "loss": 0.0148,
      "step": 1352860
    },
    {
      "epoch": 2.214017792266452,
      "grad_norm": 0.31340330839157104,
      "learning_rate": 5.608679322364476e-06,
      "loss": 0.0136,
      "step": 1352880
    },
    {
      "epoch": 2.2140505227051053,
      "grad_norm": 0.4211699664592743,
      "learning_rate": 5.60861343015096e-06,
      "loss": 0.017,
      "step": 1352900
    },
    {
      "epoch": 2.214083253143759,
      "grad_norm": 0.4607026278972626,
      "learning_rate": 5.608547537937442e-06,
      "loss": 0.0236,
      "step": 1352920
    },
    {
      "epoch": 2.214115983582412,
      "grad_norm": 0.09671557694673538,
      "learning_rate": 5.608481645723925e-06,
      "loss": 0.0172,
      "step": 1352940
    },
    {
      "epoch": 2.214148714021065,
      "grad_norm": 0.2617393434047699,
      "learning_rate": 5.608415753510408e-06,
      "loss": 0.0205,
      "step": 1352960
    },
    {
      "epoch": 2.2141814444597188,
      "grad_norm": 0.6876673102378845,
      "learning_rate": 5.6083498612968915e-06,
      "loss": 0.012,
      "step": 1352980
    },
    {
      "epoch": 2.214214174898372,
      "grad_norm": 0.5050658583641052,
      "learning_rate": 5.608283969083373e-06,
      "loss": 0.0178,
      "step": 1353000
    },
    {
      "epoch": 2.2142469053370255,
      "grad_norm": 1.7791911363601685,
      "learning_rate": 5.608218076869857e-06,
      "loss": 0.016,
      "step": 1353020
    },
    {
      "epoch": 2.2142796357756787,
      "grad_norm": 0.4545261263847351,
      "learning_rate": 5.608152184656339e-06,
      "loss": 0.0147,
      "step": 1353040
    },
    {
      "epoch": 2.214312366214332,
      "grad_norm": 0.24780461192131042,
      "learning_rate": 5.6080862924428224e-06,
      "loss": 0.0186,
      "step": 1353060
    },
    {
      "epoch": 2.2143450966529854,
      "grad_norm": 0.37802958488464355,
      "learning_rate": 5.608020400229305e-06,
      "loss": 0.0145,
      "step": 1353080
    },
    {
      "epoch": 2.2143778270916386,
      "grad_norm": 0.7435236573219299,
      "learning_rate": 5.607954508015788e-06,
      "loss": 0.0116,
      "step": 1353100
    },
    {
      "epoch": 2.214410557530292,
      "grad_norm": 0.31729426980018616,
      "learning_rate": 5.6078886158022715e-06,
      "loss": 0.0134,
      "step": 1353120
    },
    {
      "epoch": 2.2144432879689453,
      "grad_norm": 0.27510377764701843,
      "learning_rate": 5.607822723588754e-06,
      "loss": 0.0163,
      "step": 1353140
    },
    {
      "epoch": 2.214476018407599,
      "grad_norm": 0.3276755213737488,
      "learning_rate": 5.607756831375237e-06,
      "loss": 0.0132,
      "step": 1353160
    },
    {
      "epoch": 2.214508748846252,
      "grad_norm": 0.9326550960540771,
      "learning_rate": 5.60769093916172e-06,
      "loss": 0.0179,
      "step": 1353180
    },
    {
      "epoch": 2.214541479284905,
      "grad_norm": 0.4011889696121216,
      "learning_rate": 5.607625046948203e-06,
      "loss": 0.0164,
      "step": 1353200
    },
    {
      "epoch": 2.214574209723559,
      "grad_norm": 0.3708450198173523,
      "learning_rate": 5.607559154734685e-06,
      "loss": 0.0153,
      "step": 1353220
    },
    {
      "epoch": 2.214606940162212,
      "grad_norm": 0.2822905480861664,
      "learning_rate": 5.607493262521169e-06,
      "loss": 0.0165,
      "step": 1353240
    },
    {
      "epoch": 2.2146396706008655,
      "grad_norm": 0.6275128126144409,
      "learning_rate": 5.607427370307651e-06,
      "loss": 0.0141,
      "step": 1353260
    },
    {
      "epoch": 2.2146724010395187,
      "grad_norm": 0.18538051843643188,
      "learning_rate": 5.607361478094134e-06,
      "loss": 0.0166,
      "step": 1353280
    },
    {
      "epoch": 2.2147051314781723,
      "grad_norm": 0.3486595153808594,
      "learning_rate": 5.607295585880617e-06,
      "loss": 0.0245,
      "step": 1353300
    },
    {
      "epoch": 2.2147378619168254,
      "grad_norm": 0.40058547258377075,
      "learning_rate": 5.6072296936671e-06,
      "loss": 0.0169,
      "step": 1353320
    },
    {
      "epoch": 2.2147705923554786,
      "grad_norm": 0.2086043357849121,
      "learning_rate": 5.6071638014535825e-06,
      "loss": 0.0158,
      "step": 1353340
    },
    {
      "epoch": 2.214803322794132,
      "grad_norm": 0.4455610513687134,
      "learning_rate": 5.607097909240066e-06,
      "loss": 0.0149,
      "step": 1353360
    },
    {
      "epoch": 2.2148360532327853,
      "grad_norm": 0.4055369198322296,
      "learning_rate": 5.607032017026548e-06,
      "loss": 0.0164,
      "step": 1353380
    },
    {
      "epoch": 2.214868783671439,
      "grad_norm": 0.1759718954563141,
      "learning_rate": 5.6069661248130316e-06,
      "loss": 0.0091,
      "step": 1353400
    },
    {
      "epoch": 2.214901514110092,
      "grad_norm": 0.2578006684780121,
      "learning_rate": 5.6069002325995135e-06,
      "loss": 0.0227,
      "step": 1353420
    },
    {
      "epoch": 2.214934244548745,
      "grad_norm": 0.6447812914848328,
      "learning_rate": 5.606834340385997e-06,
      "loss": 0.0209,
      "step": 1353440
    },
    {
      "epoch": 2.214966974987399,
      "grad_norm": 0.8055793046951294,
      "learning_rate": 5.606768448172481e-06,
      "loss": 0.016,
      "step": 1353460
    },
    {
      "epoch": 2.214999705426052,
      "grad_norm": 0.10241340845823288,
      "learning_rate": 5.6067025559589625e-06,
      "loss": 0.0114,
      "step": 1353480
    },
    {
      "epoch": 2.2150324358647056,
      "grad_norm": 0.4139147400856018,
      "learning_rate": 5.606636663745446e-06,
      "loss": 0.0165,
      "step": 1353500
    },
    {
      "epoch": 2.2150651663033587,
      "grad_norm": 0.8073819875717163,
      "learning_rate": 5.606570771531928e-06,
      "loss": 0.0202,
      "step": 1353520
    },
    {
      "epoch": 2.2150978967420123,
      "grad_norm": 0.2956729233264923,
      "learning_rate": 5.606504879318412e-06,
      "loss": 0.0147,
      "step": 1353540
    },
    {
      "epoch": 2.2151306271806654,
      "grad_norm": 0.5922536849975586,
      "learning_rate": 5.606438987104894e-06,
      "loss": 0.0161,
      "step": 1353560
    },
    {
      "epoch": 2.2151633576193186,
      "grad_norm": 0.10946846753358841,
      "learning_rate": 5.606373094891378e-06,
      "loss": 0.0183,
      "step": 1353580
    },
    {
      "epoch": 2.215196088057972,
      "grad_norm": 0.3865782916545868,
      "learning_rate": 5.60630720267786e-06,
      "loss": 0.0142,
      "step": 1353600
    },
    {
      "epoch": 2.2152288184966253,
      "grad_norm": 0.48152080178260803,
      "learning_rate": 5.606241310464343e-06,
      "loss": 0.0121,
      "step": 1353620
    },
    {
      "epoch": 2.215261548935279,
      "grad_norm": 0.3421538770198822,
      "learning_rate": 5.606175418250825e-06,
      "loss": 0.0142,
      "step": 1353640
    },
    {
      "epoch": 2.215294279373932,
      "grad_norm": 0.3154345750808716,
      "learning_rate": 5.606109526037309e-06,
      "loss": 0.0145,
      "step": 1353660
    },
    {
      "epoch": 2.2153270098125857,
      "grad_norm": 0.5377823114395142,
      "learning_rate": 5.606043633823791e-06,
      "loss": 0.0204,
      "step": 1353680
    },
    {
      "epoch": 2.215359740251239,
      "grad_norm": 1.3712915182113647,
      "learning_rate": 5.605977741610274e-06,
      "loss": 0.0168,
      "step": 1353700
    },
    {
      "epoch": 2.215392470689892,
      "grad_norm": 0.22072072327136993,
      "learning_rate": 5.605911849396757e-06,
      "loss": 0.0191,
      "step": 1353720
    },
    {
      "epoch": 2.2154252011285456,
      "grad_norm": 0.8146370649337769,
      "learning_rate": 5.60584595718324e-06,
      "loss": 0.015,
      "step": 1353740
    },
    {
      "epoch": 2.2154579315671987,
      "grad_norm": 0.4501151740550995,
      "learning_rate": 5.6057800649697226e-06,
      "loss": 0.0168,
      "step": 1353760
    },
    {
      "epoch": 2.2154906620058523,
      "grad_norm": 0.5845699310302734,
      "learning_rate": 5.605714172756206e-06,
      "loss": 0.0159,
      "step": 1353780
    },
    {
      "epoch": 2.2155233924445055,
      "grad_norm": 0.7384954690933228,
      "learning_rate": 5.605648280542689e-06,
      "loss": 0.0175,
      "step": 1353800
    },
    {
      "epoch": 2.215556122883159,
      "grad_norm": 0.6448230147361755,
      "learning_rate": 5.605582388329172e-06,
      "loss": 0.0149,
      "step": 1353820
    },
    {
      "epoch": 2.215588853321812,
      "grad_norm": 0.4122932255268097,
      "learning_rate": 5.605516496115655e-06,
      "loss": 0.0144,
      "step": 1353840
    },
    {
      "epoch": 2.2156215837604654,
      "grad_norm": 1.00639808177948,
      "learning_rate": 5.605450603902137e-06,
      "loss": 0.0174,
      "step": 1353860
    },
    {
      "epoch": 2.215654314199119,
      "grad_norm": 0.188081756234169,
      "learning_rate": 5.605384711688621e-06,
      "loss": 0.0164,
      "step": 1353880
    },
    {
      "epoch": 2.215687044637772,
      "grad_norm": 0.3994084596633911,
      "learning_rate": 5.605318819475103e-06,
      "loss": 0.0131,
      "step": 1353900
    },
    {
      "epoch": 2.2157197750764257,
      "grad_norm": 0.33884432911872864,
      "learning_rate": 5.605252927261586e-06,
      "loss": 0.0177,
      "step": 1353920
    },
    {
      "epoch": 2.215752505515079,
      "grad_norm": 0.4352220296859741,
      "learning_rate": 5.605187035048069e-06,
      "loss": 0.0227,
      "step": 1353940
    },
    {
      "epoch": 2.2157852359537324,
      "grad_norm": 0.3861611485481262,
      "learning_rate": 5.605121142834552e-06,
      "loss": 0.0113,
      "step": 1353960
    },
    {
      "epoch": 2.2158179663923856,
      "grad_norm": 0.5574960112571716,
      "learning_rate": 5.605055250621034e-06,
      "loss": 0.0163,
      "step": 1353980
    },
    {
      "epoch": 2.2158506968310387,
      "grad_norm": 0.2554745376110077,
      "learning_rate": 5.604989358407518e-06,
      "loss": 0.0194,
      "step": 1354000
    },
    {
      "epoch": 2.2158834272696923,
      "grad_norm": 0.10097119957208633,
      "learning_rate": 5.604923466194e-06,
      "loss": 0.0187,
      "step": 1354020
    },
    {
      "epoch": 2.2159161577083455,
      "grad_norm": 0.4813828468322754,
      "learning_rate": 5.6048575739804835e-06,
      "loss": 0.0147,
      "step": 1354040
    },
    {
      "epoch": 2.215948888146999,
      "grad_norm": 0.18822140991687775,
      "learning_rate": 5.604791681766965e-06,
      "loss": 0.0158,
      "step": 1354060
    },
    {
      "epoch": 2.2159816185856522,
      "grad_norm": 0.10573706030845642,
      "learning_rate": 5.604725789553449e-06,
      "loss": 0.0109,
      "step": 1354080
    },
    {
      "epoch": 2.216014349024306,
      "grad_norm": 0.20819957554340363,
      "learning_rate": 5.604659897339932e-06,
      "loss": 0.016,
      "step": 1354100
    },
    {
      "epoch": 2.216047079462959,
      "grad_norm": 0.4831470847129822,
      "learning_rate": 5.6045940051264144e-06,
      "loss": 0.0114,
      "step": 1354120
    },
    {
      "epoch": 2.216079809901612,
      "grad_norm": 0.6328353881835938,
      "learning_rate": 5.604528112912897e-06,
      "loss": 0.0141,
      "step": 1354140
    },
    {
      "epoch": 2.2161125403402657,
      "grad_norm": 0.35685715079307556,
      "learning_rate": 5.604462220699381e-06,
      "loss": 0.0174,
      "step": 1354160
    },
    {
      "epoch": 2.216145270778919,
      "grad_norm": 0.22534316778182983,
      "learning_rate": 5.6043963284858635e-06,
      "loss": 0.0146,
      "step": 1354180
    },
    {
      "epoch": 2.2161780012175725,
      "grad_norm": 0.25421300530433655,
      "learning_rate": 5.604330436272346e-06,
      "loss": 0.0215,
      "step": 1354200
    },
    {
      "epoch": 2.2162107316562256,
      "grad_norm": 0.5545955300331116,
      "learning_rate": 5.60426454405883e-06,
      "loss": 0.0175,
      "step": 1354220
    },
    {
      "epoch": 2.216243462094879,
      "grad_norm": 0.5663177371025085,
      "learning_rate": 5.604198651845312e-06,
      "loss": 0.0103,
      "step": 1354240
    },
    {
      "epoch": 2.2162761925335324,
      "grad_norm": 0.2442413717508316,
      "learning_rate": 5.604132759631795e-06,
      "loss": 0.0108,
      "step": 1354260
    },
    {
      "epoch": 2.2163089229721855,
      "grad_norm": 0.32312873005867004,
      "learning_rate": 5.604066867418277e-06,
      "loss": 0.0132,
      "step": 1354280
    },
    {
      "epoch": 2.216341653410839,
      "grad_norm": 0.0965735912322998,
      "learning_rate": 5.604000975204761e-06,
      "loss": 0.0094,
      "step": 1354300
    },
    {
      "epoch": 2.2163743838494923,
      "grad_norm": 0.12527506053447723,
      "learning_rate": 5.6039350829912435e-06,
      "loss": 0.019,
      "step": 1354320
    },
    {
      "epoch": 2.216407114288146,
      "grad_norm": 0.2992544174194336,
      "learning_rate": 5.603869190777726e-06,
      "loss": 0.0105,
      "step": 1354340
    },
    {
      "epoch": 2.216439844726799,
      "grad_norm": 1.637830376625061,
      "learning_rate": 5.603803298564209e-06,
      "loss": 0.0124,
      "step": 1354360
    },
    {
      "epoch": 2.2164725751654526,
      "grad_norm": 0.6974685788154602,
      "learning_rate": 5.603737406350693e-06,
      "loss": 0.0147,
      "step": 1354380
    },
    {
      "epoch": 2.2165053056041057,
      "grad_norm": 0.23890940845012665,
      "learning_rate": 5.6036715141371745e-06,
      "loss": 0.0147,
      "step": 1354400
    },
    {
      "epoch": 2.216538036042759,
      "grad_norm": 0.27931132912635803,
      "learning_rate": 5.603605621923658e-06,
      "loss": 0.0209,
      "step": 1354420
    },
    {
      "epoch": 2.2165707664814125,
      "grad_norm": 0.4473426043987274,
      "learning_rate": 5.60353972971014e-06,
      "loss": 0.0121,
      "step": 1354440
    },
    {
      "epoch": 2.2166034969200656,
      "grad_norm": 0.27763938903808594,
      "learning_rate": 5.6034738374966235e-06,
      "loss": 0.0121,
      "step": 1354460
    },
    {
      "epoch": 2.2166362273587192,
      "grad_norm": 0.3336164355278015,
      "learning_rate": 5.6034079452831054e-06,
      "loss": 0.0136,
      "step": 1354480
    },
    {
      "epoch": 2.2166689577973724,
      "grad_norm": 0.14823909103870392,
      "learning_rate": 5.603342053069589e-06,
      "loss": 0.0153,
      "step": 1354500
    },
    {
      "epoch": 2.2167016882360255,
      "grad_norm": 0.7034597992897034,
      "learning_rate": 5.603276160856073e-06,
      "loss": 0.0125,
      "step": 1354520
    },
    {
      "epoch": 2.216734418674679,
      "grad_norm": 1.4238849878311157,
      "learning_rate": 5.6032102686425545e-06,
      "loss": 0.0169,
      "step": 1354540
    },
    {
      "epoch": 2.2167671491133323,
      "grad_norm": 0.8265932202339172,
      "learning_rate": 5.603144376429038e-06,
      "loss": 0.0228,
      "step": 1354560
    },
    {
      "epoch": 2.216799879551986,
      "grad_norm": 0.31264176964759827,
      "learning_rate": 5.603078484215521e-06,
      "loss": 0.0166,
      "step": 1354580
    },
    {
      "epoch": 2.216832609990639,
      "grad_norm": 0.19421765208244324,
      "learning_rate": 5.6030125920020036e-06,
      "loss": 0.0143,
      "step": 1354600
    },
    {
      "epoch": 2.2168653404292926,
      "grad_norm": 1.771506667137146,
      "learning_rate": 5.602946699788486e-06,
      "loss": 0.0136,
      "step": 1354620
    },
    {
      "epoch": 2.2168980708679458,
      "grad_norm": 0.262842059135437,
      "learning_rate": 5.60288080757497e-06,
      "loss": 0.0134,
      "step": 1354640
    },
    {
      "epoch": 2.216930801306599,
      "grad_norm": 1.139712929725647,
      "learning_rate": 5.602814915361452e-06,
      "loss": 0.0154,
      "step": 1354660
    },
    {
      "epoch": 2.2169635317452525,
      "grad_norm": 0.2724493443965912,
      "learning_rate": 5.602749023147935e-06,
      "loss": 0.0103,
      "step": 1354680
    },
    {
      "epoch": 2.2169962621839057,
      "grad_norm": 0.8752089142799377,
      "learning_rate": 5.602683130934417e-06,
      "loss": 0.018,
      "step": 1354700
    },
    {
      "epoch": 2.2170289926225593,
      "grad_norm": 1.0632219314575195,
      "learning_rate": 5.602617238720901e-06,
      "loss": 0.0153,
      "step": 1354720
    },
    {
      "epoch": 2.2170617230612124,
      "grad_norm": 0.5969815850257874,
      "learning_rate": 5.602551346507384e-06,
      "loss": 0.0124,
      "step": 1354740
    },
    {
      "epoch": 2.217094453499866,
      "grad_norm": 1.1218091249465942,
      "learning_rate": 5.602485454293866e-06,
      "loss": 0.0189,
      "step": 1354760
    },
    {
      "epoch": 2.217127183938519,
      "grad_norm": 0.12005135416984558,
      "learning_rate": 5.602419562080349e-06,
      "loss": 0.0079,
      "step": 1354780
    },
    {
      "epoch": 2.2171599143771723,
      "grad_norm": 0.34061548113822937,
      "learning_rate": 5.602353669866833e-06,
      "loss": 0.0106,
      "step": 1354800
    },
    {
      "epoch": 2.217192644815826,
      "grad_norm": 0.3828636705875397,
      "learning_rate": 5.6022877776533146e-06,
      "loss": 0.0138,
      "step": 1354820
    },
    {
      "epoch": 2.217225375254479,
      "grad_norm": 0.5353068709373474,
      "learning_rate": 5.602221885439798e-06,
      "loss": 0.0137,
      "step": 1354840
    },
    {
      "epoch": 2.2172581056931326,
      "grad_norm": 0.453401654958725,
      "learning_rate": 5.602155993226282e-06,
      "loss": 0.0103,
      "step": 1354860
    },
    {
      "epoch": 2.217290836131786,
      "grad_norm": 1.862270474433899,
      "learning_rate": 5.602090101012764e-06,
      "loss": 0.0154,
      "step": 1354880
    },
    {
      "epoch": 2.217323566570439,
      "grad_norm": 0.5053020119667053,
      "learning_rate": 5.602024208799247e-06,
      "loss": 0.0163,
      "step": 1354900
    },
    {
      "epoch": 2.2173562970090925,
      "grad_norm": 0.5241872668266296,
      "learning_rate": 5.601958316585729e-06,
      "loss": 0.0165,
      "step": 1354920
    },
    {
      "epoch": 2.2173890274477457,
      "grad_norm": 0.2494620382785797,
      "learning_rate": 5.601892424372213e-06,
      "loss": 0.0181,
      "step": 1354940
    },
    {
      "epoch": 2.2174217578863993,
      "grad_norm": 0.6723765730857849,
      "learning_rate": 5.601826532158695e-06,
      "loss": 0.0138,
      "step": 1354960
    },
    {
      "epoch": 2.2174544883250524,
      "grad_norm": 0.24455104768276215,
      "learning_rate": 5.601760639945178e-06,
      "loss": 0.0115,
      "step": 1354980
    },
    {
      "epoch": 2.217487218763706,
      "grad_norm": 0.23406291007995605,
      "learning_rate": 5.601694747731661e-06,
      "loss": 0.0143,
      "step": 1355000
    },
    {
      "epoch": 2.217519949202359,
      "grad_norm": 0.14227868616580963,
      "learning_rate": 5.6016288555181445e-06,
      "loss": 0.0114,
      "step": 1355020
    },
    {
      "epoch": 2.2175526796410123,
      "grad_norm": 0.18208785355091095,
      "learning_rate": 5.601562963304626e-06,
      "loss": 0.0163,
      "step": 1355040
    },
    {
      "epoch": 2.217585410079666,
      "grad_norm": 0.3051040470600128,
      "learning_rate": 5.60149707109111e-06,
      "loss": 0.0114,
      "step": 1355060
    },
    {
      "epoch": 2.217618140518319,
      "grad_norm": 0.2675970792770386,
      "learning_rate": 5.601431178877592e-06,
      "loss": 0.0224,
      "step": 1355080
    },
    {
      "epoch": 2.2176508709569727,
      "grad_norm": 0.2075568288564682,
      "learning_rate": 5.6013652866640754e-06,
      "loss": 0.0135,
      "step": 1355100
    },
    {
      "epoch": 2.217683601395626,
      "grad_norm": 0.17672976851463318,
      "learning_rate": 5.601299394450558e-06,
      "loss": 0.0157,
      "step": 1355120
    },
    {
      "epoch": 2.2177163318342794,
      "grad_norm": 0.47527459263801575,
      "learning_rate": 5.601233502237041e-06,
      "loss": 0.0172,
      "step": 1355140
    },
    {
      "epoch": 2.2177490622729326,
      "grad_norm": 0.35242006182670593,
      "learning_rate": 5.601167610023524e-06,
      "loss": 0.0114,
      "step": 1355160
    },
    {
      "epoch": 2.2177817927115857,
      "grad_norm": 0.2676243782043457,
      "learning_rate": 5.601101717810007e-06,
      "loss": 0.0159,
      "step": 1355180
    },
    {
      "epoch": 2.2178145231502393,
      "grad_norm": 0.4144672453403473,
      "learning_rate": 5.601035825596489e-06,
      "loss": 0.0165,
      "step": 1355200
    },
    {
      "epoch": 2.2178472535888925,
      "grad_norm": 0.7627666592597961,
      "learning_rate": 5.600969933382973e-06,
      "loss": 0.0148,
      "step": 1355220
    },
    {
      "epoch": 2.217879984027546,
      "grad_norm": 0.21006514132022858,
      "learning_rate": 5.600904041169456e-06,
      "loss": 0.0137,
      "step": 1355240
    },
    {
      "epoch": 2.217912714466199,
      "grad_norm": 0.8610584139823914,
      "learning_rate": 5.600838148955938e-06,
      "loss": 0.0097,
      "step": 1355260
    },
    {
      "epoch": 2.217945444904853,
      "grad_norm": 0.27830690145492554,
      "learning_rate": 5.600772256742422e-06,
      "loss": 0.0112,
      "step": 1355280
    },
    {
      "epoch": 2.217978175343506,
      "grad_norm": 0.08776795119047165,
      "learning_rate": 5.600706364528904e-06,
      "loss": 0.0096,
      "step": 1355300
    },
    {
      "epoch": 2.218010905782159,
      "grad_norm": 0.18513931334018707,
      "learning_rate": 5.600640472315387e-06,
      "loss": 0.0156,
      "step": 1355320
    },
    {
      "epoch": 2.2180436362208127,
      "grad_norm": 0.23049764335155487,
      "learning_rate": 5.60057458010187e-06,
      "loss": 0.0147,
      "step": 1355340
    },
    {
      "epoch": 2.218076366659466,
      "grad_norm": 1.4250717163085938,
      "learning_rate": 5.600508687888353e-06,
      "loss": 0.0168,
      "step": 1355360
    },
    {
      "epoch": 2.2181090970981194,
      "grad_norm": 0.24932318925857544,
      "learning_rate": 5.6004427956748355e-06,
      "loss": 0.0136,
      "step": 1355380
    },
    {
      "epoch": 2.2181418275367726,
      "grad_norm": 1.1239373683929443,
      "learning_rate": 5.600376903461319e-06,
      "loss": 0.0153,
      "step": 1355400
    },
    {
      "epoch": 2.218174557975426,
      "grad_norm": 0.30427172780036926,
      "learning_rate": 5.600311011247801e-06,
      "loss": 0.0148,
      "step": 1355420
    },
    {
      "epoch": 2.2182072884140793,
      "grad_norm": 0.7043522000312805,
      "learning_rate": 5.6002451190342846e-06,
      "loss": 0.0204,
      "step": 1355440
    },
    {
      "epoch": 2.2182400188527325,
      "grad_norm": 0.47856974601745605,
      "learning_rate": 5.6001792268207665e-06,
      "loss": 0.0195,
      "step": 1355460
    },
    {
      "epoch": 2.218272749291386,
      "grad_norm": 0.07576652616262436,
      "learning_rate": 5.60011333460725e-06,
      "loss": 0.0152,
      "step": 1355480
    },
    {
      "epoch": 2.218305479730039,
      "grad_norm": 0.9843490719795227,
      "learning_rate": 5.600047442393732e-06,
      "loss": 0.0204,
      "step": 1355500
    },
    {
      "epoch": 2.218338210168693,
      "grad_norm": 0.28129085898399353,
      "learning_rate": 5.5999815501802155e-06,
      "loss": 0.0127,
      "step": 1355520
    },
    {
      "epoch": 2.218370940607346,
      "grad_norm": 1.2100462913513184,
      "learning_rate": 5.599915657966698e-06,
      "loss": 0.0151,
      "step": 1355540
    },
    {
      "epoch": 2.2184036710459996,
      "grad_norm": 0.9563926458358765,
      "learning_rate": 5.599849765753181e-06,
      "loss": 0.0193,
      "step": 1355560
    },
    {
      "epoch": 2.2184364014846527,
      "grad_norm": 0.6984214186668396,
      "learning_rate": 5.599783873539665e-06,
      "loss": 0.0121,
      "step": 1355580
    },
    {
      "epoch": 2.218469131923306,
      "grad_norm": 0.5282312035560608,
      "learning_rate": 5.599717981326147e-06,
      "loss": 0.0123,
      "step": 1355600
    },
    {
      "epoch": 2.2185018623619595,
      "grad_norm": 0.11442689597606659,
      "learning_rate": 5.59965208911263e-06,
      "loss": 0.0162,
      "step": 1355620
    },
    {
      "epoch": 2.2185345928006126,
      "grad_norm": 0.4211278557777405,
      "learning_rate": 5.599586196899113e-06,
      "loss": 0.0138,
      "step": 1355640
    },
    {
      "epoch": 2.218567323239266,
      "grad_norm": 0.32679295539855957,
      "learning_rate": 5.599520304685596e-06,
      "loss": 0.0152,
      "step": 1355660
    },
    {
      "epoch": 2.2186000536779193,
      "grad_norm": 0.6910274028778076,
      "learning_rate": 5.599454412472078e-06,
      "loss": 0.0142,
      "step": 1355680
    },
    {
      "epoch": 2.218632784116573,
      "grad_norm": 0.08336115628480911,
      "learning_rate": 5.599388520258562e-06,
      "loss": 0.0105,
      "step": 1355700
    },
    {
      "epoch": 2.218665514555226,
      "grad_norm": 0.2857285439968109,
      "learning_rate": 5.599322628045044e-06,
      "loss": 0.0174,
      "step": 1355720
    },
    {
      "epoch": 2.2186982449938792,
      "grad_norm": 0.42235228419303894,
      "learning_rate": 5.599256735831527e-06,
      "loss": 0.0134,
      "step": 1355740
    },
    {
      "epoch": 2.218730975432533,
      "grad_norm": 0.26708748936653137,
      "learning_rate": 5.59919084361801e-06,
      "loss": 0.0147,
      "step": 1355760
    },
    {
      "epoch": 2.218763705871186,
      "grad_norm": 0.43402615189552307,
      "learning_rate": 5.599124951404493e-06,
      "loss": 0.0107,
      "step": 1355780
    },
    {
      "epoch": 2.2187964363098396,
      "grad_norm": 0.26506802439689636,
      "learning_rate": 5.5990590591909756e-06,
      "loss": 0.0153,
      "step": 1355800
    },
    {
      "epoch": 2.2188291667484927,
      "grad_norm": 1.4802865982055664,
      "learning_rate": 5.598993166977459e-06,
      "loss": 0.0211,
      "step": 1355820
    },
    {
      "epoch": 2.2188618971871463,
      "grad_norm": 0.642806887626648,
      "learning_rate": 5.598927274763941e-06,
      "loss": 0.0165,
      "step": 1355840
    },
    {
      "epoch": 2.2188946276257995,
      "grad_norm": 0.06617161631584167,
      "learning_rate": 5.598861382550425e-06,
      "loss": 0.0158,
      "step": 1355860
    },
    {
      "epoch": 2.2189273580644526,
      "grad_norm": 0.3260501027107239,
      "learning_rate": 5.5987954903369065e-06,
      "loss": 0.0169,
      "step": 1355880
    },
    {
      "epoch": 2.218960088503106,
      "grad_norm": 0.4380865693092346,
      "learning_rate": 5.59872959812339e-06,
      "loss": 0.0206,
      "step": 1355900
    },
    {
      "epoch": 2.2189928189417594,
      "grad_norm": 0.23699109256267548,
      "learning_rate": 5.598663705909874e-06,
      "loss": 0.0165,
      "step": 1355920
    },
    {
      "epoch": 2.219025549380413,
      "grad_norm": 0.9184847474098206,
      "learning_rate": 5.598597813696356e-06,
      "loss": 0.0126,
      "step": 1355940
    },
    {
      "epoch": 2.219058279819066,
      "grad_norm": 0.24018093943595886,
      "learning_rate": 5.598531921482839e-06,
      "loss": 0.0165,
      "step": 1355960
    },
    {
      "epoch": 2.2190910102577197,
      "grad_norm": 0.26469942927360535,
      "learning_rate": 5.598466029269322e-06,
      "loss": 0.0108,
      "step": 1355980
    },
    {
      "epoch": 2.219123740696373,
      "grad_norm": 0.3308117389678955,
      "learning_rate": 5.598400137055805e-06,
      "loss": 0.0183,
      "step": 1356000
    },
    {
      "epoch": 2.219156471135026,
      "grad_norm": 0.20355631411075592,
      "learning_rate": 5.598334244842287e-06,
      "loss": 0.0226,
      "step": 1356020
    },
    {
      "epoch": 2.2191892015736796,
      "grad_norm": 0.41708073019981384,
      "learning_rate": 5.598268352628771e-06,
      "loss": 0.0165,
      "step": 1356040
    },
    {
      "epoch": 2.2192219320123328,
      "grad_norm": 0.5688577890396118,
      "learning_rate": 5.598202460415253e-06,
      "loss": 0.0144,
      "step": 1356060
    },
    {
      "epoch": 2.2192546624509863,
      "grad_norm": 0.4332679510116577,
      "learning_rate": 5.5981365682017365e-06,
      "loss": 0.0166,
      "step": 1356080
    },
    {
      "epoch": 2.2192873928896395,
      "grad_norm": 0.1945972442626953,
      "learning_rate": 5.598070675988218e-06,
      "loss": 0.0134,
      "step": 1356100
    },
    {
      "epoch": 2.2193201233282926,
      "grad_norm": 0.14858576655387878,
      "learning_rate": 5.598004783774702e-06,
      "loss": 0.0168,
      "step": 1356120
    },
    {
      "epoch": 2.2193528537669462,
      "grad_norm": 0.8411204814910889,
      "learning_rate": 5.597938891561185e-06,
      "loss": 0.0119,
      "step": 1356140
    },
    {
      "epoch": 2.2193855842055994,
      "grad_norm": 0.26757684350013733,
      "learning_rate": 5.5978729993476674e-06,
      "loss": 0.0228,
      "step": 1356160
    },
    {
      "epoch": 2.219418314644253,
      "grad_norm": 0.9757050275802612,
      "learning_rate": 5.59780710713415e-06,
      "loss": 0.0183,
      "step": 1356180
    },
    {
      "epoch": 2.219451045082906,
      "grad_norm": 0.09511736035346985,
      "learning_rate": 5.597741214920634e-06,
      "loss": 0.0132,
      "step": 1356200
    },
    {
      "epoch": 2.2194837755215597,
      "grad_norm": 0.3867514431476593,
      "learning_rate": 5.597675322707116e-06,
      "loss": 0.011,
      "step": 1356220
    },
    {
      "epoch": 2.219516505960213,
      "grad_norm": 0.4604663848876953,
      "learning_rate": 5.597609430493599e-06,
      "loss": 0.0115,
      "step": 1356240
    },
    {
      "epoch": 2.219549236398866,
      "grad_norm": 0.9175955653190613,
      "learning_rate": 5.597543538280081e-06,
      "loss": 0.0223,
      "step": 1356260
    },
    {
      "epoch": 2.2195819668375196,
      "grad_norm": 0.6821418404579163,
      "learning_rate": 5.597477646066565e-06,
      "loss": 0.0131,
      "step": 1356280
    },
    {
      "epoch": 2.2196146972761728,
      "grad_norm": 0.19773972034454346,
      "learning_rate": 5.597411753853048e-06,
      "loss": 0.0116,
      "step": 1356300
    },
    {
      "epoch": 2.2196474277148264,
      "grad_norm": 0.3538160026073456,
      "learning_rate": 5.59734586163953e-06,
      "loss": 0.0194,
      "step": 1356320
    },
    {
      "epoch": 2.2196801581534795,
      "grad_norm": 0.39127928018569946,
      "learning_rate": 5.597279969426014e-06,
      "loss": 0.0199,
      "step": 1356340
    },
    {
      "epoch": 2.219712888592133,
      "grad_norm": 0.22140036523342133,
      "learning_rate": 5.5972140772124965e-06,
      "loss": 0.0103,
      "step": 1356360
    },
    {
      "epoch": 2.2197456190307863,
      "grad_norm": 0.7780601382255554,
      "learning_rate": 5.597148184998979e-06,
      "loss": 0.0164,
      "step": 1356380
    },
    {
      "epoch": 2.2197783494694394,
      "grad_norm": 0.24547931551933289,
      "learning_rate": 5.597082292785462e-06,
      "loss": 0.0107,
      "step": 1356400
    },
    {
      "epoch": 2.219811079908093,
      "grad_norm": 0.2073284238576889,
      "learning_rate": 5.597016400571946e-06,
      "loss": 0.0164,
      "step": 1356420
    },
    {
      "epoch": 2.219843810346746,
      "grad_norm": 0.1516820192337036,
      "learning_rate": 5.5969505083584275e-06,
      "loss": 0.0134,
      "step": 1356440
    },
    {
      "epoch": 2.2198765407853998,
      "grad_norm": 0.6475696563720703,
      "learning_rate": 5.596884616144911e-06,
      "loss": 0.0109,
      "step": 1356460
    },
    {
      "epoch": 2.219909271224053,
      "grad_norm": 0.1427503526210785,
      "learning_rate": 5.596818723931393e-06,
      "loss": 0.0186,
      "step": 1356480
    },
    {
      "epoch": 2.219942001662706,
      "grad_norm": 0.43855246901512146,
      "learning_rate": 5.5967528317178765e-06,
      "loss": 0.0132,
      "step": 1356500
    },
    {
      "epoch": 2.2199747321013596,
      "grad_norm": 0.5314595699310303,
      "learning_rate": 5.5966869395043584e-06,
      "loss": 0.015,
      "step": 1356520
    },
    {
      "epoch": 2.220007462540013,
      "grad_norm": 0.30159807205200195,
      "learning_rate": 5.596621047290842e-06,
      "loss": 0.0207,
      "step": 1356540
    },
    {
      "epoch": 2.2200401929786664,
      "grad_norm": 0.7293606400489807,
      "learning_rate": 5.596555155077325e-06,
      "loss": 0.0122,
      "step": 1356560
    },
    {
      "epoch": 2.2200729234173195,
      "grad_norm": 0.18087433278560638,
      "learning_rate": 5.5964892628638075e-06,
      "loss": 0.0104,
      "step": 1356580
    },
    {
      "epoch": 2.220105653855973,
      "grad_norm": 0.22140002250671387,
      "learning_rate": 5.59642337065029e-06,
      "loss": 0.0174,
      "step": 1356600
    },
    {
      "epoch": 2.2201383842946263,
      "grad_norm": 1.3221107721328735,
      "learning_rate": 5.596357478436774e-06,
      "loss": 0.0118,
      "step": 1356620
    },
    {
      "epoch": 2.2201711147332794,
      "grad_norm": 0.2532348036766052,
      "learning_rate": 5.5962915862232566e-06,
      "loss": 0.0171,
      "step": 1356640
    },
    {
      "epoch": 2.220203845171933,
      "grad_norm": 0.3629167675971985,
      "learning_rate": 5.596225694009739e-06,
      "loss": 0.0128,
      "step": 1356660
    },
    {
      "epoch": 2.220236575610586,
      "grad_norm": 0.362067848443985,
      "learning_rate": 5.596159801796223e-06,
      "loss": 0.0129,
      "step": 1356680
    },
    {
      "epoch": 2.2202693060492398,
      "grad_norm": 0.06841348111629486,
      "learning_rate": 5.596093909582705e-06,
      "loss": 0.0191,
      "step": 1356700
    },
    {
      "epoch": 2.220302036487893,
      "grad_norm": 0.1648990958929062,
      "learning_rate": 5.596028017369188e-06,
      "loss": 0.0187,
      "step": 1356720
    },
    {
      "epoch": 2.2203347669265465,
      "grad_norm": 0.5363799333572388,
      "learning_rate": 5.59596212515567e-06,
      "loss": 0.011,
      "step": 1356740
    },
    {
      "epoch": 2.2203674973651997,
      "grad_norm": 0.578038215637207,
      "learning_rate": 5.595896232942154e-06,
      "loss": 0.0149,
      "step": 1356760
    },
    {
      "epoch": 2.220400227803853,
      "grad_norm": 0.6550045013427734,
      "learning_rate": 5.595830340728637e-06,
      "loss": 0.0115,
      "step": 1356780
    },
    {
      "epoch": 2.2204329582425064,
      "grad_norm": 0.335063099861145,
      "learning_rate": 5.595764448515119e-06,
      "loss": 0.015,
      "step": 1356800
    },
    {
      "epoch": 2.2204656886811596,
      "grad_norm": 0.5165513753890991,
      "learning_rate": 5.595698556301602e-06,
      "loss": 0.0133,
      "step": 1356820
    },
    {
      "epoch": 2.220498419119813,
      "grad_norm": 0.8483454585075378,
      "learning_rate": 5.595632664088086e-06,
      "loss": 0.0152,
      "step": 1356840
    },
    {
      "epoch": 2.2205311495584663,
      "grad_norm": 1.0473204851150513,
      "learning_rate": 5.5955667718745676e-06,
      "loss": 0.0144,
      "step": 1356860
    },
    {
      "epoch": 2.22056387999712,
      "grad_norm": 0.14965248107910156,
      "learning_rate": 5.595500879661051e-06,
      "loss": 0.013,
      "step": 1356880
    },
    {
      "epoch": 2.220596610435773,
      "grad_norm": 0.29578864574432373,
      "learning_rate": 5.595434987447533e-06,
      "loss": 0.0114,
      "step": 1356900
    },
    {
      "epoch": 2.220629340874426,
      "grad_norm": 1.6227924823760986,
      "learning_rate": 5.595369095234017e-06,
      "loss": 0.0214,
      "step": 1356920
    },
    {
      "epoch": 2.22066207131308,
      "grad_norm": 0.3262244462966919,
      "learning_rate": 5.595303203020499e-06,
      "loss": 0.0185,
      "step": 1356940
    },
    {
      "epoch": 2.220694801751733,
      "grad_norm": 0.22951653599739075,
      "learning_rate": 5.595237310806982e-06,
      "loss": 0.0205,
      "step": 1356960
    },
    {
      "epoch": 2.2207275321903865,
      "grad_norm": 0.8856480717658997,
      "learning_rate": 5.595171418593466e-06,
      "loss": 0.0113,
      "step": 1356980
    },
    {
      "epoch": 2.2207602626290397,
      "grad_norm": 0.3553250730037689,
      "learning_rate": 5.5951055263799484e-06,
      "loss": 0.016,
      "step": 1357000
    },
    {
      "epoch": 2.2207929930676933,
      "grad_norm": 0.22528187930583954,
      "learning_rate": 5.595039634166431e-06,
      "loss": 0.0126,
      "step": 1357020
    },
    {
      "epoch": 2.2208257235063464,
      "grad_norm": 0.5563790798187256,
      "learning_rate": 5.594973741952914e-06,
      "loss": 0.0153,
      "step": 1357040
    },
    {
      "epoch": 2.2208584539449996,
      "grad_norm": 0.6256324052810669,
      "learning_rate": 5.5949078497393975e-06,
      "loss": 0.0226,
      "step": 1357060
    },
    {
      "epoch": 2.220891184383653,
      "grad_norm": 0.1730268895626068,
      "learning_rate": 5.594841957525879e-06,
      "loss": 0.016,
      "step": 1357080
    },
    {
      "epoch": 2.2209239148223063,
      "grad_norm": 0.3187936842441559,
      "learning_rate": 5.594776065312363e-06,
      "loss": 0.0167,
      "step": 1357100
    },
    {
      "epoch": 2.22095664526096,
      "grad_norm": 0.17194631695747375,
      "learning_rate": 5.594710173098845e-06,
      "loss": 0.0131,
      "step": 1357120
    },
    {
      "epoch": 2.220989375699613,
      "grad_norm": 0.3590024411678314,
      "learning_rate": 5.5946442808853285e-06,
      "loss": 0.0144,
      "step": 1357140
    },
    {
      "epoch": 2.2210221061382667,
      "grad_norm": 0.8534464240074158,
      "learning_rate": 5.594578388671811e-06,
      "loss": 0.0168,
      "step": 1357160
    },
    {
      "epoch": 2.22105483657692,
      "grad_norm": 0.31557735800743103,
      "learning_rate": 5.594512496458294e-06,
      "loss": 0.0174,
      "step": 1357180
    },
    {
      "epoch": 2.221087567015573,
      "grad_norm": 0.2836686968803406,
      "learning_rate": 5.594446604244777e-06,
      "loss": 0.0157,
      "step": 1357200
    },
    {
      "epoch": 2.2211202974542266,
      "grad_norm": 0.21451912820339203,
      "learning_rate": 5.59438071203126e-06,
      "loss": 0.0094,
      "step": 1357220
    },
    {
      "epoch": 2.2211530278928797,
      "grad_norm": 0.3749505877494812,
      "learning_rate": 5.594314819817742e-06,
      "loss": 0.0156,
      "step": 1357240
    },
    {
      "epoch": 2.2211857583315333,
      "grad_norm": 0.7016013860702515,
      "learning_rate": 5.594248927604226e-06,
      "loss": 0.0214,
      "step": 1357260
    },
    {
      "epoch": 2.2212184887701865,
      "grad_norm": 0.5248460173606873,
      "learning_rate": 5.594183035390708e-06,
      "loss": 0.0118,
      "step": 1357280
    },
    {
      "epoch": 2.22125121920884,
      "grad_norm": 0.2219889760017395,
      "learning_rate": 5.594117143177191e-06,
      "loss": 0.0132,
      "step": 1357300
    },
    {
      "epoch": 2.221283949647493,
      "grad_norm": 0.2822772264480591,
      "learning_rate": 5.594051250963675e-06,
      "loss": 0.0242,
      "step": 1357320
    },
    {
      "epoch": 2.2213166800861464,
      "grad_norm": 0.44826486706733704,
      "learning_rate": 5.593985358750157e-06,
      "loss": 0.0154,
      "step": 1357340
    },
    {
      "epoch": 2.2213494105248,
      "grad_norm": 0.39709171652793884,
      "learning_rate": 5.59391946653664e-06,
      "loss": 0.0134,
      "step": 1357360
    },
    {
      "epoch": 2.221382140963453,
      "grad_norm": 0.6572852730751038,
      "learning_rate": 5.593853574323122e-06,
      "loss": 0.0156,
      "step": 1357380
    },
    {
      "epoch": 2.2214148714021067,
      "grad_norm": 0.19902227818965912,
      "learning_rate": 5.593787682109606e-06,
      "loss": 0.0207,
      "step": 1357400
    },
    {
      "epoch": 2.22144760184076,
      "grad_norm": 0.7090052962303162,
      "learning_rate": 5.5937217898960885e-06,
      "loss": 0.0152,
      "step": 1357420
    },
    {
      "epoch": 2.2214803322794134,
      "grad_norm": 0.4786038398742676,
      "learning_rate": 5.593655897682571e-06,
      "loss": 0.0151,
      "step": 1357440
    },
    {
      "epoch": 2.2215130627180666,
      "grad_norm": 0.5695287585258484,
      "learning_rate": 5.593590005469054e-06,
      "loss": 0.0152,
      "step": 1357460
    },
    {
      "epoch": 2.2215457931567197,
      "grad_norm": 1.0761440992355347,
      "learning_rate": 5.5935241132555376e-06,
      "loss": 0.0157,
      "step": 1357480
    },
    {
      "epoch": 2.2215785235953733,
      "grad_norm": 0.10250032693147659,
      "learning_rate": 5.5934582210420195e-06,
      "loss": 0.018,
      "step": 1357500
    },
    {
      "epoch": 2.2216112540340265,
      "grad_norm": 0.6438547968864441,
      "learning_rate": 5.593392328828503e-06,
      "loss": 0.0153,
      "step": 1357520
    },
    {
      "epoch": 2.22164398447268,
      "grad_norm": 0.3063806891441345,
      "learning_rate": 5.593326436614985e-06,
      "loss": 0.0109,
      "step": 1357540
    },
    {
      "epoch": 2.2216767149113332,
      "grad_norm": 0.8626776337623596,
      "learning_rate": 5.5932605444014685e-06,
      "loss": 0.0251,
      "step": 1357560
    },
    {
      "epoch": 2.2217094453499864,
      "grad_norm": 0.11592860519886017,
      "learning_rate": 5.593194652187951e-06,
      "loss": 0.0111,
      "step": 1357580
    },
    {
      "epoch": 2.22174217578864,
      "grad_norm": 0.4937595725059509,
      "learning_rate": 5.593128759974434e-06,
      "loss": 0.0122,
      "step": 1357600
    },
    {
      "epoch": 2.221774906227293,
      "grad_norm": 0.2292536050081253,
      "learning_rate": 5.593062867760917e-06,
      "loss": 0.0218,
      "step": 1357620
    },
    {
      "epoch": 2.2218076366659467,
      "grad_norm": 0.25361502170562744,
      "learning_rate": 5.5929969755474e-06,
      "loss": 0.0223,
      "step": 1357640
    },
    {
      "epoch": 2.2218403671046,
      "grad_norm": 0.3243602514266968,
      "learning_rate": 5.592931083333882e-06,
      "loss": 0.0137,
      "step": 1357660
    },
    {
      "epoch": 2.2218730975432535,
      "grad_norm": 0.29083263874053955,
      "learning_rate": 5.592865191120366e-06,
      "loss": 0.0138,
      "step": 1357680
    },
    {
      "epoch": 2.2219058279819066,
      "grad_norm": 0.21010299026966095,
      "learning_rate": 5.592799298906849e-06,
      "loss": 0.0137,
      "step": 1357700
    },
    {
      "epoch": 2.2219385584205598,
      "grad_norm": 0.5626059174537659,
      "learning_rate": 5.592733406693331e-06,
      "loss": 0.0138,
      "step": 1357720
    },
    {
      "epoch": 2.2219712888592134,
      "grad_norm": 0.17304396629333496,
      "learning_rate": 5.592667514479815e-06,
      "loss": 0.011,
      "step": 1357740
    },
    {
      "epoch": 2.2220040192978665,
      "grad_norm": 0.5421074628829956,
      "learning_rate": 5.592601622266297e-06,
      "loss": 0.0185,
      "step": 1357760
    },
    {
      "epoch": 2.22203674973652,
      "grad_norm": 1.201000690460205,
      "learning_rate": 5.59253573005278e-06,
      "loss": 0.0119,
      "step": 1357780
    },
    {
      "epoch": 2.2220694801751733,
      "grad_norm": 0.2660476863384247,
      "learning_rate": 5.592469837839263e-06,
      "loss": 0.0126,
      "step": 1357800
    },
    {
      "epoch": 2.222102210613827,
      "grad_norm": 0.5713938474655151,
      "learning_rate": 5.592403945625746e-06,
      "loss": 0.0165,
      "step": 1357820
    },
    {
      "epoch": 2.22213494105248,
      "grad_norm": 0.1574757695198059,
      "learning_rate": 5.592338053412229e-06,
      "loss": 0.0151,
      "step": 1357840
    },
    {
      "epoch": 2.222167671491133,
      "grad_norm": 0.13966532051563263,
      "learning_rate": 5.592272161198712e-06,
      "loss": 0.015,
      "step": 1357860
    },
    {
      "epoch": 2.2222004019297867,
      "grad_norm": 0.4829174876213074,
      "learning_rate": 5.592206268985194e-06,
      "loss": 0.0217,
      "step": 1357880
    },
    {
      "epoch": 2.22223313236844,
      "grad_norm": 0.27577880024909973,
      "learning_rate": 5.592140376771678e-06,
      "loss": 0.0119,
      "step": 1357900
    },
    {
      "epoch": 2.2222658628070935,
      "grad_norm": 0.5039740204811096,
      "learning_rate": 5.5920744845581595e-06,
      "loss": 0.0109,
      "step": 1357920
    },
    {
      "epoch": 2.2222985932457466,
      "grad_norm": 0.48954883217811584,
      "learning_rate": 5.592008592344643e-06,
      "loss": 0.0196,
      "step": 1357940
    },
    {
      "epoch": 2.2223313236844,
      "grad_norm": 0.7149457335472107,
      "learning_rate": 5.591942700131126e-06,
      "loss": 0.0131,
      "step": 1357960
    },
    {
      "epoch": 2.2223640541230534,
      "grad_norm": 0.30919355154037476,
      "learning_rate": 5.591876807917609e-06,
      "loss": 0.0083,
      "step": 1357980
    },
    {
      "epoch": 2.2223967845617065,
      "grad_norm": 0.5105085372924805,
      "learning_rate": 5.591810915704091e-06,
      "loss": 0.0132,
      "step": 1358000
    },
    {
      "epoch": 2.22242951500036,
      "grad_norm": 0.2655596137046814,
      "learning_rate": 5.591745023490575e-06,
      "loss": 0.0186,
      "step": 1358020
    },
    {
      "epoch": 2.2224622454390133,
      "grad_norm": 0.6548085808753967,
      "learning_rate": 5.591679131277058e-06,
      "loss": 0.013,
      "step": 1358040
    },
    {
      "epoch": 2.222494975877667,
      "grad_norm": 0.8162794709205627,
      "learning_rate": 5.59161323906354e-06,
      "loss": 0.0181,
      "step": 1358060
    },
    {
      "epoch": 2.22252770631632,
      "grad_norm": 0.9322653412818909,
      "learning_rate": 5.591547346850024e-06,
      "loss": 0.0206,
      "step": 1358080
    },
    {
      "epoch": 2.222560436754973,
      "grad_norm": 0.27504628896713257,
      "learning_rate": 5.591481454636506e-06,
      "loss": 0.017,
      "step": 1358100
    },
    {
      "epoch": 2.2225931671936268,
      "grad_norm": 0.91879802942276,
      "learning_rate": 5.5914155624229895e-06,
      "loss": 0.0195,
      "step": 1358120
    },
    {
      "epoch": 2.22262589763228,
      "grad_norm": 0.46216288208961487,
      "learning_rate": 5.591349670209471e-06,
      "loss": 0.0096,
      "step": 1358140
    },
    {
      "epoch": 2.2226586280709335,
      "grad_norm": 0.6901852488517761,
      "learning_rate": 5.591283777995955e-06,
      "loss": 0.0156,
      "step": 1358160
    },
    {
      "epoch": 2.2226913585095867,
      "grad_norm": 0.5455288887023926,
      "learning_rate": 5.591217885782438e-06,
      "loss": 0.0145,
      "step": 1358180
    },
    {
      "epoch": 2.2227240889482403,
      "grad_norm": 0.1278587132692337,
      "learning_rate": 5.5911519935689204e-06,
      "loss": 0.0232,
      "step": 1358200
    },
    {
      "epoch": 2.2227568193868934,
      "grad_norm": 0.1899525672197342,
      "learning_rate": 5.591086101355403e-06,
      "loss": 0.0177,
      "step": 1358220
    },
    {
      "epoch": 2.2227895498255466,
      "grad_norm": 2.221994400024414,
      "learning_rate": 5.591020209141887e-06,
      "loss": 0.0219,
      "step": 1358240
    },
    {
      "epoch": 2.2228222802642,
      "grad_norm": 0.39916229248046875,
      "learning_rate": 5.590954316928369e-06,
      "loss": 0.0205,
      "step": 1358260
    },
    {
      "epoch": 2.2228550107028533,
      "grad_norm": 1.3447967767715454,
      "learning_rate": 5.590888424714852e-06,
      "loss": 0.018,
      "step": 1358280
    },
    {
      "epoch": 2.222887741141507,
      "grad_norm": 0.4323418438434601,
      "learning_rate": 5.590822532501334e-06,
      "loss": 0.0165,
      "step": 1358300
    },
    {
      "epoch": 2.22292047158016,
      "grad_norm": 0.5168759822845459,
      "learning_rate": 5.590756640287818e-06,
      "loss": 0.0128,
      "step": 1358320
    },
    {
      "epoch": 2.2229532020188136,
      "grad_norm": 0.7121976017951965,
      "learning_rate": 5.5906907480743e-06,
      "loss": 0.0148,
      "step": 1358340
    },
    {
      "epoch": 2.222985932457467,
      "grad_norm": 0.5266368985176086,
      "learning_rate": 5.590624855860783e-06,
      "loss": 0.0161,
      "step": 1358360
    },
    {
      "epoch": 2.22301866289612,
      "grad_norm": 0.32050415873527527,
      "learning_rate": 5.590558963647267e-06,
      "loss": 0.0159,
      "step": 1358380
    },
    {
      "epoch": 2.2230513933347735,
      "grad_norm": 0.6792001724243164,
      "learning_rate": 5.590493071433749e-06,
      "loss": 0.0155,
      "step": 1358400
    },
    {
      "epoch": 2.2230841237734267,
      "grad_norm": 0.546451210975647,
      "learning_rate": 5.590427179220232e-06,
      "loss": 0.0139,
      "step": 1358420
    },
    {
      "epoch": 2.2231168542120803,
      "grad_norm": 0.3032083511352539,
      "learning_rate": 5.590361287006715e-06,
      "loss": 0.0172,
      "step": 1358440
    },
    {
      "epoch": 2.2231495846507334,
      "grad_norm": 0.17341141402721405,
      "learning_rate": 5.590295394793198e-06,
      "loss": 0.0204,
      "step": 1358460
    },
    {
      "epoch": 2.223182315089387,
      "grad_norm": 0.4792521595954895,
      "learning_rate": 5.5902295025796805e-06,
      "loss": 0.0194,
      "step": 1358480
    },
    {
      "epoch": 2.22321504552804,
      "grad_norm": 1.8760029077529907,
      "learning_rate": 5.590163610366164e-06,
      "loss": 0.0191,
      "step": 1358500
    },
    {
      "epoch": 2.2232477759666933,
      "grad_norm": 0.12232612818479538,
      "learning_rate": 5.590097718152646e-06,
      "loss": 0.0128,
      "step": 1358520
    },
    {
      "epoch": 2.223280506405347,
      "grad_norm": 1.4843350648880005,
      "learning_rate": 5.5900318259391296e-06,
      "loss": 0.0202,
      "step": 1358540
    },
    {
      "epoch": 2.223313236844,
      "grad_norm": 0.1668878048658371,
      "learning_rate": 5.5899659337256114e-06,
      "loss": 0.013,
      "step": 1358560
    },
    {
      "epoch": 2.2233459672826537,
      "grad_norm": 0.25967124104499817,
      "learning_rate": 5.589900041512095e-06,
      "loss": 0.018,
      "step": 1358580
    },
    {
      "epoch": 2.223378697721307,
      "grad_norm": 0.0686555802822113,
      "learning_rate": 5.589834149298578e-06,
      "loss": 0.0123,
      "step": 1358600
    },
    {
      "epoch": 2.2234114281599604,
      "grad_norm": 0.38827234506607056,
      "learning_rate": 5.5897682570850605e-06,
      "loss": 0.0161,
      "step": 1358620
    },
    {
      "epoch": 2.2234441585986136,
      "grad_norm": 0.583595871925354,
      "learning_rate": 5.589702364871543e-06,
      "loss": 0.0164,
      "step": 1358640
    },
    {
      "epoch": 2.2234768890372667,
      "grad_norm": 0.4970005750656128,
      "learning_rate": 5.589636472658027e-06,
      "loss": 0.0236,
      "step": 1358660
    },
    {
      "epoch": 2.2235096194759203,
      "grad_norm": 0.663705587387085,
      "learning_rate": 5.589570580444509e-06,
      "loss": 0.0215,
      "step": 1358680
    },
    {
      "epoch": 2.2235423499145734,
      "grad_norm": 0.5192814469337463,
      "learning_rate": 5.589504688230992e-06,
      "loss": 0.0125,
      "step": 1358700
    },
    {
      "epoch": 2.223575080353227,
      "grad_norm": 0.14211136102676392,
      "learning_rate": 5.589438796017474e-06,
      "loss": 0.0103,
      "step": 1358720
    },
    {
      "epoch": 2.22360781079188,
      "grad_norm": 0.13481839001178741,
      "learning_rate": 5.589372903803958e-06,
      "loss": 0.0157,
      "step": 1358740
    },
    {
      "epoch": 2.223640541230534,
      "grad_norm": 0.3394736647605896,
      "learning_rate": 5.589307011590441e-06,
      "loss": 0.0144,
      "step": 1358760
    },
    {
      "epoch": 2.223673271669187,
      "grad_norm": 0.7260229587554932,
      "learning_rate": 5.589241119376923e-06,
      "loss": 0.0135,
      "step": 1358780
    },
    {
      "epoch": 2.22370600210784,
      "grad_norm": 0.2319585382938385,
      "learning_rate": 5.589175227163407e-06,
      "loss": 0.0139,
      "step": 1358800
    },
    {
      "epoch": 2.2237387325464937,
      "grad_norm": 0.2655908465385437,
      "learning_rate": 5.58910933494989e-06,
      "loss": 0.0218,
      "step": 1358820
    },
    {
      "epoch": 2.223771462985147,
      "grad_norm": 0.6702584028244019,
      "learning_rate": 5.589043442736372e-06,
      "loss": 0.025,
      "step": 1358840
    },
    {
      "epoch": 2.2238041934238004,
      "grad_norm": 0.1700821816921234,
      "learning_rate": 5.588977550522855e-06,
      "loss": 0.0169,
      "step": 1358860
    },
    {
      "epoch": 2.2238369238624536,
      "grad_norm": 0.1185171827673912,
      "learning_rate": 5.588911658309339e-06,
      "loss": 0.0144,
      "step": 1358880
    },
    {
      "epoch": 2.223869654301107,
      "grad_norm": 0.9033605456352234,
      "learning_rate": 5.5888457660958206e-06,
      "loss": 0.0139,
      "step": 1358900
    },
    {
      "epoch": 2.2239023847397603,
      "grad_norm": 0.8497384786605835,
      "learning_rate": 5.588779873882304e-06,
      "loss": 0.019,
      "step": 1358920
    },
    {
      "epoch": 2.2239351151784135,
      "grad_norm": 0.06762515008449554,
      "learning_rate": 5.588713981668786e-06,
      "loss": 0.0174,
      "step": 1358940
    },
    {
      "epoch": 2.223967845617067,
      "grad_norm": 0.49944809079170227,
      "learning_rate": 5.58864808945527e-06,
      "loss": 0.0178,
      "step": 1358960
    },
    {
      "epoch": 2.22400057605572,
      "grad_norm": 0.16830147802829742,
      "learning_rate": 5.588582197241752e-06,
      "loss": 0.0116,
      "step": 1358980
    },
    {
      "epoch": 2.224033306494374,
      "grad_norm": 0.5867990255355835,
      "learning_rate": 5.588516305028235e-06,
      "loss": 0.0205,
      "step": 1359000
    },
    {
      "epoch": 2.224066036933027,
      "grad_norm": 0.22778797149658203,
      "learning_rate": 5.588450412814718e-06,
      "loss": 0.0136,
      "step": 1359020
    },
    {
      "epoch": 2.22409876737168,
      "grad_norm": 1.5755060911178589,
      "learning_rate": 5.5883845206012014e-06,
      "loss": 0.0175,
      "step": 1359040
    },
    {
      "epoch": 2.2241314978103337,
      "grad_norm": 0.4647343158721924,
      "learning_rate": 5.588318628387683e-06,
      "loss": 0.0162,
      "step": 1359060
    },
    {
      "epoch": 2.224164228248987,
      "grad_norm": 0.019699232652783394,
      "learning_rate": 5.588252736174167e-06,
      "loss": 0.013,
      "step": 1359080
    },
    {
      "epoch": 2.2241969586876404,
      "grad_norm": 0.38699662685394287,
      "learning_rate": 5.5881868439606505e-06,
      "loss": 0.0126,
      "step": 1359100
    },
    {
      "epoch": 2.2242296891262936,
      "grad_norm": 0.5245779752731323,
      "learning_rate": 5.588120951747132e-06,
      "loss": 0.0089,
      "step": 1359120
    },
    {
      "epoch": 2.224262419564947,
      "grad_norm": 0.1225166916847229,
      "learning_rate": 5.588055059533616e-06,
      "loss": 0.0204,
      "step": 1359140
    },
    {
      "epoch": 2.2242951500036003,
      "grad_norm": 0.3932756185531616,
      "learning_rate": 5.587989167320098e-06,
      "loss": 0.0169,
      "step": 1359160
    },
    {
      "epoch": 2.2243278804422535,
      "grad_norm": 0.6955314874649048,
      "learning_rate": 5.5879232751065815e-06,
      "loss": 0.0127,
      "step": 1359180
    },
    {
      "epoch": 2.224360610880907,
      "grad_norm": 0.7815408706665039,
      "learning_rate": 5.587857382893064e-06,
      "loss": 0.0231,
      "step": 1359200
    },
    {
      "epoch": 2.2243933413195602,
      "grad_norm": 0.07878590375185013,
      "learning_rate": 5.587791490679547e-06,
      "loss": 0.0132,
      "step": 1359220
    },
    {
      "epoch": 2.224426071758214,
      "grad_norm": 0.38598865270614624,
      "learning_rate": 5.58772559846603e-06,
      "loss": 0.0177,
      "step": 1359240
    },
    {
      "epoch": 2.224458802196867,
      "grad_norm": 0.5572426319122314,
      "learning_rate": 5.587659706252513e-06,
      "loss": 0.0196,
      "step": 1359260
    },
    {
      "epoch": 2.2244915326355206,
      "grad_norm": 0.6829084753990173,
      "learning_rate": 5.587593814038995e-06,
      "loss": 0.0162,
      "step": 1359280
    },
    {
      "epoch": 2.2245242630741737,
      "grad_norm": 0.29106420278549194,
      "learning_rate": 5.587527921825479e-06,
      "loss": 0.0145,
      "step": 1359300
    },
    {
      "epoch": 2.224556993512827,
      "grad_norm": 1.6695362329483032,
      "learning_rate": 5.587462029611961e-06,
      "loss": 0.0187,
      "step": 1359320
    },
    {
      "epoch": 2.2245897239514805,
      "grad_norm": 0.2659365236759186,
      "learning_rate": 5.587396137398444e-06,
      "loss": 0.0135,
      "step": 1359340
    },
    {
      "epoch": 2.2246224543901336,
      "grad_norm": 0.24910905957221985,
      "learning_rate": 5.587330245184926e-06,
      "loss": 0.0177,
      "step": 1359360
    },
    {
      "epoch": 2.224655184828787,
      "grad_norm": 0.2665956914424896,
      "learning_rate": 5.58726435297141e-06,
      "loss": 0.019,
      "step": 1359380
    },
    {
      "epoch": 2.2246879152674404,
      "grad_norm": 0.32252806425094604,
      "learning_rate": 5.5871984607578924e-06,
      "loss": 0.0165,
      "step": 1359400
    },
    {
      "epoch": 2.224720645706094,
      "grad_norm": 0.2879551947116852,
      "learning_rate": 5.587132568544375e-06,
      "loss": 0.0209,
      "step": 1359420
    },
    {
      "epoch": 2.224753376144747,
      "grad_norm": 0.25394099950790405,
      "learning_rate": 5.587066676330859e-06,
      "loss": 0.0127,
      "step": 1359440
    },
    {
      "epoch": 2.2247861065834003,
      "grad_norm": 0.9983866810798645,
      "learning_rate": 5.5870007841173415e-06,
      "loss": 0.0196,
      "step": 1359460
    },
    {
      "epoch": 2.224818837022054,
      "grad_norm": 0.4350813925266266,
      "learning_rate": 5.586934891903824e-06,
      "loss": 0.0109,
      "step": 1359480
    },
    {
      "epoch": 2.224851567460707,
      "grad_norm": 0.5248184204101562,
      "learning_rate": 5.586868999690307e-06,
      "loss": 0.0133,
      "step": 1359500
    },
    {
      "epoch": 2.2248842978993606,
      "grad_norm": 0.5888323783874512,
      "learning_rate": 5.586803107476791e-06,
      "loss": 0.0144,
      "step": 1359520
    },
    {
      "epoch": 2.2249170283380137,
      "grad_norm": 0.6478053331375122,
      "learning_rate": 5.5867372152632725e-06,
      "loss": 0.0178,
      "step": 1359540
    },
    {
      "epoch": 2.224949758776667,
      "grad_norm": 0.3501356244087219,
      "learning_rate": 5.586671323049756e-06,
      "loss": 0.0103,
      "step": 1359560
    },
    {
      "epoch": 2.2249824892153205,
      "grad_norm": 0.37345024943351746,
      "learning_rate": 5.586605430836238e-06,
      "loss": 0.0173,
      "step": 1359580
    },
    {
      "epoch": 2.2250152196539736,
      "grad_norm": 0.6396285891532898,
      "learning_rate": 5.5865395386227215e-06,
      "loss": 0.0159,
      "step": 1359600
    },
    {
      "epoch": 2.2250479500926272,
      "grad_norm": 0.8291608095169067,
      "learning_rate": 5.586473646409204e-06,
      "loss": 0.0152,
      "step": 1359620
    },
    {
      "epoch": 2.2250806805312804,
      "grad_norm": 0.28334084153175354,
      "learning_rate": 5.586407754195687e-06,
      "loss": 0.0099,
      "step": 1359640
    },
    {
      "epoch": 2.225113410969934,
      "grad_norm": 0.23637643456459045,
      "learning_rate": 5.58634186198217e-06,
      "loss": 0.016,
      "step": 1359660
    },
    {
      "epoch": 2.225146141408587,
      "grad_norm": 0.19144076108932495,
      "learning_rate": 5.586275969768653e-06,
      "loss": 0.014,
      "step": 1359680
    },
    {
      "epoch": 2.2251788718472403,
      "grad_norm": 0.2204921841621399,
      "learning_rate": 5.586210077555135e-06,
      "loss": 0.0161,
      "step": 1359700
    },
    {
      "epoch": 2.225211602285894,
      "grad_norm": 0.4560973346233368,
      "learning_rate": 5.586144185341619e-06,
      "loss": 0.023,
      "step": 1359720
    },
    {
      "epoch": 2.225244332724547,
      "grad_norm": 0.36633503437042236,
      "learning_rate": 5.586078293128101e-06,
      "loss": 0.0095,
      "step": 1359740
    },
    {
      "epoch": 2.2252770631632006,
      "grad_norm": 0.11420338600873947,
      "learning_rate": 5.586012400914584e-06,
      "loss": 0.0164,
      "step": 1359760
    },
    {
      "epoch": 2.2253097936018538,
      "grad_norm": 1.451203465461731,
      "learning_rate": 5.585946508701068e-06,
      "loss": 0.0134,
      "step": 1359780
    },
    {
      "epoch": 2.2253425240405074,
      "grad_norm": 0.4328094720840454,
      "learning_rate": 5.58588061648755e-06,
      "loss": 0.0133,
      "step": 1359800
    },
    {
      "epoch": 2.2253752544791605,
      "grad_norm": 0.13442637026309967,
      "learning_rate": 5.585814724274033e-06,
      "loss": 0.0144,
      "step": 1359820
    },
    {
      "epoch": 2.2254079849178137,
      "grad_norm": 0.19187398254871368,
      "learning_rate": 5.585748832060516e-06,
      "loss": 0.0131,
      "step": 1359840
    },
    {
      "epoch": 2.2254407153564673,
      "grad_norm": 0.3200194835662842,
      "learning_rate": 5.585682939846999e-06,
      "loss": 0.0176,
      "step": 1359860
    },
    {
      "epoch": 2.2254734457951204,
      "grad_norm": 0.3311404585838318,
      "learning_rate": 5.585617047633482e-06,
      "loss": 0.012,
      "step": 1359880
    },
    {
      "epoch": 2.225506176233774,
      "grad_norm": 0.13247212767601013,
      "learning_rate": 5.585551155419965e-06,
      "loss": 0.0115,
      "step": 1359900
    },
    {
      "epoch": 2.225538906672427,
      "grad_norm": 0.3459586799144745,
      "learning_rate": 5.585485263206447e-06,
      "loss": 0.017,
      "step": 1359920
    },
    {
      "epoch": 2.2255716371110807,
      "grad_norm": 0.6930913329124451,
      "learning_rate": 5.585419370992931e-06,
      "loss": 0.0137,
      "step": 1359940
    },
    {
      "epoch": 2.225604367549734,
      "grad_norm": 0.02423485368490219,
      "learning_rate": 5.5853534787794125e-06,
      "loss": 0.0127,
      "step": 1359960
    },
    {
      "epoch": 2.225637097988387,
      "grad_norm": 0.17388799786567688,
      "learning_rate": 5.585287586565896e-06,
      "loss": 0.0167,
      "step": 1359980
    },
    {
      "epoch": 2.2256698284270406,
      "grad_norm": 0.16611319780349731,
      "learning_rate": 5.585221694352379e-06,
      "loss": 0.012,
      "step": 1360000
    },
    {
      "epoch": 2.225702558865694,
      "grad_norm": 2.3316478729248047,
      "learning_rate": 5.585155802138862e-06,
      "loss": 0.0239,
      "step": 1360020
    },
    {
      "epoch": 2.2257352893043474,
      "grad_norm": 0.637617826461792,
      "learning_rate": 5.585089909925344e-06,
      "loss": 0.0141,
      "step": 1360040
    },
    {
      "epoch": 2.2257680197430005,
      "grad_norm": 0.1706390529870987,
      "learning_rate": 5.585024017711828e-06,
      "loss": 0.013,
      "step": 1360060
    },
    {
      "epoch": 2.225800750181654,
      "grad_norm": 0.24807403981685638,
      "learning_rate": 5.58495812549831e-06,
      "loss": 0.0127,
      "step": 1360080
    },
    {
      "epoch": 2.2258334806203073,
      "grad_norm": 0.47603440284729004,
      "learning_rate": 5.584892233284793e-06,
      "loss": 0.0225,
      "step": 1360100
    },
    {
      "epoch": 2.2258662110589604,
      "grad_norm": 0.32735493779182434,
      "learning_rate": 5.584826341071275e-06,
      "loss": 0.0139,
      "step": 1360120
    },
    {
      "epoch": 2.225898941497614,
      "grad_norm": 0.3883259892463684,
      "learning_rate": 5.584760448857759e-06,
      "loss": 0.0154,
      "step": 1360140
    },
    {
      "epoch": 2.225931671936267,
      "grad_norm": 0.620107889175415,
      "learning_rate": 5.5846945566442425e-06,
      "loss": 0.0153,
      "step": 1360160
    },
    {
      "epoch": 2.2259644023749208,
      "grad_norm": 0.46371954679489136,
      "learning_rate": 5.584628664430724e-06,
      "loss": 0.0153,
      "step": 1360180
    },
    {
      "epoch": 2.225997132813574,
      "grad_norm": 2.598283290863037,
      "learning_rate": 5.584562772217208e-06,
      "loss": 0.011,
      "step": 1360200
    },
    {
      "epoch": 2.2260298632522275,
      "grad_norm": 0.3132176697254181,
      "learning_rate": 5.58449688000369e-06,
      "loss": 0.0129,
      "step": 1360220
    },
    {
      "epoch": 2.2260625936908807,
      "grad_norm": 0.9131184220314026,
      "learning_rate": 5.5844309877901734e-06,
      "loss": 0.0153,
      "step": 1360240
    },
    {
      "epoch": 2.226095324129534,
      "grad_norm": 0.501192569732666,
      "learning_rate": 5.584365095576656e-06,
      "loss": 0.0248,
      "step": 1360260
    },
    {
      "epoch": 2.2261280545681874,
      "grad_norm": 0.40963268280029297,
      "learning_rate": 5.58429920336314e-06,
      "loss": 0.0117,
      "step": 1360280
    },
    {
      "epoch": 2.2261607850068406,
      "grad_norm": 0.12798404693603516,
      "learning_rate": 5.584233311149622e-06,
      "loss": 0.0107,
      "step": 1360300
    },
    {
      "epoch": 2.226193515445494,
      "grad_norm": 0.610561192035675,
      "learning_rate": 5.584167418936105e-06,
      "loss": 0.0105,
      "step": 1360320
    },
    {
      "epoch": 2.2262262458841473,
      "grad_norm": 0.14956019818782806,
      "learning_rate": 5.584101526722587e-06,
      "loss": 0.0196,
      "step": 1360340
    },
    {
      "epoch": 2.226258976322801,
      "grad_norm": 0.3176964819431305,
      "learning_rate": 5.584035634509071e-06,
      "loss": 0.0184,
      "step": 1360360
    },
    {
      "epoch": 2.226291706761454,
      "grad_norm": 0.18610690534114838,
      "learning_rate": 5.583969742295553e-06,
      "loss": 0.0124,
      "step": 1360380
    },
    {
      "epoch": 2.226324437200107,
      "grad_norm": 0.4343591630458832,
      "learning_rate": 5.583903850082036e-06,
      "loss": 0.0109,
      "step": 1360400
    },
    {
      "epoch": 2.226357167638761,
      "grad_norm": 0.8354365229606628,
      "learning_rate": 5.583837957868519e-06,
      "loss": 0.0123,
      "step": 1360420
    },
    {
      "epoch": 2.226389898077414,
      "grad_norm": 0.23380731046199799,
      "learning_rate": 5.583772065655002e-06,
      "loss": 0.0135,
      "step": 1360440
    },
    {
      "epoch": 2.2264226285160675,
      "grad_norm": 0.04477139934897423,
      "learning_rate": 5.5837061734414844e-06,
      "loss": 0.0115,
      "step": 1360460
    },
    {
      "epoch": 2.2264553589547207,
      "grad_norm": 0.3095957636833191,
      "learning_rate": 5.583640281227968e-06,
      "loss": 0.0189,
      "step": 1360480
    },
    {
      "epoch": 2.2264880893933743,
      "grad_norm": 0.238353431224823,
      "learning_rate": 5.583574389014451e-06,
      "loss": 0.0154,
      "step": 1360500
    },
    {
      "epoch": 2.2265208198320274,
      "grad_norm": 0.18052037060260773,
      "learning_rate": 5.5835084968009335e-06,
      "loss": 0.0135,
      "step": 1360520
    },
    {
      "epoch": 2.2265535502706806,
      "grad_norm": 0.3043181002140045,
      "learning_rate": 5.583442604587417e-06,
      "loss": 0.0169,
      "step": 1360540
    },
    {
      "epoch": 2.226586280709334,
      "grad_norm": 0.13997483253479004,
      "learning_rate": 5.583376712373899e-06,
      "loss": 0.0122,
      "step": 1360560
    },
    {
      "epoch": 2.2266190111479873,
      "grad_norm": 1.7227591276168823,
      "learning_rate": 5.5833108201603826e-06,
      "loss": 0.0139,
      "step": 1360580
    },
    {
      "epoch": 2.226651741586641,
      "grad_norm": 0.26526448130607605,
      "learning_rate": 5.5832449279468645e-06,
      "loss": 0.0111,
      "step": 1360600
    },
    {
      "epoch": 2.226684472025294,
      "grad_norm": 0.3632303774356842,
      "learning_rate": 5.583179035733348e-06,
      "loss": 0.0108,
      "step": 1360620
    },
    {
      "epoch": 2.226717202463947,
      "grad_norm": 0.4620911777019501,
      "learning_rate": 5.583113143519831e-06,
      "loss": 0.0126,
      "step": 1360640
    },
    {
      "epoch": 2.226749932902601,
      "grad_norm": 0.42413580417633057,
      "learning_rate": 5.5830472513063135e-06,
      "loss": 0.0186,
      "step": 1360660
    },
    {
      "epoch": 2.226782663341254,
      "grad_norm": 0.11254820227622986,
      "learning_rate": 5.582981359092796e-06,
      "loss": 0.0176,
      "step": 1360680
    },
    {
      "epoch": 2.2268153937799076,
      "grad_norm": 0.4361012876033783,
      "learning_rate": 5.58291546687928e-06,
      "loss": 0.0128,
      "step": 1360700
    },
    {
      "epoch": 2.2268481242185607,
      "grad_norm": 0.3451300263404846,
      "learning_rate": 5.582849574665762e-06,
      "loss": 0.0145,
      "step": 1360720
    },
    {
      "epoch": 2.2268808546572143,
      "grad_norm": 0.48406654596328735,
      "learning_rate": 5.582783682452245e-06,
      "loss": 0.0119,
      "step": 1360740
    },
    {
      "epoch": 2.2269135850958675,
      "grad_norm": 0.07412615418434143,
      "learning_rate": 5.582717790238727e-06,
      "loss": 0.0163,
      "step": 1360760
    },
    {
      "epoch": 2.2269463155345206,
      "grad_norm": 0.26314952969551086,
      "learning_rate": 5.582651898025211e-06,
      "loss": 0.0119,
      "step": 1360780
    },
    {
      "epoch": 2.226979045973174,
      "grad_norm": 0.3913111090660095,
      "learning_rate": 5.5825860058116935e-06,
      "loss": 0.0154,
      "step": 1360800
    },
    {
      "epoch": 2.2270117764118273,
      "grad_norm": 0.36724090576171875,
      "learning_rate": 5.582520113598176e-06,
      "loss": 0.0189,
      "step": 1360820
    },
    {
      "epoch": 2.227044506850481,
      "grad_norm": 0.36518964171409607,
      "learning_rate": 5.58245422138466e-06,
      "loss": 0.0116,
      "step": 1360840
    },
    {
      "epoch": 2.227077237289134,
      "grad_norm": 0.3234010338783264,
      "learning_rate": 5.582388329171143e-06,
      "loss": 0.0169,
      "step": 1360860
    },
    {
      "epoch": 2.2271099677277877,
      "grad_norm": 0.2030147761106491,
      "learning_rate": 5.582322436957625e-06,
      "loss": 0.0212,
      "step": 1360880
    },
    {
      "epoch": 2.227142698166441,
      "grad_norm": 0.20994986593723297,
      "learning_rate": 5.582256544744108e-06,
      "loss": 0.0167,
      "step": 1360900
    },
    {
      "epoch": 2.227175428605094,
      "grad_norm": 0.350460946559906,
      "learning_rate": 5.582190652530592e-06,
      "loss": 0.0144,
      "step": 1360920
    },
    {
      "epoch": 2.2272081590437476,
      "grad_norm": 0.07935428619384766,
      "learning_rate": 5.5821247603170736e-06,
      "loss": 0.0114,
      "step": 1360940
    },
    {
      "epoch": 2.2272408894824007,
      "grad_norm": 0.5126795768737793,
      "learning_rate": 5.582058868103557e-06,
      "loss": 0.0143,
      "step": 1360960
    },
    {
      "epoch": 2.2272736199210543,
      "grad_norm": 0.6205928325653076,
      "learning_rate": 5.581992975890039e-06,
      "loss": 0.0171,
      "step": 1360980
    },
    {
      "epoch": 2.2273063503597075,
      "grad_norm": 0.29518836736679077,
      "learning_rate": 5.581927083676523e-06,
      "loss": 0.0134,
      "step": 1361000
    },
    {
      "epoch": 2.2273390807983606,
      "grad_norm": 0.37417134642601013,
      "learning_rate": 5.581861191463005e-06,
      "loss": 0.0174,
      "step": 1361020
    },
    {
      "epoch": 2.227371811237014,
      "grad_norm": 0.3392007648944855,
      "learning_rate": 5.581795299249488e-06,
      "loss": 0.0227,
      "step": 1361040
    },
    {
      "epoch": 2.2274045416756674,
      "grad_norm": 1.140656590461731,
      "learning_rate": 5.581729407035971e-06,
      "loss": 0.0115,
      "step": 1361060
    },
    {
      "epoch": 2.227437272114321,
      "grad_norm": 0.4104658365249634,
      "learning_rate": 5.5816635148224544e-06,
      "loss": 0.0114,
      "step": 1361080
    },
    {
      "epoch": 2.227470002552974,
      "grad_norm": 0.4167073369026184,
      "learning_rate": 5.581597622608936e-06,
      "loss": 0.0197,
      "step": 1361100
    },
    {
      "epoch": 2.2275027329916277,
      "grad_norm": 0.5445142388343811,
      "learning_rate": 5.58153173039542e-06,
      "loss": 0.0146,
      "step": 1361120
    },
    {
      "epoch": 2.227535463430281,
      "grad_norm": 0.3109557628631592,
      "learning_rate": 5.581465838181902e-06,
      "loss": 0.017,
      "step": 1361140
    },
    {
      "epoch": 2.227568193868934,
      "grad_norm": 0.1417115479707718,
      "learning_rate": 5.581399945968385e-06,
      "loss": 0.0102,
      "step": 1361160
    },
    {
      "epoch": 2.2276009243075876,
      "grad_norm": 0.715190589427948,
      "learning_rate": 5.581334053754867e-06,
      "loss": 0.0149,
      "step": 1361180
    },
    {
      "epoch": 2.2276336547462408,
      "grad_norm": 1.0345581769943237,
      "learning_rate": 5.581268161541351e-06,
      "loss": 0.0176,
      "step": 1361200
    },
    {
      "epoch": 2.2276663851848943,
      "grad_norm": 0.16661955416202545,
      "learning_rate": 5.5812022693278345e-06,
      "loss": 0.0142,
      "step": 1361220
    },
    {
      "epoch": 2.2276991156235475,
      "grad_norm": 0.3486940860748291,
      "learning_rate": 5.581136377114316e-06,
      "loss": 0.0184,
      "step": 1361240
    },
    {
      "epoch": 2.227731846062201,
      "grad_norm": 0.3294278085231781,
      "learning_rate": 5.5810704849008e-06,
      "loss": 0.0141,
      "step": 1361260
    },
    {
      "epoch": 2.2277645765008542,
      "grad_norm": 0.2372315675020218,
      "learning_rate": 5.581004592687283e-06,
      "loss": 0.0112,
      "step": 1361280
    },
    {
      "epoch": 2.2277973069395074,
      "grad_norm": 0.608532726764679,
      "learning_rate": 5.5809387004737654e-06,
      "loss": 0.0154,
      "step": 1361300
    },
    {
      "epoch": 2.227830037378161,
      "grad_norm": 0.5111928582191467,
      "learning_rate": 5.580872808260248e-06,
      "loss": 0.0121,
      "step": 1361320
    },
    {
      "epoch": 2.227862767816814,
      "grad_norm": 0.8054848313331604,
      "learning_rate": 5.580806916046732e-06,
      "loss": 0.0135,
      "step": 1361340
    },
    {
      "epoch": 2.2278954982554677,
      "grad_norm": 1.2520405054092407,
      "learning_rate": 5.580741023833214e-06,
      "loss": 0.0139,
      "step": 1361360
    },
    {
      "epoch": 2.227928228694121,
      "grad_norm": 0.1644836664199829,
      "learning_rate": 5.580675131619697e-06,
      "loss": 0.0102,
      "step": 1361380
    },
    {
      "epoch": 2.2279609591327745,
      "grad_norm": 0.5898988246917725,
      "learning_rate": 5.580609239406179e-06,
      "loss": 0.0181,
      "step": 1361400
    },
    {
      "epoch": 2.2279936895714276,
      "grad_norm": 0.21663397550582886,
      "learning_rate": 5.580543347192663e-06,
      "loss": 0.0148,
      "step": 1361420
    },
    {
      "epoch": 2.2280264200100808,
      "grad_norm": 0.5745028853416443,
      "learning_rate": 5.5804774549791455e-06,
      "loss": 0.0108,
      "step": 1361440
    },
    {
      "epoch": 2.2280591504487344,
      "grad_norm": 0.48990440368652344,
      "learning_rate": 5.580411562765628e-06,
      "loss": 0.0144,
      "step": 1361460
    },
    {
      "epoch": 2.2280918808873875,
      "grad_norm": 0.7107059359550476,
      "learning_rate": 5.580345670552111e-06,
      "loss": 0.0171,
      "step": 1361480
    },
    {
      "epoch": 2.228124611326041,
      "grad_norm": 0.20561523735523224,
      "learning_rate": 5.5802797783385945e-06,
      "loss": 0.0264,
      "step": 1361500
    },
    {
      "epoch": 2.2281573417646943,
      "grad_norm": 0.09362200647592545,
      "learning_rate": 5.580213886125076e-06,
      "loss": 0.0145,
      "step": 1361520
    },
    {
      "epoch": 2.228190072203348,
      "grad_norm": 0.545363187789917,
      "learning_rate": 5.58014799391156e-06,
      "loss": 0.0196,
      "step": 1361540
    },
    {
      "epoch": 2.228222802642001,
      "grad_norm": 0.24413058161735535,
      "learning_rate": 5.580082101698044e-06,
      "loss": 0.0175,
      "step": 1361560
    },
    {
      "epoch": 2.228255533080654,
      "grad_norm": 0.26690539717674255,
      "learning_rate": 5.5800162094845255e-06,
      "loss": 0.0181,
      "step": 1361580
    },
    {
      "epoch": 2.2282882635193078,
      "grad_norm": 0.3432663381099701,
      "learning_rate": 5.579950317271009e-06,
      "loss": 0.0145,
      "step": 1361600
    },
    {
      "epoch": 2.228320993957961,
      "grad_norm": 0.9579837322235107,
      "learning_rate": 5.579884425057491e-06,
      "loss": 0.0172,
      "step": 1361620
    },
    {
      "epoch": 2.2283537243966145,
      "grad_norm": 0.25439220666885376,
      "learning_rate": 5.5798185328439745e-06,
      "loss": 0.0166,
      "step": 1361640
    },
    {
      "epoch": 2.2283864548352676,
      "grad_norm": 0.2145698070526123,
      "learning_rate": 5.579752640630457e-06,
      "loss": 0.0123,
      "step": 1361660
    },
    {
      "epoch": 2.2284191852739212,
      "grad_norm": 0.5610625147819519,
      "learning_rate": 5.57968674841694e-06,
      "loss": 0.0202,
      "step": 1361680
    },
    {
      "epoch": 2.2284519157125744,
      "grad_norm": 0.29627880454063416,
      "learning_rate": 5.579620856203423e-06,
      "loss": 0.0124,
      "step": 1361700
    },
    {
      "epoch": 2.2284846461512275,
      "grad_norm": 0.2757233679294586,
      "learning_rate": 5.579554963989906e-06,
      "loss": 0.0136,
      "step": 1361720
    },
    {
      "epoch": 2.228517376589881,
      "grad_norm": 0.2734334468841553,
      "learning_rate": 5.579489071776388e-06,
      "loss": 0.0146,
      "step": 1361740
    },
    {
      "epoch": 2.2285501070285343,
      "grad_norm": 0.3234916925430298,
      "learning_rate": 5.579423179562872e-06,
      "loss": 0.0153,
      "step": 1361760
    },
    {
      "epoch": 2.228582837467188,
      "grad_norm": 0.45568472146987915,
      "learning_rate": 5.579357287349354e-06,
      "loss": 0.012,
      "step": 1361780
    },
    {
      "epoch": 2.228615567905841,
      "grad_norm": 0.13973383605480194,
      "learning_rate": 5.579291395135837e-06,
      "loss": 0.0124,
      "step": 1361800
    },
    {
      "epoch": 2.2286482983444946,
      "grad_norm": 0.6555117964744568,
      "learning_rate": 5.57922550292232e-06,
      "loss": 0.0143,
      "step": 1361820
    },
    {
      "epoch": 2.2286810287831478,
      "grad_norm": 0.3718254864215851,
      "learning_rate": 5.579159610708803e-06,
      "loss": 0.0095,
      "step": 1361840
    },
    {
      "epoch": 2.228713759221801,
      "grad_norm": 0.30886310338974,
      "learning_rate": 5.5790937184952855e-06,
      "loss": 0.0094,
      "step": 1361860
    },
    {
      "epoch": 2.2287464896604545,
      "grad_norm": 0.34060126543045044,
      "learning_rate": 5.579027826281769e-06,
      "loss": 0.0137,
      "step": 1361880
    },
    {
      "epoch": 2.2287792200991077,
      "grad_norm": 0.08958793431520462,
      "learning_rate": 5.578961934068252e-06,
      "loss": 0.0206,
      "step": 1361900
    },
    {
      "epoch": 2.2288119505377613,
      "grad_norm": 0.24282728135585785,
      "learning_rate": 5.578896041854735e-06,
      "loss": 0.01,
      "step": 1361920
    },
    {
      "epoch": 2.2288446809764144,
      "grad_norm": 0.6873694658279419,
      "learning_rate": 5.578830149641218e-06,
      "loss": 0.0117,
      "step": 1361940
    },
    {
      "epoch": 2.228877411415068,
      "grad_norm": 0.6590339541435242,
      "learning_rate": 5.5787642574277e-06,
      "loss": 0.0172,
      "step": 1361960
    },
    {
      "epoch": 2.228910141853721,
      "grad_norm": 0.6262630224227905,
      "learning_rate": 5.578698365214184e-06,
      "loss": 0.0133,
      "step": 1361980
    },
    {
      "epoch": 2.2289428722923743,
      "grad_norm": 0.29453667998313904,
      "learning_rate": 5.5786324730006656e-06,
      "loss": 0.0209,
      "step": 1362000
    },
    {
      "epoch": 2.228975602731028,
      "grad_norm": 0.3612479865550995,
      "learning_rate": 5.578566580787149e-06,
      "loss": 0.0199,
      "step": 1362020
    },
    {
      "epoch": 2.229008333169681,
      "grad_norm": 16.418968200683594,
      "learning_rate": 5.578500688573632e-06,
      "loss": 0.0189,
      "step": 1362040
    },
    {
      "epoch": 2.2290410636083346,
      "grad_norm": 0.23394669592380524,
      "learning_rate": 5.578434796360115e-06,
      "loss": 0.0187,
      "step": 1362060
    },
    {
      "epoch": 2.229073794046988,
      "grad_norm": 0.2388538122177124,
      "learning_rate": 5.578368904146597e-06,
      "loss": 0.0119,
      "step": 1362080
    },
    {
      "epoch": 2.229106524485641,
      "grad_norm": 0.1670806109905243,
      "learning_rate": 5.578303011933081e-06,
      "loss": 0.0105,
      "step": 1362100
    },
    {
      "epoch": 2.2291392549242945,
      "grad_norm": 0.27831268310546875,
      "learning_rate": 5.578237119719563e-06,
      "loss": 0.0116,
      "step": 1362120
    },
    {
      "epoch": 2.2291719853629477,
      "grad_norm": 0.6286443471908569,
      "learning_rate": 5.5781712275060464e-06,
      "loss": 0.012,
      "step": 1362140
    },
    {
      "epoch": 2.2292047158016013,
      "grad_norm": 0.33984240889549255,
      "learning_rate": 5.578105335292528e-06,
      "loss": 0.0131,
      "step": 1362160
    },
    {
      "epoch": 2.2292374462402544,
      "grad_norm": 0.26567211747169495,
      "learning_rate": 5.578039443079012e-06,
      "loss": 0.0226,
      "step": 1362180
    },
    {
      "epoch": 2.229270176678908,
      "grad_norm": 0.48610472679138184,
      "learning_rate": 5.577973550865494e-06,
      "loss": 0.0158,
      "step": 1362200
    },
    {
      "epoch": 2.229302907117561,
      "grad_norm": 0.0926528126001358,
      "learning_rate": 5.577907658651977e-06,
      "loss": 0.0148,
      "step": 1362220
    },
    {
      "epoch": 2.2293356375562143,
      "grad_norm": 0.5835085511207581,
      "learning_rate": 5.57784176643846e-06,
      "loss": 0.0142,
      "step": 1362240
    },
    {
      "epoch": 2.229368367994868,
      "grad_norm": 0.6836981177330017,
      "learning_rate": 5.577775874224943e-06,
      "loss": 0.0191,
      "step": 1362260
    },
    {
      "epoch": 2.229401098433521,
      "grad_norm": 0.4889239966869354,
      "learning_rate": 5.5777099820114265e-06,
      "loss": 0.026,
      "step": 1362280
    },
    {
      "epoch": 2.2294338288721747,
      "grad_norm": 0.28582414984703064,
      "learning_rate": 5.577644089797909e-06,
      "loss": 0.0165,
      "step": 1362300
    },
    {
      "epoch": 2.229466559310828,
      "grad_norm": 0.45320138335227966,
      "learning_rate": 5.577578197584392e-06,
      "loss": 0.0095,
      "step": 1362320
    },
    {
      "epoch": 2.2294992897494814,
      "grad_norm": 0.2910108268260956,
      "learning_rate": 5.577512305370875e-06,
      "loss": 0.0168,
      "step": 1362340
    },
    {
      "epoch": 2.2295320201881346,
      "grad_norm": 0.21411222219467163,
      "learning_rate": 5.577446413157358e-06,
      "loss": 0.0272,
      "step": 1362360
    },
    {
      "epoch": 2.2295647506267877,
      "grad_norm": 0.7063149213790894,
      "learning_rate": 5.57738052094384e-06,
      "loss": 0.0184,
      "step": 1362380
    },
    {
      "epoch": 2.2295974810654413,
      "grad_norm": 0.22773294150829315,
      "learning_rate": 5.577314628730324e-06,
      "loss": 0.0143,
      "step": 1362400
    },
    {
      "epoch": 2.2296302115040945,
      "grad_norm": 0.4657321870326996,
      "learning_rate": 5.577248736516806e-06,
      "loss": 0.0156,
      "step": 1362420
    },
    {
      "epoch": 2.229662941942748,
      "grad_norm": 0.22309811413288116,
      "learning_rate": 5.577182844303289e-06,
      "loss": 0.0164,
      "step": 1362440
    },
    {
      "epoch": 2.229695672381401,
      "grad_norm": 0.8646968603134155,
      "learning_rate": 5.577116952089772e-06,
      "loss": 0.0211,
      "step": 1362460
    },
    {
      "epoch": 2.229728402820055,
      "grad_norm": 0.5789048075675964,
      "learning_rate": 5.577051059876255e-06,
      "loss": 0.0148,
      "step": 1362480
    },
    {
      "epoch": 2.229761133258708,
      "grad_norm": 0.3063495457172394,
      "learning_rate": 5.5769851676627374e-06,
      "loss": 0.0154,
      "step": 1362500
    },
    {
      "epoch": 2.229793863697361,
      "grad_norm": 0.0965232104063034,
      "learning_rate": 5.576919275449221e-06,
      "loss": 0.0112,
      "step": 1362520
    },
    {
      "epoch": 2.2298265941360147,
      "grad_norm": 0.3551434576511383,
      "learning_rate": 5.576853383235703e-06,
      "loss": 0.0159,
      "step": 1362540
    },
    {
      "epoch": 2.229859324574668,
      "grad_norm": 0.2944057583808899,
      "learning_rate": 5.5767874910221865e-06,
      "loss": 0.0128,
      "step": 1362560
    },
    {
      "epoch": 2.2298920550133214,
      "grad_norm": 0.20836852490901947,
      "learning_rate": 5.576721598808668e-06,
      "loss": 0.0094,
      "step": 1362580
    },
    {
      "epoch": 2.2299247854519746,
      "grad_norm": 0.16051603853702545,
      "learning_rate": 5.576655706595152e-06,
      "loss": 0.017,
      "step": 1362600
    },
    {
      "epoch": 2.2299575158906277,
      "grad_norm": 0.31160178780555725,
      "learning_rate": 5.5765898143816356e-06,
      "loss": 0.0133,
      "step": 1362620
    },
    {
      "epoch": 2.2299902463292813,
      "grad_norm": 0.4861779808998108,
      "learning_rate": 5.5765239221681175e-06,
      "loss": 0.0222,
      "step": 1362640
    },
    {
      "epoch": 2.2300229767679345,
      "grad_norm": 0.38088458776474,
      "learning_rate": 5.576458029954601e-06,
      "loss": 0.0136,
      "step": 1362660
    },
    {
      "epoch": 2.230055707206588,
      "grad_norm": 0.24852490425109863,
      "learning_rate": 5.576392137741084e-06,
      "loss": 0.0147,
      "step": 1362680
    },
    {
      "epoch": 2.2300884376452412,
      "grad_norm": 1.0648784637451172,
      "learning_rate": 5.5763262455275665e-06,
      "loss": 0.0171,
      "step": 1362700
    },
    {
      "epoch": 2.230121168083895,
      "grad_norm": 0.40997132658958435,
      "learning_rate": 5.576260353314049e-06,
      "loss": 0.0201,
      "step": 1362720
    },
    {
      "epoch": 2.230153898522548,
      "grad_norm": 0.3802751302719116,
      "learning_rate": 5.576194461100533e-06,
      "loss": 0.0168,
      "step": 1362740
    },
    {
      "epoch": 2.230186628961201,
      "grad_norm": 0.6404169797897339,
      "learning_rate": 5.576128568887015e-06,
      "loss": 0.0172,
      "step": 1362760
    },
    {
      "epoch": 2.2302193593998547,
      "grad_norm": 0.4969870150089264,
      "learning_rate": 5.576062676673498e-06,
      "loss": 0.0106,
      "step": 1362780
    },
    {
      "epoch": 2.230252089838508,
      "grad_norm": 0.29759418964385986,
      "learning_rate": 5.57599678445998e-06,
      "loss": 0.0163,
      "step": 1362800
    },
    {
      "epoch": 2.2302848202771615,
      "grad_norm": 0.0845501497387886,
      "learning_rate": 5.575930892246464e-06,
      "loss": 0.0179,
      "step": 1362820
    },
    {
      "epoch": 2.2303175507158146,
      "grad_norm": 0.22534720599651337,
      "learning_rate": 5.5758650000329466e-06,
      "loss": 0.0172,
      "step": 1362840
    },
    {
      "epoch": 2.230350281154468,
      "grad_norm": 0.4580894708633423,
      "learning_rate": 5.575799107819429e-06,
      "loss": 0.0185,
      "step": 1362860
    },
    {
      "epoch": 2.2303830115931214,
      "grad_norm": 0.3833472430706024,
      "learning_rate": 5.575733215605912e-06,
      "loss": 0.0152,
      "step": 1362880
    },
    {
      "epoch": 2.2304157420317745,
      "grad_norm": 0.5666833519935608,
      "learning_rate": 5.575667323392396e-06,
      "loss": 0.0183,
      "step": 1362900
    },
    {
      "epoch": 2.230448472470428,
      "grad_norm": 0.2978181540966034,
      "learning_rate": 5.5756014311788775e-06,
      "loss": 0.0197,
      "step": 1362920
    },
    {
      "epoch": 2.2304812029090813,
      "grad_norm": 0.9103398323059082,
      "learning_rate": 5.575535538965361e-06,
      "loss": 0.0107,
      "step": 1362940
    },
    {
      "epoch": 2.230513933347735,
      "grad_norm": 0.3111097514629364,
      "learning_rate": 5.575469646751845e-06,
      "loss": 0.0183,
      "step": 1362960
    },
    {
      "epoch": 2.230546663786388,
      "grad_norm": 0.15510153770446777,
      "learning_rate": 5.575403754538327e-06,
      "loss": 0.0126,
      "step": 1362980
    },
    {
      "epoch": 2.2305793942250416,
      "grad_norm": 0.1950388103723526,
      "learning_rate": 5.57533786232481e-06,
      "loss": 0.0216,
      "step": 1363000
    },
    {
      "epoch": 2.2306121246636947,
      "grad_norm": 0.272434264421463,
      "learning_rate": 5.575271970111292e-06,
      "loss": 0.0155,
      "step": 1363020
    },
    {
      "epoch": 2.230644855102348,
      "grad_norm": 0.26495084166526794,
      "learning_rate": 5.575206077897776e-06,
      "loss": 0.0163,
      "step": 1363040
    },
    {
      "epoch": 2.2306775855410015,
      "grad_norm": 0.8117035627365112,
      "learning_rate": 5.575140185684258e-06,
      "loss": 0.017,
      "step": 1363060
    },
    {
      "epoch": 2.2307103159796546,
      "grad_norm": 0.36634206771850586,
      "learning_rate": 5.575074293470741e-06,
      "loss": 0.0125,
      "step": 1363080
    },
    {
      "epoch": 2.2307430464183082,
      "grad_norm": 0.5715178847312927,
      "learning_rate": 5.575008401257224e-06,
      "loss": 0.0124,
      "step": 1363100
    },
    {
      "epoch": 2.2307757768569614,
      "grad_norm": 0.46171265840530396,
      "learning_rate": 5.5749425090437074e-06,
      "loss": 0.0182,
      "step": 1363120
    },
    {
      "epoch": 2.230808507295615,
      "grad_norm": 0.2132681906223297,
      "learning_rate": 5.574876616830189e-06,
      "loss": 0.0138,
      "step": 1363140
    },
    {
      "epoch": 2.230841237734268,
      "grad_norm": 0.2354535460472107,
      "learning_rate": 5.574810724616673e-06,
      "loss": 0.0146,
      "step": 1363160
    },
    {
      "epoch": 2.2308739681729213,
      "grad_norm": 0.1545100063085556,
      "learning_rate": 5.574744832403155e-06,
      "loss": 0.019,
      "step": 1363180
    },
    {
      "epoch": 2.230906698611575,
      "grad_norm": 0.6200299859046936,
      "learning_rate": 5.574678940189638e-06,
      "loss": 0.0215,
      "step": 1363200
    },
    {
      "epoch": 2.230939429050228,
      "grad_norm": 1.1006801128387451,
      "learning_rate": 5.57461304797612e-06,
      "loss": 0.0152,
      "step": 1363220
    },
    {
      "epoch": 2.2309721594888816,
      "grad_norm": 0.4930119812488556,
      "learning_rate": 5.574547155762604e-06,
      "loss": 0.0112,
      "step": 1363240
    },
    {
      "epoch": 2.2310048899275348,
      "grad_norm": 0.5681448578834534,
      "learning_rate": 5.574481263549087e-06,
      "loss": 0.0108,
      "step": 1363260
    },
    {
      "epoch": 2.2310376203661884,
      "grad_norm": 0.22662101686000824,
      "learning_rate": 5.574415371335569e-06,
      "loss": 0.0189,
      "step": 1363280
    },
    {
      "epoch": 2.2310703508048415,
      "grad_norm": 0.17067883908748627,
      "learning_rate": 5.574349479122053e-06,
      "loss": 0.016,
      "step": 1363300
    },
    {
      "epoch": 2.2311030812434947,
      "grad_norm": 0.7199530005455017,
      "learning_rate": 5.574283586908536e-06,
      "loss": 0.0172,
      "step": 1363320
    },
    {
      "epoch": 2.2311358116821483,
      "grad_norm": 0.9054391980171204,
      "learning_rate": 5.5742176946950184e-06,
      "loss": 0.0208,
      "step": 1363340
    },
    {
      "epoch": 2.2311685421208014,
      "grad_norm": 0.07272229343652725,
      "learning_rate": 5.574151802481501e-06,
      "loss": 0.0166,
      "step": 1363360
    },
    {
      "epoch": 2.231201272559455,
      "grad_norm": 0.40770530700683594,
      "learning_rate": 5.574085910267985e-06,
      "loss": 0.0145,
      "step": 1363380
    },
    {
      "epoch": 2.231234002998108,
      "grad_norm": 0.20850442349910736,
      "learning_rate": 5.574020018054467e-06,
      "loss": 0.0127,
      "step": 1363400
    },
    {
      "epoch": 2.2312667334367617,
      "grad_norm": 0.3530818521976471,
      "learning_rate": 5.57395412584095e-06,
      "loss": 0.0155,
      "step": 1363420
    },
    {
      "epoch": 2.231299463875415,
      "grad_norm": 0.5380995869636536,
      "learning_rate": 5.573888233627432e-06,
      "loss": 0.0236,
      "step": 1363440
    },
    {
      "epoch": 2.231332194314068,
      "grad_norm": 0.10427650064229965,
      "learning_rate": 5.573822341413916e-06,
      "loss": 0.015,
      "step": 1363460
    },
    {
      "epoch": 2.2313649247527216,
      "grad_norm": 0.4970756471157074,
      "learning_rate": 5.5737564492003985e-06,
      "loss": 0.0146,
      "step": 1363480
    },
    {
      "epoch": 2.231397655191375,
      "grad_norm": 0.9315505623817444,
      "learning_rate": 5.573690556986881e-06,
      "loss": 0.0177,
      "step": 1363500
    },
    {
      "epoch": 2.2314303856300284,
      "grad_norm": 0.40882056951522827,
      "learning_rate": 5.573624664773364e-06,
      "loss": 0.0154,
      "step": 1363520
    },
    {
      "epoch": 2.2314631160686815,
      "grad_norm": 0.3653933107852936,
      "learning_rate": 5.5735587725598475e-06,
      "loss": 0.0161,
      "step": 1363540
    },
    {
      "epoch": 2.231495846507335,
      "grad_norm": 0.16415511071681976,
      "learning_rate": 5.573492880346329e-06,
      "loss": 0.0134,
      "step": 1363560
    },
    {
      "epoch": 2.2315285769459883,
      "grad_norm": 0.3525274097919464,
      "learning_rate": 5.573426988132813e-06,
      "loss": 0.0133,
      "step": 1363580
    },
    {
      "epoch": 2.2315613073846414,
      "grad_norm": 0.7414649128913879,
      "learning_rate": 5.573361095919295e-06,
      "loss": 0.0123,
      "step": 1363600
    },
    {
      "epoch": 2.231594037823295,
      "grad_norm": 0.5314112901687622,
      "learning_rate": 5.5732952037057785e-06,
      "loss": 0.0119,
      "step": 1363620
    },
    {
      "epoch": 2.231626768261948,
      "grad_norm": 5.01396369934082,
      "learning_rate": 5.573229311492261e-06,
      "loss": 0.0187,
      "step": 1363640
    },
    {
      "epoch": 2.2316594987006018,
      "grad_norm": 0.6960359811782837,
      "learning_rate": 5.573163419278744e-06,
      "loss": 0.017,
      "step": 1363660
    },
    {
      "epoch": 2.231692229139255,
      "grad_norm": 0.22029799222946167,
      "learning_rate": 5.5730975270652276e-06,
      "loss": 0.0109,
      "step": 1363680
    },
    {
      "epoch": 2.231724959577908,
      "grad_norm": 0.05911184102296829,
      "learning_rate": 5.57303163485171e-06,
      "loss": 0.0168,
      "step": 1363700
    },
    {
      "epoch": 2.2317576900165617,
      "grad_norm": 0.09583063423633575,
      "learning_rate": 5.572965742638193e-06,
      "loss": 0.0139,
      "step": 1363720
    },
    {
      "epoch": 2.231790420455215,
      "grad_norm": 0.801788866519928,
      "learning_rate": 5.572899850424676e-06,
      "loss": 0.0152,
      "step": 1363740
    },
    {
      "epoch": 2.2318231508938684,
      "grad_norm": 1.0541253089904785,
      "learning_rate": 5.572833958211159e-06,
      "loss": 0.0145,
      "step": 1363760
    },
    {
      "epoch": 2.2318558813325216,
      "grad_norm": 0.16328515112400055,
      "learning_rate": 5.572768065997641e-06,
      "loss": 0.0164,
      "step": 1363780
    },
    {
      "epoch": 2.231888611771175,
      "grad_norm": 0.7300519347190857,
      "learning_rate": 5.572702173784125e-06,
      "loss": 0.017,
      "step": 1363800
    },
    {
      "epoch": 2.2319213422098283,
      "grad_norm": 0.18055933713912964,
      "learning_rate": 5.572636281570607e-06,
      "loss": 0.0186,
      "step": 1363820
    },
    {
      "epoch": 2.2319540726484814,
      "grad_norm": 0.19410450756549835,
      "learning_rate": 5.57257038935709e-06,
      "loss": 0.011,
      "step": 1363840
    },
    {
      "epoch": 2.231986803087135,
      "grad_norm": 0.27224916219711304,
      "learning_rate": 5.572504497143573e-06,
      "loss": 0.0261,
      "step": 1363860
    },
    {
      "epoch": 2.232019533525788,
      "grad_norm": 0.21105435490608215,
      "learning_rate": 5.572438604930056e-06,
      "loss": 0.017,
      "step": 1363880
    },
    {
      "epoch": 2.232052263964442,
      "grad_norm": 0.3517407476902008,
      "learning_rate": 5.5723727127165385e-06,
      "loss": 0.015,
      "step": 1363900
    },
    {
      "epoch": 2.232084994403095,
      "grad_norm": 0.382877916097641,
      "learning_rate": 5.572306820503022e-06,
      "loss": 0.0106,
      "step": 1363920
    },
    {
      "epoch": 2.2321177248417485,
      "grad_norm": 0.19200459122657776,
      "learning_rate": 5.572240928289504e-06,
      "loss": 0.0112,
      "step": 1363940
    },
    {
      "epoch": 2.2321504552804017,
      "grad_norm": 0.4732705354690552,
      "learning_rate": 5.572175036075988e-06,
      "loss": 0.0169,
      "step": 1363960
    },
    {
      "epoch": 2.232183185719055,
      "grad_norm": 0.22398994863033295,
      "learning_rate": 5.5721091438624695e-06,
      "loss": 0.0136,
      "step": 1363980
    },
    {
      "epoch": 2.2322159161577084,
      "grad_norm": 0.2394917905330658,
      "learning_rate": 5.572043251648953e-06,
      "loss": 0.0111,
      "step": 1364000
    },
    {
      "epoch": 2.2322486465963616,
      "grad_norm": 0.528904139995575,
      "learning_rate": 5.571977359435437e-06,
      "loss": 0.0132,
      "step": 1364020
    },
    {
      "epoch": 2.232281377035015,
      "grad_norm": 0.22061078250408173,
      "learning_rate": 5.5719114672219186e-06,
      "loss": 0.0196,
      "step": 1364040
    },
    {
      "epoch": 2.2323141074736683,
      "grad_norm": 0.42666903138160706,
      "learning_rate": 5.571845575008402e-06,
      "loss": 0.0182,
      "step": 1364060
    },
    {
      "epoch": 2.2323468379123215,
      "grad_norm": 0.5877829790115356,
      "learning_rate": 5.571779682794884e-06,
      "loss": 0.0103,
      "step": 1364080
    },
    {
      "epoch": 2.232379568350975,
      "grad_norm": 0.23989476263523102,
      "learning_rate": 5.571713790581368e-06,
      "loss": 0.0193,
      "step": 1364100
    },
    {
      "epoch": 2.232412298789628,
      "grad_norm": 0.3507533371448517,
      "learning_rate": 5.57164789836785e-06,
      "loss": 0.0118,
      "step": 1364120
    },
    {
      "epoch": 2.232445029228282,
      "grad_norm": 0.45544666051864624,
      "learning_rate": 5.571582006154334e-06,
      "loss": 0.0124,
      "step": 1364140
    },
    {
      "epoch": 2.232477759666935,
      "grad_norm": 0.5328221917152405,
      "learning_rate": 5.571516113940816e-06,
      "loss": 0.0152,
      "step": 1364160
    },
    {
      "epoch": 2.2325104901055886,
      "grad_norm": 0.3268318176269531,
      "learning_rate": 5.5714502217272994e-06,
      "loss": 0.0167,
      "step": 1364180
    },
    {
      "epoch": 2.2325432205442417,
      "grad_norm": 0.8053082823753357,
      "learning_rate": 5.571384329513781e-06,
      "loss": 0.0115,
      "step": 1364200
    },
    {
      "epoch": 2.232575950982895,
      "grad_norm": 0.17935781180858612,
      "learning_rate": 5.571318437300265e-06,
      "loss": 0.0191,
      "step": 1364220
    },
    {
      "epoch": 2.2326086814215484,
      "grad_norm": 0.1643640398979187,
      "learning_rate": 5.571252545086747e-06,
      "loss": 0.0095,
      "step": 1364240
    },
    {
      "epoch": 2.2326414118602016,
      "grad_norm": 0.8550158739089966,
      "learning_rate": 5.57118665287323e-06,
      "loss": 0.0174,
      "step": 1364260
    },
    {
      "epoch": 2.232674142298855,
      "grad_norm": 0.5443978309631348,
      "learning_rate": 5.571120760659713e-06,
      "loss": 0.0149,
      "step": 1364280
    },
    {
      "epoch": 2.2327068727375083,
      "grad_norm": 0.32945823669433594,
      "learning_rate": 5.571054868446196e-06,
      "loss": 0.0145,
      "step": 1364300
    },
    {
      "epoch": 2.232739603176162,
      "grad_norm": 1.063835859298706,
      "learning_rate": 5.570988976232679e-06,
      "loss": 0.0206,
      "step": 1364320
    },
    {
      "epoch": 2.232772333614815,
      "grad_norm": 0.08432189375162125,
      "learning_rate": 5.570923084019162e-06,
      "loss": 0.0112,
      "step": 1364340
    },
    {
      "epoch": 2.2328050640534682,
      "grad_norm": 0.2964898645877838,
      "learning_rate": 5.570857191805645e-06,
      "loss": 0.0118,
      "step": 1364360
    },
    {
      "epoch": 2.232837794492122,
      "grad_norm": 0.17341046035289764,
      "learning_rate": 5.570791299592128e-06,
      "loss": 0.0118,
      "step": 1364380
    },
    {
      "epoch": 2.232870524930775,
      "grad_norm": 0.3258327841758728,
      "learning_rate": 5.570725407378611e-06,
      "loss": 0.0162,
      "step": 1364400
    },
    {
      "epoch": 2.2329032553694286,
      "grad_norm": 0.24382220208644867,
      "learning_rate": 5.570659515165093e-06,
      "loss": 0.0181,
      "step": 1364420
    },
    {
      "epoch": 2.2329359858080817,
      "grad_norm": 0.2770296037197113,
      "learning_rate": 5.570593622951577e-06,
      "loss": 0.0118,
      "step": 1364440
    },
    {
      "epoch": 2.2329687162467353,
      "grad_norm": 0.37399521470069885,
      "learning_rate": 5.570527730738059e-06,
      "loss": 0.0204,
      "step": 1364460
    },
    {
      "epoch": 2.2330014466853885,
      "grad_norm": 0.5986711382865906,
      "learning_rate": 5.570461838524542e-06,
      "loss": 0.011,
      "step": 1364480
    },
    {
      "epoch": 2.2330341771240416,
      "grad_norm": 0.16865745186805725,
      "learning_rate": 5.570395946311025e-06,
      "loss": 0.0184,
      "step": 1364500
    },
    {
      "epoch": 2.233066907562695,
      "grad_norm": 0.3865148723125458,
      "learning_rate": 5.570330054097508e-06,
      "loss": 0.0111,
      "step": 1364520
    },
    {
      "epoch": 2.2330996380013484,
      "grad_norm": 0.40743061900138855,
      "learning_rate": 5.5702641618839904e-06,
      "loss": 0.0163,
      "step": 1364540
    },
    {
      "epoch": 2.233132368440002,
      "grad_norm": 0.40103450417518616,
      "learning_rate": 5.570198269670474e-06,
      "loss": 0.0072,
      "step": 1364560
    },
    {
      "epoch": 2.233165098878655,
      "grad_norm": 0.20044060051441193,
      "learning_rate": 5.570132377456956e-06,
      "loss": 0.0149,
      "step": 1364580
    },
    {
      "epoch": 2.2331978293173087,
      "grad_norm": 1.338471531867981,
      "learning_rate": 5.5700664852434395e-06,
      "loss": 0.0146,
      "step": 1364600
    },
    {
      "epoch": 2.233230559755962,
      "grad_norm": 0.42705237865448,
      "learning_rate": 5.570000593029921e-06,
      "loss": 0.0152,
      "step": 1364620
    },
    {
      "epoch": 2.233263290194615,
      "grad_norm": 0.06992340087890625,
      "learning_rate": 5.569934700816405e-06,
      "loss": 0.0095,
      "step": 1364640
    },
    {
      "epoch": 2.2332960206332686,
      "grad_norm": 0.3218078911304474,
      "learning_rate": 5.569868808602888e-06,
      "loss": 0.013,
      "step": 1364660
    },
    {
      "epoch": 2.2333287510719217,
      "grad_norm": 0.10668868571519852,
      "learning_rate": 5.5698029163893705e-06,
      "loss": 0.0161,
      "step": 1364680
    },
    {
      "epoch": 2.2333614815105753,
      "grad_norm": 0.2880031168460846,
      "learning_rate": 5.569737024175853e-06,
      "loss": 0.0187,
      "step": 1364700
    },
    {
      "epoch": 2.2333942119492285,
      "grad_norm": 1.1207711696624756,
      "learning_rate": 5.569671131962337e-06,
      "loss": 0.0236,
      "step": 1364720
    },
    {
      "epoch": 2.233426942387882,
      "grad_norm": 0.5175554156303406,
      "learning_rate": 5.5696052397488195e-06,
      "loss": 0.0166,
      "step": 1364740
    },
    {
      "epoch": 2.2334596728265352,
      "grad_norm": 0.27977097034454346,
      "learning_rate": 5.569539347535302e-06,
      "loss": 0.0149,
      "step": 1364760
    },
    {
      "epoch": 2.2334924032651884,
      "grad_norm": 0.1623164415359497,
      "learning_rate": 5.569473455321786e-06,
      "loss": 0.0217,
      "step": 1364780
    },
    {
      "epoch": 2.233525133703842,
      "grad_norm": 0.5533077716827393,
      "learning_rate": 5.569407563108268e-06,
      "loss": 0.0116,
      "step": 1364800
    },
    {
      "epoch": 2.233557864142495,
      "grad_norm": 0.3051607012748718,
      "learning_rate": 5.569341670894751e-06,
      "loss": 0.0164,
      "step": 1364820
    },
    {
      "epoch": 2.2335905945811487,
      "grad_norm": 0.651238203048706,
      "learning_rate": 5.569275778681233e-06,
      "loss": 0.0134,
      "step": 1364840
    },
    {
      "epoch": 2.233623325019802,
      "grad_norm": 0.14057043194770813,
      "learning_rate": 5.569209886467717e-06,
      "loss": 0.0102,
      "step": 1364860
    },
    {
      "epoch": 2.2336560554584555,
      "grad_norm": 1.497960090637207,
      "learning_rate": 5.5691439942541996e-06,
      "loss": 0.017,
      "step": 1364880
    },
    {
      "epoch": 2.2336887858971086,
      "grad_norm": 0.6384493708610535,
      "learning_rate": 5.569078102040682e-06,
      "loss": 0.014,
      "step": 1364900
    },
    {
      "epoch": 2.2337215163357618,
      "grad_norm": 0.5077301263809204,
      "learning_rate": 5.569012209827165e-06,
      "loss": 0.0146,
      "step": 1364920
    },
    {
      "epoch": 2.2337542467744154,
      "grad_norm": 0.32763761281967163,
      "learning_rate": 5.568946317613649e-06,
      "loss": 0.0105,
      "step": 1364940
    },
    {
      "epoch": 2.2337869772130685,
      "grad_norm": 1.0928856134414673,
      "learning_rate": 5.5688804254001305e-06,
      "loss": 0.019,
      "step": 1364960
    },
    {
      "epoch": 2.233819707651722,
      "grad_norm": 0.12791848182678223,
      "learning_rate": 5.568814533186614e-06,
      "loss": 0.0115,
      "step": 1364980
    },
    {
      "epoch": 2.2338524380903753,
      "grad_norm": 0.537142276763916,
      "learning_rate": 5.568748640973096e-06,
      "loss": 0.0182,
      "step": 1365000
    },
    {
      "epoch": 2.233885168529029,
      "grad_norm": 0.37524673342704773,
      "learning_rate": 5.56868274875958e-06,
      "loss": 0.0157,
      "step": 1365020
    },
    {
      "epoch": 2.233917898967682,
      "grad_norm": 0.8158088326454163,
      "learning_rate": 5.5686168565460615e-06,
      "loss": 0.02,
      "step": 1365040
    },
    {
      "epoch": 2.233950629406335,
      "grad_norm": 0.14459553360939026,
      "learning_rate": 5.568550964332545e-06,
      "loss": 0.0085,
      "step": 1365060
    },
    {
      "epoch": 2.2339833598449887,
      "grad_norm": 0.19081836938858032,
      "learning_rate": 5.568485072119029e-06,
      "loss": 0.0184,
      "step": 1365080
    },
    {
      "epoch": 2.234016090283642,
      "grad_norm": 1.2182692289352417,
      "learning_rate": 5.5684191799055105e-06,
      "loss": 0.0198,
      "step": 1365100
    },
    {
      "epoch": 2.2340488207222955,
      "grad_norm": 0.9518834948539734,
      "learning_rate": 5.568353287691994e-06,
      "loss": 0.0243,
      "step": 1365120
    },
    {
      "epoch": 2.2340815511609486,
      "grad_norm": 0.11519817262887955,
      "learning_rate": 5.568287395478477e-06,
      "loss": 0.0258,
      "step": 1365140
    },
    {
      "epoch": 2.234114281599602,
      "grad_norm": 0.14240960776805878,
      "learning_rate": 5.56822150326496e-06,
      "loss": 0.0126,
      "step": 1365160
    },
    {
      "epoch": 2.2341470120382554,
      "grad_norm": 0.6400742530822754,
      "learning_rate": 5.568155611051442e-06,
      "loss": 0.0191,
      "step": 1365180
    },
    {
      "epoch": 2.2341797424769085,
      "grad_norm": 0.5345171093940735,
      "learning_rate": 5.568089718837926e-06,
      "loss": 0.0126,
      "step": 1365200
    },
    {
      "epoch": 2.234212472915562,
      "grad_norm": 0.24733255803585052,
      "learning_rate": 5.568023826624408e-06,
      "loss": 0.0203,
      "step": 1365220
    },
    {
      "epoch": 2.2342452033542153,
      "grad_norm": 0.1957557201385498,
      "learning_rate": 5.567957934410891e-06,
      "loss": 0.0188,
      "step": 1365240
    },
    {
      "epoch": 2.234277933792869,
      "grad_norm": 0.16194848716259003,
      "learning_rate": 5.567892042197373e-06,
      "loss": 0.0178,
      "step": 1365260
    },
    {
      "epoch": 2.234310664231522,
      "grad_norm": 0.0768158808350563,
      "learning_rate": 5.567826149983857e-06,
      "loss": 0.0216,
      "step": 1365280
    },
    {
      "epoch": 2.234343394670175,
      "grad_norm": 0.10588754713535309,
      "learning_rate": 5.56776025777034e-06,
      "loss": 0.0189,
      "step": 1365300
    },
    {
      "epoch": 2.2343761251088288,
      "grad_norm": 0.5879945755004883,
      "learning_rate": 5.567694365556822e-06,
      "loss": 0.0181,
      "step": 1365320
    },
    {
      "epoch": 2.234408855547482,
      "grad_norm": 0.25424641370773315,
      "learning_rate": 5.567628473343305e-06,
      "loss": 0.0178,
      "step": 1365340
    },
    {
      "epoch": 2.2344415859861355,
      "grad_norm": 0.18377861380577087,
      "learning_rate": 5.567562581129789e-06,
      "loss": 0.0131,
      "step": 1365360
    },
    {
      "epoch": 2.2344743164247887,
      "grad_norm": 0.16737975180149078,
      "learning_rate": 5.567496688916271e-06,
      "loss": 0.019,
      "step": 1365380
    },
    {
      "epoch": 2.2345070468634423,
      "grad_norm": 0.12824705243110657,
      "learning_rate": 5.567430796702754e-06,
      "loss": 0.0156,
      "step": 1365400
    },
    {
      "epoch": 2.2345397773020954,
      "grad_norm": 0.6235061883926392,
      "learning_rate": 5.567364904489238e-06,
      "loss": 0.0168,
      "step": 1365420
    },
    {
      "epoch": 2.2345725077407486,
      "grad_norm": 0.13515020906925201,
      "learning_rate": 5.56729901227572e-06,
      "loss": 0.0153,
      "step": 1365440
    },
    {
      "epoch": 2.234605238179402,
      "grad_norm": 0.8970242142677307,
      "learning_rate": 5.567233120062203e-06,
      "loss": 0.0183,
      "step": 1365460
    },
    {
      "epoch": 2.2346379686180553,
      "grad_norm": 0.20090210437774658,
      "learning_rate": 5.567167227848685e-06,
      "loss": 0.0146,
      "step": 1365480
    },
    {
      "epoch": 2.234670699056709,
      "grad_norm": 0.3173005282878876,
      "learning_rate": 5.567101335635169e-06,
      "loss": 0.0141,
      "step": 1365500
    },
    {
      "epoch": 2.234703429495362,
      "grad_norm": 0.5380687713623047,
      "learning_rate": 5.5670354434216515e-06,
      "loss": 0.023,
      "step": 1365520
    },
    {
      "epoch": 2.2347361599340156,
      "grad_norm": 0.10009325295686722,
      "learning_rate": 5.566969551208134e-06,
      "loss": 0.0119,
      "step": 1365540
    },
    {
      "epoch": 2.234768890372669,
      "grad_norm": 0.26505765318870544,
      "learning_rate": 5.566903658994617e-06,
      "loss": 0.0129,
      "step": 1365560
    },
    {
      "epoch": 2.234801620811322,
      "grad_norm": 0.1633189469575882,
      "learning_rate": 5.5668377667811005e-06,
      "loss": 0.0186,
      "step": 1365580
    },
    {
      "epoch": 2.2348343512499755,
      "grad_norm": 0.6860556602478027,
      "learning_rate": 5.5667718745675824e-06,
      "loss": 0.0142,
      "step": 1365600
    },
    {
      "epoch": 2.2348670816886287,
      "grad_norm": 0.2410901039838791,
      "learning_rate": 5.566705982354066e-06,
      "loss": 0.0185,
      "step": 1365620
    },
    {
      "epoch": 2.2348998121272823,
      "grad_norm": 0.6332648992538452,
      "learning_rate": 5.566640090140548e-06,
      "loss": 0.0137,
      "step": 1365640
    },
    {
      "epoch": 2.2349325425659354,
      "grad_norm": 0.3621641993522644,
      "learning_rate": 5.5665741979270315e-06,
      "loss": 0.0137,
      "step": 1365660
    },
    {
      "epoch": 2.2349652730045886,
      "grad_norm": 0.18810127675533295,
      "learning_rate": 5.566508305713514e-06,
      "loss": 0.0098,
      "step": 1365680
    },
    {
      "epoch": 2.234998003443242,
      "grad_norm": 1.057220458984375,
      "learning_rate": 5.566442413499997e-06,
      "loss": 0.022,
      "step": 1365700
    },
    {
      "epoch": 2.2350307338818953,
      "grad_norm": 0.9940453171730042,
      "learning_rate": 5.56637652128648e-06,
      "loss": 0.0202,
      "step": 1365720
    },
    {
      "epoch": 2.235063464320549,
      "grad_norm": 0.4075292944908142,
      "learning_rate": 5.566310629072963e-06,
      "loss": 0.0106,
      "step": 1365740
    },
    {
      "epoch": 2.235096194759202,
      "grad_norm": 1.457672357559204,
      "learning_rate": 5.566244736859446e-06,
      "loss": 0.0214,
      "step": 1365760
    },
    {
      "epoch": 2.2351289251978557,
      "grad_norm": 0.3423767387866974,
      "learning_rate": 5.566178844645929e-06,
      "loss": 0.0146,
      "step": 1365780
    },
    {
      "epoch": 2.235161655636509,
      "grad_norm": 0.19381371140480042,
      "learning_rate": 5.566112952432412e-06,
      "loss": 0.0134,
      "step": 1365800
    },
    {
      "epoch": 2.235194386075162,
      "grad_norm": 0.3142554461956024,
      "learning_rate": 5.566047060218894e-06,
      "loss": 0.0141,
      "step": 1365820
    },
    {
      "epoch": 2.2352271165138156,
      "grad_norm": 0.24360696971416473,
      "learning_rate": 5.565981168005378e-06,
      "loss": 0.0222,
      "step": 1365840
    },
    {
      "epoch": 2.2352598469524687,
      "grad_norm": 0.06075163558125496,
      "learning_rate": 5.56591527579186e-06,
      "loss": 0.0181,
      "step": 1365860
    },
    {
      "epoch": 2.2352925773911223,
      "grad_norm": 0.9266820549964905,
      "learning_rate": 5.565849383578343e-06,
      "loss": 0.0132,
      "step": 1365880
    },
    {
      "epoch": 2.2353253078297755,
      "grad_norm": 0.14309769868850708,
      "learning_rate": 5.565783491364826e-06,
      "loss": 0.0136,
      "step": 1365900
    },
    {
      "epoch": 2.235358038268429,
      "grad_norm": 0.6991528868675232,
      "learning_rate": 5.565717599151309e-06,
      "loss": 0.0184,
      "step": 1365920
    },
    {
      "epoch": 2.235390768707082,
      "grad_norm": 0.22327959537506104,
      "learning_rate": 5.5656517069377915e-06,
      "loss": 0.0192,
      "step": 1365940
    },
    {
      "epoch": 2.2354234991457353,
      "grad_norm": 0.24755126237869263,
      "learning_rate": 5.565585814724275e-06,
      "loss": 0.0149,
      "step": 1365960
    },
    {
      "epoch": 2.235456229584389,
      "grad_norm": 0.26223281025886536,
      "learning_rate": 5.565519922510757e-06,
      "loss": 0.0151,
      "step": 1365980
    },
    {
      "epoch": 2.235488960023042,
      "grad_norm": 0.9198848009109497,
      "learning_rate": 5.565454030297241e-06,
      "loss": 0.0117,
      "step": 1366000
    },
    {
      "epoch": 2.2355216904616957,
      "grad_norm": 0.32493042945861816,
      "learning_rate": 5.5653881380837225e-06,
      "loss": 0.0165,
      "step": 1366020
    },
    {
      "epoch": 2.235554420900349,
      "grad_norm": 0.15225635468959808,
      "learning_rate": 5.565322245870206e-06,
      "loss": 0.0143,
      "step": 1366040
    },
    {
      "epoch": 2.2355871513390024,
      "grad_norm": 0.5529057383537292,
      "learning_rate": 5.565256353656688e-06,
      "loss": 0.0121,
      "step": 1366060
    },
    {
      "epoch": 2.2356198817776556,
      "grad_norm": 0.14780603349208832,
      "learning_rate": 5.5651904614431716e-06,
      "loss": 0.0132,
      "step": 1366080
    },
    {
      "epoch": 2.2356526122163087,
      "grad_norm": 0.31049174070358276,
      "learning_rate": 5.565124569229654e-06,
      "loss": 0.0116,
      "step": 1366100
    },
    {
      "epoch": 2.2356853426549623,
      "grad_norm": 0.20114800333976746,
      "learning_rate": 5.565058677016137e-06,
      "loss": 0.0127,
      "step": 1366120
    },
    {
      "epoch": 2.2357180730936155,
      "grad_norm": 0.9863062500953674,
      "learning_rate": 5.564992784802621e-06,
      "loss": 0.014,
      "step": 1366140
    },
    {
      "epoch": 2.235750803532269,
      "grad_norm": 0.18245163559913635,
      "learning_rate": 5.564926892589103e-06,
      "loss": 0.0155,
      "step": 1366160
    },
    {
      "epoch": 2.235783533970922,
      "grad_norm": 2.3124561309814453,
      "learning_rate": 5.564861000375586e-06,
      "loss": 0.018,
      "step": 1366180
    },
    {
      "epoch": 2.235816264409576,
      "grad_norm": 0.08981378376483917,
      "learning_rate": 5.564795108162069e-06,
      "loss": 0.0152,
      "step": 1366200
    },
    {
      "epoch": 2.235848994848229,
      "grad_norm": 0.8389965891838074,
      "learning_rate": 5.5647292159485524e-06,
      "loss": 0.0186,
      "step": 1366220
    },
    {
      "epoch": 2.235881725286882,
      "grad_norm": 0.373873233795166,
      "learning_rate": 5.564663323735034e-06,
      "loss": 0.0197,
      "step": 1366240
    },
    {
      "epoch": 2.2359144557255357,
      "grad_norm": 0.2902664542198181,
      "learning_rate": 5.564597431521518e-06,
      "loss": 0.0138,
      "step": 1366260
    },
    {
      "epoch": 2.235947186164189,
      "grad_norm": 0.3383464813232422,
      "learning_rate": 5.564531539308e-06,
      "loss": 0.0226,
      "step": 1366280
    },
    {
      "epoch": 2.2359799166028425,
      "grad_norm": 0.3915126323699951,
      "learning_rate": 5.564465647094483e-06,
      "loss": 0.0226,
      "step": 1366300
    },
    {
      "epoch": 2.2360126470414956,
      "grad_norm": 0.08073238283395767,
      "learning_rate": 5.564399754880966e-06,
      "loss": 0.0153,
      "step": 1366320
    },
    {
      "epoch": 2.236045377480149,
      "grad_norm": 1.0092267990112305,
      "learning_rate": 5.564333862667449e-06,
      "loss": 0.0151,
      "step": 1366340
    },
    {
      "epoch": 2.2360781079188023,
      "grad_norm": 0.345796138048172,
      "learning_rate": 5.564267970453932e-06,
      "loss": 0.0128,
      "step": 1366360
    },
    {
      "epoch": 2.2361108383574555,
      "grad_norm": 0.34636062383651733,
      "learning_rate": 5.564202078240415e-06,
      "loss": 0.0157,
      "step": 1366380
    },
    {
      "epoch": 2.236143568796109,
      "grad_norm": 0.32507798075675964,
      "learning_rate": 5.564136186026897e-06,
      "loss": 0.0161,
      "step": 1366400
    },
    {
      "epoch": 2.2361762992347622,
      "grad_norm": 0.5136221647262573,
      "learning_rate": 5.564070293813381e-06,
      "loss": 0.025,
      "step": 1366420
    },
    {
      "epoch": 2.236209029673416,
      "grad_norm": 1.453406810760498,
      "learning_rate": 5.5640044015998626e-06,
      "loss": 0.0153,
      "step": 1366440
    },
    {
      "epoch": 2.236241760112069,
      "grad_norm": 0.8428105711936951,
      "learning_rate": 5.563938509386346e-06,
      "loss": 0.0207,
      "step": 1366460
    },
    {
      "epoch": 2.2362744905507226,
      "grad_norm": 0.15783725678920746,
      "learning_rate": 5.56387261717283e-06,
      "loss": 0.0086,
      "step": 1366480
    },
    {
      "epoch": 2.2363072209893757,
      "grad_norm": 0.18076691031455994,
      "learning_rate": 5.563806724959312e-06,
      "loss": 0.0148,
      "step": 1366500
    },
    {
      "epoch": 2.236339951428029,
      "grad_norm": 0.2333884984254837,
      "learning_rate": 5.563740832745795e-06,
      "loss": 0.0139,
      "step": 1366520
    },
    {
      "epoch": 2.2363726818666825,
      "grad_norm": 0.259789377450943,
      "learning_rate": 5.563674940532278e-06,
      "loss": 0.0147,
      "step": 1366540
    },
    {
      "epoch": 2.2364054123053356,
      "grad_norm": 0.8076942563056946,
      "learning_rate": 5.563609048318761e-06,
      "loss": 0.0131,
      "step": 1366560
    },
    {
      "epoch": 2.236438142743989,
      "grad_norm": 0.8895765542984009,
      "learning_rate": 5.5635431561052434e-06,
      "loss": 0.0196,
      "step": 1366580
    },
    {
      "epoch": 2.2364708731826424,
      "grad_norm": 0.344281405210495,
      "learning_rate": 5.563477263891727e-06,
      "loss": 0.0174,
      "step": 1366600
    },
    {
      "epoch": 2.236503603621296,
      "grad_norm": 0.47691765427589417,
      "learning_rate": 5.563411371678209e-06,
      "loss": 0.0195,
      "step": 1366620
    },
    {
      "epoch": 2.236536334059949,
      "grad_norm": 0.5278635621070862,
      "learning_rate": 5.5633454794646925e-06,
      "loss": 0.0207,
      "step": 1366640
    },
    {
      "epoch": 2.2365690644986023,
      "grad_norm": 1.0451892614364624,
      "learning_rate": 5.563279587251174e-06,
      "loss": 0.0124,
      "step": 1366660
    },
    {
      "epoch": 2.236601794937256,
      "grad_norm": 0.21526072919368744,
      "learning_rate": 5.563213695037658e-06,
      "loss": 0.0126,
      "step": 1366680
    },
    {
      "epoch": 2.236634525375909,
      "grad_norm": 0.24149921536445618,
      "learning_rate": 5.563147802824141e-06,
      "loss": 0.0136,
      "step": 1366700
    },
    {
      "epoch": 2.2366672558145626,
      "grad_norm": 0.41004082560539246,
      "learning_rate": 5.5630819106106235e-06,
      "loss": 0.0133,
      "step": 1366720
    },
    {
      "epoch": 2.2366999862532158,
      "grad_norm": 0.627679705619812,
      "learning_rate": 5.563016018397106e-06,
      "loss": 0.0134,
      "step": 1366740
    },
    {
      "epoch": 2.236732716691869,
      "grad_norm": 0.36112773418426514,
      "learning_rate": 5.56295012618359e-06,
      "loss": 0.0104,
      "step": 1366760
    },
    {
      "epoch": 2.2367654471305225,
      "grad_norm": 0.15142002701759338,
      "learning_rate": 5.562884233970072e-06,
      "loss": 0.0154,
      "step": 1366780
    },
    {
      "epoch": 2.2367981775691756,
      "grad_norm": 0.8985646963119507,
      "learning_rate": 5.562818341756555e-06,
      "loss": 0.0203,
      "step": 1366800
    },
    {
      "epoch": 2.2368309080078292,
      "grad_norm": 0.9445546269416809,
      "learning_rate": 5.562752449543039e-06,
      "loss": 0.018,
      "step": 1366820
    },
    {
      "epoch": 2.2368636384464824,
      "grad_norm": 0.7100961804389954,
      "learning_rate": 5.562686557329521e-06,
      "loss": 0.0173,
      "step": 1366840
    },
    {
      "epoch": 2.236896368885136,
      "grad_norm": 0.09493939578533173,
      "learning_rate": 5.562620665116004e-06,
      "loss": 0.0071,
      "step": 1366860
    },
    {
      "epoch": 2.236929099323789,
      "grad_norm": 0.09366193413734436,
      "learning_rate": 5.562554772902486e-06,
      "loss": 0.0145,
      "step": 1366880
    },
    {
      "epoch": 2.2369618297624423,
      "grad_norm": 0.3459487557411194,
      "learning_rate": 5.56248888068897e-06,
      "loss": 0.0158,
      "step": 1366900
    },
    {
      "epoch": 2.236994560201096,
      "grad_norm": 0.8003543615341187,
      "learning_rate": 5.5624229884754526e-06,
      "loss": 0.0157,
      "step": 1366920
    },
    {
      "epoch": 2.237027290639749,
      "grad_norm": 0.9659579992294312,
      "learning_rate": 5.562357096261935e-06,
      "loss": 0.0181,
      "step": 1366940
    },
    {
      "epoch": 2.2370600210784026,
      "grad_norm": 0.20481941103935242,
      "learning_rate": 5.562291204048418e-06,
      "loss": 0.0084,
      "step": 1366960
    },
    {
      "epoch": 2.2370927515170558,
      "grad_norm": 0.43765735626220703,
      "learning_rate": 5.562225311834902e-06,
      "loss": 0.0189,
      "step": 1366980
    },
    {
      "epoch": 2.2371254819557094,
      "grad_norm": 1.700299620628357,
      "learning_rate": 5.5621594196213835e-06,
      "loss": 0.0141,
      "step": 1367000
    },
    {
      "epoch": 2.2371582123943625,
      "grad_norm": 0.7919961214065552,
      "learning_rate": 5.562093527407867e-06,
      "loss": 0.0166,
      "step": 1367020
    },
    {
      "epoch": 2.2371909428330157,
      "grad_norm": 0.7441596388816833,
      "learning_rate": 5.562027635194349e-06,
      "loss": 0.0192,
      "step": 1367040
    },
    {
      "epoch": 2.2372236732716693,
      "grad_norm": 0.7642596960067749,
      "learning_rate": 5.561961742980833e-06,
      "loss": 0.0111,
      "step": 1367060
    },
    {
      "epoch": 2.2372564037103224,
      "grad_norm": 0.5559256076812744,
      "learning_rate": 5.5618958507673145e-06,
      "loss": 0.0204,
      "step": 1367080
    },
    {
      "epoch": 2.237289134148976,
      "grad_norm": 0.1296801120042801,
      "learning_rate": 5.561829958553798e-06,
      "loss": 0.0207,
      "step": 1367100
    },
    {
      "epoch": 2.237321864587629,
      "grad_norm": 0.20860280096530914,
      "learning_rate": 5.561764066340281e-06,
      "loss": 0.014,
      "step": 1367120
    },
    {
      "epoch": 2.2373545950262823,
      "grad_norm": 0.2736069858074188,
      "learning_rate": 5.5616981741267635e-06,
      "loss": 0.0117,
      "step": 1367140
    },
    {
      "epoch": 2.237387325464936,
      "grad_norm": 0.11404567211866379,
      "learning_rate": 5.561632281913246e-06,
      "loss": 0.0121,
      "step": 1367160
    },
    {
      "epoch": 2.237420055903589,
      "grad_norm": 0.13981373608112335,
      "learning_rate": 5.56156638969973e-06,
      "loss": 0.0087,
      "step": 1367180
    },
    {
      "epoch": 2.2374527863422426,
      "grad_norm": 0.5064655542373657,
      "learning_rate": 5.561500497486213e-06,
      "loss": 0.0198,
      "step": 1367200
    },
    {
      "epoch": 2.237485516780896,
      "grad_norm": 0.11733967810869217,
      "learning_rate": 5.561434605272695e-06,
      "loss": 0.0127,
      "step": 1367220
    },
    {
      "epoch": 2.2375182472195494,
      "grad_norm": 0.4291478097438812,
      "learning_rate": 5.561368713059179e-06,
      "loss": 0.0167,
      "step": 1367240
    },
    {
      "epoch": 2.2375509776582025,
      "grad_norm": 0.19309088587760925,
      "learning_rate": 5.561302820845661e-06,
      "loss": 0.0097,
      "step": 1367260
    },
    {
      "epoch": 2.2375837080968557,
      "grad_norm": 0.424367219209671,
      "learning_rate": 5.561236928632144e-06,
      "loss": 0.0122,
      "step": 1367280
    },
    {
      "epoch": 2.2376164385355093,
      "grad_norm": 0.9000070691108704,
      "learning_rate": 5.561171036418626e-06,
      "loss": 0.0177,
      "step": 1367300
    },
    {
      "epoch": 2.2376491689741624,
      "grad_norm": 0.1623070240020752,
      "learning_rate": 5.56110514420511e-06,
      "loss": 0.0124,
      "step": 1367320
    },
    {
      "epoch": 2.237681899412816,
      "grad_norm": 0.4340873658657074,
      "learning_rate": 5.561039251991593e-06,
      "loss": 0.0214,
      "step": 1367340
    },
    {
      "epoch": 2.237714629851469,
      "grad_norm": 0.5562963485717773,
      "learning_rate": 5.560973359778075e-06,
      "loss": 0.0166,
      "step": 1367360
    },
    {
      "epoch": 2.2377473602901228,
      "grad_norm": 0.7647261619567871,
      "learning_rate": 5.560907467564558e-06,
      "loss": 0.0132,
      "step": 1367380
    },
    {
      "epoch": 2.237780090728776,
      "grad_norm": 0.30218642950057983,
      "learning_rate": 5.560841575351042e-06,
      "loss": 0.0163,
      "step": 1367400
    },
    {
      "epoch": 2.237812821167429,
      "grad_norm": 0.3871653974056244,
      "learning_rate": 5.560775683137524e-06,
      "loss": 0.0127,
      "step": 1367420
    },
    {
      "epoch": 2.2378455516060827,
      "grad_norm": 0.18790289759635925,
      "learning_rate": 5.560709790924007e-06,
      "loss": 0.0164,
      "step": 1367440
    },
    {
      "epoch": 2.237878282044736,
      "grad_norm": 0.03572361171245575,
      "learning_rate": 5.560643898710489e-06,
      "loss": 0.0143,
      "step": 1367460
    },
    {
      "epoch": 2.2379110124833894,
      "grad_norm": 0.2134084850549698,
      "learning_rate": 5.560578006496973e-06,
      "loss": 0.0146,
      "step": 1367480
    },
    {
      "epoch": 2.2379437429220426,
      "grad_norm": 0.7254374027252197,
      "learning_rate": 5.560512114283455e-06,
      "loss": 0.0172,
      "step": 1367500
    },
    {
      "epoch": 2.237976473360696,
      "grad_norm": 0.40224382281303406,
      "learning_rate": 5.560446222069938e-06,
      "loss": 0.0111,
      "step": 1367520
    },
    {
      "epoch": 2.2380092037993493,
      "grad_norm": 0.46469101309776306,
      "learning_rate": 5.560380329856422e-06,
      "loss": 0.0138,
      "step": 1367540
    },
    {
      "epoch": 2.2380419342380025,
      "grad_norm": 0.9419455528259277,
      "learning_rate": 5.5603144376429045e-06,
      "loss": 0.0175,
      "step": 1367560
    },
    {
      "epoch": 2.238074664676656,
      "grad_norm": 0.17613337934017181,
      "learning_rate": 5.560248545429387e-06,
      "loss": 0.0229,
      "step": 1367580
    },
    {
      "epoch": 2.238107395115309,
      "grad_norm": 4.156237602233887,
      "learning_rate": 5.56018265321587e-06,
      "loss": 0.0182,
      "step": 1367600
    },
    {
      "epoch": 2.238140125553963,
      "grad_norm": 0.17404280602931976,
      "learning_rate": 5.5601167610023535e-06,
      "loss": 0.0161,
      "step": 1367620
    },
    {
      "epoch": 2.238172855992616,
      "grad_norm": 0.35550689697265625,
      "learning_rate": 5.5600508687888354e-06,
      "loss": 0.0116,
      "step": 1367640
    },
    {
      "epoch": 2.2382055864312695,
      "grad_norm": 0.7465121150016785,
      "learning_rate": 5.559984976575319e-06,
      "loss": 0.0178,
      "step": 1367660
    },
    {
      "epoch": 2.2382383168699227,
      "grad_norm": 0.12012887746095657,
      "learning_rate": 5.559919084361801e-06,
      "loss": 0.0109,
      "step": 1367680
    },
    {
      "epoch": 2.238271047308576,
      "grad_norm": 0.2772388160228729,
      "learning_rate": 5.5598531921482845e-06,
      "loss": 0.0113,
      "step": 1367700
    },
    {
      "epoch": 2.2383037777472294,
      "grad_norm": 1.3330479860305786,
      "learning_rate": 5.559787299934767e-06,
      "loss": 0.0165,
      "step": 1367720
    },
    {
      "epoch": 2.2383365081858826,
      "grad_norm": 0.22743991017341614,
      "learning_rate": 5.55972140772125e-06,
      "loss": 0.0189,
      "step": 1367740
    },
    {
      "epoch": 2.238369238624536,
      "grad_norm": 0.6528589725494385,
      "learning_rate": 5.559655515507733e-06,
      "loss": 0.009,
      "step": 1367760
    },
    {
      "epoch": 2.2384019690631893,
      "grad_norm": 0.565514326095581,
      "learning_rate": 5.559589623294216e-06,
      "loss": 0.0123,
      "step": 1367780
    },
    {
      "epoch": 2.238434699501843,
      "grad_norm": 0.14702002704143524,
      "learning_rate": 5.559523731080698e-06,
      "loss": 0.0097,
      "step": 1367800
    },
    {
      "epoch": 2.238467429940496,
      "grad_norm": 0.256302148103714,
      "learning_rate": 5.559457838867182e-06,
      "loss": 0.0156,
      "step": 1367820
    },
    {
      "epoch": 2.2385001603791492,
      "grad_norm": 0.2093411237001419,
      "learning_rate": 5.559391946653664e-06,
      "loss": 0.0161,
      "step": 1367840
    },
    {
      "epoch": 2.238532890817803,
      "grad_norm": 0.8456366062164307,
      "learning_rate": 5.559326054440147e-06,
      "loss": 0.0131,
      "step": 1367860
    },
    {
      "epoch": 2.238565621256456,
      "grad_norm": 0.41956377029418945,
      "learning_rate": 5.559260162226631e-06,
      "loss": 0.0121,
      "step": 1367880
    },
    {
      "epoch": 2.2385983516951096,
      "grad_norm": 0.1279326230287552,
      "learning_rate": 5.559194270013113e-06,
      "loss": 0.0149,
      "step": 1367900
    },
    {
      "epoch": 2.2386310821337627,
      "grad_norm": 0.29068899154663086,
      "learning_rate": 5.559128377799596e-06,
      "loss": 0.0132,
      "step": 1367920
    },
    {
      "epoch": 2.2386638125724163,
      "grad_norm": 0.3019479215145111,
      "learning_rate": 5.559062485586078e-06,
      "loss": 0.0188,
      "step": 1367940
    },
    {
      "epoch": 2.2386965430110695,
      "grad_norm": 0.223859965801239,
      "learning_rate": 5.558996593372562e-06,
      "loss": 0.0228,
      "step": 1367960
    },
    {
      "epoch": 2.2387292734497226,
      "grad_norm": 0.2807047367095947,
      "learning_rate": 5.5589307011590445e-06,
      "loss": 0.0189,
      "step": 1367980
    },
    {
      "epoch": 2.238762003888376,
      "grad_norm": 0.20862266421318054,
      "learning_rate": 5.558864808945528e-06,
      "loss": 0.0119,
      "step": 1368000
    },
    {
      "epoch": 2.2387947343270294,
      "grad_norm": 0.4785561263561249,
      "learning_rate": 5.55879891673201e-06,
      "loss": 0.016,
      "step": 1368020
    },
    {
      "epoch": 2.238827464765683,
      "grad_norm": 0.25493714213371277,
      "learning_rate": 5.558733024518494e-06,
      "loss": 0.0243,
      "step": 1368040
    },
    {
      "epoch": 2.238860195204336,
      "grad_norm": 0.24704374372959137,
      "learning_rate": 5.5586671323049755e-06,
      "loss": 0.0163,
      "step": 1368060
    },
    {
      "epoch": 2.2388929256429897,
      "grad_norm": 0.6523874402046204,
      "learning_rate": 5.558601240091459e-06,
      "loss": 0.0106,
      "step": 1368080
    },
    {
      "epoch": 2.238925656081643,
      "grad_norm": 0.18755078315734863,
      "learning_rate": 5.558535347877941e-06,
      "loss": 0.0171,
      "step": 1368100
    },
    {
      "epoch": 2.238958386520296,
      "grad_norm": 0.1589430421590805,
      "learning_rate": 5.5584694556644246e-06,
      "loss": 0.0138,
      "step": 1368120
    },
    {
      "epoch": 2.2389911169589496,
      "grad_norm": 0.09435795992612839,
      "learning_rate": 5.558403563450907e-06,
      "loss": 0.0178,
      "step": 1368140
    },
    {
      "epoch": 2.2390238473976027,
      "grad_norm": 0.45722511410713196,
      "learning_rate": 5.55833767123739e-06,
      "loss": 0.0204,
      "step": 1368160
    },
    {
      "epoch": 2.2390565778362563,
      "grad_norm": 0.16059055924415588,
      "learning_rate": 5.558271779023873e-06,
      "loss": 0.0131,
      "step": 1368180
    },
    {
      "epoch": 2.2390893082749095,
      "grad_norm": 0.2326635718345642,
      "learning_rate": 5.558205886810356e-06,
      "loss": 0.016,
      "step": 1368200
    },
    {
      "epoch": 2.2391220387135626,
      "grad_norm": 1.1094228029251099,
      "learning_rate": 5.558139994596838e-06,
      "loss": 0.015,
      "step": 1368220
    },
    {
      "epoch": 2.2391547691522162,
      "grad_norm": 1.2641388177871704,
      "learning_rate": 5.558074102383322e-06,
      "loss": 0.0152,
      "step": 1368240
    },
    {
      "epoch": 2.2391874995908694,
      "grad_norm": 0.4693482518196106,
      "learning_rate": 5.5580082101698054e-06,
      "loss": 0.0228,
      "step": 1368260
    },
    {
      "epoch": 2.239220230029523,
      "grad_norm": 1.3699827194213867,
      "learning_rate": 5.557942317956287e-06,
      "loss": 0.0109,
      "step": 1368280
    },
    {
      "epoch": 2.239252960468176,
      "grad_norm": 0.14126268029212952,
      "learning_rate": 5.557876425742771e-06,
      "loss": 0.0126,
      "step": 1368300
    },
    {
      "epoch": 2.2392856909068297,
      "grad_norm": 0.48928937315940857,
      "learning_rate": 5.557810533529253e-06,
      "loss": 0.011,
      "step": 1368320
    },
    {
      "epoch": 2.239318421345483,
      "grad_norm": 0.18511290848255157,
      "learning_rate": 5.557744641315736e-06,
      "loss": 0.0117,
      "step": 1368340
    },
    {
      "epoch": 2.239351151784136,
      "grad_norm": 0.36338159441947937,
      "learning_rate": 5.557678749102219e-06,
      "loss": 0.0141,
      "step": 1368360
    },
    {
      "epoch": 2.2393838822227896,
      "grad_norm": 0.4733721911907196,
      "learning_rate": 5.557612856888702e-06,
      "loss": 0.0147,
      "step": 1368380
    },
    {
      "epoch": 2.2394166126614428,
      "grad_norm": 0.5347049832344055,
      "learning_rate": 5.557546964675185e-06,
      "loss": 0.0171,
      "step": 1368400
    },
    {
      "epoch": 2.2394493431000964,
      "grad_norm": 0.302715003490448,
      "learning_rate": 5.557481072461668e-06,
      "loss": 0.0181,
      "step": 1368420
    },
    {
      "epoch": 2.2394820735387495,
      "grad_norm": 0.1598537266254425,
      "learning_rate": 5.55741518024815e-06,
      "loss": 0.0155,
      "step": 1368440
    },
    {
      "epoch": 2.239514803977403,
      "grad_norm": 0.625436007976532,
      "learning_rate": 5.557349288034634e-06,
      "loss": 0.0212,
      "step": 1368460
    },
    {
      "epoch": 2.2395475344160563,
      "grad_norm": 0.7026407122612,
      "learning_rate": 5.557283395821116e-06,
      "loss": 0.0203,
      "step": 1368480
    },
    {
      "epoch": 2.2395802648547094,
      "grad_norm": 0.20540039241313934,
      "learning_rate": 5.557217503607599e-06,
      "loss": 0.0134,
      "step": 1368500
    },
    {
      "epoch": 2.239612995293363,
      "grad_norm": 0.4972693622112274,
      "learning_rate": 5.557151611394082e-06,
      "loss": 0.0154,
      "step": 1368520
    },
    {
      "epoch": 2.239645725732016,
      "grad_norm": 0.32357701659202576,
      "learning_rate": 5.557085719180565e-06,
      "loss": 0.0122,
      "step": 1368540
    },
    {
      "epoch": 2.2396784561706697,
      "grad_norm": 0.4022291898727417,
      "learning_rate": 5.557019826967047e-06,
      "loss": 0.0128,
      "step": 1368560
    },
    {
      "epoch": 2.239711186609323,
      "grad_norm": 0.17542801797389984,
      "learning_rate": 5.556953934753531e-06,
      "loss": 0.0111,
      "step": 1368580
    },
    {
      "epoch": 2.2397439170479765,
      "grad_norm": 0.5402476191520691,
      "learning_rate": 5.556888042540014e-06,
      "loss": 0.0138,
      "step": 1368600
    },
    {
      "epoch": 2.2397766474866296,
      "grad_norm": 0.5139357447624207,
      "learning_rate": 5.5568221503264965e-06,
      "loss": 0.0179,
      "step": 1368620
    },
    {
      "epoch": 2.239809377925283,
      "grad_norm": 0.09039445966482162,
      "learning_rate": 5.55675625811298e-06,
      "loss": 0.0091,
      "step": 1368640
    },
    {
      "epoch": 2.2398421083639364,
      "grad_norm": 0.2814863324165344,
      "learning_rate": 5.556690365899462e-06,
      "loss": 0.0214,
      "step": 1368660
    },
    {
      "epoch": 2.2398748388025895,
      "grad_norm": 0.8024968504905701,
      "learning_rate": 5.5566244736859455e-06,
      "loss": 0.0202,
      "step": 1368680
    },
    {
      "epoch": 2.239907569241243,
      "grad_norm": 0.1443147212266922,
      "learning_rate": 5.556558581472427e-06,
      "loss": 0.0119,
      "step": 1368700
    },
    {
      "epoch": 2.2399402996798963,
      "grad_norm": 0.463591605424881,
      "learning_rate": 5.556492689258911e-06,
      "loss": 0.0173,
      "step": 1368720
    },
    {
      "epoch": 2.2399730301185494,
      "grad_norm": 0.46228694915771484,
      "learning_rate": 5.556426797045394e-06,
      "loss": 0.0197,
      "step": 1368740
    },
    {
      "epoch": 2.240005760557203,
      "grad_norm": 0.27901512384414673,
      "learning_rate": 5.5563609048318765e-06,
      "loss": 0.0208,
      "step": 1368760
    },
    {
      "epoch": 2.240038490995856,
      "grad_norm": 0.43903765082359314,
      "learning_rate": 5.556295012618359e-06,
      "loss": 0.0154,
      "step": 1368780
    },
    {
      "epoch": 2.2400712214345098,
      "grad_norm": 0.3182676136493683,
      "learning_rate": 5.556229120404843e-06,
      "loss": 0.018,
      "step": 1368800
    },
    {
      "epoch": 2.240103951873163,
      "grad_norm": 0.2699531316757202,
      "learning_rate": 5.556163228191325e-06,
      "loss": 0.0157,
      "step": 1368820
    },
    {
      "epoch": 2.2401366823118165,
      "grad_norm": 0.16947047412395477,
      "learning_rate": 5.556097335977808e-06,
      "loss": 0.0135,
      "step": 1368840
    },
    {
      "epoch": 2.2401694127504697,
      "grad_norm": 0.1898048371076584,
      "learning_rate": 5.55603144376429e-06,
      "loss": 0.015,
      "step": 1368860
    },
    {
      "epoch": 2.240202143189123,
      "grad_norm": 1.3613228797912598,
      "learning_rate": 5.555965551550774e-06,
      "loss": 0.0193,
      "step": 1368880
    },
    {
      "epoch": 2.2402348736277764,
      "grad_norm": 0.3842177987098694,
      "learning_rate": 5.555899659337256e-06,
      "loss": 0.0156,
      "step": 1368900
    },
    {
      "epoch": 2.2402676040664296,
      "grad_norm": 0.2424742877483368,
      "learning_rate": 5.555833767123739e-06,
      "loss": 0.014,
      "step": 1368920
    },
    {
      "epoch": 2.240300334505083,
      "grad_norm": 0.18041449785232544,
      "learning_rate": 5.555767874910223e-06,
      "loss": 0.0187,
      "step": 1368940
    },
    {
      "epoch": 2.2403330649437363,
      "grad_norm": 0.3068874180316925,
      "learning_rate": 5.555701982696705e-06,
      "loss": 0.0093,
      "step": 1368960
    },
    {
      "epoch": 2.24036579538239,
      "grad_norm": 0.26947343349456787,
      "learning_rate": 5.555636090483188e-06,
      "loss": 0.022,
      "step": 1368980
    },
    {
      "epoch": 2.240398525821043,
      "grad_norm": 0.143373504281044,
      "learning_rate": 5.555570198269671e-06,
      "loss": 0.02,
      "step": 1369000
    },
    {
      "epoch": 2.240431256259696,
      "grad_norm": 0.5738389492034912,
      "learning_rate": 5.555504306056154e-06,
      "loss": 0.0155,
      "step": 1369020
    },
    {
      "epoch": 2.24046398669835,
      "grad_norm": 0.4437103271484375,
      "learning_rate": 5.5554384138426365e-06,
      "loss": 0.0153,
      "step": 1369040
    },
    {
      "epoch": 2.240496717137003,
      "grad_norm": 0.1891586184501648,
      "learning_rate": 5.55537252162912e-06,
      "loss": 0.0163,
      "step": 1369060
    },
    {
      "epoch": 2.2405294475756565,
      "grad_norm": 0.16200502216815948,
      "learning_rate": 5.555306629415602e-06,
      "loss": 0.0151,
      "step": 1369080
    },
    {
      "epoch": 2.2405621780143097,
      "grad_norm": 0.18744845688343048,
      "learning_rate": 5.555240737202086e-06,
      "loss": 0.0154,
      "step": 1369100
    },
    {
      "epoch": 2.2405949084529633,
      "grad_norm": 0.13289208710193634,
      "learning_rate": 5.5551748449885675e-06,
      "loss": 0.0125,
      "step": 1369120
    },
    {
      "epoch": 2.2406276388916164,
      "grad_norm": 0.19413483142852783,
      "learning_rate": 5.555108952775051e-06,
      "loss": 0.0205,
      "step": 1369140
    },
    {
      "epoch": 2.2406603693302696,
      "grad_norm": 0.534990131855011,
      "learning_rate": 5.555043060561534e-06,
      "loss": 0.0159,
      "step": 1369160
    },
    {
      "epoch": 2.240693099768923,
      "grad_norm": 0.24873462319374084,
      "learning_rate": 5.5549771683480166e-06,
      "loss": 0.0165,
      "step": 1369180
    },
    {
      "epoch": 2.2407258302075763,
      "grad_norm": 0.2970679700374603,
      "learning_rate": 5.554911276134499e-06,
      "loss": 0.0129,
      "step": 1369200
    },
    {
      "epoch": 2.24075856064623,
      "grad_norm": 0.20759008824825287,
      "learning_rate": 5.554845383920983e-06,
      "loss": 0.01,
      "step": 1369220
    },
    {
      "epoch": 2.240791291084883,
      "grad_norm": 0.2464931160211563,
      "learning_rate": 5.554779491707465e-06,
      "loss": 0.0126,
      "step": 1369240
    },
    {
      "epoch": 2.2408240215235367,
      "grad_norm": 0.21885649859905243,
      "learning_rate": 5.554713599493948e-06,
      "loss": 0.0129,
      "step": 1369260
    },
    {
      "epoch": 2.24085675196219,
      "grad_norm": 0.6754730343818665,
      "learning_rate": 5.554647707280432e-06,
      "loss": 0.0128,
      "step": 1369280
    },
    {
      "epoch": 2.240889482400843,
      "grad_norm": 0.46237388253211975,
      "learning_rate": 5.554581815066914e-06,
      "loss": 0.0121,
      "step": 1369300
    },
    {
      "epoch": 2.2409222128394966,
      "grad_norm": 0.4099934697151184,
      "learning_rate": 5.5545159228533974e-06,
      "loss": 0.0191,
      "step": 1369320
    },
    {
      "epoch": 2.2409549432781497,
      "grad_norm": 0.3085019886493683,
      "learning_rate": 5.554450030639879e-06,
      "loss": 0.0134,
      "step": 1369340
    },
    {
      "epoch": 2.2409876737168033,
      "grad_norm": 0.3572819232940674,
      "learning_rate": 5.554384138426363e-06,
      "loss": 0.0135,
      "step": 1369360
    },
    {
      "epoch": 2.2410204041554564,
      "grad_norm": 0.5502268671989441,
      "learning_rate": 5.554318246212846e-06,
      "loss": 0.011,
      "step": 1369380
    },
    {
      "epoch": 2.24105313459411,
      "grad_norm": 1.0788259506225586,
      "learning_rate": 5.554252353999328e-06,
      "loss": 0.0175,
      "step": 1369400
    },
    {
      "epoch": 2.241085865032763,
      "grad_norm": 0.19093602895736694,
      "learning_rate": 5.554186461785811e-06,
      "loss": 0.013,
      "step": 1369420
    },
    {
      "epoch": 2.2411185954714163,
      "grad_norm": 0.11715865135192871,
      "learning_rate": 5.554120569572295e-06,
      "loss": 0.0223,
      "step": 1369440
    },
    {
      "epoch": 2.24115132591007,
      "grad_norm": 0.7316136360168457,
      "learning_rate": 5.554054677358777e-06,
      "loss": 0.0162,
      "step": 1369460
    },
    {
      "epoch": 2.241184056348723,
      "grad_norm": 0.4438483417034149,
      "learning_rate": 5.55398878514526e-06,
      "loss": 0.0177,
      "step": 1369480
    },
    {
      "epoch": 2.2412167867873767,
      "grad_norm": 0.4215078055858612,
      "learning_rate": 5.553922892931742e-06,
      "loss": 0.0184,
      "step": 1369500
    },
    {
      "epoch": 2.24124951722603,
      "grad_norm": 1.8926829099655151,
      "learning_rate": 5.553857000718226e-06,
      "loss": 0.0159,
      "step": 1369520
    },
    {
      "epoch": 2.2412822476646834,
      "grad_norm": 0.17737090587615967,
      "learning_rate": 5.553791108504708e-06,
      "loss": 0.0118,
      "step": 1369540
    },
    {
      "epoch": 2.2413149781033366,
      "grad_norm": 0.1990639716386795,
      "learning_rate": 5.553725216291191e-06,
      "loss": 0.0166,
      "step": 1369560
    },
    {
      "epoch": 2.2413477085419897,
      "grad_norm": 0.7592589855194092,
      "learning_rate": 5.553659324077674e-06,
      "loss": 0.0215,
      "step": 1369580
    },
    {
      "epoch": 2.2413804389806433,
      "grad_norm": 0.44280731678009033,
      "learning_rate": 5.5535934318641575e-06,
      "loss": 0.0156,
      "step": 1369600
    },
    {
      "epoch": 2.2414131694192965,
      "grad_norm": 0.46366000175476074,
      "learning_rate": 5.553527539650639e-06,
      "loss": 0.0106,
      "step": 1369620
    },
    {
      "epoch": 2.24144589985795,
      "grad_norm": 0.5450249314308167,
      "learning_rate": 5.553461647437123e-06,
      "loss": 0.017,
      "step": 1369640
    },
    {
      "epoch": 2.241478630296603,
      "grad_norm": 0.47205591201782227,
      "learning_rate": 5.5533957552236065e-06,
      "loss": 0.0113,
      "step": 1369660
    },
    {
      "epoch": 2.241511360735257,
      "grad_norm": 0.2596079111099243,
      "learning_rate": 5.5533298630100884e-06,
      "loss": 0.0188,
      "step": 1369680
    },
    {
      "epoch": 2.24154409117391,
      "grad_norm": 0.21656310558319092,
      "learning_rate": 5.553263970796572e-06,
      "loss": 0.0091,
      "step": 1369700
    },
    {
      "epoch": 2.241576821612563,
      "grad_norm": 0.33974990248680115,
      "learning_rate": 5.553198078583054e-06,
      "loss": 0.0163,
      "step": 1369720
    },
    {
      "epoch": 2.2416095520512167,
      "grad_norm": 0.2941841185092926,
      "learning_rate": 5.5531321863695375e-06,
      "loss": 0.0203,
      "step": 1369740
    },
    {
      "epoch": 2.24164228248987,
      "grad_norm": 0.3226776719093323,
      "learning_rate": 5.55306629415602e-06,
      "loss": 0.0195,
      "step": 1369760
    },
    {
      "epoch": 2.2416750129285234,
      "grad_norm": 0.37635427713394165,
      "learning_rate": 5.553000401942503e-06,
      "loss": 0.0162,
      "step": 1369780
    },
    {
      "epoch": 2.2417077433671766,
      "grad_norm": 0.4308629631996155,
      "learning_rate": 5.552934509728986e-06,
      "loss": 0.0153,
      "step": 1369800
    },
    {
      "epoch": 2.2417404738058297,
      "grad_norm": 0.25238171219825745,
      "learning_rate": 5.552868617515469e-06,
      "loss": 0.0188,
      "step": 1369820
    },
    {
      "epoch": 2.2417732042444833,
      "grad_norm": 0.16974572837352753,
      "learning_rate": 5.552802725301951e-06,
      "loss": 0.0128,
      "step": 1369840
    },
    {
      "epoch": 2.2418059346831365,
      "grad_norm": 0.5240010619163513,
      "learning_rate": 5.552736833088435e-06,
      "loss": 0.0129,
      "step": 1369860
    },
    {
      "epoch": 2.24183866512179,
      "grad_norm": 1.2616784572601318,
      "learning_rate": 5.552670940874917e-06,
      "loss": 0.0174,
      "step": 1369880
    },
    {
      "epoch": 2.2418713955604432,
      "grad_norm": 1.4886223077774048,
      "learning_rate": 5.5526050486614e-06,
      "loss": 0.0108,
      "step": 1369900
    },
    {
      "epoch": 2.241904125999097,
      "grad_norm": 0.2934325933456421,
      "learning_rate": 5.552539156447882e-06,
      "loss": 0.0134,
      "step": 1369920
    },
    {
      "epoch": 2.24193685643775,
      "grad_norm": 0.3227444291114807,
      "learning_rate": 5.552473264234366e-06,
      "loss": 0.011,
      "step": 1369940
    },
    {
      "epoch": 2.241969586876403,
      "grad_norm": 0.16541968286037445,
      "learning_rate": 5.5524073720208485e-06,
      "loss": 0.0152,
      "step": 1369960
    },
    {
      "epoch": 2.2420023173150567,
      "grad_norm": 0.8266689777374268,
      "learning_rate": 5.552341479807331e-06,
      "loss": 0.0097,
      "step": 1369980
    },
    {
      "epoch": 2.24203504775371,
      "grad_norm": 0.4462473690509796,
      "learning_rate": 5.552275587593815e-06,
      "loss": 0.0089,
      "step": 1370000
    },
    {
      "epoch": 2.2420677781923635,
      "grad_norm": 0.2574913203716278,
      "learning_rate": 5.5522096953802976e-06,
      "loss": 0.0131,
      "step": 1370020
    },
    {
      "epoch": 2.2421005086310166,
      "grad_norm": 0.899799644947052,
      "learning_rate": 5.55214380316678e-06,
      "loss": 0.0112,
      "step": 1370040
    },
    {
      "epoch": 2.24213323906967,
      "grad_norm": 0.19492454826831818,
      "learning_rate": 5.552077910953263e-06,
      "loss": 0.0154,
      "step": 1370060
    },
    {
      "epoch": 2.2421659695083234,
      "grad_norm": 0.36344584822654724,
      "learning_rate": 5.552012018739747e-06,
      "loss": 0.0195,
      "step": 1370080
    },
    {
      "epoch": 2.2421986999469765,
      "grad_norm": 0.4457133710384369,
      "learning_rate": 5.5519461265262285e-06,
      "loss": 0.0156,
      "step": 1370100
    },
    {
      "epoch": 2.24223143038563,
      "grad_norm": 0.21584537625312805,
      "learning_rate": 5.551880234312712e-06,
      "loss": 0.0112,
      "step": 1370120
    },
    {
      "epoch": 2.2422641608242833,
      "grad_norm": 0.38514941930770874,
      "learning_rate": 5.551814342099194e-06,
      "loss": 0.0132,
      "step": 1370140
    },
    {
      "epoch": 2.242296891262937,
      "grad_norm": 0.12422803044319153,
      "learning_rate": 5.551748449885678e-06,
      "loss": 0.0128,
      "step": 1370160
    },
    {
      "epoch": 2.24232962170159,
      "grad_norm": 1.290986180305481,
      "learning_rate": 5.55168255767216e-06,
      "loss": 0.0223,
      "step": 1370180
    },
    {
      "epoch": 2.242362352140243,
      "grad_norm": 0.39805838465690613,
      "learning_rate": 5.551616665458643e-06,
      "loss": 0.0102,
      "step": 1370200
    },
    {
      "epoch": 2.2423950825788967,
      "grad_norm": 0.8297747373580933,
      "learning_rate": 5.551550773245126e-06,
      "loss": 0.0192,
      "step": 1370220
    },
    {
      "epoch": 2.24242781301755,
      "grad_norm": 0.6087476015090942,
      "learning_rate": 5.551484881031609e-06,
      "loss": 0.0162,
      "step": 1370240
    },
    {
      "epoch": 2.2424605434562035,
      "grad_norm": 0.43026232719421387,
      "learning_rate": 5.551418988818091e-06,
      "loss": 0.0154,
      "step": 1370260
    },
    {
      "epoch": 2.2424932738948566,
      "grad_norm": 0.10827519744634628,
      "learning_rate": 5.551353096604575e-06,
      "loss": 0.011,
      "step": 1370280
    },
    {
      "epoch": 2.2425260043335102,
      "grad_norm": 0.09072006493806839,
      "learning_rate": 5.551287204391057e-06,
      "loss": 0.016,
      "step": 1370300
    },
    {
      "epoch": 2.2425587347721634,
      "grad_norm": 0.6658370494842529,
      "learning_rate": 5.55122131217754e-06,
      "loss": 0.0125,
      "step": 1370320
    },
    {
      "epoch": 2.2425914652108165,
      "grad_norm": 0.33811160922050476,
      "learning_rate": 5.551155419964024e-06,
      "loss": 0.0134,
      "step": 1370340
    },
    {
      "epoch": 2.24262419564947,
      "grad_norm": 0.572025716304779,
      "learning_rate": 5.551089527750506e-06,
      "loss": 0.0155,
      "step": 1370360
    },
    {
      "epoch": 2.2426569260881233,
      "grad_norm": 0.4612942636013031,
      "learning_rate": 5.551023635536989e-06,
      "loss": 0.011,
      "step": 1370380
    },
    {
      "epoch": 2.242689656526777,
      "grad_norm": 0.10952787846326828,
      "learning_rate": 5.550957743323472e-06,
      "loss": 0.013,
      "step": 1370400
    },
    {
      "epoch": 2.24272238696543,
      "grad_norm": 0.22058331966400146,
      "learning_rate": 5.550891851109955e-06,
      "loss": 0.01,
      "step": 1370420
    },
    {
      "epoch": 2.2427551174040836,
      "grad_norm": 0.40677738189697266,
      "learning_rate": 5.550825958896438e-06,
      "loss": 0.0234,
      "step": 1370440
    },
    {
      "epoch": 2.2427878478427368,
      "grad_norm": 0.5359326601028442,
      "learning_rate": 5.550760066682921e-06,
      "loss": 0.0204,
      "step": 1370460
    },
    {
      "epoch": 2.24282057828139,
      "grad_norm": 1.0521358251571655,
      "learning_rate": 5.550694174469403e-06,
      "loss": 0.0162,
      "step": 1370480
    },
    {
      "epoch": 2.2428533087200435,
      "grad_norm": 0.42224568128585815,
      "learning_rate": 5.550628282255887e-06,
      "loss": 0.0118,
      "step": 1370500
    },
    {
      "epoch": 2.2428860391586967,
      "grad_norm": 0.9011918306350708,
      "learning_rate": 5.550562390042369e-06,
      "loss": 0.011,
      "step": 1370520
    },
    {
      "epoch": 2.2429187695973503,
      "grad_norm": 0.44603127241134644,
      "learning_rate": 5.550496497828852e-06,
      "loss": 0.018,
      "step": 1370540
    },
    {
      "epoch": 2.2429515000360034,
      "grad_norm": 1.4137425422668457,
      "learning_rate": 5.550430605615335e-06,
      "loss": 0.0184,
      "step": 1370560
    },
    {
      "epoch": 2.242984230474657,
      "grad_norm": 0.6382260918617249,
      "learning_rate": 5.550364713401818e-06,
      "loss": 0.0136,
      "step": 1370580
    },
    {
      "epoch": 2.24301696091331,
      "grad_norm": 0.2649935483932495,
      "learning_rate": 5.5502988211883e-06,
      "loss": 0.0147,
      "step": 1370600
    },
    {
      "epoch": 2.2430496913519633,
      "grad_norm": 0.7250300645828247,
      "learning_rate": 5.550232928974784e-06,
      "loss": 0.0196,
      "step": 1370620
    },
    {
      "epoch": 2.243082421790617,
      "grad_norm": 0.3279968202114105,
      "learning_rate": 5.550167036761266e-06,
      "loss": 0.0163,
      "step": 1370640
    },
    {
      "epoch": 2.24311515222927,
      "grad_norm": 0.1320665031671524,
      "learning_rate": 5.5501011445477495e-06,
      "loss": 0.0107,
      "step": 1370660
    },
    {
      "epoch": 2.2431478826679236,
      "grad_norm": 0.4755939841270447,
      "learning_rate": 5.550035252334231e-06,
      "loss": 0.0187,
      "step": 1370680
    },
    {
      "epoch": 2.243180613106577,
      "grad_norm": 0.32551753520965576,
      "learning_rate": 5.549969360120715e-06,
      "loss": 0.0121,
      "step": 1370700
    },
    {
      "epoch": 2.2432133435452304,
      "grad_norm": 0.2682018578052521,
      "learning_rate": 5.5499034679071985e-06,
      "loss": 0.0116,
      "step": 1370720
    },
    {
      "epoch": 2.2432460739838835,
      "grad_norm": 0.8226937055587769,
      "learning_rate": 5.54983757569368e-06,
      "loss": 0.0104,
      "step": 1370740
    },
    {
      "epoch": 2.2432788044225367,
      "grad_norm": 0.1886703222990036,
      "learning_rate": 5.549771683480164e-06,
      "loss": 0.0119,
      "step": 1370760
    },
    {
      "epoch": 2.2433115348611903,
      "grad_norm": 0.32171306014060974,
      "learning_rate": 5.549705791266647e-06,
      "loss": 0.023,
      "step": 1370780
    },
    {
      "epoch": 2.2433442652998434,
      "grad_norm": 0.148208349943161,
      "learning_rate": 5.5496398990531295e-06,
      "loss": 0.0113,
      "step": 1370800
    },
    {
      "epoch": 2.243376995738497,
      "grad_norm": 0.16272810101509094,
      "learning_rate": 5.549574006839612e-06,
      "loss": 0.0149,
      "step": 1370820
    },
    {
      "epoch": 2.24340972617715,
      "grad_norm": 0.2353217601776123,
      "learning_rate": 5.549508114626096e-06,
      "loss": 0.0136,
      "step": 1370840
    },
    {
      "epoch": 2.2434424566158038,
      "grad_norm": 0.4547608494758606,
      "learning_rate": 5.549442222412578e-06,
      "loss": 0.0131,
      "step": 1370860
    },
    {
      "epoch": 2.243475187054457,
      "grad_norm": 0.25022149085998535,
      "learning_rate": 5.549376330199061e-06,
      "loss": 0.0139,
      "step": 1370880
    },
    {
      "epoch": 2.24350791749311,
      "grad_norm": 0.11888650804758072,
      "learning_rate": 5.549310437985543e-06,
      "loss": 0.0112,
      "step": 1370900
    },
    {
      "epoch": 2.2435406479317637,
      "grad_norm": 0.4031195640563965,
      "learning_rate": 5.549244545772027e-06,
      "loss": 0.02,
      "step": 1370920
    },
    {
      "epoch": 2.243573378370417,
      "grad_norm": 0.6164426803588867,
      "learning_rate": 5.549178653558509e-06,
      "loss": 0.0158,
      "step": 1370940
    },
    {
      "epoch": 2.2436061088090704,
      "grad_norm": 0.4499327540397644,
      "learning_rate": 5.549112761344992e-06,
      "loss": 0.0119,
      "step": 1370960
    },
    {
      "epoch": 2.2436388392477236,
      "grad_norm": 0.3762168288230896,
      "learning_rate": 5.549046869131475e-06,
      "loss": 0.0281,
      "step": 1370980
    },
    {
      "epoch": 2.243671569686377,
      "grad_norm": 1.350352168083191,
      "learning_rate": 5.548980976917958e-06,
      "loss": 0.0101,
      "step": 1371000
    },
    {
      "epoch": 2.2437043001250303,
      "grad_norm": 0.3216637969017029,
      "learning_rate": 5.5489150847044405e-06,
      "loss": 0.0143,
      "step": 1371020
    },
    {
      "epoch": 2.2437370305636835,
      "grad_norm": 0.36976513266563416,
      "learning_rate": 5.548849192490924e-06,
      "loss": 0.0171,
      "step": 1371040
    },
    {
      "epoch": 2.243769761002337,
      "grad_norm": 0.2001013308763504,
      "learning_rate": 5.548783300277407e-06,
      "loss": 0.0149,
      "step": 1371060
    },
    {
      "epoch": 2.24380249144099,
      "grad_norm": 0.2451365739107132,
      "learning_rate": 5.5487174080638895e-06,
      "loss": 0.0155,
      "step": 1371080
    },
    {
      "epoch": 2.243835221879644,
      "grad_norm": 0.8426625728607178,
      "learning_rate": 5.548651515850373e-06,
      "loss": 0.0133,
      "step": 1371100
    },
    {
      "epoch": 2.243867952318297,
      "grad_norm": 0.981167733669281,
      "learning_rate": 5.548585623636855e-06,
      "loss": 0.0225,
      "step": 1371120
    },
    {
      "epoch": 2.2439006827569505,
      "grad_norm": 0.14518408477306366,
      "learning_rate": 5.548519731423339e-06,
      "loss": 0.0143,
      "step": 1371140
    },
    {
      "epoch": 2.2439334131956037,
      "grad_norm": 0.08436454087495804,
      "learning_rate": 5.5484538392098205e-06,
      "loss": 0.0135,
      "step": 1371160
    },
    {
      "epoch": 2.243966143634257,
      "grad_norm": 0.2596355676651001,
      "learning_rate": 5.548387946996304e-06,
      "loss": 0.0136,
      "step": 1371180
    },
    {
      "epoch": 2.2439988740729104,
      "grad_norm": 1.1689202785491943,
      "learning_rate": 5.548322054782787e-06,
      "loss": 0.0184,
      "step": 1371200
    },
    {
      "epoch": 2.2440316045115636,
      "grad_norm": 0.16716419160366058,
      "learning_rate": 5.5482561625692696e-06,
      "loss": 0.0111,
      "step": 1371220
    },
    {
      "epoch": 2.244064334950217,
      "grad_norm": 0.2820802330970764,
      "learning_rate": 5.548190270355752e-06,
      "loss": 0.0132,
      "step": 1371240
    },
    {
      "epoch": 2.2440970653888703,
      "grad_norm": 0.3231707513332367,
      "learning_rate": 5.548124378142236e-06,
      "loss": 0.0187,
      "step": 1371260
    },
    {
      "epoch": 2.2441297958275235,
      "grad_norm": 0.6122592091560364,
      "learning_rate": 5.548058485928718e-06,
      "loss": 0.0151,
      "step": 1371280
    },
    {
      "epoch": 2.244162526266177,
      "grad_norm": 0.5807884335517883,
      "learning_rate": 5.547992593715201e-06,
      "loss": 0.0186,
      "step": 1371300
    },
    {
      "epoch": 2.24419525670483,
      "grad_norm": 0.26362812519073486,
      "learning_rate": 5.547926701501683e-06,
      "loss": 0.012,
      "step": 1371320
    },
    {
      "epoch": 2.244227987143484,
      "grad_norm": 0.5738351941108704,
      "learning_rate": 5.547860809288167e-06,
      "loss": 0.0159,
      "step": 1371340
    },
    {
      "epoch": 2.244260717582137,
      "grad_norm": 0.9317950010299683,
      "learning_rate": 5.54779491707465e-06,
      "loss": 0.0114,
      "step": 1371360
    },
    {
      "epoch": 2.2442934480207906,
      "grad_norm": 0.2744489908218384,
      "learning_rate": 5.547729024861132e-06,
      "loss": 0.0138,
      "step": 1371380
    },
    {
      "epoch": 2.2443261784594437,
      "grad_norm": 0.6425621509552002,
      "learning_rate": 5.547663132647616e-06,
      "loss": 0.0191,
      "step": 1371400
    },
    {
      "epoch": 2.244358908898097,
      "grad_norm": 0.2819529175758362,
      "learning_rate": 5.547597240434099e-06,
      "loss": 0.0285,
      "step": 1371420
    },
    {
      "epoch": 2.2443916393367505,
      "grad_norm": 0.40998125076293945,
      "learning_rate": 5.547531348220581e-06,
      "loss": 0.0168,
      "step": 1371440
    },
    {
      "epoch": 2.2444243697754036,
      "grad_norm": 0.5964175462722778,
      "learning_rate": 5.547465456007064e-06,
      "loss": 0.0217,
      "step": 1371460
    },
    {
      "epoch": 2.244457100214057,
      "grad_norm": 0.5446300506591797,
      "learning_rate": 5.547399563793548e-06,
      "loss": 0.0142,
      "step": 1371480
    },
    {
      "epoch": 2.2444898306527103,
      "grad_norm": 0.09256575256586075,
      "learning_rate": 5.54733367158003e-06,
      "loss": 0.011,
      "step": 1371500
    },
    {
      "epoch": 2.244522561091364,
      "grad_norm": 0.4397875666618347,
      "learning_rate": 5.547267779366513e-06,
      "loss": 0.0158,
      "step": 1371520
    },
    {
      "epoch": 2.244555291530017,
      "grad_norm": 0.3806670010089874,
      "learning_rate": 5.547201887152995e-06,
      "loss": 0.0222,
      "step": 1371540
    },
    {
      "epoch": 2.2445880219686702,
      "grad_norm": 0.2810581624507904,
      "learning_rate": 5.547135994939479e-06,
      "loss": 0.02,
      "step": 1371560
    },
    {
      "epoch": 2.244620752407324,
      "grad_norm": 0.5921229720115662,
      "learning_rate": 5.547070102725961e-06,
      "loss": 0.0132,
      "step": 1371580
    },
    {
      "epoch": 2.244653482845977,
      "grad_norm": 0.2874227464199066,
      "learning_rate": 5.547004210512444e-06,
      "loss": 0.0163,
      "step": 1371600
    },
    {
      "epoch": 2.2446862132846306,
      "grad_norm": 0.37866929173469543,
      "learning_rate": 5.546938318298927e-06,
      "loss": 0.0125,
      "step": 1371620
    },
    {
      "epoch": 2.2447189437232837,
      "grad_norm": 0.09721409529447556,
      "learning_rate": 5.5468724260854105e-06,
      "loss": 0.0114,
      "step": 1371640
    },
    {
      "epoch": 2.244751674161937,
      "grad_norm": 0.2826786935329437,
      "learning_rate": 5.546806533871892e-06,
      "loss": 0.0171,
      "step": 1371660
    },
    {
      "epoch": 2.2447844046005905,
      "grad_norm": 0.15486986935138702,
      "learning_rate": 5.546740641658376e-06,
      "loss": 0.0183,
      "step": 1371680
    },
    {
      "epoch": 2.2448171350392436,
      "grad_norm": 0.4391692876815796,
      "learning_rate": 5.546674749444858e-06,
      "loss": 0.0159,
      "step": 1371700
    },
    {
      "epoch": 2.244849865477897,
      "grad_norm": 0.08992402255535126,
      "learning_rate": 5.5466088572313414e-06,
      "loss": 0.0167,
      "step": 1371720
    },
    {
      "epoch": 2.2448825959165504,
      "grad_norm": 0.6847670674324036,
      "learning_rate": 5.546542965017823e-06,
      "loss": 0.0164,
      "step": 1371740
    },
    {
      "epoch": 2.244915326355204,
      "grad_norm": 0.4841347634792328,
      "learning_rate": 5.546477072804307e-06,
      "loss": 0.0135,
      "step": 1371760
    },
    {
      "epoch": 2.244948056793857,
      "grad_norm": 0.6125823259353638,
      "learning_rate": 5.5464111805907905e-06,
      "loss": 0.0142,
      "step": 1371780
    },
    {
      "epoch": 2.2449807872325103,
      "grad_norm": 0.2371985912322998,
      "learning_rate": 5.546345288377272e-06,
      "loss": 0.0199,
      "step": 1371800
    },
    {
      "epoch": 2.245013517671164,
      "grad_norm": 0.09294547140598297,
      "learning_rate": 5.546279396163756e-06,
      "loss": 0.0175,
      "step": 1371820
    },
    {
      "epoch": 2.245046248109817,
      "grad_norm": 0.27595776319503784,
      "learning_rate": 5.546213503950239e-06,
      "loss": 0.0104,
      "step": 1371840
    },
    {
      "epoch": 2.2450789785484706,
      "grad_norm": 0.1299019604921341,
      "learning_rate": 5.5461476117367215e-06,
      "loss": 0.0119,
      "step": 1371860
    },
    {
      "epoch": 2.2451117089871238,
      "grad_norm": 0.33052030205726624,
      "learning_rate": 5.546081719523204e-06,
      "loss": 0.0148,
      "step": 1371880
    },
    {
      "epoch": 2.2451444394257773,
      "grad_norm": 0.6882138252258301,
      "learning_rate": 5.546015827309688e-06,
      "loss": 0.0267,
      "step": 1371900
    },
    {
      "epoch": 2.2451771698644305,
      "grad_norm": 0.21327345073223114,
      "learning_rate": 5.54594993509617e-06,
      "loss": 0.0151,
      "step": 1371920
    },
    {
      "epoch": 2.2452099003030836,
      "grad_norm": 0.3617042601108551,
      "learning_rate": 5.545884042882653e-06,
      "loss": 0.0185,
      "step": 1371940
    },
    {
      "epoch": 2.2452426307417372,
      "grad_norm": 0.1413353979587555,
      "learning_rate": 5.545818150669135e-06,
      "loss": 0.0126,
      "step": 1371960
    },
    {
      "epoch": 2.2452753611803904,
      "grad_norm": 0.09514671564102173,
      "learning_rate": 5.545752258455619e-06,
      "loss": 0.0129,
      "step": 1371980
    },
    {
      "epoch": 2.245308091619044,
      "grad_norm": 0.5672299265861511,
      "learning_rate": 5.5456863662421015e-06,
      "loss": 0.0142,
      "step": 1372000
    },
    {
      "epoch": 2.245340822057697,
      "grad_norm": 0.3111478388309479,
      "learning_rate": 5.545620474028584e-06,
      "loss": 0.0149,
      "step": 1372020
    },
    {
      "epoch": 2.2453735524963507,
      "grad_norm": 0.140813410282135,
      "learning_rate": 5.545554581815067e-06,
      "loss": 0.0097,
      "step": 1372040
    },
    {
      "epoch": 2.245406282935004,
      "grad_norm": 0.33804622292518616,
      "learning_rate": 5.5454886896015506e-06,
      "loss": 0.0178,
      "step": 1372060
    },
    {
      "epoch": 2.245439013373657,
      "grad_norm": 0.8139005303382874,
      "learning_rate": 5.5454227973880325e-06,
      "loss": 0.0122,
      "step": 1372080
    },
    {
      "epoch": 2.2454717438123106,
      "grad_norm": 0.09812275320291519,
      "learning_rate": 5.545356905174516e-06,
      "loss": 0.0197,
      "step": 1372100
    },
    {
      "epoch": 2.2455044742509638,
      "grad_norm": 0.4623725116252899,
      "learning_rate": 5.545291012961e-06,
      "loss": 0.0168,
      "step": 1372120
    },
    {
      "epoch": 2.2455372046896174,
      "grad_norm": 1.3745536804199219,
      "learning_rate": 5.5452251207474815e-06,
      "loss": 0.0118,
      "step": 1372140
    },
    {
      "epoch": 2.2455699351282705,
      "grad_norm": 0.3207639455795288,
      "learning_rate": 5.545159228533965e-06,
      "loss": 0.0126,
      "step": 1372160
    },
    {
      "epoch": 2.245602665566924,
      "grad_norm": 0.18473263084888458,
      "learning_rate": 5.545093336320447e-06,
      "loss": 0.0231,
      "step": 1372180
    },
    {
      "epoch": 2.2456353960055773,
      "grad_norm": 0.5247711539268494,
      "learning_rate": 5.545027444106931e-06,
      "loss": 0.0094,
      "step": 1372200
    },
    {
      "epoch": 2.2456681264442304,
      "grad_norm": 0.29001086950302124,
      "learning_rate": 5.544961551893413e-06,
      "loss": 0.02,
      "step": 1372220
    },
    {
      "epoch": 2.245700856882884,
      "grad_norm": 0.1531447470188141,
      "learning_rate": 5.544895659679896e-06,
      "loss": 0.0143,
      "step": 1372240
    },
    {
      "epoch": 2.245733587321537,
      "grad_norm": 2.2574589252471924,
      "learning_rate": 5.544829767466379e-06,
      "loss": 0.0201,
      "step": 1372260
    },
    {
      "epoch": 2.2457663177601908,
      "grad_norm": 0.4028722941875458,
      "learning_rate": 5.544763875252862e-06,
      "loss": 0.0163,
      "step": 1372280
    },
    {
      "epoch": 2.245799048198844,
      "grad_norm": 0.08028844743967056,
      "learning_rate": 5.544697983039344e-06,
      "loss": 0.0076,
      "step": 1372300
    },
    {
      "epoch": 2.2458317786374975,
      "grad_norm": 0.12957757711410522,
      "learning_rate": 5.544632090825828e-06,
      "loss": 0.0214,
      "step": 1372320
    },
    {
      "epoch": 2.2458645090761507,
      "grad_norm": 0.6093565225601196,
      "learning_rate": 5.54456619861231e-06,
      "loss": 0.0107,
      "step": 1372340
    },
    {
      "epoch": 2.245897239514804,
      "grad_norm": 0.33197999000549316,
      "learning_rate": 5.544500306398793e-06,
      "loss": 0.0164,
      "step": 1372360
    },
    {
      "epoch": 2.2459299699534574,
      "grad_norm": 0.27349644899368286,
      "learning_rate": 5.544434414185276e-06,
      "loss": 0.0139,
      "step": 1372380
    },
    {
      "epoch": 2.2459627003921105,
      "grad_norm": 0.1535354107618332,
      "learning_rate": 5.544368521971759e-06,
      "loss": 0.0139,
      "step": 1372400
    },
    {
      "epoch": 2.245995430830764,
      "grad_norm": 0.38131463527679443,
      "learning_rate": 5.5443026297582416e-06,
      "loss": 0.0169,
      "step": 1372420
    },
    {
      "epoch": 2.2460281612694173,
      "grad_norm": 0.607406497001648,
      "learning_rate": 5.544236737544725e-06,
      "loss": 0.0172,
      "step": 1372440
    },
    {
      "epoch": 2.246060891708071,
      "grad_norm": 0.2320462018251419,
      "learning_rate": 5.544170845331208e-06,
      "loss": 0.0146,
      "step": 1372460
    },
    {
      "epoch": 2.246093622146724,
      "grad_norm": 0.8360798954963684,
      "learning_rate": 5.544104953117691e-06,
      "loss": 0.0194,
      "step": 1372480
    },
    {
      "epoch": 2.246126352585377,
      "grad_norm": 0.71949702501297,
      "learning_rate": 5.544039060904174e-06,
      "loss": 0.014,
      "step": 1372500
    },
    {
      "epoch": 2.246159083024031,
      "grad_norm": 0.14672359824180603,
      "learning_rate": 5.543973168690656e-06,
      "loss": 0.0139,
      "step": 1372520
    },
    {
      "epoch": 2.246191813462684,
      "grad_norm": 0.23145198822021484,
      "learning_rate": 5.54390727647714e-06,
      "loss": 0.0118,
      "step": 1372540
    },
    {
      "epoch": 2.2462245439013375,
      "grad_norm": 0.4557618200778961,
      "learning_rate": 5.543841384263622e-06,
      "loss": 0.0156,
      "step": 1372560
    },
    {
      "epoch": 2.2462572743399907,
      "grad_norm": 0.5730674266815186,
      "learning_rate": 5.543775492050105e-06,
      "loss": 0.0123,
      "step": 1372580
    },
    {
      "epoch": 2.2462900047786443,
      "grad_norm": 0.6463748216629028,
      "learning_rate": 5.543709599836588e-06,
      "loss": 0.0115,
      "step": 1372600
    },
    {
      "epoch": 2.2463227352172974,
      "grad_norm": 0.12473877519369125,
      "learning_rate": 5.543643707623071e-06,
      "loss": 0.0174,
      "step": 1372620
    },
    {
      "epoch": 2.2463554656559506,
      "grad_norm": 0.32578229904174805,
      "learning_rate": 5.543577815409553e-06,
      "loss": 0.0126,
      "step": 1372640
    },
    {
      "epoch": 2.246388196094604,
      "grad_norm": 0.2163025587797165,
      "learning_rate": 5.543511923196037e-06,
      "loss": 0.0233,
      "step": 1372660
    },
    {
      "epoch": 2.2464209265332573,
      "grad_norm": 0.17987971007823944,
      "learning_rate": 5.543446030982519e-06,
      "loss": 0.01,
      "step": 1372680
    },
    {
      "epoch": 2.246453656971911,
      "grad_norm": 0.7078844904899597,
      "learning_rate": 5.5433801387690025e-06,
      "loss": 0.0189,
      "step": 1372700
    },
    {
      "epoch": 2.246486387410564,
      "grad_norm": 0.19531364738941193,
      "learning_rate": 5.543314246555484e-06,
      "loss": 0.0145,
      "step": 1372720
    },
    {
      "epoch": 2.2465191178492177,
      "grad_norm": 0.3057789206504822,
      "learning_rate": 5.543248354341968e-06,
      "loss": 0.015,
      "step": 1372740
    },
    {
      "epoch": 2.246551848287871,
      "grad_norm": 0.4566376507282257,
      "learning_rate": 5.54318246212845e-06,
      "loss": 0.0221,
      "step": 1372760
    },
    {
      "epoch": 2.246584578726524,
      "grad_norm": 0.21736156940460205,
      "learning_rate": 5.5431165699149334e-06,
      "loss": 0.0164,
      "step": 1372780
    },
    {
      "epoch": 2.2466173091651775,
      "grad_norm": 0.2554379105567932,
      "learning_rate": 5.543050677701417e-06,
      "loss": 0.0145,
      "step": 1372800
    },
    {
      "epoch": 2.2466500396038307,
      "grad_norm": 0.7865273952484131,
      "learning_rate": 5.542984785487899e-06,
      "loss": 0.0166,
      "step": 1372820
    },
    {
      "epoch": 2.2466827700424843,
      "grad_norm": 0.40859556198120117,
      "learning_rate": 5.5429188932743825e-06,
      "loss": 0.0124,
      "step": 1372840
    },
    {
      "epoch": 2.2467155004811374,
      "grad_norm": 0.19731296598911285,
      "learning_rate": 5.542853001060865e-06,
      "loss": 0.0129,
      "step": 1372860
    },
    {
      "epoch": 2.2467482309197906,
      "grad_norm": 0.08112208545207977,
      "learning_rate": 5.542787108847348e-06,
      "loss": 0.015,
      "step": 1372880
    },
    {
      "epoch": 2.246780961358444,
      "grad_norm": 0.8806554079055786,
      "learning_rate": 5.542721216633831e-06,
      "loss": 0.0204,
      "step": 1372900
    },
    {
      "epoch": 2.2468136917970973,
      "grad_norm": 0.43811309337615967,
      "learning_rate": 5.542655324420314e-06,
      "loss": 0.0147,
      "step": 1372920
    },
    {
      "epoch": 2.246846422235751,
      "grad_norm": 0.1746099591255188,
      "learning_rate": 5.542589432206796e-06,
      "loss": 0.0085,
      "step": 1372940
    },
    {
      "epoch": 2.246879152674404,
      "grad_norm": 0.7706158757209778,
      "learning_rate": 5.54252353999328e-06,
      "loss": 0.0116,
      "step": 1372960
    },
    {
      "epoch": 2.2469118831130577,
      "grad_norm": 0.3914346396923065,
      "learning_rate": 5.542457647779762e-06,
      "loss": 0.0169,
      "step": 1372980
    },
    {
      "epoch": 2.246944613551711,
      "grad_norm": 0.21905876696109772,
      "learning_rate": 5.542391755566245e-06,
      "loss": 0.0215,
      "step": 1373000
    },
    {
      "epoch": 2.246977343990364,
      "grad_norm": 0.8106611967086792,
      "learning_rate": 5.542325863352728e-06,
      "loss": 0.0108,
      "step": 1373020
    },
    {
      "epoch": 2.2470100744290176,
      "grad_norm": 0.6866719126701355,
      "learning_rate": 5.542259971139211e-06,
      "loss": 0.0179,
      "step": 1373040
    },
    {
      "epoch": 2.2470428048676707,
      "grad_norm": 0.40489259362220764,
      "learning_rate": 5.5421940789256935e-06,
      "loss": 0.0177,
      "step": 1373060
    },
    {
      "epoch": 2.2470755353063243,
      "grad_norm": 1.0069639682769775,
      "learning_rate": 5.542128186712177e-06,
      "loss": 0.0172,
      "step": 1373080
    },
    {
      "epoch": 2.2471082657449775,
      "grad_norm": 0.039173103868961334,
      "learning_rate": 5.542062294498659e-06,
      "loss": 0.0141,
      "step": 1373100
    },
    {
      "epoch": 2.247140996183631,
      "grad_norm": 0.9856888055801392,
      "learning_rate": 5.5419964022851425e-06,
      "loss": 0.0145,
      "step": 1373120
    },
    {
      "epoch": 2.247173726622284,
      "grad_norm": 0.5044216513633728,
      "learning_rate": 5.5419305100716244e-06,
      "loss": 0.0127,
      "step": 1373140
    },
    {
      "epoch": 2.2472064570609374,
      "grad_norm": 1.1506638526916504,
      "learning_rate": 5.541864617858108e-06,
      "loss": 0.0287,
      "step": 1373160
    },
    {
      "epoch": 2.247239187499591,
      "grad_norm": 0.3274185061454773,
      "learning_rate": 5.541798725644592e-06,
      "loss": 0.0229,
      "step": 1373180
    },
    {
      "epoch": 2.247271917938244,
      "grad_norm": 0.08760059624910355,
      "learning_rate": 5.5417328334310735e-06,
      "loss": 0.0156,
      "step": 1373200
    },
    {
      "epoch": 2.2473046483768977,
      "grad_norm": 0.27319011092185974,
      "learning_rate": 5.541666941217557e-06,
      "loss": 0.007,
      "step": 1373220
    },
    {
      "epoch": 2.247337378815551,
      "grad_norm": 0.23563596606254578,
      "learning_rate": 5.54160104900404e-06,
      "loss": 0.0146,
      "step": 1373240
    },
    {
      "epoch": 2.247370109254204,
      "grad_norm": 0.04872674122452736,
      "learning_rate": 5.5415351567905226e-06,
      "loss": 0.0141,
      "step": 1373260
    },
    {
      "epoch": 2.2474028396928576,
      "grad_norm": 0.4205869138240814,
      "learning_rate": 5.541469264577005e-06,
      "loss": 0.0135,
      "step": 1373280
    },
    {
      "epoch": 2.2474355701315107,
      "grad_norm": 0.4493781626224518,
      "learning_rate": 5.541403372363489e-06,
      "loss": 0.0163,
      "step": 1373300
    },
    {
      "epoch": 2.2474683005701643,
      "grad_norm": 0.3400125205516815,
      "learning_rate": 5.541337480149971e-06,
      "loss": 0.0149,
      "step": 1373320
    },
    {
      "epoch": 2.2475010310088175,
      "grad_norm": 0.46453917026519775,
      "learning_rate": 5.541271587936454e-06,
      "loss": 0.0185,
      "step": 1373340
    },
    {
      "epoch": 2.247533761447471,
      "grad_norm": 1.2443538904190063,
      "learning_rate": 5.541205695722936e-06,
      "loss": 0.0204,
      "step": 1373360
    },
    {
      "epoch": 2.2475664918861242,
      "grad_norm": 0.281111478805542,
      "learning_rate": 5.54113980350942e-06,
      "loss": 0.0177,
      "step": 1373380
    },
    {
      "epoch": 2.2475992223247774,
      "grad_norm": 0.43739497661590576,
      "learning_rate": 5.541073911295903e-06,
      "loss": 0.0132,
      "step": 1373400
    },
    {
      "epoch": 2.247631952763431,
      "grad_norm": 0.2689248323440552,
      "learning_rate": 5.541008019082385e-06,
      "loss": 0.0185,
      "step": 1373420
    },
    {
      "epoch": 2.247664683202084,
      "grad_norm": 0.3144737780094147,
      "learning_rate": 5.540942126868868e-06,
      "loss": 0.0131,
      "step": 1373440
    },
    {
      "epoch": 2.2476974136407377,
      "grad_norm": 0.08607574552297592,
      "learning_rate": 5.540876234655352e-06,
      "loss": 0.0169,
      "step": 1373460
    },
    {
      "epoch": 2.247730144079391,
      "grad_norm": 0.41504383087158203,
      "learning_rate": 5.5408103424418336e-06,
      "loss": 0.0201,
      "step": 1373480
    },
    {
      "epoch": 2.2477628745180445,
      "grad_norm": 6.807074546813965,
      "learning_rate": 5.540744450228317e-06,
      "loss": 0.0148,
      "step": 1373500
    },
    {
      "epoch": 2.2477956049566976,
      "grad_norm": 0.25507235527038574,
      "learning_rate": 5.540678558014801e-06,
      "loss": 0.0154,
      "step": 1373520
    },
    {
      "epoch": 2.2478283353953508,
      "grad_norm": 0.2080191820859909,
      "learning_rate": 5.540612665801283e-06,
      "loss": 0.0128,
      "step": 1373540
    },
    {
      "epoch": 2.2478610658340044,
      "grad_norm": 0.17443034052848816,
      "learning_rate": 5.540546773587766e-06,
      "loss": 0.0117,
      "step": 1373560
    },
    {
      "epoch": 2.2478937962726575,
      "grad_norm": 0.18914492428302765,
      "learning_rate": 5.540480881374248e-06,
      "loss": 0.0158,
      "step": 1373580
    },
    {
      "epoch": 2.247926526711311,
      "grad_norm": 0.8268307447433472,
      "learning_rate": 5.540414989160732e-06,
      "loss": 0.0143,
      "step": 1373600
    },
    {
      "epoch": 2.2479592571499643,
      "grad_norm": 0.34128305315971375,
      "learning_rate": 5.5403490969472144e-06,
      "loss": 0.0174,
      "step": 1373620
    },
    {
      "epoch": 2.247991987588618,
      "grad_norm": 0.6194258332252502,
      "learning_rate": 5.540283204733697e-06,
      "loss": 0.0166,
      "step": 1373640
    },
    {
      "epoch": 2.248024718027271,
      "grad_norm": 0.19398923218250275,
      "learning_rate": 5.54021731252018e-06,
      "loss": 0.0142,
      "step": 1373660
    },
    {
      "epoch": 2.248057448465924,
      "grad_norm": 0.3153376877307892,
      "learning_rate": 5.5401514203066635e-06,
      "loss": 0.015,
      "step": 1373680
    },
    {
      "epoch": 2.2480901789045777,
      "grad_norm": 0.5697495341300964,
      "learning_rate": 5.540085528093145e-06,
      "loss": 0.0187,
      "step": 1373700
    },
    {
      "epoch": 2.248122909343231,
      "grad_norm": 0.41629302501678467,
      "learning_rate": 5.540019635879629e-06,
      "loss": 0.0211,
      "step": 1373720
    },
    {
      "epoch": 2.2481556397818845,
      "grad_norm": 0.21843701601028442,
      "learning_rate": 5.539953743666111e-06,
      "loss": 0.0196,
      "step": 1373740
    },
    {
      "epoch": 2.2481883702205376,
      "grad_norm": 0.4790377914905548,
      "learning_rate": 5.5398878514525945e-06,
      "loss": 0.0157,
      "step": 1373760
    },
    {
      "epoch": 2.2482211006591912,
      "grad_norm": 0.7155584692955017,
      "learning_rate": 5.539821959239076e-06,
      "loss": 0.0162,
      "step": 1373780
    },
    {
      "epoch": 2.2482538310978444,
      "grad_norm": 0.5669248104095459,
      "learning_rate": 5.53975606702556e-06,
      "loss": 0.0145,
      "step": 1373800
    },
    {
      "epoch": 2.2482865615364975,
      "grad_norm": 0.1085522249341011,
      "learning_rate": 5.539690174812043e-06,
      "loss": 0.0093,
      "step": 1373820
    },
    {
      "epoch": 2.248319291975151,
      "grad_norm": 0.3116289973258972,
      "learning_rate": 5.539624282598525e-06,
      "loss": 0.0147,
      "step": 1373840
    },
    {
      "epoch": 2.2483520224138043,
      "grad_norm": 0.4230653643608093,
      "learning_rate": 5.539558390385009e-06,
      "loss": 0.014,
      "step": 1373860
    },
    {
      "epoch": 2.248384752852458,
      "grad_norm": 0.556814432144165,
      "learning_rate": 5.539492498171492e-06,
      "loss": 0.0183,
      "step": 1373880
    },
    {
      "epoch": 2.248417483291111,
      "grad_norm": 0.5378785729408264,
      "learning_rate": 5.5394266059579745e-06,
      "loss": 0.0135,
      "step": 1373900
    },
    {
      "epoch": 2.2484502137297646,
      "grad_norm": 0.27047744393348694,
      "learning_rate": 5.539360713744457e-06,
      "loss": 0.0136,
      "step": 1373920
    },
    {
      "epoch": 2.2484829441684178,
      "grad_norm": 0.20324675738811493,
      "learning_rate": 5.539294821530941e-06,
      "loss": 0.0131,
      "step": 1373940
    },
    {
      "epoch": 2.248515674607071,
      "grad_norm": 0.1235925704240799,
      "learning_rate": 5.539228929317423e-06,
      "loss": 0.0168,
      "step": 1373960
    },
    {
      "epoch": 2.2485484050457245,
      "grad_norm": 0.7367489337921143,
      "learning_rate": 5.539163037103906e-06,
      "loss": 0.014,
      "step": 1373980
    },
    {
      "epoch": 2.2485811354843777,
      "grad_norm": 0.28373777866363525,
      "learning_rate": 5.539097144890388e-06,
      "loss": 0.014,
      "step": 1374000
    },
    {
      "epoch": 2.2486138659230313,
      "grad_norm": 0.36984992027282715,
      "learning_rate": 5.539031252676872e-06,
      "loss": 0.0109,
      "step": 1374020
    },
    {
      "epoch": 2.2486465963616844,
      "grad_norm": 0.41008105874061584,
      "learning_rate": 5.5389653604633545e-06,
      "loss": 0.0134,
      "step": 1374040
    },
    {
      "epoch": 2.248679326800338,
      "grad_norm": 0.6974664926528931,
      "learning_rate": 5.538899468249837e-06,
      "loss": 0.0186,
      "step": 1374060
    },
    {
      "epoch": 2.248712057238991,
      "grad_norm": 0.11025907099246979,
      "learning_rate": 5.53883357603632e-06,
      "loss": 0.0135,
      "step": 1374080
    },
    {
      "epoch": 2.2487447876776443,
      "grad_norm": 0.23703013360500336,
      "learning_rate": 5.5387676838228036e-06,
      "loss": 0.0146,
      "step": 1374100
    },
    {
      "epoch": 2.248777518116298,
      "grad_norm": 1.15140962600708,
      "learning_rate": 5.5387017916092855e-06,
      "loss": 0.0218,
      "step": 1374120
    },
    {
      "epoch": 2.248810248554951,
      "grad_norm": 0.33773720264434814,
      "learning_rate": 5.538635899395769e-06,
      "loss": 0.0215,
      "step": 1374140
    },
    {
      "epoch": 2.2488429789936046,
      "grad_norm": 0.8112822771072388,
      "learning_rate": 5.538570007182251e-06,
      "loss": 0.0144,
      "step": 1374160
    },
    {
      "epoch": 2.248875709432258,
      "grad_norm": 0.3988746702671051,
      "learning_rate": 5.5385041149687345e-06,
      "loss": 0.0158,
      "step": 1374180
    },
    {
      "epoch": 2.2489084398709114,
      "grad_norm": 0.07375302910804749,
      "learning_rate": 5.538438222755217e-06,
      "loss": 0.0197,
      "step": 1374200
    },
    {
      "epoch": 2.2489411703095645,
      "grad_norm": 0.1696256697177887,
      "learning_rate": 5.5383723305417e-06,
      "loss": 0.0164,
      "step": 1374220
    },
    {
      "epoch": 2.2489739007482177,
      "grad_norm": 0.26391908526420593,
      "learning_rate": 5.538306438328184e-06,
      "loss": 0.017,
      "step": 1374240
    },
    {
      "epoch": 2.2490066311868713,
      "grad_norm": 0.23013000190258026,
      "learning_rate": 5.538240546114666e-06,
      "loss": 0.0211,
      "step": 1374260
    },
    {
      "epoch": 2.2490393616255244,
      "grad_norm": 0.5080412030220032,
      "learning_rate": 5.538174653901149e-06,
      "loss": 0.0141,
      "step": 1374280
    },
    {
      "epoch": 2.249072092064178,
      "grad_norm": 0.29571858048439026,
      "learning_rate": 5.538108761687632e-06,
      "loss": 0.016,
      "step": 1374300
    },
    {
      "epoch": 2.249104822502831,
      "grad_norm": 0.20827555656433105,
      "learning_rate": 5.538042869474115e-06,
      "loss": 0.0169,
      "step": 1374320
    },
    {
      "epoch": 2.2491375529414843,
      "grad_norm": 0.25161993503570557,
      "learning_rate": 5.537976977260597e-06,
      "loss": 0.0113,
      "step": 1374340
    },
    {
      "epoch": 2.249170283380138,
      "grad_norm": 0.32127735018730164,
      "learning_rate": 5.537911085047081e-06,
      "loss": 0.0154,
      "step": 1374360
    },
    {
      "epoch": 2.249203013818791,
      "grad_norm": 0.3685199022293091,
      "learning_rate": 5.537845192833563e-06,
      "loss": 0.0091,
      "step": 1374380
    },
    {
      "epoch": 2.2492357442574447,
      "grad_norm": 0.8121630549430847,
      "learning_rate": 5.537779300620046e-06,
      "loss": 0.0215,
      "step": 1374400
    },
    {
      "epoch": 2.249268474696098,
      "grad_norm": 0.5529375076293945,
      "learning_rate": 5.537713408406529e-06,
      "loss": 0.012,
      "step": 1374420
    },
    {
      "epoch": 2.2493012051347514,
      "grad_norm": 0.8038827776908875,
      "learning_rate": 5.537647516193012e-06,
      "loss": 0.0128,
      "step": 1374440
    },
    {
      "epoch": 2.2493339355734046,
      "grad_norm": 0.25557541847229004,
      "learning_rate": 5.537581623979495e-06,
      "loss": 0.0168,
      "step": 1374460
    },
    {
      "epoch": 2.2493666660120577,
      "grad_norm": 0.04327540099620819,
      "learning_rate": 5.537515731765978e-06,
      "loss": 0.0113,
      "step": 1374480
    },
    {
      "epoch": 2.2493993964507113,
      "grad_norm": 0.5427901148796082,
      "learning_rate": 5.53744983955246e-06,
      "loss": 0.0137,
      "step": 1374500
    },
    {
      "epoch": 2.2494321268893644,
      "grad_norm": 0.34251660108566284,
      "learning_rate": 5.537383947338944e-06,
      "loss": 0.0159,
      "step": 1374520
    },
    {
      "epoch": 2.249464857328018,
      "grad_norm": 0.22514264285564423,
      "learning_rate": 5.5373180551254255e-06,
      "loss": 0.0218,
      "step": 1374540
    },
    {
      "epoch": 2.249497587766671,
      "grad_norm": 0.6169595718383789,
      "learning_rate": 5.537252162911909e-06,
      "loss": 0.022,
      "step": 1374560
    },
    {
      "epoch": 2.249530318205325,
      "grad_norm": 0.7039880156517029,
      "learning_rate": 5.537186270698393e-06,
      "loss": 0.0137,
      "step": 1374580
    },
    {
      "epoch": 2.249563048643978,
      "grad_norm": 0.11628149449825287,
      "learning_rate": 5.537120378484875e-06,
      "loss": 0.0169,
      "step": 1374600
    },
    {
      "epoch": 2.249595779082631,
      "grad_norm": 0.2657354474067688,
      "learning_rate": 5.537054486271358e-06,
      "loss": 0.0158,
      "step": 1374620
    },
    {
      "epoch": 2.2496285095212847,
      "grad_norm": 0.20428287982940674,
      "learning_rate": 5.53698859405784e-06,
      "loss": 0.0231,
      "step": 1374640
    },
    {
      "epoch": 2.249661239959938,
      "grad_norm": 0.2745124101638794,
      "learning_rate": 5.536922701844324e-06,
      "loss": 0.0159,
      "step": 1374660
    },
    {
      "epoch": 2.2496939703985914,
      "grad_norm": 0.5600799322128296,
      "learning_rate": 5.536856809630806e-06,
      "loss": 0.0154,
      "step": 1374680
    },
    {
      "epoch": 2.2497267008372446,
      "grad_norm": 0.4146869480609894,
      "learning_rate": 5.53679091741729e-06,
      "loss": 0.0164,
      "step": 1374700
    },
    {
      "epoch": 2.2497594312758977,
      "grad_norm": 0.43243277072906494,
      "learning_rate": 5.536725025203772e-06,
      "loss": 0.0165,
      "step": 1374720
    },
    {
      "epoch": 2.2497921617145513,
      "grad_norm": 0.4258212745189667,
      "learning_rate": 5.5366591329902555e-06,
      "loss": 0.0134,
      "step": 1374740
    },
    {
      "epoch": 2.2498248921532045,
      "grad_norm": 0.44606268405914307,
      "learning_rate": 5.536593240776737e-06,
      "loss": 0.0197,
      "step": 1374760
    },
    {
      "epoch": 2.249857622591858,
      "grad_norm": 1.9239941835403442,
      "learning_rate": 5.536527348563221e-06,
      "loss": 0.0153,
      "step": 1374780
    },
    {
      "epoch": 2.249890353030511,
      "grad_norm": 0.4891388416290283,
      "learning_rate": 5.536461456349703e-06,
      "loss": 0.0112,
      "step": 1374800
    },
    {
      "epoch": 2.249923083469165,
      "grad_norm": 0.49372634291648865,
      "learning_rate": 5.5363955641361864e-06,
      "loss": 0.0099,
      "step": 1374820
    },
    {
      "epoch": 2.249955813907818,
      "grad_norm": 0.37231704592704773,
      "learning_rate": 5.536329671922669e-06,
      "loss": 0.0145,
      "step": 1374840
    },
    {
      "epoch": 2.249988544346471,
      "grad_norm": 0.13802120089530945,
      "learning_rate": 5.536263779709152e-06,
      "loss": 0.0163,
      "step": 1374860
    },
    {
      "epoch": 2.2500212747851247,
      "grad_norm": 0.5078522562980652,
      "learning_rate": 5.536197887495635e-06,
      "loss": 0.0123,
      "step": 1374880
    },
    {
      "epoch": 2.250054005223778,
      "grad_norm": 0.4168927073478699,
      "learning_rate": 5.536131995282118e-06,
      "loss": 0.0079,
      "step": 1374900
    },
    {
      "epoch": 2.2500867356624314,
      "grad_norm": 0.5151931643486023,
      "learning_rate": 5.536066103068601e-06,
      "loss": 0.014,
      "step": 1374920
    },
    {
      "epoch": 2.2501194661010846,
      "grad_norm": 0.30517858266830444,
      "learning_rate": 5.536000210855084e-06,
      "loss": 0.0172,
      "step": 1374940
    },
    {
      "epoch": 2.250152196539738,
      "grad_norm": 0.5336765050888062,
      "learning_rate": 5.535934318641567e-06,
      "loss": 0.0219,
      "step": 1374960
    },
    {
      "epoch": 2.2501849269783913,
      "grad_norm": 0.18988493084907532,
      "learning_rate": 5.535868426428049e-06,
      "loss": 0.0143,
      "step": 1374980
    },
    {
      "epoch": 2.2502176574170445,
      "grad_norm": 0.5168250799179077,
      "learning_rate": 5.535802534214533e-06,
      "loss": 0.0126,
      "step": 1375000
    },
    {
      "epoch": 2.250250387855698,
      "grad_norm": 0.5965242981910706,
      "learning_rate": 5.535736642001015e-06,
      "loss": 0.0143,
      "step": 1375020
    },
    {
      "epoch": 2.2502831182943512,
      "grad_norm": 0.5313877463340759,
      "learning_rate": 5.535670749787498e-06,
      "loss": 0.0205,
      "step": 1375040
    },
    {
      "epoch": 2.250315848733005,
      "grad_norm": 0.14816828072071075,
      "learning_rate": 5.535604857573981e-06,
      "loss": 0.0147,
      "step": 1375060
    },
    {
      "epoch": 2.250348579171658,
      "grad_norm": 0.5964608192443848,
      "learning_rate": 5.535538965360464e-06,
      "loss": 0.0194,
      "step": 1375080
    },
    {
      "epoch": 2.2503813096103116,
      "grad_norm": 0.4678182005882263,
      "learning_rate": 5.5354730731469465e-06,
      "loss": 0.0151,
      "step": 1375100
    },
    {
      "epoch": 2.2504140400489647,
      "grad_norm": 0.27304983139038086,
      "learning_rate": 5.53540718093343e-06,
      "loss": 0.0108,
      "step": 1375120
    },
    {
      "epoch": 2.250446770487618,
      "grad_norm": 0.244487464427948,
      "learning_rate": 5.535341288719912e-06,
      "loss": 0.0144,
      "step": 1375140
    },
    {
      "epoch": 2.2504795009262715,
      "grad_norm": 0.22400902211666107,
      "learning_rate": 5.5352753965063956e-06,
      "loss": 0.0159,
      "step": 1375160
    },
    {
      "epoch": 2.2505122313649246,
      "grad_norm": 0.4189170002937317,
      "learning_rate": 5.5352095042928774e-06,
      "loss": 0.0173,
      "step": 1375180
    },
    {
      "epoch": 2.250544961803578,
      "grad_norm": 0.3602234423160553,
      "learning_rate": 5.535143612079361e-06,
      "loss": 0.0114,
      "step": 1375200
    },
    {
      "epoch": 2.2505776922422314,
      "grad_norm": 0.36832892894744873,
      "learning_rate": 5.535077719865844e-06,
      "loss": 0.0169,
      "step": 1375220
    },
    {
      "epoch": 2.250610422680885,
      "grad_norm": 0.19208373129367828,
      "learning_rate": 5.5350118276523265e-06,
      "loss": 0.0199,
      "step": 1375240
    },
    {
      "epoch": 2.250643153119538,
      "grad_norm": 0.8806657791137695,
      "learning_rate": 5.53494593543881e-06,
      "loss": 0.0138,
      "step": 1375260
    },
    {
      "epoch": 2.2506758835581913,
      "grad_norm": 0.6155202388763428,
      "learning_rate": 5.534880043225293e-06,
      "loss": 0.017,
      "step": 1375280
    },
    {
      "epoch": 2.250708613996845,
      "grad_norm": 0.20877918601036072,
      "learning_rate": 5.5348141510117756e-06,
      "loss": 0.0125,
      "step": 1375300
    },
    {
      "epoch": 2.250741344435498,
      "grad_norm": 0.8198619484901428,
      "learning_rate": 5.534748258798258e-06,
      "loss": 0.0181,
      "step": 1375320
    },
    {
      "epoch": 2.2507740748741516,
      "grad_norm": 0.4208983778953552,
      "learning_rate": 5.534682366584742e-06,
      "loss": 0.0144,
      "step": 1375340
    },
    {
      "epoch": 2.2508068053128047,
      "grad_norm": 0.5040624141693115,
      "learning_rate": 5.534616474371224e-06,
      "loss": 0.0154,
      "step": 1375360
    },
    {
      "epoch": 2.2508395357514583,
      "grad_norm": 0.3918563425540924,
      "learning_rate": 5.534550582157707e-06,
      "loss": 0.0184,
      "step": 1375380
    },
    {
      "epoch": 2.2508722661901115,
      "grad_norm": 0.11197198927402496,
      "learning_rate": 5.534484689944189e-06,
      "loss": 0.0072,
      "step": 1375400
    },
    {
      "epoch": 2.2509049966287646,
      "grad_norm": 0.39363670349121094,
      "learning_rate": 5.534418797730673e-06,
      "loss": 0.0101,
      "step": 1375420
    },
    {
      "epoch": 2.2509377270674182,
      "grad_norm": 0.0940275564789772,
      "learning_rate": 5.534352905517156e-06,
      "loss": 0.0197,
      "step": 1375440
    },
    {
      "epoch": 2.2509704575060714,
      "grad_norm": 1.4429152011871338,
      "learning_rate": 5.534287013303638e-06,
      "loss": 0.0169,
      "step": 1375460
    },
    {
      "epoch": 2.251003187944725,
      "grad_norm": 0.3020387887954712,
      "learning_rate": 5.534221121090121e-06,
      "loss": 0.0144,
      "step": 1375480
    },
    {
      "epoch": 2.251035918383378,
      "grad_norm": 1.1508698463439941,
      "learning_rate": 5.534155228876605e-06,
      "loss": 0.0195,
      "step": 1375500
    },
    {
      "epoch": 2.2510686488220317,
      "grad_norm": 0.33451196551322937,
      "learning_rate": 5.5340893366630866e-06,
      "loss": 0.0171,
      "step": 1375520
    },
    {
      "epoch": 2.251101379260685,
      "grad_norm": 0.2140352874994278,
      "learning_rate": 5.53402344444957e-06,
      "loss": 0.0158,
      "step": 1375540
    },
    {
      "epoch": 2.251134109699338,
      "grad_norm": 0.4185287058353424,
      "learning_rate": 5.533957552236052e-06,
      "loss": 0.0148,
      "step": 1375560
    },
    {
      "epoch": 2.2511668401379916,
      "grad_norm": 0.26174208521842957,
      "learning_rate": 5.533891660022536e-06,
      "loss": 0.0129,
      "step": 1375580
    },
    {
      "epoch": 2.2511995705766448,
      "grad_norm": 0.46400004625320435,
      "learning_rate": 5.5338257678090175e-06,
      "loss": 0.0117,
      "step": 1375600
    },
    {
      "epoch": 2.2512323010152984,
      "grad_norm": 1.1059956550598145,
      "learning_rate": 5.533759875595501e-06,
      "loss": 0.0189,
      "step": 1375620
    },
    {
      "epoch": 2.2512650314539515,
      "grad_norm": 0.3750761151313782,
      "learning_rate": 5.533693983381985e-06,
      "loss": 0.0186,
      "step": 1375640
    },
    {
      "epoch": 2.251297761892605,
      "grad_norm": 0.8315874934196472,
      "learning_rate": 5.533628091168467e-06,
      "loss": 0.0191,
      "step": 1375660
    },
    {
      "epoch": 2.2513304923312583,
      "grad_norm": 0.16765329241752625,
      "learning_rate": 5.53356219895495e-06,
      "loss": 0.0144,
      "step": 1375680
    },
    {
      "epoch": 2.2513632227699114,
      "grad_norm": 0.31459155678749084,
      "learning_rate": 5.533496306741433e-06,
      "loss": 0.0138,
      "step": 1375700
    },
    {
      "epoch": 2.251395953208565,
      "grad_norm": 1.2946429252624512,
      "learning_rate": 5.533430414527916e-06,
      "loss": 0.0169,
      "step": 1375720
    },
    {
      "epoch": 2.251428683647218,
      "grad_norm": 0.3891589045524597,
      "learning_rate": 5.533364522314398e-06,
      "loss": 0.0128,
      "step": 1375740
    },
    {
      "epoch": 2.2514614140858717,
      "grad_norm": 0.7989992499351501,
      "learning_rate": 5.533298630100882e-06,
      "loss": 0.0149,
      "step": 1375760
    },
    {
      "epoch": 2.251494144524525,
      "grad_norm": 0.4321615397930145,
      "learning_rate": 5.533232737887364e-06,
      "loss": 0.0142,
      "step": 1375780
    },
    {
      "epoch": 2.2515268749631785,
      "grad_norm": 0.0702328085899353,
      "learning_rate": 5.5331668456738475e-06,
      "loss": 0.0098,
      "step": 1375800
    },
    {
      "epoch": 2.2515596054018316,
      "grad_norm": 0.27659526467323303,
      "learning_rate": 5.533100953460329e-06,
      "loss": 0.0165,
      "step": 1375820
    },
    {
      "epoch": 2.251592335840485,
      "grad_norm": 0.5532939434051514,
      "learning_rate": 5.533035061246813e-06,
      "loss": 0.0162,
      "step": 1375840
    },
    {
      "epoch": 2.2516250662791384,
      "grad_norm": 0.17368334531784058,
      "learning_rate": 5.532969169033296e-06,
      "loss": 0.0224,
      "step": 1375860
    },
    {
      "epoch": 2.2516577967177915,
      "grad_norm": 0.46846774220466614,
      "learning_rate": 5.532903276819778e-06,
      "loss": 0.0118,
      "step": 1375880
    },
    {
      "epoch": 2.251690527156445,
      "grad_norm": 0.5756575465202332,
      "learning_rate": 5.532837384606261e-06,
      "loss": 0.0138,
      "step": 1375900
    },
    {
      "epoch": 2.2517232575950983,
      "grad_norm": 0.3756353557109833,
      "learning_rate": 5.532771492392745e-06,
      "loss": 0.0167,
      "step": 1375920
    },
    {
      "epoch": 2.251755988033752,
      "grad_norm": 0.1630849391222,
      "learning_rate": 5.532705600179227e-06,
      "loss": 0.0136,
      "step": 1375940
    },
    {
      "epoch": 2.251788718472405,
      "grad_norm": 0.5067465305328369,
      "learning_rate": 5.53263970796571e-06,
      "loss": 0.0169,
      "step": 1375960
    },
    {
      "epoch": 2.251821448911058,
      "grad_norm": 0.5620428919792175,
      "learning_rate": 5.532573815752194e-06,
      "loss": 0.0156,
      "step": 1375980
    },
    {
      "epoch": 2.2518541793497118,
      "grad_norm": 0.4707796275615692,
      "learning_rate": 5.532507923538676e-06,
      "loss": 0.0135,
      "step": 1376000
    },
    {
      "epoch": 2.251886909788365,
      "grad_norm": 0.27934402227401733,
      "learning_rate": 5.532442031325159e-06,
      "loss": 0.0186,
      "step": 1376020
    },
    {
      "epoch": 2.2519196402270185,
      "grad_norm": 0.7474153637886047,
      "learning_rate": 5.532376139111641e-06,
      "loss": 0.0186,
      "step": 1376040
    },
    {
      "epoch": 2.2519523706656717,
      "grad_norm": 0.3683631122112274,
      "learning_rate": 5.532310246898125e-06,
      "loss": 0.0192,
      "step": 1376060
    },
    {
      "epoch": 2.251985101104325,
      "grad_norm": 0.5692447423934937,
      "learning_rate": 5.5322443546846075e-06,
      "loss": 0.0207,
      "step": 1376080
    },
    {
      "epoch": 2.2520178315429784,
      "grad_norm": 0.1136401891708374,
      "learning_rate": 5.53217846247109e-06,
      "loss": 0.0142,
      "step": 1376100
    },
    {
      "epoch": 2.2520505619816316,
      "grad_norm": 0.13563066720962524,
      "learning_rate": 5.532112570257573e-06,
      "loss": 0.0117,
      "step": 1376120
    },
    {
      "epoch": 2.252083292420285,
      "grad_norm": 0.1786472350358963,
      "learning_rate": 5.5320466780440566e-06,
      "loss": 0.0128,
      "step": 1376140
    },
    {
      "epoch": 2.2521160228589383,
      "grad_norm": 0.2791396379470825,
      "learning_rate": 5.5319807858305385e-06,
      "loss": 0.0177,
      "step": 1376160
    },
    {
      "epoch": 2.2521487532975915,
      "grad_norm": 0.48571887612342834,
      "learning_rate": 5.531914893617022e-06,
      "loss": 0.0146,
      "step": 1376180
    },
    {
      "epoch": 2.252181483736245,
      "grad_norm": 0.6296859979629517,
      "learning_rate": 5.531849001403504e-06,
      "loss": 0.0156,
      "step": 1376200
    },
    {
      "epoch": 2.252214214174898,
      "grad_norm": 1.3991199731826782,
      "learning_rate": 5.5317831091899875e-06,
      "loss": 0.0215,
      "step": 1376220
    },
    {
      "epoch": 2.252246944613552,
      "grad_norm": 0.3838995397090912,
      "learning_rate": 5.53171721697647e-06,
      "loss": 0.0134,
      "step": 1376240
    },
    {
      "epoch": 2.252279675052205,
      "grad_norm": 0.23096109926700592,
      "learning_rate": 5.531651324762953e-06,
      "loss": 0.0219,
      "step": 1376260
    },
    {
      "epoch": 2.2523124054908585,
      "grad_norm": 1.0078978538513184,
      "learning_rate": 5.531585432549436e-06,
      "loss": 0.0146,
      "step": 1376280
    },
    {
      "epoch": 2.2523451359295117,
      "grad_norm": 0.18608836829662323,
      "learning_rate": 5.531519540335919e-06,
      "loss": 0.0104,
      "step": 1376300
    },
    {
      "epoch": 2.252377866368165,
      "grad_norm": 0.0889696553349495,
      "learning_rate": 5.531453648122402e-06,
      "loss": 0.0102,
      "step": 1376320
    },
    {
      "epoch": 2.2524105968068184,
      "grad_norm": 0.8879494667053223,
      "learning_rate": 5.531387755908885e-06,
      "loss": 0.0139,
      "step": 1376340
    },
    {
      "epoch": 2.2524433272454716,
      "grad_norm": 1.0001662969589233,
      "learning_rate": 5.531321863695368e-06,
      "loss": 0.0129,
      "step": 1376360
    },
    {
      "epoch": 2.252476057684125,
      "grad_norm": 0.2415430247783661,
      "learning_rate": 5.53125597148185e-06,
      "loss": 0.0133,
      "step": 1376380
    },
    {
      "epoch": 2.2525087881227783,
      "grad_norm": 0.27793169021606445,
      "learning_rate": 5.531190079268334e-06,
      "loss": 0.0194,
      "step": 1376400
    },
    {
      "epoch": 2.252541518561432,
      "grad_norm": 0.1516626924276352,
      "learning_rate": 5.531124187054816e-06,
      "loss": 0.0146,
      "step": 1376420
    },
    {
      "epoch": 2.252574249000085,
      "grad_norm": 0.6075510382652283,
      "learning_rate": 5.531058294841299e-06,
      "loss": 0.0111,
      "step": 1376440
    },
    {
      "epoch": 2.252606979438738,
      "grad_norm": 0.2882177531719208,
      "learning_rate": 5.530992402627782e-06,
      "loss": 0.014,
      "step": 1376460
    },
    {
      "epoch": 2.252639709877392,
      "grad_norm": 0.8917168974876404,
      "learning_rate": 5.530926510414265e-06,
      "loss": 0.0133,
      "step": 1376480
    },
    {
      "epoch": 2.252672440316045,
      "grad_norm": 0.23020921647548676,
      "learning_rate": 5.530860618200748e-06,
      "loss": 0.0091,
      "step": 1376500
    },
    {
      "epoch": 2.2527051707546986,
      "grad_norm": 0.3917803466320038,
      "learning_rate": 5.530794725987231e-06,
      "loss": 0.013,
      "step": 1376520
    },
    {
      "epoch": 2.2527379011933517,
      "grad_norm": 0.28421393036842346,
      "learning_rate": 5.530728833773713e-06,
      "loss": 0.0171,
      "step": 1376540
    },
    {
      "epoch": 2.2527706316320053,
      "grad_norm": 0.19062843918800354,
      "learning_rate": 5.530662941560197e-06,
      "loss": 0.0175,
      "step": 1376560
    },
    {
      "epoch": 2.2528033620706585,
      "grad_norm": 1.0170482397079468,
      "learning_rate": 5.5305970493466785e-06,
      "loss": 0.0186,
      "step": 1376580
    },
    {
      "epoch": 2.2528360925093116,
      "grad_norm": 0.7884191274642944,
      "learning_rate": 5.530531157133162e-06,
      "loss": 0.0153,
      "step": 1376600
    },
    {
      "epoch": 2.252868822947965,
      "grad_norm": 0.1991548240184784,
      "learning_rate": 5.530465264919644e-06,
      "loss": 0.0161,
      "step": 1376620
    },
    {
      "epoch": 2.2529015533866183,
      "grad_norm": 0.3111223876476288,
      "learning_rate": 5.530399372706128e-06,
      "loss": 0.0205,
      "step": 1376640
    },
    {
      "epoch": 2.252934283825272,
      "grad_norm": 0.8381426334381104,
      "learning_rate": 5.53033348049261e-06,
      "loss": 0.0106,
      "step": 1376660
    },
    {
      "epoch": 2.252967014263925,
      "grad_norm": 0.5649210214614868,
      "learning_rate": 5.530267588279093e-06,
      "loss": 0.0197,
      "step": 1376680
    },
    {
      "epoch": 2.2529997447025787,
      "grad_norm": 0.5072600245475769,
      "learning_rate": 5.530201696065577e-06,
      "loss": 0.0167,
      "step": 1376700
    },
    {
      "epoch": 2.253032475141232,
      "grad_norm": 0.6818252205848694,
      "learning_rate": 5.530135803852059e-06,
      "loss": 0.018,
      "step": 1376720
    },
    {
      "epoch": 2.253065205579885,
      "grad_norm": 1.071927785873413,
      "learning_rate": 5.530069911638542e-06,
      "loss": 0.0202,
      "step": 1376740
    },
    {
      "epoch": 2.2530979360185386,
      "grad_norm": 0.23787935078144073,
      "learning_rate": 5.530004019425025e-06,
      "loss": 0.0144,
      "step": 1376760
    },
    {
      "epoch": 2.2531306664571917,
      "grad_norm": 0.376187264919281,
      "learning_rate": 5.5299381272115085e-06,
      "loss": 0.0182,
      "step": 1376780
    },
    {
      "epoch": 2.2531633968958453,
      "grad_norm": 1.0872529745101929,
      "learning_rate": 5.52987223499799e-06,
      "loss": 0.0182,
      "step": 1376800
    },
    {
      "epoch": 2.2531961273344985,
      "grad_norm": 0.33879297971725464,
      "learning_rate": 5.529806342784474e-06,
      "loss": 0.0118,
      "step": 1376820
    },
    {
      "epoch": 2.253228857773152,
      "grad_norm": 0.30351749062538147,
      "learning_rate": 5.529740450570956e-06,
      "loss": 0.0115,
      "step": 1376840
    },
    {
      "epoch": 2.253261588211805,
      "grad_norm": 0.29432573914527893,
      "learning_rate": 5.5296745583574394e-06,
      "loss": 0.0134,
      "step": 1376860
    },
    {
      "epoch": 2.2532943186504584,
      "grad_norm": 0.7268366813659668,
      "learning_rate": 5.529608666143922e-06,
      "loss": 0.0157,
      "step": 1376880
    },
    {
      "epoch": 2.253327049089112,
      "grad_norm": 0.7500460743904114,
      "learning_rate": 5.529542773930405e-06,
      "loss": 0.0101,
      "step": 1376900
    },
    {
      "epoch": 2.253359779527765,
      "grad_norm": 0.34062904119491577,
      "learning_rate": 5.529476881716888e-06,
      "loss": 0.0165,
      "step": 1376920
    },
    {
      "epoch": 2.2533925099664187,
      "grad_norm": 0.36183294653892517,
      "learning_rate": 5.529410989503371e-06,
      "loss": 0.0151,
      "step": 1376940
    },
    {
      "epoch": 2.253425240405072,
      "grad_norm": 0.5161300897598267,
      "learning_rate": 5.529345097289853e-06,
      "loss": 0.0211,
      "step": 1376960
    },
    {
      "epoch": 2.2534579708437255,
      "grad_norm": 0.24153591692447662,
      "learning_rate": 5.529279205076337e-06,
      "loss": 0.0105,
      "step": 1376980
    },
    {
      "epoch": 2.2534907012823786,
      "grad_norm": 0.11435656994581223,
      "learning_rate": 5.529213312862819e-06,
      "loss": 0.0153,
      "step": 1377000
    },
    {
      "epoch": 2.2535234317210318,
      "grad_norm": 0.49702614545822144,
      "learning_rate": 5.529147420649302e-06,
      "loss": 0.0243,
      "step": 1377020
    },
    {
      "epoch": 2.2535561621596853,
      "grad_norm": 0.579437792301178,
      "learning_rate": 5.529081528435786e-06,
      "loss": 0.0158,
      "step": 1377040
    },
    {
      "epoch": 2.2535888925983385,
      "grad_norm": 0.14920929074287415,
      "learning_rate": 5.529015636222268e-06,
      "loss": 0.0185,
      "step": 1377060
    },
    {
      "epoch": 2.253621623036992,
      "grad_norm": 0.31591537594795227,
      "learning_rate": 5.528949744008751e-06,
      "loss": 0.0113,
      "step": 1377080
    },
    {
      "epoch": 2.2536543534756452,
      "grad_norm": 0.1956479251384735,
      "learning_rate": 5.528883851795234e-06,
      "loss": 0.0165,
      "step": 1377100
    },
    {
      "epoch": 2.253687083914299,
      "grad_norm": 0.3046345114707947,
      "learning_rate": 5.528817959581717e-06,
      "loss": 0.0163,
      "step": 1377120
    },
    {
      "epoch": 2.253719814352952,
      "grad_norm": 0.20108143985271454,
      "learning_rate": 5.5287520673681995e-06,
      "loss": 0.0112,
      "step": 1377140
    },
    {
      "epoch": 2.253752544791605,
      "grad_norm": 0.8832991123199463,
      "learning_rate": 5.528686175154683e-06,
      "loss": 0.0156,
      "step": 1377160
    },
    {
      "epoch": 2.2537852752302587,
      "grad_norm": 3.5572621822357178,
      "learning_rate": 5.528620282941165e-06,
      "loss": 0.012,
      "step": 1377180
    },
    {
      "epoch": 2.253818005668912,
      "grad_norm": 0.4519588053226471,
      "learning_rate": 5.5285543907276486e-06,
      "loss": 0.0168,
      "step": 1377200
    },
    {
      "epoch": 2.2538507361075655,
      "grad_norm": 0.44609153270721436,
      "learning_rate": 5.5284884985141304e-06,
      "loss": 0.0166,
      "step": 1377220
    },
    {
      "epoch": 2.2538834665462186,
      "grad_norm": 0.486227810382843,
      "learning_rate": 5.528422606300614e-06,
      "loss": 0.0143,
      "step": 1377240
    },
    {
      "epoch": 2.2539161969848722,
      "grad_norm": 0.6261364221572876,
      "learning_rate": 5.528356714087097e-06,
      "loss": 0.0117,
      "step": 1377260
    },
    {
      "epoch": 2.2539489274235254,
      "grad_norm": 0.4522334635257721,
      "learning_rate": 5.5282908218735795e-06,
      "loss": 0.0162,
      "step": 1377280
    },
    {
      "epoch": 2.2539816578621785,
      "grad_norm": 0.3502551317214966,
      "learning_rate": 5.528224929660062e-06,
      "loss": 0.0128,
      "step": 1377300
    },
    {
      "epoch": 2.254014388300832,
      "grad_norm": 0.44798600673675537,
      "learning_rate": 5.528159037446546e-06,
      "loss": 0.0107,
      "step": 1377320
    },
    {
      "epoch": 2.2540471187394853,
      "grad_norm": 0.14467811584472656,
      "learning_rate": 5.528093145233028e-06,
      "loss": 0.0143,
      "step": 1377340
    },
    {
      "epoch": 2.254079849178139,
      "grad_norm": 0.937295138835907,
      "learning_rate": 5.528027253019511e-06,
      "loss": 0.0199,
      "step": 1377360
    },
    {
      "epoch": 2.254112579616792,
      "grad_norm": 0.2946578860282898,
      "learning_rate": 5.527961360805995e-06,
      "loss": 0.0157,
      "step": 1377380
    },
    {
      "epoch": 2.2541453100554456,
      "grad_norm": 0.34401649236679077,
      "learning_rate": 5.527895468592477e-06,
      "loss": 0.0129,
      "step": 1377400
    },
    {
      "epoch": 2.2541780404940988,
      "grad_norm": 0.34872162342071533,
      "learning_rate": 5.52782957637896e-06,
      "loss": 0.0087,
      "step": 1377420
    },
    {
      "epoch": 2.254210770932752,
      "grad_norm": 0.3568383753299713,
      "learning_rate": 5.527763684165442e-06,
      "loss": 0.0142,
      "step": 1377440
    },
    {
      "epoch": 2.2542435013714055,
      "grad_norm": 0.19802232086658478,
      "learning_rate": 5.527697791951926e-06,
      "loss": 0.0199,
      "step": 1377460
    },
    {
      "epoch": 2.2542762318100587,
      "grad_norm": 0.17590290307998657,
      "learning_rate": 5.527631899738409e-06,
      "loss": 0.02,
      "step": 1377480
    },
    {
      "epoch": 2.2543089622487122,
      "grad_norm": 0.1573113054037094,
      "learning_rate": 5.527566007524891e-06,
      "loss": 0.0133,
      "step": 1377500
    },
    {
      "epoch": 2.2543416926873654,
      "grad_norm": 0.5888400077819824,
      "learning_rate": 5.527500115311374e-06,
      "loss": 0.0183,
      "step": 1377520
    },
    {
      "epoch": 2.2543744231260185,
      "grad_norm": 0.19050635397434235,
      "learning_rate": 5.527434223097858e-06,
      "loss": 0.0157,
      "step": 1377540
    },
    {
      "epoch": 2.254407153564672,
      "grad_norm": 0.5428728461265564,
      "learning_rate": 5.5273683308843396e-06,
      "loss": 0.0179,
      "step": 1377560
    },
    {
      "epoch": 2.2544398840033253,
      "grad_norm": 0.15955215692520142,
      "learning_rate": 5.527302438670823e-06,
      "loss": 0.0124,
      "step": 1377580
    },
    {
      "epoch": 2.254472614441979,
      "grad_norm": 0.1591283679008484,
      "learning_rate": 5.527236546457305e-06,
      "loss": 0.0159,
      "step": 1377600
    },
    {
      "epoch": 2.254505344880632,
      "grad_norm": 0.28531813621520996,
      "learning_rate": 5.527170654243789e-06,
      "loss": 0.0124,
      "step": 1377620
    },
    {
      "epoch": 2.254538075319285,
      "grad_norm": 0.46082010865211487,
      "learning_rate": 5.5271047620302705e-06,
      "loss": 0.0135,
      "step": 1377640
    },
    {
      "epoch": 2.254570805757939,
      "grad_norm": 0.24106881022453308,
      "learning_rate": 5.527038869816754e-06,
      "loss": 0.0153,
      "step": 1377660
    },
    {
      "epoch": 2.254603536196592,
      "grad_norm": 0.4346134066581726,
      "learning_rate": 5.526972977603237e-06,
      "loss": 0.0203,
      "step": 1377680
    },
    {
      "epoch": 2.2546362666352455,
      "grad_norm": 0.4305494427680969,
      "learning_rate": 5.52690708538972e-06,
      "loss": 0.0142,
      "step": 1377700
    },
    {
      "epoch": 2.2546689970738987,
      "grad_norm": 0.5080673694610596,
      "learning_rate": 5.526841193176202e-06,
      "loss": 0.0136,
      "step": 1377720
    },
    {
      "epoch": 2.2547017275125523,
      "grad_norm": 0.476352721452713,
      "learning_rate": 5.526775300962686e-06,
      "loss": 0.013,
      "step": 1377740
    },
    {
      "epoch": 2.2547344579512054,
      "grad_norm": 0.4387539327144623,
      "learning_rate": 5.526709408749169e-06,
      "loss": 0.013,
      "step": 1377760
    },
    {
      "epoch": 2.2547671883898586,
      "grad_norm": 0.10709898173809052,
      "learning_rate": 5.526643516535651e-06,
      "loss": 0.014,
      "step": 1377780
    },
    {
      "epoch": 2.254799918828512,
      "grad_norm": 0.2969306707382202,
      "learning_rate": 5.526577624322135e-06,
      "loss": 0.0211,
      "step": 1377800
    },
    {
      "epoch": 2.2548326492671653,
      "grad_norm": 0.24071036279201508,
      "learning_rate": 5.526511732108617e-06,
      "loss": 0.0124,
      "step": 1377820
    },
    {
      "epoch": 2.254865379705819,
      "grad_norm": 0.73989337682724,
      "learning_rate": 5.5264458398951005e-06,
      "loss": 0.0183,
      "step": 1377840
    },
    {
      "epoch": 2.254898110144472,
      "grad_norm": 0.4610981047153473,
      "learning_rate": 5.526379947681582e-06,
      "loss": 0.0139,
      "step": 1377860
    },
    {
      "epoch": 2.2549308405831257,
      "grad_norm": 0.8412354588508606,
      "learning_rate": 5.526314055468066e-06,
      "loss": 0.0136,
      "step": 1377880
    },
    {
      "epoch": 2.254963571021779,
      "grad_norm": 0.24130579829216003,
      "learning_rate": 5.526248163254549e-06,
      "loss": 0.0145,
      "step": 1377900
    },
    {
      "epoch": 2.254996301460432,
      "grad_norm": 0.6891871690750122,
      "learning_rate": 5.526182271041031e-06,
      "loss": 0.0118,
      "step": 1377920
    },
    {
      "epoch": 2.2550290318990855,
      "grad_norm": 0.522262692451477,
      "learning_rate": 5.526116378827514e-06,
      "loss": 0.013,
      "step": 1377940
    },
    {
      "epoch": 2.2550617623377387,
      "grad_norm": 0.6821765303611755,
      "learning_rate": 5.526050486613998e-06,
      "loss": 0.0165,
      "step": 1377960
    },
    {
      "epoch": 2.2550944927763923,
      "grad_norm": 0.2210649698972702,
      "learning_rate": 5.52598459440048e-06,
      "loss": 0.0167,
      "step": 1377980
    },
    {
      "epoch": 2.2551272232150454,
      "grad_norm": 0.46153783798217773,
      "learning_rate": 5.525918702186963e-06,
      "loss": 0.0135,
      "step": 1378000
    },
    {
      "epoch": 2.255159953653699,
      "grad_norm": 0.3302229344844818,
      "learning_rate": 5.525852809973445e-06,
      "loss": 0.0115,
      "step": 1378020
    },
    {
      "epoch": 2.255192684092352,
      "grad_norm": 0.10101479291915894,
      "learning_rate": 5.525786917759929e-06,
      "loss": 0.0115,
      "step": 1378040
    },
    {
      "epoch": 2.2552254145310053,
      "grad_norm": 0.5308078527450562,
      "learning_rate": 5.5257210255464114e-06,
      "loss": 0.0299,
      "step": 1378060
    },
    {
      "epoch": 2.255258144969659,
      "grad_norm": 0.33796441555023193,
      "learning_rate": 5.525655133332894e-06,
      "loss": 0.0151,
      "step": 1378080
    },
    {
      "epoch": 2.255290875408312,
      "grad_norm": 0.315788596868515,
      "learning_rate": 5.525589241119378e-06,
      "loss": 0.0109,
      "step": 1378100
    },
    {
      "epoch": 2.2553236058469657,
      "grad_norm": 0.437843382358551,
      "learning_rate": 5.5255233489058605e-06,
      "loss": 0.0159,
      "step": 1378120
    },
    {
      "epoch": 2.255356336285619,
      "grad_norm": 0.4253333806991577,
      "learning_rate": 5.525457456692343e-06,
      "loss": 0.0212,
      "step": 1378140
    },
    {
      "epoch": 2.2553890667242724,
      "grad_norm": 0.632272481918335,
      "learning_rate": 5.525391564478826e-06,
      "loss": 0.0178,
      "step": 1378160
    },
    {
      "epoch": 2.2554217971629256,
      "grad_norm": 0.3423548638820648,
      "learning_rate": 5.52532567226531e-06,
      "loss": 0.0143,
      "step": 1378180
    },
    {
      "epoch": 2.2554545276015787,
      "grad_norm": 0.4249770939350128,
      "learning_rate": 5.5252597800517915e-06,
      "loss": 0.02,
      "step": 1378200
    },
    {
      "epoch": 2.2554872580402323,
      "grad_norm": 0.43943050503730774,
      "learning_rate": 5.525193887838275e-06,
      "loss": 0.0171,
      "step": 1378220
    },
    {
      "epoch": 2.2555199884788855,
      "grad_norm": 0.4495708644390106,
      "learning_rate": 5.525127995624757e-06,
      "loss": 0.0181,
      "step": 1378240
    },
    {
      "epoch": 2.255552718917539,
      "grad_norm": 0.4923876225948334,
      "learning_rate": 5.5250621034112405e-06,
      "loss": 0.0181,
      "step": 1378260
    },
    {
      "epoch": 2.255585449356192,
      "grad_norm": 0.31359052658081055,
      "learning_rate": 5.524996211197723e-06,
      "loss": 0.0142,
      "step": 1378280
    },
    {
      "epoch": 2.255618179794846,
      "grad_norm": 0.20199894905090332,
      "learning_rate": 5.524930318984206e-06,
      "loss": 0.0224,
      "step": 1378300
    },
    {
      "epoch": 2.255650910233499,
      "grad_norm": 0.6897034049034119,
      "learning_rate": 5.524864426770689e-06,
      "loss": 0.0131,
      "step": 1378320
    },
    {
      "epoch": 2.255683640672152,
      "grad_norm": 0.37966012954711914,
      "learning_rate": 5.524798534557172e-06,
      "loss": 0.0118,
      "step": 1378340
    },
    {
      "epoch": 2.2557163711108057,
      "grad_norm": 0.3945728540420532,
      "learning_rate": 5.524732642343654e-06,
      "loss": 0.0185,
      "step": 1378360
    },
    {
      "epoch": 2.255749101549459,
      "grad_norm": 0.1468532532453537,
      "learning_rate": 5.524666750130138e-06,
      "loss": 0.0182,
      "step": 1378380
    },
    {
      "epoch": 2.2557818319881124,
      "grad_norm": 0.3467312753200531,
      "learning_rate": 5.52460085791662e-06,
      "loss": 0.0175,
      "step": 1378400
    },
    {
      "epoch": 2.2558145624267656,
      "grad_norm": 0.15374381840229034,
      "learning_rate": 5.524534965703103e-06,
      "loss": 0.0124,
      "step": 1378420
    },
    {
      "epoch": 2.255847292865419,
      "grad_norm": 0.26346203684806824,
      "learning_rate": 5.524469073489587e-06,
      "loss": 0.0189,
      "step": 1378440
    },
    {
      "epoch": 2.2558800233040723,
      "grad_norm": 0.5946944952011108,
      "learning_rate": 5.524403181276069e-06,
      "loss": 0.0149,
      "step": 1378460
    },
    {
      "epoch": 2.2559127537427255,
      "grad_norm": 0.2057957798242569,
      "learning_rate": 5.524337289062552e-06,
      "loss": 0.0126,
      "step": 1378480
    },
    {
      "epoch": 2.255945484181379,
      "grad_norm": 0.501173198223114,
      "learning_rate": 5.524271396849034e-06,
      "loss": 0.017,
      "step": 1378500
    },
    {
      "epoch": 2.2559782146200322,
      "grad_norm": 0.3430274724960327,
      "learning_rate": 5.524205504635518e-06,
      "loss": 0.0097,
      "step": 1378520
    },
    {
      "epoch": 2.256010945058686,
      "grad_norm": 0.36484992504119873,
      "learning_rate": 5.524139612422001e-06,
      "loss": 0.0149,
      "step": 1378540
    },
    {
      "epoch": 2.256043675497339,
      "grad_norm": 0.23548030853271484,
      "learning_rate": 5.524073720208484e-06,
      "loss": 0.0134,
      "step": 1378560
    },
    {
      "epoch": 2.2560764059359926,
      "grad_norm": 0.48896846175193787,
      "learning_rate": 5.524007827994966e-06,
      "loss": 0.0184,
      "step": 1378580
    },
    {
      "epoch": 2.2561091363746457,
      "grad_norm": 0.3181808292865753,
      "learning_rate": 5.52394193578145e-06,
      "loss": 0.0177,
      "step": 1378600
    },
    {
      "epoch": 2.256141866813299,
      "grad_norm": 0.21161098778247833,
      "learning_rate": 5.5238760435679315e-06,
      "loss": 0.0173,
      "step": 1378620
    },
    {
      "epoch": 2.2561745972519525,
      "grad_norm": 3.686032772064209,
      "learning_rate": 5.523810151354415e-06,
      "loss": 0.0229,
      "step": 1378640
    },
    {
      "epoch": 2.2562073276906056,
      "grad_norm": 0.3429357707500458,
      "learning_rate": 5.523744259140897e-06,
      "loss": 0.0163,
      "step": 1378660
    },
    {
      "epoch": 2.256240058129259,
      "grad_norm": 0.35182127356529236,
      "learning_rate": 5.523678366927381e-06,
      "loss": 0.0175,
      "step": 1378680
    },
    {
      "epoch": 2.2562727885679124,
      "grad_norm": 0.26352930068969727,
      "learning_rate": 5.523612474713863e-06,
      "loss": 0.0169,
      "step": 1378700
    },
    {
      "epoch": 2.256305519006566,
      "grad_norm": 0.7471144199371338,
      "learning_rate": 5.523546582500346e-06,
      "loss": 0.0172,
      "step": 1378720
    },
    {
      "epoch": 2.256338249445219,
      "grad_norm": 0.4988669455051422,
      "learning_rate": 5.523480690286829e-06,
      "loss": 0.0136,
      "step": 1378740
    },
    {
      "epoch": 2.2563709798838723,
      "grad_norm": 0.5029438138008118,
      "learning_rate": 5.523414798073312e-06,
      "loss": 0.0187,
      "step": 1378760
    },
    {
      "epoch": 2.256403710322526,
      "grad_norm": 0.334788054227829,
      "learning_rate": 5.523348905859795e-06,
      "loss": 0.0099,
      "step": 1378780
    },
    {
      "epoch": 2.256436440761179,
      "grad_norm": 0.8211401700973511,
      "learning_rate": 5.523283013646278e-06,
      "loss": 0.0247,
      "step": 1378800
    },
    {
      "epoch": 2.2564691711998326,
      "grad_norm": 0.2882373332977295,
      "learning_rate": 5.5232171214327615e-06,
      "loss": 0.0182,
      "step": 1378820
    },
    {
      "epoch": 2.2565019016384857,
      "grad_norm": 0.3374372720718384,
      "learning_rate": 5.523151229219243e-06,
      "loss": 0.0151,
      "step": 1378840
    },
    {
      "epoch": 2.2565346320771393,
      "grad_norm": 0.6078037023544312,
      "learning_rate": 5.523085337005727e-06,
      "loss": 0.018,
      "step": 1378860
    },
    {
      "epoch": 2.2565673625157925,
      "grad_norm": 0.7231883406639099,
      "learning_rate": 5.523019444792209e-06,
      "loss": 0.0149,
      "step": 1378880
    },
    {
      "epoch": 2.2566000929544456,
      "grad_norm": 0.33086585998535156,
      "learning_rate": 5.5229535525786924e-06,
      "loss": 0.0117,
      "step": 1378900
    },
    {
      "epoch": 2.2566328233930992,
      "grad_norm": 0.3205958604812622,
      "learning_rate": 5.522887660365175e-06,
      "loss": 0.0165,
      "step": 1378920
    },
    {
      "epoch": 2.2566655538317524,
      "grad_norm": 0.32837623357772827,
      "learning_rate": 5.522821768151658e-06,
      "loss": 0.0176,
      "step": 1378940
    },
    {
      "epoch": 2.256698284270406,
      "grad_norm": 0.3517342805862427,
      "learning_rate": 5.522755875938141e-06,
      "loss": 0.0167,
      "step": 1378960
    },
    {
      "epoch": 2.256731014709059,
      "grad_norm": 0.18878474831581116,
      "learning_rate": 5.522689983724624e-06,
      "loss": 0.012,
      "step": 1378980
    },
    {
      "epoch": 2.2567637451477127,
      "grad_norm": 0.8527622222900391,
      "learning_rate": 5.522624091511106e-06,
      "loss": 0.0168,
      "step": 1379000
    },
    {
      "epoch": 2.256796475586366,
      "grad_norm": 0.32574230432510376,
      "learning_rate": 5.52255819929759e-06,
      "loss": 0.0129,
      "step": 1379020
    },
    {
      "epoch": 2.256829206025019,
      "grad_norm": 1.2126036882400513,
      "learning_rate": 5.522492307084072e-06,
      "loss": 0.0232,
      "step": 1379040
    },
    {
      "epoch": 2.2568619364636726,
      "grad_norm": 0.6090520024299622,
      "learning_rate": 5.522426414870555e-06,
      "loss": 0.0211,
      "step": 1379060
    },
    {
      "epoch": 2.2568946669023258,
      "grad_norm": 0.14003193378448486,
      "learning_rate": 5.522360522657038e-06,
      "loss": 0.0132,
      "step": 1379080
    },
    {
      "epoch": 2.2569273973409794,
      "grad_norm": 0.2591251730918884,
      "learning_rate": 5.522294630443521e-06,
      "loss": 0.0196,
      "step": 1379100
    },
    {
      "epoch": 2.2569601277796325,
      "grad_norm": 0.7402287125587463,
      "learning_rate": 5.5222287382300034e-06,
      "loss": 0.0177,
      "step": 1379120
    },
    {
      "epoch": 2.2569928582182857,
      "grad_norm": 0.4714939296245575,
      "learning_rate": 5.522162846016487e-06,
      "loss": 0.0176,
      "step": 1379140
    },
    {
      "epoch": 2.2570255886569393,
      "grad_norm": 0.25589218735694885,
      "learning_rate": 5.52209695380297e-06,
      "loss": 0.0178,
      "step": 1379160
    },
    {
      "epoch": 2.2570583190955924,
      "grad_norm": 0.14303965866565704,
      "learning_rate": 5.5220310615894525e-06,
      "loss": 0.0084,
      "step": 1379180
    },
    {
      "epoch": 2.257091049534246,
      "grad_norm": 0.42923474311828613,
      "learning_rate": 5.521965169375936e-06,
      "loss": 0.0148,
      "step": 1379200
    },
    {
      "epoch": 2.257123779972899,
      "grad_norm": 0.30021926760673523,
      "learning_rate": 5.521899277162418e-06,
      "loss": 0.0127,
      "step": 1379220
    },
    {
      "epoch": 2.2571565104115523,
      "grad_norm": 0.6354517340660095,
      "learning_rate": 5.5218333849489016e-06,
      "loss": 0.0144,
      "step": 1379240
    },
    {
      "epoch": 2.257189240850206,
      "grad_norm": 0.2811650037765503,
      "learning_rate": 5.5217674927353835e-06,
      "loss": 0.0238,
      "step": 1379260
    },
    {
      "epoch": 2.257221971288859,
      "grad_norm": 0.361702561378479,
      "learning_rate": 5.521701600521867e-06,
      "loss": 0.0181,
      "step": 1379280
    },
    {
      "epoch": 2.2572547017275126,
      "grad_norm": 0.4889633357524872,
      "learning_rate": 5.52163570830835e-06,
      "loss": 0.0235,
      "step": 1379300
    },
    {
      "epoch": 2.257287432166166,
      "grad_norm": 0.7778149843215942,
      "learning_rate": 5.5215698160948325e-06,
      "loss": 0.0149,
      "step": 1379320
    },
    {
      "epoch": 2.2573201626048194,
      "grad_norm": 0.4084782004356384,
      "learning_rate": 5.521503923881315e-06,
      "loss": 0.0119,
      "step": 1379340
    },
    {
      "epoch": 2.2573528930434725,
      "grad_norm": 0.8321789503097534,
      "learning_rate": 5.521438031667799e-06,
      "loss": 0.0168,
      "step": 1379360
    },
    {
      "epoch": 2.2573856234821257,
      "grad_norm": 0.6577577590942383,
      "learning_rate": 5.521372139454281e-06,
      "loss": 0.0157,
      "step": 1379380
    },
    {
      "epoch": 2.2574183539207793,
      "grad_norm": 0.29079243540763855,
      "learning_rate": 5.521306247240764e-06,
      "loss": 0.0177,
      "step": 1379400
    },
    {
      "epoch": 2.2574510843594324,
      "grad_norm": 0.35943058133125305,
      "learning_rate": 5.521240355027246e-06,
      "loss": 0.0141,
      "step": 1379420
    },
    {
      "epoch": 2.257483814798086,
      "grad_norm": 0.6416173577308655,
      "learning_rate": 5.52117446281373e-06,
      "loss": 0.0133,
      "step": 1379440
    },
    {
      "epoch": 2.257516545236739,
      "grad_norm": 0.2961055636405945,
      "learning_rate": 5.521108570600212e-06,
      "loss": 0.0204,
      "step": 1379460
    },
    {
      "epoch": 2.2575492756753928,
      "grad_norm": 0.16554711759090424,
      "learning_rate": 5.521042678386695e-06,
      "loss": 0.0125,
      "step": 1379480
    },
    {
      "epoch": 2.257582006114046,
      "grad_norm": 0.3484741747379303,
      "learning_rate": 5.520976786173179e-06,
      "loss": 0.0171,
      "step": 1379500
    },
    {
      "epoch": 2.257614736552699,
      "grad_norm": 0.29103949666023254,
      "learning_rate": 5.520910893959661e-06,
      "loss": 0.0214,
      "step": 1379520
    },
    {
      "epoch": 2.2576474669913527,
      "grad_norm": 0.20993748307228088,
      "learning_rate": 5.520845001746144e-06,
      "loss": 0.0151,
      "step": 1379540
    },
    {
      "epoch": 2.257680197430006,
      "grad_norm": 0.6288726329803467,
      "learning_rate": 5.520779109532627e-06,
      "loss": 0.021,
      "step": 1379560
    },
    {
      "epoch": 2.2577129278686594,
      "grad_norm": 0.3308156728744507,
      "learning_rate": 5.52071321731911e-06,
      "loss": 0.0147,
      "step": 1379580
    },
    {
      "epoch": 2.2577456583073126,
      "grad_norm": 0.5475642681121826,
      "learning_rate": 5.5206473251055926e-06,
      "loss": 0.018,
      "step": 1379600
    },
    {
      "epoch": 2.257778388745966,
      "grad_norm": 0.5087096095085144,
      "learning_rate": 5.520581432892076e-06,
      "loss": 0.017,
      "step": 1379620
    },
    {
      "epoch": 2.2578111191846193,
      "grad_norm": 0.8455927968025208,
      "learning_rate": 5.520515540678558e-06,
      "loss": 0.0127,
      "step": 1379640
    },
    {
      "epoch": 2.2578438496232724,
      "grad_norm": 0.3479422926902771,
      "learning_rate": 5.520449648465042e-06,
      "loss": 0.014,
      "step": 1379660
    },
    {
      "epoch": 2.257876580061926,
      "grad_norm": 0.5092272758483887,
      "learning_rate": 5.5203837562515235e-06,
      "loss": 0.0193,
      "step": 1379680
    },
    {
      "epoch": 2.257909310500579,
      "grad_norm": 0.2502363920211792,
      "learning_rate": 5.520317864038007e-06,
      "loss": 0.0092,
      "step": 1379700
    },
    {
      "epoch": 2.257942040939233,
      "grad_norm": 0.29645413160324097,
      "learning_rate": 5.52025197182449e-06,
      "loss": 0.019,
      "step": 1379720
    },
    {
      "epoch": 2.257974771377886,
      "grad_norm": 0.2862427532672882,
      "learning_rate": 5.520186079610973e-06,
      "loss": 0.0101,
      "step": 1379740
    },
    {
      "epoch": 2.2580075018165395,
      "grad_norm": 0.5253151655197144,
      "learning_rate": 5.520120187397455e-06,
      "loss": 0.0156,
      "step": 1379760
    },
    {
      "epoch": 2.2580402322551927,
      "grad_norm": 1.0231456756591797,
      "learning_rate": 5.520054295183939e-06,
      "loss": 0.0283,
      "step": 1379780
    },
    {
      "epoch": 2.258072962693846,
      "grad_norm": 0.34540489315986633,
      "learning_rate": 5.519988402970421e-06,
      "loss": 0.0153,
      "step": 1379800
    },
    {
      "epoch": 2.2581056931324994,
      "grad_norm": 0.5792437195777893,
      "learning_rate": 5.519922510756904e-06,
      "loss": 0.0163,
      "step": 1379820
    },
    {
      "epoch": 2.2581384235711526,
      "grad_norm": 0.17458675801753998,
      "learning_rate": 5.519856618543388e-06,
      "loss": 0.013,
      "step": 1379840
    },
    {
      "epoch": 2.258171154009806,
      "grad_norm": 0.20003089308738708,
      "learning_rate": 5.51979072632987e-06,
      "loss": 0.0128,
      "step": 1379860
    },
    {
      "epoch": 2.2582038844484593,
      "grad_norm": 0.5495890974998474,
      "learning_rate": 5.5197248341163535e-06,
      "loss": 0.0144,
      "step": 1379880
    },
    {
      "epoch": 2.258236614887113,
      "grad_norm": 0.08605016022920609,
      "learning_rate": 5.519658941902835e-06,
      "loss": 0.0161,
      "step": 1379900
    },
    {
      "epoch": 2.258269345325766,
      "grad_norm": 0.6245831251144409,
      "learning_rate": 5.519593049689319e-06,
      "loss": 0.0169,
      "step": 1379920
    },
    {
      "epoch": 2.258302075764419,
      "grad_norm": 0.951104998588562,
      "learning_rate": 5.519527157475802e-06,
      "loss": 0.0161,
      "step": 1379940
    },
    {
      "epoch": 2.258334806203073,
      "grad_norm": 0.18009160459041595,
      "learning_rate": 5.5194612652622844e-06,
      "loss": 0.01,
      "step": 1379960
    },
    {
      "epoch": 2.258367536641726,
      "grad_norm": 0.11973721534013748,
      "learning_rate": 5.519395373048767e-06,
      "loss": 0.0157,
      "step": 1379980
    },
    {
      "epoch": 2.2584002670803796,
      "grad_norm": 0.4084687829017639,
      "learning_rate": 5.519329480835251e-06,
      "loss": 0.0262,
      "step": 1380000
    },
    {
      "epoch": 2.2584329975190327,
      "grad_norm": 1.061911940574646,
      "learning_rate": 5.519263588621733e-06,
      "loss": 0.0177,
      "step": 1380020
    },
    {
      "epoch": 2.2584657279576863,
      "grad_norm": 0.16601166129112244,
      "learning_rate": 5.519197696408216e-06,
      "loss": 0.0194,
      "step": 1380040
    },
    {
      "epoch": 2.2584984583963394,
      "grad_norm": 0.30785685777664185,
      "learning_rate": 5.519131804194698e-06,
      "loss": 0.0139,
      "step": 1380060
    },
    {
      "epoch": 2.2585311888349926,
      "grad_norm": 0.6035950779914856,
      "learning_rate": 5.519065911981182e-06,
      "loss": 0.014,
      "step": 1380080
    },
    {
      "epoch": 2.258563919273646,
      "grad_norm": 0.7444983720779419,
      "learning_rate": 5.5190000197676645e-06,
      "loss": 0.0205,
      "step": 1380100
    },
    {
      "epoch": 2.2585966497122993,
      "grad_norm": 0.17196951806545258,
      "learning_rate": 5.518934127554147e-06,
      "loss": 0.0236,
      "step": 1380120
    },
    {
      "epoch": 2.258629380150953,
      "grad_norm": 0.9188894629478455,
      "learning_rate": 5.51886823534063e-06,
      "loss": 0.0181,
      "step": 1380140
    },
    {
      "epoch": 2.258662110589606,
      "grad_norm": 0.9869362115859985,
      "learning_rate": 5.5188023431271135e-06,
      "loss": 0.0152,
      "step": 1380160
    },
    {
      "epoch": 2.2586948410282597,
      "grad_norm": 1.003691554069519,
      "learning_rate": 5.518736450913595e-06,
      "loss": 0.0178,
      "step": 1380180
    },
    {
      "epoch": 2.258727571466913,
      "grad_norm": 0.657734215259552,
      "learning_rate": 5.518670558700079e-06,
      "loss": 0.0209,
      "step": 1380200
    },
    {
      "epoch": 2.258760301905566,
      "grad_norm": 0.6164885759353638,
      "learning_rate": 5.518604666486563e-06,
      "loss": 0.0139,
      "step": 1380220
    },
    {
      "epoch": 2.2587930323442196,
      "grad_norm": 0.11034835875034332,
      "learning_rate": 5.5185387742730445e-06,
      "loss": 0.0172,
      "step": 1380240
    },
    {
      "epoch": 2.2588257627828727,
      "grad_norm": 0.5040570497512817,
      "learning_rate": 5.518472882059528e-06,
      "loss": 0.0114,
      "step": 1380260
    },
    {
      "epoch": 2.2588584932215263,
      "grad_norm": 0.8466250896453857,
      "learning_rate": 5.51840698984601e-06,
      "loss": 0.0174,
      "step": 1380280
    },
    {
      "epoch": 2.2588912236601795,
      "grad_norm": 0.41267821192741394,
      "learning_rate": 5.5183410976324935e-06,
      "loss": 0.0158,
      "step": 1380300
    },
    {
      "epoch": 2.258923954098833,
      "grad_norm": 0.4708031713962555,
      "learning_rate": 5.518275205418976e-06,
      "loss": 0.0163,
      "step": 1380320
    },
    {
      "epoch": 2.258956684537486,
      "grad_norm": 0.49440157413482666,
      "learning_rate": 5.518209313205459e-06,
      "loss": 0.0138,
      "step": 1380340
    },
    {
      "epoch": 2.2589894149761394,
      "grad_norm": 0.1891622543334961,
      "learning_rate": 5.518143420991942e-06,
      "loss": 0.012,
      "step": 1380360
    },
    {
      "epoch": 2.259022145414793,
      "grad_norm": 0.3259337246417999,
      "learning_rate": 5.518077528778425e-06,
      "loss": 0.014,
      "step": 1380380
    },
    {
      "epoch": 2.259054875853446,
      "grad_norm": 0.28704991936683655,
      "learning_rate": 5.518011636564907e-06,
      "loss": 0.0177,
      "step": 1380400
    },
    {
      "epoch": 2.2590876062920997,
      "grad_norm": 0.1615123599767685,
      "learning_rate": 5.517945744351391e-06,
      "loss": 0.0157,
      "step": 1380420
    },
    {
      "epoch": 2.259120336730753,
      "grad_norm": 0.3988097012042999,
      "learning_rate": 5.517879852137873e-06,
      "loss": 0.0104,
      "step": 1380440
    },
    {
      "epoch": 2.2591530671694064,
      "grad_norm": 0.3134998679161072,
      "learning_rate": 5.517813959924356e-06,
      "loss": 0.0083,
      "step": 1380460
    },
    {
      "epoch": 2.2591857976080596,
      "grad_norm": 0.3881932199001312,
      "learning_rate": 5.517748067710838e-06,
      "loss": 0.0228,
      "step": 1380480
    },
    {
      "epoch": 2.2592185280467127,
      "grad_norm": 0.7561629414558411,
      "learning_rate": 5.517682175497322e-06,
      "loss": 0.0181,
      "step": 1380500
    },
    {
      "epoch": 2.2592512584853663,
      "grad_norm": 0.7752137184143066,
      "learning_rate": 5.5176162832838045e-06,
      "loss": 0.0173,
      "step": 1380520
    },
    {
      "epoch": 2.2592839889240195,
      "grad_norm": 0.1918238401412964,
      "learning_rate": 5.517550391070287e-06,
      "loss": 0.0104,
      "step": 1380540
    },
    {
      "epoch": 2.259316719362673,
      "grad_norm": 0.3189505934715271,
      "learning_rate": 5.517484498856771e-06,
      "loss": 0.0186,
      "step": 1380560
    },
    {
      "epoch": 2.2593494498013262,
      "grad_norm": 0.5874441266059875,
      "learning_rate": 5.517418606643254e-06,
      "loss": 0.0123,
      "step": 1380580
    },
    {
      "epoch": 2.2593821802399794,
      "grad_norm": 0.10067584365606308,
      "learning_rate": 5.517352714429736e-06,
      "loss": 0.0113,
      "step": 1380600
    },
    {
      "epoch": 2.259414910678633,
      "grad_norm": 0.5448469519615173,
      "learning_rate": 5.517286822216219e-06,
      "loss": 0.019,
      "step": 1380620
    },
    {
      "epoch": 2.259447641117286,
      "grad_norm": 1.1837663650512695,
      "learning_rate": 5.517220930002703e-06,
      "loss": 0.0225,
      "step": 1380640
    },
    {
      "epoch": 2.2594803715559397,
      "grad_norm": 0.371956467628479,
      "learning_rate": 5.5171550377891846e-06,
      "loss": 0.01,
      "step": 1380660
    },
    {
      "epoch": 2.259513101994593,
      "grad_norm": 0.12283995002508163,
      "learning_rate": 5.517089145575668e-06,
      "loss": 0.0096,
      "step": 1380680
    },
    {
      "epoch": 2.259545832433246,
      "grad_norm": 0.5007738471031189,
      "learning_rate": 5.51702325336215e-06,
      "loss": 0.0175,
      "step": 1380700
    },
    {
      "epoch": 2.2595785628718996,
      "grad_norm": 0.04596629738807678,
      "learning_rate": 5.516957361148634e-06,
      "loss": 0.0132,
      "step": 1380720
    },
    {
      "epoch": 2.2596112933105528,
      "grad_norm": 0.5693603157997131,
      "learning_rate": 5.516891468935116e-06,
      "loss": 0.0204,
      "step": 1380740
    },
    {
      "epoch": 2.2596440237492064,
      "grad_norm": 0.359920471906662,
      "learning_rate": 5.516825576721599e-06,
      "loss": 0.0114,
      "step": 1380760
    },
    {
      "epoch": 2.2596767541878595,
      "grad_norm": 0.2405117005109787,
      "learning_rate": 5.516759684508082e-06,
      "loss": 0.0112,
      "step": 1380780
    },
    {
      "epoch": 2.259709484626513,
      "grad_norm": 0.46208077669143677,
      "learning_rate": 5.5166937922945654e-06,
      "loss": 0.0168,
      "step": 1380800
    },
    {
      "epoch": 2.2597422150651663,
      "grad_norm": 0.8423780798912048,
      "learning_rate": 5.516627900081047e-06,
      "loss": 0.0186,
      "step": 1380820
    },
    {
      "epoch": 2.2597749455038194,
      "grad_norm": 0.06026335805654526,
      "learning_rate": 5.516562007867531e-06,
      "loss": 0.0171,
      "step": 1380840
    },
    {
      "epoch": 2.259807675942473,
      "grad_norm": 0.19927260279655457,
      "learning_rate": 5.516496115654013e-06,
      "loss": 0.0099,
      "step": 1380860
    },
    {
      "epoch": 2.259840406381126,
      "grad_norm": 0.7758569717407227,
      "learning_rate": 5.516430223440496e-06,
      "loss": 0.019,
      "step": 1380880
    },
    {
      "epoch": 2.2598731368197797,
      "grad_norm": 0.16390618681907654,
      "learning_rate": 5.51636433122698e-06,
      "loss": 0.0109,
      "step": 1380900
    },
    {
      "epoch": 2.259905867258433,
      "grad_norm": 0.09835391491651535,
      "learning_rate": 5.516298439013462e-06,
      "loss": 0.0133,
      "step": 1380920
    },
    {
      "epoch": 2.2599385976970865,
      "grad_norm": 0.7741190791130066,
      "learning_rate": 5.5162325467999455e-06,
      "loss": 0.0196,
      "step": 1380940
    },
    {
      "epoch": 2.2599713281357396,
      "grad_norm": 0.5503905415534973,
      "learning_rate": 5.516166654586428e-06,
      "loss": 0.0157,
      "step": 1380960
    },
    {
      "epoch": 2.260004058574393,
      "grad_norm": 0.48243194818496704,
      "learning_rate": 5.516100762372911e-06,
      "loss": 0.0212,
      "step": 1380980
    },
    {
      "epoch": 2.2600367890130464,
      "grad_norm": 0.2812945246696472,
      "learning_rate": 5.516034870159394e-06,
      "loss": 0.0125,
      "step": 1381000
    },
    {
      "epoch": 2.2600695194516995,
      "grad_norm": 0.6173235177993774,
      "learning_rate": 5.515968977945877e-06,
      "loss": 0.0125,
      "step": 1381020
    },
    {
      "epoch": 2.260102249890353,
      "grad_norm": 0.5895582437515259,
      "learning_rate": 5.515903085732359e-06,
      "loss": 0.0116,
      "step": 1381040
    },
    {
      "epoch": 2.2601349803290063,
      "grad_norm": 0.1347876638174057,
      "learning_rate": 5.515837193518843e-06,
      "loss": 0.0177,
      "step": 1381060
    },
    {
      "epoch": 2.26016771076766,
      "grad_norm": 0.968072772026062,
      "learning_rate": 5.515771301305325e-06,
      "loss": 0.0146,
      "step": 1381080
    },
    {
      "epoch": 2.260200441206313,
      "grad_norm": 0.22341693937778473,
      "learning_rate": 5.515705409091808e-06,
      "loss": 0.0135,
      "step": 1381100
    },
    {
      "epoch": 2.260233171644966,
      "grad_norm": 0.41171005368232727,
      "learning_rate": 5.515639516878291e-06,
      "loss": 0.0079,
      "step": 1381120
    },
    {
      "epoch": 2.2602659020836198,
      "grad_norm": 0.4052664637565613,
      "learning_rate": 5.515573624664774e-06,
      "loss": 0.0136,
      "step": 1381140
    },
    {
      "epoch": 2.260298632522273,
      "grad_norm": 0.7065693140029907,
      "learning_rate": 5.5155077324512564e-06,
      "loss": 0.0173,
      "step": 1381160
    },
    {
      "epoch": 2.2603313629609265,
      "grad_norm": 0.039557259529829025,
      "learning_rate": 5.51544184023774e-06,
      "loss": 0.0146,
      "step": 1381180
    },
    {
      "epoch": 2.2603640933995797,
      "grad_norm": 0.058754418045282364,
      "learning_rate": 5.515375948024222e-06,
      "loss": 0.0143,
      "step": 1381200
    },
    {
      "epoch": 2.2603968238382333,
      "grad_norm": 0.4534013271331787,
      "learning_rate": 5.5153100558107055e-06,
      "loss": 0.0192,
      "step": 1381220
    },
    {
      "epoch": 2.2604295542768864,
      "grad_norm": 0.32507067918777466,
      "learning_rate": 5.515244163597187e-06,
      "loss": 0.0202,
      "step": 1381240
    },
    {
      "epoch": 2.2604622847155396,
      "grad_norm": 0.29807019233703613,
      "learning_rate": 5.515178271383671e-06,
      "loss": 0.02,
      "step": 1381260
    },
    {
      "epoch": 2.260495015154193,
      "grad_norm": 0.8986808061599731,
      "learning_rate": 5.5151123791701546e-06,
      "loss": 0.025,
      "step": 1381280
    },
    {
      "epoch": 2.2605277455928463,
      "grad_norm": 0.23714600503444672,
      "learning_rate": 5.5150464869566365e-06,
      "loss": 0.0121,
      "step": 1381300
    },
    {
      "epoch": 2.2605604760315,
      "grad_norm": 1.043939471244812,
      "learning_rate": 5.51498059474312e-06,
      "loss": 0.0208,
      "step": 1381320
    },
    {
      "epoch": 2.260593206470153,
      "grad_norm": 0.28850704431533813,
      "learning_rate": 5.514914702529603e-06,
      "loss": 0.0159,
      "step": 1381340
    },
    {
      "epoch": 2.2606259369088066,
      "grad_norm": 0.07828773558139801,
      "learning_rate": 5.5148488103160855e-06,
      "loss": 0.0112,
      "step": 1381360
    },
    {
      "epoch": 2.26065866734746,
      "grad_norm": 0.24365638196468353,
      "learning_rate": 5.514782918102568e-06,
      "loss": 0.0118,
      "step": 1381380
    },
    {
      "epoch": 2.260691397786113,
      "grad_norm": 0.4265572726726532,
      "learning_rate": 5.514717025889052e-06,
      "loss": 0.0166,
      "step": 1381400
    },
    {
      "epoch": 2.2607241282247665,
      "grad_norm": 0.14384599030017853,
      "learning_rate": 5.514651133675534e-06,
      "loss": 0.0167,
      "step": 1381420
    },
    {
      "epoch": 2.2607568586634197,
      "grad_norm": 0.38616102933883667,
      "learning_rate": 5.514585241462017e-06,
      "loss": 0.0113,
      "step": 1381440
    },
    {
      "epoch": 2.2607895891020733,
      "grad_norm": 0.17083655297756195,
      "learning_rate": 5.514519349248499e-06,
      "loss": 0.0201,
      "step": 1381460
    },
    {
      "epoch": 2.2608223195407264,
      "grad_norm": 0.7329193353652954,
      "learning_rate": 5.514453457034983e-06,
      "loss": 0.0143,
      "step": 1381480
    },
    {
      "epoch": 2.26085504997938,
      "grad_norm": 0.1777430772781372,
      "learning_rate": 5.514387564821465e-06,
      "loss": 0.0172,
      "step": 1381500
    },
    {
      "epoch": 2.260887780418033,
      "grad_norm": 0.12207257747650146,
      "learning_rate": 5.514321672607948e-06,
      "loss": 0.014,
      "step": 1381520
    },
    {
      "epoch": 2.2609205108566863,
      "grad_norm": 0.09226352721452713,
      "learning_rate": 5.514255780394431e-06,
      "loss": 0.0102,
      "step": 1381540
    },
    {
      "epoch": 2.26095324129534,
      "grad_norm": 0.10525663197040558,
      "learning_rate": 5.514189888180914e-06,
      "loss": 0.0126,
      "step": 1381560
    },
    {
      "epoch": 2.260985971733993,
      "grad_norm": 0.7709380984306335,
      "learning_rate": 5.5141239959673965e-06,
      "loss": 0.0158,
      "step": 1381580
    },
    {
      "epoch": 2.2610187021726467,
      "grad_norm": 0.06450388580560684,
      "learning_rate": 5.51405810375388e-06,
      "loss": 0.0174,
      "step": 1381600
    },
    {
      "epoch": 2.2610514326113,
      "grad_norm": 0.11074749380350113,
      "learning_rate": 5.513992211540363e-06,
      "loss": 0.0186,
      "step": 1381620
    },
    {
      "epoch": 2.2610841630499534,
      "grad_norm": 0.16496041417121887,
      "learning_rate": 5.513926319326846e-06,
      "loss": 0.0136,
      "step": 1381640
    },
    {
      "epoch": 2.2611168934886066,
      "grad_norm": 0.25215083360671997,
      "learning_rate": 5.513860427113329e-06,
      "loss": 0.0163,
      "step": 1381660
    },
    {
      "epoch": 2.2611496239272597,
      "grad_norm": 0.5650777220726013,
      "learning_rate": 5.513794534899811e-06,
      "loss": 0.0159,
      "step": 1381680
    },
    {
      "epoch": 2.2611823543659133,
      "grad_norm": 0.3694230616092682,
      "learning_rate": 5.513728642686295e-06,
      "loss": 0.0175,
      "step": 1381700
    },
    {
      "epoch": 2.2612150848045665,
      "grad_norm": 0.27958986163139343,
      "learning_rate": 5.5136627504727765e-06,
      "loss": 0.0111,
      "step": 1381720
    },
    {
      "epoch": 2.26124781524322,
      "grad_norm": 0.1511603444814682,
      "learning_rate": 5.51359685825926e-06,
      "loss": 0.0209,
      "step": 1381740
    },
    {
      "epoch": 2.261280545681873,
      "grad_norm": 0.17348763346672058,
      "learning_rate": 5.513530966045743e-06,
      "loss": 0.0178,
      "step": 1381760
    },
    {
      "epoch": 2.261313276120527,
      "grad_norm": 0.0778634324669838,
      "learning_rate": 5.513465073832226e-06,
      "loss": 0.0122,
      "step": 1381780
    },
    {
      "epoch": 2.26134600655918,
      "grad_norm": 0.1694624125957489,
      "learning_rate": 5.513399181618708e-06,
      "loss": 0.0141,
      "step": 1381800
    },
    {
      "epoch": 2.261378736997833,
      "grad_norm": 0.2170196771621704,
      "learning_rate": 5.513333289405192e-06,
      "loss": 0.0178,
      "step": 1381820
    },
    {
      "epoch": 2.2614114674364867,
      "grad_norm": 0.20255149900913239,
      "learning_rate": 5.513267397191674e-06,
      "loss": 0.014,
      "step": 1381840
    },
    {
      "epoch": 2.26144419787514,
      "grad_norm": 0.20400160551071167,
      "learning_rate": 5.513201504978157e-06,
      "loss": 0.0138,
      "step": 1381860
    },
    {
      "epoch": 2.2614769283137934,
      "grad_norm": 0.5530469417572021,
      "learning_rate": 5.513135612764639e-06,
      "loss": 0.0146,
      "step": 1381880
    },
    {
      "epoch": 2.2615096587524466,
      "grad_norm": 0.4524308741092682,
      "learning_rate": 5.513069720551123e-06,
      "loss": 0.0204,
      "step": 1381900
    },
    {
      "epoch": 2.2615423891911,
      "grad_norm": 0.8351925015449524,
      "learning_rate": 5.513003828337606e-06,
      "loss": 0.013,
      "step": 1381920
    },
    {
      "epoch": 2.2615751196297533,
      "grad_norm": 0.7688452005386353,
      "learning_rate": 5.512937936124088e-06,
      "loss": 0.0096,
      "step": 1381940
    },
    {
      "epoch": 2.2616078500684065,
      "grad_norm": 1.0005359649658203,
      "learning_rate": 5.512872043910572e-06,
      "loss": 0.0225,
      "step": 1381960
    },
    {
      "epoch": 2.26164058050706,
      "grad_norm": 0.6995636820793152,
      "learning_rate": 5.512806151697055e-06,
      "loss": 0.0175,
      "step": 1381980
    },
    {
      "epoch": 2.2616733109457132,
      "grad_norm": 0.6268848180770874,
      "learning_rate": 5.5127402594835374e-06,
      "loss": 0.0193,
      "step": 1382000
    },
    {
      "epoch": 2.261706041384367,
      "grad_norm": 0.1476610153913498,
      "learning_rate": 5.51267436727002e-06,
      "loss": 0.013,
      "step": 1382020
    },
    {
      "epoch": 2.26173877182302,
      "grad_norm": 0.22472116351127625,
      "learning_rate": 5.512608475056504e-06,
      "loss": 0.0148,
      "step": 1382040
    },
    {
      "epoch": 2.261771502261673,
      "grad_norm": 0.5203503966331482,
      "learning_rate": 5.512542582842986e-06,
      "loss": 0.013,
      "step": 1382060
    },
    {
      "epoch": 2.2618042327003267,
      "grad_norm": 0.28120455145835876,
      "learning_rate": 5.512476690629469e-06,
      "loss": 0.0222,
      "step": 1382080
    },
    {
      "epoch": 2.26183696313898,
      "grad_norm": 0.600325882434845,
      "learning_rate": 5.512410798415951e-06,
      "loss": 0.0139,
      "step": 1382100
    },
    {
      "epoch": 2.2618696935776335,
      "grad_norm": 0.16156938672065735,
      "learning_rate": 5.512344906202435e-06,
      "loss": 0.0147,
      "step": 1382120
    },
    {
      "epoch": 2.2619024240162866,
      "grad_norm": 0.31007179617881775,
      "learning_rate": 5.5122790139889175e-06,
      "loss": 0.0157,
      "step": 1382140
    },
    {
      "epoch": 2.2619351544549398,
      "grad_norm": 0.311098575592041,
      "learning_rate": 5.5122131217754e-06,
      "loss": 0.0138,
      "step": 1382160
    },
    {
      "epoch": 2.2619678848935934,
      "grad_norm": 0.21058039367198944,
      "learning_rate": 5.512147229561883e-06,
      "loss": 0.0106,
      "step": 1382180
    },
    {
      "epoch": 2.2620006153322465,
      "grad_norm": 0.20488937199115753,
      "learning_rate": 5.5120813373483665e-06,
      "loss": 0.0097,
      "step": 1382200
    },
    {
      "epoch": 2.2620333457709,
      "grad_norm": 0.8381925225257874,
      "learning_rate": 5.512015445134848e-06,
      "loss": 0.0167,
      "step": 1382220
    },
    {
      "epoch": 2.2620660762095532,
      "grad_norm": 0.2656744420528412,
      "learning_rate": 5.511949552921332e-06,
      "loss": 0.0103,
      "step": 1382240
    },
    {
      "epoch": 2.262098806648207,
      "grad_norm": 0.22461596131324768,
      "learning_rate": 5.511883660707814e-06,
      "loss": 0.0177,
      "step": 1382260
    },
    {
      "epoch": 2.26213153708686,
      "grad_norm": 0.10513469576835632,
      "learning_rate": 5.5118177684942975e-06,
      "loss": 0.0163,
      "step": 1382280
    },
    {
      "epoch": 2.262164267525513,
      "grad_norm": 0.38598352670669556,
      "learning_rate": 5.511751876280781e-06,
      "loss": 0.0149,
      "step": 1382300
    },
    {
      "epoch": 2.2621969979641667,
      "grad_norm": 0.3099033236503601,
      "learning_rate": 5.511685984067263e-06,
      "loss": 0.0195,
      "step": 1382320
    },
    {
      "epoch": 2.26222972840282,
      "grad_norm": 1.0321170091629028,
      "learning_rate": 5.5116200918537466e-06,
      "loss": 0.016,
      "step": 1382340
    },
    {
      "epoch": 2.2622624588414735,
      "grad_norm": 0.32070162892341614,
      "learning_rate": 5.5115541996402284e-06,
      "loss": 0.0176,
      "step": 1382360
    },
    {
      "epoch": 2.2622951892801266,
      "grad_norm": 0.21786518394947052,
      "learning_rate": 5.511488307426712e-06,
      "loss": 0.0173,
      "step": 1382380
    },
    {
      "epoch": 2.2623279197187802,
      "grad_norm": 0.16133564710617065,
      "learning_rate": 5.511422415213195e-06,
      "loss": 0.0157,
      "step": 1382400
    },
    {
      "epoch": 2.2623606501574334,
      "grad_norm": 0.26862841844558716,
      "learning_rate": 5.511356522999678e-06,
      "loss": 0.0173,
      "step": 1382420
    },
    {
      "epoch": 2.2623933805960865,
      "grad_norm": 0.6468318104743958,
      "learning_rate": 5.51129063078616e-06,
      "loss": 0.0192,
      "step": 1382440
    },
    {
      "epoch": 2.26242611103474,
      "grad_norm": 0.8369753956794739,
      "learning_rate": 5.511224738572644e-06,
      "loss": 0.0157,
      "step": 1382460
    },
    {
      "epoch": 2.2624588414733933,
      "grad_norm": 0.06697771698236465,
      "learning_rate": 5.511158846359126e-06,
      "loss": 0.0143,
      "step": 1382480
    },
    {
      "epoch": 2.262491571912047,
      "grad_norm": 0.31954270601272583,
      "learning_rate": 5.511092954145609e-06,
      "loss": 0.0137,
      "step": 1382500
    },
    {
      "epoch": 2.2625243023507,
      "grad_norm": 0.4432500898838043,
      "learning_rate": 5.511027061932091e-06,
      "loss": 0.0137,
      "step": 1382520
    },
    {
      "epoch": 2.2625570327893536,
      "grad_norm": 0.7300830483436584,
      "learning_rate": 5.510961169718575e-06,
      "loss": 0.0194,
      "step": 1382540
    },
    {
      "epoch": 2.2625897632280068,
      "grad_norm": 0.6523856520652771,
      "learning_rate": 5.5108952775050575e-06,
      "loss": 0.0179,
      "step": 1382560
    },
    {
      "epoch": 2.26262249366666,
      "grad_norm": 0.3783529996871948,
      "learning_rate": 5.51082938529154e-06,
      "loss": 0.0124,
      "step": 1382580
    },
    {
      "epoch": 2.2626552241053135,
      "grad_norm": 0.4273749589920044,
      "learning_rate": 5.510763493078023e-06,
      "loss": 0.0131,
      "step": 1382600
    },
    {
      "epoch": 2.2626879545439667,
      "grad_norm": 0.29621341824531555,
      "learning_rate": 5.510697600864507e-06,
      "loss": 0.0105,
      "step": 1382620
    },
    {
      "epoch": 2.2627206849826202,
      "grad_norm": 0.3894466757774353,
      "learning_rate": 5.5106317086509885e-06,
      "loss": 0.0121,
      "step": 1382640
    },
    {
      "epoch": 2.2627534154212734,
      "grad_norm": 0.8638238310813904,
      "learning_rate": 5.510565816437472e-06,
      "loss": 0.0178,
      "step": 1382660
    },
    {
      "epoch": 2.262786145859927,
      "grad_norm": 0.40322235226631165,
      "learning_rate": 5.510499924223956e-06,
      "loss": 0.0115,
      "step": 1382680
    },
    {
      "epoch": 2.26281887629858,
      "grad_norm": 1.5213358402252197,
      "learning_rate": 5.5104340320104376e-06,
      "loss": 0.0275,
      "step": 1382700
    },
    {
      "epoch": 2.2628516067372333,
      "grad_norm": 3.6349949836730957,
      "learning_rate": 5.510368139796921e-06,
      "loss": 0.0182,
      "step": 1382720
    },
    {
      "epoch": 2.262884337175887,
      "grad_norm": 0.21309097111225128,
      "learning_rate": 5.510302247583403e-06,
      "loss": 0.0089,
      "step": 1382740
    },
    {
      "epoch": 2.26291706761454,
      "grad_norm": 0.6295374631881714,
      "learning_rate": 5.510236355369887e-06,
      "loss": 0.0167,
      "step": 1382760
    },
    {
      "epoch": 2.2629497980531936,
      "grad_norm": 0.7149977087974548,
      "learning_rate": 5.510170463156369e-06,
      "loss": 0.0189,
      "step": 1382780
    },
    {
      "epoch": 2.262982528491847,
      "grad_norm": 0.4312871992588043,
      "learning_rate": 5.510104570942852e-06,
      "loss": 0.0126,
      "step": 1382800
    },
    {
      "epoch": 2.2630152589305004,
      "grad_norm": 0.2080356478691101,
      "learning_rate": 5.510038678729335e-06,
      "loss": 0.0168,
      "step": 1382820
    },
    {
      "epoch": 2.2630479893691535,
      "grad_norm": 0.24894295632839203,
      "learning_rate": 5.5099727865158184e-06,
      "loss": 0.0167,
      "step": 1382840
    },
    {
      "epoch": 2.2630807198078067,
      "grad_norm": 0.2594698369503021,
      "learning_rate": 5.5099068943023e-06,
      "loss": 0.0197,
      "step": 1382860
    },
    {
      "epoch": 2.2631134502464603,
      "grad_norm": 0.36796632409095764,
      "learning_rate": 5.509841002088784e-06,
      "loss": 0.0168,
      "step": 1382880
    },
    {
      "epoch": 2.2631461806851134,
      "grad_norm": 0.42574504017829895,
      "learning_rate": 5.509775109875266e-06,
      "loss": 0.0127,
      "step": 1382900
    },
    {
      "epoch": 2.263178911123767,
      "grad_norm": 0.3112778067588806,
      "learning_rate": 5.509709217661749e-06,
      "loss": 0.0172,
      "step": 1382920
    },
    {
      "epoch": 2.26321164156242,
      "grad_norm": 0.3109251856803894,
      "learning_rate": 5.509643325448232e-06,
      "loss": 0.0183,
      "step": 1382940
    },
    {
      "epoch": 2.2632443720010738,
      "grad_norm": 0.4996525049209595,
      "learning_rate": 5.509577433234715e-06,
      "loss": 0.0068,
      "step": 1382960
    },
    {
      "epoch": 2.263277102439727,
      "grad_norm": 1.0479562282562256,
      "learning_rate": 5.509511541021198e-06,
      "loss": 0.0186,
      "step": 1382980
    },
    {
      "epoch": 2.26330983287838,
      "grad_norm": 0.22115793824195862,
      "learning_rate": 5.509445648807681e-06,
      "loss": 0.0135,
      "step": 1383000
    },
    {
      "epoch": 2.2633425633170337,
      "grad_norm": 0.8581087589263916,
      "learning_rate": 5.509379756594164e-06,
      "loss": 0.0136,
      "step": 1383020
    },
    {
      "epoch": 2.263375293755687,
      "grad_norm": 0.22873006761074066,
      "learning_rate": 5.509313864380647e-06,
      "loss": 0.0176,
      "step": 1383040
    },
    {
      "epoch": 2.2634080241943404,
      "grad_norm": 0.7248703837394714,
      "learning_rate": 5.50924797216713e-06,
      "loss": 0.0201,
      "step": 1383060
    },
    {
      "epoch": 2.2634407546329935,
      "grad_norm": 0.216270312666893,
      "learning_rate": 5.509182079953612e-06,
      "loss": 0.0218,
      "step": 1383080
    },
    {
      "epoch": 2.263473485071647,
      "grad_norm": 0.41233688592910767,
      "learning_rate": 5.509116187740096e-06,
      "loss": 0.0111,
      "step": 1383100
    },
    {
      "epoch": 2.2635062155103003,
      "grad_norm": 1.1190192699432373,
      "learning_rate": 5.509050295526578e-06,
      "loss": 0.0162,
      "step": 1383120
    },
    {
      "epoch": 2.2635389459489534,
      "grad_norm": 0.3223741948604584,
      "learning_rate": 5.508984403313061e-06,
      "loss": 0.0153,
      "step": 1383140
    },
    {
      "epoch": 2.263571676387607,
      "grad_norm": 0.26818984746932983,
      "learning_rate": 5.508918511099544e-06,
      "loss": 0.0173,
      "step": 1383160
    },
    {
      "epoch": 2.26360440682626,
      "grad_norm": 0.6730549931526184,
      "learning_rate": 5.508852618886027e-06,
      "loss": 0.0139,
      "step": 1383180
    },
    {
      "epoch": 2.263637137264914,
      "grad_norm": 0.1515108346939087,
      "learning_rate": 5.5087867266725094e-06,
      "loss": 0.0186,
      "step": 1383200
    },
    {
      "epoch": 2.263669867703567,
      "grad_norm": 0.3430039882659912,
      "learning_rate": 5.508720834458993e-06,
      "loss": 0.0178,
      "step": 1383220
    },
    {
      "epoch": 2.2637025981422205,
      "grad_norm": 0.3168618679046631,
      "learning_rate": 5.508654942245475e-06,
      "loss": 0.0161,
      "step": 1383240
    },
    {
      "epoch": 2.2637353285808737,
      "grad_norm": 0.41165539622306824,
      "learning_rate": 5.5085890500319585e-06,
      "loss": 0.0104,
      "step": 1383260
    },
    {
      "epoch": 2.263768059019527,
      "grad_norm": 0.1877480298280716,
      "learning_rate": 5.50852315781844e-06,
      "loss": 0.011,
      "step": 1383280
    },
    {
      "epoch": 2.2638007894581804,
      "grad_norm": 0.5048146843910217,
      "learning_rate": 5.508457265604924e-06,
      "loss": 0.0144,
      "step": 1383300
    },
    {
      "epoch": 2.2638335198968336,
      "grad_norm": 0.6737447381019592,
      "learning_rate": 5.508391373391406e-06,
      "loss": 0.0168,
      "step": 1383320
    },
    {
      "epoch": 2.263866250335487,
      "grad_norm": 0.13238570094108582,
      "learning_rate": 5.5083254811778895e-06,
      "loss": 0.0209,
      "step": 1383340
    },
    {
      "epoch": 2.2638989807741403,
      "grad_norm": 1.1275780200958252,
      "learning_rate": 5.508259588964373e-06,
      "loss": 0.0179,
      "step": 1383360
    },
    {
      "epoch": 2.263931711212794,
      "grad_norm": 0.312818318605423,
      "learning_rate": 5.508193696750855e-06,
      "loss": 0.015,
      "step": 1383380
    },
    {
      "epoch": 2.263964441651447,
      "grad_norm": 0.8035797476768494,
      "learning_rate": 5.5081278045373385e-06,
      "loss": 0.0135,
      "step": 1383400
    },
    {
      "epoch": 2.2639971720901,
      "grad_norm": 0.6169087290763855,
      "learning_rate": 5.508061912323821e-06,
      "loss": 0.0263,
      "step": 1383420
    },
    {
      "epoch": 2.264029902528754,
      "grad_norm": 0.6157916188240051,
      "learning_rate": 5.507996020110304e-06,
      "loss": 0.0121,
      "step": 1383440
    },
    {
      "epoch": 2.264062632967407,
      "grad_norm": 0.046911194920539856,
      "learning_rate": 5.507930127896787e-06,
      "loss": 0.0144,
      "step": 1383460
    },
    {
      "epoch": 2.2640953634060605,
      "grad_norm": 0.8479822874069214,
      "learning_rate": 5.50786423568327e-06,
      "loss": 0.0139,
      "step": 1383480
    },
    {
      "epoch": 2.2641280938447137,
      "grad_norm": 0.059343576431274414,
      "learning_rate": 5.507798343469752e-06,
      "loss": 0.0173,
      "step": 1383500
    },
    {
      "epoch": 2.2641608242833673,
      "grad_norm": 1.2414374351501465,
      "learning_rate": 5.507732451256236e-06,
      "loss": 0.0146,
      "step": 1383520
    },
    {
      "epoch": 2.2641935547220204,
      "grad_norm": 0.5066683888435364,
      "learning_rate": 5.507666559042718e-06,
      "loss": 0.0139,
      "step": 1383540
    },
    {
      "epoch": 2.2642262851606736,
      "grad_norm": 0.2960242033004761,
      "learning_rate": 5.507600666829201e-06,
      "loss": 0.0131,
      "step": 1383560
    },
    {
      "epoch": 2.264259015599327,
      "grad_norm": 0.16840243339538574,
      "learning_rate": 5.507534774615684e-06,
      "loss": 0.0164,
      "step": 1383580
    },
    {
      "epoch": 2.2642917460379803,
      "grad_norm": 0.5853352546691895,
      "learning_rate": 5.507468882402167e-06,
      "loss": 0.0161,
      "step": 1383600
    },
    {
      "epoch": 2.264324476476634,
      "grad_norm": 0.2912314534187317,
      "learning_rate": 5.5074029901886495e-06,
      "loss": 0.0127,
      "step": 1383620
    },
    {
      "epoch": 2.264357206915287,
      "grad_norm": 0.14127184450626373,
      "learning_rate": 5.507337097975133e-06,
      "loss": 0.0127,
      "step": 1383640
    },
    {
      "epoch": 2.2643899373539402,
      "grad_norm": 0.1706053763628006,
      "learning_rate": 5.507271205761615e-06,
      "loss": 0.0154,
      "step": 1383660
    },
    {
      "epoch": 2.264422667792594,
      "grad_norm": 0.5060194730758667,
      "learning_rate": 5.507205313548099e-06,
      "loss": 0.0105,
      "step": 1383680
    },
    {
      "epoch": 2.264455398231247,
      "grad_norm": 0.6475546360015869,
      "learning_rate": 5.5071394213345805e-06,
      "loss": 0.0151,
      "step": 1383700
    },
    {
      "epoch": 2.2644881286699006,
      "grad_norm": 0.7604541778564453,
      "learning_rate": 5.507073529121064e-06,
      "loss": 0.0218,
      "step": 1383720
    },
    {
      "epoch": 2.2645208591085537,
      "grad_norm": 0.2076081782579422,
      "learning_rate": 5.507007636907548e-06,
      "loss": 0.0168,
      "step": 1383740
    },
    {
      "epoch": 2.264553589547207,
      "grad_norm": 0.2796667218208313,
      "learning_rate": 5.5069417446940295e-06,
      "loss": 0.0105,
      "step": 1383760
    },
    {
      "epoch": 2.2645863199858605,
      "grad_norm": 0.6490652561187744,
      "learning_rate": 5.506875852480513e-06,
      "loss": 0.019,
      "step": 1383780
    },
    {
      "epoch": 2.2646190504245136,
      "grad_norm": 0.08659671247005463,
      "learning_rate": 5.506809960266996e-06,
      "loss": 0.0163,
      "step": 1383800
    },
    {
      "epoch": 2.264651780863167,
      "grad_norm": 0.166873499751091,
      "learning_rate": 5.506744068053479e-06,
      "loss": 0.0112,
      "step": 1383820
    },
    {
      "epoch": 2.2646845113018204,
      "grad_norm": 0.29198578000068665,
      "learning_rate": 5.506678175839961e-06,
      "loss": 0.0114,
      "step": 1383840
    },
    {
      "epoch": 2.264717241740474,
      "grad_norm": 0.45490574836730957,
      "learning_rate": 5.506612283626445e-06,
      "loss": 0.0205,
      "step": 1383860
    },
    {
      "epoch": 2.264749972179127,
      "grad_norm": 0.2431684285402298,
      "learning_rate": 5.506546391412927e-06,
      "loss": 0.0149,
      "step": 1383880
    },
    {
      "epoch": 2.2647827026177803,
      "grad_norm": 0.6991469860076904,
      "learning_rate": 5.50648049919941e-06,
      "loss": 0.0101,
      "step": 1383900
    },
    {
      "epoch": 2.264815433056434,
      "grad_norm": 0.07281886786222458,
      "learning_rate": 5.506414606985892e-06,
      "loss": 0.0142,
      "step": 1383920
    },
    {
      "epoch": 2.264848163495087,
      "grad_norm": 0.3853333294391632,
      "learning_rate": 5.506348714772376e-06,
      "loss": 0.0165,
      "step": 1383940
    },
    {
      "epoch": 2.2648808939337406,
      "grad_norm": 1.1181914806365967,
      "learning_rate": 5.506282822558859e-06,
      "loss": 0.0193,
      "step": 1383960
    },
    {
      "epoch": 2.2649136243723937,
      "grad_norm": 0.4596902132034302,
      "learning_rate": 5.506216930345341e-06,
      "loss": 0.0114,
      "step": 1383980
    },
    {
      "epoch": 2.2649463548110473,
      "grad_norm": 0.8862775564193726,
      "learning_rate": 5.506151038131824e-06,
      "loss": 0.0181,
      "step": 1384000
    },
    {
      "epoch": 2.2649790852497005,
      "grad_norm": 0.44735977053642273,
      "learning_rate": 5.506085145918308e-06,
      "loss": 0.0158,
      "step": 1384020
    },
    {
      "epoch": 2.2650118156883536,
      "grad_norm": 0.31549423933029175,
      "learning_rate": 5.50601925370479e-06,
      "loss": 0.0153,
      "step": 1384040
    },
    {
      "epoch": 2.2650445461270072,
      "grad_norm": 1.250023365020752,
      "learning_rate": 5.505953361491273e-06,
      "loss": 0.0182,
      "step": 1384060
    },
    {
      "epoch": 2.2650772765656604,
      "grad_norm": 0.1600070595741272,
      "learning_rate": 5.505887469277757e-06,
      "loss": 0.0136,
      "step": 1384080
    },
    {
      "epoch": 2.265110007004314,
      "grad_norm": 0.37129712104797363,
      "learning_rate": 5.505821577064239e-06,
      "loss": 0.013,
      "step": 1384100
    },
    {
      "epoch": 2.265142737442967,
      "grad_norm": 0.5930479168891907,
      "learning_rate": 5.505755684850722e-06,
      "loss": 0.0168,
      "step": 1384120
    },
    {
      "epoch": 2.2651754678816207,
      "grad_norm": 0.5087897181510925,
      "learning_rate": 5.505689792637204e-06,
      "loss": 0.0219,
      "step": 1384140
    },
    {
      "epoch": 2.265208198320274,
      "grad_norm": 0.3225560486316681,
      "learning_rate": 5.505623900423688e-06,
      "loss": 0.0148,
      "step": 1384160
    },
    {
      "epoch": 2.265240928758927,
      "grad_norm": 0.39078593254089355,
      "learning_rate": 5.5055580082101705e-06,
      "loss": 0.0152,
      "step": 1384180
    },
    {
      "epoch": 2.2652736591975806,
      "grad_norm": 0.2782125473022461,
      "learning_rate": 5.505492115996653e-06,
      "loss": 0.0119,
      "step": 1384200
    },
    {
      "epoch": 2.2653063896362338,
      "grad_norm": 0.2978692352771759,
      "learning_rate": 5.505426223783136e-06,
      "loss": 0.0144,
      "step": 1384220
    },
    {
      "epoch": 2.2653391200748874,
      "grad_norm": 0.7779763340950012,
      "learning_rate": 5.5053603315696195e-06,
      "loss": 0.0214,
      "step": 1384240
    },
    {
      "epoch": 2.2653718505135405,
      "grad_norm": 0.22039243578910828,
      "learning_rate": 5.5052944393561014e-06,
      "loss": 0.0228,
      "step": 1384260
    },
    {
      "epoch": 2.265404580952194,
      "grad_norm": 0.38432809710502625,
      "learning_rate": 5.505228547142585e-06,
      "loss": 0.0104,
      "step": 1384280
    },
    {
      "epoch": 2.2654373113908473,
      "grad_norm": 0.39127805829048157,
      "learning_rate": 5.505162654929067e-06,
      "loss": 0.0165,
      "step": 1384300
    },
    {
      "epoch": 2.2654700418295004,
      "grad_norm": 0.4611133337020874,
      "learning_rate": 5.5050967627155505e-06,
      "loss": 0.0159,
      "step": 1384320
    },
    {
      "epoch": 2.265502772268154,
      "grad_norm": 1.0324944257736206,
      "learning_rate": 5.505030870502032e-06,
      "loss": 0.0139,
      "step": 1384340
    },
    {
      "epoch": 2.265535502706807,
      "grad_norm": 0.5257039666175842,
      "learning_rate": 5.504964978288516e-06,
      "loss": 0.0139,
      "step": 1384360
    },
    {
      "epoch": 2.2655682331454607,
      "grad_norm": 0.534489095211029,
      "learning_rate": 5.504899086074999e-06,
      "loss": 0.0209,
      "step": 1384380
    },
    {
      "epoch": 2.265600963584114,
      "grad_norm": 0.2863139510154724,
      "learning_rate": 5.5048331938614815e-06,
      "loss": 0.0147,
      "step": 1384400
    },
    {
      "epoch": 2.2656336940227675,
      "grad_norm": 0.5357782244682312,
      "learning_rate": 5.504767301647965e-06,
      "loss": 0.0168,
      "step": 1384420
    },
    {
      "epoch": 2.2656664244614206,
      "grad_norm": 0.7128767371177673,
      "learning_rate": 5.504701409434448e-06,
      "loss": 0.0114,
      "step": 1384440
    },
    {
      "epoch": 2.265699154900074,
      "grad_norm": 0.28955528140068054,
      "learning_rate": 5.5046355172209305e-06,
      "loss": 0.0102,
      "step": 1384460
    },
    {
      "epoch": 2.2657318853387274,
      "grad_norm": 0.20867012441158295,
      "learning_rate": 5.504569625007413e-06,
      "loss": 0.0186,
      "step": 1384480
    },
    {
      "epoch": 2.2657646157773805,
      "grad_norm": 0.705045759677887,
      "learning_rate": 5.504503732793897e-06,
      "loss": 0.0132,
      "step": 1384500
    },
    {
      "epoch": 2.265797346216034,
      "grad_norm": 1.450481653213501,
      "learning_rate": 5.504437840580379e-06,
      "loss": 0.0206,
      "step": 1384520
    },
    {
      "epoch": 2.2658300766546873,
      "grad_norm": 2.1601810455322266,
      "learning_rate": 5.504371948366862e-06,
      "loss": 0.025,
      "step": 1384540
    },
    {
      "epoch": 2.265862807093341,
      "grad_norm": 0.3386160731315613,
      "learning_rate": 5.504306056153344e-06,
      "loss": 0.0106,
      "step": 1384560
    },
    {
      "epoch": 2.265895537531994,
      "grad_norm": 0.3250557780265808,
      "learning_rate": 5.504240163939828e-06,
      "loss": 0.0233,
      "step": 1384580
    },
    {
      "epoch": 2.265928267970647,
      "grad_norm": 0.18833701312541962,
      "learning_rate": 5.5041742717263105e-06,
      "loss": 0.0128,
      "step": 1384600
    },
    {
      "epoch": 2.2659609984093008,
      "grad_norm": 0.20283010601997375,
      "learning_rate": 5.504108379512793e-06,
      "loss": 0.0141,
      "step": 1384620
    },
    {
      "epoch": 2.265993728847954,
      "grad_norm": 0.15811815857887268,
      "learning_rate": 5.504042487299276e-06,
      "loss": 0.0163,
      "step": 1384640
    },
    {
      "epoch": 2.2660264592866075,
      "grad_norm": 0.2537040412425995,
      "learning_rate": 5.50397659508576e-06,
      "loss": 0.0181,
      "step": 1384660
    },
    {
      "epoch": 2.2660591897252607,
      "grad_norm": 0.6220916509628296,
      "learning_rate": 5.5039107028722415e-06,
      "loss": 0.0235,
      "step": 1384680
    },
    {
      "epoch": 2.2660919201639143,
      "grad_norm": 0.1971806138753891,
      "learning_rate": 5.503844810658725e-06,
      "loss": 0.0095,
      "step": 1384700
    },
    {
      "epoch": 2.2661246506025674,
      "grad_norm": 0.3590606153011322,
      "learning_rate": 5.503778918445207e-06,
      "loss": 0.014,
      "step": 1384720
    },
    {
      "epoch": 2.2661573810412206,
      "grad_norm": 0.6199147701263428,
      "learning_rate": 5.5037130262316906e-06,
      "loss": 0.0149,
      "step": 1384740
    },
    {
      "epoch": 2.266190111479874,
      "grad_norm": 0.5037739276885986,
      "learning_rate": 5.503647134018174e-06,
      "loss": 0.0204,
      "step": 1384760
    },
    {
      "epoch": 2.2662228419185273,
      "grad_norm": 0.19591566920280457,
      "learning_rate": 5.503581241804656e-06,
      "loss": 0.0134,
      "step": 1384780
    },
    {
      "epoch": 2.266255572357181,
      "grad_norm": 0.23320090770721436,
      "learning_rate": 5.50351534959114e-06,
      "loss": 0.0122,
      "step": 1384800
    },
    {
      "epoch": 2.266288302795834,
      "grad_norm": 1.1910555362701416,
      "learning_rate": 5.503449457377622e-06,
      "loss": 0.0183,
      "step": 1384820
    },
    {
      "epoch": 2.2663210332344876,
      "grad_norm": 0.2695302367210388,
      "learning_rate": 5.503383565164105e-06,
      "loss": 0.0075,
      "step": 1384840
    },
    {
      "epoch": 2.266353763673141,
      "grad_norm": 0.23129044473171234,
      "learning_rate": 5.503317672950588e-06,
      "loss": 0.0136,
      "step": 1384860
    },
    {
      "epoch": 2.266386494111794,
      "grad_norm": 0.5409695506095886,
      "learning_rate": 5.5032517807370714e-06,
      "loss": 0.0176,
      "step": 1384880
    },
    {
      "epoch": 2.2664192245504475,
      "grad_norm": 0.7654748558998108,
      "learning_rate": 5.503185888523553e-06,
      "loss": 0.0137,
      "step": 1384900
    },
    {
      "epoch": 2.2664519549891007,
      "grad_norm": 0.3271576166152954,
      "learning_rate": 5.503119996310037e-06,
      "loss": 0.0128,
      "step": 1384920
    },
    {
      "epoch": 2.2664846854277543,
      "grad_norm": 0.6050890684127808,
      "learning_rate": 5.503054104096519e-06,
      "loss": 0.0139,
      "step": 1384940
    },
    {
      "epoch": 2.2665174158664074,
      "grad_norm": 0.2893563210964203,
      "learning_rate": 5.502988211883002e-06,
      "loss": 0.0171,
      "step": 1384960
    },
    {
      "epoch": 2.266550146305061,
      "grad_norm": 0.7840297818183899,
      "learning_rate": 5.502922319669485e-06,
      "loss": 0.0148,
      "step": 1384980
    },
    {
      "epoch": 2.266582876743714,
      "grad_norm": 0.7155588865280151,
      "learning_rate": 5.502856427455968e-06,
      "loss": 0.0145,
      "step": 1385000
    },
    {
      "epoch": 2.2666156071823673,
      "grad_norm": 0.3050515353679657,
      "learning_rate": 5.502790535242451e-06,
      "loss": 0.0184,
      "step": 1385020
    },
    {
      "epoch": 2.266648337621021,
      "grad_norm": 1.5643670558929443,
      "learning_rate": 5.502724643028934e-06,
      "loss": 0.0149,
      "step": 1385040
    },
    {
      "epoch": 2.266681068059674,
      "grad_norm": 0.4475056231021881,
      "learning_rate": 5.502658750815416e-06,
      "loss": 0.0175,
      "step": 1385060
    },
    {
      "epoch": 2.2667137984983277,
      "grad_norm": 0.3532567620277405,
      "learning_rate": 5.5025928586019e-06,
      "loss": 0.0158,
      "step": 1385080
    },
    {
      "epoch": 2.266746528936981,
      "grad_norm": 0.8346235156059265,
      "learning_rate": 5.502526966388382e-06,
      "loss": 0.0147,
      "step": 1385100
    },
    {
      "epoch": 2.266779259375634,
      "grad_norm": 0.6811705231666565,
      "learning_rate": 5.502461074174865e-06,
      "loss": 0.0155,
      "step": 1385120
    },
    {
      "epoch": 2.2668119898142876,
      "grad_norm": 0.3803606629371643,
      "learning_rate": 5.502395181961349e-06,
      "loss": 0.0152,
      "step": 1385140
    },
    {
      "epoch": 2.2668447202529407,
      "grad_norm": 0.31159013509750366,
      "learning_rate": 5.502329289747831e-06,
      "loss": 0.0097,
      "step": 1385160
    },
    {
      "epoch": 2.2668774506915943,
      "grad_norm": 0.5536942481994629,
      "learning_rate": 5.502263397534314e-06,
      "loss": 0.0125,
      "step": 1385180
    },
    {
      "epoch": 2.2669101811302474,
      "grad_norm": 0.1488226354122162,
      "learning_rate": 5.502197505320797e-06,
      "loss": 0.0252,
      "step": 1385200
    },
    {
      "epoch": 2.2669429115689006,
      "grad_norm": 0.6264573931694031,
      "learning_rate": 5.50213161310728e-06,
      "loss": 0.0193,
      "step": 1385220
    },
    {
      "epoch": 2.266975642007554,
      "grad_norm": 0.40702274441719055,
      "learning_rate": 5.5020657208937625e-06,
      "loss": 0.0115,
      "step": 1385240
    },
    {
      "epoch": 2.2670083724462073,
      "grad_norm": 0.4269470274448395,
      "learning_rate": 5.501999828680246e-06,
      "loss": 0.0177,
      "step": 1385260
    },
    {
      "epoch": 2.267041102884861,
      "grad_norm": 0.3114791512489319,
      "learning_rate": 5.501933936466728e-06,
      "loss": 0.0087,
      "step": 1385280
    },
    {
      "epoch": 2.267073833323514,
      "grad_norm": 0.5612367391586304,
      "learning_rate": 5.5018680442532115e-06,
      "loss": 0.0238,
      "step": 1385300
    },
    {
      "epoch": 2.2671065637621677,
      "grad_norm": 0.2492140680551529,
      "learning_rate": 5.501802152039693e-06,
      "loss": 0.0156,
      "step": 1385320
    },
    {
      "epoch": 2.267139294200821,
      "grad_norm": 0.3844660520553589,
      "learning_rate": 5.501736259826177e-06,
      "loss": 0.0185,
      "step": 1385340
    },
    {
      "epoch": 2.267172024639474,
      "grad_norm": 0.7208018898963928,
      "learning_rate": 5.501670367612659e-06,
      "loss": 0.0212,
      "step": 1385360
    },
    {
      "epoch": 2.2672047550781276,
      "grad_norm": 0.2783769369125366,
      "learning_rate": 5.5016044753991425e-06,
      "loss": 0.0093,
      "step": 1385380
    },
    {
      "epoch": 2.2672374855167807,
      "grad_norm": 0.2256765067577362,
      "learning_rate": 5.501538583185625e-06,
      "loss": 0.0149,
      "step": 1385400
    },
    {
      "epoch": 2.2672702159554343,
      "grad_norm": 0.5379948019981384,
      "learning_rate": 5.501472690972108e-06,
      "loss": 0.0133,
      "step": 1385420
    },
    {
      "epoch": 2.2673029463940875,
      "grad_norm": 0.4307503402233124,
      "learning_rate": 5.501406798758591e-06,
      "loss": 0.0164,
      "step": 1385440
    },
    {
      "epoch": 2.267335676832741,
      "grad_norm": 0.5340942740440369,
      "learning_rate": 5.501340906545074e-06,
      "loss": 0.0148,
      "step": 1385460
    },
    {
      "epoch": 2.267368407271394,
      "grad_norm": 0.1606321632862091,
      "learning_rate": 5.501275014331557e-06,
      "loss": 0.0127,
      "step": 1385480
    },
    {
      "epoch": 2.2674011377100474,
      "grad_norm": 0.3018694818019867,
      "learning_rate": 5.50120912211804e-06,
      "loss": 0.0198,
      "step": 1385500
    },
    {
      "epoch": 2.267433868148701,
      "grad_norm": 0.5681159496307373,
      "learning_rate": 5.501143229904523e-06,
      "loss": 0.0126,
      "step": 1385520
    },
    {
      "epoch": 2.267466598587354,
      "grad_norm": 0.37787938117980957,
      "learning_rate": 5.501077337691005e-06,
      "loss": 0.017,
      "step": 1385540
    },
    {
      "epoch": 2.2674993290260077,
      "grad_norm": 0.48475855588912964,
      "learning_rate": 5.501011445477489e-06,
      "loss": 0.0171,
      "step": 1385560
    },
    {
      "epoch": 2.267532059464661,
      "grad_norm": 0.793121874332428,
      "learning_rate": 5.500945553263971e-06,
      "loss": 0.0121,
      "step": 1385580
    },
    {
      "epoch": 2.2675647899033144,
      "grad_norm": 0.5249033570289612,
      "learning_rate": 5.500879661050454e-06,
      "loss": 0.0147,
      "step": 1385600
    },
    {
      "epoch": 2.2675975203419676,
      "grad_norm": 0.36477404832839966,
      "learning_rate": 5.500813768836937e-06,
      "loss": 0.0211,
      "step": 1385620
    },
    {
      "epoch": 2.2676302507806207,
      "grad_norm": 0.3591582775115967,
      "learning_rate": 5.50074787662342e-06,
      "loss": 0.0236,
      "step": 1385640
    },
    {
      "epoch": 2.2676629812192743,
      "grad_norm": 0.2975226044654846,
      "learning_rate": 5.5006819844099025e-06,
      "loss": 0.0146,
      "step": 1385660
    },
    {
      "epoch": 2.2676957116579275,
      "grad_norm": 1.2904975414276123,
      "learning_rate": 5.500616092196386e-06,
      "loss": 0.0129,
      "step": 1385680
    },
    {
      "epoch": 2.267728442096581,
      "grad_norm": 0.1908016800880432,
      "learning_rate": 5.500550199982868e-06,
      "loss": 0.0101,
      "step": 1385700
    },
    {
      "epoch": 2.2677611725352342,
      "grad_norm": 0.22519835829734802,
      "learning_rate": 5.500484307769352e-06,
      "loss": 0.01,
      "step": 1385720
    },
    {
      "epoch": 2.267793902973888,
      "grad_norm": 0.1998264193534851,
      "learning_rate": 5.5004184155558335e-06,
      "loss": 0.0131,
      "step": 1385740
    },
    {
      "epoch": 2.267826633412541,
      "grad_norm": 0.49305200576782227,
      "learning_rate": 5.500352523342317e-06,
      "loss": 0.0174,
      "step": 1385760
    },
    {
      "epoch": 2.267859363851194,
      "grad_norm": 0.123549684882164,
      "learning_rate": 5.5002866311288e-06,
      "loss": 0.0151,
      "step": 1385780
    },
    {
      "epoch": 2.2678920942898477,
      "grad_norm": 0.3273085355758667,
      "learning_rate": 5.5002207389152826e-06,
      "loss": 0.0141,
      "step": 1385800
    },
    {
      "epoch": 2.267924824728501,
      "grad_norm": 0.16263732314109802,
      "learning_rate": 5.500154846701766e-06,
      "loss": 0.0118,
      "step": 1385820
    },
    {
      "epoch": 2.2679575551671545,
      "grad_norm": 0.18522678315639496,
      "learning_rate": 5.500088954488249e-06,
      "loss": 0.0163,
      "step": 1385840
    },
    {
      "epoch": 2.2679902856058076,
      "grad_norm": 0.4078248143196106,
      "learning_rate": 5.500023062274732e-06,
      "loss": 0.0111,
      "step": 1385860
    },
    {
      "epoch": 2.268023016044461,
      "grad_norm": 0.4073334336280823,
      "learning_rate": 5.499957170061214e-06,
      "loss": 0.0154,
      "step": 1385880
    },
    {
      "epoch": 2.2680557464831144,
      "grad_norm": 0.13976523280143738,
      "learning_rate": 5.499891277847698e-06,
      "loss": 0.0111,
      "step": 1385900
    },
    {
      "epoch": 2.2680884769217675,
      "grad_norm": 0.8453460931777954,
      "learning_rate": 5.49982538563418e-06,
      "loss": 0.0135,
      "step": 1385920
    },
    {
      "epoch": 2.268121207360421,
      "grad_norm": 1.9629164934158325,
      "learning_rate": 5.4997594934206634e-06,
      "loss": 0.0109,
      "step": 1385940
    },
    {
      "epoch": 2.2681539377990743,
      "grad_norm": 0.3048676550388336,
      "learning_rate": 5.499693601207145e-06,
      "loss": 0.0156,
      "step": 1385960
    },
    {
      "epoch": 2.268186668237728,
      "grad_norm": 0.5879170894622803,
      "learning_rate": 5.499627708993629e-06,
      "loss": 0.0218,
      "step": 1385980
    },
    {
      "epoch": 2.268219398676381,
      "grad_norm": 0.5788038969039917,
      "learning_rate": 5.499561816780112e-06,
      "loss": 0.0148,
      "step": 1386000
    },
    {
      "epoch": 2.2682521291150346,
      "grad_norm": 0.49770960211753845,
      "learning_rate": 5.499495924566594e-06,
      "loss": 0.0176,
      "step": 1386020
    },
    {
      "epoch": 2.2682848595536877,
      "grad_norm": 0.46856749057769775,
      "learning_rate": 5.499430032353077e-06,
      "loss": 0.018,
      "step": 1386040
    },
    {
      "epoch": 2.268317589992341,
      "grad_norm": 0.14969651401042938,
      "learning_rate": 5.499364140139561e-06,
      "loss": 0.0159,
      "step": 1386060
    },
    {
      "epoch": 2.2683503204309945,
      "grad_norm": 0.31011489033699036,
      "learning_rate": 5.499298247926043e-06,
      "loss": 0.0258,
      "step": 1386080
    },
    {
      "epoch": 2.2683830508696476,
      "grad_norm": 0.5760701298713684,
      "learning_rate": 5.499232355712526e-06,
      "loss": 0.0154,
      "step": 1386100
    },
    {
      "epoch": 2.2684157813083012,
      "grad_norm": 0.8523374795913696,
      "learning_rate": 5.499166463499008e-06,
      "loss": 0.0154,
      "step": 1386120
    },
    {
      "epoch": 2.2684485117469544,
      "grad_norm": 0.5401178598403931,
      "learning_rate": 5.499100571285492e-06,
      "loss": 0.0101,
      "step": 1386140
    },
    {
      "epoch": 2.268481242185608,
      "grad_norm": 0.3368835747241974,
      "learning_rate": 5.4990346790719736e-06,
      "loss": 0.0152,
      "step": 1386160
    },
    {
      "epoch": 2.268513972624261,
      "grad_norm": 0.3167415261268616,
      "learning_rate": 5.498968786858457e-06,
      "loss": 0.011,
      "step": 1386180
    },
    {
      "epoch": 2.2685467030629143,
      "grad_norm": 0.46034252643585205,
      "learning_rate": 5.498902894644941e-06,
      "loss": 0.0141,
      "step": 1386200
    },
    {
      "epoch": 2.268579433501568,
      "grad_norm": 0.6403374075889587,
      "learning_rate": 5.498837002431423e-06,
      "loss": 0.0187,
      "step": 1386220
    },
    {
      "epoch": 2.268612163940221,
      "grad_norm": 0.24606597423553467,
      "learning_rate": 5.498771110217906e-06,
      "loss": 0.0118,
      "step": 1386240
    },
    {
      "epoch": 2.2686448943788746,
      "grad_norm": 0.286236435174942,
      "learning_rate": 5.498705218004389e-06,
      "loss": 0.0186,
      "step": 1386260
    },
    {
      "epoch": 2.2686776248175278,
      "grad_norm": 0.17547041177749634,
      "learning_rate": 5.4986393257908725e-06,
      "loss": 0.0112,
      "step": 1386280
    },
    {
      "epoch": 2.2687103552561814,
      "grad_norm": 0.18871420621871948,
      "learning_rate": 5.4985734335773544e-06,
      "loss": 0.0199,
      "step": 1386300
    },
    {
      "epoch": 2.2687430856948345,
      "grad_norm": 1.0575342178344727,
      "learning_rate": 5.498507541363838e-06,
      "loss": 0.014,
      "step": 1386320
    },
    {
      "epoch": 2.2687758161334877,
      "grad_norm": 0.20189009606838226,
      "learning_rate": 5.49844164915032e-06,
      "loss": 0.0144,
      "step": 1386340
    },
    {
      "epoch": 2.2688085465721413,
      "grad_norm": 0.3636239171028137,
      "learning_rate": 5.4983757569368035e-06,
      "loss": 0.0186,
      "step": 1386360
    },
    {
      "epoch": 2.2688412770107944,
      "grad_norm": 0.1676379144191742,
      "learning_rate": 5.498309864723285e-06,
      "loss": 0.0144,
      "step": 1386380
    },
    {
      "epoch": 2.268874007449448,
      "grad_norm": 0.2100009322166443,
      "learning_rate": 5.498243972509769e-06,
      "loss": 0.0116,
      "step": 1386400
    },
    {
      "epoch": 2.268906737888101,
      "grad_norm": 0.6201284527778625,
      "learning_rate": 5.498178080296252e-06,
      "loss": 0.0152,
      "step": 1386420
    },
    {
      "epoch": 2.2689394683267547,
      "grad_norm": 0.26047590374946594,
      "learning_rate": 5.4981121880827345e-06,
      "loss": 0.0133,
      "step": 1386440
    },
    {
      "epoch": 2.268972198765408,
      "grad_norm": 0.40501317381858826,
      "learning_rate": 5.498046295869217e-06,
      "loss": 0.0168,
      "step": 1386460
    },
    {
      "epoch": 2.269004929204061,
      "grad_norm": 0.07195338606834412,
      "learning_rate": 5.497980403655701e-06,
      "loss": 0.0172,
      "step": 1386480
    },
    {
      "epoch": 2.2690376596427146,
      "grad_norm": 1.3576136827468872,
      "learning_rate": 5.497914511442183e-06,
      "loss": 0.0105,
      "step": 1386500
    },
    {
      "epoch": 2.269070390081368,
      "grad_norm": 0.22701512277126312,
      "learning_rate": 5.497848619228666e-06,
      "loss": 0.0077,
      "step": 1386520
    },
    {
      "epoch": 2.2691031205200214,
      "grad_norm": 0.2511257529258728,
      "learning_rate": 5.49778272701515e-06,
      "loss": 0.0115,
      "step": 1386540
    },
    {
      "epoch": 2.2691358509586745,
      "grad_norm": 0.2804703414440155,
      "learning_rate": 5.497716834801632e-06,
      "loss": 0.0124,
      "step": 1386560
    },
    {
      "epoch": 2.269168581397328,
      "grad_norm": 4.74201774597168,
      "learning_rate": 5.497650942588115e-06,
      "loss": 0.0132,
      "step": 1386580
    },
    {
      "epoch": 2.2692013118359813,
      "grad_norm": 0.37572699785232544,
      "learning_rate": 5.497585050374597e-06,
      "loss": 0.015,
      "step": 1386600
    },
    {
      "epoch": 2.2692340422746344,
      "grad_norm": 0.522586464881897,
      "learning_rate": 5.497519158161081e-06,
      "loss": 0.011,
      "step": 1386620
    },
    {
      "epoch": 2.269266772713288,
      "grad_norm": 0.38686975836753845,
      "learning_rate": 5.4974532659475636e-06,
      "loss": 0.016,
      "step": 1386640
    },
    {
      "epoch": 2.269299503151941,
      "grad_norm": 0.25157397985458374,
      "learning_rate": 5.497387373734046e-06,
      "loss": 0.0135,
      "step": 1386660
    },
    {
      "epoch": 2.2693322335905948,
      "grad_norm": 0.22814339399337769,
      "learning_rate": 5.497321481520529e-06,
      "loss": 0.0167,
      "step": 1386680
    },
    {
      "epoch": 2.269364964029248,
      "grad_norm": 0.09234625846147537,
      "learning_rate": 5.497255589307013e-06,
      "loss": 0.0211,
      "step": 1386700
    },
    {
      "epoch": 2.269397694467901,
      "grad_norm": 0.1790965050458908,
      "learning_rate": 5.4971896970934945e-06,
      "loss": 0.0164,
      "step": 1386720
    },
    {
      "epoch": 2.2694304249065547,
      "grad_norm": 0.32134875655174255,
      "learning_rate": 5.497123804879978e-06,
      "loss": 0.0143,
      "step": 1386740
    },
    {
      "epoch": 2.269463155345208,
      "grad_norm": 0.49849751591682434,
      "learning_rate": 5.49705791266646e-06,
      "loss": 0.014,
      "step": 1386760
    },
    {
      "epoch": 2.2694958857838614,
      "grad_norm": 0.21729132533073425,
      "learning_rate": 5.4969920204529436e-06,
      "loss": 0.016,
      "step": 1386780
    },
    {
      "epoch": 2.2695286162225146,
      "grad_norm": 0.7640244960784912,
      "learning_rate": 5.496926128239426e-06,
      "loss": 0.0181,
      "step": 1386800
    },
    {
      "epoch": 2.2695613466611677,
      "grad_norm": 0.14535927772521973,
      "learning_rate": 5.496860236025909e-06,
      "loss": 0.012,
      "step": 1386820
    },
    {
      "epoch": 2.2695940770998213,
      "grad_norm": 0.29988667368888855,
      "learning_rate": 5.496794343812392e-06,
      "loss": 0.0139,
      "step": 1386840
    },
    {
      "epoch": 2.2696268075384745,
      "grad_norm": 0.3297206163406372,
      "learning_rate": 5.496728451598875e-06,
      "loss": 0.0176,
      "step": 1386860
    },
    {
      "epoch": 2.269659537977128,
      "grad_norm": 0.19115598499774933,
      "learning_rate": 5.496662559385358e-06,
      "loss": 0.0149,
      "step": 1386880
    },
    {
      "epoch": 2.269692268415781,
      "grad_norm": 0.5873938202857971,
      "learning_rate": 5.496596667171841e-06,
      "loss": 0.0169,
      "step": 1386900
    },
    {
      "epoch": 2.269724998854435,
      "grad_norm": 0.16093608736991882,
      "learning_rate": 5.4965307749583244e-06,
      "loss": 0.0178,
      "step": 1386920
    },
    {
      "epoch": 2.269757729293088,
      "grad_norm": 0.4920254945755005,
      "learning_rate": 5.496464882744806e-06,
      "loss": 0.0155,
      "step": 1386940
    },
    {
      "epoch": 2.269790459731741,
      "grad_norm": 0.23514686524868011,
      "learning_rate": 5.49639899053129e-06,
      "loss": 0.0158,
      "step": 1386960
    },
    {
      "epoch": 2.2698231901703947,
      "grad_norm": 0.04173416644334793,
      "learning_rate": 5.496333098317772e-06,
      "loss": 0.0151,
      "step": 1386980
    },
    {
      "epoch": 2.269855920609048,
      "grad_norm": 0.3725860118865967,
      "learning_rate": 5.496267206104255e-06,
      "loss": 0.0102,
      "step": 1387000
    },
    {
      "epoch": 2.2698886510477014,
      "grad_norm": 0.4239702820777893,
      "learning_rate": 5.496201313890738e-06,
      "loss": 0.0209,
      "step": 1387020
    },
    {
      "epoch": 2.2699213814863546,
      "grad_norm": 0.08089893311262131,
      "learning_rate": 5.496135421677221e-06,
      "loss": 0.0143,
      "step": 1387040
    },
    {
      "epoch": 2.269954111925008,
      "grad_norm": 0.38288599252700806,
      "learning_rate": 5.496069529463704e-06,
      "loss": 0.0146,
      "step": 1387060
    },
    {
      "epoch": 2.2699868423636613,
      "grad_norm": 0.2143702507019043,
      "learning_rate": 5.496003637250187e-06,
      "loss": 0.0173,
      "step": 1387080
    },
    {
      "epoch": 2.2700195728023145,
      "grad_norm": 0.4472157955169678,
      "learning_rate": 5.495937745036669e-06,
      "loss": 0.0172,
      "step": 1387100
    },
    {
      "epoch": 2.270052303240968,
      "grad_norm": 0.23717886209487915,
      "learning_rate": 5.495871852823153e-06,
      "loss": 0.0181,
      "step": 1387120
    },
    {
      "epoch": 2.2700850336796212,
      "grad_norm": 0.8562681674957275,
      "learning_rate": 5.495805960609635e-06,
      "loss": 0.0196,
      "step": 1387140
    },
    {
      "epoch": 2.270117764118275,
      "grad_norm": 0.27087777853012085,
      "learning_rate": 5.495740068396118e-06,
      "loss": 0.0158,
      "step": 1387160
    },
    {
      "epoch": 2.270150494556928,
      "grad_norm": 0.6166212558746338,
      "learning_rate": 5.4956741761826e-06,
      "loss": 0.0151,
      "step": 1387180
    },
    {
      "epoch": 2.2701832249955816,
      "grad_norm": 0.33010509610176086,
      "learning_rate": 5.495608283969084e-06,
      "loss": 0.0129,
      "step": 1387200
    },
    {
      "epoch": 2.2702159554342347,
      "grad_norm": 0.48276033997535706,
      "learning_rate": 5.495542391755566e-06,
      "loss": 0.0222,
      "step": 1387220
    },
    {
      "epoch": 2.270248685872888,
      "grad_norm": 0.1472441703081131,
      "learning_rate": 5.495476499542049e-06,
      "loss": 0.0152,
      "step": 1387240
    },
    {
      "epoch": 2.2702814163115415,
      "grad_norm": 0.34675732254981995,
      "learning_rate": 5.495410607328533e-06,
      "loss": 0.0117,
      "step": 1387260
    },
    {
      "epoch": 2.2703141467501946,
      "grad_norm": 0.2393384575843811,
      "learning_rate": 5.4953447151150155e-06,
      "loss": 0.0178,
      "step": 1387280
    },
    {
      "epoch": 2.270346877188848,
      "grad_norm": 0.23323288559913635,
      "learning_rate": 5.495278822901498e-06,
      "loss": 0.0185,
      "step": 1387300
    },
    {
      "epoch": 2.2703796076275014,
      "grad_norm": 0.41961151361465454,
      "learning_rate": 5.495212930687981e-06,
      "loss": 0.0189,
      "step": 1387320
    },
    {
      "epoch": 2.270412338066155,
      "grad_norm": 0.5265581011772156,
      "learning_rate": 5.4951470384744645e-06,
      "loss": 0.0203,
      "step": 1387340
    },
    {
      "epoch": 2.270445068504808,
      "grad_norm": 0.9832494258880615,
      "learning_rate": 5.495081146260946e-06,
      "loss": 0.0148,
      "step": 1387360
    },
    {
      "epoch": 2.2704777989434612,
      "grad_norm": 0.25997358560562134,
      "learning_rate": 5.49501525404743e-06,
      "loss": 0.0104,
      "step": 1387380
    },
    {
      "epoch": 2.270510529382115,
      "grad_norm": 0.30478015542030334,
      "learning_rate": 5.494949361833912e-06,
      "loss": 0.0113,
      "step": 1387400
    },
    {
      "epoch": 2.270543259820768,
      "grad_norm": 0.16628094017505646,
      "learning_rate": 5.4948834696203955e-06,
      "loss": 0.013,
      "step": 1387420
    },
    {
      "epoch": 2.2705759902594216,
      "grad_norm": 0.14861290156841278,
      "learning_rate": 5.494817577406878e-06,
      "loss": 0.0184,
      "step": 1387440
    },
    {
      "epoch": 2.2706087206980747,
      "grad_norm": 0.14085760712623596,
      "learning_rate": 5.494751685193361e-06,
      "loss": 0.0154,
      "step": 1387460
    },
    {
      "epoch": 2.2706414511367283,
      "grad_norm": 0.571897566318512,
      "learning_rate": 5.494685792979844e-06,
      "loss": 0.0098,
      "step": 1387480
    },
    {
      "epoch": 2.2706741815753815,
      "grad_norm": 0.2904970347881317,
      "learning_rate": 5.494619900766327e-06,
      "loss": 0.0126,
      "step": 1387500
    },
    {
      "epoch": 2.2707069120140346,
      "grad_norm": 0.9898056983947754,
      "learning_rate": 5.494554008552809e-06,
      "loss": 0.0173,
      "step": 1387520
    },
    {
      "epoch": 2.2707396424526882,
      "grad_norm": 0.18443071842193604,
      "learning_rate": 5.494488116339293e-06,
      "loss": 0.0123,
      "step": 1387540
    },
    {
      "epoch": 2.2707723728913414,
      "grad_norm": 0.3211759030818939,
      "learning_rate": 5.494422224125775e-06,
      "loss": 0.0144,
      "step": 1387560
    },
    {
      "epoch": 2.270805103329995,
      "grad_norm": 0.6248720288276672,
      "learning_rate": 5.494356331912258e-06,
      "loss": 0.0126,
      "step": 1387580
    },
    {
      "epoch": 2.270837833768648,
      "grad_norm": 0.28421512246131897,
      "learning_rate": 5.494290439698742e-06,
      "loss": 0.0208,
      "step": 1387600
    },
    {
      "epoch": 2.2708705642073017,
      "grad_norm": 0.3108692467212677,
      "learning_rate": 5.494224547485224e-06,
      "loss": 0.0104,
      "step": 1387620
    },
    {
      "epoch": 2.270903294645955,
      "grad_norm": 0.6060850024223328,
      "learning_rate": 5.494158655271707e-06,
      "loss": 0.0087,
      "step": 1387640
    },
    {
      "epoch": 2.270936025084608,
      "grad_norm": 0.574000895023346,
      "learning_rate": 5.49409276305819e-06,
      "loss": 0.0115,
      "step": 1387660
    },
    {
      "epoch": 2.2709687555232616,
      "grad_norm": 1.124733805656433,
      "learning_rate": 5.494026870844673e-06,
      "loss": 0.0168,
      "step": 1387680
    },
    {
      "epoch": 2.2710014859619148,
      "grad_norm": 0.2806161046028137,
      "learning_rate": 5.4939609786311555e-06,
      "loss": 0.0134,
      "step": 1387700
    },
    {
      "epoch": 2.2710342164005684,
      "grad_norm": 0.17020969092845917,
      "learning_rate": 5.493895086417639e-06,
      "loss": 0.016,
      "step": 1387720
    },
    {
      "epoch": 2.2710669468392215,
      "grad_norm": 0.9560588002204895,
      "learning_rate": 5.493829194204121e-06,
      "loss": 0.0149,
      "step": 1387740
    },
    {
      "epoch": 2.271099677277875,
      "grad_norm": 0.47762787342071533,
      "learning_rate": 5.493763301990605e-06,
      "loss": 0.012,
      "step": 1387760
    },
    {
      "epoch": 2.2711324077165282,
      "grad_norm": 0.32535457611083984,
      "learning_rate": 5.4936974097770865e-06,
      "loss": 0.0134,
      "step": 1387780
    },
    {
      "epoch": 2.2711651381551814,
      "grad_norm": 1.159030556678772,
      "learning_rate": 5.49363151756357e-06,
      "loss": 0.0229,
      "step": 1387800
    },
    {
      "epoch": 2.271197868593835,
      "grad_norm": 0.16460970044136047,
      "learning_rate": 5.493565625350053e-06,
      "loss": 0.0087,
      "step": 1387820
    },
    {
      "epoch": 2.271230599032488,
      "grad_norm": 0.3177274167537689,
      "learning_rate": 5.4934997331365356e-06,
      "loss": 0.0137,
      "step": 1387840
    },
    {
      "epoch": 2.2712633294711417,
      "grad_norm": 0.23425447940826416,
      "learning_rate": 5.493433840923018e-06,
      "loss": 0.015,
      "step": 1387860
    },
    {
      "epoch": 2.271296059909795,
      "grad_norm": 0.44816482067108154,
      "learning_rate": 5.493367948709502e-06,
      "loss": 0.0133,
      "step": 1387880
    },
    {
      "epoch": 2.2713287903484485,
      "grad_norm": 0.3718603849411011,
      "learning_rate": 5.493302056495984e-06,
      "loss": 0.0213,
      "step": 1387900
    },
    {
      "epoch": 2.2713615207871016,
      "grad_norm": 0.33349305391311646,
      "learning_rate": 5.493236164282467e-06,
      "loss": 0.0142,
      "step": 1387920
    },
    {
      "epoch": 2.271394251225755,
      "grad_norm": 1.8865411281585693,
      "learning_rate": 5.493170272068951e-06,
      "loss": 0.0128,
      "step": 1387940
    },
    {
      "epoch": 2.2714269816644084,
      "grad_norm": 0.2652788758277893,
      "learning_rate": 5.493104379855433e-06,
      "loss": 0.0134,
      "step": 1387960
    },
    {
      "epoch": 2.2714597121030615,
      "grad_norm": 0.7805414795875549,
      "learning_rate": 5.4930384876419164e-06,
      "loss": 0.0142,
      "step": 1387980
    },
    {
      "epoch": 2.271492442541715,
      "grad_norm": 0.2055492103099823,
      "learning_rate": 5.492972595428398e-06,
      "loss": 0.0145,
      "step": 1388000
    },
    {
      "epoch": 2.2715251729803683,
      "grad_norm": 0.42709168791770935,
      "learning_rate": 5.492906703214882e-06,
      "loss": 0.0121,
      "step": 1388020
    },
    {
      "epoch": 2.271557903419022,
      "grad_norm": 0.6236348152160645,
      "learning_rate": 5.492840811001365e-06,
      "loss": 0.0085,
      "step": 1388040
    },
    {
      "epoch": 2.271590633857675,
      "grad_norm": 0.2276800572872162,
      "learning_rate": 5.492774918787847e-06,
      "loss": 0.0127,
      "step": 1388060
    },
    {
      "epoch": 2.271623364296328,
      "grad_norm": 0.26238036155700684,
      "learning_rate": 5.49270902657433e-06,
      "loss": 0.0175,
      "step": 1388080
    },
    {
      "epoch": 2.2716560947349818,
      "grad_norm": 0.16270853579044342,
      "learning_rate": 5.492643134360814e-06,
      "loss": 0.0103,
      "step": 1388100
    },
    {
      "epoch": 2.271688825173635,
      "grad_norm": 0.4480256140232086,
      "learning_rate": 5.492577242147296e-06,
      "loss": 0.014,
      "step": 1388120
    },
    {
      "epoch": 2.2717215556122885,
      "grad_norm": 0.1741609424352646,
      "learning_rate": 5.492511349933779e-06,
      "loss": 0.0157,
      "step": 1388140
    },
    {
      "epoch": 2.2717542860509417,
      "grad_norm": 0.28918418288230896,
      "learning_rate": 5.492445457720261e-06,
      "loss": 0.0162,
      "step": 1388160
    },
    {
      "epoch": 2.271787016489595,
      "grad_norm": 0.4803047776222229,
      "learning_rate": 5.492379565506745e-06,
      "loss": 0.0148,
      "step": 1388180
    },
    {
      "epoch": 2.2718197469282484,
      "grad_norm": 0.44002458453178406,
      "learning_rate": 5.4923136732932266e-06,
      "loss": 0.0226,
      "step": 1388200
    },
    {
      "epoch": 2.2718524773669015,
      "grad_norm": 2.0026400089263916,
      "learning_rate": 5.49224778107971e-06,
      "loss": 0.0188,
      "step": 1388220
    },
    {
      "epoch": 2.271885207805555,
      "grad_norm": 0.6156348586082458,
      "learning_rate": 5.492181888866193e-06,
      "loss": 0.0132,
      "step": 1388240
    },
    {
      "epoch": 2.2719179382442083,
      "grad_norm": 0.5984781384468079,
      "learning_rate": 5.492115996652676e-06,
      "loss": 0.0168,
      "step": 1388260
    },
    {
      "epoch": 2.2719506686828614,
      "grad_norm": 0.15611904859542847,
      "learning_rate": 5.492050104439159e-06,
      "loss": 0.0106,
      "step": 1388280
    },
    {
      "epoch": 2.271983399121515,
      "grad_norm": 0.5953220725059509,
      "learning_rate": 5.491984212225642e-06,
      "loss": 0.011,
      "step": 1388300
    },
    {
      "epoch": 2.272016129560168,
      "grad_norm": 0.10706551373004913,
      "learning_rate": 5.491918320012125e-06,
      "loss": 0.0118,
      "step": 1388320
    },
    {
      "epoch": 2.272048859998822,
      "grad_norm": 0.8509531021118164,
      "learning_rate": 5.4918524277986074e-06,
      "loss": 0.0132,
      "step": 1388340
    },
    {
      "epoch": 2.272081590437475,
      "grad_norm": 0.37357062101364136,
      "learning_rate": 5.491786535585091e-06,
      "loss": 0.0155,
      "step": 1388360
    },
    {
      "epoch": 2.2721143208761285,
      "grad_norm": 0.4523158371448517,
      "learning_rate": 5.491720643371573e-06,
      "loss": 0.0197,
      "step": 1388380
    },
    {
      "epoch": 2.2721470513147817,
      "grad_norm": 1.7628358602523804,
      "learning_rate": 5.4916547511580565e-06,
      "loss": 0.0106,
      "step": 1388400
    },
    {
      "epoch": 2.272179781753435,
      "grad_norm": 0.4821242690086365,
      "learning_rate": 5.491588858944538e-06,
      "loss": 0.0135,
      "step": 1388420
    },
    {
      "epoch": 2.2722125121920884,
      "grad_norm": 0.49955666065216064,
      "learning_rate": 5.491522966731022e-06,
      "loss": 0.014,
      "step": 1388440
    },
    {
      "epoch": 2.2722452426307416,
      "grad_norm": 0.43443191051483154,
      "learning_rate": 5.491457074517505e-06,
      "loss": 0.0139,
      "step": 1388460
    },
    {
      "epoch": 2.272277973069395,
      "grad_norm": 0.5813080072402954,
      "learning_rate": 5.4913911823039875e-06,
      "loss": 0.0147,
      "step": 1388480
    },
    {
      "epoch": 2.2723107035080483,
      "grad_norm": 0.28133606910705566,
      "learning_rate": 5.49132529009047e-06,
      "loss": 0.014,
      "step": 1388500
    },
    {
      "epoch": 2.272343433946702,
      "grad_norm": 0.5950319766998291,
      "learning_rate": 5.491259397876954e-06,
      "loss": 0.0127,
      "step": 1388520
    },
    {
      "epoch": 2.272376164385355,
      "grad_norm": 0.40458834171295166,
      "learning_rate": 5.491193505663436e-06,
      "loss": 0.0082,
      "step": 1388540
    },
    {
      "epoch": 2.272408894824008,
      "grad_norm": 0.15778179466724396,
      "learning_rate": 5.491127613449919e-06,
      "loss": 0.0114,
      "step": 1388560
    },
    {
      "epoch": 2.272441625262662,
      "grad_norm": 0.13127806782722473,
      "learning_rate": 5.491061721236401e-06,
      "loss": 0.013,
      "step": 1388580
    },
    {
      "epoch": 2.272474355701315,
      "grad_norm": 0.3862154483795166,
      "learning_rate": 5.490995829022885e-06,
      "loss": 0.0162,
      "step": 1388600
    },
    {
      "epoch": 2.2725070861399685,
      "grad_norm": 0.2436448633670807,
      "learning_rate": 5.4909299368093675e-06,
      "loss": 0.0122,
      "step": 1388620
    },
    {
      "epoch": 2.2725398165786217,
      "grad_norm": 0.6446937322616577,
      "learning_rate": 5.49086404459585e-06,
      "loss": 0.018,
      "step": 1388640
    },
    {
      "epoch": 2.2725725470172753,
      "grad_norm": 0.44618427753448486,
      "learning_rate": 5.490798152382334e-06,
      "loss": 0.015,
      "step": 1388660
    },
    {
      "epoch": 2.2726052774559284,
      "grad_norm": 0.30989572405815125,
      "learning_rate": 5.4907322601688166e-06,
      "loss": 0.0222,
      "step": 1388680
    },
    {
      "epoch": 2.2726380078945816,
      "grad_norm": 0.507266640663147,
      "learning_rate": 5.490666367955299e-06,
      "loss": 0.0255,
      "step": 1388700
    },
    {
      "epoch": 2.272670738333235,
      "grad_norm": 0.40195444226264954,
      "learning_rate": 5.490600475741782e-06,
      "loss": 0.0135,
      "step": 1388720
    },
    {
      "epoch": 2.2727034687718883,
      "grad_norm": 0.5565550923347473,
      "learning_rate": 5.490534583528266e-06,
      "loss": 0.0182,
      "step": 1388740
    },
    {
      "epoch": 2.272736199210542,
      "grad_norm": 0.4814704358577728,
      "learning_rate": 5.4904686913147475e-06,
      "loss": 0.0202,
      "step": 1388760
    },
    {
      "epoch": 2.272768929649195,
      "grad_norm": 0.3482985198497772,
      "learning_rate": 5.490402799101231e-06,
      "loss": 0.0098,
      "step": 1388780
    },
    {
      "epoch": 2.2728016600878487,
      "grad_norm": 0.1772846132516861,
      "learning_rate": 5.490336906887713e-06,
      "loss": 0.0126,
      "step": 1388800
    },
    {
      "epoch": 2.272834390526502,
      "grad_norm": 0.4644797146320343,
      "learning_rate": 5.490271014674197e-06,
      "loss": 0.0193,
      "step": 1388820
    },
    {
      "epoch": 2.272867120965155,
      "grad_norm": 0.15691113471984863,
      "learning_rate": 5.490205122460679e-06,
      "loss": 0.0141,
      "step": 1388840
    },
    {
      "epoch": 2.2728998514038086,
      "grad_norm": 0.3435554504394531,
      "learning_rate": 5.490139230247162e-06,
      "loss": 0.0187,
      "step": 1388860
    },
    {
      "epoch": 2.2729325818424617,
      "grad_norm": 0.09982718527317047,
      "learning_rate": 5.490073338033645e-06,
      "loss": 0.0164,
      "step": 1388880
    },
    {
      "epoch": 2.2729653122811153,
      "grad_norm": 1.0699306726455688,
      "learning_rate": 5.490007445820128e-06,
      "loss": 0.0128,
      "step": 1388900
    },
    {
      "epoch": 2.2729980427197685,
      "grad_norm": 1.18138587474823,
      "learning_rate": 5.48994155360661e-06,
      "loss": 0.0132,
      "step": 1388920
    },
    {
      "epoch": 2.273030773158422,
      "grad_norm": 0.10920943319797516,
      "learning_rate": 5.489875661393094e-06,
      "loss": 0.0173,
      "step": 1388940
    },
    {
      "epoch": 2.273063503597075,
      "grad_norm": 0.20179252326488495,
      "learning_rate": 5.489809769179576e-06,
      "loss": 0.0164,
      "step": 1388960
    },
    {
      "epoch": 2.2730962340357284,
      "grad_norm": 0.4262930154800415,
      "learning_rate": 5.489743876966059e-06,
      "loss": 0.0175,
      "step": 1388980
    },
    {
      "epoch": 2.273128964474382,
      "grad_norm": 0.2439708560705185,
      "learning_rate": 5.489677984752543e-06,
      "loss": 0.0145,
      "step": 1389000
    },
    {
      "epoch": 2.273161694913035,
      "grad_norm": 1.126668095588684,
      "learning_rate": 5.489612092539025e-06,
      "loss": 0.0222,
      "step": 1389020
    },
    {
      "epoch": 2.2731944253516887,
      "grad_norm": 0.9856749176979065,
      "learning_rate": 5.489546200325508e-06,
      "loss": 0.0169,
      "step": 1389040
    },
    {
      "epoch": 2.273227155790342,
      "grad_norm": 0.2717670500278473,
      "learning_rate": 5.489480308111991e-06,
      "loss": 0.0132,
      "step": 1389060
    },
    {
      "epoch": 2.2732598862289954,
      "grad_norm": 1.1872916221618652,
      "learning_rate": 5.489414415898474e-06,
      "loss": 0.0162,
      "step": 1389080
    },
    {
      "epoch": 2.2732926166676486,
      "grad_norm": 0.39877623319625854,
      "learning_rate": 5.489348523684957e-06,
      "loss": 0.0126,
      "step": 1389100
    },
    {
      "epoch": 2.2733253471063017,
      "grad_norm": 0.6162776947021484,
      "learning_rate": 5.48928263147144e-06,
      "loss": 0.0132,
      "step": 1389120
    },
    {
      "epoch": 2.2733580775449553,
      "grad_norm": 0.8134671449661255,
      "learning_rate": 5.489216739257922e-06,
      "loss": 0.0153,
      "step": 1389140
    },
    {
      "epoch": 2.2733908079836085,
      "grad_norm": 0.16874361038208008,
      "learning_rate": 5.489150847044406e-06,
      "loss": 0.0222,
      "step": 1389160
    },
    {
      "epoch": 2.273423538422262,
      "grad_norm": 0.7498435974121094,
      "learning_rate": 5.489084954830888e-06,
      "loss": 0.0181,
      "step": 1389180
    },
    {
      "epoch": 2.2734562688609152,
      "grad_norm": 0.26776793599128723,
      "learning_rate": 5.489019062617371e-06,
      "loss": 0.0115,
      "step": 1389200
    },
    {
      "epoch": 2.273488999299569,
      "grad_norm": 0.21974925696849823,
      "learning_rate": 5.488953170403853e-06,
      "loss": 0.0132,
      "step": 1389220
    },
    {
      "epoch": 2.273521729738222,
      "grad_norm": 0.4129880964756012,
      "learning_rate": 5.488887278190337e-06,
      "loss": 0.0156,
      "step": 1389240
    },
    {
      "epoch": 2.273554460176875,
      "grad_norm": 0.06965197622776031,
      "learning_rate": 5.488821385976819e-06,
      "loss": 0.0141,
      "step": 1389260
    },
    {
      "epoch": 2.2735871906155287,
      "grad_norm": 0.3366750478744507,
      "learning_rate": 5.488755493763302e-06,
      "loss": 0.0172,
      "step": 1389280
    },
    {
      "epoch": 2.273619921054182,
      "grad_norm": 0.48296812176704407,
      "learning_rate": 5.488689601549785e-06,
      "loss": 0.0182,
      "step": 1389300
    },
    {
      "epoch": 2.2736526514928355,
      "grad_norm": 0.7031082510948181,
      "learning_rate": 5.4886237093362685e-06,
      "loss": 0.0188,
      "step": 1389320
    },
    {
      "epoch": 2.2736853819314886,
      "grad_norm": 0.658769965171814,
      "learning_rate": 5.488557817122751e-06,
      "loss": 0.024,
      "step": 1389340
    },
    {
      "epoch": 2.273718112370142,
      "grad_norm": 0.16403044760227203,
      "learning_rate": 5.488491924909234e-06,
      "loss": 0.0149,
      "step": 1389360
    },
    {
      "epoch": 2.2737508428087954,
      "grad_norm": 0.726198136806488,
      "learning_rate": 5.4884260326957175e-06,
      "loss": 0.014,
      "step": 1389380
    },
    {
      "epoch": 2.2737835732474485,
      "grad_norm": 0.13813188672065735,
      "learning_rate": 5.488360140482199e-06,
      "loss": 0.016,
      "step": 1389400
    },
    {
      "epoch": 2.273816303686102,
      "grad_norm": 1.8997694253921509,
      "learning_rate": 5.488294248268683e-06,
      "loss": 0.016,
      "step": 1389420
    },
    {
      "epoch": 2.2738490341247553,
      "grad_norm": 0.1763388067483902,
      "learning_rate": 5.488228356055165e-06,
      "loss": 0.0136,
      "step": 1389440
    },
    {
      "epoch": 2.273881764563409,
      "grad_norm": 0.29394301772117615,
      "learning_rate": 5.4881624638416485e-06,
      "loss": 0.0244,
      "step": 1389460
    },
    {
      "epoch": 2.273914495002062,
      "grad_norm": 0.9946442246437073,
      "learning_rate": 5.488096571628131e-06,
      "loss": 0.0206,
      "step": 1389480
    },
    {
      "epoch": 2.2739472254407156,
      "grad_norm": 0.2630431056022644,
      "learning_rate": 5.488030679414614e-06,
      "loss": 0.0148,
      "step": 1389500
    },
    {
      "epoch": 2.2739799558793687,
      "grad_norm": 0.39761948585510254,
      "learning_rate": 5.487964787201097e-06,
      "loss": 0.017,
      "step": 1389520
    },
    {
      "epoch": 2.274012686318022,
      "grad_norm": 0.42871904373168945,
      "learning_rate": 5.48789889498758e-06,
      "loss": 0.0173,
      "step": 1389540
    },
    {
      "epoch": 2.2740454167566755,
      "grad_norm": 0.5567842721939087,
      "learning_rate": 5.487833002774062e-06,
      "loss": 0.0123,
      "step": 1389560
    },
    {
      "epoch": 2.2740781471953286,
      "grad_norm": 0.5696547627449036,
      "learning_rate": 5.487767110560546e-06,
      "loss": 0.013,
      "step": 1389580
    },
    {
      "epoch": 2.2741108776339822,
      "grad_norm": 0.26572781801223755,
      "learning_rate": 5.487701218347028e-06,
      "loss": 0.0115,
      "step": 1389600
    },
    {
      "epoch": 2.2741436080726354,
      "grad_norm": 0.48552361130714417,
      "learning_rate": 5.487635326133511e-06,
      "loss": 0.0112,
      "step": 1389620
    },
    {
      "epoch": 2.274176338511289,
      "grad_norm": 0.1465078890323639,
      "learning_rate": 5.487569433919994e-06,
      "loss": 0.0143,
      "step": 1389640
    },
    {
      "epoch": 2.274209068949942,
      "grad_norm": 0.2536303699016571,
      "learning_rate": 5.487503541706477e-06,
      "loss": 0.0192,
      "step": 1389660
    },
    {
      "epoch": 2.2742417993885953,
      "grad_norm": 0.23140236735343933,
      "learning_rate": 5.4874376494929595e-06,
      "loss": 0.0128,
      "step": 1389680
    },
    {
      "epoch": 2.274274529827249,
      "grad_norm": 0.21176597476005554,
      "learning_rate": 5.487371757279443e-06,
      "loss": 0.01,
      "step": 1389700
    },
    {
      "epoch": 2.274307260265902,
      "grad_norm": 0.19944199919700623,
      "learning_rate": 5.487305865065926e-06,
      "loss": 0.0122,
      "step": 1389720
    },
    {
      "epoch": 2.2743399907045556,
      "grad_norm": 0.4647495150566101,
      "learning_rate": 5.4872399728524085e-06,
      "loss": 0.0138,
      "step": 1389740
    },
    {
      "epoch": 2.2743727211432088,
      "grad_norm": 0.49957600235939026,
      "learning_rate": 5.487174080638892e-06,
      "loss": 0.022,
      "step": 1389760
    },
    {
      "epoch": 2.274405451581862,
      "grad_norm": 0.8148841857910156,
      "learning_rate": 5.487108188425374e-06,
      "loss": 0.0211,
      "step": 1389780
    },
    {
      "epoch": 2.2744381820205155,
      "grad_norm": 0.13240540027618408,
      "learning_rate": 5.487042296211858e-06,
      "loss": 0.0148,
      "step": 1389800
    },
    {
      "epoch": 2.2744709124591687,
      "grad_norm": 0.11559352278709412,
      "learning_rate": 5.4869764039983395e-06,
      "loss": 0.0146,
      "step": 1389820
    },
    {
      "epoch": 2.2745036428978223,
      "grad_norm": 0.1317376047372818,
      "learning_rate": 5.486910511784823e-06,
      "loss": 0.0155,
      "step": 1389840
    },
    {
      "epoch": 2.2745363733364754,
      "grad_norm": 1.147796392440796,
      "learning_rate": 5.486844619571306e-06,
      "loss": 0.0167,
      "step": 1389860
    },
    {
      "epoch": 2.2745691037751286,
      "grad_norm": 0.64106684923172,
      "learning_rate": 5.4867787273577886e-06,
      "loss": 0.013,
      "step": 1389880
    },
    {
      "epoch": 2.274601834213782,
      "grad_norm": 0.08621513843536377,
      "learning_rate": 5.486712835144271e-06,
      "loss": 0.0119,
      "step": 1389900
    },
    {
      "epoch": 2.2746345646524353,
      "grad_norm": 0.4013713598251343,
      "learning_rate": 5.486646942930755e-06,
      "loss": 0.0108,
      "step": 1389920
    },
    {
      "epoch": 2.274667295091089,
      "grad_norm": 0.16972725093364716,
      "learning_rate": 5.486581050717237e-06,
      "loss": 0.0193,
      "step": 1389940
    },
    {
      "epoch": 2.274700025529742,
      "grad_norm": 0.24925589561462402,
      "learning_rate": 5.48651515850372e-06,
      "loss": 0.0213,
      "step": 1389960
    },
    {
      "epoch": 2.2747327559683956,
      "grad_norm": 0.2750665247440338,
      "learning_rate": 5.486449266290202e-06,
      "loss": 0.0135,
      "step": 1389980
    },
    {
      "epoch": 2.274765486407049,
      "grad_norm": 0.8303474187850952,
      "learning_rate": 5.486383374076686e-06,
      "loss": 0.0163,
      "step": 1390000
    },
    {
      "epoch": 2.274798216845702,
      "grad_norm": 0.5841008424758911,
      "learning_rate": 5.486317481863168e-06,
      "loss": 0.0123,
      "step": 1390020
    },
    {
      "epoch": 2.2748309472843555,
      "grad_norm": 0.21584554016590118,
      "learning_rate": 5.486251589649651e-06,
      "loss": 0.0152,
      "step": 1390040
    },
    {
      "epoch": 2.2748636777230087,
      "grad_norm": 0.26533621549606323,
      "learning_rate": 5.486185697436135e-06,
      "loss": 0.0156,
      "step": 1390060
    },
    {
      "epoch": 2.2748964081616623,
      "grad_norm": 0.220362588763237,
      "learning_rate": 5.486119805222617e-06,
      "loss": 0.0151,
      "step": 1390080
    },
    {
      "epoch": 2.2749291386003154,
      "grad_norm": 0.07940244674682617,
      "learning_rate": 5.4860539130091e-06,
      "loss": 0.0144,
      "step": 1390100
    },
    {
      "epoch": 2.274961869038969,
      "grad_norm": 0.12229213863611221,
      "learning_rate": 5.485988020795583e-06,
      "loss": 0.0174,
      "step": 1390120
    },
    {
      "epoch": 2.274994599477622,
      "grad_norm": 0.31830206513404846,
      "learning_rate": 5.485922128582066e-06,
      "loss": 0.0093,
      "step": 1390140
    },
    {
      "epoch": 2.2750273299162753,
      "grad_norm": 0.6053219437599182,
      "learning_rate": 5.485856236368549e-06,
      "loss": 0.0152,
      "step": 1390160
    },
    {
      "epoch": 2.275060060354929,
      "grad_norm": 0.7298415303230286,
      "learning_rate": 5.485790344155032e-06,
      "loss": 0.0114,
      "step": 1390180
    },
    {
      "epoch": 2.275092790793582,
      "grad_norm": 0.23035866022109985,
      "learning_rate": 5.485724451941514e-06,
      "loss": 0.017,
      "step": 1390200
    },
    {
      "epoch": 2.2751255212322357,
      "grad_norm": 0.6710184216499329,
      "learning_rate": 5.485658559727998e-06,
      "loss": 0.0164,
      "step": 1390220
    },
    {
      "epoch": 2.275158251670889,
      "grad_norm": 0.2162272036075592,
      "learning_rate": 5.4855926675144796e-06,
      "loss": 0.0158,
      "step": 1390240
    },
    {
      "epoch": 2.2751909821095424,
      "grad_norm": 0.3982333540916443,
      "learning_rate": 5.485526775300963e-06,
      "loss": 0.0115,
      "step": 1390260
    },
    {
      "epoch": 2.2752237125481956,
      "grad_norm": 0.6109009385108948,
      "learning_rate": 5.485460883087446e-06,
      "loss": 0.0175,
      "step": 1390280
    },
    {
      "epoch": 2.2752564429868487,
      "grad_norm": 0.4594346284866333,
      "learning_rate": 5.485394990873929e-06,
      "loss": 0.0108,
      "step": 1390300
    },
    {
      "epoch": 2.2752891734255023,
      "grad_norm": 0.5363464951515198,
      "learning_rate": 5.485329098660411e-06,
      "loss": 0.0122,
      "step": 1390320
    },
    {
      "epoch": 2.2753219038641554,
      "grad_norm": 0.1420956552028656,
      "learning_rate": 5.485263206446895e-06,
      "loss": 0.0141,
      "step": 1390340
    },
    {
      "epoch": 2.275354634302809,
      "grad_norm": 0.22271282970905304,
      "learning_rate": 5.485197314233377e-06,
      "loss": 0.0155,
      "step": 1390360
    },
    {
      "epoch": 2.275387364741462,
      "grad_norm": 0.2720851004123688,
      "learning_rate": 5.4851314220198604e-06,
      "loss": 0.0168,
      "step": 1390380
    },
    {
      "epoch": 2.275420095180116,
      "grad_norm": 0.12968315184116364,
      "learning_rate": 5.485065529806344e-06,
      "loss": 0.0133,
      "step": 1390400
    },
    {
      "epoch": 2.275452825618769,
      "grad_norm": 0.2553292512893677,
      "learning_rate": 5.484999637592826e-06,
      "loss": 0.0203,
      "step": 1390420
    },
    {
      "epoch": 2.275485556057422,
      "grad_norm": 0.786894679069519,
      "learning_rate": 5.4849337453793095e-06,
      "loss": 0.0185,
      "step": 1390440
    },
    {
      "epoch": 2.2755182864960757,
      "grad_norm": 0.1969131976366043,
      "learning_rate": 5.484867853165791e-06,
      "loss": 0.013,
      "step": 1390460
    },
    {
      "epoch": 2.275551016934729,
      "grad_norm": 0.49097582697868347,
      "learning_rate": 5.484801960952275e-06,
      "loss": 0.0175,
      "step": 1390480
    },
    {
      "epoch": 2.2755837473733824,
      "grad_norm": 0.18783217668533325,
      "learning_rate": 5.484736068738758e-06,
      "loss": 0.0151,
      "step": 1390500
    },
    {
      "epoch": 2.2756164778120356,
      "grad_norm": 0.3506595492362976,
      "learning_rate": 5.4846701765252405e-06,
      "loss": 0.0168,
      "step": 1390520
    },
    {
      "epoch": 2.275649208250689,
      "grad_norm": 0.44571542739868164,
      "learning_rate": 5.484604284311723e-06,
      "loss": 0.0223,
      "step": 1390540
    },
    {
      "epoch": 2.2756819386893423,
      "grad_norm": 0.23389220237731934,
      "learning_rate": 5.484538392098207e-06,
      "loss": 0.01,
      "step": 1390560
    },
    {
      "epoch": 2.2757146691279955,
      "grad_norm": 0.3200443685054779,
      "learning_rate": 5.484472499884689e-06,
      "loss": 0.0192,
      "step": 1390580
    },
    {
      "epoch": 2.275747399566649,
      "grad_norm": 0.8646866679191589,
      "learning_rate": 5.484406607671172e-06,
      "loss": 0.0193,
      "step": 1390600
    },
    {
      "epoch": 2.275780130005302,
      "grad_norm": 0.29620325565338135,
      "learning_rate": 5.484340715457654e-06,
      "loss": 0.0137,
      "step": 1390620
    },
    {
      "epoch": 2.275812860443956,
      "grad_norm": 0.8069689273834229,
      "learning_rate": 5.484274823244138e-06,
      "loss": 0.0173,
      "step": 1390640
    },
    {
      "epoch": 2.275845590882609,
      "grad_norm": 0.36043059825897217,
      "learning_rate": 5.4842089310306205e-06,
      "loss": 0.0171,
      "step": 1390660
    },
    {
      "epoch": 2.2758783213212626,
      "grad_norm": 0.6710795164108276,
      "learning_rate": 5.484143038817103e-06,
      "loss": 0.0159,
      "step": 1390680
    },
    {
      "epoch": 2.2759110517599157,
      "grad_norm": 0.4226539134979248,
      "learning_rate": 5.484077146603586e-06,
      "loss": 0.0138,
      "step": 1390700
    },
    {
      "epoch": 2.275943782198569,
      "grad_norm": 0.24176688492298126,
      "learning_rate": 5.4840112543900696e-06,
      "loss": 0.0142,
      "step": 1390720
    },
    {
      "epoch": 2.2759765126372224,
      "grad_norm": 0.21198976039886475,
      "learning_rate": 5.483945362176552e-06,
      "loss": 0.0164,
      "step": 1390740
    },
    {
      "epoch": 2.2760092430758756,
      "grad_norm": 0.4896637499332428,
      "learning_rate": 5.483879469963035e-06,
      "loss": 0.0107,
      "step": 1390760
    },
    {
      "epoch": 2.276041973514529,
      "grad_norm": 0.9321251511573792,
      "learning_rate": 5.483813577749519e-06,
      "loss": 0.0177,
      "step": 1390780
    },
    {
      "epoch": 2.2760747039531823,
      "grad_norm": 0.3016461133956909,
      "learning_rate": 5.4837476855360005e-06,
      "loss": 0.01,
      "step": 1390800
    },
    {
      "epoch": 2.276107434391836,
      "grad_norm": 0.6974698901176453,
      "learning_rate": 5.483681793322484e-06,
      "loss": 0.0143,
      "step": 1390820
    },
    {
      "epoch": 2.276140164830489,
      "grad_norm": 0.5499116778373718,
      "learning_rate": 5.483615901108966e-06,
      "loss": 0.013,
      "step": 1390840
    },
    {
      "epoch": 2.2761728952691422,
      "grad_norm": 0.9021811485290527,
      "learning_rate": 5.48355000889545e-06,
      "loss": 0.0236,
      "step": 1390860
    },
    {
      "epoch": 2.276205625707796,
      "grad_norm": 0.02961845137178898,
      "learning_rate": 5.483484116681932e-06,
      "loss": 0.0137,
      "step": 1390880
    },
    {
      "epoch": 2.276238356146449,
      "grad_norm": 0.08183485269546509,
      "learning_rate": 5.483418224468415e-06,
      "loss": 0.0116,
      "step": 1390900
    },
    {
      "epoch": 2.2762710865851026,
      "grad_norm": 0.6862825155258179,
      "learning_rate": 5.483352332254898e-06,
      "loss": 0.0133,
      "step": 1390920
    },
    {
      "epoch": 2.2763038170237557,
      "grad_norm": 0.6258748173713684,
      "learning_rate": 5.483286440041381e-06,
      "loss": 0.0175,
      "step": 1390940
    },
    {
      "epoch": 2.2763365474624093,
      "grad_norm": 0.6583219766616821,
      "learning_rate": 5.483220547827863e-06,
      "loss": 0.0156,
      "step": 1390960
    },
    {
      "epoch": 2.2763692779010625,
      "grad_norm": 0.3076727092266083,
      "learning_rate": 5.483154655614347e-06,
      "loss": 0.0092,
      "step": 1390980
    },
    {
      "epoch": 2.2764020083397156,
      "grad_norm": 1.3447529077529907,
      "learning_rate": 5.483088763400829e-06,
      "loss": 0.0102,
      "step": 1391000
    },
    {
      "epoch": 2.276434738778369,
      "grad_norm": 0.26114198565483093,
      "learning_rate": 5.483022871187312e-06,
      "loss": 0.0146,
      "step": 1391020
    },
    {
      "epoch": 2.2764674692170224,
      "grad_norm": 0.5043858885765076,
      "learning_rate": 5.482956978973794e-06,
      "loss": 0.0103,
      "step": 1391040
    },
    {
      "epoch": 2.276500199655676,
      "grad_norm": 0.2022625356912613,
      "learning_rate": 5.482891086760278e-06,
      "loss": 0.0163,
      "step": 1391060
    },
    {
      "epoch": 2.276532930094329,
      "grad_norm": 0.5206248760223389,
      "learning_rate": 5.4828251945467606e-06,
      "loss": 0.0199,
      "step": 1391080
    },
    {
      "epoch": 2.2765656605329827,
      "grad_norm": 0.668929934501648,
      "learning_rate": 5.482759302333243e-06,
      "loss": 0.0158,
      "step": 1391100
    },
    {
      "epoch": 2.276598390971636,
      "grad_norm": 0.2898443937301636,
      "learning_rate": 5.482693410119727e-06,
      "loss": 0.0161,
      "step": 1391120
    },
    {
      "epoch": 2.276631121410289,
      "grad_norm": 0.23968946933746338,
      "learning_rate": 5.48262751790621e-06,
      "loss": 0.0176,
      "step": 1391140
    },
    {
      "epoch": 2.2766638518489426,
      "grad_norm": 0.1246265172958374,
      "learning_rate": 5.482561625692692e-06,
      "loss": 0.0114,
      "step": 1391160
    },
    {
      "epoch": 2.2766965822875957,
      "grad_norm": 0.11790095269680023,
      "learning_rate": 5.482495733479175e-06,
      "loss": 0.0139,
      "step": 1391180
    },
    {
      "epoch": 2.2767293127262493,
      "grad_norm": 0.1818600594997406,
      "learning_rate": 5.482429841265659e-06,
      "loss": 0.0137,
      "step": 1391200
    },
    {
      "epoch": 2.2767620431649025,
      "grad_norm": 0.5617275834083557,
      "learning_rate": 5.482363949052141e-06,
      "loss": 0.0183,
      "step": 1391220
    },
    {
      "epoch": 2.2767947736035556,
      "grad_norm": 0.51335608959198,
      "learning_rate": 5.482298056838624e-06,
      "loss": 0.0184,
      "step": 1391240
    },
    {
      "epoch": 2.2768275040422092,
      "grad_norm": 0.27440112829208374,
      "learning_rate": 5.482232164625106e-06,
      "loss": 0.0155,
      "step": 1391260
    },
    {
      "epoch": 2.2768602344808624,
      "grad_norm": 0.1962241232395172,
      "learning_rate": 5.48216627241159e-06,
      "loss": 0.0095,
      "step": 1391280
    },
    {
      "epoch": 2.276892964919516,
      "grad_norm": 0.3741251230239868,
      "learning_rate": 5.482100380198072e-06,
      "loss": 0.0217,
      "step": 1391300
    },
    {
      "epoch": 2.276925695358169,
      "grad_norm": 0.8744974136352539,
      "learning_rate": 5.482034487984555e-06,
      "loss": 0.0118,
      "step": 1391320
    },
    {
      "epoch": 2.2769584257968223,
      "grad_norm": 0.43384087085723877,
      "learning_rate": 5.481968595771038e-06,
      "loss": 0.0188,
      "step": 1391340
    },
    {
      "epoch": 2.276991156235476,
      "grad_norm": 0.17898452281951904,
      "learning_rate": 5.4819027035575215e-06,
      "loss": 0.0085,
      "step": 1391360
    },
    {
      "epoch": 2.277023886674129,
      "grad_norm": 0.3555537164211273,
      "learning_rate": 5.481836811344003e-06,
      "loss": 0.0121,
      "step": 1391380
    },
    {
      "epoch": 2.2770566171127826,
      "grad_norm": 0.18346886336803436,
      "learning_rate": 5.481770919130487e-06,
      "loss": 0.0159,
      "step": 1391400
    },
    {
      "epoch": 2.2770893475514358,
      "grad_norm": 0.5111246705055237,
      "learning_rate": 5.481705026916969e-06,
      "loss": 0.0149,
      "step": 1391420
    },
    {
      "epoch": 2.2771220779900894,
      "grad_norm": 0.3342234194278717,
      "learning_rate": 5.4816391347034524e-06,
      "loss": 0.0163,
      "step": 1391440
    },
    {
      "epoch": 2.2771548084287425,
      "grad_norm": 0.9538313746452332,
      "learning_rate": 5.481573242489936e-06,
      "loss": 0.0178,
      "step": 1391460
    },
    {
      "epoch": 2.2771875388673957,
      "grad_norm": 0.6546019315719604,
      "learning_rate": 5.481507350276418e-06,
      "loss": 0.0198,
      "step": 1391480
    },
    {
      "epoch": 2.2772202693060493,
      "grad_norm": 0.12013746052980423,
      "learning_rate": 5.4814414580629015e-06,
      "loss": 0.0167,
      "step": 1391500
    },
    {
      "epoch": 2.2772529997447024,
      "grad_norm": 0.3799411952495575,
      "learning_rate": 5.481375565849384e-06,
      "loss": 0.0178,
      "step": 1391520
    },
    {
      "epoch": 2.277285730183356,
      "grad_norm": 0.1944282203912735,
      "learning_rate": 5.481309673635867e-06,
      "loss": 0.0215,
      "step": 1391540
    },
    {
      "epoch": 2.277318460622009,
      "grad_norm": 0.709091067314148,
      "learning_rate": 5.48124378142235e-06,
      "loss": 0.0169,
      "step": 1391560
    },
    {
      "epoch": 2.2773511910606627,
      "grad_norm": 0.18848557770252228,
      "learning_rate": 5.481177889208833e-06,
      "loss": 0.0126,
      "step": 1391580
    },
    {
      "epoch": 2.277383921499316,
      "grad_norm": 0.710164487361908,
      "learning_rate": 5.481111996995315e-06,
      "loss": 0.0114,
      "step": 1391600
    },
    {
      "epoch": 2.277416651937969,
      "grad_norm": 0.3894832730293274,
      "learning_rate": 5.481046104781799e-06,
      "loss": 0.0157,
      "step": 1391620
    },
    {
      "epoch": 2.2774493823766226,
      "grad_norm": 0.3595602214336395,
      "learning_rate": 5.480980212568281e-06,
      "loss": 0.0174,
      "step": 1391640
    },
    {
      "epoch": 2.277482112815276,
      "grad_norm": 0.325061559677124,
      "learning_rate": 5.480914320354764e-06,
      "loss": 0.0145,
      "step": 1391660
    },
    {
      "epoch": 2.2775148432539294,
      "grad_norm": 0.15052473545074463,
      "learning_rate": 5.480848428141247e-06,
      "loss": 0.0153,
      "step": 1391680
    },
    {
      "epoch": 2.2775475736925825,
      "grad_norm": 0.12856751680374146,
      "learning_rate": 5.48078253592773e-06,
      "loss": 0.0163,
      "step": 1391700
    },
    {
      "epoch": 2.277580304131236,
      "grad_norm": 0.07387860119342804,
      "learning_rate": 5.4807166437142125e-06,
      "loss": 0.0156,
      "step": 1391720
    },
    {
      "epoch": 2.2776130345698893,
      "grad_norm": 0.44466742873191833,
      "learning_rate": 5.480650751500696e-06,
      "loss": 0.0199,
      "step": 1391740
    },
    {
      "epoch": 2.2776457650085424,
      "grad_norm": 0.19987685978412628,
      "learning_rate": 5.480584859287178e-06,
      "loss": 0.0128,
      "step": 1391760
    },
    {
      "epoch": 2.277678495447196,
      "grad_norm": 0.11216577142477036,
      "learning_rate": 5.4805189670736615e-06,
      "loss": 0.0148,
      "step": 1391780
    },
    {
      "epoch": 2.277711225885849,
      "grad_norm": 0.6800434589385986,
      "learning_rate": 5.480453074860145e-06,
      "loss": 0.0124,
      "step": 1391800
    },
    {
      "epoch": 2.2777439563245028,
      "grad_norm": 0.5162511467933655,
      "learning_rate": 5.480387182646627e-06,
      "loss": 0.0149,
      "step": 1391820
    },
    {
      "epoch": 2.277776686763156,
      "grad_norm": 0.05785948038101196,
      "learning_rate": 5.480321290433111e-06,
      "loss": 0.0155,
      "step": 1391840
    },
    {
      "epoch": 2.2778094172018095,
      "grad_norm": 0.7603694796562195,
      "learning_rate": 5.4802553982195925e-06,
      "loss": 0.0145,
      "step": 1391860
    },
    {
      "epoch": 2.2778421476404627,
      "grad_norm": 0.28589943051338196,
      "learning_rate": 5.480189506006076e-06,
      "loss": 0.0189,
      "step": 1391880
    },
    {
      "epoch": 2.277874878079116,
      "grad_norm": 0.32532140612602234,
      "learning_rate": 5.480123613792559e-06,
      "loss": 0.0116,
      "step": 1391900
    },
    {
      "epoch": 2.2779076085177694,
      "grad_norm": 0.17606015503406525,
      "learning_rate": 5.4800577215790416e-06,
      "loss": 0.0114,
      "step": 1391920
    },
    {
      "epoch": 2.2779403389564226,
      "grad_norm": 0.5544798374176025,
      "learning_rate": 5.479991829365524e-06,
      "loss": 0.0114,
      "step": 1391940
    },
    {
      "epoch": 2.277973069395076,
      "grad_norm": 0.2762187421321869,
      "learning_rate": 5.479925937152008e-06,
      "loss": 0.0117,
      "step": 1391960
    },
    {
      "epoch": 2.2780057998337293,
      "grad_norm": 0.9063493013381958,
      "learning_rate": 5.47986004493849e-06,
      "loss": 0.0218,
      "step": 1391980
    },
    {
      "epoch": 2.278038530272383,
      "grad_norm": 0.5320847034454346,
      "learning_rate": 5.479794152724973e-06,
      "loss": 0.0105,
      "step": 1392000
    },
    {
      "epoch": 2.278071260711036,
      "grad_norm": 0.5824604630470276,
      "learning_rate": 5.479728260511455e-06,
      "loss": 0.0102,
      "step": 1392020
    },
    {
      "epoch": 2.278103991149689,
      "grad_norm": 0.38219740986824036,
      "learning_rate": 5.479662368297939e-06,
      "loss": 0.0197,
      "step": 1392040
    },
    {
      "epoch": 2.278136721588343,
      "grad_norm": 0.16993650794029236,
      "learning_rate": 5.479596476084421e-06,
      "loss": 0.0141,
      "step": 1392060
    },
    {
      "epoch": 2.278169452026996,
      "grad_norm": 0.1574663370847702,
      "learning_rate": 5.479530583870904e-06,
      "loss": 0.0175,
      "step": 1392080
    },
    {
      "epoch": 2.2782021824656495,
      "grad_norm": 0.2528626322746277,
      "learning_rate": 5.479464691657387e-06,
      "loss": 0.0183,
      "step": 1392100
    },
    {
      "epoch": 2.2782349129043027,
      "grad_norm": 0.9035789370536804,
      "learning_rate": 5.47939879944387e-06,
      "loss": 0.0175,
      "step": 1392120
    },
    {
      "epoch": 2.2782676433429563,
      "grad_norm": 0.29443085193634033,
      "learning_rate": 5.4793329072303526e-06,
      "loss": 0.0134,
      "step": 1392140
    },
    {
      "epoch": 2.2783003737816094,
      "grad_norm": 0.8753566145896912,
      "learning_rate": 5.479267015016836e-06,
      "loss": 0.0108,
      "step": 1392160
    },
    {
      "epoch": 2.2783331042202626,
      "grad_norm": 0.43565836548805237,
      "learning_rate": 5.479201122803319e-06,
      "loss": 0.0164,
      "step": 1392180
    },
    {
      "epoch": 2.278365834658916,
      "grad_norm": 0.19090770184993744,
      "learning_rate": 5.479135230589802e-06,
      "loss": 0.01,
      "step": 1392200
    },
    {
      "epoch": 2.2783985650975693,
      "grad_norm": 0.28916096687316895,
      "learning_rate": 5.479069338376285e-06,
      "loss": 0.0126,
      "step": 1392220
    },
    {
      "epoch": 2.278431295536223,
      "grad_norm": 0.28035759925842285,
      "learning_rate": 5.479003446162767e-06,
      "loss": 0.0072,
      "step": 1392240
    },
    {
      "epoch": 2.278464025974876,
      "grad_norm": 0.9914879202842712,
      "learning_rate": 5.478937553949251e-06,
      "loss": 0.0115,
      "step": 1392260
    },
    {
      "epoch": 2.2784967564135297,
      "grad_norm": 0.25064095854759216,
      "learning_rate": 5.478871661735733e-06,
      "loss": 0.0147,
      "step": 1392280
    },
    {
      "epoch": 2.278529486852183,
      "grad_norm": 0.585817813873291,
      "learning_rate": 5.478805769522216e-06,
      "loss": 0.0105,
      "step": 1392300
    },
    {
      "epoch": 2.278562217290836,
      "grad_norm": 0.3328804075717926,
      "learning_rate": 5.478739877308699e-06,
      "loss": 0.012,
      "step": 1392320
    },
    {
      "epoch": 2.2785949477294896,
      "grad_norm": 0.6908489465713501,
      "learning_rate": 5.478673985095182e-06,
      "loss": 0.0169,
      "step": 1392340
    },
    {
      "epoch": 2.2786276781681427,
      "grad_norm": 0.8342758417129517,
      "learning_rate": 5.478608092881664e-06,
      "loss": 0.0124,
      "step": 1392360
    },
    {
      "epoch": 2.2786604086067963,
      "grad_norm": 0.15850581228733063,
      "learning_rate": 5.478542200668148e-06,
      "loss": 0.0156,
      "step": 1392380
    },
    {
      "epoch": 2.2786931390454495,
      "grad_norm": 0.6777804493904114,
      "learning_rate": 5.47847630845463e-06,
      "loss": 0.0154,
      "step": 1392400
    },
    {
      "epoch": 2.278725869484103,
      "grad_norm": 0.3180942237377167,
      "learning_rate": 5.4784104162411135e-06,
      "loss": 0.0167,
      "step": 1392420
    },
    {
      "epoch": 2.278758599922756,
      "grad_norm": 0.5224612951278687,
      "learning_rate": 5.478344524027595e-06,
      "loss": 0.0103,
      "step": 1392440
    },
    {
      "epoch": 2.2787913303614094,
      "grad_norm": 0.40392979979515076,
      "learning_rate": 5.478278631814079e-06,
      "loss": 0.0128,
      "step": 1392460
    },
    {
      "epoch": 2.278824060800063,
      "grad_norm": 0.1770441085100174,
      "learning_rate": 5.478212739600562e-06,
      "loss": 0.0087,
      "step": 1392480
    },
    {
      "epoch": 2.278856791238716,
      "grad_norm": 0.3517516851425171,
      "learning_rate": 5.478146847387044e-06,
      "loss": 0.0122,
      "step": 1392500
    },
    {
      "epoch": 2.2788895216773697,
      "grad_norm": 0.7038150429725647,
      "learning_rate": 5.478080955173528e-06,
      "loss": 0.0238,
      "step": 1392520
    },
    {
      "epoch": 2.278922252116023,
      "grad_norm": 0.3092005252838135,
      "learning_rate": 5.478015062960011e-06,
      "loss": 0.0087,
      "step": 1392540
    },
    {
      "epoch": 2.2789549825546764,
      "grad_norm": 0.14947302639484406,
      "learning_rate": 5.4779491707464935e-06,
      "loss": 0.0209,
      "step": 1392560
    },
    {
      "epoch": 2.2789877129933296,
      "grad_norm": 0.3873001039028168,
      "learning_rate": 5.477883278532976e-06,
      "loss": 0.014,
      "step": 1392580
    },
    {
      "epoch": 2.2790204434319827,
      "grad_norm": 0.1499919593334198,
      "learning_rate": 5.47781738631946e-06,
      "loss": 0.0148,
      "step": 1392600
    },
    {
      "epoch": 2.2790531738706363,
      "grad_norm": 0.45392438769340515,
      "learning_rate": 5.477751494105942e-06,
      "loss": 0.0183,
      "step": 1392620
    },
    {
      "epoch": 2.2790859043092895,
      "grad_norm": 0.44268906116485596,
      "learning_rate": 5.477685601892425e-06,
      "loss": 0.0141,
      "step": 1392640
    },
    {
      "epoch": 2.279118634747943,
      "grad_norm": 0.7091766595840454,
      "learning_rate": 5.477619709678907e-06,
      "loss": 0.0133,
      "step": 1392660
    },
    {
      "epoch": 2.2791513651865962,
      "grad_norm": 0.7322495579719543,
      "learning_rate": 5.477553817465391e-06,
      "loss": 0.0196,
      "step": 1392680
    },
    {
      "epoch": 2.27918409562525,
      "grad_norm": 0.3528916537761688,
      "learning_rate": 5.4774879252518735e-06,
      "loss": 0.0245,
      "step": 1392700
    },
    {
      "epoch": 2.279216826063903,
      "grad_norm": 0.2935202717781067,
      "learning_rate": 5.477422033038356e-06,
      "loss": 0.0127,
      "step": 1392720
    },
    {
      "epoch": 2.279249556502556,
      "grad_norm": 0.3770001232624054,
      "learning_rate": 5.477356140824839e-06,
      "loss": 0.013,
      "step": 1392740
    },
    {
      "epoch": 2.2792822869412097,
      "grad_norm": 0.363086074590683,
      "learning_rate": 5.4772902486113226e-06,
      "loss": 0.0145,
      "step": 1392760
    },
    {
      "epoch": 2.279315017379863,
      "grad_norm": 0.59175044298172,
      "learning_rate": 5.4772243563978045e-06,
      "loss": 0.0177,
      "step": 1392780
    },
    {
      "epoch": 2.2793477478185165,
      "grad_norm": 0.19617493450641632,
      "learning_rate": 5.477158464184288e-06,
      "loss": 0.0178,
      "step": 1392800
    },
    {
      "epoch": 2.2793804782571696,
      "grad_norm": 0.2998863160610199,
      "learning_rate": 5.47709257197077e-06,
      "loss": 0.0193,
      "step": 1392820
    },
    {
      "epoch": 2.2794132086958228,
      "grad_norm": 0.37681105732917786,
      "learning_rate": 5.4770266797572535e-06,
      "loss": 0.0166,
      "step": 1392840
    },
    {
      "epoch": 2.2794459391344764,
      "grad_norm": 0.24160327017307281,
      "learning_rate": 5.476960787543737e-06,
      "loss": 0.0126,
      "step": 1392860
    },
    {
      "epoch": 2.2794786695731295,
      "grad_norm": 0.3475225567817688,
      "learning_rate": 5.476894895330219e-06,
      "loss": 0.0213,
      "step": 1392880
    },
    {
      "epoch": 2.279511400011783,
      "grad_norm": 0.2734280228614807,
      "learning_rate": 5.476829003116703e-06,
      "loss": 0.0134,
      "step": 1392900
    },
    {
      "epoch": 2.2795441304504362,
      "grad_norm": 0.8755388259887695,
      "learning_rate": 5.4767631109031845e-06,
      "loss": 0.015,
      "step": 1392920
    },
    {
      "epoch": 2.2795768608890894,
      "grad_norm": 0.5529018640518188,
      "learning_rate": 5.476697218689668e-06,
      "loss": 0.0213,
      "step": 1392940
    },
    {
      "epoch": 2.279609591327743,
      "grad_norm": 0.25795355439186096,
      "learning_rate": 5.476631326476151e-06,
      "loss": 0.0222,
      "step": 1392960
    },
    {
      "epoch": 2.279642321766396,
      "grad_norm": 0.5156973600387573,
      "learning_rate": 5.476565434262634e-06,
      "loss": 0.0123,
      "step": 1392980
    },
    {
      "epoch": 2.2796750522050497,
      "grad_norm": 0.7579886317253113,
      "learning_rate": 5.476499542049116e-06,
      "loss": 0.0213,
      "step": 1393000
    },
    {
      "epoch": 2.279707782643703,
      "grad_norm": 0.6897265911102295,
      "learning_rate": 5.4764336498356e-06,
      "loss": 0.0165,
      "step": 1393020
    },
    {
      "epoch": 2.2797405130823565,
      "grad_norm": 0.465995192527771,
      "learning_rate": 5.476367757622082e-06,
      "loss": 0.0199,
      "step": 1393040
    },
    {
      "epoch": 2.2797732435210096,
      "grad_norm": 1.3388698101043701,
      "learning_rate": 5.476301865408565e-06,
      "loss": 0.0176,
      "step": 1393060
    },
    {
      "epoch": 2.279805973959663,
      "grad_norm": 0.1754712015390396,
      "learning_rate": 5.476235973195047e-06,
      "loss": 0.018,
      "step": 1393080
    },
    {
      "epoch": 2.2798387043983164,
      "grad_norm": 0.03065594658255577,
      "learning_rate": 5.476170080981531e-06,
      "loss": 0.0095,
      "step": 1393100
    },
    {
      "epoch": 2.2798714348369695,
      "grad_norm": 0.1762516051530838,
      "learning_rate": 5.476104188768014e-06,
      "loss": 0.0113,
      "step": 1393120
    },
    {
      "epoch": 2.279904165275623,
      "grad_norm": 0.23942576348781586,
      "learning_rate": 5.476038296554496e-06,
      "loss": 0.0105,
      "step": 1393140
    },
    {
      "epoch": 2.2799368957142763,
      "grad_norm": 0.31098249554634094,
      "learning_rate": 5.475972404340979e-06,
      "loss": 0.0251,
      "step": 1393160
    },
    {
      "epoch": 2.27996962615293,
      "grad_norm": 0.21512770652770996,
      "learning_rate": 5.475906512127463e-06,
      "loss": 0.0102,
      "step": 1393180
    },
    {
      "epoch": 2.280002356591583,
      "grad_norm": 1.3555430173873901,
      "learning_rate": 5.4758406199139445e-06,
      "loss": 0.0183,
      "step": 1393200
    },
    {
      "epoch": 2.280035087030236,
      "grad_norm": 0.492799311876297,
      "learning_rate": 5.475774727700428e-06,
      "loss": 0.0135,
      "step": 1393220
    },
    {
      "epoch": 2.2800678174688898,
      "grad_norm": 0.16062580049037933,
      "learning_rate": 5.475708835486912e-06,
      "loss": 0.014,
      "step": 1393240
    },
    {
      "epoch": 2.280100547907543,
      "grad_norm": 0.09356580674648285,
      "learning_rate": 5.475642943273394e-06,
      "loss": 0.0134,
      "step": 1393260
    },
    {
      "epoch": 2.2801332783461965,
      "grad_norm": 0.3946874439716339,
      "learning_rate": 5.475577051059877e-06,
      "loss": 0.0135,
      "step": 1393280
    },
    {
      "epoch": 2.2801660087848497,
      "grad_norm": 0.5139830708503723,
      "learning_rate": 5.475511158846359e-06,
      "loss": 0.0175,
      "step": 1393300
    },
    {
      "epoch": 2.2801987392235032,
      "grad_norm": 0.3464329242706299,
      "learning_rate": 5.475445266632843e-06,
      "loss": 0.0168,
      "step": 1393320
    },
    {
      "epoch": 2.2802314696621564,
      "grad_norm": 0.27634894847869873,
      "learning_rate": 5.475379374419325e-06,
      "loss": 0.0222,
      "step": 1393340
    },
    {
      "epoch": 2.2802642001008095,
      "grad_norm": 0.5540124773979187,
      "learning_rate": 5.475313482205808e-06,
      "loss": 0.0142,
      "step": 1393360
    },
    {
      "epoch": 2.280296930539463,
      "grad_norm": 0.3431176543235779,
      "learning_rate": 5.475247589992291e-06,
      "loss": 0.0137,
      "step": 1393380
    },
    {
      "epoch": 2.2803296609781163,
      "grad_norm": 0.3710341453552246,
      "learning_rate": 5.4751816977787745e-06,
      "loss": 0.0143,
      "step": 1393400
    },
    {
      "epoch": 2.28036239141677,
      "grad_norm": 0.08307641744613647,
      "learning_rate": 5.475115805565256e-06,
      "loss": 0.0109,
      "step": 1393420
    },
    {
      "epoch": 2.280395121855423,
      "grad_norm": 0.17396840453147888,
      "learning_rate": 5.47504991335174e-06,
      "loss": 0.0146,
      "step": 1393440
    },
    {
      "epoch": 2.2804278522940766,
      "grad_norm": 0.5638701915740967,
      "learning_rate": 5.474984021138222e-06,
      "loss": 0.0096,
      "step": 1393460
    },
    {
      "epoch": 2.28046058273273,
      "grad_norm": 0.11431669443845749,
      "learning_rate": 5.4749181289247054e-06,
      "loss": 0.0167,
      "step": 1393480
    },
    {
      "epoch": 2.280493313171383,
      "grad_norm": 0.3075873851776123,
      "learning_rate": 5.474852236711188e-06,
      "loss": 0.0179,
      "step": 1393500
    },
    {
      "epoch": 2.2805260436100365,
      "grad_norm": 0.07558052986860275,
      "learning_rate": 5.474786344497671e-06,
      "loss": 0.0114,
      "step": 1393520
    },
    {
      "epoch": 2.2805587740486897,
      "grad_norm": 0.6089515089988708,
      "learning_rate": 5.474720452284154e-06,
      "loss": 0.0179,
      "step": 1393540
    },
    {
      "epoch": 2.2805915044873433,
      "grad_norm": 0.36250367760658264,
      "learning_rate": 5.474654560070637e-06,
      "loss": 0.0193,
      "step": 1393560
    },
    {
      "epoch": 2.2806242349259964,
      "grad_norm": 0.31304532289505005,
      "learning_rate": 5.47458866785712e-06,
      "loss": 0.0198,
      "step": 1393580
    },
    {
      "epoch": 2.28065696536465,
      "grad_norm": 0.35136517882347107,
      "learning_rate": 5.474522775643603e-06,
      "loss": 0.0126,
      "step": 1393600
    },
    {
      "epoch": 2.280689695803303,
      "grad_norm": 0.38234248757362366,
      "learning_rate": 5.474456883430086e-06,
      "loss": 0.014,
      "step": 1393620
    },
    {
      "epoch": 2.2807224262419563,
      "grad_norm": 0.23020702600479126,
      "learning_rate": 5.474390991216568e-06,
      "loss": 0.0139,
      "step": 1393640
    },
    {
      "epoch": 2.28075515668061,
      "grad_norm": 0.042361896485090256,
      "learning_rate": 5.474325099003052e-06,
      "loss": 0.0171,
      "step": 1393660
    },
    {
      "epoch": 2.280787887119263,
      "grad_norm": 0.5461146831512451,
      "learning_rate": 5.474259206789534e-06,
      "loss": 0.0126,
      "step": 1393680
    },
    {
      "epoch": 2.2808206175579167,
      "grad_norm": 0.32329338788986206,
      "learning_rate": 5.474193314576017e-06,
      "loss": 0.0177,
      "step": 1393700
    },
    {
      "epoch": 2.28085334799657,
      "grad_norm": 0.7031732201576233,
      "learning_rate": 5.4741274223625e-06,
      "loss": 0.0179,
      "step": 1393720
    },
    {
      "epoch": 2.2808860784352234,
      "grad_norm": 0.4998306632041931,
      "learning_rate": 5.474061530148983e-06,
      "loss": 0.0205,
      "step": 1393740
    },
    {
      "epoch": 2.2809188088738765,
      "grad_norm": 0.8646613955497742,
      "learning_rate": 5.4739956379354655e-06,
      "loss": 0.0124,
      "step": 1393760
    },
    {
      "epoch": 2.2809515393125297,
      "grad_norm": 0.3824727237224579,
      "learning_rate": 5.473929745721949e-06,
      "loss": 0.0134,
      "step": 1393780
    },
    {
      "epoch": 2.2809842697511833,
      "grad_norm": 0.38111424446105957,
      "learning_rate": 5.473863853508431e-06,
      "loss": 0.017,
      "step": 1393800
    },
    {
      "epoch": 2.2810170001898364,
      "grad_norm": 0.4431007504463196,
      "learning_rate": 5.4737979612949146e-06,
      "loss": 0.0131,
      "step": 1393820
    },
    {
      "epoch": 2.28104973062849,
      "grad_norm": 0.5657052993774414,
      "learning_rate": 5.4737320690813964e-06,
      "loss": 0.0172,
      "step": 1393840
    },
    {
      "epoch": 2.281082461067143,
      "grad_norm": 0.3110341727733612,
      "learning_rate": 5.47366617686788e-06,
      "loss": 0.0162,
      "step": 1393860
    },
    {
      "epoch": 2.281115191505797,
      "grad_norm": 0.6117242574691772,
      "learning_rate": 5.473600284654362e-06,
      "loss": 0.0188,
      "step": 1393880
    },
    {
      "epoch": 2.28114792194445,
      "grad_norm": 0.13972733914852142,
      "learning_rate": 5.4735343924408455e-06,
      "loss": 0.0116,
      "step": 1393900
    },
    {
      "epoch": 2.281180652383103,
      "grad_norm": 0.14505968987941742,
      "learning_rate": 5.473468500227329e-06,
      "loss": 0.0156,
      "step": 1393920
    },
    {
      "epoch": 2.2812133828217567,
      "grad_norm": 0.11341977119445801,
      "learning_rate": 5.473402608013811e-06,
      "loss": 0.009,
      "step": 1393940
    },
    {
      "epoch": 2.28124611326041,
      "grad_norm": 0.43855881690979004,
      "learning_rate": 5.473336715800295e-06,
      "loss": 0.0154,
      "step": 1393960
    },
    {
      "epoch": 2.2812788436990634,
      "grad_norm": 1.0166136026382446,
      "learning_rate": 5.473270823586777e-06,
      "loss": 0.0165,
      "step": 1393980
    },
    {
      "epoch": 2.2813115741377166,
      "grad_norm": 0.13589397072792053,
      "learning_rate": 5.47320493137326e-06,
      "loss": 0.0208,
      "step": 1394000
    },
    {
      "epoch": 2.28134430457637,
      "grad_norm": 0.21702732145786285,
      "learning_rate": 5.473139039159743e-06,
      "loss": 0.0088,
      "step": 1394020
    },
    {
      "epoch": 2.2813770350150233,
      "grad_norm": 0.2423088252544403,
      "learning_rate": 5.473073146946226e-06,
      "loss": 0.0132,
      "step": 1394040
    },
    {
      "epoch": 2.2814097654536765,
      "grad_norm": 0.28786224126815796,
      "learning_rate": 5.473007254732708e-06,
      "loss": 0.0175,
      "step": 1394060
    },
    {
      "epoch": 2.28144249589233,
      "grad_norm": 0.9869493246078491,
      "learning_rate": 5.472941362519192e-06,
      "loss": 0.0142,
      "step": 1394080
    },
    {
      "epoch": 2.281475226330983,
      "grad_norm": 0.4956014156341553,
      "learning_rate": 5.472875470305674e-06,
      "loss": 0.0148,
      "step": 1394100
    },
    {
      "epoch": 2.281507956769637,
      "grad_norm": 0.6033557653427124,
      "learning_rate": 5.472809578092157e-06,
      "loss": 0.0163,
      "step": 1394120
    },
    {
      "epoch": 2.28154068720829,
      "grad_norm": 0.14233864843845367,
      "learning_rate": 5.47274368587864e-06,
      "loss": 0.0152,
      "step": 1394140
    },
    {
      "epoch": 2.2815734176469435,
      "grad_norm": 0.1398082971572876,
      "learning_rate": 5.472677793665123e-06,
      "loss": 0.0128,
      "step": 1394160
    },
    {
      "epoch": 2.2816061480855967,
      "grad_norm": 1.1646627187728882,
      "learning_rate": 5.4726119014516056e-06,
      "loss": 0.0149,
      "step": 1394180
    },
    {
      "epoch": 2.28163887852425,
      "grad_norm": 0.7355841994285583,
      "learning_rate": 5.472546009238089e-06,
      "loss": 0.0206,
      "step": 1394200
    },
    {
      "epoch": 2.2816716089629034,
      "grad_norm": 0.32727891206741333,
      "learning_rate": 5.472480117024571e-06,
      "loss": 0.0178,
      "step": 1394220
    },
    {
      "epoch": 2.2817043394015566,
      "grad_norm": 0.11116685718297958,
      "learning_rate": 5.472414224811055e-06,
      "loss": 0.0139,
      "step": 1394240
    },
    {
      "epoch": 2.28173706984021,
      "grad_norm": 0.15752066671848297,
      "learning_rate": 5.472348332597538e-06,
      "loss": 0.0127,
      "step": 1394260
    },
    {
      "epoch": 2.2817698002788633,
      "grad_norm": 0.19478532671928406,
      "learning_rate": 5.47228244038402e-06,
      "loss": 0.0148,
      "step": 1394280
    },
    {
      "epoch": 2.2818025307175165,
      "grad_norm": 0.7633440494537354,
      "learning_rate": 5.472216548170504e-06,
      "loss": 0.0212,
      "step": 1394300
    },
    {
      "epoch": 2.28183526115617,
      "grad_norm": 0.3410922586917877,
      "learning_rate": 5.472150655956986e-06,
      "loss": 0.0159,
      "step": 1394320
    },
    {
      "epoch": 2.2818679915948232,
      "grad_norm": 0.7589737772941589,
      "learning_rate": 5.472084763743469e-06,
      "loss": 0.0132,
      "step": 1394340
    },
    {
      "epoch": 2.281900722033477,
      "grad_norm": 0.2972351014614105,
      "learning_rate": 5.472018871529952e-06,
      "loss": 0.0164,
      "step": 1394360
    },
    {
      "epoch": 2.28193345247213,
      "grad_norm": 0.4302466809749603,
      "learning_rate": 5.471952979316435e-06,
      "loss": 0.0238,
      "step": 1394380
    },
    {
      "epoch": 2.281966182910783,
      "grad_norm": 0.709478497505188,
      "learning_rate": 5.471887087102917e-06,
      "loss": 0.013,
      "step": 1394400
    },
    {
      "epoch": 2.2819989133494367,
      "grad_norm": 0.3148941695690155,
      "learning_rate": 5.471821194889401e-06,
      "loss": 0.0137,
      "step": 1394420
    },
    {
      "epoch": 2.28203164378809,
      "grad_norm": 0.27652508020401,
      "learning_rate": 5.471755302675883e-06,
      "loss": 0.0124,
      "step": 1394440
    },
    {
      "epoch": 2.2820643742267435,
      "grad_norm": 0.4486188590526581,
      "learning_rate": 5.4716894104623665e-06,
      "loss": 0.0133,
      "step": 1394460
    },
    {
      "epoch": 2.2820971046653966,
      "grad_norm": 1.8489570617675781,
      "learning_rate": 5.471623518248848e-06,
      "loss": 0.015,
      "step": 1394480
    },
    {
      "epoch": 2.28212983510405,
      "grad_norm": 0.1263408660888672,
      "learning_rate": 5.471557626035332e-06,
      "loss": 0.0191,
      "step": 1394500
    },
    {
      "epoch": 2.2821625655427034,
      "grad_norm": 0.27056291699409485,
      "learning_rate": 5.471491733821815e-06,
      "loss": 0.0136,
      "step": 1394520
    },
    {
      "epoch": 2.2821952959813565,
      "grad_norm": 0.6974213719367981,
      "learning_rate": 5.471425841608297e-06,
      "loss": 0.0187,
      "step": 1394540
    },
    {
      "epoch": 2.28222802642001,
      "grad_norm": 0.5294922590255737,
      "learning_rate": 5.47135994939478e-06,
      "loss": 0.0134,
      "step": 1394560
    },
    {
      "epoch": 2.2822607568586633,
      "grad_norm": 0.6309696435928345,
      "learning_rate": 5.471294057181264e-06,
      "loss": 0.0146,
      "step": 1394580
    },
    {
      "epoch": 2.282293487297317,
      "grad_norm": 0.11033704876899719,
      "learning_rate": 5.471228164967746e-06,
      "loss": 0.015,
      "step": 1394600
    },
    {
      "epoch": 2.28232621773597,
      "grad_norm": 0.17871904373168945,
      "learning_rate": 5.471162272754229e-06,
      "loss": 0.0179,
      "step": 1394620
    },
    {
      "epoch": 2.2823589481746236,
      "grad_norm": 1.3280972242355347,
      "learning_rate": 5.471096380540713e-06,
      "loss": 0.0223,
      "step": 1394640
    },
    {
      "epoch": 2.2823916786132767,
      "grad_norm": 0.4109937846660614,
      "learning_rate": 5.471030488327195e-06,
      "loss": 0.0171,
      "step": 1394660
    },
    {
      "epoch": 2.28242440905193,
      "grad_norm": 0.30661606788635254,
      "learning_rate": 5.470964596113678e-06,
      "loss": 0.0225,
      "step": 1394680
    },
    {
      "epoch": 2.2824571394905835,
      "grad_norm": 0.5880897641181946,
      "learning_rate": 5.47089870390016e-06,
      "loss": 0.0128,
      "step": 1394700
    },
    {
      "epoch": 2.2824898699292366,
      "grad_norm": 0.21880896389484406,
      "learning_rate": 5.470832811686644e-06,
      "loss": 0.0188,
      "step": 1394720
    },
    {
      "epoch": 2.2825226003678902,
      "grad_norm": 0.5014327764511108,
      "learning_rate": 5.4707669194731265e-06,
      "loss": 0.015,
      "step": 1394740
    },
    {
      "epoch": 2.2825553308065434,
      "grad_norm": 0.6238781213760376,
      "learning_rate": 5.470701027259609e-06,
      "loss": 0.0151,
      "step": 1394760
    },
    {
      "epoch": 2.282588061245197,
      "grad_norm": 0.5741242170333862,
      "learning_rate": 5.470635135046092e-06,
      "loss": 0.0124,
      "step": 1394780
    },
    {
      "epoch": 2.28262079168385,
      "grad_norm": 0.34528279304504395,
      "learning_rate": 5.470569242832576e-06,
      "loss": 0.0136,
      "step": 1394800
    },
    {
      "epoch": 2.2826535221225033,
      "grad_norm": 6.004441261291504,
      "learning_rate": 5.4705033506190575e-06,
      "loss": 0.0148,
      "step": 1394820
    },
    {
      "epoch": 2.282686252561157,
      "grad_norm": 0.22102735936641693,
      "learning_rate": 5.470437458405541e-06,
      "loss": 0.0119,
      "step": 1394840
    },
    {
      "epoch": 2.28271898299981,
      "grad_norm": 0.35534557700157166,
      "learning_rate": 5.470371566192023e-06,
      "loss": 0.0111,
      "step": 1394860
    },
    {
      "epoch": 2.2827517134384636,
      "grad_norm": 0.07604952901601791,
      "learning_rate": 5.4703056739785065e-06,
      "loss": 0.0145,
      "step": 1394880
    },
    {
      "epoch": 2.2827844438771168,
      "grad_norm": 0.3280937969684601,
      "learning_rate": 5.4702397817649884e-06,
      "loss": 0.0126,
      "step": 1394900
    },
    {
      "epoch": 2.2828171743157704,
      "grad_norm": 0.2589700520038605,
      "learning_rate": 5.470173889551472e-06,
      "loss": 0.018,
      "step": 1394920
    },
    {
      "epoch": 2.2828499047544235,
      "grad_norm": 0.07812076807022095,
      "learning_rate": 5.470107997337955e-06,
      "loss": 0.0184,
      "step": 1394940
    },
    {
      "epoch": 2.2828826351930767,
      "grad_norm": 0.8083006739616394,
      "learning_rate": 5.4700421051244375e-06,
      "loss": 0.0177,
      "step": 1394960
    },
    {
      "epoch": 2.2829153656317303,
      "grad_norm": 0.6331730484962463,
      "learning_rate": 5.469976212910921e-06,
      "loss": 0.0165,
      "step": 1394980
    },
    {
      "epoch": 2.2829480960703834,
      "grad_norm": 0.58216792345047,
      "learning_rate": 5.469910320697404e-06,
      "loss": 0.014,
      "step": 1395000
    },
    {
      "epoch": 2.282980826509037,
      "grad_norm": 0.9300334453582764,
      "learning_rate": 5.4698444284838866e-06,
      "loss": 0.015,
      "step": 1395020
    },
    {
      "epoch": 2.28301355694769,
      "grad_norm": 0.4013341963291168,
      "learning_rate": 5.469778536270369e-06,
      "loss": 0.0155,
      "step": 1395040
    },
    {
      "epoch": 2.2830462873863437,
      "grad_norm": 0.25352197885513306,
      "learning_rate": 5.469712644056853e-06,
      "loss": 0.011,
      "step": 1395060
    },
    {
      "epoch": 2.283079017824997,
      "grad_norm": 0.24464412033557892,
      "learning_rate": 5.469646751843335e-06,
      "loss": 0.0103,
      "step": 1395080
    },
    {
      "epoch": 2.28311174826365,
      "grad_norm": 0.7887776494026184,
      "learning_rate": 5.469580859629818e-06,
      "loss": 0.0125,
      "step": 1395100
    },
    {
      "epoch": 2.2831444787023036,
      "grad_norm": 0.26112017035484314,
      "learning_rate": 5.4695149674163e-06,
      "loss": 0.0123,
      "step": 1395120
    },
    {
      "epoch": 2.283177209140957,
      "grad_norm": 0.16233684122562408,
      "learning_rate": 5.469449075202784e-06,
      "loss": 0.0096,
      "step": 1395140
    },
    {
      "epoch": 2.2832099395796104,
      "grad_norm": 0.18071360886096954,
      "learning_rate": 5.469383182989267e-06,
      "loss": 0.0147,
      "step": 1395160
    },
    {
      "epoch": 2.2832426700182635,
      "grad_norm": 0.04206279665231705,
      "learning_rate": 5.469317290775749e-06,
      "loss": 0.0156,
      "step": 1395180
    },
    {
      "epoch": 2.283275400456917,
      "grad_norm": 0.3476209044456482,
      "learning_rate": 5.469251398562232e-06,
      "loss": 0.0139,
      "step": 1395200
    },
    {
      "epoch": 2.2833081308955703,
      "grad_norm": 0.31282860040664673,
      "learning_rate": 5.469185506348716e-06,
      "loss": 0.0147,
      "step": 1395220
    },
    {
      "epoch": 2.2833408613342234,
      "grad_norm": 0.3840348720550537,
      "learning_rate": 5.4691196141351975e-06,
      "loss": 0.0132,
      "step": 1395240
    },
    {
      "epoch": 2.283373591772877,
      "grad_norm": 0.3289187550544739,
      "learning_rate": 5.469053721921681e-06,
      "loss": 0.0184,
      "step": 1395260
    },
    {
      "epoch": 2.28340632221153,
      "grad_norm": 0.4303695857524872,
      "learning_rate": 5.468987829708163e-06,
      "loss": 0.0083,
      "step": 1395280
    },
    {
      "epoch": 2.2834390526501838,
      "grad_norm": 1.2697113752365112,
      "learning_rate": 5.468921937494647e-06,
      "loss": 0.0123,
      "step": 1395300
    },
    {
      "epoch": 2.283471783088837,
      "grad_norm": 0.6518263816833496,
      "learning_rate": 5.46885604528113e-06,
      "loss": 0.0129,
      "step": 1395320
    },
    {
      "epoch": 2.2835045135274905,
      "grad_norm": 0.6827136874198914,
      "learning_rate": 5.468790153067612e-06,
      "loss": 0.0221,
      "step": 1395340
    },
    {
      "epoch": 2.2835372439661437,
      "grad_norm": 0.47805681824684143,
      "learning_rate": 5.468724260854096e-06,
      "loss": 0.0148,
      "step": 1395360
    },
    {
      "epoch": 2.283569974404797,
      "grad_norm": 0.3873904347419739,
      "learning_rate": 5.468658368640578e-06,
      "loss": 0.0191,
      "step": 1395380
    },
    {
      "epoch": 2.2836027048434504,
      "grad_norm": 0.4134935140609741,
      "learning_rate": 5.468592476427061e-06,
      "loss": 0.0129,
      "step": 1395400
    },
    {
      "epoch": 2.2836354352821036,
      "grad_norm": 0.1868855506181717,
      "learning_rate": 5.468526584213544e-06,
      "loss": 0.011,
      "step": 1395420
    },
    {
      "epoch": 2.283668165720757,
      "grad_norm": 0.42294782400131226,
      "learning_rate": 5.4684606920000275e-06,
      "loss": 0.0141,
      "step": 1395440
    },
    {
      "epoch": 2.2837008961594103,
      "grad_norm": 0.5069438815116882,
      "learning_rate": 5.468394799786509e-06,
      "loss": 0.0191,
      "step": 1395460
    },
    {
      "epoch": 2.283733626598064,
      "grad_norm": 0.26861631870269775,
      "learning_rate": 5.468328907572993e-06,
      "loss": 0.0105,
      "step": 1395480
    },
    {
      "epoch": 2.283766357036717,
      "grad_norm": 0.18508197367191315,
      "learning_rate": 5.468263015359475e-06,
      "loss": 0.0154,
      "step": 1395500
    },
    {
      "epoch": 2.28379908747537,
      "grad_norm": 0.2980625033378601,
      "learning_rate": 5.4681971231459584e-06,
      "loss": 0.0101,
      "step": 1395520
    },
    {
      "epoch": 2.283831817914024,
      "grad_norm": 0.5066066980361938,
      "learning_rate": 5.468131230932441e-06,
      "loss": 0.0125,
      "step": 1395540
    },
    {
      "epoch": 2.283864548352677,
      "grad_norm": 0.540417492389679,
      "learning_rate": 5.468065338718924e-06,
      "loss": 0.0109,
      "step": 1395560
    },
    {
      "epoch": 2.2838972787913305,
      "grad_norm": 0.920214831829071,
      "learning_rate": 5.467999446505407e-06,
      "loss": 0.0128,
      "step": 1395580
    },
    {
      "epoch": 2.2839300092299837,
      "grad_norm": 0.21735066175460815,
      "learning_rate": 5.46793355429189e-06,
      "loss": 0.0115,
      "step": 1395600
    },
    {
      "epoch": 2.2839627396686373,
      "grad_norm": 0.18817202746868134,
      "learning_rate": 5.467867662078372e-06,
      "loss": 0.0127,
      "step": 1395620
    },
    {
      "epoch": 2.2839954701072904,
      "grad_norm": 0.36998626589775085,
      "learning_rate": 5.467801769864856e-06,
      "loss": 0.0121,
      "step": 1395640
    },
    {
      "epoch": 2.2840282005459436,
      "grad_norm": 0.3136290907859802,
      "learning_rate": 5.467735877651338e-06,
      "loss": 0.0179,
      "step": 1395660
    },
    {
      "epoch": 2.284060930984597,
      "grad_norm": 1.012205958366394,
      "learning_rate": 5.467669985437821e-06,
      "loss": 0.0194,
      "step": 1395680
    },
    {
      "epoch": 2.2840936614232503,
      "grad_norm": 0.12788040935993195,
      "learning_rate": 5.467604093224305e-06,
      "loss": 0.0101,
      "step": 1395700
    },
    {
      "epoch": 2.284126391861904,
      "grad_norm": 0.1844281703233719,
      "learning_rate": 5.467538201010787e-06,
      "loss": 0.0212,
      "step": 1395720
    },
    {
      "epoch": 2.284159122300557,
      "grad_norm": 0.29276666045188904,
      "learning_rate": 5.46747230879727e-06,
      "loss": 0.018,
      "step": 1395740
    },
    {
      "epoch": 2.2841918527392107,
      "grad_norm": 0.21686074137687683,
      "learning_rate": 5.467406416583753e-06,
      "loss": 0.0133,
      "step": 1395760
    },
    {
      "epoch": 2.284224583177864,
      "grad_norm": 0.5213473439216614,
      "learning_rate": 5.467340524370236e-06,
      "loss": 0.019,
      "step": 1395780
    },
    {
      "epoch": 2.284257313616517,
      "grad_norm": 0.9831894040107727,
      "learning_rate": 5.4672746321567185e-06,
      "loss": 0.0147,
      "step": 1395800
    },
    {
      "epoch": 2.2842900440551706,
      "grad_norm": 0.4884442389011383,
      "learning_rate": 5.467208739943202e-06,
      "loss": 0.0195,
      "step": 1395820
    },
    {
      "epoch": 2.2843227744938237,
      "grad_norm": 0.23962441086769104,
      "learning_rate": 5.467142847729684e-06,
      "loss": 0.016,
      "step": 1395840
    },
    {
      "epoch": 2.2843555049324773,
      "grad_norm": 0.36043283343315125,
      "learning_rate": 5.4670769555161676e-06,
      "loss": 0.0106,
      "step": 1395860
    },
    {
      "epoch": 2.2843882353711304,
      "grad_norm": 0.479674369096756,
      "learning_rate": 5.4670110633026495e-06,
      "loss": 0.017,
      "step": 1395880
    },
    {
      "epoch": 2.2844209658097836,
      "grad_norm": 0.4649646580219269,
      "learning_rate": 5.466945171089133e-06,
      "loss": 0.0211,
      "step": 1395900
    },
    {
      "epoch": 2.284453696248437,
      "grad_norm": 0.3956621587276459,
      "learning_rate": 5.466879278875615e-06,
      "loss": 0.0097,
      "step": 1395920
    },
    {
      "epoch": 2.2844864266870903,
      "grad_norm": 0.323581337928772,
      "learning_rate": 5.4668133866620985e-06,
      "loss": 0.0147,
      "step": 1395940
    },
    {
      "epoch": 2.284519157125744,
      "grad_norm": 0.26046377420425415,
      "learning_rate": 5.466747494448581e-06,
      "loss": 0.0166,
      "step": 1395960
    },
    {
      "epoch": 2.284551887564397,
      "grad_norm": 0.27198660373687744,
      "learning_rate": 5.466681602235064e-06,
      "loss": 0.0171,
      "step": 1395980
    },
    {
      "epoch": 2.2845846180030502,
      "grad_norm": 0.23132598400115967,
      "learning_rate": 5.466615710021547e-06,
      "loss": 0.018,
      "step": 1396000
    },
    {
      "epoch": 2.284617348441704,
      "grad_norm": 0.582062840461731,
      "learning_rate": 5.46654981780803e-06,
      "loss": 0.013,
      "step": 1396020
    },
    {
      "epoch": 2.284650078880357,
      "grad_norm": 0.7382413148880005,
      "learning_rate": 5.466483925594513e-06,
      "loss": 0.016,
      "step": 1396040
    },
    {
      "epoch": 2.2846828093190106,
      "grad_norm": 0.43660661578178406,
      "learning_rate": 5.466418033380996e-06,
      "loss": 0.0293,
      "step": 1396060
    },
    {
      "epoch": 2.2847155397576637,
      "grad_norm": 0.19608359038829803,
      "learning_rate": 5.466352141167479e-06,
      "loss": 0.0116,
      "step": 1396080
    },
    {
      "epoch": 2.2847482701963173,
      "grad_norm": 1.0053400993347168,
      "learning_rate": 5.466286248953961e-06,
      "loss": 0.0106,
      "step": 1396100
    },
    {
      "epoch": 2.2847810006349705,
      "grad_norm": 0.06596647948026657,
      "learning_rate": 5.466220356740445e-06,
      "loss": 0.0183,
      "step": 1396120
    },
    {
      "epoch": 2.2848137310736236,
      "grad_norm": 0.6166681051254272,
      "learning_rate": 5.466154464526927e-06,
      "loss": 0.0101,
      "step": 1396140
    },
    {
      "epoch": 2.284846461512277,
      "grad_norm": 0.8675774335861206,
      "learning_rate": 5.46608857231341e-06,
      "loss": 0.0203,
      "step": 1396160
    },
    {
      "epoch": 2.2848791919509304,
      "grad_norm": 0.3550488352775574,
      "learning_rate": 5.466022680099893e-06,
      "loss": 0.0165,
      "step": 1396180
    },
    {
      "epoch": 2.284911922389584,
      "grad_norm": 0.3554161489009857,
      "learning_rate": 5.465956787886376e-06,
      "loss": 0.0098,
      "step": 1396200
    },
    {
      "epoch": 2.284944652828237,
      "grad_norm": 0.9622952342033386,
      "learning_rate": 5.4658908956728586e-06,
      "loss": 0.0156,
      "step": 1396220
    },
    {
      "epoch": 2.2849773832668907,
      "grad_norm": 0.3958436846733093,
      "learning_rate": 5.465825003459342e-06,
      "loss": 0.0102,
      "step": 1396240
    },
    {
      "epoch": 2.285010113705544,
      "grad_norm": 0.11755651235580444,
      "learning_rate": 5.465759111245824e-06,
      "loss": 0.0132,
      "step": 1396260
    },
    {
      "epoch": 2.285042844144197,
      "grad_norm": 1.8335826396942139,
      "learning_rate": 5.465693219032308e-06,
      "loss": 0.0169,
      "step": 1396280
    },
    {
      "epoch": 2.2850755745828506,
      "grad_norm": 0.2726677358150482,
      "learning_rate": 5.4656273268187895e-06,
      "loss": 0.0159,
      "step": 1396300
    },
    {
      "epoch": 2.2851083050215037,
      "grad_norm": 0.08520404249429703,
      "learning_rate": 5.465561434605273e-06,
      "loss": 0.0096,
      "step": 1396320
    },
    {
      "epoch": 2.2851410354601573,
      "grad_norm": 1.4201440811157227,
      "learning_rate": 5.465495542391756e-06,
      "loss": 0.0177,
      "step": 1396340
    },
    {
      "epoch": 2.2851737658988105,
      "grad_norm": 0.21467801928520203,
      "learning_rate": 5.465429650178239e-06,
      "loss": 0.0169,
      "step": 1396360
    },
    {
      "epoch": 2.285206496337464,
      "grad_norm": 0.5956544876098633,
      "learning_rate": 5.465363757964722e-06,
      "loss": 0.0188,
      "step": 1396380
    },
    {
      "epoch": 2.2852392267761172,
      "grad_norm": 0.3215479254722595,
      "learning_rate": 5.465297865751205e-06,
      "loss": 0.0172,
      "step": 1396400
    },
    {
      "epoch": 2.2852719572147704,
      "grad_norm": 1.0359525680541992,
      "learning_rate": 5.465231973537688e-06,
      "loss": 0.0129,
      "step": 1396420
    },
    {
      "epoch": 2.285304687653424,
      "grad_norm": 0.3513389229774475,
      "learning_rate": 5.46516608132417e-06,
      "loss": 0.0216,
      "step": 1396440
    },
    {
      "epoch": 2.285337418092077,
      "grad_norm": 0.9142634272575378,
      "learning_rate": 5.465100189110654e-06,
      "loss": 0.0159,
      "step": 1396460
    },
    {
      "epoch": 2.2853701485307307,
      "grad_norm": 0.3763747811317444,
      "learning_rate": 5.465034296897136e-06,
      "loss": 0.0127,
      "step": 1396480
    },
    {
      "epoch": 2.285402878969384,
      "grad_norm": 0.46270567178726196,
      "learning_rate": 5.4649684046836195e-06,
      "loss": 0.0164,
      "step": 1396500
    },
    {
      "epoch": 2.2854356094080375,
      "grad_norm": 0.6697011590003967,
      "learning_rate": 5.464902512470101e-06,
      "loss": 0.011,
      "step": 1396520
    },
    {
      "epoch": 2.2854683398466906,
      "grad_norm": 0.9245738983154297,
      "learning_rate": 5.464836620256585e-06,
      "loss": 0.0116,
      "step": 1396540
    },
    {
      "epoch": 2.2855010702853438,
      "grad_norm": 0.5569910407066345,
      "learning_rate": 5.464770728043068e-06,
      "loss": 0.0111,
      "step": 1396560
    },
    {
      "epoch": 2.2855338007239974,
      "grad_norm": 0.1158372238278389,
      "learning_rate": 5.4647048358295504e-06,
      "loss": 0.0136,
      "step": 1396580
    },
    {
      "epoch": 2.2855665311626505,
      "grad_norm": 0.27885863184928894,
      "learning_rate": 5.464638943616033e-06,
      "loss": 0.0089,
      "step": 1396600
    },
    {
      "epoch": 2.285599261601304,
      "grad_norm": 0.25614356994628906,
      "learning_rate": 5.464573051402517e-06,
      "loss": 0.0139,
      "step": 1396620
    },
    {
      "epoch": 2.2856319920399573,
      "grad_norm": 0.23955318331718445,
      "learning_rate": 5.464507159188999e-06,
      "loss": 0.0119,
      "step": 1396640
    },
    {
      "epoch": 2.285664722478611,
      "grad_norm": 0.2629983127117157,
      "learning_rate": 5.464441266975482e-06,
      "loss": 0.018,
      "step": 1396660
    },
    {
      "epoch": 2.285697452917264,
      "grad_norm": 0.514549970626831,
      "learning_rate": 5.464375374761964e-06,
      "loss": 0.0103,
      "step": 1396680
    },
    {
      "epoch": 2.285730183355917,
      "grad_norm": 0.4484865665435791,
      "learning_rate": 5.464309482548448e-06,
      "loss": 0.0144,
      "step": 1396700
    },
    {
      "epoch": 2.2857629137945707,
      "grad_norm": 0.4674035310745239,
      "learning_rate": 5.46424359033493e-06,
      "loss": 0.0217,
      "step": 1396720
    },
    {
      "epoch": 2.285795644233224,
      "grad_norm": 0.39801421761512756,
      "learning_rate": 5.464177698121413e-06,
      "loss": 0.0149,
      "step": 1396740
    },
    {
      "epoch": 2.2858283746718775,
      "grad_norm": 0.44661638140678406,
      "learning_rate": 5.464111805907897e-06,
      "loss": 0.0216,
      "step": 1396760
    },
    {
      "epoch": 2.2858611051105306,
      "grad_norm": 0.5190849304199219,
      "learning_rate": 5.464045913694379e-06,
      "loss": 0.0172,
      "step": 1396780
    },
    {
      "epoch": 2.2858938355491842,
      "grad_norm": 1.4131413698196411,
      "learning_rate": 5.463980021480862e-06,
      "loss": 0.0169,
      "step": 1396800
    },
    {
      "epoch": 2.2859265659878374,
      "grad_norm": 0.5372152328491211,
      "learning_rate": 5.463914129267345e-06,
      "loss": 0.0094,
      "step": 1396820
    },
    {
      "epoch": 2.2859592964264905,
      "grad_norm": 0.5314452052116394,
      "learning_rate": 5.463848237053829e-06,
      "loss": 0.0103,
      "step": 1396840
    },
    {
      "epoch": 2.285992026865144,
      "grad_norm": 0.2782699763774872,
      "learning_rate": 5.4637823448403105e-06,
      "loss": 0.0169,
      "step": 1396860
    },
    {
      "epoch": 2.2860247573037973,
      "grad_norm": 0.6523861885070801,
      "learning_rate": 5.463716452626794e-06,
      "loss": 0.0136,
      "step": 1396880
    },
    {
      "epoch": 2.286057487742451,
      "grad_norm": 0.24536862969398499,
      "learning_rate": 5.463650560413276e-06,
      "loss": 0.0162,
      "step": 1396900
    },
    {
      "epoch": 2.286090218181104,
      "grad_norm": 0.23287703096866608,
      "learning_rate": 5.4635846681997595e-06,
      "loss": 0.0132,
      "step": 1396920
    },
    {
      "epoch": 2.2861229486197576,
      "grad_norm": 0.24101530015468597,
      "learning_rate": 5.4635187759862414e-06,
      "loss": 0.0177,
      "step": 1396940
    },
    {
      "epoch": 2.2861556790584108,
      "grad_norm": 0.5590254068374634,
      "learning_rate": 5.463452883772725e-06,
      "loss": 0.0142,
      "step": 1396960
    },
    {
      "epoch": 2.286188409497064,
      "grad_norm": 0.6264076828956604,
      "learning_rate": 5.463386991559208e-06,
      "loss": 0.0119,
      "step": 1396980
    },
    {
      "epoch": 2.2862211399357175,
      "grad_norm": 0.2635778784751892,
      "learning_rate": 5.4633210993456905e-06,
      "loss": 0.0123,
      "step": 1397000
    },
    {
      "epoch": 2.2862538703743707,
      "grad_norm": 0.7622078657150269,
      "learning_rate": 5.463255207132173e-06,
      "loss": 0.021,
      "step": 1397020
    },
    {
      "epoch": 2.2862866008130243,
      "grad_norm": 0.3684811294078827,
      "learning_rate": 5.463189314918657e-06,
      "loss": 0.0188,
      "step": 1397040
    },
    {
      "epoch": 2.2863193312516774,
      "grad_norm": 0.3007631301879883,
      "learning_rate": 5.463123422705139e-06,
      "loss": 0.0161,
      "step": 1397060
    },
    {
      "epoch": 2.286352061690331,
      "grad_norm": 0.2543674111366272,
      "learning_rate": 5.463057530491622e-06,
      "loss": 0.0167,
      "step": 1397080
    },
    {
      "epoch": 2.286384792128984,
      "grad_norm": 0.49480175971984863,
      "learning_rate": 5.462991638278106e-06,
      "loss": 0.013,
      "step": 1397100
    },
    {
      "epoch": 2.2864175225676373,
      "grad_norm": 0.601962149143219,
      "learning_rate": 5.462925746064588e-06,
      "loss": 0.0186,
      "step": 1397120
    },
    {
      "epoch": 2.286450253006291,
      "grad_norm": 0.44891050457954407,
      "learning_rate": 5.462859853851071e-06,
      "loss": 0.0129,
      "step": 1397140
    },
    {
      "epoch": 2.286482983444944,
      "grad_norm": 0.4615313410758972,
      "learning_rate": 5.462793961637553e-06,
      "loss": 0.018,
      "step": 1397160
    },
    {
      "epoch": 2.2865157138835976,
      "grad_norm": 0.8216366171836853,
      "learning_rate": 5.462728069424037e-06,
      "loss": 0.0172,
      "step": 1397180
    },
    {
      "epoch": 2.286548444322251,
      "grad_norm": 0.5843822956085205,
      "learning_rate": 5.46266217721052e-06,
      "loss": 0.0251,
      "step": 1397200
    },
    {
      "epoch": 2.2865811747609044,
      "grad_norm": 0.02542167343199253,
      "learning_rate": 5.462596284997002e-06,
      "loss": 0.017,
      "step": 1397220
    },
    {
      "epoch": 2.2866139051995575,
      "grad_norm": 0.12134544551372528,
      "learning_rate": 5.462530392783485e-06,
      "loss": 0.0155,
      "step": 1397240
    },
    {
      "epoch": 2.2866466356382107,
      "grad_norm": 0.9337458610534668,
      "learning_rate": 5.462464500569969e-06,
      "loss": 0.0165,
      "step": 1397260
    },
    {
      "epoch": 2.2866793660768643,
      "grad_norm": 0.2551489770412445,
      "learning_rate": 5.4623986083564506e-06,
      "loss": 0.0145,
      "step": 1397280
    },
    {
      "epoch": 2.2867120965155174,
      "grad_norm": 0.40495800971984863,
      "learning_rate": 5.462332716142934e-06,
      "loss": 0.017,
      "step": 1397300
    },
    {
      "epoch": 2.286744826954171,
      "grad_norm": 0.19512100517749786,
      "learning_rate": 5.462266823929416e-06,
      "loss": 0.0235,
      "step": 1397320
    },
    {
      "epoch": 2.286777557392824,
      "grad_norm": 0.13524092733860016,
      "learning_rate": 5.4622009317159e-06,
      "loss": 0.0107,
      "step": 1397340
    },
    {
      "epoch": 2.2868102878314773,
      "grad_norm": 0.7226032018661499,
      "learning_rate": 5.462135039502382e-06,
      "loss": 0.0172,
      "step": 1397360
    },
    {
      "epoch": 2.286843018270131,
      "grad_norm": 0.6204647421836853,
      "learning_rate": 5.462069147288865e-06,
      "loss": 0.0172,
      "step": 1397380
    },
    {
      "epoch": 2.286875748708784,
      "grad_norm": 0.26364850997924805,
      "learning_rate": 5.462003255075348e-06,
      "loss": 0.0068,
      "step": 1397400
    },
    {
      "epoch": 2.2869084791474377,
      "grad_norm": 0.2297784984111786,
      "learning_rate": 5.4619373628618314e-06,
      "loss": 0.0146,
      "step": 1397420
    },
    {
      "epoch": 2.286941209586091,
      "grad_norm": 0.27641817927360535,
      "learning_rate": 5.461871470648314e-06,
      "loss": 0.0103,
      "step": 1397440
    },
    {
      "epoch": 2.286973940024744,
      "grad_norm": 0.20239993929862976,
      "learning_rate": 5.461805578434797e-06,
      "loss": 0.0194,
      "step": 1397460
    },
    {
      "epoch": 2.2870066704633976,
      "grad_norm": 0.16199062764644623,
      "learning_rate": 5.4617396862212805e-06,
      "loss": 0.0101,
      "step": 1397480
    },
    {
      "epoch": 2.2870394009020507,
      "grad_norm": 0.8659242987632751,
      "learning_rate": 5.461673794007762e-06,
      "loss": 0.0142,
      "step": 1397500
    },
    {
      "epoch": 2.2870721313407043,
      "grad_norm": 0.14557093381881714,
      "learning_rate": 5.461607901794246e-06,
      "loss": 0.016,
      "step": 1397520
    },
    {
      "epoch": 2.2871048617793575,
      "grad_norm": 0.32299861311912537,
      "learning_rate": 5.461542009580728e-06,
      "loss": 0.0155,
      "step": 1397540
    },
    {
      "epoch": 2.287137592218011,
      "grad_norm": 0.5440261960029602,
      "learning_rate": 5.4614761173672114e-06,
      "loss": 0.0152,
      "step": 1397560
    },
    {
      "epoch": 2.287170322656664,
      "grad_norm": 0.32636451721191406,
      "learning_rate": 5.461410225153694e-06,
      "loss": 0.0121,
      "step": 1397580
    },
    {
      "epoch": 2.2872030530953174,
      "grad_norm": 0.3593425154685974,
      "learning_rate": 5.461344332940177e-06,
      "loss": 0.0134,
      "step": 1397600
    },
    {
      "epoch": 2.287235783533971,
      "grad_norm": 0.3022577464580536,
      "learning_rate": 5.46127844072666e-06,
      "loss": 0.0173,
      "step": 1397620
    },
    {
      "epoch": 2.287268513972624,
      "grad_norm": 0.1424582153558731,
      "learning_rate": 5.461212548513143e-06,
      "loss": 0.0221,
      "step": 1397640
    },
    {
      "epoch": 2.2873012444112777,
      "grad_norm": 0.5241455435752869,
      "learning_rate": 5.461146656299625e-06,
      "loss": 0.0134,
      "step": 1397660
    },
    {
      "epoch": 2.287333974849931,
      "grad_norm": 1.725890040397644,
      "learning_rate": 5.461080764086109e-06,
      "loss": 0.0171,
      "step": 1397680
    },
    {
      "epoch": 2.2873667052885844,
      "grad_norm": 0.8925902843475342,
      "learning_rate": 5.461014871872591e-06,
      "loss": 0.0122,
      "step": 1397700
    },
    {
      "epoch": 2.2873994357272376,
      "grad_norm": 0.0782843753695488,
      "learning_rate": 5.460948979659074e-06,
      "loss": 0.0221,
      "step": 1397720
    },
    {
      "epoch": 2.2874321661658907,
      "grad_norm": 0.3030893802642822,
      "learning_rate": 5.460883087445556e-06,
      "loss": 0.0191,
      "step": 1397740
    },
    {
      "epoch": 2.2874648966045443,
      "grad_norm": 0.633824348449707,
      "learning_rate": 5.46081719523204e-06,
      "loss": 0.0237,
      "step": 1397760
    },
    {
      "epoch": 2.2874976270431975,
      "grad_norm": 0.45012718439102173,
      "learning_rate": 5.460751303018523e-06,
      "loss": 0.0148,
      "step": 1397780
    },
    {
      "epoch": 2.287530357481851,
      "grad_norm": 0.3492656946182251,
      "learning_rate": 5.460685410805005e-06,
      "loss": 0.0141,
      "step": 1397800
    },
    {
      "epoch": 2.2875630879205042,
      "grad_norm": 0.39792218804359436,
      "learning_rate": 5.460619518591489e-06,
      "loss": 0.0138,
      "step": 1397820
    },
    {
      "epoch": 2.287595818359158,
      "grad_norm": 0.08238208293914795,
      "learning_rate": 5.4605536263779715e-06,
      "loss": 0.0133,
      "step": 1397840
    },
    {
      "epoch": 2.287628548797811,
      "grad_norm": 0.5837550759315491,
      "learning_rate": 5.460487734164454e-06,
      "loss": 0.0163,
      "step": 1397860
    },
    {
      "epoch": 2.287661279236464,
      "grad_norm": 0.45298540592193604,
      "learning_rate": 5.460421841950937e-06,
      "loss": 0.0205,
      "step": 1397880
    },
    {
      "epoch": 2.2876940096751177,
      "grad_norm": 0.2506538927555084,
      "learning_rate": 5.4603559497374206e-06,
      "loss": 0.0146,
      "step": 1397900
    },
    {
      "epoch": 2.287726740113771,
      "grad_norm": 0.2421659529209137,
      "learning_rate": 5.4602900575239025e-06,
      "loss": 0.0138,
      "step": 1397920
    },
    {
      "epoch": 2.2877594705524245,
      "grad_norm": 0.17059652507305145,
      "learning_rate": 5.460224165310386e-06,
      "loss": 0.0131,
      "step": 1397940
    },
    {
      "epoch": 2.2877922009910776,
      "grad_norm": 0.4093254506587982,
      "learning_rate": 5.460158273096868e-06,
      "loss": 0.0133,
      "step": 1397960
    },
    {
      "epoch": 2.287824931429731,
      "grad_norm": 0.39901843667030334,
      "learning_rate": 5.4600923808833515e-06,
      "loss": 0.0159,
      "step": 1397980
    },
    {
      "epoch": 2.2878576618683844,
      "grad_norm": 0.3659979999065399,
      "learning_rate": 5.460026488669834e-06,
      "loss": 0.0154,
      "step": 1398000
    },
    {
      "epoch": 2.2878903923070375,
      "grad_norm": 1.6474238634109497,
      "learning_rate": 5.459960596456317e-06,
      "loss": 0.0182,
      "step": 1398020
    },
    {
      "epoch": 2.287923122745691,
      "grad_norm": 0.175765261054039,
      "learning_rate": 5.4598947042428e-06,
      "loss": 0.0232,
      "step": 1398040
    },
    {
      "epoch": 2.2879558531843442,
      "grad_norm": 0.6323341131210327,
      "learning_rate": 5.459828812029283e-06,
      "loss": 0.0089,
      "step": 1398060
    },
    {
      "epoch": 2.287988583622998,
      "grad_norm": 0.3629990518093109,
      "learning_rate": 5.459762919815765e-06,
      "loss": 0.015,
      "step": 1398080
    },
    {
      "epoch": 2.288021314061651,
      "grad_norm": 0.6329343318939209,
      "learning_rate": 5.459697027602249e-06,
      "loss": 0.0197,
      "step": 1398100
    },
    {
      "epoch": 2.2880540445003046,
      "grad_norm": 0.8163760304450989,
      "learning_rate": 5.459631135388731e-06,
      "loss": 0.0154,
      "step": 1398120
    },
    {
      "epoch": 2.2880867749389577,
      "grad_norm": 0.9394477605819702,
      "learning_rate": 5.459565243175214e-06,
      "loss": 0.0188,
      "step": 1398140
    },
    {
      "epoch": 2.288119505377611,
      "grad_norm": 0.5984783172607422,
      "learning_rate": 5.459499350961698e-06,
      "loss": 0.0158,
      "step": 1398160
    },
    {
      "epoch": 2.2881522358162645,
      "grad_norm": 0.2724514901638031,
      "learning_rate": 5.45943345874818e-06,
      "loss": 0.0104,
      "step": 1398180
    },
    {
      "epoch": 2.2881849662549176,
      "grad_norm": 1.4661588668823242,
      "learning_rate": 5.459367566534663e-06,
      "loss": 0.0162,
      "step": 1398200
    },
    {
      "epoch": 2.2882176966935712,
      "grad_norm": 1.0693057775497437,
      "learning_rate": 5.459301674321146e-06,
      "loss": 0.0131,
      "step": 1398220
    },
    {
      "epoch": 2.2882504271322244,
      "grad_norm": 0.1999853551387787,
      "learning_rate": 5.459235782107629e-06,
      "loss": 0.0128,
      "step": 1398240
    },
    {
      "epoch": 2.288283157570878,
      "grad_norm": 0.18216954171657562,
      "learning_rate": 5.4591698898941116e-06,
      "loss": 0.0183,
      "step": 1398260
    },
    {
      "epoch": 2.288315888009531,
      "grad_norm": 0.3230378031730652,
      "learning_rate": 5.459103997680595e-06,
      "loss": 0.0175,
      "step": 1398280
    },
    {
      "epoch": 2.2883486184481843,
      "grad_norm": 0.42971521615982056,
      "learning_rate": 5.459038105467077e-06,
      "loss": 0.0206,
      "step": 1398300
    },
    {
      "epoch": 2.288381348886838,
      "grad_norm": 0.1750868558883667,
      "learning_rate": 5.458972213253561e-06,
      "loss": 0.0107,
      "step": 1398320
    },
    {
      "epoch": 2.288414079325491,
      "grad_norm": 0.08960577100515366,
      "learning_rate": 5.4589063210400425e-06,
      "loss": 0.0175,
      "step": 1398340
    },
    {
      "epoch": 2.2884468097641446,
      "grad_norm": 0.20754466950893402,
      "learning_rate": 5.458840428826526e-06,
      "loss": 0.0103,
      "step": 1398360
    },
    {
      "epoch": 2.2884795402027978,
      "grad_norm": 0.8394297957420349,
      "learning_rate": 5.458774536613009e-06,
      "loss": 0.0132,
      "step": 1398380
    },
    {
      "epoch": 2.2885122706414514,
      "grad_norm": 0.4452877938747406,
      "learning_rate": 5.458708644399492e-06,
      "loss": 0.0122,
      "step": 1398400
    },
    {
      "epoch": 2.2885450010801045,
      "grad_norm": 0.20067518949508667,
      "learning_rate": 5.458642752185974e-06,
      "loss": 0.0111,
      "step": 1398420
    },
    {
      "epoch": 2.2885777315187577,
      "grad_norm": 0.24115939438343048,
      "learning_rate": 5.458576859972458e-06,
      "loss": 0.0134,
      "step": 1398440
    },
    {
      "epoch": 2.2886104619574112,
      "grad_norm": 0.4353944659233093,
      "learning_rate": 5.45851096775894e-06,
      "loss": 0.0139,
      "step": 1398460
    },
    {
      "epoch": 2.2886431923960644,
      "grad_norm": 0.3500951826572418,
      "learning_rate": 5.458445075545423e-06,
      "loss": 0.0188,
      "step": 1398480
    },
    {
      "epoch": 2.288675922834718,
      "grad_norm": 0.9870738387107849,
      "learning_rate": 5.458379183331907e-06,
      "loss": 0.0215,
      "step": 1398500
    },
    {
      "epoch": 2.288708653273371,
      "grad_norm": 0.19817283749580383,
      "learning_rate": 5.458313291118389e-06,
      "loss": 0.0118,
      "step": 1398520
    },
    {
      "epoch": 2.2887413837120247,
      "grad_norm": 0.3831120729446411,
      "learning_rate": 5.4582473989048725e-06,
      "loss": 0.0138,
      "step": 1398540
    },
    {
      "epoch": 2.288774114150678,
      "grad_norm": 0.5707195401191711,
      "learning_rate": 5.458181506691354e-06,
      "loss": 0.0124,
      "step": 1398560
    },
    {
      "epoch": 2.288806844589331,
      "grad_norm": 0.429701030254364,
      "learning_rate": 5.458115614477838e-06,
      "loss": 0.014,
      "step": 1398580
    },
    {
      "epoch": 2.2888395750279846,
      "grad_norm": 0.6055193543434143,
      "learning_rate": 5.458049722264321e-06,
      "loss": 0.0163,
      "step": 1398600
    },
    {
      "epoch": 2.288872305466638,
      "grad_norm": 0.6295201182365417,
      "learning_rate": 5.4579838300508034e-06,
      "loss": 0.0189,
      "step": 1398620
    },
    {
      "epoch": 2.2889050359052914,
      "grad_norm": 0.4084666073322296,
      "learning_rate": 5.457917937837286e-06,
      "loss": 0.0195,
      "step": 1398640
    },
    {
      "epoch": 2.2889377663439445,
      "grad_norm": 0.6229256391525269,
      "learning_rate": 5.45785204562377e-06,
      "loss": 0.0119,
      "step": 1398660
    },
    {
      "epoch": 2.288970496782598,
      "grad_norm": 0.7502666115760803,
      "learning_rate": 5.457786153410252e-06,
      "loss": 0.0106,
      "step": 1398680
    },
    {
      "epoch": 2.2890032272212513,
      "grad_norm": 0.43922561407089233,
      "learning_rate": 5.457720261196735e-06,
      "loss": 0.0116,
      "step": 1398700
    },
    {
      "epoch": 2.2890359576599044,
      "grad_norm": 0.03662967309355736,
      "learning_rate": 5.457654368983217e-06,
      "loss": 0.023,
      "step": 1398720
    },
    {
      "epoch": 2.289068688098558,
      "grad_norm": 0.39383018016815186,
      "learning_rate": 5.457588476769701e-06,
      "loss": 0.0154,
      "step": 1398740
    },
    {
      "epoch": 2.289101418537211,
      "grad_norm": 0.42617911100387573,
      "learning_rate": 5.457522584556183e-06,
      "loss": 0.019,
      "step": 1398760
    },
    {
      "epoch": 2.2891341489758648,
      "grad_norm": 0.63028883934021,
      "learning_rate": 5.457456692342666e-06,
      "loss": 0.012,
      "step": 1398780
    },
    {
      "epoch": 2.289166879414518,
      "grad_norm": 0.8396354913711548,
      "learning_rate": 5.457390800129149e-06,
      "loss": 0.0214,
      "step": 1398800
    },
    {
      "epoch": 2.289199609853171,
      "grad_norm": 0.7774063348770142,
      "learning_rate": 5.457324907915632e-06,
      "loss": 0.0186,
      "step": 1398820
    },
    {
      "epoch": 2.2892323402918247,
      "grad_norm": 0.36079999804496765,
      "learning_rate": 5.457259015702115e-06,
      "loss": 0.0123,
      "step": 1398840
    },
    {
      "epoch": 2.289265070730478,
      "grad_norm": 1.1833856105804443,
      "learning_rate": 5.457193123488598e-06,
      "loss": 0.0157,
      "step": 1398860
    },
    {
      "epoch": 2.2892978011691314,
      "grad_norm": 0.17907997965812683,
      "learning_rate": 5.457127231275081e-06,
      "loss": 0.011,
      "step": 1398880
    },
    {
      "epoch": 2.2893305316077845,
      "grad_norm": 0.12948156893253326,
      "learning_rate": 5.4570613390615635e-06,
      "loss": 0.0099,
      "step": 1398900
    },
    {
      "epoch": 2.2893632620464377,
      "grad_norm": 0.6659688949584961,
      "learning_rate": 5.456995446848047e-06,
      "loss": 0.0173,
      "step": 1398920
    },
    {
      "epoch": 2.2893959924850913,
      "grad_norm": 0.2548151910305023,
      "learning_rate": 5.456929554634529e-06,
      "loss": 0.0139,
      "step": 1398940
    },
    {
      "epoch": 2.2894287229237444,
      "grad_norm": 0.49096885323524475,
      "learning_rate": 5.4568636624210125e-06,
      "loss": 0.0138,
      "step": 1398960
    },
    {
      "epoch": 2.289461453362398,
      "grad_norm": 0.3148093521595001,
      "learning_rate": 5.4567977702074944e-06,
      "loss": 0.0142,
      "step": 1398980
    },
    {
      "epoch": 2.289494183801051,
      "grad_norm": 0.3359793722629547,
      "learning_rate": 5.456731877993978e-06,
      "loss": 0.0172,
      "step": 1399000
    },
    {
      "epoch": 2.289526914239705,
      "grad_norm": 0.18752682209014893,
      "learning_rate": 5.456665985780461e-06,
      "loss": 0.0202,
      "step": 1399020
    },
    {
      "epoch": 2.289559644678358,
      "grad_norm": 0.1263548582792282,
      "learning_rate": 5.4566000935669435e-06,
      "loss": 0.0146,
      "step": 1399040
    },
    {
      "epoch": 2.289592375117011,
      "grad_norm": 0.7175992131233215,
      "learning_rate": 5.456534201353426e-06,
      "loss": 0.0201,
      "step": 1399060
    },
    {
      "epoch": 2.2896251055556647,
      "grad_norm": 0.35246941447257996,
      "learning_rate": 5.45646830913991e-06,
      "loss": 0.0097,
      "step": 1399080
    },
    {
      "epoch": 2.289657835994318,
      "grad_norm": 0.24471957981586456,
      "learning_rate": 5.456402416926392e-06,
      "loss": 0.0127,
      "step": 1399100
    },
    {
      "epoch": 2.2896905664329714,
      "grad_norm": 0.617382824420929,
      "learning_rate": 5.456336524712875e-06,
      "loss": 0.0105,
      "step": 1399120
    },
    {
      "epoch": 2.2897232968716246,
      "grad_norm": 0.2970442473888397,
      "learning_rate": 5.456270632499357e-06,
      "loss": 0.015,
      "step": 1399140
    },
    {
      "epoch": 2.289756027310278,
      "grad_norm": 0.21160149574279785,
      "learning_rate": 5.456204740285841e-06,
      "loss": 0.0197,
      "step": 1399160
    },
    {
      "epoch": 2.2897887577489313,
      "grad_norm": 0.20097261667251587,
      "learning_rate": 5.4561388480723235e-06,
      "loss": 0.0095,
      "step": 1399180
    },
    {
      "epoch": 2.2898214881875845,
      "grad_norm": 0.3420860171318054,
      "learning_rate": 5.456072955858806e-06,
      "loss": 0.0209,
      "step": 1399200
    },
    {
      "epoch": 2.289854218626238,
      "grad_norm": 0.3403197228908539,
      "learning_rate": 5.45600706364529e-06,
      "loss": 0.0182,
      "step": 1399220
    },
    {
      "epoch": 2.289886949064891,
      "grad_norm": 0.7337851524353027,
      "learning_rate": 5.455941171431773e-06,
      "loss": 0.0183,
      "step": 1399240
    },
    {
      "epoch": 2.289919679503545,
      "grad_norm": 0.2955356538295746,
      "learning_rate": 5.455875279218255e-06,
      "loss": 0.0179,
      "step": 1399260
    },
    {
      "epoch": 2.289952409942198,
      "grad_norm": 0.32020923495292664,
      "learning_rate": 5.455809387004738e-06,
      "loss": 0.0165,
      "step": 1399280
    },
    {
      "epoch": 2.2899851403808515,
      "grad_norm": 3.7301278114318848,
      "learning_rate": 5.455743494791222e-06,
      "loss": 0.0095,
      "step": 1399300
    },
    {
      "epoch": 2.2900178708195047,
      "grad_norm": 0.124583899974823,
      "learning_rate": 5.4556776025777036e-06,
      "loss": 0.0142,
      "step": 1399320
    },
    {
      "epoch": 2.290050601258158,
      "grad_norm": 0.20595936477184296,
      "learning_rate": 5.455611710364187e-06,
      "loss": 0.0228,
      "step": 1399340
    },
    {
      "epoch": 2.2900833316968114,
      "grad_norm": 0.22078277170658112,
      "learning_rate": 5.455545818150669e-06,
      "loss": 0.0104,
      "step": 1399360
    },
    {
      "epoch": 2.2901160621354646,
      "grad_norm": 0.5460044741630554,
      "learning_rate": 5.455479925937153e-06,
      "loss": 0.0167,
      "step": 1399380
    },
    {
      "epoch": 2.290148792574118,
      "grad_norm": 0.2367202341556549,
      "learning_rate": 5.455414033723635e-06,
      "loss": 0.0164,
      "step": 1399400
    },
    {
      "epoch": 2.2901815230127713,
      "grad_norm": 0.5270683169364929,
      "learning_rate": 5.455348141510118e-06,
      "loss": 0.0124,
      "step": 1399420
    },
    {
      "epoch": 2.290214253451425,
      "grad_norm": 0.6427074670791626,
      "learning_rate": 5.455282249296601e-06,
      "loss": 0.0188,
      "step": 1399440
    },
    {
      "epoch": 2.290246983890078,
      "grad_norm": 0.46445873379707336,
      "learning_rate": 5.4552163570830844e-06,
      "loss": 0.0126,
      "step": 1399460
    },
    {
      "epoch": 2.2902797143287312,
      "grad_norm": 1.3829762935638428,
      "learning_rate": 5.455150464869566e-06,
      "loss": 0.0109,
      "step": 1399480
    },
    {
      "epoch": 2.290312444767385,
      "grad_norm": 0.1555911749601364,
      "learning_rate": 5.45508457265605e-06,
      "loss": 0.0115,
      "step": 1399500
    },
    {
      "epoch": 2.290345175206038,
      "grad_norm": 0.18203556537628174,
      "learning_rate": 5.455018680442532e-06,
      "loss": 0.0114,
      "step": 1399520
    },
    {
      "epoch": 2.2903779056446916,
      "grad_norm": 0.33272305130958557,
      "learning_rate": 5.454952788229015e-06,
      "loss": 0.0148,
      "step": 1399540
    },
    {
      "epoch": 2.2904106360833447,
      "grad_norm": 0.4330645203590393,
      "learning_rate": 5.454886896015499e-06,
      "loss": 0.0164,
      "step": 1399560
    },
    {
      "epoch": 2.2904433665219983,
      "grad_norm": 0.11912018805742264,
      "learning_rate": 5.454821003801981e-06,
      "loss": 0.0123,
      "step": 1399580
    },
    {
      "epoch": 2.2904760969606515,
      "grad_norm": 0.31484922766685486,
      "learning_rate": 5.4547551115884645e-06,
      "loss": 0.0171,
      "step": 1399600
    },
    {
      "epoch": 2.2905088273993046,
      "grad_norm": 0.8145158290863037,
      "learning_rate": 5.454689219374947e-06,
      "loss": 0.0163,
      "step": 1399620
    },
    {
      "epoch": 2.290541557837958,
      "grad_norm": 0.21833185851573944,
      "learning_rate": 5.45462332716143e-06,
      "loss": 0.015,
      "step": 1399640
    },
    {
      "epoch": 2.2905742882766114,
      "grad_norm": 0.7543092966079712,
      "learning_rate": 5.454557434947913e-06,
      "loss": 0.0148,
      "step": 1399660
    },
    {
      "epoch": 2.290607018715265,
      "grad_norm": 0.6144413352012634,
      "learning_rate": 5.454491542734396e-06,
      "loss": 0.0265,
      "step": 1399680
    },
    {
      "epoch": 2.290639749153918,
      "grad_norm": 0.5595672130584717,
      "learning_rate": 5.454425650520878e-06,
      "loss": 0.0116,
      "step": 1399700
    },
    {
      "epoch": 2.2906724795925717,
      "grad_norm": 0.26885464787483215,
      "learning_rate": 5.454359758307362e-06,
      "loss": 0.0148,
      "step": 1399720
    },
    {
      "epoch": 2.290705210031225,
      "grad_norm": 0.77255779504776,
      "learning_rate": 5.454293866093844e-06,
      "loss": 0.0101,
      "step": 1399740
    },
    {
      "epoch": 2.290737940469878,
      "grad_norm": 0.5419287085533142,
      "learning_rate": 5.454227973880327e-06,
      "loss": 0.0198,
      "step": 1399760
    },
    {
      "epoch": 2.2907706709085316,
      "grad_norm": 0.5490398406982422,
      "learning_rate": 5.454162081666809e-06,
      "loss": 0.0178,
      "step": 1399780
    },
    {
      "epoch": 2.2908034013471847,
      "grad_norm": 1.5124250650405884,
      "learning_rate": 5.454096189453293e-06,
      "loss": 0.0184,
      "step": 1399800
    },
    {
      "epoch": 2.2908361317858383,
      "grad_norm": 0.8969676494598389,
      "learning_rate": 5.4540302972397754e-06,
      "loss": 0.0177,
      "step": 1399820
    },
    {
      "epoch": 2.2908688622244915,
      "grad_norm": 0.19887763261795044,
      "learning_rate": 5.453964405026258e-06,
      "loss": 0.0163,
      "step": 1399840
    },
    {
      "epoch": 2.290901592663145,
      "grad_norm": 1.1632181406021118,
      "learning_rate": 5.453898512812741e-06,
      "loss": 0.0177,
      "step": 1399860
    },
    {
      "epoch": 2.2909343231017982,
      "grad_norm": 0.1202482059597969,
      "learning_rate": 5.4538326205992245e-06,
      "loss": 0.0201,
      "step": 1399880
    },
    {
      "epoch": 2.2909670535404514,
      "grad_norm": 0.38456809520721436,
      "learning_rate": 5.453766728385707e-06,
      "loss": 0.012,
      "step": 1399900
    },
    {
      "epoch": 2.290999783979105,
      "grad_norm": 0.2612740695476532,
      "learning_rate": 5.45370083617219e-06,
      "loss": 0.0214,
      "step": 1399920
    },
    {
      "epoch": 2.291032514417758,
      "grad_norm": 0.20757043361663818,
      "learning_rate": 5.4536349439586736e-06,
      "loss": 0.0136,
      "step": 1399940
    },
    {
      "epoch": 2.2910652448564117,
      "grad_norm": 0.8774628043174744,
      "learning_rate": 5.4535690517451555e-06,
      "loss": 0.02,
      "step": 1399960
    },
    {
      "epoch": 2.291097975295065,
      "grad_norm": 0.5917761325836182,
      "learning_rate": 5.453503159531639e-06,
      "loss": 0.0136,
      "step": 1399980
    },
    {
      "epoch": 2.2911307057337185,
      "grad_norm": 0.30503183603286743,
      "learning_rate": 5.453437267318121e-06,
      "loss": 0.0179,
      "step": 1400000
    },
    {
      "epoch": 2.2911307057337185,
      "eval_loss": 0.00858598854392767,
      "eval_runtime": 6524.7591,
      "eval_samples_per_second": 157.532,
      "eval_steps_per_second": 15.753,
      "eval_sts-dev_pearson_cosine": 0.9803614023437285,
      "eval_sts-dev_spearman_cosine": 0.8926938224007518,
      "step": 1400000
    },
    {
      "epoch": 2.2911634361723716,
      "grad_norm": 0.12577559053897858,
      "learning_rate": 5.4533713751046045e-06,
      "loss": 0.0185,
      "step": 1400020
    },
    {
      "epoch": 2.2911961666110248,
      "grad_norm": 0.3478650450706482,
      "learning_rate": 5.453305482891087e-06,
      "loss": 0.0109,
      "step": 1400040
    },
    {
      "epoch": 2.2912288970496784,
      "grad_norm": 0.4265044033527374,
      "learning_rate": 5.45323959067757e-06,
      "loss": 0.0118,
      "step": 1400060
    },
    {
      "epoch": 2.2912616274883315,
      "grad_norm": 1.0965981483459473,
      "learning_rate": 5.453173698464053e-06,
      "loss": 0.0116,
      "step": 1400080
    },
    {
      "epoch": 2.291294357926985,
      "grad_norm": 0.4727094769477844,
      "learning_rate": 5.453107806250536e-06,
      "loss": 0.0192,
      "step": 1400100
    },
    {
      "epoch": 2.2913270883656383,
      "grad_norm": 0.8332038521766663,
      "learning_rate": 5.453041914037018e-06,
      "loss": 0.027,
      "step": 1400120
    },
    {
      "epoch": 2.291359818804292,
      "grad_norm": 0.4104555547237396,
      "learning_rate": 5.452976021823502e-06,
      "loss": 0.0133,
      "step": 1400140
    },
    {
      "epoch": 2.291392549242945,
      "grad_norm": 0.08831251412630081,
      "learning_rate": 5.452910129609984e-06,
      "loss": 0.0181,
      "step": 1400160
    },
    {
      "epoch": 2.291425279681598,
      "grad_norm": 0.5283886790275574,
      "learning_rate": 5.452844237396467e-06,
      "loss": 0.0123,
      "step": 1400180
    },
    {
      "epoch": 2.2914580101202517,
      "grad_norm": 0.3066016733646393,
      "learning_rate": 5.45277834518295e-06,
      "loss": 0.0225,
      "step": 1400200
    },
    {
      "epoch": 2.291490740558905,
      "grad_norm": 0.5699553489685059,
      "learning_rate": 5.452712452969433e-06,
      "loss": 0.0143,
      "step": 1400220
    },
    {
      "epoch": 2.2915234709975585,
      "grad_norm": 0.22046814858913422,
      "learning_rate": 5.452646560755916e-06,
      "loss": 0.0135,
      "step": 1400240
    },
    {
      "epoch": 2.2915562014362116,
      "grad_norm": 0.8866856694221497,
      "learning_rate": 5.452580668542399e-06,
      "loss": 0.0183,
      "step": 1400260
    },
    {
      "epoch": 2.2915889318748652,
      "grad_norm": 0.9606071710586548,
      "learning_rate": 5.452514776328882e-06,
      "loss": 0.0201,
      "step": 1400280
    },
    {
      "epoch": 2.2916216623135184,
      "grad_norm": 0.15512560307979584,
      "learning_rate": 5.452448884115365e-06,
      "loss": 0.0132,
      "step": 1400300
    },
    {
      "epoch": 2.2916543927521715,
      "grad_norm": 0.1955789029598236,
      "learning_rate": 5.452382991901848e-06,
      "loss": 0.0158,
      "step": 1400320
    },
    {
      "epoch": 2.291687123190825,
      "grad_norm": 0.39476728439331055,
      "learning_rate": 5.45231709968833e-06,
      "loss": 0.0189,
      "step": 1400340
    },
    {
      "epoch": 2.2917198536294783,
      "grad_norm": 0.232213094830513,
      "learning_rate": 5.452251207474814e-06,
      "loss": 0.0182,
      "step": 1400360
    },
    {
      "epoch": 2.291752584068132,
      "grad_norm": 0.04225244000554085,
      "learning_rate": 5.4521853152612955e-06,
      "loss": 0.0134,
      "step": 1400380
    },
    {
      "epoch": 2.291785314506785,
      "grad_norm": 0.5683067440986633,
      "learning_rate": 5.452119423047779e-06,
      "loss": 0.0171,
      "step": 1400400
    },
    {
      "epoch": 2.291818044945438,
      "grad_norm": 0.15660735964775085,
      "learning_rate": 5.452053530834262e-06,
      "loss": 0.0179,
      "step": 1400420
    },
    {
      "epoch": 2.2918507753840918,
      "grad_norm": 0.31356629729270935,
      "learning_rate": 5.451987638620745e-06,
      "loss": 0.0149,
      "step": 1400440
    },
    {
      "epoch": 2.291883505822745,
      "grad_norm": 0.4268282949924469,
      "learning_rate": 5.451921746407227e-06,
      "loss": 0.0135,
      "step": 1400460
    },
    {
      "epoch": 2.2919162362613985,
      "grad_norm": 1.6890813112258911,
      "learning_rate": 5.451855854193711e-06,
      "loss": 0.0258,
      "step": 1400480
    },
    {
      "epoch": 2.2919489667000517,
      "grad_norm": 0.8652122616767883,
      "learning_rate": 5.451789961980193e-06,
      "loss": 0.0112,
      "step": 1400500
    },
    {
      "epoch": 2.291981697138705,
      "grad_norm": 0.6709613800048828,
      "learning_rate": 5.451724069766676e-06,
      "loss": 0.0124,
      "step": 1400520
    },
    {
      "epoch": 2.2920144275773584,
      "grad_norm": 0.39162763953208923,
      "learning_rate": 5.451658177553158e-06,
      "loss": 0.0167,
      "step": 1400540
    },
    {
      "epoch": 2.2920471580160116,
      "grad_norm": 0.13630034029483795,
      "learning_rate": 5.451592285339642e-06,
      "loss": 0.0118,
      "step": 1400560
    },
    {
      "epoch": 2.292079888454665,
      "grad_norm": 0.33056145906448364,
      "learning_rate": 5.451526393126124e-06,
      "loss": 0.01,
      "step": 1400580
    },
    {
      "epoch": 2.2921126188933183,
      "grad_norm": 0.35946324467658997,
      "learning_rate": 5.451460500912607e-06,
      "loss": 0.0176,
      "step": 1400600
    },
    {
      "epoch": 2.292145349331972,
      "grad_norm": 0.07460249215364456,
      "learning_rate": 5.451394608699091e-06,
      "loss": 0.0235,
      "step": 1400620
    },
    {
      "epoch": 2.292178079770625,
      "grad_norm": 0.33841416239738464,
      "learning_rate": 5.451328716485573e-06,
      "loss": 0.0184,
      "step": 1400640
    },
    {
      "epoch": 2.292210810209278,
      "grad_norm": 0.6194491386413574,
      "learning_rate": 5.4512628242720564e-06,
      "loss": 0.0127,
      "step": 1400660
    },
    {
      "epoch": 2.292243540647932,
      "grad_norm": 0.2184431403875351,
      "learning_rate": 5.451196932058539e-06,
      "loss": 0.0159,
      "step": 1400680
    },
    {
      "epoch": 2.292276271086585,
      "grad_norm": 0.24686388671398163,
      "learning_rate": 5.451131039845023e-06,
      "loss": 0.016,
      "step": 1400700
    },
    {
      "epoch": 2.2923090015252385,
      "grad_norm": 0.41796591877937317,
      "learning_rate": 5.451065147631505e-06,
      "loss": 0.0183,
      "step": 1400720
    },
    {
      "epoch": 2.2923417319638917,
      "grad_norm": 2.3859763145446777,
      "learning_rate": 5.450999255417988e-06,
      "loss": 0.0178,
      "step": 1400740
    },
    {
      "epoch": 2.2923744624025453,
      "grad_norm": 0.4324803948402405,
      "learning_rate": 5.45093336320447e-06,
      "loss": 0.0134,
      "step": 1400760
    },
    {
      "epoch": 2.2924071928411984,
      "grad_norm": 0.2917550206184387,
      "learning_rate": 5.450867470990954e-06,
      "loss": 0.0158,
      "step": 1400780
    },
    {
      "epoch": 2.2924399232798516,
      "grad_norm": 0.5862569808959961,
      "learning_rate": 5.450801578777436e-06,
      "loss": 0.0158,
      "step": 1400800
    },
    {
      "epoch": 2.292472653718505,
      "grad_norm": 0.7758212685585022,
      "learning_rate": 5.450735686563919e-06,
      "loss": 0.0141,
      "step": 1400820
    },
    {
      "epoch": 2.2925053841571583,
      "grad_norm": 0.33424121141433716,
      "learning_rate": 5.450669794350402e-06,
      "loss": 0.0149,
      "step": 1400840
    },
    {
      "epoch": 2.292538114595812,
      "grad_norm": 0.06114805489778519,
      "learning_rate": 5.450603902136885e-06,
      "loss": 0.0127,
      "step": 1400860
    },
    {
      "epoch": 2.292570845034465,
      "grad_norm": 0.34958308935165405,
      "learning_rate": 5.450538009923367e-06,
      "loss": 0.0203,
      "step": 1400880
    },
    {
      "epoch": 2.2926035754731187,
      "grad_norm": 0.4765504002571106,
      "learning_rate": 5.450472117709851e-06,
      "loss": 0.0146,
      "step": 1400900
    },
    {
      "epoch": 2.292636305911772,
      "grad_norm": 0.17852070927619934,
      "learning_rate": 5.450406225496333e-06,
      "loss": 0.0162,
      "step": 1400920
    },
    {
      "epoch": 2.292669036350425,
      "grad_norm": 0.23584771156311035,
      "learning_rate": 5.4503403332828165e-06,
      "loss": 0.0108,
      "step": 1400940
    },
    {
      "epoch": 2.2927017667890786,
      "grad_norm": 0.0923798531293869,
      "learning_rate": 5.4502744410693e-06,
      "loss": 0.0094,
      "step": 1400960
    },
    {
      "epoch": 2.2927344972277317,
      "grad_norm": 0.8993015885353088,
      "learning_rate": 5.450208548855782e-06,
      "loss": 0.0153,
      "step": 1400980
    },
    {
      "epoch": 2.2927672276663853,
      "grad_norm": 0.1418474167585373,
      "learning_rate": 5.4501426566422656e-06,
      "loss": 0.0144,
      "step": 1401000
    },
    {
      "epoch": 2.2927999581050384,
      "grad_norm": 0.22515679895877838,
      "learning_rate": 5.4500767644287474e-06,
      "loss": 0.0102,
      "step": 1401020
    },
    {
      "epoch": 2.292832688543692,
      "grad_norm": 0.2424040138721466,
      "learning_rate": 5.450010872215231e-06,
      "loss": 0.0107,
      "step": 1401040
    },
    {
      "epoch": 2.292865418982345,
      "grad_norm": 0.18838605284690857,
      "learning_rate": 5.449944980001714e-06,
      "loss": 0.0138,
      "step": 1401060
    },
    {
      "epoch": 2.2928981494209983,
      "grad_norm": 0.30070939660072327,
      "learning_rate": 5.4498790877881965e-06,
      "loss": 0.0173,
      "step": 1401080
    },
    {
      "epoch": 2.292930879859652,
      "grad_norm": 0.3512793779373169,
      "learning_rate": 5.449813195574679e-06,
      "loss": 0.0154,
      "step": 1401100
    },
    {
      "epoch": 2.292963610298305,
      "grad_norm": 0.3766423165798187,
      "learning_rate": 5.449747303361163e-06,
      "loss": 0.0174,
      "step": 1401120
    },
    {
      "epoch": 2.2929963407369587,
      "grad_norm": 0.479097455739975,
      "learning_rate": 5.449681411147645e-06,
      "loss": 0.0193,
      "step": 1401140
    },
    {
      "epoch": 2.293029071175612,
      "grad_norm": 0.24302799999713898,
      "learning_rate": 5.449615518934128e-06,
      "loss": 0.0118,
      "step": 1401160
    },
    {
      "epoch": 2.2930618016142654,
      "grad_norm": 0.4917263090610504,
      "learning_rate": 5.44954962672061e-06,
      "loss": 0.0158,
      "step": 1401180
    },
    {
      "epoch": 2.2930945320529186,
      "grad_norm": 0.3760051429271698,
      "learning_rate": 5.449483734507094e-06,
      "loss": 0.0149,
      "step": 1401200
    },
    {
      "epoch": 2.2931272624915717,
      "grad_norm": 0.3267291188240051,
      "learning_rate": 5.4494178422935765e-06,
      "loss": 0.017,
      "step": 1401220
    },
    {
      "epoch": 2.2931599929302253,
      "grad_norm": 0.6399165391921997,
      "learning_rate": 5.449351950080059e-06,
      "loss": 0.0163,
      "step": 1401240
    },
    {
      "epoch": 2.2931927233688785,
      "grad_norm": 0.43849173188209534,
      "learning_rate": 5.449286057866542e-06,
      "loss": 0.0119,
      "step": 1401260
    },
    {
      "epoch": 2.293225453807532,
      "grad_norm": 0.7941112518310547,
      "learning_rate": 5.449220165653026e-06,
      "loss": 0.021,
      "step": 1401280
    },
    {
      "epoch": 2.293258184246185,
      "grad_norm": 0.22331547737121582,
      "learning_rate": 5.449154273439508e-06,
      "loss": 0.0094,
      "step": 1401300
    },
    {
      "epoch": 2.293290914684839,
      "grad_norm": 0.11657512933015823,
      "learning_rate": 5.449088381225991e-06,
      "loss": 0.0151,
      "step": 1401320
    },
    {
      "epoch": 2.293323645123492,
      "grad_norm": 1.0467051267623901,
      "learning_rate": 5.449022489012475e-06,
      "loss": 0.0153,
      "step": 1401340
    },
    {
      "epoch": 2.293356375562145,
      "grad_norm": 0.3490871489048004,
      "learning_rate": 5.4489565967989566e-06,
      "loss": 0.0088,
      "step": 1401360
    },
    {
      "epoch": 2.2933891060007987,
      "grad_norm": 0.3580217957496643,
      "learning_rate": 5.44889070458544e-06,
      "loss": 0.0145,
      "step": 1401380
    },
    {
      "epoch": 2.293421836439452,
      "grad_norm": 0.8807819485664368,
      "learning_rate": 5.448824812371922e-06,
      "loss": 0.017,
      "step": 1401400
    },
    {
      "epoch": 2.2934545668781054,
      "grad_norm": 0.2547658681869507,
      "learning_rate": 5.448758920158406e-06,
      "loss": 0.0155,
      "step": 1401420
    },
    {
      "epoch": 2.2934872973167586,
      "grad_norm": 0.33120301365852356,
      "learning_rate": 5.448693027944888e-06,
      "loss": 0.0181,
      "step": 1401440
    },
    {
      "epoch": 2.293520027755412,
      "grad_norm": 0.15308527648448944,
      "learning_rate": 5.448627135731371e-06,
      "loss": 0.013,
      "step": 1401460
    },
    {
      "epoch": 2.2935527581940653,
      "grad_norm": 0.6262324452400208,
      "learning_rate": 5.448561243517854e-06,
      "loss": 0.0148,
      "step": 1401480
    },
    {
      "epoch": 2.2935854886327185,
      "grad_norm": 0.23295800387859344,
      "learning_rate": 5.4484953513043374e-06,
      "loss": 0.0132,
      "step": 1401500
    },
    {
      "epoch": 2.293618219071372,
      "grad_norm": 0.5424144268035889,
      "learning_rate": 5.448429459090819e-06,
      "loss": 0.0157,
      "step": 1401520
    },
    {
      "epoch": 2.2936509495100252,
      "grad_norm": 0.3612360954284668,
      "learning_rate": 5.448363566877303e-06,
      "loss": 0.0126,
      "step": 1401540
    },
    {
      "epoch": 2.293683679948679,
      "grad_norm": 0.9663678407669067,
      "learning_rate": 5.448297674663785e-06,
      "loss": 0.0236,
      "step": 1401560
    },
    {
      "epoch": 2.293716410387332,
      "grad_norm": 0.4252302944660187,
      "learning_rate": 5.448231782450268e-06,
      "loss": 0.0124,
      "step": 1401580
    },
    {
      "epoch": 2.2937491408259856,
      "grad_norm": 0.12513472139835358,
      "learning_rate": 5.44816589023675e-06,
      "loss": 0.0167,
      "step": 1401600
    },
    {
      "epoch": 2.2937818712646387,
      "grad_norm": 0.2677331566810608,
      "learning_rate": 5.448099998023234e-06,
      "loss": 0.0173,
      "step": 1401620
    },
    {
      "epoch": 2.293814601703292,
      "grad_norm": 0.16811390221118927,
      "learning_rate": 5.448034105809717e-06,
      "loss": 0.0165,
      "step": 1401640
    },
    {
      "epoch": 2.2938473321419455,
      "grad_norm": 0.1880999505519867,
      "learning_rate": 5.447968213596199e-06,
      "loss": 0.013,
      "step": 1401660
    },
    {
      "epoch": 2.2938800625805986,
      "grad_norm": 0.5505180954933167,
      "learning_rate": 5.447902321382683e-06,
      "loss": 0.0112,
      "step": 1401680
    },
    {
      "epoch": 2.293912793019252,
      "grad_norm": 0.4800994098186493,
      "learning_rate": 5.447836429169166e-06,
      "loss": 0.0122,
      "step": 1401700
    },
    {
      "epoch": 2.2939455234579054,
      "grad_norm": 0.14258788526058197,
      "learning_rate": 5.447770536955648e-06,
      "loss": 0.0188,
      "step": 1401720
    },
    {
      "epoch": 2.293978253896559,
      "grad_norm": 0.5007278323173523,
      "learning_rate": 5.447704644742131e-06,
      "loss": 0.0188,
      "step": 1401740
    },
    {
      "epoch": 2.294010984335212,
      "grad_norm": 0.7623165249824524,
      "learning_rate": 5.447638752528615e-06,
      "loss": 0.0173,
      "step": 1401760
    },
    {
      "epoch": 2.2940437147738653,
      "grad_norm": 0.2376168817281723,
      "learning_rate": 5.447572860315097e-06,
      "loss": 0.0142,
      "step": 1401780
    },
    {
      "epoch": 2.294076445212519,
      "grad_norm": 0.900189995765686,
      "learning_rate": 5.44750696810158e-06,
      "loss": 0.011,
      "step": 1401800
    },
    {
      "epoch": 2.294109175651172,
      "grad_norm": 0.15482904016971588,
      "learning_rate": 5.447441075888062e-06,
      "loss": 0.0109,
      "step": 1401820
    },
    {
      "epoch": 2.2941419060898256,
      "grad_norm": 0.24680139124393463,
      "learning_rate": 5.447375183674546e-06,
      "loss": 0.0153,
      "step": 1401840
    },
    {
      "epoch": 2.2941746365284788,
      "grad_norm": 0.5367579460144043,
      "learning_rate": 5.4473092914610284e-06,
      "loss": 0.0157,
      "step": 1401860
    },
    {
      "epoch": 2.294207366967132,
      "grad_norm": 0.3301686644554138,
      "learning_rate": 5.447243399247511e-06,
      "loss": 0.0179,
      "step": 1401880
    },
    {
      "epoch": 2.2942400974057855,
      "grad_norm": 0.20107582211494446,
      "learning_rate": 5.447177507033994e-06,
      "loss": 0.0189,
      "step": 1401900
    },
    {
      "epoch": 2.2942728278444386,
      "grad_norm": 0.2814660966396332,
      "learning_rate": 5.4471116148204775e-06,
      "loss": 0.017,
      "step": 1401920
    },
    {
      "epoch": 2.2943055582830922,
      "grad_norm": 0.2286631315946579,
      "learning_rate": 5.447045722606959e-06,
      "loss": 0.015,
      "step": 1401940
    },
    {
      "epoch": 2.2943382887217454,
      "grad_norm": 0.42513471841812134,
      "learning_rate": 5.446979830393443e-06,
      "loss": 0.0106,
      "step": 1401960
    },
    {
      "epoch": 2.2943710191603985,
      "grad_norm": 0.1998051255941391,
      "learning_rate": 5.446913938179925e-06,
      "loss": 0.0141,
      "step": 1401980
    },
    {
      "epoch": 2.294403749599052,
      "grad_norm": 1.0355936288833618,
      "learning_rate": 5.4468480459664085e-06,
      "loss": 0.0231,
      "step": 1402000
    },
    {
      "epoch": 2.2944364800377053,
      "grad_norm": 1.0327891111373901,
      "learning_rate": 5.446782153752892e-06,
      "loss": 0.0136,
      "step": 1402020
    },
    {
      "epoch": 2.294469210476359,
      "grad_norm": 0.2744779586791992,
      "learning_rate": 5.446716261539374e-06,
      "loss": 0.0121,
      "step": 1402040
    },
    {
      "epoch": 2.294501940915012,
      "grad_norm": 0.3428027331829071,
      "learning_rate": 5.4466503693258575e-06,
      "loss": 0.0134,
      "step": 1402060
    },
    {
      "epoch": 2.2945346713536656,
      "grad_norm": 0.47439852356910706,
      "learning_rate": 5.44658447711234e-06,
      "loss": 0.0092,
      "step": 1402080
    },
    {
      "epoch": 2.2945674017923188,
      "grad_norm": 0.8458353877067566,
      "learning_rate": 5.446518584898823e-06,
      "loss": 0.0196,
      "step": 1402100
    },
    {
      "epoch": 2.294600132230972,
      "grad_norm": 0.14982014894485474,
      "learning_rate": 5.446452692685306e-06,
      "loss": 0.0167,
      "step": 1402120
    },
    {
      "epoch": 2.2946328626696255,
      "grad_norm": 0.6507944464683533,
      "learning_rate": 5.446386800471789e-06,
      "loss": 0.0231,
      "step": 1402140
    },
    {
      "epoch": 2.2946655931082787,
      "grad_norm": 0.3704703152179718,
      "learning_rate": 5.446320908258271e-06,
      "loss": 0.0132,
      "step": 1402160
    },
    {
      "epoch": 2.2946983235469323,
      "grad_norm": 0.54129558801651,
      "learning_rate": 5.446255016044755e-06,
      "loss": 0.0151,
      "step": 1402180
    },
    {
      "epoch": 2.2947310539855854,
      "grad_norm": 0.2973249554634094,
      "learning_rate": 5.446189123831237e-06,
      "loss": 0.0176,
      "step": 1402200
    },
    {
      "epoch": 2.294763784424239,
      "grad_norm": 1.3567880392074585,
      "learning_rate": 5.44612323161772e-06,
      "loss": 0.0181,
      "step": 1402220
    },
    {
      "epoch": 2.294796514862892,
      "grad_norm": 0.2780151665210724,
      "learning_rate": 5.446057339404203e-06,
      "loss": 0.0166,
      "step": 1402240
    },
    {
      "epoch": 2.2948292453015453,
      "grad_norm": 0.32724618911743164,
      "learning_rate": 5.445991447190686e-06,
      "loss": 0.0153,
      "step": 1402260
    },
    {
      "epoch": 2.294861975740199,
      "grad_norm": 0.5745744705200195,
      "learning_rate": 5.4459255549771685e-06,
      "loss": 0.017,
      "step": 1402280
    },
    {
      "epoch": 2.294894706178852,
      "grad_norm": 0.29372212290763855,
      "learning_rate": 5.445859662763652e-06,
      "loss": 0.0179,
      "step": 1402300
    },
    {
      "epoch": 2.2949274366175056,
      "grad_norm": 0.2122928649187088,
      "learning_rate": 5.445793770550134e-06,
      "loss": 0.0134,
      "step": 1402320
    },
    {
      "epoch": 2.294960167056159,
      "grad_norm": 0.269624263048172,
      "learning_rate": 5.445727878336618e-06,
      "loss": 0.014,
      "step": 1402340
    },
    {
      "epoch": 2.2949928974948124,
      "grad_norm": 0.36149731278419495,
      "learning_rate": 5.445661986123101e-06,
      "loss": 0.0128,
      "step": 1402360
    },
    {
      "epoch": 2.2950256279334655,
      "grad_norm": 1.588456630706787,
      "learning_rate": 5.445596093909583e-06,
      "loss": 0.0181,
      "step": 1402380
    },
    {
      "epoch": 2.2950583583721187,
      "grad_norm": 0.8973075151443481,
      "learning_rate": 5.445530201696067e-06,
      "loss": 0.0146,
      "step": 1402400
    },
    {
      "epoch": 2.2950910888107723,
      "grad_norm": 0.48641541600227356,
      "learning_rate": 5.4454643094825485e-06,
      "loss": 0.0191,
      "step": 1402420
    },
    {
      "epoch": 2.2951238192494254,
      "grad_norm": 0.36868342757225037,
      "learning_rate": 5.445398417269032e-06,
      "loss": 0.0234,
      "step": 1402440
    },
    {
      "epoch": 2.295156549688079,
      "grad_norm": 0.25160038471221924,
      "learning_rate": 5.445332525055515e-06,
      "loss": 0.0111,
      "step": 1402460
    },
    {
      "epoch": 2.295189280126732,
      "grad_norm": 0.5745162963867188,
      "learning_rate": 5.445266632841998e-06,
      "loss": 0.0119,
      "step": 1402480
    },
    {
      "epoch": 2.2952220105653858,
      "grad_norm": 0.4727548658847809,
      "learning_rate": 5.44520074062848e-06,
      "loss": 0.0134,
      "step": 1402500
    },
    {
      "epoch": 2.295254741004039,
      "grad_norm": 0.3935953378677368,
      "learning_rate": 5.445134848414964e-06,
      "loss": 0.01,
      "step": 1402520
    },
    {
      "epoch": 2.295287471442692,
      "grad_norm": 0.41392093896865845,
      "learning_rate": 5.445068956201446e-06,
      "loss": 0.0199,
      "step": 1402540
    },
    {
      "epoch": 2.2953202018813457,
      "grad_norm": 0.168740376830101,
      "learning_rate": 5.445003063987929e-06,
      "loss": 0.0163,
      "step": 1402560
    },
    {
      "epoch": 2.295352932319999,
      "grad_norm": 0.5563686490058899,
      "learning_rate": 5.444937171774411e-06,
      "loss": 0.0174,
      "step": 1402580
    },
    {
      "epoch": 2.2953856627586524,
      "grad_norm": 0.1017812043428421,
      "learning_rate": 5.444871279560895e-06,
      "loss": 0.0116,
      "step": 1402600
    },
    {
      "epoch": 2.2954183931973056,
      "grad_norm": 0.3315613865852356,
      "learning_rate": 5.444805387347377e-06,
      "loss": 0.0122,
      "step": 1402620
    },
    {
      "epoch": 2.295451123635959,
      "grad_norm": 0.09346576780080795,
      "learning_rate": 5.44473949513386e-06,
      "loss": 0.0206,
      "step": 1402640
    },
    {
      "epoch": 2.2954838540746123,
      "grad_norm": 1.1969759464263916,
      "learning_rate": 5.444673602920343e-06,
      "loss": 0.0129,
      "step": 1402660
    },
    {
      "epoch": 2.2955165845132655,
      "grad_norm": 0.793713390827179,
      "learning_rate": 5.444607710706826e-06,
      "loss": 0.0164,
      "step": 1402680
    },
    {
      "epoch": 2.295549314951919,
      "grad_norm": 0.25442057847976685,
      "learning_rate": 5.444541818493309e-06,
      "loss": 0.0189,
      "step": 1402700
    },
    {
      "epoch": 2.295582045390572,
      "grad_norm": 0.294432133436203,
      "learning_rate": 5.444475926279792e-06,
      "loss": 0.0145,
      "step": 1402720
    },
    {
      "epoch": 2.295614775829226,
      "grad_norm": 0.7111707329750061,
      "learning_rate": 5.444410034066275e-06,
      "loss": 0.0204,
      "step": 1402740
    },
    {
      "epoch": 2.295647506267879,
      "grad_norm": 0.4969545900821686,
      "learning_rate": 5.444344141852758e-06,
      "loss": 0.0121,
      "step": 1402760
    },
    {
      "epoch": 2.2956802367065325,
      "grad_norm": 0.6558258533477783,
      "learning_rate": 5.444278249639241e-06,
      "loss": 0.0189,
      "step": 1402780
    },
    {
      "epoch": 2.2957129671451857,
      "grad_norm": 0.2473914474248886,
      "learning_rate": 5.444212357425723e-06,
      "loss": 0.0177,
      "step": 1402800
    },
    {
      "epoch": 2.295745697583839,
      "grad_norm": 0.2511133849620819,
      "learning_rate": 5.444146465212207e-06,
      "loss": 0.0166,
      "step": 1402820
    },
    {
      "epoch": 2.2957784280224924,
      "grad_norm": 2.008491039276123,
      "learning_rate": 5.444080572998689e-06,
      "loss": 0.0154,
      "step": 1402840
    },
    {
      "epoch": 2.2958111584611456,
      "grad_norm": 0.20042043924331665,
      "learning_rate": 5.444014680785172e-06,
      "loss": 0.0125,
      "step": 1402860
    },
    {
      "epoch": 2.295843888899799,
      "grad_norm": 0.810088038444519,
      "learning_rate": 5.443948788571655e-06,
      "loss": 0.0177,
      "step": 1402880
    },
    {
      "epoch": 2.2958766193384523,
      "grad_norm": 0.9043028354644775,
      "learning_rate": 5.443882896358138e-06,
      "loss": 0.015,
      "step": 1402900
    },
    {
      "epoch": 2.295909349777106,
      "grad_norm": 0.3126187324523926,
      "learning_rate": 5.4438170041446204e-06,
      "loss": 0.0171,
      "step": 1402920
    },
    {
      "epoch": 2.295942080215759,
      "grad_norm": 0.22570423781871796,
      "learning_rate": 5.443751111931104e-06,
      "loss": 0.0112,
      "step": 1402940
    },
    {
      "epoch": 2.2959748106544122,
      "grad_norm": 0.8032999634742737,
      "learning_rate": 5.443685219717586e-06,
      "loss": 0.0167,
      "step": 1402960
    },
    {
      "epoch": 2.296007541093066,
      "grad_norm": 0.2522316873073578,
      "learning_rate": 5.4436193275040695e-06,
      "loss": 0.0205,
      "step": 1402980
    },
    {
      "epoch": 2.296040271531719,
      "grad_norm": 0.2836044430732727,
      "learning_rate": 5.443553435290551e-06,
      "loss": 0.0155,
      "step": 1403000
    },
    {
      "epoch": 2.2960730019703726,
      "grad_norm": 0.29010576009750366,
      "learning_rate": 5.443487543077035e-06,
      "loss": 0.009,
      "step": 1403020
    },
    {
      "epoch": 2.2961057324090257,
      "grad_norm": 0.6866805553436279,
      "learning_rate": 5.443421650863518e-06,
      "loss": 0.0156,
      "step": 1403040
    },
    {
      "epoch": 2.2961384628476793,
      "grad_norm": 1.0607613325119019,
      "learning_rate": 5.4433557586500005e-06,
      "loss": 0.0143,
      "step": 1403060
    },
    {
      "epoch": 2.2961711932863325,
      "grad_norm": 0.4455389678478241,
      "learning_rate": 5.443289866436484e-06,
      "loss": 0.0139,
      "step": 1403080
    },
    {
      "epoch": 2.2962039237249856,
      "grad_norm": 0.5138139128684998,
      "learning_rate": 5.443223974222967e-06,
      "loss": 0.0174,
      "step": 1403100
    },
    {
      "epoch": 2.296236654163639,
      "grad_norm": 0.2811545431613922,
      "learning_rate": 5.4431580820094495e-06,
      "loss": 0.0153,
      "step": 1403120
    },
    {
      "epoch": 2.2962693846022924,
      "grad_norm": 0.13805346190929413,
      "learning_rate": 5.443092189795932e-06,
      "loss": 0.0192,
      "step": 1403140
    },
    {
      "epoch": 2.296302115040946,
      "grad_norm": 0.3259066343307495,
      "learning_rate": 5.443026297582416e-06,
      "loss": 0.014,
      "step": 1403160
    },
    {
      "epoch": 2.296334845479599,
      "grad_norm": 0.7105820178985596,
      "learning_rate": 5.442960405368898e-06,
      "loss": 0.0122,
      "step": 1403180
    },
    {
      "epoch": 2.2963675759182527,
      "grad_norm": 0.7374438643455505,
      "learning_rate": 5.442894513155381e-06,
      "loss": 0.0155,
      "step": 1403200
    },
    {
      "epoch": 2.296400306356906,
      "grad_norm": 0.5581746697425842,
      "learning_rate": 5.442828620941863e-06,
      "loss": 0.0179,
      "step": 1403220
    },
    {
      "epoch": 2.296433036795559,
      "grad_norm": 0.25693514943122864,
      "learning_rate": 5.442762728728347e-06,
      "loss": 0.0204,
      "step": 1403240
    },
    {
      "epoch": 2.2964657672342126,
      "grad_norm": 0.617057204246521,
      "learning_rate": 5.4426968365148295e-06,
      "loss": 0.0164,
      "step": 1403260
    },
    {
      "epoch": 2.2964984976728657,
      "grad_norm": 0.28425681591033936,
      "learning_rate": 5.442630944301312e-06,
      "loss": 0.0153,
      "step": 1403280
    },
    {
      "epoch": 2.2965312281115193,
      "grad_norm": 0.4300660490989685,
      "learning_rate": 5.442565052087795e-06,
      "loss": 0.0133,
      "step": 1403300
    },
    {
      "epoch": 2.2965639585501725,
      "grad_norm": 2.648374557495117,
      "learning_rate": 5.442499159874279e-06,
      "loss": 0.0174,
      "step": 1403320
    },
    {
      "epoch": 2.296596688988826,
      "grad_norm": 0.2863248884677887,
      "learning_rate": 5.4424332676607605e-06,
      "loss": 0.0147,
      "step": 1403340
    },
    {
      "epoch": 2.2966294194274792,
      "grad_norm": 0.4064812660217285,
      "learning_rate": 5.442367375447244e-06,
      "loss": 0.015,
      "step": 1403360
    },
    {
      "epoch": 2.2966621498661324,
      "grad_norm": 0.3867974281311035,
      "learning_rate": 5.442301483233726e-06,
      "loss": 0.0128,
      "step": 1403380
    },
    {
      "epoch": 2.296694880304786,
      "grad_norm": 0.2549046277999878,
      "learning_rate": 5.4422355910202096e-06,
      "loss": 0.0146,
      "step": 1403400
    },
    {
      "epoch": 2.296727610743439,
      "grad_norm": 0.2660561203956604,
      "learning_rate": 5.442169698806693e-06,
      "loss": 0.0158,
      "step": 1403420
    },
    {
      "epoch": 2.2967603411820927,
      "grad_norm": 0.4154747724533081,
      "learning_rate": 5.442103806593175e-06,
      "loss": 0.0189,
      "step": 1403440
    },
    {
      "epoch": 2.296793071620746,
      "grad_norm": 0.5809176564216614,
      "learning_rate": 5.442037914379659e-06,
      "loss": 0.0162,
      "step": 1403460
    },
    {
      "epoch": 2.296825802059399,
      "grad_norm": 0.09564563632011414,
      "learning_rate": 5.441972022166141e-06,
      "loss": 0.0102,
      "step": 1403480
    },
    {
      "epoch": 2.2968585324980526,
      "grad_norm": 0.6071420311927795,
      "learning_rate": 5.441906129952624e-06,
      "loss": 0.0153,
      "step": 1403500
    },
    {
      "epoch": 2.2968912629367058,
      "grad_norm": 0.07736378163099289,
      "learning_rate": 5.441840237739107e-06,
      "loss": 0.0182,
      "step": 1403520
    },
    {
      "epoch": 2.2969239933753594,
      "grad_norm": 0.13433168828487396,
      "learning_rate": 5.4417743455255904e-06,
      "loss": 0.0153,
      "step": 1403540
    },
    {
      "epoch": 2.2969567238140125,
      "grad_norm": 0.24269551038742065,
      "learning_rate": 5.441708453312072e-06,
      "loss": 0.01,
      "step": 1403560
    },
    {
      "epoch": 2.2969894542526657,
      "grad_norm": 0.7328320145606995,
      "learning_rate": 5.441642561098556e-06,
      "loss": 0.0163,
      "step": 1403580
    },
    {
      "epoch": 2.2970221846913192,
      "grad_norm": 0.23583121597766876,
      "learning_rate": 5.441576668885038e-06,
      "loss": 0.0192,
      "step": 1403600
    },
    {
      "epoch": 2.2970549151299724,
      "grad_norm": 0.45895037055015564,
      "learning_rate": 5.441510776671521e-06,
      "loss": 0.0143,
      "step": 1403620
    },
    {
      "epoch": 2.297087645568626,
      "grad_norm": 0.4963841438293457,
      "learning_rate": 5.441444884458003e-06,
      "loss": 0.0096,
      "step": 1403640
    },
    {
      "epoch": 2.297120376007279,
      "grad_norm": 0.2635752558708191,
      "learning_rate": 5.441378992244487e-06,
      "loss": 0.0112,
      "step": 1403660
    },
    {
      "epoch": 2.2971531064459327,
      "grad_norm": 0.30747300386428833,
      "learning_rate": 5.44131310003097e-06,
      "loss": 0.0129,
      "step": 1403680
    },
    {
      "epoch": 2.297185836884586,
      "grad_norm": 0.3261435925960541,
      "learning_rate": 5.441247207817452e-06,
      "loss": 0.009,
      "step": 1403700
    },
    {
      "epoch": 2.297218567323239,
      "grad_norm": 0.3939736783504486,
      "learning_rate": 5.441181315603935e-06,
      "loss": 0.0118,
      "step": 1403720
    },
    {
      "epoch": 2.2972512977618926,
      "grad_norm": 0.43901196122169495,
      "learning_rate": 5.441115423390419e-06,
      "loss": 0.0182,
      "step": 1403740
    },
    {
      "epoch": 2.297284028200546,
      "grad_norm": 0.36325502395629883,
      "learning_rate": 5.4410495311769014e-06,
      "loss": 0.0237,
      "step": 1403760
    },
    {
      "epoch": 2.2973167586391994,
      "grad_norm": 0.4453043043613434,
      "learning_rate": 5.440983638963384e-06,
      "loss": 0.0097,
      "step": 1403780
    },
    {
      "epoch": 2.2973494890778525,
      "grad_norm": 0.6006650328636169,
      "learning_rate": 5.440917746749868e-06,
      "loss": 0.0137,
      "step": 1403800
    },
    {
      "epoch": 2.297382219516506,
      "grad_norm": 0.5767415165901184,
      "learning_rate": 5.44085185453635e-06,
      "loss": 0.019,
      "step": 1403820
    },
    {
      "epoch": 2.2974149499551593,
      "grad_norm": 0.3415537476539612,
      "learning_rate": 5.440785962322833e-06,
      "loss": 0.0115,
      "step": 1403840
    },
    {
      "epoch": 2.2974476803938124,
      "grad_norm": 0.6098338961601257,
      "learning_rate": 5.440720070109315e-06,
      "loss": 0.0153,
      "step": 1403860
    },
    {
      "epoch": 2.297480410832466,
      "grad_norm": 0.2487279772758484,
      "learning_rate": 5.440654177895799e-06,
      "loss": 0.0169,
      "step": 1403880
    },
    {
      "epoch": 2.297513141271119,
      "grad_norm": 0.30020835995674133,
      "learning_rate": 5.4405882856822815e-06,
      "loss": 0.0122,
      "step": 1403900
    },
    {
      "epoch": 2.2975458717097728,
      "grad_norm": 0.24421298503875732,
      "learning_rate": 5.440522393468764e-06,
      "loss": 0.0177,
      "step": 1403920
    },
    {
      "epoch": 2.297578602148426,
      "grad_norm": 0.587440550327301,
      "learning_rate": 5.440456501255247e-06,
      "loss": 0.0112,
      "step": 1403940
    },
    {
      "epoch": 2.2976113325870795,
      "grad_norm": 0.2499125450849533,
      "learning_rate": 5.4403906090417305e-06,
      "loss": 0.0132,
      "step": 1403960
    },
    {
      "epoch": 2.2976440630257327,
      "grad_norm": 0.208428755402565,
      "learning_rate": 5.440324716828212e-06,
      "loss": 0.0167,
      "step": 1403980
    },
    {
      "epoch": 2.297676793464386,
      "grad_norm": 0.31923365592956543,
      "learning_rate": 5.440258824614696e-06,
      "loss": 0.0125,
      "step": 1404000
    },
    {
      "epoch": 2.2977095239030394,
      "grad_norm": 0.1610010266304016,
      "learning_rate": 5.440192932401178e-06,
      "loss": 0.0098,
      "step": 1404020
    },
    {
      "epoch": 2.2977422543416925,
      "grad_norm": 1.7900038957595825,
      "learning_rate": 5.4401270401876615e-06,
      "loss": 0.0165,
      "step": 1404040
    },
    {
      "epoch": 2.297774984780346,
      "grad_norm": 0.32477322220802307,
      "learning_rate": 5.440061147974144e-06,
      "loss": 0.0112,
      "step": 1404060
    },
    {
      "epoch": 2.2978077152189993,
      "grad_norm": 0.0998903289437294,
      "learning_rate": 5.439995255760627e-06,
      "loss": 0.0114,
      "step": 1404080
    },
    {
      "epoch": 2.297840445657653,
      "grad_norm": 0.13003776967525482,
      "learning_rate": 5.43992936354711e-06,
      "loss": 0.0129,
      "step": 1404100
    },
    {
      "epoch": 2.297873176096306,
      "grad_norm": 0.2574608623981476,
      "learning_rate": 5.439863471333593e-06,
      "loss": 0.0186,
      "step": 1404120
    },
    {
      "epoch": 2.297905906534959,
      "grad_norm": 0.5168138146400452,
      "learning_rate": 5.439797579120076e-06,
      "loss": 0.0196,
      "step": 1404140
    },
    {
      "epoch": 2.297938636973613,
      "grad_norm": 9.248738288879395,
      "learning_rate": 5.439731686906559e-06,
      "loss": 0.0126,
      "step": 1404160
    },
    {
      "epoch": 2.297971367412266,
      "grad_norm": 0.24184809625148773,
      "learning_rate": 5.439665794693042e-06,
      "loss": 0.0162,
      "step": 1404180
    },
    {
      "epoch": 2.2980040978509195,
      "grad_norm": 0.06637028604745865,
      "learning_rate": 5.439599902479524e-06,
      "loss": 0.0162,
      "step": 1404200
    },
    {
      "epoch": 2.2980368282895727,
      "grad_norm": 0.1398095339536667,
      "learning_rate": 5.439534010266008e-06,
      "loss": 0.0152,
      "step": 1404220
    },
    {
      "epoch": 2.2980695587282263,
      "grad_norm": 1.015548825263977,
      "learning_rate": 5.43946811805249e-06,
      "loss": 0.0171,
      "step": 1404240
    },
    {
      "epoch": 2.2981022891668794,
      "grad_norm": 1.0887497663497925,
      "learning_rate": 5.439402225838973e-06,
      "loss": 0.0191,
      "step": 1404260
    },
    {
      "epoch": 2.2981350196055326,
      "grad_norm": 0.22782552242279053,
      "learning_rate": 5.439336333625456e-06,
      "loss": 0.0084,
      "step": 1404280
    },
    {
      "epoch": 2.298167750044186,
      "grad_norm": 0.7033827900886536,
      "learning_rate": 5.439270441411939e-06,
      "loss": 0.0166,
      "step": 1404300
    },
    {
      "epoch": 2.2982004804828393,
      "grad_norm": 0.34601733088493347,
      "learning_rate": 5.4392045491984215e-06,
      "loss": 0.0141,
      "step": 1404320
    },
    {
      "epoch": 2.298233210921493,
      "grad_norm": 0.3870648145675659,
      "learning_rate": 5.439138656984905e-06,
      "loss": 0.0177,
      "step": 1404340
    },
    {
      "epoch": 2.298265941360146,
      "grad_norm": 0.3381560146808624,
      "learning_rate": 5.439072764771387e-06,
      "loss": 0.0161,
      "step": 1404360
    },
    {
      "epoch": 2.2982986717987997,
      "grad_norm": 0.47060880064964294,
      "learning_rate": 5.439006872557871e-06,
      "loss": 0.0147,
      "step": 1404380
    },
    {
      "epoch": 2.298331402237453,
      "grad_norm": 0.22094152867794037,
      "learning_rate": 5.4389409803443525e-06,
      "loss": 0.0143,
      "step": 1404400
    },
    {
      "epoch": 2.298364132676106,
      "grad_norm": 0.5525832772254944,
      "learning_rate": 5.438875088130836e-06,
      "loss": 0.0196,
      "step": 1404420
    },
    {
      "epoch": 2.2983968631147595,
      "grad_norm": 1.199623703956604,
      "learning_rate": 5.438809195917318e-06,
      "loss": 0.0209,
      "step": 1404440
    },
    {
      "epoch": 2.2984295935534127,
      "grad_norm": 0.33150359988212585,
      "learning_rate": 5.4387433037038016e-06,
      "loss": 0.0164,
      "step": 1404460
    },
    {
      "epoch": 2.2984623239920663,
      "grad_norm": 0.36525872349739075,
      "learning_rate": 5.438677411490285e-06,
      "loss": 0.0162,
      "step": 1404480
    },
    {
      "epoch": 2.2984950544307194,
      "grad_norm": 0.26789337396621704,
      "learning_rate": 5.438611519276767e-06,
      "loss": 0.0105,
      "step": 1404500
    },
    {
      "epoch": 2.298527784869373,
      "grad_norm": 1.1559288501739502,
      "learning_rate": 5.438545627063251e-06,
      "loss": 0.0199,
      "step": 1404520
    },
    {
      "epoch": 2.298560515308026,
      "grad_norm": 0.8106891512870789,
      "learning_rate": 5.438479734849733e-06,
      "loss": 0.0146,
      "step": 1404540
    },
    {
      "epoch": 2.2985932457466793,
      "grad_norm": 0.59991455078125,
      "learning_rate": 5.438413842636216e-06,
      "loss": 0.0162,
      "step": 1404560
    },
    {
      "epoch": 2.298625976185333,
      "grad_norm": 0.46071985363960266,
      "learning_rate": 5.438347950422699e-06,
      "loss": 0.0099,
      "step": 1404580
    },
    {
      "epoch": 2.298658706623986,
      "grad_norm": 0.4649554491043091,
      "learning_rate": 5.4382820582091824e-06,
      "loss": 0.0167,
      "step": 1404600
    },
    {
      "epoch": 2.2986914370626397,
      "grad_norm": 1.0159528255462646,
      "learning_rate": 5.438216165995664e-06,
      "loss": 0.0183,
      "step": 1404620
    },
    {
      "epoch": 2.298724167501293,
      "grad_norm": 0.6581798195838928,
      "learning_rate": 5.438150273782148e-06,
      "loss": 0.0191,
      "step": 1404640
    },
    {
      "epoch": 2.2987568979399464,
      "grad_norm": 0.4665239751338959,
      "learning_rate": 5.43808438156863e-06,
      "loss": 0.0167,
      "step": 1404660
    },
    {
      "epoch": 2.2987896283785996,
      "grad_norm": 0.4049784243106842,
      "learning_rate": 5.438018489355113e-06,
      "loss": 0.0152,
      "step": 1404680
    },
    {
      "epoch": 2.2988223588172527,
      "grad_norm": 0.3509383499622345,
      "learning_rate": 5.437952597141596e-06,
      "loss": 0.0128,
      "step": 1404700
    },
    {
      "epoch": 2.2988550892559063,
      "grad_norm": 0.1348290741443634,
      "learning_rate": 5.437886704928079e-06,
      "loss": 0.0125,
      "step": 1404720
    },
    {
      "epoch": 2.2988878196945595,
      "grad_norm": 0.6064019203186035,
      "learning_rate": 5.437820812714562e-06,
      "loss": 0.0133,
      "step": 1404740
    },
    {
      "epoch": 2.298920550133213,
      "grad_norm": 0.3299922049045563,
      "learning_rate": 5.437754920501045e-06,
      "loss": 0.0258,
      "step": 1404760
    },
    {
      "epoch": 2.298953280571866,
      "grad_norm": 0.2626394033432007,
      "learning_rate": 5.437689028287527e-06,
      "loss": 0.0221,
      "step": 1404780
    },
    {
      "epoch": 2.29898601101052,
      "grad_norm": 0.39812684059143066,
      "learning_rate": 5.437623136074011e-06,
      "loss": 0.0179,
      "step": 1404800
    },
    {
      "epoch": 2.299018741449173,
      "grad_norm": 0.202983096241951,
      "learning_rate": 5.437557243860494e-06,
      "loss": 0.0191,
      "step": 1404820
    },
    {
      "epoch": 2.299051471887826,
      "grad_norm": 0.5570180416107178,
      "learning_rate": 5.437491351646976e-06,
      "loss": 0.0138,
      "step": 1404840
    },
    {
      "epoch": 2.2990842023264797,
      "grad_norm": 0.6427743434906006,
      "learning_rate": 5.43742545943346e-06,
      "loss": 0.0204,
      "step": 1404860
    },
    {
      "epoch": 2.299116932765133,
      "grad_norm": 0.22619585692882538,
      "learning_rate": 5.437359567219942e-06,
      "loss": 0.0146,
      "step": 1404880
    },
    {
      "epoch": 2.2991496632037864,
      "grad_norm": 0.5255573391914368,
      "learning_rate": 5.437293675006425e-06,
      "loss": 0.0217,
      "step": 1404900
    },
    {
      "epoch": 2.2991823936424396,
      "grad_norm": 0.29716354608535767,
      "learning_rate": 5.437227782792908e-06,
      "loss": 0.0207,
      "step": 1404920
    },
    {
      "epoch": 2.2992151240810927,
      "grad_norm": 0.11156605184078217,
      "learning_rate": 5.437161890579391e-06,
      "loss": 0.0082,
      "step": 1404940
    },
    {
      "epoch": 2.2992478545197463,
      "grad_norm": 0.17628537118434906,
      "learning_rate": 5.4370959983658734e-06,
      "loss": 0.0113,
      "step": 1404960
    },
    {
      "epoch": 2.2992805849583995,
      "grad_norm": 0.5919996500015259,
      "learning_rate": 5.437030106152357e-06,
      "loss": 0.0134,
      "step": 1404980
    },
    {
      "epoch": 2.299313315397053,
      "grad_norm": 1.2594348192214966,
      "learning_rate": 5.436964213938839e-06,
      "loss": 0.018,
      "step": 1405000
    },
    {
      "epoch": 2.2993460458357062,
      "grad_norm": 0.708253800868988,
      "learning_rate": 5.4368983217253225e-06,
      "loss": 0.0172,
      "step": 1405020
    },
    {
      "epoch": 2.2993787762743594,
      "grad_norm": 0.4067932367324829,
      "learning_rate": 5.436832429511804e-06,
      "loss": 0.0194,
      "step": 1405040
    },
    {
      "epoch": 2.299411506713013,
      "grad_norm": 0.2878318130970001,
      "learning_rate": 5.436766537298288e-06,
      "loss": 0.0235,
      "step": 1405060
    },
    {
      "epoch": 2.299444237151666,
      "grad_norm": 0.3863115906715393,
      "learning_rate": 5.436700645084771e-06,
      "loss": 0.0154,
      "step": 1405080
    },
    {
      "epoch": 2.2994769675903197,
      "grad_norm": 0.50938481092453,
      "learning_rate": 5.4366347528712535e-06,
      "loss": 0.016,
      "step": 1405100
    },
    {
      "epoch": 2.299509698028973,
      "grad_norm": 1.5021723508834839,
      "learning_rate": 5.436568860657736e-06,
      "loss": 0.0128,
      "step": 1405120
    },
    {
      "epoch": 2.2995424284676265,
      "grad_norm": 0.6614807844161987,
      "learning_rate": 5.43650296844422e-06,
      "loss": 0.0117,
      "step": 1405140
    },
    {
      "epoch": 2.2995751589062796,
      "grad_norm": 0.2107272744178772,
      "learning_rate": 5.436437076230702e-06,
      "loss": 0.0163,
      "step": 1405160
    },
    {
      "epoch": 2.2996078893449328,
      "grad_norm": 0.16911830008029938,
      "learning_rate": 5.436371184017185e-06,
      "loss": 0.019,
      "step": 1405180
    },
    {
      "epoch": 2.2996406197835864,
      "grad_norm": 0.08425518125295639,
      "learning_rate": 5.436305291803669e-06,
      "loss": 0.0122,
      "step": 1405200
    },
    {
      "epoch": 2.2996733502222395,
      "grad_norm": 0.47989213466644287,
      "learning_rate": 5.436239399590151e-06,
      "loss": 0.0166,
      "step": 1405220
    },
    {
      "epoch": 2.299706080660893,
      "grad_norm": 0.1701647937297821,
      "learning_rate": 5.436173507376634e-06,
      "loss": 0.0126,
      "step": 1405240
    },
    {
      "epoch": 2.2997388110995463,
      "grad_norm": 0.21508710086345673,
      "learning_rate": 5.436107615163116e-06,
      "loss": 0.0133,
      "step": 1405260
    },
    {
      "epoch": 2.2997715415382,
      "grad_norm": 0.8698248267173767,
      "learning_rate": 5.4360417229496e-06,
      "loss": 0.0138,
      "step": 1405280
    },
    {
      "epoch": 2.299804271976853,
      "grad_norm": 0.303663432598114,
      "learning_rate": 5.4359758307360826e-06,
      "loss": 0.0149,
      "step": 1405300
    },
    {
      "epoch": 2.299837002415506,
      "grad_norm": 0.3189504146575928,
      "learning_rate": 5.435909938522565e-06,
      "loss": 0.0171,
      "step": 1405320
    },
    {
      "epoch": 2.2998697328541597,
      "grad_norm": 0.8219916224479675,
      "learning_rate": 5.435844046309048e-06,
      "loss": 0.0167,
      "step": 1405340
    },
    {
      "epoch": 2.299902463292813,
      "grad_norm": 0.5461047887802124,
      "learning_rate": 5.435778154095532e-06,
      "loss": 0.014,
      "step": 1405360
    },
    {
      "epoch": 2.2999351937314665,
      "grad_norm": 0.3364918529987335,
      "learning_rate": 5.4357122618820135e-06,
      "loss": 0.0103,
      "step": 1405380
    },
    {
      "epoch": 2.2999679241701196,
      "grad_norm": 0.2658142149448395,
      "learning_rate": 5.435646369668497e-06,
      "loss": 0.0137,
      "step": 1405400
    },
    {
      "epoch": 2.3000006546087732,
      "grad_norm": 0.20074723660945892,
      "learning_rate": 5.435580477454979e-06,
      "loss": 0.0194,
      "step": 1405420
    },
    {
      "epoch": 2.3000333850474264,
      "grad_norm": 0.17466628551483154,
      "learning_rate": 5.435514585241463e-06,
      "loss": 0.0156,
      "step": 1405440
    },
    {
      "epoch": 2.3000661154860795,
      "grad_norm": 0.952436625957489,
      "learning_rate": 5.4354486930279445e-06,
      "loss": 0.0111,
      "step": 1405460
    },
    {
      "epoch": 2.300098845924733,
      "grad_norm": 0.327059805393219,
      "learning_rate": 5.435382800814428e-06,
      "loss": 0.0145,
      "step": 1405480
    },
    {
      "epoch": 2.3001315763633863,
      "grad_norm": 0.6877111792564392,
      "learning_rate": 5.435316908600911e-06,
      "loss": 0.0103,
      "step": 1405500
    },
    {
      "epoch": 2.30016430680204,
      "grad_norm": 0.2960127890110016,
      "learning_rate": 5.4352510163873935e-06,
      "loss": 0.0096,
      "step": 1405520
    },
    {
      "epoch": 2.300197037240693,
      "grad_norm": 0.08100422471761703,
      "learning_rate": 5.435185124173877e-06,
      "loss": 0.0175,
      "step": 1405540
    },
    {
      "epoch": 2.3002297676793466,
      "grad_norm": 0.24488119781017303,
      "learning_rate": 5.43511923196036e-06,
      "loss": 0.0106,
      "step": 1405560
    },
    {
      "epoch": 2.3002624981179998,
      "grad_norm": 0.12539221346378326,
      "learning_rate": 5.435053339746843e-06,
      "loss": 0.0111,
      "step": 1405580
    },
    {
      "epoch": 2.300295228556653,
      "grad_norm": 0.884345293045044,
      "learning_rate": 5.434987447533325e-06,
      "loss": 0.0248,
      "step": 1405600
    },
    {
      "epoch": 2.3003279589953065,
      "grad_norm": 0.18607327342033386,
      "learning_rate": 5.434921555319809e-06,
      "loss": 0.0128,
      "step": 1405620
    },
    {
      "epoch": 2.3003606894339597,
      "grad_norm": 0.20793302357196808,
      "learning_rate": 5.434855663106291e-06,
      "loss": 0.02,
      "step": 1405640
    },
    {
      "epoch": 2.3003934198726133,
      "grad_norm": 0.46651050448417664,
      "learning_rate": 5.434789770892774e-06,
      "loss": 0.0092,
      "step": 1405660
    },
    {
      "epoch": 2.3004261503112664,
      "grad_norm": 0.2553482949733734,
      "learning_rate": 5.434723878679256e-06,
      "loss": 0.0077,
      "step": 1405680
    },
    {
      "epoch": 2.30045888074992,
      "grad_norm": 0.20117567479610443,
      "learning_rate": 5.43465798646574e-06,
      "loss": 0.0139,
      "step": 1405700
    },
    {
      "epoch": 2.300491611188573,
      "grad_norm": 0.577197790145874,
      "learning_rate": 5.434592094252223e-06,
      "loss": 0.0135,
      "step": 1405720
    },
    {
      "epoch": 2.3005243416272263,
      "grad_norm": 0.6379501223564148,
      "learning_rate": 5.434526202038705e-06,
      "loss": 0.019,
      "step": 1405740
    },
    {
      "epoch": 2.30055707206588,
      "grad_norm": 0.13091963529586792,
      "learning_rate": 5.434460309825188e-06,
      "loss": 0.0114,
      "step": 1405760
    },
    {
      "epoch": 2.300589802504533,
      "grad_norm": 0.26663559675216675,
      "learning_rate": 5.434394417611672e-06,
      "loss": 0.0248,
      "step": 1405780
    },
    {
      "epoch": 2.3006225329431866,
      "grad_norm": 0.4115302264690399,
      "learning_rate": 5.434328525398154e-06,
      "loss": 0.018,
      "step": 1405800
    },
    {
      "epoch": 2.30065526338184,
      "grad_norm": 0.4823184013366699,
      "learning_rate": 5.434262633184637e-06,
      "loss": 0.012,
      "step": 1405820
    },
    {
      "epoch": 2.3006879938204934,
      "grad_norm": 0.641216516494751,
      "learning_rate": 5.434196740971119e-06,
      "loss": 0.0219,
      "step": 1405840
    },
    {
      "epoch": 2.3007207242591465,
      "grad_norm": 0.33645740151405334,
      "learning_rate": 5.434130848757603e-06,
      "loss": 0.0135,
      "step": 1405860
    },
    {
      "epoch": 2.3007534546977997,
      "grad_norm": 0.48525360226631165,
      "learning_rate": 5.434064956544086e-06,
      "loss": 0.0187,
      "step": 1405880
    },
    {
      "epoch": 2.3007861851364533,
      "grad_norm": 0.2996629476547241,
      "learning_rate": 5.433999064330568e-06,
      "loss": 0.0168,
      "step": 1405900
    },
    {
      "epoch": 2.3008189155751064,
      "grad_norm": 0.4867059588432312,
      "learning_rate": 5.433933172117052e-06,
      "loss": 0.0144,
      "step": 1405920
    },
    {
      "epoch": 2.30085164601376,
      "grad_norm": 0.19861924648284912,
      "learning_rate": 5.4338672799035345e-06,
      "loss": 0.0138,
      "step": 1405940
    },
    {
      "epoch": 2.300884376452413,
      "grad_norm": 0.21861279010772705,
      "learning_rate": 5.433801387690017e-06,
      "loss": 0.0148,
      "step": 1405960
    },
    {
      "epoch": 2.3009171068910668,
      "grad_norm": 0.3780301511287689,
      "learning_rate": 5.4337354954765e-06,
      "loss": 0.0168,
      "step": 1405980
    },
    {
      "epoch": 2.30094983732972,
      "grad_norm": 0.1579296886920929,
      "learning_rate": 5.4336696032629835e-06,
      "loss": 0.0099,
      "step": 1406000
    },
    {
      "epoch": 2.300982567768373,
      "grad_norm": 0.25634482502937317,
      "learning_rate": 5.433603711049465e-06,
      "loss": 0.0192,
      "step": 1406020
    },
    {
      "epoch": 2.3010152982070267,
      "grad_norm": 0.18627746403217316,
      "learning_rate": 5.433537818835949e-06,
      "loss": 0.0116,
      "step": 1406040
    },
    {
      "epoch": 2.30104802864568,
      "grad_norm": 0.2221425324678421,
      "learning_rate": 5.433471926622431e-06,
      "loss": 0.0147,
      "step": 1406060
    },
    {
      "epoch": 2.3010807590843334,
      "grad_norm": 0.30264708399772644,
      "learning_rate": 5.4334060344089145e-06,
      "loss": 0.0178,
      "step": 1406080
    },
    {
      "epoch": 2.3011134895229866,
      "grad_norm": 0.4140818417072296,
      "learning_rate": 5.433340142195397e-06,
      "loss": 0.0168,
      "step": 1406100
    },
    {
      "epoch": 2.30114621996164,
      "grad_norm": 0.46301767230033875,
      "learning_rate": 5.43327424998188e-06,
      "loss": 0.016,
      "step": 1406120
    },
    {
      "epoch": 2.3011789504002933,
      "grad_norm": 0.809817910194397,
      "learning_rate": 5.433208357768363e-06,
      "loss": 0.016,
      "step": 1406140
    },
    {
      "epoch": 2.3012116808389464,
      "grad_norm": 0.24258014559745789,
      "learning_rate": 5.433142465554846e-06,
      "loss": 0.0101,
      "step": 1406160
    },
    {
      "epoch": 2.3012444112776,
      "grad_norm": 0.1293216198682785,
      "learning_rate": 5.433076573341328e-06,
      "loss": 0.011,
      "step": 1406180
    },
    {
      "epoch": 2.301277141716253,
      "grad_norm": 0.24054579436779022,
      "learning_rate": 5.433010681127812e-06,
      "loss": 0.0123,
      "step": 1406200
    },
    {
      "epoch": 2.301309872154907,
      "grad_norm": 0.5097323656082153,
      "learning_rate": 5.432944788914294e-06,
      "loss": 0.0151,
      "step": 1406220
    },
    {
      "epoch": 2.30134260259356,
      "grad_norm": 0.5589231252670288,
      "learning_rate": 5.432878896700777e-06,
      "loss": 0.0153,
      "step": 1406240
    },
    {
      "epoch": 2.3013753330322135,
      "grad_norm": 0.29251086711883545,
      "learning_rate": 5.432813004487261e-06,
      "loss": 0.0118,
      "step": 1406260
    },
    {
      "epoch": 2.3014080634708667,
      "grad_norm": 1.082159399986267,
      "learning_rate": 5.432747112273743e-06,
      "loss": 0.0185,
      "step": 1406280
    },
    {
      "epoch": 2.30144079390952,
      "grad_norm": 1.1355352401733398,
      "learning_rate": 5.432681220060226e-06,
      "loss": 0.0144,
      "step": 1406300
    },
    {
      "epoch": 2.3014735243481734,
      "grad_norm": 0.2576589584350586,
      "learning_rate": 5.432615327846709e-06,
      "loss": 0.012,
      "step": 1406320
    },
    {
      "epoch": 2.3015062547868266,
      "grad_norm": 0.22980672121047974,
      "learning_rate": 5.432549435633192e-06,
      "loss": 0.0189,
      "step": 1406340
    },
    {
      "epoch": 2.30153898522548,
      "grad_norm": 0.9112030267715454,
      "learning_rate": 5.4324835434196745e-06,
      "loss": 0.0146,
      "step": 1406360
    },
    {
      "epoch": 2.3015717156641333,
      "grad_norm": 0.6996339559555054,
      "learning_rate": 5.432417651206158e-06,
      "loss": 0.0139,
      "step": 1406380
    },
    {
      "epoch": 2.301604446102787,
      "grad_norm": 0.15984739363193512,
      "learning_rate": 5.43235175899264e-06,
      "loss": 0.0099,
      "step": 1406400
    },
    {
      "epoch": 2.30163717654144,
      "grad_norm": 0.5726147890090942,
      "learning_rate": 5.432285866779124e-06,
      "loss": 0.0187,
      "step": 1406420
    },
    {
      "epoch": 2.301669906980093,
      "grad_norm": 0.19099660217761993,
      "learning_rate": 5.4322199745656055e-06,
      "loss": 0.0154,
      "step": 1406440
    },
    {
      "epoch": 2.301702637418747,
      "grad_norm": 0.188918799161911,
      "learning_rate": 5.432154082352089e-06,
      "loss": 0.0095,
      "step": 1406460
    },
    {
      "epoch": 2.3017353678574,
      "grad_norm": 0.2443767786026001,
      "learning_rate": 5.432088190138571e-06,
      "loss": 0.0259,
      "step": 1406480
    },
    {
      "epoch": 2.3017680982960536,
      "grad_norm": 0.5430893898010254,
      "learning_rate": 5.4320222979250546e-06,
      "loss": 0.0215,
      "step": 1406500
    },
    {
      "epoch": 2.3018008287347067,
      "grad_norm": 0.42976370453834534,
      "learning_rate": 5.431956405711537e-06,
      "loss": 0.011,
      "step": 1406520
    },
    {
      "epoch": 2.30183355917336,
      "grad_norm": 0.5766897201538086,
      "learning_rate": 5.43189051349802e-06,
      "loss": 0.0099,
      "step": 1406540
    },
    {
      "epoch": 2.3018662896120134,
      "grad_norm": 0.26398351788520813,
      "learning_rate": 5.431824621284503e-06,
      "loss": 0.0145,
      "step": 1406560
    },
    {
      "epoch": 2.3018990200506666,
      "grad_norm": 0.13383176922798157,
      "learning_rate": 5.431758729070986e-06,
      "loss": 0.0201,
      "step": 1406580
    },
    {
      "epoch": 2.30193175048932,
      "grad_norm": 0.27735015749931335,
      "learning_rate": 5.431692836857469e-06,
      "loss": 0.015,
      "step": 1406600
    },
    {
      "epoch": 2.3019644809279733,
      "grad_norm": 0.03589985519647598,
      "learning_rate": 5.431626944643952e-06,
      "loss": 0.0128,
      "step": 1406620
    },
    {
      "epoch": 2.3019972113666265,
      "grad_norm": 0.7485485672950745,
      "learning_rate": 5.4315610524304354e-06,
      "loss": 0.0185,
      "step": 1406640
    },
    {
      "epoch": 2.30202994180528,
      "grad_norm": 0.19970917701721191,
      "learning_rate": 5.431495160216917e-06,
      "loss": 0.0162,
      "step": 1406660
    },
    {
      "epoch": 2.3020626722439332,
      "grad_norm": 0.9348915815353394,
      "learning_rate": 5.431429268003401e-06,
      "loss": 0.0132,
      "step": 1406680
    },
    {
      "epoch": 2.302095402682587,
      "grad_norm": 0.11356973648071289,
      "learning_rate": 5.431363375789883e-06,
      "loss": 0.0162,
      "step": 1406700
    },
    {
      "epoch": 2.30212813312124,
      "grad_norm": 0.6213198900222778,
      "learning_rate": 5.431297483576366e-06,
      "loss": 0.0176,
      "step": 1406720
    },
    {
      "epoch": 2.3021608635598936,
      "grad_norm": 0.26986780762672424,
      "learning_rate": 5.431231591362849e-06,
      "loss": 0.0149,
      "step": 1406740
    },
    {
      "epoch": 2.3021935939985467,
      "grad_norm": 0.42830201983451843,
      "learning_rate": 5.431165699149332e-06,
      "loss": 0.0092,
      "step": 1406760
    },
    {
      "epoch": 2.3022263244372,
      "grad_norm": 0.13440647721290588,
      "learning_rate": 5.431099806935815e-06,
      "loss": 0.0102,
      "step": 1406780
    },
    {
      "epoch": 2.3022590548758535,
      "grad_norm": 0.7636842727661133,
      "learning_rate": 5.431033914722298e-06,
      "loss": 0.0179,
      "step": 1406800
    },
    {
      "epoch": 2.3022917853145066,
      "grad_norm": 0.09397783875465393,
      "learning_rate": 5.43096802250878e-06,
      "loss": 0.0137,
      "step": 1406820
    },
    {
      "epoch": 2.30232451575316,
      "grad_norm": 0.11283689737319946,
      "learning_rate": 5.430902130295264e-06,
      "loss": 0.0138,
      "step": 1406840
    },
    {
      "epoch": 2.3023572461918134,
      "grad_norm": 0.194271519780159,
      "learning_rate": 5.4308362380817456e-06,
      "loss": 0.0148,
      "step": 1406860
    },
    {
      "epoch": 2.302389976630467,
      "grad_norm": 0.4074185788631439,
      "learning_rate": 5.430770345868229e-06,
      "loss": 0.0119,
      "step": 1406880
    },
    {
      "epoch": 2.30242270706912,
      "grad_norm": 0.7782551050186157,
      "learning_rate": 5.430704453654712e-06,
      "loss": 0.014,
      "step": 1406900
    },
    {
      "epoch": 2.3024554375077733,
      "grad_norm": 0.7046252489089966,
      "learning_rate": 5.430638561441195e-06,
      "loss": 0.0142,
      "step": 1406920
    },
    {
      "epoch": 2.302488167946427,
      "grad_norm": 1.254173755645752,
      "learning_rate": 5.430572669227678e-06,
      "loss": 0.014,
      "step": 1406940
    },
    {
      "epoch": 2.30252089838508,
      "grad_norm": 0.5647218823432922,
      "learning_rate": 5.430506777014161e-06,
      "loss": 0.0114,
      "step": 1406960
    },
    {
      "epoch": 2.3025536288237336,
      "grad_norm": 0.47023990750312805,
      "learning_rate": 5.430440884800644e-06,
      "loss": 0.0142,
      "step": 1406980
    },
    {
      "epoch": 2.3025863592623868,
      "grad_norm": 0.404318630695343,
      "learning_rate": 5.4303749925871264e-06,
      "loss": 0.0174,
      "step": 1407000
    },
    {
      "epoch": 2.3026190897010403,
      "grad_norm": 0.36645206809043884,
      "learning_rate": 5.43030910037361e-06,
      "loss": 0.0113,
      "step": 1407020
    },
    {
      "epoch": 2.3026518201396935,
      "grad_norm": 1.6833653450012207,
      "learning_rate": 5.430243208160092e-06,
      "loss": 0.015,
      "step": 1407040
    },
    {
      "epoch": 2.3026845505783466,
      "grad_norm": 0.570134699344635,
      "learning_rate": 5.4301773159465755e-06,
      "loss": 0.0176,
      "step": 1407060
    },
    {
      "epoch": 2.3027172810170002,
      "grad_norm": 0.26807382702827454,
      "learning_rate": 5.430111423733057e-06,
      "loss": 0.0142,
      "step": 1407080
    },
    {
      "epoch": 2.3027500114556534,
      "grad_norm": 0.1768169403076172,
      "learning_rate": 5.430045531519541e-06,
      "loss": 0.015,
      "step": 1407100
    },
    {
      "epoch": 2.302782741894307,
      "grad_norm": 0.28791213035583496,
      "learning_rate": 5.429979639306024e-06,
      "loss": 0.0133,
      "step": 1407120
    },
    {
      "epoch": 2.30281547233296,
      "grad_norm": 0.2671525478363037,
      "learning_rate": 5.4299137470925065e-06,
      "loss": 0.0149,
      "step": 1407140
    },
    {
      "epoch": 2.3028482027716137,
      "grad_norm": 0.24803340435028076,
      "learning_rate": 5.429847854878989e-06,
      "loss": 0.0158,
      "step": 1407160
    },
    {
      "epoch": 2.302880933210267,
      "grad_norm": 0.533937394618988,
      "learning_rate": 5.429781962665473e-06,
      "loss": 0.0219,
      "step": 1407180
    },
    {
      "epoch": 2.30291366364892,
      "grad_norm": 0.4536762237548828,
      "learning_rate": 5.429716070451955e-06,
      "loss": 0.0133,
      "step": 1407200
    },
    {
      "epoch": 2.3029463940875736,
      "grad_norm": 0.06599044054746628,
      "learning_rate": 5.429650178238438e-06,
      "loss": 0.0091,
      "step": 1407220
    },
    {
      "epoch": 2.3029791245262268,
      "grad_norm": 0.24173523485660553,
      "learning_rate": 5.42958428602492e-06,
      "loss": 0.0126,
      "step": 1407240
    },
    {
      "epoch": 2.3030118549648804,
      "grad_norm": 0.40348803997039795,
      "learning_rate": 5.429518393811404e-06,
      "loss": 0.0136,
      "step": 1407260
    },
    {
      "epoch": 2.3030445854035335,
      "grad_norm": 0.14124973118305206,
      "learning_rate": 5.429452501597887e-06,
      "loss": 0.016,
      "step": 1407280
    },
    {
      "epoch": 2.303077315842187,
      "grad_norm": 0.33356401324272156,
      "learning_rate": 5.429386609384369e-06,
      "loss": 0.0145,
      "step": 1407300
    },
    {
      "epoch": 2.3031100462808403,
      "grad_norm": 0.5224781632423401,
      "learning_rate": 5.429320717170853e-06,
      "loss": 0.0174,
      "step": 1407320
    },
    {
      "epoch": 2.3031427767194934,
      "grad_norm": 0.6431076526641846,
      "learning_rate": 5.429254824957335e-06,
      "loss": 0.0207,
      "step": 1407340
    },
    {
      "epoch": 2.303175507158147,
      "grad_norm": 0.3486597239971161,
      "learning_rate": 5.429188932743818e-06,
      "loss": 0.016,
      "step": 1407360
    },
    {
      "epoch": 2.3032082375968,
      "grad_norm": 0.377797394990921,
      "learning_rate": 5.429123040530301e-06,
      "loss": 0.0105,
      "step": 1407380
    },
    {
      "epoch": 2.3032409680354538,
      "grad_norm": 0.3754490315914154,
      "learning_rate": 5.429057148316785e-06,
      "loss": 0.0137,
      "step": 1407400
    },
    {
      "epoch": 2.303273698474107,
      "grad_norm": 0.556452751159668,
      "learning_rate": 5.4289912561032665e-06,
      "loss": 0.0161,
      "step": 1407420
    },
    {
      "epoch": 2.3033064289127605,
      "grad_norm": 0.38449642062187195,
      "learning_rate": 5.42892536388975e-06,
      "loss": 0.0112,
      "step": 1407440
    },
    {
      "epoch": 2.3033391593514136,
      "grad_norm": 0.17015936970710754,
      "learning_rate": 5.428859471676232e-06,
      "loss": 0.0123,
      "step": 1407460
    },
    {
      "epoch": 2.303371889790067,
      "grad_norm": 0.37948861718177795,
      "learning_rate": 5.428793579462716e-06,
      "loss": 0.016,
      "step": 1407480
    },
    {
      "epoch": 2.3034046202287204,
      "grad_norm": 0.27862346172332764,
      "learning_rate": 5.4287276872491975e-06,
      "loss": 0.0101,
      "step": 1407500
    },
    {
      "epoch": 2.3034373506673735,
      "grad_norm": 0.8409417271614075,
      "learning_rate": 5.428661795035681e-06,
      "loss": 0.0116,
      "step": 1407520
    },
    {
      "epoch": 2.303470081106027,
      "grad_norm": 0.30816397070884705,
      "learning_rate": 5.428595902822164e-06,
      "loss": 0.0147,
      "step": 1407540
    },
    {
      "epoch": 2.3035028115446803,
      "grad_norm": 0.6620759963989258,
      "learning_rate": 5.4285300106086465e-06,
      "loss": 0.0133,
      "step": 1407560
    },
    {
      "epoch": 2.303535541983334,
      "grad_norm": 0.9247555136680603,
      "learning_rate": 5.428464118395129e-06,
      "loss": 0.0145,
      "step": 1407580
    },
    {
      "epoch": 2.303568272421987,
      "grad_norm": 0.3751009404659271,
      "learning_rate": 5.428398226181613e-06,
      "loss": 0.0143,
      "step": 1407600
    },
    {
      "epoch": 2.30360100286064,
      "grad_norm": 0.42750972509384155,
      "learning_rate": 5.428332333968095e-06,
      "loss": 0.0133,
      "step": 1407620
    },
    {
      "epoch": 2.3036337332992938,
      "grad_norm": 0.46438732743263245,
      "learning_rate": 5.428266441754578e-06,
      "loss": 0.0155,
      "step": 1407640
    },
    {
      "epoch": 2.303666463737947,
      "grad_norm": 0.6029215455055237,
      "learning_rate": 5.428200549541062e-06,
      "loss": 0.0109,
      "step": 1407660
    },
    {
      "epoch": 2.3036991941766005,
      "grad_norm": 0.1223142072558403,
      "learning_rate": 5.428134657327544e-06,
      "loss": 0.011,
      "step": 1407680
    },
    {
      "epoch": 2.3037319246152537,
      "grad_norm": 0.3438682556152344,
      "learning_rate": 5.428068765114027e-06,
      "loss": 0.0236,
      "step": 1407700
    },
    {
      "epoch": 2.3037646550539073,
      "grad_norm": 0.29481685161590576,
      "learning_rate": 5.428002872900509e-06,
      "loss": 0.0108,
      "step": 1407720
    },
    {
      "epoch": 2.3037973854925604,
      "grad_norm": 0.23469777405261993,
      "learning_rate": 5.427936980686993e-06,
      "loss": 0.0098,
      "step": 1407740
    },
    {
      "epoch": 2.3038301159312136,
      "grad_norm": 0.20012858510017395,
      "learning_rate": 5.427871088473476e-06,
      "loss": 0.0155,
      "step": 1407760
    },
    {
      "epoch": 2.303862846369867,
      "grad_norm": 0.08790306746959686,
      "learning_rate": 5.427805196259958e-06,
      "loss": 0.0121,
      "step": 1407780
    },
    {
      "epoch": 2.3038955768085203,
      "grad_norm": 0.38493457436561584,
      "learning_rate": 5.427739304046441e-06,
      "loss": 0.024,
      "step": 1407800
    },
    {
      "epoch": 2.303928307247174,
      "grad_norm": 0.8402179479598999,
      "learning_rate": 5.427673411832925e-06,
      "loss": 0.0153,
      "step": 1407820
    },
    {
      "epoch": 2.303961037685827,
      "grad_norm": 0.4841920733451843,
      "learning_rate": 5.427607519619407e-06,
      "loss": 0.0177,
      "step": 1407840
    },
    {
      "epoch": 2.3039937681244806,
      "grad_norm": 0.4997067451477051,
      "learning_rate": 5.42754162740589e-06,
      "loss": 0.0162,
      "step": 1407860
    },
    {
      "epoch": 2.304026498563134,
      "grad_norm": 0.5367109179496765,
      "learning_rate": 5.427475735192372e-06,
      "loss": 0.0151,
      "step": 1407880
    },
    {
      "epoch": 2.304059229001787,
      "grad_norm": 0.4119302034378052,
      "learning_rate": 5.427409842978856e-06,
      "loss": 0.0075,
      "step": 1407900
    },
    {
      "epoch": 2.3040919594404405,
      "grad_norm": 0.8048210740089417,
      "learning_rate": 5.427343950765338e-06,
      "loss": 0.0188,
      "step": 1407920
    },
    {
      "epoch": 2.3041246898790937,
      "grad_norm": 0.21397921442985535,
      "learning_rate": 5.427278058551821e-06,
      "loss": 0.0108,
      "step": 1407940
    },
    {
      "epoch": 2.3041574203177473,
      "grad_norm": 1.8276158571243286,
      "learning_rate": 5.427212166338304e-06,
      "loss": 0.0158,
      "step": 1407960
    },
    {
      "epoch": 2.3041901507564004,
      "grad_norm": 0.21831999719142914,
      "learning_rate": 5.4271462741247875e-06,
      "loss": 0.0075,
      "step": 1407980
    },
    {
      "epoch": 2.3042228811950536,
      "grad_norm": 0.16828323900699615,
      "learning_rate": 5.42708038191127e-06,
      "loss": 0.0181,
      "step": 1408000
    },
    {
      "epoch": 2.304255611633707,
      "grad_norm": 0.5723234415054321,
      "learning_rate": 5.427014489697753e-06,
      "loss": 0.019,
      "step": 1408020
    },
    {
      "epoch": 2.3042883420723603,
      "grad_norm": 1.3610622882843018,
      "learning_rate": 5.4269485974842365e-06,
      "loss": 0.0113,
      "step": 1408040
    },
    {
      "epoch": 2.304321072511014,
      "grad_norm": 0.2543773055076599,
      "learning_rate": 5.4268827052707184e-06,
      "loss": 0.0121,
      "step": 1408060
    },
    {
      "epoch": 2.304353802949667,
      "grad_norm": 0.903862476348877,
      "learning_rate": 5.426816813057202e-06,
      "loss": 0.0207,
      "step": 1408080
    },
    {
      "epoch": 2.3043865333883202,
      "grad_norm": 0.7507941722869873,
      "learning_rate": 5.426750920843684e-06,
      "loss": 0.0096,
      "step": 1408100
    },
    {
      "epoch": 2.304419263826974,
      "grad_norm": 0.2143545001745224,
      "learning_rate": 5.4266850286301675e-06,
      "loss": 0.0128,
      "step": 1408120
    },
    {
      "epoch": 2.304451994265627,
      "grad_norm": 0.5182974338531494,
      "learning_rate": 5.42661913641665e-06,
      "loss": 0.0266,
      "step": 1408140
    },
    {
      "epoch": 2.3044847247042806,
      "grad_norm": 0.49663522839546204,
      "learning_rate": 5.426553244203133e-06,
      "loss": 0.0127,
      "step": 1408160
    },
    {
      "epoch": 2.3045174551429337,
      "grad_norm": 0.6855957508087158,
      "learning_rate": 5.426487351989616e-06,
      "loss": 0.0193,
      "step": 1408180
    },
    {
      "epoch": 2.3045501855815873,
      "grad_norm": 1.5268698930740356,
      "learning_rate": 5.426421459776099e-06,
      "loss": 0.0144,
      "step": 1408200
    },
    {
      "epoch": 2.3045829160202405,
      "grad_norm": 0.1560370773077011,
      "learning_rate": 5.426355567562581e-06,
      "loss": 0.0115,
      "step": 1408220
    },
    {
      "epoch": 2.3046156464588936,
      "grad_norm": 0.19017907977104187,
      "learning_rate": 5.426289675349065e-06,
      "loss": 0.0148,
      "step": 1408240
    },
    {
      "epoch": 2.304648376897547,
      "grad_norm": 0.6084246635437012,
      "learning_rate": 5.426223783135547e-06,
      "loss": 0.0148,
      "step": 1408260
    },
    {
      "epoch": 2.3046811073362004,
      "grad_norm": 0.5311723947525024,
      "learning_rate": 5.42615789092203e-06,
      "loss": 0.0147,
      "step": 1408280
    },
    {
      "epoch": 2.304713837774854,
      "grad_norm": 0.17324955761432648,
      "learning_rate": 5.426091998708512e-06,
      "loss": 0.0169,
      "step": 1408300
    },
    {
      "epoch": 2.304746568213507,
      "grad_norm": 0.11948781460523605,
      "learning_rate": 5.426026106494996e-06,
      "loss": 0.0142,
      "step": 1408320
    },
    {
      "epoch": 2.3047792986521607,
      "grad_norm": 0.2097044736146927,
      "learning_rate": 5.425960214281479e-06,
      "loss": 0.0131,
      "step": 1408340
    },
    {
      "epoch": 2.304812029090814,
      "grad_norm": 0.8101388216018677,
      "learning_rate": 5.425894322067961e-06,
      "loss": 0.0128,
      "step": 1408360
    },
    {
      "epoch": 2.304844759529467,
      "grad_norm": 0.45460930466651917,
      "learning_rate": 5.425828429854445e-06,
      "loss": 0.0196,
      "step": 1408380
    },
    {
      "epoch": 2.3048774899681206,
      "grad_norm": 0.13829319179058075,
      "learning_rate": 5.4257625376409275e-06,
      "loss": 0.0088,
      "step": 1408400
    },
    {
      "epoch": 2.3049102204067737,
      "grad_norm": 0.3082553446292877,
      "learning_rate": 5.42569664542741e-06,
      "loss": 0.0165,
      "step": 1408420
    },
    {
      "epoch": 2.3049429508454273,
      "grad_norm": 0.3036629259586334,
      "learning_rate": 5.425630753213893e-06,
      "loss": 0.0138,
      "step": 1408440
    },
    {
      "epoch": 2.3049756812840805,
      "grad_norm": 0.11145444959402084,
      "learning_rate": 5.425564861000377e-06,
      "loss": 0.0149,
      "step": 1408460
    },
    {
      "epoch": 2.305008411722734,
      "grad_norm": 0.2961762845516205,
      "learning_rate": 5.4254989687868585e-06,
      "loss": 0.0166,
      "step": 1408480
    },
    {
      "epoch": 2.3050411421613872,
      "grad_norm": 0.6307346224784851,
      "learning_rate": 5.425433076573342e-06,
      "loss": 0.0169,
      "step": 1408500
    },
    {
      "epoch": 2.3050738726000404,
      "grad_norm": 0.1910514086484909,
      "learning_rate": 5.425367184359824e-06,
      "loss": 0.0121,
      "step": 1408520
    },
    {
      "epoch": 2.305106603038694,
      "grad_norm": 0.15818312764167786,
      "learning_rate": 5.4253012921463076e-06,
      "loss": 0.029,
      "step": 1408540
    },
    {
      "epoch": 2.305139333477347,
      "grad_norm": 0.6778191328048706,
      "learning_rate": 5.42523539993279e-06,
      "loss": 0.0091,
      "step": 1408560
    },
    {
      "epoch": 2.3051720639160007,
      "grad_norm": 0.20060059428215027,
      "learning_rate": 5.425169507719273e-06,
      "loss": 0.0147,
      "step": 1408580
    },
    {
      "epoch": 2.305204794354654,
      "grad_norm": 0.5614859461784363,
      "learning_rate": 5.425103615505756e-06,
      "loss": 0.0213,
      "step": 1408600
    },
    {
      "epoch": 2.3052375247933075,
      "grad_norm": 0.3832175135612488,
      "learning_rate": 5.425037723292239e-06,
      "loss": 0.0222,
      "step": 1408620
    },
    {
      "epoch": 2.3052702552319606,
      "grad_norm": 0.30869704484939575,
      "learning_rate": 5.424971831078721e-06,
      "loss": 0.0247,
      "step": 1408640
    },
    {
      "epoch": 2.3053029856706138,
      "grad_norm": 0.6238679885864258,
      "learning_rate": 5.424905938865205e-06,
      "loss": 0.0147,
      "step": 1408660
    },
    {
      "epoch": 2.3053357161092674,
      "grad_norm": 0.7187411785125732,
      "learning_rate": 5.424840046651687e-06,
      "loss": 0.014,
      "step": 1408680
    },
    {
      "epoch": 2.3053684465479205,
      "grad_norm": 0.2794160544872284,
      "learning_rate": 5.42477415443817e-06,
      "loss": 0.0105,
      "step": 1408700
    },
    {
      "epoch": 2.305401176986574,
      "grad_norm": 0.5679613947868347,
      "learning_rate": 5.424708262224654e-06,
      "loss": 0.0168,
      "step": 1408720
    },
    {
      "epoch": 2.3054339074252272,
      "grad_norm": 0.1878216713666916,
      "learning_rate": 5.424642370011136e-06,
      "loss": 0.0168,
      "step": 1408740
    },
    {
      "epoch": 2.305466637863881,
      "grad_norm": 0.23722538352012634,
      "learning_rate": 5.424576477797619e-06,
      "loss": 0.0163,
      "step": 1408760
    },
    {
      "epoch": 2.305499368302534,
      "grad_norm": 0.27309414744377136,
      "learning_rate": 5.424510585584102e-06,
      "loss": 0.0126,
      "step": 1408780
    },
    {
      "epoch": 2.305532098741187,
      "grad_norm": 0.2151591032743454,
      "learning_rate": 5.424444693370585e-06,
      "loss": 0.0105,
      "step": 1408800
    },
    {
      "epoch": 2.3055648291798407,
      "grad_norm": 0.2412533313035965,
      "learning_rate": 5.424378801157068e-06,
      "loss": 0.0193,
      "step": 1408820
    },
    {
      "epoch": 2.305597559618494,
      "grad_norm": 0.338722825050354,
      "learning_rate": 5.424312908943551e-06,
      "loss": 0.0139,
      "step": 1408840
    },
    {
      "epoch": 2.3056302900571475,
      "grad_norm": 0.044116437435150146,
      "learning_rate": 5.424247016730033e-06,
      "loss": 0.0148,
      "step": 1408860
    },
    {
      "epoch": 2.3056630204958006,
      "grad_norm": 0.32998794317245483,
      "learning_rate": 5.424181124516517e-06,
      "loss": 0.0154,
      "step": 1408880
    },
    {
      "epoch": 2.3056957509344542,
      "grad_norm": 0.19796721637248993,
      "learning_rate": 5.424115232302999e-06,
      "loss": 0.0105,
      "step": 1408900
    },
    {
      "epoch": 2.3057284813731074,
      "grad_norm": 0.8525535464286804,
      "learning_rate": 5.424049340089482e-06,
      "loss": 0.028,
      "step": 1408920
    },
    {
      "epoch": 2.3057612118117605,
      "grad_norm": 0.08814972639083862,
      "learning_rate": 5.423983447875965e-06,
      "loss": 0.0135,
      "step": 1408940
    },
    {
      "epoch": 2.305793942250414,
      "grad_norm": 0.5517690777778625,
      "learning_rate": 5.423917555662448e-06,
      "loss": 0.0164,
      "step": 1408960
    },
    {
      "epoch": 2.3058266726890673,
      "grad_norm": 0.6867942810058594,
      "learning_rate": 5.42385166344893e-06,
      "loss": 0.0197,
      "step": 1408980
    },
    {
      "epoch": 2.305859403127721,
      "grad_norm": 0.33975252509117126,
      "learning_rate": 5.423785771235414e-06,
      "loss": 0.0184,
      "step": 1409000
    },
    {
      "epoch": 2.305892133566374,
      "grad_norm": 0.2858535349369049,
      "learning_rate": 5.423719879021896e-06,
      "loss": 0.0165,
      "step": 1409020
    },
    {
      "epoch": 2.3059248640050276,
      "grad_norm": 0.7144100666046143,
      "learning_rate": 5.4236539868083794e-06,
      "loss": 0.0173,
      "step": 1409040
    },
    {
      "epoch": 2.3059575944436808,
      "grad_norm": 0.15076197683811188,
      "learning_rate": 5.423588094594863e-06,
      "loss": 0.0202,
      "step": 1409060
    },
    {
      "epoch": 2.305990324882334,
      "grad_norm": 0.2999831736087799,
      "learning_rate": 5.423522202381345e-06,
      "loss": 0.0111,
      "step": 1409080
    },
    {
      "epoch": 2.3060230553209875,
      "grad_norm": 0.6786239743232727,
      "learning_rate": 5.4234563101678285e-06,
      "loss": 0.0172,
      "step": 1409100
    },
    {
      "epoch": 2.3060557857596407,
      "grad_norm": 0.3507033884525299,
      "learning_rate": 5.42339041795431e-06,
      "loss": 0.021,
      "step": 1409120
    },
    {
      "epoch": 2.3060885161982942,
      "grad_norm": 0.29224154353141785,
      "learning_rate": 5.423324525740794e-06,
      "loss": 0.0086,
      "step": 1409140
    },
    {
      "epoch": 2.3061212466369474,
      "grad_norm": 0.2541864514350891,
      "learning_rate": 5.423258633527277e-06,
      "loss": 0.0138,
      "step": 1409160
    },
    {
      "epoch": 2.306153977075601,
      "grad_norm": 0.2788775861263275,
      "learning_rate": 5.4231927413137595e-06,
      "loss": 0.0103,
      "step": 1409180
    },
    {
      "epoch": 2.306186707514254,
      "grad_norm": 0.4157562553882599,
      "learning_rate": 5.423126849100242e-06,
      "loss": 0.0243,
      "step": 1409200
    },
    {
      "epoch": 2.3062194379529073,
      "grad_norm": 0.5661309957504272,
      "learning_rate": 5.423060956886726e-06,
      "loss": 0.0177,
      "step": 1409220
    },
    {
      "epoch": 2.306252168391561,
      "grad_norm": 0.17434926331043243,
      "learning_rate": 5.422995064673208e-06,
      "loss": 0.0141,
      "step": 1409240
    },
    {
      "epoch": 2.306284898830214,
      "grad_norm": 0.2507205605506897,
      "learning_rate": 5.422929172459691e-06,
      "loss": 0.0148,
      "step": 1409260
    },
    {
      "epoch": 2.3063176292688676,
      "grad_norm": 0.6383758187294006,
      "learning_rate": 5.422863280246173e-06,
      "loss": 0.0182,
      "step": 1409280
    },
    {
      "epoch": 2.306350359707521,
      "grad_norm": 0.5525235533714294,
      "learning_rate": 5.422797388032657e-06,
      "loss": 0.0194,
      "step": 1409300
    },
    {
      "epoch": 2.3063830901461744,
      "grad_norm": 0.23973840475082397,
      "learning_rate": 5.422731495819139e-06,
      "loss": 0.0128,
      "step": 1409320
    },
    {
      "epoch": 2.3064158205848275,
      "grad_norm": 0.2710205614566803,
      "learning_rate": 5.422665603605622e-06,
      "loss": 0.0105,
      "step": 1409340
    },
    {
      "epoch": 2.3064485510234807,
      "grad_norm": 0.1487283706665039,
      "learning_rate": 5.422599711392105e-06,
      "loss": 0.0142,
      "step": 1409360
    },
    {
      "epoch": 2.3064812814621343,
      "grad_norm": 0.29202497005462646,
      "learning_rate": 5.422533819178588e-06,
      "loss": 0.0175,
      "step": 1409380
    },
    {
      "epoch": 2.3065140119007874,
      "grad_norm": 0.12276656180620193,
      "learning_rate": 5.422467926965071e-06,
      "loss": 0.0139,
      "step": 1409400
    },
    {
      "epoch": 2.306546742339441,
      "grad_norm": 0.5014123916625977,
      "learning_rate": 5.422402034751554e-06,
      "loss": 0.0186,
      "step": 1409420
    },
    {
      "epoch": 2.306579472778094,
      "grad_norm": 0.198176771402359,
      "learning_rate": 5.422336142538037e-06,
      "loss": 0.0145,
      "step": 1409440
    },
    {
      "epoch": 2.3066122032167478,
      "grad_norm": 0.20705993473529816,
      "learning_rate": 5.4222702503245195e-06,
      "loss": 0.0142,
      "step": 1409460
    },
    {
      "epoch": 2.306644933655401,
      "grad_norm": 0.5478798747062683,
      "learning_rate": 5.422204358111003e-06,
      "loss": 0.0145,
      "step": 1409480
    },
    {
      "epoch": 2.306677664094054,
      "grad_norm": 0.7159548401832581,
      "learning_rate": 5.422138465897485e-06,
      "loss": 0.0165,
      "step": 1409500
    },
    {
      "epoch": 2.3067103945327077,
      "grad_norm": 0.12908652424812317,
      "learning_rate": 5.422072573683969e-06,
      "loss": 0.0109,
      "step": 1409520
    },
    {
      "epoch": 2.306743124971361,
      "grad_norm": 0.2503291368484497,
      "learning_rate": 5.4220066814704505e-06,
      "loss": 0.0123,
      "step": 1409540
    },
    {
      "epoch": 2.3067758554100144,
      "grad_norm": 0.23300126194953918,
      "learning_rate": 5.421940789256934e-06,
      "loss": 0.0187,
      "step": 1409560
    },
    {
      "epoch": 2.3068085858486675,
      "grad_norm": 0.7249649167060852,
      "learning_rate": 5.421874897043417e-06,
      "loss": 0.0164,
      "step": 1409580
    },
    {
      "epoch": 2.3068413162873207,
      "grad_norm": 0.2918720543384552,
      "learning_rate": 5.4218090048298996e-06,
      "loss": 0.0209,
      "step": 1409600
    },
    {
      "epoch": 2.3068740467259743,
      "grad_norm": 0.28001832962036133,
      "learning_rate": 5.421743112616382e-06,
      "loss": 0.0162,
      "step": 1409620
    },
    {
      "epoch": 2.3069067771646274,
      "grad_norm": 1.0921401977539062,
      "learning_rate": 5.421677220402866e-06,
      "loss": 0.0206,
      "step": 1409640
    },
    {
      "epoch": 2.306939507603281,
      "grad_norm": 0.3868829309940338,
      "learning_rate": 5.421611328189348e-06,
      "loss": 0.0177,
      "step": 1409660
    },
    {
      "epoch": 2.306972238041934,
      "grad_norm": 0.7758225798606873,
      "learning_rate": 5.421545435975831e-06,
      "loss": 0.016,
      "step": 1409680
    },
    {
      "epoch": 2.3070049684805873,
      "grad_norm": 0.1864989697933197,
      "learning_rate": 5.421479543762313e-06,
      "loss": 0.0263,
      "step": 1409700
    },
    {
      "epoch": 2.307037698919241,
      "grad_norm": 0.3238602578639984,
      "learning_rate": 5.421413651548797e-06,
      "loss": 0.0133,
      "step": 1409720
    },
    {
      "epoch": 2.307070429357894,
      "grad_norm": 0.3661053776741028,
      "learning_rate": 5.42134775933528e-06,
      "loss": 0.0163,
      "step": 1409740
    },
    {
      "epoch": 2.3071031597965477,
      "grad_norm": 2.2993710041046143,
      "learning_rate": 5.421281867121762e-06,
      "loss": 0.0185,
      "step": 1409760
    },
    {
      "epoch": 2.307135890235201,
      "grad_norm": 0.5759711265563965,
      "learning_rate": 5.421215974908246e-06,
      "loss": 0.0207,
      "step": 1409780
    },
    {
      "epoch": 2.3071686206738544,
      "grad_norm": 0.2432999163866043,
      "learning_rate": 5.421150082694729e-06,
      "loss": 0.0193,
      "step": 1409800
    },
    {
      "epoch": 2.3072013511125076,
      "grad_norm": 0.5772902965545654,
      "learning_rate": 5.421084190481211e-06,
      "loss": 0.0176,
      "step": 1409820
    },
    {
      "epoch": 2.3072340815511607,
      "grad_norm": 0.06177857518196106,
      "learning_rate": 5.421018298267694e-06,
      "loss": 0.0149,
      "step": 1409840
    },
    {
      "epoch": 2.3072668119898143,
      "grad_norm": 0.5886357426643372,
      "learning_rate": 5.420952406054178e-06,
      "loss": 0.0152,
      "step": 1409860
    },
    {
      "epoch": 2.3072995424284675,
      "grad_norm": 0.4584306478500366,
      "learning_rate": 5.42088651384066e-06,
      "loss": 0.0183,
      "step": 1409880
    },
    {
      "epoch": 2.307332272867121,
      "grad_norm": 0.5135151147842407,
      "learning_rate": 5.420820621627143e-06,
      "loss": 0.0106,
      "step": 1409900
    },
    {
      "epoch": 2.307365003305774,
      "grad_norm": 0.3096199035644531,
      "learning_rate": 5.420754729413625e-06,
      "loss": 0.0096,
      "step": 1409920
    },
    {
      "epoch": 2.307397733744428,
      "grad_norm": 0.4189804196357727,
      "learning_rate": 5.420688837200109e-06,
      "loss": 0.0165,
      "step": 1409940
    },
    {
      "epoch": 2.307430464183081,
      "grad_norm": 0.23970754444599152,
      "learning_rate": 5.420622944986591e-06,
      "loss": 0.0112,
      "step": 1409960
    },
    {
      "epoch": 2.307463194621734,
      "grad_norm": 0.2994324862957001,
      "learning_rate": 5.420557052773074e-06,
      "loss": 0.0087,
      "step": 1409980
    },
    {
      "epoch": 2.3074959250603877,
      "grad_norm": 0.5356078743934631,
      "learning_rate": 5.420491160559557e-06,
      "loss": 0.0204,
      "step": 1410000
    },
    {
      "epoch": 2.307528655499041,
      "grad_norm": 0.22396883368492126,
      "learning_rate": 5.4204252683460405e-06,
      "loss": 0.0143,
      "step": 1410020
    },
    {
      "epoch": 2.3075613859376944,
      "grad_norm": 0.10042007267475128,
      "learning_rate": 5.420359376132522e-06,
      "loss": 0.0087,
      "step": 1410040
    },
    {
      "epoch": 2.3075941163763476,
      "grad_norm": 0.13506056368350983,
      "learning_rate": 5.420293483919006e-06,
      "loss": 0.0207,
      "step": 1410060
    },
    {
      "epoch": 2.307626846815001,
      "grad_norm": 0.12174347043037415,
      "learning_rate": 5.420227591705488e-06,
      "loss": 0.0158,
      "step": 1410080
    },
    {
      "epoch": 2.3076595772536543,
      "grad_norm": 0.12909942865371704,
      "learning_rate": 5.4201616994919714e-06,
      "loss": 0.009,
      "step": 1410100
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.11427488178014755,
      "learning_rate": 5.420095807278455e-06,
      "loss": 0.0106,
      "step": 1410120
    },
    {
      "epoch": 2.307725038130961,
      "grad_norm": 0.19686846435070038,
      "learning_rate": 5.420029915064937e-06,
      "loss": 0.0171,
      "step": 1410140
    },
    {
      "epoch": 2.3077577685696142,
      "grad_norm": 0.5295214653015137,
      "learning_rate": 5.4199640228514205e-06,
      "loss": 0.011,
      "step": 1410160
    },
    {
      "epoch": 2.307790499008268,
      "grad_norm": 3.034972667694092,
      "learning_rate": 5.419898130637903e-06,
      "loss": 0.0169,
      "step": 1410180
    },
    {
      "epoch": 2.307823229446921,
      "grad_norm": 0.2511778473854065,
      "learning_rate": 5.419832238424386e-06,
      "loss": 0.0128,
      "step": 1410200
    },
    {
      "epoch": 2.3078559598855746,
      "grad_norm": 1.1704589128494263,
      "learning_rate": 5.419766346210869e-06,
      "loss": 0.0134,
      "step": 1410220
    },
    {
      "epoch": 2.3078886903242277,
      "grad_norm": 0.5851266384124756,
      "learning_rate": 5.419700453997352e-06,
      "loss": 0.0194,
      "step": 1410240
    },
    {
      "epoch": 2.307921420762881,
      "grad_norm": 1.061413049697876,
      "learning_rate": 5.419634561783834e-06,
      "loss": 0.017,
      "step": 1410260
    },
    {
      "epoch": 2.3079541512015345,
      "grad_norm": 0.06077620014548302,
      "learning_rate": 5.419568669570318e-06,
      "loss": 0.018,
      "step": 1410280
    },
    {
      "epoch": 2.3079868816401876,
      "grad_norm": 0.058667998760938644,
      "learning_rate": 5.4195027773568e-06,
      "loss": 0.0153,
      "step": 1410300
    },
    {
      "epoch": 2.308019612078841,
      "grad_norm": 0.1834506094455719,
      "learning_rate": 5.419436885143283e-06,
      "loss": 0.0123,
      "step": 1410320
    },
    {
      "epoch": 2.3080523425174944,
      "grad_norm": 0.5766171813011169,
      "learning_rate": 5.419370992929765e-06,
      "loss": 0.0117,
      "step": 1410340
    },
    {
      "epoch": 2.308085072956148,
      "grad_norm": 0.5198094844818115,
      "learning_rate": 5.419305100716249e-06,
      "loss": 0.0218,
      "step": 1410360
    },
    {
      "epoch": 2.308117803394801,
      "grad_norm": 0.6210351586341858,
      "learning_rate": 5.4192392085027315e-06,
      "loss": 0.0143,
      "step": 1410380
    },
    {
      "epoch": 2.3081505338334543,
      "grad_norm": 0.22192639112472534,
      "learning_rate": 5.419173316289214e-06,
      "loss": 0.0158,
      "step": 1410400
    },
    {
      "epoch": 2.308183264272108,
      "grad_norm": 0.31945523619651794,
      "learning_rate": 5.419107424075697e-06,
      "loss": 0.0208,
      "step": 1410420
    },
    {
      "epoch": 2.308215994710761,
      "grad_norm": 1.4694815874099731,
      "learning_rate": 5.4190415318621805e-06,
      "loss": 0.0243,
      "step": 1410440
    },
    {
      "epoch": 2.3082487251494146,
      "grad_norm": 1.0776562690734863,
      "learning_rate": 5.418975639648663e-06,
      "loss": 0.0159,
      "step": 1410460
    },
    {
      "epoch": 2.3082814555880677,
      "grad_norm": 0.5364329814910889,
      "learning_rate": 5.418909747435146e-06,
      "loss": 0.0132,
      "step": 1410480
    },
    {
      "epoch": 2.3083141860267213,
      "grad_norm": 0.5809217691421509,
      "learning_rate": 5.41884385522163e-06,
      "loss": 0.017,
      "step": 1410500
    },
    {
      "epoch": 2.3083469164653745,
      "grad_norm": 0.22702562808990479,
      "learning_rate": 5.4187779630081115e-06,
      "loss": 0.0195,
      "step": 1410520
    },
    {
      "epoch": 2.3083796469040276,
      "grad_norm": 0.09839265048503876,
      "learning_rate": 5.418712070794595e-06,
      "loss": 0.0135,
      "step": 1410540
    },
    {
      "epoch": 2.3084123773426812,
      "grad_norm": 0.4730444848537445,
      "learning_rate": 5.418646178581077e-06,
      "loss": 0.0099,
      "step": 1410560
    },
    {
      "epoch": 2.3084451077813344,
      "grad_norm": 0.17872631549835205,
      "learning_rate": 5.4185802863675606e-06,
      "loss": 0.015,
      "step": 1410580
    },
    {
      "epoch": 2.308477838219988,
      "grad_norm": 0.8110613226890564,
      "learning_rate": 5.418514394154043e-06,
      "loss": 0.0149,
      "step": 1410600
    },
    {
      "epoch": 2.308510568658641,
      "grad_norm": 0.13190145790576935,
      "learning_rate": 5.418448501940526e-06,
      "loss": 0.0192,
      "step": 1410620
    },
    {
      "epoch": 2.3085432990972947,
      "grad_norm": 0.21195237338542938,
      "learning_rate": 5.418382609727009e-06,
      "loss": 0.0184,
      "step": 1410640
    },
    {
      "epoch": 2.308576029535948,
      "grad_norm": 0.09670460224151611,
      "learning_rate": 5.418316717513492e-06,
      "loss": 0.0132,
      "step": 1410660
    },
    {
      "epoch": 2.308608759974601,
      "grad_norm": 0.22570526599884033,
      "learning_rate": 5.418250825299974e-06,
      "loss": 0.0169,
      "step": 1410680
    },
    {
      "epoch": 2.3086414904132546,
      "grad_norm": 0.34098318219184875,
      "learning_rate": 5.418184933086458e-06,
      "loss": 0.0097,
      "step": 1410700
    },
    {
      "epoch": 2.3086742208519078,
      "grad_norm": 0.14982348680496216,
      "learning_rate": 5.41811904087294e-06,
      "loss": 0.0226,
      "step": 1410720
    },
    {
      "epoch": 2.3087069512905614,
      "grad_norm": 0.5703802704811096,
      "learning_rate": 5.418053148659423e-06,
      "loss": 0.015,
      "step": 1410740
    },
    {
      "epoch": 2.3087396817292145,
      "grad_norm": 1.0870404243469238,
      "learning_rate": 5.417987256445906e-06,
      "loss": 0.0137,
      "step": 1410760
    },
    {
      "epoch": 2.308772412167868,
      "grad_norm": 0.8023574352264404,
      "learning_rate": 5.417921364232389e-06,
      "loss": 0.0154,
      "step": 1410780
    },
    {
      "epoch": 2.3088051426065213,
      "grad_norm": 0.6090697646141052,
      "learning_rate": 5.417855472018872e-06,
      "loss": 0.0137,
      "step": 1410800
    },
    {
      "epoch": 2.3088378730451744,
      "grad_norm": 0.8063990473747253,
      "learning_rate": 5.417789579805355e-06,
      "loss": 0.0166,
      "step": 1410820
    },
    {
      "epoch": 2.308870603483828,
      "grad_norm": 0.18424805998802185,
      "learning_rate": 5.417723687591838e-06,
      "loss": 0.0162,
      "step": 1410840
    },
    {
      "epoch": 2.308903333922481,
      "grad_norm": 0.29809024930000305,
      "learning_rate": 5.417657795378321e-06,
      "loss": 0.0126,
      "step": 1410860
    },
    {
      "epoch": 2.3089360643611347,
      "grad_norm": 0.34606045484542847,
      "learning_rate": 5.417591903164804e-06,
      "loss": 0.0223,
      "step": 1410880
    },
    {
      "epoch": 2.308968794799788,
      "grad_norm": 0.2142372727394104,
      "learning_rate": 5.417526010951286e-06,
      "loss": 0.0127,
      "step": 1410900
    },
    {
      "epoch": 2.3090015252384415,
      "grad_norm": 0.2854716181755066,
      "learning_rate": 5.41746011873777e-06,
      "loss": 0.0117,
      "step": 1410920
    },
    {
      "epoch": 2.3090342556770946,
      "grad_norm": 0.04191642627120018,
      "learning_rate": 5.417394226524252e-06,
      "loss": 0.0116,
      "step": 1410940
    },
    {
      "epoch": 2.309066986115748,
      "grad_norm": 0.5296351909637451,
      "learning_rate": 5.417328334310735e-06,
      "loss": 0.0102,
      "step": 1410960
    },
    {
      "epoch": 2.3090997165544014,
      "grad_norm": 1.0574545860290527,
      "learning_rate": 5.417262442097218e-06,
      "loss": 0.0225,
      "step": 1410980
    },
    {
      "epoch": 2.3091324469930545,
      "grad_norm": 0.3496411442756653,
      "learning_rate": 5.417196549883701e-06,
      "loss": 0.0151,
      "step": 1411000
    },
    {
      "epoch": 2.309165177431708,
      "grad_norm": 0.42972391843795776,
      "learning_rate": 5.417130657670183e-06,
      "loss": 0.0155,
      "step": 1411020
    },
    {
      "epoch": 2.3091979078703613,
      "grad_norm": 1.4280893802642822,
      "learning_rate": 5.417064765456667e-06,
      "loss": 0.0182,
      "step": 1411040
    },
    {
      "epoch": 2.3092306383090144,
      "grad_norm": 0.1359616219997406,
      "learning_rate": 5.416998873243149e-06,
      "loss": 0.0169,
      "step": 1411060
    },
    {
      "epoch": 2.309263368747668,
      "grad_norm": 0.5907509326934814,
      "learning_rate": 5.4169329810296325e-06,
      "loss": 0.0113,
      "step": 1411080
    },
    {
      "epoch": 2.309296099186321,
      "grad_norm": 0.25896793603897095,
      "learning_rate": 5.416867088816114e-06,
      "loss": 0.0108,
      "step": 1411100
    },
    {
      "epoch": 2.3093288296249748,
      "grad_norm": 0.1726517230272293,
      "learning_rate": 5.416801196602598e-06,
      "loss": 0.0127,
      "step": 1411120
    },
    {
      "epoch": 2.309361560063628,
      "grad_norm": 0.46074721217155457,
      "learning_rate": 5.41673530438908e-06,
      "loss": 0.0253,
      "step": 1411140
    },
    {
      "epoch": 2.309394290502281,
      "grad_norm": 0.2924458086490631,
      "learning_rate": 5.416669412175563e-06,
      "loss": 0.0132,
      "step": 1411160
    },
    {
      "epoch": 2.3094270209409347,
      "grad_norm": 0.5416089296340942,
      "learning_rate": 5.416603519962047e-06,
      "loss": 0.0135,
      "step": 1411180
    },
    {
      "epoch": 2.309459751379588,
      "grad_norm": 0.7341709733009338,
      "learning_rate": 5.416537627748529e-06,
      "loss": 0.0186,
      "step": 1411200
    },
    {
      "epoch": 2.3094924818182414,
      "grad_norm": 0.7213755249977112,
      "learning_rate": 5.4164717355350125e-06,
      "loss": 0.0204,
      "step": 1411220
    },
    {
      "epoch": 2.3095252122568946,
      "grad_norm": 0.2817839980125427,
      "learning_rate": 5.416405843321495e-06,
      "loss": 0.0102,
      "step": 1411240
    },
    {
      "epoch": 2.309557942695548,
      "grad_norm": 0.4181440472602844,
      "learning_rate": 5.416339951107979e-06,
      "loss": 0.0172,
      "step": 1411260
    },
    {
      "epoch": 2.3095906731342013,
      "grad_norm": 0.24369457364082336,
      "learning_rate": 5.416274058894461e-06,
      "loss": 0.0141,
      "step": 1411280
    },
    {
      "epoch": 2.3096234035728544,
      "grad_norm": 0.5722770690917969,
      "learning_rate": 5.416208166680944e-06,
      "loss": 0.0155,
      "step": 1411300
    },
    {
      "epoch": 2.309656134011508,
      "grad_norm": 0.12030599266290665,
      "learning_rate": 5.416142274467426e-06,
      "loss": 0.0114,
      "step": 1411320
    },
    {
      "epoch": 2.309688864450161,
      "grad_norm": 0.5962175726890564,
      "learning_rate": 5.41607638225391e-06,
      "loss": 0.0222,
      "step": 1411340
    },
    {
      "epoch": 2.309721594888815,
      "grad_norm": 0.3173196017742157,
      "learning_rate": 5.416010490040392e-06,
      "loss": 0.0176,
      "step": 1411360
    },
    {
      "epoch": 2.309754325327468,
      "grad_norm": 0.19500720500946045,
      "learning_rate": 5.415944597826875e-06,
      "loss": 0.0139,
      "step": 1411380
    },
    {
      "epoch": 2.3097870557661215,
      "grad_norm": 0.4857257902622223,
      "learning_rate": 5.415878705613358e-06,
      "loss": 0.0216,
      "step": 1411400
    },
    {
      "epoch": 2.3098197862047747,
      "grad_norm": 0.13987217843532562,
      "learning_rate": 5.415812813399841e-06,
      "loss": 0.012,
      "step": 1411420
    },
    {
      "epoch": 2.309852516643428,
      "grad_norm": 0.25216639041900635,
      "learning_rate": 5.4157469211863235e-06,
      "loss": 0.0124,
      "step": 1411440
    },
    {
      "epoch": 2.3098852470820814,
      "grad_norm": 0.24751253426074982,
      "learning_rate": 5.415681028972807e-06,
      "loss": 0.0101,
      "step": 1411460
    },
    {
      "epoch": 2.3099179775207346,
      "grad_norm": 0.8682113289833069,
      "learning_rate": 5.415615136759289e-06,
      "loss": 0.0187,
      "step": 1411480
    },
    {
      "epoch": 2.309950707959388,
      "grad_norm": 0.27218446135520935,
      "learning_rate": 5.4155492445457725e-06,
      "loss": 0.0182,
      "step": 1411500
    },
    {
      "epoch": 2.3099834383980413,
      "grad_norm": 0.8182938098907471,
      "learning_rate": 5.415483352332256e-06,
      "loss": 0.0207,
      "step": 1411520
    },
    {
      "epoch": 2.310016168836695,
      "grad_norm": 0.6418668627738953,
      "learning_rate": 5.415417460118738e-06,
      "loss": 0.012,
      "step": 1411540
    },
    {
      "epoch": 2.310048899275348,
      "grad_norm": 0.2690953314304352,
      "learning_rate": 5.415351567905222e-06,
      "loss": 0.018,
      "step": 1411560
    },
    {
      "epoch": 2.310081629714001,
      "grad_norm": 0.22204433381557465,
      "learning_rate": 5.4152856756917035e-06,
      "loss": 0.0172,
      "step": 1411580
    },
    {
      "epoch": 2.310114360152655,
      "grad_norm": 0.20230379700660706,
      "learning_rate": 5.415219783478187e-06,
      "loss": 0.0207,
      "step": 1411600
    },
    {
      "epoch": 2.310147090591308,
      "grad_norm": 1.1821811199188232,
      "learning_rate": 5.41515389126467e-06,
      "loss": 0.0161,
      "step": 1411620
    },
    {
      "epoch": 2.3101798210299616,
      "grad_norm": 0.14442786574363708,
      "learning_rate": 5.4150879990511526e-06,
      "loss": 0.0138,
      "step": 1411640
    },
    {
      "epoch": 2.3102125514686147,
      "grad_norm": 0.7111592292785645,
      "learning_rate": 5.415022106837635e-06,
      "loss": 0.0126,
      "step": 1411660
    },
    {
      "epoch": 2.3102452819072683,
      "grad_norm": 0.12182768434286118,
      "learning_rate": 5.414956214624119e-06,
      "loss": 0.0157,
      "step": 1411680
    },
    {
      "epoch": 2.3102780123459215,
      "grad_norm": 0.25313812494277954,
      "learning_rate": 5.414890322410601e-06,
      "loss": 0.0176,
      "step": 1411700
    },
    {
      "epoch": 2.3103107427845746,
      "grad_norm": 0.052015263587236404,
      "learning_rate": 5.414824430197084e-06,
      "loss": 0.01,
      "step": 1411720
    },
    {
      "epoch": 2.310343473223228,
      "grad_norm": 0.19646905362606049,
      "learning_rate": 5.414758537983566e-06,
      "loss": 0.0107,
      "step": 1411740
    },
    {
      "epoch": 2.3103762036618813,
      "grad_norm": 0.20431725680828094,
      "learning_rate": 5.41469264577005e-06,
      "loss": 0.0148,
      "step": 1411760
    },
    {
      "epoch": 2.310408934100535,
      "grad_norm": 0.44710588455200195,
      "learning_rate": 5.414626753556533e-06,
      "loss": 0.009,
      "step": 1411780
    },
    {
      "epoch": 2.310441664539188,
      "grad_norm": 0.424745112657547,
      "learning_rate": 5.414560861343015e-06,
      "loss": 0.0133,
      "step": 1411800
    },
    {
      "epoch": 2.3104743949778417,
      "grad_norm": 0.49232280254364014,
      "learning_rate": 5.414494969129498e-06,
      "loss": 0.0184,
      "step": 1411820
    },
    {
      "epoch": 2.310507125416495,
      "grad_norm": 0.34582415223121643,
      "learning_rate": 5.414429076915982e-06,
      "loss": 0.0142,
      "step": 1411840
    },
    {
      "epoch": 2.310539855855148,
      "grad_norm": 0.3919728994369507,
      "learning_rate": 5.414363184702464e-06,
      "loss": 0.0118,
      "step": 1411860
    },
    {
      "epoch": 2.3105725862938016,
      "grad_norm": 0.1325836032629013,
      "learning_rate": 5.414297292488947e-06,
      "loss": 0.0147,
      "step": 1411880
    },
    {
      "epoch": 2.3106053167324547,
      "grad_norm": 0.18270841240882874,
      "learning_rate": 5.414231400275431e-06,
      "loss": 0.0109,
      "step": 1411900
    },
    {
      "epoch": 2.3106380471711083,
      "grad_norm": 0.5444838404655457,
      "learning_rate": 5.414165508061913e-06,
      "loss": 0.0138,
      "step": 1411920
    },
    {
      "epoch": 2.3106707776097615,
      "grad_norm": 0.38052505254745483,
      "learning_rate": 5.414099615848396e-06,
      "loss": 0.0162,
      "step": 1411940
    },
    {
      "epoch": 2.310703508048415,
      "grad_norm": 0.48061877489089966,
      "learning_rate": 5.414033723634878e-06,
      "loss": 0.0202,
      "step": 1411960
    },
    {
      "epoch": 2.310736238487068,
      "grad_norm": 0.23445458710193634,
      "learning_rate": 5.413967831421362e-06,
      "loss": 0.0178,
      "step": 1411980
    },
    {
      "epoch": 2.3107689689257214,
      "grad_norm": 0.28760650753974915,
      "learning_rate": 5.413901939207844e-06,
      "loss": 0.0132,
      "step": 1412000
    },
    {
      "epoch": 2.310801699364375,
      "grad_norm": 0.40256935358047485,
      "learning_rate": 5.413836046994327e-06,
      "loss": 0.009,
      "step": 1412020
    },
    {
      "epoch": 2.310834429803028,
      "grad_norm": 0.5948441624641418,
      "learning_rate": 5.41377015478081e-06,
      "loss": 0.0183,
      "step": 1412040
    },
    {
      "epoch": 2.3108671602416817,
      "grad_norm": 0.6074610352516174,
      "learning_rate": 5.4137042625672935e-06,
      "loss": 0.0221,
      "step": 1412060
    },
    {
      "epoch": 2.310899890680335,
      "grad_norm": 0.44075819849967957,
      "learning_rate": 5.413638370353775e-06,
      "loss": 0.0105,
      "step": 1412080
    },
    {
      "epoch": 2.3109326211189885,
      "grad_norm": 0.22470557689666748,
      "learning_rate": 5.413572478140259e-06,
      "loss": 0.016,
      "step": 1412100
    },
    {
      "epoch": 2.3109653515576416,
      "grad_norm": 0.30973130464553833,
      "learning_rate": 5.413506585926741e-06,
      "loss": 0.0119,
      "step": 1412120
    },
    {
      "epoch": 2.3109980819962948,
      "grad_norm": 0.30951935052871704,
      "learning_rate": 5.4134406937132244e-06,
      "loss": 0.0211,
      "step": 1412140
    },
    {
      "epoch": 2.3110308124349483,
      "grad_norm": 0.08816837519407272,
      "learning_rate": 5.413374801499706e-06,
      "loss": 0.0098,
      "step": 1412160
    },
    {
      "epoch": 2.3110635428736015,
      "grad_norm": 0.2774718105792999,
      "learning_rate": 5.41330890928619e-06,
      "loss": 0.0129,
      "step": 1412180
    },
    {
      "epoch": 2.311096273312255,
      "grad_norm": 0.9059857130050659,
      "learning_rate": 5.413243017072673e-06,
      "loss": 0.0138,
      "step": 1412200
    },
    {
      "epoch": 2.3111290037509082,
      "grad_norm": 0.27188587188720703,
      "learning_rate": 5.413177124859155e-06,
      "loss": 0.0204,
      "step": 1412220
    },
    {
      "epoch": 2.311161734189562,
      "grad_norm": 0.3709415793418884,
      "learning_rate": 5.413111232645639e-06,
      "loss": 0.0138,
      "step": 1412240
    },
    {
      "epoch": 2.311194464628215,
      "grad_norm": 0.207236185669899,
      "learning_rate": 5.413045340432122e-06,
      "loss": 0.0137,
      "step": 1412260
    },
    {
      "epoch": 2.311227195066868,
      "grad_norm": 0.5980620384216309,
      "learning_rate": 5.4129794482186045e-06,
      "loss": 0.0227,
      "step": 1412280
    },
    {
      "epoch": 2.3112599255055217,
      "grad_norm": 0.3854635953903198,
      "learning_rate": 5.412913556005087e-06,
      "loss": 0.0195,
      "step": 1412300
    },
    {
      "epoch": 2.311292655944175,
      "grad_norm": 0.3701647222042084,
      "learning_rate": 5.412847663791571e-06,
      "loss": 0.01,
      "step": 1412320
    },
    {
      "epoch": 2.3113253863828285,
      "grad_norm": 3.536026954650879,
      "learning_rate": 5.412781771578053e-06,
      "loss": 0.0119,
      "step": 1412340
    },
    {
      "epoch": 2.3113581168214816,
      "grad_norm": 0.0623188316822052,
      "learning_rate": 5.412715879364536e-06,
      "loss": 0.0083,
      "step": 1412360
    },
    {
      "epoch": 2.311390847260135,
      "grad_norm": 0.7833964824676514,
      "learning_rate": 5.412649987151018e-06,
      "loss": 0.019,
      "step": 1412380
    },
    {
      "epoch": 2.3114235776987884,
      "grad_norm": 1.4114935398101807,
      "learning_rate": 5.412584094937502e-06,
      "loss": 0.0204,
      "step": 1412400
    },
    {
      "epoch": 2.3114563081374415,
      "grad_norm": 0.13254302740097046,
      "learning_rate": 5.4125182027239845e-06,
      "loss": 0.0302,
      "step": 1412420
    },
    {
      "epoch": 2.311489038576095,
      "grad_norm": 0.9878837466239929,
      "learning_rate": 5.412452310510467e-06,
      "loss": 0.017,
      "step": 1412440
    },
    {
      "epoch": 2.3115217690147483,
      "grad_norm": 0.32735326886177063,
      "learning_rate": 5.41238641829695e-06,
      "loss": 0.0111,
      "step": 1412460
    },
    {
      "epoch": 2.311554499453402,
      "grad_norm": 0.4552123546600342,
      "learning_rate": 5.4123205260834336e-06,
      "loss": 0.0177,
      "step": 1412480
    },
    {
      "epoch": 2.311587229892055,
      "grad_norm": 0.3638608455657959,
      "learning_rate": 5.4122546338699154e-06,
      "loss": 0.017,
      "step": 1412500
    },
    {
      "epoch": 2.3116199603307086,
      "grad_norm": 0.13007190823554993,
      "learning_rate": 5.412188741656399e-06,
      "loss": 0.0161,
      "step": 1412520
    },
    {
      "epoch": 2.3116526907693618,
      "grad_norm": 0.22945277392864227,
      "learning_rate": 5.412122849442881e-06,
      "loss": 0.0142,
      "step": 1412540
    },
    {
      "epoch": 2.311685421208015,
      "grad_norm": 0.3417201638221741,
      "learning_rate": 5.4120569572293645e-06,
      "loss": 0.0134,
      "step": 1412560
    },
    {
      "epoch": 2.3117181516466685,
      "grad_norm": 1.2551707029342651,
      "learning_rate": 5.411991065015848e-06,
      "loss": 0.0183,
      "step": 1412580
    },
    {
      "epoch": 2.3117508820853216,
      "grad_norm": 0.3054076135158539,
      "learning_rate": 5.41192517280233e-06,
      "loss": 0.0179,
      "step": 1412600
    },
    {
      "epoch": 2.3117836125239752,
      "grad_norm": 0.6991168260574341,
      "learning_rate": 5.411859280588814e-06,
      "loss": 0.0194,
      "step": 1412620
    },
    {
      "epoch": 2.3118163429626284,
      "grad_norm": 0.09933793544769287,
      "learning_rate": 5.411793388375296e-06,
      "loss": 0.0151,
      "step": 1412640
    },
    {
      "epoch": 2.3118490734012815,
      "grad_norm": 0.4260901212692261,
      "learning_rate": 5.411727496161779e-06,
      "loss": 0.0166,
      "step": 1412660
    },
    {
      "epoch": 2.311881803839935,
      "grad_norm": 0.19786734879016876,
      "learning_rate": 5.411661603948262e-06,
      "loss": 0.0158,
      "step": 1412680
    },
    {
      "epoch": 2.3119145342785883,
      "grad_norm": 0.46811535954475403,
      "learning_rate": 5.411595711734745e-06,
      "loss": 0.0137,
      "step": 1412700
    },
    {
      "epoch": 2.311947264717242,
      "grad_norm": 0.11490775644779205,
      "learning_rate": 5.411529819521227e-06,
      "loss": 0.014,
      "step": 1412720
    },
    {
      "epoch": 2.311979995155895,
      "grad_norm": 0.172357439994812,
      "learning_rate": 5.411463927307711e-06,
      "loss": 0.0142,
      "step": 1412740
    },
    {
      "epoch": 2.312012725594548,
      "grad_norm": 0.5282237529754639,
      "learning_rate": 5.411398035094193e-06,
      "loss": 0.0153,
      "step": 1412760
    },
    {
      "epoch": 2.3120454560332018,
      "grad_norm": 0.8988932371139526,
      "learning_rate": 5.411332142880676e-06,
      "loss": 0.0132,
      "step": 1412780
    },
    {
      "epoch": 2.312078186471855,
      "grad_norm": 1.0336294174194336,
      "learning_rate": 5.411266250667159e-06,
      "loss": 0.0146,
      "step": 1412800
    },
    {
      "epoch": 2.3121109169105085,
      "grad_norm": 0.10619199275970459,
      "learning_rate": 5.411200358453642e-06,
      "loss": 0.0151,
      "step": 1412820
    },
    {
      "epoch": 2.3121436473491617,
      "grad_norm": 0.28969255089759827,
      "learning_rate": 5.4111344662401246e-06,
      "loss": 0.0125,
      "step": 1412840
    },
    {
      "epoch": 2.3121763777878153,
      "grad_norm": 0.17494544386863708,
      "learning_rate": 5.411068574026608e-06,
      "loss": 0.0174,
      "step": 1412860
    },
    {
      "epoch": 2.3122091082264684,
      "grad_norm": 0.24078306555747986,
      "learning_rate": 5.41100268181309e-06,
      "loss": 0.0143,
      "step": 1412880
    },
    {
      "epoch": 2.3122418386651216,
      "grad_norm": 0.6450644135475159,
      "learning_rate": 5.410936789599574e-06,
      "loss": 0.0148,
      "step": 1412900
    },
    {
      "epoch": 2.312274569103775,
      "grad_norm": 0.18952035903930664,
      "learning_rate": 5.410870897386057e-06,
      "loss": 0.015,
      "step": 1412920
    },
    {
      "epoch": 2.3123072995424283,
      "grad_norm": 0.3506765365600586,
      "learning_rate": 5.410805005172539e-06,
      "loss": 0.0117,
      "step": 1412940
    },
    {
      "epoch": 2.312340029981082,
      "grad_norm": 0.19986562430858612,
      "learning_rate": 5.410739112959023e-06,
      "loss": 0.0141,
      "step": 1412960
    },
    {
      "epoch": 2.312372760419735,
      "grad_norm": 0.5315988659858704,
      "learning_rate": 5.410673220745505e-06,
      "loss": 0.0155,
      "step": 1412980
    },
    {
      "epoch": 2.3124054908583886,
      "grad_norm": 0.4974420368671417,
      "learning_rate": 5.410607328531988e-06,
      "loss": 0.0142,
      "step": 1413000
    },
    {
      "epoch": 2.312438221297042,
      "grad_norm": 0.43208515644073486,
      "learning_rate": 5.410541436318471e-06,
      "loss": 0.0113,
      "step": 1413020
    },
    {
      "epoch": 2.312470951735695,
      "grad_norm": 0.9683355689048767,
      "learning_rate": 5.410475544104954e-06,
      "loss": 0.012,
      "step": 1413040
    },
    {
      "epoch": 2.3125036821743485,
      "grad_norm": 0.7891632914543152,
      "learning_rate": 5.410409651891436e-06,
      "loss": 0.0212,
      "step": 1413060
    },
    {
      "epoch": 2.3125364126130017,
      "grad_norm": 0.2634947597980499,
      "learning_rate": 5.41034375967792e-06,
      "loss": 0.0131,
      "step": 1413080
    },
    {
      "epoch": 2.3125691430516553,
      "grad_norm": 0.8351248502731323,
      "learning_rate": 5.410277867464402e-06,
      "loss": 0.0185,
      "step": 1413100
    },
    {
      "epoch": 2.3126018734903084,
      "grad_norm": 0.11454085260629654,
      "learning_rate": 5.4102119752508855e-06,
      "loss": 0.0184,
      "step": 1413120
    },
    {
      "epoch": 2.312634603928962,
      "grad_norm": 0.6082308292388916,
      "learning_rate": 5.410146083037367e-06,
      "loss": 0.0118,
      "step": 1413140
    },
    {
      "epoch": 2.312667334367615,
      "grad_norm": 0.2679731249809265,
      "learning_rate": 5.410080190823851e-06,
      "loss": 0.0145,
      "step": 1413160
    },
    {
      "epoch": 2.3127000648062683,
      "grad_norm": 0.09015198051929474,
      "learning_rate": 5.410014298610333e-06,
      "loss": 0.012,
      "step": 1413180
    },
    {
      "epoch": 2.312732795244922,
      "grad_norm": 0.6369718909263611,
      "learning_rate": 5.409948406396816e-06,
      "loss": 0.0176,
      "step": 1413200
    },
    {
      "epoch": 2.312765525683575,
      "grad_norm": 0.20419904589653015,
      "learning_rate": 5.409882514183299e-06,
      "loss": 0.0157,
      "step": 1413220
    },
    {
      "epoch": 2.3127982561222287,
      "grad_norm": 0.5868598222732544,
      "learning_rate": 5.409816621969782e-06,
      "loss": 0.0178,
      "step": 1413240
    },
    {
      "epoch": 2.312830986560882,
      "grad_norm": 0.18769437074661255,
      "learning_rate": 5.4097507297562655e-06,
      "loss": 0.0143,
      "step": 1413260
    },
    {
      "epoch": 2.3128637169995354,
      "grad_norm": 0.31726738810539246,
      "learning_rate": 5.409684837542748e-06,
      "loss": 0.01,
      "step": 1413280
    },
    {
      "epoch": 2.3128964474381886,
      "grad_norm": 0.6201534271240234,
      "learning_rate": 5.409618945329231e-06,
      "loss": 0.0097,
      "step": 1413300
    },
    {
      "epoch": 2.3129291778768417,
      "grad_norm": 0.5672016143798828,
      "learning_rate": 5.409553053115714e-06,
      "loss": 0.0146,
      "step": 1413320
    },
    {
      "epoch": 2.3129619083154953,
      "grad_norm": 0.4189050495624542,
      "learning_rate": 5.409487160902197e-06,
      "loss": 0.019,
      "step": 1413340
    },
    {
      "epoch": 2.3129946387541485,
      "grad_norm": 0.3070511519908905,
      "learning_rate": 5.409421268688679e-06,
      "loss": 0.0134,
      "step": 1413360
    },
    {
      "epoch": 2.313027369192802,
      "grad_norm": 0.5908563733100891,
      "learning_rate": 5.409355376475163e-06,
      "loss": 0.0125,
      "step": 1413380
    },
    {
      "epoch": 2.313060099631455,
      "grad_norm": 0.3220328688621521,
      "learning_rate": 5.409289484261645e-06,
      "loss": 0.0106,
      "step": 1413400
    },
    {
      "epoch": 2.313092830070109,
      "grad_norm": 0.18081733584403992,
      "learning_rate": 5.409223592048128e-06,
      "loss": 0.0118,
      "step": 1413420
    },
    {
      "epoch": 2.313125560508762,
      "grad_norm": 0.7314925789833069,
      "learning_rate": 5.409157699834611e-06,
      "loss": 0.0151,
      "step": 1413440
    },
    {
      "epoch": 2.313158290947415,
      "grad_norm": 0.19815434515476227,
      "learning_rate": 5.409091807621094e-06,
      "loss": 0.0172,
      "step": 1413460
    },
    {
      "epoch": 2.3131910213860687,
      "grad_norm": 0.5628904700279236,
      "learning_rate": 5.4090259154075765e-06,
      "loss": 0.0154,
      "step": 1413480
    },
    {
      "epoch": 2.313223751824722,
      "grad_norm": 1.1843115091323853,
      "learning_rate": 5.40896002319406e-06,
      "loss": 0.0127,
      "step": 1413500
    },
    {
      "epoch": 2.3132564822633754,
      "grad_norm": 0.21423935890197754,
      "learning_rate": 5.408894130980542e-06,
      "loss": 0.0141,
      "step": 1413520
    },
    {
      "epoch": 2.3132892127020286,
      "grad_norm": 0.24486440420150757,
      "learning_rate": 5.4088282387670255e-06,
      "loss": 0.0192,
      "step": 1413540
    },
    {
      "epoch": 2.313321943140682,
      "grad_norm": 0.256350040435791,
      "learning_rate": 5.4087623465535074e-06,
      "loss": 0.0151,
      "step": 1413560
    },
    {
      "epoch": 2.3133546735793353,
      "grad_norm": 0.43415266275405884,
      "learning_rate": 5.408696454339991e-06,
      "loss": 0.0137,
      "step": 1413580
    },
    {
      "epoch": 2.3133874040179885,
      "grad_norm": 0.7992295026779175,
      "learning_rate": 5.408630562126474e-06,
      "loss": 0.0127,
      "step": 1413600
    },
    {
      "epoch": 2.313420134456642,
      "grad_norm": 0.2659415900707245,
      "learning_rate": 5.4085646699129565e-06,
      "loss": 0.0165,
      "step": 1413620
    },
    {
      "epoch": 2.3134528648952952,
      "grad_norm": 0.7368195652961731,
      "learning_rate": 5.40849877769944e-06,
      "loss": 0.0216,
      "step": 1413640
    },
    {
      "epoch": 2.313485595333949,
      "grad_norm": 0.13959506154060364,
      "learning_rate": 5.408432885485923e-06,
      "loss": 0.0106,
      "step": 1413660
    },
    {
      "epoch": 2.313518325772602,
      "grad_norm": 0.2568325698375702,
      "learning_rate": 5.4083669932724056e-06,
      "loss": 0.0192,
      "step": 1413680
    },
    {
      "epoch": 2.3135510562112556,
      "grad_norm": 0.25516805052757263,
      "learning_rate": 5.408301101058888e-06,
      "loss": 0.0247,
      "step": 1413700
    },
    {
      "epoch": 2.3135837866499087,
      "grad_norm": 0.17883916199207306,
      "learning_rate": 5.408235208845372e-06,
      "loss": 0.0127,
      "step": 1413720
    },
    {
      "epoch": 2.313616517088562,
      "grad_norm": 0.40980735421180725,
      "learning_rate": 5.408169316631854e-06,
      "loss": 0.0113,
      "step": 1413740
    },
    {
      "epoch": 2.3136492475272155,
      "grad_norm": 0.2223265916109085,
      "learning_rate": 5.408103424418337e-06,
      "loss": 0.0147,
      "step": 1413760
    },
    {
      "epoch": 2.3136819779658686,
      "grad_norm": 0.42840227484703064,
      "learning_rate": 5.408037532204819e-06,
      "loss": 0.0072,
      "step": 1413780
    },
    {
      "epoch": 2.313714708404522,
      "grad_norm": 1.1513268947601318,
      "learning_rate": 5.407971639991303e-06,
      "loss": 0.0142,
      "step": 1413800
    },
    {
      "epoch": 2.3137474388431754,
      "grad_norm": 0.9759175777435303,
      "learning_rate": 5.407905747777786e-06,
      "loss": 0.0139,
      "step": 1413820
    },
    {
      "epoch": 2.313780169281829,
      "grad_norm": 0.2526182234287262,
      "learning_rate": 5.407839855564268e-06,
      "loss": 0.0184,
      "step": 1413840
    },
    {
      "epoch": 2.313812899720482,
      "grad_norm": 0.5847700834274292,
      "learning_rate": 5.407773963350751e-06,
      "loss": 0.0127,
      "step": 1413860
    },
    {
      "epoch": 2.3138456301591352,
      "grad_norm": 0.4384371340274811,
      "learning_rate": 5.407708071137235e-06,
      "loss": 0.0143,
      "step": 1413880
    },
    {
      "epoch": 2.313878360597789,
      "grad_norm": 0.16856348514556885,
      "learning_rate": 5.4076421789237165e-06,
      "loss": 0.0109,
      "step": 1413900
    },
    {
      "epoch": 2.313911091036442,
      "grad_norm": 0.5656577348709106,
      "learning_rate": 5.4075762867102e-06,
      "loss": 0.015,
      "step": 1413920
    },
    {
      "epoch": 2.3139438214750956,
      "grad_norm": 0.9121515154838562,
      "learning_rate": 5.407510394496682e-06,
      "loss": 0.0159,
      "step": 1413940
    },
    {
      "epoch": 2.3139765519137487,
      "grad_norm": 0.6023175120353699,
      "learning_rate": 5.407444502283166e-06,
      "loss": 0.0111,
      "step": 1413960
    },
    {
      "epoch": 2.3140092823524023,
      "grad_norm": 0.33518925309181213,
      "learning_rate": 5.407378610069649e-06,
      "loss": 0.0145,
      "step": 1413980
    },
    {
      "epoch": 2.3140420127910555,
      "grad_norm": 0.09978924691677094,
      "learning_rate": 5.407312717856131e-06,
      "loss": 0.0139,
      "step": 1414000
    },
    {
      "epoch": 2.3140747432297086,
      "grad_norm": 0.07583390921354294,
      "learning_rate": 5.407246825642615e-06,
      "loss": 0.0173,
      "step": 1414020
    },
    {
      "epoch": 2.3141074736683622,
      "grad_norm": 0.47278350591659546,
      "learning_rate": 5.407180933429097e-06,
      "loss": 0.0159,
      "step": 1414040
    },
    {
      "epoch": 2.3141402041070154,
      "grad_norm": 0.16884928941726685,
      "learning_rate": 5.40711504121558e-06,
      "loss": 0.0163,
      "step": 1414060
    },
    {
      "epoch": 2.314172934545669,
      "grad_norm": 2.180055856704712,
      "learning_rate": 5.407049149002063e-06,
      "loss": 0.0126,
      "step": 1414080
    },
    {
      "epoch": 2.314205664984322,
      "grad_norm": 0.22374318540096283,
      "learning_rate": 5.4069832567885465e-06,
      "loss": 0.0143,
      "step": 1414100
    },
    {
      "epoch": 2.3142383954229753,
      "grad_norm": 1.1689614057540894,
      "learning_rate": 5.406917364575028e-06,
      "loss": 0.0149,
      "step": 1414120
    },
    {
      "epoch": 2.314271125861629,
      "grad_norm": 0.30315423011779785,
      "learning_rate": 5.406851472361512e-06,
      "loss": 0.0154,
      "step": 1414140
    },
    {
      "epoch": 2.314303856300282,
      "grad_norm": 1.3234808444976807,
      "learning_rate": 5.406785580147994e-06,
      "loss": 0.0141,
      "step": 1414160
    },
    {
      "epoch": 2.3143365867389356,
      "grad_norm": 1.1359248161315918,
      "learning_rate": 5.4067196879344774e-06,
      "loss": 0.0112,
      "step": 1414180
    },
    {
      "epoch": 2.3143693171775888,
      "grad_norm": 1.0646960735321045,
      "learning_rate": 5.406653795720959e-06,
      "loss": 0.0169,
      "step": 1414200
    },
    {
      "epoch": 2.314402047616242,
      "grad_norm": 0.08970789611339569,
      "learning_rate": 5.406587903507443e-06,
      "loss": 0.0094,
      "step": 1414220
    },
    {
      "epoch": 2.3144347780548955,
      "grad_norm": 0.45251142978668213,
      "learning_rate": 5.406522011293926e-06,
      "loss": 0.0131,
      "step": 1414240
    },
    {
      "epoch": 2.3144675084935487,
      "grad_norm": 0.21977809071540833,
      "learning_rate": 5.406456119080408e-06,
      "loss": 0.0279,
      "step": 1414260
    },
    {
      "epoch": 2.3145002389322022,
      "grad_norm": 0.22284506261348724,
      "learning_rate": 5.406390226866891e-06,
      "loss": 0.0114,
      "step": 1414280
    },
    {
      "epoch": 2.3145329693708554,
      "grad_norm": 0.07797954976558685,
      "learning_rate": 5.406324334653375e-06,
      "loss": 0.0149,
      "step": 1414300
    },
    {
      "epoch": 2.314565699809509,
      "grad_norm": 0.24887020885944366,
      "learning_rate": 5.4062584424398575e-06,
      "loss": 0.0194,
      "step": 1414320
    },
    {
      "epoch": 2.314598430248162,
      "grad_norm": 0.775651216506958,
      "learning_rate": 5.40619255022634e-06,
      "loss": 0.0146,
      "step": 1414340
    },
    {
      "epoch": 2.3146311606868153,
      "grad_norm": 0.20732933282852173,
      "learning_rate": 5.406126658012824e-06,
      "loss": 0.012,
      "step": 1414360
    },
    {
      "epoch": 2.314663891125469,
      "grad_norm": 0.5524435639381409,
      "learning_rate": 5.406060765799306e-06,
      "loss": 0.0141,
      "step": 1414380
    },
    {
      "epoch": 2.314696621564122,
      "grad_norm": 0.44036710262298584,
      "learning_rate": 5.405994873585789e-06,
      "loss": 0.0244,
      "step": 1414400
    },
    {
      "epoch": 2.3147293520027756,
      "grad_norm": 0.4365079402923584,
      "learning_rate": 5.405928981372271e-06,
      "loss": 0.0157,
      "step": 1414420
    },
    {
      "epoch": 2.314762082441429,
      "grad_norm": 0.10974810272455215,
      "learning_rate": 5.405863089158755e-06,
      "loss": 0.0128,
      "step": 1414440
    },
    {
      "epoch": 2.3147948128800824,
      "grad_norm": 0.1347508728504181,
      "learning_rate": 5.4057971969452375e-06,
      "loss": 0.015,
      "step": 1414460
    },
    {
      "epoch": 2.3148275433187355,
      "grad_norm": 0.31940630078315735,
      "learning_rate": 5.40573130473172e-06,
      "loss": 0.0147,
      "step": 1414480
    },
    {
      "epoch": 2.3148602737573887,
      "grad_norm": 0.3319641351699829,
      "learning_rate": 5.405665412518203e-06,
      "loss": 0.0123,
      "step": 1414500
    },
    {
      "epoch": 2.3148930041960423,
      "grad_norm": 0.38741588592529297,
      "learning_rate": 5.4055995203046866e-06,
      "loss": 0.0115,
      "step": 1414520
    },
    {
      "epoch": 2.3149257346346954,
      "grad_norm": 0.6328341364860535,
      "learning_rate": 5.4055336280911685e-06,
      "loss": 0.0171,
      "step": 1414540
    },
    {
      "epoch": 2.314958465073349,
      "grad_norm": 0.12928155064582825,
      "learning_rate": 5.405467735877652e-06,
      "loss": 0.0112,
      "step": 1414560
    },
    {
      "epoch": 2.314991195512002,
      "grad_norm": 0.20945072174072266,
      "learning_rate": 5.405401843664134e-06,
      "loss": 0.0132,
      "step": 1414580
    },
    {
      "epoch": 2.3150239259506558,
      "grad_norm": 0.2129278928041458,
      "learning_rate": 5.4053359514506175e-06,
      "loss": 0.0176,
      "step": 1414600
    },
    {
      "epoch": 2.315056656389309,
      "grad_norm": 0.3235318660736084,
      "learning_rate": 5.4052700592371e-06,
      "loss": 0.0157,
      "step": 1414620
    },
    {
      "epoch": 2.315089386827962,
      "grad_norm": 0.3923828601837158,
      "learning_rate": 5.405204167023583e-06,
      "loss": 0.0123,
      "step": 1414640
    },
    {
      "epoch": 2.3151221172666157,
      "grad_norm": 0.12998223304748535,
      "learning_rate": 5.405138274810066e-06,
      "loss": 0.0155,
      "step": 1414660
    },
    {
      "epoch": 2.315154847705269,
      "grad_norm": 0.31782352924346924,
      "learning_rate": 5.405072382596549e-06,
      "loss": 0.0136,
      "step": 1414680
    },
    {
      "epoch": 2.3151875781439224,
      "grad_norm": 0.2796976566314697,
      "learning_rate": 5.405006490383032e-06,
      "loss": 0.0217,
      "step": 1414700
    },
    {
      "epoch": 2.3152203085825755,
      "grad_norm": 0.4726133942604065,
      "learning_rate": 5.404940598169515e-06,
      "loss": 0.0204,
      "step": 1414720
    },
    {
      "epoch": 2.315253039021229,
      "grad_norm": 1.590695858001709,
      "learning_rate": 5.404874705955998e-06,
      "loss": 0.0123,
      "step": 1414740
    },
    {
      "epoch": 2.3152857694598823,
      "grad_norm": 0.1913730800151825,
      "learning_rate": 5.40480881374248e-06,
      "loss": 0.0123,
      "step": 1414760
    },
    {
      "epoch": 2.3153184998985354,
      "grad_norm": 0.1895267516374588,
      "learning_rate": 5.404742921528964e-06,
      "loss": 0.0151,
      "step": 1414780
    },
    {
      "epoch": 2.315351230337189,
      "grad_norm": 0.4785035252571106,
      "learning_rate": 5.404677029315446e-06,
      "loss": 0.0116,
      "step": 1414800
    },
    {
      "epoch": 2.315383960775842,
      "grad_norm": 0.1555619090795517,
      "learning_rate": 5.404611137101929e-06,
      "loss": 0.0103,
      "step": 1414820
    },
    {
      "epoch": 2.315416691214496,
      "grad_norm": 0.20942099392414093,
      "learning_rate": 5.404545244888412e-06,
      "loss": 0.0164,
      "step": 1414840
    },
    {
      "epoch": 2.315449421653149,
      "grad_norm": 0.3329792022705078,
      "learning_rate": 5.404479352674895e-06,
      "loss": 0.0161,
      "step": 1414860
    },
    {
      "epoch": 2.3154821520918025,
      "grad_norm": 0.3899097442626953,
      "learning_rate": 5.4044134604613776e-06,
      "loss": 0.0178,
      "step": 1414880
    },
    {
      "epoch": 2.3155148825304557,
      "grad_norm": 0.4975944757461548,
      "learning_rate": 5.404347568247861e-06,
      "loss": 0.0191,
      "step": 1414900
    },
    {
      "epoch": 2.315547612969109,
      "grad_norm": 0.4749683439731598,
      "learning_rate": 5.404281676034343e-06,
      "loss": 0.014,
      "step": 1414920
    },
    {
      "epoch": 2.3155803434077624,
      "grad_norm": 0.2769394516944885,
      "learning_rate": 5.404215783820827e-06,
      "loss": 0.0188,
      "step": 1414940
    },
    {
      "epoch": 2.3156130738464156,
      "grad_norm": 0.260977566242218,
      "learning_rate": 5.4041498916073085e-06,
      "loss": 0.0133,
      "step": 1414960
    },
    {
      "epoch": 2.315645804285069,
      "grad_norm": 0.4571143388748169,
      "learning_rate": 5.404083999393792e-06,
      "loss": 0.0121,
      "step": 1414980
    },
    {
      "epoch": 2.3156785347237223,
      "grad_norm": 0.4605408012866974,
      "learning_rate": 5.404018107180274e-06,
      "loss": 0.0184,
      "step": 1415000
    },
    {
      "epoch": 2.315711265162376,
      "grad_norm": 0.3460681438446045,
      "learning_rate": 5.403952214966758e-06,
      "loss": 0.0126,
      "step": 1415020
    },
    {
      "epoch": 2.315743995601029,
      "grad_norm": 0.6927850246429443,
      "learning_rate": 5.403886322753241e-06,
      "loss": 0.0192,
      "step": 1415040
    },
    {
      "epoch": 2.315776726039682,
      "grad_norm": 0.4938910901546478,
      "learning_rate": 5.403820430539723e-06,
      "loss": 0.0146,
      "step": 1415060
    },
    {
      "epoch": 2.315809456478336,
      "grad_norm": 1.7458419799804688,
      "learning_rate": 5.403754538326207e-06,
      "loss": 0.0193,
      "step": 1415080
    },
    {
      "epoch": 2.315842186916989,
      "grad_norm": 0.47836893796920776,
      "learning_rate": 5.403688646112689e-06,
      "loss": 0.018,
      "step": 1415100
    },
    {
      "epoch": 2.3158749173556425,
      "grad_norm": 0.33761700987815857,
      "learning_rate": 5.403622753899173e-06,
      "loss": 0.024,
      "step": 1415120
    },
    {
      "epoch": 2.3159076477942957,
      "grad_norm": 0.12001243978738785,
      "learning_rate": 5.403556861685655e-06,
      "loss": 0.011,
      "step": 1415140
    },
    {
      "epoch": 2.3159403782329493,
      "grad_norm": 0.3725356161594391,
      "learning_rate": 5.4034909694721385e-06,
      "loss": 0.0158,
      "step": 1415160
    },
    {
      "epoch": 2.3159731086716024,
      "grad_norm": 0.6434480547904968,
      "learning_rate": 5.40342507725862e-06,
      "loss": 0.0126,
      "step": 1415180
    },
    {
      "epoch": 2.3160058391102556,
      "grad_norm": 0.41065263748168945,
      "learning_rate": 5.403359185045104e-06,
      "loss": 0.014,
      "step": 1415200
    },
    {
      "epoch": 2.316038569548909,
      "grad_norm": 1.1504137516021729,
      "learning_rate": 5.403293292831586e-06,
      "loss": 0.0177,
      "step": 1415220
    },
    {
      "epoch": 2.3160712999875623,
      "grad_norm": 1.4106154441833496,
      "learning_rate": 5.4032274006180694e-06,
      "loss": 0.0217,
      "step": 1415240
    },
    {
      "epoch": 2.316104030426216,
      "grad_norm": 0.37305760383605957,
      "learning_rate": 5.403161508404552e-06,
      "loss": 0.0187,
      "step": 1415260
    },
    {
      "epoch": 2.316136760864869,
      "grad_norm": 0.5175521373748779,
      "learning_rate": 5.403095616191035e-06,
      "loss": 0.0157,
      "step": 1415280
    },
    {
      "epoch": 2.3161694913035227,
      "grad_norm": 0.47248753905296326,
      "learning_rate": 5.403029723977518e-06,
      "loss": 0.014,
      "step": 1415300
    },
    {
      "epoch": 2.316202221742176,
      "grad_norm": 0.17913644015789032,
      "learning_rate": 5.402963831764001e-06,
      "loss": 0.0143,
      "step": 1415320
    },
    {
      "epoch": 2.316234952180829,
      "grad_norm": 0.7098511457443237,
      "learning_rate": 5.402897939550483e-06,
      "loss": 0.0152,
      "step": 1415340
    },
    {
      "epoch": 2.3162676826194826,
      "grad_norm": 0.11404474824666977,
      "learning_rate": 5.402832047336967e-06,
      "loss": 0.0145,
      "step": 1415360
    },
    {
      "epoch": 2.3163004130581357,
      "grad_norm": 0.5795856714248657,
      "learning_rate": 5.40276615512345e-06,
      "loss": 0.0089,
      "step": 1415380
    },
    {
      "epoch": 2.3163331434967893,
      "grad_norm": 0.4690801501274109,
      "learning_rate": 5.402700262909932e-06,
      "loss": 0.0173,
      "step": 1415400
    },
    {
      "epoch": 2.3163658739354425,
      "grad_norm": 0.260423868894577,
      "learning_rate": 5.402634370696416e-06,
      "loss": 0.0154,
      "step": 1415420
    },
    {
      "epoch": 2.316398604374096,
      "grad_norm": 0.4959505498409271,
      "learning_rate": 5.402568478482898e-06,
      "loss": 0.0136,
      "step": 1415440
    },
    {
      "epoch": 2.316431334812749,
      "grad_norm": 0.8009775280952454,
      "learning_rate": 5.402502586269381e-06,
      "loss": 0.0114,
      "step": 1415460
    },
    {
      "epoch": 2.3164640652514024,
      "grad_norm": 0.2542651891708374,
      "learning_rate": 5.402436694055864e-06,
      "loss": 0.0142,
      "step": 1415480
    },
    {
      "epoch": 2.316496795690056,
      "grad_norm": 1.0512300729751587,
      "learning_rate": 5.402370801842347e-06,
      "loss": 0.0092,
      "step": 1415500
    },
    {
      "epoch": 2.316529526128709,
      "grad_norm": 0.1267520934343338,
      "learning_rate": 5.4023049096288295e-06,
      "loss": 0.0141,
      "step": 1415520
    },
    {
      "epoch": 2.3165622565673627,
      "grad_norm": 0.21234288811683655,
      "learning_rate": 5.402239017415313e-06,
      "loss": 0.0147,
      "step": 1415540
    },
    {
      "epoch": 2.316594987006016,
      "grad_norm": 0.8161951303482056,
      "learning_rate": 5.402173125201795e-06,
      "loss": 0.0163,
      "step": 1415560
    },
    {
      "epoch": 2.3166277174446694,
      "grad_norm": 0.28994956612586975,
      "learning_rate": 5.4021072329882785e-06,
      "loss": 0.0138,
      "step": 1415580
    },
    {
      "epoch": 2.3166604478833226,
      "grad_norm": 0.27077651023864746,
      "learning_rate": 5.4020413407747604e-06,
      "loss": 0.0107,
      "step": 1415600
    },
    {
      "epoch": 2.3166931783219757,
      "grad_norm": 0.8018295764923096,
      "learning_rate": 5.401975448561244e-06,
      "loss": 0.0189,
      "step": 1415620
    },
    {
      "epoch": 2.3167259087606293,
      "grad_norm": 0.26817572116851807,
      "learning_rate": 5.401909556347727e-06,
      "loss": 0.0208,
      "step": 1415640
    },
    {
      "epoch": 2.3167586391992825,
      "grad_norm": 0.7916107177734375,
      "learning_rate": 5.4018436641342095e-06,
      "loss": 0.013,
      "step": 1415660
    },
    {
      "epoch": 2.3167913696379356,
      "grad_norm": 0.7401554584503174,
      "learning_rate": 5.401777771920692e-06,
      "loss": 0.0206,
      "step": 1415680
    },
    {
      "epoch": 2.3168241000765892,
      "grad_norm": 0.9042554497718811,
      "learning_rate": 5.401711879707176e-06,
      "loss": 0.016,
      "step": 1415700
    },
    {
      "epoch": 2.3168568305152424,
      "grad_norm": 0.20637856423854828,
      "learning_rate": 5.4016459874936586e-06,
      "loss": 0.0134,
      "step": 1415720
    },
    {
      "epoch": 2.316889560953896,
      "grad_norm": 0.7990134954452515,
      "learning_rate": 5.401580095280141e-06,
      "loss": 0.0155,
      "step": 1415740
    },
    {
      "epoch": 2.316922291392549,
      "grad_norm": 0.5590126514434814,
      "learning_rate": 5.401514203066625e-06,
      "loss": 0.0178,
      "step": 1415760
    },
    {
      "epoch": 2.3169550218312027,
      "grad_norm": 0.21432103216648102,
      "learning_rate": 5.401448310853107e-06,
      "loss": 0.0238,
      "step": 1415780
    },
    {
      "epoch": 2.316987752269856,
      "grad_norm": 0.6488661766052246,
      "learning_rate": 5.40138241863959e-06,
      "loss": 0.0163,
      "step": 1415800
    },
    {
      "epoch": 2.317020482708509,
      "grad_norm": 0.2889781892299652,
      "learning_rate": 5.401316526426072e-06,
      "loss": 0.0168,
      "step": 1415820
    },
    {
      "epoch": 2.3170532131471626,
      "grad_norm": 0.14461646974086761,
      "learning_rate": 5.401250634212556e-06,
      "loss": 0.0098,
      "step": 1415840
    },
    {
      "epoch": 2.3170859435858158,
      "grad_norm": 0.23521475493907928,
      "learning_rate": 5.401184741999039e-06,
      "loss": 0.009,
      "step": 1415860
    },
    {
      "epoch": 2.3171186740244694,
      "grad_norm": 0.44802960753440857,
      "learning_rate": 5.401118849785521e-06,
      "loss": 0.0152,
      "step": 1415880
    },
    {
      "epoch": 2.3171514044631225,
      "grad_norm": 0.3300207853317261,
      "learning_rate": 5.401052957572004e-06,
      "loss": 0.0145,
      "step": 1415900
    },
    {
      "epoch": 2.317184134901776,
      "grad_norm": 1.0282689332962036,
      "learning_rate": 5.400987065358488e-06,
      "loss": 0.0214,
      "step": 1415920
    },
    {
      "epoch": 2.3172168653404293,
      "grad_norm": 1.2821208238601685,
      "learning_rate": 5.4009211731449696e-06,
      "loss": 0.0165,
      "step": 1415940
    },
    {
      "epoch": 2.3172495957790824,
      "grad_norm": 0.35070669651031494,
      "learning_rate": 5.400855280931453e-06,
      "loss": 0.0174,
      "step": 1415960
    },
    {
      "epoch": 2.317282326217736,
      "grad_norm": 0.23192912340164185,
      "learning_rate": 5.400789388717935e-06,
      "loss": 0.0144,
      "step": 1415980
    },
    {
      "epoch": 2.317315056656389,
      "grad_norm": 0.7844717502593994,
      "learning_rate": 5.400723496504419e-06,
      "loss": 0.0148,
      "step": 1416000
    },
    {
      "epoch": 2.3173477870950427,
      "grad_norm": 0.3354566991329193,
      "learning_rate": 5.4006576042909005e-06,
      "loss": 0.0171,
      "step": 1416020
    },
    {
      "epoch": 2.317380517533696,
      "grad_norm": 0.45826101303100586,
      "learning_rate": 5.400591712077384e-06,
      "loss": 0.0154,
      "step": 1416040
    },
    {
      "epoch": 2.3174132479723495,
      "grad_norm": 0.26438668370246887,
      "learning_rate": 5.400525819863867e-06,
      "loss": 0.0153,
      "step": 1416060
    },
    {
      "epoch": 2.3174459784110026,
      "grad_norm": 0.9375897645950317,
      "learning_rate": 5.40045992765035e-06,
      "loss": 0.0097,
      "step": 1416080
    },
    {
      "epoch": 2.317478708849656,
      "grad_norm": 0.5939943194389343,
      "learning_rate": 5.400394035436833e-06,
      "loss": 0.0169,
      "step": 1416100
    },
    {
      "epoch": 2.3175114392883094,
      "grad_norm": 0.22985592484474182,
      "learning_rate": 5.400328143223316e-06,
      "loss": 0.0157,
      "step": 1416120
    },
    {
      "epoch": 2.3175441697269625,
      "grad_norm": 0.1732432097196579,
      "learning_rate": 5.400262251009799e-06,
      "loss": 0.0101,
      "step": 1416140
    },
    {
      "epoch": 2.317576900165616,
      "grad_norm": 0.2834661900997162,
      "learning_rate": 5.400196358796281e-06,
      "loss": 0.0192,
      "step": 1416160
    },
    {
      "epoch": 2.3176096306042693,
      "grad_norm": 0.12869690358638763,
      "learning_rate": 5.400130466582765e-06,
      "loss": 0.0105,
      "step": 1416180
    },
    {
      "epoch": 2.317642361042923,
      "grad_norm": 0.21168333292007446,
      "learning_rate": 5.400064574369247e-06,
      "loss": 0.0171,
      "step": 1416200
    },
    {
      "epoch": 2.317675091481576,
      "grad_norm": 0.5616752505302429,
      "learning_rate": 5.3999986821557305e-06,
      "loss": 0.0105,
      "step": 1416220
    },
    {
      "epoch": 2.317707821920229,
      "grad_norm": 0.08721989393234253,
      "learning_rate": 5.399932789942212e-06,
      "loss": 0.0156,
      "step": 1416240
    },
    {
      "epoch": 2.3177405523588828,
      "grad_norm": 0.5745035409927368,
      "learning_rate": 5.399866897728696e-06,
      "loss": 0.0154,
      "step": 1416260
    },
    {
      "epoch": 2.317773282797536,
      "grad_norm": 0.48677822947502136,
      "learning_rate": 5.399801005515179e-06,
      "loss": 0.0172,
      "step": 1416280
    },
    {
      "epoch": 2.3178060132361895,
      "grad_norm": 0.6646543741226196,
      "learning_rate": 5.399735113301661e-06,
      "loss": 0.0127,
      "step": 1416300
    },
    {
      "epoch": 2.3178387436748427,
      "grad_norm": 0.11816979199647903,
      "learning_rate": 5.399669221088144e-06,
      "loss": 0.0167,
      "step": 1416320
    },
    {
      "epoch": 2.3178714741134963,
      "grad_norm": 0.9391326904296875,
      "learning_rate": 5.399603328874628e-06,
      "loss": 0.0133,
      "step": 1416340
    },
    {
      "epoch": 2.3179042045521494,
      "grad_norm": 1.181498646736145,
      "learning_rate": 5.39953743666111e-06,
      "loss": 0.02,
      "step": 1416360
    },
    {
      "epoch": 2.3179369349908026,
      "grad_norm": 0.6075745224952698,
      "learning_rate": 5.399471544447593e-06,
      "loss": 0.0137,
      "step": 1416380
    },
    {
      "epoch": 2.317969665429456,
      "grad_norm": 0.5945358872413635,
      "learning_rate": 5.399405652234075e-06,
      "loss": 0.0168,
      "step": 1416400
    },
    {
      "epoch": 2.3180023958681093,
      "grad_norm": 0.8532550930976868,
      "learning_rate": 5.399339760020559e-06,
      "loss": 0.0174,
      "step": 1416420
    },
    {
      "epoch": 2.318035126306763,
      "grad_norm": 0.7068144083023071,
      "learning_rate": 5.399273867807042e-06,
      "loss": 0.0182,
      "step": 1416440
    },
    {
      "epoch": 2.318067856745416,
      "grad_norm": 0.07194840908050537,
      "learning_rate": 5.399207975593524e-06,
      "loss": 0.012,
      "step": 1416460
    },
    {
      "epoch": 2.3181005871840696,
      "grad_norm": 0.7150625586509705,
      "learning_rate": 5.399142083380008e-06,
      "loss": 0.0171,
      "step": 1416480
    },
    {
      "epoch": 2.318133317622723,
      "grad_norm": 0.14974476397037506,
      "learning_rate": 5.3990761911664905e-06,
      "loss": 0.0091,
      "step": 1416500
    },
    {
      "epoch": 2.318166048061376,
      "grad_norm": 0.03129032626748085,
      "learning_rate": 5.399010298952973e-06,
      "loss": 0.0123,
      "step": 1416520
    },
    {
      "epoch": 2.3181987785000295,
      "grad_norm": 0.3326359987258911,
      "learning_rate": 5.398944406739456e-06,
      "loss": 0.0137,
      "step": 1416540
    },
    {
      "epoch": 2.3182315089386827,
      "grad_norm": 0.2723907232284546,
      "learning_rate": 5.3988785145259396e-06,
      "loss": 0.0167,
      "step": 1416560
    },
    {
      "epoch": 2.3182642393773363,
      "grad_norm": 1.5811978578567505,
      "learning_rate": 5.3988126223124215e-06,
      "loss": 0.0185,
      "step": 1416580
    },
    {
      "epoch": 2.3182969698159894,
      "grad_norm": 0.2793920636177063,
      "learning_rate": 5.398746730098905e-06,
      "loss": 0.0124,
      "step": 1416600
    },
    {
      "epoch": 2.318329700254643,
      "grad_norm": 0.22027498483657837,
      "learning_rate": 5.398680837885387e-06,
      "loss": 0.0135,
      "step": 1416620
    },
    {
      "epoch": 2.318362430693296,
      "grad_norm": 0.4996403455734253,
      "learning_rate": 5.3986149456718705e-06,
      "loss": 0.0139,
      "step": 1416640
    },
    {
      "epoch": 2.3183951611319493,
      "grad_norm": 1.0319334268569946,
      "learning_rate": 5.398549053458353e-06,
      "loss": 0.0142,
      "step": 1416660
    },
    {
      "epoch": 2.318427891570603,
      "grad_norm": 0.8601643443107605,
      "learning_rate": 5.398483161244836e-06,
      "loss": 0.0128,
      "step": 1416680
    },
    {
      "epoch": 2.318460622009256,
      "grad_norm": 0.330546110868454,
      "learning_rate": 5.398417269031319e-06,
      "loss": 0.0154,
      "step": 1416700
    },
    {
      "epoch": 2.3184933524479097,
      "grad_norm": 0.251242995262146,
      "learning_rate": 5.398351376817802e-06,
      "loss": 0.0146,
      "step": 1416720
    },
    {
      "epoch": 2.318526082886563,
      "grad_norm": 0.4038958251476288,
      "learning_rate": 5.398285484604284e-06,
      "loss": 0.016,
      "step": 1416740
    },
    {
      "epoch": 2.3185588133252164,
      "grad_norm": 0.4948565661907196,
      "learning_rate": 5.398219592390768e-06,
      "loss": 0.0234,
      "step": 1416760
    },
    {
      "epoch": 2.3185915437638696,
      "grad_norm": 0.1297731250524521,
      "learning_rate": 5.398153700177251e-06,
      "loss": 0.0212,
      "step": 1416780
    },
    {
      "epoch": 2.3186242742025227,
      "grad_norm": 0.21833162009716034,
      "learning_rate": 5.398087807963733e-06,
      "loss": 0.0144,
      "step": 1416800
    },
    {
      "epoch": 2.3186570046411763,
      "grad_norm": 0.2509438693523407,
      "learning_rate": 5.398021915750217e-06,
      "loss": 0.0204,
      "step": 1416820
    },
    {
      "epoch": 2.3186897350798295,
      "grad_norm": 0.5528832077980042,
      "learning_rate": 5.397956023536699e-06,
      "loss": 0.0152,
      "step": 1416840
    },
    {
      "epoch": 2.318722465518483,
      "grad_norm": 0.2720242440700531,
      "learning_rate": 5.397890131323182e-06,
      "loss": 0.014,
      "step": 1416860
    },
    {
      "epoch": 2.318755195957136,
      "grad_norm": 0.8138254284858704,
      "learning_rate": 5.397824239109665e-06,
      "loss": 0.0136,
      "step": 1416880
    },
    {
      "epoch": 2.31878792639579,
      "grad_norm": 0.884746789932251,
      "learning_rate": 5.397758346896148e-06,
      "loss": 0.0241,
      "step": 1416900
    },
    {
      "epoch": 2.318820656834443,
      "grad_norm": 0.5578396320343018,
      "learning_rate": 5.397692454682631e-06,
      "loss": 0.0147,
      "step": 1416920
    },
    {
      "epoch": 2.318853387273096,
      "grad_norm": 0.34624481201171875,
      "learning_rate": 5.397626562469114e-06,
      "loss": 0.02,
      "step": 1416940
    },
    {
      "epoch": 2.3188861177117497,
      "grad_norm": 0.27138572931289673,
      "learning_rate": 5.397560670255596e-06,
      "loss": 0.0175,
      "step": 1416960
    },
    {
      "epoch": 2.318918848150403,
      "grad_norm": 0.6703166365623474,
      "learning_rate": 5.39749477804208e-06,
      "loss": 0.015,
      "step": 1416980
    },
    {
      "epoch": 2.3189515785890564,
      "grad_norm": 0.47268611192703247,
      "learning_rate": 5.3974288858285615e-06,
      "loss": 0.016,
      "step": 1417000
    },
    {
      "epoch": 2.3189843090277096,
      "grad_norm": 0.9689928293228149,
      "learning_rate": 5.397362993615045e-06,
      "loss": 0.0192,
      "step": 1417020
    },
    {
      "epoch": 2.319017039466363,
      "grad_norm": 1.2609703540802002,
      "learning_rate": 5.397297101401527e-06,
      "loss": 0.0186,
      "step": 1417040
    },
    {
      "epoch": 2.3190497699050163,
      "grad_norm": 0.20592956244945526,
      "learning_rate": 5.397231209188011e-06,
      "loss": 0.0098,
      "step": 1417060
    },
    {
      "epoch": 2.3190825003436695,
      "grad_norm": 0.136461079120636,
      "learning_rate": 5.397165316974493e-06,
      "loss": 0.0175,
      "step": 1417080
    },
    {
      "epoch": 2.319115230782323,
      "grad_norm": 0.10520501434803009,
      "learning_rate": 5.397099424760976e-06,
      "loss": 0.0111,
      "step": 1417100
    },
    {
      "epoch": 2.319147961220976,
      "grad_norm": 0.31693127751350403,
      "learning_rate": 5.397033532547459e-06,
      "loss": 0.0124,
      "step": 1417120
    },
    {
      "epoch": 2.31918069165963,
      "grad_norm": 0.20161215960979462,
      "learning_rate": 5.396967640333942e-06,
      "loss": 0.0193,
      "step": 1417140
    },
    {
      "epoch": 2.319213422098283,
      "grad_norm": 0.2862553894519806,
      "learning_rate": 5.396901748120425e-06,
      "loss": 0.012,
      "step": 1417160
    },
    {
      "epoch": 2.319246152536936,
      "grad_norm": 0.6558982133865356,
      "learning_rate": 5.396835855906908e-06,
      "loss": 0.0225,
      "step": 1417180
    },
    {
      "epoch": 2.3192788829755897,
      "grad_norm": 0.332566499710083,
      "learning_rate": 5.3967699636933915e-06,
      "loss": 0.0266,
      "step": 1417200
    },
    {
      "epoch": 2.319311613414243,
      "grad_norm": 0.21321731805801392,
      "learning_rate": 5.396704071479873e-06,
      "loss": 0.017,
      "step": 1417220
    },
    {
      "epoch": 2.3193443438528965,
      "grad_norm": 0.15413296222686768,
      "learning_rate": 5.396638179266357e-06,
      "loss": 0.0207,
      "step": 1417240
    },
    {
      "epoch": 2.3193770742915496,
      "grad_norm": 0.13400858640670776,
      "learning_rate": 5.396572287052839e-06,
      "loss": 0.0143,
      "step": 1417260
    },
    {
      "epoch": 2.3194098047302028,
      "grad_norm": 0.13226474821567535,
      "learning_rate": 5.3965063948393224e-06,
      "loss": 0.0128,
      "step": 1417280
    },
    {
      "epoch": 2.3194425351688563,
      "grad_norm": 0.29729339480400085,
      "learning_rate": 5.396440502625805e-06,
      "loss": 0.0114,
      "step": 1417300
    },
    {
      "epoch": 2.3194752656075095,
      "grad_norm": 0.19440393149852753,
      "learning_rate": 5.396374610412288e-06,
      "loss": 0.0106,
      "step": 1417320
    },
    {
      "epoch": 2.319507996046163,
      "grad_norm": 0.5054282546043396,
      "learning_rate": 5.396308718198771e-06,
      "loss": 0.0146,
      "step": 1417340
    },
    {
      "epoch": 2.3195407264848162,
      "grad_norm": 0.21818102896213531,
      "learning_rate": 5.396242825985254e-06,
      "loss": 0.0184,
      "step": 1417360
    },
    {
      "epoch": 2.31957345692347,
      "grad_norm": 1.0911478996276855,
      "learning_rate": 5.396176933771736e-06,
      "loss": 0.0162,
      "step": 1417380
    },
    {
      "epoch": 2.319606187362123,
      "grad_norm": 0.3404054641723633,
      "learning_rate": 5.39611104155822e-06,
      "loss": 0.016,
      "step": 1417400
    },
    {
      "epoch": 2.319638917800776,
      "grad_norm": 0.14943091571331024,
      "learning_rate": 5.396045149344702e-06,
      "loss": 0.0162,
      "step": 1417420
    },
    {
      "epoch": 2.3196716482394297,
      "grad_norm": 0.24679870903491974,
      "learning_rate": 5.395979257131185e-06,
      "loss": 0.0105,
      "step": 1417440
    },
    {
      "epoch": 2.319704378678083,
      "grad_norm": 0.7938473224639893,
      "learning_rate": 5.395913364917668e-06,
      "loss": 0.0205,
      "step": 1417460
    },
    {
      "epoch": 2.3197371091167365,
      "grad_norm": 0.37232962250709534,
      "learning_rate": 5.395847472704151e-06,
      "loss": 0.0171,
      "step": 1417480
    },
    {
      "epoch": 2.3197698395553896,
      "grad_norm": 0.23842644691467285,
      "learning_rate": 5.395781580490634e-06,
      "loss": 0.011,
      "step": 1417500
    },
    {
      "epoch": 2.319802569994043,
      "grad_norm": 0.23988977074623108,
      "learning_rate": 5.395715688277117e-06,
      "loss": 0.0228,
      "step": 1417520
    },
    {
      "epoch": 2.3198353004326964,
      "grad_norm": 0.19666524231433868,
      "learning_rate": 5.3956497960636e-06,
      "loss": 0.0175,
      "step": 1417540
    },
    {
      "epoch": 2.3198680308713495,
      "grad_norm": 0.40670543909072876,
      "learning_rate": 5.3955839038500825e-06,
      "loss": 0.0135,
      "step": 1417560
    },
    {
      "epoch": 2.319900761310003,
      "grad_norm": 0.30755162239074707,
      "learning_rate": 5.395518011636566e-06,
      "loss": 0.0102,
      "step": 1417580
    },
    {
      "epoch": 2.3199334917486563,
      "grad_norm": 0.3666419982910156,
      "learning_rate": 5.395452119423048e-06,
      "loss": 0.0101,
      "step": 1417600
    },
    {
      "epoch": 2.31996622218731,
      "grad_norm": 0.3302433490753174,
      "learning_rate": 5.3953862272095316e-06,
      "loss": 0.014,
      "step": 1417620
    },
    {
      "epoch": 2.319998952625963,
      "grad_norm": 0.11074734479188919,
      "learning_rate": 5.3953203349960134e-06,
      "loss": 0.012,
      "step": 1417640
    },
    {
      "epoch": 2.3200316830646166,
      "grad_norm": 0.41991105675697327,
      "learning_rate": 5.395254442782497e-06,
      "loss": 0.0154,
      "step": 1417660
    },
    {
      "epoch": 2.3200644135032698,
      "grad_norm": 0.7061587572097778,
      "learning_rate": 5.39518855056898e-06,
      "loss": 0.0189,
      "step": 1417680
    },
    {
      "epoch": 2.320097143941923,
      "grad_norm": 0.20207136869430542,
      "learning_rate": 5.3951226583554625e-06,
      "loss": 0.0189,
      "step": 1417700
    },
    {
      "epoch": 2.3201298743805765,
      "grad_norm": 0.265642374753952,
      "learning_rate": 5.395056766141945e-06,
      "loss": 0.0101,
      "step": 1417720
    },
    {
      "epoch": 2.3201626048192296,
      "grad_norm": 0.6642841696739197,
      "learning_rate": 5.394990873928429e-06,
      "loss": 0.0139,
      "step": 1417740
    },
    {
      "epoch": 2.3201953352578832,
      "grad_norm": 0.3287878632545471,
      "learning_rate": 5.394924981714911e-06,
      "loss": 0.0185,
      "step": 1417760
    },
    {
      "epoch": 2.3202280656965364,
      "grad_norm": 0.17448554933071136,
      "learning_rate": 5.394859089501394e-06,
      "loss": 0.0083,
      "step": 1417780
    },
    {
      "epoch": 2.32026079613519,
      "grad_norm": 0.13854330778121948,
      "learning_rate": 5.394793197287876e-06,
      "loss": 0.0132,
      "step": 1417800
    },
    {
      "epoch": 2.320293526573843,
      "grad_norm": 2.1452441215515137,
      "learning_rate": 5.39472730507436e-06,
      "loss": 0.0178,
      "step": 1417820
    },
    {
      "epoch": 2.3203262570124963,
      "grad_norm": 0.07252783328294754,
      "learning_rate": 5.394661412860843e-06,
      "loss": 0.0148,
      "step": 1417840
    },
    {
      "epoch": 2.32035898745115,
      "grad_norm": 0.7128360867500305,
      "learning_rate": 5.394595520647325e-06,
      "loss": 0.0103,
      "step": 1417860
    },
    {
      "epoch": 2.320391717889803,
      "grad_norm": 0.24596485495567322,
      "learning_rate": 5.394529628433809e-06,
      "loss": 0.0148,
      "step": 1417880
    },
    {
      "epoch": 2.3204244483284566,
      "grad_norm": 0.33335357904434204,
      "learning_rate": 5.394463736220292e-06,
      "loss": 0.0191,
      "step": 1417900
    },
    {
      "epoch": 2.3204571787671098,
      "grad_norm": 0.2919042408466339,
      "learning_rate": 5.394397844006774e-06,
      "loss": 0.0142,
      "step": 1417920
    },
    {
      "epoch": 2.3204899092057634,
      "grad_norm": 0.6753813624382019,
      "learning_rate": 5.394331951793257e-06,
      "loss": 0.0122,
      "step": 1417940
    },
    {
      "epoch": 2.3205226396444165,
      "grad_norm": 0.5250923037528992,
      "learning_rate": 5.394266059579741e-06,
      "loss": 0.01,
      "step": 1417960
    },
    {
      "epoch": 2.3205553700830697,
      "grad_norm": 0.49782174825668335,
      "learning_rate": 5.3942001673662226e-06,
      "loss": 0.0199,
      "step": 1417980
    },
    {
      "epoch": 2.3205881005217233,
      "grad_norm": 0.4607138931751251,
      "learning_rate": 5.394134275152706e-06,
      "loss": 0.0201,
      "step": 1418000
    },
    {
      "epoch": 2.3206208309603764,
      "grad_norm": 0.8541265726089478,
      "learning_rate": 5.394068382939188e-06,
      "loss": 0.0225,
      "step": 1418020
    },
    {
      "epoch": 2.32065356139903,
      "grad_norm": 0.12453607469797134,
      "learning_rate": 5.394002490725672e-06,
      "loss": 0.0119,
      "step": 1418040
    },
    {
      "epoch": 2.320686291837683,
      "grad_norm": 0.6814871430397034,
      "learning_rate": 5.3939365985121535e-06,
      "loss": 0.0208,
      "step": 1418060
    },
    {
      "epoch": 2.3207190222763368,
      "grad_norm": 0.4814896881580353,
      "learning_rate": 5.393870706298637e-06,
      "loss": 0.0144,
      "step": 1418080
    },
    {
      "epoch": 2.32075175271499,
      "grad_norm": 0.32866260409355164,
      "learning_rate": 5.39380481408512e-06,
      "loss": 0.0215,
      "step": 1418100
    },
    {
      "epoch": 2.320784483153643,
      "grad_norm": 0.15669786930084229,
      "learning_rate": 5.393738921871603e-06,
      "loss": 0.012,
      "step": 1418120
    },
    {
      "epoch": 2.3208172135922966,
      "grad_norm": 0.8130503296852112,
      "learning_rate": 5.393673029658085e-06,
      "loss": 0.0151,
      "step": 1418140
    },
    {
      "epoch": 2.32084994403095,
      "grad_norm": 0.9370134472846985,
      "learning_rate": 5.393607137444569e-06,
      "loss": 0.014,
      "step": 1418160
    },
    {
      "epoch": 2.3208826744696034,
      "grad_norm": 0.2169084995985031,
      "learning_rate": 5.393541245231051e-06,
      "loss": 0.0177,
      "step": 1418180
    },
    {
      "epoch": 2.3209154049082565,
      "grad_norm": 0.20686174929141998,
      "learning_rate": 5.393475353017534e-06,
      "loss": 0.0181,
      "step": 1418200
    },
    {
      "epoch": 2.32094813534691,
      "grad_norm": 0.38156214356422424,
      "learning_rate": 5.393409460804018e-06,
      "loss": 0.0121,
      "step": 1418220
    },
    {
      "epoch": 2.3209808657855633,
      "grad_norm": 0.1382461041212082,
      "learning_rate": 5.3933435685905e-06,
      "loss": 0.0209,
      "step": 1418240
    },
    {
      "epoch": 2.3210135962242164,
      "grad_norm": 0.42538049817085266,
      "learning_rate": 5.3932776763769835e-06,
      "loss": 0.012,
      "step": 1418260
    },
    {
      "epoch": 2.32104632666287,
      "grad_norm": 0.47887927293777466,
      "learning_rate": 5.393211784163465e-06,
      "loss": 0.0172,
      "step": 1418280
    },
    {
      "epoch": 2.321079057101523,
      "grad_norm": 0.32492795586586,
      "learning_rate": 5.393145891949949e-06,
      "loss": 0.0143,
      "step": 1418300
    },
    {
      "epoch": 2.3211117875401768,
      "grad_norm": 0.2717290222644806,
      "learning_rate": 5.393079999736432e-06,
      "loss": 0.0161,
      "step": 1418320
    },
    {
      "epoch": 2.32114451797883,
      "grad_norm": 0.7371187806129456,
      "learning_rate": 5.393014107522914e-06,
      "loss": 0.0189,
      "step": 1418340
    },
    {
      "epoch": 2.3211772484174835,
      "grad_norm": 0.3055536150932312,
      "learning_rate": 5.392948215309397e-06,
      "loss": 0.0199,
      "step": 1418360
    },
    {
      "epoch": 2.3212099788561367,
      "grad_norm": 0.5881869792938232,
      "learning_rate": 5.392882323095881e-06,
      "loss": 0.0134,
      "step": 1418380
    },
    {
      "epoch": 2.32124270929479,
      "grad_norm": 0.253816157579422,
      "learning_rate": 5.392816430882363e-06,
      "loss": 0.019,
      "step": 1418400
    },
    {
      "epoch": 2.3212754397334434,
      "grad_norm": 0.2830076813697815,
      "learning_rate": 5.392750538668846e-06,
      "loss": 0.0144,
      "step": 1418420
    },
    {
      "epoch": 2.3213081701720966,
      "grad_norm": 0.26253166794776917,
      "learning_rate": 5.392684646455328e-06,
      "loss": 0.0142,
      "step": 1418440
    },
    {
      "epoch": 2.32134090061075,
      "grad_norm": 0.8557447791099548,
      "learning_rate": 5.392618754241812e-06,
      "loss": 0.012,
      "step": 1418460
    },
    {
      "epoch": 2.3213736310494033,
      "grad_norm": 0.5827296376228333,
      "learning_rate": 5.3925528620282944e-06,
      "loss": 0.0155,
      "step": 1418480
    },
    {
      "epoch": 2.321406361488057,
      "grad_norm": 0.5813325047492981,
      "learning_rate": 5.392486969814777e-06,
      "loss": 0.0176,
      "step": 1418500
    },
    {
      "epoch": 2.32143909192671,
      "grad_norm": 0.17744851112365723,
      "learning_rate": 5.39242107760126e-06,
      "loss": 0.017,
      "step": 1418520
    },
    {
      "epoch": 2.321471822365363,
      "grad_norm": 0.5281419157981873,
      "learning_rate": 5.3923551853877435e-06,
      "loss": 0.0183,
      "step": 1418540
    },
    {
      "epoch": 2.321504552804017,
      "grad_norm": 0.07817614823579788,
      "learning_rate": 5.392289293174226e-06,
      "loss": 0.0134,
      "step": 1418560
    },
    {
      "epoch": 2.32153728324267,
      "grad_norm": 0.8631017804145813,
      "learning_rate": 5.392223400960709e-06,
      "loss": 0.0196,
      "step": 1418580
    },
    {
      "epoch": 2.3215700136813235,
      "grad_norm": 0.24039705097675323,
      "learning_rate": 5.3921575087471926e-06,
      "loss": 0.0138,
      "step": 1418600
    },
    {
      "epoch": 2.3216027441199767,
      "grad_norm": 0.42574208974838257,
      "learning_rate": 5.3920916165336745e-06,
      "loss": 0.0167,
      "step": 1418620
    },
    {
      "epoch": 2.32163547455863,
      "grad_norm": 0.22008362412452698,
      "learning_rate": 5.392025724320158e-06,
      "loss": 0.0121,
      "step": 1418640
    },
    {
      "epoch": 2.3216682049972834,
      "grad_norm": 0.4059043228626251,
      "learning_rate": 5.39195983210664e-06,
      "loss": 0.0144,
      "step": 1418660
    },
    {
      "epoch": 2.3217009354359366,
      "grad_norm": 0.2065282166004181,
      "learning_rate": 5.3918939398931235e-06,
      "loss": 0.0204,
      "step": 1418680
    },
    {
      "epoch": 2.32173366587459,
      "grad_norm": 0.6195563673973083,
      "learning_rate": 5.391828047679606e-06,
      "loss": 0.0131,
      "step": 1418700
    },
    {
      "epoch": 2.3217663963132433,
      "grad_norm": 0.5220193266868591,
      "learning_rate": 5.391762155466089e-06,
      "loss": 0.0167,
      "step": 1418720
    },
    {
      "epoch": 2.3217991267518965,
      "grad_norm": 0.7022734880447388,
      "learning_rate": 5.391696263252572e-06,
      "loss": 0.0198,
      "step": 1418740
    },
    {
      "epoch": 2.32183185719055,
      "grad_norm": 0.7861403226852417,
      "learning_rate": 5.391630371039055e-06,
      "loss": 0.0189,
      "step": 1418760
    },
    {
      "epoch": 2.3218645876292032,
      "grad_norm": 1.7184429168701172,
      "learning_rate": 5.391564478825537e-06,
      "loss": 0.0182,
      "step": 1418780
    },
    {
      "epoch": 2.321897318067857,
      "grad_norm": 0.29518967866897583,
      "learning_rate": 5.391498586612021e-06,
      "loss": 0.0154,
      "step": 1418800
    },
    {
      "epoch": 2.32193004850651,
      "grad_norm": 0.9362333416938782,
      "learning_rate": 5.391432694398503e-06,
      "loss": 0.0181,
      "step": 1418820
    },
    {
      "epoch": 2.3219627789451636,
      "grad_norm": 0.22212879359722137,
      "learning_rate": 5.391366802184986e-06,
      "loss": 0.0098,
      "step": 1418840
    },
    {
      "epoch": 2.3219955093838167,
      "grad_norm": 0.10671466588973999,
      "learning_rate": 5.391300909971468e-06,
      "loss": 0.017,
      "step": 1418860
    },
    {
      "epoch": 2.32202823982247,
      "grad_norm": 0.32622289657592773,
      "learning_rate": 5.391235017757952e-06,
      "loss": 0.0186,
      "step": 1418880
    },
    {
      "epoch": 2.3220609702611235,
      "grad_norm": 0.32447004318237305,
      "learning_rate": 5.391169125544435e-06,
      "loss": 0.0164,
      "step": 1418900
    },
    {
      "epoch": 2.3220937006997766,
      "grad_norm": 0.5945071578025818,
      "learning_rate": 5.391103233330917e-06,
      "loss": 0.0126,
      "step": 1418920
    },
    {
      "epoch": 2.32212643113843,
      "grad_norm": 0.4101426303386688,
      "learning_rate": 5.391037341117401e-06,
      "loss": 0.0168,
      "step": 1418940
    },
    {
      "epoch": 2.3221591615770834,
      "grad_norm": 0.16848525404930115,
      "learning_rate": 5.390971448903884e-06,
      "loss": 0.0143,
      "step": 1418960
    },
    {
      "epoch": 2.322191892015737,
      "grad_norm": 0.7193607091903687,
      "learning_rate": 5.390905556690366e-06,
      "loss": 0.0195,
      "step": 1418980
    },
    {
      "epoch": 2.32222462245439,
      "grad_norm": 0.5569924712181091,
      "learning_rate": 5.390839664476849e-06,
      "loss": 0.015,
      "step": 1419000
    },
    {
      "epoch": 2.3222573528930432,
      "grad_norm": 0.36661291122436523,
      "learning_rate": 5.390773772263333e-06,
      "loss": 0.0101,
      "step": 1419020
    },
    {
      "epoch": 2.322290083331697,
      "grad_norm": 0.4001339077949524,
      "learning_rate": 5.3907078800498145e-06,
      "loss": 0.0126,
      "step": 1419040
    },
    {
      "epoch": 2.32232281377035,
      "grad_norm": 0.7086851596832275,
      "learning_rate": 5.390641987836298e-06,
      "loss": 0.0143,
      "step": 1419060
    },
    {
      "epoch": 2.3223555442090036,
      "grad_norm": 0.2843543291091919,
      "learning_rate": 5.39057609562278e-06,
      "loss": 0.0233,
      "step": 1419080
    },
    {
      "epoch": 2.3223882746476567,
      "grad_norm": 1.1117069721221924,
      "learning_rate": 5.390510203409264e-06,
      "loss": 0.0246,
      "step": 1419100
    },
    {
      "epoch": 2.3224210050863103,
      "grad_norm": 0.11930114775896072,
      "learning_rate": 5.390444311195746e-06,
      "loss": 0.013,
      "step": 1419120
    },
    {
      "epoch": 2.3224537355249635,
      "grad_norm": 0.22574599087238312,
      "learning_rate": 5.390378418982229e-06,
      "loss": 0.0106,
      "step": 1419140
    },
    {
      "epoch": 2.3224864659636166,
      "grad_norm": 0.11806761473417282,
      "learning_rate": 5.390312526768712e-06,
      "loss": 0.0169,
      "step": 1419160
    },
    {
      "epoch": 2.3225191964022702,
      "grad_norm": 0.35758888721466064,
      "learning_rate": 5.390246634555195e-06,
      "loss": 0.0165,
      "step": 1419180
    },
    {
      "epoch": 2.3225519268409234,
      "grad_norm": 0.7056600451469421,
      "learning_rate": 5.390180742341677e-06,
      "loss": 0.0169,
      "step": 1419200
    },
    {
      "epoch": 2.322584657279577,
      "grad_norm": 0.32648545503616333,
      "learning_rate": 5.390114850128161e-06,
      "loss": 0.0164,
      "step": 1419220
    },
    {
      "epoch": 2.32261738771823,
      "grad_norm": 0.3189582824707031,
      "learning_rate": 5.3900489579146445e-06,
      "loss": 0.0176,
      "step": 1419240
    },
    {
      "epoch": 2.3226501181568837,
      "grad_norm": 0.27661848068237305,
      "learning_rate": 5.389983065701126e-06,
      "loss": 0.0172,
      "step": 1419260
    },
    {
      "epoch": 2.322682848595537,
      "grad_norm": 0.14763730764389038,
      "learning_rate": 5.38991717348761e-06,
      "loss": 0.0157,
      "step": 1419280
    },
    {
      "epoch": 2.32271557903419,
      "grad_norm": 0.1970384567975998,
      "learning_rate": 5.389851281274092e-06,
      "loss": 0.0183,
      "step": 1419300
    },
    {
      "epoch": 2.3227483094728436,
      "grad_norm": 0.2036718726158142,
      "learning_rate": 5.3897853890605754e-06,
      "loss": 0.0167,
      "step": 1419320
    },
    {
      "epoch": 2.3227810399114968,
      "grad_norm": 0.11053907871246338,
      "learning_rate": 5.389719496847058e-06,
      "loss": 0.0147,
      "step": 1419340
    },
    {
      "epoch": 2.3228137703501504,
      "grad_norm": 0.5794406533241272,
      "learning_rate": 5.389653604633541e-06,
      "loss": 0.0122,
      "step": 1419360
    },
    {
      "epoch": 2.3228465007888035,
      "grad_norm": 0.18471750617027283,
      "learning_rate": 5.389587712420024e-06,
      "loss": 0.014,
      "step": 1419380
    },
    {
      "epoch": 2.322879231227457,
      "grad_norm": 0.37398430705070496,
      "learning_rate": 5.389521820206507e-06,
      "loss": 0.0196,
      "step": 1419400
    },
    {
      "epoch": 2.3229119616661102,
      "grad_norm": 0.3222140669822693,
      "learning_rate": 5.389455927992989e-06,
      "loss": 0.0131,
      "step": 1419420
    },
    {
      "epoch": 2.3229446921047634,
      "grad_norm": 0.8969058990478516,
      "learning_rate": 5.389390035779473e-06,
      "loss": 0.0172,
      "step": 1419440
    },
    {
      "epoch": 2.322977422543417,
      "grad_norm": 0.410033643245697,
      "learning_rate": 5.389324143565955e-06,
      "loss": 0.0156,
      "step": 1419460
    },
    {
      "epoch": 2.32301015298207,
      "grad_norm": 0.8916215300559998,
      "learning_rate": 5.389258251352438e-06,
      "loss": 0.0174,
      "step": 1419480
    },
    {
      "epoch": 2.3230428834207237,
      "grad_norm": 0.47686418890953064,
      "learning_rate": 5.389192359138921e-06,
      "loss": 0.0158,
      "step": 1419500
    },
    {
      "epoch": 2.323075613859377,
      "grad_norm": 0.12276103347539902,
      "learning_rate": 5.389126466925404e-06,
      "loss": 0.0162,
      "step": 1419520
    },
    {
      "epoch": 2.3231083442980305,
      "grad_norm": 0.22740083932876587,
      "learning_rate": 5.3890605747118864e-06,
      "loss": 0.0132,
      "step": 1419540
    },
    {
      "epoch": 2.3231410747366836,
      "grad_norm": 0.7129830121994019,
      "learning_rate": 5.38899468249837e-06,
      "loss": 0.0203,
      "step": 1419560
    },
    {
      "epoch": 2.323173805175337,
      "grad_norm": 1.0171440839767456,
      "learning_rate": 5.388928790284852e-06,
      "loss": 0.0207,
      "step": 1419580
    },
    {
      "epoch": 2.3232065356139904,
      "grad_norm": 0.16612346470355988,
      "learning_rate": 5.3888628980713355e-06,
      "loss": 0.0165,
      "step": 1419600
    },
    {
      "epoch": 2.3232392660526435,
      "grad_norm": 1.1128593683242798,
      "learning_rate": 5.388797005857819e-06,
      "loss": 0.0181,
      "step": 1419620
    },
    {
      "epoch": 2.323271996491297,
      "grad_norm": 0.5642328858375549,
      "learning_rate": 5.388731113644301e-06,
      "loss": 0.02,
      "step": 1419640
    },
    {
      "epoch": 2.3233047269299503,
      "grad_norm": 0.38861238956451416,
      "learning_rate": 5.3886652214307846e-06,
      "loss": 0.0142,
      "step": 1419660
    },
    {
      "epoch": 2.323337457368604,
      "grad_norm": 1.05413019657135,
      "learning_rate": 5.3885993292172665e-06,
      "loss": 0.021,
      "step": 1419680
    },
    {
      "epoch": 2.323370187807257,
      "grad_norm": 0.23659175634384155,
      "learning_rate": 5.38853343700375e-06,
      "loss": 0.0203,
      "step": 1419700
    },
    {
      "epoch": 2.32340291824591,
      "grad_norm": 0.584882915019989,
      "learning_rate": 5.388467544790233e-06,
      "loss": 0.0149,
      "step": 1419720
    },
    {
      "epoch": 2.3234356486845638,
      "grad_norm": 1.9785771369934082,
      "learning_rate": 5.3884016525767155e-06,
      "loss": 0.0132,
      "step": 1419740
    },
    {
      "epoch": 2.323468379123217,
      "grad_norm": 0.33946919441223145,
      "learning_rate": 5.388335760363198e-06,
      "loss": 0.0154,
      "step": 1419760
    },
    {
      "epoch": 2.3235011095618705,
      "grad_norm": 0.25237610936164856,
      "learning_rate": 5.388269868149682e-06,
      "loss": 0.0154,
      "step": 1419780
    },
    {
      "epoch": 2.3235338400005237,
      "grad_norm": 0.6731129884719849,
      "learning_rate": 5.388203975936164e-06,
      "loss": 0.012,
      "step": 1419800
    },
    {
      "epoch": 2.3235665704391772,
      "grad_norm": 1.6621603965759277,
      "learning_rate": 5.388138083722647e-06,
      "loss": 0.0167,
      "step": 1419820
    },
    {
      "epoch": 2.3235993008778304,
      "grad_norm": 0.3296600580215454,
      "learning_rate": 5.388072191509129e-06,
      "loss": 0.0178,
      "step": 1419840
    },
    {
      "epoch": 2.3236320313164835,
      "grad_norm": 0.5179660320281982,
      "learning_rate": 5.388006299295613e-06,
      "loss": 0.0109,
      "step": 1419860
    },
    {
      "epoch": 2.323664761755137,
      "grad_norm": 0.041536297649145126,
      "learning_rate": 5.387940407082095e-06,
      "loss": 0.0142,
      "step": 1419880
    },
    {
      "epoch": 2.3236974921937903,
      "grad_norm": 0.09609352797269821,
      "learning_rate": 5.387874514868578e-06,
      "loss": 0.011,
      "step": 1419900
    },
    {
      "epoch": 2.323730222632444,
      "grad_norm": 0.5058188438415527,
      "learning_rate": 5.387808622655061e-06,
      "loss": 0.0097,
      "step": 1419920
    },
    {
      "epoch": 2.323762953071097,
      "grad_norm": 0.21517664194107056,
      "learning_rate": 5.387742730441544e-06,
      "loss": 0.0184,
      "step": 1419940
    },
    {
      "epoch": 2.3237956835097506,
      "grad_norm": 0.5222682356834412,
      "learning_rate": 5.387676838228027e-06,
      "loss": 0.0128,
      "step": 1419960
    },
    {
      "epoch": 2.323828413948404,
      "grad_norm": 0.3524622321128845,
      "learning_rate": 5.38761094601451e-06,
      "loss": 0.0155,
      "step": 1419980
    },
    {
      "epoch": 2.323861144387057,
      "grad_norm": 0.2228950411081314,
      "learning_rate": 5.387545053800993e-06,
      "loss": 0.01,
      "step": 1420000
    },
    {
      "epoch": 2.3238938748257105,
      "grad_norm": 0.9313310980796814,
      "learning_rate": 5.3874791615874756e-06,
      "loss": 0.0164,
      "step": 1420020
    },
    {
      "epoch": 2.3239266052643637,
      "grad_norm": 0.7988677620887756,
      "learning_rate": 5.387413269373959e-06,
      "loss": 0.0146,
      "step": 1420040
    },
    {
      "epoch": 2.3239593357030173,
      "grad_norm": 0.5962759852409363,
      "learning_rate": 5.387347377160441e-06,
      "loss": 0.01,
      "step": 1420060
    },
    {
      "epoch": 2.3239920661416704,
      "grad_norm": 0.11257678270339966,
      "learning_rate": 5.387281484946925e-06,
      "loss": 0.01,
      "step": 1420080
    },
    {
      "epoch": 2.324024796580324,
      "grad_norm": 0.18850091099739075,
      "learning_rate": 5.3872155927334065e-06,
      "loss": 0.0172,
      "step": 1420100
    },
    {
      "epoch": 2.324057527018977,
      "grad_norm": 0.1655891239643097,
      "learning_rate": 5.38714970051989e-06,
      "loss": 0.0266,
      "step": 1420120
    },
    {
      "epoch": 2.3240902574576303,
      "grad_norm": 0.20035839080810547,
      "learning_rate": 5.387083808306373e-06,
      "loss": 0.0244,
      "step": 1420140
    },
    {
      "epoch": 2.324122987896284,
      "grad_norm": 1.0820804834365845,
      "learning_rate": 5.387017916092856e-06,
      "loss": 0.0165,
      "step": 1420160
    },
    {
      "epoch": 2.324155718334937,
      "grad_norm": 0.1379626840353012,
      "learning_rate": 5.386952023879338e-06,
      "loss": 0.0094,
      "step": 1420180
    },
    {
      "epoch": 2.3241884487735907,
      "grad_norm": 0.4460715055465698,
      "learning_rate": 5.386886131665822e-06,
      "loss": 0.0163,
      "step": 1420200
    },
    {
      "epoch": 2.324221179212244,
      "grad_norm": 0.44857773184776306,
      "learning_rate": 5.386820239452304e-06,
      "loss": 0.0119,
      "step": 1420220
    },
    {
      "epoch": 2.324253909650897,
      "grad_norm": 2.170711040496826,
      "learning_rate": 5.386754347238787e-06,
      "loss": 0.0223,
      "step": 1420240
    },
    {
      "epoch": 2.3242866400895505,
      "grad_norm": 0.506956934928894,
      "learning_rate": 5.386688455025269e-06,
      "loss": 0.0165,
      "step": 1420260
    },
    {
      "epoch": 2.3243193705282037,
      "grad_norm": 0.19681257009506226,
      "learning_rate": 5.386622562811753e-06,
      "loss": 0.0138,
      "step": 1420280
    },
    {
      "epoch": 2.3243521009668573,
      "grad_norm": 0.3713034391403198,
      "learning_rate": 5.3865566705982365e-06,
      "loss": 0.0192,
      "step": 1420300
    },
    {
      "epoch": 2.3243848314055104,
      "grad_norm": 0.34884393215179443,
      "learning_rate": 5.386490778384718e-06,
      "loss": 0.0176,
      "step": 1420320
    },
    {
      "epoch": 2.3244175618441636,
      "grad_norm": 0.041761480271816254,
      "learning_rate": 5.386424886171202e-06,
      "loss": 0.0136,
      "step": 1420340
    },
    {
      "epoch": 2.324450292282817,
      "grad_norm": 1.0595035552978516,
      "learning_rate": 5.386358993957685e-06,
      "loss": 0.0233,
      "step": 1420360
    },
    {
      "epoch": 2.3244830227214703,
      "grad_norm": 0.28821006417274475,
      "learning_rate": 5.3862931017441674e-06,
      "loss": 0.0135,
      "step": 1420380
    },
    {
      "epoch": 2.324515753160124,
      "grad_norm": 0.13176612555980682,
      "learning_rate": 5.38622720953065e-06,
      "loss": 0.0145,
      "step": 1420400
    },
    {
      "epoch": 2.324548483598777,
      "grad_norm": 0.2397785186767578,
      "learning_rate": 5.386161317317134e-06,
      "loss": 0.0178,
      "step": 1420420
    },
    {
      "epoch": 2.3245812140374307,
      "grad_norm": 0.11711479723453522,
      "learning_rate": 5.386095425103616e-06,
      "loss": 0.0137,
      "step": 1420440
    },
    {
      "epoch": 2.324613944476084,
      "grad_norm": 0.11973840743303299,
      "learning_rate": 5.386029532890099e-06,
      "loss": 0.0128,
      "step": 1420460
    },
    {
      "epoch": 2.324646674914737,
      "grad_norm": 0.27547457814216614,
      "learning_rate": 5.385963640676581e-06,
      "loss": 0.0086,
      "step": 1420480
    },
    {
      "epoch": 2.3246794053533906,
      "grad_norm": 0.905655562877655,
      "learning_rate": 5.385897748463065e-06,
      "loss": 0.0155,
      "step": 1420500
    },
    {
      "epoch": 2.3247121357920437,
      "grad_norm": 0.41547703742980957,
      "learning_rate": 5.3858318562495474e-06,
      "loss": 0.0157,
      "step": 1420520
    },
    {
      "epoch": 2.3247448662306973,
      "grad_norm": 0.4303288757801056,
      "learning_rate": 5.38576596403603e-06,
      "loss": 0.0105,
      "step": 1420540
    },
    {
      "epoch": 2.3247775966693505,
      "grad_norm": 0.08682273328304291,
      "learning_rate": 5.385700071822513e-06,
      "loss": 0.0163,
      "step": 1420560
    },
    {
      "epoch": 2.324810327108004,
      "grad_norm": 0.5238895416259766,
      "learning_rate": 5.3856341796089965e-06,
      "loss": 0.0153,
      "step": 1420580
    },
    {
      "epoch": 2.324843057546657,
      "grad_norm": 0.5100589990615845,
      "learning_rate": 5.385568287395478e-06,
      "loss": 0.0194,
      "step": 1420600
    },
    {
      "epoch": 2.3248757879853104,
      "grad_norm": 0.8534764051437378,
      "learning_rate": 5.385502395181962e-06,
      "loss": 0.0222,
      "step": 1420620
    },
    {
      "epoch": 2.324908518423964,
      "grad_norm": 0.2695326507091522,
      "learning_rate": 5.385436502968444e-06,
      "loss": 0.0147,
      "step": 1420640
    },
    {
      "epoch": 2.324941248862617,
      "grad_norm": 0.40094897150993347,
      "learning_rate": 5.3853706107549275e-06,
      "loss": 0.017,
      "step": 1420660
    },
    {
      "epoch": 2.3249739793012707,
      "grad_norm": 0.1633831411600113,
      "learning_rate": 5.385304718541411e-06,
      "loss": 0.0142,
      "step": 1420680
    },
    {
      "epoch": 2.325006709739924,
      "grad_norm": 0.17809094488620758,
      "learning_rate": 5.385238826327893e-06,
      "loss": 0.0125,
      "step": 1420700
    },
    {
      "epoch": 2.3250394401785774,
      "grad_norm": 0.09311935305595398,
      "learning_rate": 5.3851729341143765e-06,
      "loss": 0.0118,
      "step": 1420720
    },
    {
      "epoch": 2.3250721706172306,
      "grad_norm": 1.4068467617034912,
      "learning_rate": 5.385107041900859e-06,
      "loss": 0.0107,
      "step": 1420740
    },
    {
      "epoch": 2.3251049010558837,
      "grad_norm": 0.2741059958934784,
      "learning_rate": 5.385041149687342e-06,
      "loss": 0.0131,
      "step": 1420760
    },
    {
      "epoch": 2.3251376314945373,
      "grad_norm": 0.5645964741706848,
      "learning_rate": 5.384975257473825e-06,
      "loss": 0.017,
      "step": 1420780
    },
    {
      "epoch": 2.3251703619331905,
      "grad_norm": 1.8014678955078125,
      "learning_rate": 5.384909365260308e-06,
      "loss": 0.0159,
      "step": 1420800
    },
    {
      "epoch": 2.325203092371844,
      "grad_norm": 0.8320713639259338,
      "learning_rate": 5.38484347304679e-06,
      "loss": 0.0132,
      "step": 1420820
    },
    {
      "epoch": 2.3252358228104972,
      "grad_norm": 0.1825837343931198,
      "learning_rate": 5.384777580833274e-06,
      "loss": 0.0104,
      "step": 1420840
    },
    {
      "epoch": 2.325268553249151,
      "grad_norm": 0.6168051958084106,
      "learning_rate": 5.384711688619756e-06,
      "loss": 0.0108,
      "step": 1420860
    },
    {
      "epoch": 2.325301283687804,
      "grad_norm": 0.30744898319244385,
      "learning_rate": 5.384645796406239e-06,
      "loss": 0.0146,
      "step": 1420880
    },
    {
      "epoch": 2.325334014126457,
      "grad_norm": 0.12054499983787537,
      "learning_rate": 5.384579904192721e-06,
      "loss": 0.0097,
      "step": 1420900
    },
    {
      "epoch": 2.3253667445651107,
      "grad_norm": 0.17129839956760406,
      "learning_rate": 5.384514011979205e-06,
      "loss": 0.0147,
      "step": 1420920
    },
    {
      "epoch": 2.325399475003764,
      "grad_norm": 0.46716663241386414,
      "learning_rate": 5.3844481197656875e-06,
      "loss": 0.0152,
      "step": 1420940
    },
    {
      "epoch": 2.3254322054424175,
      "grad_norm": 0.3796814978122711,
      "learning_rate": 5.38438222755217e-06,
      "loss": 0.0197,
      "step": 1420960
    },
    {
      "epoch": 2.3254649358810706,
      "grad_norm": 0.23321674764156342,
      "learning_rate": 5.384316335338653e-06,
      "loss": 0.0171,
      "step": 1420980
    },
    {
      "epoch": 2.325497666319724,
      "grad_norm": 1.3797733783721924,
      "learning_rate": 5.384250443125137e-06,
      "loss": 0.0185,
      "step": 1421000
    },
    {
      "epoch": 2.3255303967583774,
      "grad_norm": 0.4280204474925995,
      "learning_rate": 5.384184550911619e-06,
      "loss": 0.0109,
      "step": 1421020
    },
    {
      "epoch": 2.3255631271970305,
      "grad_norm": 1.782711148262024,
      "learning_rate": 5.384118658698102e-06,
      "loss": 0.0283,
      "step": 1421040
    },
    {
      "epoch": 2.325595857635684,
      "grad_norm": 0.48155802488327026,
      "learning_rate": 5.384052766484586e-06,
      "loss": 0.0161,
      "step": 1421060
    },
    {
      "epoch": 2.3256285880743373,
      "grad_norm": 0.27860650420188904,
      "learning_rate": 5.3839868742710676e-06,
      "loss": 0.0122,
      "step": 1421080
    },
    {
      "epoch": 2.325661318512991,
      "grad_norm": 0.6226668953895569,
      "learning_rate": 5.383920982057551e-06,
      "loss": 0.016,
      "step": 1421100
    },
    {
      "epoch": 2.325694048951644,
      "grad_norm": 0.4090602993965149,
      "learning_rate": 5.383855089844033e-06,
      "loss": 0.012,
      "step": 1421120
    },
    {
      "epoch": 2.3257267793902976,
      "grad_norm": 0.3586188852787018,
      "learning_rate": 5.383789197630517e-06,
      "loss": 0.0164,
      "step": 1421140
    },
    {
      "epoch": 2.3257595098289507,
      "grad_norm": 1.0959910154342651,
      "learning_rate": 5.383723305416999e-06,
      "loss": 0.0198,
      "step": 1421160
    },
    {
      "epoch": 2.325792240267604,
      "grad_norm": 0.5241464376449585,
      "learning_rate": 5.383657413203482e-06,
      "loss": 0.0146,
      "step": 1421180
    },
    {
      "epoch": 2.3258249707062575,
      "grad_norm": 0.289429634809494,
      "learning_rate": 5.383591520989965e-06,
      "loss": 0.0202,
      "step": 1421200
    },
    {
      "epoch": 2.3258577011449106,
      "grad_norm": 0.16975291073322296,
      "learning_rate": 5.383525628776448e-06,
      "loss": 0.0141,
      "step": 1421220
    },
    {
      "epoch": 2.3258904315835642,
      "grad_norm": 0.2158515453338623,
      "learning_rate": 5.38345973656293e-06,
      "loss": 0.0124,
      "step": 1421240
    },
    {
      "epoch": 2.3259231620222174,
      "grad_norm": 0.4166245758533478,
      "learning_rate": 5.383393844349414e-06,
      "loss": 0.0149,
      "step": 1421260
    },
    {
      "epoch": 2.325955892460871,
      "grad_norm": 0.23827244341373444,
      "learning_rate": 5.383327952135896e-06,
      "loss": 0.0143,
      "step": 1421280
    },
    {
      "epoch": 2.325988622899524,
      "grad_norm": 0.3219594955444336,
      "learning_rate": 5.383262059922379e-06,
      "loss": 0.0109,
      "step": 1421300
    },
    {
      "epoch": 2.3260213533381773,
      "grad_norm": 0.13330571353435516,
      "learning_rate": 5.383196167708862e-06,
      "loss": 0.0144,
      "step": 1421320
    },
    {
      "epoch": 2.326054083776831,
      "grad_norm": 0.1576777994632721,
      "learning_rate": 5.383130275495345e-06,
      "loss": 0.009,
      "step": 1421340
    },
    {
      "epoch": 2.326086814215484,
      "grad_norm": 1.3270585536956787,
      "learning_rate": 5.3830643832818284e-06,
      "loss": 0.018,
      "step": 1421360
    },
    {
      "epoch": 2.3261195446541376,
      "grad_norm": 0.11966169625520706,
      "learning_rate": 5.382998491068311e-06,
      "loss": 0.0104,
      "step": 1421380
    },
    {
      "epoch": 2.3261522750927908,
      "grad_norm": 0.09751661121845245,
      "learning_rate": 5.382932598854794e-06,
      "loss": 0.0133,
      "step": 1421400
    },
    {
      "epoch": 2.3261850055314444,
      "grad_norm": 0.23724369704723358,
      "learning_rate": 5.382866706641277e-06,
      "loss": 0.012,
      "step": 1421420
    },
    {
      "epoch": 2.3262177359700975,
      "grad_norm": 0.12538202106952667,
      "learning_rate": 5.38280081442776e-06,
      "loss": 0.01,
      "step": 1421440
    },
    {
      "epoch": 2.3262504664087507,
      "grad_norm": 0.4774342477321625,
      "learning_rate": 5.382734922214242e-06,
      "loss": 0.0144,
      "step": 1421460
    },
    {
      "epoch": 2.3262831968474043,
      "grad_norm": 0.5598990321159363,
      "learning_rate": 5.382669030000726e-06,
      "loss": 0.0148,
      "step": 1421480
    },
    {
      "epoch": 2.3263159272860574,
      "grad_norm": 0.19350364804267883,
      "learning_rate": 5.382603137787208e-06,
      "loss": 0.016,
      "step": 1421500
    },
    {
      "epoch": 2.326348657724711,
      "grad_norm": 0.39084938168525696,
      "learning_rate": 5.382537245573691e-06,
      "loss": 0.0203,
      "step": 1421520
    },
    {
      "epoch": 2.326381388163364,
      "grad_norm": 0.36568477749824524,
      "learning_rate": 5.382471353360174e-06,
      "loss": 0.0137,
      "step": 1421540
    },
    {
      "epoch": 2.3264141186020177,
      "grad_norm": 0.1089358851313591,
      "learning_rate": 5.382405461146657e-06,
      "loss": 0.0094,
      "step": 1421560
    },
    {
      "epoch": 2.326446849040671,
      "grad_norm": 0.4102626442909241,
      "learning_rate": 5.3823395689331394e-06,
      "loss": 0.014,
      "step": 1421580
    },
    {
      "epoch": 2.326479579479324,
      "grad_norm": 0.15060211718082428,
      "learning_rate": 5.382273676719623e-06,
      "loss": 0.012,
      "step": 1421600
    },
    {
      "epoch": 2.3265123099179776,
      "grad_norm": 0.2374887764453888,
      "learning_rate": 5.382207784506105e-06,
      "loss": 0.0115,
      "step": 1421620
    },
    {
      "epoch": 2.326545040356631,
      "grad_norm": 0.2889058291912079,
      "learning_rate": 5.3821418922925885e-06,
      "loss": 0.0129,
      "step": 1421640
    },
    {
      "epoch": 2.3265777707952844,
      "grad_norm": 0.49222812056541443,
      "learning_rate": 5.38207600007907e-06,
      "loss": 0.0138,
      "step": 1421660
    },
    {
      "epoch": 2.3266105012339375,
      "grad_norm": 0.13250814378261566,
      "learning_rate": 5.382010107865554e-06,
      "loss": 0.0162,
      "step": 1421680
    },
    {
      "epoch": 2.3266432316725907,
      "grad_norm": 0.13631221652030945,
      "learning_rate": 5.381944215652036e-06,
      "loss": 0.0147,
      "step": 1421700
    },
    {
      "epoch": 2.3266759621112443,
      "grad_norm": 1.1661218404769897,
      "learning_rate": 5.3818783234385195e-06,
      "loss": 0.0116,
      "step": 1421720
    },
    {
      "epoch": 2.3267086925498974,
      "grad_norm": 0.5067073106765747,
      "learning_rate": 5.381812431225003e-06,
      "loss": 0.0087,
      "step": 1421740
    },
    {
      "epoch": 2.326741422988551,
      "grad_norm": 0.4970179796218872,
      "learning_rate": 5.381746539011485e-06,
      "loss": 0.0141,
      "step": 1421760
    },
    {
      "epoch": 2.326774153427204,
      "grad_norm": 0.10952542722225189,
      "learning_rate": 5.3816806467979685e-06,
      "loss": 0.0146,
      "step": 1421780
    },
    {
      "epoch": 2.3268068838658573,
      "grad_norm": 0.22008056938648224,
      "learning_rate": 5.381614754584451e-06,
      "loss": 0.012,
      "step": 1421800
    },
    {
      "epoch": 2.326839614304511,
      "grad_norm": 0.5232717394828796,
      "learning_rate": 5.381548862370935e-06,
      "loss": 0.0145,
      "step": 1421820
    },
    {
      "epoch": 2.326872344743164,
      "grad_norm": 0.31698229908943176,
      "learning_rate": 5.381482970157417e-06,
      "loss": 0.0132,
      "step": 1421840
    },
    {
      "epoch": 2.3269050751818177,
      "grad_norm": 0.159086674451828,
      "learning_rate": 5.3814170779439e-06,
      "loss": 0.0136,
      "step": 1421860
    },
    {
      "epoch": 2.326937805620471,
      "grad_norm": 0.3028833568096161,
      "learning_rate": 5.381351185730382e-06,
      "loss": 0.015,
      "step": 1421880
    },
    {
      "epoch": 2.3269705360591244,
      "grad_norm": 0.40256598591804504,
      "learning_rate": 5.381285293516866e-06,
      "loss": 0.0138,
      "step": 1421900
    },
    {
      "epoch": 2.3270032664977776,
      "grad_norm": 5.296393394470215,
      "learning_rate": 5.381219401303348e-06,
      "loss": 0.0124,
      "step": 1421920
    },
    {
      "epoch": 2.3270359969364307,
      "grad_norm": 0.29660874605178833,
      "learning_rate": 5.381153509089831e-06,
      "loss": 0.0144,
      "step": 1421940
    },
    {
      "epoch": 2.3270687273750843,
      "grad_norm": 0.062032755464315414,
      "learning_rate": 5.381087616876314e-06,
      "loss": 0.0108,
      "step": 1421960
    },
    {
      "epoch": 2.3271014578137375,
      "grad_norm": 0.1657576709985733,
      "learning_rate": 5.381021724662797e-06,
      "loss": 0.0134,
      "step": 1421980
    },
    {
      "epoch": 2.327134188252391,
      "grad_norm": 0.22479818761348724,
      "learning_rate": 5.3809558324492795e-06,
      "loss": 0.0138,
      "step": 1422000
    },
    {
      "epoch": 2.327166918691044,
      "grad_norm": 0.4947342872619629,
      "learning_rate": 5.380889940235763e-06,
      "loss": 0.0171,
      "step": 1422020
    },
    {
      "epoch": 2.327199649129698,
      "grad_norm": 2.0395689010620117,
      "learning_rate": 5.380824048022245e-06,
      "loss": 0.0139,
      "step": 1422040
    },
    {
      "epoch": 2.327232379568351,
      "grad_norm": 0.0826406329870224,
      "learning_rate": 5.3807581558087286e-06,
      "loss": 0.0173,
      "step": 1422060
    },
    {
      "epoch": 2.327265110007004,
      "grad_norm": 0.1934179961681366,
      "learning_rate": 5.380692263595212e-06,
      "loss": 0.01,
      "step": 1422080
    },
    {
      "epoch": 2.3272978404456577,
      "grad_norm": 0.45789259672164917,
      "learning_rate": 5.380626371381694e-06,
      "loss": 0.018,
      "step": 1422100
    },
    {
      "epoch": 2.327330570884311,
      "grad_norm": 0.348910391330719,
      "learning_rate": 5.380560479168178e-06,
      "loss": 0.014,
      "step": 1422120
    },
    {
      "epoch": 2.3273633013229644,
      "grad_norm": 0.09888071566820145,
      "learning_rate": 5.3804945869546595e-06,
      "loss": 0.0162,
      "step": 1422140
    },
    {
      "epoch": 2.3273960317616176,
      "grad_norm": 0.30600792169570923,
      "learning_rate": 5.380428694741143e-06,
      "loss": 0.0133,
      "step": 1422160
    },
    {
      "epoch": 2.327428762200271,
      "grad_norm": 0.6142817735671997,
      "learning_rate": 5.380362802527626e-06,
      "loss": 0.0165,
      "step": 1422180
    },
    {
      "epoch": 2.3274614926389243,
      "grad_norm": 0.5377442836761475,
      "learning_rate": 5.380296910314109e-06,
      "loss": 0.0206,
      "step": 1422200
    },
    {
      "epoch": 2.3274942230775775,
      "grad_norm": 0.14688508212566376,
      "learning_rate": 5.380231018100591e-06,
      "loss": 0.0117,
      "step": 1422220
    },
    {
      "epoch": 2.327526953516231,
      "grad_norm": 1.8003679513931274,
      "learning_rate": 5.380165125887075e-06,
      "loss": 0.024,
      "step": 1422240
    },
    {
      "epoch": 2.327559683954884,
      "grad_norm": 0.3967902660369873,
      "learning_rate": 5.380099233673557e-06,
      "loss": 0.0197,
      "step": 1422260
    },
    {
      "epoch": 2.327592414393538,
      "grad_norm": 1.0406535863876343,
      "learning_rate": 5.38003334146004e-06,
      "loss": 0.0194,
      "step": 1422280
    },
    {
      "epoch": 2.327625144832191,
      "grad_norm": 0.25721293687820435,
      "learning_rate": 5.379967449246522e-06,
      "loss": 0.0136,
      "step": 1422300
    },
    {
      "epoch": 2.3276578752708446,
      "grad_norm": 0.08091581612825394,
      "learning_rate": 5.379901557033006e-06,
      "loss": 0.0137,
      "step": 1422320
    },
    {
      "epoch": 2.3276906057094977,
      "grad_norm": 0.5685951709747314,
      "learning_rate": 5.379835664819489e-06,
      "loss": 0.0215,
      "step": 1422340
    },
    {
      "epoch": 2.327723336148151,
      "grad_norm": 0.10134948045015335,
      "learning_rate": 5.379769772605971e-06,
      "loss": 0.018,
      "step": 1422360
    },
    {
      "epoch": 2.3277560665868045,
      "grad_norm": 0.12430530041456223,
      "learning_rate": 5.379703880392454e-06,
      "loss": 0.0175,
      "step": 1422380
    },
    {
      "epoch": 2.3277887970254576,
      "grad_norm": 0.41029056906700134,
      "learning_rate": 5.379637988178938e-06,
      "loss": 0.0124,
      "step": 1422400
    },
    {
      "epoch": 2.327821527464111,
      "grad_norm": 0.10097245126962662,
      "learning_rate": 5.3795720959654204e-06,
      "loss": 0.0099,
      "step": 1422420
    },
    {
      "epoch": 2.3278542579027643,
      "grad_norm": 0.2991410791873932,
      "learning_rate": 5.379506203751903e-06,
      "loss": 0.0136,
      "step": 1422440
    },
    {
      "epoch": 2.327886988341418,
      "grad_norm": 0.1841672956943512,
      "learning_rate": 5.379440311538387e-06,
      "loss": 0.0127,
      "step": 1422460
    },
    {
      "epoch": 2.327919718780071,
      "grad_norm": 0.10561039298772812,
      "learning_rate": 5.379374419324869e-06,
      "loss": 0.01,
      "step": 1422480
    },
    {
      "epoch": 2.3279524492187242,
      "grad_norm": 1.547568917274475,
      "learning_rate": 5.379308527111352e-06,
      "loss": 0.0171,
      "step": 1422500
    },
    {
      "epoch": 2.327985179657378,
      "grad_norm": 0.36275050044059753,
      "learning_rate": 5.379242634897834e-06,
      "loss": 0.0096,
      "step": 1422520
    },
    {
      "epoch": 2.328017910096031,
      "grad_norm": 0.09909430146217346,
      "learning_rate": 5.379176742684318e-06,
      "loss": 0.0225,
      "step": 1422540
    },
    {
      "epoch": 2.3280506405346846,
      "grad_norm": 0.5529853701591492,
      "learning_rate": 5.3791108504708005e-06,
      "loss": 0.014,
      "step": 1422560
    },
    {
      "epoch": 2.3280833709733377,
      "grad_norm": 0.44832268357276917,
      "learning_rate": 5.379044958257283e-06,
      "loss": 0.0176,
      "step": 1422580
    },
    {
      "epoch": 2.3281161014119913,
      "grad_norm": 0.15419575572013855,
      "learning_rate": 5.378979066043766e-06,
      "loss": 0.0133,
      "step": 1422600
    },
    {
      "epoch": 2.3281488318506445,
      "grad_norm": 0.17790552973747253,
      "learning_rate": 5.3789131738302495e-06,
      "loss": 0.0178,
      "step": 1422620
    },
    {
      "epoch": 2.3281815622892976,
      "grad_norm": 0.7552570700645447,
      "learning_rate": 5.378847281616731e-06,
      "loss": 0.0181,
      "step": 1422640
    },
    {
      "epoch": 2.328214292727951,
      "grad_norm": 0.7546027898788452,
      "learning_rate": 5.378781389403215e-06,
      "loss": 0.0135,
      "step": 1422660
    },
    {
      "epoch": 2.3282470231666044,
      "grad_norm": 0.9445375800132751,
      "learning_rate": 5.378715497189697e-06,
      "loss": 0.017,
      "step": 1422680
    },
    {
      "epoch": 2.328279753605258,
      "grad_norm": 0.9010039567947388,
      "learning_rate": 5.3786496049761805e-06,
      "loss": 0.0165,
      "step": 1422700
    },
    {
      "epoch": 2.328312484043911,
      "grad_norm": 0.2111891657114029,
      "learning_rate": 5.378583712762662e-06,
      "loss": 0.0151,
      "step": 1422720
    },
    {
      "epoch": 2.3283452144825647,
      "grad_norm": 0.13440045714378357,
      "learning_rate": 5.378517820549146e-06,
      "loss": 0.0109,
      "step": 1422740
    },
    {
      "epoch": 2.328377944921218,
      "grad_norm": 0.24817577004432678,
      "learning_rate": 5.3784519283356295e-06,
      "loss": 0.0143,
      "step": 1422760
    },
    {
      "epoch": 2.328410675359871,
      "grad_norm": 1.0270075798034668,
      "learning_rate": 5.3783860361221114e-06,
      "loss": 0.0111,
      "step": 1422780
    },
    {
      "epoch": 2.3284434057985246,
      "grad_norm": 0.3819844126701355,
      "learning_rate": 5.378320143908595e-06,
      "loss": 0.0167,
      "step": 1422800
    },
    {
      "epoch": 2.3284761362371778,
      "grad_norm": 0.3027867376804352,
      "learning_rate": 5.378254251695078e-06,
      "loss": 0.0124,
      "step": 1422820
    },
    {
      "epoch": 2.3285088666758313,
      "grad_norm": 0.18616685271263123,
      "learning_rate": 5.3781883594815605e-06,
      "loss": 0.0109,
      "step": 1422840
    },
    {
      "epoch": 2.3285415971144845,
      "grad_norm": 0.434498131275177,
      "learning_rate": 5.378122467268043e-06,
      "loss": 0.0117,
      "step": 1422860
    },
    {
      "epoch": 2.328574327553138,
      "grad_norm": 0.6337189674377441,
      "learning_rate": 5.378056575054527e-06,
      "loss": 0.0161,
      "step": 1422880
    },
    {
      "epoch": 2.3286070579917912,
      "grad_norm": 0.8439161777496338,
      "learning_rate": 5.377990682841009e-06,
      "loss": 0.0116,
      "step": 1422900
    },
    {
      "epoch": 2.3286397884304444,
      "grad_norm": 0.29915285110473633,
      "learning_rate": 5.377924790627492e-06,
      "loss": 0.0122,
      "step": 1422920
    },
    {
      "epoch": 2.328672518869098,
      "grad_norm": 0.506138265132904,
      "learning_rate": 5.377858898413974e-06,
      "loss": 0.0119,
      "step": 1422940
    },
    {
      "epoch": 2.328705249307751,
      "grad_norm": 0.26286959648132324,
      "learning_rate": 5.377793006200458e-06,
      "loss": 0.0107,
      "step": 1422960
    },
    {
      "epoch": 2.3287379797464047,
      "grad_norm": 0.6104253530502319,
      "learning_rate": 5.3777271139869405e-06,
      "loss": 0.0141,
      "step": 1422980
    },
    {
      "epoch": 2.328770710185058,
      "grad_norm": 0.5550436973571777,
      "learning_rate": 5.377661221773423e-06,
      "loss": 0.0156,
      "step": 1423000
    },
    {
      "epoch": 2.3288034406237115,
      "grad_norm": 0.1875537633895874,
      "learning_rate": 5.377595329559906e-06,
      "loss": 0.016,
      "step": 1423020
    },
    {
      "epoch": 2.3288361710623646,
      "grad_norm": 0.6516813039779663,
      "learning_rate": 5.37752943734639e-06,
      "loss": 0.0199,
      "step": 1423040
    },
    {
      "epoch": 2.3288689015010178,
      "grad_norm": 0.4421604573726654,
      "learning_rate": 5.3774635451328715e-06,
      "loss": 0.0106,
      "step": 1423060
    },
    {
      "epoch": 2.3289016319396714,
      "grad_norm": 0.4184747338294983,
      "learning_rate": 5.377397652919355e-06,
      "loss": 0.0147,
      "step": 1423080
    },
    {
      "epoch": 2.3289343623783245,
      "grad_norm": 0.14174100756645203,
      "learning_rate": 5.377331760705837e-06,
      "loss": 0.0182,
      "step": 1423100
    },
    {
      "epoch": 2.328967092816978,
      "grad_norm": 0.5951569676399231,
      "learning_rate": 5.3772658684923206e-06,
      "loss": 0.0224,
      "step": 1423120
    },
    {
      "epoch": 2.3289998232556313,
      "grad_norm": 0.16528043150901794,
      "learning_rate": 5.377199976278804e-06,
      "loss": 0.0121,
      "step": 1423140
    },
    {
      "epoch": 2.329032553694285,
      "grad_norm": 0.8498804569244385,
      "learning_rate": 5.377134084065286e-06,
      "loss": 0.012,
      "step": 1423160
    },
    {
      "epoch": 2.329065284132938,
      "grad_norm": 0.22991329431533813,
      "learning_rate": 5.37706819185177e-06,
      "loss": 0.0164,
      "step": 1423180
    },
    {
      "epoch": 2.329098014571591,
      "grad_norm": 0.09857599437236786,
      "learning_rate": 5.377002299638252e-06,
      "loss": 0.0105,
      "step": 1423200
    },
    {
      "epoch": 2.3291307450102448,
      "grad_norm": 0.19528619945049286,
      "learning_rate": 5.376936407424735e-06,
      "loss": 0.0115,
      "step": 1423220
    },
    {
      "epoch": 2.329163475448898,
      "grad_norm": 0.4408898651599884,
      "learning_rate": 5.376870515211218e-06,
      "loss": 0.0156,
      "step": 1423240
    },
    {
      "epoch": 2.3291962058875515,
      "grad_norm": 0.5078026056289673,
      "learning_rate": 5.3768046229977014e-06,
      "loss": 0.019,
      "step": 1423260
    },
    {
      "epoch": 2.3292289363262046,
      "grad_norm": 0.13245807588100433,
      "learning_rate": 5.376738730784183e-06,
      "loss": 0.014,
      "step": 1423280
    },
    {
      "epoch": 2.329261666764858,
      "grad_norm": 0.24870704114437103,
      "learning_rate": 5.376672838570667e-06,
      "loss": 0.0132,
      "step": 1423300
    },
    {
      "epoch": 2.3292943972035114,
      "grad_norm": 0.47773277759552,
      "learning_rate": 5.376606946357149e-06,
      "loss": 0.013,
      "step": 1423320
    },
    {
      "epoch": 2.3293271276421645,
      "grad_norm": 0.36917898058891296,
      "learning_rate": 5.376541054143632e-06,
      "loss": 0.0149,
      "step": 1423340
    },
    {
      "epoch": 2.329359858080818,
      "grad_norm": 0.27669450640678406,
      "learning_rate": 5.376475161930115e-06,
      "loss": 0.0188,
      "step": 1423360
    },
    {
      "epoch": 2.3293925885194713,
      "grad_norm": 0.2189585119485855,
      "learning_rate": 5.376409269716598e-06,
      "loss": 0.009,
      "step": 1423380
    },
    {
      "epoch": 2.3294253189581244,
      "grad_norm": 1.3183563947677612,
      "learning_rate": 5.376343377503081e-06,
      "loss": 0.0167,
      "step": 1423400
    },
    {
      "epoch": 2.329458049396778,
      "grad_norm": 0.3891803026199341,
      "learning_rate": 5.376277485289564e-06,
      "loss": 0.0088,
      "step": 1423420
    },
    {
      "epoch": 2.329490779835431,
      "grad_norm": 0.25744137167930603,
      "learning_rate": 5.376211593076046e-06,
      "loss": 0.0177,
      "step": 1423440
    },
    {
      "epoch": 2.3295235102740848,
      "grad_norm": 0.5868393182754517,
      "learning_rate": 5.37614570086253e-06,
      "loss": 0.0225,
      "step": 1423460
    },
    {
      "epoch": 2.329556240712738,
      "grad_norm": 0.40407779812812805,
      "learning_rate": 5.376079808649013e-06,
      "loss": 0.0173,
      "step": 1423480
    },
    {
      "epoch": 2.3295889711513915,
      "grad_norm": 0.34342658519744873,
      "learning_rate": 5.376013916435495e-06,
      "loss": 0.0158,
      "step": 1423500
    },
    {
      "epoch": 2.3296217015900447,
      "grad_norm": 0.6838365197181702,
      "learning_rate": 5.375948024221979e-06,
      "loss": 0.0161,
      "step": 1423520
    },
    {
      "epoch": 2.329654432028698,
      "grad_norm": 0.36733123660087585,
      "learning_rate": 5.375882132008461e-06,
      "loss": 0.0131,
      "step": 1423540
    },
    {
      "epoch": 2.3296871624673514,
      "grad_norm": 0.4994255602359772,
      "learning_rate": 5.375816239794944e-06,
      "loss": 0.0136,
      "step": 1423560
    },
    {
      "epoch": 2.3297198929060046,
      "grad_norm": 0.403157114982605,
      "learning_rate": 5.375750347581427e-06,
      "loss": 0.0141,
      "step": 1423580
    },
    {
      "epoch": 2.329752623344658,
      "grad_norm": 0.1303202211856842,
      "learning_rate": 5.37568445536791e-06,
      "loss": 0.0129,
      "step": 1423600
    },
    {
      "epoch": 2.3297853537833113,
      "grad_norm": 0.3329017162322998,
      "learning_rate": 5.3756185631543924e-06,
      "loss": 0.0178,
      "step": 1423620
    },
    {
      "epoch": 2.329818084221965,
      "grad_norm": 0.18386034667491913,
      "learning_rate": 5.375552670940876e-06,
      "loss": 0.0227,
      "step": 1423640
    },
    {
      "epoch": 2.329850814660618,
      "grad_norm": 0.4650822579860687,
      "learning_rate": 5.375486778727358e-06,
      "loss": 0.0152,
      "step": 1423660
    },
    {
      "epoch": 2.329883545099271,
      "grad_norm": 0.08686616271734238,
      "learning_rate": 5.3754208865138415e-06,
      "loss": 0.0132,
      "step": 1423680
    },
    {
      "epoch": 2.329916275537925,
      "grad_norm": 0.6658890247344971,
      "learning_rate": 5.375354994300323e-06,
      "loss": 0.0168,
      "step": 1423700
    },
    {
      "epoch": 2.329949005976578,
      "grad_norm": 0.9146326184272766,
      "learning_rate": 5.375289102086807e-06,
      "loss": 0.0156,
      "step": 1423720
    },
    {
      "epoch": 2.3299817364152315,
      "grad_norm": 0.27258020639419556,
      "learning_rate": 5.375223209873289e-06,
      "loss": 0.014,
      "step": 1423740
    },
    {
      "epoch": 2.3300144668538847,
      "grad_norm": 0.38545796275138855,
      "learning_rate": 5.3751573176597725e-06,
      "loss": 0.0145,
      "step": 1423760
    },
    {
      "epoch": 2.3300471972925383,
      "grad_norm": 0.7886456251144409,
      "learning_rate": 5.375091425446255e-06,
      "loss": 0.0173,
      "step": 1423780
    },
    {
      "epoch": 2.3300799277311914,
      "grad_norm": 0.8039305210113525,
      "learning_rate": 5.375025533232738e-06,
      "loss": 0.0193,
      "step": 1423800
    },
    {
      "epoch": 2.3301126581698446,
      "grad_norm": 0.026857726275920868,
      "learning_rate": 5.3749596410192215e-06,
      "loss": 0.012,
      "step": 1423820
    },
    {
      "epoch": 2.330145388608498,
      "grad_norm": 0.32320210337638855,
      "learning_rate": 5.374893748805704e-06,
      "loss": 0.0233,
      "step": 1423840
    },
    {
      "epoch": 2.3301781190471513,
      "grad_norm": 0.15355530381202698,
      "learning_rate": 5.374827856592187e-06,
      "loss": 0.0118,
      "step": 1423860
    },
    {
      "epoch": 2.330210849485805,
      "grad_norm": 0.2030135691165924,
      "learning_rate": 5.37476196437867e-06,
      "loss": 0.0222,
      "step": 1423880
    },
    {
      "epoch": 2.330243579924458,
      "grad_norm": 0.09502341598272324,
      "learning_rate": 5.374696072165153e-06,
      "loss": 0.0146,
      "step": 1423900
    },
    {
      "epoch": 2.3302763103631117,
      "grad_norm": 0.6237894892692566,
      "learning_rate": 5.374630179951635e-06,
      "loss": 0.0144,
      "step": 1423920
    },
    {
      "epoch": 2.330309040801765,
      "grad_norm": 0.7625932693481445,
      "learning_rate": 5.374564287738119e-06,
      "loss": 0.019,
      "step": 1423940
    },
    {
      "epoch": 2.330341771240418,
      "grad_norm": 0.5380357503890991,
      "learning_rate": 5.374498395524601e-06,
      "loss": 0.0108,
      "step": 1423960
    },
    {
      "epoch": 2.3303745016790716,
      "grad_norm": 0.28518760204315186,
      "learning_rate": 5.374432503311084e-06,
      "loss": 0.0161,
      "step": 1423980
    },
    {
      "epoch": 2.3304072321177247,
      "grad_norm": 0.27121445536613464,
      "learning_rate": 5.374366611097567e-06,
      "loss": 0.0183,
      "step": 1424000
    },
    {
      "epoch": 2.3304399625563783,
      "grad_norm": 0.15237213671207428,
      "learning_rate": 5.37430071888405e-06,
      "loss": 0.0182,
      "step": 1424020
    },
    {
      "epoch": 2.3304726929950315,
      "grad_norm": 0.7363269329071045,
      "learning_rate": 5.3742348266705325e-06,
      "loss": 0.0178,
      "step": 1424040
    },
    {
      "epoch": 2.330505423433685,
      "grad_norm": 0.23012910783290863,
      "learning_rate": 5.374168934457016e-06,
      "loss": 0.0134,
      "step": 1424060
    },
    {
      "epoch": 2.330538153872338,
      "grad_norm": 0.27527111768722534,
      "learning_rate": 5.374103042243498e-06,
      "loss": 0.0128,
      "step": 1424080
    },
    {
      "epoch": 2.3305708843109914,
      "grad_norm": 1.110479712486267,
      "learning_rate": 5.374037150029982e-06,
      "loss": 0.0183,
      "step": 1424100
    },
    {
      "epoch": 2.330603614749645,
      "grad_norm": 0.07898253202438354,
      "learning_rate": 5.3739712578164635e-06,
      "loss": 0.0103,
      "step": 1424120
    },
    {
      "epoch": 2.330636345188298,
      "grad_norm": 0.9173241257667542,
      "learning_rate": 5.373905365602947e-06,
      "loss": 0.019,
      "step": 1424140
    },
    {
      "epoch": 2.3306690756269517,
      "grad_norm": 0.2778650224208832,
      "learning_rate": 5.37383947338943e-06,
      "loss": 0.0171,
      "step": 1424160
    },
    {
      "epoch": 2.330701806065605,
      "grad_norm": 0.14209115505218506,
      "learning_rate": 5.3737735811759125e-06,
      "loss": 0.0144,
      "step": 1424180
    },
    {
      "epoch": 2.3307345365042584,
      "grad_norm": 0.5340614914894104,
      "learning_rate": 5.373707688962396e-06,
      "loss": 0.0158,
      "step": 1424200
    },
    {
      "epoch": 2.3307672669429116,
      "grad_norm": 0.2601957619190216,
      "learning_rate": 5.373641796748879e-06,
      "loss": 0.0172,
      "step": 1424220
    },
    {
      "epoch": 2.3307999973815647,
      "grad_norm": 0.3722464144229889,
      "learning_rate": 5.373575904535362e-06,
      "loss": 0.0132,
      "step": 1424240
    },
    {
      "epoch": 2.3308327278202183,
      "grad_norm": 0.6463438272476196,
      "learning_rate": 5.373510012321844e-06,
      "loss": 0.0146,
      "step": 1424260
    },
    {
      "epoch": 2.3308654582588715,
      "grad_norm": 0.19775621592998505,
      "learning_rate": 5.373444120108328e-06,
      "loss": 0.0143,
      "step": 1424280
    },
    {
      "epoch": 2.330898188697525,
      "grad_norm": 0.1217307522892952,
      "learning_rate": 5.37337822789481e-06,
      "loss": 0.0109,
      "step": 1424300
    },
    {
      "epoch": 2.3309309191361782,
      "grad_norm": 0.14454036951065063,
      "learning_rate": 5.373312335681293e-06,
      "loss": 0.0203,
      "step": 1424320
    },
    {
      "epoch": 2.330963649574832,
      "grad_norm": 0.3468782305717468,
      "learning_rate": 5.373246443467775e-06,
      "loss": 0.0225,
      "step": 1424340
    },
    {
      "epoch": 2.330996380013485,
      "grad_norm": 0.9925056099891663,
      "learning_rate": 5.373180551254259e-06,
      "loss": 0.0181,
      "step": 1424360
    },
    {
      "epoch": 2.331029110452138,
      "grad_norm": 0.3313008248806,
      "learning_rate": 5.373114659040742e-06,
      "loss": 0.0084,
      "step": 1424380
    },
    {
      "epoch": 2.3310618408907917,
      "grad_norm": 0.1403415948152542,
      "learning_rate": 5.373048766827224e-06,
      "loss": 0.0185,
      "step": 1424400
    },
    {
      "epoch": 2.331094571329445,
      "grad_norm": 0.5427344441413879,
      "learning_rate": 5.372982874613707e-06,
      "loss": 0.0116,
      "step": 1424420
    },
    {
      "epoch": 2.3311273017680985,
      "grad_norm": 0.2284793108701706,
      "learning_rate": 5.372916982400191e-06,
      "loss": 0.0172,
      "step": 1424440
    },
    {
      "epoch": 2.3311600322067516,
      "grad_norm": 0.8817337155342102,
      "learning_rate": 5.372851090186673e-06,
      "loss": 0.0139,
      "step": 1424460
    },
    {
      "epoch": 2.331192762645405,
      "grad_norm": 1.1768286228179932,
      "learning_rate": 5.372785197973156e-06,
      "loss": 0.0162,
      "step": 1424480
    },
    {
      "epoch": 2.3312254930840584,
      "grad_norm": 0.6749641299247742,
      "learning_rate": 5.372719305759638e-06,
      "loss": 0.0205,
      "step": 1424500
    },
    {
      "epoch": 2.3312582235227115,
      "grad_norm": 0.22944296896457672,
      "learning_rate": 5.372653413546122e-06,
      "loss": 0.0169,
      "step": 1424520
    },
    {
      "epoch": 2.331290953961365,
      "grad_norm": 0.6071268320083618,
      "learning_rate": 5.372587521332605e-06,
      "loss": 0.0128,
      "step": 1424540
    },
    {
      "epoch": 2.3313236844000182,
      "grad_norm": 0.8705434799194336,
      "learning_rate": 5.372521629119087e-06,
      "loss": 0.0166,
      "step": 1424560
    },
    {
      "epoch": 2.331356414838672,
      "grad_norm": 0.6383041739463806,
      "learning_rate": 5.372455736905571e-06,
      "loss": 0.0207,
      "step": 1424580
    },
    {
      "epoch": 2.331389145277325,
      "grad_norm": 1.2333647012710571,
      "learning_rate": 5.3723898446920535e-06,
      "loss": 0.0149,
      "step": 1424600
    },
    {
      "epoch": 2.3314218757159786,
      "grad_norm": 0.9823357462882996,
      "learning_rate": 5.372323952478536e-06,
      "loss": 0.0148,
      "step": 1424620
    },
    {
      "epoch": 2.3314546061546317,
      "grad_norm": 0.13629524409770966,
      "learning_rate": 5.372258060265019e-06,
      "loss": 0.0113,
      "step": 1424640
    },
    {
      "epoch": 2.331487336593285,
      "grad_norm": 0.8624137043952942,
      "learning_rate": 5.3721921680515025e-06,
      "loss": 0.012,
      "step": 1424660
    },
    {
      "epoch": 2.3315200670319385,
      "grad_norm": 0.18643683195114136,
      "learning_rate": 5.372126275837984e-06,
      "loss": 0.0141,
      "step": 1424680
    },
    {
      "epoch": 2.3315527974705916,
      "grad_norm": 0.24250639975070953,
      "learning_rate": 5.372060383624468e-06,
      "loss": 0.0164,
      "step": 1424700
    },
    {
      "epoch": 2.3315855279092452,
      "grad_norm": 0.38377195596694946,
      "learning_rate": 5.37199449141095e-06,
      "loss": 0.0118,
      "step": 1424720
    },
    {
      "epoch": 2.3316182583478984,
      "grad_norm": 0.21778661012649536,
      "learning_rate": 5.3719285991974335e-06,
      "loss": 0.0169,
      "step": 1424740
    },
    {
      "epoch": 2.3316509887865515,
      "grad_norm": 0.22627754509449005,
      "learning_rate": 5.371862706983915e-06,
      "loss": 0.0103,
      "step": 1424760
    },
    {
      "epoch": 2.331683719225205,
      "grad_norm": 0.7007878422737122,
      "learning_rate": 5.371796814770399e-06,
      "loss": 0.0161,
      "step": 1424780
    },
    {
      "epoch": 2.3317164496638583,
      "grad_norm": 0.5537254810333252,
      "learning_rate": 5.371730922556882e-06,
      "loss": 0.0193,
      "step": 1424800
    },
    {
      "epoch": 2.331749180102512,
      "grad_norm": 0.7622368931770325,
      "learning_rate": 5.3716650303433644e-06,
      "loss": 0.01,
      "step": 1424820
    },
    {
      "epoch": 2.331781910541165,
      "grad_norm": 0.36705663800239563,
      "learning_rate": 5.371599138129847e-06,
      "loss": 0.0152,
      "step": 1424840
    },
    {
      "epoch": 2.331814640979818,
      "grad_norm": 0.20221784710884094,
      "learning_rate": 5.371533245916331e-06,
      "loss": 0.0126,
      "step": 1424860
    },
    {
      "epoch": 2.3318473714184718,
      "grad_norm": 0.9200579524040222,
      "learning_rate": 5.3714673537028135e-06,
      "loss": 0.018,
      "step": 1424880
    },
    {
      "epoch": 2.331880101857125,
      "grad_norm": 0.30467066168785095,
      "learning_rate": 5.371401461489296e-06,
      "loss": 0.0137,
      "step": 1424900
    },
    {
      "epoch": 2.3319128322957785,
      "grad_norm": 0.37884363532066345,
      "learning_rate": 5.37133556927578e-06,
      "loss": 0.0109,
      "step": 1424920
    },
    {
      "epoch": 2.3319455627344317,
      "grad_norm": 0.5724544525146484,
      "learning_rate": 5.371269677062262e-06,
      "loss": 0.0212,
      "step": 1424940
    },
    {
      "epoch": 2.3319782931730852,
      "grad_norm": 0.2607010006904602,
      "learning_rate": 5.371203784848745e-06,
      "loss": 0.0155,
      "step": 1424960
    },
    {
      "epoch": 2.3320110236117384,
      "grad_norm": 0.6635440587997437,
      "learning_rate": 5.371137892635227e-06,
      "loss": 0.013,
      "step": 1424980
    },
    {
      "epoch": 2.3320437540503915,
      "grad_norm": 0.119298055768013,
      "learning_rate": 5.371072000421711e-06,
      "loss": 0.014,
      "step": 1425000
    },
    {
      "epoch": 2.332076484489045,
      "grad_norm": 0.22976301610469818,
      "learning_rate": 5.3710061082081935e-06,
      "loss": 0.0266,
      "step": 1425020
    },
    {
      "epoch": 2.3321092149276983,
      "grad_norm": 0.18513479828834534,
      "learning_rate": 5.370940215994676e-06,
      "loss": 0.0139,
      "step": 1425040
    },
    {
      "epoch": 2.332141945366352,
      "grad_norm": 0.2766142189502716,
      "learning_rate": 5.370874323781159e-06,
      "loss": 0.0107,
      "step": 1425060
    },
    {
      "epoch": 2.332174675805005,
      "grad_norm": 0.07225851714611053,
      "learning_rate": 5.370808431567643e-06,
      "loss": 0.0167,
      "step": 1425080
    },
    {
      "epoch": 2.3322074062436586,
      "grad_norm": 0.4038380980491638,
      "learning_rate": 5.3707425393541245e-06,
      "loss": 0.0209,
      "step": 1425100
    },
    {
      "epoch": 2.332240136682312,
      "grad_norm": 0.18730798363685608,
      "learning_rate": 5.370676647140608e-06,
      "loss": 0.0116,
      "step": 1425120
    },
    {
      "epoch": 2.332272867120965,
      "grad_norm": 0.19581282138824463,
      "learning_rate": 5.37061075492709e-06,
      "loss": 0.0155,
      "step": 1425140
    },
    {
      "epoch": 2.3323055975596185,
      "grad_norm": 0.37605005502700806,
      "learning_rate": 5.3705448627135736e-06,
      "loss": 0.0123,
      "step": 1425160
    },
    {
      "epoch": 2.3323383279982717,
      "grad_norm": 0.5104631781578064,
      "learning_rate": 5.370478970500056e-06,
      "loss": 0.0093,
      "step": 1425180
    },
    {
      "epoch": 2.3323710584369253,
      "grad_norm": 0.3080924451351166,
      "learning_rate": 5.370413078286539e-06,
      "loss": 0.0157,
      "step": 1425200
    },
    {
      "epoch": 2.3324037888755784,
      "grad_norm": 0.1832277476787567,
      "learning_rate": 5.370347186073023e-06,
      "loss": 0.0126,
      "step": 1425220
    },
    {
      "epoch": 2.332436519314232,
      "grad_norm": 0.6661613583564758,
      "learning_rate": 5.370281293859505e-06,
      "loss": 0.02,
      "step": 1425240
    },
    {
      "epoch": 2.332469249752885,
      "grad_norm": 0.34292277693748474,
      "learning_rate": 5.370215401645988e-06,
      "loss": 0.0153,
      "step": 1425260
    },
    {
      "epoch": 2.3325019801915383,
      "grad_norm": 0.28109022974967957,
      "learning_rate": 5.370149509432471e-06,
      "loss": 0.0156,
      "step": 1425280
    },
    {
      "epoch": 2.332534710630192,
      "grad_norm": 0.5678876638412476,
      "learning_rate": 5.3700836172189544e-06,
      "loss": 0.0137,
      "step": 1425300
    },
    {
      "epoch": 2.332567441068845,
      "grad_norm": 0.37308037281036377,
      "learning_rate": 5.370017725005436e-06,
      "loss": 0.0114,
      "step": 1425320
    },
    {
      "epoch": 2.3326001715074987,
      "grad_norm": 0.7788416743278503,
      "learning_rate": 5.36995183279192e-06,
      "loss": 0.0154,
      "step": 1425340
    },
    {
      "epoch": 2.332632901946152,
      "grad_norm": 0.7305699586868286,
      "learning_rate": 5.369885940578402e-06,
      "loss": 0.0151,
      "step": 1425360
    },
    {
      "epoch": 2.3326656323848054,
      "grad_norm": 1.0526044368743896,
      "learning_rate": 5.369820048364885e-06,
      "loss": 0.0176,
      "step": 1425380
    },
    {
      "epoch": 2.3326983628234585,
      "grad_norm": 0.49456265568733215,
      "learning_rate": 5.369754156151368e-06,
      "loss": 0.0122,
      "step": 1425400
    },
    {
      "epoch": 2.3327310932621117,
      "grad_norm": 0.348410427570343,
      "learning_rate": 5.369688263937851e-06,
      "loss": 0.0205,
      "step": 1425420
    },
    {
      "epoch": 2.3327638237007653,
      "grad_norm": 0.08018938452005386,
      "learning_rate": 5.369622371724334e-06,
      "loss": 0.0091,
      "step": 1425440
    },
    {
      "epoch": 2.3327965541394184,
      "grad_norm": 0.32804328203201294,
      "learning_rate": 5.369556479510817e-06,
      "loss": 0.0109,
      "step": 1425460
    },
    {
      "epoch": 2.332829284578072,
      "grad_norm": 0.1983155906200409,
      "learning_rate": 5.369490587297299e-06,
      "loss": 0.012,
      "step": 1425480
    },
    {
      "epoch": 2.332862015016725,
      "grad_norm": 0.21744510531425476,
      "learning_rate": 5.369424695083783e-06,
      "loss": 0.0088,
      "step": 1425500
    },
    {
      "epoch": 2.332894745455379,
      "grad_norm": 0.12426203489303589,
      "learning_rate": 5.3693588028702646e-06,
      "loss": 0.0149,
      "step": 1425520
    },
    {
      "epoch": 2.332927475894032,
      "grad_norm": 1.022189736366272,
      "learning_rate": 5.369292910656748e-06,
      "loss": 0.019,
      "step": 1425540
    },
    {
      "epoch": 2.332960206332685,
      "grad_norm": 0.5535526275634766,
      "learning_rate": 5.36922701844323e-06,
      "loss": 0.0126,
      "step": 1425560
    },
    {
      "epoch": 2.3329929367713387,
      "grad_norm": 0.1895885318517685,
      "learning_rate": 5.369161126229714e-06,
      "loss": 0.0135,
      "step": 1425580
    },
    {
      "epoch": 2.333025667209992,
      "grad_norm": 0.3020365536212921,
      "learning_rate": 5.369095234016197e-06,
      "loss": 0.0139,
      "step": 1425600
    },
    {
      "epoch": 2.3330583976486454,
      "grad_norm": 0.13344967365264893,
      "learning_rate": 5.369029341802679e-06,
      "loss": 0.018,
      "step": 1425620
    },
    {
      "epoch": 2.3330911280872986,
      "grad_norm": 0.2706153690814972,
      "learning_rate": 5.368963449589163e-06,
      "loss": 0.0193,
      "step": 1425640
    },
    {
      "epoch": 2.333123858525952,
      "grad_norm": 0.6429637670516968,
      "learning_rate": 5.3688975573756454e-06,
      "loss": 0.018,
      "step": 1425660
    },
    {
      "epoch": 2.3331565889646053,
      "grad_norm": 0.4975127875804901,
      "learning_rate": 5.368831665162129e-06,
      "loss": 0.0136,
      "step": 1425680
    },
    {
      "epoch": 2.3331893194032585,
      "grad_norm": 1.2283661365509033,
      "learning_rate": 5.368765772948611e-06,
      "loss": 0.0119,
      "step": 1425700
    },
    {
      "epoch": 2.333222049841912,
      "grad_norm": 0.3612358868122101,
      "learning_rate": 5.3686998807350945e-06,
      "loss": 0.0189,
      "step": 1425720
    },
    {
      "epoch": 2.333254780280565,
      "grad_norm": 0.5987470746040344,
      "learning_rate": 5.368633988521576e-06,
      "loss": 0.0183,
      "step": 1425740
    },
    {
      "epoch": 2.333287510719219,
      "grad_norm": 0.34835898876190186,
      "learning_rate": 5.36856809630806e-06,
      "loss": 0.0169,
      "step": 1425760
    },
    {
      "epoch": 2.333320241157872,
      "grad_norm": 0.9656349420547485,
      "learning_rate": 5.368502204094542e-06,
      "loss": 0.0176,
      "step": 1425780
    },
    {
      "epoch": 2.3333529715965255,
      "grad_norm": 0.8427132368087769,
      "learning_rate": 5.3684363118810255e-06,
      "loss": 0.0156,
      "step": 1425800
    },
    {
      "epoch": 2.3333857020351787,
      "grad_norm": 0.4805237054824829,
      "learning_rate": 5.368370419667508e-06,
      "loss": 0.0133,
      "step": 1425820
    },
    {
      "epoch": 2.333418432473832,
      "grad_norm": 0.5751602053642273,
      "learning_rate": 5.368304527453991e-06,
      "loss": 0.0175,
      "step": 1425840
    },
    {
      "epoch": 2.3334511629124854,
      "grad_norm": 0.4824977517127991,
      "learning_rate": 5.368238635240474e-06,
      "loss": 0.0148,
      "step": 1425860
    },
    {
      "epoch": 2.3334838933511386,
      "grad_norm": 1.0372511148452759,
      "learning_rate": 5.368172743026957e-06,
      "loss": 0.0141,
      "step": 1425880
    },
    {
      "epoch": 2.333516623789792,
      "grad_norm": 0.14467699825763702,
      "learning_rate": 5.368106850813439e-06,
      "loss": 0.0143,
      "step": 1425900
    },
    {
      "epoch": 2.3335493542284453,
      "grad_norm": 0.12645189464092255,
      "learning_rate": 5.368040958599923e-06,
      "loss": 0.0142,
      "step": 1425920
    },
    {
      "epoch": 2.333582084667099,
      "grad_norm": 0.44218534231185913,
      "learning_rate": 5.367975066386406e-06,
      "loss": 0.0132,
      "step": 1425940
    },
    {
      "epoch": 2.333614815105752,
      "grad_norm": 0.25894030928611755,
      "learning_rate": 5.367909174172888e-06,
      "loss": 0.0123,
      "step": 1425960
    },
    {
      "epoch": 2.3336475455444052,
      "grad_norm": 0.4069300889968872,
      "learning_rate": 5.367843281959372e-06,
      "loss": 0.0174,
      "step": 1425980
    },
    {
      "epoch": 2.333680275983059,
      "grad_norm": 0.5941178798675537,
      "learning_rate": 5.367777389745854e-06,
      "loss": 0.0094,
      "step": 1426000
    },
    {
      "epoch": 2.333713006421712,
      "grad_norm": 0.6453347206115723,
      "learning_rate": 5.367711497532337e-06,
      "loss": 0.0169,
      "step": 1426020
    },
    {
      "epoch": 2.3337457368603656,
      "grad_norm": 0.5181871652603149,
      "learning_rate": 5.36764560531882e-06,
      "loss": 0.0142,
      "step": 1426040
    },
    {
      "epoch": 2.3337784672990187,
      "grad_norm": 0.2368035465478897,
      "learning_rate": 5.367579713105303e-06,
      "loss": 0.0093,
      "step": 1426060
    },
    {
      "epoch": 2.3338111977376723,
      "grad_norm": 0.4061115086078644,
      "learning_rate": 5.3675138208917855e-06,
      "loss": 0.0152,
      "step": 1426080
    },
    {
      "epoch": 2.3338439281763255,
      "grad_norm": 0.5863921642303467,
      "learning_rate": 5.367447928678269e-06,
      "loss": 0.0212,
      "step": 1426100
    },
    {
      "epoch": 2.3338766586149786,
      "grad_norm": 0.6487841010093689,
      "learning_rate": 5.367382036464751e-06,
      "loss": 0.0222,
      "step": 1426120
    },
    {
      "epoch": 2.333909389053632,
      "grad_norm": 1.229783535003662,
      "learning_rate": 5.367316144251235e-06,
      "loss": 0.0159,
      "step": 1426140
    },
    {
      "epoch": 2.3339421194922854,
      "grad_norm": 0.08784298598766327,
      "learning_rate": 5.3672502520377165e-06,
      "loss": 0.0146,
      "step": 1426160
    },
    {
      "epoch": 2.333974849930939,
      "grad_norm": 0.13482797145843506,
      "learning_rate": 5.3671843598242e-06,
      "loss": 0.0106,
      "step": 1426180
    },
    {
      "epoch": 2.334007580369592,
      "grad_norm": 0.21382342278957367,
      "learning_rate": 5.367118467610683e-06,
      "loss": 0.0162,
      "step": 1426200
    },
    {
      "epoch": 2.3340403108082457,
      "grad_norm": 0.10870199650526047,
      "learning_rate": 5.3670525753971655e-06,
      "loss": 0.0195,
      "step": 1426220
    },
    {
      "epoch": 2.334073041246899,
      "grad_norm": 0.30896231532096863,
      "learning_rate": 5.366986683183648e-06,
      "loss": 0.0143,
      "step": 1426240
    },
    {
      "epoch": 2.334105771685552,
      "grad_norm": 0.2927752137184143,
      "learning_rate": 5.366920790970132e-06,
      "loss": 0.0173,
      "step": 1426260
    },
    {
      "epoch": 2.3341385021242056,
      "grad_norm": 0.09081340581178665,
      "learning_rate": 5.366854898756615e-06,
      "loss": 0.0133,
      "step": 1426280
    },
    {
      "epoch": 2.3341712325628587,
      "grad_norm": 0.12891745567321777,
      "learning_rate": 5.366789006543097e-06,
      "loss": 0.0105,
      "step": 1426300
    },
    {
      "epoch": 2.3342039630015123,
      "grad_norm": 0.32038748264312744,
      "learning_rate": 5.366723114329581e-06,
      "loss": 0.0073,
      "step": 1426320
    },
    {
      "epoch": 2.3342366934401655,
      "grad_norm": 0.7831473350524902,
      "learning_rate": 5.366657222116063e-06,
      "loss": 0.0158,
      "step": 1426340
    },
    {
      "epoch": 2.3342694238788186,
      "grad_norm": 0.49979323148727417,
      "learning_rate": 5.366591329902546e-06,
      "loss": 0.0179,
      "step": 1426360
    },
    {
      "epoch": 2.3343021543174722,
      "grad_norm": 0.6349174380302429,
      "learning_rate": 5.366525437689028e-06,
      "loss": 0.0157,
      "step": 1426380
    },
    {
      "epoch": 2.3343348847561254,
      "grad_norm": 0.11484426259994507,
      "learning_rate": 5.366459545475512e-06,
      "loss": 0.0161,
      "step": 1426400
    },
    {
      "epoch": 2.334367615194779,
      "grad_norm": 0.669942319393158,
      "learning_rate": 5.366393653261995e-06,
      "loss": 0.0161,
      "step": 1426420
    },
    {
      "epoch": 2.334400345633432,
      "grad_norm": 0.6347259879112244,
      "learning_rate": 5.366327761048477e-06,
      "loss": 0.0121,
      "step": 1426440
    },
    {
      "epoch": 2.3344330760720853,
      "grad_norm": 0.10524263978004456,
      "learning_rate": 5.36626186883496e-06,
      "loss": 0.0104,
      "step": 1426460
    },
    {
      "epoch": 2.334465806510739,
      "grad_norm": 0.07703734934329987,
      "learning_rate": 5.366195976621444e-06,
      "loss": 0.0105,
      "step": 1426480
    },
    {
      "epoch": 2.334498536949392,
      "grad_norm": 0.17407266795635223,
      "learning_rate": 5.366130084407926e-06,
      "loss": 0.0109,
      "step": 1426500
    },
    {
      "epoch": 2.3345312673880456,
      "grad_norm": 0.14847421646118164,
      "learning_rate": 5.366064192194409e-06,
      "loss": 0.0181,
      "step": 1426520
    },
    {
      "epoch": 2.3345639978266988,
      "grad_norm": 0.14943234622478485,
      "learning_rate": 5.365998299980891e-06,
      "loss": 0.0145,
      "step": 1426540
    },
    {
      "epoch": 2.3345967282653524,
      "grad_norm": 0.8798306584358215,
      "learning_rate": 5.365932407767375e-06,
      "loss": 0.016,
      "step": 1426560
    },
    {
      "epoch": 2.3346294587040055,
      "grad_norm": 0.7470974922180176,
      "learning_rate": 5.3658665155538566e-06,
      "loss": 0.0124,
      "step": 1426580
    },
    {
      "epoch": 2.3346621891426587,
      "grad_norm": 0.6764642000198364,
      "learning_rate": 5.36580062334034e-06,
      "loss": 0.016,
      "step": 1426600
    },
    {
      "epoch": 2.3346949195813123,
      "grad_norm": 0.42855045199394226,
      "learning_rate": 5.365734731126823e-06,
      "loss": 0.0124,
      "step": 1426620
    },
    {
      "epoch": 2.3347276500199654,
      "grad_norm": 0.25524139404296875,
      "learning_rate": 5.365668838913306e-06,
      "loss": 0.0232,
      "step": 1426640
    },
    {
      "epoch": 2.334760380458619,
      "grad_norm": 0.27985879778862,
      "learning_rate": 5.365602946699789e-06,
      "loss": 0.0108,
      "step": 1426660
    },
    {
      "epoch": 2.334793110897272,
      "grad_norm": 0.2274540215730667,
      "learning_rate": 5.365537054486272e-06,
      "loss": 0.0105,
      "step": 1426680
    },
    {
      "epoch": 2.3348258413359257,
      "grad_norm": 0.39068660140037537,
      "learning_rate": 5.365471162272755e-06,
      "loss": 0.0141,
      "step": 1426700
    },
    {
      "epoch": 2.334858571774579,
      "grad_norm": 0.2638687789440155,
      "learning_rate": 5.3654052700592374e-06,
      "loss": 0.0214,
      "step": 1426720
    },
    {
      "epoch": 2.334891302213232,
      "grad_norm": 0.6251067519187927,
      "learning_rate": 5.365339377845721e-06,
      "loss": 0.011,
      "step": 1426740
    },
    {
      "epoch": 2.3349240326518856,
      "grad_norm": 0.26226678490638733,
      "learning_rate": 5.365273485632203e-06,
      "loss": 0.0091,
      "step": 1426760
    },
    {
      "epoch": 2.334956763090539,
      "grad_norm": 0.3098824620246887,
      "learning_rate": 5.3652075934186865e-06,
      "loss": 0.019,
      "step": 1426780
    },
    {
      "epoch": 2.3349894935291924,
      "grad_norm": 0.23369237780570984,
      "learning_rate": 5.365141701205168e-06,
      "loss": 0.0139,
      "step": 1426800
    },
    {
      "epoch": 2.3350222239678455,
      "grad_norm": 1.0445587635040283,
      "learning_rate": 5.365075808991652e-06,
      "loss": 0.0098,
      "step": 1426820
    },
    {
      "epoch": 2.335054954406499,
      "grad_norm": 0.5492317080497742,
      "learning_rate": 5.365009916778135e-06,
      "loss": 0.0138,
      "step": 1426840
    },
    {
      "epoch": 2.3350876848451523,
      "grad_norm": 0.0808529332280159,
      "learning_rate": 5.3649440245646175e-06,
      "loss": 0.0165,
      "step": 1426860
    },
    {
      "epoch": 2.3351204152838054,
      "grad_norm": 0.23750534653663635,
      "learning_rate": 5.3648781323511e-06,
      "loss": 0.0119,
      "step": 1426880
    },
    {
      "epoch": 2.335153145722459,
      "grad_norm": 0.038978658616542816,
      "learning_rate": 5.364812240137584e-06,
      "loss": 0.0137,
      "step": 1426900
    },
    {
      "epoch": 2.335185876161112,
      "grad_norm": 0.7758768796920776,
      "learning_rate": 5.364746347924066e-06,
      "loss": 0.0136,
      "step": 1426920
    },
    {
      "epoch": 2.3352186065997658,
      "grad_norm": 0.20587337017059326,
      "learning_rate": 5.364680455710549e-06,
      "loss": 0.0129,
      "step": 1426940
    },
    {
      "epoch": 2.335251337038419,
      "grad_norm": 0.09623847901821136,
      "learning_rate": 5.364614563497031e-06,
      "loss": 0.0154,
      "step": 1426960
    },
    {
      "epoch": 2.3352840674770725,
      "grad_norm": 0.3320735991001129,
      "learning_rate": 5.364548671283515e-06,
      "loss": 0.0136,
      "step": 1426980
    },
    {
      "epoch": 2.3353167979157257,
      "grad_norm": 0.8796213269233704,
      "learning_rate": 5.364482779069998e-06,
      "loss": 0.0156,
      "step": 1427000
    },
    {
      "epoch": 2.335349528354379,
      "grad_norm": 0.9380584359169006,
      "learning_rate": 5.36441688685648e-06,
      "loss": 0.0135,
      "step": 1427020
    },
    {
      "epoch": 2.3353822587930324,
      "grad_norm": 0.3167683482170105,
      "learning_rate": 5.364350994642964e-06,
      "loss": 0.0182,
      "step": 1427040
    },
    {
      "epoch": 2.3354149892316856,
      "grad_norm": 0.28896260261535645,
      "learning_rate": 5.3642851024294465e-06,
      "loss": 0.0153,
      "step": 1427060
    },
    {
      "epoch": 2.335447719670339,
      "grad_norm": 0.300281286239624,
      "learning_rate": 5.364219210215929e-06,
      "loss": 0.0134,
      "step": 1427080
    },
    {
      "epoch": 2.3354804501089923,
      "grad_norm": 0.48628613352775574,
      "learning_rate": 5.364153318002412e-06,
      "loss": 0.0105,
      "step": 1427100
    },
    {
      "epoch": 2.335513180547646,
      "grad_norm": 0.1327657401561737,
      "learning_rate": 5.364087425788896e-06,
      "loss": 0.0158,
      "step": 1427120
    },
    {
      "epoch": 2.335545910986299,
      "grad_norm": 0.40960320830345154,
      "learning_rate": 5.3640215335753775e-06,
      "loss": 0.018,
      "step": 1427140
    },
    {
      "epoch": 2.335578641424952,
      "grad_norm": 0.17913943529129028,
      "learning_rate": 5.363955641361861e-06,
      "loss": 0.0155,
      "step": 1427160
    },
    {
      "epoch": 2.335611371863606,
      "grad_norm": 0.27211901545524597,
      "learning_rate": 5.363889749148343e-06,
      "loss": 0.0104,
      "step": 1427180
    },
    {
      "epoch": 2.335644102302259,
      "grad_norm": 0.6427831649780273,
      "learning_rate": 5.3638238569348266e-06,
      "loss": 0.0119,
      "step": 1427200
    },
    {
      "epoch": 2.3356768327409125,
      "grad_norm": 0.1966184377670288,
      "learning_rate": 5.363757964721309e-06,
      "loss": 0.013,
      "step": 1427220
    },
    {
      "epoch": 2.3357095631795657,
      "grad_norm": 0.40314123034477234,
      "learning_rate": 5.363692072507792e-06,
      "loss": 0.0178,
      "step": 1427240
    },
    {
      "epoch": 2.3357422936182193,
      "grad_norm": 0.533717930316925,
      "learning_rate": 5.363626180294275e-06,
      "loss": 0.0094,
      "step": 1427260
    },
    {
      "epoch": 2.3357750240568724,
      "grad_norm": 1.1734771728515625,
      "learning_rate": 5.363560288080758e-06,
      "loss": 0.0098,
      "step": 1427280
    },
    {
      "epoch": 2.3358077544955256,
      "grad_norm": 0.8122381567955017,
      "learning_rate": 5.36349439586724e-06,
      "loss": 0.0219,
      "step": 1427300
    },
    {
      "epoch": 2.335840484934179,
      "grad_norm": 0.4059843420982361,
      "learning_rate": 5.363428503653724e-06,
      "loss": 0.0244,
      "step": 1427320
    },
    {
      "epoch": 2.3358732153728323,
      "grad_norm": 0.280947208404541,
      "learning_rate": 5.3633626114402074e-06,
      "loss": 0.0117,
      "step": 1427340
    },
    {
      "epoch": 2.335905945811486,
      "grad_norm": 0.24048937857151031,
      "learning_rate": 5.363296719226689e-06,
      "loss": 0.0215,
      "step": 1427360
    },
    {
      "epoch": 2.335938676250139,
      "grad_norm": 0.17821361124515533,
      "learning_rate": 5.363230827013173e-06,
      "loss": 0.0118,
      "step": 1427380
    },
    {
      "epoch": 2.3359714066887927,
      "grad_norm": 0.6625586152076721,
      "learning_rate": 5.363164934799655e-06,
      "loss": 0.022,
      "step": 1427400
    },
    {
      "epoch": 2.336004137127446,
      "grad_norm": 0.16134627163410187,
      "learning_rate": 5.363099042586138e-06,
      "loss": 0.0141,
      "step": 1427420
    },
    {
      "epoch": 2.336036867566099,
      "grad_norm": 0.4932613670825958,
      "learning_rate": 5.363033150372621e-06,
      "loss": 0.0133,
      "step": 1427440
    },
    {
      "epoch": 2.3360695980047526,
      "grad_norm": 0.6427739262580872,
      "learning_rate": 5.362967258159104e-06,
      "loss": 0.0173,
      "step": 1427460
    },
    {
      "epoch": 2.3361023284434057,
      "grad_norm": 0.15329405665397644,
      "learning_rate": 5.362901365945587e-06,
      "loss": 0.0187,
      "step": 1427480
    },
    {
      "epoch": 2.3361350588820593,
      "grad_norm": 0.58778977394104,
      "learning_rate": 5.36283547373207e-06,
      "loss": 0.0119,
      "step": 1427500
    },
    {
      "epoch": 2.3361677893207125,
      "grad_norm": 0.3638208210468292,
      "learning_rate": 5.362769581518552e-06,
      "loss": 0.0157,
      "step": 1427520
    },
    {
      "epoch": 2.336200519759366,
      "grad_norm": 0.6160371899604797,
      "learning_rate": 5.362703689305036e-06,
      "loss": 0.0147,
      "step": 1427540
    },
    {
      "epoch": 2.336233250198019,
      "grad_norm": 0.19123205542564392,
      "learning_rate": 5.362637797091518e-06,
      "loss": 0.0147,
      "step": 1427560
    },
    {
      "epoch": 2.3362659806366723,
      "grad_norm": 0.1382610946893692,
      "learning_rate": 5.362571904878001e-06,
      "loss": 0.0198,
      "step": 1427580
    },
    {
      "epoch": 2.336298711075326,
      "grad_norm": 0.21052689850330353,
      "learning_rate": 5.362506012664483e-06,
      "loss": 0.0103,
      "step": 1427600
    },
    {
      "epoch": 2.336331441513979,
      "grad_norm": 0.38546231389045715,
      "learning_rate": 5.362440120450967e-06,
      "loss": 0.0131,
      "step": 1427620
    },
    {
      "epoch": 2.3363641719526327,
      "grad_norm": 0.2323998063802719,
      "learning_rate": 5.362374228237449e-06,
      "loss": 0.0145,
      "step": 1427640
    },
    {
      "epoch": 2.336396902391286,
      "grad_norm": 0.3795066177845001,
      "learning_rate": 5.362308336023932e-06,
      "loss": 0.0168,
      "step": 1427660
    },
    {
      "epoch": 2.3364296328299394,
      "grad_norm": 0.15786638855934143,
      "learning_rate": 5.362242443810415e-06,
      "loss": 0.019,
      "step": 1427680
    },
    {
      "epoch": 2.3364623632685926,
      "grad_norm": 0.09348095208406448,
      "learning_rate": 5.3621765515968985e-06,
      "loss": 0.0162,
      "step": 1427700
    },
    {
      "epoch": 2.3364950937072457,
      "grad_norm": 0.32290494441986084,
      "learning_rate": 5.362110659383381e-06,
      "loss": 0.0166,
      "step": 1427720
    },
    {
      "epoch": 2.3365278241458993,
      "grad_norm": 0.3523637354373932,
      "learning_rate": 5.362044767169864e-06,
      "loss": 0.0142,
      "step": 1427740
    },
    {
      "epoch": 2.3365605545845525,
      "grad_norm": 0.12146789580583572,
      "learning_rate": 5.3619788749563475e-06,
      "loss": 0.0135,
      "step": 1427760
    },
    {
      "epoch": 2.336593285023206,
      "grad_norm": 0.58744215965271,
      "learning_rate": 5.361912982742829e-06,
      "loss": 0.0137,
      "step": 1427780
    },
    {
      "epoch": 2.336626015461859,
      "grad_norm": 0.14166158437728882,
      "learning_rate": 5.361847090529313e-06,
      "loss": 0.0222,
      "step": 1427800
    },
    {
      "epoch": 2.3366587459005124,
      "grad_norm": 1.0346132516860962,
      "learning_rate": 5.361781198315795e-06,
      "loss": 0.0251,
      "step": 1427820
    },
    {
      "epoch": 2.336691476339166,
      "grad_norm": 0.41808852553367615,
      "learning_rate": 5.3617153061022785e-06,
      "loss": 0.0155,
      "step": 1427840
    },
    {
      "epoch": 2.336724206777819,
      "grad_norm": 1.2122877836227417,
      "learning_rate": 5.361649413888761e-06,
      "loss": 0.0141,
      "step": 1427860
    },
    {
      "epoch": 2.3367569372164727,
      "grad_norm": 0.7565708160400391,
      "learning_rate": 5.361583521675244e-06,
      "loss": 0.0191,
      "step": 1427880
    },
    {
      "epoch": 2.336789667655126,
      "grad_norm": 0.29696837067604065,
      "learning_rate": 5.361517629461727e-06,
      "loss": 0.018,
      "step": 1427900
    },
    {
      "epoch": 2.336822398093779,
      "grad_norm": 0.4504818916320801,
      "learning_rate": 5.36145173724821e-06,
      "loss": 0.0183,
      "step": 1427920
    },
    {
      "epoch": 2.3368551285324326,
      "grad_norm": 4.745022773742676,
      "learning_rate": 5.361385845034692e-06,
      "loss": 0.0176,
      "step": 1427940
    },
    {
      "epoch": 2.3368878589710858,
      "grad_norm": 0.09510717540979385,
      "learning_rate": 5.361319952821176e-06,
      "loss": 0.0144,
      "step": 1427960
    },
    {
      "epoch": 2.3369205894097393,
      "grad_norm": 0.3010764420032501,
      "learning_rate": 5.361254060607658e-06,
      "loss": 0.0144,
      "step": 1427980
    },
    {
      "epoch": 2.3369533198483925,
      "grad_norm": 0.6327188611030579,
      "learning_rate": 5.361188168394141e-06,
      "loss": 0.0135,
      "step": 1428000
    },
    {
      "epoch": 2.336986050287046,
      "grad_norm": 0.264560341835022,
      "learning_rate": 5.361122276180624e-06,
      "loss": 0.0162,
      "step": 1428020
    },
    {
      "epoch": 2.3370187807256992,
      "grad_norm": 0.13484123349189758,
      "learning_rate": 5.361056383967107e-06,
      "loss": 0.0135,
      "step": 1428040
    },
    {
      "epoch": 2.3370515111643524,
      "grad_norm": 0.4831777811050415,
      "learning_rate": 5.36099049175359e-06,
      "loss": 0.0171,
      "step": 1428060
    },
    {
      "epoch": 2.337084241603006,
      "grad_norm": 0.49558186531066895,
      "learning_rate": 5.360924599540073e-06,
      "loss": 0.0147,
      "step": 1428080
    },
    {
      "epoch": 2.337116972041659,
      "grad_norm": 0.52912437915802,
      "learning_rate": 5.360858707326556e-06,
      "loss": 0.0152,
      "step": 1428100
    },
    {
      "epoch": 2.3371497024803127,
      "grad_norm": 0.7290077209472656,
      "learning_rate": 5.3607928151130385e-06,
      "loss": 0.0134,
      "step": 1428120
    },
    {
      "epoch": 2.337182432918966,
      "grad_norm": 0.4192308187484741,
      "learning_rate": 5.360726922899522e-06,
      "loss": 0.0114,
      "step": 1428140
    },
    {
      "epoch": 2.3372151633576195,
      "grad_norm": 0.5615764260292053,
      "learning_rate": 5.360661030686004e-06,
      "loss": 0.0242,
      "step": 1428160
    },
    {
      "epoch": 2.3372478937962726,
      "grad_norm": 0.6230224370956421,
      "learning_rate": 5.360595138472488e-06,
      "loss": 0.0192,
      "step": 1428180
    },
    {
      "epoch": 2.3372806242349258,
      "grad_norm": 0.22370073199272156,
      "learning_rate": 5.3605292462589695e-06,
      "loss": 0.0105,
      "step": 1428200
    },
    {
      "epoch": 2.3373133546735794,
      "grad_norm": 0.36688709259033203,
      "learning_rate": 5.360463354045453e-06,
      "loss": 0.0155,
      "step": 1428220
    },
    {
      "epoch": 2.3373460851122325,
      "grad_norm": 1.1107165813446045,
      "learning_rate": 5.360397461831936e-06,
      "loss": 0.0256,
      "step": 1428240
    },
    {
      "epoch": 2.337378815550886,
      "grad_norm": 0.358300119638443,
      "learning_rate": 5.3603315696184186e-06,
      "loss": 0.0184,
      "step": 1428260
    },
    {
      "epoch": 2.3374115459895393,
      "grad_norm": 0.46761956810951233,
      "learning_rate": 5.360265677404901e-06,
      "loss": 0.0142,
      "step": 1428280
    },
    {
      "epoch": 2.337444276428193,
      "grad_norm": 0.28950566053390503,
      "learning_rate": 5.360199785191385e-06,
      "loss": 0.0168,
      "step": 1428300
    },
    {
      "epoch": 2.337477006866846,
      "grad_norm": 0.2375180721282959,
      "learning_rate": 5.360133892977867e-06,
      "loss": 0.0133,
      "step": 1428320
    },
    {
      "epoch": 2.337509737305499,
      "grad_norm": 1.511298418045044,
      "learning_rate": 5.36006800076435e-06,
      "loss": 0.0232,
      "step": 1428340
    },
    {
      "epoch": 2.3375424677441528,
      "grad_norm": 0.7537211179733276,
      "learning_rate": 5.360002108550832e-06,
      "loss": 0.023,
      "step": 1428360
    },
    {
      "epoch": 2.337575198182806,
      "grad_norm": 0.22573119401931763,
      "learning_rate": 5.359936216337316e-06,
      "loss": 0.0228,
      "step": 1428380
    },
    {
      "epoch": 2.3376079286214595,
      "grad_norm": 0.094343401491642,
      "learning_rate": 5.3598703241237994e-06,
      "loss": 0.0106,
      "step": 1428400
    },
    {
      "epoch": 2.3376406590601126,
      "grad_norm": 0.6661189198493958,
      "learning_rate": 5.359804431910281e-06,
      "loss": 0.0127,
      "step": 1428420
    },
    {
      "epoch": 2.3376733894987662,
      "grad_norm": 0.4937233030796051,
      "learning_rate": 5.359738539696765e-06,
      "loss": 0.0126,
      "step": 1428440
    },
    {
      "epoch": 2.3377061199374194,
      "grad_norm": 0.6627487540245056,
      "learning_rate": 5.359672647483248e-06,
      "loss": 0.0113,
      "step": 1428460
    },
    {
      "epoch": 2.3377388503760725,
      "grad_norm": 0.1322118490934372,
      "learning_rate": 5.35960675526973e-06,
      "loss": 0.0158,
      "step": 1428480
    },
    {
      "epoch": 2.337771580814726,
      "grad_norm": 0.9276129603385925,
      "learning_rate": 5.359540863056213e-06,
      "loss": 0.0188,
      "step": 1428500
    },
    {
      "epoch": 2.3378043112533793,
      "grad_norm": 0.28684350848197937,
      "learning_rate": 5.359474970842697e-06,
      "loss": 0.0143,
      "step": 1428520
    },
    {
      "epoch": 2.337837041692033,
      "grad_norm": 0.762739896774292,
      "learning_rate": 5.359409078629179e-06,
      "loss": 0.0242,
      "step": 1428540
    },
    {
      "epoch": 2.337869772130686,
      "grad_norm": 1.3560600280761719,
      "learning_rate": 5.359343186415662e-06,
      "loss": 0.0174,
      "step": 1428560
    },
    {
      "epoch": 2.3379025025693396,
      "grad_norm": 0.4937064051628113,
      "learning_rate": 5.359277294202144e-06,
      "loss": 0.0167,
      "step": 1428580
    },
    {
      "epoch": 2.3379352330079928,
      "grad_norm": 1.0744720697402954,
      "learning_rate": 5.359211401988628e-06,
      "loss": 0.0198,
      "step": 1428600
    },
    {
      "epoch": 2.337967963446646,
      "grad_norm": 0.8045239448547363,
      "learning_rate": 5.3591455097751096e-06,
      "loss": 0.0206,
      "step": 1428620
    },
    {
      "epoch": 2.3380006938852995,
      "grad_norm": 0.4206903278827667,
      "learning_rate": 5.359079617561593e-06,
      "loss": 0.0097,
      "step": 1428640
    },
    {
      "epoch": 2.3380334243239527,
      "grad_norm": 0.471746027469635,
      "learning_rate": 5.359013725348076e-06,
      "loss": 0.0131,
      "step": 1428660
    },
    {
      "epoch": 2.3380661547626063,
      "grad_norm": 0.6564041376113892,
      "learning_rate": 5.358947833134559e-06,
      "loss": 0.018,
      "step": 1428680
    },
    {
      "epoch": 2.3380988852012594,
      "grad_norm": 0.37905165553092957,
      "learning_rate": 5.358881940921041e-06,
      "loss": 0.0195,
      "step": 1428700
    },
    {
      "epoch": 2.338131615639913,
      "grad_norm": 0.6715121269226074,
      "learning_rate": 5.358816048707525e-06,
      "loss": 0.0169,
      "step": 1428720
    },
    {
      "epoch": 2.338164346078566,
      "grad_norm": 0.6071583032608032,
      "learning_rate": 5.358750156494008e-06,
      "loss": 0.0153,
      "step": 1428740
    },
    {
      "epoch": 2.3381970765172193,
      "grad_norm": 0.8429734706878662,
      "learning_rate": 5.3586842642804904e-06,
      "loss": 0.0099,
      "step": 1428760
    },
    {
      "epoch": 2.338229806955873,
      "grad_norm": 0.47818055748939514,
      "learning_rate": 5.358618372066974e-06,
      "loss": 0.0141,
      "step": 1428780
    },
    {
      "epoch": 2.338262537394526,
      "grad_norm": 0.40927690267562866,
      "learning_rate": 5.358552479853456e-06,
      "loss": 0.0158,
      "step": 1428800
    },
    {
      "epoch": 2.3382952678331796,
      "grad_norm": 0.295052170753479,
      "learning_rate": 5.3584865876399395e-06,
      "loss": 0.019,
      "step": 1428820
    },
    {
      "epoch": 2.338327998271833,
      "grad_norm": 0.8790693879127502,
      "learning_rate": 5.358420695426421e-06,
      "loss": 0.0196,
      "step": 1428840
    },
    {
      "epoch": 2.3383607287104864,
      "grad_norm": 0.3700709939002991,
      "learning_rate": 5.358354803212905e-06,
      "loss": 0.0107,
      "step": 1428860
    },
    {
      "epoch": 2.3383934591491395,
      "grad_norm": 1.3278917074203491,
      "learning_rate": 5.358288910999388e-06,
      "loss": 0.0252,
      "step": 1428880
    },
    {
      "epoch": 2.3384261895877927,
      "grad_norm": 0.18598590791225433,
      "learning_rate": 5.3582230187858705e-06,
      "loss": 0.0169,
      "step": 1428900
    },
    {
      "epoch": 2.3384589200264463,
      "grad_norm": 0.6915926933288574,
      "learning_rate": 5.358157126572353e-06,
      "loss": 0.0124,
      "step": 1428920
    },
    {
      "epoch": 2.3384916504650994,
      "grad_norm": 0.1774117648601532,
      "learning_rate": 5.358091234358837e-06,
      "loss": 0.0146,
      "step": 1428940
    },
    {
      "epoch": 2.338524380903753,
      "grad_norm": 0.3757123649120331,
      "learning_rate": 5.358025342145319e-06,
      "loss": 0.0251,
      "step": 1428960
    },
    {
      "epoch": 2.338557111342406,
      "grad_norm": 0.1809297353029251,
      "learning_rate": 5.357959449931802e-06,
      "loss": 0.0166,
      "step": 1428980
    },
    {
      "epoch": 2.3385898417810598,
      "grad_norm": 0.5463674664497375,
      "learning_rate": 5.357893557718284e-06,
      "loss": 0.0116,
      "step": 1429000
    },
    {
      "epoch": 2.338622572219713,
      "grad_norm": 0.229759082198143,
      "learning_rate": 5.357827665504768e-06,
      "loss": 0.0141,
      "step": 1429020
    },
    {
      "epoch": 2.338655302658366,
      "grad_norm": 0.11548540741205215,
      "learning_rate": 5.3577617732912505e-06,
      "loss": 0.0194,
      "step": 1429040
    },
    {
      "epoch": 2.3386880330970197,
      "grad_norm": 0.21634316444396973,
      "learning_rate": 5.357695881077733e-06,
      "loss": 0.0084,
      "step": 1429060
    },
    {
      "epoch": 2.338720763535673,
      "grad_norm": 0.3929487466812134,
      "learning_rate": 5.357629988864216e-06,
      "loss": 0.0158,
      "step": 1429080
    },
    {
      "epoch": 2.3387534939743264,
      "grad_norm": 0.8041233420372009,
      "learning_rate": 5.3575640966506996e-06,
      "loss": 0.0249,
      "step": 1429100
    },
    {
      "epoch": 2.3387862244129796,
      "grad_norm": 0.37899601459503174,
      "learning_rate": 5.357498204437182e-06,
      "loss": 0.0251,
      "step": 1429120
    },
    {
      "epoch": 2.338818954851633,
      "grad_norm": 0.216993048787117,
      "learning_rate": 5.357432312223665e-06,
      "loss": 0.0116,
      "step": 1429140
    },
    {
      "epoch": 2.3388516852902863,
      "grad_norm": 0.3181116282939911,
      "learning_rate": 5.357366420010149e-06,
      "loss": 0.0122,
      "step": 1429160
    },
    {
      "epoch": 2.3388844157289395,
      "grad_norm": 1.1024982929229736,
      "learning_rate": 5.3573005277966305e-06,
      "loss": 0.0184,
      "step": 1429180
    },
    {
      "epoch": 2.338917146167593,
      "grad_norm": 0.3880137801170349,
      "learning_rate": 5.357234635583114e-06,
      "loss": 0.0132,
      "step": 1429200
    },
    {
      "epoch": 2.338949876606246,
      "grad_norm": 0.49652570486068726,
      "learning_rate": 5.357168743369596e-06,
      "loss": 0.0181,
      "step": 1429220
    },
    {
      "epoch": 2.3389826070449,
      "grad_norm": 1.01444411277771,
      "learning_rate": 5.35710285115608e-06,
      "loss": 0.0127,
      "step": 1429240
    },
    {
      "epoch": 2.339015337483553,
      "grad_norm": 0.5229562520980835,
      "learning_rate": 5.357036958942562e-06,
      "loss": 0.0245,
      "step": 1429260
    },
    {
      "epoch": 2.3390480679222065,
      "grad_norm": 0.06403393298387527,
      "learning_rate": 5.356971066729045e-06,
      "loss": 0.0126,
      "step": 1429280
    },
    {
      "epoch": 2.3390807983608597,
      "grad_norm": 0.3250064551830292,
      "learning_rate": 5.356905174515528e-06,
      "loss": 0.0083,
      "step": 1429300
    },
    {
      "epoch": 2.339113528799513,
      "grad_norm": 0.36584603786468506,
      "learning_rate": 5.356839282302011e-06,
      "loss": 0.0181,
      "step": 1429320
    },
    {
      "epoch": 2.3391462592381664,
      "grad_norm": 0.5138092637062073,
      "learning_rate": 5.356773390088493e-06,
      "loss": 0.0149,
      "step": 1429340
    },
    {
      "epoch": 2.3391789896768196,
      "grad_norm": 0.7887923121452332,
      "learning_rate": 5.356707497874977e-06,
      "loss": 0.0175,
      "step": 1429360
    },
    {
      "epoch": 2.339211720115473,
      "grad_norm": 0.3232506215572357,
      "learning_rate": 5.356641605661459e-06,
      "loss": 0.0137,
      "step": 1429380
    },
    {
      "epoch": 2.3392444505541263,
      "grad_norm": 0.2679702639579773,
      "learning_rate": 5.356575713447942e-06,
      "loss": 0.0182,
      "step": 1429400
    },
    {
      "epoch": 2.3392771809927795,
      "grad_norm": 0.17101465165615082,
      "learning_rate": 5.356509821234424e-06,
      "loss": 0.0139,
      "step": 1429420
    },
    {
      "epoch": 2.339309911431433,
      "grad_norm": 1.4346860647201538,
      "learning_rate": 5.356443929020908e-06,
      "loss": 0.0179,
      "step": 1429440
    },
    {
      "epoch": 2.3393426418700862,
      "grad_norm": 0.37964072823524475,
      "learning_rate": 5.356378036807391e-06,
      "loss": 0.0126,
      "step": 1429460
    },
    {
      "epoch": 2.33937537230874,
      "grad_norm": 0.6825206279754639,
      "learning_rate": 5.356312144593873e-06,
      "loss": 0.013,
      "step": 1429480
    },
    {
      "epoch": 2.339408102747393,
      "grad_norm": 0.09665122628211975,
      "learning_rate": 5.356246252380357e-06,
      "loss": 0.0155,
      "step": 1429500
    },
    {
      "epoch": 2.339440833186046,
      "grad_norm": 0.7986621856689453,
      "learning_rate": 5.35618036016684e-06,
      "loss": 0.016,
      "step": 1429520
    },
    {
      "epoch": 2.3394735636246997,
      "grad_norm": 0.40512675046920776,
      "learning_rate": 5.356114467953323e-06,
      "loss": 0.0096,
      "step": 1429540
    },
    {
      "epoch": 2.339506294063353,
      "grad_norm": 0.27935415506362915,
      "learning_rate": 5.356048575739805e-06,
      "loss": 0.0189,
      "step": 1429560
    },
    {
      "epoch": 2.3395390245020065,
      "grad_norm": 0.4805888235569,
      "learning_rate": 5.355982683526289e-06,
      "loss": 0.0157,
      "step": 1429580
    },
    {
      "epoch": 2.3395717549406596,
      "grad_norm": 0.37607574462890625,
      "learning_rate": 5.355916791312771e-06,
      "loss": 0.023,
      "step": 1429600
    },
    {
      "epoch": 2.339604485379313,
      "grad_norm": 0.18301117420196533,
      "learning_rate": 5.355850899099254e-06,
      "loss": 0.0206,
      "step": 1429620
    },
    {
      "epoch": 2.3396372158179664,
      "grad_norm": 0.42797717452049255,
      "learning_rate": 5.355785006885736e-06,
      "loss": 0.0083,
      "step": 1429640
    },
    {
      "epoch": 2.3396699462566195,
      "grad_norm": 0.2557696998119354,
      "learning_rate": 5.35571911467222e-06,
      "loss": 0.0189,
      "step": 1429660
    },
    {
      "epoch": 2.339702676695273,
      "grad_norm": 0.5414212346076965,
      "learning_rate": 5.355653222458702e-06,
      "loss": 0.0147,
      "step": 1429680
    },
    {
      "epoch": 2.3397354071339262,
      "grad_norm": 0.5541115999221802,
      "learning_rate": 5.355587330245185e-06,
      "loss": 0.0218,
      "step": 1429700
    },
    {
      "epoch": 2.33976813757258,
      "grad_norm": 0.5369805097579956,
      "learning_rate": 5.355521438031668e-06,
      "loss": 0.012,
      "step": 1429720
    },
    {
      "epoch": 2.339800868011233,
      "grad_norm": 0.1643235981464386,
      "learning_rate": 5.3554555458181515e-06,
      "loss": 0.0147,
      "step": 1429740
    },
    {
      "epoch": 2.3398335984498866,
      "grad_norm": 0.08909623324871063,
      "learning_rate": 5.355389653604633e-06,
      "loss": 0.0123,
      "step": 1429760
    },
    {
      "epoch": 2.3398663288885397,
      "grad_norm": 0.6362704038619995,
      "learning_rate": 5.355323761391117e-06,
      "loss": 0.0175,
      "step": 1429780
    },
    {
      "epoch": 2.339899059327193,
      "grad_norm": 0.39627301692962646,
      "learning_rate": 5.3552578691776005e-06,
      "loss": 0.0112,
      "step": 1429800
    },
    {
      "epoch": 2.3399317897658465,
      "grad_norm": 0.08280229568481445,
      "learning_rate": 5.355191976964082e-06,
      "loss": 0.0173,
      "step": 1429820
    },
    {
      "epoch": 2.3399645202044996,
      "grad_norm": 0.11108964681625366,
      "learning_rate": 5.355126084750566e-06,
      "loss": 0.0131,
      "step": 1429840
    },
    {
      "epoch": 2.3399972506431532,
      "grad_norm": 0.159987673163414,
      "learning_rate": 5.355060192537048e-06,
      "loss": 0.0214,
      "step": 1429860
    },
    {
      "epoch": 2.3400299810818064,
      "grad_norm": 1.3869082927703857,
      "learning_rate": 5.3549943003235315e-06,
      "loss": 0.0165,
      "step": 1429880
    },
    {
      "epoch": 2.34006271152046,
      "grad_norm": 0.35099974274635315,
      "learning_rate": 5.354928408110014e-06,
      "loss": 0.0128,
      "step": 1429900
    },
    {
      "epoch": 2.340095441959113,
      "grad_norm": 0.17632848024368286,
      "learning_rate": 5.354862515896497e-06,
      "loss": 0.0175,
      "step": 1429920
    },
    {
      "epoch": 2.3401281723977663,
      "grad_norm": 0.3562593460083008,
      "learning_rate": 5.35479662368298e-06,
      "loss": 0.0143,
      "step": 1429940
    },
    {
      "epoch": 2.34016090283642,
      "grad_norm": 0.697593629360199,
      "learning_rate": 5.354730731469463e-06,
      "loss": 0.0142,
      "step": 1429960
    },
    {
      "epoch": 2.340193633275073,
      "grad_norm": 0.39522626996040344,
      "learning_rate": 5.354664839255945e-06,
      "loss": 0.0186,
      "step": 1429980
    },
    {
      "epoch": 2.3402263637137266,
      "grad_norm": 0.5407644510269165,
      "learning_rate": 5.354598947042429e-06,
      "loss": 0.0177,
      "step": 1430000
    },
    {
      "epoch": 2.3402590941523798,
      "grad_norm": 0.18539315462112427,
      "learning_rate": 5.354533054828911e-06,
      "loss": 0.0131,
      "step": 1430020
    },
    {
      "epoch": 2.3402918245910334,
      "grad_norm": 0.28779178857803345,
      "learning_rate": 5.354467162615394e-06,
      "loss": 0.0105,
      "step": 1430040
    },
    {
      "epoch": 2.3403245550296865,
      "grad_norm": 1.1776212453842163,
      "learning_rate": 5.354401270401877e-06,
      "loss": 0.0214,
      "step": 1430060
    },
    {
      "epoch": 2.3403572854683397,
      "grad_norm": 0.06573260575532913,
      "learning_rate": 5.35433537818836e-06,
      "loss": 0.0126,
      "step": 1430080
    },
    {
      "epoch": 2.3403900159069932,
      "grad_norm": 0.6902278661727905,
      "learning_rate": 5.3542694859748425e-06,
      "loss": 0.0136,
      "step": 1430100
    },
    {
      "epoch": 2.3404227463456464,
      "grad_norm": 0.34566500782966614,
      "learning_rate": 5.354203593761326e-06,
      "loss": 0.0121,
      "step": 1430120
    },
    {
      "epoch": 2.3404554767843,
      "grad_norm": 0.9589292407035828,
      "learning_rate": 5.354137701547808e-06,
      "loss": 0.0253,
      "step": 1430140
    },
    {
      "epoch": 2.340488207222953,
      "grad_norm": 0.4217316210269928,
      "learning_rate": 5.3540718093342915e-06,
      "loss": 0.0169,
      "step": 1430160
    },
    {
      "epoch": 2.3405209376616067,
      "grad_norm": 0.34761175513267517,
      "learning_rate": 5.354005917120775e-06,
      "loss": 0.0152,
      "step": 1430180
    },
    {
      "epoch": 2.34055366810026,
      "grad_norm": 0.17153072357177734,
      "learning_rate": 5.353940024907257e-06,
      "loss": 0.0171,
      "step": 1430200
    },
    {
      "epoch": 2.340586398538913,
      "grad_norm": 0.697257936000824,
      "learning_rate": 5.353874132693741e-06,
      "loss": 0.0137,
      "step": 1430220
    },
    {
      "epoch": 2.3406191289775666,
      "grad_norm": 0.5109789371490479,
      "learning_rate": 5.3538082404802225e-06,
      "loss": 0.0165,
      "step": 1430240
    },
    {
      "epoch": 2.34065185941622,
      "grad_norm": 0.5181208252906799,
      "learning_rate": 5.353742348266706e-06,
      "loss": 0.0159,
      "step": 1430260
    },
    {
      "epoch": 2.3406845898548734,
      "grad_norm": 0.3173931837081909,
      "learning_rate": 5.353676456053189e-06,
      "loss": 0.0185,
      "step": 1430280
    },
    {
      "epoch": 2.3407173202935265,
      "grad_norm": 1.5915021896362305,
      "learning_rate": 5.3536105638396716e-06,
      "loss": 0.0175,
      "step": 1430300
    },
    {
      "epoch": 2.34075005073218,
      "grad_norm": 0.2393449991941452,
      "learning_rate": 5.353544671626154e-06,
      "loss": 0.0172,
      "step": 1430320
    },
    {
      "epoch": 2.3407827811708333,
      "grad_norm": 0.4594435393810272,
      "learning_rate": 5.353478779412638e-06,
      "loss": 0.0106,
      "step": 1430340
    },
    {
      "epoch": 2.3408155116094864,
      "grad_norm": 0.3365471363067627,
      "learning_rate": 5.35341288719912e-06,
      "loss": 0.0113,
      "step": 1430360
    },
    {
      "epoch": 2.34084824204814,
      "grad_norm": 0.33656829595565796,
      "learning_rate": 5.353346994985603e-06,
      "loss": 0.019,
      "step": 1430380
    },
    {
      "epoch": 2.340880972486793,
      "grad_norm": 0.5502029061317444,
      "learning_rate": 5.353281102772085e-06,
      "loss": 0.0123,
      "step": 1430400
    },
    {
      "epoch": 2.3409137029254468,
      "grad_norm": 0.2971712648868561,
      "learning_rate": 5.353215210558569e-06,
      "loss": 0.0197,
      "step": 1430420
    },
    {
      "epoch": 2.3409464333641,
      "grad_norm": 0.6526408791542053,
      "learning_rate": 5.353149318345051e-06,
      "loss": 0.0092,
      "step": 1430440
    },
    {
      "epoch": 2.3409791638027535,
      "grad_norm": 0.6778645515441895,
      "learning_rate": 5.353083426131534e-06,
      "loss": 0.0137,
      "step": 1430460
    },
    {
      "epoch": 2.3410118942414067,
      "grad_norm": 0.11107569932937622,
      "learning_rate": 5.353017533918017e-06,
      "loss": 0.0131,
      "step": 1430480
    },
    {
      "epoch": 2.34104462468006,
      "grad_norm": 0.19316622614860535,
      "learning_rate": 5.3529516417045e-06,
      "loss": 0.0126,
      "step": 1430500
    },
    {
      "epoch": 2.3410773551187134,
      "grad_norm": 0.31389862298965454,
      "learning_rate": 5.352885749490983e-06,
      "loss": 0.0208,
      "step": 1430520
    },
    {
      "epoch": 2.3411100855573665,
      "grad_norm": 0.48029977083206177,
      "learning_rate": 5.352819857277466e-06,
      "loss": 0.0149,
      "step": 1430540
    },
    {
      "epoch": 2.34114281599602,
      "grad_norm": 0.42236897349357605,
      "learning_rate": 5.352753965063949e-06,
      "loss": 0.0208,
      "step": 1430560
    },
    {
      "epoch": 2.3411755464346733,
      "grad_norm": 0.372427374124527,
      "learning_rate": 5.352688072850432e-06,
      "loss": 0.0131,
      "step": 1430580
    },
    {
      "epoch": 2.341208276873327,
      "grad_norm": 0.2636631429195404,
      "learning_rate": 5.352622180636915e-06,
      "loss": 0.0244,
      "step": 1430600
    },
    {
      "epoch": 2.34124100731198,
      "grad_norm": 0.44356900453567505,
      "learning_rate": 5.352556288423397e-06,
      "loss": 0.0162,
      "step": 1430620
    },
    {
      "epoch": 2.341273737750633,
      "grad_norm": 0.29627174139022827,
      "learning_rate": 5.352490396209881e-06,
      "loss": 0.0137,
      "step": 1430640
    },
    {
      "epoch": 2.341306468189287,
      "grad_norm": 0.3444106876850128,
      "learning_rate": 5.3524245039963626e-06,
      "loss": 0.0187,
      "step": 1430660
    },
    {
      "epoch": 2.34133919862794,
      "grad_norm": 0.3447904586791992,
      "learning_rate": 5.352358611782846e-06,
      "loss": 0.0153,
      "step": 1430680
    },
    {
      "epoch": 2.3413719290665935,
      "grad_norm": 1.0521292686462402,
      "learning_rate": 5.352292719569329e-06,
      "loss": 0.0147,
      "step": 1430700
    },
    {
      "epoch": 2.3414046595052467,
      "grad_norm": 0.5457623600959778,
      "learning_rate": 5.352226827355812e-06,
      "loss": 0.0176,
      "step": 1430720
    },
    {
      "epoch": 2.3414373899439003,
      "grad_norm": 0.8036578893661499,
      "learning_rate": 5.352160935142294e-06,
      "loss": 0.0122,
      "step": 1430740
    },
    {
      "epoch": 2.3414701203825534,
      "grad_norm": 0.10551466047763824,
      "learning_rate": 5.352095042928778e-06,
      "loss": 0.0132,
      "step": 1430760
    },
    {
      "epoch": 2.3415028508212066,
      "grad_norm": 0.25494322180747986,
      "learning_rate": 5.35202915071526e-06,
      "loss": 0.0179,
      "step": 1430780
    },
    {
      "epoch": 2.34153558125986,
      "grad_norm": 0.6782357096672058,
      "learning_rate": 5.3519632585017434e-06,
      "loss": 0.0174,
      "step": 1430800
    },
    {
      "epoch": 2.3415683116985133,
      "grad_norm": 0.14095130562782288,
      "learning_rate": 5.351897366288225e-06,
      "loss": 0.0103,
      "step": 1430820
    },
    {
      "epoch": 2.341601042137167,
      "grad_norm": 0.11278505623340607,
      "learning_rate": 5.351831474074709e-06,
      "loss": 0.0171,
      "step": 1430840
    },
    {
      "epoch": 2.34163377257582,
      "grad_norm": 0.8069642186164856,
      "learning_rate": 5.3517655818611925e-06,
      "loss": 0.0148,
      "step": 1430860
    },
    {
      "epoch": 2.341666503014473,
      "grad_norm": 0.3336822986602783,
      "learning_rate": 5.351699689647674e-06,
      "loss": 0.0193,
      "step": 1430880
    },
    {
      "epoch": 2.341699233453127,
      "grad_norm": 0.4070070683956146,
      "learning_rate": 5.351633797434158e-06,
      "loss": 0.0138,
      "step": 1430900
    },
    {
      "epoch": 2.34173196389178,
      "grad_norm": 0.16192948818206787,
      "learning_rate": 5.351567905220641e-06,
      "loss": 0.0132,
      "step": 1430920
    },
    {
      "epoch": 2.3417646943304335,
      "grad_norm": 0.3998647928237915,
      "learning_rate": 5.3515020130071235e-06,
      "loss": 0.0155,
      "step": 1430940
    },
    {
      "epoch": 2.3417974247690867,
      "grad_norm": 0.23757128417491913,
      "learning_rate": 5.351436120793606e-06,
      "loss": 0.0111,
      "step": 1430960
    },
    {
      "epoch": 2.34183015520774,
      "grad_norm": 0.18647758662700653,
      "learning_rate": 5.35137022858009e-06,
      "loss": 0.0125,
      "step": 1430980
    },
    {
      "epoch": 2.3418628856463934,
      "grad_norm": 0.538587749004364,
      "learning_rate": 5.351304336366572e-06,
      "loss": 0.015,
      "step": 1431000
    },
    {
      "epoch": 2.3418956160850466,
      "grad_norm": 0.32356226444244385,
      "learning_rate": 5.351238444153055e-06,
      "loss": 0.0163,
      "step": 1431020
    },
    {
      "epoch": 2.3419283465237,
      "grad_norm": 0.09969542920589447,
      "learning_rate": 5.351172551939537e-06,
      "loss": 0.0127,
      "step": 1431040
    },
    {
      "epoch": 2.3419610769623533,
      "grad_norm": 0.1300351619720459,
      "learning_rate": 5.351106659726021e-06,
      "loss": 0.0124,
      "step": 1431060
    },
    {
      "epoch": 2.341993807401007,
      "grad_norm": 0.9316955208778381,
      "learning_rate": 5.3510407675125035e-06,
      "loss": 0.0186,
      "step": 1431080
    },
    {
      "epoch": 2.34202653783966,
      "grad_norm": 0.24842198193073273,
      "learning_rate": 5.350974875298986e-06,
      "loss": 0.016,
      "step": 1431100
    },
    {
      "epoch": 2.3420592682783132,
      "grad_norm": 0.22544832527637482,
      "learning_rate": 5.350908983085469e-06,
      "loss": 0.0152,
      "step": 1431120
    },
    {
      "epoch": 2.342091998716967,
      "grad_norm": 0.5536855459213257,
      "learning_rate": 5.3508430908719526e-06,
      "loss": 0.0213,
      "step": 1431140
    },
    {
      "epoch": 2.34212472915562,
      "grad_norm": 0.48889172077178955,
      "learning_rate": 5.3507771986584345e-06,
      "loss": 0.018,
      "step": 1431160
    },
    {
      "epoch": 2.3421574595942736,
      "grad_norm": 0.4365040957927704,
      "learning_rate": 5.350711306444918e-06,
      "loss": 0.0129,
      "step": 1431180
    },
    {
      "epoch": 2.3421901900329267,
      "grad_norm": 0.7250939011573792,
      "learning_rate": 5.3506454142314e-06,
      "loss": 0.0156,
      "step": 1431200
    },
    {
      "epoch": 2.3422229204715803,
      "grad_norm": 0.21385550498962402,
      "learning_rate": 5.3505795220178835e-06,
      "loss": 0.0176,
      "step": 1431220
    },
    {
      "epoch": 2.3422556509102335,
      "grad_norm": 0.49527859687805176,
      "learning_rate": 5.350513629804367e-06,
      "loss": 0.0178,
      "step": 1431240
    },
    {
      "epoch": 2.3422883813488866,
      "grad_norm": 0.5996919274330139,
      "learning_rate": 5.350447737590849e-06,
      "loss": 0.0123,
      "step": 1431260
    },
    {
      "epoch": 2.34232111178754,
      "grad_norm": 0.3662446141242981,
      "learning_rate": 5.350381845377333e-06,
      "loss": 0.0116,
      "step": 1431280
    },
    {
      "epoch": 2.3423538422261934,
      "grad_norm": 0.33339571952819824,
      "learning_rate": 5.350315953163815e-06,
      "loss": 0.0139,
      "step": 1431300
    },
    {
      "epoch": 2.342386572664847,
      "grad_norm": 0.6054425835609436,
      "learning_rate": 5.350250060950298e-06,
      "loss": 0.016,
      "step": 1431320
    },
    {
      "epoch": 2.3424193031035,
      "grad_norm": 0.41682618856430054,
      "learning_rate": 5.350184168736781e-06,
      "loss": 0.0163,
      "step": 1431340
    },
    {
      "epoch": 2.3424520335421537,
      "grad_norm": 0.6671703457832336,
      "learning_rate": 5.350118276523264e-06,
      "loss": 0.0116,
      "step": 1431360
    },
    {
      "epoch": 2.342484763980807,
      "grad_norm": 0.26866528391838074,
      "learning_rate": 5.350052384309746e-06,
      "loss": 0.0123,
      "step": 1431380
    },
    {
      "epoch": 2.34251749441946,
      "grad_norm": 0.5639768838882446,
      "learning_rate": 5.34998649209623e-06,
      "loss": 0.0192,
      "step": 1431400
    },
    {
      "epoch": 2.3425502248581136,
      "grad_norm": 0.09829756617546082,
      "learning_rate": 5.349920599882712e-06,
      "loss": 0.0137,
      "step": 1431420
    },
    {
      "epoch": 2.3425829552967667,
      "grad_norm": 0.3663363456726074,
      "learning_rate": 5.349854707669195e-06,
      "loss": 0.014,
      "step": 1431440
    },
    {
      "epoch": 2.3426156857354203,
      "grad_norm": 0.48704802989959717,
      "learning_rate": 5.349788815455677e-06,
      "loss": 0.013,
      "step": 1431460
    },
    {
      "epoch": 2.3426484161740735,
      "grad_norm": 0.4114743769168854,
      "learning_rate": 5.349722923242161e-06,
      "loss": 0.0109,
      "step": 1431480
    },
    {
      "epoch": 2.342681146612727,
      "grad_norm": 0.2714468538761139,
      "learning_rate": 5.3496570310286436e-06,
      "loss": 0.0161,
      "step": 1431500
    },
    {
      "epoch": 2.3427138770513802,
      "grad_norm": 0.31926456093788147,
      "learning_rate": 5.349591138815126e-06,
      "loss": 0.012,
      "step": 1431520
    },
    {
      "epoch": 2.3427466074900334,
      "grad_norm": 0.3133111894130707,
      "learning_rate": 5.349525246601609e-06,
      "loss": 0.0109,
      "step": 1431540
    },
    {
      "epoch": 2.342779337928687,
      "grad_norm": 0.16958168148994446,
      "learning_rate": 5.349459354388093e-06,
      "loss": 0.0176,
      "step": 1431560
    },
    {
      "epoch": 2.34281206836734,
      "grad_norm": 0.14058713614940643,
      "learning_rate": 5.349393462174575e-06,
      "loss": 0.0153,
      "step": 1431580
    },
    {
      "epoch": 2.3428447988059937,
      "grad_norm": 0.13819123804569244,
      "learning_rate": 5.349327569961058e-06,
      "loss": 0.0118,
      "step": 1431600
    },
    {
      "epoch": 2.342877529244647,
      "grad_norm": 0.19529400765895844,
      "learning_rate": 5.349261677747542e-06,
      "loss": 0.0122,
      "step": 1431620
    },
    {
      "epoch": 2.3429102596833005,
      "grad_norm": 0.39123326539993286,
      "learning_rate": 5.349195785534024e-06,
      "loss": 0.01,
      "step": 1431640
    },
    {
      "epoch": 2.3429429901219536,
      "grad_norm": 0.4600433111190796,
      "learning_rate": 5.349129893320507e-06,
      "loss": 0.0174,
      "step": 1431660
    },
    {
      "epoch": 2.3429757205606068,
      "grad_norm": 0.1961917132139206,
      "learning_rate": 5.349064001106989e-06,
      "loss": 0.0137,
      "step": 1431680
    },
    {
      "epoch": 2.3430084509992604,
      "grad_norm": 0.17793600261211395,
      "learning_rate": 5.348998108893473e-06,
      "loss": 0.0152,
      "step": 1431700
    },
    {
      "epoch": 2.3430411814379135,
      "grad_norm": 0.08104958385229111,
      "learning_rate": 5.348932216679955e-06,
      "loss": 0.0106,
      "step": 1431720
    },
    {
      "epoch": 2.343073911876567,
      "grad_norm": 1.469449520111084,
      "learning_rate": 5.348866324466438e-06,
      "loss": 0.022,
      "step": 1431740
    },
    {
      "epoch": 2.3431066423152203,
      "grad_norm": 0.5998334884643555,
      "learning_rate": 5.348800432252921e-06,
      "loss": 0.0153,
      "step": 1431760
    },
    {
      "epoch": 2.343139372753874,
      "grad_norm": 0.6893978118896484,
      "learning_rate": 5.3487345400394045e-06,
      "loss": 0.0176,
      "step": 1431780
    },
    {
      "epoch": 2.343172103192527,
      "grad_norm": 0.17498496174812317,
      "learning_rate": 5.348668647825886e-06,
      "loss": 0.0132,
      "step": 1431800
    },
    {
      "epoch": 2.34320483363118,
      "grad_norm": 0.4472353160381317,
      "learning_rate": 5.34860275561237e-06,
      "loss": 0.0148,
      "step": 1431820
    },
    {
      "epoch": 2.3432375640698337,
      "grad_norm": 0.16205237805843353,
      "learning_rate": 5.348536863398852e-06,
      "loss": 0.012,
      "step": 1431840
    },
    {
      "epoch": 2.343270294508487,
      "grad_norm": 0.41156691312789917,
      "learning_rate": 5.3484709711853354e-06,
      "loss": 0.0114,
      "step": 1431860
    },
    {
      "epoch": 2.3433030249471405,
      "grad_norm": 0.2703476548194885,
      "learning_rate": 5.348405078971818e-06,
      "loss": 0.0116,
      "step": 1431880
    },
    {
      "epoch": 2.3433357553857936,
      "grad_norm": 0.3969823122024536,
      "learning_rate": 5.348339186758301e-06,
      "loss": 0.0175,
      "step": 1431900
    },
    {
      "epoch": 2.3433684858244472,
      "grad_norm": 0.2334531992673874,
      "learning_rate": 5.3482732945447845e-06,
      "loss": 0.0199,
      "step": 1431920
    },
    {
      "epoch": 2.3434012162631004,
      "grad_norm": 0.2098698765039444,
      "learning_rate": 5.348207402331267e-06,
      "loss": 0.0138,
      "step": 1431940
    },
    {
      "epoch": 2.3434339467017535,
      "grad_norm": 0.5678890347480774,
      "learning_rate": 5.34814151011775e-06,
      "loss": 0.019,
      "step": 1431960
    },
    {
      "epoch": 2.343466677140407,
      "grad_norm": 1.1688251495361328,
      "learning_rate": 5.348075617904233e-06,
      "loss": 0.0108,
      "step": 1431980
    },
    {
      "epoch": 2.3434994075790603,
      "grad_norm": 0.5879474878311157,
      "learning_rate": 5.348009725690716e-06,
      "loss": 0.0188,
      "step": 1432000
    },
    {
      "epoch": 2.343532138017714,
      "grad_norm": 0.16507098078727722,
      "learning_rate": 5.347943833477198e-06,
      "loss": 0.0142,
      "step": 1432020
    },
    {
      "epoch": 2.343564868456367,
      "grad_norm": 0.09797638654708862,
      "learning_rate": 5.347877941263682e-06,
      "loss": 0.0179,
      "step": 1432040
    },
    {
      "epoch": 2.3435975988950206,
      "grad_norm": 0.19416992366313934,
      "learning_rate": 5.347812049050164e-06,
      "loss": 0.0148,
      "step": 1432060
    },
    {
      "epoch": 2.3436303293336738,
      "grad_norm": 0.5953181982040405,
      "learning_rate": 5.347746156836647e-06,
      "loss": 0.0225,
      "step": 1432080
    },
    {
      "epoch": 2.343663059772327,
      "grad_norm": 0.3096252679824829,
      "learning_rate": 5.34768026462313e-06,
      "loss": 0.0173,
      "step": 1432100
    },
    {
      "epoch": 2.3436957902109805,
      "grad_norm": 0.22156701982021332,
      "learning_rate": 5.347614372409613e-06,
      "loss": 0.0185,
      "step": 1432120
    },
    {
      "epoch": 2.3437285206496337,
      "grad_norm": 0.4293931722640991,
      "learning_rate": 5.3475484801960955e-06,
      "loss": 0.0135,
      "step": 1432140
    },
    {
      "epoch": 2.3437612510882873,
      "grad_norm": 0.20781321823596954,
      "learning_rate": 5.347482587982579e-06,
      "loss": 0.0134,
      "step": 1432160
    },
    {
      "epoch": 2.3437939815269404,
      "grad_norm": 0.17097307741641998,
      "learning_rate": 5.347416695769061e-06,
      "loss": 0.0108,
      "step": 1432180
    },
    {
      "epoch": 2.343826711965594,
      "grad_norm": 0.44689232110977173,
      "learning_rate": 5.3473508035555445e-06,
      "loss": 0.0179,
      "step": 1432200
    },
    {
      "epoch": 2.343859442404247,
      "grad_norm": 0.34969833493232727,
      "learning_rate": 5.3472849113420264e-06,
      "loss": 0.017,
      "step": 1432220
    },
    {
      "epoch": 2.3438921728429003,
      "grad_norm": 0.09080364555120468,
      "learning_rate": 5.34721901912851e-06,
      "loss": 0.0206,
      "step": 1432240
    },
    {
      "epoch": 2.343924903281554,
      "grad_norm": 0.4480949938297272,
      "learning_rate": 5.347153126914994e-06,
      "loss": 0.0142,
      "step": 1432260
    },
    {
      "epoch": 2.343957633720207,
      "grad_norm": 1.243070125579834,
      "learning_rate": 5.3470872347014755e-06,
      "loss": 0.0096,
      "step": 1432280
    },
    {
      "epoch": 2.3439903641588606,
      "grad_norm": 0.6889132857322693,
      "learning_rate": 5.347021342487959e-06,
      "loss": 0.0148,
      "step": 1432300
    },
    {
      "epoch": 2.344023094597514,
      "grad_norm": 1.824467658996582,
      "learning_rate": 5.346955450274442e-06,
      "loss": 0.0143,
      "step": 1432320
    },
    {
      "epoch": 2.3440558250361674,
      "grad_norm": 0.08723533898591995,
      "learning_rate": 5.3468895580609246e-06,
      "loss": 0.013,
      "step": 1432340
    },
    {
      "epoch": 2.3440885554748205,
      "grad_norm": 0.24476481974124908,
      "learning_rate": 5.346823665847407e-06,
      "loss": 0.0133,
      "step": 1432360
    },
    {
      "epoch": 2.3441212859134737,
      "grad_norm": 0.2804950475692749,
      "learning_rate": 5.346757773633891e-06,
      "loss": 0.0182,
      "step": 1432380
    },
    {
      "epoch": 2.3441540163521273,
      "grad_norm": 1.3342899084091187,
      "learning_rate": 5.346691881420373e-06,
      "loss": 0.0171,
      "step": 1432400
    },
    {
      "epoch": 2.3441867467907804,
      "grad_norm": 0.26444631814956665,
      "learning_rate": 5.346625989206856e-06,
      "loss": 0.0112,
      "step": 1432420
    },
    {
      "epoch": 2.3442194772294336,
      "grad_norm": 0.14310243725776672,
      "learning_rate": 5.346560096993338e-06,
      "loss": 0.0191,
      "step": 1432440
    },
    {
      "epoch": 2.344252207668087,
      "grad_norm": 0.3870050013065338,
      "learning_rate": 5.346494204779822e-06,
      "loss": 0.0231,
      "step": 1432460
    },
    {
      "epoch": 2.3442849381067403,
      "grad_norm": 0.15261410176753998,
      "learning_rate": 5.346428312566304e-06,
      "loss": 0.0164,
      "step": 1432480
    },
    {
      "epoch": 2.344317668545394,
      "grad_norm": 0.22770462930202484,
      "learning_rate": 5.346362420352787e-06,
      "loss": 0.0185,
      "step": 1432500
    },
    {
      "epoch": 2.344350398984047,
      "grad_norm": 0.11602266877889633,
      "learning_rate": 5.34629652813927e-06,
      "loss": 0.0146,
      "step": 1432520
    },
    {
      "epoch": 2.3443831294227007,
      "grad_norm": 0.6621498465538025,
      "learning_rate": 5.346230635925753e-06,
      "loss": 0.0215,
      "step": 1432540
    },
    {
      "epoch": 2.344415859861354,
      "grad_norm": 0.13096225261688232,
      "learning_rate": 5.3461647437122356e-06,
      "loss": 0.017,
      "step": 1432560
    },
    {
      "epoch": 2.344448590300007,
      "grad_norm": 0.21452249586582184,
      "learning_rate": 5.346098851498719e-06,
      "loss": 0.015,
      "step": 1432580
    },
    {
      "epoch": 2.3444813207386606,
      "grad_norm": 0.1600261777639389,
      "learning_rate": 5.346032959285201e-06,
      "loss": 0.0185,
      "step": 1432600
    },
    {
      "epoch": 2.3445140511773137,
      "grad_norm": 0.6197785139083862,
      "learning_rate": 5.345967067071685e-06,
      "loss": 0.0101,
      "step": 1432620
    },
    {
      "epoch": 2.3445467816159673,
      "grad_norm": 0.2971952259540558,
      "learning_rate": 5.345901174858168e-06,
      "loss": 0.0142,
      "step": 1432640
    },
    {
      "epoch": 2.3445795120546205,
      "grad_norm": 0.47992968559265137,
      "learning_rate": 5.34583528264465e-06,
      "loss": 0.0142,
      "step": 1432660
    },
    {
      "epoch": 2.344612242493274,
      "grad_norm": 0.6242178082466125,
      "learning_rate": 5.345769390431134e-06,
      "loss": 0.0137,
      "step": 1432680
    },
    {
      "epoch": 2.344644972931927,
      "grad_norm": 0.3497408926486969,
      "learning_rate": 5.3457034982176156e-06,
      "loss": 0.0113,
      "step": 1432700
    },
    {
      "epoch": 2.3446777033705803,
      "grad_norm": 0.37486615777015686,
      "learning_rate": 5.345637606004099e-06,
      "loss": 0.0104,
      "step": 1432720
    },
    {
      "epoch": 2.344710433809234,
      "grad_norm": 0.2078276425600052,
      "learning_rate": 5.345571713790582e-06,
      "loss": 0.0108,
      "step": 1432740
    },
    {
      "epoch": 2.344743164247887,
      "grad_norm": 0.2668750584125519,
      "learning_rate": 5.345505821577065e-06,
      "loss": 0.0125,
      "step": 1432760
    },
    {
      "epoch": 2.3447758946865407,
      "grad_norm": 0.07294342666864395,
      "learning_rate": 5.345439929363547e-06,
      "loss": 0.0135,
      "step": 1432780
    },
    {
      "epoch": 2.344808625125194,
      "grad_norm": 0.3829272985458374,
      "learning_rate": 5.345374037150031e-06,
      "loss": 0.0121,
      "step": 1432800
    },
    {
      "epoch": 2.3448413555638474,
      "grad_norm": 0.2560800015926361,
      "learning_rate": 5.345308144936513e-06,
      "loss": 0.0154,
      "step": 1432820
    },
    {
      "epoch": 2.3448740860025006,
      "grad_norm": 0.5511225461959839,
      "learning_rate": 5.3452422527229964e-06,
      "loss": 0.0122,
      "step": 1432840
    },
    {
      "epoch": 2.3449068164411537,
      "grad_norm": 0.30031514167785645,
      "learning_rate": 5.345176360509478e-06,
      "loss": 0.0174,
      "step": 1432860
    },
    {
      "epoch": 2.3449395468798073,
      "grad_norm": 0.19892077147960663,
      "learning_rate": 5.345110468295962e-06,
      "loss": 0.0159,
      "step": 1432880
    },
    {
      "epoch": 2.3449722773184605,
      "grad_norm": 0.1229386106133461,
      "learning_rate": 5.345044576082445e-06,
      "loss": 0.014,
      "step": 1432900
    },
    {
      "epoch": 2.345005007757114,
      "grad_norm": 0.727301299571991,
      "learning_rate": 5.344978683868927e-06,
      "loss": 0.0222,
      "step": 1432920
    },
    {
      "epoch": 2.345037738195767,
      "grad_norm": 0.3360427916049957,
      "learning_rate": 5.34491279165541e-06,
      "loss": 0.0135,
      "step": 1432940
    },
    {
      "epoch": 2.345070468634421,
      "grad_norm": 0.2416583001613617,
      "learning_rate": 5.344846899441894e-06,
      "loss": 0.0152,
      "step": 1432960
    },
    {
      "epoch": 2.345103199073074,
      "grad_norm": 0.1685524433851242,
      "learning_rate": 5.3447810072283765e-06,
      "loss": 0.0129,
      "step": 1432980
    },
    {
      "epoch": 2.345135929511727,
      "grad_norm": 0.6866320967674255,
      "learning_rate": 5.344715115014859e-06,
      "loss": 0.0206,
      "step": 1433000
    },
    {
      "epoch": 2.3451686599503807,
      "grad_norm": 0.23629167675971985,
      "learning_rate": 5.344649222801343e-06,
      "loss": 0.0121,
      "step": 1433020
    },
    {
      "epoch": 2.345201390389034,
      "grad_norm": 0.16922293603420258,
      "learning_rate": 5.344583330587825e-06,
      "loss": 0.0149,
      "step": 1433040
    },
    {
      "epoch": 2.3452341208276875,
      "grad_norm": 0.9973046183586121,
      "learning_rate": 5.344517438374308e-06,
      "loss": 0.0174,
      "step": 1433060
    },
    {
      "epoch": 2.3452668512663406,
      "grad_norm": 0.2814176678657532,
      "learning_rate": 5.34445154616079e-06,
      "loss": 0.0099,
      "step": 1433080
    },
    {
      "epoch": 2.345299581704994,
      "grad_norm": 0.19500906765460968,
      "learning_rate": 5.344385653947274e-06,
      "loss": 0.0158,
      "step": 1433100
    },
    {
      "epoch": 2.3453323121436473,
      "grad_norm": 0.37237969040870667,
      "learning_rate": 5.3443197617337565e-06,
      "loss": 0.01,
      "step": 1433120
    },
    {
      "epoch": 2.3453650425823005,
      "grad_norm": 0.10698126256465912,
      "learning_rate": 5.344253869520239e-06,
      "loss": 0.0076,
      "step": 1433140
    },
    {
      "epoch": 2.345397773020954,
      "grad_norm": 0.11891958117485046,
      "learning_rate": 5.344187977306722e-06,
      "loss": 0.0114,
      "step": 1433160
    },
    {
      "epoch": 2.3454305034596072,
      "grad_norm": 0.7258253693580627,
      "learning_rate": 5.3441220850932056e-06,
      "loss": 0.0139,
      "step": 1433180
    },
    {
      "epoch": 2.345463233898261,
      "grad_norm": 0.119530089199543,
      "learning_rate": 5.3440561928796875e-06,
      "loss": 0.0194,
      "step": 1433200
    },
    {
      "epoch": 2.345495964336914,
      "grad_norm": 0.5034728646278381,
      "learning_rate": 5.343990300666171e-06,
      "loss": 0.0164,
      "step": 1433220
    },
    {
      "epoch": 2.3455286947755676,
      "grad_norm": 0.45633798837661743,
      "learning_rate": 5.343924408452653e-06,
      "loss": 0.0166,
      "step": 1433240
    },
    {
      "epoch": 2.3455614252142207,
      "grad_norm": 0.5616788864135742,
      "learning_rate": 5.3438585162391365e-06,
      "loss": 0.0135,
      "step": 1433260
    },
    {
      "epoch": 2.345594155652874,
      "grad_norm": 0.293436735868454,
      "learning_rate": 5.343792624025618e-06,
      "loss": 0.014,
      "step": 1433280
    },
    {
      "epoch": 2.3456268860915275,
      "grad_norm": 0.37850818037986755,
      "learning_rate": 5.343726731812102e-06,
      "loss": 0.0104,
      "step": 1433300
    },
    {
      "epoch": 2.3456596165301806,
      "grad_norm": 0.33748137950897217,
      "learning_rate": 5.343660839598586e-06,
      "loss": 0.0213,
      "step": 1433320
    },
    {
      "epoch": 2.345692346968834,
      "grad_norm": 0.25348901748657227,
      "learning_rate": 5.3435949473850675e-06,
      "loss": 0.0137,
      "step": 1433340
    },
    {
      "epoch": 2.3457250774074874,
      "grad_norm": 0.5652668476104736,
      "learning_rate": 5.343529055171551e-06,
      "loss": 0.0178,
      "step": 1433360
    },
    {
      "epoch": 2.345757807846141,
      "grad_norm": 0.5658248662948608,
      "learning_rate": 5.343463162958034e-06,
      "loss": 0.0116,
      "step": 1433380
    },
    {
      "epoch": 2.345790538284794,
      "grad_norm": 0.2789463698863983,
      "learning_rate": 5.343397270744517e-06,
      "loss": 0.0147,
      "step": 1433400
    },
    {
      "epoch": 2.3458232687234473,
      "grad_norm": 0.4662564992904663,
      "learning_rate": 5.343331378530999e-06,
      "loss": 0.0122,
      "step": 1433420
    },
    {
      "epoch": 2.345855999162101,
      "grad_norm": 1.431925892829895,
      "learning_rate": 5.343265486317483e-06,
      "loss": 0.0189,
      "step": 1433440
    },
    {
      "epoch": 2.345888729600754,
      "grad_norm": 0.29729682207107544,
      "learning_rate": 5.343199594103965e-06,
      "loss": 0.0142,
      "step": 1433460
    },
    {
      "epoch": 2.3459214600394076,
      "grad_norm": 1.004870891571045,
      "learning_rate": 5.343133701890448e-06,
      "loss": 0.0178,
      "step": 1433480
    },
    {
      "epoch": 2.3459541904780608,
      "grad_norm": 0.16036628186702728,
      "learning_rate": 5.34306780967693e-06,
      "loss": 0.019,
      "step": 1433500
    },
    {
      "epoch": 2.3459869209167143,
      "grad_norm": 0.13021916151046753,
      "learning_rate": 5.343001917463414e-06,
      "loss": 0.0156,
      "step": 1433520
    },
    {
      "epoch": 2.3460196513553675,
      "grad_norm": 0.45316174626350403,
      "learning_rate": 5.3429360252498966e-06,
      "loss": 0.0111,
      "step": 1433540
    },
    {
      "epoch": 2.3460523817940206,
      "grad_norm": 0.4150312840938568,
      "learning_rate": 5.342870133036379e-06,
      "loss": 0.0242,
      "step": 1433560
    },
    {
      "epoch": 2.3460851122326742,
      "grad_norm": 0.17246297001838684,
      "learning_rate": 5.342804240822862e-06,
      "loss": 0.0132,
      "step": 1433580
    },
    {
      "epoch": 2.3461178426713274,
      "grad_norm": 0.1384773701429367,
      "learning_rate": 5.342738348609346e-06,
      "loss": 0.0103,
      "step": 1433600
    },
    {
      "epoch": 2.346150573109981,
      "grad_norm": 0.344454824924469,
      "learning_rate": 5.3426724563958275e-06,
      "loss": 0.0155,
      "step": 1433620
    },
    {
      "epoch": 2.346183303548634,
      "grad_norm": 0.9502029418945312,
      "learning_rate": 5.342606564182311e-06,
      "loss": 0.0108,
      "step": 1433640
    },
    {
      "epoch": 2.3462160339872877,
      "grad_norm": 0.8211118578910828,
      "learning_rate": 5.342540671968793e-06,
      "loss": 0.015,
      "step": 1433660
    },
    {
      "epoch": 2.346248764425941,
      "grad_norm": 0.32334011793136597,
      "learning_rate": 5.342474779755277e-06,
      "loss": 0.0163,
      "step": 1433680
    },
    {
      "epoch": 2.346281494864594,
      "grad_norm": 0.5305384993553162,
      "learning_rate": 5.34240888754176e-06,
      "loss": 0.012,
      "step": 1433700
    },
    {
      "epoch": 2.3463142253032476,
      "grad_norm": 0.6297110319137573,
      "learning_rate": 5.342342995328242e-06,
      "loss": 0.0148,
      "step": 1433720
    },
    {
      "epoch": 2.3463469557419008,
      "grad_norm": 0.3446001708507538,
      "learning_rate": 5.342277103114726e-06,
      "loss": 0.0119,
      "step": 1433740
    },
    {
      "epoch": 2.3463796861805544,
      "grad_norm": 0.22976906597614288,
      "learning_rate": 5.342211210901208e-06,
      "loss": 0.0198,
      "step": 1433760
    },
    {
      "epoch": 2.3464124166192075,
      "grad_norm": 0.06274797767400742,
      "learning_rate": 5.342145318687691e-06,
      "loss": 0.0138,
      "step": 1433780
    },
    {
      "epoch": 2.346445147057861,
      "grad_norm": 0.40000128746032715,
      "learning_rate": 5.342079426474174e-06,
      "loss": 0.0149,
      "step": 1433800
    },
    {
      "epoch": 2.3464778774965143,
      "grad_norm": 0.49304327368736267,
      "learning_rate": 5.3420135342606575e-06,
      "loss": 0.0206,
      "step": 1433820
    },
    {
      "epoch": 2.3465106079351674,
      "grad_norm": 0.45573362708091736,
      "learning_rate": 5.341947642047139e-06,
      "loss": 0.0181,
      "step": 1433840
    },
    {
      "epoch": 2.346543338373821,
      "grad_norm": 1.0121740102767944,
      "learning_rate": 5.341881749833623e-06,
      "loss": 0.0185,
      "step": 1433860
    },
    {
      "epoch": 2.346576068812474,
      "grad_norm": 0.12705495953559875,
      "learning_rate": 5.341815857620105e-06,
      "loss": 0.0161,
      "step": 1433880
    },
    {
      "epoch": 2.3466087992511278,
      "grad_norm": 0.3032498359680176,
      "learning_rate": 5.3417499654065884e-06,
      "loss": 0.01,
      "step": 1433900
    },
    {
      "epoch": 2.346641529689781,
      "grad_norm": 0.44507673382759094,
      "learning_rate": 5.341684073193071e-06,
      "loss": 0.0202,
      "step": 1433920
    },
    {
      "epoch": 2.346674260128434,
      "grad_norm": 0.3235788345336914,
      "learning_rate": 5.341618180979554e-06,
      "loss": 0.0161,
      "step": 1433940
    },
    {
      "epoch": 2.3467069905670876,
      "grad_norm": 0.24924935400485992,
      "learning_rate": 5.341552288766037e-06,
      "loss": 0.0163,
      "step": 1433960
    },
    {
      "epoch": 2.346739721005741,
      "grad_norm": 0.23233562707901,
      "learning_rate": 5.34148639655252e-06,
      "loss": 0.0133,
      "step": 1433980
    },
    {
      "epoch": 2.3467724514443944,
      "grad_norm": 0.2518186569213867,
      "learning_rate": 5.341420504339002e-06,
      "loss": 0.0078,
      "step": 1434000
    },
    {
      "epoch": 2.3468051818830475,
      "grad_norm": 0.31129494309425354,
      "learning_rate": 5.341354612125486e-06,
      "loss": 0.0119,
      "step": 1434020
    },
    {
      "epoch": 2.3468379123217007,
      "grad_norm": 0.1260431557893753,
      "learning_rate": 5.341288719911969e-06,
      "loss": 0.0175,
      "step": 1434040
    },
    {
      "epoch": 2.3468706427603543,
      "grad_norm": 0.2991316616535187,
      "learning_rate": 5.341222827698451e-06,
      "loss": 0.0164,
      "step": 1434060
    },
    {
      "epoch": 2.3469033731990074,
      "grad_norm": 0.6336600184440613,
      "learning_rate": 5.341156935484935e-06,
      "loss": 0.0093,
      "step": 1434080
    },
    {
      "epoch": 2.346936103637661,
      "grad_norm": 0.568461537361145,
      "learning_rate": 5.341091043271417e-06,
      "loss": 0.0151,
      "step": 1434100
    },
    {
      "epoch": 2.346968834076314,
      "grad_norm": 0.3377988636493683,
      "learning_rate": 5.3410251510579e-06,
      "loss": 0.0094,
      "step": 1434120
    },
    {
      "epoch": 2.3470015645149678,
      "grad_norm": 0.1811414211988449,
      "learning_rate": 5.340959258844383e-06,
      "loss": 0.019,
      "step": 1434140
    },
    {
      "epoch": 2.347034294953621,
      "grad_norm": 0.3975900411605835,
      "learning_rate": 5.340893366630866e-06,
      "loss": 0.0171,
      "step": 1434160
    },
    {
      "epoch": 2.347067025392274,
      "grad_norm": 0.6046945452690125,
      "learning_rate": 5.3408274744173485e-06,
      "loss": 0.018,
      "step": 1434180
    },
    {
      "epoch": 2.3470997558309277,
      "grad_norm": 0.12619318068027496,
      "learning_rate": 5.340761582203832e-06,
      "loss": 0.0128,
      "step": 1434200
    },
    {
      "epoch": 2.347132486269581,
      "grad_norm": 0.24444758892059326,
      "learning_rate": 5.340695689990314e-06,
      "loss": 0.0144,
      "step": 1434220
    },
    {
      "epoch": 2.3471652167082344,
      "grad_norm": 0.8805961012840271,
      "learning_rate": 5.3406297977767975e-06,
      "loss": 0.0173,
      "step": 1434240
    },
    {
      "epoch": 2.3471979471468876,
      "grad_norm": 0.6745280027389526,
      "learning_rate": 5.3405639055632794e-06,
      "loss": 0.0198,
      "step": 1434260
    },
    {
      "epoch": 2.347230677585541,
      "grad_norm": 0.9375618100166321,
      "learning_rate": 5.340498013349763e-06,
      "loss": 0.0118,
      "step": 1434280
    },
    {
      "epoch": 2.3472634080241943,
      "grad_norm": 0.8429858088493347,
      "learning_rate": 5.340432121136245e-06,
      "loss": 0.0186,
      "step": 1434300
    },
    {
      "epoch": 2.3472961384628475,
      "grad_norm": 0.1600564420223236,
      "learning_rate": 5.3403662289227285e-06,
      "loss": 0.0168,
      "step": 1434320
    },
    {
      "epoch": 2.347328868901501,
      "grad_norm": 0.3845146894454956,
      "learning_rate": 5.340300336709211e-06,
      "loss": 0.0135,
      "step": 1434340
    },
    {
      "epoch": 2.347361599340154,
      "grad_norm": 0.4810263216495514,
      "learning_rate": 5.340234444495694e-06,
      "loss": 0.012,
      "step": 1434360
    },
    {
      "epoch": 2.347394329778808,
      "grad_norm": 0.11419376730918884,
      "learning_rate": 5.3401685522821776e-06,
      "loss": 0.0165,
      "step": 1434380
    },
    {
      "epoch": 2.347427060217461,
      "grad_norm": 0.410541832447052,
      "learning_rate": 5.34010266006866e-06,
      "loss": 0.0144,
      "step": 1434400
    },
    {
      "epoch": 2.3474597906561145,
      "grad_norm": 0.14326965808868408,
      "learning_rate": 5.340036767855143e-06,
      "loss": 0.0161,
      "step": 1434420
    },
    {
      "epoch": 2.3474925210947677,
      "grad_norm": 0.4241693913936615,
      "learning_rate": 5.339970875641626e-06,
      "loss": 0.0209,
      "step": 1434440
    },
    {
      "epoch": 2.347525251533421,
      "grad_norm": 0.1857372373342514,
      "learning_rate": 5.339904983428109e-06,
      "loss": 0.0155,
      "step": 1434460
    },
    {
      "epoch": 2.3475579819720744,
      "grad_norm": 0.43827691674232483,
      "learning_rate": 5.339839091214591e-06,
      "loss": 0.0169,
      "step": 1434480
    },
    {
      "epoch": 2.3475907124107276,
      "grad_norm": 0.45622408390045166,
      "learning_rate": 5.339773199001075e-06,
      "loss": 0.0208,
      "step": 1434500
    },
    {
      "epoch": 2.347623442849381,
      "grad_norm": 0.39314770698547363,
      "learning_rate": 5.339707306787557e-06,
      "loss": 0.0172,
      "step": 1434520
    },
    {
      "epoch": 2.3476561732880343,
      "grad_norm": 1.4404853582382202,
      "learning_rate": 5.33964141457404e-06,
      "loss": 0.0171,
      "step": 1434540
    },
    {
      "epoch": 2.347688903726688,
      "grad_norm": 0.3408849835395813,
      "learning_rate": 5.339575522360523e-06,
      "loss": 0.0102,
      "step": 1434560
    },
    {
      "epoch": 2.347721634165341,
      "grad_norm": 0.5390405058860779,
      "learning_rate": 5.339509630147006e-06,
      "loss": 0.0208,
      "step": 1434580
    },
    {
      "epoch": 2.3477543646039942,
      "grad_norm": 0.41859790682792664,
      "learning_rate": 5.3394437379334886e-06,
      "loss": 0.0131,
      "step": 1434600
    },
    {
      "epoch": 2.347787095042648,
      "grad_norm": 0.39852768182754517,
      "learning_rate": 5.339377845719972e-06,
      "loss": 0.0167,
      "step": 1434620
    },
    {
      "epoch": 2.347819825481301,
      "grad_norm": 0.37400248646736145,
      "learning_rate": 5.339311953506454e-06,
      "loss": 0.0133,
      "step": 1434640
    },
    {
      "epoch": 2.3478525559199546,
      "grad_norm": 0.5286766290664673,
      "learning_rate": 5.339246061292938e-06,
      "loss": 0.0129,
      "step": 1434660
    },
    {
      "epoch": 2.3478852863586077,
      "grad_norm": 0.24317878484725952,
      "learning_rate": 5.3391801690794195e-06,
      "loss": 0.0172,
      "step": 1434680
    },
    {
      "epoch": 2.3479180167972613,
      "grad_norm": 2.274754762649536,
      "learning_rate": 5.339114276865903e-06,
      "loss": 0.0174,
      "step": 1434700
    },
    {
      "epoch": 2.3479507472359145,
      "grad_norm": 0.31780144572257996,
      "learning_rate": 5.339048384652387e-06,
      "loss": 0.0208,
      "step": 1434720
    },
    {
      "epoch": 2.3479834776745676,
      "grad_norm": 0.3161265254020691,
      "learning_rate": 5.338982492438869e-06,
      "loss": 0.0144,
      "step": 1434740
    },
    {
      "epoch": 2.348016208113221,
      "grad_norm": 0.5484437942504883,
      "learning_rate": 5.338916600225352e-06,
      "loss": 0.0187,
      "step": 1434760
    },
    {
      "epoch": 2.3480489385518744,
      "grad_norm": 0.2819942235946655,
      "learning_rate": 5.338850708011835e-06,
      "loss": 0.0147,
      "step": 1434780
    },
    {
      "epoch": 2.348081668990528,
      "grad_norm": 0.35067471861839294,
      "learning_rate": 5.338784815798318e-06,
      "loss": 0.0233,
      "step": 1434800
    },
    {
      "epoch": 2.348114399429181,
      "grad_norm": 0.1651604324579239,
      "learning_rate": 5.3387189235848e-06,
      "loss": 0.0166,
      "step": 1434820
    },
    {
      "epoch": 2.3481471298678347,
      "grad_norm": 0.8236205577850342,
      "learning_rate": 5.338653031371284e-06,
      "loss": 0.0239,
      "step": 1434840
    },
    {
      "epoch": 2.348179860306488,
      "grad_norm": 0.33863893151283264,
      "learning_rate": 5.338587139157766e-06,
      "loss": 0.0115,
      "step": 1434860
    },
    {
      "epoch": 2.348212590745141,
      "grad_norm": 0.1650945544242859,
      "learning_rate": 5.3385212469442495e-06,
      "loss": 0.0125,
      "step": 1434880
    },
    {
      "epoch": 2.3482453211837946,
      "grad_norm": 0.2954610288143158,
      "learning_rate": 5.338455354730731e-06,
      "loss": 0.02,
      "step": 1434900
    },
    {
      "epoch": 2.3482780516224477,
      "grad_norm": 0.6966384649276733,
      "learning_rate": 5.338389462517215e-06,
      "loss": 0.0161,
      "step": 1434920
    },
    {
      "epoch": 2.3483107820611013,
      "grad_norm": 0.7434054613113403,
      "learning_rate": 5.338323570303698e-06,
      "loss": 0.0143,
      "step": 1434940
    },
    {
      "epoch": 2.3483435124997545,
      "grad_norm": 0.4248983860015869,
      "learning_rate": 5.33825767809018e-06,
      "loss": 0.0134,
      "step": 1434960
    },
    {
      "epoch": 2.348376242938408,
      "grad_norm": 0.07050022482872009,
      "learning_rate": 5.338191785876663e-06,
      "loss": 0.0142,
      "step": 1434980
    },
    {
      "epoch": 2.3484089733770612,
      "grad_norm": 0.23279647529125214,
      "learning_rate": 5.338125893663147e-06,
      "loss": 0.0148,
      "step": 1435000
    },
    {
      "epoch": 2.3484417038157144,
      "grad_norm": 0.6181128621101379,
      "learning_rate": 5.338060001449629e-06,
      "loss": 0.0146,
      "step": 1435020
    },
    {
      "epoch": 2.348474434254368,
      "grad_norm": 0.42761731147766113,
      "learning_rate": 5.337994109236112e-06,
      "loss": 0.0125,
      "step": 1435040
    },
    {
      "epoch": 2.348507164693021,
      "grad_norm": 0.7019824981689453,
      "learning_rate": 5.337928217022594e-06,
      "loss": 0.0158,
      "step": 1435060
    },
    {
      "epoch": 2.3485398951316747,
      "grad_norm": 0.17023810744285583,
      "learning_rate": 5.337862324809078e-06,
      "loss": 0.0123,
      "step": 1435080
    },
    {
      "epoch": 2.348572625570328,
      "grad_norm": 0.5010439157485962,
      "learning_rate": 5.337796432595561e-06,
      "loss": 0.0156,
      "step": 1435100
    },
    {
      "epoch": 2.3486053560089815,
      "grad_norm": 0.2589004635810852,
      "learning_rate": 5.337730540382043e-06,
      "loss": 0.0164,
      "step": 1435120
    },
    {
      "epoch": 2.3486380864476346,
      "grad_norm": 0.3472243547439575,
      "learning_rate": 5.337664648168527e-06,
      "loss": 0.0153,
      "step": 1435140
    },
    {
      "epoch": 2.3486708168862878,
      "grad_norm": 0.3846966028213501,
      "learning_rate": 5.3375987559550095e-06,
      "loss": 0.013,
      "step": 1435160
    },
    {
      "epoch": 2.3487035473249414,
      "grad_norm": 0.11159558594226837,
      "learning_rate": 5.337532863741492e-06,
      "loss": 0.0158,
      "step": 1435180
    },
    {
      "epoch": 2.3487362777635945,
      "grad_norm": 0.1730365753173828,
      "learning_rate": 5.337466971527975e-06,
      "loss": 0.0098,
      "step": 1435200
    },
    {
      "epoch": 2.348769008202248,
      "grad_norm": 0.06314568966627121,
      "learning_rate": 5.3374010793144586e-06,
      "loss": 0.0167,
      "step": 1435220
    },
    {
      "epoch": 2.3488017386409012,
      "grad_norm": 0.18760210275650024,
      "learning_rate": 5.3373351871009405e-06,
      "loss": 0.0113,
      "step": 1435240
    },
    {
      "epoch": 2.348834469079555,
      "grad_norm": 0.3192298114299774,
      "learning_rate": 5.337269294887424e-06,
      "loss": 0.0106,
      "step": 1435260
    },
    {
      "epoch": 2.348867199518208,
      "grad_norm": 0.19791153073310852,
      "learning_rate": 5.337203402673906e-06,
      "loss": 0.0084,
      "step": 1435280
    },
    {
      "epoch": 2.348899929956861,
      "grad_norm": 0.07170279324054718,
      "learning_rate": 5.3371375104603895e-06,
      "loss": 0.0133,
      "step": 1435300
    },
    {
      "epoch": 2.3489326603955147,
      "grad_norm": 0.78095543384552,
      "learning_rate": 5.3370716182468714e-06,
      "loss": 0.0171,
      "step": 1435320
    },
    {
      "epoch": 2.348965390834168,
      "grad_norm": 0.595380425453186,
      "learning_rate": 5.337005726033355e-06,
      "loss": 0.0206,
      "step": 1435340
    },
    {
      "epoch": 2.3489981212728215,
      "grad_norm": 0.8318946957588196,
      "learning_rate": 5.336939833819838e-06,
      "loss": 0.013,
      "step": 1435360
    },
    {
      "epoch": 2.3490308517114746,
      "grad_norm": 0.31691861152648926,
      "learning_rate": 5.3368739416063205e-06,
      "loss": 0.019,
      "step": 1435380
    },
    {
      "epoch": 2.349063582150128,
      "grad_norm": 0.21028278768062592,
      "learning_rate": 5.336808049392803e-06,
      "loss": 0.0162,
      "step": 1435400
    },
    {
      "epoch": 2.3490963125887814,
      "grad_norm": 0.20167513191699982,
      "learning_rate": 5.336742157179287e-06,
      "loss": 0.0169,
      "step": 1435420
    },
    {
      "epoch": 2.3491290430274345,
      "grad_norm": 0.28396254777908325,
      "learning_rate": 5.3366762649657696e-06,
      "loss": 0.0139,
      "step": 1435440
    },
    {
      "epoch": 2.349161773466088,
      "grad_norm": 0.11625403165817261,
      "learning_rate": 5.336610372752252e-06,
      "loss": 0.0152,
      "step": 1435460
    },
    {
      "epoch": 2.3491945039047413,
      "grad_norm": 0.5441506505012512,
      "learning_rate": 5.336544480538736e-06,
      "loss": 0.014,
      "step": 1435480
    },
    {
      "epoch": 2.3492272343433944,
      "grad_norm": 0.30859965085983276,
      "learning_rate": 5.336478588325218e-06,
      "loss": 0.0112,
      "step": 1435500
    },
    {
      "epoch": 2.349259964782048,
      "grad_norm": 0.4099533259868622,
      "learning_rate": 5.336412696111701e-06,
      "loss": 0.0192,
      "step": 1435520
    },
    {
      "epoch": 2.349292695220701,
      "grad_norm": 0.9798418879508972,
      "learning_rate": 5.336346803898183e-06,
      "loss": 0.0169,
      "step": 1435540
    },
    {
      "epoch": 2.3493254256593548,
      "grad_norm": 0.234372079372406,
      "learning_rate": 5.336280911684667e-06,
      "loss": 0.0169,
      "step": 1435560
    },
    {
      "epoch": 2.349358156098008,
      "grad_norm": 0.30735576152801514,
      "learning_rate": 5.33621501947115e-06,
      "loss": 0.0109,
      "step": 1435580
    },
    {
      "epoch": 2.3493908865366615,
      "grad_norm": 0.2142583429813385,
      "learning_rate": 5.336149127257632e-06,
      "loss": 0.0114,
      "step": 1435600
    },
    {
      "epoch": 2.3494236169753147,
      "grad_norm": 0.5947900414466858,
      "learning_rate": 5.336083235044115e-06,
      "loss": 0.0226,
      "step": 1435620
    },
    {
      "epoch": 2.349456347413968,
      "grad_norm": 0.31924474239349365,
      "learning_rate": 5.336017342830599e-06,
      "loss": 0.0111,
      "step": 1435640
    },
    {
      "epoch": 2.3494890778526214,
      "grad_norm": 0.18750154972076416,
      "learning_rate": 5.3359514506170805e-06,
      "loss": 0.0135,
      "step": 1435660
    },
    {
      "epoch": 2.3495218082912745,
      "grad_norm": 0.2532130181789398,
      "learning_rate": 5.335885558403564e-06,
      "loss": 0.0137,
      "step": 1435680
    },
    {
      "epoch": 2.349554538729928,
      "grad_norm": 0.28513631224632263,
      "learning_rate": 5.335819666190046e-06,
      "loss": 0.0111,
      "step": 1435700
    },
    {
      "epoch": 2.3495872691685813,
      "grad_norm": 0.7423936128616333,
      "learning_rate": 5.33575377397653e-06,
      "loss": 0.0133,
      "step": 1435720
    },
    {
      "epoch": 2.349619999607235,
      "grad_norm": 0.3048909902572632,
      "learning_rate": 5.335687881763012e-06,
      "loss": 0.0176,
      "step": 1435740
    },
    {
      "epoch": 2.349652730045888,
      "grad_norm": 0.6423072218894958,
      "learning_rate": 5.335621989549495e-06,
      "loss": 0.0123,
      "step": 1435760
    },
    {
      "epoch": 2.349685460484541,
      "grad_norm": 0.10552363842725754,
      "learning_rate": 5.335556097335979e-06,
      "loss": 0.0099,
      "step": 1435780
    },
    {
      "epoch": 2.349718190923195,
      "grad_norm": 0.8257371783256531,
      "learning_rate": 5.335490205122461e-06,
      "loss": 0.012,
      "step": 1435800
    },
    {
      "epoch": 2.349750921361848,
      "grad_norm": 0.5068717002868652,
      "learning_rate": 5.335424312908944e-06,
      "loss": 0.0216,
      "step": 1435820
    },
    {
      "epoch": 2.3497836518005015,
      "grad_norm": 0.10646669566631317,
      "learning_rate": 5.335358420695427e-06,
      "loss": 0.0152,
      "step": 1435840
    },
    {
      "epoch": 2.3498163822391547,
      "grad_norm": 0.2889319062232971,
      "learning_rate": 5.3352925284819105e-06,
      "loss": 0.0132,
      "step": 1435860
    },
    {
      "epoch": 2.3498491126778083,
      "grad_norm": 0.41038310527801514,
      "learning_rate": 5.335226636268392e-06,
      "loss": 0.0204,
      "step": 1435880
    },
    {
      "epoch": 2.3498818431164614,
      "grad_norm": 0.6620883345603943,
      "learning_rate": 5.335160744054876e-06,
      "loss": 0.0169,
      "step": 1435900
    },
    {
      "epoch": 2.3499145735551146,
      "grad_norm": 0.5858182311058044,
      "learning_rate": 5.335094851841358e-06,
      "loss": 0.0176,
      "step": 1435920
    },
    {
      "epoch": 2.349947303993768,
      "grad_norm": 0.7098037004470825,
      "learning_rate": 5.3350289596278414e-06,
      "loss": 0.0133,
      "step": 1435940
    },
    {
      "epoch": 2.3499800344324213,
      "grad_norm": 0.6978411674499512,
      "learning_rate": 5.334963067414324e-06,
      "loss": 0.0176,
      "step": 1435960
    },
    {
      "epoch": 2.350012764871075,
      "grad_norm": 0.6402449607849121,
      "learning_rate": 5.334897175200807e-06,
      "loss": 0.0128,
      "step": 1435980
    },
    {
      "epoch": 2.350045495309728,
      "grad_norm": 0.22569920122623444,
      "learning_rate": 5.33483128298729e-06,
      "loss": 0.0139,
      "step": 1436000
    },
    {
      "epoch": 2.3500782257483817,
      "grad_norm": 0.18264102935791016,
      "learning_rate": 5.334765390773773e-06,
      "loss": 0.0154,
      "step": 1436020
    },
    {
      "epoch": 2.350110956187035,
      "grad_norm": 0.463113009929657,
      "learning_rate": 5.334699498560255e-06,
      "loss": 0.0145,
      "step": 1436040
    },
    {
      "epoch": 2.350143686625688,
      "grad_norm": 1.0480350255966187,
      "learning_rate": 5.334633606346739e-06,
      "loss": 0.0085,
      "step": 1436060
    },
    {
      "epoch": 2.3501764170643415,
      "grad_norm": 0.613475501537323,
      "learning_rate": 5.334567714133221e-06,
      "loss": 0.0099,
      "step": 1436080
    },
    {
      "epoch": 2.3502091475029947,
      "grad_norm": 0.6930766105651855,
      "learning_rate": 5.334501821919704e-06,
      "loss": 0.0157,
      "step": 1436100
    },
    {
      "epoch": 2.3502418779416483,
      "grad_norm": 0.3017236292362213,
      "learning_rate": 5.334435929706186e-06,
      "loss": 0.0171,
      "step": 1436120
    },
    {
      "epoch": 2.3502746083803014,
      "grad_norm": 0.5060890316963196,
      "learning_rate": 5.33437003749267e-06,
      "loss": 0.0118,
      "step": 1436140
    },
    {
      "epoch": 2.350307338818955,
      "grad_norm": 0.29561272263526917,
      "learning_rate": 5.334304145279153e-06,
      "loss": 0.0168,
      "step": 1436160
    },
    {
      "epoch": 2.350340069257608,
      "grad_norm": 0.03386083245277405,
      "learning_rate": 5.334238253065636e-06,
      "loss": 0.0177,
      "step": 1436180
    },
    {
      "epoch": 2.3503727996962613,
      "grad_norm": 0.499775767326355,
      "learning_rate": 5.334172360852119e-06,
      "loss": 0.0108,
      "step": 1436200
    },
    {
      "epoch": 2.350405530134915,
      "grad_norm": 0.3657158613204956,
      "learning_rate": 5.3341064686386015e-06,
      "loss": 0.0194,
      "step": 1436220
    },
    {
      "epoch": 2.350438260573568,
      "grad_norm": 0.40099045634269714,
      "learning_rate": 5.334040576425085e-06,
      "loss": 0.0171,
      "step": 1436240
    },
    {
      "epoch": 2.3504709910122217,
      "grad_norm": 0.15167956054210663,
      "learning_rate": 5.333974684211567e-06,
      "loss": 0.0139,
      "step": 1436260
    },
    {
      "epoch": 2.350503721450875,
      "grad_norm": 0.5909829139709473,
      "learning_rate": 5.3339087919980506e-06,
      "loss": 0.0129,
      "step": 1436280
    },
    {
      "epoch": 2.3505364518895284,
      "grad_norm": 0.10277988016605377,
      "learning_rate": 5.3338428997845324e-06,
      "loss": 0.0204,
      "step": 1436300
    },
    {
      "epoch": 2.3505691823281816,
      "grad_norm": 0.22759312391281128,
      "learning_rate": 5.333777007571016e-06,
      "loss": 0.0106,
      "step": 1436320
    },
    {
      "epoch": 2.3506019127668347,
      "grad_norm": 0.5499364733695984,
      "learning_rate": 5.333711115357498e-06,
      "loss": 0.0182,
      "step": 1436340
    },
    {
      "epoch": 2.3506346432054883,
      "grad_norm": 0.09386391192674637,
      "learning_rate": 5.3336452231439815e-06,
      "loss": 0.0105,
      "step": 1436360
    },
    {
      "epoch": 2.3506673736441415,
      "grad_norm": 0.4169217348098755,
      "learning_rate": 5.333579330930464e-06,
      "loss": 0.0114,
      "step": 1436380
    },
    {
      "epoch": 2.350700104082795,
      "grad_norm": 0.19029511511325836,
      "learning_rate": 5.333513438716947e-06,
      "loss": 0.0172,
      "step": 1436400
    },
    {
      "epoch": 2.350732834521448,
      "grad_norm": 0.4052759110927582,
      "learning_rate": 5.33344754650343e-06,
      "loss": 0.0134,
      "step": 1436420
    },
    {
      "epoch": 2.350765564960102,
      "grad_norm": 0.1859852373600006,
      "learning_rate": 5.333381654289913e-06,
      "loss": 0.0189,
      "step": 1436440
    },
    {
      "epoch": 2.350798295398755,
      "grad_norm": 1.9163182973861694,
      "learning_rate": 5.333315762076395e-06,
      "loss": 0.0104,
      "step": 1436460
    },
    {
      "epoch": 2.350831025837408,
      "grad_norm": 0.196872740983963,
      "learning_rate": 5.333249869862879e-06,
      "loss": 0.0226,
      "step": 1436480
    },
    {
      "epoch": 2.3508637562760617,
      "grad_norm": 0.35122352838516235,
      "learning_rate": 5.333183977649362e-06,
      "loss": 0.0138,
      "step": 1436500
    },
    {
      "epoch": 2.350896486714715,
      "grad_norm": 0.5009737610816956,
      "learning_rate": 5.333118085435844e-06,
      "loss": 0.0168,
      "step": 1436520
    },
    {
      "epoch": 2.3509292171533684,
      "grad_norm": 0.949177622795105,
      "learning_rate": 5.333052193222328e-06,
      "loss": 0.0128,
      "step": 1436540
    },
    {
      "epoch": 2.3509619475920216,
      "grad_norm": 0.2241813689470291,
      "learning_rate": 5.33298630100881e-06,
      "loss": 0.0188,
      "step": 1436560
    },
    {
      "epoch": 2.350994678030675,
      "grad_norm": 0.11198582500219345,
      "learning_rate": 5.332920408795293e-06,
      "loss": 0.0188,
      "step": 1436580
    },
    {
      "epoch": 2.3510274084693283,
      "grad_norm": 0.3009950816631317,
      "learning_rate": 5.332854516581776e-06,
      "loss": 0.0136,
      "step": 1436600
    },
    {
      "epoch": 2.3510601389079815,
      "grad_norm": 0.16025495529174805,
      "learning_rate": 5.332788624368259e-06,
      "loss": 0.0172,
      "step": 1436620
    },
    {
      "epoch": 2.351092869346635,
      "grad_norm": 0.5371440052986145,
      "learning_rate": 5.3327227321547416e-06,
      "loss": 0.0127,
      "step": 1436640
    },
    {
      "epoch": 2.3511255997852882,
      "grad_norm": 0.39389869570732117,
      "learning_rate": 5.332656839941225e-06,
      "loss": 0.0136,
      "step": 1436660
    },
    {
      "epoch": 2.351158330223942,
      "grad_norm": 0.28334590792655945,
      "learning_rate": 5.332590947727707e-06,
      "loss": 0.0161,
      "step": 1436680
    },
    {
      "epoch": 2.351191060662595,
      "grad_norm": 0.6038014888763428,
      "learning_rate": 5.332525055514191e-06,
      "loss": 0.0123,
      "step": 1436700
    },
    {
      "epoch": 2.3512237911012486,
      "grad_norm": 0.14075949788093567,
      "learning_rate": 5.3324591633006725e-06,
      "loss": 0.0195,
      "step": 1436720
    },
    {
      "epoch": 2.3512565215399017,
      "grad_norm": 0.3883810341358185,
      "learning_rate": 5.332393271087156e-06,
      "loss": 0.0185,
      "step": 1436740
    },
    {
      "epoch": 2.351289251978555,
      "grad_norm": 0.28914183378219604,
      "learning_rate": 5.332327378873639e-06,
      "loss": 0.0208,
      "step": 1436760
    },
    {
      "epoch": 2.3513219824172085,
      "grad_norm": 0.4848932921886444,
      "learning_rate": 5.332261486660122e-06,
      "loss": 0.0118,
      "step": 1436780
    },
    {
      "epoch": 2.3513547128558616,
      "grad_norm": 0.15313412249088287,
      "learning_rate": 5.332195594446604e-06,
      "loss": 0.0147,
      "step": 1436800
    },
    {
      "epoch": 2.351387443294515,
      "grad_norm": 0.40082165598869324,
      "learning_rate": 5.332129702233088e-06,
      "loss": 0.0152,
      "step": 1436820
    },
    {
      "epoch": 2.3514201737331684,
      "grad_norm": 0.4100100100040436,
      "learning_rate": 5.332063810019571e-06,
      "loss": 0.0192,
      "step": 1436840
    },
    {
      "epoch": 2.351452904171822,
      "grad_norm": 0.2352529764175415,
      "learning_rate": 5.331997917806053e-06,
      "loss": 0.0137,
      "step": 1436860
    },
    {
      "epoch": 2.351485634610475,
      "grad_norm": 0.09278521686792374,
      "learning_rate": 5.331932025592537e-06,
      "loss": 0.0094,
      "step": 1436880
    },
    {
      "epoch": 2.3515183650491283,
      "grad_norm": 0.05353422090411186,
      "learning_rate": 5.331866133379019e-06,
      "loss": 0.0161,
      "step": 1436900
    },
    {
      "epoch": 2.351551095487782,
      "grad_norm": 0.6391609907150269,
      "learning_rate": 5.3318002411655025e-06,
      "loss": 0.0223,
      "step": 1436920
    },
    {
      "epoch": 2.351583825926435,
      "grad_norm": 0.17053531110286713,
      "learning_rate": 5.331734348951984e-06,
      "loss": 0.0176,
      "step": 1436940
    },
    {
      "epoch": 2.3516165563650886,
      "grad_norm": 0.44611719250679016,
      "learning_rate": 5.331668456738468e-06,
      "loss": 0.0177,
      "step": 1436960
    },
    {
      "epoch": 2.3516492868037417,
      "grad_norm": 0.4616295397281647,
      "learning_rate": 5.331602564524951e-06,
      "loss": 0.0129,
      "step": 1436980
    },
    {
      "epoch": 2.351682017242395,
      "grad_norm": 0.5350525975227356,
      "learning_rate": 5.331536672311433e-06,
      "loss": 0.0201,
      "step": 1437000
    },
    {
      "epoch": 2.3517147476810485,
      "grad_norm": 0.24665206670761108,
      "learning_rate": 5.331470780097916e-06,
      "loss": 0.011,
      "step": 1437020
    },
    {
      "epoch": 2.3517474781197016,
      "grad_norm": 0.3330530822277069,
      "learning_rate": 5.3314048878844e-06,
      "loss": 0.0154,
      "step": 1437040
    },
    {
      "epoch": 2.3517802085583552,
      "grad_norm": 0.20354865491390228,
      "learning_rate": 5.331338995670882e-06,
      "loss": 0.0142,
      "step": 1437060
    },
    {
      "epoch": 2.3518129389970084,
      "grad_norm": 0.47741127014160156,
      "learning_rate": 5.331273103457365e-06,
      "loss": 0.0224,
      "step": 1437080
    },
    {
      "epoch": 2.3518456694356615,
      "grad_norm": 0.2860804796218872,
      "learning_rate": 5.331207211243847e-06,
      "loss": 0.0132,
      "step": 1437100
    },
    {
      "epoch": 2.351878399874315,
      "grad_norm": 0.4125278890132904,
      "learning_rate": 5.331141319030331e-06,
      "loss": 0.0126,
      "step": 1437120
    },
    {
      "epoch": 2.3519111303129683,
      "grad_norm": 0.6385589838027954,
      "learning_rate": 5.331075426816813e-06,
      "loss": 0.0222,
      "step": 1437140
    },
    {
      "epoch": 2.351943860751622,
      "grad_norm": 0.19812412559986115,
      "learning_rate": 5.331009534603296e-06,
      "loss": 0.0119,
      "step": 1437160
    },
    {
      "epoch": 2.351976591190275,
      "grad_norm": 0.372525155544281,
      "learning_rate": 5.330943642389779e-06,
      "loss": 0.022,
      "step": 1437180
    },
    {
      "epoch": 2.3520093216289286,
      "grad_norm": 0.23747484385967255,
      "learning_rate": 5.330877750176262e-06,
      "loss": 0.0115,
      "step": 1437200
    },
    {
      "epoch": 2.3520420520675818,
      "grad_norm": 0.1562509387731552,
      "learning_rate": 5.330811857962745e-06,
      "loss": 0.0183,
      "step": 1437220
    },
    {
      "epoch": 2.352074782506235,
      "grad_norm": 0.4457980990409851,
      "learning_rate": 5.330745965749228e-06,
      "loss": 0.0231,
      "step": 1437240
    },
    {
      "epoch": 2.3521075129448885,
      "grad_norm": 0.20621661841869354,
      "learning_rate": 5.330680073535711e-06,
      "loss": 0.0159,
      "step": 1437260
    },
    {
      "epoch": 2.3521402433835417,
      "grad_norm": 0.2399463653564453,
      "learning_rate": 5.3306141813221935e-06,
      "loss": 0.0119,
      "step": 1437280
    },
    {
      "epoch": 2.3521729738221953,
      "grad_norm": 0.28731584548950195,
      "learning_rate": 5.330548289108677e-06,
      "loss": 0.0145,
      "step": 1437300
    },
    {
      "epoch": 2.3522057042608484,
      "grad_norm": 0.12655068933963776,
      "learning_rate": 5.330482396895159e-06,
      "loss": 0.0096,
      "step": 1437320
    },
    {
      "epoch": 2.352238434699502,
      "grad_norm": 0.49408379197120667,
      "learning_rate": 5.3304165046816425e-06,
      "loss": 0.0169,
      "step": 1437340
    },
    {
      "epoch": 2.352271165138155,
      "grad_norm": 0.20507100224494934,
      "learning_rate": 5.3303506124681244e-06,
      "loss": 0.0202,
      "step": 1437360
    },
    {
      "epoch": 2.3523038955768083,
      "grad_norm": 0.5116785168647766,
      "learning_rate": 5.330284720254608e-06,
      "loss": 0.0132,
      "step": 1437380
    },
    {
      "epoch": 2.352336626015462,
      "grad_norm": 0.6926273107528687,
      "learning_rate": 5.330218828041091e-06,
      "loss": 0.0149,
      "step": 1437400
    },
    {
      "epoch": 2.352369356454115,
      "grad_norm": 0.22477245330810547,
      "learning_rate": 5.3301529358275735e-06,
      "loss": 0.0139,
      "step": 1437420
    },
    {
      "epoch": 2.3524020868927686,
      "grad_norm": 0.31661003828048706,
      "learning_rate": 5.330087043614056e-06,
      "loss": 0.0122,
      "step": 1437440
    },
    {
      "epoch": 2.352434817331422,
      "grad_norm": 0.16078625619411469,
      "learning_rate": 5.33002115140054e-06,
      "loss": 0.0201,
      "step": 1437460
    },
    {
      "epoch": 2.3524675477700754,
      "grad_norm": 0.25114700198173523,
      "learning_rate": 5.329955259187022e-06,
      "loss": 0.0106,
      "step": 1437480
    },
    {
      "epoch": 2.3525002782087285,
      "grad_norm": 1.3861204385757446,
      "learning_rate": 5.329889366973505e-06,
      "loss": 0.0144,
      "step": 1437500
    },
    {
      "epoch": 2.3525330086473817,
      "grad_norm": 0.36014172434806824,
      "learning_rate": 5.329823474759987e-06,
      "loss": 0.013,
      "step": 1437520
    },
    {
      "epoch": 2.3525657390860353,
      "grad_norm": 0.36975088715553284,
      "learning_rate": 5.329757582546471e-06,
      "loss": 0.0174,
      "step": 1437540
    },
    {
      "epoch": 2.3525984695246884,
      "grad_norm": 0.3071163296699524,
      "learning_rate": 5.329691690332954e-06,
      "loss": 0.0123,
      "step": 1437560
    },
    {
      "epoch": 2.352631199963342,
      "grad_norm": 0.15222059190273285,
      "learning_rate": 5.329625798119436e-06,
      "loss": 0.0184,
      "step": 1437580
    },
    {
      "epoch": 2.352663930401995,
      "grad_norm": 0.9267634749412537,
      "learning_rate": 5.32955990590592e-06,
      "loss": 0.016,
      "step": 1437600
    },
    {
      "epoch": 2.3526966608406488,
      "grad_norm": 0.7190192341804504,
      "learning_rate": 5.329494013692403e-06,
      "loss": 0.0151,
      "step": 1437620
    },
    {
      "epoch": 2.352729391279302,
      "grad_norm": 0.3471674621105194,
      "learning_rate": 5.329428121478885e-06,
      "loss": 0.0114,
      "step": 1437640
    },
    {
      "epoch": 2.352762121717955,
      "grad_norm": 0.24719958007335663,
      "learning_rate": 5.329362229265368e-06,
      "loss": 0.0188,
      "step": 1437660
    },
    {
      "epoch": 2.3527948521566087,
      "grad_norm": 0.20787812769412994,
      "learning_rate": 5.329296337051852e-06,
      "loss": 0.0161,
      "step": 1437680
    },
    {
      "epoch": 2.352827582595262,
      "grad_norm": 0.07866116613149643,
      "learning_rate": 5.3292304448383335e-06,
      "loss": 0.0209,
      "step": 1437700
    },
    {
      "epoch": 2.3528603130339154,
      "grad_norm": 0.2029629647731781,
      "learning_rate": 5.329164552624817e-06,
      "loss": 0.0116,
      "step": 1437720
    },
    {
      "epoch": 2.3528930434725686,
      "grad_norm": 0.47002264857292175,
      "learning_rate": 5.329098660411299e-06,
      "loss": 0.0137,
      "step": 1437740
    },
    {
      "epoch": 2.352925773911222,
      "grad_norm": 0.2997276484966278,
      "learning_rate": 5.329032768197783e-06,
      "loss": 0.0145,
      "step": 1437760
    },
    {
      "epoch": 2.3529585043498753,
      "grad_norm": 0.16179905831813812,
      "learning_rate": 5.328966875984265e-06,
      "loss": 0.0156,
      "step": 1437780
    },
    {
      "epoch": 2.3529912347885285,
      "grad_norm": 0.2688109576702118,
      "learning_rate": 5.328900983770748e-06,
      "loss": 0.0123,
      "step": 1437800
    },
    {
      "epoch": 2.353023965227182,
      "grad_norm": 0.5989806056022644,
      "learning_rate": 5.328835091557231e-06,
      "loss": 0.0137,
      "step": 1437820
    },
    {
      "epoch": 2.353056695665835,
      "grad_norm": 0.17835630476474762,
      "learning_rate": 5.328769199343714e-06,
      "loss": 0.0116,
      "step": 1437840
    },
    {
      "epoch": 2.353089426104489,
      "grad_norm": 0.15167953073978424,
      "learning_rate": 5.328703307130196e-06,
      "loss": 0.0164,
      "step": 1437860
    },
    {
      "epoch": 2.353122156543142,
      "grad_norm": 0.7201890349388123,
      "learning_rate": 5.32863741491668e-06,
      "loss": 0.0195,
      "step": 1437880
    },
    {
      "epoch": 2.3531548869817955,
      "grad_norm": 0.2994579076766968,
      "learning_rate": 5.3285715227031635e-06,
      "loss": 0.014,
      "step": 1437900
    },
    {
      "epoch": 2.3531876174204487,
      "grad_norm": 0.28265661001205444,
      "learning_rate": 5.328505630489645e-06,
      "loss": 0.012,
      "step": 1437920
    },
    {
      "epoch": 2.353220347859102,
      "grad_norm": 0.19812707602977753,
      "learning_rate": 5.328439738276129e-06,
      "loss": 0.0188,
      "step": 1437940
    },
    {
      "epoch": 2.3532530782977554,
      "grad_norm": 0.24959149956703186,
      "learning_rate": 5.328373846062611e-06,
      "loss": 0.0152,
      "step": 1437960
    },
    {
      "epoch": 2.3532858087364086,
      "grad_norm": 0.7085314393043518,
      "learning_rate": 5.3283079538490944e-06,
      "loss": 0.0136,
      "step": 1437980
    },
    {
      "epoch": 2.353318539175062,
      "grad_norm": 0.5173940658569336,
      "learning_rate": 5.328242061635577e-06,
      "loss": 0.0151,
      "step": 1438000
    },
    {
      "epoch": 2.3533512696137153,
      "grad_norm": 0.95896315574646,
      "learning_rate": 5.32817616942206e-06,
      "loss": 0.0202,
      "step": 1438020
    },
    {
      "epoch": 2.353384000052369,
      "grad_norm": 0.21102309226989746,
      "learning_rate": 5.328110277208543e-06,
      "loss": 0.017,
      "step": 1438040
    },
    {
      "epoch": 2.353416730491022,
      "grad_norm": 0.3932296931743622,
      "learning_rate": 5.328044384995026e-06,
      "loss": 0.0102,
      "step": 1438060
    },
    {
      "epoch": 2.353449460929675,
      "grad_norm": 0.4214908182621002,
      "learning_rate": 5.327978492781508e-06,
      "loss": 0.0157,
      "step": 1438080
    },
    {
      "epoch": 2.353482191368329,
      "grad_norm": 1.02717125415802,
      "learning_rate": 5.327912600567992e-06,
      "loss": 0.0182,
      "step": 1438100
    },
    {
      "epoch": 2.353514921806982,
      "grad_norm": 0.668152928352356,
      "learning_rate": 5.327846708354474e-06,
      "loss": 0.0149,
      "step": 1438120
    },
    {
      "epoch": 2.3535476522456356,
      "grad_norm": 0.3680386245250702,
      "learning_rate": 5.327780816140957e-06,
      "loss": 0.0159,
      "step": 1438140
    },
    {
      "epoch": 2.3535803826842887,
      "grad_norm": 0.46363863348960876,
      "learning_rate": 5.327714923927439e-06,
      "loss": 0.0151,
      "step": 1438160
    },
    {
      "epoch": 2.3536131131229423,
      "grad_norm": 0.674507200717926,
      "learning_rate": 5.327649031713923e-06,
      "loss": 0.0157,
      "step": 1438180
    },
    {
      "epoch": 2.3536458435615955,
      "grad_norm": 0.11315459758043289,
      "learning_rate": 5.3275831395004054e-06,
      "loss": 0.0134,
      "step": 1438200
    },
    {
      "epoch": 2.3536785740002486,
      "grad_norm": 0.2880021035671234,
      "learning_rate": 5.327517247286888e-06,
      "loss": 0.0201,
      "step": 1438220
    },
    {
      "epoch": 2.353711304438902,
      "grad_norm": 0.11078527569770813,
      "learning_rate": 5.327451355073372e-06,
      "loss": 0.0091,
      "step": 1438240
    },
    {
      "epoch": 2.3537440348775553,
      "grad_norm": 0.27140921354293823,
      "learning_rate": 5.3273854628598545e-06,
      "loss": 0.01,
      "step": 1438260
    },
    {
      "epoch": 2.353776765316209,
      "grad_norm": 0.1253538876771927,
      "learning_rate": 5.327319570646337e-06,
      "loss": 0.0218,
      "step": 1438280
    },
    {
      "epoch": 2.353809495754862,
      "grad_norm": 1.8088881969451904,
      "learning_rate": 5.32725367843282e-06,
      "loss": 0.0136,
      "step": 1438300
    },
    {
      "epoch": 2.3538422261935157,
      "grad_norm": 0.13314224779605865,
      "learning_rate": 5.3271877862193036e-06,
      "loss": 0.0118,
      "step": 1438320
    },
    {
      "epoch": 2.353874956632169,
      "grad_norm": 0.21905802190303802,
      "learning_rate": 5.3271218940057855e-06,
      "loss": 0.0142,
      "step": 1438340
    },
    {
      "epoch": 2.353907687070822,
      "grad_norm": 0.4958329498767853,
      "learning_rate": 5.327056001792269e-06,
      "loss": 0.0118,
      "step": 1438360
    },
    {
      "epoch": 2.3539404175094756,
      "grad_norm": 0.3765484392642975,
      "learning_rate": 5.326990109578751e-06,
      "loss": 0.0174,
      "step": 1438380
    },
    {
      "epoch": 2.3539731479481287,
      "grad_norm": 0.439716100692749,
      "learning_rate": 5.3269242173652345e-06,
      "loss": 0.0109,
      "step": 1438400
    },
    {
      "epoch": 2.3540058783867823,
      "grad_norm": 1.264562964439392,
      "learning_rate": 5.326858325151717e-06,
      "loss": 0.0152,
      "step": 1438420
    },
    {
      "epoch": 2.3540386088254355,
      "grad_norm": 0.16153539717197418,
      "learning_rate": 5.3267924329382e-06,
      "loss": 0.0115,
      "step": 1438440
    },
    {
      "epoch": 2.3540713392640886,
      "grad_norm": 0.6000803112983704,
      "learning_rate": 5.326726540724683e-06,
      "loss": 0.014,
      "step": 1438460
    },
    {
      "epoch": 2.354104069702742,
      "grad_norm": 0.4301353693008423,
      "learning_rate": 5.326660648511166e-06,
      "loss": 0.0138,
      "step": 1438480
    },
    {
      "epoch": 2.3541368001413954,
      "grad_norm": 0.20376774668693542,
      "learning_rate": 5.326594756297648e-06,
      "loss": 0.0156,
      "step": 1438500
    },
    {
      "epoch": 2.354169530580049,
      "grad_norm": 0.29414451122283936,
      "learning_rate": 5.326528864084132e-06,
      "loss": 0.0152,
      "step": 1438520
    },
    {
      "epoch": 2.354202261018702,
      "grad_norm": 0.4976515471935272,
      "learning_rate": 5.326462971870614e-06,
      "loss": 0.0201,
      "step": 1438540
    },
    {
      "epoch": 2.3542349914573553,
      "grad_norm": 0.2197074145078659,
      "learning_rate": 5.326397079657097e-06,
      "loss": 0.0214,
      "step": 1438560
    },
    {
      "epoch": 2.354267721896009,
      "grad_norm": 0.1670837700366974,
      "learning_rate": 5.32633118744358e-06,
      "loss": 0.0151,
      "step": 1438580
    },
    {
      "epoch": 2.354300452334662,
      "grad_norm": 0.23368559777736664,
      "learning_rate": 5.326265295230063e-06,
      "loss": 0.0144,
      "step": 1438600
    },
    {
      "epoch": 2.3543331827733156,
      "grad_norm": 0.2714487612247467,
      "learning_rate": 5.326199403016546e-06,
      "loss": 0.0173,
      "step": 1438620
    },
    {
      "epoch": 2.3543659132119688,
      "grad_norm": 0.4967336356639862,
      "learning_rate": 5.326133510803029e-06,
      "loss": 0.0132,
      "step": 1438640
    },
    {
      "epoch": 2.3543986436506223,
      "grad_norm": 0.647727370262146,
      "learning_rate": 5.326067618589512e-06,
      "loss": 0.0174,
      "step": 1438660
    },
    {
      "epoch": 2.3544313740892755,
      "grad_norm": 0.34997472167015076,
      "learning_rate": 5.3260017263759946e-06,
      "loss": 0.0161,
      "step": 1438680
    },
    {
      "epoch": 2.3544641045279286,
      "grad_norm": 0.14010164141654968,
      "learning_rate": 5.325935834162478e-06,
      "loss": 0.0167,
      "step": 1438700
    },
    {
      "epoch": 2.3544968349665822,
      "grad_norm": 0.3174256384372711,
      "learning_rate": 5.32586994194896e-06,
      "loss": 0.0133,
      "step": 1438720
    },
    {
      "epoch": 2.3545295654052354,
      "grad_norm": 0.2520725727081299,
      "learning_rate": 5.325804049735444e-06,
      "loss": 0.0167,
      "step": 1438740
    },
    {
      "epoch": 2.354562295843889,
      "grad_norm": 0.4015407860279083,
      "learning_rate": 5.3257381575219255e-06,
      "loss": 0.0099,
      "step": 1438760
    },
    {
      "epoch": 2.354595026282542,
      "grad_norm": 0.5140417814254761,
      "learning_rate": 5.325672265308409e-06,
      "loss": 0.0159,
      "step": 1438780
    },
    {
      "epoch": 2.3546277567211957,
      "grad_norm": 0.25013822317123413,
      "learning_rate": 5.325606373094892e-06,
      "loss": 0.0181,
      "step": 1438800
    },
    {
      "epoch": 2.354660487159849,
      "grad_norm": 0.24966420233249664,
      "learning_rate": 5.325540480881375e-06,
      "loss": 0.0135,
      "step": 1438820
    },
    {
      "epoch": 2.354693217598502,
      "grad_norm": 0.47144582867622375,
      "learning_rate": 5.325474588667857e-06,
      "loss": 0.017,
      "step": 1438840
    },
    {
      "epoch": 2.3547259480371556,
      "grad_norm": 0.386516273021698,
      "learning_rate": 5.325408696454341e-06,
      "loss": 0.02,
      "step": 1438860
    },
    {
      "epoch": 2.3547586784758088,
      "grad_norm": 0.6173124313354492,
      "learning_rate": 5.325342804240823e-06,
      "loss": 0.0139,
      "step": 1438880
    },
    {
      "epoch": 2.3547914089144624,
      "grad_norm": 0.24555698037147522,
      "learning_rate": 5.325276912027306e-06,
      "loss": 0.017,
      "step": 1438900
    },
    {
      "epoch": 2.3548241393531155,
      "grad_norm": 0.6944447755813599,
      "learning_rate": 5.325211019813788e-06,
      "loss": 0.0124,
      "step": 1438920
    },
    {
      "epoch": 2.354856869791769,
      "grad_norm": 0.39776185154914856,
      "learning_rate": 5.325145127600272e-06,
      "loss": 0.019,
      "step": 1438940
    },
    {
      "epoch": 2.3548896002304223,
      "grad_norm": 0.28016650676727295,
      "learning_rate": 5.3250792353867555e-06,
      "loss": 0.0108,
      "step": 1438960
    },
    {
      "epoch": 2.3549223306690754,
      "grad_norm": 0.4280526340007782,
      "learning_rate": 5.325013343173237e-06,
      "loss": 0.0139,
      "step": 1438980
    },
    {
      "epoch": 2.354955061107729,
      "grad_norm": 0.23360122740268707,
      "learning_rate": 5.324947450959721e-06,
      "loss": 0.0141,
      "step": 1439000
    },
    {
      "epoch": 2.354987791546382,
      "grad_norm": 0.2607799172401428,
      "learning_rate": 5.324881558746204e-06,
      "loss": 0.0086,
      "step": 1439020
    },
    {
      "epoch": 2.3550205219850358,
      "grad_norm": 0.1906055510044098,
      "learning_rate": 5.3248156665326864e-06,
      "loss": 0.0123,
      "step": 1439040
    },
    {
      "epoch": 2.355053252423689,
      "grad_norm": 1.3979113101959229,
      "learning_rate": 5.324749774319169e-06,
      "loss": 0.014,
      "step": 1439060
    },
    {
      "epoch": 2.3550859828623425,
      "grad_norm": 0.17225511372089386,
      "learning_rate": 5.324683882105653e-06,
      "loss": 0.0124,
      "step": 1439080
    },
    {
      "epoch": 2.3551187133009956,
      "grad_norm": 0.17370767891407013,
      "learning_rate": 5.324617989892135e-06,
      "loss": 0.016,
      "step": 1439100
    },
    {
      "epoch": 2.355151443739649,
      "grad_norm": 0.41839513182640076,
      "learning_rate": 5.324552097678618e-06,
      "loss": 0.0145,
      "step": 1439120
    },
    {
      "epoch": 2.3551841741783024,
      "grad_norm": 0.10933136194944382,
      "learning_rate": 5.3244862054651e-06,
      "loss": 0.0137,
      "step": 1439140
    },
    {
      "epoch": 2.3552169046169555,
      "grad_norm": 0.31488126516342163,
      "learning_rate": 5.324420313251584e-06,
      "loss": 0.0094,
      "step": 1439160
    },
    {
      "epoch": 2.355249635055609,
      "grad_norm": 0.26716148853302,
      "learning_rate": 5.324354421038066e-06,
      "loss": 0.0109,
      "step": 1439180
    },
    {
      "epoch": 2.3552823654942623,
      "grad_norm": 0.3651389479637146,
      "learning_rate": 5.324288528824549e-06,
      "loss": 0.015,
      "step": 1439200
    },
    {
      "epoch": 2.355315095932916,
      "grad_norm": 0.2973513901233673,
      "learning_rate": 5.324222636611032e-06,
      "loss": 0.0131,
      "step": 1439220
    },
    {
      "epoch": 2.355347826371569,
      "grad_norm": 0.1815972775220871,
      "learning_rate": 5.324156744397515e-06,
      "loss": 0.0152,
      "step": 1439240
    },
    {
      "epoch": 2.355380556810222,
      "grad_norm": 0.2479596734046936,
      "learning_rate": 5.324090852183997e-06,
      "loss": 0.0091,
      "step": 1439260
    },
    {
      "epoch": 2.3554132872488758,
      "grad_norm": 0.11472302675247192,
      "learning_rate": 5.324024959970481e-06,
      "loss": 0.0186,
      "step": 1439280
    },
    {
      "epoch": 2.355446017687529,
      "grad_norm": 0.17426420748233795,
      "learning_rate": 5.323959067756964e-06,
      "loss": 0.0125,
      "step": 1439300
    },
    {
      "epoch": 2.3554787481261825,
      "grad_norm": 0.25199317932128906,
      "learning_rate": 5.3238931755434465e-06,
      "loss": 0.0145,
      "step": 1439320
    },
    {
      "epoch": 2.3555114785648357,
      "grad_norm": 0.2584652006626129,
      "learning_rate": 5.32382728332993e-06,
      "loss": 0.014,
      "step": 1439340
    },
    {
      "epoch": 2.3555442090034893,
      "grad_norm": 0.23655793070793152,
      "learning_rate": 5.323761391116412e-06,
      "loss": 0.0108,
      "step": 1439360
    },
    {
      "epoch": 2.3555769394421424,
      "grad_norm": 1.1784616708755493,
      "learning_rate": 5.3236954989028955e-06,
      "loss": 0.0244,
      "step": 1439380
    },
    {
      "epoch": 2.3556096698807956,
      "grad_norm": 0.4998549520969391,
      "learning_rate": 5.3236296066893774e-06,
      "loss": 0.0093,
      "step": 1439400
    },
    {
      "epoch": 2.355642400319449,
      "grad_norm": 0.25413212180137634,
      "learning_rate": 5.323563714475861e-06,
      "loss": 0.0184,
      "step": 1439420
    },
    {
      "epoch": 2.3556751307581023,
      "grad_norm": 1.0731990337371826,
      "learning_rate": 5.323497822262344e-06,
      "loss": 0.0104,
      "step": 1439440
    },
    {
      "epoch": 2.355707861196756,
      "grad_norm": 0.5619362592697144,
      "learning_rate": 5.3234319300488265e-06,
      "loss": 0.0146,
      "step": 1439460
    },
    {
      "epoch": 2.355740591635409,
      "grad_norm": 0.6535466313362122,
      "learning_rate": 5.323366037835309e-06,
      "loss": 0.0198,
      "step": 1439480
    },
    {
      "epoch": 2.3557733220740626,
      "grad_norm": 0.3741048276424408,
      "learning_rate": 5.323300145621793e-06,
      "loss": 0.0159,
      "step": 1439500
    },
    {
      "epoch": 2.355806052512716,
      "grad_norm": 0.2729310691356659,
      "learning_rate": 5.323234253408275e-06,
      "loss": 0.0146,
      "step": 1439520
    },
    {
      "epoch": 2.355838782951369,
      "grad_norm": 0.3582998514175415,
      "learning_rate": 5.323168361194758e-06,
      "loss": 0.0189,
      "step": 1439540
    },
    {
      "epoch": 2.3558715133900225,
      "grad_norm": 0.39677655696868896,
      "learning_rate": 5.32310246898124e-06,
      "loss": 0.0083,
      "step": 1439560
    },
    {
      "epoch": 2.3559042438286757,
      "grad_norm": 0.1353117823600769,
      "learning_rate": 5.323036576767724e-06,
      "loss": 0.026,
      "step": 1439580
    },
    {
      "epoch": 2.3559369742673293,
      "grad_norm": 0.5460400581359863,
      "learning_rate": 5.3229706845542065e-06,
      "loss": 0.0088,
      "step": 1439600
    },
    {
      "epoch": 2.3559697047059824,
      "grad_norm": 0.7775183916091919,
      "learning_rate": 5.322904792340689e-06,
      "loss": 0.0197,
      "step": 1439620
    },
    {
      "epoch": 2.356002435144636,
      "grad_norm": 0.34633252024650574,
      "learning_rate": 5.322838900127172e-06,
      "loss": 0.0083,
      "step": 1439640
    },
    {
      "epoch": 2.356035165583289,
      "grad_norm": 0.2724682092666626,
      "learning_rate": 5.322773007913656e-06,
      "loss": 0.0209,
      "step": 1439660
    },
    {
      "epoch": 2.3560678960219423,
      "grad_norm": 0.14082206785678864,
      "learning_rate": 5.322707115700138e-06,
      "loss": 0.0144,
      "step": 1439680
    },
    {
      "epoch": 2.356100626460596,
      "grad_norm": 0.7801676392555237,
      "learning_rate": 5.322641223486621e-06,
      "loss": 0.0226,
      "step": 1439700
    },
    {
      "epoch": 2.356133356899249,
      "grad_norm": 0.06978435814380646,
      "learning_rate": 5.322575331273105e-06,
      "loss": 0.0166,
      "step": 1439720
    },
    {
      "epoch": 2.3561660873379027,
      "grad_norm": 0.17693139612674713,
      "learning_rate": 5.3225094390595866e-06,
      "loss": 0.0166,
      "step": 1439740
    },
    {
      "epoch": 2.356198817776556,
      "grad_norm": 0.40062978863716125,
      "learning_rate": 5.32244354684607e-06,
      "loss": 0.0133,
      "step": 1439760
    },
    {
      "epoch": 2.3562315482152094,
      "grad_norm": 0.14632143080234528,
      "learning_rate": 5.322377654632552e-06,
      "loss": 0.0101,
      "step": 1439780
    },
    {
      "epoch": 2.3562642786538626,
      "grad_norm": 0.6414271593093872,
      "learning_rate": 5.322311762419036e-06,
      "loss": 0.0127,
      "step": 1439800
    },
    {
      "epoch": 2.3562970090925157,
      "grad_norm": 0.6725507974624634,
      "learning_rate": 5.322245870205518e-06,
      "loss": 0.0145,
      "step": 1439820
    },
    {
      "epoch": 2.3563297395311693,
      "grad_norm": 0.3945700228214264,
      "learning_rate": 5.322179977992001e-06,
      "loss": 0.0138,
      "step": 1439840
    },
    {
      "epoch": 2.3563624699698225,
      "grad_norm": 1.3603066205978394,
      "learning_rate": 5.322114085778484e-06,
      "loss": 0.0111,
      "step": 1439860
    },
    {
      "epoch": 2.356395200408476,
      "grad_norm": 0.2723073661327362,
      "learning_rate": 5.3220481935649674e-06,
      "loss": 0.0188,
      "step": 1439880
    },
    {
      "epoch": 2.356427930847129,
      "grad_norm": 0.2267184853553772,
      "learning_rate": 5.321982301351449e-06,
      "loss": 0.0213,
      "step": 1439900
    },
    {
      "epoch": 2.356460661285783,
      "grad_norm": 0.1641864776611328,
      "learning_rate": 5.321916409137933e-06,
      "loss": 0.0185,
      "step": 1439920
    },
    {
      "epoch": 2.356493391724436,
      "grad_norm": 0.45457178354263306,
      "learning_rate": 5.321850516924415e-06,
      "loss": 0.017,
      "step": 1439940
    },
    {
      "epoch": 2.356526122163089,
      "grad_norm": 0.2917082607746124,
      "learning_rate": 5.321784624710898e-06,
      "loss": 0.0137,
      "step": 1439960
    },
    {
      "epoch": 2.3565588526017427,
      "grad_norm": 0.28782588243484497,
      "learning_rate": 5.32171873249738e-06,
      "loss": 0.0117,
      "step": 1439980
    },
    {
      "epoch": 2.356591583040396,
      "grad_norm": 0.5015476942062378,
      "learning_rate": 5.321652840283864e-06,
      "loss": 0.0165,
      "step": 1440000
    },
    {
      "epoch": 2.3566243134790494,
      "grad_norm": 0.38915300369262695,
      "learning_rate": 5.3215869480703475e-06,
      "loss": 0.0184,
      "step": 1440020
    },
    {
      "epoch": 2.3566570439177026,
      "grad_norm": 0.3148891031742096,
      "learning_rate": 5.321521055856829e-06,
      "loss": 0.0121,
      "step": 1440040
    },
    {
      "epoch": 2.3566897743563557,
      "grad_norm": 0.30085229873657227,
      "learning_rate": 5.321455163643313e-06,
      "loss": 0.0151,
      "step": 1440060
    },
    {
      "epoch": 2.3567225047950093,
      "grad_norm": 0.2641758322715759,
      "learning_rate": 5.321389271429796e-06,
      "loss": 0.0228,
      "step": 1440080
    },
    {
      "epoch": 2.3567552352336625,
      "grad_norm": 1.0286369323730469,
      "learning_rate": 5.321323379216279e-06,
      "loss": 0.0182,
      "step": 1440100
    },
    {
      "epoch": 2.356787965672316,
      "grad_norm": 0.5438808798789978,
      "learning_rate": 5.321257487002761e-06,
      "loss": 0.0122,
      "step": 1440120
    },
    {
      "epoch": 2.3568206961109692,
      "grad_norm": 0.30920839309692383,
      "learning_rate": 5.321191594789245e-06,
      "loss": 0.0105,
      "step": 1440140
    },
    {
      "epoch": 2.3568534265496224,
      "grad_norm": 0.4720400869846344,
      "learning_rate": 5.321125702575727e-06,
      "loss": 0.0182,
      "step": 1440160
    },
    {
      "epoch": 2.356886156988276,
      "grad_norm": 0.0962744951248169,
      "learning_rate": 5.32105981036221e-06,
      "loss": 0.0215,
      "step": 1440180
    },
    {
      "epoch": 2.356918887426929,
      "grad_norm": 0.09509044885635376,
      "learning_rate": 5.320993918148692e-06,
      "loss": 0.0136,
      "step": 1440200
    },
    {
      "epoch": 2.3569516178655827,
      "grad_norm": 0.31575846672058105,
      "learning_rate": 5.320928025935176e-06,
      "loss": 0.0132,
      "step": 1440220
    },
    {
      "epoch": 2.356984348304236,
      "grad_norm": 0.7734742760658264,
      "learning_rate": 5.3208621337216584e-06,
      "loss": 0.015,
      "step": 1440240
    },
    {
      "epoch": 2.3570170787428895,
      "grad_norm": 0.43103790283203125,
      "learning_rate": 5.320796241508141e-06,
      "loss": 0.0113,
      "step": 1440260
    },
    {
      "epoch": 2.3570498091815426,
      "grad_norm": 0.19937372207641602,
      "learning_rate": 5.320730349294624e-06,
      "loss": 0.0218,
      "step": 1440280
    },
    {
      "epoch": 2.3570825396201958,
      "grad_norm": 0.1073688343167305,
      "learning_rate": 5.3206644570811075e-06,
      "loss": 0.0153,
      "step": 1440300
    },
    {
      "epoch": 2.3571152700588494,
      "grad_norm": 0.3582425117492676,
      "learning_rate": 5.320598564867589e-06,
      "loss": 0.0134,
      "step": 1440320
    },
    {
      "epoch": 2.3571480004975025,
      "grad_norm": 0.2899804413318634,
      "learning_rate": 5.320532672654073e-06,
      "loss": 0.0172,
      "step": 1440340
    },
    {
      "epoch": 2.357180730936156,
      "grad_norm": 0.1890568882226944,
      "learning_rate": 5.3204667804405566e-06,
      "loss": 0.0128,
      "step": 1440360
    },
    {
      "epoch": 2.3572134613748092,
      "grad_norm": 0.4745722711086273,
      "learning_rate": 5.3204008882270385e-06,
      "loss": 0.0153,
      "step": 1440380
    },
    {
      "epoch": 2.357246191813463,
      "grad_norm": 0.39242538809776306,
      "learning_rate": 5.320334996013522e-06,
      "loss": 0.0148,
      "step": 1440400
    },
    {
      "epoch": 2.357278922252116,
      "grad_norm": 0.21929460763931274,
      "learning_rate": 5.320269103800004e-06,
      "loss": 0.0178,
      "step": 1440420
    },
    {
      "epoch": 2.357311652690769,
      "grad_norm": 0.38738197088241577,
      "learning_rate": 5.3202032115864875e-06,
      "loss": 0.0179,
      "step": 1440440
    },
    {
      "epoch": 2.3573443831294227,
      "grad_norm": 0.9021013975143433,
      "learning_rate": 5.32013731937297e-06,
      "loss": 0.0162,
      "step": 1440460
    },
    {
      "epoch": 2.357377113568076,
      "grad_norm": 0.2960447669029236,
      "learning_rate": 5.320071427159453e-06,
      "loss": 0.0191,
      "step": 1440480
    },
    {
      "epoch": 2.3574098440067295,
      "grad_norm": 0.1825709193944931,
      "learning_rate": 5.320005534945936e-06,
      "loss": 0.0117,
      "step": 1440500
    },
    {
      "epoch": 2.3574425744453826,
      "grad_norm": 0.35962504148483276,
      "learning_rate": 5.319939642732419e-06,
      "loss": 0.0186,
      "step": 1440520
    },
    {
      "epoch": 2.3574753048840362,
      "grad_norm": 0.04821566119790077,
      "learning_rate": 5.319873750518901e-06,
      "loss": 0.0145,
      "step": 1440540
    },
    {
      "epoch": 2.3575080353226894,
      "grad_norm": 0.6390340328216553,
      "learning_rate": 5.319807858305385e-06,
      "loss": 0.013,
      "step": 1440560
    },
    {
      "epoch": 2.3575407657613425,
      "grad_norm": 0.292961448431015,
      "learning_rate": 5.319741966091867e-06,
      "loss": 0.011,
      "step": 1440580
    },
    {
      "epoch": 2.357573496199996,
      "grad_norm": 0.9468599557876587,
      "learning_rate": 5.31967607387835e-06,
      "loss": 0.0213,
      "step": 1440600
    },
    {
      "epoch": 2.3576062266386493,
      "grad_norm": 0.1129339188337326,
      "learning_rate": 5.319610181664833e-06,
      "loss": 0.016,
      "step": 1440620
    },
    {
      "epoch": 2.357638957077303,
      "grad_norm": 0.36458805203437805,
      "learning_rate": 5.319544289451316e-06,
      "loss": 0.0186,
      "step": 1440640
    },
    {
      "epoch": 2.357671687515956,
      "grad_norm": 0.1871263086795807,
      "learning_rate": 5.3194783972377985e-06,
      "loss": 0.0098,
      "step": 1440660
    },
    {
      "epoch": 2.3577044179546096,
      "grad_norm": 0.8656919598579407,
      "learning_rate": 5.319412505024282e-06,
      "loss": 0.0115,
      "step": 1440680
    },
    {
      "epoch": 2.3577371483932628,
      "grad_norm": 0.36555612087249756,
      "learning_rate": 5.319346612810764e-06,
      "loss": 0.0151,
      "step": 1440700
    },
    {
      "epoch": 2.357769878831916,
      "grad_norm": 0.4758751094341278,
      "learning_rate": 5.319280720597248e-06,
      "loss": 0.0196,
      "step": 1440720
    },
    {
      "epoch": 2.3578026092705695,
      "grad_norm": 0.35323625802993774,
      "learning_rate": 5.319214828383731e-06,
      "loss": 0.012,
      "step": 1440740
    },
    {
      "epoch": 2.3578353397092227,
      "grad_norm": 0.23194777965545654,
      "learning_rate": 5.319148936170213e-06,
      "loss": 0.0152,
      "step": 1440760
    },
    {
      "epoch": 2.3578680701478762,
      "grad_norm": 0.21476329863071442,
      "learning_rate": 5.319083043956697e-06,
      "loss": 0.0142,
      "step": 1440780
    },
    {
      "epoch": 2.3579008005865294,
      "grad_norm": 0.2599148154258728,
      "learning_rate": 5.3190171517431785e-06,
      "loss": 0.0157,
      "step": 1440800
    },
    {
      "epoch": 2.357933531025183,
      "grad_norm": 0.28571194410324097,
      "learning_rate": 5.318951259529662e-06,
      "loss": 0.0137,
      "step": 1440820
    },
    {
      "epoch": 2.357966261463836,
      "grad_norm": 0.18950432538986206,
      "learning_rate": 5.318885367316145e-06,
      "loss": 0.0213,
      "step": 1440840
    },
    {
      "epoch": 2.3579989919024893,
      "grad_norm": 0.16863295435905457,
      "learning_rate": 5.318819475102628e-06,
      "loss": 0.016,
      "step": 1440860
    },
    {
      "epoch": 2.358031722341143,
      "grad_norm": 0.54600989818573,
      "learning_rate": 5.31875358288911e-06,
      "loss": 0.0137,
      "step": 1440880
    },
    {
      "epoch": 2.358064452779796,
      "grad_norm": 0.17505984008312225,
      "learning_rate": 5.318687690675594e-06,
      "loss": 0.0144,
      "step": 1440900
    },
    {
      "epoch": 2.3580971832184496,
      "grad_norm": 0.523978590965271,
      "learning_rate": 5.318621798462076e-06,
      "loss": 0.0119,
      "step": 1440920
    },
    {
      "epoch": 2.358129913657103,
      "grad_norm": 0.21977680921554565,
      "learning_rate": 5.318555906248559e-06,
      "loss": 0.0098,
      "step": 1440940
    },
    {
      "epoch": 2.3581626440957564,
      "grad_norm": 0.6999923586845398,
      "learning_rate": 5.318490014035041e-06,
      "loss": 0.0221,
      "step": 1440960
    },
    {
      "epoch": 2.3581953745344095,
      "grad_norm": 0.1975252777338028,
      "learning_rate": 5.318424121821525e-06,
      "loss": 0.0105,
      "step": 1440980
    },
    {
      "epoch": 2.3582281049730627,
      "grad_norm": 0.13138031959533691,
      "learning_rate": 5.318358229608007e-06,
      "loss": 0.0136,
      "step": 1441000
    },
    {
      "epoch": 2.3582608354117163,
      "grad_norm": 0.7421916127204895,
      "learning_rate": 5.31829233739449e-06,
      "loss": 0.0175,
      "step": 1441020
    },
    {
      "epoch": 2.3582935658503694,
      "grad_norm": 0.05545063316822052,
      "learning_rate": 5.318226445180973e-06,
      "loss": 0.0116,
      "step": 1441040
    },
    {
      "epoch": 2.358326296289023,
      "grad_norm": 0.2961934208869934,
      "learning_rate": 5.318160552967456e-06,
      "loss": 0.0185,
      "step": 1441060
    },
    {
      "epoch": 2.358359026727676,
      "grad_norm": 0.4036683738231659,
      "learning_rate": 5.3180946607539394e-06,
      "loss": 0.0122,
      "step": 1441080
    },
    {
      "epoch": 2.3583917571663298,
      "grad_norm": 0.2534613013267517,
      "learning_rate": 5.318028768540422e-06,
      "loss": 0.014,
      "step": 1441100
    },
    {
      "epoch": 2.358424487604983,
      "grad_norm": 0.864283561706543,
      "learning_rate": 5.317962876326905e-06,
      "loss": 0.013,
      "step": 1441120
    },
    {
      "epoch": 2.358457218043636,
      "grad_norm": 0.5066758394241333,
      "learning_rate": 5.317896984113388e-06,
      "loss": 0.0166,
      "step": 1441140
    },
    {
      "epoch": 2.3584899484822897,
      "grad_norm": 0.3272610902786255,
      "learning_rate": 5.317831091899871e-06,
      "loss": 0.0129,
      "step": 1441160
    },
    {
      "epoch": 2.358522678920943,
      "grad_norm": 0.6051051616668701,
      "learning_rate": 5.317765199686353e-06,
      "loss": 0.0177,
      "step": 1441180
    },
    {
      "epoch": 2.3585554093595964,
      "grad_norm": 0.47709348797798157,
      "learning_rate": 5.317699307472837e-06,
      "loss": 0.0144,
      "step": 1441200
    },
    {
      "epoch": 2.3585881397982495,
      "grad_norm": 0.2986505329608917,
      "learning_rate": 5.317633415259319e-06,
      "loss": 0.0141,
      "step": 1441220
    },
    {
      "epoch": 2.358620870236903,
      "grad_norm": 0.28481245040893555,
      "learning_rate": 5.317567523045802e-06,
      "loss": 0.0109,
      "step": 1441240
    },
    {
      "epoch": 2.3586536006755563,
      "grad_norm": 0.42648881673812866,
      "learning_rate": 5.317501630832285e-06,
      "loss": 0.0079,
      "step": 1441260
    },
    {
      "epoch": 2.3586863311142094,
      "grad_norm": 0.23334762454032898,
      "learning_rate": 5.317435738618768e-06,
      "loss": 0.0144,
      "step": 1441280
    },
    {
      "epoch": 2.358719061552863,
      "grad_norm": 0.38875776529312134,
      "learning_rate": 5.31736984640525e-06,
      "loss": 0.0188,
      "step": 1441300
    },
    {
      "epoch": 2.358751791991516,
      "grad_norm": 0.8963537812232971,
      "learning_rate": 5.317303954191734e-06,
      "loss": 0.0101,
      "step": 1441320
    },
    {
      "epoch": 2.35878452243017,
      "grad_norm": 0.20543283224105835,
      "learning_rate": 5.317238061978216e-06,
      "loss": 0.0134,
      "step": 1441340
    },
    {
      "epoch": 2.358817252868823,
      "grad_norm": 0.3173734247684479,
      "learning_rate": 5.3171721697646995e-06,
      "loss": 0.0147,
      "step": 1441360
    },
    {
      "epoch": 2.3588499833074765,
      "grad_norm": 0.2974589765071869,
      "learning_rate": 5.317106277551181e-06,
      "loss": 0.0139,
      "step": 1441380
    },
    {
      "epoch": 2.3588827137461297,
      "grad_norm": 0.475371778011322,
      "learning_rate": 5.317040385337665e-06,
      "loss": 0.0175,
      "step": 1441400
    },
    {
      "epoch": 2.358915444184783,
      "grad_norm": 0.51622474193573,
      "learning_rate": 5.3169744931241486e-06,
      "loss": 0.0159,
      "step": 1441420
    },
    {
      "epoch": 2.3589481746234364,
      "grad_norm": 0.3483157455921173,
      "learning_rate": 5.3169086009106304e-06,
      "loss": 0.016,
      "step": 1441440
    },
    {
      "epoch": 2.3589809050620896,
      "grad_norm": 0.5758503675460815,
      "learning_rate": 5.316842708697114e-06,
      "loss": 0.0245,
      "step": 1441460
    },
    {
      "epoch": 2.359013635500743,
      "grad_norm": 0.05939321592450142,
      "learning_rate": 5.316776816483597e-06,
      "loss": 0.0143,
      "step": 1441480
    },
    {
      "epoch": 2.3590463659393963,
      "grad_norm": 0.2548162639141083,
      "learning_rate": 5.3167109242700795e-06,
      "loss": 0.0149,
      "step": 1441500
    },
    {
      "epoch": 2.3590790963780495,
      "grad_norm": 0.16837377846240997,
      "learning_rate": 5.316645032056562e-06,
      "loss": 0.0118,
      "step": 1441520
    },
    {
      "epoch": 2.359111826816703,
      "grad_norm": 0.4377899467945099,
      "learning_rate": 5.316579139843046e-06,
      "loss": 0.0145,
      "step": 1441540
    },
    {
      "epoch": 2.359144557255356,
      "grad_norm": 0.6226515769958496,
      "learning_rate": 5.316513247629528e-06,
      "loss": 0.0139,
      "step": 1441560
    },
    {
      "epoch": 2.35917728769401,
      "grad_norm": 0.38117384910583496,
      "learning_rate": 5.316447355416011e-06,
      "loss": 0.0175,
      "step": 1441580
    },
    {
      "epoch": 2.359210018132663,
      "grad_norm": 0.4109787344932556,
      "learning_rate": 5.316381463202493e-06,
      "loss": 0.0113,
      "step": 1441600
    },
    {
      "epoch": 2.359242748571316,
      "grad_norm": 0.16799169778823853,
      "learning_rate": 5.316315570988977e-06,
      "loss": 0.014,
      "step": 1441620
    },
    {
      "epoch": 2.3592754790099697,
      "grad_norm": 0.14200374484062195,
      "learning_rate": 5.3162496787754595e-06,
      "loss": 0.0137,
      "step": 1441640
    },
    {
      "epoch": 2.359308209448623,
      "grad_norm": 1.876439094543457,
      "learning_rate": 5.316183786561942e-06,
      "loss": 0.0131,
      "step": 1441660
    },
    {
      "epoch": 2.3593409398872764,
      "grad_norm": 0.3266753852367401,
      "learning_rate": 5.316117894348425e-06,
      "loss": 0.0127,
      "step": 1441680
    },
    {
      "epoch": 2.3593736703259296,
      "grad_norm": 0.5791833996772766,
      "learning_rate": 5.316052002134909e-06,
      "loss": 0.0187,
      "step": 1441700
    },
    {
      "epoch": 2.359406400764583,
      "grad_norm": 0.21514536440372467,
      "learning_rate": 5.3159861099213905e-06,
      "loss": 0.0169,
      "step": 1441720
    },
    {
      "epoch": 2.3594391312032363,
      "grad_norm": 0.12694157660007477,
      "learning_rate": 5.315920217707874e-06,
      "loss": 0.0146,
      "step": 1441740
    },
    {
      "epoch": 2.3594718616418895,
      "grad_norm": 0.6415783166885376,
      "learning_rate": 5.315854325494358e-06,
      "loss": 0.0158,
      "step": 1441760
    },
    {
      "epoch": 2.359504592080543,
      "grad_norm": 0.46862900257110596,
      "learning_rate": 5.3157884332808396e-06,
      "loss": 0.0104,
      "step": 1441780
    },
    {
      "epoch": 2.3595373225191962,
      "grad_norm": 0.5720176696777344,
      "learning_rate": 5.315722541067323e-06,
      "loss": 0.0183,
      "step": 1441800
    },
    {
      "epoch": 2.35957005295785,
      "grad_norm": 0.26256638765335083,
      "learning_rate": 5.315656648853805e-06,
      "loss": 0.0159,
      "step": 1441820
    },
    {
      "epoch": 2.359602783396503,
      "grad_norm": 0.5417625308036804,
      "learning_rate": 5.315590756640289e-06,
      "loss": 0.0144,
      "step": 1441840
    },
    {
      "epoch": 2.3596355138351566,
      "grad_norm": 0.978243887424469,
      "learning_rate": 5.315524864426771e-06,
      "loss": 0.0149,
      "step": 1441860
    },
    {
      "epoch": 2.3596682442738097,
      "grad_norm": 0.12090542912483215,
      "learning_rate": 5.315458972213254e-06,
      "loss": 0.0134,
      "step": 1441880
    },
    {
      "epoch": 2.359700974712463,
      "grad_norm": 0.32765135169029236,
      "learning_rate": 5.315393079999737e-06,
      "loss": 0.0137,
      "step": 1441900
    },
    {
      "epoch": 2.3597337051511165,
      "grad_norm": 0.5299327373504639,
      "learning_rate": 5.3153271877862204e-06,
      "loss": 0.0186,
      "step": 1441920
    },
    {
      "epoch": 2.3597664355897696,
      "grad_norm": 0.209878072142601,
      "learning_rate": 5.315261295572702e-06,
      "loss": 0.0156,
      "step": 1441940
    },
    {
      "epoch": 2.359799166028423,
      "grad_norm": 1.347440481185913,
      "learning_rate": 5.315195403359186e-06,
      "loss": 0.0177,
      "step": 1441960
    },
    {
      "epoch": 2.3598318964670764,
      "grad_norm": 0.06761114299297333,
      "learning_rate": 5.315129511145668e-06,
      "loss": 0.0168,
      "step": 1441980
    },
    {
      "epoch": 2.35986462690573,
      "grad_norm": 0.23903565108776093,
      "learning_rate": 5.315063618932151e-06,
      "loss": 0.0115,
      "step": 1442000
    },
    {
      "epoch": 2.359897357344383,
      "grad_norm": 0.24209626019001007,
      "learning_rate": 5.314997726718633e-06,
      "loss": 0.0108,
      "step": 1442020
    },
    {
      "epoch": 2.3599300877830363,
      "grad_norm": 0.08563913404941559,
      "learning_rate": 5.314931834505117e-06,
      "loss": 0.0162,
      "step": 1442040
    },
    {
      "epoch": 2.35996281822169,
      "grad_norm": 0.23804616928100586,
      "learning_rate": 5.3148659422916e-06,
      "loss": 0.0137,
      "step": 1442060
    },
    {
      "epoch": 2.359995548660343,
      "grad_norm": 1.1458923816680908,
      "learning_rate": 5.314800050078082e-06,
      "loss": 0.0178,
      "step": 1442080
    },
    {
      "epoch": 2.3600282790989966,
      "grad_norm": 0.4286883473396301,
      "learning_rate": 5.314734157864565e-06,
      "loss": 0.017,
      "step": 1442100
    },
    {
      "epoch": 2.3600610095376497,
      "grad_norm": 0.48579323291778564,
      "learning_rate": 5.314668265651049e-06,
      "loss": 0.0156,
      "step": 1442120
    },
    {
      "epoch": 2.3600937399763033,
      "grad_norm": 0.24661710858345032,
      "learning_rate": 5.314602373437531e-06,
      "loss": 0.0125,
      "step": 1442140
    },
    {
      "epoch": 2.3601264704149565,
      "grad_norm": 0.11420171707868576,
      "learning_rate": 5.314536481224014e-06,
      "loss": 0.0123,
      "step": 1442160
    },
    {
      "epoch": 2.3601592008536096,
      "grad_norm": 0.7889339923858643,
      "learning_rate": 5.314470589010498e-06,
      "loss": 0.0157,
      "step": 1442180
    },
    {
      "epoch": 2.3601919312922632,
      "grad_norm": 0.27069154381752014,
      "learning_rate": 5.31440469679698e-06,
      "loss": 0.0169,
      "step": 1442200
    },
    {
      "epoch": 2.3602246617309164,
      "grad_norm": 0.5925495028495789,
      "learning_rate": 5.314338804583463e-06,
      "loss": 0.0162,
      "step": 1442220
    },
    {
      "epoch": 2.36025739216957,
      "grad_norm": 0.3312617838382721,
      "learning_rate": 5.314272912369945e-06,
      "loss": 0.0128,
      "step": 1442240
    },
    {
      "epoch": 2.360290122608223,
      "grad_norm": 0.1629406064748764,
      "learning_rate": 5.314207020156429e-06,
      "loss": 0.0103,
      "step": 1442260
    },
    {
      "epoch": 2.3603228530468767,
      "grad_norm": 0.5974166393280029,
      "learning_rate": 5.3141411279429114e-06,
      "loss": 0.0116,
      "step": 1442280
    },
    {
      "epoch": 2.36035558348553,
      "grad_norm": 0.23417843878269196,
      "learning_rate": 5.314075235729394e-06,
      "loss": 0.0114,
      "step": 1442300
    },
    {
      "epoch": 2.360388313924183,
      "grad_norm": 0.18783041834831238,
      "learning_rate": 5.314009343515877e-06,
      "loss": 0.0126,
      "step": 1442320
    },
    {
      "epoch": 2.3604210443628366,
      "grad_norm": 0.420846164226532,
      "learning_rate": 5.3139434513023605e-06,
      "loss": 0.008,
      "step": 1442340
    },
    {
      "epoch": 2.3604537748014898,
      "grad_norm": 0.7849413752555847,
      "learning_rate": 5.313877559088842e-06,
      "loss": 0.0164,
      "step": 1442360
    },
    {
      "epoch": 2.3604865052401434,
      "grad_norm": 0.6104676127433777,
      "learning_rate": 5.313811666875326e-06,
      "loss": 0.0131,
      "step": 1442380
    },
    {
      "epoch": 2.3605192356787965,
      "grad_norm": 0.23753651976585388,
      "learning_rate": 5.313745774661808e-06,
      "loss": 0.0147,
      "step": 1442400
    },
    {
      "epoch": 2.36055196611745,
      "grad_norm": 0.2950163781642914,
      "learning_rate": 5.3136798824482915e-06,
      "loss": 0.0217,
      "step": 1442420
    },
    {
      "epoch": 2.3605846965561033,
      "grad_norm": 0.6011977195739746,
      "learning_rate": 5.313613990234774e-06,
      "loss": 0.0143,
      "step": 1442440
    },
    {
      "epoch": 2.3606174269947564,
      "grad_norm": 0.18606635928153992,
      "learning_rate": 5.313548098021257e-06,
      "loss": 0.0127,
      "step": 1442460
    },
    {
      "epoch": 2.36065015743341,
      "grad_norm": 0.09650525450706482,
      "learning_rate": 5.3134822058077405e-06,
      "loss": 0.0139,
      "step": 1442480
    },
    {
      "epoch": 2.360682887872063,
      "grad_norm": 0.0689672976732254,
      "learning_rate": 5.313416313594223e-06,
      "loss": 0.0141,
      "step": 1442500
    },
    {
      "epoch": 2.3607156183107167,
      "grad_norm": 0.269633024930954,
      "learning_rate": 5.313350421380706e-06,
      "loss": 0.0107,
      "step": 1442520
    },
    {
      "epoch": 2.36074834874937,
      "grad_norm": 0.10197948664426804,
      "learning_rate": 5.313284529167189e-06,
      "loss": 0.0161,
      "step": 1442540
    },
    {
      "epoch": 2.3607810791880235,
      "grad_norm": 0.4074641764163971,
      "learning_rate": 5.313218636953672e-06,
      "loss": 0.0118,
      "step": 1442560
    },
    {
      "epoch": 2.3608138096266766,
      "grad_norm": 0.11110051721334457,
      "learning_rate": 5.313152744740154e-06,
      "loss": 0.0111,
      "step": 1442580
    },
    {
      "epoch": 2.36084654006533,
      "grad_norm": 0.2995354235172272,
      "learning_rate": 5.313086852526638e-06,
      "loss": 0.0119,
      "step": 1442600
    },
    {
      "epoch": 2.3608792705039834,
      "grad_norm": 0.3225831985473633,
      "learning_rate": 5.31302096031312e-06,
      "loss": 0.0156,
      "step": 1442620
    },
    {
      "epoch": 2.3609120009426365,
      "grad_norm": 0.21890747547149658,
      "learning_rate": 5.312955068099603e-06,
      "loss": 0.0135,
      "step": 1442640
    },
    {
      "epoch": 2.36094473138129,
      "grad_norm": 0.4251781404018402,
      "learning_rate": 5.312889175886086e-06,
      "loss": 0.0209,
      "step": 1442660
    },
    {
      "epoch": 2.3609774618199433,
      "grad_norm": 0.8018890619277954,
      "learning_rate": 5.312823283672569e-06,
      "loss": 0.0156,
      "step": 1442680
    },
    {
      "epoch": 2.361010192258597,
      "grad_norm": 1.1241039037704468,
      "learning_rate": 5.3127573914590515e-06,
      "loss": 0.018,
      "step": 1442700
    },
    {
      "epoch": 2.36104292269725,
      "grad_norm": 0.08339501917362213,
      "learning_rate": 5.312691499245535e-06,
      "loss": 0.0199,
      "step": 1442720
    },
    {
      "epoch": 2.361075653135903,
      "grad_norm": 0.2751864194869995,
      "learning_rate": 5.312625607032017e-06,
      "loss": 0.0146,
      "step": 1442740
    },
    {
      "epoch": 2.3611083835745568,
      "grad_norm": 0.9843824505805969,
      "learning_rate": 5.312559714818501e-06,
      "loss": 0.0149,
      "step": 1442760
    },
    {
      "epoch": 2.36114111401321,
      "grad_norm": 0.2616277039051056,
      "learning_rate": 5.3124938226049825e-06,
      "loss": 0.0102,
      "step": 1442780
    },
    {
      "epoch": 2.3611738444518635,
      "grad_norm": 0.1338915228843689,
      "learning_rate": 5.312427930391466e-06,
      "loss": 0.0122,
      "step": 1442800
    },
    {
      "epoch": 2.3612065748905167,
      "grad_norm": 0.5668579339981079,
      "learning_rate": 5.31236203817795e-06,
      "loss": 0.0194,
      "step": 1442820
    },
    {
      "epoch": 2.3612393053291703,
      "grad_norm": 0.641152024269104,
      "learning_rate": 5.3122961459644315e-06,
      "loss": 0.0153,
      "step": 1442840
    },
    {
      "epoch": 2.3612720357678234,
      "grad_norm": 0.14588676393032074,
      "learning_rate": 5.312230253750915e-06,
      "loss": 0.0149,
      "step": 1442860
    },
    {
      "epoch": 2.3613047662064766,
      "grad_norm": 0.42864611744880676,
      "learning_rate": 5.312164361537398e-06,
      "loss": 0.0142,
      "step": 1442880
    },
    {
      "epoch": 2.36133749664513,
      "grad_norm": 1.125784158706665,
      "learning_rate": 5.312098469323881e-06,
      "loss": 0.015,
      "step": 1442900
    },
    {
      "epoch": 2.3613702270837833,
      "grad_norm": 0.24280591309070587,
      "learning_rate": 5.312032577110363e-06,
      "loss": 0.0098,
      "step": 1442920
    },
    {
      "epoch": 2.361402957522437,
      "grad_norm": 0.5560872554779053,
      "learning_rate": 5.311966684896847e-06,
      "loss": 0.0126,
      "step": 1442940
    },
    {
      "epoch": 2.36143568796109,
      "grad_norm": 0.3367634117603302,
      "learning_rate": 5.311900792683329e-06,
      "loss": 0.0134,
      "step": 1442960
    },
    {
      "epoch": 2.3614684183997436,
      "grad_norm": 0.5166141390800476,
      "learning_rate": 5.311834900469812e-06,
      "loss": 0.0205,
      "step": 1442980
    },
    {
      "epoch": 2.361501148838397,
      "grad_norm": 0.9778317809104919,
      "learning_rate": 5.311769008256294e-06,
      "loss": 0.0188,
      "step": 1443000
    },
    {
      "epoch": 2.36153387927705,
      "grad_norm": 0.17258943617343903,
      "learning_rate": 5.311703116042778e-06,
      "loss": 0.013,
      "step": 1443020
    },
    {
      "epoch": 2.3615666097157035,
      "grad_norm": 0.27399539947509766,
      "learning_rate": 5.31163722382926e-06,
      "loss": 0.0208,
      "step": 1443040
    },
    {
      "epoch": 2.3615993401543567,
      "grad_norm": 0.42354816198349,
      "learning_rate": 5.311571331615743e-06,
      "loss": 0.0148,
      "step": 1443060
    },
    {
      "epoch": 2.3616320705930103,
      "grad_norm": 0.3217412233352661,
      "learning_rate": 5.311505439402226e-06,
      "loss": 0.0151,
      "step": 1443080
    },
    {
      "epoch": 2.3616648010316634,
      "grad_norm": 0.4449852705001831,
      "learning_rate": 5.311439547188709e-06,
      "loss": 0.0093,
      "step": 1443100
    },
    {
      "epoch": 2.3616975314703166,
      "grad_norm": 0.7217046022415161,
      "learning_rate": 5.311373654975192e-06,
      "loss": 0.0167,
      "step": 1443120
    },
    {
      "epoch": 2.36173026190897,
      "grad_norm": 0.5141714811325073,
      "learning_rate": 5.311307762761675e-06,
      "loss": 0.0089,
      "step": 1443140
    },
    {
      "epoch": 2.3617629923476233,
      "grad_norm": 0.14533336460590363,
      "learning_rate": 5.311241870548157e-06,
      "loss": 0.0112,
      "step": 1443160
    },
    {
      "epoch": 2.361795722786277,
      "grad_norm": 0.8719822764396667,
      "learning_rate": 5.311175978334641e-06,
      "loss": 0.0159,
      "step": 1443180
    },
    {
      "epoch": 2.36182845322493,
      "grad_norm": 0.46812593936920166,
      "learning_rate": 5.311110086121124e-06,
      "loss": 0.0151,
      "step": 1443200
    },
    {
      "epoch": 2.361861183663583,
      "grad_norm": 0.21879301965236664,
      "learning_rate": 5.311044193907606e-06,
      "loss": 0.0167,
      "step": 1443220
    },
    {
      "epoch": 2.361893914102237,
      "grad_norm": 0.77936851978302,
      "learning_rate": 5.31097830169409e-06,
      "loss": 0.02,
      "step": 1443240
    },
    {
      "epoch": 2.36192664454089,
      "grad_norm": 0.1774132400751114,
      "learning_rate": 5.310912409480572e-06,
      "loss": 0.0146,
      "step": 1443260
    },
    {
      "epoch": 2.3619593749795436,
      "grad_norm": 0.13677161931991577,
      "learning_rate": 5.310846517267055e-06,
      "loss": 0.0126,
      "step": 1443280
    },
    {
      "epoch": 2.3619921054181967,
      "grad_norm": 0.3427702486515045,
      "learning_rate": 5.310780625053538e-06,
      "loss": 0.0248,
      "step": 1443300
    },
    {
      "epoch": 2.3620248358568503,
      "grad_norm": 1.0463157892227173,
      "learning_rate": 5.310714732840021e-06,
      "loss": 0.0199,
      "step": 1443320
    },
    {
      "epoch": 2.3620575662955035,
      "grad_norm": 0.26447129249572754,
      "learning_rate": 5.3106488406265034e-06,
      "loss": 0.0189,
      "step": 1443340
    },
    {
      "epoch": 2.3620902967341566,
      "grad_norm": 0.22799713909626007,
      "learning_rate": 5.310582948412987e-06,
      "loss": 0.0129,
      "step": 1443360
    },
    {
      "epoch": 2.36212302717281,
      "grad_norm": 0.5869590640068054,
      "learning_rate": 5.310517056199469e-06,
      "loss": 0.016,
      "step": 1443380
    },
    {
      "epoch": 2.3621557576114633,
      "grad_norm": 0.8946046233177185,
      "learning_rate": 5.3104511639859525e-06,
      "loss": 0.0154,
      "step": 1443400
    },
    {
      "epoch": 2.362188488050117,
      "grad_norm": 0.49176689982414246,
      "learning_rate": 5.310385271772434e-06,
      "loss": 0.0139,
      "step": 1443420
    },
    {
      "epoch": 2.36222121848877,
      "grad_norm": 0.439507395029068,
      "learning_rate": 5.310319379558918e-06,
      "loss": 0.0185,
      "step": 1443440
    },
    {
      "epoch": 2.3622539489274237,
      "grad_norm": 0.8604134917259216,
      "learning_rate": 5.310253487345401e-06,
      "loss": 0.0138,
      "step": 1443460
    },
    {
      "epoch": 2.362286679366077,
      "grad_norm": 0.3359435796737671,
      "learning_rate": 5.3101875951318835e-06,
      "loss": 0.0139,
      "step": 1443480
    },
    {
      "epoch": 2.36231940980473,
      "grad_norm": 0.7010706067085266,
      "learning_rate": 5.310121702918366e-06,
      "loss": 0.0192,
      "step": 1443500
    },
    {
      "epoch": 2.3623521402433836,
      "grad_norm": 0.09473221749067307,
      "learning_rate": 5.31005581070485e-06,
      "loss": 0.0183,
      "step": 1443520
    },
    {
      "epoch": 2.3623848706820367,
      "grad_norm": 1.1148816347122192,
      "learning_rate": 5.3099899184913325e-06,
      "loss": 0.0165,
      "step": 1443540
    },
    {
      "epoch": 2.3624176011206903,
      "grad_norm": 0.44352155923843384,
      "learning_rate": 5.309924026277815e-06,
      "loss": 0.014,
      "step": 1443560
    },
    {
      "epoch": 2.3624503315593435,
      "grad_norm": 0.039346277713775635,
      "learning_rate": 5.309858134064299e-06,
      "loss": 0.0102,
      "step": 1443580
    },
    {
      "epoch": 2.362483061997997,
      "grad_norm": 0.5705983638763428,
      "learning_rate": 5.309792241850781e-06,
      "loss": 0.0113,
      "step": 1443600
    },
    {
      "epoch": 2.36251579243665,
      "grad_norm": 0.8012288212776184,
      "learning_rate": 5.309726349637264e-06,
      "loss": 0.0167,
      "step": 1443620
    },
    {
      "epoch": 2.3625485228753034,
      "grad_norm": 0.2514350712299347,
      "learning_rate": 5.309660457423746e-06,
      "loss": 0.015,
      "step": 1443640
    },
    {
      "epoch": 2.362581253313957,
      "grad_norm": 0.09158216416835785,
      "learning_rate": 5.30959456521023e-06,
      "loss": 0.0074,
      "step": 1443660
    },
    {
      "epoch": 2.36261398375261,
      "grad_norm": 0.10049526393413544,
      "learning_rate": 5.3095286729967125e-06,
      "loss": 0.0098,
      "step": 1443680
    },
    {
      "epoch": 2.3626467141912637,
      "grad_norm": 0.29476192593574524,
      "learning_rate": 5.309462780783195e-06,
      "loss": 0.0112,
      "step": 1443700
    },
    {
      "epoch": 2.362679444629917,
      "grad_norm": 0.1561371237039566,
      "learning_rate": 5.309396888569678e-06,
      "loss": 0.0154,
      "step": 1443720
    },
    {
      "epoch": 2.3627121750685705,
      "grad_norm": 0.9254658818244934,
      "learning_rate": 5.309330996356162e-06,
      "loss": 0.0143,
      "step": 1443740
    },
    {
      "epoch": 2.3627449055072236,
      "grad_norm": 0.4058971405029297,
      "learning_rate": 5.3092651041426435e-06,
      "loss": 0.0143,
      "step": 1443760
    },
    {
      "epoch": 2.3627776359458768,
      "grad_norm": 0.310548335313797,
      "learning_rate": 5.309199211929127e-06,
      "loss": 0.0113,
      "step": 1443780
    },
    {
      "epoch": 2.3628103663845303,
      "grad_norm": 0.4731314778327942,
      "learning_rate": 5.309133319715609e-06,
      "loss": 0.0128,
      "step": 1443800
    },
    {
      "epoch": 2.3628430968231835,
      "grad_norm": 0.5522975325584412,
      "learning_rate": 5.3090674275020926e-06,
      "loss": 0.0233,
      "step": 1443820
    },
    {
      "epoch": 2.362875827261837,
      "grad_norm": 0.160147562623024,
      "learning_rate": 5.3090015352885745e-06,
      "loss": 0.0203,
      "step": 1443840
    },
    {
      "epoch": 2.3629085577004902,
      "grad_norm": 0.3196570873260498,
      "learning_rate": 5.308935643075058e-06,
      "loss": 0.0099,
      "step": 1443860
    },
    {
      "epoch": 2.362941288139144,
      "grad_norm": 0.5622372627258301,
      "learning_rate": 5.308869750861542e-06,
      "loss": 0.0106,
      "step": 1443880
    },
    {
      "epoch": 2.362974018577797,
      "grad_norm": 0.6701080203056335,
      "learning_rate": 5.3088038586480235e-06,
      "loss": 0.0179,
      "step": 1443900
    },
    {
      "epoch": 2.36300674901645,
      "grad_norm": 0.6406333446502686,
      "learning_rate": 5.308737966434507e-06,
      "loss": 0.0124,
      "step": 1443920
    },
    {
      "epoch": 2.3630394794551037,
      "grad_norm": 0.384746789932251,
      "learning_rate": 5.30867207422099e-06,
      "loss": 0.0115,
      "step": 1443940
    },
    {
      "epoch": 2.363072209893757,
      "grad_norm": 0.1462155282497406,
      "learning_rate": 5.3086061820074734e-06,
      "loss": 0.0099,
      "step": 1443960
    },
    {
      "epoch": 2.3631049403324105,
      "grad_norm": 0.21806982159614563,
      "learning_rate": 5.308540289793955e-06,
      "loss": 0.0195,
      "step": 1443980
    },
    {
      "epoch": 2.3631376707710636,
      "grad_norm": 0.3234845995903015,
      "learning_rate": 5.308474397580439e-06,
      "loss": 0.0183,
      "step": 1444000
    },
    {
      "epoch": 2.363170401209717,
      "grad_norm": 1.2445179224014282,
      "learning_rate": 5.308408505366921e-06,
      "loss": 0.0129,
      "step": 1444020
    },
    {
      "epoch": 2.3632031316483704,
      "grad_norm": 0.2433456927537918,
      "learning_rate": 5.308342613153404e-06,
      "loss": 0.0182,
      "step": 1444040
    },
    {
      "epoch": 2.3632358620870235,
      "grad_norm": 0.18998883664608002,
      "learning_rate": 5.308276720939886e-06,
      "loss": 0.016,
      "step": 1444060
    },
    {
      "epoch": 2.363268592525677,
      "grad_norm": 0.32627901434898376,
      "learning_rate": 5.30821082872637e-06,
      "loss": 0.0166,
      "step": 1444080
    },
    {
      "epoch": 2.3633013229643303,
      "grad_norm": 0.25638359785079956,
      "learning_rate": 5.308144936512853e-06,
      "loss": 0.016,
      "step": 1444100
    },
    {
      "epoch": 2.363334053402984,
      "grad_norm": 0.1917223334312439,
      "learning_rate": 5.308079044299335e-06,
      "loss": 0.018,
      "step": 1444120
    },
    {
      "epoch": 2.363366783841637,
      "grad_norm": 0.31403350830078125,
      "learning_rate": 5.308013152085818e-06,
      "loss": 0.0178,
      "step": 1444140
    },
    {
      "epoch": 2.3633995142802906,
      "grad_norm": 0.37300947308540344,
      "learning_rate": 5.307947259872302e-06,
      "loss": 0.0123,
      "step": 1444160
    },
    {
      "epoch": 2.3634322447189438,
      "grad_norm": 0.17117871344089508,
      "learning_rate": 5.3078813676587836e-06,
      "loss": 0.0144,
      "step": 1444180
    },
    {
      "epoch": 2.363464975157597,
      "grad_norm": 0.5880545377731323,
      "learning_rate": 5.307815475445267e-06,
      "loss": 0.0152,
      "step": 1444200
    },
    {
      "epoch": 2.3634977055962505,
      "grad_norm": 0.2853502035140991,
      "learning_rate": 5.307749583231751e-06,
      "loss": 0.0178,
      "step": 1444220
    },
    {
      "epoch": 2.3635304360349036,
      "grad_norm": 0.35869547724723816,
      "learning_rate": 5.307683691018233e-06,
      "loss": 0.0137,
      "step": 1444240
    },
    {
      "epoch": 2.3635631664735572,
      "grad_norm": 0.1904662698507309,
      "learning_rate": 5.307617798804716e-06,
      "loss": 0.0125,
      "step": 1444260
    },
    {
      "epoch": 2.3635958969122104,
      "grad_norm": 0.5003705024719238,
      "learning_rate": 5.307551906591198e-06,
      "loss": 0.0157,
      "step": 1444280
    },
    {
      "epoch": 2.363628627350864,
      "grad_norm": 0.1448785960674286,
      "learning_rate": 5.307486014377682e-06,
      "loss": 0.0169,
      "step": 1444300
    },
    {
      "epoch": 2.363661357789517,
      "grad_norm": 0.21864642202854156,
      "learning_rate": 5.3074201221641644e-06,
      "loss": 0.0121,
      "step": 1444320
    },
    {
      "epoch": 2.3636940882281703,
      "grad_norm": 0.11594262719154358,
      "learning_rate": 5.307354229950647e-06,
      "loss": 0.018,
      "step": 1444340
    },
    {
      "epoch": 2.363726818666824,
      "grad_norm": 0.29016146063804626,
      "learning_rate": 5.30728833773713e-06,
      "loss": 0.0171,
      "step": 1444360
    },
    {
      "epoch": 2.363759549105477,
      "grad_norm": 0.23016367852687836,
      "learning_rate": 5.3072224455236135e-06,
      "loss": 0.0141,
      "step": 1444380
    },
    {
      "epoch": 2.3637922795441306,
      "grad_norm": 0.32883819937705994,
      "learning_rate": 5.307156553310095e-06,
      "loss": 0.018,
      "step": 1444400
    },
    {
      "epoch": 2.3638250099827838,
      "grad_norm": 0.17476151883602142,
      "learning_rate": 5.307090661096579e-06,
      "loss": 0.0136,
      "step": 1444420
    },
    {
      "epoch": 2.3638577404214374,
      "grad_norm": 0.3326048254966736,
      "learning_rate": 5.307024768883061e-06,
      "loss": 0.0112,
      "step": 1444440
    },
    {
      "epoch": 2.3638904708600905,
      "grad_norm": 0.11962514370679855,
      "learning_rate": 5.3069588766695445e-06,
      "loss": 0.0146,
      "step": 1444460
    },
    {
      "epoch": 2.3639232012987437,
      "grad_norm": 0.40958133339881897,
      "learning_rate": 5.306892984456027e-06,
      "loss": 0.0127,
      "step": 1444480
    },
    {
      "epoch": 2.3639559317373973,
      "grad_norm": 0.5725166201591492,
      "learning_rate": 5.30682709224251e-06,
      "loss": 0.019,
      "step": 1444500
    },
    {
      "epoch": 2.3639886621760504,
      "grad_norm": 0.756947934627533,
      "learning_rate": 5.306761200028993e-06,
      "loss": 0.019,
      "step": 1444520
    },
    {
      "epoch": 2.364021392614704,
      "grad_norm": 0.4372357726097107,
      "learning_rate": 5.306695307815476e-06,
      "loss": 0.015,
      "step": 1444540
    },
    {
      "epoch": 2.364054123053357,
      "grad_norm": 0.38175496459007263,
      "learning_rate": 5.306629415601958e-06,
      "loss": 0.0141,
      "step": 1444560
    },
    {
      "epoch": 2.3640868534920103,
      "grad_norm": 0.44843021035194397,
      "learning_rate": 5.306563523388442e-06,
      "loss": 0.0133,
      "step": 1444580
    },
    {
      "epoch": 2.364119583930664,
      "grad_norm": 0.31013309955596924,
      "learning_rate": 5.306497631174925e-06,
      "loss": 0.0112,
      "step": 1444600
    },
    {
      "epoch": 2.364152314369317,
      "grad_norm": 0.36406320333480835,
      "learning_rate": 5.306431738961407e-06,
      "loss": 0.0102,
      "step": 1444620
    },
    {
      "epoch": 2.3641850448079706,
      "grad_norm": 0.20131002366542816,
      "learning_rate": 5.306365846747891e-06,
      "loss": 0.0144,
      "step": 1444640
    },
    {
      "epoch": 2.364217775246624,
      "grad_norm": 0.6564285755157471,
      "learning_rate": 5.306299954534373e-06,
      "loss": 0.016,
      "step": 1444660
    },
    {
      "epoch": 2.364250505685277,
      "grad_norm": 0.23619326949119568,
      "learning_rate": 5.306234062320856e-06,
      "loss": 0.0149,
      "step": 1444680
    },
    {
      "epoch": 2.3642832361239305,
      "grad_norm": 0.2577393352985382,
      "learning_rate": 5.306168170107339e-06,
      "loss": 0.0124,
      "step": 1444700
    },
    {
      "epoch": 2.3643159665625837,
      "grad_norm": 0.24523483216762543,
      "learning_rate": 5.306102277893822e-06,
      "loss": 0.0081,
      "step": 1444720
    },
    {
      "epoch": 2.3643486970012373,
      "grad_norm": 0.3244642913341522,
      "learning_rate": 5.3060363856803045e-06,
      "loss": 0.0133,
      "step": 1444740
    },
    {
      "epoch": 2.3643814274398904,
      "grad_norm": 0.6813167333602905,
      "learning_rate": 5.305970493466788e-06,
      "loss": 0.0123,
      "step": 1444760
    },
    {
      "epoch": 2.364414157878544,
      "grad_norm": 0.3976881504058838,
      "learning_rate": 5.30590460125327e-06,
      "loss": 0.0182,
      "step": 1444780
    },
    {
      "epoch": 2.364446888317197,
      "grad_norm": 0.6201851963996887,
      "learning_rate": 5.305838709039754e-06,
      "loss": 0.0123,
      "step": 1444800
    },
    {
      "epoch": 2.3644796187558503,
      "grad_norm": 0.23086576163768768,
      "learning_rate": 5.3057728168262355e-06,
      "loss": 0.0163,
      "step": 1444820
    },
    {
      "epoch": 2.364512349194504,
      "grad_norm": 0.41048160195350647,
      "learning_rate": 5.305706924612719e-06,
      "loss": 0.0159,
      "step": 1444840
    },
    {
      "epoch": 2.364545079633157,
      "grad_norm": 0.18391862511634827,
      "learning_rate": 5.305641032399201e-06,
      "loss": 0.0143,
      "step": 1444860
    },
    {
      "epoch": 2.3645778100718107,
      "grad_norm": 0.9930271506309509,
      "learning_rate": 5.3055751401856845e-06,
      "loss": 0.022,
      "step": 1444880
    },
    {
      "epoch": 2.364610540510464,
      "grad_norm": 0.3536524772644043,
      "learning_rate": 5.305509247972167e-06,
      "loss": 0.0171,
      "step": 1444900
    },
    {
      "epoch": 2.3646432709491174,
      "grad_norm": 0.10954121500253677,
      "learning_rate": 5.30544335575865e-06,
      "loss": 0.0144,
      "step": 1444920
    },
    {
      "epoch": 2.3646760013877706,
      "grad_norm": 0.0434897355735302,
      "learning_rate": 5.305377463545134e-06,
      "loss": 0.0133,
      "step": 1444940
    },
    {
      "epoch": 2.3647087318264237,
      "grad_norm": 0.47046172618865967,
      "learning_rate": 5.305311571331616e-06,
      "loss": 0.0119,
      "step": 1444960
    },
    {
      "epoch": 2.3647414622650773,
      "grad_norm": 0.39269760251045227,
      "learning_rate": 5.305245679118099e-06,
      "loss": 0.0159,
      "step": 1444980
    },
    {
      "epoch": 2.3647741927037305,
      "grad_norm": 0.49410009384155273,
      "learning_rate": 5.305179786904582e-06,
      "loss": 0.0186,
      "step": 1445000
    },
    {
      "epoch": 2.364806923142384,
      "grad_norm": 0.18913117051124573,
      "learning_rate": 5.305113894691065e-06,
      "loss": 0.0127,
      "step": 1445020
    },
    {
      "epoch": 2.364839653581037,
      "grad_norm": 0.6782987117767334,
      "learning_rate": 5.305048002477547e-06,
      "loss": 0.0165,
      "step": 1445040
    },
    {
      "epoch": 2.364872384019691,
      "grad_norm": 0.21487246453762054,
      "learning_rate": 5.304982110264031e-06,
      "loss": 0.0084,
      "step": 1445060
    },
    {
      "epoch": 2.364905114458344,
      "grad_norm": 0.310907244682312,
      "learning_rate": 5.304916218050513e-06,
      "loss": 0.0121,
      "step": 1445080
    },
    {
      "epoch": 2.364937844896997,
      "grad_norm": 0.6546758413314819,
      "learning_rate": 5.304850325836996e-06,
      "loss": 0.015,
      "step": 1445100
    },
    {
      "epoch": 2.3649705753356507,
      "grad_norm": 0.24698427319526672,
      "learning_rate": 5.304784433623479e-06,
      "loss": 0.0119,
      "step": 1445120
    },
    {
      "epoch": 2.365003305774304,
      "grad_norm": 0.8833578824996948,
      "learning_rate": 5.304718541409962e-06,
      "loss": 0.0183,
      "step": 1445140
    },
    {
      "epoch": 2.3650360362129574,
      "grad_norm": 0.16950808465480804,
      "learning_rate": 5.304652649196445e-06,
      "loss": 0.0126,
      "step": 1445160
    },
    {
      "epoch": 2.3650687666516106,
      "grad_norm": 0.2320297360420227,
      "learning_rate": 5.304586756982928e-06,
      "loss": 0.0177,
      "step": 1445180
    },
    {
      "epoch": 2.365101497090264,
      "grad_norm": 0.7804555296897888,
      "learning_rate": 5.30452086476941e-06,
      "loss": 0.0205,
      "step": 1445200
    },
    {
      "epoch": 2.3651342275289173,
      "grad_norm": 0.4991457462310791,
      "learning_rate": 5.304454972555894e-06,
      "loss": 0.0107,
      "step": 1445220
    },
    {
      "epoch": 2.3651669579675705,
      "grad_norm": 0.18569588661193848,
      "learning_rate": 5.3043890803423756e-06,
      "loss": 0.0133,
      "step": 1445240
    },
    {
      "epoch": 2.365199688406224,
      "grad_norm": 0.10489731281995773,
      "learning_rate": 5.304323188128859e-06,
      "loss": 0.0084,
      "step": 1445260
    },
    {
      "epoch": 2.3652324188448772,
      "grad_norm": 0.06922957301139832,
      "learning_rate": 5.304257295915343e-06,
      "loss": 0.0102,
      "step": 1445280
    },
    {
      "epoch": 2.365265149283531,
      "grad_norm": 0.12290386110544205,
      "learning_rate": 5.304191403701825e-06,
      "loss": 0.0079,
      "step": 1445300
    },
    {
      "epoch": 2.365297879722184,
      "grad_norm": 0.2412930130958557,
      "learning_rate": 5.304125511488308e-06,
      "loss": 0.008,
      "step": 1445320
    },
    {
      "epoch": 2.3653306101608376,
      "grad_norm": 0.3387594223022461,
      "learning_rate": 5.304059619274791e-06,
      "loss": 0.0176,
      "step": 1445340
    },
    {
      "epoch": 2.3653633405994907,
      "grad_norm": 0.6019109487533569,
      "learning_rate": 5.303993727061274e-06,
      "loss": 0.012,
      "step": 1445360
    },
    {
      "epoch": 2.365396071038144,
      "grad_norm": 0.3584064543247223,
      "learning_rate": 5.3039278348477564e-06,
      "loss": 0.0185,
      "step": 1445380
    },
    {
      "epoch": 2.3654288014767975,
      "grad_norm": 0.15923547744750977,
      "learning_rate": 5.30386194263424e-06,
      "loss": 0.0149,
      "step": 1445400
    },
    {
      "epoch": 2.3654615319154506,
      "grad_norm": 1.065987467765808,
      "learning_rate": 5.303796050420722e-06,
      "loss": 0.0159,
      "step": 1445420
    },
    {
      "epoch": 2.365494262354104,
      "grad_norm": 0.9368338584899902,
      "learning_rate": 5.3037301582072055e-06,
      "loss": 0.0111,
      "step": 1445440
    },
    {
      "epoch": 2.3655269927927574,
      "grad_norm": 0.4741266071796417,
      "learning_rate": 5.303664265993687e-06,
      "loss": 0.0165,
      "step": 1445460
    },
    {
      "epoch": 2.365559723231411,
      "grad_norm": 0.44940340518951416,
      "learning_rate": 5.303598373780171e-06,
      "loss": 0.0195,
      "step": 1445480
    },
    {
      "epoch": 2.365592453670064,
      "grad_norm": 0.3561619520187378,
      "learning_rate": 5.303532481566654e-06,
      "loss": 0.0132,
      "step": 1445500
    },
    {
      "epoch": 2.3656251841087172,
      "grad_norm": 0.35587263107299805,
      "learning_rate": 5.3034665893531365e-06,
      "loss": 0.0145,
      "step": 1445520
    },
    {
      "epoch": 2.365657914547371,
      "grad_norm": 0.3682631552219391,
      "learning_rate": 5.303400697139619e-06,
      "loss": 0.0092,
      "step": 1445540
    },
    {
      "epoch": 2.365690644986024,
      "grad_norm": 0.3866899013519287,
      "learning_rate": 5.303334804926103e-06,
      "loss": 0.0118,
      "step": 1445560
    },
    {
      "epoch": 2.3657233754246776,
      "grad_norm": 0.3758397698402405,
      "learning_rate": 5.303268912712585e-06,
      "loss": 0.0133,
      "step": 1445580
    },
    {
      "epoch": 2.3657561058633307,
      "grad_norm": 0.5487292408943176,
      "learning_rate": 5.303203020499068e-06,
      "loss": 0.0141,
      "step": 1445600
    },
    {
      "epoch": 2.3657888363019843,
      "grad_norm": 0.3200606405735016,
      "learning_rate": 5.30313712828555e-06,
      "loss": 0.0125,
      "step": 1445620
    },
    {
      "epoch": 2.3658215667406375,
      "grad_norm": 0.15644477307796478,
      "learning_rate": 5.303071236072034e-06,
      "loss": 0.0119,
      "step": 1445640
    },
    {
      "epoch": 2.3658542971792906,
      "grad_norm": 0.2494499385356903,
      "learning_rate": 5.303005343858517e-06,
      "loss": 0.0173,
      "step": 1445660
    },
    {
      "epoch": 2.3658870276179442,
      "grad_norm": 0.287990003824234,
      "learning_rate": 5.302939451644999e-06,
      "loss": 0.0133,
      "step": 1445680
    },
    {
      "epoch": 2.3659197580565974,
      "grad_norm": 0.7385510802268982,
      "learning_rate": 5.302873559431483e-06,
      "loss": 0.0148,
      "step": 1445700
    },
    {
      "epoch": 2.365952488495251,
      "grad_norm": 0.6574276685714722,
      "learning_rate": 5.3028076672179655e-06,
      "loss": 0.0166,
      "step": 1445720
    },
    {
      "epoch": 2.365985218933904,
      "grad_norm": 0.312379390001297,
      "learning_rate": 5.302741775004448e-06,
      "loss": 0.0168,
      "step": 1445740
    },
    {
      "epoch": 2.3660179493725577,
      "grad_norm": 0.3902225196361542,
      "learning_rate": 5.302675882790931e-06,
      "loss": 0.0135,
      "step": 1445760
    },
    {
      "epoch": 2.366050679811211,
      "grad_norm": 0.21338410675525665,
      "learning_rate": 5.302609990577415e-06,
      "loss": 0.0159,
      "step": 1445780
    },
    {
      "epoch": 2.366083410249864,
      "grad_norm": 0.4866372048854828,
      "learning_rate": 5.3025440983638965e-06,
      "loss": 0.0156,
      "step": 1445800
    },
    {
      "epoch": 2.3661161406885176,
      "grad_norm": 0.4735373258590698,
      "learning_rate": 5.30247820615038e-06,
      "loss": 0.0132,
      "step": 1445820
    },
    {
      "epoch": 2.3661488711271708,
      "grad_norm": 0.37213924527168274,
      "learning_rate": 5.302412313936862e-06,
      "loss": 0.013,
      "step": 1445840
    },
    {
      "epoch": 2.3661816015658244,
      "grad_norm": 0.16924358904361725,
      "learning_rate": 5.3023464217233456e-06,
      "loss": 0.0139,
      "step": 1445860
    },
    {
      "epoch": 2.3662143320044775,
      "grad_norm": 0.3550820052623749,
      "learning_rate": 5.3022805295098275e-06,
      "loss": 0.0097,
      "step": 1445880
    },
    {
      "epoch": 2.366247062443131,
      "grad_norm": 0.2576653063297272,
      "learning_rate": 5.302214637296311e-06,
      "loss": 0.0194,
      "step": 1445900
    },
    {
      "epoch": 2.3662797928817842,
      "grad_norm": 0.5726795792579651,
      "learning_rate": 5.302148745082794e-06,
      "loss": 0.0155,
      "step": 1445920
    },
    {
      "epoch": 2.3663125233204374,
      "grad_norm": 0.3603026866912842,
      "learning_rate": 5.3020828528692765e-06,
      "loss": 0.0252,
      "step": 1445940
    },
    {
      "epoch": 2.366345253759091,
      "grad_norm": 0.4411488473415375,
      "learning_rate": 5.302016960655759e-06,
      "loss": 0.0226,
      "step": 1445960
    },
    {
      "epoch": 2.366377984197744,
      "grad_norm": 0.2618977725505829,
      "learning_rate": 5.301951068442243e-06,
      "loss": 0.0142,
      "step": 1445980
    },
    {
      "epoch": 2.3664107146363977,
      "grad_norm": 0.12067313492298126,
      "learning_rate": 5.301885176228726e-06,
      "loss": 0.0069,
      "step": 1446000
    },
    {
      "epoch": 2.366443445075051,
      "grad_norm": 0.2765871584415436,
      "learning_rate": 5.301819284015208e-06,
      "loss": 0.0152,
      "step": 1446020
    },
    {
      "epoch": 2.3664761755137045,
      "grad_norm": 0.4428172707557678,
      "learning_rate": 5.301753391801692e-06,
      "loss": 0.0123,
      "step": 1446040
    },
    {
      "epoch": 2.3665089059523576,
      "grad_norm": 0.22311712801456451,
      "learning_rate": 5.301687499588174e-06,
      "loss": 0.02,
      "step": 1446060
    },
    {
      "epoch": 2.366541636391011,
      "grad_norm": 0.6284196972846985,
      "learning_rate": 5.301621607374657e-06,
      "loss": 0.0164,
      "step": 1446080
    },
    {
      "epoch": 2.3665743668296644,
      "grad_norm": 0.7580483555793762,
      "learning_rate": 5.301555715161139e-06,
      "loss": 0.0086,
      "step": 1446100
    },
    {
      "epoch": 2.3666070972683175,
      "grad_norm": 0.12741532921791077,
      "learning_rate": 5.301489822947623e-06,
      "loss": 0.0164,
      "step": 1446120
    },
    {
      "epoch": 2.366639827706971,
      "grad_norm": 0.8908202648162842,
      "learning_rate": 5.301423930734106e-06,
      "loss": 0.0126,
      "step": 1446140
    },
    {
      "epoch": 2.3666725581456243,
      "grad_norm": 0.40601328015327454,
      "learning_rate": 5.301358038520588e-06,
      "loss": 0.0097,
      "step": 1446160
    },
    {
      "epoch": 2.3667052885842774,
      "grad_norm": 0.23610448837280273,
      "learning_rate": 5.301292146307071e-06,
      "loss": 0.0193,
      "step": 1446180
    },
    {
      "epoch": 2.366738019022931,
      "grad_norm": 0.15708227455615997,
      "learning_rate": 5.301226254093555e-06,
      "loss": 0.0167,
      "step": 1446200
    },
    {
      "epoch": 2.366770749461584,
      "grad_norm": 0.17693351209163666,
      "learning_rate": 5.301160361880037e-06,
      "loss": 0.019,
      "step": 1446220
    },
    {
      "epoch": 2.3668034799002378,
      "grad_norm": 0.5936362147331238,
      "learning_rate": 5.30109446966652e-06,
      "loss": 0.0198,
      "step": 1446240
    },
    {
      "epoch": 2.366836210338891,
      "grad_norm": 0.3726707696914673,
      "learning_rate": 5.301028577453002e-06,
      "loss": 0.0139,
      "step": 1446260
    },
    {
      "epoch": 2.366868940777544,
      "grad_norm": 0.20728467404842377,
      "learning_rate": 5.300962685239486e-06,
      "loss": 0.0114,
      "step": 1446280
    },
    {
      "epoch": 2.3669016712161977,
      "grad_norm": 0.30324333906173706,
      "learning_rate": 5.300896793025968e-06,
      "loss": 0.0094,
      "step": 1446300
    },
    {
      "epoch": 2.366934401654851,
      "grad_norm": 0.25671544671058655,
      "learning_rate": 5.300830900812451e-06,
      "loss": 0.0141,
      "step": 1446320
    },
    {
      "epoch": 2.3669671320935044,
      "grad_norm": 0.23219537734985352,
      "learning_rate": 5.300765008598935e-06,
      "loss": 0.0155,
      "step": 1446340
    },
    {
      "epoch": 2.3669998625321576,
      "grad_norm": 0.575628936290741,
      "learning_rate": 5.3006991163854175e-06,
      "loss": 0.0172,
      "step": 1446360
    },
    {
      "epoch": 2.367032592970811,
      "grad_norm": 0.5074081420898438,
      "learning_rate": 5.3006332241719e-06,
      "loss": 0.0122,
      "step": 1446380
    },
    {
      "epoch": 2.3670653234094643,
      "grad_norm": 0.35113269090652466,
      "learning_rate": 5.300567331958383e-06,
      "loss": 0.0133,
      "step": 1446400
    },
    {
      "epoch": 2.3670980538481174,
      "grad_norm": 0.8074811697006226,
      "learning_rate": 5.3005014397448665e-06,
      "loss": 0.0153,
      "step": 1446420
    },
    {
      "epoch": 2.367130784286771,
      "grad_norm": 0.7381342053413391,
      "learning_rate": 5.300435547531348e-06,
      "loss": 0.0165,
      "step": 1446440
    },
    {
      "epoch": 2.367163514725424,
      "grad_norm": 0.6681233644485474,
      "learning_rate": 5.300369655317832e-06,
      "loss": 0.0113,
      "step": 1446460
    },
    {
      "epoch": 2.367196245164078,
      "grad_norm": 0.10189992189407349,
      "learning_rate": 5.300303763104314e-06,
      "loss": 0.0151,
      "step": 1446480
    },
    {
      "epoch": 2.367228975602731,
      "grad_norm": 0.30724745988845825,
      "learning_rate": 5.3002378708907975e-06,
      "loss": 0.0147,
      "step": 1446500
    },
    {
      "epoch": 2.3672617060413845,
      "grad_norm": 1.2179001569747925,
      "learning_rate": 5.30017197867728e-06,
      "loss": 0.0163,
      "step": 1446520
    },
    {
      "epoch": 2.3672944364800377,
      "grad_norm": 0.3743904232978821,
      "learning_rate": 5.300106086463763e-06,
      "loss": 0.0158,
      "step": 1446540
    },
    {
      "epoch": 2.367327166918691,
      "grad_norm": 0.19483685493469238,
      "learning_rate": 5.300040194250246e-06,
      "loss": 0.0166,
      "step": 1446560
    },
    {
      "epoch": 2.3673598973573444,
      "grad_norm": 0.436922162771225,
      "learning_rate": 5.299974302036729e-06,
      "loss": 0.0197,
      "step": 1446580
    },
    {
      "epoch": 2.3673926277959976,
      "grad_norm": 0.29605790972709656,
      "learning_rate": 5.299908409823211e-06,
      "loss": 0.012,
      "step": 1446600
    },
    {
      "epoch": 2.367425358234651,
      "grad_norm": 0.4043596386909485,
      "learning_rate": 5.299842517609695e-06,
      "loss": 0.0085,
      "step": 1446620
    },
    {
      "epoch": 2.3674580886733043,
      "grad_norm": 0.3307267129421234,
      "learning_rate": 5.299776625396177e-06,
      "loss": 0.0202,
      "step": 1446640
    },
    {
      "epoch": 2.367490819111958,
      "grad_norm": 0.21981248259544373,
      "learning_rate": 5.29971073318266e-06,
      "loss": 0.0097,
      "step": 1446660
    },
    {
      "epoch": 2.367523549550611,
      "grad_norm": 0.6168211102485657,
      "learning_rate": 5.299644840969142e-06,
      "loss": 0.0124,
      "step": 1446680
    },
    {
      "epoch": 2.367556279989264,
      "grad_norm": 0.3979027569293976,
      "learning_rate": 5.299578948755626e-06,
      "loss": 0.0138,
      "step": 1446700
    },
    {
      "epoch": 2.367589010427918,
      "grad_norm": 0.1754942089319229,
      "learning_rate": 5.299513056542109e-06,
      "loss": 0.0153,
      "step": 1446720
    },
    {
      "epoch": 2.367621740866571,
      "grad_norm": 0.45416608452796936,
      "learning_rate": 5.299447164328592e-06,
      "loss": 0.0184,
      "step": 1446740
    },
    {
      "epoch": 2.3676544713052246,
      "grad_norm": 0.35253971815109253,
      "learning_rate": 5.299381272115075e-06,
      "loss": 0.0168,
      "step": 1446760
    },
    {
      "epoch": 2.3676872017438777,
      "grad_norm": 0.6598080396652222,
      "learning_rate": 5.2993153799015575e-06,
      "loss": 0.0108,
      "step": 1446780
    },
    {
      "epoch": 2.3677199321825313,
      "grad_norm": 0.45836567878723145,
      "learning_rate": 5.299249487688041e-06,
      "loss": 0.0117,
      "step": 1446800
    },
    {
      "epoch": 2.3677526626211844,
      "grad_norm": 0.8252540826797485,
      "learning_rate": 5.299183595474523e-06,
      "loss": 0.016,
      "step": 1446820
    },
    {
      "epoch": 2.3677853930598376,
      "grad_norm": 0.21759259700775146,
      "learning_rate": 5.299117703261007e-06,
      "loss": 0.0169,
      "step": 1446840
    },
    {
      "epoch": 2.367818123498491,
      "grad_norm": 0.21757595241069794,
      "learning_rate": 5.2990518110474885e-06,
      "loss": 0.0128,
      "step": 1446860
    },
    {
      "epoch": 2.3678508539371443,
      "grad_norm": 1.1071128845214844,
      "learning_rate": 5.298985918833972e-06,
      "loss": 0.018,
      "step": 1446880
    },
    {
      "epoch": 2.367883584375798,
      "grad_norm": 0.2241293489933014,
      "learning_rate": 5.298920026620454e-06,
      "loss": 0.0127,
      "step": 1446900
    },
    {
      "epoch": 2.367916314814451,
      "grad_norm": 0.31275060772895813,
      "learning_rate": 5.2988541344069376e-06,
      "loss": 0.0206,
      "step": 1446920
    },
    {
      "epoch": 2.3679490452531047,
      "grad_norm": 0.25830549001693726,
      "learning_rate": 5.29878824219342e-06,
      "loss": 0.012,
      "step": 1446940
    },
    {
      "epoch": 2.367981775691758,
      "grad_norm": 0.3573019802570343,
      "learning_rate": 5.298722349979903e-06,
      "loss": 0.0137,
      "step": 1446960
    },
    {
      "epoch": 2.368014506130411,
      "grad_norm": 5.739497184753418,
      "learning_rate": 5.298656457766386e-06,
      "loss": 0.0114,
      "step": 1446980
    },
    {
      "epoch": 2.3680472365690646,
      "grad_norm": 0.29303526878356934,
      "learning_rate": 5.298590565552869e-06,
      "loss": 0.0125,
      "step": 1447000
    },
    {
      "epoch": 2.3680799670077177,
      "grad_norm": 0.8423000574111938,
      "learning_rate": 5.298524673339351e-06,
      "loss": 0.0164,
      "step": 1447020
    },
    {
      "epoch": 2.3681126974463713,
      "grad_norm": 0.1535838544368744,
      "learning_rate": 5.298458781125835e-06,
      "loss": 0.0142,
      "step": 1447040
    },
    {
      "epoch": 2.3681454278850245,
      "grad_norm": 0.5647002458572388,
      "learning_rate": 5.2983928889123184e-06,
      "loss": 0.021,
      "step": 1447060
    },
    {
      "epoch": 2.368178158323678,
      "grad_norm": 0.36478447914123535,
      "learning_rate": 5.2983269966988e-06,
      "loss": 0.0215,
      "step": 1447080
    },
    {
      "epoch": 2.368210888762331,
      "grad_norm": 0.37473759055137634,
      "learning_rate": 5.298261104485284e-06,
      "loss": 0.0147,
      "step": 1447100
    },
    {
      "epoch": 2.3682436192009844,
      "grad_norm": 0.18729227781295776,
      "learning_rate": 5.298195212271766e-06,
      "loss": 0.0136,
      "step": 1447120
    },
    {
      "epoch": 2.368276349639638,
      "grad_norm": 0.6430844664573669,
      "learning_rate": 5.298129320058249e-06,
      "loss": 0.0113,
      "step": 1447140
    },
    {
      "epoch": 2.368309080078291,
      "grad_norm": 0.3885774314403534,
      "learning_rate": 5.298063427844732e-06,
      "loss": 0.017,
      "step": 1447160
    },
    {
      "epoch": 2.3683418105169447,
      "grad_norm": 0.10129040479660034,
      "learning_rate": 5.297997535631215e-06,
      "loss": 0.0095,
      "step": 1447180
    },
    {
      "epoch": 2.368374540955598,
      "grad_norm": 0.24041174352169037,
      "learning_rate": 5.297931643417698e-06,
      "loss": 0.0194,
      "step": 1447200
    },
    {
      "epoch": 2.3684072713942514,
      "grad_norm": 0.14665904641151428,
      "learning_rate": 5.297865751204181e-06,
      "loss": 0.0145,
      "step": 1447220
    },
    {
      "epoch": 2.3684400018329046,
      "grad_norm": 0.9175610542297363,
      "learning_rate": 5.297799858990663e-06,
      "loss": 0.0123,
      "step": 1447240
    },
    {
      "epoch": 2.3684727322715577,
      "grad_norm": 0.08755450695753098,
      "learning_rate": 5.297733966777147e-06,
      "loss": 0.0158,
      "step": 1447260
    },
    {
      "epoch": 2.3685054627102113,
      "grad_norm": 0.3530834913253784,
      "learning_rate": 5.2976680745636286e-06,
      "loss": 0.0126,
      "step": 1447280
    },
    {
      "epoch": 2.3685381931488645,
      "grad_norm": 0.7282233238220215,
      "learning_rate": 5.297602182350112e-06,
      "loss": 0.02,
      "step": 1447300
    },
    {
      "epoch": 2.368570923587518,
      "grad_norm": 0.2924326956272125,
      "learning_rate": 5.297536290136595e-06,
      "loss": 0.0192,
      "step": 1447320
    },
    {
      "epoch": 2.3686036540261712,
      "grad_norm": 0.2526054382324219,
      "learning_rate": 5.297470397923078e-06,
      "loss": 0.0148,
      "step": 1447340
    },
    {
      "epoch": 2.368636384464825,
      "grad_norm": 0.16590702533721924,
      "learning_rate": 5.29740450570956e-06,
      "loss": 0.0147,
      "step": 1447360
    },
    {
      "epoch": 2.368669114903478,
      "grad_norm": 0.2504996657371521,
      "learning_rate": 5.297338613496044e-06,
      "loss": 0.0205,
      "step": 1447380
    },
    {
      "epoch": 2.368701845342131,
      "grad_norm": 0.3219740390777588,
      "learning_rate": 5.297272721282527e-06,
      "loss": 0.0114,
      "step": 1447400
    },
    {
      "epoch": 2.3687345757807847,
      "grad_norm": 0.5277187824249268,
      "learning_rate": 5.2972068290690094e-06,
      "loss": 0.015,
      "step": 1447420
    },
    {
      "epoch": 2.368767306219438,
      "grad_norm": 0.8304114937782288,
      "learning_rate": 5.297140936855493e-06,
      "loss": 0.0172,
      "step": 1447440
    },
    {
      "epoch": 2.3688000366580915,
      "grad_norm": 0.5688005685806274,
      "learning_rate": 5.297075044641975e-06,
      "loss": 0.0218,
      "step": 1447460
    },
    {
      "epoch": 2.3688327670967446,
      "grad_norm": 0.3740655183792114,
      "learning_rate": 5.2970091524284585e-06,
      "loss": 0.0136,
      "step": 1447480
    },
    {
      "epoch": 2.368865497535398,
      "grad_norm": 0.11361325532197952,
      "learning_rate": 5.29694326021494e-06,
      "loss": 0.0129,
      "step": 1447500
    },
    {
      "epoch": 2.3688982279740514,
      "grad_norm": 0.1871124505996704,
      "learning_rate": 5.296877368001424e-06,
      "loss": 0.015,
      "step": 1447520
    },
    {
      "epoch": 2.3689309584127045,
      "grad_norm": 0.2033996880054474,
      "learning_rate": 5.296811475787907e-06,
      "loss": 0.0146,
      "step": 1447540
    },
    {
      "epoch": 2.368963688851358,
      "grad_norm": 0.4029025733470917,
      "learning_rate": 5.2967455835743895e-06,
      "loss": 0.0127,
      "step": 1447560
    },
    {
      "epoch": 2.3689964192900113,
      "grad_norm": 0.43428608775138855,
      "learning_rate": 5.296679691360872e-06,
      "loss": 0.0151,
      "step": 1447580
    },
    {
      "epoch": 2.369029149728665,
      "grad_norm": 0.6610538363456726,
      "learning_rate": 5.296613799147356e-06,
      "loss": 0.0114,
      "step": 1447600
    },
    {
      "epoch": 2.369061880167318,
      "grad_norm": 0.09601687639951706,
      "learning_rate": 5.296547906933838e-06,
      "loss": 0.0167,
      "step": 1447620
    },
    {
      "epoch": 2.369094610605971,
      "grad_norm": 0.8124210834503174,
      "learning_rate": 5.296482014720321e-06,
      "loss": 0.0187,
      "step": 1447640
    },
    {
      "epoch": 2.3691273410446247,
      "grad_norm": 0.16498728096485138,
      "learning_rate": 5.296416122506803e-06,
      "loss": 0.0154,
      "step": 1447660
    },
    {
      "epoch": 2.369160071483278,
      "grad_norm": 0.4202094078063965,
      "learning_rate": 5.296350230293287e-06,
      "loss": 0.0141,
      "step": 1447680
    },
    {
      "epoch": 2.3691928019219315,
      "grad_norm": 0.18406133353710175,
      "learning_rate": 5.296284338079769e-06,
      "loss": 0.0115,
      "step": 1447700
    },
    {
      "epoch": 2.3692255323605846,
      "grad_norm": 0.26662173867225647,
      "learning_rate": 5.296218445866252e-06,
      "loss": 0.0241,
      "step": 1447720
    },
    {
      "epoch": 2.369258262799238,
      "grad_norm": 0.43924182653427124,
      "learning_rate": 5.296152553652736e-06,
      "loss": 0.0209,
      "step": 1447740
    },
    {
      "epoch": 2.3692909932378914,
      "grad_norm": 0.15415768325328827,
      "learning_rate": 5.296086661439218e-06,
      "loss": 0.011,
      "step": 1447760
    },
    {
      "epoch": 2.3693237236765445,
      "grad_norm": 0.31081509590148926,
      "learning_rate": 5.296020769225701e-06,
      "loss": 0.0121,
      "step": 1447780
    },
    {
      "epoch": 2.369356454115198,
      "grad_norm": 0.6892526149749756,
      "learning_rate": 5.295954877012184e-06,
      "loss": 0.0156,
      "step": 1447800
    },
    {
      "epoch": 2.3693891845538513,
      "grad_norm": 0.30994150042533875,
      "learning_rate": 5.295888984798668e-06,
      "loss": 0.0179,
      "step": 1447820
    },
    {
      "epoch": 2.369421914992505,
      "grad_norm": 1.1440860033035278,
      "learning_rate": 5.2958230925851495e-06,
      "loss": 0.0147,
      "step": 1447840
    },
    {
      "epoch": 2.369454645431158,
      "grad_norm": 0.34480780363082886,
      "learning_rate": 5.295757200371633e-06,
      "loss": 0.0179,
      "step": 1447860
    },
    {
      "epoch": 2.369487375869811,
      "grad_norm": 0.18695694208145142,
      "learning_rate": 5.295691308158115e-06,
      "loss": 0.0129,
      "step": 1447880
    },
    {
      "epoch": 2.3695201063084648,
      "grad_norm": 0.20788492262363434,
      "learning_rate": 5.295625415944599e-06,
      "loss": 0.013,
      "step": 1447900
    },
    {
      "epoch": 2.369552836747118,
      "grad_norm": 0.27115538716316223,
      "learning_rate": 5.2955595237310805e-06,
      "loss": 0.0151,
      "step": 1447920
    },
    {
      "epoch": 2.3695855671857715,
      "grad_norm": 0.1515563577413559,
      "learning_rate": 5.295493631517564e-06,
      "loss": 0.0135,
      "step": 1447940
    },
    {
      "epoch": 2.3696182976244247,
      "grad_norm": 0.29427605867385864,
      "learning_rate": 5.295427739304047e-06,
      "loss": 0.0121,
      "step": 1447960
    },
    {
      "epoch": 2.3696510280630783,
      "grad_norm": 0.36706575751304626,
      "learning_rate": 5.2953618470905295e-06,
      "loss": 0.0124,
      "step": 1447980
    },
    {
      "epoch": 2.3696837585017314,
      "grad_norm": 0.3534940481185913,
      "learning_rate": 5.295295954877012e-06,
      "loss": 0.0161,
      "step": 1448000
    },
    {
      "epoch": 2.3697164889403846,
      "grad_norm": 0.3338586986064911,
      "learning_rate": 5.295230062663496e-06,
      "loss": 0.0181,
      "step": 1448020
    },
    {
      "epoch": 2.369749219379038,
      "grad_norm": 0.5261082053184509,
      "learning_rate": 5.295164170449978e-06,
      "loss": 0.0112,
      "step": 1448040
    },
    {
      "epoch": 2.3697819498176913,
      "grad_norm": 0.2932789921760559,
      "learning_rate": 5.295098278236461e-06,
      "loss": 0.0132,
      "step": 1448060
    },
    {
      "epoch": 2.369814680256345,
      "grad_norm": 1.189622163772583,
      "learning_rate": 5.295032386022943e-06,
      "loss": 0.0193,
      "step": 1448080
    },
    {
      "epoch": 2.369847410694998,
      "grad_norm": 0.26123687624931335,
      "learning_rate": 5.294966493809427e-06,
      "loss": 0.0192,
      "step": 1448100
    },
    {
      "epoch": 2.3698801411336516,
      "grad_norm": 1.0890799760818481,
      "learning_rate": 5.29490060159591e-06,
      "loss": 0.0199,
      "step": 1448120
    },
    {
      "epoch": 2.369912871572305,
      "grad_norm": 0.2806815803050995,
      "learning_rate": 5.294834709382392e-06,
      "loss": 0.0141,
      "step": 1448140
    },
    {
      "epoch": 2.369945602010958,
      "grad_norm": 0.17157122492790222,
      "learning_rate": 5.294768817168876e-06,
      "loss": 0.0131,
      "step": 1448160
    },
    {
      "epoch": 2.3699783324496115,
      "grad_norm": 0.22468312084674835,
      "learning_rate": 5.294702924955359e-06,
      "loss": 0.0137,
      "step": 1448180
    },
    {
      "epoch": 2.3700110628882647,
      "grad_norm": 0.19764100015163422,
      "learning_rate": 5.294637032741841e-06,
      "loss": 0.0123,
      "step": 1448200
    },
    {
      "epoch": 2.3700437933269183,
      "grad_norm": 0.39476338028907776,
      "learning_rate": 5.294571140528324e-06,
      "loss": 0.0163,
      "step": 1448220
    },
    {
      "epoch": 2.3700765237655714,
      "grad_norm": 0.3736821711063385,
      "learning_rate": 5.294505248314808e-06,
      "loss": 0.009,
      "step": 1448240
    },
    {
      "epoch": 2.370109254204225,
      "grad_norm": 0.16000428795814514,
      "learning_rate": 5.29443935610129e-06,
      "loss": 0.0179,
      "step": 1448260
    },
    {
      "epoch": 2.370141984642878,
      "grad_norm": 0.4552931785583496,
      "learning_rate": 5.294373463887773e-06,
      "loss": 0.0121,
      "step": 1448280
    },
    {
      "epoch": 2.3701747150815313,
      "grad_norm": 0.3471736013889313,
      "learning_rate": 5.294307571674255e-06,
      "loss": 0.0175,
      "step": 1448300
    },
    {
      "epoch": 2.370207445520185,
      "grad_norm": 0.31636980175971985,
      "learning_rate": 5.294241679460739e-06,
      "loss": 0.0191,
      "step": 1448320
    },
    {
      "epoch": 2.370240175958838,
      "grad_norm": 0.9082294702529907,
      "learning_rate": 5.294175787247221e-06,
      "loss": 0.0154,
      "step": 1448340
    },
    {
      "epoch": 2.3702729063974917,
      "grad_norm": 0.4927256107330322,
      "learning_rate": 5.294109895033704e-06,
      "loss": 0.0154,
      "step": 1448360
    },
    {
      "epoch": 2.370305636836145,
      "grad_norm": 0.3797055780887604,
      "learning_rate": 5.294044002820187e-06,
      "loss": 0.0145,
      "step": 1448380
    },
    {
      "epoch": 2.3703383672747984,
      "grad_norm": 0.3499380946159363,
      "learning_rate": 5.2939781106066705e-06,
      "loss": 0.0148,
      "step": 1448400
    },
    {
      "epoch": 2.3703710977134516,
      "grad_norm": 0.3208056688308716,
      "learning_rate": 5.293912218393152e-06,
      "loss": 0.0113,
      "step": 1448420
    },
    {
      "epoch": 2.3704038281521047,
      "grad_norm": 0.29115116596221924,
      "learning_rate": 5.293846326179636e-06,
      "loss": 0.0185,
      "step": 1448440
    },
    {
      "epoch": 2.3704365585907583,
      "grad_norm": 1.0075722932815552,
      "learning_rate": 5.2937804339661195e-06,
      "loss": 0.0171,
      "step": 1448460
    },
    {
      "epoch": 2.3704692890294115,
      "grad_norm": 0.09046202152967453,
      "learning_rate": 5.293714541752601e-06,
      "loss": 0.0168,
      "step": 1448480
    },
    {
      "epoch": 2.370502019468065,
      "grad_norm": 0.3990602195262909,
      "learning_rate": 5.293648649539085e-06,
      "loss": 0.0113,
      "step": 1448500
    },
    {
      "epoch": 2.370534749906718,
      "grad_norm": 0.5092064738273621,
      "learning_rate": 5.293582757325567e-06,
      "loss": 0.0111,
      "step": 1448520
    },
    {
      "epoch": 2.370567480345372,
      "grad_norm": 0.45672836899757385,
      "learning_rate": 5.2935168651120505e-06,
      "loss": 0.0152,
      "step": 1448540
    },
    {
      "epoch": 2.370600210784025,
      "grad_norm": 0.7040685415267944,
      "learning_rate": 5.293450972898533e-06,
      "loss": 0.0198,
      "step": 1448560
    },
    {
      "epoch": 2.370632941222678,
      "grad_norm": 0.31172674894332886,
      "learning_rate": 5.293385080685016e-06,
      "loss": 0.0142,
      "step": 1448580
    },
    {
      "epoch": 2.3706656716613317,
      "grad_norm": 0.40304166078567505,
      "learning_rate": 5.293319188471499e-06,
      "loss": 0.0136,
      "step": 1448600
    },
    {
      "epoch": 2.370698402099985,
      "grad_norm": 0.3063642680644989,
      "learning_rate": 5.293253296257982e-06,
      "loss": 0.0187,
      "step": 1448620
    },
    {
      "epoch": 2.3707311325386384,
      "grad_norm": 0.43441152572631836,
      "learning_rate": 5.293187404044464e-06,
      "loss": 0.0164,
      "step": 1448640
    },
    {
      "epoch": 2.3707638629772916,
      "grad_norm": 0.1902616322040558,
      "learning_rate": 5.293121511830948e-06,
      "loss": 0.0131,
      "step": 1448660
    },
    {
      "epoch": 2.370796593415945,
      "grad_norm": 0.06516600400209427,
      "learning_rate": 5.29305561961743e-06,
      "loss": 0.0154,
      "step": 1448680
    },
    {
      "epoch": 2.3708293238545983,
      "grad_norm": 0.11508002132177353,
      "learning_rate": 5.292989727403913e-06,
      "loss": 0.0173,
      "step": 1448700
    },
    {
      "epoch": 2.3708620542932515,
      "grad_norm": 0.15612488985061646,
      "learning_rate": 5.292923835190395e-06,
      "loss": 0.0156,
      "step": 1448720
    },
    {
      "epoch": 2.370894784731905,
      "grad_norm": 0.3118704855442047,
      "learning_rate": 5.292857942976879e-06,
      "loss": 0.0106,
      "step": 1448740
    },
    {
      "epoch": 2.370927515170558,
      "grad_norm": 0.06324812024831772,
      "learning_rate": 5.2927920507633615e-06,
      "loss": 0.0169,
      "step": 1448760
    },
    {
      "epoch": 2.370960245609212,
      "grad_norm": 0.4381774067878723,
      "learning_rate": 5.292726158549844e-06,
      "loss": 0.0217,
      "step": 1448780
    },
    {
      "epoch": 2.370992976047865,
      "grad_norm": 0.16476954519748688,
      "learning_rate": 5.292660266336328e-06,
      "loss": 0.0179,
      "step": 1448800
    },
    {
      "epoch": 2.3710257064865186,
      "grad_norm": 0.22850923240184784,
      "learning_rate": 5.2925943741228105e-06,
      "loss": 0.0135,
      "step": 1448820
    },
    {
      "epoch": 2.3710584369251717,
      "grad_norm": 0.06726430356502533,
      "learning_rate": 5.292528481909293e-06,
      "loss": 0.0219,
      "step": 1448840
    },
    {
      "epoch": 2.371091167363825,
      "grad_norm": 0.35481175780296326,
      "learning_rate": 5.292462589695776e-06,
      "loss": 0.0089,
      "step": 1448860
    },
    {
      "epoch": 2.3711238978024785,
      "grad_norm": 0.3725634217262268,
      "learning_rate": 5.29239669748226e-06,
      "loss": 0.0206,
      "step": 1448880
    },
    {
      "epoch": 2.3711566282411316,
      "grad_norm": 0.45417532324790955,
      "learning_rate": 5.2923308052687415e-06,
      "loss": 0.0106,
      "step": 1448900
    },
    {
      "epoch": 2.371189358679785,
      "grad_norm": 0.1861315220594406,
      "learning_rate": 5.292264913055225e-06,
      "loss": 0.0146,
      "step": 1448920
    },
    {
      "epoch": 2.3712220891184383,
      "grad_norm": 0.6846489310264587,
      "learning_rate": 5.292199020841707e-06,
      "loss": 0.0172,
      "step": 1448940
    },
    {
      "epoch": 2.371254819557092,
      "grad_norm": 0.4176290035247803,
      "learning_rate": 5.2921331286281906e-06,
      "loss": 0.0119,
      "step": 1448960
    },
    {
      "epoch": 2.371287549995745,
      "grad_norm": 0.603672981262207,
      "learning_rate": 5.292067236414673e-06,
      "loss": 0.0099,
      "step": 1448980
    },
    {
      "epoch": 2.3713202804343982,
      "grad_norm": 0.49656838178634644,
      "learning_rate": 5.292001344201156e-06,
      "loss": 0.0202,
      "step": 1449000
    },
    {
      "epoch": 2.371353010873052,
      "grad_norm": 0.36895182728767395,
      "learning_rate": 5.291935451987639e-06,
      "loss": 0.0309,
      "step": 1449020
    },
    {
      "epoch": 2.371385741311705,
      "grad_norm": 0.41120055317878723,
      "learning_rate": 5.291869559774122e-06,
      "loss": 0.0144,
      "step": 1449040
    },
    {
      "epoch": 2.3714184717503586,
      "grad_norm": 0.16778995096683502,
      "learning_rate": 5.291803667560604e-06,
      "loss": 0.0205,
      "step": 1449060
    },
    {
      "epoch": 2.3714512021890117,
      "grad_norm": 0.9683441519737244,
      "learning_rate": 5.291737775347088e-06,
      "loss": 0.0146,
      "step": 1449080
    },
    {
      "epoch": 2.3714839326276653,
      "grad_norm": 0.27771979570388794,
      "learning_rate": 5.29167188313357e-06,
      "loss": 0.0193,
      "step": 1449100
    },
    {
      "epoch": 2.3715166630663185,
      "grad_norm": 0.5310579538345337,
      "learning_rate": 5.291605990920053e-06,
      "loss": 0.0147,
      "step": 1449120
    },
    {
      "epoch": 2.3715493935049716,
      "grad_norm": 0.41826483607292175,
      "learning_rate": 5.291540098706536e-06,
      "loss": 0.0148,
      "step": 1449140
    },
    {
      "epoch": 2.371582123943625,
      "grad_norm": 0.34566861391067505,
      "learning_rate": 5.291474206493019e-06,
      "loss": 0.0146,
      "step": 1449160
    },
    {
      "epoch": 2.3716148543822784,
      "grad_norm": 0.4790598154067993,
      "learning_rate": 5.291408314279502e-06,
      "loss": 0.0126,
      "step": 1449180
    },
    {
      "epoch": 2.371647584820932,
      "grad_norm": 0.2754947245121002,
      "learning_rate": 5.291342422065985e-06,
      "loss": 0.0093,
      "step": 1449200
    },
    {
      "epoch": 2.371680315259585,
      "grad_norm": 0.20683346688747406,
      "learning_rate": 5.291276529852468e-06,
      "loss": 0.0117,
      "step": 1449220
    },
    {
      "epoch": 2.3717130456982383,
      "grad_norm": 0.5754722356796265,
      "learning_rate": 5.291210637638951e-06,
      "loss": 0.0113,
      "step": 1449240
    },
    {
      "epoch": 2.371745776136892,
      "grad_norm": 0.1605333685874939,
      "learning_rate": 5.291144745425434e-06,
      "loss": 0.016,
      "step": 1449260
    },
    {
      "epoch": 2.371778506575545,
      "grad_norm": 0.3038696050643921,
      "learning_rate": 5.291078853211916e-06,
      "loss": 0.0138,
      "step": 1449280
    },
    {
      "epoch": 2.3718112370141986,
      "grad_norm": 1.0386159420013428,
      "learning_rate": 5.2910129609984e-06,
      "loss": 0.0148,
      "step": 1449300
    },
    {
      "epoch": 2.3718439674528518,
      "grad_norm": 0.20668238401412964,
      "learning_rate": 5.2909470687848816e-06,
      "loss": 0.0147,
      "step": 1449320
    },
    {
      "epoch": 2.371876697891505,
      "grad_norm": 0.39234238862991333,
      "learning_rate": 5.290881176571365e-06,
      "loss": 0.0173,
      "step": 1449340
    },
    {
      "epoch": 2.3719094283301585,
      "grad_norm": 0.20603984594345093,
      "learning_rate": 5.290815284357848e-06,
      "loss": 0.013,
      "step": 1449360
    },
    {
      "epoch": 2.3719421587688116,
      "grad_norm": 0.46127641201019287,
      "learning_rate": 5.290749392144331e-06,
      "loss": 0.0157,
      "step": 1449380
    },
    {
      "epoch": 2.3719748892074652,
      "grad_norm": 0.7483039498329163,
      "learning_rate": 5.290683499930813e-06,
      "loss": 0.0171,
      "step": 1449400
    },
    {
      "epoch": 2.3720076196461184,
      "grad_norm": 0.5663373470306396,
      "learning_rate": 5.290617607717297e-06,
      "loss": 0.0126,
      "step": 1449420
    },
    {
      "epoch": 2.372040350084772,
      "grad_norm": 0.3678267300128937,
      "learning_rate": 5.290551715503779e-06,
      "loss": 0.0166,
      "step": 1449440
    },
    {
      "epoch": 2.372073080523425,
      "grad_norm": 0.7292851805686951,
      "learning_rate": 5.2904858232902624e-06,
      "loss": 0.011,
      "step": 1449460
    },
    {
      "epoch": 2.3721058109620783,
      "grad_norm": 0.6400311589241028,
      "learning_rate": 5.290419931076744e-06,
      "loss": 0.0139,
      "step": 1449480
    },
    {
      "epoch": 2.372138541400732,
      "grad_norm": 0.581524133682251,
      "learning_rate": 5.290354038863228e-06,
      "loss": 0.0174,
      "step": 1449500
    },
    {
      "epoch": 2.372171271839385,
      "grad_norm": 0.1772967427968979,
      "learning_rate": 5.2902881466497115e-06,
      "loss": 0.0141,
      "step": 1449520
    },
    {
      "epoch": 2.3722040022780386,
      "grad_norm": 0.22410733997821808,
      "learning_rate": 5.290222254436193e-06,
      "loss": 0.0147,
      "step": 1449540
    },
    {
      "epoch": 2.3722367327166918,
      "grad_norm": 0.8557891845703125,
      "learning_rate": 5.290156362222677e-06,
      "loss": 0.0205,
      "step": 1449560
    },
    {
      "epoch": 2.3722694631553454,
      "grad_norm": 0.4884054958820343,
      "learning_rate": 5.29009047000916e-06,
      "loss": 0.0171,
      "step": 1449580
    },
    {
      "epoch": 2.3723021935939985,
      "grad_norm": 0.24446651339530945,
      "learning_rate": 5.2900245777956425e-06,
      "loss": 0.0121,
      "step": 1449600
    },
    {
      "epoch": 2.3723349240326517,
      "grad_norm": 0.6356275081634521,
      "learning_rate": 5.289958685582125e-06,
      "loss": 0.0153,
      "step": 1449620
    },
    {
      "epoch": 2.3723676544713053,
      "grad_norm": 0.5453349351882935,
      "learning_rate": 5.289892793368609e-06,
      "loss": 0.0157,
      "step": 1449640
    },
    {
      "epoch": 2.3724003849099584,
      "grad_norm": 0.08519745618104935,
      "learning_rate": 5.289826901155091e-06,
      "loss": 0.0188,
      "step": 1449660
    },
    {
      "epoch": 2.372433115348612,
      "grad_norm": 0.11542461067438126,
      "learning_rate": 5.289761008941574e-06,
      "loss": 0.0115,
      "step": 1449680
    },
    {
      "epoch": 2.372465845787265,
      "grad_norm": 0.47827985882759094,
      "learning_rate": 5.289695116728056e-06,
      "loss": 0.0157,
      "step": 1449700
    },
    {
      "epoch": 2.3724985762259188,
      "grad_norm": 0.08643637597560883,
      "learning_rate": 5.28962922451454e-06,
      "loss": 0.0164,
      "step": 1449720
    },
    {
      "epoch": 2.372531306664572,
      "grad_norm": 0.3943534791469574,
      "learning_rate": 5.289563332301022e-06,
      "loss": 0.0221,
      "step": 1449740
    },
    {
      "epoch": 2.372564037103225,
      "grad_norm": 0.16197657585144043,
      "learning_rate": 5.289497440087505e-06,
      "loss": 0.0128,
      "step": 1449760
    },
    {
      "epoch": 2.3725967675418786,
      "grad_norm": 0.360841304063797,
      "learning_rate": 5.289431547873988e-06,
      "loss": 0.0215,
      "step": 1449780
    },
    {
      "epoch": 2.372629497980532,
      "grad_norm": 0.16388294100761414,
      "learning_rate": 5.289365655660471e-06,
      "loss": 0.0198,
      "step": 1449800
    },
    {
      "epoch": 2.3726622284191854,
      "grad_norm": 0.49941521883010864,
      "learning_rate": 5.2892997634469535e-06,
      "loss": 0.017,
      "step": 1449820
    },
    {
      "epoch": 2.3726949588578385,
      "grad_norm": 0.4153878092765808,
      "learning_rate": 5.289233871233437e-06,
      "loss": 0.0114,
      "step": 1449840
    },
    {
      "epoch": 2.372727689296492,
      "grad_norm": 0.810775876045227,
      "learning_rate": 5.28916797901992e-06,
      "loss": 0.0163,
      "step": 1449860
    },
    {
      "epoch": 2.3727604197351453,
      "grad_norm": 0.40297162532806396,
      "learning_rate": 5.2891020868064025e-06,
      "loss": 0.0136,
      "step": 1449880
    },
    {
      "epoch": 2.3727931501737984,
      "grad_norm": 0.04564562812447548,
      "learning_rate": 5.289036194592886e-06,
      "loss": 0.0211,
      "step": 1449900
    },
    {
      "epoch": 2.372825880612452,
      "grad_norm": 0.20854654908180237,
      "learning_rate": 5.288970302379368e-06,
      "loss": 0.0143,
      "step": 1449920
    },
    {
      "epoch": 2.372858611051105,
      "grad_norm": 0.239418625831604,
      "learning_rate": 5.288904410165852e-06,
      "loss": 0.0084,
      "step": 1449940
    },
    {
      "epoch": 2.3728913414897588,
      "grad_norm": 0.17489035427570343,
      "learning_rate": 5.2888385179523335e-06,
      "loss": 0.0228,
      "step": 1449960
    },
    {
      "epoch": 2.372924071928412,
      "grad_norm": 0.6778122186660767,
      "learning_rate": 5.288772625738817e-06,
      "loss": 0.0143,
      "step": 1449980
    },
    {
      "epoch": 2.3729568023670655,
      "grad_norm": 0.12960216403007507,
      "learning_rate": 5.2887067335253e-06,
      "loss": 0.0229,
      "step": 1450000
    },
    {
      "epoch": 2.3729568023670655,
      "eval_loss": 0.008381111547350883,
      "eval_runtime": 6495.1021,
      "eval_samples_per_second": 158.251,
      "eval_steps_per_second": 15.825,
      "eval_sts-dev_pearson_cosine": 0.9806451436606358,
      "eval_sts-dev_spearman_cosine": 0.8927695058930001,
      "step": 1450000
    },
    {
      "epoch": 2.3729895328057187,
      "grad_norm": 0.40002286434173584,
      "learning_rate": 5.2886408413117825e-06,
      "loss": 0.0201,
      "step": 1450020
    },
    {
      "epoch": 2.373022263244372,
      "grad_norm": 0.29057955741882324,
      "learning_rate": 5.288574949098265e-06,
      "loss": 0.0131,
      "step": 1450040
    },
    {
      "epoch": 2.3730549936830254,
      "grad_norm": 0.2295682430267334,
      "learning_rate": 5.288509056884749e-06,
      "loss": 0.0228,
      "step": 1450060
    },
    {
      "epoch": 2.3730877241216786,
      "grad_norm": 0.3409704566001892,
      "learning_rate": 5.288443164671231e-06,
      "loss": 0.013,
      "step": 1450080
    },
    {
      "epoch": 2.373120454560332,
      "grad_norm": 0.47940269112586975,
      "learning_rate": 5.288377272457714e-06,
      "loss": 0.0195,
      "step": 1450100
    },
    {
      "epoch": 2.3731531849989853,
      "grad_norm": 0.4883198142051697,
      "learning_rate": 5.288311380244196e-06,
      "loss": 0.0128,
      "step": 1450120
    },
    {
      "epoch": 2.373185915437639,
      "grad_norm": 0.11250606924295425,
      "learning_rate": 5.28824548803068e-06,
      "loss": 0.0127,
      "step": 1450140
    },
    {
      "epoch": 2.373218645876292,
      "grad_norm": 0.7575552463531494,
      "learning_rate": 5.2881795958171626e-06,
      "loss": 0.0126,
      "step": 1450160
    },
    {
      "epoch": 2.373251376314945,
      "grad_norm": 0.1698586791753769,
      "learning_rate": 5.288113703603645e-06,
      "loss": 0.0156,
      "step": 1450180
    },
    {
      "epoch": 2.373284106753599,
      "grad_norm": 0.28280115127563477,
      "learning_rate": 5.288047811390129e-06,
      "loss": 0.0143,
      "step": 1450200
    },
    {
      "epoch": 2.373316837192252,
      "grad_norm": 0.19658644497394562,
      "learning_rate": 5.287981919176612e-06,
      "loss": 0.0128,
      "step": 1450220
    },
    {
      "epoch": 2.3733495676309055,
      "grad_norm": 0.3927169144153595,
      "learning_rate": 5.287916026963094e-06,
      "loss": 0.023,
      "step": 1450240
    },
    {
      "epoch": 2.3733822980695587,
      "grad_norm": 0.3954348564147949,
      "learning_rate": 5.287850134749577e-06,
      "loss": 0.0144,
      "step": 1450260
    },
    {
      "epoch": 2.3734150285082123,
      "grad_norm": 0.21859638392925262,
      "learning_rate": 5.287784242536061e-06,
      "loss": 0.0147,
      "step": 1450280
    },
    {
      "epoch": 2.3734477589468654,
      "grad_norm": 0.5736060738563538,
      "learning_rate": 5.287718350322543e-06,
      "loss": 0.0221,
      "step": 1450300
    },
    {
      "epoch": 2.3734804893855186,
      "grad_norm": 0.28848814964294434,
      "learning_rate": 5.287652458109026e-06,
      "loss": 0.0117,
      "step": 1450320
    },
    {
      "epoch": 2.373513219824172,
      "grad_norm": 0.48687201738357544,
      "learning_rate": 5.287586565895508e-06,
      "loss": 0.0141,
      "step": 1450340
    },
    {
      "epoch": 2.3735459502628253,
      "grad_norm": 0.4711856544017792,
      "learning_rate": 5.287520673681992e-06,
      "loss": 0.0127,
      "step": 1450360
    },
    {
      "epoch": 2.373578680701479,
      "grad_norm": 0.21259798109531403,
      "learning_rate": 5.287454781468474e-06,
      "loss": 0.0115,
      "step": 1450380
    },
    {
      "epoch": 2.373611411140132,
      "grad_norm": 0.6502245664596558,
      "learning_rate": 5.287388889254957e-06,
      "loss": 0.0132,
      "step": 1450400
    },
    {
      "epoch": 2.3736441415787857,
      "grad_norm": 0.32615089416503906,
      "learning_rate": 5.28732299704144e-06,
      "loss": 0.0172,
      "step": 1450420
    },
    {
      "epoch": 2.373676872017439,
      "grad_norm": 0.5936642289161682,
      "learning_rate": 5.2872571048279235e-06,
      "loss": 0.0125,
      "step": 1450440
    },
    {
      "epoch": 2.373709602456092,
      "grad_norm": 0.439902126789093,
      "learning_rate": 5.287191212614405e-06,
      "loss": 0.0126,
      "step": 1450460
    },
    {
      "epoch": 2.3737423328947456,
      "grad_norm": 4.695412635803223,
      "learning_rate": 5.287125320400889e-06,
      "loss": 0.0151,
      "step": 1450480
    },
    {
      "epoch": 2.3737750633333987,
      "grad_norm": 0.0946527048945427,
      "learning_rate": 5.287059428187371e-06,
      "loss": 0.0158,
      "step": 1450500
    },
    {
      "epoch": 2.3738077937720523,
      "grad_norm": 0.3758753538131714,
      "learning_rate": 5.2869935359738544e-06,
      "loss": 0.0159,
      "step": 1450520
    },
    {
      "epoch": 2.3738405242107055,
      "grad_norm": 0.2120993286371231,
      "learning_rate": 5.286927643760336e-06,
      "loss": 0.012,
      "step": 1450540
    },
    {
      "epoch": 2.373873254649359,
      "grad_norm": 0.09035301953554153,
      "learning_rate": 5.28686175154682e-06,
      "loss": 0.0173,
      "step": 1450560
    },
    {
      "epoch": 2.373905985088012,
      "grad_norm": 0.2773241400718689,
      "learning_rate": 5.2867958593333035e-06,
      "loss": 0.0195,
      "step": 1450580
    },
    {
      "epoch": 2.3739387155266654,
      "grad_norm": 0.34599167108535767,
      "learning_rate": 5.286729967119786e-06,
      "loss": 0.0129,
      "step": 1450600
    },
    {
      "epoch": 2.373971445965319,
      "grad_norm": 0.09809738397598267,
      "learning_rate": 5.286664074906269e-06,
      "loss": 0.0113,
      "step": 1450620
    },
    {
      "epoch": 2.374004176403972,
      "grad_norm": 0.7700011730194092,
      "learning_rate": 5.286598182692752e-06,
      "loss": 0.0225,
      "step": 1450640
    },
    {
      "epoch": 2.3740369068426257,
      "grad_norm": 0.212226003408432,
      "learning_rate": 5.286532290479235e-06,
      "loss": 0.0107,
      "step": 1450660
    },
    {
      "epoch": 2.374069637281279,
      "grad_norm": 0.7163049578666687,
      "learning_rate": 5.286466398265717e-06,
      "loss": 0.016,
      "step": 1450680
    },
    {
      "epoch": 2.374102367719932,
      "grad_norm": 0.16212551295757294,
      "learning_rate": 5.286400506052201e-06,
      "loss": 0.0131,
      "step": 1450700
    },
    {
      "epoch": 2.3741350981585856,
      "grad_norm": 0.6704736948013306,
      "learning_rate": 5.286334613838683e-06,
      "loss": 0.0123,
      "step": 1450720
    },
    {
      "epoch": 2.3741678285972387,
      "grad_norm": 0.5460872650146484,
      "learning_rate": 5.286268721625166e-06,
      "loss": 0.023,
      "step": 1450740
    },
    {
      "epoch": 2.3742005590358923,
      "grad_norm": 0.30035755038261414,
      "learning_rate": 5.286202829411648e-06,
      "loss": 0.0113,
      "step": 1450760
    },
    {
      "epoch": 2.3742332894745455,
      "grad_norm": 0.6043708920478821,
      "learning_rate": 5.286136937198132e-06,
      "loss": 0.0129,
      "step": 1450780
    },
    {
      "epoch": 2.3742660199131986,
      "grad_norm": 0.3267802596092224,
      "learning_rate": 5.2860710449846145e-06,
      "loss": 0.0147,
      "step": 1450800
    },
    {
      "epoch": 2.3742987503518522,
      "grad_norm": 0.15986144542694092,
      "learning_rate": 5.286005152771097e-06,
      "loss": 0.0118,
      "step": 1450820
    },
    {
      "epoch": 2.3743314807905054,
      "grad_norm": 0.22899532318115234,
      "learning_rate": 5.28593926055758e-06,
      "loss": 0.0136,
      "step": 1450840
    },
    {
      "epoch": 2.374364211229159,
      "grad_norm": 0.22097180783748627,
      "learning_rate": 5.2858733683440635e-06,
      "loss": 0.0177,
      "step": 1450860
    },
    {
      "epoch": 2.374396941667812,
      "grad_norm": 0.5605443716049194,
      "learning_rate": 5.2858074761305454e-06,
      "loss": 0.0107,
      "step": 1450880
    },
    {
      "epoch": 2.3744296721064657,
      "grad_norm": 0.17417119443416595,
      "learning_rate": 5.285741583917029e-06,
      "loss": 0.0133,
      "step": 1450900
    },
    {
      "epoch": 2.374462402545119,
      "grad_norm": 0.14941014349460602,
      "learning_rate": 5.285675691703513e-06,
      "loss": 0.0143,
      "step": 1450920
    },
    {
      "epoch": 2.374495132983772,
      "grad_norm": 0.1494830995798111,
      "learning_rate": 5.2856097994899945e-06,
      "loss": 0.0125,
      "step": 1450940
    },
    {
      "epoch": 2.3745278634224256,
      "grad_norm": 0.4047297239303589,
      "learning_rate": 5.285543907276478e-06,
      "loss": 0.0158,
      "step": 1450960
    },
    {
      "epoch": 2.3745605938610788,
      "grad_norm": 1.0680369138717651,
      "learning_rate": 5.28547801506296e-06,
      "loss": 0.0209,
      "step": 1450980
    },
    {
      "epoch": 2.3745933242997324,
      "grad_norm": 0.23786763846874237,
      "learning_rate": 5.2854121228494436e-06,
      "loss": 0.0174,
      "step": 1451000
    },
    {
      "epoch": 2.3746260547383855,
      "grad_norm": 0.05233248695731163,
      "learning_rate": 5.285346230635926e-06,
      "loss": 0.0155,
      "step": 1451020
    },
    {
      "epoch": 2.374658785177039,
      "grad_norm": 0.4756161868572235,
      "learning_rate": 5.285280338422409e-06,
      "loss": 0.0145,
      "step": 1451040
    },
    {
      "epoch": 2.3746915156156922,
      "grad_norm": 0.39668750762939453,
      "learning_rate": 5.285214446208892e-06,
      "loss": 0.0087,
      "step": 1451060
    },
    {
      "epoch": 2.3747242460543454,
      "grad_norm": 0.5454279184341431,
      "learning_rate": 5.285148553995375e-06,
      "loss": 0.0124,
      "step": 1451080
    },
    {
      "epoch": 2.374756976492999,
      "grad_norm": 0.4905884563922882,
      "learning_rate": 5.285082661781857e-06,
      "loss": 0.0179,
      "step": 1451100
    },
    {
      "epoch": 2.374789706931652,
      "grad_norm": 0.27515676617622375,
      "learning_rate": 5.285016769568341e-06,
      "loss": 0.0151,
      "step": 1451120
    },
    {
      "epoch": 2.3748224373703057,
      "grad_norm": 1.5295606851577759,
      "learning_rate": 5.284950877354823e-06,
      "loss": 0.0159,
      "step": 1451140
    },
    {
      "epoch": 2.374855167808959,
      "grad_norm": 0.7068023681640625,
      "learning_rate": 5.284884985141306e-06,
      "loss": 0.0142,
      "step": 1451160
    },
    {
      "epoch": 2.3748878982476125,
      "grad_norm": 0.14755891263484955,
      "learning_rate": 5.284819092927789e-06,
      "loss": 0.0119,
      "step": 1451180
    },
    {
      "epoch": 2.3749206286862656,
      "grad_norm": 0.24794763326644897,
      "learning_rate": 5.284753200714272e-06,
      "loss": 0.0165,
      "step": 1451200
    },
    {
      "epoch": 2.374953359124919,
      "grad_norm": 0.5498856902122498,
      "learning_rate": 5.2846873085007546e-06,
      "loss": 0.0184,
      "step": 1451220
    },
    {
      "epoch": 2.3749860895635724,
      "grad_norm": 0.5157849788665771,
      "learning_rate": 5.284621416287238e-06,
      "loss": 0.011,
      "step": 1451240
    },
    {
      "epoch": 2.3750188200022255,
      "grad_norm": 0.45994940400123596,
      "learning_rate": 5.284555524073721e-06,
      "loss": 0.0145,
      "step": 1451260
    },
    {
      "epoch": 2.375051550440879,
      "grad_norm": 0.5345550775527954,
      "learning_rate": 5.284489631860204e-06,
      "loss": 0.0204,
      "step": 1451280
    },
    {
      "epoch": 2.3750842808795323,
      "grad_norm": 0.14839807152748108,
      "learning_rate": 5.284423739646687e-06,
      "loss": 0.0135,
      "step": 1451300
    },
    {
      "epoch": 2.375117011318186,
      "grad_norm": 0.8594104647636414,
      "learning_rate": 5.284357847433169e-06,
      "loss": 0.009,
      "step": 1451320
    },
    {
      "epoch": 2.375149741756839,
      "grad_norm": 0.42262935638427734,
      "learning_rate": 5.284291955219653e-06,
      "loss": 0.0222,
      "step": 1451340
    },
    {
      "epoch": 2.375182472195492,
      "grad_norm": 0.5713136792182922,
      "learning_rate": 5.284226063006135e-06,
      "loss": 0.0174,
      "step": 1451360
    },
    {
      "epoch": 2.3752152026341458,
      "grad_norm": 0.17933733761310577,
      "learning_rate": 5.284160170792618e-06,
      "loss": 0.0154,
      "step": 1451380
    },
    {
      "epoch": 2.375247933072799,
      "grad_norm": 0.41772204637527466,
      "learning_rate": 5.284094278579101e-06,
      "loss": 0.0215,
      "step": 1451400
    },
    {
      "epoch": 2.3752806635114525,
      "grad_norm": 0.3947807252407074,
      "learning_rate": 5.284028386365584e-06,
      "loss": 0.016,
      "step": 1451420
    },
    {
      "epoch": 2.3753133939501057,
      "grad_norm": 0.14902712404727936,
      "learning_rate": 5.283962494152066e-06,
      "loss": 0.0161,
      "step": 1451440
    },
    {
      "epoch": 2.3753461243887593,
      "grad_norm": 0.15831518173217773,
      "learning_rate": 5.28389660193855e-06,
      "loss": 0.0145,
      "step": 1451460
    },
    {
      "epoch": 2.3753788548274124,
      "grad_norm": 0.5350214838981628,
      "learning_rate": 5.283830709725032e-06,
      "loss": 0.0195,
      "step": 1451480
    },
    {
      "epoch": 2.3754115852660656,
      "grad_norm": 0.1623721569776535,
      "learning_rate": 5.2837648175115155e-06,
      "loss": 0.0186,
      "step": 1451500
    },
    {
      "epoch": 2.375444315704719,
      "grad_norm": 0.4608910381793976,
      "learning_rate": 5.283698925297997e-06,
      "loss": 0.0175,
      "step": 1451520
    },
    {
      "epoch": 2.3754770461433723,
      "grad_norm": 0.12791506946086884,
      "learning_rate": 5.283633033084481e-06,
      "loss": 0.0113,
      "step": 1451540
    },
    {
      "epoch": 2.375509776582026,
      "grad_norm": 0.6687875986099243,
      "learning_rate": 5.283567140870963e-06,
      "loss": 0.0126,
      "step": 1451560
    },
    {
      "epoch": 2.375542507020679,
      "grad_norm": 0.2811712324619293,
      "learning_rate": 5.283501248657446e-06,
      "loss": 0.0093,
      "step": 1451580
    },
    {
      "epoch": 2.3755752374593326,
      "grad_norm": 0.26310789585113525,
      "learning_rate": 5.283435356443929e-06,
      "loss": 0.0124,
      "step": 1451600
    },
    {
      "epoch": 2.375607967897986,
      "grad_norm": 0.44471171498298645,
      "learning_rate": 5.283369464230412e-06,
      "loss": 0.0112,
      "step": 1451620
    },
    {
      "epoch": 2.375640698336639,
      "grad_norm": 0.5784854292869568,
      "learning_rate": 5.2833035720168955e-06,
      "loss": 0.0156,
      "step": 1451640
    },
    {
      "epoch": 2.3756734287752925,
      "grad_norm": 0.5465575456619263,
      "learning_rate": 5.283237679803378e-06,
      "loss": 0.0175,
      "step": 1451660
    },
    {
      "epoch": 2.3757061592139457,
      "grad_norm": 0.16453149914741516,
      "learning_rate": 5.283171787589861e-06,
      "loss": 0.0111,
      "step": 1451680
    },
    {
      "epoch": 2.3757388896525993,
      "grad_norm": 0.14961998164653778,
      "learning_rate": 5.283105895376344e-06,
      "loss": 0.0197,
      "step": 1451700
    },
    {
      "epoch": 2.3757716200912524,
      "grad_norm": 0.29538002610206604,
      "learning_rate": 5.283040003162827e-06,
      "loss": 0.0139,
      "step": 1451720
    },
    {
      "epoch": 2.375804350529906,
      "grad_norm": 0.3687285780906677,
      "learning_rate": 5.282974110949309e-06,
      "loss": 0.0131,
      "step": 1451740
    },
    {
      "epoch": 2.375837080968559,
      "grad_norm": 0.23220540583133698,
      "learning_rate": 5.282908218735793e-06,
      "loss": 0.0108,
      "step": 1451760
    },
    {
      "epoch": 2.3758698114072123,
      "grad_norm": 0.19157196581363678,
      "learning_rate": 5.282842326522275e-06,
      "loss": 0.0144,
      "step": 1451780
    },
    {
      "epoch": 2.375902541845866,
      "grad_norm": 0.2575458586215973,
      "learning_rate": 5.282776434308758e-06,
      "loss": 0.0213,
      "step": 1451800
    },
    {
      "epoch": 2.375935272284519,
      "grad_norm": 0.9698936343193054,
      "learning_rate": 5.282710542095241e-06,
      "loss": 0.0174,
      "step": 1451820
    },
    {
      "epoch": 2.3759680027231727,
      "grad_norm": 0.5879823565483093,
      "learning_rate": 5.282644649881724e-06,
      "loss": 0.0112,
      "step": 1451840
    },
    {
      "epoch": 2.376000733161826,
      "grad_norm": 0.5715422630310059,
      "learning_rate": 5.2825787576682065e-06,
      "loss": 0.0164,
      "step": 1451860
    },
    {
      "epoch": 2.3760334636004794,
      "grad_norm": 0.6586256623268127,
      "learning_rate": 5.28251286545469e-06,
      "loss": 0.0145,
      "step": 1451880
    },
    {
      "epoch": 2.3760661940391326,
      "grad_norm": 1.353468894958496,
      "learning_rate": 5.282446973241172e-06,
      "loss": 0.0205,
      "step": 1451900
    },
    {
      "epoch": 2.3760989244777857,
      "grad_norm": 0.23028656840324402,
      "learning_rate": 5.2823810810276555e-06,
      "loss": 0.0155,
      "step": 1451920
    },
    {
      "epoch": 2.3761316549164393,
      "grad_norm": 0.6217631697654724,
      "learning_rate": 5.282315188814137e-06,
      "loss": 0.0097,
      "step": 1451940
    },
    {
      "epoch": 2.3761643853550924,
      "grad_norm": 0.23517173528671265,
      "learning_rate": 5.282249296600621e-06,
      "loss": 0.0161,
      "step": 1451960
    },
    {
      "epoch": 2.376197115793746,
      "grad_norm": 0.2744999825954437,
      "learning_rate": 5.282183404387105e-06,
      "loss": 0.0094,
      "step": 1451980
    },
    {
      "epoch": 2.376229846232399,
      "grad_norm": 0.29804933071136475,
      "learning_rate": 5.2821175121735865e-06,
      "loss": 0.0109,
      "step": 1452000
    },
    {
      "epoch": 2.376262576671053,
      "grad_norm": 0.34363871812820435,
      "learning_rate": 5.28205161996007e-06,
      "loss": 0.0135,
      "step": 1452020
    },
    {
      "epoch": 2.376295307109706,
      "grad_norm": 0.40826115012168884,
      "learning_rate": 5.281985727746553e-06,
      "loss": 0.0133,
      "step": 1452040
    },
    {
      "epoch": 2.376328037548359,
      "grad_norm": 0.2988492548465729,
      "learning_rate": 5.2819198355330356e-06,
      "loss": 0.0096,
      "step": 1452060
    },
    {
      "epoch": 2.3763607679870127,
      "grad_norm": 0.31429246068000793,
      "learning_rate": 5.281853943319518e-06,
      "loss": 0.0166,
      "step": 1452080
    },
    {
      "epoch": 2.376393498425666,
      "grad_norm": 0.23734112083911896,
      "learning_rate": 5.281788051106002e-06,
      "loss": 0.0102,
      "step": 1452100
    },
    {
      "epoch": 2.3764262288643194,
      "grad_norm": 1.0186305046081543,
      "learning_rate": 5.281722158892484e-06,
      "loss": 0.0184,
      "step": 1452120
    },
    {
      "epoch": 2.3764589593029726,
      "grad_norm": 0.23695912957191467,
      "learning_rate": 5.281656266678967e-06,
      "loss": 0.0118,
      "step": 1452140
    },
    {
      "epoch": 2.3764916897416257,
      "grad_norm": 0.635360062122345,
      "learning_rate": 5.281590374465449e-06,
      "loss": 0.0154,
      "step": 1452160
    },
    {
      "epoch": 2.3765244201802793,
      "grad_norm": 0.44053423404693604,
      "learning_rate": 5.281524482251933e-06,
      "loss": 0.0177,
      "step": 1452180
    },
    {
      "epoch": 2.3765571506189325,
      "grad_norm": 0.5639362335205078,
      "learning_rate": 5.281458590038416e-06,
      "loss": 0.0255,
      "step": 1452200
    },
    {
      "epoch": 2.376589881057586,
      "grad_norm": 0.08688919991254807,
      "learning_rate": 5.281392697824898e-06,
      "loss": 0.0159,
      "step": 1452220
    },
    {
      "epoch": 2.376622611496239,
      "grad_norm": 0.5981556177139282,
      "learning_rate": 5.281326805611381e-06,
      "loss": 0.0176,
      "step": 1452240
    },
    {
      "epoch": 2.3766553419348924,
      "grad_norm": 0.37195542454719543,
      "learning_rate": 5.281260913397865e-06,
      "loss": 0.0155,
      "step": 1452260
    },
    {
      "epoch": 2.376688072373546,
      "grad_norm": 0.19047018885612488,
      "learning_rate": 5.2811950211843465e-06,
      "loss": 0.0112,
      "step": 1452280
    },
    {
      "epoch": 2.376720802812199,
      "grad_norm": 0.25393199920654297,
      "learning_rate": 5.28112912897083e-06,
      "loss": 0.0125,
      "step": 1452300
    },
    {
      "epoch": 2.3767535332508527,
      "grad_norm": 0.4960111081600189,
      "learning_rate": 5.281063236757314e-06,
      "loss": 0.0133,
      "step": 1452320
    },
    {
      "epoch": 2.376786263689506,
      "grad_norm": 0.2985014021396637,
      "learning_rate": 5.280997344543796e-06,
      "loss": 0.0079,
      "step": 1452340
    },
    {
      "epoch": 2.3768189941281594,
      "grad_norm": 0.30086687207221985,
      "learning_rate": 5.280931452330279e-06,
      "loss": 0.0151,
      "step": 1452360
    },
    {
      "epoch": 2.3768517245668126,
      "grad_norm": 0.1994500458240509,
      "learning_rate": 5.280865560116761e-06,
      "loss": 0.0206,
      "step": 1452380
    },
    {
      "epoch": 2.3768844550054657,
      "grad_norm": 0.7448261380195618,
      "learning_rate": 5.280799667903245e-06,
      "loss": 0.0171,
      "step": 1452400
    },
    {
      "epoch": 2.3769171854441193,
      "grad_norm": 0.27058160305023193,
      "learning_rate": 5.280733775689727e-06,
      "loss": 0.0138,
      "step": 1452420
    },
    {
      "epoch": 2.3769499158827725,
      "grad_norm": 0.3604021668434143,
      "learning_rate": 5.28066788347621e-06,
      "loss": 0.0179,
      "step": 1452440
    },
    {
      "epoch": 2.376982646321426,
      "grad_norm": 0.25446420907974243,
      "learning_rate": 5.280601991262693e-06,
      "loss": 0.0123,
      "step": 1452460
    },
    {
      "epoch": 2.3770153767600792,
      "grad_norm": 0.2150259017944336,
      "learning_rate": 5.2805360990491765e-06,
      "loss": 0.0188,
      "step": 1452480
    },
    {
      "epoch": 2.377048107198733,
      "grad_norm": 0.3239174783229828,
      "learning_rate": 5.280470206835658e-06,
      "loss": 0.0127,
      "step": 1452500
    },
    {
      "epoch": 2.377080837637386,
      "grad_norm": 0.2899289131164551,
      "learning_rate": 5.280404314622142e-06,
      "loss": 0.0128,
      "step": 1452520
    },
    {
      "epoch": 2.377113568076039,
      "grad_norm": 0.6001044511795044,
      "learning_rate": 5.280338422408624e-06,
      "loss": 0.0144,
      "step": 1452540
    },
    {
      "epoch": 2.3771462985146927,
      "grad_norm": 0.3455345630645752,
      "learning_rate": 5.2802725301951074e-06,
      "loss": 0.012,
      "step": 1452560
    },
    {
      "epoch": 2.377179028953346,
      "grad_norm": 0.6421094536781311,
      "learning_rate": 5.280206637981589e-06,
      "loss": 0.0223,
      "step": 1452580
    },
    {
      "epoch": 2.3772117593919995,
      "grad_norm": 0.1941363662481308,
      "learning_rate": 5.280140745768073e-06,
      "loss": 0.0183,
      "step": 1452600
    },
    {
      "epoch": 2.3772444898306526,
      "grad_norm": 0.27741944789886475,
      "learning_rate": 5.280074853554556e-06,
      "loss": 0.0191,
      "step": 1452620
    },
    {
      "epoch": 2.377277220269306,
      "grad_norm": 0.22313658893108368,
      "learning_rate": 5.280008961341038e-06,
      "loss": 0.0113,
      "step": 1452640
    },
    {
      "epoch": 2.3773099507079594,
      "grad_norm": 0.1953774243593216,
      "learning_rate": 5.279943069127521e-06,
      "loss": 0.0107,
      "step": 1452660
    },
    {
      "epoch": 2.3773426811466125,
      "grad_norm": 0.13575373589992523,
      "learning_rate": 5.279877176914005e-06,
      "loss": 0.0103,
      "step": 1452680
    },
    {
      "epoch": 2.377375411585266,
      "grad_norm": 0.40837496519088745,
      "learning_rate": 5.2798112847004875e-06,
      "loss": 0.012,
      "step": 1452700
    },
    {
      "epoch": 2.3774081420239193,
      "grad_norm": 0.46457797288894653,
      "learning_rate": 5.27974539248697e-06,
      "loss": 0.0111,
      "step": 1452720
    },
    {
      "epoch": 2.377440872462573,
      "grad_norm": 0.22834832966327667,
      "learning_rate": 5.279679500273454e-06,
      "loss": 0.0196,
      "step": 1452740
    },
    {
      "epoch": 2.377473602901226,
      "grad_norm": 0.823855996131897,
      "learning_rate": 5.279613608059936e-06,
      "loss": 0.0097,
      "step": 1452760
    },
    {
      "epoch": 2.3775063333398796,
      "grad_norm": 0.13103175163269043,
      "learning_rate": 5.279547715846419e-06,
      "loss": 0.0102,
      "step": 1452780
    },
    {
      "epoch": 2.3775390637785327,
      "grad_norm": 0.3942517042160034,
      "learning_rate": 5.279481823632901e-06,
      "loss": 0.0111,
      "step": 1452800
    },
    {
      "epoch": 2.377571794217186,
      "grad_norm": 0.4379238486289978,
      "learning_rate": 5.279415931419385e-06,
      "loss": 0.0198,
      "step": 1452820
    },
    {
      "epoch": 2.3776045246558395,
      "grad_norm": 0.21915458142757416,
      "learning_rate": 5.2793500392058675e-06,
      "loss": 0.0142,
      "step": 1452840
    },
    {
      "epoch": 2.3776372550944926,
      "grad_norm": 1.0242853164672852,
      "learning_rate": 5.27928414699235e-06,
      "loss": 0.0156,
      "step": 1452860
    },
    {
      "epoch": 2.3776699855331462,
      "grad_norm": 0.9000235199928284,
      "learning_rate": 5.279218254778833e-06,
      "loss": 0.0178,
      "step": 1452880
    },
    {
      "epoch": 2.3777027159717994,
      "grad_norm": 0.526763379573822,
      "learning_rate": 5.2791523625653166e-06,
      "loss": 0.0153,
      "step": 1452900
    },
    {
      "epoch": 2.377735446410453,
      "grad_norm": 0.23732388019561768,
      "learning_rate": 5.2790864703517984e-06,
      "loss": 0.0121,
      "step": 1452920
    },
    {
      "epoch": 2.377768176849106,
      "grad_norm": 0.16554099321365356,
      "learning_rate": 5.279020578138282e-06,
      "loss": 0.0124,
      "step": 1452940
    },
    {
      "epoch": 2.3778009072877593,
      "grad_norm": 0.1725054234266281,
      "learning_rate": 5.278954685924764e-06,
      "loss": 0.0241,
      "step": 1452960
    },
    {
      "epoch": 2.377833637726413,
      "grad_norm": 0.3736613392829895,
      "learning_rate": 5.2788887937112475e-06,
      "loss": 0.0137,
      "step": 1452980
    },
    {
      "epoch": 2.377866368165066,
      "grad_norm": 0.08865194022655487,
      "learning_rate": 5.27882290149773e-06,
      "loss": 0.012,
      "step": 1453000
    },
    {
      "epoch": 2.3778990986037196,
      "grad_norm": 0.3335029184818268,
      "learning_rate": 5.278757009284213e-06,
      "loss": 0.0135,
      "step": 1453020
    },
    {
      "epoch": 2.3779318290423728,
      "grad_norm": 1.044060468673706,
      "learning_rate": 5.2786911170706966e-06,
      "loss": 0.0168,
      "step": 1453040
    },
    {
      "epoch": 2.3779645594810264,
      "grad_norm": 0.3232690393924713,
      "learning_rate": 5.278625224857179e-06,
      "loss": 0.0175,
      "step": 1453060
    },
    {
      "epoch": 2.3779972899196795,
      "grad_norm": 0.3514690399169922,
      "learning_rate": 5.278559332643662e-06,
      "loss": 0.0214,
      "step": 1453080
    },
    {
      "epoch": 2.3780300203583327,
      "grad_norm": 0.6100838780403137,
      "learning_rate": 5.278493440430145e-06,
      "loss": 0.0149,
      "step": 1453100
    },
    {
      "epoch": 2.3780627507969863,
      "grad_norm": 0.2630779445171356,
      "learning_rate": 5.278427548216628e-06,
      "loss": 0.0139,
      "step": 1453120
    },
    {
      "epoch": 2.3780954812356394,
      "grad_norm": 0.1589967906475067,
      "learning_rate": 5.27836165600311e-06,
      "loss": 0.009,
      "step": 1453140
    },
    {
      "epoch": 2.378128211674293,
      "grad_norm": 0.5446125864982605,
      "learning_rate": 5.278295763789594e-06,
      "loss": 0.0131,
      "step": 1453160
    },
    {
      "epoch": 2.378160942112946,
      "grad_norm": 0.5093841552734375,
      "learning_rate": 5.278229871576076e-06,
      "loss": 0.0117,
      "step": 1453180
    },
    {
      "epoch": 2.3781936725515997,
      "grad_norm": 0.1725204437971115,
      "learning_rate": 5.278163979362559e-06,
      "loss": 0.0146,
      "step": 1453200
    },
    {
      "epoch": 2.378226402990253,
      "grad_norm": 0.30386385321617126,
      "learning_rate": 5.278098087149042e-06,
      "loss": 0.0136,
      "step": 1453220
    },
    {
      "epoch": 2.378259133428906,
      "grad_norm": 0.3521471917629242,
      "learning_rate": 5.278032194935525e-06,
      "loss": 0.0147,
      "step": 1453240
    },
    {
      "epoch": 2.3782918638675596,
      "grad_norm": 0.4057823717594147,
      "learning_rate": 5.2779663027220076e-06,
      "loss": 0.0085,
      "step": 1453260
    },
    {
      "epoch": 2.378324594306213,
      "grad_norm": 0.5950642228126526,
      "learning_rate": 5.277900410508491e-06,
      "loss": 0.0153,
      "step": 1453280
    },
    {
      "epoch": 2.3783573247448664,
      "grad_norm": 0.31684136390686035,
      "learning_rate": 5.277834518294973e-06,
      "loss": 0.0181,
      "step": 1453300
    },
    {
      "epoch": 2.3783900551835195,
      "grad_norm": 0.6115705966949463,
      "learning_rate": 5.277768626081457e-06,
      "loss": 0.0176,
      "step": 1453320
    },
    {
      "epoch": 2.378422785622173,
      "grad_norm": 0.7006986141204834,
      "learning_rate": 5.2777027338679385e-06,
      "loss": 0.0207,
      "step": 1453340
    },
    {
      "epoch": 2.3784555160608263,
      "grad_norm": 0.9611226916313171,
      "learning_rate": 5.277636841654422e-06,
      "loss": 0.0174,
      "step": 1453360
    },
    {
      "epoch": 2.3784882464994794,
      "grad_norm": 0.45097219944000244,
      "learning_rate": 5.277570949440906e-06,
      "loss": 0.0175,
      "step": 1453380
    },
    {
      "epoch": 2.378520976938133,
      "grad_norm": 0.42782217264175415,
      "learning_rate": 5.277505057227388e-06,
      "loss": 0.018,
      "step": 1453400
    },
    {
      "epoch": 2.378553707376786,
      "grad_norm": 0.1423356980085373,
      "learning_rate": 5.277439165013871e-06,
      "loss": 0.0122,
      "step": 1453420
    },
    {
      "epoch": 2.3785864378154398,
      "grad_norm": 0.366807222366333,
      "learning_rate": 5.277373272800354e-06,
      "loss": 0.014,
      "step": 1453440
    },
    {
      "epoch": 2.378619168254093,
      "grad_norm": 0.08563077449798584,
      "learning_rate": 5.277307380586837e-06,
      "loss": 0.0192,
      "step": 1453460
    },
    {
      "epoch": 2.3786518986927465,
      "grad_norm": 0.1136322170495987,
      "learning_rate": 5.277241488373319e-06,
      "loss": 0.0152,
      "step": 1453480
    },
    {
      "epoch": 2.3786846291313997,
      "grad_norm": 0.149701327085495,
      "learning_rate": 5.277175596159803e-06,
      "loss": 0.0159,
      "step": 1453500
    },
    {
      "epoch": 2.378717359570053,
      "grad_norm": 0.7461643815040588,
      "learning_rate": 5.277109703946285e-06,
      "loss": 0.0156,
      "step": 1453520
    },
    {
      "epoch": 2.3787500900087064,
      "grad_norm": 0.5367377400398254,
      "learning_rate": 5.2770438117327685e-06,
      "loss": 0.0169,
      "step": 1453540
    },
    {
      "epoch": 2.3787828204473596,
      "grad_norm": 0.29574427008628845,
      "learning_rate": 5.27697791951925e-06,
      "loss": 0.0189,
      "step": 1453560
    },
    {
      "epoch": 2.378815550886013,
      "grad_norm": 0.32088878750801086,
      "learning_rate": 5.276912027305734e-06,
      "loss": 0.0094,
      "step": 1453580
    },
    {
      "epoch": 2.3788482813246663,
      "grad_norm": 0.5455835461616516,
      "learning_rate": 5.276846135092216e-06,
      "loss": 0.0122,
      "step": 1453600
    },
    {
      "epoch": 2.37888101176332,
      "grad_norm": 0.6956517696380615,
      "learning_rate": 5.276780242878699e-06,
      "loss": 0.0151,
      "step": 1453620
    },
    {
      "epoch": 2.378913742201973,
      "grad_norm": 0.4888678193092346,
      "learning_rate": 5.276714350665182e-06,
      "loss": 0.0194,
      "step": 1453640
    },
    {
      "epoch": 2.378946472640626,
      "grad_norm": 0.591251790523529,
      "learning_rate": 5.276648458451665e-06,
      "loss": 0.0155,
      "step": 1453660
    },
    {
      "epoch": 2.37897920307928,
      "grad_norm": 1.007367491722107,
      "learning_rate": 5.276582566238148e-06,
      "loss": 0.0207,
      "step": 1453680
    },
    {
      "epoch": 2.379011933517933,
      "grad_norm": 0.13786809146404266,
      "learning_rate": 5.276516674024631e-06,
      "loss": 0.0105,
      "step": 1453700
    },
    {
      "epoch": 2.3790446639565865,
      "grad_norm": 0.24493303894996643,
      "learning_rate": 5.276450781811114e-06,
      "loss": 0.0174,
      "step": 1453720
    },
    {
      "epoch": 2.3790773943952397,
      "grad_norm": 0.5309174060821533,
      "learning_rate": 5.276384889597597e-06,
      "loss": 0.0144,
      "step": 1453740
    },
    {
      "epoch": 2.379110124833893,
      "grad_norm": 0.604326605796814,
      "learning_rate": 5.27631899738408e-06,
      "loss": 0.0202,
      "step": 1453760
    },
    {
      "epoch": 2.3791428552725464,
      "grad_norm": 0.8126505017280579,
      "learning_rate": 5.276253105170562e-06,
      "loss": 0.0189,
      "step": 1453780
    },
    {
      "epoch": 2.3791755857111996,
      "grad_norm": 0.6925646662712097,
      "learning_rate": 5.276187212957046e-06,
      "loss": 0.0121,
      "step": 1453800
    },
    {
      "epoch": 2.379208316149853,
      "grad_norm": 0.5094243288040161,
      "learning_rate": 5.276121320743528e-06,
      "loss": 0.0112,
      "step": 1453820
    },
    {
      "epoch": 2.3792410465885063,
      "grad_norm": 0.6376654505729675,
      "learning_rate": 5.276055428530011e-06,
      "loss": 0.011,
      "step": 1453840
    },
    {
      "epoch": 2.3792737770271595,
      "grad_norm": 0.30078595876693726,
      "learning_rate": 5.275989536316494e-06,
      "loss": 0.011,
      "step": 1453860
    },
    {
      "epoch": 2.379306507465813,
      "grad_norm": 1.1406792402267456,
      "learning_rate": 5.275923644102977e-06,
      "loss": 0.0183,
      "step": 1453880
    },
    {
      "epoch": 2.379339237904466,
      "grad_norm": 0.21567967534065247,
      "learning_rate": 5.2758577518894595e-06,
      "loss": 0.014,
      "step": 1453900
    },
    {
      "epoch": 2.37937196834312,
      "grad_norm": 0.33844101428985596,
      "learning_rate": 5.275791859675943e-06,
      "loss": 0.0171,
      "step": 1453920
    },
    {
      "epoch": 2.379404698781773,
      "grad_norm": 0.5148988366127014,
      "learning_rate": 5.275725967462425e-06,
      "loss": 0.0155,
      "step": 1453940
    },
    {
      "epoch": 2.3794374292204266,
      "grad_norm": 0.8351311683654785,
      "learning_rate": 5.2756600752489085e-06,
      "loss": 0.0201,
      "step": 1453960
    },
    {
      "epoch": 2.3794701596590797,
      "grad_norm": 0.38542118668556213,
      "learning_rate": 5.2755941830353904e-06,
      "loss": 0.0175,
      "step": 1453980
    },
    {
      "epoch": 2.379502890097733,
      "grad_norm": 1.9866437911987305,
      "learning_rate": 5.275528290821874e-06,
      "loss": 0.0191,
      "step": 1454000
    },
    {
      "epoch": 2.3795356205363865,
      "grad_norm": 0.34013187885284424,
      "learning_rate": 5.275462398608357e-06,
      "loss": 0.012,
      "step": 1454020
    },
    {
      "epoch": 2.3795683509750396,
      "grad_norm": 0.25813353061676025,
      "learning_rate": 5.2753965063948395e-06,
      "loss": 0.0124,
      "step": 1454040
    },
    {
      "epoch": 2.379601081413693,
      "grad_norm": 1.0659867525100708,
      "learning_rate": 5.275330614181322e-06,
      "loss": 0.0182,
      "step": 1454060
    },
    {
      "epoch": 2.3796338118523463,
      "grad_norm": 0.5213051438331604,
      "learning_rate": 5.275264721967806e-06,
      "loss": 0.0212,
      "step": 1454080
    },
    {
      "epoch": 2.379666542291,
      "grad_norm": 0.4489862024784088,
      "learning_rate": 5.2751988297542886e-06,
      "loss": 0.0193,
      "step": 1454100
    },
    {
      "epoch": 2.379699272729653,
      "grad_norm": 0.40290606021881104,
      "learning_rate": 5.275132937540771e-06,
      "loss": 0.0132,
      "step": 1454120
    },
    {
      "epoch": 2.3797320031683062,
      "grad_norm": 0.2790936827659607,
      "learning_rate": 5.275067045327255e-06,
      "loss": 0.0151,
      "step": 1454140
    },
    {
      "epoch": 2.37976473360696,
      "grad_norm": 0.6081938147544861,
      "learning_rate": 5.275001153113737e-06,
      "loss": 0.0218,
      "step": 1454160
    },
    {
      "epoch": 2.379797464045613,
      "grad_norm": 0.5103521347045898,
      "learning_rate": 5.27493526090022e-06,
      "loss": 0.0157,
      "step": 1454180
    },
    {
      "epoch": 2.3798301944842666,
      "grad_norm": 0.7237333655357361,
      "learning_rate": 5.274869368686702e-06,
      "loss": 0.0141,
      "step": 1454200
    },
    {
      "epoch": 2.3798629249229197,
      "grad_norm": 0.2785174250602722,
      "learning_rate": 5.274803476473186e-06,
      "loss": 0.0179,
      "step": 1454220
    },
    {
      "epoch": 2.3798956553615733,
      "grad_norm": 0.7862023711204529,
      "learning_rate": 5.274737584259669e-06,
      "loss": 0.0193,
      "step": 1454240
    },
    {
      "epoch": 2.3799283858002265,
      "grad_norm": 0.1359119713306427,
      "learning_rate": 5.274671692046151e-06,
      "loss": 0.0126,
      "step": 1454260
    },
    {
      "epoch": 2.3799611162388796,
      "grad_norm": 0.09033439308404922,
      "learning_rate": 5.274605799832634e-06,
      "loss": 0.0115,
      "step": 1454280
    },
    {
      "epoch": 2.379993846677533,
      "grad_norm": 0.21476058661937714,
      "learning_rate": 5.274539907619118e-06,
      "loss": 0.0163,
      "step": 1454300
    },
    {
      "epoch": 2.3800265771161864,
      "grad_norm": 0.32647451758384705,
      "learning_rate": 5.2744740154055995e-06,
      "loss": 0.0122,
      "step": 1454320
    },
    {
      "epoch": 2.38005930755484,
      "grad_norm": 0.9156243801116943,
      "learning_rate": 5.274408123192083e-06,
      "loss": 0.0101,
      "step": 1454340
    },
    {
      "epoch": 2.380092037993493,
      "grad_norm": 0.231936514377594,
      "learning_rate": 5.274342230978565e-06,
      "loss": 0.0158,
      "step": 1454360
    },
    {
      "epoch": 2.3801247684321467,
      "grad_norm": 0.5206241607666016,
      "learning_rate": 5.274276338765049e-06,
      "loss": 0.0175,
      "step": 1454380
    },
    {
      "epoch": 2.3801574988708,
      "grad_norm": 0.32474035024642944,
      "learning_rate": 5.2742104465515305e-06,
      "loss": 0.016,
      "step": 1454400
    },
    {
      "epoch": 2.380190229309453,
      "grad_norm": 0.25226348638534546,
      "learning_rate": 5.274144554338014e-06,
      "loss": 0.0127,
      "step": 1454420
    },
    {
      "epoch": 2.3802229597481066,
      "grad_norm": 0.6241364479064941,
      "learning_rate": 5.274078662124498e-06,
      "loss": 0.0139,
      "step": 1454440
    },
    {
      "epoch": 2.3802556901867598,
      "grad_norm": 0.6937259435653687,
      "learning_rate": 5.2740127699109796e-06,
      "loss": 0.0233,
      "step": 1454460
    },
    {
      "epoch": 2.3802884206254133,
      "grad_norm": 0.13700918853282928,
      "learning_rate": 5.273946877697463e-06,
      "loss": 0.0189,
      "step": 1454480
    },
    {
      "epoch": 2.3803211510640665,
      "grad_norm": 0.8904904723167419,
      "learning_rate": 5.273880985483946e-06,
      "loss": 0.0189,
      "step": 1454500
    },
    {
      "epoch": 2.38035388150272,
      "grad_norm": 0.23265551030635834,
      "learning_rate": 5.2738150932704295e-06,
      "loss": 0.0212,
      "step": 1454520
    },
    {
      "epoch": 2.3803866119413732,
      "grad_norm": 0.5387882590293884,
      "learning_rate": 5.273749201056911e-06,
      "loss": 0.0147,
      "step": 1454540
    },
    {
      "epoch": 2.3804193423800264,
      "grad_norm": 1.631811261177063,
      "learning_rate": 5.273683308843395e-06,
      "loss": 0.0208,
      "step": 1454560
    },
    {
      "epoch": 2.38045207281868,
      "grad_norm": 0.34251418709754944,
      "learning_rate": 5.273617416629877e-06,
      "loss": 0.0183,
      "step": 1454580
    },
    {
      "epoch": 2.380484803257333,
      "grad_norm": 0.23353172838687897,
      "learning_rate": 5.2735515244163604e-06,
      "loss": 0.0103,
      "step": 1454600
    },
    {
      "epoch": 2.3805175336959867,
      "grad_norm": 0.19443735480308533,
      "learning_rate": 5.273485632202842e-06,
      "loss": 0.0133,
      "step": 1454620
    },
    {
      "epoch": 2.38055026413464,
      "grad_norm": 0.5285565853118896,
      "learning_rate": 5.273419739989326e-06,
      "loss": 0.0153,
      "step": 1454640
    },
    {
      "epoch": 2.3805829945732935,
      "grad_norm": 0.1588684320449829,
      "learning_rate": 5.273353847775809e-06,
      "loss": 0.0133,
      "step": 1454660
    },
    {
      "epoch": 2.3806157250119466,
      "grad_norm": 0.7003402709960938,
      "learning_rate": 5.273287955562291e-06,
      "loss": 0.0161,
      "step": 1454680
    },
    {
      "epoch": 2.3806484554505998,
      "grad_norm": 0.23675836622714996,
      "learning_rate": 5.273222063348774e-06,
      "loss": 0.0132,
      "step": 1454700
    },
    {
      "epoch": 2.3806811858892534,
      "grad_norm": 1.0874814987182617,
      "learning_rate": 5.273156171135258e-06,
      "loss": 0.0111,
      "step": 1454720
    },
    {
      "epoch": 2.3807139163279065,
      "grad_norm": 0.43322280049324036,
      "learning_rate": 5.27309027892174e-06,
      "loss": 0.0151,
      "step": 1454740
    },
    {
      "epoch": 2.38074664676656,
      "grad_norm": 0.6573086977005005,
      "learning_rate": 5.273024386708223e-06,
      "loss": 0.0133,
      "step": 1454760
    },
    {
      "epoch": 2.3807793772052133,
      "grad_norm": 1.1376147270202637,
      "learning_rate": 5.272958494494707e-06,
      "loss": 0.0127,
      "step": 1454780
    },
    {
      "epoch": 2.380812107643867,
      "grad_norm": 0.8569564819335938,
      "learning_rate": 5.272892602281189e-06,
      "loss": 0.0164,
      "step": 1454800
    },
    {
      "epoch": 2.38084483808252,
      "grad_norm": 0.31496524810791016,
      "learning_rate": 5.272826710067672e-06,
      "loss": 0.0125,
      "step": 1454820
    },
    {
      "epoch": 2.380877568521173,
      "grad_norm": 0.19272805750370026,
      "learning_rate": 5.272760817854154e-06,
      "loss": 0.0155,
      "step": 1454840
    },
    {
      "epoch": 2.3809102989598268,
      "grad_norm": 1.2373661994934082,
      "learning_rate": 5.272694925640638e-06,
      "loss": 0.0135,
      "step": 1454860
    },
    {
      "epoch": 2.38094302939848,
      "grad_norm": 0.7729597091674805,
      "learning_rate": 5.2726290334271205e-06,
      "loss": 0.0084,
      "step": 1454880
    },
    {
      "epoch": 2.3809757598371335,
      "grad_norm": 0.12434788793325424,
      "learning_rate": 5.272563141213603e-06,
      "loss": 0.0131,
      "step": 1454900
    },
    {
      "epoch": 2.3810084902757866,
      "grad_norm": 0.5491542816162109,
      "learning_rate": 5.272497249000086e-06,
      "loss": 0.0181,
      "step": 1454920
    },
    {
      "epoch": 2.3810412207144402,
      "grad_norm": 0.43207651376724243,
      "learning_rate": 5.2724313567865696e-06,
      "loss": 0.0191,
      "step": 1454940
    },
    {
      "epoch": 2.3810739511530934,
      "grad_norm": 0.9532845616340637,
      "learning_rate": 5.2723654645730515e-06,
      "loss": 0.0149,
      "step": 1454960
    },
    {
      "epoch": 2.3811066815917465,
      "grad_norm": 0.8821338415145874,
      "learning_rate": 5.272299572359535e-06,
      "loss": 0.017,
      "step": 1454980
    },
    {
      "epoch": 2.3811394120304,
      "grad_norm": 0.2217513769865036,
      "learning_rate": 5.272233680146017e-06,
      "loss": 0.0198,
      "step": 1455000
    },
    {
      "epoch": 2.3811721424690533,
      "grad_norm": 0.4749930799007416,
      "learning_rate": 5.2721677879325005e-06,
      "loss": 0.0145,
      "step": 1455020
    },
    {
      "epoch": 2.381204872907707,
      "grad_norm": 0.44269710779190063,
      "learning_rate": 5.272101895718983e-06,
      "loss": 0.0113,
      "step": 1455040
    },
    {
      "epoch": 2.38123760334636,
      "grad_norm": 0.10163715481758118,
      "learning_rate": 5.272036003505466e-06,
      "loss": 0.0098,
      "step": 1455060
    },
    {
      "epoch": 2.3812703337850136,
      "grad_norm": 0.3892142176628113,
      "learning_rate": 5.271970111291949e-06,
      "loss": 0.0177,
      "step": 1455080
    },
    {
      "epoch": 2.3813030642236668,
      "grad_norm": 0.1806184947490692,
      "learning_rate": 5.271904219078432e-06,
      "loss": 0.0137,
      "step": 1455100
    },
    {
      "epoch": 2.38133579466232,
      "grad_norm": 0.848456859588623,
      "learning_rate": 5.271838326864914e-06,
      "loss": 0.0217,
      "step": 1455120
    },
    {
      "epoch": 2.3813685251009735,
      "grad_norm": 0.23609966039657593,
      "learning_rate": 5.271772434651398e-06,
      "loss": 0.0123,
      "step": 1455140
    },
    {
      "epoch": 2.3814012555396267,
      "grad_norm": 0.3982012867927551,
      "learning_rate": 5.271706542437881e-06,
      "loss": 0.0165,
      "step": 1455160
    },
    {
      "epoch": 2.3814339859782803,
      "grad_norm": 0.2606818377971649,
      "learning_rate": 5.271640650224363e-06,
      "loss": 0.0143,
      "step": 1455180
    },
    {
      "epoch": 2.3814667164169334,
      "grad_norm": 0.2152431160211563,
      "learning_rate": 5.271574758010847e-06,
      "loss": 0.0219,
      "step": 1455200
    },
    {
      "epoch": 2.3814994468555866,
      "grad_norm": 0.5604333877563477,
      "learning_rate": 5.271508865797329e-06,
      "loss": 0.0189,
      "step": 1455220
    },
    {
      "epoch": 2.38153217729424,
      "grad_norm": 0.2089935839176178,
      "learning_rate": 5.271442973583812e-06,
      "loss": 0.0138,
      "step": 1455240
    },
    {
      "epoch": 2.3815649077328933,
      "grad_norm": 0.3294459581375122,
      "learning_rate": 5.271377081370295e-06,
      "loss": 0.0155,
      "step": 1455260
    },
    {
      "epoch": 2.381597638171547,
      "grad_norm": 0.3118021786212921,
      "learning_rate": 5.271311189156778e-06,
      "loss": 0.0121,
      "step": 1455280
    },
    {
      "epoch": 2.3816303686102,
      "grad_norm": 0.39581847190856934,
      "learning_rate": 5.2712452969432606e-06,
      "loss": 0.026,
      "step": 1455300
    },
    {
      "epoch": 2.381663099048853,
      "grad_norm": 0.17656847834587097,
      "learning_rate": 5.271179404729744e-06,
      "loss": 0.017,
      "step": 1455320
    },
    {
      "epoch": 2.381695829487507,
      "grad_norm": 1.6348445415496826,
      "learning_rate": 5.271113512516226e-06,
      "loss": 0.0173,
      "step": 1455340
    },
    {
      "epoch": 2.38172855992616,
      "grad_norm": 0.3061341643333435,
      "learning_rate": 5.27104762030271e-06,
      "loss": 0.0156,
      "step": 1455360
    },
    {
      "epoch": 2.3817612903648135,
      "grad_norm": 0.5800697803497314,
      "learning_rate": 5.2709817280891915e-06,
      "loss": 0.0236,
      "step": 1455380
    },
    {
      "epoch": 2.3817940208034667,
      "grad_norm": 0.2512061893939972,
      "learning_rate": 5.270915835875675e-06,
      "loss": 0.0196,
      "step": 1455400
    },
    {
      "epoch": 2.3818267512421203,
      "grad_norm": 0.175101175904274,
      "learning_rate": 5.270849943662157e-06,
      "loss": 0.0159,
      "step": 1455420
    },
    {
      "epoch": 2.3818594816807734,
      "grad_norm": 0.1332758516073227,
      "learning_rate": 5.270784051448641e-06,
      "loss": 0.0139,
      "step": 1455440
    },
    {
      "epoch": 2.3818922121194266,
      "grad_norm": 0.35979026556015015,
      "learning_rate": 5.270718159235123e-06,
      "loss": 0.0178,
      "step": 1455460
    },
    {
      "epoch": 2.38192494255808,
      "grad_norm": 0.3965424597263336,
      "learning_rate": 5.270652267021606e-06,
      "loss": 0.0151,
      "step": 1455480
    },
    {
      "epoch": 2.3819576729967333,
      "grad_norm": 0.19969089329242706,
      "learning_rate": 5.27058637480809e-06,
      "loss": 0.0142,
      "step": 1455500
    },
    {
      "epoch": 2.381990403435387,
      "grad_norm": 0.5223872065544128,
      "learning_rate": 5.270520482594572e-06,
      "loss": 0.0185,
      "step": 1455520
    },
    {
      "epoch": 2.38202313387404,
      "grad_norm": 0.09182092547416687,
      "learning_rate": 5.270454590381055e-06,
      "loss": 0.0115,
      "step": 1455540
    },
    {
      "epoch": 2.3820558643126937,
      "grad_norm": 0.558861255645752,
      "learning_rate": 5.270388698167538e-06,
      "loss": 0.0115,
      "step": 1455560
    },
    {
      "epoch": 2.382088594751347,
      "grad_norm": 0.7324596643447876,
      "learning_rate": 5.2703228059540215e-06,
      "loss": 0.0133,
      "step": 1455580
    },
    {
      "epoch": 2.38212132519,
      "grad_norm": 0.42735499143600464,
      "learning_rate": 5.270256913740503e-06,
      "loss": 0.021,
      "step": 1455600
    },
    {
      "epoch": 2.3821540556286536,
      "grad_norm": 0.41875651478767395,
      "learning_rate": 5.270191021526987e-06,
      "loss": 0.0173,
      "step": 1455620
    },
    {
      "epoch": 2.3821867860673067,
      "grad_norm": 0.11101946979761124,
      "learning_rate": 5.270125129313469e-06,
      "loss": 0.0182,
      "step": 1455640
    },
    {
      "epoch": 2.3822195165059603,
      "grad_norm": 0.2188558578491211,
      "learning_rate": 5.270059237099952e-06,
      "loss": 0.0135,
      "step": 1455660
    },
    {
      "epoch": 2.3822522469446135,
      "grad_norm": 0.22155214846134186,
      "learning_rate": 5.269993344886435e-06,
      "loss": 0.0224,
      "step": 1455680
    },
    {
      "epoch": 2.382284977383267,
      "grad_norm": 0.11212152242660522,
      "learning_rate": 5.269927452672918e-06,
      "loss": 0.0098,
      "step": 1455700
    },
    {
      "epoch": 2.38231770782192,
      "grad_norm": 0.4146164357662201,
      "learning_rate": 5.269861560459401e-06,
      "loss": 0.0151,
      "step": 1455720
    },
    {
      "epoch": 2.3823504382605734,
      "grad_norm": 0.5807972550392151,
      "learning_rate": 5.269795668245884e-06,
      "loss": 0.0171,
      "step": 1455740
    },
    {
      "epoch": 2.382383168699227,
      "grad_norm": 1.290104627609253,
      "learning_rate": 5.269729776032366e-06,
      "loss": 0.0243,
      "step": 1455760
    },
    {
      "epoch": 2.38241589913788,
      "grad_norm": 0.17539405822753906,
      "learning_rate": 5.26966388381885e-06,
      "loss": 0.0089,
      "step": 1455780
    },
    {
      "epoch": 2.3824486295765337,
      "grad_norm": 0.1736474633216858,
      "learning_rate": 5.269597991605332e-06,
      "loss": 0.0199,
      "step": 1455800
    },
    {
      "epoch": 2.382481360015187,
      "grad_norm": 0.6507100462913513,
      "learning_rate": 5.269532099391815e-06,
      "loss": 0.0094,
      "step": 1455820
    },
    {
      "epoch": 2.3825140904538404,
      "grad_norm": 0.8866470456123352,
      "learning_rate": 5.269466207178299e-06,
      "loss": 0.0161,
      "step": 1455840
    },
    {
      "epoch": 2.3825468208924936,
      "grad_norm": 0.18026375770568848,
      "learning_rate": 5.269400314964781e-06,
      "loss": 0.0106,
      "step": 1455860
    },
    {
      "epoch": 2.3825795513311467,
      "grad_norm": 0.4032134711742401,
      "learning_rate": 5.269334422751264e-06,
      "loss": 0.0179,
      "step": 1455880
    },
    {
      "epoch": 2.3826122817698003,
      "grad_norm": 0.493408203125,
      "learning_rate": 5.269268530537747e-06,
      "loss": 0.0234,
      "step": 1455900
    },
    {
      "epoch": 2.3826450122084535,
      "grad_norm": 0.11410007625818253,
      "learning_rate": 5.26920263832423e-06,
      "loss": 0.012,
      "step": 1455920
    },
    {
      "epoch": 2.382677742647107,
      "grad_norm": 0.20329244434833527,
      "learning_rate": 5.2691367461107125e-06,
      "loss": 0.0181,
      "step": 1455940
    },
    {
      "epoch": 2.3827104730857602,
      "grad_norm": 0.2940628230571747,
      "learning_rate": 5.269070853897196e-06,
      "loss": 0.0159,
      "step": 1455960
    },
    {
      "epoch": 2.382743203524414,
      "grad_norm": 0.4389113485813141,
      "learning_rate": 5.269004961683678e-06,
      "loss": 0.0154,
      "step": 1455980
    },
    {
      "epoch": 2.382775933963067,
      "grad_norm": 0.7000201940536499,
      "learning_rate": 5.2689390694701615e-06,
      "loss": 0.0165,
      "step": 1456000
    },
    {
      "epoch": 2.38280866440172,
      "grad_norm": 0.18606919050216675,
      "learning_rate": 5.2688731772566434e-06,
      "loss": 0.0115,
      "step": 1456020
    },
    {
      "epoch": 2.3828413948403737,
      "grad_norm": 0.377334326505661,
      "learning_rate": 5.268807285043127e-06,
      "loss": 0.0171,
      "step": 1456040
    },
    {
      "epoch": 2.382874125279027,
      "grad_norm": 0.1648380011320114,
      "learning_rate": 5.26874139282961e-06,
      "loss": 0.0153,
      "step": 1456060
    },
    {
      "epoch": 2.3829068557176805,
      "grad_norm": 0.6075953245162964,
      "learning_rate": 5.2686755006160925e-06,
      "loss": 0.0127,
      "step": 1456080
    },
    {
      "epoch": 2.3829395861563336,
      "grad_norm": 0.3318106234073639,
      "learning_rate": 5.268609608402575e-06,
      "loss": 0.0155,
      "step": 1456100
    },
    {
      "epoch": 2.382972316594987,
      "grad_norm": 0.19668573141098022,
      "learning_rate": 5.268543716189059e-06,
      "loss": 0.0094,
      "step": 1456120
    },
    {
      "epoch": 2.3830050470336404,
      "grad_norm": 0.9545766711235046,
      "learning_rate": 5.268477823975541e-06,
      "loss": 0.0157,
      "step": 1456140
    },
    {
      "epoch": 2.3830377774722935,
      "grad_norm": 0.3890950381755829,
      "learning_rate": 5.268411931762024e-06,
      "loss": 0.0151,
      "step": 1456160
    },
    {
      "epoch": 2.383070507910947,
      "grad_norm": 0.6513451933860779,
      "learning_rate": 5.268346039548506e-06,
      "loss": 0.0245,
      "step": 1456180
    },
    {
      "epoch": 2.3831032383496003,
      "grad_norm": 0.6055077314376831,
      "learning_rate": 5.26828014733499e-06,
      "loss": 0.0229,
      "step": 1456200
    },
    {
      "epoch": 2.383135968788254,
      "grad_norm": 0.31562915444374084,
      "learning_rate": 5.268214255121473e-06,
      "loss": 0.0195,
      "step": 1456220
    },
    {
      "epoch": 2.383168699226907,
      "grad_norm": 0.6324290037155151,
      "learning_rate": 5.268148362907955e-06,
      "loss": 0.0193,
      "step": 1456240
    },
    {
      "epoch": 2.3832014296655606,
      "grad_norm": 0.22823646664619446,
      "learning_rate": 5.268082470694439e-06,
      "loss": 0.02,
      "step": 1456260
    },
    {
      "epoch": 2.3832341601042137,
      "grad_norm": 0.20506364107131958,
      "learning_rate": 5.268016578480922e-06,
      "loss": 0.0127,
      "step": 1456280
    },
    {
      "epoch": 2.383266890542867,
      "grad_norm": 0.32585451006889343,
      "learning_rate": 5.267950686267404e-06,
      "loss": 0.0151,
      "step": 1456300
    },
    {
      "epoch": 2.3832996209815205,
      "grad_norm": 0.1394023299217224,
      "learning_rate": 5.267884794053887e-06,
      "loss": 0.0148,
      "step": 1456320
    },
    {
      "epoch": 2.3833323514201736,
      "grad_norm": 0.43553951382637024,
      "learning_rate": 5.267818901840371e-06,
      "loss": 0.0189,
      "step": 1456340
    },
    {
      "epoch": 2.3833650818588272,
      "grad_norm": 0.098970927298069,
      "learning_rate": 5.2677530096268525e-06,
      "loss": 0.0173,
      "step": 1456360
    },
    {
      "epoch": 2.3833978122974804,
      "grad_norm": 0.24048644304275513,
      "learning_rate": 5.267687117413336e-06,
      "loss": 0.0146,
      "step": 1456380
    },
    {
      "epoch": 2.383430542736134,
      "grad_norm": 0.07357057929039001,
      "learning_rate": 5.267621225199818e-06,
      "loss": 0.0154,
      "step": 1456400
    },
    {
      "epoch": 2.383463273174787,
      "grad_norm": 0.34489622712135315,
      "learning_rate": 5.267555332986302e-06,
      "loss": 0.0173,
      "step": 1456420
    },
    {
      "epoch": 2.3834960036134403,
      "grad_norm": 1.2875252962112427,
      "learning_rate": 5.2674894407727835e-06,
      "loss": 0.0198,
      "step": 1456440
    },
    {
      "epoch": 2.383528734052094,
      "grad_norm": 0.3074425756931305,
      "learning_rate": 5.267423548559267e-06,
      "loss": 0.0089,
      "step": 1456460
    },
    {
      "epoch": 2.383561464490747,
      "grad_norm": 0.6887904405593872,
      "learning_rate": 5.26735765634575e-06,
      "loss": 0.0156,
      "step": 1456480
    },
    {
      "epoch": 2.3835941949294006,
      "grad_norm": 0.44636350870132446,
      "learning_rate": 5.2672917641322326e-06,
      "loss": 0.0159,
      "step": 1456500
    },
    {
      "epoch": 2.3836269253680538,
      "grad_norm": 0.24071398377418518,
      "learning_rate": 5.267225871918715e-06,
      "loss": 0.0179,
      "step": 1456520
    },
    {
      "epoch": 2.3836596558067074,
      "grad_norm": 0.62568598985672,
      "learning_rate": 5.267159979705199e-06,
      "loss": 0.0196,
      "step": 1456540
    },
    {
      "epoch": 2.3836923862453605,
      "grad_norm": 0.3978894054889679,
      "learning_rate": 5.267094087491682e-06,
      "loss": 0.0228,
      "step": 1456560
    },
    {
      "epoch": 2.3837251166840137,
      "grad_norm": 0.23609106242656708,
      "learning_rate": 5.267028195278164e-06,
      "loss": 0.0143,
      "step": 1456580
    },
    {
      "epoch": 2.3837578471226673,
      "grad_norm": 0.3592812418937683,
      "learning_rate": 5.266962303064648e-06,
      "loss": 0.0139,
      "step": 1456600
    },
    {
      "epoch": 2.3837905775613204,
      "grad_norm": 0.12349279224872589,
      "learning_rate": 5.26689641085113e-06,
      "loss": 0.0147,
      "step": 1456620
    },
    {
      "epoch": 2.383823307999974,
      "grad_norm": 0.4136436879634857,
      "learning_rate": 5.2668305186376134e-06,
      "loss": 0.0197,
      "step": 1456640
    },
    {
      "epoch": 2.383856038438627,
      "grad_norm": 0.27839383482933044,
      "learning_rate": 5.266764626424095e-06,
      "loss": 0.0081,
      "step": 1456660
    },
    {
      "epoch": 2.3838887688772807,
      "grad_norm": 0.7176029086112976,
      "learning_rate": 5.266698734210579e-06,
      "loss": 0.0216,
      "step": 1456680
    },
    {
      "epoch": 2.383921499315934,
      "grad_norm": 0.5570200085639954,
      "learning_rate": 5.266632841997062e-06,
      "loss": 0.0157,
      "step": 1456700
    },
    {
      "epoch": 2.383954229754587,
      "grad_norm": 0.28643491864204407,
      "learning_rate": 5.266566949783544e-06,
      "loss": 0.0102,
      "step": 1456720
    },
    {
      "epoch": 2.3839869601932406,
      "grad_norm": 0.28110790252685547,
      "learning_rate": 5.266501057570027e-06,
      "loss": 0.0112,
      "step": 1456740
    },
    {
      "epoch": 2.384019690631894,
      "grad_norm": 1.6202459335327148,
      "learning_rate": 5.266435165356511e-06,
      "loss": 0.0158,
      "step": 1456760
    },
    {
      "epoch": 2.3840524210705474,
      "grad_norm": 1.1945459842681885,
      "learning_rate": 5.266369273142993e-06,
      "loss": 0.0173,
      "step": 1456780
    },
    {
      "epoch": 2.3840851515092005,
      "grad_norm": 0.15450888872146606,
      "learning_rate": 5.266303380929476e-06,
      "loss": 0.0205,
      "step": 1456800
    },
    {
      "epoch": 2.3841178819478537,
      "grad_norm": 0.3501635789871216,
      "learning_rate": 5.266237488715958e-06,
      "loss": 0.0112,
      "step": 1456820
    },
    {
      "epoch": 2.3841506123865073,
      "grad_norm": 0.18406780064105988,
      "learning_rate": 5.266171596502442e-06,
      "loss": 0.0094,
      "step": 1456840
    },
    {
      "epoch": 2.3841833428251604,
      "grad_norm": 0.56696617603302,
      "learning_rate": 5.2661057042889244e-06,
      "loss": 0.0199,
      "step": 1456860
    },
    {
      "epoch": 2.384216073263814,
      "grad_norm": 0.21715646982192993,
      "learning_rate": 5.266039812075407e-06,
      "loss": 0.0129,
      "step": 1456880
    },
    {
      "epoch": 2.384248803702467,
      "grad_norm": 0.7093523144721985,
      "learning_rate": 5.265973919861891e-06,
      "loss": 0.0171,
      "step": 1456900
    },
    {
      "epoch": 2.3842815341411203,
      "grad_norm": 0.9787930846214294,
      "learning_rate": 5.2659080276483735e-06,
      "loss": 0.0174,
      "step": 1456920
    },
    {
      "epoch": 2.384314264579774,
      "grad_norm": 0.5028433799743652,
      "learning_rate": 5.265842135434856e-06,
      "loss": 0.0134,
      "step": 1456940
    },
    {
      "epoch": 2.384346995018427,
      "grad_norm": 0.3425479829311371,
      "learning_rate": 5.265776243221339e-06,
      "loss": 0.0189,
      "step": 1456960
    },
    {
      "epoch": 2.3843797254570807,
      "grad_norm": 0.38556164503097534,
      "learning_rate": 5.2657103510078226e-06,
      "loss": 0.0249,
      "step": 1456980
    },
    {
      "epoch": 2.384412455895734,
      "grad_norm": 0.2890714406967163,
      "learning_rate": 5.2656444587943045e-06,
      "loss": 0.0139,
      "step": 1457000
    },
    {
      "epoch": 2.3844451863343874,
      "grad_norm": 1.0658442974090576,
      "learning_rate": 5.265578566580788e-06,
      "loss": 0.0152,
      "step": 1457020
    },
    {
      "epoch": 2.3844779167730406,
      "grad_norm": 0.2220078557729721,
      "learning_rate": 5.26551267436727e-06,
      "loss": 0.0158,
      "step": 1457040
    },
    {
      "epoch": 2.3845106472116937,
      "grad_norm": 0.20455226302146912,
      "learning_rate": 5.2654467821537535e-06,
      "loss": 0.0109,
      "step": 1457060
    },
    {
      "epoch": 2.3845433776503473,
      "grad_norm": 0.19092480838298798,
      "learning_rate": 5.265380889940236e-06,
      "loss": 0.0156,
      "step": 1457080
    },
    {
      "epoch": 2.3845761080890004,
      "grad_norm": 0.13332568109035492,
      "learning_rate": 5.265314997726719e-06,
      "loss": 0.0163,
      "step": 1457100
    },
    {
      "epoch": 2.384608838527654,
      "grad_norm": 0.4706149399280548,
      "learning_rate": 5.265249105513202e-06,
      "loss": 0.0159,
      "step": 1457120
    },
    {
      "epoch": 2.384641568966307,
      "grad_norm": 0.5286157131195068,
      "learning_rate": 5.265183213299685e-06,
      "loss": 0.0173,
      "step": 1457140
    },
    {
      "epoch": 2.384674299404961,
      "grad_norm": 0.929298460483551,
      "learning_rate": 5.265117321086167e-06,
      "loss": 0.015,
      "step": 1457160
    },
    {
      "epoch": 2.384707029843614,
      "grad_norm": 0.8019680380821228,
      "learning_rate": 5.265051428872651e-06,
      "loss": 0.0172,
      "step": 1457180
    },
    {
      "epoch": 2.384739760282267,
      "grad_norm": 0.3065533936023712,
      "learning_rate": 5.264985536659133e-06,
      "loss": 0.021,
      "step": 1457200
    },
    {
      "epoch": 2.3847724907209207,
      "grad_norm": 0.2729366421699524,
      "learning_rate": 5.264919644445616e-06,
      "loss": 0.0146,
      "step": 1457220
    },
    {
      "epoch": 2.384805221159574,
      "grad_norm": 0.5686271786689758,
      "learning_rate": 5.2648537522321e-06,
      "loss": 0.0125,
      "step": 1457240
    },
    {
      "epoch": 2.3848379515982274,
      "grad_norm": 0.9227684140205383,
      "learning_rate": 5.264787860018582e-06,
      "loss": 0.0193,
      "step": 1457260
    },
    {
      "epoch": 2.3848706820368806,
      "grad_norm": 0.4733947515487671,
      "learning_rate": 5.264721967805065e-06,
      "loss": 0.0118,
      "step": 1457280
    },
    {
      "epoch": 2.384903412475534,
      "grad_norm": 0.4394732713699341,
      "learning_rate": 5.264656075591548e-06,
      "loss": 0.0111,
      "step": 1457300
    },
    {
      "epoch": 2.3849361429141873,
      "grad_norm": 0.16340123116970062,
      "learning_rate": 5.264590183378031e-06,
      "loss": 0.0129,
      "step": 1457320
    },
    {
      "epoch": 2.3849688733528405,
      "grad_norm": 0.27482515573501587,
      "learning_rate": 5.2645242911645136e-06,
      "loss": 0.0145,
      "step": 1457340
    },
    {
      "epoch": 2.385001603791494,
      "grad_norm": 0.46943435072898865,
      "learning_rate": 5.264458398950997e-06,
      "loss": 0.0217,
      "step": 1457360
    },
    {
      "epoch": 2.385034334230147,
      "grad_norm": 0.43099215626716614,
      "learning_rate": 5.264392506737479e-06,
      "loss": 0.0134,
      "step": 1457380
    },
    {
      "epoch": 2.385067064668801,
      "grad_norm": 0.283363401889801,
      "learning_rate": 5.264326614523963e-06,
      "loss": 0.0114,
      "step": 1457400
    },
    {
      "epoch": 2.385099795107454,
      "grad_norm": 0.5235769748687744,
      "learning_rate": 5.2642607223104445e-06,
      "loss": 0.0114,
      "step": 1457420
    },
    {
      "epoch": 2.3851325255461076,
      "grad_norm": 0.3085705041885376,
      "learning_rate": 5.264194830096928e-06,
      "loss": 0.013,
      "step": 1457440
    },
    {
      "epoch": 2.3851652559847607,
      "grad_norm": 0.3944374918937683,
      "learning_rate": 5.26412893788341e-06,
      "loss": 0.0129,
      "step": 1457460
    },
    {
      "epoch": 2.385197986423414,
      "grad_norm": 0.3646683692932129,
      "learning_rate": 5.264063045669894e-06,
      "loss": 0.018,
      "step": 1457480
    },
    {
      "epoch": 2.3852307168620674,
      "grad_norm": 0.1652388721704483,
      "learning_rate": 5.263997153456376e-06,
      "loss": 0.0159,
      "step": 1457500
    },
    {
      "epoch": 2.3852634473007206,
      "grad_norm": 0.286201536655426,
      "learning_rate": 5.263931261242859e-06,
      "loss": 0.0167,
      "step": 1457520
    },
    {
      "epoch": 2.385296177739374,
      "grad_norm": 0.5693939328193665,
      "learning_rate": 5.263865369029342e-06,
      "loss": 0.0149,
      "step": 1457540
    },
    {
      "epoch": 2.3853289081780273,
      "grad_norm": 1.255520224571228,
      "learning_rate": 5.263799476815825e-06,
      "loss": 0.0137,
      "step": 1457560
    },
    {
      "epoch": 2.385361638616681,
      "grad_norm": 0.4971437156200409,
      "learning_rate": 5.263733584602307e-06,
      "loss": 0.0119,
      "step": 1457580
    },
    {
      "epoch": 2.385394369055334,
      "grad_norm": 0.12704764306545258,
      "learning_rate": 5.263667692388791e-06,
      "loss": 0.015,
      "step": 1457600
    },
    {
      "epoch": 2.3854270994939872,
      "grad_norm": 0.3621823787689209,
      "learning_rate": 5.2636018001752745e-06,
      "loss": 0.011,
      "step": 1457620
    },
    {
      "epoch": 2.385459829932641,
      "grad_norm": 0.159826397895813,
      "learning_rate": 5.263535907961756e-06,
      "loss": 0.0144,
      "step": 1457640
    },
    {
      "epoch": 2.385492560371294,
      "grad_norm": 1.177813172340393,
      "learning_rate": 5.26347001574824e-06,
      "loss": 0.0141,
      "step": 1457660
    },
    {
      "epoch": 2.3855252908099476,
      "grad_norm": 0.8396813273429871,
      "learning_rate": 5.263404123534722e-06,
      "loss": 0.012,
      "step": 1457680
    },
    {
      "epoch": 2.3855580212486007,
      "grad_norm": 0.10261163115501404,
      "learning_rate": 5.2633382313212054e-06,
      "loss": 0.0153,
      "step": 1457700
    },
    {
      "epoch": 2.3855907516872543,
      "grad_norm": 0.9105680584907532,
      "learning_rate": 5.263272339107688e-06,
      "loss": 0.0127,
      "step": 1457720
    },
    {
      "epoch": 2.3856234821259075,
      "grad_norm": 1.1879394054412842,
      "learning_rate": 5.263206446894171e-06,
      "loss": 0.0141,
      "step": 1457740
    },
    {
      "epoch": 2.3856562125645606,
      "grad_norm": 0.20994985103607178,
      "learning_rate": 5.263140554680654e-06,
      "loss": 0.0122,
      "step": 1457760
    },
    {
      "epoch": 2.385688943003214,
      "grad_norm": 0.15202327072620392,
      "learning_rate": 5.263074662467137e-06,
      "loss": 0.0154,
      "step": 1457780
    },
    {
      "epoch": 2.3857216734418674,
      "grad_norm": 0.21574924886226654,
      "learning_rate": 5.263008770253619e-06,
      "loss": 0.0134,
      "step": 1457800
    },
    {
      "epoch": 2.385754403880521,
      "grad_norm": 0.31102612614631653,
      "learning_rate": 5.262942878040103e-06,
      "loss": 0.0175,
      "step": 1457820
    },
    {
      "epoch": 2.385787134319174,
      "grad_norm": 0.30615395307540894,
      "learning_rate": 5.262876985826585e-06,
      "loss": 0.0201,
      "step": 1457840
    },
    {
      "epoch": 2.3858198647578277,
      "grad_norm": 0.22330030798912048,
      "learning_rate": 5.262811093613068e-06,
      "loss": 0.0137,
      "step": 1457860
    },
    {
      "epoch": 2.385852595196481,
      "grad_norm": 0.2430981546640396,
      "learning_rate": 5.262745201399551e-06,
      "loss": 0.0149,
      "step": 1457880
    },
    {
      "epoch": 2.385885325635134,
      "grad_norm": 0.23105990886688232,
      "learning_rate": 5.262679309186034e-06,
      "loss": 0.0132,
      "step": 1457900
    },
    {
      "epoch": 2.3859180560737876,
      "grad_norm": 0.5512159466743469,
      "learning_rate": 5.262613416972516e-06,
      "loss": 0.0188,
      "step": 1457920
    },
    {
      "epoch": 2.3859507865124407,
      "grad_norm": 0.209319069981575,
      "learning_rate": 5.262547524759e-06,
      "loss": 0.0135,
      "step": 1457940
    },
    {
      "epoch": 2.3859835169510943,
      "grad_norm": 0.1484275907278061,
      "learning_rate": 5.262481632545483e-06,
      "loss": 0.0169,
      "step": 1457960
    },
    {
      "epoch": 2.3860162473897475,
      "grad_norm": 0.4240429103374481,
      "learning_rate": 5.2624157403319655e-06,
      "loss": 0.0111,
      "step": 1457980
    },
    {
      "epoch": 2.386048977828401,
      "grad_norm": 0.24126596748828888,
      "learning_rate": 5.262349848118449e-06,
      "loss": 0.0134,
      "step": 1458000
    },
    {
      "epoch": 2.3860817082670542,
      "grad_norm": 0.207184299826622,
      "learning_rate": 5.262283955904931e-06,
      "loss": 0.0169,
      "step": 1458020
    },
    {
      "epoch": 2.3861144387057074,
      "grad_norm": 0.31993621587753296,
      "learning_rate": 5.2622180636914145e-06,
      "loss": 0.0153,
      "step": 1458040
    },
    {
      "epoch": 2.386147169144361,
      "grad_norm": 1.1035163402557373,
      "learning_rate": 5.2621521714778964e-06,
      "loss": 0.0171,
      "step": 1458060
    },
    {
      "epoch": 2.386179899583014,
      "grad_norm": 0.2922346889972687,
      "learning_rate": 5.26208627926438e-06,
      "loss": 0.017,
      "step": 1458080
    },
    {
      "epoch": 2.3862126300216677,
      "grad_norm": 0.39326924085617065,
      "learning_rate": 5.262020387050863e-06,
      "loss": 0.0158,
      "step": 1458100
    },
    {
      "epoch": 2.386245360460321,
      "grad_norm": 0.3563445508480072,
      "learning_rate": 5.2619544948373455e-06,
      "loss": 0.0138,
      "step": 1458120
    },
    {
      "epoch": 2.3862780908989745,
      "grad_norm": 0.7970623970031738,
      "learning_rate": 5.261888602623828e-06,
      "loss": 0.015,
      "step": 1458140
    },
    {
      "epoch": 2.3863108213376276,
      "grad_norm": 0.2781386077404022,
      "learning_rate": 5.261822710410312e-06,
      "loss": 0.0131,
      "step": 1458160
    },
    {
      "epoch": 2.3863435517762808,
      "grad_norm": 0.7617402076721191,
      "learning_rate": 5.261756818196794e-06,
      "loss": 0.0204,
      "step": 1458180
    },
    {
      "epoch": 2.3863762822149344,
      "grad_norm": 0.2996895909309387,
      "learning_rate": 5.261690925983277e-06,
      "loss": 0.0132,
      "step": 1458200
    },
    {
      "epoch": 2.3864090126535875,
      "grad_norm": 0.36387455463409424,
      "learning_rate": 5.261625033769759e-06,
      "loss": 0.0117,
      "step": 1458220
    },
    {
      "epoch": 2.386441743092241,
      "grad_norm": 0.2709568440914154,
      "learning_rate": 5.261559141556243e-06,
      "loss": 0.0151,
      "step": 1458240
    },
    {
      "epoch": 2.3864744735308943,
      "grad_norm": 1.132760763168335,
      "learning_rate": 5.261493249342725e-06,
      "loss": 0.0199,
      "step": 1458260
    },
    {
      "epoch": 2.3865072039695474,
      "grad_norm": 0.8408125638961792,
      "learning_rate": 5.261427357129208e-06,
      "loss": 0.0204,
      "step": 1458280
    },
    {
      "epoch": 2.386539934408201,
      "grad_norm": 0.23511622846126556,
      "learning_rate": 5.261361464915692e-06,
      "loss": 0.0147,
      "step": 1458300
    },
    {
      "epoch": 2.386572664846854,
      "grad_norm": 0.4659630060195923,
      "learning_rate": 5.261295572702174e-06,
      "loss": 0.0134,
      "step": 1458320
    },
    {
      "epoch": 2.3866053952855077,
      "grad_norm": 0.10799773037433624,
      "learning_rate": 5.261229680488657e-06,
      "loss": 0.0252,
      "step": 1458340
    },
    {
      "epoch": 2.386638125724161,
      "grad_norm": 0.5625495910644531,
      "learning_rate": 5.26116378827514e-06,
      "loss": 0.0162,
      "step": 1458360
    },
    {
      "epoch": 2.386670856162814,
      "grad_norm": 0.21053026616573334,
      "learning_rate": 5.261097896061624e-06,
      "loss": 0.0137,
      "step": 1458380
    },
    {
      "epoch": 2.3867035866014676,
      "grad_norm": 0.28670981526374817,
      "learning_rate": 5.2610320038481056e-06,
      "loss": 0.0136,
      "step": 1458400
    },
    {
      "epoch": 2.386736317040121,
      "grad_norm": 1.3541460037231445,
      "learning_rate": 5.260966111634589e-06,
      "loss": 0.0222,
      "step": 1458420
    },
    {
      "epoch": 2.3867690474787744,
      "grad_norm": 0.9036542177200317,
      "learning_rate": 5.260900219421071e-06,
      "loss": 0.0122,
      "step": 1458440
    },
    {
      "epoch": 2.3868017779174275,
      "grad_norm": 0.2685554027557373,
      "learning_rate": 5.260834327207555e-06,
      "loss": 0.0118,
      "step": 1458460
    },
    {
      "epoch": 2.386834508356081,
      "grad_norm": 0.1232689619064331,
      "learning_rate": 5.2607684349940365e-06,
      "loss": 0.0196,
      "step": 1458480
    },
    {
      "epoch": 2.3868672387947343,
      "grad_norm": 0.18589919805526733,
      "learning_rate": 5.26070254278052e-06,
      "loss": 0.0182,
      "step": 1458500
    },
    {
      "epoch": 2.3868999692333874,
      "grad_norm": 0.5132969617843628,
      "learning_rate": 5.260636650567003e-06,
      "loss": 0.0173,
      "step": 1458520
    },
    {
      "epoch": 2.386932699672041,
      "grad_norm": 0.8853817582130432,
      "learning_rate": 5.260570758353486e-06,
      "loss": 0.021,
      "step": 1458540
    },
    {
      "epoch": 2.386965430110694,
      "grad_norm": 0.07506774365901947,
      "learning_rate": 5.260504866139968e-06,
      "loss": 0.0082,
      "step": 1458560
    },
    {
      "epoch": 2.3869981605493478,
      "grad_norm": 0.32611262798309326,
      "learning_rate": 5.260438973926452e-06,
      "loss": 0.0146,
      "step": 1458580
    },
    {
      "epoch": 2.387030890988001,
      "grad_norm": 0.284712553024292,
      "learning_rate": 5.260373081712934e-06,
      "loss": 0.0146,
      "step": 1458600
    },
    {
      "epoch": 2.3870636214266545,
      "grad_norm": 0.31576186418533325,
      "learning_rate": 5.260307189499417e-06,
      "loss": 0.0125,
      "step": 1458620
    },
    {
      "epoch": 2.3870963518653077,
      "grad_norm": 0.44131436944007874,
      "learning_rate": 5.260241297285899e-06,
      "loss": 0.0086,
      "step": 1458640
    },
    {
      "epoch": 2.387129082303961,
      "grad_norm": 0.19711120426654816,
      "learning_rate": 5.260175405072383e-06,
      "loss": 0.0115,
      "step": 1458660
    },
    {
      "epoch": 2.3871618127426144,
      "grad_norm": 0.42328694462776184,
      "learning_rate": 5.2601095128588665e-06,
      "loss": 0.0123,
      "step": 1458680
    },
    {
      "epoch": 2.3871945431812676,
      "grad_norm": 0.2001805156469345,
      "learning_rate": 5.260043620645348e-06,
      "loss": 0.011,
      "step": 1458700
    },
    {
      "epoch": 2.387227273619921,
      "grad_norm": 0.4116462469100952,
      "learning_rate": 5.259977728431832e-06,
      "loss": 0.0146,
      "step": 1458720
    },
    {
      "epoch": 2.3872600040585743,
      "grad_norm": 0.10848769545555115,
      "learning_rate": 5.259911836218315e-06,
      "loss": 0.0142,
      "step": 1458740
    },
    {
      "epoch": 2.387292734497228,
      "grad_norm": 0.27801430225372314,
      "learning_rate": 5.259845944004797e-06,
      "loss": 0.0094,
      "step": 1458760
    },
    {
      "epoch": 2.387325464935881,
      "grad_norm": 1.0990519523620605,
      "learning_rate": 5.25978005179128e-06,
      "loss": 0.0117,
      "step": 1458780
    },
    {
      "epoch": 2.387358195374534,
      "grad_norm": 0.23350833356380463,
      "learning_rate": 5.259714159577764e-06,
      "loss": 0.0139,
      "step": 1458800
    },
    {
      "epoch": 2.387390925813188,
      "grad_norm": 0.15641751885414124,
      "learning_rate": 5.259648267364246e-06,
      "loss": 0.0124,
      "step": 1458820
    },
    {
      "epoch": 2.387423656251841,
      "grad_norm": 0.1820845901966095,
      "learning_rate": 5.259582375150729e-06,
      "loss": 0.012,
      "step": 1458840
    },
    {
      "epoch": 2.3874563866904945,
      "grad_norm": 0.19519658386707306,
      "learning_rate": 5.259516482937211e-06,
      "loss": 0.0177,
      "step": 1458860
    },
    {
      "epoch": 2.3874891171291477,
      "grad_norm": 0.36400172114372253,
      "learning_rate": 5.259450590723695e-06,
      "loss": 0.0142,
      "step": 1458880
    },
    {
      "epoch": 2.3875218475678013,
      "grad_norm": 0.17774812877178192,
      "learning_rate": 5.2593846985101774e-06,
      "loss": 0.0146,
      "step": 1458900
    },
    {
      "epoch": 2.3875545780064544,
      "grad_norm": 0.4935888946056366,
      "learning_rate": 5.25931880629666e-06,
      "loss": 0.0181,
      "step": 1458920
    },
    {
      "epoch": 2.3875873084451076,
      "grad_norm": 0.33503806591033936,
      "learning_rate": 5.259252914083143e-06,
      "loss": 0.0202,
      "step": 1458940
    },
    {
      "epoch": 2.387620038883761,
      "grad_norm": 0.3407159447669983,
      "learning_rate": 5.2591870218696265e-06,
      "loss": 0.015,
      "step": 1458960
    },
    {
      "epoch": 2.3876527693224143,
      "grad_norm": 0.3219776749610901,
      "learning_rate": 5.259121129656108e-06,
      "loss": 0.023,
      "step": 1458980
    },
    {
      "epoch": 2.387685499761068,
      "grad_norm": 0.1370367556810379,
      "learning_rate": 5.259055237442592e-06,
      "loss": 0.0102,
      "step": 1459000
    },
    {
      "epoch": 2.387718230199721,
      "grad_norm": 0.4937781095504761,
      "learning_rate": 5.2589893452290756e-06,
      "loss": 0.0211,
      "step": 1459020
    },
    {
      "epoch": 2.3877509606383747,
      "grad_norm": 0.15157249569892883,
      "learning_rate": 5.2589234530155575e-06,
      "loss": 0.0191,
      "step": 1459040
    },
    {
      "epoch": 2.387783691077028,
      "grad_norm": 0.5860471725463867,
      "learning_rate": 5.258857560802041e-06,
      "loss": 0.015,
      "step": 1459060
    },
    {
      "epoch": 2.387816421515681,
      "grad_norm": 0.2649178206920624,
      "learning_rate": 5.258791668588523e-06,
      "loss": 0.0133,
      "step": 1459080
    },
    {
      "epoch": 2.3878491519543346,
      "grad_norm": 0.40913742780685425,
      "learning_rate": 5.2587257763750065e-06,
      "loss": 0.017,
      "step": 1459100
    },
    {
      "epoch": 2.3878818823929877,
      "grad_norm": 0.2573707103729248,
      "learning_rate": 5.258659884161489e-06,
      "loss": 0.015,
      "step": 1459120
    },
    {
      "epoch": 2.3879146128316413,
      "grad_norm": 0.48438888788223267,
      "learning_rate": 5.258593991947972e-06,
      "loss": 0.0163,
      "step": 1459140
    },
    {
      "epoch": 2.3879473432702945,
      "grad_norm": 0.3094278872013092,
      "learning_rate": 5.258528099734455e-06,
      "loss": 0.0166,
      "step": 1459160
    },
    {
      "epoch": 2.387980073708948,
      "grad_norm": 0.22492367029190063,
      "learning_rate": 5.258462207520938e-06,
      "loss": 0.015,
      "step": 1459180
    },
    {
      "epoch": 2.388012804147601,
      "grad_norm": 0.21896840631961823,
      "learning_rate": 5.25839631530742e-06,
      "loss": 0.0129,
      "step": 1459200
    },
    {
      "epoch": 2.3880455345862543,
      "grad_norm": 0.6293951272964478,
      "learning_rate": 5.258330423093904e-06,
      "loss": 0.0188,
      "step": 1459220
    },
    {
      "epoch": 2.388078265024908,
      "grad_norm": 0.39616474509239197,
      "learning_rate": 5.258264530880386e-06,
      "loss": 0.0186,
      "step": 1459240
    },
    {
      "epoch": 2.388110995463561,
      "grad_norm": 0.6294432282447815,
      "learning_rate": 5.258198638666869e-06,
      "loss": 0.0168,
      "step": 1459260
    },
    {
      "epoch": 2.3881437259022147,
      "grad_norm": 0.36804071068763733,
      "learning_rate": 5.258132746453351e-06,
      "loss": 0.0154,
      "step": 1459280
    },
    {
      "epoch": 2.388176456340868,
      "grad_norm": 0.5847707390785217,
      "learning_rate": 5.258066854239835e-06,
      "loss": 0.0165,
      "step": 1459300
    },
    {
      "epoch": 2.3882091867795214,
      "grad_norm": 0.394308477640152,
      "learning_rate": 5.2580009620263175e-06,
      "loss": 0.0117,
      "step": 1459320
    },
    {
      "epoch": 2.3882419172181746,
      "grad_norm": 0.8153666853904724,
      "learning_rate": 5.2579350698128e-06,
      "loss": 0.0154,
      "step": 1459340
    },
    {
      "epoch": 2.3882746476568277,
      "grad_norm": 0.3226326107978821,
      "learning_rate": 5.257869177599284e-06,
      "loss": 0.0105,
      "step": 1459360
    },
    {
      "epoch": 2.3883073780954813,
      "grad_norm": 0.286059707403183,
      "learning_rate": 5.257803285385767e-06,
      "loss": 0.0121,
      "step": 1459380
    },
    {
      "epoch": 2.3883401085341345,
      "grad_norm": 0.2471960037946701,
      "learning_rate": 5.257737393172249e-06,
      "loss": 0.0243,
      "step": 1459400
    },
    {
      "epoch": 2.388372838972788,
      "grad_norm": 0.33427685499191284,
      "learning_rate": 5.257671500958732e-06,
      "loss": 0.0079,
      "step": 1459420
    },
    {
      "epoch": 2.388405569411441,
      "grad_norm": 0.3354424238204956,
      "learning_rate": 5.257605608745216e-06,
      "loss": 0.0144,
      "step": 1459440
    },
    {
      "epoch": 2.388438299850095,
      "grad_norm": 0.15109287202358246,
      "learning_rate": 5.2575397165316975e-06,
      "loss": 0.013,
      "step": 1459460
    },
    {
      "epoch": 2.388471030288748,
      "grad_norm": 0.2380443513393402,
      "learning_rate": 5.257473824318181e-06,
      "loss": 0.0179,
      "step": 1459480
    },
    {
      "epoch": 2.388503760727401,
      "grad_norm": 0.38885924220085144,
      "learning_rate": 5.257407932104663e-06,
      "loss": 0.0134,
      "step": 1459500
    },
    {
      "epoch": 2.3885364911660547,
      "grad_norm": 0.14492341876029968,
      "learning_rate": 5.257342039891147e-06,
      "loss": 0.0145,
      "step": 1459520
    },
    {
      "epoch": 2.388569221604708,
      "grad_norm": 0.5274604558944702,
      "learning_rate": 5.257276147677629e-06,
      "loss": 0.0191,
      "step": 1459540
    },
    {
      "epoch": 2.3886019520433615,
      "grad_norm": 0.7835043668746948,
      "learning_rate": 5.257210255464112e-06,
      "loss": 0.0148,
      "step": 1459560
    },
    {
      "epoch": 2.3886346824820146,
      "grad_norm": 0.4623861610889435,
      "learning_rate": 5.257144363250595e-06,
      "loss": 0.0167,
      "step": 1459580
    },
    {
      "epoch": 2.388667412920668,
      "grad_norm": 0.09387505799531937,
      "learning_rate": 5.257078471037078e-06,
      "loss": 0.013,
      "step": 1459600
    },
    {
      "epoch": 2.3887001433593213,
      "grad_norm": 0.895087718963623,
      "learning_rate": 5.25701257882356e-06,
      "loss": 0.0173,
      "step": 1459620
    },
    {
      "epoch": 2.3887328737979745,
      "grad_norm": 0.2584579586982727,
      "learning_rate": 5.256946686610044e-06,
      "loss": 0.0136,
      "step": 1459640
    },
    {
      "epoch": 2.388765604236628,
      "grad_norm": 0.20396822690963745,
      "learning_rate": 5.256880794396526e-06,
      "loss": 0.0196,
      "step": 1459660
    },
    {
      "epoch": 2.3887983346752812,
      "grad_norm": 0.41082605719566345,
      "learning_rate": 5.256814902183009e-06,
      "loss": 0.0169,
      "step": 1459680
    },
    {
      "epoch": 2.388831065113935,
      "grad_norm": 0.23744726181030273,
      "learning_rate": 5.256749009969493e-06,
      "loss": 0.0193,
      "step": 1459700
    },
    {
      "epoch": 2.388863795552588,
      "grad_norm": 0.26120704412460327,
      "learning_rate": 5.256683117755975e-06,
      "loss": 0.0172,
      "step": 1459720
    },
    {
      "epoch": 2.3888965259912416,
      "grad_norm": 0.8165726661682129,
      "learning_rate": 5.2566172255424584e-06,
      "loss": 0.0156,
      "step": 1459740
    },
    {
      "epoch": 2.3889292564298947,
      "grad_norm": 0.6512191295623779,
      "learning_rate": 5.256551333328941e-06,
      "loss": 0.0122,
      "step": 1459760
    },
    {
      "epoch": 2.388961986868548,
      "grad_norm": 0.6845629811286926,
      "learning_rate": 5.256485441115424e-06,
      "loss": 0.02,
      "step": 1459780
    },
    {
      "epoch": 2.3889947173072015,
      "grad_norm": 0.34413257241249084,
      "learning_rate": 5.256419548901907e-06,
      "loss": 0.0105,
      "step": 1459800
    },
    {
      "epoch": 2.3890274477458546,
      "grad_norm": 0.12825602293014526,
      "learning_rate": 5.25635365668839e-06,
      "loss": 0.0098,
      "step": 1459820
    },
    {
      "epoch": 2.389060178184508,
      "grad_norm": 0.14636830985546112,
      "learning_rate": 5.256287764474872e-06,
      "loss": 0.011,
      "step": 1459840
    },
    {
      "epoch": 2.3890929086231614,
      "grad_norm": 0.15519419312477112,
      "learning_rate": 5.256221872261356e-06,
      "loss": 0.0098,
      "step": 1459860
    },
    {
      "epoch": 2.3891256390618145,
      "grad_norm": 0.588528037071228,
      "learning_rate": 5.256155980047838e-06,
      "loss": 0.0197,
      "step": 1459880
    },
    {
      "epoch": 2.389158369500468,
      "grad_norm": 0.18347157537937164,
      "learning_rate": 5.256090087834321e-06,
      "loss": 0.0156,
      "step": 1459900
    },
    {
      "epoch": 2.3891910999391213,
      "grad_norm": 0.11681666225194931,
      "learning_rate": 5.256024195620804e-06,
      "loss": 0.0138,
      "step": 1459920
    },
    {
      "epoch": 2.389223830377775,
      "grad_norm": 0.379917711019516,
      "learning_rate": 5.255958303407287e-06,
      "loss": 0.0172,
      "step": 1459940
    },
    {
      "epoch": 2.389256560816428,
      "grad_norm": 0.13206331431865692,
      "learning_rate": 5.255892411193769e-06,
      "loss": 0.0129,
      "step": 1459960
    },
    {
      "epoch": 2.389289291255081,
      "grad_norm": 0.21940864622592926,
      "learning_rate": 5.255826518980253e-06,
      "loss": 0.0193,
      "step": 1459980
    },
    {
      "epoch": 2.3893220216937348,
      "grad_norm": 0.149788498878479,
      "learning_rate": 5.255760626766735e-06,
      "loss": 0.018,
      "step": 1460000
    },
    {
      "epoch": 2.389354752132388,
      "grad_norm": 0.46363383531570435,
      "learning_rate": 5.2556947345532185e-06,
      "loss": 0.0177,
      "step": 1460020
    },
    {
      "epoch": 2.3893874825710415,
      "grad_norm": Infinity,
      "learning_rate": 5.2556288423397e-06,
      "loss": 0.0226,
      "step": 1460040
    },
    {
      "epoch": 2.3894202130096946,
      "grad_norm": 0.35698404908180237,
      "learning_rate": 5.255562950126184e-06,
      "loss": 0.0191,
      "step": 1460060
    },
    {
      "epoch": 2.3894529434483482,
      "grad_norm": 0.8093535900115967,
      "learning_rate": 5.2554970579126676e-06,
      "loss": 0.0128,
      "step": 1460080
    },
    {
      "epoch": 2.3894856738870014,
      "grad_norm": 1.318594217300415,
      "learning_rate": 5.2554311656991494e-06,
      "loss": 0.0135,
      "step": 1460100
    },
    {
      "epoch": 2.3895184043256545,
      "grad_norm": 0.4782972037792206,
      "learning_rate": 5.255365273485633e-06,
      "loss": 0.0224,
      "step": 1460120
    },
    {
      "epoch": 2.389551134764308,
      "grad_norm": 0.28134024143218994,
      "learning_rate": 5.255299381272116e-06,
      "loss": 0.0176,
      "step": 1460140
    },
    {
      "epoch": 2.3895838652029613,
      "grad_norm": 0.44733521342277527,
      "learning_rate": 5.2552334890585985e-06,
      "loss": 0.0194,
      "step": 1460160
    },
    {
      "epoch": 2.389616595641615,
      "grad_norm": 0.664179801940918,
      "learning_rate": 5.255167596845081e-06,
      "loss": 0.0111,
      "step": 1460180
    },
    {
      "epoch": 2.389649326080268,
      "grad_norm": 0.13372385501861572,
      "learning_rate": 5.255101704631565e-06,
      "loss": 0.0194,
      "step": 1460200
    },
    {
      "epoch": 2.3896820565189216,
      "grad_norm": 0.5531811118125916,
      "learning_rate": 5.255035812418047e-06,
      "loss": 0.0161,
      "step": 1460220
    },
    {
      "epoch": 2.3897147869575748,
      "grad_norm": 0.47895264625549316,
      "learning_rate": 5.25496992020453e-06,
      "loss": 0.0104,
      "step": 1460240
    },
    {
      "epoch": 2.389747517396228,
      "grad_norm": 1.0348974466323853,
      "learning_rate": 5.254904027991012e-06,
      "loss": 0.0113,
      "step": 1460260
    },
    {
      "epoch": 2.3897802478348815,
      "grad_norm": 0.1459251493215561,
      "learning_rate": 5.254838135777496e-06,
      "loss": 0.0118,
      "step": 1460280
    },
    {
      "epoch": 2.3898129782735347,
      "grad_norm": 0.5399552583694458,
      "learning_rate": 5.254772243563978e-06,
      "loss": 0.0145,
      "step": 1460300
    },
    {
      "epoch": 2.3898457087121883,
      "grad_norm": 0.8334588408470154,
      "learning_rate": 5.254706351350461e-06,
      "loss": 0.0176,
      "step": 1460320
    },
    {
      "epoch": 2.3898784391508414,
      "grad_norm": 1.2452415227890015,
      "learning_rate": 5.254640459136944e-06,
      "loss": 0.0165,
      "step": 1460340
    },
    {
      "epoch": 2.389911169589495,
      "grad_norm": 0.2436078041791916,
      "learning_rate": 5.254574566923427e-06,
      "loss": 0.0193,
      "step": 1460360
    },
    {
      "epoch": 2.389943900028148,
      "grad_norm": 0.45787638425827026,
      "learning_rate": 5.2545086747099095e-06,
      "loss": 0.0132,
      "step": 1460380
    },
    {
      "epoch": 2.3899766304668013,
      "grad_norm": 0.05383266508579254,
      "learning_rate": 5.254442782496393e-06,
      "loss": 0.0149,
      "step": 1460400
    },
    {
      "epoch": 2.390009360905455,
      "grad_norm": 0.47868672013282776,
      "learning_rate": 5.254376890282876e-06,
      "loss": 0.0139,
      "step": 1460420
    },
    {
      "epoch": 2.390042091344108,
      "grad_norm": 0.24239230155944824,
      "learning_rate": 5.2543109980693586e-06,
      "loss": 0.0117,
      "step": 1460440
    },
    {
      "epoch": 2.3900748217827616,
      "grad_norm": 0.33043256402015686,
      "learning_rate": 5.254245105855842e-06,
      "loss": 0.0102,
      "step": 1460460
    },
    {
      "epoch": 2.390107552221415,
      "grad_norm": 0.3557418882846832,
      "learning_rate": 5.254179213642324e-06,
      "loss": 0.0126,
      "step": 1460480
    },
    {
      "epoch": 2.3901402826600684,
      "grad_norm": 0.5516547560691833,
      "learning_rate": 5.254113321428808e-06,
      "loss": 0.0146,
      "step": 1460500
    },
    {
      "epoch": 2.3901730130987215,
      "grad_norm": 0.20811638236045837,
      "learning_rate": 5.2540474292152895e-06,
      "loss": 0.0115,
      "step": 1460520
    },
    {
      "epoch": 2.3902057435373747,
      "grad_norm": 0.9059857130050659,
      "learning_rate": 5.253981537001773e-06,
      "loss": 0.0165,
      "step": 1460540
    },
    {
      "epoch": 2.3902384739760283,
      "grad_norm": 0.6305086612701416,
      "learning_rate": 5.253915644788256e-06,
      "loss": 0.0166,
      "step": 1460560
    },
    {
      "epoch": 2.3902712044146814,
      "grad_norm": 0.4822816848754883,
      "learning_rate": 5.253849752574739e-06,
      "loss": 0.0216,
      "step": 1460580
    },
    {
      "epoch": 2.390303934853335,
      "grad_norm": 0.18731337785720825,
      "learning_rate": 5.253783860361221e-06,
      "loss": 0.0181,
      "step": 1460600
    },
    {
      "epoch": 2.390336665291988,
      "grad_norm": 0.15774090588092804,
      "learning_rate": 5.253717968147705e-06,
      "loss": 0.0137,
      "step": 1460620
    },
    {
      "epoch": 2.3903693957306418,
      "grad_norm": 0.07438844442367554,
      "learning_rate": 5.253652075934187e-06,
      "loss": 0.0144,
      "step": 1460640
    },
    {
      "epoch": 2.390402126169295,
      "grad_norm": 0.2441069781780243,
      "learning_rate": 5.25358618372067e-06,
      "loss": 0.0164,
      "step": 1460660
    },
    {
      "epoch": 2.390434856607948,
      "grad_norm": 1.3097052574157715,
      "learning_rate": 5.253520291507152e-06,
      "loss": 0.0172,
      "step": 1460680
    },
    {
      "epoch": 2.3904675870466017,
      "grad_norm": 0.3209242820739746,
      "learning_rate": 5.253454399293636e-06,
      "loss": 0.0139,
      "step": 1460700
    },
    {
      "epoch": 2.390500317485255,
      "grad_norm": 0.4859965741634369,
      "learning_rate": 5.253388507080119e-06,
      "loss": 0.0108,
      "step": 1460720
    },
    {
      "epoch": 2.3905330479239084,
      "grad_norm": 0.2732905447483063,
      "learning_rate": 5.253322614866601e-06,
      "loss": 0.0158,
      "step": 1460740
    },
    {
      "epoch": 2.3905657783625616,
      "grad_norm": 0.6746184825897217,
      "learning_rate": 5.253256722653085e-06,
      "loss": 0.0216,
      "step": 1460760
    },
    {
      "epoch": 2.390598508801215,
      "grad_norm": 0.044861819595098495,
      "learning_rate": 5.253190830439568e-06,
      "loss": 0.0195,
      "step": 1460780
    },
    {
      "epoch": 2.3906312392398683,
      "grad_norm": 0.613379180431366,
      "learning_rate": 5.25312493822605e-06,
      "loss": 0.0119,
      "step": 1460800
    },
    {
      "epoch": 2.3906639696785215,
      "grad_norm": 0.400616317987442,
      "learning_rate": 5.253059046012533e-06,
      "loss": 0.016,
      "step": 1460820
    },
    {
      "epoch": 2.390696700117175,
      "grad_norm": 0.6839503645896912,
      "learning_rate": 5.252993153799017e-06,
      "loss": 0.0171,
      "step": 1460840
    },
    {
      "epoch": 2.390729430555828,
      "grad_norm": 0.6723675727844238,
      "learning_rate": 5.252927261585499e-06,
      "loss": 0.0145,
      "step": 1460860
    },
    {
      "epoch": 2.390762160994482,
      "grad_norm": 0.7475221753120422,
      "learning_rate": 5.252861369371982e-06,
      "loss": 0.0142,
      "step": 1460880
    },
    {
      "epoch": 2.390794891433135,
      "grad_norm": 0.45609191060066223,
      "learning_rate": 5.252795477158464e-06,
      "loss": 0.0134,
      "step": 1460900
    },
    {
      "epoch": 2.3908276218717885,
      "grad_norm": 0.5192270874977112,
      "learning_rate": 5.252729584944948e-06,
      "loss": 0.0107,
      "step": 1460920
    },
    {
      "epoch": 2.3908603523104417,
      "grad_norm": 0.5083845257759094,
      "learning_rate": 5.2526636927314304e-06,
      "loss": 0.0159,
      "step": 1460940
    },
    {
      "epoch": 2.390893082749095,
      "grad_norm": 0.40135589241981506,
      "learning_rate": 5.252597800517913e-06,
      "loss": 0.0159,
      "step": 1460960
    },
    {
      "epoch": 2.3909258131877484,
      "grad_norm": 0.4618951678276062,
      "learning_rate": 5.252531908304396e-06,
      "loss": 0.0119,
      "step": 1460980
    },
    {
      "epoch": 2.3909585436264016,
      "grad_norm": 0.5209088325500488,
      "learning_rate": 5.2524660160908795e-06,
      "loss": 0.0096,
      "step": 1461000
    },
    {
      "epoch": 2.390991274065055,
      "grad_norm": 1.093199372291565,
      "learning_rate": 5.252400123877361e-06,
      "loss": 0.0202,
      "step": 1461020
    },
    {
      "epoch": 2.3910240045037083,
      "grad_norm": 0.7522811889648438,
      "learning_rate": 5.252334231663845e-06,
      "loss": 0.0161,
      "step": 1461040
    },
    {
      "epoch": 2.391056734942362,
      "grad_norm": 0.3190133571624756,
      "learning_rate": 5.252268339450327e-06,
      "loss": 0.0172,
      "step": 1461060
    },
    {
      "epoch": 2.391089465381015,
      "grad_norm": 0.43223536014556885,
      "learning_rate": 5.2522024472368105e-06,
      "loss": 0.0153,
      "step": 1461080
    },
    {
      "epoch": 2.3911221958196682,
      "grad_norm": 0.26498308777809143,
      "learning_rate": 5.252136555023292e-06,
      "loss": 0.0111,
      "step": 1461100
    },
    {
      "epoch": 2.391154926258322,
      "grad_norm": 0.44200101494789124,
      "learning_rate": 5.252070662809776e-06,
      "loss": 0.0087,
      "step": 1461120
    },
    {
      "epoch": 2.391187656696975,
      "grad_norm": 0.9488076567649841,
      "learning_rate": 5.2520047705962595e-06,
      "loss": 0.0173,
      "step": 1461140
    },
    {
      "epoch": 2.3912203871356286,
      "grad_norm": 0.858627438545227,
      "learning_rate": 5.251938878382742e-06,
      "loss": 0.0179,
      "step": 1461160
    },
    {
      "epoch": 2.3912531175742817,
      "grad_norm": 0.6848676800727844,
      "learning_rate": 5.251872986169225e-06,
      "loss": 0.0181,
      "step": 1461180
    },
    {
      "epoch": 2.3912858480129353,
      "grad_norm": 0.23819276690483093,
      "learning_rate": 5.251807093955708e-06,
      "loss": 0.0135,
      "step": 1461200
    },
    {
      "epoch": 2.3913185784515885,
      "grad_norm": 0.2557936906814575,
      "learning_rate": 5.251741201742191e-06,
      "loss": 0.0111,
      "step": 1461220
    },
    {
      "epoch": 2.3913513088902416,
      "grad_norm": 0.670255184173584,
      "learning_rate": 5.251675309528673e-06,
      "loss": 0.0221,
      "step": 1461240
    },
    {
      "epoch": 2.391384039328895,
      "grad_norm": 0.7596133947372437,
      "learning_rate": 5.251609417315157e-06,
      "loss": 0.0209,
      "step": 1461260
    },
    {
      "epoch": 2.3914167697675484,
      "grad_norm": 0.07262374460697174,
      "learning_rate": 5.251543525101639e-06,
      "loss": 0.0113,
      "step": 1461280
    },
    {
      "epoch": 2.391449500206202,
      "grad_norm": 1.3103514909744263,
      "learning_rate": 5.251477632888122e-06,
      "loss": 0.0155,
      "step": 1461300
    },
    {
      "epoch": 2.391482230644855,
      "grad_norm": 0.16618774831295013,
      "learning_rate": 5.251411740674604e-06,
      "loss": 0.0142,
      "step": 1461320
    },
    {
      "epoch": 2.3915149610835083,
      "grad_norm": 0.18255390226840973,
      "learning_rate": 5.251345848461088e-06,
      "loss": 0.0162,
      "step": 1461340
    },
    {
      "epoch": 2.391547691522162,
      "grad_norm": 0.18143975734710693,
      "learning_rate": 5.2512799562475705e-06,
      "loss": 0.0105,
      "step": 1461360
    },
    {
      "epoch": 2.391580421960815,
      "grad_norm": 0.8614572286605835,
      "learning_rate": 5.251214064034053e-06,
      "loss": 0.0106,
      "step": 1461380
    },
    {
      "epoch": 2.3916131523994686,
      "grad_norm": 0.3097134530544281,
      "learning_rate": 5.251148171820536e-06,
      "loss": 0.0137,
      "step": 1461400
    },
    {
      "epoch": 2.3916458828381217,
      "grad_norm": 0.9340271949768066,
      "learning_rate": 5.25108227960702e-06,
      "loss": 0.0135,
      "step": 1461420
    },
    {
      "epoch": 2.391678613276775,
      "grad_norm": 0.19546306133270264,
      "learning_rate": 5.2510163873935015e-06,
      "loss": 0.0176,
      "step": 1461440
    },
    {
      "epoch": 2.3917113437154285,
      "grad_norm": 0.5466824173927307,
      "learning_rate": 5.250950495179985e-06,
      "loss": 0.0147,
      "step": 1461460
    },
    {
      "epoch": 2.3917440741540816,
      "grad_norm": 0.5670939683914185,
      "learning_rate": 5.250884602966469e-06,
      "loss": 0.017,
      "step": 1461480
    },
    {
      "epoch": 2.3917768045927352,
      "grad_norm": 0.2745717763900757,
      "learning_rate": 5.2508187107529505e-06,
      "loss": 0.0212,
      "step": 1461500
    },
    {
      "epoch": 2.3918095350313884,
      "grad_norm": 1.700315237045288,
      "learning_rate": 5.250752818539434e-06,
      "loss": 0.0122,
      "step": 1461520
    },
    {
      "epoch": 2.391842265470042,
      "grad_norm": 0.5035510659217834,
      "learning_rate": 5.250686926325916e-06,
      "loss": 0.0168,
      "step": 1461540
    },
    {
      "epoch": 2.391874995908695,
      "grad_norm": 0.39005324244499207,
      "learning_rate": 5.2506210341124e-06,
      "loss": 0.009,
      "step": 1461560
    },
    {
      "epoch": 2.3919077263473483,
      "grad_norm": 0.3581852614879608,
      "learning_rate": 5.250555141898882e-06,
      "loss": 0.017,
      "step": 1461580
    },
    {
      "epoch": 2.391940456786002,
      "grad_norm": 0.28804948925971985,
      "learning_rate": 5.250489249685365e-06,
      "loss": 0.0199,
      "step": 1461600
    },
    {
      "epoch": 2.391973187224655,
      "grad_norm": 0.1774328351020813,
      "learning_rate": 5.250423357471848e-06,
      "loss": 0.0211,
      "step": 1461620
    },
    {
      "epoch": 2.3920059176633086,
      "grad_norm": 0.5450475215911865,
      "learning_rate": 5.250357465258331e-06,
      "loss": 0.0164,
      "step": 1461640
    },
    {
      "epoch": 2.3920386481019618,
      "grad_norm": 0.30152133107185364,
      "learning_rate": 5.250291573044813e-06,
      "loss": 0.0195,
      "step": 1461660
    },
    {
      "epoch": 2.3920713785406154,
      "grad_norm": 0.37514036893844604,
      "learning_rate": 5.250225680831297e-06,
      "loss": 0.0145,
      "step": 1461680
    },
    {
      "epoch": 2.3921041089792685,
      "grad_norm": 0.5600944757461548,
      "learning_rate": 5.250159788617779e-06,
      "loss": 0.012,
      "step": 1461700
    },
    {
      "epoch": 2.3921368394179217,
      "grad_norm": 0.4032001495361328,
      "learning_rate": 5.250093896404262e-06,
      "loss": 0.013,
      "step": 1461720
    },
    {
      "epoch": 2.3921695698565753,
      "grad_norm": 0.9442743062973022,
      "learning_rate": 5.250028004190745e-06,
      "loss": 0.0132,
      "step": 1461740
    },
    {
      "epoch": 2.3922023002952284,
      "grad_norm": 0.3167649209499359,
      "learning_rate": 5.249962111977228e-06,
      "loss": 0.0206,
      "step": 1461760
    },
    {
      "epoch": 2.392235030733882,
      "grad_norm": 0.6344366669654846,
      "learning_rate": 5.249896219763711e-06,
      "loss": 0.0155,
      "step": 1461780
    },
    {
      "epoch": 2.392267761172535,
      "grad_norm": 0.25318384170532227,
      "learning_rate": 5.249830327550194e-06,
      "loss": 0.0112,
      "step": 1461800
    },
    {
      "epoch": 2.3923004916111887,
      "grad_norm": 0.42334386706352234,
      "learning_rate": 5.249764435336677e-06,
      "loss": 0.0152,
      "step": 1461820
    },
    {
      "epoch": 2.392333222049842,
      "grad_norm": 0.19695116579532623,
      "learning_rate": 5.24969854312316e-06,
      "loss": 0.0147,
      "step": 1461840
    },
    {
      "epoch": 2.392365952488495,
      "grad_norm": 0.21305333077907562,
      "learning_rate": 5.249632650909643e-06,
      "loss": 0.0165,
      "step": 1461860
    },
    {
      "epoch": 2.3923986829271486,
      "grad_norm": 0.24446465075016022,
      "learning_rate": 5.249566758696125e-06,
      "loss": 0.0191,
      "step": 1461880
    },
    {
      "epoch": 2.392431413365802,
      "grad_norm": 0.13169609010219574,
      "learning_rate": 5.249500866482609e-06,
      "loss": 0.0115,
      "step": 1461900
    },
    {
      "epoch": 2.3924641438044554,
      "grad_norm": 0.3808157444000244,
      "learning_rate": 5.249434974269091e-06,
      "loss": 0.0097,
      "step": 1461920
    },
    {
      "epoch": 2.3924968742431085,
      "grad_norm": 0.5712100267410278,
      "learning_rate": 5.249369082055574e-06,
      "loss": 0.0191,
      "step": 1461940
    },
    {
      "epoch": 2.392529604681762,
      "grad_norm": 0.10630643367767334,
      "learning_rate": 5.249303189842057e-06,
      "loss": 0.0164,
      "step": 1461960
    },
    {
      "epoch": 2.3925623351204153,
      "grad_norm": 0.4578969478607178,
      "learning_rate": 5.24923729762854e-06,
      "loss": 0.0124,
      "step": 1461980
    },
    {
      "epoch": 2.3925950655590684,
      "grad_norm": 0.59395432472229,
      "learning_rate": 5.2491714054150224e-06,
      "loss": 0.0156,
      "step": 1462000
    },
    {
      "epoch": 2.392627795997722,
      "grad_norm": 0.38401320576667786,
      "learning_rate": 5.249105513201506e-06,
      "loss": 0.0155,
      "step": 1462020
    },
    {
      "epoch": 2.392660526436375,
      "grad_norm": 0.2877040505409241,
      "learning_rate": 5.249039620987988e-06,
      "loss": 0.0162,
      "step": 1462040
    },
    {
      "epoch": 2.3926932568750288,
      "grad_norm": 0.18534506857395172,
      "learning_rate": 5.2489737287744715e-06,
      "loss": 0.0136,
      "step": 1462060
    },
    {
      "epoch": 2.392725987313682,
      "grad_norm": 0.33098387718200684,
      "learning_rate": 5.248907836560953e-06,
      "loss": 0.0112,
      "step": 1462080
    },
    {
      "epoch": 2.3927587177523355,
      "grad_norm": 0.45836567878723145,
      "learning_rate": 5.248841944347437e-06,
      "loss": 0.0164,
      "step": 1462100
    },
    {
      "epoch": 2.3927914481909887,
      "grad_norm": 0.5707533955574036,
      "learning_rate": 5.248776052133919e-06,
      "loss": 0.0203,
      "step": 1462120
    },
    {
      "epoch": 2.392824178629642,
      "grad_norm": 0.33790910243988037,
      "learning_rate": 5.2487101599204025e-06,
      "loss": 0.018,
      "step": 1462140
    },
    {
      "epoch": 2.3928569090682954,
      "grad_norm": 0.46918821334838867,
      "learning_rate": 5.248644267706885e-06,
      "loss": 0.0201,
      "step": 1462160
    },
    {
      "epoch": 2.3928896395069486,
      "grad_norm": 0.16039733588695526,
      "learning_rate": 5.248578375493368e-06,
      "loss": 0.019,
      "step": 1462180
    },
    {
      "epoch": 2.392922369945602,
      "grad_norm": 0.14748874306678772,
      "learning_rate": 5.2485124832798515e-06,
      "loss": 0.0119,
      "step": 1462200
    },
    {
      "epoch": 2.3929551003842553,
      "grad_norm": 0.21456947922706604,
      "learning_rate": 5.248446591066334e-06,
      "loss": 0.0112,
      "step": 1462220
    },
    {
      "epoch": 2.392987830822909,
      "grad_norm": 0.1609627902507782,
      "learning_rate": 5.248380698852818e-06,
      "loss": 0.0197,
      "step": 1462240
    },
    {
      "epoch": 2.393020561261562,
      "grad_norm": 0.20752854645252228,
      "learning_rate": 5.2483148066393e-06,
      "loss": 0.0154,
      "step": 1462260
    },
    {
      "epoch": 2.393053291700215,
      "grad_norm": 0.21318942308425903,
      "learning_rate": 5.248248914425783e-06,
      "loss": 0.013,
      "step": 1462280
    },
    {
      "epoch": 2.393086022138869,
      "grad_norm": 0.27188462018966675,
      "learning_rate": 5.248183022212265e-06,
      "loss": 0.0153,
      "step": 1462300
    },
    {
      "epoch": 2.393118752577522,
      "grad_norm": 0.21254366636276245,
      "learning_rate": 5.248117129998749e-06,
      "loss": 0.0187,
      "step": 1462320
    },
    {
      "epoch": 2.3931514830161755,
      "grad_norm": 0.6172232627868652,
      "learning_rate": 5.248051237785231e-06,
      "loss": 0.0153,
      "step": 1462340
    },
    {
      "epoch": 2.3931842134548287,
      "grad_norm": 0.4350500702857971,
      "learning_rate": 5.247985345571714e-06,
      "loss": 0.0129,
      "step": 1462360
    },
    {
      "epoch": 2.3932169438934823,
      "grad_norm": 0.43612393736839294,
      "learning_rate": 5.247919453358197e-06,
      "loss": 0.021,
      "step": 1462380
    },
    {
      "epoch": 2.3932496743321354,
      "grad_norm": 0.14439810812473297,
      "learning_rate": 5.24785356114468e-06,
      "loss": 0.0188,
      "step": 1462400
    },
    {
      "epoch": 2.3932824047707886,
      "grad_norm": 1.0993247032165527,
      "learning_rate": 5.2477876689311625e-06,
      "loss": 0.017,
      "step": 1462420
    },
    {
      "epoch": 2.393315135209442,
      "grad_norm": 0.19540338218212128,
      "learning_rate": 5.247721776717646e-06,
      "loss": 0.018,
      "step": 1462440
    },
    {
      "epoch": 2.3933478656480953,
      "grad_norm": 0.2969612181186676,
      "learning_rate": 5.247655884504128e-06,
      "loss": 0.0169,
      "step": 1462460
    },
    {
      "epoch": 2.393380596086749,
      "grad_norm": 0.5417238473892212,
      "learning_rate": 5.2475899922906116e-06,
      "loss": 0.0116,
      "step": 1462480
    },
    {
      "epoch": 2.393413326525402,
      "grad_norm": 0.6015651226043701,
      "learning_rate": 5.2475241000770935e-06,
      "loss": 0.0171,
      "step": 1462500
    },
    {
      "epoch": 2.3934460569640557,
      "grad_norm": 0.6904825568199158,
      "learning_rate": 5.247458207863577e-06,
      "loss": 0.0159,
      "step": 1462520
    },
    {
      "epoch": 2.393478787402709,
      "grad_norm": 1.2429248094558716,
      "learning_rate": 5.247392315650061e-06,
      "loss": 0.0152,
      "step": 1462540
    },
    {
      "epoch": 2.393511517841362,
      "grad_norm": 0.3540044128894806,
      "learning_rate": 5.2473264234365425e-06,
      "loss": 0.0153,
      "step": 1462560
    },
    {
      "epoch": 2.3935442482800156,
      "grad_norm": 0.5124355554580688,
      "learning_rate": 5.247260531223026e-06,
      "loss": 0.0151,
      "step": 1462580
    },
    {
      "epoch": 2.3935769787186687,
      "grad_norm": 0.3173374831676483,
      "learning_rate": 5.247194639009509e-06,
      "loss": 0.0127,
      "step": 1462600
    },
    {
      "epoch": 2.3936097091573223,
      "grad_norm": 0.3007829189300537,
      "learning_rate": 5.247128746795992e-06,
      "loss": 0.012,
      "step": 1462620
    },
    {
      "epoch": 2.3936424395959754,
      "grad_norm": 0.6516234874725342,
      "learning_rate": 5.247062854582474e-06,
      "loss": 0.0249,
      "step": 1462640
    },
    {
      "epoch": 2.393675170034629,
      "grad_norm": 0.28533804416656494,
      "learning_rate": 5.246996962368958e-06,
      "loss": 0.0161,
      "step": 1462660
    },
    {
      "epoch": 2.393707900473282,
      "grad_norm": 0.3986736834049225,
      "learning_rate": 5.24693107015544e-06,
      "loss": 0.0152,
      "step": 1462680
    },
    {
      "epoch": 2.3937406309119353,
      "grad_norm": 0.08335026353597641,
      "learning_rate": 5.246865177941923e-06,
      "loss": 0.0133,
      "step": 1462700
    },
    {
      "epoch": 2.393773361350589,
      "grad_norm": 0.15196377038955688,
      "learning_rate": 5.246799285728405e-06,
      "loss": 0.0165,
      "step": 1462720
    },
    {
      "epoch": 2.393806091789242,
      "grad_norm": 0.21934838593006134,
      "learning_rate": 5.246733393514889e-06,
      "loss": 0.017,
      "step": 1462740
    },
    {
      "epoch": 2.3938388222278957,
      "grad_norm": 0.2835569381713867,
      "learning_rate": 5.246667501301372e-06,
      "loss": 0.0153,
      "step": 1462760
    },
    {
      "epoch": 2.393871552666549,
      "grad_norm": 0.18001709878444672,
      "learning_rate": 5.246601609087854e-06,
      "loss": 0.0177,
      "step": 1462780
    },
    {
      "epoch": 2.3939042831052024,
      "grad_norm": 0.33073440194129944,
      "learning_rate": 5.246535716874337e-06,
      "loss": 0.0116,
      "step": 1462800
    },
    {
      "epoch": 2.3939370135438556,
      "grad_norm": 0.22595511376857758,
      "learning_rate": 5.246469824660821e-06,
      "loss": 0.0212,
      "step": 1462820
    },
    {
      "epoch": 2.3939697439825087,
      "grad_norm": 0.17631985247135162,
      "learning_rate": 5.246403932447303e-06,
      "loss": 0.0139,
      "step": 1462840
    },
    {
      "epoch": 2.3940024744211623,
      "grad_norm": 0.18517017364501953,
      "learning_rate": 5.246338040233786e-06,
      "loss": 0.0183,
      "step": 1462860
    },
    {
      "epoch": 2.3940352048598155,
      "grad_norm": 0.2021917849779129,
      "learning_rate": 5.24627214802027e-06,
      "loss": 0.011,
      "step": 1462880
    },
    {
      "epoch": 2.394067935298469,
      "grad_norm": 0.3517676889896393,
      "learning_rate": 5.246206255806752e-06,
      "loss": 0.0152,
      "step": 1462900
    },
    {
      "epoch": 2.394100665737122,
      "grad_norm": 0.1937163770198822,
      "learning_rate": 5.246140363593235e-06,
      "loss": 0.0172,
      "step": 1462920
    },
    {
      "epoch": 2.3941333961757754,
      "grad_norm": 0.4766576588153839,
      "learning_rate": 5.246074471379717e-06,
      "loss": 0.015,
      "step": 1462940
    },
    {
      "epoch": 2.394166126614429,
      "grad_norm": 0.26373839378356934,
      "learning_rate": 5.246008579166201e-06,
      "loss": 0.0147,
      "step": 1462960
    },
    {
      "epoch": 2.394198857053082,
      "grad_norm": 0.21492721140384674,
      "learning_rate": 5.2459426869526835e-06,
      "loss": 0.0152,
      "step": 1462980
    },
    {
      "epoch": 2.3942315874917357,
      "grad_norm": 0.3870103359222412,
      "learning_rate": 5.245876794739166e-06,
      "loss": 0.0149,
      "step": 1463000
    },
    {
      "epoch": 2.394264317930389,
      "grad_norm": 1.0082858800888062,
      "learning_rate": 5.245810902525649e-06,
      "loss": 0.0205,
      "step": 1463020
    },
    {
      "epoch": 2.394297048369042,
      "grad_norm": 1.1536376476287842,
      "learning_rate": 5.2457450103121325e-06,
      "loss": 0.0159,
      "step": 1463040
    },
    {
      "epoch": 2.3943297788076956,
      "grad_norm": 0.2658398151397705,
      "learning_rate": 5.245679118098614e-06,
      "loss": 0.0105,
      "step": 1463060
    },
    {
      "epoch": 2.3943625092463487,
      "grad_norm": 0.13688458502292633,
      "learning_rate": 5.245613225885098e-06,
      "loss": 0.0147,
      "step": 1463080
    },
    {
      "epoch": 2.3943952396850023,
      "grad_norm": 2.501371383666992,
      "learning_rate": 5.24554733367158e-06,
      "loss": 0.0153,
      "step": 1463100
    },
    {
      "epoch": 2.3944279701236555,
      "grad_norm": 0.36143946647644043,
      "learning_rate": 5.2454814414580635e-06,
      "loss": 0.0103,
      "step": 1463120
    },
    {
      "epoch": 2.394460700562309,
      "grad_norm": 0.285200297832489,
      "learning_rate": 5.245415549244545e-06,
      "loss": 0.013,
      "step": 1463140
    },
    {
      "epoch": 2.3944934310009622,
      "grad_norm": 0.2391945719718933,
      "learning_rate": 5.245349657031029e-06,
      "loss": 0.0107,
      "step": 1463160
    },
    {
      "epoch": 2.3945261614396154,
      "grad_norm": 0.3743247091770172,
      "learning_rate": 5.245283764817512e-06,
      "loss": 0.0218,
      "step": 1463180
    },
    {
      "epoch": 2.394558891878269,
      "grad_norm": 0.3004973530769348,
      "learning_rate": 5.2452178726039944e-06,
      "loss": 0.0107,
      "step": 1463200
    },
    {
      "epoch": 2.394591622316922,
      "grad_norm": 0.31923675537109375,
      "learning_rate": 5.245151980390478e-06,
      "loss": 0.0145,
      "step": 1463220
    },
    {
      "epoch": 2.3946243527555757,
      "grad_norm": 0.5374482870101929,
      "learning_rate": 5.245086088176961e-06,
      "loss": 0.013,
      "step": 1463240
    },
    {
      "epoch": 2.394657083194229,
      "grad_norm": 0.11310475319623947,
      "learning_rate": 5.2450201959634435e-06,
      "loss": 0.017,
      "step": 1463260
    },
    {
      "epoch": 2.3946898136328825,
      "grad_norm": 0.5764274001121521,
      "learning_rate": 5.244954303749926e-06,
      "loss": 0.0121,
      "step": 1463280
    },
    {
      "epoch": 2.3947225440715356,
      "grad_norm": 0.40317150950431824,
      "learning_rate": 5.24488841153641e-06,
      "loss": 0.0119,
      "step": 1463300
    },
    {
      "epoch": 2.3947552745101888,
      "grad_norm": 0.3293702006340027,
      "learning_rate": 5.244822519322892e-06,
      "loss": 0.0147,
      "step": 1463320
    },
    {
      "epoch": 2.3947880049488424,
      "grad_norm": 0.4404662549495697,
      "learning_rate": 5.244756627109375e-06,
      "loss": 0.0135,
      "step": 1463340
    },
    {
      "epoch": 2.3948207353874955,
      "grad_norm": 0.5610771179199219,
      "learning_rate": 5.244690734895857e-06,
      "loss": 0.0124,
      "step": 1463360
    },
    {
      "epoch": 2.394853465826149,
      "grad_norm": 0.4123147428035736,
      "learning_rate": 5.244624842682341e-06,
      "loss": 0.019,
      "step": 1463380
    },
    {
      "epoch": 2.3948861962648023,
      "grad_norm": 0.18525415658950806,
      "learning_rate": 5.2445589504688235e-06,
      "loss": 0.0188,
      "step": 1463400
    },
    {
      "epoch": 2.394918926703456,
      "grad_norm": 0.26342740654945374,
      "learning_rate": 5.244493058255306e-06,
      "loss": 0.0122,
      "step": 1463420
    },
    {
      "epoch": 2.394951657142109,
      "grad_norm": 0.2229640632867813,
      "learning_rate": 5.244427166041789e-06,
      "loss": 0.0185,
      "step": 1463440
    },
    {
      "epoch": 2.394984387580762,
      "grad_norm": 0.27561047673225403,
      "learning_rate": 5.244361273828273e-06,
      "loss": 0.0109,
      "step": 1463460
    },
    {
      "epoch": 2.3950171180194157,
      "grad_norm": 0.6583630442619324,
      "learning_rate": 5.2442953816147545e-06,
      "loss": 0.0198,
      "step": 1463480
    },
    {
      "epoch": 2.395049848458069,
      "grad_norm": 0.21022799611091614,
      "learning_rate": 5.244229489401238e-06,
      "loss": 0.0123,
      "step": 1463500
    },
    {
      "epoch": 2.3950825788967225,
      "grad_norm": 0.7528215646743774,
      "learning_rate": 5.24416359718772e-06,
      "loss": 0.0237,
      "step": 1463520
    },
    {
      "epoch": 2.3951153093353756,
      "grad_norm": 0.19355690479278564,
      "learning_rate": 5.2440977049742036e-06,
      "loss": 0.0114,
      "step": 1463540
    },
    {
      "epoch": 2.3951480397740292,
      "grad_norm": 0.1368296593427658,
      "learning_rate": 5.244031812760686e-06,
      "loss": 0.0159,
      "step": 1463560
    },
    {
      "epoch": 2.3951807702126824,
      "grad_norm": 0.22929862141609192,
      "learning_rate": 5.243965920547169e-06,
      "loss": 0.0136,
      "step": 1463580
    },
    {
      "epoch": 2.3952135006513355,
      "grad_norm": 0.1834801584482193,
      "learning_rate": 5.243900028333653e-06,
      "loss": 0.0146,
      "step": 1463600
    },
    {
      "epoch": 2.395246231089989,
      "grad_norm": 0.3446124494075775,
      "learning_rate": 5.243834136120135e-06,
      "loss": 0.0147,
      "step": 1463620
    },
    {
      "epoch": 2.3952789615286423,
      "grad_norm": 0.6901929974555969,
      "learning_rate": 5.243768243906618e-06,
      "loss": 0.0168,
      "step": 1463640
    },
    {
      "epoch": 2.395311691967296,
      "grad_norm": 0.45992937684059143,
      "learning_rate": 5.243702351693101e-06,
      "loss": 0.0129,
      "step": 1463660
    },
    {
      "epoch": 2.395344422405949,
      "grad_norm": 0.16092845797538757,
      "learning_rate": 5.2436364594795844e-06,
      "loss": 0.0108,
      "step": 1463680
    },
    {
      "epoch": 2.3953771528446026,
      "grad_norm": 0.39502906799316406,
      "learning_rate": 5.243570567266066e-06,
      "loss": 0.0155,
      "step": 1463700
    },
    {
      "epoch": 2.3954098832832558,
      "grad_norm": 0.2275933027267456,
      "learning_rate": 5.24350467505255e-06,
      "loss": 0.0121,
      "step": 1463720
    },
    {
      "epoch": 2.395442613721909,
      "grad_norm": 0.46081724762916565,
      "learning_rate": 5.243438782839032e-06,
      "loss": 0.0137,
      "step": 1463740
    },
    {
      "epoch": 2.3954753441605625,
      "grad_norm": 0.3751102387905121,
      "learning_rate": 5.243372890625515e-06,
      "loss": 0.0138,
      "step": 1463760
    },
    {
      "epoch": 2.3955080745992157,
      "grad_norm": 0.3239804804325104,
      "learning_rate": 5.243306998411998e-06,
      "loss": 0.0176,
      "step": 1463780
    },
    {
      "epoch": 2.3955408050378693,
      "grad_norm": 0.08048895001411438,
      "learning_rate": 5.243241106198481e-06,
      "loss": 0.0123,
      "step": 1463800
    },
    {
      "epoch": 2.3955735354765224,
      "grad_norm": 0.49913591146469116,
      "learning_rate": 5.243175213984964e-06,
      "loss": 0.0126,
      "step": 1463820
    },
    {
      "epoch": 2.395606265915176,
      "grad_norm": 0.2701573371887207,
      "learning_rate": 5.243109321771447e-06,
      "loss": 0.015,
      "step": 1463840
    },
    {
      "epoch": 2.395638996353829,
      "grad_norm": 0.49686750769615173,
      "learning_rate": 5.243043429557929e-06,
      "loss": 0.0149,
      "step": 1463860
    },
    {
      "epoch": 2.3956717267924823,
      "grad_norm": 0.15154540538787842,
      "learning_rate": 5.242977537344413e-06,
      "loss": 0.0154,
      "step": 1463880
    },
    {
      "epoch": 2.395704457231136,
      "grad_norm": 0.5039528608322144,
      "learning_rate": 5.2429116451308946e-06,
      "loss": 0.0112,
      "step": 1463900
    },
    {
      "epoch": 2.395737187669789,
      "grad_norm": 0.5172147154808044,
      "learning_rate": 5.242845752917378e-06,
      "loss": 0.0165,
      "step": 1463920
    },
    {
      "epoch": 2.3957699181084426,
      "grad_norm": 0.1880720555782318,
      "learning_rate": 5.242779860703862e-06,
      "loss": 0.0213,
      "step": 1463940
    },
    {
      "epoch": 2.395802648547096,
      "grad_norm": 0.8962998390197754,
      "learning_rate": 5.242713968490344e-06,
      "loss": 0.018,
      "step": 1463960
    },
    {
      "epoch": 2.3958353789857494,
      "grad_norm": 0.5991077423095703,
      "learning_rate": 5.242648076276827e-06,
      "loss": 0.0162,
      "step": 1463980
    },
    {
      "epoch": 2.3958681094244025,
      "grad_norm": 0.33845648169517517,
      "learning_rate": 5.24258218406331e-06,
      "loss": 0.0164,
      "step": 1464000
    },
    {
      "epoch": 2.3959008398630557,
      "grad_norm": 0.16344468295574188,
      "learning_rate": 5.242516291849793e-06,
      "loss": 0.0122,
      "step": 1464020
    },
    {
      "epoch": 2.3959335703017093,
      "grad_norm": 0.3850771486759186,
      "learning_rate": 5.2424503996362754e-06,
      "loss": 0.0143,
      "step": 1464040
    },
    {
      "epoch": 2.3959663007403624,
      "grad_norm": 0.13522537052631378,
      "learning_rate": 5.242384507422759e-06,
      "loss": 0.0106,
      "step": 1464060
    },
    {
      "epoch": 2.395999031179016,
      "grad_norm": 0.3435112237930298,
      "learning_rate": 5.242318615209241e-06,
      "loss": 0.0127,
      "step": 1464080
    },
    {
      "epoch": 2.396031761617669,
      "grad_norm": 0.4186015725135803,
      "learning_rate": 5.2422527229957245e-06,
      "loss": 0.014,
      "step": 1464100
    },
    {
      "epoch": 2.3960644920563228,
      "grad_norm": 0.1881498545408249,
      "learning_rate": 5.242186830782206e-06,
      "loss": 0.0136,
      "step": 1464120
    },
    {
      "epoch": 2.396097222494976,
      "grad_norm": 0.7587962746620178,
      "learning_rate": 5.24212093856869e-06,
      "loss": 0.023,
      "step": 1464140
    },
    {
      "epoch": 2.396129952933629,
      "grad_norm": 1.32322096824646,
      "learning_rate": 5.242055046355172e-06,
      "loss": 0.0114,
      "step": 1464160
    },
    {
      "epoch": 2.3961626833722827,
      "grad_norm": 0.8311532139778137,
      "learning_rate": 5.2419891541416555e-06,
      "loss": 0.0162,
      "step": 1464180
    },
    {
      "epoch": 2.396195413810936,
      "grad_norm": 0.44488775730133057,
      "learning_rate": 5.241923261928138e-06,
      "loss": 0.0156,
      "step": 1464200
    },
    {
      "epoch": 2.3962281442495894,
      "grad_norm": 0.24387812614440918,
      "learning_rate": 5.241857369714621e-06,
      "loss": 0.0146,
      "step": 1464220
    },
    {
      "epoch": 2.3962608746882426,
      "grad_norm": 0.2512838840484619,
      "learning_rate": 5.241791477501104e-06,
      "loss": 0.014,
      "step": 1464240
    },
    {
      "epoch": 2.396293605126896,
      "grad_norm": 0.49128079414367676,
      "learning_rate": 5.241725585287587e-06,
      "loss": 0.0125,
      "step": 1464260
    },
    {
      "epoch": 2.3963263355655493,
      "grad_norm": 0.4132695198059082,
      "learning_rate": 5.24165969307407e-06,
      "loss": 0.0134,
      "step": 1464280
    },
    {
      "epoch": 2.3963590660042025,
      "grad_norm": 0.086850106716156,
      "learning_rate": 5.241593800860553e-06,
      "loss": 0.0151,
      "step": 1464300
    },
    {
      "epoch": 2.396391796442856,
      "grad_norm": 0.4442041516304016,
      "learning_rate": 5.241527908647036e-06,
      "loss": 0.011,
      "step": 1464320
    },
    {
      "epoch": 2.396424526881509,
      "grad_norm": 0.0487082302570343,
      "learning_rate": 5.241462016433518e-06,
      "loss": 0.0207,
      "step": 1464340
    },
    {
      "epoch": 2.396457257320163,
      "grad_norm": 0.7514695525169373,
      "learning_rate": 5.241396124220002e-06,
      "loss": 0.0161,
      "step": 1464360
    },
    {
      "epoch": 2.396489987758816,
      "grad_norm": 0.2017631232738495,
      "learning_rate": 5.241330232006484e-06,
      "loss": 0.0121,
      "step": 1464380
    },
    {
      "epoch": 2.396522718197469,
      "grad_norm": 0.07677307724952698,
      "learning_rate": 5.241264339792967e-06,
      "loss": 0.0142,
      "step": 1464400
    },
    {
      "epoch": 2.3965554486361227,
      "grad_norm": 0.5956438183784485,
      "learning_rate": 5.24119844757945e-06,
      "loss": 0.0143,
      "step": 1464420
    },
    {
      "epoch": 2.396588179074776,
      "grad_norm": 0.5770082473754883,
      "learning_rate": 5.241132555365933e-06,
      "loss": 0.0184,
      "step": 1464440
    },
    {
      "epoch": 2.3966209095134294,
      "grad_norm": 0.4411833882331848,
      "learning_rate": 5.2410666631524155e-06,
      "loss": 0.0174,
      "step": 1464460
    },
    {
      "epoch": 2.3966536399520826,
      "grad_norm": 0.1853257566690445,
      "learning_rate": 5.241000770938899e-06,
      "loss": 0.0117,
      "step": 1464480
    },
    {
      "epoch": 2.3966863703907357,
      "grad_norm": 0.5367459058761597,
      "learning_rate": 5.240934878725381e-06,
      "loss": 0.0159,
      "step": 1464500
    },
    {
      "epoch": 2.3967191008293893,
      "grad_norm": 0.8149020075798035,
      "learning_rate": 5.2408689865118646e-06,
      "loss": 0.0166,
      "step": 1464520
    },
    {
      "epoch": 2.3967518312680425,
      "grad_norm": 0.2073124796152115,
      "learning_rate": 5.2408030942983465e-06,
      "loss": 0.0152,
      "step": 1464540
    },
    {
      "epoch": 2.396784561706696,
      "grad_norm": 0.3693390488624573,
      "learning_rate": 5.24073720208483e-06,
      "loss": 0.0113,
      "step": 1464560
    },
    {
      "epoch": 2.396817292145349,
      "grad_norm": 0.28104105591773987,
      "learning_rate": 5.240671309871313e-06,
      "loss": 0.0126,
      "step": 1464580
    },
    {
      "epoch": 2.396850022584003,
      "grad_norm": 0.23768775165081024,
      "learning_rate": 5.2406054176577955e-06,
      "loss": 0.0175,
      "step": 1464600
    },
    {
      "epoch": 2.396882753022656,
      "grad_norm": 0.8923357725143433,
      "learning_rate": 5.240539525444278e-06,
      "loss": 0.0188,
      "step": 1464620
    },
    {
      "epoch": 2.396915483461309,
      "grad_norm": 0.6398255228996277,
      "learning_rate": 5.240473633230762e-06,
      "loss": 0.0174,
      "step": 1464640
    },
    {
      "epoch": 2.3969482138999627,
      "grad_norm": 0.12245869636535645,
      "learning_rate": 5.240407741017245e-06,
      "loss": 0.0124,
      "step": 1464660
    },
    {
      "epoch": 2.396980944338616,
      "grad_norm": 0.1584223210811615,
      "learning_rate": 5.240341848803727e-06,
      "loss": 0.0188,
      "step": 1464680
    },
    {
      "epoch": 2.3970136747772695,
      "grad_norm": 0.29206663370132446,
      "learning_rate": 5.240275956590211e-06,
      "loss": 0.0136,
      "step": 1464700
    },
    {
      "epoch": 2.3970464052159226,
      "grad_norm": 0.2632267475128174,
      "learning_rate": 5.240210064376693e-06,
      "loss": 0.0143,
      "step": 1464720
    },
    {
      "epoch": 2.397079135654576,
      "grad_norm": 0.169300377368927,
      "learning_rate": 5.240144172163176e-06,
      "loss": 0.0159,
      "step": 1464740
    },
    {
      "epoch": 2.3971118660932293,
      "grad_norm": 0.6324715614318848,
      "learning_rate": 5.240078279949658e-06,
      "loss": 0.0145,
      "step": 1464760
    },
    {
      "epoch": 2.3971445965318825,
      "grad_norm": 1.1341381072998047,
      "learning_rate": 5.240012387736142e-06,
      "loss": 0.0137,
      "step": 1464780
    },
    {
      "epoch": 2.397177326970536,
      "grad_norm": 0.47148630023002625,
      "learning_rate": 5.239946495522625e-06,
      "loss": 0.0102,
      "step": 1464800
    },
    {
      "epoch": 2.3972100574091892,
      "grad_norm": 0.2105342000722885,
      "learning_rate": 5.239880603309107e-06,
      "loss": 0.0233,
      "step": 1464820
    },
    {
      "epoch": 2.397242787847843,
      "grad_norm": 1.3334267139434814,
      "learning_rate": 5.23981471109559e-06,
      "loss": 0.019,
      "step": 1464840
    },
    {
      "epoch": 2.397275518286496,
      "grad_norm": 0.2563523054122925,
      "learning_rate": 5.239748818882074e-06,
      "loss": 0.0178,
      "step": 1464860
    },
    {
      "epoch": 2.3973082487251496,
      "grad_norm": 0.8983813524246216,
      "learning_rate": 5.239682926668556e-06,
      "loss": 0.0104,
      "step": 1464880
    },
    {
      "epoch": 2.3973409791638027,
      "grad_norm": 0.06777774542570114,
      "learning_rate": 5.239617034455039e-06,
      "loss": 0.0109,
      "step": 1464900
    },
    {
      "epoch": 2.397373709602456,
      "grad_norm": 0.12459418177604675,
      "learning_rate": 5.239551142241521e-06,
      "loss": 0.0118,
      "step": 1464920
    },
    {
      "epoch": 2.3974064400411095,
      "grad_norm": 0.2868037819862366,
      "learning_rate": 5.239485250028005e-06,
      "loss": 0.0142,
      "step": 1464940
    },
    {
      "epoch": 2.3974391704797626,
      "grad_norm": 0.5757042765617371,
      "learning_rate": 5.2394193578144865e-06,
      "loss": 0.0162,
      "step": 1464960
    },
    {
      "epoch": 2.397471900918416,
      "grad_norm": 0.20160479843616486,
      "learning_rate": 5.23935346560097e-06,
      "loss": 0.011,
      "step": 1464980
    },
    {
      "epoch": 2.3975046313570694,
      "grad_norm": 1.545045018196106,
      "learning_rate": 5.239287573387454e-06,
      "loss": 0.0165,
      "step": 1465000
    },
    {
      "epoch": 2.397537361795723,
      "grad_norm": 1.6827815771102905,
      "learning_rate": 5.2392216811739365e-06,
      "loss": 0.0172,
      "step": 1465020
    },
    {
      "epoch": 2.397570092234376,
      "grad_norm": 0.40075400471687317,
      "learning_rate": 5.239155788960419e-06,
      "loss": 0.0137,
      "step": 1465040
    },
    {
      "epoch": 2.3976028226730293,
      "grad_norm": 0.276292622089386,
      "learning_rate": 5.239089896746902e-06,
      "loss": 0.0156,
      "step": 1465060
    },
    {
      "epoch": 2.397635553111683,
      "grad_norm": 1.7918390035629272,
      "learning_rate": 5.2390240045333855e-06,
      "loss": 0.0171,
      "step": 1465080
    },
    {
      "epoch": 2.397668283550336,
      "grad_norm": 0.15261873602867126,
      "learning_rate": 5.238958112319867e-06,
      "loss": 0.0112,
      "step": 1465100
    },
    {
      "epoch": 2.3977010139889896,
      "grad_norm": 0.31744423508644104,
      "learning_rate": 5.238892220106351e-06,
      "loss": 0.0139,
      "step": 1465120
    },
    {
      "epoch": 2.3977337444276428,
      "grad_norm": 0.6512118577957153,
      "learning_rate": 5.238826327892833e-06,
      "loss": 0.0197,
      "step": 1465140
    },
    {
      "epoch": 2.3977664748662963,
      "grad_norm": 0.23265184462070465,
      "learning_rate": 5.2387604356793165e-06,
      "loss": 0.0136,
      "step": 1465160
    },
    {
      "epoch": 2.3977992053049495,
      "grad_norm": 0.21958193182945251,
      "learning_rate": 5.238694543465798e-06,
      "loss": 0.0175,
      "step": 1465180
    },
    {
      "epoch": 2.3978319357436026,
      "grad_norm": 2.1975417137145996,
      "learning_rate": 5.238628651252282e-06,
      "loss": 0.0201,
      "step": 1465200
    },
    {
      "epoch": 2.3978646661822562,
      "grad_norm": 0.49166339635849,
      "learning_rate": 5.238562759038765e-06,
      "loss": 0.0138,
      "step": 1465220
    },
    {
      "epoch": 2.3978973966209094,
      "grad_norm": 1.068679928779602,
      "learning_rate": 5.2384968668252474e-06,
      "loss": 0.0104,
      "step": 1465240
    },
    {
      "epoch": 2.397930127059563,
      "grad_norm": 0.18067528307437897,
      "learning_rate": 5.23843097461173e-06,
      "loss": 0.0106,
      "step": 1465260
    },
    {
      "epoch": 2.397962857498216,
      "grad_norm": 0.4484993815422058,
      "learning_rate": 5.238365082398214e-06,
      "loss": 0.0151,
      "step": 1465280
    },
    {
      "epoch": 2.3979955879368697,
      "grad_norm": 0.4333643317222595,
      "learning_rate": 5.238299190184696e-06,
      "loss": 0.015,
      "step": 1465300
    },
    {
      "epoch": 2.398028318375523,
      "grad_norm": 0.4156227111816406,
      "learning_rate": 5.238233297971179e-06,
      "loss": 0.0174,
      "step": 1465320
    },
    {
      "epoch": 2.398061048814176,
      "grad_norm": 0.2109854370355606,
      "learning_rate": 5.238167405757663e-06,
      "loss": 0.0145,
      "step": 1465340
    },
    {
      "epoch": 2.3980937792528296,
      "grad_norm": 0.6917281746864319,
      "learning_rate": 5.238101513544145e-06,
      "loss": 0.0143,
      "step": 1465360
    },
    {
      "epoch": 2.3981265096914828,
      "grad_norm": 0.21710827946662903,
      "learning_rate": 5.238035621330628e-06,
      "loss": 0.0135,
      "step": 1465380
    },
    {
      "epoch": 2.3981592401301364,
      "grad_norm": 0.18359488248825073,
      "learning_rate": 5.23796972911711e-06,
      "loss": 0.018,
      "step": 1465400
    },
    {
      "epoch": 2.3981919705687895,
      "grad_norm": 0.10218202322721481,
      "learning_rate": 5.237903836903594e-06,
      "loss": 0.0139,
      "step": 1465420
    },
    {
      "epoch": 2.398224701007443,
      "grad_norm": 0.4900161921977997,
      "learning_rate": 5.2378379446900765e-06,
      "loss": 0.0167,
      "step": 1465440
    },
    {
      "epoch": 2.3982574314460963,
      "grad_norm": 0.2094430923461914,
      "learning_rate": 5.237772052476559e-06,
      "loss": 0.0244,
      "step": 1465460
    },
    {
      "epoch": 2.3982901618847494,
      "grad_norm": 1.832238793373108,
      "learning_rate": 5.237706160263042e-06,
      "loss": 0.0101,
      "step": 1465480
    },
    {
      "epoch": 2.398322892323403,
      "grad_norm": 0.20974743366241455,
      "learning_rate": 5.237640268049526e-06,
      "loss": 0.0196,
      "step": 1465500
    },
    {
      "epoch": 2.398355622762056,
      "grad_norm": 0.27215954661369324,
      "learning_rate": 5.2375743758360075e-06,
      "loss": 0.0196,
      "step": 1465520
    },
    {
      "epoch": 2.3983883532007098,
      "grad_norm": 0.14574529230594635,
      "learning_rate": 5.237508483622491e-06,
      "loss": 0.0152,
      "step": 1465540
    },
    {
      "epoch": 2.398421083639363,
      "grad_norm": 0.35957226157188416,
      "learning_rate": 5.237442591408973e-06,
      "loss": 0.018,
      "step": 1465560
    },
    {
      "epoch": 2.3984538140780165,
      "grad_norm": 0.2244419902563095,
      "learning_rate": 5.2373766991954566e-06,
      "loss": 0.0162,
      "step": 1465580
    },
    {
      "epoch": 2.3984865445166696,
      "grad_norm": 0.5546859502792358,
      "learning_rate": 5.237310806981939e-06,
      "loss": 0.0192,
      "step": 1465600
    },
    {
      "epoch": 2.398519274955323,
      "grad_norm": 0.12527760863304138,
      "learning_rate": 5.237244914768422e-06,
      "loss": 0.017,
      "step": 1465620
    },
    {
      "epoch": 2.3985520053939764,
      "grad_norm": 0.40650179982185364,
      "learning_rate": 5.237179022554905e-06,
      "loss": 0.0119,
      "step": 1465640
    },
    {
      "epoch": 2.3985847358326295,
      "grad_norm": 0.6997759938240051,
      "learning_rate": 5.237113130341388e-06,
      "loss": 0.0127,
      "step": 1465660
    },
    {
      "epoch": 2.398617466271283,
      "grad_norm": 0.17329178750514984,
      "learning_rate": 5.23704723812787e-06,
      "loss": 0.0119,
      "step": 1465680
    },
    {
      "epoch": 2.3986501967099363,
      "grad_norm": 0.38129255175590515,
      "learning_rate": 5.236981345914354e-06,
      "loss": 0.0149,
      "step": 1465700
    },
    {
      "epoch": 2.39868292714859,
      "grad_norm": 0.5056729912757874,
      "learning_rate": 5.2369154537008374e-06,
      "loss": 0.0148,
      "step": 1465720
    },
    {
      "epoch": 2.398715657587243,
      "grad_norm": 0.2788570523262024,
      "learning_rate": 5.236849561487319e-06,
      "loss": 0.0132,
      "step": 1465740
    },
    {
      "epoch": 2.398748388025896,
      "grad_norm": 0.41809749603271484,
      "learning_rate": 5.236783669273803e-06,
      "loss": 0.0117,
      "step": 1465760
    },
    {
      "epoch": 2.3987811184645498,
      "grad_norm": 0.5140256285667419,
      "learning_rate": 5.236717777060285e-06,
      "loss": 0.0136,
      "step": 1465780
    },
    {
      "epoch": 2.398813848903203,
      "grad_norm": 1.172048568725586,
      "learning_rate": 5.236651884846768e-06,
      "loss": 0.017,
      "step": 1465800
    },
    {
      "epoch": 2.3988465793418565,
      "grad_norm": 0.6822720766067505,
      "learning_rate": 5.236585992633251e-06,
      "loss": 0.014,
      "step": 1465820
    },
    {
      "epoch": 2.3988793097805097,
      "grad_norm": 0.17964622378349304,
      "learning_rate": 5.236520100419734e-06,
      "loss": 0.0168,
      "step": 1465840
    },
    {
      "epoch": 2.3989120402191633,
      "grad_norm": 0.3387486934661865,
      "learning_rate": 5.236454208206217e-06,
      "loss": 0.0151,
      "step": 1465860
    },
    {
      "epoch": 2.3989447706578164,
      "grad_norm": 0.08590586483478546,
      "learning_rate": 5.2363883159927e-06,
      "loss": 0.018,
      "step": 1465880
    },
    {
      "epoch": 2.3989775010964696,
      "grad_norm": 0.23059500753879547,
      "learning_rate": 5.236322423779182e-06,
      "loss": 0.0138,
      "step": 1465900
    },
    {
      "epoch": 2.399010231535123,
      "grad_norm": 0.6845729351043701,
      "learning_rate": 5.236256531565666e-06,
      "loss": 0.0166,
      "step": 1465920
    },
    {
      "epoch": 2.3990429619737763,
      "grad_norm": 0.45324286818504333,
      "learning_rate": 5.2361906393521476e-06,
      "loss": 0.0172,
      "step": 1465940
    },
    {
      "epoch": 2.39907569241243,
      "grad_norm": 0.20947526395320892,
      "learning_rate": 5.236124747138631e-06,
      "loss": 0.0127,
      "step": 1465960
    },
    {
      "epoch": 2.399108422851083,
      "grad_norm": 0.6450087428092957,
      "learning_rate": 5.236058854925113e-06,
      "loss": 0.0154,
      "step": 1465980
    },
    {
      "epoch": 2.399141153289736,
      "grad_norm": 0.10108041763305664,
      "learning_rate": 5.235992962711597e-06,
      "loss": 0.0156,
      "step": 1466000
    },
    {
      "epoch": 2.39917388372839,
      "grad_norm": 0.3685062825679779,
      "learning_rate": 5.235927070498079e-06,
      "loss": 0.0121,
      "step": 1466020
    },
    {
      "epoch": 2.399206614167043,
      "grad_norm": 0.7748943567276001,
      "learning_rate": 5.235861178284562e-06,
      "loss": 0.0129,
      "step": 1466040
    },
    {
      "epoch": 2.3992393446056965,
      "grad_norm": 1.004920244216919,
      "learning_rate": 5.235795286071046e-06,
      "loss": 0.016,
      "step": 1466060
    },
    {
      "epoch": 2.3992720750443497,
      "grad_norm": 0.5604586005210876,
      "learning_rate": 5.2357293938575284e-06,
      "loss": 0.0176,
      "step": 1466080
    },
    {
      "epoch": 2.399304805483003,
      "grad_norm": 0.07727561146020889,
      "learning_rate": 5.235663501644011e-06,
      "loss": 0.0158,
      "step": 1466100
    },
    {
      "epoch": 2.3993375359216564,
      "grad_norm": 0.14097018539905548,
      "learning_rate": 5.235597609430494e-06,
      "loss": 0.0204,
      "step": 1466120
    },
    {
      "epoch": 2.3993702663603096,
      "grad_norm": 0.35285213589668274,
      "learning_rate": 5.2355317172169775e-06,
      "loss": 0.016,
      "step": 1466140
    },
    {
      "epoch": 2.399402996798963,
      "grad_norm": 0.5506317615509033,
      "learning_rate": 5.235465825003459e-06,
      "loss": 0.013,
      "step": 1466160
    },
    {
      "epoch": 2.3994357272376163,
      "grad_norm": 0.21766377985477448,
      "learning_rate": 5.235399932789943e-06,
      "loss": 0.0152,
      "step": 1466180
    },
    {
      "epoch": 2.39946845767627,
      "grad_norm": 0.4313058853149414,
      "learning_rate": 5.235334040576425e-06,
      "loss": 0.015,
      "step": 1466200
    },
    {
      "epoch": 2.399501188114923,
      "grad_norm": 0.4234256446361542,
      "learning_rate": 5.2352681483629085e-06,
      "loss": 0.0151,
      "step": 1466220
    },
    {
      "epoch": 2.3995339185535762,
      "grad_norm": 0.43548583984375,
      "learning_rate": 5.235202256149391e-06,
      "loss": 0.0132,
      "step": 1466240
    },
    {
      "epoch": 2.39956664899223,
      "grad_norm": 0.04817953333258629,
      "learning_rate": 5.235136363935874e-06,
      "loss": 0.0142,
      "step": 1466260
    },
    {
      "epoch": 2.399599379430883,
      "grad_norm": 0.5158950090408325,
      "learning_rate": 5.235070471722357e-06,
      "loss": 0.0121,
      "step": 1466280
    },
    {
      "epoch": 2.3996321098695366,
      "grad_norm": 0.22634348273277283,
      "learning_rate": 5.23500457950884e-06,
      "loss": 0.0222,
      "step": 1466300
    },
    {
      "epoch": 2.3996648403081897,
      "grad_norm": 0.18726204335689545,
      "learning_rate": 5.234938687295322e-06,
      "loss": 0.0121,
      "step": 1466320
    },
    {
      "epoch": 2.3996975707468433,
      "grad_norm": 0.34617161750793457,
      "learning_rate": 5.234872795081806e-06,
      "loss": 0.016,
      "step": 1466340
    },
    {
      "epoch": 2.3997303011854965,
      "grad_norm": 0.1876392513513565,
      "learning_rate": 5.234806902868288e-06,
      "loss": 0.0178,
      "step": 1466360
    },
    {
      "epoch": 2.3997630316241496,
      "grad_norm": 0.1598123461008072,
      "learning_rate": 5.234741010654771e-06,
      "loss": 0.0103,
      "step": 1466380
    },
    {
      "epoch": 2.399795762062803,
      "grad_norm": 0.8261063694953918,
      "learning_rate": 5.234675118441255e-06,
      "loss": 0.016,
      "step": 1466400
    },
    {
      "epoch": 2.3998284925014564,
      "grad_norm": 0.46645060181617737,
      "learning_rate": 5.234609226227737e-06,
      "loss": 0.0152,
      "step": 1466420
    },
    {
      "epoch": 2.39986122294011,
      "grad_norm": 0.5906506776809692,
      "learning_rate": 5.23454333401422e-06,
      "loss": 0.0158,
      "step": 1466440
    },
    {
      "epoch": 2.399893953378763,
      "grad_norm": 0.2648477852344513,
      "learning_rate": 5.234477441800703e-06,
      "loss": 0.0135,
      "step": 1466460
    },
    {
      "epoch": 2.3999266838174167,
      "grad_norm": 0.34652218222618103,
      "learning_rate": 5.234411549587186e-06,
      "loss": 0.0119,
      "step": 1466480
    },
    {
      "epoch": 2.39995941425607,
      "grad_norm": 0.15837816894054413,
      "learning_rate": 5.2343456573736685e-06,
      "loss": 0.0148,
      "step": 1466500
    },
    {
      "epoch": 2.399992144694723,
      "grad_norm": 0.4641561210155487,
      "learning_rate": 5.234279765160152e-06,
      "loss": 0.0189,
      "step": 1466520
    },
    {
      "epoch": 2.4000248751333766,
      "grad_norm": 0.38365638256073,
      "learning_rate": 5.234213872946634e-06,
      "loss": 0.0117,
      "step": 1466540
    },
    {
      "epoch": 2.4000576055720297,
      "grad_norm": 0.6697286367416382,
      "learning_rate": 5.234147980733118e-06,
      "loss": 0.0137,
      "step": 1466560
    },
    {
      "epoch": 2.4000903360106833,
      "grad_norm": 1.0401620864868164,
      "learning_rate": 5.2340820885195995e-06,
      "loss": 0.0106,
      "step": 1466580
    },
    {
      "epoch": 2.4001230664493365,
      "grad_norm": 0.0999448373913765,
      "learning_rate": 5.234016196306083e-06,
      "loss": 0.01,
      "step": 1466600
    },
    {
      "epoch": 2.40015579688799,
      "grad_norm": 1.8510159254074097,
      "learning_rate": 5.233950304092566e-06,
      "loss": 0.0134,
      "step": 1466620
    },
    {
      "epoch": 2.4001885273266432,
      "grad_norm": 0.15928056836128235,
      "learning_rate": 5.2338844118790485e-06,
      "loss": 0.0244,
      "step": 1466640
    },
    {
      "epoch": 2.4002212577652964,
      "grad_norm": 0.8474109172821045,
      "learning_rate": 5.233818519665531e-06,
      "loss": 0.0145,
      "step": 1466660
    },
    {
      "epoch": 2.40025398820395,
      "grad_norm": 0.48054617643356323,
      "learning_rate": 5.233752627452015e-06,
      "loss": 0.0158,
      "step": 1466680
    },
    {
      "epoch": 2.400286718642603,
      "grad_norm": 0.2993910014629364,
      "learning_rate": 5.233686735238497e-06,
      "loss": 0.0188,
      "step": 1466700
    },
    {
      "epoch": 2.4003194490812567,
      "grad_norm": 0.3641964793205261,
      "learning_rate": 5.23362084302498e-06,
      "loss": 0.0122,
      "step": 1466720
    },
    {
      "epoch": 2.40035217951991,
      "grad_norm": 0.3133785128593445,
      "learning_rate": 5.233554950811464e-06,
      "loss": 0.009,
      "step": 1466740
    },
    {
      "epoch": 2.4003849099585635,
      "grad_norm": 0.2620539963245392,
      "learning_rate": 5.233489058597946e-06,
      "loss": 0.0146,
      "step": 1466760
    },
    {
      "epoch": 2.4004176403972166,
      "grad_norm": 0.23073787987232208,
      "learning_rate": 5.233423166384429e-06,
      "loss": 0.012,
      "step": 1466780
    },
    {
      "epoch": 2.4004503708358698,
      "grad_norm": 0.20986439287662506,
      "learning_rate": 5.233357274170911e-06,
      "loss": 0.0127,
      "step": 1466800
    },
    {
      "epoch": 2.4004831012745234,
      "grad_norm": 0.11606532335281372,
      "learning_rate": 5.233291381957395e-06,
      "loss": 0.0115,
      "step": 1466820
    },
    {
      "epoch": 2.4005158317131765,
      "grad_norm": 0.36915266513824463,
      "learning_rate": 5.233225489743878e-06,
      "loss": 0.014,
      "step": 1466840
    },
    {
      "epoch": 2.40054856215183,
      "grad_norm": 0.46389520168304443,
      "learning_rate": 5.23315959753036e-06,
      "loss": 0.0141,
      "step": 1466860
    },
    {
      "epoch": 2.4005812925904833,
      "grad_norm": 0.6737108826637268,
      "learning_rate": 5.233093705316843e-06,
      "loss": 0.0125,
      "step": 1466880
    },
    {
      "epoch": 2.400614023029137,
      "grad_norm": 0.6341004371643066,
      "learning_rate": 5.233027813103327e-06,
      "loss": 0.0152,
      "step": 1466900
    },
    {
      "epoch": 2.40064675346779,
      "grad_norm": 0.5261428356170654,
      "learning_rate": 5.232961920889809e-06,
      "loss": 0.0216,
      "step": 1466920
    },
    {
      "epoch": 2.400679483906443,
      "grad_norm": 0.10708698630332947,
      "learning_rate": 5.232896028676292e-06,
      "loss": 0.0152,
      "step": 1466940
    },
    {
      "epoch": 2.4007122143450967,
      "grad_norm": 0.26266205310821533,
      "learning_rate": 5.232830136462774e-06,
      "loss": 0.0149,
      "step": 1466960
    },
    {
      "epoch": 2.40074494478375,
      "grad_norm": 0.2913268506526947,
      "learning_rate": 5.232764244249258e-06,
      "loss": 0.0142,
      "step": 1466980
    },
    {
      "epoch": 2.4007776752224035,
      "grad_norm": 0.22859369218349457,
      "learning_rate": 5.2326983520357396e-06,
      "loss": 0.0092,
      "step": 1467000
    },
    {
      "epoch": 2.4008104056610566,
      "grad_norm": 0.07986000925302505,
      "learning_rate": 5.232632459822223e-06,
      "loss": 0.0125,
      "step": 1467020
    },
    {
      "epoch": 2.4008431360997102,
      "grad_norm": 0.6276720762252808,
      "learning_rate": 5.232566567608706e-06,
      "loss": 0.0198,
      "step": 1467040
    },
    {
      "epoch": 2.4008758665383634,
      "grad_norm": 0.1847505420446396,
      "learning_rate": 5.232500675395189e-06,
      "loss": 0.0164,
      "step": 1467060
    },
    {
      "epoch": 2.4009085969770165,
      "grad_norm": 0.6153063178062439,
      "learning_rate": 5.232434783181671e-06,
      "loss": 0.0098,
      "step": 1467080
    },
    {
      "epoch": 2.40094132741567,
      "grad_norm": 0.4443906843662262,
      "learning_rate": 5.232368890968155e-06,
      "loss": 0.0152,
      "step": 1467100
    },
    {
      "epoch": 2.4009740578543233,
      "grad_norm": 0.2574169337749481,
      "learning_rate": 5.232302998754638e-06,
      "loss": 0.0088,
      "step": 1467120
    },
    {
      "epoch": 2.401006788292977,
      "grad_norm": 0.29582297801971436,
      "learning_rate": 5.2322371065411204e-06,
      "loss": 0.0125,
      "step": 1467140
    },
    {
      "epoch": 2.40103951873163,
      "grad_norm": 0.6931500434875488,
      "learning_rate": 5.232171214327604e-06,
      "loss": 0.0174,
      "step": 1467160
    },
    {
      "epoch": 2.4010722491702836,
      "grad_norm": 0.40924882888793945,
      "learning_rate": 5.232105322114086e-06,
      "loss": 0.0225,
      "step": 1467180
    },
    {
      "epoch": 2.4011049796089368,
      "grad_norm": 0.8632029891014099,
      "learning_rate": 5.2320394299005695e-06,
      "loss": 0.0083,
      "step": 1467200
    },
    {
      "epoch": 2.40113771004759,
      "grad_norm": 0.4239221215248108,
      "learning_rate": 5.231973537687051e-06,
      "loss": 0.0115,
      "step": 1467220
    },
    {
      "epoch": 2.4011704404862435,
      "grad_norm": 0.19055724143981934,
      "learning_rate": 5.231907645473535e-06,
      "loss": 0.0121,
      "step": 1467240
    },
    {
      "epoch": 2.4012031709248967,
      "grad_norm": 0.419685959815979,
      "learning_rate": 5.231841753260018e-06,
      "loss": 0.0111,
      "step": 1467260
    },
    {
      "epoch": 2.4012359013635503,
      "grad_norm": 0.09035924077033997,
      "learning_rate": 5.2317758610465004e-06,
      "loss": 0.0129,
      "step": 1467280
    },
    {
      "epoch": 2.4012686318022034,
      "grad_norm": 0.17219923436641693,
      "learning_rate": 5.231709968832983e-06,
      "loss": 0.0166,
      "step": 1467300
    },
    {
      "epoch": 2.401301362240857,
      "grad_norm": 0.322904109954834,
      "learning_rate": 5.231644076619467e-06,
      "loss": 0.0123,
      "step": 1467320
    },
    {
      "epoch": 2.40133409267951,
      "grad_norm": 0.7568742632865906,
      "learning_rate": 5.231578184405949e-06,
      "loss": 0.016,
      "step": 1467340
    },
    {
      "epoch": 2.4013668231181633,
      "grad_norm": 0.3481580913066864,
      "learning_rate": 5.231512292192432e-06,
      "loss": 0.0159,
      "step": 1467360
    },
    {
      "epoch": 2.401399553556817,
      "grad_norm": 0.5067716836929321,
      "learning_rate": 5.231446399978914e-06,
      "loss": 0.0139,
      "step": 1467380
    },
    {
      "epoch": 2.40143228399547,
      "grad_norm": 0.3121373653411865,
      "learning_rate": 5.231380507765398e-06,
      "loss": 0.0171,
      "step": 1467400
    },
    {
      "epoch": 2.4014650144341236,
      "grad_norm": 0.5787986516952515,
      "learning_rate": 5.2313146155518805e-06,
      "loss": 0.0232,
      "step": 1467420
    },
    {
      "epoch": 2.401497744872777,
      "grad_norm": 0.7526827454566956,
      "learning_rate": 5.231248723338363e-06,
      "loss": 0.0243,
      "step": 1467440
    },
    {
      "epoch": 2.40153047531143,
      "grad_norm": 0.27350106835365295,
      "learning_rate": 5.231182831124847e-06,
      "loss": 0.012,
      "step": 1467460
    },
    {
      "epoch": 2.4015632057500835,
      "grad_norm": 0.32511064410209656,
      "learning_rate": 5.2311169389113295e-06,
      "loss": 0.0137,
      "step": 1467480
    },
    {
      "epoch": 2.4015959361887367,
      "grad_norm": 0.20616845786571503,
      "learning_rate": 5.231051046697812e-06,
      "loss": 0.0218,
      "step": 1467500
    },
    {
      "epoch": 2.4016286666273903,
      "grad_norm": 0.6509302854537964,
      "learning_rate": 5.230985154484295e-06,
      "loss": 0.0152,
      "step": 1467520
    },
    {
      "epoch": 2.4016613970660434,
      "grad_norm": 0.20503327250480652,
      "learning_rate": 5.230919262270779e-06,
      "loss": 0.0123,
      "step": 1467540
    },
    {
      "epoch": 2.4016941275046966,
      "grad_norm": 1.316005825996399,
      "learning_rate": 5.2308533700572605e-06,
      "loss": 0.017,
      "step": 1467560
    },
    {
      "epoch": 2.40172685794335,
      "grad_norm": 0.2911009192466736,
      "learning_rate": 5.230787477843744e-06,
      "loss": 0.0153,
      "step": 1467580
    },
    {
      "epoch": 2.4017595883820033,
      "grad_norm": 0.5049333572387695,
      "learning_rate": 5.230721585630226e-06,
      "loss": 0.0108,
      "step": 1467600
    },
    {
      "epoch": 2.401792318820657,
      "grad_norm": 0.16988039016723633,
      "learning_rate": 5.2306556934167096e-06,
      "loss": 0.0154,
      "step": 1467620
    },
    {
      "epoch": 2.40182504925931,
      "grad_norm": 0.3205258846282959,
      "learning_rate": 5.230589801203192e-06,
      "loss": 0.0171,
      "step": 1467640
    },
    {
      "epoch": 2.4018577796979637,
      "grad_norm": 0.41037696599960327,
      "learning_rate": 5.230523908989675e-06,
      "loss": 0.014,
      "step": 1467660
    },
    {
      "epoch": 2.401890510136617,
      "grad_norm": 0.3699840307235718,
      "learning_rate": 5.230458016776158e-06,
      "loss": 0.0132,
      "step": 1467680
    },
    {
      "epoch": 2.40192324057527,
      "grad_norm": 0.8382063508033752,
      "learning_rate": 5.230392124562641e-06,
      "loss": 0.019,
      "step": 1467700
    },
    {
      "epoch": 2.4019559710139236,
      "grad_norm": 0.7190117239952087,
      "learning_rate": 5.230326232349123e-06,
      "loss": 0.0141,
      "step": 1467720
    },
    {
      "epoch": 2.4019887014525767,
      "grad_norm": 0.27058595418930054,
      "learning_rate": 5.230260340135607e-06,
      "loss": 0.0116,
      "step": 1467740
    },
    {
      "epoch": 2.4020214318912303,
      "grad_norm": 0.09650819003582001,
      "learning_rate": 5.230194447922089e-06,
      "loss": 0.0212,
      "step": 1467760
    },
    {
      "epoch": 2.4020541623298834,
      "grad_norm": 0.15875238180160522,
      "learning_rate": 5.230128555708572e-06,
      "loss": 0.0127,
      "step": 1467780
    },
    {
      "epoch": 2.402086892768537,
      "grad_norm": 0.27415454387664795,
      "learning_rate": 5.230062663495056e-06,
      "loss": 0.0143,
      "step": 1467800
    },
    {
      "epoch": 2.40211962320719,
      "grad_norm": 0.10813136398792267,
      "learning_rate": 5.229996771281538e-06,
      "loss": 0.01,
      "step": 1467820
    },
    {
      "epoch": 2.4021523536458433,
      "grad_norm": 0.5952982306480408,
      "learning_rate": 5.229930879068021e-06,
      "loss": 0.0124,
      "step": 1467840
    },
    {
      "epoch": 2.402185084084497,
      "grad_norm": 0.084278404712677,
      "learning_rate": 5.229864986854504e-06,
      "loss": 0.017,
      "step": 1467860
    },
    {
      "epoch": 2.40221781452315,
      "grad_norm": 0.1845586746931076,
      "learning_rate": 5.229799094640987e-06,
      "loss": 0.0099,
      "step": 1467880
    },
    {
      "epoch": 2.4022505449618037,
      "grad_norm": 0.22796642780303955,
      "learning_rate": 5.22973320242747e-06,
      "loss": 0.0192,
      "step": 1467900
    },
    {
      "epoch": 2.402283275400457,
      "grad_norm": 0.7104358077049255,
      "learning_rate": 5.229667310213953e-06,
      "loss": 0.0167,
      "step": 1467920
    },
    {
      "epoch": 2.4023160058391104,
      "grad_norm": 0.09100151807069778,
      "learning_rate": 5.229601418000435e-06,
      "loss": 0.0138,
      "step": 1467940
    },
    {
      "epoch": 2.4023487362777636,
      "grad_norm": 0.3762974441051483,
      "learning_rate": 5.229535525786919e-06,
      "loss": 0.0138,
      "step": 1467960
    },
    {
      "epoch": 2.4023814667164167,
      "grad_norm": 0.3554782569408417,
      "learning_rate": 5.2294696335734006e-06,
      "loss": 0.0137,
      "step": 1467980
    },
    {
      "epoch": 2.4024141971550703,
      "grad_norm": 0.2072857916355133,
      "learning_rate": 5.229403741359884e-06,
      "loss": 0.0146,
      "step": 1468000
    },
    {
      "epoch": 2.4024469275937235,
      "grad_norm": 0.3176170885562897,
      "learning_rate": 5.229337849146366e-06,
      "loss": 0.0194,
      "step": 1468020
    },
    {
      "epoch": 2.402479658032377,
      "grad_norm": 0.5984800457954407,
      "learning_rate": 5.22927195693285e-06,
      "loss": 0.015,
      "step": 1468040
    },
    {
      "epoch": 2.40251238847103,
      "grad_norm": 0.3451458215713501,
      "learning_rate": 5.229206064719332e-06,
      "loss": 0.0136,
      "step": 1468060
    },
    {
      "epoch": 2.402545118909684,
      "grad_norm": 0.4259847402572632,
      "learning_rate": 5.229140172505815e-06,
      "loss": 0.0163,
      "step": 1468080
    },
    {
      "epoch": 2.402577849348337,
      "grad_norm": 0.29529985785484314,
      "learning_rate": 5.229074280292298e-06,
      "loss": 0.0183,
      "step": 1468100
    },
    {
      "epoch": 2.40261057978699,
      "grad_norm": 0.4654347002506256,
      "learning_rate": 5.2290083880787814e-06,
      "loss": 0.0083,
      "step": 1468120
    },
    {
      "epoch": 2.4026433102256437,
      "grad_norm": 0.5755136609077454,
      "learning_rate": 5.228942495865263e-06,
      "loss": 0.0084,
      "step": 1468140
    },
    {
      "epoch": 2.402676040664297,
      "grad_norm": 0.17156915366649628,
      "learning_rate": 5.228876603651747e-06,
      "loss": 0.0176,
      "step": 1468160
    },
    {
      "epoch": 2.4027087711029504,
      "grad_norm": 0.14281660318374634,
      "learning_rate": 5.2288107114382305e-06,
      "loss": 0.0148,
      "step": 1468180
    },
    {
      "epoch": 2.4027415015416036,
      "grad_norm": 0.7408270835876465,
      "learning_rate": 5.228744819224712e-06,
      "loss": 0.0154,
      "step": 1468200
    },
    {
      "epoch": 2.402774231980257,
      "grad_norm": 0.3238029479980469,
      "learning_rate": 5.228678927011196e-06,
      "loss": 0.0202,
      "step": 1468220
    },
    {
      "epoch": 2.4028069624189103,
      "grad_norm": 0.5833285450935364,
      "learning_rate": 5.228613034797678e-06,
      "loss": 0.0134,
      "step": 1468240
    },
    {
      "epoch": 2.4028396928575635,
      "grad_norm": 0.31996598839759827,
      "learning_rate": 5.2285471425841615e-06,
      "loss": 0.0128,
      "step": 1468260
    },
    {
      "epoch": 2.402872423296217,
      "grad_norm": 0.49257758259773254,
      "learning_rate": 5.228481250370644e-06,
      "loss": 0.0192,
      "step": 1468280
    },
    {
      "epoch": 2.4029051537348702,
      "grad_norm": 0.5976218581199646,
      "learning_rate": 5.228415358157127e-06,
      "loss": 0.0176,
      "step": 1468300
    },
    {
      "epoch": 2.402937884173524,
      "grad_norm": 0.5693067312240601,
      "learning_rate": 5.22834946594361e-06,
      "loss": 0.0167,
      "step": 1468320
    },
    {
      "epoch": 2.402970614612177,
      "grad_norm": 0.14892975986003876,
      "learning_rate": 5.228283573730093e-06,
      "loss": 0.0101,
      "step": 1468340
    },
    {
      "epoch": 2.4030033450508306,
      "grad_norm": 0.28775644302368164,
      "learning_rate": 5.228217681516575e-06,
      "loss": 0.0157,
      "step": 1468360
    },
    {
      "epoch": 2.4030360754894837,
      "grad_norm": 0.4128718972206116,
      "learning_rate": 5.228151789303059e-06,
      "loss": 0.016,
      "step": 1468380
    },
    {
      "epoch": 2.403068805928137,
      "grad_norm": 0.25190088152885437,
      "learning_rate": 5.228085897089541e-06,
      "loss": 0.0189,
      "step": 1468400
    },
    {
      "epoch": 2.4031015363667905,
      "grad_norm": 0.6689876317977905,
      "learning_rate": 5.228020004876024e-06,
      "loss": 0.0181,
      "step": 1468420
    },
    {
      "epoch": 2.4031342668054436,
      "grad_norm": 0.6391066908836365,
      "learning_rate": 5.227954112662507e-06,
      "loss": 0.0184,
      "step": 1468440
    },
    {
      "epoch": 2.403166997244097,
      "grad_norm": 2.0809717178344727,
      "learning_rate": 5.22788822044899e-06,
      "loss": 0.0156,
      "step": 1468460
    },
    {
      "epoch": 2.4031997276827504,
      "grad_norm": 0.11352100968360901,
      "learning_rate": 5.2278223282354725e-06,
      "loss": 0.0179,
      "step": 1468480
    },
    {
      "epoch": 2.403232458121404,
      "grad_norm": 2.1484415531158447,
      "learning_rate": 5.227756436021956e-06,
      "loss": 0.0122,
      "step": 1468500
    },
    {
      "epoch": 2.403265188560057,
      "grad_norm": 0.5247931480407715,
      "learning_rate": 5.227690543808439e-06,
      "loss": 0.0136,
      "step": 1468520
    },
    {
      "epoch": 2.4032979189987103,
      "grad_norm": 0.3559158444404602,
      "learning_rate": 5.2276246515949215e-06,
      "loss": 0.0166,
      "step": 1468540
    },
    {
      "epoch": 2.403330649437364,
      "grad_norm": 0.3492484390735626,
      "learning_rate": 5.227558759381405e-06,
      "loss": 0.0139,
      "step": 1468560
    },
    {
      "epoch": 2.403363379876017,
      "grad_norm": 0.28048986196517944,
      "learning_rate": 5.227492867167887e-06,
      "loss": 0.0133,
      "step": 1468580
    },
    {
      "epoch": 2.4033961103146706,
      "grad_norm": 0.12198657542467117,
      "learning_rate": 5.227426974954371e-06,
      "loss": 0.0098,
      "step": 1468600
    },
    {
      "epoch": 2.4034288407533237,
      "grad_norm": 0.25374966859817505,
      "learning_rate": 5.2273610827408525e-06,
      "loss": 0.012,
      "step": 1468620
    },
    {
      "epoch": 2.4034615711919773,
      "grad_norm": 0.16664867103099823,
      "learning_rate": 5.227295190527336e-06,
      "loss": 0.0089,
      "step": 1468640
    },
    {
      "epoch": 2.4034943016306305,
      "grad_norm": 0.36907052993774414,
      "learning_rate": 5.227229298313819e-06,
      "loss": 0.012,
      "step": 1468660
    },
    {
      "epoch": 2.4035270320692836,
      "grad_norm": 0.37178367376327515,
      "learning_rate": 5.2271634061003015e-06,
      "loss": 0.0227,
      "step": 1468680
    },
    {
      "epoch": 2.4035597625079372,
      "grad_norm": 0.5380109548568726,
      "learning_rate": 5.227097513886784e-06,
      "loss": 0.0103,
      "step": 1468700
    },
    {
      "epoch": 2.4035924929465904,
      "grad_norm": 0.2183166891336441,
      "learning_rate": 5.227031621673268e-06,
      "loss": 0.0133,
      "step": 1468720
    },
    {
      "epoch": 2.403625223385244,
      "grad_norm": 0.692764937877655,
      "learning_rate": 5.22696572945975e-06,
      "loss": 0.0186,
      "step": 1468740
    },
    {
      "epoch": 2.403657953823897,
      "grad_norm": 0.4351106286048889,
      "learning_rate": 5.226899837246233e-06,
      "loss": 0.0139,
      "step": 1468760
    },
    {
      "epoch": 2.4036906842625507,
      "grad_norm": 0.3187367022037506,
      "learning_rate": 5.226833945032715e-06,
      "loss": 0.0145,
      "step": 1468780
    },
    {
      "epoch": 2.403723414701204,
      "grad_norm": 0.17946383357048035,
      "learning_rate": 5.226768052819199e-06,
      "loss": 0.0137,
      "step": 1468800
    },
    {
      "epoch": 2.403756145139857,
      "grad_norm": 0.5568345785140991,
      "learning_rate": 5.226702160605681e-06,
      "loss": 0.0148,
      "step": 1468820
    },
    {
      "epoch": 2.4037888755785106,
      "grad_norm": 0.21976305544376373,
      "learning_rate": 5.226636268392164e-06,
      "loss": 0.0109,
      "step": 1468840
    },
    {
      "epoch": 2.4038216060171638,
      "grad_norm": 2.5471036434173584,
      "learning_rate": 5.226570376178648e-06,
      "loss": 0.0141,
      "step": 1468860
    },
    {
      "epoch": 2.4038543364558174,
      "grad_norm": 0.5223107933998108,
      "learning_rate": 5.22650448396513e-06,
      "loss": 0.0121,
      "step": 1468880
    },
    {
      "epoch": 2.4038870668944705,
      "grad_norm": 0.27876630425453186,
      "learning_rate": 5.226438591751613e-06,
      "loss": 0.01,
      "step": 1468900
    },
    {
      "epoch": 2.4039197973331237,
      "grad_norm": 0.16463200747966766,
      "learning_rate": 5.226372699538096e-06,
      "loss": 0.0108,
      "step": 1468920
    },
    {
      "epoch": 2.4039525277717773,
      "grad_norm": 0.5099439024925232,
      "learning_rate": 5.22630680732458e-06,
      "loss": 0.0135,
      "step": 1468940
    },
    {
      "epoch": 2.4039852582104304,
      "grad_norm": 0.30449095368385315,
      "learning_rate": 5.226240915111062e-06,
      "loss": 0.0119,
      "step": 1468960
    },
    {
      "epoch": 2.404017988649084,
      "grad_norm": 0.5663699507713318,
      "learning_rate": 5.226175022897545e-06,
      "loss": 0.0162,
      "step": 1468980
    },
    {
      "epoch": 2.404050719087737,
      "grad_norm": 0.8383750915527344,
      "learning_rate": 5.226109130684027e-06,
      "loss": 0.0117,
      "step": 1469000
    },
    {
      "epoch": 2.4040834495263903,
      "grad_norm": 1.2443737983703613,
      "learning_rate": 5.226043238470511e-06,
      "loss": 0.0186,
      "step": 1469020
    },
    {
      "epoch": 2.404116179965044,
      "grad_norm": 0.15930069983005524,
      "learning_rate": 5.2259773462569926e-06,
      "loss": 0.0196,
      "step": 1469040
    },
    {
      "epoch": 2.404148910403697,
      "grad_norm": 0.3045262396335602,
      "learning_rate": 5.225911454043476e-06,
      "loss": 0.0152,
      "step": 1469060
    },
    {
      "epoch": 2.4041816408423506,
      "grad_norm": 0.44550490379333496,
      "learning_rate": 5.225845561829959e-06,
      "loss": 0.0153,
      "step": 1469080
    },
    {
      "epoch": 2.404214371281004,
      "grad_norm": 0.40313205122947693,
      "learning_rate": 5.225779669616442e-06,
      "loss": 0.0137,
      "step": 1469100
    },
    {
      "epoch": 2.4042471017196574,
      "grad_norm": 0.2777242362499237,
      "learning_rate": 5.225713777402924e-06,
      "loss": 0.0122,
      "step": 1469120
    },
    {
      "epoch": 2.4042798321583105,
      "grad_norm": 0.31834840774536133,
      "learning_rate": 5.225647885189408e-06,
      "loss": 0.0164,
      "step": 1469140
    },
    {
      "epoch": 2.4043125625969637,
      "grad_norm": 0.349066823720932,
      "learning_rate": 5.22558199297589e-06,
      "loss": 0.0132,
      "step": 1469160
    },
    {
      "epoch": 2.4043452930356173,
      "grad_norm": 0.12343058735132217,
      "learning_rate": 5.2255161007623734e-06,
      "loss": 0.012,
      "step": 1469180
    },
    {
      "epoch": 2.4043780234742704,
      "grad_norm": 0.5564320683479309,
      "learning_rate": 5.225450208548857e-06,
      "loss": 0.0193,
      "step": 1469200
    },
    {
      "epoch": 2.404410753912924,
      "grad_norm": 0.23704034090042114,
      "learning_rate": 5.225384316335339e-06,
      "loss": 0.0131,
      "step": 1469220
    },
    {
      "epoch": 2.404443484351577,
      "grad_norm": 0.5897125005722046,
      "learning_rate": 5.2253184241218225e-06,
      "loss": 0.018,
      "step": 1469240
    },
    {
      "epoch": 2.4044762147902308,
      "grad_norm": 0.48391422629356384,
      "learning_rate": 5.225252531908304e-06,
      "loss": 0.0143,
      "step": 1469260
    },
    {
      "epoch": 2.404508945228884,
      "grad_norm": 1.2304928302764893,
      "learning_rate": 5.225186639694788e-06,
      "loss": 0.0167,
      "step": 1469280
    },
    {
      "epoch": 2.404541675667537,
      "grad_norm": 0.3806462585926056,
      "learning_rate": 5.225120747481271e-06,
      "loss": 0.0166,
      "step": 1469300
    },
    {
      "epoch": 2.4045744061061907,
      "grad_norm": 0.21038319170475006,
      "learning_rate": 5.2250548552677535e-06,
      "loss": 0.0179,
      "step": 1469320
    },
    {
      "epoch": 2.404607136544844,
      "grad_norm": 0.3952626883983612,
      "learning_rate": 5.224988963054236e-06,
      "loss": 0.012,
      "step": 1469340
    },
    {
      "epoch": 2.4046398669834974,
      "grad_norm": 0.09068413078784943,
      "learning_rate": 5.22492307084072e-06,
      "loss": 0.0109,
      "step": 1469360
    },
    {
      "epoch": 2.4046725974221506,
      "grad_norm": 0.983487606048584,
      "learning_rate": 5.224857178627202e-06,
      "loss": 0.0188,
      "step": 1469380
    },
    {
      "epoch": 2.404705327860804,
      "grad_norm": 0.10556145757436752,
      "learning_rate": 5.224791286413685e-06,
      "loss": 0.0125,
      "step": 1469400
    },
    {
      "epoch": 2.4047380582994573,
      "grad_norm": 0.23882873356342316,
      "learning_rate": 5.224725394200167e-06,
      "loss": 0.0233,
      "step": 1469420
    },
    {
      "epoch": 2.4047707887381105,
      "grad_norm": 0.28159692883491516,
      "learning_rate": 5.224659501986651e-06,
      "loss": 0.0163,
      "step": 1469440
    },
    {
      "epoch": 2.404803519176764,
      "grad_norm": 0.3160396218299866,
      "learning_rate": 5.2245936097731335e-06,
      "loss": 0.0128,
      "step": 1469460
    },
    {
      "epoch": 2.404836249615417,
      "grad_norm": 0.3343043625354767,
      "learning_rate": 5.224527717559616e-06,
      "loss": 0.0139,
      "step": 1469480
    },
    {
      "epoch": 2.404868980054071,
      "grad_norm": 0.6084673404693604,
      "learning_rate": 5.224461825346099e-06,
      "loss": 0.0163,
      "step": 1469500
    },
    {
      "epoch": 2.404901710492724,
      "grad_norm": 0.12510354816913605,
      "learning_rate": 5.2243959331325825e-06,
      "loss": 0.0146,
      "step": 1469520
    },
    {
      "epoch": 2.4049344409313775,
      "grad_norm": 0.22856377065181732,
      "learning_rate": 5.2243300409190644e-06,
      "loss": 0.0165,
      "step": 1469540
    },
    {
      "epoch": 2.4049671713700307,
      "grad_norm": 4.443276405334473,
      "learning_rate": 5.224264148705548e-06,
      "loss": 0.0148,
      "step": 1469560
    },
    {
      "epoch": 2.404999901808684,
      "grad_norm": 0.503601610660553,
      "learning_rate": 5.224198256492032e-06,
      "loss": 0.0157,
      "step": 1469580
    },
    {
      "epoch": 2.4050326322473374,
      "grad_norm": 0.34373006224632263,
      "learning_rate": 5.2241323642785135e-06,
      "loss": 0.0191,
      "step": 1469600
    },
    {
      "epoch": 2.4050653626859906,
      "grad_norm": 0.4609116017818451,
      "learning_rate": 5.224066472064997e-06,
      "loss": 0.016,
      "step": 1469620
    },
    {
      "epoch": 2.405098093124644,
      "grad_norm": 1.0927761793136597,
      "learning_rate": 5.224000579851479e-06,
      "loss": 0.0149,
      "step": 1469640
    },
    {
      "epoch": 2.4051308235632973,
      "grad_norm": 0.38287320733070374,
      "learning_rate": 5.2239346876379626e-06,
      "loss": 0.0173,
      "step": 1469660
    },
    {
      "epoch": 2.405163554001951,
      "grad_norm": 0.1478981375694275,
      "learning_rate": 5.223868795424445e-06,
      "loss": 0.012,
      "step": 1469680
    },
    {
      "epoch": 2.405196284440604,
      "grad_norm": 0.24230307340621948,
      "learning_rate": 5.223802903210928e-06,
      "loss": 0.0123,
      "step": 1469700
    },
    {
      "epoch": 2.405229014879257,
      "grad_norm": 0.22347648441791534,
      "learning_rate": 5.223737010997411e-06,
      "loss": 0.0146,
      "step": 1469720
    },
    {
      "epoch": 2.405261745317911,
      "grad_norm": 0.14306139945983887,
      "learning_rate": 5.223671118783894e-06,
      "loss": 0.0127,
      "step": 1469740
    },
    {
      "epoch": 2.405294475756564,
      "grad_norm": 0.2248212993144989,
      "learning_rate": 5.223605226570376e-06,
      "loss": 0.0106,
      "step": 1469760
    },
    {
      "epoch": 2.4053272061952176,
      "grad_norm": 0.4470982849597931,
      "learning_rate": 5.22353933435686e-06,
      "loss": 0.0159,
      "step": 1469780
    },
    {
      "epoch": 2.4053599366338707,
      "grad_norm": 0.4588278532028198,
      "learning_rate": 5.223473442143342e-06,
      "loss": 0.0117,
      "step": 1469800
    },
    {
      "epoch": 2.4053926670725243,
      "grad_norm": 0.434556782245636,
      "learning_rate": 5.223407549929825e-06,
      "loss": 0.013,
      "step": 1469820
    },
    {
      "epoch": 2.4054253975111775,
      "grad_norm": 0.18326647579669952,
      "learning_rate": 5.223341657716307e-06,
      "loss": 0.0104,
      "step": 1469840
    },
    {
      "epoch": 2.4054581279498306,
      "grad_norm": 0.20407822728157043,
      "learning_rate": 5.223275765502791e-06,
      "loss": 0.0153,
      "step": 1469860
    },
    {
      "epoch": 2.405490858388484,
      "grad_norm": 1.0478934049606323,
      "learning_rate": 5.2232098732892736e-06,
      "loss": 0.0152,
      "step": 1469880
    },
    {
      "epoch": 2.4055235888271373,
      "grad_norm": 0.6120912432670593,
      "learning_rate": 5.223143981075756e-06,
      "loss": 0.0135,
      "step": 1469900
    },
    {
      "epoch": 2.405556319265791,
      "grad_norm": 0.40553492307662964,
      "learning_rate": 5.22307808886224e-06,
      "loss": 0.0201,
      "step": 1469920
    },
    {
      "epoch": 2.405589049704444,
      "grad_norm": 0.1305016279220581,
      "learning_rate": 5.223012196648723e-06,
      "loss": 0.0097,
      "step": 1469940
    },
    {
      "epoch": 2.4056217801430977,
      "grad_norm": 0.2139991670846939,
      "learning_rate": 5.222946304435205e-06,
      "loss": 0.0112,
      "step": 1469960
    },
    {
      "epoch": 2.405654510581751,
      "grad_norm": 0.5296007394790649,
      "learning_rate": 5.222880412221688e-06,
      "loss": 0.0171,
      "step": 1469980
    },
    {
      "epoch": 2.405687241020404,
      "grad_norm": 0.40874627232551575,
      "learning_rate": 5.222814520008172e-06,
      "loss": 0.0155,
      "step": 1470000
    },
    {
      "epoch": 2.4057199714590576,
      "grad_norm": 0.6537275314331055,
      "learning_rate": 5.222748627794654e-06,
      "loss": 0.0112,
      "step": 1470020
    },
    {
      "epoch": 2.4057527018977107,
      "grad_norm": 0.11285747587680817,
      "learning_rate": 5.222682735581137e-06,
      "loss": 0.0177,
      "step": 1470040
    },
    {
      "epoch": 2.4057854323363643,
      "grad_norm": 0.34809765219688416,
      "learning_rate": 5.222616843367619e-06,
      "loss": 0.0231,
      "step": 1470060
    },
    {
      "epoch": 2.4058181627750175,
      "grad_norm": 0.39215829968452454,
      "learning_rate": 5.222550951154103e-06,
      "loss": 0.0117,
      "step": 1470080
    },
    {
      "epoch": 2.405850893213671,
      "grad_norm": 0.39533689618110657,
      "learning_rate": 5.222485058940585e-06,
      "loss": 0.0081,
      "step": 1470100
    },
    {
      "epoch": 2.405883623652324,
      "grad_norm": 0.9815480709075928,
      "learning_rate": 5.222419166727068e-06,
      "loss": 0.0155,
      "step": 1470120
    },
    {
      "epoch": 2.4059163540909774,
      "grad_norm": 0.3173273503780365,
      "learning_rate": 5.222353274513551e-06,
      "loss": 0.0091,
      "step": 1470140
    },
    {
      "epoch": 2.405949084529631,
      "grad_norm": 1.0117696523666382,
      "learning_rate": 5.2222873823000345e-06,
      "loss": 0.0186,
      "step": 1470160
    },
    {
      "epoch": 2.405981814968284,
      "grad_norm": 0.35935178399086,
      "learning_rate": 5.222221490086516e-06,
      "loss": 0.0211,
      "step": 1470180
    },
    {
      "epoch": 2.4060145454069377,
      "grad_norm": 0.5023313760757446,
      "learning_rate": 5.222155597873e-06,
      "loss": 0.0191,
      "step": 1470200
    },
    {
      "epoch": 2.406047275845591,
      "grad_norm": 0.29160839319229126,
      "learning_rate": 5.222089705659482e-06,
      "loss": 0.0136,
      "step": 1470220
    },
    {
      "epoch": 2.4060800062842445,
      "grad_norm": 0.3511219024658203,
      "learning_rate": 5.222023813445965e-06,
      "loss": 0.0213,
      "step": 1470240
    },
    {
      "epoch": 2.4061127367228976,
      "grad_norm": 0.34209293127059937,
      "learning_rate": 5.221957921232449e-06,
      "loss": 0.0133,
      "step": 1470260
    },
    {
      "epoch": 2.4061454671615508,
      "grad_norm": 0.3793364465236664,
      "learning_rate": 5.221892029018931e-06,
      "loss": 0.0115,
      "step": 1470280
    },
    {
      "epoch": 2.4061781976002043,
      "grad_norm": 0.35230302810668945,
      "learning_rate": 5.2218261368054145e-06,
      "loss": 0.013,
      "step": 1470300
    },
    {
      "epoch": 2.4062109280388575,
      "grad_norm": 0.19676929712295532,
      "learning_rate": 5.221760244591897e-06,
      "loss": 0.0104,
      "step": 1470320
    },
    {
      "epoch": 2.406243658477511,
      "grad_norm": 0.15145480632781982,
      "learning_rate": 5.22169435237838e-06,
      "loss": 0.0191,
      "step": 1470340
    },
    {
      "epoch": 2.4062763889161642,
      "grad_norm": 0.22717145085334778,
      "learning_rate": 5.221628460164863e-06,
      "loss": 0.0111,
      "step": 1470360
    },
    {
      "epoch": 2.406309119354818,
      "grad_norm": 1.5059106349945068,
      "learning_rate": 5.221562567951346e-06,
      "loss": 0.0191,
      "step": 1470380
    },
    {
      "epoch": 2.406341849793471,
      "grad_norm": 0.17603246867656708,
      "learning_rate": 5.221496675737828e-06,
      "loss": 0.0198,
      "step": 1470400
    },
    {
      "epoch": 2.406374580232124,
      "grad_norm": 0.18088629841804504,
      "learning_rate": 5.221430783524312e-06,
      "loss": 0.0116,
      "step": 1470420
    },
    {
      "epoch": 2.4064073106707777,
      "grad_norm": 0.24900445342063904,
      "learning_rate": 5.221364891310794e-06,
      "loss": 0.0099,
      "step": 1470440
    },
    {
      "epoch": 2.406440041109431,
      "grad_norm": 0.18744944036006927,
      "learning_rate": 5.221298999097277e-06,
      "loss": 0.0089,
      "step": 1470460
    },
    {
      "epoch": 2.4064727715480845,
      "grad_norm": 1.0958753824234009,
      "learning_rate": 5.22123310688376e-06,
      "loss": 0.0248,
      "step": 1470480
    },
    {
      "epoch": 2.4065055019867376,
      "grad_norm": 0.3247818648815155,
      "learning_rate": 5.221167214670243e-06,
      "loss": 0.0167,
      "step": 1470500
    },
    {
      "epoch": 2.4065382324253908,
      "grad_norm": 0.2602877616882324,
      "learning_rate": 5.2211013224567255e-06,
      "loss": 0.0114,
      "step": 1470520
    },
    {
      "epoch": 2.4065709628640444,
      "grad_norm": 0.5724681615829468,
      "learning_rate": 5.221035430243209e-06,
      "loss": 0.0121,
      "step": 1470540
    },
    {
      "epoch": 2.4066036933026975,
      "grad_norm": 0.24079982936382294,
      "learning_rate": 5.220969538029691e-06,
      "loss": 0.0132,
      "step": 1470560
    },
    {
      "epoch": 2.406636423741351,
      "grad_norm": 0.283199667930603,
      "learning_rate": 5.2209036458161745e-06,
      "loss": 0.0086,
      "step": 1470580
    },
    {
      "epoch": 2.4066691541800043,
      "grad_norm": 0.6472522020339966,
      "learning_rate": 5.220837753602656e-06,
      "loss": 0.0145,
      "step": 1470600
    },
    {
      "epoch": 2.4067018846186574,
      "grad_norm": 1.0253605842590332,
      "learning_rate": 5.22077186138914e-06,
      "loss": 0.0157,
      "step": 1470620
    },
    {
      "epoch": 2.406734615057311,
      "grad_norm": 3.3956573009490967,
      "learning_rate": 5.220705969175624e-06,
      "loss": 0.0275,
      "step": 1470640
    },
    {
      "epoch": 2.406767345495964,
      "grad_norm": 0.16614645719528198,
      "learning_rate": 5.2206400769621055e-06,
      "loss": 0.014,
      "step": 1470660
    },
    {
      "epoch": 2.4068000759346178,
      "grad_norm": 0.20404697954654694,
      "learning_rate": 5.220574184748589e-06,
      "loss": 0.0188,
      "step": 1470680
    },
    {
      "epoch": 2.406832806373271,
      "grad_norm": 0.46880990266799927,
      "learning_rate": 5.220508292535072e-06,
      "loss": 0.0143,
      "step": 1470700
    },
    {
      "epoch": 2.4068655368119245,
      "grad_norm": 0.6642833352088928,
      "learning_rate": 5.2204424003215546e-06,
      "loss": 0.0158,
      "step": 1470720
    },
    {
      "epoch": 2.4068982672505776,
      "grad_norm": 0.5282142162322998,
      "learning_rate": 5.220376508108037e-06,
      "loss": 0.0168,
      "step": 1470740
    },
    {
      "epoch": 2.406930997689231,
      "grad_norm": 1.1153168678283691,
      "learning_rate": 5.220310615894521e-06,
      "loss": 0.0188,
      "step": 1470760
    },
    {
      "epoch": 2.4069637281278844,
      "grad_norm": 0.5543349981307983,
      "learning_rate": 5.220244723681003e-06,
      "loss": 0.0245,
      "step": 1470780
    },
    {
      "epoch": 2.4069964585665375,
      "grad_norm": 0.5903515219688416,
      "learning_rate": 5.220178831467486e-06,
      "loss": 0.0107,
      "step": 1470800
    },
    {
      "epoch": 2.407029189005191,
      "grad_norm": 0.30473002791404724,
      "learning_rate": 5.220112939253968e-06,
      "loss": 0.0151,
      "step": 1470820
    },
    {
      "epoch": 2.4070619194438443,
      "grad_norm": 0.9281312227249146,
      "learning_rate": 5.220047047040452e-06,
      "loss": 0.0158,
      "step": 1470840
    },
    {
      "epoch": 2.407094649882498,
      "grad_norm": 0.18465681374073029,
      "learning_rate": 5.219981154826934e-06,
      "loss": 0.0136,
      "step": 1470860
    },
    {
      "epoch": 2.407127380321151,
      "grad_norm": 0.26104670763015747,
      "learning_rate": 5.219915262613417e-06,
      "loss": 0.0144,
      "step": 1470880
    },
    {
      "epoch": 2.407160110759804,
      "grad_norm": 0.22752095758914948,
      "learning_rate": 5.2198493703999e-06,
      "loss": 0.0212,
      "step": 1470900
    },
    {
      "epoch": 2.4071928411984578,
      "grad_norm": 0.5719118714332581,
      "learning_rate": 5.219783478186383e-06,
      "loss": 0.0126,
      "step": 1470920
    },
    {
      "epoch": 2.407225571637111,
      "grad_norm": 0.4491659104824066,
      "learning_rate": 5.2197175859728655e-06,
      "loss": 0.0129,
      "step": 1470940
    },
    {
      "epoch": 2.4072583020757645,
      "grad_norm": 0.6050483584403992,
      "learning_rate": 5.219651693759349e-06,
      "loss": 0.0138,
      "step": 1470960
    },
    {
      "epoch": 2.4072910325144177,
      "grad_norm": 0.30099189281463623,
      "learning_rate": 5.219585801545832e-06,
      "loss": 0.0225,
      "step": 1470980
    },
    {
      "epoch": 2.4073237629530713,
      "grad_norm": 0.469731867313385,
      "learning_rate": 5.219519909332315e-06,
      "loss": 0.0135,
      "step": 1471000
    },
    {
      "epoch": 2.4073564933917244,
      "grad_norm": 0.10111143440008163,
      "learning_rate": 5.219454017118798e-06,
      "loss": 0.0175,
      "step": 1471020
    },
    {
      "epoch": 2.4073892238303776,
      "grad_norm": 0.3127425014972687,
      "learning_rate": 5.21938812490528e-06,
      "loss": 0.01,
      "step": 1471040
    },
    {
      "epoch": 2.407421954269031,
      "grad_norm": 0.421037495136261,
      "learning_rate": 5.219322232691764e-06,
      "loss": 0.0135,
      "step": 1471060
    },
    {
      "epoch": 2.4074546847076843,
      "grad_norm": 0.8790814876556396,
      "learning_rate": 5.2192563404782456e-06,
      "loss": 0.0133,
      "step": 1471080
    },
    {
      "epoch": 2.407487415146338,
      "grad_norm": 0.20323167741298676,
      "learning_rate": 5.219190448264729e-06,
      "loss": 0.0111,
      "step": 1471100
    },
    {
      "epoch": 2.407520145584991,
      "grad_norm": 0.3734999895095825,
      "learning_rate": 5.219124556051212e-06,
      "loss": 0.0184,
      "step": 1471120
    },
    {
      "epoch": 2.4075528760236447,
      "grad_norm": 0.2881634831428528,
      "learning_rate": 5.219058663837695e-06,
      "loss": 0.0175,
      "step": 1471140
    },
    {
      "epoch": 2.407585606462298,
      "grad_norm": 0.7034428715705872,
      "learning_rate": 5.218992771624177e-06,
      "loss": 0.0191,
      "step": 1471160
    },
    {
      "epoch": 2.407618336900951,
      "grad_norm": 0.1459648311138153,
      "learning_rate": 5.218926879410661e-06,
      "loss": 0.0162,
      "step": 1471180
    },
    {
      "epoch": 2.4076510673396045,
      "grad_norm": 0.22828051447868347,
      "learning_rate": 5.218860987197143e-06,
      "loss": 0.0091,
      "step": 1471200
    },
    {
      "epoch": 2.4076837977782577,
      "grad_norm": 0.34322819113731384,
      "learning_rate": 5.2187950949836264e-06,
      "loss": 0.0117,
      "step": 1471220
    },
    {
      "epoch": 2.4077165282169113,
      "grad_norm": 0.4041782021522522,
      "learning_rate": 5.218729202770108e-06,
      "loss": 0.0112,
      "step": 1471240
    },
    {
      "epoch": 2.4077492586555644,
      "grad_norm": 0.2704249918460846,
      "learning_rate": 5.218663310556592e-06,
      "loss": 0.0175,
      "step": 1471260
    },
    {
      "epoch": 2.407781989094218,
      "grad_norm": 0.36790865659713745,
      "learning_rate": 5.218597418343075e-06,
      "loss": 0.015,
      "step": 1471280
    },
    {
      "epoch": 2.407814719532871,
      "grad_norm": 0.23104701936244965,
      "learning_rate": 5.218531526129557e-06,
      "loss": 0.0164,
      "step": 1471300
    },
    {
      "epoch": 2.4078474499715243,
      "grad_norm": 0.24378067255020142,
      "learning_rate": 5.218465633916041e-06,
      "loss": 0.0195,
      "step": 1471320
    },
    {
      "epoch": 2.407880180410178,
      "grad_norm": 0.4248508810997009,
      "learning_rate": 5.218399741702524e-06,
      "loss": 0.0148,
      "step": 1471340
    },
    {
      "epoch": 2.407912910848831,
      "grad_norm": 0.3315413296222687,
      "learning_rate": 5.2183338494890065e-06,
      "loss": 0.0184,
      "step": 1471360
    },
    {
      "epoch": 2.4079456412874847,
      "grad_norm": 0.5426285266876221,
      "learning_rate": 5.218267957275489e-06,
      "loss": 0.0128,
      "step": 1471380
    },
    {
      "epoch": 2.407978371726138,
      "grad_norm": 0.18154729902744293,
      "learning_rate": 5.218202065061973e-06,
      "loss": 0.0146,
      "step": 1471400
    },
    {
      "epoch": 2.4080111021647914,
      "grad_norm": 0.36878228187561035,
      "learning_rate": 5.218136172848455e-06,
      "loss": 0.0096,
      "step": 1471420
    },
    {
      "epoch": 2.4080438326034446,
      "grad_norm": 0.5197229385375977,
      "learning_rate": 5.218070280634938e-06,
      "loss": 0.0116,
      "step": 1471440
    },
    {
      "epoch": 2.4080765630420977,
      "grad_norm": 0.256141722202301,
      "learning_rate": 5.21800438842142e-06,
      "loss": 0.0134,
      "step": 1471460
    },
    {
      "epoch": 2.4081092934807513,
      "grad_norm": 0.6502600312232971,
      "learning_rate": 5.217938496207904e-06,
      "loss": 0.0121,
      "step": 1471480
    },
    {
      "epoch": 2.4081420239194045,
      "grad_norm": 1.444753885269165,
      "learning_rate": 5.2178726039943865e-06,
      "loss": 0.014,
      "step": 1471500
    },
    {
      "epoch": 2.408174754358058,
      "grad_norm": 1.313369631767273,
      "learning_rate": 5.217806711780869e-06,
      "loss": 0.0133,
      "step": 1471520
    },
    {
      "epoch": 2.408207484796711,
      "grad_norm": 0.3487050533294678,
      "learning_rate": 5.217740819567352e-06,
      "loss": 0.0207,
      "step": 1471540
    },
    {
      "epoch": 2.408240215235365,
      "grad_norm": 0.44700345396995544,
      "learning_rate": 5.2176749273538356e-06,
      "loss": 0.0121,
      "step": 1471560
    },
    {
      "epoch": 2.408272945674018,
      "grad_norm": 1.0731576681137085,
      "learning_rate": 5.2176090351403174e-06,
      "loss": 0.0179,
      "step": 1471580
    },
    {
      "epoch": 2.408305676112671,
      "grad_norm": 0.28508099913597107,
      "learning_rate": 5.217543142926801e-06,
      "loss": 0.0095,
      "step": 1471600
    },
    {
      "epoch": 2.4083384065513247,
      "grad_norm": 0.37981700897216797,
      "learning_rate": 5.217477250713283e-06,
      "loss": 0.0116,
      "step": 1471620
    },
    {
      "epoch": 2.408371136989978,
      "grad_norm": 0.5339428186416626,
      "learning_rate": 5.2174113584997665e-06,
      "loss": 0.0108,
      "step": 1471640
    },
    {
      "epoch": 2.4084038674286314,
      "grad_norm": 0.2418951690196991,
      "learning_rate": 5.217345466286248e-06,
      "loss": 0.0144,
      "step": 1471660
    },
    {
      "epoch": 2.4084365978672846,
      "grad_norm": 0.21489089727401733,
      "learning_rate": 5.217279574072732e-06,
      "loss": 0.0138,
      "step": 1471680
    },
    {
      "epoch": 2.408469328305938,
      "grad_norm": 0.5662035346031189,
      "learning_rate": 5.217213681859216e-06,
      "loss": 0.0097,
      "step": 1471700
    },
    {
      "epoch": 2.4085020587445913,
      "grad_norm": 0.4067763686180115,
      "learning_rate": 5.217147789645698e-06,
      "loss": 0.0139,
      "step": 1471720
    },
    {
      "epoch": 2.4085347891832445,
      "grad_norm": 0.7455300688743591,
      "learning_rate": 5.217081897432181e-06,
      "loss": 0.0129,
      "step": 1471740
    },
    {
      "epoch": 2.408567519621898,
      "grad_norm": 0.27762314677238464,
      "learning_rate": 5.217016005218664e-06,
      "loss": 0.0157,
      "step": 1471760
    },
    {
      "epoch": 2.4086002500605512,
      "grad_norm": 0.1304631531238556,
      "learning_rate": 5.216950113005147e-06,
      "loss": 0.0132,
      "step": 1471780
    },
    {
      "epoch": 2.408632980499205,
      "grad_norm": 0.3304205536842346,
      "learning_rate": 5.216884220791629e-06,
      "loss": 0.0147,
      "step": 1471800
    },
    {
      "epoch": 2.408665710937858,
      "grad_norm": 0.47364282608032227,
      "learning_rate": 5.216818328578113e-06,
      "loss": 0.0159,
      "step": 1471820
    },
    {
      "epoch": 2.4086984413765116,
      "grad_norm": 0.6314930319786072,
      "learning_rate": 5.216752436364595e-06,
      "loss": 0.0166,
      "step": 1471840
    },
    {
      "epoch": 2.4087311718151647,
      "grad_norm": 0.5206432938575745,
      "learning_rate": 5.216686544151078e-06,
      "loss": 0.0142,
      "step": 1471860
    },
    {
      "epoch": 2.408763902253818,
      "grad_norm": 0.43039995431900024,
      "learning_rate": 5.21662065193756e-06,
      "loss": 0.0169,
      "step": 1471880
    },
    {
      "epoch": 2.4087966326924715,
      "grad_norm": 0.29589974880218506,
      "learning_rate": 5.216554759724044e-06,
      "loss": 0.0169,
      "step": 1471900
    },
    {
      "epoch": 2.4088293631311246,
      "grad_norm": 0.6397417783737183,
      "learning_rate": 5.2164888675105266e-06,
      "loss": 0.0207,
      "step": 1471920
    },
    {
      "epoch": 2.408862093569778,
      "grad_norm": 0.31497225165367126,
      "learning_rate": 5.216422975297009e-06,
      "loss": 0.0097,
      "step": 1471940
    },
    {
      "epoch": 2.4088948240084314,
      "grad_norm": 0.4303334653377533,
      "learning_rate": 5.216357083083492e-06,
      "loss": 0.0147,
      "step": 1471960
    },
    {
      "epoch": 2.4089275544470845,
      "grad_norm": 0.34764721989631653,
      "learning_rate": 5.216291190869976e-06,
      "loss": 0.015,
      "step": 1471980
    },
    {
      "epoch": 2.408960284885738,
      "grad_norm": 0.74964839220047,
      "learning_rate": 5.2162252986564575e-06,
      "loss": 0.0183,
      "step": 1472000
    },
    {
      "epoch": 2.4089930153243913,
      "grad_norm": 0.37244701385498047,
      "learning_rate": 5.216159406442941e-06,
      "loss": 0.0134,
      "step": 1472020
    },
    {
      "epoch": 2.409025745763045,
      "grad_norm": 0.35857251286506653,
      "learning_rate": 5.216093514229425e-06,
      "loss": 0.0174,
      "step": 1472040
    },
    {
      "epoch": 2.409058476201698,
      "grad_norm": 0.4637240767478943,
      "learning_rate": 5.216027622015907e-06,
      "loss": 0.0133,
      "step": 1472060
    },
    {
      "epoch": 2.409091206640351,
      "grad_norm": 0.4336157739162445,
      "learning_rate": 5.21596172980239e-06,
      "loss": 0.018,
      "step": 1472080
    },
    {
      "epoch": 2.4091239370790047,
      "grad_norm": 1.720046043395996,
      "learning_rate": 5.215895837588872e-06,
      "loss": 0.0195,
      "step": 1472100
    },
    {
      "epoch": 2.409156667517658,
      "grad_norm": 0.15785926580429077,
      "learning_rate": 5.215829945375356e-06,
      "loss": 0.0163,
      "step": 1472120
    },
    {
      "epoch": 2.4091893979563115,
      "grad_norm": 0.475752592086792,
      "learning_rate": 5.215764053161838e-06,
      "loss": 0.0166,
      "step": 1472140
    },
    {
      "epoch": 2.4092221283949646,
      "grad_norm": 0.3778536319732666,
      "learning_rate": 5.215698160948321e-06,
      "loss": 0.0109,
      "step": 1472160
    },
    {
      "epoch": 2.4092548588336182,
      "grad_norm": 0.5333961844444275,
      "learning_rate": 5.215632268734804e-06,
      "loss": 0.0153,
      "step": 1472180
    },
    {
      "epoch": 2.4092875892722714,
      "grad_norm": 1.7678086757659912,
      "learning_rate": 5.2155663765212875e-06,
      "loss": 0.0164,
      "step": 1472200
    },
    {
      "epoch": 2.4093203197109245,
      "grad_norm": 0.13193821907043457,
      "learning_rate": 5.215500484307769e-06,
      "loss": 0.0183,
      "step": 1472220
    },
    {
      "epoch": 2.409353050149578,
      "grad_norm": 0.18648570775985718,
      "learning_rate": 5.215434592094253e-06,
      "loss": 0.0175,
      "step": 1472240
    },
    {
      "epoch": 2.4093857805882313,
      "grad_norm": 0.18486405909061432,
      "learning_rate": 5.215368699880735e-06,
      "loss": 0.0164,
      "step": 1472260
    },
    {
      "epoch": 2.409418511026885,
      "grad_norm": 0.5418998599052429,
      "learning_rate": 5.215302807667218e-06,
      "loss": 0.0152,
      "step": 1472280
    },
    {
      "epoch": 2.409451241465538,
      "grad_norm": 0.27668920159339905,
      "learning_rate": 5.215236915453701e-06,
      "loss": 0.0176,
      "step": 1472300
    },
    {
      "epoch": 2.4094839719041916,
      "grad_norm": 0.15848402678966522,
      "learning_rate": 5.215171023240184e-06,
      "loss": 0.0123,
      "step": 1472320
    },
    {
      "epoch": 2.4095167023428448,
      "grad_norm": 0.5320140719413757,
      "learning_rate": 5.215105131026667e-06,
      "loss": 0.0125,
      "step": 1472340
    },
    {
      "epoch": 2.409549432781498,
      "grad_norm": 0.08849168568849564,
      "learning_rate": 5.21503923881315e-06,
      "loss": 0.0122,
      "step": 1472360
    },
    {
      "epoch": 2.4095821632201515,
      "grad_norm": 0.43034929037094116,
      "learning_rate": 5.214973346599633e-06,
      "loss": 0.0125,
      "step": 1472380
    },
    {
      "epoch": 2.4096148936588047,
      "grad_norm": 0.9116151928901672,
      "learning_rate": 5.214907454386116e-06,
      "loss": 0.0211,
      "step": 1472400
    },
    {
      "epoch": 2.4096476240974583,
      "grad_norm": 0.1788916289806366,
      "learning_rate": 5.214841562172599e-06,
      "loss": 0.0085,
      "step": 1472420
    },
    {
      "epoch": 2.4096803545361114,
      "grad_norm": 0.5845619440078735,
      "learning_rate": 5.214775669959081e-06,
      "loss": 0.015,
      "step": 1472440
    },
    {
      "epoch": 2.409713084974765,
      "grad_norm": 0.23477055132389069,
      "learning_rate": 5.214709777745565e-06,
      "loss": 0.0202,
      "step": 1472460
    },
    {
      "epoch": 2.409745815413418,
      "grad_norm": 0.4642176628112793,
      "learning_rate": 5.214643885532047e-06,
      "loss": 0.0229,
      "step": 1472480
    },
    {
      "epoch": 2.4097785458520713,
      "grad_norm": 0.3833162486553192,
      "learning_rate": 5.21457799331853e-06,
      "loss": 0.0142,
      "step": 1472500
    },
    {
      "epoch": 2.409811276290725,
      "grad_norm": 0.2965327501296997,
      "learning_rate": 5.214512101105013e-06,
      "loss": 0.0175,
      "step": 1472520
    },
    {
      "epoch": 2.409844006729378,
      "grad_norm": 0.41659557819366455,
      "learning_rate": 5.214446208891496e-06,
      "loss": 0.0155,
      "step": 1472540
    },
    {
      "epoch": 2.4098767371680316,
      "grad_norm": 1.220839500427246,
      "learning_rate": 5.2143803166779785e-06,
      "loss": 0.0175,
      "step": 1472560
    },
    {
      "epoch": 2.409909467606685,
      "grad_norm": 0.12225255370140076,
      "learning_rate": 5.214314424464462e-06,
      "loss": 0.013,
      "step": 1472580
    },
    {
      "epoch": 2.4099421980453384,
      "grad_norm": 0.37756308913230896,
      "learning_rate": 5.214248532250944e-06,
      "loss": 0.0146,
      "step": 1472600
    },
    {
      "epoch": 2.4099749284839915,
      "grad_norm": 0.12256330251693726,
      "learning_rate": 5.2141826400374275e-06,
      "loss": 0.0136,
      "step": 1472620
    },
    {
      "epoch": 2.4100076589226447,
      "grad_norm": 0.6617795825004578,
      "learning_rate": 5.2141167478239094e-06,
      "loss": 0.0143,
      "step": 1472640
    },
    {
      "epoch": 2.4100403893612983,
      "grad_norm": 0.2491837739944458,
      "learning_rate": 5.214050855610393e-06,
      "loss": 0.0186,
      "step": 1472660
    },
    {
      "epoch": 2.4100731197999514,
      "grad_norm": 0.29475733637809753,
      "learning_rate": 5.213984963396875e-06,
      "loss": 0.0181,
      "step": 1472680
    },
    {
      "epoch": 2.410105850238605,
      "grad_norm": 0.6321054697036743,
      "learning_rate": 5.2139190711833585e-06,
      "loss": 0.0151,
      "step": 1472700
    },
    {
      "epoch": 2.410138580677258,
      "grad_norm": 0.11413442343473434,
      "learning_rate": 5.213853178969842e-06,
      "loss": 0.0132,
      "step": 1472720
    },
    {
      "epoch": 2.4101713111159118,
      "grad_norm": 0.21991752088069916,
      "learning_rate": 5.213787286756324e-06,
      "loss": 0.0112,
      "step": 1472740
    },
    {
      "epoch": 2.410204041554565,
      "grad_norm": 0.21400904655456543,
      "learning_rate": 5.2137213945428076e-06,
      "loss": 0.0095,
      "step": 1472760
    },
    {
      "epoch": 2.410236771993218,
      "grad_norm": 0.3811724781990051,
      "learning_rate": 5.21365550232929e-06,
      "loss": 0.0133,
      "step": 1472780
    },
    {
      "epoch": 2.4102695024318717,
      "grad_norm": 0.09982544183731079,
      "learning_rate": 5.213589610115774e-06,
      "loss": 0.0196,
      "step": 1472800
    },
    {
      "epoch": 2.410302232870525,
      "grad_norm": 1.2903518676757812,
      "learning_rate": 5.213523717902256e-06,
      "loss": 0.0153,
      "step": 1472820
    },
    {
      "epoch": 2.4103349633091784,
      "grad_norm": 0.26383867859840393,
      "learning_rate": 5.213457825688739e-06,
      "loss": 0.0143,
      "step": 1472840
    },
    {
      "epoch": 2.4103676937478316,
      "grad_norm": 0.8385423421859741,
      "learning_rate": 5.213391933475221e-06,
      "loss": 0.0143,
      "step": 1472860
    },
    {
      "epoch": 2.410400424186485,
      "grad_norm": 0.16015903651714325,
      "learning_rate": 5.213326041261705e-06,
      "loss": 0.0182,
      "step": 1472880
    },
    {
      "epoch": 2.4104331546251383,
      "grad_norm": 0.15099656581878662,
      "learning_rate": 5.213260149048187e-06,
      "loss": 0.0123,
      "step": 1472900
    },
    {
      "epoch": 2.4104658850637914,
      "grad_norm": 0.45386803150177,
      "learning_rate": 5.21319425683467e-06,
      "loss": 0.0132,
      "step": 1472920
    },
    {
      "epoch": 2.410498615502445,
      "grad_norm": 0.13115926086902618,
      "learning_rate": 5.213128364621153e-06,
      "loss": 0.023,
      "step": 1472940
    },
    {
      "epoch": 2.410531345941098,
      "grad_norm": 0.24158938229084015,
      "learning_rate": 5.213062472407636e-06,
      "loss": 0.0139,
      "step": 1472960
    },
    {
      "epoch": 2.410564076379752,
      "grad_norm": 0.4650801718235016,
      "learning_rate": 5.2129965801941185e-06,
      "loss": 0.0193,
      "step": 1472980
    },
    {
      "epoch": 2.410596806818405,
      "grad_norm": 0.2516596019268036,
      "learning_rate": 5.212930687980602e-06,
      "loss": 0.0148,
      "step": 1473000
    },
    {
      "epoch": 2.4106295372570585,
      "grad_norm": 0.7062673568725586,
      "learning_rate": 5.212864795767084e-06,
      "loss": 0.0157,
      "step": 1473020
    },
    {
      "epoch": 2.4106622676957117,
      "grad_norm": 0.38195717334747314,
      "learning_rate": 5.212798903553568e-06,
      "loss": 0.0234,
      "step": 1473040
    },
    {
      "epoch": 2.410694998134365,
      "grad_norm": 0.09375785291194916,
      "learning_rate": 5.2127330113400495e-06,
      "loss": 0.0151,
      "step": 1473060
    },
    {
      "epoch": 2.4107277285730184,
      "grad_norm": 0.09655360877513885,
      "learning_rate": 5.212667119126533e-06,
      "loss": 0.0139,
      "step": 1473080
    },
    {
      "epoch": 2.4107604590116716,
      "grad_norm": 0.164713054895401,
      "learning_rate": 5.212601226913017e-06,
      "loss": 0.0194,
      "step": 1473100
    },
    {
      "epoch": 2.410793189450325,
      "grad_norm": 0.4384435713291168,
      "learning_rate": 5.2125353346994986e-06,
      "loss": 0.0128,
      "step": 1473120
    },
    {
      "epoch": 2.4108259198889783,
      "grad_norm": 0.3917357623577118,
      "learning_rate": 5.212469442485982e-06,
      "loss": 0.0117,
      "step": 1473140
    },
    {
      "epoch": 2.410858650327632,
      "grad_norm": 0.22293169796466827,
      "learning_rate": 5.212403550272465e-06,
      "loss": 0.011,
      "step": 1473160
    },
    {
      "epoch": 2.410891380766285,
      "grad_norm": 0.14810895919799805,
      "learning_rate": 5.212337658058948e-06,
      "loss": 0.0129,
      "step": 1473180
    },
    {
      "epoch": 2.410924111204938,
      "grad_norm": 0.7773385047912598,
      "learning_rate": 5.21227176584543e-06,
      "loss": 0.0169,
      "step": 1473200
    },
    {
      "epoch": 2.410956841643592,
      "grad_norm": 0.26555538177490234,
      "learning_rate": 5.212205873631914e-06,
      "loss": 0.0177,
      "step": 1473220
    },
    {
      "epoch": 2.410989572082245,
      "grad_norm": 1.168593168258667,
      "learning_rate": 5.212139981418396e-06,
      "loss": 0.0173,
      "step": 1473240
    },
    {
      "epoch": 2.4110223025208986,
      "grad_norm": 0.13014686107635498,
      "learning_rate": 5.2120740892048794e-06,
      "loss": 0.0153,
      "step": 1473260
    },
    {
      "epoch": 2.4110550329595517,
      "grad_norm": 0.8943400979042053,
      "learning_rate": 5.212008196991361e-06,
      "loss": 0.0135,
      "step": 1473280
    },
    {
      "epoch": 2.4110877633982053,
      "grad_norm": 0.5728917717933655,
      "learning_rate": 5.211942304777845e-06,
      "loss": 0.0152,
      "step": 1473300
    },
    {
      "epoch": 2.4111204938368584,
      "grad_norm": 0.2818761169910431,
      "learning_rate": 5.211876412564328e-06,
      "loss": 0.0128,
      "step": 1473320
    },
    {
      "epoch": 2.4111532242755116,
      "grad_norm": 0.3386368751525879,
      "learning_rate": 5.21181052035081e-06,
      "loss": 0.0115,
      "step": 1473340
    },
    {
      "epoch": 2.411185954714165,
      "grad_norm": 0.2896519899368286,
      "learning_rate": 5.211744628137293e-06,
      "loss": 0.0171,
      "step": 1473360
    },
    {
      "epoch": 2.4112186851528183,
      "grad_norm": 0.28542885184288025,
      "learning_rate": 5.211678735923777e-06,
      "loss": 0.0112,
      "step": 1473380
    },
    {
      "epoch": 2.411251415591472,
      "grad_norm": 0.3721504807472229,
      "learning_rate": 5.211612843710259e-06,
      "loss": 0.0118,
      "step": 1473400
    },
    {
      "epoch": 2.411284146030125,
      "grad_norm": 0.40771225094795227,
      "learning_rate": 5.211546951496742e-06,
      "loss": 0.0144,
      "step": 1473420
    },
    {
      "epoch": 2.4113168764687787,
      "grad_norm": 0.2464161217212677,
      "learning_rate": 5.211481059283226e-06,
      "loss": 0.0171,
      "step": 1473440
    },
    {
      "epoch": 2.411349606907432,
      "grad_norm": 0.451034814119339,
      "learning_rate": 5.211415167069708e-06,
      "loss": 0.015,
      "step": 1473460
    },
    {
      "epoch": 2.411382337346085,
      "grad_norm": 0.4567689895629883,
      "learning_rate": 5.211349274856191e-06,
      "loss": 0.0191,
      "step": 1473480
    },
    {
      "epoch": 2.4114150677847386,
      "grad_norm": 0.05354973301291466,
      "learning_rate": 5.211283382642673e-06,
      "loss": 0.0122,
      "step": 1473500
    },
    {
      "epoch": 2.4114477982233917,
      "grad_norm": 1.3352595567703247,
      "learning_rate": 5.211217490429157e-06,
      "loss": 0.0143,
      "step": 1473520
    },
    {
      "epoch": 2.4114805286620453,
      "grad_norm": 0.26003170013427734,
      "learning_rate": 5.2111515982156395e-06,
      "loss": 0.0123,
      "step": 1473540
    },
    {
      "epoch": 2.4115132591006985,
      "grad_norm": 0.5581559538841248,
      "learning_rate": 5.211085706002122e-06,
      "loss": 0.0156,
      "step": 1473560
    },
    {
      "epoch": 2.4115459895393516,
      "grad_norm": 0.9345924258232117,
      "learning_rate": 5.211019813788605e-06,
      "loss": 0.0164,
      "step": 1473580
    },
    {
      "epoch": 2.411578719978005,
      "grad_norm": 0.13932077586650848,
      "learning_rate": 5.2109539215750886e-06,
      "loss": 0.0112,
      "step": 1473600
    },
    {
      "epoch": 2.4116114504166584,
      "grad_norm": 0.4669782519340515,
      "learning_rate": 5.2108880293615705e-06,
      "loss": 0.0118,
      "step": 1473620
    },
    {
      "epoch": 2.411644180855312,
      "grad_norm": 0.46462586522102356,
      "learning_rate": 5.210822137148054e-06,
      "loss": 0.0103,
      "step": 1473640
    },
    {
      "epoch": 2.411676911293965,
      "grad_norm": 0.3202540874481201,
      "learning_rate": 5.210756244934536e-06,
      "loss": 0.0158,
      "step": 1473660
    },
    {
      "epoch": 2.4117096417326183,
      "grad_norm": 0.14170819520950317,
      "learning_rate": 5.2106903527210195e-06,
      "loss": 0.0283,
      "step": 1473680
    },
    {
      "epoch": 2.411742372171272,
      "grad_norm": 0.09440604597330093,
      "learning_rate": 5.210624460507501e-06,
      "loss": 0.0202,
      "step": 1473700
    },
    {
      "epoch": 2.411775102609925,
      "grad_norm": 0.5571346879005432,
      "learning_rate": 5.210558568293985e-06,
      "loss": 0.0133,
      "step": 1473720
    },
    {
      "epoch": 2.4118078330485786,
      "grad_norm": 0.14091943204402924,
      "learning_rate": 5.210492676080468e-06,
      "loss": 0.0108,
      "step": 1473740
    },
    {
      "epoch": 2.4118405634872317,
      "grad_norm": 0.3874422311782837,
      "learning_rate": 5.2104267838669505e-06,
      "loss": 0.0134,
      "step": 1473760
    },
    {
      "epoch": 2.4118732939258853,
      "grad_norm": 0.2926114797592163,
      "learning_rate": 5.210360891653434e-06,
      "loss": 0.0129,
      "step": 1473780
    },
    {
      "epoch": 2.4119060243645385,
      "grad_norm": 0.6493807435035706,
      "learning_rate": 5.210294999439917e-06,
      "loss": 0.0122,
      "step": 1473800
    },
    {
      "epoch": 2.4119387548031916,
      "grad_norm": 1.6377410888671875,
      "learning_rate": 5.2102291072263995e-06,
      "loss": 0.0135,
      "step": 1473820
    },
    {
      "epoch": 2.4119714852418452,
      "grad_norm": 0.4322289228439331,
      "learning_rate": 5.210163215012882e-06,
      "loss": 0.0231,
      "step": 1473840
    },
    {
      "epoch": 2.4120042156804984,
      "grad_norm": 0.3679560124874115,
      "learning_rate": 5.210097322799366e-06,
      "loss": 0.0087,
      "step": 1473860
    },
    {
      "epoch": 2.412036946119152,
      "grad_norm": 0.34202900528907776,
      "learning_rate": 5.210031430585848e-06,
      "loss": 0.0164,
      "step": 1473880
    },
    {
      "epoch": 2.412069676557805,
      "grad_norm": 0.34373006224632263,
      "learning_rate": 5.209965538372331e-06,
      "loss": 0.0074,
      "step": 1473900
    },
    {
      "epoch": 2.4121024069964587,
      "grad_norm": 0.5055666565895081,
      "learning_rate": 5.209899646158813e-06,
      "loss": 0.0114,
      "step": 1473920
    },
    {
      "epoch": 2.412135137435112,
      "grad_norm": 0.3488501012325287,
      "learning_rate": 5.209833753945297e-06,
      "loss": 0.0113,
      "step": 1473940
    },
    {
      "epoch": 2.412167867873765,
      "grad_norm": 0.24722392857074738,
      "learning_rate": 5.2097678617317796e-06,
      "loss": 0.0131,
      "step": 1473960
    },
    {
      "epoch": 2.4122005983124186,
      "grad_norm": 0.1540856510400772,
      "learning_rate": 5.209701969518262e-06,
      "loss": 0.0138,
      "step": 1473980
    },
    {
      "epoch": 2.4122333287510718,
      "grad_norm": 0.17009907960891724,
      "learning_rate": 5.209636077304745e-06,
      "loss": 0.0177,
      "step": 1474000
    },
    {
      "epoch": 2.4122660591897254,
      "grad_norm": 0.45708248019218445,
      "learning_rate": 5.209570185091229e-06,
      "loss": 0.0146,
      "step": 1474020
    },
    {
      "epoch": 2.4122987896283785,
      "grad_norm": 0.41981083154678345,
      "learning_rate": 5.2095042928777105e-06,
      "loss": 0.018,
      "step": 1474040
    },
    {
      "epoch": 2.412331520067032,
      "grad_norm": 0.1762835532426834,
      "learning_rate": 5.209438400664194e-06,
      "loss": 0.0139,
      "step": 1474060
    },
    {
      "epoch": 2.4123642505056853,
      "grad_norm": 0.18231777846813202,
      "learning_rate": 5.209372508450676e-06,
      "loss": 0.0149,
      "step": 1474080
    },
    {
      "epoch": 2.4123969809443384,
      "grad_norm": 0.29838141798973083,
      "learning_rate": 5.20930661623716e-06,
      "loss": 0.017,
      "step": 1474100
    },
    {
      "epoch": 2.412429711382992,
      "grad_norm": 0.9349625110626221,
      "learning_rate": 5.209240724023642e-06,
      "loss": 0.014,
      "step": 1474120
    },
    {
      "epoch": 2.412462441821645,
      "grad_norm": 0.06313634663820267,
      "learning_rate": 5.209174831810125e-06,
      "loss": 0.0147,
      "step": 1474140
    },
    {
      "epoch": 2.4124951722602987,
      "grad_norm": 0.20039181411266327,
      "learning_rate": 5.209108939596609e-06,
      "loss": 0.0181,
      "step": 1474160
    },
    {
      "epoch": 2.412527902698952,
      "grad_norm": 0.22829024493694305,
      "learning_rate": 5.209043047383091e-06,
      "loss": 0.0109,
      "step": 1474180
    },
    {
      "epoch": 2.4125606331376055,
      "grad_norm": 0.1984998732805252,
      "learning_rate": 5.208977155169574e-06,
      "loss": 0.0163,
      "step": 1474200
    },
    {
      "epoch": 2.4125933635762586,
      "grad_norm": 0.3457298278808594,
      "learning_rate": 5.208911262956057e-06,
      "loss": 0.0134,
      "step": 1474220
    },
    {
      "epoch": 2.412626094014912,
      "grad_norm": 0.28950271010398865,
      "learning_rate": 5.2088453707425405e-06,
      "loss": 0.0161,
      "step": 1474240
    },
    {
      "epoch": 2.4126588244535654,
      "grad_norm": 0.4944424629211426,
      "learning_rate": 5.208779478529022e-06,
      "loss": 0.0199,
      "step": 1474260
    },
    {
      "epoch": 2.4126915548922185,
      "grad_norm": 0.8035604953765869,
      "learning_rate": 5.208713586315506e-06,
      "loss": 0.0299,
      "step": 1474280
    },
    {
      "epoch": 2.412724285330872,
      "grad_norm": 0.4821169078350067,
      "learning_rate": 5.208647694101988e-06,
      "loss": 0.0163,
      "step": 1474300
    },
    {
      "epoch": 2.4127570157695253,
      "grad_norm": 0.07225573062896729,
      "learning_rate": 5.2085818018884714e-06,
      "loss": 0.0132,
      "step": 1474320
    },
    {
      "epoch": 2.412789746208179,
      "grad_norm": 0.1495964527130127,
      "learning_rate": 5.208515909674954e-06,
      "loss": 0.0178,
      "step": 1474340
    },
    {
      "epoch": 2.412822476646832,
      "grad_norm": 0.06997327506542206,
      "learning_rate": 5.208450017461437e-06,
      "loss": 0.0127,
      "step": 1474360
    },
    {
      "epoch": 2.412855207085485,
      "grad_norm": 0.22790223360061646,
      "learning_rate": 5.20838412524792e-06,
      "loss": 0.0133,
      "step": 1474380
    },
    {
      "epoch": 2.4128879375241388,
      "grad_norm": 0.17946352064609528,
      "learning_rate": 5.208318233034403e-06,
      "loss": 0.0114,
      "step": 1474400
    },
    {
      "epoch": 2.412920667962792,
      "grad_norm": 0.39892280101776123,
      "learning_rate": 5.208252340820885e-06,
      "loss": 0.0079,
      "step": 1474420
    },
    {
      "epoch": 2.4129533984014455,
      "grad_norm": 0.2526746094226837,
      "learning_rate": 5.208186448607369e-06,
      "loss": 0.0122,
      "step": 1474440
    },
    {
      "epoch": 2.4129861288400987,
      "grad_norm": 0.12152188271284103,
      "learning_rate": 5.208120556393851e-06,
      "loss": 0.0195,
      "step": 1474460
    },
    {
      "epoch": 2.4130188592787523,
      "grad_norm": 0.5870329737663269,
      "learning_rate": 5.208054664180334e-06,
      "loss": 0.0103,
      "step": 1474480
    },
    {
      "epoch": 2.4130515897174054,
      "grad_norm": 0.09440687298774719,
      "learning_rate": 5.207988771966818e-06,
      "loss": 0.0207,
      "step": 1474500
    },
    {
      "epoch": 2.4130843201560586,
      "grad_norm": 0.29102715849876404,
      "learning_rate": 5.2079228797533e-06,
      "loss": 0.0143,
      "step": 1474520
    },
    {
      "epoch": 2.413117050594712,
      "grad_norm": 0.15073217451572418,
      "learning_rate": 5.207856987539783e-06,
      "loss": 0.0139,
      "step": 1474540
    },
    {
      "epoch": 2.4131497810333653,
      "grad_norm": 0.6071562767028809,
      "learning_rate": 5.207791095326266e-06,
      "loss": 0.0157,
      "step": 1474560
    },
    {
      "epoch": 2.413182511472019,
      "grad_norm": 0.08873632550239563,
      "learning_rate": 5.207725203112749e-06,
      "loss": 0.0162,
      "step": 1474580
    },
    {
      "epoch": 2.413215241910672,
      "grad_norm": 0.18175993859767914,
      "learning_rate": 5.2076593108992315e-06,
      "loss": 0.0153,
      "step": 1474600
    },
    {
      "epoch": 2.4132479723493256,
      "grad_norm": 0.4778307378292084,
      "learning_rate": 5.207593418685715e-06,
      "loss": 0.0113,
      "step": 1474620
    },
    {
      "epoch": 2.413280702787979,
      "grad_norm": 0.07397810369729996,
      "learning_rate": 5.207527526472197e-06,
      "loss": 0.0127,
      "step": 1474640
    },
    {
      "epoch": 2.413313433226632,
      "grad_norm": 0.2045704573392868,
      "learning_rate": 5.2074616342586805e-06,
      "loss": 0.0146,
      "step": 1474660
    },
    {
      "epoch": 2.4133461636652855,
      "grad_norm": 0.3794700801372528,
      "learning_rate": 5.2073957420451624e-06,
      "loss": 0.0113,
      "step": 1474680
    },
    {
      "epoch": 2.4133788941039387,
      "grad_norm": 0.580542802810669,
      "learning_rate": 5.207329849831646e-06,
      "loss": 0.0108,
      "step": 1474700
    },
    {
      "epoch": 2.4134116245425923,
      "grad_norm": 0.19509270787239075,
      "learning_rate": 5.207263957618128e-06,
      "loss": 0.0171,
      "step": 1474720
    },
    {
      "epoch": 2.4134443549812454,
      "grad_norm": 0.36095523834228516,
      "learning_rate": 5.2071980654046115e-06,
      "loss": 0.0117,
      "step": 1474740
    },
    {
      "epoch": 2.413477085419899,
      "grad_norm": 0.7399054169654846,
      "learning_rate": 5.207132173191094e-06,
      "loss": 0.0133,
      "step": 1474760
    },
    {
      "epoch": 2.413509815858552,
      "grad_norm": 0.23518845438957214,
      "learning_rate": 5.207066280977577e-06,
      "loss": 0.0191,
      "step": 1474780
    },
    {
      "epoch": 2.4135425462972053,
      "grad_norm": 0.2382340431213379,
      "learning_rate": 5.20700038876406e-06,
      "loss": 0.0119,
      "step": 1474800
    },
    {
      "epoch": 2.413575276735859,
      "grad_norm": 0.5550759434700012,
      "learning_rate": 5.206934496550543e-06,
      "loss": 0.0129,
      "step": 1474820
    },
    {
      "epoch": 2.413608007174512,
      "grad_norm": 0.3068355917930603,
      "learning_rate": 5.206868604337026e-06,
      "loss": 0.0109,
      "step": 1474840
    },
    {
      "epoch": 2.4136407376131657,
      "grad_norm": 0.4861781597137451,
      "learning_rate": 5.206802712123509e-06,
      "loss": 0.0167,
      "step": 1474860
    },
    {
      "epoch": 2.413673468051819,
      "grad_norm": 1.0027203559875488,
      "learning_rate": 5.206736819909992e-06,
      "loss": 0.0198,
      "step": 1474880
    },
    {
      "epoch": 2.4137061984904724,
      "grad_norm": 0.5983939170837402,
      "learning_rate": 5.206670927696474e-06,
      "loss": 0.0143,
      "step": 1474900
    },
    {
      "epoch": 2.4137389289291256,
      "grad_norm": 0.07761438190937042,
      "learning_rate": 5.206605035482958e-06,
      "loss": 0.0155,
      "step": 1474920
    },
    {
      "epoch": 2.4137716593677787,
      "grad_norm": 0.10905278474092484,
      "learning_rate": 5.20653914326944e-06,
      "loss": 0.0118,
      "step": 1474940
    },
    {
      "epoch": 2.4138043898064323,
      "grad_norm": 0.05915132537484169,
      "learning_rate": 5.206473251055923e-06,
      "loss": 0.019,
      "step": 1474960
    },
    {
      "epoch": 2.4138371202450855,
      "grad_norm": 1.07648503780365,
      "learning_rate": 5.206407358842406e-06,
      "loss": 0.0157,
      "step": 1474980
    },
    {
      "epoch": 2.413869850683739,
      "grad_norm": 0.2983225882053375,
      "learning_rate": 5.206341466628889e-06,
      "loss": 0.0118,
      "step": 1475000
    },
    {
      "epoch": 2.413902581122392,
      "grad_norm": 0.43004292249679565,
      "learning_rate": 5.2062755744153716e-06,
      "loss": 0.0114,
      "step": 1475020
    },
    {
      "epoch": 2.4139353115610453,
      "grad_norm": 0.15717697143554688,
      "learning_rate": 5.206209682201855e-06,
      "loss": 0.0214,
      "step": 1475040
    },
    {
      "epoch": 2.413968041999699,
      "grad_norm": 0.27656564116477966,
      "learning_rate": 5.206143789988337e-06,
      "loss": 0.0152,
      "step": 1475060
    },
    {
      "epoch": 2.414000772438352,
      "grad_norm": 1.574399709701538,
      "learning_rate": 5.206077897774821e-06,
      "loss": 0.0078,
      "step": 1475080
    },
    {
      "epoch": 2.4140335028770057,
      "grad_norm": 0.3942195177078247,
      "learning_rate": 5.2060120055613025e-06,
      "loss": 0.0155,
      "step": 1475100
    },
    {
      "epoch": 2.414066233315659,
      "grad_norm": 0.18363317847251892,
      "learning_rate": 5.205946113347786e-06,
      "loss": 0.0151,
      "step": 1475120
    },
    {
      "epoch": 2.414098963754312,
      "grad_norm": 0.7098453640937805,
      "learning_rate": 5.205880221134269e-06,
      "loss": 0.0127,
      "step": 1475140
    },
    {
      "epoch": 2.4141316941929656,
      "grad_norm": 0.5163295865058899,
      "learning_rate": 5.205814328920752e-06,
      "loss": 0.0133,
      "step": 1475160
    },
    {
      "epoch": 2.4141644246316187,
      "grad_norm": 0.47243958711624146,
      "learning_rate": 5.205748436707235e-06,
      "loss": 0.0181,
      "step": 1475180
    },
    {
      "epoch": 2.4141971550702723,
      "grad_norm": 0.7461593151092529,
      "learning_rate": 5.205682544493718e-06,
      "loss": 0.0198,
      "step": 1475200
    },
    {
      "epoch": 2.4142298855089255,
      "grad_norm": 0.16904623806476593,
      "learning_rate": 5.205616652280201e-06,
      "loss": 0.022,
      "step": 1475220
    },
    {
      "epoch": 2.414262615947579,
      "grad_norm": 0.6168667078018188,
      "learning_rate": 5.205550760066683e-06,
      "loss": 0.0146,
      "step": 1475240
    },
    {
      "epoch": 2.414295346386232,
      "grad_norm": 0.20217067003250122,
      "learning_rate": 5.205484867853167e-06,
      "loss": 0.0099,
      "step": 1475260
    },
    {
      "epoch": 2.4143280768248854,
      "grad_norm": 0.42693161964416504,
      "learning_rate": 5.205418975639649e-06,
      "loss": 0.0166,
      "step": 1475280
    },
    {
      "epoch": 2.414360807263539,
      "grad_norm": 0.3923999071121216,
      "learning_rate": 5.2053530834261325e-06,
      "loss": 0.012,
      "step": 1475300
    },
    {
      "epoch": 2.414393537702192,
      "grad_norm": 1.026659369468689,
      "learning_rate": 5.205287191212614e-06,
      "loss": 0.014,
      "step": 1475320
    },
    {
      "epoch": 2.4144262681408457,
      "grad_norm": 0.2566912770271301,
      "learning_rate": 5.205221298999098e-06,
      "loss": 0.0113,
      "step": 1475340
    },
    {
      "epoch": 2.414458998579499,
      "grad_norm": 0.7856743335723877,
      "learning_rate": 5.205155406785581e-06,
      "loss": 0.0205,
      "step": 1475360
    },
    {
      "epoch": 2.4144917290181525,
      "grad_norm": 0.2396966814994812,
      "learning_rate": 5.205089514572063e-06,
      "loss": 0.0129,
      "step": 1475380
    },
    {
      "epoch": 2.4145244594568056,
      "grad_norm": 0.8266052603721619,
      "learning_rate": 5.205023622358546e-06,
      "loss": 0.0116,
      "step": 1475400
    },
    {
      "epoch": 2.4145571898954588,
      "grad_norm": 0.6152036190032959,
      "learning_rate": 5.20495773014503e-06,
      "loss": 0.0183,
      "step": 1475420
    },
    {
      "epoch": 2.4145899203341123,
      "grad_norm": 0.6801636815071106,
      "learning_rate": 5.204891837931512e-06,
      "loss": 0.0105,
      "step": 1475440
    },
    {
      "epoch": 2.4146226507727655,
      "grad_norm": 0.24305470287799835,
      "learning_rate": 5.204825945717995e-06,
      "loss": 0.0141,
      "step": 1475460
    },
    {
      "epoch": 2.414655381211419,
      "grad_norm": 0.21455515921115875,
      "learning_rate": 5.204760053504477e-06,
      "loss": 0.0137,
      "step": 1475480
    },
    {
      "epoch": 2.4146881116500722,
      "grad_norm": 0.27119046449661255,
      "learning_rate": 5.204694161290961e-06,
      "loss": 0.0159,
      "step": 1475500
    },
    {
      "epoch": 2.414720842088726,
      "grad_norm": 0.44506651163101196,
      "learning_rate": 5.204628269077443e-06,
      "loss": 0.0124,
      "step": 1475520
    },
    {
      "epoch": 2.414753572527379,
      "grad_norm": 0.7269892692565918,
      "learning_rate": 5.204562376863926e-06,
      "loss": 0.0171,
      "step": 1475540
    },
    {
      "epoch": 2.414786302966032,
      "grad_norm": 1.0157767534255981,
      "learning_rate": 5.20449648465041e-06,
      "loss": 0.0171,
      "step": 1475560
    },
    {
      "epoch": 2.4148190334046857,
      "grad_norm": 0.4084679186344147,
      "learning_rate": 5.2044305924368925e-06,
      "loss": 0.0114,
      "step": 1475580
    },
    {
      "epoch": 2.414851763843339,
      "grad_norm": 0.180672287940979,
      "learning_rate": 5.204364700223375e-06,
      "loss": 0.0204,
      "step": 1475600
    },
    {
      "epoch": 2.4148844942819925,
      "grad_norm": 0.4683891236782074,
      "learning_rate": 5.204298808009858e-06,
      "loss": 0.0132,
      "step": 1475620
    },
    {
      "epoch": 2.4149172247206456,
      "grad_norm": 0.37224340438842773,
      "learning_rate": 5.2042329157963416e-06,
      "loss": 0.0134,
      "step": 1475640
    },
    {
      "epoch": 2.414949955159299,
      "grad_norm": 0.2743850350379944,
      "learning_rate": 5.2041670235828235e-06,
      "loss": 0.014,
      "step": 1475660
    },
    {
      "epoch": 2.4149826855979524,
      "grad_norm": 0.35874512791633606,
      "learning_rate": 5.204101131369307e-06,
      "loss": 0.0151,
      "step": 1475680
    },
    {
      "epoch": 2.4150154160366055,
      "grad_norm": 0.18002112209796906,
      "learning_rate": 5.204035239155789e-06,
      "loss": 0.0104,
      "step": 1475700
    },
    {
      "epoch": 2.415048146475259,
      "grad_norm": 0.28344300389289856,
      "learning_rate": 5.2039693469422725e-06,
      "loss": 0.0126,
      "step": 1475720
    },
    {
      "epoch": 2.4150808769139123,
      "grad_norm": 0.3107125163078308,
      "learning_rate": 5.203903454728754e-06,
      "loss": 0.0113,
      "step": 1475740
    },
    {
      "epoch": 2.415113607352566,
      "grad_norm": 0.44270914793014526,
      "learning_rate": 5.203837562515238e-06,
      "loss": 0.0137,
      "step": 1475760
    },
    {
      "epoch": 2.415146337791219,
      "grad_norm": 0.693105936050415,
      "learning_rate": 5.203771670301721e-06,
      "loss": 0.02,
      "step": 1475780
    },
    {
      "epoch": 2.4151790682298726,
      "grad_norm": 0.29456275701522827,
      "learning_rate": 5.2037057780882035e-06,
      "loss": 0.0157,
      "step": 1475800
    },
    {
      "epoch": 2.4152117986685258,
      "grad_norm": 0.2805948853492737,
      "learning_rate": 5.203639885874686e-06,
      "loss": 0.013,
      "step": 1475820
    },
    {
      "epoch": 2.415244529107179,
      "grad_norm": 0.21775192022323608,
      "learning_rate": 5.20357399366117e-06,
      "loss": 0.0142,
      "step": 1475840
    },
    {
      "epoch": 2.4152772595458325,
      "grad_norm": 0.26950517296791077,
      "learning_rate": 5.203508101447652e-06,
      "loss": 0.0149,
      "step": 1475860
    },
    {
      "epoch": 2.4153099899844857,
      "grad_norm": 0.35655906796455383,
      "learning_rate": 5.203442209234135e-06,
      "loss": 0.0121,
      "step": 1475880
    },
    {
      "epoch": 2.4153427204231392,
      "grad_norm": 0.06977492570877075,
      "learning_rate": 5.203376317020619e-06,
      "loss": 0.0112,
      "step": 1475900
    },
    {
      "epoch": 2.4153754508617924,
      "grad_norm": 0.7077054977416992,
      "learning_rate": 5.203310424807101e-06,
      "loss": 0.0229,
      "step": 1475920
    },
    {
      "epoch": 2.415408181300446,
      "grad_norm": 0.47197407484054565,
      "learning_rate": 5.203244532593584e-06,
      "loss": 0.0165,
      "step": 1475940
    },
    {
      "epoch": 2.415440911739099,
      "grad_norm": 0.2302723377943039,
      "learning_rate": 5.203178640380066e-06,
      "loss": 0.0146,
      "step": 1475960
    },
    {
      "epoch": 2.4154736421777523,
      "grad_norm": 0.29647496342658997,
      "learning_rate": 5.20311274816655e-06,
      "loss": 0.0109,
      "step": 1475980
    },
    {
      "epoch": 2.415506372616406,
      "grad_norm": 1.8578215837478638,
      "learning_rate": 5.2030468559530326e-06,
      "loss": 0.0218,
      "step": 1476000
    },
    {
      "epoch": 2.415539103055059,
      "grad_norm": 0.2237110733985901,
      "learning_rate": 5.202980963739515e-06,
      "loss": 0.016,
      "step": 1476020
    },
    {
      "epoch": 2.4155718334937126,
      "grad_norm": 0.6274386644363403,
      "learning_rate": 5.202915071525998e-06,
      "loss": 0.0116,
      "step": 1476040
    },
    {
      "epoch": 2.415604563932366,
      "grad_norm": 0.24056342244148254,
      "learning_rate": 5.202849179312482e-06,
      "loss": 0.0121,
      "step": 1476060
    },
    {
      "epoch": 2.4156372943710194,
      "grad_norm": 0.4028501808643341,
      "learning_rate": 5.2027832870989635e-06,
      "loss": 0.0113,
      "step": 1476080
    },
    {
      "epoch": 2.4156700248096725,
      "grad_norm": 0.28708115220069885,
      "learning_rate": 5.202717394885447e-06,
      "loss": 0.0174,
      "step": 1476100
    },
    {
      "epoch": 2.4157027552483257,
      "grad_norm": 0.5314088463783264,
      "learning_rate": 5.202651502671929e-06,
      "loss": 0.0208,
      "step": 1476120
    },
    {
      "epoch": 2.4157354856869793,
      "grad_norm": 0.08349670469760895,
      "learning_rate": 5.202585610458413e-06,
      "loss": 0.0088,
      "step": 1476140
    },
    {
      "epoch": 2.4157682161256324,
      "grad_norm": 0.5363736748695374,
      "learning_rate": 5.202519718244895e-06,
      "loss": 0.0135,
      "step": 1476160
    },
    {
      "epoch": 2.415800946564286,
      "grad_norm": 0.29804593324661255,
      "learning_rate": 5.202453826031378e-06,
      "loss": 0.0137,
      "step": 1476180
    },
    {
      "epoch": 2.415833677002939,
      "grad_norm": 0.6696274280548096,
      "learning_rate": 5.202387933817861e-06,
      "loss": 0.0255,
      "step": 1476200
    },
    {
      "epoch": 2.4158664074415928,
      "grad_norm": 0.29301637411117554,
      "learning_rate": 5.202322041604344e-06,
      "loss": 0.0138,
      "step": 1476220
    },
    {
      "epoch": 2.415899137880246,
      "grad_norm": 0.3201838433742523,
      "learning_rate": 5.202256149390827e-06,
      "loss": 0.0126,
      "step": 1476240
    },
    {
      "epoch": 2.415931868318899,
      "grad_norm": 0.2916274666786194,
      "learning_rate": 5.20219025717731e-06,
      "loss": 0.0186,
      "step": 1476260
    },
    {
      "epoch": 2.4159645987575527,
      "grad_norm": 0.37274208664894104,
      "learning_rate": 5.2021243649637935e-06,
      "loss": 0.0112,
      "step": 1476280
    },
    {
      "epoch": 2.415997329196206,
      "grad_norm": 0.46712079644203186,
      "learning_rate": 5.202058472750275e-06,
      "loss": 0.0165,
      "step": 1476300
    },
    {
      "epoch": 2.4160300596348594,
      "grad_norm": 0.16863109171390533,
      "learning_rate": 5.201992580536759e-06,
      "loss": 0.0115,
      "step": 1476320
    },
    {
      "epoch": 2.4160627900735125,
      "grad_norm": 1.7640331983566284,
      "learning_rate": 5.201926688323241e-06,
      "loss": 0.0122,
      "step": 1476340
    },
    {
      "epoch": 2.416095520512166,
      "grad_norm": 0.12163622677326202,
      "learning_rate": 5.2018607961097244e-06,
      "loss": 0.0176,
      "step": 1476360
    },
    {
      "epoch": 2.4161282509508193,
      "grad_norm": 0.20471511781215668,
      "learning_rate": 5.201794903896207e-06,
      "loss": 0.016,
      "step": 1476380
    },
    {
      "epoch": 2.4161609813894724,
      "grad_norm": 0.20544883608818054,
      "learning_rate": 5.20172901168269e-06,
      "loss": 0.013,
      "step": 1476400
    },
    {
      "epoch": 2.416193711828126,
      "grad_norm": 0.9608717560768127,
      "learning_rate": 5.201663119469173e-06,
      "loss": 0.0183,
      "step": 1476420
    },
    {
      "epoch": 2.416226442266779,
      "grad_norm": 0.5902614593505859,
      "learning_rate": 5.201597227255656e-06,
      "loss": 0.0135,
      "step": 1476440
    },
    {
      "epoch": 2.416259172705433,
      "grad_norm": 0.11339763551950455,
      "learning_rate": 5.201531335042138e-06,
      "loss": 0.012,
      "step": 1476460
    },
    {
      "epoch": 2.416291903144086,
      "grad_norm": 0.1503290832042694,
      "learning_rate": 5.201465442828622e-06,
      "loss": 0.0119,
      "step": 1476480
    },
    {
      "epoch": 2.4163246335827395,
      "grad_norm": 0.5328702926635742,
      "learning_rate": 5.201399550615104e-06,
      "loss": 0.0136,
      "step": 1476500
    },
    {
      "epoch": 2.4163573640213927,
      "grad_norm": 0.13066595792770386,
      "learning_rate": 5.201333658401587e-06,
      "loss": 0.0158,
      "step": 1476520
    },
    {
      "epoch": 2.416390094460046,
      "grad_norm": 0.18803063035011292,
      "learning_rate": 5.201267766188069e-06,
      "loss": 0.013,
      "step": 1476540
    },
    {
      "epoch": 2.4164228248986994,
      "grad_norm": 0.9326173663139343,
      "learning_rate": 5.201201873974553e-06,
      "loss": 0.0083,
      "step": 1476560
    },
    {
      "epoch": 2.4164555553373526,
      "grad_norm": 0.2888426184654236,
      "learning_rate": 5.201135981761035e-06,
      "loss": 0.0148,
      "step": 1476580
    },
    {
      "epoch": 2.416488285776006,
      "grad_norm": 1.506223201751709,
      "learning_rate": 5.201070089547518e-06,
      "loss": 0.0174,
      "step": 1476600
    },
    {
      "epoch": 2.4165210162146593,
      "grad_norm": 0.3247460722923279,
      "learning_rate": 5.201004197334002e-06,
      "loss": 0.0175,
      "step": 1476620
    },
    {
      "epoch": 2.4165537466533125,
      "grad_norm": 0.1644056737422943,
      "learning_rate": 5.2009383051204845e-06,
      "loss": 0.0111,
      "step": 1476640
    },
    {
      "epoch": 2.416586477091966,
      "grad_norm": 1.0595279932022095,
      "learning_rate": 5.200872412906968e-06,
      "loss": 0.0101,
      "step": 1476660
    },
    {
      "epoch": 2.416619207530619,
      "grad_norm": 0.33419451117515564,
      "learning_rate": 5.20080652069345e-06,
      "loss": 0.0221,
      "step": 1476680
    },
    {
      "epoch": 2.416651937969273,
      "grad_norm": 0.187477245926857,
      "learning_rate": 5.2007406284799335e-06,
      "loss": 0.0159,
      "step": 1476700
    },
    {
      "epoch": 2.416684668407926,
      "grad_norm": 0.6807374954223633,
      "learning_rate": 5.2006747362664154e-06,
      "loss": 0.0134,
      "step": 1476720
    },
    {
      "epoch": 2.416717398846579,
      "grad_norm": 0.5242541432380676,
      "learning_rate": 5.200608844052899e-06,
      "loss": 0.0102,
      "step": 1476740
    },
    {
      "epoch": 2.4167501292852327,
      "grad_norm": 0.3415776193141937,
      "learning_rate": 5.200542951839381e-06,
      "loss": 0.0176,
      "step": 1476760
    },
    {
      "epoch": 2.416782859723886,
      "grad_norm": 0.26560598611831665,
      "learning_rate": 5.2004770596258645e-06,
      "loss": 0.0135,
      "step": 1476780
    },
    {
      "epoch": 2.4168155901625394,
      "grad_norm": 0.16798663139343262,
      "learning_rate": 5.200411167412347e-06,
      "loss": 0.0135,
      "step": 1476800
    },
    {
      "epoch": 2.4168483206011926,
      "grad_norm": 0.14778146147727966,
      "learning_rate": 5.20034527519883e-06,
      "loss": 0.0133,
      "step": 1476820
    },
    {
      "epoch": 2.416881051039846,
      "grad_norm": 0.41429829597473145,
      "learning_rate": 5.200279382985313e-06,
      "loss": 0.0137,
      "step": 1476840
    },
    {
      "epoch": 2.4169137814784993,
      "grad_norm": 0.3047041893005371,
      "learning_rate": 5.200213490771796e-06,
      "loss": 0.0131,
      "step": 1476860
    },
    {
      "epoch": 2.4169465119171525,
      "grad_norm": 0.2970336973667145,
      "learning_rate": 5.200147598558278e-06,
      "loss": 0.0132,
      "step": 1476880
    },
    {
      "epoch": 2.416979242355806,
      "grad_norm": 0.3521067798137665,
      "learning_rate": 5.200081706344762e-06,
      "loss": 0.0113,
      "step": 1476900
    },
    {
      "epoch": 2.4170119727944592,
      "grad_norm": 0.4452529847621918,
      "learning_rate": 5.200015814131244e-06,
      "loss": 0.0153,
      "step": 1476920
    },
    {
      "epoch": 2.417044703233113,
      "grad_norm": 0.0745781883597374,
      "learning_rate": 5.199949921917727e-06,
      "loss": 0.0172,
      "step": 1476940
    },
    {
      "epoch": 2.417077433671766,
      "grad_norm": 0.2144404798746109,
      "learning_rate": 5.199884029704211e-06,
      "loss": 0.012,
      "step": 1476960
    },
    {
      "epoch": 2.4171101641104196,
      "grad_norm": 0.17532196640968323,
      "learning_rate": 5.199818137490693e-06,
      "loss": 0.0094,
      "step": 1476980
    },
    {
      "epoch": 2.4171428945490727,
      "grad_norm": 0.33915555477142334,
      "learning_rate": 5.199752245277176e-06,
      "loss": 0.0109,
      "step": 1477000
    },
    {
      "epoch": 2.417175624987726,
      "grad_norm": 0.6453505158424377,
      "learning_rate": 5.199686353063659e-06,
      "loss": 0.0104,
      "step": 1477020
    },
    {
      "epoch": 2.4172083554263795,
      "grad_norm": 0.4939342141151428,
      "learning_rate": 5.199620460850142e-06,
      "loss": 0.0124,
      "step": 1477040
    },
    {
      "epoch": 2.4172410858650326,
      "grad_norm": 0.5993236899375916,
      "learning_rate": 5.1995545686366246e-06,
      "loss": 0.0188,
      "step": 1477060
    },
    {
      "epoch": 2.417273816303686,
      "grad_norm": 0.12437577545642853,
      "learning_rate": 5.199488676423108e-06,
      "loss": 0.0138,
      "step": 1477080
    },
    {
      "epoch": 2.4173065467423394,
      "grad_norm": 0.23241421580314636,
      "learning_rate": 5.19942278420959e-06,
      "loss": 0.0193,
      "step": 1477100
    },
    {
      "epoch": 2.417339277180993,
      "grad_norm": 0.7288007140159607,
      "learning_rate": 5.199356891996074e-06,
      "loss": 0.0151,
      "step": 1477120
    },
    {
      "epoch": 2.417372007619646,
      "grad_norm": 0.2916238605976105,
      "learning_rate": 5.1992909997825555e-06,
      "loss": 0.0101,
      "step": 1477140
    },
    {
      "epoch": 2.4174047380582993,
      "grad_norm": 0.7017291784286499,
      "learning_rate": 5.199225107569039e-06,
      "loss": 0.0128,
      "step": 1477160
    },
    {
      "epoch": 2.417437468496953,
      "grad_norm": 0.525442898273468,
      "learning_rate": 5.199159215355522e-06,
      "loss": 0.0131,
      "step": 1477180
    },
    {
      "epoch": 2.417470198935606,
      "grad_norm": 0.3299711346626282,
      "learning_rate": 5.199093323142005e-06,
      "loss": 0.0131,
      "step": 1477200
    },
    {
      "epoch": 2.4175029293742596,
      "grad_norm": 0.6308110356330872,
      "learning_rate": 5.199027430928487e-06,
      "loss": 0.0163,
      "step": 1477220
    },
    {
      "epoch": 2.4175356598129127,
      "grad_norm": 0.7246555089950562,
      "learning_rate": 5.198961538714971e-06,
      "loss": 0.0142,
      "step": 1477240
    },
    {
      "epoch": 2.4175683902515663,
      "grad_norm": 0.5037839412689209,
      "learning_rate": 5.198895646501453e-06,
      "loss": 0.0175,
      "step": 1477260
    },
    {
      "epoch": 2.4176011206902195,
      "grad_norm": 0.0858311802148819,
      "learning_rate": 5.198829754287936e-06,
      "loss": 0.0264,
      "step": 1477280
    },
    {
      "epoch": 2.4176338511288726,
      "grad_norm": 0.2618843615055084,
      "learning_rate": 5.19876386207442e-06,
      "loss": 0.0156,
      "step": 1477300
    },
    {
      "epoch": 2.4176665815675262,
      "grad_norm": 0.21955236792564392,
      "learning_rate": 5.198697969860902e-06,
      "loss": 0.0119,
      "step": 1477320
    },
    {
      "epoch": 2.4176993120061794,
      "grad_norm": 0.7176229357719421,
      "learning_rate": 5.1986320776473855e-06,
      "loss": 0.0207,
      "step": 1477340
    },
    {
      "epoch": 2.417732042444833,
      "grad_norm": 0.35816490650177,
      "learning_rate": 5.198566185433867e-06,
      "loss": 0.0197,
      "step": 1477360
    },
    {
      "epoch": 2.417764772883486,
      "grad_norm": 0.8472293615341187,
      "learning_rate": 5.198500293220351e-06,
      "loss": 0.0181,
      "step": 1477380
    },
    {
      "epoch": 2.4177975033221397,
      "grad_norm": 0.21376708149909973,
      "learning_rate": 5.198434401006834e-06,
      "loss": 0.0178,
      "step": 1477400
    },
    {
      "epoch": 2.417830233760793,
      "grad_norm": 0.8086092472076416,
      "learning_rate": 5.198368508793316e-06,
      "loss": 0.0154,
      "step": 1477420
    },
    {
      "epoch": 2.417862964199446,
      "grad_norm": 0.35459333658218384,
      "learning_rate": 5.198302616579799e-06,
      "loss": 0.0199,
      "step": 1477440
    },
    {
      "epoch": 2.4178956946380996,
      "grad_norm": 0.4495966136455536,
      "learning_rate": 5.198236724366283e-06,
      "loss": 0.0158,
      "step": 1477460
    },
    {
      "epoch": 2.4179284250767528,
      "grad_norm": 0.5498756170272827,
      "learning_rate": 5.198170832152765e-06,
      "loss": 0.015,
      "step": 1477480
    },
    {
      "epoch": 2.4179611555154064,
      "grad_norm": 0.4705289602279663,
      "learning_rate": 5.198104939939248e-06,
      "loss": 0.0153,
      "step": 1477500
    },
    {
      "epoch": 2.4179938859540595,
      "grad_norm": 0.8341628313064575,
      "learning_rate": 5.19803904772573e-06,
      "loss": 0.0181,
      "step": 1477520
    },
    {
      "epoch": 2.418026616392713,
      "grad_norm": 0.5027479529380798,
      "learning_rate": 5.197973155512214e-06,
      "loss": 0.0149,
      "step": 1477540
    },
    {
      "epoch": 2.4180593468313663,
      "grad_norm": 0.2132428139448166,
      "learning_rate": 5.197907263298696e-06,
      "loss": 0.0083,
      "step": 1477560
    },
    {
      "epoch": 2.4180920772700194,
      "grad_norm": 0.2500614523887634,
      "learning_rate": 5.197841371085179e-06,
      "loss": 0.0176,
      "step": 1477580
    },
    {
      "epoch": 2.418124807708673,
      "grad_norm": 0.25911495089530945,
      "learning_rate": 5.197775478871662e-06,
      "loss": 0.0131,
      "step": 1477600
    },
    {
      "epoch": 2.418157538147326,
      "grad_norm": 0.4165882170200348,
      "learning_rate": 5.197709586658145e-06,
      "loss": 0.0123,
      "step": 1477620
    },
    {
      "epoch": 2.4181902685859797,
      "grad_norm": 0.31108012795448303,
      "learning_rate": 5.197643694444627e-06,
      "loss": 0.0241,
      "step": 1477640
    },
    {
      "epoch": 2.418222999024633,
      "grad_norm": 0.3472301959991455,
      "learning_rate": 5.197577802231111e-06,
      "loss": 0.0137,
      "step": 1477660
    },
    {
      "epoch": 2.4182557294632865,
      "grad_norm": 0.21736764907836914,
      "learning_rate": 5.197511910017594e-06,
      "loss": 0.0176,
      "step": 1477680
    },
    {
      "epoch": 2.4182884599019396,
      "grad_norm": 0.49132901430130005,
      "learning_rate": 5.1974460178040765e-06,
      "loss": 0.0118,
      "step": 1477700
    },
    {
      "epoch": 2.418321190340593,
      "grad_norm": 0.4419514834880829,
      "learning_rate": 5.19738012559056e-06,
      "loss": 0.0132,
      "step": 1477720
    },
    {
      "epoch": 2.4183539207792464,
      "grad_norm": 0.41609084606170654,
      "learning_rate": 5.197314233377042e-06,
      "loss": 0.0136,
      "step": 1477740
    },
    {
      "epoch": 2.4183866512178995,
      "grad_norm": 0.7181397080421448,
      "learning_rate": 5.1972483411635255e-06,
      "loss": 0.0166,
      "step": 1477760
    },
    {
      "epoch": 2.418419381656553,
      "grad_norm": 0.3416839838027954,
      "learning_rate": 5.1971824489500074e-06,
      "loss": 0.0161,
      "step": 1477780
    },
    {
      "epoch": 2.4184521120952063,
      "grad_norm": 0.4086211621761322,
      "learning_rate": 5.197116556736491e-06,
      "loss": 0.0134,
      "step": 1477800
    },
    {
      "epoch": 2.41848484253386,
      "grad_norm": 0.2712077796459198,
      "learning_rate": 5.197050664522974e-06,
      "loss": 0.0099,
      "step": 1477820
    },
    {
      "epoch": 2.418517572972513,
      "grad_norm": 0.10573036223649979,
      "learning_rate": 5.1969847723094565e-06,
      "loss": 0.0102,
      "step": 1477840
    },
    {
      "epoch": 2.418550303411166,
      "grad_norm": 0.2910921573638916,
      "learning_rate": 5.196918880095939e-06,
      "loss": 0.0155,
      "step": 1477860
    },
    {
      "epoch": 2.4185830338498198,
      "grad_norm": 0.2755763530731201,
      "learning_rate": 5.196852987882423e-06,
      "loss": 0.0101,
      "step": 1477880
    },
    {
      "epoch": 2.418615764288473,
      "grad_norm": 0.18002134561538696,
      "learning_rate": 5.196787095668905e-06,
      "loss": 0.0161,
      "step": 1477900
    },
    {
      "epoch": 2.4186484947271265,
      "grad_norm": 0.7217245101928711,
      "learning_rate": 5.196721203455388e-06,
      "loss": 0.0133,
      "step": 1477920
    },
    {
      "epoch": 2.4186812251657797,
      "grad_norm": 0.28441619873046875,
      "learning_rate": 5.19665531124187e-06,
      "loss": 0.0142,
      "step": 1477940
    },
    {
      "epoch": 2.4187139556044333,
      "grad_norm": 0.4506475627422333,
      "learning_rate": 5.196589419028354e-06,
      "loss": 0.0182,
      "step": 1477960
    },
    {
      "epoch": 2.4187466860430864,
      "grad_norm": 0.4445231854915619,
      "learning_rate": 5.1965235268148365e-06,
      "loss": 0.0135,
      "step": 1477980
    },
    {
      "epoch": 2.4187794164817396,
      "grad_norm": 0.9857609868049622,
      "learning_rate": 5.196457634601319e-06,
      "loss": 0.0157,
      "step": 1478000
    },
    {
      "epoch": 2.418812146920393,
      "grad_norm": 0.2045266479253769,
      "learning_rate": 5.196391742387803e-06,
      "loss": 0.0107,
      "step": 1478020
    },
    {
      "epoch": 2.4188448773590463,
      "grad_norm": 0.5502181649208069,
      "learning_rate": 5.196325850174286e-06,
      "loss": 0.0177,
      "step": 1478040
    },
    {
      "epoch": 2.4188776077977,
      "grad_norm": 0.23827216029167175,
      "learning_rate": 5.196259957960768e-06,
      "loss": 0.0176,
      "step": 1478060
    },
    {
      "epoch": 2.418910338236353,
      "grad_norm": 0.3110126554965973,
      "learning_rate": 5.196194065747251e-06,
      "loss": 0.019,
      "step": 1478080
    },
    {
      "epoch": 2.418943068675006,
      "grad_norm": 0.4517151415348053,
      "learning_rate": 5.196128173533735e-06,
      "loss": 0.0098,
      "step": 1478100
    },
    {
      "epoch": 2.41897579911366,
      "grad_norm": 0.6883828043937683,
      "learning_rate": 5.1960622813202165e-06,
      "loss": 0.0173,
      "step": 1478120
    },
    {
      "epoch": 2.419008529552313,
      "grad_norm": 0.867357075214386,
      "learning_rate": 5.1959963891067e-06,
      "loss": 0.0154,
      "step": 1478140
    },
    {
      "epoch": 2.4190412599909665,
      "grad_norm": 0.5083141922950745,
      "learning_rate": 5.195930496893182e-06,
      "loss": 0.0202,
      "step": 1478160
    },
    {
      "epoch": 2.4190739904296197,
      "grad_norm": 0.05843181163072586,
      "learning_rate": 5.195864604679666e-06,
      "loss": 0.0176,
      "step": 1478180
    },
    {
      "epoch": 2.419106720868273,
      "grad_norm": 0.5809978246688843,
      "learning_rate": 5.195798712466148e-06,
      "loss": 0.0239,
      "step": 1478200
    },
    {
      "epoch": 2.4191394513069264,
      "grad_norm": 0.2909216284751892,
      "learning_rate": 5.195732820252631e-06,
      "loss": 0.0109,
      "step": 1478220
    },
    {
      "epoch": 2.4191721817455796,
      "grad_norm": 0.26419857144355774,
      "learning_rate": 5.195666928039114e-06,
      "loss": 0.0188,
      "step": 1478240
    },
    {
      "epoch": 2.419204912184233,
      "grad_norm": 0.307662695646286,
      "learning_rate": 5.195601035825597e-06,
      "loss": 0.014,
      "step": 1478260
    },
    {
      "epoch": 2.4192376426228863,
      "grad_norm": 0.3638931214809418,
      "learning_rate": 5.195535143612079e-06,
      "loss": 0.0123,
      "step": 1478280
    },
    {
      "epoch": 2.41927037306154,
      "grad_norm": 0.14734555780887604,
      "learning_rate": 5.195469251398563e-06,
      "loss": 0.0175,
      "step": 1478300
    },
    {
      "epoch": 2.419303103500193,
      "grad_norm": 0.7254000306129456,
      "learning_rate": 5.195403359185045e-06,
      "loss": 0.0158,
      "step": 1478320
    },
    {
      "epoch": 2.419335833938846,
      "grad_norm": 0.1572379320859909,
      "learning_rate": 5.195337466971528e-06,
      "loss": 0.0141,
      "step": 1478340
    },
    {
      "epoch": 2.4193685643775,
      "grad_norm": 1.6474287509918213,
      "learning_rate": 5.195271574758012e-06,
      "loss": 0.0146,
      "step": 1478360
    },
    {
      "epoch": 2.419401294816153,
      "grad_norm": 0.6191728115081787,
      "learning_rate": 5.195205682544494e-06,
      "loss": 0.0191,
      "step": 1478380
    },
    {
      "epoch": 2.4194340252548066,
      "grad_norm": 0.4292721748352051,
      "learning_rate": 5.1951397903309774e-06,
      "loss": 0.0137,
      "step": 1478400
    },
    {
      "epoch": 2.4194667556934597,
      "grad_norm": 0.4272400736808777,
      "learning_rate": 5.19507389811746e-06,
      "loss": 0.0123,
      "step": 1478420
    },
    {
      "epoch": 2.4194994861321133,
      "grad_norm": 0.34061795473098755,
      "learning_rate": 5.195008005903943e-06,
      "loss": 0.0142,
      "step": 1478440
    },
    {
      "epoch": 2.4195322165707664,
      "grad_norm": 0.4791457951068878,
      "learning_rate": 5.194942113690426e-06,
      "loss": 0.0156,
      "step": 1478460
    },
    {
      "epoch": 2.4195649470094196,
      "grad_norm": 0.6524638533592224,
      "learning_rate": 5.194876221476909e-06,
      "loss": 0.0188,
      "step": 1478480
    },
    {
      "epoch": 2.419597677448073,
      "grad_norm": 0.8465667366981506,
      "learning_rate": 5.194810329263391e-06,
      "loss": 0.0177,
      "step": 1478500
    },
    {
      "epoch": 2.4196304078867263,
      "grad_norm": 0.3537350296974182,
      "learning_rate": 5.194744437049875e-06,
      "loss": 0.0172,
      "step": 1478520
    },
    {
      "epoch": 2.41966313832538,
      "grad_norm": 0.5454771518707275,
      "learning_rate": 5.194678544836357e-06,
      "loss": 0.0171,
      "step": 1478540
    },
    {
      "epoch": 2.419695868764033,
      "grad_norm": 0.15051382780075073,
      "learning_rate": 5.19461265262284e-06,
      "loss": 0.0148,
      "step": 1478560
    },
    {
      "epoch": 2.4197285992026867,
      "grad_norm": 0.4590303301811218,
      "learning_rate": 5.194546760409322e-06,
      "loss": 0.015,
      "step": 1478580
    },
    {
      "epoch": 2.41976132964134,
      "grad_norm": 0.6809319853782654,
      "learning_rate": 5.194480868195806e-06,
      "loss": 0.0158,
      "step": 1478600
    },
    {
      "epoch": 2.419794060079993,
      "grad_norm": 0.23887529969215393,
      "learning_rate": 5.1944149759822884e-06,
      "loss": 0.0131,
      "step": 1478620
    },
    {
      "epoch": 2.4198267905186466,
      "grad_norm": 0.5690855383872986,
      "learning_rate": 5.194349083768771e-06,
      "loss": 0.0101,
      "step": 1478640
    },
    {
      "epoch": 2.4198595209572997,
      "grad_norm": 0.6960745453834534,
      "learning_rate": 5.194283191555254e-06,
      "loss": 0.015,
      "step": 1478660
    },
    {
      "epoch": 2.4198922513959533,
      "grad_norm": 0.32655608654022217,
      "learning_rate": 5.1942172993417375e-06,
      "loss": 0.0124,
      "step": 1478680
    },
    {
      "epoch": 2.4199249818346065,
      "grad_norm": 0.20904433727264404,
      "learning_rate": 5.19415140712822e-06,
      "loss": 0.0194,
      "step": 1478700
    },
    {
      "epoch": 2.41995771227326,
      "grad_norm": 0.27696338295936584,
      "learning_rate": 5.194085514914703e-06,
      "loss": 0.013,
      "step": 1478720
    },
    {
      "epoch": 2.419990442711913,
      "grad_norm": 0.46049225330352783,
      "learning_rate": 5.1940196227011866e-06,
      "loss": 0.0074,
      "step": 1478740
    },
    {
      "epoch": 2.4200231731505664,
      "grad_norm": 0.6073803901672363,
      "learning_rate": 5.1939537304876684e-06,
      "loss": 0.0144,
      "step": 1478760
    },
    {
      "epoch": 2.42005590358922,
      "grad_norm": 1.5579051971435547,
      "learning_rate": 5.193887838274152e-06,
      "loss": 0.0118,
      "step": 1478780
    },
    {
      "epoch": 2.420088634027873,
      "grad_norm": 0.16664628684520721,
      "learning_rate": 5.193821946060634e-06,
      "loss": 0.0118,
      "step": 1478800
    },
    {
      "epoch": 2.4201213644665267,
      "grad_norm": 0.3079072833061218,
      "learning_rate": 5.1937560538471175e-06,
      "loss": 0.0127,
      "step": 1478820
    },
    {
      "epoch": 2.42015409490518,
      "grad_norm": 0.09405255317687988,
      "learning_rate": 5.1936901616336e-06,
      "loss": 0.0139,
      "step": 1478840
    },
    {
      "epoch": 2.4201868253438334,
      "grad_norm": 0.40009966492652893,
      "learning_rate": 5.193624269420083e-06,
      "loss": 0.0098,
      "step": 1478860
    },
    {
      "epoch": 2.4202195557824866,
      "grad_norm": 0.18909908831119537,
      "learning_rate": 5.193558377206566e-06,
      "loss": 0.019,
      "step": 1478880
    },
    {
      "epoch": 2.4202522862211397,
      "grad_norm": 0.27636775374412537,
      "learning_rate": 5.193492484993049e-06,
      "loss": 0.0101,
      "step": 1478900
    },
    {
      "epoch": 2.4202850166597933,
      "grad_norm": 0.36172977089881897,
      "learning_rate": 5.193426592779531e-06,
      "loss": 0.0177,
      "step": 1478920
    },
    {
      "epoch": 2.4203177470984465,
      "grad_norm": 0.22316941618919373,
      "learning_rate": 5.193360700566015e-06,
      "loss": 0.0141,
      "step": 1478940
    },
    {
      "epoch": 2.4203504775371,
      "grad_norm": 0.49044451117515564,
      "learning_rate": 5.193294808352497e-06,
      "loss": 0.017,
      "step": 1478960
    },
    {
      "epoch": 2.4203832079757532,
      "grad_norm": 1.776112675666809,
      "learning_rate": 5.19322891613898e-06,
      "loss": 0.0221,
      "step": 1478980
    },
    {
      "epoch": 2.420415938414407,
      "grad_norm": 0.611813485622406,
      "learning_rate": 5.193163023925463e-06,
      "loss": 0.0133,
      "step": 1479000
    },
    {
      "epoch": 2.42044866885306,
      "grad_norm": 0.48540809750556946,
      "learning_rate": 5.193097131711946e-06,
      "loss": 0.0185,
      "step": 1479020
    },
    {
      "epoch": 2.420481399291713,
      "grad_norm": 0.1654614955186844,
      "learning_rate": 5.1930312394984285e-06,
      "loss": 0.0182,
      "step": 1479040
    },
    {
      "epoch": 2.4205141297303667,
      "grad_norm": 0.14920566976070404,
      "learning_rate": 5.192965347284912e-06,
      "loss": 0.0113,
      "step": 1479060
    },
    {
      "epoch": 2.42054686016902,
      "grad_norm": 0.5783712267875671,
      "learning_rate": 5.192899455071395e-06,
      "loss": 0.0228,
      "step": 1479080
    },
    {
      "epoch": 2.4205795906076735,
      "grad_norm": 0.3136740028858185,
      "learning_rate": 5.1928335628578776e-06,
      "loss": 0.0131,
      "step": 1479100
    },
    {
      "epoch": 2.4206123210463266,
      "grad_norm": 0.4302462041378021,
      "learning_rate": 5.192767670644361e-06,
      "loss": 0.0174,
      "step": 1479120
    },
    {
      "epoch": 2.42064505148498,
      "grad_norm": 0.4293205738067627,
      "learning_rate": 5.192701778430843e-06,
      "loss": 0.0158,
      "step": 1479140
    },
    {
      "epoch": 2.4206777819236334,
      "grad_norm": 0.304138720035553,
      "learning_rate": 5.192635886217327e-06,
      "loss": 0.012,
      "step": 1479160
    },
    {
      "epoch": 2.4207105123622865,
      "grad_norm": 0.27605825662612915,
      "learning_rate": 5.1925699940038085e-06,
      "loss": 0.0127,
      "step": 1479180
    },
    {
      "epoch": 2.42074324280094,
      "grad_norm": 1.0591706037521362,
      "learning_rate": 5.192504101790292e-06,
      "loss": 0.0228,
      "step": 1479200
    },
    {
      "epoch": 2.4207759732395933,
      "grad_norm": 0.2833540141582489,
      "learning_rate": 5.192438209576775e-06,
      "loss": 0.0171,
      "step": 1479220
    },
    {
      "epoch": 2.420808703678247,
      "grad_norm": 0.34439852833747864,
      "learning_rate": 5.192372317363258e-06,
      "loss": 0.0109,
      "step": 1479240
    },
    {
      "epoch": 2.4208414341169,
      "grad_norm": 0.22745345532894135,
      "learning_rate": 5.19230642514974e-06,
      "loss": 0.0153,
      "step": 1479260
    },
    {
      "epoch": 2.4208741645555536,
      "grad_norm": 0.32722246646881104,
      "learning_rate": 5.192240532936224e-06,
      "loss": 0.012,
      "step": 1479280
    },
    {
      "epoch": 2.4209068949942067,
      "grad_norm": 0.44870537519454956,
      "learning_rate": 5.192174640722706e-06,
      "loss": 0.0137,
      "step": 1479300
    },
    {
      "epoch": 2.42093962543286,
      "grad_norm": 0.5185609459877014,
      "learning_rate": 5.192108748509189e-06,
      "loss": 0.0188,
      "step": 1479320
    },
    {
      "epoch": 2.4209723558715135,
      "grad_norm": 0.21985004842281342,
      "learning_rate": 5.192042856295671e-06,
      "loss": 0.0137,
      "step": 1479340
    },
    {
      "epoch": 2.4210050863101666,
      "grad_norm": 0.23391616344451904,
      "learning_rate": 5.191976964082155e-06,
      "loss": 0.0107,
      "step": 1479360
    },
    {
      "epoch": 2.4210378167488202,
      "grad_norm": 0.22761163115501404,
      "learning_rate": 5.191911071868637e-06,
      "loss": 0.0165,
      "step": 1479380
    },
    {
      "epoch": 2.4210705471874734,
      "grad_norm": 0.2514994442462921,
      "learning_rate": 5.19184517965512e-06,
      "loss": 0.0109,
      "step": 1479400
    },
    {
      "epoch": 2.421103277626127,
      "grad_norm": 0.4562017619609833,
      "learning_rate": 5.191779287441604e-06,
      "loss": 0.0169,
      "step": 1479420
    },
    {
      "epoch": 2.42113600806478,
      "grad_norm": 0.22491924464702606,
      "learning_rate": 5.191713395228087e-06,
      "loss": 0.0138,
      "step": 1479440
    },
    {
      "epoch": 2.4211687385034333,
      "grad_norm": 0.17099252343177795,
      "learning_rate": 5.191647503014569e-06,
      "loss": 0.0146,
      "step": 1479460
    },
    {
      "epoch": 2.421201468942087,
      "grad_norm": 0.4059036374092102,
      "learning_rate": 5.191581610801052e-06,
      "loss": 0.0118,
      "step": 1479480
    },
    {
      "epoch": 2.42123419938074,
      "grad_norm": 0.22386012971401215,
      "learning_rate": 5.191515718587536e-06,
      "loss": 0.0178,
      "step": 1479500
    },
    {
      "epoch": 2.4212669298193936,
      "grad_norm": 0.43274644017219543,
      "learning_rate": 5.191449826374018e-06,
      "loss": 0.0121,
      "step": 1479520
    },
    {
      "epoch": 2.4212996602580468,
      "grad_norm": 0.1785501092672348,
      "learning_rate": 5.191383934160501e-06,
      "loss": 0.0111,
      "step": 1479540
    },
    {
      "epoch": 2.4213323906967004,
      "grad_norm": 0.33412012457847595,
      "learning_rate": 5.191318041946983e-06,
      "loss": 0.0127,
      "step": 1479560
    },
    {
      "epoch": 2.4213651211353535,
      "grad_norm": 0.6555425524711609,
      "learning_rate": 5.191252149733467e-06,
      "loss": 0.0125,
      "step": 1479580
    },
    {
      "epoch": 2.4213978515740067,
      "grad_norm": 0.310555100440979,
      "learning_rate": 5.191186257519949e-06,
      "loss": 0.0173,
      "step": 1479600
    },
    {
      "epoch": 2.4214305820126603,
      "grad_norm": 0.14034971594810486,
      "learning_rate": 5.191120365306432e-06,
      "loss": 0.0117,
      "step": 1479620
    },
    {
      "epoch": 2.4214633124513134,
      "grad_norm": 0.3805343806743622,
      "learning_rate": 5.191054473092915e-06,
      "loss": 0.0117,
      "step": 1479640
    },
    {
      "epoch": 2.421496042889967,
      "grad_norm": 0.677615225315094,
      "learning_rate": 5.190988580879398e-06,
      "loss": 0.0202,
      "step": 1479660
    },
    {
      "epoch": 2.42152877332862,
      "grad_norm": 0.3692643642425537,
      "learning_rate": 5.19092268866588e-06,
      "loss": 0.0087,
      "step": 1479680
    },
    {
      "epoch": 2.4215615037672733,
      "grad_norm": 0.39478597044944763,
      "learning_rate": 5.190856796452364e-06,
      "loss": 0.013,
      "step": 1479700
    },
    {
      "epoch": 2.421594234205927,
      "grad_norm": 0.9840078949928284,
      "learning_rate": 5.190790904238846e-06,
      "loss": 0.0134,
      "step": 1479720
    },
    {
      "epoch": 2.42162696464458,
      "grad_norm": 0.7031065225601196,
      "learning_rate": 5.1907250120253295e-06,
      "loss": 0.0154,
      "step": 1479740
    },
    {
      "epoch": 2.4216596950832336,
      "grad_norm": 1.346154808998108,
      "learning_rate": 5.190659119811813e-06,
      "loss": 0.0168,
      "step": 1479760
    },
    {
      "epoch": 2.421692425521887,
      "grad_norm": 0.30573755502700806,
      "learning_rate": 5.190593227598295e-06,
      "loss": 0.013,
      "step": 1479780
    },
    {
      "epoch": 2.42172515596054,
      "grad_norm": 0.25048017501831055,
      "learning_rate": 5.1905273353847785e-06,
      "loss": 0.0106,
      "step": 1479800
    },
    {
      "epoch": 2.4217578863991935,
      "grad_norm": 0.2155134528875351,
      "learning_rate": 5.1904614431712604e-06,
      "loss": 0.0171,
      "step": 1479820
    },
    {
      "epoch": 2.4217906168378467,
      "grad_norm": 0.3865273892879486,
      "learning_rate": 5.190395550957744e-06,
      "loss": 0.016,
      "step": 1479840
    },
    {
      "epoch": 2.4218233472765003,
      "grad_norm": 1.0436543226242065,
      "learning_rate": 5.190329658744227e-06,
      "loss": 0.0185,
      "step": 1479860
    },
    {
      "epoch": 2.4218560777151534,
      "grad_norm": 0.7712225317955017,
      "learning_rate": 5.1902637665307095e-06,
      "loss": 0.0152,
      "step": 1479880
    },
    {
      "epoch": 2.421888808153807,
      "grad_norm": 0.2313031256198883,
      "learning_rate": 5.190197874317192e-06,
      "loss": 0.0141,
      "step": 1479900
    },
    {
      "epoch": 2.42192153859246,
      "grad_norm": 0.8030645251274109,
      "learning_rate": 5.190131982103676e-06,
      "loss": 0.0221,
      "step": 1479920
    },
    {
      "epoch": 2.4219542690311133,
      "grad_norm": 0.8080710768699646,
      "learning_rate": 5.190066089890158e-06,
      "loss": 0.0151,
      "step": 1479940
    },
    {
      "epoch": 2.421986999469767,
      "grad_norm": 0.9100085496902466,
      "learning_rate": 5.190000197676641e-06,
      "loss": 0.0157,
      "step": 1479960
    },
    {
      "epoch": 2.42201972990842,
      "grad_norm": 1.2621958255767822,
      "learning_rate": 5.189934305463123e-06,
      "loss": 0.0118,
      "step": 1479980
    },
    {
      "epoch": 2.4220524603470737,
      "grad_norm": 0.38928642868995667,
      "learning_rate": 5.189868413249607e-06,
      "loss": 0.0114,
      "step": 1480000
    },
    {
      "epoch": 2.422085190785727,
      "grad_norm": 1.0319501161575317,
      "learning_rate": 5.1898025210360895e-06,
      "loss": 0.022,
      "step": 1480020
    },
    {
      "epoch": 2.4221179212243804,
      "grad_norm": 0.316981703042984,
      "learning_rate": 5.189736628822572e-06,
      "loss": 0.0146,
      "step": 1480040
    },
    {
      "epoch": 2.4221506516630336,
      "grad_norm": 0.24322141706943512,
      "learning_rate": 5.189670736609055e-06,
      "loss": 0.018,
      "step": 1480060
    },
    {
      "epoch": 2.4221833821016867,
      "grad_norm": 0.23084266483783722,
      "learning_rate": 5.189604844395539e-06,
      "loss": 0.0136,
      "step": 1480080
    },
    {
      "epoch": 2.4222161125403403,
      "grad_norm": 1.2367771863937378,
      "learning_rate": 5.1895389521820205e-06,
      "loss": 0.0139,
      "step": 1480100
    },
    {
      "epoch": 2.4222488429789935,
      "grad_norm": 0.33057284355163574,
      "learning_rate": 5.189473059968504e-06,
      "loss": 0.0145,
      "step": 1480120
    },
    {
      "epoch": 2.422281573417647,
      "grad_norm": 0.6012680530548096,
      "learning_rate": 5.189407167754988e-06,
      "loss": 0.0168,
      "step": 1480140
    },
    {
      "epoch": 2.4223143038563,
      "grad_norm": 0.16631357371807098,
      "learning_rate": 5.1893412755414695e-06,
      "loss": 0.0125,
      "step": 1480160
    },
    {
      "epoch": 2.422347034294954,
      "grad_norm": 0.8601855039596558,
      "learning_rate": 5.189275383327953e-06,
      "loss": 0.0177,
      "step": 1480180
    },
    {
      "epoch": 2.422379764733607,
      "grad_norm": 0.45118647813796997,
      "learning_rate": 5.189209491114435e-06,
      "loss": 0.0187,
      "step": 1480200
    },
    {
      "epoch": 2.42241249517226,
      "grad_norm": 0.1194772943854332,
      "learning_rate": 5.189143598900919e-06,
      "loss": 0.0135,
      "step": 1480220
    },
    {
      "epoch": 2.4224452256109137,
      "grad_norm": 0.26822078227996826,
      "learning_rate": 5.189077706687401e-06,
      "loss": 0.0149,
      "step": 1480240
    },
    {
      "epoch": 2.422477956049567,
      "grad_norm": 0.504252016544342,
      "learning_rate": 5.189011814473884e-06,
      "loss": 0.0122,
      "step": 1480260
    },
    {
      "epoch": 2.4225106864882204,
      "grad_norm": 0.18715795874595642,
      "learning_rate": 5.188945922260367e-06,
      "loss": 0.0119,
      "step": 1480280
    },
    {
      "epoch": 2.4225434169268736,
      "grad_norm": 0.4379822313785553,
      "learning_rate": 5.18888003004685e-06,
      "loss": 0.0117,
      "step": 1480300
    },
    {
      "epoch": 2.422576147365527,
      "grad_norm": 0.214691624045372,
      "learning_rate": 5.188814137833332e-06,
      "loss": 0.0123,
      "step": 1480320
    },
    {
      "epoch": 2.4226088778041803,
      "grad_norm": 0.2810475826263428,
      "learning_rate": 5.188748245619816e-06,
      "loss": 0.0184,
      "step": 1480340
    },
    {
      "epoch": 2.4226416082428335,
      "grad_norm": 0.17998838424682617,
      "learning_rate": 5.188682353406298e-06,
      "loss": 0.0129,
      "step": 1480360
    },
    {
      "epoch": 2.422674338681487,
      "grad_norm": 0.4625977873802185,
      "learning_rate": 5.188616461192781e-06,
      "loss": 0.011,
      "step": 1480380
    },
    {
      "epoch": 2.42270706912014,
      "grad_norm": 0.4639171361923218,
      "learning_rate": 5.188550568979263e-06,
      "loss": 0.0131,
      "step": 1480400
    },
    {
      "epoch": 2.422739799558794,
      "grad_norm": 0.48736247420310974,
      "learning_rate": 5.188484676765747e-06,
      "loss": 0.0148,
      "step": 1480420
    },
    {
      "epoch": 2.422772529997447,
      "grad_norm": 0.7606549263000488,
      "learning_rate": 5.18841878455223e-06,
      "loss": 0.0121,
      "step": 1480440
    },
    {
      "epoch": 2.4228052604361006,
      "grad_norm": 0.37575218081474304,
      "learning_rate": 5.188352892338712e-06,
      "loss": 0.0119,
      "step": 1480460
    },
    {
      "epoch": 2.4228379908747537,
      "grad_norm": 0.07045401632785797,
      "learning_rate": 5.188287000125196e-06,
      "loss": 0.0164,
      "step": 1480480
    },
    {
      "epoch": 2.422870721313407,
      "grad_norm": 0.11729113757610321,
      "learning_rate": 5.188221107911679e-06,
      "loss": 0.0143,
      "step": 1480500
    },
    {
      "epoch": 2.4229034517520605,
      "grad_norm": 0.44453132152557373,
      "learning_rate": 5.188155215698162e-06,
      "loss": 0.0145,
      "step": 1480520
    },
    {
      "epoch": 2.4229361821907136,
      "grad_norm": 0.1437588632106781,
      "learning_rate": 5.188089323484644e-06,
      "loss": 0.0121,
      "step": 1480540
    },
    {
      "epoch": 2.422968912629367,
      "grad_norm": 0.2807861566543579,
      "learning_rate": 5.188023431271128e-06,
      "loss": 0.0201,
      "step": 1480560
    },
    {
      "epoch": 2.4230016430680203,
      "grad_norm": 0.466030091047287,
      "learning_rate": 5.18795753905761e-06,
      "loss": 0.0185,
      "step": 1480580
    },
    {
      "epoch": 2.423034373506674,
      "grad_norm": 0.33799684047698975,
      "learning_rate": 5.187891646844093e-06,
      "loss": 0.0124,
      "step": 1480600
    },
    {
      "epoch": 2.423067103945327,
      "grad_norm": 0.3011914789676666,
      "learning_rate": 5.187825754630575e-06,
      "loss": 0.0168,
      "step": 1480620
    },
    {
      "epoch": 2.4230998343839802,
      "grad_norm": 0.30369991064071655,
      "learning_rate": 5.187759862417059e-06,
      "loss": 0.0126,
      "step": 1480640
    },
    {
      "epoch": 2.423132564822634,
      "grad_norm": 0.20139364898204803,
      "learning_rate": 5.1876939702035414e-06,
      "loss": 0.0124,
      "step": 1480660
    },
    {
      "epoch": 2.423165295261287,
      "grad_norm": 0.22082068026065826,
      "learning_rate": 5.187628077990024e-06,
      "loss": 0.0083,
      "step": 1480680
    },
    {
      "epoch": 2.4231980256999406,
      "grad_norm": 0.10815165191888809,
      "learning_rate": 5.187562185776507e-06,
      "loss": 0.0141,
      "step": 1480700
    },
    {
      "epoch": 2.4232307561385937,
      "grad_norm": 0.589501142501831,
      "learning_rate": 5.1874962935629905e-06,
      "loss": 0.025,
      "step": 1480720
    },
    {
      "epoch": 2.4232634865772473,
      "grad_norm": 0.2976255416870117,
      "learning_rate": 5.187430401349472e-06,
      "loss": 0.0213,
      "step": 1480740
    },
    {
      "epoch": 2.4232962170159005,
      "grad_norm": 0.5984249114990234,
      "learning_rate": 5.187364509135956e-06,
      "loss": 0.02,
      "step": 1480760
    },
    {
      "epoch": 2.4233289474545536,
      "grad_norm": 0.6016101241111755,
      "learning_rate": 5.187298616922438e-06,
      "loss": 0.0136,
      "step": 1480780
    },
    {
      "epoch": 2.4233616778932072,
      "grad_norm": 0.27066805958747864,
      "learning_rate": 5.1872327247089215e-06,
      "loss": 0.0142,
      "step": 1480800
    },
    {
      "epoch": 2.4233944083318604,
      "grad_norm": 0.4687786400318146,
      "learning_rate": 5.187166832495405e-06,
      "loss": 0.0206,
      "step": 1480820
    },
    {
      "epoch": 2.423427138770514,
      "grad_norm": 0.15637768805027008,
      "learning_rate": 5.187100940281887e-06,
      "loss": 0.0188,
      "step": 1480840
    },
    {
      "epoch": 2.423459869209167,
      "grad_norm": 0.18750913441181183,
      "learning_rate": 5.1870350480683705e-06,
      "loss": 0.014,
      "step": 1480860
    },
    {
      "epoch": 2.4234925996478207,
      "grad_norm": 0.4109603762626648,
      "learning_rate": 5.186969155854853e-06,
      "loss": 0.0102,
      "step": 1480880
    },
    {
      "epoch": 2.423525330086474,
      "grad_norm": 0.8429821729660034,
      "learning_rate": 5.186903263641336e-06,
      "loss": 0.0174,
      "step": 1480900
    },
    {
      "epoch": 2.423558060525127,
      "grad_norm": 0.19490331411361694,
      "learning_rate": 5.186837371427819e-06,
      "loss": 0.012,
      "step": 1480920
    },
    {
      "epoch": 2.4235907909637806,
      "grad_norm": 0.40038904547691345,
      "learning_rate": 5.186771479214302e-06,
      "loss": 0.0124,
      "step": 1480940
    },
    {
      "epoch": 2.4236235214024338,
      "grad_norm": 0.5028707385063171,
      "learning_rate": 5.186705587000784e-06,
      "loss": 0.011,
      "step": 1480960
    },
    {
      "epoch": 2.4236562518410874,
      "grad_norm": 0.18037687242031097,
      "learning_rate": 5.186639694787268e-06,
      "loss": 0.0108,
      "step": 1480980
    },
    {
      "epoch": 2.4236889822797405,
      "grad_norm": 2.036747932434082,
      "learning_rate": 5.18657380257375e-06,
      "loss": 0.0181,
      "step": 1481000
    },
    {
      "epoch": 2.423721712718394,
      "grad_norm": 0.2625444531440735,
      "learning_rate": 5.186507910360233e-06,
      "loss": 0.018,
      "step": 1481020
    },
    {
      "epoch": 2.4237544431570472,
      "grad_norm": 0.28610026836395264,
      "learning_rate": 5.186442018146716e-06,
      "loss": 0.0187,
      "step": 1481040
    },
    {
      "epoch": 2.4237871735957004,
      "grad_norm": 0.3420794606208801,
      "learning_rate": 5.186376125933199e-06,
      "loss": 0.0112,
      "step": 1481060
    },
    {
      "epoch": 2.423819904034354,
      "grad_norm": 0.848753035068512,
      "learning_rate": 5.1863102337196815e-06,
      "loss": 0.0179,
      "step": 1481080
    },
    {
      "epoch": 2.423852634473007,
      "grad_norm": 0.3818809390068054,
      "learning_rate": 5.186244341506165e-06,
      "loss": 0.0143,
      "step": 1481100
    },
    {
      "epoch": 2.4238853649116607,
      "grad_norm": 0.7970784306526184,
      "learning_rate": 5.186178449292647e-06,
      "loss": 0.014,
      "step": 1481120
    },
    {
      "epoch": 2.423918095350314,
      "grad_norm": 0.22689786553382874,
      "learning_rate": 5.1861125570791306e-06,
      "loss": 0.0131,
      "step": 1481140
    },
    {
      "epoch": 2.423950825788967,
      "grad_norm": 0.0907287672162056,
      "learning_rate": 5.1860466648656125e-06,
      "loss": 0.0158,
      "step": 1481160
    },
    {
      "epoch": 2.4239835562276206,
      "grad_norm": 0.9448187351226807,
      "learning_rate": 5.185980772652096e-06,
      "loss": 0.0153,
      "step": 1481180
    },
    {
      "epoch": 2.424016286666274,
      "grad_norm": 0.4137575030326843,
      "learning_rate": 5.18591488043858e-06,
      "loss": 0.0142,
      "step": 1481200
    },
    {
      "epoch": 2.4240490171049274,
      "grad_norm": 1.0883848667144775,
      "learning_rate": 5.1858489882250615e-06,
      "loss": 0.0172,
      "step": 1481220
    },
    {
      "epoch": 2.4240817475435805,
      "grad_norm": 0.28362905979156494,
      "learning_rate": 5.185783096011545e-06,
      "loss": 0.0104,
      "step": 1481240
    },
    {
      "epoch": 2.4241144779822337,
      "grad_norm": 0.39837145805358887,
      "learning_rate": 5.185717203798028e-06,
      "loss": 0.0167,
      "step": 1481260
    },
    {
      "epoch": 2.4241472084208873,
      "grad_norm": 0.24229563772678375,
      "learning_rate": 5.185651311584511e-06,
      "loss": 0.0186,
      "step": 1481280
    },
    {
      "epoch": 2.4241799388595404,
      "grad_norm": 0.6672506928443909,
      "learning_rate": 5.185585419370993e-06,
      "loss": 0.0176,
      "step": 1481300
    },
    {
      "epoch": 2.424212669298194,
      "grad_norm": 0.6713959574699402,
      "learning_rate": 5.185519527157477e-06,
      "loss": 0.0113,
      "step": 1481320
    },
    {
      "epoch": 2.424245399736847,
      "grad_norm": 0.8337563276290894,
      "learning_rate": 5.185453634943959e-06,
      "loss": 0.0187,
      "step": 1481340
    },
    {
      "epoch": 2.4242781301755008,
      "grad_norm": 0.09122323244810104,
      "learning_rate": 5.185387742730442e-06,
      "loss": 0.0127,
      "step": 1481360
    },
    {
      "epoch": 2.424310860614154,
      "grad_norm": 0.24707099795341492,
      "learning_rate": 5.185321850516924e-06,
      "loss": 0.0127,
      "step": 1481380
    },
    {
      "epoch": 2.424343591052807,
      "grad_norm": 0.31732919812202454,
      "learning_rate": 5.185255958303408e-06,
      "loss": 0.0166,
      "step": 1481400
    },
    {
      "epoch": 2.4243763214914607,
      "grad_norm": 1.1186165809631348,
      "learning_rate": 5.18519006608989e-06,
      "loss": 0.0167,
      "step": 1481420
    },
    {
      "epoch": 2.424409051930114,
      "grad_norm": 0.7267242074012756,
      "learning_rate": 5.185124173876373e-06,
      "loss": 0.0099,
      "step": 1481440
    },
    {
      "epoch": 2.4244417823687674,
      "grad_norm": 0.48704028129577637,
      "learning_rate": 5.185058281662856e-06,
      "loss": 0.0114,
      "step": 1481460
    },
    {
      "epoch": 2.4244745128074205,
      "grad_norm": 0.2776196002960205,
      "learning_rate": 5.184992389449339e-06,
      "loss": 0.0166,
      "step": 1481480
    },
    {
      "epoch": 2.424507243246074,
      "grad_norm": 0.3364289402961731,
      "learning_rate": 5.184926497235822e-06,
      "loss": 0.0153,
      "step": 1481500
    },
    {
      "epoch": 2.4245399736847273,
      "grad_norm": 0.5021405816078186,
      "learning_rate": 5.184860605022305e-06,
      "loss": 0.0148,
      "step": 1481520
    },
    {
      "epoch": 2.4245727041233804,
      "grad_norm": 0.4855915606021881,
      "learning_rate": 5.184794712808788e-06,
      "loss": 0.0125,
      "step": 1481540
    },
    {
      "epoch": 2.424605434562034,
      "grad_norm": 0.8173630237579346,
      "learning_rate": 5.184728820595271e-06,
      "loss": 0.0157,
      "step": 1481560
    },
    {
      "epoch": 2.424638165000687,
      "grad_norm": 0.4452863335609436,
      "learning_rate": 5.184662928381754e-06,
      "loss": 0.0117,
      "step": 1481580
    },
    {
      "epoch": 2.424670895439341,
      "grad_norm": 0.3261064887046814,
      "learning_rate": 5.184597036168236e-06,
      "loss": 0.014,
      "step": 1481600
    },
    {
      "epoch": 2.424703625877994,
      "grad_norm": 0.8547372221946716,
      "learning_rate": 5.18453114395472e-06,
      "loss": 0.0213,
      "step": 1481620
    },
    {
      "epoch": 2.4247363563166475,
      "grad_norm": 0.3733523488044739,
      "learning_rate": 5.184465251741202e-06,
      "loss": 0.0128,
      "step": 1481640
    },
    {
      "epoch": 2.4247690867553007,
      "grad_norm": 0.09320331364870071,
      "learning_rate": 5.184399359527685e-06,
      "loss": 0.0124,
      "step": 1481660
    },
    {
      "epoch": 2.424801817193954,
      "grad_norm": 0.100357785820961,
      "learning_rate": 5.184333467314168e-06,
      "loss": 0.0124,
      "step": 1481680
    },
    {
      "epoch": 2.4248345476326074,
      "grad_norm": 0.18205571174621582,
      "learning_rate": 5.184267575100651e-06,
      "loss": 0.0109,
      "step": 1481700
    },
    {
      "epoch": 2.4248672780712606,
      "grad_norm": 0.2873190939426422,
      "learning_rate": 5.184201682887133e-06,
      "loss": 0.0143,
      "step": 1481720
    },
    {
      "epoch": 2.424900008509914,
      "grad_norm": 0.9503870606422424,
      "learning_rate": 5.184135790673617e-06,
      "loss": 0.0201,
      "step": 1481740
    },
    {
      "epoch": 2.4249327389485673,
      "grad_norm": 0.15391524136066437,
      "learning_rate": 5.184069898460099e-06,
      "loss": 0.0133,
      "step": 1481760
    },
    {
      "epoch": 2.424965469387221,
      "grad_norm": 0.5252190232276917,
      "learning_rate": 5.1840040062465825e-06,
      "loss": 0.0161,
      "step": 1481780
    },
    {
      "epoch": 2.424998199825874,
      "grad_norm": 0.62430340051651,
      "learning_rate": 5.183938114033064e-06,
      "loss": 0.0134,
      "step": 1481800
    },
    {
      "epoch": 2.425030930264527,
      "grad_norm": 0.29715850949287415,
      "learning_rate": 5.183872221819548e-06,
      "loss": 0.0106,
      "step": 1481820
    },
    {
      "epoch": 2.425063660703181,
      "grad_norm": 0.48006540536880493,
      "learning_rate": 5.183806329606031e-06,
      "loss": 0.0208,
      "step": 1481840
    },
    {
      "epoch": 2.425096391141834,
      "grad_norm": 1.3184059858322144,
      "learning_rate": 5.1837404373925134e-06,
      "loss": 0.0146,
      "step": 1481860
    },
    {
      "epoch": 2.4251291215804875,
      "grad_norm": 0.3661656379699707,
      "learning_rate": 5.183674545178997e-06,
      "loss": 0.0125,
      "step": 1481880
    },
    {
      "epoch": 2.4251618520191407,
      "grad_norm": 0.65152508020401,
      "learning_rate": 5.18360865296548e-06,
      "loss": 0.0237,
      "step": 1481900
    },
    {
      "epoch": 2.4251945824577943,
      "grad_norm": 0.5820319652557373,
      "learning_rate": 5.1835427607519625e-06,
      "loss": 0.0242,
      "step": 1481920
    },
    {
      "epoch": 2.4252273128964474,
      "grad_norm": 0.36470067501068115,
      "learning_rate": 5.183476868538445e-06,
      "loss": 0.0152,
      "step": 1481940
    },
    {
      "epoch": 2.4252600433351006,
      "grad_norm": 0.6392220258712769,
      "learning_rate": 5.183410976324929e-06,
      "loss": 0.0167,
      "step": 1481960
    },
    {
      "epoch": 2.425292773773754,
      "grad_norm": 0.17988623678684235,
      "learning_rate": 5.183345084111411e-06,
      "loss": 0.0157,
      "step": 1481980
    },
    {
      "epoch": 2.4253255042124073,
      "grad_norm": 0.10468508303165436,
      "learning_rate": 5.183279191897894e-06,
      "loss": 0.014,
      "step": 1482000
    },
    {
      "epoch": 2.425358234651061,
      "grad_norm": 0.06118186563253403,
      "learning_rate": 5.183213299684376e-06,
      "loss": 0.0154,
      "step": 1482020
    },
    {
      "epoch": 2.425390965089714,
      "grad_norm": 0.15402354300022125,
      "learning_rate": 5.18314740747086e-06,
      "loss": 0.0166,
      "step": 1482040
    },
    {
      "epoch": 2.4254236955283677,
      "grad_norm": 0.5093660950660706,
      "learning_rate": 5.1830815152573425e-06,
      "loss": 0.0139,
      "step": 1482060
    },
    {
      "epoch": 2.425456425967021,
      "grad_norm": 0.4135015904903412,
      "learning_rate": 5.183015623043825e-06,
      "loss": 0.0182,
      "step": 1482080
    },
    {
      "epoch": 2.425489156405674,
      "grad_norm": 0.5473201274871826,
      "learning_rate": 5.182949730830308e-06,
      "loss": 0.009,
      "step": 1482100
    },
    {
      "epoch": 2.4255218868443276,
      "grad_norm": 0.41085895895957947,
      "learning_rate": 5.182883838616792e-06,
      "loss": 0.0119,
      "step": 1482120
    },
    {
      "epoch": 2.4255546172829807,
      "grad_norm": 4.223579406738281,
      "learning_rate": 5.1828179464032735e-06,
      "loss": 0.0122,
      "step": 1482140
    },
    {
      "epoch": 2.4255873477216343,
      "grad_norm": 0.1824779361486435,
      "learning_rate": 5.182752054189757e-06,
      "loss": 0.0177,
      "step": 1482160
    },
    {
      "epoch": 2.4256200781602875,
      "grad_norm": 1.4189727306365967,
      "learning_rate": 5.182686161976239e-06,
      "loss": 0.0151,
      "step": 1482180
    },
    {
      "epoch": 2.425652808598941,
      "grad_norm": 0.8525193929672241,
      "learning_rate": 5.1826202697627226e-06,
      "loss": 0.0207,
      "step": 1482200
    },
    {
      "epoch": 2.425685539037594,
      "grad_norm": 0.3291565477848053,
      "learning_rate": 5.182554377549206e-06,
      "loss": 0.0142,
      "step": 1482220
    },
    {
      "epoch": 2.4257182694762474,
      "grad_norm": 0.3119407594203949,
      "learning_rate": 5.182488485335688e-06,
      "loss": 0.0095,
      "step": 1482240
    },
    {
      "epoch": 2.425750999914901,
      "grad_norm": 0.6600825190544128,
      "learning_rate": 5.182422593122172e-06,
      "loss": 0.019,
      "step": 1482260
    },
    {
      "epoch": 2.425783730353554,
      "grad_norm": 0.8636979460716248,
      "learning_rate": 5.182356700908654e-06,
      "loss": 0.0276,
      "step": 1482280
    },
    {
      "epoch": 2.4258164607922077,
      "grad_norm": 0.3356790542602539,
      "learning_rate": 5.182290808695137e-06,
      "loss": 0.0116,
      "step": 1482300
    },
    {
      "epoch": 2.425849191230861,
      "grad_norm": 0.13240565359592438,
      "learning_rate": 5.18222491648162e-06,
      "loss": 0.014,
      "step": 1482320
    },
    {
      "epoch": 2.4258819216695144,
      "grad_norm": 0.10886666923761368,
      "learning_rate": 5.1821590242681034e-06,
      "loss": 0.0159,
      "step": 1482340
    },
    {
      "epoch": 2.4259146521081676,
      "grad_norm": 0.4522077143192291,
      "learning_rate": 5.182093132054585e-06,
      "loss": 0.0103,
      "step": 1482360
    },
    {
      "epoch": 2.4259473825468207,
      "grad_norm": 0.5087267160415649,
      "learning_rate": 5.182027239841069e-06,
      "loss": 0.0234,
      "step": 1482380
    },
    {
      "epoch": 2.4259801129854743,
      "grad_norm": 0.10690128058195114,
      "learning_rate": 5.181961347627551e-06,
      "loss": 0.012,
      "step": 1482400
    },
    {
      "epoch": 2.4260128434241275,
      "grad_norm": 0.4147697687149048,
      "learning_rate": 5.181895455414034e-06,
      "loss": 0.0144,
      "step": 1482420
    },
    {
      "epoch": 2.426045573862781,
      "grad_norm": 0.6362150311470032,
      "learning_rate": 5.181829563200516e-06,
      "loss": 0.0163,
      "step": 1482440
    },
    {
      "epoch": 2.4260783043014342,
      "grad_norm": 0.2487388253211975,
      "learning_rate": 5.181763670987e-06,
      "loss": 0.0157,
      "step": 1482460
    },
    {
      "epoch": 2.426111034740088,
      "grad_norm": 1.711199164390564,
      "learning_rate": 5.181697778773483e-06,
      "loss": 0.0139,
      "step": 1482480
    },
    {
      "epoch": 2.426143765178741,
      "grad_norm": 0.45899099111557007,
      "learning_rate": 5.181631886559965e-06,
      "loss": 0.0124,
      "step": 1482500
    },
    {
      "epoch": 2.426176495617394,
      "grad_norm": 0.380815327167511,
      "learning_rate": 5.181565994346448e-06,
      "loss": 0.0144,
      "step": 1482520
    },
    {
      "epoch": 2.4262092260560477,
      "grad_norm": 0.09916778653860092,
      "learning_rate": 5.181500102132932e-06,
      "loss": 0.0203,
      "step": 1482540
    },
    {
      "epoch": 2.426241956494701,
      "grad_norm": 0.47355130314826965,
      "learning_rate": 5.1814342099194136e-06,
      "loss": 0.0175,
      "step": 1482560
    },
    {
      "epoch": 2.4262746869333545,
      "grad_norm": 0.5999357104301453,
      "learning_rate": 5.181368317705897e-06,
      "loss": 0.0219,
      "step": 1482580
    },
    {
      "epoch": 2.4263074173720076,
      "grad_norm": 0.42467859387397766,
      "learning_rate": 5.181302425492381e-06,
      "loss": 0.0153,
      "step": 1482600
    },
    {
      "epoch": 2.426340147810661,
      "grad_norm": 0.15910841524600983,
      "learning_rate": 5.181236533278863e-06,
      "loss": 0.0143,
      "step": 1482620
    },
    {
      "epoch": 2.4263728782493144,
      "grad_norm": 0.8426311016082764,
      "learning_rate": 5.181170641065346e-06,
      "loss": 0.0138,
      "step": 1482640
    },
    {
      "epoch": 2.4264056086879675,
      "grad_norm": 0.9849957823753357,
      "learning_rate": 5.181104748851828e-06,
      "loss": 0.0155,
      "step": 1482660
    },
    {
      "epoch": 2.426438339126621,
      "grad_norm": 0.2365015149116516,
      "learning_rate": 5.181038856638312e-06,
      "loss": 0.0106,
      "step": 1482680
    },
    {
      "epoch": 2.4264710695652743,
      "grad_norm": 0.10161758959293365,
      "learning_rate": 5.1809729644247944e-06,
      "loss": 0.0128,
      "step": 1482700
    },
    {
      "epoch": 2.426503800003928,
      "grad_norm": 0.32187706232070923,
      "learning_rate": 5.180907072211277e-06,
      "loss": 0.02,
      "step": 1482720
    },
    {
      "epoch": 2.426536530442581,
      "grad_norm": 0.3563140630722046,
      "learning_rate": 5.18084117999776e-06,
      "loss": 0.0141,
      "step": 1482740
    },
    {
      "epoch": 2.426569260881234,
      "grad_norm": 0.19404356181621552,
      "learning_rate": 5.1807752877842435e-06,
      "loss": 0.0105,
      "step": 1482760
    },
    {
      "epoch": 2.4266019913198877,
      "grad_norm": 0.3134235739707947,
      "learning_rate": 5.180709395570725e-06,
      "loss": 0.0135,
      "step": 1482780
    },
    {
      "epoch": 2.426634721758541,
      "grad_norm": 0.3035464286804199,
      "learning_rate": 5.180643503357209e-06,
      "loss": 0.0085,
      "step": 1482800
    },
    {
      "epoch": 2.4266674521971945,
      "grad_norm": 0.4293254613876343,
      "learning_rate": 5.180577611143691e-06,
      "loss": 0.0191,
      "step": 1482820
    },
    {
      "epoch": 2.4267001826358476,
      "grad_norm": 0.14739055931568146,
      "learning_rate": 5.1805117189301745e-06,
      "loss": 0.0179,
      "step": 1482840
    },
    {
      "epoch": 2.426732913074501,
      "grad_norm": 0.24055221676826477,
      "learning_rate": 5.180445826716657e-06,
      "loss": 0.0088,
      "step": 1482860
    },
    {
      "epoch": 2.4267656435131544,
      "grad_norm": 0.21124033629894257,
      "learning_rate": 5.18037993450314e-06,
      "loss": 0.0148,
      "step": 1482880
    },
    {
      "epoch": 2.4267983739518075,
      "grad_norm": 0.8811336159706116,
      "learning_rate": 5.180314042289623e-06,
      "loss": 0.0165,
      "step": 1482900
    },
    {
      "epoch": 2.426831104390461,
      "grad_norm": 0.7496962547302246,
      "learning_rate": 5.180248150076106e-06,
      "loss": 0.0211,
      "step": 1482920
    },
    {
      "epoch": 2.4268638348291143,
      "grad_norm": 1.3332176208496094,
      "learning_rate": 5.180182257862589e-06,
      "loss": 0.013,
      "step": 1482940
    },
    {
      "epoch": 2.426896565267768,
      "grad_norm": 0.5105596780776978,
      "learning_rate": 5.180116365649072e-06,
      "loss": 0.0154,
      "step": 1482960
    },
    {
      "epoch": 2.426929295706421,
      "grad_norm": 0.2570539116859436,
      "learning_rate": 5.180050473435555e-06,
      "loss": 0.0155,
      "step": 1482980
    },
    {
      "epoch": 2.426962026145074,
      "grad_norm": 0.23513856530189514,
      "learning_rate": 5.179984581222037e-06,
      "loss": 0.0137,
      "step": 1483000
    },
    {
      "epoch": 2.4269947565837278,
      "grad_norm": 0.443403035402298,
      "learning_rate": 5.179918689008521e-06,
      "loss": 0.0137,
      "step": 1483020
    },
    {
      "epoch": 2.427027487022381,
      "grad_norm": 0.537789523601532,
      "learning_rate": 5.179852796795003e-06,
      "loss": 0.0104,
      "step": 1483040
    },
    {
      "epoch": 2.4270602174610345,
      "grad_norm": 0.5196250081062317,
      "learning_rate": 5.179786904581486e-06,
      "loss": 0.0133,
      "step": 1483060
    },
    {
      "epoch": 2.4270929478996877,
      "grad_norm": 0.2970314621925354,
      "learning_rate": 5.179721012367969e-06,
      "loss": 0.0162,
      "step": 1483080
    },
    {
      "epoch": 2.4271256783383413,
      "grad_norm": 0.7268903851509094,
      "learning_rate": 5.179655120154452e-06,
      "loss": 0.0153,
      "step": 1483100
    },
    {
      "epoch": 2.4271584087769944,
      "grad_norm": 0.4290286898612976,
      "learning_rate": 5.1795892279409345e-06,
      "loss": 0.018,
      "step": 1483120
    },
    {
      "epoch": 2.4271911392156476,
      "grad_norm": 0.42016535997390747,
      "learning_rate": 5.179523335727418e-06,
      "loss": 0.0152,
      "step": 1483140
    },
    {
      "epoch": 2.427223869654301,
      "grad_norm": 0.27661529183387756,
      "learning_rate": 5.1794574435139e-06,
      "loss": 0.0156,
      "step": 1483160
    },
    {
      "epoch": 2.4272566000929543,
      "grad_norm": 0.3476889431476593,
      "learning_rate": 5.179391551300384e-06,
      "loss": 0.0154,
      "step": 1483180
    },
    {
      "epoch": 2.427289330531608,
      "grad_norm": 0.4445400536060333,
      "learning_rate": 5.1793256590868655e-06,
      "loss": 0.0146,
      "step": 1483200
    },
    {
      "epoch": 2.427322060970261,
      "grad_norm": 0.09520938992500305,
      "learning_rate": 5.179259766873349e-06,
      "loss": 0.0106,
      "step": 1483220
    },
    {
      "epoch": 2.4273547914089146,
      "grad_norm": 0.8429808020591736,
      "learning_rate": 5.179193874659831e-06,
      "loss": 0.02,
      "step": 1483240
    },
    {
      "epoch": 2.427387521847568,
      "grad_norm": 0.39416858553886414,
      "learning_rate": 5.1791279824463145e-06,
      "loss": 0.01,
      "step": 1483260
    },
    {
      "epoch": 2.427420252286221,
      "grad_norm": 0.32432159781455994,
      "learning_rate": 5.179062090232798e-06,
      "loss": 0.0157,
      "step": 1483280
    },
    {
      "epoch": 2.4274529827248745,
      "grad_norm": 1.0538980960845947,
      "learning_rate": 5.17899619801928e-06,
      "loss": 0.0177,
      "step": 1483300
    },
    {
      "epoch": 2.4274857131635277,
      "grad_norm": 0.8486694693565369,
      "learning_rate": 5.178930305805764e-06,
      "loss": 0.018,
      "step": 1483320
    },
    {
      "epoch": 2.4275184436021813,
      "grad_norm": 0.1152997836470604,
      "learning_rate": 5.178864413592246e-06,
      "loss": 0.0068,
      "step": 1483340
    },
    {
      "epoch": 2.4275511740408344,
      "grad_norm": 0.359696626663208,
      "learning_rate": 5.17879852137873e-06,
      "loss": 0.0182,
      "step": 1483360
    },
    {
      "epoch": 2.427583904479488,
      "grad_norm": 0.040296077728271484,
      "learning_rate": 5.178732629165212e-06,
      "loss": 0.0187,
      "step": 1483380
    },
    {
      "epoch": 2.427616634918141,
      "grad_norm": 0.24737650156021118,
      "learning_rate": 5.178666736951695e-06,
      "loss": 0.0137,
      "step": 1483400
    },
    {
      "epoch": 2.4276493653567943,
      "grad_norm": 0.5086987018585205,
      "learning_rate": 5.178600844738177e-06,
      "loss": 0.015,
      "step": 1483420
    },
    {
      "epoch": 2.427682095795448,
      "grad_norm": 0.5708126425743103,
      "learning_rate": 5.178534952524661e-06,
      "loss": 0.0141,
      "step": 1483440
    },
    {
      "epoch": 2.427714826234101,
      "grad_norm": 0.4835077226161957,
      "learning_rate": 5.178469060311143e-06,
      "loss": 0.0108,
      "step": 1483460
    },
    {
      "epoch": 2.4277475566727547,
      "grad_norm": 0.39265426993370056,
      "learning_rate": 5.178403168097626e-06,
      "loss": 0.0178,
      "step": 1483480
    },
    {
      "epoch": 2.427780287111408,
      "grad_norm": 0.23619574308395386,
      "learning_rate": 5.178337275884109e-06,
      "loss": 0.0175,
      "step": 1483500
    },
    {
      "epoch": 2.4278130175500614,
      "grad_norm": 1.052539348602295,
      "learning_rate": 5.178271383670592e-06,
      "loss": 0.0178,
      "step": 1483520
    },
    {
      "epoch": 2.4278457479887146,
      "grad_norm": 0.17874814569950104,
      "learning_rate": 5.178205491457075e-06,
      "loss": 0.0152,
      "step": 1483540
    },
    {
      "epoch": 2.4278784784273677,
      "grad_norm": 0.20744472742080688,
      "learning_rate": 5.178139599243558e-06,
      "loss": 0.018,
      "step": 1483560
    },
    {
      "epoch": 2.4279112088660213,
      "grad_norm": 0.4267529547214508,
      "learning_rate": 5.17807370703004e-06,
      "loss": 0.0206,
      "step": 1483580
    },
    {
      "epoch": 2.4279439393046744,
      "grad_norm": 0.8620249629020691,
      "learning_rate": 5.178007814816524e-06,
      "loss": 0.013,
      "step": 1483600
    },
    {
      "epoch": 2.427976669743328,
      "grad_norm": 0.11397822201251984,
      "learning_rate": 5.1779419226030055e-06,
      "loss": 0.0229,
      "step": 1483620
    },
    {
      "epoch": 2.428009400181981,
      "grad_norm": 0.8474611639976501,
      "learning_rate": 5.177876030389489e-06,
      "loss": 0.0127,
      "step": 1483640
    },
    {
      "epoch": 2.428042130620635,
      "grad_norm": 0.6172571778297424,
      "learning_rate": 5.177810138175973e-06,
      "loss": 0.0127,
      "step": 1483660
    },
    {
      "epoch": 2.428074861059288,
      "grad_norm": 0.4097892940044403,
      "learning_rate": 5.177744245962455e-06,
      "loss": 0.0093,
      "step": 1483680
    },
    {
      "epoch": 2.428107591497941,
      "grad_norm": 0.26380670070648193,
      "learning_rate": 5.177678353748938e-06,
      "loss": 0.0141,
      "step": 1483700
    },
    {
      "epoch": 2.4281403219365947,
      "grad_norm": 0.8390979170799255,
      "learning_rate": 5.177612461535421e-06,
      "loss": 0.0127,
      "step": 1483720
    },
    {
      "epoch": 2.428173052375248,
      "grad_norm": 0.7407472133636475,
      "learning_rate": 5.177546569321904e-06,
      "loss": 0.0139,
      "step": 1483740
    },
    {
      "epoch": 2.4282057828139014,
      "grad_norm": 0.05939975380897522,
      "learning_rate": 5.177480677108386e-06,
      "loss": 0.0107,
      "step": 1483760
    },
    {
      "epoch": 2.4282385132525546,
      "grad_norm": 0.7877203822135925,
      "learning_rate": 5.17741478489487e-06,
      "loss": 0.0189,
      "step": 1483780
    },
    {
      "epoch": 2.428271243691208,
      "grad_norm": 0.32532811164855957,
      "learning_rate": 5.177348892681352e-06,
      "loss": 0.0108,
      "step": 1483800
    },
    {
      "epoch": 2.4283039741298613,
      "grad_norm": 0.5963953137397766,
      "learning_rate": 5.1772830004678355e-06,
      "loss": 0.0131,
      "step": 1483820
    },
    {
      "epoch": 2.4283367045685145,
      "grad_norm": 0.3886411786079407,
      "learning_rate": 5.177217108254317e-06,
      "loss": 0.0133,
      "step": 1483840
    },
    {
      "epoch": 2.428369435007168,
      "grad_norm": 0.5983237624168396,
      "learning_rate": 5.177151216040801e-06,
      "loss": 0.0186,
      "step": 1483860
    },
    {
      "epoch": 2.428402165445821,
      "grad_norm": 0.21004325151443481,
      "learning_rate": 5.177085323827284e-06,
      "loss": 0.0249,
      "step": 1483880
    },
    {
      "epoch": 2.428434895884475,
      "grad_norm": 0.31379514932632446,
      "learning_rate": 5.1770194316137664e-06,
      "loss": 0.0124,
      "step": 1483900
    },
    {
      "epoch": 2.428467626323128,
      "grad_norm": 0.13829821348190308,
      "learning_rate": 5.176953539400249e-06,
      "loss": 0.0176,
      "step": 1483920
    },
    {
      "epoch": 2.4285003567617816,
      "grad_norm": 0.31826502084732056,
      "learning_rate": 5.176887647186733e-06,
      "loss": 0.0248,
      "step": 1483940
    },
    {
      "epoch": 2.4285330872004347,
      "grad_norm": 0.24452999234199524,
      "learning_rate": 5.176821754973215e-06,
      "loss": 0.0119,
      "step": 1483960
    },
    {
      "epoch": 2.428565817639088,
      "grad_norm": 0.4345753788948059,
      "learning_rate": 5.176755862759698e-06,
      "loss": 0.0171,
      "step": 1483980
    },
    {
      "epoch": 2.4285985480777414,
      "grad_norm": 0.6397411227226257,
      "learning_rate": 5.176689970546182e-06,
      "loss": 0.017,
      "step": 1484000
    },
    {
      "epoch": 2.4286312785163946,
      "grad_norm": 0.16008146107196808,
      "learning_rate": 5.176624078332664e-06,
      "loss": 0.0129,
      "step": 1484020
    },
    {
      "epoch": 2.428664008955048,
      "grad_norm": 0.41270560026168823,
      "learning_rate": 5.176558186119147e-06,
      "loss": 0.0135,
      "step": 1484040
    },
    {
      "epoch": 2.4286967393937013,
      "grad_norm": 0.1896522045135498,
      "learning_rate": 5.176492293905629e-06,
      "loss": 0.0104,
      "step": 1484060
    },
    {
      "epoch": 2.428729469832355,
      "grad_norm": 0.038665954023599625,
      "learning_rate": 5.176426401692113e-06,
      "loss": 0.0151,
      "step": 1484080
    },
    {
      "epoch": 2.428762200271008,
      "grad_norm": 3.039858102798462,
      "learning_rate": 5.1763605094785955e-06,
      "loss": 0.0188,
      "step": 1484100
    },
    {
      "epoch": 2.4287949307096612,
      "grad_norm": 0.3618007004261017,
      "learning_rate": 5.176294617265078e-06,
      "loss": 0.0126,
      "step": 1484120
    },
    {
      "epoch": 2.428827661148315,
      "grad_norm": 0.20230738818645477,
      "learning_rate": 5.176228725051561e-06,
      "loss": 0.0145,
      "step": 1484140
    },
    {
      "epoch": 2.428860391586968,
      "grad_norm": 0.658998966217041,
      "learning_rate": 5.176162832838045e-06,
      "loss": 0.0238,
      "step": 1484160
    },
    {
      "epoch": 2.4288931220256216,
      "grad_norm": 0.6378172039985657,
      "learning_rate": 5.1760969406245265e-06,
      "loss": 0.0157,
      "step": 1484180
    },
    {
      "epoch": 2.4289258524642747,
      "grad_norm": 0.4564841091632843,
      "learning_rate": 5.17603104841101e-06,
      "loss": 0.0128,
      "step": 1484200
    },
    {
      "epoch": 2.428958582902928,
      "grad_norm": 0.9919272065162659,
      "learning_rate": 5.175965156197492e-06,
      "loss": 0.0181,
      "step": 1484220
    },
    {
      "epoch": 2.4289913133415815,
      "grad_norm": 0.8552548289299011,
      "learning_rate": 5.1758992639839756e-06,
      "loss": 0.0216,
      "step": 1484240
    },
    {
      "epoch": 2.4290240437802346,
      "grad_norm": 0.5603872537612915,
      "learning_rate": 5.1758333717704575e-06,
      "loss": 0.0169,
      "step": 1484260
    },
    {
      "epoch": 2.429056774218888,
      "grad_norm": 0.9841805100440979,
      "learning_rate": 5.175767479556941e-06,
      "loss": 0.0178,
      "step": 1484280
    },
    {
      "epoch": 2.4290895046575414,
      "grad_norm": 0.39411771297454834,
      "learning_rate": 5.175701587343424e-06,
      "loss": 0.0231,
      "step": 1484300
    },
    {
      "epoch": 2.4291222350961945,
      "grad_norm": 0.4002115726470947,
      "learning_rate": 5.1756356951299065e-06,
      "loss": 0.0195,
      "step": 1484320
    },
    {
      "epoch": 2.429154965534848,
      "grad_norm": 0.48364463448524475,
      "learning_rate": 5.17556980291639e-06,
      "loss": 0.0134,
      "step": 1484340
    },
    {
      "epoch": 2.4291876959735013,
      "grad_norm": 0.19938063621520996,
      "learning_rate": 5.175503910702873e-06,
      "loss": 0.0149,
      "step": 1484360
    },
    {
      "epoch": 2.429220426412155,
      "grad_norm": 0.4561260938644409,
      "learning_rate": 5.175438018489356e-06,
      "loss": 0.0124,
      "step": 1484380
    },
    {
      "epoch": 2.429253156850808,
      "grad_norm": 0.20942705869674683,
      "learning_rate": 5.175372126275838e-06,
      "loss": 0.0073,
      "step": 1484400
    },
    {
      "epoch": 2.4292858872894616,
      "grad_norm": 0.6334142088890076,
      "learning_rate": 5.175306234062322e-06,
      "loss": 0.0113,
      "step": 1484420
    },
    {
      "epoch": 2.4293186177281147,
      "grad_norm": 0.5722060203552246,
      "learning_rate": 5.175240341848804e-06,
      "loss": 0.0209,
      "step": 1484440
    },
    {
      "epoch": 2.429351348166768,
      "grad_norm": 0.7820203304290771,
      "learning_rate": 5.175174449635287e-06,
      "loss": 0.0186,
      "step": 1484460
    },
    {
      "epoch": 2.4293840786054215,
      "grad_norm": 0.46602609753608704,
      "learning_rate": 5.175108557421769e-06,
      "loss": 0.015,
      "step": 1484480
    },
    {
      "epoch": 2.4294168090440746,
      "grad_norm": 0.5157039761543274,
      "learning_rate": 5.175042665208253e-06,
      "loss": 0.0158,
      "step": 1484500
    },
    {
      "epoch": 2.4294495394827282,
      "grad_norm": 0.19962388277053833,
      "learning_rate": 5.174976772994736e-06,
      "loss": 0.0145,
      "step": 1484520
    },
    {
      "epoch": 2.4294822699213814,
      "grad_norm": 0.14724493026733398,
      "learning_rate": 5.174910880781218e-06,
      "loss": 0.0121,
      "step": 1484540
    },
    {
      "epoch": 2.429515000360035,
      "grad_norm": 0.3349035382270813,
      "learning_rate": 5.174844988567701e-06,
      "loss": 0.0145,
      "step": 1484560
    },
    {
      "epoch": 2.429547730798688,
      "grad_norm": 0.02267776057124138,
      "learning_rate": 5.174779096354185e-06,
      "loss": 0.0126,
      "step": 1484580
    },
    {
      "epoch": 2.4295804612373413,
      "grad_norm": 0.16603806614875793,
      "learning_rate": 5.1747132041406666e-06,
      "loss": 0.0138,
      "step": 1484600
    },
    {
      "epoch": 2.429613191675995,
      "grad_norm": 1.0386316776275635,
      "learning_rate": 5.17464731192715e-06,
      "loss": 0.0118,
      "step": 1484620
    },
    {
      "epoch": 2.429645922114648,
      "grad_norm": 1.2523462772369385,
      "learning_rate": 5.174581419713632e-06,
      "loss": 0.0202,
      "step": 1484640
    },
    {
      "epoch": 2.4296786525533016,
      "grad_norm": 0.46150797605514526,
      "learning_rate": 5.174515527500116e-06,
      "loss": 0.0107,
      "step": 1484660
    },
    {
      "epoch": 2.4297113829919548,
      "grad_norm": 0.4050654470920563,
      "learning_rate": 5.174449635286599e-06,
      "loss": 0.0143,
      "step": 1484680
    },
    {
      "epoch": 2.4297441134306084,
      "grad_norm": 0.3569691777229309,
      "learning_rate": 5.174383743073081e-06,
      "loss": 0.0156,
      "step": 1484700
    },
    {
      "epoch": 2.4297768438692615,
      "grad_norm": 0.04136312007904053,
      "learning_rate": 5.174317850859565e-06,
      "loss": 0.0195,
      "step": 1484720
    },
    {
      "epoch": 2.4298095743079147,
      "grad_norm": 0.4339660108089447,
      "learning_rate": 5.1742519586460474e-06,
      "loss": 0.0244,
      "step": 1484740
    },
    {
      "epoch": 2.4298423047465683,
      "grad_norm": 0.7367194294929504,
      "learning_rate": 5.17418606643253e-06,
      "loss": 0.019,
      "step": 1484760
    },
    {
      "epoch": 2.4298750351852214,
      "grad_norm": 0.6532726883888245,
      "learning_rate": 5.174120174219013e-06,
      "loss": 0.0124,
      "step": 1484780
    },
    {
      "epoch": 2.429907765623875,
      "grad_norm": 0.3701231777667999,
      "learning_rate": 5.1740542820054965e-06,
      "loss": 0.0263,
      "step": 1484800
    },
    {
      "epoch": 2.429940496062528,
      "grad_norm": 0.34892377257347107,
      "learning_rate": 5.173988389791978e-06,
      "loss": 0.0114,
      "step": 1484820
    },
    {
      "epoch": 2.4299732265011817,
      "grad_norm": 0.49108654260635376,
      "learning_rate": 5.173922497578462e-06,
      "loss": 0.0211,
      "step": 1484840
    },
    {
      "epoch": 2.430005956939835,
      "grad_norm": 0.38381361961364746,
      "learning_rate": 5.173856605364944e-06,
      "loss": 0.0108,
      "step": 1484860
    },
    {
      "epoch": 2.430038687378488,
      "grad_norm": 0.7546807527542114,
      "learning_rate": 5.1737907131514275e-06,
      "loss": 0.0143,
      "step": 1484880
    },
    {
      "epoch": 2.4300714178171416,
      "grad_norm": 0.30580440163612366,
      "learning_rate": 5.17372482093791e-06,
      "loss": 0.018,
      "step": 1484900
    },
    {
      "epoch": 2.430104148255795,
      "grad_norm": 0.1939522624015808,
      "learning_rate": 5.173658928724393e-06,
      "loss": 0.0182,
      "step": 1484920
    },
    {
      "epoch": 2.4301368786944484,
      "grad_norm": 1.3986440896987915,
      "learning_rate": 5.173593036510876e-06,
      "loss": 0.0162,
      "step": 1484940
    },
    {
      "epoch": 2.4301696091331015,
      "grad_norm": 0.542815625667572,
      "learning_rate": 5.173527144297359e-06,
      "loss": 0.0093,
      "step": 1484960
    },
    {
      "epoch": 2.430202339571755,
      "grad_norm": 0.5810911655426025,
      "learning_rate": 5.173461252083841e-06,
      "loss": 0.0125,
      "step": 1484980
    },
    {
      "epoch": 2.4302350700104083,
      "grad_norm": 0.8974044919013977,
      "learning_rate": 5.173395359870325e-06,
      "loss": 0.0164,
      "step": 1485000
    },
    {
      "epoch": 2.4302678004490614,
      "grad_norm": 0.26879364252090454,
      "learning_rate": 5.173329467656807e-06,
      "loss": 0.0183,
      "step": 1485020
    },
    {
      "epoch": 2.430300530887715,
      "grad_norm": 0.07594220340251923,
      "learning_rate": 5.17326357544329e-06,
      "loss": 0.0142,
      "step": 1485040
    },
    {
      "epoch": 2.430333261326368,
      "grad_norm": 1.0652896165847778,
      "learning_rate": 5.173197683229774e-06,
      "loss": 0.0165,
      "step": 1485060
    },
    {
      "epoch": 2.4303659917650218,
      "grad_norm": 0.5954287052154541,
      "learning_rate": 5.173131791016256e-06,
      "loss": 0.0165,
      "step": 1485080
    },
    {
      "epoch": 2.430398722203675,
      "grad_norm": 0.5136541128158569,
      "learning_rate": 5.173065898802739e-06,
      "loss": 0.0148,
      "step": 1485100
    },
    {
      "epoch": 2.4304314526423285,
      "grad_norm": 3.0834014415740967,
      "learning_rate": 5.173000006589222e-06,
      "loss": 0.0078,
      "step": 1485120
    },
    {
      "epoch": 2.4304641830809817,
      "grad_norm": 0.23963524401187897,
      "learning_rate": 5.172934114375705e-06,
      "loss": 0.0092,
      "step": 1485140
    },
    {
      "epoch": 2.430496913519635,
      "grad_norm": 0.5725853443145752,
      "learning_rate": 5.1728682221621875e-06,
      "loss": 0.0102,
      "step": 1485160
    },
    {
      "epoch": 2.4305296439582884,
      "grad_norm": 0.21251972019672394,
      "learning_rate": 5.172802329948671e-06,
      "loss": 0.0134,
      "step": 1485180
    },
    {
      "epoch": 2.4305623743969416,
      "grad_norm": 0.5169855952262878,
      "learning_rate": 5.172736437735153e-06,
      "loss": 0.0135,
      "step": 1485200
    },
    {
      "epoch": 2.430595104835595,
      "grad_norm": 0.47575700283050537,
      "learning_rate": 5.172670545521637e-06,
      "loss": 0.0254,
      "step": 1485220
    },
    {
      "epoch": 2.4306278352742483,
      "grad_norm": 0.3767719268798828,
      "learning_rate": 5.1726046533081185e-06,
      "loss": 0.0172,
      "step": 1485240
    },
    {
      "epoch": 2.430660565712902,
      "grad_norm": 0.10792694240808487,
      "learning_rate": 5.172538761094602e-06,
      "loss": 0.0144,
      "step": 1485260
    },
    {
      "epoch": 2.430693296151555,
      "grad_norm": 0.34289878606796265,
      "learning_rate": 5.172472868881084e-06,
      "loss": 0.0111,
      "step": 1485280
    },
    {
      "epoch": 2.430726026590208,
      "grad_norm": 0.3071384131908417,
      "learning_rate": 5.1724069766675675e-06,
      "loss": 0.0126,
      "step": 1485300
    },
    {
      "epoch": 2.430758757028862,
      "grad_norm": 0.5496684908866882,
      "learning_rate": 5.17234108445405e-06,
      "loss": 0.0112,
      "step": 1485320
    },
    {
      "epoch": 2.430791487467515,
      "grad_norm": 0.16764535009860992,
      "learning_rate": 5.172275192240533e-06,
      "loss": 0.015,
      "step": 1485340
    },
    {
      "epoch": 2.4308242179061685,
      "grad_norm": 0.36458492279052734,
      "learning_rate": 5.172209300027016e-06,
      "loss": 0.0168,
      "step": 1485360
    },
    {
      "epoch": 2.4308569483448217,
      "grad_norm": 0.21034784615039825,
      "learning_rate": 5.172143407813499e-06,
      "loss": 0.0154,
      "step": 1485380
    },
    {
      "epoch": 2.4308896787834753,
      "grad_norm": 0.33021485805511475,
      "learning_rate": 5.172077515599982e-06,
      "loss": 0.0127,
      "step": 1485400
    },
    {
      "epoch": 2.4309224092221284,
      "grad_norm": 0.2368442416191101,
      "learning_rate": 5.172011623386465e-06,
      "loss": 0.016,
      "step": 1485420
    },
    {
      "epoch": 2.4309551396607816,
      "grad_norm": 0.6297286152839661,
      "learning_rate": 5.171945731172948e-06,
      "loss": 0.0152,
      "step": 1485440
    },
    {
      "epoch": 2.430987870099435,
      "grad_norm": 0.6544795632362366,
      "learning_rate": 5.17187983895943e-06,
      "loss": 0.0229,
      "step": 1485460
    },
    {
      "epoch": 2.4310206005380883,
      "grad_norm": 0.5370578169822693,
      "learning_rate": 5.171813946745914e-06,
      "loss": 0.0182,
      "step": 1485480
    },
    {
      "epoch": 2.431053330976742,
      "grad_norm": 0.45587944984436035,
      "learning_rate": 5.171748054532396e-06,
      "loss": 0.0112,
      "step": 1485500
    },
    {
      "epoch": 2.431086061415395,
      "grad_norm": 0.44580408930778503,
      "learning_rate": 5.171682162318879e-06,
      "loss": 0.0156,
      "step": 1485520
    },
    {
      "epoch": 2.4311187918540487,
      "grad_norm": 0.5842617154121399,
      "learning_rate": 5.171616270105362e-06,
      "loss": 0.0142,
      "step": 1485540
    },
    {
      "epoch": 2.431151522292702,
      "grad_norm": 0.3199710249900818,
      "learning_rate": 5.171550377891845e-06,
      "loss": 0.0094,
      "step": 1485560
    },
    {
      "epoch": 2.431184252731355,
      "grad_norm": 0.1517248898744583,
      "learning_rate": 5.171484485678328e-06,
      "loss": 0.0172,
      "step": 1485580
    },
    {
      "epoch": 2.4312169831700086,
      "grad_norm": 0.5211086273193359,
      "learning_rate": 5.171418593464811e-06,
      "loss": 0.0161,
      "step": 1485600
    },
    {
      "epoch": 2.4312497136086617,
      "grad_norm": 0.28900355100631714,
      "learning_rate": 5.171352701251293e-06,
      "loss": 0.0068,
      "step": 1485620
    },
    {
      "epoch": 2.4312824440473153,
      "grad_norm": 0.5612857341766357,
      "learning_rate": 5.171286809037777e-06,
      "loss": 0.0193,
      "step": 1485640
    },
    {
      "epoch": 2.4313151744859685,
      "grad_norm": 0.26877591013908386,
      "learning_rate": 5.1712209168242586e-06,
      "loss": 0.0175,
      "step": 1485660
    },
    {
      "epoch": 2.4313479049246216,
      "grad_norm": 0.2417641580104828,
      "learning_rate": 5.171155024610742e-06,
      "loss": 0.0089,
      "step": 1485680
    },
    {
      "epoch": 2.431380635363275,
      "grad_norm": 0.4073488116264343,
      "learning_rate": 5.171089132397225e-06,
      "loss": 0.0171,
      "step": 1485700
    },
    {
      "epoch": 2.4314133658019284,
      "grad_norm": 0.3890335261821747,
      "learning_rate": 5.171023240183708e-06,
      "loss": 0.0165,
      "step": 1485720
    },
    {
      "epoch": 2.431446096240582,
      "grad_norm": 0.6712801456451416,
      "learning_rate": 5.170957347970191e-06,
      "loss": 0.0132,
      "step": 1485740
    },
    {
      "epoch": 2.431478826679235,
      "grad_norm": 0.15465441346168518,
      "learning_rate": 5.170891455756674e-06,
      "loss": 0.0187,
      "step": 1485760
    },
    {
      "epoch": 2.4315115571178882,
      "grad_norm": 0.2918999493122101,
      "learning_rate": 5.170825563543157e-06,
      "loss": 0.0125,
      "step": 1485780
    },
    {
      "epoch": 2.431544287556542,
      "grad_norm": 0.24662654101848602,
      "learning_rate": 5.1707596713296394e-06,
      "loss": 0.0127,
      "step": 1485800
    },
    {
      "epoch": 2.431577017995195,
      "grad_norm": 0.4737769067287445,
      "learning_rate": 5.170693779116123e-06,
      "loss": 0.0147,
      "step": 1485820
    },
    {
      "epoch": 2.4316097484338486,
      "grad_norm": 0.2147204428911209,
      "learning_rate": 5.170627886902605e-06,
      "loss": 0.0187,
      "step": 1485840
    },
    {
      "epoch": 2.4316424788725017,
      "grad_norm": 0.22589774429798126,
      "learning_rate": 5.1705619946890885e-06,
      "loss": 0.0174,
      "step": 1485860
    },
    {
      "epoch": 2.4316752093111553,
      "grad_norm": 0.5260049104690552,
      "learning_rate": 5.17049610247557e-06,
      "loss": 0.0156,
      "step": 1485880
    },
    {
      "epoch": 2.4317079397498085,
      "grad_norm": 0.6716639399528503,
      "learning_rate": 5.170430210262054e-06,
      "loss": 0.0167,
      "step": 1485900
    },
    {
      "epoch": 2.4317406701884616,
      "grad_norm": 0.3620351552963257,
      "learning_rate": 5.170364318048537e-06,
      "loss": 0.0153,
      "step": 1485920
    },
    {
      "epoch": 2.4317734006271152,
      "grad_norm": 0.3015766441822052,
      "learning_rate": 5.1702984258350195e-06,
      "loss": 0.0126,
      "step": 1485940
    },
    {
      "epoch": 2.4318061310657684,
      "grad_norm": 0.3187650442123413,
      "learning_rate": 5.170232533621502e-06,
      "loss": 0.015,
      "step": 1485960
    },
    {
      "epoch": 2.431838861504422,
      "grad_norm": 0.5419890880584717,
      "learning_rate": 5.170166641407986e-06,
      "loss": 0.0239,
      "step": 1485980
    },
    {
      "epoch": 2.431871591943075,
      "grad_norm": 0.7961190938949585,
      "learning_rate": 5.170100749194468e-06,
      "loss": 0.0219,
      "step": 1486000
    },
    {
      "epoch": 2.4319043223817287,
      "grad_norm": 0.8490267992019653,
      "learning_rate": 5.170034856980951e-06,
      "loss": 0.0197,
      "step": 1486020
    },
    {
      "epoch": 2.431937052820382,
      "grad_norm": 0.7725584506988525,
      "learning_rate": 5.169968964767433e-06,
      "loss": 0.0125,
      "step": 1486040
    },
    {
      "epoch": 2.431969783259035,
      "grad_norm": 0.4885921776294708,
      "learning_rate": 5.169903072553917e-06,
      "loss": 0.0231,
      "step": 1486060
    },
    {
      "epoch": 2.4320025136976886,
      "grad_norm": 0.1689399778842926,
      "learning_rate": 5.169837180340399e-06,
      "loss": 0.0117,
      "step": 1486080
    },
    {
      "epoch": 2.4320352441363418,
      "grad_norm": 0.2806609570980072,
      "learning_rate": 5.169771288126882e-06,
      "loss": 0.0081,
      "step": 1486100
    },
    {
      "epoch": 2.4320679745749954,
      "grad_norm": 0.778303325176239,
      "learning_rate": 5.169705395913366e-06,
      "loss": 0.0165,
      "step": 1486120
    },
    {
      "epoch": 2.4321007050136485,
      "grad_norm": 0.6297316551208496,
      "learning_rate": 5.1696395036998485e-06,
      "loss": 0.0108,
      "step": 1486140
    },
    {
      "epoch": 2.432133435452302,
      "grad_norm": 0.2500581443309784,
      "learning_rate": 5.169573611486331e-06,
      "loss": 0.0164,
      "step": 1486160
    },
    {
      "epoch": 2.4321661658909552,
      "grad_norm": 0.26396578550338745,
      "learning_rate": 5.169507719272814e-06,
      "loss": 0.0134,
      "step": 1486180
    },
    {
      "epoch": 2.4321988963296084,
      "grad_norm": 0.27824994921684265,
      "learning_rate": 5.169441827059298e-06,
      "loss": 0.0209,
      "step": 1486200
    },
    {
      "epoch": 2.432231626768262,
      "grad_norm": 0.25181958079338074,
      "learning_rate": 5.1693759348457795e-06,
      "loss": 0.0126,
      "step": 1486220
    },
    {
      "epoch": 2.432264357206915,
      "grad_norm": 0.4780312180519104,
      "learning_rate": 5.169310042632263e-06,
      "loss": 0.0141,
      "step": 1486240
    },
    {
      "epoch": 2.4322970876455687,
      "grad_norm": 0.266762912273407,
      "learning_rate": 5.169244150418745e-06,
      "loss": 0.0204,
      "step": 1486260
    },
    {
      "epoch": 2.432329818084222,
      "grad_norm": 0.6539595723152161,
      "learning_rate": 5.1691782582052286e-06,
      "loss": 0.0117,
      "step": 1486280
    },
    {
      "epoch": 2.4323625485228755,
      "grad_norm": 0.48750489950180054,
      "learning_rate": 5.1691123659917105e-06,
      "loss": 0.0187,
      "step": 1486300
    },
    {
      "epoch": 2.4323952789615286,
      "grad_norm": 0.3074840307235718,
      "learning_rate": 5.169046473778194e-06,
      "loss": 0.0177,
      "step": 1486320
    },
    {
      "epoch": 2.432428009400182,
      "grad_norm": 0.23919855058193207,
      "learning_rate": 5.168980581564677e-06,
      "loss": 0.0085,
      "step": 1486340
    },
    {
      "epoch": 2.4324607398388354,
      "grad_norm": 0.3920939266681671,
      "learning_rate": 5.1689146893511595e-06,
      "loss": 0.0195,
      "step": 1486360
    },
    {
      "epoch": 2.4324934702774885,
      "grad_norm": 1.0171321630477905,
      "learning_rate": 5.168848797137642e-06,
      "loss": 0.0155,
      "step": 1486380
    },
    {
      "epoch": 2.432526200716142,
      "grad_norm": 0.25312861800193787,
      "learning_rate": 5.168782904924126e-06,
      "loss": 0.0097,
      "step": 1486400
    },
    {
      "epoch": 2.4325589311547953,
      "grad_norm": 0.2935706377029419,
      "learning_rate": 5.168717012710608e-06,
      "loss": 0.0188,
      "step": 1486420
    },
    {
      "epoch": 2.432591661593449,
      "grad_norm": 0.34578362107276917,
      "learning_rate": 5.168651120497091e-06,
      "loss": 0.009,
      "step": 1486440
    },
    {
      "epoch": 2.432624392032102,
      "grad_norm": 0.3345644474029541,
      "learning_rate": 5.168585228283575e-06,
      "loss": 0.0158,
      "step": 1486460
    },
    {
      "epoch": 2.432657122470755,
      "grad_norm": 0.5346916913986206,
      "learning_rate": 5.168519336070057e-06,
      "loss": 0.0212,
      "step": 1486480
    },
    {
      "epoch": 2.4326898529094088,
      "grad_norm": 0.22721439599990845,
      "learning_rate": 5.16845344385654e-06,
      "loss": 0.0119,
      "step": 1486500
    },
    {
      "epoch": 2.432722583348062,
      "grad_norm": 0.7825582027435303,
      "learning_rate": 5.168387551643022e-06,
      "loss": 0.0165,
      "step": 1486520
    },
    {
      "epoch": 2.4327553137867155,
      "grad_norm": 1.088567852973938,
      "learning_rate": 5.168321659429506e-06,
      "loss": 0.0187,
      "step": 1486540
    },
    {
      "epoch": 2.4327880442253687,
      "grad_norm": 0.21463681757450104,
      "learning_rate": 5.168255767215989e-06,
      "loss": 0.0102,
      "step": 1486560
    },
    {
      "epoch": 2.4328207746640222,
      "grad_norm": 0.3039451539516449,
      "learning_rate": 5.168189875002471e-06,
      "loss": 0.0217,
      "step": 1486580
    },
    {
      "epoch": 2.4328535051026754,
      "grad_norm": 0.4997773766517639,
      "learning_rate": 5.168123982788954e-06,
      "loss": 0.014,
      "step": 1486600
    },
    {
      "epoch": 2.4328862355413285,
      "grad_norm": 0.17341090738773346,
      "learning_rate": 5.168058090575438e-06,
      "loss": 0.0225,
      "step": 1486620
    },
    {
      "epoch": 2.432918965979982,
      "grad_norm": 0.5526782274246216,
      "learning_rate": 5.16799219836192e-06,
      "loss": 0.0089,
      "step": 1486640
    },
    {
      "epoch": 2.4329516964186353,
      "grad_norm": 0.5132776498794556,
      "learning_rate": 5.167926306148403e-06,
      "loss": 0.0134,
      "step": 1486660
    },
    {
      "epoch": 2.432984426857289,
      "grad_norm": 0.8087833523750305,
      "learning_rate": 5.167860413934885e-06,
      "loss": 0.0154,
      "step": 1486680
    },
    {
      "epoch": 2.433017157295942,
      "grad_norm": 1.0603266954421997,
      "learning_rate": 5.167794521721369e-06,
      "loss": 0.0157,
      "step": 1486700
    },
    {
      "epoch": 2.4330498877345956,
      "grad_norm": 0.40491288900375366,
      "learning_rate": 5.167728629507851e-06,
      "loss": 0.0152,
      "step": 1486720
    },
    {
      "epoch": 2.433082618173249,
      "grad_norm": 0.1456446796655655,
      "learning_rate": 5.167662737294334e-06,
      "loss": 0.014,
      "step": 1486740
    },
    {
      "epoch": 2.433115348611902,
      "grad_norm": 0.47938114404678345,
      "learning_rate": 5.167596845080817e-06,
      "loss": 0.0139,
      "step": 1486760
    },
    {
      "epoch": 2.4331480790505555,
      "grad_norm": 0.8351149559020996,
      "learning_rate": 5.1675309528673005e-06,
      "loss": 0.0128,
      "step": 1486780
    },
    {
      "epoch": 2.4331808094892087,
      "grad_norm": 0.316468209028244,
      "learning_rate": 5.167465060653783e-06,
      "loss": 0.0113,
      "step": 1486800
    },
    {
      "epoch": 2.4332135399278623,
      "grad_norm": 0.11671832948923111,
      "learning_rate": 5.167399168440266e-06,
      "loss": 0.0179,
      "step": 1486820
    },
    {
      "epoch": 2.4332462703665154,
      "grad_norm": 0.5449689030647278,
      "learning_rate": 5.1673332762267495e-06,
      "loss": 0.0182,
      "step": 1486840
    },
    {
      "epoch": 2.433279000805169,
      "grad_norm": 0.1329008787870407,
      "learning_rate": 5.167267384013231e-06,
      "loss": 0.0126,
      "step": 1486860
    },
    {
      "epoch": 2.433311731243822,
      "grad_norm": 0.4642908275127411,
      "learning_rate": 5.167201491799715e-06,
      "loss": 0.0108,
      "step": 1486880
    },
    {
      "epoch": 2.4333444616824753,
      "grad_norm": 0.530133843421936,
      "learning_rate": 5.167135599586197e-06,
      "loss": 0.0136,
      "step": 1486900
    },
    {
      "epoch": 2.433377192121129,
      "grad_norm": 0.44770586490631104,
      "learning_rate": 5.1670697073726805e-06,
      "loss": 0.0136,
      "step": 1486920
    },
    {
      "epoch": 2.433409922559782,
      "grad_norm": 0.5904946327209473,
      "learning_rate": 5.167003815159163e-06,
      "loss": 0.0112,
      "step": 1486940
    },
    {
      "epoch": 2.4334426529984357,
      "grad_norm": 0.7706419229507446,
      "learning_rate": 5.166937922945646e-06,
      "loss": 0.0183,
      "step": 1486960
    },
    {
      "epoch": 2.433475383437089,
      "grad_norm": 0.7307426333427429,
      "learning_rate": 5.166872030732129e-06,
      "loss": 0.0238,
      "step": 1486980
    },
    {
      "epoch": 2.4335081138757424,
      "grad_norm": 2.0879406929016113,
      "learning_rate": 5.166806138518612e-06,
      "loss": 0.0094,
      "step": 1487000
    },
    {
      "epoch": 2.4335408443143955,
      "grad_norm": 0.3847567141056061,
      "learning_rate": 5.166740246305094e-06,
      "loss": 0.0127,
      "step": 1487020
    },
    {
      "epoch": 2.4335735747530487,
      "grad_norm": 0.31397193670272827,
      "learning_rate": 5.166674354091578e-06,
      "loss": 0.0161,
      "step": 1487040
    },
    {
      "epoch": 2.4336063051917023,
      "grad_norm": 0.42793598771095276,
      "learning_rate": 5.16660846187806e-06,
      "loss": 0.0123,
      "step": 1487060
    },
    {
      "epoch": 2.4336390356303554,
      "grad_norm": 0.18864558637142181,
      "learning_rate": 5.166542569664543e-06,
      "loss": 0.0116,
      "step": 1487080
    },
    {
      "epoch": 2.433671766069009,
      "grad_norm": 0.1489284783601761,
      "learning_rate": 5.166476677451025e-06,
      "loss": 0.0126,
      "step": 1487100
    },
    {
      "epoch": 2.433704496507662,
      "grad_norm": 0.1788635551929474,
      "learning_rate": 5.166410785237509e-06,
      "loss": 0.0142,
      "step": 1487120
    },
    {
      "epoch": 2.433737226946316,
      "grad_norm": 0.28769415616989136,
      "learning_rate": 5.1663448930239915e-06,
      "loss": 0.0116,
      "step": 1487140
    },
    {
      "epoch": 2.433769957384969,
      "grad_norm": 0.6287557482719421,
      "learning_rate": 5.166279000810474e-06,
      "loss": 0.0151,
      "step": 1487160
    },
    {
      "epoch": 2.433802687823622,
      "grad_norm": 0.30205467343330383,
      "learning_rate": 5.166213108596958e-06,
      "loss": 0.0108,
      "step": 1487180
    },
    {
      "epoch": 2.4338354182622757,
      "grad_norm": 0.2800927758216858,
      "learning_rate": 5.1661472163834405e-06,
      "loss": 0.0221,
      "step": 1487200
    },
    {
      "epoch": 2.433868148700929,
      "grad_norm": 0.4839475154876709,
      "learning_rate": 5.166081324169924e-06,
      "loss": 0.0132,
      "step": 1487220
    },
    {
      "epoch": 2.4339008791395824,
      "grad_norm": 0.6904997825622559,
      "learning_rate": 5.166015431956406e-06,
      "loss": 0.018,
      "step": 1487240
    },
    {
      "epoch": 2.4339336095782356,
      "grad_norm": 0.672151505947113,
      "learning_rate": 5.16594953974289e-06,
      "loss": 0.0145,
      "step": 1487260
    },
    {
      "epoch": 2.4339663400168887,
      "grad_norm": 0.43163925409317017,
      "learning_rate": 5.1658836475293715e-06,
      "loss": 0.0127,
      "step": 1487280
    },
    {
      "epoch": 2.4339990704555423,
      "grad_norm": 0.8113831281661987,
      "learning_rate": 5.165817755315855e-06,
      "loss": 0.0129,
      "step": 1487300
    },
    {
      "epoch": 2.4340318008941955,
      "grad_norm": 0.6984603404998779,
      "learning_rate": 5.165751863102337e-06,
      "loss": 0.0148,
      "step": 1487320
    },
    {
      "epoch": 2.434064531332849,
      "grad_norm": 0.7430472373962402,
      "learning_rate": 5.1656859708888206e-06,
      "loss": 0.018,
      "step": 1487340
    },
    {
      "epoch": 2.434097261771502,
      "grad_norm": 0.16109462082386017,
      "learning_rate": 5.165620078675303e-06,
      "loss": 0.0146,
      "step": 1487360
    },
    {
      "epoch": 2.4341299922101554,
      "grad_norm": 0.16909414529800415,
      "learning_rate": 5.165554186461786e-06,
      "loss": 0.0175,
      "step": 1487380
    },
    {
      "epoch": 2.434162722648809,
      "grad_norm": 0.18687409162521362,
      "learning_rate": 5.165488294248269e-06,
      "loss": 0.008,
      "step": 1487400
    },
    {
      "epoch": 2.434195453087462,
      "grad_norm": 0.4813302457332611,
      "learning_rate": 5.165422402034752e-06,
      "loss": 0.0102,
      "step": 1487420
    },
    {
      "epoch": 2.4342281835261157,
      "grad_norm": 0.570575475692749,
      "learning_rate": 5.165356509821234e-06,
      "loss": 0.0163,
      "step": 1487440
    },
    {
      "epoch": 2.434260913964769,
      "grad_norm": 0.6237741112709045,
      "learning_rate": 5.165290617607718e-06,
      "loss": 0.0125,
      "step": 1487460
    },
    {
      "epoch": 2.4342936444034224,
      "grad_norm": 0.3578895926475525,
      "learning_rate": 5.1652247253942e-06,
      "loss": 0.019,
      "step": 1487480
    },
    {
      "epoch": 2.4343263748420756,
      "grad_norm": 1.0927996635437012,
      "learning_rate": 5.165158833180683e-06,
      "loss": 0.0144,
      "step": 1487500
    },
    {
      "epoch": 2.4343591052807287,
      "grad_norm": 0.1372612863779068,
      "learning_rate": 5.165092940967167e-06,
      "loss": 0.013,
      "step": 1487520
    },
    {
      "epoch": 2.4343918357193823,
      "grad_norm": 0.16695928573608398,
      "learning_rate": 5.165027048753649e-06,
      "loss": 0.0119,
      "step": 1487540
    },
    {
      "epoch": 2.4344245661580355,
      "grad_norm": 0.3101685643196106,
      "learning_rate": 5.164961156540132e-06,
      "loss": 0.0146,
      "step": 1487560
    },
    {
      "epoch": 2.434457296596689,
      "grad_norm": 0.47124841809272766,
      "learning_rate": 5.164895264326615e-06,
      "loss": 0.0192,
      "step": 1487580
    },
    {
      "epoch": 2.4344900270353422,
      "grad_norm": 1.0454292297363281,
      "learning_rate": 5.164829372113098e-06,
      "loss": 0.0247,
      "step": 1487600
    },
    {
      "epoch": 2.434522757473996,
      "grad_norm": 0.19672800600528717,
      "learning_rate": 5.164763479899581e-06,
      "loss": 0.0189,
      "step": 1487620
    },
    {
      "epoch": 2.434555487912649,
      "grad_norm": 0.09905462712049484,
      "learning_rate": 5.164697587686064e-06,
      "loss": 0.0168,
      "step": 1487640
    },
    {
      "epoch": 2.434588218351302,
      "grad_norm": 0.4966813623905182,
      "learning_rate": 5.164631695472546e-06,
      "loss": 0.0108,
      "step": 1487660
    },
    {
      "epoch": 2.4346209487899557,
      "grad_norm": 0.4407385587692261,
      "learning_rate": 5.16456580325903e-06,
      "loss": 0.0137,
      "step": 1487680
    },
    {
      "epoch": 2.434653679228609,
      "grad_norm": 0.13084706664085388,
      "learning_rate": 5.1644999110455116e-06,
      "loss": 0.0106,
      "step": 1487700
    },
    {
      "epoch": 2.4346864096672625,
      "grad_norm": 0.303121954202652,
      "learning_rate": 5.164434018831995e-06,
      "loss": 0.0086,
      "step": 1487720
    },
    {
      "epoch": 2.4347191401059156,
      "grad_norm": 0.4005192220211029,
      "learning_rate": 5.164368126618478e-06,
      "loss": 0.0137,
      "step": 1487740
    },
    {
      "epoch": 2.434751870544569,
      "grad_norm": 0.3509184420108795,
      "learning_rate": 5.164302234404961e-06,
      "loss": 0.0219,
      "step": 1487760
    },
    {
      "epoch": 2.4347846009832224,
      "grad_norm": 0.3297605812549591,
      "learning_rate": 5.164236342191443e-06,
      "loss": 0.0179,
      "step": 1487780
    },
    {
      "epoch": 2.4348173314218755,
      "grad_norm": 0.605344295501709,
      "learning_rate": 5.164170449977927e-06,
      "loss": 0.0132,
      "step": 1487800
    },
    {
      "epoch": 2.434850061860529,
      "grad_norm": 0.29072266817092896,
      "learning_rate": 5.164104557764409e-06,
      "loss": 0.0154,
      "step": 1487820
    },
    {
      "epoch": 2.4348827922991823,
      "grad_norm": 0.8871737122535706,
      "learning_rate": 5.1640386655508924e-06,
      "loss": 0.0235,
      "step": 1487840
    },
    {
      "epoch": 2.434915522737836,
      "grad_norm": 0.13210569322109222,
      "learning_rate": 5.163972773337376e-06,
      "loss": 0.015,
      "step": 1487860
    },
    {
      "epoch": 2.434948253176489,
      "grad_norm": 0.8557381629943848,
      "learning_rate": 5.163906881123858e-06,
      "loss": 0.0121,
      "step": 1487880
    },
    {
      "epoch": 2.4349809836151426,
      "grad_norm": 0.3345494866371155,
      "learning_rate": 5.1638409889103415e-06,
      "loss": 0.0129,
      "step": 1487900
    },
    {
      "epoch": 2.4350137140537957,
      "grad_norm": 0.6600885987281799,
      "learning_rate": 5.163775096696823e-06,
      "loss": 0.0201,
      "step": 1487920
    },
    {
      "epoch": 2.435046444492449,
      "grad_norm": 0.20940569043159485,
      "learning_rate": 5.163709204483307e-06,
      "loss": 0.014,
      "step": 1487940
    },
    {
      "epoch": 2.4350791749311025,
      "grad_norm": 0.2869488000869751,
      "learning_rate": 5.16364331226979e-06,
      "loss": 0.0146,
      "step": 1487960
    },
    {
      "epoch": 2.4351119053697556,
      "grad_norm": 1.0440188646316528,
      "learning_rate": 5.1635774200562725e-06,
      "loss": 0.0145,
      "step": 1487980
    },
    {
      "epoch": 2.4351446358084092,
      "grad_norm": 0.31410354375839233,
      "learning_rate": 5.163511527842755e-06,
      "loss": 0.0129,
      "step": 1488000
    },
    {
      "epoch": 2.4351773662470624,
      "grad_norm": 1.2613192796707153,
      "learning_rate": 5.163445635629239e-06,
      "loss": 0.0198,
      "step": 1488020
    },
    {
      "epoch": 2.435210096685716,
      "grad_norm": 0.23592616617679596,
      "learning_rate": 5.163379743415721e-06,
      "loss": 0.0134,
      "step": 1488040
    },
    {
      "epoch": 2.435242827124369,
      "grad_norm": 1.0228495597839355,
      "learning_rate": 5.163313851202204e-06,
      "loss": 0.0259,
      "step": 1488060
    },
    {
      "epoch": 2.4352755575630223,
      "grad_norm": 0.4002615511417389,
      "learning_rate": 5.163247958988686e-06,
      "loss": 0.0198,
      "step": 1488080
    },
    {
      "epoch": 2.435308288001676,
      "grad_norm": 0.24885165691375732,
      "learning_rate": 5.16318206677517e-06,
      "loss": 0.0146,
      "step": 1488100
    },
    {
      "epoch": 2.435341018440329,
      "grad_norm": 0.41455480456352234,
      "learning_rate": 5.163116174561652e-06,
      "loss": 0.0168,
      "step": 1488120
    },
    {
      "epoch": 2.4353737488789826,
      "grad_norm": 0.3044465482234955,
      "learning_rate": 5.163050282348135e-06,
      "loss": 0.0123,
      "step": 1488140
    },
    {
      "epoch": 2.4354064793176358,
      "grad_norm": 0.21692118048667908,
      "learning_rate": 5.162984390134618e-06,
      "loss": 0.0172,
      "step": 1488160
    },
    {
      "epoch": 2.4354392097562894,
      "grad_norm": 0.2624948024749756,
      "learning_rate": 5.162918497921101e-06,
      "loss": 0.0239,
      "step": 1488180
    },
    {
      "epoch": 2.4354719401949425,
      "grad_norm": 0.3277943432331085,
      "learning_rate": 5.162852605707584e-06,
      "loss": 0.0149,
      "step": 1488200
    },
    {
      "epoch": 2.4355046706335957,
      "grad_norm": 0.5046716928482056,
      "learning_rate": 5.162786713494067e-06,
      "loss": 0.0165,
      "step": 1488220
    },
    {
      "epoch": 2.4355374010722493,
      "grad_norm": 0.9495031237602234,
      "learning_rate": 5.16272082128055e-06,
      "loss": 0.0156,
      "step": 1488240
    },
    {
      "epoch": 2.4355701315109024,
      "grad_norm": 0.1335596740245819,
      "learning_rate": 5.1626549290670325e-06,
      "loss": 0.0126,
      "step": 1488260
    },
    {
      "epoch": 2.435602861949556,
      "grad_norm": 0.5346421599388123,
      "learning_rate": 5.162589036853516e-06,
      "loss": 0.0121,
      "step": 1488280
    },
    {
      "epoch": 2.435635592388209,
      "grad_norm": 0.5660244822502136,
      "learning_rate": 5.162523144639998e-06,
      "loss": 0.0123,
      "step": 1488300
    },
    {
      "epoch": 2.4356683228268627,
      "grad_norm": 0.13654029369354248,
      "learning_rate": 5.1624572524264816e-06,
      "loss": 0.0233,
      "step": 1488320
    },
    {
      "epoch": 2.435701053265516,
      "grad_norm": 0.15501314401626587,
      "learning_rate": 5.1623913602129635e-06,
      "loss": 0.0145,
      "step": 1488340
    },
    {
      "epoch": 2.435733783704169,
      "grad_norm": 0.9740456938743591,
      "learning_rate": 5.162325467999447e-06,
      "loss": 0.0129,
      "step": 1488360
    },
    {
      "epoch": 2.4357665141428226,
      "grad_norm": 0.5052378177642822,
      "learning_rate": 5.16225957578593e-06,
      "loss": 0.0273,
      "step": 1488380
    },
    {
      "epoch": 2.435799244581476,
      "grad_norm": 0.6425501108169556,
      "learning_rate": 5.1621936835724125e-06,
      "loss": 0.0123,
      "step": 1488400
    },
    {
      "epoch": 2.4358319750201294,
      "grad_norm": 0.2076817750930786,
      "learning_rate": 5.162127791358895e-06,
      "loss": 0.0106,
      "step": 1488420
    },
    {
      "epoch": 2.4358647054587825,
      "grad_norm": 0.6569881439208984,
      "learning_rate": 5.162061899145379e-06,
      "loss": 0.0117,
      "step": 1488440
    },
    {
      "epoch": 2.435897435897436,
      "grad_norm": 0.5456541776657104,
      "learning_rate": 5.161996006931861e-06,
      "loss": 0.016,
      "step": 1488460
    },
    {
      "epoch": 2.4359301663360893,
      "grad_norm": 0.18730823695659637,
      "learning_rate": 5.161930114718344e-06,
      "loss": 0.0139,
      "step": 1488480
    },
    {
      "epoch": 2.4359628967747424,
      "grad_norm": 0.5627540349960327,
      "learning_rate": 5.161864222504826e-06,
      "loss": 0.0192,
      "step": 1488500
    },
    {
      "epoch": 2.435995627213396,
      "grad_norm": 0.5397773385047913,
      "learning_rate": 5.16179833029131e-06,
      "loss": 0.0174,
      "step": 1488520
    },
    {
      "epoch": 2.436028357652049,
      "grad_norm": 0.334961861371994,
      "learning_rate": 5.1617324380777926e-06,
      "loss": 0.008,
      "step": 1488540
    },
    {
      "epoch": 2.4360610880907028,
      "grad_norm": 0.22923122346401215,
      "learning_rate": 5.161666545864275e-06,
      "loss": 0.0173,
      "step": 1488560
    },
    {
      "epoch": 2.436093818529356,
      "grad_norm": 0.1285594254732132,
      "learning_rate": 5.161600653650759e-06,
      "loss": 0.0173,
      "step": 1488580
    },
    {
      "epoch": 2.4361265489680095,
      "grad_norm": 0.5217267870903015,
      "learning_rate": 5.161534761437242e-06,
      "loss": 0.0152,
      "step": 1488600
    },
    {
      "epoch": 2.4361592794066627,
      "grad_norm": 0.8422797322273254,
      "learning_rate": 5.161468869223724e-06,
      "loss": 0.0153,
      "step": 1488620
    },
    {
      "epoch": 2.436192009845316,
      "grad_norm": 0.4047953486442566,
      "learning_rate": 5.161402977010207e-06,
      "loss": 0.0123,
      "step": 1488640
    },
    {
      "epoch": 2.4362247402839694,
      "grad_norm": 0.438337117433548,
      "learning_rate": 5.161337084796691e-06,
      "loss": 0.0198,
      "step": 1488660
    },
    {
      "epoch": 2.4362574707226226,
      "grad_norm": 0.17989856004714966,
      "learning_rate": 5.161271192583173e-06,
      "loss": 0.0184,
      "step": 1488680
    },
    {
      "epoch": 2.436290201161276,
      "grad_norm": 0.5318372249603271,
      "learning_rate": 5.161205300369656e-06,
      "loss": 0.0141,
      "step": 1488700
    },
    {
      "epoch": 2.4363229315999293,
      "grad_norm": 0.7628066539764404,
      "learning_rate": 5.161139408156138e-06,
      "loss": 0.0143,
      "step": 1488720
    },
    {
      "epoch": 2.4363556620385824,
      "grad_norm": 0.3182230293750763,
      "learning_rate": 5.161073515942622e-06,
      "loss": 0.0177,
      "step": 1488740
    },
    {
      "epoch": 2.436388392477236,
      "grad_norm": 0.4273979663848877,
      "learning_rate": 5.161007623729104e-06,
      "loss": 0.0131,
      "step": 1488760
    },
    {
      "epoch": 2.436421122915889,
      "grad_norm": 1.3534260988235474,
      "learning_rate": 5.160941731515587e-06,
      "loss": 0.0273,
      "step": 1488780
    },
    {
      "epoch": 2.436453853354543,
      "grad_norm": 0.33139917254447937,
      "learning_rate": 5.16087583930207e-06,
      "loss": 0.0098,
      "step": 1488800
    },
    {
      "epoch": 2.436486583793196,
      "grad_norm": 0.4046591818332672,
      "learning_rate": 5.1608099470885535e-06,
      "loss": 0.0167,
      "step": 1488820
    },
    {
      "epoch": 2.436519314231849,
      "grad_norm": 0.6853549480438232,
      "learning_rate": 5.160744054875035e-06,
      "loss": 0.0088,
      "step": 1488840
    },
    {
      "epoch": 2.4365520446705027,
      "grad_norm": 0.577729344367981,
      "learning_rate": 5.160678162661519e-06,
      "loss": 0.0169,
      "step": 1488860
    },
    {
      "epoch": 2.436584775109156,
      "grad_norm": 0.24402335286140442,
      "learning_rate": 5.160612270448001e-06,
      "loss": 0.012,
      "step": 1488880
    },
    {
      "epoch": 2.4366175055478094,
      "grad_norm": 0.22682902216911316,
      "learning_rate": 5.160546378234484e-06,
      "loss": 0.0208,
      "step": 1488900
    },
    {
      "epoch": 2.4366502359864626,
      "grad_norm": 0.2533417344093323,
      "learning_rate": 5.160480486020968e-06,
      "loss": 0.0098,
      "step": 1488920
    },
    {
      "epoch": 2.436682966425116,
      "grad_norm": 0.3058549761772156,
      "learning_rate": 5.16041459380745e-06,
      "loss": 0.0106,
      "step": 1488940
    },
    {
      "epoch": 2.4367156968637693,
      "grad_norm": 0.16282731294631958,
      "learning_rate": 5.1603487015939335e-06,
      "loss": 0.02,
      "step": 1488960
    },
    {
      "epoch": 2.4367484273024225,
      "grad_norm": 1.0542656183242798,
      "learning_rate": 5.160282809380416e-06,
      "loss": 0.0144,
      "step": 1488980
    },
    {
      "epoch": 2.436781157741076,
      "grad_norm": 0.7102870345115662,
      "learning_rate": 5.160216917166899e-06,
      "loss": 0.0173,
      "step": 1489000
    },
    {
      "epoch": 2.436813888179729,
      "grad_norm": 0.1821783035993576,
      "learning_rate": 5.160151024953382e-06,
      "loss": 0.0123,
      "step": 1489020
    },
    {
      "epoch": 2.436846618618383,
      "grad_norm": 0.4842398762702942,
      "learning_rate": 5.160085132739865e-06,
      "loss": 0.0167,
      "step": 1489040
    },
    {
      "epoch": 2.436879349057036,
      "grad_norm": 1.0167961120605469,
      "learning_rate": 5.160019240526347e-06,
      "loss": 0.0131,
      "step": 1489060
    },
    {
      "epoch": 2.4369120794956896,
      "grad_norm": 0.09291870146989822,
      "learning_rate": 5.159953348312831e-06,
      "loss": 0.0154,
      "step": 1489080
    },
    {
      "epoch": 2.4369448099343427,
      "grad_norm": 0.12797625362873077,
      "learning_rate": 5.159887456099313e-06,
      "loss": 0.0173,
      "step": 1489100
    },
    {
      "epoch": 2.436977540372996,
      "grad_norm": 0.19975806772708893,
      "learning_rate": 5.159821563885796e-06,
      "loss": 0.0174,
      "step": 1489120
    },
    {
      "epoch": 2.4370102708116494,
      "grad_norm": 0.21434436738491058,
      "learning_rate": 5.159755671672278e-06,
      "loss": 0.0191,
      "step": 1489140
    },
    {
      "epoch": 2.4370430012503026,
      "grad_norm": 1.0444817543029785,
      "learning_rate": 5.159689779458762e-06,
      "loss": 0.0142,
      "step": 1489160
    },
    {
      "epoch": 2.437075731688956,
      "grad_norm": 0.22467465698719025,
      "learning_rate": 5.1596238872452445e-06,
      "loss": 0.0096,
      "step": 1489180
    },
    {
      "epoch": 2.4371084621276093,
      "grad_norm": 0.34407272934913635,
      "learning_rate": 5.159557995031727e-06,
      "loss": 0.0146,
      "step": 1489200
    },
    {
      "epoch": 2.437141192566263,
      "grad_norm": 0.20162400603294373,
      "learning_rate": 5.15949210281821e-06,
      "loss": 0.0125,
      "step": 1489220
    },
    {
      "epoch": 2.437173923004916,
      "grad_norm": 0.46625638008117676,
      "learning_rate": 5.1594262106046935e-06,
      "loss": 0.0168,
      "step": 1489240
    },
    {
      "epoch": 2.4372066534435692,
      "grad_norm": 0.262074738740921,
      "learning_rate": 5.159360318391176e-06,
      "loss": 0.0114,
      "step": 1489260
    },
    {
      "epoch": 2.437239383882223,
      "grad_norm": 1.1466155052185059,
      "learning_rate": 5.159294426177659e-06,
      "loss": 0.0179,
      "step": 1489280
    },
    {
      "epoch": 2.437272114320876,
      "grad_norm": 0.45631423592567444,
      "learning_rate": 5.159228533964143e-06,
      "loss": 0.0136,
      "step": 1489300
    },
    {
      "epoch": 2.4373048447595296,
      "grad_norm": 0.24555420875549316,
      "learning_rate": 5.1591626417506245e-06,
      "loss": 0.0158,
      "step": 1489320
    },
    {
      "epoch": 2.4373375751981827,
      "grad_norm": 0.18877120316028595,
      "learning_rate": 5.159096749537108e-06,
      "loss": 0.0121,
      "step": 1489340
    },
    {
      "epoch": 2.4373703056368363,
      "grad_norm": 0.4096357226371765,
      "learning_rate": 5.15903085732359e-06,
      "loss": 0.0147,
      "step": 1489360
    },
    {
      "epoch": 2.4374030360754895,
      "grad_norm": 1.8418138027191162,
      "learning_rate": 5.1589649651100736e-06,
      "loss": 0.0137,
      "step": 1489380
    },
    {
      "epoch": 2.4374357665141426,
      "grad_norm": 0.3548237681388855,
      "learning_rate": 5.158899072896556e-06,
      "loss": 0.0214,
      "step": 1489400
    },
    {
      "epoch": 2.437468496952796,
      "grad_norm": 0.16017645597457886,
      "learning_rate": 5.158833180683039e-06,
      "loss": 0.0096,
      "step": 1489420
    },
    {
      "epoch": 2.4375012273914494,
      "grad_norm": 0.25024574995040894,
      "learning_rate": 5.158767288469522e-06,
      "loss": 0.0117,
      "step": 1489440
    },
    {
      "epoch": 2.437533957830103,
      "grad_norm": 0.24705670773983002,
      "learning_rate": 5.158701396256005e-06,
      "loss": 0.0121,
      "step": 1489460
    },
    {
      "epoch": 2.437566688268756,
      "grad_norm": 0.6163108944892883,
      "learning_rate": 5.158635504042487e-06,
      "loss": 0.0101,
      "step": 1489480
    },
    {
      "epoch": 2.4375994187074097,
      "grad_norm": 0.15275602042675018,
      "learning_rate": 5.158569611828971e-06,
      "loss": 0.0129,
      "step": 1489500
    },
    {
      "epoch": 2.437632149146063,
      "grad_norm": 0.08575623482465744,
      "learning_rate": 5.158503719615453e-06,
      "loss": 0.0088,
      "step": 1489520
    },
    {
      "epoch": 2.437664879584716,
      "grad_norm": 1.1343625783920288,
      "learning_rate": 5.158437827401936e-06,
      "loss": 0.0175,
      "step": 1489540
    },
    {
      "epoch": 2.4376976100233696,
      "grad_norm": 0.1283254325389862,
      "learning_rate": 5.158371935188419e-06,
      "loss": 0.0187,
      "step": 1489560
    },
    {
      "epoch": 2.4377303404620227,
      "grad_norm": 0.19391155242919922,
      "learning_rate": 5.158306042974902e-06,
      "loss": 0.0125,
      "step": 1489580
    },
    {
      "epoch": 2.4377630709006763,
      "grad_norm": 1.0156137943267822,
      "learning_rate": 5.1582401507613845e-06,
      "loss": 0.0144,
      "step": 1489600
    },
    {
      "epoch": 2.4377958013393295,
      "grad_norm": 0.2447054386138916,
      "learning_rate": 5.158174258547868e-06,
      "loss": 0.0138,
      "step": 1489620
    },
    {
      "epoch": 2.437828531777983,
      "grad_norm": 0.15836308896541595,
      "learning_rate": 5.158108366334351e-06,
      "loss": 0.0108,
      "step": 1489640
    },
    {
      "epoch": 2.4378612622166362,
      "grad_norm": 0.6466314792633057,
      "learning_rate": 5.158042474120834e-06,
      "loss": 0.0311,
      "step": 1489660
    },
    {
      "epoch": 2.4378939926552894,
      "grad_norm": 0.3767293691635132,
      "learning_rate": 5.157976581907317e-06,
      "loss": 0.0106,
      "step": 1489680
    },
    {
      "epoch": 2.437926723093943,
      "grad_norm": 1.4136788845062256,
      "learning_rate": 5.157910689693799e-06,
      "loss": 0.0116,
      "step": 1489700
    },
    {
      "epoch": 2.437959453532596,
      "grad_norm": 0.29952529072761536,
      "learning_rate": 5.157844797480283e-06,
      "loss": 0.0147,
      "step": 1489720
    },
    {
      "epoch": 2.4379921839712497,
      "grad_norm": 0.14237120747566223,
      "learning_rate": 5.1577789052667646e-06,
      "loss": 0.0143,
      "step": 1489740
    },
    {
      "epoch": 2.438024914409903,
      "grad_norm": 0.11852652579545975,
      "learning_rate": 5.157713013053248e-06,
      "loss": 0.0128,
      "step": 1489760
    },
    {
      "epoch": 2.4380576448485565,
      "grad_norm": 0.35580265522003174,
      "learning_rate": 5.157647120839731e-06,
      "loss": 0.0076,
      "step": 1489780
    },
    {
      "epoch": 2.4380903752872096,
      "grad_norm": 0.41956284642219543,
      "learning_rate": 5.157581228626214e-06,
      "loss": 0.0181,
      "step": 1489800
    },
    {
      "epoch": 2.4381231057258628,
      "grad_norm": 0.8287136554718018,
      "learning_rate": 5.157515336412696e-06,
      "loss": 0.0106,
      "step": 1489820
    },
    {
      "epoch": 2.4381558361645164,
      "grad_norm": 0.3408622145652771,
      "learning_rate": 5.15744944419918e-06,
      "loss": 0.0178,
      "step": 1489840
    },
    {
      "epoch": 2.4381885666031695,
      "grad_norm": 0.43399861454963684,
      "learning_rate": 5.157383551985662e-06,
      "loss": 0.0171,
      "step": 1489860
    },
    {
      "epoch": 2.438221297041823,
      "grad_norm": 0.22281070053577423,
      "learning_rate": 5.1573176597721454e-06,
      "loss": 0.0189,
      "step": 1489880
    },
    {
      "epoch": 2.4382540274804763,
      "grad_norm": 0.774450957775116,
      "learning_rate": 5.157251767558627e-06,
      "loss": 0.0143,
      "step": 1489900
    },
    {
      "epoch": 2.43828675791913,
      "grad_norm": 0.13849589228630066,
      "learning_rate": 5.157185875345111e-06,
      "loss": 0.0151,
      "step": 1489920
    },
    {
      "epoch": 2.438319488357783,
      "grad_norm": 0.9440091848373413,
      "learning_rate": 5.157119983131593e-06,
      "loss": 0.0168,
      "step": 1489940
    },
    {
      "epoch": 2.438352218796436,
      "grad_norm": 0.20811504125595093,
      "learning_rate": 5.157054090918076e-06,
      "loss": 0.0125,
      "step": 1489960
    },
    {
      "epoch": 2.4383849492350897,
      "grad_norm": 0.6690834760665894,
      "learning_rate": 5.15698819870456e-06,
      "loss": 0.0158,
      "step": 1489980
    },
    {
      "epoch": 2.438417679673743,
      "grad_norm": 0.14682252705097198,
      "learning_rate": 5.156922306491043e-06,
      "loss": 0.0106,
      "step": 1490000
    },
    {
      "epoch": 2.4384504101123965,
      "grad_norm": 0.08904123306274414,
      "learning_rate": 5.1568564142775255e-06,
      "loss": 0.014,
      "step": 1490020
    },
    {
      "epoch": 2.4384831405510496,
      "grad_norm": 0.2941240668296814,
      "learning_rate": 5.156790522064008e-06,
      "loss": 0.0138,
      "step": 1490040
    },
    {
      "epoch": 2.4385158709897032,
      "grad_norm": 0.7348445057868958,
      "learning_rate": 5.156724629850492e-06,
      "loss": 0.0118,
      "step": 1490060
    },
    {
      "epoch": 2.4385486014283564,
      "grad_norm": 0.5286845564842224,
      "learning_rate": 5.156658737636974e-06,
      "loss": 0.015,
      "step": 1490080
    },
    {
      "epoch": 2.4385813318670095,
      "grad_norm": 0.7523344159126282,
      "learning_rate": 5.156592845423457e-06,
      "loss": 0.0155,
      "step": 1490100
    },
    {
      "epoch": 2.438614062305663,
      "grad_norm": 0.26048195362091064,
      "learning_rate": 5.156526953209939e-06,
      "loss": 0.0126,
      "step": 1490120
    },
    {
      "epoch": 2.4386467927443163,
      "grad_norm": 0.6565274596214294,
      "learning_rate": 5.156461060996423e-06,
      "loss": 0.0143,
      "step": 1490140
    },
    {
      "epoch": 2.43867952318297,
      "grad_norm": 0.8780037760734558,
      "learning_rate": 5.156395168782905e-06,
      "loss": 0.0108,
      "step": 1490160
    },
    {
      "epoch": 2.438712253621623,
      "grad_norm": 0.13259080052375793,
      "learning_rate": 5.156329276569388e-06,
      "loss": 0.0124,
      "step": 1490180
    },
    {
      "epoch": 2.4387449840602766,
      "grad_norm": 0.434272438287735,
      "learning_rate": 5.156263384355871e-06,
      "loss": 0.0144,
      "step": 1490200
    },
    {
      "epoch": 2.4387777144989298,
      "grad_norm": 0.6185617446899414,
      "learning_rate": 5.156197492142354e-06,
      "loss": 0.014,
      "step": 1490220
    },
    {
      "epoch": 2.438810444937583,
      "grad_norm": 0.2095453441143036,
      "learning_rate": 5.1561315999288364e-06,
      "loss": 0.0234,
      "step": 1490240
    },
    {
      "epoch": 2.4388431753762365,
      "grad_norm": 0.20084550976753235,
      "learning_rate": 5.15606570771532e-06,
      "loss": 0.0079,
      "step": 1490260
    },
    {
      "epoch": 2.4388759058148897,
      "grad_norm": 0.39768093824386597,
      "learning_rate": 5.155999815501802e-06,
      "loss": 0.0116,
      "step": 1490280
    },
    {
      "epoch": 2.4389086362535433,
      "grad_norm": 0.34259817004203796,
      "learning_rate": 5.1559339232882855e-06,
      "loss": 0.008,
      "step": 1490300
    },
    {
      "epoch": 2.4389413666921964,
      "grad_norm": 0.46823832392692566,
      "learning_rate": 5.155868031074769e-06,
      "loss": 0.007,
      "step": 1490320
    },
    {
      "epoch": 2.4389740971308496,
      "grad_norm": 0.9120238423347473,
      "learning_rate": 5.155802138861251e-06,
      "loss": 0.0207,
      "step": 1490340
    },
    {
      "epoch": 2.439006827569503,
      "grad_norm": 0.27383285760879517,
      "learning_rate": 5.155736246647735e-06,
      "loss": 0.0165,
      "step": 1490360
    },
    {
      "epoch": 2.4390395580081563,
      "grad_norm": 0.10151198506355286,
      "learning_rate": 5.1556703544342165e-06,
      "loss": 0.0143,
      "step": 1490380
    },
    {
      "epoch": 2.43907228844681,
      "grad_norm": 0.21440964937210083,
      "learning_rate": 5.1556044622207e-06,
      "loss": 0.0124,
      "step": 1490400
    },
    {
      "epoch": 2.439105018885463,
      "grad_norm": 0.334797203540802,
      "learning_rate": 5.155538570007183e-06,
      "loss": 0.0128,
      "step": 1490420
    },
    {
      "epoch": 2.439137749324116,
      "grad_norm": 0.5484409928321838,
      "learning_rate": 5.1554726777936655e-06,
      "loss": 0.0166,
      "step": 1490440
    },
    {
      "epoch": 2.43917047976277,
      "grad_norm": 0.5321869254112244,
      "learning_rate": 5.155406785580148e-06,
      "loss": 0.0178,
      "step": 1490460
    },
    {
      "epoch": 2.439203210201423,
      "grad_norm": 0.1079699918627739,
      "learning_rate": 5.155340893366632e-06,
      "loss": 0.0133,
      "step": 1490480
    },
    {
      "epoch": 2.4392359406400765,
      "grad_norm": 0.9490692019462585,
      "learning_rate": 5.155275001153114e-06,
      "loss": 0.0171,
      "step": 1490500
    },
    {
      "epoch": 2.4392686710787297,
      "grad_norm": 0.5518378019332886,
      "learning_rate": 5.155209108939597e-06,
      "loss": 0.018,
      "step": 1490520
    },
    {
      "epoch": 2.4393014015173833,
      "grad_norm": 0.3077843189239502,
      "learning_rate": 5.155143216726079e-06,
      "loss": 0.0117,
      "step": 1490540
    },
    {
      "epoch": 2.4393341319560364,
      "grad_norm": 0.29437026381492615,
      "learning_rate": 5.155077324512563e-06,
      "loss": 0.0161,
      "step": 1490560
    },
    {
      "epoch": 2.4393668623946896,
      "grad_norm": 0.6399028301239014,
      "learning_rate": 5.1550114322990456e-06,
      "loss": 0.0121,
      "step": 1490580
    },
    {
      "epoch": 2.439399592833343,
      "grad_norm": 0.4318409562110901,
      "learning_rate": 5.154945540085528e-06,
      "loss": 0.0204,
      "step": 1490600
    },
    {
      "epoch": 2.4394323232719963,
      "grad_norm": 0.2251548171043396,
      "learning_rate": 5.154879647872011e-06,
      "loss": 0.0137,
      "step": 1490620
    },
    {
      "epoch": 2.43946505371065,
      "grad_norm": 0.32145190238952637,
      "learning_rate": 5.154813755658495e-06,
      "loss": 0.0095,
      "step": 1490640
    },
    {
      "epoch": 2.439497784149303,
      "grad_norm": 0.37037792801856995,
      "learning_rate": 5.1547478634449765e-06,
      "loss": 0.0114,
      "step": 1490660
    },
    {
      "epoch": 2.4395305145879567,
      "grad_norm": 0.11440690606832504,
      "learning_rate": 5.15468197123146e-06,
      "loss": 0.0163,
      "step": 1490680
    },
    {
      "epoch": 2.43956324502661,
      "grad_norm": 0.20841097831726074,
      "learning_rate": 5.154616079017944e-06,
      "loss": 0.0108,
      "step": 1490700
    },
    {
      "epoch": 2.439595975465263,
      "grad_norm": 0.48511311411857605,
      "learning_rate": 5.154550186804426e-06,
      "loss": 0.0173,
      "step": 1490720
    },
    {
      "epoch": 2.4396287059039166,
      "grad_norm": 0.9288069009780884,
      "learning_rate": 5.154484294590909e-06,
      "loss": 0.0158,
      "step": 1490740
    },
    {
      "epoch": 2.4396614363425697,
      "grad_norm": 0.31761422753334045,
      "learning_rate": 5.154418402377391e-06,
      "loss": 0.0107,
      "step": 1490760
    },
    {
      "epoch": 2.4396941667812233,
      "grad_norm": 0.41603735089302063,
      "learning_rate": 5.154352510163875e-06,
      "loss": 0.0176,
      "step": 1490780
    },
    {
      "epoch": 2.4397268972198765,
      "grad_norm": 0.45887836813926697,
      "learning_rate": 5.154286617950357e-06,
      "loss": 0.0183,
      "step": 1490800
    },
    {
      "epoch": 2.43975962765853,
      "grad_norm": 0.11303813755512238,
      "learning_rate": 5.15422072573684e-06,
      "loss": 0.0158,
      "step": 1490820
    },
    {
      "epoch": 2.439792358097183,
      "grad_norm": 0.487932413816452,
      "learning_rate": 5.154154833523323e-06,
      "loss": 0.02,
      "step": 1490840
    },
    {
      "epoch": 2.4398250885358364,
      "grad_norm": 0.7954711318016052,
      "learning_rate": 5.1540889413098065e-06,
      "loss": 0.0142,
      "step": 1490860
    },
    {
      "epoch": 2.43985781897449,
      "grad_norm": 1.341274380683899,
      "learning_rate": 5.154023049096288e-06,
      "loss": 0.019,
      "step": 1490880
    },
    {
      "epoch": 2.439890549413143,
      "grad_norm": 0.42940762639045715,
      "learning_rate": 5.153957156882772e-06,
      "loss": 0.0171,
      "step": 1490900
    },
    {
      "epoch": 2.4399232798517967,
      "grad_norm": 0.11991791427135468,
      "learning_rate": 5.153891264669254e-06,
      "loss": 0.0131,
      "step": 1490920
    },
    {
      "epoch": 2.43995601029045,
      "grad_norm": 0.2496625930070877,
      "learning_rate": 5.153825372455737e-06,
      "loss": 0.0084,
      "step": 1490940
    },
    {
      "epoch": 2.4399887407291034,
      "grad_norm": 0.381258100271225,
      "learning_rate": 5.153759480242219e-06,
      "loss": 0.0122,
      "step": 1490960
    },
    {
      "epoch": 2.4400214711677566,
      "grad_norm": 0.2631864845752716,
      "learning_rate": 5.153693588028703e-06,
      "loss": 0.0079,
      "step": 1490980
    },
    {
      "epoch": 2.4400542016064097,
      "grad_norm": 0.5758000612258911,
      "learning_rate": 5.153627695815186e-06,
      "loss": 0.01,
      "step": 1491000
    },
    {
      "epoch": 2.4400869320450633,
      "grad_norm": 0.04901571571826935,
      "learning_rate": 5.153561803601668e-06,
      "loss": 0.0116,
      "step": 1491020
    },
    {
      "epoch": 2.4401196624837165,
      "grad_norm": 0.9135250449180603,
      "learning_rate": 5.153495911388152e-06,
      "loss": 0.0228,
      "step": 1491040
    },
    {
      "epoch": 2.44015239292237,
      "grad_norm": 0.596542239189148,
      "learning_rate": 5.153430019174635e-06,
      "loss": 0.0126,
      "step": 1491060
    },
    {
      "epoch": 2.4401851233610232,
      "grad_norm": 0.4387468099594116,
      "learning_rate": 5.153364126961118e-06,
      "loss": 0.0104,
      "step": 1491080
    },
    {
      "epoch": 2.440217853799677,
      "grad_norm": 0.48159918189048767,
      "learning_rate": 5.1532982347476e-06,
      "loss": 0.0219,
      "step": 1491100
    },
    {
      "epoch": 2.44025058423833,
      "grad_norm": 0.24911275506019592,
      "learning_rate": 5.153232342534084e-06,
      "loss": 0.0119,
      "step": 1491120
    },
    {
      "epoch": 2.440283314676983,
      "grad_norm": 0.1286356896162033,
      "learning_rate": 5.153166450320566e-06,
      "loss": 0.0107,
      "step": 1491140
    },
    {
      "epoch": 2.4403160451156367,
      "grad_norm": 0.40857067704200745,
      "learning_rate": 5.153100558107049e-06,
      "loss": 0.022,
      "step": 1491160
    },
    {
      "epoch": 2.44034877555429,
      "grad_norm": 0.6033499240875244,
      "learning_rate": 5.153034665893531e-06,
      "loss": 0.0264,
      "step": 1491180
    },
    {
      "epoch": 2.4403815059929435,
      "grad_norm": 0.8082786202430725,
      "learning_rate": 5.152968773680015e-06,
      "loss": 0.0139,
      "step": 1491200
    },
    {
      "epoch": 2.4404142364315966,
      "grad_norm": 0.637123167514801,
      "learning_rate": 5.1529028814664975e-06,
      "loss": 0.0149,
      "step": 1491220
    },
    {
      "epoch": 2.44044696687025,
      "grad_norm": 0.5430625677108765,
      "learning_rate": 5.15283698925298e-06,
      "loss": 0.0151,
      "step": 1491240
    },
    {
      "epoch": 2.4404796973089034,
      "grad_norm": 0.1267150640487671,
      "learning_rate": 5.152771097039463e-06,
      "loss": 0.0146,
      "step": 1491260
    },
    {
      "epoch": 2.4405124277475565,
      "grad_norm": 0.8307974934577942,
      "learning_rate": 5.1527052048259465e-06,
      "loss": 0.0213,
      "step": 1491280
    },
    {
      "epoch": 2.44054515818621,
      "grad_norm": 0.6766114234924316,
      "learning_rate": 5.1526393126124284e-06,
      "loss": 0.015,
      "step": 1491300
    },
    {
      "epoch": 2.4405778886248632,
      "grad_norm": 0.4147113263607025,
      "learning_rate": 5.152573420398912e-06,
      "loss": 0.0116,
      "step": 1491320
    },
    {
      "epoch": 2.440610619063517,
      "grad_norm": 0.523842453956604,
      "learning_rate": 5.152507528185394e-06,
      "loss": 0.0102,
      "step": 1491340
    },
    {
      "epoch": 2.44064334950217,
      "grad_norm": 0.29241830110549927,
      "learning_rate": 5.1524416359718775e-06,
      "loss": 0.0157,
      "step": 1491360
    },
    {
      "epoch": 2.4406760799408236,
      "grad_norm": 0.16892962157726288,
      "learning_rate": 5.152375743758361e-06,
      "loss": 0.0157,
      "step": 1491380
    },
    {
      "epoch": 2.4407088103794767,
      "grad_norm": 0.21946464478969574,
      "learning_rate": 5.152309851544843e-06,
      "loss": 0.0138,
      "step": 1491400
    },
    {
      "epoch": 2.44074154081813,
      "grad_norm": 0.19766847789287567,
      "learning_rate": 5.1522439593313266e-06,
      "loss": 0.0124,
      "step": 1491420
    },
    {
      "epoch": 2.4407742712567835,
      "grad_norm": 0.30489349365234375,
      "learning_rate": 5.152178067117809e-06,
      "loss": 0.0111,
      "step": 1491440
    },
    {
      "epoch": 2.4408070016954366,
      "grad_norm": 0.07361624389886856,
      "learning_rate": 5.152112174904292e-06,
      "loss": 0.0179,
      "step": 1491460
    },
    {
      "epoch": 2.4408397321340902,
      "grad_norm": 3.6741302013397217,
      "learning_rate": 5.152046282690775e-06,
      "loss": 0.0162,
      "step": 1491480
    },
    {
      "epoch": 2.4408724625727434,
      "grad_norm": 0.3647472858428955,
      "learning_rate": 5.151980390477258e-06,
      "loss": 0.0191,
      "step": 1491500
    },
    {
      "epoch": 2.440905193011397,
      "grad_norm": 0.13863322138786316,
      "learning_rate": 5.15191449826374e-06,
      "loss": 0.0149,
      "step": 1491520
    },
    {
      "epoch": 2.44093792345005,
      "grad_norm": 0.19857990741729736,
      "learning_rate": 5.151848606050224e-06,
      "loss": 0.0136,
      "step": 1491540
    },
    {
      "epoch": 2.4409706538887033,
      "grad_norm": 0.2018008977174759,
      "learning_rate": 5.151782713836706e-06,
      "loss": 0.0143,
      "step": 1491560
    },
    {
      "epoch": 2.441003384327357,
      "grad_norm": 0.45889222621917725,
      "learning_rate": 5.151716821623189e-06,
      "loss": 0.0144,
      "step": 1491580
    },
    {
      "epoch": 2.44103611476601,
      "grad_norm": 0.5358948707580566,
      "learning_rate": 5.151650929409672e-06,
      "loss": 0.0191,
      "step": 1491600
    },
    {
      "epoch": 2.4410688452046636,
      "grad_norm": 0.4783703088760376,
      "learning_rate": 5.151585037196155e-06,
      "loss": 0.0141,
      "step": 1491620
    },
    {
      "epoch": 2.4411015756433168,
      "grad_norm": 0.5760481357574463,
      "learning_rate": 5.1515191449826375e-06,
      "loss": 0.013,
      "step": 1491640
    },
    {
      "epoch": 2.4411343060819704,
      "grad_norm": 0.252814918756485,
      "learning_rate": 5.151453252769121e-06,
      "loss": 0.0126,
      "step": 1491660
    },
    {
      "epoch": 2.4411670365206235,
      "grad_norm": 0.35984140634536743,
      "learning_rate": 5.151387360555603e-06,
      "loss": 0.0184,
      "step": 1491680
    },
    {
      "epoch": 2.4411997669592767,
      "grad_norm": 0.22134391963481903,
      "learning_rate": 5.151321468342087e-06,
      "loss": 0.0168,
      "step": 1491700
    },
    {
      "epoch": 2.4412324973979302,
      "grad_norm": 0.35167157649993896,
      "learning_rate": 5.15125557612857e-06,
      "loss": 0.0149,
      "step": 1491720
    },
    {
      "epoch": 2.4412652278365834,
      "grad_norm": 0.37851041555404663,
      "learning_rate": 5.151189683915052e-06,
      "loss": 0.0144,
      "step": 1491740
    },
    {
      "epoch": 2.441297958275237,
      "grad_norm": 2.2470474243164062,
      "learning_rate": 5.151123791701536e-06,
      "loss": 0.0162,
      "step": 1491760
    },
    {
      "epoch": 2.44133068871389,
      "grad_norm": 0.1792762279510498,
      "learning_rate": 5.1510578994880176e-06,
      "loss": 0.0121,
      "step": 1491780
    },
    {
      "epoch": 2.4413634191525433,
      "grad_norm": 0.14271247386932373,
      "learning_rate": 5.150992007274501e-06,
      "loss": 0.0152,
      "step": 1491800
    },
    {
      "epoch": 2.441396149591197,
      "grad_norm": 0.994089663028717,
      "learning_rate": 5.150926115060984e-06,
      "loss": 0.0207,
      "step": 1491820
    },
    {
      "epoch": 2.44142888002985,
      "grad_norm": 0.6457288265228271,
      "learning_rate": 5.150860222847467e-06,
      "loss": 0.0105,
      "step": 1491840
    },
    {
      "epoch": 2.4414616104685036,
      "grad_norm": 0.6272084712982178,
      "learning_rate": 5.150794330633949e-06,
      "loss": 0.0147,
      "step": 1491860
    },
    {
      "epoch": 2.441494340907157,
      "grad_norm": 0.632638692855835,
      "learning_rate": 5.150728438420433e-06,
      "loss": 0.0099,
      "step": 1491880
    },
    {
      "epoch": 2.44152707134581,
      "grad_norm": 0.3422519564628601,
      "learning_rate": 5.150662546206915e-06,
      "loss": 0.0249,
      "step": 1491900
    },
    {
      "epoch": 2.4415598017844635,
      "grad_norm": 0.2975444793701172,
      "learning_rate": 5.1505966539933984e-06,
      "loss": 0.015,
      "step": 1491920
    },
    {
      "epoch": 2.4415925322231167,
      "grad_norm": 0.31726324558258057,
      "learning_rate": 5.15053076177988e-06,
      "loss": 0.0127,
      "step": 1491940
    },
    {
      "epoch": 2.4416252626617703,
      "grad_norm": 0.16210059821605682,
      "learning_rate": 5.150464869566364e-06,
      "loss": 0.0213,
      "step": 1491960
    },
    {
      "epoch": 2.4416579931004234,
      "grad_norm": 0.9752234816551208,
      "learning_rate": 5.150398977352846e-06,
      "loss": 0.0182,
      "step": 1491980
    },
    {
      "epoch": 2.441690723539077,
      "grad_norm": 1.4275147914886475,
      "learning_rate": 5.150333085139329e-06,
      "loss": 0.0116,
      "step": 1492000
    },
    {
      "epoch": 2.44172345397773,
      "grad_norm": 0.3517929017543793,
      "learning_rate": 5.150267192925812e-06,
      "loss": 0.0141,
      "step": 1492020
    },
    {
      "epoch": 2.4417561844163833,
      "grad_norm": 0.42947742342948914,
      "learning_rate": 5.150201300712295e-06,
      "loss": 0.0162,
      "step": 1492040
    },
    {
      "epoch": 2.441788914855037,
      "grad_norm": 0.48154911398887634,
      "learning_rate": 5.150135408498778e-06,
      "loss": 0.0176,
      "step": 1492060
    },
    {
      "epoch": 2.44182164529369,
      "grad_norm": 1.4423027038574219,
      "learning_rate": 5.150069516285261e-06,
      "loss": 0.0159,
      "step": 1492080
    },
    {
      "epoch": 2.4418543757323437,
      "grad_norm": 0.936923086643219,
      "learning_rate": 5.150003624071744e-06,
      "loss": 0.0171,
      "step": 1492100
    },
    {
      "epoch": 2.441887106170997,
      "grad_norm": 0.47719842195510864,
      "learning_rate": 5.149937731858227e-06,
      "loss": 0.01,
      "step": 1492120
    },
    {
      "epoch": 2.4419198366096504,
      "grad_norm": 0.46711623668670654,
      "learning_rate": 5.14987183964471e-06,
      "loss": 0.0206,
      "step": 1492140
    },
    {
      "epoch": 2.4419525670483035,
      "grad_norm": 0.22836904227733612,
      "learning_rate": 5.149805947431192e-06,
      "loss": 0.0085,
      "step": 1492160
    },
    {
      "epoch": 2.4419852974869567,
      "grad_norm": 0.8740230798721313,
      "learning_rate": 5.149740055217676e-06,
      "loss": 0.0126,
      "step": 1492180
    },
    {
      "epoch": 2.4420180279256103,
      "grad_norm": 0.6331733465194702,
      "learning_rate": 5.149674163004158e-06,
      "loss": 0.0152,
      "step": 1492200
    },
    {
      "epoch": 2.4420507583642634,
      "grad_norm": 0.5238543152809143,
      "learning_rate": 5.149608270790641e-06,
      "loss": 0.02,
      "step": 1492220
    },
    {
      "epoch": 2.442083488802917,
      "grad_norm": 0.5020239949226379,
      "learning_rate": 5.149542378577124e-06,
      "loss": 0.014,
      "step": 1492240
    },
    {
      "epoch": 2.44211621924157,
      "grad_norm": 1.1286051273345947,
      "learning_rate": 5.149476486363607e-06,
      "loss": 0.0174,
      "step": 1492260
    },
    {
      "epoch": 2.442148949680224,
      "grad_norm": 0.22538918256759644,
      "learning_rate": 5.1494105941500895e-06,
      "loss": 0.0137,
      "step": 1492280
    },
    {
      "epoch": 2.442181680118877,
      "grad_norm": 0.7308260798454285,
      "learning_rate": 5.149344701936573e-06,
      "loss": 0.0092,
      "step": 1492300
    },
    {
      "epoch": 2.44221441055753,
      "grad_norm": 0.1634342521429062,
      "learning_rate": 5.149278809723055e-06,
      "loss": 0.012,
      "step": 1492320
    },
    {
      "epoch": 2.4422471409961837,
      "grad_norm": 0.2111034095287323,
      "learning_rate": 5.1492129175095385e-06,
      "loss": 0.0143,
      "step": 1492340
    },
    {
      "epoch": 2.442279871434837,
      "grad_norm": 0.4741699695587158,
      "learning_rate": 5.14914702529602e-06,
      "loss": 0.0142,
      "step": 1492360
    },
    {
      "epoch": 2.4423126018734904,
      "grad_norm": 0.5197768211364746,
      "learning_rate": 5.149081133082504e-06,
      "loss": 0.021,
      "step": 1492380
    },
    {
      "epoch": 2.4423453323121436,
      "grad_norm": 0.28702512383461,
      "learning_rate": 5.149015240868987e-06,
      "loss": 0.0089,
      "step": 1492400
    },
    {
      "epoch": 2.442378062750797,
      "grad_norm": 0.20790904760360718,
      "learning_rate": 5.1489493486554695e-06,
      "loss": 0.011,
      "step": 1492420
    },
    {
      "epoch": 2.4424107931894503,
      "grad_norm": 0.5528913736343384,
      "learning_rate": 5.148883456441953e-06,
      "loss": 0.0172,
      "step": 1492440
    },
    {
      "epoch": 2.4424435236281035,
      "grad_norm": 0.3211497962474823,
      "learning_rate": 5.148817564228436e-06,
      "loss": 0.0115,
      "step": 1492460
    },
    {
      "epoch": 2.442476254066757,
      "grad_norm": 0.12539641559123993,
      "learning_rate": 5.1487516720149185e-06,
      "loss": 0.0155,
      "step": 1492480
    },
    {
      "epoch": 2.44250898450541,
      "grad_norm": 0.5928796529769897,
      "learning_rate": 5.148685779801401e-06,
      "loss": 0.0117,
      "step": 1492500
    },
    {
      "epoch": 2.442541714944064,
      "grad_norm": 0.6165732741355896,
      "learning_rate": 5.148619887587885e-06,
      "loss": 0.0175,
      "step": 1492520
    },
    {
      "epoch": 2.442574445382717,
      "grad_norm": 0.9395636916160583,
      "learning_rate": 5.148553995374367e-06,
      "loss": 0.0152,
      "step": 1492540
    },
    {
      "epoch": 2.4426071758213705,
      "grad_norm": 0.1894787698984146,
      "learning_rate": 5.14848810316085e-06,
      "loss": 0.0163,
      "step": 1492560
    },
    {
      "epoch": 2.4426399062600237,
      "grad_norm": 0.7014516592025757,
      "learning_rate": 5.148422210947332e-06,
      "loss": 0.0149,
      "step": 1492580
    },
    {
      "epoch": 2.442672636698677,
      "grad_norm": 0.5580081939697266,
      "learning_rate": 5.148356318733816e-06,
      "loss": 0.0154,
      "step": 1492600
    },
    {
      "epoch": 2.4427053671373304,
      "grad_norm": 0.3636332154273987,
      "learning_rate": 5.1482904265202986e-06,
      "loss": 0.0111,
      "step": 1492620
    },
    {
      "epoch": 2.4427380975759836,
      "grad_norm": 0.2093697041273117,
      "learning_rate": 5.148224534306781e-06,
      "loss": 0.0093,
      "step": 1492640
    },
    {
      "epoch": 2.442770828014637,
      "grad_norm": 0.14580197632312775,
      "learning_rate": 5.148158642093264e-06,
      "loss": 0.0187,
      "step": 1492660
    },
    {
      "epoch": 2.4428035584532903,
      "grad_norm": 0.18438886106014252,
      "learning_rate": 5.148092749879748e-06,
      "loss": 0.0119,
      "step": 1492680
    },
    {
      "epoch": 2.442836288891944,
      "grad_norm": 0.7276734709739685,
      "learning_rate": 5.1480268576662295e-06,
      "loss": 0.0181,
      "step": 1492700
    },
    {
      "epoch": 2.442869019330597,
      "grad_norm": 0.3773728609085083,
      "learning_rate": 5.147960965452713e-06,
      "loss": 0.0152,
      "step": 1492720
    },
    {
      "epoch": 2.4429017497692502,
      "grad_norm": 0.2661834955215454,
      "learning_rate": 5.147895073239195e-06,
      "loss": 0.0151,
      "step": 1492740
    },
    {
      "epoch": 2.442934480207904,
      "grad_norm": 0.2153998166322708,
      "learning_rate": 5.147829181025679e-06,
      "loss": 0.0176,
      "step": 1492760
    },
    {
      "epoch": 2.442967210646557,
      "grad_norm": 0.25708410143852234,
      "learning_rate": 5.147763288812162e-06,
      "loss": 0.0182,
      "step": 1492780
    },
    {
      "epoch": 2.4429999410852106,
      "grad_norm": 0.5375261902809143,
      "learning_rate": 5.147697396598644e-06,
      "loss": 0.0119,
      "step": 1492800
    },
    {
      "epoch": 2.4430326715238637,
      "grad_norm": 0.6795161962509155,
      "learning_rate": 5.147631504385128e-06,
      "loss": 0.0155,
      "step": 1492820
    },
    {
      "epoch": 2.4430654019625173,
      "grad_norm": 0.6372402310371399,
      "learning_rate": 5.14756561217161e-06,
      "loss": 0.0169,
      "step": 1492840
    },
    {
      "epoch": 2.4430981324011705,
      "grad_norm": 0.22193127870559692,
      "learning_rate": 5.147499719958093e-06,
      "loss": 0.0126,
      "step": 1492860
    },
    {
      "epoch": 2.4431308628398236,
      "grad_norm": 0.42734989523887634,
      "learning_rate": 5.147433827744576e-06,
      "loss": 0.0143,
      "step": 1492880
    },
    {
      "epoch": 2.443163593278477,
      "grad_norm": 0.6356545686721802,
      "learning_rate": 5.1473679355310595e-06,
      "loss": 0.0113,
      "step": 1492900
    },
    {
      "epoch": 2.4431963237171304,
      "grad_norm": 0.944042444229126,
      "learning_rate": 5.147302043317541e-06,
      "loss": 0.0222,
      "step": 1492920
    },
    {
      "epoch": 2.443229054155784,
      "grad_norm": 1.122652530670166,
      "learning_rate": 5.147236151104025e-06,
      "loss": 0.0201,
      "step": 1492940
    },
    {
      "epoch": 2.443261784594437,
      "grad_norm": 0.1312752217054367,
      "learning_rate": 5.147170258890507e-06,
      "loss": 0.0146,
      "step": 1492960
    },
    {
      "epoch": 2.4432945150330907,
      "grad_norm": 0.14229002594947815,
      "learning_rate": 5.1471043666769904e-06,
      "loss": 0.0129,
      "step": 1492980
    },
    {
      "epoch": 2.443327245471744,
      "grad_norm": 1.2440255880355835,
      "learning_rate": 5.147038474463472e-06,
      "loss": 0.0112,
      "step": 1493000
    },
    {
      "epoch": 2.443359975910397,
      "grad_norm": 0.5081597566604614,
      "learning_rate": 5.146972582249956e-06,
      "loss": 0.0153,
      "step": 1493020
    },
    {
      "epoch": 2.4433927063490506,
      "grad_norm": 0.39123985171318054,
      "learning_rate": 5.146906690036439e-06,
      "loss": 0.0161,
      "step": 1493040
    },
    {
      "epoch": 2.4434254367877037,
      "grad_norm": 0.1742190420627594,
      "learning_rate": 5.146840797822921e-06,
      "loss": 0.0178,
      "step": 1493060
    },
    {
      "epoch": 2.4434581672263573,
      "grad_norm": 0.6288725733757019,
      "learning_rate": 5.146774905609404e-06,
      "loss": 0.0161,
      "step": 1493080
    },
    {
      "epoch": 2.4434908976650105,
      "grad_norm": 0.7410351037979126,
      "learning_rate": 5.146709013395888e-06,
      "loss": 0.023,
      "step": 1493100
    },
    {
      "epoch": 2.443523628103664,
      "grad_norm": 0.2957409620285034,
      "learning_rate": 5.14664312118237e-06,
      "loss": 0.0139,
      "step": 1493120
    },
    {
      "epoch": 2.4435563585423172,
      "grad_norm": 0.4933311641216278,
      "learning_rate": 5.146577228968853e-06,
      "loss": 0.015,
      "step": 1493140
    },
    {
      "epoch": 2.4435890889809704,
      "grad_norm": 0.521786630153656,
      "learning_rate": 5.146511336755337e-06,
      "loss": 0.0171,
      "step": 1493160
    },
    {
      "epoch": 2.443621819419624,
      "grad_norm": 0.7147776484489441,
      "learning_rate": 5.146445444541819e-06,
      "loss": 0.0136,
      "step": 1493180
    },
    {
      "epoch": 2.443654549858277,
      "grad_norm": 0.34686651825904846,
      "learning_rate": 5.146379552328302e-06,
      "loss": 0.0186,
      "step": 1493200
    },
    {
      "epoch": 2.4436872802969307,
      "grad_norm": 0.41595813632011414,
      "learning_rate": 5.146313660114784e-06,
      "loss": 0.0171,
      "step": 1493220
    },
    {
      "epoch": 2.443720010735584,
      "grad_norm": 0.34191229939460754,
      "learning_rate": 5.146247767901268e-06,
      "loss": 0.0142,
      "step": 1493240
    },
    {
      "epoch": 2.4437527411742375,
      "grad_norm": 0.16472791135311127,
      "learning_rate": 5.1461818756877505e-06,
      "loss": 0.0205,
      "step": 1493260
    },
    {
      "epoch": 2.4437854716128906,
      "grad_norm": 1.0521936416625977,
      "learning_rate": 5.146115983474233e-06,
      "loss": 0.0183,
      "step": 1493280
    },
    {
      "epoch": 2.4438182020515438,
      "grad_norm": 1.0821539163589478,
      "learning_rate": 5.146050091260716e-06,
      "loss": 0.0161,
      "step": 1493300
    },
    {
      "epoch": 2.4438509324901974,
      "grad_norm": 0.5074871778488159,
      "learning_rate": 5.1459841990471995e-06,
      "loss": 0.0086,
      "step": 1493320
    },
    {
      "epoch": 2.4438836629288505,
      "grad_norm": 0.17924055457115173,
      "learning_rate": 5.1459183068336814e-06,
      "loss": 0.0148,
      "step": 1493340
    },
    {
      "epoch": 2.443916393367504,
      "grad_norm": 0.1400061547756195,
      "learning_rate": 5.145852414620165e-06,
      "loss": 0.0099,
      "step": 1493360
    },
    {
      "epoch": 2.4439491238061573,
      "grad_norm": 0.30743834376335144,
      "learning_rate": 5.145786522406647e-06,
      "loss": 0.0151,
      "step": 1493380
    },
    {
      "epoch": 2.4439818542448104,
      "grad_norm": 0.8680092096328735,
      "learning_rate": 5.1457206301931305e-06,
      "loss": 0.0292,
      "step": 1493400
    },
    {
      "epoch": 2.444014584683464,
      "grad_norm": 0.4329290986061096,
      "learning_rate": 5.145654737979613e-06,
      "loss": 0.0132,
      "step": 1493420
    },
    {
      "epoch": 2.444047315122117,
      "grad_norm": 0.6882312893867493,
      "learning_rate": 5.145588845766096e-06,
      "loss": 0.0111,
      "step": 1493440
    },
    {
      "epoch": 2.4440800455607707,
      "grad_norm": 0.2520589530467987,
      "learning_rate": 5.145522953552579e-06,
      "loss": 0.015,
      "step": 1493460
    },
    {
      "epoch": 2.444112775999424,
      "grad_norm": 0.38919901847839355,
      "learning_rate": 5.145457061339062e-06,
      "loss": 0.0192,
      "step": 1493480
    },
    {
      "epoch": 2.444145506438077,
      "grad_norm": 0.18831662833690643,
      "learning_rate": 5.145391169125545e-06,
      "loss": 0.0133,
      "step": 1493500
    },
    {
      "epoch": 2.4441782368767306,
      "grad_norm": 0.4752328395843506,
      "learning_rate": 5.145325276912028e-06,
      "loss": 0.015,
      "step": 1493520
    },
    {
      "epoch": 2.444210967315384,
      "grad_norm": 0.08751612901687622,
      "learning_rate": 5.145259384698511e-06,
      "loss": 0.0108,
      "step": 1493540
    },
    {
      "epoch": 2.4442436977540374,
      "grad_norm": 0.3330245614051819,
      "learning_rate": 5.145193492484993e-06,
      "loss": 0.0098,
      "step": 1493560
    },
    {
      "epoch": 2.4442764281926905,
      "grad_norm": 1.2094533443450928,
      "learning_rate": 5.145127600271477e-06,
      "loss": 0.0129,
      "step": 1493580
    },
    {
      "epoch": 2.444309158631344,
      "grad_norm": 0.19189107418060303,
      "learning_rate": 5.145061708057959e-06,
      "loss": 0.0121,
      "step": 1493600
    },
    {
      "epoch": 2.4443418890699973,
      "grad_norm": 0.6082063317298889,
      "learning_rate": 5.144995815844442e-06,
      "loss": 0.0192,
      "step": 1493620
    },
    {
      "epoch": 2.4443746195086504,
      "grad_norm": 0.3222035765647888,
      "learning_rate": 5.144929923630925e-06,
      "loss": 0.0143,
      "step": 1493640
    },
    {
      "epoch": 2.444407349947304,
      "grad_norm": 0.16647329926490784,
      "learning_rate": 5.144864031417408e-06,
      "loss": 0.0175,
      "step": 1493660
    },
    {
      "epoch": 2.444440080385957,
      "grad_norm": 0.5479477643966675,
      "learning_rate": 5.1447981392038906e-06,
      "loss": 0.0139,
      "step": 1493680
    },
    {
      "epoch": 2.4444728108246108,
      "grad_norm": 0.27502501010894775,
      "learning_rate": 5.144732246990374e-06,
      "loss": 0.0125,
      "step": 1493700
    },
    {
      "epoch": 2.444505541263264,
      "grad_norm": 0.08508536964654922,
      "learning_rate": 5.144666354776856e-06,
      "loss": 0.015,
      "step": 1493720
    },
    {
      "epoch": 2.4445382717019175,
      "grad_norm": 0.2861385941505432,
      "learning_rate": 5.14460046256334e-06,
      "loss": 0.0125,
      "step": 1493740
    },
    {
      "epoch": 2.4445710021405707,
      "grad_norm": 0.4246416389942169,
      "learning_rate": 5.1445345703498215e-06,
      "loss": 0.0137,
      "step": 1493760
    },
    {
      "epoch": 2.444603732579224,
      "grad_norm": 0.16539953649044037,
      "learning_rate": 5.144468678136305e-06,
      "loss": 0.0144,
      "step": 1493780
    },
    {
      "epoch": 2.4446364630178774,
      "grad_norm": 0.5310280919075012,
      "learning_rate": 5.144402785922787e-06,
      "loss": 0.0114,
      "step": 1493800
    },
    {
      "epoch": 2.4446691934565306,
      "grad_norm": 0.1071045771241188,
      "learning_rate": 5.144336893709271e-06,
      "loss": 0.0176,
      "step": 1493820
    },
    {
      "epoch": 2.444701923895184,
      "grad_norm": 0.8313111662864685,
      "learning_rate": 5.144271001495754e-06,
      "loss": 0.0126,
      "step": 1493840
    },
    {
      "epoch": 2.4447346543338373,
      "grad_norm": 0.3436235785484314,
      "learning_rate": 5.144205109282237e-06,
      "loss": 0.0114,
      "step": 1493860
    },
    {
      "epoch": 2.444767384772491,
      "grad_norm": 0.5364526510238647,
      "learning_rate": 5.14413921706872e-06,
      "loss": 0.0105,
      "step": 1493880
    },
    {
      "epoch": 2.444800115211144,
      "grad_norm": 0.31893739104270935,
      "learning_rate": 5.144073324855202e-06,
      "loss": 0.0137,
      "step": 1493900
    },
    {
      "epoch": 2.444832845649797,
      "grad_norm": 0.14241249859333038,
      "learning_rate": 5.144007432641686e-06,
      "loss": 0.0134,
      "step": 1493920
    },
    {
      "epoch": 2.444865576088451,
      "grad_norm": 0.24884405732154846,
      "learning_rate": 5.143941540428168e-06,
      "loss": 0.0122,
      "step": 1493940
    },
    {
      "epoch": 2.444898306527104,
      "grad_norm": 0.40078601241111755,
      "learning_rate": 5.1438756482146515e-06,
      "loss": 0.0162,
      "step": 1493960
    },
    {
      "epoch": 2.4449310369657575,
      "grad_norm": 0.24720405042171478,
      "learning_rate": 5.143809756001133e-06,
      "loss": 0.0137,
      "step": 1493980
    },
    {
      "epoch": 2.4449637674044107,
      "grad_norm": 0.3080403506755829,
      "learning_rate": 5.143743863787617e-06,
      "loss": 0.0109,
      "step": 1494000
    },
    {
      "epoch": 2.4449964978430643,
      "grad_norm": 0.3666505813598633,
      "learning_rate": 5.143677971574099e-06,
      "loss": 0.0144,
      "step": 1494020
    },
    {
      "epoch": 2.4450292282817174,
      "grad_norm": 0.2528131604194641,
      "learning_rate": 5.143612079360582e-06,
      "loss": 0.0142,
      "step": 1494040
    },
    {
      "epoch": 2.4450619587203706,
      "grad_norm": 0.4819508492946625,
      "learning_rate": 5.143546187147065e-06,
      "loss": 0.0139,
      "step": 1494060
    },
    {
      "epoch": 2.445094689159024,
      "grad_norm": 0.5010833144187927,
      "learning_rate": 5.143480294933548e-06,
      "loss": 0.0177,
      "step": 1494080
    },
    {
      "epoch": 2.4451274195976773,
      "grad_norm": 0.3182169497013092,
      "learning_rate": 5.143414402720031e-06,
      "loss": 0.0093,
      "step": 1494100
    },
    {
      "epoch": 2.445160150036331,
      "grad_norm": 0.18750764429569244,
      "learning_rate": 5.143348510506514e-06,
      "loss": 0.0158,
      "step": 1494120
    },
    {
      "epoch": 2.445192880474984,
      "grad_norm": 0.2510863244533539,
      "learning_rate": 5.143282618292996e-06,
      "loss": 0.0081,
      "step": 1494140
    },
    {
      "epoch": 2.4452256109136377,
      "grad_norm": 0.18305090069770813,
      "learning_rate": 5.14321672607948e-06,
      "loss": 0.0182,
      "step": 1494160
    },
    {
      "epoch": 2.445258341352291,
      "grad_norm": 0.13957317173480988,
      "learning_rate": 5.143150833865963e-06,
      "loss": 0.0188,
      "step": 1494180
    },
    {
      "epoch": 2.445291071790944,
      "grad_norm": 0.25171422958374023,
      "learning_rate": 5.143084941652445e-06,
      "loss": 0.0111,
      "step": 1494200
    },
    {
      "epoch": 2.4453238022295976,
      "grad_norm": 0.5417683720588684,
      "learning_rate": 5.143019049438929e-06,
      "loss": 0.0155,
      "step": 1494220
    },
    {
      "epoch": 2.4453565326682507,
      "grad_norm": 0.09803388267755508,
      "learning_rate": 5.142953157225411e-06,
      "loss": 0.0105,
      "step": 1494240
    },
    {
      "epoch": 2.4453892631069043,
      "grad_norm": 0.24504734575748444,
      "learning_rate": 5.142887265011894e-06,
      "loss": 0.0139,
      "step": 1494260
    },
    {
      "epoch": 2.4454219935455574,
      "grad_norm": 0.8337125778198242,
      "learning_rate": 5.142821372798377e-06,
      "loss": 0.0153,
      "step": 1494280
    },
    {
      "epoch": 2.445454723984211,
      "grad_norm": 0.7392222285270691,
      "learning_rate": 5.14275548058486e-06,
      "loss": 0.0149,
      "step": 1494300
    },
    {
      "epoch": 2.445487454422864,
      "grad_norm": 1.0908362865447998,
      "learning_rate": 5.1426895883713425e-06,
      "loss": 0.0147,
      "step": 1494320
    },
    {
      "epoch": 2.4455201848615173,
      "grad_norm": 0.4717675745487213,
      "learning_rate": 5.142623696157826e-06,
      "loss": 0.0127,
      "step": 1494340
    },
    {
      "epoch": 2.445552915300171,
      "grad_norm": 0.3950386941432953,
      "learning_rate": 5.142557803944308e-06,
      "loss": 0.0128,
      "step": 1494360
    },
    {
      "epoch": 2.445585645738824,
      "grad_norm": 0.2622344195842743,
      "learning_rate": 5.1424919117307915e-06,
      "loss": 0.0116,
      "step": 1494380
    },
    {
      "epoch": 2.4456183761774777,
      "grad_norm": 0.30647993087768555,
      "learning_rate": 5.142426019517273e-06,
      "loss": 0.0129,
      "step": 1494400
    },
    {
      "epoch": 2.445651106616131,
      "grad_norm": 0.6143352389335632,
      "learning_rate": 5.142360127303757e-06,
      "loss": 0.0156,
      "step": 1494420
    },
    {
      "epoch": 2.4456838370547844,
      "grad_norm": 0.5374702215194702,
      "learning_rate": 5.14229423509024e-06,
      "loss": 0.0218,
      "step": 1494440
    },
    {
      "epoch": 2.4457165674934376,
      "grad_norm": 0.8244271278381348,
      "learning_rate": 5.1422283428767225e-06,
      "loss": 0.0158,
      "step": 1494460
    },
    {
      "epoch": 2.4457492979320907,
      "grad_norm": 0.332466185092926,
      "learning_rate": 5.142162450663205e-06,
      "loss": 0.013,
      "step": 1494480
    },
    {
      "epoch": 2.4457820283707443,
      "grad_norm": 0.09352093935012817,
      "learning_rate": 5.142096558449689e-06,
      "loss": 0.0163,
      "step": 1494500
    },
    {
      "epoch": 2.4458147588093975,
      "grad_norm": 0.6830285787582397,
      "learning_rate": 5.142030666236171e-06,
      "loss": 0.0141,
      "step": 1494520
    },
    {
      "epoch": 2.445847489248051,
      "grad_norm": 0.6860032677650452,
      "learning_rate": 5.141964774022654e-06,
      "loss": 0.0134,
      "step": 1494540
    },
    {
      "epoch": 2.445880219686704,
      "grad_norm": 0.3621273934841156,
      "learning_rate": 5.141898881809138e-06,
      "loss": 0.0205,
      "step": 1494560
    },
    {
      "epoch": 2.445912950125358,
      "grad_norm": 0.25166380405426025,
      "learning_rate": 5.14183298959562e-06,
      "loss": 0.017,
      "step": 1494580
    },
    {
      "epoch": 2.445945680564011,
      "grad_norm": 0.2713354527950287,
      "learning_rate": 5.141767097382103e-06,
      "loss": 0.0198,
      "step": 1494600
    },
    {
      "epoch": 2.445978411002664,
      "grad_norm": 0.40032801032066345,
      "learning_rate": 5.141701205168585e-06,
      "loss": 0.0117,
      "step": 1494620
    },
    {
      "epoch": 2.4460111414413177,
      "grad_norm": 0.1861802488565445,
      "learning_rate": 5.141635312955069e-06,
      "loss": 0.0108,
      "step": 1494640
    },
    {
      "epoch": 2.446043871879971,
      "grad_norm": 0.4878086745738983,
      "learning_rate": 5.141569420741552e-06,
      "loss": 0.0112,
      "step": 1494660
    },
    {
      "epoch": 2.4460766023186244,
      "grad_norm": 0.3376760482788086,
      "learning_rate": 5.141503528528034e-06,
      "loss": 0.014,
      "step": 1494680
    },
    {
      "epoch": 2.4461093327572776,
      "grad_norm": 1.4567233324050903,
      "learning_rate": 5.141437636314517e-06,
      "loss": 0.0187,
      "step": 1494700
    },
    {
      "epoch": 2.446142063195931,
      "grad_norm": 1.7128797769546509,
      "learning_rate": 5.141371744101001e-06,
      "loss": 0.0111,
      "step": 1494720
    },
    {
      "epoch": 2.4461747936345843,
      "grad_norm": 0.8534159064292908,
      "learning_rate": 5.1413058518874825e-06,
      "loss": 0.0189,
      "step": 1494740
    },
    {
      "epoch": 2.4462075240732375,
      "grad_norm": 0.1635148674249649,
      "learning_rate": 5.141239959673966e-06,
      "loss": 0.0163,
      "step": 1494760
    },
    {
      "epoch": 2.446240254511891,
      "grad_norm": 0.34660768508911133,
      "learning_rate": 5.141174067460448e-06,
      "loss": 0.0131,
      "step": 1494780
    },
    {
      "epoch": 2.4462729849505442,
      "grad_norm": 0.6036393642425537,
      "learning_rate": 5.141108175246932e-06,
      "loss": 0.0104,
      "step": 1494800
    },
    {
      "epoch": 2.446305715389198,
      "grad_norm": 0.9721769094467163,
      "learning_rate": 5.1410422830334135e-06,
      "loss": 0.0098,
      "step": 1494820
    },
    {
      "epoch": 2.446338445827851,
      "grad_norm": 0.4628864526748657,
      "learning_rate": 5.140976390819897e-06,
      "loss": 0.017,
      "step": 1494840
    },
    {
      "epoch": 2.446371176266504,
      "grad_norm": 0.2687743902206421,
      "learning_rate": 5.14091049860638e-06,
      "loss": 0.0156,
      "step": 1494860
    },
    {
      "epoch": 2.4464039067051577,
      "grad_norm": 0.24085116386413574,
      "learning_rate": 5.1408446063928626e-06,
      "loss": 0.0163,
      "step": 1494880
    },
    {
      "epoch": 2.446436637143811,
      "grad_norm": 0.5108046531677246,
      "learning_rate": 5.140778714179346e-06,
      "loss": 0.0189,
      "step": 1494900
    },
    {
      "epoch": 2.4464693675824645,
      "grad_norm": 0.1589953899383545,
      "learning_rate": 5.140712821965829e-06,
      "loss": 0.0198,
      "step": 1494920
    },
    {
      "epoch": 2.4465020980211176,
      "grad_norm": 0.3042386472225189,
      "learning_rate": 5.1406469297523125e-06,
      "loss": 0.0127,
      "step": 1494940
    },
    {
      "epoch": 2.4465348284597708,
      "grad_norm": 0.39233097434043884,
      "learning_rate": 5.140581037538794e-06,
      "loss": 0.0146,
      "step": 1494960
    },
    {
      "epoch": 2.4465675588984244,
      "grad_norm": 0.23401296138763428,
      "learning_rate": 5.140515145325278e-06,
      "loss": 0.0117,
      "step": 1494980
    },
    {
      "epoch": 2.4466002893370775,
      "grad_norm": 0.46965262293815613,
      "learning_rate": 5.14044925311176e-06,
      "loss": 0.0195,
      "step": 1495000
    },
    {
      "epoch": 2.446633019775731,
      "grad_norm": 0.29135215282440186,
      "learning_rate": 5.1403833608982434e-06,
      "loss": 0.0201,
      "step": 1495020
    },
    {
      "epoch": 2.4466657502143843,
      "grad_norm": 0.3925963342189789,
      "learning_rate": 5.140317468684725e-06,
      "loss": 0.0163,
      "step": 1495040
    },
    {
      "epoch": 2.446698480653038,
      "grad_norm": 1.3461713790893555,
      "learning_rate": 5.140251576471209e-06,
      "loss": 0.0157,
      "step": 1495060
    },
    {
      "epoch": 2.446731211091691,
      "grad_norm": 0.5350527763366699,
      "learning_rate": 5.140185684257692e-06,
      "loss": 0.0123,
      "step": 1495080
    },
    {
      "epoch": 2.446763941530344,
      "grad_norm": 0.6334243416786194,
      "learning_rate": 5.140119792044174e-06,
      "loss": 0.0106,
      "step": 1495100
    },
    {
      "epoch": 2.4467966719689977,
      "grad_norm": 0.15206918120384216,
      "learning_rate": 5.140053899830657e-06,
      "loss": 0.0181,
      "step": 1495120
    },
    {
      "epoch": 2.446829402407651,
      "grad_norm": 0.30072468519210815,
      "learning_rate": 5.139988007617141e-06,
      "loss": 0.014,
      "step": 1495140
    },
    {
      "epoch": 2.4468621328463045,
      "grad_norm": 0.3654279410839081,
      "learning_rate": 5.139922115403623e-06,
      "loss": 0.0197,
      "step": 1495160
    },
    {
      "epoch": 2.4468948632849576,
      "grad_norm": 0.39150941371917725,
      "learning_rate": 5.139856223190106e-06,
      "loss": 0.0166,
      "step": 1495180
    },
    {
      "epoch": 2.4469275937236112,
      "grad_norm": 0.40719184279441833,
      "learning_rate": 5.139790330976588e-06,
      "loss": 0.0199,
      "step": 1495200
    },
    {
      "epoch": 2.4469603241622644,
      "grad_norm": 0.43830257654190063,
      "learning_rate": 5.139724438763072e-06,
      "loss": 0.0203,
      "step": 1495220
    },
    {
      "epoch": 2.4469930546009175,
      "grad_norm": 0.3601563274860382,
      "learning_rate": 5.139658546549555e-06,
      "loss": 0.0152,
      "step": 1495240
    },
    {
      "epoch": 2.447025785039571,
      "grad_norm": 0.29872840642929077,
      "learning_rate": 5.139592654336037e-06,
      "loss": 0.0092,
      "step": 1495260
    },
    {
      "epoch": 2.4470585154782243,
      "grad_norm": 0.3226511478424072,
      "learning_rate": 5.139526762122521e-06,
      "loss": 0.0138,
      "step": 1495280
    },
    {
      "epoch": 2.447091245916878,
      "grad_norm": 0.8658353686332703,
      "learning_rate": 5.1394608699090035e-06,
      "loss": 0.0153,
      "step": 1495300
    },
    {
      "epoch": 2.447123976355531,
      "grad_norm": 0.7672200798988342,
      "learning_rate": 5.139394977695486e-06,
      "loss": 0.0163,
      "step": 1495320
    },
    {
      "epoch": 2.4471567067941846,
      "grad_norm": 0.30697211623191833,
      "learning_rate": 5.139329085481969e-06,
      "loss": 0.0166,
      "step": 1495340
    },
    {
      "epoch": 2.4471894372328378,
      "grad_norm": 0.6385360956192017,
      "learning_rate": 5.1392631932684526e-06,
      "loss": 0.0254,
      "step": 1495360
    },
    {
      "epoch": 2.447222167671491,
      "grad_norm": 0.11452937871217728,
      "learning_rate": 5.1391973010549344e-06,
      "loss": 0.0216,
      "step": 1495380
    },
    {
      "epoch": 2.4472548981101445,
      "grad_norm": 0.27926793694496155,
      "learning_rate": 5.139131408841418e-06,
      "loss": 0.0153,
      "step": 1495400
    },
    {
      "epoch": 2.4472876285487977,
      "grad_norm": 0.3144124150276184,
      "learning_rate": 5.1390655166279e-06,
      "loss": 0.0135,
      "step": 1495420
    },
    {
      "epoch": 2.4473203589874513,
      "grad_norm": 0.580793559551239,
      "learning_rate": 5.1389996244143835e-06,
      "loss": 0.0143,
      "step": 1495440
    },
    {
      "epoch": 2.4473530894261044,
      "grad_norm": 0.460907906293869,
      "learning_rate": 5.138933732200866e-06,
      "loss": 0.0161,
      "step": 1495460
    },
    {
      "epoch": 2.447385819864758,
      "grad_norm": 0.5327714681625366,
      "learning_rate": 5.138867839987349e-06,
      "loss": 0.0111,
      "step": 1495480
    },
    {
      "epoch": 2.447418550303411,
      "grad_norm": 0.4586624205112457,
      "learning_rate": 5.138801947773832e-06,
      "loss": 0.01,
      "step": 1495500
    },
    {
      "epoch": 2.4474512807420643,
      "grad_norm": 0.21716874837875366,
      "learning_rate": 5.138736055560315e-06,
      "loss": 0.0138,
      "step": 1495520
    },
    {
      "epoch": 2.447484011180718,
      "grad_norm": 0.2766529619693756,
      "learning_rate": 5.138670163346797e-06,
      "loss": 0.0145,
      "step": 1495540
    },
    {
      "epoch": 2.447516741619371,
      "grad_norm": 0.17699101567268372,
      "learning_rate": 5.138604271133281e-06,
      "loss": 0.0183,
      "step": 1495560
    },
    {
      "epoch": 2.4475494720580246,
      "grad_norm": 0.14212679862976074,
      "learning_rate": 5.138538378919763e-06,
      "loss": 0.0097,
      "step": 1495580
    },
    {
      "epoch": 2.447582202496678,
      "grad_norm": 0.3440067172050476,
      "learning_rate": 5.138472486706246e-06,
      "loss": 0.0171,
      "step": 1495600
    },
    {
      "epoch": 2.4476149329353314,
      "grad_norm": 0.7373339533805847,
      "learning_rate": 5.13840659449273e-06,
      "loss": 0.0144,
      "step": 1495620
    },
    {
      "epoch": 2.4476476633739845,
      "grad_norm": 0.17376983165740967,
      "learning_rate": 5.138340702279212e-06,
      "loss": 0.0137,
      "step": 1495640
    },
    {
      "epoch": 2.4476803938126377,
      "grad_norm": 0.0913035050034523,
      "learning_rate": 5.138274810065695e-06,
      "loss": 0.0155,
      "step": 1495660
    },
    {
      "epoch": 2.4477131242512913,
      "grad_norm": 0.20049971342086792,
      "learning_rate": 5.138208917852178e-06,
      "loss": 0.0184,
      "step": 1495680
    },
    {
      "epoch": 2.4477458546899444,
      "grad_norm": 0.616918683052063,
      "learning_rate": 5.138143025638661e-06,
      "loss": 0.0156,
      "step": 1495700
    },
    {
      "epoch": 2.447778585128598,
      "grad_norm": 0.241843581199646,
      "learning_rate": 5.1380771334251436e-06,
      "loss": 0.0189,
      "step": 1495720
    },
    {
      "epoch": 2.447811315567251,
      "grad_norm": 0.5421412587165833,
      "learning_rate": 5.138011241211627e-06,
      "loss": 0.0153,
      "step": 1495740
    },
    {
      "epoch": 2.4478440460059048,
      "grad_norm": 0.2824784517288208,
      "learning_rate": 5.137945348998109e-06,
      "loss": 0.0104,
      "step": 1495760
    },
    {
      "epoch": 2.447876776444558,
      "grad_norm": 0.5020825266838074,
      "learning_rate": 5.137879456784593e-06,
      "loss": 0.0192,
      "step": 1495780
    },
    {
      "epoch": 2.447909506883211,
      "grad_norm": 0.25710585713386536,
      "learning_rate": 5.1378135645710745e-06,
      "loss": 0.0147,
      "step": 1495800
    },
    {
      "epoch": 2.4479422373218647,
      "grad_norm": 0.6951392292976379,
      "learning_rate": 5.137747672357558e-06,
      "loss": 0.0118,
      "step": 1495820
    },
    {
      "epoch": 2.447974967760518,
      "grad_norm": 0.3896334171295166,
      "learning_rate": 5.13768178014404e-06,
      "loss": 0.0137,
      "step": 1495840
    },
    {
      "epoch": 2.4480076981991714,
      "grad_norm": 0.4718007743358612,
      "learning_rate": 5.137615887930524e-06,
      "loss": 0.0192,
      "step": 1495860
    },
    {
      "epoch": 2.4480404286378246,
      "grad_norm": 0.6606428027153015,
      "learning_rate": 5.137549995717006e-06,
      "loss": 0.0128,
      "step": 1495880
    },
    {
      "epoch": 2.448073159076478,
      "grad_norm": 0.5058054327964783,
      "learning_rate": 5.137484103503489e-06,
      "loss": 0.0128,
      "step": 1495900
    },
    {
      "epoch": 2.4481058895151313,
      "grad_norm": 1.2379626035690308,
      "learning_rate": 5.137418211289972e-06,
      "loss": 0.0193,
      "step": 1495920
    },
    {
      "epoch": 2.4481386199537845,
      "grad_norm": 0.5920532941818237,
      "learning_rate": 5.137352319076455e-06,
      "loss": 0.0136,
      "step": 1495940
    },
    {
      "epoch": 2.448171350392438,
      "grad_norm": 0.1756584644317627,
      "learning_rate": 5.137286426862938e-06,
      "loss": 0.014,
      "step": 1495960
    },
    {
      "epoch": 2.448204080831091,
      "grad_norm": 0.48600512742996216,
      "learning_rate": 5.137220534649421e-06,
      "loss": 0.0179,
      "step": 1495980
    },
    {
      "epoch": 2.448236811269745,
      "grad_norm": 0.3086017072200775,
      "learning_rate": 5.1371546424359045e-06,
      "loss": 0.0101,
      "step": 1496000
    },
    {
      "epoch": 2.448269541708398,
      "grad_norm": 0.1179853305220604,
      "learning_rate": 5.137088750222386e-06,
      "loss": 0.0104,
      "step": 1496020
    },
    {
      "epoch": 2.4483022721470515,
      "grad_norm": 0.4166989028453827,
      "learning_rate": 5.13702285800887e-06,
      "loss": 0.0105,
      "step": 1496040
    },
    {
      "epoch": 2.4483350025857047,
      "grad_norm": 0.22643595933914185,
      "learning_rate": 5.136956965795352e-06,
      "loss": 0.0079,
      "step": 1496060
    },
    {
      "epoch": 2.448367733024358,
      "grad_norm": 0.41857093572616577,
      "learning_rate": 5.136891073581835e-06,
      "loss": 0.0215,
      "step": 1496080
    },
    {
      "epoch": 2.4484004634630114,
      "grad_norm": 0.1687130182981491,
      "learning_rate": 5.136825181368318e-06,
      "loss": 0.0118,
      "step": 1496100
    },
    {
      "epoch": 2.4484331939016646,
      "grad_norm": 0.5550920367240906,
      "learning_rate": 5.136759289154801e-06,
      "loss": 0.0155,
      "step": 1496120
    },
    {
      "epoch": 2.448465924340318,
      "grad_norm": 0.3928936719894409,
      "learning_rate": 5.136693396941284e-06,
      "loss": 0.0229,
      "step": 1496140
    },
    {
      "epoch": 2.4484986547789713,
      "grad_norm": 0.20230582356452942,
      "learning_rate": 5.136627504727767e-06,
      "loss": 0.0115,
      "step": 1496160
    },
    {
      "epoch": 2.448531385217625,
      "grad_norm": 0.35685375332832336,
      "learning_rate": 5.136561612514249e-06,
      "loss": 0.0118,
      "step": 1496180
    },
    {
      "epoch": 2.448564115656278,
      "grad_norm": 0.5984632968902588,
      "learning_rate": 5.136495720300733e-06,
      "loss": 0.0244,
      "step": 1496200
    },
    {
      "epoch": 2.4485968460949312,
      "grad_norm": 0.2673373222351074,
      "learning_rate": 5.136429828087215e-06,
      "loss": 0.0144,
      "step": 1496220
    },
    {
      "epoch": 2.448629576533585,
      "grad_norm": 0.26109665632247925,
      "learning_rate": 5.136363935873698e-06,
      "loss": 0.0191,
      "step": 1496240
    },
    {
      "epoch": 2.448662306972238,
      "grad_norm": 0.46619200706481934,
      "learning_rate": 5.136298043660181e-06,
      "loss": 0.014,
      "step": 1496260
    },
    {
      "epoch": 2.4486950374108916,
      "grad_norm": 0.19200393557548523,
      "learning_rate": 5.136232151446664e-06,
      "loss": 0.0138,
      "step": 1496280
    },
    {
      "epoch": 2.4487277678495447,
      "grad_norm": 0.500738799571991,
      "learning_rate": 5.136166259233147e-06,
      "loss": 0.0133,
      "step": 1496300
    },
    {
      "epoch": 2.4487604982881983,
      "grad_norm": 0.6176453232765198,
      "learning_rate": 5.13610036701963e-06,
      "loss": 0.012,
      "step": 1496320
    },
    {
      "epoch": 2.4487932287268515,
      "grad_norm": 0.2920311391353607,
      "learning_rate": 5.136034474806113e-06,
      "loss": 0.0123,
      "step": 1496340
    },
    {
      "epoch": 2.4488259591655046,
      "grad_norm": 0.7221683859825134,
      "learning_rate": 5.1359685825925955e-06,
      "loss": 0.0201,
      "step": 1496360
    },
    {
      "epoch": 2.448858689604158,
      "grad_norm": 0.30621907114982605,
      "learning_rate": 5.135902690379079e-06,
      "loss": 0.0168,
      "step": 1496380
    },
    {
      "epoch": 2.4488914200428114,
      "grad_norm": 0.519530177116394,
      "learning_rate": 5.135836798165561e-06,
      "loss": 0.0117,
      "step": 1496400
    },
    {
      "epoch": 2.448924150481465,
      "grad_norm": 0.615001380443573,
      "learning_rate": 5.1357709059520445e-06,
      "loss": 0.0134,
      "step": 1496420
    },
    {
      "epoch": 2.448956880920118,
      "grad_norm": 0.21323303878307343,
      "learning_rate": 5.1357050137385264e-06,
      "loss": 0.0169,
      "step": 1496440
    },
    {
      "epoch": 2.4489896113587712,
      "grad_norm": 0.11436668038368225,
      "learning_rate": 5.13563912152501e-06,
      "loss": 0.0102,
      "step": 1496460
    },
    {
      "epoch": 2.449022341797425,
      "grad_norm": 0.2574775516986847,
      "learning_rate": 5.135573229311493e-06,
      "loss": 0.0185,
      "step": 1496480
    },
    {
      "epoch": 2.449055072236078,
      "grad_norm": 0.18356963992118835,
      "learning_rate": 5.1355073370979755e-06,
      "loss": 0.0133,
      "step": 1496500
    },
    {
      "epoch": 2.4490878026747316,
      "grad_norm": 0.840349555015564,
      "learning_rate": 5.135441444884458e-06,
      "loss": 0.0157,
      "step": 1496520
    },
    {
      "epoch": 2.4491205331133847,
      "grad_norm": 0.7885936498641968,
      "learning_rate": 5.135375552670942e-06,
      "loss": 0.0135,
      "step": 1496540
    },
    {
      "epoch": 2.449153263552038,
      "grad_norm": 0.5543927550315857,
      "learning_rate": 5.135309660457424e-06,
      "loss": 0.0145,
      "step": 1496560
    },
    {
      "epoch": 2.4491859939906915,
      "grad_norm": 0.0687277540564537,
      "learning_rate": 5.135243768243907e-06,
      "loss": 0.0104,
      "step": 1496580
    },
    {
      "epoch": 2.4492187244293446,
      "grad_norm": 0.5009384751319885,
      "learning_rate": 5.135177876030389e-06,
      "loss": 0.0163,
      "step": 1496600
    },
    {
      "epoch": 2.4492514548679982,
      "grad_norm": 0.2340397834777832,
      "learning_rate": 5.135111983816873e-06,
      "loss": 0.0097,
      "step": 1496620
    },
    {
      "epoch": 2.4492841853066514,
      "grad_norm": 0.4514920711517334,
      "learning_rate": 5.1350460916033555e-06,
      "loss": 0.016,
      "step": 1496640
    },
    {
      "epoch": 2.449316915745305,
      "grad_norm": 0.6197611093521118,
      "learning_rate": 5.134980199389838e-06,
      "loss": 0.0147,
      "step": 1496660
    },
    {
      "epoch": 2.449349646183958,
      "grad_norm": 0.3615049719810486,
      "learning_rate": 5.134914307176322e-06,
      "loss": 0.0101,
      "step": 1496680
    },
    {
      "epoch": 2.4493823766226113,
      "grad_norm": 0.24892689287662506,
      "learning_rate": 5.134848414962805e-06,
      "loss": 0.0118,
      "step": 1496700
    },
    {
      "epoch": 2.449415107061265,
      "grad_norm": 0.07041026651859283,
      "learning_rate": 5.134782522749287e-06,
      "loss": 0.0169,
      "step": 1496720
    },
    {
      "epoch": 2.449447837499918,
      "grad_norm": 0.37366122007369995,
      "learning_rate": 5.13471663053577e-06,
      "loss": 0.0181,
      "step": 1496740
    },
    {
      "epoch": 2.4494805679385716,
      "grad_norm": 0.30546489357948303,
      "learning_rate": 5.134650738322254e-06,
      "loss": 0.0191,
      "step": 1496760
    },
    {
      "epoch": 2.4495132983772248,
      "grad_norm": 0.17382372915744781,
      "learning_rate": 5.1345848461087355e-06,
      "loss": 0.0133,
      "step": 1496780
    },
    {
      "epoch": 2.4495460288158784,
      "grad_norm": 0.6240161657333374,
      "learning_rate": 5.134518953895219e-06,
      "loss": 0.0134,
      "step": 1496800
    },
    {
      "epoch": 2.4495787592545315,
      "grad_norm": 0.319835901260376,
      "learning_rate": 5.134453061681701e-06,
      "loss": 0.0119,
      "step": 1496820
    },
    {
      "epoch": 2.4496114896931847,
      "grad_norm": 0.6019925475120544,
      "learning_rate": 5.134387169468185e-06,
      "loss": 0.0178,
      "step": 1496840
    },
    {
      "epoch": 2.4496442201318382,
      "grad_norm": 0.9015358686447144,
      "learning_rate": 5.1343212772546665e-06,
      "loss": 0.0119,
      "step": 1496860
    },
    {
      "epoch": 2.4496769505704914,
      "grad_norm": 0.8946066498756409,
      "learning_rate": 5.13425538504115e-06,
      "loss": 0.0155,
      "step": 1496880
    },
    {
      "epoch": 2.449709681009145,
      "grad_norm": 0.49589577317237854,
      "learning_rate": 5.134189492827633e-06,
      "loss": 0.0117,
      "step": 1496900
    },
    {
      "epoch": 2.449742411447798,
      "grad_norm": 0.6255001425743103,
      "learning_rate": 5.1341236006141156e-06,
      "loss": 0.0202,
      "step": 1496920
    },
    {
      "epoch": 2.4497751418864517,
      "grad_norm": 0.10255953669548035,
      "learning_rate": 5.134057708400598e-06,
      "loss": 0.0158,
      "step": 1496940
    },
    {
      "epoch": 2.449807872325105,
      "grad_norm": 0.32978999614715576,
      "learning_rate": 5.133991816187082e-06,
      "loss": 0.0159,
      "step": 1496960
    },
    {
      "epoch": 2.449840602763758,
      "grad_norm": 0.38218751549720764,
      "learning_rate": 5.133925923973564e-06,
      "loss": 0.0181,
      "step": 1496980
    },
    {
      "epoch": 2.4498733332024116,
      "grad_norm": 0.2953377366065979,
      "learning_rate": 5.133860031760047e-06,
      "loss": 0.0137,
      "step": 1497000
    },
    {
      "epoch": 2.449906063641065,
      "grad_norm": 0.2771258056163788,
      "learning_rate": 5.133794139546531e-06,
      "loss": 0.0183,
      "step": 1497020
    },
    {
      "epoch": 2.4499387940797184,
      "grad_norm": 0.3936578929424286,
      "learning_rate": 5.133728247333013e-06,
      "loss": 0.0158,
      "step": 1497040
    },
    {
      "epoch": 2.4499715245183715,
      "grad_norm": 0.9169512987136841,
      "learning_rate": 5.1336623551194964e-06,
      "loss": 0.0106,
      "step": 1497060
    },
    {
      "epoch": 2.450004254957025,
      "grad_norm": 0.3373628258705139,
      "learning_rate": 5.133596462905978e-06,
      "loss": 0.0143,
      "step": 1497080
    },
    {
      "epoch": 2.4500369853956783,
      "grad_norm": 0.48995378613471985,
      "learning_rate": 5.133530570692462e-06,
      "loss": 0.0148,
      "step": 1497100
    },
    {
      "epoch": 2.4500697158343314,
      "grad_norm": 0.4384547173976898,
      "learning_rate": 5.133464678478945e-06,
      "loss": 0.0143,
      "step": 1497120
    },
    {
      "epoch": 2.450102446272985,
      "grad_norm": 0.5654877424240112,
      "learning_rate": 5.133398786265427e-06,
      "loss": 0.0101,
      "step": 1497140
    },
    {
      "epoch": 2.450135176711638,
      "grad_norm": 1.2642827033996582,
      "learning_rate": 5.13333289405191e-06,
      "loss": 0.011,
      "step": 1497160
    },
    {
      "epoch": 2.4501679071502918,
      "grad_norm": 0.2305188775062561,
      "learning_rate": 5.133267001838394e-06,
      "loss": 0.0162,
      "step": 1497180
    },
    {
      "epoch": 2.450200637588945,
      "grad_norm": 0.3369062542915344,
      "learning_rate": 5.133201109624876e-06,
      "loss": 0.0166,
      "step": 1497200
    },
    {
      "epoch": 2.4502333680275985,
      "grad_norm": 0.29361864924430847,
      "learning_rate": 5.133135217411359e-06,
      "loss": 0.0165,
      "step": 1497220
    },
    {
      "epoch": 2.4502660984662517,
      "grad_norm": 0.6951931118965149,
      "learning_rate": 5.133069325197841e-06,
      "loss": 0.0143,
      "step": 1497240
    },
    {
      "epoch": 2.450298828904905,
      "grad_norm": 0.9874758720397949,
      "learning_rate": 5.133003432984325e-06,
      "loss": 0.0155,
      "step": 1497260
    },
    {
      "epoch": 2.4503315593435584,
      "grad_norm": 0.07964819669723511,
      "learning_rate": 5.1329375407708074e-06,
      "loss": 0.0097,
      "step": 1497280
    },
    {
      "epoch": 2.4503642897822115,
      "grad_norm": 0.22046944499015808,
      "learning_rate": 5.13287164855729e-06,
      "loss": 0.0134,
      "step": 1497300
    },
    {
      "epoch": 2.450397020220865,
      "grad_norm": 0.5489048361778259,
      "learning_rate": 5.132805756343773e-06,
      "loss": 0.0119,
      "step": 1497320
    },
    {
      "epoch": 2.4504297506595183,
      "grad_norm": 0.13781891763210297,
      "learning_rate": 5.1327398641302565e-06,
      "loss": 0.0132,
      "step": 1497340
    },
    {
      "epoch": 2.450462481098172,
      "grad_norm": 0.32508817315101624,
      "learning_rate": 5.132673971916739e-06,
      "loss": 0.0108,
      "step": 1497360
    },
    {
      "epoch": 2.450495211536825,
      "grad_norm": 0.40370792150497437,
      "learning_rate": 5.132608079703222e-06,
      "loss": 0.0159,
      "step": 1497380
    },
    {
      "epoch": 2.450527941975478,
      "grad_norm": 0.37744125723838806,
      "learning_rate": 5.1325421874897056e-06,
      "loss": 0.0109,
      "step": 1497400
    },
    {
      "epoch": 2.450560672414132,
      "grad_norm": 0.27376043796539307,
      "learning_rate": 5.1324762952761875e-06,
      "loss": 0.0177,
      "step": 1497420
    },
    {
      "epoch": 2.450593402852785,
      "grad_norm": 1.1040546894073486,
      "learning_rate": 5.132410403062671e-06,
      "loss": 0.0184,
      "step": 1497440
    },
    {
      "epoch": 2.4506261332914385,
      "grad_norm": 0.2988087832927704,
      "learning_rate": 5.132344510849153e-06,
      "loss": 0.0131,
      "step": 1497460
    },
    {
      "epoch": 2.4506588637300917,
      "grad_norm": 0.2875538766384125,
      "learning_rate": 5.1322786186356365e-06,
      "loss": 0.0179,
      "step": 1497480
    },
    {
      "epoch": 2.4506915941687453,
      "grad_norm": 0.3641200661659241,
      "learning_rate": 5.132212726422119e-06,
      "loss": 0.0164,
      "step": 1497500
    },
    {
      "epoch": 2.4507243246073984,
      "grad_norm": 0.5171443223953247,
      "learning_rate": 5.132146834208602e-06,
      "loss": 0.015,
      "step": 1497520
    },
    {
      "epoch": 2.4507570550460516,
      "grad_norm": 0.3256833255290985,
      "learning_rate": 5.132080941995085e-06,
      "loss": 0.0129,
      "step": 1497540
    },
    {
      "epoch": 2.450789785484705,
      "grad_norm": 0.9373866319656372,
      "learning_rate": 5.132015049781568e-06,
      "loss": 0.0185,
      "step": 1497560
    },
    {
      "epoch": 2.4508225159233583,
      "grad_norm": 0.2644916772842407,
      "learning_rate": 5.13194915756805e-06,
      "loss": 0.014,
      "step": 1497580
    },
    {
      "epoch": 2.450855246362012,
      "grad_norm": 0.2186928689479828,
      "learning_rate": 5.131883265354534e-06,
      "loss": 0.0261,
      "step": 1497600
    },
    {
      "epoch": 2.450887976800665,
      "grad_norm": 0.18965019285678864,
      "learning_rate": 5.131817373141016e-06,
      "loss": 0.0169,
      "step": 1497620
    },
    {
      "epoch": 2.4509207072393187,
      "grad_norm": 0.9516364932060242,
      "learning_rate": 5.131751480927499e-06,
      "loss": 0.0167,
      "step": 1497640
    },
    {
      "epoch": 2.450953437677972,
      "grad_norm": 0.727270781993866,
      "learning_rate": 5.131685588713981e-06,
      "loss": 0.0154,
      "step": 1497660
    },
    {
      "epoch": 2.450986168116625,
      "grad_norm": 0.2878427803516388,
      "learning_rate": 5.131619696500465e-06,
      "loss": 0.0194,
      "step": 1497680
    },
    {
      "epoch": 2.4510188985552785,
      "grad_norm": 0.6203182935714722,
      "learning_rate": 5.131553804286948e-06,
      "loss": 0.0221,
      "step": 1497700
    },
    {
      "epoch": 2.4510516289939317,
      "grad_norm": 0.13339632749557495,
      "learning_rate": 5.131487912073431e-06,
      "loss": 0.0096,
      "step": 1497720
    },
    {
      "epoch": 2.4510843594325853,
      "grad_norm": 0.33348146080970764,
      "learning_rate": 5.131422019859914e-06,
      "loss": 0.0159,
      "step": 1497740
    },
    {
      "epoch": 2.4511170898712384,
      "grad_norm": 0.24343957006931305,
      "learning_rate": 5.1313561276463966e-06,
      "loss": 0.0134,
      "step": 1497760
    },
    {
      "epoch": 2.451149820309892,
      "grad_norm": 0.20180906355381012,
      "learning_rate": 5.13129023543288e-06,
      "loss": 0.0076,
      "step": 1497780
    },
    {
      "epoch": 2.451182550748545,
      "grad_norm": 0.4110727906227112,
      "learning_rate": 5.131224343219362e-06,
      "loss": 0.0117,
      "step": 1497800
    },
    {
      "epoch": 2.4512152811871983,
      "grad_norm": 0.35180702805519104,
      "learning_rate": 5.131158451005846e-06,
      "loss": 0.0126,
      "step": 1497820
    },
    {
      "epoch": 2.451248011625852,
      "grad_norm": 0.43877261877059937,
      "learning_rate": 5.1310925587923275e-06,
      "loss": 0.0172,
      "step": 1497840
    },
    {
      "epoch": 2.451280742064505,
      "grad_norm": 0.20586493611335754,
      "learning_rate": 5.131026666578811e-06,
      "loss": 0.0219,
      "step": 1497860
    },
    {
      "epoch": 2.4513134725031587,
      "grad_norm": 0.8800386190414429,
      "learning_rate": 5.130960774365293e-06,
      "loss": 0.0144,
      "step": 1497880
    },
    {
      "epoch": 2.451346202941812,
      "grad_norm": 0.7300885915756226,
      "learning_rate": 5.130894882151777e-06,
      "loss": 0.0185,
      "step": 1497900
    },
    {
      "epoch": 2.451378933380465,
      "grad_norm": 0.7520129680633545,
      "learning_rate": 5.130828989938259e-06,
      "loss": 0.013,
      "step": 1497920
    },
    {
      "epoch": 2.4514116638191186,
      "grad_norm": 0.8922497630119324,
      "learning_rate": 5.130763097724742e-06,
      "loss": 0.0201,
      "step": 1497940
    },
    {
      "epoch": 2.4514443942577717,
      "grad_norm": 1.2185192108154297,
      "learning_rate": 5.130697205511225e-06,
      "loss": 0.0154,
      "step": 1497960
    },
    {
      "epoch": 2.4514771246964253,
      "grad_norm": 0.26431354880332947,
      "learning_rate": 5.130631313297708e-06,
      "loss": 0.0145,
      "step": 1497980
    },
    {
      "epoch": 2.4515098551350785,
      "grad_norm": 0.9935945868492126,
      "learning_rate": 5.13056542108419e-06,
      "loss": 0.0194,
      "step": 1498000
    },
    {
      "epoch": 2.4515425855737316,
      "grad_norm": 0.1033656895160675,
      "learning_rate": 5.130499528870674e-06,
      "loss": 0.0127,
      "step": 1498020
    },
    {
      "epoch": 2.451575316012385,
      "grad_norm": 0.31610676646232605,
      "learning_rate": 5.130433636657156e-06,
      "loss": 0.0091,
      "step": 1498040
    },
    {
      "epoch": 2.4516080464510384,
      "grad_norm": 0.7768989205360413,
      "learning_rate": 5.130367744443639e-06,
      "loss": 0.0133,
      "step": 1498060
    },
    {
      "epoch": 2.451640776889692,
      "grad_norm": 0.09903482347726822,
      "learning_rate": 5.130301852230123e-06,
      "loss": 0.0144,
      "step": 1498080
    },
    {
      "epoch": 2.451673507328345,
      "grad_norm": 0.8579253554344177,
      "learning_rate": 5.130235960016605e-06,
      "loss": 0.0127,
      "step": 1498100
    },
    {
      "epoch": 2.4517062377669987,
      "grad_norm": 0.11393729597330093,
      "learning_rate": 5.1301700678030884e-06,
      "loss": 0.0202,
      "step": 1498120
    },
    {
      "epoch": 2.451738968205652,
      "grad_norm": 0.3810635805130005,
      "learning_rate": 5.130104175589571e-06,
      "loss": 0.0145,
      "step": 1498140
    },
    {
      "epoch": 2.451771698644305,
      "grad_norm": 0.5389824509620667,
      "learning_rate": 5.130038283376054e-06,
      "loss": 0.0133,
      "step": 1498160
    },
    {
      "epoch": 2.4518044290829586,
      "grad_norm": 0.3992505371570587,
      "learning_rate": 5.129972391162537e-06,
      "loss": 0.0178,
      "step": 1498180
    },
    {
      "epoch": 2.4518371595216117,
      "grad_norm": 0.8053773045539856,
      "learning_rate": 5.12990649894902e-06,
      "loss": 0.0192,
      "step": 1498200
    },
    {
      "epoch": 2.4518698899602653,
      "grad_norm": 0.19503474235534668,
      "learning_rate": 5.129840606735502e-06,
      "loss": 0.017,
      "step": 1498220
    },
    {
      "epoch": 2.4519026203989185,
      "grad_norm": 0.6004508137702942,
      "learning_rate": 5.129774714521986e-06,
      "loss": 0.0128,
      "step": 1498240
    },
    {
      "epoch": 2.451935350837572,
      "grad_norm": 0.3700835406780243,
      "learning_rate": 5.129708822308468e-06,
      "loss": 0.0143,
      "step": 1498260
    },
    {
      "epoch": 2.4519680812762252,
      "grad_norm": 0.17583012580871582,
      "learning_rate": 5.129642930094951e-06,
      "loss": 0.0122,
      "step": 1498280
    },
    {
      "epoch": 2.4520008117148784,
      "grad_norm": 0.2666173577308655,
      "learning_rate": 5.129577037881434e-06,
      "loss": 0.0124,
      "step": 1498300
    },
    {
      "epoch": 2.452033542153532,
      "grad_norm": 0.11612991988658905,
      "learning_rate": 5.129511145667917e-06,
      "loss": 0.0136,
      "step": 1498320
    },
    {
      "epoch": 2.452066272592185,
      "grad_norm": 0.22652791440486908,
      "learning_rate": 5.129445253454399e-06,
      "loss": 0.0088,
      "step": 1498340
    },
    {
      "epoch": 2.4520990030308387,
      "grad_norm": 0.19562341272830963,
      "learning_rate": 5.129379361240883e-06,
      "loss": 0.0186,
      "step": 1498360
    },
    {
      "epoch": 2.452131733469492,
      "grad_norm": 0.46172791719436646,
      "learning_rate": 5.129313469027365e-06,
      "loss": 0.0105,
      "step": 1498380
    },
    {
      "epoch": 2.4521644639081455,
      "grad_norm": 0.272586852312088,
      "learning_rate": 5.1292475768138485e-06,
      "loss": 0.018,
      "step": 1498400
    },
    {
      "epoch": 2.4521971943467986,
      "grad_norm": 0.22333090007305145,
      "learning_rate": 5.129181684600332e-06,
      "loss": 0.0103,
      "step": 1498420
    },
    {
      "epoch": 2.4522299247854518,
      "grad_norm": 0.5284387469291687,
      "learning_rate": 5.129115792386814e-06,
      "loss": 0.013,
      "step": 1498440
    },
    {
      "epoch": 2.4522626552241054,
      "grad_norm": 0.3114713132381439,
      "learning_rate": 5.1290499001732975e-06,
      "loss": 0.0177,
      "step": 1498460
    },
    {
      "epoch": 2.4522953856627585,
      "grad_norm": 0.1780543178319931,
      "learning_rate": 5.1289840079597794e-06,
      "loss": 0.0142,
      "step": 1498480
    },
    {
      "epoch": 2.452328116101412,
      "grad_norm": 0.5457273721694946,
      "learning_rate": 5.128918115746263e-06,
      "loss": 0.0087,
      "step": 1498500
    },
    {
      "epoch": 2.4523608465400653,
      "grad_norm": 0.641042172908783,
      "learning_rate": 5.128852223532746e-06,
      "loss": 0.0175,
      "step": 1498520
    },
    {
      "epoch": 2.452393576978719,
      "grad_norm": 1.0919592380523682,
      "learning_rate": 5.1287863313192285e-06,
      "loss": 0.0122,
      "step": 1498540
    },
    {
      "epoch": 2.452426307417372,
      "grad_norm": 0.6331178545951843,
      "learning_rate": 5.128720439105711e-06,
      "loss": 0.0187,
      "step": 1498560
    },
    {
      "epoch": 2.452459037856025,
      "grad_norm": 0.4519416093826294,
      "learning_rate": 5.128654546892195e-06,
      "loss": 0.0113,
      "step": 1498580
    },
    {
      "epoch": 2.4524917682946787,
      "grad_norm": 0.4763014316558838,
      "learning_rate": 5.128588654678677e-06,
      "loss": 0.0109,
      "step": 1498600
    },
    {
      "epoch": 2.452524498733332,
      "grad_norm": 0.38347169756889343,
      "learning_rate": 5.12852276246516e-06,
      "loss": 0.0125,
      "step": 1498620
    },
    {
      "epoch": 2.4525572291719855,
      "grad_norm": 0.31508076190948486,
      "learning_rate": 5.128456870251642e-06,
      "loss": 0.0106,
      "step": 1498640
    },
    {
      "epoch": 2.4525899596106386,
      "grad_norm": 0.24261419475078583,
      "learning_rate": 5.128390978038126e-06,
      "loss": 0.0182,
      "step": 1498660
    },
    {
      "epoch": 2.4526226900492922,
      "grad_norm": 0.3870469927787781,
      "learning_rate": 5.128325085824608e-06,
      "loss": 0.019,
      "step": 1498680
    },
    {
      "epoch": 2.4526554204879454,
      "grad_norm": 0.068589948117733,
      "learning_rate": 5.128259193611091e-06,
      "loss": 0.0184,
      "step": 1498700
    },
    {
      "epoch": 2.4526881509265985,
      "grad_norm": 0.2402164191007614,
      "learning_rate": 5.128193301397574e-06,
      "loss": 0.0099,
      "step": 1498720
    },
    {
      "epoch": 2.452720881365252,
      "grad_norm": 0.2071637362241745,
      "learning_rate": 5.128127409184057e-06,
      "loss": 0.0117,
      "step": 1498740
    },
    {
      "epoch": 2.4527536118039053,
      "grad_norm": 0.27695491909980774,
      "learning_rate": 5.12806151697054e-06,
      "loss": 0.0258,
      "step": 1498760
    },
    {
      "epoch": 2.452786342242559,
      "grad_norm": 0.19687466323375702,
      "learning_rate": 5.127995624757023e-06,
      "loss": 0.0117,
      "step": 1498780
    },
    {
      "epoch": 2.452819072681212,
      "grad_norm": 0.4222743511199951,
      "learning_rate": 5.127929732543506e-06,
      "loss": 0.0184,
      "step": 1498800
    },
    {
      "epoch": 2.4528518031198656,
      "grad_norm": 0.09110677987337112,
      "learning_rate": 5.1278638403299886e-06,
      "loss": 0.0109,
      "step": 1498820
    },
    {
      "epoch": 2.4528845335585188,
      "grad_norm": 0.3153897225856781,
      "learning_rate": 5.127797948116472e-06,
      "loss": 0.0135,
      "step": 1498840
    },
    {
      "epoch": 2.452917263997172,
      "grad_norm": 1.0441123247146606,
      "learning_rate": 5.127732055902954e-06,
      "loss": 0.0189,
      "step": 1498860
    },
    {
      "epoch": 2.4529499944358255,
      "grad_norm": 0.09934430569410324,
      "learning_rate": 5.127666163689438e-06,
      "loss": 0.0112,
      "step": 1498880
    },
    {
      "epoch": 2.4529827248744787,
      "grad_norm": 0.27191823720932007,
      "learning_rate": 5.1276002714759195e-06,
      "loss": 0.014,
      "step": 1498900
    },
    {
      "epoch": 2.4530154553131323,
      "grad_norm": 0.2246839851140976,
      "learning_rate": 5.127534379262403e-06,
      "loss": 0.0173,
      "step": 1498920
    },
    {
      "epoch": 2.4530481857517854,
      "grad_norm": 0.4304199516773224,
      "learning_rate": 5.127468487048886e-06,
      "loss": 0.0155,
      "step": 1498940
    },
    {
      "epoch": 2.453080916190439,
      "grad_norm": 0.07254146784543991,
      "learning_rate": 5.127402594835369e-06,
      "loss": 0.0172,
      "step": 1498960
    },
    {
      "epoch": 2.453113646629092,
      "grad_norm": 0.6018791198730469,
      "learning_rate": 5.127336702621851e-06,
      "loss": 0.0145,
      "step": 1498980
    },
    {
      "epoch": 2.4531463770677453,
      "grad_norm": 0.3418790400028229,
      "learning_rate": 5.127270810408335e-06,
      "loss": 0.0122,
      "step": 1499000
    },
    {
      "epoch": 2.453179107506399,
      "grad_norm": 0.1389477699995041,
      "learning_rate": 5.127204918194817e-06,
      "loss": 0.0211,
      "step": 1499020
    },
    {
      "epoch": 2.453211837945052,
      "grad_norm": 0.11106276512145996,
      "learning_rate": 5.1271390259813e-06,
      "loss": 0.0124,
      "step": 1499040
    },
    {
      "epoch": 2.4532445683837056,
      "grad_norm": 0.11426828056573868,
      "learning_rate": 5.127073133767782e-06,
      "loss": 0.0124,
      "step": 1499060
    },
    {
      "epoch": 2.453277298822359,
      "grad_norm": 0.15931864082813263,
      "learning_rate": 5.127007241554266e-06,
      "loss": 0.0104,
      "step": 1499080
    },
    {
      "epoch": 2.4533100292610124,
      "grad_norm": 0.7173444628715515,
      "learning_rate": 5.126941349340749e-06,
      "loss": 0.0146,
      "step": 1499100
    },
    {
      "epoch": 2.4533427596996655,
      "grad_norm": 1.2120156288146973,
      "learning_rate": 5.126875457127231e-06,
      "loss": 0.016,
      "step": 1499120
    },
    {
      "epoch": 2.4533754901383187,
      "grad_norm": 0.4139449894428253,
      "learning_rate": 5.126809564913715e-06,
      "loss": 0.0107,
      "step": 1499140
    },
    {
      "epoch": 2.4534082205769723,
      "grad_norm": 0.41225865483283997,
      "learning_rate": 5.126743672700198e-06,
      "loss": 0.0153,
      "step": 1499160
    },
    {
      "epoch": 2.4534409510156254,
      "grad_norm": 0.7843612432479858,
      "learning_rate": 5.12667778048668e-06,
      "loss": 0.0131,
      "step": 1499180
    },
    {
      "epoch": 2.453473681454279,
      "grad_norm": 0.2286330610513687,
      "learning_rate": 5.126611888273163e-06,
      "loss": 0.0094,
      "step": 1499200
    },
    {
      "epoch": 2.453506411892932,
      "grad_norm": 0.4001523554325104,
      "learning_rate": 5.126545996059647e-06,
      "loss": 0.0135,
      "step": 1499220
    },
    {
      "epoch": 2.4535391423315858,
      "grad_norm": 0.11247973144054413,
      "learning_rate": 5.126480103846129e-06,
      "loss": 0.0123,
      "step": 1499240
    },
    {
      "epoch": 2.453571872770239,
      "grad_norm": 0.8060680627822876,
      "learning_rate": 5.126414211632612e-06,
      "loss": 0.0226,
      "step": 1499260
    },
    {
      "epoch": 2.453604603208892,
      "grad_norm": 0.2522225081920624,
      "learning_rate": 5.126348319419094e-06,
      "loss": 0.013,
      "step": 1499280
    },
    {
      "epoch": 2.4536373336475457,
      "grad_norm": 0.23149506747722626,
      "learning_rate": 5.126282427205578e-06,
      "loss": 0.0131,
      "step": 1499300
    },
    {
      "epoch": 2.453670064086199,
      "grad_norm": 0.3581540584564209,
      "learning_rate": 5.1262165349920604e-06,
      "loss": 0.0111,
      "step": 1499320
    },
    {
      "epoch": 2.4537027945248524,
      "grad_norm": 0.21500590443611145,
      "learning_rate": 5.126150642778543e-06,
      "loss": 0.011,
      "step": 1499340
    },
    {
      "epoch": 2.4537355249635056,
      "grad_norm": 0.8559278249740601,
      "learning_rate": 5.126084750565026e-06,
      "loss": 0.0115,
      "step": 1499360
    },
    {
      "epoch": 2.453768255402159,
      "grad_norm": 0.21599577367305756,
      "learning_rate": 5.1260188583515095e-06,
      "loss": 0.0183,
      "step": 1499380
    },
    {
      "epoch": 2.4538009858408123,
      "grad_norm": 0.26722434163093567,
      "learning_rate": 5.125952966137991e-06,
      "loss": 0.0183,
      "step": 1499400
    },
    {
      "epoch": 2.4538337162794654,
      "grad_norm": 0.13947628438472748,
      "learning_rate": 5.125887073924475e-06,
      "loss": 0.0184,
      "step": 1499420
    },
    {
      "epoch": 2.453866446718119,
      "grad_norm": 0.48301324248313904,
      "learning_rate": 5.125821181710957e-06,
      "loss": 0.0174,
      "step": 1499440
    },
    {
      "epoch": 2.453899177156772,
      "grad_norm": 0.17089635133743286,
      "learning_rate": 5.1257552894974405e-06,
      "loss": 0.0104,
      "step": 1499460
    },
    {
      "epoch": 2.453931907595426,
      "grad_norm": 0.45267453789711,
      "learning_rate": 5.125689397283924e-06,
      "loss": 0.0128,
      "step": 1499480
    },
    {
      "epoch": 2.453964638034079,
      "grad_norm": 0.2843649387359619,
      "learning_rate": 5.125623505070406e-06,
      "loss": 0.0118,
      "step": 1499500
    },
    {
      "epoch": 2.453997368472732,
      "grad_norm": 0.1948103904724121,
      "learning_rate": 5.1255576128568895e-06,
      "loss": 0.0193,
      "step": 1499520
    },
    {
      "epoch": 2.4540300989113857,
      "grad_norm": 0.11896020174026489,
      "learning_rate": 5.125491720643372e-06,
      "loss": 0.0183,
      "step": 1499540
    },
    {
      "epoch": 2.454062829350039,
      "grad_norm": 0.4075024127960205,
      "learning_rate": 5.125425828429855e-06,
      "loss": 0.0139,
      "step": 1499560
    },
    {
      "epoch": 2.4540955597886924,
      "grad_norm": 2.649655342102051,
      "learning_rate": 5.125359936216338e-06,
      "loss": 0.0232,
      "step": 1499580
    },
    {
      "epoch": 2.4541282902273456,
      "grad_norm": 0.2572052478790283,
      "learning_rate": 5.125294044002821e-06,
      "loss": 0.019,
      "step": 1499600
    },
    {
      "epoch": 2.4541610206659987,
      "grad_norm": 0.9064931273460388,
      "learning_rate": 5.125228151789303e-06,
      "loss": 0.0153,
      "step": 1499620
    },
    {
      "epoch": 2.4541937511046523,
      "grad_norm": 0.23106716573238373,
      "learning_rate": 5.125162259575787e-06,
      "loss": 0.0125,
      "step": 1499640
    },
    {
      "epoch": 2.4542264815433055,
      "grad_norm": 0.386368066072464,
      "learning_rate": 5.125096367362269e-06,
      "loss": 0.0195,
      "step": 1499660
    },
    {
      "epoch": 2.454259211981959,
      "grad_norm": 0.29675644636154175,
      "learning_rate": 5.125030475148752e-06,
      "loss": 0.0118,
      "step": 1499680
    },
    {
      "epoch": 2.454291942420612,
      "grad_norm": 0.38567787408828735,
      "learning_rate": 5.124964582935234e-06,
      "loss": 0.0171,
      "step": 1499700
    },
    {
      "epoch": 2.454324672859266,
      "grad_norm": 0.4394102692604065,
      "learning_rate": 5.124898690721718e-06,
      "loss": 0.0131,
      "step": 1499720
    },
    {
      "epoch": 2.454357403297919,
      "grad_norm": 0.4795807898044586,
      "learning_rate": 5.1248327985082005e-06,
      "loss": 0.0148,
      "step": 1499740
    },
    {
      "epoch": 2.454390133736572,
      "grad_norm": 0.4091588854789734,
      "learning_rate": 5.124766906294683e-06,
      "loss": 0.0178,
      "step": 1499760
    },
    {
      "epoch": 2.4544228641752257,
      "grad_norm": 0.4282585084438324,
      "learning_rate": 5.124701014081166e-06,
      "loss": 0.0128,
      "step": 1499780
    },
    {
      "epoch": 2.454455594613879,
      "grad_norm": 0.2834189832210541,
      "learning_rate": 5.1246351218676496e-06,
      "loss": 0.0167,
      "step": 1499800
    },
    {
      "epoch": 2.4544883250525324,
      "grad_norm": 0.32669514417648315,
      "learning_rate": 5.124569229654132e-06,
      "loss": 0.0143,
      "step": 1499820
    },
    {
      "epoch": 2.4545210554911856,
      "grad_norm": 0.27982097864151,
      "learning_rate": 5.124503337440615e-06,
      "loss": 0.0212,
      "step": 1499840
    },
    {
      "epoch": 2.454553785929839,
      "grad_norm": 0.5031416416168213,
      "learning_rate": 5.124437445227099e-06,
      "loss": 0.0171,
      "step": 1499860
    },
    {
      "epoch": 2.4545865163684923,
      "grad_norm": 0.14938656985759735,
      "learning_rate": 5.1243715530135805e-06,
      "loss": 0.0099,
      "step": 1499880
    },
    {
      "epoch": 2.4546192468071455,
      "grad_norm": 0.6172202825546265,
      "learning_rate": 5.124305660800064e-06,
      "loss": 0.0128,
      "step": 1499900
    },
    {
      "epoch": 2.454651977245799,
      "grad_norm": 0.439864844083786,
      "learning_rate": 5.124239768586546e-06,
      "loss": 0.0103,
      "step": 1499920
    },
    {
      "epoch": 2.4546847076844522,
      "grad_norm": 0.42125487327575684,
      "learning_rate": 5.12417387637303e-06,
      "loss": 0.0175,
      "step": 1499940
    },
    {
      "epoch": 2.454717438123106,
      "grad_norm": 0.25609683990478516,
      "learning_rate": 5.124107984159512e-06,
      "loss": 0.0139,
      "step": 1499960
    },
    {
      "epoch": 2.454750168561759,
      "grad_norm": 0.6052302718162537,
      "learning_rate": 5.124042091945995e-06,
      "loss": 0.0205,
      "step": 1499980
    },
    {
      "epoch": 2.4547828990004126,
      "grad_norm": 0.2675916254520416,
      "learning_rate": 5.123976199732478e-06,
      "loss": 0.0116,
      "step": 1500000
    },
    {
      "epoch": 2.4547828990004126,
      "eval_loss": 0.008297108113765717,
      "eval_runtime": 6512.0097,
      "eval_samples_per_second": 157.84,
      "eval_steps_per_second": 15.784,
      "eval_sts-dev_pearson_cosine": 0.9809742243514269,
      "eval_sts-dev_spearman_cosine": 0.8932086744919954,
      "step": 1500000
    },
    {
      "epoch": 2.4548156294390657,
      "grad_norm": 0.2675994336605072,
      "learning_rate": 5.123910307518961e-06,
      "loss": 0.0142,
      "step": 1500020
    },
    {
      "epoch": 2.454848359877719,
      "grad_norm": 0.6840959191322327,
      "learning_rate": 5.123844415305443e-06,
      "loss": 0.0213,
      "step": 1500040
    },
    {
      "epoch": 2.4548810903163725,
      "grad_norm": 0.3745874762535095,
      "learning_rate": 5.123778523091927e-06,
      "loss": 0.0166,
      "step": 1500060
    },
    {
      "epoch": 2.4549138207550256,
      "grad_norm": 0.20862974226474762,
      "learning_rate": 5.123712630878409e-06,
      "loss": 0.0137,
      "step": 1500080
    },
    {
      "epoch": 2.454946551193679,
      "grad_norm": 1.2907383441925049,
      "learning_rate": 5.123646738664892e-06,
      "loss": 0.0149,
      "step": 1500100
    },
    {
      "epoch": 2.4549792816323324,
      "grad_norm": 0.29934683442115784,
      "learning_rate": 5.123580846451375e-06,
      "loss": 0.0146,
      "step": 1500120
    },
    {
      "epoch": 2.455012012070986,
      "grad_norm": 0.1313634216785431,
      "learning_rate": 5.123514954237858e-06,
      "loss": 0.0182,
      "step": 1500140
    },
    {
      "epoch": 2.455044742509639,
      "grad_norm": 0.6080235838890076,
      "learning_rate": 5.1234490620243414e-06,
      "loss": 0.0149,
      "step": 1500160
    },
    {
      "epoch": 2.4550774729482923,
      "grad_norm": 0.7028915286064148,
      "learning_rate": 5.123383169810824e-06,
      "loss": 0.0145,
      "step": 1500180
    },
    {
      "epoch": 2.455110203386946,
      "grad_norm": 0.12498107552528381,
      "learning_rate": 5.123317277597307e-06,
      "loss": 0.013,
      "step": 1500200
    },
    {
      "epoch": 2.455142933825599,
      "grad_norm": 0.6704649329185486,
      "learning_rate": 5.12325138538379e-06,
      "loss": 0.011,
      "step": 1500220
    },
    {
      "epoch": 2.4551756642642526,
      "grad_norm": 1.0973362922668457,
      "learning_rate": 5.123185493170273e-06,
      "loss": 0.0255,
      "step": 1500240
    },
    {
      "epoch": 2.4552083947029057,
      "grad_norm": 0.4885009229183197,
      "learning_rate": 5.123119600956755e-06,
      "loss": 0.0118,
      "step": 1500260
    },
    {
      "epoch": 2.4552411251415593,
      "grad_norm": 0.7458115220069885,
      "learning_rate": 5.123053708743239e-06,
      "loss": 0.011,
      "step": 1500280
    },
    {
      "epoch": 2.4552738555802125,
      "grad_norm": 0.3011806309223175,
      "learning_rate": 5.122987816529721e-06,
      "loss": 0.0198,
      "step": 1500300
    },
    {
      "epoch": 2.4553065860188656,
      "grad_norm": 0.2803627848625183,
      "learning_rate": 5.122921924316204e-06,
      "loss": 0.0122,
      "step": 1500320
    },
    {
      "epoch": 2.4553393164575192,
      "grad_norm": 0.25120341777801514,
      "learning_rate": 5.122856032102687e-06,
      "loss": 0.0208,
      "step": 1500340
    },
    {
      "epoch": 2.4553720468961724,
      "grad_norm": 0.14986839890480042,
      "learning_rate": 5.12279013988917e-06,
      "loss": 0.0081,
      "step": 1500360
    },
    {
      "epoch": 2.455404777334826,
      "grad_norm": 0.5181820392608643,
      "learning_rate": 5.122724247675652e-06,
      "loss": 0.0103,
      "step": 1500380
    },
    {
      "epoch": 2.455437507773479,
      "grad_norm": 0.12081101536750793,
      "learning_rate": 5.122658355462136e-06,
      "loss": 0.0145,
      "step": 1500400
    },
    {
      "epoch": 2.4554702382121327,
      "grad_norm": 0.14492356777191162,
      "learning_rate": 5.122592463248618e-06,
      "loss": 0.0122,
      "step": 1500420
    },
    {
      "epoch": 2.455502968650786,
      "grad_norm": 0.9249213337898254,
      "learning_rate": 5.1225265710351015e-06,
      "loss": 0.0105,
      "step": 1500440
    },
    {
      "epoch": 2.455535699089439,
      "grad_norm": 0.5664758086204529,
      "learning_rate": 5.122460678821583e-06,
      "loss": 0.0228,
      "step": 1500460
    },
    {
      "epoch": 2.4555684295280926,
      "grad_norm": 0.10372263193130493,
      "learning_rate": 5.122394786608067e-06,
      "loss": 0.0117,
      "step": 1500480
    },
    {
      "epoch": 2.4556011599667458,
      "grad_norm": 0.7721405625343323,
      "learning_rate": 5.12232889439455e-06,
      "loss": 0.0175,
      "step": 1500500
    },
    {
      "epoch": 2.4556338904053994,
      "grad_norm": 1.166468858718872,
      "learning_rate": 5.1222630021810324e-06,
      "loss": 0.0136,
      "step": 1500520
    },
    {
      "epoch": 2.4556666208440525,
      "grad_norm": 0.19864994287490845,
      "learning_rate": 5.122197109967516e-06,
      "loss": 0.0133,
      "step": 1500540
    },
    {
      "epoch": 2.455699351282706,
      "grad_norm": 0.22541703283786774,
      "learning_rate": 5.122131217753999e-06,
      "loss": 0.0124,
      "step": 1500560
    },
    {
      "epoch": 2.4557320817213593,
      "grad_norm": 0.6627529859542847,
      "learning_rate": 5.1220653255404815e-06,
      "loss": 0.0172,
      "step": 1500580
    },
    {
      "epoch": 2.4557648121600124,
      "grad_norm": 0.08515330404043198,
      "learning_rate": 5.121999433326964e-06,
      "loss": 0.0147,
      "step": 1500600
    },
    {
      "epoch": 2.455797542598666,
      "grad_norm": 0.3529416024684906,
      "learning_rate": 5.121933541113448e-06,
      "loss": 0.0199,
      "step": 1500620
    },
    {
      "epoch": 2.455830273037319,
      "grad_norm": 0.2775276005268097,
      "learning_rate": 5.12186764889993e-06,
      "loss": 0.0179,
      "step": 1500640
    },
    {
      "epoch": 2.4558630034759728,
      "grad_norm": 0.3706100881099701,
      "learning_rate": 5.121801756686413e-06,
      "loss": 0.0174,
      "step": 1500660
    },
    {
      "epoch": 2.455895733914626,
      "grad_norm": 0.2999982535839081,
      "learning_rate": 5.121735864472895e-06,
      "loss": 0.0143,
      "step": 1500680
    },
    {
      "epoch": 2.4559284643532795,
      "grad_norm": 0.40323352813720703,
      "learning_rate": 5.121669972259379e-06,
      "loss": 0.016,
      "step": 1500700
    },
    {
      "epoch": 2.4559611947919326,
      "grad_norm": 0.2692131996154785,
      "learning_rate": 5.121604080045861e-06,
      "loss": 0.0097,
      "step": 1500720
    },
    {
      "epoch": 2.455993925230586,
      "grad_norm": 0.6114639043807983,
      "learning_rate": 5.121538187832344e-06,
      "loss": 0.0146,
      "step": 1500740
    },
    {
      "epoch": 2.4560266556692394,
      "grad_norm": 1.0296047925949097,
      "learning_rate": 5.121472295618827e-06,
      "loss": 0.021,
      "step": 1500760
    },
    {
      "epoch": 2.4560593861078925,
      "grad_norm": 0.3618147373199463,
      "learning_rate": 5.12140640340531e-06,
      "loss": 0.0155,
      "step": 1500780
    },
    {
      "epoch": 2.456092116546546,
      "grad_norm": 0.4814312160015106,
      "learning_rate": 5.1213405111917925e-06,
      "loss": 0.015,
      "step": 1500800
    },
    {
      "epoch": 2.4561248469851993,
      "grad_norm": 0.2207869589328766,
      "learning_rate": 5.121274618978276e-06,
      "loss": 0.0139,
      "step": 1500820
    },
    {
      "epoch": 2.456157577423853,
      "grad_norm": 0.24188880622386932,
      "learning_rate": 5.121208726764758e-06,
      "loss": 0.0072,
      "step": 1500840
    },
    {
      "epoch": 2.456190307862506,
      "grad_norm": 0.31315478682518005,
      "learning_rate": 5.1211428345512416e-06,
      "loss": 0.0112,
      "step": 1500860
    },
    {
      "epoch": 2.456223038301159,
      "grad_norm": 0.9748426675796509,
      "learning_rate": 5.121076942337725e-06,
      "loss": 0.0167,
      "step": 1500880
    },
    {
      "epoch": 2.4562557687398128,
      "grad_norm": 0.2894294559955597,
      "learning_rate": 5.121011050124207e-06,
      "loss": 0.0123,
      "step": 1500900
    },
    {
      "epoch": 2.456288499178466,
      "grad_norm": 0.3251124620437622,
      "learning_rate": 5.120945157910691e-06,
      "loss": 0.0133,
      "step": 1500920
    },
    {
      "epoch": 2.4563212296171195,
      "grad_norm": 0.16687455773353577,
      "learning_rate": 5.1208792656971725e-06,
      "loss": 0.0247,
      "step": 1500940
    },
    {
      "epoch": 2.4563539600557727,
      "grad_norm": 0.35562923550605774,
      "learning_rate": 5.120813373483656e-06,
      "loss": 0.0179,
      "step": 1500960
    },
    {
      "epoch": 2.456386690494426,
      "grad_norm": 0.11043362319469452,
      "learning_rate": 5.120747481270139e-06,
      "loss": 0.0119,
      "step": 1500980
    },
    {
      "epoch": 2.4564194209330794,
      "grad_norm": 0.5378168821334839,
      "learning_rate": 5.120681589056622e-06,
      "loss": 0.0176,
      "step": 1501000
    },
    {
      "epoch": 2.4564521513717326,
      "grad_norm": 0.3534659147262573,
      "learning_rate": 5.120615696843104e-06,
      "loss": 0.0113,
      "step": 1501020
    },
    {
      "epoch": 2.456484881810386,
      "grad_norm": 0.15022465586662292,
      "learning_rate": 5.120549804629588e-06,
      "loss": 0.0111,
      "step": 1501040
    },
    {
      "epoch": 2.4565176122490393,
      "grad_norm": 0.18061020970344543,
      "learning_rate": 5.12048391241607e-06,
      "loss": 0.011,
      "step": 1501060
    },
    {
      "epoch": 2.4565503426876925,
      "grad_norm": 0.5565158128738403,
      "learning_rate": 5.120418020202553e-06,
      "loss": 0.0173,
      "step": 1501080
    },
    {
      "epoch": 2.456583073126346,
      "grad_norm": 0.9040306806564331,
      "learning_rate": 5.120352127989035e-06,
      "loss": 0.017,
      "step": 1501100
    },
    {
      "epoch": 2.456615803564999,
      "grad_norm": 0.9256357550621033,
      "learning_rate": 5.120286235775519e-06,
      "loss": 0.0134,
      "step": 1501120
    },
    {
      "epoch": 2.456648534003653,
      "grad_norm": 0.3123078942298889,
      "learning_rate": 5.120220343562002e-06,
      "loss": 0.0193,
      "step": 1501140
    },
    {
      "epoch": 2.456681264442306,
      "grad_norm": 4.698055744171143,
      "learning_rate": 5.120154451348484e-06,
      "loss": 0.016,
      "step": 1501160
    },
    {
      "epoch": 2.4567139948809595,
      "grad_norm": 0.3739639222621918,
      "learning_rate": 5.120088559134967e-06,
      "loss": 0.0126,
      "step": 1501180
    },
    {
      "epoch": 2.4567467253196127,
      "grad_norm": 0.4114467203617096,
      "learning_rate": 5.120022666921451e-06,
      "loss": 0.0204,
      "step": 1501200
    },
    {
      "epoch": 2.456779455758266,
      "grad_norm": 0.4444955587387085,
      "learning_rate": 5.119956774707933e-06,
      "loss": 0.0131,
      "step": 1501220
    },
    {
      "epoch": 2.4568121861969194,
      "grad_norm": 0.3325822055339813,
      "learning_rate": 5.119890882494416e-06,
      "loss": 0.0157,
      "step": 1501240
    },
    {
      "epoch": 2.4568449166355726,
      "grad_norm": 0.37100574374198914,
      "learning_rate": 5.1198249902809e-06,
      "loss": 0.0194,
      "step": 1501260
    },
    {
      "epoch": 2.456877647074226,
      "grad_norm": 0.32799333333969116,
      "learning_rate": 5.119759098067382e-06,
      "loss": 0.0118,
      "step": 1501280
    },
    {
      "epoch": 2.4569103775128793,
      "grad_norm": 0.4561935365200043,
      "learning_rate": 5.119693205853865e-06,
      "loss": 0.0105,
      "step": 1501300
    },
    {
      "epoch": 2.456943107951533,
      "grad_norm": 0.6480251550674438,
      "learning_rate": 5.119627313640347e-06,
      "loss": 0.0163,
      "step": 1501320
    },
    {
      "epoch": 2.456975838390186,
      "grad_norm": 0.29341769218444824,
      "learning_rate": 5.119561421426831e-06,
      "loss": 0.0204,
      "step": 1501340
    },
    {
      "epoch": 2.4570085688288392,
      "grad_norm": 0.2732606530189514,
      "learning_rate": 5.1194955292133134e-06,
      "loss": 0.0129,
      "step": 1501360
    },
    {
      "epoch": 2.457041299267493,
      "grad_norm": 0.4117705523967743,
      "learning_rate": 5.119429636999796e-06,
      "loss": 0.0107,
      "step": 1501380
    },
    {
      "epoch": 2.457074029706146,
      "grad_norm": 0.24576905369758606,
      "learning_rate": 5.119363744786279e-06,
      "loss": 0.0143,
      "step": 1501400
    },
    {
      "epoch": 2.4571067601447996,
      "grad_norm": 0.2378055304288864,
      "learning_rate": 5.1192978525727625e-06,
      "loss": 0.0081,
      "step": 1501420
    },
    {
      "epoch": 2.4571394905834527,
      "grad_norm": 0.17235425114631653,
      "learning_rate": 5.119231960359244e-06,
      "loss": 0.0112,
      "step": 1501440
    },
    {
      "epoch": 2.4571722210221063,
      "grad_norm": 0.2996442914009094,
      "learning_rate": 5.119166068145728e-06,
      "loss": 0.0143,
      "step": 1501460
    },
    {
      "epoch": 2.4572049514607595,
      "grad_norm": 0.10525313764810562,
      "learning_rate": 5.11910017593221e-06,
      "loss": 0.0171,
      "step": 1501480
    },
    {
      "epoch": 2.4572376818994126,
      "grad_norm": 0.7384225726127625,
      "learning_rate": 5.1190342837186935e-06,
      "loss": 0.0147,
      "step": 1501500
    },
    {
      "epoch": 2.457270412338066,
      "grad_norm": 0.22508610785007477,
      "learning_rate": 5.118968391505175e-06,
      "loss": 0.0101,
      "step": 1501520
    },
    {
      "epoch": 2.4573031427767194,
      "grad_norm": 0.962498128414154,
      "learning_rate": 5.118902499291659e-06,
      "loss": 0.0195,
      "step": 1501540
    },
    {
      "epoch": 2.457335873215373,
      "grad_norm": 0.29753246903419495,
      "learning_rate": 5.118836607078142e-06,
      "loss": 0.0129,
      "step": 1501560
    },
    {
      "epoch": 2.457368603654026,
      "grad_norm": 1.3486816883087158,
      "learning_rate": 5.1187707148646244e-06,
      "loss": 0.0197,
      "step": 1501580
    },
    {
      "epoch": 2.4574013340926797,
      "grad_norm": 0.317480206489563,
      "learning_rate": 5.118704822651108e-06,
      "loss": 0.0119,
      "step": 1501600
    },
    {
      "epoch": 2.457434064531333,
      "grad_norm": 0.5373212099075317,
      "learning_rate": 5.118638930437591e-06,
      "loss": 0.0148,
      "step": 1501620
    },
    {
      "epoch": 2.457466794969986,
      "grad_norm": 0.7037032246589661,
      "learning_rate": 5.118573038224074e-06,
      "loss": 0.0194,
      "step": 1501640
    },
    {
      "epoch": 2.4574995254086396,
      "grad_norm": 0.2036934494972229,
      "learning_rate": 5.118507146010556e-06,
      "loss": 0.0125,
      "step": 1501660
    },
    {
      "epoch": 2.4575322558472927,
      "grad_norm": 0.27066248655319214,
      "learning_rate": 5.11844125379704e-06,
      "loss": 0.0113,
      "step": 1501680
    },
    {
      "epoch": 2.4575649862859463,
      "grad_norm": 0.5239624977111816,
      "learning_rate": 5.118375361583522e-06,
      "loss": 0.0156,
      "step": 1501700
    },
    {
      "epoch": 2.4575977167245995,
      "grad_norm": 0.29006364941596985,
      "learning_rate": 5.118309469370005e-06,
      "loss": 0.0108,
      "step": 1501720
    },
    {
      "epoch": 2.457630447163253,
      "grad_norm": 0.3431355357170105,
      "learning_rate": 5.118243577156487e-06,
      "loss": 0.0131,
      "step": 1501740
    },
    {
      "epoch": 2.4576631776019062,
      "grad_norm": 0.11274459958076477,
      "learning_rate": 5.118177684942971e-06,
      "loss": 0.0098,
      "step": 1501760
    },
    {
      "epoch": 2.4576959080405594,
      "grad_norm": 0.3274261951446533,
      "learning_rate": 5.1181117927294535e-06,
      "loss": 0.0185,
      "step": 1501780
    },
    {
      "epoch": 2.457728638479213,
      "grad_norm": 0.36780622601509094,
      "learning_rate": 5.118045900515936e-06,
      "loss": 0.0172,
      "step": 1501800
    },
    {
      "epoch": 2.457761368917866,
      "grad_norm": 0.16649353504180908,
      "learning_rate": 5.117980008302419e-06,
      "loss": 0.0151,
      "step": 1501820
    },
    {
      "epoch": 2.4577940993565197,
      "grad_norm": 0.8092129826545715,
      "learning_rate": 5.117914116088903e-06,
      "loss": 0.0131,
      "step": 1501840
    },
    {
      "epoch": 2.457826829795173,
      "grad_norm": 0.1648111343383789,
      "learning_rate": 5.1178482238753845e-06,
      "loss": 0.0145,
      "step": 1501860
    },
    {
      "epoch": 2.4578595602338265,
      "grad_norm": 0.7995017766952515,
      "learning_rate": 5.117782331661868e-06,
      "loss": 0.0213,
      "step": 1501880
    },
    {
      "epoch": 2.4578922906724796,
      "grad_norm": 1.5169594287872314,
      "learning_rate": 5.11771643944835e-06,
      "loss": 0.0132,
      "step": 1501900
    },
    {
      "epoch": 2.4579250211111328,
      "grad_norm": 0.13990503549575806,
      "learning_rate": 5.1176505472348335e-06,
      "loss": 0.0122,
      "step": 1501920
    },
    {
      "epoch": 2.4579577515497864,
      "grad_norm": 0.12192913144826889,
      "learning_rate": 5.117584655021317e-06,
      "loss": 0.0146,
      "step": 1501940
    },
    {
      "epoch": 2.4579904819884395,
      "grad_norm": 0.16220971941947937,
      "learning_rate": 5.117518762807799e-06,
      "loss": 0.0162,
      "step": 1501960
    },
    {
      "epoch": 2.458023212427093,
      "grad_norm": 0.27590376138687134,
      "learning_rate": 5.117452870594283e-06,
      "loss": 0.0145,
      "step": 1501980
    },
    {
      "epoch": 2.4580559428657462,
      "grad_norm": 0.3469138443470001,
      "learning_rate": 5.117386978380765e-06,
      "loss": 0.0122,
      "step": 1502000
    },
    {
      "epoch": 2.4580886733044,
      "grad_norm": 0.3278869390487671,
      "learning_rate": 5.117321086167248e-06,
      "loss": 0.0231,
      "step": 1502020
    },
    {
      "epoch": 2.458121403743053,
      "grad_norm": 0.2739265263080597,
      "learning_rate": 5.117255193953731e-06,
      "loss": 0.0139,
      "step": 1502040
    },
    {
      "epoch": 2.458154134181706,
      "grad_norm": 0.14337605237960815,
      "learning_rate": 5.117189301740214e-06,
      "loss": 0.0182,
      "step": 1502060
    },
    {
      "epoch": 2.4581868646203597,
      "grad_norm": 0.6088622212409973,
      "learning_rate": 5.117123409526696e-06,
      "loss": 0.0148,
      "step": 1502080
    },
    {
      "epoch": 2.458219595059013,
      "grad_norm": 0.7108028531074524,
      "learning_rate": 5.11705751731318e-06,
      "loss": 0.0126,
      "step": 1502100
    },
    {
      "epoch": 2.4582523254976665,
      "grad_norm": 0.4085063934326172,
      "learning_rate": 5.116991625099662e-06,
      "loss": 0.0102,
      "step": 1502120
    },
    {
      "epoch": 2.4582850559363196,
      "grad_norm": 0.23962341248989105,
      "learning_rate": 5.116925732886145e-06,
      "loss": 0.0207,
      "step": 1502140
    },
    {
      "epoch": 2.4583177863749732,
      "grad_norm": 0.31483545899391174,
      "learning_rate": 5.116859840672628e-06,
      "loss": 0.0126,
      "step": 1502160
    },
    {
      "epoch": 2.4583505168136264,
      "grad_norm": 0.18430164456367493,
      "learning_rate": 5.116793948459111e-06,
      "loss": 0.0113,
      "step": 1502180
    },
    {
      "epoch": 2.4583832472522795,
      "grad_norm": 0.308472603559494,
      "learning_rate": 5.116728056245594e-06,
      "loss": 0.0119,
      "step": 1502200
    },
    {
      "epoch": 2.458415977690933,
      "grad_norm": 0.49593499302864075,
      "learning_rate": 5.116662164032077e-06,
      "loss": 0.0146,
      "step": 1502220
    },
    {
      "epoch": 2.4584487081295863,
      "grad_norm": 0.17041049897670746,
      "learning_rate": 5.116596271818559e-06,
      "loss": 0.0169,
      "step": 1502240
    },
    {
      "epoch": 2.45848143856824,
      "grad_norm": 0.9722163081169128,
      "learning_rate": 5.116530379605043e-06,
      "loss": 0.0176,
      "step": 1502260
    },
    {
      "epoch": 2.458514169006893,
      "grad_norm": 0.25930795073509216,
      "learning_rate": 5.116464487391526e-06,
      "loss": 0.0193,
      "step": 1502280
    },
    {
      "epoch": 2.4585468994455466,
      "grad_norm": 0.18878689408302307,
      "learning_rate": 5.116398595178008e-06,
      "loss": 0.0187,
      "step": 1502300
    },
    {
      "epoch": 2.4585796298841998,
      "grad_norm": 0.5108324289321899,
      "learning_rate": 5.116332702964492e-06,
      "loss": 0.0163,
      "step": 1502320
    },
    {
      "epoch": 2.458612360322853,
      "grad_norm": 0.5053138136863708,
      "learning_rate": 5.116266810750974e-06,
      "loss": 0.0127,
      "step": 1502340
    },
    {
      "epoch": 2.4586450907615065,
      "grad_norm": 1.4247279167175293,
      "learning_rate": 5.116200918537457e-06,
      "loss": 0.0169,
      "step": 1502360
    },
    {
      "epoch": 2.4586778212001597,
      "grad_norm": 0.5153573155403137,
      "learning_rate": 5.11613502632394e-06,
      "loss": 0.0131,
      "step": 1502380
    },
    {
      "epoch": 2.4587105516388132,
      "grad_norm": 0.21043680608272552,
      "learning_rate": 5.116069134110423e-06,
      "loss": 0.0145,
      "step": 1502400
    },
    {
      "epoch": 2.4587432820774664,
      "grad_norm": 0.500815212726593,
      "learning_rate": 5.116003241896905e-06,
      "loss": 0.0132,
      "step": 1502420
    },
    {
      "epoch": 2.4587760125161195,
      "grad_norm": 0.26999568939208984,
      "learning_rate": 5.115937349683389e-06,
      "loss": 0.0188,
      "step": 1502440
    },
    {
      "epoch": 2.458808742954773,
      "grad_norm": 0.24240273237228394,
      "learning_rate": 5.115871457469871e-06,
      "loss": 0.0122,
      "step": 1502460
    },
    {
      "epoch": 2.4588414733934263,
      "grad_norm": 0.29884904623031616,
      "learning_rate": 5.1158055652563545e-06,
      "loss": 0.0135,
      "step": 1502480
    },
    {
      "epoch": 2.45887420383208,
      "grad_norm": 0.1580146849155426,
      "learning_rate": 5.115739673042836e-06,
      "loss": 0.0218,
      "step": 1502500
    },
    {
      "epoch": 2.458906934270733,
      "grad_norm": 0.18522287905216217,
      "learning_rate": 5.11567378082932e-06,
      "loss": 0.0189,
      "step": 1502520
    },
    {
      "epoch": 2.458939664709386,
      "grad_norm": 0.44818541407585144,
      "learning_rate": 5.115607888615802e-06,
      "loss": 0.0195,
      "step": 1502540
    },
    {
      "epoch": 2.45897239514804,
      "grad_norm": 0.28479453921318054,
      "learning_rate": 5.1155419964022854e-06,
      "loss": 0.01,
      "step": 1502560
    },
    {
      "epoch": 2.459005125586693,
      "grad_norm": 0.22348566353321075,
      "learning_rate": 5.115476104188768e-06,
      "loss": 0.0136,
      "step": 1502580
    },
    {
      "epoch": 2.4590378560253465,
      "grad_norm": 0.1271604746580124,
      "learning_rate": 5.115410211975251e-06,
      "loss": 0.0134,
      "step": 1502600
    },
    {
      "epoch": 2.4590705864639997,
      "grad_norm": 0.2572495639324188,
      "learning_rate": 5.115344319761734e-06,
      "loss": 0.0238,
      "step": 1502620
    },
    {
      "epoch": 2.4591033169026533,
      "grad_norm": 0.607048749923706,
      "learning_rate": 5.115278427548217e-06,
      "loss": 0.013,
      "step": 1502640
    },
    {
      "epoch": 2.4591360473413064,
      "grad_norm": 0.2611711323261261,
      "learning_rate": 5.1152125353347e-06,
      "loss": 0.0101,
      "step": 1502660
    },
    {
      "epoch": 2.4591687777799596,
      "grad_norm": 0.1146867647767067,
      "learning_rate": 5.115146643121183e-06,
      "loss": 0.0142,
      "step": 1502680
    },
    {
      "epoch": 2.459201508218613,
      "grad_norm": 0.2945557236671448,
      "learning_rate": 5.115080750907666e-06,
      "loss": 0.0104,
      "step": 1502700
    },
    {
      "epoch": 2.4592342386572663,
      "grad_norm": 0.4727567434310913,
      "learning_rate": 5.115014858694148e-06,
      "loss": 0.0117,
      "step": 1502720
    },
    {
      "epoch": 2.45926696909592,
      "grad_norm": 0.5818725824356079,
      "learning_rate": 5.114948966480632e-06,
      "loss": 0.012,
      "step": 1502740
    },
    {
      "epoch": 2.459299699534573,
      "grad_norm": 0.22678948938846588,
      "learning_rate": 5.114883074267114e-06,
      "loss": 0.0211,
      "step": 1502760
    },
    {
      "epoch": 2.4593324299732267,
      "grad_norm": 0.7246066927909851,
      "learning_rate": 5.114817182053597e-06,
      "loss": 0.0179,
      "step": 1502780
    },
    {
      "epoch": 2.45936516041188,
      "grad_norm": 0.40268000960350037,
      "learning_rate": 5.11475128984008e-06,
      "loss": 0.0162,
      "step": 1502800
    },
    {
      "epoch": 2.459397890850533,
      "grad_norm": 0.29357942938804626,
      "learning_rate": 5.114685397626563e-06,
      "loss": 0.0107,
      "step": 1502820
    },
    {
      "epoch": 2.4594306212891865,
      "grad_norm": 0.2562446892261505,
      "learning_rate": 5.1146195054130455e-06,
      "loss": 0.0138,
      "step": 1502840
    },
    {
      "epoch": 2.4594633517278397,
      "grad_norm": 0.1481562703847885,
      "learning_rate": 5.114553613199529e-06,
      "loss": 0.0142,
      "step": 1502860
    },
    {
      "epoch": 2.4594960821664933,
      "grad_norm": 0.32000333070755005,
      "learning_rate": 5.114487720986011e-06,
      "loss": 0.0157,
      "step": 1502880
    },
    {
      "epoch": 2.4595288126051464,
      "grad_norm": 0.3458438813686371,
      "learning_rate": 5.1144218287724946e-06,
      "loss": 0.0105,
      "step": 1502900
    },
    {
      "epoch": 2.4595615430438,
      "grad_norm": 0.7596464157104492,
      "learning_rate": 5.1143559365589765e-06,
      "loss": 0.0114,
      "step": 1502920
    },
    {
      "epoch": 2.459594273482453,
      "grad_norm": 0.4291081726551056,
      "learning_rate": 5.11429004434546e-06,
      "loss": 0.0121,
      "step": 1502940
    },
    {
      "epoch": 2.4596270039211063,
      "grad_norm": 0.6600243449211121,
      "learning_rate": 5.114224152131943e-06,
      "loss": 0.0171,
      "step": 1502960
    },
    {
      "epoch": 2.45965973435976,
      "grad_norm": 0.8627249002456665,
      "learning_rate": 5.1141582599184255e-06,
      "loss": 0.0144,
      "step": 1502980
    },
    {
      "epoch": 2.459692464798413,
      "grad_norm": 0.3495573401451111,
      "learning_rate": 5.114092367704909e-06,
      "loss": 0.0106,
      "step": 1503000
    },
    {
      "epoch": 2.4597251952370667,
      "grad_norm": 0.7333123087882996,
      "learning_rate": 5.114026475491392e-06,
      "loss": 0.0184,
      "step": 1503020
    },
    {
      "epoch": 2.45975792567572,
      "grad_norm": 0.6016973257064819,
      "learning_rate": 5.113960583277875e-06,
      "loss": 0.0138,
      "step": 1503040
    },
    {
      "epoch": 2.4597906561143734,
      "grad_norm": 1.1595571041107178,
      "learning_rate": 5.113894691064357e-06,
      "loss": 0.0188,
      "step": 1503060
    },
    {
      "epoch": 2.4598233865530266,
      "grad_norm": 0.3716343641281128,
      "learning_rate": 5.113828798850841e-06,
      "loss": 0.0181,
      "step": 1503080
    },
    {
      "epoch": 2.4598561169916797,
      "grad_norm": 0.27402591705322266,
      "learning_rate": 5.113762906637323e-06,
      "loss": 0.0125,
      "step": 1503100
    },
    {
      "epoch": 2.4598888474303333,
      "grad_norm": 0.28603851795196533,
      "learning_rate": 5.113697014423806e-06,
      "loss": 0.0154,
      "step": 1503120
    },
    {
      "epoch": 2.4599215778689865,
      "grad_norm": 0.9182379841804504,
      "learning_rate": 5.113631122210288e-06,
      "loss": 0.0111,
      "step": 1503140
    },
    {
      "epoch": 2.45995430830764,
      "grad_norm": 0.4058266282081604,
      "learning_rate": 5.113565229996772e-06,
      "loss": 0.0197,
      "step": 1503160
    },
    {
      "epoch": 2.459987038746293,
      "grad_norm": 0.3802010715007782,
      "learning_rate": 5.113499337783255e-06,
      "loss": 0.0168,
      "step": 1503180
    },
    {
      "epoch": 2.460019769184947,
      "grad_norm": 0.8368654847145081,
      "learning_rate": 5.113433445569737e-06,
      "loss": 0.0125,
      "step": 1503200
    },
    {
      "epoch": 2.4600524996236,
      "grad_norm": 0.5382209420204163,
      "learning_rate": 5.11336755335622e-06,
      "loss": 0.0127,
      "step": 1503220
    },
    {
      "epoch": 2.460085230062253,
      "grad_norm": 0.12082426995038986,
      "learning_rate": 5.113301661142704e-06,
      "loss": 0.013,
      "step": 1503240
    },
    {
      "epoch": 2.4601179605009067,
      "grad_norm": 0.7058330178260803,
      "learning_rate": 5.1132357689291856e-06,
      "loss": 0.0168,
      "step": 1503260
    },
    {
      "epoch": 2.46015069093956,
      "grad_norm": 0.32709360122680664,
      "learning_rate": 5.113169876715669e-06,
      "loss": 0.0155,
      "step": 1503280
    },
    {
      "epoch": 2.4601834213782134,
      "grad_norm": 0.822213888168335,
      "learning_rate": 5.113103984502151e-06,
      "loss": 0.0202,
      "step": 1503300
    },
    {
      "epoch": 2.4602161518168666,
      "grad_norm": 0.35708674788475037,
      "learning_rate": 5.113038092288635e-06,
      "loss": 0.0124,
      "step": 1503320
    },
    {
      "epoch": 2.46024888225552,
      "grad_norm": 0.35684508085250854,
      "learning_rate": 5.112972200075118e-06,
      "loss": 0.0103,
      "step": 1503340
    },
    {
      "epoch": 2.4602816126941733,
      "grad_norm": 0.3053816258907318,
      "learning_rate": 5.1129063078616e-06,
      "loss": 0.015,
      "step": 1503360
    },
    {
      "epoch": 2.4603143431328265,
      "grad_norm": 0.307467520236969,
      "learning_rate": 5.112840415648084e-06,
      "loss": 0.0162,
      "step": 1503380
    },
    {
      "epoch": 2.46034707357148,
      "grad_norm": 0.3860228955745697,
      "learning_rate": 5.1127745234345664e-06,
      "loss": 0.0257,
      "step": 1503400
    },
    {
      "epoch": 2.4603798040101332,
      "grad_norm": 0.412593275308609,
      "learning_rate": 5.112708631221049e-06,
      "loss": 0.0174,
      "step": 1503420
    },
    {
      "epoch": 2.460412534448787,
      "grad_norm": 0.92299485206604,
      "learning_rate": 5.112642739007532e-06,
      "loss": 0.0189,
      "step": 1503440
    },
    {
      "epoch": 2.46044526488744,
      "grad_norm": 0.3377809226512909,
      "learning_rate": 5.1125768467940155e-06,
      "loss": 0.0173,
      "step": 1503460
    },
    {
      "epoch": 2.4604779953260936,
      "grad_norm": 0.6578084230422974,
      "learning_rate": 5.112510954580497e-06,
      "loss": 0.0133,
      "step": 1503480
    },
    {
      "epoch": 2.4605107257647467,
      "grad_norm": 0.19374944269657135,
      "learning_rate": 5.112445062366981e-06,
      "loss": 0.01,
      "step": 1503500
    },
    {
      "epoch": 2.4605434562034,
      "grad_norm": 0.6298239827156067,
      "learning_rate": 5.112379170153463e-06,
      "loss": 0.0118,
      "step": 1503520
    },
    {
      "epoch": 2.4605761866420535,
      "grad_norm": 1.3280365467071533,
      "learning_rate": 5.1123132779399465e-06,
      "loss": 0.0153,
      "step": 1503540
    },
    {
      "epoch": 2.4606089170807066,
      "grad_norm": 0.862994909286499,
      "learning_rate": 5.112247385726428e-06,
      "loss": 0.0162,
      "step": 1503560
    },
    {
      "epoch": 2.46064164751936,
      "grad_norm": 0.16006529331207275,
      "learning_rate": 5.112181493512912e-06,
      "loss": 0.014,
      "step": 1503580
    },
    {
      "epoch": 2.4606743779580134,
      "grad_norm": 0.2175990790128708,
      "learning_rate": 5.112115601299395e-06,
      "loss": 0.0183,
      "step": 1503600
    },
    {
      "epoch": 2.460707108396667,
      "grad_norm": 1.3261502981185913,
      "learning_rate": 5.1120497090858774e-06,
      "loss": 0.0142,
      "step": 1503620
    },
    {
      "epoch": 2.46073983883532,
      "grad_norm": 0.4663744568824768,
      "learning_rate": 5.11198381687236e-06,
      "loss": 0.0109,
      "step": 1503640
    },
    {
      "epoch": 2.4607725692739733,
      "grad_norm": 0.39267632365226746,
      "learning_rate": 5.111917924658844e-06,
      "loss": 0.0148,
      "step": 1503660
    },
    {
      "epoch": 2.460805299712627,
      "grad_norm": 2.5377752780914307,
      "learning_rate": 5.1118520324453265e-06,
      "loss": 0.0166,
      "step": 1503680
    },
    {
      "epoch": 2.46083803015128,
      "grad_norm": 0.4249642491340637,
      "learning_rate": 5.111786140231809e-06,
      "loss": 0.0195,
      "step": 1503700
    },
    {
      "epoch": 2.4608707605899336,
      "grad_norm": 0.0757209062576294,
      "learning_rate": 5.111720248018293e-06,
      "loss": 0.0132,
      "step": 1503720
    },
    {
      "epoch": 2.4609034910285867,
      "grad_norm": 0.30296704173088074,
      "learning_rate": 5.111654355804775e-06,
      "loss": 0.0122,
      "step": 1503740
    },
    {
      "epoch": 2.4609362214672403,
      "grad_norm": 0.2753032147884369,
      "learning_rate": 5.111588463591258e-06,
      "loss": 0.0156,
      "step": 1503760
    },
    {
      "epoch": 2.4609689519058935,
      "grad_norm": 0.41182395815849304,
      "learning_rate": 5.11152257137774e-06,
      "loss": 0.025,
      "step": 1503780
    },
    {
      "epoch": 2.4610016823445466,
      "grad_norm": 0.30219948291778564,
      "learning_rate": 5.111456679164224e-06,
      "loss": 0.0171,
      "step": 1503800
    },
    {
      "epoch": 2.4610344127832002,
      "grad_norm": 0.2851598560810089,
      "learning_rate": 5.1113907869507065e-06,
      "loss": 0.0153,
      "step": 1503820
    },
    {
      "epoch": 2.4610671432218534,
      "grad_norm": 0.4326942265033722,
      "learning_rate": 5.111324894737189e-06,
      "loss": 0.0176,
      "step": 1503840
    },
    {
      "epoch": 2.461099873660507,
      "grad_norm": 0.3189224600791931,
      "learning_rate": 5.111259002523672e-06,
      "loss": 0.017,
      "step": 1503860
    },
    {
      "epoch": 2.46113260409916,
      "grad_norm": 0.19529324769973755,
      "learning_rate": 5.111193110310156e-06,
      "loss": 0.011,
      "step": 1503880
    },
    {
      "epoch": 2.4611653345378137,
      "grad_norm": 0.14350154995918274,
      "learning_rate": 5.1111272180966375e-06,
      "loss": 0.0127,
      "step": 1503900
    },
    {
      "epoch": 2.461198064976467,
      "grad_norm": 0.7641963958740234,
      "learning_rate": 5.111061325883121e-06,
      "loss": 0.0123,
      "step": 1503920
    },
    {
      "epoch": 2.46123079541512,
      "grad_norm": 0.32135009765625,
      "learning_rate": 5.110995433669603e-06,
      "loss": 0.0204,
      "step": 1503940
    },
    {
      "epoch": 2.4612635258537736,
      "grad_norm": 0.19923187792301178,
      "learning_rate": 5.1109295414560865e-06,
      "loss": 0.0158,
      "step": 1503960
    },
    {
      "epoch": 2.4612962562924268,
      "grad_norm": 0.1519073098897934,
      "learning_rate": 5.110863649242569e-06,
      "loss": 0.0138,
      "step": 1503980
    },
    {
      "epoch": 2.4613289867310804,
      "grad_norm": 0.23637530207633972,
      "learning_rate": 5.110797757029052e-06,
      "loss": 0.0165,
      "step": 1504000
    },
    {
      "epoch": 2.4613617171697335,
      "grad_norm": 0.3816772401332855,
      "learning_rate": 5.110731864815535e-06,
      "loss": 0.0122,
      "step": 1504020
    },
    {
      "epoch": 2.4613944476083867,
      "grad_norm": 0.13967061042785645,
      "learning_rate": 5.110665972602018e-06,
      "loss": 0.0098,
      "step": 1504040
    },
    {
      "epoch": 2.4614271780470403,
      "grad_norm": 0.6515523195266724,
      "learning_rate": 5.110600080388501e-06,
      "loss": 0.0216,
      "step": 1504060
    },
    {
      "epoch": 2.4614599084856934,
      "grad_norm": 0.43685799837112427,
      "learning_rate": 5.110534188174984e-06,
      "loss": 0.0168,
      "step": 1504080
    },
    {
      "epoch": 2.461492638924347,
      "grad_norm": 0.40225738286972046,
      "learning_rate": 5.110468295961467e-06,
      "loss": 0.0149,
      "step": 1504100
    },
    {
      "epoch": 2.461525369363,
      "grad_norm": 0.30115458369255066,
      "learning_rate": 5.110402403747949e-06,
      "loss": 0.0177,
      "step": 1504120
    },
    {
      "epoch": 2.4615580998016533,
      "grad_norm": 0.2543911337852478,
      "learning_rate": 5.110336511534433e-06,
      "loss": 0.0135,
      "step": 1504140
    },
    {
      "epoch": 2.461590830240307,
      "grad_norm": 1.021268367767334,
      "learning_rate": 5.110270619320915e-06,
      "loss": 0.0129,
      "step": 1504160
    },
    {
      "epoch": 2.46162356067896,
      "grad_norm": 0.470429390668869,
      "learning_rate": 5.110204727107398e-06,
      "loss": 0.0198,
      "step": 1504180
    },
    {
      "epoch": 2.4616562911176136,
      "grad_norm": 0.20971518754959106,
      "learning_rate": 5.110138834893881e-06,
      "loss": 0.0114,
      "step": 1504200
    },
    {
      "epoch": 2.461689021556267,
      "grad_norm": 0.15400201082229614,
      "learning_rate": 5.110072942680364e-06,
      "loss": 0.0128,
      "step": 1504220
    },
    {
      "epoch": 2.4617217519949204,
      "grad_norm": 0.5763736963272095,
      "learning_rate": 5.110007050466847e-06,
      "loss": 0.0166,
      "step": 1504240
    },
    {
      "epoch": 2.4617544824335735,
      "grad_norm": 0.1587814837694168,
      "learning_rate": 5.10994115825333e-06,
      "loss": 0.0099,
      "step": 1504260
    },
    {
      "epoch": 2.4617872128722267,
      "grad_norm": 0.6932057738304138,
      "learning_rate": 5.109875266039812e-06,
      "loss": 0.0144,
      "step": 1504280
    },
    {
      "epoch": 2.4618199433108803,
      "grad_norm": 0.42991286516189575,
      "learning_rate": 5.109809373826296e-06,
      "loss": 0.0148,
      "step": 1504300
    },
    {
      "epoch": 2.4618526737495334,
      "grad_norm": 0.11952196061611176,
      "learning_rate": 5.1097434816127776e-06,
      "loss": 0.0171,
      "step": 1504320
    },
    {
      "epoch": 2.461885404188187,
      "grad_norm": 0.3473563492298126,
      "learning_rate": 5.109677589399261e-06,
      "loss": 0.0121,
      "step": 1504340
    },
    {
      "epoch": 2.46191813462684,
      "grad_norm": 0.435502827167511,
      "learning_rate": 5.109611697185743e-06,
      "loss": 0.0158,
      "step": 1504360
    },
    {
      "epoch": 2.4619508650654938,
      "grad_norm": 0.10004539787769318,
      "learning_rate": 5.109545804972227e-06,
      "loss": 0.0132,
      "step": 1504380
    },
    {
      "epoch": 2.461983595504147,
      "grad_norm": 0.27637597918510437,
      "learning_rate": 5.10947991275871e-06,
      "loss": 0.0106,
      "step": 1504400
    },
    {
      "epoch": 2.4620163259428,
      "grad_norm": 0.172150656580925,
      "learning_rate": 5.109414020545193e-06,
      "loss": 0.0227,
      "step": 1504420
    },
    {
      "epoch": 2.4620490563814537,
      "grad_norm": 0.5476849675178528,
      "learning_rate": 5.109348128331676e-06,
      "loss": 0.0093,
      "step": 1504440
    },
    {
      "epoch": 2.462081786820107,
      "grad_norm": 0.18835027515888214,
      "learning_rate": 5.1092822361181584e-06,
      "loss": 0.0116,
      "step": 1504460
    },
    {
      "epoch": 2.4621145172587604,
      "grad_norm": 0.20351208746433258,
      "learning_rate": 5.109216343904642e-06,
      "loss": 0.0074,
      "step": 1504480
    },
    {
      "epoch": 2.4621472476974136,
      "grad_norm": 0.4432419538497925,
      "learning_rate": 5.109150451691124e-06,
      "loss": 0.0135,
      "step": 1504500
    },
    {
      "epoch": 2.462179978136067,
      "grad_norm": 0.14385123550891876,
      "learning_rate": 5.1090845594776075e-06,
      "loss": 0.0183,
      "step": 1504520
    },
    {
      "epoch": 2.4622127085747203,
      "grad_norm": 0.31256935000419617,
      "learning_rate": 5.109018667264089e-06,
      "loss": 0.0164,
      "step": 1504540
    },
    {
      "epoch": 2.4622454390133734,
      "grad_norm": 0.34839197993278503,
      "learning_rate": 5.108952775050573e-06,
      "loss": 0.0141,
      "step": 1504560
    },
    {
      "epoch": 2.462278169452027,
      "grad_norm": 0.382261723279953,
      "learning_rate": 5.108886882837055e-06,
      "loss": 0.0121,
      "step": 1504580
    },
    {
      "epoch": 2.46231089989068,
      "grad_norm": 0.30592676997184753,
      "learning_rate": 5.1088209906235385e-06,
      "loss": 0.0112,
      "step": 1504600
    },
    {
      "epoch": 2.462343630329334,
      "grad_norm": 0.23098665475845337,
      "learning_rate": 5.108755098410021e-06,
      "loss": 0.0114,
      "step": 1504620
    },
    {
      "epoch": 2.462376360767987,
      "grad_norm": 0.44480958580970764,
      "learning_rate": 5.108689206196504e-06,
      "loss": 0.0137,
      "step": 1504640
    },
    {
      "epoch": 2.4624090912066405,
      "grad_norm": 0.2204003632068634,
      "learning_rate": 5.108623313982987e-06,
      "loss": 0.013,
      "step": 1504660
    },
    {
      "epoch": 2.4624418216452937,
      "grad_norm": 0.4360696077346802,
      "learning_rate": 5.10855742176947e-06,
      "loss": 0.0109,
      "step": 1504680
    },
    {
      "epoch": 2.462474552083947,
      "grad_norm": 0.796492338180542,
      "learning_rate": 5.108491529555952e-06,
      "loss": 0.0133,
      "step": 1504700
    },
    {
      "epoch": 2.4625072825226004,
      "grad_norm": 0.7860201001167297,
      "learning_rate": 5.108425637342436e-06,
      "loss": 0.0134,
      "step": 1504720
    },
    {
      "epoch": 2.4625400129612536,
      "grad_norm": 0.24894356727600098,
      "learning_rate": 5.108359745128919e-06,
      "loss": 0.0208,
      "step": 1504740
    },
    {
      "epoch": 2.462572743399907,
      "grad_norm": 0.20421616733074188,
      "learning_rate": 5.108293852915401e-06,
      "loss": 0.0136,
      "step": 1504760
    },
    {
      "epoch": 2.4626054738385603,
      "grad_norm": 0.13186760246753693,
      "learning_rate": 5.108227960701885e-06,
      "loss": 0.0153,
      "step": 1504780
    },
    {
      "epoch": 2.462638204277214,
      "grad_norm": 0.9491883516311646,
      "learning_rate": 5.108162068488367e-06,
      "loss": 0.0148,
      "step": 1504800
    },
    {
      "epoch": 2.462670934715867,
      "grad_norm": 0.28036898374557495,
      "learning_rate": 5.10809617627485e-06,
      "loss": 0.0209,
      "step": 1504820
    },
    {
      "epoch": 2.46270366515452,
      "grad_norm": 0.21366043388843536,
      "learning_rate": 5.108030284061333e-06,
      "loss": 0.0111,
      "step": 1504840
    },
    {
      "epoch": 2.462736395593174,
      "grad_norm": 0.44545692205429077,
      "learning_rate": 5.107964391847816e-06,
      "loss": 0.0174,
      "step": 1504860
    },
    {
      "epoch": 2.462769126031827,
      "grad_norm": 0.14867469668388367,
      "learning_rate": 5.1078984996342985e-06,
      "loss": 0.0131,
      "step": 1504880
    },
    {
      "epoch": 2.4628018564704806,
      "grad_norm": 0.19894032180309296,
      "learning_rate": 5.107832607420782e-06,
      "loss": 0.0157,
      "step": 1504900
    },
    {
      "epoch": 2.4628345869091337,
      "grad_norm": 0.44522884488105774,
      "learning_rate": 5.107766715207264e-06,
      "loss": 0.013,
      "step": 1504920
    },
    {
      "epoch": 2.4628673173477873,
      "grad_norm": 0.2576355040073395,
      "learning_rate": 5.1077008229937476e-06,
      "loss": 0.0102,
      "step": 1504940
    },
    {
      "epoch": 2.4629000477864404,
      "grad_norm": 0.30071645975112915,
      "learning_rate": 5.1076349307802295e-06,
      "loss": 0.0109,
      "step": 1504960
    },
    {
      "epoch": 2.4629327782250936,
      "grad_norm": 0.42533183097839355,
      "learning_rate": 5.107569038566713e-06,
      "loss": 0.0166,
      "step": 1504980
    },
    {
      "epoch": 2.462965508663747,
      "grad_norm": 0.9439316391944885,
      "learning_rate": 5.107503146353196e-06,
      "loss": 0.013,
      "step": 1505000
    },
    {
      "epoch": 2.4629982391024003,
      "grad_norm": 0.15816131234169006,
      "learning_rate": 5.1074372541396785e-06,
      "loss": 0.0114,
      "step": 1505020
    },
    {
      "epoch": 2.463030969541054,
      "grad_norm": 0.19785170257091522,
      "learning_rate": 5.107371361926161e-06,
      "loss": 0.017,
      "step": 1505040
    },
    {
      "epoch": 2.463063699979707,
      "grad_norm": 0.4528559744358063,
      "learning_rate": 5.107305469712645e-06,
      "loss": 0.0111,
      "step": 1505060
    },
    {
      "epoch": 2.4630964304183607,
      "grad_norm": 0.39427119493484497,
      "learning_rate": 5.107239577499127e-06,
      "loss": 0.0172,
      "step": 1505080
    },
    {
      "epoch": 2.463129160857014,
      "grad_norm": 1.615875482559204,
      "learning_rate": 5.10717368528561e-06,
      "loss": 0.0162,
      "step": 1505100
    },
    {
      "epoch": 2.463161891295667,
      "grad_norm": 0.44952091574668884,
      "learning_rate": 5.107107793072094e-06,
      "loss": 0.017,
      "step": 1505120
    },
    {
      "epoch": 2.4631946217343206,
      "grad_norm": 0.05522093176841736,
      "learning_rate": 5.107041900858576e-06,
      "loss": 0.0124,
      "step": 1505140
    },
    {
      "epoch": 2.4632273521729737,
      "grad_norm": 0.5719261765480042,
      "learning_rate": 5.106976008645059e-06,
      "loss": 0.0116,
      "step": 1505160
    },
    {
      "epoch": 2.4632600826116273,
      "grad_norm": 0.2751970589160919,
      "learning_rate": 5.106910116431541e-06,
      "loss": 0.0107,
      "step": 1505180
    },
    {
      "epoch": 2.4632928130502805,
      "grad_norm": 0.7769960761070251,
      "learning_rate": 5.106844224218025e-06,
      "loss": 0.013,
      "step": 1505200
    },
    {
      "epoch": 2.463325543488934,
      "grad_norm": 0.2061401605606079,
      "learning_rate": 5.106778332004508e-06,
      "loss": 0.0155,
      "step": 1505220
    },
    {
      "epoch": 2.463358273927587,
      "grad_norm": 0.451426237821579,
      "learning_rate": 5.10671243979099e-06,
      "loss": 0.0124,
      "step": 1505240
    },
    {
      "epoch": 2.4633910043662404,
      "grad_norm": 0.7100481390953064,
      "learning_rate": 5.106646547577473e-06,
      "loss": 0.0183,
      "step": 1505260
    },
    {
      "epoch": 2.463423734804894,
      "grad_norm": 0.19134017825126648,
      "learning_rate": 5.106580655363957e-06,
      "loss": 0.0106,
      "step": 1505280
    },
    {
      "epoch": 2.463456465243547,
      "grad_norm": 0.29913195967674255,
      "learning_rate": 5.106514763150439e-06,
      "loss": 0.0117,
      "step": 1505300
    },
    {
      "epoch": 2.4634891956822007,
      "grad_norm": 0.37519747018814087,
      "learning_rate": 5.106448870936922e-06,
      "loss": 0.0123,
      "step": 1505320
    },
    {
      "epoch": 2.463521926120854,
      "grad_norm": 0.2632032632827759,
      "learning_rate": 5.106382978723404e-06,
      "loss": 0.0124,
      "step": 1505340
    },
    {
      "epoch": 2.4635546565595074,
      "grad_norm": 0.4211772382259369,
      "learning_rate": 5.106317086509888e-06,
      "loss": 0.0133,
      "step": 1505360
    },
    {
      "epoch": 2.4635873869981606,
      "grad_norm": 0.6589219570159912,
      "learning_rate": 5.1062511942963695e-06,
      "loss": 0.0108,
      "step": 1505380
    },
    {
      "epoch": 2.4636201174368138,
      "grad_norm": 0.09779655188322067,
      "learning_rate": 5.106185302082853e-06,
      "loss": 0.0122,
      "step": 1505400
    },
    {
      "epoch": 2.4636528478754673,
      "grad_norm": 0.18571777641773224,
      "learning_rate": 5.106119409869336e-06,
      "loss": 0.014,
      "step": 1505420
    },
    {
      "epoch": 2.4636855783141205,
      "grad_norm": 0.7785057425498962,
      "learning_rate": 5.106053517655819e-06,
      "loss": 0.0129,
      "step": 1505440
    },
    {
      "epoch": 2.463718308752774,
      "grad_norm": 0.1262025386095047,
      "learning_rate": 5.105987625442302e-06,
      "loss": 0.0119,
      "step": 1505460
    },
    {
      "epoch": 2.4637510391914272,
      "grad_norm": 0.5158059597015381,
      "learning_rate": 5.105921733228785e-06,
      "loss": 0.0172,
      "step": 1505480
    },
    {
      "epoch": 2.4637837696300804,
      "grad_norm": 0.788910448551178,
      "learning_rate": 5.1058558410152685e-06,
      "loss": 0.0176,
      "step": 1505500
    },
    {
      "epoch": 2.463816500068734,
      "grad_norm": 0.5687151551246643,
      "learning_rate": 5.10578994880175e-06,
      "loss": 0.0136,
      "step": 1505520
    },
    {
      "epoch": 2.463849230507387,
      "grad_norm": 0.34469470381736755,
      "learning_rate": 5.105724056588234e-06,
      "loss": 0.0115,
      "step": 1505540
    },
    {
      "epoch": 2.4638819609460407,
      "grad_norm": 0.934569776058197,
      "learning_rate": 5.105658164374716e-06,
      "loss": 0.0183,
      "step": 1505560
    },
    {
      "epoch": 2.463914691384694,
      "grad_norm": 0.2823500633239746,
      "learning_rate": 5.1055922721611995e-06,
      "loss": 0.015,
      "step": 1505580
    },
    {
      "epoch": 2.463947421823347,
      "grad_norm": 0.34932005405426025,
      "learning_rate": 5.105526379947681e-06,
      "loss": 0.0167,
      "step": 1505600
    },
    {
      "epoch": 2.4639801522620006,
      "grad_norm": 0.3574146330356598,
      "learning_rate": 5.105460487734165e-06,
      "loss": 0.016,
      "step": 1505620
    },
    {
      "epoch": 2.4640128827006538,
      "grad_norm": 0.5106678605079651,
      "learning_rate": 5.105394595520648e-06,
      "loss": 0.0171,
      "step": 1505640
    },
    {
      "epoch": 2.4640456131393074,
      "grad_norm": 0.497993528842926,
      "learning_rate": 5.1053287033071304e-06,
      "loss": 0.0154,
      "step": 1505660
    },
    {
      "epoch": 2.4640783435779605,
      "grad_norm": 0.23333600163459778,
      "learning_rate": 5.105262811093613e-06,
      "loss": 0.0091,
      "step": 1505680
    },
    {
      "epoch": 2.464111074016614,
      "grad_norm": 0.37984341382980347,
      "learning_rate": 5.105196918880097e-06,
      "loss": 0.0169,
      "step": 1505700
    },
    {
      "epoch": 2.4641438044552673,
      "grad_norm": 0.40999555587768555,
      "learning_rate": 5.105131026666579e-06,
      "loss": 0.0153,
      "step": 1505720
    },
    {
      "epoch": 2.4641765348939204,
      "grad_norm": 0.3483983874320984,
      "learning_rate": 5.105065134453062e-06,
      "loss": 0.012,
      "step": 1505740
    },
    {
      "epoch": 2.464209265332574,
      "grad_norm": 0.055833298712968826,
      "learning_rate": 5.104999242239544e-06,
      "loss": 0.0134,
      "step": 1505760
    },
    {
      "epoch": 2.464241995771227,
      "grad_norm": 0.2496509552001953,
      "learning_rate": 5.104933350026028e-06,
      "loss": 0.0142,
      "step": 1505780
    },
    {
      "epoch": 2.4642747262098808,
      "grad_norm": 0.45438453555107117,
      "learning_rate": 5.104867457812511e-06,
      "loss": 0.0113,
      "step": 1505800
    },
    {
      "epoch": 2.464307456648534,
      "grad_norm": 0.1558675616979599,
      "learning_rate": 5.104801565598993e-06,
      "loss": 0.0133,
      "step": 1505820
    },
    {
      "epoch": 2.4643401870871875,
      "grad_norm": 0.3513185977935791,
      "learning_rate": 5.104735673385477e-06,
      "loss": 0.0093,
      "step": 1505840
    },
    {
      "epoch": 2.4643729175258406,
      "grad_norm": 0.3349434435367584,
      "learning_rate": 5.1046697811719595e-06,
      "loss": 0.0119,
      "step": 1505860
    },
    {
      "epoch": 2.464405647964494,
      "grad_norm": 0.6383010745048523,
      "learning_rate": 5.104603888958442e-06,
      "loss": 0.016,
      "step": 1505880
    },
    {
      "epoch": 2.4644383784031474,
      "grad_norm": 0.03034106083214283,
      "learning_rate": 5.104537996744925e-06,
      "loss": 0.0177,
      "step": 1505900
    },
    {
      "epoch": 2.4644711088418005,
      "grad_norm": 0.14484432339668274,
      "learning_rate": 5.104472104531409e-06,
      "loss": 0.0124,
      "step": 1505920
    },
    {
      "epoch": 2.464503839280454,
      "grad_norm": 0.38794365525245667,
      "learning_rate": 5.1044062123178905e-06,
      "loss": 0.0141,
      "step": 1505940
    },
    {
      "epoch": 2.4645365697191073,
      "grad_norm": 0.1542431265115738,
      "learning_rate": 5.104340320104374e-06,
      "loss": 0.0127,
      "step": 1505960
    },
    {
      "epoch": 2.464569300157761,
      "grad_norm": 0.46771952509880066,
      "learning_rate": 5.104274427890856e-06,
      "loss": 0.0134,
      "step": 1505980
    },
    {
      "epoch": 2.464602030596414,
      "grad_norm": 0.4819857180118561,
      "learning_rate": 5.1042085356773396e-06,
      "loss": 0.0217,
      "step": 1506000
    },
    {
      "epoch": 2.464634761035067,
      "grad_norm": 0.7778318524360657,
      "learning_rate": 5.104142643463822e-06,
      "loss": 0.0183,
      "step": 1506020
    },
    {
      "epoch": 2.4646674914737208,
      "grad_norm": 1.0426304340362549,
      "learning_rate": 5.104076751250305e-06,
      "loss": 0.0139,
      "step": 1506040
    },
    {
      "epoch": 2.464700221912374,
      "grad_norm": 0.3958691656589508,
      "learning_rate": 5.104010859036788e-06,
      "loss": 0.0189,
      "step": 1506060
    },
    {
      "epoch": 2.4647329523510275,
      "grad_norm": 1.0089612007141113,
      "learning_rate": 5.103944966823271e-06,
      "loss": 0.0207,
      "step": 1506080
    },
    {
      "epoch": 2.4647656827896807,
      "grad_norm": 0.5959129929542542,
      "learning_rate": 5.103879074609753e-06,
      "loss": 0.0181,
      "step": 1506100
    },
    {
      "epoch": 2.4647984132283343,
      "grad_norm": 0.220297709107399,
      "learning_rate": 5.103813182396237e-06,
      "loss": 0.0163,
      "step": 1506120
    },
    {
      "epoch": 2.4648311436669874,
      "grad_norm": 0.4346666932106018,
      "learning_rate": 5.103747290182719e-06,
      "loss": 0.0156,
      "step": 1506140
    },
    {
      "epoch": 2.4648638741056406,
      "grad_norm": 0.13137440383434296,
      "learning_rate": 5.103681397969202e-06,
      "loss": 0.013,
      "step": 1506160
    },
    {
      "epoch": 2.464896604544294,
      "grad_norm": 0.5691997408866882,
      "learning_rate": 5.103615505755686e-06,
      "loss": 0.0161,
      "step": 1506180
    },
    {
      "epoch": 2.4649293349829473,
      "grad_norm": 0.08519448339939117,
      "learning_rate": 5.103549613542168e-06,
      "loss": 0.0128,
      "step": 1506200
    },
    {
      "epoch": 2.464962065421601,
      "grad_norm": 0.5207988023757935,
      "learning_rate": 5.103483721328651e-06,
      "loss": 0.0187,
      "step": 1506220
    },
    {
      "epoch": 2.464994795860254,
      "grad_norm": 0.4440890848636627,
      "learning_rate": 5.103417829115134e-06,
      "loss": 0.0221,
      "step": 1506240
    },
    {
      "epoch": 2.4650275262989076,
      "grad_norm": 0.27864930033683777,
      "learning_rate": 5.103351936901617e-06,
      "loss": 0.021,
      "step": 1506260
    },
    {
      "epoch": 2.465060256737561,
      "grad_norm": 0.47112616896629333,
      "learning_rate": 5.1032860446881e-06,
      "loss": 0.0158,
      "step": 1506280
    },
    {
      "epoch": 2.465092987176214,
      "grad_norm": 0.16290216147899628,
      "learning_rate": 5.103220152474583e-06,
      "loss": 0.0088,
      "step": 1506300
    },
    {
      "epoch": 2.4651257176148675,
      "grad_norm": 0.3683881461620331,
      "learning_rate": 5.103154260261065e-06,
      "loss": 0.0153,
      "step": 1506320
    },
    {
      "epoch": 2.4651584480535207,
      "grad_norm": 0.5284679532051086,
      "learning_rate": 5.103088368047549e-06,
      "loss": 0.0197,
      "step": 1506340
    },
    {
      "epoch": 2.4651911784921743,
      "grad_norm": 0.22777612507343292,
      "learning_rate": 5.1030224758340306e-06,
      "loss": 0.0125,
      "step": 1506360
    },
    {
      "epoch": 2.4652239089308274,
      "grad_norm": 1.0529041290283203,
      "learning_rate": 5.102956583620514e-06,
      "loss": 0.0148,
      "step": 1506380
    },
    {
      "epoch": 2.465256639369481,
      "grad_norm": 0.22080029547214508,
      "learning_rate": 5.102890691406996e-06,
      "loss": 0.0244,
      "step": 1506400
    },
    {
      "epoch": 2.465289369808134,
      "grad_norm": 0.060475993901491165,
      "learning_rate": 5.10282479919348e-06,
      "loss": 0.0147,
      "step": 1506420
    },
    {
      "epoch": 2.4653221002467873,
      "grad_norm": 1.1704200506210327,
      "learning_rate": 5.102758906979962e-06,
      "loss": 0.0173,
      "step": 1506440
    },
    {
      "epoch": 2.465354830685441,
      "grad_norm": 0.5798405408859253,
      "learning_rate": 5.102693014766445e-06,
      "loss": 0.0135,
      "step": 1506460
    },
    {
      "epoch": 2.465387561124094,
      "grad_norm": 0.09444194287061691,
      "learning_rate": 5.102627122552928e-06,
      "loss": 0.0167,
      "step": 1506480
    },
    {
      "epoch": 2.4654202915627477,
      "grad_norm": 0.29212164878845215,
      "learning_rate": 5.1025612303394114e-06,
      "loss": 0.0156,
      "step": 1506500
    },
    {
      "epoch": 2.465453022001401,
      "grad_norm": 0.36587703227996826,
      "learning_rate": 5.102495338125894e-06,
      "loss": 0.014,
      "step": 1506520
    },
    {
      "epoch": 2.4654857524400544,
      "grad_norm": 0.18798023462295532,
      "learning_rate": 5.102429445912377e-06,
      "loss": 0.019,
      "step": 1506540
    },
    {
      "epoch": 2.4655184828787076,
      "grad_norm": 0.19245120882987976,
      "learning_rate": 5.1023635536988605e-06,
      "loss": 0.0152,
      "step": 1506560
    },
    {
      "epoch": 2.4655512133173607,
      "grad_norm": 0.49761447310447693,
      "learning_rate": 5.102297661485342e-06,
      "loss": 0.013,
      "step": 1506580
    },
    {
      "epoch": 2.4655839437560143,
      "grad_norm": 0.6504961848258972,
      "learning_rate": 5.102231769271826e-06,
      "loss": 0.012,
      "step": 1506600
    },
    {
      "epoch": 2.4656166741946675,
      "grad_norm": 0.26082268357276917,
      "learning_rate": 5.102165877058308e-06,
      "loss": 0.0181,
      "step": 1506620
    },
    {
      "epoch": 2.465649404633321,
      "grad_norm": 0.3698766231536865,
      "learning_rate": 5.1020999848447915e-06,
      "loss": 0.0167,
      "step": 1506640
    },
    {
      "epoch": 2.465682135071974,
      "grad_norm": 0.1770029366016388,
      "learning_rate": 5.102034092631274e-06,
      "loss": 0.0163,
      "step": 1506660
    },
    {
      "epoch": 2.465714865510628,
      "grad_norm": 0.23819799721240997,
      "learning_rate": 5.101968200417757e-06,
      "loss": 0.0168,
      "step": 1506680
    },
    {
      "epoch": 2.465747595949281,
      "grad_norm": 0.17027926445007324,
      "learning_rate": 5.10190230820424e-06,
      "loss": 0.0102,
      "step": 1506700
    },
    {
      "epoch": 2.465780326387934,
      "grad_norm": 0.37598541378974915,
      "learning_rate": 5.101836415990723e-06,
      "loss": 0.0175,
      "step": 1506720
    },
    {
      "epoch": 2.4658130568265877,
      "grad_norm": 0.37468451261520386,
      "learning_rate": 5.101770523777205e-06,
      "loss": 0.0109,
      "step": 1506740
    },
    {
      "epoch": 2.465845787265241,
      "grad_norm": 0.6919873952865601,
      "learning_rate": 5.101704631563689e-06,
      "loss": 0.0169,
      "step": 1506760
    },
    {
      "epoch": 2.4658785177038944,
      "grad_norm": 0.15306296944618225,
      "learning_rate": 5.101638739350171e-06,
      "loss": 0.0165,
      "step": 1506780
    },
    {
      "epoch": 2.4659112481425476,
      "grad_norm": 0.6554507613182068,
      "learning_rate": 5.101572847136654e-06,
      "loss": 0.0129,
      "step": 1506800
    },
    {
      "epoch": 2.465943978581201,
      "grad_norm": 0.18523463606834412,
      "learning_rate": 5.101506954923137e-06,
      "loss": 0.0113,
      "step": 1506820
    },
    {
      "epoch": 2.4659767090198543,
      "grad_norm": 0.21979950368404388,
      "learning_rate": 5.10144106270962e-06,
      "loss": 0.0083,
      "step": 1506840
    },
    {
      "epoch": 2.4660094394585075,
      "grad_norm": 0.6440688371658325,
      "learning_rate": 5.101375170496103e-06,
      "loss": 0.0151,
      "step": 1506860
    },
    {
      "epoch": 2.466042169897161,
      "grad_norm": 0.14991453289985657,
      "learning_rate": 5.101309278282586e-06,
      "loss": 0.0181,
      "step": 1506880
    },
    {
      "epoch": 2.4660749003358142,
      "grad_norm": 0.3654881715774536,
      "learning_rate": 5.101243386069069e-06,
      "loss": 0.0241,
      "step": 1506900
    },
    {
      "epoch": 2.466107630774468,
      "grad_norm": 0.09165213257074356,
      "learning_rate": 5.1011774938555515e-06,
      "loss": 0.0212,
      "step": 1506920
    },
    {
      "epoch": 2.466140361213121,
      "grad_norm": 0.8337963223457336,
      "learning_rate": 5.101111601642035e-06,
      "loss": 0.0186,
      "step": 1506940
    },
    {
      "epoch": 2.4661730916517746,
      "grad_norm": 0.2762838304042816,
      "learning_rate": 5.101045709428517e-06,
      "loss": 0.0164,
      "step": 1506960
    },
    {
      "epoch": 2.4662058220904277,
      "grad_norm": 0.26588720083236694,
      "learning_rate": 5.100979817215001e-06,
      "loss": 0.0186,
      "step": 1506980
    },
    {
      "epoch": 2.466238552529081,
      "grad_norm": 1.0113048553466797,
      "learning_rate": 5.1009139250014825e-06,
      "loss": 0.0192,
      "step": 1507000
    },
    {
      "epoch": 2.4662712829677345,
      "grad_norm": 0.12545785307884216,
      "learning_rate": 5.100848032787966e-06,
      "loss": 0.0137,
      "step": 1507020
    },
    {
      "epoch": 2.4663040134063876,
      "grad_norm": 0.3805427849292755,
      "learning_rate": 5.100782140574449e-06,
      "loss": 0.0117,
      "step": 1507040
    },
    {
      "epoch": 2.466336743845041,
      "grad_norm": 0.7527860999107361,
      "learning_rate": 5.1007162483609315e-06,
      "loss": 0.0147,
      "step": 1507060
    },
    {
      "epoch": 2.4663694742836944,
      "grad_norm": 0.17917077243328094,
      "learning_rate": 5.100650356147414e-06,
      "loss": 0.0129,
      "step": 1507080
    },
    {
      "epoch": 2.4664022047223475,
      "grad_norm": 0.6880988478660583,
      "learning_rate": 5.100584463933898e-06,
      "loss": 0.01,
      "step": 1507100
    },
    {
      "epoch": 2.466434935161001,
      "grad_norm": 0.302662193775177,
      "learning_rate": 5.10051857172038e-06,
      "loss": 0.0185,
      "step": 1507120
    },
    {
      "epoch": 2.4664676655996542,
      "grad_norm": 0.35535818338394165,
      "learning_rate": 5.100452679506863e-06,
      "loss": 0.0105,
      "step": 1507140
    },
    {
      "epoch": 2.466500396038308,
      "grad_norm": 0.3745031952857971,
      "learning_rate": 5.100386787293345e-06,
      "loss": 0.0114,
      "step": 1507160
    },
    {
      "epoch": 2.466533126476961,
      "grad_norm": 0.29787296056747437,
      "learning_rate": 5.100320895079829e-06,
      "loss": 0.026,
      "step": 1507180
    },
    {
      "epoch": 2.466565856915614,
      "grad_norm": 0.265677809715271,
      "learning_rate": 5.100255002866312e-06,
      "loss": 0.0223,
      "step": 1507200
    },
    {
      "epoch": 2.4665985873542677,
      "grad_norm": 0.26694345474243164,
      "learning_rate": 5.100189110652794e-06,
      "loss": 0.0113,
      "step": 1507220
    },
    {
      "epoch": 2.466631317792921,
      "grad_norm": 0.46761053800582886,
      "learning_rate": 5.100123218439278e-06,
      "loss": 0.017,
      "step": 1507240
    },
    {
      "epoch": 2.4666640482315745,
      "grad_norm": 0.21973198652267456,
      "learning_rate": 5.100057326225761e-06,
      "loss": 0.0151,
      "step": 1507260
    },
    {
      "epoch": 2.4666967786702276,
      "grad_norm": 0.4845307469367981,
      "learning_rate": 5.099991434012243e-06,
      "loss": 0.0241,
      "step": 1507280
    },
    {
      "epoch": 2.4667295091088812,
      "grad_norm": 0.05685359612107277,
      "learning_rate": 5.099925541798726e-06,
      "loss": 0.0203,
      "step": 1507300
    },
    {
      "epoch": 2.4667622395475344,
      "grad_norm": 0.47047674655914307,
      "learning_rate": 5.09985964958521e-06,
      "loss": 0.0129,
      "step": 1507320
    },
    {
      "epoch": 2.4667949699861875,
      "grad_norm": 0.19126364588737488,
      "learning_rate": 5.099793757371692e-06,
      "loss": 0.0105,
      "step": 1507340
    },
    {
      "epoch": 2.466827700424841,
      "grad_norm": 0.34035438299179077,
      "learning_rate": 5.099727865158175e-06,
      "loss": 0.0097,
      "step": 1507360
    },
    {
      "epoch": 2.4668604308634943,
      "grad_norm": 0.875768780708313,
      "learning_rate": 5.099661972944657e-06,
      "loss": 0.0131,
      "step": 1507380
    },
    {
      "epoch": 2.466893161302148,
      "grad_norm": 0.29253047704696655,
      "learning_rate": 5.099596080731141e-06,
      "loss": 0.0188,
      "step": 1507400
    },
    {
      "epoch": 2.466925891740801,
      "grad_norm": 0.7325944304466248,
      "learning_rate": 5.0995301885176225e-06,
      "loss": 0.0193,
      "step": 1507420
    },
    {
      "epoch": 2.4669586221794546,
      "grad_norm": 0.2089809775352478,
      "learning_rate": 5.099464296304106e-06,
      "loss": 0.0119,
      "step": 1507440
    },
    {
      "epoch": 2.4669913526181078,
      "grad_norm": 0.16266316175460815,
      "learning_rate": 5.099398404090589e-06,
      "loss": 0.0107,
      "step": 1507460
    },
    {
      "epoch": 2.467024083056761,
      "grad_norm": 0.08914560079574585,
      "learning_rate": 5.099332511877072e-06,
      "loss": 0.0113,
      "step": 1507480
    },
    {
      "epoch": 2.4670568134954145,
      "grad_norm": 0.10121674090623856,
      "learning_rate": 5.099266619663554e-06,
      "loss": 0.0078,
      "step": 1507500
    },
    {
      "epoch": 2.4670895439340677,
      "grad_norm": 0.4470977783203125,
      "learning_rate": 5.099200727450038e-06,
      "loss": 0.0107,
      "step": 1507520
    },
    {
      "epoch": 2.4671222743727212,
      "grad_norm": 0.7088461518287659,
      "learning_rate": 5.09913483523652e-06,
      "loss": 0.0096,
      "step": 1507540
    },
    {
      "epoch": 2.4671550048113744,
      "grad_norm": 0.6012586355209351,
      "learning_rate": 5.099068943023003e-06,
      "loss": 0.0156,
      "step": 1507560
    },
    {
      "epoch": 2.467187735250028,
      "grad_norm": 0.6930739283561707,
      "learning_rate": 5.099003050809487e-06,
      "loss": 0.0186,
      "step": 1507580
    },
    {
      "epoch": 2.467220465688681,
      "grad_norm": 0.6145331859588623,
      "learning_rate": 5.098937158595969e-06,
      "loss": 0.013,
      "step": 1507600
    },
    {
      "epoch": 2.4672531961273343,
      "grad_norm": 0.5652316212654114,
      "learning_rate": 5.0988712663824525e-06,
      "loss": 0.016,
      "step": 1507620
    },
    {
      "epoch": 2.467285926565988,
      "grad_norm": 0.20171526074409485,
      "learning_rate": 5.098805374168934e-06,
      "loss": 0.0156,
      "step": 1507640
    },
    {
      "epoch": 2.467318657004641,
      "grad_norm": 0.4279077351093292,
      "learning_rate": 5.098739481955418e-06,
      "loss": 0.0147,
      "step": 1507660
    },
    {
      "epoch": 2.4673513874432946,
      "grad_norm": 0.801263153553009,
      "learning_rate": 5.098673589741901e-06,
      "loss": 0.0137,
      "step": 1507680
    },
    {
      "epoch": 2.467384117881948,
      "grad_norm": 0.19852620363235474,
      "learning_rate": 5.0986076975283834e-06,
      "loss": 0.0081,
      "step": 1507700
    },
    {
      "epoch": 2.4674168483206014,
      "grad_norm": 0.3413406014442444,
      "learning_rate": 5.098541805314866e-06,
      "loss": 0.015,
      "step": 1507720
    },
    {
      "epoch": 2.4674495787592545,
      "grad_norm": 0.9867550730705261,
      "learning_rate": 5.09847591310135e-06,
      "loss": 0.0248,
      "step": 1507740
    },
    {
      "epoch": 2.4674823091979077,
      "grad_norm": 0.7593477368354797,
      "learning_rate": 5.098410020887832e-06,
      "loss": 0.0151,
      "step": 1507760
    },
    {
      "epoch": 2.4675150396365613,
      "grad_norm": 0.24729850888252258,
      "learning_rate": 5.098344128674315e-06,
      "loss": 0.0192,
      "step": 1507780
    },
    {
      "epoch": 2.4675477700752144,
      "grad_norm": 0.7696207165718079,
      "learning_rate": 5.098278236460797e-06,
      "loss": 0.0146,
      "step": 1507800
    },
    {
      "epoch": 2.467580500513868,
      "grad_norm": 0.5463002324104309,
      "learning_rate": 5.098212344247281e-06,
      "loss": 0.0116,
      "step": 1507820
    },
    {
      "epoch": 2.467613230952521,
      "grad_norm": 0.28711268305778503,
      "learning_rate": 5.0981464520337635e-06,
      "loss": 0.0166,
      "step": 1507840
    },
    {
      "epoch": 2.4676459613911748,
      "grad_norm": 0.3679024279117584,
      "learning_rate": 5.098080559820246e-06,
      "loss": 0.0118,
      "step": 1507860
    },
    {
      "epoch": 2.467678691829828,
      "grad_norm": 0.9133645296096802,
      "learning_rate": 5.098014667606729e-06,
      "loss": 0.0171,
      "step": 1507880
    },
    {
      "epoch": 2.467711422268481,
      "grad_norm": 0.08219502121210098,
      "learning_rate": 5.0979487753932125e-06,
      "loss": 0.0126,
      "step": 1507900
    },
    {
      "epoch": 2.4677441527071347,
      "grad_norm": 0.6787722110748291,
      "learning_rate": 5.097882883179695e-06,
      "loss": 0.0194,
      "step": 1507920
    },
    {
      "epoch": 2.467776883145788,
      "grad_norm": 0.23941023647785187,
      "learning_rate": 5.097816990966178e-06,
      "loss": 0.0151,
      "step": 1507940
    },
    {
      "epoch": 2.4678096135844414,
      "grad_norm": 0.4254246950149536,
      "learning_rate": 5.097751098752662e-06,
      "loss": 0.0133,
      "step": 1507960
    },
    {
      "epoch": 2.4678423440230945,
      "grad_norm": 0.25027361512184143,
      "learning_rate": 5.0976852065391435e-06,
      "loss": 0.0132,
      "step": 1507980
    },
    {
      "epoch": 2.467875074461748,
      "grad_norm": 0.2046791911125183,
      "learning_rate": 5.097619314325627e-06,
      "loss": 0.0182,
      "step": 1508000
    },
    {
      "epoch": 2.4679078049004013,
      "grad_norm": 2.705251455307007,
      "learning_rate": 5.097553422112109e-06,
      "loss": 0.0138,
      "step": 1508020
    },
    {
      "epoch": 2.4679405353390544,
      "grad_norm": 0.438922256231308,
      "learning_rate": 5.0974875298985926e-06,
      "loss": 0.0146,
      "step": 1508040
    },
    {
      "epoch": 2.467973265777708,
      "grad_norm": 0.6449578404426575,
      "learning_rate": 5.097421637685075e-06,
      "loss": 0.0133,
      "step": 1508060
    },
    {
      "epoch": 2.468005996216361,
      "grad_norm": 0.08686163276433945,
      "learning_rate": 5.097355745471558e-06,
      "loss": 0.0101,
      "step": 1508080
    },
    {
      "epoch": 2.468038726655015,
      "grad_norm": 1.1869697570800781,
      "learning_rate": 5.097289853258041e-06,
      "loss": 0.0138,
      "step": 1508100
    },
    {
      "epoch": 2.468071457093668,
      "grad_norm": 0.3126927614212036,
      "learning_rate": 5.097223961044524e-06,
      "loss": 0.0167,
      "step": 1508120
    },
    {
      "epoch": 2.4681041875323215,
      "grad_norm": 0.3778406083583832,
      "learning_rate": 5.097158068831006e-06,
      "loss": 0.0164,
      "step": 1508140
    },
    {
      "epoch": 2.4681369179709747,
      "grad_norm": 0.5570085048675537,
      "learning_rate": 5.09709217661749e-06,
      "loss": 0.0171,
      "step": 1508160
    },
    {
      "epoch": 2.468169648409628,
      "grad_norm": 0.6720173358917236,
      "learning_rate": 5.097026284403972e-06,
      "loss": 0.0169,
      "step": 1508180
    },
    {
      "epoch": 2.4682023788482814,
      "grad_norm": 0.27091890573501587,
      "learning_rate": 5.096960392190455e-06,
      "loss": 0.0148,
      "step": 1508200
    },
    {
      "epoch": 2.4682351092869346,
      "grad_norm": 0.37317341566085815,
      "learning_rate": 5.096894499976937e-06,
      "loss": 0.0159,
      "step": 1508220
    },
    {
      "epoch": 2.468267839725588,
      "grad_norm": 2.384127378463745,
      "learning_rate": 5.096828607763421e-06,
      "loss": 0.0192,
      "step": 1508240
    },
    {
      "epoch": 2.4683005701642413,
      "grad_norm": 0.09660380333662033,
      "learning_rate": 5.096762715549904e-06,
      "loss": 0.0187,
      "step": 1508260
    },
    {
      "epoch": 2.468333300602895,
      "grad_norm": 0.48765191435813904,
      "learning_rate": 5.096696823336387e-06,
      "loss": 0.0128,
      "step": 1508280
    },
    {
      "epoch": 2.468366031041548,
      "grad_norm": 0.26908713579177856,
      "learning_rate": 5.09663093112287e-06,
      "loss": 0.0122,
      "step": 1508300
    },
    {
      "epoch": 2.468398761480201,
      "grad_norm": 0.17597347497940063,
      "learning_rate": 5.096565038909353e-06,
      "loss": 0.0133,
      "step": 1508320
    },
    {
      "epoch": 2.468431491918855,
      "grad_norm": 0.40683838725090027,
      "learning_rate": 5.096499146695836e-06,
      "loss": 0.0215,
      "step": 1508340
    },
    {
      "epoch": 2.468464222357508,
      "grad_norm": 0.2966589033603668,
      "learning_rate": 5.096433254482318e-06,
      "loss": 0.0147,
      "step": 1508360
    },
    {
      "epoch": 2.4684969527961615,
      "grad_norm": 0.20549194514751434,
      "learning_rate": 5.096367362268802e-06,
      "loss": 0.0136,
      "step": 1508380
    },
    {
      "epoch": 2.4685296832348147,
      "grad_norm": 0.2908131182193756,
      "learning_rate": 5.0963014700552836e-06,
      "loss": 0.0097,
      "step": 1508400
    },
    {
      "epoch": 2.4685624136734683,
      "grad_norm": 0.7322564125061035,
      "learning_rate": 5.096235577841767e-06,
      "loss": 0.0178,
      "step": 1508420
    },
    {
      "epoch": 2.4685951441121214,
      "grad_norm": 0.6021444201469421,
      "learning_rate": 5.096169685628249e-06,
      "loss": 0.0105,
      "step": 1508440
    },
    {
      "epoch": 2.4686278745507746,
      "grad_norm": 0.1985745131969452,
      "learning_rate": 5.096103793414733e-06,
      "loss": 0.0168,
      "step": 1508460
    },
    {
      "epoch": 2.468660604989428,
      "grad_norm": 0.2033253014087677,
      "learning_rate": 5.096037901201215e-06,
      "loss": 0.0236,
      "step": 1508480
    },
    {
      "epoch": 2.4686933354280813,
      "grad_norm": 0.20852801203727722,
      "learning_rate": 5.095972008987698e-06,
      "loss": 0.0184,
      "step": 1508500
    },
    {
      "epoch": 2.468726065866735,
      "grad_norm": 0.2536733150482178,
      "learning_rate": 5.095906116774181e-06,
      "loss": 0.0155,
      "step": 1508520
    },
    {
      "epoch": 2.468758796305388,
      "grad_norm": 0.7777289748191833,
      "learning_rate": 5.0958402245606644e-06,
      "loss": 0.0145,
      "step": 1508540
    },
    {
      "epoch": 2.4687915267440412,
      "grad_norm": 0.08574939519166946,
      "learning_rate": 5.095774332347146e-06,
      "loss": 0.0214,
      "step": 1508560
    },
    {
      "epoch": 2.468824257182695,
      "grad_norm": 0.39499804377555847,
      "learning_rate": 5.09570844013363e-06,
      "loss": 0.0114,
      "step": 1508580
    },
    {
      "epoch": 2.468856987621348,
      "grad_norm": 0.13895301520824432,
      "learning_rate": 5.095642547920112e-06,
      "loss": 0.0145,
      "step": 1508600
    },
    {
      "epoch": 2.4688897180600016,
      "grad_norm": 0.7662295699119568,
      "learning_rate": 5.095576655706595e-06,
      "loss": 0.0098,
      "step": 1508620
    },
    {
      "epoch": 2.4689224484986547,
      "grad_norm": 0.2345755249261856,
      "learning_rate": 5.095510763493079e-06,
      "loss": 0.0135,
      "step": 1508640
    },
    {
      "epoch": 2.468955178937308,
      "grad_norm": 0.3318236470222473,
      "learning_rate": 5.095444871279561e-06,
      "loss": 0.0127,
      "step": 1508660
    },
    {
      "epoch": 2.4689879093759615,
      "grad_norm": 2.6026318073272705,
      "learning_rate": 5.0953789790660445e-06,
      "loss": 0.0144,
      "step": 1508680
    },
    {
      "epoch": 2.4690206398146146,
      "grad_norm": 0.4000870883464813,
      "learning_rate": 5.095313086852527e-06,
      "loss": 0.0128,
      "step": 1508700
    },
    {
      "epoch": 2.469053370253268,
      "grad_norm": 0.5399350523948669,
      "learning_rate": 5.09524719463901e-06,
      "loss": 0.015,
      "step": 1508720
    },
    {
      "epoch": 2.4690861006919214,
      "grad_norm": 0.2123505175113678,
      "learning_rate": 5.095181302425493e-06,
      "loss": 0.0226,
      "step": 1508740
    },
    {
      "epoch": 2.469118831130575,
      "grad_norm": 0.4097880721092224,
      "learning_rate": 5.095115410211976e-06,
      "loss": 0.011,
      "step": 1508760
    },
    {
      "epoch": 2.469151561569228,
      "grad_norm": 0.2851158082485199,
      "learning_rate": 5.095049517998458e-06,
      "loss": 0.0112,
      "step": 1508780
    },
    {
      "epoch": 2.4691842920078813,
      "grad_norm": 0.12223833799362183,
      "learning_rate": 5.094983625784942e-06,
      "loss": 0.0118,
      "step": 1508800
    },
    {
      "epoch": 2.469217022446535,
      "grad_norm": 0.3905484974384308,
      "learning_rate": 5.094917733571424e-06,
      "loss": 0.0104,
      "step": 1508820
    },
    {
      "epoch": 2.469249752885188,
      "grad_norm": 0.4079151749610901,
      "learning_rate": 5.094851841357907e-06,
      "loss": 0.014,
      "step": 1508840
    },
    {
      "epoch": 2.4692824833238416,
      "grad_norm": 0.3176047205924988,
      "learning_rate": 5.09478594914439e-06,
      "loss": 0.0148,
      "step": 1508860
    },
    {
      "epoch": 2.4693152137624947,
      "grad_norm": 0.3392742872238159,
      "learning_rate": 5.094720056930873e-06,
      "loss": 0.0158,
      "step": 1508880
    },
    {
      "epoch": 2.4693479442011483,
      "grad_norm": 1.3240966796875,
      "learning_rate": 5.0946541647173555e-06,
      "loss": 0.0166,
      "step": 1508900
    },
    {
      "epoch": 2.4693806746398015,
      "grad_norm": 0.2667126953601837,
      "learning_rate": 5.094588272503839e-06,
      "loss": 0.0178,
      "step": 1508920
    },
    {
      "epoch": 2.4694134050784546,
      "grad_norm": 0.1394965499639511,
      "learning_rate": 5.094522380290321e-06,
      "loss": 0.0159,
      "step": 1508940
    },
    {
      "epoch": 2.4694461355171082,
      "grad_norm": 0.2812967598438263,
      "learning_rate": 5.0944564880768045e-06,
      "loss": 0.0157,
      "step": 1508960
    },
    {
      "epoch": 2.4694788659557614,
      "grad_norm": 0.2748018503189087,
      "learning_rate": 5.094390595863288e-06,
      "loss": 0.0143,
      "step": 1508980
    },
    {
      "epoch": 2.469511596394415,
      "grad_norm": 0.4757111966609955,
      "learning_rate": 5.09432470364977e-06,
      "loss": 0.0111,
      "step": 1509000
    },
    {
      "epoch": 2.469544326833068,
      "grad_norm": 0.7205327153205872,
      "learning_rate": 5.094258811436254e-06,
      "loss": 0.0141,
      "step": 1509020
    },
    {
      "epoch": 2.4695770572717217,
      "grad_norm": 0.1012086570262909,
      "learning_rate": 5.0941929192227355e-06,
      "loss": 0.0136,
      "step": 1509040
    },
    {
      "epoch": 2.469609787710375,
      "grad_norm": 0.2615721523761749,
      "learning_rate": 5.094127027009219e-06,
      "loss": 0.0147,
      "step": 1509060
    },
    {
      "epoch": 2.469642518149028,
      "grad_norm": 0.4551042914390564,
      "learning_rate": 5.094061134795702e-06,
      "loss": 0.0098,
      "step": 1509080
    },
    {
      "epoch": 2.4696752485876816,
      "grad_norm": 0.11502508074045181,
      "learning_rate": 5.0939952425821845e-06,
      "loss": 0.0208,
      "step": 1509100
    },
    {
      "epoch": 2.4697079790263348,
      "grad_norm": 0.3060586750507355,
      "learning_rate": 5.093929350368667e-06,
      "loss": 0.011,
      "step": 1509120
    },
    {
      "epoch": 2.4697407094649884,
      "grad_norm": 0.39020952582359314,
      "learning_rate": 5.093863458155151e-06,
      "loss": 0.0122,
      "step": 1509140
    },
    {
      "epoch": 2.4697734399036415,
      "grad_norm": 0.1733381152153015,
      "learning_rate": 5.093797565941633e-06,
      "loss": 0.0123,
      "step": 1509160
    },
    {
      "epoch": 2.469806170342295,
      "grad_norm": 0.21847473084926605,
      "learning_rate": 5.093731673728116e-06,
      "loss": 0.0088,
      "step": 1509180
    },
    {
      "epoch": 2.4698389007809483,
      "grad_norm": 0.2405649721622467,
      "learning_rate": 5.093665781514598e-06,
      "loss": 0.0155,
      "step": 1509200
    },
    {
      "epoch": 2.4698716312196014,
      "grad_norm": 0.179507315158844,
      "learning_rate": 5.093599889301082e-06,
      "loss": 0.0126,
      "step": 1509220
    },
    {
      "epoch": 2.469904361658255,
      "grad_norm": 0.12485463172197342,
      "learning_rate": 5.093533997087564e-06,
      "loss": 0.0132,
      "step": 1509240
    },
    {
      "epoch": 2.469937092096908,
      "grad_norm": 0.24860712885856628,
      "learning_rate": 5.093468104874047e-06,
      "loss": 0.0106,
      "step": 1509260
    },
    {
      "epoch": 2.4699698225355617,
      "grad_norm": 0.5953415036201477,
      "learning_rate": 5.09340221266053e-06,
      "loss": 0.0217,
      "step": 1509280
    },
    {
      "epoch": 2.470002552974215,
      "grad_norm": 0.21529190242290497,
      "learning_rate": 5.093336320447013e-06,
      "loss": 0.0143,
      "step": 1509300
    },
    {
      "epoch": 2.4700352834128685,
      "grad_norm": 0.2477804571390152,
      "learning_rate": 5.093270428233496e-06,
      "loss": 0.0163,
      "step": 1509320
    },
    {
      "epoch": 2.4700680138515216,
      "grad_norm": 0.7385841608047485,
      "learning_rate": 5.093204536019979e-06,
      "loss": 0.0097,
      "step": 1509340
    },
    {
      "epoch": 2.470100744290175,
      "grad_norm": 0.39527544379234314,
      "learning_rate": 5.093138643806463e-06,
      "loss": 0.0152,
      "step": 1509360
    },
    {
      "epoch": 2.4701334747288284,
      "grad_norm": 0.16296078264713287,
      "learning_rate": 5.093072751592945e-06,
      "loss": 0.0145,
      "step": 1509380
    },
    {
      "epoch": 2.4701662051674815,
      "grad_norm": 0.8948462605476379,
      "learning_rate": 5.093006859379428e-06,
      "loss": 0.0122,
      "step": 1509400
    },
    {
      "epoch": 2.470198935606135,
      "grad_norm": 6.693898677825928,
      "learning_rate": 5.09294096716591e-06,
      "loss": 0.0153,
      "step": 1509420
    },
    {
      "epoch": 2.4702316660447883,
      "grad_norm": 0.4385032653808594,
      "learning_rate": 5.092875074952394e-06,
      "loss": 0.0137,
      "step": 1509440
    },
    {
      "epoch": 2.470264396483442,
      "grad_norm": 0.2798619866371155,
      "learning_rate": 5.0928091827388756e-06,
      "loss": 0.0165,
      "step": 1509460
    },
    {
      "epoch": 2.470297126922095,
      "grad_norm": 0.1144915223121643,
      "learning_rate": 5.092743290525359e-06,
      "loss": 0.0098,
      "step": 1509480
    },
    {
      "epoch": 2.470329857360748,
      "grad_norm": 0.6185877919197083,
      "learning_rate": 5.092677398311842e-06,
      "loss": 0.0177,
      "step": 1509500
    },
    {
      "epoch": 2.4703625877994018,
      "grad_norm": 0.22094261646270752,
      "learning_rate": 5.092611506098325e-06,
      "loss": 0.0104,
      "step": 1509520
    },
    {
      "epoch": 2.470395318238055,
      "grad_norm": 0.738461971282959,
      "learning_rate": 5.092545613884807e-06,
      "loss": 0.0144,
      "step": 1509540
    },
    {
      "epoch": 2.4704280486767085,
      "grad_norm": 0.47663643956184387,
      "learning_rate": 5.092479721671291e-06,
      "loss": 0.0127,
      "step": 1509560
    },
    {
      "epoch": 2.4704607791153617,
      "grad_norm": 0.48568663001060486,
      "learning_rate": 5.092413829457773e-06,
      "loss": 0.012,
      "step": 1509580
    },
    {
      "epoch": 2.4704935095540153,
      "grad_norm": 0.3324776887893677,
      "learning_rate": 5.0923479372442564e-06,
      "loss": 0.0118,
      "step": 1509600
    },
    {
      "epoch": 2.4705262399926684,
      "grad_norm": 0.44818782806396484,
      "learning_rate": 5.092282045030738e-06,
      "loss": 0.0111,
      "step": 1509620
    },
    {
      "epoch": 2.4705589704313216,
      "grad_norm": 0.3136425316333771,
      "learning_rate": 5.092216152817222e-06,
      "loss": 0.0164,
      "step": 1509640
    },
    {
      "epoch": 2.470591700869975,
      "grad_norm": 0.11988130211830139,
      "learning_rate": 5.0921502606037055e-06,
      "loss": 0.0129,
      "step": 1509660
    },
    {
      "epoch": 2.4706244313086283,
      "grad_norm": 0.08861149847507477,
      "learning_rate": 5.092084368390187e-06,
      "loss": 0.0236,
      "step": 1509680
    },
    {
      "epoch": 2.470657161747282,
      "grad_norm": 0.5096815228462219,
      "learning_rate": 5.092018476176671e-06,
      "loss": 0.012,
      "step": 1509700
    },
    {
      "epoch": 2.470689892185935,
      "grad_norm": 0.1350584328174591,
      "learning_rate": 5.091952583963154e-06,
      "loss": 0.0097,
      "step": 1509720
    },
    {
      "epoch": 2.4707226226245886,
      "grad_norm": 0.20791590213775635,
      "learning_rate": 5.0918866917496365e-06,
      "loss": 0.0133,
      "step": 1509740
    },
    {
      "epoch": 2.470755353063242,
      "grad_norm": 0.28284016251564026,
      "learning_rate": 5.091820799536119e-06,
      "loss": 0.0204,
      "step": 1509760
    },
    {
      "epoch": 2.470788083501895,
      "grad_norm": 0.13285958766937256,
      "learning_rate": 5.091754907322603e-06,
      "loss": 0.0188,
      "step": 1509780
    },
    {
      "epoch": 2.4708208139405485,
      "grad_norm": 0.5411964058876038,
      "learning_rate": 5.091689015109085e-06,
      "loss": 0.0167,
      "step": 1509800
    },
    {
      "epoch": 2.4708535443792017,
      "grad_norm": 0.3485545516014099,
      "learning_rate": 5.091623122895568e-06,
      "loss": 0.0232,
      "step": 1509820
    },
    {
      "epoch": 2.4708862748178553,
      "grad_norm": 0.41079476475715637,
      "learning_rate": 5.09155723068205e-06,
      "loss": 0.0198,
      "step": 1509840
    },
    {
      "epoch": 2.4709190052565084,
      "grad_norm": 0.31325092911720276,
      "learning_rate": 5.091491338468534e-06,
      "loss": 0.0208,
      "step": 1509860
    },
    {
      "epoch": 2.470951735695162,
      "grad_norm": 0.2938815951347351,
      "learning_rate": 5.0914254462550165e-06,
      "loss": 0.0204,
      "step": 1509880
    },
    {
      "epoch": 2.470984466133815,
      "grad_norm": 0.42386969923973083,
      "learning_rate": 5.091359554041499e-06,
      "loss": 0.0128,
      "step": 1509900
    },
    {
      "epoch": 2.4710171965724683,
      "grad_norm": 0.32324016094207764,
      "learning_rate": 5.091293661827982e-06,
      "loss": 0.0109,
      "step": 1509920
    },
    {
      "epoch": 2.471049927011122,
      "grad_norm": 0.4211689531803131,
      "learning_rate": 5.0912277696144655e-06,
      "loss": 0.0139,
      "step": 1509940
    },
    {
      "epoch": 2.471082657449775,
      "grad_norm": 1.3585273027420044,
      "learning_rate": 5.0911618774009474e-06,
      "loss": 0.0168,
      "step": 1509960
    },
    {
      "epoch": 2.4711153878884287,
      "grad_norm": 0.41715386509895325,
      "learning_rate": 5.091095985187431e-06,
      "loss": 0.0128,
      "step": 1509980
    },
    {
      "epoch": 2.471148118327082,
      "grad_norm": 0.4457697868347168,
      "learning_rate": 5.091030092973913e-06,
      "loss": 0.0158,
      "step": 1510000
    },
    {
      "epoch": 2.4711808487657354,
      "grad_norm": 0.27118414640426636,
      "learning_rate": 5.0909642007603965e-06,
      "loss": 0.016,
      "step": 1510020
    },
    {
      "epoch": 2.4712135792043886,
      "grad_norm": 0.2016092985868454,
      "learning_rate": 5.09089830854688e-06,
      "loss": 0.021,
      "step": 1510040
    },
    {
      "epoch": 2.4712463096430417,
      "grad_norm": 0.19280850887298584,
      "learning_rate": 5.090832416333362e-06,
      "loss": 0.014,
      "step": 1510060
    },
    {
      "epoch": 2.4712790400816953,
      "grad_norm": 0.37023207545280457,
      "learning_rate": 5.0907665241198456e-06,
      "loss": 0.0201,
      "step": 1510080
    },
    {
      "epoch": 2.4713117705203484,
      "grad_norm": 0.15073706209659576,
      "learning_rate": 5.090700631906328e-06,
      "loss": 0.0143,
      "step": 1510100
    },
    {
      "epoch": 2.471344500959002,
      "grad_norm": 0.6121689081192017,
      "learning_rate": 5.090634739692811e-06,
      "loss": 0.0126,
      "step": 1510120
    },
    {
      "epoch": 2.471377231397655,
      "grad_norm": 0.64711993932724,
      "learning_rate": 5.090568847479294e-06,
      "loss": 0.0102,
      "step": 1510140
    },
    {
      "epoch": 2.4714099618363083,
      "grad_norm": 0.5702328085899353,
      "learning_rate": 5.090502955265777e-06,
      "loss": 0.0179,
      "step": 1510160
    },
    {
      "epoch": 2.471442692274962,
      "grad_norm": 0.3803505599498749,
      "learning_rate": 5.090437063052259e-06,
      "loss": 0.0143,
      "step": 1510180
    },
    {
      "epoch": 2.471475422713615,
      "grad_norm": 0.19257818162441254,
      "learning_rate": 5.090371170838743e-06,
      "loss": 0.0176,
      "step": 1510200
    },
    {
      "epoch": 2.4715081531522687,
      "grad_norm": 0.46698278188705444,
      "learning_rate": 5.090305278625225e-06,
      "loss": 0.0182,
      "step": 1510220
    },
    {
      "epoch": 2.471540883590922,
      "grad_norm": 0.46381789445877075,
      "learning_rate": 5.090239386411708e-06,
      "loss": 0.0192,
      "step": 1510240
    },
    {
      "epoch": 2.471573614029575,
      "grad_norm": 0.5162516832351685,
      "learning_rate": 5.09017349419819e-06,
      "loss": 0.0158,
      "step": 1510260
    },
    {
      "epoch": 2.4716063444682286,
      "grad_norm": 0.34947794675827026,
      "learning_rate": 5.090107601984674e-06,
      "loss": 0.0167,
      "step": 1510280
    },
    {
      "epoch": 2.4716390749068817,
      "grad_norm": 0.15299589931964874,
      "learning_rate": 5.0900417097711566e-06,
      "loss": 0.0149,
      "step": 1510300
    },
    {
      "epoch": 2.4716718053455353,
      "grad_norm": 0.21106968820095062,
      "learning_rate": 5.089975817557639e-06,
      "loss": 0.0163,
      "step": 1510320
    },
    {
      "epoch": 2.4717045357841885,
      "grad_norm": 3.8843111991882324,
      "learning_rate": 5.089909925344122e-06,
      "loss": 0.0187,
      "step": 1510340
    },
    {
      "epoch": 2.471737266222842,
      "grad_norm": 0.6375740766525269,
      "learning_rate": 5.089844033130606e-06,
      "loss": 0.0178,
      "step": 1510360
    },
    {
      "epoch": 2.471769996661495,
      "grad_norm": 0.21821361780166626,
      "learning_rate": 5.089778140917088e-06,
      "loss": 0.0145,
      "step": 1510380
    },
    {
      "epoch": 2.4718027271001484,
      "grad_norm": 0.1477033644914627,
      "learning_rate": 5.089712248703571e-06,
      "loss": 0.0237,
      "step": 1510400
    },
    {
      "epoch": 2.471835457538802,
      "grad_norm": 0.17783640325069427,
      "learning_rate": 5.089646356490055e-06,
      "loss": 0.0116,
      "step": 1510420
    },
    {
      "epoch": 2.471868187977455,
      "grad_norm": 0.23818644881248474,
      "learning_rate": 5.089580464276537e-06,
      "loss": 0.0104,
      "step": 1510440
    },
    {
      "epoch": 2.4719009184161087,
      "grad_norm": 0.8276405930519104,
      "learning_rate": 5.08951457206302e-06,
      "loss": 0.0138,
      "step": 1510460
    },
    {
      "epoch": 2.471933648854762,
      "grad_norm": 0.21064814925193787,
      "learning_rate": 5.089448679849502e-06,
      "loss": 0.0125,
      "step": 1510480
    },
    {
      "epoch": 2.4719663792934155,
      "grad_norm": 0.3530780076980591,
      "learning_rate": 5.089382787635986e-06,
      "loss": 0.0132,
      "step": 1510500
    },
    {
      "epoch": 2.4719991097320686,
      "grad_norm": 1.1612333059310913,
      "learning_rate": 5.089316895422468e-06,
      "loss": 0.017,
      "step": 1510520
    },
    {
      "epoch": 2.4720318401707218,
      "grad_norm": 0.17883756756782532,
      "learning_rate": 5.089251003208951e-06,
      "loss": 0.0116,
      "step": 1510540
    },
    {
      "epoch": 2.4720645706093753,
      "grad_norm": 0.046082884073257446,
      "learning_rate": 5.089185110995434e-06,
      "loss": 0.0176,
      "step": 1510560
    },
    {
      "epoch": 2.4720973010480285,
      "grad_norm": 0.29213422536849976,
      "learning_rate": 5.0891192187819174e-06,
      "loss": 0.0135,
      "step": 1510580
    },
    {
      "epoch": 2.472130031486682,
      "grad_norm": 0.31684020161628723,
      "learning_rate": 5.089053326568399e-06,
      "loss": 0.0088,
      "step": 1510600
    },
    {
      "epoch": 2.4721627619253352,
      "grad_norm": 0.14308324456214905,
      "learning_rate": 5.088987434354883e-06,
      "loss": 0.0171,
      "step": 1510620
    },
    {
      "epoch": 2.472195492363989,
      "grad_norm": 0.5621253848075867,
      "learning_rate": 5.088921542141365e-06,
      "loss": 0.0222,
      "step": 1510640
    },
    {
      "epoch": 2.472228222802642,
      "grad_norm": 0.10108524560928345,
      "learning_rate": 5.088855649927848e-06,
      "loss": 0.0127,
      "step": 1510660
    },
    {
      "epoch": 2.472260953241295,
      "grad_norm": 0.3740013539791107,
      "learning_rate": 5.088789757714331e-06,
      "loss": 0.0156,
      "step": 1510680
    },
    {
      "epoch": 2.4722936836799487,
      "grad_norm": 0.47489434480667114,
      "learning_rate": 5.088723865500814e-06,
      "loss": 0.0131,
      "step": 1510700
    },
    {
      "epoch": 2.472326414118602,
      "grad_norm": 0.10000541806221008,
      "learning_rate": 5.0886579732872975e-06,
      "loss": 0.0154,
      "step": 1510720
    },
    {
      "epoch": 2.4723591445572555,
      "grad_norm": 0.42973238229751587,
      "learning_rate": 5.08859208107378e-06,
      "loss": 0.0109,
      "step": 1510740
    },
    {
      "epoch": 2.4723918749959086,
      "grad_norm": 0.40463075041770935,
      "learning_rate": 5.088526188860263e-06,
      "loss": 0.0149,
      "step": 1510760
    },
    {
      "epoch": 2.472424605434562,
      "grad_norm": 0.3931387662887573,
      "learning_rate": 5.088460296646746e-06,
      "loss": 0.0202,
      "step": 1510780
    },
    {
      "epoch": 2.4724573358732154,
      "grad_norm": 0.4772990345954895,
      "learning_rate": 5.088394404433229e-06,
      "loss": 0.0117,
      "step": 1510800
    },
    {
      "epoch": 2.4724900663118685,
      "grad_norm": 0.150785431265831,
      "learning_rate": 5.088328512219711e-06,
      "loss": 0.009,
      "step": 1510820
    },
    {
      "epoch": 2.472522796750522,
      "grad_norm": 0.22505030035972595,
      "learning_rate": 5.088262620006195e-06,
      "loss": 0.0163,
      "step": 1510840
    },
    {
      "epoch": 2.4725555271891753,
      "grad_norm": 0.4028145670890808,
      "learning_rate": 5.088196727792677e-06,
      "loss": 0.0125,
      "step": 1510860
    },
    {
      "epoch": 2.472588257627829,
      "grad_norm": 1.2776942253112793,
      "learning_rate": 5.08813083557916e-06,
      "loss": 0.0151,
      "step": 1510880
    },
    {
      "epoch": 2.472620988066482,
      "grad_norm": 0.3826289176940918,
      "learning_rate": 5.088064943365643e-06,
      "loss": 0.0209,
      "step": 1510900
    },
    {
      "epoch": 2.4726537185051356,
      "grad_norm": 0.3444773852825165,
      "learning_rate": 5.087999051152126e-06,
      "loss": 0.0135,
      "step": 1510920
    },
    {
      "epoch": 2.4726864489437888,
      "grad_norm": 0.3006333112716675,
      "learning_rate": 5.0879331589386085e-06,
      "loss": 0.0127,
      "step": 1510940
    },
    {
      "epoch": 2.472719179382442,
      "grad_norm": 0.5975637435913086,
      "learning_rate": 5.087867266725092e-06,
      "loss": 0.0147,
      "step": 1510960
    },
    {
      "epoch": 2.4727519098210955,
      "grad_norm": 0.7208254933357239,
      "learning_rate": 5.087801374511574e-06,
      "loss": 0.0157,
      "step": 1510980
    },
    {
      "epoch": 2.4727846402597486,
      "grad_norm": 0.28254005312919617,
      "learning_rate": 5.0877354822980575e-06,
      "loss": 0.0136,
      "step": 1511000
    },
    {
      "epoch": 2.4728173706984022,
      "grad_norm": 0.4922013580799103,
      "learning_rate": 5.087669590084539e-06,
      "loss": 0.0185,
      "step": 1511020
    },
    {
      "epoch": 2.4728501011370554,
      "grad_norm": 0.23611445724964142,
      "learning_rate": 5.087603697871023e-06,
      "loss": 0.0191,
      "step": 1511040
    },
    {
      "epoch": 2.472882831575709,
      "grad_norm": 0.2599816918373108,
      "learning_rate": 5.087537805657506e-06,
      "loss": 0.0118,
      "step": 1511060
    },
    {
      "epoch": 2.472915562014362,
      "grad_norm": 2.488757371902466,
      "learning_rate": 5.0874719134439885e-06,
      "loss": 0.0201,
      "step": 1511080
    },
    {
      "epoch": 2.4729482924530153,
      "grad_norm": 0.4115169048309326,
      "learning_rate": 5.087406021230472e-06,
      "loss": 0.0134,
      "step": 1511100
    },
    {
      "epoch": 2.472981022891669,
      "grad_norm": 0.7907774448394775,
      "learning_rate": 5.087340129016955e-06,
      "loss": 0.0138,
      "step": 1511120
    },
    {
      "epoch": 2.473013753330322,
      "grad_norm": 0.17803530395030975,
      "learning_rate": 5.0872742368034376e-06,
      "loss": 0.018,
      "step": 1511140
    },
    {
      "epoch": 2.4730464837689756,
      "grad_norm": 0.21193155646324158,
      "learning_rate": 5.08720834458992e-06,
      "loss": 0.0092,
      "step": 1511160
    },
    {
      "epoch": 2.4730792142076288,
      "grad_norm": 0.2461024671792984,
      "learning_rate": 5.087142452376404e-06,
      "loss": 0.0193,
      "step": 1511180
    },
    {
      "epoch": 2.4731119446462824,
      "grad_norm": 0.08817620575428009,
      "learning_rate": 5.087076560162886e-06,
      "loss": 0.0127,
      "step": 1511200
    },
    {
      "epoch": 2.4731446750849355,
      "grad_norm": 0.4933488667011261,
      "learning_rate": 5.087010667949369e-06,
      "loss": 0.016,
      "step": 1511220
    },
    {
      "epoch": 2.4731774055235887,
      "grad_norm": 0.18142671883106232,
      "learning_rate": 5.086944775735851e-06,
      "loss": 0.0135,
      "step": 1511240
    },
    {
      "epoch": 2.4732101359622423,
      "grad_norm": 0.11839339882135391,
      "learning_rate": 5.086878883522335e-06,
      "loss": 0.0132,
      "step": 1511260
    },
    {
      "epoch": 2.4732428664008954,
      "grad_norm": 0.3227193355560303,
      "learning_rate": 5.086812991308817e-06,
      "loss": 0.0219,
      "step": 1511280
    },
    {
      "epoch": 2.473275596839549,
      "grad_norm": 0.23633427917957306,
      "learning_rate": 5.0867470990953e-06,
      "loss": 0.0105,
      "step": 1511300
    },
    {
      "epoch": 2.473308327278202,
      "grad_norm": 0.2773800492286682,
      "learning_rate": 5.086681206881783e-06,
      "loss": 0.012,
      "step": 1511320
    },
    {
      "epoch": 2.4733410577168558,
      "grad_norm": 0.6310173869132996,
      "learning_rate": 5.086615314668266e-06,
      "loss": 0.0126,
      "step": 1511340
    },
    {
      "epoch": 2.473373788155509,
      "grad_norm": 0.6137539744377136,
      "learning_rate": 5.0865494224547485e-06,
      "loss": 0.0145,
      "step": 1511360
    },
    {
      "epoch": 2.473406518594162,
      "grad_norm": 0.14838092029094696,
      "learning_rate": 5.086483530241232e-06,
      "loss": 0.0198,
      "step": 1511380
    },
    {
      "epoch": 2.4734392490328156,
      "grad_norm": 0.12358037382364273,
      "learning_rate": 5.086417638027714e-06,
      "loss": 0.017,
      "step": 1511400
    },
    {
      "epoch": 2.473471979471469,
      "grad_norm": 0.3517470061779022,
      "learning_rate": 5.086351745814198e-06,
      "loss": 0.022,
      "step": 1511420
    },
    {
      "epoch": 2.4735047099101224,
      "grad_norm": 0.08956243097782135,
      "learning_rate": 5.086285853600681e-06,
      "loss": 0.0158,
      "step": 1511440
    },
    {
      "epoch": 2.4735374403487755,
      "grad_norm": 0.2438858300447464,
      "learning_rate": 5.086219961387163e-06,
      "loss": 0.0099,
      "step": 1511460
    },
    {
      "epoch": 2.473570170787429,
      "grad_norm": 0.2081194818019867,
      "learning_rate": 5.086154069173647e-06,
      "loss": 0.0166,
      "step": 1511480
    },
    {
      "epoch": 2.4736029012260823,
      "grad_norm": 0.36797332763671875,
      "learning_rate": 5.0860881769601286e-06,
      "loss": 0.0132,
      "step": 1511500
    },
    {
      "epoch": 2.4736356316647354,
      "grad_norm": 0.18848572671413422,
      "learning_rate": 5.086022284746612e-06,
      "loss": 0.0193,
      "step": 1511520
    },
    {
      "epoch": 2.473668362103389,
      "grad_norm": 0.25583013892173767,
      "learning_rate": 5.085956392533095e-06,
      "loss": 0.0183,
      "step": 1511540
    },
    {
      "epoch": 2.473701092542042,
      "grad_norm": 0.5068121552467346,
      "learning_rate": 5.085890500319578e-06,
      "loss": 0.0135,
      "step": 1511560
    },
    {
      "epoch": 2.4737338229806958,
      "grad_norm": 0.9241947531700134,
      "learning_rate": 5.08582460810606e-06,
      "loss": 0.0171,
      "step": 1511580
    },
    {
      "epoch": 2.473766553419349,
      "grad_norm": 0.6804143190383911,
      "learning_rate": 5.085758715892544e-06,
      "loss": 0.0145,
      "step": 1511600
    },
    {
      "epoch": 2.473799283858002,
      "grad_norm": 0.5592646598815918,
      "learning_rate": 5.085692823679026e-06,
      "loss": 0.0178,
      "step": 1511620
    },
    {
      "epoch": 2.4738320142966557,
      "grad_norm": 0.4818096458911896,
      "learning_rate": 5.0856269314655094e-06,
      "loss": 0.0176,
      "step": 1511640
    },
    {
      "epoch": 2.473864744735309,
      "grad_norm": 0.25316736102104187,
      "learning_rate": 5.085561039251991e-06,
      "loss": 0.0147,
      "step": 1511660
    },
    {
      "epoch": 2.4738974751739624,
      "grad_norm": 0.6691174507141113,
      "learning_rate": 5.085495147038475e-06,
      "loss": 0.0132,
      "step": 1511680
    },
    {
      "epoch": 2.4739302056126156,
      "grad_norm": 0.5634639263153076,
      "learning_rate": 5.085429254824958e-06,
      "loss": 0.014,
      "step": 1511700
    },
    {
      "epoch": 2.4739629360512687,
      "grad_norm": 0.2646699547767639,
      "learning_rate": 5.08536336261144e-06,
      "loss": 0.0119,
      "step": 1511720
    },
    {
      "epoch": 2.4739956664899223,
      "grad_norm": 0.08358512818813324,
      "learning_rate": 5.085297470397923e-06,
      "loss": 0.012,
      "step": 1511740
    },
    {
      "epoch": 2.4740283969285755,
      "grad_norm": 0.3560546338558197,
      "learning_rate": 5.085231578184407e-06,
      "loss": 0.0183,
      "step": 1511760
    },
    {
      "epoch": 2.474061127367229,
      "grad_norm": 0.3473763167858124,
      "learning_rate": 5.0851656859708895e-06,
      "loss": 0.0175,
      "step": 1511780
    },
    {
      "epoch": 2.474093857805882,
      "grad_norm": 0.9153735041618347,
      "learning_rate": 5.085099793757372e-06,
      "loss": 0.0234,
      "step": 1511800
    },
    {
      "epoch": 2.474126588244536,
      "grad_norm": 0.2340044528245926,
      "learning_rate": 5.085033901543856e-06,
      "loss": 0.0155,
      "step": 1511820
    },
    {
      "epoch": 2.474159318683189,
      "grad_norm": 0.35939377546310425,
      "learning_rate": 5.084968009330338e-06,
      "loss": 0.0124,
      "step": 1511840
    },
    {
      "epoch": 2.474192049121842,
      "grad_norm": 2.5717875957489014,
      "learning_rate": 5.084902117116821e-06,
      "loss": 0.0157,
      "step": 1511860
    },
    {
      "epoch": 2.4742247795604957,
      "grad_norm": 1.1708325147628784,
      "learning_rate": 5.084836224903303e-06,
      "loss": 0.0207,
      "step": 1511880
    },
    {
      "epoch": 2.474257509999149,
      "grad_norm": 0.18733510375022888,
      "learning_rate": 5.084770332689787e-06,
      "loss": 0.0231,
      "step": 1511900
    },
    {
      "epoch": 2.4742902404378024,
      "grad_norm": 0.22127440571784973,
      "learning_rate": 5.0847044404762695e-06,
      "loss": 0.0147,
      "step": 1511920
    },
    {
      "epoch": 2.4743229708764556,
      "grad_norm": 0.09438126534223557,
      "learning_rate": 5.084638548262752e-06,
      "loss": 0.0112,
      "step": 1511940
    },
    {
      "epoch": 2.474355701315109,
      "grad_norm": 0.27341023087501526,
      "learning_rate": 5.084572656049235e-06,
      "loss": 0.008,
      "step": 1511960
    },
    {
      "epoch": 2.4743884317537623,
      "grad_norm": 0.4758070707321167,
      "learning_rate": 5.0845067638357185e-06,
      "loss": 0.0148,
      "step": 1511980
    },
    {
      "epoch": 2.4744211621924155,
      "grad_norm": 0.901038646697998,
      "learning_rate": 5.0844408716222004e-06,
      "loss": 0.0164,
      "step": 1512000
    },
    {
      "epoch": 2.474453892631069,
      "grad_norm": 0.2318124771118164,
      "learning_rate": 5.084374979408684e-06,
      "loss": 0.0155,
      "step": 1512020
    },
    {
      "epoch": 2.4744866230697222,
      "grad_norm": 0.12409397959709167,
      "learning_rate": 5.084309087195166e-06,
      "loss": 0.0083,
      "step": 1512040
    },
    {
      "epoch": 2.474519353508376,
      "grad_norm": 0.1616477519273758,
      "learning_rate": 5.0842431949816495e-06,
      "loss": 0.0127,
      "step": 1512060
    },
    {
      "epoch": 2.474552083947029,
      "grad_norm": 0.36433467268943787,
      "learning_rate": 5.084177302768131e-06,
      "loss": 0.0162,
      "step": 1512080
    },
    {
      "epoch": 2.4745848143856826,
      "grad_norm": 0.45292016863822937,
      "learning_rate": 5.084111410554615e-06,
      "loss": 0.0114,
      "step": 1512100
    },
    {
      "epoch": 2.4746175448243357,
      "grad_norm": 0.21022838354110718,
      "learning_rate": 5.084045518341098e-06,
      "loss": 0.0128,
      "step": 1512120
    },
    {
      "epoch": 2.474650275262989,
      "grad_norm": 0.404489666223526,
      "learning_rate": 5.083979626127581e-06,
      "loss": 0.0105,
      "step": 1512140
    },
    {
      "epoch": 2.4746830057016425,
      "grad_norm": 0.5446610450744629,
      "learning_rate": 5.083913733914064e-06,
      "loss": 0.0161,
      "step": 1512160
    },
    {
      "epoch": 2.4747157361402956,
      "grad_norm": 0.22613972425460815,
      "learning_rate": 5.083847841700547e-06,
      "loss": 0.0153,
      "step": 1512180
    },
    {
      "epoch": 2.474748466578949,
      "grad_norm": 0.42172807455062866,
      "learning_rate": 5.08378194948703e-06,
      "loss": 0.0184,
      "step": 1512200
    },
    {
      "epoch": 2.4747811970176024,
      "grad_norm": 0.1619158536195755,
      "learning_rate": 5.083716057273512e-06,
      "loss": 0.0079,
      "step": 1512220
    },
    {
      "epoch": 2.474813927456256,
      "grad_norm": 0.5062246918678284,
      "learning_rate": 5.083650165059996e-06,
      "loss": 0.0197,
      "step": 1512240
    },
    {
      "epoch": 2.474846657894909,
      "grad_norm": 0.22672420740127563,
      "learning_rate": 5.083584272846478e-06,
      "loss": 0.0127,
      "step": 1512260
    },
    {
      "epoch": 2.4748793883335622,
      "grad_norm": 0.5858206152915955,
      "learning_rate": 5.083518380632961e-06,
      "loss": 0.0125,
      "step": 1512280
    },
    {
      "epoch": 2.474912118772216,
      "grad_norm": 0.34818729758262634,
      "learning_rate": 5.083452488419443e-06,
      "loss": 0.0182,
      "step": 1512300
    },
    {
      "epoch": 2.474944849210869,
      "grad_norm": 0.9538407325744629,
      "learning_rate": 5.083386596205927e-06,
      "loss": 0.0158,
      "step": 1512320
    },
    {
      "epoch": 2.4749775796495226,
      "grad_norm": 0.1490069329738617,
      "learning_rate": 5.0833207039924096e-06,
      "loss": 0.0092,
      "step": 1512340
    },
    {
      "epoch": 2.4750103100881757,
      "grad_norm": 0.23952855169773102,
      "learning_rate": 5.083254811778892e-06,
      "loss": 0.0162,
      "step": 1512360
    },
    {
      "epoch": 2.4750430405268293,
      "grad_norm": 0.09228990226984024,
      "learning_rate": 5.083188919565375e-06,
      "loss": 0.0121,
      "step": 1512380
    },
    {
      "epoch": 2.4750757709654825,
      "grad_norm": 0.14939746260643005,
      "learning_rate": 5.083123027351859e-06,
      "loss": 0.0135,
      "step": 1512400
    },
    {
      "epoch": 2.4751085014041356,
      "grad_norm": 0.4202321767807007,
      "learning_rate": 5.0830571351383405e-06,
      "loss": 0.0095,
      "step": 1512420
    },
    {
      "epoch": 2.4751412318427892,
      "grad_norm": 0.11028917133808136,
      "learning_rate": 5.082991242924824e-06,
      "loss": 0.0201,
      "step": 1512440
    },
    {
      "epoch": 2.4751739622814424,
      "grad_norm": 0.13277794420719147,
      "learning_rate": 5.082925350711306e-06,
      "loss": 0.0223,
      "step": 1512460
    },
    {
      "epoch": 2.475206692720096,
      "grad_norm": 0.21780048310756683,
      "learning_rate": 5.08285945849779e-06,
      "loss": 0.0126,
      "step": 1512480
    },
    {
      "epoch": 2.475239423158749,
      "grad_norm": 1.1292411088943481,
      "learning_rate": 5.082793566284273e-06,
      "loss": 0.0151,
      "step": 1512500
    },
    {
      "epoch": 2.4752721535974027,
      "grad_norm": 0.9448139071464539,
      "learning_rate": 5.082727674070755e-06,
      "loss": 0.0114,
      "step": 1512520
    },
    {
      "epoch": 2.475304884036056,
      "grad_norm": 0.3437078297138214,
      "learning_rate": 5.082661781857239e-06,
      "loss": 0.0173,
      "step": 1512540
    },
    {
      "epoch": 2.475337614474709,
      "grad_norm": 0.6442792415618896,
      "learning_rate": 5.082595889643721e-06,
      "loss": 0.0202,
      "step": 1512560
    },
    {
      "epoch": 2.4753703449133626,
      "grad_norm": 0.15940707921981812,
      "learning_rate": 5.082529997430204e-06,
      "loss": 0.015,
      "step": 1512580
    },
    {
      "epoch": 2.4754030753520158,
      "grad_norm": 0.6750775575637817,
      "learning_rate": 5.082464105216687e-06,
      "loss": 0.0199,
      "step": 1512600
    },
    {
      "epoch": 2.4754358057906694,
      "grad_norm": 0.5897478461265564,
      "learning_rate": 5.0823982130031705e-06,
      "loss": 0.0149,
      "step": 1512620
    },
    {
      "epoch": 2.4754685362293225,
      "grad_norm": 0.194229856133461,
      "learning_rate": 5.082332320789652e-06,
      "loss": 0.016,
      "step": 1512640
    },
    {
      "epoch": 2.475501266667976,
      "grad_norm": 0.5133619904518127,
      "learning_rate": 5.082266428576136e-06,
      "loss": 0.0128,
      "step": 1512660
    },
    {
      "epoch": 2.4755339971066292,
      "grad_norm": 0.5123594403266907,
      "learning_rate": 5.082200536362618e-06,
      "loss": 0.0164,
      "step": 1512680
    },
    {
      "epoch": 2.4755667275452824,
      "grad_norm": 0.2643571197986603,
      "learning_rate": 5.082134644149101e-06,
      "loss": 0.0118,
      "step": 1512700
    },
    {
      "epoch": 2.475599457983936,
      "grad_norm": 0.18556106090545654,
      "learning_rate": 5.082068751935584e-06,
      "loss": 0.0162,
      "step": 1512720
    },
    {
      "epoch": 2.475632188422589,
      "grad_norm": 0.6918515563011169,
      "learning_rate": 5.082002859722067e-06,
      "loss": 0.0226,
      "step": 1512740
    },
    {
      "epoch": 2.4756649188612427,
      "grad_norm": 0.11252441257238388,
      "learning_rate": 5.08193696750855e-06,
      "loss": 0.0144,
      "step": 1512760
    },
    {
      "epoch": 2.475697649299896,
      "grad_norm": 0.5736427307128906,
      "learning_rate": 5.081871075295033e-06,
      "loss": 0.0132,
      "step": 1512780
    },
    {
      "epoch": 2.4757303797385495,
      "grad_norm": 0.12230866402387619,
      "learning_rate": 5.081805183081515e-06,
      "loss": 0.0132,
      "step": 1512800
    },
    {
      "epoch": 2.4757631101772026,
      "grad_norm": 1.6541563272476196,
      "learning_rate": 5.081739290867999e-06,
      "loss": 0.0129,
      "step": 1512820
    },
    {
      "epoch": 2.475795840615856,
      "grad_norm": 1.4397389888763428,
      "learning_rate": 5.081673398654482e-06,
      "loss": 0.0193,
      "step": 1512840
    },
    {
      "epoch": 2.4758285710545094,
      "grad_norm": 0.5584819912910461,
      "learning_rate": 5.081607506440964e-06,
      "loss": 0.0208,
      "step": 1512860
    },
    {
      "epoch": 2.4758613014931625,
      "grad_norm": 0.3636571764945984,
      "learning_rate": 5.081541614227448e-06,
      "loss": 0.0108,
      "step": 1512880
    },
    {
      "epoch": 2.475894031931816,
      "grad_norm": 0.26286861300468445,
      "learning_rate": 5.08147572201393e-06,
      "loss": 0.0197,
      "step": 1512900
    },
    {
      "epoch": 2.4759267623704693,
      "grad_norm": 0.3516112267971039,
      "learning_rate": 5.081409829800413e-06,
      "loss": 0.0109,
      "step": 1512920
    },
    {
      "epoch": 2.475959492809123,
      "grad_norm": 0.6256884336471558,
      "learning_rate": 5.081343937586896e-06,
      "loss": 0.0183,
      "step": 1512940
    },
    {
      "epoch": 2.475992223247776,
      "grad_norm": 0.219292551279068,
      "learning_rate": 5.081278045373379e-06,
      "loss": 0.0129,
      "step": 1512960
    },
    {
      "epoch": 2.476024953686429,
      "grad_norm": 0.282907634973526,
      "learning_rate": 5.0812121531598615e-06,
      "loss": 0.0139,
      "step": 1512980
    },
    {
      "epoch": 2.4760576841250828,
      "grad_norm": 0.2015342265367508,
      "learning_rate": 5.081146260946345e-06,
      "loss": 0.0137,
      "step": 1513000
    },
    {
      "epoch": 2.476090414563736,
      "grad_norm": 0.31792229413986206,
      "learning_rate": 5.081080368732827e-06,
      "loss": 0.0107,
      "step": 1513020
    },
    {
      "epoch": 2.4761231450023895,
      "grad_norm": 0.5016521215438843,
      "learning_rate": 5.0810144765193105e-06,
      "loss": 0.0086,
      "step": 1513040
    },
    {
      "epoch": 2.4761558754410427,
      "grad_norm": 0.2634561359882355,
      "learning_rate": 5.0809485843057924e-06,
      "loss": 0.0167,
      "step": 1513060
    },
    {
      "epoch": 2.4761886058796962,
      "grad_norm": 0.44143620133399963,
      "learning_rate": 5.080882692092276e-06,
      "loss": 0.0133,
      "step": 1513080
    },
    {
      "epoch": 2.4762213363183494,
      "grad_norm": 1.2734211683273315,
      "learning_rate": 5.080816799878758e-06,
      "loss": 0.0173,
      "step": 1513100
    },
    {
      "epoch": 2.4762540667570025,
      "grad_norm": 0.43574243783950806,
      "learning_rate": 5.0807509076652415e-06,
      "loss": 0.0172,
      "step": 1513120
    },
    {
      "epoch": 2.476286797195656,
      "grad_norm": 0.3555520176887512,
      "learning_rate": 5.080685015451724e-06,
      "loss": 0.0148,
      "step": 1513140
    },
    {
      "epoch": 2.4763195276343093,
      "grad_norm": 0.5396213531494141,
      "learning_rate": 5.080619123238207e-06,
      "loss": 0.0123,
      "step": 1513160
    },
    {
      "epoch": 2.476352258072963,
      "grad_norm": 0.1369950771331787,
      "learning_rate": 5.0805532310246906e-06,
      "loss": 0.0135,
      "step": 1513180
    },
    {
      "epoch": 2.476384988511616,
      "grad_norm": 0.25128644704818726,
      "learning_rate": 5.080487338811173e-06,
      "loss": 0.0126,
      "step": 1513200
    },
    {
      "epoch": 2.476417718950269,
      "grad_norm": 0.28943270444869995,
      "learning_rate": 5.080421446597656e-06,
      "loss": 0.0154,
      "step": 1513220
    },
    {
      "epoch": 2.476450449388923,
      "grad_norm": 0.32624438405036926,
      "learning_rate": 5.080355554384139e-06,
      "loss": 0.0183,
      "step": 1513240
    },
    {
      "epoch": 2.476483179827576,
      "grad_norm": 0.1501927673816681,
      "learning_rate": 5.080289662170622e-06,
      "loss": 0.0146,
      "step": 1513260
    },
    {
      "epoch": 2.4765159102662295,
      "grad_norm": 0.3862634301185608,
      "learning_rate": 5.080223769957104e-06,
      "loss": 0.0158,
      "step": 1513280
    },
    {
      "epoch": 2.4765486407048827,
      "grad_norm": 0.3801063895225525,
      "learning_rate": 5.080157877743588e-06,
      "loss": 0.0123,
      "step": 1513300
    },
    {
      "epoch": 2.476581371143536,
      "grad_norm": 0.19203020632266998,
      "learning_rate": 5.08009198553007e-06,
      "loss": 0.0118,
      "step": 1513320
    },
    {
      "epoch": 2.4766141015821894,
      "grad_norm": 0.4148808717727661,
      "learning_rate": 5.080026093316553e-06,
      "loss": 0.0144,
      "step": 1513340
    },
    {
      "epoch": 2.4766468320208426,
      "grad_norm": 0.3158309757709503,
      "learning_rate": 5.079960201103036e-06,
      "loss": 0.011,
      "step": 1513360
    },
    {
      "epoch": 2.476679562459496,
      "grad_norm": 0.4226689338684082,
      "learning_rate": 5.079894308889519e-06,
      "loss": 0.0161,
      "step": 1513380
    },
    {
      "epoch": 2.4767122928981493,
      "grad_norm": 0.3540268540382385,
      "learning_rate": 5.0798284166760015e-06,
      "loss": 0.0112,
      "step": 1513400
    },
    {
      "epoch": 2.476745023336803,
      "grad_norm": 0.26525741815567017,
      "learning_rate": 5.079762524462485e-06,
      "loss": 0.016,
      "step": 1513420
    },
    {
      "epoch": 2.476777753775456,
      "grad_norm": 0.03130500391125679,
      "learning_rate": 5.079696632248967e-06,
      "loss": 0.019,
      "step": 1513440
    },
    {
      "epoch": 2.476810484214109,
      "grad_norm": 1.3621712923049927,
      "learning_rate": 5.079630740035451e-06,
      "loss": 0.0238,
      "step": 1513460
    },
    {
      "epoch": 2.476843214652763,
      "grad_norm": 0.3097339868545532,
      "learning_rate": 5.0795648478219325e-06,
      "loss": 0.0112,
      "step": 1513480
    },
    {
      "epoch": 2.476875945091416,
      "grad_norm": 0.29283377528190613,
      "learning_rate": 5.079498955608416e-06,
      "loss": 0.0169,
      "step": 1513500
    },
    {
      "epoch": 2.4769086755300695,
      "grad_norm": 0.2697194218635559,
      "learning_rate": 5.079433063394899e-06,
      "loss": 0.0144,
      "step": 1513520
    },
    {
      "epoch": 2.4769414059687227,
      "grad_norm": 0.4187878370285034,
      "learning_rate": 5.0793671711813816e-06,
      "loss": 0.021,
      "step": 1513540
    },
    {
      "epoch": 2.4769741364073763,
      "grad_norm": 1.091074824333191,
      "learning_rate": 5.079301278967865e-06,
      "loss": 0.0176,
      "step": 1513560
    },
    {
      "epoch": 2.4770068668460294,
      "grad_norm": 0.5858058929443359,
      "learning_rate": 5.079235386754348e-06,
      "loss": 0.0214,
      "step": 1513580
    },
    {
      "epoch": 2.4770395972846826,
      "grad_norm": 0.7101943492889404,
      "learning_rate": 5.079169494540831e-06,
      "loss": 0.0127,
      "step": 1513600
    },
    {
      "epoch": 2.477072327723336,
      "grad_norm": 0.2185036987066269,
      "learning_rate": 5.079103602327313e-06,
      "loss": 0.0148,
      "step": 1513620
    },
    {
      "epoch": 2.4771050581619893,
      "grad_norm": 0.29431939125061035,
      "learning_rate": 5.079037710113797e-06,
      "loss": 0.0177,
      "step": 1513640
    },
    {
      "epoch": 2.477137788600643,
      "grad_norm": 0.2904879152774811,
      "learning_rate": 5.078971817900279e-06,
      "loss": 0.0154,
      "step": 1513660
    },
    {
      "epoch": 2.477170519039296,
      "grad_norm": 0.7075167298316956,
      "learning_rate": 5.0789059256867624e-06,
      "loss": 0.0147,
      "step": 1513680
    },
    {
      "epoch": 2.4772032494779497,
      "grad_norm": 0.09269411116838455,
      "learning_rate": 5.078840033473244e-06,
      "loss": 0.0169,
      "step": 1513700
    },
    {
      "epoch": 2.477235979916603,
      "grad_norm": 0.7196682691574097,
      "learning_rate": 5.078774141259728e-06,
      "loss": 0.0186,
      "step": 1513720
    },
    {
      "epoch": 2.477268710355256,
      "grad_norm": 0.5267550349235535,
      "learning_rate": 5.078708249046211e-06,
      "loss": 0.0152,
      "step": 1513740
    },
    {
      "epoch": 2.4773014407939096,
      "grad_norm": 0.27971604466438293,
      "learning_rate": 5.078642356832693e-06,
      "loss": 0.014,
      "step": 1513760
    },
    {
      "epoch": 2.4773341712325627,
      "grad_norm": 0.11067531257867813,
      "learning_rate": 5.078576464619176e-06,
      "loss": 0.0141,
      "step": 1513780
    },
    {
      "epoch": 2.4773669016712163,
      "grad_norm": 0.2040943205356598,
      "learning_rate": 5.07851057240566e-06,
      "loss": 0.0135,
      "step": 1513800
    },
    {
      "epoch": 2.4773996321098695,
      "grad_norm": 0.0753418579697609,
      "learning_rate": 5.078444680192142e-06,
      "loss": 0.01,
      "step": 1513820
    },
    {
      "epoch": 2.477432362548523,
      "grad_norm": 0.8481993675231934,
      "learning_rate": 5.078378787978625e-06,
      "loss": 0.0104,
      "step": 1513840
    },
    {
      "epoch": 2.477465092987176,
      "grad_norm": 0.47686684131622314,
      "learning_rate": 5.078312895765107e-06,
      "loss": 0.0187,
      "step": 1513860
    },
    {
      "epoch": 2.4774978234258294,
      "grad_norm": 0.8555511236190796,
      "learning_rate": 5.078247003551591e-06,
      "loss": 0.0185,
      "step": 1513880
    },
    {
      "epoch": 2.477530553864483,
      "grad_norm": 0.6947999596595764,
      "learning_rate": 5.078181111338074e-06,
      "loss": 0.0195,
      "step": 1513900
    },
    {
      "epoch": 2.477563284303136,
      "grad_norm": 0.27756643295288086,
      "learning_rate": 5.078115219124556e-06,
      "loss": 0.0105,
      "step": 1513920
    },
    {
      "epoch": 2.4775960147417897,
      "grad_norm": 0.18677657842636108,
      "learning_rate": 5.07804932691104e-06,
      "loss": 0.0104,
      "step": 1513940
    },
    {
      "epoch": 2.477628745180443,
      "grad_norm": 0.3329218924045563,
      "learning_rate": 5.0779834346975225e-06,
      "loss": 0.0102,
      "step": 1513960
    },
    {
      "epoch": 2.4776614756190964,
      "grad_norm": 0.2711235582828522,
      "learning_rate": 5.077917542484005e-06,
      "loss": 0.0178,
      "step": 1513980
    },
    {
      "epoch": 2.4776942060577496,
      "grad_norm": 0.6697987914085388,
      "learning_rate": 5.077851650270488e-06,
      "loss": 0.0172,
      "step": 1514000
    },
    {
      "epoch": 2.4777269364964027,
      "grad_norm": 0.22387637197971344,
      "learning_rate": 5.0777857580569716e-06,
      "loss": 0.014,
      "step": 1514020
    },
    {
      "epoch": 2.4777596669350563,
      "grad_norm": 0.11029697954654694,
      "learning_rate": 5.0777198658434534e-06,
      "loss": 0.0144,
      "step": 1514040
    },
    {
      "epoch": 2.4777923973737095,
      "grad_norm": 0.7841155529022217,
      "learning_rate": 5.077653973629937e-06,
      "loss": 0.0195,
      "step": 1514060
    },
    {
      "epoch": 2.477825127812363,
      "grad_norm": 0.16513684391975403,
      "learning_rate": 5.077588081416419e-06,
      "loss": 0.0145,
      "step": 1514080
    },
    {
      "epoch": 2.4778578582510162,
      "grad_norm": 0.6992232203483582,
      "learning_rate": 5.0775221892029025e-06,
      "loss": 0.0141,
      "step": 1514100
    },
    {
      "epoch": 2.47789058868967,
      "grad_norm": 0.3804425895214081,
      "learning_rate": 5.077456296989384e-06,
      "loss": 0.0157,
      "step": 1514120
    },
    {
      "epoch": 2.477923319128323,
      "grad_norm": 0.18724356591701508,
      "learning_rate": 5.077390404775868e-06,
      "loss": 0.0192,
      "step": 1514140
    },
    {
      "epoch": 2.477956049566976,
      "grad_norm": 0.32235392928123474,
      "learning_rate": 5.077324512562351e-06,
      "loss": 0.0139,
      "step": 1514160
    },
    {
      "epoch": 2.4779887800056297,
      "grad_norm": 0.2912953495979309,
      "learning_rate": 5.0772586203488335e-06,
      "loss": 0.0152,
      "step": 1514180
    },
    {
      "epoch": 2.478021510444283,
      "grad_norm": 0.38669174909591675,
      "learning_rate": 5.077192728135316e-06,
      "loss": 0.012,
      "step": 1514200
    },
    {
      "epoch": 2.4780542408829365,
      "grad_norm": 0.6149235367774963,
      "learning_rate": 5.0771268359218e-06,
      "loss": 0.0153,
      "step": 1514220
    },
    {
      "epoch": 2.4780869713215896,
      "grad_norm": 0.3366701006889343,
      "learning_rate": 5.0770609437082825e-06,
      "loss": 0.0211,
      "step": 1514240
    },
    {
      "epoch": 2.478119701760243,
      "grad_norm": 0.21357889473438263,
      "learning_rate": 5.076995051494765e-06,
      "loss": 0.0203,
      "step": 1514260
    },
    {
      "epoch": 2.4781524321988964,
      "grad_norm": 0.7202420830726624,
      "learning_rate": 5.076929159281249e-06,
      "loss": 0.0194,
      "step": 1514280
    },
    {
      "epoch": 2.4781851626375495,
      "grad_norm": 0.40271705389022827,
      "learning_rate": 5.076863267067731e-06,
      "loss": 0.0205,
      "step": 1514300
    },
    {
      "epoch": 2.478217893076203,
      "grad_norm": 0.13034850358963013,
      "learning_rate": 5.076797374854214e-06,
      "loss": 0.0129,
      "step": 1514320
    },
    {
      "epoch": 2.4782506235148563,
      "grad_norm": 0.3684205710887909,
      "learning_rate": 5.076731482640696e-06,
      "loss": 0.0157,
      "step": 1514340
    },
    {
      "epoch": 2.47828335395351,
      "grad_norm": 0.21423092484474182,
      "learning_rate": 5.07666559042718e-06,
      "loss": 0.0115,
      "step": 1514360
    },
    {
      "epoch": 2.478316084392163,
      "grad_norm": 0.20845384895801544,
      "learning_rate": 5.0765996982136626e-06,
      "loss": 0.0154,
      "step": 1514380
    },
    {
      "epoch": 2.4783488148308166,
      "grad_norm": 0.3692988157272339,
      "learning_rate": 5.076533806000145e-06,
      "loss": 0.0155,
      "step": 1514400
    },
    {
      "epoch": 2.4783815452694697,
      "grad_norm": 0.8736672401428223,
      "learning_rate": 5.076467913786628e-06,
      "loss": 0.0143,
      "step": 1514420
    },
    {
      "epoch": 2.478414275708123,
      "grad_norm": 0.6205989122390747,
      "learning_rate": 5.076402021573112e-06,
      "loss": 0.0196,
      "step": 1514440
    },
    {
      "epoch": 2.4784470061467765,
      "grad_norm": 0.7439665794372559,
      "learning_rate": 5.0763361293595935e-06,
      "loss": 0.0108,
      "step": 1514460
    },
    {
      "epoch": 2.4784797365854296,
      "grad_norm": 0.6685186624526978,
      "learning_rate": 5.076270237146077e-06,
      "loss": 0.0154,
      "step": 1514480
    },
    {
      "epoch": 2.4785124670240832,
      "grad_norm": 0.3235852122306824,
      "learning_rate": 5.076204344932559e-06,
      "loss": 0.0089,
      "step": 1514500
    },
    {
      "epoch": 2.4785451974627364,
      "grad_norm": 0.2580612301826477,
      "learning_rate": 5.076138452719043e-06,
      "loss": 0.0161,
      "step": 1514520
    },
    {
      "epoch": 2.47857792790139,
      "grad_norm": 0.510302722454071,
      "learning_rate": 5.076072560505525e-06,
      "loss": 0.0174,
      "step": 1514540
    },
    {
      "epoch": 2.478610658340043,
      "grad_norm": 0.3377099335193634,
      "learning_rate": 5.076006668292008e-06,
      "loss": 0.0146,
      "step": 1514560
    },
    {
      "epoch": 2.4786433887786963,
      "grad_norm": 0.4101420044898987,
      "learning_rate": 5.075940776078491e-06,
      "loss": 0.0149,
      "step": 1514580
    },
    {
      "epoch": 2.47867611921735,
      "grad_norm": 0.6682929992675781,
      "learning_rate": 5.075874883864974e-06,
      "loss": 0.0132,
      "step": 1514600
    },
    {
      "epoch": 2.478708849656003,
      "grad_norm": 0.38952216506004333,
      "learning_rate": 5.075808991651457e-06,
      "loss": 0.0149,
      "step": 1514620
    },
    {
      "epoch": 2.4787415800946566,
      "grad_norm": 0.8302571773529053,
      "learning_rate": 5.07574309943794e-06,
      "loss": 0.0146,
      "step": 1514640
    },
    {
      "epoch": 2.4787743105333098,
      "grad_norm": 0.3895707428455353,
      "learning_rate": 5.0756772072244235e-06,
      "loss": 0.0172,
      "step": 1514660
    },
    {
      "epoch": 2.478807040971963,
      "grad_norm": 0.16056492924690247,
      "learning_rate": 5.075611315010905e-06,
      "loss": 0.0178,
      "step": 1514680
    },
    {
      "epoch": 2.4788397714106165,
      "grad_norm": 0.5180795788764954,
      "learning_rate": 5.075545422797389e-06,
      "loss": 0.0127,
      "step": 1514700
    },
    {
      "epoch": 2.4788725018492697,
      "grad_norm": 0.28660833835601807,
      "learning_rate": 5.075479530583871e-06,
      "loss": 0.0111,
      "step": 1514720
    },
    {
      "epoch": 2.4789052322879233,
      "grad_norm": 0.24876542389392853,
      "learning_rate": 5.075413638370354e-06,
      "loss": 0.0158,
      "step": 1514740
    },
    {
      "epoch": 2.4789379627265764,
      "grad_norm": 0.40098443627357483,
      "learning_rate": 5.075347746156837e-06,
      "loss": 0.014,
      "step": 1514760
    },
    {
      "epoch": 2.4789706931652296,
      "grad_norm": 0.24662576615810394,
      "learning_rate": 5.07528185394332e-06,
      "loss": 0.0165,
      "step": 1514780
    },
    {
      "epoch": 2.479003423603883,
      "grad_norm": 0.84150630235672,
      "learning_rate": 5.075215961729803e-06,
      "loss": 0.0111,
      "step": 1514800
    },
    {
      "epoch": 2.4790361540425363,
      "grad_norm": 0.5309301018714905,
      "learning_rate": 5.075150069516286e-06,
      "loss": 0.0139,
      "step": 1514820
    },
    {
      "epoch": 2.47906888448119,
      "grad_norm": 0.2513537108898163,
      "learning_rate": 5.075084177302768e-06,
      "loss": 0.0155,
      "step": 1514840
    },
    {
      "epoch": 2.479101614919843,
      "grad_norm": 0.8902950882911682,
      "learning_rate": 5.075018285089252e-06,
      "loss": 0.0111,
      "step": 1514860
    },
    {
      "epoch": 2.4791343453584966,
      "grad_norm": 0.9577563405036926,
      "learning_rate": 5.074952392875734e-06,
      "loss": 0.014,
      "step": 1514880
    },
    {
      "epoch": 2.47916707579715,
      "grad_norm": 0.17177420854568481,
      "learning_rate": 5.074886500662217e-06,
      "loss": 0.018,
      "step": 1514900
    },
    {
      "epoch": 2.479199806235803,
      "grad_norm": 0.1838953197002411,
      "learning_rate": 5.0748206084487e-06,
      "loss": 0.0112,
      "step": 1514920
    },
    {
      "epoch": 2.4792325366744565,
      "grad_norm": 0.5220370888710022,
      "learning_rate": 5.074754716235183e-06,
      "loss": 0.0158,
      "step": 1514940
    },
    {
      "epoch": 2.4792652671131097,
      "grad_norm": 0.34644263982772827,
      "learning_rate": 5.074688824021666e-06,
      "loss": 0.016,
      "step": 1514960
    },
    {
      "epoch": 2.4792979975517633,
      "grad_norm": 0.6023358106613159,
      "learning_rate": 5.074622931808149e-06,
      "loss": 0.0147,
      "step": 1514980
    },
    {
      "epoch": 2.4793307279904164,
      "grad_norm": 0.28441017866134644,
      "learning_rate": 5.074557039594632e-06,
      "loss": 0.0112,
      "step": 1515000
    },
    {
      "epoch": 2.47936345842907,
      "grad_norm": 2.582906723022461,
      "learning_rate": 5.0744911473811145e-06,
      "loss": 0.0167,
      "step": 1515020
    },
    {
      "epoch": 2.479396188867723,
      "grad_norm": 0.9348201155662537,
      "learning_rate": 5.074425255167598e-06,
      "loss": 0.0154,
      "step": 1515040
    },
    {
      "epoch": 2.4794289193063763,
      "grad_norm": 0.2202315479516983,
      "learning_rate": 5.07435936295408e-06,
      "loss": 0.0108,
      "step": 1515060
    },
    {
      "epoch": 2.47946164974503,
      "grad_norm": 0.15951697528362274,
      "learning_rate": 5.0742934707405635e-06,
      "loss": 0.0108,
      "step": 1515080
    },
    {
      "epoch": 2.479494380183683,
      "grad_norm": 0.5772462487220764,
      "learning_rate": 5.0742275785270454e-06,
      "loss": 0.0179,
      "step": 1515100
    },
    {
      "epoch": 2.4795271106223367,
      "grad_norm": 0.0778251364827156,
      "learning_rate": 5.074161686313529e-06,
      "loss": 0.0133,
      "step": 1515120
    },
    {
      "epoch": 2.47955984106099,
      "grad_norm": 0.052422888576984406,
      "learning_rate": 5.074095794100011e-06,
      "loss": 0.0131,
      "step": 1515140
    },
    {
      "epoch": 2.4795925714996434,
      "grad_norm": 0.33230796456336975,
      "learning_rate": 5.0740299018864945e-06,
      "loss": 0.0113,
      "step": 1515160
    },
    {
      "epoch": 2.4796253019382966,
      "grad_norm": 1.0729246139526367,
      "learning_rate": 5.073964009672977e-06,
      "loss": 0.0124,
      "step": 1515180
    },
    {
      "epoch": 2.4796580323769497,
      "grad_norm": 0.1385079026222229,
      "learning_rate": 5.07389811745946e-06,
      "loss": 0.0115,
      "step": 1515200
    },
    {
      "epoch": 2.4796907628156033,
      "grad_norm": 0.5111459493637085,
      "learning_rate": 5.073832225245943e-06,
      "loss": 0.0136,
      "step": 1515220
    },
    {
      "epoch": 2.4797234932542565,
      "grad_norm": 0.457308828830719,
      "learning_rate": 5.073766333032426e-06,
      "loss": 0.0145,
      "step": 1515240
    },
    {
      "epoch": 2.47975622369291,
      "grad_norm": 0.3058073818683624,
      "learning_rate": 5.073700440818908e-06,
      "loss": 0.0098,
      "step": 1515260
    },
    {
      "epoch": 2.479788954131563,
      "grad_norm": 0.7214187979698181,
      "learning_rate": 5.073634548605392e-06,
      "loss": 0.0096,
      "step": 1515280
    },
    {
      "epoch": 2.479821684570217,
      "grad_norm": 0.4584077000617981,
      "learning_rate": 5.073568656391875e-06,
      "loss": 0.0164,
      "step": 1515300
    },
    {
      "epoch": 2.47985441500887,
      "grad_norm": 0.8466477990150452,
      "learning_rate": 5.073502764178357e-06,
      "loss": 0.0171,
      "step": 1515320
    },
    {
      "epoch": 2.479887145447523,
      "grad_norm": 0.21283414959907532,
      "learning_rate": 5.073436871964841e-06,
      "loss": 0.0152,
      "step": 1515340
    },
    {
      "epoch": 2.4799198758861767,
      "grad_norm": 0.8257461190223694,
      "learning_rate": 5.073370979751323e-06,
      "loss": 0.0168,
      "step": 1515360
    },
    {
      "epoch": 2.47995260632483,
      "grad_norm": 3.526315212249756,
      "learning_rate": 5.073305087537806e-06,
      "loss": 0.0144,
      "step": 1515380
    },
    {
      "epoch": 2.4799853367634834,
      "grad_norm": 0.28458505868911743,
      "learning_rate": 5.073239195324289e-06,
      "loss": 0.018,
      "step": 1515400
    },
    {
      "epoch": 2.4800180672021366,
      "grad_norm": 0.14926651120185852,
      "learning_rate": 5.073173303110772e-06,
      "loss": 0.0107,
      "step": 1515420
    },
    {
      "epoch": 2.48005079764079,
      "grad_norm": 0.45589664578437805,
      "learning_rate": 5.0731074108972545e-06,
      "loss": 0.0165,
      "step": 1515440
    },
    {
      "epoch": 2.4800835280794433,
      "grad_norm": 0.47264936566352844,
      "learning_rate": 5.073041518683738e-06,
      "loss": 0.023,
      "step": 1515460
    },
    {
      "epoch": 2.4801162585180965,
      "grad_norm": 0.2617744207382202,
      "learning_rate": 5.07297562647022e-06,
      "loss": 0.0162,
      "step": 1515480
    },
    {
      "epoch": 2.48014898895675,
      "grad_norm": 0.7338922619819641,
      "learning_rate": 5.072909734256704e-06,
      "loss": 0.0172,
      "step": 1515500
    },
    {
      "epoch": 2.480181719395403,
      "grad_norm": 0.3613910377025604,
      "learning_rate": 5.0728438420431855e-06,
      "loss": 0.0161,
      "step": 1515520
    },
    {
      "epoch": 2.480214449834057,
      "grad_norm": 0.24762307107448578,
      "learning_rate": 5.072777949829669e-06,
      "loss": 0.0092,
      "step": 1515540
    },
    {
      "epoch": 2.48024718027271,
      "grad_norm": 0.30952444672584534,
      "learning_rate": 5.072712057616152e-06,
      "loss": 0.0102,
      "step": 1515560
    },
    {
      "epoch": 2.4802799107113636,
      "grad_norm": 0.2781253159046173,
      "learning_rate": 5.0726461654026346e-06,
      "loss": 0.0123,
      "step": 1515580
    },
    {
      "epoch": 2.4803126411500167,
      "grad_norm": 0.42125627398490906,
      "learning_rate": 5.072580273189117e-06,
      "loss": 0.016,
      "step": 1515600
    },
    {
      "epoch": 2.48034537158867,
      "grad_norm": 0.7445595264434814,
      "learning_rate": 5.072514380975601e-06,
      "loss": 0.0144,
      "step": 1515620
    },
    {
      "epoch": 2.4803781020273235,
      "grad_norm": 0.2334299236536026,
      "learning_rate": 5.072448488762083e-06,
      "loss": 0.0101,
      "step": 1515640
    },
    {
      "epoch": 2.4804108324659766,
      "grad_norm": 0.9191746711730957,
      "learning_rate": 5.072382596548566e-06,
      "loss": 0.0252,
      "step": 1515660
    },
    {
      "epoch": 2.48044356290463,
      "grad_norm": 0.45082369446754456,
      "learning_rate": 5.07231670433505e-06,
      "loss": 0.0146,
      "step": 1515680
    },
    {
      "epoch": 2.4804762933432833,
      "grad_norm": 0.15886026620864868,
      "learning_rate": 5.072250812121532e-06,
      "loss": 0.0103,
      "step": 1515700
    },
    {
      "epoch": 2.480509023781937,
      "grad_norm": 0.3476381003856659,
      "learning_rate": 5.0721849199080154e-06,
      "loss": 0.0118,
      "step": 1515720
    },
    {
      "epoch": 2.48054175422059,
      "grad_norm": 0.6015438437461853,
      "learning_rate": 5.072119027694497e-06,
      "loss": 0.0141,
      "step": 1515740
    },
    {
      "epoch": 2.4805744846592432,
      "grad_norm": 0.5638762712478638,
      "learning_rate": 5.072053135480981e-06,
      "loss": 0.0147,
      "step": 1515760
    },
    {
      "epoch": 2.480607215097897,
      "grad_norm": 0.9156837463378906,
      "learning_rate": 5.071987243267464e-06,
      "loss": 0.0146,
      "step": 1515780
    },
    {
      "epoch": 2.48063994553655,
      "grad_norm": 0.8559333086013794,
      "learning_rate": 5.071921351053946e-06,
      "loss": 0.0147,
      "step": 1515800
    },
    {
      "epoch": 2.4806726759752036,
      "grad_norm": 0.3657779395580292,
      "learning_rate": 5.071855458840429e-06,
      "loss": 0.0089,
      "step": 1515820
    },
    {
      "epoch": 2.4807054064138567,
      "grad_norm": 0.43504658341407776,
      "learning_rate": 5.071789566626913e-06,
      "loss": 0.0119,
      "step": 1515840
    },
    {
      "epoch": 2.4807381368525103,
      "grad_norm": 0.215005561709404,
      "learning_rate": 5.071723674413395e-06,
      "loss": 0.014,
      "step": 1515860
    },
    {
      "epoch": 2.4807708672911635,
      "grad_norm": 0.33509764075279236,
      "learning_rate": 5.071657782199878e-06,
      "loss": 0.0111,
      "step": 1515880
    },
    {
      "epoch": 2.4808035977298166,
      "grad_norm": 0.34922075271606445,
      "learning_rate": 5.07159188998636e-06,
      "loss": 0.0156,
      "step": 1515900
    },
    {
      "epoch": 2.48083632816847,
      "grad_norm": 0.21732454001903534,
      "learning_rate": 5.071525997772844e-06,
      "loss": 0.0133,
      "step": 1515920
    },
    {
      "epoch": 2.4808690586071234,
      "grad_norm": 0.33291909098625183,
      "learning_rate": 5.071460105559326e-06,
      "loss": 0.0121,
      "step": 1515940
    },
    {
      "epoch": 2.480901789045777,
      "grad_norm": 0.4397951066493988,
      "learning_rate": 5.071394213345809e-06,
      "loss": 0.0179,
      "step": 1515960
    },
    {
      "epoch": 2.48093451948443,
      "grad_norm": 0.17804180085659027,
      "learning_rate": 5.071328321132292e-06,
      "loss": 0.014,
      "step": 1515980
    },
    {
      "epoch": 2.4809672499230837,
      "grad_norm": 0.9123285412788391,
      "learning_rate": 5.071262428918775e-06,
      "loss": 0.0115,
      "step": 1516000
    },
    {
      "epoch": 2.480999980361737,
      "grad_norm": 0.3490234315395355,
      "learning_rate": 5.071196536705258e-06,
      "loss": 0.0168,
      "step": 1516020
    },
    {
      "epoch": 2.48103271080039,
      "grad_norm": 0.12683039903640747,
      "learning_rate": 5.071130644491741e-06,
      "loss": 0.017,
      "step": 1516040
    },
    {
      "epoch": 2.4810654412390436,
      "grad_norm": 0.40709388256073,
      "learning_rate": 5.0710647522782246e-06,
      "loss": 0.0152,
      "step": 1516060
    },
    {
      "epoch": 2.4810981716776968,
      "grad_norm": 0.5772159099578857,
      "learning_rate": 5.0709988600647065e-06,
      "loss": 0.0187,
      "step": 1516080
    },
    {
      "epoch": 2.4811309021163503,
      "grad_norm": 0.5169757604598999,
      "learning_rate": 5.07093296785119e-06,
      "loss": 0.0179,
      "step": 1516100
    },
    {
      "epoch": 2.4811636325550035,
      "grad_norm": 0.13733844459056854,
      "learning_rate": 5.070867075637672e-06,
      "loss": 0.0172,
      "step": 1516120
    },
    {
      "epoch": 2.481196362993657,
      "grad_norm": 0.4759564697742462,
      "learning_rate": 5.0708011834241555e-06,
      "loss": 0.0104,
      "step": 1516140
    },
    {
      "epoch": 2.4812290934323102,
      "grad_norm": 0.43586465716362,
      "learning_rate": 5.070735291210637e-06,
      "loss": 0.0121,
      "step": 1516160
    },
    {
      "epoch": 2.4812618238709634,
      "grad_norm": 0.2615778148174286,
      "learning_rate": 5.070669398997121e-06,
      "loss": 0.014,
      "step": 1516180
    },
    {
      "epoch": 2.481294554309617,
      "grad_norm": 0.4208958148956299,
      "learning_rate": 5.070603506783604e-06,
      "loss": 0.0203,
      "step": 1516200
    },
    {
      "epoch": 2.48132728474827,
      "grad_norm": 0.26976221799850464,
      "learning_rate": 5.0705376145700865e-06,
      "loss": 0.0132,
      "step": 1516220
    },
    {
      "epoch": 2.4813600151869237,
      "grad_norm": 0.18788571655750275,
      "learning_rate": 5.070471722356569e-06,
      "loss": 0.0141,
      "step": 1516240
    },
    {
      "epoch": 2.481392745625577,
      "grad_norm": 2.310241460800171,
      "learning_rate": 5.070405830143053e-06,
      "loss": 0.0163,
      "step": 1516260
    },
    {
      "epoch": 2.48142547606423,
      "grad_norm": 0.4001055061817169,
      "learning_rate": 5.070339937929535e-06,
      "loss": 0.013,
      "step": 1516280
    },
    {
      "epoch": 2.4814582065028836,
      "grad_norm": 0.8395495414733887,
      "learning_rate": 5.070274045716018e-06,
      "loss": 0.0151,
      "step": 1516300
    },
    {
      "epoch": 2.4814909369415368,
      "grad_norm": 0.2780618667602539,
      "learning_rate": 5.0702081535025e-06,
      "loss": 0.0121,
      "step": 1516320
    },
    {
      "epoch": 2.4815236673801904,
      "grad_norm": 0.7343466877937317,
      "learning_rate": 5.070142261288984e-06,
      "loss": 0.0216,
      "step": 1516340
    },
    {
      "epoch": 2.4815563978188435,
      "grad_norm": 0.1318458467721939,
      "learning_rate": 5.070076369075467e-06,
      "loss": 0.0177,
      "step": 1516360
    },
    {
      "epoch": 2.4815891282574967,
      "grad_norm": 0.3764861226081848,
      "learning_rate": 5.070010476861949e-06,
      "loss": 0.012,
      "step": 1516380
    },
    {
      "epoch": 2.4816218586961503,
      "grad_norm": 0.40582919120788574,
      "learning_rate": 5.069944584648433e-06,
      "loss": 0.0103,
      "step": 1516400
    },
    {
      "epoch": 2.4816545891348034,
      "grad_norm": 2.412482976913452,
      "learning_rate": 5.0698786924349156e-06,
      "loss": 0.0169,
      "step": 1516420
    },
    {
      "epoch": 2.481687319573457,
      "grad_norm": 0.18934544920921326,
      "learning_rate": 5.069812800221398e-06,
      "loss": 0.015,
      "step": 1516440
    },
    {
      "epoch": 2.48172005001211,
      "grad_norm": 0.4064808785915375,
      "learning_rate": 5.069746908007881e-06,
      "loss": 0.0132,
      "step": 1516460
    },
    {
      "epoch": 2.4817527804507638,
      "grad_norm": 0.46802762150764465,
      "learning_rate": 5.069681015794365e-06,
      "loss": 0.0105,
      "step": 1516480
    },
    {
      "epoch": 2.481785510889417,
      "grad_norm": 0.42622020840644836,
      "learning_rate": 5.0696151235808465e-06,
      "loss": 0.0114,
      "step": 1516500
    },
    {
      "epoch": 2.48181824132807,
      "grad_norm": 0.2549993693828583,
      "learning_rate": 5.06954923136733e-06,
      "loss": 0.0152,
      "step": 1516520
    },
    {
      "epoch": 2.4818509717667236,
      "grad_norm": 0.45878198742866516,
      "learning_rate": 5.069483339153812e-06,
      "loss": 0.0155,
      "step": 1516540
    },
    {
      "epoch": 2.481883702205377,
      "grad_norm": 0.4828246533870697,
      "learning_rate": 5.069417446940296e-06,
      "loss": 0.0137,
      "step": 1516560
    },
    {
      "epoch": 2.4819164326440304,
      "grad_norm": 0.29791560769081116,
      "learning_rate": 5.069351554726778e-06,
      "loss": 0.0151,
      "step": 1516580
    },
    {
      "epoch": 2.4819491630826835,
      "grad_norm": 0.07300437986850739,
      "learning_rate": 5.069285662513261e-06,
      "loss": 0.0123,
      "step": 1516600
    },
    {
      "epoch": 2.481981893521337,
      "grad_norm": 0.5762211084365845,
      "learning_rate": 5.069219770299744e-06,
      "loss": 0.0144,
      "step": 1516620
    },
    {
      "epoch": 2.4820146239599903,
      "grad_norm": 0.5321539044380188,
      "learning_rate": 5.069153878086227e-06,
      "loss": 0.0113,
      "step": 1516640
    },
    {
      "epoch": 2.4820473543986434,
      "grad_norm": 0.17865541577339172,
      "learning_rate": 5.069087985872709e-06,
      "loss": 0.0091,
      "step": 1516660
    },
    {
      "epoch": 2.482080084837297,
      "grad_norm": 0.097708560526371,
      "learning_rate": 5.069022093659193e-06,
      "loss": 0.0279,
      "step": 1516680
    },
    {
      "epoch": 2.48211281527595,
      "grad_norm": 0.3799035847187042,
      "learning_rate": 5.0689562014456765e-06,
      "loss": 0.012,
      "step": 1516700
    },
    {
      "epoch": 2.4821455457146038,
      "grad_norm": 0.1535009741783142,
      "learning_rate": 5.068890309232158e-06,
      "loss": 0.0133,
      "step": 1516720
    },
    {
      "epoch": 2.482178276153257,
      "grad_norm": 0.11726740747690201,
      "learning_rate": 5.068824417018642e-06,
      "loss": 0.0133,
      "step": 1516740
    },
    {
      "epoch": 2.4822110065919105,
      "grad_norm": 0.26404061913490295,
      "learning_rate": 5.068758524805124e-06,
      "loss": 0.0167,
      "step": 1516760
    },
    {
      "epoch": 2.4822437370305637,
      "grad_norm": 0.2862660586833954,
      "learning_rate": 5.0686926325916074e-06,
      "loss": 0.0188,
      "step": 1516780
    },
    {
      "epoch": 2.482276467469217,
      "grad_norm": 0.5235817432403564,
      "learning_rate": 5.06862674037809e-06,
      "loss": 0.0121,
      "step": 1516800
    },
    {
      "epoch": 2.4823091979078704,
      "grad_norm": 0.05081714317202568,
      "learning_rate": 5.068560848164573e-06,
      "loss": 0.0096,
      "step": 1516820
    },
    {
      "epoch": 2.4823419283465236,
      "grad_norm": 0.20590132474899292,
      "learning_rate": 5.068494955951056e-06,
      "loss": 0.0156,
      "step": 1516840
    },
    {
      "epoch": 2.482374658785177,
      "grad_norm": 0.37538233399391174,
      "learning_rate": 5.068429063737539e-06,
      "loss": 0.0217,
      "step": 1516860
    },
    {
      "epoch": 2.4824073892238303,
      "grad_norm": 0.5002027153968811,
      "learning_rate": 5.068363171524021e-06,
      "loss": 0.0183,
      "step": 1516880
    },
    {
      "epoch": 2.482440119662484,
      "grad_norm": 2.826676607131958,
      "learning_rate": 5.068297279310505e-06,
      "loss": 0.0126,
      "step": 1516900
    },
    {
      "epoch": 2.482472850101137,
      "grad_norm": 0.3481369614601135,
      "learning_rate": 5.068231387096987e-06,
      "loss": 0.0151,
      "step": 1516920
    },
    {
      "epoch": 2.48250558053979,
      "grad_norm": 0.05334334820508957,
      "learning_rate": 5.06816549488347e-06,
      "loss": 0.0143,
      "step": 1516940
    },
    {
      "epoch": 2.482538310978444,
      "grad_norm": 0.6204424500465393,
      "learning_rate": 5.068099602669952e-06,
      "loss": 0.013,
      "step": 1516960
    },
    {
      "epoch": 2.482571041417097,
      "grad_norm": 0.3265608847141266,
      "learning_rate": 5.068033710456436e-06,
      "loss": 0.0143,
      "step": 1516980
    },
    {
      "epoch": 2.4826037718557505,
      "grad_norm": 0.48884040117263794,
      "learning_rate": 5.067967818242918e-06,
      "loss": 0.0144,
      "step": 1517000
    },
    {
      "epoch": 2.4826365022944037,
      "grad_norm": 0.48799705505371094,
      "learning_rate": 5.067901926029401e-06,
      "loss": 0.015,
      "step": 1517020
    },
    {
      "epoch": 2.4826692327330573,
      "grad_norm": 0.5437664985656738,
      "learning_rate": 5.067836033815884e-06,
      "loss": 0.0145,
      "step": 1517040
    },
    {
      "epoch": 2.4827019631717104,
      "grad_norm": 0.13191023468971252,
      "learning_rate": 5.0677701416023675e-06,
      "loss": 0.0137,
      "step": 1517060
    },
    {
      "epoch": 2.4827346936103636,
      "grad_norm": 0.480771541595459,
      "learning_rate": 5.06770424938885e-06,
      "loss": 0.0198,
      "step": 1517080
    },
    {
      "epoch": 2.482767424049017,
      "grad_norm": 0.7533050775527954,
      "learning_rate": 5.067638357175333e-06,
      "loss": 0.0122,
      "step": 1517100
    },
    {
      "epoch": 2.4828001544876703,
      "grad_norm": 0.4542357623577118,
      "learning_rate": 5.0675724649618165e-06,
      "loss": 0.0206,
      "step": 1517120
    },
    {
      "epoch": 2.482832884926324,
      "grad_norm": 0.34058621525764465,
      "learning_rate": 5.0675065727482984e-06,
      "loss": 0.0091,
      "step": 1517140
    },
    {
      "epoch": 2.482865615364977,
      "grad_norm": 0.48053082823753357,
      "learning_rate": 5.067440680534782e-06,
      "loss": 0.0141,
      "step": 1517160
    },
    {
      "epoch": 2.4828983458036307,
      "grad_norm": 0.3412739336490631,
      "learning_rate": 5.067374788321264e-06,
      "loss": 0.009,
      "step": 1517180
    },
    {
      "epoch": 2.482931076242284,
      "grad_norm": 0.1972644329071045,
      "learning_rate": 5.0673088961077475e-06,
      "loss": 0.0127,
      "step": 1517200
    },
    {
      "epoch": 2.482963806680937,
      "grad_norm": 0.14324334263801575,
      "learning_rate": 5.06724300389423e-06,
      "loss": 0.0156,
      "step": 1517220
    },
    {
      "epoch": 2.4829965371195906,
      "grad_norm": 0.5751820802688599,
      "learning_rate": 5.067177111680713e-06,
      "loss": 0.016,
      "step": 1517240
    },
    {
      "epoch": 2.4830292675582437,
      "grad_norm": 0.22084790468215942,
      "learning_rate": 5.067111219467196e-06,
      "loss": 0.0145,
      "step": 1517260
    },
    {
      "epoch": 2.4830619979968973,
      "grad_norm": 0.35528987646102905,
      "learning_rate": 5.067045327253679e-06,
      "loss": 0.0132,
      "step": 1517280
    },
    {
      "epoch": 2.4830947284355505,
      "grad_norm": 0.2609027922153473,
      "learning_rate": 5.066979435040161e-06,
      "loss": 0.011,
      "step": 1517300
    },
    {
      "epoch": 2.483127458874204,
      "grad_norm": 0.1471613496541977,
      "learning_rate": 5.066913542826645e-06,
      "loss": 0.0137,
      "step": 1517320
    },
    {
      "epoch": 2.483160189312857,
      "grad_norm": 0.16268020868301392,
      "learning_rate": 5.066847650613127e-06,
      "loss": 0.0191,
      "step": 1517340
    },
    {
      "epoch": 2.4831929197515104,
      "grad_norm": 0.1451563537120819,
      "learning_rate": 5.06678175839961e-06,
      "loss": 0.0156,
      "step": 1517360
    },
    {
      "epoch": 2.483225650190164,
      "grad_norm": 0.9458382725715637,
      "learning_rate": 5.066715866186093e-06,
      "loss": 0.02,
      "step": 1517380
    },
    {
      "epoch": 2.483258380628817,
      "grad_norm": 0.15517637133598328,
      "learning_rate": 5.066649973972576e-06,
      "loss": 0.0213,
      "step": 1517400
    },
    {
      "epoch": 2.4832911110674707,
      "grad_norm": 0.2521146535873413,
      "learning_rate": 5.066584081759059e-06,
      "loss": 0.0093,
      "step": 1517420
    },
    {
      "epoch": 2.483323841506124,
      "grad_norm": 0.8407420516014099,
      "learning_rate": 5.066518189545542e-06,
      "loss": 0.0229,
      "step": 1517440
    },
    {
      "epoch": 2.4833565719447774,
      "grad_norm": 0.218790203332901,
      "learning_rate": 5.066452297332025e-06,
      "loss": 0.0135,
      "step": 1517460
    },
    {
      "epoch": 2.4833893023834306,
      "grad_norm": 0.470223993062973,
      "learning_rate": 5.0663864051185076e-06,
      "loss": 0.0141,
      "step": 1517480
    },
    {
      "epoch": 2.4834220328220837,
      "grad_norm": 0.4041730761528015,
      "learning_rate": 5.066320512904991e-06,
      "loss": 0.0101,
      "step": 1517500
    },
    {
      "epoch": 2.4834547632607373,
      "grad_norm": 0.1605021208524704,
      "learning_rate": 5.066254620691473e-06,
      "loss": 0.0138,
      "step": 1517520
    },
    {
      "epoch": 2.4834874936993905,
      "grad_norm": 0.16954456269741058,
      "learning_rate": 5.066188728477957e-06,
      "loss": 0.0158,
      "step": 1517540
    },
    {
      "epoch": 2.483520224138044,
      "grad_norm": 0.8594322800636292,
      "learning_rate": 5.0661228362644385e-06,
      "loss": 0.017,
      "step": 1517560
    },
    {
      "epoch": 2.4835529545766972,
      "grad_norm": 0.1978157013654709,
      "learning_rate": 5.066056944050922e-06,
      "loss": 0.0108,
      "step": 1517580
    },
    {
      "epoch": 2.483585685015351,
      "grad_norm": 0.269954651594162,
      "learning_rate": 5.065991051837405e-06,
      "loss": 0.0157,
      "step": 1517600
    },
    {
      "epoch": 2.483618415454004,
      "grad_norm": 0.34399157762527466,
      "learning_rate": 5.065925159623888e-06,
      "loss": 0.0171,
      "step": 1517620
    },
    {
      "epoch": 2.483651145892657,
      "grad_norm": 0.6959221363067627,
      "learning_rate": 5.06585926741037e-06,
      "loss": 0.0166,
      "step": 1517640
    },
    {
      "epoch": 2.4836838763313107,
      "grad_norm": 0.3909751772880554,
      "learning_rate": 5.065793375196854e-06,
      "loss": 0.0152,
      "step": 1517660
    },
    {
      "epoch": 2.483716606769964,
      "grad_norm": 0.10807386040687561,
      "learning_rate": 5.065727482983336e-06,
      "loss": 0.0135,
      "step": 1517680
    },
    {
      "epoch": 2.4837493372086175,
      "grad_norm": 0.5621959567070007,
      "learning_rate": 5.065661590769819e-06,
      "loss": 0.0232,
      "step": 1517700
    },
    {
      "epoch": 2.4837820676472706,
      "grad_norm": 0.15837085247039795,
      "learning_rate": 5.065595698556301e-06,
      "loss": 0.0136,
      "step": 1517720
    },
    {
      "epoch": 2.4838147980859238,
      "grad_norm": 0.3074530065059662,
      "learning_rate": 5.065529806342785e-06,
      "loss": 0.012,
      "step": 1517740
    },
    {
      "epoch": 2.4838475285245774,
      "grad_norm": 0.7208878397941589,
      "learning_rate": 5.0654639141292685e-06,
      "loss": 0.0222,
      "step": 1517760
    },
    {
      "epoch": 2.4838802589632305,
      "grad_norm": 0.1104002445936203,
      "learning_rate": 5.06539802191575e-06,
      "loss": 0.0139,
      "step": 1517780
    },
    {
      "epoch": 2.483912989401884,
      "grad_norm": 0.33522874116897583,
      "learning_rate": 5.065332129702234e-06,
      "loss": 0.0153,
      "step": 1517800
    },
    {
      "epoch": 2.4839457198405372,
      "grad_norm": 0.14691022038459778,
      "learning_rate": 5.065266237488717e-06,
      "loss": 0.018,
      "step": 1517820
    },
    {
      "epoch": 2.4839784502791904,
      "grad_norm": 0.29382553696632385,
      "learning_rate": 5.065200345275199e-06,
      "loss": 0.0133,
      "step": 1517840
    },
    {
      "epoch": 2.484011180717844,
      "grad_norm": 0.20073814690113068,
      "learning_rate": 5.065134453061682e-06,
      "loss": 0.0121,
      "step": 1517860
    },
    {
      "epoch": 2.484043911156497,
      "grad_norm": 1.5013972520828247,
      "learning_rate": 5.065068560848166e-06,
      "loss": 0.0146,
      "step": 1517880
    },
    {
      "epoch": 2.4840766415951507,
      "grad_norm": 0.16345752775669098,
      "learning_rate": 5.065002668634648e-06,
      "loss": 0.0166,
      "step": 1517900
    },
    {
      "epoch": 2.484109372033804,
      "grad_norm": 0.3251892328262329,
      "learning_rate": 5.064936776421131e-06,
      "loss": 0.0122,
      "step": 1517920
    },
    {
      "epoch": 2.4841421024724575,
      "grad_norm": 0.34059789776802063,
      "learning_rate": 5.064870884207613e-06,
      "loss": 0.0164,
      "step": 1517940
    },
    {
      "epoch": 2.4841748329111106,
      "grad_norm": 0.48286452889442444,
      "learning_rate": 5.064804991994097e-06,
      "loss": 0.0105,
      "step": 1517960
    },
    {
      "epoch": 2.484207563349764,
      "grad_norm": 0.22958610951900482,
      "learning_rate": 5.064739099780579e-06,
      "loss": 0.0152,
      "step": 1517980
    },
    {
      "epoch": 2.4842402937884174,
      "grad_norm": 0.37051311135292053,
      "learning_rate": 5.064673207567062e-06,
      "loss": 0.0149,
      "step": 1518000
    },
    {
      "epoch": 2.4842730242270705,
      "grad_norm": 0.6570301055908203,
      "learning_rate": 5.064607315353545e-06,
      "loss": 0.0173,
      "step": 1518020
    },
    {
      "epoch": 2.484305754665724,
      "grad_norm": 0.20063062012195587,
      "learning_rate": 5.064541423140028e-06,
      "loss": 0.0128,
      "step": 1518040
    },
    {
      "epoch": 2.4843384851043773,
      "grad_norm": 0.28454598784446716,
      "learning_rate": 5.06447553092651e-06,
      "loss": 0.0148,
      "step": 1518060
    },
    {
      "epoch": 2.484371215543031,
      "grad_norm": 0.6468707919120789,
      "learning_rate": 5.064409638712994e-06,
      "loss": 0.0125,
      "step": 1518080
    },
    {
      "epoch": 2.484403945981684,
      "grad_norm": 0.2682480216026306,
      "learning_rate": 5.064343746499476e-06,
      "loss": 0.0115,
      "step": 1518100
    },
    {
      "epoch": 2.484436676420337,
      "grad_norm": 0.3936103582382202,
      "learning_rate": 5.0642778542859595e-06,
      "loss": 0.0189,
      "step": 1518120
    },
    {
      "epoch": 2.4844694068589908,
      "grad_norm": 0.3398163616657257,
      "learning_rate": 5.064211962072443e-06,
      "loss": 0.0155,
      "step": 1518140
    },
    {
      "epoch": 2.484502137297644,
      "grad_norm": 0.3591490685939789,
      "learning_rate": 5.064146069858925e-06,
      "loss": 0.016,
      "step": 1518160
    },
    {
      "epoch": 2.4845348677362975,
      "grad_norm": 0.7226635217666626,
      "learning_rate": 5.0640801776454085e-06,
      "loss": 0.0142,
      "step": 1518180
    },
    {
      "epoch": 2.4845675981749507,
      "grad_norm": 0.11625967919826508,
      "learning_rate": 5.06401428543189e-06,
      "loss": 0.0175,
      "step": 1518200
    },
    {
      "epoch": 2.4846003286136042,
      "grad_norm": 0.27351781725883484,
      "learning_rate": 5.063948393218374e-06,
      "loss": 0.0184,
      "step": 1518220
    },
    {
      "epoch": 2.4846330590522574,
      "grad_norm": 0.7345609068870544,
      "learning_rate": 5.063882501004857e-06,
      "loss": 0.0136,
      "step": 1518240
    },
    {
      "epoch": 2.4846657894909105,
      "grad_norm": 0.6113890409469604,
      "learning_rate": 5.0638166087913395e-06,
      "loss": 0.0129,
      "step": 1518260
    },
    {
      "epoch": 2.484698519929564,
      "grad_norm": 0.13240227103233337,
      "learning_rate": 5.063750716577822e-06,
      "loss": 0.0092,
      "step": 1518280
    },
    {
      "epoch": 2.4847312503682173,
      "grad_norm": 0.17056819796562195,
      "learning_rate": 5.063684824364306e-06,
      "loss": 0.0123,
      "step": 1518300
    },
    {
      "epoch": 2.484763980806871,
      "grad_norm": 0.2828269302845001,
      "learning_rate": 5.063618932150788e-06,
      "loss": 0.018,
      "step": 1518320
    },
    {
      "epoch": 2.484796711245524,
      "grad_norm": 0.30082929134368896,
      "learning_rate": 5.063553039937271e-06,
      "loss": 0.0134,
      "step": 1518340
    },
    {
      "epoch": 2.4848294416841776,
      "grad_norm": 0.6811205148696899,
      "learning_rate": 5.063487147723753e-06,
      "loss": 0.0207,
      "step": 1518360
    },
    {
      "epoch": 2.484862172122831,
      "grad_norm": 0.1988452821969986,
      "learning_rate": 5.063421255510237e-06,
      "loss": 0.0159,
      "step": 1518380
    },
    {
      "epoch": 2.484894902561484,
      "grad_norm": 0.2797633111476898,
      "learning_rate": 5.0633553632967195e-06,
      "loss": 0.0119,
      "step": 1518400
    },
    {
      "epoch": 2.4849276330001375,
      "grad_norm": 0.6111543774604797,
      "learning_rate": 5.063289471083202e-06,
      "loss": 0.0112,
      "step": 1518420
    },
    {
      "epoch": 2.4849603634387907,
      "grad_norm": 3.732384443283081,
      "learning_rate": 5.063223578869685e-06,
      "loss": 0.0284,
      "step": 1518440
    },
    {
      "epoch": 2.4849930938774443,
      "grad_norm": 0.3192124664783478,
      "learning_rate": 5.063157686656169e-06,
      "loss": 0.0137,
      "step": 1518460
    },
    {
      "epoch": 2.4850258243160974,
      "grad_norm": 0.3053452968597412,
      "learning_rate": 5.063091794442651e-06,
      "loss": 0.0157,
      "step": 1518480
    },
    {
      "epoch": 2.485058554754751,
      "grad_norm": 0.7767381072044373,
      "learning_rate": 5.063025902229134e-06,
      "loss": 0.0145,
      "step": 1518500
    },
    {
      "epoch": 2.485091285193404,
      "grad_norm": 0.2331191599369049,
      "learning_rate": 5.062960010015618e-06,
      "loss": 0.0147,
      "step": 1518520
    },
    {
      "epoch": 2.4851240156320573,
      "grad_norm": 0.16586793959140778,
      "learning_rate": 5.0628941178020995e-06,
      "loss": 0.0102,
      "step": 1518540
    },
    {
      "epoch": 2.485156746070711,
      "grad_norm": 0.28755927085876465,
      "learning_rate": 5.062828225588583e-06,
      "loss": 0.0102,
      "step": 1518560
    },
    {
      "epoch": 2.485189476509364,
      "grad_norm": 0.3544100224971771,
      "learning_rate": 5.062762333375065e-06,
      "loss": 0.013,
      "step": 1518580
    },
    {
      "epoch": 2.4852222069480177,
      "grad_norm": 0.2643963098526001,
      "learning_rate": 5.062696441161549e-06,
      "loss": 0.0092,
      "step": 1518600
    },
    {
      "epoch": 2.485254937386671,
      "grad_norm": 0.36618879437446594,
      "learning_rate": 5.062630548948031e-06,
      "loss": 0.0152,
      "step": 1518620
    },
    {
      "epoch": 2.4852876678253244,
      "grad_norm": 0.34037312865257263,
      "learning_rate": 5.062564656734514e-06,
      "loss": 0.0149,
      "step": 1518640
    },
    {
      "epoch": 2.4853203982639775,
      "grad_norm": 0.34409111738204956,
      "learning_rate": 5.062498764520997e-06,
      "loss": 0.0141,
      "step": 1518660
    },
    {
      "epoch": 2.4853531287026307,
      "grad_norm": 0.3610963523387909,
      "learning_rate": 5.06243287230748e-06,
      "loss": 0.0183,
      "step": 1518680
    },
    {
      "epoch": 2.4853858591412843,
      "grad_norm": 0.2278420627117157,
      "learning_rate": 5.062366980093962e-06,
      "loss": 0.0149,
      "step": 1518700
    },
    {
      "epoch": 2.4854185895799374,
      "grad_norm": 0.5138311982154846,
      "learning_rate": 5.062301087880446e-06,
      "loss": 0.0145,
      "step": 1518720
    },
    {
      "epoch": 2.485451320018591,
      "grad_norm": 0.1356450915336609,
      "learning_rate": 5.062235195666928e-06,
      "loss": 0.0101,
      "step": 1518740
    },
    {
      "epoch": 2.485484050457244,
      "grad_norm": 0.3482012152671814,
      "learning_rate": 5.062169303453411e-06,
      "loss": 0.0118,
      "step": 1518760
    },
    {
      "epoch": 2.485516780895898,
      "grad_norm": 0.25326433777809143,
      "learning_rate": 5.062103411239893e-06,
      "loss": 0.0148,
      "step": 1518780
    },
    {
      "epoch": 2.485549511334551,
      "grad_norm": 0.29092076420783997,
      "learning_rate": 5.062037519026377e-06,
      "loss": 0.0122,
      "step": 1518800
    },
    {
      "epoch": 2.485582241773204,
      "grad_norm": 0.26068124175071716,
      "learning_rate": 5.0619716268128604e-06,
      "loss": 0.0114,
      "step": 1518820
    },
    {
      "epoch": 2.4856149722118577,
      "grad_norm": 0.24264059960842133,
      "learning_rate": 5.061905734599343e-06,
      "loss": 0.0138,
      "step": 1518840
    },
    {
      "epoch": 2.485647702650511,
      "grad_norm": 0.6760051846504211,
      "learning_rate": 5.061839842385826e-06,
      "loss": 0.012,
      "step": 1518860
    },
    {
      "epoch": 2.4856804330891644,
      "grad_norm": 0.2310638278722763,
      "learning_rate": 5.061773950172309e-06,
      "loss": 0.0121,
      "step": 1518880
    },
    {
      "epoch": 2.4857131635278176,
      "grad_norm": 0.1670653373003006,
      "learning_rate": 5.061708057958792e-06,
      "loss": 0.0207,
      "step": 1518900
    },
    {
      "epoch": 2.485745893966471,
      "grad_norm": 0.4330545663833618,
      "learning_rate": 5.061642165745274e-06,
      "loss": 0.0136,
      "step": 1518920
    },
    {
      "epoch": 2.4857786244051243,
      "grad_norm": 0.12687718868255615,
      "learning_rate": 5.061576273531758e-06,
      "loss": 0.0213,
      "step": 1518940
    },
    {
      "epoch": 2.4858113548437775,
      "grad_norm": 0.2924809753894806,
      "learning_rate": 5.06151038131824e-06,
      "loss": 0.0214,
      "step": 1518960
    },
    {
      "epoch": 2.485844085282431,
      "grad_norm": 0.5041806101799011,
      "learning_rate": 5.061444489104723e-06,
      "loss": 0.0147,
      "step": 1518980
    },
    {
      "epoch": 2.485876815721084,
      "grad_norm": 0.41224047541618347,
      "learning_rate": 5.061378596891205e-06,
      "loss": 0.0121,
      "step": 1519000
    },
    {
      "epoch": 2.485909546159738,
      "grad_norm": 1.1431676149368286,
      "learning_rate": 5.061312704677689e-06,
      "loss": 0.0115,
      "step": 1519020
    },
    {
      "epoch": 2.485942276598391,
      "grad_norm": 0.19210579991340637,
      "learning_rate": 5.061246812464171e-06,
      "loss": 0.0133,
      "step": 1519040
    },
    {
      "epoch": 2.4859750070370445,
      "grad_norm": 0.5136898159980774,
      "learning_rate": 5.061180920250654e-06,
      "loss": 0.0199,
      "step": 1519060
    },
    {
      "epoch": 2.4860077374756977,
      "grad_norm": 0.16453756392002106,
      "learning_rate": 5.061115028037137e-06,
      "loss": 0.0148,
      "step": 1519080
    },
    {
      "epoch": 2.486040467914351,
      "grad_norm": 0.2671414315700531,
      "learning_rate": 5.0610491358236205e-06,
      "loss": 0.0162,
      "step": 1519100
    },
    {
      "epoch": 2.4860731983530044,
      "grad_norm": 0.38439857959747314,
      "learning_rate": 5.060983243610102e-06,
      "loss": 0.0121,
      "step": 1519120
    },
    {
      "epoch": 2.4861059287916576,
      "grad_norm": 0.4009285569190979,
      "learning_rate": 5.060917351396586e-06,
      "loss": 0.0178,
      "step": 1519140
    },
    {
      "epoch": 2.486138659230311,
      "grad_norm": 0.42899012565612793,
      "learning_rate": 5.0608514591830696e-06,
      "loss": 0.0127,
      "step": 1519160
    },
    {
      "epoch": 2.4861713896689643,
      "grad_norm": 0.22821681201457977,
      "learning_rate": 5.0607855669695514e-06,
      "loss": 0.0148,
      "step": 1519180
    },
    {
      "epoch": 2.486204120107618,
      "grad_norm": 0.4459260404109955,
      "learning_rate": 5.060719674756035e-06,
      "loss": 0.0152,
      "step": 1519200
    },
    {
      "epoch": 2.486236850546271,
      "grad_norm": Infinity,
      "learning_rate": 5.060653782542517e-06,
      "loss": 0.0252,
      "step": 1519220
    },
    {
      "epoch": 2.4862695809849242,
      "grad_norm": 0.12105550616979599,
      "learning_rate": 5.0605878903290005e-06,
      "loss": 0.0125,
      "step": 1519240
    },
    {
      "epoch": 2.486302311423578,
      "grad_norm": 0.1787831038236618,
      "learning_rate": 5.060521998115483e-06,
      "loss": 0.0136,
      "step": 1519260
    },
    {
      "epoch": 2.486335041862231,
      "grad_norm": 0.17469462752342224,
      "learning_rate": 5.060456105901966e-06,
      "loss": 0.0157,
      "step": 1519280
    },
    {
      "epoch": 2.486367772300884,
      "grad_norm": 0.0991281270980835,
      "learning_rate": 5.060390213688449e-06,
      "loss": 0.0141,
      "step": 1519300
    },
    {
      "epoch": 2.4864005027395377,
      "grad_norm": 0.15269681811332703,
      "learning_rate": 5.060324321474932e-06,
      "loss": 0.0129,
      "step": 1519320
    },
    {
      "epoch": 2.486433233178191,
      "grad_norm": 0.8909451961517334,
      "learning_rate": 5.060258429261414e-06,
      "loss": 0.0134,
      "step": 1519340
    },
    {
      "epoch": 2.4864659636168445,
      "grad_norm": 1.035902738571167,
      "learning_rate": 5.060192537047898e-06,
      "loss": 0.0136,
      "step": 1519360
    },
    {
      "epoch": 2.4864986940554976,
      "grad_norm": 0.21226786077022552,
      "learning_rate": 5.06012664483438e-06,
      "loss": 0.0125,
      "step": 1519380
    },
    {
      "epoch": 2.486531424494151,
      "grad_norm": 0.3149477243423462,
      "learning_rate": 5.060060752620863e-06,
      "loss": 0.0172,
      "step": 1519400
    },
    {
      "epoch": 2.4865641549328044,
      "grad_norm": 0.12968188524246216,
      "learning_rate": 5.059994860407346e-06,
      "loss": 0.0134,
      "step": 1519420
    },
    {
      "epoch": 2.4865968853714575,
      "grad_norm": 0.3891018033027649,
      "learning_rate": 5.059928968193829e-06,
      "loss": 0.0123,
      "step": 1519440
    },
    {
      "epoch": 2.486629615810111,
      "grad_norm": 0.3398817181587219,
      "learning_rate": 5.0598630759803115e-06,
      "loss": 0.0118,
      "step": 1519460
    },
    {
      "epoch": 2.4866623462487643,
      "grad_norm": 0.4257871210575104,
      "learning_rate": 5.059797183766795e-06,
      "loss": 0.0118,
      "step": 1519480
    },
    {
      "epoch": 2.486695076687418,
      "grad_norm": 0.3803596794605255,
      "learning_rate": 5.059731291553277e-06,
      "loss": 0.0195,
      "step": 1519500
    },
    {
      "epoch": 2.486727807126071,
      "grad_norm": 0.2635432183742523,
      "learning_rate": 5.0596653993397606e-06,
      "loss": 0.0171,
      "step": 1519520
    },
    {
      "epoch": 2.4867605375647246,
      "grad_norm": 0.32781457901000977,
      "learning_rate": 5.059599507126244e-06,
      "loss": 0.0157,
      "step": 1519540
    },
    {
      "epoch": 2.4867932680033777,
      "grad_norm": 0.33033305406570435,
      "learning_rate": 5.059533614912726e-06,
      "loss": 0.0133,
      "step": 1519560
    },
    {
      "epoch": 2.486825998442031,
      "grad_norm": 0.12404946982860565,
      "learning_rate": 5.05946772269921e-06,
      "loss": 0.0151,
      "step": 1519580
    },
    {
      "epoch": 2.4868587288806845,
      "grad_norm": 0.18137945234775543,
      "learning_rate": 5.0594018304856915e-06,
      "loss": 0.0178,
      "step": 1519600
    },
    {
      "epoch": 2.4868914593193376,
      "grad_norm": 0.6756805181503296,
      "learning_rate": 5.059335938272175e-06,
      "loss": 0.0099,
      "step": 1519620
    },
    {
      "epoch": 2.4869241897579912,
      "grad_norm": 0.6063635349273682,
      "learning_rate": 5.059270046058658e-06,
      "loss": 0.0127,
      "step": 1519640
    },
    {
      "epoch": 2.4869569201966444,
      "grad_norm": 0.6622564196586609,
      "learning_rate": 5.059204153845141e-06,
      "loss": 0.0164,
      "step": 1519660
    },
    {
      "epoch": 2.486989650635298,
      "grad_norm": 0.25270697474479675,
      "learning_rate": 5.059138261631623e-06,
      "loss": 0.0121,
      "step": 1519680
    },
    {
      "epoch": 2.487022381073951,
      "grad_norm": 0.1934731900691986,
      "learning_rate": 5.059072369418107e-06,
      "loss": 0.0178,
      "step": 1519700
    },
    {
      "epoch": 2.4870551115126043,
      "grad_norm": 0.2443113476037979,
      "learning_rate": 5.059006477204589e-06,
      "loss": 0.0138,
      "step": 1519720
    },
    {
      "epoch": 2.487087841951258,
      "grad_norm": 0.19043616950511932,
      "learning_rate": 5.058940584991072e-06,
      "loss": 0.0079,
      "step": 1519740
    },
    {
      "epoch": 2.487120572389911,
      "grad_norm": 0.7696349024772644,
      "learning_rate": 5.058874692777554e-06,
      "loss": 0.018,
      "step": 1519760
    },
    {
      "epoch": 2.4871533028285646,
      "grad_norm": 0.1701447069644928,
      "learning_rate": 5.058808800564038e-06,
      "loss": 0.0121,
      "step": 1519780
    },
    {
      "epoch": 2.4871860332672178,
      "grad_norm": 0.7900373339653015,
      "learning_rate": 5.05874290835052e-06,
      "loss": 0.016,
      "step": 1519800
    },
    {
      "epoch": 2.4872187637058714,
      "grad_norm": 0.3387860953807831,
      "learning_rate": 5.058677016137003e-06,
      "loss": 0.0078,
      "step": 1519820
    },
    {
      "epoch": 2.4872514941445245,
      "grad_norm": 0.31520628929138184,
      "learning_rate": 5.058611123923486e-06,
      "loss": 0.0191,
      "step": 1519840
    },
    {
      "epoch": 2.4872842245831777,
      "grad_norm": 0.900418221950531,
      "learning_rate": 5.058545231709969e-06,
      "loss": 0.0166,
      "step": 1519860
    },
    {
      "epoch": 2.4873169550218313,
      "grad_norm": 0.8239114880561829,
      "learning_rate": 5.058479339496452e-06,
      "loss": 0.0193,
      "step": 1519880
    },
    {
      "epoch": 2.4873496854604844,
      "grad_norm": 0.14687062799930573,
      "learning_rate": 5.058413447282935e-06,
      "loss": 0.0099,
      "step": 1519900
    },
    {
      "epoch": 2.487382415899138,
      "grad_norm": 0.5261203646659851,
      "learning_rate": 5.058347555069419e-06,
      "loss": 0.0203,
      "step": 1519920
    },
    {
      "epoch": 2.487415146337791,
      "grad_norm": 0.17553438246250153,
      "learning_rate": 5.058281662855901e-06,
      "loss": 0.013,
      "step": 1519940
    },
    {
      "epoch": 2.4874478767764447,
      "grad_norm": 0.13651129603385925,
      "learning_rate": 5.058215770642384e-06,
      "loss": 0.0096,
      "step": 1519960
    },
    {
      "epoch": 2.487480607215098,
      "grad_norm": 0.32053589820861816,
      "learning_rate": 5.058149878428866e-06,
      "loss": 0.0158,
      "step": 1519980
    },
    {
      "epoch": 2.487513337653751,
      "grad_norm": 0.3913062512874603,
      "learning_rate": 5.05808398621535e-06,
      "loss": 0.0152,
      "step": 1520000
    },
    {
      "epoch": 2.4875460680924046,
      "grad_norm": 0.15079738199710846,
      "learning_rate": 5.058018094001832e-06,
      "loss": 0.0083,
      "step": 1520020
    },
    {
      "epoch": 2.487578798531058,
      "grad_norm": 0.4415370225906372,
      "learning_rate": 5.057952201788315e-06,
      "loss": 0.0133,
      "step": 1520040
    },
    {
      "epoch": 2.4876115289697114,
      "grad_norm": 0.15005753934383392,
      "learning_rate": 5.057886309574798e-06,
      "loss": 0.0117,
      "step": 1520060
    },
    {
      "epoch": 2.4876442594083645,
      "grad_norm": 0.20555220544338226,
      "learning_rate": 5.057820417361281e-06,
      "loss": 0.0178,
      "step": 1520080
    },
    {
      "epoch": 2.487676989847018,
      "grad_norm": 0.20912587642669678,
      "learning_rate": 5.057754525147763e-06,
      "loss": 0.0176,
      "step": 1520100
    },
    {
      "epoch": 2.4877097202856713,
      "grad_norm": 0.6402515172958374,
      "learning_rate": 5.057688632934247e-06,
      "loss": 0.0196,
      "step": 1520120
    },
    {
      "epoch": 2.4877424507243244,
      "grad_norm": 0.22900770604610443,
      "learning_rate": 5.057622740720729e-06,
      "loss": 0.011,
      "step": 1520140
    },
    {
      "epoch": 2.487775181162978,
      "grad_norm": 0.13175323605537415,
      "learning_rate": 5.0575568485072125e-06,
      "loss": 0.02,
      "step": 1520160
    },
    {
      "epoch": 2.487807911601631,
      "grad_norm": 0.22123384475708008,
      "learning_rate": 5.057490956293694e-06,
      "loss": 0.0151,
      "step": 1520180
    },
    {
      "epoch": 2.4878406420402848,
      "grad_norm": 0.471245139837265,
      "learning_rate": 5.057425064080178e-06,
      "loss": 0.0123,
      "step": 1520200
    },
    {
      "epoch": 2.487873372478938,
      "grad_norm": 0.13715296983718872,
      "learning_rate": 5.0573591718666615e-06,
      "loss": 0.0231,
      "step": 1520220
    },
    {
      "epoch": 2.4879061029175915,
      "grad_norm": 0.23296262323856354,
      "learning_rate": 5.0572932796531434e-06,
      "loss": 0.012,
      "step": 1520240
    },
    {
      "epoch": 2.4879388333562447,
      "grad_norm": 0.5819730758666992,
      "learning_rate": 5.057227387439627e-06,
      "loss": 0.0142,
      "step": 1520260
    },
    {
      "epoch": 2.487971563794898,
      "grad_norm": 0.7923871874809265,
      "learning_rate": 5.05716149522611e-06,
      "loss": 0.0121,
      "step": 1520280
    },
    {
      "epoch": 2.4880042942335514,
      "grad_norm": 2.0128908157348633,
      "learning_rate": 5.0570956030125925e-06,
      "loss": 0.0164,
      "step": 1520300
    },
    {
      "epoch": 2.4880370246722046,
      "grad_norm": 0.7608471512794495,
      "learning_rate": 5.057029710799075e-06,
      "loss": 0.0153,
      "step": 1520320
    },
    {
      "epoch": 2.488069755110858,
      "grad_norm": 0.1552143543958664,
      "learning_rate": 5.056963818585559e-06,
      "loss": 0.01,
      "step": 1520340
    },
    {
      "epoch": 2.4881024855495113,
      "grad_norm": 0.1041618213057518,
      "learning_rate": 5.056897926372041e-06,
      "loss": 0.0173,
      "step": 1520360
    },
    {
      "epoch": 2.488135215988165,
      "grad_norm": 0.5042598843574524,
      "learning_rate": 5.056832034158524e-06,
      "loss": 0.0242,
      "step": 1520380
    },
    {
      "epoch": 2.488167946426818,
      "grad_norm": 0.2769131362438202,
      "learning_rate": 5.056766141945006e-06,
      "loss": 0.0075,
      "step": 1520400
    },
    {
      "epoch": 2.488200676865471,
      "grad_norm": 0.5241360068321228,
      "learning_rate": 5.05670024973149e-06,
      "loss": 0.0133,
      "step": 1520420
    },
    {
      "epoch": 2.488233407304125,
      "grad_norm": 0.40928125381469727,
      "learning_rate": 5.0566343575179725e-06,
      "loss": 0.0161,
      "step": 1520440
    },
    {
      "epoch": 2.488266137742778,
      "grad_norm": 0.08427193760871887,
      "learning_rate": 5.056568465304455e-06,
      "loss": 0.0136,
      "step": 1520460
    },
    {
      "epoch": 2.4882988681814315,
      "grad_norm": 0.08803059160709381,
      "learning_rate": 5.056502573090938e-06,
      "loss": 0.0152,
      "step": 1520480
    },
    {
      "epoch": 2.4883315986200847,
      "grad_norm": 0.3204229474067688,
      "learning_rate": 5.056436680877422e-06,
      "loss": 0.0127,
      "step": 1520500
    },
    {
      "epoch": 2.4883643290587383,
      "grad_norm": 0.15007153153419495,
      "learning_rate": 5.0563707886639035e-06,
      "loss": 0.0143,
      "step": 1520520
    },
    {
      "epoch": 2.4883970594973914,
      "grad_norm": 0.6255229115486145,
      "learning_rate": 5.056304896450387e-06,
      "loss": 0.0193,
      "step": 1520540
    },
    {
      "epoch": 2.4884297899360446,
      "grad_norm": 0.4198128581047058,
      "learning_rate": 5.056239004236869e-06,
      "loss": 0.0114,
      "step": 1520560
    },
    {
      "epoch": 2.488462520374698,
      "grad_norm": 0.1347661316394806,
      "learning_rate": 5.0561731120233525e-06,
      "loss": 0.0136,
      "step": 1520580
    },
    {
      "epoch": 2.4884952508133513,
      "grad_norm": 0.9121862649917603,
      "learning_rate": 5.056107219809836e-06,
      "loss": 0.0123,
      "step": 1520600
    },
    {
      "epoch": 2.488527981252005,
      "grad_norm": 0.10535933822393417,
      "learning_rate": 5.056041327596318e-06,
      "loss": 0.0139,
      "step": 1520620
    },
    {
      "epoch": 2.488560711690658,
      "grad_norm": 0.8020951151847839,
      "learning_rate": 5.055975435382802e-06,
      "loss": 0.0084,
      "step": 1520640
    },
    {
      "epoch": 2.4885934421293117,
      "grad_norm": 0.3258900046348572,
      "learning_rate": 5.055909543169284e-06,
      "loss": 0.018,
      "step": 1520660
    },
    {
      "epoch": 2.488626172567965,
      "grad_norm": 1.1373704671859741,
      "learning_rate": 5.055843650955767e-06,
      "loss": 0.016,
      "step": 1520680
    },
    {
      "epoch": 2.488658903006618,
      "grad_norm": 0.20257270336151123,
      "learning_rate": 5.05577775874225e-06,
      "loss": 0.0182,
      "step": 1520700
    },
    {
      "epoch": 2.4886916334452716,
      "grad_norm": 0.3154906928539276,
      "learning_rate": 5.055711866528733e-06,
      "loss": 0.013,
      "step": 1520720
    },
    {
      "epoch": 2.4887243638839247,
      "grad_norm": 0.3022097051143646,
      "learning_rate": 5.055645974315215e-06,
      "loss": 0.0171,
      "step": 1520740
    },
    {
      "epoch": 2.4887570943225783,
      "grad_norm": 0.39201611280441284,
      "learning_rate": 5.055580082101699e-06,
      "loss": 0.0155,
      "step": 1520760
    },
    {
      "epoch": 2.4887898247612315,
      "grad_norm": 0.44734227657318115,
      "learning_rate": 5.055514189888181e-06,
      "loss": 0.0144,
      "step": 1520780
    },
    {
      "epoch": 2.4888225551998846,
      "grad_norm": 0.2053563892841339,
      "learning_rate": 5.055448297674664e-06,
      "loss": 0.011,
      "step": 1520800
    },
    {
      "epoch": 2.488855285638538,
      "grad_norm": 0.1702643483877182,
      "learning_rate": 5.055382405461146e-06,
      "loss": 0.0165,
      "step": 1520820
    },
    {
      "epoch": 2.4888880160771913,
      "grad_norm": 0.34281349182128906,
      "learning_rate": 5.05531651324763e-06,
      "loss": 0.018,
      "step": 1520840
    },
    {
      "epoch": 2.488920746515845,
      "grad_norm": 0.5698735117912292,
      "learning_rate": 5.055250621034113e-06,
      "loss": 0.0112,
      "step": 1520860
    },
    {
      "epoch": 2.488953476954498,
      "grad_norm": 0.6724340915679932,
      "learning_rate": 5.055184728820595e-06,
      "loss": 0.0124,
      "step": 1520880
    },
    {
      "epoch": 2.4889862073931512,
      "grad_norm": 0.6279082894325256,
      "learning_rate": 5.055118836607078e-06,
      "loss": 0.0127,
      "step": 1520900
    },
    {
      "epoch": 2.489018937831805,
      "grad_norm": 0.6691553592681885,
      "learning_rate": 5.055052944393562e-06,
      "loss": 0.0109,
      "step": 1520920
    },
    {
      "epoch": 2.489051668270458,
      "grad_norm": 0.5350159406661987,
      "learning_rate": 5.054987052180044e-06,
      "loss": 0.0128,
      "step": 1520940
    },
    {
      "epoch": 2.4890843987091116,
      "grad_norm": 0.7746899127960205,
      "learning_rate": 5.054921159966527e-06,
      "loss": 0.0136,
      "step": 1520960
    },
    {
      "epoch": 2.4891171291477647,
      "grad_norm": 0.3146741986274719,
      "learning_rate": 5.054855267753011e-06,
      "loss": 0.0076,
      "step": 1520980
    },
    {
      "epoch": 2.4891498595864183,
      "grad_norm": 0.219258114695549,
      "learning_rate": 5.054789375539493e-06,
      "loss": 0.014,
      "step": 1521000
    },
    {
      "epoch": 2.4891825900250715,
      "grad_norm": 0.2846904993057251,
      "learning_rate": 5.054723483325976e-06,
      "loss": 0.0133,
      "step": 1521020
    },
    {
      "epoch": 2.4892153204637246,
      "grad_norm": 0.13488543033599854,
      "learning_rate": 5.054657591112458e-06,
      "loss": 0.0175,
      "step": 1521040
    },
    {
      "epoch": 2.489248050902378,
      "grad_norm": 0.1767326146364212,
      "learning_rate": 5.054591698898942e-06,
      "loss": 0.0111,
      "step": 1521060
    },
    {
      "epoch": 2.4892807813410314,
      "grad_norm": 0.24314747750759125,
      "learning_rate": 5.0545258066854244e-06,
      "loss": 0.0178,
      "step": 1521080
    },
    {
      "epoch": 2.489313511779685,
      "grad_norm": 0.7430950999259949,
      "learning_rate": 5.054459914471907e-06,
      "loss": 0.0187,
      "step": 1521100
    },
    {
      "epoch": 2.489346242218338,
      "grad_norm": 0.7943884134292603,
      "learning_rate": 5.05439402225839e-06,
      "loss": 0.0101,
      "step": 1521120
    },
    {
      "epoch": 2.4893789726569917,
      "grad_norm": 0.11602229624986649,
      "learning_rate": 5.0543281300448735e-06,
      "loss": 0.0117,
      "step": 1521140
    },
    {
      "epoch": 2.489411703095645,
      "grad_norm": 0.3286166191101074,
      "learning_rate": 5.054262237831355e-06,
      "loss": 0.0117,
      "step": 1521160
    },
    {
      "epoch": 2.489444433534298,
      "grad_norm": 0.061070796102285385,
      "learning_rate": 5.054196345617839e-06,
      "loss": 0.0203,
      "step": 1521180
    },
    {
      "epoch": 2.4894771639729516,
      "grad_norm": 0.22230838239192963,
      "learning_rate": 5.054130453404321e-06,
      "loss": 0.0101,
      "step": 1521200
    },
    {
      "epoch": 2.4895098944116048,
      "grad_norm": 0.2500897943973541,
      "learning_rate": 5.0540645611908045e-06,
      "loss": 0.0125,
      "step": 1521220
    },
    {
      "epoch": 2.4895426248502583,
      "grad_norm": 0.44544392824172974,
      "learning_rate": 5.053998668977287e-06,
      "loss": 0.0169,
      "step": 1521240
    },
    {
      "epoch": 2.4895753552889115,
      "grad_norm": 0.5605235695838928,
      "learning_rate": 5.05393277676377e-06,
      "loss": 0.0108,
      "step": 1521260
    },
    {
      "epoch": 2.489608085727565,
      "grad_norm": 0.06732679903507233,
      "learning_rate": 5.0538668845502535e-06,
      "loss": 0.0134,
      "step": 1521280
    },
    {
      "epoch": 2.4896408161662182,
      "grad_norm": 0.531519889831543,
      "learning_rate": 5.053800992336736e-06,
      "loss": 0.0179,
      "step": 1521300
    },
    {
      "epoch": 2.4896735466048714,
      "grad_norm": 0.22703169286251068,
      "learning_rate": 5.053735100123219e-06,
      "loss": 0.0104,
      "step": 1521320
    },
    {
      "epoch": 2.489706277043525,
      "grad_norm": 0.9134024381637573,
      "learning_rate": 5.053669207909702e-06,
      "loss": 0.0144,
      "step": 1521340
    },
    {
      "epoch": 2.489739007482178,
      "grad_norm": 0.43390703201293945,
      "learning_rate": 5.053603315696185e-06,
      "loss": 0.015,
      "step": 1521360
    },
    {
      "epoch": 2.4897717379208317,
      "grad_norm": 0.48822253942489624,
      "learning_rate": 5.053537423482667e-06,
      "loss": 0.0112,
      "step": 1521380
    },
    {
      "epoch": 2.489804468359485,
      "grad_norm": 0.23618184030056,
      "learning_rate": 5.053471531269151e-06,
      "loss": 0.0178,
      "step": 1521400
    },
    {
      "epoch": 2.4898371987981385,
      "grad_norm": 0.8538280129432678,
      "learning_rate": 5.053405639055633e-06,
      "loss": 0.02,
      "step": 1521420
    },
    {
      "epoch": 2.4898699292367916,
      "grad_norm": 0.20208711922168732,
      "learning_rate": 5.053339746842116e-06,
      "loss": 0.0141,
      "step": 1521440
    },
    {
      "epoch": 2.4899026596754448,
      "grad_norm": 0.6319190859794617,
      "learning_rate": 5.053273854628599e-06,
      "loss": 0.0189,
      "step": 1521460
    },
    {
      "epoch": 2.4899353901140984,
      "grad_norm": 0.2705487012863159,
      "learning_rate": 5.053207962415082e-06,
      "loss": 0.0135,
      "step": 1521480
    },
    {
      "epoch": 2.4899681205527515,
      "grad_norm": 0.13039687275886536,
      "learning_rate": 5.0531420702015645e-06,
      "loss": 0.0104,
      "step": 1521500
    },
    {
      "epoch": 2.490000850991405,
      "grad_norm": 0.7258070707321167,
      "learning_rate": 5.053076177988048e-06,
      "loss": 0.0136,
      "step": 1521520
    },
    {
      "epoch": 2.4900335814300583,
      "grad_norm": 0.3405390977859497,
      "learning_rate": 5.05301028577453e-06,
      "loss": 0.0136,
      "step": 1521540
    },
    {
      "epoch": 2.490066311868712,
      "grad_norm": 0.19990098476409912,
      "learning_rate": 5.0529443935610136e-06,
      "loss": 0.0099,
      "step": 1521560
    },
    {
      "epoch": 2.490099042307365,
      "grad_norm": 0.14939454197883606,
      "learning_rate": 5.0528785013474955e-06,
      "loss": 0.0103,
      "step": 1521580
    },
    {
      "epoch": 2.490131772746018,
      "grad_norm": 0.08374284952878952,
      "learning_rate": 5.052812609133979e-06,
      "loss": 0.0157,
      "step": 1521600
    },
    {
      "epoch": 2.4901645031846718,
      "grad_norm": 0.3143879771232605,
      "learning_rate": 5.052746716920462e-06,
      "loss": 0.0121,
      "step": 1521620
    },
    {
      "epoch": 2.490197233623325,
      "grad_norm": 0.37803804874420166,
      "learning_rate": 5.0526808247069445e-06,
      "loss": 0.0117,
      "step": 1521640
    },
    {
      "epoch": 2.4902299640619785,
      "grad_norm": 0.6722471117973328,
      "learning_rate": 5.052614932493428e-06,
      "loss": 0.021,
      "step": 1521660
    },
    {
      "epoch": 2.4902626945006316,
      "grad_norm": 0.5710418820381165,
      "learning_rate": 5.052549040279911e-06,
      "loss": 0.0128,
      "step": 1521680
    },
    {
      "epoch": 2.4902954249392852,
      "grad_norm": 0.2523041069507599,
      "learning_rate": 5.052483148066394e-06,
      "loss": 0.008,
      "step": 1521700
    },
    {
      "epoch": 2.4903281553779384,
      "grad_norm": 0.33504870533943176,
      "learning_rate": 5.052417255852876e-06,
      "loss": 0.0131,
      "step": 1521720
    },
    {
      "epoch": 2.4903608858165915,
      "grad_norm": 0.6267969012260437,
      "learning_rate": 5.05235136363936e-06,
      "loss": 0.0156,
      "step": 1521740
    },
    {
      "epoch": 2.490393616255245,
      "grad_norm": 0.3058580756187439,
      "learning_rate": 5.052285471425842e-06,
      "loss": 0.0121,
      "step": 1521760
    },
    {
      "epoch": 2.4904263466938983,
      "grad_norm": 0.5654870271682739,
      "learning_rate": 5.052219579212325e-06,
      "loss": 0.0229,
      "step": 1521780
    },
    {
      "epoch": 2.490459077132552,
      "grad_norm": 0.20954899489879608,
      "learning_rate": 5.052153686998807e-06,
      "loss": 0.0145,
      "step": 1521800
    },
    {
      "epoch": 2.490491807571205,
      "grad_norm": 0.017957063391804695,
      "learning_rate": 5.052087794785291e-06,
      "loss": 0.0118,
      "step": 1521820
    },
    {
      "epoch": 2.4905245380098586,
      "grad_norm": 1.2319674491882324,
      "learning_rate": 5.052021902571773e-06,
      "loss": 0.0124,
      "step": 1521840
    },
    {
      "epoch": 2.4905572684485118,
      "grad_norm": 1.0183566808700562,
      "learning_rate": 5.051956010358256e-06,
      "loss": 0.0231,
      "step": 1521860
    },
    {
      "epoch": 2.490589998887165,
      "grad_norm": 0.2807876169681549,
      "learning_rate": 5.051890118144739e-06,
      "loss": 0.0102,
      "step": 1521880
    },
    {
      "epoch": 2.4906227293258185,
      "grad_norm": 0.2657032608985901,
      "learning_rate": 5.051824225931222e-06,
      "loss": 0.0141,
      "step": 1521900
    },
    {
      "epoch": 2.4906554597644717,
      "grad_norm": 0.42206525802612305,
      "learning_rate": 5.051758333717705e-06,
      "loss": 0.0127,
      "step": 1521920
    },
    {
      "epoch": 2.4906881902031253,
      "grad_norm": 0.7082493305206299,
      "learning_rate": 5.051692441504188e-06,
      "loss": 0.0171,
      "step": 1521940
    },
    {
      "epoch": 2.4907209206417784,
      "grad_norm": 0.6269046664237976,
      "learning_rate": 5.05162654929067e-06,
      "loss": 0.015,
      "step": 1521960
    },
    {
      "epoch": 2.490753651080432,
      "grad_norm": 0.6889460682868958,
      "learning_rate": 5.051560657077154e-06,
      "loss": 0.0136,
      "step": 1521980
    },
    {
      "epoch": 2.490786381519085,
      "grad_norm": 0.41016408801078796,
      "learning_rate": 5.051494764863637e-06,
      "loss": 0.0151,
      "step": 1522000
    },
    {
      "epoch": 2.4908191119577383,
      "grad_norm": 0.14310479164123535,
      "learning_rate": 5.051428872650119e-06,
      "loss": 0.0103,
      "step": 1522020
    },
    {
      "epoch": 2.490851842396392,
      "grad_norm": 0.46742647886276245,
      "learning_rate": 5.051362980436603e-06,
      "loss": 0.028,
      "step": 1522040
    },
    {
      "epoch": 2.490884572835045,
      "grad_norm": 0.24970796704292297,
      "learning_rate": 5.051297088223085e-06,
      "loss": 0.0096,
      "step": 1522060
    },
    {
      "epoch": 2.4909173032736986,
      "grad_norm": 0.8921671509742737,
      "learning_rate": 5.051231196009568e-06,
      "loss": 0.0173,
      "step": 1522080
    },
    {
      "epoch": 2.490950033712352,
      "grad_norm": 0.3547096252441406,
      "learning_rate": 5.051165303796051e-06,
      "loss": 0.0133,
      "step": 1522100
    },
    {
      "epoch": 2.4909827641510054,
      "grad_norm": 0.3545509874820709,
      "learning_rate": 5.051099411582534e-06,
      "loss": 0.0125,
      "step": 1522120
    },
    {
      "epoch": 2.4910154945896585,
      "grad_norm": 0.15200190246105194,
      "learning_rate": 5.051033519369016e-06,
      "loss": 0.0119,
      "step": 1522140
    },
    {
      "epoch": 2.4910482250283117,
      "grad_norm": 0.21109598875045776,
      "learning_rate": 5.0509676271555e-06,
      "loss": 0.0119,
      "step": 1522160
    },
    {
      "epoch": 2.4910809554669653,
      "grad_norm": 1.3207764625549316,
      "learning_rate": 5.050901734941982e-06,
      "loss": 0.0168,
      "step": 1522180
    },
    {
      "epoch": 2.4911136859056184,
      "grad_norm": 0.2749764025211334,
      "learning_rate": 5.0508358427284655e-06,
      "loss": 0.0118,
      "step": 1522200
    },
    {
      "epoch": 2.491146416344272,
      "grad_norm": 0.30062755942344666,
      "learning_rate": 5.050769950514947e-06,
      "loss": 0.0088,
      "step": 1522220
    },
    {
      "epoch": 2.491179146782925,
      "grad_norm": 0.6720075607299805,
      "learning_rate": 5.050704058301431e-06,
      "loss": 0.0121,
      "step": 1522240
    },
    {
      "epoch": 2.4912118772215783,
      "grad_norm": 0.0981459766626358,
      "learning_rate": 5.050638166087914e-06,
      "loss": 0.0182,
      "step": 1522260
    },
    {
      "epoch": 2.491244607660232,
      "grad_norm": 0.22244969010353088,
      "learning_rate": 5.0505722738743964e-06,
      "loss": 0.0149,
      "step": 1522280
    },
    {
      "epoch": 2.491277338098885,
      "grad_norm": 0.7715522646903992,
      "learning_rate": 5.050506381660879e-06,
      "loss": 0.0106,
      "step": 1522300
    },
    {
      "epoch": 2.4913100685375387,
      "grad_norm": 0.6482614874839783,
      "learning_rate": 5.050440489447363e-06,
      "loss": 0.0179,
      "step": 1522320
    },
    {
      "epoch": 2.491342798976192,
      "grad_norm": 0.43525049090385437,
      "learning_rate": 5.0503745972338455e-06,
      "loss": 0.0144,
      "step": 1522340
    },
    {
      "epoch": 2.491375529414845,
      "grad_norm": 0.41123273968696594,
      "learning_rate": 5.050308705020328e-06,
      "loss": 0.0138,
      "step": 1522360
    },
    {
      "epoch": 2.4914082598534986,
      "grad_norm": 0.16149117052555084,
      "learning_rate": 5.050242812806812e-06,
      "loss": 0.0188,
      "step": 1522380
    },
    {
      "epoch": 2.4914409902921517,
      "grad_norm": 0.32740023732185364,
      "learning_rate": 5.050176920593294e-06,
      "loss": 0.0138,
      "step": 1522400
    },
    {
      "epoch": 2.4914737207308053,
      "grad_norm": 0.05540257692337036,
      "learning_rate": 5.050111028379777e-06,
      "loss": 0.0177,
      "step": 1522420
    },
    {
      "epoch": 2.4915064511694585,
      "grad_norm": 0.7711187601089478,
      "learning_rate": 5.050045136166259e-06,
      "loss": 0.0149,
      "step": 1522440
    },
    {
      "epoch": 2.491539181608112,
      "grad_norm": 0.6351667046546936,
      "learning_rate": 5.049979243952743e-06,
      "loss": 0.024,
      "step": 1522460
    },
    {
      "epoch": 2.491571912046765,
      "grad_norm": 0.24687416851520538,
      "learning_rate": 5.0499133517392255e-06,
      "loss": 0.0121,
      "step": 1522480
    },
    {
      "epoch": 2.4916046424854184,
      "grad_norm": 0.5685129761695862,
      "learning_rate": 5.049847459525708e-06,
      "loss": 0.013,
      "step": 1522500
    },
    {
      "epoch": 2.491637372924072,
      "grad_norm": 0.7358582019805908,
      "learning_rate": 5.049781567312191e-06,
      "loss": 0.0168,
      "step": 1522520
    },
    {
      "epoch": 2.491670103362725,
      "grad_norm": 0.5291295647621155,
      "learning_rate": 5.049715675098675e-06,
      "loss": 0.0136,
      "step": 1522540
    },
    {
      "epoch": 2.4917028338013787,
      "grad_norm": 0.1664079874753952,
      "learning_rate": 5.0496497828851565e-06,
      "loss": 0.0121,
      "step": 1522560
    },
    {
      "epoch": 2.491735564240032,
      "grad_norm": 1.4804060459136963,
      "learning_rate": 5.04958389067164e-06,
      "loss": 0.0131,
      "step": 1522580
    },
    {
      "epoch": 2.4917682946786854,
      "grad_norm": 0.337001234292984,
      "learning_rate": 5.049517998458122e-06,
      "loss": 0.0096,
      "step": 1522600
    },
    {
      "epoch": 2.4918010251173386,
      "grad_norm": 0.1207590103149414,
      "learning_rate": 5.0494521062446056e-06,
      "loss": 0.0149,
      "step": 1522620
    },
    {
      "epoch": 2.4918337555559917,
      "grad_norm": 0.13583911955356598,
      "learning_rate": 5.0493862140310874e-06,
      "loss": 0.0116,
      "step": 1522640
    },
    {
      "epoch": 2.4918664859946453,
      "grad_norm": 0.4830128252506256,
      "learning_rate": 5.049320321817571e-06,
      "loss": 0.0136,
      "step": 1522660
    },
    {
      "epoch": 2.4918992164332985,
      "grad_norm": 0.4739850163459778,
      "learning_rate": 5.049254429604055e-06,
      "loss": 0.0143,
      "step": 1522680
    },
    {
      "epoch": 2.491931946871952,
      "grad_norm": 0.2901013195514679,
      "learning_rate": 5.049188537390537e-06,
      "loss": 0.0149,
      "step": 1522700
    },
    {
      "epoch": 2.4919646773106052,
      "grad_norm": 0.045902445912361145,
      "learning_rate": 5.04912264517702e-06,
      "loss": 0.0119,
      "step": 1522720
    },
    {
      "epoch": 2.491997407749259,
      "grad_norm": 0.38232216238975525,
      "learning_rate": 5.049056752963503e-06,
      "loss": 0.0171,
      "step": 1522740
    },
    {
      "epoch": 2.492030138187912,
      "grad_norm": 0.3069654703140259,
      "learning_rate": 5.048990860749986e-06,
      "loss": 0.0143,
      "step": 1522760
    },
    {
      "epoch": 2.492062868626565,
      "grad_norm": 0.30508071184158325,
      "learning_rate": 5.048924968536468e-06,
      "loss": 0.017,
      "step": 1522780
    },
    {
      "epoch": 2.4920955990652187,
      "grad_norm": 0.9777265191078186,
      "learning_rate": 5.048859076322952e-06,
      "loss": 0.0148,
      "step": 1522800
    },
    {
      "epoch": 2.492128329503872,
      "grad_norm": 0.12425883859395981,
      "learning_rate": 5.048793184109434e-06,
      "loss": 0.0076,
      "step": 1522820
    },
    {
      "epoch": 2.4921610599425255,
      "grad_norm": 0.19376839697360992,
      "learning_rate": 5.048727291895917e-06,
      "loss": 0.0115,
      "step": 1522840
    },
    {
      "epoch": 2.4921937903811786,
      "grad_norm": 0.4020506739616394,
      "learning_rate": 5.048661399682399e-06,
      "loss": 0.0151,
      "step": 1522860
    },
    {
      "epoch": 2.492226520819832,
      "grad_norm": 0.12618450820446014,
      "learning_rate": 5.048595507468883e-06,
      "loss": 0.0136,
      "step": 1522880
    },
    {
      "epoch": 2.4922592512584854,
      "grad_norm": 0.2649247944355011,
      "learning_rate": 5.048529615255366e-06,
      "loss": 0.0178,
      "step": 1522900
    },
    {
      "epoch": 2.4922919816971385,
      "grad_norm": 0.19870822131633759,
      "learning_rate": 5.048463723041848e-06,
      "loss": 0.0171,
      "step": 1522920
    },
    {
      "epoch": 2.492324712135792,
      "grad_norm": 0.28637248277664185,
      "learning_rate": 5.048397830828331e-06,
      "loss": 0.0133,
      "step": 1522940
    },
    {
      "epoch": 2.4923574425744452,
      "grad_norm": 0.4276511073112488,
      "learning_rate": 5.048331938614815e-06,
      "loss": 0.0185,
      "step": 1522960
    },
    {
      "epoch": 2.492390173013099,
      "grad_norm": 0.323731929063797,
      "learning_rate": 5.0482660464012966e-06,
      "loss": 0.0135,
      "step": 1522980
    },
    {
      "epoch": 2.492422903451752,
      "grad_norm": 0.25837239623069763,
      "learning_rate": 5.04820015418778e-06,
      "loss": 0.0146,
      "step": 1523000
    },
    {
      "epoch": 2.4924556338904056,
      "grad_norm": 0.09371203184127808,
      "learning_rate": 5.048134261974262e-06,
      "loss": 0.0124,
      "step": 1523020
    },
    {
      "epoch": 2.4924883643290587,
      "grad_norm": 0.337205171585083,
      "learning_rate": 5.048068369760746e-06,
      "loss": 0.0129,
      "step": 1523040
    },
    {
      "epoch": 2.492521094767712,
      "grad_norm": 0.33843910694122314,
      "learning_rate": 5.048002477547229e-06,
      "loss": 0.0151,
      "step": 1523060
    },
    {
      "epoch": 2.4925538252063655,
      "grad_norm": 0.7872250080108643,
      "learning_rate": 5.047936585333711e-06,
      "loss": 0.013,
      "step": 1523080
    },
    {
      "epoch": 2.4925865556450186,
      "grad_norm": 0.4814052879810333,
      "learning_rate": 5.047870693120195e-06,
      "loss": 0.0114,
      "step": 1523100
    },
    {
      "epoch": 2.4926192860836722,
      "grad_norm": 0.20572921633720398,
      "learning_rate": 5.0478048009066774e-06,
      "loss": 0.0136,
      "step": 1523120
    },
    {
      "epoch": 2.4926520165223254,
      "grad_norm": 0.6284933686256409,
      "learning_rate": 5.04773890869316e-06,
      "loss": 0.0101,
      "step": 1523140
    },
    {
      "epoch": 2.492684746960979,
      "grad_norm": 0.47858086228370667,
      "learning_rate": 5.047673016479643e-06,
      "loss": 0.0106,
      "step": 1523160
    },
    {
      "epoch": 2.492717477399632,
      "grad_norm": 0.11349087953567505,
      "learning_rate": 5.0476071242661265e-06,
      "loss": 0.0142,
      "step": 1523180
    },
    {
      "epoch": 2.4927502078382853,
      "grad_norm": 0.13179506361484528,
      "learning_rate": 5.047541232052608e-06,
      "loss": 0.0092,
      "step": 1523200
    },
    {
      "epoch": 2.492782938276939,
      "grad_norm": 0.5697823762893677,
      "learning_rate": 5.047475339839092e-06,
      "loss": 0.0188,
      "step": 1523220
    },
    {
      "epoch": 2.492815668715592,
      "grad_norm": 0.10761380940675735,
      "learning_rate": 5.047409447625574e-06,
      "loss": 0.0089,
      "step": 1523240
    },
    {
      "epoch": 2.4928483991542456,
      "grad_norm": 0.8064016699790955,
      "learning_rate": 5.0473435554120575e-06,
      "loss": 0.0138,
      "step": 1523260
    },
    {
      "epoch": 2.4928811295928988,
      "grad_norm": 0.365761935710907,
      "learning_rate": 5.04727766319854e-06,
      "loss": 0.0202,
      "step": 1523280
    },
    {
      "epoch": 2.4929138600315524,
      "grad_norm": 0.1890580654144287,
      "learning_rate": 5.047211770985023e-06,
      "loss": 0.0089,
      "step": 1523300
    },
    {
      "epoch": 2.4929465904702055,
      "grad_norm": 0.5147037506103516,
      "learning_rate": 5.047145878771506e-06,
      "loss": 0.0199,
      "step": 1523320
    },
    {
      "epoch": 2.4929793209088587,
      "grad_norm": 0.3627170920372009,
      "learning_rate": 5.047079986557989e-06,
      "loss": 0.0156,
      "step": 1523340
    },
    {
      "epoch": 2.4930120513475122,
      "grad_norm": 0.191966250538826,
      "learning_rate": 5.047014094344471e-06,
      "loss": 0.0157,
      "step": 1523360
    },
    {
      "epoch": 2.4930447817861654,
      "grad_norm": 0.46221044659614563,
      "learning_rate": 5.046948202130955e-06,
      "loss": 0.0197,
      "step": 1523380
    },
    {
      "epoch": 2.493077512224819,
      "grad_norm": 0.5001159310340881,
      "learning_rate": 5.046882309917438e-06,
      "loss": 0.0172,
      "step": 1523400
    },
    {
      "epoch": 2.493110242663472,
      "grad_norm": 0.16952133178710938,
      "learning_rate": 5.04681641770392e-06,
      "loss": 0.0111,
      "step": 1523420
    },
    {
      "epoch": 2.4931429731021257,
      "grad_norm": 0.27441534399986267,
      "learning_rate": 5.046750525490404e-06,
      "loss": 0.0181,
      "step": 1523440
    },
    {
      "epoch": 2.493175703540779,
      "grad_norm": 0.47911274433135986,
      "learning_rate": 5.046684633276886e-06,
      "loss": 0.013,
      "step": 1523460
    },
    {
      "epoch": 2.493208433979432,
      "grad_norm": 0.2865268290042877,
      "learning_rate": 5.046618741063369e-06,
      "loss": 0.0123,
      "step": 1523480
    },
    {
      "epoch": 2.4932411644180856,
      "grad_norm": 0.4811716377735138,
      "learning_rate": 5.046552848849852e-06,
      "loss": 0.0152,
      "step": 1523500
    },
    {
      "epoch": 2.493273894856739,
      "grad_norm": 0.3993055820465088,
      "learning_rate": 5.046486956636335e-06,
      "loss": 0.0183,
      "step": 1523520
    },
    {
      "epoch": 2.4933066252953924,
      "grad_norm": 0.2154497504234314,
      "learning_rate": 5.0464210644228175e-06,
      "loss": 0.0141,
      "step": 1523540
    },
    {
      "epoch": 2.4933393557340455,
      "grad_norm": 0.1751723438501358,
      "learning_rate": 5.046355172209301e-06,
      "loss": 0.0134,
      "step": 1523560
    },
    {
      "epoch": 2.493372086172699,
      "grad_norm": 0.5832939147949219,
      "learning_rate": 5.046289279995783e-06,
      "loss": 0.0199,
      "step": 1523580
    },
    {
      "epoch": 2.4934048166113523,
      "grad_norm": 0.3381110727787018,
      "learning_rate": 5.0462233877822666e-06,
      "loss": 0.0132,
      "step": 1523600
    },
    {
      "epoch": 2.4934375470500054,
      "grad_norm": 0.13636191189289093,
      "learning_rate": 5.0461574955687485e-06,
      "loss": 0.0103,
      "step": 1523620
    },
    {
      "epoch": 2.493470277488659,
      "grad_norm": 0.21974167227745056,
      "learning_rate": 5.046091603355232e-06,
      "loss": 0.0161,
      "step": 1523640
    },
    {
      "epoch": 2.493503007927312,
      "grad_norm": 1.3627221584320068,
      "learning_rate": 5.046025711141714e-06,
      "loss": 0.0163,
      "step": 1523660
    },
    {
      "epoch": 2.4935357383659658,
      "grad_norm": 0.1924808770418167,
      "learning_rate": 5.0459598189281975e-06,
      "loss": 0.0167,
      "step": 1523680
    },
    {
      "epoch": 2.493568468804619,
      "grad_norm": 0.13484978675842285,
      "learning_rate": 5.04589392671468e-06,
      "loss": 0.0123,
      "step": 1523700
    },
    {
      "epoch": 2.4936011992432725,
      "grad_norm": 0.5877142548561096,
      "learning_rate": 5.045828034501163e-06,
      "loss": 0.0201,
      "step": 1523720
    },
    {
      "epoch": 2.4936339296819257,
      "grad_norm": 0.3460428714752197,
      "learning_rate": 5.045762142287647e-06,
      "loss": 0.0154,
      "step": 1523740
    },
    {
      "epoch": 2.493666660120579,
      "grad_norm": 1.7899128198623657,
      "learning_rate": 5.045696250074129e-06,
      "loss": 0.0128,
      "step": 1523760
    },
    {
      "epoch": 2.4936993905592324,
      "grad_norm": 0.4276715815067291,
      "learning_rate": 5.045630357860613e-06,
      "loss": 0.0171,
      "step": 1523780
    },
    {
      "epoch": 2.4937321209978855,
      "grad_norm": 0.09295818209648132,
      "learning_rate": 5.045564465647095e-06,
      "loss": 0.0124,
      "step": 1523800
    },
    {
      "epoch": 2.493764851436539,
      "grad_norm": 0.28642529249191284,
      "learning_rate": 5.045498573433578e-06,
      "loss": 0.0154,
      "step": 1523820
    },
    {
      "epoch": 2.4937975818751923,
      "grad_norm": 0.622572660446167,
      "learning_rate": 5.04543268122006e-06,
      "loss": 0.0169,
      "step": 1523840
    },
    {
      "epoch": 2.4938303123138454,
      "grad_norm": 0.3229495584964752,
      "learning_rate": 5.045366789006544e-06,
      "loss": 0.0108,
      "step": 1523860
    },
    {
      "epoch": 2.493863042752499,
      "grad_norm": 0.27527666091918945,
      "learning_rate": 5.045300896793026e-06,
      "loss": 0.02,
      "step": 1523880
    },
    {
      "epoch": 2.493895773191152,
      "grad_norm": 0.4353014826774597,
      "learning_rate": 5.045235004579509e-06,
      "loss": 0.0116,
      "step": 1523900
    },
    {
      "epoch": 2.493928503629806,
      "grad_norm": 0.35655662417411804,
      "learning_rate": 5.045169112365992e-06,
      "loss": 0.0186,
      "step": 1523920
    },
    {
      "epoch": 2.493961234068459,
      "grad_norm": 0.8123573660850525,
      "learning_rate": 5.045103220152475e-06,
      "loss": 0.0182,
      "step": 1523940
    },
    {
      "epoch": 2.493993964507112,
      "grad_norm": 0.18143287301063538,
      "learning_rate": 5.045037327938958e-06,
      "loss": 0.0097,
      "step": 1523960
    },
    {
      "epoch": 2.4940266949457657,
      "grad_norm": 0.3233373165130615,
      "learning_rate": 5.044971435725441e-06,
      "loss": 0.0131,
      "step": 1523980
    },
    {
      "epoch": 2.494059425384419,
      "grad_norm": 0.23789246380329132,
      "learning_rate": 5.044905543511923e-06,
      "loss": 0.0151,
      "step": 1524000
    },
    {
      "epoch": 2.4940921558230724,
      "grad_norm": 0.2359628975391388,
      "learning_rate": 5.044839651298407e-06,
      "loss": 0.0142,
      "step": 1524020
    },
    {
      "epoch": 2.4941248862617256,
      "grad_norm": 0.5893222689628601,
      "learning_rate": 5.0447737590848885e-06,
      "loss": 0.0141,
      "step": 1524040
    },
    {
      "epoch": 2.494157616700379,
      "grad_norm": 0.19147436320781708,
      "learning_rate": 5.044707866871372e-06,
      "loss": 0.0103,
      "step": 1524060
    },
    {
      "epoch": 2.4941903471390323,
      "grad_norm": 0.4547511339187622,
      "learning_rate": 5.044641974657855e-06,
      "loss": 0.0105,
      "step": 1524080
    },
    {
      "epoch": 2.4942230775776855,
      "grad_norm": 0.08753052353858948,
      "learning_rate": 5.044576082444338e-06,
      "loss": 0.0123,
      "step": 1524100
    },
    {
      "epoch": 2.494255808016339,
      "grad_norm": 0.3759630620479584,
      "learning_rate": 5.044510190230821e-06,
      "loss": 0.0087,
      "step": 1524120
    },
    {
      "epoch": 2.494288538454992,
      "grad_norm": 0.45594680309295654,
      "learning_rate": 5.044444298017304e-06,
      "loss": 0.0157,
      "step": 1524140
    },
    {
      "epoch": 2.494321268893646,
      "grad_norm": 0.5411773324012756,
      "learning_rate": 5.044378405803787e-06,
      "loss": 0.0159,
      "step": 1524160
    },
    {
      "epoch": 2.494353999332299,
      "grad_norm": 0.4619766175746918,
      "learning_rate": 5.044312513590269e-06,
      "loss": 0.0202,
      "step": 1524180
    },
    {
      "epoch": 2.4943867297709525,
      "grad_norm": 0.9005244970321655,
      "learning_rate": 5.044246621376753e-06,
      "loss": 0.0107,
      "step": 1524200
    },
    {
      "epoch": 2.4944194602096057,
      "grad_norm": 0.28328046202659607,
      "learning_rate": 5.044180729163235e-06,
      "loss": 0.0117,
      "step": 1524220
    },
    {
      "epoch": 2.494452190648259,
      "grad_norm": 0.4845869839191437,
      "learning_rate": 5.0441148369497185e-06,
      "loss": 0.0195,
      "step": 1524240
    },
    {
      "epoch": 2.4944849210869124,
      "grad_norm": 0.3996201753616333,
      "learning_rate": 5.0440489447362e-06,
      "loss": 0.0121,
      "step": 1524260
    },
    {
      "epoch": 2.4945176515255656,
      "grad_norm": 0.12187550216913223,
      "learning_rate": 5.043983052522684e-06,
      "loss": 0.0145,
      "step": 1524280
    },
    {
      "epoch": 2.494550381964219,
      "grad_norm": 0.32963696122169495,
      "learning_rate": 5.043917160309167e-06,
      "loss": 0.0138,
      "step": 1524300
    },
    {
      "epoch": 2.4945831124028723,
      "grad_norm": 0.3238133490085602,
      "learning_rate": 5.0438512680956494e-06,
      "loss": 0.0122,
      "step": 1524320
    },
    {
      "epoch": 2.494615842841526,
      "grad_norm": 0.930330753326416,
      "learning_rate": 5.043785375882132e-06,
      "loss": 0.0187,
      "step": 1524340
    },
    {
      "epoch": 2.494648573280179,
      "grad_norm": 0.1865488439798355,
      "learning_rate": 5.043719483668616e-06,
      "loss": 0.0159,
      "step": 1524360
    },
    {
      "epoch": 2.4946813037188322,
      "grad_norm": 0.23031871020793915,
      "learning_rate": 5.043653591455098e-06,
      "loss": 0.0141,
      "step": 1524380
    },
    {
      "epoch": 2.494714034157486,
      "grad_norm": 1.0157874822616577,
      "learning_rate": 5.043587699241581e-06,
      "loss": 0.0209,
      "step": 1524400
    },
    {
      "epoch": 2.494746764596139,
      "grad_norm": 0.2656714618206024,
      "learning_rate": 5.043521807028063e-06,
      "loss": 0.014,
      "step": 1524420
    },
    {
      "epoch": 2.4947794950347926,
      "grad_norm": 0.36386966705322266,
      "learning_rate": 5.043455914814547e-06,
      "loss": 0.0188,
      "step": 1524440
    },
    {
      "epoch": 2.4948122254734457,
      "grad_norm": 0.17800822854042053,
      "learning_rate": 5.04339002260103e-06,
      "loss": 0.0167,
      "step": 1524460
    },
    {
      "epoch": 2.4948449559120993,
      "grad_norm": 0.4378776550292969,
      "learning_rate": 5.043324130387512e-06,
      "loss": 0.014,
      "step": 1524480
    },
    {
      "epoch": 2.4948776863507525,
      "grad_norm": 0.16966480016708374,
      "learning_rate": 5.043258238173996e-06,
      "loss": 0.0152,
      "step": 1524500
    },
    {
      "epoch": 2.4949104167894056,
      "grad_norm": 0.42870280146598816,
      "learning_rate": 5.0431923459604785e-06,
      "loss": 0.0157,
      "step": 1524520
    },
    {
      "epoch": 2.494943147228059,
      "grad_norm": 0.2857133746147156,
      "learning_rate": 5.043126453746961e-06,
      "loss": 0.0113,
      "step": 1524540
    },
    {
      "epoch": 2.4949758776667124,
      "grad_norm": 0.292957067489624,
      "learning_rate": 5.043060561533444e-06,
      "loss": 0.0108,
      "step": 1524560
    },
    {
      "epoch": 2.495008608105366,
      "grad_norm": 0.36455708742141724,
      "learning_rate": 5.042994669319928e-06,
      "loss": 0.0174,
      "step": 1524580
    },
    {
      "epoch": 2.495041338544019,
      "grad_norm": 0.18570370972156525,
      "learning_rate": 5.0429287771064095e-06,
      "loss": 0.0152,
      "step": 1524600
    },
    {
      "epoch": 2.4950740689826727,
      "grad_norm": 0.20026202499866486,
      "learning_rate": 5.042862884892893e-06,
      "loss": 0.0142,
      "step": 1524620
    },
    {
      "epoch": 2.495106799421326,
      "grad_norm": 0.1221921369433403,
      "learning_rate": 5.042796992679375e-06,
      "loss": 0.0153,
      "step": 1524640
    },
    {
      "epoch": 2.495139529859979,
      "grad_norm": 0.05747731029987335,
      "learning_rate": 5.0427311004658586e-06,
      "loss": 0.0161,
      "step": 1524660
    },
    {
      "epoch": 2.4951722602986326,
      "grad_norm": 0.5240600109100342,
      "learning_rate": 5.0426652082523404e-06,
      "loss": 0.0145,
      "step": 1524680
    },
    {
      "epoch": 2.4952049907372857,
      "grad_norm": 0.34893620014190674,
      "learning_rate": 5.042599316038824e-06,
      "loss": 0.0154,
      "step": 1524700
    },
    {
      "epoch": 2.4952377211759393,
      "grad_norm": 0.5724860429763794,
      "learning_rate": 5.042533423825307e-06,
      "loss": 0.0094,
      "step": 1524720
    },
    {
      "epoch": 2.4952704516145925,
      "grad_norm": 0.947826623916626,
      "learning_rate": 5.0424675316117895e-06,
      "loss": 0.0248,
      "step": 1524740
    },
    {
      "epoch": 2.495303182053246,
      "grad_norm": 0.1303083449602127,
      "learning_rate": 5.042401639398272e-06,
      "loss": 0.0135,
      "step": 1524760
    },
    {
      "epoch": 2.4953359124918992,
      "grad_norm": 0.9833200573921204,
      "learning_rate": 5.042335747184756e-06,
      "loss": 0.013,
      "step": 1524780
    },
    {
      "epoch": 2.4953686429305524,
      "grad_norm": 0.7269933223724365,
      "learning_rate": 5.042269854971239e-06,
      "loss": 0.0169,
      "step": 1524800
    },
    {
      "epoch": 2.495401373369206,
      "grad_norm": 0.5661290287971497,
      "learning_rate": 5.042203962757721e-06,
      "loss": 0.0137,
      "step": 1524820
    },
    {
      "epoch": 2.495434103807859,
      "grad_norm": 0.5477557182312012,
      "learning_rate": 5.042138070544205e-06,
      "loss": 0.0129,
      "step": 1524840
    },
    {
      "epoch": 2.4954668342465127,
      "grad_norm": 0.2391759753227234,
      "learning_rate": 5.042072178330687e-06,
      "loss": 0.015,
      "step": 1524860
    },
    {
      "epoch": 2.495499564685166,
      "grad_norm": 0.4618513286113739,
      "learning_rate": 5.04200628611717e-06,
      "loss": 0.0176,
      "step": 1524880
    },
    {
      "epoch": 2.4955322951238195,
      "grad_norm": 0.3508376181125641,
      "learning_rate": 5.041940393903652e-06,
      "loss": 0.0151,
      "step": 1524900
    },
    {
      "epoch": 2.4955650255624726,
      "grad_norm": 1.3359829187393188,
      "learning_rate": 5.041874501690136e-06,
      "loss": 0.011,
      "step": 1524920
    },
    {
      "epoch": 2.4955977560011258,
      "grad_norm": 1.2688980102539062,
      "learning_rate": 5.041808609476619e-06,
      "loss": 0.0119,
      "step": 1524940
    },
    {
      "epoch": 2.4956304864397794,
      "grad_norm": 0.167420893907547,
      "learning_rate": 5.041742717263101e-06,
      "loss": 0.0132,
      "step": 1524960
    },
    {
      "epoch": 2.4956632168784325,
      "grad_norm": 0.5894630551338196,
      "learning_rate": 5.041676825049584e-06,
      "loss": 0.0093,
      "step": 1524980
    },
    {
      "epoch": 2.495695947317086,
      "grad_norm": 0.22508983314037323,
      "learning_rate": 5.041610932836068e-06,
      "loss": 0.0111,
      "step": 1525000
    },
    {
      "epoch": 2.4957286777557393,
      "grad_norm": 1.3459796905517578,
      "learning_rate": 5.0415450406225496e-06,
      "loss": 0.0218,
      "step": 1525020
    },
    {
      "epoch": 2.495761408194393,
      "grad_norm": 0.5038937330245972,
      "learning_rate": 5.041479148409033e-06,
      "loss": 0.0139,
      "step": 1525040
    },
    {
      "epoch": 2.495794138633046,
      "grad_norm": 0.12623834609985352,
      "learning_rate": 5.041413256195515e-06,
      "loss": 0.0182,
      "step": 1525060
    },
    {
      "epoch": 2.495826869071699,
      "grad_norm": 0.49027910828590393,
      "learning_rate": 5.041347363981999e-06,
      "loss": 0.0154,
      "step": 1525080
    },
    {
      "epoch": 2.4958595995103527,
      "grad_norm": 0.39482641220092773,
      "learning_rate": 5.041281471768481e-06,
      "loss": 0.0166,
      "step": 1525100
    },
    {
      "epoch": 2.495892329949006,
      "grad_norm": 0.13216546177864075,
      "learning_rate": 5.041215579554964e-06,
      "loss": 0.0088,
      "step": 1525120
    },
    {
      "epoch": 2.4959250603876595,
      "grad_norm": 0.4474811553955078,
      "learning_rate": 5.041149687341448e-06,
      "loss": 0.0124,
      "step": 1525140
    },
    {
      "epoch": 2.4959577908263126,
      "grad_norm": 0.06883813440799713,
      "learning_rate": 5.0410837951279304e-06,
      "loss": 0.0171,
      "step": 1525160
    },
    {
      "epoch": 2.4959905212649662,
      "grad_norm": 0.321715772151947,
      "learning_rate": 5.041017902914413e-06,
      "loss": 0.0182,
      "step": 1525180
    },
    {
      "epoch": 2.4960232517036194,
      "grad_norm": 0.354587584733963,
      "learning_rate": 5.040952010700896e-06,
      "loss": 0.013,
      "step": 1525200
    },
    {
      "epoch": 2.4960559821422725,
      "grad_norm": 0.5729660987854004,
      "learning_rate": 5.0408861184873795e-06,
      "loss": 0.0122,
      "step": 1525220
    },
    {
      "epoch": 2.496088712580926,
      "grad_norm": 0.2053656131029129,
      "learning_rate": 5.040820226273861e-06,
      "loss": 0.0158,
      "step": 1525240
    },
    {
      "epoch": 2.4961214430195793,
      "grad_norm": 2.3896076679229736,
      "learning_rate": 5.040754334060345e-06,
      "loss": 0.015,
      "step": 1525260
    },
    {
      "epoch": 2.496154173458233,
      "grad_norm": 1.1842290163040161,
      "learning_rate": 5.040688441846827e-06,
      "loss": 0.0149,
      "step": 1525280
    },
    {
      "epoch": 2.496186903896886,
      "grad_norm": 0.2582560181617737,
      "learning_rate": 5.0406225496333105e-06,
      "loss": 0.0148,
      "step": 1525300
    },
    {
      "epoch": 2.496219634335539,
      "grad_norm": 0.3892965614795685,
      "learning_rate": 5.040556657419793e-06,
      "loss": 0.0162,
      "step": 1525320
    },
    {
      "epoch": 2.4962523647741928,
      "grad_norm": 0.21057820320129395,
      "learning_rate": 5.040490765206276e-06,
      "loss": 0.0145,
      "step": 1525340
    },
    {
      "epoch": 2.496285095212846,
      "grad_norm": 0.4859565794467926,
      "learning_rate": 5.040424872992759e-06,
      "loss": 0.013,
      "step": 1525360
    },
    {
      "epoch": 2.4963178256514995,
      "grad_norm": 0.22526058554649353,
      "learning_rate": 5.040358980779242e-06,
      "loss": 0.0131,
      "step": 1525380
    },
    {
      "epoch": 2.4963505560901527,
      "grad_norm": 0.3408665359020233,
      "learning_rate": 5.040293088565724e-06,
      "loss": 0.012,
      "step": 1525400
    },
    {
      "epoch": 2.496383286528806,
      "grad_norm": 0.4755767285823822,
      "learning_rate": 5.040227196352208e-06,
      "loss": 0.0118,
      "step": 1525420
    },
    {
      "epoch": 2.4964160169674594,
      "grad_norm": 0.8597114086151123,
      "learning_rate": 5.04016130413869e-06,
      "loss": 0.0179,
      "step": 1525440
    },
    {
      "epoch": 2.4964487474061126,
      "grad_norm": 0.34818604588508606,
      "learning_rate": 5.040095411925173e-06,
      "loss": 0.0114,
      "step": 1525460
    },
    {
      "epoch": 2.496481477844766,
      "grad_norm": 0.3829476833343506,
      "learning_rate": 5.040029519711656e-06,
      "loss": 0.0139,
      "step": 1525480
    },
    {
      "epoch": 2.4965142082834193,
      "grad_norm": 0.13978633284568787,
      "learning_rate": 5.039963627498139e-06,
      "loss": 0.0258,
      "step": 1525500
    },
    {
      "epoch": 2.496546938722073,
      "grad_norm": 1.624496340751648,
      "learning_rate": 5.039897735284622e-06,
      "loss": 0.0244,
      "step": 1525520
    },
    {
      "epoch": 2.496579669160726,
      "grad_norm": 0.25560909509658813,
      "learning_rate": 5.039831843071105e-06,
      "loss": 0.015,
      "step": 1525540
    },
    {
      "epoch": 2.496612399599379,
      "grad_norm": 0.3699031472206116,
      "learning_rate": 5.039765950857588e-06,
      "loss": 0.0097,
      "step": 1525560
    },
    {
      "epoch": 2.496645130038033,
      "grad_norm": 0.1735304892063141,
      "learning_rate": 5.0397000586440705e-06,
      "loss": 0.0159,
      "step": 1525580
    },
    {
      "epoch": 2.496677860476686,
      "grad_norm": 0.6571769714355469,
      "learning_rate": 5.039634166430554e-06,
      "loss": 0.0146,
      "step": 1525600
    },
    {
      "epoch": 2.4967105909153395,
      "grad_norm": 0.19231005012989044,
      "learning_rate": 5.039568274217036e-06,
      "loss": 0.0128,
      "step": 1525620
    },
    {
      "epoch": 2.4967433213539927,
      "grad_norm": 0.829554557800293,
      "learning_rate": 5.03950238200352e-06,
      "loss": 0.0204,
      "step": 1525640
    },
    {
      "epoch": 2.4967760517926463,
      "grad_norm": 0.32365313172340393,
      "learning_rate": 5.0394364897900015e-06,
      "loss": 0.0131,
      "step": 1525660
    },
    {
      "epoch": 2.4968087822312994,
      "grad_norm": 0.1683623045682907,
      "learning_rate": 5.039370597576485e-06,
      "loss": 0.018,
      "step": 1525680
    },
    {
      "epoch": 2.4968415126699526,
      "grad_norm": 0.6117165088653564,
      "learning_rate": 5.039304705362967e-06,
      "loss": 0.0097,
      "step": 1525700
    },
    {
      "epoch": 2.496874243108606,
      "grad_norm": 0.3014049828052521,
      "learning_rate": 5.0392388131494505e-06,
      "loss": 0.0134,
      "step": 1525720
    },
    {
      "epoch": 2.4969069735472593,
      "grad_norm": 0.19610214233398438,
      "learning_rate": 5.039172920935933e-06,
      "loss": 0.0169,
      "step": 1525740
    },
    {
      "epoch": 2.496939703985913,
      "grad_norm": 0.5865617394447327,
      "learning_rate": 5.039107028722416e-06,
      "loss": 0.0105,
      "step": 1525760
    },
    {
      "epoch": 2.496972434424566,
      "grad_norm": 1.274522066116333,
      "learning_rate": 5.039041136508899e-06,
      "loss": 0.0159,
      "step": 1525780
    },
    {
      "epoch": 2.4970051648632197,
      "grad_norm": 0.16246861219406128,
      "learning_rate": 5.038975244295382e-06,
      "loss": 0.01,
      "step": 1525800
    },
    {
      "epoch": 2.497037895301873,
      "grad_norm": 0.9393083453178406,
      "learning_rate": 5.038909352081864e-06,
      "loss": 0.0155,
      "step": 1525820
    },
    {
      "epoch": 2.497070625740526,
      "grad_norm": 0.37212425470352173,
      "learning_rate": 5.038843459868348e-06,
      "loss": 0.0102,
      "step": 1525840
    },
    {
      "epoch": 2.4971033561791796,
      "grad_norm": 0.03515749052166939,
      "learning_rate": 5.038777567654831e-06,
      "loss": 0.0097,
      "step": 1525860
    },
    {
      "epoch": 2.4971360866178327,
      "grad_norm": 0.24649783968925476,
      "learning_rate": 5.038711675441313e-06,
      "loss": 0.0217,
      "step": 1525880
    },
    {
      "epoch": 2.4971688170564863,
      "grad_norm": 0.46337154507637024,
      "learning_rate": 5.038645783227797e-06,
      "loss": 0.0165,
      "step": 1525900
    },
    {
      "epoch": 2.4972015474951395,
      "grad_norm": 0.9953304529190063,
      "learning_rate": 5.038579891014279e-06,
      "loss": 0.0232,
      "step": 1525920
    },
    {
      "epoch": 2.497234277933793,
      "grad_norm": 0.1661732792854309,
      "learning_rate": 5.038513998800762e-06,
      "loss": 0.0087,
      "step": 1525940
    },
    {
      "epoch": 2.497267008372446,
      "grad_norm": 0.5550546646118164,
      "learning_rate": 5.038448106587245e-06,
      "loss": 0.0145,
      "step": 1525960
    },
    {
      "epoch": 2.4972997388110993,
      "grad_norm": 0.6691697239875793,
      "learning_rate": 5.038382214373728e-06,
      "loss": 0.0206,
      "step": 1525980
    },
    {
      "epoch": 2.497332469249753,
      "grad_norm": 0.4805842339992523,
      "learning_rate": 5.038316322160211e-06,
      "loss": 0.0139,
      "step": 1526000
    },
    {
      "epoch": 2.497365199688406,
      "grad_norm": 0.19438885152339935,
      "learning_rate": 5.038250429946694e-06,
      "loss": 0.0106,
      "step": 1526020
    },
    {
      "epoch": 2.4973979301270597,
      "grad_norm": 1.2980984449386597,
      "learning_rate": 5.038184537733176e-06,
      "loss": 0.0181,
      "step": 1526040
    },
    {
      "epoch": 2.497430660565713,
      "grad_norm": 0.8828065991401672,
      "learning_rate": 5.03811864551966e-06,
      "loss": 0.0088,
      "step": 1526060
    },
    {
      "epoch": 2.4974633910043664,
      "grad_norm": 0.5634393692016602,
      "learning_rate": 5.0380527533061415e-06,
      "loss": 0.0101,
      "step": 1526080
    },
    {
      "epoch": 2.4974961214430196,
      "grad_norm": 0.831084132194519,
      "learning_rate": 5.037986861092625e-06,
      "loss": 0.0142,
      "step": 1526100
    },
    {
      "epoch": 2.4975288518816727,
      "grad_norm": 0.290580153465271,
      "learning_rate": 5.037920968879108e-06,
      "loss": 0.0128,
      "step": 1526120
    },
    {
      "epoch": 2.4975615823203263,
      "grad_norm": 0.20378008484840393,
      "learning_rate": 5.037855076665591e-06,
      "loss": 0.0131,
      "step": 1526140
    },
    {
      "epoch": 2.4975943127589795,
      "grad_norm": 0.2531835734844208,
      "learning_rate": 5.037789184452073e-06,
      "loss": 0.0176,
      "step": 1526160
    },
    {
      "epoch": 2.497627043197633,
      "grad_norm": 0.27274224162101746,
      "learning_rate": 5.037723292238557e-06,
      "loss": 0.0101,
      "step": 1526180
    },
    {
      "epoch": 2.497659773636286,
      "grad_norm": 0.5088375806808472,
      "learning_rate": 5.03765740002504e-06,
      "loss": 0.0275,
      "step": 1526200
    },
    {
      "epoch": 2.49769250407494,
      "grad_norm": 0.12038976699113846,
      "learning_rate": 5.037591507811522e-06,
      "loss": 0.0112,
      "step": 1526220
    },
    {
      "epoch": 2.497725234513593,
      "grad_norm": 0.8566713333129883,
      "learning_rate": 5.037525615598006e-06,
      "loss": 0.017,
      "step": 1526240
    },
    {
      "epoch": 2.497757964952246,
      "grad_norm": 0.4679698646068573,
      "learning_rate": 5.037459723384488e-06,
      "loss": 0.0122,
      "step": 1526260
    },
    {
      "epoch": 2.4977906953908997,
      "grad_norm": 0.6476978659629822,
      "learning_rate": 5.0373938311709715e-06,
      "loss": 0.0128,
      "step": 1526280
    },
    {
      "epoch": 2.497823425829553,
      "grad_norm": 0.9270473122596741,
      "learning_rate": 5.037327938957453e-06,
      "loss": 0.0112,
      "step": 1526300
    },
    {
      "epoch": 2.4978561562682065,
      "grad_norm": 0.46419110894203186,
      "learning_rate": 5.037262046743937e-06,
      "loss": 0.015,
      "step": 1526320
    },
    {
      "epoch": 2.4978888867068596,
      "grad_norm": 0.5915616154670715,
      "learning_rate": 5.03719615453042e-06,
      "loss": 0.0147,
      "step": 1526340
    },
    {
      "epoch": 2.497921617145513,
      "grad_norm": 0.7677282691001892,
      "learning_rate": 5.0371302623169024e-06,
      "loss": 0.0155,
      "step": 1526360
    },
    {
      "epoch": 2.4979543475841663,
      "grad_norm": 0.23519288003444672,
      "learning_rate": 5.037064370103385e-06,
      "loss": 0.0167,
      "step": 1526380
    },
    {
      "epoch": 2.4979870780228195,
      "grad_norm": 0.17236313223838806,
      "learning_rate": 5.036998477889869e-06,
      "loss": 0.0111,
      "step": 1526400
    },
    {
      "epoch": 2.498019808461473,
      "grad_norm": 0.3813358545303345,
      "learning_rate": 5.036932585676351e-06,
      "loss": 0.0155,
      "step": 1526420
    },
    {
      "epoch": 2.4980525389001262,
      "grad_norm": 0.4304145872592926,
      "learning_rate": 5.036866693462834e-06,
      "loss": 0.0096,
      "step": 1526440
    },
    {
      "epoch": 2.49808526933878,
      "grad_norm": 0.2081114500761032,
      "learning_rate": 5.036800801249316e-06,
      "loss": 0.0189,
      "step": 1526460
    },
    {
      "epoch": 2.498117999777433,
      "grad_norm": 0.31677839159965515,
      "learning_rate": 5.0367349090358e-06,
      "loss": 0.0112,
      "step": 1526480
    },
    {
      "epoch": 2.4981507302160866,
      "grad_norm": 0.5141187310218811,
      "learning_rate": 5.036669016822282e-06,
      "loss": 0.0234,
      "step": 1526500
    },
    {
      "epoch": 2.4981834606547397,
      "grad_norm": 0.6674643158912659,
      "learning_rate": 5.036603124608765e-06,
      "loss": 0.0134,
      "step": 1526520
    },
    {
      "epoch": 2.498216191093393,
      "grad_norm": 0.5520368218421936,
      "learning_rate": 5.036537232395248e-06,
      "loss": 0.0137,
      "step": 1526540
    },
    {
      "epoch": 2.4982489215320465,
      "grad_norm": 0.506642758846283,
      "learning_rate": 5.0364713401817315e-06,
      "loss": 0.0163,
      "step": 1526560
    },
    {
      "epoch": 2.4982816519706996,
      "grad_norm": 1.6162910461425781,
      "learning_rate": 5.036405447968214e-06,
      "loss": 0.0126,
      "step": 1526580
    },
    {
      "epoch": 2.498314382409353,
      "grad_norm": 0.23822349309921265,
      "learning_rate": 5.036339555754697e-06,
      "loss": 0.013,
      "step": 1526600
    },
    {
      "epoch": 2.4983471128480064,
      "grad_norm": 0.17176006734371185,
      "learning_rate": 5.036273663541181e-06,
      "loss": 0.0101,
      "step": 1526620
    },
    {
      "epoch": 2.49837984328666,
      "grad_norm": 0.3567385673522949,
      "learning_rate": 5.0362077713276625e-06,
      "loss": 0.0108,
      "step": 1526640
    },
    {
      "epoch": 2.498412573725313,
      "grad_norm": 0.2018091380596161,
      "learning_rate": 5.036141879114146e-06,
      "loss": 0.0126,
      "step": 1526660
    },
    {
      "epoch": 2.4984453041639663,
      "grad_norm": 0.2592713236808777,
      "learning_rate": 5.036075986900628e-06,
      "loss": 0.0134,
      "step": 1526680
    },
    {
      "epoch": 2.49847803460262,
      "grad_norm": 0.27275216579437256,
      "learning_rate": 5.0360100946871116e-06,
      "loss": 0.0115,
      "step": 1526700
    },
    {
      "epoch": 2.498510765041273,
      "grad_norm": 0.05091347172856331,
      "learning_rate": 5.0359442024735935e-06,
      "loss": 0.0169,
      "step": 1526720
    },
    {
      "epoch": 2.4985434954799266,
      "grad_norm": 0.1458747684955597,
      "learning_rate": 5.035878310260077e-06,
      "loss": 0.0135,
      "step": 1526740
    },
    {
      "epoch": 2.4985762259185798,
      "grad_norm": 0.20885907113552094,
      "learning_rate": 5.03581241804656e-06,
      "loss": 0.015,
      "step": 1526760
    },
    {
      "epoch": 2.4986089563572333,
      "grad_norm": 0.818625271320343,
      "learning_rate": 5.0357465258330425e-06,
      "loss": 0.0181,
      "step": 1526780
    },
    {
      "epoch": 2.4986416867958865,
      "grad_norm": 0.13851667940616608,
      "learning_rate": 5.035680633619525e-06,
      "loss": 0.009,
      "step": 1526800
    },
    {
      "epoch": 2.4986744172345396,
      "grad_norm": 0.45300403237342834,
      "learning_rate": 5.035614741406009e-06,
      "loss": 0.0219,
      "step": 1526820
    },
    {
      "epoch": 2.4987071476731932,
      "grad_norm": 0.28177231550216675,
      "learning_rate": 5.035548849192491e-06,
      "loss": 0.0133,
      "step": 1526840
    },
    {
      "epoch": 2.4987398781118464,
      "grad_norm": 0.8104566931724548,
      "learning_rate": 5.035482956978974e-06,
      "loss": 0.0186,
      "step": 1526860
    },
    {
      "epoch": 2.4987726085505,
      "grad_norm": 0.400127649307251,
      "learning_rate": 5.035417064765456e-06,
      "loss": 0.0161,
      "step": 1526880
    },
    {
      "epoch": 2.498805338989153,
      "grad_norm": 0.5515926480293274,
      "learning_rate": 5.03535117255194e-06,
      "loss": 0.0218,
      "step": 1526900
    },
    {
      "epoch": 2.4988380694278063,
      "grad_norm": 0.6883507370948792,
      "learning_rate": 5.035285280338423e-06,
      "loss": 0.0192,
      "step": 1526920
    },
    {
      "epoch": 2.49887079986646,
      "grad_norm": 0.2316896915435791,
      "learning_rate": 5.035219388124905e-06,
      "loss": 0.0133,
      "step": 1526940
    },
    {
      "epoch": 2.498903530305113,
      "grad_norm": 0.4022872745990753,
      "learning_rate": 5.035153495911389e-06,
      "loss": 0.0135,
      "step": 1526960
    },
    {
      "epoch": 2.4989362607437666,
      "grad_norm": 1.1156457662582397,
      "learning_rate": 5.035087603697872e-06,
      "loss": 0.0137,
      "step": 1526980
    },
    {
      "epoch": 2.4989689911824198,
      "grad_norm": 1.1507790088653564,
      "learning_rate": 5.035021711484354e-06,
      "loss": 0.0153,
      "step": 1527000
    },
    {
      "epoch": 2.499001721621073,
      "grad_norm": 0.20098727941513062,
      "learning_rate": 5.034955819270837e-06,
      "loss": 0.0129,
      "step": 1527020
    },
    {
      "epoch": 2.4990344520597265,
      "grad_norm": 0.2919570505619049,
      "learning_rate": 5.034889927057321e-06,
      "loss": 0.0096,
      "step": 1527040
    },
    {
      "epoch": 2.4990671824983797,
      "grad_norm": 0.3576020300388336,
      "learning_rate": 5.0348240348438026e-06,
      "loss": 0.0164,
      "step": 1527060
    },
    {
      "epoch": 2.4990999129370333,
      "grad_norm": 0.2190365493297577,
      "learning_rate": 5.034758142630286e-06,
      "loss": 0.0196,
      "step": 1527080
    },
    {
      "epoch": 2.4991326433756864,
      "grad_norm": 0.4620898365974426,
      "learning_rate": 5.034692250416768e-06,
      "loss": 0.0163,
      "step": 1527100
    },
    {
      "epoch": 2.49916537381434,
      "grad_norm": 0.14141157269477844,
      "learning_rate": 5.034626358203252e-06,
      "loss": 0.0173,
      "step": 1527120
    },
    {
      "epoch": 2.499198104252993,
      "grad_norm": 0.34488674998283386,
      "learning_rate": 5.034560465989734e-06,
      "loss": 0.0237,
      "step": 1527140
    },
    {
      "epoch": 2.4992308346916463,
      "grad_norm": 0.5186322331428528,
      "learning_rate": 5.034494573776217e-06,
      "loss": 0.0121,
      "step": 1527160
    },
    {
      "epoch": 2.4992635651303,
      "grad_norm": 0.15996283292770386,
      "learning_rate": 5.0344286815627e-06,
      "loss": 0.0158,
      "step": 1527180
    },
    {
      "epoch": 2.499296295568953,
      "grad_norm": 0.13822045922279358,
      "learning_rate": 5.0343627893491834e-06,
      "loss": 0.0139,
      "step": 1527200
    },
    {
      "epoch": 2.4993290260076066,
      "grad_norm": 0.16864962875843048,
      "learning_rate": 5.034296897135665e-06,
      "loss": 0.0077,
      "step": 1527220
    },
    {
      "epoch": 2.49936175644626,
      "grad_norm": 0.9431084990501404,
      "learning_rate": 5.034231004922149e-06,
      "loss": 0.0163,
      "step": 1527240
    },
    {
      "epoch": 2.4993944868849134,
      "grad_norm": 0.2882221043109894,
      "learning_rate": 5.0341651127086325e-06,
      "loss": 0.0131,
      "step": 1527260
    },
    {
      "epoch": 2.4994272173235665,
      "grad_norm": 0.06420980393886566,
      "learning_rate": 5.034099220495114e-06,
      "loss": 0.0125,
      "step": 1527280
    },
    {
      "epoch": 2.4994599477622197,
      "grad_norm": 0.29041263461112976,
      "learning_rate": 5.034033328281598e-06,
      "loss": 0.0149,
      "step": 1527300
    },
    {
      "epoch": 2.4994926782008733,
      "grad_norm": 0.7192826867103577,
      "learning_rate": 5.03396743606808e-06,
      "loss": 0.0104,
      "step": 1527320
    },
    {
      "epoch": 2.4995254086395264,
      "grad_norm": 0.24518832564353943,
      "learning_rate": 5.0339015438545635e-06,
      "loss": 0.0095,
      "step": 1527340
    },
    {
      "epoch": 2.49955813907818,
      "grad_norm": 0.19143447279930115,
      "learning_rate": 5.033835651641046e-06,
      "loss": 0.0176,
      "step": 1527360
    },
    {
      "epoch": 2.499590869516833,
      "grad_norm": 0.3612745702266693,
      "learning_rate": 5.033769759427529e-06,
      "loss": 0.0156,
      "step": 1527380
    },
    {
      "epoch": 2.4996235999554868,
      "grad_norm": 0.17775604128837585,
      "learning_rate": 5.033703867214012e-06,
      "loss": 0.0142,
      "step": 1527400
    },
    {
      "epoch": 2.49965633039414,
      "grad_norm": 0.290122926235199,
      "learning_rate": 5.033637975000495e-06,
      "loss": 0.0156,
      "step": 1527420
    },
    {
      "epoch": 2.499689060832793,
      "grad_norm": 0.12920333445072174,
      "learning_rate": 5.033572082786977e-06,
      "loss": 0.02,
      "step": 1527440
    },
    {
      "epoch": 2.4997217912714467,
      "grad_norm": 0.6839162707328796,
      "learning_rate": 5.033506190573461e-06,
      "loss": 0.0132,
      "step": 1527460
    },
    {
      "epoch": 2.4997545217101,
      "grad_norm": 1.1408796310424805,
      "learning_rate": 5.033440298359943e-06,
      "loss": 0.0155,
      "step": 1527480
    },
    {
      "epoch": 2.4997872521487534,
      "grad_norm": 0.23435473442077637,
      "learning_rate": 5.033374406146426e-06,
      "loss": 0.0113,
      "step": 1527500
    },
    {
      "epoch": 2.4998199825874066,
      "grad_norm": 0.4620714485645294,
      "learning_rate": 5.033308513932908e-06,
      "loss": 0.0186,
      "step": 1527520
    },
    {
      "epoch": 2.49985271302606,
      "grad_norm": 0.6118343472480774,
      "learning_rate": 5.033242621719392e-06,
      "loss": 0.0155,
      "step": 1527540
    },
    {
      "epoch": 2.4998854434647133,
      "grad_norm": 0.6245527267456055,
      "learning_rate": 5.0331767295058745e-06,
      "loss": 0.0151,
      "step": 1527560
    },
    {
      "epoch": 2.4999181739033665,
      "grad_norm": 0.35696911811828613,
      "learning_rate": 5.033110837292357e-06,
      "loss": 0.0148,
      "step": 1527580
    },
    {
      "epoch": 2.49995090434202,
      "grad_norm": 0.2045145034790039,
      "learning_rate": 5.03304494507884e-06,
      "loss": 0.0195,
      "step": 1527600
    },
    {
      "epoch": 2.499983634780673,
      "grad_norm": 0.19811882078647614,
      "learning_rate": 5.0329790528653235e-06,
      "loss": 0.0145,
      "step": 1527620
    },
    {
      "epoch": 2.500016365219327,
      "grad_norm": 0.16730447113513947,
      "learning_rate": 5.032913160651806e-06,
      "loss": 0.011,
      "step": 1527640
    },
    {
      "epoch": 2.50004909565798,
      "grad_norm": 0.6902160048484802,
      "learning_rate": 5.032847268438289e-06,
      "loss": 0.0153,
      "step": 1527660
    },
    {
      "epoch": 2.5000818260966335,
      "grad_norm": 0.7631686329841614,
      "learning_rate": 5.032781376224773e-06,
      "loss": 0.0156,
      "step": 1527680
    },
    {
      "epoch": 2.5001145565352867,
      "grad_norm": 0.2774505615234375,
      "learning_rate": 5.0327154840112545e-06,
      "loss": 0.0157,
      "step": 1527700
    },
    {
      "epoch": 2.50014728697394,
      "grad_norm": 1.518185019493103,
      "learning_rate": 5.032649591797738e-06,
      "loss": 0.0123,
      "step": 1527720
    },
    {
      "epoch": 2.5001800174125934,
      "grad_norm": 0.8633517622947693,
      "learning_rate": 5.03258369958422e-06,
      "loss": 0.0167,
      "step": 1527740
    },
    {
      "epoch": 2.5002127478512466,
      "grad_norm": 0.20188277959823608,
      "learning_rate": 5.0325178073707035e-06,
      "loss": 0.0126,
      "step": 1527760
    },
    {
      "epoch": 2.5002454782899,
      "grad_norm": 0.3722343444824219,
      "learning_rate": 5.032451915157186e-06,
      "loss": 0.0143,
      "step": 1527780
    },
    {
      "epoch": 2.5002782087285533,
      "grad_norm": 0.7561871409416199,
      "learning_rate": 5.032386022943669e-06,
      "loss": 0.0155,
      "step": 1527800
    },
    {
      "epoch": 2.500310939167207,
      "grad_norm": 0.37701615691185,
      "learning_rate": 5.032320130730152e-06,
      "loss": 0.0126,
      "step": 1527820
    },
    {
      "epoch": 2.50034366960586,
      "grad_norm": 0.4959371089935303,
      "learning_rate": 5.032254238516635e-06,
      "loss": 0.015,
      "step": 1527840
    },
    {
      "epoch": 2.5003764000445132,
      "grad_norm": 0.20357292890548706,
      "learning_rate": 5.032188346303117e-06,
      "loss": 0.0192,
      "step": 1527860
    },
    {
      "epoch": 2.500409130483167,
      "grad_norm": 0.11297401785850525,
      "learning_rate": 5.032122454089601e-06,
      "loss": 0.0139,
      "step": 1527880
    },
    {
      "epoch": 2.50044186092182,
      "grad_norm": 0.9072795510292053,
      "learning_rate": 5.032056561876083e-06,
      "loss": 0.0137,
      "step": 1527900
    },
    {
      "epoch": 2.5004745913604736,
      "grad_norm": 0.3352073132991791,
      "learning_rate": 5.031990669662566e-06,
      "loss": 0.0145,
      "step": 1527920
    },
    {
      "epoch": 2.5005073217991267,
      "grad_norm": 0.18387119472026825,
      "learning_rate": 5.031924777449049e-06,
      "loss": 0.0105,
      "step": 1527940
    },
    {
      "epoch": 2.5005400522377803,
      "grad_norm": 0.2678600549697876,
      "learning_rate": 5.031858885235532e-06,
      "loss": 0.0133,
      "step": 1527960
    },
    {
      "epoch": 2.5005727826764335,
      "grad_norm": 0.09956291317939758,
      "learning_rate": 5.031792993022015e-06,
      "loss": 0.0101,
      "step": 1527980
    },
    {
      "epoch": 2.5006055131150866,
      "grad_norm": 0.2178589105606079,
      "learning_rate": 5.031727100808498e-06,
      "loss": 0.0102,
      "step": 1528000
    },
    {
      "epoch": 2.50063824355374,
      "grad_norm": 0.2860124707221985,
      "learning_rate": 5.031661208594981e-06,
      "loss": 0.0097,
      "step": 1528020
    },
    {
      "epoch": 2.5006709739923934,
      "grad_norm": 0.06522931903600693,
      "learning_rate": 5.031595316381464e-06,
      "loss": 0.0144,
      "step": 1528040
    },
    {
      "epoch": 2.500703704431047,
      "grad_norm": 0.3324790596961975,
      "learning_rate": 5.031529424167947e-06,
      "loss": 0.0138,
      "step": 1528060
    },
    {
      "epoch": 2.5007364348697,
      "grad_norm": 0.24412404000759125,
      "learning_rate": 5.031463531954429e-06,
      "loss": 0.0105,
      "step": 1528080
    },
    {
      "epoch": 2.5007691653083537,
      "grad_norm": 0.3581850230693817,
      "learning_rate": 5.031397639740913e-06,
      "loss": 0.011,
      "step": 1528100
    },
    {
      "epoch": 2.500801895747007,
      "grad_norm": 0.1208445280790329,
      "learning_rate": 5.0313317475273946e-06,
      "loss": 0.0137,
      "step": 1528120
    },
    {
      "epoch": 2.50083462618566,
      "grad_norm": 0.18726332485675812,
      "learning_rate": 5.031265855313878e-06,
      "loss": 0.0181,
      "step": 1528140
    },
    {
      "epoch": 2.5008673566243136,
      "grad_norm": 0.25652819871902466,
      "learning_rate": 5.031199963100361e-06,
      "loss": 0.0187,
      "step": 1528160
    },
    {
      "epoch": 2.5009000870629667,
      "grad_norm": 0.09894482791423798,
      "learning_rate": 5.031134070886844e-06,
      "loss": 0.0117,
      "step": 1528180
    },
    {
      "epoch": 2.50093281750162,
      "grad_norm": 0.5596400499343872,
      "learning_rate": 5.031068178673326e-06,
      "loss": 0.0154,
      "step": 1528200
    },
    {
      "epoch": 2.5009655479402735,
      "grad_norm": 0.14572231471538544,
      "learning_rate": 5.03100228645981e-06,
      "loss": 0.0074,
      "step": 1528220
    },
    {
      "epoch": 2.500998278378927,
      "grad_norm": 0.3822356164455414,
      "learning_rate": 5.030936394246292e-06,
      "loss": 0.019,
      "step": 1528240
    },
    {
      "epoch": 2.5010310088175802,
      "grad_norm": 0.19701866805553436,
      "learning_rate": 5.0308705020327754e-06,
      "loss": 0.018,
      "step": 1528260
    },
    {
      "epoch": 2.5010637392562334,
      "grad_norm": 0.27999940514564514,
      "learning_rate": 5.030804609819257e-06,
      "loss": 0.0126,
      "step": 1528280
    },
    {
      "epoch": 2.501096469694887,
      "grad_norm": 1.2091665267944336,
      "learning_rate": 5.030738717605741e-06,
      "loss": 0.0094,
      "step": 1528300
    },
    {
      "epoch": 2.50112920013354,
      "grad_norm": 0.3969264328479767,
      "learning_rate": 5.0306728253922245e-06,
      "loss": 0.0124,
      "step": 1528320
    },
    {
      "epoch": 2.5011619305721933,
      "grad_norm": 0.16482311487197876,
      "learning_rate": 5.030606933178706e-06,
      "loss": 0.0216,
      "step": 1528340
    },
    {
      "epoch": 2.501194661010847,
      "grad_norm": 0.8313866853713989,
      "learning_rate": 5.03054104096519e-06,
      "loss": 0.0221,
      "step": 1528360
    },
    {
      "epoch": 2.5012273914495005,
      "grad_norm": 0.3285123109817505,
      "learning_rate": 5.030475148751673e-06,
      "loss": 0.0223,
      "step": 1528380
    },
    {
      "epoch": 2.5012601218881536,
      "grad_norm": 0.34480786323547363,
      "learning_rate": 5.0304092565381555e-06,
      "loss": 0.011,
      "step": 1528400
    },
    {
      "epoch": 2.5012928523268068,
      "grad_norm": 0.2558571398258209,
      "learning_rate": 5.030343364324638e-06,
      "loss": 0.0173,
      "step": 1528420
    },
    {
      "epoch": 2.5013255827654604,
      "grad_norm": 0.1929321140050888,
      "learning_rate": 5.030277472111122e-06,
      "loss": 0.0101,
      "step": 1528440
    },
    {
      "epoch": 2.5013583132041135,
      "grad_norm": 0.44628098607063293,
      "learning_rate": 5.030211579897604e-06,
      "loss": 0.0176,
      "step": 1528460
    },
    {
      "epoch": 2.5013910436427667,
      "grad_norm": 0.3318539559841156,
      "learning_rate": 5.030145687684087e-06,
      "loss": 0.0157,
      "step": 1528480
    },
    {
      "epoch": 2.5014237740814202,
      "grad_norm": 0.6104752421379089,
      "learning_rate": 5.030079795470569e-06,
      "loss": 0.0157,
      "step": 1528500
    },
    {
      "epoch": 2.501456504520074,
      "grad_norm": 0.28291141986846924,
      "learning_rate": 5.030013903257053e-06,
      "loss": 0.0184,
      "step": 1528520
    },
    {
      "epoch": 2.501489234958727,
      "grad_norm": 0.506846010684967,
      "learning_rate": 5.029948011043535e-06,
      "loss": 0.0109,
      "step": 1528540
    },
    {
      "epoch": 2.50152196539738,
      "grad_norm": 0.25956833362579346,
      "learning_rate": 5.029882118830018e-06,
      "loss": 0.016,
      "step": 1528560
    },
    {
      "epoch": 2.5015546958360337,
      "grad_norm": 0.6972641348838806,
      "learning_rate": 5.029816226616501e-06,
      "loss": 0.016,
      "step": 1528580
    },
    {
      "epoch": 2.501587426274687,
      "grad_norm": 1.1460204124450684,
      "learning_rate": 5.029750334402984e-06,
      "loss": 0.0106,
      "step": 1528600
    },
    {
      "epoch": 2.50162015671334,
      "grad_norm": 0.32275155186653137,
      "learning_rate": 5.0296844421894664e-06,
      "loss": 0.0163,
      "step": 1528620
    },
    {
      "epoch": 2.5016528871519936,
      "grad_norm": 0.6395652890205383,
      "learning_rate": 5.02961854997595e-06,
      "loss": 0.0096,
      "step": 1528640
    },
    {
      "epoch": 2.5016856175906472,
      "grad_norm": 0.3675139248371124,
      "learning_rate": 5.029552657762433e-06,
      "loss": 0.0126,
      "step": 1528660
    },
    {
      "epoch": 2.5017183480293004,
      "grad_norm": 0.33049532771110535,
      "learning_rate": 5.0294867655489155e-06,
      "loss": 0.0117,
      "step": 1528680
    },
    {
      "epoch": 2.5017510784679535,
      "grad_norm": 0.4932112395763397,
      "learning_rate": 5.029420873335399e-06,
      "loss": 0.0136,
      "step": 1528700
    },
    {
      "epoch": 2.501783808906607,
      "grad_norm": 0.5815523862838745,
      "learning_rate": 5.029354981121881e-06,
      "loss": 0.0125,
      "step": 1528720
    },
    {
      "epoch": 2.5018165393452603,
      "grad_norm": 0.42028963565826416,
      "learning_rate": 5.0292890889083646e-06,
      "loss": 0.0139,
      "step": 1528740
    },
    {
      "epoch": 2.5018492697839134,
      "grad_norm": 0.2670881152153015,
      "learning_rate": 5.0292231966948465e-06,
      "loss": 0.0111,
      "step": 1528760
    },
    {
      "epoch": 2.501882000222567,
      "grad_norm": 0.43463531136512756,
      "learning_rate": 5.02915730448133e-06,
      "loss": 0.0171,
      "step": 1528780
    },
    {
      "epoch": 2.50191473066122,
      "grad_norm": 0.31153514981269836,
      "learning_rate": 5.029091412267813e-06,
      "loss": 0.0119,
      "step": 1528800
    },
    {
      "epoch": 2.5019474610998738,
      "grad_norm": 0.9593261480331421,
      "learning_rate": 5.0290255200542955e-06,
      "loss": 0.0145,
      "step": 1528820
    },
    {
      "epoch": 2.501980191538527,
      "grad_norm": 0.1268111616373062,
      "learning_rate": 5.028959627840778e-06,
      "loss": 0.016,
      "step": 1528840
    },
    {
      "epoch": 2.5020129219771805,
      "grad_norm": 0.2122398167848587,
      "learning_rate": 5.028893735627262e-06,
      "loss": 0.0127,
      "step": 1528860
    },
    {
      "epoch": 2.5020456524158337,
      "grad_norm": 0.13265153765678406,
      "learning_rate": 5.028827843413744e-06,
      "loss": 0.0162,
      "step": 1528880
    },
    {
      "epoch": 2.502078382854487,
      "grad_norm": 0.1844056248664856,
      "learning_rate": 5.028761951200227e-06,
      "loss": 0.0136,
      "step": 1528900
    },
    {
      "epoch": 2.5021111132931404,
      "grad_norm": 0.4814612567424774,
      "learning_rate": 5.028696058986709e-06,
      "loss": 0.0172,
      "step": 1528920
    },
    {
      "epoch": 2.5021438437317935,
      "grad_norm": 0.46306800842285156,
      "learning_rate": 5.028630166773193e-06,
      "loss": 0.0149,
      "step": 1528940
    },
    {
      "epoch": 2.502176574170447,
      "grad_norm": 0.38595089316368103,
      "learning_rate": 5.0285642745596756e-06,
      "loss": 0.0109,
      "step": 1528960
    },
    {
      "epoch": 2.5022093046091003,
      "grad_norm": 0.338156521320343,
      "learning_rate": 5.028498382346158e-06,
      "loss": 0.0111,
      "step": 1528980
    },
    {
      "epoch": 2.502242035047754,
      "grad_norm": 0.3031501770019531,
      "learning_rate": 5.028432490132641e-06,
      "loss": 0.0147,
      "step": 1529000
    },
    {
      "epoch": 2.502274765486407,
      "grad_norm": 0.598262369632721,
      "learning_rate": 5.028366597919125e-06,
      "loss": 0.015,
      "step": 1529020
    },
    {
      "epoch": 2.50230749592506,
      "grad_norm": 0.2667562663555145,
      "learning_rate": 5.028300705705607e-06,
      "loss": 0.0102,
      "step": 1529040
    },
    {
      "epoch": 2.502340226363714,
      "grad_norm": 0.542422354221344,
      "learning_rate": 5.02823481349209e-06,
      "loss": 0.0144,
      "step": 1529060
    },
    {
      "epoch": 2.502372956802367,
      "grad_norm": 0.8208005428314209,
      "learning_rate": 5.028168921278574e-06,
      "loss": 0.02,
      "step": 1529080
    },
    {
      "epoch": 2.5024056872410205,
      "grad_norm": 0.5981478095054626,
      "learning_rate": 5.028103029065056e-06,
      "loss": 0.0124,
      "step": 1529100
    },
    {
      "epoch": 2.5024384176796737,
      "grad_norm": 1.252209186553955,
      "learning_rate": 5.028037136851539e-06,
      "loss": 0.0122,
      "step": 1529120
    },
    {
      "epoch": 2.5024711481183273,
      "grad_norm": 0.2762380540370941,
      "learning_rate": 5.027971244638021e-06,
      "loss": 0.0169,
      "step": 1529140
    },
    {
      "epoch": 2.5025038785569804,
      "grad_norm": 0.49598899483680725,
      "learning_rate": 5.027905352424505e-06,
      "loss": 0.0093,
      "step": 1529160
    },
    {
      "epoch": 2.5025366089956336,
      "grad_norm": 0.48934635519981384,
      "learning_rate": 5.027839460210987e-06,
      "loss": 0.0187,
      "step": 1529180
    },
    {
      "epoch": 2.502569339434287,
      "grad_norm": 0.7610324025154114,
      "learning_rate": 5.02777356799747e-06,
      "loss": 0.013,
      "step": 1529200
    },
    {
      "epoch": 2.5026020698729403,
      "grad_norm": 0.4445219337940216,
      "learning_rate": 5.027707675783953e-06,
      "loss": 0.0155,
      "step": 1529220
    },
    {
      "epoch": 2.502634800311594,
      "grad_norm": 0.14344432950019836,
      "learning_rate": 5.0276417835704365e-06,
      "loss": 0.0148,
      "step": 1529240
    },
    {
      "epoch": 2.502667530750247,
      "grad_norm": 0.19273421168327332,
      "learning_rate": 5.027575891356918e-06,
      "loss": 0.0149,
      "step": 1529260
    },
    {
      "epoch": 2.5027002611889007,
      "grad_norm": 0.5574617981910706,
      "learning_rate": 5.027509999143402e-06,
      "loss": 0.0256,
      "step": 1529280
    },
    {
      "epoch": 2.502732991627554,
      "grad_norm": 0.49652862548828125,
      "learning_rate": 5.027444106929884e-06,
      "loss": 0.014,
      "step": 1529300
    },
    {
      "epoch": 2.502765722066207,
      "grad_norm": 0.2743973135948181,
      "learning_rate": 5.027378214716367e-06,
      "loss": 0.0096,
      "step": 1529320
    },
    {
      "epoch": 2.5027984525048605,
      "grad_norm": 0.7857794761657715,
      "learning_rate": 5.02731232250285e-06,
      "loss": 0.0095,
      "step": 1529340
    },
    {
      "epoch": 2.5028311829435137,
      "grad_norm": 0.8607245087623596,
      "learning_rate": 5.027246430289333e-06,
      "loss": 0.0089,
      "step": 1529360
    },
    {
      "epoch": 2.5028639133821673,
      "grad_norm": 0.4279203712940216,
      "learning_rate": 5.0271805380758165e-06,
      "loss": 0.017,
      "step": 1529380
    },
    {
      "epoch": 2.5028966438208204,
      "grad_norm": 0.5740377306938171,
      "learning_rate": 5.027114645862299e-06,
      "loss": 0.0187,
      "step": 1529400
    },
    {
      "epoch": 2.502929374259474,
      "grad_norm": 0.18360914289951324,
      "learning_rate": 5.027048753648782e-06,
      "loss": 0.0117,
      "step": 1529420
    },
    {
      "epoch": 2.502962104698127,
      "grad_norm": 0.28146299719810486,
      "learning_rate": 5.026982861435265e-06,
      "loss": 0.0182,
      "step": 1529440
    },
    {
      "epoch": 2.5029948351367803,
      "grad_norm": 0.5390124917030334,
      "learning_rate": 5.026916969221748e-06,
      "loss": 0.0099,
      "step": 1529460
    },
    {
      "epoch": 2.503027565575434,
      "grad_norm": 0.7532364130020142,
      "learning_rate": 5.02685107700823e-06,
      "loss": 0.0152,
      "step": 1529480
    },
    {
      "epoch": 2.503060296014087,
      "grad_norm": 0.4072406589984894,
      "learning_rate": 5.026785184794714e-06,
      "loss": 0.0219,
      "step": 1529500
    },
    {
      "epoch": 2.5030930264527407,
      "grad_norm": 0.214699387550354,
      "learning_rate": 5.026719292581196e-06,
      "loss": 0.0143,
      "step": 1529520
    },
    {
      "epoch": 2.503125756891394,
      "grad_norm": 0.28309085965156555,
      "learning_rate": 5.026653400367679e-06,
      "loss": 0.0174,
      "step": 1529540
    },
    {
      "epoch": 2.5031584873300474,
      "grad_norm": 0.6096228957176208,
      "learning_rate": 5.026587508154161e-06,
      "loss": 0.0109,
      "step": 1529560
    },
    {
      "epoch": 2.5031912177687006,
      "grad_norm": 0.40270593762397766,
      "learning_rate": 5.026521615940645e-06,
      "loss": 0.0135,
      "step": 1529580
    },
    {
      "epoch": 2.5032239482073537,
      "grad_norm": 0.34617602825164795,
      "learning_rate": 5.0264557237271275e-06,
      "loss": 0.0111,
      "step": 1529600
    },
    {
      "epoch": 2.5032566786460073,
      "grad_norm": 0.13598835468292236,
      "learning_rate": 5.02638983151361e-06,
      "loss": 0.0113,
      "step": 1529620
    },
    {
      "epoch": 2.5032894090846605,
      "grad_norm": 0.12998276948928833,
      "learning_rate": 5.026323939300093e-06,
      "loss": 0.0122,
      "step": 1529640
    },
    {
      "epoch": 2.5033221395233136,
      "grad_norm": 0.21778526902198792,
      "learning_rate": 5.0262580470865765e-06,
      "loss": 0.0101,
      "step": 1529660
    },
    {
      "epoch": 2.503354869961967,
      "grad_norm": 0.13641521334648132,
      "learning_rate": 5.026192154873058e-06,
      "loss": 0.0112,
      "step": 1529680
    },
    {
      "epoch": 2.503387600400621,
      "grad_norm": 0.26577016711235046,
      "learning_rate": 5.026126262659542e-06,
      "loss": 0.0183,
      "step": 1529700
    },
    {
      "epoch": 2.503420330839274,
      "grad_norm": 0.3607338070869446,
      "learning_rate": 5.026060370446026e-06,
      "loss": 0.0177,
      "step": 1529720
    },
    {
      "epoch": 2.503453061277927,
      "grad_norm": 0.2238018810749054,
      "learning_rate": 5.0259944782325075e-06,
      "loss": 0.0122,
      "step": 1529740
    },
    {
      "epoch": 2.5034857917165807,
      "grad_norm": 0.4140367805957794,
      "learning_rate": 5.025928586018991e-06,
      "loss": 0.0113,
      "step": 1529760
    },
    {
      "epoch": 2.503518522155234,
      "grad_norm": 0.540955126285553,
      "learning_rate": 5.025862693805473e-06,
      "loss": 0.0124,
      "step": 1529780
    },
    {
      "epoch": 2.503551252593887,
      "grad_norm": 0.21987655758857727,
      "learning_rate": 5.0257968015919566e-06,
      "loss": 0.0151,
      "step": 1529800
    },
    {
      "epoch": 2.5035839830325406,
      "grad_norm": 0.09334696829319,
      "learning_rate": 5.025730909378439e-06,
      "loss": 0.0137,
      "step": 1529820
    },
    {
      "epoch": 2.503616713471194,
      "grad_norm": 0.11249487102031708,
      "learning_rate": 5.025665017164922e-06,
      "loss": 0.0126,
      "step": 1529840
    },
    {
      "epoch": 2.5036494439098473,
      "grad_norm": 0.15193559229373932,
      "learning_rate": 5.025599124951405e-06,
      "loss": 0.0236,
      "step": 1529860
    },
    {
      "epoch": 2.5036821743485005,
      "grad_norm": 0.16401782631874084,
      "learning_rate": 5.025533232737888e-06,
      "loss": 0.0153,
      "step": 1529880
    },
    {
      "epoch": 2.503714904787154,
      "grad_norm": 0.398047536611557,
      "learning_rate": 5.02546734052437e-06,
      "loss": 0.0163,
      "step": 1529900
    },
    {
      "epoch": 2.5037476352258072,
      "grad_norm": 0.5035063624382019,
      "learning_rate": 5.025401448310854e-06,
      "loss": 0.0114,
      "step": 1529920
    },
    {
      "epoch": 2.5037803656644604,
      "grad_norm": 0.123688705265522,
      "learning_rate": 5.025335556097336e-06,
      "loss": 0.0235,
      "step": 1529940
    },
    {
      "epoch": 2.503813096103114,
      "grad_norm": 0.27705201506614685,
      "learning_rate": 5.025269663883819e-06,
      "loss": 0.0154,
      "step": 1529960
    },
    {
      "epoch": 2.5038458265417676,
      "grad_norm": 0.15030093491077423,
      "learning_rate": 5.025203771670302e-06,
      "loss": 0.0119,
      "step": 1529980
    },
    {
      "epoch": 2.5038785569804207,
      "grad_norm": 0.6606168150901794,
      "learning_rate": 5.025137879456785e-06,
      "loss": 0.0178,
      "step": 1530000
    },
    {
      "epoch": 2.503911287419074,
      "grad_norm": 0.7557375431060791,
      "learning_rate": 5.0250719872432675e-06,
      "loss": 0.0132,
      "step": 1530020
    },
    {
      "epoch": 2.5039440178577275,
      "grad_norm": 0.07296974956989288,
      "learning_rate": 5.025006095029751e-06,
      "loss": 0.0093,
      "step": 1530040
    },
    {
      "epoch": 2.5039767482963806,
      "grad_norm": 0.5496994256973267,
      "learning_rate": 5.024940202816233e-06,
      "loss": 0.0231,
      "step": 1530060
    },
    {
      "epoch": 2.5040094787350338,
      "grad_norm": 0.43813979625701904,
      "learning_rate": 5.024874310602717e-06,
      "loss": 0.0099,
      "step": 1530080
    },
    {
      "epoch": 2.5040422091736874,
      "grad_norm": 0.21308892965316772,
      "learning_rate": 5.0248084183892e-06,
      "loss": 0.0113,
      "step": 1530100
    },
    {
      "epoch": 2.504074939612341,
      "grad_norm": 0.17187167704105377,
      "learning_rate": 5.024742526175682e-06,
      "loss": 0.0132,
      "step": 1530120
    },
    {
      "epoch": 2.504107670050994,
      "grad_norm": 0.1283906102180481,
      "learning_rate": 5.024676633962166e-06,
      "loss": 0.0139,
      "step": 1530140
    },
    {
      "epoch": 2.5041404004896473,
      "grad_norm": 0.2786330282688141,
      "learning_rate": 5.0246107417486476e-06,
      "loss": 0.0096,
      "step": 1530160
    },
    {
      "epoch": 2.504173130928301,
      "grad_norm": 0.3382677137851715,
      "learning_rate": 5.024544849535131e-06,
      "loss": 0.0137,
      "step": 1530180
    },
    {
      "epoch": 2.504205861366954,
      "grad_norm": 0.12403838336467743,
      "learning_rate": 5.024478957321614e-06,
      "loss": 0.0129,
      "step": 1530200
    },
    {
      "epoch": 2.504238591805607,
      "grad_norm": 0.37550511956214905,
      "learning_rate": 5.024413065108097e-06,
      "loss": 0.0163,
      "step": 1530220
    },
    {
      "epoch": 2.5042713222442607,
      "grad_norm": 0.5316369533538818,
      "learning_rate": 5.024347172894579e-06,
      "loss": 0.0208,
      "step": 1530240
    },
    {
      "epoch": 2.504304052682914,
      "grad_norm": 0.4868584871292114,
      "learning_rate": 5.024281280681063e-06,
      "loss": 0.0188,
      "step": 1530260
    },
    {
      "epoch": 2.5043367831215675,
      "grad_norm": 0.4459910988807678,
      "learning_rate": 5.024215388467545e-06,
      "loss": 0.0138,
      "step": 1530280
    },
    {
      "epoch": 2.5043695135602206,
      "grad_norm": 0.43288853764533997,
      "learning_rate": 5.0241494962540284e-06,
      "loss": 0.013,
      "step": 1530300
    },
    {
      "epoch": 2.5044022439988742,
      "grad_norm": 0.3117764890193939,
      "learning_rate": 5.02408360404051e-06,
      "loss": 0.0085,
      "step": 1530320
    },
    {
      "epoch": 2.5044349744375274,
      "grad_norm": 0.17917025089263916,
      "learning_rate": 5.024017711826994e-06,
      "loss": 0.013,
      "step": 1530340
    },
    {
      "epoch": 2.5044677048761805,
      "grad_norm": 0.24976223707199097,
      "learning_rate": 5.023951819613476e-06,
      "loss": 0.0121,
      "step": 1530360
    },
    {
      "epoch": 2.504500435314834,
      "grad_norm": 0.203849658370018,
      "learning_rate": 5.023885927399959e-06,
      "loss": 0.0159,
      "step": 1530380
    },
    {
      "epoch": 2.5045331657534873,
      "grad_norm": 0.939731776714325,
      "learning_rate": 5.023820035186442e-06,
      "loss": 0.019,
      "step": 1530400
    },
    {
      "epoch": 2.504565896192141,
      "grad_norm": 0.3486548662185669,
      "learning_rate": 5.023754142972925e-06,
      "loss": 0.0172,
      "step": 1530420
    },
    {
      "epoch": 2.504598626630794,
      "grad_norm": 0.34493568539619446,
      "learning_rate": 5.0236882507594085e-06,
      "loss": 0.0145,
      "step": 1530440
    },
    {
      "epoch": 2.5046313570694476,
      "grad_norm": 0.19116568565368652,
      "learning_rate": 5.023622358545891e-06,
      "loss": 0.0148,
      "step": 1530460
    },
    {
      "epoch": 2.5046640875081008,
      "grad_norm": 0.30358561873435974,
      "learning_rate": 5.023556466332375e-06,
      "loss": 0.0108,
      "step": 1530480
    },
    {
      "epoch": 2.504696817946754,
      "grad_norm": 0.53382807970047,
      "learning_rate": 5.023490574118857e-06,
      "loss": 0.0214,
      "step": 1530500
    },
    {
      "epoch": 2.5047295483854075,
      "grad_norm": 0.5446237325668335,
      "learning_rate": 5.02342468190534e-06,
      "loss": 0.0229,
      "step": 1530520
    },
    {
      "epoch": 2.5047622788240607,
      "grad_norm": 0.08901021629571915,
      "learning_rate": 5.023358789691822e-06,
      "loss": 0.0123,
      "step": 1530540
    },
    {
      "epoch": 2.5047950092627143,
      "grad_norm": 0.20261968672275543,
      "learning_rate": 5.023292897478306e-06,
      "loss": 0.0136,
      "step": 1530560
    },
    {
      "epoch": 2.5048277397013674,
      "grad_norm": 0.13388189673423767,
      "learning_rate": 5.023227005264788e-06,
      "loss": 0.0133,
      "step": 1530580
    },
    {
      "epoch": 2.504860470140021,
      "grad_norm": 0.2582249045372009,
      "learning_rate": 5.023161113051271e-06,
      "loss": 0.0186,
      "step": 1530600
    },
    {
      "epoch": 2.504893200578674,
      "grad_norm": 0.6194007992744446,
      "learning_rate": 5.023095220837754e-06,
      "loss": 0.0161,
      "step": 1530620
    },
    {
      "epoch": 2.5049259310173273,
      "grad_norm": 0.4021563231945038,
      "learning_rate": 5.023029328624237e-06,
      "loss": 0.0141,
      "step": 1530640
    },
    {
      "epoch": 2.504958661455981,
      "grad_norm": 0.4046826958656311,
      "learning_rate": 5.0229634364107194e-06,
      "loss": 0.011,
      "step": 1530660
    },
    {
      "epoch": 2.504991391894634,
      "grad_norm": 0.10634395480155945,
      "learning_rate": 5.022897544197203e-06,
      "loss": 0.0119,
      "step": 1530680
    },
    {
      "epoch": 2.5050241223332876,
      "grad_norm": 0.161784827709198,
      "learning_rate": 5.022831651983685e-06,
      "loss": 0.0137,
      "step": 1530700
    },
    {
      "epoch": 2.505056852771941,
      "grad_norm": 0.16728395223617554,
      "learning_rate": 5.0227657597701685e-06,
      "loss": 0.0192,
      "step": 1530720
    },
    {
      "epoch": 2.5050895832105944,
      "grad_norm": 0.1609974205493927,
      "learning_rate": 5.02269986755665e-06,
      "loss": 0.0112,
      "step": 1530740
    },
    {
      "epoch": 2.5051223136492475,
      "grad_norm": 0.10720131546258926,
      "learning_rate": 5.022633975343134e-06,
      "loss": 0.0146,
      "step": 1530760
    },
    {
      "epoch": 2.5051550440879007,
      "grad_norm": 0.5299093127250671,
      "learning_rate": 5.022568083129618e-06,
      "loss": 0.0132,
      "step": 1530780
    },
    {
      "epoch": 2.5051877745265543,
      "grad_norm": 0.4943166971206665,
      "learning_rate": 5.0225021909160995e-06,
      "loss": 0.012,
      "step": 1530800
    },
    {
      "epoch": 2.5052205049652074,
      "grad_norm": 0.273141086101532,
      "learning_rate": 5.022436298702583e-06,
      "loss": 0.0205,
      "step": 1530820
    },
    {
      "epoch": 2.505253235403861,
      "grad_norm": 0.6231098771095276,
      "learning_rate": 5.022370406489066e-06,
      "loss": 0.0143,
      "step": 1530840
    },
    {
      "epoch": 2.505285965842514,
      "grad_norm": 0.2962535619735718,
      "learning_rate": 5.0223045142755485e-06,
      "loss": 0.0131,
      "step": 1530860
    },
    {
      "epoch": 2.5053186962811678,
      "grad_norm": 0.3294470012187958,
      "learning_rate": 5.022238622062031e-06,
      "loss": 0.0181,
      "step": 1530880
    },
    {
      "epoch": 2.505351426719821,
      "grad_norm": 0.4433821737766266,
      "learning_rate": 5.022172729848515e-06,
      "loss": 0.011,
      "step": 1530900
    },
    {
      "epoch": 2.505384157158474,
      "grad_norm": 0.17513811588287354,
      "learning_rate": 5.022106837634997e-06,
      "loss": 0.018,
      "step": 1530920
    },
    {
      "epoch": 2.5054168875971277,
      "grad_norm": 0.9770447015762329,
      "learning_rate": 5.02204094542148e-06,
      "loss": 0.0148,
      "step": 1530940
    },
    {
      "epoch": 2.505449618035781,
      "grad_norm": 0.8528416752815247,
      "learning_rate": 5.021975053207962e-06,
      "loss": 0.0184,
      "step": 1530960
    },
    {
      "epoch": 2.5054823484744344,
      "grad_norm": 0.14243395626544952,
      "learning_rate": 5.021909160994446e-06,
      "loss": 0.0147,
      "step": 1530980
    },
    {
      "epoch": 2.5055150789130876,
      "grad_norm": 0.20545075833797455,
      "learning_rate": 5.0218432687809286e-06,
      "loss": 0.0129,
      "step": 1531000
    },
    {
      "epoch": 2.505547809351741,
      "grad_norm": 0.13052350282669067,
      "learning_rate": 5.021777376567411e-06,
      "loss": 0.011,
      "step": 1531020
    },
    {
      "epoch": 2.5055805397903943,
      "grad_norm": 0.35188695788383484,
      "learning_rate": 5.021711484353894e-06,
      "loss": 0.0125,
      "step": 1531040
    },
    {
      "epoch": 2.5056132702290475,
      "grad_norm": 0.22455917298793793,
      "learning_rate": 5.021645592140378e-06,
      "loss": 0.0144,
      "step": 1531060
    },
    {
      "epoch": 2.505646000667701,
      "grad_norm": 0.3878984749317169,
      "learning_rate": 5.0215796999268595e-06,
      "loss": 0.0184,
      "step": 1531080
    },
    {
      "epoch": 2.505678731106354,
      "grad_norm": 0.12722793221473694,
      "learning_rate": 5.021513807713343e-06,
      "loss": 0.011,
      "step": 1531100
    },
    {
      "epoch": 2.505711461545008,
      "grad_norm": 0.3187093734741211,
      "learning_rate": 5.021447915499825e-06,
      "loss": 0.0158,
      "step": 1531120
    },
    {
      "epoch": 2.505744191983661,
      "grad_norm": 0.6285988688468933,
      "learning_rate": 5.021382023286309e-06,
      "loss": 0.0158,
      "step": 1531140
    },
    {
      "epoch": 2.5057769224223145,
      "grad_norm": 0.49700745940208435,
      "learning_rate": 5.021316131072792e-06,
      "loss": 0.0113,
      "step": 1531160
    },
    {
      "epoch": 2.5058096528609677,
      "grad_norm": 0.09440159797668457,
      "learning_rate": 5.021250238859274e-06,
      "loss": 0.0119,
      "step": 1531180
    },
    {
      "epoch": 2.505842383299621,
      "grad_norm": 0.5240123867988586,
      "learning_rate": 5.021184346645758e-06,
      "loss": 0.0134,
      "step": 1531200
    },
    {
      "epoch": 2.5058751137382744,
      "grad_norm": 0.3458946645259857,
      "learning_rate": 5.02111845443224e-06,
      "loss": 0.021,
      "step": 1531220
    },
    {
      "epoch": 2.5059078441769276,
      "grad_norm": 0.19151997566223145,
      "learning_rate": 5.021052562218723e-06,
      "loss": 0.0165,
      "step": 1531240
    },
    {
      "epoch": 2.5059405746155807,
      "grad_norm": 0.1773919016122818,
      "learning_rate": 5.020986670005206e-06,
      "loss": 0.0122,
      "step": 1531260
    },
    {
      "epoch": 2.5059733050542343,
      "grad_norm": 0.34162187576293945,
      "learning_rate": 5.0209207777916895e-06,
      "loss": 0.0174,
      "step": 1531280
    },
    {
      "epoch": 2.506006035492888,
      "grad_norm": 2.3487653732299805,
      "learning_rate": 5.020854885578171e-06,
      "loss": 0.0147,
      "step": 1531300
    },
    {
      "epoch": 2.506038765931541,
      "grad_norm": 1.16506826877594,
      "learning_rate": 5.020788993364655e-06,
      "loss": 0.0159,
      "step": 1531320
    },
    {
      "epoch": 2.506071496370194,
      "grad_norm": 1.1754100322723389,
      "learning_rate": 5.020723101151137e-06,
      "loss": 0.021,
      "step": 1531340
    },
    {
      "epoch": 2.506104226808848,
      "grad_norm": 0.5097284317016602,
      "learning_rate": 5.02065720893762e-06,
      "loss": 0.0102,
      "step": 1531360
    },
    {
      "epoch": 2.506136957247501,
      "grad_norm": 0.08289297670125961,
      "learning_rate": 5.020591316724102e-06,
      "loss": 0.0248,
      "step": 1531380
    },
    {
      "epoch": 2.506169687686154,
      "grad_norm": 0.12150789052248001,
      "learning_rate": 5.020525424510586e-06,
      "loss": 0.0194,
      "step": 1531400
    },
    {
      "epoch": 2.5062024181248077,
      "grad_norm": 0.20367562770843506,
      "learning_rate": 5.020459532297069e-06,
      "loss": 0.0153,
      "step": 1531420
    },
    {
      "epoch": 2.5062351485634613,
      "grad_norm": 0.7082734704017639,
      "learning_rate": 5.020393640083551e-06,
      "loss": 0.0214,
      "step": 1531440
    },
    {
      "epoch": 2.5062678790021145,
      "grad_norm": 0.2509143352508545,
      "learning_rate": 5.020327747870034e-06,
      "loss": 0.0176,
      "step": 1531460
    },
    {
      "epoch": 2.5063006094407676,
      "grad_norm": 0.8096697330474854,
      "learning_rate": 5.020261855656518e-06,
      "loss": 0.0131,
      "step": 1531480
    },
    {
      "epoch": 2.506333339879421,
      "grad_norm": 0.33049091696739197,
      "learning_rate": 5.0201959634430004e-06,
      "loss": 0.0144,
      "step": 1531500
    },
    {
      "epoch": 2.5063660703180743,
      "grad_norm": 0.43943965435028076,
      "learning_rate": 5.020130071229483e-06,
      "loss": 0.0156,
      "step": 1531520
    },
    {
      "epoch": 2.5063988007567275,
      "grad_norm": 0.314727246761322,
      "learning_rate": 5.020064179015967e-06,
      "loss": 0.0198,
      "step": 1531540
    },
    {
      "epoch": 2.506431531195381,
      "grad_norm": 0.8405805230140686,
      "learning_rate": 5.019998286802449e-06,
      "loss": 0.0154,
      "step": 1531560
    },
    {
      "epoch": 2.5064642616340347,
      "grad_norm": 0.4293650686740875,
      "learning_rate": 5.019932394588932e-06,
      "loss": 0.0142,
      "step": 1531580
    },
    {
      "epoch": 2.506496992072688,
      "grad_norm": 0.3121781647205353,
      "learning_rate": 5.019866502375414e-06,
      "loss": 0.0162,
      "step": 1531600
    },
    {
      "epoch": 2.506529722511341,
      "grad_norm": 0.3288118541240692,
      "learning_rate": 5.019800610161898e-06,
      "loss": 0.0087,
      "step": 1531620
    },
    {
      "epoch": 2.5065624529499946,
      "grad_norm": 0.9305800795555115,
      "learning_rate": 5.0197347179483805e-06,
      "loss": 0.0224,
      "step": 1531640
    },
    {
      "epoch": 2.5065951833886477,
      "grad_norm": 0.29855668544769287,
      "learning_rate": 5.019668825734863e-06,
      "loss": 0.0131,
      "step": 1531660
    },
    {
      "epoch": 2.506627913827301,
      "grad_norm": 0.6960209608078003,
      "learning_rate": 5.019602933521346e-06,
      "loss": 0.0127,
      "step": 1531680
    },
    {
      "epoch": 2.5066606442659545,
      "grad_norm": 1.1425071954727173,
      "learning_rate": 5.0195370413078295e-06,
      "loss": 0.0113,
      "step": 1531700
    },
    {
      "epoch": 2.506693374704608,
      "grad_norm": 0.28577327728271484,
      "learning_rate": 5.0194711490943114e-06,
      "loss": 0.0121,
      "step": 1531720
    },
    {
      "epoch": 2.506726105143261,
      "grad_norm": 1.0290988683700562,
      "learning_rate": 5.019405256880795e-06,
      "loss": 0.0188,
      "step": 1531740
    },
    {
      "epoch": 2.5067588355819144,
      "grad_norm": 0.0966402143239975,
      "learning_rate": 5.019339364667277e-06,
      "loss": 0.0096,
      "step": 1531760
    },
    {
      "epoch": 2.506791566020568,
      "grad_norm": 0.16159190237522125,
      "learning_rate": 5.0192734724537605e-06,
      "loss": 0.0148,
      "step": 1531780
    },
    {
      "epoch": 2.506824296459221,
      "grad_norm": 0.3802531063556671,
      "learning_rate": 5.019207580240243e-06,
      "loss": 0.0136,
      "step": 1531800
    },
    {
      "epoch": 2.5068570268978743,
      "grad_norm": 0.3559859097003937,
      "learning_rate": 5.019141688026726e-06,
      "loss": 0.0212,
      "step": 1531820
    },
    {
      "epoch": 2.506889757336528,
      "grad_norm": 0.3726420998573303,
      "learning_rate": 5.0190757958132096e-06,
      "loss": 0.0182,
      "step": 1531840
    },
    {
      "epoch": 2.506922487775181,
      "grad_norm": 0.8458816409111023,
      "learning_rate": 5.019009903599692e-06,
      "loss": 0.0151,
      "step": 1531860
    },
    {
      "epoch": 2.5069552182138346,
      "grad_norm": 0.11006786674261093,
      "learning_rate": 5.018944011386175e-06,
      "loss": 0.0126,
      "step": 1531880
    },
    {
      "epoch": 2.5069879486524878,
      "grad_norm": 0.2155335396528244,
      "learning_rate": 5.018878119172658e-06,
      "loss": 0.0156,
      "step": 1531900
    },
    {
      "epoch": 2.5070206790911413,
      "grad_norm": 0.44136831164360046,
      "learning_rate": 5.018812226959141e-06,
      "loss": 0.0145,
      "step": 1531920
    },
    {
      "epoch": 2.5070534095297945,
      "grad_norm": 0.20288901031017303,
      "learning_rate": 5.018746334745623e-06,
      "loss": 0.016,
      "step": 1531940
    },
    {
      "epoch": 2.5070861399684476,
      "grad_norm": 0.22021719813346863,
      "learning_rate": 5.018680442532107e-06,
      "loss": 0.0145,
      "step": 1531960
    },
    {
      "epoch": 2.5071188704071012,
      "grad_norm": 0.05022028833627701,
      "learning_rate": 5.018614550318589e-06,
      "loss": 0.0181,
      "step": 1531980
    },
    {
      "epoch": 2.5071516008457544,
      "grad_norm": 0.6857770085334778,
      "learning_rate": 5.018548658105072e-06,
      "loss": 0.0128,
      "step": 1532000
    },
    {
      "epoch": 2.507184331284408,
      "grad_norm": 0.05727962777018547,
      "learning_rate": 5.018482765891555e-06,
      "loss": 0.011,
      "step": 1532020
    },
    {
      "epoch": 2.507217061723061,
      "grad_norm": 0.20993410050868988,
      "learning_rate": 5.018416873678038e-06,
      "loss": 0.012,
      "step": 1532040
    },
    {
      "epoch": 2.5072497921617147,
      "grad_norm": 1.001662015914917,
      "learning_rate": 5.0183509814645205e-06,
      "loss": 0.0185,
      "step": 1532060
    },
    {
      "epoch": 2.507282522600368,
      "grad_norm": 0.22639451920986176,
      "learning_rate": 5.018285089251004e-06,
      "loss": 0.0125,
      "step": 1532080
    },
    {
      "epoch": 2.507315253039021,
      "grad_norm": 0.31599000096321106,
      "learning_rate": 5.018219197037486e-06,
      "loss": 0.0196,
      "step": 1532100
    },
    {
      "epoch": 2.5073479834776746,
      "grad_norm": 0.361378014087677,
      "learning_rate": 5.01815330482397e-06,
      "loss": 0.0103,
      "step": 1532120
    },
    {
      "epoch": 2.5073807139163278,
      "grad_norm": 0.39452359080314636,
      "learning_rate": 5.0180874126104515e-06,
      "loss": 0.0093,
      "step": 1532140
    },
    {
      "epoch": 2.5074134443549814,
      "grad_norm": 0.7865803241729736,
      "learning_rate": 5.018021520396935e-06,
      "loss": 0.0172,
      "step": 1532160
    },
    {
      "epoch": 2.5074461747936345,
      "grad_norm": 0.6780367493629456,
      "learning_rate": 5.017955628183419e-06,
      "loss": 0.0178,
      "step": 1532180
    },
    {
      "epoch": 2.507478905232288,
      "grad_norm": 0.17901498079299927,
      "learning_rate": 5.0178897359699006e-06,
      "loss": 0.0129,
      "step": 1532200
    },
    {
      "epoch": 2.5075116356709413,
      "grad_norm": 0.33852019906044006,
      "learning_rate": 5.017823843756384e-06,
      "loss": 0.0082,
      "step": 1532220
    },
    {
      "epoch": 2.5075443661095944,
      "grad_norm": 0.263129860162735,
      "learning_rate": 5.017757951542867e-06,
      "loss": 0.0127,
      "step": 1532240
    },
    {
      "epoch": 2.507577096548248,
      "grad_norm": 0.4898110330104828,
      "learning_rate": 5.01769205932935e-06,
      "loss": 0.0151,
      "step": 1532260
    },
    {
      "epoch": 2.507609826986901,
      "grad_norm": 0.32817453145980835,
      "learning_rate": 5.017626167115832e-06,
      "loss": 0.0146,
      "step": 1532280
    },
    {
      "epoch": 2.5076425574255548,
      "grad_norm": 0.2378273606300354,
      "learning_rate": 5.017560274902316e-06,
      "loss": 0.0096,
      "step": 1532300
    },
    {
      "epoch": 2.507675287864208,
      "grad_norm": 0.07525243610143661,
      "learning_rate": 5.017494382688798e-06,
      "loss": 0.0118,
      "step": 1532320
    },
    {
      "epoch": 2.5077080183028615,
      "grad_norm": 0.5211851000785828,
      "learning_rate": 5.0174284904752814e-06,
      "loss": 0.0221,
      "step": 1532340
    },
    {
      "epoch": 2.5077407487415146,
      "grad_norm": 0.1287151277065277,
      "learning_rate": 5.017362598261763e-06,
      "loss": 0.0144,
      "step": 1532360
    },
    {
      "epoch": 2.507773479180168,
      "grad_norm": 0.15400399267673492,
      "learning_rate": 5.017296706048247e-06,
      "loss": 0.0147,
      "step": 1532380
    },
    {
      "epoch": 2.5078062096188214,
      "grad_norm": 0.15592136979103088,
      "learning_rate": 5.017230813834729e-06,
      "loss": 0.0099,
      "step": 1532400
    },
    {
      "epoch": 2.5078389400574745,
      "grad_norm": 0.5129163861274719,
      "learning_rate": 5.017164921621212e-06,
      "loss": 0.014,
      "step": 1532420
    },
    {
      "epoch": 2.507871670496128,
      "grad_norm": 0.12048826366662979,
      "learning_rate": 5.017099029407695e-06,
      "loss": 0.0115,
      "step": 1532440
    },
    {
      "epoch": 2.5079044009347813,
      "grad_norm": 0.5077916979789734,
      "learning_rate": 5.017033137194178e-06,
      "loss": 0.019,
      "step": 1532460
    },
    {
      "epoch": 2.507937131373435,
      "grad_norm": 0.21826167404651642,
      "learning_rate": 5.016967244980661e-06,
      "loss": 0.0167,
      "step": 1532480
    },
    {
      "epoch": 2.507969861812088,
      "grad_norm": 0.3568558096885681,
      "learning_rate": 5.016901352767144e-06,
      "loss": 0.0155,
      "step": 1532500
    },
    {
      "epoch": 2.508002592250741,
      "grad_norm": 0.36958393454551697,
      "learning_rate": 5.016835460553626e-06,
      "loss": 0.0125,
      "step": 1532520
    },
    {
      "epoch": 2.5080353226893948,
      "grad_norm": 0.3013453185558319,
      "learning_rate": 5.01676956834011e-06,
      "loss": 0.0137,
      "step": 1532540
    },
    {
      "epoch": 2.508068053128048,
      "grad_norm": 0.3420770466327667,
      "learning_rate": 5.016703676126593e-06,
      "loss": 0.0184,
      "step": 1532560
    },
    {
      "epoch": 2.5081007835667015,
      "grad_norm": 0.47681373357772827,
      "learning_rate": 5.016637783913075e-06,
      "loss": 0.0105,
      "step": 1532580
    },
    {
      "epoch": 2.5081335140053547,
      "grad_norm": 0.3011327087879181,
      "learning_rate": 5.016571891699559e-06,
      "loss": 0.0198,
      "step": 1532600
    },
    {
      "epoch": 2.5081662444440083,
      "grad_norm": 0.31812167167663574,
      "learning_rate": 5.016505999486041e-06,
      "loss": 0.0183,
      "step": 1532620
    },
    {
      "epoch": 2.5081989748826614,
      "grad_norm": 0.2252180427312851,
      "learning_rate": 5.016440107272524e-06,
      "loss": 0.0224,
      "step": 1532640
    },
    {
      "epoch": 2.5082317053213146,
      "grad_norm": 1.5096081495285034,
      "learning_rate": 5.016374215059007e-06,
      "loss": 0.0142,
      "step": 1532660
    },
    {
      "epoch": 2.508264435759968,
      "grad_norm": 0.44867268204689026,
      "learning_rate": 5.01630832284549e-06,
      "loss": 0.0169,
      "step": 1532680
    },
    {
      "epoch": 2.5082971661986213,
      "grad_norm": 0.1711280345916748,
      "learning_rate": 5.0162424306319725e-06,
      "loss": 0.0171,
      "step": 1532700
    },
    {
      "epoch": 2.5083298966372745,
      "grad_norm": 0.810080349445343,
      "learning_rate": 5.016176538418456e-06,
      "loss": 0.0222,
      "step": 1532720
    },
    {
      "epoch": 2.508362627075928,
      "grad_norm": 0.1699494570493698,
      "learning_rate": 5.016110646204938e-06,
      "loss": 0.0198,
      "step": 1532740
    },
    {
      "epoch": 2.5083953575145816,
      "grad_norm": 0.13650590181350708,
      "learning_rate": 5.0160447539914215e-06,
      "loss": 0.0147,
      "step": 1532760
    },
    {
      "epoch": 2.508428087953235,
      "grad_norm": 0.21191193163394928,
      "learning_rate": 5.015978861777903e-06,
      "loss": 0.015,
      "step": 1532780
    },
    {
      "epoch": 2.508460818391888,
      "grad_norm": 0.08805693686008453,
      "learning_rate": 5.015912969564387e-06,
      "loss": 0.0191,
      "step": 1532800
    },
    {
      "epoch": 2.5084935488305415,
      "grad_norm": 0.18610692024230957,
      "learning_rate": 5.01584707735087e-06,
      "loss": 0.0135,
      "step": 1532820
    },
    {
      "epoch": 2.5085262792691947,
      "grad_norm": 1.1008315086364746,
      "learning_rate": 5.0157811851373525e-06,
      "loss": 0.0131,
      "step": 1532840
    },
    {
      "epoch": 2.508559009707848,
      "grad_norm": 0.6495697498321533,
      "learning_rate": 5.015715292923835e-06,
      "loss": 0.014,
      "step": 1532860
    },
    {
      "epoch": 2.5085917401465014,
      "grad_norm": 0.19664902985095978,
      "learning_rate": 5.015649400710319e-06,
      "loss": 0.0109,
      "step": 1532880
    },
    {
      "epoch": 2.508624470585155,
      "grad_norm": 0.4570816159248352,
      "learning_rate": 5.0155835084968015e-06,
      "loss": 0.0132,
      "step": 1532900
    },
    {
      "epoch": 2.508657201023808,
      "grad_norm": 0.29616600275039673,
      "learning_rate": 5.015517616283284e-06,
      "loss": 0.0154,
      "step": 1532920
    },
    {
      "epoch": 2.5086899314624613,
      "grad_norm": 0.3271975815296173,
      "learning_rate": 5.015451724069768e-06,
      "loss": 0.016,
      "step": 1532940
    },
    {
      "epoch": 2.508722661901115,
      "grad_norm": 0.0982716828584671,
      "learning_rate": 5.01538583185625e-06,
      "loss": 0.0122,
      "step": 1532960
    },
    {
      "epoch": 2.508755392339768,
      "grad_norm": 1.4834067821502686,
      "learning_rate": 5.015319939642733e-06,
      "loss": 0.009,
      "step": 1532980
    },
    {
      "epoch": 2.5087881227784212,
      "grad_norm": 0.13253942131996155,
      "learning_rate": 5.015254047429215e-06,
      "loss": 0.0159,
      "step": 1533000
    },
    {
      "epoch": 2.508820853217075,
      "grad_norm": 0.6453529000282288,
      "learning_rate": 5.015188155215699e-06,
      "loss": 0.0132,
      "step": 1533020
    },
    {
      "epoch": 2.5088535836557284,
      "grad_norm": 0.8027135729789734,
      "learning_rate": 5.0151222630021816e-06,
      "loss": 0.0158,
      "step": 1533040
    },
    {
      "epoch": 2.5088863140943816,
      "grad_norm": 0.3660275936126709,
      "learning_rate": 5.015056370788664e-06,
      "loss": 0.0156,
      "step": 1533060
    },
    {
      "epoch": 2.5089190445330347,
      "grad_norm": 0.44840651750564575,
      "learning_rate": 5.014990478575147e-06,
      "loss": 0.0094,
      "step": 1533080
    },
    {
      "epoch": 2.5089517749716883,
      "grad_norm": 0.4713965058326721,
      "learning_rate": 5.014924586361631e-06,
      "loss": 0.0175,
      "step": 1533100
    },
    {
      "epoch": 2.5089845054103415,
      "grad_norm": 0.6185066103935242,
      "learning_rate": 5.0148586941481125e-06,
      "loss": 0.0165,
      "step": 1533120
    },
    {
      "epoch": 2.5090172358489946,
      "grad_norm": 1.9166280031204224,
      "learning_rate": 5.014792801934596e-06,
      "loss": 0.0096,
      "step": 1533140
    },
    {
      "epoch": 2.509049966287648,
      "grad_norm": 1.196279764175415,
      "learning_rate": 5.014726909721078e-06,
      "loss": 0.0142,
      "step": 1533160
    },
    {
      "epoch": 2.509082696726302,
      "grad_norm": 0.04781929776072502,
      "learning_rate": 5.014661017507562e-06,
      "loss": 0.0141,
      "step": 1533180
    },
    {
      "epoch": 2.509115427164955,
      "grad_norm": 1.0478606224060059,
      "learning_rate": 5.0145951252940435e-06,
      "loss": 0.0198,
      "step": 1533200
    },
    {
      "epoch": 2.509148157603608,
      "grad_norm": 0.20832794904708862,
      "learning_rate": 5.014529233080527e-06,
      "loss": 0.006,
      "step": 1533220
    },
    {
      "epoch": 2.5091808880422617,
      "grad_norm": 0.9056689143180847,
      "learning_rate": 5.014463340867011e-06,
      "loss": 0.0142,
      "step": 1533240
    },
    {
      "epoch": 2.509213618480915,
      "grad_norm": 1.6919350624084473,
      "learning_rate": 5.014397448653493e-06,
      "loss": 0.0127,
      "step": 1533260
    },
    {
      "epoch": 2.509246348919568,
      "grad_norm": 0.22526395320892334,
      "learning_rate": 5.014331556439976e-06,
      "loss": 0.0166,
      "step": 1533280
    },
    {
      "epoch": 2.5092790793582216,
      "grad_norm": 0.06473113596439362,
      "learning_rate": 5.014265664226459e-06,
      "loss": 0.0101,
      "step": 1533300
    },
    {
      "epoch": 2.5093118097968747,
      "grad_norm": 0.14090639352798462,
      "learning_rate": 5.0141997720129425e-06,
      "loss": 0.0108,
      "step": 1533320
    },
    {
      "epoch": 2.5093445402355283,
      "grad_norm": 0.27487868070602417,
      "learning_rate": 5.014133879799424e-06,
      "loss": 0.0126,
      "step": 1533340
    },
    {
      "epoch": 2.5093772706741815,
      "grad_norm": 0.1769920438528061,
      "learning_rate": 5.014067987585908e-06,
      "loss": 0.0138,
      "step": 1533360
    },
    {
      "epoch": 2.509410001112835,
      "grad_norm": 0.12194550037384033,
      "learning_rate": 5.01400209537239e-06,
      "loss": 0.0121,
      "step": 1533380
    },
    {
      "epoch": 2.5094427315514882,
      "grad_norm": 0.43073827028274536,
      "learning_rate": 5.0139362031588734e-06,
      "loss": 0.0214,
      "step": 1533400
    },
    {
      "epoch": 2.5094754619901414,
      "grad_norm": 0.5765200853347778,
      "learning_rate": 5.013870310945355e-06,
      "loss": 0.0155,
      "step": 1533420
    },
    {
      "epoch": 2.509508192428795,
      "grad_norm": 0.3609510362148285,
      "learning_rate": 5.013804418731839e-06,
      "loss": 0.0124,
      "step": 1533440
    },
    {
      "epoch": 2.509540922867448,
      "grad_norm": 0.2435927838087082,
      "learning_rate": 5.013738526518322e-06,
      "loss": 0.0138,
      "step": 1533460
    },
    {
      "epoch": 2.5095736533061017,
      "grad_norm": 0.18649780750274658,
      "learning_rate": 5.013672634304804e-06,
      "loss": 0.0159,
      "step": 1533480
    },
    {
      "epoch": 2.509606383744755,
      "grad_norm": 0.24778437614440918,
      "learning_rate": 5.013606742091287e-06,
      "loss": 0.0126,
      "step": 1533500
    },
    {
      "epoch": 2.5096391141834085,
      "grad_norm": 4.350686550140381,
      "learning_rate": 5.013540849877771e-06,
      "loss": 0.0179,
      "step": 1533520
    },
    {
      "epoch": 2.5096718446220616,
      "grad_norm": 0.2414805144071579,
      "learning_rate": 5.013474957664253e-06,
      "loss": 0.0143,
      "step": 1533540
    },
    {
      "epoch": 2.5097045750607148,
      "grad_norm": 3.653301239013672,
      "learning_rate": 5.013409065450736e-06,
      "loss": 0.0142,
      "step": 1533560
    },
    {
      "epoch": 2.5097373054993684,
      "grad_norm": 0.7738872766494751,
      "learning_rate": 5.013343173237218e-06,
      "loss": 0.0101,
      "step": 1533580
    },
    {
      "epoch": 2.5097700359380215,
      "grad_norm": 0.6903261542320251,
      "learning_rate": 5.013277281023702e-06,
      "loss": 0.0159,
      "step": 1533600
    },
    {
      "epoch": 2.509802766376675,
      "grad_norm": 0.10494592785835266,
      "learning_rate": 5.013211388810185e-06,
      "loss": 0.0083,
      "step": 1533620
    },
    {
      "epoch": 2.5098354968153282,
      "grad_norm": 0.5046267509460449,
      "learning_rate": 5.013145496596667e-06,
      "loss": 0.0159,
      "step": 1533640
    },
    {
      "epoch": 2.509868227253982,
      "grad_norm": 0.12604741752147675,
      "learning_rate": 5.013079604383151e-06,
      "loss": 0.0095,
      "step": 1533660
    },
    {
      "epoch": 2.509900957692635,
      "grad_norm": 0.7737045884132385,
      "learning_rate": 5.0130137121696335e-06,
      "loss": 0.0143,
      "step": 1533680
    },
    {
      "epoch": 2.509933688131288,
      "grad_norm": 0.37557289004325867,
      "learning_rate": 5.012947819956116e-06,
      "loss": 0.0121,
      "step": 1533700
    },
    {
      "epoch": 2.5099664185699417,
      "grad_norm": 0.9410088062286377,
      "learning_rate": 5.012881927742599e-06,
      "loss": 0.0126,
      "step": 1533720
    },
    {
      "epoch": 2.509999149008595,
      "grad_norm": 0.3969374895095825,
      "learning_rate": 5.0128160355290825e-06,
      "loss": 0.016,
      "step": 1533740
    },
    {
      "epoch": 2.5100318794472485,
      "grad_norm": 2.2031354904174805,
      "learning_rate": 5.0127501433155644e-06,
      "loss": 0.0202,
      "step": 1533760
    },
    {
      "epoch": 2.5100646098859016,
      "grad_norm": 0.1575981080532074,
      "learning_rate": 5.012684251102048e-06,
      "loss": 0.0114,
      "step": 1533780
    },
    {
      "epoch": 2.5100973403245552,
      "grad_norm": 0.37752944231033325,
      "learning_rate": 5.01261835888853e-06,
      "loss": 0.0165,
      "step": 1533800
    },
    {
      "epoch": 2.5101300707632084,
      "grad_norm": 0.5672827363014221,
      "learning_rate": 5.0125524666750135e-06,
      "loss": 0.015,
      "step": 1533820
    },
    {
      "epoch": 2.5101628012018615,
      "grad_norm": 0.46054452657699585,
      "learning_rate": 5.012486574461496e-06,
      "loss": 0.0137,
      "step": 1533840
    },
    {
      "epoch": 2.510195531640515,
      "grad_norm": 0.3316629230976105,
      "learning_rate": 5.012420682247979e-06,
      "loss": 0.0128,
      "step": 1533860
    },
    {
      "epoch": 2.5102282620791683,
      "grad_norm": 0.32371190190315247,
      "learning_rate": 5.012354790034462e-06,
      "loss": 0.0216,
      "step": 1533880
    },
    {
      "epoch": 2.510260992517822,
      "grad_norm": 0.15184101462364197,
      "learning_rate": 5.012288897820945e-06,
      "loss": 0.0148,
      "step": 1533900
    },
    {
      "epoch": 2.510293722956475,
      "grad_norm": 0.2604772448539734,
      "learning_rate": 5.012223005607427e-06,
      "loss": 0.0131,
      "step": 1533920
    },
    {
      "epoch": 2.5103264533951286,
      "grad_norm": 0.16800904273986816,
      "learning_rate": 5.012157113393911e-06,
      "loss": 0.0155,
      "step": 1533940
    },
    {
      "epoch": 2.5103591838337818,
      "grad_norm": 0.6602767109870911,
      "learning_rate": 5.012091221180394e-06,
      "loss": 0.0172,
      "step": 1533960
    },
    {
      "epoch": 2.510391914272435,
      "grad_norm": 0.8900012373924255,
      "learning_rate": 5.012025328966876e-06,
      "loss": 0.0137,
      "step": 1533980
    },
    {
      "epoch": 2.5104246447110885,
      "grad_norm": 0.4865458309650421,
      "learning_rate": 5.01195943675336e-06,
      "loss": 0.0172,
      "step": 1534000
    },
    {
      "epoch": 2.5104573751497417,
      "grad_norm": 0.08659122884273529,
      "learning_rate": 5.011893544539842e-06,
      "loss": 0.0097,
      "step": 1534020
    },
    {
      "epoch": 2.5104901055883952,
      "grad_norm": 0.8689159750938416,
      "learning_rate": 5.011827652326325e-06,
      "loss": 0.0123,
      "step": 1534040
    },
    {
      "epoch": 2.5105228360270484,
      "grad_norm": 0.6870903372764587,
      "learning_rate": 5.011761760112808e-06,
      "loss": 0.0185,
      "step": 1534060
    },
    {
      "epoch": 2.510555566465702,
      "grad_norm": 0.755878746509552,
      "learning_rate": 5.011695867899291e-06,
      "loss": 0.0123,
      "step": 1534080
    },
    {
      "epoch": 2.510588296904355,
      "grad_norm": 0.2938791811466217,
      "learning_rate": 5.0116299756857736e-06,
      "loss": 0.0191,
      "step": 1534100
    },
    {
      "epoch": 2.5106210273430083,
      "grad_norm": 0.1743682473897934,
      "learning_rate": 5.011564083472257e-06,
      "loss": 0.0085,
      "step": 1534120
    },
    {
      "epoch": 2.510653757781662,
      "grad_norm": 1.1533674001693726,
      "learning_rate": 5.011498191258739e-06,
      "loss": 0.0154,
      "step": 1534140
    },
    {
      "epoch": 2.510686488220315,
      "grad_norm": 0.11739885807037354,
      "learning_rate": 5.011432299045223e-06,
      "loss": 0.0099,
      "step": 1534160
    },
    {
      "epoch": 2.5107192186589686,
      "grad_norm": 0.6108787059783936,
      "learning_rate": 5.0113664068317045e-06,
      "loss": 0.0161,
      "step": 1534180
    },
    {
      "epoch": 2.510751949097622,
      "grad_norm": 0.6708571910858154,
      "learning_rate": 5.011300514618188e-06,
      "loss": 0.0153,
      "step": 1534200
    },
    {
      "epoch": 2.5107846795362754,
      "grad_norm": 1.0706113576889038,
      "learning_rate": 5.01123462240467e-06,
      "loss": 0.0116,
      "step": 1534220
    },
    {
      "epoch": 2.5108174099749285,
      "grad_norm": 0.7190960049629211,
      "learning_rate": 5.0111687301911536e-06,
      "loss": 0.0182,
      "step": 1534240
    },
    {
      "epoch": 2.5108501404135817,
      "grad_norm": 0.6609637141227722,
      "learning_rate": 5.011102837977636e-06,
      "loss": 0.0173,
      "step": 1534260
    },
    {
      "epoch": 2.5108828708522353,
      "grad_norm": 0.07715420424938202,
      "learning_rate": 5.011036945764119e-06,
      "loss": 0.0127,
      "step": 1534280
    },
    {
      "epoch": 2.5109156012908884,
      "grad_norm": 0.20580926537513733,
      "learning_rate": 5.010971053550603e-06,
      "loss": 0.0138,
      "step": 1534300
    },
    {
      "epoch": 2.5109483317295416,
      "grad_norm": 0.27661263942718506,
      "learning_rate": 5.010905161337085e-06,
      "loss": 0.0145,
      "step": 1534320
    },
    {
      "epoch": 2.510981062168195,
      "grad_norm": 0.437580943107605,
      "learning_rate": 5.010839269123569e-06,
      "loss": 0.0209,
      "step": 1534340
    },
    {
      "epoch": 2.5110137926068488,
      "grad_norm": 0.3298373222351074,
      "learning_rate": 5.010773376910051e-06,
      "loss": 0.0113,
      "step": 1534360
    },
    {
      "epoch": 2.511046523045502,
      "grad_norm": 0.8001402616500854,
      "learning_rate": 5.0107074846965344e-06,
      "loss": 0.0165,
      "step": 1534380
    },
    {
      "epoch": 2.511079253484155,
      "grad_norm": 0.13648685812950134,
      "learning_rate": 5.010641592483016e-06,
      "loss": 0.0148,
      "step": 1534400
    },
    {
      "epoch": 2.5111119839228087,
      "grad_norm": 0.4949638247489929,
      "learning_rate": 5.0105757002695e-06,
      "loss": 0.012,
      "step": 1534420
    },
    {
      "epoch": 2.511144714361462,
      "grad_norm": 0.6061484813690186,
      "learning_rate": 5.010509808055982e-06,
      "loss": 0.0113,
      "step": 1534440
    },
    {
      "epoch": 2.511177444800115,
      "grad_norm": 0.5531487464904785,
      "learning_rate": 5.010443915842465e-06,
      "loss": 0.0163,
      "step": 1534460
    },
    {
      "epoch": 2.5112101752387685,
      "grad_norm": 0.4618166387081146,
      "learning_rate": 5.010378023628948e-06,
      "loss": 0.021,
      "step": 1534480
    },
    {
      "epoch": 2.511242905677422,
      "grad_norm": 1.2096322774887085,
      "learning_rate": 5.010312131415431e-06,
      "loss": 0.0138,
      "step": 1534500
    },
    {
      "epoch": 2.5112756361160753,
      "grad_norm": 0.11966162919998169,
      "learning_rate": 5.010246239201914e-06,
      "loss": 0.02,
      "step": 1534520
    },
    {
      "epoch": 2.5113083665547284,
      "grad_norm": 0.31580308079719543,
      "learning_rate": 5.010180346988397e-06,
      "loss": 0.0189,
      "step": 1534540
    },
    {
      "epoch": 2.511341096993382,
      "grad_norm": 0.5337337255477905,
      "learning_rate": 5.010114454774879e-06,
      "loss": 0.0126,
      "step": 1534560
    },
    {
      "epoch": 2.511373827432035,
      "grad_norm": 0.3125459551811218,
      "learning_rate": 5.010048562561363e-06,
      "loss": 0.0117,
      "step": 1534580
    },
    {
      "epoch": 2.5114065578706883,
      "grad_norm": 0.23755738139152527,
      "learning_rate": 5.009982670347845e-06,
      "loss": 0.0129,
      "step": 1534600
    },
    {
      "epoch": 2.511439288309342,
      "grad_norm": 0.5218738913536072,
      "learning_rate": 5.009916778134328e-06,
      "loss": 0.0167,
      "step": 1534620
    },
    {
      "epoch": 2.5114720187479955,
      "grad_norm": 0.310764878988266,
      "learning_rate": 5.009850885920812e-06,
      "loss": 0.0127,
      "step": 1534640
    },
    {
      "epoch": 2.5115047491866487,
      "grad_norm": 0.3309517502784729,
      "learning_rate": 5.009784993707294e-06,
      "loss": 0.0191,
      "step": 1534660
    },
    {
      "epoch": 2.511537479625302,
      "grad_norm": 3.1845943927764893,
      "learning_rate": 5.009719101493777e-06,
      "loss": 0.0181,
      "step": 1534680
    },
    {
      "epoch": 2.5115702100639554,
      "grad_norm": 0.09345706552267075,
      "learning_rate": 5.00965320928026e-06,
      "loss": 0.0161,
      "step": 1534700
    },
    {
      "epoch": 2.5116029405026086,
      "grad_norm": 0.36396199464797974,
      "learning_rate": 5.009587317066743e-06,
      "loss": 0.0175,
      "step": 1534720
    },
    {
      "epoch": 2.5116356709412617,
      "grad_norm": 0.37525954842567444,
      "learning_rate": 5.0095214248532255e-06,
      "loss": 0.0216,
      "step": 1534740
    },
    {
      "epoch": 2.5116684013799153,
      "grad_norm": 0.18964232504367828,
      "learning_rate": 5.009455532639709e-06,
      "loss": 0.0166,
      "step": 1534760
    },
    {
      "epoch": 2.5117011318185685,
      "grad_norm": 1.1735405921936035,
      "learning_rate": 5.009389640426191e-06,
      "loss": 0.0169,
      "step": 1534780
    },
    {
      "epoch": 2.511733862257222,
      "grad_norm": 0.31075146794319153,
      "learning_rate": 5.0093237482126745e-06,
      "loss": 0.0075,
      "step": 1534800
    },
    {
      "epoch": 2.511766592695875,
      "grad_norm": 0.7014963030815125,
      "learning_rate": 5.009257855999156e-06,
      "loss": 0.013,
      "step": 1534820
    },
    {
      "epoch": 2.511799323134529,
      "grad_norm": 0.5183954238891602,
      "learning_rate": 5.00919196378564e-06,
      "loss": 0.0179,
      "step": 1534840
    },
    {
      "epoch": 2.511832053573182,
      "grad_norm": 0.4971461296081543,
      "learning_rate": 5.009126071572123e-06,
      "loss": 0.0184,
      "step": 1534860
    },
    {
      "epoch": 2.511864784011835,
      "grad_norm": 0.40060359239578247,
      "learning_rate": 5.0090601793586055e-06,
      "loss": 0.0116,
      "step": 1534880
    },
    {
      "epoch": 2.5118975144504887,
      "grad_norm": 0.24122962355613708,
      "learning_rate": 5.008994287145088e-06,
      "loss": 0.0129,
      "step": 1534900
    },
    {
      "epoch": 2.511930244889142,
      "grad_norm": 0.5087289810180664,
      "learning_rate": 5.008928394931572e-06,
      "loss": 0.0092,
      "step": 1534920
    },
    {
      "epoch": 2.5119629753277954,
      "grad_norm": 0.47288617491722107,
      "learning_rate": 5.008862502718054e-06,
      "loss": 0.0148,
      "step": 1534940
    },
    {
      "epoch": 2.5119957057664486,
      "grad_norm": 0.34504541754722595,
      "learning_rate": 5.008796610504537e-06,
      "loss": 0.0101,
      "step": 1534960
    },
    {
      "epoch": 2.512028436205102,
      "grad_norm": 0.2868800461292267,
      "learning_rate": 5.008730718291019e-06,
      "loss": 0.0156,
      "step": 1534980
    },
    {
      "epoch": 2.5120611666437553,
      "grad_norm": 1.137663722038269,
      "learning_rate": 5.008664826077503e-06,
      "loss": 0.0151,
      "step": 1535000
    },
    {
      "epoch": 2.5120938970824085,
      "grad_norm": 0.7107380628585815,
      "learning_rate": 5.008598933863986e-06,
      "loss": 0.0183,
      "step": 1535020
    },
    {
      "epoch": 2.512126627521062,
      "grad_norm": 0.2144440859556198,
      "learning_rate": 5.008533041650468e-06,
      "loss": 0.0112,
      "step": 1535040
    },
    {
      "epoch": 2.5121593579597152,
      "grad_norm": 0.8105236887931824,
      "learning_rate": 5.008467149436952e-06,
      "loss": 0.0185,
      "step": 1535060
    },
    {
      "epoch": 2.512192088398369,
      "grad_norm": 0.39896929264068604,
      "learning_rate": 5.0084012572234346e-06,
      "loss": 0.0143,
      "step": 1535080
    },
    {
      "epoch": 2.512224818837022,
      "grad_norm": 0.26985251903533936,
      "learning_rate": 5.008335365009917e-06,
      "loss": 0.0167,
      "step": 1535100
    },
    {
      "epoch": 2.5122575492756756,
      "grad_norm": 0.18540123105049133,
      "learning_rate": 5.0082694727964e-06,
      "loss": 0.0141,
      "step": 1535120
    },
    {
      "epoch": 2.5122902797143287,
      "grad_norm": 0.31617772579193115,
      "learning_rate": 5.008203580582884e-06,
      "loss": 0.0148,
      "step": 1535140
    },
    {
      "epoch": 2.512323010152982,
      "grad_norm": 0.27445387840270996,
      "learning_rate": 5.0081376883693655e-06,
      "loss": 0.0112,
      "step": 1535160
    },
    {
      "epoch": 2.5123557405916355,
      "grad_norm": 0.4549281597137451,
      "learning_rate": 5.008071796155849e-06,
      "loss": 0.0153,
      "step": 1535180
    },
    {
      "epoch": 2.5123884710302886,
      "grad_norm": 0.19428883492946625,
      "learning_rate": 5.008005903942331e-06,
      "loss": 0.0109,
      "step": 1535200
    },
    {
      "epoch": 2.512421201468942,
      "grad_norm": 0.33228400349617004,
      "learning_rate": 5.007940011728815e-06,
      "loss": 0.0162,
      "step": 1535220
    },
    {
      "epoch": 2.5124539319075954,
      "grad_norm": 0.5012333393096924,
      "learning_rate": 5.0078741195152965e-06,
      "loss": 0.0134,
      "step": 1535240
    },
    {
      "epoch": 2.512486662346249,
      "grad_norm": 0.378890722990036,
      "learning_rate": 5.00780822730178e-06,
      "loss": 0.0175,
      "step": 1535260
    },
    {
      "epoch": 2.512519392784902,
      "grad_norm": 0.6425761580467224,
      "learning_rate": 5.007742335088263e-06,
      "loss": 0.0168,
      "step": 1535280
    },
    {
      "epoch": 2.5125521232235553,
      "grad_norm": 0.7387006878852844,
      "learning_rate": 5.0076764428747456e-06,
      "loss": 0.0123,
      "step": 1535300
    },
    {
      "epoch": 2.512584853662209,
      "grad_norm": 0.5291526913642883,
      "learning_rate": 5.007610550661228e-06,
      "loss": 0.0148,
      "step": 1535320
    },
    {
      "epoch": 2.512617584100862,
      "grad_norm": 0.4396883547306061,
      "learning_rate": 5.007544658447712e-06,
      "loss": 0.026,
      "step": 1535340
    },
    {
      "epoch": 2.5126503145395156,
      "grad_norm": 0.8048451542854309,
      "learning_rate": 5.007478766234195e-06,
      "loss": 0.0153,
      "step": 1535360
    },
    {
      "epoch": 2.5126830449781687,
      "grad_norm": 0.49760475754737854,
      "learning_rate": 5.007412874020677e-06,
      "loss": 0.021,
      "step": 1535380
    },
    {
      "epoch": 2.5127157754168223,
      "grad_norm": 0.6811778545379639,
      "learning_rate": 5.007346981807161e-06,
      "loss": 0.0121,
      "step": 1535400
    },
    {
      "epoch": 2.5127485058554755,
      "grad_norm": 0.2561943829059601,
      "learning_rate": 5.007281089593643e-06,
      "loss": 0.0178,
      "step": 1535420
    },
    {
      "epoch": 2.5127812362941286,
      "grad_norm": 1.492124319076538,
      "learning_rate": 5.0072151973801264e-06,
      "loss": 0.014,
      "step": 1535440
    },
    {
      "epoch": 2.5128139667327822,
      "grad_norm": 0.11336164176464081,
      "learning_rate": 5.007149305166608e-06,
      "loss": 0.0111,
      "step": 1535460
    },
    {
      "epoch": 2.5128466971714354,
      "grad_norm": 0.19816544651985168,
      "learning_rate": 5.007083412953092e-06,
      "loss": 0.0127,
      "step": 1535480
    },
    {
      "epoch": 2.512879427610089,
      "grad_norm": 0.11650272458791733,
      "learning_rate": 5.007017520739575e-06,
      "loss": 0.017,
      "step": 1535500
    },
    {
      "epoch": 2.512912158048742,
      "grad_norm": 0.20496459305286407,
      "learning_rate": 5.006951628526057e-06,
      "loss": 0.0152,
      "step": 1535520
    },
    {
      "epoch": 2.5129448884873957,
      "grad_norm": 0.8008539080619812,
      "learning_rate": 5.00688573631254e-06,
      "loss": 0.0173,
      "step": 1535540
    },
    {
      "epoch": 2.512977618926049,
      "grad_norm": 0.945976734161377,
      "learning_rate": 5.006819844099024e-06,
      "loss": 0.0186,
      "step": 1535560
    },
    {
      "epoch": 2.513010349364702,
      "grad_norm": 0.2852455675601959,
      "learning_rate": 5.006753951885506e-06,
      "loss": 0.0139,
      "step": 1535580
    },
    {
      "epoch": 2.5130430798033556,
      "grad_norm": 0.7040475606918335,
      "learning_rate": 5.006688059671989e-06,
      "loss": 0.0128,
      "step": 1535600
    },
    {
      "epoch": 2.5130758102420088,
      "grad_norm": 0.13768933713436127,
      "learning_rate": 5.006622167458471e-06,
      "loss": 0.0163,
      "step": 1535620
    },
    {
      "epoch": 2.5131085406806624,
      "grad_norm": 0.19319982826709747,
      "learning_rate": 5.006556275244955e-06,
      "loss": 0.0086,
      "step": 1535640
    },
    {
      "epoch": 2.5131412711193155,
      "grad_norm": 0.6212846636772156,
      "learning_rate": 5.006490383031437e-06,
      "loss": 0.0148,
      "step": 1535660
    },
    {
      "epoch": 2.513174001557969,
      "grad_norm": 0.10907387733459473,
      "learning_rate": 5.00642449081792e-06,
      "loss": 0.0148,
      "step": 1535680
    },
    {
      "epoch": 2.5132067319966223,
      "grad_norm": 0.462016224861145,
      "learning_rate": 5.006358598604404e-06,
      "loss": 0.0131,
      "step": 1535700
    },
    {
      "epoch": 2.5132394624352754,
      "grad_norm": 0.44351083040237427,
      "learning_rate": 5.0062927063908865e-06,
      "loss": 0.0112,
      "step": 1535720
    },
    {
      "epoch": 2.513272192873929,
      "grad_norm": 0.2726013660430908,
      "learning_rate": 5.006226814177369e-06,
      "loss": 0.0209,
      "step": 1535740
    },
    {
      "epoch": 2.513304923312582,
      "grad_norm": 0.47038811445236206,
      "learning_rate": 5.006160921963852e-06,
      "loss": 0.0178,
      "step": 1535760
    },
    {
      "epoch": 2.5133376537512353,
      "grad_norm": 0.3781123757362366,
      "learning_rate": 5.0060950297503355e-06,
      "loss": 0.0114,
      "step": 1535780
    },
    {
      "epoch": 2.513370384189889,
      "grad_norm": 0.6215547919273376,
      "learning_rate": 5.0060291375368174e-06,
      "loss": 0.0146,
      "step": 1535800
    },
    {
      "epoch": 2.5134031146285425,
      "grad_norm": 0.15399311482906342,
      "learning_rate": 5.005963245323301e-06,
      "loss": 0.009,
      "step": 1535820
    },
    {
      "epoch": 2.5134358450671956,
      "grad_norm": 0.17021335661411285,
      "learning_rate": 5.005897353109783e-06,
      "loss": 0.0196,
      "step": 1535840
    },
    {
      "epoch": 2.513468575505849,
      "grad_norm": 0.5153183937072754,
      "learning_rate": 5.0058314608962665e-06,
      "loss": 0.0113,
      "step": 1535860
    },
    {
      "epoch": 2.5135013059445024,
      "grad_norm": 0.4570302963256836,
      "learning_rate": 5.005765568682749e-06,
      "loss": 0.0115,
      "step": 1535880
    },
    {
      "epoch": 2.5135340363831555,
      "grad_norm": 0.29288727045059204,
      "learning_rate": 5.005699676469232e-06,
      "loss": 0.0104,
      "step": 1535900
    },
    {
      "epoch": 2.5135667668218087,
      "grad_norm": 0.2889600396156311,
      "learning_rate": 5.005633784255715e-06,
      "loss": 0.0165,
      "step": 1535920
    },
    {
      "epoch": 2.5135994972604623,
      "grad_norm": 0.8158196210861206,
      "learning_rate": 5.005567892042198e-06,
      "loss": 0.0087,
      "step": 1535940
    },
    {
      "epoch": 2.513632227699116,
      "grad_norm": 0.2041928619146347,
      "learning_rate": 5.00550199982868e-06,
      "loss": 0.0154,
      "step": 1535960
    },
    {
      "epoch": 2.513664958137769,
      "grad_norm": 0.10790783166885376,
      "learning_rate": 5.005436107615164e-06,
      "loss": 0.0103,
      "step": 1535980
    },
    {
      "epoch": 2.513697688576422,
      "grad_norm": 0.16835911571979523,
      "learning_rate": 5.005370215401646e-06,
      "loss": 0.0155,
      "step": 1536000
    },
    {
      "epoch": 2.5137304190150758,
      "grad_norm": 0.18676845729351044,
      "learning_rate": 5.005304323188129e-06,
      "loss": 0.0197,
      "step": 1536020
    },
    {
      "epoch": 2.513763149453729,
      "grad_norm": 0.2189824879169464,
      "learning_rate": 5.005238430974612e-06,
      "loss": 0.0206,
      "step": 1536040
    },
    {
      "epoch": 2.513795879892382,
      "grad_norm": 0.2630380392074585,
      "learning_rate": 5.005172538761095e-06,
      "loss": 0.0175,
      "step": 1536060
    },
    {
      "epoch": 2.5138286103310357,
      "grad_norm": 0.13716022670269012,
      "learning_rate": 5.005106646547578e-06,
      "loss": 0.0178,
      "step": 1536080
    },
    {
      "epoch": 2.5138613407696893,
      "grad_norm": 0.7681803703308105,
      "learning_rate": 5.005040754334061e-06,
      "loss": 0.0169,
      "step": 1536100
    },
    {
      "epoch": 2.5138940712083424,
      "grad_norm": 0.18922971189022064,
      "learning_rate": 5.004974862120544e-06,
      "loss": 0.0209,
      "step": 1536120
    },
    {
      "epoch": 2.5139268016469956,
      "grad_norm": 0.3075014054775238,
      "learning_rate": 5.0049089699070266e-06,
      "loss": 0.014,
      "step": 1536140
    },
    {
      "epoch": 2.513959532085649,
      "grad_norm": 0.2707075774669647,
      "learning_rate": 5.00484307769351e-06,
      "loss": 0.0125,
      "step": 1536160
    },
    {
      "epoch": 2.5139922625243023,
      "grad_norm": 0.7675715088844299,
      "learning_rate": 5.004777185479992e-06,
      "loss": 0.0128,
      "step": 1536180
    },
    {
      "epoch": 2.5140249929629555,
      "grad_norm": 0.8155969977378845,
      "learning_rate": 5.004711293266476e-06,
      "loss": 0.0207,
      "step": 1536200
    },
    {
      "epoch": 2.514057723401609,
      "grad_norm": 0.25336575508117676,
      "learning_rate": 5.0046454010529575e-06,
      "loss": 0.0093,
      "step": 1536220
    },
    {
      "epoch": 2.5140904538402626,
      "grad_norm": 0.3421863615512848,
      "learning_rate": 5.004579508839441e-06,
      "loss": 0.0149,
      "step": 1536240
    },
    {
      "epoch": 2.514123184278916,
      "grad_norm": 0.2342682182788849,
      "learning_rate": 5.004513616625923e-06,
      "loss": 0.0103,
      "step": 1536260
    },
    {
      "epoch": 2.514155914717569,
      "grad_norm": 0.367249995470047,
      "learning_rate": 5.004447724412407e-06,
      "loss": 0.0151,
      "step": 1536280
    },
    {
      "epoch": 2.5141886451562225,
      "grad_norm": 0.5683166980743408,
      "learning_rate": 5.004381832198889e-06,
      "loss": 0.013,
      "step": 1536300
    },
    {
      "epoch": 2.5142213755948757,
      "grad_norm": 0.12903550267219543,
      "learning_rate": 5.004315939985372e-06,
      "loss": 0.0109,
      "step": 1536320
    },
    {
      "epoch": 2.514254106033529,
      "grad_norm": 0.33194345235824585,
      "learning_rate": 5.004250047771855e-06,
      "loss": 0.0135,
      "step": 1536340
    },
    {
      "epoch": 2.5142868364721824,
      "grad_norm": 0.3728998601436615,
      "learning_rate": 5.004184155558338e-06,
      "loss": 0.0093,
      "step": 1536360
    },
    {
      "epoch": 2.5143195669108356,
      "grad_norm": 0.3766002953052521,
      "learning_rate": 5.00411826334482e-06,
      "loss": 0.0185,
      "step": 1536380
    },
    {
      "epoch": 2.514352297349489,
      "grad_norm": 0.2188730537891388,
      "learning_rate": 5.004052371131304e-06,
      "loss": 0.0194,
      "step": 1536400
    },
    {
      "epoch": 2.5143850277881423,
      "grad_norm": 0.6597800254821777,
      "learning_rate": 5.0039864789177875e-06,
      "loss": 0.016,
      "step": 1536420
    },
    {
      "epoch": 2.514417758226796,
      "grad_norm": 0.2484525889158249,
      "learning_rate": 5.003920586704269e-06,
      "loss": 0.0128,
      "step": 1536440
    },
    {
      "epoch": 2.514450488665449,
      "grad_norm": 0.579725980758667,
      "learning_rate": 5.003854694490753e-06,
      "loss": 0.0166,
      "step": 1536460
    },
    {
      "epoch": 2.514483219104102,
      "grad_norm": 1.0696828365325928,
      "learning_rate": 5.003788802277235e-06,
      "loss": 0.0189,
      "step": 1536480
    },
    {
      "epoch": 2.514515949542756,
      "grad_norm": 0.3878817558288574,
      "learning_rate": 5.003722910063718e-06,
      "loss": 0.014,
      "step": 1536500
    },
    {
      "epoch": 2.514548679981409,
      "grad_norm": 0.336654931306839,
      "learning_rate": 5.003657017850201e-06,
      "loss": 0.0124,
      "step": 1536520
    },
    {
      "epoch": 2.5145814104200626,
      "grad_norm": 0.2349410504102707,
      "learning_rate": 5.003591125636684e-06,
      "loss": 0.0133,
      "step": 1536540
    },
    {
      "epoch": 2.5146141408587157,
      "grad_norm": 0.4305466413497925,
      "learning_rate": 5.003525233423167e-06,
      "loss": 0.0095,
      "step": 1536560
    },
    {
      "epoch": 2.5146468712973693,
      "grad_norm": 0.310892254114151,
      "learning_rate": 5.00345934120965e-06,
      "loss": 0.0136,
      "step": 1536580
    },
    {
      "epoch": 2.5146796017360225,
      "grad_norm": 0.3111381232738495,
      "learning_rate": 5.003393448996132e-06,
      "loss": 0.0174,
      "step": 1536600
    },
    {
      "epoch": 2.5147123321746756,
      "grad_norm": 0.12297426909208298,
      "learning_rate": 5.003327556782616e-06,
      "loss": 0.0126,
      "step": 1536620
    },
    {
      "epoch": 2.514745062613329,
      "grad_norm": 0.15672671794891357,
      "learning_rate": 5.003261664569098e-06,
      "loss": 0.0157,
      "step": 1536640
    },
    {
      "epoch": 2.5147777930519823,
      "grad_norm": 0.39756253361701965,
      "learning_rate": 5.003195772355581e-06,
      "loss": 0.022,
      "step": 1536660
    },
    {
      "epoch": 2.514810523490636,
      "grad_norm": 0.24447914958000183,
      "learning_rate": 5.003129880142064e-06,
      "loss": 0.0174,
      "step": 1536680
    },
    {
      "epoch": 2.514843253929289,
      "grad_norm": 0.417643278837204,
      "learning_rate": 5.003063987928547e-06,
      "loss": 0.0097,
      "step": 1536700
    },
    {
      "epoch": 2.5148759843679427,
      "grad_norm": 0.8154560327529907,
      "learning_rate": 5.002998095715029e-06,
      "loss": 0.011,
      "step": 1536720
    },
    {
      "epoch": 2.514908714806596,
      "grad_norm": 0.18578438460826874,
      "learning_rate": 5.002932203501513e-06,
      "loss": 0.0084,
      "step": 1536740
    },
    {
      "epoch": 2.514941445245249,
      "grad_norm": 0.20493602752685547,
      "learning_rate": 5.002866311287996e-06,
      "loss": 0.0134,
      "step": 1536760
    },
    {
      "epoch": 2.5149741756839026,
      "grad_norm": 0.6282975673675537,
      "learning_rate": 5.0028004190744785e-06,
      "loss": 0.0141,
      "step": 1536780
    },
    {
      "epoch": 2.5150069061225557,
      "grad_norm": 0.28052830696105957,
      "learning_rate": 5.002734526860962e-06,
      "loss": 0.0131,
      "step": 1536800
    },
    {
      "epoch": 2.5150396365612093,
      "grad_norm": 0.7575987577438354,
      "learning_rate": 5.002668634647444e-06,
      "loss": 0.0149,
      "step": 1536820
    },
    {
      "epoch": 2.5150723669998625,
      "grad_norm": 1.1823697090148926,
      "learning_rate": 5.0026027424339275e-06,
      "loss": 0.0209,
      "step": 1536840
    },
    {
      "epoch": 2.515105097438516,
      "grad_norm": 0.475568562746048,
      "learning_rate": 5.002536850220409e-06,
      "loss": 0.011,
      "step": 1536860
    },
    {
      "epoch": 2.515137827877169,
      "grad_norm": 0.7245591282844543,
      "learning_rate": 5.002470958006893e-06,
      "loss": 0.0147,
      "step": 1536880
    },
    {
      "epoch": 2.5151705583158224,
      "grad_norm": 0.4930466115474701,
      "learning_rate": 5.002405065793376e-06,
      "loss": 0.0114,
      "step": 1536900
    },
    {
      "epoch": 2.515203288754476,
      "grad_norm": 0.27249589562416077,
      "learning_rate": 5.0023391735798585e-06,
      "loss": 0.0189,
      "step": 1536920
    },
    {
      "epoch": 2.515236019193129,
      "grad_norm": 0.25770702958106995,
      "learning_rate": 5.002273281366341e-06,
      "loss": 0.0123,
      "step": 1536940
    },
    {
      "epoch": 2.5152687496317827,
      "grad_norm": 0.09156011790037155,
      "learning_rate": 5.002207389152825e-06,
      "loss": 0.0186,
      "step": 1536960
    },
    {
      "epoch": 2.515301480070436,
      "grad_norm": 0.34245139360427856,
      "learning_rate": 5.002141496939307e-06,
      "loss": 0.0265,
      "step": 1536980
    },
    {
      "epoch": 2.5153342105090895,
      "grad_norm": 1.033598780632019,
      "learning_rate": 5.00207560472579e-06,
      "loss": 0.0195,
      "step": 1537000
    },
    {
      "epoch": 2.5153669409477426,
      "grad_norm": 0.44479092955589294,
      "learning_rate": 5.002009712512272e-06,
      "loss": 0.0182,
      "step": 1537020
    },
    {
      "epoch": 2.5153996713863958,
      "grad_norm": 0.5565369725227356,
      "learning_rate": 5.001943820298756e-06,
      "loss": 0.0138,
      "step": 1537040
    },
    {
      "epoch": 2.5154324018250493,
      "grad_norm": 0.3584947884082794,
      "learning_rate": 5.001877928085238e-06,
      "loss": 0.0125,
      "step": 1537060
    },
    {
      "epoch": 2.5154651322637025,
      "grad_norm": 1.8495349884033203,
      "learning_rate": 5.001812035871721e-06,
      "loss": 0.0153,
      "step": 1537080
    },
    {
      "epoch": 2.515497862702356,
      "grad_norm": 0.5333822965621948,
      "learning_rate": 5.001746143658204e-06,
      "loss": 0.0194,
      "step": 1537100
    },
    {
      "epoch": 2.5155305931410092,
      "grad_norm": 0.2480362206697464,
      "learning_rate": 5.001680251444688e-06,
      "loss": 0.0137,
      "step": 1537120
    },
    {
      "epoch": 2.515563323579663,
      "grad_norm": 0.28381797671318054,
      "learning_rate": 5.00161435923117e-06,
      "loss": 0.016,
      "step": 1537140
    },
    {
      "epoch": 2.515596054018316,
      "grad_norm": 0.2760202884674072,
      "learning_rate": 5.001548467017653e-06,
      "loss": 0.0168,
      "step": 1537160
    },
    {
      "epoch": 2.515628784456969,
      "grad_norm": 0.3379572629928589,
      "learning_rate": 5.001482574804137e-06,
      "loss": 0.018,
      "step": 1537180
    },
    {
      "epoch": 2.5156615148956227,
      "grad_norm": 0.1439332365989685,
      "learning_rate": 5.0014166825906185e-06,
      "loss": 0.0124,
      "step": 1537200
    },
    {
      "epoch": 2.515694245334276,
      "grad_norm": 0.1839980036020279,
      "learning_rate": 5.001350790377102e-06,
      "loss": 0.0104,
      "step": 1537220
    },
    {
      "epoch": 2.5157269757729295,
      "grad_norm": 0.22904354333877563,
      "learning_rate": 5.001284898163584e-06,
      "loss": 0.0126,
      "step": 1537240
    },
    {
      "epoch": 2.5157597062115826,
      "grad_norm": 0.5521594882011414,
      "learning_rate": 5.001219005950068e-06,
      "loss": 0.0163,
      "step": 1537260
    },
    {
      "epoch": 2.515792436650236,
      "grad_norm": 1.1275568008422852,
      "learning_rate": 5.0011531137365495e-06,
      "loss": 0.0163,
      "step": 1537280
    },
    {
      "epoch": 2.5158251670888894,
      "grad_norm": 1.5702457427978516,
      "learning_rate": 5.001087221523033e-06,
      "loss": 0.0212,
      "step": 1537300
    },
    {
      "epoch": 2.5158578975275425,
      "grad_norm": 0.1476823389530182,
      "learning_rate": 5.001021329309516e-06,
      "loss": 0.0166,
      "step": 1537320
    },
    {
      "epoch": 2.515890627966196,
      "grad_norm": 0.7050359845161438,
      "learning_rate": 5.0009554370959986e-06,
      "loss": 0.0173,
      "step": 1537340
    },
    {
      "epoch": 2.5159233584048493,
      "grad_norm": 0.21737103164196014,
      "learning_rate": 5.000889544882481e-06,
      "loss": 0.012,
      "step": 1537360
    },
    {
      "epoch": 2.5159560888435024,
      "grad_norm": 0.3685593008995056,
      "learning_rate": 5.000823652668965e-06,
      "loss": 0.011,
      "step": 1537380
    },
    {
      "epoch": 2.515988819282156,
      "grad_norm": 0.2611522376537323,
      "learning_rate": 5.000757760455447e-06,
      "loss": 0.0151,
      "step": 1537400
    },
    {
      "epoch": 2.5160215497208096,
      "grad_norm": 1.28135347366333,
      "learning_rate": 5.00069186824193e-06,
      "loss": 0.0246,
      "step": 1537420
    },
    {
      "epoch": 2.5160542801594628,
      "grad_norm": 0.8511618375778198,
      "learning_rate": 5.000625976028412e-06,
      "loss": 0.0112,
      "step": 1537440
    },
    {
      "epoch": 2.516087010598116,
      "grad_norm": 0.34705278277397156,
      "learning_rate": 5.000560083814896e-06,
      "loss": 0.0117,
      "step": 1537460
    },
    {
      "epoch": 2.5161197410367695,
      "grad_norm": 0.6941786408424377,
      "learning_rate": 5.0004941916013794e-06,
      "loss": 0.0114,
      "step": 1537480
    },
    {
      "epoch": 2.5161524714754226,
      "grad_norm": 1.5970327854156494,
      "learning_rate": 5.000428299387861e-06,
      "loss": 0.0116,
      "step": 1537500
    },
    {
      "epoch": 2.516185201914076,
      "grad_norm": 0.5801124572753906,
      "learning_rate": 5.000362407174345e-06,
      "loss": 0.0185,
      "step": 1537520
    },
    {
      "epoch": 2.5162179323527294,
      "grad_norm": 0.11461230367422104,
      "learning_rate": 5.000296514960828e-06,
      "loss": 0.0144,
      "step": 1537540
    },
    {
      "epoch": 2.516250662791383,
      "grad_norm": 0.9429715275764465,
      "learning_rate": 5.00023062274731e-06,
      "loss": 0.0159,
      "step": 1537560
    },
    {
      "epoch": 2.516283393230036,
      "grad_norm": 0.5359214544296265,
      "learning_rate": 5.000164730533793e-06,
      "loss": 0.013,
      "step": 1537580
    },
    {
      "epoch": 2.5163161236686893,
      "grad_norm": 0.19473494589328766,
      "learning_rate": 5.000098838320277e-06,
      "loss": 0.0143,
      "step": 1537600
    },
    {
      "epoch": 2.516348854107343,
      "grad_norm": 0.2560187876224518,
      "learning_rate": 5.000032946106759e-06,
      "loss": 0.014,
      "step": 1537620
    },
    {
      "epoch": 2.516381584545996,
      "grad_norm": 0.14552153646945953,
      "learning_rate": 4.999967053893242e-06,
      "loss": 0.0219,
      "step": 1537640
    },
    {
      "epoch": 2.516414314984649,
      "grad_norm": 0.6925739049911499,
      "learning_rate": 4.999901161679725e-06,
      "loss": 0.0129,
      "step": 1537660
    },
    {
      "epoch": 2.5164470454233028,
      "grad_norm": 0.5738211870193481,
      "learning_rate": 4.999835269466208e-06,
      "loss": 0.0124,
      "step": 1537680
    },
    {
      "epoch": 2.5164797758619564,
      "grad_norm": 1.0418603420257568,
      "learning_rate": 4.99976937725269e-06,
      "loss": 0.0226,
      "step": 1537700
    },
    {
      "epoch": 2.5165125063006095,
      "grad_norm": 0.511155366897583,
      "learning_rate": 4.999703485039173e-06,
      "loss": 0.0181,
      "step": 1537720
    },
    {
      "epoch": 2.5165452367392627,
      "grad_norm": 0.33983683586120605,
      "learning_rate": 4.999637592825656e-06,
      "loss": 0.0097,
      "step": 1537740
    },
    {
      "epoch": 2.5165779671779163,
      "grad_norm": 0.12606216967105865,
      "learning_rate": 4.9995717006121395e-06,
      "loss": 0.0138,
      "step": 1537760
    },
    {
      "epoch": 2.5166106976165694,
      "grad_norm": 0.48717305064201355,
      "learning_rate": 4.999505808398622e-06,
      "loss": 0.0121,
      "step": 1537780
    },
    {
      "epoch": 2.5166434280552226,
      "grad_norm": 0.21047918498516083,
      "learning_rate": 4.999439916185105e-06,
      "loss": 0.0119,
      "step": 1537800
    },
    {
      "epoch": 2.516676158493876,
      "grad_norm": 0.6672909259796143,
      "learning_rate": 4.999374023971588e-06,
      "loss": 0.0129,
      "step": 1537820
    },
    {
      "epoch": 2.5167088889325293,
      "grad_norm": 0.7070189118385315,
      "learning_rate": 4.9993081317580704e-06,
      "loss": 0.0178,
      "step": 1537840
    },
    {
      "epoch": 2.516741619371183,
      "grad_norm": 0.35050034523010254,
      "learning_rate": 4.999242239544553e-06,
      "loss": 0.0204,
      "step": 1537860
    },
    {
      "epoch": 2.516774349809836,
      "grad_norm": 0.053111061453819275,
      "learning_rate": 4.999176347331036e-06,
      "loss": 0.0113,
      "step": 1537880
    },
    {
      "epoch": 2.5168070802484896,
      "grad_norm": 1.1817398071289062,
      "learning_rate": 4.999110455117519e-06,
      "loss": 0.0125,
      "step": 1537900
    },
    {
      "epoch": 2.516839810687143,
      "grad_norm": 0.6408733129501343,
      "learning_rate": 4.999044562904002e-06,
      "loss": 0.016,
      "step": 1537920
    },
    {
      "epoch": 2.516872541125796,
      "grad_norm": 0.6097156405448914,
      "learning_rate": 4.998978670690485e-06,
      "loss": 0.0173,
      "step": 1537940
    },
    {
      "epoch": 2.5169052715644495,
      "grad_norm": 0.6973088979721069,
      "learning_rate": 4.998912778476968e-06,
      "loss": 0.0216,
      "step": 1537960
    },
    {
      "epoch": 2.5169380020031027,
      "grad_norm": 0.6449931859970093,
      "learning_rate": 4.9988468862634505e-06,
      "loss": 0.0134,
      "step": 1537980
    },
    {
      "epoch": 2.5169707324417563,
      "grad_norm": 0.3259566128253937,
      "learning_rate": 4.998780994049934e-06,
      "loss": 0.0117,
      "step": 1538000
    },
    {
      "epoch": 2.5170034628804094,
      "grad_norm": 0.5468605756759644,
      "learning_rate": 4.998715101836417e-06,
      "loss": 0.0136,
      "step": 1538020
    },
    {
      "epoch": 2.517036193319063,
      "grad_norm": 0.20925091207027435,
      "learning_rate": 4.9986492096228995e-06,
      "loss": 0.0138,
      "step": 1538040
    },
    {
      "epoch": 2.517068923757716,
      "grad_norm": 0.17643575370311737,
      "learning_rate": 4.998583317409382e-06,
      "loss": 0.0097,
      "step": 1538060
    },
    {
      "epoch": 2.5171016541963693,
      "grad_norm": 1.9073607921600342,
      "learning_rate": 4.998517425195865e-06,
      "loss": 0.0209,
      "step": 1538080
    },
    {
      "epoch": 2.517134384635023,
      "grad_norm": 0.07732494920492172,
      "learning_rate": 4.998451532982348e-06,
      "loss": 0.0102,
      "step": 1538100
    },
    {
      "epoch": 2.517167115073676,
      "grad_norm": 0.2120255082845688,
      "learning_rate": 4.9983856407688305e-06,
      "loss": 0.0105,
      "step": 1538120
    },
    {
      "epoch": 2.5171998455123297,
      "grad_norm": 0.7211735844612122,
      "learning_rate": 4.998319748555313e-06,
      "loss": 0.0189,
      "step": 1538140
    },
    {
      "epoch": 2.517232575950983,
      "grad_norm": 0.292610764503479,
      "learning_rate": 4.998253856341797e-06,
      "loss": 0.0138,
      "step": 1538160
    },
    {
      "epoch": 2.5172653063896364,
      "grad_norm": 0.061061277985572815,
      "learning_rate": 4.9981879641282796e-06,
      "loss": 0.0109,
      "step": 1538180
    },
    {
      "epoch": 2.5172980368282896,
      "grad_norm": 0.33152884244918823,
      "learning_rate": 4.998122071914762e-06,
      "loss": 0.0111,
      "step": 1538200
    },
    {
      "epoch": 2.5173307672669427,
      "grad_norm": 0.10786176472902298,
      "learning_rate": 4.998056179701245e-06,
      "loss": 0.0166,
      "step": 1538220
    },
    {
      "epoch": 2.5173634977055963,
      "grad_norm": 0.25573524832725525,
      "learning_rate": 4.997990287487728e-06,
      "loss": 0.0112,
      "step": 1538240
    },
    {
      "epoch": 2.5173962281442495,
      "grad_norm": 0.313657283782959,
      "learning_rate": 4.9979243952742105e-06,
      "loss": 0.0195,
      "step": 1538260
    },
    {
      "epoch": 2.517428958582903,
      "grad_norm": 0.10138397663831711,
      "learning_rate": 4.997858503060693e-06,
      "loss": 0.0183,
      "step": 1538280
    },
    {
      "epoch": 2.517461689021556,
      "grad_norm": 0.40989232063293457,
      "learning_rate": 4.997792610847176e-06,
      "loss": 0.015,
      "step": 1538300
    },
    {
      "epoch": 2.51749441946021,
      "grad_norm": 0.2815214991569519,
      "learning_rate": 4.99772671863366e-06,
      "loss": 0.0227,
      "step": 1538320
    },
    {
      "epoch": 2.517527149898863,
      "grad_norm": 0.5010213255882263,
      "learning_rate": 4.997660826420142e-06,
      "loss": 0.0189,
      "step": 1538340
    },
    {
      "epoch": 2.517559880337516,
      "grad_norm": 0.2687871754169464,
      "learning_rate": 4.997594934206625e-06,
      "loss": 0.012,
      "step": 1538360
    },
    {
      "epoch": 2.5175926107761697,
      "grad_norm": 0.23912887275218964,
      "learning_rate": 4.997529041993109e-06,
      "loss": 0.0122,
      "step": 1538380
    },
    {
      "epoch": 2.517625341214823,
      "grad_norm": 0.1242712065577507,
      "learning_rate": 4.997463149779591e-06,
      "loss": 0.0096,
      "step": 1538400
    },
    {
      "epoch": 2.5176580716534764,
      "grad_norm": 0.25265905261039734,
      "learning_rate": 4.997397257566074e-06,
      "loss": 0.0138,
      "step": 1538420
    },
    {
      "epoch": 2.5176908020921296,
      "grad_norm": 0.22999612987041473,
      "learning_rate": 4.997331365352557e-06,
      "loss": 0.0148,
      "step": 1538440
    },
    {
      "epoch": 2.517723532530783,
      "grad_norm": 0.4049510061740875,
      "learning_rate": 4.99726547313904e-06,
      "loss": 0.0118,
      "step": 1538460
    },
    {
      "epoch": 2.5177562629694363,
      "grad_norm": 0.1623271256685257,
      "learning_rate": 4.997199580925522e-06,
      "loss": 0.0128,
      "step": 1538480
    },
    {
      "epoch": 2.5177889934080895,
      "grad_norm": 1.5987590551376343,
      "learning_rate": 4.997133688712005e-06,
      "loss": 0.0183,
      "step": 1538500
    },
    {
      "epoch": 2.517821723846743,
      "grad_norm": 0.32792508602142334,
      "learning_rate": 4.997067796498488e-06,
      "loss": 0.0121,
      "step": 1538520
    },
    {
      "epoch": 2.5178544542853962,
      "grad_norm": 2.157198667526245,
      "learning_rate": 4.997001904284971e-06,
      "loss": 0.0118,
      "step": 1538540
    },
    {
      "epoch": 2.51788718472405,
      "grad_norm": 0.32976964116096497,
      "learning_rate": 4.996936012071454e-06,
      "loss": 0.0182,
      "step": 1538560
    },
    {
      "epoch": 2.517919915162703,
      "grad_norm": 0.3306587040424347,
      "learning_rate": 4.996870119857937e-06,
      "loss": 0.0115,
      "step": 1538580
    },
    {
      "epoch": 2.5179526456013566,
      "grad_norm": 0.9214659929275513,
      "learning_rate": 4.99680422764442e-06,
      "loss": 0.0213,
      "step": 1538600
    },
    {
      "epoch": 2.5179853760400097,
      "grad_norm": 0.4717678129673004,
      "learning_rate": 4.996738335430902e-06,
      "loss": 0.0132,
      "step": 1538620
    },
    {
      "epoch": 2.518018106478663,
      "grad_norm": 0.0708458349108696,
      "learning_rate": 4.996672443217385e-06,
      "loss": 0.0142,
      "step": 1538640
    },
    {
      "epoch": 2.5180508369173165,
      "grad_norm": 0.5700904130935669,
      "learning_rate": 4.996606551003868e-06,
      "loss": 0.0184,
      "step": 1538660
    },
    {
      "epoch": 2.5180835673559696,
      "grad_norm": 0.13025766611099243,
      "learning_rate": 4.996540658790351e-06,
      "loss": 0.0132,
      "step": 1538680
    },
    {
      "epoch": 2.518116297794623,
      "grad_norm": 0.17506949603557587,
      "learning_rate": 4.996474766576834e-06,
      "loss": 0.0106,
      "step": 1538700
    },
    {
      "epoch": 2.5181490282332764,
      "grad_norm": 0.2961795926094055,
      "learning_rate": 4.996408874363317e-06,
      "loss": 0.0182,
      "step": 1538720
    },
    {
      "epoch": 2.51818175867193,
      "grad_norm": 0.2557677924633026,
      "learning_rate": 4.9963429821498e-06,
      "loss": 0.0117,
      "step": 1538740
    },
    {
      "epoch": 2.518214489110583,
      "grad_norm": 0.10566424578428268,
      "learning_rate": 4.996277089936282e-06,
      "loss": 0.0194,
      "step": 1538760
    },
    {
      "epoch": 2.5182472195492362,
      "grad_norm": 0.21002058684825897,
      "learning_rate": 4.996211197722766e-06,
      "loss": 0.0154,
      "step": 1538780
    },
    {
      "epoch": 2.51827994998789,
      "grad_norm": 0.5444995760917664,
      "learning_rate": 4.996145305509249e-06,
      "loss": 0.0193,
      "step": 1538800
    },
    {
      "epoch": 2.518312680426543,
      "grad_norm": 0.24741233885288239,
      "learning_rate": 4.9960794132957315e-06,
      "loss": 0.0214,
      "step": 1538820
    },
    {
      "epoch": 2.518345410865196,
      "grad_norm": 0.13569684326648712,
      "learning_rate": 4.996013521082214e-06,
      "loss": 0.0139,
      "step": 1538840
    },
    {
      "epoch": 2.5183781413038497,
      "grad_norm": 0.25269266963005066,
      "learning_rate": 4.995947628868697e-06,
      "loss": 0.0137,
      "step": 1538860
    },
    {
      "epoch": 2.5184108717425033,
      "grad_norm": 0.16062717139720917,
      "learning_rate": 4.99588173665518e-06,
      "loss": 0.0158,
      "step": 1538880
    },
    {
      "epoch": 2.5184436021811565,
      "grad_norm": 1.131676197052002,
      "learning_rate": 4.9958158444416624e-06,
      "loss": 0.0114,
      "step": 1538900
    },
    {
      "epoch": 2.5184763326198096,
      "grad_norm": 0.6319554448127747,
      "learning_rate": 4.995749952228145e-06,
      "loss": 0.0107,
      "step": 1538920
    },
    {
      "epoch": 2.5185090630584632,
      "grad_norm": 0.08622073382139206,
      "learning_rate": 4.995684060014629e-06,
      "loss": 0.0145,
      "step": 1538940
    },
    {
      "epoch": 2.5185417934971164,
      "grad_norm": 0.45293423533439636,
      "learning_rate": 4.9956181678011115e-06,
      "loss": 0.0113,
      "step": 1538960
    },
    {
      "epoch": 2.5185745239357695,
      "grad_norm": 0.2755718231201172,
      "learning_rate": 4.995552275587594e-06,
      "loss": 0.0258,
      "step": 1538980
    },
    {
      "epoch": 2.518607254374423,
      "grad_norm": 0.32032936811447144,
      "learning_rate": 4.995486383374077e-06,
      "loss": 0.0085,
      "step": 1539000
    },
    {
      "epoch": 2.5186399848130767,
      "grad_norm": 0.6127433776855469,
      "learning_rate": 4.99542049116056e-06,
      "loss": 0.0144,
      "step": 1539020
    },
    {
      "epoch": 2.51867271525173,
      "grad_norm": 0.2241189330816269,
      "learning_rate": 4.995354598947043e-06,
      "loss": 0.0144,
      "step": 1539040
    },
    {
      "epoch": 2.518705445690383,
      "grad_norm": 0.168711856007576,
      "learning_rate": 4.995288706733526e-06,
      "loss": 0.0137,
      "step": 1539060
    },
    {
      "epoch": 2.5187381761290366,
      "grad_norm": 0.17649327218532562,
      "learning_rate": 4.995222814520009e-06,
      "loss": 0.0222,
      "step": 1539080
    },
    {
      "epoch": 2.5187709065676898,
      "grad_norm": 0.3439604341983795,
      "learning_rate": 4.9951569223064915e-06,
      "loss": 0.0071,
      "step": 1539100
    },
    {
      "epoch": 2.518803637006343,
      "grad_norm": 0.37566664814949036,
      "learning_rate": 4.995091030092974e-06,
      "loss": 0.0149,
      "step": 1539120
    },
    {
      "epoch": 2.5188363674449965,
      "grad_norm": 0.06772023439407349,
      "learning_rate": 4.995025137879457e-06,
      "loss": 0.0128,
      "step": 1539140
    },
    {
      "epoch": 2.51886909788365,
      "grad_norm": 0.12726393342018127,
      "learning_rate": 4.99495924566594e-06,
      "loss": 0.0139,
      "step": 1539160
    },
    {
      "epoch": 2.5189018283223032,
      "grad_norm": 0.29043370485305786,
      "learning_rate": 4.994893353452423e-06,
      "loss": 0.0141,
      "step": 1539180
    },
    {
      "epoch": 2.5189345587609564,
      "grad_norm": 0.9521710872650146,
      "learning_rate": 4.994827461238906e-06,
      "loss": 0.0201,
      "step": 1539200
    },
    {
      "epoch": 2.51896728919961,
      "grad_norm": 0.7371534705162048,
      "learning_rate": 4.994761569025389e-06,
      "loss": 0.0136,
      "step": 1539220
    },
    {
      "epoch": 2.519000019638263,
      "grad_norm": 1.9130898714065552,
      "learning_rate": 4.9946956768118715e-06,
      "loss": 0.0197,
      "step": 1539240
    },
    {
      "epoch": 2.5190327500769163,
      "grad_norm": 1.2508450746536255,
      "learning_rate": 4.994629784598354e-06,
      "loss": 0.0156,
      "step": 1539260
    },
    {
      "epoch": 2.51906548051557,
      "grad_norm": 0.09777970612049103,
      "learning_rate": 4.994563892384837e-06,
      "loss": 0.0161,
      "step": 1539280
    },
    {
      "epoch": 2.5190982109542235,
      "grad_norm": 2.222661256790161,
      "learning_rate": 4.99449800017132e-06,
      "loss": 0.0151,
      "step": 1539300
    },
    {
      "epoch": 2.5191309413928766,
      "grad_norm": 0.5083720684051514,
      "learning_rate": 4.9944321079578025e-06,
      "loss": 0.0155,
      "step": 1539320
    },
    {
      "epoch": 2.51916367183153,
      "grad_norm": 0.40954697132110596,
      "learning_rate": 4.994366215744286e-06,
      "loss": 0.0137,
      "step": 1539340
    },
    {
      "epoch": 2.5191964022701834,
      "grad_norm": 0.28030726313591003,
      "learning_rate": 4.994300323530769e-06,
      "loss": 0.0159,
      "step": 1539360
    },
    {
      "epoch": 2.5192291327088365,
      "grad_norm": 0.7079460024833679,
      "learning_rate": 4.9942344313172516e-06,
      "loss": 0.0147,
      "step": 1539380
    },
    {
      "epoch": 2.5192618631474897,
      "grad_norm": 0.528443455696106,
      "learning_rate": 4.994168539103735e-06,
      "loss": 0.0125,
      "step": 1539400
    },
    {
      "epoch": 2.5192945935861433,
      "grad_norm": 0.5553445816040039,
      "learning_rate": 4.994102646890218e-06,
      "loss": 0.0144,
      "step": 1539420
    },
    {
      "epoch": 2.5193273240247964,
      "grad_norm": 0.3848111927509308,
      "learning_rate": 4.994036754676701e-06,
      "loss": 0.0154,
      "step": 1539440
    },
    {
      "epoch": 2.51936005446345,
      "grad_norm": 0.24090458452701569,
      "learning_rate": 4.993970862463183e-06,
      "loss": 0.0132,
      "step": 1539460
    },
    {
      "epoch": 2.519392784902103,
      "grad_norm": 0.47336673736572266,
      "learning_rate": 4.993904970249666e-06,
      "loss": 0.0166,
      "step": 1539480
    },
    {
      "epoch": 2.5194255153407568,
      "grad_norm": 0.36505255103111267,
      "learning_rate": 4.993839078036149e-06,
      "loss": 0.0151,
      "step": 1539500
    },
    {
      "epoch": 2.51945824577941,
      "grad_norm": 0.6947579383850098,
      "learning_rate": 4.993773185822632e-06,
      "loss": 0.0115,
      "step": 1539520
    },
    {
      "epoch": 2.519490976218063,
      "grad_norm": 0.23151865601539612,
      "learning_rate": 4.993707293609114e-06,
      "loss": 0.0153,
      "step": 1539540
    },
    {
      "epoch": 2.5195237066567167,
      "grad_norm": 0.24373824894428253,
      "learning_rate": 4.993641401395597e-06,
      "loss": 0.0108,
      "step": 1539560
    },
    {
      "epoch": 2.51955643709537,
      "grad_norm": 0.8051115870475769,
      "learning_rate": 4.993575509182081e-06,
      "loss": 0.0142,
      "step": 1539580
    },
    {
      "epoch": 2.5195891675340234,
      "grad_norm": 0.2956741750240326,
      "learning_rate": 4.993509616968563e-06,
      "loss": 0.0097,
      "step": 1539600
    },
    {
      "epoch": 2.5196218979726765,
      "grad_norm": 0.44915303587913513,
      "learning_rate": 4.993443724755046e-06,
      "loss": 0.0208,
      "step": 1539620
    },
    {
      "epoch": 2.51965462841133,
      "grad_norm": 0.1256493330001831,
      "learning_rate": 4.993377832541529e-06,
      "loss": 0.0128,
      "step": 1539640
    },
    {
      "epoch": 2.5196873588499833,
      "grad_norm": 0.3412862718105316,
      "learning_rate": 4.993311940328012e-06,
      "loss": 0.016,
      "step": 1539660
    },
    {
      "epoch": 2.5197200892886364,
      "grad_norm": 0.3760503828525543,
      "learning_rate": 4.993246048114494e-06,
      "loss": 0.0151,
      "step": 1539680
    },
    {
      "epoch": 2.51975281972729,
      "grad_norm": 0.298556923866272,
      "learning_rate": 4.993180155900977e-06,
      "loss": 0.0149,
      "step": 1539700
    },
    {
      "epoch": 2.519785550165943,
      "grad_norm": 0.14773142337799072,
      "learning_rate": 4.99311426368746e-06,
      "loss": 0.0167,
      "step": 1539720
    },
    {
      "epoch": 2.519818280604597,
      "grad_norm": 0.2653162479400635,
      "learning_rate": 4.9930483714739434e-06,
      "loss": 0.0138,
      "step": 1539740
    },
    {
      "epoch": 2.51985101104325,
      "grad_norm": 0.34771743416786194,
      "learning_rate": 4.992982479260426e-06,
      "loss": 0.0146,
      "step": 1539760
    },
    {
      "epoch": 2.5198837414819035,
      "grad_norm": 0.5177499055862427,
      "learning_rate": 4.992916587046909e-06,
      "loss": 0.0143,
      "step": 1539780
    },
    {
      "epoch": 2.5199164719205567,
      "grad_norm": 0.24523669481277466,
      "learning_rate": 4.9928506948333925e-06,
      "loss": 0.0115,
      "step": 1539800
    },
    {
      "epoch": 2.51994920235921,
      "grad_norm": 0.3489745557308197,
      "learning_rate": 4.992784802619875e-06,
      "loss": 0.0114,
      "step": 1539820
    },
    {
      "epoch": 2.5199819327978634,
      "grad_norm": 0.5395439863204956,
      "learning_rate": 4.992718910406358e-06,
      "loss": 0.0183,
      "step": 1539840
    },
    {
      "epoch": 2.5200146632365166,
      "grad_norm": 0.21700704097747803,
      "learning_rate": 4.992653018192841e-06,
      "loss": 0.0212,
      "step": 1539860
    },
    {
      "epoch": 2.52004739367517,
      "grad_norm": 0.4049808979034424,
      "learning_rate": 4.9925871259793235e-06,
      "loss": 0.0159,
      "step": 1539880
    },
    {
      "epoch": 2.5200801241138233,
      "grad_norm": 0.6815827488899231,
      "learning_rate": 4.992521233765806e-06,
      "loss": 0.019,
      "step": 1539900
    },
    {
      "epoch": 2.520112854552477,
      "grad_norm": 0.26939377188682556,
      "learning_rate": 4.992455341552289e-06,
      "loss": 0.0134,
      "step": 1539920
    },
    {
      "epoch": 2.52014558499113,
      "grad_norm": 0.28420621156692505,
      "learning_rate": 4.992389449338772e-06,
      "loss": 0.016,
      "step": 1539940
    },
    {
      "epoch": 2.520178315429783,
      "grad_norm": 0.33819055557250977,
      "learning_rate": 4.992323557125255e-06,
      "loss": 0.018,
      "step": 1539960
    },
    {
      "epoch": 2.520211045868437,
      "grad_norm": 0.12648823857307434,
      "learning_rate": 4.992257664911738e-06,
      "loss": 0.0153,
      "step": 1539980
    },
    {
      "epoch": 2.52024377630709,
      "grad_norm": 0.18630725145339966,
      "learning_rate": 4.992191772698221e-06,
      "loss": 0.0121,
      "step": 1540000
    },
    {
      "epoch": 2.5202765067457435,
      "grad_norm": 0.4724891185760498,
      "learning_rate": 4.9921258804847035e-06,
      "loss": 0.0075,
      "step": 1540020
    },
    {
      "epoch": 2.5203092371843967,
      "grad_norm": 0.3799767792224884,
      "learning_rate": 4.992059988271186e-06,
      "loss": 0.0146,
      "step": 1540040
    },
    {
      "epoch": 2.5203419676230503,
      "grad_norm": 0.38964179158210754,
      "learning_rate": 4.991994096057669e-06,
      "loss": 0.013,
      "step": 1540060
    },
    {
      "epoch": 2.5203746980617034,
      "grad_norm": 0.2621513605117798,
      "learning_rate": 4.991928203844152e-06,
      "loss": 0.0146,
      "step": 1540080
    },
    {
      "epoch": 2.5204074285003566,
      "grad_norm": 0.31716230511665344,
      "learning_rate": 4.991862311630635e-06,
      "loss": 0.0146,
      "step": 1540100
    },
    {
      "epoch": 2.52044015893901,
      "grad_norm": 0.2820412814617157,
      "learning_rate": 4.991796419417118e-06,
      "loss": 0.0133,
      "step": 1540120
    },
    {
      "epoch": 2.5204728893776633,
      "grad_norm": 0.4503057599067688,
      "learning_rate": 4.991730527203601e-06,
      "loss": 0.014,
      "step": 1540140
    },
    {
      "epoch": 2.520505619816317,
      "grad_norm": 5.047297954559326,
      "learning_rate": 4.9916646349900835e-06,
      "loss": 0.0169,
      "step": 1540160
    },
    {
      "epoch": 2.52053835025497,
      "grad_norm": 0.32404160499572754,
      "learning_rate": 4.991598742776566e-06,
      "loss": 0.0208,
      "step": 1540180
    },
    {
      "epoch": 2.5205710806936237,
      "grad_norm": 0.30857259035110474,
      "learning_rate": 4.99153285056305e-06,
      "loss": 0.0157,
      "step": 1540200
    },
    {
      "epoch": 2.520603811132277,
      "grad_norm": 0.12417805939912796,
      "learning_rate": 4.9914669583495326e-06,
      "loss": 0.014,
      "step": 1540220
    },
    {
      "epoch": 2.52063654157093,
      "grad_norm": 0.42156359553337097,
      "learning_rate": 4.991401066136015e-06,
      "loss": 0.0105,
      "step": 1540240
    },
    {
      "epoch": 2.5206692720095836,
      "grad_norm": 0.3942874073982239,
      "learning_rate": 4.991335173922498e-06,
      "loss": 0.0202,
      "step": 1540260
    },
    {
      "epoch": 2.5207020024482367,
      "grad_norm": 0.34414219856262207,
      "learning_rate": 4.991269281708981e-06,
      "loss": 0.0113,
      "step": 1540280
    },
    {
      "epoch": 2.52073473288689,
      "grad_norm": 0.16923017799854279,
      "learning_rate": 4.9912033894954635e-06,
      "loss": 0.0146,
      "step": 1540300
    },
    {
      "epoch": 2.5207674633255435,
      "grad_norm": 0.289201557636261,
      "learning_rate": 4.991137497281946e-06,
      "loss": 0.0132,
      "step": 1540320
    },
    {
      "epoch": 2.520800193764197,
      "grad_norm": 0.17978379130363464,
      "learning_rate": 4.991071605068429e-06,
      "loss": 0.0129,
      "step": 1540340
    },
    {
      "epoch": 2.52083292420285,
      "grad_norm": 0.5167407393455505,
      "learning_rate": 4.991005712854913e-06,
      "loss": 0.0158,
      "step": 1540360
    },
    {
      "epoch": 2.5208656546415034,
      "grad_norm": 0.10025715827941895,
      "learning_rate": 4.990939820641395e-06,
      "loss": 0.0152,
      "step": 1540380
    },
    {
      "epoch": 2.520898385080157,
      "grad_norm": 0.3972841203212738,
      "learning_rate": 4.990873928427878e-06,
      "loss": 0.0147,
      "step": 1540400
    },
    {
      "epoch": 2.52093111551881,
      "grad_norm": 0.6862546801567078,
      "learning_rate": 4.990808036214361e-06,
      "loss": 0.0133,
      "step": 1540420
    },
    {
      "epoch": 2.5209638459574633,
      "grad_norm": 0.10578977316617966,
      "learning_rate": 4.9907421440008436e-06,
      "loss": 0.0119,
      "step": 1540440
    },
    {
      "epoch": 2.520996576396117,
      "grad_norm": 2.423496723175049,
      "learning_rate": 4.990676251787327e-06,
      "loss": 0.0193,
      "step": 1540460
    },
    {
      "epoch": 2.5210293068347704,
      "grad_norm": 0.09385456144809723,
      "learning_rate": 4.99061035957381e-06,
      "loss": 0.014,
      "step": 1540480
    },
    {
      "epoch": 2.5210620372734236,
      "grad_norm": 0.30192098021507263,
      "learning_rate": 4.990544467360293e-06,
      "loss": 0.0131,
      "step": 1540500
    },
    {
      "epoch": 2.5210947677120767,
      "grad_norm": 0.12062214314937592,
      "learning_rate": 4.990478575146775e-06,
      "loss": 0.0115,
      "step": 1540520
    },
    {
      "epoch": 2.5211274981507303,
      "grad_norm": 1.0019630193710327,
      "learning_rate": 4.990412682933258e-06,
      "loss": 0.017,
      "step": 1540540
    },
    {
      "epoch": 2.5211602285893835,
      "grad_norm": 0.025805041193962097,
      "learning_rate": 4.990346790719741e-06,
      "loss": 0.0122,
      "step": 1540560
    },
    {
      "epoch": 2.5211929590280366,
      "grad_norm": 1.1167550086975098,
      "learning_rate": 4.990280898506224e-06,
      "loss": 0.0141,
      "step": 1540580
    },
    {
      "epoch": 2.5212256894666902,
      "grad_norm": 0.8140737414360046,
      "learning_rate": 4.990215006292707e-06,
      "loss": 0.0228,
      "step": 1540600
    },
    {
      "epoch": 2.521258419905344,
      "grad_norm": 0.6691707372665405,
      "learning_rate": 4.99014911407919e-06,
      "loss": 0.0188,
      "step": 1540620
    },
    {
      "epoch": 2.521291150343997,
      "grad_norm": 0.2577647268772125,
      "learning_rate": 4.990083221865673e-06,
      "loss": 0.0095,
      "step": 1540640
    },
    {
      "epoch": 2.52132388078265,
      "grad_norm": 0.2819468677043915,
      "learning_rate": 4.990017329652155e-06,
      "loss": 0.0117,
      "step": 1540660
    },
    {
      "epoch": 2.5213566112213037,
      "grad_norm": 0.7951188683509827,
      "learning_rate": 4.989951437438638e-06,
      "loss": 0.0141,
      "step": 1540680
    },
    {
      "epoch": 2.521389341659957,
      "grad_norm": 0.6046100854873657,
      "learning_rate": 4.989885545225121e-06,
      "loss": 0.0143,
      "step": 1540700
    },
    {
      "epoch": 2.52142207209861,
      "grad_norm": 0.6323829889297485,
      "learning_rate": 4.989819653011604e-06,
      "loss": 0.0179,
      "step": 1540720
    },
    {
      "epoch": 2.5214548025372636,
      "grad_norm": 0.34803012013435364,
      "learning_rate": 4.989753760798086e-06,
      "loss": 0.0138,
      "step": 1540740
    },
    {
      "epoch": 2.521487532975917,
      "grad_norm": 0.165951207280159,
      "learning_rate": 4.98968786858457e-06,
      "loss": 0.0104,
      "step": 1540760
    },
    {
      "epoch": 2.5215202634145704,
      "grad_norm": 0.5074023604393005,
      "learning_rate": 4.989621976371053e-06,
      "loss": 0.0142,
      "step": 1540780
    },
    {
      "epoch": 2.5215529938532235,
      "grad_norm": 0.7786521315574646,
      "learning_rate": 4.989556084157535e-06,
      "loss": 0.0133,
      "step": 1540800
    },
    {
      "epoch": 2.521585724291877,
      "grad_norm": 0.5220171213150024,
      "learning_rate": 4.989490191944019e-06,
      "loss": 0.0126,
      "step": 1540820
    },
    {
      "epoch": 2.5216184547305303,
      "grad_norm": 0.27671435475349426,
      "learning_rate": 4.989424299730502e-06,
      "loss": 0.0158,
      "step": 1540840
    },
    {
      "epoch": 2.5216511851691834,
      "grad_norm": 0.40860599279403687,
      "learning_rate": 4.9893584075169845e-06,
      "loss": 0.0158,
      "step": 1540860
    },
    {
      "epoch": 2.521683915607837,
      "grad_norm": 0.2879069745540619,
      "learning_rate": 4.989292515303467e-06,
      "loss": 0.0185,
      "step": 1540880
    },
    {
      "epoch": 2.52171664604649,
      "grad_norm": 0.16211459040641785,
      "learning_rate": 4.98922662308995e-06,
      "loss": 0.0077,
      "step": 1540900
    },
    {
      "epoch": 2.5217493764851437,
      "grad_norm": 0.23930269479751587,
      "learning_rate": 4.989160730876433e-06,
      "loss": 0.0187,
      "step": 1540920
    },
    {
      "epoch": 2.521782106923797,
      "grad_norm": 0.28015297651290894,
      "learning_rate": 4.9890948386629154e-06,
      "loss": 0.0127,
      "step": 1540940
    },
    {
      "epoch": 2.5218148373624505,
      "grad_norm": 0.19971731305122375,
      "learning_rate": 4.989028946449398e-06,
      "loss": 0.021,
      "step": 1540960
    },
    {
      "epoch": 2.5218475678011036,
      "grad_norm": 0.349558025598526,
      "learning_rate": 4.988963054235882e-06,
      "loss": 0.0112,
      "step": 1540980
    },
    {
      "epoch": 2.521880298239757,
      "grad_norm": 0.20360219478607178,
      "learning_rate": 4.9888971620223645e-06,
      "loss": 0.0186,
      "step": 1541000
    },
    {
      "epoch": 2.5219130286784104,
      "grad_norm": 0.07633476704359055,
      "learning_rate": 4.988831269808847e-06,
      "loss": 0.0115,
      "step": 1541020
    },
    {
      "epoch": 2.5219457591170635,
      "grad_norm": 0.22792723774909973,
      "learning_rate": 4.98876537759533e-06,
      "loss": 0.0162,
      "step": 1541040
    },
    {
      "epoch": 2.521978489555717,
      "grad_norm": 0.13735152781009674,
      "learning_rate": 4.988699485381813e-06,
      "loss": 0.0116,
      "step": 1541060
    },
    {
      "epoch": 2.5220112199943703,
      "grad_norm": 0.23888003826141357,
      "learning_rate": 4.9886335931682955e-06,
      "loss": 0.0172,
      "step": 1541080
    },
    {
      "epoch": 2.522043950433024,
      "grad_norm": 1.164315938949585,
      "learning_rate": 4.988567700954778e-06,
      "loss": 0.0185,
      "step": 1541100
    },
    {
      "epoch": 2.522076680871677,
      "grad_norm": 0.36458146572113037,
      "learning_rate": 4.988501808741261e-06,
      "loss": 0.0202,
      "step": 1541120
    },
    {
      "epoch": 2.52210941131033,
      "grad_norm": 0.7480153441429138,
      "learning_rate": 4.988435916527744e-06,
      "loss": 0.0119,
      "step": 1541140
    },
    {
      "epoch": 2.5221421417489838,
      "grad_norm": 0.7981131076812744,
      "learning_rate": 4.988370024314227e-06,
      "loss": 0.018,
      "step": 1541160
    },
    {
      "epoch": 2.522174872187637,
      "grad_norm": 0.28364187479019165,
      "learning_rate": 4.98830413210071e-06,
      "loss": 0.0155,
      "step": 1541180
    },
    {
      "epoch": 2.5222076026262905,
      "grad_norm": 0.2751639485359192,
      "learning_rate": 4.988238239887193e-06,
      "loss": 0.0102,
      "step": 1541200
    },
    {
      "epoch": 2.5222403330649437,
      "grad_norm": 0.8045433163642883,
      "learning_rate": 4.988172347673676e-06,
      "loss": 0.0104,
      "step": 1541220
    },
    {
      "epoch": 2.5222730635035973,
      "grad_norm": 0.4088490605354309,
      "learning_rate": 4.988106455460159e-06,
      "loss": 0.015,
      "step": 1541240
    },
    {
      "epoch": 2.5223057939422504,
      "grad_norm": 0.4155444800853729,
      "learning_rate": 4.988040563246642e-06,
      "loss": 0.0117,
      "step": 1541260
    },
    {
      "epoch": 2.5223385243809036,
      "grad_norm": 0.7276692986488342,
      "learning_rate": 4.9879746710331246e-06,
      "loss": 0.013,
      "step": 1541280
    },
    {
      "epoch": 2.522371254819557,
      "grad_norm": 1.1066911220550537,
      "learning_rate": 4.987908778819607e-06,
      "loss": 0.0139,
      "step": 1541300
    },
    {
      "epoch": 2.5224039852582103,
      "grad_norm": 1.053300380706787,
      "learning_rate": 4.98784288660609e-06,
      "loss": 0.0192,
      "step": 1541320
    },
    {
      "epoch": 2.522436715696864,
      "grad_norm": 0.6099364161491394,
      "learning_rate": 4.987776994392573e-06,
      "loss": 0.0202,
      "step": 1541340
    },
    {
      "epoch": 2.522469446135517,
      "grad_norm": 0.1834849715232849,
      "learning_rate": 4.9877111021790555e-06,
      "loss": 0.0171,
      "step": 1541360
    },
    {
      "epoch": 2.5225021765741706,
      "grad_norm": 0.20924246311187744,
      "learning_rate": 4.987645209965539e-06,
      "loss": 0.0195,
      "step": 1541380
    },
    {
      "epoch": 2.522534907012824,
      "grad_norm": 0.4147450625896454,
      "learning_rate": 4.987579317752022e-06,
      "loss": 0.0269,
      "step": 1541400
    },
    {
      "epoch": 2.522567637451477,
      "grad_norm": 0.4485451877117157,
      "learning_rate": 4.987513425538505e-06,
      "loss": 0.0098,
      "step": 1541420
    },
    {
      "epoch": 2.5226003678901305,
      "grad_norm": 1.4606655836105347,
      "learning_rate": 4.987447533324987e-06,
      "loss": 0.0169,
      "step": 1541440
    },
    {
      "epoch": 2.5226330983287837,
      "grad_norm": 0.11814729124307632,
      "learning_rate": 4.98738164111147e-06,
      "loss": 0.016,
      "step": 1541460
    },
    {
      "epoch": 2.5226658287674373,
      "grad_norm": 1.697184443473816,
      "learning_rate": 4.987315748897953e-06,
      "loss": 0.0117,
      "step": 1541480
    },
    {
      "epoch": 2.5226985592060904,
      "grad_norm": 0.7125070691108704,
      "learning_rate": 4.9872498566844355e-06,
      "loss": 0.015,
      "step": 1541500
    },
    {
      "epoch": 2.522731289644744,
      "grad_norm": 0.3958716094493866,
      "learning_rate": 4.987183964470919e-06,
      "loss": 0.0128,
      "step": 1541520
    },
    {
      "epoch": 2.522764020083397,
      "grad_norm": 0.6954978108406067,
      "learning_rate": 4.987118072257402e-06,
      "loss": 0.0105,
      "step": 1541540
    },
    {
      "epoch": 2.5227967505220503,
      "grad_norm": 0.3112518787384033,
      "learning_rate": 4.987052180043885e-06,
      "loss": 0.015,
      "step": 1541560
    },
    {
      "epoch": 2.522829480960704,
      "grad_norm": 0.7002025246620178,
      "learning_rate": 4.986986287830367e-06,
      "loss": 0.0192,
      "step": 1541580
    },
    {
      "epoch": 2.522862211399357,
      "grad_norm": 0.09302710741758347,
      "learning_rate": 4.98692039561685e-06,
      "loss": 0.0124,
      "step": 1541600
    },
    {
      "epoch": 2.5228949418380107,
      "grad_norm": 0.3372564911842346,
      "learning_rate": 4.986854503403334e-06,
      "loss": 0.0096,
      "step": 1541620
    },
    {
      "epoch": 2.522927672276664,
      "grad_norm": 0.19696328043937683,
      "learning_rate": 4.986788611189816e-06,
      "loss": 0.0101,
      "step": 1541640
    },
    {
      "epoch": 2.5229604027153174,
      "grad_norm": 0.23063115775585175,
      "learning_rate": 4.986722718976299e-06,
      "loss": 0.0142,
      "step": 1541660
    },
    {
      "epoch": 2.5229931331539706,
      "grad_norm": 0.2970322072505951,
      "learning_rate": 4.986656826762782e-06,
      "loss": 0.0139,
      "step": 1541680
    },
    {
      "epoch": 2.5230258635926237,
      "grad_norm": 0.27069154381752014,
      "learning_rate": 4.986590934549265e-06,
      "loss": 0.0107,
      "step": 1541700
    },
    {
      "epoch": 2.5230585940312773,
      "grad_norm": 0.3628811538219452,
      "learning_rate": 4.986525042335747e-06,
      "loss": 0.0147,
      "step": 1541720
    },
    {
      "epoch": 2.5230913244699305,
      "grad_norm": 0.33558931946754456,
      "learning_rate": 4.98645915012223e-06,
      "loss": 0.0166,
      "step": 1541740
    },
    {
      "epoch": 2.523124054908584,
      "grad_norm": 0.17189976572990417,
      "learning_rate": 4.986393257908713e-06,
      "loss": 0.0217,
      "step": 1541760
    },
    {
      "epoch": 2.523156785347237,
      "grad_norm": 0.6994103193283081,
      "learning_rate": 4.9863273656951964e-06,
      "loss": 0.0105,
      "step": 1541780
    },
    {
      "epoch": 2.523189515785891,
      "grad_norm": 0.1618470400571823,
      "learning_rate": 4.986261473481679e-06,
      "loss": 0.0097,
      "step": 1541800
    },
    {
      "epoch": 2.523222246224544,
      "grad_norm": 0.4805004894733429,
      "learning_rate": 4.986195581268162e-06,
      "loss": 0.0126,
      "step": 1541820
    },
    {
      "epoch": 2.523254976663197,
      "grad_norm": 0.0929512158036232,
      "learning_rate": 4.986129689054645e-06,
      "loss": 0.0184,
      "step": 1541840
    },
    {
      "epoch": 2.5232877071018507,
      "grad_norm": 1.6443848609924316,
      "learning_rate": 4.986063796841128e-06,
      "loss": 0.0107,
      "step": 1541860
    },
    {
      "epoch": 2.523320437540504,
      "grad_norm": 0.14639310538768768,
      "learning_rate": 4.985997904627611e-06,
      "loss": 0.0079,
      "step": 1541880
    },
    {
      "epoch": 2.523353167979157,
      "grad_norm": 0.4837663769721985,
      "learning_rate": 4.985932012414094e-06,
      "loss": 0.012,
      "step": 1541900
    },
    {
      "epoch": 2.5233858984178106,
      "grad_norm": 0.5888127684593201,
      "learning_rate": 4.9858661202005765e-06,
      "loss": 0.0155,
      "step": 1541920
    },
    {
      "epoch": 2.523418628856464,
      "grad_norm": 0.934638500213623,
      "learning_rate": 4.985800227987059e-06,
      "loss": 0.0163,
      "step": 1541940
    },
    {
      "epoch": 2.5234513592951173,
      "grad_norm": 0.8332825899124146,
      "learning_rate": 4.985734335773542e-06,
      "loss": 0.013,
      "step": 1541960
    },
    {
      "epoch": 2.5234840897337705,
      "grad_norm": 0.3095053732395172,
      "learning_rate": 4.985668443560025e-06,
      "loss": 0.0187,
      "step": 1541980
    },
    {
      "epoch": 2.523516820172424,
      "grad_norm": 0.41115784645080566,
      "learning_rate": 4.985602551346507e-06,
      "loss": 0.0121,
      "step": 1542000
    },
    {
      "epoch": 2.523549550611077,
      "grad_norm": 0.5840147733688354,
      "learning_rate": 4.985536659132991e-06,
      "loss": 0.0165,
      "step": 1542020
    },
    {
      "epoch": 2.5235822810497304,
      "grad_norm": 0.19803006947040558,
      "learning_rate": 4.985470766919474e-06,
      "loss": 0.0124,
      "step": 1542040
    },
    {
      "epoch": 2.523615011488384,
      "grad_norm": 0.1816553771495819,
      "learning_rate": 4.9854048747059565e-06,
      "loss": 0.011,
      "step": 1542060
    },
    {
      "epoch": 2.5236477419270376,
      "grad_norm": 0.2283233404159546,
      "learning_rate": 4.985338982492439e-06,
      "loss": 0.0154,
      "step": 1542080
    },
    {
      "epoch": 2.5236804723656907,
      "grad_norm": 0.49674931168556213,
      "learning_rate": 4.985273090278922e-06,
      "loss": 0.0176,
      "step": 1542100
    },
    {
      "epoch": 2.523713202804344,
      "grad_norm": 0.9027132391929626,
      "learning_rate": 4.985207198065405e-06,
      "loss": 0.0136,
      "step": 1542120
    },
    {
      "epoch": 2.5237459332429975,
      "grad_norm": 0.3181150257587433,
      "learning_rate": 4.9851413058518874e-06,
      "loss": 0.0142,
      "step": 1542140
    },
    {
      "epoch": 2.5237786636816506,
      "grad_norm": 0.2840631604194641,
      "learning_rate": 4.98507541363837e-06,
      "loss": 0.0173,
      "step": 1542160
    },
    {
      "epoch": 2.5238113941203038,
      "grad_norm": 0.18166328966617584,
      "learning_rate": 4.985009521424854e-06,
      "loss": 0.0221,
      "step": 1542180
    },
    {
      "epoch": 2.5238441245589573,
      "grad_norm": 0.9590928554534912,
      "learning_rate": 4.9849436292113365e-06,
      "loss": 0.0113,
      "step": 1542200
    },
    {
      "epoch": 2.523876854997611,
      "grad_norm": 0.4178318679332733,
      "learning_rate": 4.984877736997819e-06,
      "loss": 0.0143,
      "step": 1542220
    },
    {
      "epoch": 2.523909585436264,
      "grad_norm": 0.3069649040699005,
      "learning_rate": 4.984811844784303e-06,
      "loss": 0.0144,
      "step": 1542240
    },
    {
      "epoch": 2.5239423158749172,
      "grad_norm": 0.9409484267234802,
      "learning_rate": 4.984745952570786e-06,
      "loss": 0.0215,
      "step": 1542260
    },
    {
      "epoch": 2.523975046313571,
      "grad_norm": 0.7378526329994202,
      "learning_rate": 4.984680060357268e-06,
      "loss": 0.0134,
      "step": 1542280
    },
    {
      "epoch": 2.524007776752224,
      "grad_norm": 0.5010882616043091,
      "learning_rate": 4.984614168143751e-06,
      "loss": 0.0131,
      "step": 1542300
    },
    {
      "epoch": 2.524040507190877,
      "grad_norm": 0.48033657670021057,
      "learning_rate": 4.984548275930234e-06,
      "loss": 0.0119,
      "step": 1542320
    },
    {
      "epoch": 2.5240732376295307,
      "grad_norm": 0.5271667838096619,
      "learning_rate": 4.9844823837167165e-06,
      "loss": 0.0171,
      "step": 1542340
    },
    {
      "epoch": 2.5241059680681843,
      "grad_norm": 0.3990808129310608,
      "learning_rate": 4.984416491503199e-06,
      "loss": 0.019,
      "step": 1542360
    },
    {
      "epoch": 2.5241386985068375,
      "grad_norm": 0.4820009469985962,
      "learning_rate": 4.984350599289682e-06,
      "loss": 0.0131,
      "step": 1542380
    },
    {
      "epoch": 2.5241714289454906,
      "grad_norm": 0.17489278316497803,
      "learning_rate": 4.984284707076166e-06,
      "loss": 0.0167,
      "step": 1542400
    },
    {
      "epoch": 2.524204159384144,
      "grad_norm": 0.07229254394769669,
      "learning_rate": 4.984218814862648e-06,
      "loss": 0.0189,
      "step": 1542420
    },
    {
      "epoch": 2.5242368898227974,
      "grad_norm": 0.3581947684288025,
      "learning_rate": 4.984152922649131e-06,
      "loss": 0.0176,
      "step": 1542440
    },
    {
      "epoch": 2.5242696202614505,
      "grad_norm": 0.3471672534942627,
      "learning_rate": 4.984087030435614e-06,
      "loss": 0.01,
      "step": 1542460
    },
    {
      "epoch": 2.524302350700104,
      "grad_norm": 0.39300647377967834,
      "learning_rate": 4.9840211382220966e-06,
      "loss": 0.0185,
      "step": 1542480
    },
    {
      "epoch": 2.5243350811387573,
      "grad_norm": 0.24407054483890533,
      "learning_rate": 4.983955246008579e-06,
      "loss": 0.0114,
      "step": 1542500
    },
    {
      "epoch": 2.524367811577411,
      "grad_norm": 0.30694571137428284,
      "learning_rate": 4.983889353795062e-06,
      "loss": 0.0167,
      "step": 1542520
    },
    {
      "epoch": 2.524400542016064,
      "grad_norm": 0.6218721866607666,
      "learning_rate": 4.983823461581545e-06,
      "loss": 0.022,
      "step": 1542540
    },
    {
      "epoch": 2.5244332724547176,
      "grad_norm": 0.3157583177089691,
      "learning_rate": 4.983757569368028e-06,
      "loss": 0.0105,
      "step": 1542560
    },
    {
      "epoch": 2.5244660028933708,
      "grad_norm": 0.17792731523513794,
      "learning_rate": 4.983691677154511e-06,
      "loss": 0.0189,
      "step": 1542580
    },
    {
      "epoch": 2.524498733332024,
      "grad_norm": 0.5837593674659729,
      "learning_rate": 4.983625784940994e-06,
      "loss": 0.016,
      "step": 1542600
    },
    {
      "epoch": 2.5245314637706775,
      "grad_norm": 0.36812442541122437,
      "learning_rate": 4.983559892727477e-06,
      "loss": 0.0159,
      "step": 1542620
    },
    {
      "epoch": 2.5245641942093306,
      "grad_norm": 0.5129221081733704,
      "learning_rate": 4.98349400051396e-06,
      "loss": 0.0111,
      "step": 1542640
    },
    {
      "epoch": 2.5245969246479842,
      "grad_norm": 0.3360251784324646,
      "learning_rate": 4.983428108300443e-06,
      "loss": 0.0111,
      "step": 1542660
    },
    {
      "epoch": 2.5246296550866374,
      "grad_norm": 0.3767537474632263,
      "learning_rate": 4.983362216086926e-06,
      "loss": 0.0121,
      "step": 1542680
    },
    {
      "epoch": 2.524662385525291,
      "grad_norm": 1.5151115655899048,
      "learning_rate": 4.983296323873408e-06,
      "loss": 0.0184,
      "step": 1542700
    },
    {
      "epoch": 2.524695115963944,
      "grad_norm": 0.37306350469589233,
      "learning_rate": 4.983230431659891e-06,
      "loss": 0.0208,
      "step": 1542720
    },
    {
      "epoch": 2.5247278464025973,
      "grad_norm": 0.1826077699661255,
      "learning_rate": 4.983164539446374e-06,
      "loss": 0.0119,
      "step": 1542740
    },
    {
      "epoch": 2.524760576841251,
      "grad_norm": 0.6123834252357483,
      "learning_rate": 4.983098647232857e-06,
      "loss": 0.0199,
      "step": 1542760
    },
    {
      "epoch": 2.524793307279904,
      "grad_norm": 0.3394404649734497,
      "learning_rate": 4.983032755019339e-06,
      "loss": 0.0154,
      "step": 1542780
    },
    {
      "epoch": 2.5248260377185576,
      "grad_norm": 0.24712102115154266,
      "learning_rate": 4.982966862805823e-06,
      "loss": 0.0151,
      "step": 1542800
    },
    {
      "epoch": 2.5248587681572108,
      "grad_norm": 0.5348937511444092,
      "learning_rate": 4.982900970592306e-06,
      "loss": 0.0152,
      "step": 1542820
    },
    {
      "epoch": 2.5248914985958644,
      "grad_norm": 1.2036081552505493,
      "learning_rate": 4.982835078378788e-06,
      "loss": 0.0168,
      "step": 1542840
    },
    {
      "epoch": 2.5249242290345175,
      "grad_norm": 0.2997637093067169,
      "learning_rate": 4.982769186165271e-06,
      "loss": 0.0116,
      "step": 1542860
    },
    {
      "epoch": 2.5249569594731707,
      "grad_norm": 0.9088324904441833,
      "learning_rate": 4.982703293951754e-06,
      "loss": 0.0199,
      "step": 1542880
    },
    {
      "epoch": 2.5249896899118243,
      "grad_norm": 1.0098764896392822,
      "learning_rate": 4.982637401738237e-06,
      "loss": 0.0146,
      "step": 1542900
    },
    {
      "epoch": 2.5250224203504774,
      "grad_norm": 0.3840639293193817,
      "learning_rate": 4.98257150952472e-06,
      "loss": 0.0139,
      "step": 1542920
    },
    {
      "epoch": 2.525055150789131,
      "grad_norm": 0.8529236316680908,
      "learning_rate": 4.982505617311203e-06,
      "loss": 0.0188,
      "step": 1542940
    },
    {
      "epoch": 2.525087881227784,
      "grad_norm": 0.14348271489143372,
      "learning_rate": 4.982439725097686e-06,
      "loss": 0.0129,
      "step": 1542960
    },
    {
      "epoch": 2.5251206116664378,
      "grad_norm": 0.1011543944478035,
      "learning_rate": 4.9823738328841684e-06,
      "loss": 0.0089,
      "step": 1542980
    },
    {
      "epoch": 2.525153342105091,
      "grad_norm": 0.21675245463848114,
      "learning_rate": 4.982307940670651e-06,
      "loss": 0.0194,
      "step": 1543000
    },
    {
      "epoch": 2.525186072543744,
      "grad_norm": 0.867494523525238,
      "learning_rate": 4.982242048457134e-06,
      "loss": 0.0116,
      "step": 1543020
    },
    {
      "epoch": 2.5252188029823976,
      "grad_norm": 0.2015678882598877,
      "learning_rate": 4.9821761562436175e-06,
      "loss": 0.0154,
      "step": 1543040
    },
    {
      "epoch": 2.525251533421051,
      "grad_norm": 0.3283163607120514,
      "learning_rate": 4.9821102640301e-06,
      "loss": 0.0168,
      "step": 1543060
    },
    {
      "epoch": 2.5252842638597044,
      "grad_norm": 0.7302144765853882,
      "learning_rate": 4.982044371816583e-06,
      "loss": 0.0153,
      "step": 1543080
    },
    {
      "epoch": 2.5253169942983575,
      "grad_norm": 0.11321074515581131,
      "learning_rate": 4.981978479603066e-06,
      "loss": 0.0127,
      "step": 1543100
    },
    {
      "epoch": 2.525349724737011,
      "grad_norm": 0.26283442974090576,
      "learning_rate": 4.9819125873895485e-06,
      "loss": 0.0117,
      "step": 1543120
    },
    {
      "epoch": 2.5253824551756643,
      "grad_norm": 0.7782779335975647,
      "learning_rate": 4.981846695176031e-06,
      "loss": 0.0153,
      "step": 1543140
    },
    {
      "epoch": 2.5254151856143174,
      "grad_norm": 0.14915068447589874,
      "learning_rate": 4.981780802962514e-06,
      "loss": 0.014,
      "step": 1543160
    },
    {
      "epoch": 2.525447916052971,
      "grad_norm": 0.1559518724679947,
      "learning_rate": 4.981714910748997e-06,
      "loss": 0.0151,
      "step": 1543180
    },
    {
      "epoch": 2.525480646491624,
      "grad_norm": 0.4910062551498413,
      "learning_rate": 4.98164901853548e-06,
      "loss": 0.0124,
      "step": 1543200
    },
    {
      "epoch": 2.5255133769302778,
      "grad_norm": 0.12831848859786987,
      "learning_rate": 4.981583126321963e-06,
      "loss": 0.0138,
      "step": 1543220
    },
    {
      "epoch": 2.525546107368931,
      "grad_norm": 0.35950636863708496,
      "learning_rate": 4.981517234108446e-06,
      "loss": 0.0136,
      "step": 1543240
    },
    {
      "epoch": 2.5255788378075845,
      "grad_norm": 0.18474410474300385,
      "learning_rate": 4.981451341894929e-06,
      "loss": 0.0127,
      "step": 1543260
    },
    {
      "epoch": 2.5256115682462377,
      "grad_norm": 0.4636377692222595,
      "learning_rate": 4.981385449681412e-06,
      "loss": 0.0139,
      "step": 1543280
    },
    {
      "epoch": 2.525644298684891,
      "grad_norm": 3.9623966217041016,
      "learning_rate": 4.981319557467895e-06,
      "loss": 0.0173,
      "step": 1543300
    },
    {
      "epoch": 2.5256770291235444,
      "grad_norm": 0.3345381021499634,
      "learning_rate": 4.9812536652543776e-06,
      "loss": 0.01,
      "step": 1543320
    },
    {
      "epoch": 2.5257097595621976,
      "grad_norm": 0.25413236021995544,
      "learning_rate": 4.98118777304086e-06,
      "loss": 0.0109,
      "step": 1543340
    },
    {
      "epoch": 2.5257424900008507,
      "grad_norm": 1.2409483194351196,
      "learning_rate": 4.981121880827343e-06,
      "loss": 0.0158,
      "step": 1543360
    },
    {
      "epoch": 2.5257752204395043,
      "grad_norm": 0.5900665521621704,
      "learning_rate": 4.981055988613826e-06,
      "loss": 0.0139,
      "step": 1543380
    },
    {
      "epoch": 2.525807950878158,
      "grad_norm": 0.9930410981178284,
      "learning_rate": 4.9809900964003085e-06,
      "loss": 0.0163,
      "step": 1543400
    },
    {
      "epoch": 2.525840681316811,
      "grad_norm": 0.419058233499527,
      "learning_rate": 4.980924204186791e-06,
      "loss": 0.0115,
      "step": 1543420
    },
    {
      "epoch": 2.525873411755464,
      "grad_norm": 0.11100338399410248,
      "learning_rate": 4.980858311973275e-06,
      "loss": 0.0155,
      "step": 1543440
    },
    {
      "epoch": 2.525906142194118,
      "grad_norm": 0.394391804933548,
      "learning_rate": 4.980792419759758e-06,
      "loss": 0.0129,
      "step": 1543460
    },
    {
      "epoch": 2.525938872632771,
      "grad_norm": 0.30310094356536865,
      "learning_rate": 4.98072652754624e-06,
      "loss": 0.0182,
      "step": 1543480
    },
    {
      "epoch": 2.525971603071424,
      "grad_norm": 0.24193689227104187,
      "learning_rate": 4.980660635332723e-06,
      "loss": 0.0179,
      "step": 1543500
    },
    {
      "epoch": 2.5260043335100777,
      "grad_norm": 1.1122015714645386,
      "learning_rate": 4.980594743119206e-06,
      "loss": 0.0204,
      "step": 1543520
    },
    {
      "epoch": 2.5260370639487313,
      "grad_norm": 0.5042914748191833,
      "learning_rate": 4.9805288509056885e-06,
      "loss": 0.0213,
      "step": 1543540
    },
    {
      "epoch": 2.5260697943873844,
      "grad_norm": 0.3958643972873688,
      "learning_rate": 4.980462958692171e-06,
      "loss": 0.0167,
      "step": 1543560
    },
    {
      "epoch": 2.5261025248260376,
      "grad_norm": 0.6223376393318176,
      "learning_rate": 4.980397066478654e-06,
      "loss": 0.0131,
      "step": 1543580
    },
    {
      "epoch": 2.526135255264691,
      "grad_norm": 0.15378771722316742,
      "learning_rate": 4.980331174265138e-06,
      "loss": 0.0177,
      "step": 1543600
    },
    {
      "epoch": 2.5261679857033443,
      "grad_norm": 0.4218533933162689,
      "learning_rate": 4.98026528205162e-06,
      "loss": 0.0155,
      "step": 1543620
    },
    {
      "epoch": 2.5262007161419975,
      "grad_norm": 0.20449897646903992,
      "learning_rate": 4.980199389838103e-06,
      "loss": 0.0138,
      "step": 1543640
    },
    {
      "epoch": 2.526233446580651,
      "grad_norm": 0.9120463132858276,
      "learning_rate": 4.980133497624587e-06,
      "loss": 0.0137,
      "step": 1543660
    },
    {
      "epoch": 2.5262661770193047,
      "grad_norm": 0.32553765177726746,
      "learning_rate": 4.980067605411069e-06,
      "loss": 0.0107,
      "step": 1543680
    },
    {
      "epoch": 2.526298907457958,
      "grad_norm": 1.864884614944458,
      "learning_rate": 4.980001713197552e-06,
      "loss": 0.0196,
      "step": 1543700
    },
    {
      "epoch": 2.526331637896611,
      "grad_norm": 0.12924280762672424,
      "learning_rate": 4.979935820984035e-06,
      "loss": 0.0113,
      "step": 1543720
    },
    {
      "epoch": 2.5263643683352646,
      "grad_norm": 0.37971824407577515,
      "learning_rate": 4.979869928770518e-06,
      "loss": 0.0105,
      "step": 1543740
    },
    {
      "epoch": 2.5263970987739177,
      "grad_norm": 0.5273966789245605,
      "learning_rate": 4.979804036557e-06,
      "loss": 0.0154,
      "step": 1543760
    },
    {
      "epoch": 2.526429829212571,
      "grad_norm": 0.2656555771827698,
      "learning_rate": 4.979738144343483e-06,
      "loss": 0.016,
      "step": 1543780
    },
    {
      "epoch": 2.5264625596512245,
      "grad_norm": 0.3764599859714508,
      "learning_rate": 4.979672252129966e-06,
      "loss": 0.0146,
      "step": 1543800
    },
    {
      "epoch": 2.526495290089878,
      "grad_norm": 0.19038988649845123,
      "learning_rate": 4.9796063599164494e-06,
      "loss": 0.0132,
      "step": 1543820
    },
    {
      "epoch": 2.526528020528531,
      "grad_norm": 0.4238314926624298,
      "learning_rate": 4.979540467702932e-06,
      "loss": 0.0212,
      "step": 1543840
    },
    {
      "epoch": 2.5265607509671844,
      "grad_norm": 0.7719257473945618,
      "learning_rate": 4.979474575489415e-06,
      "loss": 0.015,
      "step": 1543860
    },
    {
      "epoch": 2.526593481405838,
      "grad_norm": 0.2123480588197708,
      "learning_rate": 4.979408683275898e-06,
      "loss": 0.0162,
      "step": 1543880
    },
    {
      "epoch": 2.526626211844491,
      "grad_norm": 0.30770209431648254,
      "learning_rate": 4.97934279106238e-06,
      "loss": 0.0137,
      "step": 1543900
    },
    {
      "epoch": 2.5266589422831442,
      "grad_norm": 0.8545365929603577,
      "learning_rate": 4.979276898848863e-06,
      "loss": 0.016,
      "step": 1543920
    },
    {
      "epoch": 2.526691672721798,
      "grad_norm": 2.1069905757904053,
      "learning_rate": 4.979211006635346e-06,
      "loss": 0.02,
      "step": 1543940
    },
    {
      "epoch": 2.526724403160451,
      "grad_norm": 0.23855739831924438,
      "learning_rate": 4.979145114421829e-06,
      "loss": 0.0117,
      "step": 1543960
    },
    {
      "epoch": 2.5267571335991046,
      "grad_norm": 0.6215586066246033,
      "learning_rate": 4.979079222208312e-06,
      "loss": 0.0139,
      "step": 1543980
    },
    {
      "epoch": 2.5267898640377577,
      "grad_norm": 0.25714683532714844,
      "learning_rate": 4.979013329994795e-06,
      "loss": 0.0154,
      "step": 1544000
    },
    {
      "epoch": 2.5268225944764113,
      "grad_norm": 1.0140507221221924,
      "learning_rate": 4.978947437781278e-06,
      "loss": 0.017,
      "step": 1544020
    },
    {
      "epoch": 2.5268553249150645,
      "grad_norm": 0.09567804634571075,
      "learning_rate": 4.9788815455677604e-06,
      "loss": 0.0107,
      "step": 1544040
    },
    {
      "epoch": 2.5268880553537176,
      "grad_norm": 0.34329938888549805,
      "learning_rate": 4.978815653354244e-06,
      "loss": 0.0166,
      "step": 1544060
    },
    {
      "epoch": 2.5269207857923712,
      "grad_norm": 0.2383546531200409,
      "learning_rate": 4.978749761140727e-06,
      "loss": 0.0147,
      "step": 1544080
    },
    {
      "epoch": 2.5269535162310244,
      "grad_norm": 0.49346303939819336,
      "learning_rate": 4.9786838689272095e-06,
      "loss": 0.0172,
      "step": 1544100
    },
    {
      "epoch": 2.526986246669678,
      "grad_norm": 0.2513769865036011,
      "learning_rate": 4.978617976713692e-06,
      "loss": 0.0143,
      "step": 1544120
    },
    {
      "epoch": 2.527018977108331,
      "grad_norm": 0.5060627460479736,
      "learning_rate": 4.978552084500175e-06,
      "loss": 0.0129,
      "step": 1544140
    },
    {
      "epoch": 2.5270517075469847,
      "grad_norm": 0.6003348231315613,
      "learning_rate": 4.978486192286658e-06,
      "loss": 0.0167,
      "step": 1544160
    },
    {
      "epoch": 2.527084437985638,
      "grad_norm": 0.519356906414032,
      "learning_rate": 4.9784203000731405e-06,
      "loss": 0.0128,
      "step": 1544180
    },
    {
      "epoch": 2.527117168424291,
      "grad_norm": 0.1473526954650879,
      "learning_rate": 4.978354407859623e-06,
      "loss": 0.0135,
      "step": 1544200
    },
    {
      "epoch": 2.5271498988629446,
      "grad_norm": 0.6038144826889038,
      "learning_rate": 4.978288515646107e-06,
      "loss": 0.0112,
      "step": 1544220
    },
    {
      "epoch": 2.5271826293015978,
      "grad_norm": 0.5062997341156006,
      "learning_rate": 4.9782226234325895e-06,
      "loss": 0.0152,
      "step": 1544240
    },
    {
      "epoch": 2.5272153597402514,
      "grad_norm": 0.20502540469169617,
      "learning_rate": 4.978156731219072e-06,
      "loss": 0.0133,
      "step": 1544260
    },
    {
      "epoch": 2.5272480901789045,
      "grad_norm": 0.27500247955322266,
      "learning_rate": 4.978090839005555e-06,
      "loss": 0.0145,
      "step": 1544280
    },
    {
      "epoch": 2.527280820617558,
      "grad_norm": 0.7040948867797852,
      "learning_rate": 4.978024946792038e-06,
      "loss": 0.0115,
      "step": 1544300
    },
    {
      "epoch": 2.5273135510562112,
      "grad_norm": 0.24741511046886444,
      "learning_rate": 4.977959054578521e-06,
      "loss": 0.0144,
      "step": 1544320
    },
    {
      "epoch": 2.5273462814948644,
      "grad_norm": 0.09974939376115799,
      "learning_rate": 4.977893162365004e-06,
      "loss": 0.0098,
      "step": 1544340
    },
    {
      "epoch": 2.527379011933518,
      "grad_norm": 0.12441935390233994,
      "learning_rate": 4.977827270151487e-06,
      "loss": 0.0202,
      "step": 1544360
    },
    {
      "epoch": 2.527411742372171,
      "grad_norm": 0.183260977268219,
      "learning_rate": 4.9777613779379695e-06,
      "loss": 0.0128,
      "step": 1544380
    },
    {
      "epoch": 2.5274444728108247,
      "grad_norm": 2.8084065914154053,
      "learning_rate": 4.977695485724452e-06,
      "loss": 0.0168,
      "step": 1544400
    },
    {
      "epoch": 2.527477203249478,
      "grad_norm": 0.3483926057815552,
      "learning_rate": 4.977629593510935e-06,
      "loss": 0.0137,
      "step": 1544420
    },
    {
      "epoch": 2.5275099336881315,
      "grad_norm": 0.40297675132751465,
      "learning_rate": 4.977563701297418e-06,
      "loss": 0.0204,
      "step": 1544440
    },
    {
      "epoch": 2.5275426641267846,
      "grad_norm": 0.6732840538024902,
      "learning_rate": 4.977497809083901e-06,
      "loss": 0.0129,
      "step": 1544460
    },
    {
      "epoch": 2.527575394565438,
      "grad_norm": 0.1478603035211563,
      "learning_rate": 4.977431916870384e-06,
      "loss": 0.0147,
      "step": 1544480
    },
    {
      "epoch": 2.5276081250040914,
      "grad_norm": 0.2960394322872162,
      "learning_rate": 4.977366024656867e-06,
      "loss": 0.01,
      "step": 1544500
    },
    {
      "epoch": 2.5276408554427445,
      "grad_norm": 0.7544108629226685,
      "learning_rate": 4.9773001324433496e-06,
      "loss": 0.0111,
      "step": 1544520
    },
    {
      "epoch": 2.527673585881398,
      "grad_norm": 0.3426794707775116,
      "learning_rate": 4.977234240229832e-06,
      "loss": 0.0132,
      "step": 1544540
    },
    {
      "epoch": 2.5277063163200513,
      "grad_norm": 0.2612368166446686,
      "learning_rate": 4.977168348016315e-06,
      "loss": 0.0176,
      "step": 1544560
    },
    {
      "epoch": 2.527739046758705,
      "grad_norm": 0.44208958745002747,
      "learning_rate": 4.977102455802798e-06,
      "loss": 0.0168,
      "step": 1544580
    },
    {
      "epoch": 2.527771777197358,
      "grad_norm": 0.43030282855033875,
      "learning_rate": 4.9770365635892805e-06,
      "loss": 0.015,
      "step": 1544600
    },
    {
      "epoch": 2.527804507636011,
      "grad_norm": 0.2549237012863159,
      "learning_rate": 4.976970671375764e-06,
      "loss": 0.0137,
      "step": 1544620
    },
    {
      "epoch": 2.5278372380746648,
      "grad_norm": 0.18914322555065155,
      "learning_rate": 4.976904779162247e-06,
      "loss": 0.017,
      "step": 1544640
    },
    {
      "epoch": 2.527869968513318,
      "grad_norm": 0.7764909863471985,
      "learning_rate": 4.97683888694873e-06,
      "loss": 0.0133,
      "step": 1544660
    },
    {
      "epoch": 2.5279026989519715,
      "grad_norm": 0.42936819791793823,
      "learning_rate": 4.976772994735213e-06,
      "loss": 0.0137,
      "step": 1544680
    },
    {
      "epoch": 2.5279354293906247,
      "grad_norm": 0.5800363421440125,
      "learning_rate": 4.976707102521696e-06,
      "loss": 0.0136,
      "step": 1544700
    },
    {
      "epoch": 2.5279681598292782,
      "grad_norm": 0.5063924789428711,
      "learning_rate": 4.976641210308179e-06,
      "loss": 0.013,
      "step": 1544720
    },
    {
      "epoch": 2.5280008902679314,
      "grad_norm": 0.3633871078491211,
      "learning_rate": 4.976575318094661e-06,
      "loss": 0.0147,
      "step": 1544740
    },
    {
      "epoch": 2.5280336207065845,
      "grad_norm": 0.8250985145568848,
      "learning_rate": 4.976509425881144e-06,
      "loss": 0.0137,
      "step": 1544760
    },
    {
      "epoch": 2.528066351145238,
      "grad_norm": 0.2803516387939453,
      "learning_rate": 4.976443533667627e-06,
      "loss": 0.0172,
      "step": 1544780
    },
    {
      "epoch": 2.5280990815838913,
      "grad_norm": 0.5425081253051758,
      "learning_rate": 4.97637764145411e-06,
      "loss": 0.0141,
      "step": 1544800
    },
    {
      "epoch": 2.528131812022545,
      "grad_norm": 0.6305177807807922,
      "learning_rate": 4.976311749240592e-06,
      "loss": 0.0112,
      "step": 1544820
    },
    {
      "epoch": 2.528164542461198,
      "grad_norm": 0.1935962289571762,
      "learning_rate": 4.976245857027076e-06,
      "loss": 0.012,
      "step": 1544840
    },
    {
      "epoch": 2.5281972728998516,
      "grad_norm": 0.24728383123874664,
      "learning_rate": 4.976179964813559e-06,
      "loss": 0.0158,
      "step": 1544860
    },
    {
      "epoch": 2.528230003338505,
      "grad_norm": 0.27745160460472107,
      "learning_rate": 4.9761140726000414e-06,
      "loss": 0.0156,
      "step": 1544880
    },
    {
      "epoch": 2.528262733777158,
      "grad_norm": 0.6073769330978394,
      "learning_rate": 4.976048180386524e-06,
      "loss": 0.0133,
      "step": 1544900
    },
    {
      "epoch": 2.5282954642158115,
      "grad_norm": 0.500370979309082,
      "learning_rate": 4.975982288173007e-06,
      "loss": 0.0188,
      "step": 1544920
    },
    {
      "epoch": 2.5283281946544647,
      "grad_norm": 0.2917410135269165,
      "learning_rate": 4.97591639595949e-06,
      "loss": 0.0152,
      "step": 1544940
    },
    {
      "epoch": 2.528360925093118,
      "grad_norm": 0.289235383272171,
      "learning_rate": 4.975850503745972e-06,
      "loss": 0.0162,
      "step": 1544960
    },
    {
      "epoch": 2.5283936555317714,
      "grad_norm": 0.37948885560035706,
      "learning_rate": 4.975784611532455e-06,
      "loss": 0.015,
      "step": 1544980
    },
    {
      "epoch": 2.528426385970425,
      "grad_norm": 0.17035876214504242,
      "learning_rate": 4.975718719318938e-06,
      "loss": 0.0076,
      "step": 1545000
    },
    {
      "epoch": 2.528459116409078,
      "grad_norm": 0.21530447900295258,
      "learning_rate": 4.9756528271054214e-06,
      "loss": 0.0195,
      "step": 1545020
    },
    {
      "epoch": 2.5284918468477313,
      "grad_norm": 0.5354916453361511,
      "learning_rate": 4.975586934891904e-06,
      "loss": 0.0221,
      "step": 1545040
    },
    {
      "epoch": 2.528524577286385,
      "grad_norm": 0.5021731853485107,
      "learning_rate": 4.975521042678387e-06,
      "loss": 0.016,
      "step": 1545060
    },
    {
      "epoch": 2.528557307725038,
      "grad_norm": 0.18400901556015015,
      "learning_rate": 4.9754551504648705e-06,
      "loss": 0.0139,
      "step": 1545080
    },
    {
      "epoch": 2.528590038163691,
      "grad_norm": 0.1319388449192047,
      "learning_rate": 4.975389258251353e-06,
      "loss": 0.0185,
      "step": 1545100
    },
    {
      "epoch": 2.528622768602345,
      "grad_norm": 0.30113473534584045,
      "learning_rate": 4.975323366037836e-06,
      "loss": 0.0153,
      "step": 1545120
    },
    {
      "epoch": 2.5286554990409984,
      "grad_norm": 0.9250019788742065,
      "learning_rate": 4.975257473824319e-06,
      "loss": 0.02,
      "step": 1545140
    },
    {
      "epoch": 2.5286882294796516,
      "grad_norm": 0.41087087988853455,
      "learning_rate": 4.9751915816108015e-06,
      "loss": 0.0144,
      "step": 1545160
    },
    {
      "epoch": 2.5287209599183047,
      "grad_norm": 0.31579098105430603,
      "learning_rate": 4.975125689397284e-06,
      "loss": 0.0134,
      "step": 1545180
    },
    {
      "epoch": 2.5287536903569583,
      "grad_norm": 0.6922377943992615,
      "learning_rate": 4.975059797183767e-06,
      "loss": 0.0156,
      "step": 1545200
    },
    {
      "epoch": 2.5287864207956114,
      "grad_norm": 0.17617768049240112,
      "learning_rate": 4.97499390497025e-06,
      "loss": 0.0219,
      "step": 1545220
    },
    {
      "epoch": 2.5288191512342646,
      "grad_norm": 0.1657266914844513,
      "learning_rate": 4.974928012756733e-06,
      "loss": 0.0169,
      "step": 1545240
    },
    {
      "epoch": 2.528851881672918,
      "grad_norm": 0.3069319725036621,
      "learning_rate": 4.974862120543216e-06,
      "loss": 0.0164,
      "step": 1545260
    },
    {
      "epoch": 2.528884612111572,
      "grad_norm": 0.38974249362945557,
      "learning_rate": 4.974796228329699e-06,
      "loss": 0.0134,
      "step": 1545280
    },
    {
      "epoch": 2.528917342550225,
      "grad_norm": 0.5838217735290527,
      "learning_rate": 4.9747303361161815e-06,
      "loss": 0.0128,
      "step": 1545300
    },
    {
      "epoch": 2.528950072988878,
      "grad_norm": 0.6348460912704468,
      "learning_rate": 4.974664443902664e-06,
      "loss": 0.0173,
      "step": 1545320
    },
    {
      "epoch": 2.5289828034275317,
      "grad_norm": 0.2660883963108063,
      "learning_rate": 4.974598551689147e-06,
      "loss": 0.0088,
      "step": 1545340
    },
    {
      "epoch": 2.529015533866185,
      "grad_norm": 0.1370641589164734,
      "learning_rate": 4.97453265947563e-06,
      "loss": 0.0114,
      "step": 1545360
    },
    {
      "epoch": 2.529048264304838,
      "grad_norm": 0.12228228896856308,
      "learning_rate": 4.974466767262113e-06,
      "loss": 0.0139,
      "step": 1545380
    },
    {
      "epoch": 2.5290809947434916,
      "grad_norm": 0.5584127902984619,
      "learning_rate": 4.974400875048596e-06,
      "loss": 0.012,
      "step": 1545400
    },
    {
      "epoch": 2.529113725182145,
      "grad_norm": 0.3956843316555023,
      "learning_rate": 4.974334982835079e-06,
      "loss": 0.0104,
      "step": 1545420
    },
    {
      "epoch": 2.5291464556207983,
      "grad_norm": 0.22306574881076813,
      "learning_rate": 4.9742690906215615e-06,
      "loss": 0.0124,
      "step": 1545440
    },
    {
      "epoch": 2.5291791860594515,
      "grad_norm": 0.5218859314918518,
      "learning_rate": 4.974203198408044e-06,
      "loss": 0.0173,
      "step": 1545460
    },
    {
      "epoch": 2.529211916498105,
      "grad_norm": 0.39732134342193604,
      "learning_rate": 4.974137306194528e-06,
      "loss": 0.0102,
      "step": 1545480
    },
    {
      "epoch": 2.529244646936758,
      "grad_norm": 0.16800084710121155,
      "learning_rate": 4.974071413981011e-06,
      "loss": 0.0148,
      "step": 1545500
    },
    {
      "epoch": 2.5292773773754114,
      "grad_norm": 0.2208252102136612,
      "learning_rate": 4.974005521767493e-06,
      "loss": 0.0091,
      "step": 1545520
    },
    {
      "epoch": 2.529310107814065,
      "grad_norm": 0.18414661288261414,
      "learning_rate": 4.973939629553976e-06,
      "loss": 0.0122,
      "step": 1545540
    },
    {
      "epoch": 2.529342838252718,
      "grad_norm": 0.8628283143043518,
      "learning_rate": 4.973873737340459e-06,
      "loss": 0.0217,
      "step": 1545560
    },
    {
      "epoch": 2.5293755686913717,
      "grad_norm": 0.407784104347229,
      "learning_rate": 4.9738078451269416e-06,
      "loss": 0.0154,
      "step": 1545580
    },
    {
      "epoch": 2.529408299130025,
      "grad_norm": 0.44379377365112305,
      "learning_rate": 4.973741952913424e-06,
      "loss": 0.0151,
      "step": 1545600
    },
    {
      "epoch": 2.5294410295686784,
      "grad_norm": 0.16761748492717743,
      "learning_rate": 4.973676060699907e-06,
      "loss": 0.0135,
      "step": 1545620
    },
    {
      "epoch": 2.5294737600073316,
      "grad_norm": 0.5584957599639893,
      "learning_rate": 4.973610168486391e-06,
      "loss": 0.0192,
      "step": 1545640
    },
    {
      "epoch": 2.5295064904459847,
      "grad_norm": 0.3182899057865143,
      "learning_rate": 4.973544276272873e-06,
      "loss": 0.0156,
      "step": 1545660
    },
    {
      "epoch": 2.5295392208846383,
      "grad_norm": 0.18816278874874115,
      "learning_rate": 4.973478384059356e-06,
      "loss": 0.0136,
      "step": 1545680
    },
    {
      "epoch": 2.5295719513232915,
      "grad_norm": 0.2704741954803467,
      "learning_rate": 4.973412491845839e-06,
      "loss": 0.0103,
      "step": 1545700
    },
    {
      "epoch": 2.529604681761945,
      "grad_norm": 0.3113917112350464,
      "learning_rate": 4.9733465996323216e-06,
      "loss": 0.0173,
      "step": 1545720
    },
    {
      "epoch": 2.5296374122005982,
      "grad_norm": 0.07468024641275406,
      "learning_rate": 4.973280707418805e-06,
      "loss": 0.0161,
      "step": 1545740
    },
    {
      "epoch": 2.529670142639252,
      "grad_norm": 0.6008318662643433,
      "learning_rate": 4.973214815205288e-06,
      "loss": 0.0178,
      "step": 1545760
    },
    {
      "epoch": 2.529702873077905,
      "grad_norm": 0.46967747807502747,
      "learning_rate": 4.973148922991771e-06,
      "loss": 0.0189,
      "step": 1545780
    },
    {
      "epoch": 2.529735603516558,
      "grad_norm": 0.4498053193092346,
      "learning_rate": 4.973083030778253e-06,
      "loss": 0.0148,
      "step": 1545800
    },
    {
      "epoch": 2.5297683339552117,
      "grad_norm": 0.18442808091640472,
      "learning_rate": 4.973017138564736e-06,
      "loss": 0.0082,
      "step": 1545820
    },
    {
      "epoch": 2.529801064393865,
      "grad_norm": 0.4475752115249634,
      "learning_rate": 4.972951246351219e-06,
      "loss": 0.011,
      "step": 1545840
    },
    {
      "epoch": 2.5298337948325185,
      "grad_norm": 1.6920415163040161,
      "learning_rate": 4.972885354137702e-06,
      "loss": 0.0179,
      "step": 1545860
    },
    {
      "epoch": 2.5298665252711716,
      "grad_norm": 0.46884170174598694,
      "learning_rate": 4.972819461924185e-06,
      "loss": 0.0118,
      "step": 1545880
    },
    {
      "epoch": 2.529899255709825,
      "grad_norm": 0.4623231589794159,
      "learning_rate": 4.972753569710668e-06,
      "loss": 0.0146,
      "step": 1545900
    },
    {
      "epoch": 2.5299319861484784,
      "grad_norm": 0.33189135789871216,
      "learning_rate": 4.972687677497151e-06,
      "loss": 0.0146,
      "step": 1545920
    },
    {
      "epoch": 2.5299647165871315,
      "grad_norm": 0.24225299060344696,
      "learning_rate": 4.972621785283633e-06,
      "loss": 0.0161,
      "step": 1545940
    },
    {
      "epoch": 2.529997447025785,
      "grad_norm": 0.9469951391220093,
      "learning_rate": 4.972555893070116e-06,
      "loss": 0.0208,
      "step": 1545960
    },
    {
      "epoch": 2.5300301774644383,
      "grad_norm": 0.16876979172229767,
      "learning_rate": 4.972490000856599e-06,
      "loss": 0.017,
      "step": 1545980
    },
    {
      "epoch": 2.530062907903092,
      "grad_norm": 0.7143537402153015,
      "learning_rate": 4.972424108643082e-06,
      "loss": 0.0165,
      "step": 1546000
    },
    {
      "epoch": 2.530095638341745,
      "grad_norm": 0.4229205250740051,
      "learning_rate": 4.972358216429564e-06,
      "loss": 0.0121,
      "step": 1546020
    },
    {
      "epoch": 2.5301283687803986,
      "grad_norm": 0.9051196575164795,
      "learning_rate": 4.972292324216048e-06,
      "loss": 0.0165,
      "step": 1546040
    },
    {
      "epoch": 2.5301610992190517,
      "grad_norm": 0.15542255342006683,
      "learning_rate": 4.972226432002531e-06,
      "loss": 0.0105,
      "step": 1546060
    },
    {
      "epoch": 2.530193829657705,
      "grad_norm": 0.2839672863483429,
      "learning_rate": 4.9721605397890134e-06,
      "loss": 0.0129,
      "step": 1546080
    },
    {
      "epoch": 2.5302265600963585,
      "grad_norm": 0.7606526017189026,
      "learning_rate": 4.972094647575497e-06,
      "loss": 0.0177,
      "step": 1546100
    },
    {
      "epoch": 2.5302592905350116,
      "grad_norm": 0.25580665469169617,
      "learning_rate": 4.97202875536198e-06,
      "loss": 0.0119,
      "step": 1546120
    },
    {
      "epoch": 2.5302920209736652,
      "grad_norm": 0.1354752480983734,
      "learning_rate": 4.9719628631484625e-06,
      "loss": 0.0181,
      "step": 1546140
    },
    {
      "epoch": 2.5303247514123184,
      "grad_norm": 0.12747953832149506,
      "learning_rate": 4.971896970934945e-06,
      "loss": 0.0157,
      "step": 1546160
    },
    {
      "epoch": 2.530357481850972,
      "grad_norm": 0.31075572967529297,
      "learning_rate": 4.971831078721428e-06,
      "loss": 0.0141,
      "step": 1546180
    },
    {
      "epoch": 2.530390212289625,
      "grad_norm": 0.2862517237663269,
      "learning_rate": 4.971765186507911e-06,
      "loss": 0.0122,
      "step": 1546200
    },
    {
      "epoch": 2.5304229427282783,
      "grad_norm": 0.9663010239601135,
      "learning_rate": 4.9716992942943935e-06,
      "loss": 0.0129,
      "step": 1546220
    },
    {
      "epoch": 2.530455673166932,
      "grad_norm": 0.16533438861370087,
      "learning_rate": 4.971633402080876e-06,
      "loss": 0.0131,
      "step": 1546240
    },
    {
      "epoch": 2.530488403605585,
      "grad_norm": 0.5612038373947144,
      "learning_rate": 4.97156750986736e-06,
      "loss": 0.0175,
      "step": 1546260
    },
    {
      "epoch": 2.5305211340442386,
      "grad_norm": 0.37055841088294983,
      "learning_rate": 4.9715016176538425e-06,
      "loss": 0.016,
      "step": 1546280
    },
    {
      "epoch": 2.5305538644828918,
      "grad_norm": 0.37138843536376953,
      "learning_rate": 4.971435725440325e-06,
      "loss": 0.018,
      "step": 1546300
    },
    {
      "epoch": 2.5305865949215454,
      "grad_norm": 0.0713275745511055,
      "learning_rate": 4.971369833226808e-06,
      "loss": 0.0126,
      "step": 1546320
    },
    {
      "epoch": 2.5306193253601985,
      "grad_norm": 0.1829695850610733,
      "learning_rate": 4.971303941013291e-06,
      "loss": 0.0105,
      "step": 1546340
    },
    {
      "epoch": 2.5306520557988517,
      "grad_norm": 0.17102639377117157,
      "learning_rate": 4.9712380487997735e-06,
      "loss": 0.0152,
      "step": 1546360
    },
    {
      "epoch": 2.5306847862375053,
      "grad_norm": 0.11619090288877487,
      "learning_rate": 4.971172156586256e-06,
      "loss": 0.0107,
      "step": 1546380
    },
    {
      "epoch": 2.5307175166761584,
      "grad_norm": 0.3844723403453827,
      "learning_rate": 4.971106264372739e-06,
      "loss": 0.0214,
      "step": 1546400
    },
    {
      "epoch": 2.5307502471148116,
      "grad_norm": 0.36152997612953186,
      "learning_rate": 4.971040372159222e-06,
      "loss": 0.0154,
      "step": 1546420
    },
    {
      "epoch": 2.530782977553465,
      "grad_norm": 0.028508365154266357,
      "learning_rate": 4.970974479945705e-06,
      "loss": 0.0082,
      "step": 1546440
    },
    {
      "epoch": 2.5308157079921187,
      "grad_norm": 0.20279443264007568,
      "learning_rate": 4.970908587732188e-06,
      "loss": 0.0137,
      "step": 1546460
    },
    {
      "epoch": 2.530848438430772,
      "grad_norm": 0.1919761598110199,
      "learning_rate": 4.970842695518671e-06,
      "loss": 0.0148,
      "step": 1546480
    },
    {
      "epoch": 2.530881168869425,
      "grad_norm": 0.6021191477775574,
      "learning_rate": 4.970776803305154e-06,
      "loss": 0.0167,
      "step": 1546500
    },
    {
      "epoch": 2.5309138993080786,
      "grad_norm": 0.4267544746398926,
      "learning_rate": 4.970710911091637e-06,
      "loss": 0.0154,
      "step": 1546520
    },
    {
      "epoch": 2.530946629746732,
      "grad_norm": 0.20928774774074554,
      "learning_rate": 4.97064501887812e-06,
      "loss": 0.0088,
      "step": 1546540
    },
    {
      "epoch": 2.530979360185385,
      "grad_norm": 1.209625482559204,
      "learning_rate": 4.9705791266646026e-06,
      "loss": 0.0244,
      "step": 1546560
    },
    {
      "epoch": 2.5310120906240385,
      "grad_norm": 0.6535124182701111,
      "learning_rate": 4.970513234451085e-06,
      "loss": 0.013,
      "step": 1546580
    },
    {
      "epoch": 2.531044821062692,
      "grad_norm": 0.35939034819602966,
      "learning_rate": 4.970447342237568e-06,
      "loss": 0.0221,
      "step": 1546600
    },
    {
      "epoch": 2.5310775515013453,
      "grad_norm": 0.6130063533782959,
      "learning_rate": 4.970381450024051e-06,
      "loss": 0.0148,
      "step": 1546620
    },
    {
      "epoch": 2.5311102819399984,
      "grad_norm": 0.3015337288379669,
      "learning_rate": 4.9703155578105335e-06,
      "loss": 0.0107,
      "step": 1546640
    },
    {
      "epoch": 2.531143012378652,
      "grad_norm": 0.4499197006225586,
      "learning_rate": 4.970249665597017e-06,
      "loss": 0.014,
      "step": 1546660
    },
    {
      "epoch": 2.531175742817305,
      "grad_norm": 0.2569113075733185,
      "learning_rate": 4.9701837733835e-06,
      "loss": 0.0168,
      "step": 1546680
    },
    {
      "epoch": 2.5312084732559583,
      "grad_norm": 0.24276632070541382,
      "learning_rate": 4.970117881169983e-06,
      "loss": 0.0141,
      "step": 1546700
    },
    {
      "epoch": 2.531241203694612,
      "grad_norm": 1.011419653892517,
      "learning_rate": 4.970051988956465e-06,
      "loss": 0.0211,
      "step": 1546720
    },
    {
      "epoch": 2.5312739341332655,
      "grad_norm": 0.5959199070930481,
      "learning_rate": 4.969986096742948e-06,
      "loss": 0.0144,
      "step": 1546740
    },
    {
      "epoch": 2.5313066645719187,
      "grad_norm": 0.688103973865509,
      "learning_rate": 4.969920204529431e-06,
      "loss": 0.0159,
      "step": 1546760
    },
    {
      "epoch": 2.531339395010572,
      "grad_norm": 0.39403632283210754,
      "learning_rate": 4.969854312315914e-06,
      "loss": 0.0109,
      "step": 1546780
    },
    {
      "epoch": 2.5313721254492254,
      "grad_norm": 0.1913723647594452,
      "learning_rate": 4.969788420102397e-06,
      "loss": 0.0172,
      "step": 1546800
    },
    {
      "epoch": 2.5314048558878786,
      "grad_norm": 0.7130398154258728,
      "learning_rate": 4.96972252788888e-06,
      "loss": 0.0237,
      "step": 1546820
    },
    {
      "epoch": 2.5314375863265317,
      "grad_norm": 0.29663780331611633,
      "learning_rate": 4.969656635675363e-06,
      "loss": 0.0175,
      "step": 1546840
    },
    {
      "epoch": 2.5314703167651853,
      "grad_norm": 0.5793166756629944,
      "learning_rate": 4.969590743461845e-06,
      "loss": 0.0105,
      "step": 1546860
    },
    {
      "epoch": 2.531503047203839,
      "grad_norm": 0.4176308810710907,
      "learning_rate": 4.969524851248328e-06,
      "loss": 0.0152,
      "step": 1546880
    },
    {
      "epoch": 2.531535777642492,
      "grad_norm": 0.3396683931350708,
      "learning_rate": 4.969458959034812e-06,
      "loss": 0.0116,
      "step": 1546900
    },
    {
      "epoch": 2.531568508081145,
      "grad_norm": 0.9597322940826416,
      "learning_rate": 4.9693930668212944e-06,
      "loss": 0.0209,
      "step": 1546920
    },
    {
      "epoch": 2.531601238519799,
      "grad_norm": 0.31354832649230957,
      "learning_rate": 4.969327174607777e-06,
      "loss": 0.0146,
      "step": 1546940
    },
    {
      "epoch": 2.531633968958452,
      "grad_norm": 0.42115432024002075,
      "learning_rate": 4.96926128239426e-06,
      "loss": 0.013,
      "step": 1546960
    },
    {
      "epoch": 2.531666699397105,
      "grad_norm": 0.8936882615089417,
      "learning_rate": 4.969195390180743e-06,
      "loss": 0.0123,
      "step": 1546980
    },
    {
      "epoch": 2.5316994298357587,
      "grad_norm": 0.1898583322763443,
      "learning_rate": 4.969129497967225e-06,
      "loss": 0.0133,
      "step": 1547000
    },
    {
      "epoch": 2.531732160274412,
      "grad_norm": 0.16946536302566528,
      "learning_rate": 4.969063605753708e-06,
      "loss": 0.0146,
      "step": 1547020
    },
    {
      "epoch": 2.5317648907130654,
      "grad_norm": 0.6762943267822266,
      "learning_rate": 4.968997713540191e-06,
      "loss": 0.013,
      "step": 1547040
    },
    {
      "epoch": 2.5317976211517186,
      "grad_norm": 0.29166343808174133,
      "learning_rate": 4.9689318213266745e-06,
      "loss": 0.0119,
      "step": 1547060
    },
    {
      "epoch": 2.531830351590372,
      "grad_norm": 1.0981099605560303,
      "learning_rate": 4.968865929113157e-06,
      "loss": 0.0107,
      "step": 1547080
    },
    {
      "epoch": 2.5318630820290253,
      "grad_norm": 0.10022764652967453,
      "learning_rate": 4.96880003689964e-06,
      "loss": 0.0162,
      "step": 1547100
    },
    {
      "epoch": 2.5318958124676785,
      "grad_norm": 0.2891938090324402,
      "learning_rate": 4.968734144686123e-06,
      "loss": 0.015,
      "step": 1547120
    },
    {
      "epoch": 2.531928542906332,
      "grad_norm": 0.13099700212478638,
      "learning_rate": 4.968668252472606e-06,
      "loss": 0.0105,
      "step": 1547140
    },
    {
      "epoch": 2.531961273344985,
      "grad_norm": 0.21896280348300934,
      "learning_rate": 4.968602360259089e-06,
      "loss": 0.019,
      "step": 1547160
    },
    {
      "epoch": 2.531994003783639,
      "grad_norm": 0.7739510536193848,
      "learning_rate": 4.968536468045572e-06,
      "loss": 0.0192,
      "step": 1547180
    },
    {
      "epoch": 2.532026734222292,
      "grad_norm": 0.15925922989845276,
      "learning_rate": 4.9684705758320545e-06,
      "loss": 0.0165,
      "step": 1547200
    },
    {
      "epoch": 2.5320594646609456,
      "grad_norm": 0.5187504291534424,
      "learning_rate": 4.968404683618537e-06,
      "loss": 0.011,
      "step": 1547220
    },
    {
      "epoch": 2.5320921950995987,
      "grad_norm": 0.19021044671535492,
      "learning_rate": 4.96833879140502e-06,
      "loss": 0.0147,
      "step": 1547240
    },
    {
      "epoch": 2.532124925538252,
      "grad_norm": 0.3022974729537964,
      "learning_rate": 4.968272899191503e-06,
      "loss": 0.0107,
      "step": 1547260
    },
    {
      "epoch": 2.5321576559769055,
      "grad_norm": 0.6724414229393005,
      "learning_rate": 4.9682070069779854e-06,
      "loss": 0.0129,
      "step": 1547280
    },
    {
      "epoch": 2.5321903864155586,
      "grad_norm": 0.19644226133823395,
      "learning_rate": 4.968141114764469e-06,
      "loss": 0.0206,
      "step": 1547300
    },
    {
      "epoch": 2.532223116854212,
      "grad_norm": 0.30055445432662964,
      "learning_rate": 4.968075222550952e-06,
      "loss": 0.0175,
      "step": 1547320
    },
    {
      "epoch": 2.5322558472928653,
      "grad_norm": 0.3414860963821411,
      "learning_rate": 4.9680093303374345e-06,
      "loss": 0.0122,
      "step": 1547340
    },
    {
      "epoch": 2.532288577731519,
      "grad_norm": 0.2186884880065918,
      "learning_rate": 4.967943438123917e-06,
      "loss": 0.016,
      "step": 1547360
    },
    {
      "epoch": 2.532321308170172,
      "grad_norm": 0.36288055777549744,
      "learning_rate": 4.9678775459104e-06,
      "loss": 0.023,
      "step": 1547380
    },
    {
      "epoch": 2.5323540386088252,
      "grad_norm": 0.37051138281822205,
      "learning_rate": 4.967811653696883e-06,
      "loss": 0.0119,
      "step": 1547400
    },
    {
      "epoch": 2.532386769047479,
      "grad_norm": 0.1757017970085144,
      "learning_rate": 4.9677457614833655e-06,
      "loss": 0.0094,
      "step": 1547420
    },
    {
      "epoch": 2.532419499486132,
      "grad_norm": 0.32507359981536865,
      "learning_rate": 4.967679869269848e-06,
      "loss": 0.0081,
      "step": 1547440
    },
    {
      "epoch": 2.5324522299247856,
      "grad_norm": 0.17979511618614197,
      "learning_rate": 4.967613977056332e-06,
      "loss": 0.0157,
      "step": 1547460
    },
    {
      "epoch": 2.5324849603634387,
      "grad_norm": 0.17509695887565613,
      "learning_rate": 4.9675480848428145e-06,
      "loss": 0.015,
      "step": 1547480
    },
    {
      "epoch": 2.5325176908020923,
      "grad_norm": 0.03720233589410782,
      "learning_rate": 4.967482192629297e-06,
      "loss": 0.0151,
      "step": 1547500
    },
    {
      "epoch": 2.5325504212407455,
      "grad_norm": 0.2417309433221817,
      "learning_rate": 4.967416300415781e-06,
      "loss": 0.0157,
      "step": 1547520
    },
    {
      "epoch": 2.5325831516793986,
      "grad_norm": 0.20123104751110077,
      "learning_rate": 4.967350408202264e-06,
      "loss": 0.0091,
      "step": 1547540
    },
    {
      "epoch": 2.532615882118052,
      "grad_norm": 0.2899095118045807,
      "learning_rate": 4.967284515988746e-06,
      "loss": 0.0109,
      "step": 1547560
    },
    {
      "epoch": 2.5326486125567054,
      "grad_norm": 0.334663987159729,
      "learning_rate": 4.967218623775229e-06,
      "loss": 0.0091,
      "step": 1547580
    },
    {
      "epoch": 2.532681342995359,
      "grad_norm": 0.3955266773700714,
      "learning_rate": 4.967152731561712e-06,
      "loss": 0.0118,
      "step": 1547600
    },
    {
      "epoch": 2.532714073434012,
      "grad_norm": 0.36650508642196655,
      "learning_rate": 4.9670868393481946e-06,
      "loss": 0.0145,
      "step": 1547620
    },
    {
      "epoch": 2.5327468038726657,
      "grad_norm": 0.1325630098581314,
      "learning_rate": 4.967020947134677e-06,
      "loss": 0.0192,
      "step": 1547640
    },
    {
      "epoch": 2.532779534311319,
      "grad_norm": 0.12628059089183807,
      "learning_rate": 4.96695505492116e-06,
      "loss": 0.0115,
      "step": 1547660
    },
    {
      "epoch": 2.532812264749972,
      "grad_norm": 0.249764084815979,
      "learning_rate": 4.966889162707644e-06,
      "loss": 0.0166,
      "step": 1547680
    },
    {
      "epoch": 2.5328449951886256,
      "grad_norm": 0.30285346508026123,
      "learning_rate": 4.966823270494126e-06,
      "loss": 0.0169,
      "step": 1547700
    },
    {
      "epoch": 2.5328777256272788,
      "grad_norm": 0.5159839987754822,
      "learning_rate": 4.966757378280609e-06,
      "loss": 0.0181,
      "step": 1547720
    },
    {
      "epoch": 2.5329104560659323,
      "grad_norm": 0.9554610252380371,
      "learning_rate": 4.966691486067092e-06,
      "loss": 0.0113,
      "step": 1547740
    },
    {
      "epoch": 2.5329431865045855,
      "grad_norm": 0.34755054116249084,
      "learning_rate": 4.966625593853575e-06,
      "loss": 0.0121,
      "step": 1547760
    },
    {
      "epoch": 2.532975916943239,
      "grad_norm": 0.256564736366272,
      "learning_rate": 4.966559701640057e-06,
      "loss": 0.0101,
      "step": 1547780
    },
    {
      "epoch": 2.5330086473818922,
      "grad_norm": 0.261383593082428,
      "learning_rate": 4.96649380942654e-06,
      "loss": 0.0217,
      "step": 1547800
    },
    {
      "epoch": 2.5330413778205454,
      "grad_norm": 0.2929280698299408,
      "learning_rate": 4.966427917213023e-06,
      "loss": 0.0146,
      "step": 1547820
    },
    {
      "epoch": 2.533074108259199,
      "grad_norm": 0.37476277351379395,
      "learning_rate": 4.966362024999506e-06,
      "loss": 0.0114,
      "step": 1547840
    },
    {
      "epoch": 2.533106838697852,
      "grad_norm": 0.5637838840484619,
      "learning_rate": 4.966296132785989e-06,
      "loss": 0.0125,
      "step": 1547860
    },
    {
      "epoch": 2.5331395691365057,
      "grad_norm": 0.03924749046564102,
      "learning_rate": 4.966230240572472e-06,
      "loss": 0.0173,
      "step": 1547880
    },
    {
      "epoch": 2.533172299575159,
      "grad_norm": 0.09549818933010101,
      "learning_rate": 4.966164348358955e-06,
      "loss": 0.0133,
      "step": 1547900
    },
    {
      "epoch": 2.5332050300138125,
      "grad_norm": 0.558590829372406,
      "learning_rate": 4.966098456145438e-06,
      "loss": 0.016,
      "step": 1547920
    },
    {
      "epoch": 2.5332377604524656,
      "grad_norm": 0.4953647553920746,
      "learning_rate": 4.966032563931921e-06,
      "loss": 0.0131,
      "step": 1547940
    },
    {
      "epoch": 2.5332704908911188,
      "grad_norm": 0.08340287208557129,
      "learning_rate": 4.965966671718404e-06,
      "loss": 0.0186,
      "step": 1547960
    },
    {
      "epoch": 2.5333032213297724,
      "grad_norm": 0.26311469078063965,
      "learning_rate": 4.965900779504886e-06,
      "loss": 0.0091,
      "step": 1547980
    },
    {
      "epoch": 2.5333359517684255,
      "grad_norm": 0.3563030958175659,
      "learning_rate": 4.965834887291369e-06,
      "loss": 0.0187,
      "step": 1548000
    },
    {
      "epoch": 2.5333686822070787,
      "grad_norm": 0.5202031135559082,
      "learning_rate": 4.965768995077852e-06,
      "loss": 0.016,
      "step": 1548020
    },
    {
      "epoch": 2.5334014126457323,
      "grad_norm": 1.5559591054916382,
      "learning_rate": 4.965703102864335e-06,
      "loss": 0.0102,
      "step": 1548040
    },
    {
      "epoch": 2.533434143084386,
      "grad_norm": 0.7547535300254822,
      "learning_rate": 4.965637210650817e-06,
      "loss": 0.0139,
      "step": 1548060
    },
    {
      "epoch": 2.533466873523039,
      "grad_norm": 0.1903982162475586,
      "learning_rate": 4.965571318437301e-06,
      "loss": 0.0125,
      "step": 1548080
    },
    {
      "epoch": 2.533499603961692,
      "grad_norm": 0.2873651087284088,
      "learning_rate": 4.965505426223784e-06,
      "loss": 0.0124,
      "step": 1548100
    },
    {
      "epoch": 2.5335323344003458,
      "grad_norm": 0.4483262896537781,
      "learning_rate": 4.9654395340102664e-06,
      "loss": 0.013,
      "step": 1548120
    },
    {
      "epoch": 2.533565064838999,
      "grad_norm": 0.14153993129730225,
      "learning_rate": 4.965373641796749e-06,
      "loss": 0.0131,
      "step": 1548140
    },
    {
      "epoch": 2.533597795277652,
      "grad_norm": 0.18881884217262268,
      "learning_rate": 4.965307749583232e-06,
      "loss": 0.0117,
      "step": 1548160
    },
    {
      "epoch": 2.5336305257163056,
      "grad_norm": 0.19404236972332,
      "learning_rate": 4.965241857369715e-06,
      "loss": 0.0149,
      "step": 1548180
    },
    {
      "epoch": 2.5336632561549592,
      "grad_norm": 0.2215515375137329,
      "learning_rate": 4.965175965156198e-06,
      "loss": 0.014,
      "step": 1548200
    },
    {
      "epoch": 2.5336959865936124,
      "grad_norm": 0.2074197381734848,
      "learning_rate": 4.965110072942681e-06,
      "loss": 0.0158,
      "step": 1548220
    },
    {
      "epoch": 2.5337287170322655,
      "grad_norm": 0.22418971359729767,
      "learning_rate": 4.965044180729164e-06,
      "loss": 0.0127,
      "step": 1548240
    },
    {
      "epoch": 2.533761447470919,
      "grad_norm": 0.15264026820659637,
      "learning_rate": 4.9649782885156465e-06,
      "loss": 0.0164,
      "step": 1548260
    },
    {
      "epoch": 2.5337941779095723,
      "grad_norm": 0.11316896229982376,
      "learning_rate": 4.964912396302129e-06,
      "loss": 0.0129,
      "step": 1548280
    },
    {
      "epoch": 2.5338269083482254,
      "grad_norm": 0.21961097419261932,
      "learning_rate": 4.964846504088612e-06,
      "loss": 0.0152,
      "step": 1548300
    },
    {
      "epoch": 2.533859638786879,
      "grad_norm": 0.37726709246635437,
      "learning_rate": 4.9647806118750955e-06,
      "loss": 0.014,
      "step": 1548320
    },
    {
      "epoch": 2.5338923692255326,
      "grad_norm": 0.2861425280570984,
      "learning_rate": 4.964714719661578e-06,
      "loss": 0.0102,
      "step": 1548340
    },
    {
      "epoch": 2.5339250996641858,
      "grad_norm": 0.09877851605415344,
      "learning_rate": 4.964648827448061e-06,
      "loss": 0.0088,
      "step": 1548360
    },
    {
      "epoch": 2.533957830102839,
      "grad_norm": 2.6941707134246826,
      "learning_rate": 4.964582935234544e-06,
      "loss": 0.0228,
      "step": 1548380
    },
    {
      "epoch": 2.5339905605414925,
      "grad_norm": 0.1976674497127533,
      "learning_rate": 4.9645170430210265e-06,
      "loss": 0.0217,
      "step": 1548400
    },
    {
      "epoch": 2.5340232909801457,
      "grad_norm": 0.06542445719242096,
      "learning_rate": 4.964451150807509e-06,
      "loss": 0.0131,
      "step": 1548420
    },
    {
      "epoch": 2.534056021418799,
      "grad_norm": 0.2696584463119507,
      "learning_rate": 4.964385258593992e-06,
      "loss": 0.0112,
      "step": 1548440
    },
    {
      "epoch": 2.5340887518574524,
      "grad_norm": 0.6006886959075928,
      "learning_rate": 4.964319366380475e-06,
      "loss": 0.0243,
      "step": 1548460
    },
    {
      "epoch": 2.534121482296106,
      "grad_norm": 0.3899894952774048,
      "learning_rate": 4.964253474166958e-06,
      "loss": 0.0176,
      "step": 1548480
    },
    {
      "epoch": 2.534154212734759,
      "grad_norm": 0.2673131227493286,
      "learning_rate": 4.964187581953441e-06,
      "loss": 0.0179,
      "step": 1548500
    },
    {
      "epoch": 2.5341869431734123,
      "grad_norm": 1.1990997791290283,
      "learning_rate": 4.964121689739924e-06,
      "loss": 0.0147,
      "step": 1548520
    },
    {
      "epoch": 2.534219673612066,
      "grad_norm": 0.18821494281291962,
      "learning_rate": 4.964055797526407e-06,
      "loss": 0.0119,
      "step": 1548540
    },
    {
      "epoch": 2.534252404050719,
      "grad_norm": 0.29156795144081116,
      "learning_rate": 4.96398990531289e-06,
      "loss": 0.0101,
      "step": 1548560
    },
    {
      "epoch": 2.534285134489372,
      "grad_norm": 0.4654633402824402,
      "learning_rate": 4.963924013099373e-06,
      "loss": 0.0118,
      "step": 1548580
    },
    {
      "epoch": 2.534317864928026,
      "grad_norm": 0.16788317263126373,
      "learning_rate": 4.963858120885856e-06,
      "loss": 0.0206,
      "step": 1548600
    },
    {
      "epoch": 2.534350595366679,
      "grad_norm": 0.4175853431224823,
      "learning_rate": 4.963792228672338e-06,
      "loss": 0.0149,
      "step": 1548620
    },
    {
      "epoch": 2.5343833258053325,
      "grad_norm": 0.5286477208137512,
      "learning_rate": 4.963726336458821e-06,
      "loss": 0.0128,
      "step": 1548640
    },
    {
      "epoch": 2.5344160562439857,
      "grad_norm": 0.5670107007026672,
      "learning_rate": 4.963660444245304e-06,
      "loss": 0.0143,
      "step": 1548660
    },
    {
      "epoch": 2.5344487866826393,
      "grad_norm": 0.2971951365470886,
      "learning_rate": 4.9635945520317865e-06,
      "loss": 0.0185,
      "step": 1548680
    },
    {
      "epoch": 2.5344815171212924,
      "grad_norm": 0.3798118829727173,
      "learning_rate": 4.963528659818269e-06,
      "loss": 0.0107,
      "step": 1548700
    },
    {
      "epoch": 2.5345142475599456,
      "grad_norm": 0.5436046719551086,
      "learning_rate": 4.963462767604753e-06,
      "loss": 0.017,
      "step": 1548720
    },
    {
      "epoch": 2.534546977998599,
      "grad_norm": 0.515473484992981,
      "learning_rate": 4.963396875391236e-06,
      "loss": 0.0115,
      "step": 1548740
    },
    {
      "epoch": 2.5345797084372523,
      "grad_norm": 0.38450106978416443,
      "learning_rate": 4.963330983177718e-06,
      "loss": 0.019,
      "step": 1548760
    },
    {
      "epoch": 2.534612438875906,
      "grad_norm": 0.7812320590019226,
      "learning_rate": 4.963265090964201e-06,
      "loss": 0.0112,
      "step": 1548780
    },
    {
      "epoch": 2.534645169314559,
      "grad_norm": 0.5123112797737122,
      "learning_rate": 4.963199198750684e-06,
      "loss": 0.0172,
      "step": 1548800
    },
    {
      "epoch": 2.5346778997532127,
      "grad_norm": 1.3135977983474731,
      "learning_rate": 4.9631333065371666e-06,
      "loss": 0.0166,
      "step": 1548820
    },
    {
      "epoch": 2.534710630191866,
      "grad_norm": 0.3477798402309418,
      "learning_rate": 4.963067414323649e-06,
      "loss": 0.0124,
      "step": 1548840
    },
    {
      "epoch": 2.534743360630519,
      "grad_norm": 0.4017964005470276,
      "learning_rate": 4.963001522110132e-06,
      "loss": 0.016,
      "step": 1548860
    },
    {
      "epoch": 2.5347760910691726,
      "grad_norm": 0.5258440375328064,
      "learning_rate": 4.962935629896616e-06,
      "loss": 0.0094,
      "step": 1548880
    },
    {
      "epoch": 2.5348088215078257,
      "grad_norm": 0.6627250909805298,
      "learning_rate": 4.962869737683098e-06,
      "loss": 0.0162,
      "step": 1548900
    },
    {
      "epoch": 2.5348415519464793,
      "grad_norm": 0.5800628066062927,
      "learning_rate": 4.962803845469581e-06,
      "loss": 0.0129,
      "step": 1548920
    },
    {
      "epoch": 2.5348742823851325,
      "grad_norm": 0.3127797842025757,
      "learning_rate": 4.962737953256065e-06,
      "loss": 0.0087,
      "step": 1548940
    },
    {
      "epoch": 2.534907012823786,
      "grad_norm": 0.44056934118270874,
      "learning_rate": 4.9626720610425474e-06,
      "loss": 0.0188,
      "step": 1548960
    },
    {
      "epoch": 2.534939743262439,
      "grad_norm": 0.48495274782180786,
      "learning_rate": 4.96260616882903e-06,
      "loss": 0.0112,
      "step": 1548980
    },
    {
      "epoch": 2.5349724737010924,
      "grad_norm": 0.21132242679595947,
      "learning_rate": 4.962540276615513e-06,
      "loss": 0.0156,
      "step": 1549000
    },
    {
      "epoch": 2.535005204139746,
      "grad_norm": 0.13032573461532593,
      "learning_rate": 4.962474384401996e-06,
      "loss": 0.0167,
      "step": 1549020
    },
    {
      "epoch": 2.535037934578399,
      "grad_norm": 0.10554640740156174,
      "learning_rate": 4.962408492188478e-06,
      "loss": 0.0112,
      "step": 1549040
    },
    {
      "epoch": 2.5350706650170527,
      "grad_norm": 0.7346598505973816,
      "learning_rate": 4.962342599974961e-06,
      "loss": 0.0125,
      "step": 1549060
    },
    {
      "epoch": 2.535103395455706,
      "grad_norm": 0.4912874102592468,
      "learning_rate": 4.962276707761444e-06,
      "loss": 0.0137,
      "step": 1549080
    },
    {
      "epoch": 2.5351361258943594,
      "grad_norm": 0.26954877376556396,
      "learning_rate": 4.9622108155479275e-06,
      "loss": 0.0161,
      "step": 1549100
    },
    {
      "epoch": 2.5351688563330126,
      "grad_norm": 0.4724830985069275,
      "learning_rate": 4.96214492333441e-06,
      "loss": 0.0154,
      "step": 1549120
    },
    {
      "epoch": 2.5352015867716657,
      "grad_norm": 0.4627492427825928,
      "learning_rate": 4.962079031120893e-06,
      "loss": 0.0104,
      "step": 1549140
    },
    {
      "epoch": 2.5352343172103193,
      "grad_norm": 0.1562882661819458,
      "learning_rate": 4.962013138907376e-06,
      "loss": 0.014,
      "step": 1549160
    },
    {
      "epoch": 2.5352670476489725,
      "grad_norm": 0.15837471187114716,
      "learning_rate": 4.961947246693858e-06,
      "loss": 0.0117,
      "step": 1549180
    },
    {
      "epoch": 2.535299778087626,
      "grad_norm": 0.49761658906936646,
      "learning_rate": 4.961881354480341e-06,
      "loss": 0.0114,
      "step": 1549200
    },
    {
      "epoch": 2.5353325085262792,
      "grad_norm": 0.1728333979845047,
      "learning_rate": 4.961815462266824e-06,
      "loss": 0.0158,
      "step": 1549220
    },
    {
      "epoch": 2.535365238964933,
      "grad_norm": 0.6566173434257507,
      "learning_rate": 4.961749570053307e-06,
      "loss": 0.0161,
      "step": 1549240
    },
    {
      "epoch": 2.535397969403586,
      "grad_norm": 0.9307505488395691,
      "learning_rate": 4.96168367783979e-06,
      "loss": 0.034,
      "step": 1549260
    },
    {
      "epoch": 2.535430699842239,
      "grad_norm": 0.3275405764579773,
      "learning_rate": 4.961617785626273e-06,
      "loss": 0.019,
      "step": 1549280
    },
    {
      "epoch": 2.5354634302808927,
      "grad_norm": 0.11234293133020401,
      "learning_rate": 4.961551893412756e-06,
      "loss": 0.0158,
      "step": 1549300
    },
    {
      "epoch": 2.535496160719546,
      "grad_norm": 0.20134137570858002,
      "learning_rate": 4.9614860011992384e-06,
      "loss": 0.0117,
      "step": 1549320
    },
    {
      "epoch": 2.5355288911581995,
      "grad_norm": 0.12317325919866562,
      "learning_rate": 4.961420108985722e-06,
      "loss": 0.0114,
      "step": 1549340
    },
    {
      "epoch": 2.5355616215968526,
      "grad_norm": 0.31998616456985474,
      "learning_rate": 4.961354216772205e-06,
      "loss": 0.01,
      "step": 1549360
    },
    {
      "epoch": 2.535594352035506,
      "grad_norm": 0.6701710224151611,
      "learning_rate": 4.9612883245586875e-06,
      "loss": 0.0141,
      "step": 1549380
    },
    {
      "epoch": 2.5356270824741594,
      "grad_norm": 0.37547972798347473,
      "learning_rate": 4.96122243234517e-06,
      "loss": 0.0145,
      "step": 1549400
    },
    {
      "epoch": 2.5356598129128125,
      "grad_norm": 0.22831284999847412,
      "learning_rate": 4.961156540131653e-06,
      "loss": 0.0179,
      "step": 1549420
    },
    {
      "epoch": 2.535692543351466,
      "grad_norm": 0.6202446818351746,
      "learning_rate": 4.961090647918136e-06,
      "loss": 0.0157,
      "step": 1549440
    },
    {
      "epoch": 2.5357252737901192,
      "grad_norm": 0.5559494495391846,
      "learning_rate": 4.9610247557046185e-06,
      "loss": 0.0097,
      "step": 1549460
    },
    {
      "epoch": 2.5357580042287724,
      "grad_norm": 0.6691461801528931,
      "learning_rate": 4.960958863491101e-06,
      "loss": 0.0176,
      "step": 1549480
    },
    {
      "epoch": 2.535790734667426,
      "grad_norm": 0.9157832264900208,
      "learning_rate": 4.960892971277585e-06,
      "loss": 0.0129,
      "step": 1549500
    },
    {
      "epoch": 2.5358234651060796,
      "grad_norm": 0.45883554220199585,
      "learning_rate": 4.9608270790640675e-06,
      "loss": 0.0143,
      "step": 1549520
    },
    {
      "epoch": 2.5358561955447327,
      "grad_norm": 0.3026896119117737,
      "learning_rate": 4.96076118685055e-06,
      "loss": 0.0174,
      "step": 1549540
    },
    {
      "epoch": 2.535888925983386,
      "grad_norm": 0.12193305790424347,
      "learning_rate": 4.960695294637033e-06,
      "loss": 0.0131,
      "step": 1549560
    },
    {
      "epoch": 2.5359216564220395,
      "grad_norm": 0.40223678946495056,
      "learning_rate": 4.960629402423516e-06,
      "loss": 0.0144,
      "step": 1549580
    },
    {
      "epoch": 2.5359543868606926,
      "grad_norm": 0.2771971821784973,
      "learning_rate": 4.960563510209999e-06,
      "loss": 0.0191,
      "step": 1549600
    },
    {
      "epoch": 2.535987117299346,
      "grad_norm": 0.44055619835853577,
      "learning_rate": 4.960497617996482e-06,
      "loss": 0.018,
      "step": 1549620
    },
    {
      "epoch": 2.5360198477379994,
      "grad_norm": 0.09191510826349258,
      "learning_rate": 4.960431725782965e-06,
      "loss": 0.0182,
      "step": 1549640
    },
    {
      "epoch": 2.536052578176653,
      "grad_norm": 0.5021892786026001,
      "learning_rate": 4.9603658335694476e-06,
      "loss": 0.0142,
      "step": 1549660
    },
    {
      "epoch": 2.536085308615306,
      "grad_norm": 0.11883223801851273,
      "learning_rate": 4.96029994135593e-06,
      "loss": 0.0135,
      "step": 1549680
    },
    {
      "epoch": 2.5361180390539593,
      "grad_norm": 0.1923103630542755,
      "learning_rate": 4.960234049142413e-06,
      "loss": 0.0182,
      "step": 1549700
    },
    {
      "epoch": 2.536150769492613,
      "grad_norm": 0.17029735445976257,
      "learning_rate": 4.960168156928896e-06,
      "loss": 0.0146,
      "step": 1549720
    },
    {
      "epoch": 2.536183499931266,
      "grad_norm": 0.23418478667736053,
      "learning_rate": 4.960102264715379e-06,
      "loss": 0.0127,
      "step": 1549740
    },
    {
      "epoch": 2.536216230369919,
      "grad_norm": 0.1477249562740326,
      "learning_rate": 4.960036372501862e-06,
      "loss": 0.0205,
      "step": 1549760
    },
    {
      "epoch": 2.5362489608085728,
      "grad_norm": 1.0222378969192505,
      "learning_rate": 4.959970480288345e-06,
      "loss": 0.0186,
      "step": 1549780
    },
    {
      "epoch": 2.5362816912472264,
      "grad_norm": 1.1563339233398438,
      "learning_rate": 4.959904588074828e-06,
      "loss": 0.013,
      "step": 1549800
    },
    {
      "epoch": 2.5363144216858795,
      "grad_norm": 0.3372921943664551,
      "learning_rate": 4.95983869586131e-06,
      "loss": 0.0129,
      "step": 1549820
    },
    {
      "epoch": 2.5363471521245327,
      "grad_norm": 0.21851927042007446,
      "learning_rate": 4.959772803647793e-06,
      "loss": 0.0227,
      "step": 1549840
    },
    {
      "epoch": 2.5363798825631862,
      "grad_norm": 0.3282376825809479,
      "learning_rate": 4.959706911434276e-06,
      "loss": 0.0153,
      "step": 1549860
    },
    {
      "epoch": 2.5364126130018394,
      "grad_norm": 0.20109917223453522,
      "learning_rate": 4.9596410192207585e-06,
      "loss": 0.0113,
      "step": 1549880
    },
    {
      "epoch": 2.5364453434404926,
      "grad_norm": 0.3923054337501526,
      "learning_rate": 4.959575127007242e-06,
      "loss": 0.0143,
      "step": 1549900
    },
    {
      "epoch": 2.536478073879146,
      "grad_norm": 0.4067069888114929,
      "learning_rate": 4.959509234793725e-06,
      "loss": 0.0112,
      "step": 1549920
    },
    {
      "epoch": 2.5365108043177997,
      "grad_norm": 0.5716626644134521,
      "learning_rate": 4.959443342580208e-06,
      "loss": 0.0149,
      "step": 1549940
    },
    {
      "epoch": 2.536543534756453,
      "grad_norm": 0.33549320697784424,
      "learning_rate": 4.959377450366691e-06,
      "loss": 0.0134,
      "step": 1549960
    },
    {
      "epoch": 2.536576265195106,
      "grad_norm": 0.2923556864261627,
      "learning_rate": 4.959311558153174e-06,
      "loss": 0.016,
      "step": 1549980
    },
    {
      "epoch": 2.5366089956337596,
      "grad_norm": 0.15787914395332336,
      "learning_rate": 4.959245665939657e-06,
      "loss": 0.0196,
      "step": 1550000
    },
    {
      "epoch": 2.5366089956337596,
      "eval_loss": 0.008073369041085243,
      "eval_runtime": 6509.7691,
      "eval_samples_per_second": 157.895,
      "eval_steps_per_second": 15.79,
      "eval_sts-dev_pearson_cosine": 0.9814525786702755,
      "eval_sts-dev_spearman_cosine": 0.8932551047923136,
      "step": 1550000
    },
    {
      "epoch": 2.536641726072413,
      "grad_norm": 0.08115661144256592,
      "learning_rate": 4.959179773726139e-06,
      "loss": 0.0122,
      "step": 1550020
    },
    {
      "epoch": 2.536674456511066,
      "grad_norm": 0.30006319284439087,
      "learning_rate": 4.959113881512622e-06,
      "loss": 0.0211,
      "step": 1550040
    },
    {
      "epoch": 2.5367071869497195,
      "grad_norm": 0.6453320384025574,
      "learning_rate": 4.959047989299105e-06,
      "loss": 0.0118,
      "step": 1550060
    },
    {
      "epoch": 2.5367399173883727,
      "grad_norm": 0.6555392742156982,
      "learning_rate": 4.958982097085588e-06,
      "loss": 0.0149,
      "step": 1550080
    },
    {
      "epoch": 2.5367726478270263,
      "grad_norm": 0.29594677686691284,
      "learning_rate": 4.95891620487207e-06,
      "loss": 0.0101,
      "step": 1550100
    },
    {
      "epoch": 2.5368053782656794,
      "grad_norm": 0.3175802230834961,
      "learning_rate": 4.958850312658554e-06,
      "loss": 0.013,
      "step": 1550120
    },
    {
      "epoch": 2.536838108704333,
      "grad_norm": 0.3483627438545227,
      "learning_rate": 4.958784420445037e-06,
      "loss": 0.0164,
      "step": 1550140
    },
    {
      "epoch": 2.536870839142986,
      "grad_norm": 0.21798722445964813,
      "learning_rate": 4.9587185282315194e-06,
      "loss": 0.0142,
      "step": 1550160
    },
    {
      "epoch": 2.5369035695816393,
      "grad_norm": 0.4917621910572052,
      "learning_rate": 4.958652636018002e-06,
      "loss": 0.0095,
      "step": 1550180
    },
    {
      "epoch": 2.536936300020293,
      "grad_norm": 0.7122160792350769,
      "learning_rate": 4.958586743804485e-06,
      "loss": 0.013,
      "step": 1550200
    },
    {
      "epoch": 2.536969030458946,
      "grad_norm": 0.47497931122779846,
      "learning_rate": 4.958520851590968e-06,
      "loss": 0.0096,
      "step": 1550220
    },
    {
      "epoch": 2.5370017608975997,
      "grad_norm": 0.3229328989982605,
      "learning_rate": 4.95845495937745e-06,
      "loss": 0.0121,
      "step": 1550240
    },
    {
      "epoch": 2.537034491336253,
      "grad_norm": 0.28501126170158386,
      "learning_rate": 4.958389067163933e-06,
      "loss": 0.018,
      "step": 1550260
    },
    {
      "epoch": 2.5370672217749064,
      "grad_norm": 0.6384785175323486,
      "learning_rate": 4.958323174950416e-06,
      "loss": 0.0179,
      "step": 1550280
    },
    {
      "epoch": 2.5370999522135596,
      "grad_norm": 0.20684532821178436,
      "learning_rate": 4.9582572827368995e-06,
      "loss": 0.012,
      "step": 1550300
    },
    {
      "epoch": 2.5371326826522127,
      "grad_norm": 0.7301006317138672,
      "learning_rate": 4.958191390523382e-06,
      "loss": 0.0186,
      "step": 1550320
    },
    {
      "epoch": 2.5371654130908663,
      "grad_norm": 0.897739589214325,
      "learning_rate": 4.958125498309865e-06,
      "loss": 0.0117,
      "step": 1550340
    },
    {
      "epoch": 2.5371981435295194,
      "grad_norm": 0.5520824790000916,
      "learning_rate": 4.9580596060963485e-06,
      "loss": 0.015,
      "step": 1550360
    },
    {
      "epoch": 2.537230873968173,
      "grad_norm": 2.31729793548584,
      "learning_rate": 4.957993713882831e-06,
      "loss": 0.0142,
      "step": 1550380
    },
    {
      "epoch": 2.537263604406826,
      "grad_norm": 0.9167271852493286,
      "learning_rate": 4.957927821669314e-06,
      "loss": 0.0179,
      "step": 1550400
    },
    {
      "epoch": 2.53729633484548,
      "grad_norm": 0.4308801591396332,
      "learning_rate": 4.957861929455797e-06,
      "loss": 0.0132,
      "step": 1550420
    },
    {
      "epoch": 2.537329065284133,
      "grad_norm": 0.3202710747718811,
      "learning_rate": 4.9577960372422795e-06,
      "loss": 0.0099,
      "step": 1550440
    },
    {
      "epoch": 2.537361795722786,
      "grad_norm": 0.35223034024238586,
      "learning_rate": 4.957730145028762e-06,
      "loss": 0.0161,
      "step": 1550460
    },
    {
      "epoch": 2.5373945261614397,
      "grad_norm": 0.609341561794281,
      "learning_rate": 4.957664252815245e-06,
      "loss": 0.0186,
      "step": 1550480
    },
    {
      "epoch": 2.537427256600093,
      "grad_norm": 0.24035975337028503,
      "learning_rate": 4.957598360601728e-06,
      "loss": 0.0178,
      "step": 1550500
    },
    {
      "epoch": 2.5374599870387464,
      "grad_norm": 0.19032879173755646,
      "learning_rate": 4.957532468388211e-06,
      "loss": 0.014,
      "step": 1550520
    },
    {
      "epoch": 2.5374927174773996,
      "grad_norm": 0.4233717918395996,
      "learning_rate": 4.957466576174694e-06,
      "loss": 0.0125,
      "step": 1550540
    },
    {
      "epoch": 2.537525447916053,
      "grad_norm": 0.5262549519538879,
      "learning_rate": 4.957400683961177e-06,
      "loss": 0.0142,
      "step": 1550560
    },
    {
      "epoch": 2.5375581783547063,
      "grad_norm": 0.2727848291397095,
      "learning_rate": 4.9573347917476595e-06,
      "loss": 0.016,
      "step": 1550580
    },
    {
      "epoch": 2.5375909087933595,
      "grad_norm": 0.39304375648498535,
      "learning_rate": 4.957268899534142e-06,
      "loss": 0.0131,
      "step": 1550600
    },
    {
      "epoch": 2.537623639232013,
      "grad_norm": 0.1845499873161316,
      "learning_rate": 4.957203007320625e-06,
      "loss": 0.0143,
      "step": 1550620
    },
    {
      "epoch": 2.537656369670666,
      "grad_norm": 0.14472076296806335,
      "learning_rate": 4.957137115107108e-06,
      "loss": 0.0123,
      "step": 1550640
    },
    {
      "epoch": 2.53768910010932,
      "grad_norm": 0.6436418890953064,
      "learning_rate": 4.957071222893591e-06,
      "loss": 0.0128,
      "step": 1550660
    },
    {
      "epoch": 2.537721830547973,
      "grad_norm": 0.6886204481124878,
      "learning_rate": 4.957005330680074e-06,
      "loss": 0.0184,
      "step": 1550680
    },
    {
      "epoch": 2.5377545609866266,
      "grad_norm": 0.45020222663879395,
      "learning_rate": 4.956939438466557e-06,
      "loss": 0.0129,
      "step": 1550700
    },
    {
      "epoch": 2.5377872914252797,
      "grad_norm": 0.11211161315441132,
      "learning_rate": 4.9568735462530395e-06,
      "loss": 0.0261,
      "step": 1550720
    },
    {
      "epoch": 2.537820021863933,
      "grad_norm": 0.6551321148872375,
      "learning_rate": 4.956807654039522e-06,
      "loss": 0.0197,
      "step": 1550740
    },
    {
      "epoch": 2.5378527523025864,
      "grad_norm": 0.5730473399162292,
      "learning_rate": 4.956741761826006e-06,
      "loss": 0.0135,
      "step": 1550760
    },
    {
      "epoch": 2.5378854827412396,
      "grad_norm": 0.3165506422519684,
      "learning_rate": 4.956675869612489e-06,
      "loss": 0.0119,
      "step": 1550780
    },
    {
      "epoch": 2.537918213179893,
      "grad_norm": 0.23541752994060516,
      "learning_rate": 4.956609977398971e-06,
      "loss": 0.0156,
      "step": 1550800
    },
    {
      "epoch": 2.5379509436185463,
      "grad_norm": 0.21047182381153107,
      "learning_rate": 4.956544085185454e-06,
      "loss": 0.0159,
      "step": 1550820
    },
    {
      "epoch": 2.5379836740572,
      "grad_norm": 0.5054485201835632,
      "learning_rate": 4.956478192971937e-06,
      "loss": 0.0141,
      "step": 1550840
    },
    {
      "epoch": 2.538016404495853,
      "grad_norm": 0.17579331994056702,
      "learning_rate": 4.9564123007584196e-06,
      "loss": 0.0188,
      "step": 1550860
    },
    {
      "epoch": 2.5380491349345062,
      "grad_norm": 0.14038901031017303,
      "learning_rate": 4.956346408544902e-06,
      "loss": 0.024,
      "step": 1550880
    },
    {
      "epoch": 2.53808186537316,
      "grad_norm": 0.14122740924358368,
      "learning_rate": 4.956280516331385e-06,
      "loss": 0.0128,
      "step": 1550900
    },
    {
      "epoch": 2.538114595811813,
      "grad_norm": 0.5170190930366516,
      "learning_rate": 4.956214624117869e-06,
      "loss": 0.0156,
      "step": 1550920
    },
    {
      "epoch": 2.5381473262504666,
      "grad_norm": 0.21769417822360992,
      "learning_rate": 4.956148731904351e-06,
      "loss": 0.0129,
      "step": 1550940
    },
    {
      "epoch": 2.5381800566891197,
      "grad_norm": 0.36081835627555847,
      "learning_rate": 4.956082839690834e-06,
      "loss": 0.0117,
      "step": 1550960
    },
    {
      "epoch": 2.5382127871277733,
      "grad_norm": 0.5474894642829895,
      "learning_rate": 4.956016947477317e-06,
      "loss": 0.0105,
      "step": 1550980
    },
    {
      "epoch": 2.5382455175664265,
      "grad_norm": 1.1968883275985718,
      "learning_rate": 4.9559510552638004e-06,
      "loss": 0.0143,
      "step": 1551000
    },
    {
      "epoch": 2.5382782480050796,
      "grad_norm": 0.18943041563034058,
      "learning_rate": 4.955885163050283e-06,
      "loss": 0.018,
      "step": 1551020
    },
    {
      "epoch": 2.538310978443733,
      "grad_norm": 0.13007374107837677,
      "learning_rate": 4.955819270836766e-06,
      "loss": 0.0171,
      "step": 1551040
    },
    {
      "epoch": 2.5383437088823864,
      "grad_norm": 0.19715915620326996,
      "learning_rate": 4.955753378623249e-06,
      "loss": 0.0149,
      "step": 1551060
    },
    {
      "epoch": 2.5383764393210395,
      "grad_norm": 0.19295266270637512,
      "learning_rate": 4.955687486409731e-06,
      "loss": 0.0113,
      "step": 1551080
    },
    {
      "epoch": 2.538409169759693,
      "grad_norm": 0.1856914609670639,
      "learning_rate": 4.955621594196214e-06,
      "loss": 0.0193,
      "step": 1551100
    },
    {
      "epoch": 2.5384419001983467,
      "grad_norm": 0.22024095058441162,
      "learning_rate": 4.955555701982697e-06,
      "loss": 0.0134,
      "step": 1551120
    },
    {
      "epoch": 2.538474630637,
      "grad_norm": 0.3019557595252991,
      "learning_rate": 4.95548980976918e-06,
      "loss": 0.0151,
      "step": 1551140
    },
    {
      "epoch": 2.538507361075653,
      "grad_norm": 0.3905131220817566,
      "learning_rate": 4.955423917555663e-06,
      "loss": 0.0163,
      "step": 1551160
    },
    {
      "epoch": 2.5385400915143066,
      "grad_norm": 0.25183749198913574,
      "learning_rate": 4.955358025342146e-06,
      "loss": 0.0177,
      "step": 1551180
    },
    {
      "epoch": 2.5385728219529597,
      "grad_norm": 1.446258783340454,
      "learning_rate": 4.955292133128629e-06,
      "loss": 0.0109,
      "step": 1551200
    },
    {
      "epoch": 2.538605552391613,
      "grad_norm": 0.14575235545635223,
      "learning_rate": 4.9552262409151114e-06,
      "loss": 0.0092,
      "step": 1551220
    },
    {
      "epoch": 2.5386382828302665,
      "grad_norm": 0.17158164083957672,
      "learning_rate": 4.955160348701594e-06,
      "loss": 0.0145,
      "step": 1551240
    },
    {
      "epoch": 2.53867101326892,
      "grad_norm": 0.24094100296497345,
      "learning_rate": 4.955094456488077e-06,
      "loss": 0.0104,
      "step": 1551260
    },
    {
      "epoch": 2.5387037437075732,
      "grad_norm": 0.5541378259658813,
      "learning_rate": 4.95502856427456e-06,
      "loss": 0.0147,
      "step": 1551280
    },
    {
      "epoch": 2.5387364741462264,
      "grad_norm": 0.3397463858127594,
      "learning_rate": 4.954962672061042e-06,
      "loss": 0.0145,
      "step": 1551300
    },
    {
      "epoch": 2.53876920458488,
      "grad_norm": 0.46102336049079895,
      "learning_rate": 4.954896779847526e-06,
      "loss": 0.0153,
      "step": 1551320
    },
    {
      "epoch": 2.538801935023533,
      "grad_norm": 0.27510130405426025,
      "learning_rate": 4.954830887634009e-06,
      "loss": 0.0153,
      "step": 1551340
    },
    {
      "epoch": 2.5388346654621863,
      "grad_norm": 0.6246312260627747,
      "learning_rate": 4.9547649954204915e-06,
      "loss": 0.0127,
      "step": 1551360
    },
    {
      "epoch": 2.53886739590084,
      "grad_norm": 0.36399364471435547,
      "learning_rate": 4.954699103206975e-06,
      "loss": 0.0123,
      "step": 1551380
    },
    {
      "epoch": 2.5389001263394935,
      "grad_norm": 0.2466670125722885,
      "learning_rate": 4.954633210993458e-06,
      "loss": 0.0155,
      "step": 1551400
    },
    {
      "epoch": 2.5389328567781466,
      "grad_norm": 0.4169941544532776,
      "learning_rate": 4.9545673187799405e-06,
      "loss": 0.0095,
      "step": 1551420
    },
    {
      "epoch": 2.5389655872167998,
      "grad_norm": 0.23440803587436676,
      "learning_rate": 4.954501426566423e-06,
      "loss": 0.0172,
      "step": 1551440
    },
    {
      "epoch": 2.5389983176554534,
      "grad_norm": 5.597238063812256,
      "learning_rate": 4.954435534352906e-06,
      "loss": 0.0157,
      "step": 1551460
    },
    {
      "epoch": 2.5390310480941065,
      "grad_norm": 0.15150222182273865,
      "learning_rate": 4.954369642139389e-06,
      "loss": 0.0118,
      "step": 1551480
    },
    {
      "epoch": 2.5390637785327597,
      "grad_norm": 0.08498239517211914,
      "learning_rate": 4.9543037499258715e-06,
      "loss": 0.0129,
      "step": 1551500
    },
    {
      "epoch": 2.5390965089714133,
      "grad_norm": 1.192832589149475,
      "learning_rate": 4.954237857712354e-06,
      "loss": 0.0103,
      "step": 1551520
    },
    {
      "epoch": 2.5391292394100664,
      "grad_norm": 0.4598534405231476,
      "learning_rate": 4.954171965498838e-06,
      "loss": 0.0187,
      "step": 1551540
    },
    {
      "epoch": 2.53916196984872,
      "grad_norm": 0.13098859786987305,
      "learning_rate": 4.9541060732853205e-06,
      "loss": 0.0111,
      "step": 1551560
    },
    {
      "epoch": 2.539194700287373,
      "grad_norm": 0.39577966928482056,
      "learning_rate": 4.954040181071803e-06,
      "loss": 0.0155,
      "step": 1551580
    },
    {
      "epoch": 2.5392274307260267,
      "grad_norm": 0.4022020399570465,
      "learning_rate": 4.953974288858286e-06,
      "loss": 0.0131,
      "step": 1551600
    },
    {
      "epoch": 2.53926016116468,
      "grad_norm": 0.23344679176807404,
      "learning_rate": 4.953908396644769e-06,
      "loss": 0.0103,
      "step": 1551620
    },
    {
      "epoch": 2.539292891603333,
      "grad_norm": 0.5604174733161926,
      "learning_rate": 4.9538425044312515e-06,
      "loss": 0.013,
      "step": 1551640
    },
    {
      "epoch": 2.5393256220419866,
      "grad_norm": 0.35802051424980164,
      "learning_rate": 4.953776612217734e-06,
      "loss": 0.0093,
      "step": 1551660
    },
    {
      "epoch": 2.53935835248064,
      "grad_norm": 0.34988027811050415,
      "learning_rate": 4.953710720004217e-06,
      "loss": 0.0135,
      "step": 1551680
    },
    {
      "epoch": 2.5393910829192934,
      "grad_norm": 0.09920591115951538,
      "learning_rate": 4.9536448277907e-06,
      "loss": 0.0179,
      "step": 1551700
    },
    {
      "epoch": 2.5394238133579465,
      "grad_norm": 0.24144825339317322,
      "learning_rate": 4.953578935577183e-06,
      "loss": 0.017,
      "step": 1551720
    },
    {
      "epoch": 2.5394565437966,
      "grad_norm": 0.6876910328865051,
      "learning_rate": 4.953513043363666e-06,
      "loss": 0.0135,
      "step": 1551740
    },
    {
      "epoch": 2.5394892742352533,
      "grad_norm": 0.36429232358932495,
      "learning_rate": 4.953447151150149e-06,
      "loss": 0.0094,
      "step": 1551760
    },
    {
      "epoch": 2.5395220046739064,
      "grad_norm": 0.46743547916412354,
      "learning_rate": 4.953381258936632e-06,
      "loss": 0.0158,
      "step": 1551780
    },
    {
      "epoch": 2.53955473511256,
      "grad_norm": 0.6304109692573547,
      "learning_rate": 4.953315366723115e-06,
      "loss": 0.0077,
      "step": 1551800
    },
    {
      "epoch": 2.539587465551213,
      "grad_norm": 0.37047526240348816,
      "learning_rate": 4.953249474509598e-06,
      "loss": 0.0181,
      "step": 1551820
    },
    {
      "epoch": 2.5396201959898668,
      "grad_norm": 0.143337681889534,
      "learning_rate": 4.953183582296081e-06,
      "loss": 0.0113,
      "step": 1551840
    },
    {
      "epoch": 2.53965292642852,
      "grad_norm": 0.29082775115966797,
      "learning_rate": 4.953117690082563e-06,
      "loss": 0.0171,
      "step": 1551860
    },
    {
      "epoch": 2.5396856568671735,
      "grad_norm": 0.5387061238288879,
      "learning_rate": 4.953051797869046e-06,
      "loss": 0.012,
      "step": 1551880
    },
    {
      "epoch": 2.5397183873058267,
      "grad_norm": 0.21313704550266266,
      "learning_rate": 4.952985905655529e-06,
      "loss": 0.0187,
      "step": 1551900
    },
    {
      "epoch": 2.53975111774448,
      "grad_norm": 0.06795048713684082,
      "learning_rate": 4.9529200134420116e-06,
      "loss": 0.0112,
      "step": 1551920
    },
    {
      "epoch": 2.5397838481831334,
      "grad_norm": 0.21343958377838135,
      "learning_rate": 4.952854121228495e-06,
      "loss": 0.0115,
      "step": 1551940
    },
    {
      "epoch": 2.5398165786217866,
      "grad_norm": 0.17105939984321594,
      "learning_rate": 4.952788229014978e-06,
      "loss": 0.0144,
      "step": 1551960
    },
    {
      "epoch": 2.53984930906044,
      "grad_norm": 0.44083791971206665,
      "learning_rate": 4.952722336801461e-06,
      "loss": 0.0215,
      "step": 1551980
    },
    {
      "epoch": 2.5398820394990933,
      "grad_norm": 0.10470380634069443,
      "learning_rate": 4.952656444587943e-06,
      "loss": 0.0139,
      "step": 1552000
    },
    {
      "epoch": 2.539914769937747,
      "grad_norm": 0.1935153752565384,
      "learning_rate": 4.952590552374426e-06,
      "loss": 0.0136,
      "step": 1552020
    },
    {
      "epoch": 2.5399475003764,
      "grad_norm": 0.8362825512886047,
      "learning_rate": 4.952524660160909e-06,
      "loss": 0.0208,
      "step": 1552040
    },
    {
      "epoch": 2.539980230815053,
      "grad_norm": 0.41168177127838135,
      "learning_rate": 4.9524587679473924e-06,
      "loss": 0.0118,
      "step": 1552060
    },
    {
      "epoch": 2.540012961253707,
      "grad_norm": 0.5885128378868103,
      "learning_rate": 4.952392875733875e-06,
      "loss": 0.0148,
      "step": 1552080
    },
    {
      "epoch": 2.54004569169236,
      "grad_norm": 0.9635065793991089,
      "learning_rate": 4.952326983520358e-06,
      "loss": 0.0151,
      "step": 1552100
    },
    {
      "epoch": 2.5400784221310135,
      "grad_norm": 0.03400157764554024,
      "learning_rate": 4.952261091306841e-06,
      "loss": 0.0104,
      "step": 1552120
    },
    {
      "epoch": 2.5401111525696667,
      "grad_norm": 0.13617898523807526,
      "learning_rate": 4.952195199093323e-06,
      "loss": 0.0161,
      "step": 1552140
    },
    {
      "epoch": 2.5401438830083203,
      "grad_norm": 0.5764341950416565,
      "learning_rate": 4.952129306879806e-06,
      "loss": 0.0184,
      "step": 1552160
    },
    {
      "epoch": 2.5401766134469734,
      "grad_norm": 0.24593950808048248,
      "learning_rate": 4.95206341466629e-06,
      "loss": 0.014,
      "step": 1552180
    },
    {
      "epoch": 2.5402093438856266,
      "grad_norm": 0.32713884115219116,
      "learning_rate": 4.9519975224527725e-06,
      "loss": 0.0128,
      "step": 1552200
    },
    {
      "epoch": 2.54024207432428,
      "grad_norm": 0.1637716442346573,
      "learning_rate": 4.951931630239255e-06,
      "loss": 0.0105,
      "step": 1552220
    },
    {
      "epoch": 2.5402748047629333,
      "grad_norm": 0.317759245634079,
      "learning_rate": 4.951865738025738e-06,
      "loss": 0.0116,
      "step": 1552240
    },
    {
      "epoch": 2.540307535201587,
      "grad_norm": 0.9928830862045288,
      "learning_rate": 4.951799845812221e-06,
      "loss": 0.0142,
      "step": 1552260
    },
    {
      "epoch": 2.54034026564024,
      "grad_norm": 0.19330495595932007,
      "learning_rate": 4.951733953598703e-06,
      "loss": 0.0116,
      "step": 1552280
    },
    {
      "epoch": 2.5403729960788937,
      "grad_norm": 0.9509620666503906,
      "learning_rate": 4.951668061385186e-06,
      "loss": 0.0149,
      "step": 1552300
    },
    {
      "epoch": 2.540405726517547,
      "grad_norm": 0.3917691707611084,
      "learning_rate": 4.951602169171669e-06,
      "loss": 0.0089,
      "step": 1552320
    },
    {
      "epoch": 2.5404384569562,
      "grad_norm": 0.22692446410655975,
      "learning_rate": 4.9515362769581525e-06,
      "loss": 0.0096,
      "step": 1552340
    },
    {
      "epoch": 2.5404711873948536,
      "grad_norm": 0.3507084548473358,
      "learning_rate": 4.951470384744635e-06,
      "loss": 0.015,
      "step": 1552360
    },
    {
      "epoch": 2.5405039178335067,
      "grad_norm": 0.3852202594280243,
      "learning_rate": 4.951404492531118e-06,
      "loss": 0.0124,
      "step": 1552380
    },
    {
      "epoch": 2.5405366482721603,
      "grad_norm": 0.44030895829200745,
      "learning_rate": 4.951338600317601e-06,
      "loss": 0.0144,
      "step": 1552400
    },
    {
      "epoch": 2.5405693787108135,
      "grad_norm": 0.44555509090423584,
      "learning_rate": 4.951272708104084e-06,
      "loss": 0.012,
      "step": 1552420
    },
    {
      "epoch": 2.540602109149467,
      "grad_norm": 0.4161127507686615,
      "learning_rate": 4.951206815890567e-06,
      "loss": 0.012,
      "step": 1552440
    },
    {
      "epoch": 2.54063483958812,
      "grad_norm": 0.7531165480613708,
      "learning_rate": 4.95114092367705e-06,
      "loss": 0.0158,
      "step": 1552460
    },
    {
      "epoch": 2.5406675700267733,
      "grad_norm": 0.20212970674037933,
      "learning_rate": 4.9510750314635325e-06,
      "loss": 0.0194,
      "step": 1552480
    },
    {
      "epoch": 2.540700300465427,
      "grad_norm": 0.5161972641944885,
      "learning_rate": 4.951009139250015e-06,
      "loss": 0.0228,
      "step": 1552500
    },
    {
      "epoch": 2.54073303090408,
      "grad_norm": 0.28969961404800415,
      "learning_rate": 4.950943247036498e-06,
      "loss": 0.0199,
      "step": 1552520
    },
    {
      "epoch": 2.5407657613427332,
      "grad_norm": 0.2963080406188965,
      "learning_rate": 4.950877354822981e-06,
      "loss": 0.011,
      "step": 1552540
    },
    {
      "epoch": 2.540798491781387,
      "grad_norm": 3.328648567199707,
      "learning_rate": 4.9508114626094635e-06,
      "loss": 0.0114,
      "step": 1552560
    },
    {
      "epoch": 2.5408312222200404,
      "grad_norm": 0.31492507457733154,
      "learning_rate": 4.950745570395947e-06,
      "loss": 0.0167,
      "step": 1552580
    },
    {
      "epoch": 2.5408639526586936,
      "grad_norm": 0.7037689685821533,
      "learning_rate": 4.95067967818243e-06,
      "loss": 0.0104,
      "step": 1552600
    },
    {
      "epoch": 2.5408966830973467,
      "grad_norm": 0.5314279794692993,
      "learning_rate": 4.9506137859689125e-06,
      "loss": 0.0127,
      "step": 1552620
    },
    {
      "epoch": 2.5409294135360003,
      "grad_norm": 0.4022960662841797,
      "learning_rate": 4.950547893755395e-06,
      "loss": 0.0203,
      "step": 1552640
    },
    {
      "epoch": 2.5409621439746535,
      "grad_norm": 0.22721797227859497,
      "learning_rate": 4.950482001541878e-06,
      "loss": 0.0116,
      "step": 1552660
    },
    {
      "epoch": 2.5409948744133066,
      "grad_norm": 1.122679591178894,
      "learning_rate": 4.950416109328361e-06,
      "loss": 0.0089,
      "step": 1552680
    },
    {
      "epoch": 2.54102760485196,
      "grad_norm": 0.3977371156215668,
      "learning_rate": 4.9503502171148435e-06,
      "loss": 0.0127,
      "step": 1552700
    },
    {
      "epoch": 2.541060335290614,
      "grad_norm": 0.24910801649093628,
      "learning_rate": 4.950284324901326e-06,
      "loss": 0.0106,
      "step": 1552720
    },
    {
      "epoch": 2.541093065729267,
      "grad_norm": 1.1193100214004517,
      "learning_rate": 4.95021843268781e-06,
      "loss": 0.0154,
      "step": 1552740
    },
    {
      "epoch": 2.54112579616792,
      "grad_norm": 0.27020207047462463,
      "learning_rate": 4.9501525404742926e-06,
      "loss": 0.0131,
      "step": 1552760
    },
    {
      "epoch": 2.5411585266065737,
      "grad_norm": 0.774516761302948,
      "learning_rate": 4.950086648260775e-06,
      "loss": 0.0137,
      "step": 1552780
    },
    {
      "epoch": 2.541191257045227,
      "grad_norm": 0.6036701202392578,
      "learning_rate": 4.950020756047259e-06,
      "loss": 0.0148,
      "step": 1552800
    },
    {
      "epoch": 2.54122398748388,
      "grad_norm": 0.3255966007709503,
      "learning_rate": 4.949954863833742e-06,
      "loss": 0.0087,
      "step": 1552820
    },
    {
      "epoch": 2.5412567179225336,
      "grad_norm": 0.2971791625022888,
      "learning_rate": 4.949888971620224e-06,
      "loss": 0.0178,
      "step": 1552840
    },
    {
      "epoch": 2.541289448361187,
      "grad_norm": 0.3338201940059662,
      "learning_rate": 4.949823079406707e-06,
      "loss": 0.0173,
      "step": 1552860
    },
    {
      "epoch": 2.5413221787998403,
      "grad_norm": 0.16974321007728577,
      "learning_rate": 4.94975718719319e-06,
      "loss": 0.0132,
      "step": 1552880
    },
    {
      "epoch": 2.5413549092384935,
      "grad_norm": 0.08656530827283859,
      "learning_rate": 4.949691294979673e-06,
      "loss": 0.0196,
      "step": 1552900
    },
    {
      "epoch": 2.541387639677147,
      "grad_norm": 0.5613947510719299,
      "learning_rate": 4.949625402766155e-06,
      "loss": 0.0222,
      "step": 1552920
    },
    {
      "epoch": 2.5414203701158002,
      "grad_norm": 0.6949902772903442,
      "learning_rate": 4.949559510552638e-06,
      "loss": 0.0162,
      "step": 1552940
    },
    {
      "epoch": 2.5414531005544534,
      "grad_norm": 0.3579159677028656,
      "learning_rate": 4.949493618339122e-06,
      "loss": 0.0177,
      "step": 1552960
    },
    {
      "epoch": 2.541485830993107,
      "grad_norm": 0.28317660093307495,
      "learning_rate": 4.949427726125604e-06,
      "loss": 0.0112,
      "step": 1552980
    },
    {
      "epoch": 2.5415185614317606,
      "grad_norm": 0.2917744517326355,
      "learning_rate": 4.949361833912087e-06,
      "loss": 0.0197,
      "step": 1553000
    },
    {
      "epoch": 2.5415512918704137,
      "grad_norm": 0.3076995015144348,
      "learning_rate": 4.94929594169857e-06,
      "loss": 0.016,
      "step": 1553020
    },
    {
      "epoch": 2.541584022309067,
      "grad_norm": 0.14836385846138,
      "learning_rate": 4.949230049485053e-06,
      "loss": 0.0171,
      "step": 1553040
    },
    {
      "epoch": 2.5416167527477205,
      "grad_norm": 0.11074045300483704,
      "learning_rate": 4.949164157271535e-06,
      "loss": 0.0132,
      "step": 1553060
    },
    {
      "epoch": 2.5416494831863736,
      "grad_norm": 0.24667349457740784,
      "learning_rate": 4.949098265058018e-06,
      "loss": 0.0104,
      "step": 1553080
    },
    {
      "epoch": 2.5416822136250268,
      "grad_norm": 0.18195387721061707,
      "learning_rate": 4.949032372844501e-06,
      "loss": 0.0099,
      "step": 1553100
    },
    {
      "epoch": 2.5417149440636804,
      "grad_norm": 0.16059993207454681,
      "learning_rate": 4.948966480630984e-06,
      "loss": 0.0092,
      "step": 1553120
    },
    {
      "epoch": 2.5417476745023335,
      "grad_norm": 0.31468701362609863,
      "learning_rate": 4.948900588417467e-06,
      "loss": 0.016,
      "step": 1553140
    },
    {
      "epoch": 2.541780404940987,
      "grad_norm": 0.28891077637672424,
      "learning_rate": 4.94883469620395e-06,
      "loss": 0.0193,
      "step": 1553160
    },
    {
      "epoch": 2.5418131353796403,
      "grad_norm": 0.21532750129699707,
      "learning_rate": 4.948768803990433e-06,
      "loss": 0.0094,
      "step": 1553180
    },
    {
      "epoch": 2.541845865818294,
      "grad_norm": 0.24796174466609955,
      "learning_rate": 4.948702911776916e-06,
      "loss": 0.0142,
      "step": 1553200
    },
    {
      "epoch": 2.541878596256947,
      "grad_norm": 0.23297040164470673,
      "learning_rate": 4.948637019563399e-06,
      "loss": 0.0081,
      "step": 1553220
    },
    {
      "epoch": 2.5419113266956,
      "grad_norm": 0.2779815196990967,
      "learning_rate": 4.948571127349882e-06,
      "loss": 0.0155,
      "step": 1553240
    },
    {
      "epoch": 2.5419440571342538,
      "grad_norm": 0.44751760363578796,
      "learning_rate": 4.9485052351363644e-06,
      "loss": 0.0108,
      "step": 1553260
    },
    {
      "epoch": 2.541976787572907,
      "grad_norm": 0.9701117277145386,
      "learning_rate": 4.948439342922847e-06,
      "loss": 0.0146,
      "step": 1553280
    },
    {
      "epoch": 2.5420095180115605,
      "grad_norm": 0.2431776374578476,
      "learning_rate": 4.94837345070933e-06,
      "loss": 0.0121,
      "step": 1553300
    },
    {
      "epoch": 2.5420422484502136,
      "grad_norm": 0.38527747988700867,
      "learning_rate": 4.948307558495813e-06,
      "loss": 0.0106,
      "step": 1553320
    },
    {
      "epoch": 2.5420749788888672,
      "grad_norm": 0.2055351585149765,
      "learning_rate": 4.948241666282295e-06,
      "loss": 0.019,
      "step": 1553340
    },
    {
      "epoch": 2.5421077093275204,
      "grad_norm": 0.09407239407300949,
      "learning_rate": 4.948175774068779e-06,
      "loss": 0.0124,
      "step": 1553360
    },
    {
      "epoch": 2.5421404397661735,
      "grad_norm": 0.1766994744539261,
      "learning_rate": 4.948109881855262e-06,
      "loss": 0.0149,
      "step": 1553380
    },
    {
      "epoch": 2.542173170204827,
      "grad_norm": 0.18953773379325867,
      "learning_rate": 4.9480439896417445e-06,
      "loss": 0.0202,
      "step": 1553400
    },
    {
      "epoch": 2.5422059006434803,
      "grad_norm": 0.7099969387054443,
      "learning_rate": 4.947978097428227e-06,
      "loss": 0.0122,
      "step": 1553420
    },
    {
      "epoch": 2.542238631082134,
      "grad_norm": 0.44089776277542114,
      "learning_rate": 4.94791220521471e-06,
      "loss": 0.0144,
      "step": 1553440
    },
    {
      "epoch": 2.542271361520787,
      "grad_norm": 1.6262426376342773,
      "learning_rate": 4.947846313001193e-06,
      "loss": 0.017,
      "step": 1553460
    },
    {
      "epoch": 2.5423040919594406,
      "grad_norm": 0.1187392920255661,
      "learning_rate": 4.947780420787676e-06,
      "loss": 0.0216,
      "step": 1553480
    },
    {
      "epoch": 2.5423368223980938,
      "grad_norm": 0.6842015385627747,
      "learning_rate": 4.947714528574159e-06,
      "loss": 0.0173,
      "step": 1553500
    },
    {
      "epoch": 2.542369552836747,
      "grad_norm": 0.13604584336280823,
      "learning_rate": 4.947648636360642e-06,
      "loss": 0.0181,
      "step": 1553520
    },
    {
      "epoch": 2.5424022832754005,
      "grad_norm": 0.3349919319152832,
      "learning_rate": 4.9475827441471245e-06,
      "loss": 0.0106,
      "step": 1553540
    },
    {
      "epoch": 2.5424350137140537,
      "grad_norm": 0.4544362425804138,
      "learning_rate": 4.947516851933607e-06,
      "loss": 0.0183,
      "step": 1553560
    },
    {
      "epoch": 2.5424677441527073,
      "grad_norm": 0.26062774658203125,
      "learning_rate": 4.94745095972009e-06,
      "loss": 0.0142,
      "step": 1553580
    },
    {
      "epoch": 2.5425004745913604,
      "grad_norm": 0.2535351812839508,
      "learning_rate": 4.9473850675065736e-06,
      "loss": 0.0112,
      "step": 1553600
    },
    {
      "epoch": 2.542533205030014,
      "grad_norm": 0.4049489498138428,
      "learning_rate": 4.947319175293056e-06,
      "loss": 0.0269,
      "step": 1553620
    },
    {
      "epoch": 2.542565935468667,
      "grad_norm": 0.6205483078956604,
      "learning_rate": 4.947253283079539e-06,
      "loss": 0.0153,
      "step": 1553640
    },
    {
      "epoch": 2.5425986659073203,
      "grad_norm": 0.30308467149734497,
      "learning_rate": 4.947187390866022e-06,
      "loss": 0.0091,
      "step": 1553660
    },
    {
      "epoch": 2.542631396345974,
      "grad_norm": 0.7807021141052246,
      "learning_rate": 4.9471214986525045e-06,
      "loss": 0.019,
      "step": 1553680
    },
    {
      "epoch": 2.542664126784627,
      "grad_norm": 0.5396717190742493,
      "learning_rate": 4.947055606438987e-06,
      "loss": 0.0176,
      "step": 1553700
    },
    {
      "epoch": 2.5426968572232806,
      "grad_norm": 0.39547210931777954,
      "learning_rate": 4.94698971422547e-06,
      "loss": 0.0176,
      "step": 1553720
    },
    {
      "epoch": 2.542729587661934,
      "grad_norm": 0.21557356417179108,
      "learning_rate": 4.946923822011953e-06,
      "loss": 0.0178,
      "step": 1553740
    },
    {
      "epoch": 2.5427623181005874,
      "grad_norm": 0.19306637346744537,
      "learning_rate": 4.946857929798436e-06,
      "loss": 0.0128,
      "step": 1553760
    },
    {
      "epoch": 2.5427950485392405,
      "grad_norm": 0.29195746779441833,
      "learning_rate": 4.946792037584919e-06,
      "loss": 0.0147,
      "step": 1553780
    },
    {
      "epoch": 2.5428277789778937,
      "grad_norm": 1.078332543373108,
      "learning_rate": 4.946726145371402e-06,
      "loss": 0.021,
      "step": 1553800
    },
    {
      "epoch": 2.5428605094165473,
      "grad_norm": 0.6720646619796753,
      "learning_rate": 4.946660253157885e-06,
      "loss": 0.0144,
      "step": 1553820
    },
    {
      "epoch": 2.5428932398552004,
      "grad_norm": 0.25408682227134705,
      "learning_rate": 4.946594360944368e-06,
      "loss": 0.0155,
      "step": 1553840
    },
    {
      "epoch": 2.542925970293854,
      "grad_norm": 0.46654394268989563,
      "learning_rate": 4.946528468730851e-06,
      "loss": 0.0185,
      "step": 1553860
    },
    {
      "epoch": 2.542958700732507,
      "grad_norm": 1.0027573108673096,
      "learning_rate": 4.946462576517334e-06,
      "loss": 0.0176,
      "step": 1553880
    },
    {
      "epoch": 2.5429914311711608,
      "grad_norm": 0.24996979534626007,
      "learning_rate": 4.946396684303816e-06,
      "loss": 0.0119,
      "step": 1553900
    },
    {
      "epoch": 2.543024161609814,
      "grad_norm": 0.19929629564285278,
      "learning_rate": 4.946330792090299e-06,
      "loss": 0.0148,
      "step": 1553920
    },
    {
      "epoch": 2.543056892048467,
      "grad_norm": 0.34557828307151794,
      "learning_rate": 4.946264899876782e-06,
      "loss": 0.0142,
      "step": 1553940
    },
    {
      "epoch": 2.5430896224871207,
      "grad_norm": 0.22312979400157928,
      "learning_rate": 4.9461990076632646e-06,
      "loss": 0.0126,
      "step": 1553960
    },
    {
      "epoch": 2.543122352925774,
      "grad_norm": 0.4612828195095062,
      "learning_rate": 4.946133115449747e-06,
      "loss": 0.0138,
      "step": 1553980
    },
    {
      "epoch": 2.5431550833644274,
      "grad_norm": 0.35620972514152527,
      "learning_rate": 4.946067223236231e-06,
      "loss": 0.0119,
      "step": 1554000
    },
    {
      "epoch": 2.5431878138030806,
      "grad_norm": 0.3915284276008606,
      "learning_rate": 4.946001331022714e-06,
      "loss": 0.0095,
      "step": 1554020
    },
    {
      "epoch": 2.543220544241734,
      "grad_norm": 0.631641149520874,
      "learning_rate": 4.945935438809196e-06,
      "loss": 0.0172,
      "step": 1554040
    },
    {
      "epoch": 2.5432532746803873,
      "grad_norm": 0.1919824630022049,
      "learning_rate": 4.945869546595679e-06,
      "loss": 0.0127,
      "step": 1554060
    },
    {
      "epoch": 2.5432860051190405,
      "grad_norm": 0.3302293121814728,
      "learning_rate": 4.945803654382162e-06,
      "loss": 0.0124,
      "step": 1554080
    },
    {
      "epoch": 2.543318735557694,
      "grad_norm": 1.0176461935043335,
      "learning_rate": 4.945737762168645e-06,
      "loss": 0.0155,
      "step": 1554100
    },
    {
      "epoch": 2.543351465996347,
      "grad_norm": 0.4480064809322357,
      "learning_rate": 4.945671869955127e-06,
      "loss": 0.0163,
      "step": 1554120
    },
    {
      "epoch": 2.5433841964350004,
      "grad_norm": 0.46451669931411743,
      "learning_rate": 4.94560597774161e-06,
      "loss": 0.0186,
      "step": 1554140
    },
    {
      "epoch": 2.543416926873654,
      "grad_norm": 0.41907623410224915,
      "learning_rate": 4.945540085528094e-06,
      "loss": 0.0165,
      "step": 1554160
    },
    {
      "epoch": 2.5434496573123075,
      "grad_norm": 0.16182370483875275,
      "learning_rate": 4.945474193314576e-06,
      "loss": 0.011,
      "step": 1554180
    },
    {
      "epoch": 2.5434823877509607,
      "grad_norm": 0.3048035204410553,
      "learning_rate": 4.945408301101059e-06,
      "loss": 0.015,
      "step": 1554200
    },
    {
      "epoch": 2.543515118189614,
      "grad_norm": 0.34920263290405273,
      "learning_rate": 4.945342408887543e-06,
      "loss": 0.0109,
      "step": 1554220
    },
    {
      "epoch": 2.5435478486282674,
      "grad_norm": 0.2010098099708557,
      "learning_rate": 4.9452765166740255e-06,
      "loss": 0.0126,
      "step": 1554240
    },
    {
      "epoch": 2.5435805790669206,
      "grad_norm": 0.20062075555324554,
      "learning_rate": 4.945210624460508e-06,
      "loss": 0.015,
      "step": 1554260
    },
    {
      "epoch": 2.5436133095055737,
      "grad_norm": 0.4181711673736572,
      "learning_rate": 4.945144732246991e-06,
      "loss": 0.0109,
      "step": 1554280
    },
    {
      "epoch": 2.5436460399442273,
      "grad_norm": 0.4564549922943115,
      "learning_rate": 4.945078840033474e-06,
      "loss": 0.013,
      "step": 1554300
    },
    {
      "epoch": 2.543678770382881,
      "grad_norm": 0.5859991908073425,
      "learning_rate": 4.945012947819956e-06,
      "loss": 0.0159,
      "step": 1554320
    },
    {
      "epoch": 2.543711500821534,
      "grad_norm": 0.7805464863777161,
      "learning_rate": 4.944947055606439e-06,
      "loss": 0.0145,
      "step": 1554340
    },
    {
      "epoch": 2.5437442312601872,
      "grad_norm": 0.6424394845962524,
      "learning_rate": 4.944881163392922e-06,
      "loss": 0.0192,
      "step": 1554360
    },
    {
      "epoch": 2.543776961698841,
      "grad_norm": 0.5546672940254211,
      "learning_rate": 4.9448152711794055e-06,
      "loss": 0.0129,
      "step": 1554380
    },
    {
      "epoch": 2.543809692137494,
      "grad_norm": 0.24372516572475433,
      "learning_rate": 4.944749378965888e-06,
      "loss": 0.0136,
      "step": 1554400
    },
    {
      "epoch": 2.543842422576147,
      "grad_norm": 0.23773686587810516,
      "learning_rate": 4.944683486752371e-06,
      "loss": 0.0159,
      "step": 1554420
    },
    {
      "epoch": 2.5438751530148007,
      "grad_norm": 0.6278468370437622,
      "learning_rate": 4.944617594538854e-06,
      "loss": 0.013,
      "step": 1554440
    },
    {
      "epoch": 2.5439078834534543,
      "grad_norm": 0.1730574667453766,
      "learning_rate": 4.9445517023253364e-06,
      "loss": 0.0149,
      "step": 1554460
    },
    {
      "epoch": 2.5439406138921075,
      "grad_norm": 2.7930405139923096,
      "learning_rate": 4.944485810111819e-06,
      "loss": 0.0153,
      "step": 1554480
    },
    {
      "epoch": 2.5439733443307606,
      "grad_norm": 0.06408966332674026,
      "learning_rate": 4.944419917898302e-06,
      "loss": 0.0086,
      "step": 1554500
    },
    {
      "epoch": 2.544006074769414,
      "grad_norm": 0.06361041963100433,
      "learning_rate": 4.9443540256847855e-06,
      "loss": 0.0136,
      "step": 1554520
    },
    {
      "epoch": 2.5440388052080674,
      "grad_norm": 0.31742703914642334,
      "learning_rate": 4.944288133471268e-06,
      "loss": 0.0118,
      "step": 1554540
    },
    {
      "epoch": 2.5440715356467205,
      "grad_norm": 0.18910159170627594,
      "learning_rate": 4.944222241257751e-06,
      "loss": 0.0149,
      "step": 1554560
    },
    {
      "epoch": 2.544104266085374,
      "grad_norm": 0.576389491558075,
      "learning_rate": 4.944156349044234e-06,
      "loss": 0.0173,
      "step": 1554580
    },
    {
      "epoch": 2.5441369965240272,
      "grad_norm": 0.4287392497062683,
      "learning_rate": 4.9440904568307165e-06,
      "loss": 0.0116,
      "step": 1554600
    },
    {
      "epoch": 2.544169726962681,
      "grad_norm": 0.5491766333580017,
      "learning_rate": 4.9440245646172e-06,
      "loss": 0.0144,
      "step": 1554620
    },
    {
      "epoch": 2.544202457401334,
      "grad_norm": 0.46416300535202026,
      "learning_rate": 4.943958672403683e-06,
      "loss": 0.0173,
      "step": 1554640
    },
    {
      "epoch": 2.5442351878399876,
      "grad_norm": 0.36371591687202454,
      "learning_rate": 4.9438927801901655e-06,
      "loss": 0.0122,
      "step": 1554660
    },
    {
      "epoch": 2.5442679182786407,
      "grad_norm": 0.1833111196756363,
      "learning_rate": 4.943826887976648e-06,
      "loss": 0.0091,
      "step": 1554680
    },
    {
      "epoch": 2.544300648717294,
      "grad_norm": 0.6499812006950378,
      "learning_rate": 4.943760995763131e-06,
      "loss": 0.0144,
      "step": 1554700
    },
    {
      "epoch": 2.5443333791559475,
      "grad_norm": 0.3290597200393677,
      "learning_rate": 4.943695103549614e-06,
      "loss": 0.0142,
      "step": 1554720
    },
    {
      "epoch": 2.5443661095946006,
      "grad_norm": 0.1512276530265808,
      "learning_rate": 4.9436292113360965e-06,
      "loss": 0.0131,
      "step": 1554740
    },
    {
      "epoch": 2.5443988400332542,
      "grad_norm": 0.7582207322120667,
      "learning_rate": 4.943563319122579e-06,
      "loss": 0.013,
      "step": 1554760
    },
    {
      "epoch": 2.5444315704719074,
      "grad_norm": 0.3817140758037567,
      "learning_rate": 4.943497426909063e-06,
      "loss": 0.0152,
      "step": 1554780
    },
    {
      "epoch": 2.544464300910561,
      "grad_norm": 0.37780076265335083,
      "learning_rate": 4.9434315346955456e-06,
      "loss": 0.0137,
      "step": 1554800
    },
    {
      "epoch": 2.544497031349214,
      "grad_norm": 0.16662058234214783,
      "learning_rate": 4.943365642482028e-06,
      "loss": 0.0105,
      "step": 1554820
    },
    {
      "epoch": 2.5445297617878673,
      "grad_norm": 0.7151229381561279,
      "learning_rate": 4.943299750268511e-06,
      "loss": 0.0132,
      "step": 1554840
    },
    {
      "epoch": 2.544562492226521,
      "grad_norm": 0.44735947251319885,
      "learning_rate": 4.943233858054994e-06,
      "loss": 0.01,
      "step": 1554860
    },
    {
      "epoch": 2.544595222665174,
      "grad_norm": 0.2605309784412384,
      "learning_rate": 4.943167965841477e-06,
      "loss": 0.0157,
      "step": 1554880
    },
    {
      "epoch": 2.5446279531038276,
      "grad_norm": 0.19700558483600616,
      "learning_rate": 4.94310207362796e-06,
      "loss": 0.0101,
      "step": 1554900
    },
    {
      "epoch": 2.5446606835424808,
      "grad_norm": 0.5786941051483154,
      "learning_rate": 4.943036181414443e-06,
      "loss": 0.0151,
      "step": 1554920
    },
    {
      "epoch": 2.5446934139811344,
      "grad_norm": 0.472113698720932,
      "learning_rate": 4.942970289200926e-06,
      "loss": 0.0145,
      "step": 1554940
    },
    {
      "epoch": 2.5447261444197875,
      "grad_norm": 0.10864485055208206,
      "learning_rate": 4.942904396987408e-06,
      "loss": 0.0092,
      "step": 1554960
    },
    {
      "epoch": 2.5447588748584407,
      "grad_norm": 0.4523758590221405,
      "learning_rate": 4.942838504773891e-06,
      "loss": 0.0155,
      "step": 1554980
    },
    {
      "epoch": 2.5447916052970943,
      "grad_norm": 0.422212690114975,
      "learning_rate": 4.942772612560374e-06,
      "loss": 0.0178,
      "step": 1555000
    },
    {
      "epoch": 2.5448243357357474,
      "grad_norm": 0.5919660329818726,
      "learning_rate": 4.942706720346857e-06,
      "loss": 0.0175,
      "step": 1555020
    },
    {
      "epoch": 2.544857066174401,
      "grad_norm": 0.15174579620361328,
      "learning_rate": 4.94264082813334e-06,
      "loss": 0.0197,
      "step": 1555040
    },
    {
      "epoch": 2.544889796613054,
      "grad_norm": 0.14532187581062317,
      "learning_rate": 4.942574935919823e-06,
      "loss": 0.0153,
      "step": 1555060
    },
    {
      "epoch": 2.5449225270517077,
      "grad_norm": 0.3298869729042053,
      "learning_rate": 4.942509043706306e-06,
      "loss": 0.0131,
      "step": 1555080
    },
    {
      "epoch": 2.544955257490361,
      "grad_norm": 0.27177828550338745,
      "learning_rate": 4.942443151492788e-06,
      "loss": 0.015,
      "step": 1555100
    },
    {
      "epoch": 2.544987987929014,
      "grad_norm": 0.32179051637649536,
      "learning_rate": 4.942377259279271e-06,
      "loss": 0.0169,
      "step": 1555120
    },
    {
      "epoch": 2.5450207183676676,
      "grad_norm": 0.4712947607040405,
      "learning_rate": 4.942311367065754e-06,
      "loss": 0.0121,
      "step": 1555140
    },
    {
      "epoch": 2.545053448806321,
      "grad_norm": 0.405985563993454,
      "learning_rate": 4.9422454748522366e-06,
      "loss": 0.0111,
      "step": 1555160
    },
    {
      "epoch": 2.5450861792449744,
      "grad_norm": 0.0966632217168808,
      "learning_rate": 4.94217958263872e-06,
      "loss": 0.0148,
      "step": 1555180
    },
    {
      "epoch": 2.5451189096836275,
      "grad_norm": 0.4422946870326996,
      "learning_rate": 4.942113690425203e-06,
      "loss": 0.0134,
      "step": 1555200
    },
    {
      "epoch": 2.545151640122281,
      "grad_norm": 0.07517016679048538,
      "learning_rate": 4.942047798211686e-06,
      "loss": 0.0179,
      "step": 1555220
    },
    {
      "epoch": 2.5451843705609343,
      "grad_norm": 0.23035457730293274,
      "learning_rate": 4.941981905998169e-06,
      "loss": 0.0121,
      "step": 1555240
    },
    {
      "epoch": 2.5452171009995874,
      "grad_norm": 0.11437933146953583,
      "learning_rate": 4.941916013784652e-06,
      "loss": 0.0118,
      "step": 1555260
    },
    {
      "epoch": 2.545249831438241,
      "grad_norm": 0.20884758234024048,
      "learning_rate": 4.941850121571135e-06,
      "loss": 0.0172,
      "step": 1555280
    },
    {
      "epoch": 2.545282561876894,
      "grad_norm": 0.6059095859527588,
      "learning_rate": 4.9417842293576174e-06,
      "loss": 0.0148,
      "step": 1555300
    },
    {
      "epoch": 2.5453152923155478,
      "grad_norm": 0.15155093371868134,
      "learning_rate": 4.9417183371441e-06,
      "loss": 0.0164,
      "step": 1555320
    },
    {
      "epoch": 2.545348022754201,
      "grad_norm": 0.33287790417671204,
      "learning_rate": 4.941652444930583e-06,
      "loss": 0.0218,
      "step": 1555340
    },
    {
      "epoch": 2.5453807531928545,
      "grad_norm": 0.2503819167613983,
      "learning_rate": 4.941586552717066e-06,
      "loss": 0.0146,
      "step": 1555360
    },
    {
      "epoch": 2.5454134836315077,
      "grad_norm": 0.3663640320301056,
      "learning_rate": 4.941520660503548e-06,
      "loss": 0.0191,
      "step": 1555380
    },
    {
      "epoch": 2.545446214070161,
      "grad_norm": 0.2853791117668152,
      "learning_rate": 4.941454768290032e-06,
      "loss": 0.0115,
      "step": 1555400
    },
    {
      "epoch": 2.5454789445088144,
      "grad_norm": 0.13639314472675323,
      "learning_rate": 4.941388876076515e-06,
      "loss": 0.0159,
      "step": 1555420
    },
    {
      "epoch": 2.5455116749474676,
      "grad_norm": 0.2608194947242737,
      "learning_rate": 4.9413229838629975e-06,
      "loss": 0.0139,
      "step": 1555440
    },
    {
      "epoch": 2.545544405386121,
      "grad_norm": 0.7198879718780518,
      "learning_rate": 4.94125709164948e-06,
      "loss": 0.0138,
      "step": 1555460
    },
    {
      "epoch": 2.5455771358247743,
      "grad_norm": 0.9279159903526306,
      "learning_rate": 4.941191199435963e-06,
      "loss": 0.0138,
      "step": 1555480
    },
    {
      "epoch": 2.545609866263428,
      "grad_norm": 0.20136485993862152,
      "learning_rate": 4.941125307222446e-06,
      "loss": 0.0133,
      "step": 1555500
    },
    {
      "epoch": 2.545642596702081,
      "grad_norm": 0.16287963092327118,
      "learning_rate": 4.9410594150089284e-06,
      "loss": 0.0143,
      "step": 1555520
    },
    {
      "epoch": 2.545675327140734,
      "grad_norm": 0.1911558359861374,
      "learning_rate": 4.940993522795411e-06,
      "loss": 0.0194,
      "step": 1555540
    },
    {
      "epoch": 2.545708057579388,
      "grad_norm": 0.348926305770874,
      "learning_rate": 4.940927630581894e-06,
      "loss": 0.0121,
      "step": 1555560
    },
    {
      "epoch": 2.545740788018041,
      "grad_norm": 0.34112218022346497,
      "learning_rate": 4.9408617383683775e-06,
      "loss": 0.016,
      "step": 1555580
    },
    {
      "epoch": 2.545773518456694,
      "grad_norm": 0.1193215474486351,
      "learning_rate": 4.94079584615486e-06,
      "loss": 0.0129,
      "step": 1555600
    },
    {
      "epoch": 2.5458062488953477,
      "grad_norm": 0.15467135608196259,
      "learning_rate": 4.940729953941343e-06,
      "loss": 0.0157,
      "step": 1555620
    },
    {
      "epoch": 2.5458389793340013,
      "grad_norm": 0.153862863779068,
      "learning_rate": 4.9406640617278266e-06,
      "loss": 0.0149,
      "step": 1555640
    },
    {
      "epoch": 2.5458717097726544,
      "grad_norm": 0.09268160164356232,
      "learning_rate": 4.940598169514309e-06,
      "loss": 0.0174,
      "step": 1555660
    },
    {
      "epoch": 2.5459044402113076,
      "grad_norm": 0.25007665157318115,
      "learning_rate": 4.940532277300792e-06,
      "loss": 0.0151,
      "step": 1555680
    },
    {
      "epoch": 2.545937170649961,
      "grad_norm": 0.98076331615448,
      "learning_rate": 4.940466385087275e-06,
      "loss": 0.0116,
      "step": 1555700
    },
    {
      "epoch": 2.5459699010886143,
      "grad_norm": 0.19158871471881866,
      "learning_rate": 4.9404004928737575e-06,
      "loss": 0.0133,
      "step": 1555720
    },
    {
      "epoch": 2.5460026315272675,
      "grad_norm": 0.09949074685573578,
      "learning_rate": 4.94033460066024e-06,
      "loss": 0.0092,
      "step": 1555740
    },
    {
      "epoch": 2.546035361965921,
      "grad_norm": 0.45871976017951965,
      "learning_rate": 4.940268708446723e-06,
      "loss": 0.0149,
      "step": 1555760
    },
    {
      "epoch": 2.5460680924045747,
      "grad_norm": 0.25227391719818115,
      "learning_rate": 4.940202816233206e-06,
      "loss": 0.0172,
      "step": 1555780
    },
    {
      "epoch": 2.546100822843228,
      "grad_norm": 0.3666653633117676,
      "learning_rate": 4.940136924019689e-06,
      "loss": 0.0162,
      "step": 1555800
    },
    {
      "epoch": 2.546133553281881,
      "grad_norm": 0.873744785785675,
      "learning_rate": 4.940071031806172e-06,
      "loss": 0.0141,
      "step": 1555820
    },
    {
      "epoch": 2.5461662837205346,
      "grad_norm": 0.29771843552589417,
      "learning_rate": 4.940005139592655e-06,
      "loss": 0.0149,
      "step": 1555840
    },
    {
      "epoch": 2.5461990141591877,
      "grad_norm": 0.6998478770256042,
      "learning_rate": 4.9399392473791375e-06,
      "loss": 0.0125,
      "step": 1555860
    },
    {
      "epoch": 2.546231744597841,
      "grad_norm": 0.2507254481315613,
      "learning_rate": 4.93987335516562e-06,
      "loss": 0.0097,
      "step": 1555880
    },
    {
      "epoch": 2.5462644750364944,
      "grad_norm": 0.30079638957977295,
      "learning_rate": 4.939807462952103e-06,
      "loss": 0.0201,
      "step": 1555900
    },
    {
      "epoch": 2.546297205475148,
      "grad_norm": 0.7758076786994934,
      "learning_rate": 4.939741570738586e-06,
      "loss": 0.014,
      "step": 1555920
    },
    {
      "epoch": 2.546329935913801,
      "grad_norm": 0.2501402199268341,
      "learning_rate": 4.939675678525069e-06,
      "loss": 0.0122,
      "step": 1555940
    },
    {
      "epoch": 2.5463626663524543,
      "grad_norm": 0.6907467246055603,
      "learning_rate": 4.939609786311552e-06,
      "loss": 0.0131,
      "step": 1555960
    },
    {
      "epoch": 2.546395396791108,
      "grad_norm": 0.6165746450424194,
      "learning_rate": 4.939543894098035e-06,
      "loss": 0.0136,
      "step": 1555980
    },
    {
      "epoch": 2.546428127229761,
      "grad_norm": 0.4117615520954132,
      "learning_rate": 4.9394780018845176e-06,
      "loss": 0.0188,
      "step": 1556000
    },
    {
      "epoch": 2.5464608576684142,
      "grad_norm": 0.2510150671005249,
      "learning_rate": 4.939412109671e-06,
      "loss": 0.0191,
      "step": 1556020
    },
    {
      "epoch": 2.546493588107068,
      "grad_norm": 0.33422908186912537,
      "learning_rate": 4.939346217457484e-06,
      "loss": 0.0182,
      "step": 1556040
    },
    {
      "epoch": 2.5465263185457214,
      "grad_norm": 0.4127717912197113,
      "learning_rate": 4.939280325243967e-06,
      "loss": 0.0175,
      "step": 1556060
    },
    {
      "epoch": 2.5465590489843746,
      "grad_norm": 0.22438760101795197,
      "learning_rate": 4.939214433030449e-06,
      "loss": 0.0215,
      "step": 1556080
    },
    {
      "epoch": 2.5465917794230277,
      "grad_norm": 0.4068257212638855,
      "learning_rate": 4.939148540816932e-06,
      "loss": 0.0157,
      "step": 1556100
    },
    {
      "epoch": 2.5466245098616813,
      "grad_norm": 0.36204466223716736,
      "learning_rate": 4.939082648603415e-06,
      "loss": 0.0152,
      "step": 1556120
    },
    {
      "epoch": 2.5466572403003345,
      "grad_norm": 0.24377582967281342,
      "learning_rate": 4.939016756389898e-06,
      "loss": 0.0142,
      "step": 1556140
    },
    {
      "epoch": 2.5466899707389876,
      "grad_norm": 0.22295427322387695,
      "learning_rate": 4.93895086417638e-06,
      "loss": 0.0143,
      "step": 1556160
    },
    {
      "epoch": 2.546722701177641,
      "grad_norm": 0.08388350158929825,
      "learning_rate": 4.938884971962863e-06,
      "loss": 0.009,
      "step": 1556180
    },
    {
      "epoch": 2.5467554316162944,
      "grad_norm": 0.3485983908176422,
      "learning_rate": 4.938819079749347e-06,
      "loss": 0.0174,
      "step": 1556200
    },
    {
      "epoch": 2.546788162054948,
      "grad_norm": 0.3728253245353699,
      "learning_rate": 4.938753187535829e-06,
      "loss": 0.0132,
      "step": 1556220
    },
    {
      "epoch": 2.546820892493601,
      "grad_norm": 0.26184049248695374,
      "learning_rate": 4.938687295322312e-06,
      "loss": 0.0132,
      "step": 1556240
    },
    {
      "epoch": 2.5468536229322547,
      "grad_norm": 0.10240175575017929,
      "learning_rate": 4.938621403108795e-06,
      "loss": 0.0207,
      "step": 1556260
    },
    {
      "epoch": 2.546886353370908,
      "grad_norm": 0.3654673099517822,
      "learning_rate": 4.9385555108952785e-06,
      "loss": 0.0145,
      "step": 1556280
    },
    {
      "epoch": 2.546919083809561,
      "grad_norm": 0.5359199643135071,
      "learning_rate": 4.938489618681761e-06,
      "loss": 0.0129,
      "step": 1556300
    },
    {
      "epoch": 2.5469518142482146,
      "grad_norm": 0.991665780544281,
      "learning_rate": 4.938423726468244e-06,
      "loss": 0.0153,
      "step": 1556320
    },
    {
      "epoch": 2.5469845446868677,
      "grad_norm": 0.07710712403059006,
      "learning_rate": 4.938357834254727e-06,
      "loss": 0.0131,
      "step": 1556340
    },
    {
      "epoch": 2.5470172751255213,
      "grad_norm": 0.6626068949699402,
      "learning_rate": 4.9382919420412094e-06,
      "loss": 0.0164,
      "step": 1556360
    },
    {
      "epoch": 2.5470500055641745,
      "grad_norm": 0.5605109930038452,
      "learning_rate": 4.938226049827692e-06,
      "loss": 0.0141,
      "step": 1556380
    },
    {
      "epoch": 2.547082736002828,
      "grad_norm": 0.17495344579219818,
      "learning_rate": 4.938160157614175e-06,
      "loss": 0.0102,
      "step": 1556400
    },
    {
      "epoch": 2.5471154664414812,
      "grad_norm": 0.22410637140274048,
      "learning_rate": 4.938094265400658e-06,
      "loss": 0.0158,
      "step": 1556420
    },
    {
      "epoch": 2.5471481968801344,
      "grad_norm": 0.4258897304534912,
      "learning_rate": 4.938028373187141e-06,
      "loss": 0.0142,
      "step": 1556440
    },
    {
      "epoch": 2.547180927318788,
      "grad_norm": 0.28926655650138855,
      "learning_rate": 4.937962480973624e-06,
      "loss": 0.0162,
      "step": 1556460
    },
    {
      "epoch": 2.547213657757441,
      "grad_norm": 0.39047837257385254,
      "learning_rate": 4.937896588760107e-06,
      "loss": 0.012,
      "step": 1556480
    },
    {
      "epoch": 2.5472463881960947,
      "grad_norm": 0.10008230060338974,
      "learning_rate": 4.9378306965465894e-06,
      "loss": 0.0081,
      "step": 1556500
    },
    {
      "epoch": 2.547279118634748,
      "grad_norm": 0.1924659013748169,
      "learning_rate": 4.937764804333072e-06,
      "loss": 0.0138,
      "step": 1556520
    },
    {
      "epoch": 2.5473118490734015,
      "grad_norm": 0.15822917222976685,
      "learning_rate": 4.937698912119555e-06,
      "loss": 0.0134,
      "step": 1556540
    },
    {
      "epoch": 2.5473445795120546,
      "grad_norm": 0.18277576565742493,
      "learning_rate": 4.937633019906038e-06,
      "loss": 0.0149,
      "step": 1556560
    },
    {
      "epoch": 2.5473773099507078,
      "grad_norm": 0.4401363730430603,
      "learning_rate": 4.93756712769252e-06,
      "loss": 0.0206,
      "step": 1556580
    },
    {
      "epoch": 2.5474100403893614,
      "grad_norm": 0.07852492481470108,
      "learning_rate": 4.937501235479004e-06,
      "loss": 0.0132,
      "step": 1556600
    },
    {
      "epoch": 2.5474427708280145,
      "grad_norm": 0.49346986413002014,
      "learning_rate": 4.937435343265487e-06,
      "loss": 0.0146,
      "step": 1556620
    },
    {
      "epoch": 2.547475501266668,
      "grad_norm": 0.42662057280540466,
      "learning_rate": 4.9373694510519695e-06,
      "loss": 0.0168,
      "step": 1556640
    },
    {
      "epoch": 2.5475082317053213,
      "grad_norm": 0.28741389513015747,
      "learning_rate": 4.937303558838453e-06,
      "loss": 0.011,
      "step": 1556660
    },
    {
      "epoch": 2.547540962143975,
      "grad_norm": 0.1421627700328827,
      "learning_rate": 4.937237666624936e-06,
      "loss": 0.016,
      "step": 1556680
    },
    {
      "epoch": 2.547573692582628,
      "grad_norm": 0.29944777488708496,
      "learning_rate": 4.9371717744114185e-06,
      "loss": 0.0122,
      "step": 1556700
    },
    {
      "epoch": 2.547606423021281,
      "grad_norm": 0.7234629392623901,
      "learning_rate": 4.937105882197901e-06,
      "loss": 0.0161,
      "step": 1556720
    },
    {
      "epoch": 2.5476391534599347,
      "grad_norm": 0.42591744661331177,
      "learning_rate": 4.937039989984384e-06,
      "loss": 0.0192,
      "step": 1556740
    },
    {
      "epoch": 2.547671883898588,
      "grad_norm": 0.30153587460517883,
      "learning_rate": 4.936974097770867e-06,
      "loss": 0.0156,
      "step": 1556760
    },
    {
      "epoch": 2.5477046143372415,
      "grad_norm": 0.2706581950187683,
      "learning_rate": 4.9369082055573495e-06,
      "loss": 0.012,
      "step": 1556780
    },
    {
      "epoch": 2.5477373447758946,
      "grad_norm": 0.7231191992759705,
      "learning_rate": 4.936842313343832e-06,
      "loss": 0.0134,
      "step": 1556800
    },
    {
      "epoch": 2.5477700752145482,
      "grad_norm": 0.3833758533000946,
      "learning_rate": 4.936776421130316e-06,
      "loss": 0.016,
      "step": 1556820
    },
    {
      "epoch": 2.5478028056532014,
      "grad_norm": 0.26836898922920227,
      "learning_rate": 4.9367105289167986e-06,
      "loss": 0.0129,
      "step": 1556840
    },
    {
      "epoch": 2.5478355360918545,
      "grad_norm": 0.47154682874679565,
      "learning_rate": 4.936644636703281e-06,
      "loss": 0.0152,
      "step": 1556860
    },
    {
      "epoch": 2.547868266530508,
      "grad_norm": 0.20556917786598206,
      "learning_rate": 4.936578744489764e-06,
      "loss": 0.0127,
      "step": 1556880
    },
    {
      "epoch": 2.5479009969691613,
      "grad_norm": 0.7335603833198547,
      "learning_rate": 4.936512852276247e-06,
      "loss": 0.0138,
      "step": 1556900
    },
    {
      "epoch": 2.547933727407815,
      "grad_norm": 0.2941960096359253,
      "learning_rate": 4.9364469600627295e-06,
      "loss": 0.0144,
      "step": 1556920
    },
    {
      "epoch": 2.547966457846468,
      "grad_norm": 0.48968052864074707,
      "learning_rate": 4.936381067849212e-06,
      "loss": 0.0181,
      "step": 1556940
    },
    {
      "epoch": 2.5479991882851216,
      "grad_norm": 1.2574657201766968,
      "learning_rate": 4.936315175635695e-06,
      "loss": 0.0131,
      "step": 1556960
    },
    {
      "epoch": 2.5480319187237748,
      "grad_norm": 1.324599027633667,
      "learning_rate": 4.936249283422178e-06,
      "loss": 0.0183,
      "step": 1556980
    },
    {
      "epoch": 2.548064649162428,
      "grad_norm": 0.533181369304657,
      "learning_rate": 4.936183391208661e-06,
      "loss": 0.016,
      "step": 1557000
    },
    {
      "epoch": 2.5480973796010815,
      "grad_norm": 0.4755735397338867,
      "learning_rate": 4.936117498995144e-06,
      "loss": 0.0119,
      "step": 1557020
    },
    {
      "epoch": 2.5481301100397347,
      "grad_norm": 0.7090848088264465,
      "learning_rate": 4.936051606781627e-06,
      "loss": 0.0174,
      "step": 1557040
    },
    {
      "epoch": 2.548162840478388,
      "grad_norm": 0.2608436346054077,
      "learning_rate": 4.93598571456811e-06,
      "loss": 0.0124,
      "step": 1557060
    },
    {
      "epoch": 2.5481955709170414,
      "grad_norm": 0.35143089294433594,
      "learning_rate": 4.935919822354593e-06,
      "loss": 0.0189,
      "step": 1557080
    },
    {
      "epoch": 2.548228301355695,
      "grad_norm": 0.39882248640060425,
      "learning_rate": 4.935853930141076e-06,
      "loss": 0.0159,
      "step": 1557100
    },
    {
      "epoch": 2.548261031794348,
      "grad_norm": 0.46621865034103394,
      "learning_rate": 4.935788037927559e-06,
      "loss": 0.0088,
      "step": 1557120
    },
    {
      "epoch": 2.5482937622330013,
      "grad_norm": 0.1829126924276352,
      "learning_rate": 4.935722145714041e-06,
      "loss": 0.0147,
      "step": 1557140
    },
    {
      "epoch": 2.548326492671655,
      "grad_norm": 0.1621767282485962,
      "learning_rate": 4.935656253500524e-06,
      "loss": 0.0116,
      "step": 1557160
    },
    {
      "epoch": 2.548359223110308,
      "grad_norm": 0.625482439994812,
      "learning_rate": 4.935590361287007e-06,
      "loss": 0.0113,
      "step": 1557180
    },
    {
      "epoch": 2.548391953548961,
      "grad_norm": 0.602270781993866,
      "learning_rate": 4.9355244690734896e-06,
      "loss": 0.0129,
      "step": 1557200
    },
    {
      "epoch": 2.548424683987615,
      "grad_norm": 0.2126050591468811,
      "learning_rate": 4.935458576859973e-06,
      "loss": 0.0127,
      "step": 1557220
    },
    {
      "epoch": 2.5484574144262684,
      "grad_norm": 0.21408295631408691,
      "learning_rate": 4.935392684646456e-06,
      "loss": 0.0173,
      "step": 1557240
    },
    {
      "epoch": 2.5484901448649215,
      "grad_norm": 0.49161848425865173,
      "learning_rate": 4.935326792432939e-06,
      "loss": 0.0197,
      "step": 1557260
    },
    {
      "epoch": 2.5485228753035747,
      "grad_norm": 0.3413059413433075,
      "learning_rate": 4.935260900219421e-06,
      "loss": 0.01,
      "step": 1557280
    },
    {
      "epoch": 2.5485556057422283,
      "grad_norm": 0.5997861623764038,
      "learning_rate": 4.935195008005904e-06,
      "loss": 0.0139,
      "step": 1557300
    },
    {
      "epoch": 2.5485883361808814,
      "grad_norm": 0.23414817452430725,
      "learning_rate": 4.935129115792387e-06,
      "loss": 0.0132,
      "step": 1557320
    },
    {
      "epoch": 2.5486210666195346,
      "grad_norm": 0.07429662346839905,
      "learning_rate": 4.9350632235788704e-06,
      "loss": 0.0123,
      "step": 1557340
    },
    {
      "epoch": 2.548653797058188,
      "grad_norm": 0.46388256549835205,
      "learning_rate": 4.934997331365353e-06,
      "loss": 0.0168,
      "step": 1557360
    },
    {
      "epoch": 2.5486865274968418,
      "grad_norm": 0.33752724528312683,
      "learning_rate": 4.934931439151836e-06,
      "loss": 0.0123,
      "step": 1557380
    },
    {
      "epoch": 2.548719257935495,
      "grad_norm": 0.544519305229187,
      "learning_rate": 4.934865546938319e-06,
      "loss": 0.0175,
      "step": 1557400
    },
    {
      "epoch": 2.548751988374148,
      "grad_norm": 0.9646382331848145,
      "learning_rate": 4.934799654724801e-06,
      "loss": 0.0151,
      "step": 1557420
    },
    {
      "epoch": 2.5487847188128017,
      "grad_norm": 0.5883301496505737,
      "learning_rate": 4.934733762511284e-06,
      "loss": 0.0191,
      "step": 1557440
    },
    {
      "epoch": 2.548817449251455,
      "grad_norm": 0.8377568125724792,
      "learning_rate": 4.934667870297768e-06,
      "loss": 0.0202,
      "step": 1557460
    },
    {
      "epoch": 2.548850179690108,
      "grad_norm": 0.23980613052845,
      "learning_rate": 4.9346019780842505e-06,
      "loss": 0.0127,
      "step": 1557480
    },
    {
      "epoch": 2.5488829101287616,
      "grad_norm": 0.8093228340148926,
      "learning_rate": 4.934536085870733e-06,
      "loss": 0.0183,
      "step": 1557500
    },
    {
      "epoch": 2.548915640567415,
      "grad_norm": 0.5220037698745728,
      "learning_rate": 4.934470193657216e-06,
      "loss": 0.0165,
      "step": 1557520
    },
    {
      "epoch": 2.5489483710060683,
      "grad_norm": 2.2026588916778564,
      "learning_rate": 4.934404301443699e-06,
      "loss": 0.015,
      "step": 1557540
    },
    {
      "epoch": 2.5489811014447215,
      "grad_norm": 0.376376211643219,
      "learning_rate": 4.9343384092301814e-06,
      "loss": 0.0118,
      "step": 1557560
    },
    {
      "epoch": 2.549013831883375,
      "grad_norm": 0.46421465277671814,
      "learning_rate": 4.934272517016664e-06,
      "loss": 0.0118,
      "step": 1557580
    },
    {
      "epoch": 2.549046562322028,
      "grad_norm": 0.24555560946464539,
      "learning_rate": 4.934206624803147e-06,
      "loss": 0.0104,
      "step": 1557600
    },
    {
      "epoch": 2.5490792927606813,
      "grad_norm": 0.2064868062734604,
      "learning_rate": 4.9341407325896305e-06,
      "loss": 0.0125,
      "step": 1557620
    },
    {
      "epoch": 2.549112023199335,
      "grad_norm": 0.5063617825508118,
      "learning_rate": 4.934074840376113e-06,
      "loss": 0.0134,
      "step": 1557640
    },
    {
      "epoch": 2.549144753637988,
      "grad_norm": 0.06403721868991852,
      "learning_rate": 4.934008948162596e-06,
      "loss": 0.0129,
      "step": 1557660
    },
    {
      "epoch": 2.5491774840766417,
      "grad_norm": 0.17915838956832886,
      "learning_rate": 4.933943055949079e-06,
      "loss": 0.015,
      "step": 1557680
    },
    {
      "epoch": 2.549210214515295,
      "grad_norm": 1.3868231773376465,
      "learning_rate": 4.933877163735562e-06,
      "loss": 0.0152,
      "step": 1557700
    },
    {
      "epoch": 2.5492429449539484,
      "grad_norm": 0.8078722357749939,
      "learning_rate": 4.933811271522045e-06,
      "loss": 0.0153,
      "step": 1557720
    },
    {
      "epoch": 2.5492756753926016,
      "grad_norm": 0.30807703733444214,
      "learning_rate": 4.933745379308528e-06,
      "loss": 0.0127,
      "step": 1557740
    },
    {
      "epoch": 2.5493084058312547,
      "grad_norm": 0.45695433020591736,
      "learning_rate": 4.9336794870950105e-06,
      "loss": 0.0142,
      "step": 1557760
    },
    {
      "epoch": 2.5493411362699083,
      "grad_norm": 0.13433130085468292,
      "learning_rate": 4.933613594881493e-06,
      "loss": 0.0148,
      "step": 1557780
    },
    {
      "epoch": 2.5493738667085615,
      "grad_norm": 0.3204386532306671,
      "learning_rate": 4.933547702667976e-06,
      "loss": 0.0143,
      "step": 1557800
    },
    {
      "epoch": 2.549406597147215,
      "grad_norm": 0.3975556492805481,
      "learning_rate": 4.933481810454459e-06,
      "loss": 0.0179,
      "step": 1557820
    },
    {
      "epoch": 2.549439327585868,
      "grad_norm": 0.3380206823348999,
      "learning_rate": 4.9334159182409415e-06,
      "loss": 0.0167,
      "step": 1557840
    },
    {
      "epoch": 2.549472058024522,
      "grad_norm": 0.7568616271018982,
      "learning_rate": 4.933350026027425e-06,
      "loss": 0.0155,
      "step": 1557860
    },
    {
      "epoch": 2.549504788463175,
      "grad_norm": 0.23621034622192383,
      "learning_rate": 4.933284133813908e-06,
      "loss": 0.0177,
      "step": 1557880
    },
    {
      "epoch": 2.549537518901828,
      "grad_norm": 0.3931312561035156,
      "learning_rate": 4.9332182416003905e-06,
      "loss": 0.0236,
      "step": 1557900
    },
    {
      "epoch": 2.5495702493404817,
      "grad_norm": 0.09965085238218307,
      "learning_rate": 4.933152349386873e-06,
      "loss": 0.0112,
      "step": 1557920
    },
    {
      "epoch": 2.549602979779135,
      "grad_norm": 0.19281142950057983,
      "learning_rate": 4.933086457173356e-06,
      "loss": 0.0119,
      "step": 1557940
    },
    {
      "epoch": 2.5496357102177885,
      "grad_norm": 0.44079649448394775,
      "learning_rate": 4.933020564959839e-06,
      "loss": 0.0116,
      "step": 1557960
    },
    {
      "epoch": 2.5496684406564416,
      "grad_norm": 0.12406640499830246,
      "learning_rate": 4.9329546727463215e-06,
      "loss": 0.011,
      "step": 1557980
    },
    {
      "epoch": 2.549701171095095,
      "grad_norm": 1.4906558990478516,
      "learning_rate": 4.932888780532804e-06,
      "loss": 0.014,
      "step": 1558000
    },
    {
      "epoch": 2.5497339015337483,
      "grad_norm": 0.6340197324752808,
      "learning_rate": 4.932822888319288e-06,
      "loss": 0.0148,
      "step": 1558020
    },
    {
      "epoch": 2.5497666319724015,
      "grad_norm": 0.25638511776924133,
      "learning_rate": 4.9327569961057706e-06,
      "loss": 0.0089,
      "step": 1558040
    },
    {
      "epoch": 2.549799362411055,
      "grad_norm": 0.5795689821243286,
      "learning_rate": 4.932691103892253e-06,
      "loss": 0.0152,
      "step": 1558060
    },
    {
      "epoch": 2.5498320928497082,
      "grad_norm": 0.2653612792491913,
      "learning_rate": 4.932625211678737e-06,
      "loss": 0.0144,
      "step": 1558080
    },
    {
      "epoch": 2.549864823288362,
      "grad_norm": 0.055737849324941635,
      "learning_rate": 4.93255931946522e-06,
      "loss": 0.0157,
      "step": 1558100
    },
    {
      "epoch": 2.549897553727015,
      "grad_norm": 0.33739280700683594,
      "learning_rate": 4.932493427251702e-06,
      "loss": 0.0129,
      "step": 1558120
    },
    {
      "epoch": 2.5499302841656686,
      "grad_norm": 0.5731087327003479,
      "learning_rate": 4.932427535038185e-06,
      "loss": 0.0121,
      "step": 1558140
    },
    {
      "epoch": 2.5499630146043217,
      "grad_norm": 0.6353155374526978,
      "learning_rate": 4.932361642824668e-06,
      "loss": 0.0209,
      "step": 1558160
    },
    {
      "epoch": 2.549995745042975,
      "grad_norm": 0.1546439230442047,
      "learning_rate": 4.932295750611151e-06,
      "loss": 0.0113,
      "step": 1558180
    },
    {
      "epoch": 2.5500284754816285,
      "grad_norm": 0.27333444356918335,
      "learning_rate": 4.932229858397633e-06,
      "loss": 0.0143,
      "step": 1558200
    },
    {
      "epoch": 2.5500612059202816,
      "grad_norm": 0.4773727357387543,
      "learning_rate": 4.932163966184116e-06,
      "loss": 0.0207,
      "step": 1558220
    },
    {
      "epoch": 2.550093936358935,
      "grad_norm": 0.13566836714744568,
      "learning_rate": 4.9320980739706e-06,
      "loss": 0.0177,
      "step": 1558240
    },
    {
      "epoch": 2.5501266667975884,
      "grad_norm": 0.6796588897705078,
      "learning_rate": 4.932032181757082e-06,
      "loss": 0.0114,
      "step": 1558260
    },
    {
      "epoch": 2.550159397236242,
      "grad_norm": 0.12441816926002502,
      "learning_rate": 4.931966289543565e-06,
      "loss": 0.0069,
      "step": 1558280
    },
    {
      "epoch": 2.550192127674895,
      "grad_norm": 0.3981100022792816,
      "learning_rate": 4.931900397330048e-06,
      "loss": 0.009,
      "step": 1558300
    },
    {
      "epoch": 2.5502248581135483,
      "grad_norm": 0.9712099432945251,
      "learning_rate": 4.931834505116531e-06,
      "loss": 0.0146,
      "step": 1558320
    },
    {
      "epoch": 2.550257588552202,
      "grad_norm": 0.31850048899650574,
      "learning_rate": 4.931768612903013e-06,
      "loss": 0.0203,
      "step": 1558340
    },
    {
      "epoch": 2.550290318990855,
      "grad_norm": 0.4009067416191101,
      "learning_rate": 4.931702720689496e-06,
      "loss": 0.0209,
      "step": 1558360
    },
    {
      "epoch": 2.5503230494295086,
      "grad_norm": 0.3026590347290039,
      "learning_rate": 4.931636828475979e-06,
      "loss": 0.0204,
      "step": 1558380
    },
    {
      "epoch": 2.5503557798681618,
      "grad_norm": 0.42475464940071106,
      "learning_rate": 4.9315709362624624e-06,
      "loss": 0.0117,
      "step": 1558400
    },
    {
      "epoch": 2.5503885103068153,
      "grad_norm": 0.3169582486152649,
      "learning_rate": 4.931505044048945e-06,
      "loss": 0.0123,
      "step": 1558420
    },
    {
      "epoch": 2.5504212407454685,
      "grad_norm": 0.19466905295848846,
      "learning_rate": 4.931439151835428e-06,
      "loss": 0.0118,
      "step": 1558440
    },
    {
      "epoch": 2.5504539711841216,
      "grad_norm": 0.24918526411056519,
      "learning_rate": 4.931373259621911e-06,
      "loss": 0.0111,
      "step": 1558460
    },
    {
      "epoch": 2.5504867016227752,
      "grad_norm": 0.15378274023532867,
      "learning_rate": 4.931307367408394e-06,
      "loss": 0.0124,
      "step": 1558480
    },
    {
      "epoch": 2.5505194320614284,
      "grad_norm": 0.4330484867095947,
      "learning_rate": 4.931241475194877e-06,
      "loss": 0.0099,
      "step": 1558500
    },
    {
      "epoch": 2.550552162500082,
      "grad_norm": 0.672761082649231,
      "learning_rate": 4.93117558298136e-06,
      "loss": 0.0185,
      "step": 1558520
    },
    {
      "epoch": 2.550584892938735,
      "grad_norm": 0.13073422014713287,
      "learning_rate": 4.9311096907678425e-06,
      "loss": 0.0092,
      "step": 1558540
    },
    {
      "epoch": 2.5506176233773887,
      "grad_norm": 0.15848848223686218,
      "learning_rate": 4.931043798554325e-06,
      "loss": 0.0101,
      "step": 1558560
    },
    {
      "epoch": 2.550650353816042,
      "grad_norm": 0.2602353096008301,
      "learning_rate": 4.930977906340808e-06,
      "loss": 0.0135,
      "step": 1558580
    },
    {
      "epoch": 2.550683084254695,
      "grad_norm": 0.1850026696920395,
      "learning_rate": 4.930912014127291e-06,
      "loss": 0.0094,
      "step": 1558600
    },
    {
      "epoch": 2.5507158146933486,
      "grad_norm": 0.388327419757843,
      "learning_rate": 4.930846121913773e-06,
      "loss": 0.0204,
      "step": 1558620
    },
    {
      "epoch": 2.5507485451320018,
      "grad_norm": 0.18889977037906647,
      "learning_rate": 4.930780229700257e-06,
      "loss": 0.018,
      "step": 1558640
    },
    {
      "epoch": 2.550781275570655,
      "grad_norm": 0.2040829062461853,
      "learning_rate": 4.93071433748674e-06,
      "loss": 0.0155,
      "step": 1558660
    },
    {
      "epoch": 2.5508140060093085,
      "grad_norm": 0.4132206439971924,
      "learning_rate": 4.9306484452732225e-06,
      "loss": 0.0168,
      "step": 1558680
    },
    {
      "epoch": 2.550846736447962,
      "grad_norm": 2.2562735080718994,
      "learning_rate": 4.930582553059705e-06,
      "loss": 0.0159,
      "step": 1558700
    },
    {
      "epoch": 2.5508794668866153,
      "grad_norm": 0.5888941287994385,
      "learning_rate": 4.930516660846188e-06,
      "loss": 0.0124,
      "step": 1558720
    },
    {
      "epoch": 2.5509121973252684,
      "grad_norm": 1.1696940660476685,
      "learning_rate": 4.9304507686326715e-06,
      "loss": 0.011,
      "step": 1558740
    },
    {
      "epoch": 2.550944927763922,
      "grad_norm": 0.17878113687038422,
      "learning_rate": 4.930384876419154e-06,
      "loss": 0.0111,
      "step": 1558760
    },
    {
      "epoch": 2.550977658202575,
      "grad_norm": 0.6709474325180054,
      "learning_rate": 4.930318984205637e-06,
      "loss": 0.0094,
      "step": 1558780
    },
    {
      "epoch": 2.5510103886412283,
      "grad_norm": 0.20457236468791962,
      "learning_rate": 4.93025309199212e-06,
      "loss": 0.015,
      "step": 1558800
    },
    {
      "epoch": 2.551043119079882,
      "grad_norm": 0.24985386431217194,
      "learning_rate": 4.9301871997786025e-06,
      "loss": 0.0105,
      "step": 1558820
    },
    {
      "epoch": 2.5510758495185355,
      "grad_norm": 0.17739684879779816,
      "learning_rate": 4.930121307565085e-06,
      "loss": 0.0176,
      "step": 1558840
    },
    {
      "epoch": 2.5511085799571886,
      "grad_norm": 0.20460174977779388,
      "learning_rate": 4.930055415351568e-06,
      "loss": 0.0094,
      "step": 1558860
    },
    {
      "epoch": 2.551141310395842,
      "grad_norm": 0.9458816051483154,
      "learning_rate": 4.9299895231380516e-06,
      "loss": 0.0173,
      "step": 1558880
    },
    {
      "epoch": 2.5511740408344954,
      "grad_norm": 0.36181801557540894,
      "learning_rate": 4.929923630924534e-06,
      "loss": 0.0115,
      "step": 1558900
    },
    {
      "epoch": 2.5512067712731485,
      "grad_norm": 0.35259535908699036,
      "learning_rate": 4.929857738711017e-06,
      "loss": 0.0113,
      "step": 1558920
    },
    {
      "epoch": 2.5512395017118017,
      "grad_norm": 0.30350634455680847,
      "learning_rate": 4.9297918464975e-06,
      "loss": 0.0118,
      "step": 1558940
    },
    {
      "epoch": 2.5512722321504553,
      "grad_norm": 0.2023172527551651,
      "learning_rate": 4.9297259542839825e-06,
      "loss": 0.0139,
      "step": 1558960
    },
    {
      "epoch": 2.551304962589109,
      "grad_norm": 0.2197245955467224,
      "learning_rate": 4.929660062070465e-06,
      "loss": 0.0135,
      "step": 1558980
    },
    {
      "epoch": 2.551337693027762,
      "grad_norm": 0.35884517431259155,
      "learning_rate": 4.929594169856948e-06,
      "loss": 0.013,
      "step": 1559000
    },
    {
      "epoch": 2.551370423466415,
      "grad_norm": 0.48327723145484924,
      "learning_rate": 4.929528277643431e-06,
      "loss": 0.0173,
      "step": 1559020
    },
    {
      "epoch": 2.5514031539050688,
      "grad_norm": 0.37706080079078674,
      "learning_rate": 4.929462385429914e-06,
      "loss": 0.0135,
      "step": 1559040
    },
    {
      "epoch": 2.551435884343722,
      "grad_norm": 0.3256579041481018,
      "learning_rate": 4.929396493216397e-06,
      "loss": 0.0139,
      "step": 1559060
    },
    {
      "epoch": 2.551468614782375,
      "grad_norm": 0.13509951531887054,
      "learning_rate": 4.92933060100288e-06,
      "loss": 0.0112,
      "step": 1559080
    },
    {
      "epoch": 2.5515013452210287,
      "grad_norm": 0.5757502317428589,
      "learning_rate": 4.929264708789363e-06,
      "loss": 0.0107,
      "step": 1559100
    },
    {
      "epoch": 2.5515340756596823,
      "grad_norm": 0.12673580646514893,
      "learning_rate": 4.929198816575846e-06,
      "loss": 0.0176,
      "step": 1559120
    },
    {
      "epoch": 2.5515668060983354,
      "grad_norm": 0.36909136176109314,
      "learning_rate": 4.929132924362329e-06,
      "loss": 0.0154,
      "step": 1559140
    },
    {
      "epoch": 2.5515995365369886,
      "grad_norm": 0.36817362904548645,
      "learning_rate": 4.929067032148812e-06,
      "loss": 0.0165,
      "step": 1559160
    },
    {
      "epoch": 2.551632266975642,
      "grad_norm": 0.12233912944793701,
      "learning_rate": 4.929001139935294e-06,
      "loss": 0.016,
      "step": 1559180
    },
    {
      "epoch": 2.5516649974142953,
      "grad_norm": 0.09499270468950272,
      "learning_rate": 4.928935247721777e-06,
      "loss": 0.0193,
      "step": 1559200
    },
    {
      "epoch": 2.5516977278529485,
      "grad_norm": 0.5556048154830933,
      "learning_rate": 4.92886935550826e-06,
      "loss": 0.0137,
      "step": 1559220
    },
    {
      "epoch": 2.551730458291602,
      "grad_norm": 0.11969728767871857,
      "learning_rate": 4.928803463294743e-06,
      "loss": 0.0184,
      "step": 1559240
    },
    {
      "epoch": 2.551763188730255,
      "grad_norm": 0.34954026341438293,
      "learning_rate": 4.928737571081226e-06,
      "loss": 0.0164,
      "step": 1559260
    },
    {
      "epoch": 2.551795919168909,
      "grad_norm": 0.178832545876503,
      "learning_rate": 4.928671678867709e-06,
      "loss": 0.019,
      "step": 1559280
    },
    {
      "epoch": 2.551828649607562,
      "grad_norm": 0.3493589460849762,
      "learning_rate": 4.928605786654192e-06,
      "loss": 0.0118,
      "step": 1559300
    },
    {
      "epoch": 2.5518613800462155,
      "grad_norm": 0.08685536682605743,
      "learning_rate": 4.928539894440674e-06,
      "loss": 0.0143,
      "step": 1559320
    },
    {
      "epoch": 2.5518941104848687,
      "grad_norm": 0.5069946646690369,
      "learning_rate": 4.928474002227157e-06,
      "loss": 0.022,
      "step": 1559340
    },
    {
      "epoch": 2.551926840923522,
      "grad_norm": 0.4038711190223694,
      "learning_rate": 4.92840811001364e-06,
      "loss": 0.0182,
      "step": 1559360
    },
    {
      "epoch": 2.5519595713621754,
      "grad_norm": 0.14163453876972198,
      "learning_rate": 4.928342217800123e-06,
      "loss": 0.0118,
      "step": 1559380
    },
    {
      "epoch": 2.5519923018008286,
      "grad_norm": 0.845720648765564,
      "learning_rate": 4.928276325586605e-06,
      "loss": 0.0114,
      "step": 1559400
    },
    {
      "epoch": 2.552025032239482,
      "grad_norm": 0.7446593046188354,
      "learning_rate": 4.928210433373088e-06,
      "loss": 0.024,
      "step": 1559420
    },
    {
      "epoch": 2.5520577626781353,
      "grad_norm": 0.183232381939888,
      "learning_rate": 4.928144541159572e-06,
      "loss": 0.0103,
      "step": 1559440
    },
    {
      "epoch": 2.552090493116789,
      "grad_norm": 0.4928927421569824,
      "learning_rate": 4.928078648946054e-06,
      "loss": 0.0135,
      "step": 1559460
    },
    {
      "epoch": 2.552123223555442,
      "grad_norm": 0.4813419282436371,
      "learning_rate": 4.928012756732537e-06,
      "loss": 0.01,
      "step": 1559480
    },
    {
      "epoch": 2.5521559539940952,
      "grad_norm": 0.546277642250061,
      "learning_rate": 4.927946864519021e-06,
      "loss": 0.0167,
      "step": 1559500
    },
    {
      "epoch": 2.552188684432749,
      "grad_norm": 0.3707866668701172,
      "learning_rate": 4.9278809723055035e-06,
      "loss": 0.015,
      "step": 1559520
    },
    {
      "epoch": 2.552221414871402,
      "grad_norm": 0.19598355889320374,
      "learning_rate": 4.927815080091986e-06,
      "loss": 0.0136,
      "step": 1559540
    },
    {
      "epoch": 2.5522541453100556,
      "grad_norm": 0.36652764678001404,
      "learning_rate": 4.927749187878469e-06,
      "loss": 0.0139,
      "step": 1559560
    },
    {
      "epoch": 2.5522868757487087,
      "grad_norm": 0.41431424021720886,
      "learning_rate": 4.927683295664952e-06,
      "loss": 0.011,
      "step": 1559580
    },
    {
      "epoch": 2.5523196061873623,
      "grad_norm": 1.3248060941696167,
      "learning_rate": 4.9276174034514344e-06,
      "loss": 0.0147,
      "step": 1559600
    },
    {
      "epoch": 2.5523523366260155,
      "grad_norm": 0.5407754778862,
      "learning_rate": 4.927551511237917e-06,
      "loss": 0.019,
      "step": 1559620
    },
    {
      "epoch": 2.5523850670646686,
      "grad_norm": 0.5755707025527954,
      "learning_rate": 4.9274856190244e-06,
      "loss": 0.0143,
      "step": 1559640
    },
    {
      "epoch": 2.552417797503322,
      "grad_norm": 0.2336122840642929,
      "learning_rate": 4.9274197268108835e-06,
      "loss": 0.0101,
      "step": 1559660
    },
    {
      "epoch": 2.5524505279419754,
      "grad_norm": 0.06684549152851105,
      "learning_rate": 4.927353834597366e-06,
      "loss": 0.01,
      "step": 1559680
    },
    {
      "epoch": 2.552483258380629,
      "grad_norm": 0.36388149857521057,
      "learning_rate": 4.927287942383849e-06,
      "loss": 0.0173,
      "step": 1559700
    },
    {
      "epoch": 2.552515988819282,
      "grad_norm": 0.3041349947452545,
      "learning_rate": 4.927222050170332e-06,
      "loss": 0.013,
      "step": 1559720
    },
    {
      "epoch": 2.5525487192579357,
      "grad_norm": 0.4539770781993866,
      "learning_rate": 4.9271561579568145e-06,
      "loss": 0.0132,
      "step": 1559740
    },
    {
      "epoch": 2.552581449696589,
      "grad_norm": 0.2633714973926544,
      "learning_rate": 4.927090265743297e-06,
      "loss": 0.0254,
      "step": 1559760
    },
    {
      "epoch": 2.552614180135242,
      "grad_norm": 0.32026323676109314,
      "learning_rate": 4.92702437352978e-06,
      "loss": 0.0102,
      "step": 1559780
    },
    {
      "epoch": 2.5526469105738956,
      "grad_norm": 0.5972400903701782,
      "learning_rate": 4.9269584813162635e-06,
      "loss": 0.0123,
      "step": 1559800
    },
    {
      "epoch": 2.5526796410125487,
      "grad_norm": 0.4603043496608734,
      "learning_rate": 4.926892589102746e-06,
      "loss": 0.0135,
      "step": 1559820
    },
    {
      "epoch": 2.5527123714512023,
      "grad_norm": 0.8300490975379944,
      "learning_rate": 4.926826696889229e-06,
      "loss": 0.018,
      "step": 1559840
    },
    {
      "epoch": 2.5527451018898555,
      "grad_norm": 0.7789483666419983,
      "learning_rate": 4.926760804675712e-06,
      "loss": 0.0192,
      "step": 1559860
    },
    {
      "epoch": 2.552777832328509,
      "grad_norm": 0.28736233711242676,
      "learning_rate": 4.9266949124621945e-06,
      "loss": 0.0124,
      "step": 1559880
    },
    {
      "epoch": 2.5528105627671622,
      "grad_norm": 0.2292226403951645,
      "learning_rate": 4.926629020248678e-06,
      "loss": 0.0094,
      "step": 1559900
    },
    {
      "epoch": 2.5528432932058154,
      "grad_norm": 0.1567411571741104,
      "learning_rate": 4.926563128035161e-06,
      "loss": 0.015,
      "step": 1559920
    },
    {
      "epoch": 2.552876023644469,
      "grad_norm": 0.27856531739234924,
      "learning_rate": 4.9264972358216436e-06,
      "loss": 0.0172,
      "step": 1559940
    },
    {
      "epoch": 2.552908754083122,
      "grad_norm": 0.5321594476699829,
      "learning_rate": 4.926431343608126e-06,
      "loss": 0.0203,
      "step": 1559960
    },
    {
      "epoch": 2.5529414845217757,
      "grad_norm": 0.34300559759140015,
      "learning_rate": 4.926365451394609e-06,
      "loss": 0.0117,
      "step": 1559980
    },
    {
      "epoch": 2.552974214960429,
      "grad_norm": 0.2821432948112488,
      "learning_rate": 4.926299559181092e-06,
      "loss": 0.0154,
      "step": 1560000
    },
    {
      "epoch": 2.5530069453990825,
      "grad_norm": 0.09492635726928711,
      "learning_rate": 4.9262336669675745e-06,
      "loss": 0.0113,
      "step": 1560020
    },
    {
      "epoch": 2.5530396758377356,
      "grad_norm": 0.09806645661592484,
      "learning_rate": 4.926167774754057e-06,
      "loss": 0.0143,
      "step": 1560040
    },
    {
      "epoch": 2.5530724062763888,
      "grad_norm": 0.27896687388420105,
      "learning_rate": 4.926101882540541e-06,
      "loss": 0.0193,
      "step": 1560060
    },
    {
      "epoch": 2.5531051367150424,
      "grad_norm": 0.1640443503856659,
      "learning_rate": 4.926035990327024e-06,
      "loss": 0.0129,
      "step": 1560080
    },
    {
      "epoch": 2.5531378671536955,
      "grad_norm": 0.12081020325422287,
      "learning_rate": 4.925970098113506e-06,
      "loss": 0.0142,
      "step": 1560100
    },
    {
      "epoch": 2.5531705975923487,
      "grad_norm": 0.4181036949157715,
      "learning_rate": 4.925904205899989e-06,
      "loss": 0.0165,
      "step": 1560120
    },
    {
      "epoch": 2.5532033280310023,
      "grad_norm": 3.555509090423584,
      "learning_rate": 4.925838313686472e-06,
      "loss": 0.0099,
      "step": 1560140
    },
    {
      "epoch": 2.553236058469656,
      "grad_norm": 0.0776766762137413,
      "learning_rate": 4.925772421472955e-06,
      "loss": 0.0132,
      "step": 1560160
    },
    {
      "epoch": 2.553268788908309,
      "grad_norm": 0.47632813453674316,
      "learning_rate": 4.925706529259438e-06,
      "loss": 0.0127,
      "step": 1560180
    },
    {
      "epoch": 2.553301519346962,
      "grad_norm": 0.45900535583496094,
      "learning_rate": 4.925640637045921e-06,
      "loss": 0.0142,
      "step": 1560200
    },
    {
      "epoch": 2.5533342497856157,
      "grad_norm": 0.7214465141296387,
      "learning_rate": 4.925574744832404e-06,
      "loss": 0.0144,
      "step": 1560220
    },
    {
      "epoch": 2.553366980224269,
      "grad_norm": 0.46179988980293274,
      "learning_rate": 4.925508852618886e-06,
      "loss": 0.0135,
      "step": 1560240
    },
    {
      "epoch": 2.553399710662922,
      "grad_norm": 0.5605793595314026,
      "learning_rate": 4.925442960405369e-06,
      "loss": 0.0119,
      "step": 1560260
    },
    {
      "epoch": 2.5534324411015756,
      "grad_norm": 0.2029491662979126,
      "learning_rate": 4.925377068191852e-06,
      "loss": 0.0102,
      "step": 1560280
    },
    {
      "epoch": 2.5534651715402292,
      "grad_norm": 0.581912100315094,
      "learning_rate": 4.925311175978335e-06,
      "loss": 0.0108,
      "step": 1560300
    },
    {
      "epoch": 2.5534979019788824,
      "grad_norm": 0.12274280935525894,
      "learning_rate": 4.925245283764818e-06,
      "loss": 0.0088,
      "step": 1560320
    },
    {
      "epoch": 2.5535306324175355,
      "grad_norm": 0.39848592877388,
      "learning_rate": 4.925179391551301e-06,
      "loss": 0.0099,
      "step": 1560340
    },
    {
      "epoch": 2.553563362856189,
      "grad_norm": 0.7340162992477417,
      "learning_rate": 4.925113499337784e-06,
      "loss": 0.0127,
      "step": 1560360
    },
    {
      "epoch": 2.5535960932948423,
      "grad_norm": 1.1685892343521118,
      "learning_rate": 4.925047607124266e-06,
      "loss": 0.0164,
      "step": 1560380
    },
    {
      "epoch": 2.5536288237334954,
      "grad_norm": 0.2670367658138275,
      "learning_rate": 4.924981714910749e-06,
      "loss": 0.0186,
      "step": 1560400
    },
    {
      "epoch": 2.553661554172149,
      "grad_norm": 0.11051572859287262,
      "learning_rate": 4.924915822697232e-06,
      "loss": 0.0119,
      "step": 1560420
    },
    {
      "epoch": 2.5536942846108026,
      "grad_norm": 0.16687099635601044,
      "learning_rate": 4.924849930483715e-06,
      "loss": 0.0148,
      "step": 1560440
    },
    {
      "epoch": 2.5537270150494558,
      "grad_norm": 0.944840669631958,
      "learning_rate": 4.924784038270198e-06,
      "loss": 0.0161,
      "step": 1560460
    },
    {
      "epoch": 2.553759745488109,
      "grad_norm": 0.11278363317251205,
      "learning_rate": 4.924718146056681e-06,
      "loss": 0.017,
      "step": 1560480
    },
    {
      "epoch": 2.5537924759267625,
      "grad_norm": 0.7091867923736572,
      "learning_rate": 4.924652253843164e-06,
      "loss": 0.0211,
      "step": 1560500
    },
    {
      "epoch": 2.5538252063654157,
      "grad_norm": 0.20962059497833252,
      "learning_rate": 4.924586361629647e-06,
      "loss": 0.0167,
      "step": 1560520
    },
    {
      "epoch": 2.553857936804069,
      "grad_norm": 0.3439197242259979,
      "learning_rate": 4.92452046941613e-06,
      "loss": 0.0156,
      "step": 1560540
    },
    {
      "epoch": 2.5538906672427224,
      "grad_norm": 0.14168697595596313,
      "learning_rate": 4.924454577202613e-06,
      "loss": 0.018,
      "step": 1560560
    },
    {
      "epoch": 2.553923397681376,
      "grad_norm": 0.22570465505123138,
      "learning_rate": 4.9243886849890955e-06,
      "loss": 0.0124,
      "step": 1560580
    },
    {
      "epoch": 2.553956128120029,
      "grad_norm": 0.44848504662513733,
      "learning_rate": 4.924322792775578e-06,
      "loss": 0.0112,
      "step": 1560600
    },
    {
      "epoch": 2.5539888585586823,
      "grad_norm": 0.24965675175189972,
      "learning_rate": 4.924256900562061e-06,
      "loss": 0.0081,
      "step": 1560620
    },
    {
      "epoch": 2.554021588997336,
      "grad_norm": 0.20212602615356445,
      "learning_rate": 4.924191008348544e-06,
      "loss": 0.0153,
      "step": 1560640
    },
    {
      "epoch": 2.554054319435989,
      "grad_norm": 0.1181938499212265,
      "learning_rate": 4.924125116135026e-06,
      "loss": 0.0123,
      "step": 1560660
    },
    {
      "epoch": 2.554087049874642,
      "grad_norm": 0.6025152206420898,
      "learning_rate": 4.92405922392151e-06,
      "loss": 0.0175,
      "step": 1560680
    },
    {
      "epoch": 2.554119780313296,
      "grad_norm": 0.3660903871059418,
      "learning_rate": 4.923993331707993e-06,
      "loss": 0.0147,
      "step": 1560700
    },
    {
      "epoch": 2.554152510751949,
      "grad_norm": 0.25419944524765015,
      "learning_rate": 4.9239274394944755e-06,
      "loss": 0.0155,
      "step": 1560720
    },
    {
      "epoch": 2.5541852411906025,
      "grad_norm": 0.18922437727451324,
      "learning_rate": 4.923861547280958e-06,
      "loss": 0.0141,
      "step": 1560740
    },
    {
      "epoch": 2.5542179716292557,
      "grad_norm": 0.11255385726690292,
      "learning_rate": 4.923795655067441e-06,
      "loss": 0.0198,
      "step": 1560760
    },
    {
      "epoch": 2.5542507020679093,
      "grad_norm": 0.4354393780231476,
      "learning_rate": 4.923729762853924e-06,
      "loss": 0.0206,
      "step": 1560780
    },
    {
      "epoch": 2.5542834325065624,
      "grad_norm": 0.07801541686058044,
      "learning_rate": 4.9236638706404064e-06,
      "loss": 0.0078,
      "step": 1560800
    },
    {
      "epoch": 2.5543161629452156,
      "grad_norm": 0.231547012925148,
      "learning_rate": 4.923597978426889e-06,
      "loss": 0.0132,
      "step": 1560820
    },
    {
      "epoch": 2.554348893383869,
      "grad_norm": 0.23689818382263184,
      "learning_rate": 4.923532086213372e-06,
      "loss": 0.0123,
      "step": 1560840
    },
    {
      "epoch": 2.5543816238225223,
      "grad_norm": 0.20417214930057526,
      "learning_rate": 4.9234661939998555e-06,
      "loss": 0.0154,
      "step": 1560860
    },
    {
      "epoch": 2.554414354261176,
      "grad_norm": 1.0325745344161987,
      "learning_rate": 4.923400301786338e-06,
      "loss": 0.0204,
      "step": 1560880
    },
    {
      "epoch": 2.554447084699829,
      "grad_norm": 0.13458102941513062,
      "learning_rate": 4.923334409572821e-06,
      "loss": 0.0106,
      "step": 1560900
    },
    {
      "epoch": 2.5544798151384827,
      "grad_norm": 0.3353424668312073,
      "learning_rate": 4.923268517359305e-06,
      "loss": 0.013,
      "step": 1560920
    },
    {
      "epoch": 2.554512545577136,
      "grad_norm": 0.23902396857738495,
      "learning_rate": 4.923202625145787e-06,
      "loss": 0.0134,
      "step": 1560940
    },
    {
      "epoch": 2.554545276015789,
      "grad_norm": 0.7826796770095825,
      "learning_rate": 4.92313673293227e-06,
      "loss": 0.0147,
      "step": 1560960
    },
    {
      "epoch": 2.5545780064544426,
      "grad_norm": 0.44775140285491943,
      "learning_rate": 4.923070840718753e-06,
      "loss": 0.0128,
      "step": 1560980
    },
    {
      "epoch": 2.5546107368930957,
      "grad_norm": 0.31440892815589905,
      "learning_rate": 4.9230049485052355e-06,
      "loss": 0.0132,
      "step": 1561000
    },
    {
      "epoch": 2.5546434673317493,
      "grad_norm": 0.5351506471633911,
      "learning_rate": 4.922939056291718e-06,
      "loss": 0.0181,
      "step": 1561020
    },
    {
      "epoch": 2.5546761977704024,
      "grad_norm": 0.7104167938232422,
      "learning_rate": 4.922873164078201e-06,
      "loss": 0.0148,
      "step": 1561040
    },
    {
      "epoch": 2.554708928209056,
      "grad_norm": 0.2523761987686157,
      "learning_rate": 4.922807271864684e-06,
      "loss": 0.0129,
      "step": 1561060
    },
    {
      "epoch": 2.554741658647709,
      "grad_norm": 0.3703201115131378,
      "learning_rate": 4.922741379651167e-06,
      "loss": 0.019,
      "step": 1561080
    },
    {
      "epoch": 2.5547743890863623,
      "grad_norm": 0.33412376046180725,
      "learning_rate": 4.92267548743765e-06,
      "loss": 0.0118,
      "step": 1561100
    },
    {
      "epoch": 2.554807119525016,
      "grad_norm": 0.5292757749557495,
      "learning_rate": 4.922609595224133e-06,
      "loss": 0.0122,
      "step": 1561120
    },
    {
      "epoch": 2.554839849963669,
      "grad_norm": 0.3167307674884796,
      "learning_rate": 4.9225437030106156e-06,
      "loss": 0.0141,
      "step": 1561140
    },
    {
      "epoch": 2.5548725804023227,
      "grad_norm": 0.22713196277618408,
      "learning_rate": 4.922477810797098e-06,
      "loss": 0.0124,
      "step": 1561160
    },
    {
      "epoch": 2.554905310840976,
      "grad_norm": 0.3066042959690094,
      "learning_rate": 4.922411918583581e-06,
      "loss": 0.0166,
      "step": 1561180
    },
    {
      "epoch": 2.5549380412796294,
      "grad_norm": 0.5317322611808777,
      "learning_rate": 4.922346026370064e-06,
      "loss": 0.0167,
      "step": 1561200
    },
    {
      "epoch": 2.5549707717182826,
      "grad_norm": 0.19304205477237701,
      "learning_rate": 4.922280134156547e-06,
      "loss": 0.0112,
      "step": 1561220
    },
    {
      "epoch": 2.5550035021569357,
      "grad_norm": 0.10394728928804398,
      "learning_rate": 4.92221424194303e-06,
      "loss": 0.0136,
      "step": 1561240
    },
    {
      "epoch": 2.5550362325955893,
      "grad_norm": 0.7565112113952637,
      "learning_rate": 4.922148349729513e-06,
      "loss": 0.0144,
      "step": 1561260
    },
    {
      "epoch": 2.5550689630342425,
      "grad_norm": 0.4088587760925293,
      "learning_rate": 4.922082457515996e-06,
      "loss": 0.0132,
      "step": 1561280
    },
    {
      "epoch": 2.555101693472896,
      "grad_norm": 0.36185988783836365,
      "learning_rate": 4.922016565302478e-06,
      "loss": 0.0153,
      "step": 1561300
    },
    {
      "epoch": 2.555134423911549,
      "grad_norm": 0.22166424989700317,
      "learning_rate": 4.921950673088962e-06,
      "loss": 0.0144,
      "step": 1561320
    },
    {
      "epoch": 2.555167154350203,
      "grad_norm": 0.6029559969902039,
      "learning_rate": 4.921884780875445e-06,
      "loss": 0.0159,
      "step": 1561340
    },
    {
      "epoch": 2.555199884788856,
      "grad_norm": 0.4860011637210846,
      "learning_rate": 4.921818888661927e-06,
      "loss": 0.0147,
      "step": 1561360
    },
    {
      "epoch": 2.555232615227509,
      "grad_norm": 1.6728299856185913,
      "learning_rate": 4.92175299644841e-06,
      "loss": 0.0161,
      "step": 1561380
    },
    {
      "epoch": 2.5552653456661627,
      "grad_norm": 1.157535195350647,
      "learning_rate": 4.921687104234893e-06,
      "loss": 0.0135,
      "step": 1561400
    },
    {
      "epoch": 2.555298076104816,
      "grad_norm": 0.893015444278717,
      "learning_rate": 4.921621212021376e-06,
      "loss": 0.0169,
      "step": 1561420
    },
    {
      "epoch": 2.5553308065434694,
      "grad_norm": 0.6155818104743958,
      "learning_rate": 4.921555319807858e-06,
      "loss": 0.0152,
      "step": 1561440
    },
    {
      "epoch": 2.5553635369821226,
      "grad_norm": 0.26006168127059937,
      "learning_rate": 4.921489427594341e-06,
      "loss": 0.0139,
      "step": 1561460
    },
    {
      "epoch": 2.555396267420776,
      "grad_norm": 0.3727800250053406,
      "learning_rate": 4.921423535380825e-06,
      "loss": 0.0173,
      "step": 1561480
    },
    {
      "epoch": 2.5554289978594293,
      "grad_norm": 0.3573821485042572,
      "learning_rate": 4.921357643167307e-06,
      "loss": 0.0159,
      "step": 1561500
    },
    {
      "epoch": 2.5554617282980825,
      "grad_norm": 0.7420544624328613,
      "learning_rate": 4.92129175095379e-06,
      "loss": 0.0146,
      "step": 1561520
    },
    {
      "epoch": 2.555494458736736,
      "grad_norm": 0.8660898804664612,
      "learning_rate": 4.921225858740273e-06,
      "loss": 0.0192,
      "step": 1561540
    },
    {
      "epoch": 2.5555271891753892,
      "grad_norm": 0.25861483812332153,
      "learning_rate": 4.9211599665267565e-06,
      "loss": 0.016,
      "step": 1561560
    },
    {
      "epoch": 2.555559919614043,
      "grad_norm": 0.16985653340816498,
      "learning_rate": 4.921094074313239e-06,
      "loss": 0.015,
      "step": 1561580
    },
    {
      "epoch": 2.555592650052696,
      "grad_norm": 0.1684221476316452,
      "learning_rate": 4.921028182099722e-06,
      "loss": 0.013,
      "step": 1561600
    },
    {
      "epoch": 2.5556253804913496,
      "grad_norm": 0.15912197530269623,
      "learning_rate": 4.920962289886205e-06,
      "loss": 0.009,
      "step": 1561620
    },
    {
      "epoch": 2.5556581109300027,
      "grad_norm": 0.48595309257507324,
      "learning_rate": 4.9208963976726874e-06,
      "loss": 0.0111,
      "step": 1561640
    },
    {
      "epoch": 2.555690841368656,
      "grad_norm": 0.2512677013874054,
      "learning_rate": 4.92083050545917e-06,
      "loss": 0.0183,
      "step": 1561660
    },
    {
      "epoch": 2.5557235718073095,
      "grad_norm": 0.3085005283355713,
      "learning_rate": 4.920764613245653e-06,
      "loss": 0.0143,
      "step": 1561680
    },
    {
      "epoch": 2.5557563022459626,
      "grad_norm": 0.3636047840118408,
      "learning_rate": 4.920698721032136e-06,
      "loss": 0.0117,
      "step": 1561700
    },
    {
      "epoch": 2.5557890326846158,
      "grad_norm": 0.34196388721466064,
      "learning_rate": 4.920632828818619e-06,
      "loss": 0.0216,
      "step": 1561720
    },
    {
      "epoch": 2.5558217631232694,
      "grad_norm": 0.08774463832378387,
      "learning_rate": 4.920566936605102e-06,
      "loss": 0.0144,
      "step": 1561740
    },
    {
      "epoch": 2.555854493561923,
      "grad_norm": 0.20000489056110382,
      "learning_rate": 4.920501044391585e-06,
      "loss": 0.015,
      "step": 1561760
    },
    {
      "epoch": 2.555887224000576,
      "grad_norm": 0.4664602279663086,
      "learning_rate": 4.9204351521780675e-06,
      "loss": 0.0184,
      "step": 1561780
    },
    {
      "epoch": 2.5559199544392293,
      "grad_norm": 0.33917516469955444,
      "learning_rate": 4.92036925996455e-06,
      "loss": 0.0127,
      "step": 1561800
    },
    {
      "epoch": 2.555952684877883,
      "grad_norm": 0.15254253149032593,
      "learning_rate": 4.920303367751033e-06,
      "loss": 0.0156,
      "step": 1561820
    },
    {
      "epoch": 2.555985415316536,
      "grad_norm": 0.37230348587036133,
      "learning_rate": 4.920237475537516e-06,
      "loss": 0.0113,
      "step": 1561840
    },
    {
      "epoch": 2.556018145755189,
      "grad_norm": 0.5532728433609009,
      "learning_rate": 4.9201715833239984e-06,
      "loss": 0.0104,
      "step": 1561860
    },
    {
      "epoch": 2.5560508761938427,
      "grad_norm": 0.6449495553970337,
      "learning_rate": 4.920105691110482e-06,
      "loss": 0.0121,
      "step": 1561880
    },
    {
      "epoch": 2.5560836066324963,
      "grad_norm": 0.16448089480400085,
      "learning_rate": 4.920039798896965e-06,
      "loss": 0.0133,
      "step": 1561900
    },
    {
      "epoch": 2.5561163370711495,
      "grad_norm": 0.22225630283355713,
      "learning_rate": 4.9199739066834475e-06,
      "loss": 0.0107,
      "step": 1561920
    },
    {
      "epoch": 2.5561490675098026,
      "grad_norm": 1.277191162109375,
      "learning_rate": 4.919908014469931e-06,
      "loss": 0.0174,
      "step": 1561940
    },
    {
      "epoch": 2.5561817979484562,
      "grad_norm": 0.18377868831157684,
      "learning_rate": 4.919842122256414e-06,
      "loss": 0.0123,
      "step": 1561960
    },
    {
      "epoch": 2.5562145283871094,
      "grad_norm": 0.14478699862957,
      "learning_rate": 4.9197762300428966e-06,
      "loss": 0.0101,
      "step": 1561980
    },
    {
      "epoch": 2.5562472588257625,
      "grad_norm": 0.1897689253091812,
      "learning_rate": 4.919710337829379e-06,
      "loss": 0.0176,
      "step": 1562000
    },
    {
      "epoch": 2.556279989264416,
      "grad_norm": 0.7707822322845459,
      "learning_rate": 4.919644445615862e-06,
      "loss": 0.0176,
      "step": 1562020
    },
    {
      "epoch": 2.5563127197030697,
      "grad_norm": 0.2709822356700897,
      "learning_rate": 4.919578553402345e-06,
      "loss": 0.0179,
      "step": 1562040
    },
    {
      "epoch": 2.556345450141723,
      "grad_norm": 0.17132985591888428,
      "learning_rate": 4.9195126611888275e-06,
      "loss": 0.0087,
      "step": 1562060
    },
    {
      "epoch": 2.556378180580376,
      "grad_norm": 0.41284462809562683,
      "learning_rate": 4.91944676897531e-06,
      "loss": 0.0174,
      "step": 1562080
    },
    {
      "epoch": 2.5564109110190296,
      "grad_norm": 0.5285604596138,
      "learning_rate": 4.919380876761794e-06,
      "loss": 0.0223,
      "step": 1562100
    },
    {
      "epoch": 2.5564436414576828,
      "grad_norm": 0.3485405445098877,
      "learning_rate": 4.919314984548277e-06,
      "loss": 0.0165,
      "step": 1562120
    },
    {
      "epoch": 2.556476371896336,
      "grad_norm": 0.2303863912820816,
      "learning_rate": 4.919249092334759e-06,
      "loss": 0.0164,
      "step": 1562140
    },
    {
      "epoch": 2.5565091023349895,
      "grad_norm": 0.3346080183982849,
      "learning_rate": 4.919183200121242e-06,
      "loss": 0.0173,
      "step": 1562160
    },
    {
      "epoch": 2.556541832773643,
      "grad_norm": 0.14859332144260406,
      "learning_rate": 4.919117307907725e-06,
      "loss": 0.0154,
      "step": 1562180
    },
    {
      "epoch": 2.5565745632122963,
      "grad_norm": 0.3329899311065674,
      "learning_rate": 4.9190514156942075e-06,
      "loss": 0.0156,
      "step": 1562200
    },
    {
      "epoch": 2.5566072936509494,
      "grad_norm": 0.3413895070552826,
      "learning_rate": 4.91898552348069e-06,
      "loss": 0.0134,
      "step": 1562220
    },
    {
      "epoch": 2.556640024089603,
      "grad_norm": 0.26303231716156006,
      "learning_rate": 4.918919631267173e-06,
      "loss": 0.0174,
      "step": 1562240
    },
    {
      "epoch": 2.556672754528256,
      "grad_norm": 0.1759440302848816,
      "learning_rate": 4.918853739053657e-06,
      "loss": 0.0207,
      "step": 1562260
    },
    {
      "epoch": 2.5567054849669093,
      "grad_norm": 0.2915937304496765,
      "learning_rate": 4.918787846840139e-06,
      "loss": 0.0193,
      "step": 1562280
    },
    {
      "epoch": 2.556738215405563,
      "grad_norm": 0.29998573660850525,
      "learning_rate": 4.918721954626622e-06,
      "loss": 0.0121,
      "step": 1562300
    },
    {
      "epoch": 2.556770945844216,
      "grad_norm": 0.11462999880313873,
      "learning_rate": 4.918656062413105e-06,
      "loss": 0.0166,
      "step": 1562320
    },
    {
      "epoch": 2.5568036762828696,
      "grad_norm": 0.2360956221818924,
      "learning_rate": 4.918590170199588e-06,
      "loss": 0.0159,
      "step": 1562340
    },
    {
      "epoch": 2.556836406721523,
      "grad_norm": 0.25021812319755554,
      "learning_rate": 4.918524277986071e-06,
      "loss": 0.0155,
      "step": 1562360
    },
    {
      "epoch": 2.5568691371601764,
      "grad_norm": 0.31535834074020386,
      "learning_rate": 4.918458385772554e-06,
      "loss": 0.0088,
      "step": 1562380
    },
    {
      "epoch": 2.5569018675988295,
      "grad_norm": 0.2964557111263275,
      "learning_rate": 4.918392493559037e-06,
      "loss": 0.0154,
      "step": 1562400
    },
    {
      "epoch": 2.5569345980374827,
      "grad_norm": 0.2952837347984314,
      "learning_rate": 4.918326601345519e-06,
      "loss": 0.0171,
      "step": 1562420
    },
    {
      "epoch": 2.5569673284761363,
      "grad_norm": 0.2119068056344986,
      "learning_rate": 4.918260709132002e-06,
      "loss": 0.0176,
      "step": 1562440
    },
    {
      "epoch": 2.5570000589147894,
      "grad_norm": 0.0918266549706459,
      "learning_rate": 4.918194816918485e-06,
      "loss": 0.0109,
      "step": 1562460
    },
    {
      "epoch": 2.557032789353443,
      "grad_norm": 0.5377728343009949,
      "learning_rate": 4.918128924704968e-06,
      "loss": 0.0119,
      "step": 1562480
    },
    {
      "epoch": 2.557065519792096,
      "grad_norm": 0.21853980422019958,
      "learning_rate": 4.918063032491451e-06,
      "loss": 0.0147,
      "step": 1562500
    },
    {
      "epoch": 2.5570982502307498,
      "grad_norm": 0.2909727692604065,
      "learning_rate": 4.917997140277934e-06,
      "loss": 0.0197,
      "step": 1562520
    },
    {
      "epoch": 2.557130980669403,
      "grad_norm": 0.24137331545352936,
      "learning_rate": 4.917931248064417e-06,
      "loss": 0.022,
      "step": 1562540
    },
    {
      "epoch": 2.557163711108056,
      "grad_norm": 0.2964231073856354,
      "learning_rate": 4.917865355850899e-06,
      "loss": 0.0151,
      "step": 1562560
    },
    {
      "epoch": 2.5571964415467097,
      "grad_norm": 0.5255488753318787,
      "learning_rate": 4.917799463637382e-06,
      "loss": 0.0105,
      "step": 1562580
    },
    {
      "epoch": 2.557229171985363,
      "grad_norm": 0.7011429667472839,
      "learning_rate": 4.917733571423865e-06,
      "loss": 0.0112,
      "step": 1562600
    },
    {
      "epoch": 2.5572619024240164,
      "grad_norm": 0.8353886604309082,
      "learning_rate": 4.9176676792103485e-06,
      "loss": 0.0125,
      "step": 1562620
    },
    {
      "epoch": 2.5572946328626696,
      "grad_norm": 0.19636404514312744,
      "learning_rate": 4.917601786996831e-06,
      "loss": 0.0138,
      "step": 1562640
    },
    {
      "epoch": 2.557327363301323,
      "grad_norm": 0.1355419009923935,
      "learning_rate": 4.917535894783314e-06,
      "loss": 0.0119,
      "step": 1562660
    },
    {
      "epoch": 2.5573600937399763,
      "grad_norm": 0.04986456781625748,
      "learning_rate": 4.917470002569797e-06,
      "loss": 0.0123,
      "step": 1562680
    },
    {
      "epoch": 2.5573928241786295,
      "grad_norm": 0.4052949547767639,
      "learning_rate": 4.9174041103562794e-06,
      "loss": 0.0128,
      "step": 1562700
    },
    {
      "epoch": 2.557425554617283,
      "grad_norm": 0.9238067865371704,
      "learning_rate": 4.917338218142762e-06,
      "loss": 0.0167,
      "step": 1562720
    },
    {
      "epoch": 2.557458285055936,
      "grad_norm": 0.925860583782196,
      "learning_rate": 4.917272325929246e-06,
      "loss": 0.009,
      "step": 1562740
    },
    {
      "epoch": 2.55749101549459,
      "grad_norm": 0.587654709815979,
      "learning_rate": 4.9172064337157285e-06,
      "loss": 0.0173,
      "step": 1562760
    },
    {
      "epoch": 2.557523745933243,
      "grad_norm": 0.259072482585907,
      "learning_rate": 4.917140541502211e-06,
      "loss": 0.0115,
      "step": 1562780
    },
    {
      "epoch": 2.5575564763718965,
      "grad_norm": 0.28300100564956665,
      "learning_rate": 4.917074649288694e-06,
      "loss": 0.0138,
      "step": 1562800
    },
    {
      "epoch": 2.5575892068105497,
      "grad_norm": 0.12249001860618591,
      "learning_rate": 4.917008757075177e-06,
      "loss": 0.0181,
      "step": 1562820
    },
    {
      "epoch": 2.557621937249203,
      "grad_norm": 0.19896657764911652,
      "learning_rate": 4.9169428648616595e-06,
      "loss": 0.0157,
      "step": 1562840
    },
    {
      "epoch": 2.5576546676878564,
      "grad_norm": 0.1309257298707962,
      "learning_rate": 4.916876972648142e-06,
      "loss": 0.0122,
      "step": 1562860
    },
    {
      "epoch": 2.5576873981265096,
      "grad_norm": 0.2963787019252777,
      "learning_rate": 4.916811080434625e-06,
      "loss": 0.015,
      "step": 1562880
    },
    {
      "epoch": 2.557720128565163,
      "grad_norm": 0.7369802594184875,
      "learning_rate": 4.9167451882211085e-06,
      "loss": 0.021,
      "step": 1562900
    },
    {
      "epoch": 2.5577528590038163,
      "grad_norm": 0.36515846848487854,
      "learning_rate": 4.916679296007591e-06,
      "loss": 0.0111,
      "step": 1562920
    },
    {
      "epoch": 2.55778558944247,
      "grad_norm": 0.6060984134674072,
      "learning_rate": 4.916613403794074e-06,
      "loss": 0.0117,
      "step": 1562940
    },
    {
      "epoch": 2.557818319881123,
      "grad_norm": 0.7749763131141663,
      "learning_rate": 4.916547511580557e-06,
      "loss": 0.0108,
      "step": 1562960
    },
    {
      "epoch": 2.557851050319776,
      "grad_norm": 0.2986130118370056,
      "learning_rate": 4.91648161936704e-06,
      "loss": 0.0093,
      "step": 1562980
    },
    {
      "epoch": 2.55788378075843,
      "grad_norm": 0.36127769947052,
      "learning_rate": 4.916415727153523e-06,
      "loss": 0.0145,
      "step": 1563000
    },
    {
      "epoch": 2.557916511197083,
      "grad_norm": 0.0699227973818779,
      "learning_rate": 4.916349834940006e-06,
      "loss": 0.0127,
      "step": 1563020
    },
    {
      "epoch": 2.5579492416357366,
      "grad_norm": 1.0324621200561523,
      "learning_rate": 4.9162839427264885e-06,
      "loss": 0.0148,
      "step": 1563040
    },
    {
      "epoch": 2.5579819720743897,
      "grad_norm": 0.24497322738170624,
      "learning_rate": 4.916218050512971e-06,
      "loss": 0.0185,
      "step": 1563060
    },
    {
      "epoch": 2.5580147025130433,
      "grad_norm": 0.3197168707847595,
      "learning_rate": 4.916152158299454e-06,
      "loss": 0.0107,
      "step": 1563080
    },
    {
      "epoch": 2.5580474329516965,
      "grad_norm": 0.6391514539718628,
      "learning_rate": 4.916086266085937e-06,
      "loss": 0.0119,
      "step": 1563100
    },
    {
      "epoch": 2.5580801633903496,
      "grad_norm": 0.2190207540988922,
      "learning_rate": 4.9160203738724195e-06,
      "loss": 0.0177,
      "step": 1563120
    },
    {
      "epoch": 2.558112893829003,
      "grad_norm": 0.6682811975479126,
      "learning_rate": 4.915954481658903e-06,
      "loss": 0.0109,
      "step": 1563140
    },
    {
      "epoch": 2.5581456242676563,
      "grad_norm": 0.08969516307115555,
      "learning_rate": 4.915888589445386e-06,
      "loss": 0.0113,
      "step": 1563160
    },
    {
      "epoch": 2.5581783547063095,
      "grad_norm": 0.7362998723983765,
      "learning_rate": 4.9158226972318686e-06,
      "loss": 0.0189,
      "step": 1563180
    },
    {
      "epoch": 2.558211085144963,
      "grad_norm": 1.7485393285751343,
      "learning_rate": 4.915756805018351e-06,
      "loss": 0.0138,
      "step": 1563200
    },
    {
      "epoch": 2.5582438155836167,
      "grad_norm": 0.13635756075382233,
      "learning_rate": 4.915690912804834e-06,
      "loss": 0.0115,
      "step": 1563220
    },
    {
      "epoch": 2.55827654602227,
      "grad_norm": 0.47480508685112,
      "learning_rate": 4.915625020591317e-06,
      "loss": 0.0143,
      "step": 1563240
    },
    {
      "epoch": 2.558309276460923,
      "grad_norm": 0.16901324689388275,
      "learning_rate": 4.9155591283777995e-06,
      "loss": 0.0118,
      "step": 1563260
    },
    {
      "epoch": 2.5583420068995766,
      "grad_norm": 0.5266576409339905,
      "learning_rate": 4.915493236164282e-06,
      "loss": 0.0182,
      "step": 1563280
    },
    {
      "epoch": 2.5583747373382297,
      "grad_norm": 0.775960385799408,
      "learning_rate": 4.915427343950766e-06,
      "loss": 0.014,
      "step": 1563300
    },
    {
      "epoch": 2.558407467776883,
      "grad_norm": 0.615534782409668,
      "learning_rate": 4.915361451737249e-06,
      "loss": 0.0143,
      "step": 1563320
    },
    {
      "epoch": 2.5584401982155365,
      "grad_norm": 0.5963298082351685,
      "learning_rate": 4.915295559523731e-06,
      "loss": 0.0143,
      "step": 1563340
    },
    {
      "epoch": 2.55847292865419,
      "grad_norm": 0.38485580682754517,
      "learning_rate": 4.915229667310215e-06,
      "loss": 0.0151,
      "step": 1563360
    },
    {
      "epoch": 2.558505659092843,
      "grad_norm": 0.22196562588214874,
      "learning_rate": 4.915163775096698e-06,
      "loss": 0.0142,
      "step": 1563380
    },
    {
      "epoch": 2.5585383895314964,
      "grad_norm": 1.1580921411514282,
      "learning_rate": 4.91509788288318e-06,
      "loss": 0.0142,
      "step": 1563400
    },
    {
      "epoch": 2.55857111997015,
      "grad_norm": 0.22177617251873016,
      "learning_rate": 4.915031990669663e-06,
      "loss": 0.014,
      "step": 1563420
    },
    {
      "epoch": 2.558603850408803,
      "grad_norm": 2.8881258964538574,
      "learning_rate": 4.914966098456146e-06,
      "loss": 0.0224,
      "step": 1563440
    },
    {
      "epoch": 2.5586365808474563,
      "grad_norm": 0.4257194995880127,
      "learning_rate": 4.914900206242629e-06,
      "loss": 0.0159,
      "step": 1563460
    },
    {
      "epoch": 2.55866931128611,
      "grad_norm": 0.235161691904068,
      "learning_rate": 4.914834314029111e-06,
      "loss": 0.0152,
      "step": 1563480
    },
    {
      "epoch": 2.5587020417247635,
      "grad_norm": 0.5532808899879456,
      "learning_rate": 4.914768421815594e-06,
      "loss": 0.0151,
      "step": 1563500
    },
    {
      "epoch": 2.5587347721634166,
      "grad_norm": 0.14467686414718628,
      "learning_rate": 4.914702529602078e-06,
      "loss": 0.0161,
      "step": 1563520
    },
    {
      "epoch": 2.5587675026020698,
      "grad_norm": 0.23592783510684967,
      "learning_rate": 4.9146366373885604e-06,
      "loss": 0.0194,
      "step": 1563540
    },
    {
      "epoch": 2.5588002330407233,
      "grad_norm": 0.1959548145532608,
      "learning_rate": 4.914570745175043e-06,
      "loss": 0.0111,
      "step": 1563560
    },
    {
      "epoch": 2.5588329634793765,
      "grad_norm": 0.18699218332767487,
      "learning_rate": 4.914504852961526e-06,
      "loss": 0.0142,
      "step": 1563580
    },
    {
      "epoch": 2.5588656939180296,
      "grad_norm": 0.3660906255245209,
      "learning_rate": 4.914438960748009e-06,
      "loss": 0.0122,
      "step": 1563600
    },
    {
      "epoch": 2.5588984243566832,
      "grad_norm": 0.1298283487558365,
      "learning_rate": 4.914373068534491e-06,
      "loss": 0.015,
      "step": 1563620
    },
    {
      "epoch": 2.558931154795337,
      "grad_norm": 0.3422616124153137,
      "learning_rate": 4.914307176320974e-06,
      "loss": 0.0117,
      "step": 1563640
    },
    {
      "epoch": 2.55896388523399,
      "grad_norm": 0.37180912494659424,
      "learning_rate": 4.914241284107457e-06,
      "loss": 0.0168,
      "step": 1563660
    },
    {
      "epoch": 2.558996615672643,
      "grad_norm": 0.11975385248661041,
      "learning_rate": 4.9141753918939405e-06,
      "loss": 0.0151,
      "step": 1563680
    },
    {
      "epoch": 2.5590293461112967,
      "grad_norm": 0.18381910026073456,
      "learning_rate": 4.914109499680423e-06,
      "loss": 0.0115,
      "step": 1563700
    },
    {
      "epoch": 2.55906207654995,
      "grad_norm": 0.33652037382125854,
      "learning_rate": 4.914043607466906e-06,
      "loss": 0.014,
      "step": 1563720
    },
    {
      "epoch": 2.559094806988603,
      "grad_norm": 0.17592255771160126,
      "learning_rate": 4.913977715253389e-06,
      "loss": 0.0121,
      "step": 1563740
    },
    {
      "epoch": 2.5591275374272566,
      "grad_norm": 0.45876988768577576,
      "learning_rate": 4.913911823039872e-06,
      "loss": 0.017,
      "step": 1563760
    },
    {
      "epoch": 2.5591602678659098,
      "grad_norm": 0.6488320827484131,
      "learning_rate": 4.913845930826355e-06,
      "loss": 0.0136,
      "step": 1563780
    },
    {
      "epoch": 2.5591929983045634,
      "grad_norm": 0.09992794692516327,
      "learning_rate": 4.913780038612838e-06,
      "loss": 0.0124,
      "step": 1563800
    },
    {
      "epoch": 2.5592257287432165,
      "grad_norm": 0.18394017219543457,
      "learning_rate": 4.9137141463993205e-06,
      "loss": 0.0202,
      "step": 1563820
    },
    {
      "epoch": 2.55925845918187,
      "grad_norm": 0.6509360074996948,
      "learning_rate": 4.913648254185803e-06,
      "loss": 0.013,
      "step": 1563840
    },
    {
      "epoch": 2.5592911896205233,
      "grad_norm": 0.2985026240348816,
      "learning_rate": 4.913582361972286e-06,
      "loss": 0.0163,
      "step": 1563860
    },
    {
      "epoch": 2.5593239200591764,
      "grad_norm": 0.3693818151950836,
      "learning_rate": 4.913516469758769e-06,
      "loss": 0.0175,
      "step": 1563880
    },
    {
      "epoch": 2.55935665049783,
      "grad_norm": 0.33210641145706177,
      "learning_rate": 4.9134505775452514e-06,
      "loss": 0.012,
      "step": 1563900
    },
    {
      "epoch": 2.559389380936483,
      "grad_norm": 0.2907904386520386,
      "learning_rate": 4.913384685331735e-06,
      "loss": 0.0088,
      "step": 1563920
    },
    {
      "epoch": 2.5594221113751368,
      "grad_norm": 0.1892726719379425,
      "learning_rate": 4.913318793118218e-06,
      "loss": 0.0162,
      "step": 1563940
    },
    {
      "epoch": 2.55945484181379,
      "grad_norm": 0.386309415102005,
      "learning_rate": 4.9132529009047005e-06,
      "loss": 0.0127,
      "step": 1563960
    },
    {
      "epoch": 2.5594875722524435,
      "grad_norm": 0.29461929202079773,
      "learning_rate": 4.913187008691183e-06,
      "loss": 0.0141,
      "step": 1563980
    },
    {
      "epoch": 2.5595203026910966,
      "grad_norm": 1.3295989036560059,
      "learning_rate": 4.913121116477666e-06,
      "loss": 0.0193,
      "step": 1564000
    },
    {
      "epoch": 2.55955303312975,
      "grad_norm": 0.11042264848947525,
      "learning_rate": 4.9130552242641496e-06,
      "loss": 0.0127,
      "step": 1564020
    },
    {
      "epoch": 2.5595857635684034,
      "grad_norm": 0.6496904492378235,
      "learning_rate": 4.912989332050632e-06,
      "loss": 0.0167,
      "step": 1564040
    },
    {
      "epoch": 2.5596184940070565,
      "grad_norm": 0.340593159198761,
      "learning_rate": 4.912923439837115e-06,
      "loss": 0.0146,
      "step": 1564060
    },
    {
      "epoch": 2.55965122444571,
      "grad_norm": 1.0619502067565918,
      "learning_rate": 4.912857547623598e-06,
      "loss": 0.0167,
      "step": 1564080
    },
    {
      "epoch": 2.5596839548843633,
      "grad_norm": 0.7387065291404724,
      "learning_rate": 4.9127916554100805e-06,
      "loss": 0.0136,
      "step": 1564100
    },
    {
      "epoch": 2.559716685323017,
      "grad_norm": 0.26978224515914917,
      "learning_rate": 4.912725763196563e-06,
      "loss": 0.0175,
      "step": 1564120
    },
    {
      "epoch": 2.55974941576167,
      "grad_norm": 0.3294343948364258,
      "learning_rate": 4.912659870983046e-06,
      "loss": 0.0146,
      "step": 1564140
    },
    {
      "epoch": 2.559782146200323,
      "grad_norm": 0.8126890063285828,
      "learning_rate": 4.91259397876953e-06,
      "loss": 0.0148,
      "step": 1564160
    },
    {
      "epoch": 2.5598148766389768,
      "grad_norm": 0.5349542498588562,
      "learning_rate": 4.912528086556012e-06,
      "loss": 0.0092,
      "step": 1564180
    },
    {
      "epoch": 2.55984760707763,
      "grad_norm": 0.3846248388290405,
      "learning_rate": 4.912462194342495e-06,
      "loss": 0.0146,
      "step": 1564200
    },
    {
      "epoch": 2.5598803375162835,
      "grad_norm": 0.28633660078048706,
      "learning_rate": 4.912396302128978e-06,
      "loss": 0.0217,
      "step": 1564220
    },
    {
      "epoch": 2.5599130679549367,
      "grad_norm": 0.2860603928565979,
      "learning_rate": 4.9123304099154606e-06,
      "loss": 0.0169,
      "step": 1564240
    },
    {
      "epoch": 2.5599457983935903,
      "grad_norm": 0.23336270451545715,
      "learning_rate": 4.912264517701943e-06,
      "loss": 0.0118,
      "step": 1564260
    },
    {
      "epoch": 2.5599785288322434,
      "grad_norm": 0.32341745495796204,
      "learning_rate": 4.912198625488426e-06,
      "loss": 0.0165,
      "step": 1564280
    },
    {
      "epoch": 2.5600112592708966,
      "grad_norm": 0.29319074749946594,
      "learning_rate": 4.912132733274909e-06,
      "loss": 0.0111,
      "step": 1564300
    },
    {
      "epoch": 2.56004398970955,
      "grad_norm": 0.08672783523797989,
      "learning_rate": 4.912066841061392e-06,
      "loss": 0.02,
      "step": 1564320
    },
    {
      "epoch": 2.5600767201482033,
      "grad_norm": 0.3791007399559021,
      "learning_rate": 4.912000948847875e-06,
      "loss": 0.0115,
      "step": 1564340
    },
    {
      "epoch": 2.560109450586857,
      "grad_norm": 0.4854067862033844,
      "learning_rate": 4.911935056634358e-06,
      "loss": 0.016,
      "step": 1564360
    },
    {
      "epoch": 2.56014218102551,
      "grad_norm": 0.13359567523002625,
      "learning_rate": 4.9118691644208414e-06,
      "loss": 0.017,
      "step": 1564380
    },
    {
      "epoch": 2.5601749114641636,
      "grad_norm": 0.14502611756324768,
      "learning_rate": 4.911803272207324e-06,
      "loss": 0.0083,
      "step": 1564400
    },
    {
      "epoch": 2.560207641902817,
      "grad_norm": 0.42357829213142395,
      "learning_rate": 4.911737379993807e-06,
      "loss": 0.0102,
      "step": 1564420
    },
    {
      "epoch": 2.56024037234147,
      "grad_norm": 0.4152725338935852,
      "learning_rate": 4.91167148778029e-06,
      "loss": 0.0107,
      "step": 1564440
    },
    {
      "epoch": 2.5602731027801235,
      "grad_norm": 0.12298832833766937,
      "learning_rate": 4.911605595566772e-06,
      "loss": 0.0154,
      "step": 1564460
    },
    {
      "epoch": 2.5603058332187767,
      "grad_norm": 0.5119529366493225,
      "learning_rate": 4.911539703353255e-06,
      "loss": 0.0156,
      "step": 1564480
    },
    {
      "epoch": 2.5603385636574303,
      "grad_norm": 0.6965094804763794,
      "learning_rate": 4.911473811139738e-06,
      "loss": 0.0164,
      "step": 1564500
    },
    {
      "epoch": 2.5603712940960834,
      "grad_norm": 0.444806307554245,
      "learning_rate": 4.911407918926221e-06,
      "loss": 0.0127,
      "step": 1564520
    },
    {
      "epoch": 2.560404024534737,
      "grad_norm": 0.5872292518615723,
      "learning_rate": 4.911342026712704e-06,
      "loss": 0.0136,
      "step": 1564540
    },
    {
      "epoch": 2.56043675497339,
      "grad_norm": 0.5324094295501709,
      "learning_rate": 4.911276134499187e-06,
      "loss": 0.0131,
      "step": 1564560
    },
    {
      "epoch": 2.5604694854120433,
      "grad_norm": 0.16414624452590942,
      "learning_rate": 4.91121024228567e-06,
      "loss": 0.0139,
      "step": 1564580
    },
    {
      "epoch": 2.560502215850697,
      "grad_norm": 0.3850906193256378,
      "learning_rate": 4.911144350072152e-06,
      "loss": 0.0134,
      "step": 1564600
    },
    {
      "epoch": 2.56053494628935,
      "grad_norm": 0.26638638973236084,
      "learning_rate": 4.911078457858635e-06,
      "loss": 0.0156,
      "step": 1564620
    },
    {
      "epoch": 2.5605676767280037,
      "grad_norm": 0.8494572639465332,
      "learning_rate": 4.911012565645118e-06,
      "loss": 0.0127,
      "step": 1564640
    },
    {
      "epoch": 2.560600407166657,
      "grad_norm": 0.2581179440021515,
      "learning_rate": 4.910946673431601e-06,
      "loss": 0.013,
      "step": 1564660
    },
    {
      "epoch": 2.5606331376053104,
      "grad_norm": 0.2104787826538086,
      "learning_rate": 4.910880781218083e-06,
      "loss": 0.0117,
      "step": 1564680
    },
    {
      "epoch": 2.5606658680439636,
      "grad_norm": 0.10552624613046646,
      "learning_rate": 4.910814889004566e-06,
      "loss": 0.0149,
      "step": 1564700
    },
    {
      "epoch": 2.5606985984826167,
      "grad_norm": 0.48319709300994873,
      "learning_rate": 4.91074899679105e-06,
      "loss": 0.0152,
      "step": 1564720
    },
    {
      "epoch": 2.5607313289212703,
      "grad_norm": 0.46600058674812317,
      "learning_rate": 4.9106831045775324e-06,
      "loss": 0.0112,
      "step": 1564740
    },
    {
      "epoch": 2.5607640593599235,
      "grad_norm": 1.6152395009994507,
      "learning_rate": 4.910617212364015e-06,
      "loss": 0.0169,
      "step": 1564760
    },
    {
      "epoch": 2.5607967897985766,
      "grad_norm": 0.08306460082530975,
      "learning_rate": 4.910551320150499e-06,
      "loss": 0.011,
      "step": 1564780
    },
    {
      "epoch": 2.56082952023723,
      "grad_norm": 0.3126358687877655,
      "learning_rate": 4.9104854279369815e-06,
      "loss": 0.013,
      "step": 1564800
    },
    {
      "epoch": 2.560862250675884,
      "grad_norm": 1.6043980121612549,
      "learning_rate": 4.910419535723464e-06,
      "loss": 0.0146,
      "step": 1564820
    },
    {
      "epoch": 2.560894981114537,
      "grad_norm": 0.5632651448249817,
      "learning_rate": 4.910353643509947e-06,
      "loss": 0.0123,
      "step": 1564840
    },
    {
      "epoch": 2.56092771155319,
      "grad_norm": 0.28368398547172546,
      "learning_rate": 4.91028775129643e-06,
      "loss": 0.0104,
      "step": 1564860
    },
    {
      "epoch": 2.5609604419918437,
      "grad_norm": 0.37153154611587524,
      "learning_rate": 4.9102218590829125e-06,
      "loss": 0.0131,
      "step": 1564880
    },
    {
      "epoch": 2.560993172430497,
      "grad_norm": 0.20291607081890106,
      "learning_rate": 4.910155966869395e-06,
      "loss": 0.0126,
      "step": 1564900
    },
    {
      "epoch": 2.56102590286915,
      "grad_norm": 0.6977382898330688,
      "learning_rate": 4.910090074655878e-06,
      "loss": 0.0185,
      "step": 1564920
    },
    {
      "epoch": 2.5610586333078036,
      "grad_norm": 0.5632449388504028,
      "learning_rate": 4.9100241824423615e-06,
      "loss": 0.0211,
      "step": 1564940
    },
    {
      "epoch": 2.561091363746457,
      "grad_norm": 0.5982706546783447,
      "learning_rate": 4.909958290228844e-06,
      "loss": 0.0145,
      "step": 1564960
    },
    {
      "epoch": 2.5611240941851103,
      "grad_norm": 0.4763237535953522,
      "learning_rate": 4.909892398015327e-06,
      "loss": 0.0106,
      "step": 1564980
    },
    {
      "epoch": 2.5611568246237635,
      "grad_norm": 0.5856189131736755,
      "learning_rate": 4.90982650580181e-06,
      "loss": 0.0136,
      "step": 1565000
    },
    {
      "epoch": 2.561189555062417,
      "grad_norm": 0.41406282782554626,
      "learning_rate": 4.9097606135882925e-06,
      "loss": 0.0174,
      "step": 1565020
    },
    {
      "epoch": 2.5612222855010702,
      "grad_norm": 0.3909934163093567,
      "learning_rate": 4.909694721374775e-06,
      "loss": 0.0182,
      "step": 1565040
    },
    {
      "epoch": 2.5612550159397234,
      "grad_norm": 0.5815780758857727,
      "learning_rate": 4.909628829161258e-06,
      "loss": 0.0154,
      "step": 1565060
    },
    {
      "epoch": 2.561287746378377,
      "grad_norm": 0.199101522564888,
      "learning_rate": 4.9095629369477416e-06,
      "loss": 0.0144,
      "step": 1565080
    },
    {
      "epoch": 2.5613204768170306,
      "grad_norm": 0.2360423356294632,
      "learning_rate": 4.909497044734224e-06,
      "loss": 0.0143,
      "step": 1565100
    },
    {
      "epoch": 2.5613532072556837,
      "grad_norm": 0.42182862758636475,
      "learning_rate": 4.909431152520707e-06,
      "loss": 0.0197,
      "step": 1565120
    },
    {
      "epoch": 2.561385937694337,
      "grad_norm": 0.9066696763038635,
      "learning_rate": 4.90936526030719e-06,
      "loss": 0.0131,
      "step": 1565140
    },
    {
      "epoch": 2.5614186681329905,
      "grad_norm": 0.2446002960205078,
      "learning_rate": 4.9092993680936725e-06,
      "loss": 0.0191,
      "step": 1565160
    },
    {
      "epoch": 2.5614513985716436,
      "grad_norm": 0.16472484171390533,
      "learning_rate": 4.909233475880156e-06,
      "loss": 0.0103,
      "step": 1565180
    },
    {
      "epoch": 2.5614841290102968,
      "grad_norm": 0.42433276772499084,
      "learning_rate": 4.909167583666639e-06,
      "loss": 0.0091,
      "step": 1565200
    },
    {
      "epoch": 2.5615168594489504,
      "grad_norm": 0.48352929949760437,
      "learning_rate": 4.909101691453122e-06,
      "loss": 0.0164,
      "step": 1565220
    },
    {
      "epoch": 2.561549589887604,
      "grad_norm": 0.2434939742088318,
      "learning_rate": 4.909035799239604e-06,
      "loss": 0.0115,
      "step": 1565240
    },
    {
      "epoch": 2.561582320326257,
      "grad_norm": 0.2392105609178543,
      "learning_rate": 4.908969907026087e-06,
      "loss": 0.0122,
      "step": 1565260
    },
    {
      "epoch": 2.5616150507649103,
      "grad_norm": 0.2597939670085907,
      "learning_rate": 4.90890401481257e-06,
      "loss": 0.0099,
      "step": 1565280
    },
    {
      "epoch": 2.561647781203564,
      "grad_norm": 0.6418852210044861,
      "learning_rate": 4.9088381225990525e-06,
      "loss": 0.0164,
      "step": 1565300
    },
    {
      "epoch": 2.561680511642217,
      "grad_norm": 0.41343969106674194,
      "learning_rate": 4.908772230385535e-06,
      "loss": 0.0195,
      "step": 1565320
    },
    {
      "epoch": 2.56171324208087,
      "grad_norm": 0.31257161498069763,
      "learning_rate": 4.908706338172019e-06,
      "loss": 0.015,
      "step": 1565340
    },
    {
      "epoch": 2.5617459725195237,
      "grad_norm": 0.036449626088142395,
      "learning_rate": 4.908640445958502e-06,
      "loss": 0.0144,
      "step": 1565360
    },
    {
      "epoch": 2.561778702958177,
      "grad_norm": 0.19034624099731445,
      "learning_rate": 4.908574553744984e-06,
      "loss": 0.0136,
      "step": 1565380
    },
    {
      "epoch": 2.5618114333968305,
      "grad_norm": 0.3472115099430084,
      "learning_rate": 4.908508661531467e-06,
      "loss": 0.0148,
      "step": 1565400
    },
    {
      "epoch": 2.5618441638354836,
      "grad_norm": 0.5103645324707031,
      "learning_rate": 4.90844276931795e-06,
      "loss": 0.0118,
      "step": 1565420
    },
    {
      "epoch": 2.5618768942741372,
      "grad_norm": 0.871322751045227,
      "learning_rate": 4.908376877104433e-06,
      "loss": 0.0105,
      "step": 1565440
    },
    {
      "epoch": 2.5619096247127904,
      "grad_norm": 0.6315807104110718,
      "learning_rate": 4.908310984890916e-06,
      "loss": 0.015,
      "step": 1565460
    },
    {
      "epoch": 2.5619423551514435,
      "grad_norm": 0.1846534013748169,
      "learning_rate": 4.908245092677399e-06,
      "loss": 0.0128,
      "step": 1565480
    },
    {
      "epoch": 2.561975085590097,
      "grad_norm": 0.3374016582965851,
      "learning_rate": 4.908179200463882e-06,
      "loss": 0.0135,
      "step": 1565500
    },
    {
      "epoch": 2.5620078160287503,
      "grad_norm": 0.6185811758041382,
      "learning_rate": 4.908113308250364e-06,
      "loss": 0.0123,
      "step": 1565520
    },
    {
      "epoch": 2.562040546467404,
      "grad_norm": 0.3032323122024536,
      "learning_rate": 4.908047416036847e-06,
      "loss": 0.0157,
      "step": 1565540
    },
    {
      "epoch": 2.562073276906057,
      "grad_norm": 0.41655805706977844,
      "learning_rate": 4.90798152382333e-06,
      "loss": 0.0134,
      "step": 1565560
    },
    {
      "epoch": 2.5621060073447106,
      "grad_norm": 0.13817626237869263,
      "learning_rate": 4.9079156316098134e-06,
      "loss": 0.0178,
      "step": 1565580
    },
    {
      "epoch": 2.5621387377833638,
      "grad_norm": 0.7257975339889526,
      "learning_rate": 4.907849739396296e-06,
      "loss": 0.0147,
      "step": 1565600
    },
    {
      "epoch": 2.562171468222017,
      "grad_norm": 0.32716789841651917,
      "learning_rate": 4.907783847182779e-06,
      "loss": 0.0155,
      "step": 1565620
    },
    {
      "epoch": 2.5622041986606705,
      "grad_norm": 0.44677576422691345,
      "learning_rate": 4.907717954969262e-06,
      "loss": 0.0134,
      "step": 1565640
    },
    {
      "epoch": 2.5622369290993237,
      "grad_norm": 0.29751041531562805,
      "learning_rate": 4.907652062755744e-06,
      "loss": 0.013,
      "step": 1565660
    },
    {
      "epoch": 2.5622696595379773,
      "grad_norm": 0.5249167084693909,
      "learning_rate": 4.907586170542227e-06,
      "loss": 0.0124,
      "step": 1565680
    },
    {
      "epoch": 2.5623023899766304,
      "grad_norm": 1.1351070404052734,
      "learning_rate": 4.90752027832871e-06,
      "loss": 0.0184,
      "step": 1565700
    },
    {
      "epoch": 2.562335120415284,
      "grad_norm": 1.3764503002166748,
      "learning_rate": 4.907454386115193e-06,
      "loss": 0.013,
      "step": 1565720
    },
    {
      "epoch": 2.562367850853937,
      "grad_norm": 0.404064416885376,
      "learning_rate": 4.907388493901676e-06,
      "loss": 0.0137,
      "step": 1565740
    },
    {
      "epoch": 2.5624005812925903,
      "grad_norm": 0.1704804003238678,
      "learning_rate": 4.907322601688159e-06,
      "loss": 0.0125,
      "step": 1565760
    },
    {
      "epoch": 2.562433311731244,
      "grad_norm": 0.2975757420063019,
      "learning_rate": 4.907256709474642e-06,
      "loss": 0.0103,
      "step": 1565780
    },
    {
      "epoch": 2.562466042169897,
      "grad_norm": 0.49757009744644165,
      "learning_rate": 4.907190817261125e-06,
      "loss": 0.0189,
      "step": 1565800
    },
    {
      "epoch": 2.5624987726085506,
      "grad_norm": 0.17363163828849792,
      "learning_rate": 4.907124925047608e-06,
      "loss": 0.0107,
      "step": 1565820
    },
    {
      "epoch": 2.562531503047204,
      "grad_norm": 0.08001625537872314,
      "learning_rate": 4.907059032834091e-06,
      "loss": 0.0151,
      "step": 1565840
    },
    {
      "epoch": 2.5625642334858574,
      "grad_norm": 2.222679376602173,
      "learning_rate": 4.9069931406205735e-06,
      "loss": 0.0132,
      "step": 1565860
    },
    {
      "epoch": 2.5625969639245105,
      "grad_norm": 0.0783061757683754,
      "learning_rate": 4.906927248407056e-06,
      "loss": 0.0107,
      "step": 1565880
    },
    {
      "epoch": 2.5626296943631637,
      "grad_norm": 0.27932295203208923,
      "learning_rate": 4.906861356193539e-06,
      "loss": 0.0128,
      "step": 1565900
    },
    {
      "epoch": 2.5626624248018173,
      "grad_norm": 0.318723201751709,
      "learning_rate": 4.906795463980022e-06,
      "loss": 0.017,
      "step": 1565920
    },
    {
      "epoch": 2.5626951552404704,
      "grad_norm": 0.7172956466674805,
      "learning_rate": 4.9067295717665044e-06,
      "loss": 0.0151,
      "step": 1565940
    },
    {
      "epoch": 2.562727885679124,
      "grad_norm": 0.2829056978225708,
      "learning_rate": 4.906663679552988e-06,
      "loss": 0.0136,
      "step": 1565960
    },
    {
      "epoch": 2.562760616117777,
      "grad_norm": 0.2869158685207367,
      "learning_rate": 4.906597787339471e-06,
      "loss": 0.0098,
      "step": 1565980
    },
    {
      "epoch": 2.5627933465564308,
      "grad_norm": 0.21501366794109344,
      "learning_rate": 4.9065318951259535e-06,
      "loss": 0.0124,
      "step": 1566000
    },
    {
      "epoch": 2.562826076995084,
      "grad_norm": 0.33361178636550903,
      "learning_rate": 4.906466002912436e-06,
      "loss": 0.0107,
      "step": 1566020
    },
    {
      "epoch": 2.562858807433737,
      "grad_norm": 0.2797142267227173,
      "learning_rate": 4.906400110698919e-06,
      "loss": 0.016,
      "step": 1566040
    },
    {
      "epoch": 2.5628915378723907,
      "grad_norm": 0.719988226890564,
      "learning_rate": 4.906334218485402e-06,
      "loss": 0.0124,
      "step": 1566060
    },
    {
      "epoch": 2.562924268311044,
      "grad_norm": 0.6296414136886597,
      "learning_rate": 4.9062683262718845e-06,
      "loss": 0.014,
      "step": 1566080
    },
    {
      "epoch": 2.5629569987496974,
      "grad_norm": 0.41054248809814453,
      "learning_rate": 4.906202434058367e-06,
      "loss": 0.0137,
      "step": 1566100
    },
    {
      "epoch": 2.5629897291883506,
      "grad_norm": 0.14830732345581055,
      "learning_rate": 4.90613654184485e-06,
      "loss": 0.019,
      "step": 1566120
    },
    {
      "epoch": 2.563022459627004,
      "grad_norm": 0.30460652709007263,
      "learning_rate": 4.9060706496313335e-06,
      "loss": 0.0092,
      "step": 1566140
    },
    {
      "epoch": 2.5630551900656573,
      "grad_norm": 0.2889992296695709,
      "learning_rate": 4.906004757417816e-06,
      "loss": 0.013,
      "step": 1566160
    },
    {
      "epoch": 2.5630879205043104,
      "grad_norm": 0.5968977808952332,
      "learning_rate": 4.905938865204299e-06,
      "loss": 0.011,
      "step": 1566180
    },
    {
      "epoch": 2.563120650942964,
      "grad_norm": 0.9237537384033203,
      "learning_rate": 4.905872972990783e-06,
      "loss": 0.0101,
      "step": 1566200
    },
    {
      "epoch": 2.563153381381617,
      "grad_norm": 0.7128119468688965,
      "learning_rate": 4.905807080777265e-06,
      "loss": 0.0155,
      "step": 1566220
    },
    {
      "epoch": 2.5631861118202703,
      "grad_norm": 0.7251578569412231,
      "learning_rate": 4.905741188563748e-06,
      "loss": 0.0115,
      "step": 1566240
    },
    {
      "epoch": 2.563218842258924,
      "grad_norm": 0.2489907443523407,
      "learning_rate": 4.905675296350231e-06,
      "loss": 0.0194,
      "step": 1566260
    },
    {
      "epoch": 2.5632515726975775,
      "grad_norm": 0.9772140979766846,
      "learning_rate": 4.9056094041367136e-06,
      "loss": 0.0136,
      "step": 1566280
    },
    {
      "epoch": 2.5632843031362307,
      "grad_norm": 0.21732476353645325,
      "learning_rate": 4.905543511923196e-06,
      "loss": 0.0139,
      "step": 1566300
    },
    {
      "epoch": 2.563317033574884,
      "grad_norm": 0.30153563618659973,
      "learning_rate": 4.905477619709679e-06,
      "loss": 0.0096,
      "step": 1566320
    },
    {
      "epoch": 2.5633497640135374,
      "grad_norm": 0.2151404619216919,
      "learning_rate": 4.905411727496162e-06,
      "loss": 0.0144,
      "step": 1566340
    },
    {
      "epoch": 2.5633824944521906,
      "grad_norm": 0.24116438627243042,
      "learning_rate": 4.905345835282645e-06,
      "loss": 0.012,
      "step": 1566360
    },
    {
      "epoch": 2.5634152248908437,
      "grad_norm": 0.6351658701896667,
      "learning_rate": 4.905279943069128e-06,
      "loss": 0.0189,
      "step": 1566380
    },
    {
      "epoch": 2.5634479553294973,
      "grad_norm": 0.26193657517433167,
      "learning_rate": 4.905214050855611e-06,
      "loss": 0.0208,
      "step": 1566400
    },
    {
      "epoch": 2.563480685768151,
      "grad_norm": 0.1633870005607605,
      "learning_rate": 4.905148158642094e-06,
      "loss": 0.0198,
      "step": 1566420
    },
    {
      "epoch": 2.563513416206804,
      "grad_norm": 0.33305737376213074,
      "learning_rate": 4.905082266428576e-06,
      "loss": 0.0164,
      "step": 1566440
    },
    {
      "epoch": 2.563546146645457,
      "grad_norm": 0.459879070520401,
      "learning_rate": 4.905016374215059e-06,
      "loss": 0.0112,
      "step": 1566460
    },
    {
      "epoch": 2.563578877084111,
      "grad_norm": 0.5094482898712158,
      "learning_rate": 4.904950482001542e-06,
      "loss": 0.0169,
      "step": 1566480
    },
    {
      "epoch": 2.563611607522764,
      "grad_norm": 0.3549521565437317,
      "learning_rate": 4.904884589788025e-06,
      "loss": 0.0149,
      "step": 1566500
    },
    {
      "epoch": 2.563644337961417,
      "grad_norm": 0.296826034784317,
      "learning_rate": 4.904818697574508e-06,
      "loss": 0.0188,
      "step": 1566520
    },
    {
      "epoch": 2.5636770684000707,
      "grad_norm": 0.29312387108802795,
      "learning_rate": 4.904752805360991e-06,
      "loss": 0.0186,
      "step": 1566540
    },
    {
      "epoch": 2.5637097988387243,
      "grad_norm": 0.4772709012031555,
      "learning_rate": 4.904686913147474e-06,
      "loss": 0.0212,
      "step": 1566560
    },
    {
      "epoch": 2.5637425292773774,
      "grad_norm": 0.21352499723434448,
      "learning_rate": 4.904621020933956e-06,
      "loss": 0.018,
      "step": 1566580
    },
    {
      "epoch": 2.5637752597160306,
      "grad_norm": 0.6829367876052856,
      "learning_rate": 4.90455512872044e-06,
      "loss": 0.0119,
      "step": 1566600
    },
    {
      "epoch": 2.563807990154684,
      "grad_norm": 0.33890900015830994,
      "learning_rate": 4.904489236506923e-06,
      "loss": 0.0126,
      "step": 1566620
    },
    {
      "epoch": 2.5638407205933373,
      "grad_norm": 1.4013370275497437,
      "learning_rate": 4.904423344293405e-06,
      "loss": 0.0165,
      "step": 1566640
    },
    {
      "epoch": 2.5638734510319905,
      "grad_norm": 1.0404337644577026,
      "learning_rate": 4.904357452079888e-06,
      "loss": 0.0116,
      "step": 1566660
    },
    {
      "epoch": 2.563906181470644,
      "grad_norm": 0.42443153262138367,
      "learning_rate": 4.904291559866371e-06,
      "loss": 0.0093,
      "step": 1566680
    },
    {
      "epoch": 2.5639389119092977,
      "grad_norm": 0.5905247926712036,
      "learning_rate": 4.904225667652854e-06,
      "loss": 0.0099,
      "step": 1566700
    },
    {
      "epoch": 2.563971642347951,
      "grad_norm": 0.43046993017196655,
      "learning_rate": 4.904159775439336e-06,
      "loss": 0.0177,
      "step": 1566720
    },
    {
      "epoch": 2.564004372786604,
      "grad_norm": 0.16593188047409058,
      "learning_rate": 4.904093883225819e-06,
      "loss": 0.0172,
      "step": 1566740
    },
    {
      "epoch": 2.5640371032252576,
      "grad_norm": 0.3222578763961792,
      "learning_rate": 4.904027991012303e-06,
      "loss": 0.0103,
      "step": 1566760
    },
    {
      "epoch": 2.5640698336639107,
      "grad_norm": 0.5155311226844788,
      "learning_rate": 4.9039620987987854e-06,
      "loss": 0.0213,
      "step": 1566780
    },
    {
      "epoch": 2.564102564102564,
      "grad_norm": 0.9455243349075317,
      "learning_rate": 4.903896206585268e-06,
      "loss": 0.0254,
      "step": 1566800
    },
    {
      "epoch": 2.5641352945412175,
      "grad_norm": 0.9377069473266602,
      "learning_rate": 4.903830314371751e-06,
      "loss": 0.0137,
      "step": 1566820
    },
    {
      "epoch": 2.5641680249798706,
      "grad_norm": 0.5174872875213623,
      "learning_rate": 4.9037644221582345e-06,
      "loss": 0.0176,
      "step": 1566840
    },
    {
      "epoch": 2.564200755418524,
      "grad_norm": 0.17656764388084412,
      "learning_rate": 4.903698529944717e-06,
      "loss": 0.0108,
      "step": 1566860
    },
    {
      "epoch": 2.5642334858571774,
      "grad_norm": 0.16541025042533875,
      "learning_rate": 4.9036326377312e-06,
      "loss": 0.0082,
      "step": 1566880
    },
    {
      "epoch": 2.564266216295831,
      "grad_norm": 0.4030575454235077,
      "learning_rate": 4.903566745517683e-06,
      "loss": 0.0113,
      "step": 1566900
    },
    {
      "epoch": 2.564298946734484,
      "grad_norm": 0.23931308090686798,
      "learning_rate": 4.9035008533041655e-06,
      "loss": 0.0117,
      "step": 1566920
    },
    {
      "epoch": 2.5643316771731373,
      "grad_norm": 0.16792261600494385,
      "learning_rate": 4.903434961090648e-06,
      "loss": 0.0143,
      "step": 1566940
    },
    {
      "epoch": 2.564364407611791,
      "grad_norm": 0.3952774107456207,
      "learning_rate": 4.903369068877131e-06,
      "loss": 0.0149,
      "step": 1566960
    },
    {
      "epoch": 2.564397138050444,
      "grad_norm": 0.129423588514328,
      "learning_rate": 4.903303176663614e-06,
      "loss": 0.0132,
      "step": 1566980
    },
    {
      "epoch": 2.5644298684890976,
      "grad_norm": 0.5370185375213623,
      "learning_rate": 4.903237284450097e-06,
      "loss": 0.0113,
      "step": 1567000
    },
    {
      "epoch": 2.5644625989277507,
      "grad_norm": 0.2229505330324173,
      "learning_rate": 4.90317139223658e-06,
      "loss": 0.0127,
      "step": 1567020
    },
    {
      "epoch": 2.5644953293664043,
      "grad_norm": 0.28328025341033936,
      "learning_rate": 4.903105500023063e-06,
      "loss": 0.0223,
      "step": 1567040
    },
    {
      "epoch": 2.5645280598050575,
      "grad_norm": 0.4322092533111572,
      "learning_rate": 4.9030396078095455e-06,
      "loss": 0.018,
      "step": 1567060
    },
    {
      "epoch": 2.5645607902437106,
      "grad_norm": 0.24543358385562897,
      "learning_rate": 4.902973715596028e-06,
      "loss": 0.0124,
      "step": 1567080
    },
    {
      "epoch": 2.5645935206823642,
      "grad_norm": 0.7536402344703674,
      "learning_rate": 4.902907823382511e-06,
      "loss": 0.0124,
      "step": 1567100
    },
    {
      "epoch": 2.5646262511210174,
      "grad_norm": 0.11920848488807678,
      "learning_rate": 4.902841931168994e-06,
      "loss": 0.0085,
      "step": 1567120
    },
    {
      "epoch": 2.564658981559671,
      "grad_norm": 0.0812382623553276,
      "learning_rate": 4.9027760389554765e-06,
      "loss": 0.017,
      "step": 1567140
    },
    {
      "epoch": 2.564691711998324,
      "grad_norm": 0.3279822766780853,
      "learning_rate": 4.90271014674196e-06,
      "loss": 0.0159,
      "step": 1567160
    },
    {
      "epoch": 2.5647244424369777,
      "grad_norm": 0.25351980328559875,
      "learning_rate": 4.902644254528443e-06,
      "loss": 0.012,
      "step": 1567180
    },
    {
      "epoch": 2.564757172875631,
      "grad_norm": 0.2051825225353241,
      "learning_rate": 4.9025783623149255e-06,
      "loss": 0.0133,
      "step": 1567200
    },
    {
      "epoch": 2.564789903314284,
      "grad_norm": 0.29119351506233215,
      "learning_rate": 4.902512470101409e-06,
      "loss": 0.0134,
      "step": 1567220
    },
    {
      "epoch": 2.5648226337529376,
      "grad_norm": 0.31664565205574036,
      "learning_rate": 4.902446577887892e-06,
      "loss": 0.0122,
      "step": 1567240
    },
    {
      "epoch": 2.5648553641915908,
      "grad_norm": 0.13185711205005646,
      "learning_rate": 4.902380685674375e-06,
      "loss": 0.017,
      "step": 1567260
    },
    {
      "epoch": 2.5648880946302444,
      "grad_norm": 0.4250860810279846,
      "learning_rate": 4.902314793460857e-06,
      "loss": 0.0175,
      "step": 1567280
    },
    {
      "epoch": 2.5649208250688975,
      "grad_norm": 0.35911187529563904,
      "learning_rate": 4.90224890124734e-06,
      "loss": 0.0178,
      "step": 1567300
    },
    {
      "epoch": 2.564953555507551,
      "grad_norm": 0.33110296726226807,
      "learning_rate": 4.902183009033823e-06,
      "loss": 0.0118,
      "step": 1567320
    },
    {
      "epoch": 2.5649862859462043,
      "grad_norm": 0.1598300188779831,
      "learning_rate": 4.9021171168203055e-06,
      "loss": 0.0153,
      "step": 1567340
    },
    {
      "epoch": 2.5650190163848574,
      "grad_norm": 0.1953444629907608,
      "learning_rate": 4.902051224606788e-06,
      "loss": 0.0119,
      "step": 1567360
    },
    {
      "epoch": 2.565051746823511,
      "grad_norm": 0.1814555823802948,
      "learning_rate": 4.901985332393272e-06,
      "loss": 0.0128,
      "step": 1567380
    },
    {
      "epoch": 2.565084477262164,
      "grad_norm": 0.7577424645423889,
      "learning_rate": 4.901919440179755e-06,
      "loss": 0.0183,
      "step": 1567400
    },
    {
      "epoch": 2.5651172077008177,
      "grad_norm": 0.41351860761642456,
      "learning_rate": 4.901853547966237e-06,
      "loss": 0.0144,
      "step": 1567420
    },
    {
      "epoch": 2.565149938139471,
      "grad_norm": 0.201816126704216,
      "learning_rate": 4.90178765575272e-06,
      "loss": 0.014,
      "step": 1567440
    },
    {
      "epoch": 2.5651826685781245,
      "grad_norm": 0.3921961784362793,
      "learning_rate": 4.901721763539203e-06,
      "loss": 0.016,
      "step": 1567460
    },
    {
      "epoch": 2.5652153990167776,
      "grad_norm": 0.6429226994514465,
      "learning_rate": 4.9016558713256856e-06,
      "loss": 0.0181,
      "step": 1567480
    },
    {
      "epoch": 2.565248129455431,
      "grad_norm": 0.6116700768470764,
      "learning_rate": 4.901589979112168e-06,
      "loss": 0.0156,
      "step": 1567500
    },
    {
      "epoch": 2.5652808598940844,
      "grad_norm": 1.0928492546081543,
      "learning_rate": 4.901524086898651e-06,
      "loss": 0.017,
      "step": 1567520
    },
    {
      "epoch": 2.5653135903327375,
      "grad_norm": 0.423578679561615,
      "learning_rate": 4.901458194685135e-06,
      "loss": 0.0236,
      "step": 1567540
    },
    {
      "epoch": 2.565346320771391,
      "grad_norm": 0.20908567309379578,
      "learning_rate": 4.901392302471617e-06,
      "loss": 0.013,
      "step": 1567560
    },
    {
      "epoch": 2.5653790512100443,
      "grad_norm": 1.037112832069397,
      "learning_rate": 4.9013264102581e-06,
      "loss": 0.0179,
      "step": 1567580
    },
    {
      "epoch": 2.565411781648698,
      "grad_norm": 0.5667390823364258,
      "learning_rate": 4.901260518044583e-06,
      "loss": 0.0153,
      "step": 1567600
    },
    {
      "epoch": 2.565444512087351,
      "grad_norm": 0.11602753400802612,
      "learning_rate": 4.9011946258310664e-06,
      "loss": 0.0115,
      "step": 1567620
    },
    {
      "epoch": 2.565477242526004,
      "grad_norm": 0.7666038870811462,
      "learning_rate": 4.901128733617549e-06,
      "loss": 0.0141,
      "step": 1567640
    },
    {
      "epoch": 2.5655099729646578,
      "grad_norm": 0.32326212525367737,
      "learning_rate": 4.901062841404032e-06,
      "loss": 0.0143,
      "step": 1567660
    },
    {
      "epoch": 2.565542703403311,
      "grad_norm": 0.14037661254405975,
      "learning_rate": 4.900996949190515e-06,
      "loss": 0.0172,
      "step": 1567680
    },
    {
      "epoch": 2.5655754338419645,
      "grad_norm": 0.39572274684906006,
      "learning_rate": 4.900931056976997e-06,
      "loss": 0.0167,
      "step": 1567700
    },
    {
      "epoch": 2.5656081642806177,
      "grad_norm": 0.439895361661911,
      "learning_rate": 4.90086516476348e-06,
      "loss": 0.0136,
      "step": 1567720
    },
    {
      "epoch": 2.5656408947192713,
      "grad_norm": 0.1420120745897293,
      "learning_rate": 4.900799272549963e-06,
      "loss": 0.0176,
      "step": 1567740
    },
    {
      "epoch": 2.5656736251579244,
      "grad_norm": 0.3700625002384186,
      "learning_rate": 4.900733380336446e-06,
      "loss": 0.0123,
      "step": 1567760
    },
    {
      "epoch": 2.5657063555965776,
      "grad_norm": 0.36235904693603516,
      "learning_rate": 4.900667488122929e-06,
      "loss": 0.0079,
      "step": 1567780
    },
    {
      "epoch": 2.565739086035231,
      "grad_norm": 0.6147348880767822,
      "learning_rate": 4.900601595909412e-06,
      "loss": 0.0148,
      "step": 1567800
    },
    {
      "epoch": 2.5657718164738843,
      "grad_norm": 0.32921189069747925,
      "learning_rate": 4.900535703695895e-06,
      "loss": 0.0096,
      "step": 1567820
    },
    {
      "epoch": 2.5658045469125375,
      "grad_norm": 0.5873233079910278,
      "learning_rate": 4.9004698114823774e-06,
      "loss": 0.0119,
      "step": 1567840
    },
    {
      "epoch": 2.565837277351191,
      "grad_norm": 0.1564943492412567,
      "learning_rate": 4.90040391926886e-06,
      "loss": 0.014,
      "step": 1567860
    },
    {
      "epoch": 2.5658700077898446,
      "grad_norm": 0.08564259111881256,
      "learning_rate": 4.900338027055343e-06,
      "loss": 0.009,
      "step": 1567880
    },
    {
      "epoch": 2.565902738228498,
      "grad_norm": 0.35684576630592346,
      "learning_rate": 4.9002721348418265e-06,
      "loss": 0.0147,
      "step": 1567900
    },
    {
      "epoch": 2.565935468667151,
      "grad_norm": 0.8552544116973877,
      "learning_rate": 4.900206242628309e-06,
      "loss": 0.0258,
      "step": 1567920
    },
    {
      "epoch": 2.5659681991058045,
      "grad_norm": 0.4551762044429779,
      "learning_rate": 4.900140350414792e-06,
      "loss": 0.014,
      "step": 1567940
    },
    {
      "epoch": 2.5660009295444577,
      "grad_norm": 0.27788111567497253,
      "learning_rate": 4.900074458201275e-06,
      "loss": 0.0133,
      "step": 1567960
    },
    {
      "epoch": 2.566033659983111,
      "grad_norm": 0.2352922260761261,
      "learning_rate": 4.9000085659877574e-06,
      "loss": 0.0152,
      "step": 1567980
    },
    {
      "epoch": 2.5660663904217644,
      "grad_norm": 0.047843459993600845,
      "learning_rate": 4.89994267377424e-06,
      "loss": 0.0172,
      "step": 1568000
    },
    {
      "epoch": 2.566099120860418,
      "grad_norm": 0.4832817316055298,
      "learning_rate": 4.899876781560724e-06,
      "loss": 0.0156,
      "step": 1568020
    },
    {
      "epoch": 2.566131851299071,
      "grad_norm": 0.16938874125480652,
      "learning_rate": 4.8998108893472065e-06,
      "loss": 0.0163,
      "step": 1568040
    },
    {
      "epoch": 2.5661645817377243,
      "grad_norm": 0.639710545539856,
      "learning_rate": 4.899744997133689e-06,
      "loss": 0.0147,
      "step": 1568060
    },
    {
      "epoch": 2.566197312176378,
      "grad_norm": 0.4053906202316284,
      "learning_rate": 4.899679104920172e-06,
      "loss": 0.0143,
      "step": 1568080
    },
    {
      "epoch": 2.566230042615031,
      "grad_norm": 0.7157871723175049,
      "learning_rate": 4.899613212706655e-06,
      "loss": 0.0222,
      "step": 1568100
    },
    {
      "epoch": 2.566262773053684,
      "grad_norm": 1.3732521533966064,
      "learning_rate": 4.8995473204931375e-06,
      "loss": 0.02,
      "step": 1568120
    },
    {
      "epoch": 2.566295503492338,
      "grad_norm": 0.7595099806785583,
      "learning_rate": 4.89948142827962e-06,
      "loss": 0.0147,
      "step": 1568140
    },
    {
      "epoch": 2.5663282339309914,
      "grad_norm": 0.3501821458339691,
      "learning_rate": 4.899415536066103e-06,
      "loss": 0.0146,
      "step": 1568160
    },
    {
      "epoch": 2.5663609643696446,
      "grad_norm": 0.6352779269218445,
      "learning_rate": 4.8993496438525865e-06,
      "loss": 0.0143,
      "step": 1568180
    },
    {
      "epoch": 2.5663936948082977,
      "grad_norm": 0.12233959138393402,
      "learning_rate": 4.899283751639069e-06,
      "loss": 0.0172,
      "step": 1568200
    },
    {
      "epoch": 2.5664264252469513,
      "grad_norm": 0.778634786605835,
      "learning_rate": 4.899217859425552e-06,
      "loss": 0.0143,
      "step": 1568220
    },
    {
      "epoch": 2.5664591556856045,
      "grad_norm": 0.19364020228385925,
      "learning_rate": 4.899151967212036e-06,
      "loss": 0.0147,
      "step": 1568240
    },
    {
      "epoch": 2.5664918861242576,
      "grad_norm": 0.2088499516248703,
      "learning_rate": 4.899086074998518e-06,
      "loss": 0.0132,
      "step": 1568260
    },
    {
      "epoch": 2.566524616562911,
      "grad_norm": 0.6125404834747314,
      "learning_rate": 4.899020182785001e-06,
      "loss": 0.0166,
      "step": 1568280
    },
    {
      "epoch": 2.5665573470015643,
      "grad_norm": 0.3197249472141266,
      "learning_rate": 4.898954290571484e-06,
      "loss": 0.0184,
      "step": 1568300
    },
    {
      "epoch": 2.566590077440218,
      "grad_norm": 0.3517072796821594,
      "learning_rate": 4.8988883983579666e-06,
      "loss": 0.0124,
      "step": 1568320
    },
    {
      "epoch": 2.566622807878871,
      "grad_norm": 0.46193063259124756,
      "learning_rate": 4.898822506144449e-06,
      "loss": 0.0159,
      "step": 1568340
    },
    {
      "epoch": 2.5666555383175247,
      "grad_norm": 0.22792817652225494,
      "learning_rate": 4.898756613930932e-06,
      "loss": 0.0195,
      "step": 1568360
    },
    {
      "epoch": 2.566688268756178,
      "grad_norm": 0.342696875333786,
      "learning_rate": 4.898690721717415e-06,
      "loss": 0.0136,
      "step": 1568380
    },
    {
      "epoch": 2.566720999194831,
      "grad_norm": 0.09071779251098633,
      "learning_rate": 4.898624829503898e-06,
      "loss": 0.0113,
      "step": 1568400
    },
    {
      "epoch": 2.5667537296334846,
      "grad_norm": 0.36727890372276306,
      "learning_rate": 4.898558937290381e-06,
      "loss": 0.0144,
      "step": 1568420
    },
    {
      "epoch": 2.5667864600721377,
      "grad_norm": 0.2466144561767578,
      "learning_rate": 4.898493045076864e-06,
      "loss": 0.0143,
      "step": 1568440
    },
    {
      "epoch": 2.5668191905107913,
      "grad_norm": 0.42987731099128723,
      "learning_rate": 4.898427152863347e-06,
      "loss": 0.011,
      "step": 1568460
    },
    {
      "epoch": 2.5668519209494445,
      "grad_norm": 0.7454710602760315,
      "learning_rate": 4.898361260649829e-06,
      "loss": 0.0152,
      "step": 1568480
    },
    {
      "epoch": 2.566884651388098,
      "grad_norm": 0.30581045150756836,
      "learning_rate": 4.898295368436312e-06,
      "loss": 0.0163,
      "step": 1568500
    },
    {
      "epoch": 2.566917381826751,
      "grad_norm": 0.3103043735027313,
      "learning_rate": 4.898229476222795e-06,
      "loss": 0.0108,
      "step": 1568520
    },
    {
      "epoch": 2.5669501122654044,
      "grad_norm": 0.12669850885868073,
      "learning_rate": 4.8981635840092776e-06,
      "loss": 0.01,
      "step": 1568540
    },
    {
      "epoch": 2.566982842704058,
      "grad_norm": 0.20395015180110931,
      "learning_rate": 4.89809769179576e-06,
      "loss": 0.0169,
      "step": 1568560
    },
    {
      "epoch": 2.567015573142711,
      "grad_norm": 0.75882887840271,
      "learning_rate": 4.898031799582244e-06,
      "loss": 0.017,
      "step": 1568580
    },
    {
      "epoch": 2.5670483035813647,
      "grad_norm": 0.2862260043621063,
      "learning_rate": 4.897965907368727e-06,
      "loss": 0.0212,
      "step": 1568600
    },
    {
      "epoch": 2.567081034020018,
      "grad_norm": 0.47989940643310547,
      "learning_rate": 4.897900015155209e-06,
      "loss": 0.0125,
      "step": 1568620
    },
    {
      "epoch": 2.5671137644586715,
      "grad_norm": 0.17953000962734222,
      "learning_rate": 4.897834122941693e-06,
      "loss": 0.0177,
      "step": 1568640
    },
    {
      "epoch": 2.5671464948973246,
      "grad_norm": 0.7647923231124878,
      "learning_rate": 4.897768230728176e-06,
      "loss": 0.0151,
      "step": 1568660
    },
    {
      "epoch": 2.5671792253359778,
      "grad_norm": 0.3782620131969452,
      "learning_rate": 4.897702338514658e-06,
      "loss": 0.0123,
      "step": 1568680
    },
    {
      "epoch": 2.5672119557746313,
      "grad_norm": 0.30420389771461487,
      "learning_rate": 4.897636446301141e-06,
      "loss": 0.0138,
      "step": 1568700
    },
    {
      "epoch": 2.5672446862132845,
      "grad_norm": 0.4337286949157715,
      "learning_rate": 4.897570554087624e-06,
      "loss": 0.0133,
      "step": 1568720
    },
    {
      "epoch": 2.567277416651938,
      "grad_norm": 0.6450955867767334,
      "learning_rate": 4.897504661874107e-06,
      "loss": 0.0149,
      "step": 1568740
    },
    {
      "epoch": 2.5673101470905912,
      "grad_norm": 0.4880650043487549,
      "learning_rate": 4.897438769660589e-06,
      "loss": 0.0149,
      "step": 1568760
    },
    {
      "epoch": 2.567342877529245,
      "grad_norm": 0.2326110601425171,
      "learning_rate": 4.897372877447072e-06,
      "loss": 0.017,
      "step": 1568780
    },
    {
      "epoch": 2.567375607967898,
      "grad_norm": 0.2301352620124817,
      "learning_rate": 4.897306985233556e-06,
      "loss": 0.0131,
      "step": 1568800
    },
    {
      "epoch": 2.567408338406551,
      "grad_norm": 0.39852744340896606,
      "learning_rate": 4.8972410930200384e-06,
      "loss": 0.0154,
      "step": 1568820
    },
    {
      "epoch": 2.5674410688452047,
      "grad_norm": 0.43109095096588135,
      "learning_rate": 4.897175200806521e-06,
      "loss": 0.0126,
      "step": 1568840
    },
    {
      "epoch": 2.567473799283858,
      "grad_norm": 0.024684971198439598,
      "learning_rate": 4.897109308593004e-06,
      "loss": 0.0155,
      "step": 1568860
    },
    {
      "epoch": 2.5675065297225115,
      "grad_norm": 0.22560952603816986,
      "learning_rate": 4.897043416379487e-06,
      "loss": 0.0121,
      "step": 1568880
    },
    {
      "epoch": 2.5675392601611646,
      "grad_norm": 0.6276766061782837,
      "learning_rate": 4.896977524165969e-06,
      "loss": 0.0138,
      "step": 1568900
    },
    {
      "epoch": 2.567571990599818,
      "grad_norm": 0.30940622091293335,
      "learning_rate": 4.896911631952452e-06,
      "loss": 0.0147,
      "step": 1568920
    },
    {
      "epoch": 2.5676047210384714,
      "grad_norm": 0.47590523958206177,
      "learning_rate": 4.896845739738935e-06,
      "loss": 0.0142,
      "step": 1568940
    },
    {
      "epoch": 2.5676374514771245,
      "grad_norm": 0.2181646078824997,
      "learning_rate": 4.8967798475254185e-06,
      "loss": 0.0142,
      "step": 1568960
    },
    {
      "epoch": 2.567670181915778,
      "grad_norm": 0.22470518946647644,
      "learning_rate": 4.896713955311901e-06,
      "loss": 0.0145,
      "step": 1568980
    },
    {
      "epoch": 2.5677029123544313,
      "grad_norm": 0.38158711791038513,
      "learning_rate": 4.896648063098384e-06,
      "loss": 0.0163,
      "step": 1569000
    },
    {
      "epoch": 2.567735642793085,
      "grad_norm": 0.17523910105228424,
      "learning_rate": 4.896582170884867e-06,
      "loss": 0.013,
      "step": 1569020
    },
    {
      "epoch": 2.567768373231738,
      "grad_norm": 0.8265906572341919,
      "learning_rate": 4.89651627867135e-06,
      "loss": 0.0135,
      "step": 1569040
    },
    {
      "epoch": 2.5678011036703916,
      "grad_norm": 1.2209981679916382,
      "learning_rate": 4.896450386457833e-06,
      "loss": 0.0158,
      "step": 1569060
    },
    {
      "epoch": 2.5678338341090448,
      "grad_norm": 0.6548309922218323,
      "learning_rate": 4.896384494244316e-06,
      "loss": 0.0197,
      "step": 1569080
    },
    {
      "epoch": 2.567866564547698,
      "grad_norm": 0.1949133276939392,
      "learning_rate": 4.8963186020307985e-06,
      "loss": 0.0154,
      "step": 1569100
    },
    {
      "epoch": 2.5678992949863515,
      "grad_norm": 0.8987394571304321,
      "learning_rate": 4.896252709817281e-06,
      "loss": 0.0162,
      "step": 1569120
    },
    {
      "epoch": 2.5679320254250046,
      "grad_norm": 0.18731367588043213,
      "learning_rate": 4.896186817603764e-06,
      "loss": 0.0149,
      "step": 1569140
    },
    {
      "epoch": 2.5679647558636582,
      "grad_norm": 0.04899992421269417,
      "learning_rate": 4.896120925390247e-06,
      "loss": 0.0112,
      "step": 1569160
    },
    {
      "epoch": 2.5679974863023114,
      "grad_norm": 0.3869325518608093,
      "learning_rate": 4.8960550331767295e-06,
      "loss": 0.0094,
      "step": 1569180
    },
    {
      "epoch": 2.568030216740965,
      "grad_norm": 0.2433958202600479,
      "learning_rate": 4.895989140963213e-06,
      "loss": 0.0123,
      "step": 1569200
    },
    {
      "epoch": 2.568062947179618,
      "grad_norm": 0.18116804957389832,
      "learning_rate": 4.895923248749696e-06,
      "loss": 0.012,
      "step": 1569220
    },
    {
      "epoch": 2.5680956776182713,
      "grad_norm": 0.7465178966522217,
      "learning_rate": 4.8958573565361785e-06,
      "loss": 0.014,
      "step": 1569240
    },
    {
      "epoch": 2.568128408056925,
      "grad_norm": 0.4580288529396057,
      "learning_rate": 4.895791464322661e-06,
      "loss": 0.0129,
      "step": 1569260
    },
    {
      "epoch": 2.568161138495578,
      "grad_norm": 0.3773302435874939,
      "learning_rate": 4.895725572109144e-06,
      "loss": 0.0183,
      "step": 1569280
    },
    {
      "epoch": 2.568193868934231,
      "grad_norm": 0.5639258027076721,
      "learning_rate": 4.895659679895628e-06,
      "loss": 0.0153,
      "step": 1569300
    },
    {
      "epoch": 2.5682265993728848,
      "grad_norm": 0.43537700176239014,
      "learning_rate": 4.89559378768211e-06,
      "loss": 0.0135,
      "step": 1569320
    },
    {
      "epoch": 2.5682593298115384,
      "grad_norm": 0.31685635447502136,
      "learning_rate": 4.895527895468593e-06,
      "loss": 0.0094,
      "step": 1569340
    },
    {
      "epoch": 2.5682920602501915,
      "grad_norm": 0.6116861701011658,
      "learning_rate": 4.895462003255076e-06,
      "loss": 0.0141,
      "step": 1569360
    },
    {
      "epoch": 2.5683247906888447,
      "grad_norm": 0.6827601790428162,
      "learning_rate": 4.8953961110415585e-06,
      "loss": 0.012,
      "step": 1569380
    },
    {
      "epoch": 2.5683575211274983,
      "grad_norm": 0.24628037214279175,
      "learning_rate": 4.895330218828041e-06,
      "loss": 0.0191,
      "step": 1569400
    },
    {
      "epoch": 2.5683902515661514,
      "grad_norm": 0.11173494905233383,
      "learning_rate": 4.895264326614524e-06,
      "loss": 0.0165,
      "step": 1569420
    },
    {
      "epoch": 2.5684229820048046,
      "grad_norm": 0.2123466432094574,
      "learning_rate": 4.895198434401008e-06,
      "loss": 0.0103,
      "step": 1569440
    },
    {
      "epoch": 2.568455712443458,
      "grad_norm": 5.136903285980225,
      "learning_rate": 4.89513254218749e-06,
      "loss": 0.0137,
      "step": 1569460
    },
    {
      "epoch": 2.5684884428821118,
      "grad_norm": 0.25550737977027893,
      "learning_rate": 4.895066649973973e-06,
      "loss": 0.0161,
      "step": 1569480
    },
    {
      "epoch": 2.568521173320765,
      "grad_norm": 0.39376765489578247,
      "learning_rate": 4.895000757760456e-06,
      "loss": 0.0106,
      "step": 1569500
    },
    {
      "epoch": 2.568553903759418,
      "grad_norm": 0.5294768214225769,
      "learning_rate": 4.8949348655469386e-06,
      "loss": 0.0232,
      "step": 1569520
    },
    {
      "epoch": 2.5685866341980716,
      "grad_norm": 0.30002689361572266,
      "learning_rate": 4.894868973333421e-06,
      "loss": 0.0145,
      "step": 1569540
    },
    {
      "epoch": 2.568619364636725,
      "grad_norm": 1.1314425468444824,
      "learning_rate": 4.894803081119904e-06,
      "loss": 0.0177,
      "step": 1569560
    },
    {
      "epoch": 2.568652095075378,
      "grad_norm": 0.2466660737991333,
      "learning_rate": 4.894737188906387e-06,
      "loss": 0.0171,
      "step": 1569580
    },
    {
      "epoch": 2.5686848255140315,
      "grad_norm": 0.21064543724060059,
      "learning_rate": 4.89467129669287e-06,
      "loss": 0.0184,
      "step": 1569600
    },
    {
      "epoch": 2.568717555952685,
      "grad_norm": 0.48459380865097046,
      "learning_rate": 4.894605404479353e-06,
      "loss": 0.0129,
      "step": 1569620
    },
    {
      "epoch": 2.5687502863913383,
      "grad_norm": 0.24779413640499115,
      "learning_rate": 4.894539512265836e-06,
      "loss": 0.0122,
      "step": 1569640
    },
    {
      "epoch": 2.5687830168299914,
      "grad_norm": 0.21672163903713226,
      "learning_rate": 4.8944736200523194e-06,
      "loss": 0.0143,
      "step": 1569660
    },
    {
      "epoch": 2.568815747268645,
      "grad_norm": 0.12451913952827454,
      "learning_rate": 4.894407727838802e-06,
      "loss": 0.0137,
      "step": 1569680
    },
    {
      "epoch": 2.568848477707298,
      "grad_norm": 0.9804623126983643,
      "learning_rate": 4.894341835625285e-06,
      "loss": 0.0155,
      "step": 1569700
    },
    {
      "epoch": 2.5688812081459513,
      "grad_norm": 0.41955435276031494,
      "learning_rate": 4.894275943411768e-06,
      "loss": 0.0215,
      "step": 1569720
    },
    {
      "epoch": 2.568913938584605,
      "grad_norm": 0.7806602716445923,
      "learning_rate": 4.89421005119825e-06,
      "loss": 0.0153,
      "step": 1569740
    },
    {
      "epoch": 2.5689466690232585,
      "grad_norm": 0.26879987120628357,
      "learning_rate": 4.894144158984733e-06,
      "loss": 0.0176,
      "step": 1569760
    },
    {
      "epoch": 2.5689793994619117,
      "grad_norm": 0.20079700648784637,
      "learning_rate": 4.894078266771216e-06,
      "loss": 0.0144,
      "step": 1569780
    },
    {
      "epoch": 2.569012129900565,
      "grad_norm": 0.49503928422927856,
      "learning_rate": 4.894012374557699e-06,
      "loss": 0.0115,
      "step": 1569800
    },
    {
      "epoch": 2.5690448603392184,
      "grad_norm": 0.6195122003555298,
      "learning_rate": 4.893946482344182e-06,
      "loss": 0.0137,
      "step": 1569820
    },
    {
      "epoch": 2.5690775907778716,
      "grad_norm": 0.17519032955169678,
      "learning_rate": 4.893880590130665e-06,
      "loss": 0.0196,
      "step": 1569840
    },
    {
      "epoch": 2.5691103212165247,
      "grad_norm": 0.5889866948127747,
      "learning_rate": 4.893814697917148e-06,
      "loss": 0.0133,
      "step": 1569860
    },
    {
      "epoch": 2.5691430516551783,
      "grad_norm": 0.23248827457427979,
      "learning_rate": 4.8937488057036304e-06,
      "loss": 0.0162,
      "step": 1569880
    },
    {
      "epoch": 2.5691757820938315,
      "grad_norm": 0.2223827987909317,
      "learning_rate": 4.893682913490113e-06,
      "loss": 0.0157,
      "step": 1569900
    },
    {
      "epoch": 2.569208512532485,
      "grad_norm": 0.19357061386108398,
      "learning_rate": 4.893617021276596e-06,
      "loss": 0.0175,
      "step": 1569920
    },
    {
      "epoch": 2.569241242971138,
      "grad_norm": 0.12834398448467255,
      "learning_rate": 4.893551129063079e-06,
      "loss": 0.0172,
      "step": 1569940
    },
    {
      "epoch": 2.569273973409792,
      "grad_norm": 0.3161662817001343,
      "learning_rate": 4.893485236849561e-06,
      "loss": 0.0103,
      "step": 1569960
    },
    {
      "epoch": 2.569306703848445,
      "grad_norm": 0.6598321199417114,
      "learning_rate": 4.893419344636044e-06,
      "loss": 0.0131,
      "step": 1569980
    },
    {
      "epoch": 2.569339434287098,
      "grad_norm": 0.28075531125068665,
      "learning_rate": 4.893353452422528e-06,
      "loss": 0.0127,
      "step": 1570000
    },
    {
      "epoch": 2.5693721647257517,
      "grad_norm": 0.5847848653793335,
      "learning_rate": 4.8932875602090105e-06,
      "loss": 0.0125,
      "step": 1570020
    },
    {
      "epoch": 2.569404895164405,
      "grad_norm": 0.556149959564209,
      "learning_rate": 4.893221667995493e-06,
      "loss": 0.0194,
      "step": 1570040
    },
    {
      "epoch": 2.5694376256030584,
      "grad_norm": 0.41632524132728577,
      "learning_rate": 4.893155775781977e-06,
      "loss": 0.0116,
      "step": 1570060
    },
    {
      "epoch": 2.5694703560417116,
      "grad_norm": 0.9429691433906555,
      "learning_rate": 4.8930898835684595e-06,
      "loss": 0.0166,
      "step": 1570080
    },
    {
      "epoch": 2.569503086480365,
      "grad_norm": 0.5254193544387817,
      "learning_rate": 4.893023991354942e-06,
      "loss": 0.015,
      "step": 1570100
    },
    {
      "epoch": 2.5695358169190183,
      "grad_norm": 0.0809326171875,
      "learning_rate": 4.892958099141425e-06,
      "loss": 0.0125,
      "step": 1570120
    },
    {
      "epoch": 2.5695685473576715,
      "grad_norm": 0.9600022435188293,
      "learning_rate": 4.892892206927908e-06,
      "loss": 0.0173,
      "step": 1570140
    },
    {
      "epoch": 2.569601277796325,
      "grad_norm": 0.8203689455986023,
      "learning_rate": 4.8928263147143905e-06,
      "loss": 0.0268,
      "step": 1570160
    },
    {
      "epoch": 2.5696340082349782,
      "grad_norm": 0.4662071466445923,
      "learning_rate": 4.892760422500873e-06,
      "loss": 0.0138,
      "step": 1570180
    },
    {
      "epoch": 2.569666738673632,
      "grad_norm": 0.8636625409126282,
      "learning_rate": 4.892694530287356e-06,
      "loss": 0.0207,
      "step": 1570200
    },
    {
      "epoch": 2.569699469112285,
      "grad_norm": 0.24446426331996918,
      "learning_rate": 4.8926286380738395e-06,
      "loss": 0.0125,
      "step": 1570220
    },
    {
      "epoch": 2.5697321995509386,
      "grad_norm": 0.7452976107597351,
      "learning_rate": 4.892562745860322e-06,
      "loss": 0.0162,
      "step": 1570240
    },
    {
      "epoch": 2.5697649299895917,
      "grad_norm": 0.1822895109653473,
      "learning_rate": 4.892496853646805e-06,
      "loss": 0.0129,
      "step": 1570260
    },
    {
      "epoch": 2.569797660428245,
      "grad_norm": 0.40600186586380005,
      "learning_rate": 4.892430961433288e-06,
      "loss": 0.0116,
      "step": 1570280
    },
    {
      "epoch": 2.5698303908668985,
      "grad_norm": 0.343450129032135,
      "learning_rate": 4.8923650692197705e-06,
      "loss": 0.0141,
      "step": 1570300
    },
    {
      "epoch": 2.5698631213055516,
      "grad_norm": 0.27560290694236755,
      "learning_rate": 4.892299177006253e-06,
      "loss": 0.0141,
      "step": 1570320
    },
    {
      "epoch": 2.569895851744205,
      "grad_norm": 0.6590597629547119,
      "learning_rate": 4.892233284792736e-06,
      "loss": 0.0117,
      "step": 1570340
    },
    {
      "epoch": 2.5699285821828584,
      "grad_norm": 0.15541023015975952,
      "learning_rate": 4.8921673925792196e-06,
      "loss": 0.0123,
      "step": 1570360
    },
    {
      "epoch": 2.569961312621512,
      "grad_norm": 0.16028042137622833,
      "learning_rate": 4.892101500365702e-06,
      "loss": 0.0166,
      "step": 1570380
    },
    {
      "epoch": 2.569994043060165,
      "grad_norm": 0.43721112608909607,
      "learning_rate": 4.892035608152185e-06,
      "loss": 0.0167,
      "step": 1570400
    },
    {
      "epoch": 2.5700267734988183,
      "grad_norm": 0.28661030530929565,
      "learning_rate": 4.891969715938668e-06,
      "loss": 0.0139,
      "step": 1570420
    },
    {
      "epoch": 2.570059503937472,
      "grad_norm": 0.1835450977087021,
      "learning_rate": 4.8919038237251505e-06,
      "loss": 0.0136,
      "step": 1570440
    },
    {
      "epoch": 2.570092234376125,
      "grad_norm": 0.40927544236183167,
      "learning_rate": 4.891837931511634e-06,
      "loss": 0.0178,
      "step": 1570460
    },
    {
      "epoch": 2.5701249648147786,
      "grad_norm": 0.13595908880233765,
      "learning_rate": 4.891772039298117e-06,
      "loss": 0.0153,
      "step": 1570480
    },
    {
      "epoch": 2.5701576952534317,
      "grad_norm": 0.6267136931419373,
      "learning_rate": 4.8917061470846e-06,
      "loss": 0.0226,
      "step": 1570500
    },
    {
      "epoch": 2.5701904256920853,
      "grad_norm": 0.31648415327072144,
      "learning_rate": 4.891640254871082e-06,
      "loss": 0.0118,
      "step": 1570520
    },
    {
      "epoch": 2.5702231561307385,
      "grad_norm": 1.474583625793457,
      "learning_rate": 4.891574362657565e-06,
      "loss": 0.0161,
      "step": 1570540
    },
    {
      "epoch": 2.5702558865693916,
      "grad_norm": 0.173245370388031,
      "learning_rate": 4.891508470444048e-06,
      "loss": 0.0175,
      "step": 1570560
    },
    {
      "epoch": 2.5702886170080452,
      "grad_norm": 0.17950649559497833,
      "learning_rate": 4.8914425782305306e-06,
      "loss": 0.0158,
      "step": 1570580
    },
    {
      "epoch": 2.5703213474466984,
      "grad_norm": 0.4091804623603821,
      "learning_rate": 4.891376686017013e-06,
      "loss": 0.0109,
      "step": 1570600
    },
    {
      "epoch": 2.570354077885352,
      "grad_norm": 0.17678575217723846,
      "learning_rate": 4.891310793803497e-06,
      "loss": 0.0151,
      "step": 1570620
    },
    {
      "epoch": 2.570386808324005,
      "grad_norm": 0.08810599148273468,
      "learning_rate": 4.89124490158998e-06,
      "loss": 0.0159,
      "step": 1570640
    },
    {
      "epoch": 2.5704195387626587,
      "grad_norm": 0.16236606240272522,
      "learning_rate": 4.891179009376462e-06,
      "loss": 0.0106,
      "step": 1570660
    },
    {
      "epoch": 2.570452269201312,
      "grad_norm": 0.3684564530849457,
      "learning_rate": 4.891113117162945e-06,
      "loss": 0.0115,
      "step": 1570680
    },
    {
      "epoch": 2.570484999639965,
      "grad_norm": 0.448271781206131,
      "learning_rate": 4.891047224949428e-06,
      "loss": 0.0189,
      "step": 1570700
    },
    {
      "epoch": 2.5705177300786186,
      "grad_norm": 0.41326722502708435,
      "learning_rate": 4.8909813327359114e-06,
      "loss": 0.0103,
      "step": 1570720
    },
    {
      "epoch": 2.5705504605172718,
      "grad_norm": 0.06292878091335297,
      "learning_rate": 4.890915440522394e-06,
      "loss": 0.0132,
      "step": 1570740
    },
    {
      "epoch": 2.5705831909559254,
      "grad_norm": 0.5760017037391663,
      "learning_rate": 4.890849548308877e-06,
      "loss": 0.0135,
      "step": 1570760
    },
    {
      "epoch": 2.5706159213945785,
      "grad_norm": 0.39945054054260254,
      "learning_rate": 4.89078365609536e-06,
      "loss": 0.0109,
      "step": 1570780
    },
    {
      "epoch": 2.570648651833232,
      "grad_norm": 0.4186282157897949,
      "learning_rate": 4.890717763881842e-06,
      "loss": 0.0145,
      "step": 1570800
    },
    {
      "epoch": 2.5706813822718853,
      "grad_norm": 0.27924299240112305,
      "learning_rate": 4.890651871668325e-06,
      "loss": 0.0165,
      "step": 1570820
    },
    {
      "epoch": 2.5707141127105384,
      "grad_norm": 0.3859376013278961,
      "learning_rate": 4.890585979454808e-06,
      "loss": 0.0198,
      "step": 1570840
    },
    {
      "epoch": 2.570746843149192,
      "grad_norm": 0.9787114858627319,
      "learning_rate": 4.8905200872412915e-06,
      "loss": 0.0171,
      "step": 1570860
    },
    {
      "epoch": 2.570779573587845,
      "grad_norm": 0.19835789501667023,
      "learning_rate": 4.890454195027774e-06,
      "loss": 0.0134,
      "step": 1570880
    },
    {
      "epoch": 2.5708123040264983,
      "grad_norm": 0.526666522026062,
      "learning_rate": 4.890388302814257e-06,
      "loss": 0.0168,
      "step": 1570900
    },
    {
      "epoch": 2.570845034465152,
      "grad_norm": 0.4764261245727539,
      "learning_rate": 4.89032241060074e-06,
      "loss": 0.0186,
      "step": 1570920
    },
    {
      "epoch": 2.5708777649038055,
      "grad_norm": 0.8312339186668396,
      "learning_rate": 4.890256518387222e-06,
      "loss": 0.0109,
      "step": 1570940
    },
    {
      "epoch": 2.5709104953424586,
      "grad_norm": 0.8925871849060059,
      "learning_rate": 4.890190626173705e-06,
      "loss": 0.0148,
      "step": 1570960
    },
    {
      "epoch": 2.570943225781112,
      "grad_norm": 1.9269541501998901,
      "learning_rate": 4.890124733960188e-06,
      "loss": 0.0162,
      "step": 1570980
    },
    {
      "epoch": 2.5709759562197654,
      "grad_norm": 0.25336143374443054,
      "learning_rate": 4.890058841746671e-06,
      "loss": 0.0144,
      "step": 1571000
    },
    {
      "epoch": 2.5710086866584185,
      "grad_norm": 0.5436486601829529,
      "learning_rate": 4.889992949533154e-06,
      "loss": 0.0171,
      "step": 1571020
    },
    {
      "epoch": 2.5710414170970717,
      "grad_norm": 0.35398703813552856,
      "learning_rate": 4.889927057319637e-06,
      "loss": 0.0147,
      "step": 1571040
    },
    {
      "epoch": 2.5710741475357253,
      "grad_norm": 0.6479472517967224,
      "learning_rate": 4.88986116510612e-06,
      "loss": 0.0124,
      "step": 1571060
    },
    {
      "epoch": 2.571106877974379,
      "grad_norm": 0.13582392036914825,
      "learning_rate": 4.889795272892603e-06,
      "loss": 0.0135,
      "step": 1571080
    },
    {
      "epoch": 2.571139608413032,
      "grad_norm": 0.22582747042179108,
      "learning_rate": 4.889729380679086e-06,
      "loss": 0.0101,
      "step": 1571100
    },
    {
      "epoch": 2.571172338851685,
      "grad_norm": 0.8926719427108765,
      "learning_rate": 4.889663488465569e-06,
      "loss": 0.0155,
      "step": 1571120
    },
    {
      "epoch": 2.5712050692903388,
      "grad_norm": 1.009191632270813,
      "learning_rate": 4.8895975962520515e-06,
      "loss": 0.0151,
      "step": 1571140
    },
    {
      "epoch": 2.571237799728992,
      "grad_norm": 0.24417711794376373,
      "learning_rate": 4.889531704038534e-06,
      "loss": 0.0152,
      "step": 1571160
    },
    {
      "epoch": 2.571270530167645,
      "grad_norm": 0.262642502784729,
      "learning_rate": 4.889465811825017e-06,
      "loss": 0.0147,
      "step": 1571180
    },
    {
      "epoch": 2.5713032606062987,
      "grad_norm": 0.45751598477363586,
      "learning_rate": 4.8893999196115e-06,
      "loss": 0.0112,
      "step": 1571200
    },
    {
      "epoch": 2.5713359910449523,
      "grad_norm": 0.07124847173690796,
      "learning_rate": 4.8893340273979825e-06,
      "loss": 0.0082,
      "step": 1571220
    },
    {
      "epoch": 2.5713687214836054,
      "grad_norm": 0.4083176553249359,
      "learning_rate": 4.889268135184466e-06,
      "loss": 0.0153,
      "step": 1571240
    },
    {
      "epoch": 2.5714014519222586,
      "grad_norm": 0.2386018931865692,
      "learning_rate": 4.889202242970949e-06,
      "loss": 0.0179,
      "step": 1571260
    },
    {
      "epoch": 2.571434182360912,
      "grad_norm": 0.15664935111999512,
      "learning_rate": 4.8891363507574315e-06,
      "loss": 0.0105,
      "step": 1571280
    },
    {
      "epoch": 2.5714669127995653,
      "grad_norm": 0.35172826051712036,
      "learning_rate": 4.889070458543914e-06,
      "loss": 0.0127,
      "step": 1571300
    },
    {
      "epoch": 2.5714996432382184,
      "grad_norm": 0.31423068046569824,
      "learning_rate": 4.889004566330397e-06,
      "loss": 0.0135,
      "step": 1571320
    },
    {
      "epoch": 2.571532373676872,
      "grad_norm": 0.25727778673171997,
      "learning_rate": 4.88893867411688e-06,
      "loss": 0.0108,
      "step": 1571340
    },
    {
      "epoch": 2.571565104115525,
      "grad_norm": 0.5579448342323303,
      "learning_rate": 4.8888727819033625e-06,
      "loss": 0.0168,
      "step": 1571360
    },
    {
      "epoch": 2.571597834554179,
      "grad_norm": 0.3684973418712616,
      "learning_rate": 4.888806889689845e-06,
      "loss": 0.0116,
      "step": 1571380
    },
    {
      "epoch": 2.571630564992832,
      "grad_norm": 0.19523973762989044,
      "learning_rate": 4.888740997476328e-06,
      "loss": 0.0142,
      "step": 1571400
    },
    {
      "epoch": 2.5716632954314855,
      "grad_norm": 0.38014790415763855,
      "learning_rate": 4.8886751052628116e-06,
      "loss": 0.0116,
      "step": 1571420
    },
    {
      "epoch": 2.5716960258701387,
      "grad_norm": 0.44205379486083984,
      "learning_rate": 4.888609213049294e-06,
      "loss": 0.0159,
      "step": 1571440
    },
    {
      "epoch": 2.571728756308792,
      "grad_norm": 0.26821157336235046,
      "learning_rate": 4.888543320835777e-06,
      "loss": 0.0149,
      "step": 1571460
    },
    {
      "epoch": 2.5717614867474454,
      "grad_norm": 0.2622604966163635,
      "learning_rate": 4.888477428622261e-06,
      "loss": 0.012,
      "step": 1571480
    },
    {
      "epoch": 2.5717942171860986,
      "grad_norm": 0.1410452276468277,
      "learning_rate": 4.888411536408743e-06,
      "loss": 0.0124,
      "step": 1571500
    },
    {
      "epoch": 2.571826947624752,
      "grad_norm": 0.4423333406448364,
      "learning_rate": 4.888345644195226e-06,
      "loss": 0.0092,
      "step": 1571520
    },
    {
      "epoch": 2.5718596780634053,
      "grad_norm": 0.5496110320091248,
      "learning_rate": 4.888279751981709e-06,
      "loss": 0.0131,
      "step": 1571540
    },
    {
      "epoch": 2.571892408502059,
      "grad_norm": 0.7486894130706787,
      "learning_rate": 4.888213859768192e-06,
      "loss": 0.0163,
      "step": 1571560
    },
    {
      "epoch": 2.571925138940712,
      "grad_norm": 0.30374595522880554,
      "learning_rate": 4.888147967554674e-06,
      "loss": 0.0128,
      "step": 1571580
    },
    {
      "epoch": 2.571957869379365,
      "grad_norm": 0.23193177580833435,
      "learning_rate": 4.888082075341157e-06,
      "loss": 0.0099,
      "step": 1571600
    },
    {
      "epoch": 2.571990599818019,
      "grad_norm": 0.5121281147003174,
      "learning_rate": 4.88801618312764e-06,
      "loss": 0.0178,
      "step": 1571620
    },
    {
      "epoch": 2.572023330256672,
      "grad_norm": 0.5782712697982788,
      "learning_rate": 4.887950290914123e-06,
      "loss": 0.0117,
      "step": 1571640
    },
    {
      "epoch": 2.5720560606953256,
      "grad_norm": 0.6142623424530029,
      "learning_rate": 4.887884398700606e-06,
      "loss": 0.0158,
      "step": 1571660
    },
    {
      "epoch": 2.5720887911339787,
      "grad_norm": 0.10269691050052643,
      "learning_rate": 4.887818506487089e-06,
      "loss": 0.0135,
      "step": 1571680
    },
    {
      "epoch": 2.5721215215726323,
      "grad_norm": 0.8343331217765808,
      "learning_rate": 4.887752614273572e-06,
      "loss": 0.0211,
      "step": 1571700
    },
    {
      "epoch": 2.5721542520112854,
      "grad_norm": 0.14478041231632233,
      "learning_rate": 4.887686722060054e-06,
      "loss": 0.0109,
      "step": 1571720
    },
    {
      "epoch": 2.5721869824499386,
      "grad_norm": 0.5018455982208252,
      "learning_rate": 4.887620829846537e-06,
      "loss": 0.0126,
      "step": 1571740
    },
    {
      "epoch": 2.572219712888592,
      "grad_norm": 0.6845029592514038,
      "learning_rate": 4.887554937633021e-06,
      "loss": 0.0161,
      "step": 1571760
    },
    {
      "epoch": 2.5722524433272453,
      "grad_norm": 0.2000482827425003,
      "learning_rate": 4.887489045419503e-06,
      "loss": 0.0152,
      "step": 1571780
    },
    {
      "epoch": 2.572285173765899,
      "grad_norm": 0.11124540865421295,
      "learning_rate": 4.887423153205986e-06,
      "loss": 0.01,
      "step": 1571800
    },
    {
      "epoch": 2.572317904204552,
      "grad_norm": 0.4786284267902374,
      "learning_rate": 4.887357260992469e-06,
      "loss": 0.018,
      "step": 1571820
    },
    {
      "epoch": 2.5723506346432057,
      "grad_norm": 0.2673570215702057,
      "learning_rate": 4.887291368778952e-06,
      "loss": 0.0166,
      "step": 1571840
    },
    {
      "epoch": 2.572383365081859,
      "grad_norm": 0.21049165725708008,
      "learning_rate": 4.887225476565434e-06,
      "loss": 0.012,
      "step": 1571860
    },
    {
      "epoch": 2.572416095520512,
      "grad_norm": 0.29682376980781555,
      "learning_rate": 4.887159584351918e-06,
      "loss": 0.0178,
      "step": 1571880
    },
    {
      "epoch": 2.5724488259591656,
      "grad_norm": 0.4314936101436615,
      "learning_rate": 4.887093692138401e-06,
      "loss": 0.0121,
      "step": 1571900
    },
    {
      "epoch": 2.5724815563978187,
      "grad_norm": 0.2194843888282776,
      "learning_rate": 4.8870277999248834e-06,
      "loss": 0.0147,
      "step": 1571920
    },
    {
      "epoch": 2.5725142868364723,
      "grad_norm": 0.554947555065155,
      "learning_rate": 4.886961907711366e-06,
      "loss": 0.0203,
      "step": 1571940
    },
    {
      "epoch": 2.5725470172751255,
      "grad_norm": 0.4474996328353882,
      "learning_rate": 4.886896015497849e-06,
      "loss": 0.0099,
      "step": 1571960
    },
    {
      "epoch": 2.572579747713779,
      "grad_norm": 0.15242017805576324,
      "learning_rate": 4.886830123284332e-06,
      "loss": 0.0133,
      "step": 1571980
    },
    {
      "epoch": 2.572612478152432,
      "grad_norm": 0.3474370539188385,
      "learning_rate": 4.886764231070814e-06,
      "loss": 0.011,
      "step": 1572000
    },
    {
      "epoch": 2.5726452085910854,
      "grad_norm": 0.24614217877388,
      "learning_rate": 4.886698338857297e-06,
      "loss": 0.0145,
      "step": 1572020
    },
    {
      "epoch": 2.572677939029739,
      "grad_norm": 0.06198261305689812,
      "learning_rate": 4.886632446643781e-06,
      "loss": 0.0141,
      "step": 1572040
    },
    {
      "epoch": 2.572710669468392,
      "grad_norm": 0.7584179639816284,
      "learning_rate": 4.8865665544302635e-06,
      "loss": 0.012,
      "step": 1572060
    },
    {
      "epoch": 2.5727433999070457,
      "grad_norm": 0.4714775085449219,
      "learning_rate": 4.886500662216746e-06,
      "loss": 0.0161,
      "step": 1572080
    },
    {
      "epoch": 2.572776130345699,
      "grad_norm": 0.5109309554100037,
      "learning_rate": 4.886434770003229e-06,
      "loss": 0.0108,
      "step": 1572100
    },
    {
      "epoch": 2.5728088607843524,
      "grad_norm": 0.16216355562210083,
      "learning_rate": 4.8863688777897125e-06,
      "loss": 0.014,
      "step": 1572120
    },
    {
      "epoch": 2.5728415912230056,
      "grad_norm": 0.30467870831489563,
      "learning_rate": 4.886302985576195e-06,
      "loss": 0.0108,
      "step": 1572140
    },
    {
      "epoch": 2.5728743216616587,
      "grad_norm": 0.3192797303199768,
      "learning_rate": 4.886237093362678e-06,
      "loss": 0.0159,
      "step": 1572160
    },
    {
      "epoch": 2.5729070521003123,
      "grad_norm": 0.29815980792045593,
      "learning_rate": 4.886171201149161e-06,
      "loss": 0.0136,
      "step": 1572180
    },
    {
      "epoch": 2.5729397825389655,
      "grad_norm": 0.2937794625759125,
      "learning_rate": 4.8861053089356435e-06,
      "loss": 0.0141,
      "step": 1572200
    },
    {
      "epoch": 2.572972512977619,
      "grad_norm": 0.18526983261108398,
      "learning_rate": 4.886039416722126e-06,
      "loss": 0.0141,
      "step": 1572220
    },
    {
      "epoch": 2.5730052434162722,
      "grad_norm": 0.4928608536720276,
      "learning_rate": 4.885973524508609e-06,
      "loss": 0.0099,
      "step": 1572240
    },
    {
      "epoch": 2.573037973854926,
      "grad_norm": 0.35520291328430176,
      "learning_rate": 4.885907632295092e-06,
      "loss": 0.0149,
      "step": 1572260
    },
    {
      "epoch": 2.573070704293579,
      "grad_norm": 0.9241328835487366,
      "learning_rate": 4.885841740081575e-06,
      "loss": 0.0105,
      "step": 1572280
    },
    {
      "epoch": 2.573103434732232,
      "grad_norm": 0.26093509793281555,
      "learning_rate": 4.885775847868058e-06,
      "loss": 0.0113,
      "step": 1572300
    },
    {
      "epoch": 2.5731361651708857,
      "grad_norm": 0.45876750349998474,
      "learning_rate": 4.885709955654541e-06,
      "loss": 0.0128,
      "step": 1572320
    },
    {
      "epoch": 2.573168895609539,
      "grad_norm": 0.35317936539649963,
      "learning_rate": 4.8856440634410235e-06,
      "loss": 0.0094,
      "step": 1572340
    },
    {
      "epoch": 2.573201626048192,
      "grad_norm": 0.3957575559616089,
      "learning_rate": 4.885578171227506e-06,
      "loss": 0.0198,
      "step": 1572360
    },
    {
      "epoch": 2.5732343564868456,
      "grad_norm": 0.38644275069236755,
      "learning_rate": 4.885512279013989e-06,
      "loss": 0.0112,
      "step": 1572380
    },
    {
      "epoch": 2.573267086925499,
      "grad_norm": 0.3943508565425873,
      "learning_rate": 4.885446386800472e-06,
      "loss": 0.0184,
      "step": 1572400
    },
    {
      "epoch": 2.5732998173641524,
      "grad_norm": 0.574894905090332,
      "learning_rate": 4.8853804945869545e-06,
      "loss": 0.018,
      "step": 1572420
    },
    {
      "epoch": 2.5733325478028055,
      "grad_norm": 0.2928254306316376,
      "learning_rate": 4.885314602373438e-06,
      "loss": 0.0164,
      "step": 1572440
    },
    {
      "epoch": 2.573365278241459,
      "grad_norm": 2.372417688369751,
      "learning_rate": 4.885248710159921e-06,
      "loss": 0.011,
      "step": 1572460
    },
    {
      "epoch": 2.5733980086801123,
      "grad_norm": 0.13686886429786682,
      "learning_rate": 4.8851828179464035e-06,
      "loss": 0.0122,
      "step": 1572480
    },
    {
      "epoch": 2.5734307391187654,
      "grad_norm": 0.26631274819374084,
      "learning_rate": 4.885116925732887e-06,
      "loss": 0.0121,
      "step": 1572500
    },
    {
      "epoch": 2.573463469557419,
      "grad_norm": 0.037681758403778076,
      "learning_rate": 4.88505103351937e-06,
      "loss": 0.0102,
      "step": 1572520
    },
    {
      "epoch": 2.5734961999960726,
      "grad_norm": 1.0659325122833252,
      "learning_rate": 4.884985141305853e-06,
      "loss": 0.0131,
      "step": 1572540
    },
    {
      "epoch": 2.5735289304347257,
      "grad_norm": 0.5951010584831238,
      "learning_rate": 4.884919249092335e-06,
      "loss": 0.0129,
      "step": 1572560
    },
    {
      "epoch": 2.573561660873379,
      "grad_norm": 0.44701841473579407,
      "learning_rate": 4.884853356878818e-06,
      "loss": 0.0137,
      "step": 1572580
    },
    {
      "epoch": 2.5735943913120325,
      "grad_norm": 0.4996533691883087,
      "learning_rate": 4.884787464665301e-06,
      "loss": 0.013,
      "step": 1572600
    },
    {
      "epoch": 2.5736271217506856,
      "grad_norm": 0.8073796033859253,
      "learning_rate": 4.8847215724517836e-06,
      "loss": 0.0117,
      "step": 1572620
    },
    {
      "epoch": 2.573659852189339,
      "grad_norm": 0.23783144354820251,
      "learning_rate": 4.884655680238266e-06,
      "loss": 0.0199,
      "step": 1572640
    },
    {
      "epoch": 2.5736925826279924,
      "grad_norm": 0.09224146604537964,
      "learning_rate": 4.88458978802475e-06,
      "loss": 0.0144,
      "step": 1572660
    },
    {
      "epoch": 2.573725313066646,
      "grad_norm": 0.9547150731086731,
      "learning_rate": 4.884523895811233e-06,
      "loss": 0.0189,
      "step": 1572680
    },
    {
      "epoch": 2.573758043505299,
      "grad_norm": 0.783394992351532,
      "learning_rate": 4.884458003597715e-06,
      "loss": 0.022,
      "step": 1572700
    },
    {
      "epoch": 2.5737907739439523,
      "grad_norm": 0.10327617079019547,
      "learning_rate": 4.884392111384198e-06,
      "loss": 0.0162,
      "step": 1572720
    },
    {
      "epoch": 2.573823504382606,
      "grad_norm": 0.06111704185605049,
      "learning_rate": 4.884326219170681e-06,
      "loss": 0.0087,
      "step": 1572740
    },
    {
      "epoch": 2.573856234821259,
      "grad_norm": 0.438871830701828,
      "learning_rate": 4.884260326957164e-06,
      "loss": 0.0231,
      "step": 1572760
    },
    {
      "epoch": 2.573888965259912,
      "grad_norm": 0.5342345237731934,
      "learning_rate": 4.884194434743646e-06,
      "loss": 0.0116,
      "step": 1572780
    },
    {
      "epoch": 2.5739216956985658,
      "grad_norm": 0.1943560391664505,
      "learning_rate": 4.884128542530129e-06,
      "loss": 0.0103,
      "step": 1572800
    },
    {
      "epoch": 2.5739544261372194,
      "grad_norm": 0.4184767007827759,
      "learning_rate": 4.884062650316613e-06,
      "loss": 0.0088,
      "step": 1572820
    },
    {
      "epoch": 2.5739871565758725,
      "grad_norm": 0.31716009974479675,
      "learning_rate": 4.883996758103095e-06,
      "loss": 0.0118,
      "step": 1572840
    },
    {
      "epoch": 2.5740198870145257,
      "grad_norm": 0.46811413764953613,
      "learning_rate": 4.883930865889578e-06,
      "loss": 0.0101,
      "step": 1572860
    },
    {
      "epoch": 2.5740526174531793,
      "grad_norm": 0.5250235795974731,
      "learning_rate": 4.883864973676061e-06,
      "loss": 0.0135,
      "step": 1572880
    },
    {
      "epoch": 2.5740853478918324,
      "grad_norm": 0.28485870361328125,
      "learning_rate": 4.8837990814625445e-06,
      "loss": 0.0185,
      "step": 1572900
    },
    {
      "epoch": 2.5741180783304856,
      "grad_norm": 0.3610529601573944,
      "learning_rate": 4.883733189249027e-06,
      "loss": 0.021,
      "step": 1572920
    },
    {
      "epoch": 2.574150808769139,
      "grad_norm": 0.7853872776031494,
      "learning_rate": 4.88366729703551e-06,
      "loss": 0.0139,
      "step": 1572940
    },
    {
      "epoch": 2.5741835392077923,
      "grad_norm": 0.32437291741371155,
      "learning_rate": 4.883601404821993e-06,
      "loss": 0.0092,
      "step": 1572960
    },
    {
      "epoch": 2.574216269646446,
      "grad_norm": 0.14196579158306122,
      "learning_rate": 4.883535512608475e-06,
      "loss": 0.0168,
      "step": 1572980
    },
    {
      "epoch": 2.574249000085099,
      "grad_norm": 0.6453896760940552,
      "learning_rate": 4.883469620394958e-06,
      "loss": 0.0138,
      "step": 1573000
    },
    {
      "epoch": 2.5742817305237526,
      "grad_norm": 0.5334203243255615,
      "learning_rate": 4.883403728181441e-06,
      "loss": 0.0141,
      "step": 1573020
    },
    {
      "epoch": 2.574314460962406,
      "grad_norm": 5.072393894195557,
      "learning_rate": 4.883337835967924e-06,
      "loss": 0.0192,
      "step": 1573040
    },
    {
      "epoch": 2.574347191401059,
      "grad_norm": 0.5973476767539978,
      "learning_rate": 4.883271943754407e-06,
      "loss": 0.0168,
      "step": 1573060
    },
    {
      "epoch": 2.5743799218397125,
      "grad_norm": 0.7409176230430603,
      "learning_rate": 4.88320605154089e-06,
      "loss": 0.0151,
      "step": 1573080
    },
    {
      "epoch": 2.5744126522783657,
      "grad_norm": 0.9517642855644226,
      "learning_rate": 4.883140159327373e-06,
      "loss": 0.0189,
      "step": 1573100
    },
    {
      "epoch": 2.5744453827170193,
      "grad_norm": 0.5745867490768433,
      "learning_rate": 4.8830742671138554e-06,
      "loss": 0.015,
      "step": 1573120
    },
    {
      "epoch": 2.5744781131556724,
      "grad_norm": 0.4971157908439636,
      "learning_rate": 4.883008374900338e-06,
      "loss": 0.0234,
      "step": 1573140
    },
    {
      "epoch": 2.574510843594326,
      "grad_norm": 0.17203295230865479,
      "learning_rate": 4.882942482686821e-06,
      "loss": 0.0119,
      "step": 1573160
    },
    {
      "epoch": 2.574543574032979,
      "grad_norm": 0.7980541586875916,
      "learning_rate": 4.8828765904733045e-06,
      "loss": 0.0199,
      "step": 1573180
    },
    {
      "epoch": 2.5745763044716323,
      "grad_norm": 0.09540556371212006,
      "learning_rate": 4.882810698259787e-06,
      "loss": 0.0089,
      "step": 1573200
    },
    {
      "epoch": 2.574609034910286,
      "grad_norm": 0.20710839331150055,
      "learning_rate": 4.88274480604627e-06,
      "loss": 0.0177,
      "step": 1573220
    },
    {
      "epoch": 2.574641765348939,
      "grad_norm": 0.34830430150032043,
      "learning_rate": 4.882678913832753e-06,
      "loss": 0.012,
      "step": 1573240
    },
    {
      "epoch": 2.5746744957875927,
      "grad_norm": 1.0951642990112305,
      "learning_rate": 4.8826130216192355e-06,
      "loss": 0.0101,
      "step": 1573260
    },
    {
      "epoch": 2.574707226226246,
      "grad_norm": 0.3952101469039917,
      "learning_rate": 4.882547129405718e-06,
      "loss": 0.022,
      "step": 1573280
    },
    {
      "epoch": 2.5747399566648994,
      "grad_norm": 0.30348706245422363,
      "learning_rate": 4.882481237192202e-06,
      "loss": 0.0158,
      "step": 1573300
    },
    {
      "epoch": 2.5747726871035526,
      "grad_norm": 0.7211140394210815,
      "learning_rate": 4.8824153449786845e-06,
      "loss": 0.0207,
      "step": 1573320
    },
    {
      "epoch": 2.5748054175422057,
      "grad_norm": 0.7564604878425598,
      "learning_rate": 4.882349452765167e-06,
      "loss": 0.0086,
      "step": 1573340
    },
    {
      "epoch": 2.5748381479808593,
      "grad_norm": 0.2968979477882385,
      "learning_rate": 4.88228356055165e-06,
      "loss": 0.017,
      "step": 1573360
    },
    {
      "epoch": 2.5748708784195125,
      "grad_norm": 0.23323208093643188,
      "learning_rate": 4.882217668338133e-06,
      "loss": 0.0095,
      "step": 1573380
    },
    {
      "epoch": 2.574903608858166,
      "grad_norm": 0.4625800549983978,
      "learning_rate": 4.8821517761246155e-06,
      "loss": 0.011,
      "step": 1573400
    },
    {
      "epoch": 2.574936339296819,
      "grad_norm": 0.4076728820800781,
      "learning_rate": 4.882085883911098e-06,
      "loss": 0.0154,
      "step": 1573420
    },
    {
      "epoch": 2.574969069735473,
      "grad_norm": 0.19083204865455627,
      "learning_rate": 4.882019991697581e-06,
      "loss": 0.0165,
      "step": 1573440
    },
    {
      "epoch": 2.575001800174126,
      "grad_norm": 0.27323415875434875,
      "learning_rate": 4.8819540994840646e-06,
      "loss": 0.0171,
      "step": 1573460
    },
    {
      "epoch": 2.575034530612779,
      "grad_norm": 0.8851814270019531,
      "learning_rate": 4.881888207270547e-06,
      "loss": 0.0155,
      "step": 1573480
    },
    {
      "epoch": 2.5750672610514327,
      "grad_norm": 0.2604283392429352,
      "learning_rate": 4.88182231505703e-06,
      "loss": 0.013,
      "step": 1573500
    },
    {
      "epoch": 2.575099991490086,
      "grad_norm": 0.05788794159889221,
      "learning_rate": 4.881756422843514e-06,
      "loss": 0.0075,
      "step": 1573520
    },
    {
      "epoch": 2.5751327219287394,
      "grad_norm": 0.17888052761554718,
      "learning_rate": 4.881690530629996e-06,
      "loss": 0.0122,
      "step": 1573540
    },
    {
      "epoch": 2.5751654523673926,
      "grad_norm": 0.24241191148757935,
      "learning_rate": 4.881624638416479e-06,
      "loss": 0.0126,
      "step": 1573560
    },
    {
      "epoch": 2.575198182806046,
      "grad_norm": 0.23719729483127594,
      "learning_rate": 4.881558746202962e-06,
      "loss": 0.0156,
      "step": 1573580
    },
    {
      "epoch": 2.5752309132446993,
      "grad_norm": 0.1806890070438385,
      "learning_rate": 4.881492853989445e-06,
      "loss": 0.0138,
      "step": 1573600
    },
    {
      "epoch": 2.5752636436833525,
      "grad_norm": 0.04767569154500961,
      "learning_rate": 4.881426961775927e-06,
      "loss": 0.0141,
      "step": 1573620
    },
    {
      "epoch": 2.575296374122006,
      "grad_norm": 0.11378667503595352,
      "learning_rate": 4.88136106956241e-06,
      "loss": 0.0162,
      "step": 1573640
    },
    {
      "epoch": 2.575329104560659,
      "grad_norm": 0.6634069681167603,
      "learning_rate": 4.881295177348893e-06,
      "loss": 0.0099,
      "step": 1573660
    },
    {
      "epoch": 2.575361834999313,
      "grad_norm": 0.7769986391067505,
      "learning_rate": 4.881229285135376e-06,
      "loss": 0.0158,
      "step": 1573680
    },
    {
      "epoch": 2.575394565437966,
      "grad_norm": 0.23606367409229279,
      "learning_rate": 4.881163392921859e-06,
      "loss": 0.0127,
      "step": 1573700
    },
    {
      "epoch": 2.5754272958766196,
      "grad_norm": 0.12510497868061066,
      "learning_rate": 4.881097500708342e-06,
      "loss": 0.0119,
      "step": 1573720
    },
    {
      "epoch": 2.5754600263152727,
      "grad_norm": 0.3833725154399872,
      "learning_rate": 4.881031608494825e-06,
      "loss": 0.0138,
      "step": 1573740
    },
    {
      "epoch": 2.575492756753926,
      "grad_norm": 0.30542752146720886,
      "learning_rate": 4.880965716281307e-06,
      "loss": 0.0164,
      "step": 1573760
    },
    {
      "epoch": 2.5755254871925795,
      "grad_norm": 0.9268181324005127,
      "learning_rate": 4.88089982406779e-06,
      "loss": 0.0113,
      "step": 1573780
    },
    {
      "epoch": 2.5755582176312326,
      "grad_norm": 1.0987730026245117,
      "learning_rate": 4.880833931854273e-06,
      "loss": 0.0241,
      "step": 1573800
    },
    {
      "epoch": 2.575590948069886,
      "grad_norm": 0.19525720179080963,
      "learning_rate": 4.8807680396407556e-06,
      "loss": 0.0134,
      "step": 1573820
    },
    {
      "epoch": 2.5756236785085393,
      "grad_norm": 0.21754013001918793,
      "learning_rate": 4.880702147427238e-06,
      "loss": 0.0132,
      "step": 1573840
    },
    {
      "epoch": 2.575656408947193,
      "grad_norm": 0.08853418380022049,
      "learning_rate": 4.880636255213722e-06,
      "loss": 0.0111,
      "step": 1573860
    },
    {
      "epoch": 2.575689139385846,
      "grad_norm": 0.1903700977563858,
      "learning_rate": 4.880570363000205e-06,
      "loss": 0.0204,
      "step": 1573880
    },
    {
      "epoch": 2.5757218698244992,
      "grad_norm": 0.4542773365974426,
      "learning_rate": 4.880504470786687e-06,
      "loss": 0.0146,
      "step": 1573900
    },
    {
      "epoch": 2.575754600263153,
      "grad_norm": 0.0916794091463089,
      "learning_rate": 4.880438578573171e-06,
      "loss": 0.0146,
      "step": 1573920
    },
    {
      "epoch": 2.575787330701806,
      "grad_norm": 0.5526378750801086,
      "learning_rate": 4.880372686359654e-06,
      "loss": 0.0125,
      "step": 1573940
    },
    {
      "epoch": 2.575820061140459,
      "grad_norm": 0.5686544179916382,
      "learning_rate": 4.8803067941461364e-06,
      "loss": 0.017,
      "step": 1573960
    },
    {
      "epoch": 2.5758527915791127,
      "grad_norm": 0.42629677057266235,
      "learning_rate": 4.880240901932619e-06,
      "loss": 0.0155,
      "step": 1573980
    },
    {
      "epoch": 2.5758855220177663,
      "grad_norm": 0.5888129472732544,
      "learning_rate": 4.880175009719102e-06,
      "loss": 0.0206,
      "step": 1574000
    },
    {
      "epoch": 2.5759182524564195,
      "grad_norm": 0.4452650547027588,
      "learning_rate": 4.880109117505585e-06,
      "loss": 0.0106,
      "step": 1574020
    },
    {
      "epoch": 2.5759509828950726,
      "grad_norm": 0.8178685307502747,
      "learning_rate": 4.880043225292067e-06,
      "loss": 0.011,
      "step": 1574040
    },
    {
      "epoch": 2.575983713333726,
      "grad_norm": 0.3938778042793274,
      "learning_rate": 4.87997733307855e-06,
      "loss": 0.0121,
      "step": 1574060
    },
    {
      "epoch": 2.5760164437723794,
      "grad_norm": 0.7708685398101807,
      "learning_rate": 4.879911440865034e-06,
      "loss": 0.016,
      "step": 1574080
    },
    {
      "epoch": 2.5760491742110325,
      "grad_norm": 1.1375532150268555,
      "learning_rate": 4.8798455486515165e-06,
      "loss": 0.0146,
      "step": 1574100
    },
    {
      "epoch": 2.576081904649686,
      "grad_norm": 0.44527241587638855,
      "learning_rate": 4.879779656437999e-06,
      "loss": 0.0176,
      "step": 1574120
    },
    {
      "epoch": 2.5761146350883397,
      "grad_norm": 0.4818599224090576,
      "learning_rate": 4.879713764224482e-06,
      "loss": 0.0117,
      "step": 1574140
    },
    {
      "epoch": 2.576147365526993,
      "grad_norm": 0.7368859052658081,
      "learning_rate": 4.879647872010965e-06,
      "loss": 0.019,
      "step": 1574160
    },
    {
      "epoch": 2.576180095965646,
      "grad_norm": 0.54155033826828,
      "learning_rate": 4.8795819797974474e-06,
      "loss": 0.0209,
      "step": 1574180
    },
    {
      "epoch": 2.5762128264042996,
      "grad_norm": 0.17439375817775726,
      "learning_rate": 4.87951608758393e-06,
      "loss": 0.015,
      "step": 1574200
    },
    {
      "epoch": 2.5762455568429528,
      "grad_norm": 0.7809882164001465,
      "learning_rate": 4.879450195370413e-06,
      "loss": 0.0194,
      "step": 1574220
    },
    {
      "epoch": 2.576278287281606,
      "grad_norm": 0.12193312495946884,
      "learning_rate": 4.8793843031568965e-06,
      "loss": 0.0127,
      "step": 1574240
    },
    {
      "epoch": 2.5763110177202595,
      "grad_norm": 0.1888803094625473,
      "learning_rate": 4.879318410943379e-06,
      "loss": 0.0171,
      "step": 1574260
    },
    {
      "epoch": 2.576343748158913,
      "grad_norm": 0.2099834382534027,
      "learning_rate": 4.879252518729862e-06,
      "loss": 0.0126,
      "step": 1574280
    },
    {
      "epoch": 2.5763764785975662,
      "grad_norm": 0.15108653903007507,
      "learning_rate": 4.879186626516345e-06,
      "loss": 0.0121,
      "step": 1574300
    },
    {
      "epoch": 2.5764092090362194,
      "grad_norm": 0.1843091994524002,
      "learning_rate": 4.879120734302828e-06,
      "loss": 0.0178,
      "step": 1574320
    },
    {
      "epoch": 2.576441939474873,
      "grad_norm": 0.31172817945480347,
      "learning_rate": 4.879054842089311e-06,
      "loss": 0.0138,
      "step": 1574340
    },
    {
      "epoch": 2.576474669913526,
      "grad_norm": 0.7731339335441589,
      "learning_rate": 4.878988949875794e-06,
      "loss": 0.0092,
      "step": 1574360
    },
    {
      "epoch": 2.5765074003521793,
      "grad_norm": 0.7349441051483154,
      "learning_rate": 4.8789230576622765e-06,
      "loss": 0.0162,
      "step": 1574380
    },
    {
      "epoch": 2.576540130790833,
      "grad_norm": 0.19812989234924316,
      "learning_rate": 4.878857165448759e-06,
      "loss": 0.0102,
      "step": 1574400
    },
    {
      "epoch": 2.576572861229486,
      "grad_norm": 0.1648704558610916,
      "learning_rate": 4.878791273235242e-06,
      "loss": 0.014,
      "step": 1574420
    },
    {
      "epoch": 2.5766055916681396,
      "grad_norm": 0.22130092978477478,
      "learning_rate": 4.878725381021725e-06,
      "loss": 0.0176,
      "step": 1574440
    },
    {
      "epoch": 2.5766383221067928,
      "grad_norm": 0.3646082580089569,
      "learning_rate": 4.8786594888082075e-06,
      "loss": 0.0137,
      "step": 1574460
    },
    {
      "epoch": 2.5766710525454464,
      "grad_norm": 0.8919294476509094,
      "learning_rate": 4.878593596594691e-06,
      "loss": 0.0187,
      "step": 1574480
    },
    {
      "epoch": 2.5767037829840995,
      "grad_norm": 0.19286902248859406,
      "learning_rate": 4.878527704381174e-06,
      "loss": 0.0118,
      "step": 1574500
    },
    {
      "epoch": 2.5767365134227527,
      "grad_norm": 0.6624476313591003,
      "learning_rate": 4.8784618121676565e-06,
      "loss": 0.0169,
      "step": 1574520
    },
    {
      "epoch": 2.5767692438614063,
      "grad_norm": 0.3060804605484009,
      "learning_rate": 4.878395919954139e-06,
      "loss": 0.0107,
      "step": 1574540
    },
    {
      "epoch": 2.5768019743000594,
      "grad_norm": 0.7011379599571228,
      "learning_rate": 4.878330027740622e-06,
      "loss": 0.0138,
      "step": 1574560
    },
    {
      "epoch": 2.576834704738713,
      "grad_norm": 0.18449747562408447,
      "learning_rate": 4.878264135527106e-06,
      "loss": 0.018,
      "step": 1574580
    },
    {
      "epoch": 2.576867435177366,
      "grad_norm": 0.31113359332084656,
      "learning_rate": 4.878198243313588e-06,
      "loss": 0.014,
      "step": 1574600
    },
    {
      "epoch": 2.5769001656160198,
      "grad_norm": 0.9467839598655701,
      "learning_rate": 4.878132351100071e-06,
      "loss": 0.016,
      "step": 1574620
    },
    {
      "epoch": 2.576932896054673,
      "grad_norm": 1.462670922279358,
      "learning_rate": 4.878066458886554e-06,
      "loss": 0.0132,
      "step": 1574640
    },
    {
      "epoch": 2.576965626493326,
      "grad_norm": 0.13466200232505798,
      "learning_rate": 4.8780005666730366e-06,
      "loss": 0.0227,
      "step": 1574660
    },
    {
      "epoch": 2.5769983569319797,
      "grad_norm": 0.18406865000724792,
      "learning_rate": 4.877934674459519e-06,
      "loss": 0.018,
      "step": 1574680
    },
    {
      "epoch": 2.577031087370633,
      "grad_norm": 0.31999608874320984,
      "learning_rate": 4.877868782246002e-06,
      "loss": 0.0122,
      "step": 1574700
    },
    {
      "epoch": 2.5770638178092864,
      "grad_norm": 0.5140162110328674,
      "learning_rate": 4.877802890032486e-06,
      "loss": 0.0205,
      "step": 1574720
    },
    {
      "epoch": 2.5770965482479395,
      "grad_norm": 0.6900601387023926,
      "learning_rate": 4.877736997818968e-06,
      "loss": 0.0154,
      "step": 1574740
    },
    {
      "epoch": 2.577129278686593,
      "grad_norm": 0.16908018290996552,
      "learning_rate": 4.877671105605451e-06,
      "loss": 0.0144,
      "step": 1574760
    },
    {
      "epoch": 2.5771620091252463,
      "grad_norm": 1.4064701795578003,
      "learning_rate": 4.877605213391934e-06,
      "loss": 0.014,
      "step": 1574780
    },
    {
      "epoch": 2.5771947395638994,
      "grad_norm": 0.19172121584415436,
      "learning_rate": 4.877539321178417e-06,
      "loss": 0.0162,
      "step": 1574800
    },
    {
      "epoch": 2.577227470002553,
      "grad_norm": 0.21674717962741852,
      "learning_rate": 4.877473428964899e-06,
      "loss": 0.0108,
      "step": 1574820
    },
    {
      "epoch": 2.577260200441206,
      "grad_norm": 0.9840187430381775,
      "learning_rate": 4.877407536751382e-06,
      "loss": 0.0087,
      "step": 1574840
    },
    {
      "epoch": 2.57729293087986,
      "grad_norm": 0.08013180643320084,
      "learning_rate": 4.877341644537865e-06,
      "loss": 0.0143,
      "step": 1574860
    },
    {
      "epoch": 2.577325661318513,
      "grad_norm": 0.31881430745124817,
      "learning_rate": 4.877275752324348e-06,
      "loss": 0.0139,
      "step": 1574880
    },
    {
      "epoch": 2.5773583917571665,
      "grad_norm": 0.19131824374198914,
      "learning_rate": 4.877209860110831e-06,
      "loss": 0.0134,
      "step": 1574900
    },
    {
      "epoch": 2.5773911221958197,
      "grad_norm": 0.6737619638442993,
      "learning_rate": 4.877143967897314e-06,
      "loss": 0.0114,
      "step": 1574920
    },
    {
      "epoch": 2.577423852634473,
      "grad_norm": 1.4225584268569946,
      "learning_rate": 4.8770780756837975e-06,
      "loss": 0.0128,
      "step": 1574940
    },
    {
      "epoch": 2.5774565830731264,
      "grad_norm": 0.30964750051498413,
      "learning_rate": 4.87701218347028e-06,
      "loss": 0.0096,
      "step": 1574960
    },
    {
      "epoch": 2.5774893135117796,
      "grad_norm": 0.4087887406349182,
      "learning_rate": 4.876946291256763e-06,
      "loss": 0.0124,
      "step": 1574980
    },
    {
      "epoch": 2.577522043950433,
      "grad_norm": 0.4105813205242157,
      "learning_rate": 4.876880399043246e-06,
      "loss": 0.011,
      "step": 1575000
    },
    {
      "epoch": 2.5775547743890863,
      "grad_norm": 0.02527013048529625,
      "learning_rate": 4.8768145068297284e-06,
      "loss": 0.0124,
      "step": 1575020
    },
    {
      "epoch": 2.57758750482774,
      "grad_norm": 0.5823609828948975,
      "learning_rate": 4.876748614616211e-06,
      "loss": 0.0129,
      "step": 1575040
    },
    {
      "epoch": 2.577620235266393,
      "grad_norm": 0.2948666214942932,
      "learning_rate": 4.876682722402694e-06,
      "loss": 0.0106,
      "step": 1575060
    },
    {
      "epoch": 2.577652965705046,
      "grad_norm": 0.1637900173664093,
      "learning_rate": 4.876616830189177e-06,
      "loss": 0.0136,
      "step": 1575080
    },
    {
      "epoch": 2.5776856961437,
      "grad_norm": 0.166262686252594,
      "learning_rate": 4.87655093797566e-06,
      "loss": 0.015,
      "step": 1575100
    },
    {
      "epoch": 2.577718426582353,
      "grad_norm": 0.4221302270889282,
      "learning_rate": 4.876485045762143e-06,
      "loss": 0.0211,
      "step": 1575120
    },
    {
      "epoch": 2.5777511570210065,
      "grad_norm": 0.21926112473011017,
      "learning_rate": 4.876419153548626e-06,
      "loss": 0.0114,
      "step": 1575140
    },
    {
      "epoch": 2.5777838874596597,
      "grad_norm": 0.09914039075374603,
      "learning_rate": 4.8763532613351085e-06,
      "loss": 0.0099,
      "step": 1575160
    },
    {
      "epoch": 2.5778166178983133,
      "grad_norm": 0.13041724264621735,
      "learning_rate": 4.876287369121591e-06,
      "loss": 0.0241,
      "step": 1575180
    },
    {
      "epoch": 2.5778493483369664,
      "grad_norm": 1.2419296503067017,
      "learning_rate": 4.876221476908074e-06,
      "loss": 0.0161,
      "step": 1575200
    },
    {
      "epoch": 2.5778820787756196,
      "grad_norm": 0.24783745408058167,
      "learning_rate": 4.876155584694557e-06,
      "loss": 0.0127,
      "step": 1575220
    },
    {
      "epoch": 2.577914809214273,
      "grad_norm": 0.30263540148735046,
      "learning_rate": 4.876089692481039e-06,
      "loss": 0.0175,
      "step": 1575240
    },
    {
      "epoch": 2.5779475396529263,
      "grad_norm": 0.1866351068019867,
      "learning_rate": 4.876023800267522e-06,
      "loss": 0.0122,
      "step": 1575260
    },
    {
      "epoch": 2.57798027009158,
      "grad_norm": 0.1922721266746521,
      "learning_rate": 4.875957908054006e-06,
      "loss": 0.0163,
      "step": 1575280
    },
    {
      "epoch": 2.578013000530233,
      "grad_norm": 0.35199472308158875,
      "learning_rate": 4.8758920158404885e-06,
      "loss": 0.0144,
      "step": 1575300
    },
    {
      "epoch": 2.5780457309688867,
      "grad_norm": 0.09555523842573166,
      "learning_rate": 4.875826123626971e-06,
      "loss": 0.0115,
      "step": 1575320
    },
    {
      "epoch": 2.57807846140754,
      "grad_norm": 0.32660892605781555,
      "learning_rate": 4.875760231413455e-06,
      "loss": 0.0095,
      "step": 1575340
    },
    {
      "epoch": 2.578111191846193,
      "grad_norm": 0.2718465030193329,
      "learning_rate": 4.8756943391999375e-06,
      "loss": 0.0141,
      "step": 1575360
    },
    {
      "epoch": 2.5781439222848466,
      "grad_norm": 0.40059205889701843,
      "learning_rate": 4.87562844698642e-06,
      "loss": 0.0172,
      "step": 1575380
    },
    {
      "epoch": 2.5781766527234997,
      "grad_norm": 0.6530827283859253,
      "learning_rate": 4.875562554772903e-06,
      "loss": 0.016,
      "step": 1575400
    },
    {
      "epoch": 2.578209383162153,
      "grad_norm": 0.35895398259162903,
      "learning_rate": 4.875496662559386e-06,
      "loss": 0.0142,
      "step": 1575420
    },
    {
      "epoch": 2.5782421136008065,
      "grad_norm": 0.18177548050880432,
      "learning_rate": 4.8754307703458685e-06,
      "loss": 0.0141,
      "step": 1575440
    },
    {
      "epoch": 2.57827484403946,
      "grad_norm": 1.1649892330169678,
      "learning_rate": 4.875364878132351e-06,
      "loss": 0.0186,
      "step": 1575460
    },
    {
      "epoch": 2.578307574478113,
      "grad_norm": 0.1838371604681015,
      "learning_rate": 4.875298985918834e-06,
      "loss": 0.0181,
      "step": 1575480
    },
    {
      "epoch": 2.5783403049167664,
      "grad_norm": 0.5124859809875488,
      "learning_rate": 4.8752330937053176e-06,
      "loss": 0.0176,
      "step": 1575500
    },
    {
      "epoch": 2.57837303535542,
      "grad_norm": 0.17791962623596191,
      "learning_rate": 4.8751672014918e-06,
      "loss": 0.0143,
      "step": 1575520
    },
    {
      "epoch": 2.578405765794073,
      "grad_norm": 0.6333153247833252,
      "learning_rate": 4.875101309278283e-06,
      "loss": 0.0126,
      "step": 1575540
    },
    {
      "epoch": 2.5784384962327263,
      "grad_norm": 0.04161253944039345,
      "learning_rate": 4.875035417064766e-06,
      "loss": 0.0103,
      "step": 1575560
    },
    {
      "epoch": 2.57847122667138,
      "grad_norm": 0.6263806819915771,
      "learning_rate": 4.8749695248512485e-06,
      "loss": 0.0146,
      "step": 1575580
    },
    {
      "epoch": 2.5785039571100334,
      "grad_norm": 0.1563797891139984,
      "learning_rate": 4.874903632637731e-06,
      "loss": 0.0099,
      "step": 1575600
    },
    {
      "epoch": 2.5785366875486866,
      "grad_norm": 0.9570778608322144,
      "learning_rate": 4.874837740424214e-06,
      "loss": 0.0128,
      "step": 1575620
    },
    {
      "epoch": 2.5785694179873397,
      "grad_norm": 0.8696302175521851,
      "learning_rate": 4.874771848210698e-06,
      "loss": 0.016,
      "step": 1575640
    },
    {
      "epoch": 2.5786021484259933,
      "grad_norm": 0.5154504776000977,
      "learning_rate": 4.87470595599718e-06,
      "loss": 0.0132,
      "step": 1575660
    },
    {
      "epoch": 2.5786348788646465,
      "grad_norm": 0.3342747390270233,
      "learning_rate": 4.874640063783663e-06,
      "loss": 0.016,
      "step": 1575680
    },
    {
      "epoch": 2.5786676093032996,
      "grad_norm": 0.500758945941925,
      "learning_rate": 4.874574171570146e-06,
      "loss": 0.0143,
      "step": 1575700
    },
    {
      "epoch": 2.5787003397419532,
      "grad_norm": 0.4369356632232666,
      "learning_rate": 4.8745082793566286e-06,
      "loss": 0.0132,
      "step": 1575720
    },
    {
      "epoch": 2.578733070180607,
      "grad_norm": 0.1349831223487854,
      "learning_rate": 4.874442387143112e-06,
      "loss": 0.0187,
      "step": 1575740
    },
    {
      "epoch": 2.57876580061926,
      "grad_norm": 0.17244403064250946,
      "learning_rate": 4.874376494929595e-06,
      "loss": 0.02,
      "step": 1575760
    },
    {
      "epoch": 2.578798531057913,
      "grad_norm": 0.2811586856842041,
      "learning_rate": 4.874310602716078e-06,
      "loss": 0.0127,
      "step": 1575780
    },
    {
      "epoch": 2.5788312614965667,
      "grad_norm": 0.48014864325523376,
      "learning_rate": 4.87424471050256e-06,
      "loss": 0.0107,
      "step": 1575800
    },
    {
      "epoch": 2.57886399193522,
      "grad_norm": 0.31866806745529175,
      "learning_rate": 4.874178818289043e-06,
      "loss": 0.0144,
      "step": 1575820
    },
    {
      "epoch": 2.578896722373873,
      "grad_norm": 0.7703903913497925,
      "learning_rate": 4.874112926075526e-06,
      "loss": 0.0097,
      "step": 1575840
    },
    {
      "epoch": 2.5789294528125266,
      "grad_norm": 0.1345897614955902,
      "learning_rate": 4.874047033862009e-06,
      "loss": 0.0182,
      "step": 1575860
    },
    {
      "epoch": 2.57896218325118,
      "grad_norm": 0.39048856496810913,
      "learning_rate": 4.873981141648491e-06,
      "loss": 0.0119,
      "step": 1575880
    },
    {
      "epoch": 2.5789949136898334,
      "grad_norm": 0.1532653421163559,
      "learning_rate": 4.873915249434975e-06,
      "loss": 0.0087,
      "step": 1575900
    },
    {
      "epoch": 2.5790276441284865,
      "grad_norm": 0.966725766658783,
      "learning_rate": 4.873849357221458e-06,
      "loss": 0.0165,
      "step": 1575920
    },
    {
      "epoch": 2.57906037456714,
      "grad_norm": 0.37462401390075684,
      "learning_rate": 4.87378346500794e-06,
      "loss": 0.0244,
      "step": 1575940
    },
    {
      "epoch": 2.5790931050057933,
      "grad_norm": 1.0355645418167114,
      "learning_rate": 4.873717572794423e-06,
      "loss": 0.0151,
      "step": 1575960
    },
    {
      "epoch": 2.5791258354444464,
      "grad_norm": 0.5037118792533875,
      "learning_rate": 4.873651680580907e-06,
      "loss": 0.0215,
      "step": 1575980
    },
    {
      "epoch": 2.5791585658831,
      "grad_norm": 1.131259799003601,
      "learning_rate": 4.8735857883673895e-06,
      "loss": 0.0089,
      "step": 1576000
    },
    {
      "epoch": 2.579191296321753,
      "grad_norm": 0.186234250664711,
      "learning_rate": 4.873519896153872e-06,
      "loss": 0.0104,
      "step": 1576020
    },
    {
      "epoch": 2.5792240267604067,
      "grad_norm": 0.8809617161750793,
      "learning_rate": 4.873454003940355e-06,
      "loss": 0.0147,
      "step": 1576040
    },
    {
      "epoch": 2.57925675719906,
      "grad_norm": 0.6069936752319336,
      "learning_rate": 4.873388111726838e-06,
      "loss": 0.0146,
      "step": 1576060
    },
    {
      "epoch": 2.5792894876377135,
      "grad_norm": 0.446510910987854,
      "learning_rate": 4.87332221951332e-06,
      "loss": 0.0188,
      "step": 1576080
    },
    {
      "epoch": 2.5793222180763666,
      "grad_norm": 0.5509014129638672,
      "learning_rate": 4.873256327299803e-06,
      "loss": 0.0181,
      "step": 1576100
    },
    {
      "epoch": 2.57935494851502,
      "grad_norm": 0.3463171422481537,
      "learning_rate": 4.873190435086286e-06,
      "loss": 0.0183,
      "step": 1576120
    },
    {
      "epoch": 2.5793876789536734,
      "grad_norm": 0.1126120314002037,
      "learning_rate": 4.8731245428727695e-06,
      "loss": 0.0085,
      "step": 1576140
    },
    {
      "epoch": 2.5794204093923265,
      "grad_norm": 0.2891401946544647,
      "learning_rate": 4.873058650659252e-06,
      "loss": 0.0164,
      "step": 1576160
    },
    {
      "epoch": 2.57945313983098,
      "grad_norm": 0.3076678216457367,
      "learning_rate": 4.872992758445735e-06,
      "loss": 0.0083,
      "step": 1576180
    },
    {
      "epoch": 2.5794858702696333,
      "grad_norm": 0.754308819770813,
      "learning_rate": 4.872926866232218e-06,
      "loss": 0.0157,
      "step": 1576200
    },
    {
      "epoch": 2.579518600708287,
      "grad_norm": 0.6554914116859436,
      "learning_rate": 4.8728609740187004e-06,
      "loss": 0.0188,
      "step": 1576220
    },
    {
      "epoch": 2.57955133114694,
      "grad_norm": 0.7025151252746582,
      "learning_rate": 4.872795081805183e-06,
      "loss": 0.0159,
      "step": 1576240
    },
    {
      "epoch": 2.579584061585593,
      "grad_norm": 0.3517930507659912,
      "learning_rate": 4.872729189591666e-06,
      "loss": 0.016,
      "step": 1576260
    },
    {
      "epoch": 2.5796167920242468,
      "grad_norm": 0.45994213223457336,
      "learning_rate": 4.872663297378149e-06,
      "loss": 0.0122,
      "step": 1576280
    },
    {
      "epoch": 2.5796495224629,
      "grad_norm": 0.2847074866294861,
      "learning_rate": 4.872597405164632e-06,
      "loss": 0.0171,
      "step": 1576300
    },
    {
      "epoch": 2.5796822529015535,
      "grad_norm": 0.5256219506263733,
      "learning_rate": 4.872531512951115e-06,
      "loss": 0.0124,
      "step": 1576320
    },
    {
      "epoch": 2.5797149833402067,
      "grad_norm": 0.1149045079946518,
      "learning_rate": 4.872465620737598e-06,
      "loss": 0.0171,
      "step": 1576340
    },
    {
      "epoch": 2.5797477137788603,
      "grad_norm": 0.4024108648300171,
      "learning_rate": 4.872399728524081e-06,
      "loss": 0.0199,
      "step": 1576360
    },
    {
      "epoch": 2.5797804442175134,
      "grad_norm": 0.0977226123213768,
      "learning_rate": 4.872333836310564e-06,
      "loss": 0.0142,
      "step": 1576380
    },
    {
      "epoch": 2.5798131746561666,
      "grad_norm": 0.26021409034729004,
      "learning_rate": 4.872267944097047e-06,
      "loss": 0.0179,
      "step": 1576400
    },
    {
      "epoch": 2.57984590509482,
      "grad_norm": 0.09363147616386414,
      "learning_rate": 4.8722020518835295e-06,
      "loss": 0.0073,
      "step": 1576420
    },
    {
      "epoch": 2.5798786355334733,
      "grad_norm": 0.8678417801856995,
      "learning_rate": 4.872136159670012e-06,
      "loss": 0.021,
      "step": 1576440
    },
    {
      "epoch": 2.579911365972127,
      "grad_norm": 0.35955119132995605,
      "learning_rate": 4.872070267456495e-06,
      "loss": 0.0173,
      "step": 1576460
    },
    {
      "epoch": 2.57994409641078,
      "grad_norm": 0.4544564485549927,
      "learning_rate": 4.872004375242978e-06,
      "loss": 0.0117,
      "step": 1576480
    },
    {
      "epoch": 2.5799768268494336,
      "grad_norm": 0.5421348214149475,
      "learning_rate": 4.8719384830294605e-06,
      "loss": 0.0156,
      "step": 1576500
    },
    {
      "epoch": 2.580009557288087,
      "grad_norm": 0.3914858400821686,
      "learning_rate": 4.871872590815944e-06,
      "loss": 0.0168,
      "step": 1576520
    },
    {
      "epoch": 2.58004228772674,
      "grad_norm": 0.2705831825733185,
      "learning_rate": 4.871806698602427e-06,
      "loss": 0.0102,
      "step": 1576540
    },
    {
      "epoch": 2.5800750181653935,
      "grad_norm": 0.8475984334945679,
      "learning_rate": 4.8717408063889096e-06,
      "loss": 0.0127,
      "step": 1576560
    },
    {
      "epoch": 2.5801077486040467,
      "grad_norm": 0.19341911375522614,
      "learning_rate": 4.871674914175392e-06,
      "loss": 0.015,
      "step": 1576580
    },
    {
      "epoch": 2.5801404790427003,
      "grad_norm": 0.2982322573661804,
      "learning_rate": 4.871609021961875e-06,
      "loss": 0.018,
      "step": 1576600
    },
    {
      "epoch": 2.5801732094813534,
      "grad_norm": 0.5417072772979736,
      "learning_rate": 4.871543129748358e-06,
      "loss": 0.0193,
      "step": 1576620
    },
    {
      "epoch": 2.580205939920007,
      "grad_norm": 2.57413911819458,
      "learning_rate": 4.8714772375348405e-06,
      "loss": 0.0156,
      "step": 1576640
    },
    {
      "epoch": 2.58023867035866,
      "grad_norm": 0.514126181602478,
      "learning_rate": 4.871411345321323e-06,
      "loss": 0.012,
      "step": 1576660
    },
    {
      "epoch": 2.5802714007973133,
      "grad_norm": 0.11024830490350723,
      "learning_rate": 4.871345453107806e-06,
      "loss": 0.0145,
      "step": 1576680
    },
    {
      "epoch": 2.580304131235967,
      "grad_norm": 0.4690610468387604,
      "learning_rate": 4.87127956089429e-06,
      "loss": 0.0091,
      "step": 1576700
    },
    {
      "epoch": 2.58033686167462,
      "grad_norm": 0.23307734727859497,
      "learning_rate": 4.871213668680772e-06,
      "loss": 0.013,
      "step": 1576720
    },
    {
      "epoch": 2.5803695921132737,
      "grad_norm": 0.6175603866577148,
      "learning_rate": 4.871147776467255e-06,
      "loss": 0.027,
      "step": 1576740
    },
    {
      "epoch": 2.580402322551927,
      "grad_norm": 0.19246496260166168,
      "learning_rate": 4.871081884253739e-06,
      "loss": 0.0144,
      "step": 1576760
    },
    {
      "epoch": 2.5804350529905804,
      "grad_norm": 0.17769873142242432,
      "learning_rate": 4.871015992040221e-06,
      "loss": 0.017,
      "step": 1576780
    },
    {
      "epoch": 2.5804677834292336,
      "grad_norm": 0.2805519104003906,
      "learning_rate": 4.870950099826704e-06,
      "loss": 0.0099,
      "step": 1576800
    },
    {
      "epoch": 2.5805005138678867,
      "grad_norm": 0.10062394291162491,
      "learning_rate": 4.870884207613187e-06,
      "loss": 0.0172,
      "step": 1576820
    },
    {
      "epoch": 2.5805332443065403,
      "grad_norm": 0.21886636316776276,
      "learning_rate": 4.87081831539967e-06,
      "loss": 0.0108,
      "step": 1576840
    },
    {
      "epoch": 2.5805659747451934,
      "grad_norm": 0.5447084307670593,
      "learning_rate": 4.870752423186152e-06,
      "loss": 0.0149,
      "step": 1576860
    },
    {
      "epoch": 2.5805987051838466,
      "grad_norm": 1.090521216392517,
      "learning_rate": 4.870686530972635e-06,
      "loss": 0.0227,
      "step": 1576880
    },
    {
      "epoch": 2.5806314356225,
      "grad_norm": 0.2686951756477356,
      "learning_rate": 4.870620638759118e-06,
      "loss": 0.0132,
      "step": 1576900
    },
    {
      "epoch": 2.580664166061154,
      "grad_norm": 0.46002230048179626,
      "learning_rate": 4.870554746545601e-06,
      "loss": 0.0247,
      "step": 1576920
    },
    {
      "epoch": 2.580696896499807,
      "grad_norm": 0.03967667743563652,
      "learning_rate": 4.870488854332084e-06,
      "loss": 0.013,
      "step": 1576940
    },
    {
      "epoch": 2.58072962693846,
      "grad_norm": 0.3046717345714569,
      "learning_rate": 4.870422962118567e-06,
      "loss": 0.0103,
      "step": 1576960
    },
    {
      "epoch": 2.5807623573771137,
      "grad_norm": 0.11700669676065445,
      "learning_rate": 4.87035706990505e-06,
      "loss": 0.0206,
      "step": 1576980
    },
    {
      "epoch": 2.580795087815767,
      "grad_norm": 0.6670724749565125,
      "learning_rate": 4.870291177691532e-06,
      "loss": 0.0203,
      "step": 1577000
    },
    {
      "epoch": 2.58082781825442,
      "grad_norm": 1.155132532119751,
      "learning_rate": 4.870225285478015e-06,
      "loss": 0.0116,
      "step": 1577020
    },
    {
      "epoch": 2.5808605486930736,
      "grad_norm": 0.17076916992664337,
      "learning_rate": 4.870159393264499e-06,
      "loss": 0.02,
      "step": 1577040
    },
    {
      "epoch": 2.580893279131727,
      "grad_norm": 0.7144426107406616,
      "learning_rate": 4.8700935010509814e-06,
      "loss": 0.0147,
      "step": 1577060
    },
    {
      "epoch": 2.5809260095703803,
      "grad_norm": 0.13318108022212982,
      "learning_rate": 4.870027608837464e-06,
      "loss": 0.0112,
      "step": 1577080
    },
    {
      "epoch": 2.5809587400090335,
      "grad_norm": 0.23925459384918213,
      "learning_rate": 4.869961716623947e-06,
      "loss": 0.0091,
      "step": 1577100
    },
    {
      "epoch": 2.580991470447687,
      "grad_norm": 0.30835551023483276,
      "learning_rate": 4.86989582441043e-06,
      "loss": 0.0103,
      "step": 1577120
    },
    {
      "epoch": 2.58102420088634,
      "grad_norm": 0.4452970325946808,
      "learning_rate": 4.869829932196912e-06,
      "loss": 0.0145,
      "step": 1577140
    },
    {
      "epoch": 2.5810569313249934,
      "grad_norm": 0.5627257823944092,
      "learning_rate": 4.869764039983396e-06,
      "loss": 0.0184,
      "step": 1577160
    },
    {
      "epoch": 2.581089661763647,
      "grad_norm": 0.22649343311786652,
      "learning_rate": 4.869698147769879e-06,
      "loss": 0.0217,
      "step": 1577180
    },
    {
      "epoch": 2.5811223922023006,
      "grad_norm": 0.06633613258600235,
      "learning_rate": 4.8696322555563615e-06,
      "loss": 0.0174,
      "step": 1577200
    },
    {
      "epoch": 2.5811551226409537,
      "grad_norm": 0.1565813273191452,
      "learning_rate": 4.869566363342844e-06,
      "loss": 0.0278,
      "step": 1577220
    },
    {
      "epoch": 2.581187853079607,
      "grad_norm": 0.49244821071624756,
      "learning_rate": 4.869500471129327e-06,
      "loss": 0.0143,
      "step": 1577240
    },
    {
      "epoch": 2.5812205835182604,
      "grad_norm": 0.6656282544136047,
      "learning_rate": 4.86943457891581e-06,
      "loss": 0.0181,
      "step": 1577260
    },
    {
      "epoch": 2.5812533139569136,
      "grad_norm": 0.915290892124176,
      "learning_rate": 4.869368686702292e-06,
      "loss": 0.0191,
      "step": 1577280
    },
    {
      "epoch": 2.5812860443955667,
      "grad_norm": 0.17536798119544983,
      "learning_rate": 4.869302794488775e-06,
      "loss": 0.0108,
      "step": 1577300
    },
    {
      "epoch": 2.5813187748342203,
      "grad_norm": 0.3207712769508362,
      "learning_rate": 4.869236902275259e-06,
      "loss": 0.0116,
      "step": 1577320
    },
    {
      "epoch": 2.581351505272874,
      "grad_norm": 0.16295188665390015,
      "learning_rate": 4.8691710100617415e-06,
      "loss": 0.0109,
      "step": 1577340
    },
    {
      "epoch": 2.581384235711527,
      "grad_norm": 0.1314084529876709,
      "learning_rate": 4.869105117848224e-06,
      "loss": 0.0115,
      "step": 1577360
    },
    {
      "epoch": 2.5814169661501802,
      "grad_norm": 0.21938888728618622,
      "learning_rate": 4.869039225634707e-06,
      "loss": 0.0112,
      "step": 1577380
    },
    {
      "epoch": 2.581449696588834,
      "grad_norm": 0.19320550560951233,
      "learning_rate": 4.8689733334211906e-06,
      "loss": 0.0081,
      "step": 1577400
    },
    {
      "epoch": 2.581482427027487,
      "grad_norm": 0.8086053729057312,
      "learning_rate": 4.868907441207673e-06,
      "loss": 0.0124,
      "step": 1577420
    },
    {
      "epoch": 2.58151515746614,
      "grad_norm": 0.38544976711273193,
      "learning_rate": 4.868841548994156e-06,
      "loss": 0.0127,
      "step": 1577440
    },
    {
      "epoch": 2.5815478879047937,
      "grad_norm": 0.21442848443984985,
      "learning_rate": 4.868775656780639e-06,
      "loss": 0.0099,
      "step": 1577460
    },
    {
      "epoch": 2.581580618343447,
      "grad_norm": 0.1101987287402153,
      "learning_rate": 4.8687097645671215e-06,
      "loss": 0.0178,
      "step": 1577480
    },
    {
      "epoch": 2.5816133487821005,
      "grad_norm": 0.28865253925323486,
      "learning_rate": 4.868643872353604e-06,
      "loss": 0.0112,
      "step": 1577500
    },
    {
      "epoch": 2.5816460792207536,
      "grad_norm": 0.14513592422008514,
      "learning_rate": 4.868577980140087e-06,
      "loss": 0.0153,
      "step": 1577520
    },
    {
      "epoch": 2.581678809659407,
      "grad_norm": 0.4064250588417053,
      "learning_rate": 4.86851208792657e-06,
      "loss": 0.0151,
      "step": 1577540
    },
    {
      "epoch": 2.5817115400980604,
      "grad_norm": 0.309698224067688,
      "learning_rate": 4.868446195713053e-06,
      "loss": 0.0146,
      "step": 1577560
    },
    {
      "epoch": 2.5817442705367135,
      "grad_norm": 0.40623632073402405,
      "learning_rate": 4.868380303499536e-06,
      "loss": 0.0149,
      "step": 1577580
    },
    {
      "epoch": 2.581777000975367,
      "grad_norm": 0.5174314975738525,
      "learning_rate": 4.868314411286019e-06,
      "loss": 0.0169,
      "step": 1577600
    },
    {
      "epoch": 2.5818097314140203,
      "grad_norm": 0.22272777557373047,
      "learning_rate": 4.8682485190725015e-06,
      "loss": 0.0173,
      "step": 1577620
    },
    {
      "epoch": 2.581842461852674,
      "grad_norm": 0.3906506597995758,
      "learning_rate": 4.868182626858984e-06,
      "loss": 0.0108,
      "step": 1577640
    },
    {
      "epoch": 2.581875192291327,
      "grad_norm": 0.44991248846054077,
      "learning_rate": 4.868116734645467e-06,
      "loss": 0.0125,
      "step": 1577660
    },
    {
      "epoch": 2.5819079227299806,
      "grad_norm": 0.3836330771446228,
      "learning_rate": 4.86805084243195e-06,
      "loss": 0.0184,
      "step": 1577680
    },
    {
      "epoch": 2.5819406531686337,
      "grad_norm": 0.3791699707508087,
      "learning_rate": 4.8679849502184325e-06,
      "loss": 0.0118,
      "step": 1577700
    },
    {
      "epoch": 2.581973383607287,
      "grad_norm": 0.5320200324058533,
      "learning_rate": 4.867919058004916e-06,
      "loss": 0.0105,
      "step": 1577720
    },
    {
      "epoch": 2.5820061140459405,
      "grad_norm": 0.2492072582244873,
      "learning_rate": 4.867853165791399e-06,
      "loss": 0.0173,
      "step": 1577740
    },
    {
      "epoch": 2.5820388444845936,
      "grad_norm": 0.13179484009742737,
      "learning_rate": 4.8677872735778816e-06,
      "loss": 0.0173,
      "step": 1577760
    },
    {
      "epoch": 2.5820715749232472,
      "grad_norm": 0.5243625044822693,
      "learning_rate": 4.867721381364365e-06,
      "loss": 0.0136,
      "step": 1577780
    },
    {
      "epoch": 2.5821043053619004,
      "grad_norm": 0.23049409687519073,
      "learning_rate": 4.867655489150848e-06,
      "loss": 0.0136,
      "step": 1577800
    },
    {
      "epoch": 2.582137035800554,
      "grad_norm": 0.7133916020393372,
      "learning_rate": 4.867589596937331e-06,
      "loss": 0.0187,
      "step": 1577820
    },
    {
      "epoch": 2.582169766239207,
      "grad_norm": 0.4564151465892792,
      "learning_rate": 4.867523704723813e-06,
      "loss": 0.0172,
      "step": 1577840
    },
    {
      "epoch": 2.5822024966778603,
      "grad_norm": 0.47030186653137207,
      "learning_rate": 4.867457812510296e-06,
      "loss": 0.0137,
      "step": 1577860
    },
    {
      "epoch": 2.582235227116514,
      "grad_norm": 0.34339869022369385,
      "learning_rate": 4.867391920296779e-06,
      "loss": 0.0159,
      "step": 1577880
    },
    {
      "epoch": 2.582267957555167,
      "grad_norm": 0.44193604588508606,
      "learning_rate": 4.867326028083262e-06,
      "loss": 0.0135,
      "step": 1577900
    },
    {
      "epoch": 2.5823006879938206,
      "grad_norm": 0.13421565294265747,
      "learning_rate": 4.867260135869744e-06,
      "loss": 0.0141,
      "step": 1577920
    },
    {
      "epoch": 2.5823334184324738,
      "grad_norm": 0.23056133091449738,
      "learning_rate": 4.867194243656228e-06,
      "loss": 0.0185,
      "step": 1577940
    },
    {
      "epoch": 2.5823661488711274,
      "grad_norm": 0.7574682831764221,
      "learning_rate": 4.867128351442711e-06,
      "loss": 0.013,
      "step": 1577960
    },
    {
      "epoch": 2.5823988793097805,
      "grad_norm": 0.4135388135910034,
      "learning_rate": 4.867062459229193e-06,
      "loss": 0.0178,
      "step": 1577980
    },
    {
      "epoch": 2.5824316097484337,
      "grad_norm": 0.17309734225273132,
      "learning_rate": 4.866996567015676e-06,
      "loss": 0.0204,
      "step": 1578000
    },
    {
      "epoch": 2.5824643401870873,
      "grad_norm": 0.20396217703819275,
      "learning_rate": 4.866930674802159e-06,
      "loss": 0.0118,
      "step": 1578020
    },
    {
      "epoch": 2.5824970706257404,
      "grad_norm": 0.27049747109413147,
      "learning_rate": 4.866864782588642e-06,
      "loss": 0.014,
      "step": 1578040
    },
    {
      "epoch": 2.582529801064394,
      "grad_norm": 0.17500753700733185,
      "learning_rate": 4.866798890375124e-06,
      "loss": 0.0112,
      "step": 1578060
    },
    {
      "epoch": 2.582562531503047,
      "grad_norm": 0.7983859181404114,
      "learning_rate": 4.866732998161607e-06,
      "loss": 0.0107,
      "step": 1578080
    },
    {
      "epoch": 2.5825952619417007,
      "grad_norm": 0.8057452440261841,
      "learning_rate": 4.866667105948091e-06,
      "loss": 0.012,
      "step": 1578100
    },
    {
      "epoch": 2.582627992380354,
      "grad_norm": 0.4641876518726349,
      "learning_rate": 4.866601213734573e-06,
      "loss": 0.0101,
      "step": 1578120
    },
    {
      "epoch": 2.582660722819007,
      "grad_norm": 0.48129013180732727,
      "learning_rate": 4.866535321521056e-06,
      "loss": 0.0108,
      "step": 1578140
    },
    {
      "epoch": 2.5826934532576606,
      "grad_norm": 0.3546997308731079,
      "learning_rate": 4.866469429307539e-06,
      "loss": 0.0161,
      "step": 1578160
    },
    {
      "epoch": 2.582726183696314,
      "grad_norm": 0.8239527344703674,
      "learning_rate": 4.8664035370940225e-06,
      "loss": 0.0142,
      "step": 1578180
    },
    {
      "epoch": 2.5827589141349674,
      "grad_norm": 0.4265345335006714,
      "learning_rate": 4.866337644880505e-06,
      "loss": 0.0155,
      "step": 1578200
    },
    {
      "epoch": 2.5827916445736205,
      "grad_norm": 0.0467163510620594,
      "learning_rate": 4.866271752666988e-06,
      "loss": 0.0147,
      "step": 1578220
    },
    {
      "epoch": 2.582824375012274,
      "grad_norm": 0.13576258718967438,
      "learning_rate": 4.866205860453471e-06,
      "loss": 0.0146,
      "step": 1578240
    },
    {
      "epoch": 2.5828571054509273,
      "grad_norm": 0.14552763104438782,
      "learning_rate": 4.8661399682399534e-06,
      "loss": 0.0112,
      "step": 1578260
    },
    {
      "epoch": 2.5828898358895804,
      "grad_norm": 0.1231788620352745,
      "learning_rate": 4.866074076026436e-06,
      "loss": 0.0099,
      "step": 1578280
    },
    {
      "epoch": 2.582922566328234,
      "grad_norm": 0.18917451798915863,
      "learning_rate": 4.866008183812919e-06,
      "loss": 0.0161,
      "step": 1578300
    },
    {
      "epoch": 2.582955296766887,
      "grad_norm": 0.11380600929260254,
      "learning_rate": 4.865942291599402e-06,
      "loss": 0.0149,
      "step": 1578320
    },
    {
      "epoch": 2.5829880272055408,
      "grad_norm": 0.4078870713710785,
      "learning_rate": 4.865876399385885e-06,
      "loss": 0.0098,
      "step": 1578340
    },
    {
      "epoch": 2.583020757644194,
      "grad_norm": 0.2273958921432495,
      "learning_rate": 4.865810507172368e-06,
      "loss": 0.0191,
      "step": 1578360
    },
    {
      "epoch": 2.5830534880828475,
      "grad_norm": 0.25413575768470764,
      "learning_rate": 4.865744614958851e-06,
      "loss": 0.0154,
      "step": 1578380
    },
    {
      "epoch": 2.5830862185215007,
      "grad_norm": 0.3543708324432373,
      "learning_rate": 4.8656787227453335e-06,
      "loss": 0.0193,
      "step": 1578400
    },
    {
      "epoch": 2.583118948960154,
      "grad_norm": 0.2695320248603821,
      "learning_rate": 4.865612830531816e-06,
      "loss": 0.0207,
      "step": 1578420
    },
    {
      "epoch": 2.5831516793988074,
      "grad_norm": 0.15654206275939941,
      "learning_rate": 4.865546938318299e-06,
      "loss": 0.0157,
      "step": 1578440
    },
    {
      "epoch": 2.5831844098374606,
      "grad_norm": 0.042885977774858475,
      "learning_rate": 4.8654810461047825e-06,
      "loss": 0.0121,
      "step": 1578460
    },
    {
      "epoch": 2.5832171402761137,
      "grad_norm": 0.49099260568618774,
      "learning_rate": 4.865415153891265e-06,
      "loss": 0.0171,
      "step": 1578480
    },
    {
      "epoch": 2.5832498707147673,
      "grad_norm": 0.7075722813606262,
      "learning_rate": 4.865349261677748e-06,
      "loss": 0.0166,
      "step": 1578500
    },
    {
      "epoch": 2.583282601153421,
      "grad_norm": 0.14338551461696625,
      "learning_rate": 4.865283369464231e-06,
      "loss": 0.009,
      "step": 1578520
    },
    {
      "epoch": 2.583315331592074,
      "grad_norm": 1.1149402856826782,
      "learning_rate": 4.8652174772507135e-06,
      "loss": 0.0087,
      "step": 1578540
    },
    {
      "epoch": 2.583348062030727,
      "grad_norm": 0.11603111028671265,
      "learning_rate": 4.865151585037196e-06,
      "loss": 0.0136,
      "step": 1578560
    },
    {
      "epoch": 2.583380792469381,
      "grad_norm": 0.5010838508605957,
      "learning_rate": 4.86508569282368e-06,
      "loss": 0.0123,
      "step": 1578580
    },
    {
      "epoch": 2.583413522908034,
      "grad_norm": 0.12522955238819122,
      "learning_rate": 4.8650198006101626e-06,
      "loss": 0.0107,
      "step": 1578600
    },
    {
      "epoch": 2.583446253346687,
      "grad_norm": 0.16866527497768402,
      "learning_rate": 4.864953908396645e-06,
      "loss": 0.0169,
      "step": 1578620
    },
    {
      "epoch": 2.5834789837853407,
      "grad_norm": 0.5684182643890381,
      "learning_rate": 4.864888016183128e-06,
      "loss": 0.0219,
      "step": 1578640
    },
    {
      "epoch": 2.5835117142239943,
      "grad_norm": 0.5116860270500183,
      "learning_rate": 4.864822123969611e-06,
      "loss": 0.0129,
      "step": 1578660
    },
    {
      "epoch": 2.5835444446626474,
      "grad_norm": 1.053606629371643,
      "learning_rate": 4.8647562317560935e-06,
      "loss": 0.0148,
      "step": 1578680
    },
    {
      "epoch": 2.5835771751013006,
      "grad_norm": 0.2671988308429718,
      "learning_rate": 4.864690339542576e-06,
      "loss": 0.0092,
      "step": 1578700
    },
    {
      "epoch": 2.583609905539954,
      "grad_norm": 0.27710071206092834,
      "learning_rate": 4.864624447329059e-06,
      "loss": 0.0115,
      "step": 1578720
    },
    {
      "epoch": 2.5836426359786073,
      "grad_norm": 0.5686153769493103,
      "learning_rate": 4.864558555115543e-06,
      "loss": 0.0148,
      "step": 1578740
    },
    {
      "epoch": 2.5836753664172605,
      "grad_norm": 0.7077967524528503,
      "learning_rate": 4.864492662902025e-06,
      "loss": 0.01,
      "step": 1578760
    },
    {
      "epoch": 2.583708096855914,
      "grad_norm": 0.13104118406772614,
      "learning_rate": 4.864426770688508e-06,
      "loss": 0.0116,
      "step": 1578780
    },
    {
      "epoch": 2.5837408272945677,
      "grad_norm": 0.16780763864517212,
      "learning_rate": 4.864360878474992e-06,
      "loss": 0.0131,
      "step": 1578800
    },
    {
      "epoch": 2.583773557733221,
      "grad_norm": 0.1945272982120514,
      "learning_rate": 4.864294986261474e-06,
      "loss": 0.0189,
      "step": 1578820
    },
    {
      "epoch": 2.583806288171874,
      "grad_norm": 0.4159054458141327,
      "learning_rate": 4.864229094047957e-06,
      "loss": 0.012,
      "step": 1578840
    },
    {
      "epoch": 2.5838390186105276,
      "grad_norm": 0.23126593232154846,
      "learning_rate": 4.86416320183444e-06,
      "loss": 0.0134,
      "step": 1578860
    },
    {
      "epoch": 2.5838717490491807,
      "grad_norm": 0.3258126378059387,
      "learning_rate": 4.864097309620923e-06,
      "loss": 0.0138,
      "step": 1578880
    },
    {
      "epoch": 2.583904479487834,
      "grad_norm": 0.22161619365215302,
      "learning_rate": 4.864031417407405e-06,
      "loss": 0.014,
      "step": 1578900
    },
    {
      "epoch": 2.5839372099264875,
      "grad_norm": 0.7812865972518921,
      "learning_rate": 4.863965525193888e-06,
      "loss": 0.0155,
      "step": 1578920
    },
    {
      "epoch": 2.583969940365141,
      "grad_norm": 0.4097100496292114,
      "learning_rate": 4.863899632980371e-06,
      "loss": 0.0118,
      "step": 1578940
    },
    {
      "epoch": 2.584002670803794,
      "grad_norm": 0.2702907919883728,
      "learning_rate": 4.863833740766854e-06,
      "loss": 0.0154,
      "step": 1578960
    },
    {
      "epoch": 2.5840354012424473,
      "grad_norm": 0.10740238428115845,
      "learning_rate": 4.863767848553337e-06,
      "loss": 0.0154,
      "step": 1578980
    },
    {
      "epoch": 2.584068131681101,
      "grad_norm": 0.14818139374256134,
      "learning_rate": 4.86370195633982e-06,
      "loss": 0.0153,
      "step": 1579000
    },
    {
      "epoch": 2.584100862119754,
      "grad_norm": 0.9953807592391968,
      "learning_rate": 4.863636064126303e-06,
      "loss": 0.0144,
      "step": 1579020
    },
    {
      "epoch": 2.5841335925584072,
      "grad_norm": 0.4575498700141907,
      "learning_rate": 4.863570171912785e-06,
      "loss": 0.0223,
      "step": 1579040
    },
    {
      "epoch": 2.584166322997061,
      "grad_norm": 0.19529889523983002,
      "learning_rate": 4.863504279699268e-06,
      "loss": 0.011,
      "step": 1579060
    },
    {
      "epoch": 2.584199053435714,
      "grad_norm": 0.5428846478462219,
      "learning_rate": 4.863438387485751e-06,
      "loss": 0.0179,
      "step": 1579080
    },
    {
      "epoch": 2.5842317838743676,
      "grad_norm": 0.3004503846168518,
      "learning_rate": 4.863372495272234e-06,
      "loss": 0.0148,
      "step": 1579100
    },
    {
      "epoch": 2.5842645143130207,
      "grad_norm": 0.351581335067749,
      "learning_rate": 4.863306603058716e-06,
      "loss": 0.0132,
      "step": 1579120
    },
    {
      "epoch": 2.5842972447516743,
      "grad_norm": 0.24856321513652802,
      "learning_rate": 4.8632407108452e-06,
      "loss": 0.0126,
      "step": 1579140
    },
    {
      "epoch": 2.5843299751903275,
      "grad_norm": 0.3260759711265564,
      "learning_rate": 4.863174818631683e-06,
      "loss": 0.0154,
      "step": 1579160
    },
    {
      "epoch": 2.5843627056289806,
      "grad_norm": 0.15835195779800415,
      "learning_rate": 4.863108926418165e-06,
      "loss": 0.0113,
      "step": 1579180
    },
    {
      "epoch": 2.584395436067634,
      "grad_norm": 0.291139155626297,
      "learning_rate": 4.863043034204649e-06,
      "loss": 0.0126,
      "step": 1579200
    },
    {
      "epoch": 2.5844281665062874,
      "grad_norm": 0.20523065328598022,
      "learning_rate": 4.862977141991132e-06,
      "loss": 0.0164,
      "step": 1579220
    },
    {
      "epoch": 2.584460896944941,
      "grad_norm": 0.2050275057554245,
      "learning_rate": 4.8629112497776145e-06,
      "loss": 0.0181,
      "step": 1579240
    },
    {
      "epoch": 2.584493627383594,
      "grad_norm": 0.22885850071907043,
      "learning_rate": 4.862845357564097e-06,
      "loss": 0.02,
      "step": 1579260
    },
    {
      "epoch": 2.5845263578222477,
      "grad_norm": 0.13240903615951538,
      "learning_rate": 4.86277946535058e-06,
      "loss": 0.0135,
      "step": 1579280
    },
    {
      "epoch": 2.584559088260901,
      "grad_norm": 0.47716280817985535,
      "learning_rate": 4.862713573137063e-06,
      "loss": 0.0158,
      "step": 1579300
    },
    {
      "epoch": 2.584591818699554,
      "grad_norm": 0.7431261539459229,
      "learning_rate": 4.8626476809235454e-06,
      "loss": 0.0164,
      "step": 1579320
    },
    {
      "epoch": 2.5846245491382076,
      "grad_norm": 0.4280948042869568,
      "learning_rate": 4.862581788710028e-06,
      "loss": 0.0119,
      "step": 1579340
    },
    {
      "epoch": 2.5846572795768608,
      "grad_norm": 0.1113240122795105,
      "learning_rate": 4.862515896496512e-06,
      "loss": 0.0136,
      "step": 1579360
    },
    {
      "epoch": 2.5846900100155143,
      "grad_norm": 0.29768839478492737,
      "learning_rate": 4.8624500042829945e-06,
      "loss": 0.0119,
      "step": 1579380
    },
    {
      "epoch": 2.5847227404541675,
      "grad_norm": 0.3101627230644226,
      "learning_rate": 4.862384112069477e-06,
      "loss": 0.0109,
      "step": 1579400
    },
    {
      "epoch": 2.584755470892821,
      "grad_norm": 0.46304261684417725,
      "learning_rate": 4.86231821985596e-06,
      "loss": 0.0142,
      "step": 1579420
    },
    {
      "epoch": 2.5847882013314742,
      "grad_norm": 0.26242750883102417,
      "learning_rate": 4.862252327642443e-06,
      "loss": 0.0119,
      "step": 1579440
    },
    {
      "epoch": 2.5848209317701274,
      "grad_norm": 0.4630430042743683,
      "learning_rate": 4.8621864354289254e-06,
      "loss": 0.0154,
      "step": 1579460
    },
    {
      "epoch": 2.584853662208781,
      "grad_norm": 0.2723519802093506,
      "learning_rate": 4.862120543215408e-06,
      "loss": 0.0134,
      "step": 1579480
    },
    {
      "epoch": 2.584886392647434,
      "grad_norm": 0.4781532287597656,
      "learning_rate": 4.862054651001892e-06,
      "loss": 0.0185,
      "step": 1579500
    },
    {
      "epoch": 2.5849191230860877,
      "grad_norm": 0.8419873714447021,
      "learning_rate": 4.8619887587883745e-06,
      "loss": 0.0153,
      "step": 1579520
    },
    {
      "epoch": 2.584951853524741,
      "grad_norm": 0.43895643949508667,
      "learning_rate": 4.861922866574857e-06,
      "loss": 0.0146,
      "step": 1579540
    },
    {
      "epoch": 2.5849845839633945,
      "grad_norm": 0.6219248175621033,
      "learning_rate": 4.86185697436134e-06,
      "loss": 0.0142,
      "step": 1579560
    },
    {
      "epoch": 2.5850173144020476,
      "grad_norm": 0.45238712430000305,
      "learning_rate": 4.861791082147823e-06,
      "loss": 0.0126,
      "step": 1579580
    },
    {
      "epoch": 2.585050044840701,
      "grad_norm": 0.8792968392372131,
      "learning_rate": 4.861725189934306e-06,
      "loss": 0.0183,
      "step": 1579600
    },
    {
      "epoch": 2.5850827752793544,
      "grad_norm": 0.36994609236717224,
      "learning_rate": 4.861659297720789e-06,
      "loss": 0.019,
      "step": 1579620
    },
    {
      "epoch": 2.5851155057180075,
      "grad_norm": 0.08194369077682495,
      "learning_rate": 4.861593405507272e-06,
      "loss": 0.0181,
      "step": 1579640
    },
    {
      "epoch": 2.585148236156661,
      "grad_norm": 0.3078603148460388,
      "learning_rate": 4.8615275132937545e-06,
      "loss": 0.0151,
      "step": 1579660
    },
    {
      "epoch": 2.5851809665953143,
      "grad_norm": 0.34941720962524414,
      "learning_rate": 4.861461621080237e-06,
      "loss": 0.0083,
      "step": 1579680
    },
    {
      "epoch": 2.585213697033968,
      "grad_norm": 0.5457094311714172,
      "learning_rate": 4.86139572886672e-06,
      "loss": 0.0173,
      "step": 1579700
    },
    {
      "epoch": 2.585246427472621,
      "grad_norm": 0.2192211151123047,
      "learning_rate": 4.861329836653203e-06,
      "loss": 0.0144,
      "step": 1579720
    },
    {
      "epoch": 2.585279157911274,
      "grad_norm": 0.3768516778945923,
      "learning_rate": 4.8612639444396855e-06,
      "loss": 0.0097,
      "step": 1579740
    },
    {
      "epoch": 2.5853118883499278,
      "grad_norm": 0.7582180500030518,
      "learning_rate": 4.861198052226169e-06,
      "loss": 0.0131,
      "step": 1579760
    },
    {
      "epoch": 2.585344618788581,
      "grad_norm": 0.7871571779251099,
      "learning_rate": 4.861132160012652e-06,
      "loss": 0.0136,
      "step": 1579780
    },
    {
      "epoch": 2.5853773492272345,
      "grad_norm": 0.06595724821090698,
      "learning_rate": 4.8610662677991346e-06,
      "loss": 0.012,
      "step": 1579800
    },
    {
      "epoch": 2.5854100796658877,
      "grad_norm": 0.5745722651481628,
      "learning_rate": 4.861000375585617e-06,
      "loss": 0.0117,
      "step": 1579820
    },
    {
      "epoch": 2.5854428101045412,
      "grad_norm": 0.5632173418998718,
      "learning_rate": 4.8609344833721e-06,
      "loss": 0.0107,
      "step": 1579840
    },
    {
      "epoch": 2.5854755405431944,
      "grad_norm": 0.07558278739452362,
      "learning_rate": 4.860868591158584e-06,
      "loss": 0.0173,
      "step": 1579860
    },
    {
      "epoch": 2.5855082709818475,
      "grad_norm": 0.19665630161762238,
      "learning_rate": 4.860802698945066e-06,
      "loss": 0.0174,
      "step": 1579880
    },
    {
      "epoch": 2.585541001420501,
      "grad_norm": 0.4185744822025299,
      "learning_rate": 4.860736806731549e-06,
      "loss": 0.0125,
      "step": 1579900
    },
    {
      "epoch": 2.5855737318591543,
      "grad_norm": 0.7243179082870483,
      "learning_rate": 4.860670914518032e-06,
      "loss": 0.0174,
      "step": 1579920
    },
    {
      "epoch": 2.5856064622978074,
      "grad_norm": 0.5384514331817627,
      "learning_rate": 4.860605022304515e-06,
      "loss": 0.0118,
      "step": 1579940
    },
    {
      "epoch": 2.585639192736461,
      "grad_norm": 0.4510400891304016,
      "learning_rate": 4.860539130090997e-06,
      "loss": 0.0108,
      "step": 1579960
    },
    {
      "epoch": 2.5856719231751146,
      "grad_norm": 0.9957419037818909,
      "learning_rate": 4.86047323787748e-06,
      "loss": 0.02,
      "step": 1579980
    },
    {
      "epoch": 2.585704653613768,
      "grad_norm": 0.4742821455001831,
      "learning_rate": 4.860407345663964e-06,
      "loss": 0.0191,
      "step": 1580000
    },
    {
      "epoch": 2.585737384052421,
      "grad_norm": 0.3314512073993683,
      "learning_rate": 4.860341453450446e-06,
      "loss": 0.0144,
      "step": 1580020
    },
    {
      "epoch": 2.5857701144910745,
      "grad_norm": 0.46952059864997864,
      "learning_rate": 4.860275561236929e-06,
      "loss": 0.0114,
      "step": 1580040
    },
    {
      "epoch": 2.5858028449297277,
      "grad_norm": 0.09853705018758774,
      "learning_rate": 4.860209669023412e-06,
      "loss": 0.0084,
      "step": 1580060
    },
    {
      "epoch": 2.585835575368381,
      "grad_norm": 0.21014147996902466,
      "learning_rate": 4.860143776809895e-06,
      "loss": 0.0181,
      "step": 1580080
    },
    {
      "epoch": 2.5858683058070344,
      "grad_norm": 0.24885915219783783,
      "learning_rate": 4.860077884596377e-06,
      "loss": 0.0113,
      "step": 1580100
    },
    {
      "epoch": 2.585901036245688,
      "grad_norm": 0.39266616106033325,
      "learning_rate": 4.86001199238286e-06,
      "loss": 0.0148,
      "step": 1580120
    },
    {
      "epoch": 2.585933766684341,
      "grad_norm": 0.1420043706893921,
      "learning_rate": 4.859946100169343e-06,
      "loss": 0.0095,
      "step": 1580140
    },
    {
      "epoch": 2.5859664971229943,
      "grad_norm": 0.1321740299463272,
      "learning_rate": 4.859880207955826e-06,
      "loss": 0.0123,
      "step": 1580160
    },
    {
      "epoch": 2.585999227561648,
      "grad_norm": 0.8114405274391174,
      "learning_rate": 4.859814315742309e-06,
      "loss": 0.0148,
      "step": 1580180
    },
    {
      "epoch": 2.586031958000301,
      "grad_norm": 1.0934560298919678,
      "learning_rate": 4.859748423528792e-06,
      "loss": 0.0171,
      "step": 1580200
    },
    {
      "epoch": 2.586064688438954,
      "grad_norm": 0.40668442845344543,
      "learning_rate": 4.8596825313152755e-06,
      "loss": 0.0126,
      "step": 1580220
    },
    {
      "epoch": 2.586097418877608,
      "grad_norm": 0.36632731556892395,
      "learning_rate": 4.859616639101758e-06,
      "loss": 0.0132,
      "step": 1580240
    },
    {
      "epoch": 2.5861301493162614,
      "grad_norm": 0.2909139394760132,
      "learning_rate": 4.859550746888241e-06,
      "loss": 0.0151,
      "step": 1580260
    },
    {
      "epoch": 2.5861628797549145,
      "grad_norm": 0.6737173795700073,
      "learning_rate": 4.859484854674724e-06,
      "loss": 0.0117,
      "step": 1580280
    },
    {
      "epoch": 2.5861956101935677,
      "grad_norm": 0.41565176844596863,
      "learning_rate": 4.8594189624612064e-06,
      "loss": 0.0114,
      "step": 1580300
    },
    {
      "epoch": 2.5862283406322213,
      "grad_norm": 0.36045241355895996,
      "learning_rate": 4.859353070247689e-06,
      "loss": 0.0105,
      "step": 1580320
    },
    {
      "epoch": 2.5862610710708744,
      "grad_norm": 0.19237276911735535,
      "learning_rate": 4.859287178034172e-06,
      "loss": 0.0117,
      "step": 1580340
    },
    {
      "epoch": 2.5862938015095276,
      "grad_norm": 0.497869074344635,
      "learning_rate": 4.859221285820655e-06,
      "loss": 0.014,
      "step": 1580360
    },
    {
      "epoch": 2.586326531948181,
      "grad_norm": 0.8192079067230225,
      "learning_rate": 4.859155393607138e-06,
      "loss": 0.0129,
      "step": 1580380
    },
    {
      "epoch": 2.586359262386835,
      "grad_norm": 0.4152428209781647,
      "learning_rate": 4.859089501393621e-06,
      "loss": 0.0139,
      "step": 1580400
    },
    {
      "epoch": 2.586391992825488,
      "grad_norm": 0.5381928086280823,
      "learning_rate": 4.859023609180104e-06,
      "loss": 0.0134,
      "step": 1580420
    },
    {
      "epoch": 2.586424723264141,
      "grad_norm": 0.10206066071987152,
      "learning_rate": 4.8589577169665865e-06,
      "loss": 0.0103,
      "step": 1580440
    },
    {
      "epoch": 2.5864574537027947,
      "grad_norm": 0.02954500913619995,
      "learning_rate": 4.858891824753069e-06,
      "loss": 0.0109,
      "step": 1580460
    },
    {
      "epoch": 2.586490184141448,
      "grad_norm": 0.24932245910167694,
      "learning_rate": 4.858825932539552e-06,
      "loss": 0.0072,
      "step": 1580480
    },
    {
      "epoch": 2.586522914580101,
      "grad_norm": 0.3521256446838379,
      "learning_rate": 4.858760040326035e-06,
      "loss": 0.012,
      "step": 1580500
    },
    {
      "epoch": 2.5865556450187546,
      "grad_norm": 0.3487309515476227,
      "learning_rate": 4.8586941481125174e-06,
      "loss": 0.0146,
      "step": 1580520
    },
    {
      "epoch": 2.5865883754574077,
      "grad_norm": 0.18260568380355835,
      "learning_rate": 4.858628255899e-06,
      "loss": 0.0128,
      "step": 1580540
    },
    {
      "epoch": 2.5866211058960613,
      "grad_norm": 0.22936855256557465,
      "learning_rate": 4.858562363685484e-06,
      "loss": 0.0143,
      "step": 1580560
    },
    {
      "epoch": 2.5866538363347145,
      "grad_norm": 0.6940212845802307,
      "learning_rate": 4.8584964714719665e-06,
      "loss": 0.0168,
      "step": 1580580
    },
    {
      "epoch": 2.586686566773368,
      "grad_norm": 0.3610331416130066,
      "learning_rate": 4.858430579258449e-06,
      "loss": 0.0129,
      "step": 1580600
    },
    {
      "epoch": 2.586719297212021,
      "grad_norm": 0.25389304757118225,
      "learning_rate": 4.858364687044933e-06,
      "loss": 0.0143,
      "step": 1580620
    },
    {
      "epoch": 2.5867520276506744,
      "grad_norm": 0.4976225197315216,
      "learning_rate": 4.8582987948314156e-06,
      "loss": 0.0134,
      "step": 1580640
    },
    {
      "epoch": 2.586784758089328,
      "grad_norm": 0.23518159985542297,
      "learning_rate": 4.858232902617898e-06,
      "loss": 0.0115,
      "step": 1580660
    },
    {
      "epoch": 2.586817488527981,
      "grad_norm": 0.10704509168863297,
      "learning_rate": 4.858167010404381e-06,
      "loss": 0.0102,
      "step": 1580680
    },
    {
      "epoch": 2.5868502189666347,
      "grad_norm": 0.35822612047195435,
      "learning_rate": 4.858101118190864e-06,
      "loss": 0.016,
      "step": 1580700
    },
    {
      "epoch": 2.586882949405288,
      "grad_norm": 0.06130698695778847,
      "learning_rate": 4.8580352259773465e-06,
      "loss": 0.0098,
      "step": 1580720
    },
    {
      "epoch": 2.5869156798439414,
      "grad_norm": 0.4412686824798584,
      "learning_rate": 4.857969333763829e-06,
      "loss": 0.0137,
      "step": 1580740
    },
    {
      "epoch": 2.5869484102825946,
      "grad_norm": 0.11555662006139755,
      "learning_rate": 4.857903441550312e-06,
      "loss": 0.0157,
      "step": 1580760
    },
    {
      "epoch": 2.5869811407212477,
      "grad_norm": 0.26779672503471375,
      "learning_rate": 4.857837549336796e-06,
      "loss": 0.0118,
      "step": 1580780
    },
    {
      "epoch": 2.5870138711599013,
      "grad_norm": 0.4614333212375641,
      "learning_rate": 4.857771657123278e-06,
      "loss": 0.0146,
      "step": 1580800
    },
    {
      "epoch": 2.5870466015985545,
      "grad_norm": 0.5737125277519226,
      "learning_rate": 4.857705764909761e-06,
      "loss": 0.0154,
      "step": 1580820
    },
    {
      "epoch": 2.587079332037208,
      "grad_norm": 0.57228684425354,
      "learning_rate": 4.857639872696244e-06,
      "loss": 0.0167,
      "step": 1580840
    },
    {
      "epoch": 2.5871120624758612,
      "grad_norm": 0.30314627289772034,
      "learning_rate": 4.8575739804827265e-06,
      "loss": 0.018,
      "step": 1580860
    },
    {
      "epoch": 2.587144792914515,
      "grad_norm": 0.15194456279277802,
      "learning_rate": 4.857508088269209e-06,
      "loss": 0.0152,
      "step": 1580880
    },
    {
      "epoch": 2.587177523353168,
      "grad_norm": 0.4267410635948181,
      "learning_rate": 4.857442196055692e-06,
      "loss": 0.0155,
      "step": 1580900
    },
    {
      "epoch": 2.587210253791821,
      "grad_norm": 0.4964713156223297,
      "learning_rate": 4.857376303842176e-06,
      "loss": 0.012,
      "step": 1580920
    },
    {
      "epoch": 2.5872429842304747,
      "grad_norm": 0.34658804535865784,
      "learning_rate": 4.857310411628658e-06,
      "loss": 0.0199,
      "step": 1580940
    },
    {
      "epoch": 2.587275714669128,
      "grad_norm": 0.290967732667923,
      "learning_rate": 4.857244519415141e-06,
      "loss": 0.0162,
      "step": 1580960
    },
    {
      "epoch": 2.5873084451077815,
      "grad_norm": 0.6498357057571411,
      "learning_rate": 4.857178627201624e-06,
      "loss": 0.0142,
      "step": 1580980
    },
    {
      "epoch": 2.5873411755464346,
      "grad_norm": 0.2418062835931778,
      "learning_rate": 4.8571127349881066e-06,
      "loss": 0.0108,
      "step": 1581000
    },
    {
      "epoch": 2.587373905985088,
      "grad_norm": 0.489040732383728,
      "learning_rate": 4.85704684277459e-06,
      "loss": 0.0124,
      "step": 1581020
    },
    {
      "epoch": 2.5874066364237414,
      "grad_norm": 0.3366282284259796,
      "learning_rate": 4.856980950561073e-06,
      "loss": 0.0128,
      "step": 1581040
    },
    {
      "epoch": 2.5874393668623945,
      "grad_norm": 0.5822028517723083,
      "learning_rate": 4.856915058347556e-06,
      "loss": 0.0201,
      "step": 1581060
    },
    {
      "epoch": 2.587472097301048,
      "grad_norm": 0.8200406432151794,
      "learning_rate": 4.856849166134038e-06,
      "loss": 0.0243,
      "step": 1581080
    },
    {
      "epoch": 2.5875048277397013,
      "grad_norm": 0.6644584536552429,
      "learning_rate": 4.856783273920521e-06,
      "loss": 0.019,
      "step": 1581100
    },
    {
      "epoch": 2.587537558178355,
      "grad_norm": 0.1859518140554428,
      "learning_rate": 4.856717381707004e-06,
      "loss": 0.0167,
      "step": 1581120
    },
    {
      "epoch": 2.587570288617008,
      "grad_norm": 0.3700707256793976,
      "learning_rate": 4.856651489493487e-06,
      "loss": 0.0083,
      "step": 1581140
    },
    {
      "epoch": 2.5876030190556616,
      "grad_norm": 0.57464599609375,
      "learning_rate": 4.856585597279969e-06,
      "loss": 0.0166,
      "step": 1581160
    },
    {
      "epoch": 2.5876357494943147,
      "grad_norm": 0.33865994215011597,
      "learning_rate": 4.856519705066453e-06,
      "loss": 0.0182,
      "step": 1581180
    },
    {
      "epoch": 2.587668479932968,
      "grad_norm": 0.19329208135604858,
      "learning_rate": 4.856453812852936e-06,
      "loss": 0.0197,
      "step": 1581200
    },
    {
      "epoch": 2.5877012103716215,
      "grad_norm": 0.2798459231853485,
      "learning_rate": 4.856387920639418e-06,
      "loss": 0.011,
      "step": 1581220
    },
    {
      "epoch": 2.5877339408102746,
      "grad_norm": 0.5137783288955688,
      "learning_rate": 4.856322028425901e-06,
      "loss": 0.0157,
      "step": 1581240
    },
    {
      "epoch": 2.5877666712489282,
      "grad_norm": 0.33598026633262634,
      "learning_rate": 4.856256136212385e-06,
      "loss": 0.0189,
      "step": 1581260
    },
    {
      "epoch": 2.5877994016875814,
      "grad_norm": 0.11399280279874802,
      "learning_rate": 4.8561902439988675e-06,
      "loss": 0.0093,
      "step": 1581280
    },
    {
      "epoch": 2.587832132126235,
      "grad_norm": 0.19240345060825348,
      "learning_rate": 4.85612435178535e-06,
      "loss": 0.0129,
      "step": 1581300
    },
    {
      "epoch": 2.587864862564888,
      "grad_norm": 0.19376426935195923,
      "learning_rate": 4.856058459571833e-06,
      "loss": 0.0138,
      "step": 1581320
    },
    {
      "epoch": 2.5878975930035413,
      "grad_norm": 0.17432236671447754,
      "learning_rate": 4.855992567358316e-06,
      "loss": 0.0117,
      "step": 1581340
    },
    {
      "epoch": 2.587930323442195,
      "grad_norm": 0.3865301311016083,
      "learning_rate": 4.8559266751447984e-06,
      "loss": 0.0147,
      "step": 1581360
    },
    {
      "epoch": 2.587963053880848,
      "grad_norm": 0.08943735063076019,
      "learning_rate": 4.855860782931281e-06,
      "loss": 0.016,
      "step": 1581380
    },
    {
      "epoch": 2.5879957843195016,
      "grad_norm": 0.9101626873016357,
      "learning_rate": 4.855794890717764e-06,
      "loss": 0.017,
      "step": 1581400
    },
    {
      "epoch": 2.5880285147581548,
      "grad_norm": 0.2237657904624939,
      "learning_rate": 4.8557289985042475e-06,
      "loss": 0.0176,
      "step": 1581420
    },
    {
      "epoch": 2.5880612451968084,
      "grad_norm": 0.2687438726425171,
      "learning_rate": 4.85566310629073e-06,
      "loss": 0.0095,
      "step": 1581440
    },
    {
      "epoch": 2.5880939756354615,
      "grad_norm": 0.5619410276412964,
      "learning_rate": 4.855597214077213e-06,
      "loss": 0.0135,
      "step": 1581460
    },
    {
      "epoch": 2.5881267060741147,
      "grad_norm": 0.6094204783439636,
      "learning_rate": 4.855531321863696e-06,
      "loss": 0.011,
      "step": 1581480
    },
    {
      "epoch": 2.5881594365127683,
      "grad_norm": 0.30327221751213074,
      "learning_rate": 4.8554654296501785e-06,
      "loss": 0.0162,
      "step": 1581500
    },
    {
      "epoch": 2.5881921669514214,
      "grad_norm": 0.2704499065876007,
      "learning_rate": 4.855399537436661e-06,
      "loss": 0.0158,
      "step": 1581520
    },
    {
      "epoch": 2.5882248973900746,
      "grad_norm": 0.29283639788627625,
      "learning_rate": 4.855333645223144e-06,
      "loss": 0.0204,
      "step": 1581540
    },
    {
      "epoch": 2.588257627828728,
      "grad_norm": 0.3407946228981018,
      "learning_rate": 4.855267753009627e-06,
      "loss": 0.0123,
      "step": 1581560
    },
    {
      "epoch": 2.5882903582673817,
      "grad_norm": 0.3377722501754761,
      "learning_rate": 4.85520186079611e-06,
      "loss": 0.0122,
      "step": 1581580
    },
    {
      "epoch": 2.588323088706035,
      "grad_norm": 0.3858155310153961,
      "learning_rate": 4.855135968582593e-06,
      "loss": 0.0155,
      "step": 1581600
    },
    {
      "epoch": 2.588355819144688,
      "grad_norm": 0.304117888212204,
      "learning_rate": 4.855070076369076e-06,
      "loss": 0.0131,
      "step": 1581620
    },
    {
      "epoch": 2.5883885495833416,
      "grad_norm": 0.4583035111427307,
      "learning_rate": 4.855004184155559e-06,
      "loss": 0.0171,
      "step": 1581640
    },
    {
      "epoch": 2.588421280021995,
      "grad_norm": 0.25466957688331604,
      "learning_rate": 4.854938291942042e-06,
      "loss": 0.0143,
      "step": 1581660
    },
    {
      "epoch": 2.588454010460648,
      "grad_norm": 0.15686777234077454,
      "learning_rate": 4.854872399728525e-06,
      "loss": 0.0067,
      "step": 1581680
    },
    {
      "epoch": 2.5884867408993015,
      "grad_norm": 0.8541088104248047,
      "learning_rate": 4.8548065075150075e-06,
      "loss": 0.0126,
      "step": 1581700
    },
    {
      "epoch": 2.588519471337955,
      "grad_norm": 0.26200857758522034,
      "learning_rate": 4.85474061530149e-06,
      "loss": 0.016,
      "step": 1581720
    },
    {
      "epoch": 2.5885522017766083,
      "grad_norm": 0.6668660640716553,
      "learning_rate": 4.854674723087973e-06,
      "loss": 0.0108,
      "step": 1581740
    },
    {
      "epoch": 2.5885849322152614,
      "grad_norm": 0.15915122628211975,
      "learning_rate": 4.854608830874456e-06,
      "loss": 0.0166,
      "step": 1581760
    },
    {
      "epoch": 2.588617662653915,
      "grad_norm": 0.1310383379459381,
      "learning_rate": 4.8545429386609385e-06,
      "loss": 0.0164,
      "step": 1581780
    },
    {
      "epoch": 2.588650393092568,
      "grad_norm": 1.8922888040542603,
      "learning_rate": 4.854477046447422e-06,
      "loss": 0.0132,
      "step": 1581800
    },
    {
      "epoch": 2.5886831235312213,
      "grad_norm": 0.24206630885601044,
      "learning_rate": 4.854411154233905e-06,
      "loss": 0.0132,
      "step": 1581820
    },
    {
      "epoch": 2.588715853969875,
      "grad_norm": 0.4985756576061249,
      "learning_rate": 4.8543452620203876e-06,
      "loss": 0.0276,
      "step": 1581840
    },
    {
      "epoch": 2.5887485844085285,
      "grad_norm": 0.24101921916007996,
      "learning_rate": 4.85427936980687e-06,
      "loss": 0.0129,
      "step": 1581860
    },
    {
      "epoch": 2.5887813148471817,
      "grad_norm": 0.21623744070529938,
      "learning_rate": 4.854213477593353e-06,
      "loss": 0.0149,
      "step": 1581880
    },
    {
      "epoch": 2.588814045285835,
      "grad_norm": 0.26852741837501526,
      "learning_rate": 4.854147585379836e-06,
      "loss": 0.0098,
      "step": 1581900
    },
    {
      "epoch": 2.5888467757244884,
      "grad_norm": 0.5935810208320618,
      "learning_rate": 4.8540816931663185e-06,
      "loss": 0.0112,
      "step": 1581920
    },
    {
      "epoch": 2.5888795061631416,
      "grad_norm": 0.3243984878063202,
      "learning_rate": 4.854015800952801e-06,
      "loss": 0.0137,
      "step": 1581940
    },
    {
      "epoch": 2.5889122366017947,
      "grad_norm": 0.44723746180534363,
      "learning_rate": 4.853949908739284e-06,
      "loss": 0.0091,
      "step": 1581960
    },
    {
      "epoch": 2.5889449670404483,
      "grad_norm": 0.2767946720123291,
      "learning_rate": 4.853884016525768e-06,
      "loss": 0.009,
      "step": 1581980
    },
    {
      "epoch": 2.588977697479102,
      "grad_norm": 0.09256043285131454,
      "learning_rate": 4.85381812431225e-06,
      "loss": 0.0119,
      "step": 1582000
    },
    {
      "epoch": 2.589010427917755,
      "grad_norm": 0.35800665616989136,
      "learning_rate": 4.853752232098733e-06,
      "loss": 0.0136,
      "step": 1582020
    },
    {
      "epoch": 2.589043158356408,
      "grad_norm": 0.24326959252357483,
      "learning_rate": 4.853686339885217e-06,
      "loss": 0.0215,
      "step": 1582040
    },
    {
      "epoch": 2.589075888795062,
      "grad_norm": 0.39863574504852295,
      "learning_rate": 4.853620447671699e-06,
      "loss": 0.0138,
      "step": 1582060
    },
    {
      "epoch": 2.589108619233715,
      "grad_norm": 0.08704604208469391,
      "learning_rate": 4.853554555458182e-06,
      "loss": 0.0126,
      "step": 1582080
    },
    {
      "epoch": 2.589141349672368,
      "grad_norm": 0.5365399718284607,
      "learning_rate": 4.853488663244665e-06,
      "loss": 0.0161,
      "step": 1582100
    },
    {
      "epoch": 2.5891740801110217,
      "grad_norm": 0.4186520278453827,
      "learning_rate": 4.853422771031148e-06,
      "loss": 0.0128,
      "step": 1582120
    },
    {
      "epoch": 2.589206810549675,
      "grad_norm": 0.15320958197116852,
      "learning_rate": 4.85335687881763e-06,
      "loss": 0.0113,
      "step": 1582140
    },
    {
      "epoch": 2.5892395409883284,
      "grad_norm": 0.9177258610725403,
      "learning_rate": 4.853290986604113e-06,
      "loss": 0.0095,
      "step": 1582160
    },
    {
      "epoch": 2.5892722714269816,
      "grad_norm": 0.22500604391098022,
      "learning_rate": 4.853225094390596e-06,
      "loss": 0.0156,
      "step": 1582180
    },
    {
      "epoch": 2.589305001865635,
      "grad_norm": 0.13677650690078735,
      "learning_rate": 4.8531592021770794e-06,
      "loss": 0.0163,
      "step": 1582200
    },
    {
      "epoch": 2.5893377323042883,
      "grad_norm": 0.1399189680814743,
      "learning_rate": 4.853093309963562e-06,
      "loss": 0.015,
      "step": 1582220
    },
    {
      "epoch": 2.5893704627429415,
      "grad_norm": 0.31342488527297974,
      "learning_rate": 4.853027417750045e-06,
      "loss": 0.0153,
      "step": 1582240
    },
    {
      "epoch": 2.589403193181595,
      "grad_norm": 0.2577067017555237,
      "learning_rate": 4.852961525536528e-06,
      "loss": 0.0167,
      "step": 1582260
    },
    {
      "epoch": 2.589435923620248,
      "grad_norm": 0.26290443539619446,
      "learning_rate": 4.85289563332301e-06,
      "loss": 0.0097,
      "step": 1582280
    },
    {
      "epoch": 2.589468654058902,
      "grad_norm": 0.48676472902297974,
      "learning_rate": 4.852829741109493e-06,
      "loss": 0.0144,
      "step": 1582300
    },
    {
      "epoch": 2.589501384497555,
      "grad_norm": 0.2615966498851776,
      "learning_rate": 4.852763848895977e-06,
      "loss": 0.0259,
      "step": 1582320
    },
    {
      "epoch": 2.5895341149362086,
      "grad_norm": 0.2815887928009033,
      "learning_rate": 4.8526979566824595e-06,
      "loss": 0.0107,
      "step": 1582340
    },
    {
      "epoch": 2.5895668453748617,
      "grad_norm": 0.13901564478874207,
      "learning_rate": 4.852632064468942e-06,
      "loss": 0.0155,
      "step": 1582360
    },
    {
      "epoch": 2.589599575813515,
      "grad_norm": 0.26125237345695496,
      "learning_rate": 4.852566172255425e-06,
      "loss": 0.0093,
      "step": 1582380
    },
    {
      "epoch": 2.5896323062521684,
      "grad_norm": 0.5439075231552124,
      "learning_rate": 4.852500280041908e-06,
      "loss": 0.0146,
      "step": 1582400
    },
    {
      "epoch": 2.5896650366908216,
      "grad_norm": 0.6774423122406006,
      "learning_rate": 4.85243438782839e-06,
      "loss": 0.0177,
      "step": 1582420
    },
    {
      "epoch": 2.589697767129475,
      "grad_norm": 0.14259114861488342,
      "learning_rate": 4.852368495614874e-06,
      "loss": 0.0159,
      "step": 1582440
    },
    {
      "epoch": 2.5897304975681283,
      "grad_norm": 0.24234819412231445,
      "learning_rate": 4.852302603401357e-06,
      "loss": 0.0137,
      "step": 1582460
    },
    {
      "epoch": 2.589763228006782,
      "grad_norm": 0.27843210101127625,
      "learning_rate": 4.8522367111878395e-06,
      "loss": 0.0126,
      "step": 1582480
    },
    {
      "epoch": 2.589795958445435,
      "grad_norm": 0.3286783695220947,
      "learning_rate": 4.852170818974322e-06,
      "loss": 0.0155,
      "step": 1582500
    },
    {
      "epoch": 2.5898286888840882,
      "grad_norm": 0.12384865432977676,
      "learning_rate": 4.852104926760805e-06,
      "loss": 0.015,
      "step": 1582520
    },
    {
      "epoch": 2.589861419322742,
      "grad_norm": 0.444880872964859,
      "learning_rate": 4.852039034547288e-06,
      "loss": 0.0178,
      "step": 1582540
    },
    {
      "epoch": 2.589894149761395,
      "grad_norm": 0.6744247674942017,
      "learning_rate": 4.8519731423337704e-06,
      "loss": 0.0108,
      "step": 1582560
    },
    {
      "epoch": 2.5899268802000486,
      "grad_norm": 0.5028791427612305,
      "learning_rate": 4.851907250120253e-06,
      "loss": 0.0121,
      "step": 1582580
    },
    {
      "epoch": 2.5899596106387017,
      "grad_norm": 0.3903053402900696,
      "learning_rate": 4.851841357906737e-06,
      "loss": 0.0175,
      "step": 1582600
    },
    {
      "epoch": 2.5899923410773553,
      "grad_norm": 1.005859136581421,
      "learning_rate": 4.8517754656932195e-06,
      "loss": 0.0149,
      "step": 1582620
    },
    {
      "epoch": 2.5900250715160085,
      "grad_norm": 0.598163902759552,
      "learning_rate": 4.851709573479702e-06,
      "loss": 0.0244,
      "step": 1582640
    },
    {
      "epoch": 2.5900578019546616,
      "grad_norm": 0.44799134135246277,
      "learning_rate": 4.851643681266185e-06,
      "loss": 0.0102,
      "step": 1582660
    },
    {
      "epoch": 2.590090532393315,
      "grad_norm": 0.047841668128967285,
      "learning_rate": 4.8515777890526686e-06,
      "loss": 0.0178,
      "step": 1582680
    },
    {
      "epoch": 2.5901232628319684,
      "grad_norm": 0.32774847745895386,
      "learning_rate": 4.851511896839151e-06,
      "loss": 0.012,
      "step": 1582700
    },
    {
      "epoch": 2.590155993270622,
      "grad_norm": 0.3507767915725708,
      "learning_rate": 4.851446004625634e-06,
      "loss": 0.0184,
      "step": 1582720
    },
    {
      "epoch": 2.590188723709275,
      "grad_norm": 0.44691839814186096,
      "learning_rate": 4.851380112412117e-06,
      "loss": 0.012,
      "step": 1582740
    },
    {
      "epoch": 2.5902214541479287,
      "grad_norm": 0.4106554687023163,
      "learning_rate": 4.8513142201985995e-06,
      "loss": 0.0093,
      "step": 1582760
    },
    {
      "epoch": 2.590254184586582,
      "grad_norm": 0.4883630871772766,
      "learning_rate": 4.851248327985082e-06,
      "loss": 0.0123,
      "step": 1582780
    },
    {
      "epoch": 2.590286915025235,
      "grad_norm": 0.34836670756340027,
      "learning_rate": 4.851182435771565e-06,
      "loss": 0.0129,
      "step": 1582800
    },
    {
      "epoch": 2.5903196454638886,
      "grad_norm": 0.8112602829933167,
      "learning_rate": 4.851116543558049e-06,
      "loss": 0.0173,
      "step": 1582820
    },
    {
      "epoch": 2.5903523759025417,
      "grad_norm": 0.23285835981369019,
      "learning_rate": 4.851050651344531e-06,
      "loss": 0.0122,
      "step": 1582840
    },
    {
      "epoch": 2.5903851063411953,
      "grad_norm": 0.2097940444946289,
      "learning_rate": 4.850984759131014e-06,
      "loss": 0.0189,
      "step": 1582860
    },
    {
      "epoch": 2.5904178367798485,
      "grad_norm": 0.5536577701568604,
      "learning_rate": 4.850918866917497e-06,
      "loss": 0.0089,
      "step": 1582880
    },
    {
      "epoch": 2.590450567218502,
      "grad_norm": 0.24947331845760345,
      "learning_rate": 4.8508529747039796e-06,
      "loss": 0.0156,
      "step": 1582900
    },
    {
      "epoch": 2.5904832976571552,
      "grad_norm": 0.09078369289636612,
      "learning_rate": 4.850787082490462e-06,
      "loss": 0.0129,
      "step": 1582920
    },
    {
      "epoch": 2.5905160280958084,
      "grad_norm": 0.37560155987739563,
      "learning_rate": 4.850721190276945e-06,
      "loss": 0.0159,
      "step": 1582940
    },
    {
      "epoch": 2.590548758534462,
      "grad_norm": 0.2649581730365753,
      "learning_rate": 4.850655298063428e-06,
      "loss": 0.0115,
      "step": 1582960
    },
    {
      "epoch": 2.590581488973115,
      "grad_norm": 1.0529017448425293,
      "learning_rate": 4.8505894058499105e-06,
      "loss": 0.0111,
      "step": 1582980
    },
    {
      "epoch": 2.5906142194117683,
      "grad_norm": 0.4353114664554596,
      "learning_rate": 4.850523513636394e-06,
      "loss": 0.0157,
      "step": 1583000
    },
    {
      "epoch": 2.590646949850422,
      "grad_norm": 0.2697789967060089,
      "learning_rate": 4.850457621422877e-06,
      "loss": 0.0164,
      "step": 1583020
    },
    {
      "epoch": 2.5906796802890755,
      "grad_norm": 0.4729064404964447,
      "learning_rate": 4.85039172920936e-06,
      "loss": 0.0104,
      "step": 1583040
    },
    {
      "epoch": 2.5907124107277286,
      "grad_norm": 0.26040205359458923,
      "learning_rate": 4.850325836995843e-06,
      "loss": 0.0167,
      "step": 1583060
    },
    {
      "epoch": 2.5907451411663818,
      "grad_norm": 0.23750293254852295,
      "learning_rate": 4.850259944782326e-06,
      "loss": 0.0171,
      "step": 1583080
    },
    {
      "epoch": 2.5907778716050354,
      "grad_norm": 0.14007586240768433,
      "learning_rate": 4.850194052568809e-06,
      "loss": 0.02,
      "step": 1583100
    },
    {
      "epoch": 2.5908106020436885,
      "grad_norm": 0.4523620009422302,
      "learning_rate": 4.850128160355291e-06,
      "loss": 0.016,
      "step": 1583120
    },
    {
      "epoch": 2.5908433324823417,
      "grad_norm": 0.24809591472148895,
      "learning_rate": 4.850062268141774e-06,
      "loss": 0.0189,
      "step": 1583140
    },
    {
      "epoch": 2.5908760629209953,
      "grad_norm": 0.2820959687232971,
      "learning_rate": 4.849996375928257e-06,
      "loss": 0.0125,
      "step": 1583160
    },
    {
      "epoch": 2.590908793359649,
      "grad_norm": 0.5669817924499512,
      "learning_rate": 4.84993048371474e-06,
      "loss": 0.0184,
      "step": 1583180
    },
    {
      "epoch": 2.590941523798302,
      "grad_norm": 0.27109044790267944,
      "learning_rate": 4.849864591501222e-06,
      "loss": 0.0133,
      "step": 1583200
    },
    {
      "epoch": 2.590974254236955,
      "grad_norm": 0.15337486565113068,
      "learning_rate": 4.849798699287706e-06,
      "loss": 0.0116,
      "step": 1583220
    },
    {
      "epoch": 2.5910069846756087,
      "grad_norm": 0.6088334918022156,
      "learning_rate": 4.849732807074189e-06,
      "loss": 0.0118,
      "step": 1583240
    },
    {
      "epoch": 2.591039715114262,
      "grad_norm": 0.48316138982772827,
      "learning_rate": 4.849666914860671e-06,
      "loss": 0.0138,
      "step": 1583260
    },
    {
      "epoch": 2.591072445552915,
      "grad_norm": 0.27589768171310425,
      "learning_rate": 4.849601022647154e-06,
      "loss": 0.0114,
      "step": 1583280
    },
    {
      "epoch": 2.5911051759915686,
      "grad_norm": 0.14409375190734863,
      "learning_rate": 4.849535130433637e-06,
      "loss": 0.0136,
      "step": 1583300
    },
    {
      "epoch": 2.5911379064302222,
      "grad_norm": 0.03543964400887489,
      "learning_rate": 4.84946923822012e-06,
      "loss": 0.0104,
      "step": 1583320
    },
    {
      "epoch": 2.5911706368688754,
      "grad_norm": 0.1452084481716156,
      "learning_rate": 4.849403346006602e-06,
      "loss": 0.0114,
      "step": 1583340
    },
    {
      "epoch": 2.5912033673075285,
      "grad_norm": 0.3535889685153961,
      "learning_rate": 4.849337453793085e-06,
      "loss": 0.0184,
      "step": 1583360
    },
    {
      "epoch": 2.591236097746182,
      "grad_norm": 0.1186831146478653,
      "learning_rate": 4.849271561579569e-06,
      "loss": 0.0102,
      "step": 1583380
    },
    {
      "epoch": 2.5912688281848353,
      "grad_norm": 0.1481270045042038,
      "learning_rate": 4.8492056693660514e-06,
      "loss": 0.0088,
      "step": 1583400
    },
    {
      "epoch": 2.5913015586234884,
      "grad_norm": 0.3808923065662384,
      "learning_rate": 4.849139777152534e-06,
      "loss": 0.0108,
      "step": 1583420
    },
    {
      "epoch": 2.591334289062142,
      "grad_norm": 0.5796113610267639,
      "learning_rate": 4.849073884939017e-06,
      "loss": 0.0213,
      "step": 1583440
    },
    {
      "epoch": 2.5913670195007956,
      "grad_norm": 0.6205364465713501,
      "learning_rate": 4.8490079927255005e-06,
      "loss": 0.0132,
      "step": 1583460
    },
    {
      "epoch": 2.5913997499394488,
      "grad_norm": 0.7763132452964783,
      "learning_rate": 4.848942100511983e-06,
      "loss": 0.0144,
      "step": 1583480
    },
    {
      "epoch": 2.591432480378102,
      "grad_norm": 0.5342257022857666,
      "learning_rate": 4.848876208298466e-06,
      "loss": 0.0182,
      "step": 1583500
    },
    {
      "epoch": 2.5914652108167555,
      "grad_norm": 0.19909155368804932,
      "learning_rate": 4.848810316084949e-06,
      "loss": 0.0122,
      "step": 1583520
    },
    {
      "epoch": 2.5914979412554087,
      "grad_norm": 0.1772439032793045,
      "learning_rate": 4.8487444238714315e-06,
      "loss": 0.0173,
      "step": 1583540
    },
    {
      "epoch": 2.591530671694062,
      "grad_norm": 0.6073971390724182,
      "learning_rate": 4.848678531657914e-06,
      "loss": 0.0122,
      "step": 1583560
    },
    {
      "epoch": 2.5915634021327154,
      "grad_norm": 0.31776079535484314,
      "learning_rate": 4.848612639444397e-06,
      "loss": 0.0123,
      "step": 1583580
    },
    {
      "epoch": 2.5915961325713686,
      "grad_norm": 0.5778513550758362,
      "learning_rate": 4.84854674723088e-06,
      "loss": 0.0121,
      "step": 1583600
    },
    {
      "epoch": 2.591628863010022,
      "grad_norm": 0.22836637496948242,
      "learning_rate": 4.848480855017363e-06,
      "loss": 0.0164,
      "step": 1583620
    },
    {
      "epoch": 2.5916615934486753,
      "grad_norm": 0.0937856137752533,
      "learning_rate": 4.848414962803846e-06,
      "loss": 0.0138,
      "step": 1583640
    },
    {
      "epoch": 2.591694323887329,
      "grad_norm": 1.2371851205825806,
      "learning_rate": 4.848349070590329e-06,
      "loss": 0.0159,
      "step": 1583660
    },
    {
      "epoch": 2.591727054325982,
      "grad_norm": 0.7272777557373047,
      "learning_rate": 4.8482831783768115e-06,
      "loss": 0.0158,
      "step": 1583680
    },
    {
      "epoch": 2.591759784764635,
      "grad_norm": 0.5177717208862305,
      "learning_rate": 4.848217286163294e-06,
      "loss": 0.01,
      "step": 1583700
    },
    {
      "epoch": 2.591792515203289,
      "grad_norm": 0.363015353679657,
      "learning_rate": 4.848151393949777e-06,
      "loss": 0.0123,
      "step": 1583720
    },
    {
      "epoch": 2.591825245641942,
      "grad_norm": 0.3012380599975586,
      "learning_rate": 4.8480855017362606e-06,
      "loss": 0.0143,
      "step": 1583740
    },
    {
      "epoch": 2.5918579760805955,
      "grad_norm": 0.6317964792251587,
      "learning_rate": 4.848019609522743e-06,
      "loss": 0.0156,
      "step": 1583760
    },
    {
      "epoch": 2.5918907065192487,
      "grad_norm": 0.4833622872829437,
      "learning_rate": 4.847953717309226e-06,
      "loss": 0.02,
      "step": 1583780
    },
    {
      "epoch": 2.5919234369579023,
      "grad_norm": 0.4269202649593353,
      "learning_rate": 4.847887825095709e-06,
      "loss": 0.0206,
      "step": 1583800
    },
    {
      "epoch": 2.5919561673965554,
      "grad_norm": 0.37341195344924927,
      "learning_rate": 4.8478219328821915e-06,
      "loss": 0.0106,
      "step": 1583820
    },
    {
      "epoch": 2.5919888978352086,
      "grad_norm": 0.6325801610946655,
      "learning_rate": 4.847756040668674e-06,
      "loss": 0.0173,
      "step": 1583840
    },
    {
      "epoch": 2.592021628273862,
      "grad_norm": 0.22800777852535248,
      "learning_rate": 4.847690148455158e-06,
      "loss": 0.0109,
      "step": 1583860
    },
    {
      "epoch": 2.5920543587125153,
      "grad_norm": 0.10157325863838196,
      "learning_rate": 4.847624256241641e-06,
      "loss": 0.0115,
      "step": 1583880
    },
    {
      "epoch": 2.592087089151169,
      "grad_norm": 0.6637526750564575,
      "learning_rate": 4.847558364028123e-06,
      "loss": 0.0136,
      "step": 1583900
    },
    {
      "epoch": 2.592119819589822,
      "grad_norm": 0.10947640240192413,
      "learning_rate": 4.847492471814606e-06,
      "loss": 0.0117,
      "step": 1583920
    },
    {
      "epoch": 2.5921525500284757,
      "grad_norm": 0.07831542193889618,
      "learning_rate": 4.847426579601089e-06,
      "loss": 0.0093,
      "step": 1583940
    },
    {
      "epoch": 2.592185280467129,
      "grad_norm": 0.25132423639297485,
      "learning_rate": 4.8473606873875715e-06,
      "loss": 0.0102,
      "step": 1583960
    },
    {
      "epoch": 2.592218010905782,
      "grad_norm": 0.44179680943489075,
      "learning_rate": 4.847294795174054e-06,
      "loss": 0.0136,
      "step": 1583980
    },
    {
      "epoch": 2.5922507413444356,
      "grad_norm": 0.21781447529792786,
      "learning_rate": 4.847228902960537e-06,
      "loss": 0.0194,
      "step": 1584000
    },
    {
      "epoch": 2.5922834717830887,
      "grad_norm": 0.4358499348163605,
      "learning_rate": 4.847163010747021e-06,
      "loss": 0.0183,
      "step": 1584020
    },
    {
      "epoch": 2.5923162022217423,
      "grad_norm": 0.13892655074596405,
      "learning_rate": 4.847097118533503e-06,
      "loss": 0.0094,
      "step": 1584040
    },
    {
      "epoch": 2.5923489326603955,
      "grad_norm": 0.2639865279197693,
      "learning_rate": 4.847031226319986e-06,
      "loss": 0.017,
      "step": 1584060
    },
    {
      "epoch": 2.592381663099049,
      "grad_norm": 0.2034246325492859,
      "learning_rate": 4.84696533410647e-06,
      "loss": 0.0196,
      "step": 1584080
    },
    {
      "epoch": 2.592414393537702,
      "grad_norm": 0.8971467614173889,
      "learning_rate": 4.846899441892952e-06,
      "loss": 0.0217,
      "step": 1584100
    },
    {
      "epoch": 2.5924471239763553,
      "grad_norm": 0.2707555592060089,
      "learning_rate": 4.846833549679435e-06,
      "loss": 0.0186,
      "step": 1584120
    },
    {
      "epoch": 2.592479854415009,
      "grad_norm": 0.44834253191947937,
      "learning_rate": 4.846767657465918e-06,
      "loss": 0.0142,
      "step": 1584140
    },
    {
      "epoch": 2.592512584853662,
      "grad_norm": 0.611456573009491,
      "learning_rate": 4.846701765252401e-06,
      "loss": 0.0128,
      "step": 1584160
    },
    {
      "epoch": 2.5925453152923157,
      "grad_norm": 0.5056644082069397,
      "learning_rate": 4.846635873038883e-06,
      "loss": 0.0152,
      "step": 1584180
    },
    {
      "epoch": 2.592578045730969,
      "grad_norm": 0.19550150632858276,
      "learning_rate": 4.846569980825366e-06,
      "loss": 0.0112,
      "step": 1584200
    },
    {
      "epoch": 2.5926107761696224,
      "grad_norm": 0.2339393049478531,
      "learning_rate": 4.846504088611849e-06,
      "loss": 0.0211,
      "step": 1584220
    },
    {
      "epoch": 2.5926435066082756,
      "grad_norm": 0.61690354347229,
      "learning_rate": 4.8464381963983324e-06,
      "loss": 0.0127,
      "step": 1584240
    },
    {
      "epoch": 2.5926762370469287,
      "grad_norm": 0.23319993913173676,
      "learning_rate": 4.846372304184815e-06,
      "loss": 0.013,
      "step": 1584260
    },
    {
      "epoch": 2.5927089674855823,
      "grad_norm": 0.2084597945213318,
      "learning_rate": 4.846306411971298e-06,
      "loss": 0.0147,
      "step": 1584280
    },
    {
      "epoch": 2.5927416979242355,
      "grad_norm": 0.15173844993114471,
      "learning_rate": 4.846240519757781e-06,
      "loss": 0.0136,
      "step": 1584300
    },
    {
      "epoch": 2.592774428362889,
      "grad_norm": 0.8906396627426147,
      "learning_rate": 4.846174627544263e-06,
      "loss": 0.0161,
      "step": 1584320
    },
    {
      "epoch": 2.5928071588015422,
      "grad_norm": 0.41500818729400635,
      "learning_rate": 4.846108735330746e-06,
      "loss": 0.0121,
      "step": 1584340
    },
    {
      "epoch": 2.592839889240196,
      "grad_norm": 0.4686849117279053,
      "learning_rate": 4.846042843117229e-06,
      "loss": 0.0168,
      "step": 1584360
    },
    {
      "epoch": 2.592872619678849,
      "grad_norm": 0.17692631483078003,
      "learning_rate": 4.845976950903712e-06,
      "loss": 0.013,
      "step": 1584380
    },
    {
      "epoch": 2.592905350117502,
      "grad_norm": 0.43461793661117554,
      "learning_rate": 4.845911058690194e-06,
      "loss": 0.023,
      "step": 1584400
    },
    {
      "epoch": 2.5929380805561557,
      "grad_norm": 0.11568126827478409,
      "learning_rate": 4.845845166476678e-06,
      "loss": 0.0154,
      "step": 1584420
    },
    {
      "epoch": 2.592970810994809,
      "grad_norm": 0.7501147985458374,
      "learning_rate": 4.845779274263161e-06,
      "loss": 0.0174,
      "step": 1584440
    },
    {
      "epoch": 2.5930035414334625,
      "grad_norm": 0.19537316262722015,
      "learning_rate": 4.845713382049643e-06,
      "loss": 0.0112,
      "step": 1584460
    },
    {
      "epoch": 2.5930362718721156,
      "grad_norm": 0.37183964252471924,
      "learning_rate": 4.845647489836127e-06,
      "loss": 0.0135,
      "step": 1584480
    },
    {
      "epoch": 2.593069002310769,
      "grad_norm": 1.2733125686645508,
      "learning_rate": 4.84558159762261e-06,
      "loss": 0.0259,
      "step": 1584500
    },
    {
      "epoch": 2.5931017327494224,
      "grad_norm": 0.29992401599884033,
      "learning_rate": 4.8455157054090925e-06,
      "loss": 0.0142,
      "step": 1584520
    },
    {
      "epoch": 2.5931344631880755,
      "grad_norm": 0.24583037197589874,
      "learning_rate": 4.845449813195575e-06,
      "loss": 0.0189,
      "step": 1584540
    },
    {
      "epoch": 2.593167193626729,
      "grad_norm": 0.21472644805908203,
      "learning_rate": 4.845383920982058e-06,
      "loss": 0.0123,
      "step": 1584560
    },
    {
      "epoch": 2.5931999240653822,
      "grad_norm": 0.38336071372032166,
      "learning_rate": 4.845318028768541e-06,
      "loss": 0.0124,
      "step": 1584580
    },
    {
      "epoch": 2.5932326545040354,
      "grad_norm": 0.36192482709884644,
      "learning_rate": 4.8452521365550234e-06,
      "loss": 0.0086,
      "step": 1584600
    },
    {
      "epoch": 2.593265384942689,
      "grad_norm": 0.38251444697380066,
      "learning_rate": 4.845186244341506e-06,
      "loss": 0.0128,
      "step": 1584620
    },
    {
      "epoch": 2.5932981153813426,
      "grad_norm": 0.6787700057029724,
      "learning_rate": 4.84512035212799e-06,
      "loss": 0.0119,
      "step": 1584640
    },
    {
      "epoch": 2.5933308458199957,
      "grad_norm": 0.7546865940093994,
      "learning_rate": 4.8450544599144725e-06,
      "loss": 0.0181,
      "step": 1584660
    },
    {
      "epoch": 2.593363576258649,
      "grad_norm": 0.046329256147146225,
      "learning_rate": 4.844988567700955e-06,
      "loss": 0.0158,
      "step": 1584680
    },
    {
      "epoch": 2.5933963066973025,
      "grad_norm": 0.13813631236553192,
      "learning_rate": 4.844922675487438e-06,
      "loss": 0.0119,
      "step": 1584700
    },
    {
      "epoch": 2.5934290371359556,
      "grad_norm": 0.923907458782196,
      "learning_rate": 4.844856783273921e-06,
      "loss": 0.0177,
      "step": 1584720
    },
    {
      "epoch": 2.593461767574609,
      "grad_norm": 0.22021698951721191,
      "learning_rate": 4.8447908910604035e-06,
      "loss": 0.0185,
      "step": 1584740
    },
    {
      "epoch": 2.5934944980132624,
      "grad_norm": 0.32934609055519104,
      "learning_rate": 4.844724998846886e-06,
      "loss": 0.0087,
      "step": 1584760
    },
    {
      "epoch": 2.593527228451916,
      "grad_norm": 1.1308170557022095,
      "learning_rate": 4.84465910663337e-06,
      "loss": 0.0152,
      "step": 1584780
    },
    {
      "epoch": 2.593559958890569,
      "grad_norm": 0.6522926688194275,
      "learning_rate": 4.8445932144198525e-06,
      "loss": 0.0154,
      "step": 1584800
    },
    {
      "epoch": 2.5935926893292223,
      "grad_norm": 0.6066601872444153,
      "learning_rate": 4.844527322206335e-06,
      "loss": 0.0162,
      "step": 1584820
    },
    {
      "epoch": 2.593625419767876,
      "grad_norm": 0.5590306520462036,
      "learning_rate": 4.844461429992818e-06,
      "loss": 0.0117,
      "step": 1584840
    },
    {
      "epoch": 2.593658150206529,
      "grad_norm": 0.5765365362167358,
      "learning_rate": 4.844395537779301e-06,
      "loss": 0.0244,
      "step": 1584860
    },
    {
      "epoch": 2.593690880645182,
      "grad_norm": 0.204604834318161,
      "learning_rate": 4.844329645565784e-06,
      "loss": 0.0088,
      "step": 1584880
    },
    {
      "epoch": 2.5937236110838358,
      "grad_norm": 0.9504161477088928,
      "learning_rate": 4.844263753352267e-06,
      "loss": 0.0208,
      "step": 1584900
    },
    {
      "epoch": 2.5937563415224894,
      "grad_norm": 0.6279975771903992,
      "learning_rate": 4.84419786113875e-06,
      "loss": 0.0113,
      "step": 1584920
    },
    {
      "epoch": 2.5937890719611425,
      "grad_norm": 0.22313694655895233,
      "learning_rate": 4.8441319689252326e-06,
      "loss": 0.0175,
      "step": 1584940
    },
    {
      "epoch": 2.5938218023997957,
      "grad_norm": 0.3633459806442261,
      "learning_rate": 4.844066076711715e-06,
      "loss": 0.0121,
      "step": 1584960
    },
    {
      "epoch": 2.5938545328384492,
      "grad_norm": 0.6345070600509644,
      "learning_rate": 4.844000184498198e-06,
      "loss": 0.0139,
      "step": 1584980
    },
    {
      "epoch": 2.5938872632771024,
      "grad_norm": 0.2813417315483093,
      "learning_rate": 4.843934292284681e-06,
      "loss": 0.0164,
      "step": 1585000
    },
    {
      "epoch": 2.5939199937157555,
      "grad_norm": 0.4219059944152832,
      "learning_rate": 4.8438684000711635e-06,
      "loss": 0.0106,
      "step": 1585020
    },
    {
      "epoch": 2.593952724154409,
      "grad_norm": 0.14740559458732605,
      "learning_rate": 4.843802507857647e-06,
      "loss": 0.0111,
      "step": 1585040
    },
    {
      "epoch": 2.5939854545930623,
      "grad_norm": 0.36782577633857727,
      "learning_rate": 4.84373661564413e-06,
      "loss": 0.0155,
      "step": 1585060
    },
    {
      "epoch": 2.594018185031716,
      "grad_norm": 0.2589619755744934,
      "learning_rate": 4.843670723430613e-06,
      "loss": 0.0176,
      "step": 1585080
    },
    {
      "epoch": 2.594050915470369,
      "grad_norm": 0.24832452833652496,
      "learning_rate": 4.843604831217095e-06,
      "loss": 0.0135,
      "step": 1585100
    },
    {
      "epoch": 2.5940836459090226,
      "grad_norm": 0.37367916107177734,
      "learning_rate": 4.843538939003578e-06,
      "loss": 0.0166,
      "step": 1585120
    },
    {
      "epoch": 2.594116376347676,
      "grad_norm": 0.175246924161911,
      "learning_rate": 4.843473046790062e-06,
      "loss": 0.0205,
      "step": 1585140
    },
    {
      "epoch": 2.594149106786329,
      "grad_norm": 0.13722901046276093,
      "learning_rate": 4.843407154576544e-06,
      "loss": 0.0098,
      "step": 1585160
    },
    {
      "epoch": 2.5941818372249825,
      "grad_norm": 0.5319122672080994,
      "learning_rate": 4.843341262363027e-06,
      "loss": 0.0128,
      "step": 1585180
    },
    {
      "epoch": 2.5942145676636357,
      "grad_norm": 0.46584615111351013,
      "learning_rate": 4.84327537014951e-06,
      "loss": 0.0194,
      "step": 1585200
    },
    {
      "epoch": 2.5942472981022893,
      "grad_norm": 1.0405895709991455,
      "learning_rate": 4.843209477935993e-06,
      "loss": 0.015,
      "step": 1585220
    },
    {
      "epoch": 2.5942800285409424,
      "grad_norm": 0.4569590985774994,
      "learning_rate": 4.843143585722475e-06,
      "loss": 0.0158,
      "step": 1585240
    },
    {
      "epoch": 2.594312758979596,
      "grad_norm": 0.19510120153427124,
      "learning_rate": 4.843077693508958e-06,
      "loss": 0.0136,
      "step": 1585260
    },
    {
      "epoch": 2.594345489418249,
      "grad_norm": 0.19749845564365387,
      "learning_rate": 4.843011801295442e-06,
      "loss": 0.011,
      "step": 1585280
    },
    {
      "epoch": 2.5943782198569023,
      "grad_norm": 0.43519043922424316,
      "learning_rate": 4.842945909081924e-06,
      "loss": 0.0176,
      "step": 1585300
    },
    {
      "epoch": 2.594410950295556,
      "grad_norm": 0.3659325838088989,
      "learning_rate": 4.842880016868407e-06,
      "loss": 0.0156,
      "step": 1585320
    },
    {
      "epoch": 2.594443680734209,
      "grad_norm": 0.3445329964160919,
      "learning_rate": 4.84281412465489e-06,
      "loss": 0.0161,
      "step": 1585340
    },
    {
      "epoch": 2.5944764111728627,
      "grad_norm": 0.18777306377887726,
      "learning_rate": 4.842748232441373e-06,
      "loss": 0.0129,
      "step": 1585360
    },
    {
      "epoch": 2.594509141611516,
      "grad_norm": 0.22368913888931274,
      "learning_rate": 4.842682340227855e-06,
      "loss": 0.0113,
      "step": 1585380
    },
    {
      "epoch": 2.5945418720501694,
      "grad_norm": 0.27334854006767273,
      "learning_rate": 4.842616448014338e-06,
      "loss": 0.0122,
      "step": 1585400
    },
    {
      "epoch": 2.5945746024888225,
      "grad_norm": 0.1843957155942917,
      "learning_rate": 4.842550555800821e-06,
      "loss": 0.0142,
      "step": 1585420
    },
    {
      "epoch": 2.5946073329274757,
      "grad_norm": 0.07076006382703781,
      "learning_rate": 4.8424846635873044e-06,
      "loss": 0.0155,
      "step": 1585440
    },
    {
      "epoch": 2.5946400633661293,
      "grad_norm": 0.11493612825870514,
      "learning_rate": 4.842418771373787e-06,
      "loss": 0.0141,
      "step": 1585460
    },
    {
      "epoch": 2.5946727938047824,
      "grad_norm": 0.35011395812034607,
      "learning_rate": 4.84235287916027e-06,
      "loss": 0.0128,
      "step": 1585480
    },
    {
      "epoch": 2.594705524243436,
      "grad_norm": 0.20334821939468384,
      "learning_rate": 4.8422869869467535e-06,
      "loss": 0.0211,
      "step": 1585500
    },
    {
      "epoch": 2.594738254682089,
      "grad_norm": 0.7864314317703247,
      "learning_rate": 4.842221094733236e-06,
      "loss": 0.0111,
      "step": 1585520
    },
    {
      "epoch": 2.594770985120743,
      "grad_norm": 0.25924915075302124,
      "learning_rate": 4.842155202519719e-06,
      "loss": 0.0144,
      "step": 1585540
    },
    {
      "epoch": 2.594803715559396,
      "grad_norm": 0.23536476492881775,
      "learning_rate": 4.842089310306202e-06,
      "loss": 0.0108,
      "step": 1585560
    },
    {
      "epoch": 2.594836445998049,
      "grad_norm": 0.4787188172340393,
      "learning_rate": 4.8420234180926845e-06,
      "loss": 0.0195,
      "step": 1585580
    },
    {
      "epoch": 2.5948691764367027,
      "grad_norm": 0.40241819620132446,
      "learning_rate": 4.841957525879167e-06,
      "loss": 0.013,
      "step": 1585600
    },
    {
      "epoch": 2.594901906875356,
      "grad_norm": 0.3228282928466797,
      "learning_rate": 4.84189163366565e-06,
      "loss": 0.0092,
      "step": 1585620
    },
    {
      "epoch": 2.5949346373140094,
      "grad_norm": 0.3562277555465698,
      "learning_rate": 4.841825741452133e-06,
      "loss": 0.0151,
      "step": 1585640
    },
    {
      "epoch": 2.5949673677526626,
      "grad_norm": 0.2885459363460541,
      "learning_rate": 4.841759849238616e-06,
      "loss": 0.0125,
      "step": 1585660
    },
    {
      "epoch": 2.595000098191316,
      "grad_norm": 0.33504045009613037,
      "learning_rate": 4.841693957025099e-06,
      "loss": 0.0118,
      "step": 1585680
    },
    {
      "epoch": 2.5950328286299693,
      "grad_norm": 0.28415390849113464,
      "learning_rate": 4.841628064811582e-06,
      "loss": 0.0131,
      "step": 1585700
    },
    {
      "epoch": 2.5950655590686225,
      "grad_norm": 0.09727571904659271,
      "learning_rate": 4.8415621725980645e-06,
      "loss": 0.0139,
      "step": 1585720
    },
    {
      "epoch": 2.595098289507276,
      "grad_norm": 0.491317480802536,
      "learning_rate": 4.841496280384547e-06,
      "loss": 0.0098,
      "step": 1585740
    },
    {
      "epoch": 2.595131019945929,
      "grad_norm": 0.6681296229362488,
      "learning_rate": 4.84143038817103e-06,
      "loss": 0.0197,
      "step": 1585760
    },
    {
      "epoch": 2.595163750384583,
      "grad_norm": 0.7311333417892456,
      "learning_rate": 4.841364495957513e-06,
      "loss": 0.0151,
      "step": 1585780
    },
    {
      "epoch": 2.595196480823236,
      "grad_norm": 1.2094162702560425,
      "learning_rate": 4.8412986037439955e-06,
      "loss": 0.0153,
      "step": 1585800
    },
    {
      "epoch": 2.5952292112618895,
      "grad_norm": 0.625781774520874,
      "learning_rate": 4.841232711530478e-06,
      "loss": 0.0127,
      "step": 1585820
    },
    {
      "epoch": 2.5952619417005427,
      "grad_norm": 0.8553531765937805,
      "learning_rate": 4.841166819316962e-06,
      "loss": 0.0184,
      "step": 1585840
    },
    {
      "epoch": 2.595294672139196,
      "grad_norm": 1.3810962438583374,
      "learning_rate": 4.8411009271034445e-06,
      "loss": 0.0094,
      "step": 1585860
    },
    {
      "epoch": 2.5953274025778494,
      "grad_norm": 0.3049561381340027,
      "learning_rate": 4.841035034889927e-06,
      "loss": 0.0164,
      "step": 1585880
    },
    {
      "epoch": 2.5953601330165026,
      "grad_norm": 0.189166858792305,
      "learning_rate": 4.840969142676411e-06,
      "loss": 0.0223,
      "step": 1585900
    },
    {
      "epoch": 2.595392863455156,
      "grad_norm": 0.2769424617290497,
      "learning_rate": 4.840903250462894e-06,
      "loss": 0.0212,
      "step": 1585920
    },
    {
      "epoch": 2.5954255938938093,
      "grad_norm": 0.03043527528643608,
      "learning_rate": 4.840837358249376e-06,
      "loss": 0.0117,
      "step": 1585940
    },
    {
      "epoch": 2.595458324332463,
      "grad_norm": 0.6233152747154236,
      "learning_rate": 4.840771466035859e-06,
      "loss": 0.0223,
      "step": 1585960
    },
    {
      "epoch": 2.595491054771116,
      "grad_norm": 0.25188109278678894,
      "learning_rate": 4.840705573822342e-06,
      "loss": 0.0161,
      "step": 1585980
    },
    {
      "epoch": 2.5955237852097692,
      "grad_norm": 0.186621755361557,
      "learning_rate": 4.8406396816088245e-06,
      "loss": 0.0101,
      "step": 1586000
    },
    {
      "epoch": 2.595556515648423,
      "grad_norm": 0.3814854323863983,
      "learning_rate": 4.840573789395307e-06,
      "loss": 0.0148,
      "step": 1586020
    },
    {
      "epoch": 2.595589246087076,
      "grad_norm": 0.4563714861869812,
      "learning_rate": 4.84050789718179e-06,
      "loss": 0.0123,
      "step": 1586040
    },
    {
      "epoch": 2.595621976525729,
      "grad_norm": 0.5793346166610718,
      "learning_rate": 4.840442004968274e-06,
      "loss": 0.0132,
      "step": 1586060
    },
    {
      "epoch": 2.5956547069643827,
      "grad_norm": 0.45107516646385193,
      "learning_rate": 4.840376112754756e-06,
      "loss": 0.0138,
      "step": 1586080
    },
    {
      "epoch": 2.5956874374030363,
      "grad_norm": 0.2824070453643799,
      "learning_rate": 4.840310220541239e-06,
      "loss": 0.0122,
      "step": 1586100
    },
    {
      "epoch": 2.5957201678416895,
      "grad_norm": 0.46878814697265625,
      "learning_rate": 4.840244328327722e-06,
      "loss": 0.015,
      "step": 1586120
    },
    {
      "epoch": 2.5957528982803426,
      "grad_norm": 0.1238861009478569,
      "learning_rate": 4.8401784361142046e-06,
      "loss": 0.0192,
      "step": 1586140
    },
    {
      "epoch": 2.595785628718996,
      "grad_norm": 0.6135530471801758,
      "learning_rate": 4.840112543900687e-06,
      "loss": 0.0139,
      "step": 1586160
    },
    {
      "epoch": 2.5958183591576494,
      "grad_norm": 0.3373364210128784,
      "learning_rate": 4.84004665168717e-06,
      "loss": 0.0133,
      "step": 1586180
    },
    {
      "epoch": 2.5958510895963025,
      "grad_norm": 0.7431996464729309,
      "learning_rate": 4.839980759473654e-06,
      "loss": 0.0114,
      "step": 1586200
    },
    {
      "epoch": 2.595883820034956,
      "grad_norm": 0.20522378385066986,
      "learning_rate": 4.839914867260136e-06,
      "loss": 0.0128,
      "step": 1586220
    },
    {
      "epoch": 2.5959165504736097,
      "grad_norm": 0.1513158679008484,
      "learning_rate": 4.839848975046619e-06,
      "loss": 0.0158,
      "step": 1586240
    },
    {
      "epoch": 2.595949280912263,
      "grad_norm": 0.666316032409668,
      "learning_rate": 4.839783082833102e-06,
      "loss": 0.0103,
      "step": 1586260
    },
    {
      "epoch": 2.595982011350916,
      "grad_norm": 0.11107975989580154,
      "learning_rate": 4.839717190619585e-06,
      "loss": 0.0127,
      "step": 1586280
    },
    {
      "epoch": 2.5960147417895696,
      "grad_norm": 0.22642087936401367,
      "learning_rate": 4.839651298406068e-06,
      "loss": 0.0117,
      "step": 1586300
    },
    {
      "epoch": 2.5960474722282227,
      "grad_norm": 0.17232100665569305,
      "learning_rate": 4.839585406192551e-06,
      "loss": 0.0184,
      "step": 1586320
    },
    {
      "epoch": 2.596080202666876,
      "grad_norm": 0.19078867137432098,
      "learning_rate": 4.839519513979034e-06,
      "loss": 0.0131,
      "step": 1586340
    },
    {
      "epoch": 2.5961129331055295,
      "grad_norm": 0.0835387259721756,
      "learning_rate": 4.839453621765516e-06,
      "loss": 0.0208,
      "step": 1586360
    },
    {
      "epoch": 2.596145663544183,
      "grad_norm": 0.22571434080600739,
      "learning_rate": 4.839387729551999e-06,
      "loss": 0.0248,
      "step": 1586380
    },
    {
      "epoch": 2.5961783939828362,
      "grad_norm": 0.5763135552406311,
      "learning_rate": 4.839321837338482e-06,
      "loss": 0.0179,
      "step": 1586400
    },
    {
      "epoch": 2.5962111244214894,
      "grad_norm": 0.46110984683036804,
      "learning_rate": 4.839255945124965e-06,
      "loss": 0.0097,
      "step": 1586420
    },
    {
      "epoch": 2.596243854860143,
      "grad_norm": 0.28174299001693726,
      "learning_rate": 4.839190052911447e-06,
      "loss": 0.0146,
      "step": 1586440
    },
    {
      "epoch": 2.596276585298796,
      "grad_norm": 0.7299913167953491,
      "learning_rate": 4.839124160697931e-06,
      "loss": 0.0244,
      "step": 1586460
    },
    {
      "epoch": 2.5963093157374493,
      "grad_norm": 0.25326937437057495,
      "learning_rate": 4.839058268484414e-06,
      "loss": 0.0079,
      "step": 1586480
    },
    {
      "epoch": 2.596342046176103,
      "grad_norm": 0.6497163772583008,
      "learning_rate": 4.8389923762708964e-06,
      "loss": 0.0145,
      "step": 1586500
    },
    {
      "epoch": 2.5963747766147565,
      "grad_norm": 0.16357648372650146,
      "learning_rate": 4.838926484057379e-06,
      "loss": 0.0092,
      "step": 1586520
    },
    {
      "epoch": 2.5964075070534096,
      "grad_norm": 0.3851313889026642,
      "learning_rate": 4.838860591843863e-06,
      "loss": 0.0129,
      "step": 1586540
    },
    {
      "epoch": 2.5964402374920628,
      "grad_norm": 0.8950898051261902,
      "learning_rate": 4.8387946996303455e-06,
      "loss": 0.0207,
      "step": 1586560
    },
    {
      "epoch": 2.5964729679307164,
      "grad_norm": 0.5251776576042175,
      "learning_rate": 4.838728807416828e-06,
      "loss": 0.0143,
      "step": 1586580
    },
    {
      "epoch": 2.5965056983693695,
      "grad_norm": 0.40013763308525085,
      "learning_rate": 4.838662915203311e-06,
      "loss": 0.0143,
      "step": 1586600
    },
    {
      "epoch": 2.5965384288080227,
      "grad_norm": 0.13908527791500092,
      "learning_rate": 4.838597022989794e-06,
      "loss": 0.0138,
      "step": 1586620
    },
    {
      "epoch": 2.5965711592466763,
      "grad_norm": 0.139101043343544,
      "learning_rate": 4.8385311307762765e-06,
      "loss": 0.0154,
      "step": 1586640
    },
    {
      "epoch": 2.5966038896853294,
      "grad_norm": 0.20467542111873627,
      "learning_rate": 4.838465238562759e-06,
      "loss": 0.0184,
      "step": 1586660
    },
    {
      "epoch": 2.596636620123983,
      "grad_norm": 0.528786838054657,
      "learning_rate": 4.838399346349242e-06,
      "loss": 0.0145,
      "step": 1586680
    },
    {
      "epoch": 2.596669350562636,
      "grad_norm": 1.4509072303771973,
      "learning_rate": 4.8383334541357255e-06,
      "loss": 0.0129,
      "step": 1586700
    },
    {
      "epoch": 2.5967020810012897,
      "grad_norm": 0.19583655893802643,
      "learning_rate": 4.838267561922208e-06,
      "loss": 0.015,
      "step": 1586720
    },
    {
      "epoch": 2.596734811439943,
      "grad_norm": 0.15580357611179352,
      "learning_rate": 4.838201669708691e-06,
      "loss": 0.0157,
      "step": 1586740
    },
    {
      "epoch": 2.596767541878596,
      "grad_norm": 1.1371002197265625,
      "learning_rate": 4.838135777495174e-06,
      "loss": 0.0118,
      "step": 1586760
    },
    {
      "epoch": 2.5968002723172496,
      "grad_norm": 0.370352566242218,
      "learning_rate": 4.8380698852816565e-06,
      "loss": 0.0157,
      "step": 1586780
    },
    {
      "epoch": 2.596833002755903,
      "grad_norm": 0.5080158114433289,
      "learning_rate": 4.838003993068139e-06,
      "loss": 0.0203,
      "step": 1586800
    },
    {
      "epoch": 2.5968657331945564,
      "grad_norm": 0.20374608039855957,
      "learning_rate": 4.837938100854622e-06,
      "loss": 0.0159,
      "step": 1586820
    },
    {
      "epoch": 2.5968984636332095,
      "grad_norm": 0.19346672296524048,
      "learning_rate": 4.837872208641105e-06,
      "loss": 0.0138,
      "step": 1586840
    },
    {
      "epoch": 2.596931194071863,
      "grad_norm": 0.3056182861328125,
      "learning_rate": 4.837806316427588e-06,
      "loss": 0.0101,
      "step": 1586860
    },
    {
      "epoch": 2.5969639245105163,
      "grad_norm": 0.144993394613266,
      "learning_rate": 4.837740424214071e-06,
      "loss": 0.0104,
      "step": 1586880
    },
    {
      "epoch": 2.5969966549491694,
      "grad_norm": 0.21240107715129852,
      "learning_rate": 4.837674532000554e-06,
      "loss": 0.0123,
      "step": 1586900
    },
    {
      "epoch": 2.597029385387823,
      "grad_norm": 0.46966081857681274,
      "learning_rate": 4.837608639787037e-06,
      "loss": 0.0172,
      "step": 1586920
    },
    {
      "epoch": 2.597062115826476,
      "grad_norm": 0.15844717621803284,
      "learning_rate": 4.83754274757352e-06,
      "loss": 0.0165,
      "step": 1586940
    },
    {
      "epoch": 2.5970948462651298,
      "grad_norm": 0.5220367908477783,
      "learning_rate": 4.837476855360003e-06,
      "loss": 0.0181,
      "step": 1586960
    },
    {
      "epoch": 2.597127576703783,
      "grad_norm": 0.652262270450592,
      "learning_rate": 4.8374109631464856e-06,
      "loss": 0.0182,
      "step": 1586980
    },
    {
      "epoch": 2.5971603071424365,
      "grad_norm": 0.36001166701316833,
      "learning_rate": 4.837345070932968e-06,
      "loss": 0.0136,
      "step": 1587000
    },
    {
      "epoch": 2.5971930375810897,
      "grad_norm": 0.5728998184204102,
      "learning_rate": 4.837279178719451e-06,
      "loss": 0.0145,
      "step": 1587020
    },
    {
      "epoch": 2.597225768019743,
      "grad_norm": 0.31970930099487305,
      "learning_rate": 4.837213286505934e-06,
      "loss": 0.0136,
      "step": 1587040
    },
    {
      "epoch": 2.5972584984583964,
      "grad_norm": 0.34498363733291626,
      "learning_rate": 4.8371473942924165e-06,
      "loss": 0.0144,
      "step": 1587060
    },
    {
      "epoch": 2.5972912288970496,
      "grad_norm": 0.3697775602340698,
      "learning_rate": 4.8370815020789e-06,
      "loss": 0.0112,
      "step": 1587080
    },
    {
      "epoch": 2.597323959335703,
      "grad_norm": 0.4623577296733856,
      "learning_rate": 4.837015609865383e-06,
      "loss": 0.012,
      "step": 1587100
    },
    {
      "epoch": 2.5973566897743563,
      "grad_norm": 0.2401370257139206,
      "learning_rate": 4.836949717651866e-06,
      "loss": 0.0237,
      "step": 1587120
    },
    {
      "epoch": 2.59738942021301,
      "grad_norm": 0.17330962419509888,
      "learning_rate": 4.836883825438348e-06,
      "loss": 0.0136,
      "step": 1587140
    },
    {
      "epoch": 2.597422150651663,
      "grad_norm": 0.8516677618026733,
      "learning_rate": 4.836817933224831e-06,
      "loss": 0.0141,
      "step": 1587160
    },
    {
      "epoch": 2.597454881090316,
      "grad_norm": 0.210651695728302,
      "learning_rate": 4.836752041011314e-06,
      "loss": 0.0115,
      "step": 1587180
    },
    {
      "epoch": 2.59748761152897,
      "grad_norm": 0.6266327500343323,
      "learning_rate": 4.8366861487977966e-06,
      "loss": 0.013,
      "step": 1587200
    },
    {
      "epoch": 2.597520341967623,
      "grad_norm": 0.8157235383987427,
      "learning_rate": 4.836620256584279e-06,
      "loss": 0.0152,
      "step": 1587220
    },
    {
      "epoch": 2.5975530724062765,
      "grad_norm": 1.8688102960586548,
      "learning_rate": 4.836554364370763e-06,
      "loss": 0.019,
      "step": 1587240
    },
    {
      "epoch": 2.5975858028449297,
      "grad_norm": 0.3102743625640869,
      "learning_rate": 4.836488472157246e-06,
      "loss": 0.0088,
      "step": 1587260
    },
    {
      "epoch": 2.5976185332835833,
      "grad_norm": 0.18891486525535583,
      "learning_rate": 4.836422579943728e-06,
      "loss": 0.0135,
      "step": 1587280
    },
    {
      "epoch": 2.5976512637222364,
      "grad_norm": 0.7656391859054565,
      "learning_rate": 4.836356687730211e-06,
      "loss": 0.0207,
      "step": 1587300
    },
    {
      "epoch": 2.5976839941608896,
      "grad_norm": 0.3522055447101593,
      "learning_rate": 4.836290795516695e-06,
      "loss": 0.0136,
      "step": 1587320
    },
    {
      "epoch": 2.597716724599543,
      "grad_norm": 0.7622514963150024,
      "learning_rate": 4.8362249033031774e-06,
      "loss": 0.0131,
      "step": 1587340
    },
    {
      "epoch": 2.5977494550381963,
      "grad_norm": 0.17334404587745667,
      "learning_rate": 4.83615901108966e-06,
      "loss": 0.0168,
      "step": 1587360
    },
    {
      "epoch": 2.59778218547685,
      "grad_norm": 0.32609355449676514,
      "learning_rate": 4.836093118876143e-06,
      "loss": 0.0118,
      "step": 1587380
    },
    {
      "epoch": 2.597814915915503,
      "grad_norm": 0.27816131711006165,
      "learning_rate": 4.836027226662626e-06,
      "loss": 0.0186,
      "step": 1587400
    },
    {
      "epoch": 2.5978476463541567,
      "grad_norm": 0.40410590171813965,
      "learning_rate": 4.835961334449108e-06,
      "loss": 0.0088,
      "step": 1587420
    },
    {
      "epoch": 2.59788037679281,
      "grad_norm": 1.5173225402832031,
      "learning_rate": 4.835895442235591e-06,
      "loss": 0.0116,
      "step": 1587440
    },
    {
      "epoch": 2.597913107231463,
      "grad_norm": 0.6879150867462158,
      "learning_rate": 4.835829550022074e-06,
      "loss": 0.0118,
      "step": 1587460
    },
    {
      "epoch": 2.5979458376701166,
      "grad_norm": 0.11588853597640991,
      "learning_rate": 4.8357636578085575e-06,
      "loss": 0.0079,
      "step": 1587480
    },
    {
      "epoch": 2.5979785681087697,
      "grad_norm": 0.6740793585777283,
      "learning_rate": 4.83569776559504e-06,
      "loss": 0.0141,
      "step": 1587500
    },
    {
      "epoch": 2.5980112985474233,
      "grad_norm": 0.5500514507293701,
      "learning_rate": 4.835631873381523e-06,
      "loss": 0.0094,
      "step": 1587520
    },
    {
      "epoch": 2.5980440289860764,
      "grad_norm": 0.17518281936645508,
      "learning_rate": 4.835565981168006e-06,
      "loss": 0.0206,
      "step": 1587540
    },
    {
      "epoch": 2.59807675942473,
      "grad_norm": 0.138965904712677,
      "learning_rate": 4.835500088954488e-06,
      "loss": 0.0108,
      "step": 1587560
    },
    {
      "epoch": 2.598109489863383,
      "grad_norm": 0.4335944652557373,
      "learning_rate": 4.835434196740971e-06,
      "loss": 0.0167,
      "step": 1587580
    },
    {
      "epoch": 2.5981422203020363,
      "grad_norm": 0.12291275709867477,
      "learning_rate": 4.835368304527455e-06,
      "loss": 0.0128,
      "step": 1587600
    },
    {
      "epoch": 2.59817495074069,
      "grad_norm": 0.12448525428771973,
      "learning_rate": 4.8353024123139375e-06,
      "loss": 0.0113,
      "step": 1587620
    },
    {
      "epoch": 2.598207681179343,
      "grad_norm": 0.2794574201107025,
      "learning_rate": 4.83523652010042e-06,
      "loss": 0.0148,
      "step": 1587640
    },
    {
      "epoch": 2.5982404116179962,
      "grad_norm": 0.4418106973171234,
      "learning_rate": 4.835170627886903e-06,
      "loss": 0.011,
      "step": 1587660
    },
    {
      "epoch": 2.59827314205665,
      "grad_norm": 0.22847241163253784,
      "learning_rate": 4.835104735673386e-06,
      "loss": 0.0113,
      "step": 1587680
    },
    {
      "epoch": 2.5983058724953034,
      "grad_norm": 0.36195921897888184,
      "learning_rate": 4.8350388434598684e-06,
      "loss": 0.0128,
      "step": 1587700
    },
    {
      "epoch": 2.5983386029339566,
      "grad_norm": 0.047881655395030975,
      "learning_rate": 4.834972951246352e-06,
      "loss": 0.0093,
      "step": 1587720
    },
    {
      "epoch": 2.5983713333726097,
      "grad_norm": 0.9703535437583923,
      "learning_rate": 4.834907059032835e-06,
      "loss": 0.0134,
      "step": 1587740
    },
    {
      "epoch": 2.5984040638112633,
      "grad_norm": 0.2801404893398285,
      "learning_rate": 4.8348411668193175e-06,
      "loss": 0.014,
      "step": 1587760
    },
    {
      "epoch": 2.5984367942499165,
      "grad_norm": 0.22644244134426117,
      "learning_rate": 4.8347752746058e-06,
      "loss": 0.0144,
      "step": 1587780
    },
    {
      "epoch": 2.5984695246885696,
      "grad_norm": 0.7045603394508362,
      "learning_rate": 4.834709382392283e-06,
      "loss": 0.0175,
      "step": 1587800
    },
    {
      "epoch": 2.598502255127223,
      "grad_norm": 1.1467362642288208,
      "learning_rate": 4.834643490178766e-06,
      "loss": 0.0119,
      "step": 1587820
    },
    {
      "epoch": 2.598534985565877,
      "grad_norm": 0.5008385181427002,
      "learning_rate": 4.8345775979652485e-06,
      "loss": 0.0092,
      "step": 1587840
    },
    {
      "epoch": 2.59856771600453,
      "grad_norm": 0.4094001352787018,
      "learning_rate": 4.834511705751731e-06,
      "loss": 0.0119,
      "step": 1587860
    },
    {
      "epoch": 2.598600446443183,
      "grad_norm": 0.5748826861381531,
      "learning_rate": 4.834445813538215e-06,
      "loss": 0.0156,
      "step": 1587880
    },
    {
      "epoch": 2.5986331768818367,
      "grad_norm": 0.40196722745895386,
      "learning_rate": 4.8343799213246975e-06,
      "loss": 0.0134,
      "step": 1587900
    },
    {
      "epoch": 2.59866590732049,
      "grad_norm": 0.10203515738248825,
      "learning_rate": 4.83431402911118e-06,
      "loss": 0.0125,
      "step": 1587920
    },
    {
      "epoch": 2.598698637759143,
      "grad_norm": 0.9378442764282227,
      "learning_rate": 4.834248136897663e-06,
      "loss": 0.0143,
      "step": 1587940
    },
    {
      "epoch": 2.5987313681977966,
      "grad_norm": 0.323691725730896,
      "learning_rate": 4.834182244684147e-06,
      "loss": 0.0143,
      "step": 1587960
    },
    {
      "epoch": 2.59876409863645,
      "grad_norm": 0.4165661633014679,
      "learning_rate": 4.834116352470629e-06,
      "loss": 0.0155,
      "step": 1587980
    },
    {
      "epoch": 2.5987968290751033,
      "grad_norm": 0.35824963450431824,
      "learning_rate": 4.834050460257112e-06,
      "loss": 0.0093,
      "step": 1588000
    },
    {
      "epoch": 2.5988295595137565,
      "grad_norm": 0.7483570575714111,
      "learning_rate": 4.833984568043595e-06,
      "loss": 0.0187,
      "step": 1588020
    },
    {
      "epoch": 2.59886228995241,
      "grad_norm": 0.5218238830566406,
      "learning_rate": 4.8339186758300776e-06,
      "loss": 0.0169,
      "step": 1588040
    },
    {
      "epoch": 2.5988950203910632,
      "grad_norm": 0.2821083068847656,
      "learning_rate": 4.83385278361656e-06,
      "loss": 0.0076,
      "step": 1588060
    },
    {
      "epoch": 2.5989277508297164,
      "grad_norm": 0.2634655237197876,
      "learning_rate": 4.833786891403043e-06,
      "loss": 0.008,
      "step": 1588080
    },
    {
      "epoch": 2.59896048126837,
      "grad_norm": 0.6477000117301941,
      "learning_rate": 4.833720999189527e-06,
      "loss": 0.013,
      "step": 1588100
    },
    {
      "epoch": 2.598993211707023,
      "grad_norm": 0.6285508871078491,
      "learning_rate": 4.833655106976009e-06,
      "loss": 0.0202,
      "step": 1588120
    },
    {
      "epoch": 2.5990259421456767,
      "grad_norm": 0.3643096685409546,
      "learning_rate": 4.833589214762492e-06,
      "loss": 0.0224,
      "step": 1588140
    },
    {
      "epoch": 2.59905867258433,
      "grad_norm": 0.2735935151576996,
      "learning_rate": 4.833523322548975e-06,
      "loss": 0.0093,
      "step": 1588160
    },
    {
      "epoch": 2.5990914030229835,
      "grad_norm": 0.2343401312828064,
      "learning_rate": 4.833457430335458e-06,
      "loss": 0.0138,
      "step": 1588180
    },
    {
      "epoch": 2.5991241334616366,
      "grad_norm": 0.2299579679965973,
      "learning_rate": 4.83339153812194e-06,
      "loss": 0.0141,
      "step": 1588200
    },
    {
      "epoch": 2.5991568639002898,
      "grad_norm": 0.6215233206748962,
      "learning_rate": 4.833325645908423e-06,
      "loss": 0.0182,
      "step": 1588220
    },
    {
      "epoch": 2.5991895943389434,
      "grad_norm": 0.5266966819763184,
      "learning_rate": 4.833259753694906e-06,
      "loss": 0.015,
      "step": 1588240
    },
    {
      "epoch": 2.5992223247775965,
      "grad_norm": 0.1577746719121933,
      "learning_rate": 4.8331938614813885e-06,
      "loss": 0.0158,
      "step": 1588260
    },
    {
      "epoch": 2.59925505521625,
      "grad_norm": 0.4474388659000397,
      "learning_rate": 4.833127969267872e-06,
      "loss": 0.0123,
      "step": 1588280
    },
    {
      "epoch": 2.5992877856549033,
      "grad_norm": 0.12824715673923492,
      "learning_rate": 4.833062077054355e-06,
      "loss": 0.0111,
      "step": 1588300
    },
    {
      "epoch": 2.599320516093557,
      "grad_norm": 0.2869890630245209,
      "learning_rate": 4.832996184840838e-06,
      "loss": 0.0125,
      "step": 1588320
    },
    {
      "epoch": 2.59935324653221,
      "grad_norm": 0.32272934913635254,
      "learning_rate": 4.832930292627321e-06,
      "loss": 0.0219,
      "step": 1588340
    },
    {
      "epoch": 2.599385976970863,
      "grad_norm": 0.26590752601623535,
      "learning_rate": 4.832864400413804e-06,
      "loss": 0.0184,
      "step": 1588360
    },
    {
      "epoch": 2.5994187074095167,
      "grad_norm": 0.29690021276474,
      "learning_rate": 4.832798508200287e-06,
      "loss": 0.0153,
      "step": 1588380
    },
    {
      "epoch": 2.59945143784817,
      "grad_norm": 0.12591968476772308,
      "learning_rate": 4.832732615986769e-06,
      "loss": 0.0109,
      "step": 1588400
    },
    {
      "epoch": 2.5994841682868235,
      "grad_norm": 0.4292058050632477,
      "learning_rate": 4.832666723773252e-06,
      "loss": 0.0105,
      "step": 1588420
    },
    {
      "epoch": 2.5995168987254766,
      "grad_norm": 1.2951312065124512,
      "learning_rate": 4.832600831559735e-06,
      "loss": 0.0147,
      "step": 1588440
    },
    {
      "epoch": 2.5995496291641302,
      "grad_norm": 0.22148144245147705,
      "learning_rate": 4.832534939346218e-06,
      "loss": 0.0146,
      "step": 1588460
    },
    {
      "epoch": 2.5995823596027834,
      "grad_norm": 0.3559989631175995,
      "learning_rate": 4.8324690471327e-06,
      "loss": 0.0153,
      "step": 1588480
    },
    {
      "epoch": 2.5996150900414365,
      "grad_norm": 0.3368445634841919,
      "learning_rate": 4.832403154919184e-06,
      "loss": 0.0144,
      "step": 1588500
    },
    {
      "epoch": 2.59964782048009,
      "grad_norm": 0.11999895423650742,
      "learning_rate": 4.832337262705667e-06,
      "loss": 0.0101,
      "step": 1588520
    },
    {
      "epoch": 2.5996805509187433,
      "grad_norm": 0.4468197226524353,
      "learning_rate": 4.8322713704921494e-06,
      "loss": 0.0124,
      "step": 1588540
    },
    {
      "epoch": 2.599713281357397,
      "grad_norm": 0.34285494685173035,
      "learning_rate": 4.832205478278632e-06,
      "loss": 0.0114,
      "step": 1588560
    },
    {
      "epoch": 2.59974601179605,
      "grad_norm": 0.7026714086532593,
      "learning_rate": 4.832139586065115e-06,
      "loss": 0.0171,
      "step": 1588580
    },
    {
      "epoch": 2.5997787422347036,
      "grad_norm": 0.3835081458091736,
      "learning_rate": 4.832073693851598e-06,
      "loss": 0.0157,
      "step": 1588600
    },
    {
      "epoch": 2.5998114726733568,
      "grad_norm": 0.42329731583595276,
      "learning_rate": 4.83200780163808e-06,
      "loss": 0.0179,
      "step": 1588620
    },
    {
      "epoch": 2.59984420311201,
      "grad_norm": 0.3735390603542328,
      "learning_rate": 4.831941909424563e-06,
      "loss": 0.0128,
      "step": 1588640
    },
    {
      "epoch": 2.5998769335506635,
      "grad_norm": 0.2233194261789322,
      "learning_rate": 4.831876017211047e-06,
      "loss": 0.0132,
      "step": 1588660
    },
    {
      "epoch": 2.5999096639893167,
      "grad_norm": 0.6749343276023865,
      "learning_rate": 4.8318101249975295e-06,
      "loss": 0.0159,
      "step": 1588680
    },
    {
      "epoch": 2.5999423944279703,
      "grad_norm": 0.15866389870643616,
      "learning_rate": 4.831744232784012e-06,
      "loss": 0.0147,
      "step": 1588700
    },
    {
      "epoch": 2.5999751248666234,
      "grad_norm": 0.11993657797574997,
      "learning_rate": 4.831678340570495e-06,
      "loss": 0.0146,
      "step": 1588720
    },
    {
      "epoch": 2.600007855305277,
      "grad_norm": 0.10503292828798294,
      "learning_rate": 4.8316124483569785e-06,
      "loss": 0.0181,
      "step": 1588740
    },
    {
      "epoch": 2.60004058574393,
      "grad_norm": 0.5290665030479431,
      "learning_rate": 4.831546556143461e-06,
      "loss": 0.018,
      "step": 1588760
    },
    {
      "epoch": 2.6000733161825833,
      "grad_norm": 0.12623360753059387,
      "learning_rate": 4.831480663929944e-06,
      "loss": 0.0115,
      "step": 1588780
    },
    {
      "epoch": 2.600106046621237,
      "grad_norm": 0.36071428656578064,
      "learning_rate": 4.831414771716427e-06,
      "loss": 0.0163,
      "step": 1588800
    },
    {
      "epoch": 2.60013877705989,
      "grad_norm": 0.7100075483322144,
      "learning_rate": 4.8313488795029095e-06,
      "loss": 0.0152,
      "step": 1588820
    },
    {
      "epoch": 2.6001715074985436,
      "grad_norm": 0.4641660451889038,
      "learning_rate": 4.831282987289392e-06,
      "loss": 0.0132,
      "step": 1588840
    },
    {
      "epoch": 2.600204237937197,
      "grad_norm": 0.12673307955265045,
      "learning_rate": 4.831217095075875e-06,
      "loss": 0.0117,
      "step": 1588860
    },
    {
      "epoch": 2.6002369683758504,
      "grad_norm": 0.22554141283035278,
      "learning_rate": 4.831151202862358e-06,
      "loss": 0.0133,
      "step": 1588880
    },
    {
      "epoch": 2.6002696988145035,
      "grad_norm": 1.673008918762207,
      "learning_rate": 4.831085310648841e-06,
      "loss": 0.0143,
      "step": 1588900
    },
    {
      "epoch": 2.6003024292531567,
      "grad_norm": 0.19776518642902374,
      "learning_rate": 4.831019418435324e-06,
      "loss": 0.0171,
      "step": 1588920
    },
    {
      "epoch": 2.6003351596918103,
      "grad_norm": 0.39856186509132385,
      "learning_rate": 4.830953526221807e-06,
      "loss": 0.0155,
      "step": 1588940
    },
    {
      "epoch": 2.6003678901304634,
      "grad_norm": 0.5781674981117249,
      "learning_rate": 4.8308876340082895e-06,
      "loss": 0.017,
      "step": 1588960
    },
    {
      "epoch": 2.600400620569117,
      "grad_norm": 0.21614083647727966,
      "learning_rate": 4.830821741794772e-06,
      "loss": 0.015,
      "step": 1588980
    },
    {
      "epoch": 2.60043335100777,
      "grad_norm": 0.5970013737678528,
      "learning_rate": 4.830755849581256e-06,
      "loss": 0.0138,
      "step": 1589000
    },
    {
      "epoch": 2.6004660814464238,
      "grad_norm": 0.16731156408786774,
      "learning_rate": 4.8306899573677386e-06,
      "loss": 0.0158,
      "step": 1589020
    },
    {
      "epoch": 2.600498811885077,
      "grad_norm": 0.22000400722026825,
      "learning_rate": 4.830624065154221e-06,
      "loss": 0.0184,
      "step": 1589040
    },
    {
      "epoch": 2.60053154232373,
      "grad_norm": 0.13969846069812775,
      "learning_rate": 4.830558172940704e-06,
      "loss": 0.0137,
      "step": 1589060
    },
    {
      "epoch": 2.6005642727623837,
      "grad_norm": 0.4168643355369568,
      "learning_rate": 4.830492280727187e-06,
      "loss": 0.0166,
      "step": 1589080
    },
    {
      "epoch": 2.600597003201037,
      "grad_norm": 0.4649449586868286,
      "learning_rate": 4.8304263885136695e-06,
      "loss": 0.0151,
      "step": 1589100
    },
    {
      "epoch": 2.60062973363969,
      "grad_norm": 0.19914047420024872,
      "learning_rate": 4.830360496300152e-06,
      "loss": 0.0098,
      "step": 1589120
    },
    {
      "epoch": 2.6006624640783436,
      "grad_norm": 0.3156483471393585,
      "learning_rate": 4.830294604086636e-06,
      "loss": 0.0138,
      "step": 1589140
    },
    {
      "epoch": 2.600695194516997,
      "grad_norm": 0.2135629951953888,
      "learning_rate": 4.830228711873119e-06,
      "loss": 0.0132,
      "step": 1589160
    },
    {
      "epoch": 2.6007279249556503,
      "grad_norm": 0.2931559979915619,
      "learning_rate": 4.830162819659601e-06,
      "loss": 0.0197,
      "step": 1589180
    },
    {
      "epoch": 2.6007606553943035,
      "grad_norm": 0.15303398668766022,
      "learning_rate": 4.830096927446084e-06,
      "loss": 0.0127,
      "step": 1589200
    },
    {
      "epoch": 2.600793385832957,
      "grad_norm": 1.0775896310806274,
      "learning_rate": 4.830031035232567e-06,
      "loss": 0.0161,
      "step": 1589220
    },
    {
      "epoch": 2.60082611627161,
      "grad_norm": 0.23041607439517975,
      "learning_rate": 4.8299651430190496e-06,
      "loss": 0.0139,
      "step": 1589240
    },
    {
      "epoch": 2.6008588467102634,
      "grad_norm": 0.351338654756546,
      "learning_rate": 4.829899250805532e-06,
      "loss": 0.0115,
      "step": 1589260
    },
    {
      "epoch": 2.600891577148917,
      "grad_norm": 0.5979698896408081,
      "learning_rate": 4.829833358592015e-06,
      "loss": 0.0144,
      "step": 1589280
    },
    {
      "epoch": 2.6009243075875705,
      "grad_norm": 0.1213839128613472,
      "learning_rate": 4.829767466378499e-06,
      "loss": 0.0196,
      "step": 1589300
    },
    {
      "epoch": 2.6009570380262237,
      "grad_norm": 0.10563424974679947,
      "learning_rate": 4.829701574164981e-06,
      "loss": 0.0135,
      "step": 1589320
    },
    {
      "epoch": 2.600989768464877,
      "grad_norm": 0.47295960783958435,
      "learning_rate": 4.829635681951464e-06,
      "loss": 0.0136,
      "step": 1589340
    },
    {
      "epoch": 2.6010224989035304,
      "grad_norm": 0.36713454127311707,
      "learning_rate": 4.829569789737948e-06,
      "loss": 0.0116,
      "step": 1589360
    },
    {
      "epoch": 2.6010552293421836,
      "grad_norm": 0.29881027340888977,
      "learning_rate": 4.8295038975244304e-06,
      "loss": 0.009,
      "step": 1589380
    },
    {
      "epoch": 2.6010879597808367,
      "grad_norm": 0.034719083458185196,
      "learning_rate": 4.829438005310913e-06,
      "loss": 0.0156,
      "step": 1589400
    },
    {
      "epoch": 2.6011206902194903,
      "grad_norm": 1.0491609573364258,
      "learning_rate": 4.829372113097396e-06,
      "loss": 0.0175,
      "step": 1589420
    },
    {
      "epoch": 2.601153420658144,
      "grad_norm": 0.5184223055839539,
      "learning_rate": 4.829306220883879e-06,
      "loss": 0.0143,
      "step": 1589440
    },
    {
      "epoch": 2.601186151096797,
      "grad_norm": 0.22172120213508606,
      "learning_rate": 4.829240328670361e-06,
      "loss": 0.0163,
      "step": 1589460
    },
    {
      "epoch": 2.6012188815354502,
      "grad_norm": 0.27627846598625183,
      "learning_rate": 4.829174436456844e-06,
      "loss": 0.0174,
      "step": 1589480
    },
    {
      "epoch": 2.601251611974104,
      "grad_norm": 0.4220644235610962,
      "learning_rate": 4.829108544243327e-06,
      "loss": 0.0176,
      "step": 1589500
    },
    {
      "epoch": 2.601284342412757,
      "grad_norm": 0.5416426658630371,
      "learning_rate": 4.8290426520298105e-06,
      "loss": 0.0178,
      "step": 1589520
    },
    {
      "epoch": 2.60131707285141,
      "grad_norm": 0.18396994471549988,
      "learning_rate": 4.828976759816293e-06,
      "loss": 0.0133,
      "step": 1589540
    },
    {
      "epoch": 2.6013498032900637,
      "grad_norm": 0.5053672194480896,
      "learning_rate": 4.828910867602776e-06,
      "loss": 0.0107,
      "step": 1589560
    },
    {
      "epoch": 2.6013825337287173,
      "grad_norm": 0.14422883093357086,
      "learning_rate": 4.828844975389259e-06,
      "loss": 0.0138,
      "step": 1589580
    },
    {
      "epoch": 2.6014152641673705,
      "grad_norm": 0.3497481346130371,
      "learning_rate": 4.828779083175741e-06,
      "loss": 0.0119,
      "step": 1589600
    },
    {
      "epoch": 2.6014479946060236,
      "grad_norm": 0.5573589205741882,
      "learning_rate": 4.828713190962224e-06,
      "loss": 0.0128,
      "step": 1589620
    },
    {
      "epoch": 2.601480725044677,
      "grad_norm": 0.8930413126945496,
      "learning_rate": 4.828647298748707e-06,
      "loss": 0.0145,
      "step": 1589640
    },
    {
      "epoch": 2.6015134554833304,
      "grad_norm": 0.1328599452972412,
      "learning_rate": 4.82858140653519e-06,
      "loss": 0.0149,
      "step": 1589660
    },
    {
      "epoch": 2.6015461859219835,
      "grad_norm": 0.5955067873001099,
      "learning_rate": 4.828515514321672e-06,
      "loss": 0.0128,
      "step": 1589680
    },
    {
      "epoch": 2.601578916360637,
      "grad_norm": 1.4863375425338745,
      "learning_rate": 4.828449622108156e-06,
      "loss": 0.0142,
      "step": 1589700
    },
    {
      "epoch": 2.6016116467992902,
      "grad_norm": 0.2363007366657257,
      "learning_rate": 4.828383729894639e-06,
      "loss": 0.0108,
      "step": 1589720
    },
    {
      "epoch": 2.601644377237944,
      "grad_norm": 0.9832965731620789,
      "learning_rate": 4.8283178376811214e-06,
      "loss": 0.0213,
      "step": 1589740
    },
    {
      "epoch": 2.601677107676597,
      "grad_norm": 0.5583295822143555,
      "learning_rate": 4.828251945467605e-06,
      "loss": 0.0138,
      "step": 1589760
    },
    {
      "epoch": 2.6017098381152506,
      "grad_norm": 0.1805655062198639,
      "learning_rate": 4.828186053254088e-06,
      "loss": 0.0115,
      "step": 1589780
    },
    {
      "epoch": 2.6017425685539037,
      "grad_norm": 0.08789981156587601,
      "learning_rate": 4.8281201610405705e-06,
      "loss": 0.015,
      "step": 1589800
    },
    {
      "epoch": 2.601775298992557,
      "grad_norm": 0.4369582533836365,
      "learning_rate": 4.828054268827053e-06,
      "loss": 0.0169,
      "step": 1589820
    },
    {
      "epoch": 2.6018080294312105,
      "grad_norm": 0.12686453759670258,
      "learning_rate": 4.827988376613536e-06,
      "loss": 0.0121,
      "step": 1589840
    },
    {
      "epoch": 2.6018407598698636,
      "grad_norm": 0.4919552803039551,
      "learning_rate": 4.827922484400019e-06,
      "loss": 0.0132,
      "step": 1589860
    },
    {
      "epoch": 2.6018734903085172,
      "grad_norm": 0.13694699108600616,
      "learning_rate": 4.8278565921865015e-06,
      "loss": 0.0116,
      "step": 1589880
    },
    {
      "epoch": 2.6019062207471704,
      "grad_norm": 0.4107709228992462,
      "learning_rate": 4.827790699972984e-06,
      "loss": 0.0135,
      "step": 1589900
    },
    {
      "epoch": 2.601938951185824,
      "grad_norm": 0.31539386510849,
      "learning_rate": 4.827724807759468e-06,
      "loss": 0.0163,
      "step": 1589920
    },
    {
      "epoch": 2.601971681624477,
      "grad_norm": 0.20526602864265442,
      "learning_rate": 4.8276589155459505e-06,
      "loss": 0.0235,
      "step": 1589940
    },
    {
      "epoch": 2.6020044120631303,
      "grad_norm": 0.3032350242137909,
      "learning_rate": 4.827593023332433e-06,
      "loss": 0.0118,
      "step": 1589960
    },
    {
      "epoch": 2.602037142501784,
      "grad_norm": 0.19105714559555054,
      "learning_rate": 4.827527131118916e-06,
      "loss": 0.0129,
      "step": 1589980
    },
    {
      "epoch": 2.602069872940437,
      "grad_norm": 1.3617165088653564,
      "learning_rate": 4.827461238905399e-06,
      "loss": 0.0127,
      "step": 1590000
    },
    {
      "epoch": 2.6021026033790906,
      "grad_norm": 0.15393780171871185,
      "learning_rate": 4.8273953466918815e-06,
      "loss": 0.0227,
      "step": 1590020
    },
    {
      "epoch": 2.6021353338177438,
      "grad_norm": 0.40521731972694397,
      "learning_rate": 4.827329454478364e-06,
      "loss": 0.0149,
      "step": 1590040
    },
    {
      "epoch": 2.6021680642563974,
      "grad_norm": 0.45823484659194946,
      "learning_rate": 4.827263562264848e-06,
      "loss": 0.013,
      "step": 1590060
    },
    {
      "epoch": 2.6022007946950505,
      "grad_norm": 0.1680421084165573,
      "learning_rate": 4.8271976700513306e-06,
      "loss": 0.0188,
      "step": 1590080
    },
    {
      "epoch": 2.6022335251337037,
      "grad_norm": 0.750515341758728,
      "learning_rate": 4.827131777837813e-06,
      "loss": 0.0171,
      "step": 1590100
    },
    {
      "epoch": 2.6022662555723572,
      "grad_norm": 0.5007653832435608,
      "learning_rate": 4.827065885624296e-06,
      "loss": 0.012,
      "step": 1590120
    },
    {
      "epoch": 2.6022989860110104,
      "grad_norm": 0.770087480545044,
      "learning_rate": 4.826999993410779e-06,
      "loss": 0.0207,
      "step": 1590140
    },
    {
      "epoch": 2.602331716449664,
      "grad_norm": 0.25583407282829285,
      "learning_rate": 4.826934101197262e-06,
      "loss": 0.0193,
      "step": 1590160
    },
    {
      "epoch": 2.602364446888317,
      "grad_norm": 1.0271800756454468,
      "learning_rate": 4.826868208983745e-06,
      "loss": 0.0134,
      "step": 1590180
    },
    {
      "epoch": 2.6023971773269707,
      "grad_norm": 0.36482948064804077,
      "learning_rate": 4.826802316770228e-06,
      "loss": 0.0161,
      "step": 1590200
    },
    {
      "epoch": 2.602429907765624,
      "grad_norm": 0.3601943850517273,
      "learning_rate": 4.826736424556711e-06,
      "loss": 0.0146,
      "step": 1590220
    },
    {
      "epoch": 2.602462638204277,
      "grad_norm": 0.15933121740818024,
      "learning_rate": 4.826670532343193e-06,
      "loss": 0.016,
      "step": 1590240
    },
    {
      "epoch": 2.6024953686429306,
      "grad_norm": 0.3686828315258026,
      "learning_rate": 4.826604640129676e-06,
      "loss": 0.0112,
      "step": 1590260
    },
    {
      "epoch": 2.602528099081584,
      "grad_norm": 0.34575480222702026,
      "learning_rate": 4.826538747916159e-06,
      "loss": 0.0142,
      "step": 1590280
    },
    {
      "epoch": 2.6025608295202374,
      "grad_norm": 0.449950248003006,
      "learning_rate": 4.8264728557026415e-06,
      "loss": 0.0117,
      "step": 1590300
    },
    {
      "epoch": 2.6025935599588905,
      "grad_norm": 0.15123039484024048,
      "learning_rate": 4.826406963489125e-06,
      "loss": 0.0111,
      "step": 1590320
    },
    {
      "epoch": 2.602626290397544,
      "grad_norm": 0.2392820566892624,
      "learning_rate": 4.826341071275608e-06,
      "loss": 0.02,
      "step": 1590340
    },
    {
      "epoch": 2.6026590208361973,
      "grad_norm": 0.6388562917709351,
      "learning_rate": 4.826275179062091e-06,
      "loss": 0.0147,
      "step": 1590360
    },
    {
      "epoch": 2.6026917512748504,
      "grad_norm": 0.26842427253723145,
      "learning_rate": 4.826209286848573e-06,
      "loss": 0.012,
      "step": 1590380
    },
    {
      "epoch": 2.602724481713504,
      "grad_norm": 0.12821519374847412,
      "learning_rate": 4.826143394635056e-06,
      "loss": 0.0151,
      "step": 1590400
    },
    {
      "epoch": 2.602757212152157,
      "grad_norm": 0.25670620799064636,
      "learning_rate": 4.82607750242154e-06,
      "loss": 0.0101,
      "step": 1590420
    },
    {
      "epoch": 2.6027899425908108,
      "grad_norm": 0.21428191661834717,
      "learning_rate": 4.826011610208022e-06,
      "loss": 0.013,
      "step": 1590440
    },
    {
      "epoch": 2.602822673029464,
      "grad_norm": 1.1661300659179688,
      "learning_rate": 4.825945717994505e-06,
      "loss": 0.0164,
      "step": 1590460
    },
    {
      "epoch": 2.6028554034681175,
      "grad_norm": 0.13957394659519196,
      "learning_rate": 4.825879825780988e-06,
      "loss": 0.012,
      "step": 1590480
    },
    {
      "epoch": 2.6028881339067707,
      "grad_norm": 0.6826591491699219,
      "learning_rate": 4.825813933567471e-06,
      "loss": 0.0125,
      "step": 1590500
    },
    {
      "epoch": 2.602920864345424,
      "grad_norm": 0.23152098059654236,
      "learning_rate": 4.825748041353953e-06,
      "loss": 0.0119,
      "step": 1590520
    },
    {
      "epoch": 2.6029535947840774,
      "grad_norm": 0.8874267339706421,
      "learning_rate": 4.825682149140436e-06,
      "loss": 0.0196,
      "step": 1590540
    },
    {
      "epoch": 2.6029863252227305,
      "grad_norm": 0.2746516466140747,
      "learning_rate": 4.82561625692692e-06,
      "loss": 0.0166,
      "step": 1590560
    },
    {
      "epoch": 2.603019055661384,
      "grad_norm": 0.4570138156414032,
      "learning_rate": 4.8255503647134024e-06,
      "loss": 0.0203,
      "step": 1590580
    },
    {
      "epoch": 2.6030517861000373,
      "grad_norm": 0.07321364432573318,
      "learning_rate": 4.825484472499885e-06,
      "loss": 0.014,
      "step": 1590600
    },
    {
      "epoch": 2.603084516538691,
      "grad_norm": 0.2074078768491745,
      "learning_rate": 4.825418580286368e-06,
      "loss": 0.0131,
      "step": 1590620
    },
    {
      "epoch": 2.603117246977344,
      "grad_norm": 0.2171785831451416,
      "learning_rate": 4.825352688072851e-06,
      "loss": 0.0124,
      "step": 1590640
    },
    {
      "epoch": 2.603149977415997,
      "grad_norm": 0.09485690295696259,
      "learning_rate": 4.825286795859333e-06,
      "loss": 0.012,
      "step": 1590660
    },
    {
      "epoch": 2.603182707854651,
      "grad_norm": 0.28819817304611206,
      "learning_rate": 4.825220903645816e-06,
      "loss": 0.0136,
      "step": 1590680
    },
    {
      "epoch": 2.603215438293304,
      "grad_norm": 0.5843617916107178,
      "learning_rate": 4.825155011432299e-06,
      "loss": 0.0177,
      "step": 1590700
    },
    {
      "epoch": 2.603248168731957,
      "grad_norm": 0.4592629075050354,
      "learning_rate": 4.8250891192187825e-06,
      "loss": 0.0142,
      "step": 1590720
    },
    {
      "epoch": 2.6032808991706107,
      "grad_norm": 0.15330944955348969,
      "learning_rate": 4.825023227005265e-06,
      "loss": 0.0208,
      "step": 1590740
    },
    {
      "epoch": 2.6033136296092643,
      "grad_norm": 0.4668559730052948,
      "learning_rate": 4.824957334791748e-06,
      "loss": 0.0169,
      "step": 1590760
    },
    {
      "epoch": 2.6033463600479174,
      "grad_norm": 0.22762785851955414,
      "learning_rate": 4.8248914425782315e-06,
      "loss": 0.0133,
      "step": 1590780
    },
    {
      "epoch": 2.6033790904865706,
      "grad_norm": 0.37790003418922424,
      "learning_rate": 4.824825550364714e-06,
      "loss": 0.0142,
      "step": 1590800
    },
    {
      "epoch": 2.603411820925224,
      "grad_norm": 0.179928719997406,
      "learning_rate": 4.824759658151197e-06,
      "loss": 0.0148,
      "step": 1590820
    },
    {
      "epoch": 2.6034445513638773,
      "grad_norm": 0.16654503345489502,
      "learning_rate": 4.82469376593768e-06,
      "loss": 0.0128,
      "step": 1590840
    },
    {
      "epoch": 2.6034772818025305,
      "grad_norm": 0.9019047021865845,
      "learning_rate": 4.8246278737241625e-06,
      "loss": 0.017,
      "step": 1590860
    },
    {
      "epoch": 2.603510012241184,
      "grad_norm": 0.5538470149040222,
      "learning_rate": 4.824561981510645e-06,
      "loss": 0.0153,
      "step": 1590880
    },
    {
      "epoch": 2.6035427426798377,
      "grad_norm": 0.3291475772857666,
      "learning_rate": 4.824496089297128e-06,
      "loss": 0.0104,
      "step": 1590900
    },
    {
      "epoch": 2.603575473118491,
      "grad_norm": 0.13054881989955902,
      "learning_rate": 4.824430197083611e-06,
      "loss": 0.0156,
      "step": 1590920
    },
    {
      "epoch": 2.603608203557144,
      "grad_norm": 0.6819355487823486,
      "learning_rate": 4.824364304870094e-06,
      "loss": 0.013,
      "step": 1590940
    },
    {
      "epoch": 2.6036409339957975,
      "grad_norm": 0.4689731299877167,
      "learning_rate": 4.824298412656577e-06,
      "loss": 0.0176,
      "step": 1590960
    },
    {
      "epoch": 2.6036736644344507,
      "grad_norm": 0.13924817740917206,
      "learning_rate": 4.82423252044306e-06,
      "loss": 0.0295,
      "step": 1590980
    },
    {
      "epoch": 2.603706394873104,
      "grad_norm": 0.18976886570453644,
      "learning_rate": 4.8241666282295425e-06,
      "loss": 0.0181,
      "step": 1591000
    },
    {
      "epoch": 2.6037391253117574,
      "grad_norm": 0.05406946316361427,
      "learning_rate": 4.824100736016025e-06,
      "loss": 0.015,
      "step": 1591020
    },
    {
      "epoch": 2.603771855750411,
      "grad_norm": 0.21778324246406555,
      "learning_rate": 4.824034843802508e-06,
      "loss": 0.0131,
      "step": 1591040
    },
    {
      "epoch": 2.603804586189064,
      "grad_norm": 0.5154920220375061,
      "learning_rate": 4.823968951588991e-06,
      "loss": 0.0124,
      "step": 1591060
    },
    {
      "epoch": 2.6038373166277173,
      "grad_norm": 0.12549547851085663,
      "learning_rate": 4.8239030593754735e-06,
      "loss": 0.0133,
      "step": 1591080
    },
    {
      "epoch": 2.603870047066371,
      "grad_norm": 1.1179819107055664,
      "learning_rate": 4.823837167161956e-06,
      "loss": 0.0165,
      "step": 1591100
    },
    {
      "epoch": 2.603902777505024,
      "grad_norm": 0.23333311080932617,
      "learning_rate": 4.82377127494844e-06,
      "loss": 0.0177,
      "step": 1591120
    },
    {
      "epoch": 2.6039355079436772,
      "grad_norm": 0.2833597660064697,
      "learning_rate": 4.8237053827349225e-06,
      "loss": 0.0167,
      "step": 1591140
    },
    {
      "epoch": 2.603968238382331,
      "grad_norm": 0.3592844605445862,
      "learning_rate": 4.823639490521405e-06,
      "loss": 0.0143,
      "step": 1591160
    },
    {
      "epoch": 2.604000968820984,
      "grad_norm": 0.2185084968805313,
      "learning_rate": 4.823573598307889e-06,
      "loss": 0.0141,
      "step": 1591180
    },
    {
      "epoch": 2.6040336992596376,
      "grad_norm": 0.13582302629947662,
      "learning_rate": 4.823507706094372e-06,
      "loss": 0.0163,
      "step": 1591200
    },
    {
      "epoch": 2.6040664296982907,
      "grad_norm": 0.45791560411453247,
      "learning_rate": 4.823441813880854e-06,
      "loss": 0.018,
      "step": 1591220
    },
    {
      "epoch": 2.6040991601369443,
      "grad_norm": 0.09464667737483978,
      "learning_rate": 4.823375921667337e-06,
      "loss": 0.0062,
      "step": 1591240
    },
    {
      "epoch": 2.6041318905755975,
      "grad_norm": 0.2782120704650879,
      "learning_rate": 4.82331002945382e-06,
      "loss": 0.0127,
      "step": 1591260
    },
    {
      "epoch": 2.6041646210142506,
      "grad_norm": 0.3447296917438507,
      "learning_rate": 4.8232441372403026e-06,
      "loss": 0.0083,
      "step": 1591280
    },
    {
      "epoch": 2.604197351452904,
      "grad_norm": 0.24644005298614502,
      "learning_rate": 4.823178245026785e-06,
      "loss": 0.0129,
      "step": 1591300
    },
    {
      "epoch": 2.6042300818915574,
      "grad_norm": 0.7035235166549683,
      "learning_rate": 4.823112352813268e-06,
      "loss": 0.0172,
      "step": 1591320
    },
    {
      "epoch": 2.604262812330211,
      "grad_norm": 0.47322699427604675,
      "learning_rate": 4.823046460599752e-06,
      "loss": 0.012,
      "step": 1591340
    },
    {
      "epoch": 2.604295542768864,
      "grad_norm": 0.5735477209091187,
      "learning_rate": 4.822980568386234e-06,
      "loss": 0.0248,
      "step": 1591360
    },
    {
      "epoch": 2.6043282732075177,
      "grad_norm": 0.49877291917800903,
      "learning_rate": 4.822914676172717e-06,
      "loss": 0.0154,
      "step": 1591380
    },
    {
      "epoch": 2.604361003646171,
      "grad_norm": 0.5793160796165466,
      "learning_rate": 4.8228487839592e-06,
      "loss": 0.0138,
      "step": 1591400
    },
    {
      "epoch": 2.604393734084824,
      "grad_norm": 0.10155268013477325,
      "learning_rate": 4.822782891745683e-06,
      "loss": 0.0186,
      "step": 1591420
    },
    {
      "epoch": 2.6044264645234776,
      "grad_norm": 0.28169283270835876,
      "learning_rate": 4.822716999532165e-06,
      "loss": 0.0115,
      "step": 1591440
    },
    {
      "epoch": 2.6044591949621307,
      "grad_norm": 1.739966630935669,
      "learning_rate": 4.822651107318648e-06,
      "loss": 0.0161,
      "step": 1591460
    },
    {
      "epoch": 2.6044919254007843,
      "grad_norm": 0.2995277941226959,
      "learning_rate": 4.822585215105132e-06,
      "loss": 0.0189,
      "step": 1591480
    },
    {
      "epoch": 2.6045246558394375,
      "grad_norm": 0.35471901297569275,
      "learning_rate": 4.822519322891614e-06,
      "loss": 0.0145,
      "step": 1591500
    },
    {
      "epoch": 2.604557386278091,
      "grad_norm": 0.4682686924934387,
      "learning_rate": 4.822453430678097e-06,
      "loss": 0.0172,
      "step": 1591520
    },
    {
      "epoch": 2.6045901167167442,
      "grad_norm": 0.32483747601509094,
      "learning_rate": 4.82238753846458e-06,
      "loss": 0.0148,
      "step": 1591540
    },
    {
      "epoch": 2.6046228471553974,
      "grad_norm": 0.20373089611530304,
      "learning_rate": 4.822321646251063e-06,
      "loss": 0.0171,
      "step": 1591560
    },
    {
      "epoch": 2.604655577594051,
      "grad_norm": 0.49074602127075195,
      "learning_rate": 4.822255754037546e-06,
      "loss": 0.0143,
      "step": 1591580
    },
    {
      "epoch": 2.604688308032704,
      "grad_norm": 0.23094680905342102,
      "learning_rate": 4.822189861824029e-06,
      "loss": 0.0122,
      "step": 1591600
    },
    {
      "epoch": 2.6047210384713577,
      "grad_norm": 0.2107599675655365,
      "learning_rate": 4.822123969610512e-06,
      "loss": 0.0145,
      "step": 1591620
    },
    {
      "epoch": 2.604753768910011,
      "grad_norm": 0.11879490315914154,
      "learning_rate": 4.822058077396994e-06,
      "loss": 0.0108,
      "step": 1591640
    },
    {
      "epoch": 2.6047864993486645,
      "grad_norm": 0.2706460952758789,
      "learning_rate": 4.821992185183477e-06,
      "loss": 0.0121,
      "step": 1591660
    },
    {
      "epoch": 2.6048192297873176,
      "grad_norm": 0.5439354181289673,
      "learning_rate": 4.82192629296996e-06,
      "loss": 0.017,
      "step": 1591680
    },
    {
      "epoch": 2.6048519602259708,
      "grad_norm": 0.1215277686715126,
      "learning_rate": 4.821860400756443e-06,
      "loss": 0.0183,
      "step": 1591700
    },
    {
      "epoch": 2.6048846906646244,
      "grad_norm": 0.49414122104644775,
      "learning_rate": 4.821794508542925e-06,
      "loss": 0.0115,
      "step": 1591720
    },
    {
      "epoch": 2.6049174211032775,
      "grad_norm": 0.34349754452705383,
      "learning_rate": 4.821728616329409e-06,
      "loss": 0.0083,
      "step": 1591740
    },
    {
      "epoch": 2.604950151541931,
      "grad_norm": 0.4192127287387848,
      "learning_rate": 4.821662724115892e-06,
      "loss": 0.0104,
      "step": 1591760
    },
    {
      "epoch": 2.6049828819805843,
      "grad_norm": 0.44895562529563904,
      "learning_rate": 4.8215968319023744e-06,
      "loss": 0.0109,
      "step": 1591780
    },
    {
      "epoch": 2.605015612419238,
      "grad_norm": 0.2669801115989685,
      "learning_rate": 4.821530939688857e-06,
      "loss": 0.0091,
      "step": 1591800
    },
    {
      "epoch": 2.605048342857891,
      "grad_norm": 0.24502752721309662,
      "learning_rate": 4.821465047475341e-06,
      "loss": 0.0126,
      "step": 1591820
    },
    {
      "epoch": 2.605081073296544,
      "grad_norm": 0.23020336031913757,
      "learning_rate": 4.8213991552618235e-06,
      "loss": 0.0165,
      "step": 1591840
    },
    {
      "epoch": 2.6051138037351977,
      "grad_norm": 0.6494467258453369,
      "learning_rate": 4.821333263048306e-06,
      "loss": 0.0207,
      "step": 1591860
    },
    {
      "epoch": 2.605146534173851,
      "grad_norm": 0.17856000363826752,
      "learning_rate": 4.821267370834789e-06,
      "loss": 0.0138,
      "step": 1591880
    },
    {
      "epoch": 2.6051792646125045,
      "grad_norm": 0.42754676938056946,
      "learning_rate": 4.821201478621272e-06,
      "loss": 0.0216,
      "step": 1591900
    },
    {
      "epoch": 2.6052119950511576,
      "grad_norm": 0.2793259918689728,
      "learning_rate": 4.8211355864077545e-06,
      "loss": 0.0153,
      "step": 1591920
    },
    {
      "epoch": 2.6052447254898112,
      "grad_norm": 0.5671834945678711,
      "learning_rate": 4.821069694194237e-06,
      "loss": 0.0155,
      "step": 1591940
    },
    {
      "epoch": 2.6052774559284644,
      "grad_norm": 0.19860869646072388,
      "learning_rate": 4.821003801980721e-06,
      "loss": 0.0144,
      "step": 1591960
    },
    {
      "epoch": 2.6053101863671175,
      "grad_norm": 0.3701761066913605,
      "learning_rate": 4.8209379097672035e-06,
      "loss": 0.0224,
      "step": 1591980
    },
    {
      "epoch": 2.605342916805771,
      "grad_norm": 0.7307258248329163,
      "learning_rate": 4.820872017553686e-06,
      "loss": 0.0202,
      "step": 1592000
    },
    {
      "epoch": 2.6053756472444243,
      "grad_norm": 0.23450453579425812,
      "learning_rate": 4.820806125340169e-06,
      "loss": 0.0143,
      "step": 1592020
    },
    {
      "epoch": 2.605408377683078,
      "grad_norm": 0.5913766026496887,
      "learning_rate": 4.820740233126652e-06,
      "loss": 0.0135,
      "step": 1592040
    },
    {
      "epoch": 2.605441108121731,
      "grad_norm": 0.3010466396808624,
      "learning_rate": 4.8206743409131345e-06,
      "loss": 0.0188,
      "step": 1592060
    },
    {
      "epoch": 2.6054738385603846,
      "grad_norm": 0.5209710001945496,
      "learning_rate": 4.820608448699617e-06,
      "loss": 0.014,
      "step": 1592080
    },
    {
      "epoch": 2.6055065689990378,
      "grad_norm": 0.6305111646652222,
      "learning_rate": 4.8205425564861e-06,
      "loss": 0.0158,
      "step": 1592100
    },
    {
      "epoch": 2.605539299437691,
      "grad_norm": 0.07991117238998413,
      "learning_rate": 4.820476664272583e-06,
      "loss": 0.0105,
      "step": 1592120
    },
    {
      "epoch": 2.6055720298763445,
      "grad_norm": 1.0518550872802734,
      "learning_rate": 4.820410772059066e-06,
      "loss": 0.011,
      "step": 1592140
    },
    {
      "epoch": 2.6056047603149977,
      "grad_norm": 0.58377605676651,
      "learning_rate": 4.820344879845549e-06,
      "loss": 0.0248,
      "step": 1592160
    },
    {
      "epoch": 2.605637490753651,
      "grad_norm": 0.39178040623664856,
      "learning_rate": 4.820278987632032e-06,
      "loss": 0.0199,
      "step": 1592180
    },
    {
      "epoch": 2.6056702211923044,
      "grad_norm": 0.5350446105003357,
      "learning_rate": 4.820213095418515e-06,
      "loss": 0.0256,
      "step": 1592200
    },
    {
      "epoch": 2.605702951630958,
      "grad_norm": 0.2110310047864914,
      "learning_rate": 4.820147203204998e-06,
      "loss": 0.0152,
      "step": 1592220
    },
    {
      "epoch": 2.605735682069611,
      "grad_norm": 0.5027494430541992,
      "learning_rate": 4.820081310991481e-06,
      "loss": 0.0167,
      "step": 1592240
    },
    {
      "epoch": 2.6057684125082643,
      "grad_norm": 0.3565395474433899,
      "learning_rate": 4.820015418777964e-06,
      "loss": 0.0098,
      "step": 1592260
    },
    {
      "epoch": 2.605801142946918,
      "grad_norm": 0.09489909559488297,
      "learning_rate": 4.819949526564446e-06,
      "loss": 0.0186,
      "step": 1592280
    },
    {
      "epoch": 2.605833873385571,
      "grad_norm": 0.37824493646621704,
      "learning_rate": 4.819883634350929e-06,
      "loss": 0.0136,
      "step": 1592300
    },
    {
      "epoch": 2.605866603824224,
      "grad_norm": 1.0280330181121826,
      "learning_rate": 4.819817742137412e-06,
      "loss": 0.0147,
      "step": 1592320
    },
    {
      "epoch": 2.605899334262878,
      "grad_norm": 0.47237205505371094,
      "learning_rate": 4.8197518499238945e-06,
      "loss": 0.0132,
      "step": 1592340
    },
    {
      "epoch": 2.6059320647015314,
      "grad_norm": 0.2289937287569046,
      "learning_rate": 4.819685957710378e-06,
      "loss": 0.0114,
      "step": 1592360
    },
    {
      "epoch": 2.6059647951401845,
      "grad_norm": 1.127549171447754,
      "learning_rate": 4.819620065496861e-06,
      "loss": 0.0109,
      "step": 1592380
    },
    {
      "epoch": 2.6059975255788377,
      "grad_norm": 0.19091224670410156,
      "learning_rate": 4.819554173283344e-06,
      "loss": 0.0151,
      "step": 1592400
    },
    {
      "epoch": 2.6060302560174913,
      "grad_norm": 0.25367629528045654,
      "learning_rate": 4.819488281069826e-06,
      "loss": 0.0137,
      "step": 1592420
    },
    {
      "epoch": 2.6060629864561444,
      "grad_norm": 0.43486058712005615,
      "learning_rate": 4.819422388856309e-06,
      "loss": 0.0087,
      "step": 1592440
    },
    {
      "epoch": 2.6060957168947976,
      "grad_norm": 0.7391338348388672,
      "learning_rate": 4.819356496642792e-06,
      "loss": 0.0156,
      "step": 1592460
    },
    {
      "epoch": 2.606128447333451,
      "grad_norm": 0.663775622844696,
      "learning_rate": 4.8192906044292746e-06,
      "loss": 0.0151,
      "step": 1592480
    },
    {
      "epoch": 2.6061611777721048,
      "grad_norm": 0.7819733619689941,
      "learning_rate": 4.819224712215757e-06,
      "loss": 0.0155,
      "step": 1592500
    },
    {
      "epoch": 2.606193908210758,
      "grad_norm": 0.3112187683582306,
      "learning_rate": 4.819158820002241e-06,
      "loss": 0.0177,
      "step": 1592520
    },
    {
      "epoch": 2.606226638649411,
      "grad_norm": 0.20003792643547058,
      "learning_rate": 4.819092927788724e-06,
      "loss": 0.0133,
      "step": 1592540
    },
    {
      "epoch": 2.6062593690880647,
      "grad_norm": 0.33907389640808105,
      "learning_rate": 4.819027035575206e-06,
      "loss": 0.007,
      "step": 1592560
    },
    {
      "epoch": 2.606292099526718,
      "grad_norm": 0.22779618203639984,
      "learning_rate": 4.818961143361689e-06,
      "loss": 0.0184,
      "step": 1592580
    },
    {
      "epoch": 2.606324829965371,
      "grad_norm": 0.09021657705307007,
      "learning_rate": 4.818895251148173e-06,
      "loss": 0.0105,
      "step": 1592600
    },
    {
      "epoch": 2.6063575604040246,
      "grad_norm": 0.4693780243396759,
      "learning_rate": 4.8188293589346554e-06,
      "loss": 0.0138,
      "step": 1592620
    },
    {
      "epoch": 2.606390290842678,
      "grad_norm": 0.3181837797164917,
      "learning_rate": 4.818763466721138e-06,
      "loss": 0.0134,
      "step": 1592640
    },
    {
      "epoch": 2.6064230212813313,
      "grad_norm": 0.17053747177124023,
      "learning_rate": 4.818697574507621e-06,
      "loss": 0.0179,
      "step": 1592660
    },
    {
      "epoch": 2.6064557517199844,
      "grad_norm": 0.27003365755081177,
      "learning_rate": 4.818631682294104e-06,
      "loss": 0.015,
      "step": 1592680
    },
    {
      "epoch": 2.606488482158638,
      "grad_norm": 0.5199468731880188,
      "learning_rate": 4.818565790080586e-06,
      "loss": 0.0141,
      "step": 1592700
    },
    {
      "epoch": 2.606521212597291,
      "grad_norm": 0.1906600147485733,
      "learning_rate": 4.818499897867069e-06,
      "loss": 0.0097,
      "step": 1592720
    },
    {
      "epoch": 2.6065539430359443,
      "grad_norm": 0.13326898217201233,
      "learning_rate": 4.818434005653552e-06,
      "loss": 0.0157,
      "step": 1592740
    },
    {
      "epoch": 2.606586673474598,
      "grad_norm": 0.2222796529531479,
      "learning_rate": 4.8183681134400355e-06,
      "loss": 0.0128,
      "step": 1592760
    },
    {
      "epoch": 2.606619403913251,
      "grad_norm": 1.323456883430481,
      "learning_rate": 4.818302221226518e-06,
      "loss": 0.0133,
      "step": 1592780
    },
    {
      "epoch": 2.6066521343519047,
      "grad_norm": 0.1748906821012497,
      "learning_rate": 4.818236329013001e-06,
      "loss": 0.0217,
      "step": 1592800
    },
    {
      "epoch": 2.606684864790558,
      "grad_norm": 0.391463965177536,
      "learning_rate": 4.818170436799484e-06,
      "loss": 0.0147,
      "step": 1592820
    },
    {
      "epoch": 2.6067175952292114,
      "grad_norm": 0.26975730061531067,
      "learning_rate": 4.8181045445859664e-06,
      "loss": 0.012,
      "step": 1592840
    },
    {
      "epoch": 2.6067503256678646,
      "grad_norm": 0.396421879529953,
      "learning_rate": 4.818038652372449e-06,
      "loss": 0.0212,
      "step": 1592860
    },
    {
      "epoch": 2.6067830561065177,
      "grad_norm": 0.08358609676361084,
      "learning_rate": 4.817972760158933e-06,
      "loss": 0.0128,
      "step": 1592880
    },
    {
      "epoch": 2.6068157865451713,
      "grad_norm": 0.32953080534935,
      "learning_rate": 4.8179068679454155e-06,
      "loss": 0.0148,
      "step": 1592900
    },
    {
      "epoch": 2.6068485169838245,
      "grad_norm": 0.13365399837493896,
      "learning_rate": 4.817840975731898e-06,
      "loss": 0.014,
      "step": 1592920
    },
    {
      "epoch": 2.606881247422478,
      "grad_norm": 0.5112720727920532,
      "learning_rate": 4.817775083518381e-06,
      "loss": 0.0139,
      "step": 1592940
    },
    {
      "epoch": 2.606913977861131,
      "grad_norm": 0.20852935314178467,
      "learning_rate": 4.817709191304864e-06,
      "loss": 0.012,
      "step": 1592960
    },
    {
      "epoch": 2.606946708299785,
      "grad_norm": 0.10993712395429611,
      "learning_rate": 4.8176432990913465e-06,
      "loss": 0.0115,
      "step": 1592980
    },
    {
      "epoch": 2.606979438738438,
      "grad_norm": 0.5508292317390442,
      "learning_rate": 4.81757740687783e-06,
      "loss": 0.0133,
      "step": 1593000
    },
    {
      "epoch": 2.607012169177091,
      "grad_norm": 0.2079717218875885,
      "learning_rate": 4.817511514664313e-06,
      "loss": 0.0096,
      "step": 1593020
    },
    {
      "epoch": 2.6070448996157447,
      "grad_norm": 0.2879669666290283,
      "learning_rate": 4.8174456224507955e-06,
      "loss": 0.0162,
      "step": 1593040
    },
    {
      "epoch": 2.607077630054398,
      "grad_norm": 0.3891911804676056,
      "learning_rate": 4.817379730237278e-06,
      "loss": 0.016,
      "step": 1593060
    },
    {
      "epoch": 2.6071103604930514,
      "grad_norm": 0.7128995656967163,
      "learning_rate": 4.817313838023761e-06,
      "loss": 0.018,
      "step": 1593080
    },
    {
      "epoch": 2.6071430909317046,
      "grad_norm": 0.8433089256286621,
      "learning_rate": 4.817247945810244e-06,
      "loss": 0.021,
      "step": 1593100
    },
    {
      "epoch": 2.607175821370358,
      "grad_norm": 0.5773334503173828,
      "learning_rate": 4.8171820535967265e-06,
      "loss": 0.0081,
      "step": 1593120
    },
    {
      "epoch": 2.6072085518090113,
      "grad_norm": 0.21028071641921997,
      "learning_rate": 4.817116161383209e-06,
      "loss": 0.014,
      "step": 1593140
    },
    {
      "epoch": 2.6072412822476645,
      "grad_norm": 0.5797403454780579,
      "learning_rate": 4.817050269169693e-06,
      "loss": 0.0152,
      "step": 1593160
    },
    {
      "epoch": 2.607274012686318,
      "grad_norm": 0.16911058127880096,
      "learning_rate": 4.8169843769561755e-06,
      "loss": 0.0148,
      "step": 1593180
    },
    {
      "epoch": 2.6073067431249712,
      "grad_norm": 0.3146022856235504,
      "learning_rate": 4.816918484742658e-06,
      "loss": 0.0093,
      "step": 1593200
    },
    {
      "epoch": 2.607339473563625,
      "grad_norm": 0.08584564924240112,
      "learning_rate": 4.816852592529142e-06,
      "loss": 0.0166,
      "step": 1593220
    },
    {
      "epoch": 2.607372204002278,
      "grad_norm": 0.23781728744506836,
      "learning_rate": 4.816786700315625e-06,
      "loss": 0.0117,
      "step": 1593240
    },
    {
      "epoch": 2.6074049344409316,
      "grad_norm": 0.081807941198349,
      "learning_rate": 4.816720808102107e-06,
      "loss": 0.0123,
      "step": 1593260
    },
    {
      "epoch": 2.6074376648795847,
      "grad_norm": 0.12196628004312515,
      "learning_rate": 4.81665491588859e-06,
      "loss": 0.0138,
      "step": 1593280
    },
    {
      "epoch": 2.607470395318238,
      "grad_norm": 0.4400158226490021,
      "learning_rate": 4.816589023675073e-06,
      "loss": 0.0164,
      "step": 1593300
    },
    {
      "epoch": 2.6075031257568915,
      "grad_norm": 0.3321593701839447,
      "learning_rate": 4.8165231314615556e-06,
      "loss": 0.0139,
      "step": 1593320
    },
    {
      "epoch": 2.6075358561955446,
      "grad_norm": 0.2964411675930023,
      "learning_rate": 4.816457239248038e-06,
      "loss": 0.0156,
      "step": 1593340
    },
    {
      "epoch": 2.607568586634198,
      "grad_norm": 0.4503951668739319,
      "learning_rate": 4.816391347034521e-06,
      "loss": 0.0181,
      "step": 1593360
    },
    {
      "epoch": 2.6076013170728514,
      "grad_norm": 0.260765016078949,
      "learning_rate": 4.816325454821005e-06,
      "loss": 0.01,
      "step": 1593380
    },
    {
      "epoch": 2.607634047511505,
      "grad_norm": 0.14062662422657013,
      "learning_rate": 4.816259562607487e-06,
      "loss": 0.0117,
      "step": 1593400
    },
    {
      "epoch": 2.607666777950158,
      "grad_norm": 0.29593196511268616,
      "learning_rate": 4.81619367039397e-06,
      "loss": 0.0157,
      "step": 1593420
    },
    {
      "epoch": 2.6076995083888113,
      "grad_norm": 0.3598625659942627,
      "learning_rate": 4.816127778180453e-06,
      "loss": 0.0142,
      "step": 1593440
    },
    {
      "epoch": 2.607732238827465,
      "grad_norm": 0.7100847959518433,
      "learning_rate": 4.816061885966936e-06,
      "loss": 0.0119,
      "step": 1593460
    },
    {
      "epoch": 2.607764969266118,
      "grad_norm": 0.5412764549255371,
      "learning_rate": 4.815995993753418e-06,
      "loss": 0.0119,
      "step": 1593480
    },
    {
      "epoch": 2.6077976997047716,
      "grad_norm": 0.2561282217502594,
      "learning_rate": 4.815930101539901e-06,
      "loss": 0.0103,
      "step": 1593500
    },
    {
      "epoch": 2.6078304301434247,
      "grad_norm": 0.5465226173400879,
      "learning_rate": 4.815864209326384e-06,
      "loss": 0.0144,
      "step": 1593520
    },
    {
      "epoch": 2.6078631605820783,
      "grad_norm": 0.467372328042984,
      "learning_rate": 4.8157983171128666e-06,
      "loss": 0.014,
      "step": 1593540
    },
    {
      "epoch": 2.6078958910207315,
      "grad_norm": 0.17135460674762726,
      "learning_rate": 4.81573242489935e-06,
      "loss": 0.0114,
      "step": 1593560
    },
    {
      "epoch": 2.6079286214593846,
      "grad_norm": 0.05693046376109123,
      "learning_rate": 4.815666532685833e-06,
      "loss": 0.014,
      "step": 1593580
    },
    {
      "epoch": 2.6079613518980382,
      "grad_norm": 0.4982088506221771,
      "learning_rate": 4.815600640472316e-06,
      "loss": 0.0177,
      "step": 1593600
    },
    {
      "epoch": 2.6079940823366914,
      "grad_norm": 0.18816904723644257,
      "learning_rate": 4.815534748258799e-06,
      "loss": 0.0196,
      "step": 1593620
    },
    {
      "epoch": 2.6080268127753445,
      "grad_norm": 0.12391546368598938,
      "learning_rate": 4.815468856045282e-06,
      "loss": 0.0118,
      "step": 1593640
    },
    {
      "epoch": 2.608059543213998,
      "grad_norm": 0.22652064263820648,
      "learning_rate": 4.815402963831765e-06,
      "loss": 0.0103,
      "step": 1593660
    },
    {
      "epoch": 2.6080922736526517,
      "grad_norm": 0.8012819886207581,
      "learning_rate": 4.8153370716182474e-06,
      "loss": 0.0163,
      "step": 1593680
    },
    {
      "epoch": 2.608125004091305,
      "grad_norm": 0.19247078895568848,
      "learning_rate": 4.81527117940473e-06,
      "loss": 0.0116,
      "step": 1593700
    },
    {
      "epoch": 2.608157734529958,
      "grad_norm": 0.5424181818962097,
      "learning_rate": 4.815205287191213e-06,
      "loss": 0.0113,
      "step": 1593720
    },
    {
      "epoch": 2.6081904649686116,
      "grad_norm": 0.22321385145187378,
      "learning_rate": 4.815139394977696e-06,
      "loss": 0.0132,
      "step": 1593740
    },
    {
      "epoch": 2.6082231954072648,
      "grad_norm": 0.34701547026634216,
      "learning_rate": 4.815073502764178e-06,
      "loss": 0.0115,
      "step": 1593760
    },
    {
      "epoch": 2.608255925845918,
      "grad_norm": 0.20325785875320435,
      "learning_rate": 4.815007610550662e-06,
      "loss": 0.0161,
      "step": 1593780
    },
    {
      "epoch": 2.6082886562845715,
      "grad_norm": 0.16943404078483582,
      "learning_rate": 4.814941718337145e-06,
      "loss": 0.0196,
      "step": 1593800
    },
    {
      "epoch": 2.608321386723225,
      "grad_norm": 0.31221944093704224,
      "learning_rate": 4.8148758261236275e-06,
      "loss": 0.0123,
      "step": 1593820
    },
    {
      "epoch": 2.6083541171618783,
      "grad_norm": 0.24458125233650208,
      "learning_rate": 4.81480993391011e-06,
      "loss": 0.0146,
      "step": 1593840
    },
    {
      "epoch": 2.6083868476005314,
      "grad_norm": 0.8360055685043335,
      "learning_rate": 4.814744041696593e-06,
      "loss": 0.0106,
      "step": 1593860
    },
    {
      "epoch": 2.608419578039185,
      "grad_norm": 0.9671855568885803,
      "learning_rate": 4.814678149483076e-06,
      "loss": 0.0113,
      "step": 1593880
    },
    {
      "epoch": 2.608452308477838,
      "grad_norm": 0.4048234522342682,
      "learning_rate": 4.814612257269558e-06,
      "loss": 0.018,
      "step": 1593900
    },
    {
      "epoch": 2.6084850389164913,
      "grad_norm": 0.6064451932907104,
      "learning_rate": 4.814546365056041e-06,
      "loss": 0.0126,
      "step": 1593920
    },
    {
      "epoch": 2.608517769355145,
      "grad_norm": 0.5503160357475281,
      "learning_rate": 4.814480472842525e-06,
      "loss": 0.0205,
      "step": 1593940
    },
    {
      "epoch": 2.6085504997937985,
      "grad_norm": 0.2840038239955902,
      "learning_rate": 4.8144145806290075e-06,
      "loss": 0.0118,
      "step": 1593960
    },
    {
      "epoch": 2.6085832302324516,
      "grad_norm": 0.3354654610157013,
      "learning_rate": 4.81434868841549e-06,
      "loss": 0.0138,
      "step": 1593980
    },
    {
      "epoch": 2.608615960671105,
      "grad_norm": 0.6335347294807434,
      "learning_rate": 4.814282796201973e-06,
      "loss": 0.0163,
      "step": 1594000
    },
    {
      "epoch": 2.6086486911097584,
      "grad_norm": 0.1634681522846222,
      "learning_rate": 4.8142169039884565e-06,
      "loss": 0.0116,
      "step": 1594020
    },
    {
      "epoch": 2.6086814215484115,
      "grad_norm": 0.452305406332016,
      "learning_rate": 4.814151011774939e-06,
      "loss": 0.0235,
      "step": 1594040
    },
    {
      "epoch": 2.6087141519870647,
      "grad_norm": 0.10919322073459625,
      "learning_rate": 4.814085119561422e-06,
      "loss": 0.0139,
      "step": 1594060
    },
    {
      "epoch": 2.6087468824257183,
      "grad_norm": 0.18137326836585999,
      "learning_rate": 4.814019227347905e-06,
      "loss": 0.0114,
      "step": 1594080
    },
    {
      "epoch": 2.608779612864372,
      "grad_norm": 0.5661206841468811,
      "learning_rate": 4.8139533351343875e-06,
      "loss": 0.0117,
      "step": 1594100
    },
    {
      "epoch": 2.608812343303025,
      "grad_norm": 0.23563456535339355,
      "learning_rate": 4.81388744292087e-06,
      "loss": 0.0147,
      "step": 1594120
    },
    {
      "epoch": 2.608845073741678,
      "grad_norm": 0.8354123830795288,
      "learning_rate": 4.813821550707353e-06,
      "loss": 0.0196,
      "step": 1594140
    },
    {
      "epoch": 2.6088778041803318,
      "grad_norm": 0.11283266544342041,
      "learning_rate": 4.813755658493836e-06,
      "loss": 0.0184,
      "step": 1594160
    },
    {
      "epoch": 2.608910534618985,
      "grad_norm": 0.40511059761047363,
      "learning_rate": 4.813689766280319e-06,
      "loss": 0.0163,
      "step": 1594180
    },
    {
      "epoch": 2.608943265057638,
      "grad_norm": 0.5518929958343506,
      "learning_rate": 4.813623874066802e-06,
      "loss": 0.0109,
      "step": 1594200
    },
    {
      "epoch": 2.6089759954962917,
      "grad_norm": 1.071922779083252,
      "learning_rate": 4.813557981853285e-06,
      "loss": 0.0176,
      "step": 1594220
    },
    {
      "epoch": 2.609008725934945,
      "grad_norm": 0.3722895085811615,
      "learning_rate": 4.8134920896397675e-06,
      "loss": 0.0147,
      "step": 1594240
    },
    {
      "epoch": 2.6090414563735984,
      "grad_norm": 0.11525209993124008,
      "learning_rate": 4.81342619742625e-06,
      "loss": 0.0146,
      "step": 1594260
    },
    {
      "epoch": 2.6090741868122516,
      "grad_norm": 0.5604785084724426,
      "learning_rate": 4.813360305212734e-06,
      "loss": 0.023,
      "step": 1594280
    },
    {
      "epoch": 2.609106917250905,
      "grad_norm": 0.4081014394760132,
      "learning_rate": 4.813294412999217e-06,
      "loss": 0.0167,
      "step": 1594300
    },
    {
      "epoch": 2.6091396476895583,
      "grad_norm": 0.36886075139045715,
      "learning_rate": 4.813228520785699e-06,
      "loss": 0.0126,
      "step": 1594320
    },
    {
      "epoch": 2.6091723781282115,
      "grad_norm": 0.09239403903484344,
      "learning_rate": 4.813162628572182e-06,
      "loss": 0.0106,
      "step": 1594340
    },
    {
      "epoch": 2.609205108566865,
      "grad_norm": 0.3756316304206848,
      "learning_rate": 4.813096736358665e-06,
      "loss": 0.015,
      "step": 1594360
    },
    {
      "epoch": 2.609237839005518,
      "grad_norm": 0.7199539542198181,
      "learning_rate": 4.8130308441451476e-06,
      "loss": 0.0168,
      "step": 1594380
    },
    {
      "epoch": 2.609270569444172,
      "grad_norm": 0.3663894832134247,
      "learning_rate": 4.81296495193163e-06,
      "loss": 0.012,
      "step": 1594400
    },
    {
      "epoch": 2.609303299882825,
      "grad_norm": 0.31265899538993835,
      "learning_rate": 4.812899059718114e-06,
      "loss": 0.0145,
      "step": 1594420
    },
    {
      "epoch": 2.6093360303214785,
      "grad_norm": 0.2576444149017334,
      "learning_rate": 4.812833167504597e-06,
      "loss": 0.0163,
      "step": 1594440
    },
    {
      "epoch": 2.6093687607601317,
      "grad_norm": 0.42966988682746887,
      "learning_rate": 4.812767275291079e-06,
      "loss": 0.0127,
      "step": 1594460
    },
    {
      "epoch": 2.609401491198785,
      "grad_norm": 0.049631405621767044,
      "learning_rate": 4.812701383077562e-06,
      "loss": 0.0159,
      "step": 1594480
    },
    {
      "epoch": 2.6094342216374384,
      "grad_norm": 0.4375617206096649,
      "learning_rate": 4.812635490864045e-06,
      "loss": 0.0105,
      "step": 1594500
    },
    {
      "epoch": 2.6094669520760916,
      "grad_norm": 0.14354288578033447,
      "learning_rate": 4.812569598650528e-06,
      "loss": 0.0167,
      "step": 1594520
    },
    {
      "epoch": 2.609499682514745,
      "grad_norm": 0.3318917751312256,
      "learning_rate": 4.81250370643701e-06,
      "loss": 0.0166,
      "step": 1594540
    },
    {
      "epoch": 2.6095324129533983,
      "grad_norm": 0.2693934440612793,
      "learning_rate": 4.812437814223493e-06,
      "loss": 0.0144,
      "step": 1594560
    },
    {
      "epoch": 2.609565143392052,
      "grad_norm": 0.18250641226768494,
      "learning_rate": 4.812371922009977e-06,
      "loss": 0.0151,
      "step": 1594580
    },
    {
      "epoch": 2.609597873830705,
      "grad_norm": 0.24329623579978943,
      "learning_rate": 4.812306029796459e-06,
      "loss": 0.0145,
      "step": 1594600
    },
    {
      "epoch": 2.6096306042693582,
      "grad_norm": 0.2679627239704132,
      "learning_rate": 4.812240137582942e-06,
      "loss": 0.0142,
      "step": 1594620
    },
    {
      "epoch": 2.609663334708012,
      "grad_norm": 0.41492730379104614,
      "learning_rate": 4.812174245369426e-06,
      "loss": 0.0163,
      "step": 1594640
    },
    {
      "epoch": 2.609696065146665,
      "grad_norm": 0.29606810212135315,
      "learning_rate": 4.8121083531559085e-06,
      "loss": 0.0147,
      "step": 1594660
    },
    {
      "epoch": 2.6097287955853186,
      "grad_norm": 0.1024477630853653,
      "learning_rate": 4.812042460942391e-06,
      "loss": 0.0134,
      "step": 1594680
    },
    {
      "epoch": 2.6097615260239717,
      "grad_norm": 0.6020993590354919,
      "learning_rate": 4.811976568728874e-06,
      "loss": 0.0154,
      "step": 1594700
    },
    {
      "epoch": 2.6097942564626253,
      "grad_norm": 0.39925438165664673,
      "learning_rate": 4.811910676515357e-06,
      "loss": 0.0147,
      "step": 1594720
    },
    {
      "epoch": 2.6098269869012785,
      "grad_norm": 0.3014458417892456,
      "learning_rate": 4.811844784301839e-06,
      "loss": 0.0154,
      "step": 1594740
    },
    {
      "epoch": 2.6098597173399316,
      "grad_norm": 0.5522007346153259,
      "learning_rate": 4.811778892088322e-06,
      "loss": 0.0228,
      "step": 1594760
    },
    {
      "epoch": 2.609892447778585,
      "grad_norm": 0.3518826365470886,
      "learning_rate": 4.811712999874805e-06,
      "loss": 0.0105,
      "step": 1594780
    },
    {
      "epoch": 2.6099251782172384,
      "grad_norm": 0.44930416345596313,
      "learning_rate": 4.8116471076612885e-06,
      "loss": 0.0159,
      "step": 1594800
    },
    {
      "epoch": 2.609957908655892,
      "grad_norm": 0.21415840089321136,
      "learning_rate": 4.811581215447771e-06,
      "loss": 0.0107,
      "step": 1594820
    },
    {
      "epoch": 2.609990639094545,
      "grad_norm": 0.9274038672447205,
      "learning_rate": 4.811515323234254e-06,
      "loss": 0.0205,
      "step": 1594840
    },
    {
      "epoch": 2.6100233695331987,
      "grad_norm": 0.4192858934402466,
      "learning_rate": 4.811449431020737e-06,
      "loss": 0.0133,
      "step": 1594860
    },
    {
      "epoch": 2.610056099971852,
      "grad_norm": 0.1281508356332779,
      "learning_rate": 4.8113835388072194e-06,
      "loss": 0.0179,
      "step": 1594880
    },
    {
      "epoch": 2.610088830410505,
      "grad_norm": 0.15982110798358917,
      "learning_rate": 4.811317646593702e-06,
      "loss": 0.0132,
      "step": 1594900
    },
    {
      "epoch": 2.6101215608491586,
      "grad_norm": 0.2789498567581177,
      "learning_rate": 4.811251754380185e-06,
      "loss": 0.0162,
      "step": 1594920
    },
    {
      "epoch": 2.6101542912878117,
      "grad_norm": 0.2516302168369293,
      "learning_rate": 4.811185862166668e-06,
      "loss": 0.0163,
      "step": 1594940
    },
    {
      "epoch": 2.6101870217264653,
      "grad_norm": 0.3689258396625519,
      "learning_rate": 4.81111996995315e-06,
      "loss": 0.0165,
      "step": 1594960
    },
    {
      "epoch": 2.6102197521651185,
      "grad_norm": 0.18331082165241241,
      "learning_rate": 4.811054077739634e-06,
      "loss": 0.0169,
      "step": 1594980
    },
    {
      "epoch": 2.610252482603772,
      "grad_norm": 0.27727606892585754,
      "learning_rate": 4.810988185526117e-06,
      "loss": 0.0106,
      "step": 1595000
    },
    {
      "epoch": 2.6102852130424252,
      "grad_norm": 0.4316774904727936,
      "learning_rate": 4.8109222933125995e-06,
      "loss": 0.0145,
      "step": 1595020
    },
    {
      "epoch": 2.6103179434810784,
      "grad_norm": 0.22954176366329193,
      "learning_rate": 4.810856401099083e-06,
      "loss": 0.0132,
      "step": 1595040
    },
    {
      "epoch": 2.610350673919732,
      "grad_norm": 0.34344518184661865,
      "learning_rate": 4.810790508885566e-06,
      "loss": 0.0082,
      "step": 1595060
    },
    {
      "epoch": 2.610383404358385,
      "grad_norm": 0.386726438999176,
      "learning_rate": 4.8107246166720485e-06,
      "loss": 0.0144,
      "step": 1595080
    },
    {
      "epoch": 2.6104161347970387,
      "grad_norm": 0.2343619465827942,
      "learning_rate": 4.810658724458531e-06,
      "loss": 0.0137,
      "step": 1595100
    },
    {
      "epoch": 2.610448865235692,
      "grad_norm": 0.20881779491901398,
      "learning_rate": 4.810592832245014e-06,
      "loss": 0.0108,
      "step": 1595120
    },
    {
      "epoch": 2.6104815956743455,
      "grad_norm": 1.1416724920272827,
      "learning_rate": 4.810526940031497e-06,
      "loss": 0.0143,
      "step": 1595140
    },
    {
      "epoch": 2.6105143261129986,
      "grad_norm": 0.4523867070674896,
      "learning_rate": 4.8104610478179795e-06,
      "loss": 0.0099,
      "step": 1595160
    },
    {
      "epoch": 2.6105470565516518,
      "grad_norm": 0.5865762233734131,
      "learning_rate": 4.810395155604462e-06,
      "loss": 0.0134,
      "step": 1595180
    },
    {
      "epoch": 2.6105797869903054,
      "grad_norm": 0.47661444544792175,
      "learning_rate": 4.810329263390946e-06,
      "loss": 0.0145,
      "step": 1595200
    },
    {
      "epoch": 2.6106125174289585,
      "grad_norm": 0.5232226252555847,
      "learning_rate": 4.8102633711774286e-06,
      "loss": 0.0157,
      "step": 1595220
    },
    {
      "epoch": 2.6106452478676117,
      "grad_norm": 0.2129768282175064,
      "learning_rate": 4.810197478963911e-06,
      "loss": 0.0104,
      "step": 1595240
    },
    {
      "epoch": 2.6106779783062652,
      "grad_norm": 0.38674792647361755,
      "learning_rate": 4.810131586750394e-06,
      "loss": 0.0131,
      "step": 1595260
    },
    {
      "epoch": 2.610710708744919,
      "grad_norm": 0.13773265480995178,
      "learning_rate": 4.810065694536877e-06,
      "loss": 0.0121,
      "step": 1595280
    },
    {
      "epoch": 2.610743439183572,
      "grad_norm": 0.0884312242269516,
      "learning_rate": 4.8099998023233595e-06,
      "loss": 0.0124,
      "step": 1595300
    },
    {
      "epoch": 2.610776169622225,
      "grad_norm": 0.21410895884037018,
      "learning_rate": 4.809933910109842e-06,
      "loss": 0.012,
      "step": 1595320
    },
    {
      "epoch": 2.6108089000608787,
      "grad_norm": 1.1134799718856812,
      "learning_rate": 4.809868017896326e-06,
      "loss": 0.0133,
      "step": 1595340
    },
    {
      "epoch": 2.610841630499532,
      "grad_norm": 0.26955121755599976,
      "learning_rate": 4.809802125682809e-06,
      "loss": 0.0156,
      "step": 1595360
    },
    {
      "epoch": 2.610874360938185,
      "grad_norm": 0.8171480298042297,
      "learning_rate": 4.809736233469291e-06,
      "loss": 0.0149,
      "step": 1595380
    },
    {
      "epoch": 2.6109070913768386,
      "grad_norm": 0.2672390341758728,
      "learning_rate": 4.809670341255774e-06,
      "loss": 0.0141,
      "step": 1595400
    },
    {
      "epoch": 2.6109398218154922,
      "grad_norm": 0.3860681354999542,
      "learning_rate": 4.809604449042257e-06,
      "loss": 0.0166,
      "step": 1595420
    },
    {
      "epoch": 2.6109725522541454,
      "grad_norm": 0.19725508987903595,
      "learning_rate": 4.80953855682874e-06,
      "loss": 0.0143,
      "step": 1595440
    },
    {
      "epoch": 2.6110052826927985,
      "grad_norm": 0.2885937988758087,
      "learning_rate": 4.809472664615223e-06,
      "loss": 0.0143,
      "step": 1595460
    },
    {
      "epoch": 2.611038013131452,
      "grad_norm": 0.4889078736305237,
      "learning_rate": 4.809406772401706e-06,
      "loss": 0.0106,
      "step": 1595480
    },
    {
      "epoch": 2.6110707435701053,
      "grad_norm": 0.23880083858966827,
      "learning_rate": 4.809340880188189e-06,
      "loss": 0.0138,
      "step": 1595500
    },
    {
      "epoch": 2.6111034740087584,
      "grad_norm": 0.1515900045633316,
      "learning_rate": 4.809274987974671e-06,
      "loss": 0.0145,
      "step": 1595520
    },
    {
      "epoch": 2.611136204447412,
      "grad_norm": 0.09760867059230804,
      "learning_rate": 4.809209095761154e-06,
      "loss": 0.0147,
      "step": 1595540
    },
    {
      "epoch": 2.6111689348860656,
      "grad_norm": 0.4129124879837036,
      "learning_rate": 4.809143203547637e-06,
      "loss": 0.012,
      "step": 1595560
    },
    {
      "epoch": 2.6112016653247188,
      "grad_norm": 0.27492594718933105,
      "learning_rate": 4.8090773113341196e-06,
      "loss": 0.0121,
      "step": 1595580
    },
    {
      "epoch": 2.611234395763372,
      "grad_norm": 0.7430654168128967,
      "learning_rate": 4.809011419120603e-06,
      "loss": 0.0199,
      "step": 1595600
    },
    {
      "epoch": 2.6112671262020255,
      "grad_norm": 0.1821582019329071,
      "learning_rate": 4.808945526907086e-06,
      "loss": 0.0174,
      "step": 1595620
    },
    {
      "epoch": 2.6112998566406787,
      "grad_norm": 0.12048608809709549,
      "learning_rate": 4.808879634693569e-06,
      "loss": 0.0183,
      "step": 1595640
    },
    {
      "epoch": 2.611332587079332,
      "grad_norm": 0.477581650018692,
      "learning_rate": 4.808813742480051e-06,
      "loss": 0.0143,
      "step": 1595660
    },
    {
      "epoch": 2.6113653175179854,
      "grad_norm": 1.9759109020233154,
      "learning_rate": 4.808747850266534e-06,
      "loss": 0.0097,
      "step": 1595680
    },
    {
      "epoch": 2.611398047956639,
      "grad_norm": 0.468313068151474,
      "learning_rate": 4.808681958053018e-06,
      "loss": 0.0132,
      "step": 1595700
    },
    {
      "epoch": 2.611430778395292,
      "grad_norm": 0.8528236150741577,
      "learning_rate": 4.8086160658395004e-06,
      "loss": 0.0164,
      "step": 1595720
    },
    {
      "epoch": 2.6114635088339453,
      "grad_norm": 0.5101934671401978,
      "learning_rate": 4.808550173625983e-06,
      "loss": 0.0193,
      "step": 1595740
    },
    {
      "epoch": 2.611496239272599,
      "grad_norm": 0.4524352550506592,
      "learning_rate": 4.808484281412466e-06,
      "loss": 0.0107,
      "step": 1595760
    },
    {
      "epoch": 2.611528969711252,
      "grad_norm": 1.0678801536560059,
      "learning_rate": 4.808418389198949e-06,
      "loss": 0.0154,
      "step": 1595780
    },
    {
      "epoch": 2.611561700149905,
      "grad_norm": 0.2565678656101227,
      "learning_rate": 4.808352496985431e-06,
      "loss": 0.0171,
      "step": 1595800
    },
    {
      "epoch": 2.611594430588559,
      "grad_norm": 0.07194676995277405,
      "learning_rate": 4.808286604771914e-06,
      "loss": 0.0073,
      "step": 1595820
    },
    {
      "epoch": 2.611627161027212,
      "grad_norm": 1.024591088294983,
      "learning_rate": 4.808220712558398e-06,
      "loss": 0.0171,
      "step": 1595840
    },
    {
      "epoch": 2.6116598914658655,
      "grad_norm": 0.12116532772779465,
      "learning_rate": 4.8081548203448805e-06,
      "loss": 0.0177,
      "step": 1595860
    },
    {
      "epoch": 2.6116926219045187,
      "grad_norm": 0.29211169481277466,
      "learning_rate": 4.808088928131363e-06,
      "loss": 0.0135,
      "step": 1595880
    },
    {
      "epoch": 2.6117253523431723,
      "grad_norm": 0.24228738248348236,
      "learning_rate": 4.808023035917846e-06,
      "loss": 0.0143,
      "step": 1595900
    },
    {
      "epoch": 2.6117580827818254,
      "grad_norm": 1.4390512704849243,
      "learning_rate": 4.807957143704329e-06,
      "loss": 0.0139,
      "step": 1595920
    },
    {
      "epoch": 2.6117908132204786,
      "grad_norm": 0.6116085052490234,
      "learning_rate": 4.807891251490811e-06,
      "loss": 0.0179,
      "step": 1595940
    },
    {
      "epoch": 2.611823543659132,
      "grad_norm": 0.06067142263054848,
      "learning_rate": 4.807825359277294e-06,
      "loss": 0.0106,
      "step": 1595960
    },
    {
      "epoch": 2.6118562740977853,
      "grad_norm": 0.3279396891593933,
      "learning_rate": 4.807759467063777e-06,
      "loss": 0.0148,
      "step": 1595980
    },
    {
      "epoch": 2.611889004536439,
      "grad_norm": 0.4642479717731476,
      "learning_rate": 4.8076935748502605e-06,
      "loss": 0.0176,
      "step": 1596000
    },
    {
      "epoch": 2.611921734975092,
      "grad_norm": 0.30003979802131653,
      "learning_rate": 4.807627682636743e-06,
      "loss": 0.0202,
      "step": 1596020
    },
    {
      "epoch": 2.6119544654137457,
      "grad_norm": 0.07528312504291534,
      "learning_rate": 4.807561790423226e-06,
      "loss": 0.0104,
      "step": 1596040
    },
    {
      "epoch": 2.611987195852399,
      "grad_norm": 0.5977529287338257,
      "learning_rate": 4.8074958982097096e-06,
      "loss": 0.0165,
      "step": 1596060
    },
    {
      "epoch": 2.612019926291052,
      "grad_norm": 0.5143587589263916,
      "learning_rate": 4.807430005996192e-06,
      "loss": 0.0174,
      "step": 1596080
    },
    {
      "epoch": 2.6120526567297055,
      "grad_norm": 0.6141423583030701,
      "learning_rate": 4.807364113782675e-06,
      "loss": 0.0114,
      "step": 1596100
    },
    {
      "epoch": 2.6120853871683587,
      "grad_norm": 0.1567186564207077,
      "learning_rate": 4.807298221569158e-06,
      "loss": 0.0115,
      "step": 1596120
    },
    {
      "epoch": 2.6121181176070123,
      "grad_norm": 0.30039066076278687,
      "learning_rate": 4.8072323293556405e-06,
      "loss": 0.015,
      "step": 1596140
    },
    {
      "epoch": 2.6121508480456654,
      "grad_norm": 0.514598548412323,
      "learning_rate": 4.807166437142123e-06,
      "loss": 0.019,
      "step": 1596160
    },
    {
      "epoch": 2.612183578484319,
      "grad_norm": 0.2295224964618683,
      "learning_rate": 4.807100544928606e-06,
      "loss": 0.0097,
      "step": 1596180
    },
    {
      "epoch": 2.612216308922972,
      "grad_norm": 0.4927922785282135,
      "learning_rate": 4.807034652715089e-06,
      "loss": 0.0132,
      "step": 1596200
    },
    {
      "epoch": 2.6122490393616253,
      "grad_norm": 0.6607950329780579,
      "learning_rate": 4.806968760501572e-06,
      "loss": 0.014,
      "step": 1596220
    },
    {
      "epoch": 2.612281769800279,
      "grad_norm": 0.13601011037826538,
      "learning_rate": 4.806902868288055e-06,
      "loss": 0.0133,
      "step": 1596240
    },
    {
      "epoch": 2.612314500238932,
      "grad_norm": 0.6759145855903625,
      "learning_rate": 4.806836976074538e-06,
      "loss": 0.0134,
      "step": 1596260
    },
    {
      "epoch": 2.6123472306775857,
      "grad_norm": 2.1780617237091064,
      "learning_rate": 4.8067710838610205e-06,
      "loss": 0.0208,
      "step": 1596280
    },
    {
      "epoch": 2.612379961116239,
      "grad_norm": 0.16321614384651184,
      "learning_rate": 4.806705191647503e-06,
      "loss": 0.0144,
      "step": 1596300
    },
    {
      "epoch": 2.6124126915548924,
      "grad_norm": 0.8998557925224304,
      "learning_rate": 4.806639299433986e-06,
      "loss": 0.0112,
      "step": 1596320
    },
    {
      "epoch": 2.6124454219935456,
      "grad_norm": 0.2722504138946533,
      "learning_rate": 4.806573407220469e-06,
      "loss": 0.0095,
      "step": 1596340
    },
    {
      "epoch": 2.6124781524321987,
      "grad_norm": 1.2432684898376465,
      "learning_rate": 4.8065075150069515e-06,
      "loss": 0.0131,
      "step": 1596360
    },
    {
      "epoch": 2.6125108828708523,
      "grad_norm": 0.11426468938589096,
      "learning_rate": 4.806441622793434e-06,
      "loss": 0.0164,
      "step": 1596380
    },
    {
      "epoch": 2.6125436133095055,
      "grad_norm": 0.39082199335098267,
      "learning_rate": 4.806375730579918e-06,
      "loss": 0.0152,
      "step": 1596400
    },
    {
      "epoch": 2.612576343748159,
      "grad_norm": 0.641782820224762,
      "learning_rate": 4.8063098383664006e-06,
      "loss": 0.0131,
      "step": 1596420
    },
    {
      "epoch": 2.612609074186812,
      "grad_norm": 0.6948993802070618,
      "learning_rate": 4.806243946152883e-06,
      "loss": 0.0124,
      "step": 1596440
    },
    {
      "epoch": 2.612641804625466,
      "grad_norm": 0.13468945026397705,
      "learning_rate": 4.806178053939367e-06,
      "loss": 0.0131,
      "step": 1596460
    },
    {
      "epoch": 2.612674535064119,
      "grad_norm": 0.3089381456375122,
      "learning_rate": 4.80611216172585e-06,
      "loss": 0.0192,
      "step": 1596480
    },
    {
      "epoch": 2.612707265502772,
      "grad_norm": 0.11721208691596985,
      "learning_rate": 4.806046269512332e-06,
      "loss": 0.013,
      "step": 1596500
    },
    {
      "epoch": 2.6127399959414257,
      "grad_norm": 0.608792245388031,
      "learning_rate": 4.805980377298815e-06,
      "loss": 0.0145,
      "step": 1596520
    },
    {
      "epoch": 2.612772726380079,
      "grad_norm": 0.7181947231292725,
      "learning_rate": 4.805914485085298e-06,
      "loss": 0.0166,
      "step": 1596540
    },
    {
      "epoch": 2.6128054568187324,
      "grad_norm": 0.2834160625934601,
      "learning_rate": 4.805848592871781e-06,
      "loss": 0.0137,
      "step": 1596560
    },
    {
      "epoch": 2.6128381872573856,
      "grad_norm": 0.49474918842315674,
      "learning_rate": 4.805782700658263e-06,
      "loss": 0.0147,
      "step": 1596580
    },
    {
      "epoch": 2.612870917696039,
      "grad_norm": 0.18065249919891357,
      "learning_rate": 4.805716808444746e-06,
      "loss": 0.0114,
      "step": 1596600
    },
    {
      "epoch": 2.6129036481346923,
      "grad_norm": 0.23935575783252716,
      "learning_rate": 4.80565091623123e-06,
      "loss": 0.0108,
      "step": 1596620
    },
    {
      "epoch": 2.6129363785733455,
      "grad_norm": 0.7852180600166321,
      "learning_rate": 4.805585024017712e-06,
      "loss": 0.0161,
      "step": 1596640
    },
    {
      "epoch": 2.612969109011999,
      "grad_norm": 0.18324342370033264,
      "learning_rate": 4.805519131804195e-06,
      "loss": 0.0116,
      "step": 1596660
    },
    {
      "epoch": 2.6130018394506522,
      "grad_norm": 0.22723689675331116,
      "learning_rate": 4.805453239590678e-06,
      "loss": 0.0151,
      "step": 1596680
    },
    {
      "epoch": 2.6130345698893054,
      "grad_norm": 0.048857979476451874,
      "learning_rate": 4.805387347377161e-06,
      "loss": 0.0133,
      "step": 1596700
    },
    {
      "epoch": 2.613067300327959,
      "grad_norm": 0.5718292593955994,
      "learning_rate": 4.805321455163643e-06,
      "loss": 0.0097,
      "step": 1596720
    },
    {
      "epoch": 2.6131000307666126,
      "grad_norm": 1.121495246887207,
      "learning_rate": 4.805255562950127e-06,
      "loss": 0.014,
      "step": 1596740
    },
    {
      "epoch": 2.6131327612052657,
      "grad_norm": 0.2690925896167755,
      "learning_rate": 4.80518967073661e-06,
      "loss": 0.0159,
      "step": 1596760
    },
    {
      "epoch": 2.613165491643919,
      "grad_norm": 0.5852006673812866,
      "learning_rate": 4.805123778523092e-06,
      "loss": 0.0114,
      "step": 1596780
    },
    {
      "epoch": 2.6131982220825725,
      "grad_norm": 0.5283703804016113,
      "learning_rate": 4.805057886309575e-06,
      "loss": 0.0163,
      "step": 1596800
    },
    {
      "epoch": 2.6132309525212256,
      "grad_norm": 0.3260651230812073,
      "learning_rate": 4.804991994096058e-06,
      "loss": 0.0165,
      "step": 1596820
    },
    {
      "epoch": 2.6132636829598788,
      "grad_norm": 0.07498269528150558,
      "learning_rate": 4.804926101882541e-06,
      "loss": 0.013,
      "step": 1596840
    },
    {
      "epoch": 2.6132964133985324,
      "grad_norm": 0.1028810441493988,
      "learning_rate": 4.804860209669024e-06,
      "loss": 0.0199,
      "step": 1596860
    },
    {
      "epoch": 2.613329143837186,
      "grad_norm": 0.5636252760887146,
      "learning_rate": 4.804794317455507e-06,
      "loss": 0.0219,
      "step": 1596880
    },
    {
      "epoch": 2.613361874275839,
      "grad_norm": 0.5060307383537292,
      "learning_rate": 4.80472842524199e-06,
      "loss": 0.0194,
      "step": 1596900
    },
    {
      "epoch": 2.6133946047144923,
      "grad_norm": 0.2678530812263489,
      "learning_rate": 4.8046625330284724e-06,
      "loss": 0.0126,
      "step": 1596920
    },
    {
      "epoch": 2.613427335153146,
      "grad_norm": 0.24651610851287842,
      "learning_rate": 4.804596640814955e-06,
      "loss": 0.0128,
      "step": 1596940
    },
    {
      "epoch": 2.613460065591799,
      "grad_norm": 0.581626296043396,
      "learning_rate": 4.804530748601438e-06,
      "loss": 0.0097,
      "step": 1596960
    },
    {
      "epoch": 2.613492796030452,
      "grad_norm": 0.2799581289291382,
      "learning_rate": 4.804464856387921e-06,
      "loss": 0.022,
      "step": 1596980
    },
    {
      "epoch": 2.6135255264691057,
      "grad_norm": 0.297736793756485,
      "learning_rate": 4.804398964174403e-06,
      "loss": 0.0121,
      "step": 1597000
    },
    {
      "epoch": 2.6135582569077593,
      "grad_norm": 0.3439926207065582,
      "learning_rate": 4.804333071960887e-06,
      "loss": 0.0139,
      "step": 1597020
    },
    {
      "epoch": 2.6135909873464125,
      "grad_norm": 0.11233750730752945,
      "learning_rate": 4.80426717974737e-06,
      "loss": 0.0119,
      "step": 1597040
    },
    {
      "epoch": 2.6136237177850656,
      "grad_norm": 0.201880544424057,
      "learning_rate": 4.8042012875338525e-06,
      "loss": 0.0151,
      "step": 1597060
    },
    {
      "epoch": 2.6136564482237192,
      "grad_norm": 0.13128601014614105,
      "learning_rate": 4.804135395320335e-06,
      "loss": 0.0121,
      "step": 1597080
    },
    {
      "epoch": 2.6136891786623724,
      "grad_norm": 0.23015642166137695,
      "learning_rate": 4.804069503106819e-06,
      "loss": 0.0094,
      "step": 1597100
    },
    {
      "epoch": 2.6137219091010255,
      "grad_norm": 0.11406828463077545,
      "learning_rate": 4.8040036108933015e-06,
      "loss": 0.0162,
      "step": 1597120
    },
    {
      "epoch": 2.613754639539679,
      "grad_norm": 0.45013025403022766,
      "learning_rate": 4.803937718679784e-06,
      "loss": 0.0094,
      "step": 1597140
    },
    {
      "epoch": 2.6137873699783327,
      "grad_norm": 0.18679648637771606,
      "learning_rate": 4.803871826466267e-06,
      "loss": 0.017,
      "step": 1597160
    },
    {
      "epoch": 2.613820100416986,
      "grad_norm": 0.42835262417793274,
      "learning_rate": 4.80380593425275e-06,
      "loss": 0.0155,
      "step": 1597180
    },
    {
      "epoch": 2.613852830855639,
      "grad_norm": 0.26000770926475525,
      "learning_rate": 4.8037400420392325e-06,
      "loss": 0.0127,
      "step": 1597200
    },
    {
      "epoch": 2.6138855612942926,
      "grad_norm": 0.2271350473165512,
      "learning_rate": 4.803674149825715e-06,
      "loss": 0.0162,
      "step": 1597220
    },
    {
      "epoch": 2.6139182917329458,
      "grad_norm": 0.07120920717716217,
      "learning_rate": 4.803608257612199e-06,
      "loss": 0.0115,
      "step": 1597240
    },
    {
      "epoch": 2.613951022171599,
      "grad_norm": 0.47300195693969727,
      "learning_rate": 4.8035423653986816e-06,
      "loss": 0.013,
      "step": 1597260
    },
    {
      "epoch": 2.6139837526102525,
      "grad_norm": 0.14791037142276764,
      "learning_rate": 4.803476473185164e-06,
      "loss": 0.0159,
      "step": 1597280
    },
    {
      "epoch": 2.6140164830489057,
      "grad_norm": 0.34642595052719116,
      "learning_rate": 4.803410580971647e-06,
      "loss": 0.0146,
      "step": 1597300
    },
    {
      "epoch": 2.6140492134875593,
      "grad_norm": 0.3636074364185333,
      "learning_rate": 4.80334468875813e-06,
      "loss": 0.0204,
      "step": 1597320
    },
    {
      "epoch": 2.6140819439262124,
      "grad_norm": 1.460237741470337,
      "learning_rate": 4.8032787965446125e-06,
      "loss": 0.0141,
      "step": 1597340
    },
    {
      "epoch": 2.614114674364866,
      "grad_norm": 0.28517770767211914,
      "learning_rate": 4.803212904331095e-06,
      "loss": 0.0117,
      "step": 1597360
    },
    {
      "epoch": 2.614147404803519,
      "grad_norm": 0.3929721415042877,
      "learning_rate": 4.803147012117578e-06,
      "loss": 0.0102,
      "step": 1597380
    },
    {
      "epoch": 2.6141801352421723,
      "grad_norm": 0.6949822306632996,
      "learning_rate": 4.803081119904061e-06,
      "loss": 0.0171,
      "step": 1597400
    },
    {
      "epoch": 2.614212865680826,
      "grad_norm": 0.5489014983177185,
      "learning_rate": 4.803015227690544e-06,
      "loss": 0.0148,
      "step": 1597420
    },
    {
      "epoch": 2.614245596119479,
      "grad_norm": 0.14452232420444489,
      "learning_rate": 4.802949335477027e-06,
      "loss": 0.0103,
      "step": 1597440
    },
    {
      "epoch": 2.6142783265581326,
      "grad_norm": 0.08059600740671158,
      "learning_rate": 4.80288344326351e-06,
      "loss": 0.0138,
      "step": 1597460
    },
    {
      "epoch": 2.614311056996786,
      "grad_norm": 0.23336580395698547,
      "learning_rate": 4.802817551049993e-06,
      "loss": 0.0107,
      "step": 1597480
    },
    {
      "epoch": 2.6143437874354394,
      "grad_norm": 0.2804984748363495,
      "learning_rate": 4.802751658836476e-06,
      "loss": 0.0097,
      "step": 1597500
    },
    {
      "epoch": 2.6143765178740925,
      "grad_norm": 0.12404068559408188,
      "learning_rate": 4.802685766622959e-06,
      "loss": 0.0119,
      "step": 1597520
    },
    {
      "epoch": 2.6144092483127457,
      "grad_norm": 0.455217570066452,
      "learning_rate": 4.802619874409442e-06,
      "loss": 0.0178,
      "step": 1597540
    },
    {
      "epoch": 2.6144419787513993,
      "grad_norm": 1.338786244392395,
      "learning_rate": 4.802553982195924e-06,
      "loss": 0.0212,
      "step": 1597560
    },
    {
      "epoch": 2.6144747091900524,
      "grad_norm": 0.10420399159193039,
      "learning_rate": 4.802488089982407e-06,
      "loss": 0.0119,
      "step": 1597580
    },
    {
      "epoch": 2.614507439628706,
      "grad_norm": 0.5841161608695984,
      "learning_rate": 4.80242219776889e-06,
      "loss": 0.0186,
      "step": 1597600
    },
    {
      "epoch": 2.614540170067359,
      "grad_norm": 0.48185425996780396,
      "learning_rate": 4.8023563055553726e-06,
      "loss": 0.0109,
      "step": 1597620
    },
    {
      "epoch": 2.6145729005060128,
      "grad_norm": 0.49922826886177063,
      "learning_rate": 4.802290413341856e-06,
      "loss": 0.0173,
      "step": 1597640
    },
    {
      "epoch": 2.614605630944666,
      "grad_norm": 0.2676560580730438,
      "learning_rate": 4.802224521128339e-06,
      "loss": 0.0187,
      "step": 1597660
    },
    {
      "epoch": 2.614638361383319,
      "grad_norm": 0.29949551820755005,
      "learning_rate": 4.802158628914822e-06,
      "loss": 0.0135,
      "step": 1597680
    },
    {
      "epoch": 2.6146710918219727,
      "grad_norm": 0.27232450246810913,
      "learning_rate": 4.802092736701304e-06,
      "loss": 0.0153,
      "step": 1597700
    },
    {
      "epoch": 2.614703822260626,
      "grad_norm": 1.0110965967178345,
      "learning_rate": 4.802026844487787e-06,
      "loss": 0.0163,
      "step": 1597720
    },
    {
      "epoch": 2.6147365526992794,
      "grad_norm": 0.5230469107627869,
      "learning_rate": 4.80196095227427e-06,
      "loss": 0.0189,
      "step": 1597740
    },
    {
      "epoch": 2.6147692831379326,
      "grad_norm": 0.2521067261695862,
      "learning_rate": 4.801895060060753e-06,
      "loss": 0.0101,
      "step": 1597760
    },
    {
      "epoch": 2.614802013576586,
      "grad_norm": 0.4344203770160675,
      "learning_rate": 4.801829167847235e-06,
      "loss": 0.0143,
      "step": 1597780
    },
    {
      "epoch": 2.6148347440152393,
      "grad_norm": 0.12432326376438141,
      "learning_rate": 4.801763275633719e-06,
      "loss": 0.0129,
      "step": 1597800
    },
    {
      "epoch": 2.6148674744538924,
      "grad_norm": 0.7964603900909424,
      "learning_rate": 4.801697383420202e-06,
      "loss": 0.0198,
      "step": 1597820
    },
    {
      "epoch": 2.614900204892546,
      "grad_norm": 0.3626689910888672,
      "learning_rate": 4.801631491206684e-06,
      "loss": 0.0189,
      "step": 1597840
    },
    {
      "epoch": 2.614932935331199,
      "grad_norm": 0.20039600133895874,
      "learning_rate": 4.801565598993167e-06,
      "loss": 0.0086,
      "step": 1597860
    },
    {
      "epoch": 2.614965665769853,
      "grad_norm": 0.3858654797077179,
      "learning_rate": 4.801499706779651e-06,
      "loss": 0.0137,
      "step": 1597880
    },
    {
      "epoch": 2.614998396208506,
      "grad_norm": 0.13176922500133514,
      "learning_rate": 4.8014338145661335e-06,
      "loss": 0.0105,
      "step": 1597900
    },
    {
      "epoch": 2.6150311266471595,
      "grad_norm": 0.3225626051425934,
      "learning_rate": 4.801367922352616e-06,
      "loss": 0.0161,
      "step": 1597920
    },
    {
      "epoch": 2.6150638570858127,
      "grad_norm": 0.5508381128311157,
      "learning_rate": 4.801302030139099e-06,
      "loss": 0.0153,
      "step": 1597940
    },
    {
      "epoch": 2.615096587524466,
      "grad_norm": 1.1031461954116821,
      "learning_rate": 4.801236137925582e-06,
      "loss": 0.0134,
      "step": 1597960
    },
    {
      "epoch": 2.6151293179631194,
      "grad_norm": 0.4400388300418854,
      "learning_rate": 4.8011702457120644e-06,
      "loss": 0.0144,
      "step": 1597980
    },
    {
      "epoch": 2.6151620484017726,
      "grad_norm": 0.491519033908844,
      "learning_rate": 4.801104353498547e-06,
      "loss": 0.0182,
      "step": 1598000
    },
    {
      "epoch": 2.615194778840426,
      "grad_norm": 0.17233490943908691,
      "learning_rate": 4.80103846128503e-06,
      "loss": 0.0111,
      "step": 1598020
    },
    {
      "epoch": 2.6152275092790793,
      "grad_norm": 0.7622979879379272,
      "learning_rate": 4.8009725690715135e-06,
      "loss": 0.0163,
      "step": 1598040
    },
    {
      "epoch": 2.615260239717733,
      "grad_norm": 0.5391910076141357,
      "learning_rate": 4.800906676857996e-06,
      "loss": 0.0144,
      "step": 1598060
    },
    {
      "epoch": 2.615292970156386,
      "grad_norm": 0.23442628979682922,
      "learning_rate": 4.800840784644479e-06,
      "loss": 0.0151,
      "step": 1598080
    },
    {
      "epoch": 2.615325700595039,
      "grad_norm": 0.5996417999267578,
      "learning_rate": 4.800774892430962e-06,
      "loss": 0.0106,
      "step": 1598100
    },
    {
      "epoch": 2.615358431033693,
      "grad_norm": 0.5483563542366028,
      "learning_rate": 4.8007090002174445e-06,
      "loss": 0.0185,
      "step": 1598120
    },
    {
      "epoch": 2.615391161472346,
      "grad_norm": 0.32735106348991394,
      "learning_rate": 4.800643108003927e-06,
      "loss": 0.0161,
      "step": 1598140
    },
    {
      "epoch": 2.6154238919109996,
      "grad_norm": 0.3204652667045593,
      "learning_rate": 4.800577215790411e-06,
      "loss": 0.0149,
      "step": 1598160
    },
    {
      "epoch": 2.6154566223496527,
      "grad_norm": 0.24452945590019226,
      "learning_rate": 4.8005113235768935e-06,
      "loss": 0.0143,
      "step": 1598180
    },
    {
      "epoch": 2.6154893527883063,
      "grad_norm": 0.26910558342933655,
      "learning_rate": 4.800445431363376e-06,
      "loss": 0.0155,
      "step": 1598200
    },
    {
      "epoch": 2.6155220832269594,
      "grad_norm": 0.4682353436946869,
      "learning_rate": 4.800379539149859e-06,
      "loss": 0.0146,
      "step": 1598220
    },
    {
      "epoch": 2.6155548136656126,
      "grad_norm": 0.21327649056911469,
      "learning_rate": 4.800313646936342e-06,
      "loss": 0.0102,
      "step": 1598240
    },
    {
      "epoch": 2.615587544104266,
      "grad_norm": 0.04775681719183922,
      "learning_rate": 4.8002477547228245e-06,
      "loss": 0.0167,
      "step": 1598260
    },
    {
      "epoch": 2.6156202745429193,
      "grad_norm": 0.4784943163394928,
      "learning_rate": 4.800181862509308e-06,
      "loss": 0.0164,
      "step": 1598280
    },
    {
      "epoch": 2.6156530049815725,
      "grad_norm": 0.3653520345687866,
      "learning_rate": 4.800115970295791e-06,
      "loss": 0.0181,
      "step": 1598300
    },
    {
      "epoch": 2.615685735420226,
      "grad_norm": 0.45932725071907043,
      "learning_rate": 4.8000500780822735e-06,
      "loss": 0.0292,
      "step": 1598320
    },
    {
      "epoch": 2.6157184658588797,
      "grad_norm": 1.8667186498641968,
      "learning_rate": 4.799984185868756e-06,
      "loss": 0.0155,
      "step": 1598340
    },
    {
      "epoch": 2.615751196297533,
      "grad_norm": 0.33110523223876953,
      "learning_rate": 4.799918293655239e-06,
      "loss": 0.0151,
      "step": 1598360
    },
    {
      "epoch": 2.615783926736186,
      "grad_norm": 0.3632522225379944,
      "learning_rate": 4.799852401441722e-06,
      "loss": 0.0127,
      "step": 1598380
    },
    {
      "epoch": 2.6158166571748396,
      "grad_norm": 0.34093135595321655,
      "learning_rate": 4.7997865092282045e-06,
      "loss": 0.0111,
      "step": 1598400
    },
    {
      "epoch": 2.6158493876134927,
      "grad_norm": 0.2753429710865021,
      "learning_rate": 4.799720617014687e-06,
      "loss": 0.0107,
      "step": 1598420
    },
    {
      "epoch": 2.615882118052146,
      "grad_norm": 0.23206979036331177,
      "learning_rate": 4.799654724801171e-06,
      "loss": 0.0138,
      "step": 1598440
    },
    {
      "epoch": 2.6159148484907995,
      "grad_norm": 0.3100902736186981,
      "learning_rate": 4.7995888325876536e-06,
      "loss": 0.0091,
      "step": 1598460
    },
    {
      "epoch": 2.615947578929453,
      "grad_norm": 0.9847576022148132,
      "learning_rate": 4.799522940374136e-06,
      "loss": 0.0228,
      "step": 1598480
    },
    {
      "epoch": 2.615980309368106,
      "grad_norm": 0.19016027450561523,
      "learning_rate": 4.79945704816062e-06,
      "loss": 0.0154,
      "step": 1598500
    },
    {
      "epoch": 2.6160130398067594,
      "grad_norm": 0.46287426352500916,
      "learning_rate": 4.799391155947103e-06,
      "loss": 0.018,
      "step": 1598520
    },
    {
      "epoch": 2.616045770245413,
      "grad_norm": 0.36218196153640747,
      "learning_rate": 4.799325263733585e-06,
      "loss": 0.0135,
      "step": 1598540
    },
    {
      "epoch": 2.616078500684066,
      "grad_norm": 0.3854704797267914,
      "learning_rate": 4.799259371520068e-06,
      "loss": 0.0156,
      "step": 1598560
    },
    {
      "epoch": 2.6161112311227193,
      "grad_norm": 0.6914215683937073,
      "learning_rate": 4.799193479306551e-06,
      "loss": 0.0176,
      "step": 1598580
    },
    {
      "epoch": 2.616143961561373,
      "grad_norm": 0.14247651398181915,
      "learning_rate": 4.799127587093034e-06,
      "loss": 0.0122,
      "step": 1598600
    },
    {
      "epoch": 2.6161766920000264,
      "grad_norm": 0.26518791913986206,
      "learning_rate": 4.799061694879516e-06,
      "loss": 0.0096,
      "step": 1598620
    },
    {
      "epoch": 2.6162094224386796,
      "grad_norm": 0.20844511687755585,
      "learning_rate": 4.798995802665999e-06,
      "loss": 0.0162,
      "step": 1598640
    },
    {
      "epoch": 2.6162421528773327,
      "grad_norm": 0.16111767292022705,
      "learning_rate": 4.798929910452483e-06,
      "loss": 0.0148,
      "step": 1598660
    },
    {
      "epoch": 2.6162748833159863,
      "grad_norm": 0.2840893566608429,
      "learning_rate": 4.798864018238965e-06,
      "loss": 0.0187,
      "step": 1598680
    },
    {
      "epoch": 2.6163076137546395,
      "grad_norm": 0.1699630320072174,
      "learning_rate": 4.798798126025448e-06,
      "loss": 0.0158,
      "step": 1598700
    },
    {
      "epoch": 2.6163403441932926,
      "grad_norm": 0.34850651025772095,
      "learning_rate": 4.798732233811931e-06,
      "loss": 0.0113,
      "step": 1598720
    },
    {
      "epoch": 2.6163730746319462,
      "grad_norm": 0.5309023857116699,
      "learning_rate": 4.798666341598414e-06,
      "loss": 0.0137,
      "step": 1598740
    },
    {
      "epoch": 2.6164058050706,
      "grad_norm": 0.6814549565315247,
      "learning_rate": 4.798600449384896e-06,
      "loss": 0.0152,
      "step": 1598760
    },
    {
      "epoch": 2.616438535509253,
      "grad_norm": 0.5279472470283508,
      "learning_rate": 4.798534557171379e-06,
      "loss": 0.016,
      "step": 1598780
    },
    {
      "epoch": 2.616471265947906,
      "grad_norm": 0.3148579001426697,
      "learning_rate": 4.798468664957862e-06,
      "loss": 0.0111,
      "step": 1598800
    },
    {
      "epoch": 2.6165039963865597,
      "grad_norm": 0.07942607998847961,
      "learning_rate": 4.798402772744345e-06,
      "loss": 0.0125,
      "step": 1598820
    },
    {
      "epoch": 2.616536726825213,
      "grad_norm": 0.23436950147151947,
      "learning_rate": 4.798336880530828e-06,
      "loss": 0.019,
      "step": 1598840
    },
    {
      "epoch": 2.616569457263866,
      "grad_norm": 0.7598020434379578,
      "learning_rate": 4.798270988317311e-06,
      "loss": 0.0099,
      "step": 1598860
    },
    {
      "epoch": 2.6166021877025196,
      "grad_norm": 0.2636389136314392,
      "learning_rate": 4.798205096103794e-06,
      "loss": 0.0144,
      "step": 1598880
    },
    {
      "epoch": 2.6166349181411728,
      "grad_norm": 0.4464246928691864,
      "learning_rate": 4.798139203890277e-06,
      "loss": 0.0153,
      "step": 1598900
    },
    {
      "epoch": 2.6166676485798264,
      "grad_norm": 0.5237688422203064,
      "learning_rate": 4.79807331167676e-06,
      "loss": 0.0221,
      "step": 1598920
    },
    {
      "epoch": 2.6167003790184795,
      "grad_norm": 0.4215669631958008,
      "learning_rate": 4.798007419463243e-06,
      "loss": 0.0207,
      "step": 1598940
    },
    {
      "epoch": 2.616733109457133,
      "grad_norm": 0.28854042291641235,
      "learning_rate": 4.7979415272497255e-06,
      "loss": 0.0198,
      "step": 1598960
    },
    {
      "epoch": 2.6167658398957863,
      "grad_norm": 0.1380181908607483,
      "learning_rate": 4.797875635036208e-06,
      "loss": 0.01,
      "step": 1598980
    },
    {
      "epoch": 2.6167985703344394,
      "grad_norm": 0.20128367841243744,
      "learning_rate": 4.797809742822691e-06,
      "loss": 0.0177,
      "step": 1599000
    },
    {
      "epoch": 2.616831300773093,
      "grad_norm": 0.2552408277988434,
      "learning_rate": 4.797743850609174e-06,
      "loss": 0.0158,
      "step": 1599020
    },
    {
      "epoch": 2.616864031211746,
      "grad_norm": 0.060544323176145554,
      "learning_rate": 4.797677958395656e-06,
      "loss": 0.0093,
      "step": 1599040
    },
    {
      "epoch": 2.6168967616503997,
      "grad_norm": 1.1863962411880493,
      "learning_rate": 4.79761206618214e-06,
      "loss": 0.0182,
      "step": 1599060
    },
    {
      "epoch": 2.616929492089053,
      "grad_norm": 0.20695795118808746,
      "learning_rate": 4.797546173968623e-06,
      "loss": 0.0203,
      "step": 1599080
    },
    {
      "epoch": 2.6169622225277065,
      "grad_norm": 0.1711818277835846,
      "learning_rate": 4.7974802817551055e-06,
      "loss": 0.0133,
      "step": 1599100
    },
    {
      "epoch": 2.6169949529663596,
      "grad_norm": 0.2862071990966797,
      "learning_rate": 4.797414389541588e-06,
      "loss": 0.0108,
      "step": 1599120
    },
    {
      "epoch": 2.617027683405013,
      "grad_norm": 0.5285082459449768,
      "learning_rate": 4.797348497328071e-06,
      "loss": 0.0165,
      "step": 1599140
    },
    {
      "epoch": 2.6170604138436664,
      "grad_norm": 0.18666300177574158,
      "learning_rate": 4.797282605114554e-06,
      "loss": 0.014,
      "step": 1599160
    },
    {
      "epoch": 2.6170931442823195,
      "grad_norm": 0.2427346259355545,
      "learning_rate": 4.7972167129010364e-06,
      "loss": 0.0141,
      "step": 1599180
    },
    {
      "epoch": 2.617125874720973,
      "grad_norm": 0.2743598520755768,
      "learning_rate": 4.797150820687519e-06,
      "loss": 0.0151,
      "step": 1599200
    },
    {
      "epoch": 2.6171586051596263,
      "grad_norm": 0.750540554523468,
      "learning_rate": 4.797084928474003e-06,
      "loss": 0.0127,
      "step": 1599220
    },
    {
      "epoch": 2.61719133559828,
      "grad_norm": 0.7586962580680847,
      "learning_rate": 4.7970190362604855e-06,
      "loss": 0.0163,
      "step": 1599240
    },
    {
      "epoch": 2.617224066036933,
      "grad_norm": 0.2751752436161041,
      "learning_rate": 4.796953144046968e-06,
      "loss": 0.0144,
      "step": 1599260
    },
    {
      "epoch": 2.617256796475586,
      "grad_norm": 0.8862255215644836,
      "learning_rate": 4.796887251833451e-06,
      "loss": 0.016,
      "step": 1599280
    },
    {
      "epoch": 2.6172895269142398,
      "grad_norm": 0.09046083688735962,
      "learning_rate": 4.7968213596199346e-06,
      "loss": 0.0108,
      "step": 1599300
    },
    {
      "epoch": 2.617322257352893,
      "grad_norm": 0.27141812443733215,
      "learning_rate": 4.796755467406417e-06,
      "loss": 0.0122,
      "step": 1599320
    },
    {
      "epoch": 2.6173549877915465,
      "grad_norm": 0.1450914740562439,
      "learning_rate": 4.7966895751929e-06,
      "loss": 0.0159,
      "step": 1599340
    },
    {
      "epoch": 2.6173877182301997,
      "grad_norm": 0.8764574527740479,
      "learning_rate": 4.796623682979383e-06,
      "loss": 0.0198,
      "step": 1599360
    },
    {
      "epoch": 2.6174204486688533,
      "grad_norm": 0.06646434962749481,
      "learning_rate": 4.7965577907658655e-06,
      "loss": 0.0087,
      "step": 1599380
    },
    {
      "epoch": 2.6174531791075064,
      "grad_norm": 0.22386479377746582,
      "learning_rate": 4.796491898552348e-06,
      "loss": 0.0147,
      "step": 1599400
    },
    {
      "epoch": 2.6174859095461596,
      "grad_norm": 0.3917173147201538,
      "learning_rate": 4.796426006338831e-06,
      "loss": 0.0158,
      "step": 1599420
    },
    {
      "epoch": 2.617518639984813,
      "grad_norm": 0.7470444440841675,
      "learning_rate": 4.796360114125314e-06,
      "loss": 0.0159,
      "step": 1599440
    },
    {
      "epoch": 2.6175513704234663,
      "grad_norm": 0.44031158089637756,
      "learning_rate": 4.796294221911797e-06,
      "loss": 0.0129,
      "step": 1599460
    },
    {
      "epoch": 2.61758410086212,
      "grad_norm": 0.5741551518440247,
      "learning_rate": 4.79622832969828e-06,
      "loss": 0.0112,
      "step": 1599480
    },
    {
      "epoch": 2.617616831300773,
      "grad_norm": 1.0552817583084106,
      "learning_rate": 4.796162437484763e-06,
      "loss": 0.0129,
      "step": 1599500
    },
    {
      "epoch": 2.6176495617394266,
      "grad_norm": 0.18125171959400177,
      "learning_rate": 4.7960965452712456e-06,
      "loss": 0.0141,
      "step": 1599520
    },
    {
      "epoch": 2.61768229217808,
      "grad_norm": 0.2674051821231842,
      "learning_rate": 4.796030653057728e-06,
      "loss": 0.0083,
      "step": 1599540
    },
    {
      "epoch": 2.617715022616733,
      "grad_norm": 0.21172121167182922,
      "learning_rate": 4.795964760844212e-06,
      "loss": 0.0158,
      "step": 1599560
    },
    {
      "epoch": 2.6177477530553865,
      "grad_norm": 0.36442065238952637,
      "learning_rate": 4.795898868630695e-06,
      "loss": 0.0147,
      "step": 1599580
    },
    {
      "epoch": 2.6177804834940397,
      "grad_norm": 0.4704985022544861,
      "learning_rate": 4.795832976417177e-06,
      "loss": 0.0166,
      "step": 1599600
    },
    {
      "epoch": 2.6178132139326933,
      "grad_norm": 0.90425044298172,
      "learning_rate": 4.79576708420366e-06,
      "loss": 0.0135,
      "step": 1599620
    },
    {
      "epoch": 2.6178459443713464,
      "grad_norm": 0.30893197655677795,
      "learning_rate": 4.795701191990143e-06,
      "loss": 0.0157,
      "step": 1599640
    },
    {
      "epoch": 2.61787867481,
      "grad_norm": 0.38429024815559387,
      "learning_rate": 4.795635299776626e-06,
      "loss": 0.0175,
      "step": 1599660
    },
    {
      "epoch": 2.617911405248653,
      "grad_norm": 0.18546611070632935,
      "learning_rate": 4.795569407563108e-06,
      "loss": 0.0116,
      "step": 1599680
    },
    {
      "epoch": 2.6179441356873063,
      "grad_norm": 0.378325492143631,
      "learning_rate": 4.795503515349592e-06,
      "loss": 0.0164,
      "step": 1599700
    },
    {
      "epoch": 2.61797686612596,
      "grad_norm": 0.4279176592826843,
      "learning_rate": 4.795437623136075e-06,
      "loss": 0.0212,
      "step": 1599720
    },
    {
      "epoch": 2.618009596564613,
      "grad_norm": 0.5192771553993225,
      "learning_rate": 4.795371730922557e-06,
      "loss": 0.0228,
      "step": 1599740
    },
    {
      "epoch": 2.6180423270032662,
      "grad_norm": 0.29800575971603394,
      "learning_rate": 4.79530583870904e-06,
      "loss": 0.0176,
      "step": 1599760
    },
    {
      "epoch": 2.61807505744192,
      "grad_norm": 0.13039033114910126,
      "learning_rate": 4.795239946495523e-06,
      "loss": 0.0081,
      "step": 1599780
    },
    {
      "epoch": 2.6181077878805734,
      "grad_norm": 0.6442984342575073,
      "learning_rate": 4.795174054282006e-06,
      "loss": 0.0183,
      "step": 1599800
    },
    {
      "epoch": 2.6181405183192266,
      "grad_norm": 0.4078550636768341,
      "learning_rate": 4.795108162068488e-06,
      "loss": 0.0165,
      "step": 1599820
    },
    {
      "epoch": 2.6181732487578797,
      "grad_norm": 0.18880140781402588,
      "learning_rate": 4.795042269854971e-06,
      "loss": 0.0102,
      "step": 1599840
    },
    {
      "epoch": 2.6182059791965333,
      "grad_norm": 0.3560272753238678,
      "learning_rate": 4.794976377641455e-06,
      "loss": 0.0136,
      "step": 1599860
    },
    {
      "epoch": 2.6182387096351865,
      "grad_norm": 0.2007787525653839,
      "learning_rate": 4.794910485427937e-06,
      "loss": 0.0111,
      "step": 1599880
    },
    {
      "epoch": 2.6182714400738396,
      "grad_norm": 0.06508413702249527,
      "learning_rate": 4.79484459321442e-06,
      "loss": 0.0167,
      "step": 1599900
    },
    {
      "epoch": 2.618304170512493,
      "grad_norm": 0.11855296045541763,
      "learning_rate": 4.794778701000904e-06,
      "loss": 0.0144,
      "step": 1599920
    },
    {
      "epoch": 2.618336900951147,
      "grad_norm": 0.5877384543418884,
      "learning_rate": 4.7947128087873865e-06,
      "loss": 0.0182,
      "step": 1599940
    },
    {
      "epoch": 2.6183696313898,
      "grad_norm": 0.5743573904037476,
      "learning_rate": 4.794646916573869e-06,
      "loss": 0.0135,
      "step": 1599960
    },
    {
      "epoch": 2.618402361828453,
      "grad_norm": 0.29018205404281616,
      "learning_rate": 4.794581024360352e-06,
      "loss": 0.0222,
      "step": 1599980
    },
    {
      "epoch": 2.6184350922671067,
      "grad_norm": 1.404712438583374,
      "learning_rate": 4.794515132146835e-06,
      "loss": 0.0143,
      "step": 1600000
    },
    {
      "epoch": 2.6184350922671067,
      "eval_loss": 0.007881873287260532,
      "eval_runtime": 6505.1369,
      "eval_samples_per_second": 158.007,
      "eval_steps_per_second": 15.801,
      "eval_sts-dev_pearson_cosine": 0.9819023846642312,
      "eval_sts-dev_spearman_cosine": 0.8936090397090972,
      "step": 1600000
    },
    {
      "epoch": 2.61846782270576,
      "grad_norm": 0.09426992386579514,
      "learning_rate": 4.7944492399333174e-06,
      "loss": 0.0147,
      "step": 1600020
    },
    {
      "epoch": 2.618500553144413,
      "grad_norm": 0.26652973890304565,
      "learning_rate": 4.7943833477198e-06,
      "loss": 0.0146,
      "step": 1600040
    },
    {
      "epoch": 2.6185332835830666,
      "grad_norm": 0.3820095658302307,
      "learning_rate": 4.794317455506283e-06,
      "loss": 0.0194,
      "step": 1600060
    },
    {
      "epoch": 2.61856601402172,
      "grad_norm": 0.3127037286758423,
      "learning_rate": 4.7942515632927665e-06,
      "loss": 0.0147,
      "step": 1600080
    },
    {
      "epoch": 2.6185987444603733,
      "grad_norm": 0.4301440715789795,
      "learning_rate": 4.794185671079249e-06,
      "loss": 0.0201,
      "step": 1600100
    },
    {
      "epoch": 2.6186314748990265,
      "grad_norm": 0.6313170194625854,
      "learning_rate": 4.794119778865732e-06,
      "loss": 0.0188,
      "step": 1600120
    },
    {
      "epoch": 2.61866420533768,
      "grad_norm": 0.18735875189304352,
      "learning_rate": 4.794053886652215e-06,
      "loss": 0.0158,
      "step": 1600140
    },
    {
      "epoch": 2.6186969357763332,
      "grad_norm": 0.10915391147136688,
      "learning_rate": 4.7939879944386975e-06,
      "loss": 0.0136,
      "step": 1600160
    },
    {
      "epoch": 2.6187296662149864,
      "grad_norm": 0.32432180643081665,
      "learning_rate": 4.79392210222518e-06,
      "loss": 0.0132,
      "step": 1600180
    },
    {
      "epoch": 2.61876239665364,
      "grad_norm": 0.29256096482276917,
      "learning_rate": 4.793856210011663e-06,
      "loss": 0.0095,
      "step": 1600200
    },
    {
      "epoch": 2.6187951270922936,
      "grad_norm": 0.1093721017241478,
      "learning_rate": 4.793790317798146e-06,
      "loss": 0.0137,
      "step": 1600220
    },
    {
      "epoch": 2.6188278575309467,
      "grad_norm": 0.590771496295929,
      "learning_rate": 4.793724425584628e-06,
      "loss": 0.0126,
      "step": 1600240
    },
    {
      "epoch": 2.6188605879696,
      "grad_norm": 0.4148813486099243,
      "learning_rate": 4.793658533371112e-06,
      "loss": 0.01,
      "step": 1600260
    },
    {
      "epoch": 2.6188933184082535,
      "grad_norm": 0.5243923664093018,
      "learning_rate": 4.793592641157595e-06,
      "loss": 0.0091,
      "step": 1600280
    },
    {
      "epoch": 2.6189260488469066,
      "grad_norm": 0.2782098352909088,
      "learning_rate": 4.7935267489440775e-06,
      "loss": 0.0152,
      "step": 1600300
    },
    {
      "epoch": 2.6189587792855598,
      "grad_norm": 0.6146543622016907,
      "learning_rate": 4.793460856730561e-06,
      "loss": 0.019,
      "step": 1600320
    },
    {
      "epoch": 2.6189915097242134,
      "grad_norm": 0.09921132773160934,
      "learning_rate": 4.793394964517044e-06,
      "loss": 0.0149,
      "step": 1600340
    },
    {
      "epoch": 2.6190242401628665,
      "grad_norm": 0.4832195043563843,
      "learning_rate": 4.7933290723035266e-06,
      "loss": 0.0184,
      "step": 1600360
    },
    {
      "epoch": 2.61905697060152,
      "grad_norm": 0.16077366471290588,
      "learning_rate": 4.793263180090009e-06,
      "loss": 0.0169,
      "step": 1600380
    },
    {
      "epoch": 2.6190897010401732,
      "grad_norm": 0.24873727560043335,
      "learning_rate": 4.793197287876492e-06,
      "loss": 0.0137,
      "step": 1600400
    },
    {
      "epoch": 2.619122431478827,
      "grad_norm": 0.40462544560432434,
      "learning_rate": 4.793131395662975e-06,
      "loss": 0.0132,
      "step": 1600420
    },
    {
      "epoch": 2.61915516191748,
      "grad_norm": 0.37400832772254944,
      "learning_rate": 4.7930655034494575e-06,
      "loss": 0.0145,
      "step": 1600440
    },
    {
      "epoch": 2.619187892356133,
      "grad_norm": 0.5824259519577026,
      "learning_rate": 4.79299961123594e-06,
      "loss": 0.0138,
      "step": 1600460
    },
    {
      "epoch": 2.6192206227947867,
      "grad_norm": 0.2928561270236969,
      "learning_rate": 4.792933719022424e-06,
      "loss": 0.0154,
      "step": 1600480
    },
    {
      "epoch": 2.61925335323344,
      "grad_norm": 0.5056636929512024,
      "learning_rate": 4.7928678268089066e-06,
      "loss": 0.0199,
      "step": 1600500
    },
    {
      "epoch": 2.6192860836720935,
      "grad_norm": 0.35321515798568726,
      "learning_rate": 4.792801934595389e-06,
      "loss": 0.0173,
      "step": 1600520
    },
    {
      "epoch": 2.6193188141107466,
      "grad_norm": 0.2025851607322693,
      "learning_rate": 4.792736042381872e-06,
      "loss": 0.0139,
      "step": 1600540
    },
    {
      "epoch": 2.6193515445494002,
      "grad_norm": 0.22408118844032288,
      "learning_rate": 4.792670150168355e-06,
      "loss": 0.0128,
      "step": 1600560
    },
    {
      "epoch": 2.6193842749880534,
      "grad_norm": 0.4822286367416382,
      "learning_rate": 4.7926042579548375e-06,
      "loss": 0.011,
      "step": 1600580
    },
    {
      "epoch": 2.6194170054267065,
      "grad_norm": 0.20424054563045502,
      "learning_rate": 4.79253836574132e-06,
      "loss": 0.0083,
      "step": 1600600
    },
    {
      "epoch": 2.61944973586536,
      "grad_norm": 0.4933544397354126,
      "learning_rate": 4.792472473527804e-06,
      "loss": 0.0132,
      "step": 1600620
    },
    {
      "epoch": 2.6194824663040133,
      "grad_norm": 0.7636298537254333,
      "learning_rate": 4.792406581314287e-06,
      "loss": 0.01,
      "step": 1600640
    },
    {
      "epoch": 2.619515196742667,
      "grad_norm": 0.23162250220775604,
      "learning_rate": 4.792340689100769e-06,
      "loss": 0.0145,
      "step": 1600660
    },
    {
      "epoch": 2.61954792718132,
      "grad_norm": 0.15734218060970306,
      "learning_rate": 4.792274796887252e-06,
      "loss": 0.0171,
      "step": 1600680
    },
    {
      "epoch": 2.6195806576199736,
      "grad_norm": 0.3676929175853729,
      "learning_rate": 4.792208904673735e-06,
      "loss": 0.0144,
      "step": 1600700
    },
    {
      "epoch": 2.6196133880586268,
      "grad_norm": 0.16533292829990387,
      "learning_rate": 4.792143012460218e-06,
      "loss": 0.0099,
      "step": 1600720
    },
    {
      "epoch": 2.61964611849728,
      "grad_norm": 0.23379093408584595,
      "learning_rate": 4.792077120246701e-06,
      "loss": 0.0102,
      "step": 1600740
    },
    {
      "epoch": 2.6196788489359335,
      "grad_norm": 0.48819780349731445,
      "learning_rate": 4.792011228033184e-06,
      "loss": 0.0103,
      "step": 1600760
    },
    {
      "epoch": 2.6197115793745867,
      "grad_norm": 0.3099786639213562,
      "learning_rate": 4.791945335819667e-06,
      "loss": 0.0099,
      "step": 1600780
    },
    {
      "epoch": 2.6197443098132402,
      "grad_norm": 0.1475280076265335,
      "learning_rate": 4.791879443606149e-06,
      "loss": 0.0156,
      "step": 1600800
    },
    {
      "epoch": 2.6197770402518934,
      "grad_norm": 0.2552967667579651,
      "learning_rate": 4.791813551392632e-06,
      "loss": 0.0164,
      "step": 1600820
    },
    {
      "epoch": 2.619809770690547,
      "grad_norm": 0.4656486511230469,
      "learning_rate": 4.791747659179115e-06,
      "loss": 0.0126,
      "step": 1600840
    },
    {
      "epoch": 2.6198425011292,
      "grad_norm": 0.29601892828941345,
      "learning_rate": 4.791681766965598e-06,
      "loss": 0.0182,
      "step": 1600860
    },
    {
      "epoch": 2.6198752315678533,
      "grad_norm": 0.28353235125541687,
      "learning_rate": 4.791615874752081e-06,
      "loss": 0.0163,
      "step": 1600880
    },
    {
      "epoch": 2.619907962006507,
      "grad_norm": 0.5446204543113708,
      "learning_rate": 4.791549982538564e-06,
      "loss": 0.0121,
      "step": 1600900
    },
    {
      "epoch": 2.61994069244516,
      "grad_norm": 0.3968949615955353,
      "learning_rate": 4.791484090325047e-06,
      "loss": 0.0134,
      "step": 1600920
    },
    {
      "epoch": 2.6199734228838136,
      "grad_norm": 0.4301718771457672,
      "learning_rate": 4.791418198111529e-06,
      "loss": 0.0192,
      "step": 1600940
    },
    {
      "epoch": 2.620006153322467,
      "grad_norm": 1.3435959815979004,
      "learning_rate": 4.791352305898013e-06,
      "loss": 0.0115,
      "step": 1600960
    },
    {
      "epoch": 2.6200388837611204,
      "grad_norm": 0.4189411699771881,
      "learning_rate": 4.791286413684496e-06,
      "loss": 0.0221,
      "step": 1600980
    },
    {
      "epoch": 2.6200716141997735,
      "grad_norm": 0.11239209771156311,
      "learning_rate": 4.7912205214709785e-06,
      "loss": 0.0119,
      "step": 1601000
    },
    {
      "epoch": 2.6201043446384267,
      "grad_norm": 0.2157754749059677,
      "learning_rate": 4.791154629257461e-06,
      "loss": 0.0169,
      "step": 1601020
    },
    {
      "epoch": 2.6201370750770803,
      "grad_norm": 0.27804964780807495,
      "learning_rate": 4.791088737043944e-06,
      "loss": 0.0125,
      "step": 1601040
    },
    {
      "epoch": 2.6201698055157334,
      "grad_norm": 0.40582552552223206,
      "learning_rate": 4.791022844830427e-06,
      "loss": 0.0114,
      "step": 1601060
    },
    {
      "epoch": 2.620202535954387,
      "grad_norm": 0.4673082232475281,
      "learning_rate": 4.790956952616909e-06,
      "loss": 0.0142,
      "step": 1601080
    },
    {
      "epoch": 2.62023526639304,
      "grad_norm": 0.7111815810203552,
      "learning_rate": 4.790891060403392e-06,
      "loss": 0.0116,
      "step": 1601100
    },
    {
      "epoch": 2.6202679968316938,
      "grad_norm": 0.48557814955711365,
      "learning_rate": 4.790825168189876e-06,
      "loss": 0.0165,
      "step": 1601120
    },
    {
      "epoch": 2.620300727270347,
      "grad_norm": 0.28955045342445374,
      "learning_rate": 4.7907592759763585e-06,
      "loss": 0.0132,
      "step": 1601140
    },
    {
      "epoch": 2.620333457709,
      "grad_norm": 0.08278849720954895,
      "learning_rate": 4.790693383762841e-06,
      "loss": 0.0247,
      "step": 1601160
    },
    {
      "epoch": 2.6203661881476537,
      "grad_norm": 0.23582983016967773,
      "learning_rate": 4.790627491549324e-06,
      "loss": 0.0135,
      "step": 1601180
    },
    {
      "epoch": 2.620398918586307,
      "grad_norm": 0.3425624668598175,
      "learning_rate": 4.790561599335807e-06,
      "loss": 0.0077,
      "step": 1601200
    },
    {
      "epoch": 2.6204316490249604,
      "grad_norm": 0.32077106833457947,
      "learning_rate": 4.7904957071222894e-06,
      "loss": 0.0145,
      "step": 1601220
    },
    {
      "epoch": 2.6204643794636135,
      "grad_norm": 0.39753758907318115,
      "learning_rate": 4.790429814908772e-06,
      "loss": 0.0129,
      "step": 1601240
    },
    {
      "epoch": 2.620497109902267,
      "grad_norm": 0.5568125247955322,
      "learning_rate": 4.790363922695255e-06,
      "loss": 0.0142,
      "step": 1601260
    },
    {
      "epoch": 2.6205298403409203,
      "grad_norm": 0.3058100938796997,
      "learning_rate": 4.7902980304817385e-06,
      "loss": 0.0124,
      "step": 1601280
    },
    {
      "epoch": 2.6205625707795734,
      "grad_norm": 0.1499568670988083,
      "learning_rate": 4.790232138268221e-06,
      "loss": 0.0102,
      "step": 1601300
    },
    {
      "epoch": 2.620595301218227,
      "grad_norm": 0.3999588191509247,
      "learning_rate": 4.790166246054704e-06,
      "loss": 0.0144,
      "step": 1601320
    },
    {
      "epoch": 2.62062803165688,
      "grad_norm": 0.1268794685602188,
      "learning_rate": 4.7901003538411876e-06,
      "loss": 0.0099,
      "step": 1601340
    },
    {
      "epoch": 2.6206607620955333,
      "grad_norm": 0.2508406639099121,
      "learning_rate": 4.79003446162767e-06,
      "loss": 0.0103,
      "step": 1601360
    },
    {
      "epoch": 2.620693492534187,
      "grad_norm": 0.2556911110877991,
      "learning_rate": 4.789968569414153e-06,
      "loss": 0.0143,
      "step": 1601380
    },
    {
      "epoch": 2.6207262229728405,
      "grad_norm": 0.1712089329957962,
      "learning_rate": 4.789902677200636e-06,
      "loss": 0.0129,
      "step": 1601400
    },
    {
      "epoch": 2.6207589534114937,
      "grad_norm": 0.33564862608909607,
      "learning_rate": 4.7898367849871185e-06,
      "loss": 0.0142,
      "step": 1601420
    },
    {
      "epoch": 2.620791683850147,
      "grad_norm": 0.31712260842323303,
      "learning_rate": 4.789770892773601e-06,
      "loss": 0.0152,
      "step": 1601440
    },
    {
      "epoch": 2.6208244142888004,
      "grad_norm": 0.35444629192352295,
      "learning_rate": 4.789705000560084e-06,
      "loss": 0.0125,
      "step": 1601460
    },
    {
      "epoch": 2.6208571447274536,
      "grad_norm": 0.32878679037094116,
      "learning_rate": 4.789639108346567e-06,
      "loss": 0.0118,
      "step": 1601480
    },
    {
      "epoch": 2.6208898751661067,
      "grad_norm": 0.4298938810825348,
      "learning_rate": 4.78957321613305e-06,
      "loss": 0.0162,
      "step": 1601500
    },
    {
      "epoch": 2.6209226056047603,
      "grad_norm": 0.405479371547699,
      "learning_rate": 4.789507323919533e-06,
      "loss": 0.0126,
      "step": 1601520
    },
    {
      "epoch": 2.620955336043414,
      "grad_norm": 0.1911131590604782,
      "learning_rate": 4.789441431706016e-06,
      "loss": 0.0131,
      "step": 1601540
    },
    {
      "epoch": 2.620988066482067,
      "grad_norm": 1.5107393264770508,
      "learning_rate": 4.7893755394924986e-06,
      "loss": 0.0167,
      "step": 1601560
    },
    {
      "epoch": 2.62102079692072,
      "grad_norm": 0.160027876496315,
      "learning_rate": 4.789309647278981e-06,
      "loss": 0.0174,
      "step": 1601580
    },
    {
      "epoch": 2.621053527359374,
      "grad_norm": 0.24238960444927216,
      "learning_rate": 4.789243755065464e-06,
      "loss": 0.0102,
      "step": 1601600
    },
    {
      "epoch": 2.621086257798027,
      "grad_norm": 0.8925458788871765,
      "learning_rate": 4.789177862851947e-06,
      "loss": 0.016,
      "step": 1601620
    },
    {
      "epoch": 2.62111898823668,
      "grad_norm": 0.7193922996520996,
      "learning_rate": 4.7891119706384295e-06,
      "loss": 0.0155,
      "step": 1601640
    },
    {
      "epoch": 2.6211517186753337,
      "grad_norm": 0.13123126327991486,
      "learning_rate": 4.789046078424912e-06,
      "loss": 0.0212,
      "step": 1601660
    },
    {
      "epoch": 2.6211844491139873,
      "grad_norm": 0.25211596488952637,
      "learning_rate": 4.788980186211396e-06,
      "loss": 0.0144,
      "step": 1601680
    },
    {
      "epoch": 2.6212171795526404,
      "grad_norm": 0.41520896553993225,
      "learning_rate": 4.788914293997879e-06,
      "loss": 0.0129,
      "step": 1601700
    },
    {
      "epoch": 2.6212499099912936,
      "grad_norm": 0.8864738941192627,
      "learning_rate": 4.788848401784361e-06,
      "loss": 0.0129,
      "step": 1601720
    },
    {
      "epoch": 2.621282640429947,
      "grad_norm": 0.27940869331359863,
      "learning_rate": 4.788782509570845e-06,
      "loss": 0.0183,
      "step": 1601740
    },
    {
      "epoch": 2.6213153708686003,
      "grad_norm": 0.29974108934402466,
      "learning_rate": 4.788716617357328e-06,
      "loss": 0.0103,
      "step": 1601760
    },
    {
      "epoch": 2.6213481013072535,
      "grad_norm": 0.1954575479030609,
      "learning_rate": 4.78865072514381e-06,
      "loss": 0.0187,
      "step": 1601780
    },
    {
      "epoch": 2.621380831745907,
      "grad_norm": 0.2334648072719574,
      "learning_rate": 4.788584832930293e-06,
      "loss": 0.01,
      "step": 1601800
    },
    {
      "epoch": 2.6214135621845607,
      "grad_norm": 0.06951551884412766,
      "learning_rate": 4.788518940716776e-06,
      "loss": 0.0158,
      "step": 1601820
    },
    {
      "epoch": 2.621446292623214,
      "grad_norm": 0.3220919370651245,
      "learning_rate": 4.788453048503259e-06,
      "loss": 0.0155,
      "step": 1601840
    },
    {
      "epoch": 2.621479023061867,
      "grad_norm": 0.8074731826782227,
      "learning_rate": 4.788387156289741e-06,
      "loss": 0.0133,
      "step": 1601860
    },
    {
      "epoch": 2.6215117535005206,
      "grad_norm": 0.3883003294467926,
      "learning_rate": 4.788321264076224e-06,
      "loss": 0.0166,
      "step": 1601880
    },
    {
      "epoch": 2.6215444839391737,
      "grad_norm": 0.303181529045105,
      "learning_rate": 4.788255371862708e-06,
      "loss": 0.0118,
      "step": 1601900
    },
    {
      "epoch": 2.621577214377827,
      "grad_norm": 1.0144890546798706,
      "learning_rate": 4.78818947964919e-06,
      "loss": 0.0262,
      "step": 1601920
    },
    {
      "epoch": 2.6216099448164805,
      "grad_norm": 0.22125984728336334,
      "learning_rate": 4.788123587435673e-06,
      "loss": 0.0179,
      "step": 1601940
    },
    {
      "epoch": 2.6216426752551336,
      "grad_norm": 0.21948935091495514,
      "learning_rate": 4.788057695222156e-06,
      "loss": 0.0149,
      "step": 1601960
    },
    {
      "epoch": 2.621675405693787,
      "grad_norm": 0.5972341895103455,
      "learning_rate": 4.787991803008639e-06,
      "loss": 0.0179,
      "step": 1601980
    },
    {
      "epoch": 2.6217081361324404,
      "grad_norm": 0.06598787754774094,
      "learning_rate": 4.787925910795121e-06,
      "loss": 0.0174,
      "step": 1602000
    },
    {
      "epoch": 2.621740866571094,
      "grad_norm": 0.2698201537132263,
      "learning_rate": 4.787860018581605e-06,
      "loss": 0.0145,
      "step": 1602020
    },
    {
      "epoch": 2.621773597009747,
      "grad_norm": 0.44103264808654785,
      "learning_rate": 4.787794126368088e-06,
      "loss": 0.0145,
      "step": 1602040
    },
    {
      "epoch": 2.6218063274484003,
      "grad_norm": 0.15502150356769562,
      "learning_rate": 4.7877282341545704e-06,
      "loss": 0.0179,
      "step": 1602060
    },
    {
      "epoch": 2.621839057887054,
      "grad_norm": 0.3473016619682312,
      "learning_rate": 4.787662341941053e-06,
      "loss": 0.0125,
      "step": 1602080
    },
    {
      "epoch": 2.621871788325707,
      "grad_norm": 0.5018883347511292,
      "learning_rate": 4.787596449727536e-06,
      "loss": 0.0119,
      "step": 1602100
    },
    {
      "epoch": 2.6219045187643606,
      "grad_norm": 0.21470710635185242,
      "learning_rate": 4.787530557514019e-06,
      "loss": 0.0114,
      "step": 1602120
    },
    {
      "epoch": 2.6219372492030137,
      "grad_norm": 0.4307849705219269,
      "learning_rate": 4.787464665300502e-06,
      "loss": 0.0159,
      "step": 1602140
    },
    {
      "epoch": 2.6219699796416673,
      "grad_norm": 0.652087926864624,
      "learning_rate": 4.787398773086985e-06,
      "loss": 0.0104,
      "step": 1602160
    },
    {
      "epoch": 2.6220027100803205,
      "grad_norm": 0.3654009699821472,
      "learning_rate": 4.787332880873468e-06,
      "loss": 0.0146,
      "step": 1602180
    },
    {
      "epoch": 2.6220354405189736,
      "grad_norm": 0.30893802642822266,
      "learning_rate": 4.7872669886599505e-06,
      "loss": 0.0207,
      "step": 1602200
    },
    {
      "epoch": 2.6220681709576272,
      "grad_norm": 0.16274353861808777,
      "learning_rate": 4.787201096446433e-06,
      "loss": 0.015,
      "step": 1602220
    },
    {
      "epoch": 2.6221009013962804,
      "grad_norm": 2.0371804237365723,
      "learning_rate": 4.787135204232916e-06,
      "loss": 0.0201,
      "step": 1602240
    },
    {
      "epoch": 2.622133631834934,
      "grad_norm": 2.3855397701263428,
      "learning_rate": 4.787069312019399e-06,
      "loss": 0.0147,
      "step": 1602260
    },
    {
      "epoch": 2.622166362273587,
      "grad_norm": 0.26000624895095825,
      "learning_rate": 4.7870034198058814e-06,
      "loss": 0.0135,
      "step": 1602280
    },
    {
      "epoch": 2.6221990927122407,
      "grad_norm": 0.24925597012043,
      "learning_rate": 4.786937527592365e-06,
      "loss": 0.01,
      "step": 1602300
    },
    {
      "epoch": 2.622231823150894,
      "grad_norm": 0.44149139523506165,
      "learning_rate": 4.786871635378848e-06,
      "loss": 0.016,
      "step": 1602320
    },
    {
      "epoch": 2.622264553589547,
      "grad_norm": 0.47584471106529236,
      "learning_rate": 4.7868057431653305e-06,
      "loss": 0.0187,
      "step": 1602340
    },
    {
      "epoch": 2.6222972840282006,
      "grad_norm": 0.11858458071947098,
      "learning_rate": 4.786739850951813e-06,
      "loss": 0.0156,
      "step": 1602360
    },
    {
      "epoch": 2.6223300144668538,
      "grad_norm": 0.2879618704319,
      "learning_rate": 4.786673958738297e-06,
      "loss": 0.0215,
      "step": 1602380
    },
    {
      "epoch": 2.6223627449055074,
      "grad_norm": 0.18144714832305908,
      "learning_rate": 4.7866080665247796e-06,
      "loss": 0.0155,
      "step": 1602400
    },
    {
      "epoch": 2.6223954753441605,
      "grad_norm": 0.11787264049053192,
      "learning_rate": 4.786542174311262e-06,
      "loss": 0.0116,
      "step": 1602420
    },
    {
      "epoch": 2.622428205782814,
      "grad_norm": 0.20415793359279633,
      "learning_rate": 4.786476282097745e-06,
      "loss": 0.018,
      "step": 1602440
    },
    {
      "epoch": 2.6224609362214673,
      "grad_norm": 1.0440140962600708,
      "learning_rate": 4.786410389884228e-06,
      "loss": 0.0176,
      "step": 1602460
    },
    {
      "epoch": 2.6224936666601204,
      "grad_norm": 0.29797133803367615,
      "learning_rate": 4.7863444976707105e-06,
      "loss": 0.0235,
      "step": 1602480
    },
    {
      "epoch": 2.622526397098774,
      "grad_norm": 0.36995676159858704,
      "learning_rate": 4.786278605457193e-06,
      "loss": 0.0125,
      "step": 1602500
    },
    {
      "epoch": 2.622559127537427,
      "grad_norm": 0.5647470355033875,
      "learning_rate": 4.786212713243677e-06,
      "loss": 0.0132,
      "step": 1602520
    },
    {
      "epoch": 2.6225918579760807,
      "grad_norm": 0.32594677805900574,
      "learning_rate": 4.78614682103016e-06,
      "loss": 0.0083,
      "step": 1602540
    },
    {
      "epoch": 2.622624588414734,
      "grad_norm": 0.16723142564296722,
      "learning_rate": 4.786080928816642e-06,
      "loss": 0.0128,
      "step": 1602560
    },
    {
      "epoch": 2.6226573188533875,
      "grad_norm": 0.14643093943595886,
      "learning_rate": 4.786015036603125e-06,
      "loss": 0.024,
      "step": 1602580
    },
    {
      "epoch": 2.6226900492920406,
      "grad_norm": 0.09733463078737259,
      "learning_rate": 4.785949144389608e-06,
      "loss": 0.0143,
      "step": 1602600
    },
    {
      "epoch": 2.622722779730694,
      "grad_norm": 0.31664931774139404,
      "learning_rate": 4.7858832521760905e-06,
      "loss": 0.0179,
      "step": 1602620
    },
    {
      "epoch": 2.6227555101693474,
      "grad_norm": 0.15715283155441284,
      "learning_rate": 4.785817359962573e-06,
      "loss": 0.0184,
      "step": 1602640
    },
    {
      "epoch": 2.6227882406080005,
      "grad_norm": 0.35698774456977844,
      "learning_rate": 4.785751467749056e-06,
      "loss": 0.0119,
      "step": 1602660
    },
    {
      "epoch": 2.622820971046654,
      "grad_norm": 0.09346487373113632,
      "learning_rate": 4.785685575535539e-06,
      "loss": 0.0225,
      "step": 1602680
    },
    {
      "epoch": 2.6228537014853073,
      "grad_norm": 0.29157373309135437,
      "learning_rate": 4.785619683322022e-06,
      "loss": 0.0122,
      "step": 1602700
    },
    {
      "epoch": 2.622886431923961,
      "grad_norm": 0.21387340128421783,
      "learning_rate": 4.785553791108505e-06,
      "loss": 0.0081,
      "step": 1602720
    },
    {
      "epoch": 2.622919162362614,
      "grad_norm": 0.6080538630485535,
      "learning_rate": 4.785487898894988e-06,
      "loss": 0.0118,
      "step": 1602740
    },
    {
      "epoch": 2.622951892801267,
      "grad_norm": 1.044311285018921,
      "learning_rate": 4.785422006681471e-06,
      "loss": 0.0135,
      "step": 1602760
    },
    {
      "epoch": 2.6229846232399208,
      "grad_norm": 0.13830173015594482,
      "learning_rate": 4.785356114467954e-06,
      "loss": 0.0121,
      "step": 1602780
    },
    {
      "epoch": 2.623017353678574,
      "grad_norm": 0.14856109023094177,
      "learning_rate": 4.785290222254437e-06,
      "loss": 0.0177,
      "step": 1602800
    },
    {
      "epoch": 2.623050084117227,
      "grad_norm": 0.1589973121881485,
      "learning_rate": 4.78522433004092e-06,
      "loss": 0.0182,
      "step": 1602820
    },
    {
      "epoch": 2.6230828145558807,
      "grad_norm": 0.2280290275812149,
      "learning_rate": 4.785158437827402e-06,
      "loss": 0.0095,
      "step": 1602840
    },
    {
      "epoch": 2.6231155449945343,
      "grad_norm": 0.33875176310539246,
      "learning_rate": 4.785092545613885e-06,
      "loss": 0.0155,
      "step": 1602860
    },
    {
      "epoch": 2.6231482754331874,
      "grad_norm": 0.20423120260238647,
      "learning_rate": 4.785026653400368e-06,
      "loss": 0.0134,
      "step": 1602880
    },
    {
      "epoch": 2.6231810058718406,
      "grad_norm": 0.35076776146888733,
      "learning_rate": 4.784960761186851e-06,
      "loss": 0.017,
      "step": 1602900
    },
    {
      "epoch": 2.623213736310494,
      "grad_norm": 0.04010695591568947,
      "learning_rate": 4.784894868973334e-06,
      "loss": 0.0145,
      "step": 1602920
    },
    {
      "epoch": 2.6232464667491473,
      "grad_norm": 0.21399293839931488,
      "learning_rate": 4.784828976759817e-06,
      "loss": 0.012,
      "step": 1602940
    },
    {
      "epoch": 2.6232791971878004,
      "grad_norm": 0.42916321754455566,
      "learning_rate": 4.7847630845463e-06,
      "loss": 0.0091,
      "step": 1602960
    },
    {
      "epoch": 2.623311927626454,
      "grad_norm": 0.4798934757709503,
      "learning_rate": 4.784697192332782e-06,
      "loss": 0.0126,
      "step": 1602980
    },
    {
      "epoch": 2.6233446580651076,
      "grad_norm": 0.18288098275661469,
      "learning_rate": 4.784631300119265e-06,
      "loss": 0.0143,
      "step": 1603000
    },
    {
      "epoch": 2.623377388503761,
      "grad_norm": 0.39025557041168213,
      "learning_rate": 4.784565407905748e-06,
      "loss": 0.0131,
      "step": 1603020
    },
    {
      "epoch": 2.623410118942414,
      "grad_norm": 0.97993004322052,
      "learning_rate": 4.784499515692231e-06,
      "loss": 0.014,
      "step": 1603040
    },
    {
      "epoch": 2.6234428493810675,
      "grad_norm": 0.7874652147293091,
      "learning_rate": 4.784433623478713e-06,
      "loss": 0.0116,
      "step": 1603060
    },
    {
      "epoch": 2.6234755798197207,
      "grad_norm": 0.1960110068321228,
      "learning_rate": 4.784367731265197e-06,
      "loss": 0.0117,
      "step": 1603080
    },
    {
      "epoch": 2.623508310258374,
      "grad_norm": 0.3063550591468811,
      "learning_rate": 4.78430183905168e-06,
      "loss": 0.0139,
      "step": 1603100
    },
    {
      "epoch": 2.6235410406970274,
      "grad_norm": 0.14010697603225708,
      "learning_rate": 4.7842359468381624e-06,
      "loss": 0.0118,
      "step": 1603120
    },
    {
      "epoch": 2.623573771135681,
      "grad_norm": 0.9766452312469482,
      "learning_rate": 4.784170054624645e-06,
      "loss": 0.0089,
      "step": 1603140
    },
    {
      "epoch": 2.623606501574334,
      "grad_norm": 0.2206353098154068,
      "learning_rate": 4.784104162411129e-06,
      "loss": 0.0148,
      "step": 1603160
    },
    {
      "epoch": 2.6236392320129873,
      "grad_norm": 0.3449782133102417,
      "learning_rate": 4.7840382701976115e-06,
      "loss": 0.0147,
      "step": 1603180
    },
    {
      "epoch": 2.623671962451641,
      "grad_norm": 0.4016965329647064,
      "learning_rate": 4.783972377984094e-06,
      "loss": 0.0127,
      "step": 1603200
    },
    {
      "epoch": 2.623704692890294,
      "grad_norm": 0.2267596572637558,
      "learning_rate": 4.783906485770577e-06,
      "loss": 0.0122,
      "step": 1603220
    },
    {
      "epoch": 2.623737423328947,
      "grad_norm": 0.14057576656341553,
      "learning_rate": 4.78384059355706e-06,
      "loss": 0.0133,
      "step": 1603240
    },
    {
      "epoch": 2.623770153767601,
      "grad_norm": 0.2180086076259613,
      "learning_rate": 4.7837747013435424e-06,
      "loss": 0.0161,
      "step": 1603260
    },
    {
      "epoch": 2.6238028842062544,
      "grad_norm": 0.23598265647888184,
      "learning_rate": 4.783708809130025e-06,
      "loss": 0.0173,
      "step": 1603280
    },
    {
      "epoch": 2.6238356146449076,
      "grad_norm": 0.09972148388624191,
      "learning_rate": 4.783642916916508e-06,
      "loss": 0.0119,
      "step": 1603300
    },
    {
      "epoch": 2.6238683450835607,
      "grad_norm": 0.07736945152282715,
      "learning_rate": 4.7835770247029915e-06,
      "loss": 0.0152,
      "step": 1603320
    },
    {
      "epoch": 2.6239010755222143,
      "grad_norm": 0.15078425407409668,
      "learning_rate": 4.783511132489474e-06,
      "loss": 0.0109,
      "step": 1603340
    },
    {
      "epoch": 2.6239338059608674,
      "grad_norm": null,
      "learning_rate": 4.783445240275957e-06,
      "loss": 0.0103,
      "step": 1603360
    },
    {
      "epoch": 2.6239665363995206,
      "grad_norm": 0.17953979969024658,
      "learning_rate": 4.78337934806244e-06,
      "loss": 0.0145,
      "step": 1603380
    },
    {
      "epoch": 2.623999266838174,
      "grad_norm": 1.1084140539169312,
      "learning_rate": 4.7833134558489225e-06,
      "loss": 0.0183,
      "step": 1603400
    },
    {
      "epoch": 2.6240319972768273,
      "grad_norm": 0.2970576882362366,
      "learning_rate": 4.783247563635405e-06,
      "loss": 0.0119,
      "step": 1603420
    },
    {
      "epoch": 2.624064727715481,
      "grad_norm": 0.11985687166452408,
      "learning_rate": 4.783181671421889e-06,
      "loss": 0.0141,
      "step": 1603440
    },
    {
      "epoch": 2.624097458154134,
      "grad_norm": 0.16765697300434113,
      "learning_rate": 4.7831157792083715e-06,
      "loss": 0.0115,
      "step": 1603460
    },
    {
      "epoch": 2.6241301885927877,
      "grad_norm": 0.1705828309059143,
      "learning_rate": 4.783049886994854e-06,
      "loss": 0.0119,
      "step": 1603480
    },
    {
      "epoch": 2.624162919031441,
      "grad_norm": 0.7313121557235718,
      "learning_rate": 4.782983994781337e-06,
      "loss": 0.0198,
      "step": 1603500
    },
    {
      "epoch": 2.624195649470094,
      "grad_norm": 0.10170435905456543,
      "learning_rate": 4.78291810256782e-06,
      "loss": 0.0159,
      "step": 1603520
    },
    {
      "epoch": 2.6242283799087476,
      "grad_norm": 0.28904393315315247,
      "learning_rate": 4.7828522103543025e-06,
      "loss": 0.0151,
      "step": 1603540
    },
    {
      "epoch": 2.6242611103474007,
      "grad_norm": 0.307007372379303,
      "learning_rate": 4.782786318140786e-06,
      "loss": 0.0145,
      "step": 1603560
    },
    {
      "epoch": 2.6242938407860543,
      "grad_norm": 0.3317832946777344,
      "learning_rate": 4.782720425927269e-06,
      "loss": 0.0164,
      "step": 1603580
    },
    {
      "epoch": 2.6243265712247075,
      "grad_norm": 1.0013748407363892,
      "learning_rate": 4.7826545337137516e-06,
      "loss": 0.0161,
      "step": 1603600
    },
    {
      "epoch": 2.624359301663361,
      "grad_norm": 0.30945533514022827,
      "learning_rate": 4.782588641500234e-06,
      "loss": 0.0143,
      "step": 1603620
    },
    {
      "epoch": 2.624392032102014,
      "grad_norm": 0.3612632155418396,
      "learning_rate": 4.782522749286717e-06,
      "loss": 0.0147,
      "step": 1603640
    },
    {
      "epoch": 2.6244247625406674,
      "grad_norm": 0.07981706410646439,
      "learning_rate": 4.7824568570732e-06,
      "loss": 0.0122,
      "step": 1603660
    },
    {
      "epoch": 2.624457492979321,
      "grad_norm": 0.12170597910881042,
      "learning_rate": 4.7823909648596825e-06,
      "loss": 0.0128,
      "step": 1603680
    },
    {
      "epoch": 2.624490223417974,
      "grad_norm": 0.09957322478294373,
      "learning_rate": 4.782325072646165e-06,
      "loss": 0.0123,
      "step": 1603700
    },
    {
      "epoch": 2.6245229538566277,
      "grad_norm": 0.274068146944046,
      "learning_rate": 4.782259180432649e-06,
      "loss": 0.0158,
      "step": 1603720
    },
    {
      "epoch": 2.624555684295281,
      "grad_norm": 0.8442374467849731,
      "learning_rate": 4.782193288219132e-06,
      "loss": 0.0122,
      "step": 1603740
    },
    {
      "epoch": 2.6245884147339344,
      "grad_norm": 0.19243836402893066,
      "learning_rate": 4.782127396005614e-06,
      "loss": 0.01,
      "step": 1603760
    },
    {
      "epoch": 2.6246211451725876,
      "grad_norm": 0.7437329888343811,
      "learning_rate": 4.782061503792098e-06,
      "loss": 0.012,
      "step": 1603780
    },
    {
      "epoch": 2.6246538756112407,
      "grad_norm": 0.881788969039917,
      "learning_rate": 4.781995611578581e-06,
      "loss": 0.0253,
      "step": 1603800
    },
    {
      "epoch": 2.6246866060498943,
      "grad_norm": 0.4408346712589264,
      "learning_rate": 4.781929719365063e-06,
      "loss": 0.0164,
      "step": 1603820
    },
    {
      "epoch": 2.6247193364885475,
      "grad_norm": 0.6871791481971741,
      "learning_rate": 4.781863827151546e-06,
      "loss": 0.0188,
      "step": 1603840
    },
    {
      "epoch": 2.624752066927201,
      "grad_norm": 0.11081340163946152,
      "learning_rate": 4.781797934938029e-06,
      "loss": 0.0151,
      "step": 1603860
    },
    {
      "epoch": 2.6247847973658542,
      "grad_norm": 0.13930463790893555,
      "learning_rate": 4.781732042724512e-06,
      "loss": 0.0102,
      "step": 1603880
    },
    {
      "epoch": 2.624817527804508,
      "grad_norm": 0.513579785823822,
      "learning_rate": 4.781666150510994e-06,
      "loss": 0.0118,
      "step": 1603900
    },
    {
      "epoch": 2.624850258243161,
      "grad_norm": 0.489156574010849,
      "learning_rate": 4.781600258297477e-06,
      "loss": 0.0112,
      "step": 1603920
    },
    {
      "epoch": 2.624882988681814,
      "grad_norm": 2.254478931427002,
      "learning_rate": 4.781534366083961e-06,
      "loss": 0.0199,
      "step": 1603940
    },
    {
      "epoch": 2.6249157191204677,
      "grad_norm": 4.757086753845215,
      "learning_rate": 4.781468473870443e-06,
      "loss": 0.0159,
      "step": 1603960
    },
    {
      "epoch": 2.624948449559121,
      "grad_norm": 0.18539008498191833,
      "learning_rate": 4.781402581656926e-06,
      "loss": 0.0153,
      "step": 1603980
    },
    {
      "epoch": 2.6249811799977745,
      "grad_norm": 0.7312965989112854,
      "learning_rate": 4.781336689443409e-06,
      "loss": 0.0203,
      "step": 1604000
    },
    {
      "epoch": 2.6250139104364276,
      "grad_norm": 0.8830591440200806,
      "learning_rate": 4.781270797229892e-06,
      "loss": 0.0163,
      "step": 1604020
    },
    {
      "epoch": 2.625046640875081,
      "grad_norm": 0.2735002040863037,
      "learning_rate": 4.781204905016374e-06,
      "loss": 0.0093,
      "step": 1604040
    },
    {
      "epoch": 2.6250793713137344,
      "grad_norm": 0.3060314655303955,
      "learning_rate": 4.781139012802857e-06,
      "loss": 0.0106,
      "step": 1604060
    },
    {
      "epoch": 2.6251121017523875,
      "grad_norm": 0.7015973329544067,
      "learning_rate": 4.78107312058934e-06,
      "loss": 0.0082,
      "step": 1604080
    },
    {
      "epoch": 2.625144832191041,
      "grad_norm": 0.24502217769622803,
      "learning_rate": 4.781007228375823e-06,
      "loss": 0.0179,
      "step": 1604100
    },
    {
      "epoch": 2.6251775626296943,
      "grad_norm": 0.1751869171857834,
      "learning_rate": 4.780941336162306e-06,
      "loss": 0.0115,
      "step": 1604120
    },
    {
      "epoch": 2.625210293068348,
      "grad_norm": 0.266033798456192,
      "learning_rate": 4.780875443948789e-06,
      "loss": 0.0145,
      "step": 1604140
    },
    {
      "epoch": 2.625243023507001,
      "grad_norm": 0.15193010866641998,
      "learning_rate": 4.780809551735272e-06,
      "loss": 0.0145,
      "step": 1604160
    },
    {
      "epoch": 2.6252757539456546,
      "grad_norm": 0.3647352457046509,
      "learning_rate": 4.780743659521755e-06,
      "loss": 0.0183,
      "step": 1604180
    },
    {
      "epoch": 2.6253084843843078,
      "grad_norm": 0.46360698342323303,
      "learning_rate": 4.780677767308238e-06,
      "loss": 0.0184,
      "step": 1604200
    },
    {
      "epoch": 2.625341214822961,
      "grad_norm": 0.14686499536037445,
      "learning_rate": 4.780611875094721e-06,
      "loss": 0.0118,
      "step": 1604220
    },
    {
      "epoch": 2.6253739452616145,
      "grad_norm": 0.20600509643554688,
      "learning_rate": 4.7805459828812035e-06,
      "loss": 0.0084,
      "step": 1604240
    },
    {
      "epoch": 2.6254066757002676,
      "grad_norm": 0.19988508522510529,
      "learning_rate": 4.780480090667686e-06,
      "loss": 0.0179,
      "step": 1604260
    },
    {
      "epoch": 2.6254394061389212,
      "grad_norm": 0.2153269648551941,
      "learning_rate": 4.780414198454169e-06,
      "loss": 0.0188,
      "step": 1604280
    },
    {
      "epoch": 2.6254721365775744,
      "grad_norm": 0.18842820823192596,
      "learning_rate": 4.780348306240652e-06,
      "loss": 0.0111,
      "step": 1604300
    },
    {
      "epoch": 2.625504867016228,
      "grad_norm": 0.3456801474094391,
      "learning_rate": 4.7802824140271344e-06,
      "loss": 0.0129,
      "step": 1604320
    },
    {
      "epoch": 2.625537597454881,
      "grad_norm": 0.8926085233688354,
      "learning_rate": 4.780216521813618e-06,
      "loss": 0.0175,
      "step": 1604340
    },
    {
      "epoch": 2.6255703278935343,
      "grad_norm": 0.3533938527107239,
      "learning_rate": 4.780150629600101e-06,
      "loss": 0.0148,
      "step": 1604360
    },
    {
      "epoch": 2.625603058332188,
      "grad_norm": 0.4842020571231842,
      "learning_rate": 4.7800847373865835e-06,
      "loss": 0.0179,
      "step": 1604380
    },
    {
      "epoch": 2.625635788770841,
      "grad_norm": 0.31151658296585083,
      "learning_rate": 4.780018845173066e-06,
      "loss": 0.0225,
      "step": 1604400
    },
    {
      "epoch": 2.625668519209494,
      "grad_norm": 0.24088065326213837,
      "learning_rate": 4.779952952959549e-06,
      "loss": 0.0136,
      "step": 1604420
    },
    {
      "epoch": 2.6257012496481478,
      "grad_norm": 0.33558616042137146,
      "learning_rate": 4.779887060746032e-06,
      "loss": 0.0099,
      "step": 1604440
    },
    {
      "epoch": 2.6257339800868014,
      "grad_norm": 0.6733317971229553,
      "learning_rate": 4.7798211685325145e-06,
      "loss": 0.0151,
      "step": 1604460
    },
    {
      "epoch": 2.6257667105254545,
      "grad_norm": 0.11533664911985397,
      "learning_rate": 4.779755276318998e-06,
      "loss": 0.019,
      "step": 1604480
    },
    {
      "epoch": 2.6257994409641077,
      "grad_norm": 0.374204158782959,
      "learning_rate": 4.779689384105481e-06,
      "loss": 0.0162,
      "step": 1604500
    },
    {
      "epoch": 2.6258321714027613,
      "grad_norm": 0.15223638713359833,
      "learning_rate": 4.7796234918919635e-06,
      "loss": 0.0136,
      "step": 1604520
    },
    {
      "epoch": 2.6258649018414144,
      "grad_norm": 0.2324959933757782,
      "learning_rate": 4.779557599678446e-06,
      "loss": 0.0205,
      "step": 1604540
    },
    {
      "epoch": 2.6258976322800676,
      "grad_norm": 0.5780513286590576,
      "learning_rate": 4.779491707464929e-06,
      "loss": 0.0114,
      "step": 1604560
    },
    {
      "epoch": 2.625930362718721,
      "grad_norm": 0.3769547939300537,
      "learning_rate": 4.779425815251413e-06,
      "loss": 0.0152,
      "step": 1604580
    },
    {
      "epoch": 2.6259630931573748,
      "grad_norm": 0.16534313559532166,
      "learning_rate": 4.779359923037895e-06,
      "loss": 0.0105,
      "step": 1604600
    },
    {
      "epoch": 2.625995823596028,
      "grad_norm": 0.4257103502750397,
      "learning_rate": 4.779294030824378e-06,
      "loss": 0.01,
      "step": 1604620
    },
    {
      "epoch": 2.626028554034681,
      "grad_norm": 0.2122933268547058,
      "learning_rate": 4.779228138610861e-06,
      "loss": 0.0113,
      "step": 1604640
    },
    {
      "epoch": 2.6260612844733346,
      "grad_norm": 0.5500808358192444,
      "learning_rate": 4.7791622463973435e-06,
      "loss": 0.013,
      "step": 1604660
    },
    {
      "epoch": 2.626094014911988,
      "grad_norm": 0.4595955014228821,
      "learning_rate": 4.779096354183826e-06,
      "loss": 0.012,
      "step": 1604680
    },
    {
      "epoch": 2.626126745350641,
      "grad_norm": 0.36598801612854004,
      "learning_rate": 4.779030461970309e-06,
      "loss": 0.016,
      "step": 1604700
    },
    {
      "epoch": 2.6261594757892945,
      "grad_norm": 0.3986223340034485,
      "learning_rate": 4.778964569756792e-06,
      "loss": 0.0169,
      "step": 1604720
    },
    {
      "epoch": 2.626192206227948,
      "grad_norm": 0.6093181371688843,
      "learning_rate": 4.778898677543275e-06,
      "loss": 0.0203,
      "step": 1604740
    },
    {
      "epoch": 2.6262249366666013,
      "grad_norm": 0.12075696885585785,
      "learning_rate": 4.778832785329758e-06,
      "loss": 0.014,
      "step": 1604760
    },
    {
      "epoch": 2.6262576671052544,
      "grad_norm": 0.05448954924941063,
      "learning_rate": 4.778766893116241e-06,
      "loss": 0.022,
      "step": 1604780
    },
    {
      "epoch": 2.626290397543908,
      "grad_norm": 0.7365610003471375,
      "learning_rate": 4.7787010009027236e-06,
      "loss": 0.0102,
      "step": 1604800
    },
    {
      "epoch": 2.626323127982561,
      "grad_norm": 0.10380351543426514,
      "learning_rate": 4.778635108689206e-06,
      "loss": 0.0122,
      "step": 1604820
    },
    {
      "epoch": 2.6263558584212143,
      "grad_norm": 0.31359681487083435,
      "learning_rate": 4.77856921647569e-06,
      "loss": 0.0183,
      "step": 1604840
    },
    {
      "epoch": 2.626388588859868,
      "grad_norm": 0.5323066711425781,
      "learning_rate": 4.778503324262173e-06,
      "loss": 0.0153,
      "step": 1604860
    },
    {
      "epoch": 2.626421319298521,
      "grad_norm": 0.22166860103607178,
      "learning_rate": 4.778437432048655e-06,
      "loss": 0.0162,
      "step": 1604880
    },
    {
      "epoch": 2.6264540497371747,
      "grad_norm": 0.2659626603126526,
      "learning_rate": 4.778371539835138e-06,
      "loss": 0.02,
      "step": 1604900
    },
    {
      "epoch": 2.626486780175828,
      "grad_norm": 0.03797826170921326,
      "learning_rate": 4.778305647621621e-06,
      "loss": 0.0109,
      "step": 1604920
    },
    {
      "epoch": 2.6265195106144814,
      "grad_norm": 0.1384938359260559,
      "learning_rate": 4.778239755408104e-06,
      "loss": 0.0174,
      "step": 1604940
    },
    {
      "epoch": 2.6265522410531346,
      "grad_norm": 1.1555620431900024,
      "learning_rate": 4.778173863194586e-06,
      "loss": 0.0192,
      "step": 1604960
    },
    {
      "epoch": 2.6265849714917877,
      "grad_norm": 0.9455401301383972,
      "learning_rate": 4.77810797098107e-06,
      "loss": 0.0118,
      "step": 1604980
    },
    {
      "epoch": 2.6266177019304413,
      "grad_norm": 0.20574259757995605,
      "learning_rate": 4.778042078767553e-06,
      "loss": 0.0177,
      "step": 1605000
    },
    {
      "epoch": 2.6266504323690945,
      "grad_norm": 0.4633714556694031,
      "learning_rate": 4.777976186554035e-06,
      "loss": 0.0112,
      "step": 1605020
    },
    {
      "epoch": 2.626683162807748,
      "grad_norm": 0.21067988872528076,
      "learning_rate": 4.777910294340518e-06,
      "loss": 0.0202,
      "step": 1605040
    },
    {
      "epoch": 2.626715893246401,
      "grad_norm": 0.3512071669101715,
      "learning_rate": 4.777844402127001e-06,
      "loss": 0.0157,
      "step": 1605060
    },
    {
      "epoch": 2.626748623685055,
      "grad_norm": 0.28938454389572144,
      "learning_rate": 4.777778509913484e-06,
      "loss": 0.0221,
      "step": 1605080
    },
    {
      "epoch": 2.626781354123708,
      "grad_norm": 0.26093965768814087,
      "learning_rate": 4.777712617699966e-06,
      "loss": 0.0139,
      "step": 1605100
    },
    {
      "epoch": 2.626814084562361,
      "grad_norm": 0.36017441749572754,
      "learning_rate": 4.777646725486449e-06,
      "loss": 0.0124,
      "step": 1605120
    },
    {
      "epoch": 2.6268468150010147,
      "grad_norm": 0.16662460565567017,
      "learning_rate": 4.777580833272933e-06,
      "loss": 0.0133,
      "step": 1605140
    },
    {
      "epoch": 2.626879545439668,
      "grad_norm": 0.8176252841949463,
      "learning_rate": 4.7775149410594154e-06,
      "loss": 0.0193,
      "step": 1605160
    },
    {
      "epoch": 2.6269122758783214,
      "grad_norm": 0.07957673072814941,
      "learning_rate": 4.777449048845898e-06,
      "loss": 0.0104,
      "step": 1605180
    },
    {
      "epoch": 2.6269450063169746,
      "grad_norm": 0.20609630644321442,
      "learning_rate": 4.777383156632382e-06,
      "loss": 0.0161,
      "step": 1605200
    },
    {
      "epoch": 2.626977736755628,
      "grad_norm": 0.13969802856445312,
      "learning_rate": 4.7773172644188645e-06,
      "loss": 0.0175,
      "step": 1605220
    },
    {
      "epoch": 2.6270104671942813,
      "grad_norm": 0.17315591871738434,
      "learning_rate": 4.777251372205347e-06,
      "loss": 0.0136,
      "step": 1605240
    },
    {
      "epoch": 2.6270431976329345,
      "grad_norm": 0.25361064076423645,
      "learning_rate": 4.77718547999183e-06,
      "loss": 0.0118,
      "step": 1605260
    },
    {
      "epoch": 2.627075928071588,
      "grad_norm": 0.7406476140022278,
      "learning_rate": 4.777119587778313e-06,
      "loss": 0.0151,
      "step": 1605280
    },
    {
      "epoch": 2.6271086585102412,
      "grad_norm": 0.33732742071151733,
      "learning_rate": 4.7770536955647955e-06,
      "loss": 0.0168,
      "step": 1605300
    },
    {
      "epoch": 2.627141388948895,
      "grad_norm": 1.0656635761260986,
      "learning_rate": 4.776987803351278e-06,
      "loss": 0.0157,
      "step": 1605320
    },
    {
      "epoch": 2.627174119387548,
      "grad_norm": 0.7783425450325012,
      "learning_rate": 4.776921911137761e-06,
      "loss": 0.0174,
      "step": 1605340
    },
    {
      "epoch": 2.6272068498262016,
      "grad_norm": 0.3208049535751343,
      "learning_rate": 4.7768560189242445e-06,
      "loss": 0.0121,
      "step": 1605360
    },
    {
      "epoch": 2.6272395802648547,
      "grad_norm": 1.3154219388961792,
      "learning_rate": 4.776790126710727e-06,
      "loss": 0.0123,
      "step": 1605380
    },
    {
      "epoch": 2.627272310703508,
      "grad_norm": 0.4268086552619934,
      "learning_rate": 4.77672423449721e-06,
      "loss": 0.0237,
      "step": 1605400
    },
    {
      "epoch": 2.6273050411421615,
      "grad_norm": 0.13178251683712006,
      "learning_rate": 4.776658342283693e-06,
      "loss": 0.0185,
      "step": 1605420
    },
    {
      "epoch": 2.6273377715808146,
      "grad_norm": 0.5596264004707336,
      "learning_rate": 4.7765924500701755e-06,
      "loss": 0.0122,
      "step": 1605440
    },
    {
      "epoch": 2.627370502019468,
      "grad_norm": 0.26312923431396484,
      "learning_rate": 4.776526557856658e-06,
      "loss": 0.0123,
      "step": 1605460
    },
    {
      "epoch": 2.6274032324581214,
      "grad_norm": 0.1690325289964676,
      "learning_rate": 4.776460665643141e-06,
      "loss": 0.0128,
      "step": 1605480
    },
    {
      "epoch": 2.627435962896775,
      "grad_norm": 0.6349098086357117,
      "learning_rate": 4.776394773429624e-06,
      "loss": 0.0236,
      "step": 1605500
    },
    {
      "epoch": 2.627468693335428,
      "grad_norm": 0.3022060990333557,
      "learning_rate": 4.7763288812161064e-06,
      "loss": 0.0167,
      "step": 1605520
    },
    {
      "epoch": 2.6275014237740812,
      "grad_norm": 0.09018982201814651,
      "learning_rate": 4.77626298900259e-06,
      "loss": 0.0138,
      "step": 1605540
    },
    {
      "epoch": 2.627534154212735,
      "grad_norm": 0.21105992794036865,
      "learning_rate": 4.776197096789073e-06,
      "loss": 0.0166,
      "step": 1605560
    },
    {
      "epoch": 2.627566884651388,
      "grad_norm": 0.3404403328895569,
      "learning_rate": 4.7761312045755555e-06,
      "loss": 0.0173,
      "step": 1605580
    },
    {
      "epoch": 2.6275996150900416,
      "grad_norm": 0.6378120183944702,
      "learning_rate": 4.776065312362039e-06,
      "loss": 0.0199,
      "step": 1605600
    },
    {
      "epoch": 2.6276323455286947,
      "grad_norm": 0.4307800531387329,
      "learning_rate": 4.775999420148522e-06,
      "loss": 0.0144,
      "step": 1605620
    },
    {
      "epoch": 2.6276650759673483,
      "grad_norm": 0.3660876154899597,
      "learning_rate": 4.7759335279350046e-06,
      "loss": 0.0134,
      "step": 1605640
    },
    {
      "epoch": 2.6276978064060015,
      "grad_norm": 0.1264006346464157,
      "learning_rate": 4.775867635721487e-06,
      "loss": 0.0097,
      "step": 1605660
    },
    {
      "epoch": 2.6277305368446546,
      "grad_norm": 0.777232825756073,
      "learning_rate": 4.77580174350797e-06,
      "loss": 0.013,
      "step": 1605680
    },
    {
      "epoch": 2.6277632672833082,
      "grad_norm": 0.9481176137924194,
      "learning_rate": 4.775735851294453e-06,
      "loss": 0.016,
      "step": 1605700
    },
    {
      "epoch": 2.6277959977219614,
      "grad_norm": 0.374904990196228,
      "learning_rate": 4.7756699590809355e-06,
      "loss": 0.0094,
      "step": 1605720
    },
    {
      "epoch": 2.627828728160615,
      "grad_norm": 1.2747278213500977,
      "learning_rate": 4.775604066867418e-06,
      "loss": 0.0132,
      "step": 1605740
    },
    {
      "epoch": 2.627861458599268,
      "grad_norm": 0.536931037902832,
      "learning_rate": 4.775538174653902e-06,
      "loss": 0.0143,
      "step": 1605760
    },
    {
      "epoch": 2.6278941890379217,
      "grad_norm": 0.5566474199295044,
      "learning_rate": 4.775472282440385e-06,
      "loss": 0.015,
      "step": 1605780
    },
    {
      "epoch": 2.627926919476575,
      "grad_norm": 0.19289076328277588,
      "learning_rate": 4.775406390226867e-06,
      "loss": 0.0094,
      "step": 1605800
    },
    {
      "epoch": 2.627959649915228,
      "grad_norm": 0.9009885191917419,
      "learning_rate": 4.77534049801335e-06,
      "loss": 0.0214,
      "step": 1605820
    },
    {
      "epoch": 2.6279923803538816,
      "grad_norm": 1.749704360961914,
      "learning_rate": 4.775274605799833e-06,
      "loss": 0.0139,
      "step": 1605840
    },
    {
      "epoch": 2.6280251107925348,
      "grad_norm": 0.3933265209197998,
      "learning_rate": 4.7752087135863156e-06,
      "loss": 0.0161,
      "step": 1605860
    },
    {
      "epoch": 2.628057841231188,
      "grad_norm": 0.29758667945861816,
      "learning_rate": 4.775142821372798e-06,
      "loss": 0.0101,
      "step": 1605880
    },
    {
      "epoch": 2.6280905716698415,
      "grad_norm": 0.7287753820419312,
      "learning_rate": 4.775076929159282e-06,
      "loss": 0.0194,
      "step": 1605900
    },
    {
      "epoch": 2.628123302108495,
      "grad_norm": 0.11665212363004684,
      "learning_rate": 4.775011036945765e-06,
      "loss": 0.0149,
      "step": 1605920
    },
    {
      "epoch": 2.6281560325471482,
      "grad_norm": 0.2173648625612259,
      "learning_rate": 4.774945144732247e-06,
      "loss": 0.0187,
      "step": 1605940
    },
    {
      "epoch": 2.6281887629858014,
      "grad_norm": 1.2027058601379395,
      "learning_rate": 4.77487925251873e-06,
      "loss": 0.0127,
      "step": 1605960
    },
    {
      "epoch": 2.628221493424455,
      "grad_norm": 1.0725977420806885,
      "learning_rate": 4.774813360305213e-06,
      "loss": 0.0146,
      "step": 1605980
    },
    {
      "epoch": 2.628254223863108,
      "grad_norm": 0.14248137176036835,
      "learning_rate": 4.7747474680916964e-06,
      "loss": 0.0121,
      "step": 1606000
    },
    {
      "epoch": 2.6282869543017613,
      "grad_norm": 0.30623859167099,
      "learning_rate": 4.774681575878179e-06,
      "loss": 0.013,
      "step": 1606020
    },
    {
      "epoch": 2.628319684740415,
      "grad_norm": 0.2652757167816162,
      "learning_rate": 4.774615683664662e-06,
      "loss": 0.0161,
      "step": 1606040
    },
    {
      "epoch": 2.6283524151790685,
      "grad_norm": 0.13072162866592407,
      "learning_rate": 4.774549791451145e-06,
      "loss": 0.0107,
      "step": 1606060
    },
    {
      "epoch": 2.6283851456177216,
      "grad_norm": 0.34509024024009705,
      "learning_rate": 4.774483899237627e-06,
      "loss": 0.0113,
      "step": 1606080
    },
    {
      "epoch": 2.628417876056375,
      "grad_norm": 0.44027799367904663,
      "learning_rate": 4.77441800702411e-06,
      "loss": 0.0141,
      "step": 1606100
    },
    {
      "epoch": 2.6284506064950284,
      "grad_norm": 0.24681542813777924,
      "learning_rate": 4.774352114810593e-06,
      "loss": 0.0117,
      "step": 1606120
    },
    {
      "epoch": 2.6284833369336815,
      "grad_norm": 0.25375238060951233,
      "learning_rate": 4.774286222597076e-06,
      "loss": 0.0097,
      "step": 1606140
    },
    {
      "epoch": 2.6285160673723347,
      "grad_norm": 0.11623619496822357,
      "learning_rate": 4.774220330383559e-06,
      "loss": 0.0176,
      "step": 1606160
    },
    {
      "epoch": 2.6285487978109883,
      "grad_norm": 0.20578385889530182,
      "learning_rate": 4.774154438170042e-06,
      "loss": 0.0127,
      "step": 1606180
    },
    {
      "epoch": 2.628581528249642,
      "grad_norm": 0.8833977580070496,
      "learning_rate": 4.774088545956525e-06,
      "loss": 0.0097,
      "step": 1606200
    },
    {
      "epoch": 2.628614258688295,
      "grad_norm": 0.49787673354148865,
      "learning_rate": 4.774022653743007e-06,
      "loss": 0.0226,
      "step": 1606220
    },
    {
      "epoch": 2.628646989126948,
      "grad_norm": 0.5853182673454285,
      "learning_rate": 4.773956761529491e-06,
      "loss": 0.0176,
      "step": 1606240
    },
    {
      "epoch": 2.6286797195656018,
      "grad_norm": 0.1529095470905304,
      "learning_rate": 4.773890869315974e-06,
      "loss": 0.0141,
      "step": 1606260
    },
    {
      "epoch": 2.628712450004255,
      "grad_norm": 0.2263040393590927,
      "learning_rate": 4.7738249771024565e-06,
      "loss": 0.0093,
      "step": 1606280
    },
    {
      "epoch": 2.628745180442908,
      "grad_norm": 0.08033552765846252,
      "learning_rate": 4.773759084888939e-06,
      "loss": 0.0093,
      "step": 1606300
    },
    {
      "epoch": 2.6287779108815617,
      "grad_norm": 1.0516812801361084,
      "learning_rate": 4.773693192675422e-06,
      "loss": 0.0128,
      "step": 1606320
    },
    {
      "epoch": 2.6288106413202152,
      "grad_norm": 0.8140460252761841,
      "learning_rate": 4.773627300461905e-06,
      "loss": 0.0174,
      "step": 1606340
    },
    {
      "epoch": 2.6288433717588684,
      "grad_norm": 0.6777746081352234,
      "learning_rate": 4.7735614082483874e-06,
      "loss": 0.0119,
      "step": 1606360
    },
    {
      "epoch": 2.6288761021975215,
      "grad_norm": 0.5899279713630676,
      "learning_rate": 4.773495516034871e-06,
      "loss": 0.0136,
      "step": 1606380
    },
    {
      "epoch": 2.628908832636175,
      "grad_norm": 0.6653571128845215,
      "learning_rate": 4.773429623821354e-06,
      "loss": 0.0113,
      "step": 1606400
    },
    {
      "epoch": 2.6289415630748283,
      "grad_norm": 0.023222044110298157,
      "learning_rate": 4.7733637316078365e-06,
      "loss": 0.0125,
      "step": 1606420
    },
    {
      "epoch": 2.6289742935134814,
      "grad_norm": 0.06551136821508408,
      "learning_rate": 4.773297839394319e-06,
      "loss": 0.0158,
      "step": 1606440
    },
    {
      "epoch": 2.629007023952135,
      "grad_norm": 0.14420534670352936,
      "learning_rate": 4.773231947180802e-06,
      "loss": 0.015,
      "step": 1606460
    },
    {
      "epoch": 2.629039754390788,
      "grad_norm": 0.15427690744400024,
      "learning_rate": 4.773166054967285e-06,
      "loss": 0.0162,
      "step": 1606480
    },
    {
      "epoch": 2.629072484829442,
      "grad_norm": 0.16309040784835815,
      "learning_rate": 4.7731001627537675e-06,
      "loss": 0.0101,
      "step": 1606500
    },
    {
      "epoch": 2.629105215268095,
      "grad_norm": 0.17980259656906128,
      "learning_rate": 4.77303427054025e-06,
      "loss": 0.0105,
      "step": 1606520
    },
    {
      "epoch": 2.6291379457067485,
      "grad_norm": 0.6304925084114075,
      "learning_rate": 4.772968378326733e-06,
      "loss": 0.0125,
      "step": 1606540
    },
    {
      "epoch": 2.6291706761454017,
      "grad_norm": 0.6800256967544556,
      "learning_rate": 4.7729024861132165e-06,
      "loss": 0.0143,
      "step": 1606560
    },
    {
      "epoch": 2.629203406584055,
      "grad_norm": 0.400199294090271,
      "learning_rate": 4.772836593899699e-06,
      "loss": 0.0097,
      "step": 1606580
    },
    {
      "epoch": 2.6292361370227084,
      "grad_norm": 0.7820540070533752,
      "learning_rate": 4.772770701686182e-06,
      "loss": 0.0162,
      "step": 1606600
    },
    {
      "epoch": 2.6292688674613616,
      "grad_norm": 0.18301509320735931,
      "learning_rate": 4.772704809472666e-06,
      "loss": 0.0194,
      "step": 1606620
    },
    {
      "epoch": 2.629301597900015,
      "grad_norm": 2.809793472290039,
      "learning_rate": 4.772638917259148e-06,
      "loss": 0.019,
      "step": 1606640
    },
    {
      "epoch": 2.6293343283386683,
      "grad_norm": 0.47284990549087524,
      "learning_rate": 4.772573025045631e-06,
      "loss": 0.028,
      "step": 1606660
    },
    {
      "epoch": 2.629367058777322,
      "grad_norm": 0.12640050053596497,
      "learning_rate": 4.772507132832114e-06,
      "loss": 0.0104,
      "step": 1606680
    },
    {
      "epoch": 2.629399789215975,
      "grad_norm": 0.08539561182260513,
      "learning_rate": 4.7724412406185966e-06,
      "loss": 0.0141,
      "step": 1606700
    },
    {
      "epoch": 2.629432519654628,
      "grad_norm": 1.2800074815750122,
      "learning_rate": 4.772375348405079e-06,
      "loss": 0.0176,
      "step": 1606720
    },
    {
      "epoch": 2.629465250093282,
      "grad_norm": 0.953652560710907,
      "learning_rate": 4.772309456191562e-06,
      "loss": 0.0166,
      "step": 1606740
    },
    {
      "epoch": 2.629497980531935,
      "grad_norm": 0.41296350955963135,
      "learning_rate": 4.772243563978045e-06,
      "loss": 0.0176,
      "step": 1606760
    },
    {
      "epoch": 2.6295307109705885,
      "grad_norm": 0.31141573190689087,
      "learning_rate": 4.772177671764528e-06,
      "loss": 0.0188,
      "step": 1606780
    },
    {
      "epoch": 2.6295634414092417,
      "grad_norm": 0.24429620802402496,
      "learning_rate": 4.772111779551011e-06,
      "loss": 0.0114,
      "step": 1606800
    },
    {
      "epoch": 2.6295961718478953,
      "grad_norm": 0.3801126778125763,
      "learning_rate": 4.772045887337494e-06,
      "loss": 0.0165,
      "step": 1606820
    },
    {
      "epoch": 2.6296289022865484,
      "grad_norm": 0.693888247013092,
      "learning_rate": 4.771979995123977e-06,
      "loss": 0.0158,
      "step": 1606840
    },
    {
      "epoch": 2.6296616327252016,
      "grad_norm": 0.3543458878993988,
      "learning_rate": 4.771914102910459e-06,
      "loss": 0.0129,
      "step": 1606860
    },
    {
      "epoch": 2.629694363163855,
      "grad_norm": 0.35658392310142517,
      "learning_rate": 4.771848210696942e-06,
      "loss": 0.0219,
      "step": 1606880
    },
    {
      "epoch": 2.6297270936025083,
      "grad_norm": 0.3129079043865204,
      "learning_rate": 4.771782318483425e-06,
      "loss": 0.0142,
      "step": 1606900
    },
    {
      "epoch": 2.629759824041162,
      "grad_norm": 0.2589464783668518,
      "learning_rate": 4.7717164262699075e-06,
      "loss": 0.0109,
      "step": 1606920
    },
    {
      "epoch": 2.629792554479815,
      "grad_norm": 0.1608860194683075,
      "learning_rate": 4.77165053405639e-06,
      "loss": 0.0152,
      "step": 1606940
    },
    {
      "epoch": 2.6298252849184687,
      "grad_norm": 0.34939438104629517,
      "learning_rate": 4.771584641842874e-06,
      "loss": 0.0183,
      "step": 1606960
    },
    {
      "epoch": 2.629858015357122,
      "grad_norm": 0.6350535750389099,
      "learning_rate": 4.771518749629357e-06,
      "loss": 0.0131,
      "step": 1606980
    },
    {
      "epoch": 2.629890745795775,
      "grad_norm": 0.248062402009964,
      "learning_rate": 4.771452857415839e-06,
      "loss": 0.0121,
      "step": 1607000
    },
    {
      "epoch": 2.6299234762344286,
      "grad_norm": 1.3096545934677124,
      "learning_rate": 4.771386965202323e-06,
      "loss": 0.0172,
      "step": 1607020
    },
    {
      "epoch": 2.6299562066730817,
      "grad_norm": 0.4311349391937256,
      "learning_rate": 4.771321072988806e-06,
      "loss": 0.0155,
      "step": 1607040
    },
    {
      "epoch": 2.6299889371117353,
      "grad_norm": 0.2476009726524353,
      "learning_rate": 4.771255180775288e-06,
      "loss": 0.0084,
      "step": 1607060
    },
    {
      "epoch": 2.6300216675503885,
      "grad_norm": 0.39315494894981384,
      "learning_rate": 4.771189288561771e-06,
      "loss": 0.0114,
      "step": 1607080
    },
    {
      "epoch": 2.630054397989042,
      "grad_norm": 0.354704886674881,
      "learning_rate": 4.771123396348254e-06,
      "loss": 0.0161,
      "step": 1607100
    },
    {
      "epoch": 2.630087128427695,
      "grad_norm": 0.7446300983428955,
      "learning_rate": 4.771057504134737e-06,
      "loss": 0.0119,
      "step": 1607120
    },
    {
      "epoch": 2.6301198588663484,
      "grad_norm": 0.35058337450027466,
      "learning_rate": 4.770991611921219e-06,
      "loss": 0.0173,
      "step": 1607140
    },
    {
      "epoch": 2.630152589305002,
      "grad_norm": 0.29600176215171814,
      "learning_rate": 4.770925719707702e-06,
      "loss": 0.0104,
      "step": 1607160
    },
    {
      "epoch": 2.630185319743655,
      "grad_norm": 0.41341206431388855,
      "learning_rate": 4.770859827494186e-06,
      "loss": 0.0103,
      "step": 1607180
    },
    {
      "epoch": 2.6302180501823087,
      "grad_norm": 0.8437760472297668,
      "learning_rate": 4.7707939352806684e-06,
      "loss": 0.0131,
      "step": 1607200
    },
    {
      "epoch": 2.630250780620962,
      "grad_norm": 0.2317204624414444,
      "learning_rate": 4.770728043067151e-06,
      "loss": 0.0116,
      "step": 1607220
    },
    {
      "epoch": 2.6302835110596154,
      "grad_norm": 0.2197970151901245,
      "learning_rate": 4.770662150853634e-06,
      "loss": 0.0105,
      "step": 1607240
    },
    {
      "epoch": 2.6303162414982686,
      "grad_norm": 0.16130977869033813,
      "learning_rate": 4.770596258640117e-06,
      "loss": 0.0095,
      "step": 1607260
    },
    {
      "epoch": 2.6303489719369217,
      "grad_norm": 0.10811200737953186,
      "learning_rate": 4.770530366426599e-06,
      "loss": 0.0103,
      "step": 1607280
    },
    {
      "epoch": 2.6303817023755753,
      "grad_norm": 0.3022354543209076,
      "learning_rate": 4.770464474213083e-06,
      "loss": 0.0129,
      "step": 1607300
    },
    {
      "epoch": 2.6304144328142285,
      "grad_norm": 0.5137245655059814,
      "learning_rate": 4.770398581999566e-06,
      "loss": 0.0204,
      "step": 1607320
    },
    {
      "epoch": 2.630447163252882,
      "grad_norm": 0.5094093680381775,
      "learning_rate": 4.7703326897860485e-06,
      "loss": 0.0155,
      "step": 1607340
    },
    {
      "epoch": 2.6304798936915352,
      "grad_norm": 0.5815303325653076,
      "learning_rate": 4.770266797572531e-06,
      "loss": 0.0205,
      "step": 1607360
    },
    {
      "epoch": 2.630512624130189,
      "grad_norm": 0.25463634729385376,
      "learning_rate": 4.770200905359014e-06,
      "loss": 0.0111,
      "step": 1607380
    },
    {
      "epoch": 2.630545354568842,
      "grad_norm": 0.3906741738319397,
      "learning_rate": 4.770135013145497e-06,
      "loss": 0.0097,
      "step": 1607400
    },
    {
      "epoch": 2.630578085007495,
      "grad_norm": 0.5006880164146423,
      "learning_rate": 4.77006912093198e-06,
      "loss": 0.0167,
      "step": 1607420
    },
    {
      "epoch": 2.6306108154461487,
      "grad_norm": 0.36556994915008545,
      "learning_rate": 4.770003228718463e-06,
      "loss": 0.0185,
      "step": 1607440
    },
    {
      "epoch": 2.630643545884802,
      "grad_norm": 0.3129879832267761,
      "learning_rate": 4.769937336504946e-06,
      "loss": 0.0136,
      "step": 1607460
    },
    {
      "epoch": 2.630676276323455,
      "grad_norm": 0.13102342188358307,
      "learning_rate": 4.7698714442914285e-06,
      "loss": 0.0205,
      "step": 1607480
    },
    {
      "epoch": 2.6307090067621086,
      "grad_norm": 0.7869452238082886,
      "learning_rate": 4.769805552077911e-06,
      "loss": 0.0161,
      "step": 1607500
    },
    {
      "epoch": 2.630741737200762,
      "grad_norm": 0.23129625618457794,
      "learning_rate": 4.769739659864394e-06,
      "loss": 0.0136,
      "step": 1607520
    },
    {
      "epoch": 2.6307744676394154,
      "grad_norm": 0.4068956971168518,
      "learning_rate": 4.769673767650877e-06,
      "loss": 0.0114,
      "step": 1607540
    },
    {
      "epoch": 2.6308071980780685,
      "grad_norm": 0.16157051920890808,
      "learning_rate": 4.7696078754373594e-06,
      "loss": 0.0104,
      "step": 1607560
    },
    {
      "epoch": 2.630839928516722,
      "grad_norm": 0.17187902331352234,
      "learning_rate": 4.769541983223843e-06,
      "loss": 0.0119,
      "step": 1607580
    },
    {
      "epoch": 2.6308726589553753,
      "grad_norm": 0.21643739938735962,
      "learning_rate": 4.769476091010326e-06,
      "loss": 0.0085,
      "step": 1607600
    },
    {
      "epoch": 2.6309053893940284,
      "grad_norm": 0.892967700958252,
      "learning_rate": 4.7694101987968085e-06,
      "loss": 0.0153,
      "step": 1607620
    },
    {
      "epoch": 2.630938119832682,
      "grad_norm": 0.2074304223060608,
      "learning_rate": 4.769344306583291e-06,
      "loss": 0.0095,
      "step": 1607640
    },
    {
      "epoch": 2.6309708502713356,
      "grad_norm": 0.27267923951148987,
      "learning_rate": 4.769278414369775e-06,
      "loss": 0.0155,
      "step": 1607660
    },
    {
      "epoch": 2.6310035807099887,
      "grad_norm": 0.8738297820091248,
      "learning_rate": 4.769212522156258e-06,
      "loss": 0.0124,
      "step": 1607680
    },
    {
      "epoch": 2.631036311148642,
      "grad_norm": 0.13351255655288696,
      "learning_rate": 4.76914662994274e-06,
      "loss": 0.0186,
      "step": 1607700
    },
    {
      "epoch": 2.6310690415872955,
      "grad_norm": 0.2141348123550415,
      "learning_rate": 4.769080737729223e-06,
      "loss": 0.011,
      "step": 1607720
    },
    {
      "epoch": 2.6311017720259486,
      "grad_norm": 0.5971750020980835,
      "learning_rate": 4.769014845515706e-06,
      "loss": 0.0132,
      "step": 1607740
    },
    {
      "epoch": 2.631134502464602,
      "grad_norm": 0.2286200225353241,
      "learning_rate": 4.7689489533021885e-06,
      "loss": 0.0116,
      "step": 1607760
    },
    {
      "epoch": 2.6311672329032554,
      "grad_norm": 0.37024709582328796,
      "learning_rate": 4.768883061088671e-06,
      "loss": 0.0109,
      "step": 1607780
    },
    {
      "epoch": 2.631199963341909,
      "grad_norm": 0.47179505228996277,
      "learning_rate": 4.768817168875155e-06,
      "loss": 0.0167,
      "step": 1607800
    },
    {
      "epoch": 2.631232693780562,
      "grad_norm": 0.29028886556625366,
      "learning_rate": 4.768751276661638e-06,
      "loss": 0.0125,
      "step": 1607820
    },
    {
      "epoch": 2.6312654242192153,
      "grad_norm": 0.08148007839918137,
      "learning_rate": 4.76868538444812e-06,
      "loss": 0.0095,
      "step": 1607840
    },
    {
      "epoch": 2.631298154657869,
      "grad_norm": 0.5530539751052856,
      "learning_rate": 4.768619492234603e-06,
      "loss": 0.0131,
      "step": 1607860
    },
    {
      "epoch": 2.631330885096522,
      "grad_norm": 0.6574378609657288,
      "learning_rate": 4.768553600021086e-06,
      "loss": 0.0139,
      "step": 1607880
    },
    {
      "epoch": 2.631363615535175,
      "grad_norm": 0.1978212594985962,
      "learning_rate": 4.7684877078075686e-06,
      "loss": 0.0119,
      "step": 1607900
    },
    {
      "epoch": 2.6313963459738288,
      "grad_norm": 0.39390507340431213,
      "learning_rate": 4.768421815594051e-06,
      "loss": 0.0091,
      "step": 1607920
    },
    {
      "epoch": 2.631429076412482,
      "grad_norm": 0.11761008948087692,
      "learning_rate": 4.768355923380534e-06,
      "loss": 0.0135,
      "step": 1607940
    },
    {
      "epoch": 2.6314618068511355,
      "grad_norm": 0.24453282356262207,
      "learning_rate": 4.768290031167017e-06,
      "loss": 0.0198,
      "step": 1607960
    },
    {
      "epoch": 2.6314945372897887,
      "grad_norm": 0.33921733498573303,
      "learning_rate": 4.7682241389535e-06,
      "loss": 0.0211,
      "step": 1607980
    },
    {
      "epoch": 2.6315272677284423,
      "grad_norm": 0.2085406631231308,
      "learning_rate": 4.768158246739983e-06,
      "loss": 0.0122,
      "step": 1608000
    },
    {
      "epoch": 2.6315599981670954,
      "grad_norm": 0.0812375396490097,
      "learning_rate": 4.768092354526466e-06,
      "loss": 0.0087,
      "step": 1608020
    },
    {
      "epoch": 2.6315927286057486,
      "grad_norm": 0.16954191029071808,
      "learning_rate": 4.7680264623129494e-06,
      "loss": 0.0172,
      "step": 1608040
    },
    {
      "epoch": 2.631625459044402,
      "grad_norm": 0.3619517683982849,
      "learning_rate": 4.767960570099432e-06,
      "loss": 0.0141,
      "step": 1608060
    },
    {
      "epoch": 2.6316581894830553,
      "grad_norm": 0.3426852524280548,
      "learning_rate": 4.767894677885915e-06,
      "loss": 0.0139,
      "step": 1608080
    },
    {
      "epoch": 2.631690919921709,
      "grad_norm": 0.37179696559906006,
      "learning_rate": 4.767828785672398e-06,
      "loss": 0.0141,
      "step": 1608100
    },
    {
      "epoch": 2.631723650360362,
      "grad_norm": 0.3055981397628784,
      "learning_rate": 4.76776289345888e-06,
      "loss": 0.0174,
      "step": 1608120
    },
    {
      "epoch": 2.6317563807990156,
      "grad_norm": 0.1428598314523697,
      "learning_rate": 4.767697001245363e-06,
      "loss": 0.0141,
      "step": 1608140
    },
    {
      "epoch": 2.631789111237669,
      "grad_norm": 0.1734299659729004,
      "learning_rate": 4.767631109031846e-06,
      "loss": 0.0143,
      "step": 1608160
    },
    {
      "epoch": 2.631821841676322,
      "grad_norm": 0.20118126273155212,
      "learning_rate": 4.767565216818329e-06,
      "loss": 0.0128,
      "step": 1608180
    },
    {
      "epoch": 2.6318545721149755,
      "grad_norm": 0.22990064322948456,
      "learning_rate": 4.767499324604812e-06,
      "loss": 0.0096,
      "step": 1608200
    },
    {
      "epoch": 2.6318873025536287,
      "grad_norm": 0.3756157159805298,
      "learning_rate": 4.767433432391295e-06,
      "loss": 0.0107,
      "step": 1608220
    },
    {
      "epoch": 2.6319200329922823,
      "grad_norm": 0.7238908410072327,
      "learning_rate": 4.767367540177778e-06,
      "loss": 0.0127,
      "step": 1608240
    },
    {
      "epoch": 2.6319527634309354,
      "grad_norm": 0.24500392377376556,
      "learning_rate": 4.76730164796426e-06,
      "loss": 0.0142,
      "step": 1608260
    },
    {
      "epoch": 2.631985493869589,
      "grad_norm": 0.47481516003608704,
      "learning_rate": 4.767235755750743e-06,
      "loss": 0.0241,
      "step": 1608280
    },
    {
      "epoch": 2.632018224308242,
      "grad_norm": 0.38926470279693604,
      "learning_rate": 4.767169863537226e-06,
      "loss": 0.0125,
      "step": 1608300
    },
    {
      "epoch": 2.6320509547468953,
      "grad_norm": 0.4720437824726105,
      "learning_rate": 4.767103971323709e-06,
      "loss": 0.0139,
      "step": 1608320
    },
    {
      "epoch": 2.632083685185549,
      "grad_norm": 0.5484031438827515,
      "learning_rate": 4.767038079110191e-06,
      "loss": 0.0191,
      "step": 1608340
    },
    {
      "epoch": 2.632116415624202,
      "grad_norm": 0.43670937418937683,
      "learning_rate": 4.766972186896675e-06,
      "loss": 0.0171,
      "step": 1608360
    },
    {
      "epoch": 2.6321491460628557,
      "grad_norm": 0.5533968806266785,
      "learning_rate": 4.766906294683158e-06,
      "loss": 0.0145,
      "step": 1608380
    },
    {
      "epoch": 2.632181876501509,
      "grad_norm": 0.1869313269853592,
      "learning_rate": 4.7668404024696404e-06,
      "loss": 0.013,
      "step": 1608400
    },
    {
      "epoch": 2.6322146069401624,
      "grad_norm": 0.21658051013946533,
      "learning_rate": 4.766774510256123e-06,
      "loss": 0.0095,
      "step": 1608420
    },
    {
      "epoch": 2.6322473373788156,
      "grad_norm": 0.43589353561401367,
      "learning_rate": 4.766708618042607e-06,
      "loss": 0.0198,
      "step": 1608440
    },
    {
      "epoch": 2.6322800678174687,
      "grad_norm": 1.1900359392166138,
      "learning_rate": 4.7666427258290895e-06,
      "loss": 0.0252,
      "step": 1608460
    },
    {
      "epoch": 2.6323127982561223,
      "grad_norm": 0.7207303047180176,
      "learning_rate": 4.766576833615572e-06,
      "loss": 0.0166,
      "step": 1608480
    },
    {
      "epoch": 2.6323455286947754,
      "grad_norm": 0.6583690047264099,
      "learning_rate": 4.766510941402055e-06,
      "loss": 0.0175,
      "step": 1608500
    },
    {
      "epoch": 2.632378259133429,
      "grad_norm": 0.31825000047683716,
      "learning_rate": 4.766445049188538e-06,
      "loss": 0.0161,
      "step": 1608520
    },
    {
      "epoch": 2.632410989572082,
      "grad_norm": 0.4841214120388031,
      "learning_rate": 4.7663791569750205e-06,
      "loss": 0.0108,
      "step": 1608540
    },
    {
      "epoch": 2.632443720010736,
      "grad_norm": 0.46171218156814575,
      "learning_rate": 4.766313264761503e-06,
      "loss": 0.0181,
      "step": 1608560
    },
    {
      "epoch": 2.632476450449389,
      "grad_norm": 0.21899163722991943,
      "learning_rate": 4.766247372547986e-06,
      "loss": 0.0086,
      "step": 1608580
    },
    {
      "epoch": 2.632509180888042,
      "grad_norm": 0.10749319940805435,
      "learning_rate": 4.7661814803344695e-06,
      "loss": 0.0158,
      "step": 1608600
    },
    {
      "epoch": 2.6325419113266957,
      "grad_norm": 0.5174360275268555,
      "learning_rate": 4.766115588120952e-06,
      "loss": 0.0134,
      "step": 1608620
    },
    {
      "epoch": 2.632574641765349,
      "grad_norm": 0.25291183590888977,
      "learning_rate": 4.766049695907435e-06,
      "loss": 0.0112,
      "step": 1608640
    },
    {
      "epoch": 2.6326073722040024,
      "grad_norm": 0.12351955473423004,
      "learning_rate": 4.765983803693918e-06,
      "loss": 0.0123,
      "step": 1608660
    },
    {
      "epoch": 2.6326401026426556,
      "grad_norm": 0.3495750427246094,
      "learning_rate": 4.7659179114804005e-06,
      "loss": 0.0139,
      "step": 1608680
    },
    {
      "epoch": 2.632672833081309,
      "grad_norm": 0.2640601098537445,
      "learning_rate": 4.765852019266883e-06,
      "loss": 0.0183,
      "step": 1608700
    },
    {
      "epoch": 2.6327055635199623,
      "grad_norm": 0.27087435126304626,
      "learning_rate": 4.765786127053367e-06,
      "loss": 0.0147,
      "step": 1608720
    },
    {
      "epoch": 2.6327382939586155,
      "grad_norm": 0.6948369145393372,
      "learning_rate": 4.7657202348398496e-06,
      "loss": 0.0216,
      "step": 1608740
    },
    {
      "epoch": 2.632771024397269,
      "grad_norm": 0.2939528822898865,
      "learning_rate": 4.765654342626332e-06,
      "loss": 0.0202,
      "step": 1608760
    },
    {
      "epoch": 2.632803754835922,
      "grad_norm": 0.21208599209785461,
      "learning_rate": 4.765588450412815e-06,
      "loss": 0.0168,
      "step": 1608780
    },
    {
      "epoch": 2.632836485274576,
      "grad_norm": 0.29469239711761475,
      "learning_rate": 4.765522558199298e-06,
      "loss": 0.0159,
      "step": 1608800
    },
    {
      "epoch": 2.632869215713229,
      "grad_norm": 0.04911540448665619,
      "learning_rate": 4.7654566659857805e-06,
      "loss": 0.02,
      "step": 1608820
    },
    {
      "epoch": 2.6329019461518826,
      "grad_norm": 0.7454177141189575,
      "learning_rate": 4.765390773772264e-06,
      "loss": 0.0108,
      "step": 1608840
    },
    {
      "epoch": 2.6329346765905357,
      "grad_norm": 0.20234496891498566,
      "learning_rate": 4.765324881558747e-06,
      "loss": 0.0109,
      "step": 1608860
    },
    {
      "epoch": 2.632967407029189,
      "grad_norm": 0.5802914500236511,
      "learning_rate": 4.76525898934523e-06,
      "loss": 0.0126,
      "step": 1608880
    },
    {
      "epoch": 2.6330001374678424,
      "grad_norm": 0.2579449415206909,
      "learning_rate": 4.765193097131712e-06,
      "loss": 0.0166,
      "step": 1608900
    },
    {
      "epoch": 2.6330328679064956,
      "grad_norm": 0.2051902562379837,
      "learning_rate": 4.765127204918195e-06,
      "loss": 0.0167,
      "step": 1608920
    },
    {
      "epoch": 2.6330655983451488,
      "grad_norm": 0.16867876052856445,
      "learning_rate": 4.765061312704678e-06,
      "loss": 0.0121,
      "step": 1608940
    },
    {
      "epoch": 2.6330983287838023,
      "grad_norm": 1.5990136861801147,
      "learning_rate": 4.7649954204911605e-06,
      "loss": 0.0131,
      "step": 1608960
    },
    {
      "epoch": 2.633131059222456,
      "grad_norm": 0.9820323586463928,
      "learning_rate": 4.764929528277643e-06,
      "loss": 0.0244,
      "step": 1608980
    },
    {
      "epoch": 2.633163789661109,
      "grad_norm": 0.2677839696407318,
      "learning_rate": 4.764863636064127e-06,
      "loss": 0.0149,
      "step": 1609000
    },
    {
      "epoch": 2.6331965200997622,
      "grad_norm": 0.236494243144989,
      "learning_rate": 4.76479774385061e-06,
      "loss": 0.0095,
      "step": 1609020
    },
    {
      "epoch": 2.633229250538416,
      "grad_norm": 0.35395196080207825,
      "learning_rate": 4.764731851637092e-06,
      "loss": 0.0143,
      "step": 1609040
    },
    {
      "epoch": 2.633261980977069,
      "grad_norm": 0.35686540603637695,
      "learning_rate": 4.764665959423576e-06,
      "loss": 0.0193,
      "step": 1609060
    },
    {
      "epoch": 2.633294711415722,
      "grad_norm": 0.2702588140964508,
      "learning_rate": 4.764600067210059e-06,
      "loss": 0.0121,
      "step": 1609080
    },
    {
      "epoch": 2.6333274418543757,
      "grad_norm": 0.23390598595142365,
      "learning_rate": 4.764534174996541e-06,
      "loss": 0.0175,
      "step": 1609100
    },
    {
      "epoch": 2.6333601722930293,
      "grad_norm": 0.1815854161977768,
      "learning_rate": 4.764468282783024e-06,
      "loss": 0.0138,
      "step": 1609120
    },
    {
      "epoch": 2.6333929027316825,
      "grad_norm": 0.4892410337924957,
      "learning_rate": 4.764402390569507e-06,
      "loss": 0.0148,
      "step": 1609140
    },
    {
      "epoch": 2.6334256331703356,
      "grad_norm": 0.30387064814567566,
      "learning_rate": 4.76433649835599e-06,
      "loss": 0.0139,
      "step": 1609160
    },
    {
      "epoch": 2.633458363608989,
      "grad_norm": 0.2862454354763031,
      "learning_rate": 4.764270606142472e-06,
      "loss": 0.0121,
      "step": 1609180
    },
    {
      "epoch": 2.6334910940476424,
      "grad_norm": 0.197188600897789,
      "learning_rate": 4.764204713928955e-06,
      "loss": 0.0204,
      "step": 1609200
    },
    {
      "epoch": 2.6335238244862955,
      "grad_norm": 0.34129422903060913,
      "learning_rate": 4.764138821715439e-06,
      "loss": 0.0125,
      "step": 1609220
    },
    {
      "epoch": 2.633556554924949,
      "grad_norm": 0.37164655327796936,
      "learning_rate": 4.7640729295019214e-06,
      "loss": 0.0229,
      "step": 1609240
    },
    {
      "epoch": 2.6335892853636027,
      "grad_norm": 0.17300525307655334,
      "learning_rate": 4.764007037288404e-06,
      "loss": 0.0145,
      "step": 1609260
    },
    {
      "epoch": 2.633622015802256,
      "grad_norm": 0.24516423046588898,
      "learning_rate": 4.763941145074887e-06,
      "loss": 0.0138,
      "step": 1609280
    },
    {
      "epoch": 2.633654746240909,
      "grad_norm": 0.25479763746261597,
      "learning_rate": 4.76387525286137e-06,
      "loss": 0.0147,
      "step": 1609300
    },
    {
      "epoch": 2.6336874766795626,
      "grad_norm": 0.3129221200942993,
      "learning_rate": 4.763809360647852e-06,
      "loss": 0.0152,
      "step": 1609320
    },
    {
      "epoch": 2.6337202071182158,
      "grad_norm": 0.9574208855628967,
      "learning_rate": 4.763743468434335e-06,
      "loss": 0.0198,
      "step": 1609340
    },
    {
      "epoch": 2.633752937556869,
      "grad_norm": 0.49455904960632324,
      "learning_rate": 4.763677576220818e-06,
      "loss": 0.0156,
      "step": 1609360
    },
    {
      "epoch": 2.6337856679955225,
      "grad_norm": 0.3110501766204834,
      "learning_rate": 4.763611684007301e-06,
      "loss": 0.0094,
      "step": 1609380
    },
    {
      "epoch": 2.633818398434176,
      "grad_norm": 0.3594575822353363,
      "learning_rate": 4.763545791793784e-06,
      "loss": 0.0085,
      "step": 1609400
    },
    {
      "epoch": 2.6338511288728292,
      "grad_norm": 0.6464399695396423,
      "learning_rate": 4.763479899580267e-06,
      "loss": 0.0208,
      "step": 1609420
    },
    {
      "epoch": 2.6338838593114824,
      "grad_norm": 0.1465577632188797,
      "learning_rate": 4.76341400736675e-06,
      "loss": 0.0133,
      "step": 1609440
    },
    {
      "epoch": 2.633916589750136,
      "grad_norm": 0.24720020592212677,
      "learning_rate": 4.763348115153233e-06,
      "loss": 0.0176,
      "step": 1609460
    },
    {
      "epoch": 2.633949320188789,
      "grad_norm": 0.5355771780014038,
      "learning_rate": 4.763282222939716e-06,
      "loss": 0.0141,
      "step": 1609480
    },
    {
      "epoch": 2.6339820506274423,
      "grad_norm": 0.3962092101573944,
      "learning_rate": 4.763216330726199e-06,
      "loss": 0.0093,
      "step": 1609500
    },
    {
      "epoch": 2.634014781066096,
      "grad_norm": 0.11058782786130905,
      "learning_rate": 4.7631504385126815e-06,
      "loss": 0.014,
      "step": 1609520
    },
    {
      "epoch": 2.634047511504749,
      "grad_norm": 0.6875158548355103,
      "learning_rate": 4.763084546299164e-06,
      "loss": 0.0119,
      "step": 1609540
    },
    {
      "epoch": 2.6340802419434026,
      "grad_norm": 0.07818028330802917,
      "learning_rate": 4.763018654085647e-06,
      "loss": 0.0149,
      "step": 1609560
    },
    {
      "epoch": 2.6341129723820558,
      "grad_norm": 0.5771155953407288,
      "learning_rate": 4.76295276187213e-06,
      "loss": 0.0188,
      "step": 1609580
    },
    {
      "epoch": 2.6341457028207094,
      "grad_norm": 1.2182817459106445,
      "learning_rate": 4.7628868696586125e-06,
      "loss": 0.0129,
      "step": 1609600
    },
    {
      "epoch": 2.6341784332593625,
      "grad_norm": 0.45374563336372375,
      "learning_rate": 4.762820977445096e-06,
      "loss": 0.0133,
      "step": 1609620
    },
    {
      "epoch": 2.6342111636980157,
      "grad_norm": 0.1887972503900528,
      "learning_rate": 4.762755085231579e-06,
      "loss": 0.0118,
      "step": 1609640
    },
    {
      "epoch": 2.6342438941366693,
      "grad_norm": 0.3040999174118042,
      "learning_rate": 4.7626891930180615e-06,
      "loss": 0.0154,
      "step": 1609660
    },
    {
      "epoch": 2.6342766245753224,
      "grad_norm": 0.7956418395042419,
      "learning_rate": 4.762623300804544e-06,
      "loss": 0.0103,
      "step": 1609680
    },
    {
      "epoch": 2.634309355013976,
      "grad_norm": 0.25587257742881775,
      "learning_rate": 4.762557408591027e-06,
      "loss": 0.0106,
      "step": 1609700
    },
    {
      "epoch": 2.634342085452629,
      "grad_norm": 0.5082299709320068,
      "learning_rate": 4.76249151637751e-06,
      "loss": 0.0149,
      "step": 1609720
    },
    {
      "epoch": 2.6343748158912828,
      "grad_norm": 0.36331042647361755,
      "learning_rate": 4.7624256241639925e-06,
      "loss": 0.0128,
      "step": 1609740
    },
    {
      "epoch": 2.634407546329936,
      "grad_norm": 0.41343793272972107,
      "learning_rate": 4.762359731950476e-06,
      "loss": 0.0151,
      "step": 1609760
    },
    {
      "epoch": 2.634440276768589,
      "grad_norm": 0.45040464401245117,
      "learning_rate": 4.762293839736959e-06,
      "loss": 0.0159,
      "step": 1609780
    },
    {
      "epoch": 2.6344730072072426,
      "grad_norm": 0.2494657188653946,
      "learning_rate": 4.7622279475234415e-06,
      "loss": 0.0117,
      "step": 1609800
    },
    {
      "epoch": 2.634505737645896,
      "grad_norm": 0.25833263993263245,
      "learning_rate": 4.762162055309924e-06,
      "loss": 0.0163,
      "step": 1609820
    },
    {
      "epoch": 2.6345384680845494,
      "grad_norm": 0.36604002118110657,
      "learning_rate": 4.762096163096407e-06,
      "loss": 0.0076,
      "step": 1609840
    },
    {
      "epoch": 2.6345711985232025,
      "grad_norm": 0.22411757707595825,
      "learning_rate": 4.762030270882891e-06,
      "loss": 0.0118,
      "step": 1609860
    },
    {
      "epoch": 2.634603928961856,
      "grad_norm": 0.5812450647354126,
      "learning_rate": 4.761964378669373e-06,
      "loss": 0.0116,
      "step": 1609880
    },
    {
      "epoch": 2.6346366594005093,
      "grad_norm": 0.43902587890625,
      "learning_rate": 4.761898486455856e-06,
      "loss": 0.0136,
      "step": 1609900
    },
    {
      "epoch": 2.6346693898391624,
      "grad_norm": 0.1391977071762085,
      "learning_rate": 4.761832594242339e-06,
      "loss": 0.0112,
      "step": 1609920
    },
    {
      "epoch": 2.634702120277816,
      "grad_norm": 0.07967283576726913,
      "learning_rate": 4.7617667020288216e-06,
      "loss": 0.0094,
      "step": 1609940
    },
    {
      "epoch": 2.634734850716469,
      "grad_norm": 0.36955177783966064,
      "learning_rate": 4.761700809815304e-06,
      "loss": 0.0133,
      "step": 1609960
    },
    {
      "epoch": 2.6347675811551228,
      "grad_norm": 0.18923208117485046,
      "learning_rate": 4.761634917601787e-06,
      "loss": 0.0149,
      "step": 1609980
    },
    {
      "epoch": 2.634800311593776,
      "grad_norm": 0.4199172258377075,
      "learning_rate": 4.76156902538827e-06,
      "loss": 0.0137,
      "step": 1610000
    },
    {
      "epoch": 2.6348330420324295,
      "grad_norm": 0.27794572710990906,
      "learning_rate": 4.761503133174753e-06,
      "loss": 0.0148,
      "step": 1610020
    },
    {
      "epoch": 2.6348657724710827,
      "grad_norm": 0.21730147302150726,
      "learning_rate": 4.761437240961236e-06,
      "loss": 0.0119,
      "step": 1610040
    },
    {
      "epoch": 2.634898502909736,
      "grad_norm": 0.41297781467437744,
      "learning_rate": 4.761371348747719e-06,
      "loss": 0.0135,
      "step": 1610060
    },
    {
      "epoch": 2.6349312333483894,
      "grad_norm": 0.27104413509368896,
      "learning_rate": 4.761305456534202e-06,
      "loss": 0.0105,
      "step": 1610080
    },
    {
      "epoch": 2.6349639637870426,
      "grad_norm": 0.5152419805526733,
      "learning_rate": 4.761239564320684e-06,
      "loss": 0.0127,
      "step": 1610100
    },
    {
      "epoch": 2.634996694225696,
      "grad_norm": 0.2851443290710449,
      "learning_rate": 4.761173672107168e-06,
      "loss": 0.0116,
      "step": 1610120
    },
    {
      "epoch": 2.6350294246643493,
      "grad_norm": 0.3865605592727661,
      "learning_rate": 4.761107779893651e-06,
      "loss": 0.0149,
      "step": 1610140
    },
    {
      "epoch": 2.635062155103003,
      "grad_norm": 1.9682080745697021,
      "learning_rate": 4.761041887680133e-06,
      "loss": 0.0122,
      "step": 1610160
    },
    {
      "epoch": 2.635094885541656,
      "grad_norm": 0.5839363932609558,
      "learning_rate": 4.760975995466616e-06,
      "loss": 0.0126,
      "step": 1610180
    },
    {
      "epoch": 2.635127615980309,
      "grad_norm": 0.288308322429657,
      "learning_rate": 4.760910103253099e-06,
      "loss": 0.0121,
      "step": 1610200
    },
    {
      "epoch": 2.635160346418963,
      "grad_norm": 0.3177167475223541,
      "learning_rate": 4.760844211039582e-06,
      "loss": 0.0097,
      "step": 1610220
    },
    {
      "epoch": 2.635193076857616,
      "grad_norm": 0.05824665352702141,
      "learning_rate": 4.760778318826064e-06,
      "loss": 0.0139,
      "step": 1610240
    },
    {
      "epoch": 2.6352258072962695,
      "grad_norm": 0.37757056951522827,
      "learning_rate": 4.760712426612548e-06,
      "loss": 0.0125,
      "step": 1610260
    },
    {
      "epoch": 2.6352585377349227,
      "grad_norm": 0.3197658658027649,
      "learning_rate": 4.760646534399031e-06,
      "loss": 0.0124,
      "step": 1610280
    },
    {
      "epoch": 2.6352912681735763,
      "grad_norm": 1.0381474494934082,
      "learning_rate": 4.7605806421855134e-06,
      "loss": 0.0184,
      "step": 1610300
    },
    {
      "epoch": 2.6353239986122294,
      "grad_norm": 0.15062865614891052,
      "learning_rate": 4.760514749971996e-06,
      "loss": 0.0125,
      "step": 1610320
    },
    {
      "epoch": 2.6353567290508826,
      "grad_norm": 0.1727484166622162,
      "learning_rate": 4.760448857758479e-06,
      "loss": 0.0082,
      "step": 1610340
    },
    {
      "epoch": 2.635389459489536,
      "grad_norm": 0.6881146430969238,
      "learning_rate": 4.760382965544962e-06,
      "loss": 0.0181,
      "step": 1610360
    },
    {
      "epoch": 2.6354221899281893,
      "grad_norm": 0.3505163788795471,
      "learning_rate": 4.760317073331444e-06,
      "loss": 0.0133,
      "step": 1610380
    },
    {
      "epoch": 2.6354549203668425,
      "grad_norm": 0.7265021800994873,
      "learning_rate": 4.760251181117927e-06,
      "loss": 0.0121,
      "step": 1610400
    },
    {
      "epoch": 2.635487650805496,
      "grad_norm": 0.8849778175354004,
      "learning_rate": 4.760185288904411e-06,
      "loss": 0.02,
      "step": 1610420
    },
    {
      "epoch": 2.6355203812441497,
      "grad_norm": 0.3728983700275421,
      "learning_rate": 4.7601193966908935e-06,
      "loss": 0.0148,
      "step": 1610440
    },
    {
      "epoch": 2.635553111682803,
      "grad_norm": 0.5208212733268738,
      "learning_rate": 4.760053504477376e-06,
      "loss": 0.0167,
      "step": 1610460
    },
    {
      "epoch": 2.635585842121456,
      "grad_norm": 0.1558745801448822,
      "learning_rate": 4.75998761226386e-06,
      "loss": 0.0118,
      "step": 1610480
    },
    {
      "epoch": 2.6356185725601096,
      "grad_norm": 0.22187626361846924,
      "learning_rate": 4.7599217200503425e-06,
      "loss": 0.0163,
      "step": 1610500
    },
    {
      "epoch": 2.6356513029987627,
      "grad_norm": 0.3930300176143646,
      "learning_rate": 4.759855827836825e-06,
      "loss": 0.0131,
      "step": 1610520
    },
    {
      "epoch": 2.635684033437416,
      "grad_norm": 0.6161254048347473,
      "learning_rate": 4.759789935623308e-06,
      "loss": 0.0152,
      "step": 1610540
    },
    {
      "epoch": 2.6357167638760695,
      "grad_norm": 0.4207897186279297,
      "learning_rate": 4.759724043409791e-06,
      "loss": 0.0215,
      "step": 1610560
    },
    {
      "epoch": 2.635749494314723,
      "grad_norm": 0.19570104777812958,
      "learning_rate": 4.7596581511962735e-06,
      "loss": 0.0159,
      "step": 1610580
    },
    {
      "epoch": 2.635782224753376,
      "grad_norm": 0.44831690192222595,
      "learning_rate": 4.759592258982756e-06,
      "loss": 0.0153,
      "step": 1610600
    },
    {
      "epoch": 2.6358149551920294,
      "grad_norm": 0.23284047842025757,
      "learning_rate": 4.759526366769239e-06,
      "loss": 0.0097,
      "step": 1610620
    },
    {
      "epoch": 2.635847685630683,
      "grad_norm": 0.180002823472023,
      "learning_rate": 4.7594604745557225e-06,
      "loss": 0.0136,
      "step": 1610640
    },
    {
      "epoch": 2.635880416069336,
      "grad_norm": 0.9820750951766968,
      "learning_rate": 4.759394582342205e-06,
      "loss": 0.0161,
      "step": 1610660
    },
    {
      "epoch": 2.6359131465079892,
      "grad_norm": 0.4148298501968384,
      "learning_rate": 4.759328690128688e-06,
      "loss": 0.0147,
      "step": 1610680
    },
    {
      "epoch": 2.635945876946643,
      "grad_norm": 0.3329559564590454,
      "learning_rate": 4.759262797915171e-06,
      "loss": 0.0134,
      "step": 1610700
    },
    {
      "epoch": 2.6359786073852964,
      "grad_norm": 0.32280105352401733,
      "learning_rate": 4.7591969057016535e-06,
      "loss": 0.0106,
      "step": 1610720
    },
    {
      "epoch": 2.6360113378239496,
      "grad_norm": 0.11183947324752808,
      "learning_rate": 4.759131013488136e-06,
      "loss": 0.0185,
      "step": 1610740
    },
    {
      "epoch": 2.6360440682626027,
      "grad_norm": 0.0784602090716362,
      "learning_rate": 4.759065121274619e-06,
      "loss": 0.014,
      "step": 1610760
    },
    {
      "epoch": 2.6360767987012563,
      "grad_norm": 0.14286118745803833,
      "learning_rate": 4.758999229061102e-06,
      "loss": 0.0097,
      "step": 1610780
    },
    {
      "epoch": 2.6361095291399095,
      "grad_norm": 0.22930052876472473,
      "learning_rate": 4.7589333368475845e-06,
      "loss": 0.0133,
      "step": 1610800
    },
    {
      "epoch": 2.6361422595785626,
      "grad_norm": 0.885804295539856,
      "learning_rate": 4.758867444634068e-06,
      "loss": 0.0149,
      "step": 1610820
    },
    {
      "epoch": 2.6361749900172162,
      "grad_norm": 0.24549265205860138,
      "learning_rate": 4.758801552420551e-06,
      "loss": 0.0148,
      "step": 1610840
    },
    {
      "epoch": 2.63620772045587,
      "grad_norm": 0.1984211653470993,
      "learning_rate": 4.7587356602070335e-06,
      "loss": 0.0201,
      "step": 1610860
    },
    {
      "epoch": 2.636240450894523,
      "grad_norm": 0.28826531767845154,
      "learning_rate": 4.758669767993517e-06,
      "loss": 0.0135,
      "step": 1610880
    },
    {
      "epoch": 2.636273181333176,
      "grad_norm": 1.332047939300537,
      "learning_rate": 4.75860387578e-06,
      "loss": 0.0145,
      "step": 1610900
    },
    {
      "epoch": 2.6363059117718297,
      "grad_norm": 0.5278061032295227,
      "learning_rate": 4.758537983566483e-06,
      "loss": 0.0157,
      "step": 1610920
    },
    {
      "epoch": 2.636338642210483,
      "grad_norm": 0.6970029473304749,
      "learning_rate": 4.758472091352965e-06,
      "loss": 0.0103,
      "step": 1610940
    },
    {
      "epoch": 2.636371372649136,
      "grad_norm": 0.7359527349472046,
      "learning_rate": 4.758406199139448e-06,
      "loss": 0.015,
      "step": 1610960
    },
    {
      "epoch": 2.6364041030877896,
      "grad_norm": 0.16671426594257355,
      "learning_rate": 4.758340306925931e-06,
      "loss": 0.0115,
      "step": 1610980
    },
    {
      "epoch": 2.6364368335264428,
      "grad_norm": 0.2096695899963379,
      "learning_rate": 4.7582744147124136e-06,
      "loss": 0.0172,
      "step": 1611000
    },
    {
      "epoch": 2.6364695639650964,
      "grad_norm": 0.767422616481781,
      "learning_rate": 4.758208522498896e-06,
      "loss": 0.0116,
      "step": 1611020
    },
    {
      "epoch": 2.6365022944037495,
      "grad_norm": 0.19999520480632782,
      "learning_rate": 4.75814263028538e-06,
      "loss": 0.0131,
      "step": 1611040
    },
    {
      "epoch": 2.636535024842403,
      "grad_norm": 0.6569109559059143,
      "learning_rate": 4.758076738071863e-06,
      "loss": 0.0182,
      "step": 1611060
    },
    {
      "epoch": 2.6365677552810562,
      "grad_norm": 0.8229334950447083,
      "learning_rate": 4.758010845858345e-06,
      "loss": 0.0131,
      "step": 1611080
    },
    {
      "epoch": 2.6366004857197094,
      "grad_norm": 0.5145174264907837,
      "learning_rate": 4.757944953644828e-06,
      "loss": 0.0207,
      "step": 1611100
    },
    {
      "epoch": 2.636633216158363,
      "grad_norm": 0.3576265871524811,
      "learning_rate": 4.757879061431311e-06,
      "loss": 0.0169,
      "step": 1611120
    },
    {
      "epoch": 2.636665946597016,
      "grad_norm": 1.2535306215286255,
      "learning_rate": 4.757813169217794e-06,
      "loss": 0.0107,
      "step": 1611140
    },
    {
      "epoch": 2.6366986770356697,
      "grad_norm": 0.5970300436019897,
      "learning_rate": 4.757747277004276e-06,
      "loss": 0.0108,
      "step": 1611160
    },
    {
      "epoch": 2.636731407474323,
      "grad_norm": 0.37498483061790466,
      "learning_rate": 4.75768138479076e-06,
      "loss": 0.015,
      "step": 1611180
    },
    {
      "epoch": 2.6367641379129765,
      "grad_norm": 0.13143280148506165,
      "learning_rate": 4.757615492577243e-06,
      "loss": 0.0138,
      "step": 1611200
    },
    {
      "epoch": 2.6367968683516296,
      "grad_norm": 0.6433039307594299,
      "learning_rate": 4.757549600363725e-06,
      "loss": 0.01,
      "step": 1611220
    },
    {
      "epoch": 2.636829598790283,
      "grad_norm": 0.6251307129859924,
      "learning_rate": 4.757483708150208e-06,
      "loss": 0.0128,
      "step": 1611240
    },
    {
      "epoch": 2.6368623292289364,
      "grad_norm": 0.28932985663414,
      "learning_rate": 4.757417815936691e-06,
      "loss": 0.018,
      "step": 1611260
    },
    {
      "epoch": 2.6368950596675895,
      "grad_norm": 0.6322333812713623,
      "learning_rate": 4.7573519237231745e-06,
      "loss": 0.0118,
      "step": 1611280
    },
    {
      "epoch": 2.636927790106243,
      "grad_norm": 0.3050512373447418,
      "learning_rate": 4.757286031509657e-06,
      "loss": 0.0123,
      "step": 1611300
    },
    {
      "epoch": 2.6369605205448963,
      "grad_norm": 0.4403466284275055,
      "learning_rate": 4.75722013929614e-06,
      "loss": 0.0129,
      "step": 1611320
    },
    {
      "epoch": 2.63699325098355,
      "grad_norm": 0.3860996663570404,
      "learning_rate": 4.757154247082623e-06,
      "loss": 0.0121,
      "step": 1611340
    },
    {
      "epoch": 2.637025981422203,
      "grad_norm": 0.08196878433227539,
      "learning_rate": 4.757088354869105e-06,
      "loss": 0.0105,
      "step": 1611360
    },
    {
      "epoch": 2.637058711860856,
      "grad_norm": 0.23040583729743958,
      "learning_rate": 4.757022462655588e-06,
      "loss": 0.0103,
      "step": 1611380
    },
    {
      "epoch": 2.6370914422995098,
      "grad_norm": 0.3364216089248657,
      "learning_rate": 4.756956570442071e-06,
      "loss": 0.0155,
      "step": 1611400
    },
    {
      "epoch": 2.637124172738163,
      "grad_norm": 0.5447317957878113,
      "learning_rate": 4.756890678228554e-06,
      "loss": 0.0169,
      "step": 1611420
    },
    {
      "epoch": 2.6371569031768165,
      "grad_norm": 0.23186272382736206,
      "learning_rate": 4.756824786015037e-06,
      "loss": 0.0115,
      "step": 1611440
    },
    {
      "epoch": 2.6371896336154697,
      "grad_norm": 0.8004288673400879,
      "learning_rate": 4.75675889380152e-06,
      "loss": 0.0172,
      "step": 1611460
    },
    {
      "epoch": 2.6372223640541232,
      "grad_norm": 0.04414452239871025,
      "learning_rate": 4.756693001588003e-06,
      "loss": 0.0178,
      "step": 1611480
    },
    {
      "epoch": 2.6372550944927764,
      "grad_norm": 0.9896714687347412,
      "learning_rate": 4.7566271093744854e-06,
      "loss": 0.0163,
      "step": 1611500
    },
    {
      "epoch": 2.6372878249314295,
      "grad_norm": 0.2756444215774536,
      "learning_rate": 4.756561217160969e-06,
      "loss": 0.0142,
      "step": 1611520
    },
    {
      "epoch": 2.637320555370083,
      "grad_norm": 0.3301979601383209,
      "learning_rate": 4.756495324947452e-06,
      "loss": 0.0148,
      "step": 1611540
    },
    {
      "epoch": 2.6373532858087363,
      "grad_norm": 1.176918625831604,
      "learning_rate": 4.7564294327339345e-06,
      "loss": 0.0251,
      "step": 1611560
    },
    {
      "epoch": 2.63738601624739,
      "grad_norm": 0.15028809010982513,
      "learning_rate": 4.756363540520417e-06,
      "loss": 0.0108,
      "step": 1611580
    },
    {
      "epoch": 2.637418746686043,
      "grad_norm": 0.13643448054790497,
      "learning_rate": 4.7562976483069e-06,
      "loss": 0.0145,
      "step": 1611600
    },
    {
      "epoch": 2.6374514771246966,
      "grad_norm": 0.1721346527338028,
      "learning_rate": 4.756231756093383e-06,
      "loss": 0.0146,
      "step": 1611620
    },
    {
      "epoch": 2.63748420756335,
      "grad_norm": 0.24850015342235565,
      "learning_rate": 4.7561658638798655e-06,
      "loss": 0.011,
      "step": 1611640
    },
    {
      "epoch": 2.637516938002003,
      "grad_norm": 0.3308393359184265,
      "learning_rate": 4.756099971666349e-06,
      "loss": 0.0095,
      "step": 1611660
    },
    {
      "epoch": 2.6375496684406565,
      "grad_norm": 0.846152126789093,
      "learning_rate": 4.756034079452832e-06,
      "loss": 0.0167,
      "step": 1611680
    },
    {
      "epoch": 2.6375823988793097,
      "grad_norm": 0.5694327354431152,
      "learning_rate": 4.7559681872393145e-06,
      "loss": 0.0164,
      "step": 1611700
    },
    {
      "epoch": 2.6376151293179633,
      "grad_norm": 0.5361470580101013,
      "learning_rate": 4.755902295025797e-06,
      "loss": 0.0174,
      "step": 1611720
    },
    {
      "epoch": 2.6376478597566164,
      "grad_norm": 0.1466665118932724,
      "learning_rate": 4.75583640281228e-06,
      "loss": 0.0136,
      "step": 1611740
    },
    {
      "epoch": 2.63768059019527,
      "grad_norm": 0.24081261456012726,
      "learning_rate": 4.755770510598763e-06,
      "loss": 0.0147,
      "step": 1611760
    },
    {
      "epoch": 2.637713320633923,
      "grad_norm": 0.3536885380744934,
      "learning_rate": 4.7557046183852455e-06,
      "loss": 0.0138,
      "step": 1611780
    },
    {
      "epoch": 2.6377460510725763,
      "grad_norm": 0.24437494575977325,
      "learning_rate": 4.755638726171728e-06,
      "loss": 0.0117,
      "step": 1611800
    },
    {
      "epoch": 2.63777878151123,
      "grad_norm": 0.3677014708518982,
      "learning_rate": 4.755572833958211e-06,
      "loss": 0.0108,
      "step": 1611820
    },
    {
      "epoch": 2.637811511949883,
      "grad_norm": 0.8951573371887207,
      "learning_rate": 4.7555069417446946e-06,
      "loss": 0.0216,
      "step": 1611840
    },
    {
      "epoch": 2.6378442423885367,
      "grad_norm": 0.18579930067062378,
      "learning_rate": 4.755441049531177e-06,
      "loss": 0.0096,
      "step": 1611860
    },
    {
      "epoch": 2.63787697282719,
      "grad_norm": 0.9606502056121826,
      "learning_rate": 4.75537515731766e-06,
      "loss": 0.0172,
      "step": 1611880
    },
    {
      "epoch": 2.6379097032658434,
      "grad_norm": 0.25769856572151184,
      "learning_rate": 4.755309265104144e-06,
      "loss": 0.0091,
      "step": 1611900
    },
    {
      "epoch": 2.6379424337044965,
      "grad_norm": 0.13877031207084656,
      "learning_rate": 4.755243372890626e-06,
      "loss": 0.0172,
      "step": 1611920
    },
    {
      "epoch": 2.6379751641431497,
      "grad_norm": 0.5940564870834351,
      "learning_rate": 4.755177480677109e-06,
      "loss": 0.0146,
      "step": 1611940
    },
    {
      "epoch": 2.6380078945818033,
      "grad_norm": 0.5826781988143921,
      "learning_rate": 4.755111588463592e-06,
      "loss": 0.0097,
      "step": 1611960
    },
    {
      "epoch": 2.6380406250204564,
      "grad_norm": 0.5959327220916748,
      "learning_rate": 4.7550456962500746e-06,
      "loss": 0.0161,
      "step": 1611980
    },
    {
      "epoch": 2.6380733554591096,
      "grad_norm": 0.18471364676952362,
      "learning_rate": 4.754979804036557e-06,
      "loss": 0.0107,
      "step": 1612000
    },
    {
      "epoch": 2.638106085897763,
      "grad_norm": 0.21364349126815796,
      "learning_rate": 4.75491391182304e-06,
      "loss": 0.0131,
      "step": 1612020
    },
    {
      "epoch": 2.638138816336417,
      "grad_norm": 1.0814001560211182,
      "learning_rate": 4.754848019609523e-06,
      "loss": 0.0172,
      "step": 1612040
    },
    {
      "epoch": 2.63817154677507,
      "grad_norm": 0.11566309630870819,
      "learning_rate": 4.754782127396006e-06,
      "loss": 0.0089,
      "step": 1612060
    },
    {
      "epoch": 2.638204277213723,
      "grad_norm": 0.6515703201293945,
      "learning_rate": 4.754716235182489e-06,
      "loss": 0.0188,
      "step": 1612080
    },
    {
      "epoch": 2.6382370076523767,
      "grad_norm": 0.19788436591625214,
      "learning_rate": 4.754650342968972e-06,
      "loss": 0.021,
      "step": 1612100
    },
    {
      "epoch": 2.63826973809103,
      "grad_norm": 0.4078076481819153,
      "learning_rate": 4.754584450755455e-06,
      "loss": 0.0115,
      "step": 1612120
    },
    {
      "epoch": 2.638302468529683,
      "grad_norm": 0.1097683310508728,
      "learning_rate": 4.754518558541937e-06,
      "loss": 0.0097,
      "step": 1612140
    },
    {
      "epoch": 2.6383351989683366,
      "grad_norm": 0.07272164523601532,
      "learning_rate": 4.75445266632842e-06,
      "loss": 0.0204,
      "step": 1612160
    },
    {
      "epoch": 2.63836792940699,
      "grad_norm": 0.15272939205169678,
      "learning_rate": 4.754386774114903e-06,
      "loss": 0.0134,
      "step": 1612180
    },
    {
      "epoch": 2.6384006598456433,
      "grad_norm": 0.39751970767974854,
      "learning_rate": 4.7543208819013856e-06,
      "loss": 0.0183,
      "step": 1612200
    },
    {
      "epoch": 2.6384333902842965,
      "grad_norm": 0.19556879997253418,
      "learning_rate": 4.754254989687869e-06,
      "loss": 0.0082,
      "step": 1612220
    },
    {
      "epoch": 2.63846612072295,
      "grad_norm": 1.0944043397903442,
      "learning_rate": 4.754189097474352e-06,
      "loss": 0.0099,
      "step": 1612240
    },
    {
      "epoch": 2.638498851161603,
      "grad_norm": 0.350344717502594,
      "learning_rate": 4.754123205260835e-06,
      "loss": 0.0211,
      "step": 1612260
    },
    {
      "epoch": 2.6385315816002564,
      "grad_norm": 0.7648894190788269,
      "learning_rate": 4.754057313047317e-06,
      "loss": 0.0154,
      "step": 1612280
    },
    {
      "epoch": 2.63856431203891,
      "grad_norm": 1.0636478662490845,
      "learning_rate": 4.753991420833801e-06,
      "loss": 0.018,
      "step": 1612300
    },
    {
      "epoch": 2.6385970424775635,
      "grad_norm": 0.12666203081607819,
      "learning_rate": 4.753925528620284e-06,
      "loss": 0.0116,
      "step": 1612320
    },
    {
      "epoch": 2.6386297729162167,
      "grad_norm": 0.39445653557777405,
      "learning_rate": 4.7538596364067664e-06,
      "loss": 0.0107,
      "step": 1612340
    },
    {
      "epoch": 2.63866250335487,
      "grad_norm": 0.5888729095458984,
      "learning_rate": 4.753793744193249e-06,
      "loss": 0.0134,
      "step": 1612360
    },
    {
      "epoch": 2.6386952337935234,
      "grad_norm": 1.7005385160446167,
      "learning_rate": 4.753727851979732e-06,
      "loss": 0.0197,
      "step": 1612380
    },
    {
      "epoch": 2.6387279642321766,
      "grad_norm": 0.2690295875072479,
      "learning_rate": 4.753661959766215e-06,
      "loss": 0.0148,
      "step": 1612400
    },
    {
      "epoch": 2.6387606946708297,
      "grad_norm": 0.38815343379974365,
      "learning_rate": 4.753596067552697e-06,
      "loss": 0.0241,
      "step": 1612420
    },
    {
      "epoch": 2.6387934251094833,
      "grad_norm": 0.38963010907173157,
      "learning_rate": 4.75353017533918e-06,
      "loss": 0.0206,
      "step": 1612440
    },
    {
      "epoch": 2.638826155548137,
      "grad_norm": 0.6141453981399536,
      "learning_rate": 4.753464283125664e-06,
      "loss": 0.009,
      "step": 1612460
    },
    {
      "epoch": 2.63885888598679,
      "grad_norm": 0.41472128033638,
      "learning_rate": 4.7533983909121465e-06,
      "loss": 0.0176,
      "step": 1612480
    },
    {
      "epoch": 2.6388916164254432,
      "grad_norm": 1.0307029485702515,
      "learning_rate": 4.753332498698629e-06,
      "loss": 0.0143,
      "step": 1612500
    },
    {
      "epoch": 2.638924346864097,
      "grad_norm": 0.22778992354869843,
      "learning_rate": 4.753266606485112e-06,
      "loss": 0.0122,
      "step": 1612520
    },
    {
      "epoch": 2.63895707730275,
      "grad_norm": 0.2838183045387268,
      "learning_rate": 4.753200714271595e-06,
      "loss": 0.0177,
      "step": 1612540
    },
    {
      "epoch": 2.638989807741403,
      "grad_norm": 0.2255823314189911,
      "learning_rate": 4.753134822058077e-06,
      "loss": 0.0121,
      "step": 1612560
    },
    {
      "epoch": 2.6390225381800567,
      "grad_norm": 0.30543825030326843,
      "learning_rate": 4.753068929844561e-06,
      "loss": 0.0175,
      "step": 1612580
    },
    {
      "epoch": 2.63905526861871,
      "grad_norm": 0.34093403816223145,
      "learning_rate": 4.753003037631044e-06,
      "loss": 0.012,
      "step": 1612600
    },
    {
      "epoch": 2.6390879990573635,
      "grad_norm": 0.4570537507534027,
      "learning_rate": 4.7529371454175265e-06,
      "loss": 0.0155,
      "step": 1612620
    },
    {
      "epoch": 2.6391207294960166,
      "grad_norm": 0.1589096188545227,
      "learning_rate": 4.752871253204009e-06,
      "loss": 0.0087,
      "step": 1612640
    },
    {
      "epoch": 2.63915345993467,
      "grad_norm": 0.10354241728782654,
      "learning_rate": 4.752805360990492e-06,
      "loss": 0.012,
      "step": 1612660
    },
    {
      "epoch": 2.6391861903733234,
      "grad_norm": 0.5374879837036133,
      "learning_rate": 4.752739468776975e-06,
      "loss": 0.0156,
      "step": 1612680
    },
    {
      "epoch": 2.6392189208119765,
      "grad_norm": 0.2501693069934845,
      "learning_rate": 4.752673576563458e-06,
      "loss": 0.0127,
      "step": 1612700
    },
    {
      "epoch": 2.63925165125063,
      "grad_norm": 0.274429589509964,
      "learning_rate": 4.752607684349941e-06,
      "loss": 0.0123,
      "step": 1612720
    },
    {
      "epoch": 2.6392843816892833,
      "grad_norm": 0.4928070306777954,
      "learning_rate": 4.752541792136424e-06,
      "loss": 0.0096,
      "step": 1612740
    },
    {
      "epoch": 2.639317112127937,
      "grad_norm": 0.2844490110874176,
      "learning_rate": 4.7524758999229065e-06,
      "loss": 0.0143,
      "step": 1612760
    },
    {
      "epoch": 2.63934984256659,
      "grad_norm": 0.16326183080673218,
      "learning_rate": 4.752410007709389e-06,
      "loss": 0.0117,
      "step": 1612780
    },
    {
      "epoch": 2.6393825730052436,
      "grad_norm": 0.19733496010303497,
      "learning_rate": 4.752344115495872e-06,
      "loss": 0.0167,
      "step": 1612800
    },
    {
      "epoch": 2.6394153034438967,
      "grad_norm": 0.6004432439804077,
      "learning_rate": 4.752278223282355e-06,
      "loss": 0.0153,
      "step": 1612820
    },
    {
      "epoch": 2.63944803388255,
      "grad_norm": 0.26343026757240295,
      "learning_rate": 4.7522123310688375e-06,
      "loss": 0.0175,
      "step": 1612840
    },
    {
      "epoch": 2.6394807643212035,
      "grad_norm": 0.11542550474405289,
      "learning_rate": 4.752146438855321e-06,
      "loss": 0.0147,
      "step": 1612860
    },
    {
      "epoch": 2.6395134947598566,
      "grad_norm": 0.17070630192756653,
      "learning_rate": 4.752080546641804e-06,
      "loss": 0.0239,
      "step": 1612880
    },
    {
      "epoch": 2.6395462251985102,
      "grad_norm": 0.46292683482170105,
      "learning_rate": 4.7520146544282865e-06,
      "loss": 0.0103,
      "step": 1612900
    },
    {
      "epoch": 2.6395789556371634,
      "grad_norm": 1.9276604652404785,
      "learning_rate": 4.751948762214769e-06,
      "loss": 0.0214,
      "step": 1612920
    },
    {
      "epoch": 2.639611686075817,
      "grad_norm": 0.2488095909357071,
      "learning_rate": 4.751882870001253e-06,
      "loss": 0.0207,
      "step": 1612940
    },
    {
      "epoch": 2.63964441651447,
      "grad_norm": 0.12450806051492691,
      "learning_rate": 4.751816977787736e-06,
      "loss": 0.0103,
      "step": 1612960
    },
    {
      "epoch": 2.6396771469531233,
      "grad_norm": 0.16231797635555267,
      "learning_rate": 4.751751085574218e-06,
      "loss": 0.017,
      "step": 1612980
    },
    {
      "epoch": 2.639709877391777,
      "grad_norm": 0.29160216450691223,
      "learning_rate": 4.751685193360701e-06,
      "loss": 0.0122,
      "step": 1613000
    },
    {
      "epoch": 2.63974260783043,
      "grad_norm": 0.18024121224880219,
      "learning_rate": 4.751619301147184e-06,
      "loss": 0.0139,
      "step": 1613020
    },
    {
      "epoch": 2.6397753382690836,
      "grad_norm": 0.16295188665390015,
      "learning_rate": 4.7515534089336666e-06,
      "loss": 0.0081,
      "step": 1613040
    },
    {
      "epoch": 2.6398080687077368,
      "grad_norm": 1.4462392330169678,
      "learning_rate": 4.751487516720149e-06,
      "loss": 0.0108,
      "step": 1613060
    },
    {
      "epoch": 2.6398407991463904,
      "grad_norm": 0.3091140389442444,
      "learning_rate": 4.751421624506633e-06,
      "loss": 0.0103,
      "step": 1613080
    },
    {
      "epoch": 2.6398735295850435,
      "grad_norm": 0.2390154004096985,
      "learning_rate": 4.751355732293116e-06,
      "loss": 0.012,
      "step": 1613100
    },
    {
      "epoch": 2.6399062600236967,
      "grad_norm": 0.24504977464675903,
      "learning_rate": 4.751289840079598e-06,
      "loss": 0.0126,
      "step": 1613120
    },
    {
      "epoch": 2.6399389904623503,
      "grad_norm": 0.5318848490715027,
      "learning_rate": 4.751223947866081e-06,
      "loss": 0.0129,
      "step": 1613140
    },
    {
      "epoch": 2.6399717209010034,
      "grad_norm": 0.4929550588130951,
      "learning_rate": 4.751158055652564e-06,
      "loss": 0.0166,
      "step": 1613160
    },
    {
      "epoch": 2.640004451339657,
      "grad_norm": 0.11129248142242432,
      "learning_rate": 4.751092163439047e-06,
      "loss": 0.0142,
      "step": 1613180
    },
    {
      "epoch": 2.64003718177831,
      "grad_norm": 0.2275983691215515,
      "learning_rate": 4.751026271225529e-06,
      "loss": 0.0077,
      "step": 1613200
    },
    {
      "epoch": 2.6400699122169637,
      "grad_norm": 0.12545165419578552,
      "learning_rate": 4.750960379012012e-06,
      "loss": 0.0211,
      "step": 1613220
    },
    {
      "epoch": 2.640102642655617,
      "grad_norm": 0.45256271958351135,
      "learning_rate": 4.750894486798495e-06,
      "loss": 0.0197,
      "step": 1613240
    },
    {
      "epoch": 2.64013537309427,
      "grad_norm": 0.9820348024368286,
      "learning_rate": 4.750828594584978e-06,
      "loss": 0.0085,
      "step": 1613260
    },
    {
      "epoch": 2.6401681035329236,
      "grad_norm": 0.559428334236145,
      "learning_rate": 4.750762702371461e-06,
      "loss": 0.0272,
      "step": 1613280
    },
    {
      "epoch": 2.640200833971577,
      "grad_norm": 0.175399050116539,
      "learning_rate": 4.750696810157944e-06,
      "loss": 0.0249,
      "step": 1613300
    },
    {
      "epoch": 2.6402335644102304,
      "grad_norm": 0.24177046120166779,
      "learning_rate": 4.7506309179444275e-06,
      "loss": 0.0166,
      "step": 1613320
    },
    {
      "epoch": 2.6402662948488835,
      "grad_norm": 0.32674872875213623,
      "learning_rate": 4.75056502573091e-06,
      "loss": 0.0106,
      "step": 1613340
    },
    {
      "epoch": 2.640299025287537,
      "grad_norm": 0.40987205505371094,
      "learning_rate": 4.750499133517393e-06,
      "loss": 0.0098,
      "step": 1613360
    },
    {
      "epoch": 2.6403317557261903,
      "grad_norm": 0.1532837450504303,
      "learning_rate": 4.750433241303876e-06,
      "loss": 0.0133,
      "step": 1613380
    },
    {
      "epoch": 2.6403644861648434,
      "grad_norm": 0.11818855255842209,
      "learning_rate": 4.750367349090358e-06,
      "loss": 0.0124,
      "step": 1613400
    },
    {
      "epoch": 2.640397216603497,
      "grad_norm": 0.37338802218437195,
      "learning_rate": 4.750301456876841e-06,
      "loss": 0.0123,
      "step": 1613420
    },
    {
      "epoch": 2.64042994704215,
      "grad_norm": 0.10516436398029327,
      "learning_rate": 4.750235564663324e-06,
      "loss": 0.0105,
      "step": 1613440
    },
    {
      "epoch": 2.6404626774808033,
      "grad_norm": 0.3163159489631653,
      "learning_rate": 4.750169672449807e-06,
      "loss": 0.0175,
      "step": 1613460
    },
    {
      "epoch": 2.640495407919457,
      "grad_norm": 0.1362529695034027,
      "learning_rate": 4.75010378023629e-06,
      "loss": 0.0127,
      "step": 1613480
    },
    {
      "epoch": 2.6405281383581105,
      "grad_norm": 0.20462356507778168,
      "learning_rate": 4.750037888022773e-06,
      "loss": 0.0138,
      "step": 1613500
    },
    {
      "epoch": 2.6405608687967637,
      "grad_norm": 0.046317700296640396,
      "learning_rate": 4.749971995809256e-06,
      "loss": 0.0186,
      "step": 1613520
    },
    {
      "epoch": 2.640593599235417,
      "grad_norm": 0.538832426071167,
      "learning_rate": 4.7499061035957384e-06,
      "loss": 0.0188,
      "step": 1613540
    },
    {
      "epoch": 2.6406263296740704,
      "grad_norm": 0.10863538086414337,
      "learning_rate": 4.749840211382221e-06,
      "loss": 0.0063,
      "step": 1613560
    },
    {
      "epoch": 2.6406590601127236,
      "grad_norm": 0.6754353046417236,
      "learning_rate": 4.749774319168704e-06,
      "loss": 0.0141,
      "step": 1613580
    },
    {
      "epoch": 2.6406917905513767,
      "grad_norm": 0.1231926679611206,
      "learning_rate": 4.749708426955187e-06,
      "loss": 0.0121,
      "step": 1613600
    },
    {
      "epoch": 2.6407245209900303,
      "grad_norm": 0.21792319416999817,
      "learning_rate": 4.749642534741669e-06,
      "loss": 0.0127,
      "step": 1613620
    },
    {
      "epoch": 2.640757251428684,
      "grad_norm": 0.08207215368747711,
      "learning_rate": 4.749576642528153e-06,
      "loss": 0.0085,
      "step": 1613640
    },
    {
      "epoch": 2.640789981867337,
      "grad_norm": 0.357013463973999,
      "learning_rate": 4.749510750314636e-06,
      "loss": 0.0199,
      "step": 1613660
    },
    {
      "epoch": 2.64082271230599,
      "grad_norm": 0.18222379684448242,
      "learning_rate": 4.7494448581011185e-06,
      "loss": 0.0113,
      "step": 1613680
    },
    {
      "epoch": 2.640855442744644,
      "grad_norm": 0.2826369106769562,
      "learning_rate": 4.749378965887601e-06,
      "loss": 0.0129,
      "step": 1613700
    },
    {
      "epoch": 2.640888173183297,
      "grad_norm": 0.7512059211730957,
      "learning_rate": 4.749313073674085e-06,
      "loss": 0.0087,
      "step": 1613720
    },
    {
      "epoch": 2.64092090362195,
      "grad_norm": 0.6434996128082275,
      "learning_rate": 4.7492471814605675e-06,
      "loss": 0.0114,
      "step": 1613740
    },
    {
      "epoch": 2.6409536340606037,
      "grad_norm": 0.2068549394607544,
      "learning_rate": 4.74918128924705e-06,
      "loss": 0.0126,
      "step": 1613760
    },
    {
      "epoch": 2.6409863644992573,
      "grad_norm": 0.25637558102607727,
      "learning_rate": 4.749115397033533e-06,
      "loss": 0.0156,
      "step": 1613780
    },
    {
      "epoch": 2.6410190949379104,
      "grad_norm": 0.4402920603752136,
      "learning_rate": 4.749049504820016e-06,
      "loss": 0.0165,
      "step": 1613800
    },
    {
      "epoch": 2.6410518253765636,
      "grad_norm": 0.2211313545703888,
      "learning_rate": 4.7489836126064985e-06,
      "loss": 0.0163,
      "step": 1613820
    },
    {
      "epoch": 2.641084555815217,
      "grad_norm": 0.3524969816207886,
      "learning_rate": 4.748917720392981e-06,
      "loss": 0.0091,
      "step": 1613840
    },
    {
      "epoch": 2.6411172862538703,
      "grad_norm": 0.1614035665988922,
      "learning_rate": 4.748851828179464e-06,
      "loss": 0.0109,
      "step": 1613860
    },
    {
      "epoch": 2.6411500166925235,
      "grad_norm": 0.459097683429718,
      "learning_rate": 4.7487859359659476e-06,
      "loss": 0.0188,
      "step": 1613880
    },
    {
      "epoch": 2.641182747131177,
      "grad_norm": 0.17429086565971375,
      "learning_rate": 4.74872004375243e-06,
      "loss": 0.0117,
      "step": 1613900
    },
    {
      "epoch": 2.6412154775698307,
      "grad_norm": 0.264312744140625,
      "learning_rate": 4.748654151538913e-06,
      "loss": 0.0095,
      "step": 1613920
    },
    {
      "epoch": 2.641248208008484,
      "grad_norm": 0.6026067733764648,
      "learning_rate": 4.748588259325396e-06,
      "loss": 0.0142,
      "step": 1613940
    },
    {
      "epoch": 2.641280938447137,
      "grad_norm": 0.2729441523551941,
      "learning_rate": 4.7485223671118785e-06,
      "loss": 0.0144,
      "step": 1613960
    },
    {
      "epoch": 2.6413136688857906,
      "grad_norm": 0.3194233775138855,
      "learning_rate": 4.748456474898362e-06,
      "loss": 0.0123,
      "step": 1613980
    },
    {
      "epoch": 2.6413463993244437,
      "grad_norm": 0.5927219986915588,
      "learning_rate": 4.748390582684845e-06,
      "loss": 0.0136,
      "step": 1614000
    },
    {
      "epoch": 2.641379129763097,
      "grad_norm": 0.12780345976352692,
      "learning_rate": 4.748324690471328e-06,
      "loss": 0.0144,
      "step": 1614020
    },
    {
      "epoch": 2.6414118602017505,
      "grad_norm": 0.21490263938903809,
      "learning_rate": 4.74825879825781e-06,
      "loss": 0.0175,
      "step": 1614040
    },
    {
      "epoch": 2.6414445906404036,
      "grad_norm": 0.08972929418087006,
      "learning_rate": 4.748192906044293e-06,
      "loss": 0.011,
      "step": 1614060
    },
    {
      "epoch": 2.641477321079057,
      "grad_norm": 0.7261695265769958,
      "learning_rate": 4.748127013830776e-06,
      "loss": 0.0089,
      "step": 1614080
    },
    {
      "epoch": 2.6415100515177103,
      "grad_norm": 0.4442974328994751,
      "learning_rate": 4.7480611216172585e-06,
      "loss": 0.0135,
      "step": 1614100
    },
    {
      "epoch": 2.641542781956364,
      "grad_norm": 0.49457672238349915,
      "learning_rate": 4.747995229403742e-06,
      "loss": 0.01,
      "step": 1614120
    },
    {
      "epoch": 2.641575512395017,
      "grad_norm": 0.46974098682403564,
      "learning_rate": 4.747929337190225e-06,
      "loss": 0.007,
      "step": 1614140
    },
    {
      "epoch": 2.6416082428336702,
      "grad_norm": 0.9261959791183472,
      "learning_rate": 4.747863444976708e-06,
      "loss": 0.0225,
      "step": 1614160
    },
    {
      "epoch": 2.641640973272324,
      "grad_norm": 0.7001960277557373,
      "learning_rate": 4.74779755276319e-06,
      "loss": 0.0196,
      "step": 1614180
    },
    {
      "epoch": 2.641673703710977,
      "grad_norm": 0.16502225399017334,
      "learning_rate": 4.747731660549673e-06,
      "loss": 0.0135,
      "step": 1614200
    },
    {
      "epoch": 2.6417064341496306,
      "grad_norm": 0.1848045289516449,
      "learning_rate": 4.747665768336156e-06,
      "loss": 0.0101,
      "step": 1614220
    },
    {
      "epoch": 2.6417391645882837,
      "grad_norm": 0.16084793210029602,
      "learning_rate": 4.7475998761226386e-06,
      "loss": 0.0138,
      "step": 1614240
    },
    {
      "epoch": 2.6417718950269373,
      "grad_norm": 0.15286225080490112,
      "learning_rate": 4.747533983909121e-06,
      "loss": 0.0115,
      "step": 1614260
    },
    {
      "epoch": 2.6418046254655905,
      "grad_norm": 0.25459831953048706,
      "learning_rate": 4.747468091695605e-06,
      "loss": 0.0108,
      "step": 1614280
    },
    {
      "epoch": 2.6418373559042436,
      "grad_norm": 0.4698779284954071,
      "learning_rate": 4.747402199482088e-06,
      "loss": 0.0137,
      "step": 1614300
    },
    {
      "epoch": 2.641870086342897,
      "grad_norm": 0.09443413466215134,
      "learning_rate": 4.74733630726857e-06,
      "loss": 0.0142,
      "step": 1614320
    },
    {
      "epoch": 2.6419028167815504,
      "grad_norm": 0.18462806940078735,
      "learning_rate": 4.747270415055054e-06,
      "loss": 0.0166,
      "step": 1614340
    },
    {
      "epoch": 2.641935547220204,
      "grad_norm": 0.5340941548347473,
      "learning_rate": 4.747204522841537e-06,
      "loss": 0.0115,
      "step": 1614360
    },
    {
      "epoch": 2.641968277658857,
      "grad_norm": 0.4428512454032898,
      "learning_rate": 4.7471386306280194e-06,
      "loss": 0.02,
      "step": 1614380
    },
    {
      "epoch": 2.6420010080975107,
      "grad_norm": 0.35219699144363403,
      "learning_rate": 4.747072738414502e-06,
      "loss": 0.0131,
      "step": 1614400
    },
    {
      "epoch": 2.642033738536164,
      "grad_norm": 0.18391954898834229,
      "learning_rate": 4.747006846200985e-06,
      "loss": 0.0146,
      "step": 1614420
    },
    {
      "epoch": 2.642066468974817,
      "grad_norm": 0.40317511558532715,
      "learning_rate": 4.746940953987468e-06,
      "loss": 0.0163,
      "step": 1614440
    },
    {
      "epoch": 2.6420991994134706,
      "grad_norm": 0.31013572216033936,
      "learning_rate": 4.74687506177395e-06,
      "loss": 0.0116,
      "step": 1614460
    },
    {
      "epoch": 2.6421319298521238,
      "grad_norm": 0.5721510648727417,
      "learning_rate": 4.746809169560433e-06,
      "loss": 0.018,
      "step": 1614480
    },
    {
      "epoch": 2.6421646602907773,
      "grad_norm": 0.2042255848646164,
      "learning_rate": 4.746743277346917e-06,
      "loss": 0.0115,
      "step": 1614500
    },
    {
      "epoch": 2.6421973907294305,
      "grad_norm": 1.6545543670654297,
      "learning_rate": 4.7466773851333995e-06,
      "loss": 0.0145,
      "step": 1614520
    },
    {
      "epoch": 2.642230121168084,
      "grad_norm": 0.15488941967487335,
      "learning_rate": 4.746611492919882e-06,
      "loss": 0.0112,
      "step": 1614540
    },
    {
      "epoch": 2.6422628516067372,
      "grad_norm": 0.52392578125,
      "learning_rate": 4.746545600706365e-06,
      "loss": 0.0103,
      "step": 1614560
    },
    {
      "epoch": 2.6422955820453904,
      "grad_norm": 0.7241544723510742,
      "learning_rate": 4.746479708492848e-06,
      "loss": 0.0121,
      "step": 1614580
    },
    {
      "epoch": 2.642328312484044,
      "grad_norm": 0.12534794211387634,
      "learning_rate": 4.7464138162793304e-06,
      "loss": 0.0089,
      "step": 1614600
    },
    {
      "epoch": 2.642361042922697,
      "grad_norm": 0.5369741916656494,
      "learning_rate": 4.746347924065813e-06,
      "loss": 0.0121,
      "step": 1614620
    },
    {
      "epoch": 2.6423937733613507,
      "grad_norm": 0.1501123309135437,
      "learning_rate": 4.746282031852296e-06,
      "loss": 0.014,
      "step": 1614640
    },
    {
      "epoch": 2.642426503800004,
      "grad_norm": 0.12611882388591766,
      "learning_rate": 4.746216139638779e-06,
      "loss": 0.0132,
      "step": 1614660
    },
    {
      "epoch": 2.6424592342386575,
      "grad_norm": 0.2952895164489746,
      "learning_rate": 4.746150247425262e-06,
      "loss": 0.0233,
      "step": 1614680
    },
    {
      "epoch": 2.6424919646773106,
      "grad_norm": 0.36902275681495667,
      "learning_rate": 4.746084355211745e-06,
      "loss": 0.0132,
      "step": 1614700
    },
    {
      "epoch": 2.6425246951159638,
      "grad_norm": 0.4796920716762543,
      "learning_rate": 4.746018462998228e-06,
      "loss": 0.0155,
      "step": 1614720
    },
    {
      "epoch": 2.6425574255546174,
      "grad_norm": 0.13674719631671906,
      "learning_rate": 4.745952570784711e-06,
      "loss": 0.0157,
      "step": 1614740
    },
    {
      "epoch": 2.6425901559932705,
      "grad_norm": 0.31217411160469055,
      "learning_rate": 4.745886678571194e-06,
      "loss": 0.0182,
      "step": 1614760
    },
    {
      "epoch": 2.642622886431924,
      "grad_norm": 0.13325990736484528,
      "learning_rate": 4.745820786357677e-06,
      "loss": 0.0133,
      "step": 1614780
    },
    {
      "epoch": 2.6426556168705773,
      "grad_norm": 0.23785518109798431,
      "learning_rate": 4.7457548941441595e-06,
      "loss": 0.0148,
      "step": 1614800
    },
    {
      "epoch": 2.642688347309231,
      "grad_norm": 2.3018269538879395,
      "learning_rate": 4.745689001930642e-06,
      "loss": 0.0126,
      "step": 1614820
    },
    {
      "epoch": 2.642721077747884,
      "grad_norm": 1.2475274801254272,
      "learning_rate": 4.745623109717125e-06,
      "loss": 0.0105,
      "step": 1614840
    },
    {
      "epoch": 2.642753808186537,
      "grad_norm": 0.22388356924057007,
      "learning_rate": 4.745557217503608e-06,
      "loss": 0.0121,
      "step": 1614860
    },
    {
      "epoch": 2.6427865386251908,
      "grad_norm": 0.1009494811296463,
      "learning_rate": 4.7454913252900905e-06,
      "loss": 0.0136,
      "step": 1614880
    },
    {
      "epoch": 2.642819269063844,
      "grad_norm": 0.33723708987236023,
      "learning_rate": 4.745425433076574e-06,
      "loss": 0.0134,
      "step": 1614900
    },
    {
      "epoch": 2.6428519995024975,
      "grad_norm": 0.29341328144073486,
      "learning_rate": 4.745359540863057e-06,
      "loss": 0.0139,
      "step": 1614920
    },
    {
      "epoch": 2.6428847299411506,
      "grad_norm": 0.4279573857784271,
      "learning_rate": 4.7452936486495395e-06,
      "loss": 0.0164,
      "step": 1614940
    },
    {
      "epoch": 2.6429174603798042,
      "grad_norm": 0.2802189290523529,
      "learning_rate": 4.745227756436022e-06,
      "loss": 0.0115,
      "step": 1614960
    },
    {
      "epoch": 2.6429501908184574,
      "grad_norm": 0.4595884382724762,
      "learning_rate": 4.745161864222505e-06,
      "loss": 0.0113,
      "step": 1614980
    },
    {
      "epoch": 2.6429829212571105,
      "grad_norm": 0.22999544441699982,
      "learning_rate": 4.745095972008988e-06,
      "loss": 0.0195,
      "step": 1615000
    },
    {
      "epoch": 2.643015651695764,
      "grad_norm": 0.3473907709121704,
      "learning_rate": 4.7450300797954705e-06,
      "loss": 0.0146,
      "step": 1615020
    },
    {
      "epoch": 2.6430483821344173,
      "grad_norm": 0.7624312043190002,
      "learning_rate": 4.744964187581954e-06,
      "loss": 0.0156,
      "step": 1615040
    },
    {
      "epoch": 2.6430811125730704,
      "grad_norm": 0.35892412066459656,
      "learning_rate": 4.744898295368437e-06,
      "loss": 0.0142,
      "step": 1615060
    },
    {
      "epoch": 2.643113843011724,
      "grad_norm": 0.12770076096057892,
      "learning_rate": 4.7448324031549196e-06,
      "loss": 0.0083,
      "step": 1615080
    },
    {
      "epoch": 2.6431465734503776,
      "grad_norm": 0.6503980755805969,
      "learning_rate": 4.744766510941402e-06,
      "loss": 0.0182,
      "step": 1615100
    },
    {
      "epoch": 2.6431793038890308,
      "grad_norm": 0.15520305931568146,
      "learning_rate": 4.744700618727885e-06,
      "loss": 0.0204,
      "step": 1615120
    },
    {
      "epoch": 2.643212034327684,
      "grad_norm": 0.3801490068435669,
      "learning_rate": 4.744634726514369e-06,
      "loss": 0.0126,
      "step": 1615140
    },
    {
      "epoch": 2.6432447647663375,
      "grad_norm": 0.44898006319999695,
      "learning_rate": 4.744568834300851e-06,
      "loss": 0.0146,
      "step": 1615160
    },
    {
      "epoch": 2.6432774952049907,
      "grad_norm": 0.11801512539386749,
      "learning_rate": 4.744502942087334e-06,
      "loss": 0.0133,
      "step": 1615180
    },
    {
      "epoch": 2.643310225643644,
      "grad_norm": 0.34457504749298096,
      "learning_rate": 4.744437049873817e-06,
      "loss": 0.0131,
      "step": 1615200
    },
    {
      "epoch": 2.6433429560822974,
      "grad_norm": 0.6071756482124329,
      "learning_rate": 4.7443711576603e-06,
      "loss": 0.0163,
      "step": 1615220
    },
    {
      "epoch": 2.643375686520951,
      "grad_norm": 0.2758568823337555,
      "learning_rate": 4.744305265446782e-06,
      "loss": 0.013,
      "step": 1615240
    },
    {
      "epoch": 2.643408416959604,
      "grad_norm": 0.5883260369300842,
      "learning_rate": 4.744239373233265e-06,
      "loss": 0.0119,
      "step": 1615260
    },
    {
      "epoch": 2.6434411473982573,
      "grad_norm": 0.5378352403640747,
      "learning_rate": 4.744173481019748e-06,
      "loss": 0.0098,
      "step": 1615280
    },
    {
      "epoch": 2.643473877836911,
      "grad_norm": 0.2898408770561218,
      "learning_rate": 4.744107588806231e-06,
      "loss": 0.0144,
      "step": 1615300
    },
    {
      "epoch": 2.643506608275564,
      "grad_norm": 0.07653219997882843,
      "learning_rate": 4.744041696592714e-06,
      "loss": 0.0175,
      "step": 1615320
    },
    {
      "epoch": 2.643539338714217,
      "grad_norm": 0.09234590083360672,
      "learning_rate": 4.743975804379197e-06,
      "loss": 0.0082,
      "step": 1615340
    },
    {
      "epoch": 2.643572069152871,
      "grad_norm": 0.1298574060201645,
      "learning_rate": 4.74390991216568e-06,
      "loss": 0.0113,
      "step": 1615360
    },
    {
      "epoch": 2.6436047995915244,
      "grad_norm": 0.45264309644699097,
      "learning_rate": 4.743844019952162e-06,
      "loss": 0.0067,
      "step": 1615380
    },
    {
      "epoch": 2.6436375300301775,
      "grad_norm": 0.20809131860733032,
      "learning_rate": 4.743778127738646e-06,
      "loss": 0.0126,
      "step": 1615400
    },
    {
      "epoch": 2.6436702604688307,
      "grad_norm": 0.4887913763523102,
      "learning_rate": 4.743712235525129e-06,
      "loss": 0.0136,
      "step": 1615420
    },
    {
      "epoch": 2.6437029909074843,
      "grad_norm": 0.10157246142625809,
      "learning_rate": 4.743646343311611e-06,
      "loss": 0.0146,
      "step": 1615440
    },
    {
      "epoch": 2.6437357213461374,
      "grad_norm": 0.3562104105949402,
      "learning_rate": 4.743580451098094e-06,
      "loss": 0.0153,
      "step": 1615460
    },
    {
      "epoch": 2.6437684517847906,
      "grad_norm": 0.3676992356777191,
      "learning_rate": 4.743514558884577e-06,
      "loss": 0.0164,
      "step": 1615480
    },
    {
      "epoch": 2.643801182223444,
      "grad_norm": 0.20407769083976746,
      "learning_rate": 4.74344866667106e-06,
      "loss": 0.0154,
      "step": 1615500
    },
    {
      "epoch": 2.6438339126620978,
      "grad_norm": 0.552134096622467,
      "learning_rate": 4.743382774457543e-06,
      "loss": 0.0101,
      "step": 1615520
    },
    {
      "epoch": 2.643866643100751,
      "grad_norm": 0.14335426688194275,
      "learning_rate": 4.743316882244026e-06,
      "loss": 0.0098,
      "step": 1615540
    },
    {
      "epoch": 2.643899373539404,
      "grad_norm": 0.1591571718454361,
      "learning_rate": 4.743250990030509e-06,
      "loss": 0.0137,
      "step": 1615560
    },
    {
      "epoch": 2.6439321039780577,
      "grad_norm": 0.2895570397377014,
      "learning_rate": 4.7431850978169914e-06,
      "loss": 0.0167,
      "step": 1615580
    },
    {
      "epoch": 2.643964834416711,
      "grad_norm": 0.20045062899589539,
      "learning_rate": 4.743119205603474e-06,
      "loss": 0.0152,
      "step": 1615600
    },
    {
      "epoch": 2.643997564855364,
      "grad_norm": 0.49146294593811035,
      "learning_rate": 4.743053313389957e-06,
      "loss": 0.0116,
      "step": 1615620
    },
    {
      "epoch": 2.6440302952940176,
      "grad_norm": 0.7097955942153931,
      "learning_rate": 4.74298742117644e-06,
      "loss": 0.0164,
      "step": 1615640
    },
    {
      "epoch": 2.6440630257326707,
      "grad_norm": 0.6268488764762878,
      "learning_rate": 4.742921528962922e-06,
      "loss": 0.0148,
      "step": 1615660
    },
    {
      "epoch": 2.6440957561713243,
      "grad_norm": 0.4909524619579315,
      "learning_rate": 4.742855636749405e-06,
      "loss": 0.0163,
      "step": 1615680
    },
    {
      "epoch": 2.6441284866099775,
      "grad_norm": 0.13137170672416687,
      "learning_rate": 4.742789744535889e-06,
      "loss": 0.0152,
      "step": 1615700
    },
    {
      "epoch": 2.644161217048631,
      "grad_norm": 0.15963411331176758,
      "learning_rate": 4.7427238523223715e-06,
      "loss": 0.0108,
      "step": 1615720
    },
    {
      "epoch": 2.644193947487284,
      "grad_norm": 0.044505126774311066,
      "learning_rate": 4.742657960108854e-06,
      "loss": 0.0143,
      "step": 1615740
    },
    {
      "epoch": 2.6442266779259374,
      "grad_norm": 0.5414465069770813,
      "learning_rate": 4.742592067895338e-06,
      "loss": 0.0161,
      "step": 1615760
    },
    {
      "epoch": 2.644259408364591,
      "grad_norm": 0.22956331074237823,
      "learning_rate": 4.7425261756818205e-06,
      "loss": 0.0206,
      "step": 1615780
    },
    {
      "epoch": 2.644292138803244,
      "grad_norm": 0.26202088594436646,
      "learning_rate": 4.742460283468303e-06,
      "loss": 0.0126,
      "step": 1615800
    },
    {
      "epoch": 2.6443248692418977,
      "grad_norm": 0.27010592818260193,
      "learning_rate": 4.742394391254786e-06,
      "loss": 0.0173,
      "step": 1615820
    },
    {
      "epoch": 2.644357599680551,
      "grad_norm": 0.5159419775009155,
      "learning_rate": 4.742328499041269e-06,
      "loss": 0.0115,
      "step": 1615840
    },
    {
      "epoch": 2.6443903301192044,
      "grad_norm": 0.6654971837997437,
      "learning_rate": 4.7422626068277515e-06,
      "loss": 0.0115,
      "step": 1615860
    },
    {
      "epoch": 2.6444230605578576,
      "grad_norm": 0.2645438313484192,
      "learning_rate": 4.742196714614234e-06,
      "loss": 0.015,
      "step": 1615880
    },
    {
      "epoch": 2.6444557909965107,
      "grad_norm": 0.6109879016876221,
      "learning_rate": 4.742130822400717e-06,
      "loss": 0.0131,
      "step": 1615900
    },
    {
      "epoch": 2.6444885214351643,
      "grad_norm": 1.8127639293670654,
      "learning_rate": 4.7420649301872006e-06,
      "loss": 0.021,
      "step": 1615920
    },
    {
      "epoch": 2.6445212518738175,
      "grad_norm": 0.5039730668067932,
      "learning_rate": 4.741999037973683e-06,
      "loss": 0.0144,
      "step": 1615940
    },
    {
      "epoch": 2.644553982312471,
      "grad_norm": 0.11454788595438004,
      "learning_rate": 4.741933145760166e-06,
      "loss": 0.0171,
      "step": 1615960
    },
    {
      "epoch": 2.6445867127511242,
      "grad_norm": 0.23398594558238983,
      "learning_rate": 4.741867253546649e-06,
      "loss": 0.0157,
      "step": 1615980
    },
    {
      "epoch": 2.644619443189778,
      "grad_norm": 0.11406928300857544,
      "learning_rate": 4.7418013613331315e-06,
      "loss": 0.0154,
      "step": 1616000
    },
    {
      "epoch": 2.644652173628431,
      "grad_norm": 0.2172870934009552,
      "learning_rate": 4.741735469119614e-06,
      "loss": 0.0149,
      "step": 1616020
    },
    {
      "epoch": 2.644684904067084,
      "grad_norm": 0.482210636138916,
      "learning_rate": 4.741669576906097e-06,
      "loss": 0.011,
      "step": 1616040
    },
    {
      "epoch": 2.6447176345057377,
      "grad_norm": 0.8460125923156738,
      "learning_rate": 4.74160368469258e-06,
      "loss": 0.0127,
      "step": 1616060
    },
    {
      "epoch": 2.644750364944391,
      "grad_norm": 0.9183889627456665,
      "learning_rate": 4.7415377924790625e-06,
      "loss": 0.0122,
      "step": 1616080
    },
    {
      "epoch": 2.6447830953830445,
      "grad_norm": 0.19962747395038605,
      "learning_rate": 4.741471900265546e-06,
      "loss": 0.0158,
      "step": 1616100
    },
    {
      "epoch": 2.6448158258216976,
      "grad_norm": 0.5265708565711975,
      "learning_rate": 4.741406008052029e-06,
      "loss": 0.0185,
      "step": 1616120
    },
    {
      "epoch": 2.644848556260351,
      "grad_norm": 0.3524720072746277,
      "learning_rate": 4.7413401158385115e-06,
      "loss": 0.0106,
      "step": 1616140
    },
    {
      "epoch": 2.6448812866990044,
      "grad_norm": 0.30751970410346985,
      "learning_rate": 4.741274223624995e-06,
      "loss": 0.0183,
      "step": 1616160
    },
    {
      "epoch": 2.6449140171376575,
      "grad_norm": 0.5475156903266907,
      "learning_rate": 4.741208331411478e-06,
      "loss": 0.0147,
      "step": 1616180
    },
    {
      "epoch": 2.644946747576311,
      "grad_norm": 0.6884445548057556,
      "learning_rate": 4.741142439197961e-06,
      "loss": 0.0137,
      "step": 1616200
    },
    {
      "epoch": 2.6449794780149642,
      "grad_norm": 0.764481782913208,
      "learning_rate": 4.741076546984443e-06,
      "loss": 0.0131,
      "step": 1616220
    },
    {
      "epoch": 2.645012208453618,
      "grad_norm": 0.25036612153053284,
      "learning_rate": 4.741010654770926e-06,
      "loss": 0.0123,
      "step": 1616240
    },
    {
      "epoch": 2.645044938892271,
      "grad_norm": 0.21643011271953583,
      "learning_rate": 4.740944762557409e-06,
      "loss": 0.0135,
      "step": 1616260
    },
    {
      "epoch": 2.6450776693309246,
      "grad_norm": 0.347629576921463,
      "learning_rate": 4.7408788703438916e-06,
      "loss": 0.0186,
      "step": 1616280
    },
    {
      "epoch": 2.6451103997695777,
      "grad_norm": 0.40570494532585144,
      "learning_rate": 4.740812978130374e-06,
      "loss": 0.0088,
      "step": 1616300
    },
    {
      "epoch": 2.645143130208231,
      "grad_norm": 0.06405915319919586,
      "learning_rate": 4.740747085916858e-06,
      "loss": 0.0126,
      "step": 1616320
    },
    {
      "epoch": 2.6451758606468845,
      "grad_norm": 0.2878980040550232,
      "learning_rate": 4.740681193703341e-06,
      "loss": 0.0123,
      "step": 1616340
    },
    {
      "epoch": 2.6452085910855376,
      "grad_norm": 0.32236191630363464,
      "learning_rate": 4.740615301489823e-06,
      "loss": 0.0236,
      "step": 1616360
    },
    {
      "epoch": 2.6452413215241912,
      "grad_norm": 0.20068787038326263,
      "learning_rate": 4.740549409276306e-06,
      "loss": 0.0141,
      "step": 1616380
    },
    {
      "epoch": 2.6452740519628444,
      "grad_norm": 0.4743466079235077,
      "learning_rate": 4.740483517062789e-06,
      "loss": 0.0114,
      "step": 1616400
    },
    {
      "epoch": 2.645306782401498,
      "grad_norm": 0.22302263975143433,
      "learning_rate": 4.740417624849272e-06,
      "loss": 0.0092,
      "step": 1616420
    },
    {
      "epoch": 2.645339512840151,
      "grad_norm": 0.25259077548980713,
      "learning_rate": 4.740351732635754e-06,
      "loss": 0.0128,
      "step": 1616440
    },
    {
      "epoch": 2.6453722432788043,
      "grad_norm": 0.523604691028595,
      "learning_rate": 4.740285840422238e-06,
      "loss": 0.0163,
      "step": 1616460
    },
    {
      "epoch": 2.645404973717458,
      "grad_norm": 0.06822098046541214,
      "learning_rate": 4.740219948208721e-06,
      "loss": 0.0187,
      "step": 1616480
    },
    {
      "epoch": 2.645437704156111,
      "grad_norm": 0.1581495851278305,
      "learning_rate": 4.740154055995203e-06,
      "loss": 0.0168,
      "step": 1616500
    },
    {
      "epoch": 2.645470434594764,
      "grad_norm": 0.8068113923072815,
      "learning_rate": 4.740088163781686e-06,
      "loss": 0.0131,
      "step": 1616520
    },
    {
      "epoch": 2.6455031650334178,
      "grad_norm": 1.452763557434082,
      "learning_rate": 4.740022271568169e-06,
      "loss": 0.0131,
      "step": 1616540
    },
    {
      "epoch": 2.6455358954720714,
      "grad_norm": 0.7393039464950562,
      "learning_rate": 4.7399563793546525e-06,
      "loss": 0.0175,
      "step": 1616560
    },
    {
      "epoch": 2.6455686259107245,
      "grad_norm": 0.43198326230049133,
      "learning_rate": 4.739890487141135e-06,
      "loss": 0.0123,
      "step": 1616580
    },
    {
      "epoch": 2.6456013563493777,
      "grad_norm": 0.4005447328090668,
      "learning_rate": 4.739824594927618e-06,
      "loss": 0.0088,
      "step": 1616600
    },
    {
      "epoch": 2.6456340867880312,
      "grad_norm": 0.20494642853736877,
      "learning_rate": 4.739758702714101e-06,
      "loss": 0.018,
      "step": 1616620
    },
    {
      "epoch": 2.6456668172266844,
      "grad_norm": 0.3954262435436249,
      "learning_rate": 4.7396928105005834e-06,
      "loss": 0.0135,
      "step": 1616640
    },
    {
      "epoch": 2.6456995476653375,
      "grad_norm": 0.7686087489128113,
      "learning_rate": 4.739626918287066e-06,
      "loss": 0.0132,
      "step": 1616660
    },
    {
      "epoch": 2.645732278103991,
      "grad_norm": 0.6059328317642212,
      "learning_rate": 4.739561026073549e-06,
      "loss": 0.0189,
      "step": 1616680
    },
    {
      "epoch": 2.6457650085426447,
      "grad_norm": 0.7818160057067871,
      "learning_rate": 4.739495133860032e-06,
      "loss": 0.0161,
      "step": 1616700
    },
    {
      "epoch": 2.645797738981298,
      "grad_norm": 0.184144526720047,
      "learning_rate": 4.739429241646515e-06,
      "loss": 0.0086,
      "step": 1616720
    },
    {
      "epoch": 2.645830469419951,
      "grad_norm": 0.5217957496643066,
      "learning_rate": 4.739363349432998e-06,
      "loss": 0.0119,
      "step": 1616740
    },
    {
      "epoch": 2.6458631998586046,
      "grad_norm": 0.6178199052810669,
      "learning_rate": 4.739297457219481e-06,
      "loss": 0.016,
      "step": 1616760
    },
    {
      "epoch": 2.645895930297258,
      "grad_norm": 0.19761158525943756,
      "learning_rate": 4.7392315650059635e-06,
      "loss": 0.0131,
      "step": 1616780
    },
    {
      "epoch": 2.645928660735911,
      "grad_norm": 0.4038739800453186,
      "learning_rate": 4.739165672792447e-06,
      "loss": 0.0144,
      "step": 1616800
    },
    {
      "epoch": 2.6459613911745645,
      "grad_norm": 0.36754468083381653,
      "learning_rate": 4.73909978057893e-06,
      "loss": 0.0175,
      "step": 1616820
    },
    {
      "epoch": 2.645994121613218,
      "grad_norm": 0.4361024498939514,
      "learning_rate": 4.7390338883654125e-06,
      "loss": 0.0127,
      "step": 1616840
    },
    {
      "epoch": 2.6460268520518713,
      "grad_norm": 0.4704585671424866,
      "learning_rate": 4.738967996151895e-06,
      "loss": 0.0144,
      "step": 1616860
    },
    {
      "epoch": 2.6460595824905244,
      "grad_norm": 0.13573795557022095,
      "learning_rate": 4.738902103938378e-06,
      "loss": 0.0137,
      "step": 1616880
    },
    {
      "epoch": 2.646092312929178,
      "grad_norm": 0.13426576554775238,
      "learning_rate": 4.738836211724861e-06,
      "loss": 0.0142,
      "step": 1616900
    },
    {
      "epoch": 2.646125043367831,
      "grad_norm": 0.2443491369485855,
      "learning_rate": 4.7387703195113435e-06,
      "loss": 0.0116,
      "step": 1616920
    },
    {
      "epoch": 2.6461577738064843,
      "grad_norm": 0.5805941820144653,
      "learning_rate": 4.738704427297827e-06,
      "loss": 0.0156,
      "step": 1616940
    },
    {
      "epoch": 2.646190504245138,
      "grad_norm": 1.0035005807876587,
      "learning_rate": 4.73863853508431e-06,
      "loss": 0.0121,
      "step": 1616960
    },
    {
      "epoch": 2.6462232346837915,
      "grad_norm": 0.2742733955383301,
      "learning_rate": 4.7385726428707925e-06,
      "loss": 0.0104,
      "step": 1616980
    },
    {
      "epoch": 2.6462559651224447,
      "grad_norm": 0.23279207944869995,
      "learning_rate": 4.738506750657275e-06,
      "loss": 0.0106,
      "step": 1617000
    },
    {
      "epoch": 2.646288695561098,
      "grad_norm": 0.5441147685050964,
      "learning_rate": 4.738440858443758e-06,
      "loss": 0.0203,
      "step": 1617020
    },
    {
      "epoch": 2.6463214259997514,
      "grad_norm": 0.5018726587295532,
      "learning_rate": 4.738374966230241e-06,
      "loss": 0.015,
      "step": 1617040
    },
    {
      "epoch": 2.6463541564384045,
      "grad_norm": 0.1790643185377121,
      "learning_rate": 4.7383090740167235e-06,
      "loss": 0.011,
      "step": 1617060
    },
    {
      "epoch": 2.6463868868770577,
      "grad_norm": 0.38617902994155884,
      "learning_rate": 4.738243181803206e-06,
      "loss": 0.0202,
      "step": 1617080
    },
    {
      "epoch": 2.6464196173157113,
      "grad_norm": 0.45709431171417236,
      "learning_rate": 4.738177289589689e-06,
      "loss": 0.0119,
      "step": 1617100
    },
    {
      "epoch": 2.6464523477543644,
      "grad_norm": 0.5920094847679138,
      "learning_rate": 4.7381113973761726e-06,
      "loss": 0.0087,
      "step": 1617120
    },
    {
      "epoch": 2.646485078193018,
      "grad_norm": 0.47154027223587036,
      "learning_rate": 4.738045505162655e-06,
      "loss": 0.0112,
      "step": 1617140
    },
    {
      "epoch": 2.646517808631671,
      "grad_norm": 0.17677101492881775,
      "learning_rate": 4.737979612949138e-06,
      "loss": 0.0152,
      "step": 1617160
    },
    {
      "epoch": 2.646550539070325,
      "grad_norm": 1.0569276809692383,
      "learning_rate": 4.737913720735622e-06,
      "loss": 0.0174,
      "step": 1617180
    },
    {
      "epoch": 2.646583269508978,
      "grad_norm": 0.4863298237323761,
      "learning_rate": 4.737847828522104e-06,
      "loss": 0.0171,
      "step": 1617200
    },
    {
      "epoch": 2.646615999947631,
      "grad_norm": 0.21739205718040466,
      "learning_rate": 4.737781936308587e-06,
      "loss": 0.0139,
      "step": 1617220
    },
    {
      "epoch": 2.6466487303862847,
      "grad_norm": 0.36947157979011536,
      "learning_rate": 4.73771604409507e-06,
      "loss": 0.0164,
      "step": 1617240
    },
    {
      "epoch": 2.646681460824938,
      "grad_norm": 0.767102062702179,
      "learning_rate": 4.737650151881553e-06,
      "loss": 0.0128,
      "step": 1617260
    },
    {
      "epoch": 2.6467141912635914,
      "grad_norm": 0.21417751908302307,
      "learning_rate": 4.737584259668035e-06,
      "loss": 0.0075,
      "step": 1617280
    },
    {
      "epoch": 2.6467469217022446,
      "grad_norm": 0.2713581621646881,
      "learning_rate": 4.737518367454518e-06,
      "loss": 0.0177,
      "step": 1617300
    },
    {
      "epoch": 2.646779652140898,
      "grad_norm": 0.6175239682197571,
      "learning_rate": 4.737452475241001e-06,
      "loss": 0.0138,
      "step": 1617320
    },
    {
      "epoch": 2.6468123825795513,
      "grad_norm": 0.7487890124320984,
      "learning_rate": 4.737386583027484e-06,
      "loss": 0.0113,
      "step": 1617340
    },
    {
      "epoch": 2.6468451130182045,
      "grad_norm": 0.5423746109008789,
      "learning_rate": 4.737320690813967e-06,
      "loss": 0.0189,
      "step": 1617360
    },
    {
      "epoch": 2.646877843456858,
      "grad_norm": 1.8244643211364746,
      "learning_rate": 4.73725479860045e-06,
      "loss": 0.0116,
      "step": 1617380
    },
    {
      "epoch": 2.646910573895511,
      "grad_norm": 0.2299661636352539,
      "learning_rate": 4.737188906386933e-06,
      "loss": 0.015,
      "step": 1617400
    },
    {
      "epoch": 2.646943304334165,
      "grad_norm": 0.38182532787323,
      "learning_rate": 4.737123014173415e-06,
      "loss": 0.0141,
      "step": 1617420
    },
    {
      "epoch": 2.646976034772818,
      "grad_norm": 0.35696184635162354,
      "learning_rate": 4.737057121959898e-06,
      "loss": 0.0151,
      "step": 1617440
    },
    {
      "epoch": 2.6470087652114715,
      "grad_norm": 0.5800498723983765,
      "learning_rate": 4.736991229746381e-06,
      "loss": 0.0117,
      "step": 1617460
    },
    {
      "epoch": 2.6470414956501247,
      "grad_norm": 0.390979140996933,
      "learning_rate": 4.736925337532864e-06,
      "loss": 0.0159,
      "step": 1617480
    },
    {
      "epoch": 2.647074226088778,
      "grad_norm": 0.3426596224308014,
      "learning_rate": 4.736859445319347e-06,
      "loss": 0.0134,
      "step": 1617500
    },
    {
      "epoch": 2.6471069565274314,
      "grad_norm": 0.501968502998352,
      "learning_rate": 4.73679355310583e-06,
      "loss": 0.0161,
      "step": 1617520
    },
    {
      "epoch": 2.6471396869660846,
      "grad_norm": 0.36428382992744446,
      "learning_rate": 4.736727660892313e-06,
      "loss": 0.0247,
      "step": 1617540
    },
    {
      "epoch": 2.647172417404738,
      "grad_norm": 1.15083646774292,
      "learning_rate": 4.736661768678795e-06,
      "loss": 0.0165,
      "step": 1617560
    },
    {
      "epoch": 2.6472051478433913,
      "grad_norm": 0.24893252551555634,
      "learning_rate": 4.736595876465279e-06,
      "loss": 0.0129,
      "step": 1617580
    },
    {
      "epoch": 2.647237878282045,
      "grad_norm": 0.3631417751312256,
      "learning_rate": 4.736529984251762e-06,
      "loss": 0.0144,
      "step": 1617600
    },
    {
      "epoch": 2.647270608720698,
      "grad_norm": 0.6703575849533081,
      "learning_rate": 4.7364640920382445e-06,
      "loss": 0.0148,
      "step": 1617620
    },
    {
      "epoch": 2.6473033391593512,
      "grad_norm": 0.201975017786026,
      "learning_rate": 4.736398199824727e-06,
      "loss": 0.011,
      "step": 1617640
    },
    {
      "epoch": 2.647336069598005,
      "grad_norm": 0.43865546584129333,
      "learning_rate": 4.73633230761121e-06,
      "loss": 0.0094,
      "step": 1617660
    },
    {
      "epoch": 2.647368800036658,
      "grad_norm": 0.4408882260322571,
      "learning_rate": 4.736266415397693e-06,
      "loss": 0.0156,
      "step": 1617680
    },
    {
      "epoch": 2.6474015304753116,
      "grad_norm": 0.18643856048583984,
      "learning_rate": 4.736200523184175e-06,
      "loss": 0.0108,
      "step": 1617700
    },
    {
      "epoch": 2.6474342609139647,
      "grad_norm": 0.06327658146619797,
      "learning_rate": 4.736134630970658e-06,
      "loss": 0.0124,
      "step": 1617720
    },
    {
      "epoch": 2.6474669913526183,
      "grad_norm": 0.27808907628059387,
      "learning_rate": 4.736068738757142e-06,
      "loss": 0.0105,
      "step": 1617740
    },
    {
      "epoch": 2.6474997217912715,
      "grad_norm": 0.9743085503578186,
      "learning_rate": 4.7360028465436245e-06,
      "loss": 0.012,
      "step": 1617760
    },
    {
      "epoch": 2.6475324522299246,
      "grad_norm": 0.18640029430389404,
      "learning_rate": 4.735936954330107e-06,
      "loss": 0.016,
      "step": 1617780
    },
    {
      "epoch": 2.647565182668578,
      "grad_norm": 0.3881835639476776,
      "learning_rate": 4.73587106211659e-06,
      "loss": 0.0099,
      "step": 1617800
    },
    {
      "epoch": 2.6475979131072314,
      "grad_norm": 0.7077329158782959,
      "learning_rate": 4.735805169903073e-06,
      "loss": 0.0154,
      "step": 1617820
    },
    {
      "epoch": 2.647630643545885,
      "grad_norm": 0.2131669819355011,
      "learning_rate": 4.7357392776895554e-06,
      "loss": 0.016,
      "step": 1617840
    },
    {
      "epoch": 2.647663373984538,
      "grad_norm": 0.8150599002838135,
      "learning_rate": 4.735673385476039e-06,
      "loss": 0.0119,
      "step": 1617860
    },
    {
      "epoch": 2.6476961044231917,
      "grad_norm": 0.3546300530433655,
      "learning_rate": 4.735607493262522e-06,
      "loss": 0.0164,
      "step": 1617880
    },
    {
      "epoch": 2.647728834861845,
      "grad_norm": 0.3543860614299774,
      "learning_rate": 4.7355416010490045e-06,
      "loss": 0.0158,
      "step": 1617900
    },
    {
      "epoch": 2.647761565300498,
      "grad_norm": 0.1534298211336136,
      "learning_rate": 4.735475708835487e-06,
      "loss": 0.0092,
      "step": 1617920
    },
    {
      "epoch": 2.6477942957391516,
      "grad_norm": 0.2002197802066803,
      "learning_rate": 4.73540981662197e-06,
      "loss": 0.0121,
      "step": 1617940
    },
    {
      "epoch": 2.6478270261778047,
      "grad_norm": 0.7438878417015076,
      "learning_rate": 4.735343924408453e-06,
      "loss": 0.0106,
      "step": 1617960
    },
    {
      "epoch": 2.6478597566164583,
      "grad_norm": 0.14036497473716736,
      "learning_rate": 4.735278032194936e-06,
      "loss": 0.0095,
      "step": 1617980
    },
    {
      "epoch": 2.6478924870551115,
      "grad_norm": 0.3097861409187317,
      "learning_rate": 4.735212139981419e-06,
      "loss": 0.0126,
      "step": 1618000
    },
    {
      "epoch": 2.647925217493765,
      "grad_norm": 0.5360050797462463,
      "learning_rate": 4.735146247767902e-06,
      "loss": 0.0181,
      "step": 1618020
    },
    {
      "epoch": 2.6479579479324182,
      "grad_norm": 0.1611216813325882,
      "learning_rate": 4.7350803555543845e-06,
      "loss": 0.0156,
      "step": 1618040
    },
    {
      "epoch": 2.6479906783710714,
      "grad_norm": 0.6461268067359924,
      "learning_rate": 4.735014463340867e-06,
      "loss": 0.0148,
      "step": 1618060
    },
    {
      "epoch": 2.648023408809725,
      "grad_norm": 0.3523285686969757,
      "learning_rate": 4.73494857112735e-06,
      "loss": 0.0139,
      "step": 1618080
    },
    {
      "epoch": 2.648056139248378,
      "grad_norm": 0.1399674117565155,
      "learning_rate": 4.734882678913833e-06,
      "loss": 0.0141,
      "step": 1618100
    },
    {
      "epoch": 2.6480888696870313,
      "grad_norm": 0.19403819739818573,
      "learning_rate": 4.7348167867003155e-06,
      "loss": 0.0103,
      "step": 1618120
    },
    {
      "epoch": 2.648121600125685,
      "grad_norm": 0.3185156285762787,
      "learning_rate": 4.734750894486799e-06,
      "loss": 0.0082,
      "step": 1618140
    },
    {
      "epoch": 2.6481543305643385,
      "grad_norm": 0.213233083486557,
      "learning_rate": 4.734685002273282e-06,
      "loss": 0.0113,
      "step": 1618160
    },
    {
      "epoch": 2.6481870610029916,
      "grad_norm": 0.21297797560691833,
      "learning_rate": 4.7346191100597646e-06,
      "loss": 0.014,
      "step": 1618180
    },
    {
      "epoch": 2.6482197914416448,
      "grad_norm": 0.15355418622493744,
      "learning_rate": 4.734553217846248e-06,
      "loss": 0.0159,
      "step": 1618200
    },
    {
      "epoch": 2.6482525218802984,
      "grad_norm": 0.1645926684141159,
      "learning_rate": 4.734487325632731e-06,
      "loss": 0.0093,
      "step": 1618220
    },
    {
      "epoch": 2.6482852523189515,
      "grad_norm": 0.14611123502254486,
      "learning_rate": 4.734421433419214e-06,
      "loss": 0.0087,
      "step": 1618240
    },
    {
      "epoch": 2.6483179827576047,
      "grad_norm": 0.15838728845119476,
      "learning_rate": 4.734355541205696e-06,
      "loss": 0.0176,
      "step": 1618260
    },
    {
      "epoch": 2.6483507131962583,
      "grad_norm": 0.3429309129714966,
      "learning_rate": 4.734289648992179e-06,
      "loss": 0.0142,
      "step": 1618280
    },
    {
      "epoch": 2.648383443634912,
      "grad_norm": 0.16736619174480438,
      "learning_rate": 4.734223756778662e-06,
      "loss": 0.0122,
      "step": 1618300
    },
    {
      "epoch": 2.648416174073565,
      "grad_norm": 0.6550852060317993,
      "learning_rate": 4.734157864565145e-06,
      "loss": 0.0177,
      "step": 1618320
    },
    {
      "epoch": 2.648448904512218,
      "grad_norm": 0.11227848380804062,
      "learning_rate": 4.734091972351627e-06,
      "loss": 0.0166,
      "step": 1618340
    },
    {
      "epoch": 2.6484816349508717,
      "grad_norm": 0.28145116567611694,
      "learning_rate": 4.734026080138111e-06,
      "loss": 0.0108,
      "step": 1618360
    },
    {
      "epoch": 2.648514365389525,
      "grad_norm": 0.36530590057373047,
      "learning_rate": 4.733960187924594e-06,
      "loss": 0.0111,
      "step": 1618380
    },
    {
      "epoch": 2.648547095828178,
      "grad_norm": 0.268519788980484,
      "learning_rate": 4.733894295711076e-06,
      "loss": 0.0163,
      "step": 1618400
    },
    {
      "epoch": 2.6485798262668316,
      "grad_norm": 0.43417155742645264,
      "learning_rate": 4.733828403497559e-06,
      "loss": 0.017,
      "step": 1618420
    },
    {
      "epoch": 2.6486125567054852,
      "grad_norm": 0.3446655571460724,
      "learning_rate": 4.733762511284042e-06,
      "loss": 0.0251,
      "step": 1618440
    },
    {
      "epoch": 2.6486452871441384,
      "grad_norm": 0.290912002325058,
      "learning_rate": 4.733696619070525e-06,
      "loss": 0.0148,
      "step": 1618460
    },
    {
      "epoch": 2.6486780175827915,
      "grad_norm": 0.1581956446170807,
      "learning_rate": 4.733630726857007e-06,
      "loss": 0.0103,
      "step": 1618480
    },
    {
      "epoch": 2.648710748021445,
      "grad_norm": 0.12174155563116074,
      "learning_rate": 4.73356483464349e-06,
      "loss": 0.0162,
      "step": 1618500
    },
    {
      "epoch": 2.6487434784600983,
      "grad_norm": 1.409408450126648,
      "learning_rate": 4.733498942429973e-06,
      "loss": 0.0221,
      "step": 1618520
    },
    {
      "epoch": 2.6487762088987514,
      "grad_norm": 0.5325192213058472,
      "learning_rate": 4.733433050216456e-06,
      "loss": 0.01,
      "step": 1618540
    },
    {
      "epoch": 2.648808939337405,
      "grad_norm": 2.800832986831665,
      "learning_rate": 4.733367158002939e-06,
      "loss": 0.011,
      "step": 1618560
    },
    {
      "epoch": 2.6488416697760586,
      "grad_norm": 0.6009296774864197,
      "learning_rate": 4.733301265789422e-06,
      "loss": 0.0126,
      "step": 1618580
    },
    {
      "epoch": 2.6488744002147118,
      "grad_norm": 0.13327407836914062,
      "learning_rate": 4.7332353735759055e-06,
      "loss": 0.0117,
      "step": 1618600
    },
    {
      "epoch": 2.648907130653365,
      "grad_norm": 0.23515203595161438,
      "learning_rate": 4.733169481362388e-06,
      "loss": 0.0128,
      "step": 1618620
    },
    {
      "epoch": 2.6489398610920185,
      "grad_norm": 0.2852868139743805,
      "learning_rate": 4.733103589148871e-06,
      "loss": 0.0156,
      "step": 1618640
    },
    {
      "epoch": 2.6489725915306717,
      "grad_norm": 0.2081441879272461,
      "learning_rate": 4.733037696935354e-06,
      "loss": 0.0133,
      "step": 1618660
    },
    {
      "epoch": 2.649005321969325,
      "grad_norm": 0.174042209982872,
      "learning_rate": 4.7329718047218364e-06,
      "loss": 0.0097,
      "step": 1618680
    },
    {
      "epoch": 2.6490380524079784,
      "grad_norm": 0.3898578882217407,
      "learning_rate": 4.732905912508319e-06,
      "loss": 0.0171,
      "step": 1618700
    },
    {
      "epoch": 2.6490707828466316,
      "grad_norm": 0.23872271180152893,
      "learning_rate": 4.732840020294802e-06,
      "loss": 0.0113,
      "step": 1618720
    },
    {
      "epoch": 2.649103513285285,
      "grad_norm": 0.3988032937049866,
      "learning_rate": 4.732774128081285e-06,
      "loss": 0.014,
      "step": 1618740
    },
    {
      "epoch": 2.6491362437239383,
      "grad_norm": 1.3738083839416504,
      "learning_rate": 4.732708235867768e-06,
      "loss": 0.0117,
      "step": 1618760
    },
    {
      "epoch": 2.649168974162592,
      "grad_norm": 0.24290412664413452,
      "learning_rate": 4.732642343654251e-06,
      "loss": 0.0162,
      "step": 1618780
    },
    {
      "epoch": 2.649201704601245,
      "grad_norm": 0.3817305266857147,
      "learning_rate": 4.732576451440734e-06,
      "loss": 0.017,
      "step": 1618800
    },
    {
      "epoch": 2.649234435039898,
      "grad_norm": 0.5245422124862671,
      "learning_rate": 4.7325105592272165e-06,
      "loss": 0.0179,
      "step": 1618820
    },
    {
      "epoch": 2.649267165478552,
      "grad_norm": 0.04143844172358513,
      "learning_rate": 4.732444667013699e-06,
      "loss": 0.014,
      "step": 1618840
    },
    {
      "epoch": 2.649299895917205,
      "grad_norm": 0.18723705410957336,
      "learning_rate": 4.732378774800182e-06,
      "loss": 0.0134,
      "step": 1618860
    },
    {
      "epoch": 2.6493326263558585,
      "grad_norm": 0.4377050995826721,
      "learning_rate": 4.732312882586665e-06,
      "loss": 0.0104,
      "step": 1618880
    },
    {
      "epoch": 2.6493653567945117,
      "grad_norm": 0.9401438236236572,
      "learning_rate": 4.732246990373147e-06,
      "loss": 0.0133,
      "step": 1618900
    },
    {
      "epoch": 2.6493980872331653,
      "grad_norm": 0.6782065033912659,
      "learning_rate": 4.732181098159631e-06,
      "loss": 0.0117,
      "step": 1618920
    },
    {
      "epoch": 2.6494308176718184,
      "grad_norm": 0.16577757894992828,
      "learning_rate": 4.732115205946114e-06,
      "loss": 0.015,
      "step": 1618940
    },
    {
      "epoch": 2.6494635481104716,
      "grad_norm": 0.12001572549343109,
      "learning_rate": 4.7320493137325965e-06,
      "loss": 0.0211,
      "step": 1618960
    },
    {
      "epoch": 2.649496278549125,
      "grad_norm": 0.09729477763175964,
      "learning_rate": 4.731983421519079e-06,
      "loss": 0.0159,
      "step": 1618980
    },
    {
      "epoch": 2.6495290089877783,
      "grad_norm": 0.4333963990211487,
      "learning_rate": 4.731917529305563e-06,
      "loss": 0.0112,
      "step": 1619000
    },
    {
      "epoch": 2.649561739426432,
      "grad_norm": 0.5334978699684143,
      "learning_rate": 4.7318516370920456e-06,
      "loss": 0.015,
      "step": 1619020
    },
    {
      "epoch": 2.649594469865085,
      "grad_norm": 0.3031507730484009,
      "learning_rate": 4.731785744878528e-06,
      "loss": 0.0111,
      "step": 1619040
    },
    {
      "epoch": 2.6496272003037387,
      "grad_norm": 0.2069254070520401,
      "learning_rate": 4.731719852665011e-06,
      "loss": 0.0109,
      "step": 1619060
    },
    {
      "epoch": 2.649659930742392,
      "grad_norm": 0.3021566867828369,
      "learning_rate": 4.731653960451494e-06,
      "loss": 0.0176,
      "step": 1619080
    },
    {
      "epoch": 2.649692661181045,
      "grad_norm": 0.06954564154148102,
      "learning_rate": 4.7315880682379765e-06,
      "loss": 0.0148,
      "step": 1619100
    },
    {
      "epoch": 2.6497253916196986,
      "grad_norm": 0.35117101669311523,
      "learning_rate": 4.731522176024459e-06,
      "loss": 0.0077,
      "step": 1619120
    },
    {
      "epoch": 2.6497581220583517,
      "grad_norm": 0.11160952597856522,
      "learning_rate": 4.731456283810942e-06,
      "loss": 0.0185,
      "step": 1619140
    },
    {
      "epoch": 2.6497908524970053,
      "grad_norm": 0.13009239733219147,
      "learning_rate": 4.731390391597426e-06,
      "loss": 0.0203,
      "step": 1619160
    },
    {
      "epoch": 2.6498235829356585,
      "grad_norm": 0.4482693076133728,
      "learning_rate": 4.731324499383908e-06,
      "loss": 0.0086,
      "step": 1619180
    },
    {
      "epoch": 2.649856313374312,
      "grad_norm": 0.24032194912433624,
      "learning_rate": 4.731258607170391e-06,
      "loss": 0.0164,
      "step": 1619200
    },
    {
      "epoch": 2.649889043812965,
      "grad_norm": 0.3604413568973541,
      "learning_rate": 4.731192714956874e-06,
      "loss": 0.0103,
      "step": 1619220
    },
    {
      "epoch": 2.6499217742516183,
      "grad_norm": 0.3044614791870117,
      "learning_rate": 4.7311268227433565e-06,
      "loss": 0.0147,
      "step": 1619240
    },
    {
      "epoch": 2.649954504690272,
      "grad_norm": 0.26083463430404663,
      "learning_rate": 4.73106093052984e-06,
      "loss": 0.0198,
      "step": 1619260
    },
    {
      "epoch": 2.649987235128925,
      "grad_norm": 0.40151795744895935,
      "learning_rate": 4.730995038316323e-06,
      "loss": 0.0158,
      "step": 1619280
    },
    {
      "epoch": 2.6500199655675787,
      "grad_norm": 0.16577938199043274,
      "learning_rate": 4.730929146102806e-06,
      "loss": 0.0142,
      "step": 1619300
    },
    {
      "epoch": 2.650052696006232,
      "grad_norm": 0.1454714983701706,
      "learning_rate": 4.730863253889288e-06,
      "loss": 0.0129,
      "step": 1619320
    },
    {
      "epoch": 2.6500854264448854,
      "grad_norm": 0.15486469864845276,
      "learning_rate": 4.730797361675771e-06,
      "loss": 0.0186,
      "step": 1619340
    },
    {
      "epoch": 2.6501181568835386,
      "grad_norm": 0.10420739650726318,
      "learning_rate": 4.730731469462254e-06,
      "loss": 0.0129,
      "step": 1619360
    },
    {
      "epoch": 2.6501508873221917,
      "grad_norm": 0.3264716863632202,
      "learning_rate": 4.7306655772487366e-06,
      "loss": 0.0138,
      "step": 1619380
    },
    {
      "epoch": 2.6501836177608453,
      "grad_norm": 0.36627596616744995,
      "learning_rate": 4.73059968503522e-06,
      "loss": 0.0143,
      "step": 1619400
    },
    {
      "epoch": 2.6502163481994985,
      "grad_norm": 0.48505187034606934,
      "learning_rate": 4.730533792821703e-06,
      "loss": 0.013,
      "step": 1619420
    },
    {
      "epoch": 2.650249078638152,
      "grad_norm": 0.29798969626426697,
      "learning_rate": 4.730467900608186e-06,
      "loss": 0.0197,
      "step": 1619440
    },
    {
      "epoch": 2.650281809076805,
      "grad_norm": 1.2533841133117676,
      "learning_rate": 4.730402008394668e-06,
      "loss": 0.0143,
      "step": 1619460
    },
    {
      "epoch": 2.650314539515459,
      "grad_norm": 0.6191412210464478,
      "learning_rate": 4.730336116181151e-06,
      "loss": 0.0115,
      "step": 1619480
    },
    {
      "epoch": 2.650347269954112,
      "grad_norm": 0.5618572235107422,
      "learning_rate": 4.730270223967634e-06,
      "loss": 0.0149,
      "step": 1619500
    },
    {
      "epoch": 2.650380000392765,
      "grad_norm": 0.2490624636411667,
      "learning_rate": 4.730204331754117e-06,
      "loss": 0.0113,
      "step": 1619520
    },
    {
      "epoch": 2.6504127308314187,
      "grad_norm": 0.36288806796073914,
      "learning_rate": 4.730138439540599e-06,
      "loss": 0.0142,
      "step": 1619540
    },
    {
      "epoch": 2.650445461270072,
      "grad_norm": 0.33441469073295593,
      "learning_rate": 4.730072547327083e-06,
      "loss": 0.0144,
      "step": 1619560
    },
    {
      "epoch": 2.650478191708725,
      "grad_norm": 0.4961126446723938,
      "learning_rate": 4.730006655113566e-06,
      "loss": 0.0151,
      "step": 1619580
    },
    {
      "epoch": 2.6505109221473786,
      "grad_norm": 0.0762568786740303,
      "learning_rate": 4.729940762900048e-06,
      "loss": 0.0188,
      "step": 1619600
    },
    {
      "epoch": 2.650543652586032,
      "grad_norm": 0.19051070511341095,
      "learning_rate": 4.729874870686532e-06,
      "loss": 0.0125,
      "step": 1619620
    },
    {
      "epoch": 2.6505763830246853,
      "grad_norm": 0.5572226643562317,
      "learning_rate": 4.729808978473015e-06,
      "loss": 0.018,
      "step": 1619640
    },
    {
      "epoch": 2.6506091134633385,
      "grad_norm": 0.6106662750244141,
      "learning_rate": 4.7297430862594975e-06,
      "loss": 0.0147,
      "step": 1619660
    },
    {
      "epoch": 2.650641843901992,
      "grad_norm": 0.1668330729007721,
      "learning_rate": 4.72967719404598e-06,
      "loss": 0.0123,
      "step": 1619680
    },
    {
      "epoch": 2.6506745743406452,
      "grad_norm": 0.336885929107666,
      "learning_rate": 4.729611301832463e-06,
      "loss": 0.0166,
      "step": 1619700
    },
    {
      "epoch": 2.6507073047792984,
      "grad_norm": 0.4502715766429901,
      "learning_rate": 4.729545409618946e-06,
      "loss": 0.0108,
      "step": 1619720
    },
    {
      "epoch": 2.650740035217952,
      "grad_norm": 0.08966349810361862,
      "learning_rate": 4.729479517405428e-06,
      "loss": 0.0105,
      "step": 1619740
    },
    {
      "epoch": 2.6507727656566056,
      "grad_norm": 0.29983705282211304,
      "learning_rate": 4.729413625191911e-06,
      "loss": 0.0124,
      "step": 1619760
    },
    {
      "epoch": 2.6508054960952587,
      "grad_norm": 0.7502481341362,
      "learning_rate": 4.729347732978395e-06,
      "loss": 0.0154,
      "step": 1619780
    },
    {
      "epoch": 2.650838226533912,
      "grad_norm": 0.7201810479164124,
      "learning_rate": 4.7292818407648775e-06,
      "loss": 0.0131,
      "step": 1619800
    },
    {
      "epoch": 2.6508709569725655,
      "grad_norm": 0.3695625960826874,
      "learning_rate": 4.72921594855136e-06,
      "loss": 0.0183,
      "step": 1619820
    },
    {
      "epoch": 2.6509036874112186,
      "grad_norm": 0.33632758259773254,
      "learning_rate": 4.729150056337843e-06,
      "loss": 0.015,
      "step": 1619840
    },
    {
      "epoch": 2.6509364178498718,
      "grad_norm": 0.3892901539802551,
      "learning_rate": 4.729084164124326e-06,
      "loss": 0.0171,
      "step": 1619860
    },
    {
      "epoch": 2.6509691482885254,
      "grad_norm": 0.09824781119823456,
      "learning_rate": 4.7290182719108084e-06,
      "loss": 0.0164,
      "step": 1619880
    },
    {
      "epoch": 2.651001878727179,
      "grad_norm": 1.1639457941055298,
      "learning_rate": 4.728952379697291e-06,
      "loss": 0.0147,
      "step": 1619900
    },
    {
      "epoch": 2.651034609165832,
      "grad_norm": 0.06917506456375122,
      "learning_rate": 4.728886487483774e-06,
      "loss": 0.0099,
      "step": 1619920
    },
    {
      "epoch": 2.6510673396044853,
      "grad_norm": 0.10005251318216324,
      "learning_rate": 4.728820595270257e-06,
      "loss": 0.0136,
      "step": 1619940
    },
    {
      "epoch": 2.651100070043139,
      "grad_norm": 0.48192664980888367,
      "learning_rate": 4.72875470305674e-06,
      "loss": 0.02,
      "step": 1619960
    },
    {
      "epoch": 2.651132800481792,
      "grad_norm": 0.07900618761777878,
      "learning_rate": 4.728688810843223e-06,
      "loss": 0.0208,
      "step": 1619980
    },
    {
      "epoch": 2.651165530920445,
      "grad_norm": 0.2981981933116913,
      "learning_rate": 4.728622918629706e-06,
      "loss": 0.0106,
      "step": 1620000
    },
    {
      "epoch": 2.6511982613590988,
      "grad_norm": 0.5178987979888916,
      "learning_rate": 4.728557026416189e-06,
      "loss": 0.0128,
      "step": 1620020
    },
    {
      "epoch": 2.6512309917977523,
      "grad_norm": 0.46565675735473633,
      "learning_rate": 4.728491134202672e-06,
      "loss": 0.0188,
      "step": 1620040
    },
    {
      "epoch": 2.6512637222364055,
      "grad_norm": 0.22809891402721405,
      "learning_rate": 4.728425241989155e-06,
      "loss": 0.0111,
      "step": 1620060
    },
    {
      "epoch": 2.6512964526750586,
      "grad_norm": 0.3389904201030731,
      "learning_rate": 4.7283593497756375e-06,
      "loss": 0.0157,
      "step": 1620080
    },
    {
      "epoch": 2.6513291831137122,
      "grad_norm": 0.6293578147888184,
      "learning_rate": 4.72829345756212e-06,
      "loss": 0.0161,
      "step": 1620100
    },
    {
      "epoch": 2.6513619135523654,
      "grad_norm": 0.46751779317855835,
      "learning_rate": 4.728227565348603e-06,
      "loss": 0.0131,
      "step": 1620120
    },
    {
      "epoch": 2.6513946439910185,
      "grad_norm": 0.24374079704284668,
      "learning_rate": 4.728161673135086e-06,
      "loss": 0.0147,
      "step": 1620140
    },
    {
      "epoch": 2.651427374429672,
      "grad_norm": 0.17433775961399078,
      "learning_rate": 4.7280957809215685e-06,
      "loss": 0.0133,
      "step": 1620160
    },
    {
      "epoch": 2.6514601048683253,
      "grad_norm": 1.2663798332214355,
      "learning_rate": 4.728029888708052e-06,
      "loss": 0.0139,
      "step": 1620180
    },
    {
      "epoch": 2.651492835306979,
      "grad_norm": 0.12453831732273102,
      "learning_rate": 4.727963996494535e-06,
      "loss": 0.0118,
      "step": 1620200
    },
    {
      "epoch": 2.651525565745632,
      "grad_norm": 0.4183315932750702,
      "learning_rate": 4.7278981042810176e-06,
      "loss": 0.0206,
      "step": 1620220
    },
    {
      "epoch": 2.6515582961842856,
      "grad_norm": 0.20085646212100983,
      "learning_rate": 4.7278322120675e-06,
      "loss": 0.0117,
      "step": 1620240
    },
    {
      "epoch": 2.6515910266229388,
      "grad_norm": 0.6108338236808777,
      "learning_rate": 4.727766319853983e-06,
      "loss": 0.0095,
      "step": 1620260
    },
    {
      "epoch": 2.651623757061592,
      "grad_norm": 0.27096661925315857,
      "learning_rate": 4.727700427640466e-06,
      "loss": 0.0122,
      "step": 1620280
    },
    {
      "epoch": 2.6516564875002455,
      "grad_norm": 0.12420874834060669,
      "learning_rate": 4.7276345354269485e-06,
      "loss": 0.0126,
      "step": 1620300
    },
    {
      "epoch": 2.6516892179388987,
      "grad_norm": 0.18217803537845612,
      "learning_rate": 4.727568643213432e-06,
      "loss": 0.0082,
      "step": 1620320
    },
    {
      "epoch": 2.6517219483775523,
      "grad_norm": 0.2613590359687805,
      "learning_rate": 4.727502750999915e-06,
      "loss": 0.019,
      "step": 1620340
    },
    {
      "epoch": 2.6517546788162054,
      "grad_norm": 0.1905272901058197,
      "learning_rate": 4.727436858786398e-06,
      "loss": 0.0148,
      "step": 1620360
    },
    {
      "epoch": 2.651787409254859,
      "grad_norm": 0.3894713222980499,
      "learning_rate": 4.72737096657288e-06,
      "loss": 0.0139,
      "step": 1620380
    },
    {
      "epoch": 2.651820139693512,
      "grad_norm": 0.11921291053295135,
      "learning_rate": 4.727305074359363e-06,
      "loss": 0.0098,
      "step": 1620400
    },
    {
      "epoch": 2.6518528701321653,
      "grad_norm": 0.2909429967403412,
      "learning_rate": 4.727239182145847e-06,
      "loss": 0.0138,
      "step": 1620420
    },
    {
      "epoch": 2.651885600570819,
      "grad_norm": 0.3142223358154297,
      "learning_rate": 4.727173289932329e-06,
      "loss": 0.0113,
      "step": 1620440
    },
    {
      "epoch": 2.651918331009472,
      "grad_norm": 0.3395601809024811,
      "learning_rate": 4.727107397718812e-06,
      "loss": 0.0117,
      "step": 1620460
    },
    {
      "epoch": 2.6519510614481256,
      "grad_norm": 0.20139312744140625,
      "learning_rate": 4.727041505505295e-06,
      "loss": 0.0088,
      "step": 1620480
    },
    {
      "epoch": 2.651983791886779,
      "grad_norm": 0.38870108127593994,
      "learning_rate": 4.726975613291778e-06,
      "loss": 0.0119,
      "step": 1620500
    },
    {
      "epoch": 2.6520165223254324,
      "grad_norm": 0.13028398156166077,
      "learning_rate": 4.72690972107826e-06,
      "loss": 0.0141,
      "step": 1620520
    },
    {
      "epoch": 2.6520492527640855,
      "grad_norm": 0.39196527004241943,
      "learning_rate": 4.726843828864743e-06,
      "loss": 0.0118,
      "step": 1620540
    },
    {
      "epoch": 2.6520819832027387,
      "grad_norm": 0.35984012484550476,
      "learning_rate": 4.726777936651226e-06,
      "loss": 0.0171,
      "step": 1620560
    },
    {
      "epoch": 2.6521147136413923,
      "grad_norm": 1.3355134725570679,
      "learning_rate": 4.726712044437709e-06,
      "loss": 0.0242,
      "step": 1620580
    },
    {
      "epoch": 2.6521474440800454,
      "grad_norm": 0.49069496989250183,
      "learning_rate": 4.726646152224192e-06,
      "loss": 0.0135,
      "step": 1620600
    },
    {
      "epoch": 2.652180174518699,
      "grad_norm": 1.2556885480880737,
      "learning_rate": 4.726580260010675e-06,
      "loss": 0.014,
      "step": 1620620
    },
    {
      "epoch": 2.652212904957352,
      "grad_norm": 0.05595153942704201,
      "learning_rate": 4.726514367797158e-06,
      "loss": 0.0146,
      "step": 1620640
    },
    {
      "epoch": 2.6522456353960058,
      "grad_norm": 0.31413719058036804,
      "learning_rate": 4.72644847558364e-06,
      "loss": 0.0106,
      "step": 1620660
    },
    {
      "epoch": 2.652278365834659,
      "grad_norm": 0.6905402541160583,
      "learning_rate": 4.726382583370124e-06,
      "loss": 0.0127,
      "step": 1620680
    },
    {
      "epoch": 2.652311096273312,
      "grad_norm": 0.30227527022361755,
      "learning_rate": 4.726316691156607e-06,
      "loss": 0.0125,
      "step": 1620700
    },
    {
      "epoch": 2.6523438267119657,
      "grad_norm": 0.37917497754096985,
      "learning_rate": 4.7262507989430894e-06,
      "loss": 0.0174,
      "step": 1620720
    },
    {
      "epoch": 2.652376557150619,
      "grad_norm": 0.22006377577781677,
      "learning_rate": 4.726184906729572e-06,
      "loss": 0.0129,
      "step": 1620740
    },
    {
      "epoch": 2.6524092875892724,
      "grad_norm": 0.2022656500339508,
      "learning_rate": 4.726119014516055e-06,
      "loss": 0.0114,
      "step": 1620760
    },
    {
      "epoch": 2.6524420180279256,
      "grad_norm": 0.22445639967918396,
      "learning_rate": 4.726053122302538e-06,
      "loss": 0.0085,
      "step": 1620780
    },
    {
      "epoch": 2.652474748466579,
      "grad_norm": 0.2842090129852295,
      "learning_rate": 4.725987230089021e-06,
      "loss": 0.0189,
      "step": 1620800
    },
    {
      "epoch": 2.6525074789052323,
      "grad_norm": 0.2698034346103668,
      "learning_rate": 4.725921337875504e-06,
      "loss": 0.0165,
      "step": 1620820
    },
    {
      "epoch": 2.6525402093438855,
      "grad_norm": 0.11702391505241394,
      "learning_rate": 4.725855445661987e-06,
      "loss": 0.0124,
      "step": 1620840
    },
    {
      "epoch": 2.652572939782539,
      "grad_norm": 0.22762902081012726,
      "learning_rate": 4.7257895534484695e-06,
      "loss": 0.0149,
      "step": 1620860
    },
    {
      "epoch": 2.652605670221192,
      "grad_norm": 0.09742359071969986,
      "learning_rate": 4.725723661234952e-06,
      "loss": 0.0118,
      "step": 1620880
    },
    {
      "epoch": 2.652638400659846,
      "grad_norm": 0.09000185132026672,
      "learning_rate": 4.725657769021435e-06,
      "loss": 0.0203,
      "step": 1620900
    },
    {
      "epoch": 2.652671131098499,
      "grad_norm": 0.23270992934703827,
      "learning_rate": 4.725591876807918e-06,
      "loss": 0.016,
      "step": 1620920
    },
    {
      "epoch": 2.6527038615371525,
      "grad_norm": 0.7118547558784485,
      "learning_rate": 4.7255259845944004e-06,
      "loss": 0.0124,
      "step": 1620940
    },
    {
      "epoch": 2.6527365919758057,
      "grad_norm": 0.5314921140670776,
      "learning_rate": 4.725460092380883e-06,
      "loss": 0.0132,
      "step": 1620960
    },
    {
      "epoch": 2.652769322414459,
      "grad_norm": 0.4147837460041046,
      "learning_rate": 4.725394200167367e-06,
      "loss": 0.0149,
      "step": 1620980
    },
    {
      "epoch": 2.6528020528531124,
      "grad_norm": 0.5773820281028748,
      "learning_rate": 4.7253283079538495e-06,
      "loss": 0.0151,
      "step": 1621000
    },
    {
      "epoch": 2.6528347832917656,
      "grad_norm": 0.19810548424720764,
      "learning_rate": 4.725262415740332e-06,
      "loss": 0.016,
      "step": 1621020
    },
    {
      "epoch": 2.652867513730419,
      "grad_norm": 0.6311516165733337,
      "learning_rate": 4.725196523526816e-06,
      "loss": 0.012,
      "step": 1621040
    },
    {
      "epoch": 2.6529002441690723,
      "grad_norm": 0.2778785526752472,
      "learning_rate": 4.7251306313132986e-06,
      "loss": 0.0168,
      "step": 1621060
    },
    {
      "epoch": 2.652932974607726,
      "grad_norm": 0.23283611238002777,
      "learning_rate": 4.725064739099781e-06,
      "loss": 0.0128,
      "step": 1621080
    },
    {
      "epoch": 2.652965705046379,
      "grad_norm": 0.27912893891334534,
      "learning_rate": 4.724998846886264e-06,
      "loss": 0.0126,
      "step": 1621100
    },
    {
      "epoch": 2.6529984354850322,
      "grad_norm": 0.39510107040405273,
      "learning_rate": 4.724932954672747e-06,
      "loss": 0.0124,
      "step": 1621120
    },
    {
      "epoch": 2.653031165923686,
      "grad_norm": 0.2981713116168976,
      "learning_rate": 4.7248670624592295e-06,
      "loss": 0.016,
      "step": 1621140
    },
    {
      "epoch": 2.653063896362339,
      "grad_norm": 0.9099607467651367,
      "learning_rate": 4.724801170245712e-06,
      "loss": 0.0229,
      "step": 1621160
    },
    {
      "epoch": 2.653096626800992,
      "grad_norm": 0.6505731344223022,
      "learning_rate": 4.724735278032195e-06,
      "loss": 0.0151,
      "step": 1621180
    },
    {
      "epoch": 2.6531293572396457,
      "grad_norm": 0.20826445519924164,
      "learning_rate": 4.724669385818679e-06,
      "loss": 0.0107,
      "step": 1621200
    },
    {
      "epoch": 2.6531620876782993,
      "grad_norm": 0.34780681133270264,
      "learning_rate": 4.724603493605161e-06,
      "loss": 0.0106,
      "step": 1621220
    },
    {
      "epoch": 2.6531948181169525,
      "grad_norm": 1.4361449480056763,
      "learning_rate": 4.724537601391644e-06,
      "loss": 0.0213,
      "step": 1621240
    },
    {
      "epoch": 2.6532275485556056,
      "grad_norm": 0.09300531446933746,
      "learning_rate": 4.724471709178127e-06,
      "loss": 0.0116,
      "step": 1621260
    },
    {
      "epoch": 2.653260278994259,
      "grad_norm": 0.7031634449958801,
      "learning_rate": 4.7244058169646095e-06,
      "loss": 0.0125,
      "step": 1621280
    },
    {
      "epoch": 2.6532930094329124,
      "grad_norm": 0.28800511360168457,
      "learning_rate": 4.724339924751092e-06,
      "loss": 0.0153,
      "step": 1621300
    },
    {
      "epoch": 2.6533257398715655,
      "grad_norm": 0.4034344553947449,
      "learning_rate": 4.724274032537575e-06,
      "loss": 0.0151,
      "step": 1621320
    },
    {
      "epoch": 2.653358470310219,
      "grad_norm": 0.622640073299408,
      "learning_rate": 4.724208140324058e-06,
      "loss": 0.0124,
      "step": 1621340
    },
    {
      "epoch": 2.6533912007488727,
      "grad_norm": 0.6776414513587952,
      "learning_rate": 4.7241422481105405e-06,
      "loss": 0.0148,
      "step": 1621360
    },
    {
      "epoch": 2.653423931187526,
      "grad_norm": 0.43143096566200256,
      "learning_rate": 4.724076355897024e-06,
      "loss": 0.012,
      "step": 1621380
    },
    {
      "epoch": 2.653456661626179,
      "grad_norm": 0.5025795698165894,
      "learning_rate": 4.724010463683507e-06,
      "loss": 0.0076,
      "step": 1621400
    },
    {
      "epoch": 2.6534893920648326,
      "grad_norm": 0.29780134558677673,
      "learning_rate": 4.7239445714699896e-06,
      "loss": 0.0145,
      "step": 1621420
    },
    {
      "epoch": 2.6535221225034857,
      "grad_norm": 0.1990821361541748,
      "learning_rate": 4.723878679256473e-06,
      "loss": 0.0166,
      "step": 1621440
    },
    {
      "epoch": 2.653554852942139,
      "grad_norm": 0.19493956863880157,
      "learning_rate": 4.723812787042956e-06,
      "loss": 0.0107,
      "step": 1621460
    },
    {
      "epoch": 2.6535875833807925,
      "grad_norm": 0.2614729106426239,
      "learning_rate": 4.723746894829439e-06,
      "loss": 0.011,
      "step": 1621480
    },
    {
      "epoch": 2.653620313819446,
      "grad_norm": 0.13618631660938263,
      "learning_rate": 4.723681002615921e-06,
      "loss": 0.0077,
      "step": 1621500
    },
    {
      "epoch": 2.6536530442580992,
      "grad_norm": 0.27921774983406067,
      "learning_rate": 4.723615110402404e-06,
      "loss": 0.0138,
      "step": 1621520
    },
    {
      "epoch": 2.6536857746967524,
      "grad_norm": 0.3200281262397766,
      "learning_rate": 4.723549218188887e-06,
      "loss": 0.017,
      "step": 1621540
    },
    {
      "epoch": 2.653718505135406,
      "grad_norm": 0.11975511163473129,
      "learning_rate": 4.72348332597537e-06,
      "loss": 0.0168,
      "step": 1621560
    },
    {
      "epoch": 2.653751235574059,
      "grad_norm": 0.6424769759178162,
      "learning_rate": 4.723417433761852e-06,
      "loss": 0.0132,
      "step": 1621580
    },
    {
      "epoch": 2.6537839660127123,
      "grad_norm": 0.26876023411750793,
      "learning_rate": 4.723351541548336e-06,
      "loss": 0.0164,
      "step": 1621600
    },
    {
      "epoch": 2.653816696451366,
      "grad_norm": 0.31752318143844604,
      "learning_rate": 4.723285649334819e-06,
      "loss": 0.0139,
      "step": 1621620
    },
    {
      "epoch": 2.653849426890019,
      "grad_norm": 0.02749963290989399,
      "learning_rate": 4.723219757121301e-06,
      "loss": 0.01,
      "step": 1621640
    },
    {
      "epoch": 2.6538821573286726,
      "grad_norm": 0.08577956259250641,
      "learning_rate": 4.723153864907784e-06,
      "loss": 0.0149,
      "step": 1621660
    },
    {
      "epoch": 2.6539148877673258,
      "grad_norm": 0.15601877868175507,
      "learning_rate": 4.723087972694267e-06,
      "loss": 0.0101,
      "step": 1621680
    },
    {
      "epoch": 2.6539476182059794,
      "grad_norm": 1.3944772481918335,
      "learning_rate": 4.72302208048075e-06,
      "loss": 0.0183,
      "step": 1621700
    },
    {
      "epoch": 2.6539803486446325,
      "grad_norm": 0.7417718172073364,
      "learning_rate": 4.722956188267233e-06,
      "loss": 0.0159,
      "step": 1621720
    },
    {
      "epoch": 2.6540130790832857,
      "grad_norm": 0.6442430019378662,
      "learning_rate": 4.722890296053716e-06,
      "loss": 0.0182,
      "step": 1621740
    },
    {
      "epoch": 2.6540458095219392,
      "grad_norm": 0.1701614260673523,
      "learning_rate": 4.722824403840199e-06,
      "loss": 0.0172,
      "step": 1621760
    },
    {
      "epoch": 2.6540785399605924,
      "grad_norm": 0.3275797367095947,
      "learning_rate": 4.7227585116266814e-06,
      "loss": 0.0108,
      "step": 1621780
    },
    {
      "epoch": 2.654111270399246,
      "grad_norm": 0.534603476524353,
      "learning_rate": 4.722692619413164e-06,
      "loss": 0.0174,
      "step": 1621800
    },
    {
      "epoch": 2.654144000837899,
      "grad_norm": 0.35543960332870483,
      "learning_rate": 4.722626727199647e-06,
      "loss": 0.0169,
      "step": 1621820
    },
    {
      "epoch": 2.6541767312765527,
      "grad_norm": 0.6934182643890381,
      "learning_rate": 4.7225608349861305e-06,
      "loss": 0.0139,
      "step": 1621840
    },
    {
      "epoch": 2.654209461715206,
      "grad_norm": 0.5689955353736877,
      "learning_rate": 4.722494942772613e-06,
      "loss": 0.0183,
      "step": 1621860
    },
    {
      "epoch": 2.654242192153859,
      "grad_norm": 1.561062216758728,
      "learning_rate": 4.722429050559096e-06,
      "loss": 0.015,
      "step": 1621880
    },
    {
      "epoch": 2.6542749225925126,
      "grad_norm": 0.28821152448654175,
      "learning_rate": 4.722363158345579e-06,
      "loss": 0.0117,
      "step": 1621900
    },
    {
      "epoch": 2.654307653031166,
      "grad_norm": 0.47506922483444214,
      "learning_rate": 4.7222972661320615e-06,
      "loss": 0.0086,
      "step": 1621920
    },
    {
      "epoch": 2.6543403834698194,
      "grad_norm": 0.5252192616462708,
      "learning_rate": 4.722231373918544e-06,
      "loss": 0.0084,
      "step": 1621940
    },
    {
      "epoch": 2.6543731139084725,
      "grad_norm": 0.3376180827617645,
      "learning_rate": 4.722165481705027e-06,
      "loss": 0.015,
      "step": 1621960
    },
    {
      "epoch": 2.654405844347126,
      "grad_norm": 0.3196803629398346,
      "learning_rate": 4.72209958949151e-06,
      "loss": 0.0145,
      "step": 1621980
    },
    {
      "epoch": 2.6544385747857793,
      "grad_norm": 0.6091244220733643,
      "learning_rate": 4.722033697277993e-06,
      "loss": 0.0205,
      "step": 1622000
    },
    {
      "epoch": 2.6544713052244324,
      "grad_norm": 0.15274089574813843,
      "learning_rate": 4.721967805064476e-06,
      "loss": 0.0239,
      "step": 1622020
    },
    {
      "epoch": 2.654504035663086,
      "grad_norm": 0.7909863591194153,
      "learning_rate": 4.721901912850959e-06,
      "loss": 0.0154,
      "step": 1622040
    },
    {
      "epoch": 2.654536766101739,
      "grad_norm": 0.318962961435318,
      "learning_rate": 4.7218360206374415e-06,
      "loss": 0.0148,
      "step": 1622060
    },
    {
      "epoch": 2.6545694965403928,
      "grad_norm": 0.5124344229698181,
      "learning_rate": 4.721770128423925e-06,
      "loss": 0.0195,
      "step": 1622080
    },
    {
      "epoch": 2.654602226979046,
      "grad_norm": 0.3227026164531708,
      "learning_rate": 4.721704236210408e-06,
      "loss": 0.0202,
      "step": 1622100
    },
    {
      "epoch": 2.6546349574176995,
      "grad_norm": 0.2038695216178894,
      "learning_rate": 4.7216383439968905e-06,
      "loss": 0.0119,
      "step": 1622120
    },
    {
      "epoch": 2.6546676878563527,
      "grad_norm": 0.28649649024009705,
      "learning_rate": 4.721572451783373e-06,
      "loss": 0.019,
      "step": 1622140
    },
    {
      "epoch": 2.654700418295006,
      "grad_norm": 0.15884961187839508,
      "learning_rate": 4.721506559569856e-06,
      "loss": 0.016,
      "step": 1622160
    },
    {
      "epoch": 2.6547331487336594,
      "grad_norm": 0.3406651020050049,
      "learning_rate": 4.721440667356339e-06,
      "loss": 0.017,
      "step": 1622180
    },
    {
      "epoch": 2.6547658791723125,
      "grad_norm": 0.18600785732269287,
      "learning_rate": 4.7213747751428215e-06,
      "loss": 0.0226,
      "step": 1622200
    },
    {
      "epoch": 2.654798609610966,
      "grad_norm": 0.5584917068481445,
      "learning_rate": 4.721308882929305e-06,
      "loss": 0.021,
      "step": 1622220
    },
    {
      "epoch": 2.6548313400496193,
      "grad_norm": 0.22511902451515198,
      "learning_rate": 4.721242990715788e-06,
      "loss": 0.0158,
      "step": 1622240
    },
    {
      "epoch": 2.654864070488273,
      "grad_norm": 0.20637939870357513,
      "learning_rate": 4.7211770985022706e-06,
      "loss": 0.0138,
      "step": 1622260
    },
    {
      "epoch": 2.654896800926926,
      "grad_norm": 0.15233390033245087,
      "learning_rate": 4.721111206288753e-06,
      "loss": 0.0093,
      "step": 1622280
    },
    {
      "epoch": 2.654929531365579,
      "grad_norm": 0.8127347230911255,
      "learning_rate": 4.721045314075236e-06,
      "loss": 0.0166,
      "step": 1622300
    },
    {
      "epoch": 2.654962261804233,
      "grad_norm": 0.28272348642349243,
      "learning_rate": 4.720979421861719e-06,
      "loss": 0.0108,
      "step": 1622320
    },
    {
      "epoch": 2.654994992242886,
      "grad_norm": 0.5166755318641663,
      "learning_rate": 4.7209135296482015e-06,
      "loss": 0.0189,
      "step": 1622340
    },
    {
      "epoch": 2.6550277226815395,
      "grad_norm": 0.0958891436457634,
      "learning_rate": 4.720847637434684e-06,
      "loss": 0.0109,
      "step": 1622360
    },
    {
      "epoch": 2.6550604531201927,
      "grad_norm": 0.3038319945335388,
      "learning_rate": 4.720781745221167e-06,
      "loss": 0.0128,
      "step": 1622380
    },
    {
      "epoch": 2.6550931835588463,
      "grad_norm": 0.7866619825363159,
      "learning_rate": 4.720715853007651e-06,
      "loss": 0.017,
      "step": 1622400
    },
    {
      "epoch": 2.6551259139974994,
      "grad_norm": 0.19037853181362152,
      "learning_rate": 4.720649960794133e-06,
      "loss": 0.013,
      "step": 1622420
    },
    {
      "epoch": 2.6551586444361526,
      "grad_norm": 0.35207805037498474,
      "learning_rate": 4.720584068580616e-06,
      "loss": 0.0124,
      "step": 1622440
    },
    {
      "epoch": 2.655191374874806,
      "grad_norm": 0.9722366333007812,
      "learning_rate": 4.7205181763671e-06,
      "loss": 0.0154,
      "step": 1622460
    },
    {
      "epoch": 2.6552241053134593,
      "grad_norm": 0.20936667919158936,
      "learning_rate": 4.720452284153582e-06,
      "loss": 0.018,
      "step": 1622480
    },
    {
      "epoch": 2.655256835752113,
      "grad_norm": 0.36019253730773926,
      "learning_rate": 4.720386391940065e-06,
      "loss": 0.0172,
      "step": 1622500
    },
    {
      "epoch": 2.655289566190766,
      "grad_norm": 0.32816457748413086,
      "learning_rate": 4.720320499726548e-06,
      "loss": 0.0114,
      "step": 1622520
    },
    {
      "epoch": 2.6553222966294197,
      "grad_norm": 0.050020571798086166,
      "learning_rate": 4.720254607513031e-06,
      "loss": 0.0103,
      "step": 1622540
    },
    {
      "epoch": 2.655355027068073,
      "grad_norm": 0.30117806792259216,
      "learning_rate": 4.720188715299513e-06,
      "loss": 0.0141,
      "step": 1622560
    },
    {
      "epoch": 2.655387757506726,
      "grad_norm": 1.0072818994522095,
      "learning_rate": 4.720122823085996e-06,
      "loss": 0.0207,
      "step": 1622580
    },
    {
      "epoch": 2.6554204879453795,
      "grad_norm": 0.36297646164894104,
      "learning_rate": 4.720056930872479e-06,
      "loss": 0.0196,
      "step": 1622600
    },
    {
      "epoch": 2.6554532183840327,
      "grad_norm": 0.23224109411239624,
      "learning_rate": 4.7199910386589624e-06,
      "loss": 0.0178,
      "step": 1622620
    },
    {
      "epoch": 2.655485948822686,
      "grad_norm": 1.0151528120040894,
      "learning_rate": 4.719925146445445e-06,
      "loss": 0.0165,
      "step": 1622640
    },
    {
      "epoch": 2.6555186792613394,
      "grad_norm": 0.6746690273284912,
      "learning_rate": 4.719859254231928e-06,
      "loss": 0.0191,
      "step": 1622660
    },
    {
      "epoch": 2.655551409699993,
      "grad_norm": 1.0863800048828125,
      "learning_rate": 4.719793362018411e-06,
      "loss": 0.018,
      "step": 1622680
    },
    {
      "epoch": 2.655584140138646,
      "grad_norm": 0.10894085466861725,
      "learning_rate": 4.719727469804893e-06,
      "loss": 0.0135,
      "step": 1622700
    },
    {
      "epoch": 2.6556168705772993,
      "grad_norm": 0.29242679476737976,
      "learning_rate": 4.719661577591376e-06,
      "loss": 0.016,
      "step": 1622720
    },
    {
      "epoch": 2.655649601015953,
      "grad_norm": 0.8479276895523071,
      "learning_rate": 4.719595685377859e-06,
      "loss": 0.021,
      "step": 1622740
    },
    {
      "epoch": 2.655682331454606,
      "grad_norm": 0.4377777874469757,
      "learning_rate": 4.719529793164342e-06,
      "loss": 0.0149,
      "step": 1622760
    },
    {
      "epoch": 2.6557150618932592,
      "grad_norm": 0.5286900997161865,
      "learning_rate": 4.719463900950825e-06,
      "loss": 0.0112,
      "step": 1622780
    },
    {
      "epoch": 2.655747792331913,
      "grad_norm": 0.16554133594036102,
      "learning_rate": 4.719398008737308e-06,
      "loss": 0.011,
      "step": 1622800
    },
    {
      "epoch": 2.6557805227705664,
      "grad_norm": 0.3652116656303406,
      "learning_rate": 4.719332116523791e-06,
      "loss": 0.0165,
      "step": 1622820
    },
    {
      "epoch": 2.6558132532092196,
      "grad_norm": 0.49983182549476624,
      "learning_rate": 4.719266224310273e-06,
      "loss": 0.0158,
      "step": 1622840
    },
    {
      "epoch": 2.6558459836478727,
      "grad_norm": 0.30741003155708313,
      "learning_rate": 4.719200332096757e-06,
      "loss": 0.0177,
      "step": 1622860
    },
    {
      "epoch": 2.6558787140865263,
      "grad_norm": 0.5158022046089172,
      "learning_rate": 4.71913443988324e-06,
      "loss": 0.0137,
      "step": 1622880
    },
    {
      "epoch": 2.6559114445251795,
      "grad_norm": 0.17675669491291046,
      "learning_rate": 4.7190685476697225e-06,
      "loss": 0.0146,
      "step": 1622900
    },
    {
      "epoch": 2.6559441749638326,
      "grad_norm": 0.5794386267662048,
      "learning_rate": 4.719002655456205e-06,
      "loss": 0.0115,
      "step": 1622920
    },
    {
      "epoch": 2.655976905402486,
      "grad_norm": 2.5050246715545654,
      "learning_rate": 4.718936763242688e-06,
      "loss": 0.0127,
      "step": 1622940
    },
    {
      "epoch": 2.65600963584114,
      "grad_norm": 0.26195529103279114,
      "learning_rate": 4.718870871029171e-06,
      "loss": 0.016,
      "step": 1622960
    },
    {
      "epoch": 2.656042366279793,
      "grad_norm": 1.1732914447784424,
      "learning_rate": 4.7188049788156534e-06,
      "loss": 0.0183,
      "step": 1622980
    },
    {
      "epoch": 2.656075096718446,
      "grad_norm": 10.993425369262695,
      "learning_rate": 4.718739086602136e-06,
      "loss": 0.0155,
      "step": 1623000
    },
    {
      "epoch": 2.6561078271570997,
      "grad_norm": 0.3380736708641052,
      "learning_rate": 4.71867319438862e-06,
      "loss": 0.0123,
      "step": 1623020
    },
    {
      "epoch": 2.656140557595753,
      "grad_norm": 0.11884811520576477,
      "learning_rate": 4.7186073021751025e-06,
      "loss": 0.018,
      "step": 1623040
    },
    {
      "epoch": 2.656173288034406,
      "grad_norm": 0.2454155534505844,
      "learning_rate": 4.718541409961585e-06,
      "loss": 0.0165,
      "step": 1623060
    },
    {
      "epoch": 2.6562060184730596,
      "grad_norm": 0.16671998798847198,
      "learning_rate": 4.718475517748068e-06,
      "loss": 0.0122,
      "step": 1623080
    },
    {
      "epoch": 2.656238748911713,
      "grad_norm": 0.33816012740135193,
      "learning_rate": 4.718409625534551e-06,
      "loss": 0.0141,
      "step": 1623100
    },
    {
      "epoch": 2.6562714793503663,
      "grad_norm": 0.12386970221996307,
      "learning_rate": 4.7183437333210335e-06,
      "loss": 0.0134,
      "step": 1623120
    },
    {
      "epoch": 2.6563042097890195,
      "grad_norm": 0.2680530548095703,
      "learning_rate": 4.718277841107517e-06,
      "loss": 0.0234,
      "step": 1623140
    },
    {
      "epoch": 2.656336940227673,
      "grad_norm": 0.2599947452545166,
      "learning_rate": 4.718211948894e-06,
      "loss": 0.0144,
      "step": 1623160
    },
    {
      "epoch": 2.6563696706663262,
      "grad_norm": 0.21399037539958954,
      "learning_rate": 4.7181460566804825e-06,
      "loss": 0.0153,
      "step": 1623180
    },
    {
      "epoch": 2.6564024011049794,
      "grad_norm": 0.10929255932569504,
      "learning_rate": 4.718080164466965e-06,
      "loss": 0.0125,
      "step": 1623200
    },
    {
      "epoch": 2.656435131543633,
      "grad_norm": 0.13265517354011536,
      "learning_rate": 4.718014272253448e-06,
      "loss": 0.0207,
      "step": 1623220
    },
    {
      "epoch": 2.656467861982286,
      "grad_norm": 0.14014066755771637,
      "learning_rate": 4.717948380039931e-06,
      "loss": 0.0117,
      "step": 1623240
    },
    {
      "epoch": 2.6565005924209397,
      "grad_norm": 0.27694061398506165,
      "learning_rate": 4.717882487826414e-06,
      "loss": 0.0212,
      "step": 1623260
    },
    {
      "epoch": 2.656533322859593,
      "grad_norm": 0.42612800002098083,
      "learning_rate": 4.717816595612897e-06,
      "loss": 0.0134,
      "step": 1623280
    },
    {
      "epoch": 2.6565660532982465,
      "grad_norm": 0.3605787456035614,
      "learning_rate": 4.71775070339938e-06,
      "loss": 0.0121,
      "step": 1623300
    },
    {
      "epoch": 2.6565987837368996,
      "grad_norm": 0.37123429775238037,
      "learning_rate": 4.7176848111858626e-06,
      "loss": 0.0108,
      "step": 1623320
    },
    {
      "epoch": 2.6566315141755528,
      "grad_norm": 0.17766454815864563,
      "learning_rate": 4.717618918972345e-06,
      "loss": 0.0135,
      "step": 1623340
    },
    {
      "epoch": 2.6566642446142064,
      "grad_norm": 0.588351309299469,
      "learning_rate": 4.717553026758828e-06,
      "loss": 0.0127,
      "step": 1623360
    },
    {
      "epoch": 2.6566969750528595,
      "grad_norm": 0.34730902314186096,
      "learning_rate": 4.717487134545311e-06,
      "loss": 0.0136,
      "step": 1623380
    },
    {
      "epoch": 2.656729705491513,
      "grad_norm": 0.2730063498020172,
      "learning_rate": 4.7174212423317935e-06,
      "loss": 0.0146,
      "step": 1623400
    },
    {
      "epoch": 2.6567624359301663,
      "grad_norm": 0.6796473860740662,
      "learning_rate": 4.717355350118277e-06,
      "loss": 0.0109,
      "step": 1623420
    },
    {
      "epoch": 2.65679516636882,
      "grad_norm": 0.27349206805229187,
      "learning_rate": 4.71728945790476e-06,
      "loss": 0.0198,
      "step": 1623440
    },
    {
      "epoch": 2.656827896807473,
      "grad_norm": 0.4321068823337555,
      "learning_rate": 4.717223565691243e-06,
      "loss": 0.0158,
      "step": 1623460
    },
    {
      "epoch": 2.656860627246126,
      "grad_norm": 0.2832641899585724,
      "learning_rate": 4.717157673477726e-06,
      "loss": 0.0162,
      "step": 1623480
    },
    {
      "epoch": 2.6568933576847797,
      "grad_norm": 0.3010210692882538,
      "learning_rate": 4.717091781264209e-06,
      "loss": 0.0148,
      "step": 1623500
    },
    {
      "epoch": 2.656926088123433,
      "grad_norm": 0.8846948742866516,
      "learning_rate": 4.717025889050692e-06,
      "loss": 0.0213,
      "step": 1623520
    },
    {
      "epoch": 2.6569588185620865,
      "grad_norm": 0.5388503074645996,
      "learning_rate": 4.716959996837174e-06,
      "loss": 0.0178,
      "step": 1623540
    },
    {
      "epoch": 2.6569915490007396,
      "grad_norm": 0.3934527039527893,
      "learning_rate": 4.716894104623657e-06,
      "loss": 0.0144,
      "step": 1623560
    },
    {
      "epoch": 2.6570242794393932,
      "grad_norm": 0.1273823380470276,
      "learning_rate": 4.71682821241014e-06,
      "loss": 0.0123,
      "step": 1623580
    },
    {
      "epoch": 2.6570570098780464,
      "grad_norm": 0.594521701335907,
      "learning_rate": 4.716762320196623e-06,
      "loss": 0.0096,
      "step": 1623600
    },
    {
      "epoch": 2.6570897403166995,
      "grad_norm": 0.16450704634189606,
      "learning_rate": 4.716696427983105e-06,
      "loss": 0.0166,
      "step": 1623620
    },
    {
      "epoch": 2.657122470755353,
      "grad_norm": 0.1841309517621994,
      "learning_rate": 4.716630535769589e-06,
      "loss": 0.0166,
      "step": 1623640
    },
    {
      "epoch": 2.6571552011940063,
      "grad_norm": 0.14799313247203827,
      "learning_rate": 4.716564643556072e-06,
      "loss": 0.0117,
      "step": 1623660
    },
    {
      "epoch": 2.65718793163266,
      "grad_norm": 0.35963624715805054,
      "learning_rate": 4.716498751342554e-06,
      "loss": 0.0093,
      "step": 1623680
    },
    {
      "epoch": 2.657220662071313,
      "grad_norm": 0.3477712571620941,
      "learning_rate": 4.716432859129037e-06,
      "loss": 0.0166,
      "step": 1623700
    },
    {
      "epoch": 2.6572533925099666,
      "grad_norm": 0.6075891852378845,
      "learning_rate": 4.71636696691552e-06,
      "loss": 0.0185,
      "step": 1623720
    },
    {
      "epoch": 2.6572861229486198,
      "grad_norm": 0.32784444093704224,
      "learning_rate": 4.716301074702003e-06,
      "loss": 0.0097,
      "step": 1623740
    },
    {
      "epoch": 2.657318853387273,
      "grad_norm": 0.4035257399082184,
      "learning_rate": 4.716235182488485e-06,
      "loss": 0.0118,
      "step": 1623760
    },
    {
      "epoch": 2.6573515838259265,
      "grad_norm": 0.11951223760843277,
      "learning_rate": 4.716169290274968e-06,
      "loss": 0.0126,
      "step": 1623780
    },
    {
      "epoch": 2.6573843142645797,
      "grad_norm": 0.4459577798843384,
      "learning_rate": 4.716103398061451e-06,
      "loss": 0.0176,
      "step": 1623800
    },
    {
      "epoch": 2.6574170447032333,
      "grad_norm": 0.20768818259239197,
      "learning_rate": 4.7160375058479344e-06,
      "loss": 0.0188,
      "step": 1623820
    },
    {
      "epoch": 2.6574497751418864,
      "grad_norm": 0.18469133973121643,
      "learning_rate": 4.715971613634417e-06,
      "loss": 0.0191,
      "step": 1623840
    },
    {
      "epoch": 2.65748250558054,
      "grad_norm": 0.3214644491672516,
      "learning_rate": 4.7159057214209e-06,
      "loss": 0.0149,
      "step": 1623860
    },
    {
      "epoch": 2.657515236019193,
      "grad_norm": 0.40604329109191895,
      "learning_rate": 4.7158398292073835e-06,
      "loss": 0.013,
      "step": 1623880
    },
    {
      "epoch": 2.6575479664578463,
      "grad_norm": 0.7663156390190125,
      "learning_rate": 4.715773936993866e-06,
      "loss": 0.0201,
      "step": 1623900
    },
    {
      "epoch": 2.6575806968965,
      "grad_norm": 0.3415672183036804,
      "learning_rate": 4.715708044780349e-06,
      "loss": 0.019,
      "step": 1623920
    },
    {
      "epoch": 2.657613427335153,
      "grad_norm": 0.29638370871543884,
      "learning_rate": 4.715642152566832e-06,
      "loss": 0.0179,
      "step": 1623940
    },
    {
      "epoch": 2.6576461577738066,
      "grad_norm": 0.8564144968986511,
      "learning_rate": 4.7155762603533145e-06,
      "loss": 0.0139,
      "step": 1623960
    },
    {
      "epoch": 2.65767888821246,
      "grad_norm": 2.2813360691070557,
      "learning_rate": 4.715510368139797e-06,
      "loss": 0.0139,
      "step": 1623980
    },
    {
      "epoch": 2.6577116186511134,
      "grad_norm": 0.5366864800453186,
      "learning_rate": 4.71544447592628e-06,
      "loss": 0.0145,
      "step": 1624000
    },
    {
      "epoch": 2.6577443490897665,
      "grad_norm": 1.7785815000534058,
      "learning_rate": 4.715378583712763e-06,
      "loss": 0.0157,
      "step": 1624020
    },
    {
      "epoch": 2.6577770795284197,
      "grad_norm": 0.21006007492542267,
      "learning_rate": 4.715312691499246e-06,
      "loss": 0.0089,
      "step": 1624040
    },
    {
      "epoch": 2.6578098099670733,
      "grad_norm": 0.22955147922039032,
      "learning_rate": 4.715246799285729e-06,
      "loss": 0.0168,
      "step": 1624060
    },
    {
      "epoch": 2.6578425404057264,
      "grad_norm": 0.545656144618988,
      "learning_rate": 4.715180907072212e-06,
      "loss": 0.0145,
      "step": 1624080
    },
    {
      "epoch": 2.65787527084438,
      "grad_norm": 0.2503992021083832,
      "learning_rate": 4.7151150148586945e-06,
      "loss": 0.014,
      "step": 1624100
    },
    {
      "epoch": 2.657908001283033,
      "grad_norm": 0.11254019290208817,
      "learning_rate": 4.715049122645177e-06,
      "loss": 0.0094,
      "step": 1624120
    },
    {
      "epoch": 2.6579407317216868,
      "grad_norm": 0.1853552758693695,
      "learning_rate": 4.71498323043166e-06,
      "loss": 0.0139,
      "step": 1624140
    },
    {
      "epoch": 2.65797346216034,
      "grad_norm": 0.2512907385826111,
      "learning_rate": 4.714917338218143e-06,
      "loss": 0.0158,
      "step": 1624160
    },
    {
      "epoch": 2.658006192598993,
      "grad_norm": 0.26319578289985657,
      "learning_rate": 4.7148514460046254e-06,
      "loss": 0.0164,
      "step": 1624180
    },
    {
      "epoch": 2.6580389230376467,
      "grad_norm": 0.14140042662620544,
      "learning_rate": 4.714785553791109e-06,
      "loss": 0.0159,
      "step": 1624200
    },
    {
      "epoch": 2.6580716534763,
      "grad_norm": 0.29864490032196045,
      "learning_rate": 4.714719661577592e-06,
      "loss": 0.012,
      "step": 1624220
    },
    {
      "epoch": 2.658104383914953,
      "grad_norm": 0.052318766713142395,
      "learning_rate": 4.7146537693640745e-06,
      "loss": 0.0124,
      "step": 1624240
    },
    {
      "epoch": 2.6581371143536066,
      "grad_norm": 1.0473196506500244,
      "learning_rate": 4.714587877150557e-06,
      "loss": 0.0134,
      "step": 1624260
    },
    {
      "epoch": 2.65816984479226,
      "grad_norm": 0.23389770090579987,
      "learning_rate": 4.714521984937041e-06,
      "loss": 0.0153,
      "step": 1624280
    },
    {
      "epoch": 2.6582025752309133,
      "grad_norm": 0.83820641040802,
      "learning_rate": 4.7144560927235236e-06,
      "loss": 0.015,
      "step": 1624300
    },
    {
      "epoch": 2.6582353056695665,
      "grad_norm": 0.27705156803131104,
      "learning_rate": 4.714390200510006e-06,
      "loss": 0.0069,
      "step": 1624320
    },
    {
      "epoch": 2.65826803610822,
      "grad_norm": 0.4161039888858795,
      "learning_rate": 4.714324308296489e-06,
      "loss": 0.0132,
      "step": 1624340
    },
    {
      "epoch": 2.658300766546873,
      "grad_norm": 0.4258638918399811,
      "learning_rate": 4.714258416082972e-06,
      "loss": 0.0178,
      "step": 1624360
    },
    {
      "epoch": 2.6583334969855263,
      "grad_norm": 0.03156731277704239,
      "learning_rate": 4.7141925238694545e-06,
      "loss": 0.0137,
      "step": 1624380
    },
    {
      "epoch": 2.65836622742418,
      "grad_norm": 0.4050704538822174,
      "learning_rate": 4.714126631655937e-06,
      "loss": 0.0221,
      "step": 1624400
    },
    {
      "epoch": 2.6583989578628335,
      "grad_norm": 0.911319375038147,
      "learning_rate": 4.71406073944242e-06,
      "loss": 0.0233,
      "step": 1624420
    },
    {
      "epoch": 2.6584316883014867,
      "grad_norm": 0.4452662765979767,
      "learning_rate": 4.713994847228904e-06,
      "loss": 0.0164,
      "step": 1624440
    },
    {
      "epoch": 2.65846441874014,
      "grad_norm": 0.7263277769088745,
      "learning_rate": 4.713928955015386e-06,
      "loss": 0.0152,
      "step": 1624460
    },
    {
      "epoch": 2.6584971491787934,
      "grad_norm": 0.7993529438972473,
      "learning_rate": 4.713863062801869e-06,
      "loss": 0.0096,
      "step": 1624480
    },
    {
      "epoch": 2.6585298796174466,
      "grad_norm": 0.20285707712173462,
      "learning_rate": 4.713797170588352e-06,
      "loss": 0.0148,
      "step": 1624500
    },
    {
      "epoch": 2.6585626100560997,
      "grad_norm": 0.2750069797039032,
      "learning_rate": 4.7137312783748346e-06,
      "loss": 0.0143,
      "step": 1624520
    },
    {
      "epoch": 2.6585953404947533,
      "grad_norm": 0.6190347671508789,
      "learning_rate": 4.713665386161318e-06,
      "loss": 0.0221,
      "step": 1624540
    },
    {
      "epoch": 2.658628070933407,
      "grad_norm": 0.4544205963611603,
      "learning_rate": 4.713599493947801e-06,
      "loss": 0.0159,
      "step": 1624560
    },
    {
      "epoch": 2.65866080137206,
      "grad_norm": 0.7416742444038391,
      "learning_rate": 4.713533601734284e-06,
      "loss": 0.0119,
      "step": 1624580
    },
    {
      "epoch": 2.658693531810713,
      "grad_norm": 0.7396405339241028,
      "learning_rate": 4.713467709520766e-06,
      "loss": 0.0188,
      "step": 1624600
    },
    {
      "epoch": 2.658726262249367,
      "grad_norm": 0.07921214401721954,
      "learning_rate": 4.713401817307249e-06,
      "loss": 0.018,
      "step": 1624620
    },
    {
      "epoch": 2.65875899268802,
      "grad_norm": 0.2585615813732147,
      "learning_rate": 4.713335925093732e-06,
      "loss": 0.0155,
      "step": 1624640
    },
    {
      "epoch": 2.658791723126673,
      "grad_norm": 0.32109975814819336,
      "learning_rate": 4.713270032880215e-06,
      "loss": 0.0153,
      "step": 1624660
    },
    {
      "epoch": 2.6588244535653267,
      "grad_norm": 0.21916145086288452,
      "learning_rate": 4.713204140666698e-06,
      "loss": 0.0129,
      "step": 1624680
    },
    {
      "epoch": 2.65885718400398,
      "grad_norm": 0.21111711859703064,
      "learning_rate": 4.713138248453181e-06,
      "loss": 0.0132,
      "step": 1624700
    },
    {
      "epoch": 2.6588899144426335,
      "grad_norm": 0.6875109672546387,
      "learning_rate": 4.713072356239664e-06,
      "loss": 0.0123,
      "step": 1624720
    },
    {
      "epoch": 2.6589226448812866,
      "grad_norm": 0.5141668319702148,
      "learning_rate": 4.713006464026146e-06,
      "loss": 0.0142,
      "step": 1624740
    },
    {
      "epoch": 2.65895537531994,
      "grad_norm": 1.1480693817138672,
      "learning_rate": 4.712940571812629e-06,
      "loss": 0.0156,
      "step": 1624760
    },
    {
      "epoch": 2.6589881057585933,
      "grad_norm": 0.6495369076728821,
      "learning_rate": 4.712874679599112e-06,
      "loss": 0.0136,
      "step": 1624780
    },
    {
      "epoch": 2.6590208361972465,
      "grad_norm": 0.19569379091262817,
      "learning_rate": 4.712808787385595e-06,
      "loss": 0.0148,
      "step": 1624800
    },
    {
      "epoch": 2.6590535666359,
      "grad_norm": 0.8450332283973694,
      "learning_rate": 4.712742895172077e-06,
      "loss": 0.0168,
      "step": 1624820
    },
    {
      "epoch": 2.6590862970745532,
      "grad_norm": 0.7751008868217468,
      "learning_rate": 4.712677002958561e-06,
      "loss": 0.0187,
      "step": 1624840
    },
    {
      "epoch": 2.659119027513207,
      "grad_norm": 0.13613256812095642,
      "learning_rate": 4.712611110745044e-06,
      "loss": 0.0102,
      "step": 1624860
    },
    {
      "epoch": 2.65915175795186,
      "grad_norm": 0.5959937572479248,
      "learning_rate": 4.712545218531526e-06,
      "loss": 0.0134,
      "step": 1624880
    },
    {
      "epoch": 2.6591844883905136,
      "grad_norm": 0.20123550295829773,
      "learning_rate": 4.71247932631801e-06,
      "loss": 0.0127,
      "step": 1624900
    },
    {
      "epoch": 2.6592172188291667,
      "grad_norm": 0.14948493242263794,
      "learning_rate": 4.712413434104493e-06,
      "loss": 0.0105,
      "step": 1624920
    },
    {
      "epoch": 2.65924994926782,
      "grad_norm": 0.30572906136512756,
      "learning_rate": 4.7123475418909755e-06,
      "loss": 0.0165,
      "step": 1624940
    },
    {
      "epoch": 2.6592826797064735,
      "grad_norm": 0.145099475979805,
      "learning_rate": 4.712281649677458e-06,
      "loss": 0.0215,
      "step": 1624960
    },
    {
      "epoch": 2.6593154101451266,
      "grad_norm": 0.717877209186554,
      "learning_rate": 4.712215757463941e-06,
      "loss": 0.0106,
      "step": 1624980
    },
    {
      "epoch": 2.65934814058378,
      "grad_norm": 0.8639935851097107,
      "learning_rate": 4.712149865250424e-06,
      "loss": 0.0127,
      "step": 1625000
    },
    {
      "epoch": 2.6593808710224334,
      "grad_norm": 0.3067719042301178,
      "learning_rate": 4.7120839730369064e-06,
      "loss": 0.0179,
      "step": 1625020
    },
    {
      "epoch": 2.659413601461087,
      "grad_norm": 0.1897406280040741,
      "learning_rate": 4.712018080823389e-06,
      "loss": 0.014,
      "step": 1625040
    },
    {
      "epoch": 2.65944633189974,
      "grad_norm": 0.1839669942855835,
      "learning_rate": 4.711952188609873e-06,
      "loss": 0.0126,
      "step": 1625060
    },
    {
      "epoch": 2.6594790623383933,
      "grad_norm": 0.3752739727497101,
      "learning_rate": 4.7118862963963555e-06,
      "loss": 0.019,
      "step": 1625080
    },
    {
      "epoch": 2.659511792777047,
      "grad_norm": 0.8551315069198608,
      "learning_rate": 4.711820404182838e-06,
      "loss": 0.0165,
      "step": 1625100
    },
    {
      "epoch": 2.6595445232157,
      "grad_norm": 0.3610025644302368,
      "learning_rate": 4.711754511969321e-06,
      "loss": 0.0114,
      "step": 1625120
    },
    {
      "epoch": 2.6595772536543536,
      "grad_norm": 0.30814602971076965,
      "learning_rate": 4.711688619755804e-06,
      "loss": 0.016,
      "step": 1625140
    },
    {
      "epoch": 2.6596099840930068,
      "grad_norm": 0.18643677234649658,
      "learning_rate": 4.7116227275422865e-06,
      "loss": 0.0188,
      "step": 1625160
    },
    {
      "epoch": 2.6596427145316603,
      "grad_norm": 0.5418510437011719,
      "learning_rate": 4.711556835328769e-06,
      "loss": 0.0122,
      "step": 1625180
    },
    {
      "epoch": 2.6596754449703135,
      "grad_norm": 0.27852320671081543,
      "learning_rate": 4.711490943115252e-06,
      "loss": 0.0239,
      "step": 1625200
    },
    {
      "epoch": 2.6597081754089666,
      "grad_norm": 0.6425778269767761,
      "learning_rate": 4.711425050901735e-06,
      "loss": 0.0123,
      "step": 1625220
    },
    {
      "epoch": 2.6597409058476202,
      "grad_norm": 0.31458568572998047,
      "learning_rate": 4.711359158688218e-06,
      "loss": 0.0128,
      "step": 1625240
    },
    {
      "epoch": 2.6597736362862734,
      "grad_norm": 0.2127537727355957,
      "learning_rate": 4.711293266474701e-06,
      "loss": 0.0147,
      "step": 1625260
    },
    {
      "epoch": 2.659806366724927,
      "grad_norm": 0.3067755699157715,
      "learning_rate": 4.711227374261184e-06,
      "loss": 0.0196,
      "step": 1625280
    },
    {
      "epoch": 2.65983909716358,
      "grad_norm": 0.26816993951797485,
      "learning_rate": 4.711161482047667e-06,
      "loss": 0.0148,
      "step": 1625300
    },
    {
      "epoch": 2.6598718276022337,
      "grad_norm": 0.4980987012386322,
      "learning_rate": 4.71109558983415e-06,
      "loss": 0.0132,
      "step": 1625320
    },
    {
      "epoch": 2.659904558040887,
      "grad_norm": 0.35386696457862854,
      "learning_rate": 4.711029697620633e-06,
      "loss": 0.0151,
      "step": 1625340
    },
    {
      "epoch": 2.65993728847954,
      "grad_norm": 0.42598679661750793,
      "learning_rate": 4.7109638054071156e-06,
      "loss": 0.0163,
      "step": 1625360
    },
    {
      "epoch": 2.6599700189181936,
      "grad_norm": 0.1303391009569168,
      "learning_rate": 4.710897913193598e-06,
      "loss": 0.0143,
      "step": 1625380
    },
    {
      "epoch": 2.6600027493568468,
      "grad_norm": 1.3679360151290894,
      "learning_rate": 4.710832020980081e-06,
      "loss": 0.0143,
      "step": 1625400
    },
    {
      "epoch": 2.6600354797955004,
      "grad_norm": 0.343304842710495,
      "learning_rate": 4.710766128766564e-06,
      "loss": 0.0124,
      "step": 1625420
    },
    {
      "epoch": 2.6600682102341535,
      "grad_norm": 0.2927360534667969,
      "learning_rate": 4.7107002365530465e-06,
      "loss": 0.0104,
      "step": 1625440
    },
    {
      "epoch": 2.660100940672807,
      "grad_norm": 0.14468525350093842,
      "learning_rate": 4.71063434433953e-06,
      "loss": 0.0206,
      "step": 1625460
    },
    {
      "epoch": 2.6601336711114603,
      "grad_norm": 0.1707705408334732,
      "learning_rate": 4.710568452126013e-06,
      "loss": 0.0159,
      "step": 1625480
    },
    {
      "epoch": 2.6601664015501134,
      "grad_norm": 0.712656557559967,
      "learning_rate": 4.710502559912496e-06,
      "loss": 0.0133,
      "step": 1625500
    },
    {
      "epoch": 2.660199131988767,
      "grad_norm": 0.6446670293807983,
      "learning_rate": 4.710436667698978e-06,
      "loss": 0.0139,
      "step": 1625520
    },
    {
      "epoch": 2.66023186242742,
      "grad_norm": 0.22893363237380981,
      "learning_rate": 4.710370775485461e-06,
      "loss": 0.0133,
      "step": 1625540
    },
    {
      "epoch": 2.6602645928660738,
      "grad_norm": 0.5963773727416992,
      "learning_rate": 4.710304883271944e-06,
      "loss": 0.0153,
      "step": 1625560
    },
    {
      "epoch": 2.660297323304727,
      "grad_norm": 0.21844792366027832,
      "learning_rate": 4.7102389910584265e-06,
      "loss": 0.0108,
      "step": 1625580
    },
    {
      "epoch": 2.6603300537433805,
      "grad_norm": 0.11262263357639313,
      "learning_rate": 4.71017309884491e-06,
      "loss": 0.0078,
      "step": 1625600
    },
    {
      "epoch": 2.6603627841820336,
      "grad_norm": 0.687985360622406,
      "learning_rate": 4.710107206631393e-06,
      "loss": 0.0158,
      "step": 1625620
    },
    {
      "epoch": 2.660395514620687,
      "grad_norm": 0.47163093090057373,
      "learning_rate": 4.710041314417876e-06,
      "loss": 0.0145,
      "step": 1625640
    },
    {
      "epoch": 2.6604282450593404,
      "grad_norm": 0.26705917716026306,
      "learning_rate": 4.709975422204358e-06,
      "loss": 0.0102,
      "step": 1625660
    },
    {
      "epoch": 2.6604609754979935,
      "grad_norm": 0.6677785515785217,
      "learning_rate": 4.709909529990841e-06,
      "loss": 0.0152,
      "step": 1625680
    },
    {
      "epoch": 2.6604937059366467,
      "grad_norm": 0.1305101066827774,
      "learning_rate": 4.709843637777325e-06,
      "loss": 0.0125,
      "step": 1625700
    },
    {
      "epoch": 2.6605264363753003,
      "grad_norm": 0.1264239251613617,
      "learning_rate": 4.709777745563807e-06,
      "loss": 0.0188,
      "step": 1625720
    },
    {
      "epoch": 2.660559166813954,
      "grad_norm": 0.309175580739975,
      "learning_rate": 4.70971185335029e-06,
      "loss": 0.0188,
      "step": 1625740
    },
    {
      "epoch": 2.660591897252607,
      "grad_norm": 0.03514738380908966,
      "learning_rate": 4.709645961136773e-06,
      "loss": 0.0164,
      "step": 1625760
    },
    {
      "epoch": 2.66062462769126,
      "grad_norm": 0.07195468246936798,
      "learning_rate": 4.709580068923256e-06,
      "loss": 0.0128,
      "step": 1625780
    },
    {
      "epoch": 2.6606573581299138,
      "grad_norm": 0.425079345703125,
      "learning_rate": 4.709514176709738e-06,
      "loss": 0.014,
      "step": 1625800
    },
    {
      "epoch": 2.660690088568567,
      "grad_norm": 0.10444390773773193,
      "learning_rate": 4.709448284496221e-06,
      "loss": 0.0143,
      "step": 1625820
    },
    {
      "epoch": 2.66072281900722,
      "grad_norm": 0.9143573641777039,
      "learning_rate": 4.709382392282704e-06,
      "loss": 0.0182,
      "step": 1625840
    },
    {
      "epoch": 2.6607555494458737,
      "grad_norm": 0.7014896273612976,
      "learning_rate": 4.7093165000691874e-06,
      "loss": 0.0133,
      "step": 1625860
    },
    {
      "epoch": 2.6607882798845273,
      "grad_norm": 0.16427567601203918,
      "learning_rate": 4.70925060785567e-06,
      "loss": 0.0129,
      "step": 1625880
    },
    {
      "epoch": 2.6608210103231804,
      "grad_norm": 0.6191108226776123,
      "learning_rate": 4.709184715642153e-06,
      "loss": 0.0134,
      "step": 1625900
    },
    {
      "epoch": 2.6608537407618336,
      "grad_norm": 0.5033443570137024,
      "learning_rate": 4.709118823428636e-06,
      "loss": 0.0088,
      "step": 1625920
    },
    {
      "epoch": 2.660886471200487,
      "grad_norm": 0.4956185221672058,
      "learning_rate": 4.709052931215119e-06,
      "loss": 0.0142,
      "step": 1625940
    },
    {
      "epoch": 2.6609192016391403,
      "grad_norm": 1.1846492290496826,
      "learning_rate": 4.708987039001602e-06,
      "loss": 0.0121,
      "step": 1625960
    },
    {
      "epoch": 2.6609519320777935,
      "grad_norm": 0.38664042949676514,
      "learning_rate": 4.708921146788085e-06,
      "loss": 0.0114,
      "step": 1625980
    },
    {
      "epoch": 2.660984662516447,
      "grad_norm": 0.33891746401786804,
      "learning_rate": 4.7088552545745675e-06,
      "loss": 0.0131,
      "step": 1626000
    },
    {
      "epoch": 2.6610173929551006,
      "grad_norm": 0.6342112421989441,
      "learning_rate": 4.70878936236105e-06,
      "loss": 0.0147,
      "step": 1626020
    },
    {
      "epoch": 2.661050123393754,
      "grad_norm": 0.05445410683751106,
      "learning_rate": 4.708723470147533e-06,
      "loss": 0.0159,
      "step": 1626040
    },
    {
      "epoch": 2.661082853832407,
      "grad_norm": 0.48225364089012146,
      "learning_rate": 4.708657577934016e-06,
      "loss": 0.0129,
      "step": 1626060
    },
    {
      "epoch": 2.6611155842710605,
      "grad_norm": 0.3975810110569,
      "learning_rate": 4.708591685720499e-06,
      "loss": 0.0133,
      "step": 1626080
    },
    {
      "epoch": 2.6611483147097137,
      "grad_norm": 0.35702452063560486,
      "learning_rate": 4.708525793506982e-06,
      "loss": 0.0086,
      "step": 1626100
    },
    {
      "epoch": 2.661181045148367,
      "grad_norm": 0.7486496567726135,
      "learning_rate": 4.708459901293465e-06,
      "loss": 0.0155,
      "step": 1626120
    },
    {
      "epoch": 2.6612137755870204,
      "grad_norm": 0.11230256408452988,
      "learning_rate": 4.7083940090799475e-06,
      "loss": 0.0163,
      "step": 1626140
    },
    {
      "epoch": 2.661246506025674,
      "grad_norm": 0.28077250719070435,
      "learning_rate": 4.70832811686643e-06,
      "loss": 0.0147,
      "step": 1626160
    },
    {
      "epoch": 2.661279236464327,
      "grad_norm": 0.28421255946159363,
      "learning_rate": 4.708262224652913e-06,
      "loss": 0.0153,
      "step": 1626180
    },
    {
      "epoch": 2.6613119669029803,
      "grad_norm": 0.6449581980705261,
      "learning_rate": 4.708196332439396e-06,
      "loss": 0.01,
      "step": 1626200
    },
    {
      "epoch": 2.661344697341634,
      "grad_norm": 0.10677197575569153,
      "learning_rate": 4.7081304402258784e-06,
      "loss": 0.0144,
      "step": 1626220
    },
    {
      "epoch": 2.661377427780287,
      "grad_norm": 0.4101075530052185,
      "learning_rate": 4.708064548012361e-06,
      "loss": 0.0154,
      "step": 1626240
    },
    {
      "epoch": 2.6614101582189402,
      "grad_norm": 0.13706494867801666,
      "learning_rate": 4.707998655798845e-06,
      "loss": 0.0222,
      "step": 1626260
    },
    {
      "epoch": 2.661442888657594,
      "grad_norm": 0.15318025648593903,
      "learning_rate": 4.7079327635853275e-06,
      "loss": 0.0152,
      "step": 1626280
    },
    {
      "epoch": 2.661475619096247,
      "grad_norm": 0.33968430757522583,
      "learning_rate": 4.70786687137181e-06,
      "loss": 0.0171,
      "step": 1626300
    },
    {
      "epoch": 2.6615083495349006,
      "grad_norm": 0.23297171294689178,
      "learning_rate": 4.707800979158294e-06,
      "loss": 0.0151,
      "step": 1626320
    },
    {
      "epoch": 2.6615410799735537,
      "grad_norm": 0.22437560558319092,
      "learning_rate": 4.707735086944777e-06,
      "loss": 0.0139,
      "step": 1626340
    },
    {
      "epoch": 2.6615738104122073,
      "grad_norm": 0.2070533037185669,
      "learning_rate": 4.707669194731259e-06,
      "loss": 0.0131,
      "step": 1626360
    },
    {
      "epoch": 2.6616065408508605,
      "grad_norm": 0.13542145490646362,
      "learning_rate": 4.707603302517742e-06,
      "loss": 0.0113,
      "step": 1626380
    },
    {
      "epoch": 2.6616392712895136,
      "grad_norm": 0.2162581831216812,
      "learning_rate": 4.707537410304225e-06,
      "loss": 0.013,
      "step": 1626400
    },
    {
      "epoch": 2.661672001728167,
      "grad_norm": 0.39683717489242554,
      "learning_rate": 4.7074715180907075e-06,
      "loss": 0.0178,
      "step": 1626420
    },
    {
      "epoch": 2.6617047321668204,
      "grad_norm": 0.4867953956127167,
      "learning_rate": 4.70740562587719e-06,
      "loss": 0.0136,
      "step": 1626440
    },
    {
      "epoch": 2.661737462605474,
      "grad_norm": 0.21363325417041779,
      "learning_rate": 4.707339733663673e-06,
      "loss": 0.0091,
      "step": 1626460
    },
    {
      "epoch": 2.661770193044127,
      "grad_norm": 0.7629294395446777,
      "learning_rate": 4.707273841450157e-06,
      "loss": 0.0158,
      "step": 1626480
    },
    {
      "epoch": 2.6618029234827807,
      "grad_norm": 0.17115594446659088,
      "learning_rate": 4.707207949236639e-06,
      "loss": 0.0167,
      "step": 1626500
    },
    {
      "epoch": 2.661835653921434,
      "grad_norm": 0.5014370679855347,
      "learning_rate": 4.707142057023122e-06,
      "loss": 0.0111,
      "step": 1626520
    },
    {
      "epoch": 2.661868384360087,
      "grad_norm": 0.5397683382034302,
      "learning_rate": 4.707076164809605e-06,
      "loss": 0.0231,
      "step": 1626540
    },
    {
      "epoch": 2.6619011147987406,
      "grad_norm": 0.7340003848075867,
      "learning_rate": 4.7070102725960876e-06,
      "loss": 0.0156,
      "step": 1626560
    },
    {
      "epoch": 2.6619338452373937,
      "grad_norm": 0.18873131275177002,
      "learning_rate": 4.70694438038257e-06,
      "loss": 0.0111,
      "step": 1626580
    },
    {
      "epoch": 2.6619665756760473,
      "grad_norm": 0.17944547533988953,
      "learning_rate": 4.706878488169053e-06,
      "loss": 0.0159,
      "step": 1626600
    },
    {
      "epoch": 2.6619993061147005,
      "grad_norm": 0.31166672706604004,
      "learning_rate": 4.706812595955536e-06,
      "loss": 0.0137,
      "step": 1626620
    },
    {
      "epoch": 2.662032036553354,
      "grad_norm": 0.350972443819046,
      "learning_rate": 4.7067467037420185e-06,
      "loss": 0.0167,
      "step": 1626640
    },
    {
      "epoch": 2.6620647669920072,
      "grad_norm": 0.5765329003334045,
      "learning_rate": 4.706680811528502e-06,
      "loss": 0.0212,
      "step": 1626660
    },
    {
      "epoch": 2.6620974974306604,
      "grad_norm": 0.1948293000459671,
      "learning_rate": 4.706614919314985e-06,
      "loss": 0.0086,
      "step": 1626680
    },
    {
      "epoch": 2.662130227869314,
      "grad_norm": 0.19312241673469543,
      "learning_rate": 4.706549027101468e-06,
      "loss": 0.0198,
      "step": 1626700
    },
    {
      "epoch": 2.662162958307967,
      "grad_norm": 1.641156554222107,
      "learning_rate": 4.706483134887951e-06,
      "loss": 0.0132,
      "step": 1626720
    },
    {
      "epoch": 2.6621956887466207,
      "grad_norm": 0.5439913868904114,
      "learning_rate": 4.706417242674434e-06,
      "loss": 0.0119,
      "step": 1626740
    },
    {
      "epoch": 2.662228419185274,
      "grad_norm": 0.023376677185297012,
      "learning_rate": 4.706351350460917e-06,
      "loss": 0.0139,
      "step": 1626760
    },
    {
      "epoch": 2.6622611496239275,
      "grad_norm": 0.5737183094024658,
      "learning_rate": 4.706285458247399e-06,
      "loss": 0.0125,
      "step": 1626780
    },
    {
      "epoch": 2.6622938800625806,
      "grad_norm": 0.3687933087348938,
      "learning_rate": 4.706219566033882e-06,
      "loss": 0.0149,
      "step": 1626800
    },
    {
      "epoch": 2.6623266105012338,
      "grad_norm": 0.06778494268655777,
      "learning_rate": 4.706153673820365e-06,
      "loss": 0.0122,
      "step": 1626820
    },
    {
      "epoch": 2.6623593409398874,
      "grad_norm": 0.43317291140556335,
      "learning_rate": 4.706087781606848e-06,
      "loss": 0.0115,
      "step": 1626840
    },
    {
      "epoch": 2.6623920713785405,
      "grad_norm": 0.5355703234672546,
      "learning_rate": 4.70602188939333e-06,
      "loss": 0.0127,
      "step": 1626860
    },
    {
      "epoch": 2.662424801817194,
      "grad_norm": 0.08433546125888824,
      "learning_rate": 4.705955997179814e-06,
      "loss": 0.0097,
      "step": 1626880
    },
    {
      "epoch": 2.6624575322558472,
      "grad_norm": 0.11130852997303009,
      "learning_rate": 4.705890104966297e-06,
      "loss": 0.0089,
      "step": 1626900
    },
    {
      "epoch": 2.662490262694501,
      "grad_norm": 0.24934621155261993,
      "learning_rate": 4.705824212752779e-06,
      "loss": 0.0104,
      "step": 1626920
    },
    {
      "epoch": 2.662522993133154,
      "grad_norm": 0.45505544543266296,
      "learning_rate": 4.705758320539262e-06,
      "loss": 0.0118,
      "step": 1626940
    },
    {
      "epoch": 2.662555723571807,
      "grad_norm": 0.5547412633895874,
      "learning_rate": 4.705692428325745e-06,
      "loss": 0.0141,
      "step": 1626960
    },
    {
      "epoch": 2.6625884540104607,
      "grad_norm": 0.690051257610321,
      "learning_rate": 4.705626536112228e-06,
      "loss": 0.0142,
      "step": 1626980
    },
    {
      "epoch": 2.662621184449114,
      "grad_norm": 0.5016953945159912,
      "learning_rate": 4.705560643898711e-06,
      "loss": 0.0145,
      "step": 1627000
    },
    {
      "epoch": 2.6626539148877675,
      "grad_norm": 0.23111028969287872,
      "learning_rate": 4.705494751685194e-06,
      "loss": 0.0129,
      "step": 1627020
    },
    {
      "epoch": 2.6626866453264206,
      "grad_norm": 0.21047179400920868,
      "learning_rate": 4.705428859471677e-06,
      "loss": 0.011,
      "step": 1627040
    },
    {
      "epoch": 2.6627193757650742,
      "grad_norm": 0.31114059686660767,
      "learning_rate": 4.7053629672581594e-06,
      "loss": 0.0146,
      "step": 1627060
    },
    {
      "epoch": 2.6627521062037274,
      "grad_norm": 0.2011350691318512,
      "learning_rate": 4.705297075044642e-06,
      "loss": 0.0163,
      "step": 1627080
    },
    {
      "epoch": 2.6627848366423805,
      "grad_norm": 0.2127634882926941,
      "learning_rate": 4.705231182831125e-06,
      "loss": 0.0117,
      "step": 1627100
    },
    {
      "epoch": 2.662817567081034,
      "grad_norm": 0.6635761857032776,
      "learning_rate": 4.7051652906176085e-06,
      "loss": 0.0104,
      "step": 1627120
    },
    {
      "epoch": 2.6628502975196873,
      "grad_norm": 0.17643074691295624,
      "learning_rate": 4.705099398404091e-06,
      "loss": 0.0151,
      "step": 1627140
    },
    {
      "epoch": 2.6628830279583404,
      "grad_norm": 0.19419439136981964,
      "learning_rate": 4.705033506190574e-06,
      "loss": 0.0121,
      "step": 1627160
    },
    {
      "epoch": 2.662915758396994,
      "grad_norm": 0.3049826920032501,
      "learning_rate": 4.704967613977057e-06,
      "loss": 0.0148,
      "step": 1627180
    },
    {
      "epoch": 2.6629484888356476,
      "grad_norm": 0.7072409391403198,
      "learning_rate": 4.7049017217635395e-06,
      "loss": 0.0155,
      "step": 1627200
    },
    {
      "epoch": 2.6629812192743008,
      "grad_norm": 0.3857910931110382,
      "learning_rate": 4.704835829550022e-06,
      "loss": 0.0128,
      "step": 1627220
    },
    {
      "epoch": 2.663013949712954,
      "grad_norm": 0.335462749004364,
      "learning_rate": 4.704769937336505e-06,
      "loss": 0.0168,
      "step": 1627240
    },
    {
      "epoch": 2.6630466801516075,
      "grad_norm": 0.5209269523620605,
      "learning_rate": 4.704704045122988e-06,
      "loss": 0.0152,
      "step": 1627260
    },
    {
      "epoch": 2.6630794105902607,
      "grad_norm": 0.952997088432312,
      "learning_rate": 4.704638152909471e-06,
      "loss": 0.0109,
      "step": 1627280
    },
    {
      "epoch": 2.663112141028914,
      "grad_norm": 0.2003096640110016,
      "learning_rate": 4.704572260695954e-06,
      "loss": 0.0205,
      "step": 1627300
    },
    {
      "epoch": 2.6631448714675674,
      "grad_norm": 0.303385853767395,
      "learning_rate": 4.704506368482437e-06,
      "loss": 0.0202,
      "step": 1627320
    },
    {
      "epoch": 2.663177601906221,
      "grad_norm": 0.10593714565038681,
      "learning_rate": 4.7044404762689195e-06,
      "loss": 0.0109,
      "step": 1627340
    },
    {
      "epoch": 2.663210332344874,
      "grad_norm": 0.6807772517204285,
      "learning_rate": 4.704374584055403e-06,
      "loss": 0.0147,
      "step": 1627360
    },
    {
      "epoch": 2.6632430627835273,
      "grad_norm": 0.49661630392074585,
      "learning_rate": 4.704308691841886e-06,
      "loss": 0.0125,
      "step": 1627380
    },
    {
      "epoch": 2.663275793222181,
      "grad_norm": 0.1559848189353943,
      "learning_rate": 4.7042427996283686e-06,
      "loss": 0.0179,
      "step": 1627400
    },
    {
      "epoch": 2.663308523660834,
      "grad_norm": 0.12503118813037872,
      "learning_rate": 4.704176907414851e-06,
      "loss": 0.0124,
      "step": 1627420
    },
    {
      "epoch": 2.663341254099487,
      "grad_norm": 0.30440717935562134,
      "learning_rate": 4.704111015201334e-06,
      "loss": 0.0122,
      "step": 1627440
    },
    {
      "epoch": 2.663373984538141,
      "grad_norm": 0.1707484871149063,
      "learning_rate": 4.704045122987817e-06,
      "loss": 0.0192,
      "step": 1627460
    },
    {
      "epoch": 2.6634067149767944,
      "grad_norm": 0.3047678470611572,
      "learning_rate": 4.7039792307742995e-06,
      "loss": 0.0204,
      "step": 1627480
    },
    {
      "epoch": 2.6634394454154475,
      "grad_norm": 0.11840144544839859,
      "learning_rate": 4.703913338560783e-06,
      "loss": 0.0115,
      "step": 1627500
    },
    {
      "epoch": 2.6634721758541007,
      "grad_norm": 0.9077913761138916,
      "learning_rate": 4.703847446347266e-06,
      "loss": 0.0095,
      "step": 1627520
    },
    {
      "epoch": 2.6635049062927543,
      "grad_norm": 0.8951987028121948,
      "learning_rate": 4.703781554133749e-06,
      "loss": 0.0139,
      "step": 1627540
    },
    {
      "epoch": 2.6635376367314074,
      "grad_norm": 0.3363702893257141,
      "learning_rate": 4.703715661920231e-06,
      "loss": 0.01,
      "step": 1627560
    },
    {
      "epoch": 2.6635703671700606,
      "grad_norm": 0.8191312551498413,
      "learning_rate": 4.703649769706714e-06,
      "loss": 0.0109,
      "step": 1627580
    },
    {
      "epoch": 2.663603097608714,
      "grad_norm": 0.32886993885040283,
      "learning_rate": 4.703583877493197e-06,
      "loss": 0.0139,
      "step": 1627600
    },
    {
      "epoch": 2.6636358280473678,
      "grad_norm": 0.2753835618495941,
      "learning_rate": 4.7035179852796795e-06,
      "loss": 0.0228,
      "step": 1627620
    },
    {
      "epoch": 2.663668558486021,
      "grad_norm": 0.17672382295131683,
      "learning_rate": 4.703452093066162e-06,
      "loss": 0.0127,
      "step": 1627640
    },
    {
      "epoch": 2.663701288924674,
      "grad_norm": 0.1674594134092331,
      "learning_rate": 4.703386200852645e-06,
      "loss": 0.0175,
      "step": 1627660
    },
    {
      "epoch": 2.6637340193633277,
      "grad_norm": 0.5254101753234863,
      "learning_rate": 4.703320308639129e-06,
      "loss": 0.0157,
      "step": 1627680
    },
    {
      "epoch": 2.663766749801981,
      "grad_norm": 0.31109219789505005,
      "learning_rate": 4.703254416425611e-06,
      "loss": 0.0134,
      "step": 1627700
    },
    {
      "epoch": 2.663799480240634,
      "grad_norm": 0.15572045743465424,
      "learning_rate": 4.703188524212094e-06,
      "loss": 0.0119,
      "step": 1627720
    },
    {
      "epoch": 2.6638322106792875,
      "grad_norm": 0.15406256914138794,
      "learning_rate": 4.703122631998578e-06,
      "loss": 0.012,
      "step": 1627740
    },
    {
      "epoch": 2.6638649411179407,
      "grad_norm": 0.3231349587440491,
      "learning_rate": 4.70305673978506e-06,
      "loss": 0.0182,
      "step": 1627760
    },
    {
      "epoch": 2.6638976715565943,
      "grad_norm": 0.4936996400356293,
      "learning_rate": 4.702990847571543e-06,
      "loss": 0.0082,
      "step": 1627780
    },
    {
      "epoch": 2.6639304019952474,
      "grad_norm": 0.25992339849472046,
      "learning_rate": 4.702924955358026e-06,
      "loss": 0.0114,
      "step": 1627800
    },
    {
      "epoch": 2.663963132433901,
      "grad_norm": 0.19814443588256836,
      "learning_rate": 4.702859063144509e-06,
      "loss": 0.0208,
      "step": 1627820
    },
    {
      "epoch": 2.663995862872554,
      "grad_norm": 0.1568455845117569,
      "learning_rate": 4.702793170930991e-06,
      "loss": 0.0142,
      "step": 1627840
    },
    {
      "epoch": 2.6640285933112073,
      "grad_norm": 0.5678542852401733,
      "learning_rate": 4.702727278717474e-06,
      "loss": 0.0243,
      "step": 1627860
    },
    {
      "epoch": 2.664061323749861,
      "grad_norm": 0.472777396440506,
      "learning_rate": 4.702661386503957e-06,
      "loss": 0.0122,
      "step": 1627880
    },
    {
      "epoch": 2.664094054188514,
      "grad_norm": 0.19088046252727509,
      "learning_rate": 4.7025954942904404e-06,
      "loss": 0.0167,
      "step": 1627900
    },
    {
      "epoch": 2.6641267846271677,
      "grad_norm": 0.6508312225341797,
      "learning_rate": 4.702529602076923e-06,
      "loss": 0.013,
      "step": 1627920
    },
    {
      "epoch": 2.664159515065821,
      "grad_norm": 0.25317633152008057,
      "learning_rate": 4.702463709863406e-06,
      "loss": 0.0132,
      "step": 1627940
    },
    {
      "epoch": 2.6641922455044744,
      "grad_norm": 0.5660403966903687,
      "learning_rate": 4.702397817649889e-06,
      "loss": 0.009,
      "step": 1627960
    },
    {
      "epoch": 2.6642249759431276,
      "grad_norm": 0.2453315556049347,
      "learning_rate": 4.702331925436371e-06,
      "loss": 0.0136,
      "step": 1627980
    },
    {
      "epoch": 2.6642577063817807,
      "grad_norm": 0.1556997299194336,
      "learning_rate": 4.702266033222854e-06,
      "loss": 0.0131,
      "step": 1628000
    },
    {
      "epoch": 2.6642904368204343,
      "grad_norm": 0.7129567265510559,
      "learning_rate": 4.702200141009337e-06,
      "loss": 0.0174,
      "step": 1628020
    },
    {
      "epoch": 2.6643231672590875,
      "grad_norm": 0.563509464263916,
      "learning_rate": 4.70213424879582e-06,
      "loss": 0.0126,
      "step": 1628040
    },
    {
      "epoch": 2.664355897697741,
      "grad_norm": 0.16359424591064453,
      "learning_rate": 4.702068356582303e-06,
      "loss": 0.0107,
      "step": 1628060
    },
    {
      "epoch": 2.664388628136394,
      "grad_norm": 0.5864456295967102,
      "learning_rate": 4.702002464368786e-06,
      "loss": 0.0101,
      "step": 1628080
    },
    {
      "epoch": 2.664421358575048,
      "grad_norm": 0.4224635064601898,
      "learning_rate": 4.701936572155269e-06,
      "loss": 0.0133,
      "step": 1628100
    },
    {
      "epoch": 2.664454089013701,
      "grad_norm": 0.38912731409072876,
      "learning_rate": 4.7018706799417514e-06,
      "loss": 0.0131,
      "step": 1628120
    },
    {
      "epoch": 2.664486819452354,
      "grad_norm": 0.5597891807556152,
      "learning_rate": 4.701804787728235e-06,
      "loss": 0.0121,
      "step": 1628140
    },
    {
      "epoch": 2.6645195498910077,
      "grad_norm": 0.26108694076538086,
      "learning_rate": 4.701738895514718e-06,
      "loss": 0.0101,
      "step": 1628160
    },
    {
      "epoch": 2.664552280329661,
      "grad_norm": 0.8617225289344788,
      "learning_rate": 4.7016730033012005e-06,
      "loss": 0.0155,
      "step": 1628180
    },
    {
      "epoch": 2.6645850107683144,
      "grad_norm": 0.3634239733219147,
      "learning_rate": 4.701607111087683e-06,
      "loss": 0.0139,
      "step": 1628200
    },
    {
      "epoch": 2.6646177412069676,
      "grad_norm": 0.20566560328006744,
      "learning_rate": 4.701541218874166e-06,
      "loss": 0.0111,
      "step": 1628220
    },
    {
      "epoch": 2.664650471645621,
      "grad_norm": 0.44434648752212524,
      "learning_rate": 4.701475326660649e-06,
      "loss": 0.0141,
      "step": 1628240
    },
    {
      "epoch": 2.6646832020842743,
      "grad_norm": 0.33671385049819946,
      "learning_rate": 4.7014094344471315e-06,
      "loss": 0.0123,
      "step": 1628260
    },
    {
      "epoch": 2.6647159325229275,
      "grad_norm": 0.32495948672294617,
      "learning_rate": 4.701343542233614e-06,
      "loss": 0.0135,
      "step": 1628280
    },
    {
      "epoch": 2.664748662961581,
      "grad_norm": 0.19366195797920227,
      "learning_rate": 4.701277650020098e-06,
      "loss": 0.0161,
      "step": 1628300
    },
    {
      "epoch": 2.6647813934002342,
      "grad_norm": 0.225443035364151,
      "learning_rate": 4.7012117578065805e-06,
      "loss": 0.0183,
      "step": 1628320
    },
    {
      "epoch": 2.664814123838888,
      "grad_norm": 0.30497950315475464,
      "learning_rate": 4.701145865593063e-06,
      "loss": 0.0158,
      "step": 1628340
    },
    {
      "epoch": 2.664846854277541,
      "grad_norm": 1.2052100896835327,
      "learning_rate": 4.701079973379546e-06,
      "loss": 0.0163,
      "step": 1628360
    },
    {
      "epoch": 2.6648795847161946,
      "grad_norm": 0.1709481030702591,
      "learning_rate": 4.701014081166029e-06,
      "loss": 0.0086,
      "step": 1628380
    },
    {
      "epoch": 2.6649123151548477,
      "grad_norm": 0.6713486313819885,
      "learning_rate": 4.7009481889525115e-06,
      "loss": 0.0213,
      "step": 1628400
    },
    {
      "epoch": 2.664945045593501,
      "grad_norm": 0.5281974077224731,
      "learning_rate": 4.700882296738995e-06,
      "loss": 0.0123,
      "step": 1628420
    },
    {
      "epoch": 2.6649777760321545,
      "grad_norm": 0.46184012293815613,
      "learning_rate": 4.700816404525478e-06,
      "loss": 0.0136,
      "step": 1628440
    },
    {
      "epoch": 2.6650105064708076,
      "grad_norm": 0.17988285422325134,
      "learning_rate": 4.7007505123119605e-06,
      "loss": 0.0143,
      "step": 1628460
    },
    {
      "epoch": 2.665043236909461,
      "grad_norm": 0.22213180363178253,
      "learning_rate": 4.700684620098443e-06,
      "loss": 0.0133,
      "step": 1628480
    },
    {
      "epoch": 2.6650759673481144,
      "grad_norm": 0.08153253048658371,
      "learning_rate": 4.700618727884926e-06,
      "loss": 0.0109,
      "step": 1628500
    },
    {
      "epoch": 2.665108697786768,
      "grad_norm": 0.5398318767547607,
      "learning_rate": 4.700552835671409e-06,
      "loss": 0.0166,
      "step": 1628520
    },
    {
      "epoch": 2.665141428225421,
      "grad_norm": 0.341968297958374,
      "learning_rate": 4.700486943457892e-06,
      "loss": 0.0164,
      "step": 1628540
    },
    {
      "epoch": 2.6651741586640743,
      "grad_norm": 0.287314236164093,
      "learning_rate": 4.700421051244375e-06,
      "loss": 0.0156,
      "step": 1628560
    },
    {
      "epoch": 2.665206889102728,
      "grad_norm": 0.7348118424415588,
      "learning_rate": 4.700355159030858e-06,
      "loss": 0.0083,
      "step": 1628580
    },
    {
      "epoch": 2.665239619541381,
      "grad_norm": 1.0615928173065186,
      "learning_rate": 4.7002892668173406e-06,
      "loss": 0.0088,
      "step": 1628600
    },
    {
      "epoch": 2.6652723499800346,
      "grad_norm": 1.4567564725875854,
      "learning_rate": 4.700223374603823e-06,
      "loss": 0.0108,
      "step": 1628620
    },
    {
      "epoch": 2.6653050804186877,
      "grad_norm": 0.5320668816566467,
      "learning_rate": 4.700157482390306e-06,
      "loss": 0.0071,
      "step": 1628640
    },
    {
      "epoch": 2.6653378108573413,
      "grad_norm": 0.28077730536460876,
      "learning_rate": 4.700091590176789e-06,
      "loss": 0.0233,
      "step": 1628660
    },
    {
      "epoch": 2.6653705412959945,
      "grad_norm": 1.2668272256851196,
      "learning_rate": 4.7000256979632715e-06,
      "loss": 0.0174,
      "step": 1628680
    },
    {
      "epoch": 2.6654032717346476,
      "grad_norm": 0.4442710876464844,
      "learning_rate": 4.699959805749755e-06,
      "loss": 0.0104,
      "step": 1628700
    },
    {
      "epoch": 2.6654360021733012,
      "grad_norm": 0.8243886828422546,
      "learning_rate": 4.699893913536238e-06,
      "loss": 0.0161,
      "step": 1628720
    },
    {
      "epoch": 2.6654687326119544,
      "grad_norm": 0.33730044960975647,
      "learning_rate": 4.699828021322721e-06,
      "loss": 0.0139,
      "step": 1628740
    },
    {
      "epoch": 2.6655014630506075,
      "grad_norm": 0.1267673373222351,
      "learning_rate": 4.699762129109204e-06,
      "loss": 0.0194,
      "step": 1628760
    },
    {
      "epoch": 2.665534193489261,
      "grad_norm": 0.4953174889087677,
      "learning_rate": 4.699696236895687e-06,
      "loss": 0.0169,
      "step": 1628780
    },
    {
      "epoch": 2.6655669239279147,
      "grad_norm": 0.1604289412498474,
      "learning_rate": 4.69963034468217e-06,
      "loss": 0.0081,
      "step": 1628800
    },
    {
      "epoch": 2.665599654366568,
      "grad_norm": 0.19187819957733154,
      "learning_rate": 4.699564452468652e-06,
      "loss": 0.0155,
      "step": 1628820
    },
    {
      "epoch": 2.665632384805221,
      "grad_norm": 0.2675170600414276,
      "learning_rate": 4.699498560255135e-06,
      "loss": 0.0194,
      "step": 1628840
    },
    {
      "epoch": 2.6656651152438746,
      "grad_norm": 0.15804429352283478,
      "learning_rate": 4.699432668041618e-06,
      "loss": 0.0159,
      "step": 1628860
    },
    {
      "epoch": 2.6656978456825278,
      "grad_norm": 0.18518289923667908,
      "learning_rate": 4.699366775828101e-06,
      "loss": 0.0094,
      "step": 1628880
    },
    {
      "epoch": 2.665730576121181,
      "grad_norm": 0.27159860730171204,
      "learning_rate": 4.699300883614583e-06,
      "loss": 0.0087,
      "step": 1628900
    },
    {
      "epoch": 2.6657633065598345,
      "grad_norm": 0.8152679204940796,
      "learning_rate": 4.699234991401067e-06,
      "loss": 0.0178,
      "step": 1628920
    },
    {
      "epoch": 2.665796036998488,
      "grad_norm": 0.18869659304618835,
      "learning_rate": 4.69916909918755e-06,
      "loss": 0.0099,
      "step": 1628940
    },
    {
      "epoch": 2.6658287674371413,
      "grad_norm": 0.25872984528541565,
      "learning_rate": 4.6991032069740324e-06,
      "loss": 0.0135,
      "step": 1628960
    },
    {
      "epoch": 2.6658614978757944,
      "grad_norm": 0.3435455858707428,
      "learning_rate": 4.699037314760515e-06,
      "loss": 0.019,
      "step": 1628980
    },
    {
      "epoch": 2.665894228314448,
      "grad_norm": 0.49075862765312195,
      "learning_rate": 4.698971422546998e-06,
      "loss": 0.0098,
      "step": 1629000
    },
    {
      "epoch": 2.665926958753101,
      "grad_norm": 0.46136030554771423,
      "learning_rate": 4.698905530333481e-06,
      "loss": 0.0133,
      "step": 1629020
    },
    {
      "epoch": 2.6659596891917543,
      "grad_norm": 0.2941185534000397,
      "learning_rate": 4.698839638119963e-06,
      "loss": 0.0101,
      "step": 1629040
    },
    {
      "epoch": 2.665992419630408,
      "grad_norm": 0.3557203412055969,
      "learning_rate": 4.698773745906446e-06,
      "loss": 0.0176,
      "step": 1629060
    },
    {
      "epoch": 2.6660251500690615,
      "grad_norm": 0.17751823365688324,
      "learning_rate": 4.698707853692929e-06,
      "loss": 0.0139,
      "step": 1629080
    },
    {
      "epoch": 2.6660578805077146,
      "grad_norm": 0.36141499876976013,
      "learning_rate": 4.6986419614794125e-06,
      "loss": 0.0092,
      "step": 1629100
    },
    {
      "epoch": 2.666090610946368,
      "grad_norm": 0.15488290786743164,
      "learning_rate": 4.698576069265895e-06,
      "loss": 0.0145,
      "step": 1629120
    },
    {
      "epoch": 2.6661233413850214,
      "grad_norm": 0.3800809979438782,
      "learning_rate": 4.698510177052378e-06,
      "loss": 0.0145,
      "step": 1629140
    },
    {
      "epoch": 2.6661560718236745,
      "grad_norm": 0.29812729358673096,
      "learning_rate": 4.6984442848388615e-06,
      "loss": 0.0156,
      "step": 1629160
    },
    {
      "epoch": 2.6661888022623277,
      "grad_norm": 0.4071741998195648,
      "learning_rate": 4.698378392625344e-06,
      "loss": 0.0148,
      "step": 1629180
    },
    {
      "epoch": 2.6662215327009813,
      "grad_norm": 0.3219681978225708,
      "learning_rate": 4.698312500411827e-06,
      "loss": 0.0139,
      "step": 1629200
    },
    {
      "epoch": 2.666254263139635,
      "grad_norm": 0.3129216432571411,
      "learning_rate": 4.69824660819831e-06,
      "loss": 0.0144,
      "step": 1629220
    },
    {
      "epoch": 2.666286993578288,
      "grad_norm": 0.07702373713254929,
      "learning_rate": 4.6981807159847925e-06,
      "loss": 0.0097,
      "step": 1629240
    },
    {
      "epoch": 2.666319724016941,
      "grad_norm": 0.7114764451980591,
      "learning_rate": 4.698114823771275e-06,
      "loss": 0.0181,
      "step": 1629260
    },
    {
      "epoch": 2.6663524544555948,
      "grad_norm": 0.29964369535446167,
      "learning_rate": 4.698048931557758e-06,
      "loss": 0.0149,
      "step": 1629280
    },
    {
      "epoch": 2.666385184894248,
      "grad_norm": 0.5588276386260986,
      "learning_rate": 4.697983039344241e-06,
      "loss": 0.0159,
      "step": 1629300
    },
    {
      "epoch": 2.666417915332901,
      "grad_norm": 0.0697534903883934,
      "learning_rate": 4.697917147130724e-06,
      "loss": 0.0135,
      "step": 1629320
    },
    {
      "epoch": 2.6664506457715547,
      "grad_norm": 0.18660622835159302,
      "learning_rate": 4.697851254917207e-06,
      "loss": 0.0181,
      "step": 1629340
    },
    {
      "epoch": 2.666483376210208,
      "grad_norm": 0.0567578449845314,
      "learning_rate": 4.69778536270369e-06,
      "loss": 0.0101,
      "step": 1629360
    },
    {
      "epoch": 2.6665161066488614,
      "grad_norm": 0.5728991031646729,
      "learning_rate": 4.6977194704901725e-06,
      "loss": 0.021,
      "step": 1629380
    },
    {
      "epoch": 2.6665488370875146,
      "grad_norm": 0.547103226184845,
      "learning_rate": 4.697653578276655e-06,
      "loss": 0.0153,
      "step": 1629400
    },
    {
      "epoch": 2.666581567526168,
      "grad_norm": 0.3674584925174713,
      "learning_rate": 4.697587686063138e-06,
      "loss": 0.0154,
      "step": 1629420
    },
    {
      "epoch": 2.6666142979648213,
      "grad_norm": 0.14179271459579468,
      "learning_rate": 4.697521793849621e-06,
      "loss": 0.0149,
      "step": 1629440
    },
    {
      "epoch": 2.6666470284034745,
      "grad_norm": 0.23059411346912384,
      "learning_rate": 4.697455901636104e-06,
      "loss": 0.0234,
      "step": 1629460
    },
    {
      "epoch": 2.666679758842128,
      "grad_norm": 0.09003584831953049,
      "learning_rate": 4.697390009422587e-06,
      "loss": 0.0116,
      "step": 1629480
    },
    {
      "epoch": 2.666712489280781,
      "grad_norm": 0.5733070373535156,
      "learning_rate": 4.69732411720907e-06,
      "loss": 0.0155,
      "step": 1629500
    },
    {
      "epoch": 2.666745219719435,
      "grad_norm": 0.49123507738113403,
      "learning_rate": 4.6972582249955525e-06,
      "loss": 0.014,
      "step": 1629520
    },
    {
      "epoch": 2.666777950158088,
      "grad_norm": 0.40887102484703064,
      "learning_rate": 4.697192332782035e-06,
      "loss": 0.012,
      "step": 1629540
    },
    {
      "epoch": 2.6668106805967415,
      "grad_norm": 0.23902304470539093,
      "learning_rate": 4.697126440568519e-06,
      "loss": 0.0091,
      "step": 1629560
    },
    {
      "epoch": 2.6668434110353947,
      "grad_norm": 0.39122113585472107,
      "learning_rate": 4.697060548355002e-06,
      "loss": 0.0129,
      "step": 1629580
    },
    {
      "epoch": 2.666876141474048,
      "grad_norm": 0.3028443455696106,
      "learning_rate": 4.696994656141484e-06,
      "loss": 0.0175,
      "step": 1629600
    },
    {
      "epoch": 2.6669088719127014,
      "grad_norm": 0.2948789894580841,
      "learning_rate": 4.696928763927967e-06,
      "loss": 0.0159,
      "step": 1629620
    },
    {
      "epoch": 2.6669416023513546,
      "grad_norm": 1.655958890914917,
      "learning_rate": 4.69686287171445e-06,
      "loss": 0.0179,
      "step": 1629640
    },
    {
      "epoch": 2.666974332790008,
      "grad_norm": 0.19385088980197906,
      "learning_rate": 4.6967969795009326e-06,
      "loss": 0.01,
      "step": 1629660
    },
    {
      "epoch": 2.6670070632286613,
      "grad_norm": 0.2689753472805023,
      "learning_rate": 4.696731087287415e-06,
      "loss": 0.0111,
      "step": 1629680
    },
    {
      "epoch": 2.667039793667315,
      "grad_norm": 0.40261828899383545,
      "learning_rate": 4.696665195073898e-06,
      "loss": 0.01,
      "step": 1629700
    },
    {
      "epoch": 2.667072524105968,
      "grad_norm": 0.35932257771492004,
      "learning_rate": 4.696599302860382e-06,
      "loss": 0.0167,
      "step": 1629720
    },
    {
      "epoch": 2.667105254544621,
      "grad_norm": 0.23933793604373932,
      "learning_rate": 4.696533410646864e-06,
      "loss": 0.0127,
      "step": 1629740
    },
    {
      "epoch": 2.667137984983275,
      "grad_norm": 0.5359691381454468,
      "learning_rate": 4.696467518433347e-06,
      "loss": 0.0209,
      "step": 1629760
    },
    {
      "epoch": 2.667170715421928,
      "grad_norm": 1.1433035135269165,
      "learning_rate": 4.69640162621983e-06,
      "loss": 0.0207,
      "step": 1629780
    },
    {
      "epoch": 2.6672034458605816,
      "grad_norm": 0.19516265392303467,
      "learning_rate": 4.696335734006313e-06,
      "loss": 0.0143,
      "step": 1629800
    },
    {
      "epoch": 2.6672361762992347,
      "grad_norm": 0.15037479996681213,
      "learning_rate": 4.696269841792796e-06,
      "loss": 0.0091,
      "step": 1629820
    },
    {
      "epoch": 2.6672689067378883,
      "grad_norm": 0.18654464185237885,
      "learning_rate": 4.696203949579279e-06,
      "loss": 0.0122,
      "step": 1629840
    },
    {
      "epoch": 2.6673016371765415,
      "grad_norm": 0.6826810240745544,
      "learning_rate": 4.696138057365762e-06,
      "loss": 0.0175,
      "step": 1629860
    },
    {
      "epoch": 2.6673343676151946,
      "grad_norm": 0.2107139229774475,
      "learning_rate": 4.696072165152244e-06,
      "loss": 0.0131,
      "step": 1629880
    },
    {
      "epoch": 2.667367098053848,
      "grad_norm": 0.5086606740951538,
      "learning_rate": 4.696006272938727e-06,
      "loss": 0.013,
      "step": 1629900
    },
    {
      "epoch": 2.6673998284925013,
      "grad_norm": 0.5778705477714539,
      "learning_rate": 4.69594038072521e-06,
      "loss": 0.0144,
      "step": 1629920
    },
    {
      "epoch": 2.667432558931155,
      "grad_norm": 0.23691649734973907,
      "learning_rate": 4.6958744885116935e-06,
      "loss": 0.0219,
      "step": 1629940
    },
    {
      "epoch": 2.667465289369808,
      "grad_norm": 0.5015757083892822,
      "learning_rate": 4.695808596298176e-06,
      "loss": 0.0149,
      "step": 1629960
    },
    {
      "epoch": 2.6674980198084617,
      "grad_norm": 0.17307759821414948,
      "learning_rate": 4.695742704084659e-06,
      "loss": 0.0158,
      "step": 1629980
    },
    {
      "epoch": 2.667530750247115,
      "grad_norm": 0.5661731362342834,
      "learning_rate": 4.695676811871142e-06,
      "loss": 0.0152,
      "step": 1630000
    },
    {
      "epoch": 2.667563480685768,
      "grad_norm": 0.4387985169887543,
      "learning_rate": 4.695610919657624e-06,
      "loss": 0.0122,
      "step": 1630020
    },
    {
      "epoch": 2.6675962111244216,
      "grad_norm": 0.045103203505277634,
      "learning_rate": 4.695545027444107e-06,
      "loss": 0.0133,
      "step": 1630040
    },
    {
      "epoch": 2.6676289415630747,
      "grad_norm": 0.402606338262558,
      "learning_rate": 4.69547913523059e-06,
      "loss": 0.0126,
      "step": 1630060
    },
    {
      "epoch": 2.6676616720017283,
      "grad_norm": 2.2348835468292236,
      "learning_rate": 4.695413243017073e-06,
      "loss": 0.0138,
      "step": 1630080
    },
    {
      "epoch": 2.6676944024403815,
      "grad_norm": 0.11440353095531464,
      "learning_rate": 4.695347350803555e-06,
      "loss": 0.0112,
      "step": 1630100
    },
    {
      "epoch": 2.667727132879035,
      "grad_norm": 0.16472598910331726,
      "learning_rate": 4.695281458590039e-06,
      "loss": 0.0116,
      "step": 1630120
    },
    {
      "epoch": 2.667759863317688,
      "grad_norm": 0.468728631734848,
      "learning_rate": 4.695215566376522e-06,
      "loss": 0.0144,
      "step": 1630140
    },
    {
      "epoch": 2.6677925937563414,
      "grad_norm": 0.24863599240779877,
      "learning_rate": 4.6951496741630044e-06,
      "loss": 0.0099,
      "step": 1630160
    },
    {
      "epoch": 2.667825324194995,
      "grad_norm": 0.3211714029312134,
      "learning_rate": 4.695083781949488e-06,
      "loss": 0.0166,
      "step": 1630180
    },
    {
      "epoch": 2.667858054633648,
      "grad_norm": 0.21070879697799683,
      "learning_rate": 4.695017889735971e-06,
      "loss": 0.0154,
      "step": 1630200
    },
    {
      "epoch": 2.6678907850723013,
      "grad_norm": 0.4062575399875641,
      "learning_rate": 4.6949519975224535e-06,
      "loss": 0.0129,
      "step": 1630220
    },
    {
      "epoch": 2.667923515510955,
      "grad_norm": 0.3498236835002899,
      "learning_rate": 4.694886105308936e-06,
      "loss": 0.0136,
      "step": 1630240
    },
    {
      "epoch": 2.6679562459496085,
      "grad_norm": 0.2302248775959015,
      "learning_rate": 4.694820213095419e-06,
      "loss": 0.0105,
      "step": 1630260
    },
    {
      "epoch": 2.6679889763882616,
      "grad_norm": 0.30315807461738586,
      "learning_rate": 4.694754320881902e-06,
      "loss": 0.0092,
      "step": 1630280
    },
    {
      "epoch": 2.6680217068269148,
      "grad_norm": 0.5396225452423096,
      "learning_rate": 4.6946884286683845e-06,
      "loss": 0.0141,
      "step": 1630300
    },
    {
      "epoch": 2.6680544372655683,
      "grad_norm": 0.2101597636938095,
      "learning_rate": 4.694622536454867e-06,
      "loss": 0.0136,
      "step": 1630320
    },
    {
      "epoch": 2.6680871677042215,
      "grad_norm": 0.09389200806617737,
      "learning_rate": 4.694556644241351e-06,
      "loss": 0.0124,
      "step": 1630340
    },
    {
      "epoch": 2.6681198981428746,
      "grad_norm": 0.41591617465019226,
      "learning_rate": 4.6944907520278335e-06,
      "loss": 0.013,
      "step": 1630360
    },
    {
      "epoch": 2.6681526285815282,
      "grad_norm": 0.09352526813745499,
      "learning_rate": 4.694424859814316e-06,
      "loss": 0.0116,
      "step": 1630380
    },
    {
      "epoch": 2.668185359020182,
      "grad_norm": 0.2830057442188263,
      "learning_rate": 4.694358967600799e-06,
      "loss": 0.0112,
      "step": 1630400
    },
    {
      "epoch": 2.668218089458835,
      "grad_norm": 0.3674848675727844,
      "learning_rate": 4.694293075387282e-06,
      "loss": 0.0136,
      "step": 1630420
    },
    {
      "epoch": 2.668250819897488,
      "grad_norm": 0.18461354076862335,
      "learning_rate": 4.6942271831737645e-06,
      "loss": 0.0123,
      "step": 1630440
    },
    {
      "epoch": 2.6682835503361417,
      "grad_norm": 0.49598753452301025,
      "learning_rate": 4.694161290960247e-06,
      "loss": 0.0202,
      "step": 1630460
    },
    {
      "epoch": 2.668316280774795,
      "grad_norm": 1.1422003507614136,
      "learning_rate": 4.69409539874673e-06,
      "loss": 0.0155,
      "step": 1630480
    },
    {
      "epoch": 2.668349011213448,
      "grad_norm": 0.5710375308990479,
      "learning_rate": 4.694029506533213e-06,
      "loss": 0.0184,
      "step": 1630500
    },
    {
      "epoch": 2.6683817416521016,
      "grad_norm": 0.17342495918273926,
      "learning_rate": 4.693963614319696e-06,
      "loss": 0.012,
      "step": 1630520
    },
    {
      "epoch": 2.668414472090755,
      "grad_norm": 0.4382465183734894,
      "learning_rate": 4.693897722106179e-06,
      "loss": 0.0168,
      "step": 1630540
    },
    {
      "epoch": 2.6684472025294084,
      "grad_norm": 0.3406457304954529,
      "learning_rate": 4.693831829892662e-06,
      "loss": 0.016,
      "step": 1630560
    },
    {
      "epoch": 2.6684799329680615,
      "grad_norm": 0.6633822321891785,
      "learning_rate": 4.693765937679145e-06,
      "loss": 0.0142,
      "step": 1630580
    },
    {
      "epoch": 2.668512663406715,
      "grad_norm": 0.4093839228153229,
      "learning_rate": 4.693700045465628e-06,
      "loss": 0.0111,
      "step": 1630600
    },
    {
      "epoch": 2.6685453938453683,
      "grad_norm": 0.22750228643417358,
      "learning_rate": 4.693634153252111e-06,
      "loss": 0.0158,
      "step": 1630620
    },
    {
      "epoch": 2.6685781242840214,
      "grad_norm": 0.10303327441215515,
      "learning_rate": 4.693568261038594e-06,
      "loss": 0.0204,
      "step": 1630640
    },
    {
      "epoch": 2.668610854722675,
      "grad_norm": 0.1634361892938614,
      "learning_rate": 4.693502368825076e-06,
      "loss": 0.0163,
      "step": 1630660
    },
    {
      "epoch": 2.6686435851613286,
      "grad_norm": 0.19344618916511536,
      "learning_rate": 4.693436476611559e-06,
      "loss": 0.0141,
      "step": 1630680
    },
    {
      "epoch": 2.6686763155999818,
      "grad_norm": 0.14153021574020386,
      "learning_rate": 4.693370584398042e-06,
      "loss": 0.0117,
      "step": 1630700
    },
    {
      "epoch": 2.668709046038635,
      "grad_norm": 0.12856492400169373,
      "learning_rate": 4.6933046921845245e-06,
      "loss": 0.0124,
      "step": 1630720
    },
    {
      "epoch": 2.6687417764772885,
      "grad_norm": 0.4901615083217621,
      "learning_rate": 4.693238799971008e-06,
      "loss": 0.0146,
      "step": 1630740
    },
    {
      "epoch": 2.6687745069159416,
      "grad_norm": 0.22841663658618927,
      "learning_rate": 4.693172907757491e-06,
      "loss": 0.0221,
      "step": 1630760
    },
    {
      "epoch": 2.668807237354595,
      "grad_norm": 0.2395952343940735,
      "learning_rate": 4.693107015543974e-06,
      "loss": 0.013,
      "step": 1630780
    },
    {
      "epoch": 2.6688399677932484,
      "grad_norm": 0.09251344949007034,
      "learning_rate": 4.693041123330456e-06,
      "loss": 0.011,
      "step": 1630800
    },
    {
      "epoch": 2.6688726982319015,
      "grad_norm": 0.2911260724067688,
      "learning_rate": 4.692975231116939e-06,
      "loss": 0.0101,
      "step": 1630820
    },
    {
      "epoch": 2.668905428670555,
      "grad_norm": 0.11469918489456177,
      "learning_rate": 4.692909338903422e-06,
      "loss": 0.02,
      "step": 1630840
    },
    {
      "epoch": 2.6689381591092083,
      "grad_norm": 0.2144397795200348,
      "learning_rate": 4.6928434466899046e-06,
      "loss": 0.0163,
      "step": 1630860
    },
    {
      "epoch": 2.668970889547862,
      "grad_norm": 1.1313668489456177,
      "learning_rate": 4.692777554476388e-06,
      "loss": 0.0122,
      "step": 1630880
    },
    {
      "epoch": 2.669003619986515,
      "grad_norm": 1.1626091003417969,
      "learning_rate": 4.692711662262871e-06,
      "loss": 0.0263,
      "step": 1630900
    },
    {
      "epoch": 2.669036350425168,
      "grad_norm": 0.3789726197719574,
      "learning_rate": 4.692645770049354e-06,
      "loss": 0.0115,
      "step": 1630920
    },
    {
      "epoch": 2.6690690808638218,
      "grad_norm": 0.11212177574634552,
      "learning_rate": 4.692579877835836e-06,
      "loss": 0.0096,
      "step": 1630940
    },
    {
      "epoch": 2.669101811302475,
      "grad_norm": 0.1622639298439026,
      "learning_rate": 4.692513985622319e-06,
      "loss": 0.0134,
      "step": 1630960
    },
    {
      "epoch": 2.6691345417411285,
      "grad_norm": 0.7430932521820068,
      "learning_rate": 4.692448093408803e-06,
      "loss": 0.013,
      "step": 1630980
    },
    {
      "epoch": 2.6691672721797817,
      "grad_norm": 0.2523149847984314,
      "learning_rate": 4.6923822011952854e-06,
      "loss": 0.0092,
      "step": 1631000
    },
    {
      "epoch": 2.6692000026184353,
      "grad_norm": 0.5234122276306152,
      "learning_rate": 4.692316308981768e-06,
      "loss": 0.014,
      "step": 1631020
    },
    {
      "epoch": 2.6692327330570884,
      "grad_norm": 0.2994774281978607,
      "learning_rate": 4.692250416768251e-06,
      "loss": 0.0122,
      "step": 1631040
    },
    {
      "epoch": 2.6692654634957416,
      "grad_norm": 0.6836793422698975,
      "learning_rate": 4.692184524554734e-06,
      "loss": 0.0114,
      "step": 1631060
    },
    {
      "epoch": 2.669298193934395,
      "grad_norm": 0.8422186970710754,
      "learning_rate": 4.692118632341216e-06,
      "loss": 0.0107,
      "step": 1631080
    },
    {
      "epoch": 2.6693309243730483,
      "grad_norm": 0.41804251074790955,
      "learning_rate": 4.692052740127699e-06,
      "loss": 0.0162,
      "step": 1631100
    },
    {
      "epoch": 2.669363654811702,
      "grad_norm": 0.148234561085701,
      "learning_rate": 4.691986847914182e-06,
      "loss": 0.0169,
      "step": 1631120
    },
    {
      "epoch": 2.669396385250355,
      "grad_norm": 0.7428748607635498,
      "learning_rate": 4.6919209557006655e-06,
      "loss": 0.0108,
      "step": 1631140
    },
    {
      "epoch": 2.6694291156890086,
      "grad_norm": 0.40966281294822693,
      "learning_rate": 4.691855063487148e-06,
      "loss": 0.0165,
      "step": 1631160
    },
    {
      "epoch": 2.669461846127662,
      "grad_norm": 0.30822035670280457,
      "learning_rate": 4.691789171273631e-06,
      "loss": 0.017,
      "step": 1631180
    },
    {
      "epoch": 2.669494576566315,
      "grad_norm": 0.1384766399860382,
      "learning_rate": 4.691723279060114e-06,
      "loss": 0.0099,
      "step": 1631200
    },
    {
      "epoch": 2.6695273070049685,
      "grad_norm": 0.3437965214252472,
      "learning_rate": 4.691657386846597e-06,
      "loss": 0.0136,
      "step": 1631220
    },
    {
      "epoch": 2.6695600374436217,
      "grad_norm": 0.3314148187637329,
      "learning_rate": 4.69159149463308e-06,
      "loss": 0.0092,
      "step": 1631240
    },
    {
      "epoch": 2.6695927678822753,
      "grad_norm": 1.3234952688217163,
      "learning_rate": 4.691525602419563e-06,
      "loss": 0.0162,
      "step": 1631260
    },
    {
      "epoch": 2.6696254983209284,
      "grad_norm": 0.25868603587150574,
      "learning_rate": 4.6914597102060455e-06,
      "loss": 0.0211,
      "step": 1631280
    },
    {
      "epoch": 2.669658228759582,
      "grad_norm": 0.7238439321517944,
      "learning_rate": 4.691393817992528e-06,
      "loss": 0.0125,
      "step": 1631300
    },
    {
      "epoch": 2.669690959198235,
      "grad_norm": 0.26834842562675476,
      "learning_rate": 4.691327925779011e-06,
      "loss": 0.0104,
      "step": 1631320
    },
    {
      "epoch": 2.6697236896368883,
      "grad_norm": 0.06152752786874771,
      "learning_rate": 4.691262033565494e-06,
      "loss": 0.0101,
      "step": 1631340
    },
    {
      "epoch": 2.669756420075542,
      "grad_norm": 0.14176739752292633,
      "learning_rate": 4.691196141351977e-06,
      "loss": 0.016,
      "step": 1631360
    },
    {
      "epoch": 2.669789150514195,
      "grad_norm": 0.2627580165863037,
      "learning_rate": 4.69113024913846e-06,
      "loss": 0.013,
      "step": 1631380
    },
    {
      "epoch": 2.6698218809528487,
      "grad_norm": 0.6918144226074219,
      "learning_rate": 4.691064356924943e-06,
      "loss": 0.0163,
      "step": 1631400
    },
    {
      "epoch": 2.669854611391502,
      "grad_norm": 0.12136854976415634,
      "learning_rate": 4.6909984647114255e-06,
      "loss": 0.0157,
      "step": 1631420
    },
    {
      "epoch": 2.6698873418301554,
      "grad_norm": 0.37852030992507935,
      "learning_rate": 4.690932572497908e-06,
      "loss": 0.011,
      "step": 1631440
    },
    {
      "epoch": 2.6699200722688086,
      "grad_norm": 0.27492591738700867,
      "learning_rate": 4.690866680284391e-06,
      "loss": 0.0136,
      "step": 1631460
    },
    {
      "epoch": 2.6699528027074617,
      "grad_norm": 0.5337488651275635,
      "learning_rate": 4.690800788070874e-06,
      "loss": 0.0155,
      "step": 1631480
    },
    {
      "epoch": 2.6699855331461153,
      "grad_norm": 0.36182868480682373,
      "learning_rate": 4.6907348958573565e-06,
      "loss": 0.0194,
      "step": 1631500
    },
    {
      "epoch": 2.6700182635847685,
      "grad_norm": 0.261400043964386,
      "learning_rate": 4.690669003643839e-06,
      "loss": 0.0114,
      "step": 1631520
    },
    {
      "epoch": 2.670050994023422,
      "grad_norm": 0.03530198708176613,
      "learning_rate": 4.690603111430323e-06,
      "loss": 0.0099,
      "step": 1631540
    },
    {
      "epoch": 2.670083724462075,
      "grad_norm": 0.09855254739522934,
      "learning_rate": 4.6905372192168055e-06,
      "loss": 0.0108,
      "step": 1631560
    },
    {
      "epoch": 2.670116454900729,
      "grad_norm": 0.36105525493621826,
      "learning_rate": 4.690471327003288e-06,
      "loss": 0.0157,
      "step": 1631580
    },
    {
      "epoch": 2.670149185339382,
      "grad_norm": 0.4902385473251343,
      "learning_rate": 4.690405434789772e-06,
      "loss": 0.013,
      "step": 1631600
    },
    {
      "epoch": 2.670181915778035,
      "grad_norm": 0.3944501280784607,
      "learning_rate": 4.690339542576255e-06,
      "loss": 0.0079,
      "step": 1631620
    },
    {
      "epoch": 2.6702146462166887,
      "grad_norm": 0.20172135531902313,
      "learning_rate": 4.690273650362737e-06,
      "loss": 0.0112,
      "step": 1631640
    },
    {
      "epoch": 2.670247376655342,
      "grad_norm": 0.3444734215736389,
      "learning_rate": 4.69020775814922e-06,
      "loss": 0.0131,
      "step": 1631660
    },
    {
      "epoch": 2.6702801070939954,
      "grad_norm": 0.5156239867210388,
      "learning_rate": 4.690141865935703e-06,
      "loss": 0.0113,
      "step": 1631680
    },
    {
      "epoch": 2.6703128375326486,
      "grad_norm": 0.17463459074497223,
      "learning_rate": 4.6900759737221856e-06,
      "loss": 0.0115,
      "step": 1631700
    },
    {
      "epoch": 2.670345567971302,
      "grad_norm": 0.18582475185394287,
      "learning_rate": 4.690010081508668e-06,
      "loss": 0.0128,
      "step": 1631720
    },
    {
      "epoch": 2.6703782984099553,
      "grad_norm": 0.17733849585056305,
      "learning_rate": 4.689944189295151e-06,
      "loss": 0.016,
      "step": 1631740
    },
    {
      "epoch": 2.6704110288486085,
      "grad_norm": 0.2733035683631897,
      "learning_rate": 4.689878297081635e-06,
      "loss": 0.0227,
      "step": 1631760
    },
    {
      "epoch": 2.670443759287262,
      "grad_norm": 0.18355029821395874,
      "learning_rate": 4.689812404868117e-06,
      "loss": 0.0133,
      "step": 1631780
    },
    {
      "epoch": 2.6704764897259152,
      "grad_norm": 0.17536653578281403,
      "learning_rate": 4.6897465126546e-06,
      "loss": 0.0222,
      "step": 1631800
    },
    {
      "epoch": 2.6705092201645684,
      "grad_norm": 1.2631772756576538,
      "learning_rate": 4.689680620441083e-06,
      "loss": 0.0149,
      "step": 1631820
    },
    {
      "epoch": 2.670541950603222,
      "grad_norm": 0.5627340078353882,
      "learning_rate": 4.689614728227566e-06,
      "loss": 0.016,
      "step": 1631840
    },
    {
      "epoch": 2.6705746810418756,
      "grad_norm": 0.1806969791650772,
      "learning_rate": 4.689548836014048e-06,
      "loss": 0.0148,
      "step": 1631860
    },
    {
      "epoch": 2.6706074114805287,
      "grad_norm": 0.44129204750061035,
      "learning_rate": 4.689482943800531e-06,
      "loss": 0.0136,
      "step": 1631880
    },
    {
      "epoch": 2.670640141919182,
      "grad_norm": 0.18347053229808807,
      "learning_rate": 4.689417051587014e-06,
      "loss": 0.0152,
      "step": 1631900
    },
    {
      "epoch": 2.6706728723578355,
      "grad_norm": 0.17223088443279266,
      "learning_rate": 4.6893511593734965e-06,
      "loss": 0.0132,
      "step": 1631920
    },
    {
      "epoch": 2.6707056027964886,
      "grad_norm": 0.14731381833553314,
      "learning_rate": 4.68928526715998e-06,
      "loss": 0.0139,
      "step": 1631940
    },
    {
      "epoch": 2.6707383332351418,
      "grad_norm": 3.5148448944091797,
      "learning_rate": 4.689219374946463e-06,
      "loss": 0.013,
      "step": 1631960
    },
    {
      "epoch": 2.6707710636737954,
      "grad_norm": 0.45694467425346375,
      "learning_rate": 4.689153482732946e-06,
      "loss": 0.0156,
      "step": 1631980
    },
    {
      "epoch": 2.670803794112449,
      "grad_norm": 0.8122085332870483,
      "learning_rate": 4.689087590519429e-06,
      "loss": 0.0153,
      "step": 1632000
    },
    {
      "epoch": 2.670836524551102,
      "grad_norm": 0.2600952386856079,
      "learning_rate": 4.689021698305912e-06,
      "loss": 0.0097,
      "step": 1632020
    },
    {
      "epoch": 2.6708692549897552,
      "grad_norm": 0.22166113555431366,
      "learning_rate": 4.688955806092395e-06,
      "loss": 0.0131,
      "step": 1632040
    },
    {
      "epoch": 2.670901985428409,
      "grad_norm": 0.2660854756832123,
      "learning_rate": 4.688889913878877e-06,
      "loss": 0.0106,
      "step": 1632060
    },
    {
      "epoch": 2.670934715867062,
      "grad_norm": 0.8171680569648743,
      "learning_rate": 4.68882402166536e-06,
      "loss": 0.0146,
      "step": 1632080
    },
    {
      "epoch": 2.670967446305715,
      "grad_norm": 0.12964238226413727,
      "learning_rate": 4.688758129451843e-06,
      "loss": 0.0123,
      "step": 1632100
    },
    {
      "epoch": 2.6710001767443687,
      "grad_norm": 0.17490103840827942,
      "learning_rate": 4.688692237238326e-06,
      "loss": 0.0119,
      "step": 1632120
    },
    {
      "epoch": 2.6710329071830223,
      "grad_norm": 1.424978256225586,
      "learning_rate": 4.688626345024808e-06,
      "loss": 0.01,
      "step": 1632140
    },
    {
      "epoch": 2.6710656376216755,
      "grad_norm": 0.19553957879543304,
      "learning_rate": 4.688560452811292e-06,
      "loss": 0.0117,
      "step": 1632160
    },
    {
      "epoch": 2.6710983680603286,
      "grad_norm": 1.1019679307937622,
      "learning_rate": 4.688494560597775e-06,
      "loss": 0.0133,
      "step": 1632180
    },
    {
      "epoch": 2.6711310984989822,
      "grad_norm": 0.41196244955062866,
      "learning_rate": 4.6884286683842574e-06,
      "loss": 0.0252,
      "step": 1632200
    },
    {
      "epoch": 2.6711638289376354,
      "grad_norm": 0.7455911636352539,
      "learning_rate": 4.68836277617074e-06,
      "loss": 0.0146,
      "step": 1632220
    },
    {
      "epoch": 2.6711965593762885,
      "grad_norm": 0.40933647751808167,
      "learning_rate": 4.688296883957223e-06,
      "loss": 0.0152,
      "step": 1632240
    },
    {
      "epoch": 2.671229289814942,
      "grad_norm": 0.5390161275863647,
      "learning_rate": 4.688230991743706e-06,
      "loss": 0.018,
      "step": 1632260
    },
    {
      "epoch": 2.6712620202535957,
      "grad_norm": 0.9425789713859558,
      "learning_rate": 4.688165099530189e-06,
      "loss": 0.0101,
      "step": 1632280
    },
    {
      "epoch": 2.671294750692249,
      "grad_norm": 0.43084198236465454,
      "learning_rate": 4.688099207316672e-06,
      "loss": 0.0158,
      "step": 1632300
    },
    {
      "epoch": 2.671327481130902,
      "grad_norm": 0.3490740656852722,
      "learning_rate": 4.688033315103155e-06,
      "loss": 0.0104,
      "step": 1632320
    },
    {
      "epoch": 2.6713602115695556,
      "grad_norm": 0.2129039466381073,
      "learning_rate": 4.6879674228896375e-06,
      "loss": 0.0156,
      "step": 1632340
    },
    {
      "epoch": 2.6713929420082088,
      "grad_norm": 0.22470209002494812,
      "learning_rate": 4.68790153067612e-06,
      "loss": 0.0162,
      "step": 1632360
    },
    {
      "epoch": 2.671425672446862,
      "grad_norm": 0.31065043807029724,
      "learning_rate": 4.687835638462603e-06,
      "loss": 0.0138,
      "step": 1632380
    },
    {
      "epoch": 2.6714584028855155,
      "grad_norm": 0.4130021035671234,
      "learning_rate": 4.6877697462490865e-06,
      "loss": 0.0216,
      "step": 1632400
    },
    {
      "epoch": 2.6714911333241687,
      "grad_norm": 0.1394382119178772,
      "learning_rate": 4.687703854035569e-06,
      "loss": 0.0089,
      "step": 1632420
    },
    {
      "epoch": 2.6715238637628222,
      "grad_norm": 0.2703545391559601,
      "learning_rate": 4.687637961822052e-06,
      "loss": 0.0146,
      "step": 1632440
    },
    {
      "epoch": 2.6715565942014754,
      "grad_norm": 0.41980376839637756,
      "learning_rate": 4.687572069608535e-06,
      "loss": 0.0132,
      "step": 1632460
    },
    {
      "epoch": 2.671589324640129,
      "grad_norm": 0.4345622658729553,
      "learning_rate": 4.6875061773950175e-06,
      "loss": 0.0171,
      "step": 1632480
    },
    {
      "epoch": 2.671622055078782,
      "grad_norm": 0.16845601797103882,
      "learning_rate": 4.6874402851815e-06,
      "loss": 0.0129,
      "step": 1632500
    },
    {
      "epoch": 2.6716547855174353,
      "grad_norm": 0.8582788705825806,
      "learning_rate": 4.687374392967983e-06,
      "loss": 0.0122,
      "step": 1632520
    },
    {
      "epoch": 2.671687515956089,
      "grad_norm": 0.9421804547309875,
      "learning_rate": 4.687308500754466e-06,
      "loss": 0.0095,
      "step": 1632540
    },
    {
      "epoch": 2.671720246394742,
      "grad_norm": 0.12115004658699036,
      "learning_rate": 4.687242608540949e-06,
      "loss": 0.0111,
      "step": 1632560
    },
    {
      "epoch": 2.6717529768333956,
      "grad_norm": 0.2488846480846405,
      "learning_rate": 4.687176716327432e-06,
      "loss": 0.0144,
      "step": 1632580
    },
    {
      "epoch": 2.671785707272049,
      "grad_norm": 0.17839516699314117,
      "learning_rate": 4.687110824113915e-06,
      "loss": 0.0165,
      "step": 1632600
    },
    {
      "epoch": 2.6718184377107024,
      "grad_norm": 0.12585438787937164,
      "learning_rate": 4.6870449319003975e-06,
      "loss": 0.0125,
      "step": 1632620
    },
    {
      "epoch": 2.6718511681493555,
      "grad_norm": 0.204735666513443,
      "learning_rate": 4.686979039686881e-06,
      "loss": 0.0196,
      "step": 1632640
    },
    {
      "epoch": 2.6718838985880087,
      "grad_norm": 0.9687901735305786,
      "learning_rate": 4.686913147473364e-06,
      "loss": 0.0106,
      "step": 1632660
    },
    {
      "epoch": 2.6719166290266623,
      "grad_norm": 0.2854108512401581,
      "learning_rate": 4.686847255259847e-06,
      "loss": 0.0161,
      "step": 1632680
    },
    {
      "epoch": 2.6719493594653154,
      "grad_norm": 0.6115687489509583,
      "learning_rate": 4.686781363046329e-06,
      "loss": 0.013,
      "step": 1632700
    },
    {
      "epoch": 2.671982089903969,
      "grad_norm": 0.21070539951324463,
      "learning_rate": 4.686715470832812e-06,
      "loss": 0.0153,
      "step": 1632720
    },
    {
      "epoch": 2.672014820342622,
      "grad_norm": 0.5744642615318298,
      "learning_rate": 4.686649578619295e-06,
      "loss": 0.0171,
      "step": 1632740
    },
    {
      "epoch": 2.6720475507812758,
      "grad_norm": 0.16690942645072937,
      "learning_rate": 4.6865836864057775e-06,
      "loss": 0.0158,
      "step": 1632760
    },
    {
      "epoch": 2.672080281219929,
      "grad_norm": 0.8456787467002869,
      "learning_rate": 4.686517794192261e-06,
      "loss": 0.0157,
      "step": 1632780
    },
    {
      "epoch": 2.672113011658582,
      "grad_norm": 0.37577351927757263,
      "learning_rate": 4.686451901978744e-06,
      "loss": 0.0116,
      "step": 1632800
    },
    {
      "epoch": 2.6721457420972357,
      "grad_norm": 0.19292192161083221,
      "learning_rate": 4.686386009765227e-06,
      "loss": 0.027,
      "step": 1632820
    },
    {
      "epoch": 2.672178472535889,
      "grad_norm": 0.5587310194969177,
      "learning_rate": 4.686320117551709e-06,
      "loss": 0.0112,
      "step": 1632840
    },
    {
      "epoch": 2.6722112029745424,
      "grad_norm": 0.2518562972545624,
      "learning_rate": 4.686254225338192e-06,
      "loss": 0.0128,
      "step": 1632860
    },
    {
      "epoch": 2.6722439334131955,
      "grad_norm": 1.7270116806030273,
      "learning_rate": 4.686188333124675e-06,
      "loss": 0.0197,
      "step": 1632880
    },
    {
      "epoch": 2.672276663851849,
      "grad_norm": 0.2262253314256668,
      "learning_rate": 4.6861224409111576e-06,
      "loss": 0.0106,
      "step": 1632900
    },
    {
      "epoch": 2.6723093942905023,
      "grad_norm": 0.2878126800060272,
      "learning_rate": 4.68605654869764e-06,
      "loss": 0.0147,
      "step": 1632920
    },
    {
      "epoch": 2.6723421247291554,
      "grad_norm": 1.7900274991989136,
      "learning_rate": 4.685990656484123e-06,
      "loss": 0.0162,
      "step": 1632940
    },
    {
      "epoch": 2.672374855167809,
      "grad_norm": 0.27842968702316284,
      "learning_rate": 4.685924764270607e-06,
      "loss": 0.0153,
      "step": 1632960
    },
    {
      "epoch": 2.672407585606462,
      "grad_norm": 0.2870737612247467,
      "learning_rate": 4.685858872057089e-06,
      "loss": 0.013,
      "step": 1632980
    },
    {
      "epoch": 2.672440316045116,
      "grad_norm": 0.17330265045166016,
      "learning_rate": 4.685792979843572e-06,
      "loss": 0.009,
      "step": 1633000
    },
    {
      "epoch": 2.672473046483769,
      "grad_norm": 0.24526920914649963,
      "learning_rate": 4.685727087630056e-06,
      "loss": 0.0156,
      "step": 1633020
    },
    {
      "epoch": 2.6725057769224225,
      "grad_norm": 0.11456657946109772,
      "learning_rate": 4.6856611954165384e-06,
      "loss": 0.0165,
      "step": 1633040
    },
    {
      "epoch": 2.6725385073610757,
      "grad_norm": 0.3332350552082062,
      "learning_rate": 4.685595303203021e-06,
      "loss": 0.0133,
      "step": 1633060
    },
    {
      "epoch": 2.672571237799729,
      "grad_norm": 0.41911840438842773,
      "learning_rate": 4.685529410989504e-06,
      "loss": 0.0115,
      "step": 1633080
    },
    {
      "epoch": 2.6726039682383824,
      "grad_norm": 0.5281231999397278,
      "learning_rate": 4.685463518775987e-06,
      "loss": 0.0132,
      "step": 1633100
    },
    {
      "epoch": 2.6726366986770356,
      "grad_norm": 0.399321973323822,
      "learning_rate": 4.685397626562469e-06,
      "loss": 0.0105,
      "step": 1633120
    },
    {
      "epoch": 2.672669429115689,
      "grad_norm": 0.10647734999656677,
      "learning_rate": 4.685331734348952e-06,
      "loss": 0.0103,
      "step": 1633140
    },
    {
      "epoch": 2.6727021595543423,
      "grad_norm": 2.6135921478271484,
      "learning_rate": 4.685265842135435e-06,
      "loss": 0.0105,
      "step": 1633160
    },
    {
      "epoch": 2.672734889992996,
      "grad_norm": 0.8928409218788147,
      "learning_rate": 4.6851999499219185e-06,
      "loss": 0.0172,
      "step": 1633180
    },
    {
      "epoch": 2.672767620431649,
      "grad_norm": 0.2257930189371109,
      "learning_rate": 4.685134057708401e-06,
      "loss": 0.0143,
      "step": 1633200
    },
    {
      "epoch": 2.672800350870302,
      "grad_norm": 0.5249489545822144,
      "learning_rate": 4.685068165494884e-06,
      "loss": 0.011,
      "step": 1633220
    },
    {
      "epoch": 2.672833081308956,
      "grad_norm": 1.0497798919677734,
      "learning_rate": 4.685002273281367e-06,
      "loss": 0.0149,
      "step": 1633240
    },
    {
      "epoch": 2.672865811747609,
      "grad_norm": 0.5508214235305786,
      "learning_rate": 4.6849363810678494e-06,
      "loss": 0.012,
      "step": 1633260
    },
    {
      "epoch": 2.672898542186262,
      "grad_norm": 0.2852727174758911,
      "learning_rate": 4.684870488854332e-06,
      "loss": 0.0172,
      "step": 1633280
    },
    {
      "epoch": 2.6729312726249157,
      "grad_norm": 0.29568222165107727,
      "learning_rate": 4.684804596640815e-06,
      "loss": 0.0119,
      "step": 1633300
    },
    {
      "epoch": 2.6729640030635693,
      "grad_norm": 0.19055791199207306,
      "learning_rate": 4.684738704427298e-06,
      "loss": 0.0116,
      "step": 1633320
    },
    {
      "epoch": 2.6729967335022224,
      "grad_norm": 0.31885382533073425,
      "learning_rate": 4.684672812213781e-06,
      "loss": 0.0142,
      "step": 1633340
    },
    {
      "epoch": 2.6730294639408756,
      "grad_norm": 0.36428582668304443,
      "learning_rate": 4.684606920000264e-06,
      "loss": 0.0137,
      "step": 1633360
    },
    {
      "epoch": 2.673062194379529,
      "grad_norm": 0.5299010872840881,
      "learning_rate": 4.684541027786747e-06,
      "loss": 0.0113,
      "step": 1633380
    },
    {
      "epoch": 2.6730949248181823,
      "grad_norm": 0.8599087595939636,
      "learning_rate": 4.6844751355732295e-06,
      "loss": 0.0132,
      "step": 1633400
    },
    {
      "epoch": 2.6731276552568355,
      "grad_norm": 0.572982132434845,
      "learning_rate": 4.684409243359713e-06,
      "loss": 0.0197,
      "step": 1633420
    },
    {
      "epoch": 2.673160385695489,
      "grad_norm": 0.22716161608695984,
      "learning_rate": 4.684343351146196e-06,
      "loss": 0.0126,
      "step": 1633440
    },
    {
      "epoch": 2.6731931161341427,
      "grad_norm": 0.41451114416122437,
      "learning_rate": 4.6842774589326785e-06,
      "loss": 0.0158,
      "step": 1633460
    },
    {
      "epoch": 2.673225846572796,
      "grad_norm": 0.28438636660575867,
      "learning_rate": 4.684211566719161e-06,
      "loss": 0.0174,
      "step": 1633480
    },
    {
      "epoch": 2.673258577011449,
      "grad_norm": 0.3199647068977356,
      "learning_rate": 4.684145674505644e-06,
      "loss": 0.0136,
      "step": 1633500
    },
    {
      "epoch": 2.6732913074501026,
      "grad_norm": 0.20434705913066864,
      "learning_rate": 4.684079782292127e-06,
      "loss": 0.0129,
      "step": 1633520
    },
    {
      "epoch": 2.6733240378887557,
      "grad_norm": 0.6200129389762878,
      "learning_rate": 4.6840138900786095e-06,
      "loss": 0.0141,
      "step": 1633540
    },
    {
      "epoch": 2.673356768327409,
      "grad_norm": 0.20997865498065948,
      "learning_rate": 4.683947997865092e-06,
      "loss": 0.0088,
      "step": 1633560
    },
    {
      "epoch": 2.6733894987660625,
      "grad_norm": 0.22785867750644684,
      "learning_rate": 4.683882105651576e-06,
      "loss": 0.0118,
      "step": 1633580
    },
    {
      "epoch": 2.673422229204716,
      "grad_norm": 0.3178580105304718,
      "learning_rate": 4.6838162134380585e-06,
      "loss": 0.011,
      "step": 1633600
    },
    {
      "epoch": 2.673454959643369,
      "grad_norm": 0.31799083948135376,
      "learning_rate": 4.683750321224541e-06,
      "loss": 0.0101,
      "step": 1633620
    },
    {
      "epoch": 2.6734876900820224,
      "grad_norm": 1.4785513877868652,
      "learning_rate": 4.683684429011024e-06,
      "loss": 0.0157,
      "step": 1633640
    },
    {
      "epoch": 2.673520420520676,
      "grad_norm": 0.2795815169811249,
      "learning_rate": 4.683618536797507e-06,
      "loss": 0.0192,
      "step": 1633660
    },
    {
      "epoch": 2.673553150959329,
      "grad_norm": 0.15844924747943878,
      "learning_rate": 4.6835526445839895e-06,
      "loss": 0.0143,
      "step": 1633680
    },
    {
      "epoch": 2.6735858813979823,
      "grad_norm": 0.06649971008300781,
      "learning_rate": 4.683486752370473e-06,
      "loss": 0.015,
      "step": 1633700
    },
    {
      "epoch": 2.673618611836636,
      "grad_norm": 0.18320472538471222,
      "learning_rate": 4.683420860156956e-06,
      "loss": 0.0099,
      "step": 1633720
    },
    {
      "epoch": 2.6736513422752894,
      "grad_norm": 0.22421088814735413,
      "learning_rate": 4.6833549679434386e-06,
      "loss": 0.0163,
      "step": 1633740
    },
    {
      "epoch": 2.6736840727139426,
      "grad_norm": 0.7904382348060608,
      "learning_rate": 4.683289075729921e-06,
      "loss": 0.0145,
      "step": 1633760
    },
    {
      "epoch": 2.6737168031525957,
      "grad_norm": 0.1622549444437027,
      "learning_rate": 4.683223183516404e-06,
      "loss": 0.017,
      "step": 1633780
    },
    {
      "epoch": 2.6737495335912493,
      "grad_norm": 0.2741840183734894,
      "learning_rate": 4.683157291302887e-06,
      "loss": 0.0101,
      "step": 1633800
    },
    {
      "epoch": 2.6737822640299025,
      "grad_norm": 0.5260310173034668,
      "learning_rate": 4.68309139908937e-06,
      "loss": 0.0101,
      "step": 1633820
    },
    {
      "epoch": 2.6738149944685556,
      "grad_norm": 0.18113374710083008,
      "learning_rate": 4.683025506875853e-06,
      "loss": 0.0131,
      "step": 1633840
    },
    {
      "epoch": 2.6738477249072092,
      "grad_norm": 0.8422613739967346,
      "learning_rate": 4.682959614662336e-06,
      "loss": 0.0169,
      "step": 1633860
    },
    {
      "epoch": 2.6738804553458624,
      "grad_norm": 0.19167782366275787,
      "learning_rate": 4.682893722448819e-06,
      "loss": 0.0217,
      "step": 1633880
    },
    {
      "epoch": 2.673913185784516,
      "grad_norm": 0.14503517746925354,
      "learning_rate": 4.682827830235301e-06,
      "loss": 0.012,
      "step": 1633900
    },
    {
      "epoch": 2.673945916223169,
      "grad_norm": 0.6232592463493347,
      "learning_rate": 4.682761938021784e-06,
      "loss": 0.0119,
      "step": 1633920
    },
    {
      "epoch": 2.6739786466618227,
      "grad_norm": 0.17749451100826263,
      "learning_rate": 4.682696045808267e-06,
      "loss": 0.0124,
      "step": 1633940
    },
    {
      "epoch": 2.674011377100476,
      "grad_norm": 0.23350271582603455,
      "learning_rate": 4.6826301535947496e-06,
      "loss": 0.0143,
      "step": 1633960
    },
    {
      "epoch": 2.674044107539129,
      "grad_norm": 0.17203401029109955,
      "learning_rate": 4.682564261381233e-06,
      "loss": 0.0149,
      "step": 1633980
    },
    {
      "epoch": 2.6740768379777826,
      "grad_norm": 0.44216498732566833,
      "learning_rate": 4.682498369167716e-06,
      "loss": 0.014,
      "step": 1634000
    },
    {
      "epoch": 2.6741095684164358,
      "grad_norm": 0.2513090670108795,
      "learning_rate": 4.682432476954199e-06,
      "loss": 0.0107,
      "step": 1634020
    },
    {
      "epoch": 2.6741422988550894,
      "grad_norm": 0.1162194088101387,
      "learning_rate": 4.682366584740682e-06,
      "loss": 0.0157,
      "step": 1634040
    },
    {
      "epoch": 2.6741750292937425,
      "grad_norm": 0.5592396855354309,
      "learning_rate": 4.682300692527165e-06,
      "loss": 0.0136,
      "step": 1634060
    },
    {
      "epoch": 2.674207759732396,
      "grad_norm": 0.3482148349285126,
      "learning_rate": 4.682234800313648e-06,
      "loss": 0.0172,
      "step": 1634080
    },
    {
      "epoch": 2.6742404901710493,
      "grad_norm": 0.4852326214313507,
      "learning_rate": 4.6821689081001304e-06,
      "loss": 0.0126,
      "step": 1634100
    },
    {
      "epoch": 2.6742732206097024,
      "grad_norm": 0.31491681933403015,
      "learning_rate": 4.682103015886613e-06,
      "loss": 0.0186,
      "step": 1634120
    },
    {
      "epoch": 2.674305951048356,
      "grad_norm": 0.31501510739326477,
      "learning_rate": 4.682037123673096e-06,
      "loss": 0.0099,
      "step": 1634140
    },
    {
      "epoch": 2.674338681487009,
      "grad_norm": 1.1023014783859253,
      "learning_rate": 4.681971231459579e-06,
      "loss": 0.0137,
      "step": 1634160
    },
    {
      "epoch": 2.6743714119256627,
      "grad_norm": 0.3804495930671692,
      "learning_rate": 4.681905339246061e-06,
      "loss": 0.0124,
      "step": 1634180
    },
    {
      "epoch": 2.674404142364316,
      "grad_norm": 0.2723260521888733,
      "learning_rate": 4.681839447032545e-06,
      "loss": 0.0154,
      "step": 1634200
    },
    {
      "epoch": 2.6744368728029695,
      "grad_norm": 0.4373583495616913,
      "learning_rate": 4.681773554819028e-06,
      "loss": 0.0171,
      "step": 1634220
    },
    {
      "epoch": 2.6744696032416226,
      "grad_norm": 0.9188046455383301,
      "learning_rate": 4.6817076626055105e-06,
      "loss": 0.0205,
      "step": 1634240
    },
    {
      "epoch": 2.674502333680276,
      "grad_norm": 0.2056681364774704,
      "learning_rate": 4.681641770391993e-06,
      "loss": 0.0171,
      "step": 1634260
    },
    {
      "epoch": 2.6745350641189294,
      "grad_norm": 0.15765748918056488,
      "learning_rate": 4.681575878178476e-06,
      "loss": 0.0136,
      "step": 1634280
    },
    {
      "epoch": 2.6745677945575825,
      "grad_norm": 0.14556996524333954,
      "learning_rate": 4.681509985964959e-06,
      "loss": 0.011,
      "step": 1634300
    },
    {
      "epoch": 2.674600524996236,
      "grad_norm": 0.4382568299770355,
      "learning_rate": 4.681444093751441e-06,
      "loss": 0.0166,
      "step": 1634320
    },
    {
      "epoch": 2.6746332554348893,
      "grad_norm": 0.5725775361061096,
      "learning_rate": 4.681378201537924e-06,
      "loss": 0.0139,
      "step": 1634340
    },
    {
      "epoch": 2.674665985873543,
      "grad_norm": 0.09170542657375336,
      "learning_rate": 4.681312309324407e-06,
      "loss": 0.0182,
      "step": 1634360
    },
    {
      "epoch": 2.674698716312196,
      "grad_norm": 0.6325734257698059,
      "learning_rate": 4.6812464171108905e-06,
      "loss": 0.0208,
      "step": 1634380
    },
    {
      "epoch": 2.674731446750849,
      "grad_norm": 0.15351811051368713,
      "learning_rate": 4.681180524897373e-06,
      "loss": 0.0101,
      "step": 1634400
    },
    {
      "epoch": 2.6747641771895028,
      "grad_norm": 0.3383314311504364,
      "learning_rate": 4.681114632683856e-06,
      "loss": 0.0162,
      "step": 1634420
    },
    {
      "epoch": 2.674796907628156,
      "grad_norm": 0.2710385322570801,
      "learning_rate": 4.6810487404703395e-06,
      "loss": 0.0137,
      "step": 1634440
    },
    {
      "epoch": 2.6748296380668095,
      "grad_norm": 0.20894640684127808,
      "learning_rate": 4.680982848256822e-06,
      "loss": 0.012,
      "step": 1634460
    },
    {
      "epoch": 2.6748623685054627,
      "grad_norm": 0.10232581198215485,
      "learning_rate": 4.680916956043305e-06,
      "loss": 0.0107,
      "step": 1634480
    },
    {
      "epoch": 2.6748950989441163,
      "grad_norm": 0.7746186852455139,
      "learning_rate": 4.680851063829788e-06,
      "loss": 0.0137,
      "step": 1634500
    },
    {
      "epoch": 2.6749278293827694,
      "grad_norm": 0.7874884009361267,
      "learning_rate": 4.6807851716162705e-06,
      "loss": 0.0153,
      "step": 1634520
    },
    {
      "epoch": 2.6749605598214226,
      "grad_norm": 0.31869667768478394,
      "learning_rate": 4.680719279402753e-06,
      "loss": 0.0157,
      "step": 1634540
    },
    {
      "epoch": 2.674993290260076,
      "grad_norm": 0.2262798696756363,
      "learning_rate": 4.680653387189236e-06,
      "loss": 0.0125,
      "step": 1634560
    },
    {
      "epoch": 2.6750260206987293,
      "grad_norm": 0.31506699323654175,
      "learning_rate": 4.680587494975719e-06,
      "loss": 0.0117,
      "step": 1634580
    },
    {
      "epoch": 2.675058751137383,
      "grad_norm": 0.10751348733901978,
      "learning_rate": 4.680521602762202e-06,
      "loss": 0.0183,
      "step": 1634600
    },
    {
      "epoch": 2.675091481576036,
      "grad_norm": 0.5104892253875732,
      "learning_rate": 4.680455710548685e-06,
      "loss": 0.0179,
      "step": 1634620
    },
    {
      "epoch": 2.6751242120146896,
      "grad_norm": 0.7432755827903748,
      "learning_rate": 4.680389818335168e-06,
      "loss": 0.0225,
      "step": 1634640
    },
    {
      "epoch": 2.675156942453343,
      "grad_norm": 0.7085313200950623,
      "learning_rate": 4.6803239261216505e-06,
      "loss": 0.016,
      "step": 1634660
    },
    {
      "epoch": 2.675189672891996,
      "grad_norm": 0.07311348617076874,
      "learning_rate": 4.680258033908133e-06,
      "loss": 0.0086,
      "step": 1634680
    },
    {
      "epoch": 2.6752224033306495,
      "grad_norm": 0.16136449575424194,
      "learning_rate": 4.680192141694616e-06,
      "loss": 0.0169,
      "step": 1634700
    },
    {
      "epoch": 2.6752551337693027,
      "grad_norm": 0.12399747222661972,
      "learning_rate": 4.680126249481099e-06,
      "loss": 0.0158,
      "step": 1634720
    },
    {
      "epoch": 2.6752878642079563,
      "grad_norm": 0.2567155659198761,
      "learning_rate": 4.680060357267582e-06,
      "loss": 0.0127,
      "step": 1634740
    },
    {
      "epoch": 2.6753205946466094,
      "grad_norm": 0.26755085587501526,
      "learning_rate": 4.679994465054065e-06,
      "loss": 0.0192,
      "step": 1634760
    },
    {
      "epoch": 2.675353325085263,
      "grad_norm": 0.6720846891403198,
      "learning_rate": 4.679928572840548e-06,
      "loss": 0.0117,
      "step": 1634780
    },
    {
      "epoch": 2.675386055523916,
      "grad_norm": 0.26802852749824524,
      "learning_rate": 4.6798626806270306e-06,
      "loss": 0.0147,
      "step": 1634800
    },
    {
      "epoch": 2.6754187859625693,
      "grad_norm": 0.19866952300071716,
      "learning_rate": 4.679796788413513e-06,
      "loss": 0.0179,
      "step": 1634820
    },
    {
      "epoch": 2.675451516401223,
      "grad_norm": 0.300094872713089,
      "learning_rate": 4.679730896199997e-06,
      "loss": 0.0192,
      "step": 1634840
    },
    {
      "epoch": 2.675484246839876,
      "grad_norm": 0.24224841594696045,
      "learning_rate": 4.67966500398648e-06,
      "loss": 0.0149,
      "step": 1634860
    },
    {
      "epoch": 2.675516977278529,
      "grad_norm": 0.5855428576469421,
      "learning_rate": 4.679599111772962e-06,
      "loss": 0.0144,
      "step": 1634880
    },
    {
      "epoch": 2.675549707717183,
      "grad_norm": 0.32559826970100403,
      "learning_rate": 4.679533219559445e-06,
      "loss": 0.0152,
      "step": 1634900
    },
    {
      "epoch": 2.6755824381558364,
      "grad_norm": 0.08689358085393906,
      "learning_rate": 4.679467327345928e-06,
      "loss": 0.0086,
      "step": 1634920
    },
    {
      "epoch": 2.6756151685944896,
      "grad_norm": 0.3553333580493927,
      "learning_rate": 4.679401435132411e-06,
      "loss": 0.0132,
      "step": 1634940
    },
    {
      "epoch": 2.6756478990331427,
      "grad_norm": 0.8128706812858582,
      "learning_rate": 4.679335542918893e-06,
      "loss": 0.0135,
      "step": 1634960
    },
    {
      "epoch": 2.6756806294717963,
      "grad_norm": 0.12093929201364517,
      "learning_rate": 4.679269650705376e-06,
      "loss": 0.0146,
      "step": 1634980
    },
    {
      "epoch": 2.6757133599104495,
      "grad_norm": 0.22313521802425385,
      "learning_rate": 4.67920375849186e-06,
      "loss": 0.0133,
      "step": 1635000
    },
    {
      "epoch": 2.6757460903491026,
      "grad_norm": 0.36024609208106995,
      "learning_rate": 4.679137866278342e-06,
      "loss": 0.0107,
      "step": 1635020
    },
    {
      "epoch": 2.675778820787756,
      "grad_norm": 0.7619085311889648,
      "learning_rate": 4.679071974064825e-06,
      "loss": 0.0168,
      "step": 1635040
    },
    {
      "epoch": 2.67581155122641,
      "grad_norm": 0.29879963397979736,
      "learning_rate": 4.679006081851308e-06,
      "loss": 0.0112,
      "step": 1635060
    },
    {
      "epoch": 2.675844281665063,
      "grad_norm": 0.7220969200134277,
      "learning_rate": 4.678940189637791e-06,
      "loss": 0.0114,
      "step": 1635080
    },
    {
      "epoch": 2.675877012103716,
      "grad_norm": 0.10828785598278046,
      "learning_rate": 4.678874297424274e-06,
      "loss": 0.0138,
      "step": 1635100
    },
    {
      "epoch": 2.6759097425423697,
      "grad_norm": 0.2541024684906006,
      "learning_rate": 4.678808405210757e-06,
      "loss": 0.018,
      "step": 1635120
    },
    {
      "epoch": 2.675942472981023,
      "grad_norm": 0.37495195865631104,
      "learning_rate": 4.67874251299724e-06,
      "loss": 0.0173,
      "step": 1635140
    },
    {
      "epoch": 2.675975203419676,
      "grad_norm": 1.2044782638549805,
      "learning_rate": 4.678676620783722e-06,
      "loss": 0.0156,
      "step": 1635160
    },
    {
      "epoch": 2.6760079338583296,
      "grad_norm": 0.3453322947025299,
      "learning_rate": 4.678610728570205e-06,
      "loss": 0.0154,
      "step": 1635180
    },
    {
      "epoch": 2.676040664296983,
      "grad_norm": 1.4545634984970093,
      "learning_rate": 4.678544836356688e-06,
      "loss": 0.0128,
      "step": 1635200
    },
    {
      "epoch": 2.6760733947356363,
      "grad_norm": 0.5906256437301636,
      "learning_rate": 4.6784789441431715e-06,
      "loss": 0.02,
      "step": 1635220
    },
    {
      "epoch": 2.6761061251742895,
      "grad_norm": 0.20440708100795746,
      "learning_rate": 4.678413051929654e-06,
      "loss": 0.0132,
      "step": 1635240
    },
    {
      "epoch": 2.676138855612943,
      "grad_norm": 0.21645160019397736,
      "learning_rate": 4.678347159716137e-06,
      "loss": 0.0101,
      "step": 1635260
    },
    {
      "epoch": 2.676171586051596,
      "grad_norm": 0.29097384214401245,
      "learning_rate": 4.67828126750262e-06,
      "loss": 0.0174,
      "step": 1635280
    },
    {
      "epoch": 2.6762043164902494,
      "grad_norm": 0.24929547309875488,
      "learning_rate": 4.6782153752891024e-06,
      "loss": 0.0094,
      "step": 1635300
    },
    {
      "epoch": 2.676237046928903,
      "grad_norm": 0.17897416651248932,
      "learning_rate": 4.678149483075585e-06,
      "loss": 0.0144,
      "step": 1635320
    },
    {
      "epoch": 2.6762697773675566,
      "grad_norm": 0.25759032368659973,
      "learning_rate": 4.678083590862068e-06,
      "loss": 0.014,
      "step": 1635340
    },
    {
      "epoch": 2.6763025078062097,
      "grad_norm": 0.3036193251609802,
      "learning_rate": 4.678017698648551e-06,
      "loss": 0.0091,
      "step": 1635360
    },
    {
      "epoch": 2.676335238244863,
      "grad_norm": 0.21099074184894562,
      "learning_rate": 4.677951806435033e-06,
      "loss": 0.019,
      "step": 1635380
    },
    {
      "epoch": 2.6763679686835165,
      "grad_norm": 0.10065151005983353,
      "learning_rate": 4.677885914221517e-06,
      "loss": 0.0167,
      "step": 1635400
    },
    {
      "epoch": 2.6764006991221696,
      "grad_norm": 1.9888207912445068,
      "learning_rate": 4.677820022008e-06,
      "loss": 0.0113,
      "step": 1635420
    },
    {
      "epoch": 2.6764334295608228,
      "grad_norm": 0.2960832715034485,
      "learning_rate": 4.6777541297944825e-06,
      "loss": 0.0183,
      "step": 1635440
    },
    {
      "epoch": 2.6764661599994763,
      "grad_norm": 0.47436684370040894,
      "learning_rate": 4.677688237580966e-06,
      "loss": 0.0109,
      "step": 1635460
    },
    {
      "epoch": 2.6764988904381295,
      "grad_norm": 0.3093392848968506,
      "learning_rate": 4.677622345367449e-06,
      "loss": 0.0145,
      "step": 1635480
    },
    {
      "epoch": 2.676531620876783,
      "grad_norm": 0.5748595595359802,
      "learning_rate": 4.6775564531539315e-06,
      "loss": 0.0147,
      "step": 1635500
    },
    {
      "epoch": 2.6765643513154362,
      "grad_norm": 0.6908278465270996,
      "learning_rate": 4.677490560940414e-06,
      "loss": 0.0129,
      "step": 1635520
    },
    {
      "epoch": 2.67659708175409,
      "grad_norm": 4.46655797958374,
      "learning_rate": 4.677424668726897e-06,
      "loss": 0.0185,
      "step": 1635540
    },
    {
      "epoch": 2.676629812192743,
      "grad_norm": 0.7827983498573303,
      "learning_rate": 4.67735877651338e-06,
      "loss": 0.012,
      "step": 1635560
    },
    {
      "epoch": 2.676662542631396,
      "grad_norm": 0.16375866532325745,
      "learning_rate": 4.6772928842998625e-06,
      "loss": 0.0107,
      "step": 1635580
    },
    {
      "epoch": 2.6766952730700497,
      "grad_norm": 0.3085079789161682,
      "learning_rate": 4.677226992086345e-06,
      "loss": 0.0137,
      "step": 1635600
    },
    {
      "epoch": 2.676728003508703,
      "grad_norm": 0.2786944508552551,
      "learning_rate": 4.677161099872829e-06,
      "loss": 0.0122,
      "step": 1635620
    },
    {
      "epoch": 2.6767607339473565,
      "grad_norm": 0.5597097277641296,
      "learning_rate": 4.6770952076593115e-06,
      "loss": 0.0191,
      "step": 1635640
    },
    {
      "epoch": 2.6767934643860096,
      "grad_norm": 0.30554914474487305,
      "learning_rate": 4.677029315445794e-06,
      "loss": 0.0158,
      "step": 1635660
    },
    {
      "epoch": 2.676826194824663,
      "grad_norm": 0.37517881393432617,
      "learning_rate": 4.676963423232277e-06,
      "loss": 0.0143,
      "step": 1635680
    },
    {
      "epoch": 2.6768589252633164,
      "grad_norm": 0.11923536658287048,
      "learning_rate": 4.67689753101876e-06,
      "loss": 0.0149,
      "step": 1635700
    },
    {
      "epoch": 2.6768916557019695,
      "grad_norm": 0.42152419686317444,
      "learning_rate": 4.6768316388052425e-06,
      "loss": 0.01,
      "step": 1635720
    },
    {
      "epoch": 2.676924386140623,
      "grad_norm": 0.4490402340888977,
      "learning_rate": 4.676765746591725e-06,
      "loss": 0.0144,
      "step": 1635740
    },
    {
      "epoch": 2.6769571165792763,
      "grad_norm": 0.34029948711395264,
      "learning_rate": 4.676699854378208e-06,
      "loss": 0.015,
      "step": 1635760
    },
    {
      "epoch": 2.67698984701793,
      "grad_norm": 0.18103644251823425,
      "learning_rate": 4.676633962164691e-06,
      "loss": 0.0158,
      "step": 1635780
    },
    {
      "epoch": 2.677022577456583,
      "grad_norm": 0.6080004572868347,
      "learning_rate": 4.676568069951174e-06,
      "loss": 0.0212,
      "step": 1635800
    },
    {
      "epoch": 2.6770553078952366,
      "grad_norm": 0.14453944563865662,
      "learning_rate": 4.676502177737657e-06,
      "loss": 0.0136,
      "step": 1635820
    },
    {
      "epoch": 2.6770880383338898,
      "grad_norm": 0.24083566665649414,
      "learning_rate": 4.67643628552414e-06,
      "loss": 0.0101,
      "step": 1635840
    },
    {
      "epoch": 2.677120768772543,
      "grad_norm": 0.48014509677886963,
      "learning_rate": 4.676370393310623e-06,
      "loss": 0.0133,
      "step": 1635860
    },
    {
      "epoch": 2.6771534992111965,
      "grad_norm": 0.46208474040031433,
      "learning_rate": 4.676304501097106e-06,
      "loss": 0.023,
      "step": 1635880
    },
    {
      "epoch": 2.6771862296498496,
      "grad_norm": 0.7391209006309509,
      "learning_rate": 4.676238608883589e-06,
      "loss": 0.0138,
      "step": 1635900
    },
    {
      "epoch": 2.6772189600885032,
      "grad_norm": 0.11297876387834549,
      "learning_rate": 4.676172716670072e-06,
      "loss": 0.0149,
      "step": 1635920
    },
    {
      "epoch": 2.6772516905271564,
      "grad_norm": 0.2350643128156662,
      "learning_rate": 4.676106824456554e-06,
      "loss": 0.0141,
      "step": 1635940
    },
    {
      "epoch": 2.67728442096581,
      "grad_norm": 0.2172796130180359,
      "learning_rate": 4.676040932243037e-06,
      "loss": 0.0086,
      "step": 1635960
    },
    {
      "epoch": 2.677317151404463,
      "grad_norm": 0.7216859459877014,
      "learning_rate": 4.67597504002952e-06,
      "loss": 0.011,
      "step": 1635980
    },
    {
      "epoch": 2.6773498818431163,
      "grad_norm": 0.28047025203704834,
      "learning_rate": 4.6759091478160026e-06,
      "loss": 0.0127,
      "step": 1636000
    },
    {
      "epoch": 2.67738261228177,
      "grad_norm": 0.30356574058532715,
      "learning_rate": 4.675843255602486e-06,
      "loss": 0.0093,
      "step": 1636020
    },
    {
      "epoch": 2.677415342720423,
      "grad_norm": 0.1919441968202591,
      "learning_rate": 4.675777363388969e-06,
      "loss": 0.0131,
      "step": 1636040
    },
    {
      "epoch": 2.6774480731590766,
      "grad_norm": 0.6503249406814575,
      "learning_rate": 4.675711471175452e-06,
      "loss": 0.0154,
      "step": 1636060
    },
    {
      "epoch": 2.6774808035977298,
      "grad_norm": 0.5126788020133972,
      "learning_rate": 4.675645578961934e-06,
      "loss": 0.0156,
      "step": 1636080
    },
    {
      "epoch": 2.6775135340363834,
      "grad_norm": 0.47918984293937683,
      "learning_rate": 4.675579686748417e-06,
      "loss": 0.0124,
      "step": 1636100
    },
    {
      "epoch": 2.6775462644750365,
      "grad_norm": 0.23341572284698486,
      "learning_rate": 4.6755137945349e-06,
      "loss": 0.0074,
      "step": 1636120
    },
    {
      "epoch": 2.6775789949136897,
      "grad_norm": 0.19729381799697876,
      "learning_rate": 4.675447902321383e-06,
      "loss": 0.0142,
      "step": 1636140
    },
    {
      "epoch": 2.6776117253523433,
      "grad_norm": 0.40568989515304565,
      "learning_rate": 4.675382010107866e-06,
      "loss": 0.0167,
      "step": 1636160
    },
    {
      "epoch": 2.6776444557909964,
      "grad_norm": 0.4508973956108093,
      "learning_rate": 4.675316117894349e-06,
      "loss": 0.0171,
      "step": 1636180
    },
    {
      "epoch": 2.67767718622965,
      "grad_norm": 0.12537680566310883,
      "learning_rate": 4.675250225680832e-06,
      "loss": 0.011,
      "step": 1636200
    },
    {
      "epoch": 2.677709916668303,
      "grad_norm": 0.370680034160614,
      "learning_rate": 4.675184333467314e-06,
      "loss": 0.0134,
      "step": 1636220
    },
    {
      "epoch": 2.6777426471069568,
      "grad_norm": 1.2385034561157227,
      "learning_rate": 4.675118441253797e-06,
      "loss": 0.016,
      "step": 1636240
    },
    {
      "epoch": 2.67777537754561,
      "grad_norm": 0.07993268221616745,
      "learning_rate": 4.675052549040281e-06,
      "loss": 0.0119,
      "step": 1636260
    },
    {
      "epoch": 2.677808107984263,
      "grad_norm": 0.3253132700920105,
      "learning_rate": 4.6749866568267635e-06,
      "loss": 0.0096,
      "step": 1636280
    },
    {
      "epoch": 2.6778408384229166,
      "grad_norm": 0.45856794714927673,
      "learning_rate": 4.674920764613246e-06,
      "loss": 0.0174,
      "step": 1636300
    },
    {
      "epoch": 2.67787356886157,
      "grad_norm": 0.7043306827545166,
      "learning_rate": 4.674854872399729e-06,
      "loss": 0.016,
      "step": 1636320
    },
    {
      "epoch": 2.677906299300223,
      "grad_norm": 0.1949981302022934,
      "learning_rate": 4.674788980186212e-06,
      "loss": 0.0227,
      "step": 1636340
    },
    {
      "epoch": 2.6779390297388765,
      "grad_norm": 0.46560701727867126,
      "learning_rate": 4.674723087972694e-06,
      "loss": 0.0187,
      "step": 1636360
    },
    {
      "epoch": 2.67797176017753,
      "grad_norm": 0.4029874801635742,
      "learning_rate": 4.674657195759177e-06,
      "loss": 0.0139,
      "step": 1636380
    },
    {
      "epoch": 2.6780044906161833,
      "grad_norm": 0.7874648571014404,
      "learning_rate": 4.67459130354566e-06,
      "loss": 0.0207,
      "step": 1636400
    },
    {
      "epoch": 2.6780372210548364,
      "grad_norm": 0.16658969223499298,
      "learning_rate": 4.6745254113321435e-06,
      "loss": 0.0104,
      "step": 1636420
    },
    {
      "epoch": 2.67806995149349,
      "grad_norm": 0.14601737260818481,
      "learning_rate": 4.674459519118626e-06,
      "loss": 0.0093,
      "step": 1636440
    },
    {
      "epoch": 2.678102681932143,
      "grad_norm": 0.1147303581237793,
      "learning_rate": 4.674393626905109e-06,
      "loss": 0.0116,
      "step": 1636460
    },
    {
      "epoch": 2.6781354123707963,
      "grad_norm": 0.7825653553009033,
      "learning_rate": 4.674327734691592e-06,
      "loss": 0.0166,
      "step": 1636480
    },
    {
      "epoch": 2.67816814280945,
      "grad_norm": 0.07953960448503494,
      "learning_rate": 4.674261842478075e-06,
      "loss": 0.019,
      "step": 1636500
    },
    {
      "epoch": 2.6782008732481035,
      "grad_norm": 0.5115732550621033,
      "learning_rate": 4.674195950264558e-06,
      "loss": 0.0121,
      "step": 1636520
    },
    {
      "epoch": 2.6782336036867567,
      "grad_norm": 0.6447278261184692,
      "learning_rate": 4.674130058051041e-06,
      "loss": 0.0093,
      "step": 1636540
    },
    {
      "epoch": 2.67826633412541,
      "grad_norm": 0.31287702918052673,
      "learning_rate": 4.6740641658375235e-06,
      "loss": 0.0152,
      "step": 1636560
    },
    {
      "epoch": 2.6782990645640634,
      "grad_norm": 0.43853405117988586,
      "learning_rate": 4.673998273624006e-06,
      "loss": 0.0147,
      "step": 1636580
    },
    {
      "epoch": 2.6783317950027166,
      "grad_norm": 0.4592202603816986,
      "learning_rate": 4.673932381410489e-06,
      "loss": 0.0104,
      "step": 1636600
    },
    {
      "epoch": 2.6783645254413697,
      "grad_norm": 0.11962360143661499,
      "learning_rate": 4.673866489196972e-06,
      "loss": 0.0231,
      "step": 1636620
    },
    {
      "epoch": 2.6783972558800233,
      "grad_norm": 0.20105762779712677,
      "learning_rate": 4.673800596983455e-06,
      "loss": 0.0204,
      "step": 1636640
    },
    {
      "epoch": 2.678429986318677,
      "grad_norm": 0.38042116165161133,
      "learning_rate": 4.673734704769938e-06,
      "loss": 0.0163,
      "step": 1636660
    },
    {
      "epoch": 2.67846271675733,
      "grad_norm": 0.44427284598350525,
      "learning_rate": 4.673668812556421e-06,
      "loss": 0.0177,
      "step": 1636680
    },
    {
      "epoch": 2.678495447195983,
      "grad_norm": 0.2939542829990387,
      "learning_rate": 4.6736029203429035e-06,
      "loss": 0.0174,
      "step": 1636700
    },
    {
      "epoch": 2.678528177634637,
      "grad_norm": 0.23751194775104523,
      "learning_rate": 4.673537028129386e-06,
      "loss": 0.0122,
      "step": 1636720
    },
    {
      "epoch": 2.67856090807329,
      "grad_norm": 0.09587901085615158,
      "learning_rate": 4.673471135915869e-06,
      "loss": 0.0163,
      "step": 1636740
    },
    {
      "epoch": 2.678593638511943,
      "grad_norm": 0.07439292222261429,
      "learning_rate": 4.673405243702352e-06,
      "loss": 0.0126,
      "step": 1636760
    },
    {
      "epoch": 2.6786263689505967,
      "grad_norm": 0.20012876391410828,
      "learning_rate": 4.6733393514888345e-06,
      "loss": 0.0139,
      "step": 1636780
    },
    {
      "epoch": 2.6786590993892503,
      "grad_norm": 0.4040225148200989,
      "learning_rate": 4.673273459275317e-06,
      "loss": 0.0149,
      "step": 1636800
    },
    {
      "epoch": 2.6786918298279034,
      "grad_norm": 0.7817457318305969,
      "learning_rate": 4.673207567061801e-06,
      "loss": 0.013,
      "step": 1636820
    },
    {
      "epoch": 2.6787245602665566,
      "grad_norm": 0.715965986251831,
      "learning_rate": 4.6731416748482836e-06,
      "loss": 0.0125,
      "step": 1636840
    },
    {
      "epoch": 2.67875729070521,
      "grad_norm": 0.44426047801971436,
      "learning_rate": 4.673075782634766e-06,
      "loss": 0.0127,
      "step": 1636860
    },
    {
      "epoch": 2.6787900211438633,
      "grad_norm": 0.3457307517528534,
      "learning_rate": 4.67300989042125e-06,
      "loss": 0.0177,
      "step": 1636880
    },
    {
      "epoch": 2.6788227515825165,
      "grad_norm": 0.5688890814781189,
      "learning_rate": 4.672943998207733e-06,
      "loss": 0.018,
      "step": 1636900
    },
    {
      "epoch": 2.67885548202117,
      "grad_norm": 0.07193253934383392,
      "learning_rate": 4.672878105994215e-06,
      "loss": 0.0099,
      "step": 1636920
    },
    {
      "epoch": 2.6788882124598232,
      "grad_norm": 0.49054065346717834,
      "learning_rate": 4.672812213780698e-06,
      "loss": 0.014,
      "step": 1636940
    },
    {
      "epoch": 2.678920942898477,
      "grad_norm": 0.2793309986591339,
      "learning_rate": 4.672746321567181e-06,
      "loss": 0.0197,
      "step": 1636960
    },
    {
      "epoch": 2.67895367333713,
      "grad_norm": 0.3521858751773834,
      "learning_rate": 4.672680429353664e-06,
      "loss": 0.0108,
      "step": 1636980
    },
    {
      "epoch": 2.6789864037757836,
      "grad_norm": 0.250426709651947,
      "learning_rate": 4.672614537140146e-06,
      "loss": 0.0151,
      "step": 1637000
    },
    {
      "epoch": 2.6790191342144367,
      "grad_norm": 0.2741629183292389,
      "learning_rate": 4.672548644926629e-06,
      "loss": 0.0132,
      "step": 1637020
    },
    {
      "epoch": 2.67905186465309,
      "grad_norm": 0.46058550477027893,
      "learning_rate": 4.672482752713113e-06,
      "loss": 0.016,
      "step": 1637040
    },
    {
      "epoch": 2.6790845950917435,
      "grad_norm": 0.18103516101837158,
      "learning_rate": 4.672416860499595e-06,
      "loss": 0.0101,
      "step": 1637060
    },
    {
      "epoch": 2.6791173255303966,
      "grad_norm": 0.34493446350097656,
      "learning_rate": 4.672350968286078e-06,
      "loss": 0.0112,
      "step": 1637080
    },
    {
      "epoch": 2.67915005596905,
      "grad_norm": 0.11163081973791122,
      "learning_rate": 4.672285076072561e-06,
      "loss": 0.0129,
      "step": 1637100
    },
    {
      "epoch": 2.6791827864077034,
      "grad_norm": 0.26224273443222046,
      "learning_rate": 4.672219183859044e-06,
      "loss": 0.019,
      "step": 1637120
    },
    {
      "epoch": 2.679215516846357,
      "grad_norm": 0.29031816124916077,
      "learning_rate": 4.672153291645526e-06,
      "loss": 0.0154,
      "step": 1637140
    },
    {
      "epoch": 2.67924824728501,
      "grad_norm": 0.553229570388794,
      "learning_rate": 4.672087399432009e-06,
      "loss": 0.0145,
      "step": 1637160
    },
    {
      "epoch": 2.6792809777236632,
      "grad_norm": 0.2602354884147644,
      "learning_rate": 4.672021507218492e-06,
      "loss": 0.0146,
      "step": 1637180
    },
    {
      "epoch": 2.679313708162317,
      "grad_norm": 0.12237454950809479,
      "learning_rate": 4.671955615004975e-06,
      "loss": 0.0117,
      "step": 1637200
    },
    {
      "epoch": 2.67934643860097,
      "grad_norm": 0.09747850149869919,
      "learning_rate": 4.671889722791458e-06,
      "loss": 0.0191,
      "step": 1637220
    },
    {
      "epoch": 2.6793791690396236,
      "grad_norm": 0.12904076278209686,
      "learning_rate": 4.671823830577941e-06,
      "loss": 0.011,
      "step": 1637240
    },
    {
      "epoch": 2.6794118994782767,
      "grad_norm": 0.17500032484531403,
      "learning_rate": 4.671757938364424e-06,
      "loss": 0.0143,
      "step": 1637260
    },
    {
      "epoch": 2.6794446299169303,
      "grad_norm": 0.23501063883304596,
      "learning_rate": 4.671692046150907e-06,
      "loss": 0.0246,
      "step": 1637280
    },
    {
      "epoch": 2.6794773603555835,
      "grad_norm": 0.6758726835250854,
      "learning_rate": 4.67162615393739e-06,
      "loss": 0.0114,
      "step": 1637300
    },
    {
      "epoch": 2.6795100907942366,
      "grad_norm": 0.17008760571479797,
      "learning_rate": 4.671560261723873e-06,
      "loss": 0.0151,
      "step": 1637320
    },
    {
      "epoch": 2.6795428212328902,
      "grad_norm": 0.5221111178398132,
      "learning_rate": 4.6714943695103554e-06,
      "loss": 0.0166,
      "step": 1637340
    },
    {
      "epoch": 2.6795755516715434,
      "grad_norm": 0.0805065780878067,
      "learning_rate": 4.671428477296838e-06,
      "loss": 0.0145,
      "step": 1637360
    },
    {
      "epoch": 2.679608282110197,
      "grad_norm": 0.9608066082000732,
      "learning_rate": 4.671362585083321e-06,
      "loss": 0.0156,
      "step": 1637380
    },
    {
      "epoch": 2.67964101254885,
      "grad_norm": 0.2785215377807617,
      "learning_rate": 4.671296692869804e-06,
      "loss": 0.0137,
      "step": 1637400
    },
    {
      "epoch": 2.6796737429875037,
      "grad_norm": 0.2790709137916565,
      "learning_rate": 4.671230800656286e-06,
      "loss": 0.0157,
      "step": 1637420
    },
    {
      "epoch": 2.679706473426157,
      "grad_norm": 0.4164130389690399,
      "learning_rate": 4.67116490844277e-06,
      "loss": 0.0164,
      "step": 1637440
    },
    {
      "epoch": 2.67973920386481,
      "grad_norm": 0.08956167846918106,
      "learning_rate": 4.671099016229253e-06,
      "loss": 0.0156,
      "step": 1637460
    },
    {
      "epoch": 2.6797719343034636,
      "grad_norm": 0.09886270016431808,
      "learning_rate": 4.6710331240157355e-06,
      "loss": 0.0175,
      "step": 1637480
    },
    {
      "epoch": 2.6798046647421168,
      "grad_norm": 0.2779243290424347,
      "learning_rate": 4.670967231802218e-06,
      "loss": 0.0178,
      "step": 1637500
    },
    {
      "epoch": 2.6798373951807704,
      "grad_norm": 0.24161173403263092,
      "learning_rate": 4.670901339588701e-06,
      "loss": 0.0128,
      "step": 1637520
    },
    {
      "epoch": 2.6798701256194235,
      "grad_norm": 0.7581660151481628,
      "learning_rate": 4.670835447375184e-06,
      "loss": 0.017,
      "step": 1637540
    },
    {
      "epoch": 2.679902856058077,
      "grad_norm": 0.2356044054031372,
      "learning_rate": 4.670769555161667e-06,
      "loss": 0.0154,
      "step": 1637560
    },
    {
      "epoch": 2.6799355864967302,
      "grad_norm": 0.22416403889656067,
      "learning_rate": 4.67070366294815e-06,
      "loss": 0.0125,
      "step": 1637580
    },
    {
      "epoch": 2.6799683169353834,
      "grad_norm": 0.6969963312149048,
      "learning_rate": 4.670637770734633e-06,
      "loss": 0.0101,
      "step": 1637600
    },
    {
      "epoch": 2.680001047374037,
      "grad_norm": 0.1825667917728424,
      "learning_rate": 4.6705718785211155e-06,
      "loss": 0.0104,
      "step": 1637620
    },
    {
      "epoch": 2.68003377781269,
      "grad_norm": 1.112597107887268,
      "learning_rate": 4.670505986307598e-06,
      "loss": 0.0152,
      "step": 1637640
    },
    {
      "epoch": 2.6800665082513437,
      "grad_norm": 0.9327551126480103,
      "learning_rate": 4.670440094094081e-06,
      "loss": 0.0177,
      "step": 1637660
    },
    {
      "epoch": 2.680099238689997,
      "grad_norm": 0.4836541712284088,
      "learning_rate": 4.6703742018805646e-06,
      "loss": 0.0128,
      "step": 1637680
    },
    {
      "epoch": 2.6801319691286505,
      "grad_norm": 0.35876524448394775,
      "learning_rate": 4.670308309667047e-06,
      "loss": 0.0077,
      "step": 1637700
    },
    {
      "epoch": 2.6801646995673036,
      "grad_norm": 1.1467866897583008,
      "learning_rate": 4.67024241745353e-06,
      "loss": 0.0095,
      "step": 1637720
    },
    {
      "epoch": 2.680197430005957,
      "grad_norm": 0.7743706107139587,
      "learning_rate": 4.670176525240013e-06,
      "loss": 0.0126,
      "step": 1637740
    },
    {
      "epoch": 2.6802301604446104,
      "grad_norm": 0.41146236658096313,
      "learning_rate": 4.6701106330264955e-06,
      "loss": 0.0156,
      "step": 1637760
    },
    {
      "epoch": 2.6802628908832635,
      "grad_norm": 0.21702590584754944,
      "learning_rate": 4.670044740812978e-06,
      "loss": 0.0119,
      "step": 1637780
    },
    {
      "epoch": 2.680295621321917,
      "grad_norm": 0.6158666610717773,
      "learning_rate": 4.669978848599461e-06,
      "loss": 0.015,
      "step": 1637800
    },
    {
      "epoch": 2.6803283517605703,
      "grad_norm": 0.30615732073783875,
      "learning_rate": 4.669912956385944e-06,
      "loss": 0.0126,
      "step": 1637820
    },
    {
      "epoch": 2.680361082199224,
      "grad_norm": 0.22098280489444733,
      "learning_rate": 4.669847064172427e-06,
      "loss": 0.015,
      "step": 1637840
    },
    {
      "epoch": 2.680393812637877,
      "grad_norm": 0.9584158062934875,
      "learning_rate": 4.66978117195891e-06,
      "loss": 0.0186,
      "step": 1637860
    },
    {
      "epoch": 2.68042654307653,
      "grad_norm": 0.058117762207984924,
      "learning_rate": 4.669715279745393e-06,
      "loss": 0.0164,
      "step": 1637880
    },
    {
      "epoch": 2.6804592735151838,
      "grad_norm": 0.5163610577583313,
      "learning_rate": 4.6696493875318755e-06,
      "loss": 0.0111,
      "step": 1637900
    },
    {
      "epoch": 2.680492003953837,
      "grad_norm": 0.32086193561553955,
      "learning_rate": 4.669583495318359e-06,
      "loss": 0.0122,
      "step": 1637920
    },
    {
      "epoch": 2.68052473439249,
      "grad_norm": 0.4464304745197296,
      "learning_rate": 4.669517603104842e-06,
      "loss": 0.011,
      "step": 1637940
    },
    {
      "epoch": 2.6805574648311437,
      "grad_norm": 0.3065822720527649,
      "learning_rate": 4.669451710891325e-06,
      "loss": 0.0114,
      "step": 1637960
    },
    {
      "epoch": 2.6805901952697972,
      "grad_norm": 1.0747047662734985,
      "learning_rate": 4.669385818677807e-06,
      "loss": 0.0198,
      "step": 1637980
    },
    {
      "epoch": 2.6806229257084504,
      "grad_norm": 0.2252652794122696,
      "learning_rate": 4.66931992646429e-06,
      "loss": 0.0156,
      "step": 1638000
    },
    {
      "epoch": 2.6806556561471035,
      "grad_norm": 0.6327843070030212,
      "learning_rate": 4.669254034250773e-06,
      "loss": 0.0139,
      "step": 1638020
    },
    {
      "epoch": 2.680688386585757,
      "grad_norm": 0.7778614163398743,
      "learning_rate": 4.6691881420372556e-06,
      "loss": 0.0152,
      "step": 1638040
    },
    {
      "epoch": 2.6807211170244103,
      "grad_norm": 0.4314051568508148,
      "learning_rate": 4.669122249823739e-06,
      "loss": 0.0121,
      "step": 1638060
    },
    {
      "epoch": 2.6807538474630634,
      "grad_norm": 0.4992741048336029,
      "learning_rate": 4.669056357610222e-06,
      "loss": 0.0101,
      "step": 1638080
    },
    {
      "epoch": 2.680786577901717,
      "grad_norm": 0.4325909912586212,
      "learning_rate": 4.668990465396705e-06,
      "loss": 0.0152,
      "step": 1638100
    },
    {
      "epoch": 2.6808193083403706,
      "grad_norm": 0.06478912383317947,
      "learning_rate": 4.668924573183187e-06,
      "loss": 0.0094,
      "step": 1638120
    },
    {
      "epoch": 2.680852038779024,
      "grad_norm": 0.3554183542728424,
      "learning_rate": 4.66885868096967e-06,
      "loss": 0.0137,
      "step": 1638140
    },
    {
      "epoch": 2.680884769217677,
      "grad_norm": 0.2654229998588562,
      "learning_rate": 4.668792788756153e-06,
      "loss": 0.0126,
      "step": 1638160
    },
    {
      "epoch": 2.6809174996563305,
      "grad_norm": 0.23687827587127686,
      "learning_rate": 4.668726896542636e-06,
      "loss": 0.0179,
      "step": 1638180
    },
    {
      "epoch": 2.6809502300949837,
      "grad_norm": 0.12117484956979752,
      "learning_rate": 4.668661004329118e-06,
      "loss": 0.0155,
      "step": 1638200
    },
    {
      "epoch": 2.680982960533637,
      "grad_norm": 0.29264509677886963,
      "learning_rate": 4.668595112115601e-06,
      "loss": 0.0176,
      "step": 1638220
    },
    {
      "epoch": 2.6810156909722904,
      "grad_norm": 1.160131812095642,
      "learning_rate": 4.668529219902085e-06,
      "loss": 0.0202,
      "step": 1638240
    },
    {
      "epoch": 2.681048421410944,
      "grad_norm": 0.19972005486488342,
      "learning_rate": 4.668463327688567e-06,
      "loss": 0.0169,
      "step": 1638260
    },
    {
      "epoch": 2.681081151849597,
      "grad_norm": 0.2861942946910858,
      "learning_rate": 4.66839743547505e-06,
      "loss": 0.0113,
      "step": 1638280
    },
    {
      "epoch": 2.6811138822882503,
      "grad_norm": 0.6341270208358765,
      "learning_rate": 4.668331543261534e-06,
      "loss": 0.0088,
      "step": 1638300
    },
    {
      "epoch": 2.681146612726904,
      "grad_norm": 0.4187159836292267,
      "learning_rate": 4.6682656510480165e-06,
      "loss": 0.0125,
      "step": 1638320
    },
    {
      "epoch": 2.681179343165557,
      "grad_norm": 0.04945215955376625,
      "learning_rate": 4.668199758834499e-06,
      "loss": 0.012,
      "step": 1638340
    },
    {
      "epoch": 2.68121207360421,
      "grad_norm": 0.46414512395858765,
      "learning_rate": 4.668133866620982e-06,
      "loss": 0.016,
      "step": 1638360
    },
    {
      "epoch": 2.681244804042864,
      "grad_norm": 0.534055233001709,
      "learning_rate": 4.668067974407465e-06,
      "loss": 0.0115,
      "step": 1638380
    },
    {
      "epoch": 2.681277534481517,
      "grad_norm": 0.34835463762283325,
      "learning_rate": 4.668002082193947e-06,
      "loss": 0.0207,
      "step": 1638400
    },
    {
      "epoch": 2.6813102649201705,
      "grad_norm": 0.8104895949363708,
      "learning_rate": 4.66793618998043e-06,
      "loss": 0.0179,
      "step": 1638420
    },
    {
      "epoch": 2.6813429953588237,
      "grad_norm": 0.4575294256210327,
      "learning_rate": 4.667870297766913e-06,
      "loss": 0.0183,
      "step": 1638440
    },
    {
      "epoch": 2.6813757257974773,
      "grad_norm": 0.28083372116088867,
      "learning_rate": 4.6678044055533965e-06,
      "loss": 0.0126,
      "step": 1638460
    },
    {
      "epoch": 2.6814084562361304,
      "grad_norm": 2.902909755706787,
      "learning_rate": 4.667738513339879e-06,
      "loss": 0.0202,
      "step": 1638480
    },
    {
      "epoch": 2.6814411866747836,
      "grad_norm": 0.19543182849884033,
      "learning_rate": 4.667672621126362e-06,
      "loss": 0.0166,
      "step": 1638500
    },
    {
      "epoch": 2.681473917113437,
      "grad_norm": 0.7613855004310608,
      "learning_rate": 4.667606728912845e-06,
      "loss": 0.0113,
      "step": 1638520
    },
    {
      "epoch": 2.6815066475520903,
      "grad_norm": 0.15672187507152557,
      "learning_rate": 4.6675408366993274e-06,
      "loss": 0.0145,
      "step": 1638540
    },
    {
      "epoch": 2.681539377990744,
      "grad_norm": 0.5627574920654297,
      "learning_rate": 4.66747494448581e-06,
      "loss": 0.0136,
      "step": 1638560
    },
    {
      "epoch": 2.681572108429397,
      "grad_norm": 0.11777124553918839,
      "learning_rate": 4.667409052272293e-06,
      "loss": 0.015,
      "step": 1638580
    },
    {
      "epoch": 2.6816048388680507,
      "grad_norm": 0.7142239809036255,
      "learning_rate": 4.667343160058776e-06,
      "loss": 0.0107,
      "step": 1638600
    },
    {
      "epoch": 2.681637569306704,
      "grad_norm": 0.21985603868961334,
      "learning_rate": 4.667277267845259e-06,
      "loss": 0.0082,
      "step": 1638620
    },
    {
      "epoch": 2.681670299745357,
      "grad_norm": 0.9759573340415955,
      "learning_rate": 4.667211375631742e-06,
      "loss": 0.0111,
      "step": 1638640
    },
    {
      "epoch": 2.6817030301840106,
      "grad_norm": 0.4433578848838806,
      "learning_rate": 4.667145483418225e-06,
      "loss": 0.0109,
      "step": 1638660
    },
    {
      "epoch": 2.6817357606226637,
      "grad_norm": 0.5435702204704285,
      "learning_rate": 4.6670795912047075e-06,
      "loss": 0.014,
      "step": 1638680
    },
    {
      "epoch": 2.6817684910613173,
      "grad_norm": 0.4591621160507202,
      "learning_rate": 4.667013698991191e-06,
      "loss": 0.016,
      "step": 1638700
    },
    {
      "epoch": 2.6818012214999705,
      "grad_norm": 0.3299710154533386,
      "learning_rate": 4.666947806777674e-06,
      "loss": 0.0131,
      "step": 1638720
    },
    {
      "epoch": 2.681833951938624,
      "grad_norm": 0.4281032979488373,
      "learning_rate": 4.6668819145641565e-06,
      "loss": 0.0137,
      "step": 1638740
    },
    {
      "epoch": 2.681866682377277,
      "grad_norm": 0.4467242360115051,
      "learning_rate": 4.666816022350639e-06,
      "loss": 0.0171,
      "step": 1638760
    },
    {
      "epoch": 2.6818994128159304,
      "grad_norm": 0.47077998518943787,
      "learning_rate": 4.666750130137122e-06,
      "loss": 0.007,
      "step": 1638780
    },
    {
      "epoch": 2.681932143254584,
      "grad_norm": 0.24755904078483582,
      "learning_rate": 4.666684237923605e-06,
      "loss": 0.0168,
      "step": 1638800
    },
    {
      "epoch": 2.681964873693237,
      "grad_norm": 0.2041899710893631,
      "learning_rate": 4.6666183457100875e-06,
      "loss": 0.0113,
      "step": 1638820
    },
    {
      "epoch": 2.6819976041318907,
      "grad_norm": 0.4082169830799103,
      "learning_rate": 4.66655245349657e-06,
      "loss": 0.018,
      "step": 1638840
    },
    {
      "epoch": 2.682030334570544,
      "grad_norm": 0.1456756442785263,
      "learning_rate": 4.666486561283054e-06,
      "loss": 0.0095,
      "step": 1638860
    },
    {
      "epoch": 2.6820630650091974,
      "grad_norm": 0.4029306173324585,
      "learning_rate": 4.6664206690695366e-06,
      "loss": 0.0105,
      "step": 1638880
    },
    {
      "epoch": 2.6820957954478506,
      "grad_norm": 0.3172222971916199,
      "learning_rate": 4.666354776856019e-06,
      "loss": 0.0154,
      "step": 1638900
    },
    {
      "epoch": 2.6821285258865037,
      "grad_norm": 0.41299065947532654,
      "learning_rate": 4.666288884642502e-06,
      "loss": 0.0131,
      "step": 1638920
    },
    {
      "epoch": 2.6821612563251573,
      "grad_norm": 0.5082738399505615,
      "learning_rate": 4.666222992428985e-06,
      "loss": 0.0111,
      "step": 1638940
    },
    {
      "epoch": 2.6821939867638105,
      "grad_norm": 0.3503042161464691,
      "learning_rate": 4.666157100215468e-06,
      "loss": 0.0115,
      "step": 1638960
    },
    {
      "epoch": 2.682226717202464,
      "grad_norm": 0.5586205124855042,
      "learning_rate": 4.666091208001951e-06,
      "loss": 0.0119,
      "step": 1638980
    },
    {
      "epoch": 2.6822594476411172,
      "grad_norm": 0.4319123923778534,
      "learning_rate": 4.666025315788434e-06,
      "loss": 0.0175,
      "step": 1639000
    },
    {
      "epoch": 2.682292178079771,
      "grad_norm": 0.2432577908039093,
      "learning_rate": 4.665959423574917e-06,
      "loss": 0.0145,
      "step": 1639020
    },
    {
      "epoch": 2.682324908518424,
      "grad_norm": 0.26083308458328247,
      "learning_rate": 4.665893531361399e-06,
      "loss": 0.0143,
      "step": 1639040
    },
    {
      "epoch": 2.682357638957077,
      "grad_norm": 0.4070660471916199,
      "learning_rate": 4.665827639147882e-06,
      "loss": 0.016,
      "step": 1639060
    },
    {
      "epoch": 2.6823903693957307,
      "grad_norm": 0.16438814997673035,
      "learning_rate": 4.665761746934365e-06,
      "loss": 0.0171,
      "step": 1639080
    },
    {
      "epoch": 2.682423099834384,
      "grad_norm": 0.6530625820159912,
      "learning_rate": 4.665695854720848e-06,
      "loss": 0.0151,
      "step": 1639100
    },
    {
      "epoch": 2.6824558302730375,
      "grad_norm": 0.5780792832374573,
      "learning_rate": 4.665629962507331e-06,
      "loss": 0.0132,
      "step": 1639120
    },
    {
      "epoch": 2.6824885607116906,
      "grad_norm": 0.13959026336669922,
      "learning_rate": 4.665564070293814e-06,
      "loss": 0.016,
      "step": 1639140
    },
    {
      "epoch": 2.682521291150344,
      "grad_norm": 0.48997652530670166,
      "learning_rate": 4.665498178080297e-06,
      "loss": 0.0114,
      "step": 1639160
    },
    {
      "epoch": 2.6825540215889974,
      "grad_norm": 0.8294179439544678,
      "learning_rate": 4.665432285866779e-06,
      "loss": 0.0161,
      "step": 1639180
    },
    {
      "epoch": 2.6825867520276505,
      "grad_norm": 0.33543160557746887,
      "learning_rate": 4.665366393653262e-06,
      "loss": 0.0178,
      "step": 1639200
    },
    {
      "epoch": 2.682619482466304,
      "grad_norm": 0.33602458238601685,
      "learning_rate": 4.665300501439745e-06,
      "loss": 0.0119,
      "step": 1639220
    },
    {
      "epoch": 2.6826522129049573,
      "grad_norm": 0.27216416597366333,
      "learning_rate": 4.6652346092262276e-06,
      "loss": 0.017,
      "step": 1639240
    },
    {
      "epoch": 2.682684943343611,
      "grad_norm": 0.6796420216560364,
      "learning_rate": 4.665168717012711e-06,
      "loss": 0.0214,
      "step": 1639260
    },
    {
      "epoch": 2.682717673782264,
      "grad_norm": 0.39848044514656067,
      "learning_rate": 4.665102824799194e-06,
      "loss": 0.0138,
      "step": 1639280
    },
    {
      "epoch": 2.6827504042209176,
      "grad_norm": 1.0996618270874023,
      "learning_rate": 4.665036932585677e-06,
      "loss": 0.0084,
      "step": 1639300
    },
    {
      "epoch": 2.6827831346595707,
      "grad_norm": 0.49357593059539795,
      "learning_rate": 4.66497104037216e-06,
      "loss": 0.0199,
      "step": 1639320
    },
    {
      "epoch": 2.682815865098224,
      "grad_norm": 0.48857685923576355,
      "learning_rate": 4.664905148158643e-06,
      "loss": 0.0175,
      "step": 1639340
    },
    {
      "epoch": 2.6828485955368775,
      "grad_norm": 0.16090762615203857,
      "learning_rate": 4.664839255945126e-06,
      "loss": 0.0128,
      "step": 1639360
    },
    {
      "epoch": 2.6828813259755306,
      "grad_norm": 0.623517632484436,
      "learning_rate": 4.6647733637316084e-06,
      "loss": 0.0211,
      "step": 1639380
    },
    {
      "epoch": 2.682914056414184,
      "grad_norm": 0.9023244976997375,
      "learning_rate": 4.664707471518091e-06,
      "loss": 0.0185,
      "step": 1639400
    },
    {
      "epoch": 2.6829467868528374,
      "grad_norm": 0.16811077296733856,
      "learning_rate": 4.664641579304574e-06,
      "loss": 0.0134,
      "step": 1639420
    },
    {
      "epoch": 2.682979517291491,
      "grad_norm": 0.18558156490325928,
      "learning_rate": 4.664575687091057e-06,
      "loss": 0.0183,
      "step": 1639440
    },
    {
      "epoch": 2.683012247730144,
      "grad_norm": 0.19107788801193237,
      "learning_rate": 4.664509794877539e-06,
      "loss": 0.0116,
      "step": 1639460
    },
    {
      "epoch": 2.6830449781687973,
      "grad_norm": 0.48263344168663025,
      "learning_rate": 4.664443902664023e-06,
      "loss": 0.0177,
      "step": 1639480
    },
    {
      "epoch": 2.683077708607451,
      "grad_norm": 0.30404147505760193,
      "learning_rate": 4.664378010450506e-06,
      "loss": 0.0136,
      "step": 1639500
    },
    {
      "epoch": 2.683110439046104,
      "grad_norm": 0.8602684140205383,
      "learning_rate": 4.6643121182369885e-06,
      "loss": 0.023,
      "step": 1639520
    },
    {
      "epoch": 2.683143169484757,
      "grad_norm": 0.117642842233181,
      "learning_rate": 4.664246226023471e-06,
      "loss": 0.0106,
      "step": 1639540
    },
    {
      "epoch": 2.6831758999234108,
      "grad_norm": 0.33686769008636475,
      "learning_rate": 4.664180333809954e-06,
      "loss": 0.011,
      "step": 1639560
    },
    {
      "epoch": 2.6832086303620644,
      "grad_norm": 0.19035255908966064,
      "learning_rate": 4.664114441596437e-06,
      "loss": 0.0121,
      "step": 1639580
    },
    {
      "epoch": 2.6832413608007175,
      "grad_norm": 0.0989837795495987,
      "learning_rate": 4.6640485493829194e-06,
      "loss": 0.015,
      "step": 1639600
    },
    {
      "epoch": 2.6832740912393707,
      "grad_norm": 1.306270956993103,
      "learning_rate": 4.663982657169402e-06,
      "loss": 0.0162,
      "step": 1639620
    },
    {
      "epoch": 2.6833068216780243,
      "grad_norm": 1.0446784496307373,
      "learning_rate": 4.663916764955885e-06,
      "loss": 0.0216,
      "step": 1639640
    },
    {
      "epoch": 2.6833395521166774,
      "grad_norm": 0.5330795049667358,
      "learning_rate": 4.6638508727423685e-06,
      "loss": 0.0124,
      "step": 1639660
    },
    {
      "epoch": 2.6833722825553306,
      "grad_norm": 0.7179630398750305,
      "learning_rate": 4.663784980528851e-06,
      "loss": 0.0178,
      "step": 1639680
    },
    {
      "epoch": 2.683405012993984,
      "grad_norm": 0.18313837051391602,
      "learning_rate": 4.663719088315334e-06,
      "loss": 0.0145,
      "step": 1639700
    },
    {
      "epoch": 2.6834377434326377,
      "grad_norm": 0.4411693513393402,
      "learning_rate": 4.6636531961018176e-06,
      "loss": 0.0146,
      "step": 1639720
    },
    {
      "epoch": 2.683470473871291,
      "grad_norm": 0.13572527468204498,
      "learning_rate": 4.6635873038883e-06,
      "loss": 0.0109,
      "step": 1639740
    },
    {
      "epoch": 2.683503204309944,
      "grad_norm": 0.31072020530700684,
      "learning_rate": 4.663521411674783e-06,
      "loss": 0.0122,
      "step": 1639760
    },
    {
      "epoch": 2.6835359347485976,
      "grad_norm": 0.2687162756919861,
      "learning_rate": 4.663455519461266e-06,
      "loss": 0.0146,
      "step": 1639780
    },
    {
      "epoch": 2.683568665187251,
      "grad_norm": 0.2202731966972351,
      "learning_rate": 4.6633896272477485e-06,
      "loss": 0.0171,
      "step": 1639800
    },
    {
      "epoch": 2.683601395625904,
      "grad_norm": 0.8032798171043396,
      "learning_rate": 4.663323735034231e-06,
      "loss": 0.0126,
      "step": 1639820
    },
    {
      "epoch": 2.6836341260645575,
      "grad_norm": 0.6114647388458252,
      "learning_rate": 4.663257842820714e-06,
      "loss": 0.0177,
      "step": 1639840
    },
    {
      "epoch": 2.683666856503211,
      "grad_norm": 0.18219663202762604,
      "learning_rate": 4.663191950607197e-06,
      "loss": 0.0124,
      "step": 1639860
    },
    {
      "epoch": 2.6836995869418643,
      "grad_norm": 0.8909791111946106,
      "learning_rate": 4.66312605839368e-06,
      "loss": 0.0199,
      "step": 1639880
    },
    {
      "epoch": 2.6837323173805174,
      "grad_norm": 0.12732776999473572,
      "learning_rate": 4.663060166180163e-06,
      "loss": 0.0201,
      "step": 1639900
    },
    {
      "epoch": 2.683765047819171,
      "grad_norm": 0.22478371858596802,
      "learning_rate": 4.662994273966646e-06,
      "loss": 0.0168,
      "step": 1639920
    },
    {
      "epoch": 2.683797778257824,
      "grad_norm": 0.36675190925598145,
      "learning_rate": 4.6629283817531285e-06,
      "loss": 0.0131,
      "step": 1639940
    },
    {
      "epoch": 2.6838305086964773,
      "grad_norm": 0.15551242232322693,
      "learning_rate": 4.662862489539611e-06,
      "loss": 0.0161,
      "step": 1639960
    },
    {
      "epoch": 2.683863239135131,
      "grad_norm": 0.352224200963974,
      "learning_rate": 4.662796597326094e-06,
      "loss": 0.0125,
      "step": 1639980
    },
    {
      "epoch": 2.683895969573784,
      "grad_norm": 0.5948978662490845,
      "learning_rate": 4.662730705112577e-06,
      "loss": 0.0136,
      "step": 1640000
    },
    {
      "epoch": 2.6839287000124377,
      "grad_norm": 0.46228092908859253,
      "learning_rate": 4.66266481289906e-06,
      "loss": 0.0134,
      "step": 1640020
    },
    {
      "epoch": 2.683961430451091,
      "grad_norm": 0.36072078347206116,
      "learning_rate": 4.662598920685543e-06,
      "loss": 0.0163,
      "step": 1640040
    },
    {
      "epoch": 2.6839941608897444,
      "grad_norm": 0.09777289628982544,
      "learning_rate": 4.662533028472026e-06,
      "loss": 0.0159,
      "step": 1640060
    },
    {
      "epoch": 2.6840268913283976,
      "grad_norm": 0.9721544981002808,
      "learning_rate": 4.6624671362585086e-06,
      "loss": 0.0139,
      "step": 1640080
    },
    {
      "epoch": 2.6840596217670507,
      "grad_norm": 0.4273510277271271,
      "learning_rate": 4.662401244044991e-06,
      "loss": 0.0163,
      "step": 1640100
    },
    {
      "epoch": 2.6840923522057043,
      "grad_norm": 0.22220617532730103,
      "learning_rate": 4.662335351831475e-06,
      "loss": 0.0166,
      "step": 1640120
    },
    {
      "epoch": 2.6841250826443575,
      "grad_norm": 0.15063363313674927,
      "learning_rate": 4.662269459617958e-06,
      "loss": 0.0091,
      "step": 1640140
    },
    {
      "epoch": 2.684157813083011,
      "grad_norm": 0.4935050904750824,
      "learning_rate": 4.66220356740444e-06,
      "loss": 0.0128,
      "step": 1640160
    },
    {
      "epoch": 2.684190543521664,
      "grad_norm": 0.13005557656288147,
      "learning_rate": 4.662137675190923e-06,
      "loss": 0.0172,
      "step": 1640180
    },
    {
      "epoch": 2.684223273960318,
      "grad_norm": 0.3106363117694855,
      "learning_rate": 4.662071782977406e-06,
      "loss": 0.0189,
      "step": 1640200
    },
    {
      "epoch": 2.684256004398971,
      "grad_norm": 0.3879590332508087,
      "learning_rate": 4.662005890763889e-06,
      "loss": 0.0126,
      "step": 1640220
    },
    {
      "epoch": 2.684288734837624,
      "grad_norm": 0.6399744749069214,
      "learning_rate": 4.661939998550371e-06,
      "loss": 0.0144,
      "step": 1640240
    },
    {
      "epoch": 2.6843214652762777,
      "grad_norm": 0.4340534806251526,
      "learning_rate": 4.661874106336854e-06,
      "loss": 0.0141,
      "step": 1640260
    },
    {
      "epoch": 2.684354195714931,
      "grad_norm": 0.6219707727432251,
      "learning_rate": 4.661808214123338e-06,
      "loss": 0.0156,
      "step": 1640280
    },
    {
      "epoch": 2.6843869261535844,
      "grad_norm": 0.16270418465137482,
      "learning_rate": 4.66174232190982e-06,
      "loss": 0.0099,
      "step": 1640300
    },
    {
      "epoch": 2.6844196565922376,
      "grad_norm": 0.04920697957277298,
      "learning_rate": 4.661676429696303e-06,
      "loss": 0.0152,
      "step": 1640320
    },
    {
      "epoch": 2.684452387030891,
      "grad_norm": 0.23617373406887054,
      "learning_rate": 4.661610537482786e-06,
      "loss": 0.0099,
      "step": 1640340
    },
    {
      "epoch": 2.6844851174695443,
      "grad_norm": 0.20330409705638885,
      "learning_rate": 4.661544645269269e-06,
      "loss": 0.024,
      "step": 1640360
    },
    {
      "epoch": 2.6845178479081975,
      "grad_norm": 0.3785288333892822,
      "learning_rate": 4.661478753055752e-06,
      "loss": 0.0103,
      "step": 1640380
    },
    {
      "epoch": 2.684550578346851,
      "grad_norm": 0.3928172290325165,
      "learning_rate": 4.661412860842235e-06,
      "loss": 0.0121,
      "step": 1640400
    },
    {
      "epoch": 2.684583308785504,
      "grad_norm": 0.12694475054740906,
      "learning_rate": 4.661346968628718e-06,
      "loss": 0.0109,
      "step": 1640420
    },
    {
      "epoch": 2.684616039224158,
      "grad_norm": 0.7260985374450684,
      "learning_rate": 4.6612810764152004e-06,
      "loss": 0.0171,
      "step": 1640440
    },
    {
      "epoch": 2.684648769662811,
      "grad_norm": 0.22410069406032562,
      "learning_rate": 4.661215184201683e-06,
      "loss": 0.0104,
      "step": 1640460
    },
    {
      "epoch": 2.6846815001014646,
      "grad_norm": 0.3297930061817169,
      "learning_rate": 4.661149291988166e-06,
      "loss": 0.0134,
      "step": 1640480
    },
    {
      "epoch": 2.6847142305401177,
      "grad_norm": 0.38662609457969666,
      "learning_rate": 4.6610833997746495e-06,
      "loss": 0.012,
      "step": 1640500
    },
    {
      "epoch": 2.684746960978771,
      "grad_norm": 0.5421817302703857,
      "learning_rate": 4.661017507561132e-06,
      "loss": 0.0114,
      "step": 1640520
    },
    {
      "epoch": 2.6847796914174245,
      "grad_norm": 0.14040584862232208,
      "learning_rate": 4.660951615347615e-06,
      "loss": 0.0134,
      "step": 1640540
    },
    {
      "epoch": 2.6848124218560776,
      "grad_norm": 0.5244430303573608,
      "learning_rate": 4.660885723134098e-06,
      "loss": 0.0154,
      "step": 1640560
    },
    {
      "epoch": 2.684845152294731,
      "grad_norm": 0.5119425654411316,
      "learning_rate": 4.6608198309205805e-06,
      "loss": 0.0124,
      "step": 1640580
    },
    {
      "epoch": 2.6848778827333843,
      "grad_norm": 0.22620829939842224,
      "learning_rate": 4.660753938707063e-06,
      "loss": 0.0115,
      "step": 1640600
    },
    {
      "epoch": 2.684910613172038,
      "grad_norm": 0.28652724623680115,
      "learning_rate": 4.660688046493546e-06,
      "loss": 0.0123,
      "step": 1640620
    },
    {
      "epoch": 2.684943343610691,
      "grad_norm": 0.5347287058830261,
      "learning_rate": 4.660622154280029e-06,
      "loss": 0.0138,
      "step": 1640640
    },
    {
      "epoch": 2.6849760740493442,
      "grad_norm": 0.2412230372428894,
      "learning_rate": 4.660556262066511e-06,
      "loss": 0.0142,
      "step": 1640660
    },
    {
      "epoch": 2.685008804487998,
      "grad_norm": 0.43246933817863464,
      "learning_rate": 4.660490369852995e-06,
      "loss": 0.0116,
      "step": 1640680
    },
    {
      "epoch": 2.685041534926651,
      "grad_norm": 0.30355095863342285,
      "learning_rate": 4.660424477639478e-06,
      "loss": 0.0175,
      "step": 1640700
    },
    {
      "epoch": 2.6850742653653046,
      "grad_norm": 0.46489328145980835,
      "learning_rate": 4.6603585854259605e-06,
      "loss": 0.0137,
      "step": 1640720
    },
    {
      "epoch": 2.6851069958039577,
      "grad_norm": 0.21659502387046814,
      "learning_rate": 4.660292693212444e-06,
      "loss": 0.0114,
      "step": 1640740
    },
    {
      "epoch": 2.6851397262426113,
      "grad_norm": 0.3067547380924225,
      "learning_rate": 4.660226800998927e-06,
      "loss": 0.0182,
      "step": 1640760
    },
    {
      "epoch": 2.6851724566812645,
      "grad_norm": 0.30172112584114075,
      "learning_rate": 4.6601609087854095e-06,
      "loss": 0.0164,
      "step": 1640780
    },
    {
      "epoch": 2.6852051871199176,
      "grad_norm": 0.7781100869178772,
      "learning_rate": 4.660095016571892e-06,
      "loss": 0.0143,
      "step": 1640800
    },
    {
      "epoch": 2.685237917558571,
      "grad_norm": 0.18612171709537506,
      "learning_rate": 4.660029124358375e-06,
      "loss": 0.0104,
      "step": 1640820
    },
    {
      "epoch": 2.6852706479972244,
      "grad_norm": 0.10745612531900406,
      "learning_rate": 4.659963232144858e-06,
      "loss": 0.0168,
      "step": 1640840
    },
    {
      "epoch": 2.685303378435878,
      "grad_norm": 0.19884127378463745,
      "learning_rate": 4.6598973399313405e-06,
      "loss": 0.0141,
      "step": 1640860
    },
    {
      "epoch": 2.685336108874531,
      "grad_norm": 0.17419785261154175,
      "learning_rate": 4.659831447717823e-06,
      "loss": 0.0142,
      "step": 1640880
    },
    {
      "epoch": 2.6853688393131847,
      "grad_norm": 0.31451621651649475,
      "learning_rate": 4.659765555504307e-06,
      "loss": 0.0145,
      "step": 1640900
    },
    {
      "epoch": 2.685401569751838,
      "grad_norm": 0.25857359170913696,
      "learning_rate": 4.6596996632907896e-06,
      "loss": 0.0141,
      "step": 1640920
    },
    {
      "epoch": 2.685434300190491,
      "grad_norm": 0.47507259249687195,
      "learning_rate": 4.659633771077272e-06,
      "loss": 0.0113,
      "step": 1640940
    },
    {
      "epoch": 2.6854670306291446,
      "grad_norm": 0.2925157845020294,
      "learning_rate": 4.659567878863755e-06,
      "loss": 0.0115,
      "step": 1640960
    },
    {
      "epoch": 2.6854997610677978,
      "grad_norm": 0.14967405796051025,
      "learning_rate": 4.659501986650238e-06,
      "loss": 0.0086,
      "step": 1640980
    },
    {
      "epoch": 2.685532491506451,
      "grad_norm": 0.6974532008171082,
      "learning_rate": 4.6594360944367205e-06,
      "loss": 0.0115,
      "step": 1641000
    },
    {
      "epoch": 2.6855652219451045,
      "grad_norm": 0.1269078552722931,
      "learning_rate": 4.659370202223203e-06,
      "loss": 0.0139,
      "step": 1641020
    },
    {
      "epoch": 2.685597952383758,
      "grad_norm": 0.4121815264225006,
      "learning_rate": 4.659304310009686e-06,
      "loss": 0.0097,
      "step": 1641040
    },
    {
      "epoch": 2.6856306828224112,
      "grad_norm": 0.26229673624038696,
      "learning_rate": 4.659238417796169e-06,
      "loss": 0.0127,
      "step": 1641060
    },
    {
      "epoch": 2.6856634132610644,
      "grad_norm": 0.14060410857200623,
      "learning_rate": 4.659172525582652e-06,
      "loss": 0.0132,
      "step": 1641080
    },
    {
      "epoch": 2.685696143699718,
      "grad_norm": 0.49632903933525085,
      "learning_rate": 4.659106633369135e-06,
      "loss": 0.0125,
      "step": 1641100
    },
    {
      "epoch": 2.685728874138371,
      "grad_norm": 0.434199720621109,
      "learning_rate": 4.659040741155618e-06,
      "loss": 0.0213,
      "step": 1641120
    },
    {
      "epoch": 2.6857616045770243,
      "grad_norm": 0.42299410700798035,
      "learning_rate": 4.658974848942101e-06,
      "loss": 0.0115,
      "step": 1641140
    },
    {
      "epoch": 2.685794335015678,
      "grad_norm": 0.1748553365468979,
      "learning_rate": 4.658908956728584e-06,
      "loss": 0.0152,
      "step": 1641160
    },
    {
      "epoch": 2.6858270654543315,
      "grad_norm": 0.39744362235069275,
      "learning_rate": 4.658843064515067e-06,
      "loss": 0.0158,
      "step": 1641180
    },
    {
      "epoch": 2.6858597958929846,
      "grad_norm": 0.41947606205940247,
      "learning_rate": 4.65877717230155e-06,
      "loss": 0.0168,
      "step": 1641200
    },
    {
      "epoch": 2.6858925263316378,
      "grad_norm": 0.26681989431381226,
      "learning_rate": 4.658711280088032e-06,
      "loss": 0.012,
      "step": 1641220
    },
    {
      "epoch": 2.6859252567702914,
      "grad_norm": 0.4851965308189392,
      "learning_rate": 4.658645387874515e-06,
      "loss": 0.0125,
      "step": 1641240
    },
    {
      "epoch": 2.6859579872089445,
      "grad_norm": 0.3343384265899658,
      "learning_rate": 4.658579495660998e-06,
      "loss": 0.0178,
      "step": 1641260
    },
    {
      "epoch": 2.6859907176475977,
      "grad_norm": 0.1550435721874237,
      "learning_rate": 4.658513603447481e-06,
      "loss": 0.0128,
      "step": 1641280
    },
    {
      "epoch": 2.6860234480862513,
      "grad_norm": 0.32524311542510986,
      "learning_rate": 4.658447711233964e-06,
      "loss": 0.0081,
      "step": 1641300
    },
    {
      "epoch": 2.686056178524905,
      "grad_norm": 0.16845755279064178,
      "learning_rate": 4.658381819020447e-06,
      "loss": 0.0145,
      "step": 1641320
    },
    {
      "epoch": 2.686088908963558,
      "grad_norm": 0.1950564831495285,
      "learning_rate": 4.65831592680693e-06,
      "loss": 0.0129,
      "step": 1641340
    },
    {
      "epoch": 2.686121639402211,
      "grad_norm": 0.16800454258918762,
      "learning_rate": 4.658250034593412e-06,
      "loss": 0.0182,
      "step": 1641360
    },
    {
      "epoch": 2.6861543698408648,
      "grad_norm": 0.6279379725456238,
      "learning_rate": 4.658184142379895e-06,
      "loss": 0.0147,
      "step": 1641380
    },
    {
      "epoch": 2.686187100279518,
      "grad_norm": 0.5977398753166199,
      "learning_rate": 4.658118250166378e-06,
      "loss": 0.014,
      "step": 1641400
    },
    {
      "epoch": 2.686219830718171,
      "grad_norm": 0.3412456214427948,
      "learning_rate": 4.658052357952861e-06,
      "loss": 0.0121,
      "step": 1641420
    },
    {
      "epoch": 2.6862525611568246,
      "grad_norm": 1.1499207019805908,
      "learning_rate": 4.657986465739344e-06,
      "loss": 0.0145,
      "step": 1641440
    },
    {
      "epoch": 2.686285291595478,
      "grad_norm": 0.6007299423217773,
      "learning_rate": 4.657920573525827e-06,
      "loss": 0.0154,
      "step": 1641460
    },
    {
      "epoch": 2.6863180220341314,
      "grad_norm": 0.451636403799057,
      "learning_rate": 4.65785468131231e-06,
      "loss": 0.0103,
      "step": 1641480
    },
    {
      "epoch": 2.6863507524727845,
      "grad_norm": 0.8116122484207153,
      "learning_rate": 4.657788789098792e-06,
      "loss": 0.0159,
      "step": 1641500
    },
    {
      "epoch": 2.686383482911438,
      "grad_norm": 0.1490650475025177,
      "learning_rate": 4.657722896885275e-06,
      "loss": 0.0197,
      "step": 1641520
    },
    {
      "epoch": 2.6864162133500913,
      "grad_norm": 0.6103435158729553,
      "learning_rate": 4.657657004671759e-06,
      "loss": 0.0134,
      "step": 1641540
    },
    {
      "epoch": 2.6864489437887444,
      "grad_norm": 0.26624301075935364,
      "learning_rate": 4.6575911124582415e-06,
      "loss": 0.0135,
      "step": 1641560
    },
    {
      "epoch": 2.686481674227398,
      "grad_norm": 0.4904281198978424,
      "learning_rate": 4.657525220244724e-06,
      "loss": 0.0193,
      "step": 1641580
    },
    {
      "epoch": 2.686514404666051,
      "grad_norm": 0.846431314945221,
      "learning_rate": 4.657459328031207e-06,
      "loss": 0.0181,
      "step": 1641600
    },
    {
      "epoch": 2.6865471351047048,
      "grad_norm": 0.38330304622650146,
      "learning_rate": 4.65739343581769e-06,
      "loss": 0.0121,
      "step": 1641620
    },
    {
      "epoch": 2.686579865543358,
      "grad_norm": 0.2371814101934433,
      "learning_rate": 4.6573275436041724e-06,
      "loss": 0.0135,
      "step": 1641640
    },
    {
      "epoch": 2.6866125959820115,
      "grad_norm": 0.36590781807899475,
      "learning_rate": 4.657261651390655e-06,
      "loss": 0.0092,
      "step": 1641660
    },
    {
      "epoch": 2.6866453264206647,
      "grad_norm": 0.2756970226764679,
      "learning_rate": 4.657195759177138e-06,
      "loss": 0.0136,
      "step": 1641680
    },
    {
      "epoch": 2.686678056859318,
      "grad_norm": 0.11617561429738998,
      "learning_rate": 4.6571298669636215e-06,
      "loss": 0.0186,
      "step": 1641700
    },
    {
      "epoch": 2.6867107872979714,
      "grad_norm": 0.655592679977417,
      "learning_rate": 4.657063974750104e-06,
      "loss": 0.0151,
      "step": 1641720
    },
    {
      "epoch": 2.6867435177366246,
      "grad_norm": 0.4988844394683838,
      "learning_rate": 4.656998082536587e-06,
      "loss": 0.0188,
      "step": 1641740
    },
    {
      "epoch": 2.686776248175278,
      "grad_norm": 0.42634496092796326,
      "learning_rate": 4.65693219032307e-06,
      "loss": 0.0108,
      "step": 1641760
    },
    {
      "epoch": 2.6868089786139313,
      "grad_norm": 0.232672318816185,
      "learning_rate": 4.656866298109553e-06,
      "loss": 0.0134,
      "step": 1641780
    },
    {
      "epoch": 2.686841709052585,
      "grad_norm": 0.6244937777519226,
      "learning_rate": 4.656800405896036e-06,
      "loss": 0.0126,
      "step": 1641800
    },
    {
      "epoch": 2.686874439491238,
      "grad_norm": 0.23878125846385956,
      "learning_rate": 4.656734513682519e-06,
      "loss": 0.0174,
      "step": 1641820
    },
    {
      "epoch": 2.686907169929891,
      "grad_norm": 0.40858978033065796,
      "learning_rate": 4.6566686214690015e-06,
      "loss": 0.0138,
      "step": 1641840
    },
    {
      "epoch": 2.686939900368545,
      "grad_norm": 0.35020554065704346,
      "learning_rate": 4.656602729255484e-06,
      "loss": 0.0095,
      "step": 1641860
    },
    {
      "epoch": 2.686972630807198,
      "grad_norm": 0.2836012840270996,
      "learning_rate": 4.656536837041967e-06,
      "loss": 0.0164,
      "step": 1641880
    },
    {
      "epoch": 2.6870053612458515,
      "grad_norm": 0.4944021701812744,
      "learning_rate": 4.65647094482845e-06,
      "loss": 0.0221,
      "step": 1641900
    },
    {
      "epoch": 2.6870380916845047,
      "grad_norm": 0.3617430627346039,
      "learning_rate": 4.656405052614933e-06,
      "loss": 0.0143,
      "step": 1641920
    },
    {
      "epoch": 2.6870708221231583,
      "grad_norm": 0.15186558663845062,
      "learning_rate": 4.656339160401416e-06,
      "loss": 0.0117,
      "step": 1641940
    },
    {
      "epoch": 2.6871035525618114,
      "grad_norm": 3.2921509742736816,
      "learning_rate": 4.656273268187899e-06,
      "loss": 0.0164,
      "step": 1641960
    },
    {
      "epoch": 2.6871362830004646,
      "grad_norm": 0.6665489077568054,
      "learning_rate": 4.6562073759743816e-06,
      "loss": 0.0094,
      "step": 1641980
    },
    {
      "epoch": 2.687169013439118,
      "grad_norm": 0.16613617539405823,
      "learning_rate": 4.656141483760864e-06,
      "loss": 0.0118,
      "step": 1642000
    },
    {
      "epoch": 2.6872017438777713,
      "grad_norm": 0.13844305276870728,
      "learning_rate": 4.656075591547347e-06,
      "loss": 0.0095,
      "step": 1642020
    },
    {
      "epoch": 2.687234474316425,
      "grad_norm": 0.021587759256362915,
      "learning_rate": 4.65600969933383e-06,
      "loss": 0.012,
      "step": 1642040
    },
    {
      "epoch": 2.687267204755078,
      "grad_norm": 0.20686957240104675,
      "learning_rate": 4.6559438071203125e-06,
      "loss": 0.0169,
      "step": 1642060
    },
    {
      "epoch": 2.6872999351937317,
      "grad_norm": 0.3886055648326874,
      "learning_rate": 4.655877914906795e-06,
      "loss": 0.0203,
      "step": 1642080
    },
    {
      "epoch": 2.687332665632385,
      "grad_norm": 0.2613580822944641,
      "learning_rate": 4.655812022693279e-06,
      "loss": 0.0162,
      "step": 1642100
    },
    {
      "epoch": 2.687365396071038,
      "grad_norm": 1.7231025695800781,
      "learning_rate": 4.655746130479762e-06,
      "loss": 0.0221,
      "step": 1642120
    },
    {
      "epoch": 2.6873981265096916,
      "grad_norm": 0.20247887074947357,
      "learning_rate": 4.655680238266244e-06,
      "loss": 0.0103,
      "step": 1642140
    },
    {
      "epoch": 2.6874308569483447,
      "grad_norm": 0.32163485884666443,
      "learning_rate": 4.655614346052728e-06,
      "loss": 0.0147,
      "step": 1642160
    },
    {
      "epoch": 2.6874635873869983,
      "grad_norm": 0.20394030213356018,
      "learning_rate": 4.655548453839211e-06,
      "loss": 0.014,
      "step": 1642180
    },
    {
      "epoch": 2.6874963178256515,
      "grad_norm": 0.1792687028646469,
      "learning_rate": 4.655482561625693e-06,
      "loss": 0.0163,
      "step": 1642200
    },
    {
      "epoch": 2.687529048264305,
      "grad_norm": 0.5924496650695801,
      "learning_rate": 4.655416669412176e-06,
      "loss": 0.01,
      "step": 1642220
    },
    {
      "epoch": 2.687561778702958,
      "grad_norm": 0.14665819704532623,
      "learning_rate": 4.655350777198659e-06,
      "loss": 0.0112,
      "step": 1642240
    },
    {
      "epoch": 2.6875945091416114,
      "grad_norm": 0.268798828125,
      "learning_rate": 4.655284884985142e-06,
      "loss": 0.0186,
      "step": 1642260
    },
    {
      "epoch": 2.687627239580265,
      "grad_norm": 0.4573329985141754,
      "learning_rate": 4.655218992771624e-06,
      "loss": 0.0129,
      "step": 1642280
    },
    {
      "epoch": 2.687659970018918,
      "grad_norm": 0.1725156009197235,
      "learning_rate": 4.655153100558107e-06,
      "loss": 0.0144,
      "step": 1642300
    },
    {
      "epoch": 2.6876927004575717,
      "grad_norm": 0.3647560477256775,
      "learning_rate": 4.655087208344591e-06,
      "loss": 0.0191,
      "step": 1642320
    },
    {
      "epoch": 2.687725430896225,
      "grad_norm": 0.3702690005302429,
      "learning_rate": 4.655021316131073e-06,
      "loss": 0.0143,
      "step": 1642340
    },
    {
      "epoch": 2.6877581613348784,
      "grad_norm": 0.22050794959068298,
      "learning_rate": 4.654955423917556e-06,
      "loss": 0.0109,
      "step": 1642360
    },
    {
      "epoch": 2.6877908917735316,
      "grad_norm": 0.13064855337142944,
      "learning_rate": 4.654889531704039e-06,
      "loss": 0.0149,
      "step": 1642380
    },
    {
      "epoch": 2.6878236222121847,
      "grad_norm": 0.14685770869255066,
      "learning_rate": 4.654823639490522e-06,
      "loss": 0.013,
      "step": 1642400
    },
    {
      "epoch": 2.6878563526508383,
      "grad_norm": 0.5830609202384949,
      "learning_rate": 4.654757747277004e-06,
      "loss": 0.0135,
      "step": 1642420
    },
    {
      "epoch": 2.6878890830894915,
      "grad_norm": 0.3028566241264343,
      "learning_rate": 4.654691855063487e-06,
      "loss": 0.0106,
      "step": 1642440
    },
    {
      "epoch": 2.6879218135281446,
      "grad_norm": 0.40275856852531433,
      "learning_rate": 4.65462596284997e-06,
      "loss": 0.0194,
      "step": 1642460
    },
    {
      "epoch": 2.6879545439667982,
      "grad_norm": 0.13752731680870056,
      "learning_rate": 4.6545600706364534e-06,
      "loss": 0.0165,
      "step": 1642480
    },
    {
      "epoch": 2.687987274405452,
      "grad_norm": 0.26771825551986694,
      "learning_rate": 4.654494178422936e-06,
      "loss": 0.0134,
      "step": 1642500
    },
    {
      "epoch": 2.688020004844105,
      "grad_norm": 0.22025103867053986,
      "learning_rate": 4.654428286209419e-06,
      "loss": 0.0081,
      "step": 1642520
    },
    {
      "epoch": 2.688052735282758,
      "grad_norm": 0.08189555257558823,
      "learning_rate": 4.654362393995902e-06,
      "loss": 0.0103,
      "step": 1642540
    },
    {
      "epoch": 2.6880854657214117,
      "grad_norm": 0.1269768625497818,
      "learning_rate": 4.654296501782385e-06,
      "loss": 0.0122,
      "step": 1642560
    },
    {
      "epoch": 2.688118196160065,
      "grad_norm": 0.58375483751297,
      "learning_rate": 4.654230609568868e-06,
      "loss": 0.0146,
      "step": 1642580
    },
    {
      "epoch": 2.688150926598718,
      "grad_norm": 0.13092158734798431,
      "learning_rate": 4.654164717355351e-06,
      "loss": 0.0157,
      "step": 1642600
    },
    {
      "epoch": 2.6881836570373716,
      "grad_norm": 0.8572289943695068,
      "learning_rate": 4.6540988251418335e-06,
      "loss": 0.0132,
      "step": 1642620
    },
    {
      "epoch": 2.688216387476025,
      "grad_norm": 0.35676348209381104,
      "learning_rate": 4.654032932928316e-06,
      "loss": 0.0176,
      "step": 1642640
    },
    {
      "epoch": 2.6882491179146784,
      "grad_norm": 0.5256754159927368,
      "learning_rate": 4.653967040714799e-06,
      "loss": 0.014,
      "step": 1642660
    },
    {
      "epoch": 2.6882818483533315,
      "grad_norm": 0.8734802007675171,
      "learning_rate": 4.653901148501282e-06,
      "loss": 0.0154,
      "step": 1642680
    },
    {
      "epoch": 2.688314578791985,
      "grad_norm": 0.37113794684410095,
      "learning_rate": 4.653835256287764e-06,
      "loss": 0.0202,
      "step": 1642700
    },
    {
      "epoch": 2.6883473092306382,
      "grad_norm": 0.34556904435157776,
      "learning_rate": 4.653769364074248e-06,
      "loss": 0.0147,
      "step": 1642720
    },
    {
      "epoch": 2.6883800396692914,
      "grad_norm": 0.20180897414684296,
      "learning_rate": 4.653703471860731e-06,
      "loss": 0.0128,
      "step": 1642740
    },
    {
      "epoch": 2.688412770107945,
      "grad_norm": 0.23530684411525726,
      "learning_rate": 4.6536375796472135e-06,
      "loss": 0.0153,
      "step": 1642760
    },
    {
      "epoch": 2.6884455005465986,
      "grad_norm": 0.2585819661617279,
      "learning_rate": 4.653571687433696e-06,
      "loss": 0.009,
      "step": 1642780
    },
    {
      "epoch": 2.6884782309852517,
      "grad_norm": 0.3852419853210449,
      "learning_rate": 4.653505795220179e-06,
      "loss": 0.0146,
      "step": 1642800
    },
    {
      "epoch": 2.688510961423905,
      "grad_norm": 0.2696114182472229,
      "learning_rate": 4.653439903006662e-06,
      "loss": 0.0189,
      "step": 1642820
    },
    {
      "epoch": 2.6885436918625585,
      "grad_norm": 0.6978378295898438,
      "learning_rate": 4.653374010793145e-06,
      "loss": 0.0134,
      "step": 1642840
    },
    {
      "epoch": 2.6885764223012116,
      "grad_norm": 0.2708539664745331,
      "learning_rate": 4.653308118579628e-06,
      "loss": 0.0158,
      "step": 1642860
    },
    {
      "epoch": 2.688609152739865,
      "grad_norm": 0.5081586837768555,
      "learning_rate": 4.653242226366111e-06,
      "loss": 0.0126,
      "step": 1642880
    },
    {
      "epoch": 2.6886418831785184,
      "grad_norm": 0.4274231493473053,
      "learning_rate": 4.6531763341525935e-06,
      "loss": 0.0146,
      "step": 1642900
    },
    {
      "epoch": 2.688674613617172,
      "grad_norm": 0.13991029560565948,
      "learning_rate": 4.653110441939076e-06,
      "loss": 0.0161,
      "step": 1642920
    },
    {
      "epoch": 2.688707344055825,
      "grad_norm": 0.5003696084022522,
      "learning_rate": 4.653044549725559e-06,
      "loss": 0.0136,
      "step": 1642940
    },
    {
      "epoch": 2.6887400744944783,
      "grad_norm": 0.34701070189476013,
      "learning_rate": 4.652978657512043e-06,
      "loss": 0.0177,
      "step": 1642960
    },
    {
      "epoch": 2.688772804933132,
      "grad_norm": 0.1709427684545517,
      "learning_rate": 4.652912765298525e-06,
      "loss": 0.0135,
      "step": 1642980
    },
    {
      "epoch": 2.688805535371785,
      "grad_norm": 0.13977372646331787,
      "learning_rate": 4.652846873085008e-06,
      "loss": 0.013,
      "step": 1643000
    },
    {
      "epoch": 2.688838265810438,
      "grad_norm": 0.602867841720581,
      "learning_rate": 4.652780980871491e-06,
      "loss": 0.0146,
      "step": 1643020
    },
    {
      "epoch": 2.6888709962490918,
      "grad_norm": 0.7859026789665222,
      "learning_rate": 4.6527150886579735e-06,
      "loss": 0.0161,
      "step": 1643040
    },
    {
      "epoch": 2.688903726687745,
      "grad_norm": 0.10742425918579102,
      "learning_rate": 4.652649196444456e-06,
      "loss": 0.016,
      "step": 1643060
    },
    {
      "epoch": 2.6889364571263985,
      "grad_norm": 0.6150549054145813,
      "learning_rate": 4.652583304230939e-06,
      "loss": 0.0131,
      "step": 1643080
    },
    {
      "epoch": 2.6889691875650517,
      "grad_norm": 0.46291306614875793,
      "learning_rate": 4.652517412017422e-06,
      "loss": 0.0161,
      "step": 1643100
    },
    {
      "epoch": 2.6890019180037052,
      "grad_norm": 0.3542819023132324,
      "learning_rate": 4.652451519803905e-06,
      "loss": 0.0161,
      "step": 1643120
    },
    {
      "epoch": 2.6890346484423584,
      "grad_norm": 0.5008918642997742,
      "learning_rate": 4.652385627590388e-06,
      "loss": 0.0132,
      "step": 1643140
    },
    {
      "epoch": 2.6890673788810115,
      "grad_norm": 0.7062675952911377,
      "learning_rate": 4.652319735376871e-06,
      "loss": 0.0097,
      "step": 1643160
    },
    {
      "epoch": 2.689100109319665,
      "grad_norm": 0.4848960041999817,
      "learning_rate": 4.652253843163354e-06,
      "loss": 0.0117,
      "step": 1643180
    },
    {
      "epoch": 2.6891328397583183,
      "grad_norm": 0.06402554363012314,
      "learning_rate": 4.652187950949837e-06,
      "loss": 0.0108,
      "step": 1643200
    },
    {
      "epoch": 2.689165570196972,
      "grad_norm": 0.28402119874954224,
      "learning_rate": 4.65212205873632e-06,
      "loss": 0.0174,
      "step": 1643220
    },
    {
      "epoch": 2.689198300635625,
      "grad_norm": 0.09248417615890503,
      "learning_rate": 4.652056166522803e-06,
      "loss": 0.0088,
      "step": 1643240
    },
    {
      "epoch": 2.6892310310742786,
      "grad_norm": 0.8203673958778381,
      "learning_rate": 4.651990274309285e-06,
      "loss": 0.0223,
      "step": 1643260
    },
    {
      "epoch": 2.689263761512932,
      "grad_norm": 0.4583379924297333,
      "learning_rate": 4.651924382095768e-06,
      "loss": 0.0177,
      "step": 1643280
    },
    {
      "epoch": 2.689296491951585,
      "grad_norm": 0.5543815493583679,
      "learning_rate": 4.651858489882251e-06,
      "loss": 0.015,
      "step": 1643300
    },
    {
      "epoch": 2.6893292223902385,
      "grad_norm": 0.7834005355834961,
      "learning_rate": 4.651792597668734e-06,
      "loss": 0.017,
      "step": 1643320
    },
    {
      "epoch": 2.6893619528288917,
      "grad_norm": 0.2592770755290985,
      "learning_rate": 4.651726705455217e-06,
      "loss": 0.0114,
      "step": 1643340
    },
    {
      "epoch": 2.6893946832675453,
      "grad_norm": 0.43786177039146423,
      "learning_rate": 4.6516608132417e-06,
      "loss": 0.0166,
      "step": 1643360
    },
    {
      "epoch": 2.6894274137061984,
      "grad_norm": 0.4136480391025543,
      "learning_rate": 4.651594921028183e-06,
      "loss": 0.0146,
      "step": 1643380
    },
    {
      "epoch": 2.689460144144852,
      "grad_norm": 0.4281630218029022,
      "learning_rate": 4.651529028814665e-06,
      "loss": 0.0192,
      "step": 1643400
    },
    {
      "epoch": 2.689492874583505,
      "grad_norm": 0.2625986635684967,
      "learning_rate": 4.651463136601148e-06,
      "loss": 0.0203,
      "step": 1643420
    },
    {
      "epoch": 2.6895256050221583,
      "grad_norm": 1.3539880514144897,
      "learning_rate": 4.651397244387631e-06,
      "loss": 0.012,
      "step": 1643440
    },
    {
      "epoch": 2.689558335460812,
      "grad_norm": 1.6894513368606567,
      "learning_rate": 4.651331352174114e-06,
      "loss": 0.0145,
      "step": 1643460
    },
    {
      "epoch": 2.689591065899465,
      "grad_norm": 0.7388727068901062,
      "learning_rate": 4.651265459960596e-06,
      "loss": 0.0135,
      "step": 1643480
    },
    {
      "epoch": 2.6896237963381187,
      "grad_norm": 0.23006293177604675,
      "learning_rate": 4.651199567747079e-06,
      "loss": 0.0132,
      "step": 1643500
    },
    {
      "epoch": 2.689656526776772,
      "grad_norm": 0.18569909036159515,
      "learning_rate": 4.651133675533563e-06,
      "loss": 0.0095,
      "step": 1643520
    },
    {
      "epoch": 2.6896892572154254,
      "grad_norm": 0.157075434923172,
      "learning_rate": 4.651067783320045e-06,
      "loss": 0.0108,
      "step": 1643540
    },
    {
      "epoch": 2.6897219876540785,
      "grad_norm": 0.21030579507350922,
      "learning_rate": 4.651001891106528e-06,
      "loss": 0.0116,
      "step": 1643560
    },
    {
      "epoch": 2.6897547180927317,
      "grad_norm": 0.1202138215303421,
      "learning_rate": 4.650935998893012e-06,
      "loss": 0.0128,
      "step": 1643580
    },
    {
      "epoch": 2.6897874485313853,
      "grad_norm": 0.1261020004749298,
      "learning_rate": 4.6508701066794945e-06,
      "loss": 0.0131,
      "step": 1643600
    },
    {
      "epoch": 2.6898201789700384,
      "grad_norm": 0.14159686863422394,
      "learning_rate": 4.650804214465977e-06,
      "loss": 0.0101,
      "step": 1643620
    },
    {
      "epoch": 2.689852909408692,
      "grad_norm": 0.08902175724506378,
      "learning_rate": 4.65073832225246e-06,
      "loss": 0.0167,
      "step": 1643640
    },
    {
      "epoch": 2.689885639847345,
      "grad_norm": 0.9175995588302612,
      "learning_rate": 4.650672430038943e-06,
      "loss": 0.0107,
      "step": 1643660
    },
    {
      "epoch": 2.689918370285999,
      "grad_norm": 0.24714936316013336,
      "learning_rate": 4.6506065378254254e-06,
      "loss": 0.0146,
      "step": 1643680
    },
    {
      "epoch": 2.689951100724652,
      "grad_norm": 0.2948237955570221,
      "learning_rate": 4.650540645611908e-06,
      "loss": 0.0089,
      "step": 1643700
    },
    {
      "epoch": 2.689983831163305,
      "grad_norm": 0.11851799488067627,
      "learning_rate": 4.650474753398391e-06,
      "loss": 0.0102,
      "step": 1643720
    },
    {
      "epoch": 2.6900165616019587,
      "grad_norm": 0.18696221709251404,
      "learning_rate": 4.6504088611848745e-06,
      "loss": 0.009,
      "step": 1643740
    },
    {
      "epoch": 2.690049292040612,
      "grad_norm": 0.43665018677711487,
      "learning_rate": 4.650342968971357e-06,
      "loss": 0.016,
      "step": 1643760
    },
    {
      "epoch": 2.6900820224792654,
      "grad_norm": 0.19493533670902252,
      "learning_rate": 4.65027707675784e-06,
      "loss": 0.0135,
      "step": 1643780
    },
    {
      "epoch": 2.6901147529179186,
      "grad_norm": 0.7878322005271912,
      "learning_rate": 4.650211184544323e-06,
      "loss": 0.0141,
      "step": 1643800
    },
    {
      "epoch": 2.690147483356572,
      "grad_norm": 0.3586343824863434,
      "learning_rate": 4.6501452923308055e-06,
      "loss": 0.0152,
      "step": 1643820
    },
    {
      "epoch": 2.6901802137952253,
      "grad_norm": 0.20027777552604675,
      "learning_rate": 4.650079400117288e-06,
      "loss": 0.0127,
      "step": 1643840
    },
    {
      "epoch": 2.6902129442338785,
      "grad_norm": 0.24278385937213898,
      "learning_rate": 4.650013507903771e-06,
      "loss": 0.0147,
      "step": 1643860
    },
    {
      "epoch": 2.690245674672532,
      "grad_norm": 0.2746392786502838,
      "learning_rate": 4.649947615690254e-06,
      "loss": 0.0107,
      "step": 1643880
    },
    {
      "epoch": 2.690278405111185,
      "grad_norm": 1.7287763357162476,
      "learning_rate": 4.649881723476737e-06,
      "loss": 0.0172,
      "step": 1643900
    },
    {
      "epoch": 2.6903111355498384,
      "grad_norm": 0.15282948315143585,
      "learning_rate": 4.64981583126322e-06,
      "loss": 0.0214,
      "step": 1643920
    },
    {
      "epoch": 2.690343865988492,
      "grad_norm": 0.20607712864875793,
      "learning_rate": 4.649749939049703e-06,
      "loss": 0.008,
      "step": 1643940
    },
    {
      "epoch": 2.6903765964271456,
      "grad_norm": 0.6169254183769226,
      "learning_rate": 4.6496840468361855e-06,
      "loss": 0.0163,
      "step": 1643960
    },
    {
      "epoch": 2.6904093268657987,
      "grad_norm": 0.3984176516532898,
      "learning_rate": 4.649618154622669e-06,
      "loss": 0.0182,
      "step": 1643980
    },
    {
      "epoch": 2.690442057304452,
      "grad_norm": 0.32188352942466736,
      "learning_rate": 4.649552262409152e-06,
      "loss": 0.0192,
      "step": 1644000
    },
    {
      "epoch": 2.6904747877431054,
      "grad_norm": 0.5230767130851746,
      "learning_rate": 4.6494863701956346e-06,
      "loss": 0.0106,
      "step": 1644020
    },
    {
      "epoch": 2.6905075181817586,
      "grad_norm": 1.356857180595398,
      "learning_rate": 4.649420477982117e-06,
      "loss": 0.0174,
      "step": 1644040
    },
    {
      "epoch": 2.6905402486204117,
      "grad_norm": 0.4194431006908417,
      "learning_rate": 4.6493545857686e-06,
      "loss": 0.0131,
      "step": 1644060
    },
    {
      "epoch": 2.6905729790590653,
      "grad_norm": 0.1871138960123062,
      "learning_rate": 4.649288693555083e-06,
      "loss": 0.0158,
      "step": 1644080
    },
    {
      "epoch": 2.690605709497719,
      "grad_norm": 0.36817315220832825,
      "learning_rate": 4.6492228013415655e-06,
      "loss": 0.0145,
      "step": 1644100
    },
    {
      "epoch": 2.690638439936372,
      "grad_norm": 0.06607759743928909,
      "learning_rate": 4.649156909128048e-06,
      "loss": 0.0133,
      "step": 1644120
    },
    {
      "epoch": 2.6906711703750252,
      "grad_norm": 0.20703193545341492,
      "learning_rate": 4.649091016914532e-06,
      "loss": 0.0123,
      "step": 1644140
    },
    {
      "epoch": 2.690703900813679,
      "grad_norm": 0.3864750862121582,
      "learning_rate": 4.649025124701015e-06,
      "loss": 0.0126,
      "step": 1644160
    },
    {
      "epoch": 2.690736631252332,
      "grad_norm": 0.32176679372787476,
      "learning_rate": 4.648959232487497e-06,
      "loss": 0.0128,
      "step": 1644180
    },
    {
      "epoch": 2.690769361690985,
      "grad_norm": 0.5657247304916382,
      "learning_rate": 4.64889334027398e-06,
      "loss": 0.013,
      "step": 1644200
    },
    {
      "epoch": 2.6908020921296387,
      "grad_norm": 0.28577300906181335,
      "learning_rate": 4.648827448060463e-06,
      "loss": 0.0135,
      "step": 1644220
    },
    {
      "epoch": 2.6908348225682923,
      "grad_norm": 0.5907551050186157,
      "learning_rate": 4.648761555846946e-06,
      "loss": 0.0148,
      "step": 1644240
    },
    {
      "epoch": 2.6908675530069455,
      "grad_norm": 0.11628463864326477,
      "learning_rate": 4.648695663633429e-06,
      "loss": 0.0148,
      "step": 1644260
    },
    {
      "epoch": 2.6909002834455986,
      "grad_norm": 0.13212117552757263,
      "learning_rate": 4.648629771419912e-06,
      "loss": 0.0124,
      "step": 1644280
    },
    {
      "epoch": 2.690933013884252,
      "grad_norm": 0.27646610140800476,
      "learning_rate": 4.648563879206395e-06,
      "loss": 0.0178,
      "step": 1644300
    },
    {
      "epoch": 2.6909657443229054,
      "grad_norm": 0.4326786398887634,
      "learning_rate": 4.648497986992877e-06,
      "loss": 0.019,
      "step": 1644320
    },
    {
      "epoch": 2.6909984747615585,
      "grad_norm": 0.6426141858100891,
      "learning_rate": 4.64843209477936e-06,
      "loss": 0.0126,
      "step": 1644340
    },
    {
      "epoch": 2.691031205200212,
      "grad_norm": 0.46100738644599915,
      "learning_rate": 4.648366202565844e-06,
      "loss": 0.0161,
      "step": 1644360
    },
    {
      "epoch": 2.6910639356388657,
      "grad_norm": 0.30447959899902344,
      "learning_rate": 4.648300310352326e-06,
      "loss": 0.0128,
      "step": 1644380
    },
    {
      "epoch": 2.691096666077519,
      "grad_norm": 0.30977290868759155,
      "learning_rate": 4.648234418138809e-06,
      "loss": 0.0141,
      "step": 1644400
    },
    {
      "epoch": 2.691129396516172,
      "grad_norm": 0.32884031534194946,
      "learning_rate": 4.648168525925292e-06,
      "loss": 0.0116,
      "step": 1644420
    },
    {
      "epoch": 2.6911621269548256,
      "grad_norm": 0.26662757992744446,
      "learning_rate": 4.648102633711775e-06,
      "loss": 0.0147,
      "step": 1644440
    },
    {
      "epoch": 2.6911948573934787,
      "grad_norm": 0.30909401178359985,
      "learning_rate": 4.648036741498257e-06,
      "loss": 0.0156,
      "step": 1644460
    },
    {
      "epoch": 2.691227587832132,
      "grad_norm": 0.10710296779870987,
      "learning_rate": 4.64797084928474e-06,
      "loss": 0.0247,
      "step": 1644480
    },
    {
      "epoch": 2.6912603182707855,
      "grad_norm": 0.258870005607605,
      "learning_rate": 4.647904957071223e-06,
      "loss": 0.0144,
      "step": 1644500
    },
    {
      "epoch": 2.6912930487094386,
      "grad_norm": 0.42592644691467285,
      "learning_rate": 4.647839064857706e-06,
      "loss": 0.0163,
      "step": 1644520
    },
    {
      "epoch": 2.6913257791480922,
      "grad_norm": 0.364077627658844,
      "learning_rate": 4.647773172644189e-06,
      "loss": 0.0093,
      "step": 1644540
    },
    {
      "epoch": 2.6913585095867454,
      "grad_norm": 0.753349244594574,
      "learning_rate": 4.647707280430672e-06,
      "loss": 0.0229,
      "step": 1644560
    },
    {
      "epoch": 2.691391240025399,
      "grad_norm": 0.528650164604187,
      "learning_rate": 4.647641388217155e-06,
      "loss": 0.011,
      "step": 1644580
    },
    {
      "epoch": 2.691423970464052,
      "grad_norm": 0.2953219711780548,
      "learning_rate": 4.647575496003638e-06,
      "loss": 0.0089,
      "step": 1644600
    },
    {
      "epoch": 2.6914567009027053,
      "grad_norm": 0.3089170455932617,
      "learning_rate": 4.647509603790121e-06,
      "loss": 0.01,
      "step": 1644620
    },
    {
      "epoch": 2.691489431341359,
      "grad_norm": 0.2637462615966797,
      "learning_rate": 4.647443711576604e-06,
      "loss": 0.0104,
      "step": 1644640
    },
    {
      "epoch": 2.691522161780012,
      "grad_norm": 0.17008917033672333,
      "learning_rate": 4.6473778193630865e-06,
      "loss": 0.0077,
      "step": 1644660
    },
    {
      "epoch": 2.6915548922186656,
      "grad_norm": 0.17066818475723267,
      "learning_rate": 4.647311927149569e-06,
      "loss": 0.0095,
      "step": 1644680
    },
    {
      "epoch": 2.6915876226573188,
      "grad_norm": 0.7225512862205505,
      "learning_rate": 4.647246034936052e-06,
      "loss": 0.023,
      "step": 1644700
    },
    {
      "epoch": 2.6916203530959724,
      "grad_norm": 0.4048102796077728,
      "learning_rate": 4.647180142722535e-06,
      "loss": 0.0148,
      "step": 1644720
    },
    {
      "epoch": 2.6916530835346255,
      "grad_norm": 0.34199559688568115,
      "learning_rate": 4.6471142505090174e-06,
      "loss": 0.0192,
      "step": 1644740
    },
    {
      "epoch": 2.6916858139732787,
      "grad_norm": 0.7003785371780396,
      "learning_rate": 4.647048358295501e-06,
      "loss": 0.0123,
      "step": 1644760
    },
    {
      "epoch": 2.6917185444119323,
      "grad_norm": 0.387214720249176,
      "learning_rate": 4.646982466081984e-06,
      "loss": 0.0134,
      "step": 1644780
    },
    {
      "epoch": 2.6917512748505854,
      "grad_norm": 0.5124874711036682,
      "learning_rate": 4.6469165738684665e-06,
      "loss": 0.0119,
      "step": 1644800
    },
    {
      "epoch": 2.691784005289239,
      "grad_norm": 0.1642351746559143,
      "learning_rate": 4.646850681654949e-06,
      "loss": 0.0102,
      "step": 1644820
    },
    {
      "epoch": 2.691816735727892,
      "grad_norm": 0.5228636860847473,
      "learning_rate": 4.646784789441432e-06,
      "loss": 0.018,
      "step": 1644840
    },
    {
      "epoch": 2.6918494661665457,
      "grad_norm": 0.9152992963790894,
      "learning_rate": 4.646718897227915e-06,
      "loss": 0.0154,
      "step": 1644860
    },
    {
      "epoch": 2.691882196605199,
      "grad_norm": 0.16966359317302704,
      "learning_rate": 4.6466530050143975e-06,
      "loss": 0.0128,
      "step": 1644880
    },
    {
      "epoch": 2.691914927043852,
      "grad_norm": 0.8259298205375671,
      "learning_rate": 4.64658711280088e-06,
      "loss": 0.014,
      "step": 1644900
    },
    {
      "epoch": 2.6919476574825056,
      "grad_norm": 0.5719343423843384,
      "learning_rate": 4.646521220587363e-06,
      "loss": 0.0138,
      "step": 1644920
    },
    {
      "epoch": 2.691980387921159,
      "grad_norm": 0.4481031000614166,
      "learning_rate": 4.6464553283738465e-06,
      "loss": 0.0084,
      "step": 1644940
    },
    {
      "epoch": 2.6920131183598124,
      "grad_norm": 0.2815685570240021,
      "learning_rate": 4.646389436160329e-06,
      "loss": 0.0101,
      "step": 1644960
    },
    {
      "epoch": 2.6920458487984655,
      "grad_norm": 0.43336251378059387,
      "learning_rate": 4.646323543946812e-06,
      "loss": 0.0125,
      "step": 1644980
    },
    {
      "epoch": 2.692078579237119,
      "grad_norm": 0.7568131685256958,
      "learning_rate": 4.646257651733296e-06,
      "loss": 0.0178,
      "step": 1645000
    },
    {
      "epoch": 2.6921113096757723,
      "grad_norm": 0.15283936262130737,
      "learning_rate": 4.646191759519778e-06,
      "loss": 0.0148,
      "step": 1645020
    },
    {
      "epoch": 2.6921440401144254,
      "grad_norm": 0.13793900609016418,
      "learning_rate": 4.646125867306261e-06,
      "loss": 0.0099,
      "step": 1645040
    },
    {
      "epoch": 2.692176770553079,
      "grad_norm": 0.5184704661369324,
      "learning_rate": 4.646059975092744e-06,
      "loss": 0.016,
      "step": 1645060
    },
    {
      "epoch": 2.692209500991732,
      "grad_norm": 0.46666428446769714,
      "learning_rate": 4.6459940828792265e-06,
      "loss": 0.0163,
      "step": 1645080
    },
    {
      "epoch": 2.6922422314303858,
      "grad_norm": 0.11311068385839462,
      "learning_rate": 4.645928190665709e-06,
      "loss": 0.0151,
      "step": 1645100
    },
    {
      "epoch": 2.692274961869039,
      "grad_norm": 0.09076819568872452,
      "learning_rate": 4.645862298452192e-06,
      "loss": 0.01,
      "step": 1645120
    },
    {
      "epoch": 2.6923076923076925,
      "grad_norm": 0.1065884456038475,
      "learning_rate": 4.645796406238675e-06,
      "loss": 0.0136,
      "step": 1645140
    },
    {
      "epoch": 2.6923404227463457,
      "grad_norm": 0.6305034756660461,
      "learning_rate": 4.645730514025158e-06,
      "loss": 0.0184,
      "step": 1645160
    },
    {
      "epoch": 2.692373153184999,
      "grad_norm": 0.3283316195011139,
      "learning_rate": 4.645664621811641e-06,
      "loss": 0.0179,
      "step": 1645180
    },
    {
      "epoch": 2.6924058836236524,
      "grad_norm": 0.22053726017475128,
      "learning_rate": 4.645598729598124e-06,
      "loss": 0.0124,
      "step": 1645200
    },
    {
      "epoch": 2.6924386140623056,
      "grad_norm": 0.06837148219347,
      "learning_rate": 4.6455328373846066e-06,
      "loss": 0.0093,
      "step": 1645220
    },
    {
      "epoch": 2.692471344500959,
      "grad_norm": 0.48455706238746643,
      "learning_rate": 4.645466945171089e-06,
      "loss": 0.015,
      "step": 1645240
    },
    {
      "epoch": 2.6925040749396123,
      "grad_norm": 0.11964258551597595,
      "learning_rate": 4.645401052957572e-06,
      "loss": 0.0119,
      "step": 1645260
    },
    {
      "epoch": 2.692536805378266,
      "grad_norm": 0.3819133937358856,
      "learning_rate": 4.645335160744055e-06,
      "loss": 0.0155,
      "step": 1645280
    },
    {
      "epoch": 2.692569535816919,
      "grad_norm": 0.43745261430740356,
      "learning_rate": 4.645269268530538e-06,
      "loss": 0.0144,
      "step": 1645300
    },
    {
      "epoch": 2.692602266255572,
      "grad_norm": 1.0670597553253174,
      "learning_rate": 4.645203376317021e-06,
      "loss": 0.0159,
      "step": 1645320
    },
    {
      "epoch": 2.692634996694226,
      "grad_norm": 0.39482760429382324,
      "learning_rate": 4.645137484103504e-06,
      "loss": 0.0135,
      "step": 1645340
    },
    {
      "epoch": 2.692667727132879,
      "grad_norm": 0.13486768305301666,
      "learning_rate": 4.645071591889987e-06,
      "loss": 0.0139,
      "step": 1645360
    },
    {
      "epoch": 2.6927004575715325,
      "grad_norm": 0.21795547008514404,
      "learning_rate": 4.645005699676469e-06,
      "loss": 0.0107,
      "step": 1645380
    },
    {
      "epoch": 2.6927331880101857,
      "grad_norm": 0.4087023138999939,
      "learning_rate": 4.644939807462953e-06,
      "loss": 0.0158,
      "step": 1645400
    },
    {
      "epoch": 2.6927659184488393,
      "grad_norm": 0.6553846001625061,
      "learning_rate": 4.644873915249436e-06,
      "loss": 0.0165,
      "step": 1645420
    },
    {
      "epoch": 2.6927986488874924,
      "grad_norm": 0.30791229009628296,
      "learning_rate": 4.644808023035918e-06,
      "loss": 0.0164,
      "step": 1645440
    },
    {
      "epoch": 2.6928313793261456,
      "grad_norm": 0.4915856420993805,
      "learning_rate": 4.644742130822401e-06,
      "loss": 0.0138,
      "step": 1645460
    },
    {
      "epoch": 2.692864109764799,
      "grad_norm": 0.5969464778900146,
      "learning_rate": 4.644676238608884e-06,
      "loss": 0.0143,
      "step": 1645480
    },
    {
      "epoch": 2.6928968402034523,
      "grad_norm": 0.6224451065063477,
      "learning_rate": 4.644610346395367e-06,
      "loss": 0.0173,
      "step": 1645500
    },
    {
      "epoch": 2.6929295706421055,
      "grad_norm": 0.9374948740005493,
      "learning_rate": 4.644544454181849e-06,
      "loss": 0.0175,
      "step": 1645520
    },
    {
      "epoch": 2.692962301080759,
      "grad_norm": 1.036598801612854,
      "learning_rate": 4.644478561968332e-06,
      "loss": 0.0138,
      "step": 1645540
    },
    {
      "epoch": 2.6929950315194127,
      "grad_norm": 0.4106728732585907,
      "learning_rate": 4.644412669754816e-06,
      "loss": 0.0205,
      "step": 1645560
    },
    {
      "epoch": 2.693027761958066,
      "grad_norm": 0.14034916460514069,
      "learning_rate": 4.6443467775412984e-06,
      "loss": 0.0104,
      "step": 1645580
    },
    {
      "epoch": 2.693060492396719,
      "grad_norm": 0.33972984552383423,
      "learning_rate": 4.644280885327781e-06,
      "loss": 0.0142,
      "step": 1645600
    },
    {
      "epoch": 2.6930932228353726,
      "grad_norm": 0.397840678691864,
      "learning_rate": 4.644214993114264e-06,
      "loss": 0.0225,
      "step": 1645620
    },
    {
      "epoch": 2.6931259532740257,
      "grad_norm": 0.3851095736026764,
      "learning_rate": 4.644149100900747e-06,
      "loss": 0.0149,
      "step": 1645640
    },
    {
      "epoch": 2.693158683712679,
      "grad_norm": 1.172377109527588,
      "learning_rate": 4.64408320868723e-06,
      "loss": 0.0138,
      "step": 1645660
    },
    {
      "epoch": 2.6931914141513325,
      "grad_norm": 0.29234644770622253,
      "learning_rate": 4.644017316473713e-06,
      "loss": 0.0149,
      "step": 1645680
    },
    {
      "epoch": 2.693224144589986,
      "grad_norm": 0.41969597339630127,
      "learning_rate": 4.643951424260196e-06,
      "loss": 0.0149,
      "step": 1645700
    },
    {
      "epoch": 2.693256875028639,
      "grad_norm": 0.2714086174964905,
      "learning_rate": 4.6438855320466785e-06,
      "loss": 0.0139,
      "step": 1645720
    },
    {
      "epoch": 2.6932896054672923,
      "grad_norm": 0.543764591217041,
      "learning_rate": 4.643819639833161e-06,
      "loss": 0.0104,
      "step": 1645740
    },
    {
      "epoch": 2.693322335905946,
      "grad_norm": 2.5618650913238525,
      "learning_rate": 4.643753747619644e-06,
      "loss": 0.0146,
      "step": 1645760
    },
    {
      "epoch": 2.693355066344599,
      "grad_norm": 0.14408206939697266,
      "learning_rate": 4.6436878554061275e-06,
      "loss": 0.0162,
      "step": 1645780
    },
    {
      "epoch": 2.6933877967832522,
      "grad_norm": 0.28315332531929016,
      "learning_rate": 4.64362196319261e-06,
      "loss": 0.0124,
      "step": 1645800
    },
    {
      "epoch": 2.693420527221906,
      "grad_norm": 0.472648024559021,
      "learning_rate": 4.643556070979093e-06,
      "loss": 0.0159,
      "step": 1645820
    },
    {
      "epoch": 2.6934532576605594,
      "grad_norm": 1.582706093788147,
      "learning_rate": 4.643490178765576e-06,
      "loss": 0.0184,
      "step": 1645840
    },
    {
      "epoch": 2.6934859880992126,
      "grad_norm": 0.48001882433891296,
      "learning_rate": 4.6434242865520585e-06,
      "loss": 0.0192,
      "step": 1645860
    },
    {
      "epoch": 2.6935187185378657,
      "grad_norm": 0.10264050960540771,
      "learning_rate": 4.643358394338541e-06,
      "loss": 0.0156,
      "step": 1645880
    },
    {
      "epoch": 2.6935514489765193,
      "grad_norm": 0.6581870317459106,
      "learning_rate": 4.643292502125024e-06,
      "loss": 0.0142,
      "step": 1645900
    },
    {
      "epoch": 2.6935841794151725,
      "grad_norm": 0.26934123039245605,
      "learning_rate": 4.643226609911507e-06,
      "loss": 0.0143,
      "step": 1645920
    },
    {
      "epoch": 2.6936169098538256,
      "grad_norm": 0.11961179226636887,
      "learning_rate": 4.6431607176979894e-06,
      "loss": 0.0196,
      "step": 1645940
    },
    {
      "epoch": 2.693649640292479,
      "grad_norm": 0.21173830330371857,
      "learning_rate": 4.643094825484473e-06,
      "loss": 0.0168,
      "step": 1645960
    },
    {
      "epoch": 2.693682370731133,
      "grad_norm": 0.39531606435775757,
      "learning_rate": 4.643028933270956e-06,
      "loss": 0.0169,
      "step": 1645980
    },
    {
      "epoch": 2.693715101169786,
      "grad_norm": 0.12502314150333405,
      "learning_rate": 4.6429630410574385e-06,
      "loss": 0.0193,
      "step": 1646000
    },
    {
      "epoch": 2.693747831608439,
      "grad_norm": 0.21350015699863434,
      "learning_rate": 4.642897148843922e-06,
      "loss": 0.0104,
      "step": 1646020
    },
    {
      "epoch": 2.6937805620470927,
      "grad_norm": 0.19374029338359833,
      "learning_rate": 4.642831256630405e-06,
      "loss": 0.0175,
      "step": 1646040
    },
    {
      "epoch": 2.693813292485746,
      "grad_norm": 0.5211679339408875,
      "learning_rate": 4.6427653644168876e-06,
      "loss": 0.0129,
      "step": 1646060
    },
    {
      "epoch": 2.693846022924399,
      "grad_norm": 0.758510947227478,
      "learning_rate": 4.64269947220337e-06,
      "loss": 0.0166,
      "step": 1646080
    },
    {
      "epoch": 2.6938787533630526,
      "grad_norm": 0.5245894193649292,
      "learning_rate": 4.642633579989853e-06,
      "loss": 0.0126,
      "step": 1646100
    },
    {
      "epoch": 2.6939114838017058,
      "grad_norm": 0.615259051322937,
      "learning_rate": 4.642567687776336e-06,
      "loss": 0.0112,
      "step": 1646120
    },
    {
      "epoch": 2.6939442142403593,
      "grad_norm": 0.1481313556432724,
      "learning_rate": 4.6425017955628185e-06,
      "loss": 0.0128,
      "step": 1646140
    },
    {
      "epoch": 2.6939769446790125,
      "grad_norm": 0.17703135311603546,
      "learning_rate": 4.642435903349301e-06,
      "loss": 0.0183,
      "step": 1646160
    },
    {
      "epoch": 2.694009675117666,
      "grad_norm": 0.19837448000907898,
      "learning_rate": 4.642370011135785e-06,
      "loss": 0.0164,
      "step": 1646180
    },
    {
      "epoch": 2.6940424055563192,
      "grad_norm": 0.16498258709907532,
      "learning_rate": 4.642304118922268e-06,
      "loss": 0.0067,
      "step": 1646200
    },
    {
      "epoch": 2.6940751359949724,
      "grad_norm": 0.3605765998363495,
      "learning_rate": 4.64223822670875e-06,
      "loss": 0.018,
      "step": 1646220
    },
    {
      "epoch": 2.694107866433626,
      "grad_norm": 0.11419538408517838,
      "learning_rate": 4.642172334495233e-06,
      "loss": 0.0132,
      "step": 1646240
    },
    {
      "epoch": 2.694140596872279,
      "grad_norm": 0.20873920619487762,
      "learning_rate": 4.642106442281716e-06,
      "loss": 0.0106,
      "step": 1646260
    },
    {
      "epoch": 2.6941733273109327,
      "grad_norm": 0.3584308326244354,
      "learning_rate": 4.6420405500681986e-06,
      "loss": 0.0132,
      "step": 1646280
    },
    {
      "epoch": 2.694206057749586,
      "grad_norm": 2.051966905593872,
      "learning_rate": 4.641974657854681e-06,
      "loss": 0.0105,
      "step": 1646300
    },
    {
      "epoch": 2.6942387881882395,
      "grad_norm": 0.47888368368148804,
      "learning_rate": 4.641908765641164e-06,
      "loss": 0.0149,
      "step": 1646320
    },
    {
      "epoch": 2.6942715186268926,
      "grad_norm": 0.27827009558677673,
      "learning_rate": 4.641842873427647e-06,
      "loss": 0.0139,
      "step": 1646340
    },
    {
      "epoch": 2.6943042490655458,
      "grad_norm": 1.444634199142456,
      "learning_rate": 4.64177698121413e-06,
      "loss": 0.0129,
      "step": 1646360
    },
    {
      "epoch": 2.6943369795041994,
      "grad_norm": 0.2803913354873657,
      "learning_rate": 4.641711089000613e-06,
      "loss": 0.0129,
      "step": 1646380
    },
    {
      "epoch": 2.6943697099428525,
      "grad_norm": 0.3081360161304474,
      "learning_rate": 4.641645196787096e-06,
      "loss": 0.0121,
      "step": 1646400
    },
    {
      "epoch": 2.694402440381506,
      "grad_norm": 0.8212825059890747,
      "learning_rate": 4.6415793045735794e-06,
      "loss": 0.0124,
      "step": 1646420
    },
    {
      "epoch": 2.6944351708201593,
      "grad_norm": 0.42023906111717224,
      "learning_rate": 4.641513412360062e-06,
      "loss": 0.0134,
      "step": 1646440
    },
    {
      "epoch": 2.694467901258813,
      "grad_norm": 0.38748466968536377,
      "learning_rate": 4.641447520146545e-06,
      "loss": 0.0122,
      "step": 1646460
    },
    {
      "epoch": 2.694500631697466,
      "grad_norm": 0.07800835371017456,
      "learning_rate": 4.641381627933028e-06,
      "loss": 0.0112,
      "step": 1646480
    },
    {
      "epoch": 2.694533362136119,
      "grad_norm": 0.4677335023880005,
      "learning_rate": 4.64131573571951e-06,
      "loss": 0.0141,
      "step": 1646500
    },
    {
      "epoch": 2.6945660925747728,
      "grad_norm": 0.2648809552192688,
      "learning_rate": 4.641249843505993e-06,
      "loss": 0.0114,
      "step": 1646520
    },
    {
      "epoch": 2.694598823013426,
      "grad_norm": 0.8994946479797363,
      "learning_rate": 4.641183951292476e-06,
      "loss": 0.0107,
      "step": 1646540
    },
    {
      "epoch": 2.6946315534520795,
      "grad_norm": 0.541319727897644,
      "learning_rate": 4.641118059078959e-06,
      "loss": 0.0143,
      "step": 1646560
    },
    {
      "epoch": 2.6946642838907326,
      "grad_norm": 0.38681185245513916,
      "learning_rate": 4.641052166865442e-06,
      "loss": 0.0128,
      "step": 1646580
    },
    {
      "epoch": 2.6946970143293862,
      "grad_norm": 0.09112514555454254,
      "learning_rate": 4.640986274651925e-06,
      "loss": 0.0148,
      "step": 1646600
    },
    {
      "epoch": 2.6947297447680394,
      "grad_norm": 0.541488766670227,
      "learning_rate": 4.640920382438408e-06,
      "loss": 0.0163,
      "step": 1646620
    },
    {
      "epoch": 2.6947624752066925,
      "grad_norm": 1.3869476318359375,
      "learning_rate": 4.64085449022489e-06,
      "loss": 0.0162,
      "step": 1646640
    },
    {
      "epoch": 2.694795205645346,
      "grad_norm": 0.23702409863471985,
      "learning_rate": 4.640788598011373e-06,
      "loss": 0.0134,
      "step": 1646660
    },
    {
      "epoch": 2.6948279360839993,
      "grad_norm": 0.5416634678840637,
      "learning_rate": 4.640722705797856e-06,
      "loss": 0.0153,
      "step": 1646680
    },
    {
      "epoch": 2.694860666522653,
      "grad_norm": 0.3212466239929199,
      "learning_rate": 4.6406568135843395e-06,
      "loss": 0.0126,
      "step": 1646700
    },
    {
      "epoch": 2.694893396961306,
      "grad_norm": 0.14130635559558868,
      "learning_rate": 4.640590921370822e-06,
      "loss": 0.017,
      "step": 1646720
    },
    {
      "epoch": 2.6949261273999596,
      "grad_norm": 0.24479937553405762,
      "learning_rate": 4.640525029157305e-06,
      "loss": 0.0142,
      "step": 1646740
    },
    {
      "epoch": 2.6949588578386128,
      "grad_norm": 2.491503953933716,
      "learning_rate": 4.640459136943788e-06,
      "loss": 0.0162,
      "step": 1646760
    },
    {
      "epoch": 2.694991588277266,
      "grad_norm": 0.2657119631767273,
      "learning_rate": 4.6403932447302704e-06,
      "loss": 0.0176,
      "step": 1646780
    },
    {
      "epoch": 2.6950243187159195,
      "grad_norm": 0.08033584803342819,
      "learning_rate": 4.640327352516753e-06,
      "loss": 0.0193,
      "step": 1646800
    },
    {
      "epoch": 2.6950570491545727,
      "grad_norm": 0.39384013414382935,
      "learning_rate": 4.640261460303237e-06,
      "loss": 0.0094,
      "step": 1646820
    },
    {
      "epoch": 2.6950897795932263,
      "grad_norm": 0.21171848475933075,
      "learning_rate": 4.6401955680897195e-06,
      "loss": 0.012,
      "step": 1646840
    },
    {
      "epoch": 2.6951225100318794,
      "grad_norm": 0.220594584941864,
      "learning_rate": 4.640129675876202e-06,
      "loss": 0.0118,
      "step": 1646860
    },
    {
      "epoch": 2.695155240470533,
      "grad_norm": 0.31173163652420044,
      "learning_rate": 4.640063783662685e-06,
      "loss": 0.01,
      "step": 1646880
    },
    {
      "epoch": 2.695187970909186,
      "grad_norm": 0.08035937696695328,
      "learning_rate": 4.639997891449168e-06,
      "loss": 0.0114,
      "step": 1646900
    },
    {
      "epoch": 2.6952207013478393,
      "grad_norm": 0.27951952815055847,
      "learning_rate": 4.6399319992356505e-06,
      "loss": 0.0168,
      "step": 1646920
    },
    {
      "epoch": 2.695253431786493,
      "grad_norm": 0.43631598353385925,
      "learning_rate": 4.639866107022133e-06,
      "loss": 0.0232,
      "step": 1646940
    },
    {
      "epoch": 2.695286162225146,
      "grad_norm": 0.21167029440402985,
      "learning_rate": 4.639800214808616e-06,
      "loss": 0.0124,
      "step": 1646960
    },
    {
      "epoch": 2.695318892663799,
      "grad_norm": 0.17512422800064087,
      "learning_rate": 4.6397343225950995e-06,
      "loss": 0.0179,
      "step": 1646980
    },
    {
      "epoch": 2.695351623102453,
      "grad_norm": 0.36661428213119507,
      "learning_rate": 4.639668430381582e-06,
      "loss": 0.0183,
      "step": 1647000
    },
    {
      "epoch": 2.6953843535411064,
      "grad_norm": 0.3373030722141266,
      "learning_rate": 4.639602538168065e-06,
      "loss": 0.0182,
      "step": 1647020
    },
    {
      "epoch": 2.6954170839797595,
      "grad_norm": 0.19016872346401215,
      "learning_rate": 4.639536645954548e-06,
      "loss": 0.0165,
      "step": 1647040
    },
    {
      "epoch": 2.6954498144184127,
      "grad_norm": 0.4356965720653534,
      "learning_rate": 4.639470753741031e-06,
      "loss": 0.0129,
      "step": 1647060
    },
    {
      "epoch": 2.6954825448570663,
      "grad_norm": 0.31376197934150696,
      "learning_rate": 4.639404861527514e-06,
      "loss": 0.02,
      "step": 1647080
    },
    {
      "epoch": 2.6955152752957194,
      "grad_norm": 0.11903880536556244,
      "learning_rate": 4.639338969313997e-06,
      "loss": 0.0095,
      "step": 1647100
    },
    {
      "epoch": 2.6955480057343726,
      "grad_norm": 0.276131808757782,
      "learning_rate": 4.6392730771004796e-06,
      "loss": 0.016,
      "step": 1647120
    },
    {
      "epoch": 2.695580736173026,
      "grad_norm": 0.48041006922721863,
      "learning_rate": 4.639207184886962e-06,
      "loss": 0.0199,
      "step": 1647140
    },
    {
      "epoch": 2.6956134666116798,
      "grad_norm": 0.7564186453819275,
      "learning_rate": 4.639141292673445e-06,
      "loss": 0.0208,
      "step": 1647160
    },
    {
      "epoch": 2.695646197050333,
      "grad_norm": 0.45956066250801086,
      "learning_rate": 4.639075400459928e-06,
      "loss": 0.02,
      "step": 1647180
    },
    {
      "epoch": 2.695678927488986,
      "grad_norm": 0.7284066081047058,
      "learning_rate": 4.639009508246411e-06,
      "loss": 0.0121,
      "step": 1647200
    },
    {
      "epoch": 2.6957116579276397,
      "grad_norm": 0.16751395165920258,
      "learning_rate": 4.638943616032894e-06,
      "loss": 0.0126,
      "step": 1647220
    },
    {
      "epoch": 2.695744388366293,
      "grad_norm": 0.28025591373443604,
      "learning_rate": 4.638877723819377e-06,
      "loss": 0.0094,
      "step": 1647240
    },
    {
      "epoch": 2.695777118804946,
      "grad_norm": 0.7620657682418823,
      "learning_rate": 4.6388118316058596e-06,
      "loss": 0.011,
      "step": 1647260
    },
    {
      "epoch": 2.6958098492435996,
      "grad_norm": 0.33468180894851685,
      "learning_rate": 4.638745939392342e-06,
      "loss": 0.0092,
      "step": 1647280
    },
    {
      "epoch": 2.695842579682253,
      "grad_norm": 0.4928797483444214,
      "learning_rate": 4.638680047178825e-06,
      "loss": 0.0163,
      "step": 1647300
    },
    {
      "epoch": 2.6958753101209063,
      "grad_norm": 0.38611793518066406,
      "learning_rate": 4.638614154965308e-06,
      "loss": 0.0183,
      "step": 1647320
    },
    {
      "epoch": 2.6959080405595595,
      "grad_norm": 0.15744951367378235,
      "learning_rate": 4.6385482627517905e-06,
      "loss": 0.0115,
      "step": 1647340
    },
    {
      "epoch": 2.695940770998213,
      "grad_norm": 0.7837251424789429,
      "learning_rate": 4.638482370538273e-06,
      "loss": 0.0103,
      "step": 1647360
    },
    {
      "epoch": 2.695973501436866,
      "grad_norm": 0.07290345430374146,
      "learning_rate": 4.638416478324757e-06,
      "loss": 0.0122,
      "step": 1647380
    },
    {
      "epoch": 2.6960062318755194,
      "grad_norm": 0.5675575733184814,
      "learning_rate": 4.63835058611124e-06,
      "loss": 0.0087,
      "step": 1647400
    },
    {
      "epoch": 2.696038962314173,
      "grad_norm": 0.4437567889690399,
      "learning_rate": 4.638284693897722e-06,
      "loss": 0.0163,
      "step": 1647420
    },
    {
      "epoch": 2.6960716927528265,
      "grad_norm": 0.12857899069786072,
      "learning_rate": 4.638218801684206e-06,
      "loss": 0.0066,
      "step": 1647440
    },
    {
      "epoch": 2.6961044231914797,
      "grad_norm": 0.15249642729759216,
      "learning_rate": 4.638152909470689e-06,
      "loss": 0.0157,
      "step": 1647460
    },
    {
      "epoch": 2.696137153630133,
      "grad_norm": 0.3607124984264374,
      "learning_rate": 4.638087017257171e-06,
      "loss": 0.0209,
      "step": 1647480
    },
    {
      "epoch": 2.6961698840687864,
      "grad_norm": 0.2269715666770935,
      "learning_rate": 4.638021125043654e-06,
      "loss": 0.0173,
      "step": 1647500
    },
    {
      "epoch": 2.6962026145074396,
      "grad_norm": 1.4713590145111084,
      "learning_rate": 4.637955232830137e-06,
      "loss": 0.0199,
      "step": 1647520
    },
    {
      "epoch": 2.6962353449460927,
      "grad_norm": 0.30420753359794617,
      "learning_rate": 4.63788934061662e-06,
      "loss": 0.0141,
      "step": 1647540
    },
    {
      "epoch": 2.6962680753847463,
      "grad_norm": 0.22735606133937836,
      "learning_rate": 4.637823448403102e-06,
      "loss": 0.0172,
      "step": 1647560
    },
    {
      "epoch": 2.6963008058233995,
      "grad_norm": 0.5748800039291382,
      "learning_rate": 4.637757556189585e-06,
      "loss": 0.01,
      "step": 1647580
    },
    {
      "epoch": 2.696333536262053,
      "grad_norm": 0.31817421317100525,
      "learning_rate": 4.637691663976069e-06,
      "loss": 0.0184,
      "step": 1647600
    },
    {
      "epoch": 2.6963662667007062,
      "grad_norm": 0.9840109944343567,
      "learning_rate": 4.6376257717625514e-06,
      "loss": 0.0131,
      "step": 1647620
    },
    {
      "epoch": 2.69639899713936,
      "grad_norm": 0.3199755549430847,
      "learning_rate": 4.637559879549034e-06,
      "loss": 0.0106,
      "step": 1647640
    },
    {
      "epoch": 2.696431727578013,
      "grad_norm": 0.3475772440433502,
      "learning_rate": 4.637493987335517e-06,
      "loss": 0.0141,
      "step": 1647660
    },
    {
      "epoch": 2.696464458016666,
      "grad_norm": 0.5431126952171326,
      "learning_rate": 4.637428095122e-06,
      "loss": 0.0143,
      "step": 1647680
    },
    {
      "epoch": 2.6964971884553197,
      "grad_norm": 0.2674170434474945,
      "learning_rate": 4.637362202908482e-06,
      "loss": 0.0143,
      "step": 1647700
    },
    {
      "epoch": 2.696529918893973,
      "grad_norm": 0.8131321668624878,
      "learning_rate": 4.637296310694965e-06,
      "loss": 0.0162,
      "step": 1647720
    },
    {
      "epoch": 2.6965626493326265,
      "grad_norm": 0.2164161056280136,
      "learning_rate": 4.637230418481448e-06,
      "loss": 0.0155,
      "step": 1647740
    },
    {
      "epoch": 2.6965953797712796,
      "grad_norm": 0.5873381495475769,
      "learning_rate": 4.6371645262679315e-06,
      "loss": 0.0164,
      "step": 1647760
    },
    {
      "epoch": 2.696628110209933,
      "grad_norm": 0.34283265471458435,
      "learning_rate": 4.637098634054414e-06,
      "loss": 0.0204,
      "step": 1647780
    },
    {
      "epoch": 2.6966608406485864,
      "grad_norm": 0.6190761923789978,
      "learning_rate": 4.637032741840897e-06,
      "loss": 0.0132,
      "step": 1647800
    },
    {
      "epoch": 2.6966935710872395,
      "grad_norm": 0.20292934775352478,
      "learning_rate": 4.63696684962738e-06,
      "loss": 0.0119,
      "step": 1647820
    },
    {
      "epoch": 2.696726301525893,
      "grad_norm": 0.47504082322120667,
      "learning_rate": 4.636900957413863e-06,
      "loss": 0.0201,
      "step": 1647840
    },
    {
      "epoch": 2.6967590319645462,
      "grad_norm": 0.36576199531555176,
      "learning_rate": 4.636835065200346e-06,
      "loss": 0.0088,
      "step": 1647860
    },
    {
      "epoch": 2.6967917624032,
      "grad_norm": 0.5758967995643616,
      "learning_rate": 4.636769172986829e-06,
      "loss": 0.0127,
      "step": 1647880
    },
    {
      "epoch": 2.696824492841853,
      "grad_norm": 0.5624330639839172,
      "learning_rate": 4.6367032807733115e-06,
      "loss": 0.0134,
      "step": 1647900
    },
    {
      "epoch": 2.6968572232805066,
      "grad_norm": 0.7029220461845398,
      "learning_rate": 4.636637388559794e-06,
      "loss": 0.0135,
      "step": 1647920
    },
    {
      "epoch": 2.6968899537191597,
      "grad_norm": 0.14349310100078583,
      "learning_rate": 4.636571496346277e-06,
      "loss": 0.012,
      "step": 1647940
    },
    {
      "epoch": 2.696922684157813,
      "grad_norm": 0.1127946600317955,
      "learning_rate": 4.63650560413276e-06,
      "loss": 0.0091,
      "step": 1647960
    },
    {
      "epoch": 2.6969554145964665,
      "grad_norm": 0.6907278299331665,
      "learning_rate": 4.6364397119192424e-06,
      "loss": 0.0165,
      "step": 1647980
    },
    {
      "epoch": 2.6969881450351196,
      "grad_norm": 0.7266546487808228,
      "learning_rate": 4.636373819705726e-06,
      "loss": 0.021,
      "step": 1648000
    },
    {
      "epoch": 2.6970208754737732,
      "grad_norm": 1.0426788330078125,
      "learning_rate": 4.636307927492209e-06,
      "loss": 0.0166,
      "step": 1648020
    },
    {
      "epoch": 2.6970536059124264,
      "grad_norm": 0.14165142178535461,
      "learning_rate": 4.6362420352786915e-06,
      "loss": 0.0145,
      "step": 1648040
    },
    {
      "epoch": 2.69708633635108,
      "grad_norm": 0.39574506878852844,
      "learning_rate": 4.636176143065174e-06,
      "loss": 0.0144,
      "step": 1648060
    },
    {
      "epoch": 2.697119066789733,
      "grad_norm": 0.21344304084777832,
      "learning_rate": 4.636110250851657e-06,
      "loss": 0.0147,
      "step": 1648080
    },
    {
      "epoch": 2.6971517972283863,
      "grad_norm": 0.9107224941253662,
      "learning_rate": 4.63604435863814e-06,
      "loss": 0.0128,
      "step": 1648100
    },
    {
      "epoch": 2.69718452766704,
      "grad_norm": 0.23125503957271576,
      "learning_rate": 4.635978466424623e-06,
      "loss": 0.0201,
      "step": 1648120
    },
    {
      "epoch": 2.697217258105693,
      "grad_norm": 0.7865080237388611,
      "learning_rate": 4.635912574211106e-06,
      "loss": 0.0136,
      "step": 1648140
    },
    {
      "epoch": 2.6972499885443466,
      "grad_norm": 0.20326976478099823,
      "learning_rate": 4.635846681997589e-06,
      "loss": 0.0117,
      "step": 1648160
    },
    {
      "epoch": 2.6972827189829998,
      "grad_norm": 0.3908240497112274,
      "learning_rate": 4.6357807897840715e-06,
      "loss": 0.0149,
      "step": 1648180
    },
    {
      "epoch": 2.6973154494216534,
      "grad_norm": 0.419035941362381,
      "learning_rate": 4.635714897570554e-06,
      "loss": 0.0142,
      "step": 1648200
    },
    {
      "epoch": 2.6973481798603065,
      "grad_norm": 0.2572317123413086,
      "learning_rate": 4.635649005357037e-06,
      "loss": 0.0158,
      "step": 1648220
    },
    {
      "epoch": 2.6973809102989597,
      "grad_norm": 0.7709857821464539,
      "learning_rate": 4.635583113143521e-06,
      "loss": 0.0119,
      "step": 1648240
    },
    {
      "epoch": 2.6974136407376132,
      "grad_norm": 0.25599929690361023,
      "learning_rate": 4.635517220930003e-06,
      "loss": 0.0142,
      "step": 1648260
    },
    {
      "epoch": 2.6974463711762664,
      "grad_norm": 0.15119460225105286,
      "learning_rate": 4.635451328716486e-06,
      "loss": 0.0129,
      "step": 1648280
    },
    {
      "epoch": 2.69747910161492,
      "grad_norm": 0.1828717142343521,
      "learning_rate": 4.635385436502969e-06,
      "loss": 0.0119,
      "step": 1648300
    },
    {
      "epoch": 2.697511832053573,
      "grad_norm": 0.26454591751098633,
      "learning_rate": 4.6353195442894516e-06,
      "loss": 0.0152,
      "step": 1648320
    },
    {
      "epoch": 2.6975445624922267,
      "grad_norm": 0.25510841608047485,
      "learning_rate": 4.635253652075934e-06,
      "loss": 0.0107,
      "step": 1648340
    },
    {
      "epoch": 2.69757729293088,
      "grad_norm": 0.41015762090682983,
      "learning_rate": 4.635187759862417e-06,
      "loss": 0.0114,
      "step": 1648360
    },
    {
      "epoch": 2.697610023369533,
      "grad_norm": 0.5203912854194641,
      "learning_rate": 4.6351218676489e-06,
      "loss": 0.0188,
      "step": 1648380
    },
    {
      "epoch": 2.6976427538081866,
      "grad_norm": 0.23119203746318817,
      "learning_rate": 4.635055975435383e-06,
      "loss": 0.0083,
      "step": 1648400
    },
    {
      "epoch": 2.69767548424684,
      "grad_norm": 0.18073393404483795,
      "learning_rate": 4.634990083221866e-06,
      "loss": 0.0147,
      "step": 1648420
    },
    {
      "epoch": 2.6977082146854934,
      "grad_norm": 0.33267149329185486,
      "learning_rate": 4.634924191008349e-06,
      "loss": 0.0166,
      "step": 1648440
    },
    {
      "epoch": 2.6977409451241465,
      "grad_norm": 0.18578876554965973,
      "learning_rate": 4.6348582987948324e-06,
      "loss": 0.0155,
      "step": 1648460
    },
    {
      "epoch": 2.6977736755628,
      "grad_norm": 0.33346378803253174,
      "learning_rate": 4.634792406581315e-06,
      "loss": 0.0188,
      "step": 1648480
    },
    {
      "epoch": 2.6978064060014533,
      "grad_norm": 0.3706457316875458,
      "learning_rate": 4.634726514367798e-06,
      "loss": 0.0156,
      "step": 1648500
    },
    {
      "epoch": 2.6978391364401064,
      "grad_norm": 0.22292354702949524,
      "learning_rate": 4.634660622154281e-06,
      "loss": 0.0114,
      "step": 1648520
    },
    {
      "epoch": 2.69787186687876,
      "grad_norm": 1.1340001821517944,
      "learning_rate": 4.634594729940763e-06,
      "loss": 0.012,
      "step": 1648540
    },
    {
      "epoch": 2.697904597317413,
      "grad_norm": 0.3218114972114563,
      "learning_rate": 4.634528837727246e-06,
      "loss": 0.0103,
      "step": 1648560
    },
    {
      "epoch": 2.6979373277560663,
      "grad_norm": 0.35599854588508606,
      "learning_rate": 4.634462945513729e-06,
      "loss": 0.0104,
      "step": 1648580
    },
    {
      "epoch": 2.69797005819472,
      "grad_norm": 0.08062207698822021,
      "learning_rate": 4.634397053300212e-06,
      "loss": 0.0124,
      "step": 1648600
    },
    {
      "epoch": 2.6980027886333735,
      "grad_norm": 0.6714060306549072,
      "learning_rate": 4.634331161086695e-06,
      "loss": 0.0086,
      "step": 1648620
    },
    {
      "epoch": 2.6980355190720267,
      "grad_norm": 0.27575328946113586,
      "learning_rate": 4.634265268873178e-06,
      "loss": 0.0139,
      "step": 1648640
    },
    {
      "epoch": 2.69806824951068,
      "grad_norm": 0.24082982540130615,
      "learning_rate": 4.634199376659661e-06,
      "loss": 0.0163,
      "step": 1648660
    },
    {
      "epoch": 2.6981009799493334,
      "grad_norm": 0.3545756936073303,
      "learning_rate": 4.634133484446143e-06,
      "loss": 0.0165,
      "step": 1648680
    },
    {
      "epoch": 2.6981337103879866,
      "grad_norm": 0.40573850274086,
      "learning_rate": 4.634067592232626e-06,
      "loss": 0.0108,
      "step": 1648700
    },
    {
      "epoch": 2.6981664408266397,
      "grad_norm": 0.23180700838565826,
      "learning_rate": 4.634001700019109e-06,
      "loss": 0.0132,
      "step": 1648720
    },
    {
      "epoch": 2.6981991712652933,
      "grad_norm": 0.11317049711942673,
      "learning_rate": 4.633935807805592e-06,
      "loss": 0.0142,
      "step": 1648740
    },
    {
      "epoch": 2.698231901703947,
      "grad_norm": 0.42275547981262207,
      "learning_rate": 4.633869915592074e-06,
      "loss": 0.0181,
      "step": 1648760
    },
    {
      "epoch": 2.6982646321426,
      "grad_norm": 0.47776085138320923,
      "learning_rate": 4.633804023378557e-06,
      "loss": 0.0102,
      "step": 1648780
    },
    {
      "epoch": 2.698297362581253,
      "grad_norm": 0.16716791689395905,
      "learning_rate": 4.633738131165041e-06,
      "loss": 0.0113,
      "step": 1648800
    },
    {
      "epoch": 2.698330093019907,
      "grad_norm": 0.19155344367027283,
      "learning_rate": 4.6336722389515234e-06,
      "loss": 0.0101,
      "step": 1648820
    },
    {
      "epoch": 2.69836282345856,
      "grad_norm": 0.27280235290527344,
      "learning_rate": 4.633606346738006e-06,
      "loss": 0.0081,
      "step": 1648840
    },
    {
      "epoch": 2.698395553897213,
      "grad_norm": 0.2623293101787567,
      "learning_rate": 4.63354045452449e-06,
      "loss": 0.0151,
      "step": 1648860
    },
    {
      "epoch": 2.6984282843358667,
      "grad_norm": 0.32410547137260437,
      "learning_rate": 4.6334745623109725e-06,
      "loss": 0.0123,
      "step": 1648880
    },
    {
      "epoch": 2.6984610147745203,
      "grad_norm": 0.2589474320411682,
      "learning_rate": 4.633408670097455e-06,
      "loss": 0.0117,
      "step": 1648900
    },
    {
      "epoch": 2.6984937452131734,
      "grad_norm": 0.35898467898368835,
      "learning_rate": 4.633342777883938e-06,
      "loss": 0.0162,
      "step": 1648920
    },
    {
      "epoch": 2.6985264756518266,
      "grad_norm": 0.5836843848228455,
      "learning_rate": 4.633276885670421e-06,
      "loss": 0.0147,
      "step": 1648940
    },
    {
      "epoch": 2.69855920609048,
      "grad_norm": 0.9201198220252991,
      "learning_rate": 4.6332109934569035e-06,
      "loss": 0.0164,
      "step": 1648960
    },
    {
      "epoch": 2.6985919365291333,
      "grad_norm": 0.11795724183320999,
      "learning_rate": 4.633145101243386e-06,
      "loss": 0.0155,
      "step": 1648980
    },
    {
      "epoch": 2.6986246669677865,
      "grad_norm": 0.22950442135334015,
      "learning_rate": 4.633079209029869e-06,
      "loss": 0.0218,
      "step": 1649000
    },
    {
      "epoch": 2.69865739740644,
      "grad_norm": 0.32616162300109863,
      "learning_rate": 4.6330133168163525e-06,
      "loss": 0.0185,
      "step": 1649020
    },
    {
      "epoch": 2.6986901278450937,
      "grad_norm": 0.20982834696769714,
      "learning_rate": 4.632947424602835e-06,
      "loss": 0.0085,
      "step": 1649040
    },
    {
      "epoch": 2.698722858283747,
      "grad_norm": 0.2812015414237976,
      "learning_rate": 4.632881532389318e-06,
      "loss": 0.0132,
      "step": 1649060
    },
    {
      "epoch": 2.6987555887224,
      "grad_norm": 0.41574394702911377,
      "learning_rate": 4.632815640175801e-06,
      "loss": 0.0124,
      "step": 1649080
    },
    {
      "epoch": 2.6987883191610536,
      "grad_norm": 0.28595468401908875,
      "learning_rate": 4.6327497479622835e-06,
      "loss": 0.0118,
      "step": 1649100
    },
    {
      "epoch": 2.6988210495997067,
      "grad_norm": 0.2622388005256653,
      "learning_rate": 4.632683855748766e-06,
      "loss": 0.0106,
      "step": 1649120
    },
    {
      "epoch": 2.69885378003836,
      "grad_norm": 0.13299016654491425,
      "learning_rate": 4.632617963535249e-06,
      "loss": 0.0177,
      "step": 1649140
    },
    {
      "epoch": 2.6988865104770134,
      "grad_norm": 0.15270613133907318,
      "learning_rate": 4.632552071321732e-06,
      "loss": 0.0117,
      "step": 1649160
    },
    {
      "epoch": 2.6989192409156666,
      "grad_norm": 0.19184312224388123,
      "learning_rate": 4.632486179108215e-06,
      "loss": 0.0086,
      "step": 1649180
    },
    {
      "epoch": 2.69895197135432,
      "grad_norm": 0.33119267225265503,
      "learning_rate": 4.632420286894698e-06,
      "loss": 0.018,
      "step": 1649200
    },
    {
      "epoch": 2.6989847017929733,
      "grad_norm": 0.45690861344337463,
      "learning_rate": 4.632354394681181e-06,
      "loss": 0.0109,
      "step": 1649220
    },
    {
      "epoch": 2.699017432231627,
      "grad_norm": 0.6499747633934021,
      "learning_rate": 4.6322885024676635e-06,
      "loss": 0.0116,
      "step": 1649240
    },
    {
      "epoch": 2.69905016267028,
      "grad_norm": 0.30933815240859985,
      "learning_rate": 4.632222610254147e-06,
      "loss": 0.0133,
      "step": 1649260
    },
    {
      "epoch": 2.6990828931089332,
      "grad_norm": 0.30687659978866577,
      "learning_rate": 4.63215671804063e-06,
      "loss": 0.0193,
      "step": 1649280
    },
    {
      "epoch": 2.699115623547587,
      "grad_norm": 0.30043888092041016,
      "learning_rate": 4.632090825827113e-06,
      "loss": 0.0112,
      "step": 1649300
    },
    {
      "epoch": 2.69914835398624,
      "grad_norm": 1.110735297203064,
      "learning_rate": 4.632024933613595e-06,
      "loss": 0.0152,
      "step": 1649320
    },
    {
      "epoch": 2.6991810844248936,
      "grad_norm": 0.27095136046409607,
      "learning_rate": 4.631959041400078e-06,
      "loss": 0.0103,
      "step": 1649340
    },
    {
      "epoch": 2.6992138148635467,
      "grad_norm": 0.36794641613960266,
      "learning_rate": 4.631893149186561e-06,
      "loss": 0.0104,
      "step": 1649360
    },
    {
      "epoch": 2.6992465453022003,
      "grad_norm": 1.0437225103378296,
      "learning_rate": 4.6318272569730435e-06,
      "loss": 0.0134,
      "step": 1649380
    },
    {
      "epoch": 2.6992792757408535,
      "grad_norm": 0.6057441234588623,
      "learning_rate": 4.631761364759526e-06,
      "loss": 0.0202,
      "step": 1649400
    },
    {
      "epoch": 2.6993120061795066,
      "grad_norm": 0.2680942416191101,
      "learning_rate": 4.63169547254601e-06,
      "loss": 0.0215,
      "step": 1649420
    },
    {
      "epoch": 2.69934473661816,
      "grad_norm": 0.1542302817106247,
      "learning_rate": 4.631629580332493e-06,
      "loss": 0.014,
      "step": 1649440
    },
    {
      "epoch": 2.6993774670568134,
      "grad_norm": 0.3911980390548706,
      "learning_rate": 4.631563688118975e-06,
      "loss": 0.011,
      "step": 1649460
    },
    {
      "epoch": 2.699410197495467,
      "grad_norm": 0.5448976755142212,
      "learning_rate": 4.631497795905458e-06,
      "loss": 0.0116,
      "step": 1649480
    },
    {
      "epoch": 2.69944292793412,
      "grad_norm": 0.1382555216550827,
      "learning_rate": 4.631431903691941e-06,
      "loss": 0.0158,
      "step": 1649500
    },
    {
      "epoch": 2.6994756583727737,
      "grad_norm": 0.799169659614563,
      "learning_rate": 4.631366011478424e-06,
      "loss": 0.0134,
      "step": 1649520
    },
    {
      "epoch": 2.699508388811427,
      "grad_norm": 0.30572351813316345,
      "learning_rate": 4.631300119264907e-06,
      "loss": 0.0096,
      "step": 1649540
    },
    {
      "epoch": 2.69954111925008,
      "grad_norm": 0.05696200579404831,
      "learning_rate": 4.63123422705139e-06,
      "loss": 0.0133,
      "step": 1649560
    },
    {
      "epoch": 2.6995738496887336,
      "grad_norm": 0.39961227774620056,
      "learning_rate": 4.631168334837873e-06,
      "loss": 0.0132,
      "step": 1649580
    },
    {
      "epoch": 2.6996065801273867,
      "grad_norm": 0.6891749501228333,
      "learning_rate": 4.631102442624355e-06,
      "loss": 0.0139,
      "step": 1649600
    },
    {
      "epoch": 2.6996393105660403,
      "grad_norm": 0.33205750584602356,
      "learning_rate": 4.631036550410838e-06,
      "loss": 0.0117,
      "step": 1649620
    },
    {
      "epoch": 2.6996720410046935,
      "grad_norm": 0.14973850548267365,
      "learning_rate": 4.630970658197322e-06,
      "loss": 0.0113,
      "step": 1649640
    },
    {
      "epoch": 2.699704771443347,
      "grad_norm": 2.1633925437927246,
      "learning_rate": 4.6309047659838044e-06,
      "loss": 0.0134,
      "step": 1649660
    },
    {
      "epoch": 2.6997375018820002,
      "grad_norm": 0.3707776963710785,
      "learning_rate": 4.630838873770287e-06,
      "loss": 0.0128,
      "step": 1649680
    },
    {
      "epoch": 2.6997702323206534,
      "grad_norm": 0.23958224058151245,
      "learning_rate": 4.63077298155677e-06,
      "loss": 0.0184,
      "step": 1649700
    },
    {
      "epoch": 2.699802962759307,
      "grad_norm": 0.14844916760921478,
      "learning_rate": 4.630707089343253e-06,
      "loss": 0.0245,
      "step": 1649720
    },
    {
      "epoch": 2.69983569319796,
      "grad_norm": 0.429597944021225,
      "learning_rate": 4.630641197129735e-06,
      "loss": 0.0156,
      "step": 1649740
    },
    {
      "epoch": 2.6998684236366137,
      "grad_norm": 0.40587368607521057,
      "learning_rate": 4.630575304916218e-06,
      "loss": 0.0195,
      "step": 1649760
    },
    {
      "epoch": 2.699901154075267,
      "grad_norm": 0.2293868362903595,
      "learning_rate": 4.630509412702701e-06,
      "loss": 0.0149,
      "step": 1649780
    },
    {
      "epoch": 2.6999338845139205,
      "grad_norm": 0.695631742477417,
      "learning_rate": 4.630443520489184e-06,
      "loss": 0.0163,
      "step": 1649800
    },
    {
      "epoch": 2.6999666149525736,
      "grad_norm": 0.7220357060432434,
      "learning_rate": 4.630377628275667e-06,
      "loss": 0.0127,
      "step": 1649820
    },
    {
      "epoch": 2.6999993453912268,
      "grad_norm": 1.5362253189086914,
      "learning_rate": 4.63031173606215e-06,
      "loss": 0.0152,
      "step": 1649840
    },
    {
      "epoch": 2.7000320758298804,
      "grad_norm": 0.2666501998901367,
      "learning_rate": 4.630245843848633e-06,
      "loss": 0.0132,
      "step": 1649860
    },
    {
      "epoch": 2.7000648062685335,
      "grad_norm": 0.4153362214565277,
      "learning_rate": 4.630179951635116e-06,
      "loss": 0.0127,
      "step": 1649880
    },
    {
      "epoch": 2.700097536707187,
      "grad_norm": 0.3939746618270874,
      "learning_rate": 4.630114059421599e-06,
      "loss": 0.0148,
      "step": 1649900
    },
    {
      "epoch": 2.7001302671458403,
      "grad_norm": 0.29324671626091003,
      "learning_rate": 4.630048167208082e-06,
      "loss": 0.0146,
      "step": 1649920
    },
    {
      "epoch": 2.700162997584494,
      "grad_norm": 0.3738076090812683,
      "learning_rate": 4.6299822749945645e-06,
      "loss": 0.0129,
      "step": 1649940
    },
    {
      "epoch": 2.700195728023147,
      "grad_norm": 0.0455123707652092,
      "learning_rate": 4.629916382781047e-06,
      "loss": 0.0126,
      "step": 1649960
    },
    {
      "epoch": 2.7002284584618,
      "grad_norm": 0.42753133177757263,
      "learning_rate": 4.62985049056753e-06,
      "loss": 0.0081,
      "step": 1649980
    },
    {
      "epoch": 2.7002611889004537,
      "grad_norm": 0.13970209658145905,
      "learning_rate": 4.629784598354013e-06,
      "loss": 0.0162,
      "step": 1650000
    },
    {
      "epoch": 2.7002611889004537,
      "eval_loss": 0.007747943978756666,
      "eval_runtime": 6508.7306,
      "eval_samples_per_second": 157.92,
      "eval_steps_per_second": 15.792,
      "eval_sts-dev_pearson_cosine": 0.982182604619947,
      "eval_sts-dev_spearman_cosine": 0.89371883156575,
      "step": 1650000
    },
    {
      "epoch": 2.700293919339107,
      "grad_norm": 0.4448281228542328,
      "learning_rate": 4.6297187061404954e-06,
      "loss": 0.0123,
      "step": 1650020
    },
    {
      "epoch": 2.70032664977776,
      "grad_norm": 0.10876286029815674,
      "learning_rate": 4.629652813926979e-06,
      "loss": 0.0178,
      "step": 1650040
    },
    {
      "epoch": 2.7003593802164136,
      "grad_norm": 0.4292166829109192,
      "learning_rate": 4.629586921713462e-06,
      "loss": 0.0188,
      "step": 1650060
    },
    {
      "epoch": 2.7003921106550672,
      "grad_norm": 0.23924313485622406,
      "learning_rate": 4.6295210294999445e-06,
      "loss": 0.0125,
      "step": 1650080
    },
    {
      "epoch": 2.7004248410937204,
      "grad_norm": 0.36885759234428406,
      "learning_rate": 4.629455137286427e-06,
      "loss": 0.0108,
      "step": 1650100
    },
    {
      "epoch": 2.7004575715323735,
      "grad_norm": 0.5022483468055725,
      "learning_rate": 4.62938924507291e-06,
      "loss": 0.0185,
      "step": 1650120
    },
    {
      "epoch": 2.700490301971027,
      "grad_norm": 0.09828303754329681,
      "learning_rate": 4.629323352859393e-06,
      "loss": 0.0101,
      "step": 1650140
    },
    {
      "epoch": 2.7005230324096803,
      "grad_norm": 0.1441429853439331,
      "learning_rate": 4.6292574606458755e-06,
      "loss": 0.0115,
      "step": 1650160
    },
    {
      "epoch": 2.7005557628483334,
      "grad_norm": 0.6727906465530396,
      "learning_rate": 4.629191568432358e-06,
      "loss": 0.0203,
      "step": 1650180
    },
    {
      "epoch": 2.700588493286987,
      "grad_norm": 0.36514171957969666,
      "learning_rate": 4.629125676218841e-06,
      "loss": 0.0143,
      "step": 1650200
    },
    {
      "epoch": 2.7006212237256406,
      "grad_norm": 0.49674052000045776,
      "learning_rate": 4.6290597840053245e-06,
      "loss": 0.0128,
      "step": 1650220
    },
    {
      "epoch": 2.7006539541642938,
      "grad_norm": 0.49132853746414185,
      "learning_rate": 4.628993891791807e-06,
      "loss": 0.0101,
      "step": 1650240
    },
    {
      "epoch": 2.700686684602947,
      "grad_norm": 0.17637899518013,
      "learning_rate": 4.62892799957829e-06,
      "loss": 0.0112,
      "step": 1650260
    },
    {
      "epoch": 2.7007194150416005,
      "grad_norm": 0.11441665142774582,
      "learning_rate": 4.628862107364774e-06,
      "loss": 0.01,
      "step": 1650280
    },
    {
      "epoch": 2.7007521454802537,
      "grad_norm": 0.37146449089050293,
      "learning_rate": 4.628796215151256e-06,
      "loss": 0.0129,
      "step": 1650300
    },
    {
      "epoch": 2.700784875918907,
      "grad_norm": 1.152419924736023,
      "learning_rate": 4.628730322937739e-06,
      "loss": 0.0124,
      "step": 1650320
    },
    {
      "epoch": 2.7008176063575604,
      "grad_norm": 0.44021347165107727,
      "learning_rate": 4.628664430724222e-06,
      "loss": 0.0202,
      "step": 1650340
    },
    {
      "epoch": 2.700850336796214,
      "grad_norm": 0.4100612998008728,
      "learning_rate": 4.6285985385107046e-06,
      "loss": 0.0168,
      "step": 1650360
    },
    {
      "epoch": 2.700883067234867,
      "grad_norm": 0.22961661219596863,
      "learning_rate": 4.628532646297187e-06,
      "loss": 0.0094,
      "step": 1650380
    },
    {
      "epoch": 2.7009157976735203,
      "grad_norm": 0.16358385980129242,
      "learning_rate": 4.62846675408367e-06,
      "loss": 0.0154,
      "step": 1650400
    },
    {
      "epoch": 2.700948528112174,
      "grad_norm": 0.7276291251182556,
      "learning_rate": 4.628400861870153e-06,
      "loss": 0.0124,
      "step": 1650420
    },
    {
      "epoch": 2.700981258550827,
      "grad_norm": 0.7622537016868591,
      "learning_rate": 4.628334969656636e-06,
      "loss": 0.0082,
      "step": 1650440
    },
    {
      "epoch": 2.70101398898948,
      "grad_norm": 0.1222233846783638,
      "learning_rate": 4.628269077443119e-06,
      "loss": 0.0126,
      "step": 1650460
    },
    {
      "epoch": 2.701046719428134,
      "grad_norm": 0.423585444688797,
      "learning_rate": 4.628203185229602e-06,
      "loss": 0.0179,
      "step": 1650480
    },
    {
      "epoch": 2.7010794498667874,
      "grad_norm": 0.12184913456439972,
      "learning_rate": 4.628137293016085e-06,
      "loss": 0.0206,
      "step": 1650500
    },
    {
      "epoch": 2.7011121803054405,
      "grad_norm": 0.2584494352340698,
      "learning_rate": 4.628071400802567e-06,
      "loss": 0.0087,
      "step": 1650520
    },
    {
      "epoch": 2.7011449107440937,
      "grad_norm": 0.19214220345020294,
      "learning_rate": 4.62800550858905e-06,
      "loss": 0.0172,
      "step": 1650540
    },
    {
      "epoch": 2.7011776411827473,
      "grad_norm": 0.3120903968811035,
      "learning_rate": 4.627939616375533e-06,
      "loss": 0.0114,
      "step": 1650560
    },
    {
      "epoch": 2.7012103716214004,
      "grad_norm": 0.23327478766441345,
      "learning_rate": 4.627873724162016e-06,
      "loss": 0.0206,
      "step": 1650580
    },
    {
      "epoch": 2.7012431020600536,
      "grad_norm": 0.3885892927646637,
      "learning_rate": 4.627807831948499e-06,
      "loss": 0.0203,
      "step": 1650600
    },
    {
      "epoch": 2.701275832498707,
      "grad_norm": 0.3962099552154541,
      "learning_rate": 4.627741939734982e-06,
      "loss": 0.0243,
      "step": 1650620
    },
    {
      "epoch": 2.7013085629373603,
      "grad_norm": 0.27774691581726074,
      "learning_rate": 4.627676047521465e-06,
      "loss": 0.011,
      "step": 1650640
    },
    {
      "epoch": 2.701341293376014,
      "grad_norm": 0.42447081208229065,
      "learning_rate": 4.627610155307947e-06,
      "loss": 0.0154,
      "step": 1650660
    },
    {
      "epoch": 2.701374023814667,
      "grad_norm": 0.6512194275856018,
      "learning_rate": 4.627544263094431e-06,
      "loss": 0.0166,
      "step": 1650680
    },
    {
      "epoch": 2.7014067542533207,
      "grad_norm": 0.17149390280246735,
      "learning_rate": 4.627478370880914e-06,
      "loss": 0.0112,
      "step": 1650700
    },
    {
      "epoch": 2.701439484691974,
      "grad_norm": 0.23686031997203827,
      "learning_rate": 4.627412478667396e-06,
      "loss": 0.0096,
      "step": 1650720
    },
    {
      "epoch": 2.701472215130627,
      "grad_norm": 0.43595096468925476,
      "learning_rate": 4.627346586453879e-06,
      "loss": 0.0188,
      "step": 1650740
    },
    {
      "epoch": 2.7015049455692806,
      "grad_norm": 0.17127957940101624,
      "learning_rate": 4.627280694240362e-06,
      "loss": 0.0154,
      "step": 1650760
    },
    {
      "epoch": 2.7015376760079337,
      "grad_norm": 0.11936238408088684,
      "learning_rate": 4.627214802026845e-06,
      "loss": 0.011,
      "step": 1650780
    },
    {
      "epoch": 2.7015704064465873,
      "grad_norm": 1.097975492477417,
      "learning_rate": 4.627148909813327e-06,
      "loss": 0.0141,
      "step": 1650800
    },
    {
      "epoch": 2.7016031368852405,
      "grad_norm": 1.1785833835601807,
      "learning_rate": 4.62708301759981e-06,
      "loss": 0.0151,
      "step": 1650820
    },
    {
      "epoch": 2.701635867323894,
      "grad_norm": 0.2100152224302292,
      "learning_rate": 4.627017125386294e-06,
      "loss": 0.017,
      "step": 1650840
    },
    {
      "epoch": 2.701668597762547,
      "grad_norm": 0.3678778111934662,
      "learning_rate": 4.6269512331727764e-06,
      "loss": 0.0175,
      "step": 1650860
    },
    {
      "epoch": 2.7017013282012003,
      "grad_norm": 0.20991355180740356,
      "learning_rate": 4.626885340959259e-06,
      "loss": 0.0173,
      "step": 1650880
    },
    {
      "epoch": 2.701734058639854,
      "grad_norm": 0.3979656994342804,
      "learning_rate": 4.626819448745742e-06,
      "loss": 0.0153,
      "step": 1650900
    },
    {
      "epoch": 2.701766789078507,
      "grad_norm": 0.32420283555984497,
      "learning_rate": 4.626753556532225e-06,
      "loss": 0.0171,
      "step": 1650920
    },
    {
      "epoch": 2.7017995195171607,
      "grad_norm": 0.20867621898651123,
      "learning_rate": 4.626687664318708e-06,
      "loss": 0.0153,
      "step": 1650940
    },
    {
      "epoch": 2.701832249955814,
      "grad_norm": 0.3186934292316437,
      "learning_rate": 4.626621772105191e-06,
      "loss": 0.0135,
      "step": 1650960
    },
    {
      "epoch": 2.7018649803944674,
      "grad_norm": 1.0598573684692383,
      "learning_rate": 4.626555879891674e-06,
      "loss": 0.0127,
      "step": 1650980
    },
    {
      "epoch": 2.7018977108331206,
      "grad_norm": 0.38511648774147034,
      "learning_rate": 4.6264899876781565e-06,
      "loss": 0.0141,
      "step": 1651000
    },
    {
      "epoch": 2.7019304412717737,
      "grad_norm": 0.3201836943626404,
      "learning_rate": 4.626424095464639e-06,
      "loss": 0.0132,
      "step": 1651020
    },
    {
      "epoch": 2.7019631717104273,
      "grad_norm": 0.4930262565612793,
      "learning_rate": 4.626358203251122e-06,
      "loss": 0.0122,
      "step": 1651040
    },
    {
      "epoch": 2.7019959021490805,
      "grad_norm": 0.4085467457771301,
      "learning_rate": 4.6262923110376055e-06,
      "loss": 0.01,
      "step": 1651060
    },
    {
      "epoch": 2.702028632587734,
      "grad_norm": 0.2733961045742035,
      "learning_rate": 4.626226418824088e-06,
      "loss": 0.02,
      "step": 1651080
    },
    {
      "epoch": 2.702061363026387,
      "grad_norm": 0.4777149260044098,
      "learning_rate": 4.626160526610571e-06,
      "loss": 0.0194,
      "step": 1651100
    },
    {
      "epoch": 2.702094093465041,
      "grad_norm": 0.2638617753982544,
      "learning_rate": 4.626094634397054e-06,
      "loss": 0.0114,
      "step": 1651120
    },
    {
      "epoch": 2.702126823903694,
      "grad_norm": 0.2107550948858261,
      "learning_rate": 4.6260287421835365e-06,
      "loss": 0.0077,
      "step": 1651140
    },
    {
      "epoch": 2.702159554342347,
      "grad_norm": 0.5143478512763977,
      "learning_rate": 4.625962849970019e-06,
      "loss": 0.0186,
      "step": 1651160
    },
    {
      "epoch": 2.7021922847810007,
      "grad_norm": 0.09160149842500687,
      "learning_rate": 4.625896957756502e-06,
      "loss": 0.0107,
      "step": 1651180
    },
    {
      "epoch": 2.702225015219654,
      "grad_norm": 0.1355523020029068,
      "learning_rate": 4.625831065542985e-06,
      "loss": 0.015,
      "step": 1651200
    },
    {
      "epoch": 2.7022577456583075,
      "grad_norm": 0.18919643759727478,
      "learning_rate": 4.6257651733294675e-06,
      "loss": 0.011,
      "step": 1651220
    },
    {
      "epoch": 2.7022904760969606,
      "grad_norm": 0.21520628035068512,
      "learning_rate": 4.625699281115951e-06,
      "loss": 0.0087,
      "step": 1651240
    },
    {
      "epoch": 2.702323206535614,
      "grad_norm": 0.20487527549266815,
      "learning_rate": 4.625633388902434e-06,
      "loss": 0.0115,
      "step": 1651260
    },
    {
      "epoch": 2.7023559369742673,
      "grad_norm": 0.2925484776496887,
      "learning_rate": 4.6255674966889165e-06,
      "loss": 0.013,
      "step": 1651280
    },
    {
      "epoch": 2.7023886674129205,
      "grad_norm": 0.3710920810699463,
      "learning_rate": 4.6255016044754e-06,
      "loss": 0.0133,
      "step": 1651300
    },
    {
      "epoch": 2.702421397851574,
      "grad_norm": 0.11810307204723358,
      "learning_rate": 4.625435712261883e-06,
      "loss": 0.0129,
      "step": 1651320
    },
    {
      "epoch": 2.7024541282902272,
      "grad_norm": 0.31318244338035583,
      "learning_rate": 4.625369820048366e-06,
      "loss": 0.0111,
      "step": 1651340
    },
    {
      "epoch": 2.702486858728881,
      "grad_norm": 0.16896946728229523,
      "learning_rate": 4.625303927834848e-06,
      "loss": 0.0148,
      "step": 1651360
    },
    {
      "epoch": 2.702519589167534,
      "grad_norm": 0.37351056933403015,
      "learning_rate": 4.625238035621331e-06,
      "loss": 0.0133,
      "step": 1651380
    },
    {
      "epoch": 2.7025523196061876,
      "grad_norm": 0.4158852994441986,
      "learning_rate": 4.625172143407814e-06,
      "loss": 0.0113,
      "step": 1651400
    },
    {
      "epoch": 2.7025850500448407,
      "grad_norm": 0.15097665786743164,
      "learning_rate": 4.6251062511942965e-06,
      "loss": 0.0136,
      "step": 1651420
    },
    {
      "epoch": 2.702617780483494,
      "grad_norm": 0.688759446144104,
      "learning_rate": 4.625040358980779e-06,
      "loss": 0.0231,
      "step": 1651440
    },
    {
      "epoch": 2.7026505109221475,
      "grad_norm": 0.08497129380702972,
      "learning_rate": 4.624974466767263e-06,
      "loss": 0.0147,
      "step": 1651460
    },
    {
      "epoch": 2.7026832413608006,
      "grad_norm": 0.4653940200805664,
      "learning_rate": 4.624908574553746e-06,
      "loss": 0.011,
      "step": 1651480
    },
    {
      "epoch": 2.702715971799454,
      "grad_norm": 0.17296850681304932,
      "learning_rate": 4.624842682340228e-06,
      "loss": 0.0138,
      "step": 1651500
    },
    {
      "epoch": 2.7027487022381074,
      "grad_norm": 0.5003204941749573,
      "learning_rate": 4.624776790126711e-06,
      "loss": 0.0083,
      "step": 1651520
    },
    {
      "epoch": 2.702781432676761,
      "grad_norm": 0.525523841381073,
      "learning_rate": 4.624710897913194e-06,
      "loss": 0.0116,
      "step": 1651540
    },
    {
      "epoch": 2.702814163115414,
      "grad_norm": 0.038540616631507874,
      "learning_rate": 4.6246450056996766e-06,
      "loss": 0.0111,
      "step": 1651560
    },
    {
      "epoch": 2.7028468935540673,
      "grad_norm": 0.40707507729530334,
      "learning_rate": 4.624579113486159e-06,
      "loss": 0.011,
      "step": 1651580
    },
    {
      "epoch": 2.702879623992721,
      "grad_norm": 0.14789198338985443,
      "learning_rate": 4.624513221272642e-06,
      "loss": 0.0119,
      "step": 1651600
    },
    {
      "epoch": 2.702912354431374,
      "grad_norm": 0.1687098741531372,
      "learning_rate": 4.624447329059125e-06,
      "loss": 0.0109,
      "step": 1651620
    },
    {
      "epoch": 2.702945084870027,
      "grad_norm": 0.5089512467384338,
      "learning_rate": 4.624381436845608e-06,
      "loss": 0.0135,
      "step": 1651640
    },
    {
      "epoch": 2.7029778153086808,
      "grad_norm": 0.41684114933013916,
      "learning_rate": 4.624315544632091e-06,
      "loss": 0.0168,
      "step": 1651660
    },
    {
      "epoch": 2.7030105457473343,
      "grad_norm": 0.4720086455345154,
      "learning_rate": 4.624249652418574e-06,
      "loss": 0.0098,
      "step": 1651680
    },
    {
      "epoch": 2.7030432761859875,
      "grad_norm": 0.42813795804977417,
      "learning_rate": 4.6241837602050574e-06,
      "loss": 0.0167,
      "step": 1651700
    },
    {
      "epoch": 2.7030760066246406,
      "grad_norm": 0.12061312049627304,
      "learning_rate": 4.62411786799154e-06,
      "loss": 0.0114,
      "step": 1651720
    },
    {
      "epoch": 2.7031087370632942,
      "grad_norm": 0.15650010108947754,
      "learning_rate": 4.624051975778023e-06,
      "loss": 0.0177,
      "step": 1651740
    },
    {
      "epoch": 2.7031414675019474,
      "grad_norm": 0.18813419342041016,
      "learning_rate": 4.623986083564506e-06,
      "loss": 0.0117,
      "step": 1651760
    },
    {
      "epoch": 2.7031741979406005,
      "grad_norm": 0.7414719462394714,
      "learning_rate": 4.623920191350988e-06,
      "loss": 0.0195,
      "step": 1651780
    },
    {
      "epoch": 2.703206928379254,
      "grad_norm": 0.11068318039178848,
      "learning_rate": 4.623854299137471e-06,
      "loss": 0.0129,
      "step": 1651800
    },
    {
      "epoch": 2.7032396588179077,
      "grad_norm": 0.30752620100975037,
      "learning_rate": 4.623788406923954e-06,
      "loss": 0.0171,
      "step": 1651820
    },
    {
      "epoch": 2.703272389256561,
      "grad_norm": 0.19089344143867493,
      "learning_rate": 4.623722514710437e-06,
      "loss": 0.0176,
      "step": 1651840
    },
    {
      "epoch": 2.703305119695214,
      "grad_norm": 0.4752899408340454,
      "learning_rate": 4.62365662249692e-06,
      "loss": 0.01,
      "step": 1651860
    },
    {
      "epoch": 2.7033378501338676,
      "grad_norm": 0.395883709192276,
      "learning_rate": 4.623590730283403e-06,
      "loss": 0.0188,
      "step": 1651880
    },
    {
      "epoch": 2.7033705805725208,
      "grad_norm": 0.5486852526664734,
      "learning_rate": 4.623524838069886e-06,
      "loss": 0.0147,
      "step": 1651900
    },
    {
      "epoch": 2.703403311011174,
      "grad_norm": 0.34553858637809753,
      "learning_rate": 4.6234589458563684e-06,
      "loss": 0.0123,
      "step": 1651920
    },
    {
      "epoch": 2.7034360414498275,
      "grad_norm": 0.09523922204971313,
      "learning_rate": 4.623393053642851e-06,
      "loss": 0.0108,
      "step": 1651940
    },
    {
      "epoch": 2.703468771888481,
      "grad_norm": 0.26459380984306335,
      "learning_rate": 4.623327161429334e-06,
      "loss": 0.0169,
      "step": 1651960
    },
    {
      "epoch": 2.7035015023271343,
      "grad_norm": 0.256215363740921,
      "learning_rate": 4.6232612692158175e-06,
      "loss": 0.0141,
      "step": 1651980
    },
    {
      "epoch": 2.7035342327657874,
      "grad_norm": 0.10290133953094482,
      "learning_rate": 4.6231953770023e-06,
      "loss": 0.0101,
      "step": 1652000
    },
    {
      "epoch": 2.703566963204441,
      "grad_norm": 0.191567525267601,
      "learning_rate": 4.623129484788783e-06,
      "loss": 0.0119,
      "step": 1652020
    },
    {
      "epoch": 2.703599693643094,
      "grad_norm": 0.13554935157299042,
      "learning_rate": 4.623063592575266e-06,
      "loss": 0.0142,
      "step": 1652040
    },
    {
      "epoch": 2.7036324240817473,
      "grad_norm": 0.33842483162879944,
      "learning_rate": 4.6229977003617485e-06,
      "loss": 0.0128,
      "step": 1652060
    },
    {
      "epoch": 2.703665154520401,
      "grad_norm": 0.26031237840652466,
      "learning_rate": 4.622931808148231e-06,
      "loss": 0.01,
      "step": 1652080
    },
    {
      "epoch": 2.7036978849590545,
      "grad_norm": 0.2835429310798645,
      "learning_rate": 4.622865915934715e-06,
      "loss": 0.0126,
      "step": 1652100
    },
    {
      "epoch": 2.7037306153977076,
      "grad_norm": 0.19531698524951935,
      "learning_rate": 4.6228000237211975e-06,
      "loss": 0.011,
      "step": 1652120
    },
    {
      "epoch": 2.703763345836361,
      "grad_norm": 0.5960538983345032,
      "learning_rate": 4.62273413150768e-06,
      "loss": 0.0078,
      "step": 1652140
    },
    {
      "epoch": 2.7037960762750144,
      "grad_norm": 0.18355663120746613,
      "learning_rate": 4.622668239294163e-06,
      "loss": 0.0084,
      "step": 1652160
    },
    {
      "epoch": 2.7038288067136675,
      "grad_norm": 0.11405960470438004,
      "learning_rate": 4.622602347080646e-06,
      "loss": 0.0186,
      "step": 1652180
    },
    {
      "epoch": 2.7038615371523207,
      "grad_norm": 0.5086545348167419,
      "learning_rate": 4.6225364548671285e-06,
      "loss": 0.013,
      "step": 1652200
    },
    {
      "epoch": 2.7038942675909743,
      "grad_norm": 0.4736592471599579,
      "learning_rate": 4.622470562653611e-06,
      "loss": 0.0168,
      "step": 1652220
    },
    {
      "epoch": 2.7039269980296274,
      "grad_norm": 0.15060468018054962,
      "learning_rate": 4.622404670440094e-06,
      "loss": 0.0216,
      "step": 1652240
    },
    {
      "epoch": 2.703959728468281,
      "grad_norm": 0.2392456829547882,
      "learning_rate": 4.6223387782265775e-06,
      "loss": 0.0187,
      "step": 1652260
    },
    {
      "epoch": 2.703992458906934,
      "grad_norm": 1.393876314163208,
      "learning_rate": 4.62227288601306e-06,
      "loss": 0.0182,
      "step": 1652280
    },
    {
      "epoch": 2.7040251893455878,
      "grad_norm": 0.42905640602111816,
      "learning_rate": 4.622206993799543e-06,
      "loss": 0.0123,
      "step": 1652300
    },
    {
      "epoch": 2.704057919784241,
      "grad_norm": 0.12602470815181732,
      "learning_rate": 4.622141101586026e-06,
      "loss": 0.0192,
      "step": 1652320
    },
    {
      "epoch": 2.704090650222894,
      "grad_norm": 0.36780011653900146,
      "learning_rate": 4.622075209372509e-06,
      "loss": 0.0119,
      "step": 1652340
    },
    {
      "epoch": 2.7041233806615477,
      "grad_norm": 0.14756441116333008,
      "learning_rate": 4.622009317158992e-06,
      "loss": 0.0079,
      "step": 1652360
    },
    {
      "epoch": 2.704156111100201,
      "grad_norm": 0.4095897674560547,
      "learning_rate": 4.621943424945475e-06,
      "loss": 0.0166,
      "step": 1652380
    },
    {
      "epoch": 2.7041888415388544,
      "grad_norm": 0.8271951675415039,
      "learning_rate": 4.6218775327319576e-06,
      "loss": 0.0153,
      "step": 1652400
    },
    {
      "epoch": 2.7042215719775076,
      "grad_norm": 0.8934326171875,
      "learning_rate": 4.62181164051844e-06,
      "loss": 0.0131,
      "step": 1652420
    },
    {
      "epoch": 2.704254302416161,
      "grad_norm": 0.6364243626594543,
      "learning_rate": 4.621745748304923e-06,
      "loss": 0.0154,
      "step": 1652440
    },
    {
      "epoch": 2.7042870328548143,
      "grad_norm": 0.5093064904212952,
      "learning_rate": 4.621679856091406e-06,
      "loss": 0.0212,
      "step": 1652460
    },
    {
      "epoch": 2.7043197632934675,
      "grad_norm": 0.21725909411907196,
      "learning_rate": 4.621613963877889e-06,
      "loss": 0.0157,
      "step": 1652480
    },
    {
      "epoch": 2.704352493732121,
      "grad_norm": 0.8980877995491028,
      "learning_rate": 4.621548071664372e-06,
      "loss": 0.0143,
      "step": 1652500
    },
    {
      "epoch": 2.704385224170774,
      "grad_norm": 0.14145517349243164,
      "learning_rate": 4.621482179450855e-06,
      "loss": 0.0104,
      "step": 1652520
    },
    {
      "epoch": 2.704417954609428,
      "grad_norm": 0.6408877968788147,
      "learning_rate": 4.621416287237338e-06,
      "loss": 0.0163,
      "step": 1652540
    },
    {
      "epoch": 2.704450685048081,
      "grad_norm": 0.43255460262298584,
      "learning_rate": 4.62135039502382e-06,
      "loss": 0.0105,
      "step": 1652560
    },
    {
      "epoch": 2.7044834154867345,
      "grad_norm": 0.31020721793174744,
      "learning_rate": 4.621284502810303e-06,
      "loss": 0.0193,
      "step": 1652580
    },
    {
      "epoch": 2.7045161459253877,
      "grad_norm": 0.33967411518096924,
      "learning_rate": 4.621218610596786e-06,
      "loss": 0.0086,
      "step": 1652600
    },
    {
      "epoch": 2.704548876364041,
      "grad_norm": 0.2156073898077011,
      "learning_rate": 4.6211527183832686e-06,
      "loss": 0.0127,
      "step": 1652620
    },
    {
      "epoch": 2.7045816068026944,
      "grad_norm": 0.3976384103298187,
      "learning_rate": 4.621086826169751e-06,
      "loss": 0.0149,
      "step": 1652640
    },
    {
      "epoch": 2.7046143372413476,
      "grad_norm": 0.6062741875648499,
      "learning_rate": 4.621020933956235e-06,
      "loss": 0.0117,
      "step": 1652660
    },
    {
      "epoch": 2.704647067680001,
      "grad_norm": 0.03306584805250168,
      "learning_rate": 4.620955041742718e-06,
      "loss": 0.0095,
      "step": 1652680
    },
    {
      "epoch": 2.7046797981186543,
      "grad_norm": 0.8800179362297058,
      "learning_rate": 4.6208891495292e-06,
      "loss": 0.0191,
      "step": 1652700
    },
    {
      "epoch": 2.704712528557308,
      "grad_norm": 0.5110061764717102,
      "learning_rate": 4.620823257315684e-06,
      "loss": 0.0143,
      "step": 1652720
    },
    {
      "epoch": 2.704745258995961,
      "grad_norm": 0.574078381061554,
      "learning_rate": 4.620757365102167e-06,
      "loss": 0.0135,
      "step": 1652740
    },
    {
      "epoch": 2.7047779894346142,
      "grad_norm": 0.2770287096500397,
      "learning_rate": 4.6206914728886494e-06,
      "loss": 0.0097,
      "step": 1652760
    },
    {
      "epoch": 2.704810719873268,
      "grad_norm": 0.16277159750461578,
      "learning_rate": 4.620625580675132e-06,
      "loss": 0.0135,
      "step": 1652780
    },
    {
      "epoch": 2.704843450311921,
      "grad_norm": 0.2749764323234558,
      "learning_rate": 4.620559688461615e-06,
      "loss": 0.0159,
      "step": 1652800
    },
    {
      "epoch": 2.7048761807505746,
      "grad_norm": 0.18206831812858582,
      "learning_rate": 4.620493796248098e-06,
      "loss": 0.0142,
      "step": 1652820
    },
    {
      "epoch": 2.7049089111892277,
      "grad_norm": 0.4600571095943451,
      "learning_rate": 4.62042790403458e-06,
      "loss": 0.0145,
      "step": 1652840
    },
    {
      "epoch": 2.7049416416278813,
      "grad_norm": 0.48354029655456543,
      "learning_rate": 4.620362011821063e-06,
      "loss": 0.0119,
      "step": 1652860
    },
    {
      "epoch": 2.7049743720665345,
      "grad_norm": 0.0833340510725975,
      "learning_rate": 4.620296119607547e-06,
      "loss": 0.0168,
      "step": 1652880
    },
    {
      "epoch": 2.7050071025051876,
      "grad_norm": 0.36284932494163513,
      "learning_rate": 4.6202302273940295e-06,
      "loss": 0.0131,
      "step": 1652900
    },
    {
      "epoch": 2.705039832943841,
      "grad_norm": 0.5023349523544312,
      "learning_rate": 4.620164335180512e-06,
      "loss": 0.0176,
      "step": 1652920
    },
    {
      "epoch": 2.7050725633824944,
      "grad_norm": 1.2164278030395508,
      "learning_rate": 4.620098442966995e-06,
      "loss": 0.0096,
      "step": 1652940
    },
    {
      "epoch": 2.705105293821148,
      "grad_norm": 0.1970810741186142,
      "learning_rate": 4.620032550753478e-06,
      "loss": 0.0126,
      "step": 1652960
    },
    {
      "epoch": 2.705138024259801,
      "grad_norm": 0.47951585054397583,
      "learning_rate": 4.61996665853996e-06,
      "loss": 0.0147,
      "step": 1652980
    },
    {
      "epoch": 2.7051707546984547,
      "grad_norm": 0.2795976400375366,
      "learning_rate": 4.619900766326443e-06,
      "loss": 0.015,
      "step": 1653000
    },
    {
      "epoch": 2.705203485137108,
      "grad_norm": 0.151357039809227,
      "learning_rate": 4.619834874112926e-06,
      "loss": 0.012,
      "step": 1653020
    },
    {
      "epoch": 2.705236215575761,
      "grad_norm": 0.5522950291633606,
      "learning_rate": 4.6197689818994095e-06,
      "loss": 0.013,
      "step": 1653040
    },
    {
      "epoch": 2.7052689460144146,
      "grad_norm": 0.1965082436800003,
      "learning_rate": 4.619703089685892e-06,
      "loss": 0.0097,
      "step": 1653060
    },
    {
      "epoch": 2.7053016764530677,
      "grad_norm": 0.2785455882549286,
      "learning_rate": 4.619637197472375e-06,
      "loss": 0.0154,
      "step": 1653080
    },
    {
      "epoch": 2.705334406891721,
      "grad_norm": 0.16159336268901825,
      "learning_rate": 4.619571305258858e-06,
      "loss": 0.0104,
      "step": 1653100
    },
    {
      "epoch": 2.7053671373303745,
      "grad_norm": 0.12125638127326965,
      "learning_rate": 4.619505413045341e-06,
      "loss": 0.01,
      "step": 1653120
    },
    {
      "epoch": 2.705399867769028,
      "grad_norm": 0.3980642557144165,
      "learning_rate": 4.619439520831824e-06,
      "loss": 0.0136,
      "step": 1653140
    },
    {
      "epoch": 2.7054325982076812,
      "grad_norm": 0.8522933721542358,
      "learning_rate": 4.619373628618307e-06,
      "loss": 0.0133,
      "step": 1653160
    },
    {
      "epoch": 2.7054653286463344,
      "grad_norm": 0.27804186940193176,
      "learning_rate": 4.6193077364047895e-06,
      "loss": 0.0127,
      "step": 1653180
    },
    {
      "epoch": 2.705498059084988,
      "grad_norm": 0.42742711305618286,
      "learning_rate": 4.619241844191272e-06,
      "loss": 0.0115,
      "step": 1653200
    },
    {
      "epoch": 2.705530789523641,
      "grad_norm": 0.42903950810432434,
      "learning_rate": 4.619175951977755e-06,
      "loss": 0.0099,
      "step": 1653220
    },
    {
      "epoch": 2.7055635199622943,
      "grad_norm": 0.35149621963500977,
      "learning_rate": 4.619110059764238e-06,
      "loss": 0.0091,
      "step": 1653240
    },
    {
      "epoch": 2.705596250400948,
      "grad_norm": 0.2404104620218277,
      "learning_rate": 4.6190441675507205e-06,
      "loss": 0.0139,
      "step": 1653260
    },
    {
      "epoch": 2.7056289808396015,
      "grad_norm": 0.38658687472343445,
      "learning_rate": 4.618978275337204e-06,
      "loss": 0.0141,
      "step": 1653280
    },
    {
      "epoch": 2.7056617112782546,
      "grad_norm": 0.0696718618273735,
      "learning_rate": 4.618912383123687e-06,
      "loss": 0.0155,
      "step": 1653300
    },
    {
      "epoch": 2.7056944417169078,
      "grad_norm": 0.7383882403373718,
      "learning_rate": 4.6188464909101695e-06,
      "loss": 0.0216,
      "step": 1653320
    },
    {
      "epoch": 2.7057271721555614,
      "grad_norm": 0.2857985198497772,
      "learning_rate": 4.618780598696652e-06,
      "loss": 0.0135,
      "step": 1653340
    },
    {
      "epoch": 2.7057599025942145,
      "grad_norm": 0.39247575402259827,
      "learning_rate": 4.618714706483135e-06,
      "loss": 0.0114,
      "step": 1653360
    },
    {
      "epoch": 2.7057926330328677,
      "grad_norm": 0.33223074674606323,
      "learning_rate": 4.618648814269618e-06,
      "loss": 0.0199,
      "step": 1653380
    },
    {
      "epoch": 2.7058253634715212,
      "grad_norm": 1.1461213827133179,
      "learning_rate": 4.618582922056101e-06,
      "loss": 0.0167,
      "step": 1653400
    },
    {
      "epoch": 2.705858093910175,
      "grad_norm": 0.36039480566978455,
      "learning_rate": 4.618517029842584e-06,
      "loss": 0.0154,
      "step": 1653420
    },
    {
      "epoch": 2.705890824348828,
      "grad_norm": 0.31520286202430725,
      "learning_rate": 4.618451137629067e-06,
      "loss": 0.0184,
      "step": 1653440
    },
    {
      "epoch": 2.705923554787481,
      "grad_norm": 0.616121768951416,
      "learning_rate": 4.6183852454155496e-06,
      "loss": 0.0133,
      "step": 1653460
    },
    {
      "epoch": 2.7059562852261347,
      "grad_norm": 0.9117964506149292,
      "learning_rate": 4.618319353202032e-06,
      "loss": 0.0116,
      "step": 1653480
    },
    {
      "epoch": 2.705989015664788,
      "grad_norm": 0.36400148272514343,
      "learning_rate": 4.618253460988516e-06,
      "loss": 0.0174,
      "step": 1653500
    },
    {
      "epoch": 2.706021746103441,
      "grad_norm": 0.11457432806491852,
      "learning_rate": 4.618187568774999e-06,
      "loss": 0.0113,
      "step": 1653520
    },
    {
      "epoch": 2.7060544765420946,
      "grad_norm": 0.268437922000885,
      "learning_rate": 4.618121676561481e-06,
      "loss": 0.0127,
      "step": 1653540
    },
    {
      "epoch": 2.7060872069807482,
      "grad_norm": 0.2965445816516876,
      "learning_rate": 4.618055784347964e-06,
      "loss": 0.016,
      "step": 1653560
    },
    {
      "epoch": 2.7061199374194014,
      "grad_norm": 0.6863998770713806,
      "learning_rate": 4.617989892134447e-06,
      "loss": 0.0142,
      "step": 1653580
    },
    {
      "epoch": 2.7061526678580545,
      "grad_norm": 1.1175537109375,
      "learning_rate": 4.61792399992093e-06,
      "loss": 0.0172,
      "step": 1653600
    },
    {
      "epoch": 2.706185398296708,
      "grad_norm": 0.2977917492389679,
      "learning_rate": 4.617858107707412e-06,
      "loss": 0.0147,
      "step": 1653620
    },
    {
      "epoch": 2.7062181287353613,
      "grad_norm": 0.5963128805160522,
      "learning_rate": 4.617792215493895e-06,
      "loss": 0.0093,
      "step": 1653640
    },
    {
      "epoch": 2.7062508591740144,
      "grad_norm": 0.26959413290023804,
      "learning_rate": 4.617726323280378e-06,
      "loss": 0.0102,
      "step": 1653660
    },
    {
      "epoch": 2.706283589612668,
      "grad_norm": 0.24798092246055603,
      "learning_rate": 4.617660431066861e-06,
      "loss": 0.0112,
      "step": 1653680
    },
    {
      "epoch": 2.706316320051321,
      "grad_norm": 0.41965875029563904,
      "learning_rate": 4.617594538853344e-06,
      "loss": 0.0135,
      "step": 1653700
    },
    {
      "epoch": 2.7063490504899748,
      "grad_norm": 0.6426476240158081,
      "learning_rate": 4.617528646639827e-06,
      "loss": 0.0174,
      "step": 1653720
    },
    {
      "epoch": 2.706381780928628,
      "grad_norm": 0.3466891944408417,
      "learning_rate": 4.6174627544263105e-06,
      "loss": 0.0117,
      "step": 1653740
    },
    {
      "epoch": 2.7064145113672815,
      "grad_norm": 0.04424377530813217,
      "learning_rate": 4.617396862212793e-06,
      "loss": 0.0173,
      "step": 1653760
    },
    {
      "epoch": 2.7064472418059347,
      "grad_norm": 1.1508663892745972,
      "learning_rate": 4.617330969999276e-06,
      "loss": 0.0182,
      "step": 1653780
    },
    {
      "epoch": 2.706479972244588,
      "grad_norm": 0.4424949288368225,
      "learning_rate": 4.617265077785759e-06,
      "loss": 0.0139,
      "step": 1653800
    },
    {
      "epoch": 2.7065127026832414,
      "grad_norm": 0.6707673668861389,
      "learning_rate": 4.617199185572241e-06,
      "loss": 0.0118,
      "step": 1653820
    },
    {
      "epoch": 2.7065454331218946,
      "grad_norm": 0.12415565550327301,
      "learning_rate": 4.617133293358724e-06,
      "loss": 0.0089,
      "step": 1653840
    },
    {
      "epoch": 2.706578163560548,
      "grad_norm": 0.17973461747169495,
      "learning_rate": 4.617067401145207e-06,
      "loss": 0.0117,
      "step": 1653860
    },
    {
      "epoch": 2.7066108939992013,
      "grad_norm": 0.3330416679382324,
      "learning_rate": 4.61700150893169e-06,
      "loss": 0.0112,
      "step": 1653880
    },
    {
      "epoch": 2.706643624437855,
      "grad_norm": 0.25023338198661804,
      "learning_rate": 4.616935616718173e-06,
      "loss": 0.0198,
      "step": 1653900
    },
    {
      "epoch": 2.706676354876508,
      "grad_norm": 1.2558104991912842,
      "learning_rate": 4.616869724504656e-06,
      "loss": 0.0128,
      "step": 1653920
    },
    {
      "epoch": 2.706709085315161,
      "grad_norm": 2.0189883708953857,
      "learning_rate": 4.616803832291139e-06,
      "loss": 0.0163,
      "step": 1653940
    },
    {
      "epoch": 2.706741815753815,
      "grad_norm": 0.0680643767118454,
      "learning_rate": 4.6167379400776214e-06,
      "loss": 0.0107,
      "step": 1653960
    },
    {
      "epoch": 2.706774546192468,
      "grad_norm": 0.6085197925567627,
      "learning_rate": 4.616672047864104e-06,
      "loss": 0.0162,
      "step": 1653980
    },
    {
      "epoch": 2.7068072766311215,
      "grad_norm": 0.3831504285335541,
      "learning_rate": 4.616606155650587e-06,
      "loss": 0.0129,
      "step": 1654000
    },
    {
      "epoch": 2.7068400070697747,
      "grad_norm": 0.7286141514778137,
      "learning_rate": 4.61654026343707e-06,
      "loss": 0.0194,
      "step": 1654020
    },
    {
      "epoch": 2.7068727375084283,
      "grad_norm": 0.07877659797668457,
      "learning_rate": 4.616474371223552e-06,
      "loss": 0.0118,
      "step": 1654040
    },
    {
      "epoch": 2.7069054679470814,
      "grad_norm": 0.045833952724933624,
      "learning_rate": 4.616408479010035e-06,
      "loss": 0.0142,
      "step": 1654060
    },
    {
      "epoch": 2.7069381983857346,
      "grad_norm": 0.5683315396308899,
      "learning_rate": 4.616342586796519e-06,
      "loss": 0.0117,
      "step": 1654080
    },
    {
      "epoch": 2.706970928824388,
      "grad_norm": 0.06458543986082077,
      "learning_rate": 4.6162766945830015e-06,
      "loss": 0.0162,
      "step": 1654100
    },
    {
      "epoch": 2.7070036592630413,
      "grad_norm": 0.2734083831310272,
      "learning_rate": 4.616210802369484e-06,
      "loss": 0.0167,
      "step": 1654120
    },
    {
      "epoch": 2.707036389701695,
      "grad_norm": 0.18030792474746704,
      "learning_rate": 4.616144910155968e-06,
      "loss": 0.0155,
      "step": 1654140
    },
    {
      "epoch": 2.707069120140348,
      "grad_norm": 0.331293523311615,
      "learning_rate": 4.6160790179424505e-06,
      "loss": 0.0172,
      "step": 1654160
    },
    {
      "epoch": 2.7071018505790017,
      "grad_norm": 0.7331915497779846,
      "learning_rate": 4.616013125728933e-06,
      "loss": 0.0191,
      "step": 1654180
    },
    {
      "epoch": 2.707134581017655,
      "grad_norm": 0.36950087547302246,
      "learning_rate": 4.615947233515416e-06,
      "loss": 0.0084,
      "step": 1654200
    },
    {
      "epoch": 2.707167311456308,
      "grad_norm": 0.5221837162971497,
      "learning_rate": 4.615881341301899e-06,
      "loss": 0.0158,
      "step": 1654220
    },
    {
      "epoch": 2.7072000418949616,
      "grad_norm": 0.12910304963588715,
      "learning_rate": 4.6158154490883815e-06,
      "loss": 0.01,
      "step": 1654240
    },
    {
      "epoch": 2.7072327723336147,
      "grad_norm": 0.12821049988269806,
      "learning_rate": 4.615749556874864e-06,
      "loss": 0.0257,
      "step": 1654260
    },
    {
      "epoch": 2.7072655027722683,
      "grad_norm": 0.34398210048675537,
      "learning_rate": 4.615683664661347e-06,
      "loss": 0.0208,
      "step": 1654280
    },
    {
      "epoch": 2.7072982332109214,
      "grad_norm": 0.1745155304670334,
      "learning_rate": 4.6156177724478306e-06,
      "loss": 0.0101,
      "step": 1654300
    },
    {
      "epoch": 2.707330963649575,
      "grad_norm": 0.3422726094722748,
      "learning_rate": 4.615551880234313e-06,
      "loss": 0.0153,
      "step": 1654320
    },
    {
      "epoch": 2.707363694088228,
      "grad_norm": 0.3743886649608612,
      "learning_rate": 4.615485988020796e-06,
      "loss": 0.0083,
      "step": 1654340
    },
    {
      "epoch": 2.7073964245268813,
      "grad_norm": 0.1669999212026596,
      "learning_rate": 4.615420095807279e-06,
      "loss": 0.0159,
      "step": 1654360
    },
    {
      "epoch": 2.707429154965535,
      "grad_norm": 0.32390180230140686,
      "learning_rate": 4.6153542035937615e-06,
      "loss": 0.0181,
      "step": 1654380
    },
    {
      "epoch": 2.707461885404188,
      "grad_norm": 0.5046222805976868,
      "learning_rate": 4.615288311380244e-06,
      "loss": 0.013,
      "step": 1654400
    },
    {
      "epoch": 2.7074946158428417,
      "grad_norm": 2.0047903060913086,
      "learning_rate": 4.615222419166727e-06,
      "loss": 0.0103,
      "step": 1654420
    },
    {
      "epoch": 2.707527346281495,
      "grad_norm": 0.2982129752635956,
      "learning_rate": 4.615156526953211e-06,
      "loss": 0.0163,
      "step": 1654440
    },
    {
      "epoch": 2.7075600767201484,
      "grad_norm": 0.1364472359418869,
      "learning_rate": 4.615090634739693e-06,
      "loss": 0.0189,
      "step": 1654460
    },
    {
      "epoch": 2.7075928071588016,
      "grad_norm": 0.6075469851493835,
      "learning_rate": 4.615024742526176e-06,
      "loss": 0.0129,
      "step": 1654480
    },
    {
      "epoch": 2.7076255375974547,
      "grad_norm": 0.04068594053387642,
      "learning_rate": 4.614958850312659e-06,
      "loss": 0.014,
      "step": 1654500
    },
    {
      "epoch": 2.7076582680361083,
      "grad_norm": 0.650484025478363,
      "learning_rate": 4.6148929580991415e-06,
      "loss": 0.0141,
      "step": 1654520
    },
    {
      "epoch": 2.7076909984747615,
      "grad_norm": 0.4711371660232544,
      "learning_rate": 4.614827065885625e-06,
      "loss": 0.0135,
      "step": 1654540
    },
    {
      "epoch": 2.707723728913415,
      "grad_norm": 0.4811626374721527,
      "learning_rate": 4.614761173672108e-06,
      "loss": 0.0116,
      "step": 1654560
    },
    {
      "epoch": 2.707756459352068,
      "grad_norm": 0.11111754924058914,
      "learning_rate": 4.614695281458591e-06,
      "loss": 0.0106,
      "step": 1654580
    },
    {
      "epoch": 2.707789189790722,
      "grad_norm": 0.21044524013996124,
      "learning_rate": 4.614629389245073e-06,
      "loss": 0.0096,
      "step": 1654600
    },
    {
      "epoch": 2.707821920229375,
      "grad_norm": 0.4027809500694275,
      "learning_rate": 4.614563497031556e-06,
      "loss": 0.0116,
      "step": 1654620
    },
    {
      "epoch": 2.707854650668028,
      "grad_norm": 0.03970799967646599,
      "learning_rate": 4.614497604818039e-06,
      "loss": 0.0158,
      "step": 1654640
    },
    {
      "epoch": 2.7078873811066817,
      "grad_norm": 0.4267989993095398,
      "learning_rate": 4.6144317126045216e-06,
      "loss": 0.0145,
      "step": 1654660
    },
    {
      "epoch": 2.707920111545335,
      "grad_norm": 0.37745776772499084,
      "learning_rate": 4.614365820391004e-06,
      "loss": 0.0116,
      "step": 1654680
    },
    {
      "epoch": 2.707952841983988,
      "grad_norm": 0.21744295954704285,
      "learning_rate": 4.614299928177488e-06,
      "loss": 0.0127,
      "step": 1654700
    },
    {
      "epoch": 2.7079855724226416,
      "grad_norm": 0.2724902927875519,
      "learning_rate": 4.614234035963971e-06,
      "loss": 0.0105,
      "step": 1654720
    },
    {
      "epoch": 2.708018302861295,
      "grad_norm": 0.3407490849494934,
      "learning_rate": 4.614168143750453e-06,
      "loss": 0.0153,
      "step": 1654740
    },
    {
      "epoch": 2.7080510332999483,
      "grad_norm": 0.5688180923461914,
      "learning_rate": 4.614102251536936e-06,
      "loss": 0.0095,
      "step": 1654760
    },
    {
      "epoch": 2.7080837637386015,
      "grad_norm": 0.4799545109272003,
      "learning_rate": 4.614036359323419e-06,
      "loss": 0.016,
      "step": 1654780
    },
    {
      "epoch": 2.708116494177255,
      "grad_norm": 0.33742600679397583,
      "learning_rate": 4.6139704671099024e-06,
      "loss": 0.0115,
      "step": 1654800
    },
    {
      "epoch": 2.7081492246159082,
      "grad_norm": 0.33861997723579407,
      "learning_rate": 4.613904574896385e-06,
      "loss": 0.0098,
      "step": 1654820
    },
    {
      "epoch": 2.7081819550545614,
      "grad_norm": 0.19627684354782104,
      "learning_rate": 4.613838682682868e-06,
      "loss": 0.0158,
      "step": 1654840
    },
    {
      "epoch": 2.708214685493215,
      "grad_norm": 0.8572350144386292,
      "learning_rate": 4.613772790469351e-06,
      "loss": 0.0259,
      "step": 1654860
    },
    {
      "epoch": 2.7082474159318686,
      "grad_norm": 0.14792108535766602,
      "learning_rate": 4.613706898255833e-06,
      "loss": 0.008,
      "step": 1654880
    },
    {
      "epoch": 2.7082801463705217,
      "grad_norm": 0.2451399862766266,
      "learning_rate": 4.613641006042316e-06,
      "loss": 0.0107,
      "step": 1654900
    },
    {
      "epoch": 2.708312876809175,
      "grad_norm": 0.5990016460418701,
      "learning_rate": 4.6135751138288e-06,
      "loss": 0.015,
      "step": 1654920
    },
    {
      "epoch": 2.7083456072478285,
      "grad_norm": 1.5020859241485596,
      "learning_rate": 4.6135092216152825e-06,
      "loss": 0.01,
      "step": 1654940
    },
    {
      "epoch": 2.7083783376864816,
      "grad_norm": 0.28890544176101685,
      "learning_rate": 4.613443329401765e-06,
      "loss": 0.0129,
      "step": 1654960
    },
    {
      "epoch": 2.7084110681251348,
      "grad_norm": 0.4700704514980316,
      "learning_rate": 4.613377437188248e-06,
      "loss": 0.0111,
      "step": 1654980
    },
    {
      "epoch": 2.7084437985637884,
      "grad_norm": 0.36227160692214966,
      "learning_rate": 4.613311544974731e-06,
      "loss": 0.011,
      "step": 1655000
    },
    {
      "epoch": 2.708476529002442,
      "grad_norm": 0.2836671471595764,
      "learning_rate": 4.613245652761213e-06,
      "loss": 0.0123,
      "step": 1655020
    },
    {
      "epoch": 2.708509259441095,
      "grad_norm": 0.2322821021080017,
      "learning_rate": 4.613179760547696e-06,
      "loss": 0.0158,
      "step": 1655040
    },
    {
      "epoch": 2.7085419898797483,
      "grad_norm": 0.7268139719963074,
      "learning_rate": 4.613113868334179e-06,
      "loss": 0.016,
      "step": 1655060
    },
    {
      "epoch": 2.708574720318402,
      "grad_norm": 0.4979608356952667,
      "learning_rate": 4.613047976120662e-06,
      "loss": 0.0141,
      "step": 1655080
    },
    {
      "epoch": 2.708607450757055,
      "grad_norm": 0.28545868396759033,
      "learning_rate": 4.612982083907145e-06,
      "loss": 0.0141,
      "step": 1655100
    },
    {
      "epoch": 2.708640181195708,
      "grad_norm": 0.2728753685951233,
      "learning_rate": 4.612916191693628e-06,
      "loss": 0.0139,
      "step": 1655120
    },
    {
      "epoch": 2.7086729116343617,
      "grad_norm": 1.627529263496399,
      "learning_rate": 4.612850299480111e-06,
      "loss": 0.0137,
      "step": 1655140
    },
    {
      "epoch": 2.708705642073015,
      "grad_norm": 0.726403534412384,
      "learning_rate": 4.612784407266594e-06,
      "loss": 0.0132,
      "step": 1655160
    },
    {
      "epoch": 2.7087383725116685,
      "grad_norm": 0.23208816349506378,
      "learning_rate": 4.612718515053077e-06,
      "loss": 0.0135,
      "step": 1655180
    },
    {
      "epoch": 2.7087711029503216,
      "grad_norm": 0.20746617019176483,
      "learning_rate": 4.61265262283956e-06,
      "loss": 0.0128,
      "step": 1655200
    },
    {
      "epoch": 2.7088038333889752,
      "grad_norm": 0.5254524946212769,
      "learning_rate": 4.6125867306260425e-06,
      "loss": 0.015,
      "step": 1655220
    },
    {
      "epoch": 2.7088365638276284,
      "grad_norm": 0.1438945084810257,
      "learning_rate": 4.612520838412525e-06,
      "loss": 0.0085,
      "step": 1655240
    },
    {
      "epoch": 2.7088692942662815,
      "grad_norm": 0.2701626420021057,
      "learning_rate": 4.612454946199008e-06,
      "loss": 0.0155,
      "step": 1655260
    },
    {
      "epoch": 2.708902024704935,
      "grad_norm": 0.28127044439315796,
      "learning_rate": 4.612389053985491e-06,
      "loss": 0.0118,
      "step": 1655280
    },
    {
      "epoch": 2.7089347551435883,
      "grad_norm": 0.7464891672134399,
      "learning_rate": 4.6123231617719735e-06,
      "loss": 0.0129,
      "step": 1655300
    },
    {
      "epoch": 2.708967485582242,
      "grad_norm": 0.09672936052083969,
      "learning_rate": 4.612257269558457e-06,
      "loss": 0.0153,
      "step": 1655320
    },
    {
      "epoch": 2.709000216020895,
      "grad_norm": 0.5312827229499817,
      "learning_rate": 4.61219137734494e-06,
      "loss": 0.0106,
      "step": 1655340
    },
    {
      "epoch": 2.7090329464595486,
      "grad_norm": 0.25506141781806946,
      "learning_rate": 4.6121254851314225e-06,
      "loss": 0.0162,
      "step": 1655360
    },
    {
      "epoch": 2.7090656768982018,
      "grad_norm": 0.172247976064682,
      "learning_rate": 4.612059592917905e-06,
      "loss": 0.0133,
      "step": 1655380
    },
    {
      "epoch": 2.709098407336855,
      "grad_norm": 0.6794188022613525,
      "learning_rate": 4.611993700704388e-06,
      "loss": 0.0151,
      "step": 1655400
    },
    {
      "epoch": 2.7091311377755085,
      "grad_norm": 0.5053899884223938,
      "learning_rate": 4.611927808490871e-06,
      "loss": 0.0163,
      "step": 1655420
    },
    {
      "epoch": 2.7091638682141617,
      "grad_norm": 1.6418269872665405,
      "learning_rate": 4.6118619162773535e-06,
      "loss": 0.0169,
      "step": 1655440
    },
    {
      "epoch": 2.7091965986528153,
      "grad_norm": 0.15813589096069336,
      "learning_rate": 4.611796024063836e-06,
      "loss": 0.0191,
      "step": 1655460
    },
    {
      "epoch": 2.7092293290914684,
      "grad_norm": 0.5690213441848755,
      "learning_rate": 4.611730131850319e-06,
      "loss": 0.0149,
      "step": 1655480
    },
    {
      "epoch": 2.709262059530122,
      "grad_norm": 0.408712774515152,
      "learning_rate": 4.6116642396368026e-06,
      "loss": 0.0147,
      "step": 1655500
    },
    {
      "epoch": 2.709294789968775,
      "grad_norm": 0.11633234471082687,
      "learning_rate": 4.611598347423285e-06,
      "loss": 0.0111,
      "step": 1655520
    },
    {
      "epoch": 2.7093275204074283,
      "grad_norm": 0.4079974591732025,
      "learning_rate": 4.611532455209768e-06,
      "loss": 0.0152,
      "step": 1655540
    },
    {
      "epoch": 2.709360250846082,
      "grad_norm": 0.0782158151268959,
      "learning_rate": 4.611466562996252e-06,
      "loss": 0.0161,
      "step": 1655560
    },
    {
      "epoch": 2.709392981284735,
      "grad_norm": 0.5131924748420715,
      "learning_rate": 4.611400670782734e-06,
      "loss": 0.0138,
      "step": 1655580
    },
    {
      "epoch": 2.7094257117233886,
      "grad_norm": 0.33665114641189575,
      "learning_rate": 4.611334778569217e-06,
      "loss": 0.016,
      "step": 1655600
    },
    {
      "epoch": 2.709458442162042,
      "grad_norm": 7.584549427032471,
      "learning_rate": 4.6112688863557e-06,
      "loss": 0.0163,
      "step": 1655620
    },
    {
      "epoch": 2.7094911726006954,
      "grad_norm": 0.4518199861049652,
      "learning_rate": 4.611202994142183e-06,
      "loss": 0.0114,
      "step": 1655640
    },
    {
      "epoch": 2.7095239030393485,
      "grad_norm": 0.3661286234855652,
      "learning_rate": 4.611137101928665e-06,
      "loss": 0.0192,
      "step": 1655660
    },
    {
      "epoch": 2.7095566334780017,
      "grad_norm": 0.2350904941558838,
      "learning_rate": 4.611071209715148e-06,
      "loss": 0.0163,
      "step": 1655680
    },
    {
      "epoch": 2.7095893639166553,
      "grad_norm": 0.10258198529481888,
      "learning_rate": 4.611005317501631e-06,
      "loss": 0.0155,
      "step": 1655700
    },
    {
      "epoch": 2.7096220943553084,
      "grad_norm": 0.5927408337593079,
      "learning_rate": 4.610939425288114e-06,
      "loss": 0.0191,
      "step": 1655720
    },
    {
      "epoch": 2.709654824793962,
      "grad_norm": 0.047617752104997635,
      "learning_rate": 4.610873533074597e-06,
      "loss": 0.0174,
      "step": 1655740
    },
    {
      "epoch": 2.709687555232615,
      "grad_norm": 0.36027976870536804,
      "learning_rate": 4.61080764086108e-06,
      "loss": 0.0104,
      "step": 1655760
    },
    {
      "epoch": 2.7097202856712688,
      "grad_norm": 0.14440229535102844,
      "learning_rate": 4.610741748647563e-06,
      "loss": 0.0112,
      "step": 1655780
    },
    {
      "epoch": 2.709753016109922,
      "grad_norm": 0.2969353199005127,
      "learning_rate": 4.610675856434045e-06,
      "loss": 0.0132,
      "step": 1655800
    },
    {
      "epoch": 2.709785746548575,
      "grad_norm": 0.33373862504959106,
      "learning_rate": 4.610609964220528e-06,
      "loss": 0.0104,
      "step": 1655820
    },
    {
      "epoch": 2.7098184769872287,
      "grad_norm": 0.4912702441215515,
      "learning_rate": 4.610544072007011e-06,
      "loss": 0.0127,
      "step": 1655840
    },
    {
      "epoch": 2.709851207425882,
      "grad_norm": 0.10315228998661041,
      "learning_rate": 4.610478179793494e-06,
      "loss": 0.0099,
      "step": 1655860
    },
    {
      "epoch": 2.7098839378645354,
      "grad_norm": 0.39934197068214417,
      "learning_rate": 4.610412287579977e-06,
      "loss": 0.0142,
      "step": 1655880
    },
    {
      "epoch": 2.7099166683031886,
      "grad_norm": 0.08003854751586914,
      "learning_rate": 4.61034639536646e-06,
      "loss": 0.0155,
      "step": 1655900
    },
    {
      "epoch": 2.709949398741842,
      "grad_norm": 0.33776623010635376,
      "learning_rate": 4.610280503152943e-06,
      "loss": 0.0157,
      "step": 1655920
    },
    {
      "epoch": 2.7099821291804953,
      "grad_norm": 0.5046213865280151,
      "learning_rate": 4.610214610939425e-06,
      "loss": 0.0177,
      "step": 1655940
    },
    {
      "epoch": 2.7100148596191485,
      "grad_norm": 0.3894170820713043,
      "learning_rate": 4.610148718725909e-06,
      "loss": 0.0121,
      "step": 1655960
    },
    {
      "epoch": 2.710047590057802,
      "grad_norm": 0.1752992570400238,
      "learning_rate": 4.610082826512392e-06,
      "loss": 0.0106,
      "step": 1655980
    },
    {
      "epoch": 2.710080320496455,
      "grad_norm": 0.06745640188455582,
      "learning_rate": 4.6100169342988744e-06,
      "loss": 0.0166,
      "step": 1656000
    },
    {
      "epoch": 2.710113050935109,
      "grad_norm": 0.5174589157104492,
      "learning_rate": 4.609951042085357e-06,
      "loss": 0.0074,
      "step": 1656020
    },
    {
      "epoch": 2.710145781373762,
      "grad_norm": 0.9986464381217957,
      "learning_rate": 4.60988514987184e-06,
      "loss": 0.0109,
      "step": 1656040
    },
    {
      "epoch": 2.7101785118124155,
      "grad_norm": 0.4994428753852844,
      "learning_rate": 4.609819257658323e-06,
      "loss": 0.0095,
      "step": 1656060
    },
    {
      "epoch": 2.7102112422510687,
      "grad_norm": 0.5688464045524597,
      "learning_rate": 4.609753365444805e-06,
      "loss": 0.0148,
      "step": 1656080
    },
    {
      "epoch": 2.710243972689722,
      "grad_norm": 0.45807942748069763,
      "learning_rate": 4.609687473231288e-06,
      "loss": 0.0125,
      "step": 1656100
    },
    {
      "epoch": 2.7102767031283754,
      "grad_norm": 0.06971890479326248,
      "learning_rate": 4.609621581017772e-06,
      "loss": 0.012,
      "step": 1656120
    },
    {
      "epoch": 2.7103094335670286,
      "grad_norm": 0.14903481304645538,
      "learning_rate": 4.6095556888042545e-06,
      "loss": 0.0134,
      "step": 1656140
    },
    {
      "epoch": 2.7103421640056817,
      "grad_norm": 10.159677505493164,
      "learning_rate": 4.609489796590737e-06,
      "loss": 0.0172,
      "step": 1656160
    },
    {
      "epoch": 2.7103748944443353,
      "grad_norm": 0.420433908700943,
      "learning_rate": 4.60942390437722e-06,
      "loss": 0.0182,
      "step": 1656180
    },
    {
      "epoch": 2.710407624882989,
      "grad_norm": 2.3147075176239014,
      "learning_rate": 4.6093580121637035e-06,
      "loss": 0.0174,
      "step": 1656200
    },
    {
      "epoch": 2.710440355321642,
      "grad_norm": 0.5100041627883911,
      "learning_rate": 4.609292119950186e-06,
      "loss": 0.0115,
      "step": 1656220
    },
    {
      "epoch": 2.710473085760295,
      "grad_norm": 0.21437573432922363,
      "learning_rate": 4.609226227736669e-06,
      "loss": 0.0173,
      "step": 1656240
    },
    {
      "epoch": 2.710505816198949,
      "grad_norm": 0.06054733693599701,
      "learning_rate": 4.609160335523152e-06,
      "loss": 0.0122,
      "step": 1656260
    },
    {
      "epoch": 2.710538546637602,
      "grad_norm": 0.06284520775079727,
      "learning_rate": 4.6090944433096345e-06,
      "loss": 0.0128,
      "step": 1656280
    },
    {
      "epoch": 2.710571277076255,
      "grad_norm": 0.2649884521961212,
      "learning_rate": 4.609028551096117e-06,
      "loss": 0.0127,
      "step": 1656300
    },
    {
      "epoch": 2.7106040075149087,
      "grad_norm": 0.2890510857105255,
      "learning_rate": 4.6089626588826e-06,
      "loss": 0.015,
      "step": 1656320
    },
    {
      "epoch": 2.7106367379535623,
      "grad_norm": 0.4444688856601715,
      "learning_rate": 4.6088967666690836e-06,
      "loss": 0.0141,
      "step": 1656340
    },
    {
      "epoch": 2.7106694683922155,
      "grad_norm": 0.08621188998222351,
      "learning_rate": 4.608830874455566e-06,
      "loss": 0.0133,
      "step": 1656360
    },
    {
      "epoch": 2.7107021988308686,
      "grad_norm": 0.43465667963027954,
      "learning_rate": 4.608764982242049e-06,
      "loss": 0.0141,
      "step": 1656380
    },
    {
      "epoch": 2.710734929269522,
      "grad_norm": 0.14978617429733276,
      "learning_rate": 4.608699090028532e-06,
      "loss": 0.0121,
      "step": 1656400
    },
    {
      "epoch": 2.7107676597081753,
      "grad_norm": 0.3277655243873596,
      "learning_rate": 4.6086331978150145e-06,
      "loss": 0.0136,
      "step": 1656420
    },
    {
      "epoch": 2.7108003901468285,
      "grad_norm": 0.5757526755332947,
      "learning_rate": 4.608567305601497e-06,
      "loss": 0.0107,
      "step": 1656440
    },
    {
      "epoch": 2.710833120585482,
      "grad_norm": 0.27251458168029785,
      "learning_rate": 4.60850141338798e-06,
      "loss": 0.0177,
      "step": 1656460
    },
    {
      "epoch": 2.7108658510241357,
      "grad_norm": 0.4709727168083191,
      "learning_rate": 4.608435521174463e-06,
      "loss": 0.0142,
      "step": 1656480
    },
    {
      "epoch": 2.710898581462789,
      "grad_norm": 0.37260308861732483,
      "learning_rate": 4.6083696289609455e-06,
      "loss": 0.0092,
      "step": 1656500
    },
    {
      "epoch": 2.710931311901442,
      "grad_norm": 0.9214942455291748,
      "learning_rate": 4.608303736747429e-06,
      "loss": 0.0119,
      "step": 1656520
    },
    {
      "epoch": 2.7109640423400956,
      "grad_norm": 0.1856992393732071,
      "learning_rate": 4.608237844533912e-06,
      "loss": 0.0181,
      "step": 1656540
    },
    {
      "epoch": 2.7109967727787487,
      "grad_norm": 0.5564754009246826,
      "learning_rate": 4.6081719523203945e-06,
      "loss": 0.0138,
      "step": 1656560
    },
    {
      "epoch": 2.711029503217402,
      "grad_norm": 0.09018272161483765,
      "learning_rate": 4.608106060106878e-06,
      "loss": 0.0196,
      "step": 1656580
    },
    {
      "epoch": 2.7110622336560555,
      "grad_norm": 0.1956363171339035,
      "learning_rate": 4.608040167893361e-06,
      "loss": 0.0086,
      "step": 1656600
    },
    {
      "epoch": 2.711094964094709,
      "grad_norm": 0.7971367239952087,
      "learning_rate": 4.607974275679844e-06,
      "loss": 0.0159,
      "step": 1656620
    },
    {
      "epoch": 2.711127694533362,
      "grad_norm": 0.22842460870742798,
      "learning_rate": 4.607908383466326e-06,
      "loss": 0.0117,
      "step": 1656640
    },
    {
      "epoch": 2.7111604249720154,
      "grad_norm": 0.14772504568099976,
      "learning_rate": 4.607842491252809e-06,
      "loss": 0.0093,
      "step": 1656660
    },
    {
      "epoch": 2.711193155410669,
      "grad_norm": 0.2588544487953186,
      "learning_rate": 4.607776599039292e-06,
      "loss": 0.0146,
      "step": 1656680
    },
    {
      "epoch": 2.711225885849322,
      "grad_norm": 0.3761690855026245,
      "learning_rate": 4.6077107068257746e-06,
      "loss": 0.0158,
      "step": 1656700
    },
    {
      "epoch": 2.7112586162879753,
      "grad_norm": 0.42995336651802063,
      "learning_rate": 4.607644814612257e-06,
      "loss": 0.0107,
      "step": 1656720
    },
    {
      "epoch": 2.711291346726629,
      "grad_norm": 0.19715234637260437,
      "learning_rate": 4.607578922398741e-06,
      "loss": 0.0172,
      "step": 1656740
    },
    {
      "epoch": 2.711324077165282,
      "grad_norm": 0.32073843479156494,
      "learning_rate": 4.607513030185224e-06,
      "loss": 0.0097,
      "step": 1656760
    },
    {
      "epoch": 2.7113568076039356,
      "grad_norm": 0.1312260627746582,
      "learning_rate": 4.607447137971706e-06,
      "loss": 0.0142,
      "step": 1656780
    },
    {
      "epoch": 2.7113895380425888,
      "grad_norm": 0.11214257031679153,
      "learning_rate": 4.607381245758189e-06,
      "loss": 0.01,
      "step": 1656800
    },
    {
      "epoch": 2.7114222684812423,
      "grad_norm": 0.42247554659843445,
      "learning_rate": 4.607315353544672e-06,
      "loss": 0.0189,
      "step": 1656820
    },
    {
      "epoch": 2.7114549989198955,
      "grad_norm": 0.40708404779434204,
      "learning_rate": 4.607249461331155e-06,
      "loss": 0.0109,
      "step": 1656840
    },
    {
      "epoch": 2.7114877293585486,
      "grad_norm": 0.10533701628446579,
      "learning_rate": 4.607183569117637e-06,
      "loss": 0.0113,
      "step": 1656860
    },
    {
      "epoch": 2.7115204597972022,
      "grad_norm": 0.9241527318954468,
      "learning_rate": 4.60711767690412e-06,
      "loss": 0.0102,
      "step": 1656880
    },
    {
      "epoch": 2.7115531902358554,
      "grad_norm": 0.3707371652126312,
      "learning_rate": 4.607051784690603e-06,
      "loss": 0.0133,
      "step": 1656900
    },
    {
      "epoch": 2.711585920674509,
      "grad_norm": 0.5412880778312683,
      "learning_rate": 4.606985892477086e-06,
      "loss": 0.0138,
      "step": 1656920
    },
    {
      "epoch": 2.711618651113162,
      "grad_norm": 0.20049357414245605,
      "learning_rate": 4.606920000263569e-06,
      "loss": 0.0125,
      "step": 1656940
    },
    {
      "epoch": 2.7116513815518157,
      "grad_norm": 0.44407621026039124,
      "learning_rate": 4.606854108050052e-06,
      "loss": 0.0108,
      "step": 1656960
    },
    {
      "epoch": 2.711684111990469,
      "grad_norm": 0.39138221740722656,
      "learning_rate": 4.6067882158365355e-06,
      "loss": 0.0177,
      "step": 1656980
    },
    {
      "epoch": 2.711716842429122,
      "grad_norm": 0.3371916711330414,
      "learning_rate": 4.606722323623018e-06,
      "loss": 0.0097,
      "step": 1657000
    },
    {
      "epoch": 2.7117495728677756,
      "grad_norm": 1.294355034828186,
      "learning_rate": 4.606656431409501e-06,
      "loss": 0.0165,
      "step": 1657020
    },
    {
      "epoch": 2.7117823033064288,
      "grad_norm": 0.6731539964675903,
      "learning_rate": 4.606590539195984e-06,
      "loss": 0.0165,
      "step": 1657040
    },
    {
      "epoch": 2.7118150337450824,
      "grad_norm": 0.3560113310813904,
      "learning_rate": 4.6065246469824664e-06,
      "loss": 0.0156,
      "step": 1657060
    },
    {
      "epoch": 2.7118477641837355,
      "grad_norm": 0.24815945327281952,
      "learning_rate": 4.606458754768949e-06,
      "loss": 0.0126,
      "step": 1657080
    },
    {
      "epoch": 2.711880494622389,
      "grad_norm": 0.4929874837398529,
      "learning_rate": 4.606392862555432e-06,
      "loss": 0.0109,
      "step": 1657100
    },
    {
      "epoch": 2.7119132250610423,
      "grad_norm": 0.25979480147361755,
      "learning_rate": 4.606326970341915e-06,
      "loss": 0.0115,
      "step": 1657120
    },
    {
      "epoch": 2.7119459554996954,
      "grad_norm": 0.5679559111595154,
      "learning_rate": 4.606261078128398e-06,
      "loss": 0.0087,
      "step": 1657140
    },
    {
      "epoch": 2.711978685938349,
      "grad_norm": 0.2624916136264801,
      "learning_rate": 4.606195185914881e-06,
      "loss": 0.0139,
      "step": 1657160
    },
    {
      "epoch": 2.712011416377002,
      "grad_norm": 0.30009788274765015,
      "learning_rate": 4.606129293701364e-06,
      "loss": 0.0109,
      "step": 1657180
    },
    {
      "epoch": 2.7120441468156558,
      "grad_norm": 0.9496184587478638,
      "learning_rate": 4.6060634014878465e-06,
      "loss": 0.018,
      "step": 1657200
    },
    {
      "epoch": 2.712076877254309,
      "grad_norm": 0.6404100060462952,
      "learning_rate": 4.605997509274329e-06,
      "loss": 0.0156,
      "step": 1657220
    },
    {
      "epoch": 2.7121096076929625,
      "grad_norm": 0.0896904468536377,
      "learning_rate": 4.605931617060812e-06,
      "loss": 0.0146,
      "step": 1657240
    },
    {
      "epoch": 2.7121423381316156,
      "grad_norm": 0.5258235931396484,
      "learning_rate": 4.6058657248472955e-06,
      "loss": 0.0205,
      "step": 1657260
    },
    {
      "epoch": 2.712175068570269,
      "grad_norm": 1.347365379333496,
      "learning_rate": 4.605799832633778e-06,
      "loss": 0.0135,
      "step": 1657280
    },
    {
      "epoch": 2.7122077990089224,
      "grad_norm": 0.11068417876958847,
      "learning_rate": 4.605733940420261e-06,
      "loss": 0.0126,
      "step": 1657300
    },
    {
      "epoch": 2.7122405294475755,
      "grad_norm": 0.2761824429035187,
      "learning_rate": 4.605668048206744e-06,
      "loss": 0.0132,
      "step": 1657320
    },
    {
      "epoch": 2.712273259886229,
      "grad_norm": NaN,
      "learning_rate": 4.6056021559932265e-06,
      "loss": 0.0093,
      "step": 1657340
    },
    {
      "epoch": 2.7123059903248823,
      "grad_norm": 5.69625997543335,
      "learning_rate": 4.605536263779709e-06,
      "loss": 0.0127,
      "step": 1657360
    },
    {
      "epoch": 2.712338720763536,
      "grad_norm": 0.24968229234218597,
      "learning_rate": 4.605470371566193e-06,
      "loss": 0.0169,
      "step": 1657380
    },
    {
      "epoch": 2.712371451202189,
      "grad_norm": 0.3324180543422699,
      "learning_rate": 4.6054044793526755e-06,
      "loss": 0.0182,
      "step": 1657400
    },
    {
      "epoch": 2.712404181640842,
      "grad_norm": 0.18647466599941254,
      "learning_rate": 4.605338587139158e-06,
      "loss": 0.0157,
      "step": 1657420
    },
    {
      "epoch": 2.7124369120794958,
      "grad_norm": 0.36194759607315063,
      "learning_rate": 4.605272694925641e-06,
      "loss": 0.0148,
      "step": 1657440
    },
    {
      "epoch": 2.712469642518149,
      "grad_norm": 0.17757876217365265,
      "learning_rate": 4.605206802712124e-06,
      "loss": 0.0186,
      "step": 1657460
    },
    {
      "epoch": 2.7125023729568025,
      "grad_norm": 0.30945977568626404,
      "learning_rate": 4.6051409104986065e-06,
      "loss": 0.0169,
      "step": 1657480
    },
    {
      "epoch": 2.7125351033954557,
      "grad_norm": 0.257267564535141,
      "learning_rate": 4.605075018285089e-06,
      "loss": 0.0151,
      "step": 1657500
    },
    {
      "epoch": 2.7125678338341093,
      "grad_norm": 0.2830407917499542,
      "learning_rate": 4.605009126071572e-06,
      "loss": 0.0177,
      "step": 1657520
    },
    {
      "epoch": 2.7126005642727624,
      "grad_norm": 0.8895821571350098,
      "learning_rate": 4.6049432338580556e-06,
      "loss": 0.0176,
      "step": 1657540
    },
    {
      "epoch": 2.7126332947114156,
      "grad_norm": 0.31631654500961304,
      "learning_rate": 4.604877341644538e-06,
      "loss": 0.0141,
      "step": 1657560
    },
    {
      "epoch": 2.712666025150069,
      "grad_norm": 0.20760434865951538,
      "learning_rate": 4.604811449431021e-06,
      "loss": 0.008,
      "step": 1657580
    },
    {
      "epoch": 2.7126987555887223,
      "grad_norm": 0.9679979681968689,
      "learning_rate": 4.604745557217504e-06,
      "loss": 0.012,
      "step": 1657600
    },
    {
      "epoch": 2.712731486027376,
      "grad_norm": 1.0592397451400757,
      "learning_rate": 4.604679665003987e-06,
      "loss": 0.0168,
      "step": 1657620
    },
    {
      "epoch": 2.712764216466029,
      "grad_norm": 0.1156073659658432,
      "learning_rate": 4.60461377279047e-06,
      "loss": 0.0183,
      "step": 1657640
    },
    {
      "epoch": 2.7127969469046826,
      "grad_norm": 0.6411867141723633,
      "learning_rate": 4.604547880576953e-06,
      "loss": 0.0146,
      "step": 1657660
    },
    {
      "epoch": 2.712829677343336,
      "grad_norm": 0.578567385673523,
      "learning_rate": 4.604481988363436e-06,
      "loss": 0.0148,
      "step": 1657680
    },
    {
      "epoch": 2.712862407781989,
      "grad_norm": 0.46436604857444763,
      "learning_rate": 4.604416096149918e-06,
      "loss": 0.0121,
      "step": 1657700
    },
    {
      "epoch": 2.7128951382206425,
      "grad_norm": 0.36265829205513,
      "learning_rate": 4.604350203936401e-06,
      "loss": 0.0104,
      "step": 1657720
    },
    {
      "epoch": 2.7129278686592957,
      "grad_norm": 0.06745775043964386,
      "learning_rate": 4.604284311722884e-06,
      "loss": 0.0142,
      "step": 1657740
    },
    {
      "epoch": 2.712960599097949,
      "grad_norm": 0.331942617893219,
      "learning_rate": 4.604218419509367e-06,
      "loss": 0.0125,
      "step": 1657760
    },
    {
      "epoch": 2.7129933295366024,
      "grad_norm": 0.6302595734596252,
      "learning_rate": 4.60415252729585e-06,
      "loss": 0.0133,
      "step": 1657780
    },
    {
      "epoch": 2.713026059975256,
      "grad_norm": 0.1903526335954666,
      "learning_rate": 4.604086635082333e-06,
      "loss": 0.0141,
      "step": 1657800
    },
    {
      "epoch": 2.713058790413909,
      "grad_norm": 0.7648199796676636,
      "learning_rate": 4.604020742868816e-06,
      "loss": 0.0093,
      "step": 1657820
    },
    {
      "epoch": 2.7130915208525623,
      "grad_norm": 0.5709174275398254,
      "learning_rate": 4.603954850655298e-06,
      "loss": 0.0115,
      "step": 1657840
    },
    {
      "epoch": 2.713124251291216,
      "grad_norm": 0.12942254543304443,
      "learning_rate": 4.603888958441781e-06,
      "loss": 0.0139,
      "step": 1657860
    },
    {
      "epoch": 2.713156981729869,
      "grad_norm": 0.6355511546134949,
      "learning_rate": 4.603823066228264e-06,
      "loss": 0.0142,
      "step": 1657880
    },
    {
      "epoch": 2.7131897121685222,
      "grad_norm": 0.24221280217170715,
      "learning_rate": 4.603757174014747e-06,
      "loss": 0.0103,
      "step": 1657900
    },
    {
      "epoch": 2.713222442607176,
      "grad_norm": 0.3610404133796692,
      "learning_rate": 4.603691281801229e-06,
      "loss": 0.01,
      "step": 1657920
    },
    {
      "epoch": 2.7132551730458294,
      "grad_norm": 0.6295295357704163,
      "learning_rate": 4.603625389587713e-06,
      "loss": 0.0155,
      "step": 1657940
    },
    {
      "epoch": 2.7132879034844826,
      "grad_norm": 0.39816564321517944,
      "learning_rate": 4.603559497374196e-06,
      "loss": 0.0086,
      "step": 1657960
    },
    {
      "epoch": 2.7133206339231357,
      "grad_norm": 0.5929073691368103,
      "learning_rate": 4.603493605160678e-06,
      "loss": 0.0118,
      "step": 1657980
    },
    {
      "epoch": 2.7133533643617893,
      "grad_norm": 0.5179719924926758,
      "learning_rate": 4.603427712947162e-06,
      "loss": 0.0127,
      "step": 1658000
    },
    {
      "epoch": 2.7133860948004425,
      "grad_norm": 0.3021845817565918,
      "learning_rate": 4.603361820733645e-06,
      "loss": 0.0133,
      "step": 1658020
    },
    {
      "epoch": 2.7134188252390956,
      "grad_norm": 0.10309401154518127,
      "learning_rate": 4.6032959285201274e-06,
      "loss": 0.0101,
      "step": 1658040
    },
    {
      "epoch": 2.713451555677749,
      "grad_norm": 0.40466853976249695,
      "learning_rate": 4.60323003630661e-06,
      "loss": 0.0141,
      "step": 1658060
    },
    {
      "epoch": 2.713484286116403,
      "grad_norm": 0.4108997583389282,
      "learning_rate": 4.603164144093093e-06,
      "loss": 0.0129,
      "step": 1658080
    },
    {
      "epoch": 2.713517016555056,
      "grad_norm": 0.7230708599090576,
      "learning_rate": 4.603098251879576e-06,
      "loss": 0.0143,
      "step": 1658100
    },
    {
      "epoch": 2.713549746993709,
      "grad_norm": 0.14763914048671722,
      "learning_rate": 4.603032359666058e-06,
      "loss": 0.0116,
      "step": 1658120
    },
    {
      "epoch": 2.7135824774323627,
      "grad_norm": 0.33592459559440613,
      "learning_rate": 4.602966467452541e-06,
      "loss": 0.0193,
      "step": 1658140
    },
    {
      "epoch": 2.713615207871016,
      "grad_norm": 0.5804426074028015,
      "learning_rate": 4.602900575239025e-06,
      "loss": 0.0128,
      "step": 1658160
    },
    {
      "epoch": 2.713647938309669,
      "grad_norm": 0.06709848344326019,
      "learning_rate": 4.6028346830255075e-06,
      "loss": 0.0126,
      "step": 1658180
    },
    {
      "epoch": 2.7136806687483226,
      "grad_norm": 0.24061286449432373,
      "learning_rate": 4.60276879081199e-06,
      "loss": 0.0106,
      "step": 1658200
    },
    {
      "epoch": 2.7137133991869757,
      "grad_norm": 0.2036326676607132,
      "learning_rate": 4.602702898598473e-06,
      "loss": 0.0147,
      "step": 1658220
    },
    {
      "epoch": 2.7137461296256293,
      "grad_norm": 0.8550323843955994,
      "learning_rate": 4.602637006384956e-06,
      "loss": 0.0095,
      "step": 1658240
    },
    {
      "epoch": 2.7137788600642825,
      "grad_norm": 0.18279147148132324,
      "learning_rate": 4.6025711141714384e-06,
      "loss": 0.0138,
      "step": 1658260
    },
    {
      "epoch": 2.713811590502936,
      "grad_norm": 0.9050366282463074,
      "learning_rate": 4.602505221957921e-06,
      "loss": 0.0154,
      "step": 1658280
    },
    {
      "epoch": 2.7138443209415892,
      "grad_norm": 0.21232068538665771,
      "learning_rate": 4.602439329744404e-06,
      "loss": 0.0101,
      "step": 1658300
    },
    {
      "epoch": 2.7138770513802424,
      "grad_norm": 0.9718400239944458,
      "learning_rate": 4.6023734375308875e-06,
      "loss": 0.0139,
      "step": 1658320
    },
    {
      "epoch": 2.713909781818896,
      "grad_norm": 0.18116158246994019,
      "learning_rate": 4.60230754531737e-06,
      "loss": 0.0105,
      "step": 1658340
    },
    {
      "epoch": 2.713942512257549,
      "grad_norm": 0.25975096225738525,
      "learning_rate": 4.602241653103853e-06,
      "loss": 0.0169,
      "step": 1658360
    },
    {
      "epoch": 2.7139752426962027,
      "grad_norm": 0.3358624279499054,
      "learning_rate": 4.602175760890336e-06,
      "loss": 0.0101,
      "step": 1658380
    },
    {
      "epoch": 2.714007973134856,
      "grad_norm": 0.5153867602348328,
      "learning_rate": 4.602109868676819e-06,
      "loss": 0.0117,
      "step": 1658400
    },
    {
      "epoch": 2.7140407035735095,
      "grad_norm": 0.1824411153793335,
      "learning_rate": 4.602043976463302e-06,
      "loss": 0.0096,
      "step": 1658420
    },
    {
      "epoch": 2.7140734340121626,
      "grad_norm": 0.2995181679725647,
      "learning_rate": 4.601978084249785e-06,
      "loss": 0.0134,
      "step": 1658440
    },
    {
      "epoch": 2.7141061644508158,
      "grad_norm": 1.0951887369155884,
      "learning_rate": 4.6019121920362675e-06,
      "loss": 0.0142,
      "step": 1658460
    },
    {
      "epoch": 2.7141388948894694,
      "grad_norm": 0.14163921773433685,
      "learning_rate": 4.60184629982275e-06,
      "loss": 0.0211,
      "step": 1658480
    },
    {
      "epoch": 2.7141716253281225,
      "grad_norm": 0.07419988512992859,
      "learning_rate": 4.601780407609233e-06,
      "loss": 0.0102,
      "step": 1658500
    },
    {
      "epoch": 2.714204355766776,
      "grad_norm": 0.7763975858688354,
      "learning_rate": 4.601714515395716e-06,
      "loss": 0.016,
      "step": 1658520
    },
    {
      "epoch": 2.7142370862054293,
      "grad_norm": 0.3587574064731598,
      "learning_rate": 4.6016486231821985e-06,
      "loss": 0.0174,
      "step": 1658540
    },
    {
      "epoch": 2.714269816644083,
      "grad_norm": 0.8026540875434875,
      "learning_rate": 4.601582730968682e-06,
      "loss": 0.0144,
      "step": 1658560
    },
    {
      "epoch": 2.714302547082736,
      "grad_norm": 0.41707146167755127,
      "learning_rate": 4.601516838755165e-06,
      "loss": 0.017,
      "step": 1658580
    },
    {
      "epoch": 2.714335277521389,
      "grad_norm": 0.16371457278728485,
      "learning_rate": 4.6014509465416476e-06,
      "loss": 0.0152,
      "step": 1658600
    },
    {
      "epoch": 2.7143680079600427,
      "grad_norm": 0.16068586707115173,
      "learning_rate": 4.60138505432813e-06,
      "loss": 0.0136,
      "step": 1658620
    },
    {
      "epoch": 2.714400738398696,
      "grad_norm": 0.10432747006416321,
      "learning_rate": 4.601319162114613e-06,
      "loss": 0.0172,
      "step": 1658640
    },
    {
      "epoch": 2.7144334688373495,
      "grad_norm": 0.31756529211997986,
      "learning_rate": 4.601253269901096e-06,
      "loss": 0.0117,
      "step": 1658660
    },
    {
      "epoch": 2.7144661992760026,
      "grad_norm": 0.2677614986896515,
      "learning_rate": 4.601187377687579e-06,
      "loss": 0.0143,
      "step": 1658680
    },
    {
      "epoch": 2.7144989297146562,
      "grad_norm": 0.2748599648475647,
      "learning_rate": 4.601121485474062e-06,
      "loss": 0.0127,
      "step": 1658700
    },
    {
      "epoch": 2.7145316601533094,
      "grad_norm": 0.9321877360343933,
      "learning_rate": 4.601055593260545e-06,
      "loss": 0.0163,
      "step": 1658720
    },
    {
      "epoch": 2.7145643905919625,
      "grad_norm": 0.3470696210861206,
      "learning_rate": 4.6009897010470276e-06,
      "loss": 0.0193,
      "step": 1658740
    },
    {
      "epoch": 2.714597121030616,
      "grad_norm": 0.28298628330230713,
      "learning_rate": 4.60092380883351e-06,
      "loss": 0.0139,
      "step": 1658760
    },
    {
      "epoch": 2.7146298514692693,
      "grad_norm": 0.4423310160636902,
      "learning_rate": 4.600857916619994e-06,
      "loss": 0.0142,
      "step": 1658780
    },
    {
      "epoch": 2.714662581907923,
      "grad_norm": 0.38865330815315247,
      "learning_rate": 4.600792024406477e-06,
      "loss": 0.0087,
      "step": 1658800
    },
    {
      "epoch": 2.714695312346576,
      "grad_norm": 0.3501187264919281,
      "learning_rate": 4.600726132192959e-06,
      "loss": 0.0239,
      "step": 1658820
    },
    {
      "epoch": 2.7147280427852296,
      "grad_norm": 0.2599170207977295,
      "learning_rate": 4.600660239979442e-06,
      "loss": 0.0181,
      "step": 1658840
    },
    {
      "epoch": 2.7147607732238828,
      "grad_norm": 0.25493067502975464,
      "learning_rate": 4.600594347765925e-06,
      "loss": 0.0114,
      "step": 1658860
    },
    {
      "epoch": 2.714793503662536,
      "grad_norm": 0.29301223158836365,
      "learning_rate": 4.600528455552408e-06,
      "loss": 0.0136,
      "step": 1658880
    },
    {
      "epoch": 2.7148262341011895,
      "grad_norm": 0.13423387706279755,
      "learning_rate": 4.60046256333889e-06,
      "loss": 0.0148,
      "step": 1658900
    },
    {
      "epoch": 2.7148589645398427,
      "grad_norm": 0.6356438994407654,
      "learning_rate": 4.600396671125373e-06,
      "loss": 0.0116,
      "step": 1658920
    },
    {
      "epoch": 2.7148916949784963,
      "grad_norm": 0.129672572016716,
      "learning_rate": 4.600330778911856e-06,
      "loss": 0.0108,
      "step": 1658940
    },
    {
      "epoch": 2.7149244254171494,
      "grad_norm": 0.655977189540863,
      "learning_rate": 4.600264886698339e-06,
      "loss": 0.0133,
      "step": 1658960
    },
    {
      "epoch": 2.714957155855803,
      "grad_norm": 1.1096360683441162,
      "learning_rate": 4.600198994484822e-06,
      "loss": 0.0151,
      "step": 1658980
    },
    {
      "epoch": 2.714989886294456,
      "grad_norm": 0.4494151175022125,
      "learning_rate": 4.600133102271305e-06,
      "loss": 0.0092,
      "step": 1659000
    },
    {
      "epoch": 2.7150226167331093,
      "grad_norm": 0.15151157975196838,
      "learning_rate": 4.6000672100577885e-06,
      "loss": 0.011,
      "step": 1659020
    },
    {
      "epoch": 2.715055347171763,
      "grad_norm": 0.5171805620193481,
      "learning_rate": 4.600001317844271e-06,
      "loss": 0.0101,
      "step": 1659040
    },
    {
      "epoch": 2.715088077610416,
      "grad_norm": 0.14195485413074493,
      "learning_rate": 4.599935425630754e-06,
      "loss": 0.0196,
      "step": 1659060
    },
    {
      "epoch": 2.7151208080490696,
      "grad_norm": 0.25759604573249817,
      "learning_rate": 4.599869533417237e-06,
      "loss": 0.0095,
      "step": 1659080
    },
    {
      "epoch": 2.715153538487723,
      "grad_norm": 0.27663180232048035,
      "learning_rate": 4.5998036412037194e-06,
      "loss": 0.0098,
      "step": 1659100
    },
    {
      "epoch": 2.7151862689263764,
      "grad_norm": 0.22908315062522888,
      "learning_rate": 4.599737748990202e-06,
      "loss": 0.0161,
      "step": 1659120
    },
    {
      "epoch": 2.7152189993650295,
      "grad_norm": 0.11210256069898605,
      "learning_rate": 4.599671856776685e-06,
      "loss": 0.0089,
      "step": 1659140
    },
    {
      "epoch": 2.7152517298036827,
      "grad_norm": 0.15975753962993622,
      "learning_rate": 4.599605964563168e-06,
      "loss": 0.011,
      "step": 1659160
    },
    {
      "epoch": 2.7152844602423363,
      "grad_norm": 0.13949579000473022,
      "learning_rate": 4.599540072349651e-06,
      "loss": 0.011,
      "step": 1659180
    },
    {
      "epoch": 2.7153171906809894,
      "grad_norm": 0.2193329781293869,
      "learning_rate": 4.599474180136134e-06,
      "loss": 0.0169,
      "step": 1659200
    },
    {
      "epoch": 2.7153499211196426,
      "grad_norm": 0.6550987362861633,
      "learning_rate": 4.599408287922617e-06,
      "loss": 0.0212,
      "step": 1659220
    },
    {
      "epoch": 2.715382651558296,
      "grad_norm": 0.32593053579330444,
      "learning_rate": 4.5993423957090995e-06,
      "loss": 0.0146,
      "step": 1659240
    },
    {
      "epoch": 2.7154153819969498,
      "grad_norm": 0.25567546486854553,
      "learning_rate": 4.599276503495582e-06,
      "loss": 0.0139,
      "step": 1659260
    },
    {
      "epoch": 2.715448112435603,
      "grad_norm": 0.09225969761610031,
      "learning_rate": 4.599210611282065e-06,
      "loss": 0.0115,
      "step": 1659280
    },
    {
      "epoch": 2.715480842874256,
      "grad_norm": 0.5894002318382263,
      "learning_rate": 4.599144719068548e-06,
      "loss": 0.0127,
      "step": 1659300
    },
    {
      "epoch": 2.7155135733129097,
      "grad_norm": 0.20162370800971985,
      "learning_rate": 4.59907882685503e-06,
      "loss": 0.0148,
      "step": 1659320
    },
    {
      "epoch": 2.715546303751563,
      "grad_norm": 0.17937001585960388,
      "learning_rate": 4.599012934641513e-06,
      "loss": 0.0109,
      "step": 1659340
    },
    {
      "epoch": 2.715579034190216,
      "grad_norm": 0.6764808893203735,
      "learning_rate": 4.598947042427997e-06,
      "loss": 0.0165,
      "step": 1659360
    },
    {
      "epoch": 2.7156117646288696,
      "grad_norm": 0.09038987010717392,
      "learning_rate": 4.5988811502144795e-06,
      "loss": 0.0149,
      "step": 1659380
    },
    {
      "epoch": 2.715644495067523,
      "grad_norm": 0.1392546445131302,
      "learning_rate": 4.598815258000962e-06,
      "loss": 0.0082,
      "step": 1659400
    },
    {
      "epoch": 2.7156772255061763,
      "grad_norm": 0.46737948060035706,
      "learning_rate": 4.598749365787446e-06,
      "loss": 0.0199,
      "step": 1659420
    },
    {
      "epoch": 2.7157099559448294,
      "grad_norm": 0.21835866570472717,
      "learning_rate": 4.5986834735739285e-06,
      "loss": 0.012,
      "step": 1659440
    },
    {
      "epoch": 2.715742686383483,
      "grad_norm": 0.5539422631263733,
      "learning_rate": 4.598617581360411e-06,
      "loss": 0.0162,
      "step": 1659460
    },
    {
      "epoch": 2.715775416822136,
      "grad_norm": 0.0956253781914711,
      "learning_rate": 4.598551689146894e-06,
      "loss": 0.0118,
      "step": 1659480
    },
    {
      "epoch": 2.7158081472607893,
      "grad_norm": 0.14716261625289917,
      "learning_rate": 4.598485796933377e-06,
      "loss": 0.0137,
      "step": 1659500
    },
    {
      "epoch": 2.715840877699443,
      "grad_norm": 0.19434364140033722,
      "learning_rate": 4.5984199047198595e-06,
      "loss": 0.0117,
      "step": 1659520
    },
    {
      "epoch": 2.7158736081380965,
      "grad_norm": 0.16453826427459717,
      "learning_rate": 4.598354012506342e-06,
      "loss": 0.0139,
      "step": 1659540
    },
    {
      "epoch": 2.7159063385767497,
      "grad_norm": 0.36195674538612366,
      "learning_rate": 4.598288120292825e-06,
      "loss": 0.0136,
      "step": 1659560
    },
    {
      "epoch": 2.715939069015403,
      "grad_norm": 0.48875218629837036,
      "learning_rate": 4.5982222280793086e-06,
      "loss": 0.0125,
      "step": 1659580
    },
    {
      "epoch": 2.7159717994540564,
      "grad_norm": 0.4027322232723236,
      "learning_rate": 4.598156335865791e-06,
      "loss": 0.0119,
      "step": 1659600
    },
    {
      "epoch": 2.7160045298927096,
      "grad_norm": 0.5271179676055908,
      "learning_rate": 4.598090443652274e-06,
      "loss": 0.0162,
      "step": 1659620
    },
    {
      "epoch": 2.7160372603313627,
      "grad_norm": 0.257829487323761,
      "learning_rate": 4.598024551438757e-06,
      "loss": 0.0123,
      "step": 1659640
    },
    {
      "epoch": 2.7160699907700163,
      "grad_norm": 0.3513517677783966,
      "learning_rate": 4.5979586592252395e-06,
      "loss": 0.0133,
      "step": 1659660
    },
    {
      "epoch": 2.71610272120867,
      "grad_norm": 0.2544335722923279,
      "learning_rate": 4.597892767011722e-06,
      "loss": 0.0132,
      "step": 1659680
    },
    {
      "epoch": 2.716135451647323,
      "grad_norm": 0.06931448727846146,
      "learning_rate": 4.597826874798205e-06,
      "loss": 0.0131,
      "step": 1659700
    },
    {
      "epoch": 2.716168182085976,
      "grad_norm": 0.2809770703315735,
      "learning_rate": 4.597760982584689e-06,
      "loss": 0.0121,
      "step": 1659720
    },
    {
      "epoch": 2.71620091252463,
      "grad_norm": 0.31227701902389526,
      "learning_rate": 4.597695090371171e-06,
      "loss": 0.0124,
      "step": 1659740
    },
    {
      "epoch": 2.716233642963283,
      "grad_norm": 0.590070903301239,
      "learning_rate": 4.597629198157654e-06,
      "loss": 0.0131,
      "step": 1659760
    },
    {
      "epoch": 2.716266373401936,
      "grad_norm": 0.20591722428798676,
      "learning_rate": 4.597563305944137e-06,
      "loss": 0.0112,
      "step": 1659780
    },
    {
      "epoch": 2.7162991038405897,
      "grad_norm": 0.23152273893356323,
      "learning_rate": 4.5974974137306196e-06,
      "loss": 0.0103,
      "step": 1659800
    },
    {
      "epoch": 2.716331834279243,
      "grad_norm": 0.510295033454895,
      "learning_rate": 4.597431521517103e-06,
      "loss": 0.0131,
      "step": 1659820
    },
    {
      "epoch": 2.7163645647178964,
      "grad_norm": 0.46181994676589966,
      "learning_rate": 4.597365629303586e-06,
      "loss": 0.0116,
      "step": 1659840
    },
    {
      "epoch": 2.7163972951565496,
      "grad_norm": 0.22702808678150177,
      "learning_rate": 4.597299737090069e-06,
      "loss": 0.0123,
      "step": 1659860
    },
    {
      "epoch": 2.716430025595203,
      "grad_norm": 0.56685870885849,
      "learning_rate": 4.597233844876551e-06,
      "loss": 0.0094,
      "step": 1659880
    },
    {
      "epoch": 2.7164627560338563,
      "grad_norm": 0.5400993824005127,
      "learning_rate": 4.597167952663034e-06,
      "loss": 0.0164,
      "step": 1659900
    },
    {
      "epoch": 2.7164954864725095,
      "grad_norm": 0.19641898572444916,
      "learning_rate": 4.597102060449517e-06,
      "loss": 0.0113,
      "step": 1659920
    },
    {
      "epoch": 2.716528216911163,
      "grad_norm": 0.6249347925186157,
      "learning_rate": 4.597036168236e-06,
      "loss": 0.015,
      "step": 1659940
    },
    {
      "epoch": 2.7165609473498162,
      "grad_norm": 0.6196620464324951,
      "learning_rate": 4.596970276022482e-06,
      "loss": 0.0193,
      "step": 1659960
    },
    {
      "epoch": 2.71659367778847,
      "grad_norm": 1.094642996788025,
      "learning_rate": 4.596904383808966e-06,
      "loss": 0.0112,
      "step": 1659980
    },
    {
      "epoch": 2.716626408227123,
      "grad_norm": 0.48159897327423096,
      "learning_rate": 4.596838491595449e-06,
      "loss": 0.0126,
      "step": 1660000
    },
    {
      "epoch": 2.7166591386657766,
      "grad_norm": 0.23185329139232635,
      "learning_rate": 4.596772599381931e-06,
      "loss": 0.0154,
      "step": 1660020
    },
    {
      "epoch": 2.7166918691044297,
      "grad_norm": 0.5210713148117065,
      "learning_rate": 4.596706707168414e-06,
      "loss": 0.0149,
      "step": 1660040
    },
    {
      "epoch": 2.716724599543083,
      "grad_norm": 0.18932434916496277,
      "learning_rate": 4.596640814954897e-06,
      "loss": 0.0151,
      "step": 1660060
    },
    {
      "epoch": 2.7167573299817365,
      "grad_norm": 0.2134561389684677,
      "learning_rate": 4.5965749227413805e-06,
      "loss": 0.0167,
      "step": 1660080
    },
    {
      "epoch": 2.7167900604203896,
      "grad_norm": 0.3303033709526062,
      "learning_rate": 4.596509030527863e-06,
      "loss": 0.0169,
      "step": 1660100
    },
    {
      "epoch": 2.716822790859043,
      "grad_norm": 0.1413041055202484,
      "learning_rate": 4.596443138314346e-06,
      "loss": 0.0141,
      "step": 1660120
    },
    {
      "epoch": 2.7168555212976964,
      "grad_norm": 0.7035030126571655,
      "learning_rate": 4.596377246100829e-06,
      "loss": 0.0115,
      "step": 1660140
    },
    {
      "epoch": 2.71688825173635,
      "grad_norm": 0.3907942473888397,
      "learning_rate": 4.596311353887311e-06,
      "loss": 0.0195,
      "step": 1660160
    },
    {
      "epoch": 2.716920982175003,
      "grad_norm": 0.5129055380821228,
      "learning_rate": 4.596245461673794e-06,
      "loss": 0.0113,
      "step": 1660180
    },
    {
      "epoch": 2.7169537126136563,
      "grad_norm": 0.39126455783843994,
      "learning_rate": 4.596179569460278e-06,
      "loss": 0.0151,
      "step": 1660200
    },
    {
      "epoch": 2.71698644305231,
      "grad_norm": 0.3254699409008026,
      "learning_rate": 4.5961136772467605e-06,
      "loss": 0.0263,
      "step": 1660220
    },
    {
      "epoch": 2.717019173490963,
      "grad_norm": 0.46039879322052,
      "learning_rate": 4.596047785033243e-06,
      "loss": 0.0118,
      "step": 1660240
    },
    {
      "epoch": 2.7170519039296166,
      "grad_norm": 0.3937818706035614,
      "learning_rate": 4.595981892819726e-06,
      "loss": 0.0193,
      "step": 1660260
    },
    {
      "epoch": 2.7170846343682697,
      "grad_norm": 0.15649576485157013,
      "learning_rate": 4.595916000606209e-06,
      "loss": 0.011,
      "step": 1660280
    },
    {
      "epoch": 2.7171173648069233,
      "grad_norm": 0.21724767982959747,
      "learning_rate": 4.5958501083926914e-06,
      "loss": 0.0176,
      "step": 1660300
    },
    {
      "epoch": 2.7171500952455765,
      "grad_norm": 0.16540013253688812,
      "learning_rate": 4.595784216179174e-06,
      "loss": 0.0149,
      "step": 1660320
    },
    {
      "epoch": 2.7171828256842296,
      "grad_norm": 1.8675205707550049,
      "learning_rate": 4.595718323965657e-06,
      "loss": 0.0148,
      "step": 1660340
    },
    {
      "epoch": 2.7172155561228832,
      "grad_norm": 0.12390000373125076,
      "learning_rate": 4.59565243175214e-06,
      "loss": 0.0141,
      "step": 1660360
    },
    {
      "epoch": 2.7172482865615364,
      "grad_norm": 0.26781320571899414,
      "learning_rate": 4.595586539538623e-06,
      "loss": 0.0122,
      "step": 1660380
    },
    {
      "epoch": 2.71728101700019,
      "grad_norm": 0.22149093449115753,
      "learning_rate": 4.595520647325106e-06,
      "loss": 0.0112,
      "step": 1660400
    },
    {
      "epoch": 2.717313747438843,
      "grad_norm": 0.17913152277469635,
      "learning_rate": 4.595454755111589e-06,
      "loss": 0.0166,
      "step": 1660420
    },
    {
      "epoch": 2.7173464778774967,
      "grad_norm": 0.1685064435005188,
      "learning_rate": 4.595388862898072e-06,
      "loss": 0.0128,
      "step": 1660440
    },
    {
      "epoch": 2.71737920831615,
      "grad_norm": 0.21669583022594452,
      "learning_rate": 4.595322970684555e-06,
      "loss": 0.0125,
      "step": 1660460
    },
    {
      "epoch": 2.717411938754803,
      "grad_norm": 0.07248592376708984,
      "learning_rate": 4.595257078471038e-06,
      "loss": 0.0127,
      "step": 1660480
    },
    {
      "epoch": 2.7174446691934566,
      "grad_norm": 0.636471688747406,
      "learning_rate": 4.5951911862575205e-06,
      "loss": 0.0137,
      "step": 1660500
    },
    {
      "epoch": 2.7174773996321098,
      "grad_norm": 0.16709940135478973,
      "learning_rate": 4.595125294044003e-06,
      "loss": 0.0119,
      "step": 1660520
    },
    {
      "epoch": 2.7175101300707634,
      "grad_norm": 0.10899093747138977,
      "learning_rate": 4.595059401830486e-06,
      "loss": 0.0108,
      "step": 1660540
    },
    {
      "epoch": 2.7175428605094165,
      "grad_norm": 0.39404040575027466,
      "learning_rate": 4.594993509616969e-06,
      "loss": 0.01,
      "step": 1660560
    },
    {
      "epoch": 2.71757559094807,
      "grad_norm": 0.6207817196846008,
      "learning_rate": 4.5949276174034515e-06,
      "loss": 0.0151,
      "step": 1660580
    },
    {
      "epoch": 2.7176083213867233,
      "grad_norm": 0.5559732913970947,
      "learning_rate": 4.594861725189935e-06,
      "loss": 0.0187,
      "step": 1660600
    },
    {
      "epoch": 2.7176410518253764,
      "grad_norm": 0.24174416065216064,
      "learning_rate": 4.594795832976418e-06,
      "loss": 0.016,
      "step": 1660620
    },
    {
      "epoch": 2.71767378226403,
      "grad_norm": 0.3625379502773285,
      "learning_rate": 4.5947299407629006e-06,
      "loss": 0.0115,
      "step": 1660640
    },
    {
      "epoch": 2.717706512702683,
      "grad_norm": 0.3400648236274719,
      "learning_rate": 4.594664048549383e-06,
      "loss": 0.0188,
      "step": 1660660
    },
    {
      "epoch": 2.7177392431413363,
      "grad_norm": 0.09585588425397873,
      "learning_rate": 4.594598156335866e-06,
      "loss": 0.0116,
      "step": 1660680
    },
    {
      "epoch": 2.71777197357999,
      "grad_norm": 0.5438550114631653,
      "learning_rate": 4.594532264122349e-06,
      "loss": 0.0154,
      "step": 1660700
    },
    {
      "epoch": 2.7178047040186435,
      "grad_norm": 0.6352513432502747,
      "learning_rate": 4.5944663719088315e-06,
      "loss": 0.0112,
      "step": 1660720
    },
    {
      "epoch": 2.7178374344572966,
      "grad_norm": 0.1274300366640091,
      "learning_rate": 4.594400479695314e-06,
      "loss": 0.0135,
      "step": 1660740
    },
    {
      "epoch": 2.71787016489595,
      "grad_norm": 0.1813257783651352,
      "learning_rate": 4.594334587481797e-06,
      "loss": 0.0159,
      "step": 1660760
    },
    {
      "epoch": 2.7179028953346034,
      "grad_norm": 0.16631975769996643,
      "learning_rate": 4.594268695268281e-06,
      "loss": 0.0093,
      "step": 1660780
    },
    {
      "epoch": 2.7179356257732565,
      "grad_norm": 0.13486768305301666,
      "learning_rate": 4.594202803054763e-06,
      "loss": 0.0115,
      "step": 1660800
    },
    {
      "epoch": 2.7179683562119097,
      "grad_norm": 1.117492437362671,
      "learning_rate": 4.594136910841246e-06,
      "loss": 0.0167,
      "step": 1660820
    },
    {
      "epoch": 2.7180010866505633,
      "grad_norm": 0.49732205271720886,
      "learning_rate": 4.59407101862773e-06,
      "loss": 0.0131,
      "step": 1660840
    },
    {
      "epoch": 2.718033817089217,
      "grad_norm": 0.25416576862335205,
      "learning_rate": 4.594005126414212e-06,
      "loss": 0.015,
      "step": 1660860
    },
    {
      "epoch": 2.71806654752787,
      "grad_norm": 0.17168205976486206,
      "learning_rate": 4.593939234200695e-06,
      "loss": 0.0142,
      "step": 1660880
    },
    {
      "epoch": 2.718099277966523,
      "grad_norm": 0.44719865918159485,
      "learning_rate": 4.593873341987178e-06,
      "loss": 0.0165,
      "step": 1660900
    },
    {
      "epoch": 2.7181320084051768,
      "grad_norm": 0.4252511262893677,
      "learning_rate": 4.593807449773661e-06,
      "loss": 0.0128,
      "step": 1660920
    },
    {
      "epoch": 2.71816473884383,
      "grad_norm": 0.4543485939502716,
      "learning_rate": 4.593741557560143e-06,
      "loss": 0.0176,
      "step": 1660940
    },
    {
      "epoch": 2.718197469282483,
      "grad_norm": 0.2351990044116974,
      "learning_rate": 4.593675665346626e-06,
      "loss": 0.0161,
      "step": 1660960
    },
    {
      "epoch": 2.7182301997211367,
      "grad_norm": 0.17942188680171967,
      "learning_rate": 4.593609773133109e-06,
      "loss": 0.0137,
      "step": 1660980
    },
    {
      "epoch": 2.7182629301597903,
      "grad_norm": 0.15444958209991455,
      "learning_rate": 4.593543880919592e-06,
      "loss": 0.014,
      "step": 1661000
    },
    {
      "epoch": 2.7182956605984434,
      "grad_norm": 0.6294684410095215,
      "learning_rate": 4.593477988706075e-06,
      "loss": 0.0134,
      "step": 1661020
    },
    {
      "epoch": 2.7183283910370966,
      "grad_norm": 0.3107433319091797,
      "learning_rate": 4.593412096492558e-06,
      "loss": 0.0181,
      "step": 1661040
    },
    {
      "epoch": 2.71836112147575,
      "grad_norm": 0.07138407230377197,
      "learning_rate": 4.593346204279041e-06,
      "loss": 0.0128,
      "step": 1661060
    },
    {
      "epoch": 2.7183938519144033,
      "grad_norm": 0.4226652681827545,
      "learning_rate": 4.593280312065523e-06,
      "loss": 0.0162,
      "step": 1661080
    },
    {
      "epoch": 2.7184265823530565,
      "grad_norm": 0.27812159061431885,
      "learning_rate": 4.593214419852006e-06,
      "loss": 0.0147,
      "step": 1661100
    },
    {
      "epoch": 2.71845931279171,
      "grad_norm": 0.10102145373821259,
      "learning_rate": 4.593148527638489e-06,
      "loss": 0.0136,
      "step": 1661120
    },
    {
      "epoch": 2.7184920432303636,
      "grad_norm": 0.27833858132362366,
      "learning_rate": 4.5930826354249724e-06,
      "loss": 0.0193,
      "step": 1661140
    },
    {
      "epoch": 2.718524773669017,
      "grad_norm": 0.4026466906070709,
      "learning_rate": 4.593016743211455e-06,
      "loss": 0.0162,
      "step": 1661160
    },
    {
      "epoch": 2.71855750410767,
      "grad_norm": 0.4705488681793213,
      "learning_rate": 4.592950850997938e-06,
      "loss": 0.0116,
      "step": 1661180
    },
    {
      "epoch": 2.7185902345463235,
      "grad_norm": 0.4261312782764435,
      "learning_rate": 4.592884958784421e-06,
      "loss": 0.0141,
      "step": 1661200
    },
    {
      "epoch": 2.7186229649849767,
      "grad_norm": 0.17020541429519653,
      "learning_rate": 4.592819066570903e-06,
      "loss": 0.0109,
      "step": 1661220
    },
    {
      "epoch": 2.71865569542363,
      "grad_norm": 0.12513257563114166,
      "learning_rate": 4.592753174357387e-06,
      "loss": 0.014,
      "step": 1661240
    },
    {
      "epoch": 2.7186884258622834,
      "grad_norm": 0.31412771344184875,
      "learning_rate": 4.59268728214387e-06,
      "loss": 0.0168,
      "step": 1661260
    },
    {
      "epoch": 2.7187211563009366,
      "grad_norm": 0.4495863616466522,
      "learning_rate": 4.5926213899303525e-06,
      "loss": 0.0235,
      "step": 1661280
    },
    {
      "epoch": 2.71875388673959,
      "grad_norm": 0.30735236406326294,
      "learning_rate": 4.592555497716835e-06,
      "loss": 0.0104,
      "step": 1661300
    },
    {
      "epoch": 2.7187866171782433,
      "grad_norm": 0.5162182450294495,
      "learning_rate": 4.592489605503318e-06,
      "loss": 0.0124,
      "step": 1661320
    },
    {
      "epoch": 2.718819347616897,
      "grad_norm": 0.11841258406639099,
      "learning_rate": 4.592423713289801e-06,
      "loss": 0.0199,
      "step": 1661340
    },
    {
      "epoch": 2.71885207805555,
      "grad_norm": 0.19445015490055084,
      "learning_rate": 4.592357821076283e-06,
      "loss": 0.0168,
      "step": 1661360
    },
    {
      "epoch": 2.718884808494203,
      "grad_norm": 0.28435900807380676,
      "learning_rate": 4.592291928862766e-06,
      "loss": 0.0129,
      "step": 1661380
    },
    {
      "epoch": 2.718917538932857,
      "grad_norm": 0.768919825553894,
      "learning_rate": 4.59222603664925e-06,
      "loss": 0.0234,
      "step": 1661400
    },
    {
      "epoch": 2.71895026937151,
      "grad_norm": 0.11704562604427338,
      "learning_rate": 4.5921601444357325e-06,
      "loss": 0.0129,
      "step": 1661420
    },
    {
      "epoch": 2.7189829998101636,
      "grad_norm": 0.2444608360528946,
      "learning_rate": 4.592094252222215e-06,
      "loss": 0.016,
      "step": 1661440
    },
    {
      "epoch": 2.7190157302488167,
      "grad_norm": 0.2986711859703064,
      "learning_rate": 4.592028360008698e-06,
      "loss": 0.0121,
      "step": 1661460
    },
    {
      "epoch": 2.7190484606874703,
      "grad_norm": 0.29754719138145447,
      "learning_rate": 4.5919624677951816e-06,
      "loss": 0.0141,
      "step": 1661480
    },
    {
      "epoch": 2.7190811911261235,
      "grad_norm": 0.4517471194267273,
      "learning_rate": 4.591896575581664e-06,
      "loss": 0.015,
      "step": 1661500
    },
    {
      "epoch": 2.7191139215647766,
      "grad_norm": 0.6975220441818237,
      "learning_rate": 4.591830683368147e-06,
      "loss": 0.0107,
      "step": 1661520
    },
    {
      "epoch": 2.71914665200343,
      "grad_norm": 0.18740594387054443,
      "learning_rate": 4.59176479115463e-06,
      "loss": 0.0107,
      "step": 1661540
    },
    {
      "epoch": 2.7191793824420833,
      "grad_norm": 0.25888630747795105,
      "learning_rate": 4.5916988989411125e-06,
      "loss": 0.0163,
      "step": 1661560
    },
    {
      "epoch": 2.719212112880737,
      "grad_norm": 0.23577545583248138,
      "learning_rate": 4.591633006727595e-06,
      "loss": 0.0151,
      "step": 1661580
    },
    {
      "epoch": 2.71924484331939,
      "grad_norm": 0.7024158239364624,
      "learning_rate": 4.591567114514078e-06,
      "loss": 0.0219,
      "step": 1661600
    },
    {
      "epoch": 2.7192775737580437,
      "grad_norm": 0.3463932275772095,
      "learning_rate": 4.591501222300562e-06,
      "loss": 0.0138,
      "step": 1661620
    },
    {
      "epoch": 2.719310304196697,
      "grad_norm": 1.0284253358840942,
      "learning_rate": 4.591435330087044e-06,
      "loss": 0.0195,
      "step": 1661640
    },
    {
      "epoch": 2.71934303463535,
      "grad_norm": 0.22705361247062683,
      "learning_rate": 4.591369437873527e-06,
      "loss": 0.0149,
      "step": 1661660
    },
    {
      "epoch": 2.7193757650740036,
      "grad_norm": 0.6653025150299072,
      "learning_rate": 4.59130354566001e-06,
      "loss": 0.0215,
      "step": 1661680
    },
    {
      "epoch": 2.7194084955126567,
      "grad_norm": 0.13542550802230835,
      "learning_rate": 4.5912376534464925e-06,
      "loss": 0.0126,
      "step": 1661700
    },
    {
      "epoch": 2.7194412259513103,
      "grad_norm": 0.6992393136024475,
      "learning_rate": 4.591171761232975e-06,
      "loss": 0.017,
      "step": 1661720
    },
    {
      "epoch": 2.7194739563899635,
      "grad_norm": 0.4670819938182831,
      "learning_rate": 4.591105869019458e-06,
      "loss": 0.011,
      "step": 1661740
    },
    {
      "epoch": 2.719506686828617,
      "grad_norm": 0.20356634259223938,
      "learning_rate": 4.591039976805941e-06,
      "loss": 0.0169,
      "step": 1661760
    },
    {
      "epoch": 2.71953941726727,
      "grad_norm": 0.1339300125837326,
      "learning_rate": 4.5909740845924235e-06,
      "loss": 0.0147,
      "step": 1661780
    },
    {
      "epoch": 2.7195721477059234,
      "grad_norm": 0.37349236011505127,
      "learning_rate": 4.590908192378907e-06,
      "loss": 0.0118,
      "step": 1661800
    },
    {
      "epoch": 2.719604878144577,
      "grad_norm": 0.46255549788475037,
      "learning_rate": 4.59084230016539e-06,
      "loss": 0.0103,
      "step": 1661820
    },
    {
      "epoch": 2.71963760858323,
      "grad_norm": 0.40531155467033386,
      "learning_rate": 4.5907764079518726e-06,
      "loss": 0.0128,
      "step": 1661840
    },
    {
      "epoch": 2.7196703390218837,
      "grad_norm": 1.464309573173523,
      "learning_rate": 4.590710515738356e-06,
      "loss": 0.0168,
      "step": 1661860
    },
    {
      "epoch": 2.719703069460537,
      "grad_norm": 0.08014339953660965,
      "learning_rate": 4.590644623524839e-06,
      "loss": 0.0155,
      "step": 1661880
    },
    {
      "epoch": 2.7197357998991905,
      "grad_norm": 0.6487123966217041,
      "learning_rate": 4.590578731311322e-06,
      "loss": 0.0148,
      "step": 1661900
    },
    {
      "epoch": 2.7197685303378436,
      "grad_norm": 0.8323010802268982,
      "learning_rate": 4.590512839097804e-06,
      "loss": 0.013,
      "step": 1661920
    },
    {
      "epoch": 2.7198012607764968,
      "grad_norm": 0.2925380766391754,
      "learning_rate": 4.590446946884287e-06,
      "loss": 0.0181,
      "step": 1661940
    },
    {
      "epoch": 2.7198339912151503,
      "grad_norm": 0.7004131078720093,
      "learning_rate": 4.59038105467077e-06,
      "loss": 0.0188,
      "step": 1661960
    },
    {
      "epoch": 2.7198667216538035,
      "grad_norm": 0.20991791784763336,
      "learning_rate": 4.590315162457253e-06,
      "loss": 0.0161,
      "step": 1661980
    },
    {
      "epoch": 2.719899452092457,
      "grad_norm": 1.102462649345398,
      "learning_rate": 4.590249270243735e-06,
      "loss": 0.0174,
      "step": 1662000
    },
    {
      "epoch": 2.7199321825311102,
      "grad_norm": 0.3687317967414856,
      "learning_rate": 4.590183378030219e-06,
      "loss": 0.0114,
      "step": 1662020
    },
    {
      "epoch": 2.719964912969764,
      "grad_norm": 0.603347659111023,
      "learning_rate": 4.590117485816702e-06,
      "loss": 0.015,
      "step": 1662040
    },
    {
      "epoch": 2.719997643408417,
      "grad_norm": 0.9184269905090332,
      "learning_rate": 4.590051593603184e-06,
      "loss": 0.0218,
      "step": 1662060
    },
    {
      "epoch": 2.72003037384707,
      "grad_norm": 0.32574978470802307,
      "learning_rate": 4.589985701389667e-06,
      "loss": 0.0164,
      "step": 1662080
    },
    {
      "epoch": 2.7200631042857237,
      "grad_norm": 0.2913588881492615,
      "learning_rate": 4.58991980917615e-06,
      "loss": 0.0102,
      "step": 1662100
    },
    {
      "epoch": 2.720095834724377,
      "grad_norm": 0.2873806357383728,
      "learning_rate": 4.589853916962633e-06,
      "loss": 0.0118,
      "step": 1662120
    },
    {
      "epoch": 2.7201285651630305,
      "grad_norm": 0.06295442581176758,
      "learning_rate": 4.589788024749115e-06,
      "loss": 0.0123,
      "step": 1662140
    },
    {
      "epoch": 2.7201612956016836,
      "grad_norm": 1.021634817123413,
      "learning_rate": 4.589722132535598e-06,
      "loss": 0.0131,
      "step": 1662160
    },
    {
      "epoch": 2.720194026040337,
      "grad_norm": 0.2339727282524109,
      "learning_rate": 4.589656240322082e-06,
      "loss": 0.0133,
      "step": 1662180
    },
    {
      "epoch": 2.7202267564789904,
      "grad_norm": 0.13917188346385956,
      "learning_rate": 4.589590348108564e-06,
      "loss": 0.0094,
      "step": 1662200
    },
    {
      "epoch": 2.7202594869176435,
      "grad_norm": 0.20211002230644226,
      "learning_rate": 4.589524455895047e-06,
      "loss": 0.0129,
      "step": 1662220
    },
    {
      "epoch": 2.720292217356297,
      "grad_norm": 0.30586910247802734,
      "learning_rate": 4.58945856368153e-06,
      "loss": 0.018,
      "step": 1662240
    },
    {
      "epoch": 2.7203249477949503,
      "grad_norm": 0.6757210493087769,
      "learning_rate": 4.5893926714680135e-06,
      "loss": 0.0192,
      "step": 1662260
    },
    {
      "epoch": 2.7203576782336034,
      "grad_norm": 0.1539192944765091,
      "learning_rate": 4.589326779254496e-06,
      "loss": 0.0206,
      "step": 1662280
    },
    {
      "epoch": 2.720390408672257,
      "grad_norm": 0.5322107076644897,
      "learning_rate": 4.589260887040979e-06,
      "loss": 0.0135,
      "step": 1662300
    },
    {
      "epoch": 2.7204231391109106,
      "grad_norm": 0.44394028186798096,
      "learning_rate": 4.589194994827462e-06,
      "loss": 0.0132,
      "step": 1662320
    },
    {
      "epoch": 2.7204558695495638,
      "grad_norm": 0.7274375557899475,
      "learning_rate": 4.5891291026139444e-06,
      "loss": 0.0179,
      "step": 1662340
    },
    {
      "epoch": 2.720488599988217,
      "grad_norm": 0.21980462968349457,
      "learning_rate": 4.589063210400427e-06,
      "loss": 0.0104,
      "step": 1662360
    },
    {
      "epoch": 2.7205213304268705,
      "grad_norm": 0.48534899950027466,
      "learning_rate": 4.58899731818691e-06,
      "loss": 0.0118,
      "step": 1662380
    },
    {
      "epoch": 2.7205540608655236,
      "grad_norm": 0.38372233510017395,
      "learning_rate": 4.588931425973393e-06,
      "loss": 0.0109,
      "step": 1662400
    },
    {
      "epoch": 2.720586791304177,
      "grad_norm": 0.0860646516084671,
      "learning_rate": 4.588865533759876e-06,
      "loss": 0.0123,
      "step": 1662420
    },
    {
      "epoch": 2.7206195217428304,
      "grad_norm": 0.9698997735977173,
      "learning_rate": 4.588799641546359e-06,
      "loss": 0.0095,
      "step": 1662440
    },
    {
      "epoch": 2.720652252181484,
      "grad_norm": 2.0179567337036133,
      "learning_rate": 4.588733749332842e-06,
      "loss": 0.0085,
      "step": 1662460
    },
    {
      "epoch": 2.720684982620137,
      "grad_norm": 0.7397021651268005,
      "learning_rate": 4.5886678571193245e-06,
      "loss": 0.0198,
      "step": 1662480
    },
    {
      "epoch": 2.7207177130587903,
      "grad_norm": 0.5489878058433533,
      "learning_rate": 4.588601964905807e-06,
      "loss": 0.0157,
      "step": 1662500
    },
    {
      "epoch": 2.720750443497444,
      "grad_norm": 0.3015378415584564,
      "learning_rate": 4.58853607269229e-06,
      "loss": 0.0171,
      "step": 1662520
    },
    {
      "epoch": 2.720783173936097,
      "grad_norm": 0.5507921576499939,
      "learning_rate": 4.5884701804787735e-06,
      "loss": 0.0154,
      "step": 1662540
    },
    {
      "epoch": 2.72081590437475,
      "grad_norm": 0.46129029989242554,
      "learning_rate": 4.588404288265256e-06,
      "loss": 0.0158,
      "step": 1662560
    },
    {
      "epoch": 2.7208486348134038,
      "grad_norm": 0.6159932613372803,
      "learning_rate": 4.588338396051739e-06,
      "loss": 0.0124,
      "step": 1662580
    },
    {
      "epoch": 2.7208813652520574,
      "grad_norm": 0.2921886742115021,
      "learning_rate": 4.588272503838222e-06,
      "loss": 0.0094,
      "step": 1662600
    },
    {
      "epoch": 2.7209140956907105,
      "grad_norm": 0.2924860417842865,
      "learning_rate": 4.5882066116247045e-06,
      "loss": 0.0136,
      "step": 1662620
    },
    {
      "epoch": 2.7209468261293637,
      "grad_norm": 0.13506612181663513,
      "learning_rate": 4.588140719411187e-06,
      "loss": 0.0161,
      "step": 1662640
    },
    {
      "epoch": 2.7209795565680173,
      "grad_norm": 0.2269740253686905,
      "learning_rate": 4.588074827197671e-06,
      "loss": 0.0138,
      "step": 1662660
    },
    {
      "epoch": 2.7210122870066704,
      "grad_norm": 0.19003325700759888,
      "learning_rate": 4.5880089349841536e-06,
      "loss": 0.0103,
      "step": 1662680
    },
    {
      "epoch": 2.7210450174453236,
      "grad_norm": 0.5325256586074829,
      "learning_rate": 4.587943042770636e-06,
      "loss": 0.0109,
      "step": 1662700
    },
    {
      "epoch": 2.721077747883977,
      "grad_norm": 0.38315579295158386,
      "learning_rate": 4.587877150557119e-06,
      "loss": 0.0089,
      "step": 1662720
    },
    {
      "epoch": 2.7211104783226308,
      "grad_norm": 0.35690268874168396,
      "learning_rate": 4.587811258343602e-06,
      "loss": 0.0115,
      "step": 1662740
    },
    {
      "epoch": 2.721143208761284,
      "grad_norm": 1.554551124572754,
      "learning_rate": 4.5877453661300845e-06,
      "loss": 0.0113,
      "step": 1662760
    },
    {
      "epoch": 2.721175939199937,
      "grad_norm": 0.5201061964035034,
      "learning_rate": 4.587679473916567e-06,
      "loss": 0.0114,
      "step": 1662780
    },
    {
      "epoch": 2.7212086696385906,
      "grad_norm": 0.24198830127716064,
      "learning_rate": 4.58761358170305e-06,
      "loss": 0.0121,
      "step": 1662800
    },
    {
      "epoch": 2.721241400077244,
      "grad_norm": 0.14543844759464264,
      "learning_rate": 4.587547689489534e-06,
      "loss": 0.011,
      "step": 1662820
    },
    {
      "epoch": 2.721274130515897,
      "grad_norm": 0.6557918190956116,
      "learning_rate": 4.587481797276016e-06,
      "loss": 0.0123,
      "step": 1662840
    },
    {
      "epoch": 2.7213068609545505,
      "grad_norm": 0.3396812379360199,
      "learning_rate": 4.587415905062499e-06,
      "loss": 0.0129,
      "step": 1662860
    },
    {
      "epoch": 2.7213395913932037,
      "grad_norm": 0.2500598132610321,
      "learning_rate": 4.587350012848982e-06,
      "loss": 0.0156,
      "step": 1662880
    },
    {
      "epoch": 2.7213723218318573,
      "grad_norm": 0.4478965997695923,
      "learning_rate": 4.587284120635465e-06,
      "loss": 0.0157,
      "step": 1662900
    },
    {
      "epoch": 2.7214050522705104,
      "grad_norm": 0.8907901644706726,
      "learning_rate": 4.587218228421948e-06,
      "loss": 0.0184,
      "step": 1662920
    },
    {
      "epoch": 2.721437782709164,
      "grad_norm": 1.5751785039901733,
      "learning_rate": 4.587152336208431e-06,
      "loss": 0.0084,
      "step": 1662940
    },
    {
      "epoch": 2.721470513147817,
      "grad_norm": 0.28345200419425964,
      "learning_rate": 4.587086443994914e-06,
      "loss": 0.0174,
      "step": 1662960
    },
    {
      "epoch": 2.7215032435864703,
      "grad_norm": 1.4647727012634277,
      "learning_rate": 4.587020551781396e-06,
      "loss": 0.0146,
      "step": 1662980
    },
    {
      "epoch": 2.721535974025124,
      "grad_norm": 0.25599509477615356,
      "learning_rate": 4.586954659567879e-06,
      "loss": 0.014,
      "step": 1663000
    },
    {
      "epoch": 2.721568704463777,
      "grad_norm": 0.10744630545377731,
      "learning_rate": 4.586888767354362e-06,
      "loss": 0.0096,
      "step": 1663020
    },
    {
      "epoch": 2.7216014349024307,
      "grad_norm": 0.6312958598136902,
      "learning_rate": 4.586822875140845e-06,
      "loss": 0.014,
      "step": 1663040
    },
    {
      "epoch": 2.721634165341084,
      "grad_norm": 0.3679393231868744,
      "learning_rate": 4.586756982927328e-06,
      "loss": 0.0111,
      "step": 1663060
    },
    {
      "epoch": 2.7216668957797374,
      "grad_norm": 0.27649447321891785,
      "learning_rate": 4.586691090713811e-06,
      "loss": 0.0117,
      "step": 1663080
    },
    {
      "epoch": 2.7216996262183906,
      "grad_norm": 0.5043407082557678,
      "learning_rate": 4.586625198500294e-06,
      "loss": 0.0162,
      "step": 1663100
    },
    {
      "epoch": 2.7217323566570437,
      "grad_norm": 0.5861632823944092,
      "learning_rate": 4.586559306286776e-06,
      "loss": 0.0157,
      "step": 1663120
    },
    {
      "epoch": 2.7217650870956973,
      "grad_norm": 0.7895311117172241,
      "learning_rate": 4.586493414073259e-06,
      "loss": 0.0145,
      "step": 1663140
    },
    {
      "epoch": 2.7217978175343505,
      "grad_norm": 0.16767598688602448,
      "learning_rate": 4.586427521859742e-06,
      "loss": 0.0155,
      "step": 1663160
    },
    {
      "epoch": 2.721830547973004,
      "grad_norm": 0.5409627556800842,
      "learning_rate": 4.586361629646225e-06,
      "loss": 0.0153,
      "step": 1663180
    },
    {
      "epoch": 2.721863278411657,
      "grad_norm": 0.5949559211730957,
      "learning_rate": 4.586295737432707e-06,
      "loss": 0.0159,
      "step": 1663200
    },
    {
      "epoch": 2.721896008850311,
      "grad_norm": 0.2642127573490143,
      "learning_rate": 4.586229845219191e-06,
      "loss": 0.0136,
      "step": 1663220
    },
    {
      "epoch": 2.721928739288964,
      "grad_norm": 0.4044846296310425,
      "learning_rate": 4.586163953005674e-06,
      "loss": 0.0131,
      "step": 1663240
    },
    {
      "epoch": 2.721961469727617,
      "grad_norm": 0.1541241705417633,
      "learning_rate": 4.586098060792156e-06,
      "loss": 0.0103,
      "step": 1663260
    },
    {
      "epoch": 2.7219942001662707,
      "grad_norm": 0.07075982540845871,
      "learning_rate": 4.58603216857864e-06,
      "loss": 0.0126,
      "step": 1663280
    },
    {
      "epoch": 2.722026930604924,
      "grad_norm": 0.8433787822723389,
      "learning_rate": 4.585966276365123e-06,
      "loss": 0.0092,
      "step": 1663300
    },
    {
      "epoch": 2.7220596610435774,
      "grad_norm": 0.24280622601509094,
      "learning_rate": 4.5859003841516055e-06,
      "loss": 0.0142,
      "step": 1663320
    },
    {
      "epoch": 2.7220923914822306,
      "grad_norm": 0.38570746779441833,
      "learning_rate": 4.585834491938088e-06,
      "loss": 0.0111,
      "step": 1663340
    },
    {
      "epoch": 2.722125121920884,
      "grad_norm": 0.23349910974502563,
      "learning_rate": 4.585768599724571e-06,
      "loss": 0.0166,
      "step": 1663360
    },
    {
      "epoch": 2.7221578523595373,
      "grad_norm": 0.13852061331272125,
      "learning_rate": 4.585702707511054e-06,
      "loss": 0.0209,
      "step": 1663380
    },
    {
      "epoch": 2.7221905827981905,
      "grad_norm": 0.8012794852256775,
      "learning_rate": 4.5856368152975364e-06,
      "loss": 0.0131,
      "step": 1663400
    },
    {
      "epoch": 2.722223313236844,
      "grad_norm": 0.20963092148303986,
      "learning_rate": 4.585570923084019e-06,
      "loss": 0.0164,
      "step": 1663420
    },
    {
      "epoch": 2.7222560436754972,
      "grad_norm": 0.24802841246128082,
      "learning_rate": 4.585505030870503e-06,
      "loss": 0.0114,
      "step": 1663440
    },
    {
      "epoch": 2.722288774114151,
      "grad_norm": 0.1799786388874054,
      "learning_rate": 4.5854391386569855e-06,
      "loss": 0.0158,
      "step": 1663460
    },
    {
      "epoch": 2.722321504552804,
      "grad_norm": 0.5072799921035767,
      "learning_rate": 4.585373246443468e-06,
      "loss": 0.0222,
      "step": 1663480
    },
    {
      "epoch": 2.7223542349914576,
      "grad_norm": 0.372132271528244,
      "learning_rate": 4.585307354229951e-06,
      "loss": 0.0122,
      "step": 1663500
    },
    {
      "epoch": 2.7223869654301107,
      "grad_norm": 0.371263712644577,
      "learning_rate": 4.585241462016434e-06,
      "loss": 0.0151,
      "step": 1663520
    },
    {
      "epoch": 2.722419695868764,
      "grad_norm": 0.11428195238113403,
      "learning_rate": 4.5851755698029165e-06,
      "loss": 0.0138,
      "step": 1663540
    },
    {
      "epoch": 2.7224524263074175,
      "grad_norm": 0.10787811875343323,
      "learning_rate": 4.585109677589399e-06,
      "loss": 0.0161,
      "step": 1663560
    },
    {
      "epoch": 2.7224851567460706,
      "grad_norm": 0.40208911895751953,
      "learning_rate": 4.585043785375882e-06,
      "loss": 0.017,
      "step": 1663580
    },
    {
      "epoch": 2.722517887184724,
      "grad_norm": 0.37771207094192505,
      "learning_rate": 4.5849778931623655e-06,
      "loss": 0.0155,
      "step": 1663600
    },
    {
      "epoch": 2.7225506176233774,
      "grad_norm": 0.27058812975883484,
      "learning_rate": 4.584912000948848e-06,
      "loss": 0.0166,
      "step": 1663620
    },
    {
      "epoch": 2.722583348062031,
      "grad_norm": 0.14007942378520966,
      "learning_rate": 4.584846108735331e-06,
      "loss": 0.0134,
      "step": 1663640
    },
    {
      "epoch": 2.722616078500684,
      "grad_norm": 0.3044993579387665,
      "learning_rate": 4.584780216521814e-06,
      "loss": 0.011,
      "step": 1663660
    },
    {
      "epoch": 2.7226488089393373,
      "grad_norm": 0.5757948756217957,
      "learning_rate": 4.584714324308297e-06,
      "loss": 0.0183,
      "step": 1663680
    },
    {
      "epoch": 2.722681539377991,
      "grad_norm": 0.1975855976343155,
      "learning_rate": 4.58464843209478e-06,
      "loss": 0.0213,
      "step": 1663700
    },
    {
      "epoch": 2.722714269816644,
      "grad_norm": 0.09196782857179642,
      "learning_rate": 4.584582539881263e-06,
      "loss": 0.0156,
      "step": 1663720
    },
    {
      "epoch": 2.722747000255297,
      "grad_norm": 0.48916715383529663,
      "learning_rate": 4.5845166476677455e-06,
      "loss": 0.0168,
      "step": 1663740
    },
    {
      "epoch": 2.7227797306939507,
      "grad_norm": 0.18221335113048553,
      "learning_rate": 4.584450755454228e-06,
      "loss": 0.0108,
      "step": 1663760
    },
    {
      "epoch": 2.7228124611326043,
      "grad_norm": 0.4290373623371124,
      "learning_rate": 4.584384863240711e-06,
      "loss": 0.0112,
      "step": 1663780
    },
    {
      "epoch": 2.7228451915712575,
      "grad_norm": 0.09525011479854584,
      "learning_rate": 4.584318971027194e-06,
      "loss": 0.0145,
      "step": 1663800
    },
    {
      "epoch": 2.7228779220099106,
      "grad_norm": 0.8161650896072388,
      "learning_rate": 4.5842530788136765e-06,
      "loss": 0.0164,
      "step": 1663820
    },
    {
      "epoch": 2.7229106524485642,
      "grad_norm": 0.20153354108333588,
      "learning_rate": 4.58418718660016e-06,
      "loss": 0.0176,
      "step": 1663840
    },
    {
      "epoch": 2.7229433828872174,
      "grad_norm": 0.2367461770772934,
      "learning_rate": 4.584121294386643e-06,
      "loss": 0.017,
      "step": 1663860
    },
    {
      "epoch": 2.7229761133258705,
      "grad_norm": 0.48805174231529236,
      "learning_rate": 4.5840554021731256e-06,
      "loss": 0.0169,
      "step": 1663880
    },
    {
      "epoch": 2.723008843764524,
      "grad_norm": 0.261830598115921,
      "learning_rate": 4.583989509959608e-06,
      "loss": 0.0149,
      "step": 1663900
    },
    {
      "epoch": 2.7230415742031777,
      "grad_norm": 0.23428817093372345,
      "learning_rate": 4.583923617746091e-06,
      "loss": 0.0097,
      "step": 1663920
    },
    {
      "epoch": 2.723074304641831,
      "grad_norm": 0.48005199432373047,
      "learning_rate": 4.583857725532575e-06,
      "loss": 0.0138,
      "step": 1663940
    },
    {
      "epoch": 2.723107035080484,
      "grad_norm": 0.30632483959198,
      "learning_rate": 4.583791833319057e-06,
      "loss": 0.0147,
      "step": 1663960
    },
    {
      "epoch": 2.7231397655191376,
      "grad_norm": 0.6248444318771362,
      "learning_rate": 4.58372594110554e-06,
      "loss": 0.0212,
      "step": 1663980
    },
    {
      "epoch": 2.7231724959577908,
      "grad_norm": 0.20328165590763092,
      "learning_rate": 4.583660048892023e-06,
      "loss": 0.0121,
      "step": 1664000
    },
    {
      "epoch": 2.723205226396444,
      "grad_norm": 0.7536045908927917,
      "learning_rate": 4.583594156678506e-06,
      "loss": 0.0125,
      "step": 1664020
    },
    {
      "epoch": 2.7232379568350975,
      "grad_norm": 0.14421097934246063,
      "learning_rate": 4.583528264464988e-06,
      "loss": 0.0139,
      "step": 1664040
    },
    {
      "epoch": 2.723270687273751,
      "grad_norm": 0.5536401271820068,
      "learning_rate": 4.583462372251472e-06,
      "loss": 0.0111,
      "step": 1664060
    },
    {
      "epoch": 2.7233034177124043,
      "grad_norm": 1.0315128564834595,
      "learning_rate": 4.583396480037955e-06,
      "loss": 0.0126,
      "step": 1664080
    },
    {
      "epoch": 2.7233361481510574,
      "grad_norm": 0.34213340282440186,
      "learning_rate": 4.583330587824437e-06,
      "loss": 0.0112,
      "step": 1664100
    },
    {
      "epoch": 2.723368878589711,
      "grad_norm": 0.0962110161781311,
      "learning_rate": 4.58326469561092e-06,
      "loss": 0.0163,
      "step": 1664120
    },
    {
      "epoch": 2.723401609028364,
      "grad_norm": 0.7141157984733582,
      "learning_rate": 4.583198803397403e-06,
      "loss": 0.0148,
      "step": 1664140
    },
    {
      "epoch": 2.7234343394670173,
      "grad_norm": 0.2856554090976715,
      "learning_rate": 4.583132911183886e-06,
      "loss": 0.0115,
      "step": 1664160
    },
    {
      "epoch": 2.723467069905671,
      "grad_norm": 0.16536352038383484,
      "learning_rate": 4.583067018970368e-06,
      "loss": 0.0206,
      "step": 1664180
    },
    {
      "epoch": 2.7234998003443245,
      "grad_norm": 0.15237513184547424,
      "learning_rate": 4.583001126756851e-06,
      "loss": 0.0142,
      "step": 1664200
    },
    {
      "epoch": 2.7235325307829776,
      "grad_norm": 0.22849678993225098,
      "learning_rate": 4.582935234543334e-06,
      "loss": 0.0187,
      "step": 1664220
    },
    {
      "epoch": 2.723565261221631,
      "grad_norm": 0.0996132344007492,
      "learning_rate": 4.5828693423298174e-06,
      "loss": 0.0239,
      "step": 1664240
    },
    {
      "epoch": 2.7235979916602844,
      "grad_norm": 0.06691643595695496,
      "learning_rate": 4.5828034501163e-06,
      "loss": 0.0178,
      "step": 1664260
    },
    {
      "epoch": 2.7236307220989375,
      "grad_norm": 0.38304251432418823,
      "learning_rate": 4.582737557902783e-06,
      "loss": 0.0211,
      "step": 1664280
    },
    {
      "epoch": 2.7236634525375907,
      "grad_norm": 0.39827975630760193,
      "learning_rate": 4.5826716656892665e-06,
      "loss": 0.0166,
      "step": 1664300
    },
    {
      "epoch": 2.7236961829762443,
      "grad_norm": 0.2672078013420105,
      "learning_rate": 4.582605773475749e-06,
      "loss": 0.0144,
      "step": 1664320
    },
    {
      "epoch": 2.7237289134148974,
      "grad_norm": 0.1518523097038269,
      "learning_rate": 4.582539881262232e-06,
      "loss": 0.0108,
      "step": 1664340
    },
    {
      "epoch": 2.723761643853551,
      "grad_norm": 0.47514426708221436,
      "learning_rate": 4.582473989048715e-06,
      "loss": 0.0221,
      "step": 1664360
    },
    {
      "epoch": 2.723794374292204,
      "grad_norm": 0.26818907260894775,
      "learning_rate": 4.5824080968351975e-06,
      "loss": 0.0158,
      "step": 1664380
    },
    {
      "epoch": 2.7238271047308578,
      "grad_norm": 0.185529887676239,
      "learning_rate": 4.58234220462168e-06,
      "loss": 0.0099,
      "step": 1664400
    },
    {
      "epoch": 2.723859835169511,
      "grad_norm": 0.11361824721097946,
      "learning_rate": 4.582276312408163e-06,
      "loss": 0.0189,
      "step": 1664420
    },
    {
      "epoch": 2.723892565608164,
      "grad_norm": 0.18377834558486938,
      "learning_rate": 4.582210420194646e-06,
      "loss": 0.0106,
      "step": 1664440
    },
    {
      "epoch": 2.7239252960468177,
      "grad_norm": 0.3811459243297577,
      "learning_rate": 4.582144527981129e-06,
      "loss": 0.0186,
      "step": 1664460
    },
    {
      "epoch": 2.723958026485471,
      "grad_norm": 0.1487194001674652,
      "learning_rate": 4.582078635767612e-06,
      "loss": 0.0103,
      "step": 1664480
    },
    {
      "epoch": 2.7239907569241244,
      "grad_norm": 0.29655057191848755,
      "learning_rate": 4.582012743554095e-06,
      "loss": 0.0107,
      "step": 1664500
    },
    {
      "epoch": 2.7240234873627776,
      "grad_norm": 0.31893816590309143,
      "learning_rate": 4.5819468513405775e-06,
      "loss": 0.0152,
      "step": 1664520
    },
    {
      "epoch": 2.724056217801431,
      "grad_norm": 0.2742130756378174,
      "learning_rate": 4.58188095912706e-06,
      "loss": 0.0185,
      "step": 1664540
    },
    {
      "epoch": 2.7240889482400843,
      "grad_norm": 0.44481417536735535,
      "learning_rate": 4.581815066913543e-06,
      "loss": 0.0112,
      "step": 1664560
    },
    {
      "epoch": 2.7241216786787374,
      "grad_norm": 0.24860717356204987,
      "learning_rate": 4.581749174700026e-06,
      "loss": 0.0118,
      "step": 1664580
    },
    {
      "epoch": 2.724154409117391,
      "grad_norm": 0.223890483379364,
      "learning_rate": 4.5816832824865084e-06,
      "loss": 0.0159,
      "step": 1664600
    },
    {
      "epoch": 2.724187139556044,
      "grad_norm": 0.1025506854057312,
      "learning_rate": 4.581617390272991e-06,
      "loss": 0.0139,
      "step": 1664620
    },
    {
      "epoch": 2.724219869994698,
      "grad_norm": 0.8173882961273193,
      "learning_rate": 4.581551498059475e-06,
      "loss": 0.0143,
      "step": 1664640
    },
    {
      "epoch": 2.724252600433351,
      "grad_norm": 0.3934353291988373,
      "learning_rate": 4.5814856058459575e-06,
      "loss": 0.0254,
      "step": 1664660
    },
    {
      "epoch": 2.7242853308720045,
      "grad_norm": 0.313563734292984,
      "learning_rate": 4.58141971363244e-06,
      "loss": 0.0092,
      "step": 1664680
    },
    {
      "epoch": 2.7243180613106577,
      "grad_norm": 0.3107142448425293,
      "learning_rate": 4.581353821418924e-06,
      "loss": 0.0116,
      "step": 1664700
    },
    {
      "epoch": 2.724350791749311,
      "grad_norm": 0.4745866358280182,
      "learning_rate": 4.5812879292054066e-06,
      "loss": 0.0151,
      "step": 1664720
    },
    {
      "epoch": 2.7243835221879644,
      "grad_norm": 0.3966319262981415,
      "learning_rate": 4.581222036991889e-06,
      "loss": 0.0142,
      "step": 1664740
    },
    {
      "epoch": 2.7244162526266176,
      "grad_norm": 0.5143370628356934,
      "learning_rate": 4.581156144778372e-06,
      "loss": 0.0133,
      "step": 1664760
    },
    {
      "epoch": 2.724448983065271,
      "grad_norm": 0.09898507595062256,
      "learning_rate": 4.581090252564855e-06,
      "loss": 0.0179,
      "step": 1664780
    },
    {
      "epoch": 2.7244817135039243,
      "grad_norm": 0.0722876563668251,
      "learning_rate": 4.5810243603513375e-06,
      "loss": 0.0071,
      "step": 1664800
    },
    {
      "epoch": 2.724514443942578,
      "grad_norm": 0.3504106402397156,
      "learning_rate": 4.58095846813782e-06,
      "loss": 0.0119,
      "step": 1664820
    },
    {
      "epoch": 2.724547174381231,
      "grad_norm": 0.2185761034488678,
      "learning_rate": 4.580892575924303e-06,
      "loss": 0.01,
      "step": 1664840
    },
    {
      "epoch": 2.724579904819884,
      "grad_norm": 0.2554950714111328,
      "learning_rate": 4.580826683710787e-06,
      "loss": 0.011,
      "step": 1664860
    },
    {
      "epoch": 2.724612635258538,
      "grad_norm": 0.3236157298088074,
      "learning_rate": 4.580760791497269e-06,
      "loss": 0.0097,
      "step": 1664880
    },
    {
      "epoch": 2.724645365697191,
      "grad_norm": 0.6124314069747925,
      "learning_rate": 4.580694899283752e-06,
      "loss": 0.0118,
      "step": 1664900
    },
    {
      "epoch": 2.7246780961358446,
      "grad_norm": 0.19222715497016907,
      "learning_rate": 4.580629007070235e-06,
      "loss": 0.0102,
      "step": 1664920
    },
    {
      "epoch": 2.7247108265744977,
      "grad_norm": 0.3798859119415283,
      "learning_rate": 4.5805631148567176e-06,
      "loss": 0.0088,
      "step": 1664940
    },
    {
      "epoch": 2.7247435570131513,
      "grad_norm": 0.24168120324611664,
      "learning_rate": 4.5804972226432e-06,
      "loss": 0.0131,
      "step": 1664960
    },
    {
      "epoch": 2.7247762874518044,
      "grad_norm": 0.15691806375980377,
      "learning_rate": 4.580431330429683e-06,
      "loss": 0.0197,
      "step": 1664980
    },
    {
      "epoch": 2.7248090178904576,
      "grad_norm": 0.2514950931072235,
      "learning_rate": 4.580365438216167e-06,
      "loss": 0.0132,
      "step": 1665000
    },
    {
      "epoch": 2.724841748329111,
      "grad_norm": 0.3640977144241333,
      "learning_rate": 4.580299546002649e-06,
      "loss": 0.0137,
      "step": 1665020
    },
    {
      "epoch": 2.7248744787677643,
      "grad_norm": 0.48600658774375916,
      "learning_rate": 4.580233653789132e-06,
      "loss": 0.024,
      "step": 1665040
    },
    {
      "epoch": 2.724907209206418,
      "grad_norm": 0.2221880704164505,
      "learning_rate": 4.580167761575615e-06,
      "loss": 0.0102,
      "step": 1665060
    },
    {
      "epoch": 2.724939939645071,
      "grad_norm": 0.9154570698738098,
      "learning_rate": 4.580101869362098e-06,
      "loss": 0.0101,
      "step": 1665080
    },
    {
      "epoch": 2.7249726700837247,
      "grad_norm": 0.45490723848342896,
      "learning_rate": 4.580035977148581e-06,
      "loss": 0.012,
      "step": 1665100
    },
    {
      "epoch": 2.725005400522378,
      "grad_norm": 0.3675602674484253,
      "learning_rate": 4.579970084935064e-06,
      "loss": 0.0147,
      "step": 1665120
    },
    {
      "epoch": 2.725038130961031,
      "grad_norm": 0.07523449510335922,
      "learning_rate": 4.579904192721547e-06,
      "loss": 0.0139,
      "step": 1665140
    },
    {
      "epoch": 2.7250708613996846,
      "grad_norm": 0.41506829857826233,
      "learning_rate": 4.579838300508029e-06,
      "loss": 0.0106,
      "step": 1665160
    },
    {
      "epoch": 2.7251035918383377,
      "grad_norm": 0.16192515194416046,
      "learning_rate": 4.579772408294512e-06,
      "loss": 0.0139,
      "step": 1665180
    },
    {
      "epoch": 2.7251363222769913,
      "grad_norm": 0.19943712651729584,
      "learning_rate": 4.579706516080995e-06,
      "loss": 0.0164,
      "step": 1665200
    },
    {
      "epoch": 2.7251690527156445,
      "grad_norm": 1.4007407426834106,
      "learning_rate": 4.579640623867478e-06,
      "loss": 0.0145,
      "step": 1665220
    },
    {
      "epoch": 2.725201783154298,
      "grad_norm": 0.18804746866226196,
      "learning_rate": 4.57957473165396e-06,
      "loss": 0.0185,
      "step": 1665240
    },
    {
      "epoch": 2.725234513592951,
      "grad_norm": 0.5422937870025635,
      "learning_rate": 4.579508839440444e-06,
      "loss": 0.0144,
      "step": 1665260
    },
    {
      "epoch": 2.7252672440316044,
      "grad_norm": 0.19513903558254242,
      "learning_rate": 4.579442947226927e-06,
      "loss": 0.0105,
      "step": 1665280
    },
    {
      "epoch": 2.725299974470258,
      "grad_norm": 0.43005576729774475,
      "learning_rate": 4.579377055013409e-06,
      "loss": 0.0128,
      "step": 1665300
    },
    {
      "epoch": 2.725332704908911,
      "grad_norm": 0.057803183794021606,
      "learning_rate": 4.579311162799892e-06,
      "loss": 0.0116,
      "step": 1665320
    },
    {
      "epoch": 2.7253654353475643,
      "grad_norm": 0.27625834941864014,
      "learning_rate": 4.579245270586375e-06,
      "loss": 0.0109,
      "step": 1665340
    },
    {
      "epoch": 2.725398165786218,
      "grad_norm": 0.1943497210741043,
      "learning_rate": 4.5791793783728585e-06,
      "loss": 0.0169,
      "step": 1665360
    },
    {
      "epoch": 2.7254308962248714,
      "grad_norm": 0.4579117000102997,
      "learning_rate": 4.579113486159341e-06,
      "loss": 0.0143,
      "step": 1665380
    },
    {
      "epoch": 2.7254636266635246,
      "grad_norm": 0.38386234641075134,
      "learning_rate": 4.579047593945824e-06,
      "loss": 0.0174,
      "step": 1665400
    },
    {
      "epoch": 2.7254963571021777,
      "grad_norm": 0.4797854423522949,
      "learning_rate": 4.578981701732307e-06,
      "loss": 0.0138,
      "step": 1665420
    },
    {
      "epoch": 2.7255290875408313,
      "grad_norm": 0.4259769320487976,
      "learning_rate": 4.5789158095187894e-06,
      "loss": 0.0149,
      "step": 1665440
    },
    {
      "epoch": 2.7255618179794845,
      "grad_norm": 0.3153154253959656,
      "learning_rate": 4.578849917305272e-06,
      "loss": 0.0133,
      "step": 1665460
    },
    {
      "epoch": 2.7255945484181376,
      "grad_norm": 0.26849058270454407,
      "learning_rate": 4.578784025091756e-06,
      "loss": 0.0175,
      "step": 1665480
    },
    {
      "epoch": 2.7256272788567912,
      "grad_norm": 0.37675940990448,
      "learning_rate": 4.5787181328782385e-06,
      "loss": 0.017,
      "step": 1665500
    },
    {
      "epoch": 2.725660009295445,
      "grad_norm": 0.07258003950119019,
      "learning_rate": 4.578652240664721e-06,
      "loss": 0.0154,
      "step": 1665520
    },
    {
      "epoch": 2.725692739734098,
      "grad_norm": 0.5547388195991516,
      "learning_rate": 4.578586348451204e-06,
      "loss": 0.0181,
      "step": 1665540
    },
    {
      "epoch": 2.725725470172751,
      "grad_norm": 0.17101594805717468,
      "learning_rate": 4.578520456237687e-06,
      "loss": 0.0106,
      "step": 1665560
    },
    {
      "epoch": 2.7257582006114047,
      "grad_norm": 0.4929583966732025,
      "learning_rate": 4.5784545640241695e-06,
      "loss": 0.0208,
      "step": 1665580
    },
    {
      "epoch": 2.725790931050058,
      "grad_norm": 0.2219441831111908,
      "learning_rate": 4.578388671810652e-06,
      "loss": 0.0129,
      "step": 1665600
    },
    {
      "epoch": 2.725823661488711,
      "grad_norm": 0.6046427488327026,
      "learning_rate": 4.578322779597135e-06,
      "loss": 0.0173,
      "step": 1665620
    },
    {
      "epoch": 2.7258563919273646,
      "grad_norm": 0.8423275351524353,
      "learning_rate": 4.578256887383618e-06,
      "loss": 0.01,
      "step": 1665640
    },
    {
      "epoch": 2.725889122366018,
      "grad_norm": 0.992638111114502,
      "learning_rate": 4.578190995170101e-06,
      "loss": 0.0108,
      "step": 1665660
    },
    {
      "epoch": 2.7259218528046714,
      "grad_norm": 0.9764003753662109,
      "learning_rate": 4.578125102956584e-06,
      "loss": 0.0177,
      "step": 1665680
    },
    {
      "epoch": 2.7259545832433245,
      "grad_norm": 0.31470558047294617,
      "learning_rate": 4.578059210743067e-06,
      "loss": 0.0207,
      "step": 1665700
    },
    {
      "epoch": 2.725987313681978,
      "grad_norm": 0.1485319882631302,
      "learning_rate": 4.57799331852955e-06,
      "loss": 0.0181,
      "step": 1665720
    },
    {
      "epoch": 2.7260200441206313,
      "grad_norm": 0.49797770380973816,
      "learning_rate": 4.577927426316033e-06,
      "loss": 0.0119,
      "step": 1665740
    },
    {
      "epoch": 2.7260527745592844,
      "grad_norm": 0.4698261022567749,
      "learning_rate": 4.577861534102516e-06,
      "loss": 0.0119,
      "step": 1665760
    },
    {
      "epoch": 2.726085504997938,
      "grad_norm": 0.6573948264122009,
      "learning_rate": 4.5777956418889986e-06,
      "loss": 0.0126,
      "step": 1665780
    },
    {
      "epoch": 2.7261182354365916,
      "grad_norm": 0.2755483090877533,
      "learning_rate": 4.577729749675481e-06,
      "loss": 0.0198,
      "step": 1665800
    },
    {
      "epoch": 2.7261509658752447,
      "grad_norm": 0.4357644319534302,
      "learning_rate": 4.577663857461964e-06,
      "loss": 0.0138,
      "step": 1665820
    },
    {
      "epoch": 2.726183696313898,
      "grad_norm": 0.37021175026893616,
      "learning_rate": 4.577597965248447e-06,
      "loss": 0.0118,
      "step": 1665840
    },
    {
      "epoch": 2.7262164267525515,
      "grad_norm": 0.19560560584068298,
      "learning_rate": 4.5775320730349295e-06,
      "loss": 0.0105,
      "step": 1665860
    },
    {
      "epoch": 2.7262491571912046,
      "grad_norm": 0.6423625349998474,
      "learning_rate": 4.577466180821413e-06,
      "loss": 0.0132,
      "step": 1665880
    },
    {
      "epoch": 2.726281887629858,
      "grad_norm": 0.46073365211486816,
      "learning_rate": 4.577400288607896e-06,
      "loss": 0.0163,
      "step": 1665900
    },
    {
      "epoch": 2.7263146180685114,
      "grad_norm": 0.5289650559425354,
      "learning_rate": 4.577334396394379e-06,
      "loss": 0.014,
      "step": 1665920
    },
    {
      "epoch": 2.7263473485071645,
      "grad_norm": 0.14176373183727264,
      "learning_rate": 4.577268504180861e-06,
      "loss": 0.0134,
      "step": 1665940
    },
    {
      "epoch": 2.726380078945818,
      "grad_norm": 0.1800912767648697,
      "learning_rate": 4.577202611967344e-06,
      "loss": 0.0154,
      "step": 1665960
    },
    {
      "epoch": 2.7264128093844713,
      "grad_norm": 0.5803561210632324,
      "learning_rate": 4.577136719753827e-06,
      "loss": 0.0169,
      "step": 1665980
    },
    {
      "epoch": 2.726445539823125,
      "grad_norm": 0.1473734974861145,
      "learning_rate": 4.5770708275403095e-06,
      "loss": 0.0145,
      "step": 1666000
    },
    {
      "epoch": 2.726478270261778,
      "grad_norm": 0.4587085545063019,
      "learning_rate": 4.577004935326792e-06,
      "loss": 0.013,
      "step": 1666020
    },
    {
      "epoch": 2.726511000700431,
      "grad_norm": 0.20877227187156677,
      "learning_rate": 4.576939043113275e-06,
      "loss": 0.0115,
      "step": 1666040
    },
    {
      "epoch": 2.7265437311390848,
      "grad_norm": 0.2917502820491791,
      "learning_rate": 4.576873150899759e-06,
      "loss": 0.0215,
      "step": 1666060
    },
    {
      "epoch": 2.726576461577738,
      "grad_norm": 0.20051710307598114,
      "learning_rate": 4.576807258686241e-06,
      "loss": 0.01,
      "step": 1666080
    },
    {
      "epoch": 2.7266091920163915,
      "grad_norm": 0.5948827862739563,
      "learning_rate": 4.576741366472724e-06,
      "loss": 0.0116,
      "step": 1666100
    },
    {
      "epoch": 2.7266419224550447,
      "grad_norm": 0.509524941444397,
      "learning_rate": 4.576675474259208e-06,
      "loss": 0.0148,
      "step": 1666120
    },
    {
      "epoch": 2.7266746528936983,
      "grad_norm": 0.3936397433280945,
      "learning_rate": 4.57660958204569e-06,
      "loss": 0.0138,
      "step": 1666140
    },
    {
      "epoch": 2.7267073833323514,
      "grad_norm": 0.6955968141555786,
      "learning_rate": 4.576543689832173e-06,
      "loss": 0.0148,
      "step": 1666160
    },
    {
      "epoch": 2.7267401137710046,
      "grad_norm": 0.571099042892456,
      "learning_rate": 4.576477797618656e-06,
      "loss": 0.0083,
      "step": 1666180
    },
    {
      "epoch": 2.726772844209658,
      "grad_norm": 0.271961510181427,
      "learning_rate": 4.576411905405139e-06,
      "loss": 0.0109,
      "step": 1666200
    },
    {
      "epoch": 2.7268055746483113,
      "grad_norm": 0.5373695492744446,
      "learning_rate": 4.576346013191621e-06,
      "loss": 0.0137,
      "step": 1666220
    },
    {
      "epoch": 2.726838305086965,
      "grad_norm": 0.40008485317230225,
      "learning_rate": 4.576280120978104e-06,
      "loss": 0.018,
      "step": 1666240
    },
    {
      "epoch": 2.726871035525618,
      "grad_norm": 0.3678933084011078,
      "learning_rate": 4.576214228764587e-06,
      "loss": 0.0126,
      "step": 1666260
    },
    {
      "epoch": 2.7269037659642716,
      "grad_norm": 0.20571982860565186,
      "learning_rate": 4.5761483365510704e-06,
      "loss": 0.014,
      "step": 1666280
    },
    {
      "epoch": 2.726936496402925,
      "grad_norm": 0.23927174508571625,
      "learning_rate": 4.576082444337553e-06,
      "loss": 0.0132,
      "step": 1666300
    },
    {
      "epoch": 2.726969226841578,
      "grad_norm": 0.27333781123161316,
      "learning_rate": 4.576016552124036e-06,
      "loss": 0.012,
      "step": 1666320
    },
    {
      "epoch": 2.7270019572802315,
      "grad_norm": 0.7060815691947937,
      "learning_rate": 4.575950659910519e-06,
      "loss": 0.0189,
      "step": 1666340
    },
    {
      "epoch": 2.7270346877188847,
      "grad_norm": 0.3301982581615448,
      "learning_rate": 4.575884767697001e-06,
      "loss": 0.0166,
      "step": 1666360
    },
    {
      "epoch": 2.7270674181575383,
      "grad_norm": 0.6028242111206055,
      "learning_rate": 4.575818875483484e-06,
      "loss": 0.013,
      "step": 1666380
    },
    {
      "epoch": 2.7271001485961914,
      "grad_norm": 0.16149602830410004,
      "learning_rate": 4.575752983269967e-06,
      "loss": 0.0131,
      "step": 1666400
    },
    {
      "epoch": 2.727132879034845,
      "grad_norm": 0.4153786599636078,
      "learning_rate": 4.5756870910564505e-06,
      "loss": 0.0151,
      "step": 1666420
    },
    {
      "epoch": 2.727165609473498,
      "grad_norm": 0.6279217004776001,
      "learning_rate": 4.575621198842933e-06,
      "loss": 0.0119,
      "step": 1666440
    },
    {
      "epoch": 2.7271983399121513,
      "grad_norm": 0.12514536082744598,
      "learning_rate": 4.575555306629416e-06,
      "loss": 0.0115,
      "step": 1666460
    },
    {
      "epoch": 2.727231070350805,
      "grad_norm": 0.2580532133579254,
      "learning_rate": 4.575489414415899e-06,
      "loss": 0.0142,
      "step": 1666480
    },
    {
      "epoch": 2.727263800789458,
      "grad_norm": 0.1069699302315712,
      "learning_rate": 4.575423522202381e-06,
      "loss": 0.012,
      "step": 1666500
    },
    {
      "epoch": 2.7272965312281117,
      "grad_norm": 0.0928015485405922,
      "learning_rate": 4.575357629988865e-06,
      "loss": 0.0126,
      "step": 1666520
    },
    {
      "epoch": 2.727329261666765,
      "grad_norm": 0.3517664074897766,
      "learning_rate": 4.575291737775348e-06,
      "loss": 0.0125,
      "step": 1666540
    },
    {
      "epoch": 2.7273619921054184,
      "grad_norm": 0.4475000500679016,
      "learning_rate": 4.5752258455618305e-06,
      "loss": 0.0115,
      "step": 1666560
    },
    {
      "epoch": 2.7273947225440716,
      "grad_norm": 0.3273828327655792,
      "learning_rate": 4.575159953348313e-06,
      "loss": 0.0183,
      "step": 1666580
    },
    {
      "epoch": 2.7274274529827247,
      "grad_norm": 0.14445790648460388,
      "learning_rate": 4.575094061134796e-06,
      "loss": 0.0143,
      "step": 1666600
    },
    {
      "epoch": 2.7274601834213783,
      "grad_norm": 0.17133338749408722,
      "learning_rate": 4.575028168921279e-06,
      "loss": 0.0102,
      "step": 1666620
    },
    {
      "epoch": 2.7274929138600315,
      "grad_norm": 0.08071311563253403,
      "learning_rate": 4.5749622767077614e-06,
      "loss": 0.0105,
      "step": 1666640
    },
    {
      "epoch": 2.727525644298685,
      "grad_norm": 0.30577248334884644,
      "learning_rate": 4.574896384494244e-06,
      "loss": 0.0122,
      "step": 1666660
    },
    {
      "epoch": 2.727558374737338,
      "grad_norm": 0.14432752132415771,
      "learning_rate": 4.574830492280728e-06,
      "loss": 0.0146,
      "step": 1666680
    },
    {
      "epoch": 2.727591105175992,
      "grad_norm": 0.3621680438518524,
      "learning_rate": 4.5747646000672105e-06,
      "loss": 0.019,
      "step": 1666700
    },
    {
      "epoch": 2.727623835614645,
      "grad_norm": 0.3882715404033661,
      "learning_rate": 4.574698707853693e-06,
      "loss": 0.0109,
      "step": 1666720
    },
    {
      "epoch": 2.727656566053298,
      "grad_norm": 0.2620431184768677,
      "learning_rate": 4.574632815640176e-06,
      "loss": 0.0139,
      "step": 1666740
    },
    {
      "epoch": 2.7276892964919517,
      "grad_norm": 0.12532591819763184,
      "learning_rate": 4.57456692342666e-06,
      "loss": 0.0088,
      "step": 1666760
    },
    {
      "epoch": 2.727722026930605,
      "grad_norm": 0.2544850707054138,
      "learning_rate": 4.574501031213142e-06,
      "loss": 0.0135,
      "step": 1666780
    },
    {
      "epoch": 2.727754757369258,
      "grad_norm": 0.1559005081653595,
      "learning_rate": 4.574435138999625e-06,
      "loss": 0.0162,
      "step": 1666800
    },
    {
      "epoch": 2.7277874878079116,
      "grad_norm": 0.32253557443618774,
      "learning_rate": 4.574369246786108e-06,
      "loss": 0.0211,
      "step": 1666820
    },
    {
      "epoch": 2.727820218246565,
      "grad_norm": 0.2580523192882538,
      "learning_rate": 4.5743033545725905e-06,
      "loss": 0.0088,
      "step": 1666840
    },
    {
      "epoch": 2.7278529486852183,
      "grad_norm": 0.321537584066391,
      "learning_rate": 4.574237462359073e-06,
      "loss": 0.0166,
      "step": 1666860
    },
    {
      "epoch": 2.7278856791238715,
      "grad_norm": 0.15698961913585663,
      "learning_rate": 4.574171570145556e-06,
      "loss": 0.0139,
      "step": 1666880
    },
    {
      "epoch": 2.727918409562525,
      "grad_norm": 0.6687123775482178,
      "learning_rate": 4.57410567793204e-06,
      "loss": 0.0157,
      "step": 1666900
    },
    {
      "epoch": 2.727951140001178,
      "grad_norm": 0.14220090210437775,
      "learning_rate": 4.574039785718522e-06,
      "loss": 0.0084,
      "step": 1666920
    },
    {
      "epoch": 2.7279838704398314,
      "grad_norm": 0.26209700107574463,
      "learning_rate": 4.573973893505005e-06,
      "loss": 0.0151,
      "step": 1666940
    },
    {
      "epoch": 2.728016600878485,
      "grad_norm": 0.24954433739185333,
      "learning_rate": 4.573908001291488e-06,
      "loss": 0.0139,
      "step": 1666960
    },
    {
      "epoch": 2.7280493313171386,
      "grad_norm": 1.5305715799331665,
      "learning_rate": 4.5738421090779706e-06,
      "loss": 0.018,
      "step": 1666980
    },
    {
      "epoch": 2.7280820617557917,
      "grad_norm": 0.325807124376297,
      "learning_rate": 4.573776216864453e-06,
      "loss": 0.0137,
      "step": 1667000
    },
    {
      "epoch": 2.728114792194445,
      "grad_norm": 0.1191849634051323,
      "learning_rate": 4.573710324650936e-06,
      "loss": 0.015,
      "step": 1667020
    },
    {
      "epoch": 2.7281475226330985,
      "grad_norm": 0.5547603368759155,
      "learning_rate": 4.573644432437419e-06,
      "loss": 0.0161,
      "step": 1667040
    },
    {
      "epoch": 2.7281802530717516,
      "grad_norm": 0.40442007780075073,
      "learning_rate": 4.5735785402239015e-06,
      "loss": 0.0136,
      "step": 1667060
    },
    {
      "epoch": 2.7282129835104048,
      "grad_norm": 0.13038118183612823,
      "learning_rate": 4.573512648010385e-06,
      "loss": 0.0224,
      "step": 1667080
    },
    {
      "epoch": 2.7282457139490583,
      "grad_norm": 0.6222299337387085,
      "learning_rate": 4.573446755796868e-06,
      "loss": 0.0213,
      "step": 1667100
    },
    {
      "epoch": 2.728278444387712,
      "grad_norm": 0.15934322774410248,
      "learning_rate": 4.573380863583351e-06,
      "loss": 0.0167,
      "step": 1667120
    },
    {
      "epoch": 2.728311174826365,
      "grad_norm": 0.37862327694892883,
      "learning_rate": 4.573314971369834e-06,
      "loss": 0.0156,
      "step": 1667140
    },
    {
      "epoch": 2.7283439052650182,
      "grad_norm": 0.539448082447052,
      "learning_rate": 4.573249079156317e-06,
      "loss": 0.0128,
      "step": 1667160
    },
    {
      "epoch": 2.728376635703672,
      "grad_norm": 0.5383533239364624,
      "learning_rate": 4.5731831869428e-06,
      "loss": 0.0191,
      "step": 1667180
    },
    {
      "epoch": 2.728409366142325,
      "grad_norm": 0.1730869710445404,
      "learning_rate": 4.573117294729282e-06,
      "loss": 0.011,
      "step": 1667200
    },
    {
      "epoch": 2.728442096580978,
      "grad_norm": 0.08419975638389587,
      "learning_rate": 4.573051402515765e-06,
      "loss": 0.0208,
      "step": 1667220
    },
    {
      "epoch": 2.7284748270196317,
      "grad_norm": 0.0930667296051979,
      "learning_rate": 4.572985510302248e-06,
      "loss": 0.0138,
      "step": 1667240
    },
    {
      "epoch": 2.7285075574582853,
      "grad_norm": 0.42129284143447876,
      "learning_rate": 4.572919618088731e-06,
      "loss": 0.0115,
      "step": 1667260
    },
    {
      "epoch": 2.7285402878969385,
      "grad_norm": 0.08157259970903397,
      "learning_rate": 4.572853725875213e-06,
      "loss": 0.0159,
      "step": 1667280
    },
    {
      "epoch": 2.7285730183355916,
      "grad_norm": 0.11393159627914429,
      "learning_rate": 4.572787833661697e-06,
      "loss": 0.0184,
      "step": 1667300
    },
    {
      "epoch": 2.728605748774245,
      "grad_norm": 0.3743521273136139,
      "learning_rate": 4.57272194144818e-06,
      "loss": 0.0167,
      "step": 1667320
    },
    {
      "epoch": 2.7286384792128984,
      "grad_norm": 0.3355010449886322,
      "learning_rate": 4.572656049234662e-06,
      "loss": 0.0178,
      "step": 1667340
    },
    {
      "epoch": 2.7286712096515515,
      "grad_norm": 0.3447244465351105,
      "learning_rate": 4.572590157021145e-06,
      "loss": 0.0161,
      "step": 1667360
    },
    {
      "epoch": 2.728703940090205,
      "grad_norm": 0.20336806774139404,
      "learning_rate": 4.572524264807628e-06,
      "loss": 0.0127,
      "step": 1667380
    },
    {
      "epoch": 2.7287366705288583,
      "grad_norm": 0.259406715631485,
      "learning_rate": 4.572458372594111e-06,
      "loss": 0.0116,
      "step": 1667400
    },
    {
      "epoch": 2.728769400967512,
      "grad_norm": 0.3382081389427185,
      "learning_rate": 4.572392480380593e-06,
      "loss": 0.015,
      "step": 1667420
    },
    {
      "epoch": 2.728802131406165,
      "grad_norm": 0.2540108561515808,
      "learning_rate": 4.572326588167076e-06,
      "loss": 0.0146,
      "step": 1667440
    },
    {
      "epoch": 2.7288348618448186,
      "grad_norm": 0.14111627638339996,
      "learning_rate": 4.57226069595356e-06,
      "loss": 0.0182,
      "step": 1667460
    },
    {
      "epoch": 2.7288675922834718,
      "grad_norm": 0.2647261619567871,
      "learning_rate": 4.5721948037400424e-06,
      "loss": 0.0127,
      "step": 1667480
    },
    {
      "epoch": 2.728900322722125,
      "grad_norm": 0.04568730667233467,
      "learning_rate": 4.572128911526525e-06,
      "loss": 0.019,
      "step": 1667500
    },
    {
      "epoch": 2.7289330531607785,
      "grad_norm": 0.3683417737483978,
      "learning_rate": 4.572063019313008e-06,
      "loss": 0.0218,
      "step": 1667520
    },
    {
      "epoch": 2.7289657835994316,
      "grad_norm": 0.10185996443033218,
      "learning_rate": 4.5719971270994915e-06,
      "loss": 0.0118,
      "step": 1667540
    },
    {
      "epoch": 2.7289985140380852,
      "grad_norm": 0.21649274230003357,
      "learning_rate": 4.571931234885974e-06,
      "loss": 0.0181,
      "step": 1667560
    },
    {
      "epoch": 2.7290312444767384,
      "grad_norm": 0.9576385617256165,
      "learning_rate": 4.571865342672457e-06,
      "loss": 0.0138,
      "step": 1667580
    },
    {
      "epoch": 2.729063974915392,
      "grad_norm": 0.12371526658535004,
      "learning_rate": 4.57179945045894e-06,
      "loss": 0.0121,
      "step": 1667600
    },
    {
      "epoch": 2.729096705354045,
      "grad_norm": 0.12387910485267639,
      "learning_rate": 4.5717335582454225e-06,
      "loss": 0.011,
      "step": 1667620
    },
    {
      "epoch": 2.7291294357926983,
      "grad_norm": 0.3306712210178375,
      "learning_rate": 4.571667666031905e-06,
      "loss": 0.0142,
      "step": 1667640
    },
    {
      "epoch": 2.729162166231352,
      "grad_norm": 0.3626790940761566,
      "learning_rate": 4.571601773818388e-06,
      "loss": 0.0162,
      "step": 1667660
    },
    {
      "epoch": 2.729194896670005,
      "grad_norm": 0.7512821555137634,
      "learning_rate": 4.571535881604871e-06,
      "loss": 0.01,
      "step": 1667680
    },
    {
      "epoch": 2.7292276271086586,
      "grad_norm": 0.674755871295929,
      "learning_rate": 4.571469989391354e-06,
      "loss": 0.0117,
      "step": 1667700
    },
    {
      "epoch": 2.7292603575473118,
      "grad_norm": 0.07780705392360687,
      "learning_rate": 4.571404097177837e-06,
      "loss": 0.0105,
      "step": 1667720
    },
    {
      "epoch": 2.7292930879859654,
      "grad_norm": 0.4491012394428253,
      "learning_rate": 4.57133820496432e-06,
      "loss": 0.0139,
      "step": 1667740
    },
    {
      "epoch": 2.7293258184246185,
      "grad_norm": 0.2550455331802368,
      "learning_rate": 4.5712723127508025e-06,
      "loss": 0.0127,
      "step": 1667760
    },
    {
      "epoch": 2.7293585488632717,
      "grad_norm": 0.6294836401939392,
      "learning_rate": 4.571206420537285e-06,
      "loss": 0.0155,
      "step": 1667780
    },
    {
      "epoch": 2.7293912793019253,
      "grad_norm": 0.45655953884124756,
      "learning_rate": 4.571140528323768e-06,
      "loss": 0.0151,
      "step": 1667800
    },
    {
      "epoch": 2.7294240097405784,
      "grad_norm": 0.37689268589019775,
      "learning_rate": 4.5710746361102516e-06,
      "loss": 0.0169,
      "step": 1667820
    },
    {
      "epoch": 2.729456740179232,
      "grad_norm": 0.41567766666412354,
      "learning_rate": 4.571008743896734e-06,
      "loss": 0.0187,
      "step": 1667840
    },
    {
      "epoch": 2.729489470617885,
      "grad_norm": 0.46205300092697144,
      "learning_rate": 4.570942851683217e-06,
      "loss": 0.011,
      "step": 1667860
    },
    {
      "epoch": 2.7295222010565388,
      "grad_norm": 0.41324642300605774,
      "learning_rate": 4.5708769594697e-06,
      "loss": 0.0124,
      "step": 1667880
    },
    {
      "epoch": 2.729554931495192,
      "grad_norm": 0.5739789009094238,
      "learning_rate": 4.5708110672561825e-06,
      "loss": 0.0172,
      "step": 1667900
    },
    {
      "epoch": 2.729587661933845,
      "grad_norm": 0.1474066972732544,
      "learning_rate": 4.570745175042666e-06,
      "loss": 0.0191,
      "step": 1667920
    },
    {
      "epoch": 2.7296203923724986,
      "grad_norm": 0.23492950201034546,
      "learning_rate": 4.570679282829149e-06,
      "loss": 0.0094,
      "step": 1667940
    },
    {
      "epoch": 2.729653122811152,
      "grad_norm": 0.6406270265579224,
      "learning_rate": 4.570613390615632e-06,
      "loss": 0.0137,
      "step": 1667960
    },
    {
      "epoch": 2.7296858532498054,
      "grad_norm": 2.115123987197876,
      "learning_rate": 4.570547498402114e-06,
      "loss": 0.0219,
      "step": 1667980
    },
    {
      "epoch": 2.7297185836884585,
      "grad_norm": 0.5158718824386597,
      "learning_rate": 4.570481606188597e-06,
      "loss": 0.0147,
      "step": 1668000
    },
    {
      "epoch": 2.729751314127112,
      "grad_norm": 0.42343854904174805,
      "learning_rate": 4.57041571397508e-06,
      "loss": 0.0115,
      "step": 1668020
    },
    {
      "epoch": 2.7297840445657653,
      "grad_norm": 0.14026093482971191,
      "learning_rate": 4.5703498217615625e-06,
      "loss": 0.0101,
      "step": 1668040
    },
    {
      "epoch": 2.7298167750044184,
      "grad_norm": 0.29350027441978455,
      "learning_rate": 4.570283929548045e-06,
      "loss": 0.0109,
      "step": 1668060
    },
    {
      "epoch": 2.729849505443072,
      "grad_norm": 0.10499266535043716,
      "learning_rate": 4.570218037334528e-06,
      "loss": 0.0117,
      "step": 1668080
    },
    {
      "epoch": 2.729882235881725,
      "grad_norm": 0.6583012342453003,
      "learning_rate": 4.570152145121012e-06,
      "loss": 0.0164,
      "step": 1668100
    },
    {
      "epoch": 2.7299149663203788,
      "grad_norm": 0.17100054025650024,
      "learning_rate": 4.570086252907494e-06,
      "loss": 0.0218,
      "step": 1668120
    },
    {
      "epoch": 2.729947696759032,
      "grad_norm": 0.3402368724346161,
      "learning_rate": 4.570020360693977e-06,
      "loss": 0.0206,
      "step": 1668140
    },
    {
      "epoch": 2.7299804271976855,
      "grad_norm": 0.3006207048892975,
      "learning_rate": 4.569954468480461e-06,
      "loss": 0.0153,
      "step": 1668160
    },
    {
      "epoch": 2.7300131576363387,
      "grad_norm": 0.11238501965999603,
      "learning_rate": 4.569888576266943e-06,
      "loss": 0.0211,
      "step": 1668180
    },
    {
      "epoch": 2.730045888074992,
      "grad_norm": 1.2729699611663818,
      "learning_rate": 4.569822684053426e-06,
      "loss": 0.0135,
      "step": 1668200
    },
    {
      "epoch": 2.7300786185136454,
      "grad_norm": 0.13483409583568573,
      "learning_rate": 4.569756791839909e-06,
      "loss": 0.0077,
      "step": 1668220
    },
    {
      "epoch": 2.7301113489522986,
      "grad_norm": 0.4561970829963684,
      "learning_rate": 4.569690899626392e-06,
      "loss": 0.0126,
      "step": 1668240
    },
    {
      "epoch": 2.730144079390952,
      "grad_norm": 0.3271411061286926,
      "learning_rate": 4.569625007412874e-06,
      "loss": 0.013,
      "step": 1668260
    },
    {
      "epoch": 2.7301768098296053,
      "grad_norm": 0.1568472534418106,
      "learning_rate": 4.569559115199357e-06,
      "loss": 0.0087,
      "step": 1668280
    },
    {
      "epoch": 2.730209540268259,
      "grad_norm": 0.16512788832187653,
      "learning_rate": 4.56949322298584e-06,
      "loss": 0.0109,
      "step": 1668300
    },
    {
      "epoch": 2.730242270706912,
      "grad_norm": 0.30149760842323303,
      "learning_rate": 4.5694273307723234e-06,
      "loss": 0.0109,
      "step": 1668320
    },
    {
      "epoch": 2.730275001145565,
      "grad_norm": 0.3505147695541382,
      "learning_rate": 4.569361438558806e-06,
      "loss": 0.0136,
      "step": 1668340
    },
    {
      "epoch": 2.730307731584219,
      "grad_norm": 0.41297441720962524,
      "learning_rate": 4.569295546345289e-06,
      "loss": 0.0119,
      "step": 1668360
    },
    {
      "epoch": 2.730340462022872,
      "grad_norm": 0.2949470281600952,
      "learning_rate": 4.569229654131772e-06,
      "loss": 0.0152,
      "step": 1668380
    },
    {
      "epoch": 2.730373192461525,
      "grad_norm": 0.2891760766506195,
      "learning_rate": 4.569163761918254e-06,
      "loss": 0.0107,
      "step": 1668400
    },
    {
      "epoch": 2.7304059229001787,
      "grad_norm": 0.3503069281578064,
      "learning_rate": 4.569097869704737e-06,
      "loss": 0.0139,
      "step": 1668420
    },
    {
      "epoch": 2.7304386533388323,
      "grad_norm": 0.6954478025436401,
      "learning_rate": 4.56903197749122e-06,
      "loss": 0.0184,
      "step": 1668440
    },
    {
      "epoch": 2.7304713837774854,
      "grad_norm": 0.1436637043952942,
      "learning_rate": 4.568966085277703e-06,
      "loss": 0.0137,
      "step": 1668460
    },
    {
      "epoch": 2.7305041142161386,
      "grad_norm": 0.3878895342350006,
      "learning_rate": 4.568900193064185e-06,
      "loss": 0.0129,
      "step": 1668480
    },
    {
      "epoch": 2.730536844654792,
      "grad_norm": 2.491366147994995,
      "learning_rate": 4.568834300850669e-06,
      "loss": 0.0172,
      "step": 1668500
    },
    {
      "epoch": 2.7305695750934453,
      "grad_norm": 0.76151442527771,
      "learning_rate": 4.568768408637152e-06,
      "loss": 0.0152,
      "step": 1668520
    },
    {
      "epoch": 2.7306023055320985,
      "grad_norm": 0.19313442707061768,
      "learning_rate": 4.5687025164236344e-06,
      "loss": 0.0151,
      "step": 1668540
    },
    {
      "epoch": 2.730635035970752,
      "grad_norm": 0.27433526515960693,
      "learning_rate": 4.568636624210118e-06,
      "loss": 0.0206,
      "step": 1668560
    },
    {
      "epoch": 2.7306677664094057,
      "grad_norm": 0.17545098066329956,
      "learning_rate": 4.568570731996601e-06,
      "loss": 0.0105,
      "step": 1668580
    },
    {
      "epoch": 2.730700496848059,
      "grad_norm": 0.8940134644508362,
      "learning_rate": 4.5685048397830835e-06,
      "loss": 0.0138,
      "step": 1668600
    },
    {
      "epoch": 2.730733227286712,
      "grad_norm": 0.36444738507270813,
      "learning_rate": 4.568438947569566e-06,
      "loss": 0.0104,
      "step": 1668620
    },
    {
      "epoch": 2.7307659577253656,
      "grad_norm": 0.8312705159187317,
      "learning_rate": 4.568373055356049e-06,
      "loss": 0.0144,
      "step": 1668640
    },
    {
      "epoch": 2.7307986881640187,
      "grad_norm": 0.10545815527439117,
      "learning_rate": 4.568307163142532e-06,
      "loss": 0.0106,
      "step": 1668660
    },
    {
      "epoch": 2.730831418602672,
      "grad_norm": 0.23493078351020813,
      "learning_rate": 4.5682412709290145e-06,
      "loss": 0.0108,
      "step": 1668680
    },
    {
      "epoch": 2.7308641490413255,
      "grad_norm": 0.2934820055961609,
      "learning_rate": 4.568175378715497e-06,
      "loss": 0.0123,
      "step": 1668700
    },
    {
      "epoch": 2.730896879479979,
      "grad_norm": 0.6352644562721252,
      "learning_rate": 4.568109486501981e-06,
      "loss": 0.0112,
      "step": 1668720
    },
    {
      "epoch": 2.730929609918632,
      "grad_norm": 0.22948725521564484,
      "learning_rate": 4.5680435942884635e-06,
      "loss": 0.0107,
      "step": 1668740
    },
    {
      "epoch": 2.7309623403572854,
      "grad_norm": 0.08283958584070206,
      "learning_rate": 4.567977702074946e-06,
      "loss": 0.0109,
      "step": 1668760
    },
    {
      "epoch": 2.730995070795939,
      "grad_norm": 0.7505985498428345,
      "learning_rate": 4.567911809861429e-06,
      "loss": 0.0174,
      "step": 1668780
    },
    {
      "epoch": 2.731027801234592,
      "grad_norm": 0.4810255765914917,
      "learning_rate": 4.567845917647912e-06,
      "loss": 0.0181,
      "step": 1668800
    },
    {
      "epoch": 2.7310605316732453,
      "grad_norm": 0.772810161113739,
      "learning_rate": 4.5677800254343945e-06,
      "loss": 0.0178,
      "step": 1668820
    },
    {
      "epoch": 2.731093262111899,
      "grad_norm": 0.3216798007488251,
      "learning_rate": 4.567714133220877e-06,
      "loss": 0.0168,
      "step": 1668840
    },
    {
      "epoch": 2.7311259925505524,
      "grad_norm": 0.16926877200603485,
      "learning_rate": 4.56764824100736e-06,
      "loss": 0.0134,
      "step": 1668860
    },
    {
      "epoch": 2.7311587229892056,
      "grad_norm": 0.502705991268158,
      "learning_rate": 4.5675823487938435e-06,
      "loss": 0.0135,
      "step": 1668880
    },
    {
      "epoch": 2.7311914534278587,
      "grad_norm": 0.53682541847229,
      "learning_rate": 4.567516456580326e-06,
      "loss": 0.0178,
      "step": 1668900
    },
    {
      "epoch": 2.7312241838665123,
      "grad_norm": 0.1412515789270401,
      "learning_rate": 4.567450564366809e-06,
      "loss": 0.0116,
      "step": 1668920
    },
    {
      "epoch": 2.7312569143051655,
      "grad_norm": 0.29572391510009766,
      "learning_rate": 4.567384672153292e-06,
      "loss": 0.0103,
      "step": 1668940
    },
    {
      "epoch": 2.7312896447438186,
      "grad_norm": 0.12442275881767273,
      "learning_rate": 4.567318779939775e-06,
      "loss": 0.0184,
      "step": 1668960
    },
    {
      "epoch": 2.7313223751824722,
      "grad_norm": 1.1626216173171997,
      "learning_rate": 4.567252887726258e-06,
      "loss": 0.0132,
      "step": 1668980
    },
    {
      "epoch": 2.7313551056211254,
      "grad_norm": 0.3270966708660126,
      "learning_rate": 4.567186995512741e-06,
      "loss": 0.0113,
      "step": 1669000
    },
    {
      "epoch": 2.731387836059779,
      "grad_norm": 0.329733669757843,
      "learning_rate": 4.5671211032992236e-06,
      "loss": 0.0195,
      "step": 1669020
    },
    {
      "epoch": 2.731420566498432,
      "grad_norm": 0.24939903616905212,
      "learning_rate": 4.567055211085706e-06,
      "loss": 0.0168,
      "step": 1669040
    },
    {
      "epoch": 2.7314532969370857,
      "grad_norm": 0.4351133108139038,
      "learning_rate": 4.566989318872189e-06,
      "loss": 0.0138,
      "step": 1669060
    },
    {
      "epoch": 2.731486027375739,
      "grad_norm": 0.17333374917507172,
      "learning_rate": 4.566923426658672e-06,
      "loss": 0.0142,
      "step": 1669080
    },
    {
      "epoch": 2.731518757814392,
      "grad_norm": 0.26645371317863464,
      "learning_rate": 4.5668575344451545e-06,
      "loss": 0.013,
      "step": 1669100
    },
    {
      "epoch": 2.7315514882530456,
      "grad_norm": 0.33438533544540405,
      "learning_rate": 4.566791642231638e-06,
      "loss": 0.0128,
      "step": 1669120
    },
    {
      "epoch": 2.7315842186916988,
      "grad_norm": 0.1399035006761551,
      "learning_rate": 4.566725750018121e-06,
      "loss": 0.0146,
      "step": 1669140
    },
    {
      "epoch": 2.7316169491303524,
      "grad_norm": 0.3124290108680725,
      "learning_rate": 4.566659857804604e-06,
      "loss": 0.0159,
      "step": 1669160
    },
    {
      "epoch": 2.7316496795690055,
      "grad_norm": 0.4661206007003784,
      "learning_rate": 4.566593965591086e-06,
      "loss": 0.0115,
      "step": 1669180
    },
    {
      "epoch": 2.731682410007659,
      "grad_norm": 0.04981021210551262,
      "learning_rate": 4.566528073377569e-06,
      "loss": 0.0109,
      "step": 1669200
    },
    {
      "epoch": 2.7317151404463123,
      "grad_norm": 0.26014357805252075,
      "learning_rate": 4.566462181164053e-06,
      "loss": 0.0137,
      "step": 1669220
    },
    {
      "epoch": 2.7317478708849654,
      "grad_norm": 0.24893103539943695,
      "learning_rate": 4.566396288950535e-06,
      "loss": 0.0152,
      "step": 1669240
    },
    {
      "epoch": 2.731780601323619,
      "grad_norm": 0.1667579561471939,
      "learning_rate": 4.566330396737018e-06,
      "loss": 0.0165,
      "step": 1669260
    },
    {
      "epoch": 2.731813331762272,
      "grad_norm": 0.34170883893966675,
      "learning_rate": 4.566264504523501e-06,
      "loss": 0.0116,
      "step": 1669280
    },
    {
      "epoch": 2.7318460622009257,
      "grad_norm": 0.1672619879245758,
      "learning_rate": 4.566198612309984e-06,
      "loss": 0.0173,
      "step": 1669300
    },
    {
      "epoch": 2.731878792639579,
      "grad_norm": 0.19906316697597504,
      "learning_rate": 4.566132720096466e-06,
      "loss": 0.0131,
      "step": 1669320
    },
    {
      "epoch": 2.7319115230782325,
      "grad_norm": 0.42842063307762146,
      "learning_rate": 4.56606682788295e-06,
      "loss": 0.0162,
      "step": 1669340
    },
    {
      "epoch": 2.7319442535168856,
      "grad_norm": 0.36034655570983887,
      "learning_rate": 4.566000935669433e-06,
      "loss": 0.0167,
      "step": 1669360
    },
    {
      "epoch": 2.731976983955539,
      "grad_norm": 0.30398571491241455,
      "learning_rate": 4.5659350434559154e-06,
      "loss": 0.0083,
      "step": 1669380
    },
    {
      "epoch": 2.7320097143941924,
      "grad_norm": 0.8155253529548645,
      "learning_rate": 4.565869151242398e-06,
      "loss": 0.0192,
      "step": 1669400
    },
    {
      "epoch": 2.7320424448328455,
      "grad_norm": 0.31622153520584106,
      "learning_rate": 4.565803259028881e-06,
      "loss": 0.0139,
      "step": 1669420
    },
    {
      "epoch": 2.732075175271499,
      "grad_norm": 0.6576920747756958,
      "learning_rate": 4.565737366815364e-06,
      "loss": 0.0133,
      "step": 1669440
    },
    {
      "epoch": 2.7321079057101523,
      "grad_norm": 0.4084145128726959,
      "learning_rate": 4.565671474601846e-06,
      "loss": 0.022,
      "step": 1669460
    },
    {
      "epoch": 2.732140636148806,
      "grad_norm": 0.8967509269714355,
      "learning_rate": 4.565605582388329e-06,
      "loss": 0.0151,
      "step": 1669480
    },
    {
      "epoch": 2.732173366587459,
      "grad_norm": 0.12968207895755768,
      "learning_rate": 4.565539690174812e-06,
      "loss": 0.012,
      "step": 1669500
    },
    {
      "epoch": 2.732206097026112,
      "grad_norm": 0.418791800737381,
      "learning_rate": 4.5654737979612954e-06,
      "loss": 0.0136,
      "step": 1669520
    },
    {
      "epoch": 2.7322388274647658,
      "grad_norm": 0.40268585085868835,
      "learning_rate": 4.565407905747778e-06,
      "loss": 0.0156,
      "step": 1669540
    },
    {
      "epoch": 2.732271557903419,
      "grad_norm": 0.1974959373474121,
      "learning_rate": 4.565342013534261e-06,
      "loss": 0.0174,
      "step": 1669560
    },
    {
      "epoch": 2.7323042883420725,
      "grad_norm": 0.07542098313570023,
      "learning_rate": 4.5652761213207445e-06,
      "loss": 0.0135,
      "step": 1669580
    },
    {
      "epoch": 2.7323370187807257,
      "grad_norm": 0.2120320349931717,
      "learning_rate": 4.565210229107227e-06,
      "loss": 0.0143,
      "step": 1669600
    },
    {
      "epoch": 2.7323697492193793,
      "grad_norm": 0.4760640561580658,
      "learning_rate": 4.56514433689371e-06,
      "loss": 0.0095,
      "step": 1669620
    },
    {
      "epoch": 2.7324024796580324,
      "grad_norm": 0.24604690074920654,
      "learning_rate": 4.565078444680193e-06,
      "loss": 0.0108,
      "step": 1669640
    },
    {
      "epoch": 2.7324352100966856,
      "grad_norm": 0.47142189741134644,
      "learning_rate": 4.5650125524666755e-06,
      "loss": 0.0109,
      "step": 1669660
    },
    {
      "epoch": 2.732467940535339,
      "grad_norm": 0.2020067572593689,
      "learning_rate": 4.564946660253158e-06,
      "loss": 0.0179,
      "step": 1669680
    },
    {
      "epoch": 2.7325006709739923,
      "grad_norm": 0.36545854806900024,
      "learning_rate": 4.564880768039641e-06,
      "loss": 0.0128,
      "step": 1669700
    },
    {
      "epoch": 2.732533401412646,
      "grad_norm": 1.1579079627990723,
      "learning_rate": 4.564814875826124e-06,
      "loss": 0.0172,
      "step": 1669720
    },
    {
      "epoch": 2.732566131851299,
      "grad_norm": 0.15112414956092834,
      "learning_rate": 4.564748983612607e-06,
      "loss": 0.0166,
      "step": 1669740
    },
    {
      "epoch": 2.7325988622899526,
      "grad_norm": 0.09000884741544724,
      "learning_rate": 4.56468309139909e-06,
      "loss": 0.0117,
      "step": 1669760
    },
    {
      "epoch": 2.732631592728606,
      "grad_norm": 0.42993849515914917,
      "learning_rate": 4.564617199185573e-06,
      "loss": 0.013,
      "step": 1669780
    },
    {
      "epoch": 2.732664323167259,
      "grad_norm": 0.3639823794364929,
      "learning_rate": 4.5645513069720555e-06,
      "loss": 0.0127,
      "step": 1669800
    },
    {
      "epoch": 2.7326970536059125,
      "grad_norm": 0.3397689163684845,
      "learning_rate": 4.564485414758538e-06,
      "loss": 0.0164,
      "step": 1669820
    },
    {
      "epoch": 2.7327297840445657,
      "grad_norm": 0.3533385396003723,
      "learning_rate": 4.564419522545021e-06,
      "loss": 0.0111,
      "step": 1669840
    },
    {
      "epoch": 2.732762514483219,
      "grad_norm": 0.4274173676967621,
      "learning_rate": 4.564353630331504e-06,
      "loss": 0.0149,
      "step": 1669860
    },
    {
      "epoch": 2.7327952449218724,
      "grad_norm": 0.5481433272361755,
      "learning_rate": 4.5642877381179865e-06,
      "loss": 0.0158,
      "step": 1669880
    },
    {
      "epoch": 2.732827975360526,
      "grad_norm": 0.3979032039642334,
      "learning_rate": 4.564221845904469e-06,
      "loss": 0.0187,
      "step": 1669900
    },
    {
      "epoch": 2.732860705799179,
      "grad_norm": 0.5795654058456421,
      "learning_rate": 4.564155953690953e-06,
      "loss": 0.0099,
      "step": 1669920
    },
    {
      "epoch": 2.7328934362378323,
      "grad_norm": 0.37973684072494507,
      "learning_rate": 4.5640900614774355e-06,
      "loss": 0.0102,
      "step": 1669940
    },
    {
      "epoch": 2.732926166676486,
      "grad_norm": 0.3256279528141022,
      "learning_rate": 4.564024169263918e-06,
      "loss": 0.0118,
      "step": 1669960
    },
    {
      "epoch": 2.732958897115139,
      "grad_norm": 0.31887149810791016,
      "learning_rate": 4.563958277050402e-06,
      "loss": 0.01,
      "step": 1669980
    },
    {
      "epoch": 2.732991627553792,
      "grad_norm": 0.6233091354370117,
      "learning_rate": 4.563892384836885e-06,
      "loss": 0.0127,
      "step": 1670000
    },
    {
      "epoch": 2.733024357992446,
      "grad_norm": 0.25008487701416016,
      "learning_rate": 4.563826492623367e-06,
      "loss": 0.0159,
      "step": 1670020
    },
    {
      "epoch": 2.7330570884310994,
      "grad_norm": 0.04862333834171295,
      "learning_rate": 4.56376060040985e-06,
      "loss": 0.0116,
      "step": 1670040
    },
    {
      "epoch": 2.7330898188697526,
      "grad_norm": 0.2590235471725464,
      "learning_rate": 4.563694708196333e-06,
      "loss": 0.0113,
      "step": 1670060
    },
    {
      "epoch": 2.7331225493084057,
      "grad_norm": 1.5448706150054932,
      "learning_rate": 4.5636288159828156e-06,
      "loss": 0.0135,
      "step": 1670080
    },
    {
      "epoch": 2.7331552797470593,
      "grad_norm": 0.4662764072418213,
      "learning_rate": 4.563562923769298e-06,
      "loss": 0.0114,
      "step": 1670100
    },
    {
      "epoch": 2.7331880101857124,
      "grad_norm": 0.2071281373500824,
      "learning_rate": 4.563497031555781e-06,
      "loss": 0.0222,
      "step": 1670120
    },
    {
      "epoch": 2.7332207406243656,
      "grad_norm": 0.3570096492767334,
      "learning_rate": 4.563431139342265e-06,
      "loss": 0.0177,
      "step": 1670140
    },
    {
      "epoch": 2.733253471063019,
      "grad_norm": 1.6463055610656738,
      "learning_rate": 4.563365247128747e-06,
      "loss": 0.0197,
      "step": 1670160
    },
    {
      "epoch": 2.733286201501673,
      "grad_norm": 0.5079171061515808,
      "learning_rate": 4.56329935491523e-06,
      "loss": 0.009,
      "step": 1670180
    },
    {
      "epoch": 2.733318931940326,
      "grad_norm": 0.13454249501228333,
      "learning_rate": 4.563233462701713e-06,
      "loss": 0.0111,
      "step": 1670200
    },
    {
      "epoch": 2.733351662378979,
      "grad_norm": 2.105703115463257,
      "learning_rate": 4.5631675704881956e-06,
      "loss": 0.0123,
      "step": 1670220
    },
    {
      "epoch": 2.7333843928176327,
      "grad_norm": 0.26279231905937195,
      "learning_rate": 4.563101678274678e-06,
      "loss": 0.0221,
      "step": 1670240
    },
    {
      "epoch": 2.733417123256286,
      "grad_norm": 0.10839977115392685,
      "learning_rate": 4.563035786061161e-06,
      "loss": 0.0126,
      "step": 1670260
    },
    {
      "epoch": 2.733449853694939,
      "grad_norm": 0.2771401107311249,
      "learning_rate": 4.562969893847645e-06,
      "loss": 0.0137,
      "step": 1670280
    },
    {
      "epoch": 2.7334825841335926,
      "grad_norm": 0.45088815689086914,
      "learning_rate": 4.562904001634127e-06,
      "loss": 0.016,
      "step": 1670300
    },
    {
      "epoch": 2.733515314572246,
      "grad_norm": 1.2803120613098145,
      "learning_rate": 4.56283810942061e-06,
      "loss": 0.0114,
      "step": 1670320
    },
    {
      "epoch": 2.7335480450108993,
      "grad_norm": 0.2546391189098358,
      "learning_rate": 4.562772217207093e-06,
      "loss": 0.0163,
      "step": 1670340
    },
    {
      "epoch": 2.7335807754495525,
      "grad_norm": 0.16039909422397614,
      "learning_rate": 4.562706324993576e-06,
      "loss": 0.012,
      "step": 1670360
    },
    {
      "epoch": 2.733613505888206,
      "grad_norm": 0.2583467364311218,
      "learning_rate": 4.562640432780059e-06,
      "loss": 0.0125,
      "step": 1670380
    },
    {
      "epoch": 2.733646236326859,
      "grad_norm": 0.3831869065761566,
      "learning_rate": 4.562574540566542e-06,
      "loss": 0.009,
      "step": 1670400
    },
    {
      "epoch": 2.7336789667655124,
      "grad_norm": 0.2691061198711395,
      "learning_rate": 4.562508648353025e-06,
      "loss": 0.0175,
      "step": 1670420
    },
    {
      "epoch": 2.733711697204166,
      "grad_norm": 0.2450745552778244,
      "learning_rate": 4.562442756139507e-06,
      "loss": 0.0209,
      "step": 1670440
    },
    {
      "epoch": 2.733744427642819,
      "grad_norm": 0.4345819056034088,
      "learning_rate": 4.56237686392599e-06,
      "loss": 0.0211,
      "step": 1670460
    },
    {
      "epoch": 2.7337771580814727,
      "grad_norm": 0.5512691736221313,
      "learning_rate": 4.562310971712473e-06,
      "loss": 0.0088,
      "step": 1670480
    },
    {
      "epoch": 2.733809888520126,
      "grad_norm": 0.4249345362186432,
      "learning_rate": 4.562245079498956e-06,
      "loss": 0.0095,
      "step": 1670500
    },
    {
      "epoch": 2.7338426189587794,
      "grad_norm": 0.16257666051387787,
      "learning_rate": 4.562179187285438e-06,
      "loss": 0.011,
      "step": 1670520
    },
    {
      "epoch": 2.7338753493974326,
      "grad_norm": 0.06453289091587067,
      "learning_rate": 4.562113295071922e-06,
      "loss": 0.0136,
      "step": 1670540
    },
    {
      "epoch": 2.7339080798360857,
      "grad_norm": 0.16872748732566833,
      "learning_rate": 4.562047402858405e-06,
      "loss": 0.0159,
      "step": 1670560
    },
    {
      "epoch": 2.7339408102747393,
      "grad_norm": 0.31770825386047363,
      "learning_rate": 4.5619815106448874e-06,
      "loss": 0.0106,
      "step": 1670580
    },
    {
      "epoch": 2.7339735407133925,
      "grad_norm": 0.2779119312763214,
      "learning_rate": 4.56191561843137e-06,
      "loss": 0.014,
      "step": 1670600
    },
    {
      "epoch": 2.734006271152046,
      "grad_norm": 0.9803679585456848,
      "learning_rate": 4.561849726217853e-06,
      "loss": 0.0131,
      "step": 1670620
    },
    {
      "epoch": 2.7340390015906992,
      "grad_norm": 0.3517445921897888,
      "learning_rate": 4.5617838340043365e-06,
      "loss": 0.0133,
      "step": 1670640
    },
    {
      "epoch": 2.734071732029353,
      "grad_norm": 0.09880372881889343,
      "learning_rate": 4.561717941790819e-06,
      "loss": 0.0171,
      "step": 1670660
    },
    {
      "epoch": 2.734104462468006,
      "grad_norm": 0.8623262643814087,
      "learning_rate": 4.561652049577302e-06,
      "loss": 0.0168,
      "step": 1670680
    },
    {
      "epoch": 2.734137192906659,
      "grad_norm": 0.2715059220790863,
      "learning_rate": 4.561586157363785e-06,
      "loss": 0.0151,
      "step": 1670700
    },
    {
      "epoch": 2.7341699233453127,
      "grad_norm": 0.270323246717453,
      "learning_rate": 4.5615202651502675e-06,
      "loss": 0.0204,
      "step": 1670720
    },
    {
      "epoch": 2.734202653783966,
      "grad_norm": 0.14503724873065948,
      "learning_rate": 4.56145437293675e-06,
      "loss": 0.0135,
      "step": 1670740
    },
    {
      "epoch": 2.7342353842226195,
      "grad_norm": 1.0478402376174927,
      "learning_rate": 4.561388480723234e-06,
      "loss": 0.0146,
      "step": 1670760
    },
    {
      "epoch": 2.7342681146612726,
      "grad_norm": 0.7905227541923523,
      "learning_rate": 4.5613225885097165e-06,
      "loss": 0.0152,
      "step": 1670780
    },
    {
      "epoch": 2.734300845099926,
      "grad_norm": 0.6745022535324097,
      "learning_rate": 4.561256696296199e-06,
      "loss": 0.0146,
      "step": 1670800
    },
    {
      "epoch": 2.7343335755385794,
      "grad_norm": 0.4315326511859894,
      "learning_rate": 4.561190804082682e-06,
      "loss": 0.0164,
      "step": 1670820
    },
    {
      "epoch": 2.7343663059772325,
      "grad_norm": 1.0189613103866577,
      "learning_rate": 4.561124911869165e-06,
      "loss": 0.0126,
      "step": 1670840
    },
    {
      "epoch": 2.734399036415886,
      "grad_norm": 0.1302822083234787,
      "learning_rate": 4.5610590196556475e-06,
      "loss": 0.0121,
      "step": 1670860
    },
    {
      "epoch": 2.7344317668545393,
      "grad_norm": 0.49908018112182617,
      "learning_rate": 4.56099312744213e-06,
      "loss": 0.0156,
      "step": 1670880
    },
    {
      "epoch": 2.734464497293193,
      "grad_norm": 0.20674195885658264,
      "learning_rate": 4.560927235228613e-06,
      "loss": 0.0148,
      "step": 1670900
    },
    {
      "epoch": 2.734497227731846,
      "grad_norm": 0.5697701573371887,
      "learning_rate": 4.560861343015096e-06,
      "loss": 0.0154,
      "step": 1670920
    },
    {
      "epoch": 2.7345299581704996,
      "grad_norm": 0.2211829125881195,
      "learning_rate": 4.560795450801579e-06,
      "loss": 0.0129,
      "step": 1670940
    },
    {
      "epoch": 2.7345626886091527,
      "grad_norm": 0.4788278639316559,
      "learning_rate": 4.560729558588062e-06,
      "loss": 0.0113,
      "step": 1670960
    },
    {
      "epoch": 2.734595419047806,
      "grad_norm": 0.3852521777153015,
      "learning_rate": 4.560663666374545e-06,
      "loss": 0.0119,
      "step": 1670980
    },
    {
      "epoch": 2.7346281494864595,
      "grad_norm": 0.11224868148565292,
      "learning_rate": 4.560597774161028e-06,
      "loss": 0.0166,
      "step": 1671000
    },
    {
      "epoch": 2.7346608799251126,
      "grad_norm": 0.1878996342420578,
      "learning_rate": 4.560531881947511e-06,
      "loss": 0.0095,
      "step": 1671020
    },
    {
      "epoch": 2.7346936103637662,
      "grad_norm": 0.14628024399280548,
      "learning_rate": 4.560465989733994e-06,
      "loss": 0.0136,
      "step": 1671040
    },
    {
      "epoch": 2.7347263408024194,
      "grad_norm": 0.17578968405723572,
      "learning_rate": 4.5604000975204766e-06,
      "loss": 0.021,
      "step": 1671060
    },
    {
      "epoch": 2.734759071241073,
      "grad_norm": 0.16940367221832275,
      "learning_rate": 4.560334205306959e-06,
      "loss": 0.0081,
      "step": 1671080
    },
    {
      "epoch": 2.734791801679726,
      "grad_norm": 0.19824562966823578,
      "learning_rate": 4.560268313093442e-06,
      "loss": 0.013,
      "step": 1671100
    },
    {
      "epoch": 2.7348245321183793,
      "grad_norm": 0.23768822848796844,
      "learning_rate": 4.560202420879925e-06,
      "loss": 0.0221,
      "step": 1671120
    },
    {
      "epoch": 2.734857262557033,
      "grad_norm": 0.2802738845348358,
      "learning_rate": 4.5601365286664075e-06,
      "loss": 0.0121,
      "step": 1671140
    },
    {
      "epoch": 2.734889992995686,
      "grad_norm": 0.0870857909321785,
      "learning_rate": 4.560070636452891e-06,
      "loss": 0.0127,
      "step": 1671160
    },
    {
      "epoch": 2.7349227234343396,
      "grad_norm": 0.45781826972961426,
      "learning_rate": 4.560004744239374e-06,
      "loss": 0.015,
      "step": 1671180
    },
    {
      "epoch": 2.7349554538729928,
      "grad_norm": 0.5287936329841614,
      "learning_rate": 4.559938852025857e-06,
      "loss": 0.0122,
      "step": 1671200
    },
    {
      "epoch": 2.7349881843116464,
      "grad_norm": 0.654873788356781,
      "learning_rate": 4.559872959812339e-06,
      "loss": 0.0117,
      "step": 1671220
    },
    {
      "epoch": 2.7350209147502995,
      "grad_norm": 0.5035253763198853,
      "learning_rate": 4.559807067598822e-06,
      "loss": 0.0164,
      "step": 1671240
    },
    {
      "epoch": 2.7350536451889527,
      "grad_norm": 0.15033778548240662,
      "learning_rate": 4.559741175385305e-06,
      "loss": 0.0132,
      "step": 1671260
    },
    {
      "epoch": 2.7350863756276063,
      "grad_norm": 0.18842357397079468,
      "learning_rate": 4.5596752831717876e-06,
      "loss": 0.0103,
      "step": 1671280
    },
    {
      "epoch": 2.7351191060662594,
      "grad_norm": 0.3140008747577667,
      "learning_rate": 4.55960939095827e-06,
      "loss": 0.0121,
      "step": 1671300
    },
    {
      "epoch": 2.735151836504913,
      "grad_norm": 0.31271201372146606,
      "learning_rate": 4.559543498744753e-06,
      "loss": 0.0095,
      "step": 1671320
    },
    {
      "epoch": 2.735184566943566,
      "grad_norm": 0.39820563793182373,
      "learning_rate": 4.559477606531237e-06,
      "loss": 0.0162,
      "step": 1671340
    },
    {
      "epoch": 2.7352172973822197,
      "grad_norm": 0.33779045939445496,
      "learning_rate": 4.559411714317719e-06,
      "loss": 0.0161,
      "step": 1671360
    },
    {
      "epoch": 2.735250027820873,
      "grad_norm": 0.3153544068336487,
      "learning_rate": 4.559345822104202e-06,
      "loss": 0.013,
      "step": 1671380
    },
    {
      "epoch": 2.735282758259526,
      "grad_norm": 0.42109230160713196,
      "learning_rate": 4.559279929890686e-06,
      "loss": 0.0148,
      "step": 1671400
    },
    {
      "epoch": 2.7353154886981796,
      "grad_norm": 0.23794937133789062,
      "learning_rate": 4.5592140376771684e-06,
      "loss": 0.0112,
      "step": 1671420
    },
    {
      "epoch": 2.735348219136833,
      "grad_norm": 0.3806122839450836,
      "learning_rate": 4.559148145463651e-06,
      "loss": 0.0139,
      "step": 1671440
    },
    {
      "epoch": 2.735380949575486,
      "grad_norm": 0.22757934033870697,
      "learning_rate": 4.559082253250134e-06,
      "loss": 0.0146,
      "step": 1671460
    },
    {
      "epoch": 2.7354136800141395,
      "grad_norm": 1.0983033180236816,
      "learning_rate": 4.559016361036617e-06,
      "loss": 0.0143,
      "step": 1671480
    },
    {
      "epoch": 2.735446410452793,
      "grad_norm": 0.5671526789665222,
      "learning_rate": 4.558950468823099e-06,
      "loss": 0.0127,
      "step": 1671500
    },
    {
      "epoch": 2.7354791408914463,
      "grad_norm": 0.22839994728565216,
      "learning_rate": 4.558884576609582e-06,
      "loss": 0.0104,
      "step": 1671520
    },
    {
      "epoch": 2.7355118713300994,
      "grad_norm": 0.20187869668006897,
      "learning_rate": 4.558818684396065e-06,
      "loss": 0.0135,
      "step": 1671540
    },
    {
      "epoch": 2.735544601768753,
      "grad_norm": 0.21902896463871002,
      "learning_rate": 4.5587527921825485e-06,
      "loss": 0.0096,
      "step": 1671560
    },
    {
      "epoch": 2.735577332207406,
      "grad_norm": 0.1591075360774994,
      "learning_rate": 4.558686899969031e-06,
      "loss": 0.0135,
      "step": 1671580
    },
    {
      "epoch": 2.7356100626460593,
      "grad_norm": 0.10980793088674545,
      "learning_rate": 4.558621007755514e-06,
      "loss": 0.0126,
      "step": 1671600
    },
    {
      "epoch": 2.735642793084713,
      "grad_norm": 0.45397549867630005,
      "learning_rate": 4.558555115541997e-06,
      "loss": 0.0139,
      "step": 1671620
    },
    {
      "epoch": 2.7356755235233665,
      "grad_norm": 0.406380832195282,
      "learning_rate": 4.558489223328479e-06,
      "loss": 0.0143,
      "step": 1671640
    },
    {
      "epoch": 2.7357082539620197,
      "grad_norm": 0.3684781789779663,
      "learning_rate": 4.558423331114962e-06,
      "loss": 0.0084,
      "step": 1671660
    },
    {
      "epoch": 2.735740984400673,
      "grad_norm": 0.9526561498641968,
      "learning_rate": 4.558357438901446e-06,
      "loss": 0.0167,
      "step": 1671680
    },
    {
      "epoch": 2.7357737148393264,
      "grad_norm": 0.4282820224761963,
      "learning_rate": 4.5582915466879285e-06,
      "loss": 0.0215,
      "step": 1671700
    },
    {
      "epoch": 2.7358064452779796,
      "grad_norm": 0.7087728381156921,
      "learning_rate": 4.558225654474411e-06,
      "loss": 0.0154,
      "step": 1671720
    },
    {
      "epoch": 2.7358391757166327,
      "grad_norm": 0.17413173615932465,
      "learning_rate": 4.558159762260894e-06,
      "loss": 0.0121,
      "step": 1671740
    },
    {
      "epoch": 2.7358719061552863,
      "grad_norm": 0.39579012989997864,
      "learning_rate": 4.558093870047377e-06,
      "loss": 0.0154,
      "step": 1671760
    },
    {
      "epoch": 2.73590463659394,
      "grad_norm": 0.22845710813999176,
      "learning_rate": 4.5580279778338594e-06,
      "loss": 0.0207,
      "step": 1671780
    },
    {
      "epoch": 2.735937367032593,
      "grad_norm": 0.5509227514266968,
      "learning_rate": 4.557962085620343e-06,
      "loss": 0.0161,
      "step": 1671800
    },
    {
      "epoch": 2.735970097471246,
      "grad_norm": 0.386234849691391,
      "learning_rate": 4.557896193406826e-06,
      "loss": 0.0129,
      "step": 1671820
    },
    {
      "epoch": 2.7360028279099,
      "grad_norm": 0.11127692461013794,
      "learning_rate": 4.5578303011933085e-06,
      "loss": 0.0116,
      "step": 1671840
    },
    {
      "epoch": 2.736035558348553,
      "grad_norm": 0.4033021628856659,
      "learning_rate": 4.557764408979791e-06,
      "loss": 0.0151,
      "step": 1671860
    },
    {
      "epoch": 2.736068288787206,
      "grad_norm": 0.7724740505218506,
      "learning_rate": 4.557698516766274e-06,
      "loss": 0.0162,
      "step": 1671880
    },
    {
      "epoch": 2.7361010192258597,
      "grad_norm": 0.3007231056690216,
      "learning_rate": 4.557632624552757e-06,
      "loss": 0.0226,
      "step": 1671900
    },
    {
      "epoch": 2.736133749664513,
      "grad_norm": 0.17863446474075317,
      "learning_rate": 4.5575667323392395e-06,
      "loss": 0.0134,
      "step": 1671920
    },
    {
      "epoch": 2.7361664801031664,
      "grad_norm": 0.5176066756248474,
      "learning_rate": 4.557500840125722e-06,
      "loss": 0.0121,
      "step": 1671940
    },
    {
      "epoch": 2.7361992105418196,
      "grad_norm": 0.712306559085846,
      "learning_rate": 4.557434947912206e-06,
      "loss": 0.0128,
      "step": 1671960
    },
    {
      "epoch": 2.736231940980473,
      "grad_norm": 0.2365625500679016,
      "learning_rate": 4.5573690556986885e-06,
      "loss": 0.016,
      "step": 1671980
    },
    {
      "epoch": 2.7362646714191263,
      "grad_norm": 0.27459755539894104,
      "learning_rate": 4.557303163485171e-06,
      "loss": 0.0134,
      "step": 1672000
    },
    {
      "epoch": 2.7362974018577795,
      "grad_norm": 0.2872340679168701,
      "learning_rate": 4.557237271271654e-06,
      "loss": 0.0126,
      "step": 1672020
    },
    {
      "epoch": 2.736330132296433,
      "grad_norm": 0.3254101574420929,
      "learning_rate": 4.557171379058138e-06,
      "loss": 0.0156,
      "step": 1672040
    },
    {
      "epoch": 2.736362862735086,
      "grad_norm": 0.4834229648113251,
      "learning_rate": 4.55710548684462e-06,
      "loss": 0.0105,
      "step": 1672060
    },
    {
      "epoch": 2.73639559317374,
      "grad_norm": 0.83316969871521,
      "learning_rate": 4.557039594631103e-06,
      "loss": 0.016,
      "step": 1672080
    },
    {
      "epoch": 2.736428323612393,
      "grad_norm": 0.22392411530017853,
      "learning_rate": 4.556973702417586e-06,
      "loss": 0.0159,
      "step": 1672100
    },
    {
      "epoch": 2.7364610540510466,
      "grad_norm": 0.4871215522289276,
      "learning_rate": 4.5569078102040686e-06,
      "loss": 0.01,
      "step": 1672120
    },
    {
      "epoch": 2.7364937844896997,
      "grad_norm": 0.32988688349723816,
      "learning_rate": 4.556841917990551e-06,
      "loss": 0.0135,
      "step": 1672140
    },
    {
      "epoch": 2.736526514928353,
      "grad_norm": 0.10982397943735123,
      "learning_rate": 4.556776025777034e-06,
      "loss": 0.0112,
      "step": 1672160
    },
    {
      "epoch": 2.7365592453670065,
      "grad_norm": 0.10557237267494202,
      "learning_rate": 4.556710133563518e-06,
      "loss": 0.0124,
      "step": 1672180
    },
    {
      "epoch": 2.7365919758056596,
      "grad_norm": 0.3249448239803314,
      "learning_rate": 4.55664424135e-06,
      "loss": 0.012,
      "step": 1672200
    },
    {
      "epoch": 2.736624706244313,
      "grad_norm": 0.388895720243454,
      "learning_rate": 4.556578349136483e-06,
      "loss": 0.0112,
      "step": 1672220
    },
    {
      "epoch": 2.7366574366829663,
      "grad_norm": 0.27010616660118103,
      "learning_rate": 4.556512456922966e-06,
      "loss": 0.022,
      "step": 1672240
    },
    {
      "epoch": 2.73669016712162,
      "grad_norm": 0.2708877623081207,
      "learning_rate": 4.556446564709449e-06,
      "loss": 0.0188,
      "step": 1672260
    },
    {
      "epoch": 2.736722897560273,
      "grad_norm": 0.22238482534885406,
      "learning_rate": 4.556380672495931e-06,
      "loss": 0.01,
      "step": 1672280
    },
    {
      "epoch": 2.7367556279989262,
      "grad_norm": 0.5054548382759094,
      "learning_rate": 4.556314780282414e-06,
      "loss": 0.0137,
      "step": 1672300
    },
    {
      "epoch": 2.73678835843758,
      "grad_norm": 0.418081134557724,
      "learning_rate": 4.556248888068897e-06,
      "loss": 0.0215,
      "step": 1672320
    },
    {
      "epoch": 2.736821088876233,
      "grad_norm": 0.26904183626174927,
      "learning_rate": 4.5561829958553795e-06,
      "loss": 0.0104,
      "step": 1672340
    },
    {
      "epoch": 2.7368538193148866,
      "grad_norm": 0.2614084780216217,
      "learning_rate": 4.556117103641863e-06,
      "loss": 0.0125,
      "step": 1672360
    },
    {
      "epoch": 2.7368865497535397,
      "grad_norm": 0.32272303104400635,
      "learning_rate": 4.556051211428346e-06,
      "loss": 0.015,
      "step": 1672380
    },
    {
      "epoch": 2.7369192801921933,
      "grad_norm": 0.4130342900753021,
      "learning_rate": 4.555985319214829e-06,
      "loss": 0.0151,
      "step": 1672400
    },
    {
      "epoch": 2.7369520106308465,
      "grad_norm": 0.21414291858673096,
      "learning_rate": 4.555919427001312e-06,
      "loss": 0.0153,
      "step": 1672420
    },
    {
      "epoch": 2.7369847410694996,
      "grad_norm": 0.5345759987831116,
      "learning_rate": 4.555853534787795e-06,
      "loss": 0.0212,
      "step": 1672440
    },
    {
      "epoch": 2.737017471508153,
      "grad_norm": 0.26465606689453125,
      "learning_rate": 4.555787642574278e-06,
      "loss": 0.0109,
      "step": 1672460
    },
    {
      "epoch": 2.7370502019468064,
      "grad_norm": 1.6495299339294434,
      "learning_rate": 4.55572175036076e-06,
      "loss": 0.0156,
      "step": 1672480
    },
    {
      "epoch": 2.73708293238546,
      "grad_norm": 0.4883122146129608,
      "learning_rate": 4.555655858147243e-06,
      "loss": 0.0156,
      "step": 1672500
    },
    {
      "epoch": 2.737115662824113,
      "grad_norm": 0.43961143493652344,
      "learning_rate": 4.555589965933726e-06,
      "loss": 0.0116,
      "step": 1672520
    },
    {
      "epoch": 2.7371483932627667,
      "grad_norm": 0.30225351452827454,
      "learning_rate": 4.555524073720209e-06,
      "loss": 0.0167,
      "step": 1672540
    },
    {
      "epoch": 2.73718112370142,
      "grad_norm": 0.34471964836120605,
      "learning_rate": 4.555458181506691e-06,
      "loss": 0.0156,
      "step": 1672560
    },
    {
      "epoch": 2.737213854140073,
      "grad_norm": 0.20620575547218323,
      "learning_rate": 4.555392289293175e-06,
      "loss": 0.0128,
      "step": 1672580
    },
    {
      "epoch": 2.7372465845787266,
      "grad_norm": 0.3441266119480133,
      "learning_rate": 4.555326397079658e-06,
      "loss": 0.0152,
      "step": 1672600
    },
    {
      "epoch": 2.7372793150173798,
      "grad_norm": 0.24099457263946533,
      "learning_rate": 4.5552605048661404e-06,
      "loss": 0.008,
      "step": 1672620
    },
    {
      "epoch": 2.7373120454560333,
      "grad_norm": 0.46747007966041565,
      "learning_rate": 4.555194612652623e-06,
      "loss": 0.0165,
      "step": 1672640
    },
    {
      "epoch": 2.7373447758946865,
      "grad_norm": 0.19381679594516754,
      "learning_rate": 4.555128720439106e-06,
      "loss": 0.0105,
      "step": 1672660
    },
    {
      "epoch": 2.73737750633334,
      "grad_norm": 0.18558599054813385,
      "learning_rate": 4.555062828225589e-06,
      "loss": 0.0084,
      "step": 1672680
    },
    {
      "epoch": 2.7374102367719932,
      "grad_norm": 0.07979629933834076,
      "learning_rate": 4.554996936012071e-06,
      "loss": 0.013,
      "step": 1672700
    },
    {
      "epoch": 2.7374429672106464,
      "grad_norm": 0.5560704469680786,
      "learning_rate": 4.554931043798554e-06,
      "loss": 0.0103,
      "step": 1672720
    },
    {
      "epoch": 2.7374756976493,
      "grad_norm": 0.3772340714931488,
      "learning_rate": 4.554865151585038e-06,
      "loss": 0.0136,
      "step": 1672740
    },
    {
      "epoch": 2.737508428087953,
      "grad_norm": 0.3268738389015198,
      "learning_rate": 4.5547992593715205e-06,
      "loss": 0.0146,
      "step": 1672760
    },
    {
      "epoch": 2.7375411585266067,
      "grad_norm": 0.3887737989425659,
      "learning_rate": 4.554733367158003e-06,
      "loss": 0.0137,
      "step": 1672780
    },
    {
      "epoch": 2.73757388896526,
      "grad_norm": 0.5358777046203613,
      "learning_rate": 4.554667474944486e-06,
      "loss": 0.012,
      "step": 1672800
    },
    {
      "epoch": 2.7376066194039135,
      "grad_norm": 0.3004398047924042,
      "learning_rate": 4.5546015827309695e-06,
      "loss": 0.0168,
      "step": 1672820
    },
    {
      "epoch": 2.7376393498425666,
      "grad_norm": 0.7263381481170654,
      "learning_rate": 4.554535690517452e-06,
      "loss": 0.0194,
      "step": 1672840
    },
    {
      "epoch": 2.7376720802812198,
      "grad_norm": 0.1433221697807312,
      "learning_rate": 4.554469798303935e-06,
      "loss": 0.0121,
      "step": 1672860
    },
    {
      "epoch": 2.7377048107198734,
      "grad_norm": 0.14437304437160492,
      "learning_rate": 4.554403906090418e-06,
      "loss": 0.0175,
      "step": 1672880
    },
    {
      "epoch": 2.7377375411585265,
      "grad_norm": 0.2714195251464844,
      "learning_rate": 4.5543380138769005e-06,
      "loss": 0.0164,
      "step": 1672900
    },
    {
      "epoch": 2.7377702715971797,
      "grad_norm": 0.4801013767719269,
      "learning_rate": 4.554272121663383e-06,
      "loss": 0.0108,
      "step": 1672920
    },
    {
      "epoch": 2.7378030020358333,
      "grad_norm": 0.47130271792411804,
      "learning_rate": 4.554206229449866e-06,
      "loss": 0.0111,
      "step": 1672940
    },
    {
      "epoch": 2.737835732474487,
      "grad_norm": 0.1996147632598877,
      "learning_rate": 4.554140337236349e-06,
      "loss": 0.0201,
      "step": 1672960
    },
    {
      "epoch": 2.73786846291314,
      "grad_norm": 0.16399650275707245,
      "learning_rate": 4.554074445022832e-06,
      "loss": 0.0108,
      "step": 1672980
    },
    {
      "epoch": 2.737901193351793,
      "grad_norm": 0.08246447890996933,
      "learning_rate": 4.554008552809315e-06,
      "loss": 0.013,
      "step": 1673000
    },
    {
      "epoch": 2.7379339237904468,
      "grad_norm": 1.7099924087524414,
      "learning_rate": 4.553942660595798e-06,
      "loss": 0.0141,
      "step": 1673020
    },
    {
      "epoch": 2.7379666542291,
      "grad_norm": 0.07896499335765839,
      "learning_rate": 4.5538767683822805e-06,
      "loss": 0.0095,
      "step": 1673040
    },
    {
      "epoch": 2.737999384667753,
      "grad_norm": 0.347821444272995,
      "learning_rate": 4.553810876168763e-06,
      "loss": 0.0099,
      "step": 1673060
    },
    {
      "epoch": 2.7380321151064066,
      "grad_norm": 0.28133174777030945,
      "learning_rate": 4.553744983955246e-06,
      "loss": 0.0088,
      "step": 1673080
    },
    {
      "epoch": 2.7380648455450602,
      "grad_norm": 0.2485089898109436,
      "learning_rate": 4.55367909174173e-06,
      "loss": 0.0119,
      "step": 1673100
    },
    {
      "epoch": 2.7380975759837134,
      "grad_norm": 0.20678341388702393,
      "learning_rate": 4.553613199528212e-06,
      "loss": 0.0114,
      "step": 1673120
    },
    {
      "epoch": 2.7381303064223665,
      "grad_norm": 0.38267186284065247,
      "learning_rate": 4.553547307314695e-06,
      "loss": 0.0148,
      "step": 1673140
    },
    {
      "epoch": 2.73816303686102,
      "grad_norm": 0.27269110083580017,
      "learning_rate": 4.553481415101178e-06,
      "loss": 0.0137,
      "step": 1673160
    },
    {
      "epoch": 2.7381957672996733,
      "grad_norm": 0.21685174107551575,
      "learning_rate": 4.5534155228876605e-06,
      "loss": 0.012,
      "step": 1673180
    },
    {
      "epoch": 2.7382284977383264,
      "grad_norm": 0.2544102668762207,
      "learning_rate": 4.553349630674144e-06,
      "loss": 0.0149,
      "step": 1673200
    },
    {
      "epoch": 2.73826122817698,
      "grad_norm": 0.3100687861442566,
      "learning_rate": 4.553283738460627e-06,
      "loss": 0.01,
      "step": 1673220
    },
    {
      "epoch": 2.7382939586156336,
      "grad_norm": 0.7311344146728516,
      "learning_rate": 4.55321784624711e-06,
      "loss": 0.0103,
      "step": 1673240
    },
    {
      "epoch": 2.7383266890542868,
      "grad_norm": 0.04458686336874962,
      "learning_rate": 4.553151954033592e-06,
      "loss": 0.0187,
      "step": 1673260
    },
    {
      "epoch": 2.73835941949294,
      "grad_norm": 0.10488112270832062,
      "learning_rate": 4.553086061820075e-06,
      "loss": 0.0157,
      "step": 1673280
    },
    {
      "epoch": 2.7383921499315935,
      "grad_norm": 0.254617303609848,
      "learning_rate": 4.553020169606558e-06,
      "loss": 0.013,
      "step": 1673300
    },
    {
      "epoch": 2.7384248803702467,
      "grad_norm": 0.37067651748657227,
      "learning_rate": 4.5529542773930406e-06,
      "loss": 0.0089,
      "step": 1673320
    },
    {
      "epoch": 2.7384576108089,
      "grad_norm": 0.1299859881401062,
      "learning_rate": 4.552888385179523e-06,
      "loss": 0.0107,
      "step": 1673340
    },
    {
      "epoch": 2.7384903412475534,
      "grad_norm": 0.5994515419006348,
      "learning_rate": 4.552822492966006e-06,
      "loss": 0.0122,
      "step": 1673360
    },
    {
      "epoch": 2.738523071686207,
      "grad_norm": 0.32270383834838867,
      "learning_rate": 4.55275660075249e-06,
      "loss": 0.0095,
      "step": 1673380
    },
    {
      "epoch": 2.73855580212486,
      "grad_norm": 0.24840788543224335,
      "learning_rate": 4.552690708538972e-06,
      "loss": 0.0167,
      "step": 1673400
    },
    {
      "epoch": 2.7385885325635133,
      "grad_norm": 0.1532953381538391,
      "learning_rate": 4.552624816325455e-06,
      "loss": 0.0144,
      "step": 1673420
    },
    {
      "epoch": 2.738621263002167,
      "grad_norm": 0.3819369375705719,
      "learning_rate": 4.552558924111939e-06,
      "loss": 0.0116,
      "step": 1673440
    },
    {
      "epoch": 2.73865399344082,
      "grad_norm": 0.5490268468856812,
      "learning_rate": 4.5524930318984214e-06,
      "loss": 0.0157,
      "step": 1673460
    },
    {
      "epoch": 2.738686723879473,
      "grad_norm": 0.3211447298526764,
      "learning_rate": 4.552427139684904e-06,
      "loss": 0.0111,
      "step": 1673480
    },
    {
      "epoch": 2.738719454318127,
      "grad_norm": 0.34726130962371826,
      "learning_rate": 4.552361247471387e-06,
      "loss": 0.0123,
      "step": 1673500
    },
    {
      "epoch": 2.73875218475678,
      "grad_norm": 0.1435438096523285,
      "learning_rate": 4.55229535525787e-06,
      "loss": 0.0147,
      "step": 1673520
    },
    {
      "epoch": 2.7387849151954335,
      "grad_norm": 0.5103548169136047,
      "learning_rate": 4.552229463044352e-06,
      "loss": 0.0149,
      "step": 1673540
    },
    {
      "epoch": 2.7388176456340867,
      "grad_norm": 0.30046018958091736,
      "learning_rate": 4.552163570830835e-06,
      "loss": 0.0146,
      "step": 1673560
    },
    {
      "epoch": 2.7388503760727403,
      "grad_norm": 0.10782236605882645,
      "learning_rate": 4.552097678617318e-06,
      "loss": 0.0131,
      "step": 1673580
    },
    {
      "epoch": 2.7388831065113934,
      "grad_norm": 0.17089596390724182,
      "learning_rate": 4.5520317864038015e-06,
      "loss": 0.0202,
      "step": 1673600
    },
    {
      "epoch": 2.7389158369500466,
      "grad_norm": 0.1399996429681778,
      "learning_rate": 4.551965894190284e-06,
      "loss": 0.0123,
      "step": 1673620
    },
    {
      "epoch": 2.7389485673887,
      "grad_norm": 0.46301352977752686,
      "learning_rate": 4.551900001976767e-06,
      "loss": 0.0145,
      "step": 1673640
    },
    {
      "epoch": 2.7389812978273533,
      "grad_norm": 0.18397244811058044,
      "learning_rate": 4.55183410976325e-06,
      "loss": 0.0123,
      "step": 1673660
    },
    {
      "epoch": 2.739014028266007,
      "grad_norm": 0.2657608389854431,
      "learning_rate": 4.551768217549732e-06,
      "loss": 0.0139,
      "step": 1673680
    },
    {
      "epoch": 2.73904675870466,
      "grad_norm": 0.24802646040916443,
      "learning_rate": 4.551702325336215e-06,
      "loss": 0.0156,
      "step": 1673700
    },
    {
      "epoch": 2.7390794891433137,
      "grad_norm": 0.4583958089351654,
      "learning_rate": 4.551636433122698e-06,
      "loss": 0.0087,
      "step": 1673720
    },
    {
      "epoch": 2.739112219581967,
      "grad_norm": 0.3052124083042145,
      "learning_rate": 4.551570540909181e-06,
      "loss": 0.0209,
      "step": 1673740
    },
    {
      "epoch": 2.73914495002062,
      "grad_norm": 0.8002421855926514,
      "learning_rate": 4.551504648695663e-06,
      "loss": 0.0104,
      "step": 1673760
    },
    {
      "epoch": 2.7391776804592736,
      "grad_norm": 0.26745158433914185,
      "learning_rate": 4.551438756482147e-06,
      "loss": 0.0091,
      "step": 1673780
    },
    {
      "epoch": 2.7392104108979267,
      "grad_norm": 0.6021590232849121,
      "learning_rate": 4.55137286426863e-06,
      "loss": 0.0204,
      "step": 1673800
    },
    {
      "epoch": 2.7392431413365803,
      "grad_norm": 0.6299001574516296,
      "learning_rate": 4.5513069720551124e-06,
      "loss": 0.0122,
      "step": 1673820
    },
    {
      "epoch": 2.7392758717752335,
      "grad_norm": 0.145546555519104,
      "learning_rate": 4.551241079841596e-06,
      "loss": 0.0114,
      "step": 1673840
    },
    {
      "epoch": 2.739308602213887,
      "grad_norm": 2.0182690620422363,
      "learning_rate": 4.551175187628079e-06,
      "loss": 0.0092,
      "step": 1673860
    },
    {
      "epoch": 2.73934133265254,
      "grad_norm": 0.5254053473472595,
      "learning_rate": 4.5511092954145615e-06,
      "loss": 0.0143,
      "step": 1673880
    },
    {
      "epoch": 2.7393740630911934,
      "grad_norm": 4.048122406005859,
      "learning_rate": 4.551043403201044e-06,
      "loss": 0.0106,
      "step": 1673900
    },
    {
      "epoch": 2.739406793529847,
      "grad_norm": 0.38427287340164185,
      "learning_rate": 4.550977510987527e-06,
      "loss": 0.0159,
      "step": 1673920
    },
    {
      "epoch": 2.7394395239685,
      "grad_norm": 1.046183466911316,
      "learning_rate": 4.55091161877401e-06,
      "loss": 0.0149,
      "step": 1673940
    },
    {
      "epoch": 2.7394722544071537,
      "grad_norm": 0.5446059107780457,
      "learning_rate": 4.5508457265604925e-06,
      "loss": 0.0117,
      "step": 1673960
    },
    {
      "epoch": 2.739504984845807,
      "grad_norm": 0.16555966436862946,
      "learning_rate": 4.550779834346975e-06,
      "loss": 0.0126,
      "step": 1673980
    },
    {
      "epoch": 2.7395377152844604,
      "grad_norm": 0.5147638320922852,
      "learning_rate": 4.550713942133459e-06,
      "loss": 0.0139,
      "step": 1674000
    },
    {
      "epoch": 2.7395704457231136,
      "grad_norm": 1.6796497106552124,
      "learning_rate": 4.5506480499199415e-06,
      "loss": 0.0152,
      "step": 1674020
    },
    {
      "epoch": 2.7396031761617667,
      "grad_norm": 0.25817105174064636,
      "learning_rate": 4.550582157706424e-06,
      "loss": 0.0104,
      "step": 1674040
    },
    {
      "epoch": 2.7396359066004203,
      "grad_norm": 0.19356054067611694,
      "learning_rate": 4.550516265492907e-06,
      "loss": 0.0096,
      "step": 1674060
    },
    {
      "epoch": 2.7396686370390735,
      "grad_norm": 0.5657780766487122,
      "learning_rate": 4.55045037327939e-06,
      "loss": 0.0156,
      "step": 1674080
    },
    {
      "epoch": 2.739701367477727,
      "grad_norm": 0.4661944806575775,
      "learning_rate": 4.5503844810658725e-06,
      "loss": 0.0121,
      "step": 1674100
    },
    {
      "epoch": 2.7397340979163802,
      "grad_norm": 0.24550463259220123,
      "learning_rate": 4.550318588852355e-06,
      "loss": 0.014,
      "step": 1674120
    },
    {
      "epoch": 2.739766828355034,
      "grad_norm": 0.21601496636867523,
      "learning_rate": 4.550252696638838e-06,
      "loss": 0.0135,
      "step": 1674140
    },
    {
      "epoch": 2.739799558793687,
      "grad_norm": 0.0923449695110321,
      "learning_rate": 4.5501868044253216e-06,
      "loss": 0.0148,
      "step": 1674160
    },
    {
      "epoch": 2.73983228923234,
      "grad_norm": 0.4487238824367523,
      "learning_rate": 4.550120912211804e-06,
      "loss": 0.0112,
      "step": 1674180
    },
    {
      "epoch": 2.7398650196709937,
      "grad_norm": 0.09450074285268784,
      "learning_rate": 4.550055019998287e-06,
      "loss": 0.0109,
      "step": 1674200
    },
    {
      "epoch": 2.739897750109647,
      "grad_norm": 0.3350008726119995,
      "learning_rate": 4.54998912778477e-06,
      "loss": 0.0164,
      "step": 1674220
    },
    {
      "epoch": 2.7399304805483005,
      "grad_norm": 0.1365860253572464,
      "learning_rate": 4.549923235571253e-06,
      "loss": 0.0109,
      "step": 1674240
    },
    {
      "epoch": 2.7399632109869536,
      "grad_norm": 0.3585689067840576,
      "learning_rate": 4.549857343357736e-06,
      "loss": 0.0229,
      "step": 1674260
    },
    {
      "epoch": 2.739995941425607,
      "grad_norm": 0.061393093317747116,
      "learning_rate": 4.549791451144219e-06,
      "loss": 0.0102,
      "step": 1674280
    },
    {
      "epoch": 2.7400286718642604,
      "grad_norm": 0.8639485239982605,
      "learning_rate": 4.549725558930702e-06,
      "loss": 0.0204,
      "step": 1674300
    },
    {
      "epoch": 2.7400614023029135,
      "grad_norm": 0.535504162311554,
      "learning_rate": 4.549659666717184e-06,
      "loss": 0.016,
      "step": 1674320
    },
    {
      "epoch": 2.740094132741567,
      "grad_norm": 1.2759459018707275,
      "learning_rate": 4.549593774503667e-06,
      "loss": 0.0102,
      "step": 1674340
    },
    {
      "epoch": 2.7401268631802203,
      "grad_norm": 0.4223669767379761,
      "learning_rate": 4.54952788229015e-06,
      "loss": 0.0148,
      "step": 1674360
    },
    {
      "epoch": 2.740159593618874,
      "grad_norm": 0.34603604674339294,
      "learning_rate": 4.5494619900766325e-06,
      "loss": 0.0122,
      "step": 1674380
    },
    {
      "epoch": 2.740192324057527,
      "grad_norm": 0.6758071780204773,
      "learning_rate": 4.549396097863116e-06,
      "loss": 0.0149,
      "step": 1674400
    },
    {
      "epoch": 2.7402250544961806,
      "grad_norm": 0.1835072636604309,
      "learning_rate": 4.549330205649599e-06,
      "loss": 0.0112,
      "step": 1674420
    },
    {
      "epoch": 2.7402577849348337,
      "grad_norm": 0.1748506724834442,
      "learning_rate": 4.549264313436082e-06,
      "loss": 0.0194,
      "step": 1674440
    },
    {
      "epoch": 2.740290515373487,
      "grad_norm": 0.2679773271083832,
      "learning_rate": 4.549198421222564e-06,
      "loss": 0.0104,
      "step": 1674460
    },
    {
      "epoch": 2.7403232458121405,
      "grad_norm": 0.33340317010879517,
      "learning_rate": 4.549132529009047e-06,
      "loss": 0.0133,
      "step": 1674480
    },
    {
      "epoch": 2.7403559762507936,
      "grad_norm": 0.5329284071922302,
      "learning_rate": 4.549066636795531e-06,
      "loss": 0.0102,
      "step": 1674500
    },
    {
      "epoch": 2.740388706689447,
      "grad_norm": 0.5409422516822815,
      "learning_rate": 4.549000744582013e-06,
      "loss": 0.0202,
      "step": 1674520
    },
    {
      "epoch": 2.7404214371281004,
      "grad_norm": 0.3188311457633972,
      "learning_rate": 4.548934852368496e-06,
      "loss": 0.0139,
      "step": 1674540
    },
    {
      "epoch": 2.740454167566754,
      "grad_norm": 0.42728668451309204,
      "learning_rate": 4.548868960154979e-06,
      "loss": 0.01,
      "step": 1674560
    },
    {
      "epoch": 2.740486898005407,
      "grad_norm": 0.7162382006645203,
      "learning_rate": 4.548803067941462e-06,
      "loss": 0.012,
      "step": 1674580
    },
    {
      "epoch": 2.7405196284440603,
      "grad_norm": 0.15448087453842163,
      "learning_rate": 4.548737175727944e-06,
      "loss": 0.0166,
      "step": 1674600
    },
    {
      "epoch": 2.740552358882714,
      "grad_norm": 0.19094693660736084,
      "learning_rate": 4.548671283514428e-06,
      "loss": 0.0118,
      "step": 1674620
    },
    {
      "epoch": 2.740585089321367,
      "grad_norm": 0.3284188508987427,
      "learning_rate": 4.548605391300911e-06,
      "loss": 0.0176,
      "step": 1674640
    },
    {
      "epoch": 2.74061781976002,
      "grad_norm": 1.5688104629516602,
      "learning_rate": 4.5485394990873934e-06,
      "loss": 0.012,
      "step": 1674660
    },
    {
      "epoch": 2.7406505501986738,
      "grad_norm": 0.12132389098405838,
      "learning_rate": 4.548473606873876e-06,
      "loss": 0.0093,
      "step": 1674680
    },
    {
      "epoch": 2.7406832806373274,
      "grad_norm": 0.15233591198921204,
      "learning_rate": 4.548407714660359e-06,
      "loss": 0.0132,
      "step": 1674700
    },
    {
      "epoch": 2.7407160110759805,
      "grad_norm": 0.9313073754310608,
      "learning_rate": 4.548341822446842e-06,
      "loss": 0.0162,
      "step": 1674720
    },
    {
      "epoch": 2.7407487415146337,
      "grad_norm": 0.5439924001693726,
      "learning_rate": 4.548275930233324e-06,
      "loss": 0.0168,
      "step": 1674740
    },
    {
      "epoch": 2.7407814719532873,
      "grad_norm": 0.2343960702419281,
      "learning_rate": 4.548210038019807e-06,
      "loss": 0.02,
      "step": 1674760
    },
    {
      "epoch": 2.7408142023919404,
      "grad_norm": 0.09272472560405731,
      "learning_rate": 4.54814414580629e-06,
      "loss": 0.012,
      "step": 1674780
    },
    {
      "epoch": 2.7408469328305936,
      "grad_norm": 0.3395238518714905,
      "learning_rate": 4.5480782535927735e-06,
      "loss": 0.0162,
      "step": 1674800
    },
    {
      "epoch": 2.740879663269247,
      "grad_norm": 0.27715206146240234,
      "learning_rate": 4.548012361379256e-06,
      "loss": 0.017,
      "step": 1674820
    },
    {
      "epoch": 2.7409123937079007,
      "grad_norm": 0.4489433169364929,
      "learning_rate": 4.547946469165739e-06,
      "loss": 0.0168,
      "step": 1674840
    },
    {
      "epoch": 2.740945124146554,
      "grad_norm": 0.49202054738998413,
      "learning_rate": 4.5478805769522225e-06,
      "loss": 0.0146,
      "step": 1674860
    },
    {
      "epoch": 2.740977854585207,
      "grad_norm": 0.5489707589149475,
      "learning_rate": 4.547814684738705e-06,
      "loss": 0.0185,
      "step": 1674880
    },
    {
      "epoch": 2.7410105850238606,
      "grad_norm": 0.9490757584571838,
      "learning_rate": 4.547748792525188e-06,
      "loss": 0.0123,
      "step": 1674900
    },
    {
      "epoch": 2.741043315462514,
      "grad_norm": 0.12369026988744736,
      "learning_rate": 4.547682900311671e-06,
      "loss": 0.0135,
      "step": 1674920
    },
    {
      "epoch": 2.741076045901167,
      "grad_norm": 0.06052020937204361,
      "learning_rate": 4.5476170080981535e-06,
      "loss": 0.0097,
      "step": 1674940
    },
    {
      "epoch": 2.7411087763398205,
      "grad_norm": 0.1630319505929947,
      "learning_rate": 4.547551115884636e-06,
      "loss": 0.0168,
      "step": 1674960
    },
    {
      "epoch": 2.7411415067784737,
      "grad_norm": 0.17067451775074005,
      "learning_rate": 4.547485223671119e-06,
      "loss": 0.0129,
      "step": 1674980
    },
    {
      "epoch": 2.7411742372171273,
      "grad_norm": 0.1626671701669693,
      "learning_rate": 4.547419331457602e-06,
      "loss": 0.012,
      "step": 1675000
    },
    {
      "epoch": 2.7412069676557804,
      "grad_norm": 0.4080076217651367,
      "learning_rate": 4.547353439244085e-06,
      "loss": 0.0164,
      "step": 1675020
    },
    {
      "epoch": 2.741239698094434,
      "grad_norm": 0.3676077425479889,
      "learning_rate": 4.547287547030568e-06,
      "loss": 0.0104,
      "step": 1675040
    },
    {
      "epoch": 2.741272428533087,
      "grad_norm": 0.36158132553100586,
      "learning_rate": 4.547221654817051e-06,
      "loss": 0.0139,
      "step": 1675060
    },
    {
      "epoch": 2.7413051589717403,
      "grad_norm": 0.45492759346961975,
      "learning_rate": 4.5471557626035335e-06,
      "loss": 0.0109,
      "step": 1675080
    },
    {
      "epoch": 2.741337889410394,
      "grad_norm": 0.48958098888397217,
      "learning_rate": 4.547089870390016e-06,
      "loss": 0.0113,
      "step": 1675100
    },
    {
      "epoch": 2.741370619849047,
      "grad_norm": 0.3706951141357422,
      "learning_rate": 4.547023978176499e-06,
      "loss": 0.016,
      "step": 1675120
    },
    {
      "epoch": 2.7414033502877007,
      "grad_norm": 0.2126961499452591,
      "learning_rate": 4.546958085962982e-06,
      "loss": 0.0131,
      "step": 1675140
    },
    {
      "epoch": 2.741436080726354,
      "grad_norm": 0.18680405616760254,
      "learning_rate": 4.5468921937494645e-06,
      "loss": 0.0146,
      "step": 1675160
    },
    {
      "epoch": 2.7414688111650074,
      "grad_norm": 0.2667686343193054,
      "learning_rate": 4.546826301535947e-06,
      "loss": 0.0184,
      "step": 1675180
    },
    {
      "epoch": 2.7415015416036606,
      "grad_norm": 0.20505854487419128,
      "learning_rate": 4.546760409322431e-06,
      "loss": 0.013,
      "step": 1675200
    },
    {
      "epoch": 2.7415342720423137,
      "grad_norm": 0.8647220730781555,
      "learning_rate": 4.5466945171089135e-06,
      "loss": 0.0142,
      "step": 1675220
    },
    {
      "epoch": 2.7415670024809673,
      "grad_norm": 0.14076374471187592,
      "learning_rate": 4.546628624895396e-06,
      "loss": 0.0174,
      "step": 1675240
    },
    {
      "epoch": 2.7415997329196204,
      "grad_norm": 0.267214298248291,
      "learning_rate": 4.54656273268188e-06,
      "loss": 0.0209,
      "step": 1675260
    },
    {
      "epoch": 2.741632463358274,
      "grad_norm": 0.38516131043434143,
      "learning_rate": 4.546496840468363e-06,
      "loss": 0.013,
      "step": 1675280
    },
    {
      "epoch": 2.741665193796927,
      "grad_norm": 0.12151339650154114,
      "learning_rate": 4.546430948254845e-06,
      "loss": 0.0159,
      "step": 1675300
    },
    {
      "epoch": 2.741697924235581,
      "grad_norm": 0.2306685745716095,
      "learning_rate": 4.546365056041328e-06,
      "loss": 0.0133,
      "step": 1675320
    },
    {
      "epoch": 2.741730654674234,
      "grad_norm": 0.8580905795097351,
      "learning_rate": 4.546299163827811e-06,
      "loss": 0.0211,
      "step": 1675340
    },
    {
      "epoch": 2.741763385112887,
      "grad_norm": 0.7539815902709961,
      "learning_rate": 4.5462332716142936e-06,
      "loss": 0.0128,
      "step": 1675360
    },
    {
      "epoch": 2.7417961155515407,
      "grad_norm": 0.353192001581192,
      "learning_rate": 4.546167379400776e-06,
      "loss": 0.0118,
      "step": 1675380
    },
    {
      "epoch": 2.741828845990194,
      "grad_norm": 0.27006447315216064,
      "learning_rate": 4.546101487187259e-06,
      "loss": 0.0138,
      "step": 1675400
    },
    {
      "epoch": 2.7418615764288474,
      "grad_norm": 0.25723111629486084,
      "learning_rate": 4.546035594973743e-06,
      "loss": 0.02,
      "step": 1675420
    },
    {
      "epoch": 2.7418943068675006,
      "grad_norm": 0.09177602082490921,
      "learning_rate": 4.545969702760225e-06,
      "loss": 0.0149,
      "step": 1675440
    },
    {
      "epoch": 2.741927037306154,
      "grad_norm": 0.47549208998680115,
      "learning_rate": 4.545903810546708e-06,
      "loss": 0.0153,
      "step": 1675460
    },
    {
      "epoch": 2.7419597677448073,
      "grad_norm": 0.19518880546092987,
      "learning_rate": 4.545837918333191e-06,
      "loss": 0.027,
      "step": 1675480
    },
    {
      "epoch": 2.7419924981834605,
      "grad_norm": 0.35463929176330566,
      "learning_rate": 4.545772026119674e-06,
      "loss": 0.011,
      "step": 1675500
    },
    {
      "epoch": 2.742025228622114,
      "grad_norm": 0.304662823677063,
      "learning_rate": 4.545706133906156e-06,
      "loss": 0.0135,
      "step": 1675520
    },
    {
      "epoch": 2.742057959060767,
      "grad_norm": 0.09861606359481812,
      "learning_rate": 4.545640241692639e-06,
      "loss": 0.0202,
      "step": 1675540
    },
    {
      "epoch": 2.742090689499421,
      "grad_norm": 0.2487727403640747,
      "learning_rate": 4.545574349479123e-06,
      "loss": 0.013,
      "step": 1675560
    },
    {
      "epoch": 2.742123419938074,
      "grad_norm": 0.11537794023752213,
      "learning_rate": 4.545508457265605e-06,
      "loss": 0.0149,
      "step": 1675580
    },
    {
      "epoch": 2.7421561503767276,
      "grad_norm": 1.7787418365478516,
      "learning_rate": 4.545442565052088e-06,
      "loss": 0.0125,
      "step": 1675600
    },
    {
      "epoch": 2.7421888808153807,
      "grad_norm": 0.7408913969993591,
      "learning_rate": 4.545376672838571e-06,
      "loss": 0.0113,
      "step": 1675620
    },
    {
      "epoch": 2.742221611254034,
      "grad_norm": 0.45759686827659607,
      "learning_rate": 4.545310780625054e-06,
      "loss": 0.016,
      "step": 1675640
    },
    {
      "epoch": 2.7422543416926874,
      "grad_norm": 0.04199143499135971,
      "learning_rate": 4.545244888411537e-06,
      "loss": 0.0078,
      "step": 1675660
    },
    {
      "epoch": 2.7422870721313406,
      "grad_norm": 0.325214147567749,
      "learning_rate": 4.54517899619802e-06,
      "loss": 0.0168,
      "step": 1675680
    },
    {
      "epoch": 2.742319802569994,
      "grad_norm": 0.5308575630187988,
      "learning_rate": 4.545113103984503e-06,
      "loss": 0.0169,
      "step": 1675700
    },
    {
      "epoch": 2.7423525330086473,
      "grad_norm": 0.5643300414085388,
      "learning_rate": 4.5450472117709854e-06,
      "loss": 0.0092,
      "step": 1675720
    },
    {
      "epoch": 2.742385263447301,
      "grad_norm": 0.300834983587265,
      "learning_rate": 4.544981319557468e-06,
      "loss": 0.0094,
      "step": 1675740
    },
    {
      "epoch": 2.742417993885954,
      "grad_norm": 0.27513420581817627,
      "learning_rate": 4.544915427343951e-06,
      "loss": 0.0138,
      "step": 1675760
    },
    {
      "epoch": 2.7424507243246072,
      "grad_norm": 0.4297593832015991,
      "learning_rate": 4.544849535130434e-06,
      "loss": 0.0176,
      "step": 1675780
    },
    {
      "epoch": 2.742483454763261,
      "grad_norm": 0.2358742207288742,
      "learning_rate": 4.544783642916916e-06,
      "loss": 0.0135,
      "step": 1675800
    },
    {
      "epoch": 2.742516185201914,
      "grad_norm": 1.876705527305603,
      "learning_rate": 4.5447177507034e-06,
      "loss": 0.0146,
      "step": 1675820
    },
    {
      "epoch": 2.7425489156405676,
      "grad_norm": 0.290367990732193,
      "learning_rate": 4.544651858489883e-06,
      "loss": 0.0125,
      "step": 1675840
    },
    {
      "epoch": 2.7425816460792207,
      "grad_norm": 0.5624768733978271,
      "learning_rate": 4.5445859662763655e-06,
      "loss": 0.0146,
      "step": 1675860
    },
    {
      "epoch": 2.7426143765178743,
      "grad_norm": 3.4244225025177,
      "learning_rate": 4.544520074062848e-06,
      "loss": 0.0126,
      "step": 1675880
    },
    {
      "epoch": 2.7426471069565275,
      "grad_norm": 0.37002432346343994,
      "learning_rate": 4.544454181849331e-06,
      "loss": 0.0115,
      "step": 1675900
    },
    {
      "epoch": 2.7426798373951806,
      "grad_norm": 0.22618329524993896,
      "learning_rate": 4.5443882896358145e-06,
      "loss": 0.0139,
      "step": 1675920
    },
    {
      "epoch": 2.742712567833834,
      "grad_norm": 0.3191666901111603,
      "learning_rate": 4.544322397422297e-06,
      "loss": 0.0115,
      "step": 1675940
    },
    {
      "epoch": 2.7427452982724874,
      "grad_norm": 0.41456782817840576,
      "learning_rate": 4.54425650520878e-06,
      "loss": 0.013,
      "step": 1675960
    },
    {
      "epoch": 2.7427780287111405,
      "grad_norm": 0.4013894498348236,
      "learning_rate": 4.544190612995263e-06,
      "loss": 0.0131,
      "step": 1675980
    },
    {
      "epoch": 2.742810759149794,
      "grad_norm": 0.31370434165000916,
      "learning_rate": 4.5441247207817455e-06,
      "loss": 0.0139,
      "step": 1676000
    },
    {
      "epoch": 2.7428434895884477,
      "grad_norm": 0.24439874291419983,
      "learning_rate": 4.544058828568228e-06,
      "loss": 0.0179,
      "step": 1676020
    },
    {
      "epoch": 2.742876220027101,
      "grad_norm": 0.1495303511619568,
      "learning_rate": 4.543992936354712e-06,
      "loss": 0.0087,
      "step": 1676040
    },
    {
      "epoch": 2.742908950465754,
      "grad_norm": 0.07230762392282486,
      "learning_rate": 4.5439270441411945e-06,
      "loss": 0.0136,
      "step": 1676060
    },
    {
      "epoch": 2.7429416809044076,
      "grad_norm": 0.7336921095848083,
      "learning_rate": 4.543861151927677e-06,
      "loss": 0.016,
      "step": 1676080
    },
    {
      "epoch": 2.7429744113430607,
      "grad_norm": 0.1598481386899948,
      "learning_rate": 4.54379525971416e-06,
      "loss": 0.0097,
      "step": 1676100
    },
    {
      "epoch": 2.743007141781714,
      "grad_norm": 0.5318775773048401,
      "learning_rate": 4.543729367500643e-06,
      "loss": 0.0118,
      "step": 1676120
    },
    {
      "epoch": 2.7430398722203675,
      "grad_norm": 0.22887249290943146,
      "learning_rate": 4.5436634752871255e-06,
      "loss": 0.0106,
      "step": 1676140
    },
    {
      "epoch": 2.743072602659021,
      "grad_norm": 0.05363244563341141,
      "learning_rate": 4.543597583073608e-06,
      "loss": 0.0136,
      "step": 1676160
    },
    {
      "epoch": 2.7431053330976742,
      "grad_norm": 0.17874984443187714,
      "learning_rate": 4.543531690860091e-06,
      "loss": 0.0141,
      "step": 1676180
    },
    {
      "epoch": 2.7431380635363274,
      "grad_norm": 0.3398616909980774,
      "learning_rate": 4.543465798646574e-06,
      "loss": 0.0104,
      "step": 1676200
    },
    {
      "epoch": 2.743170793974981,
      "grad_norm": 1.041994571685791,
      "learning_rate": 4.543399906433057e-06,
      "loss": 0.017,
      "step": 1676220
    },
    {
      "epoch": 2.743203524413634,
      "grad_norm": 0.2709396779537201,
      "learning_rate": 4.54333401421954e-06,
      "loss": 0.0121,
      "step": 1676240
    },
    {
      "epoch": 2.7432362548522873,
      "grad_norm": 0.29389676451683044,
      "learning_rate": 4.543268122006023e-06,
      "loss": 0.0103,
      "step": 1676260
    },
    {
      "epoch": 2.743268985290941,
      "grad_norm": 0.839788019657135,
      "learning_rate": 4.543202229792506e-06,
      "loss": 0.0105,
      "step": 1676280
    },
    {
      "epoch": 2.7433017157295945,
      "grad_norm": 0.6412293910980225,
      "learning_rate": 4.543136337578989e-06,
      "loss": 0.0123,
      "step": 1676300
    },
    {
      "epoch": 2.7433344461682476,
      "grad_norm": 0.40806642174720764,
      "learning_rate": 4.543070445365472e-06,
      "loss": 0.0117,
      "step": 1676320
    },
    {
      "epoch": 2.7433671766069008,
      "grad_norm": 0.7235975861549377,
      "learning_rate": 4.543004553151955e-06,
      "loss": 0.018,
      "step": 1676340
    },
    {
      "epoch": 2.7433999070455544,
      "grad_norm": 0.26140981912612915,
      "learning_rate": 4.542938660938437e-06,
      "loss": 0.0131,
      "step": 1676360
    },
    {
      "epoch": 2.7434326374842075,
      "grad_norm": 0.24891726672649384,
      "learning_rate": 4.54287276872492e-06,
      "loss": 0.0161,
      "step": 1676380
    },
    {
      "epoch": 2.7434653679228607,
      "grad_norm": 0.06658630818128586,
      "learning_rate": 4.542806876511403e-06,
      "loss": 0.0149,
      "step": 1676400
    },
    {
      "epoch": 2.7434980983615143,
      "grad_norm": 0.40580812096595764,
      "learning_rate": 4.5427409842978856e-06,
      "loss": 0.0094,
      "step": 1676420
    },
    {
      "epoch": 2.743530828800168,
      "grad_norm": 0.4809751808643341,
      "learning_rate": 4.542675092084369e-06,
      "loss": 0.0184,
      "step": 1676440
    },
    {
      "epoch": 2.743563559238821,
      "grad_norm": 0.18571043014526367,
      "learning_rate": 4.542609199870852e-06,
      "loss": 0.0116,
      "step": 1676460
    },
    {
      "epoch": 2.743596289677474,
      "grad_norm": 0.44173097610473633,
      "learning_rate": 4.542543307657335e-06,
      "loss": 0.0137,
      "step": 1676480
    },
    {
      "epoch": 2.7436290201161277,
      "grad_norm": 0.1182689517736435,
      "learning_rate": 4.542477415443817e-06,
      "loss": 0.0075,
      "step": 1676500
    },
    {
      "epoch": 2.743661750554781,
      "grad_norm": 0.2910636365413666,
      "learning_rate": 4.5424115232303e-06,
      "loss": 0.0153,
      "step": 1676520
    },
    {
      "epoch": 2.743694480993434,
      "grad_norm": 0.5821791887283325,
      "learning_rate": 4.542345631016783e-06,
      "loss": 0.0158,
      "step": 1676540
    },
    {
      "epoch": 2.7437272114320876,
      "grad_norm": 0.23793618381023407,
      "learning_rate": 4.542279738803266e-06,
      "loss": 0.0141,
      "step": 1676560
    },
    {
      "epoch": 2.743759941870741,
      "grad_norm": 0.15085835754871368,
      "learning_rate": 4.542213846589748e-06,
      "loss": 0.0124,
      "step": 1676580
    },
    {
      "epoch": 2.7437926723093944,
      "grad_norm": 0.22814834117889404,
      "learning_rate": 4.542147954376231e-06,
      "loss": 0.0124,
      "step": 1676600
    },
    {
      "epoch": 2.7438254027480475,
      "grad_norm": 0.47844719886779785,
      "learning_rate": 4.542082062162715e-06,
      "loss": 0.0098,
      "step": 1676620
    },
    {
      "epoch": 2.743858133186701,
      "grad_norm": 0.126280277967453,
      "learning_rate": 4.542016169949197e-06,
      "loss": 0.0109,
      "step": 1676640
    },
    {
      "epoch": 2.7438908636253543,
      "grad_norm": 0.2120143473148346,
      "learning_rate": 4.54195027773568e-06,
      "loss": 0.0134,
      "step": 1676660
    },
    {
      "epoch": 2.7439235940640074,
      "grad_norm": 0.7459450364112854,
      "learning_rate": 4.541884385522164e-06,
      "loss": 0.0191,
      "step": 1676680
    },
    {
      "epoch": 2.743956324502661,
      "grad_norm": 0.47522762417793274,
      "learning_rate": 4.5418184933086465e-06,
      "loss": 0.0104,
      "step": 1676700
    },
    {
      "epoch": 2.743989054941314,
      "grad_norm": 0.6845769882202148,
      "learning_rate": 4.541752601095129e-06,
      "loss": 0.0149,
      "step": 1676720
    },
    {
      "epoch": 2.7440217853799678,
      "grad_norm": 0.15202026069164276,
      "learning_rate": 4.541686708881612e-06,
      "loss": 0.0166,
      "step": 1676740
    },
    {
      "epoch": 2.744054515818621,
      "grad_norm": 0.25838547945022583,
      "learning_rate": 4.541620816668095e-06,
      "loss": 0.0118,
      "step": 1676760
    },
    {
      "epoch": 2.7440872462572745,
      "grad_norm": 0.8257812857627869,
      "learning_rate": 4.541554924454577e-06,
      "loss": 0.0135,
      "step": 1676780
    },
    {
      "epoch": 2.7441199766959277,
      "grad_norm": 0.3249654471874237,
      "learning_rate": 4.54148903224106e-06,
      "loss": 0.0156,
      "step": 1676800
    },
    {
      "epoch": 2.744152707134581,
      "grad_norm": 0.42723020911216736,
      "learning_rate": 4.541423140027543e-06,
      "loss": 0.0169,
      "step": 1676820
    },
    {
      "epoch": 2.7441854375732344,
      "grad_norm": 0.1760592758655548,
      "learning_rate": 4.5413572478140265e-06,
      "loss": 0.0229,
      "step": 1676840
    },
    {
      "epoch": 2.7442181680118876,
      "grad_norm": 0.32376739382743835,
      "learning_rate": 4.541291355600509e-06,
      "loss": 0.0155,
      "step": 1676860
    },
    {
      "epoch": 2.744250898450541,
      "grad_norm": 0.6990559697151184,
      "learning_rate": 4.541225463386992e-06,
      "loss": 0.0192,
      "step": 1676880
    },
    {
      "epoch": 2.7442836288891943,
      "grad_norm": 0.41257140040397644,
      "learning_rate": 4.541159571173475e-06,
      "loss": 0.0132,
      "step": 1676900
    },
    {
      "epoch": 2.744316359327848,
      "grad_norm": 0.419030100107193,
      "learning_rate": 4.5410936789599574e-06,
      "loss": 0.0167,
      "step": 1676920
    },
    {
      "epoch": 2.744349089766501,
      "grad_norm": 0.16597288846969604,
      "learning_rate": 4.54102778674644e-06,
      "loss": 0.0114,
      "step": 1676940
    },
    {
      "epoch": 2.744381820205154,
      "grad_norm": 0.14397567510604858,
      "learning_rate": 4.540961894532924e-06,
      "loss": 0.0157,
      "step": 1676960
    },
    {
      "epoch": 2.744414550643808,
      "grad_norm": 0.13702234625816345,
      "learning_rate": 4.5408960023194065e-06,
      "loss": 0.0174,
      "step": 1676980
    },
    {
      "epoch": 2.744447281082461,
      "grad_norm": 0.0659317672252655,
      "learning_rate": 4.540830110105889e-06,
      "loss": 0.0141,
      "step": 1677000
    },
    {
      "epoch": 2.7444800115211145,
      "grad_norm": 0.12977388501167297,
      "learning_rate": 4.540764217892372e-06,
      "loss": 0.0179,
      "step": 1677020
    },
    {
      "epoch": 2.7445127419597677,
      "grad_norm": 1.1263781785964966,
      "learning_rate": 4.540698325678855e-06,
      "loss": 0.0165,
      "step": 1677040
    },
    {
      "epoch": 2.7445454723984213,
      "grad_norm": 0.43961888551712036,
      "learning_rate": 4.540632433465338e-06,
      "loss": 0.0079,
      "step": 1677060
    },
    {
      "epoch": 2.7445782028370744,
      "grad_norm": 0.16074277460575104,
      "learning_rate": 4.540566541251821e-06,
      "loss": 0.0084,
      "step": 1677080
    },
    {
      "epoch": 2.7446109332757276,
      "grad_norm": 0.5447452664375305,
      "learning_rate": 4.540500649038304e-06,
      "loss": 0.0183,
      "step": 1677100
    },
    {
      "epoch": 2.744643663714381,
      "grad_norm": 0.28047803044319153,
      "learning_rate": 4.5404347568247865e-06,
      "loss": 0.0193,
      "step": 1677120
    },
    {
      "epoch": 2.7446763941530343,
      "grad_norm": 0.11383621394634247,
      "learning_rate": 4.540368864611269e-06,
      "loss": 0.0183,
      "step": 1677140
    },
    {
      "epoch": 2.744709124591688,
      "grad_norm": 0.16361142694950104,
      "learning_rate": 4.540302972397752e-06,
      "loss": 0.0164,
      "step": 1677160
    },
    {
      "epoch": 2.744741855030341,
      "grad_norm": 0.11928796768188477,
      "learning_rate": 4.540237080184235e-06,
      "loss": 0.0113,
      "step": 1677180
    },
    {
      "epoch": 2.7447745854689947,
      "grad_norm": 0.32671695947647095,
      "learning_rate": 4.5401711879707175e-06,
      "loss": 0.0153,
      "step": 1677200
    },
    {
      "epoch": 2.744807315907648,
      "grad_norm": 0.5167746543884277,
      "learning_rate": 4.5401052957572e-06,
      "loss": 0.0204,
      "step": 1677220
    },
    {
      "epoch": 2.744840046346301,
      "grad_norm": 0.24161691963672638,
      "learning_rate": 4.540039403543684e-06,
      "loss": 0.0146,
      "step": 1677240
    },
    {
      "epoch": 2.7448727767849546,
      "grad_norm": 0.12109765410423279,
      "learning_rate": 4.5399735113301666e-06,
      "loss": 0.0091,
      "step": 1677260
    },
    {
      "epoch": 2.7449055072236077,
      "grad_norm": 0.29424968361854553,
      "learning_rate": 4.539907619116649e-06,
      "loss": 0.0109,
      "step": 1677280
    },
    {
      "epoch": 2.7449382376622613,
      "grad_norm": 0.8088173270225525,
      "learning_rate": 4.539841726903132e-06,
      "loss": 0.0112,
      "step": 1677300
    },
    {
      "epoch": 2.7449709681009145,
      "grad_norm": 0.5970495939254761,
      "learning_rate": 4.539775834689616e-06,
      "loss": 0.0205,
      "step": 1677320
    },
    {
      "epoch": 2.745003698539568,
      "grad_norm": 0.41007301211357117,
      "learning_rate": 4.539709942476098e-06,
      "loss": 0.0083,
      "step": 1677340
    },
    {
      "epoch": 2.745036428978221,
      "grad_norm": 0.2566472887992859,
      "learning_rate": 4.539644050262581e-06,
      "loss": 0.0131,
      "step": 1677360
    },
    {
      "epoch": 2.7450691594168743,
      "grad_norm": 0.18255040049552917,
      "learning_rate": 4.539578158049064e-06,
      "loss": 0.007,
      "step": 1677380
    },
    {
      "epoch": 2.745101889855528,
      "grad_norm": 0.22633135318756104,
      "learning_rate": 4.539512265835547e-06,
      "loss": 0.0149,
      "step": 1677400
    },
    {
      "epoch": 2.745134620294181,
      "grad_norm": 1.2458906173706055,
      "learning_rate": 4.539446373622029e-06,
      "loss": 0.0221,
      "step": 1677420
    },
    {
      "epoch": 2.7451673507328347,
      "grad_norm": 0.2910265326499939,
      "learning_rate": 4.539380481408512e-06,
      "loss": 0.0177,
      "step": 1677440
    },
    {
      "epoch": 2.745200081171488,
      "grad_norm": 1.2613669633865356,
      "learning_rate": 4.539314589194996e-06,
      "loss": 0.0148,
      "step": 1677460
    },
    {
      "epoch": 2.7452328116101414,
      "grad_norm": 0.41928085684776306,
      "learning_rate": 4.539248696981478e-06,
      "loss": 0.0143,
      "step": 1677480
    },
    {
      "epoch": 2.7452655420487946,
      "grad_norm": 0.19758759438991547,
      "learning_rate": 4.539182804767961e-06,
      "loss": 0.0182,
      "step": 1677500
    },
    {
      "epoch": 2.7452982724874477,
      "grad_norm": 0.26369428634643555,
      "learning_rate": 4.539116912554444e-06,
      "loss": 0.0113,
      "step": 1677520
    },
    {
      "epoch": 2.7453310029261013,
      "grad_norm": 0.39986369013786316,
      "learning_rate": 4.539051020340927e-06,
      "loss": 0.0094,
      "step": 1677540
    },
    {
      "epoch": 2.7453637333647545,
      "grad_norm": 0.6630224585533142,
      "learning_rate": 4.538985128127409e-06,
      "loss": 0.0121,
      "step": 1677560
    },
    {
      "epoch": 2.7453964638034076,
      "grad_norm": 0.32473477721214294,
      "learning_rate": 4.538919235913892e-06,
      "loss": 0.0176,
      "step": 1677580
    },
    {
      "epoch": 2.745429194242061,
      "grad_norm": 0.13230514526367188,
      "learning_rate": 4.538853343700375e-06,
      "loss": 0.0113,
      "step": 1677600
    },
    {
      "epoch": 2.745461924680715,
      "grad_norm": 0.48717376589775085,
      "learning_rate": 4.5387874514868576e-06,
      "loss": 0.012,
      "step": 1677620
    },
    {
      "epoch": 2.745494655119368,
      "grad_norm": 0.5093438625335693,
      "learning_rate": 4.538721559273341e-06,
      "loss": 0.0147,
      "step": 1677640
    },
    {
      "epoch": 2.745527385558021,
      "grad_norm": 0.7010465860366821,
      "learning_rate": 4.538655667059824e-06,
      "loss": 0.0164,
      "step": 1677660
    },
    {
      "epoch": 2.7455601159966747,
      "grad_norm": 0.5188513994216919,
      "learning_rate": 4.538589774846307e-06,
      "loss": 0.0117,
      "step": 1677680
    },
    {
      "epoch": 2.745592846435328,
      "grad_norm": 0.2735273540019989,
      "learning_rate": 4.53852388263279e-06,
      "loss": 0.0232,
      "step": 1677700
    },
    {
      "epoch": 2.745625576873981,
      "grad_norm": 1.0022903680801392,
      "learning_rate": 4.538457990419273e-06,
      "loss": 0.0124,
      "step": 1677720
    },
    {
      "epoch": 2.7456583073126346,
      "grad_norm": 0.15475283563137054,
      "learning_rate": 4.538392098205756e-06,
      "loss": 0.0162,
      "step": 1677740
    },
    {
      "epoch": 2.745691037751288,
      "grad_norm": 0.5801966190338135,
      "learning_rate": 4.5383262059922384e-06,
      "loss": 0.02,
      "step": 1677760
    },
    {
      "epoch": 2.7457237681899413,
      "grad_norm": 2.2210440635681152,
      "learning_rate": 4.538260313778721e-06,
      "loss": 0.017,
      "step": 1677780
    },
    {
      "epoch": 2.7457564986285945,
      "grad_norm": 1.1950039863586426,
      "learning_rate": 4.538194421565204e-06,
      "loss": 0.016,
      "step": 1677800
    },
    {
      "epoch": 2.745789229067248,
      "grad_norm": 0.5535786151885986,
      "learning_rate": 4.538128529351687e-06,
      "loss": 0.0169,
      "step": 1677820
    },
    {
      "epoch": 2.7458219595059012,
      "grad_norm": 0.26816561818122864,
      "learning_rate": 4.538062637138169e-06,
      "loss": 0.0168,
      "step": 1677840
    },
    {
      "epoch": 2.7458546899445544,
      "grad_norm": 0.4729710817337036,
      "learning_rate": 4.537996744924653e-06,
      "loss": 0.0135,
      "step": 1677860
    },
    {
      "epoch": 2.745887420383208,
      "grad_norm": 0.5648012757301331,
      "learning_rate": 4.537930852711136e-06,
      "loss": 0.0167,
      "step": 1677880
    },
    {
      "epoch": 2.7459201508218616,
      "grad_norm": 0.24115826189517975,
      "learning_rate": 4.5378649604976185e-06,
      "loss": 0.0148,
      "step": 1677900
    },
    {
      "epoch": 2.7459528812605147,
      "grad_norm": 0.6332328915596008,
      "learning_rate": 4.537799068284101e-06,
      "loss": 0.0115,
      "step": 1677920
    },
    {
      "epoch": 2.745985611699168,
      "grad_norm": 0.518798828125,
      "learning_rate": 4.537733176070584e-06,
      "loss": 0.0112,
      "step": 1677940
    },
    {
      "epoch": 2.7460183421378215,
      "grad_norm": 0.6186667084693909,
      "learning_rate": 4.537667283857067e-06,
      "loss": 0.0136,
      "step": 1677960
    },
    {
      "epoch": 2.7460510725764746,
      "grad_norm": 0.2571561634540558,
      "learning_rate": 4.537601391643549e-06,
      "loss": 0.0115,
      "step": 1677980
    },
    {
      "epoch": 2.7460838030151278,
      "grad_norm": 0.26478973031044006,
      "learning_rate": 4.537535499430032e-06,
      "loss": 0.0205,
      "step": 1678000
    },
    {
      "epoch": 2.7461165334537814,
      "grad_norm": 0.26334837079048157,
      "learning_rate": 4.537469607216516e-06,
      "loss": 0.0232,
      "step": 1678020
    },
    {
      "epoch": 2.7461492638924345,
      "grad_norm": 0.3740849494934082,
      "learning_rate": 4.5374037150029985e-06,
      "loss": 0.0119,
      "step": 1678040
    },
    {
      "epoch": 2.746181994331088,
      "grad_norm": 0.9279129505157471,
      "learning_rate": 4.537337822789481e-06,
      "loss": 0.0218,
      "step": 1678060
    },
    {
      "epoch": 2.7462147247697413,
      "grad_norm": 0.6300792098045349,
      "learning_rate": 4.537271930575964e-06,
      "loss": 0.0107,
      "step": 1678080
    },
    {
      "epoch": 2.746247455208395,
      "grad_norm": 0.3040562868118286,
      "learning_rate": 4.5372060383624476e-06,
      "loss": 0.0127,
      "step": 1678100
    },
    {
      "epoch": 2.746280185647048,
      "grad_norm": 0.3126204013824463,
      "learning_rate": 4.53714014614893e-06,
      "loss": 0.0137,
      "step": 1678120
    },
    {
      "epoch": 2.746312916085701,
      "grad_norm": 0.9710485935211182,
      "learning_rate": 4.537074253935413e-06,
      "loss": 0.0148,
      "step": 1678140
    },
    {
      "epoch": 2.7463456465243548,
      "grad_norm": 0.3270074427127838,
      "learning_rate": 4.537008361721896e-06,
      "loss": 0.0134,
      "step": 1678160
    },
    {
      "epoch": 2.746378376963008,
      "grad_norm": 0.03747757524251938,
      "learning_rate": 4.5369424695083785e-06,
      "loss": 0.0134,
      "step": 1678180
    },
    {
      "epoch": 2.7464111074016615,
      "grad_norm": 0.13809338212013245,
      "learning_rate": 4.536876577294861e-06,
      "loss": 0.0096,
      "step": 1678200
    },
    {
      "epoch": 2.7464438378403147,
      "grad_norm": 0.9062871932983398,
      "learning_rate": 4.536810685081344e-06,
      "loss": 0.0127,
      "step": 1678220
    },
    {
      "epoch": 2.7464765682789682,
      "grad_norm": 0.07601562887430191,
      "learning_rate": 4.536744792867827e-06,
      "loss": 0.0121,
      "step": 1678240
    },
    {
      "epoch": 2.7465092987176214,
      "grad_norm": 0.29845646023750305,
      "learning_rate": 4.53667890065431e-06,
      "loss": 0.0112,
      "step": 1678260
    },
    {
      "epoch": 2.7465420291562745,
      "grad_norm": 0.42360803484916687,
      "learning_rate": 4.536613008440793e-06,
      "loss": 0.019,
      "step": 1678280
    },
    {
      "epoch": 2.746574759594928,
      "grad_norm": 0.6333705186843872,
      "learning_rate": 4.536547116227276e-06,
      "loss": 0.0148,
      "step": 1678300
    },
    {
      "epoch": 2.7466074900335813,
      "grad_norm": 0.6334775686264038,
      "learning_rate": 4.5364812240137585e-06,
      "loss": 0.0176,
      "step": 1678320
    },
    {
      "epoch": 2.746640220472235,
      "grad_norm": 0.17745043337345123,
      "learning_rate": 4.536415331800241e-06,
      "loss": 0.0155,
      "step": 1678340
    },
    {
      "epoch": 2.746672950910888,
      "grad_norm": 0.5387029647827148,
      "learning_rate": 4.536349439586724e-06,
      "loss": 0.0131,
      "step": 1678360
    },
    {
      "epoch": 2.7467056813495416,
      "grad_norm": 0.4098975956439972,
      "learning_rate": 4.536283547373208e-06,
      "loss": 0.0157,
      "step": 1678380
    },
    {
      "epoch": 2.746738411788195,
      "grad_norm": 0.09330559521913528,
      "learning_rate": 4.53621765515969e-06,
      "loss": 0.0103,
      "step": 1678400
    },
    {
      "epoch": 2.746771142226848,
      "grad_norm": 0.9687848687171936,
      "learning_rate": 4.536151762946173e-06,
      "loss": 0.0152,
      "step": 1678420
    },
    {
      "epoch": 2.7468038726655015,
      "grad_norm": 0.1343267560005188,
      "learning_rate": 4.536085870732656e-06,
      "loss": 0.0194,
      "step": 1678440
    },
    {
      "epoch": 2.7468366031041547,
      "grad_norm": 0.30610018968582153,
      "learning_rate": 4.5360199785191386e-06,
      "loss": 0.0146,
      "step": 1678460
    },
    {
      "epoch": 2.7468693335428083,
      "grad_norm": 0.32414454221725464,
      "learning_rate": 4.535954086305622e-06,
      "loss": 0.0128,
      "step": 1678480
    },
    {
      "epoch": 2.7469020639814614,
      "grad_norm": 0.5266018509864807,
      "learning_rate": 4.535888194092105e-06,
      "loss": 0.0129,
      "step": 1678500
    },
    {
      "epoch": 2.746934794420115,
      "grad_norm": 0.5935274958610535,
      "learning_rate": 4.535822301878588e-06,
      "loss": 0.0138,
      "step": 1678520
    },
    {
      "epoch": 2.746967524858768,
      "grad_norm": 0.3114149272441864,
      "learning_rate": 4.53575640966507e-06,
      "loss": 0.0078,
      "step": 1678540
    },
    {
      "epoch": 2.7470002552974213,
      "grad_norm": 0.6122803092002869,
      "learning_rate": 4.535690517451553e-06,
      "loss": 0.0101,
      "step": 1678560
    },
    {
      "epoch": 2.747032985736075,
      "grad_norm": 1.110734224319458,
      "learning_rate": 4.535624625238036e-06,
      "loss": 0.0102,
      "step": 1678580
    },
    {
      "epoch": 2.747065716174728,
      "grad_norm": 0.3425905406475067,
      "learning_rate": 4.535558733024519e-06,
      "loss": 0.0103,
      "step": 1678600
    },
    {
      "epoch": 2.7470984466133817,
      "grad_norm": 0.4649151563644409,
      "learning_rate": 4.535492840811001e-06,
      "loss": 0.012,
      "step": 1678620
    },
    {
      "epoch": 2.747131177052035,
      "grad_norm": 0.4740421772003174,
      "learning_rate": 4.535426948597484e-06,
      "loss": 0.0109,
      "step": 1678640
    },
    {
      "epoch": 2.7471639074906884,
      "grad_norm": 0.2374841272830963,
      "learning_rate": 4.535361056383968e-06,
      "loss": 0.0164,
      "step": 1678660
    },
    {
      "epoch": 2.7471966379293415,
      "grad_norm": 0.23479922115802765,
      "learning_rate": 4.53529516417045e-06,
      "loss": 0.0112,
      "step": 1678680
    },
    {
      "epoch": 2.7472293683679947,
      "grad_norm": 0.41346174478530884,
      "learning_rate": 4.535229271956933e-06,
      "loss": 0.0147,
      "step": 1678700
    },
    {
      "epoch": 2.7472620988066483,
      "grad_norm": 1.002856731414795,
      "learning_rate": 4.535163379743417e-06,
      "loss": 0.0168,
      "step": 1678720
    },
    {
      "epoch": 2.7472948292453014,
      "grad_norm": 0.2389204055070877,
      "learning_rate": 4.5350974875298995e-06,
      "loss": 0.0119,
      "step": 1678740
    },
    {
      "epoch": 2.747327559683955,
      "grad_norm": 0.3619319498538971,
      "learning_rate": 4.535031595316382e-06,
      "loss": 0.0121,
      "step": 1678760
    },
    {
      "epoch": 2.747360290122608,
      "grad_norm": 0.6378710865974426,
      "learning_rate": 4.534965703102865e-06,
      "loss": 0.0145,
      "step": 1678780
    },
    {
      "epoch": 2.747393020561262,
      "grad_norm": 0.3380245268344879,
      "learning_rate": 4.534899810889348e-06,
      "loss": 0.0106,
      "step": 1678800
    },
    {
      "epoch": 2.747425750999915,
      "grad_norm": 1.1149625778198242,
      "learning_rate": 4.53483391867583e-06,
      "loss": 0.0223,
      "step": 1678820
    },
    {
      "epoch": 2.747458481438568,
      "grad_norm": 0.24080051481723785,
      "learning_rate": 4.534768026462313e-06,
      "loss": 0.0119,
      "step": 1678840
    },
    {
      "epoch": 2.7474912118772217,
      "grad_norm": 0.0991455689072609,
      "learning_rate": 4.534702134248796e-06,
      "loss": 0.0264,
      "step": 1678860
    },
    {
      "epoch": 2.747523942315875,
      "grad_norm": 0.11573520302772522,
      "learning_rate": 4.5346362420352795e-06,
      "loss": 0.0134,
      "step": 1678880
    },
    {
      "epoch": 2.7475566727545284,
      "grad_norm": 0.5090286731719971,
      "learning_rate": 4.534570349821762e-06,
      "loss": 0.0079,
      "step": 1678900
    },
    {
      "epoch": 2.7475894031931816,
      "grad_norm": 0.2108173370361328,
      "learning_rate": 4.534504457608245e-06,
      "loss": 0.0105,
      "step": 1678920
    },
    {
      "epoch": 2.747622133631835,
      "grad_norm": 0.3368549346923828,
      "learning_rate": 4.534438565394728e-06,
      "loss": 0.0195,
      "step": 1678940
    },
    {
      "epoch": 2.7476548640704883,
      "grad_norm": 0.213376984000206,
      "learning_rate": 4.5343726731812104e-06,
      "loss": 0.0129,
      "step": 1678960
    },
    {
      "epoch": 2.7476875945091415,
      "grad_norm": 0.14073112607002258,
      "learning_rate": 4.534306780967693e-06,
      "loss": 0.0111,
      "step": 1678980
    },
    {
      "epoch": 2.747720324947795,
      "grad_norm": 0.32665127515792847,
      "learning_rate": 4.534240888754176e-06,
      "loss": 0.0129,
      "step": 1679000
    },
    {
      "epoch": 2.747753055386448,
      "grad_norm": 0.21855197846889496,
      "learning_rate": 4.534174996540659e-06,
      "loss": 0.0106,
      "step": 1679020
    },
    {
      "epoch": 2.7477857858251014,
      "grad_norm": 0.6616761088371277,
      "learning_rate": 4.534109104327141e-06,
      "loss": 0.0141,
      "step": 1679040
    },
    {
      "epoch": 2.747818516263755,
      "grad_norm": 0.22823667526245117,
      "learning_rate": 4.534043212113625e-06,
      "loss": 0.0095,
      "step": 1679060
    },
    {
      "epoch": 2.7478512467024085,
      "grad_norm": 0.6047981381416321,
      "learning_rate": 4.533977319900108e-06,
      "loss": 0.0113,
      "step": 1679080
    },
    {
      "epoch": 2.7478839771410617,
      "grad_norm": 0.23489879071712494,
      "learning_rate": 4.5339114276865905e-06,
      "loss": 0.0136,
      "step": 1679100
    },
    {
      "epoch": 2.747916707579715,
      "grad_norm": 0.3141252100467682,
      "learning_rate": 4.533845535473074e-06,
      "loss": 0.0172,
      "step": 1679120
    },
    {
      "epoch": 2.7479494380183684,
      "grad_norm": 0.6853723526000977,
      "learning_rate": 4.533779643259557e-06,
      "loss": 0.0199,
      "step": 1679140
    },
    {
      "epoch": 2.7479821684570216,
      "grad_norm": 0.47141069173812866,
      "learning_rate": 4.5337137510460395e-06,
      "loss": 0.0145,
      "step": 1679160
    },
    {
      "epoch": 2.7480148988956747,
      "grad_norm": 0.5149279832839966,
      "learning_rate": 4.533647858832522e-06,
      "loss": 0.0176,
      "step": 1679180
    },
    {
      "epoch": 2.7480476293343283,
      "grad_norm": 0.18099257349967957,
      "learning_rate": 4.533581966619005e-06,
      "loss": 0.0148,
      "step": 1679200
    },
    {
      "epoch": 2.748080359772982,
      "grad_norm": 0.6089685559272766,
      "learning_rate": 4.533516074405488e-06,
      "loss": 0.0184,
      "step": 1679220
    },
    {
      "epoch": 2.748113090211635,
      "grad_norm": 0.13185150921344757,
      "learning_rate": 4.5334501821919705e-06,
      "loss": 0.0091,
      "step": 1679240
    },
    {
      "epoch": 2.7481458206502882,
      "grad_norm": 0.5178914666175842,
      "learning_rate": 4.533384289978453e-06,
      "loss": 0.0137,
      "step": 1679260
    },
    {
      "epoch": 2.748178551088942,
      "grad_norm": 0.6248553395271301,
      "learning_rate": 4.533318397764937e-06,
      "loss": 0.011,
      "step": 1679280
    },
    {
      "epoch": 2.748211281527595,
      "grad_norm": 0.2298431545495987,
      "learning_rate": 4.5332525055514196e-06,
      "loss": 0.0162,
      "step": 1679300
    },
    {
      "epoch": 2.748244011966248,
      "grad_norm": 0.19650042057037354,
      "learning_rate": 4.533186613337902e-06,
      "loss": 0.0121,
      "step": 1679320
    },
    {
      "epoch": 2.7482767424049017,
      "grad_norm": 0.1512049436569214,
      "learning_rate": 4.533120721124385e-06,
      "loss": 0.0079,
      "step": 1679340
    },
    {
      "epoch": 2.7483094728435553,
      "grad_norm": 0.8119640350341797,
      "learning_rate": 4.533054828910868e-06,
      "loss": 0.0176,
      "step": 1679360
    },
    {
      "epoch": 2.7483422032822085,
      "grad_norm": 0.22439448535442352,
      "learning_rate": 4.5329889366973505e-06,
      "loss": 0.0155,
      "step": 1679380
    },
    {
      "epoch": 2.7483749337208616,
      "grad_norm": 0.12375286966562271,
      "learning_rate": 4.532923044483833e-06,
      "loss": 0.011,
      "step": 1679400
    },
    {
      "epoch": 2.748407664159515,
      "grad_norm": 0.26636552810668945,
      "learning_rate": 4.532857152270317e-06,
      "loss": 0.0192,
      "step": 1679420
    },
    {
      "epoch": 2.7484403945981684,
      "grad_norm": 0.16998617351055145,
      "learning_rate": 4.5327912600568e-06,
      "loss": 0.0123,
      "step": 1679440
    },
    {
      "epoch": 2.7484731250368215,
      "grad_norm": 0.2720465362071991,
      "learning_rate": 4.532725367843282e-06,
      "loss": 0.0102,
      "step": 1679460
    },
    {
      "epoch": 2.748505855475475,
      "grad_norm": 0.39466819167137146,
      "learning_rate": 4.532659475629765e-06,
      "loss": 0.0112,
      "step": 1679480
    },
    {
      "epoch": 2.7485385859141287,
      "grad_norm": 0.5046117901802063,
      "learning_rate": 4.532593583416248e-06,
      "loss": 0.0167,
      "step": 1679500
    },
    {
      "epoch": 2.748571316352782,
      "grad_norm": 0.285025417804718,
      "learning_rate": 4.532527691202731e-06,
      "loss": 0.0154,
      "step": 1679520
    },
    {
      "epoch": 2.748604046791435,
      "grad_norm": 0.3011811077594757,
      "learning_rate": 4.532461798989214e-06,
      "loss": 0.0129,
      "step": 1679540
    },
    {
      "epoch": 2.7486367772300886,
      "grad_norm": 0.19882319867610931,
      "learning_rate": 4.532395906775697e-06,
      "loss": 0.0202,
      "step": 1679560
    },
    {
      "epoch": 2.7486695076687417,
      "grad_norm": 0.3087758421897888,
      "learning_rate": 4.53233001456218e-06,
      "loss": 0.0151,
      "step": 1679580
    },
    {
      "epoch": 2.748702238107395,
      "grad_norm": 0.6529492735862732,
      "learning_rate": 4.532264122348662e-06,
      "loss": 0.0153,
      "step": 1679600
    },
    {
      "epoch": 2.7487349685460485,
      "grad_norm": 0.3289494812488556,
      "learning_rate": 4.532198230135145e-06,
      "loss": 0.0141,
      "step": 1679620
    },
    {
      "epoch": 2.7487676989847016,
      "grad_norm": 0.26418155431747437,
      "learning_rate": 4.532132337921628e-06,
      "loss": 0.0075,
      "step": 1679640
    },
    {
      "epoch": 2.7488004294233552,
      "grad_norm": 0.6595783829689026,
      "learning_rate": 4.5320664457081106e-06,
      "loss": 0.0182,
      "step": 1679660
    },
    {
      "epoch": 2.7488331598620084,
      "grad_norm": 1.039970874786377,
      "learning_rate": 4.532000553494594e-06,
      "loss": 0.0112,
      "step": 1679680
    },
    {
      "epoch": 2.748865890300662,
      "grad_norm": 0.46185511350631714,
      "learning_rate": 4.531934661281077e-06,
      "loss": 0.0119,
      "step": 1679700
    },
    {
      "epoch": 2.748898620739315,
      "grad_norm": 0.5424506068229675,
      "learning_rate": 4.53186876906756e-06,
      "loss": 0.0161,
      "step": 1679720
    },
    {
      "epoch": 2.7489313511779683,
      "grad_norm": 0.355726957321167,
      "learning_rate": 4.531802876854042e-06,
      "loss": 0.0173,
      "step": 1679740
    },
    {
      "epoch": 2.748964081616622,
      "grad_norm": 0.2288179248571396,
      "learning_rate": 4.531736984640525e-06,
      "loss": 0.0122,
      "step": 1679760
    },
    {
      "epoch": 2.748996812055275,
      "grad_norm": 0.3817470967769623,
      "learning_rate": 4.531671092427009e-06,
      "loss": 0.0111,
      "step": 1679780
    },
    {
      "epoch": 2.7490295424939286,
      "grad_norm": 0.3183024823665619,
      "learning_rate": 4.5316052002134914e-06,
      "loss": 0.0145,
      "step": 1679800
    },
    {
      "epoch": 2.7490622729325818,
      "grad_norm": 0.2796945869922638,
      "learning_rate": 4.531539307999974e-06,
      "loss": 0.0132,
      "step": 1679820
    },
    {
      "epoch": 2.7490950033712354,
      "grad_norm": 0.08926209062337875,
      "learning_rate": 4.531473415786457e-06,
      "loss": 0.0113,
      "step": 1679840
    },
    {
      "epoch": 2.7491277338098885,
      "grad_norm": 0.22961334884166718,
      "learning_rate": 4.53140752357294e-06,
      "loss": 0.0159,
      "step": 1679860
    },
    {
      "epoch": 2.7491604642485417,
      "grad_norm": 0.3342686593532562,
      "learning_rate": 4.531341631359422e-06,
      "loss": 0.0103,
      "step": 1679880
    },
    {
      "epoch": 2.7491931946871953,
      "grad_norm": 0.312313050031662,
      "learning_rate": 4.531275739145906e-06,
      "loss": 0.0142,
      "step": 1679900
    },
    {
      "epoch": 2.7492259251258484,
      "grad_norm": 0.10607358068227768,
      "learning_rate": 4.531209846932389e-06,
      "loss": 0.0121,
      "step": 1679920
    },
    {
      "epoch": 2.749258655564502,
      "grad_norm": 1.2918105125427246,
      "learning_rate": 4.5311439547188715e-06,
      "loss": 0.0107,
      "step": 1679940
    },
    {
      "epoch": 2.749291386003155,
      "grad_norm": 0.20106400549411774,
      "learning_rate": 4.531078062505354e-06,
      "loss": 0.0139,
      "step": 1679960
    },
    {
      "epoch": 2.7493241164418087,
      "grad_norm": 0.07918728142976761,
      "learning_rate": 4.531012170291837e-06,
      "loss": 0.0095,
      "step": 1679980
    },
    {
      "epoch": 2.749356846880462,
      "grad_norm": 0.33273887634277344,
      "learning_rate": 4.53094627807832e-06,
      "loss": 0.0121,
      "step": 1680000
    },
    {
      "epoch": 2.749389577319115,
      "grad_norm": 0.5106687545776367,
      "learning_rate": 4.5308803858648024e-06,
      "loss": 0.0176,
      "step": 1680020
    },
    {
      "epoch": 2.7494223077577686,
      "grad_norm": 1.9997625350952148,
      "learning_rate": 4.530814493651285e-06,
      "loss": 0.0135,
      "step": 1680040
    },
    {
      "epoch": 2.749455038196422,
      "grad_norm": 0.16545991599559784,
      "learning_rate": 4.530748601437768e-06,
      "loss": 0.011,
      "step": 1680060
    },
    {
      "epoch": 2.7494877686350754,
      "grad_norm": 0.3120081424713135,
      "learning_rate": 4.5306827092242515e-06,
      "loss": 0.0169,
      "step": 1680080
    },
    {
      "epoch": 2.7495204990737285,
      "grad_norm": 0.2503042221069336,
      "learning_rate": 4.530616817010734e-06,
      "loss": 0.018,
      "step": 1680100
    },
    {
      "epoch": 2.749553229512382,
      "grad_norm": 0.4986588954925537,
      "learning_rate": 4.530550924797217e-06,
      "loss": 0.0103,
      "step": 1680120
    },
    {
      "epoch": 2.7495859599510353,
      "grad_norm": 0.24107003211975098,
      "learning_rate": 4.5304850325837006e-06,
      "loss": 0.0181,
      "step": 1680140
    },
    {
      "epoch": 2.7496186903896884,
      "grad_norm": 0.46497249603271484,
      "learning_rate": 4.530419140370183e-06,
      "loss": 0.0148,
      "step": 1680160
    },
    {
      "epoch": 2.749651420828342,
      "grad_norm": 0.4779180884361267,
      "learning_rate": 4.530353248156666e-06,
      "loss": 0.0097,
      "step": 1680180
    },
    {
      "epoch": 2.749684151266995,
      "grad_norm": 0.48122331500053406,
      "learning_rate": 4.530287355943149e-06,
      "loss": 0.0145,
      "step": 1680200
    },
    {
      "epoch": 2.7497168817056488,
      "grad_norm": 0.2061031013727188,
      "learning_rate": 4.5302214637296315e-06,
      "loss": 0.0125,
      "step": 1680220
    },
    {
      "epoch": 2.749749612144302,
      "grad_norm": 0.34933826327323914,
      "learning_rate": 4.530155571516114e-06,
      "loss": 0.0143,
      "step": 1680240
    },
    {
      "epoch": 2.7497823425829555,
      "grad_norm": 0.19189713895320892,
      "learning_rate": 4.530089679302597e-06,
      "loss": 0.0109,
      "step": 1680260
    },
    {
      "epoch": 2.7498150730216087,
      "grad_norm": 0.37246909737586975,
      "learning_rate": 4.53002378708908e-06,
      "loss": 0.0131,
      "step": 1680280
    },
    {
      "epoch": 2.749847803460262,
      "grad_norm": 0.2345750629901886,
      "learning_rate": 4.529957894875563e-06,
      "loss": 0.0136,
      "step": 1680300
    },
    {
      "epoch": 2.7498805338989154,
      "grad_norm": 0.31857019662857056,
      "learning_rate": 4.529892002662046e-06,
      "loss": 0.0103,
      "step": 1680320
    },
    {
      "epoch": 2.7499132643375686,
      "grad_norm": 0.6499740481376648,
      "learning_rate": 4.529826110448529e-06,
      "loss": 0.0122,
      "step": 1680340
    },
    {
      "epoch": 2.749945994776222,
      "grad_norm": 0.6252172589302063,
      "learning_rate": 4.5297602182350115e-06,
      "loss": 0.0148,
      "step": 1680360
    },
    {
      "epoch": 2.7499787252148753,
      "grad_norm": 0.8199814558029175,
      "learning_rate": 4.529694326021494e-06,
      "loss": 0.0115,
      "step": 1680380
    },
    {
      "epoch": 2.750011455653529,
      "grad_norm": 0.4519381523132324,
      "learning_rate": 4.529628433807977e-06,
      "loss": 0.0207,
      "step": 1680400
    },
    {
      "epoch": 2.750044186092182,
      "grad_norm": 0.18233010172843933,
      "learning_rate": 4.52956254159446e-06,
      "loss": 0.0136,
      "step": 1680420
    },
    {
      "epoch": 2.750076916530835,
      "grad_norm": 0.8595592379570007,
      "learning_rate": 4.5294966493809425e-06,
      "loss": 0.0166,
      "step": 1680440
    },
    {
      "epoch": 2.750109646969489,
      "grad_norm": 0.13793855905532837,
      "learning_rate": 4.529430757167425e-06,
      "loss": 0.0123,
      "step": 1680460
    },
    {
      "epoch": 2.750142377408142,
      "grad_norm": 0.25846511125564575,
      "learning_rate": 4.529364864953909e-06,
      "loss": 0.0118,
      "step": 1680480
    },
    {
      "epoch": 2.750175107846795,
      "grad_norm": 0.4651869833469391,
      "learning_rate": 4.5292989727403916e-06,
      "loss": 0.0162,
      "step": 1680500
    },
    {
      "epoch": 2.7502078382854487,
      "grad_norm": 0.24418693780899048,
      "learning_rate": 4.529233080526874e-06,
      "loss": 0.0154,
      "step": 1680520
    },
    {
      "epoch": 2.7502405687241023,
      "grad_norm": 0.8364264369010925,
      "learning_rate": 4.529167188313358e-06,
      "loss": 0.0143,
      "step": 1680540
    },
    {
      "epoch": 2.7502732991627554,
      "grad_norm": 1.1937098503112793,
      "learning_rate": 4.529101296099841e-06,
      "loss": 0.014,
      "step": 1680560
    },
    {
      "epoch": 2.7503060296014086,
      "grad_norm": 0.6725499629974365,
      "learning_rate": 4.529035403886323e-06,
      "loss": 0.012,
      "step": 1680580
    },
    {
      "epoch": 2.750338760040062,
      "grad_norm": 0.3952220380306244,
      "learning_rate": 4.528969511672806e-06,
      "loss": 0.01,
      "step": 1680600
    },
    {
      "epoch": 2.7503714904787153,
      "grad_norm": 0.45843568444252014,
      "learning_rate": 4.528903619459289e-06,
      "loss": 0.0112,
      "step": 1680620
    },
    {
      "epoch": 2.7504042209173685,
      "grad_norm": 0.2111973762512207,
      "learning_rate": 4.528837727245772e-06,
      "loss": 0.0215,
      "step": 1680640
    },
    {
      "epoch": 2.750436951356022,
      "grad_norm": 0.4716070890426636,
      "learning_rate": 4.528771835032254e-06,
      "loss": 0.0145,
      "step": 1680660
    },
    {
      "epoch": 2.7504696817946757,
      "grad_norm": 0.2645658552646637,
      "learning_rate": 4.528705942818737e-06,
      "loss": 0.0117,
      "step": 1680680
    },
    {
      "epoch": 2.750502412233329,
      "grad_norm": 1.2772248983383179,
      "learning_rate": 4.528640050605221e-06,
      "loss": 0.0143,
      "step": 1680700
    },
    {
      "epoch": 2.750535142671982,
      "grad_norm": 0.13822363317012787,
      "learning_rate": 4.528574158391703e-06,
      "loss": 0.0169,
      "step": 1680720
    },
    {
      "epoch": 2.7505678731106356,
      "grad_norm": 0.14025384187698364,
      "learning_rate": 4.528508266178186e-06,
      "loss": 0.0136,
      "step": 1680740
    },
    {
      "epoch": 2.7506006035492887,
      "grad_norm": 0.2833135426044464,
      "learning_rate": 4.528442373964669e-06,
      "loss": 0.0124,
      "step": 1680760
    },
    {
      "epoch": 2.750633333987942,
      "grad_norm": 0.2434569150209427,
      "learning_rate": 4.528376481751152e-06,
      "loss": 0.009,
      "step": 1680780
    },
    {
      "epoch": 2.7506660644265954,
      "grad_norm": 0.4246842563152313,
      "learning_rate": 4.528310589537634e-06,
      "loss": 0.0153,
      "step": 1680800
    },
    {
      "epoch": 2.750698794865249,
      "grad_norm": 0.09583523124456406,
      "learning_rate": 4.528244697324117e-06,
      "loss": 0.0127,
      "step": 1680820
    },
    {
      "epoch": 2.750731525303902,
      "grad_norm": 0.742933988571167,
      "learning_rate": 4.528178805110601e-06,
      "loss": 0.0171,
      "step": 1680840
    },
    {
      "epoch": 2.7507642557425553,
      "grad_norm": 0.5125622153282166,
      "learning_rate": 4.5281129128970834e-06,
      "loss": 0.0174,
      "step": 1680860
    },
    {
      "epoch": 2.750796986181209,
      "grad_norm": 0.6787764430046082,
      "learning_rate": 4.528047020683566e-06,
      "loss": 0.0111,
      "step": 1680880
    },
    {
      "epoch": 2.750829716619862,
      "grad_norm": 0.2276836633682251,
      "learning_rate": 4.527981128470049e-06,
      "loss": 0.0167,
      "step": 1680900
    },
    {
      "epoch": 2.7508624470585152,
      "grad_norm": 0.6372482776641846,
      "learning_rate": 4.527915236256532e-06,
      "loss": 0.0123,
      "step": 1680920
    },
    {
      "epoch": 2.750895177497169,
      "grad_norm": 0.3860425055027008,
      "learning_rate": 4.527849344043015e-06,
      "loss": 0.0168,
      "step": 1680940
    },
    {
      "epoch": 2.7509279079358224,
      "grad_norm": 0.35659343004226685,
      "learning_rate": 4.527783451829498e-06,
      "loss": 0.0148,
      "step": 1680960
    },
    {
      "epoch": 2.7509606383744756,
      "grad_norm": 0.12989595532417297,
      "learning_rate": 4.527717559615981e-06,
      "loss": 0.0101,
      "step": 1680980
    },
    {
      "epoch": 2.7509933688131287,
      "grad_norm": 0.6554706692695618,
      "learning_rate": 4.5276516674024634e-06,
      "loss": 0.0099,
      "step": 1681000
    },
    {
      "epoch": 2.7510260992517823,
      "grad_norm": 0.7432681918144226,
      "learning_rate": 4.527585775188946e-06,
      "loss": 0.0134,
      "step": 1681020
    },
    {
      "epoch": 2.7510588296904355,
      "grad_norm": 0.11690694838762283,
      "learning_rate": 4.527519882975429e-06,
      "loss": 0.0131,
      "step": 1681040
    },
    {
      "epoch": 2.7510915601290886,
      "grad_norm": 0.8719868063926697,
      "learning_rate": 4.527453990761912e-06,
      "loss": 0.019,
      "step": 1681060
    },
    {
      "epoch": 2.751124290567742,
      "grad_norm": 0.16698670387268066,
      "learning_rate": 4.527388098548394e-06,
      "loss": 0.0118,
      "step": 1681080
    },
    {
      "epoch": 2.7511570210063954,
      "grad_norm": 0.5269892811775208,
      "learning_rate": 4.527322206334878e-06,
      "loss": 0.016,
      "step": 1681100
    },
    {
      "epoch": 2.751189751445049,
      "grad_norm": 0.24463187158107758,
      "learning_rate": 4.527256314121361e-06,
      "loss": 0.013,
      "step": 1681120
    },
    {
      "epoch": 2.751222481883702,
      "grad_norm": 0.26991429924964905,
      "learning_rate": 4.5271904219078435e-06,
      "loss": 0.0172,
      "step": 1681140
    },
    {
      "epoch": 2.7512552123223557,
      "grad_norm": 0.37527191638946533,
      "learning_rate": 4.527124529694326e-06,
      "loss": 0.014,
      "step": 1681160
    },
    {
      "epoch": 2.751287942761009,
      "grad_norm": 0.12601062655448914,
      "learning_rate": 4.52705863748081e-06,
      "loss": 0.0126,
      "step": 1681180
    },
    {
      "epoch": 2.751320673199662,
      "grad_norm": 0.42314672470092773,
      "learning_rate": 4.5269927452672925e-06,
      "loss": 0.0131,
      "step": 1681200
    },
    {
      "epoch": 2.7513534036383156,
      "grad_norm": 0.39670509099960327,
      "learning_rate": 4.526926853053775e-06,
      "loss": 0.0196,
      "step": 1681220
    },
    {
      "epoch": 2.7513861340769687,
      "grad_norm": 0.5849147439002991,
      "learning_rate": 4.526860960840258e-06,
      "loss": 0.0094,
      "step": 1681240
    },
    {
      "epoch": 2.7514188645156223,
      "grad_norm": 0.17475876212120056,
      "learning_rate": 4.526795068626741e-06,
      "loss": 0.014,
      "step": 1681260
    },
    {
      "epoch": 2.7514515949542755,
      "grad_norm": 0.6953937411308289,
      "learning_rate": 4.5267291764132235e-06,
      "loss": 0.0113,
      "step": 1681280
    },
    {
      "epoch": 2.751484325392929,
      "grad_norm": 0.35249948501586914,
      "learning_rate": 4.526663284199706e-06,
      "loss": 0.0178,
      "step": 1681300
    },
    {
      "epoch": 2.7515170558315822,
      "grad_norm": 0.19854803383350372,
      "learning_rate": 4.52659739198619e-06,
      "loss": 0.0149,
      "step": 1681320
    },
    {
      "epoch": 2.7515497862702354,
      "grad_norm": 0.49300721287727356,
      "learning_rate": 4.5265314997726726e-06,
      "loss": 0.0122,
      "step": 1681340
    },
    {
      "epoch": 2.751582516708889,
      "grad_norm": 0.27736467123031616,
      "learning_rate": 4.526465607559155e-06,
      "loss": 0.0152,
      "step": 1681360
    },
    {
      "epoch": 2.751615247147542,
      "grad_norm": 0.12620042264461517,
      "learning_rate": 4.526399715345638e-06,
      "loss": 0.0094,
      "step": 1681380
    },
    {
      "epoch": 2.7516479775861957,
      "grad_norm": 0.33722472190856934,
      "learning_rate": 4.526333823132121e-06,
      "loss": 0.0144,
      "step": 1681400
    },
    {
      "epoch": 2.751680708024849,
      "grad_norm": 1.2411161661148071,
      "learning_rate": 4.5262679309186035e-06,
      "loss": 0.0142,
      "step": 1681420
    },
    {
      "epoch": 2.7517134384635025,
      "grad_norm": 0.44461092352867126,
      "learning_rate": 4.526202038705086e-06,
      "loss": 0.0137,
      "step": 1681440
    },
    {
      "epoch": 2.7517461689021556,
      "grad_norm": 0.2747615575790405,
      "learning_rate": 4.526136146491569e-06,
      "loss": 0.0155,
      "step": 1681460
    },
    {
      "epoch": 2.7517788993408088,
      "grad_norm": 0.4777546525001526,
      "learning_rate": 4.526070254278052e-06,
      "loss": 0.0164,
      "step": 1681480
    },
    {
      "epoch": 2.7518116297794624,
      "grad_norm": 0.17085754871368408,
      "learning_rate": 4.526004362064535e-06,
      "loss": 0.0161,
      "step": 1681500
    },
    {
      "epoch": 2.7518443602181155,
      "grad_norm": 0.567801296710968,
      "learning_rate": 4.525938469851018e-06,
      "loss": 0.0178,
      "step": 1681520
    },
    {
      "epoch": 2.751877090656769,
      "grad_norm": 0.25491487979888916,
      "learning_rate": 4.525872577637501e-06,
      "loss": 0.0096,
      "step": 1681540
    },
    {
      "epoch": 2.7519098210954223,
      "grad_norm": 0.34241095185279846,
      "learning_rate": 4.525806685423984e-06,
      "loss": 0.0159,
      "step": 1681560
    },
    {
      "epoch": 2.751942551534076,
      "grad_norm": 0.19812630116939545,
      "learning_rate": 4.525740793210467e-06,
      "loss": 0.0119,
      "step": 1681580
    },
    {
      "epoch": 2.751975281972729,
      "grad_norm": 0.4696396589279175,
      "learning_rate": 4.52567490099695e-06,
      "loss": 0.0114,
      "step": 1681600
    },
    {
      "epoch": 2.752008012411382,
      "grad_norm": 0.4704046845436096,
      "learning_rate": 4.525609008783433e-06,
      "loss": 0.0104,
      "step": 1681620
    },
    {
      "epoch": 2.7520407428500357,
      "grad_norm": 0.05160624533891678,
      "learning_rate": 4.525543116569915e-06,
      "loss": 0.0149,
      "step": 1681640
    },
    {
      "epoch": 2.752073473288689,
      "grad_norm": 0.6757286787033081,
      "learning_rate": 4.525477224356398e-06,
      "loss": 0.0235,
      "step": 1681660
    },
    {
      "epoch": 2.7521062037273425,
      "grad_norm": 0.08959949761629105,
      "learning_rate": 4.525411332142881e-06,
      "loss": 0.0175,
      "step": 1681680
    },
    {
      "epoch": 2.7521389341659956,
      "grad_norm": 0.2236994355916977,
      "learning_rate": 4.5253454399293636e-06,
      "loss": 0.015,
      "step": 1681700
    },
    {
      "epoch": 2.7521716646046492,
      "grad_norm": 0.3293013870716095,
      "learning_rate": 4.525279547715847e-06,
      "loss": 0.0169,
      "step": 1681720
    },
    {
      "epoch": 2.7522043950433024,
      "grad_norm": 0.2696583569049835,
      "learning_rate": 4.52521365550233e-06,
      "loss": 0.0167,
      "step": 1681740
    },
    {
      "epoch": 2.7522371254819555,
      "grad_norm": 0.3393372595310211,
      "learning_rate": 4.525147763288813e-06,
      "loss": 0.0126,
      "step": 1681760
    },
    {
      "epoch": 2.752269855920609,
      "grad_norm": 1.277781367301941,
      "learning_rate": 4.525081871075295e-06,
      "loss": 0.0196,
      "step": 1681780
    },
    {
      "epoch": 2.7523025863592623,
      "grad_norm": 0.8840163946151733,
      "learning_rate": 4.525015978861778e-06,
      "loss": 0.0156,
      "step": 1681800
    },
    {
      "epoch": 2.752335316797916,
      "grad_norm": 0.3983660340309143,
      "learning_rate": 4.524950086648261e-06,
      "loss": 0.0129,
      "step": 1681820
    },
    {
      "epoch": 2.752368047236569,
      "grad_norm": 0.3996356129646301,
      "learning_rate": 4.524884194434744e-06,
      "loss": 0.0115,
      "step": 1681840
    },
    {
      "epoch": 2.7524007776752226,
      "grad_norm": 0.16373476386070251,
      "learning_rate": 4.524818302221226e-06,
      "loss": 0.0162,
      "step": 1681860
    },
    {
      "epoch": 2.7524335081138758,
      "grad_norm": 0.04774096980690956,
      "learning_rate": 4.524752410007709e-06,
      "loss": 0.0159,
      "step": 1681880
    },
    {
      "epoch": 2.752466238552529,
      "grad_norm": 0.17507818341255188,
      "learning_rate": 4.524686517794193e-06,
      "loss": 0.0187,
      "step": 1681900
    },
    {
      "epoch": 2.7524989689911825,
      "grad_norm": 0.09315662086009979,
      "learning_rate": 4.524620625580675e-06,
      "loss": 0.0126,
      "step": 1681920
    },
    {
      "epoch": 2.7525316994298357,
      "grad_norm": 3.7770659923553467,
      "learning_rate": 4.524554733367158e-06,
      "loss": 0.0213,
      "step": 1681940
    },
    {
      "epoch": 2.7525644298684893,
      "grad_norm": 0.377838134765625,
      "learning_rate": 4.524488841153642e-06,
      "loss": 0.0152,
      "step": 1681960
    },
    {
      "epoch": 2.7525971603071424,
      "grad_norm": 0.21551984548568726,
      "learning_rate": 4.5244229489401245e-06,
      "loss": 0.0106,
      "step": 1681980
    },
    {
      "epoch": 2.752629890745796,
      "grad_norm": 0.05649634823203087,
      "learning_rate": 4.524357056726607e-06,
      "loss": 0.0145,
      "step": 1682000
    },
    {
      "epoch": 2.752662621184449,
      "grad_norm": 0.11199454963207245,
      "learning_rate": 4.52429116451309e-06,
      "loss": 0.0149,
      "step": 1682020
    },
    {
      "epoch": 2.7526953516231023,
      "grad_norm": 0.7688155174255371,
      "learning_rate": 4.524225272299573e-06,
      "loss": 0.0122,
      "step": 1682040
    },
    {
      "epoch": 2.752728082061756,
      "grad_norm": 1.0648618936538696,
      "learning_rate": 4.5241593800860554e-06,
      "loss": 0.021,
      "step": 1682060
    },
    {
      "epoch": 2.752760812500409,
      "grad_norm": 0.45135509967803955,
      "learning_rate": 4.524093487872538e-06,
      "loss": 0.0104,
      "step": 1682080
    },
    {
      "epoch": 2.752793542939062,
      "grad_norm": 0.2606504261493683,
      "learning_rate": 4.524027595659021e-06,
      "loss": 0.0095,
      "step": 1682100
    },
    {
      "epoch": 2.752826273377716,
      "grad_norm": 0.14275477826595306,
      "learning_rate": 4.5239617034455045e-06,
      "loss": 0.0136,
      "step": 1682120
    },
    {
      "epoch": 2.7528590038163694,
      "grad_norm": 0.35852888226509094,
      "learning_rate": 4.523895811231987e-06,
      "loss": 0.019,
      "step": 1682140
    },
    {
      "epoch": 2.7528917342550225,
      "grad_norm": 2.8287980556488037,
      "learning_rate": 4.52382991901847e-06,
      "loss": 0.0139,
      "step": 1682160
    },
    {
      "epoch": 2.7529244646936757,
      "grad_norm": 0.2343749701976776,
      "learning_rate": 4.523764026804953e-06,
      "loss": 0.0211,
      "step": 1682180
    },
    {
      "epoch": 2.7529571951323293,
      "grad_norm": 0.21000072360038757,
      "learning_rate": 4.5236981345914355e-06,
      "loss": 0.0137,
      "step": 1682200
    },
    {
      "epoch": 2.7529899255709824,
      "grad_norm": 0.24364785850048065,
      "learning_rate": 4.523632242377918e-06,
      "loss": 0.0133,
      "step": 1682220
    },
    {
      "epoch": 2.7530226560096356,
      "grad_norm": 0.23648256063461304,
      "learning_rate": 4.523566350164402e-06,
      "loss": 0.0202,
      "step": 1682240
    },
    {
      "epoch": 2.753055386448289,
      "grad_norm": 0.695357084274292,
      "learning_rate": 4.5235004579508845e-06,
      "loss": 0.0167,
      "step": 1682260
    },
    {
      "epoch": 2.7530881168869428,
      "grad_norm": 0.14984464645385742,
      "learning_rate": 4.523434565737367e-06,
      "loss": 0.0139,
      "step": 1682280
    },
    {
      "epoch": 2.753120847325596,
      "grad_norm": 0.5707427263259888,
      "learning_rate": 4.52336867352385e-06,
      "loss": 0.0137,
      "step": 1682300
    },
    {
      "epoch": 2.753153577764249,
      "grad_norm": 0.06036696210503578,
      "learning_rate": 4.523302781310333e-06,
      "loss": 0.0125,
      "step": 1682320
    },
    {
      "epoch": 2.7531863082029027,
      "grad_norm": 0.24547314643859863,
      "learning_rate": 4.523236889096816e-06,
      "loss": 0.0177,
      "step": 1682340
    },
    {
      "epoch": 2.753219038641556,
      "grad_norm": 0.7778428196907043,
      "learning_rate": 4.523170996883299e-06,
      "loss": 0.0169,
      "step": 1682360
    },
    {
      "epoch": 2.753251769080209,
      "grad_norm": 0.17321783304214478,
      "learning_rate": 4.523105104669782e-06,
      "loss": 0.0144,
      "step": 1682380
    },
    {
      "epoch": 2.7532844995188626,
      "grad_norm": 1.0709033012390137,
      "learning_rate": 4.5230392124562645e-06,
      "loss": 0.0128,
      "step": 1682400
    },
    {
      "epoch": 2.753317229957516,
      "grad_norm": 0.1613970398902893,
      "learning_rate": 4.522973320242747e-06,
      "loss": 0.0167,
      "step": 1682420
    },
    {
      "epoch": 2.7533499603961693,
      "grad_norm": 0.2342163622379303,
      "learning_rate": 4.52290742802923e-06,
      "loss": 0.0115,
      "step": 1682440
    },
    {
      "epoch": 2.7533826908348225,
      "grad_norm": 0.41109177470207214,
      "learning_rate": 4.522841535815713e-06,
      "loss": 0.0158,
      "step": 1682460
    },
    {
      "epoch": 2.753415421273476,
      "grad_norm": 0.46183720231056213,
      "learning_rate": 4.5227756436021955e-06,
      "loss": 0.0059,
      "step": 1682480
    },
    {
      "epoch": 2.753448151712129,
      "grad_norm": 0.260223925113678,
      "learning_rate": 4.522709751388678e-06,
      "loss": 0.0101,
      "step": 1682500
    },
    {
      "epoch": 2.7534808821507823,
      "grad_norm": 0.3472304046154022,
      "learning_rate": 4.522643859175162e-06,
      "loss": 0.008,
      "step": 1682520
    },
    {
      "epoch": 2.753513612589436,
      "grad_norm": 0.6665266752243042,
      "learning_rate": 4.5225779669616446e-06,
      "loss": 0.0138,
      "step": 1682540
    },
    {
      "epoch": 2.7535463430280895,
      "grad_norm": 0.3704761564731598,
      "learning_rate": 4.522512074748127e-06,
      "loss": 0.017,
      "step": 1682560
    },
    {
      "epoch": 2.7535790734667427,
      "grad_norm": 0.22906890511512756,
      "learning_rate": 4.52244618253461e-06,
      "loss": 0.0169,
      "step": 1682580
    },
    {
      "epoch": 2.753611803905396,
      "grad_norm": 0.28526952862739563,
      "learning_rate": 4.522380290321094e-06,
      "loss": 0.0143,
      "step": 1682600
    },
    {
      "epoch": 2.7536445343440494,
      "grad_norm": 0.7398098111152649,
      "learning_rate": 4.522314398107576e-06,
      "loss": 0.0108,
      "step": 1682620
    },
    {
      "epoch": 2.7536772647827026,
      "grad_norm": 0.48245102167129517,
      "learning_rate": 4.522248505894059e-06,
      "loss": 0.0103,
      "step": 1682640
    },
    {
      "epoch": 2.7537099952213557,
      "grad_norm": 1.1546379327774048,
      "learning_rate": 4.522182613680542e-06,
      "loss": 0.0108,
      "step": 1682660
    },
    {
      "epoch": 2.7537427256600093,
      "grad_norm": 0.6865147948265076,
      "learning_rate": 4.522116721467025e-06,
      "loss": 0.0208,
      "step": 1682680
    },
    {
      "epoch": 2.7537754560986625,
      "grad_norm": 0.2749032974243164,
      "learning_rate": 4.522050829253507e-06,
      "loss": 0.0167,
      "step": 1682700
    },
    {
      "epoch": 2.753808186537316,
      "grad_norm": 0.26371175050735474,
      "learning_rate": 4.52198493703999e-06,
      "loss": 0.0112,
      "step": 1682720
    },
    {
      "epoch": 2.753840916975969,
      "grad_norm": 0.38888218998908997,
      "learning_rate": 4.521919044826474e-06,
      "loss": 0.0128,
      "step": 1682740
    },
    {
      "epoch": 2.753873647414623,
      "grad_norm": 0.14766350388526917,
      "learning_rate": 4.521853152612956e-06,
      "loss": 0.0116,
      "step": 1682760
    },
    {
      "epoch": 2.753906377853276,
      "grad_norm": 0.4943466782569885,
      "learning_rate": 4.521787260399439e-06,
      "loss": 0.0122,
      "step": 1682780
    },
    {
      "epoch": 2.753939108291929,
      "grad_norm": 0.33212020993232727,
      "learning_rate": 4.521721368185922e-06,
      "loss": 0.0156,
      "step": 1682800
    },
    {
      "epoch": 2.7539718387305827,
      "grad_norm": 0.1688096523284912,
      "learning_rate": 4.521655475972405e-06,
      "loss": 0.0124,
      "step": 1682820
    },
    {
      "epoch": 2.754004569169236,
      "grad_norm": 0.5403350591659546,
      "learning_rate": 4.521589583758887e-06,
      "loss": 0.0122,
      "step": 1682840
    },
    {
      "epoch": 2.7540372996078895,
      "grad_norm": 0.17662189900875092,
      "learning_rate": 4.52152369154537e-06,
      "loss": 0.0136,
      "step": 1682860
    },
    {
      "epoch": 2.7540700300465426,
      "grad_norm": 0.0880701020359993,
      "learning_rate": 4.521457799331853e-06,
      "loss": 0.0097,
      "step": 1682880
    },
    {
      "epoch": 2.754102760485196,
      "grad_norm": 0.7079519629478455,
      "learning_rate": 4.521391907118336e-06,
      "loss": 0.0131,
      "step": 1682900
    },
    {
      "epoch": 2.7541354909238493,
      "grad_norm": 0.4987170398235321,
      "learning_rate": 4.521326014904819e-06,
      "loss": 0.0133,
      "step": 1682920
    },
    {
      "epoch": 2.7541682213625025,
      "grad_norm": 0.4256272315979004,
      "learning_rate": 4.521260122691302e-06,
      "loss": 0.0177,
      "step": 1682940
    },
    {
      "epoch": 2.754200951801156,
      "grad_norm": 0.39894554018974304,
      "learning_rate": 4.521194230477785e-06,
      "loss": 0.0144,
      "step": 1682960
    },
    {
      "epoch": 2.7542336822398092,
      "grad_norm": 0.16000862419605255,
      "learning_rate": 4.521128338264268e-06,
      "loss": 0.012,
      "step": 1682980
    },
    {
      "epoch": 2.754266412678463,
      "grad_norm": 0.578705906867981,
      "learning_rate": 4.521062446050751e-06,
      "loss": 0.0124,
      "step": 1683000
    },
    {
      "epoch": 2.754299143117116,
      "grad_norm": 0.676548421382904,
      "learning_rate": 4.520996553837234e-06,
      "loss": 0.0084,
      "step": 1683020
    },
    {
      "epoch": 2.7543318735557696,
      "grad_norm": 0.16633003950119019,
      "learning_rate": 4.5209306616237165e-06,
      "loss": 0.0117,
      "step": 1683040
    },
    {
      "epoch": 2.7543646039944227,
      "grad_norm": 0.3722050189971924,
      "learning_rate": 4.520864769410199e-06,
      "loss": 0.0192,
      "step": 1683060
    },
    {
      "epoch": 2.754397334433076,
      "grad_norm": 0.751493513584137,
      "learning_rate": 4.520798877196682e-06,
      "loss": 0.017,
      "step": 1683080
    },
    {
      "epoch": 2.7544300648717295,
      "grad_norm": 0.1421351581811905,
      "learning_rate": 4.520732984983165e-06,
      "loss": 0.0131,
      "step": 1683100
    },
    {
      "epoch": 2.7544627953103826,
      "grad_norm": 0.3359087407588959,
      "learning_rate": 4.520667092769647e-06,
      "loss": 0.0105,
      "step": 1683120
    },
    {
      "epoch": 2.7544955257490362,
      "grad_norm": 0.4907962679862976,
      "learning_rate": 4.520601200556131e-06,
      "loss": 0.0076,
      "step": 1683140
    },
    {
      "epoch": 2.7545282561876894,
      "grad_norm": 1.120827317237854,
      "learning_rate": 4.520535308342614e-06,
      "loss": 0.0225,
      "step": 1683160
    },
    {
      "epoch": 2.754560986626343,
      "grad_norm": 0.48753106594085693,
      "learning_rate": 4.5204694161290965e-06,
      "loss": 0.0117,
      "step": 1683180
    },
    {
      "epoch": 2.754593717064996,
      "grad_norm": 0.2658711373806,
      "learning_rate": 4.520403523915579e-06,
      "loss": 0.0195,
      "step": 1683200
    },
    {
      "epoch": 2.7546264475036493,
      "grad_norm": 0.2391558438539505,
      "learning_rate": 4.520337631702062e-06,
      "loss": 0.0088,
      "step": 1683220
    },
    {
      "epoch": 2.754659177942303,
      "grad_norm": 0.16601364314556122,
      "learning_rate": 4.520271739488545e-06,
      "loss": 0.0164,
      "step": 1683240
    },
    {
      "epoch": 2.754691908380956,
      "grad_norm": 0.5720705389976501,
      "learning_rate": 4.5202058472750274e-06,
      "loss": 0.0166,
      "step": 1683260
    },
    {
      "epoch": 2.7547246388196096,
      "grad_norm": 0.5399901270866394,
      "learning_rate": 4.52013995506151e-06,
      "loss": 0.0209,
      "step": 1683280
    },
    {
      "epoch": 2.7547573692582628,
      "grad_norm": 0.24454684555530548,
      "learning_rate": 4.520074062847994e-06,
      "loss": 0.0144,
      "step": 1683300
    },
    {
      "epoch": 2.7547900996969164,
      "grad_norm": 0.2814551591873169,
      "learning_rate": 4.5200081706344765e-06,
      "loss": 0.0156,
      "step": 1683320
    },
    {
      "epoch": 2.7548228301355695,
      "grad_norm": 0.1733313649892807,
      "learning_rate": 4.519942278420959e-06,
      "loss": 0.0169,
      "step": 1683340
    },
    {
      "epoch": 2.7548555605742227,
      "grad_norm": 0.6213690638542175,
      "learning_rate": 4.519876386207442e-06,
      "loss": 0.0155,
      "step": 1683360
    },
    {
      "epoch": 2.7548882910128762,
      "grad_norm": 0.1860736757516861,
      "learning_rate": 4.5198104939939256e-06,
      "loss": 0.0125,
      "step": 1683380
    },
    {
      "epoch": 2.7549210214515294,
      "grad_norm": 0.2829541563987732,
      "learning_rate": 4.519744601780408e-06,
      "loss": 0.0188,
      "step": 1683400
    },
    {
      "epoch": 2.754953751890183,
      "grad_norm": 0.7256349325180054,
      "learning_rate": 4.519678709566891e-06,
      "loss": 0.0112,
      "step": 1683420
    },
    {
      "epoch": 2.754986482328836,
      "grad_norm": 0.2015712857246399,
      "learning_rate": 4.519612817353374e-06,
      "loss": 0.0101,
      "step": 1683440
    },
    {
      "epoch": 2.7550192127674897,
      "grad_norm": 0.34385257959365845,
      "learning_rate": 4.5195469251398565e-06,
      "loss": 0.0146,
      "step": 1683460
    },
    {
      "epoch": 2.755051943206143,
      "grad_norm": 0.139249786734581,
      "learning_rate": 4.519481032926339e-06,
      "loss": 0.0142,
      "step": 1683480
    },
    {
      "epoch": 2.755084673644796,
      "grad_norm": 1.1242839097976685,
      "learning_rate": 4.519415140712822e-06,
      "loss": 0.0173,
      "step": 1683500
    },
    {
      "epoch": 2.7551174040834496,
      "grad_norm": 0.23785088956356049,
      "learning_rate": 4.519349248499305e-06,
      "loss": 0.0105,
      "step": 1683520
    },
    {
      "epoch": 2.755150134522103,
      "grad_norm": 0.45533978939056396,
      "learning_rate": 4.519283356285788e-06,
      "loss": 0.0139,
      "step": 1683540
    },
    {
      "epoch": 2.755182864960756,
      "grad_norm": 0.22284166514873505,
      "learning_rate": 4.519217464072271e-06,
      "loss": 0.0137,
      "step": 1683560
    },
    {
      "epoch": 2.7552155953994095,
      "grad_norm": 0.1833004355430603,
      "learning_rate": 4.519151571858754e-06,
      "loss": 0.0124,
      "step": 1683580
    },
    {
      "epoch": 2.755248325838063,
      "grad_norm": 0.21012313663959503,
      "learning_rate": 4.5190856796452366e-06,
      "loss": 0.0129,
      "step": 1683600
    },
    {
      "epoch": 2.7552810562767163,
      "grad_norm": 2.401076078414917,
      "learning_rate": 4.519019787431719e-06,
      "loss": 0.0188,
      "step": 1683620
    },
    {
      "epoch": 2.7553137867153694,
      "grad_norm": 0.3192483186721802,
      "learning_rate": 4.518953895218202e-06,
      "loss": 0.009,
      "step": 1683640
    },
    {
      "epoch": 2.755346517154023,
      "grad_norm": 0.2840752899646759,
      "learning_rate": 4.518888003004686e-06,
      "loss": 0.0095,
      "step": 1683660
    },
    {
      "epoch": 2.755379247592676,
      "grad_norm": 0.10711261630058289,
      "learning_rate": 4.518822110791168e-06,
      "loss": 0.0099,
      "step": 1683680
    },
    {
      "epoch": 2.7554119780313293,
      "grad_norm": 0.17797966301441193,
      "learning_rate": 4.518756218577651e-06,
      "loss": 0.0132,
      "step": 1683700
    },
    {
      "epoch": 2.755444708469983,
      "grad_norm": 0.500491738319397,
      "learning_rate": 4.518690326364134e-06,
      "loss": 0.0131,
      "step": 1683720
    },
    {
      "epoch": 2.7554774389086365,
      "grad_norm": 0.7459332942962646,
      "learning_rate": 4.518624434150617e-06,
      "loss": 0.013,
      "step": 1683740
    },
    {
      "epoch": 2.7555101693472897,
      "grad_norm": 0.1861930936574936,
      "learning_rate": 4.5185585419371e-06,
      "loss": 0.0145,
      "step": 1683760
    },
    {
      "epoch": 2.755542899785943,
      "grad_norm": 1.189948320388794,
      "learning_rate": 4.518492649723583e-06,
      "loss": 0.0114,
      "step": 1683780
    },
    {
      "epoch": 2.7555756302245964,
      "grad_norm": 0.3911561369895935,
      "learning_rate": 4.518426757510066e-06,
      "loss": 0.0129,
      "step": 1683800
    },
    {
      "epoch": 2.7556083606632495,
      "grad_norm": 0.15396057069301605,
      "learning_rate": 4.518360865296548e-06,
      "loss": 0.0127,
      "step": 1683820
    },
    {
      "epoch": 2.7556410911019027,
      "grad_norm": 0.24422936141490936,
      "learning_rate": 4.518294973083031e-06,
      "loss": 0.0123,
      "step": 1683840
    },
    {
      "epoch": 2.7556738215405563,
      "grad_norm": 0.15730470418930054,
      "learning_rate": 4.518229080869514e-06,
      "loss": 0.0132,
      "step": 1683860
    },
    {
      "epoch": 2.75570655197921,
      "grad_norm": 0.34794965386390686,
      "learning_rate": 4.518163188655997e-06,
      "loss": 0.0192,
      "step": 1683880
    },
    {
      "epoch": 2.755739282417863,
      "grad_norm": 0.21329569816589355,
      "learning_rate": 4.518097296442479e-06,
      "loss": 0.013,
      "step": 1683900
    },
    {
      "epoch": 2.755772012856516,
      "grad_norm": 0.3253149390220642,
      "learning_rate": 4.518031404228962e-06,
      "loss": 0.0101,
      "step": 1683920
    },
    {
      "epoch": 2.75580474329517,
      "grad_norm": 0.0903339609503746,
      "learning_rate": 4.517965512015446e-06,
      "loss": 0.0147,
      "step": 1683940
    },
    {
      "epoch": 2.755837473733823,
      "grad_norm": 0.2744104266166687,
      "learning_rate": 4.517899619801928e-06,
      "loss": 0.0137,
      "step": 1683960
    },
    {
      "epoch": 2.755870204172476,
      "grad_norm": 0.26665523648262024,
      "learning_rate": 4.517833727588411e-06,
      "loss": 0.0114,
      "step": 1683980
    },
    {
      "epoch": 2.7559029346111297,
      "grad_norm": 0.9274882674217224,
      "learning_rate": 4.517767835374895e-06,
      "loss": 0.0145,
      "step": 1684000
    },
    {
      "epoch": 2.7559356650497833,
      "grad_norm": 0.47150182723999023,
      "learning_rate": 4.5177019431613775e-06,
      "loss": 0.0188,
      "step": 1684020
    },
    {
      "epoch": 2.7559683954884364,
      "grad_norm": 0.14952103793621063,
      "learning_rate": 4.51763605094786e-06,
      "loss": 0.0206,
      "step": 1684040
    },
    {
      "epoch": 2.7560011259270896,
      "grad_norm": 0.33443549275398254,
      "learning_rate": 4.517570158734343e-06,
      "loss": 0.0099,
      "step": 1684060
    },
    {
      "epoch": 2.756033856365743,
      "grad_norm": 0.6167719960212708,
      "learning_rate": 4.517504266520826e-06,
      "loss": 0.012,
      "step": 1684080
    },
    {
      "epoch": 2.7560665868043963,
      "grad_norm": 0.24978190660476685,
      "learning_rate": 4.5174383743073084e-06,
      "loss": 0.0102,
      "step": 1684100
    },
    {
      "epoch": 2.7560993172430495,
      "grad_norm": 0.2608482241630554,
      "learning_rate": 4.517372482093791e-06,
      "loss": 0.0161,
      "step": 1684120
    },
    {
      "epoch": 2.756132047681703,
      "grad_norm": 0.22883078455924988,
      "learning_rate": 4.517306589880274e-06,
      "loss": 0.0161,
      "step": 1684140
    },
    {
      "epoch": 2.756164778120356,
      "grad_norm": 0.9273426532745361,
      "learning_rate": 4.5172406976667575e-06,
      "loss": 0.013,
      "step": 1684160
    },
    {
      "epoch": 2.75619750855901,
      "grad_norm": 0.2897895872592926,
      "learning_rate": 4.51717480545324e-06,
      "loss": 0.0167,
      "step": 1684180
    },
    {
      "epoch": 2.756230238997663,
      "grad_norm": 0.3113647997379303,
      "learning_rate": 4.517108913239723e-06,
      "loss": 0.0135,
      "step": 1684200
    },
    {
      "epoch": 2.7562629694363165,
      "grad_norm": 0.4566725194454193,
      "learning_rate": 4.517043021026206e-06,
      "loss": 0.0157,
      "step": 1684220
    },
    {
      "epoch": 2.7562956998749697,
      "grad_norm": 0.06625734269618988,
      "learning_rate": 4.5169771288126885e-06,
      "loss": 0.0145,
      "step": 1684240
    },
    {
      "epoch": 2.756328430313623,
      "grad_norm": 0.4355183243751526,
      "learning_rate": 4.516911236599171e-06,
      "loss": 0.0148,
      "step": 1684260
    },
    {
      "epoch": 2.7563611607522764,
      "grad_norm": 0.4750284254550934,
      "learning_rate": 4.516845344385654e-06,
      "loss": 0.0151,
      "step": 1684280
    },
    {
      "epoch": 2.7563938911909296,
      "grad_norm": 0.4849945902824402,
      "learning_rate": 4.516779452172137e-06,
      "loss": 0.0113,
      "step": 1684300
    },
    {
      "epoch": 2.756426621629583,
      "grad_norm": 0.11492384225130081,
      "learning_rate": 4.516713559958619e-06,
      "loss": 0.0174,
      "step": 1684320
    },
    {
      "epoch": 2.7564593520682363,
      "grad_norm": 0.1493893563747406,
      "learning_rate": 4.516647667745103e-06,
      "loss": 0.0192,
      "step": 1684340
    },
    {
      "epoch": 2.75649208250689,
      "grad_norm": 0.32915711402893066,
      "learning_rate": 4.516581775531586e-06,
      "loss": 0.0163,
      "step": 1684360
    },
    {
      "epoch": 2.756524812945543,
      "grad_norm": 0.3362119495868683,
      "learning_rate": 4.5165158833180685e-06,
      "loss": 0.0103,
      "step": 1684380
    },
    {
      "epoch": 2.7565575433841962,
      "grad_norm": 0.29217126965522766,
      "learning_rate": 4.516449991104552e-06,
      "loss": 0.015,
      "step": 1684400
    },
    {
      "epoch": 2.75659027382285,
      "grad_norm": 0.5442444086074829,
      "learning_rate": 4.516384098891035e-06,
      "loss": 0.0122,
      "step": 1684420
    },
    {
      "epoch": 2.756623004261503,
      "grad_norm": 0.4176167845726013,
      "learning_rate": 4.5163182066775176e-06,
      "loss": 0.0095,
      "step": 1684440
    },
    {
      "epoch": 2.7566557347001566,
      "grad_norm": 0.20396853983402252,
      "learning_rate": 4.516252314464e-06,
      "loss": 0.0141,
      "step": 1684460
    },
    {
      "epoch": 2.7566884651388097,
      "grad_norm": 0.1541564166545868,
      "learning_rate": 4.516186422250483e-06,
      "loss": 0.009,
      "step": 1684480
    },
    {
      "epoch": 2.7567211955774633,
      "grad_norm": 0.2829589247703552,
      "learning_rate": 4.516120530036966e-06,
      "loss": 0.0152,
      "step": 1684500
    },
    {
      "epoch": 2.7567539260161165,
      "grad_norm": 0.05743587017059326,
      "learning_rate": 4.5160546378234485e-06,
      "loss": 0.0121,
      "step": 1684520
    },
    {
      "epoch": 2.7567866564547696,
      "grad_norm": 0.25105148553848267,
      "learning_rate": 4.515988745609931e-06,
      "loss": 0.0196,
      "step": 1684540
    },
    {
      "epoch": 2.756819386893423,
      "grad_norm": 0.25742363929748535,
      "learning_rate": 4.515922853396415e-06,
      "loss": 0.0129,
      "step": 1684560
    },
    {
      "epoch": 2.7568521173320764,
      "grad_norm": 0.3362888693809509,
      "learning_rate": 4.515856961182898e-06,
      "loss": 0.0128,
      "step": 1684580
    },
    {
      "epoch": 2.75688484777073,
      "grad_norm": 0.13180537521839142,
      "learning_rate": 4.51579106896938e-06,
      "loss": 0.0229,
      "step": 1684600
    },
    {
      "epoch": 2.756917578209383,
      "grad_norm": 0.4135774075984955,
      "learning_rate": 4.515725176755863e-06,
      "loss": 0.0103,
      "step": 1684620
    },
    {
      "epoch": 2.7569503086480367,
      "grad_norm": 0.16398905217647552,
      "learning_rate": 4.515659284542346e-06,
      "loss": 0.0126,
      "step": 1684640
    },
    {
      "epoch": 2.75698303908669,
      "grad_norm": 0.40046393871307373,
      "learning_rate": 4.5155933923288285e-06,
      "loss": 0.0124,
      "step": 1684660
    },
    {
      "epoch": 2.757015769525343,
      "grad_norm": 0.18736910820007324,
      "learning_rate": 4.515527500115311e-06,
      "loss": 0.0165,
      "step": 1684680
    },
    {
      "epoch": 2.7570484999639966,
      "grad_norm": 0.4653264880180359,
      "learning_rate": 4.515461607901795e-06,
      "loss": 0.0219,
      "step": 1684700
    },
    {
      "epoch": 2.7570812304026497,
      "grad_norm": 0.2014303356409073,
      "learning_rate": 4.515395715688278e-06,
      "loss": 0.0141,
      "step": 1684720
    },
    {
      "epoch": 2.7571139608413033,
      "grad_norm": 0.5202192664146423,
      "learning_rate": 4.51532982347476e-06,
      "loss": 0.0119,
      "step": 1684740
    },
    {
      "epoch": 2.7571466912799565,
      "grad_norm": 0.1735067218542099,
      "learning_rate": 4.515263931261243e-06,
      "loss": 0.0175,
      "step": 1684760
    },
    {
      "epoch": 2.75717942171861,
      "grad_norm": 0.5667677521705627,
      "learning_rate": 4.515198039047726e-06,
      "loss": 0.0157,
      "step": 1684780
    },
    {
      "epoch": 2.7572121521572632,
      "grad_norm": 0.09453507512807846,
      "learning_rate": 4.515132146834209e-06,
      "loss": 0.0106,
      "step": 1684800
    },
    {
      "epoch": 2.7572448825959164,
      "grad_norm": 0.40776491165161133,
      "learning_rate": 4.515066254620692e-06,
      "loss": 0.0113,
      "step": 1684820
    },
    {
      "epoch": 2.75727761303457,
      "grad_norm": 0.2015860378742218,
      "learning_rate": 4.515000362407175e-06,
      "loss": 0.0139,
      "step": 1684840
    },
    {
      "epoch": 2.757310343473223,
      "grad_norm": 0.14267025887966156,
      "learning_rate": 4.514934470193658e-06,
      "loss": 0.0153,
      "step": 1684860
    },
    {
      "epoch": 2.7573430739118767,
      "grad_norm": 0.2747442424297333,
      "learning_rate": 4.51486857798014e-06,
      "loss": 0.0128,
      "step": 1684880
    },
    {
      "epoch": 2.75737580435053,
      "grad_norm": 0.22609911859035492,
      "learning_rate": 4.514802685766623e-06,
      "loss": 0.0177,
      "step": 1684900
    },
    {
      "epoch": 2.7574085347891835,
      "grad_norm": 0.14385777711868286,
      "learning_rate": 4.514736793553106e-06,
      "loss": 0.0114,
      "step": 1684920
    },
    {
      "epoch": 2.7574412652278366,
      "grad_norm": 0.13746358454227448,
      "learning_rate": 4.514670901339589e-06,
      "loss": 0.0163,
      "step": 1684940
    },
    {
      "epoch": 2.7574739956664898,
      "grad_norm": 0.2050165981054306,
      "learning_rate": 4.514605009126072e-06,
      "loss": 0.0104,
      "step": 1684960
    },
    {
      "epoch": 2.7575067261051434,
      "grad_norm": 0.09127815812826157,
      "learning_rate": 4.514539116912555e-06,
      "loss": 0.014,
      "step": 1684980
    },
    {
      "epoch": 2.7575394565437965,
      "grad_norm": 0.30375292897224426,
      "learning_rate": 4.514473224699038e-06,
      "loss": 0.0139,
      "step": 1685000
    },
    {
      "epoch": 2.75757218698245,
      "grad_norm": 0.23450329899787903,
      "learning_rate": 4.51440733248552e-06,
      "loss": 0.011,
      "step": 1685020
    },
    {
      "epoch": 2.7576049174211033,
      "grad_norm": 0.4268158972263336,
      "learning_rate": 4.514341440272003e-06,
      "loss": 0.0176,
      "step": 1685040
    },
    {
      "epoch": 2.757637647859757,
      "grad_norm": 0.2502734363079071,
      "learning_rate": 4.514275548058487e-06,
      "loss": 0.0118,
      "step": 1685060
    },
    {
      "epoch": 2.75767037829841,
      "grad_norm": 0.36679649353027344,
      "learning_rate": 4.5142096558449695e-06,
      "loss": 0.0117,
      "step": 1685080
    },
    {
      "epoch": 2.757703108737063,
      "grad_norm": 0.4282107353210449,
      "learning_rate": 4.514143763631452e-06,
      "loss": 0.0149,
      "step": 1685100
    },
    {
      "epoch": 2.7577358391757167,
      "grad_norm": 0.25011298060417175,
      "learning_rate": 4.514077871417935e-06,
      "loss": 0.0159,
      "step": 1685120
    },
    {
      "epoch": 2.75776856961437,
      "grad_norm": 1.3827582597732544,
      "learning_rate": 4.514011979204418e-06,
      "loss": 0.0105,
      "step": 1685140
    },
    {
      "epoch": 2.757801300053023,
      "grad_norm": 0.46690842509269714,
      "learning_rate": 4.5139460869909e-06,
      "loss": 0.0128,
      "step": 1685160
    },
    {
      "epoch": 2.7578340304916766,
      "grad_norm": 0.4339160919189453,
      "learning_rate": 4.513880194777384e-06,
      "loss": 0.0122,
      "step": 1685180
    },
    {
      "epoch": 2.7578667609303302,
      "grad_norm": 0.26806479692459106,
      "learning_rate": 4.513814302563867e-06,
      "loss": 0.0135,
      "step": 1685200
    },
    {
      "epoch": 2.7578994913689834,
      "grad_norm": 0.7483692169189453,
      "learning_rate": 4.5137484103503495e-06,
      "loss": 0.014,
      "step": 1685220
    },
    {
      "epoch": 2.7579322218076365,
      "grad_norm": 0.5511428713798523,
      "learning_rate": 4.513682518136832e-06,
      "loss": 0.0125,
      "step": 1685240
    },
    {
      "epoch": 2.75796495224629,
      "grad_norm": 0.1876537948846817,
      "learning_rate": 4.513616625923315e-06,
      "loss": 0.0168,
      "step": 1685260
    },
    {
      "epoch": 2.7579976826849433,
      "grad_norm": 0.48236510157585144,
      "learning_rate": 4.513550733709798e-06,
      "loss": 0.0172,
      "step": 1685280
    },
    {
      "epoch": 2.7580304131235964,
      "grad_norm": 0.20812180638313293,
      "learning_rate": 4.5134848414962804e-06,
      "loss": 0.0094,
      "step": 1685300
    },
    {
      "epoch": 2.75806314356225,
      "grad_norm": 0.22557537257671356,
      "learning_rate": 4.513418949282763e-06,
      "loss": 0.0117,
      "step": 1685320
    },
    {
      "epoch": 2.7580958740009036,
      "grad_norm": 0.2823712229728699,
      "learning_rate": 4.513353057069246e-06,
      "loss": 0.0098,
      "step": 1685340
    },
    {
      "epoch": 2.7581286044395568,
      "grad_norm": 0.2924562692642212,
      "learning_rate": 4.5132871648557295e-06,
      "loss": 0.0179,
      "step": 1685360
    },
    {
      "epoch": 2.75816133487821,
      "grad_norm": 0.5214856863021851,
      "learning_rate": 4.513221272642212e-06,
      "loss": 0.014,
      "step": 1685380
    },
    {
      "epoch": 2.7581940653168635,
      "grad_norm": 0.40320977568626404,
      "learning_rate": 4.513155380428695e-06,
      "loss": 0.0154,
      "step": 1685400
    },
    {
      "epoch": 2.7582267957555167,
      "grad_norm": 0.3728404939174652,
      "learning_rate": 4.513089488215179e-06,
      "loss": 0.0175,
      "step": 1685420
    },
    {
      "epoch": 2.75825952619417,
      "grad_norm": 0.20884756743907928,
      "learning_rate": 4.513023596001661e-06,
      "loss": 0.0084,
      "step": 1685440
    },
    {
      "epoch": 2.7582922566328234,
      "grad_norm": 0.33819955587387085,
      "learning_rate": 4.512957703788144e-06,
      "loss": 0.0173,
      "step": 1685460
    },
    {
      "epoch": 2.758324987071477,
      "grad_norm": 0.2704671025276184,
      "learning_rate": 4.512891811574627e-06,
      "loss": 0.0097,
      "step": 1685480
    },
    {
      "epoch": 2.75835771751013,
      "grad_norm": 0.19586193561553955,
      "learning_rate": 4.5128259193611095e-06,
      "loss": 0.0103,
      "step": 1685500
    },
    {
      "epoch": 2.7583904479487833,
      "grad_norm": 0.3628421127796173,
      "learning_rate": 4.512760027147592e-06,
      "loss": 0.0174,
      "step": 1685520
    },
    {
      "epoch": 2.758423178387437,
      "grad_norm": 0.5033482313156128,
      "learning_rate": 4.512694134934075e-06,
      "loss": 0.0163,
      "step": 1685540
    },
    {
      "epoch": 2.75845590882609,
      "grad_norm": 0.5580724477767944,
      "learning_rate": 4.512628242720558e-06,
      "loss": 0.0124,
      "step": 1685560
    },
    {
      "epoch": 2.758488639264743,
      "grad_norm": 1.113503098487854,
      "learning_rate": 4.512562350507041e-06,
      "loss": 0.0154,
      "step": 1685580
    },
    {
      "epoch": 2.758521369703397,
      "grad_norm": 0.19511069357395172,
      "learning_rate": 4.512496458293524e-06,
      "loss": 0.015,
      "step": 1685600
    },
    {
      "epoch": 2.7585541001420504,
      "grad_norm": 0.4403107762336731,
      "learning_rate": 4.512430566080007e-06,
      "loss": 0.0138,
      "step": 1685620
    },
    {
      "epoch": 2.7585868305807035,
      "grad_norm": 1.388584017753601,
      "learning_rate": 4.5123646738664896e-06,
      "loss": 0.021,
      "step": 1685640
    },
    {
      "epoch": 2.7586195610193567,
      "grad_norm": 0.21102368831634521,
      "learning_rate": 4.512298781652972e-06,
      "loss": 0.0165,
      "step": 1685660
    },
    {
      "epoch": 2.7586522914580103,
      "grad_norm": 0.5378340482711792,
      "learning_rate": 4.512232889439455e-06,
      "loss": 0.0134,
      "step": 1685680
    },
    {
      "epoch": 2.7586850218966634,
      "grad_norm": 0.4293595254421234,
      "learning_rate": 4.512166997225938e-06,
      "loss": 0.012,
      "step": 1685700
    },
    {
      "epoch": 2.7587177523353166,
      "grad_norm": 0.28445571660995483,
      "learning_rate": 4.5121011050124205e-06,
      "loss": 0.0195,
      "step": 1685720
    },
    {
      "epoch": 2.75875048277397,
      "grad_norm": 0.6544908881187439,
      "learning_rate": 4.512035212798903e-06,
      "loss": 0.0208,
      "step": 1685740
    },
    {
      "epoch": 2.7587832132126233,
      "grad_norm": 0.4736279845237732,
      "learning_rate": 4.511969320585387e-06,
      "loss": 0.0122,
      "step": 1685760
    },
    {
      "epoch": 2.758815943651277,
      "grad_norm": 0.2108394354581833,
      "learning_rate": 4.51190342837187e-06,
      "loss": 0.0099,
      "step": 1685780
    },
    {
      "epoch": 2.75884867408993,
      "grad_norm": 0.3682384490966797,
      "learning_rate": 4.511837536158352e-06,
      "loss": 0.0156,
      "step": 1685800
    },
    {
      "epoch": 2.7588814045285837,
      "grad_norm": 0.22433891892433167,
      "learning_rate": 4.511771643944836e-06,
      "loss": 0.0125,
      "step": 1685820
    },
    {
      "epoch": 2.758914134967237,
      "grad_norm": 0.5832103490829468,
      "learning_rate": 4.511705751731319e-06,
      "loss": 0.0146,
      "step": 1685840
    },
    {
      "epoch": 2.75894686540589,
      "grad_norm": 0.07164239138364792,
      "learning_rate": 4.511639859517801e-06,
      "loss": 0.0164,
      "step": 1685860
    },
    {
      "epoch": 2.7589795958445436,
      "grad_norm": 0.24530816078186035,
      "learning_rate": 4.511573967304284e-06,
      "loss": 0.0097,
      "step": 1685880
    },
    {
      "epoch": 2.7590123262831967,
      "grad_norm": 0.19658738374710083,
      "learning_rate": 4.511508075090767e-06,
      "loss": 0.012,
      "step": 1685900
    },
    {
      "epoch": 2.7590450567218503,
      "grad_norm": 0.6678763031959534,
      "learning_rate": 4.51144218287725e-06,
      "loss": 0.013,
      "step": 1685920
    },
    {
      "epoch": 2.7590777871605034,
      "grad_norm": 1.2304075956344604,
      "learning_rate": 4.511376290663732e-06,
      "loss": 0.0188,
      "step": 1685940
    },
    {
      "epoch": 2.759110517599157,
      "grad_norm": 0.39773455262184143,
      "learning_rate": 4.511310398450215e-06,
      "loss": 0.0158,
      "step": 1685960
    },
    {
      "epoch": 2.75914324803781,
      "grad_norm": 0.196337029337883,
      "learning_rate": 4.511244506236699e-06,
      "loss": 0.0173,
      "step": 1685980
    },
    {
      "epoch": 2.7591759784764633,
      "grad_norm": 0.1310736984014511,
      "learning_rate": 4.511178614023181e-06,
      "loss": 0.0107,
      "step": 1686000
    },
    {
      "epoch": 2.759208708915117,
      "grad_norm": 0.4233766198158264,
      "learning_rate": 4.511112721809664e-06,
      "loss": 0.0161,
      "step": 1686020
    },
    {
      "epoch": 2.75924143935377,
      "grad_norm": 0.2900967001914978,
      "learning_rate": 4.511046829596147e-06,
      "loss": 0.0111,
      "step": 1686040
    },
    {
      "epoch": 2.7592741697924237,
      "grad_norm": 0.19654496014118195,
      "learning_rate": 4.51098093738263e-06,
      "loss": 0.0135,
      "step": 1686060
    },
    {
      "epoch": 2.759306900231077,
      "grad_norm": 0.3347321152687073,
      "learning_rate": 4.510915045169112e-06,
      "loss": 0.0194,
      "step": 1686080
    },
    {
      "epoch": 2.7593396306697304,
      "grad_norm": 0.13032807409763336,
      "learning_rate": 4.510849152955595e-06,
      "loss": 0.0137,
      "step": 1686100
    },
    {
      "epoch": 2.7593723611083836,
      "grad_norm": 0.4387567341327667,
      "learning_rate": 4.510783260742079e-06,
      "loss": 0.0121,
      "step": 1686120
    },
    {
      "epoch": 2.7594050915470367,
      "grad_norm": 0.4821765422821045,
      "learning_rate": 4.5107173685285614e-06,
      "loss": 0.014,
      "step": 1686140
    },
    {
      "epoch": 2.7594378219856903,
      "grad_norm": 0.6033323407173157,
      "learning_rate": 4.510651476315044e-06,
      "loss": 0.0121,
      "step": 1686160
    },
    {
      "epoch": 2.7594705524243435,
      "grad_norm": 0.1163439080119133,
      "learning_rate": 4.510585584101527e-06,
      "loss": 0.0148,
      "step": 1686180
    },
    {
      "epoch": 2.759503282862997,
      "grad_norm": 0.16209368407726288,
      "learning_rate": 4.51051969188801e-06,
      "loss": 0.0102,
      "step": 1686200
    },
    {
      "epoch": 2.75953601330165,
      "grad_norm": 0.3169042766094208,
      "learning_rate": 4.510453799674493e-06,
      "loss": 0.0083,
      "step": 1686220
    },
    {
      "epoch": 2.759568743740304,
      "grad_norm": 0.7208316326141357,
      "learning_rate": 4.510387907460976e-06,
      "loss": 0.0132,
      "step": 1686240
    },
    {
      "epoch": 2.759601474178957,
      "grad_norm": 0.09033448249101639,
      "learning_rate": 4.510322015247459e-06,
      "loss": 0.0137,
      "step": 1686260
    },
    {
      "epoch": 2.75963420461761,
      "grad_norm": 0.4229777157306671,
      "learning_rate": 4.5102561230339415e-06,
      "loss": 0.0151,
      "step": 1686280
    },
    {
      "epoch": 2.7596669350562637,
      "grad_norm": 0.340985506772995,
      "learning_rate": 4.510190230820424e-06,
      "loss": 0.014,
      "step": 1686300
    },
    {
      "epoch": 2.759699665494917,
      "grad_norm": 0.34385862946510315,
      "learning_rate": 4.510124338606907e-06,
      "loss": 0.014,
      "step": 1686320
    },
    {
      "epoch": 2.7597323959335704,
      "grad_norm": 0.09384988248348236,
      "learning_rate": 4.51005844639339e-06,
      "loss": 0.0079,
      "step": 1686340
    },
    {
      "epoch": 2.7597651263722236,
      "grad_norm": 0.1054493710398674,
      "learning_rate": 4.5099925541798724e-06,
      "loss": 0.0127,
      "step": 1686360
    },
    {
      "epoch": 2.759797856810877,
      "grad_norm": 0.10969265550374985,
      "learning_rate": 4.509926661966356e-06,
      "loss": 0.0194,
      "step": 1686380
    },
    {
      "epoch": 2.7598305872495303,
      "grad_norm": 0.3826940357685089,
      "learning_rate": 4.509860769752839e-06,
      "loss": 0.0109,
      "step": 1686400
    },
    {
      "epoch": 2.7598633176881835,
      "grad_norm": 0.3704981505870819,
      "learning_rate": 4.5097948775393215e-06,
      "loss": 0.0129,
      "step": 1686420
    },
    {
      "epoch": 2.759896048126837,
      "grad_norm": 0.420883446931839,
      "learning_rate": 4.509728985325804e-06,
      "loss": 0.0128,
      "step": 1686440
    },
    {
      "epoch": 2.7599287785654902,
      "grad_norm": 0.34246736764907837,
      "learning_rate": 4.509663093112288e-06,
      "loss": 0.0153,
      "step": 1686460
    },
    {
      "epoch": 2.759961509004144,
      "grad_norm": 0.9498793482780457,
      "learning_rate": 4.5095972008987706e-06,
      "loss": 0.0191,
      "step": 1686480
    },
    {
      "epoch": 2.759994239442797,
      "grad_norm": 0.1825440376996994,
      "learning_rate": 4.509531308685253e-06,
      "loss": 0.0138,
      "step": 1686500
    },
    {
      "epoch": 2.7600269698814506,
      "grad_norm": 0.24055343866348267,
      "learning_rate": 4.509465416471736e-06,
      "loss": 0.0091,
      "step": 1686520
    },
    {
      "epoch": 2.7600597003201037,
      "grad_norm": 0.409965842962265,
      "learning_rate": 4.509399524258219e-06,
      "loss": 0.0102,
      "step": 1686540
    },
    {
      "epoch": 2.760092430758757,
      "grad_norm": 0.28294965624809265,
      "learning_rate": 4.5093336320447015e-06,
      "loss": 0.0101,
      "step": 1686560
    },
    {
      "epoch": 2.7601251611974105,
      "grad_norm": 1.044301986694336,
      "learning_rate": 4.509267739831184e-06,
      "loss": 0.0193,
      "step": 1686580
    },
    {
      "epoch": 2.7601578916360636,
      "grad_norm": 0.3911777138710022,
      "learning_rate": 4.509201847617668e-06,
      "loss": 0.011,
      "step": 1686600
    },
    {
      "epoch": 2.7601906220747168,
      "grad_norm": 0.6379990577697754,
      "learning_rate": 4.509135955404151e-06,
      "loss": 0.0138,
      "step": 1686620
    },
    {
      "epoch": 2.7602233525133704,
      "grad_norm": 0.4077109098434448,
      "learning_rate": 4.509070063190633e-06,
      "loss": 0.0162,
      "step": 1686640
    },
    {
      "epoch": 2.760256082952024,
      "grad_norm": 0.6488597393035889,
      "learning_rate": 4.509004170977116e-06,
      "loss": 0.0166,
      "step": 1686660
    },
    {
      "epoch": 2.760288813390677,
      "grad_norm": 0.11142320930957794,
      "learning_rate": 4.508938278763599e-06,
      "loss": 0.0137,
      "step": 1686680
    },
    {
      "epoch": 2.7603215438293303,
      "grad_norm": 0.04888315498828888,
      "learning_rate": 4.5088723865500815e-06,
      "loss": 0.0128,
      "step": 1686700
    },
    {
      "epoch": 2.760354274267984,
      "grad_norm": 1.263395071029663,
      "learning_rate": 4.508806494336564e-06,
      "loss": 0.0089,
      "step": 1686720
    },
    {
      "epoch": 2.760387004706637,
      "grad_norm": 0.20566441118717194,
      "learning_rate": 4.508740602123047e-06,
      "loss": 0.008,
      "step": 1686740
    },
    {
      "epoch": 2.76041973514529,
      "grad_norm": 0.29787641763687134,
      "learning_rate": 4.50867470990953e-06,
      "loss": 0.0155,
      "step": 1686760
    },
    {
      "epoch": 2.7604524655839437,
      "grad_norm": 0.18264393508434296,
      "learning_rate": 4.508608817696013e-06,
      "loss": 0.0113,
      "step": 1686780
    },
    {
      "epoch": 2.7604851960225973,
      "grad_norm": 0.2593884766101837,
      "learning_rate": 4.508542925482496e-06,
      "loss": 0.0111,
      "step": 1686800
    },
    {
      "epoch": 2.7605179264612505,
      "grad_norm": 0.10442738234996796,
      "learning_rate": 4.508477033268979e-06,
      "loss": 0.0089,
      "step": 1686820
    },
    {
      "epoch": 2.7605506568999036,
      "grad_norm": 0.28280091285705566,
      "learning_rate": 4.508411141055462e-06,
      "loss": 0.0108,
      "step": 1686840
    },
    {
      "epoch": 2.7605833873385572,
      "grad_norm": 0.07820956408977509,
      "learning_rate": 4.508345248841945e-06,
      "loss": 0.0125,
      "step": 1686860
    },
    {
      "epoch": 2.7606161177772104,
      "grad_norm": 0.2643643021583557,
      "learning_rate": 4.508279356628428e-06,
      "loss": 0.0101,
      "step": 1686880
    },
    {
      "epoch": 2.7606488482158635,
      "grad_norm": 0.288434237241745,
      "learning_rate": 4.508213464414911e-06,
      "loss": 0.0108,
      "step": 1686900
    },
    {
      "epoch": 2.760681578654517,
      "grad_norm": 0.4879135489463806,
      "learning_rate": 4.508147572201393e-06,
      "loss": 0.0152,
      "step": 1686920
    },
    {
      "epoch": 2.7607143090931707,
      "grad_norm": 0.28490254282951355,
      "learning_rate": 4.508081679987876e-06,
      "loss": 0.0138,
      "step": 1686940
    },
    {
      "epoch": 2.760747039531824,
      "grad_norm": 0.35352379083633423,
      "learning_rate": 4.508015787774359e-06,
      "loss": 0.0158,
      "step": 1686960
    },
    {
      "epoch": 2.760779769970477,
      "grad_norm": 0.33358463644981384,
      "learning_rate": 4.507949895560842e-06,
      "loss": 0.0109,
      "step": 1686980
    },
    {
      "epoch": 2.7608125004091306,
      "grad_norm": 0.18065245449543,
      "learning_rate": 4.507884003347325e-06,
      "loss": 0.0209,
      "step": 1687000
    },
    {
      "epoch": 2.7608452308477838,
      "grad_norm": 1.0828263759613037,
      "learning_rate": 4.507818111133808e-06,
      "loss": 0.015,
      "step": 1687020
    },
    {
      "epoch": 2.760877961286437,
      "grad_norm": 0.3823993504047394,
      "learning_rate": 4.507752218920291e-06,
      "loss": 0.0158,
      "step": 1687040
    },
    {
      "epoch": 2.7609106917250905,
      "grad_norm": 0.32191529870033264,
      "learning_rate": 4.507686326706773e-06,
      "loss": 0.0185,
      "step": 1687060
    },
    {
      "epoch": 2.760943422163744,
      "grad_norm": 0.6497096419334412,
      "learning_rate": 4.507620434493256e-06,
      "loss": 0.0127,
      "step": 1687080
    },
    {
      "epoch": 2.7609761526023973,
      "grad_norm": 0.13539311289787292,
      "learning_rate": 4.507554542279739e-06,
      "loss": 0.0122,
      "step": 1687100
    },
    {
      "epoch": 2.7610088830410504,
      "grad_norm": 0.3706825375556946,
      "learning_rate": 4.507488650066222e-06,
      "loss": 0.0125,
      "step": 1687120
    },
    {
      "epoch": 2.761041613479704,
      "grad_norm": 0.6200798749923706,
      "learning_rate": 4.507422757852704e-06,
      "loss": 0.0154,
      "step": 1687140
    },
    {
      "epoch": 2.761074343918357,
      "grad_norm": 0.39393094182014465,
      "learning_rate": 4.507356865639188e-06,
      "loss": 0.0117,
      "step": 1687160
    },
    {
      "epoch": 2.7611070743570103,
      "grad_norm": 0.3357725739479065,
      "learning_rate": 4.507290973425671e-06,
      "loss": 0.0082,
      "step": 1687180
    },
    {
      "epoch": 2.761139804795664,
      "grad_norm": 0.40196776390075684,
      "learning_rate": 4.5072250812121534e-06,
      "loss": 0.0154,
      "step": 1687200
    },
    {
      "epoch": 2.761172535234317,
      "grad_norm": 1.1341609954833984,
      "learning_rate": 4.507159188998636e-06,
      "loss": 0.0205,
      "step": 1687220
    },
    {
      "epoch": 2.7612052656729706,
      "grad_norm": 0.21018442511558533,
      "learning_rate": 4.50709329678512e-06,
      "loss": 0.0086,
      "step": 1687240
    },
    {
      "epoch": 2.761237996111624,
      "grad_norm": 0.36884739995002747,
      "learning_rate": 4.5070274045716025e-06,
      "loss": 0.0156,
      "step": 1687260
    },
    {
      "epoch": 2.7612707265502774,
      "grad_norm": 0.07753835618495941,
      "learning_rate": 4.506961512358085e-06,
      "loss": 0.0152,
      "step": 1687280
    },
    {
      "epoch": 2.7613034569889305,
      "grad_norm": 0.32225218415260315,
      "learning_rate": 4.506895620144568e-06,
      "loss": 0.0083,
      "step": 1687300
    },
    {
      "epoch": 2.7613361874275837,
      "grad_norm": 0.8544428944587708,
      "learning_rate": 4.506829727931051e-06,
      "loss": 0.0192,
      "step": 1687320
    },
    {
      "epoch": 2.7613689178662373,
      "grad_norm": 0.3816820979118347,
      "learning_rate": 4.5067638357175335e-06,
      "loss": 0.0138,
      "step": 1687340
    },
    {
      "epoch": 2.7614016483048904,
      "grad_norm": 0.3655394911766052,
      "learning_rate": 4.506697943504016e-06,
      "loss": 0.0133,
      "step": 1687360
    },
    {
      "epoch": 2.761434378743544,
      "grad_norm": 0.32718661427497864,
      "learning_rate": 4.506632051290499e-06,
      "loss": 0.0137,
      "step": 1687380
    },
    {
      "epoch": 2.761467109182197,
      "grad_norm": 0.055074192583560944,
      "learning_rate": 4.5065661590769825e-06,
      "loss": 0.0162,
      "step": 1687400
    },
    {
      "epoch": 2.7614998396208508,
      "grad_norm": 0.1658555120229721,
      "learning_rate": 4.506500266863465e-06,
      "loss": 0.0127,
      "step": 1687420
    },
    {
      "epoch": 2.761532570059504,
      "grad_norm": 0.40436652302742004,
      "learning_rate": 4.506434374649948e-06,
      "loss": 0.0098,
      "step": 1687440
    },
    {
      "epoch": 2.761565300498157,
      "grad_norm": 0.2817412316799164,
      "learning_rate": 4.506368482436431e-06,
      "loss": 0.0129,
      "step": 1687460
    },
    {
      "epoch": 2.7615980309368107,
      "grad_norm": 0.1663084775209427,
      "learning_rate": 4.5063025902229135e-06,
      "loss": 0.0117,
      "step": 1687480
    },
    {
      "epoch": 2.761630761375464,
      "grad_norm": 0.1293613612651825,
      "learning_rate": 4.506236698009396e-06,
      "loss": 0.0092,
      "step": 1687500
    },
    {
      "epoch": 2.7616634918141174,
      "grad_norm": 0.20905695855617523,
      "learning_rate": 4.50617080579588e-06,
      "loss": 0.0086,
      "step": 1687520
    },
    {
      "epoch": 2.7616962222527706,
      "grad_norm": 0.344495564699173,
      "learning_rate": 4.5061049135823625e-06,
      "loss": 0.0162,
      "step": 1687540
    },
    {
      "epoch": 2.761728952691424,
      "grad_norm": 0.43187737464904785,
      "learning_rate": 4.506039021368845e-06,
      "loss": 0.0119,
      "step": 1687560
    },
    {
      "epoch": 2.7617616831300773,
      "grad_norm": 0.31152403354644775,
      "learning_rate": 4.505973129155328e-06,
      "loss": 0.016,
      "step": 1687580
    },
    {
      "epoch": 2.7617944135687305,
      "grad_norm": 0.643632709980011,
      "learning_rate": 4.505907236941811e-06,
      "loss": 0.0144,
      "step": 1687600
    },
    {
      "epoch": 2.761827144007384,
      "grad_norm": 0.1249837800860405,
      "learning_rate": 4.505841344728294e-06,
      "loss": 0.0137,
      "step": 1687620
    },
    {
      "epoch": 2.761859874446037,
      "grad_norm": 0.7259995937347412,
      "learning_rate": 4.505775452514777e-06,
      "loss": 0.0153,
      "step": 1687640
    },
    {
      "epoch": 2.761892604884691,
      "grad_norm": 0.28126269578933716,
      "learning_rate": 4.50570956030126e-06,
      "loss": 0.0156,
      "step": 1687660
    },
    {
      "epoch": 2.761925335323344,
      "grad_norm": 0.09308302402496338,
      "learning_rate": 4.5056436680877426e-06,
      "loss": 0.0157,
      "step": 1687680
    },
    {
      "epoch": 2.7619580657619975,
      "grad_norm": 0.2059049755334854,
      "learning_rate": 4.505577775874225e-06,
      "loss": 0.0098,
      "step": 1687700
    },
    {
      "epoch": 2.7619907962006507,
      "grad_norm": 0.22012145817279816,
      "learning_rate": 4.505511883660708e-06,
      "loss": 0.013,
      "step": 1687720
    },
    {
      "epoch": 2.762023526639304,
      "grad_norm": 0.5428124070167542,
      "learning_rate": 4.505445991447191e-06,
      "loss": 0.019,
      "step": 1687740
    },
    {
      "epoch": 2.7620562570779574,
      "grad_norm": 0.945135235786438,
      "learning_rate": 4.5053800992336735e-06,
      "loss": 0.0174,
      "step": 1687760
    },
    {
      "epoch": 2.7620889875166106,
      "grad_norm": 0.43268585205078125,
      "learning_rate": 4.505314207020156e-06,
      "loss": 0.0137,
      "step": 1687780
    },
    {
      "epoch": 2.762121717955264,
      "grad_norm": 0.298471599817276,
      "learning_rate": 4.50524831480664e-06,
      "loss": 0.0125,
      "step": 1687800
    },
    {
      "epoch": 2.7621544483939173,
      "grad_norm": 0.22290655970573425,
      "learning_rate": 4.505182422593123e-06,
      "loss": 0.0111,
      "step": 1687820
    },
    {
      "epoch": 2.762187178832571,
      "grad_norm": 0.22199073433876038,
      "learning_rate": 4.505116530379605e-06,
      "loss": 0.0125,
      "step": 1687840
    },
    {
      "epoch": 2.762219909271224,
      "grad_norm": 0.4297144412994385,
      "learning_rate": 4.505050638166088e-06,
      "loss": 0.0126,
      "step": 1687860
    },
    {
      "epoch": 2.7622526397098772,
      "grad_norm": 0.32429975271224976,
      "learning_rate": 4.504984745952572e-06,
      "loss": 0.0158,
      "step": 1687880
    },
    {
      "epoch": 2.762285370148531,
      "grad_norm": 0.44701290130615234,
      "learning_rate": 4.504918853739054e-06,
      "loss": 0.0124,
      "step": 1687900
    },
    {
      "epoch": 2.762318100587184,
      "grad_norm": 0.6331098079681396,
      "learning_rate": 4.504852961525537e-06,
      "loss": 0.0281,
      "step": 1687920
    },
    {
      "epoch": 2.7623508310258376,
      "grad_norm": 0.3437739312648773,
      "learning_rate": 4.50478706931202e-06,
      "loss": 0.022,
      "step": 1687940
    },
    {
      "epoch": 2.7623835614644907,
      "grad_norm": 1.524004340171814,
      "learning_rate": 4.504721177098503e-06,
      "loss": 0.0175,
      "step": 1687960
    },
    {
      "epoch": 2.7624162919031443,
      "grad_norm": 0.26410096883773804,
      "learning_rate": 4.504655284884985e-06,
      "loss": 0.0173,
      "step": 1687980
    },
    {
      "epoch": 2.7624490223417975,
      "grad_norm": 0.1359885036945343,
      "learning_rate": 4.504589392671468e-06,
      "loss": 0.0161,
      "step": 1688000
    },
    {
      "epoch": 2.7624817527804506,
      "grad_norm": 0.34589534997940063,
      "learning_rate": 4.504523500457952e-06,
      "loss": 0.0079,
      "step": 1688020
    },
    {
      "epoch": 2.762514483219104,
      "grad_norm": 0.35852280259132385,
      "learning_rate": 4.5044576082444344e-06,
      "loss": 0.0111,
      "step": 1688040
    },
    {
      "epoch": 2.7625472136577574,
      "grad_norm": 0.16978798806667328,
      "learning_rate": 4.504391716030917e-06,
      "loss": 0.0152,
      "step": 1688060
    },
    {
      "epoch": 2.762579944096411,
      "grad_norm": 0.5879924297332764,
      "learning_rate": 4.5043258238174e-06,
      "loss": 0.0109,
      "step": 1688080
    },
    {
      "epoch": 2.762612674535064,
      "grad_norm": 0.15518850088119507,
      "learning_rate": 4.504259931603883e-06,
      "loss": 0.011,
      "step": 1688100
    },
    {
      "epoch": 2.7626454049737177,
      "grad_norm": 0.19837909936904907,
      "learning_rate": 4.504194039390365e-06,
      "loss": 0.0114,
      "step": 1688120
    },
    {
      "epoch": 2.762678135412371,
      "grad_norm": 0.1525413990020752,
      "learning_rate": 4.504128147176848e-06,
      "loss": 0.0117,
      "step": 1688140
    },
    {
      "epoch": 2.762710865851024,
      "grad_norm": 0.8058901429176331,
      "learning_rate": 4.504062254963331e-06,
      "loss": 0.0091,
      "step": 1688160
    },
    {
      "epoch": 2.7627435962896776,
      "grad_norm": 0.3953091502189636,
      "learning_rate": 4.503996362749814e-06,
      "loss": 0.0105,
      "step": 1688180
    },
    {
      "epoch": 2.7627763267283307,
      "grad_norm": 0.6611382961273193,
      "learning_rate": 4.503930470536297e-06,
      "loss": 0.0203,
      "step": 1688200
    },
    {
      "epoch": 2.762809057166984,
      "grad_norm": 0.2351817935705185,
      "learning_rate": 4.50386457832278e-06,
      "loss": 0.0131,
      "step": 1688220
    },
    {
      "epoch": 2.7628417876056375,
      "grad_norm": 0.5680968761444092,
      "learning_rate": 4.503798686109263e-06,
      "loss": 0.0176,
      "step": 1688240
    },
    {
      "epoch": 2.762874518044291,
      "grad_norm": 0.474491149187088,
      "learning_rate": 4.503732793895746e-06,
      "loss": 0.0149,
      "step": 1688260
    },
    {
      "epoch": 2.7629072484829442,
      "grad_norm": 0.4472712576389313,
      "learning_rate": 4.503666901682229e-06,
      "loss": 0.0234,
      "step": 1688280
    },
    {
      "epoch": 2.7629399789215974,
      "grad_norm": 1.8566761016845703,
      "learning_rate": 4.503601009468712e-06,
      "loss": 0.0154,
      "step": 1688300
    },
    {
      "epoch": 2.762972709360251,
      "grad_norm": 0.1408039629459381,
      "learning_rate": 4.5035351172551945e-06,
      "loss": 0.0153,
      "step": 1688320
    },
    {
      "epoch": 2.763005439798904,
      "grad_norm": 0.12779994308948517,
      "learning_rate": 4.503469225041677e-06,
      "loss": 0.0138,
      "step": 1688340
    },
    {
      "epoch": 2.7630381702375573,
      "grad_norm": 0.7663223147392273,
      "learning_rate": 4.50340333282816e-06,
      "loss": 0.0139,
      "step": 1688360
    },
    {
      "epoch": 2.763070900676211,
      "grad_norm": 0.9166075587272644,
      "learning_rate": 4.503337440614643e-06,
      "loss": 0.0186,
      "step": 1688380
    },
    {
      "epoch": 2.7631036311148645,
      "grad_norm": 0.22350554168224335,
      "learning_rate": 4.5032715484011254e-06,
      "loss": 0.0127,
      "step": 1688400
    },
    {
      "epoch": 2.7631363615535176,
      "grad_norm": 0.15241806209087372,
      "learning_rate": 4.503205656187609e-06,
      "loss": 0.0149,
      "step": 1688420
    },
    {
      "epoch": 2.7631690919921708,
      "grad_norm": 0.5168361663818359,
      "learning_rate": 4.503139763974092e-06,
      "loss": 0.0121,
      "step": 1688440
    },
    {
      "epoch": 2.7632018224308244,
      "grad_norm": 0.6601406931877136,
      "learning_rate": 4.5030738717605745e-06,
      "loss": 0.0095,
      "step": 1688460
    },
    {
      "epoch": 2.7632345528694775,
      "grad_norm": 0.31697171926498413,
      "learning_rate": 4.503007979547057e-06,
      "loss": 0.0179,
      "step": 1688480
    },
    {
      "epoch": 2.7632672833081307,
      "grad_norm": 0.6448202133178711,
      "learning_rate": 4.50294208733354e-06,
      "loss": 0.009,
      "step": 1688500
    },
    {
      "epoch": 2.7633000137467842,
      "grad_norm": 0.3134725093841553,
      "learning_rate": 4.502876195120023e-06,
      "loss": 0.0113,
      "step": 1688520
    },
    {
      "epoch": 2.763332744185438,
      "grad_norm": 0.6884880661964417,
      "learning_rate": 4.5028103029065055e-06,
      "loss": 0.0189,
      "step": 1688540
    },
    {
      "epoch": 2.763365474624091,
      "grad_norm": 0.1745297908782959,
      "learning_rate": 4.502744410692988e-06,
      "loss": 0.0123,
      "step": 1688560
    },
    {
      "epoch": 2.763398205062744,
      "grad_norm": 0.20540611445903778,
      "learning_rate": 4.502678518479472e-06,
      "loss": 0.0096,
      "step": 1688580
    },
    {
      "epoch": 2.7634309355013977,
      "grad_norm": 0.45478102564811707,
      "learning_rate": 4.5026126262659545e-06,
      "loss": 0.0106,
      "step": 1688600
    },
    {
      "epoch": 2.763463665940051,
      "grad_norm": 0.6096376776695251,
      "learning_rate": 4.502546734052437e-06,
      "loss": 0.0174,
      "step": 1688620
    },
    {
      "epoch": 2.763496396378704,
      "grad_norm": 0.47421538829803467,
      "learning_rate": 4.50248084183892e-06,
      "loss": 0.0109,
      "step": 1688640
    },
    {
      "epoch": 2.7635291268173576,
      "grad_norm": 0.4160309135913849,
      "learning_rate": 4.502414949625404e-06,
      "loss": 0.0117,
      "step": 1688660
    },
    {
      "epoch": 2.763561857256011,
      "grad_norm": 0.3491858243942261,
      "learning_rate": 4.502349057411886e-06,
      "loss": 0.0146,
      "step": 1688680
    },
    {
      "epoch": 2.7635945876946644,
      "grad_norm": 0.24706454575061798,
      "learning_rate": 4.502283165198369e-06,
      "loss": 0.0097,
      "step": 1688700
    },
    {
      "epoch": 2.7636273181333175,
      "grad_norm": 1.3326395750045776,
      "learning_rate": 4.502217272984852e-06,
      "loss": 0.0137,
      "step": 1688720
    },
    {
      "epoch": 2.763660048571971,
      "grad_norm": 0.22679966688156128,
      "learning_rate": 4.5021513807713346e-06,
      "loss": 0.0112,
      "step": 1688740
    },
    {
      "epoch": 2.7636927790106243,
      "grad_norm": 0.5998216867446899,
      "learning_rate": 4.502085488557817e-06,
      "loss": 0.0201,
      "step": 1688760
    },
    {
      "epoch": 2.7637255094492774,
      "grad_norm": 0.5179131031036377,
      "learning_rate": 4.5020195963443e-06,
      "loss": 0.012,
      "step": 1688780
    },
    {
      "epoch": 2.763758239887931,
      "grad_norm": 0.3289596140384674,
      "learning_rate": 4.501953704130783e-06,
      "loss": 0.014,
      "step": 1688800
    },
    {
      "epoch": 2.763790970326584,
      "grad_norm": 0.44965842366218567,
      "learning_rate": 4.501887811917266e-06,
      "loss": 0.021,
      "step": 1688820
    },
    {
      "epoch": 2.7638237007652378,
      "grad_norm": 0.40082883834838867,
      "learning_rate": 4.501821919703749e-06,
      "loss": 0.0094,
      "step": 1688840
    },
    {
      "epoch": 2.763856431203891,
      "grad_norm": 0.937773585319519,
      "learning_rate": 4.501756027490232e-06,
      "loss": 0.0092,
      "step": 1688860
    },
    {
      "epoch": 2.7638891616425445,
      "grad_norm": 0.5639124512672424,
      "learning_rate": 4.501690135276715e-06,
      "loss": 0.015,
      "step": 1688880
    },
    {
      "epoch": 2.7639218920811977,
      "grad_norm": 0.4017177224159241,
      "learning_rate": 4.501624243063197e-06,
      "loss": 0.0165,
      "step": 1688900
    },
    {
      "epoch": 2.763954622519851,
      "grad_norm": 0.536626398563385,
      "learning_rate": 4.501558350849681e-06,
      "loss": 0.0108,
      "step": 1688920
    },
    {
      "epoch": 2.7639873529585044,
      "grad_norm": 0.1777246594429016,
      "learning_rate": 4.501492458636164e-06,
      "loss": 0.015,
      "step": 1688940
    },
    {
      "epoch": 2.7640200833971575,
      "grad_norm": 0.4351440668106079,
      "learning_rate": 4.501426566422646e-06,
      "loss": 0.0142,
      "step": 1688960
    },
    {
      "epoch": 2.764052813835811,
      "grad_norm": 0.33953872323036194,
      "learning_rate": 4.501360674209129e-06,
      "loss": 0.0095,
      "step": 1688980
    },
    {
      "epoch": 2.7640855442744643,
      "grad_norm": 0.34110626578330994,
      "learning_rate": 4.501294781995612e-06,
      "loss": 0.0162,
      "step": 1689000
    },
    {
      "epoch": 2.764118274713118,
      "grad_norm": 1.112417459487915,
      "learning_rate": 4.501228889782095e-06,
      "loss": 0.017,
      "step": 1689020
    },
    {
      "epoch": 2.764151005151771,
      "grad_norm": 0.8235225081443787,
      "learning_rate": 4.501162997568578e-06,
      "loss": 0.0119,
      "step": 1689040
    },
    {
      "epoch": 2.764183735590424,
      "grad_norm": 0.22637420892715454,
      "learning_rate": 4.501097105355061e-06,
      "loss": 0.0158,
      "step": 1689060
    },
    {
      "epoch": 2.764216466029078,
      "grad_norm": 0.11155705153942108,
      "learning_rate": 4.501031213141544e-06,
      "loss": 0.0131,
      "step": 1689080
    },
    {
      "epoch": 2.764249196467731,
      "grad_norm": 0.23141412436962128,
      "learning_rate": 4.500965320928026e-06,
      "loss": 0.0133,
      "step": 1689100
    },
    {
      "epoch": 2.7642819269063845,
      "grad_norm": 0.0786723867058754,
      "learning_rate": 4.500899428714509e-06,
      "loss": 0.013,
      "step": 1689120
    },
    {
      "epoch": 2.7643146573450377,
      "grad_norm": 0.10029996931552887,
      "learning_rate": 4.500833536500992e-06,
      "loss": 0.0098,
      "step": 1689140
    },
    {
      "epoch": 2.7643473877836913,
      "grad_norm": 0.4794914126396179,
      "learning_rate": 4.500767644287475e-06,
      "loss": 0.0105,
      "step": 1689160
    },
    {
      "epoch": 2.7643801182223444,
      "grad_norm": 0.3422289788722992,
      "learning_rate": 4.500701752073957e-06,
      "loss": 0.0161,
      "step": 1689180
    },
    {
      "epoch": 2.7644128486609976,
      "grad_norm": 0.6866604089736938,
      "learning_rate": 4.50063585986044e-06,
      "loss": 0.0133,
      "step": 1689200
    },
    {
      "epoch": 2.764445579099651,
      "grad_norm": 0.19205951690673828,
      "learning_rate": 4.500569967646924e-06,
      "loss": 0.0187,
      "step": 1689220
    },
    {
      "epoch": 2.7644783095383043,
      "grad_norm": 0.12249743193387985,
      "learning_rate": 4.5005040754334064e-06,
      "loss": 0.0126,
      "step": 1689240
    },
    {
      "epoch": 2.764511039976958,
      "grad_norm": 0.13188661634922028,
      "learning_rate": 4.500438183219889e-06,
      "loss": 0.013,
      "step": 1689260
    },
    {
      "epoch": 2.764543770415611,
      "grad_norm": 0.11513609439134598,
      "learning_rate": 4.500372291006373e-06,
      "loss": 0.012,
      "step": 1689280
    },
    {
      "epoch": 2.7645765008542647,
      "grad_norm": 0.4284902513027191,
      "learning_rate": 4.5003063987928555e-06,
      "loss": 0.0196,
      "step": 1689300
    },
    {
      "epoch": 2.764609231292918,
      "grad_norm": 0.2735431492328644,
      "learning_rate": 4.500240506579338e-06,
      "loss": 0.0185,
      "step": 1689320
    },
    {
      "epoch": 2.764641961731571,
      "grad_norm": 0.26825839281082153,
      "learning_rate": 4.500174614365821e-06,
      "loss": 0.0137,
      "step": 1689340
    },
    {
      "epoch": 2.7646746921702245,
      "grad_norm": 0.8774018883705139,
      "learning_rate": 4.500108722152304e-06,
      "loss": 0.012,
      "step": 1689360
    },
    {
      "epoch": 2.7647074226088777,
      "grad_norm": 0.20333896577358246,
      "learning_rate": 4.5000428299387865e-06,
      "loss": 0.0172,
      "step": 1689380
    },
    {
      "epoch": 2.7647401530475313,
      "grad_norm": 0.7181005477905273,
      "learning_rate": 4.499976937725269e-06,
      "loss": 0.0168,
      "step": 1689400
    },
    {
      "epoch": 2.7647728834861844,
      "grad_norm": 0.6392517685890198,
      "learning_rate": 4.499911045511752e-06,
      "loss": 0.0129,
      "step": 1689420
    },
    {
      "epoch": 2.764805613924838,
      "grad_norm": 0.16247963905334473,
      "learning_rate": 4.4998451532982355e-06,
      "loss": 0.015,
      "step": 1689440
    },
    {
      "epoch": 2.764838344363491,
      "grad_norm": 0.37434595823287964,
      "learning_rate": 4.499779261084718e-06,
      "loss": 0.0142,
      "step": 1689460
    },
    {
      "epoch": 2.7648710748021443,
      "grad_norm": 0.5289337038993835,
      "learning_rate": 4.499713368871201e-06,
      "loss": 0.0115,
      "step": 1689480
    },
    {
      "epoch": 2.764903805240798,
      "grad_norm": 0.2202841341495514,
      "learning_rate": 4.499647476657684e-06,
      "loss": 0.0129,
      "step": 1689500
    },
    {
      "epoch": 2.764936535679451,
      "grad_norm": 0.1922832727432251,
      "learning_rate": 4.4995815844441665e-06,
      "loss": 0.0106,
      "step": 1689520
    },
    {
      "epoch": 2.7649692661181047,
      "grad_norm": 0.08099567890167236,
      "learning_rate": 4.499515692230649e-06,
      "loss": 0.0093,
      "step": 1689540
    },
    {
      "epoch": 2.765001996556758,
      "grad_norm": 0.6682034134864807,
      "learning_rate": 4.499449800017132e-06,
      "loss": 0.0162,
      "step": 1689560
    },
    {
      "epoch": 2.7650347269954114,
      "grad_norm": 0.24662965536117554,
      "learning_rate": 4.499383907803615e-06,
      "loss": 0.0174,
      "step": 1689580
    },
    {
      "epoch": 2.7650674574340646,
      "grad_norm": 0.35811638832092285,
      "learning_rate": 4.4993180155900974e-06,
      "loss": 0.0145,
      "step": 1689600
    },
    {
      "epoch": 2.7651001878727177,
      "grad_norm": 0.33071574568748474,
      "learning_rate": 4.499252123376581e-06,
      "loss": 0.0175,
      "step": 1689620
    },
    {
      "epoch": 2.7651329183113713,
      "grad_norm": 0.30227768421173096,
      "learning_rate": 4.499186231163064e-06,
      "loss": 0.0152,
      "step": 1689640
    },
    {
      "epoch": 2.7651656487500245,
      "grad_norm": 0.38128072023391724,
      "learning_rate": 4.4991203389495465e-06,
      "loss": 0.0156,
      "step": 1689660
    },
    {
      "epoch": 2.7651983791886776,
      "grad_norm": 0.0741872638463974,
      "learning_rate": 4.49905444673603e-06,
      "loss": 0.0122,
      "step": 1689680
    },
    {
      "epoch": 2.765231109627331,
      "grad_norm": 0.4232422411441803,
      "learning_rate": 4.498988554522513e-06,
      "loss": 0.0119,
      "step": 1689700
    },
    {
      "epoch": 2.765263840065985,
      "grad_norm": 0.5212311744689941,
      "learning_rate": 4.498922662308996e-06,
      "loss": 0.0187,
      "step": 1689720
    },
    {
      "epoch": 2.765296570504638,
      "grad_norm": 0.08473511040210724,
      "learning_rate": 4.498856770095478e-06,
      "loss": 0.0162,
      "step": 1689740
    },
    {
      "epoch": 2.765329300943291,
      "grad_norm": 0.31207334995269775,
      "learning_rate": 4.498790877881961e-06,
      "loss": 0.0132,
      "step": 1689760
    },
    {
      "epoch": 2.7653620313819447,
      "grad_norm": 0.7373132109642029,
      "learning_rate": 4.498724985668444e-06,
      "loss": 0.0094,
      "step": 1689780
    },
    {
      "epoch": 2.765394761820598,
      "grad_norm": 0.3654995858669281,
      "learning_rate": 4.4986590934549265e-06,
      "loss": 0.0117,
      "step": 1689800
    },
    {
      "epoch": 2.765427492259251,
      "grad_norm": 0.37291356921195984,
      "learning_rate": 4.498593201241409e-06,
      "loss": 0.0141,
      "step": 1689820
    },
    {
      "epoch": 2.7654602226979046,
      "grad_norm": 0.29945018887519836,
      "learning_rate": 4.498527309027893e-06,
      "loss": 0.0124,
      "step": 1689840
    },
    {
      "epoch": 2.765492953136558,
      "grad_norm": 0.5611737966537476,
      "learning_rate": 4.498461416814376e-06,
      "loss": 0.0115,
      "step": 1689860
    },
    {
      "epoch": 2.7655256835752113,
      "grad_norm": 0.4722169041633606,
      "learning_rate": 4.498395524600858e-06,
      "loss": 0.0156,
      "step": 1689880
    },
    {
      "epoch": 2.7655584140138645,
      "grad_norm": 0.3318212330341339,
      "learning_rate": 4.498329632387341e-06,
      "loss": 0.014,
      "step": 1689900
    },
    {
      "epoch": 2.765591144452518,
      "grad_norm": 1.2893426418304443,
      "learning_rate": 4.498263740173824e-06,
      "loss": 0.0105,
      "step": 1689920
    },
    {
      "epoch": 2.7656238748911712,
      "grad_norm": 0.633691668510437,
      "learning_rate": 4.4981978479603066e-06,
      "loss": 0.0109,
      "step": 1689940
    },
    {
      "epoch": 2.7656566053298244,
      "grad_norm": 0.35278642177581787,
      "learning_rate": 4.498131955746789e-06,
      "loss": 0.0172,
      "step": 1689960
    },
    {
      "epoch": 2.765689335768478,
      "grad_norm": 0.2698046863079071,
      "learning_rate": 4.498066063533273e-06,
      "loss": 0.0115,
      "step": 1689980
    },
    {
      "epoch": 2.7657220662071316,
      "grad_norm": 0.22582438588142395,
      "learning_rate": 4.498000171319756e-06,
      "loss": 0.0157,
      "step": 1690000
    },
    {
      "epoch": 2.7657547966457847,
      "grad_norm": 0.533769965171814,
      "learning_rate": 4.497934279106238e-06,
      "loss": 0.0103,
      "step": 1690020
    },
    {
      "epoch": 2.765787527084438,
      "grad_norm": 0.15532061457633972,
      "learning_rate": 4.497868386892721e-06,
      "loss": 0.0104,
      "step": 1690040
    },
    {
      "epoch": 2.7658202575230915,
      "grad_norm": 0.2034497857093811,
      "learning_rate": 4.497802494679204e-06,
      "loss": 0.0109,
      "step": 1690060
    },
    {
      "epoch": 2.7658529879617446,
      "grad_norm": 0.24565871059894562,
      "learning_rate": 4.4977366024656874e-06,
      "loss": 0.0176,
      "step": 1690080
    },
    {
      "epoch": 2.7658857184003978,
      "grad_norm": 0.10366659611463547,
      "learning_rate": 4.49767071025217e-06,
      "loss": 0.0136,
      "step": 1690100
    },
    {
      "epoch": 2.7659184488390514,
      "grad_norm": 0.3764130473136902,
      "learning_rate": 4.497604818038653e-06,
      "loss": 0.0114,
      "step": 1690120
    },
    {
      "epoch": 2.765951179277705,
      "grad_norm": 0.2147638201713562,
      "learning_rate": 4.497538925825136e-06,
      "loss": 0.01,
      "step": 1690140
    },
    {
      "epoch": 2.765983909716358,
      "grad_norm": 0.46940353512763977,
      "learning_rate": 4.497473033611618e-06,
      "loss": 0.0146,
      "step": 1690160
    },
    {
      "epoch": 2.7660166401550113,
      "grad_norm": 1.3949546813964844,
      "learning_rate": 4.497407141398101e-06,
      "loss": 0.0122,
      "step": 1690180
    },
    {
      "epoch": 2.766049370593665,
      "grad_norm": 0.13299456238746643,
      "learning_rate": 4.497341249184584e-06,
      "loss": 0.0183,
      "step": 1690200
    },
    {
      "epoch": 2.766082101032318,
      "grad_norm": 0.07636198401451111,
      "learning_rate": 4.497275356971067e-06,
      "loss": 0.0142,
      "step": 1690220
    },
    {
      "epoch": 2.766114831470971,
      "grad_norm": 0.2925644814968109,
      "learning_rate": 4.49720946475755e-06,
      "loss": 0.0103,
      "step": 1690240
    },
    {
      "epoch": 2.7661475619096247,
      "grad_norm": 0.09812533855438232,
      "learning_rate": 4.497143572544033e-06,
      "loss": 0.0132,
      "step": 1690260
    },
    {
      "epoch": 2.766180292348278,
      "grad_norm": 0.3546893298625946,
      "learning_rate": 4.497077680330516e-06,
      "loss": 0.0198,
      "step": 1690280
    },
    {
      "epoch": 2.7662130227869315,
      "grad_norm": 0.27539631724357605,
      "learning_rate": 4.497011788116998e-06,
      "loss": 0.0125,
      "step": 1690300
    },
    {
      "epoch": 2.7662457532255846,
      "grad_norm": 0.2743963897228241,
      "learning_rate": 4.496945895903481e-06,
      "loss": 0.0108,
      "step": 1690320
    },
    {
      "epoch": 2.7662784836642382,
      "grad_norm": 0.9253437519073486,
      "learning_rate": 4.496880003689965e-06,
      "loss": 0.0129,
      "step": 1690340
    },
    {
      "epoch": 2.7663112141028914,
      "grad_norm": 9.602893829345703,
      "learning_rate": 4.4968141114764475e-06,
      "loss": 0.0135,
      "step": 1690360
    },
    {
      "epoch": 2.7663439445415445,
      "grad_norm": 0.1799493283033371,
      "learning_rate": 4.49674821926293e-06,
      "loss": 0.0149,
      "step": 1690380
    },
    {
      "epoch": 2.766376674980198,
      "grad_norm": 0.45360225439071655,
      "learning_rate": 4.496682327049413e-06,
      "loss": 0.0163,
      "step": 1690400
    },
    {
      "epoch": 2.7664094054188513,
      "grad_norm": 0.6321641206741333,
      "learning_rate": 4.496616434835896e-06,
      "loss": 0.0149,
      "step": 1690420
    },
    {
      "epoch": 2.766442135857505,
      "grad_norm": 0.4841495156288147,
      "learning_rate": 4.4965505426223784e-06,
      "loss": 0.0213,
      "step": 1690440
    },
    {
      "epoch": 2.766474866296158,
      "grad_norm": 0.15449108183383942,
      "learning_rate": 4.496484650408862e-06,
      "loss": 0.0165,
      "step": 1690460
    },
    {
      "epoch": 2.7665075967348116,
      "grad_norm": 0.593565821647644,
      "learning_rate": 4.496418758195345e-06,
      "loss": 0.0138,
      "step": 1690480
    },
    {
      "epoch": 2.7665403271734648,
      "grad_norm": 0.2522539496421814,
      "learning_rate": 4.4963528659818275e-06,
      "loss": 0.0201,
      "step": 1690500
    },
    {
      "epoch": 2.766573057612118,
      "grad_norm": 0.36141908168792725,
      "learning_rate": 4.49628697376831e-06,
      "loss": 0.017,
      "step": 1690520
    },
    {
      "epoch": 2.7666057880507715,
      "grad_norm": 0.2465212494134903,
      "learning_rate": 4.496221081554793e-06,
      "loss": 0.0127,
      "step": 1690540
    },
    {
      "epoch": 2.7666385184894247,
      "grad_norm": 0.5839236974716187,
      "learning_rate": 4.496155189341276e-06,
      "loss": 0.0155,
      "step": 1690560
    },
    {
      "epoch": 2.7666712489280783,
      "grad_norm": 1.152947187423706,
      "learning_rate": 4.4960892971277585e-06,
      "loss": 0.014,
      "step": 1690580
    },
    {
      "epoch": 2.7667039793667314,
      "grad_norm": 0.3292029798030853,
      "learning_rate": 4.496023404914241e-06,
      "loss": 0.0176,
      "step": 1690600
    },
    {
      "epoch": 2.766736709805385,
      "grad_norm": 0.19254069030284882,
      "learning_rate": 4.495957512700724e-06,
      "loss": 0.0136,
      "step": 1690620
    },
    {
      "epoch": 2.766769440244038,
      "grad_norm": 0.713516116142273,
      "learning_rate": 4.4958916204872075e-06,
      "loss": 0.0124,
      "step": 1690640
    },
    {
      "epoch": 2.7668021706826913,
      "grad_norm": 0.45901256799697876,
      "learning_rate": 4.49582572827369e-06,
      "loss": 0.0129,
      "step": 1690660
    },
    {
      "epoch": 2.766834901121345,
      "grad_norm": 0.17093636095523834,
      "learning_rate": 4.495759836060173e-06,
      "loss": 0.0107,
      "step": 1690680
    },
    {
      "epoch": 2.766867631559998,
      "grad_norm": 0.38342127203941345,
      "learning_rate": 4.495693943846657e-06,
      "loss": 0.0146,
      "step": 1690700
    },
    {
      "epoch": 2.7669003619986516,
      "grad_norm": 0.1362859308719635,
      "learning_rate": 4.495628051633139e-06,
      "loss": 0.0149,
      "step": 1690720
    },
    {
      "epoch": 2.766933092437305,
      "grad_norm": 0.3877287805080414,
      "learning_rate": 4.495562159419622e-06,
      "loss": 0.0118,
      "step": 1690740
    },
    {
      "epoch": 2.7669658228759584,
      "grad_norm": 0.3423681855201721,
      "learning_rate": 4.495496267206105e-06,
      "loss": 0.0145,
      "step": 1690760
    },
    {
      "epoch": 2.7669985533146115,
      "grad_norm": 0.5335445404052734,
      "learning_rate": 4.4954303749925876e-06,
      "loss": 0.0113,
      "step": 1690780
    },
    {
      "epoch": 2.7670312837532647,
      "grad_norm": 0.12074007838964462,
      "learning_rate": 4.49536448277907e-06,
      "loss": 0.0151,
      "step": 1690800
    },
    {
      "epoch": 2.7670640141919183,
      "grad_norm": 0.5306654572486877,
      "learning_rate": 4.495298590565553e-06,
      "loss": 0.0188,
      "step": 1690820
    },
    {
      "epoch": 2.7670967446305714,
      "grad_norm": 0.17257460951805115,
      "learning_rate": 4.495232698352036e-06,
      "loss": 0.012,
      "step": 1690840
    },
    {
      "epoch": 2.767129475069225,
      "grad_norm": 0.5087023973464966,
      "learning_rate": 4.495166806138519e-06,
      "loss": 0.0111,
      "step": 1690860
    },
    {
      "epoch": 2.767162205507878,
      "grad_norm": 0.6722028851509094,
      "learning_rate": 4.495100913925002e-06,
      "loss": 0.016,
      "step": 1690880
    },
    {
      "epoch": 2.7671949359465318,
      "grad_norm": 0.5350138545036316,
      "learning_rate": 4.495035021711485e-06,
      "loss": 0.0154,
      "step": 1690900
    },
    {
      "epoch": 2.767227666385185,
      "grad_norm": 0.3221004009246826,
      "learning_rate": 4.494969129497968e-06,
      "loss": 0.0123,
      "step": 1690920
    },
    {
      "epoch": 2.767260396823838,
      "grad_norm": 0.4638565182685852,
      "learning_rate": 4.49490323728445e-06,
      "loss": 0.014,
      "step": 1690940
    },
    {
      "epoch": 2.7672931272624917,
      "grad_norm": 0.3814166784286499,
      "learning_rate": 4.494837345070933e-06,
      "loss": 0.0143,
      "step": 1690960
    },
    {
      "epoch": 2.767325857701145,
      "grad_norm": 0.6287350654602051,
      "learning_rate": 4.494771452857416e-06,
      "loss": 0.0097,
      "step": 1690980
    },
    {
      "epoch": 2.7673585881397984,
      "grad_norm": 0.438978374004364,
      "learning_rate": 4.4947055606438985e-06,
      "loss": 0.0124,
      "step": 1691000
    },
    {
      "epoch": 2.7673913185784516,
      "grad_norm": 0.718259334564209,
      "learning_rate": 4.494639668430381e-06,
      "loss": 0.014,
      "step": 1691020
    },
    {
      "epoch": 2.767424049017105,
      "grad_norm": 0.17860743403434753,
      "learning_rate": 4.494573776216865e-06,
      "loss": 0.017,
      "step": 1691040
    },
    {
      "epoch": 2.7674567794557583,
      "grad_norm": 0.16945359110832214,
      "learning_rate": 4.494507884003348e-06,
      "loss": 0.012,
      "step": 1691060
    },
    {
      "epoch": 2.7674895098944114,
      "grad_norm": 0.04518674686551094,
      "learning_rate": 4.49444199178983e-06,
      "loss": 0.0098,
      "step": 1691080
    },
    {
      "epoch": 2.767522240333065,
      "grad_norm": 0.5584126710891724,
      "learning_rate": 4.494376099576314e-06,
      "loss": 0.0152,
      "step": 1691100
    },
    {
      "epoch": 2.767554970771718,
      "grad_norm": 0.3673997223377228,
      "learning_rate": 4.494310207362797e-06,
      "loss": 0.0087,
      "step": 1691120
    },
    {
      "epoch": 2.767587701210372,
      "grad_norm": 0.16261161863803864,
      "learning_rate": 4.494244315149279e-06,
      "loss": 0.0095,
      "step": 1691140
    },
    {
      "epoch": 2.767620431649025,
      "grad_norm": 0.38100600242614746,
      "learning_rate": 4.494178422935762e-06,
      "loss": 0.0135,
      "step": 1691160
    },
    {
      "epoch": 2.7676531620876785,
      "grad_norm": 0.29235193133354187,
      "learning_rate": 4.494112530722245e-06,
      "loss": 0.0143,
      "step": 1691180
    },
    {
      "epoch": 2.7676858925263317,
      "grad_norm": 0.40410178899765015,
      "learning_rate": 4.494046638508728e-06,
      "loss": 0.0194,
      "step": 1691200
    },
    {
      "epoch": 2.767718622964985,
      "grad_norm": 1.1794142723083496,
      "learning_rate": 4.49398074629521e-06,
      "loss": 0.0189,
      "step": 1691220
    },
    {
      "epoch": 2.7677513534036384,
      "grad_norm": 0.7873626351356506,
      "learning_rate": 4.493914854081693e-06,
      "loss": 0.0191,
      "step": 1691240
    },
    {
      "epoch": 2.7677840838422916,
      "grad_norm": 0.2718505859375,
      "learning_rate": 4.493848961868177e-06,
      "loss": 0.009,
      "step": 1691260
    },
    {
      "epoch": 2.7678168142809447,
      "grad_norm": 0.12044432014226913,
      "learning_rate": 4.4937830696546594e-06,
      "loss": 0.0163,
      "step": 1691280
    },
    {
      "epoch": 2.7678495447195983,
      "grad_norm": 0.1623363196849823,
      "learning_rate": 4.493717177441142e-06,
      "loss": 0.0114,
      "step": 1691300
    },
    {
      "epoch": 2.767882275158252,
      "grad_norm": 0.4422232210636139,
      "learning_rate": 4.493651285227625e-06,
      "loss": 0.0237,
      "step": 1691320
    },
    {
      "epoch": 2.767915005596905,
      "grad_norm": 0.17912447452545166,
      "learning_rate": 4.493585393014108e-06,
      "loss": 0.013,
      "step": 1691340
    },
    {
      "epoch": 2.767947736035558,
      "grad_norm": 0.5826486945152283,
      "learning_rate": 4.49351950080059e-06,
      "loss": 0.019,
      "step": 1691360
    },
    {
      "epoch": 2.767980466474212,
      "grad_norm": 0.38039854168891907,
      "learning_rate": 4.493453608587073e-06,
      "loss": 0.0143,
      "step": 1691380
    },
    {
      "epoch": 2.768013196912865,
      "grad_norm": 0.4322124421596527,
      "learning_rate": 4.493387716373557e-06,
      "loss": 0.0122,
      "step": 1691400
    },
    {
      "epoch": 2.768045927351518,
      "grad_norm": 1.4019744396209717,
      "learning_rate": 4.4933218241600395e-06,
      "loss": 0.0181,
      "step": 1691420
    },
    {
      "epoch": 2.7680786577901717,
      "grad_norm": 0.31176915764808655,
      "learning_rate": 4.493255931946522e-06,
      "loss": 0.0088,
      "step": 1691440
    },
    {
      "epoch": 2.7681113882288253,
      "grad_norm": 0.3996044993400574,
      "learning_rate": 4.493190039733005e-06,
      "loss": 0.0173,
      "step": 1691460
    },
    {
      "epoch": 2.7681441186674784,
      "grad_norm": 0.22217489778995514,
      "learning_rate": 4.4931241475194885e-06,
      "loss": 0.016,
      "step": 1691480
    },
    {
      "epoch": 2.7681768491061316,
      "grad_norm": 0.09425316005945206,
      "learning_rate": 4.493058255305971e-06,
      "loss": 0.0116,
      "step": 1691500
    },
    {
      "epoch": 2.768209579544785,
      "grad_norm": 0.28924962878227234,
      "learning_rate": 4.492992363092454e-06,
      "loss": 0.0135,
      "step": 1691520
    },
    {
      "epoch": 2.7682423099834383,
      "grad_norm": 0.16263902187347412,
      "learning_rate": 4.492926470878937e-06,
      "loss": 0.0119,
      "step": 1691540
    },
    {
      "epoch": 2.7682750404220915,
      "grad_norm": 0.2495226114988327,
      "learning_rate": 4.4928605786654195e-06,
      "loss": 0.0117,
      "step": 1691560
    },
    {
      "epoch": 2.768307770860745,
      "grad_norm": 0.125792995095253,
      "learning_rate": 4.492794686451902e-06,
      "loss": 0.0119,
      "step": 1691580
    },
    {
      "epoch": 2.7683405012993987,
      "grad_norm": 0.9007449150085449,
      "learning_rate": 4.492728794238385e-06,
      "loss": 0.0149,
      "step": 1691600
    },
    {
      "epoch": 2.768373231738052,
      "grad_norm": 0.35872501134872437,
      "learning_rate": 4.492662902024868e-06,
      "loss": 0.0099,
      "step": 1691620
    },
    {
      "epoch": 2.768405962176705,
      "grad_norm": 0.48287683725357056,
      "learning_rate": 4.4925970098113505e-06,
      "loss": 0.015,
      "step": 1691640
    },
    {
      "epoch": 2.7684386926153586,
      "grad_norm": 0.4364086985588074,
      "learning_rate": 4.492531117597834e-06,
      "loss": 0.016,
      "step": 1691660
    },
    {
      "epoch": 2.7684714230540117,
      "grad_norm": 0.3779667615890503,
      "learning_rate": 4.492465225384317e-06,
      "loss": 0.0192,
      "step": 1691680
    },
    {
      "epoch": 2.768504153492665,
      "grad_norm": 0.33072802424430847,
      "learning_rate": 4.4923993331707995e-06,
      "loss": 0.0185,
      "step": 1691700
    },
    {
      "epoch": 2.7685368839313185,
      "grad_norm": 0.9208924174308777,
      "learning_rate": 4.492333440957282e-06,
      "loss": 0.0163,
      "step": 1691720
    },
    {
      "epoch": 2.7685696143699716,
      "grad_norm": 0.5709294676780701,
      "learning_rate": 4.492267548743766e-06,
      "loss": 0.0179,
      "step": 1691740
    },
    {
      "epoch": 2.768602344808625,
      "grad_norm": 0.34009113907814026,
      "learning_rate": 4.492201656530249e-06,
      "loss": 0.0114,
      "step": 1691760
    },
    {
      "epoch": 2.7686350752472784,
      "grad_norm": 0.45685985684394836,
      "learning_rate": 4.492135764316731e-06,
      "loss": 0.0113,
      "step": 1691780
    },
    {
      "epoch": 2.768667805685932,
      "grad_norm": 0.123468779027462,
      "learning_rate": 4.492069872103214e-06,
      "loss": 0.0135,
      "step": 1691800
    },
    {
      "epoch": 2.768700536124585,
      "grad_norm": 0.11359703540802002,
      "learning_rate": 4.492003979889697e-06,
      "loss": 0.0106,
      "step": 1691820
    },
    {
      "epoch": 2.7687332665632383,
      "grad_norm": 0.208347886800766,
      "learning_rate": 4.4919380876761795e-06,
      "loss": 0.0085,
      "step": 1691840
    },
    {
      "epoch": 2.768765997001892,
      "grad_norm": 3.4701995849609375,
      "learning_rate": 4.491872195462662e-06,
      "loss": 0.017,
      "step": 1691860
    },
    {
      "epoch": 2.768798727440545,
      "grad_norm": 0.28182828426361084,
      "learning_rate": 4.491806303249146e-06,
      "loss": 0.0149,
      "step": 1691880
    },
    {
      "epoch": 2.7688314578791986,
      "grad_norm": 0.8589381575584412,
      "learning_rate": 4.491740411035629e-06,
      "loss": 0.0219,
      "step": 1691900
    },
    {
      "epoch": 2.7688641883178517,
      "grad_norm": 0.546722412109375,
      "learning_rate": 4.491674518822111e-06,
      "loss": 0.0146,
      "step": 1691920
    },
    {
      "epoch": 2.7688969187565053,
      "grad_norm": 0.5549097657203674,
      "learning_rate": 4.491608626608594e-06,
      "loss": 0.0126,
      "step": 1691940
    },
    {
      "epoch": 2.7689296491951585,
      "grad_norm": 0.11389530450105667,
      "learning_rate": 4.491542734395077e-06,
      "loss": 0.0122,
      "step": 1691960
    },
    {
      "epoch": 2.7689623796338116,
      "grad_norm": 0.5874797701835632,
      "learning_rate": 4.4914768421815596e-06,
      "loss": 0.0174,
      "step": 1691980
    },
    {
      "epoch": 2.7689951100724652,
      "grad_norm": 0.31946876645088196,
      "learning_rate": 4.491410949968042e-06,
      "loss": 0.0135,
      "step": 1692000
    },
    {
      "epoch": 2.7690278405111184,
      "grad_norm": 0.23936155438423157,
      "learning_rate": 4.491345057754525e-06,
      "loss": 0.0164,
      "step": 1692020
    },
    {
      "epoch": 2.769060570949772,
      "grad_norm": 0.3886219263076782,
      "learning_rate": 4.491279165541008e-06,
      "loss": 0.0139,
      "step": 1692040
    },
    {
      "epoch": 2.769093301388425,
      "grad_norm": 0.6570219397544861,
      "learning_rate": 4.491213273327491e-06,
      "loss": 0.0173,
      "step": 1692060
    },
    {
      "epoch": 2.7691260318270787,
      "grad_norm": 0.1301143616437912,
      "learning_rate": 4.491147381113974e-06,
      "loss": 0.0096,
      "step": 1692080
    },
    {
      "epoch": 2.769158762265732,
      "grad_norm": 0.12579470872879028,
      "learning_rate": 4.491081488900457e-06,
      "loss": 0.0125,
      "step": 1692100
    },
    {
      "epoch": 2.769191492704385,
      "grad_norm": 0.28378966450691223,
      "learning_rate": 4.4910155966869404e-06,
      "loss": 0.0177,
      "step": 1692120
    },
    {
      "epoch": 2.7692242231430386,
      "grad_norm": 0.341604083776474,
      "learning_rate": 4.490949704473423e-06,
      "loss": 0.0143,
      "step": 1692140
    },
    {
      "epoch": 2.7692569535816918,
      "grad_norm": 0.45042771100997925,
      "learning_rate": 4.490883812259906e-06,
      "loss": 0.0116,
      "step": 1692160
    },
    {
      "epoch": 2.7692896840203454,
      "grad_norm": 0.7085691094398499,
      "learning_rate": 4.490817920046389e-06,
      "loss": 0.0175,
      "step": 1692180
    },
    {
      "epoch": 2.7693224144589985,
      "grad_norm": 0.17213138937950134,
      "learning_rate": 4.490752027832871e-06,
      "loss": 0.014,
      "step": 1692200
    },
    {
      "epoch": 2.769355144897652,
      "grad_norm": 0.4960228204727173,
      "learning_rate": 4.490686135619354e-06,
      "loss": 0.0106,
      "step": 1692220
    },
    {
      "epoch": 2.7693878753363053,
      "grad_norm": 0.3029617369174957,
      "learning_rate": 4.490620243405837e-06,
      "loss": 0.0111,
      "step": 1692240
    },
    {
      "epoch": 2.7694206057749584,
      "grad_norm": 0.0401419922709465,
      "learning_rate": 4.49055435119232e-06,
      "loss": 0.0202,
      "step": 1692260
    },
    {
      "epoch": 2.769453336213612,
      "grad_norm": 0.3855413794517517,
      "learning_rate": 4.490488458978803e-06,
      "loss": 0.0189,
      "step": 1692280
    },
    {
      "epoch": 2.769486066652265,
      "grad_norm": 0.34141358733177185,
      "learning_rate": 4.490422566765286e-06,
      "loss": 0.0203,
      "step": 1692300
    },
    {
      "epoch": 2.7695187970909187,
      "grad_norm": 0.1932598352432251,
      "learning_rate": 4.490356674551769e-06,
      "loss": 0.0159,
      "step": 1692320
    },
    {
      "epoch": 2.769551527529572,
      "grad_norm": 0.12142263352870941,
      "learning_rate": 4.4902907823382514e-06,
      "loss": 0.0121,
      "step": 1692340
    },
    {
      "epoch": 2.7695842579682255,
      "grad_norm": 0.14421415328979492,
      "learning_rate": 4.490224890124734e-06,
      "loss": 0.0158,
      "step": 1692360
    },
    {
      "epoch": 2.7696169884068786,
      "grad_norm": 0.14871899783611298,
      "learning_rate": 4.490158997911217e-06,
      "loss": 0.0167,
      "step": 1692380
    },
    {
      "epoch": 2.769649718845532,
      "grad_norm": 0.5954661965370178,
      "learning_rate": 4.4900931056977e-06,
      "loss": 0.017,
      "step": 1692400
    },
    {
      "epoch": 2.7696824492841854,
      "grad_norm": 1.1947053670883179,
      "learning_rate": 4.490027213484182e-06,
      "loss": 0.0215,
      "step": 1692420
    },
    {
      "epoch": 2.7697151797228385,
      "grad_norm": 0.20662467181682587,
      "learning_rate": 4.489961321270666e-06,
      "loss": 0.0088,
      "step": 1692440
    },
    {
      "epoch": 2.769747910161492,
      "grad_norm": 0.7778869867324829,
      "learning_rate": 4.489895429057149e-06,
      "loss": 0.012,
      "step": 1692460
    },
    {
      "epoch": 2.7697806406001453,
      "grad_norm": 0.03448165953159332,
      "learning_rate": 4.4898295368436314e-06,
      "loss": 0.0114,
      "step": 1692480
    },
    {
      "epoch": 2.769813371038799,
      "grad_norm": 0.34582313895225525,
      "learning_rate": 4.489763644630114e-06,
      "loss": 0.0107,
      "step": 1692500
    },
    {
      "epoch": 2.769846101477452,
      "grad_norm": 0.4105028510093689,
      "learning_rate": 4.489697752416598e-06,
      "loss": 0.0121,
      "step": 1692520
    },
    {
      "epoch": 2.769878831916105,
      "grad_norm": 0.4326920211315155,
      "learning_rate": 4.4896318602030805e-06,
      "loss": 0.0136,
      "step": 1692540
    },
    {
      "epoch": 2.7699115623547588,
      "grad_norm": 0.25386786460876465,
      "learning_rate": 4.489565967989563e-06,
      "loss": 0.012,
      "step": 1692560
    },
    {
      "epoch": 2.769944292793412,
      "grad_norm": 0.43697091937065125,
      "learning_rate": 4.489500075776046e-06,
      "loss": 0.0159,
      "step": 1692580
    },
    {
      "epoch": 2.7699770232320655,
      "grad_norm": 0.31180086731910706,
      "learning_rate": 4.489434183562529e-06,
      "loss": 0.0154,
      "step": 1692600
    },
    {
      "epoch": 2.7700097536707187,
      "grad_norm": 0.29648557305336,
      "learning_rate": 4.4893682913490115e-06,
      "loss": 0.0121,
      "step": 1692620
    },
    {
      "epoch": 2.7700424841093723,
      "grad_norm": 0.44221097230911255,
      "learning_rate": 4.489302399135494e-06,
      "loss": 0.0137,
      "step": 1692640
    },
    {
      "epoch": 2.7700752145480254,
      "grad_norm": 0.5753874778747559,
      "learning_rate": 4.489236506921977e-06,
      "loss": 0.0072,
      "step": 1692660
    },
    {
      "epoch": 2.7701079449866786,
      "grad_norm": 0.1931745558977127,
      "learning_rate": 4.4891706147084605e-06,
      "loss": 0.0135,
      "step": 1692680
    },
    {
      "epoch": 2.770140675425332,
      "grad_norm": 0.8707501292228699,
      "learning_rate": 4.489104722494943e-06,
      "loss": 0.0137,
      "step": 1692700
    },
    {
      "epoch": 2.7701734058639853,
      "grad_norm": 0.4072829782962799,
      "learning_rate": 4.489038830281426e-06,
      "loss": 0.0104,
      "step": 1692720
    },
    {
      "epoch": 2.7702061363026385,
      "grad_norm": 0.23096564412117004,
      "learning_rate": 4.488972938067909e-06,
      "loss": 0.0091,
      "step": 1692740
    },
    {
      "epoch": 2.770238866741292,
      "grad_norm": 0.5385809540748596,
      "learning_rate": 4.4889070458543915e-06,
      "loss": 0.0187,
      "step": 1692760
    },
    {
      "epoch": 2.7702715971799456,
      "grad_norm": 0.6755445003509521,
      "learning_rate": 4.488841153640874e-06,
      "loss": 0.0115,
      "step": 1692780
    },
    {
      "epoch": 2.770304327618599,
      "grad_norm": 0.21081556379795074,
      "learning_rate": 4.488775261427358e-06,
      "loss": 0.0165,
      "step": 1692800
    },
    {
      "epoch": 2.770337058057252,
      "grad_norm": 1.3445731401443481,
      "learning_rate": 4.4887093692138406e-06,
      "loss": 0.0149,
      "step": 1692820
    },
    {
      "epoch": 2.7703697884959055,
      "grad_norm": 0.22443944215774536,
      "learning_rate": 4.488643477000323e-06,
      "loss": 0.0135,
      "step": 1692840
    },
    {
      "epoch": 2.7704025189345587,
      "grad_norm": 0.16535648703575134,
      "learning_rate": 4.488577584786806e-06,
      "loss": 0.0155,
      "step": 1692860
    },
    {
      "epoch": 2.770435249373212,
      "grad_norm": 0.2872130870819092,
      "learning_rate": 4.488511692573289e-06,
      "loss": 0.013,
      "step": 1692880
    },
    {
      "epoch": 2.7704679798118654,
      "grad_norm": 0.18186137080192566,
      "learning_rate": 4.488445800359772e-06,
      "loss": 0.0099,
      "step": 1692900
    },
    {
      "epoch": 2.770500710250519,
      "grad_norm": 0.7429172992706299,
      "learning_rate": 4.488379908146255e-06,
      "loss": 0.0173,
      "step": 1692920
    },
    {
      "epoch": 2.770533440689172,
      "grad_norm": 0.3158469498157501,
      "learning_rate": 4.488314015932738e-06,
      "loss": 0.0103,
      "step": 1692940
    },
    {
      "epoch": 2.7705661711278253,
      "grad_norm": 0.7731581330299377,
      "learning_rate": 4.488248123719221e-06,
      "loss": 0.0096,
      "step": 1692960
    },
    {
      "epoch": 2.770598901566479,
      "grad_norm": 0.0780758261680603,
      "learning_rate": 4.488182231505703e-06,
      "loss": 0.0121,
      "step": 1692980
    },
    {
      "epoch": 2.770631632005132,
      "grad_norm": 1.5280495882034302,
      "learning_rate": 4.488116339292186e-06,
      "loss": 0.0173,
      "step": 1693000
    },
    {
      "epoch": 2.7706643624437852,
      "grad_norm": 0.10727114230394363,
      "learning_rate": 4.488050447078669e-06,
      "loss": 0.0213,
      "step": 1693020
    },
    {
      "epoch": 2.770697092882439,
      "grad_norm": 0.17451119422912598,
      "learning_rate": 4.4879845548651516e-06,
      "loss": 0.0097,
      "step": 1693040
    },
    {
      "epoch": 2.7707298233210924,
      "grad_norm": 0.7203014492988586,
      "learning_rate": 4.487918662651634e-06,
      "loss": 0.0205,
      "step": 1693060
    },
    {
      "epoch": 2.7707625537597456,
      "grad_norm": 0.38802582025527954,
      "learning_rate": 4.487852770438118e-06,
      "loss": 0.0135,
      "step": 1693080
    },
    {
      "epoch": 2.7707952841983987,
      "grad_norm": 0.8632259964942932,
      "learning_rate": 4.487786878224601e-06,
      "loss": 0.0176,
      "step": 1693100
    },
    {
      "epoch": 2.7708280146370523,
      "grad_norm": 0.23629556596279144,
      "learning_rate": 4.487720986011083e-06,
      "loss": 0.0152,
      "step": 1693120
    },
    {
      "epoch": 2.7708607450757055,
      "grad_norm": 0.845215380191803,
      "learning_rate": 4.487655093797566e-06,
      "loss": 0.0122,
      "step": 1693140
    },
    {
      "epoch": 2.7708934755143586,
      "grad_norm": 0.4125670790672302,
      "learning_rate": 4.48758920158405e-06,
      "loss": 0.0133,
      "step": 1693160
    },
    {
      "epoch": 2.770926205953012,
      "grad_norm": 2.179107904434204,
      "learning_rate": 4.487523309370532e-06,
      "loss": 0.0107,
      "step": 1693180
    },
    {
      "epoch": 2.770958936391666,
      "grad_norm": 0.1832529753446579,
      "learning_rate": 4.487457417157015e-06,
      "loss": 0.0159,
      "step": 1693200
    },
    {
      "epoch": 2.770991666830319,
      "grad_norm": 0.41863441467285156,
      "learning_rate": 4.487391524943498e-06,
      "loss": 0.015,
      "step": 1693220
    },
    {
      "epoch": 2.771024397268972,
      "grad_norm": 0.10160239785909653,
      "learning_rate": 4.487325632729981e-06,
      "loss": 0.0114,
      "step": 1693240
    },
    {
      "epoch": 2.7710571277076257,
      "grad_norm": 0.4970301687717438,
      "learning_rate": 4.487259740516463e-06,
      "loss": 0.0191,
      "step": 1693260
    },
    {
      "epoch": 2.771089858146279,
      "grad_norm": 0.14919741451740265,
      "learning_rate": 4.487193848302946e-06,
      "loss": 0.0179,
      "step": 1693280
    },
    {
      "epoch": 2.771122588584932,
      "grad_norm": 1.2933224439620972,
      "learning_rate": 4.48712795608943e-06,
      "loss": 0.0202,
      "step": 1693300
    },
    {
      "epoch": 2.7711553190235856,
      "grad_norm": 0.05739825963973999,
      "learning_rate": 4.4870620638759124e-06,
      "loss": 0.012,
      "step": 1693320
    },
    {
      "epoch": 2.7711880494622387,
      "grad_norm": 0.1267821490764618,
      "learning_rate": 4.486996171662395e-06,
      "loss": 0.0149,
      "step": 1693340
    },
    {
      "epoch": 2.7712207799008923,
      "grad_norm": 0.4923034608364105,
      "learning_rate": 4.486930279448878e-06,
      "loss": 0.0157,
      "step": 1693360
    },
    {
      "epoch": 2.7712535103395455,
      "grad_norm": 0.14946018159389496,
      "learning_rate": 4.486864387235361e-06,
      "loss": 0.014,
      "step": 1693380
    },
    {
      "epoch": 2.771286240778199,
      "grad_norm": 0.29706019163131714,
      "learning_rate": 4.486798495021843e-06,
      "loss": 0.0138,
      "step": 1693400
    },
    {
      "epoch": 2.7713189712168522,
      "grad_norm": 0.2261486053466797,
      "learning_rate": 4.486732602808326e-06,
      "loss": 0.0123,
      "step": 1693420
    },
    {
      "epoch": 2.7713517016555054,
      "grad_norm": 0.10314220935106277,
      "learning_rate": 4.486666710594809e-06,
      "loss": 0.009,
      "step": 1693440
    },
    {
      "epoch": 2.771384432094159,
      "grad_norm": 0.7579694986343384,
      "learning_rate": 4.486600818381292e-06,
      "loss": 0.0204,
      "step": 1693460
    },
    {
      "epoch": 2.771417162532812,
      "grad_norm": 7.8759050369262695,
      "learning_rate": 4.486534926167775e-06,
      "loss": 0.0127,
      "step": 1693480
    },
    {
      "epoch": 2.7714498929714657,
      "grad_norm": 0.4722740352153778,
      "learning_rate": 4.486469033954258e-06,
      "loss": 0.0091,
      "step": 1693500
    },
    {
      "epoch": 2.771482623410119,
      "grad_norm": 0.25071048736572266,
      "learning_rate": 4.486403141740741e-06,
      "loss": 0.0148,
      "step": 1693520
    },
    {
      "epoch": 2.7715153538487725,
      "grad_norm": 0.10928068310022354,
      "learning_rate": 4.486337249527224e-06,
      "loss": 0.0135,
      "step": 1693540
    },
    {
      "epoch": 2.7715480842874256,
      "grad_norm": 0.33772358298301697,
      "learning_rate": 4.486271357313707e-06,
      "loss": 0.0127,
      "step": 1693560
    },
    {
      "epoch": 2.7715808147260788,
      "grad_norm": 0.2651403248310089,
      "learning_rate": 4.48620546510019e-06,
      "loss": 0.021,
      "step": 1693580
    },
    {
      "epoch": 2.7716135451647324,
      "grad_norm": 0.24349328875541687,
      "learning_rate": 4.4861395728866725e-06,
      "loss": 0.0172,
      "step": 1693600
    },
    {
      "epoch": 2.7716462756033855,
      "grad_norm": 3.898118495941162,
      "learning_rate": 4.486073680673155e-06,
      "loss": 0.0122,
      "step": 1693620
    },
    {
      "epoch": 2.771679006042039,
      "grad_norm": 0.5056021809577942,
      "learning_rate": 4.486007788459638e-06,
      "loss": 0.0141,
      "step": 1693640
    },
    {
      "epoch": 2.7717117364806922,
      "grad_norm": 0.21106137335300446,
      "learning_rate": 4.485941896246121e-06,
      "loss": 0.0168,
      "step": 1693660
    },
    {
      "epoch": 2.771744466919346,
      "grad_norm": 0.512043297290802,
      "learning_rate": 4.4858760040326035e-06,
      "loss": 0.0134,
      "step": 1693680
    },
    {
      "epoch": 2.771777197357999,
      "grad_norm": 0.14360229671001434,
      "learning_rate": 4.485810111819087e-06,
      "loss": 0.0158,
      "step": 1693700
    },
    {
      "epoch": 2.771809927796652,
      "grad_norm": 0.22694969177246094,
      "learning_rate": 4.48574421960557e-06,
      "loss": 0.0172,
      "step": 1693720
    },
    {
      "epoch": 2.7718426582353057,
      "grad_norm": 1.5912691354751587,
      "learning_rate": 4.4856783273920525e-06,
      "loss": 0.013,
      "step": 1693740
    },
    {
      "epoch": 2.771875388673959,
      "grad_norm": 0.29182925820350647,
      "learning_rate": 4.485612435178535e-06,
      "loss": 0.0165,
      "step": 1693760
    },
    {
      "epoch": 2.7719081191126125,
      "grad_norm": 0.12288456410169601,
      "learning_rate": 4.485546542965018e-06,
      "loss": 0.0155,
      "step": 1693780
    },
    {
      "epoch": 2.7719408495512656,
      "grad_norm": 0.24484455585479736,
      "learning_rate": 4.485480650751501e-06,
      "loss": 0.014,
      "step": 1693800
    },
    {
      "epoch": 2.7719735799899192,
      "grad_norm": 0.18321271240711212,
      "learning_rate": 4.4854147585379835e-06,
      "loss": 0.0144,
      "step": 1693820
    },
    {
      "epoch": 2.7720063104285724,
      "grad_norm": 0.10127927362918854,
      "learning_rate": 4.485348866324466e-06,
      "loss": 0.0162,
      "step": 1693840
    },
    {
      "epoch": 2.7720390408672255,
      "grad_norm": 0.3238464295864105,
      "learning_rate": 4.48528297411095e-06,
      "loss": 0.009,
      "step": 1693860
    },
    {
      "epoch": 2.772071771305879,
      "grad_norm": 0.44188469648361206,
      "learning_rate": 4.4852170818974325e-06,
      "loss": 0.0113,
      "step": 1693880
    },
    {
      "epoch": 2.7721045017445323,
      "grad_norm": 0.3502737581729889,
      "learning_rate": 4.485151189683915e-06,
      "loss": 0.0104,
      "step": 1693900
    },
    {
      "epoch": 2.772137232183186,
      "grad_norm": 0.5153079628944397,
      "learning_rate": 4.485085297470398e-06,
      "loss": 0.0115,
      "step": 1693920
    },
    {
      "epoch": 2.772169962621839,
      "grad_norm": 0.4292564392089844,
      "learning_rate": 4.485019405256882e-06,
      "loss": 0.0163,
      "step": 1693940
    },
    {
      "epoch": 2.7722026930604926,
      "grad_norm": 0.3438795804977417,
      "learning_rate": 4.484953513043364e-06,
      "loss": 0.0124,
      "step": 1693960
    },
    {
      "epoch": 2.7722354234991458,
      "grad_norm": 0.6576657295227051,
      "learning_rate": 4.484887620829847e-06,
      "loss": 0.0116,
      "step": 1693980
    },
    {
      "epoch": 2.772268153937799,
      "grad_norm": 0.4665602147579193,
      "learning_rate": 4.48482172861633e-06,
      "loss": 0.0171,
      "step": 1694000
    },
    {
      "epoch": 2.7723008843764525,
      "grad_norm": 0.28975433111190796,
      "learning_rate": 4.4847558364028126e-06,
      "loss": 0.0095,
      "step": 1694020
    },
    {
      "epoch": 2.7723336148151057,
      "grad_norm": 0.23755398392677307,
      "learning_rate": 4.484689944189295e-06,
      "loss": 0.0116,
      "step": 1694040
    },
    {
      "epoch": 2.7723663452537592,
      "grad_norm": 0.13491633534431458,
      "learning_rate": 4.484624051975778e-06,
      "loss": 0.0125,
      "step": 1694060
    },
    {
      "epoch": 2.7723990756924124,
      "grad_norm": 0.39601796865463257,
      "learning_rate": 4.484558159762261e-06,
      "loss": 0.015,
      "step": 1694080
    },
    {
      "epoch": 2.772431806131066,
      "grad_norm": 0.4645616114139557,
      "learning_rate": 4.484492267548744e-06,
      "loss": 0.013,
      "step": 1694100
    },
    {
      "epoch": 2.772464536569719,
      "grad_norm": 1.9731910228729248,
      "learning_rate": 4.484426375335227e-06,
      "loss": 0.0175,
      "step": 1694120
    },
    {
      "epoch": 2.7724972670083723,
      "grad_norm": 0.29265984892845154,
      "learning_rate": 4.48436048312171e-06,
      "loss": 0.0115,
      "step": 1694140
    },
    {
      "epoch": 2.772529997447026,
      "grad_norm": 0.10993987321853638,
      "learning_rate": 4.484294590908193e-06,
      "loss": 0.0151,
      "step": 1694160
    },
    {
      "epoch": 2.772562727885679,
      "grad_norm": 0.3069590628147125,
      "learning_rate": 4.484228698694675e-06,
      "loss": 0.0104,
      "step": 1694180
    },
    {
      "epoch": 2.7725954583243326,
      "grad_norm": 0.23719535768032074,
      "learning_rate": 4.484162806481159e-06,
      "loss": 0.0099,
      "step": 1694200
    },
    {
      "epoch": 2.772628188762986,
      "grad_norm": 0.22471334040164948,
      "learning_rate": 4.484096914267642e-06,
      "loss": 0.0146,
      "step": 1694220
    },
    {
      "epoch": 2.7726609192016394,
      "grad_norm": 0.8607893586158752,
      "learning_rate": 4.484031022054124e-06,
      "loss": 0.0091,
      "step": 1694240
    },
    {
      "epoch": 2.7726936496402925,
      "grad_norm": 0.0355992466211319,
      "learning_rate": 4.483965129840607e-06,
      "loss": 0.0147,
      "step": 1694260
    },
    {
      "epoch": 2.7727263800789457,
      "grad_norm": 0.20844319462776184,
      "learning_rate": 4.48389923762709e-06,
      "loss": 0.0148,
      "step": 1694280
    },
    {
      "epoch": 2.7727591105175993,
      "grad_norm": 0.30139535665512085,
      "learning_rate": 4.483833345413573e-06,
      "loss": 0.0149,
      "step": 1694300
    },
    {
      "epoch": 2.7727918409562524,
      "grad_norm": 0.20588865876197815,
      "learning_rate": 4.483767453200056e-06,
      "loss": 0.0087,
      "step": 1694320
    },
    {
      "epoch": 2.7728245713949056,
      "grad_norm": 0.3490932881832123,
      "learning_rate": 4.483701560986539e-06,
      "loss": 0.015,
      "step": 1694340
    },
    {
      "epoch": 2.772857301833559,
      "grad_norm": 0.18077488243579865,
      "learning_rate": 4.483635668773022e-06,
      "loss": 0.0127,
      "step": 1694360
    },
    {
      "epoch": 2.7728900322722128,
      "grad_norm": 0.5841201543807983,
      "learning_rate": 4.4835697765595044e-06,
      "loss": 0.0166,
      "step": 1694380
    },
    {
      "epoch": 2.772922762710866,
      "grad_norm": 0.1889529824256897,
      "learning_rate": 4.483503884345987e-06,
      "loss": 0.0178,
      "step": 1694400
    },
    {
      "epoch": 2.772955493149519,
      "grad_norm": 0.47497498989105225,
      "learning_rate": 4.48343799213247e-06,
      "loss": 0.0144,
      "step": 1694420
    },
    {
      "epoch": 2.7729882235881727,
      "grad_norm": 0.4028765857219696,
      "learning_rate": 4.483372099918953e-06,
      "loss": 0.0139,
      "step": 1694440
    },
    {
      "epoch": 2.773020954026826,
      "grad_norm": 0.1625053882598877,
      "learning_rate": 4.483306207705435e-06,
      "loss": 0.0102,
      "step": 1694460
    },
    {
      "epoch": 2.773053684465479,
      "grad_norm": 0.4099183678627014,
      "learning_rate": 4.483240315491918e-06,
      "loss": 0.0148,
      "step": 1694480
    },
    {
      "epoch": 2.7730864149041325,
      "grad_norm": 0.09308654069900513,
      "learning_rate": 4.483174423278402e-06,
      "loss": 0.0094,
      "step": 1694500
    },
    {
      "epoch": 2.773119145342786,
      "grad_norm": 0.18105147778987885,
      "learning_rate": 4.4831085310648845e-06,
      "loss": 0.0087,
      "step": 1694520
    },
    {
      "epoch": 2.7731518757814393,
      "grad_norm": 0.5475103855133057,
      "learning_rate": 4.483042638851367e-06,
      "loss": 0.0093,
      "step": 1694540
    },
    {
      "epoch": 2.7731846062200924,
      "grad_norm": 0.2875102162361145,
      "learning_rate": 4.482976746637851e-06,
      "loss": 0.0178,
      "step": 1694560
    },
    {
      "epoch": 2.773217336658746,
      "grad_norm": 0.6374059915542603,
      "learning_rate": 4.4829108544243335e-06,
      "loss": 0.0151,
      "step": 1694580
    },
    {
      "epoch": 2.773250067097399,
      "grad_norm": 1.7086005210876465,
      "learning_rate": 4.482844962210816e-06,
      "loss": 0.0139,
      "step": 1694600
    },
    {
      "epoch": 2.7732827975360523,
      "grad_norm": 0.5303798317909241,
      "learning_rate": 4.482779069997299e-06,
      "loss": 0.0152,
      "step": 1694620
    },
    {
      "epoch": 2.773315527974706,
      "grad_norm": 0.10850581526756287,
      "learning_rate": 4.482713177783782e-06,
      "loss": 0.0151,
      "step": 1694640
    },
    {
      "epoch": 2.7733482584133595,
      "grad_norm": 0.13422732055187225,
      "learning_rate": 4.4826472855702645e-06,
      "loss": 0.0142,
      "step": 1694660
    },
    {
      "epoch": 2.7733809888520127,
      "grad_norm": 0.1279587745666504,
      "learning_rate": 4.482581393356747e-06,
      "loss": 0.0114,
      "step": 1694680
    },
    {
      "epoch": 2.773413719290666,
      "grad_norm": 0.5144849419593811,
      "learning_rate": 4.48251550114323e-06,
      "loss": 0.0092,
      "step": 1694700
    },
    {
      "epoch": 2.7734464497293194,
      "grad_norm": 0.4882267415523529,
      "learning_rate": 4.4824496089297135e-06,
      "loss": 0.0203,
      "step": 1694720
    },
    {
      "epoch": 2.7734791801679726,
      "grad_norm": 0.5332976579666138,
      "learning_rate": 4.482383716716196e-06,
      "loss": 0.0175,
      "step": 1694740
    },
    {
      "epoch": 2.7735119106066257,
      "grad_norm": 0.20542040467262268,
      "learning_rate": 4.482317824502679e-06,
      "loss": 0.0097,
      "step": 1694760
    },
    {
      "epoch": 2.7735446410452793,
      "grad_norm": 0.2916880249977112,
      "learning_rate": 4.482251932289162e-06,
      "loss": 0.0153,
      "step": 1694780
    },
    {
      "epoch": 2.7735773714839325,
      "grad_norm": 0.16604670882225037,
      "learning_rate": 4.4821860400756445e-06,
      "loss": 0.0177,
      "step": 1694800
    },
    {
      "epoch": 2.773610101922586,
      "grad_norm": 0.20465804636478424,
      "learning_rate": 4.482120147862127e-06,
      "loss": 0.0129,
      "step": 1694820
    },
    {
      "epoch": 2.773642832361239,
      "grad_norm": 0.1945829838514328,
      "learning_rate": 4.48205425564861e-06,
      "loss": 0.0143,
      "step": 1694840
    },
    {
      "epoch": 2.773675562799893,
      "grad_norm": 0.22688521444797516,
      "learning_rate": 4.481988363435093e-06,
      "loss": 0.0124,
      "step": 1694860
    },
    {
      "epoch": 2.773708293238546,
      "grad_norm": 0.20133286714553833,
      "learning_rate": 4.4819224712215755e-06,
      "loss": 0.0111,
      "step": 1694880
    },
    {
      "epoch": 2.773741023677199,
      "grad_norm": 0.6507478952407837,
      "learning_rate": 4.481856579008059e-06,
      "loss": 0.0193,
      "step": 1694900
    },
    {
      "epoch": 2.7737737541158527,
      "grad_norm": 0.3896371126174927,
      "learning_rate": 4.481790686794542e-06,
      "loss": 0.0091,
      "step": 1694920
    },
    {
      "epoch": 2.773806484554506,
      "grad_norm": 0.3170574903488159,
      "learning_rate": 4.4817247945810245e-06,
      "loss": 0.0216,
      "step": 1694940
    },
    {
      "epoch": 2.7738392149931594,
      "grad_norm": 0.09686221927404404,
      "learning_rate": 4.481658902367508e-06,
      "loss": 0.0144,
      "step": 1694960
    },
    {
      "epoch": 2.7738719454318126,
      "grad_norm": 0.22525475919246674,
      "learning_rate": 4.481593010153991e-06,
      "loss": 0.0126,
      "step": 1694980
    },
    {
      "epoch": 2.773904675870466,
      "grad_norm": 0.16819560527801514,
      "learning_rate": 4.481527117940474e-06,
      "loss": 0.0127,
      "step": 1695000
    },
    {
      "epoch": 2.7739374063091193,
      "grad_norm": 0.3339514136314392,
      "learning_rate": 4.481461225726956e-06,
      "loss": 0.011,
      "step": 1695020
    },
    {
      "epoch": 2.7739701367477725,
      "grad_norm": 0.10366027802228928,
      "learning_rate": 4.481395333513439e-06,
      "loss": 0.017,
      "step": 1695040
    },
    {
      "epoch": 2.774002867186426,
      "grad_norm": 0.1247662603855133,
      "learning_rate": 4.481329441299922e-06,
      "loss": 0.0238,
      "step": 1695060
    },
    {
      "epoch": 2.7740355976250792,
      "grad_norm": 0.36194583773612976,
      "learning_rate": 4.4812635490864046e-06,
      "loss": 0.0074,
      "step": 1695080
    },
    {
      "epoch": 2.774068328063733,
      "grad_norm": 0.4824533462524414,
      "learning_rate": 4.481197656872887e-06,
      "loss": 0.0109,
      "step": 1695100
    },
    {
      "epoch": 2.774101058502386,
      "grad_norm": 0.7655509114265442,
      "learning_rate": 4.481131764659371e-06,
      "loss": 0.0157,
      "step": 1695120
    },
    {
      "epoch": 2.7741337889410396,
      "grad_norm": 0.16927970945835114,
      "learning_rate": 4.481065872445854e-06,
      "loss": 0.0148,
      "step": 1695140
    },
    {
      "epoch": 2.7741665193796927,
      "grad_norm": 0.22177797555923462,
      "learning_rate": 4.480999980232336e-06,
      "loss": 0.0129,
      "step": 1695160
    },
    {
      "epoch": 2.774199249818346,
      "grad_norm": 0.6696363091468811,
      "learning_rate": 4.480934088018819e-06,
      "loss": 0.0114,
      "step": 1695180
    },
    {
      "epoch": 2.7742319802569995,
      "grad_norm": 0.5626381635665894,
      "learning_rate": 4.480868195805302e-06,
      "loss": 0.0192,
      "step": 1695200
    },
    {
      "epoch": 2.7742647106956526,
      "grad_norm": 0.9897124767303467,
      "learning_rate": 4.480802303591785e-06,
      "loss": 0.0153,
      "step": 1695220
    },
    {
      "epoch": 2.774297441134306,
      "grad_norm": 0.22061163187026978,
      "learning_rate": 4.480736411378267e-06,
      "loss": 0.02,
      "step": 1695240
    },
    {
      "epoch": 2.7743301715729594,
      "grad_norm": 0.1394074708223343,
      "learning_rate": 4.480670519164751e-06,
      "loss": 0.013,
      "step": 1695260
    },
    {
      "epoch": 2.774362902011613,
      "grad_norm": 0.45130157470703125,
      "learning_rate": 4.480604626951234e-06,
      "loss": 0.0141,
      "step": 1695280
    },
    {
      "epoch": 2.774395632450266,
      "grad_norm": 0.48959213495254517,
      "learning_rate": 4.480538734737716e-06,
      "loss": 0.0088,
      "step": 1695300
    },
    {
      "epoch": 2.7744283628889193,
      "grad_norm": 0.3392927348613739,
      "learning_rate": 4.480472842524199e-06,
      "loss": 0.0138,
      "step": 1695320
    },
    {
      "epoch": 2.774461093327573,
      "grad_norm": 0.10634690523147583,
      "learning_rate": 4.480406950310682e-06,
      "loss": 0.0136,
      "step": 1695340
    },
    {
      "epoch": 2.774493823766226,
      "grad_norm": 0.24155838787555695,
      "learning_rate": 4.4803410580971655e-06,
      "loss": 0.0145,
      "step": 1695360
    },
    {
      "epoch": 2.7745265542048796,
      "grad_norm": 0.11425510793924332,
      "learning_rate": 4.480275165883648e-06,
      "loss": 0.015,
      "step": 1695380
    },
    {
      "epoch": 2.7745592846435327,
      "grad_norm": 0.38800179958343506,
      "learning_rate": 4.480209273670131e-06,
      "loss": 0.0128,
      "step": 1695400
    },
    {
      "epoch": 2.7745920150821863,
      "grad_norm": 0.44488176703453064,
      "learning_rate": 4.480143381456614e-06,
      "loss": 0.0139,
      "step": 1695420
    },
    {
      "epoch": 2.7746247455208395,
      "grad_norm": 2.2823426723480225,
      "learning_rate": 4.480077489243096e-06,
      "loss": 0.0138,
      "step": 1695440
    },
    {
      "epoch": 2.7746574759594926,
      "grad_norm": 0.3955870270729065,
      "learning_rate": 4.480011597029579e-06,
      "loss": 0.0128,
      "step": 1695460
    },
    {
      "epoch": 2.7746902063981462,
      "grad_norm": 0.4057033360004425,
      "learning_rate": 4.479945704816062e-06,
      "loss": 0.0109,
      "step": 1695480
    },
    {
      "epoch": 2.7747229368367994,
      "grad_norm": 0.7094305157661438,
      "learning_rate": 4.479879812602545e-06,
      "loss": 0.0134,
      "step": 1695500
    },
    {
      "epoch": 2.774755667275453,
      "grad_norm": 0.3080482482910156,
      "learning_rate": 4.479813920389028e-06,
      "loss": 0.0081,
      "step": 1695520
    },
    {
      "epoch": 2.774788397714106,
      "grad_norm": 1.5484325885772705,
      "learning_rate": 4.479748028175511e-06,
      "loss": 0.0199,
      "step": 1695540
    },
    {
      "epoch": 2.7748211281527597,
      "grad_norm": 0.6267054080963135,
      "learning_rate": 4.479682135961994e-06,
      "loss": 0.0171,
      "step": 1695560
    },
    {
      "epoch": 2.774853858591413,
      "grad_norm": 0.6910218000411987,
      "learning_rate": 4.4796162437484764e-06,
      "loss": 0.018,
      "step": 1695580
    },
    {
      "epoch": 2.774886589030066,
      "grad_norm": 0.37912875413894653,
      "learning_rate": 4.479550351534959e-06,
      "loss": 0.0116,
      "step": 1695600
    },
    {
      "epoch": 2.7749193194687196,
      "grad_norm": 0.2857670187950134,
      "learning_rate": 4.479484459321443e-06,
      "loss": 0.0111,
      "step": 1695620
    },
    {
      "epoch": 2.7749520499073728,
      "grad_norm": 0.5434138774871826,
      "learning_rate": 4.4794185671079255e-06,
      "loss": 0.0091,
      "step": 1695640
    },
    {
      "epoch": 2.7749847803460264,
      "grad_norm": 0.08606155216693878,
      "learning_rate": 4.479352674894408e-06,
      "loss": 0.0097,
      "step": 1695660
    },
    {
      "epoch": 2.7750175107846795,
      "grad_norm": 0.327858030796051,
      "learning_rate": 4.479286782680891e-06,
      "loss": 0.0088,
      "step": 1695680
    },
    {
      "epoch": 2.775050241223333,
      "grad_norm": 0.5341073870658875,
      "learning_rate": 4.479220890467374e-06,
      "loss": 0.0259,
      "step": 1695700
    },
    {
      "epoch": 2.7750829716619863,
      "grad_norm": 0.20112209022045135,
      "learning_rate": 4.4791549982538565e-06,
      "loss": 0.0212,
      "step": 1695720
    },
    {
      "epoch": 2.7751157021006394,
      "grad_norm": 0.2893141210079193,
      "learning_rate": 4.47908910604034e-06,
      "loss": 0.0141,
      "step": 1695740
    },
    {
      "epoch": 2.775148432539293,
      "grad_norm": 0.28304535150527954,
      "learning_rate": 4.479023213826823e-06,
      "loss": 0.01,
      "step": 1695760
    },
    {
      "epoch": 2.775181162977946,
      "grad_norm": 0.33690106868743896,
      "learning_rate": 4.4789573216133055e-06,
      "loss": 0.0129,
      "step": 1695780
    },
    {
      "epoch": 2.7752138934165993,
      "grad_norm": 0.18919755518436432,
      "learning_rate": 4.478891429399788e-06,
      "loss": 0.0147,
      "step": 1695800
    },
    {
      "epoch": 2.775246623855253,
      "grad_norm": 0.47449496388435364,
      "learning_rate": 4.478825537186271e-06,
      "loss": 0.0138,
      "step": 1695820
    },
    {
      "epoch": 2.7752793542939065,
      "grad_norm": 0.13737598061561584,
      "learning_rate": 4.478759644972754e-06,
      "loss": 0.0082,
      "step": 1695840
    },
    {
      "epoch": 2.7753120847325596,
      "grad_norm": 0.10707122087478638,
      "learning_rate": 4.4786937527592365e-06,
      "loss": 0.0172,
      "step": 1695860
    },
    {
      "epoch": 2.775344815171213,
      "grad_norm": 0.17530161142349243,
      "learning_rate": 4.478627860545719e-06,
      "loss": 0.0145,
      "step": 1695880
    },
    {
      "epoch": 2.7753775456098664,
      "grad_norm": 0.6826977133750916,
      "learning_rate": 4.478561968332202e-06,
      "loss": 0.0122,
      "step": 1695900
    },
    {
      "epoch": 2.7754102760485195,
      "grad_norm": 0.3453812003135681,
      "learning_rate": 4.4784960761186856e-06,
      "loss": 0.0174,
      "step": 1695920
    },
    {
      "epoch": 2.7754430064871727,
      "grad_norm": 0.6136601567268372,
      "learning_rate": 4.478430183905168e-06,
      "loss": 0.011,
      "step": 1695940
    },
    {
      "epoch": 2.7754757369258263,
      "grad_norm": 0.5439351201057434,
      "learning_rate": 4.478364291691651e-06,
      "loss": 0.0115,
      "step": 1695960
    },
    {
      "epoch": 2.77550846736448,
      "grad_norm": 0.2039354145526886,
      "learning_rate": 4.478298399478135e-06,
      "loss": 0.0103,
      "step": 1695980
    },
    {
      "epoch": 2.775541197803133,
      "grad_norm": 0.2917405664920807,
      "learning_rate": 4.478232507264617e-06,
      "loss": 0.0128,
      "step": 1696000
    },
    {
      "epoch": 2.775573928241786,
      "grad_norm": 0.14038169384002686,
      "learning_rate": 4.4781666150511e-06,
      "loss": 0.0091,
      "step": 1696020
    },
    {
      "epoch": 2.7756066586804398,
      "grad_norm": 0.3093861937522888,
      "learning_rate": 4.478100722837583e-06,
      "loss": 0.0094,
      "step": 1696040
    },
    {
      "epoch": 2.775639389119093,
      "grad_norm": 0.572307288646698,
      "learning_rate": 4.478034830624066e-06,
      "loss": 0.0163,
      "step": 1696060
    },
    {
      "epoch": 2.775672119557746,
      "grad_norm": 0.15901833772659302,
      "learning_rate": 4.477968938410548e-06,
      "loss": 0.0126,
      "step": 1696080
    },
    {
      "epoch": 2.7757048499963997,
      "grad_norm": 0.39669156074523926,
      "learning_rate": 4.477903046197031e-06,
      "loss": 0.0087,
      "step": 1696100
    },
    {
      "epoch": 2.7757375804350533,
      "grad_norm": 0.1647988110780716,
      "learning_rate": 4.477837153983514e-06,
      "loss": 0.0114,
      "step": 1696120
    },
    {
      "epoch": 2.7757703108737064,
      "grad_norm": 0.35239386558532715,
      "learning_rate": 4.477771261769997e-06,
      "loss": 0.0071,
      "step": 1696140
    },
    {
      "epoch": 2.7758030413123596,
      "grad_norm": 0.1882927417755127,
      "learning_rate": 4.47770536955648e-06,
      "loss": 0.0163,
      "step": 1696160
    },
    {
      "epoch": 2.775835771751013,
      "grad_norm": 0.7736552953720093,
      "learning_rate": 4.477639477342963e-06,
      "loss": 0.0134,
      "step": 1696180
    },
    {
      "epoch": 2.7758685021896663,
      "grad_norm": 0.30164259672164917,
      "learning_rate": 4.477573585129446e-06,
      "loss": 0.0113,
      "step": 1696200
    },
    {
      "epoch": 2.7759012326283194,
      "grad_norm": 0.2546358108520508,
      "learning_rate": 4.477507692915928e-06,
      "loss": 0.0135,
      "step": 1696220
    },
    {
      "epoch": 2.775933963066973,
      "grad_norm": 0.1729053258895874,
      "learning_rate": 4.477441800702411e-06,
      "loss": 0.0146,
      "step": 1696240
    },
    {
      "epoch": 2.7759666935056266,
      "grad_norm": 0.31574034690856934,
      "learning_rate": 4.477375908488894e-06,
      "loss": 0.0201,
      "step": 1696260
    },
    {
      "epoch": 2.77599942394428,
      "grad_norm": 0.45927464962005615,
      "learning_rate": 4.4773100162753766e-06,
      "loss": 0.0129,
      "step": 1696280
    },
    {
      "epoch": 2.776032154382933,
      "grad_norm": 0.4152534604072571,
      "learning_rate": 4.477244124061859e-06,
      "loss": 0.0105,
      "step": 1696300
    },
    {
      "epoch": 2.7760648848215865,
      "grad_norm": 0.33670711517333984,
      "learning_rate": 4.477178231848343e-06,
      "loss": 0.0146,
      "step": 1696320
    },
    {
      "epoch": 2.7760976152602397,
      "grad_norm": 0.4220319390296936,
      "learning_rate": 4.477112339634826e-06,
      "loss": 0.0095,
      "step": 1696340
    },
    {
      "epoch": 2.776130345698893,
      "grad_norm": 0.2506982982158661,
      "learning_rate": 4.477046447421308e-06,
      "loss": 0.0121,
      "step": 1696360
    },
    {
      "epoch": 2.7761630761375464,
      "grad_norm": 1.101002812385559,
      "learning_rate": 4.476980555207792e-06,
      "loss": 0.0178,
      "step": 1696380
    },
    {
      "epoch": 2.7761958065761996,
      "grad_norm": 0.33204513788223267,
      "learning_rate": 4.476914662994275e-06,
      "loss": 0.014,
      "step": 1696400
    },
    {
      "epoch": 2.776228537014853,
      "grad_norm": 0.37845778465270996,
      "learning_rate": 4.4768487707807574e-06,
      "loss": 0.012,
      "step": 1696420
    },
    {
      "epoch": 2.7762612674535063,
      "grad_norm": 0.5372650623321533,
      "learning_rate": 4.47678287856724e-06,
      "loss": 0.0173,
      "step": 1696440
    },
    {
      "epoch": 2.77629399789216,
      "grad_norm": 0.38486629724502563,
      "learning_rate": 4.476716986353723e-06,
      "loss": 0.0133,
      "step": 1696460
    },
    {
      "epoch": 2.776326728330813,
      "grad_norm": 1.3477445840835571,
      "learning_rate": 4.476651094140206e-06,
      "loss": 0.0118,
      "step": 1696480
    },
    {
      "epoch": 2.776359458769466,
      "grad_norm": 0.26506730914115906,
      "learning_rate": 4.476585201926688e-06,
      "loss": 0.0147,
      "step": 1696500
    },
    {
      "epoch": 2.77639218920812,
      "grad_norm": 0.4409807622432709,
      "learning_rate": 4.476519309713171e-06,
      "loss": 0.014,
      "step": 1696520
    },
    {
      "epoch": 2.776424919646773,
      "grad_norm": 0.9444643259048462,
      "learning_rate": 4.476453417499655e-06,
      "loss": 0.0134,
      "step": 1696540
    },
    {
      "epoch": 2.7764576500854266,
      "grad_norm": 0.10761790722608566,
      "learning_rate": 4.4763875252861375e-06,
      "loss": 0.0124,
      "step": 1696560
    },
    {
      "epoch": 2.7764903805240797,
      "grad_norm": 0.3779415488243103,
      "learning_rate": 4.47632163307262e-06,
      "loss": 0.0089,
      "step": 1696580
    },
    {
      "epoch": 2.7765231109627333,
      "grad_norm": 0.09203214943408966,
      "learning_rate": 4.476255740859103e-06,
      "loss": 0.0083,
      "step": 1696600
    },
    {
      "epoch": 2.7765558414013864,
      "grad_norm": 0.6451401114463806,
      "learning_rate": 4.476189848645586e-06,
      "loss": 0.0129,
      "step": 1696620
    },
    {
      "epoch": 2.7765885718400396,
      "grad_norm": 0.506578266620636,
      "learning_rate": 4.476123956432068e-06,
      "loss": 0.0119,
      "step": 1696640
    },
    {
      "epoch": 2.776621302278693,
      "grad_norm": 0.19979390501976013,
      "learning_rate": 4.476058064218552e-06,
      "loss": 0.0216,
      "step": 1696660
    },
    {
      "epoch": 2.7766540327173463,
      "grad_norm": 0.3277427554130554,
      "learning_rate": 4.475992172005035e-06,
      "loss": 0.0122,
      "step": 1696680
    },
    {
      "epoch": 2.776686763156,
      "grad_norm": 0.06958665698766708,
      "learning_rate": 4.4759262797915175e-06,
      "loss": 0.0119,
      "step": 1696700
    },
    {
      "epoch": 2.776719493594653,
      "grad_norm": 0.5314967036247253,
      "learning_rate": 4.475860387578e-06,
      "loss": 0.012,
      "step": 1696720
    },
    {
      "epoch": 2.7767522240333067,
      "grad_norm": 0.32566094398498535,
      "learning_rate": 4.475794495364483e-06,
      "loss": 0.0101,
      "step": 1696740
    },
    {
      "epoch": 2.77678495447196,
      "grad_norm": 0.458033949136734,
      "learning_rate": 4.4757286031509666e-06,
      "loss": 0.0155,
      "step": 1696760
    },
    {
      "epoch": 2.776817684910613,
      "grad_norm": 0.23336488008499146,
      "learning_rate": 4.475662710937449e-06,
      "loss": 0.0166,
      "step": 1696780
    },
    {
      "epoch": 2.7768504153492666,
      "grad_norm": 0.2641984224319458,
      "learning_rate": 4.475596818723932e-06,
      "loss": 0.0179,
      "step": 1696800
    },
    {
      "epoch": 2.7768831457879197,
      "grad_norm": 0.38572779297828674,
      "learning_rate": 4.475530926510415e-06,
      "loss": 0.0102,
      "step": 1696820
    },
    {
      "epoch": 2.7769158762265733,
      "grad_norm": 0.4240044355392456,
      "learning_rate": 4.4754650342968975e-06,
      "loss": 0.0188,
      "step": 1696840
    },
    {
      "epoch": 2.7769486066652265,
      "grad_norm": 0.6995892524719238,
      "learning_rate": 4.47539914208338e-06,
      "loss": 0.0096,
      "step": 1696860
    },
    {
      "epoch": 2.77698133710388,
      "grad_norm": 0.17770537734031677,
      "learning_rate": 4.475333249869863e-06,
      "loss": 0.0115,
      "step": 1696880
    },
    {
      "epoch": 2.777014067542533,
      "grad_norm": 0.18594636023044586,
      "learning_rate": 4.475267357656346e-06,
      "loss": 0.0179,
      "step": 1696900
    },
    {
      "epoch": 2.7770467979811864,
      "grad_norm": 0.10096480697393417,
      "learning_rate": 4.4752014654428285e-06,
      "loss": 0.0189,
      "step": 1696920
    },
    {
      "epoch": 2.77707952841984,
      "grad_norm": 0.4729005694389343,
      "learning_rate": 4.475135573229312e-06,
      "loss": 0.0097,
      "step": 1696940
    },
    {
      "epoch": 2.777112258858493,
      "grad_norm": 0.19243985414505005,
      "learning_rate": 4.475069681015795e-06,
      "loss": 0.0172,
      "step": 1696960
    },
    {
      "epoch": 2.7771449892971467,
      "grad_norm": 0.17284515500068665,
      "learning_rate": 4.4750037888022775e-06,
      "loss": 0.0129,
      "step": 1696980
    },
    {
      "epoch": 2.7771777197358,
      "grad_norm": 0.27787357568740845,
      "learning_rate": 4.47493789658876e-06,
      "loss": 0.0101,
      "step": 1697000
    },
    {
      "epoch": 2.7772104501744534,
      "grad_norm": 0.23551523685455322,
      "learning_rate": 4.474872004375244e-06,
      "loss": 0.0101,
      "step": 1697020
    },
    {
      "epoch": 2.7772431806131066,
      "grad_norm": 0.20218199491500854,
      "learning_rate": 4.474806112161727e-06,
      "loss": 0.0144,
      "step": 1697040
    },
    {
      "epoch": 2.7772759110517597,
      "grad_norm": 0.6514610052108765,
      "learning_rate": 4.474740219948209e-06,
      "loss": 0.0162,
      "step": 1697060
    },
    {
      "epoch": 2.7773086414904133,
      "grad_norm": 0.6389088034629822,
      "learning_rate": 4.474674327734692e-06,
      "loss": 0.0157,
      "step": 1697080
    },
    {
      "epoch": 2.7773413719290665,
      "grad_norm": 0.935880720615387,
      "learning_rate": 4.474608435521175e-06,
      "loss": 0.0152,
      "step": 1697100
    },
    {
      "epoch": 2.77737410236772,
      "grad_norm": 0.08070090413093567,
      "learning_rate": 4.4745425433076576e-06,
      "loss": 0.0154,
      "step": 1697120
    },
    {
      "epoch": 2.7774068328063732,
      "grad_norm": 0.3228718340396881,
      "learning_rate": 4.47447665109414e-06,
      "loss": 0.0125,
      "step": 1697140
    },
    {
      "epoch": 2.777439563245027,
      "grad_norm": 0.45386791229248047,
      "learning_rate": 4.474410758880624e-06,
      "loss": 0.0168,
      "step": 1697160
    },
    {
      "epoch": 2.77747229368368,
      "grad_norm": 0.1531737893819809,
      "learning_rate": 4.474344866667107e-06,
      "loss": 0.0153,
      "step": 1697180
    },
    {
      "epoch": 2.777505024122333,
      "grad_norm": 0.1380307525396347,
      "learning_rate": 4.474278974453589e-06,
      "loss": 0.0203,
      "step": 1697200
    },
    {
      "epoch": 2.7775377545609867,
      "grad_norm": 0.13804161548614502,
      "learning_rate": 4.474213082240072e-06,
      "loss": 0.0141,
      "step": 1697220
    },
    {
      "epoch": 2.77757048499964,
      "grad_norm": 0.3132999837398529,
      "learning_rate": 4.474147190026555e-06,
      "loss": 0.01,
      "step": 1697240
    },
    {
      "epoch": 2.777603215438293,
      "grad_norm": 0.49604612588882446,
      "learning_rate": 4.474081297813038e-06,
      "loss": 0.011,
      "step": 1697260
    },
    {
      "epoch": 2.7776359458769466,
      "grad_norm": 0.45906418561935425,
      "learning_rate": 4.47401540559952e-06,
      "loss": 0.0144,
      "step": 1697280
    },
    {
      "epoch": 2.7776686763156,
      "grad_norm": 0.38843485713005066,
      "learning_rate": 4.473949513386003e-06,
      "loss": 0.0135,
      "step": 1697300
    },
    {
      "epoch": 2.7777014067542534,
      "grad_norm": 0.594509482383728,
      "learning_rate": 4.473883621172486e-06,
      "loss": 0.0123,
      "step": 1697320
    },
    {
      "epoch": 2.7777341371929065,
      "grad_norm": 0.27942049503326416,
      "learning_rate": 4.473817728958969e-06,
      "loss": 0.0182,
      "step": 1697340
    },
    {
      "epoch": 2.77776686763156,
      "grad_norm": 0.4197768270969391,
      "learning_rate": 4.473751836745452e-06,
      "loss": 0.015,
      "step": 1697360
    },
    {
      "epoch": 2.7777995980702133,
      "grad_norm": 0.2155005782842636,
      "learning_rate": 4.473685944531935e-06,
      "loss": 0.0081,
      "step": 1697380
    },
    {
      "epoch": 2.7778323285088664,
      "grad_norm": 0.34198036789894104,
      "learning_rate": 4.4736200523184185e-06,
      "loss": 0.0136,
      "step": 1697400
    },
    {
      "epoch": 2.77786505894752,
      "grad_norm": 0.314827561378479,
      "learning_rate": 4.473554160104901e-06,
      "loss": 0.0163,
      "step": 1697420
    },
    {
      "epoch": 2.7778977893861736,
      "grad_norm": 0.11776505410671234,
      "learning_rate": 4.473488267891384e-06,
      "loss": 0.0097,
      "step": 1697440
    },
    {
      "epoch": 2.7779305198248267,
      "grad_norm": 0.23919892311096191,
      "learning_rate": 4.473422375677867e-06,
      "loss": 0.0168,
      "step": 1697460
    },
    {
      "epoch": 2.77796325026348,
      "grad_norm": 0.10715049505233765,
      "learning_rate": 4.473356483464349e-06,
      "loss": 0.0131,
      "step": 1697480
    },
    {
      "epoch": 2.7779959807021335,
      "grad_norm": 0.08615121990442276,
      "learning_rate": 4.473290591250832e-06,
      "loss": 0.0144,
      "step": 1697500
    },
    {
      "epoch": 2.7780287111407866,
      "grad_norm": 0.3110528886318207,
      "learning_rate": 4.473224699037315e-06,
      "loss": 0.0105,
      "step": 1697520
    },
    {
      "epoch": 2.77806144157944,
      "grad_norm": 0.25196778774261475,
      "learning_rate": 4.473158806823798e-06,
      "loss": 0.0157,
      "step": 1697540
    },
    {
      "epoch": 2.7780941720180934,
      "grad_norm": 0.15876144170761108,
      "learning_rate": 4.473092914610281e-06,
      "loss": 0.0179,
      "step": 1697560
    },
    {
      "epoch": 2.778126902456747,
      "grad_norm": 0.6414051055908203,
      "learning_rate": 4.473027022396764e-06,
      "loss": 0.0134,
      "step": 1697580
    },
    {
      "epoch": 2.7781596328954,
      "grad_norm": 0.4665100574493408,
      "learning_rate": 4.472961130183247e-06,
      "loss": 0.021,
      "step": 1697600
    },
    {
      "epoch": 2.7781923633340533,
      "grad_norm": 0.24230951070785522,
      "learning_rate": 4.4728952379697294e-06,
      "loss": 0.0138,
      "step": 1697620
    },
    {
      "epoch": 2.778225093772707,
      "grad_norm": 0.943382740020752,
      "learning_rate": 4.472829345756212e-06,
      "loss": 0.0137,
      "step": 1697640
    },
    {
      "epoch": 2.77825782421136,
      "grad_norm": 0.850101888179779,
      "learning_rate": 4.472763453542695e-06,
      "loss": 0.0167,
      "step": 1697660
    },
    {
      "epoch": 2.778290554650013,
      "grad_norm": 0.9818955659866333,
      "learning_rate": 4.472697561329178e-06,
      "loss": 0.0124,
      "step": 1697680
    },
    {
      "epoch": 2.7783232850886668,
      "grad_norm": 0.25467032194137573,
      "learning_rate": 4.47263166911566e-06,
      "loss": 0.0209,
      "step": 1697700
    },
    {
      "epoch": 2.7783560155273204,
      "grad_norm": 0.33642882108688354,
      "learning_rate": 4.472565776902144e-06,
      "loss": 0.0212,
      "step": 1697720
    },
    {
      "epoch": 2.7783887459659735,
      "grad_norm": 0.5011067986488342,
      "learning_rate": 4.472499884688627e-06,
      "loss": 0.0146,
      "step": 1697740
    },
    {
      "epoch": 2.7784214764046267,
      "grad_norm": 0.25281187891960144,
      "learning_rate": 4.4724339924751095e-06,
      "loss": 0.0086,
      "step": 1697760
    },
    {
      "epoch": 2.7784542068432803,
      "grad_norm": 0.4412320852279663,
      "learning_rate": 4.472368100261592e-06,
      "loss": 0.0157,
      "step": 1697780
    },
    {
      "epoch": 2.7784869372819334,
      "grad_norm": 0.18195778131484985,
      "learning_rate": 4.472302208048076e-06,
      "loss": 0.0127,
      "step": 1697800
    },
    {
      "epoch": 2.7785196677205866,
      "grad_norm": 0.509127676486969,
      "learning_rate": 4.4722363158345585e-06,
      "loss": 0.0131,
      "step": 1697820
    },
    {
      "epoch": 2.77855239815924,
      "grad_norm": 0.15487448871135712,
      "learning_rate": 4.472170423621041e-06,
      "loss": 0.0151,
      "step": 1697840
    },
    {
      "epoch": 2.7785851285978933,
      "grad_norm": 0.15862464904785156,
      "learning_rate": 4.472104531407524e-06,
      "loss": 0.0128,
      "step": 1697860
    },
    {
      "epoch": 2.778617859036547,
      "grad_norm": 0.29624053835868835,
      "learning_rate": 4.472038639194007e-06,
      "loss": 0.0128,
      "step": 1697880
    },
    {
      "epoch": 2.7786505894752,
      "grad_norm": 0.37485092878341675,
      "learning_rate": 4.4719727469804895e-06,
      "loss": 0.0133,
      "step": 1697900
    },
    {
      "epoch": 2.7786833199138536,
      "grad_norm": 0.3113023042678833,
      "learning_rate": 4.471906854766972e-06,
      "loss": 0.0102,
      "step": 1697920
    },
    {
      "epoch": 2.778716050352507,
      "grad_norm": 0.2710167169570923,
      "learning_rate": 4.471840962553455e-06,
      "loss": 0.0147,
      "step": 1697940
    },
    {
      "epoch": 2.77874878079116,
      "grad_norm": 0.23541074991226196,
      "learning_rate": 4.4717750703399386e-06,
      "loss": 0.009,
      "step": 1697960
    },
    {
      "epoch": 2.7787815112298135,
      "grad_norm": 0.19558417797088623,
      "learning_rate": 4.471709178126421e-06,
      "loss": 0.0123,
      "step": 1697980
    },
    {
      "epoch": 2.7788142416684667,
      "grad_norm": 0.12567488849163055,
      "learning_rate": 4.471643285912904e-06,
      "loss": 0.0148,
      "step": 1698000
    },
    {
      "epoch": 2.7788469721071203,
      "grad_norm": 0.21714593470096588,
      "learning_rate": 4.471577393699387e-06,
      "loss": 0.0149,
      "step": 1698020
    },
    {
      "epoch": 2.7788797025457734,
      "grad_norm": 0.4333891272544861,
      "learning_rate": 4.4715115014858695e-06,
      "loss": 0.0146,
      "step": 1698040
    },
    {
      "epoch": 2.778912432984427,
      "grad_norm": 0.559319257736206,
      "learning_rate": 4.471445609272352e-06,
      "loss": 0.0196,
      "step": 1698060
    },
    {
      "epoch": 2.77894516342308,
      "grad_norm": 0.11373332142829895,
      "learning_rate": 4.471379717058836e-06,
      "loss": 0.0145,
      "step": 1698080
    },
    {
      "epoch": 2.7789778938617333,
      "grad_norm": 0.1778019666671753,
      "learning_rate": 4.471313824845319e-06,
      "loss": 0.0133,
      "step": 1698100
    },
    {
      "epoch": 2.779010624300387,
      "grad_norm": 0.14860869944095612,
      "learning_rate": 4.471247932631801e-06,
      "loss": 0.0095,
      "step": 1698120
    },
    {
      "epoch": 2.77904335473904,
      "grad_norm": 0.638614296913147,
      "learning_rate": 4.471182040418284e-06,
      "loss": 0.0139,
      "step": 1698140
    },
    {
      "epoch": 2.7790760851776937,
      "grad_norm": 0.5238606333732605,
      "learning_rate": 4.471116148204767e-06,
      "loss": 0.0144,
      "step": 1698160
    },
    {
      "epoch": 2.779108815616347,
      "grad_norm": 0.9442831873893738,
      "learning_rate": 4.47105025599125e-06,
      "loss": 0.0202,
      "step": 1698180
    },
    {
      "epoch": 2.7791415460550004,
      "grad_norm": 0.444455623626709,
      "learning_rate": 4.470984363777733e-06,
      "loss": 0.0108,
      "step": 1698200
    },
    {
      "epoch": 2.7791742764936536,
      "grad_norm": 0.9648962616920471,
      "learning_rate": 4.470918471564216e-06,
      "loss": 0.0126,
      "step": 1698220
    },
    {
      "epoch": 2.7792070069323067,
      "grad_norm": 0.4233848750591278,
      "learning_rate": 4.470852579350699e-06,
      "loss": 0.0112,
      "step": 1698240
    },
    {
      "epoch": 2.7792397373709603,
      "grad_norm": 0.2741304934024811,
      "learning_rate": 4.470786687137181e-06,
      "loss": 0.0107,
      "step": 1698260
    },
    {
      "epoch": 2.7792724678096135,
      "grad_norm": 0.568652868270874,
      "learning_rate": 4.470720794923664e-06,
      "loss": 0.0105,
      "step": 1698280
    },
    {
      "epoch": 2.779305198248267,
      "grad_norm": 0.300192654132843,
      "learning_rate": 4.470654902710147e-06,
      "loss": 0.0159,
      "step": 1698300
    },
    {
      "epoch": 2.77933792868692,
      "grad_norm": 0.3937641382217407,
      "learning_rate": 4.4705890104966296e-06,
      "loss": 0.0146,
      "step": 1698320
    },
    {
      "epoch": 2.779370659125574,
      "grad_norm": 0.39062607288360596,
      "learning_rate": 4.470523118283112e-06,
      "loss": 0.0137,
      "step": 1698340
    },
    {
      "epoch": 2.779403389564227,
      "grad_norm": 0.32494810223579407,
      "learning_rate": 4.470457226069596e-06,
      "loss": 0.01,
      "step": 1698360
    },
    {
      "epoch": 2.77943612000288,
      "grad_norm": 0.44716963171958923,
      "learning_rate": 4.470391333856079e-06,
      "loss": 0.0129,
      "step": 1698380
    },
    {
      "epoch": 2.7794688504415337,
      "grad_norm": 0.77349853515625,
      "learning_rate": 4.470325441642561e-06,
      "loss": 0.0181,
      "step": 1698400
    },
    {
      "epoch": 2.779501580880187,
      "grad_norm": 0.07070637494325638,
      "learning_rate": 4.470259549429045e-06,
      "loss": 0.0123,
      "step": 1698420
    },
    {
      "epoch": 2.7795343113188404,
      "grad_norm": 0.23051178455352783,
      "learning_rate": 4.470193657215528e-06,
      "loss": 0.0136,
      "step": 1698440
    },
    {
      "epoch": 2.7795670417574936,
      "grad_norm": 0.04286952689290047,
      "learning_rate": 4.4701277650020104e-06,
      "loss": 0.0142,
      "step": 1698460
    },
    {
      "epoch": 2.779599772196147,
      "grad_norm": 0.7997866272926331,
      "learning_rate": 4.470061872788493e-06,
      "loss": 0.015,
      "step": 1698480
    },
    {
      "epoch": 2.7796325026348003,
      "grad_norm": 0.17686988413333893,
      "learning_rate": 4.469995980574976e-06,
      "loss": 0.0156,
      "step": 1698500
    },
    {
      "epoch": 2.7796652330734535,
      "grad_norm": 0.25394773483276367,
      "learning_rate": 4.469930088361459e-06,
      "loss": 0.0136,
      "step": 1698520
    },
    {
      "epoch": 2.779697963512107,
      "grad_norm": 0.13876521587371826,
      "learning_rate": 4.469864196147941e-06,
      "loss": 0.0177,
      "step": 1698540
    },
    {
      "epoch": 2.7797306939507602,
      "grad_norm": 0.2885090112686157,
      "learning_rate": 4.469798303934424e-06,
      "loss": 0.0194,
      "step": 1698560
    },
    {
      "epoch": 2.779763424389414,
      "grad_norm": 0.10316687822341919,
      "learning_rate": 4.469732411720908e-06,
      "loss": 0.0114,
      "step": 1698580
    },
    {
      "epoch": 2.779796154828067,
      "grad_norm": 0.9072532057762146,
      "learning_rate": 4.4696665195073905e-06,
      "loss": 0.015,
      "step": 1698600
    },
    {
      "epoch": 2.7798288852667206,
      "grad_norm": 0.11375228315591812,
      "learning_rate": 4.469600627293873e-06,
      "loss": 0.021,
      "step": 1698620
    },
    {
      "epoch": 2.7798616157053737,
      "grad_norm": 0.29311665892601013,
      "learning_rate": 4.469534735080356e-06,
      "loss": 0.0107,
      "step": 1698640
    },
    {
      "epoch": 2.779894346144027,
      "grad_norm": 0.21970613300800323,
      "learning_rate": 4.469468842866839e-06,
      "loss": 0.0128,
      "step": 1698660
    },
    {
      "epoch": 2.7799270765826805,
      "grad_norm": 0.46663734316825867,
      "learning_rate": 4.4694029506533214e-06,
      "loss": 0.0156,
      "step": 1698680
    },
    {
      "epoch": 2.7799598070213336,
      "grad_norm": 0.19814756512641907,
      "learning_rate": 4.469337058439804e-06,
      "loss": 0.0138,
      "step": 1698700
    },
    {
      "epoch": 2.779992537459987,
      "grad_norm": 0.21265454590320587,
      "learning_rate": 4.469271166226287e-06,
      "loss": 0.0125,
      "step": 1698720
    },
    {
      "epoch": 2.7800252678986404,
      "grad_norm": 0.5553520321846008,
      "learning_rate": 4.46920527401277e-06,
      "loss": 0.0111,
      "step": 1698740
    },
    {
      "epoch": 2.780057998337294,
      "grad_norm": 0.07044681906700134,
      "learning_rate": 4.469139381799253e-06,
      "loss": 0.0099,
      "step": 1698760
    },
    {
      "epoch": 2.780090728775947,
      "grad_norm": 1.0472084283828735,
      "learning_rate": 4.469073489585736e-06,
      "loss": 0.0121,
      "step": 1698780
    },
    {
      "epoch": 2.7801234592146002,
      "grad_norm": 0.22053122520446777,
      "learning_rate": 4.469007597372219e-06,
      "loss": 0.013,
      "step": 1698800
    },
    {
      "epoch": 2.780156189653254,
      "grad_norm": 0.10447081923484802,
      "learning_rate": 4.468941705158702e-06,
      "loss": 0.0171,
      "step": 1698820
    },
    {
      "epoch": 2.780188920091907,
      "grad_norm": 0.2342257797718048,
      "learning_rate": 4.468875812945185e-06,
      "loss": 0.0106,
      "step": 1698840
    },
    {
      "epoch": 2.78022165053056,
      "grad_norm": 0.17819936573505402,
      "learning_rate": 4.468809920731668e-06,
      "loss": 0.0203,
      "step": 1698860
    },
    {
      "epoch": 2.7802543809692137,
      "grad_norm": 0.2871593236923218,
      "learning_rate": 4.4687440285181505e-06,
      "loss": 0.0187,
      "step": 1698880
    },
    {
      "epoch": 2.7802871114078673,
      "grad_norm": 0.3921663165092468,
      "learning_rate": 4.468678136304633e-06,
      "loss": 0.0117,
      "step": 1698900
    },
    {
      "epoch": 2.7803198418465205,
      "grad_norm": 0.23528142273426056,
      "learning_rate": 4.468612244091116e-06,
      "loss": 0.0121,
      "step": 1698920
    },
    {
      "epoch": 2.7803525722851736,
      "grad_norm": 0.2519400119781494,
      "learning_rate": 4.468546351877599e-06,
      "loss": 0.0143,
      "step": 1698940
    },
    {
      "epoch": 2.7803853027238272,
      "grad_norm": 0.44078847765922546,
      "learning_rate": 4.4684804596640815e-06,
      "loss": 0.0094,
      "step": 1698960
    },
    {
      "epoch": 2.7804180331624804,
      "grad_norm": 0.26574358344078064,
      "learning_rate": 4.468414567450565e-06,
      "loss": 0.0105,
      "step": 1698980
    },
    {
      "epoch": 2.7804507636011335,
      "grad_norm": 0.2859340310096741,
      "learning_rate": 4.468348675237048e-06,
      "loss": 0.012,
      "step": 1699000
    },
    {
      "epoch": 2.780483494039787,
      "grad_norm": 0.5737408399581909,
      "learning_rate": 4.4682827830235305e-06,
      "loss": 0.0162,
      "step": 1699020
    },
    {
      "epoch": 2.7805162244784407,
      "grad_norm": 0.1084553450345993,
      "learning_rate": 4.468216890810013e-06,
      "loss": 0.009,
      "step": 1699040
    },
    {
      "epoch": 2.780548954917094,
      "grad_norm": 0.4972992241382599,
      "learning_rate": 4.468150998596496e-06,
      "loss": 0.0214,
      "step": 1699060
    },
    {
      "epoch": 2.780581685355747,
      "grad_norm": 0.08204857259988785,
      "learning_rate": 4.468085106382979e-06,
      "loss": 0.0165,
      "step": 1699080
    },
    {
      "epoch": 2.7806144157944006,
      "grad_norm": 0.13038650155067444,
      "learning_rate": 4.4680192141694615e-06,
      "loss": 0.011,
      "step": 1699100
    },
    {
      "epoch": 2.7806471462330538,
      "grad_norm": 0.7397000193595886,
      "learning_rate": 4.467953321955944e-06,
      "loss": 0.0131,
      "step": 1699120
    },
    {
      "epoch": 2.780679876671707,
      "grad_norm": 0.17755450308322906,
      "learning_rate": 4.467887429742428e-06,
      "loss": 0.0127,
      "step": 1699140
    },
    {
      "epoch": 2.7807126071103605,
      "grad_norm": 0.12171036750078201,
      "learning_rate": 4.4678215375289106e-06,
      "loss": 0.0133,
      "step": 1699160
    },
    {
      "epoch": 2.780745337549014,
      "grad_norm": 0.39794930815696716,
      "learning_rate": 4.467755645315393e-06,
      "loss": 0.017,
      "step": 1699180
    },
    {
      "epoch": 2.7807780679876672,
      "grad_norm": 0.4175872802734375,
      "learning_rate": 4.467689753101876e-06,
      "loss": 0.0165,
      "step": 1699200
    },
    {
      "epoch": 2.7808107984263204,
      "grad_norm": 0.7620068192481995,
      "learning_rate": 4.46762386088836e-06,
      "loss": 0.0131,
      "step": 1699220
    },
    {
      "epoch": 2.780843528864974,
      "grad_norm": 0.33436229825019836,
      "learning_rate": 4.467557968674842e-06,
      "loss": 0.0114,
      "step": 1699240
    },
    {
      "epoch": 2.780876259303627,
      "grad_norm": 0.19368897378444672,
      "learning_rate": 4.467492076461325e-06,
      "loss": 0.0096,
      "step": 1699260
    },
    {
      "epoch": 2.7809089897422803,
      "grad_norm": 0.3049388825893402,
      "learning_rate": 4.467426184247808e-06,
      "loss": 0.0148,
      "step": 1699280
    },
    {
      "epoch": 2.780941720180934,
      "grad_norm": 0.09562723338603973,
      "learning_rate": 4.467360292034291e-06,
      "loss": 0.0102,
      "step": 1699300
    },
    {
      "epoch": 2.7809744506195875,
      "grad_norm": 0.3443283140659332,
      "learning_rate": 4.467294399820773e-06,
      "loss": 0.0143,
      "step": 1699320
    },
    {
      "epoch": 2.7810071810582406,
      "grad_norm": 0.2787575423717499,
      "learning_rate": 4.467228507607256e-06,
      "loss": 0.0153,
      "step": 1699340
    },
    {
      "epoch": 2.781039911496894,
      "grad_norm": 0.40504735708236694,
      "learning_rate": 4.467162615393739e-06,
      "loss": 0.0156,
      "step": 1699360
    },
    {
      "epoch": 2.7810726419355474,
      "grad_norm": 0.2616403102874756,
      "learning_rate": 4.467096723180222e-06,
      "loss": 0.0134,
      "step": 1699380
    },
    {
      "epoch": 2.7811053723742005,
      "grad_norm": 0.30657896399497986,
      "learning_rate": 4.467030830966705e-06,
      "loss": 0.0155,
      "step": 1699400
    },
    {
      "epoch": 2.7811381028128537,
      "grad_norm": 0.8346031308174133,
      "learning_rate": 4.466964938753188e-06,
      "loss": 0.0145,
      "step": 1699420
    },
    {
      "epoch": 2.7811708332515073,
      "grad_norm": 0.24043378233909607,
      "learning_rate": 4.466899046539671e-06,
      "loss": 0.0094,
      "step": 1699440
    },
    {
      "epoch": 2.7812035636901604,
      "grad_norm": 0.30649077892303467,
      "learning_rate": 4.466833154326153e-06,
      "loss": 0.0157,
      "step": 1699460
    },
    {
      "epoch": 2.781236294128814,
      "grad_norm": 0.3187084496021271,
      "learning_rate": 4.466767262112637e-06,
      "loss": 0.0206,
      "step": 1699480
    },
    {
      "epoch": 2.781269024567467,
      "grad_norm": 0.3494683504104614,
      "learning_rate": 4.46670136989912e-06,
      "loss": 0.0129,
      "step": 1699500
    },
    {
      "epoch": 2.7813017550061208,
      "grad_norm": 0.11001449823379517,
      "learning_rate": 4.4666354776856024e-06,
      "loss": 0.0101,
      "step": 1699520
    },
    {
      "epoch": 2.781334485444774,
      "grad_norm": 0.41542574763298035,
      "learning_rate": 4.466569585472085e-06,
      "loss": 0.0103,
      "step": 1699540
    },
    {
      "epoch": 2.781367215883427,
      "grad_norm": 0.6017875075340271,
      "learning_rate": 4.466503693258568e-06,
      "loss": 0.0189,
      "step": 1699560
    },
    {
      "epoch": 2.7813999463220807,
      "grad_norm": 1.2239811420440674,
      "learning_rate": 4.466437801045051e-06,
      "loss": 0.0181,
      "step": 1699580
    },
    {
      "epoch": 2.781432676760734,
      "grad_norm": 0.2375885546207428,
      "learning_rate": 4.466371908831534e-06,
      "loss": 0.0173,
      "step": 1699600
    },
    {
      "epoch": 2.7814654071993874,
      "grad_norm": 0.4055435359477997,
      "learning_rate": 4.466306016618017e-06,
      "loss": 0.0085,
      "step": 1699620
    },
    {
      "epoch": 2.7814981376380405,
      "grad_norm": 0.08648474514484406,
      "learning_rate": 4.4662401244045e-06,
      "loss": 0.0141,
      "step": 1699640
    },
    {
      "epoch": 2.781530868076694,
      "grad_norm": 0.2814939320087433,
      "learning_rate": 4.4661742321909825e-06,
      "loss": 0.0098,
      "step": 1699660
    },
    {
      "epoch": 2.7815635985153473,
      "grad_norm": 1.9281959533691406,
      "learning_rate": 4.466108339977465e-06,
      "loss": 0.0144,
      "step": 1699680
    },
    {
      "epoch": 2.7815963289540004,
      "grad_norm": 0.43715140223503113,
      "learning_rate": 4.466042447763948e-06,
      "loss": 0.0112,
      "step": 1699700
    },
    {
      "epoch": 2.781629059392654,
      "grad_norm": 0.28268828988075256,
      "learning_rate": 4.465976555550431e-06,
      "loss": 0.0175,
      "step": 1699720
    },
    {
      "epoch": 2.781661789831307,
      "grad_norm": 0.37149739265441895,
      "learning_rate": 4.465910663336913e-06,
      "loss": 0.0133,
      "step": 1699740
    },
    {
      "epoch": 2.781694520269961,
      "grad_norm": 0.048555128276348114,
      "learning_rate": 4.465844771123396e-06,
      "loss": 0.0126,
      "step": 1699760
    },
    {
      "epoch": 2.781727250708614,
      "grad_norm": 0.4328807294368744,
      "learning_rate": 4.46577887890988e-06,
      "loss": 0.0109,
      "step": 1699780
    },
    {
      "epoch": 2.7817599811472675,
      "grad_norm": 0.35864904522895813,
      "learning_rate": 4.4657129866963625e-06,
      "loss": 0.0129,
      "step": 1699800
    },
    {
      "epoch": 2.7817927115859207,
      "grad_norm": 0.17174464464187622,
      "learning_rate": 4.465647094482845e-06,
      "loss": 0.0149,
      "step": 1699820
    },
    {
      "epoch": 2.781825442024574,
      "grad_norm": 0.5201823115348816,
      "learning_rate": 4.465581202269329e-06,
      "loss": 0.0149,
      "step": 1699840
    },
    {
      "epoch": 2.7818581724632274,
      "grad_norm": 0.19210104644298553,
      "learning_rate": 4.4655153100558115e-06,
      "loss": 0.0113,
      "step": 1699860
    },
    {
      "epoch": 2.7818909029018806,
      "grad_norm": 0.5097942352294922,
      "learning_rate": 4.465449417842294e-06,
      "loss": 0.0175,
      "step": 1699880
    },
    {
      "epoch": 2.781923633340534,
      "grad_norm": 0.12833453714847565,
      "learning_rate": 4.465383525628777e-06,
      "loss": 0.0123,
      "step": 1699900
    },
    {
      "epoch": 2.7819563637791873,
      "grad_norm": 0.2179010510444641,
      "learning_rate": 4.46531763341526e-06,
      "loss": 0.0129,
      "step": 1699920
    },
    {
      "epoch": 2.781989094217841,
      "grad_norm": 0.12323744595050812,
      "learning_rate": 4.4652517412017425e-06,
      "loss": 0.0158,
      "step": 1699940
    },
    {
      "epoch": 2.782021824656494,
      "grad_norm": 0.6510597467422485,
      "learning_rate": 4.465185848988225e-06,
      "loss": 0.0125,
      "step": 1699960
    },
    {
      "epoch": 2.782054555095147,
      "grad_norm": 0.33134907484054565,
      "learning_rate": 4.465119956774708e-06,
      "loss": 0.0105,
      "step": 1699980
    },
    {
      "epoch": 2.782087285533801,
      "grad_norm": 0.09900204092264175,
      "learning_rate": 4.4650540645611916e-06,
      "loss": 0.0128,
      "step": 1700000
    },
    {
      "epoch": 2.782087285533801,
      "eval_loss": 0.0076308180578053,
      "eval_runtime": 6517.3631,
      "eval_samples_per_second": 157.711,
      "eval_steps_per_second": 15.771,
      "eval_sts-dev_pearson_cosine": 0.9824747382539333,
      "eval_sts-dev_spearman_cosine": 0.893936948244662,
      "step": 1700000
    },
    {
      "epoch": 2.782120015972454,
      "grad_norm": 0.24478954076766968,
      "learning_rate": 4.464988172347674e-06,
      "loss": 0.0108,
      "step": 1700020
    },
    {
      "epoch": 2.7821527464111075,
      "grad_norm": 0.1592838317155838,
      "learning_rate": 4.464922280134157e-06,
      "loss": 0.011,
      "step": 1700040
    },
    {
      "epoch": 2.7821854768497607,
      "grad_norm": 0.11017468571662903,
      "learning_rate": 4.46485638792064e-06,
      "loss": 0.0097,
      "step": 1700060
    },
    {
      "epoch": 2.7822182072884143,
      "grad_norm": 0.18369267880916595,
      "learning_rate": 4.4647904957071225e-06,
      "loss": 0.0132,
      "step": 1700080
    },
    {
      "epoch": 2.7822509377270674,
      "grad_norm": 0.5362558960914612,
      "learning_rate": 4.464724603493605e-06,
      "loss": 0.0158,
      "step": 1700100
    },
    {
      "epoch": 2.7822836681657206,
      "grad_norm": 0.3094538450241089,
      "learning_rate": 4.464658711280088e-06,
      "loss": 0.0191,
      "step": 1700120
    },
    {
      "epoch": 2.782316398604374,
      "grad_norm": 0.16649870574474335,
      "learning_rate": 4.464592819066571e-06,
      "loss": 0.0109,
      "step": 1700140
    },
    {
      "epoch": 2.7823491290430273,
      "grad_norm": 0.32186150550842285,
      "learning_rate": 4.4645269268530535e-06,
      "loss": 0.0108,
      "step": 1700160
    },
    {
      "epoch": 2.782381859481681,
      "grad_norm": 0.24277126789093018,
      "learning_rate": 4.464461034639537e-06,
      "loss": 0.0159,
      "step": 1700180
    },
    {
      "epoch": 2.782414589920334,
      "grad_norm": 0.5280629992485046,
      "learning_rate": 4.46439514242602e-06,
      "loss": 0.0093,
      "step": 1700200
    },
    {
      "epoch": 2.7824473203589877,
      "grad_norm": 0.21990512311458588,
      "learning_rate": 4.4643292502125026e-06,
      "loss": 0.0148,
      "step": 1700220
    },
    {
      "epoch": 2.782480050797641,
      "grad_norm": 0.6536983847618103,
      "learning_rate": 4.464263357998986e-06,
      "loss": 0.0153,
      "step": 1700240
    },
    {
      "epoch": 2.782512781236294,
      "grad_norm": 0.39174336194992065,
      "learning_rate": 4.464197465785469e-06,
      "loss": 0.0083,
      "step": 1700260
    },
    {
      "epoch": 2.7825455116749476,
      "grad_norm": 0.4127531349658966,
      "learning_rate": 4.464131573571952e-06,
      "loss": 0.0138,
      "step": 1700280
    },
    {
      "epoch": 2.7825782421136007,
      "grad_norm": 0.5916951298713684,
      "learning_rate": 4.464065681358434e-06,
      "loss": 0.0113,
      "step": 1700300
    },
    {
      "epoch": 2.782610972552254,
      "grad_norm": 0.9878321290016174,
      "learning_rate": 4.463999789144917e-06,
      "loss": 0.0191,
      "step": 1700320
    },
    {
      "epoch": 2.7826437029909075,
      "grad_norm": 0.216761976480484,
      "learning_rate": 4.4639338969314e-06,
      "loss": 0.0148,
      "step": 1700340
    },
    {
      "epoch": 2.782676433429561,
      "grad_norm": 0.1777646541595459,
      "learning_rate": 4.463868004717883e-06,
      "loss": 0.0141,
      "step": 1700360
    },
    {
      "epoch": 2.782709163868214,
      "grad_norm": 0.18215832114219666,
      "learning_rate": 4.463802112504365e-06,
      "loss": 0.0096,
      "step": 1700380
    },
    {
      "epoch": 2.7827418943068674,
      "grad_norm": 0.20748193562030792,
      "learning_rate": 4.463736220290849e-06,
      "loss": 0.0136,
      "step": 1700400
    },
    {
      "epoch": 2.782774624745521,
      "grad_norm": 0.4239956736564636,
      "learning_rate": 4.463670328077332e-06,
      "loss": 0.0108,
      "step": 1700420
    },
    {
      "epoch": 2.782807355184174,
      "grad_norm": 0.17844520509243011,
      "learning_rate": 4.463604435863814e-06,
      "loss": 0.0109,
      "step": 1700440
    },
    {
      "epoch": 2.7828400856228273,
      "grad_norm": 0.12159644067287445,
      "learning_rate": 4.463538543650297e-06,
      "loss": 0.0174,
      "step": 1700460
    },
    {
      "epoch": 2.782872816061481,
      "grad_norm": 0.5216274857521057,
      "learning_rate": 4.46347265143678e-06,
      "loss": 0.0158,
      "step": 1700480
    },
    {
      "epoch": 2.7829055465001344,
      "grad_norm": 0.5102623105049133,
      "learning_rate": 4.463406759223263e-06,
      "loss": 0.0122,
      "step": 1700500
    },
    {
      "epoch": 2.7829382769387876,
      "grad_norm": 0.6451380848884583,
      "learning_rate": 4.463340867009745e-06,
      "loss": 0.0115,
      "step": 1700520
    },
    {
      "epoch": 2.7829710073774407,
      "grad_norm": 0.3003912568092346,
      "learning_rate": 4.463274974796229e-06,
      "loss": 0.0149,
      "step": 1700540
    },
    {
      "epoch": 2.7830037378160943,
      "grad_norm": 0.5146080255508423,
      "learning_rate": 4.463209082582712e-06,
      "loss": 0.0245,
      "step": 1700560
    },
    {
      "epoch": 2.7830364682547475,
      "grad_norm": 0.17708668112754822,
      "learning_rate": 4.463143190369194e-06,
      "loss": 0.0158,
      "step": 1700580
    },
    {
      "epoch": 2.7830691986934006,
      "grad_norm": 0.31674423813819885,
      "learning_rate": 4.463077298155677e-06,
      "loss": 0.0114,
      "step": 1700600
    },
    {
      "epoch": 2.7831019291320542,
      "grad_norm": 0.3752850890159607,
      "learning_rate": 4.463011405942161e-06,
      "loss": 0.0148,
      "step": 1700620
    },
    {
      "epoch": 2.783134659570708,
      "grad_norm": 0.7238761782646179,
      "learning_rate": 4.4629455137286435e-06,
      "loss": 0.0256,
      "step": 1700640
    },
    {
      "epoch": 2.783167390009361,
      "grad_norm": 0.2701529562473297,
      "learning_rate": 4.462879621515126e-06,
      "loss": 0.0133,
      "step": 1700660
    },
    {
      "epoch": 2.783200120448014,
      "grad_norm": 0.19203947484493256,
      "learning_rate": 4.462813729301609e-06,
      "loss": 0.0142,
      "step": 1700680
    },
    {
      "epoch": 2.7832328508866677,
      "grad_norm": 0.0900697112083435,
      "learning_rate": 4.462747837088092e-06,
      "loss": 0.0153,
      "step": 1700700
    },
    {
      "epoch": 2.783265581325321,
      "grad_norm": 0.1924421191215515,
      "learning_rate": 4.4626819448745744e-06,
      "loss": 0.0105,
      "step": 1700720
    },
    {
      "epoch": 2.783298311763974,
      "grad_norm": 0.3723667860031128,
      "learning_rate": 4.462616052661057e-06,
      "loss": 0.0188,
      "step": 1700740
    },
    {
      "epoch": 2.7833310422026276,
      "grad_norm": 2.02824068069458,
      "learning_rate": 4.46255016044754e-06,
      "loss": 0.0191,
      "step": 1700760
    },
    {
      "epoch": 2.783363772641281,
      "grad_norm": 0.10919225960969925,
      "learning_rate": 4.462484268234023e-06,
      "loss": 0.0123,
      "step": 1700780
    },
    {
      "epoch": 2.7833965030799344,
      "grad_norm": 0.41203513741493225,
      "learning_rate": 4.462418376020506e-06,
      "loss": 0.0117,
      "step": 1700800
    },
    {
      "epoch": 2.7834292335185875,
      "grad_norm": 0.29683560132980347,
      "learning_rate": 4.462352483806989e-06,
      "loss": 0.0171,
      "step": 1700820
    },
    {
      "epoch": 2.783461963957241,
      "grad_norm": 0.5797659158706665,
      "learning_rate": 4.462286591593472e-06,
      "loss": 0.019,
      "step": 1700840
    },
    {
      "epoch": 2.7834946943958943,
      "grad_norm": 0.12585340440273285,
      "learning_rate": 4.4622206993799545e-06,
      "loss": 0.0177,
      "step": 1700860
    },
    {
      "epoch": 2.7835274248345474,
      "grad_norm": 0.10643377900123596,
      "learning_rate": 4.462154807166437e-06,
      "loss": 0.0108,
      "step": 1700880
    },
    {
      "epoch": 2.783560155273201,
      "grad_norm": 0.18415573239326477,
      "learning_rate": 4.462088914952921e-06,
      "loss": 0.0153,
      "step": 1700900
    },
    {
      "epoch": 2.783592885711854,
      "grad_norm": 0.15235604345798492,
      "learning_rate": 4.4620230227394035e-06,
      "loss": 0.0116,
      "step": 1700920
    },
    {
      "epoch": 2.7836256161505077,
      "grad_norm": 0.12965570390224457,
      "learning_rate": 4.461957130525886e-06,
      "loss": 0.0137,
      "step": 1700940
    },
    {
      "epoch": 2.783658346589161,
      "grad_norm": 0.3292236924171448,
      "learning_rate": 4.461891238312369e-06,
      "loss": 0.0157,
      "step": 1700960
    },
    {
      "epoch": 2.7836910770278145,
      "grad_norm": 0.5923365950584412,
      "learning_rate": 4.461825346098852e-06,
      "loss": 0.0176,
      "step": 1700980
    },
    {
      "epoch": 2.7837238074664676,
      "grad_norm": 0.21201369166374207,
      "learning_rate": 4.4617594538853345e-06,
      "loss": 0.012,
      "step": 1701000
    },
    {
      "epoch": 2.783756537905121,
      "grad_norm": 1.4357116222381592,
      "learning_rate": 4.461693561671818e-06,
      "loss": 0.0128,
      "step": 1701020
    },
    {
      "epoch": 2.7837892683437744,
      "grad_norm": 0.3317511975765228,
      "learning_rate": 4.461627669458301e-06,
      "loss": 0.0184,
      "step": 1701040
    },
    {
      "epoch": 2.7838219987824275,
      "grad_norm": 1.0499862432479858,
      "learning_rate": 4.4615617772447836e-06,
      "loss": 0.0102,
      "step": 1701060
    },
    {
      "epoch": 2.783854729221081,
      "grad_norm": 0.7085772752761841,
      "learning_rate": 4.461495885031266e-06,
      "loss": 0.0144,
      "step": 1701080
    },
    {
      "epoch": 2.7838874596597343,
      "grad_norm": 0.2589752674102783,
      "learning_rate": 4.461429992817749e-06,
      "loss": 0.0117,
      "step": 1701100
    },
    {
      "epoch": 2.783920190098388,
      "grad_norm": 0.25869572162628174,
      "learning_rate": 4.461364100604232e-06,
      "loss": 0.0124,
      "step": 1701120
    },
    {
      "epoch": 2.783952920537041,
      "grad_norm": 0.5753869414329529,
      "learning_rate": 4.4612982083907145e-06,
      "loss": 0.0089,
      "step": 1701140
    },
    {
      "epoch": 2.783985650975694,
      "grad_norm": 0.3671010434627533,
      "learning_rate": 4.461232316177197e-06,
      "loss": 0.0149,
      "step": 1701160
    },
    {
      "epoch": 2.7840183814143478,
      "grad_norm": 0.3767070770263672,
      "learning_rate": 4.46116642396368e-06,
      "loss": 0.0154,
      "step": 1701180
    },
    {
      "epoch": 2.784051111853001,
      "grad_norm": 0.35510578751564026,
      "learning_rate": 4.461100531750164e-06,
      "loss": 0.0123,
      "step": 1701200
    },
    {
      "epoch": 2.7840838422916545,
      "grad_norm": 2.1165783405303955,
      "learning_rate": 4.461034639536646e-06,
      "loss": 0.0186,
      "step": 1701220
    },
    {
      "epoch": 2.7841165727303077,
      "grad_norm": 1.2169604301452637,
      "learning_rate": 4.460968747323129e-06,
      "loss": 0.0147,
      "step": 1701240
    },
    {
      "epoch": 2.7841493031689613,
      "grad_norm": 0.3497714400291443,
      "learning_rate": 4.460902855109613e-06,
      "loss": 0.0078,
      "step": 1701260
    },
    {
      "epoch": 2.7841820336076144,
      "grad_norm": 0.33984795212745667,
      "learning_rate": 4.460836962896095e-06,
      "loss": 0.0124,
      "step": 1701280
    },
    {
      "epoch": 2.7842147640462676,
      "grad_norm": 0.08332991600036621,
      "learning_rate": 4.460771070682578e-06,
      "loss": 0.0148,
      "step": 1701300
    },
    {
      "epoch": 2.784247494484921,
      "grad_norm": 0.34225553274154663,
      "learning_rate": 4.460705178469061e-06,
      "loss": 0.0124,
      "step": 1701320
    },
    {
      "epoch": 2.7842802249235743,
      "grad_norm": 0.3068380355834961,
      "learning_rate": 4.460639286255544e-06,
      "loss": 0.0099,
      "step": 1701340
    },
    {
      "epoch": 2.784312955362228,
      "grad_norm": 1.3305261135101318,
      "learning_rate": 4.460573394042026e-06,
      "loss": 0.0148,
      "step": 1701360
    },
    {
      "epoch": 2.784345685800881,
      "grad_norm": 1.1826574802398682,
      "learning_rate": 4.460507501828509e-06,
      "loss": 0.016,
      "step": 1701380
    },
    {
      "epoch": 2.7843784162395346,
      "grad_norm": 0.10378510504961014,
      "learning_rate": 4.460441609614992e-06,
      "loss": 0.0172,
      "step": 1701400
    },
    {
      "epoch": 2.784411146678188,
      "grad_norm": 0.2527959942817688,
      "learning_rate": 4.460375717401475e-06,
      "loss": 0.0121,
      "step": 1701420
    },
    {
      "epoch": 2.784443877116841,
      "grad_norm": 0.26167675852775574,
      "learning_rate": 4.460309825187958e-06,
      "loss": 0.0201,
      "step": 1701440
    },
    {
      "epoch": 2.7844766075554945,
      "grad_norm": 0.40621525049209595,
      "learning_rate": 4.460243932974441e-06,
      "loss": 0.0079,
      "step": 1701460
    },
    {
      "epoch": 2.7845093379941477,
      "grad_norm": 0.2856399416923523,
      "learning_rate": 4.460178040760924e-06,
      "loss": 0.0084,
      "step": 1701480
    },
    {
      "epoch": 2.7845420684328013,
      "grad_norm": 0.7377222180366516,
      "learning_rate": 4.460112148547406e-06,
      "loss": 0.0109,
      "step": 1701500
    },
    {
      "epoch": 2.7845747988714544,
      "grad_norm": 0.6936909556388855,
      "learning_rate": 4.460046256333889e-06,
      "loss": 0.0174,
      "step": 1701520
    },
    {
      "epoch": 2.784607529310108,
      "grad_norm": 0.0923980250954628,
      "learning_rate": 4.459980364120372e-06,
      "loss": 0.0134,
      "step": 1701540
    },
    {
      "epoch": 2.784640259748761,
      "grad_norm": 0.19879961013793945,
      "learning_rate": 4.459914471906855e-06,
      "loss": 0.0104,
      "step": 1701560
    },
    {
      "epoch": 2.7846729901874143,
      "grad_norm": 0.31400665640830994,
      "learning_rate": 4.459848579693337e-06,
      "loss": 0.011,
      "step": 1701580
    },
    {
      "epoch": 2.784705720626068,
      "grad_norm": 0.4962938725948334,
      "learning_rate": 4.459782687479821e-06,
      "loss": 0.0104,
      "step": 1701600
    },
    {
      "epoch": 2.784738451064721,
      "grad_norm": 0.2401888221502304,
      "learning_rate": 4.459716795266304e-06,
      "loss": 0.0172,
      "step": 1701620
    },
    {
      "epoch": 2.7847711815033747,
      "grad_norm": 0.04369337856769562,
      "learning_rate": 4.459650903052786e-06,
      "loss": 0.0124,
      "step": 1701640
    },
    {
      "epoch": 2.784803911942028,
      "grad_norm": 0.1450636088848114,
      "learning_rate": 4.45958501083927e-06,
      "loss": 0.0098,
      "step": 1701660
    },
    {
      "epoch": 2.7848366423806814,
      "grad_norm": 0.12332814186811447,
      "learning_rate": 4.459519118625753e-06,
      "loss": 0.0132,
      "step": 1701680
    },
    {
      "epoch": 2.7848693728193346,
      "grad_norm": 0.97572922706604,
      "learning_rate": 4.4594532264122355e-06,
      "loss": 0.0181,
      "step": 1701700
    },
    {
      "epoch": 2.7849021032579877,
      "grad_norm": 0.1672181785106659,
      "learning_rate": 4.459387334198718e-06,
      "loss": 0.0093,
      "step": 1701720
    },
    {
      "epoch": 2.7849348336966413,
      "grad_norm": 0.24973997473716736,
      "learning_rate": 4.459321441985201e-06,
      "loss": 0.011,
      "step": 1701740
    },
    {
      "epoch": 2.7849675641352944,
      "grad_norm": 0.22412985563278198,
      "learning_rate": 4.459255549771684e-06,
      "loss": 0.0167,
      "step": 1701760
    },
    {
      "epoch": 2.785000294573948,
      "grad_norm": 0.28297701478004456,
      "learning_rate": 4.459189657558166e-06,
      "loss": 0.0105,
      "step": 1701780
    },
    {
      "epoch": 2.785033025012601,
      "grad_norm": 0.21178288757801056,
      "learning_rate": 4.459123765344649e-06,
      "loss": 0.0107,
      "step": 1701800
    },
    {
      "epoch": 2.785065755451255,
      "grad_norm": 0.5233063697814941,
      "learning_rate": 4.459057873131133e-06,
      "loss": 0.0128,
      "step": 1701820
    },
    {
      "epoch": 2.785098485889908,
      "grad_norm": 0.2738586962223053,
      "learning_rate": 4.4589919809176155e-06,
      "loss": 0.0142,
      "step": 1701840
    },
    {
      "epoch": 2.785131216328561,
      "grad_norm": 0.46516939997673035,
      "learning_rate": 4.458926088704098e-06,
      "loss": 0.0224,
      "step": 1701860
    },
    {
      "epoch": 2.7851639467672147,
      "grad_norm": 0.4577118754386902,
      "learning_rate": 4.458860196490581e-06,
      "loss": 0.0142,
      "step": 1701880
    },
    {
      "epoch": 2.785196677205868,
      "grad_norm": 0.07663536071777344,
      "learning_rate": 4.458794304277064e-06,
      "loss": 0.0104,
      "step": 1701900
    },
    {
      "epoch": 2.785229407644521,
      "grad_norm": 0.49229511618614197,
      "learning_rate": 4.4587284120635464e-06,
      "loss": 0.0142,
      "step": 1701920
    },
    {
      "epoch": 2.7852621380831746,
      "grad_norm": 0.2839459776878357,
      "learning_rate": 4.45866251985003e-06,
      "loss": 0.0098,
      "step": 1701940
    },
    {
      "epoch": 2.785294868521828,
      "grad_norm": 0.09087200462818146,
      "learning_rate": 4.458596627636513e-06,
      "loss": 0.02,
      "step": 1701960
    },
    {
      "epoch": 2.7853275989604813,
      "grad_norm": 0.4188403785228729,
      "learning_rate": 4.4585307354229955e-06,
      "loss": 0.0117,
      "step": 1701980
    },
    {
      "epoch": 2.7853603293991345,
      "grad_norm": 0.4944915771484375,
      "learning_rate": 4.458464843209478e-06,
      "loss": 0.0147,
      "step": 1702000
    },
    {
      "epoch": 2.785393059837788,
      "grad_norm": 0.1475069522857666,
      "learning_rate": 4.458398950995961e-06,
      "loss": 0.0128,
      "step": 1702020
    },
    {
      "epoch": 2.785425790276441,
      "grad_norm": 0.6613920331001282,
      "learning_rate": 4.4583330587824446e-06,
      "loss": 0.0115,
      "step": 1702040
    },
    {
      "epoch": 2.7854585207150944,
      "grad_norm": 0.4656117260456085,
      "learning_rate": 4.458267166568927e-06,
      "loss": 0.0193,
      "step": 1702060
    },
    {
      "epoch": 2.785491251153748,
      "grad_norm": 0.8612162470817566,
      "learning_rate": 4.45820127435541e-06,
      "loss": 0.0203,
      "step": 1702080
    },
    {
      "epoch": 2.7855239815924016,
      "grad_norm": 0.23383910953998566,
      "learning_rate": 4.458135382141893e-06,
      "loss": 0.0087,
      "step": 1702100
    },
    {
      "epoch": 2.7855567120310547,
      "grad_norm": 0.3386383056640625,
      "learning_rate": 4.4580694899283755e-06,
      "loss": 0.0167,
      "step": 1702120
    },
    {
      "epoch": 2.785589442469708,
      "grad_norm": 0.07970712333917618,
      "learning_rate": 4.458003597714858e-06,
      "loss": 0.0134,
      "step": 1702140
    },
    {
      "epoch": 2.7856221729083614,
      "grad_norm": 0.12734483182430267,
      "learning_rate": 4.457937705501341e-06,
      "loss": 0.0152,
      "step": 1702160
    },
    {
      "epoch": 2.7856549033470146,
      "grad_norm": 0.5408629179000854,
      "learning_rate": 4.457871813287824e-06,
      "loss": 0.0188,
      "step": 1702180
    },
    {
      "epoch": 2.7856876337856677,
      "grad_norm": 0.6889929175376892,
      "learning_rate": 4.4578059210743065e-06,
      "loss": 0.0157,
      "step": 1702200
    },
    {
      "epoch": 2.7857203642243213,
      "grad_norm": 0.16840027272701263,
      "learning_rate": 4.45774002886079e-06,
      "loss": 0.0139,
      "step": 1702220
    },
    {
      "epoch": 2.785753094662975,
      "grad_norm": 0.1699667125940323,
      "learning_rate": 4.457674136647273e-06,
      "loss": 0.0158,
      "step": 1702240
    },
    {
      "epoch": 2.785785825101628,
      "grad_norm": 0.41418495774269104,
      "learning_rate": 4.4576082444337556e-06,
      "loss": 0.014,
      "step": 1702260
    },
    {
      "epoch": 2.7858185555402812,
      "grad_norm": 0.3470005989074707,
      "learning_rate": 4.457542352220238e-06,
      "loss": 0.018,
      "step": 1702280
    },
    {
      "epoch": 2.785851285978935,
      "grad_norm": 0.21204374730587006,
      "learning_rate": 4.457476460006722e-06,
      "loss": 0.0135,
      "step": 1702300
    },
    {
      "epoch": 2.785884016417588,
      "grad_norm": 0.3428763747215271,
      "learning_rate": 4.457410567793205e-06,
      "loss": 0.0114,
      "step": 1702320
    },
    {
      "epoch": 2.785916746856241,
      "grad_norm": 0.272148996591568,
      "learning_rate": 4.457344675579687e-06,
      "loss": 0.0168,
      "step": 1702340
    },
    {
      "epoch": 2.7859494772948947,
      "grad_norm": 0.12197326123714447,
      "learning_rate": 4.45727878336617e-06,
      "loss": 0.0095,
      "step": 1702360
    },
    {
      "epoch": 2.7859822077335483,
      "grad_norm": 0.48585045337677,
      "learning_rate": 4.457212891152653e-06,
      "loss": 0.0127,
      "step": 1702380
    },
    {
      "epoch": 2.7860149381722015,
      "grad_norm": 0.5756152868270874,
      "learning_rate": 4.457146998939136e-06,
      "loss": 0.0166,
      "step": 1702400
    },
    {
      "epoch": 2.7860476686108546,
      "grad_norm": 0.2528233826160431,
      "learning_rate": 4.457081106725618e-06,
      "loss": 0.0092,
      "step": 1702420
    },
    {
      "epoch": 2.786080399049508,
      "grad_norm": 0.21943411231040955,
      "learning_rate": 4.457015214512102e-06,
      "loss": 0.0133,
      "step": 1702440
    },
    {
      "epoch": 2.7861131294881614,
      "grad_norm": 0.15868504345417023,
      "learning_rate": 4.456949322298585e-06,
      "loss": 0.0144,
      "step": 1702460
    },
    {
      "epoch": 2.7861458599268145,
      "grad_norm": 0.17708395421504974,
      "learning_rate": 4.456883430085067e-06,
      "loss": 0.0139,
      "step": 1702480
    },
    {
      "epoch": 2.786178590365468,
      "grad_norm": 0.2773963212966919,
      "learning_rate": 4.45681753787155e-06,
      "loss": 0.0134,
      "step": 1702500
    },
    {
      "epoch": 2.7862113208041213,
      "grad_norm": 0.17312419414520264,
      "learning_rate": 4.456751645658033e-06,
      "loss": 0.0105,
      "step": 1702520
    },
    {
      "epoch": 2.786244051242775,
      "grad_norm": 0.3527075946331024,
      "learning_rate": 4.456685753444516e-06,
      "loss": 0.0137,
      "step": 1702540
    },
    {
      "epoch": 2.786276781681428,
      "grad_norm": 0.1862584799528122,
      "learning_rate": 4.456619861230998e-06,
      "loss": 0.0138,
      "step": 1702560
    },
    {
      "epoch": 2.7863095121200816,
      "grad_norm": 0.29109594225883484,
      "learning_rate": 4.456553969017481e-06,
      "loss": 0.0109,
      "step": 1702580
    },
    {
      "epoch": 2.7863422425587347,
      "grad_norm": 0.1722862720489502,
      "learning_rate": 4.456488076803964e-06,
      "loss": 0.0153,
      "step": 1702600
    },
    {
      "epoch": 2.786374972997388,
      "grad_norm": 0.19936835765838623,
      "learning_rate": 4.456422184590447e-06,
      "loss": 0.0072,
      "step": 1702620
    },
    {
      "epoch": 2.7864077034360415,
      "grad_norm": 0.26465344429016113,
      "learning_rate": 4.45635629237693e-06,
      "loss": 0.0178,
      "step": 1702640
    },
    {
      "epoch": 2.7864404338746946,
      "grad_norm": 0.40042558312416077,
      "learning_rate": 4.456290400163413e-06,
      "loss": 0.0175,
      "step": 1702660
    },
    {
      "epoch": 2.7864731643133482,
      "grad_norm": 1.0156277418136597,
      "learning_rate": 4.4562245079498965e-06,
      "loss": 0.011,
      "step": 1702680
    },
    {
      "epoch": 2.7865058947520014,
      "grad_norm": 0.2990874648094177,
      "learning_rate": 4.456158615736379e-06,
      "loss": 0.0119,
      "step": 1702700
    },
    {
      "epoch": 2.786538625190655,
      "grad_norm": 0.1479220986366272,
      "learning_rate": 4.456092723522862e-06,
      "loss": 0.0171,
      "step": 1702720
    },
    {
      "epoch": 2.786571355629308,
      "grad_norm": 0.243901789188385,
      "learning_rate": 4.456026831309345e-06,
      "loss": 0.0139,
      "step": 1702740
    },
    {
      "epoch": 2.7866040860679613,
      "grad_norm": 0.28475135564804077,
      "learning_rate": 4.4559609390958274e-06,
      "loss": 0.0158,
      "step": 1702760
    },
    {
      "epoch": 2.786636816506615,
      "grad_norm": 0.28500694036483765,
      "learning_rate": 4.45589504688231e-06,
      "loss": 0.0205,
      "step": 1702780
    },
    {
      "epoch": 2.786669546945268,
      "grad_norm": 0.2375873327255249,
      "learning_rate": 4.455829154668793e-06,
      "loss": 0.0147,
      "step": 1702800
    },
    {
      "epoch": 2.7867022773839216,
      "grad_norm": 0.49090006947517395,
      "learning_rate": 4.455763262455276e-06,
      "loss": 0.013,
      "step": 1702820
    },
    {
      "epoch": 2.7867350078225748,
      "grad_norm": 0.6008480191230774,
      "learning_rate": 4.455697370241759e-06,
      "loss": 0.0118,
      "step": 1702840
    },
    {
      "epoch": 2.7867677382612284,
      "grad_norm": 0.15228146314620972,
      "learning_rate": 4.455631478028242e-06,
      "loss": 0.0084,
      "step": 1702860
    },
    {
      "epoch": 2.7868004686998815,
      "grad_norm": 0.4711242616176605,
      "learning_rate": 4.455565585814725e-06,
      "loss": 0.0106,
      "step": 1702880
    },
    {
      "epoch": 2.7868331991385347,
      "grad_norm": 0.11007942259311676,
      "learning_rate": 4.4554996936012075e-06,
      "loss": 0.0093,
      "step": 1702900
    },
    {
      "epoch": 2.7868659295771883,
      "grad_norm": 0.4079582095146179,
      "learning_rate": 4.45543380138769e-06,
      "loss": 0.0139,
      "step": 1702920
    },
    {
      "epoch": 2.7868986600158414,
      "grad_norm": 0.12224634736776352,
      "learning_rate": 4.455367909174173e-06,
      "loss": 0.0161,
      "step": 1702940
    },
    {
      "epoch": 2.786931390454495,
      "grad_norm": 0.14239084720611572,
      "learning_rate": 4.455302016960656e-06,
      "loss": 0.0089,
      "step": 1702960
    },
    {
      "epoch": 2.786964120893148,
      "grad_norm": 0.12356610596179962,
      "learning_rate": 4.4552361247471384e-06,
      "loss": 0.0139,
      "step": 1702980
    },
    {
      "epoch": 2.7869968513318018,
      "grad_norm": 0.27075639367103577,
      "learning_rate": 4.455170232533622e-06,
      "loss": 0.0117,
      "step": 1703000
    },
    {
      "epoch": 2.787029581770455,
      "grad_norm": 0.39822450280189514,
      "learning_rate": 4.455104340320105e-06,
      "loss": 0.0123,
      "step": 1703020
    },
    {
      "epoch": 2.787062312209108,
      "grad_norm": 0.47997140884399414,
      "learning_rate": 4.4550384481065875e-06,
      "loss": 0.0121,
      "step": 1703040
    },
    {
      "epoch": 2.7870950426477616,
      "grad_norm": 0.10261408239603043,
      "learning_rate": 4.45497255589307e-06,
      "loss": 0.0172,
      "step": 1703060
    },
    {
      "epoch": 2.787127773086415,
      "grad_norm": 0.24028947949409485,
      "learning_rate": 4.454906663679554e-06,
      "loss": 0.01,
      "step": 1703080
    },
    {
      "epoch": 2.7871605035250684,
      "grad_norm": 0.2382747232913971,
      "learning_rate": 4.4548407714660366e-06,
      "loss": 0.0186,
      "step": 1703100
    },
    {
      "epoch": 2.7871932339637215,
      "grad_norm": 0.37422695755958557,
      "learning_rate": 4.454774879252519e-06,
      "loss": 0.0078,
      "step": 1703120
    },
    {
      "epoch": 2.787225964402375,
      "grad_norm": 0.17969456315040588,
      "learning_rate": 4.454708987039002e-06,
      "loss": 0.0166,
      "step": 1703140
    },
    {
      "epoch": 2.7872586948410283,
      "grad_norm": 0.4540388286113739,
      "learning_rate": 4.454643094825485e-06,
      "loss": 0.0076,
      "step": 1703160
    },
    {
      "epoch": 2.7872914252796814,
      "grad_norm": 0.21546028554439545,
      "learning_rate": 4.4545772026119675e-06,
      "loss": 0.0177,
      "step": 1703180
    },
    {
      "epoch": 2.787324155718335,
      "grad_norm": 0.14988800883293152,
      "learning_rate": 4.45451131039845e-06,
      "loss": 0.0169,
      "step": 1703200
    },
    {
      "epoch": 2.787356886156988,
      "grad_norm": 0.5073782205581665,
      "learning_rate": 4.454445418184933e-06,
      "loss": 0.012,
      "step": 1703220
    },
    {
      "epoch": 2.7873896165956418,
      "grad_norm": 0.442911297082901,
      "learning_rate": 4.454379525971417e-06,
      "loss": 0.0106,
      "step": 1703240
    },
    {
      "epoch": 2.787422347034295,
      "grad_norm": 0.4064704179763794,
      "learning_rate": 4.454313633757899e-06,
      "loss": 0.0143,
      "step": 1703260
    },
    {
      "epoch": 2.7874550774729485,
      "grad_norm": 0.9714222550392151,
      "learning_rate": 4.454247741544382e-06,
      "loss": 0.0103,
      "step": 1703280
    },
    {
      "epoch": 2.7874878079116017,
      "grad_norm": 0.2536148726940155,
      "learning_rate": 4.454181849330865e-06,
      "loss": 0.013,
      "step": 1703300
    },
    {
      "epoch": 2.787520538350255,
      "grad_norm": 0.4933558702468872,
      "learning_rate": 4.4541159571173475e-06,
      "loss": 0.0134,
      "step": 1703320
    },
    {
      "epoch": 2.7875532687889084,
      "grad_norm": 0.14187359809875488,
      "learning_rate": 4.45405006490383e-06,
      "loss": 0.0176,
      "step": 1703340
    },
    {
      "epoch": 2.7875859992275616,
      "grad_norm": 0.23264944553375244,
      "learning_rate": 4.453984172690314e-06,
      "loss": 0.0224,
      "step": 1703360
    },
    {
      "epoch": 2.7876187296662147,
      "grad_norm": 0.26832669973373413,
      "learning_rate": 4.453918280476797e-06,
      "loss": 0.0155,
      "step": 1703380
    },
    {
      "epoch": 2.7876514601048683,
      "grad_norm": 0.5434706807136536,
      "learning_rate": 4.453852388263279e-06,
      "loss": 0.0158,
      "step": 1703400
    },
    {
      "epoch": 2.787684190543522,
      "grad_norm": 2.327592611312866,
      "learning_rate": 4.453786496049762e-06,
      "loss": 0.0152,
      "step": 1703420
    },
    {
      "epoch": 2.787716920982175,
      "grad_norm": 0.3746492266654968,
      "learning_rate": 4.453720603836245e-06,
      "loss": 0.014,
      "step": 1703440
    },
    {
      "epoch": 2.787749651420828,
      "grad_norm": 0.30894818902015686,
      "learning_rate": 4.453654711622728e-06,
      "loss": 0.0237,
      "step": 1703460
    },
    {
      "epoch": 2.787782381859482,
      "grad_norm": 0.12256994098424911,
      "learning_rate": 4.453588819409211e-06,
      "loss": 0.0109,
      "step": 1703480
    },
    {
      "epoch": 2.787815112298135,
      "grad_norm": 0.3446318805217743,
      "learning_rate": 4.453522927195694e-06,
      "loss": 0.0171,
      "step": 1703500
    },
    {
      "epoch": 2.787847842736788,
      "grad_norm": 0.06887686252593994,
      "learning_rate": 4.453457034982177e-06,
      "loss": 0.0126,
      "step": 1703520
    },
    {
      "epoch": 2.7878805731754417,
      "grad_norm": 0.17240402102470398,
      "learning_rate": 4.453391142768659e-06,
      "loss": 0.0136,
      "step": 1703540
    },
    {
      "epoch": 2.7879133036140953,
      "grad_norm": 0.18572765588760376,
      "learning_rate": 4.453325250555142e-06,
      "loss": 0.0153,
      "step": 1703560
    },
    {
      "epoch": 2.7879460340527484,
      "grad_norm": 0.885355532169342,
      "learning_rate": 4.453259358341625e-06,
      "loss": 0.0185,
      "step": 1703580
    },
    {
      "epoch": 2.7879787644914016,
      "grad_norm": 0.08471426367759705,
      "learning_rate": 4.453193466128108e-06,
      "loss": 0.0088,
      "step": 1703600
    },
    {
      "epoch": 2.788011494930055,
      "grad_norm": 0.07187530398368835,
      "learning_rate": 4.45312757391459e-06,
      "loss": 0.0112,
      "step": 1703620
    },
    {
      "epoch": 2.7880442253687083,
      "grad_norm": 0.3857177197933197,
      "learning_rate": 4.453061681701074e-06,
      "loss": 0.0149,
      "step": 1703640
    },
    {
      "epoch": 2.7880769558073615,
      "grad_norm": 0.3364115357398987,
      "learning_rate": 4.452995789487557e-06,
      "loss": 0.0104,
      "step": 1703660
    },
    {
      "epoch": 2.788109686246015,
      "grad_norm": 0.23413316905498505,
      "learning_rate": 4.452929897274039e-06,
      "loss": 0.0107,
      "step": 1703680
    },
    {
      "epoch": 2.7881424166846687,
      "grad_norm": 0.35042262077331543,
      "learning_rate": 4.452864005060523e-06,
      "loss": 0.0101,
      "step": 1703700
    },
    {
      "epoch": 2.788175147123322,
      "grad_norm": 0.44626951217651367,
      "learning_rate": 4.452798112847006e-06,
      "loss": 0.0119,
      "step": 1703720
    },
    {
      "epoch": 2.788207877561975,
      "grad_norm": 1.2376344203948975,
      "learning_rate": 4.4527322206334885e-06,
      "loss": 0.0192,
      "step": 1703740
    },
    {
      "epoch": 2.7882406080006286,
      "grad_norm": 0.2242252230644226,
      "learning_rate": 4.452666328419971e-06,
      "loss": 0.0122,
      "step": 1703760
    },
    {
      "epoch": 2.7882733384392817,
      "grad_norm": 0.4629540741443634,
      "learning_rate": 4.452600436206454e-06,
      "loss": 0.0145,
      "step": 1703780
    },
    {
      "epoch": 2.788306068877935,
      "grad_norm": 0.345302015542984,
      "learning_rate": 4.452534543992937e-06,
      "loss": 0.0138,
      "step": 1703800
    },
    {
      "epoch": 2.7883387993165885,
      "grad_norm": 0.686733603477478,
      "learning_rate": 4.4524686517794194e-06,
      "loss": 0.0157,
      "step": 1703820
    },
    {
      "epoch": 2.788371529755242,
      "grad_norm": 0.35889390110969543,
      "learning_rate": 4.452402759565902e-06,
      "loss": 0.0161,
      "step": 1703840
    },
    {
      "epoch": 2.788404260193895,
      "grad_norm": 0.3436516225337982,
      "learning_rate": 4.452336867352386e-06,
      "loss": 0.0177,
      "step": 1703860
    },
    {
      "epoch": 2.7884369906325484,
      "grad_norm": 0.45794305205345154,
      "learning_rate": 4.4522709751388685e-06,
      "loss": 0.0144,
      "step": 1703880
    },
    {
      "epoch": 2.788469721071202,
      "grad_norm": 0.48952794075012207,
      "learning_rate": 4.452205082925351e-06,
      "loss": 0.012,
      "step": 1703900
    },
    {
      "epoch": 2.788502451509855,
      "grad_norm": 0.8192431330680847,
      "learning_rate": 4.452139190711834e-06,
      "loss": 0.0155,
      "step": 1703920
    },
    {
      "epoch": 2.7885351819485082,
      "grad_norm": 0.10900723934173584,
      "learning_rate": 4.452073298498317e-06,
      "loss": 0.0098,
      "step": 1703940
    },
    {
      "epoch": 2.788567912387162,
      "grad_norm": 0.08056871592998505,
      "learning_rate": 4.4520074062847994e-06,
      "loss": 0.0117,
      "step": 1703960
    },
    {
      "epoch": 2.788600642825815,
      "grad_norm": 0.196224644780159,
      "learning_rate": 4.451941514071282e-06,
      "loss": 0.0142,
      "step": 1703980
    },
    {
      "epoch": 2.7886333732644686,
      "grad_norm": 0.21746979653835297,
      "learning_rate": 4.451875621857765e-06,
      "loss": 0.0121,
      "step": 1704000
    },
    {
      "epoch": 2.7886661037031217,
      "grad_norm": 0.8572277426719666,
      "learning_rate": 4.451809729644248e-06,
      "loss": 0.0092,
      "step": 1704020
    },
    {
      "epoch": 2.7886988341417753,
      "grad_norm": 0.15517877042293549,
      "learning_rate": 4.451743837430731e-06,
      "loss": 0.009,
      "step": 1704040
    },
    {
      "epoch": 2.7887315645804285,
      "grad_norm": 0.12397913634777069,
      "learning_rate": 4.451677945217214e-06,
      "loss": 0.0138,
      "step": 1704060
    },
    {
      "epoch": 2.7887642950190816,
      "grad_norm": 0.12483195960521698,
      "learning_rate": 4.451612053003697e-06,
      "loss": 0.02,
      "step": 1704080
    },
    {
      "epoch": 2.7887970254577352,
      "grad_norm": 1.3428819179534912,
      "learning_rate": 4.45154616079018e-06,
      "loss": 0.0157,
      "step": 1704100
    },
    {
      "epoch": 2.7888297558963884,
      "grad_norm": 0.23902073502540588,
      "learning_rate": 4.451480268576663e-06,
      "loss": 0.0193,
      "step": 1704120
    },
    {
      "epoch": 2.788862486335042,
      "grad_norm": 0.17171043157577515,
      "learning_rate": 4.451414376363146e-06,
      "loss": 0.0128,
      "step": 1704140
    },
    {
      "epoch": 2.788895216773695,
      "grad_norm": 0.3102887272834778,
      "learning_rate": 4.4513484841496285e-06,
      "loss": 0.0156,
      "step": 1704160
    },
    {
      "epoch": 2.7889279472123487,
      "grad_norm": 0.17567940056324005,
      "learning_rate": 4.451282591936111e-06,
      "loss": 0.0194,
      "step": 1704180
    },
    {
      "epoch": 2.788960677651002,
      "grad_norm": 0.3934677541255951,
      "learning_rate": 4.451216699722594e-06,
      "loss": 0.0217,
      "step": 1704200
    },
    {
      "epoch": 2.788993408089655,
      "grad_norm": 0.27853327989578247,
      "learning_rate": 4.451150807509077e-06,
      "loss": 0.0118,
      "step": 1704220
    },
    {
      "epoch": 2.7890261385283086,
      "grad_norm": 0.35048189759254456,
      "learning_rate": 4.4510849152955595e-06,
      "loss": 0.019,
      "step": 1704240
    },
    {
      "epoch": 2.7890588689669618,
      "grad_norm": 0.21217864751815796,
      "learning_rate": 4.451019023082043e-06,
      "loss": 0.0122,
      "step": 1704260
    },
    {
      "epoch": 2.7890915994056154,
      "grad_norm": 0.7128481268882751,
      "learning_rate": 4.450953130868526e-06,
      "loss": 0.0146,
      "step": 1704280
    },
    {
      "epoch": 2.7891243298442685,
      "grad_norm": 0.32666245102882385,
      "learning_rate": 4.4508872386550086e-06,
      "loss": 0.0105,
      "step": 1704300
    },
    {
      "epoch": 2.789157060282922,
      "grad_norm": 0.09018755704164505,
      "learning_rate": 4.450821346441491e-06,
      "loss": 0.0104,
      "step": 1704320
    },
    {
      "epoch": 2.7891897907215752,
      "grad_norm": 0.20967087149620056,
      "learning_rate": 4.450755454227974e-06,
      "loss": 0.0138,
      "step": 1704340
    },
    {
      "epoch": 2.7892225211602284,
      "grad_norm": 1.469864010810852,
      "learning_rate": 4.450689562014457e-06,
      "loss": 0.0149,
      "step": 1704360
    },
    {
      "epoch": 2.789255251598882,
      "grad_norm": 0.33942726254463196,
      "learning_rate": 4.4506236698009395e-06,
      "loss": 0.0088,
      "step": 1704380
    },
    {
      "epoch": 2.789287982037535,
      "grad_norm": 0.6375714540481567,
      "learning_rate": 4.450557777587423e-06,
      "loss": 0.0103,
      "step": 1704400
    },
    {
      "epoch": 2.7893207124761887,
      "grad_norm": 0.6765345335006714,
      "learning_rate": 4.450491885373906e-06,
      "loss": 0.0147,
      "step": 1704420
    },
    {
      "epoch": 2.789353442914842,
      "grad_norm": 0.11208084970712662,
      "learning_rate": 4.450425993160389e-06,
      "loss": 0.0177,
      "step": 1704440
    },
    {
      "epoch": 2.7893861733534955,
      "grad_norm": 0.2854901850223541,
      "learning_rate": 4.450360100946871e-06,
      "loss": 0.0189,
      "step": 1704460
    },
    {
      "epoch": 2.7894189037921486,
      "grad_norm": 0.15557458996772766,
      "learning_rate": 4.450294208733354e-06,
      "loss": 0.012,
      "step": 1704480
    },
    {
      "epoch": 2.789451634230802,
      "grad_norm": 0.6815929412841797,
      "learning_rate": 4.450228316519838e-06,
      "loss": 0.0102,
      "step": 1704500
    },
    {
      "epoch": 2.7894843646694554,
      "grad_norm": 0.1299135684967041,
      "learning_rate": 4.45016242430632e-06,
      "loss": 0.0117,
      "step": 1704520
    },
    {
      "epoch": 2.7895170951081085,
      "grad_norm": 0.10745800286531448,
      "learning_rate": 4.450096532092803e-06,
      "loss": 0.0125,
      "step": 1704540
    },
    {
      "epoch": 2.789549825546762,
      "grad_norm": 0.24191932380199432,
      "learning_rate": 4.450030639879286e-06,
      "loss": 0.0137,
      "step": 1704560
    },
    {
      "epoch": 2.7895825559854153,
      "grad_norm": 0.6070263981819153,
      "learning_rate": 4.449964747665769e-06,
      "loss": 0.014,
      "step": 1704580
    },
    {
      "epoch": 2.789615286424069,
      "grad_norm": 0.2762238085269928,
      "learning_rate": 4.449898855452251e-06,
      "loss": 0.0195,
      "step": 1704600
    },
    {
      "epoch": 2.789648016862722,
      "grad_norm": 0.6072761416435242,
      "learning_rate": 4.449832963238734e-06,
      "loss": 0.0153,
      "step": 1704620
    },
    {
      "epoch": 2.789680747301375,
      "grad_norm": 0.05668492615222931,
      "learning_rate": 4.449767071025217e-06,
      "loss": 0.0096,
      "step": 1704640
    },
    {
      "epoch": 2.7897134777400288,
      "grad_norm": 0.5095757842063904,
      "learning_rate": 4.4497011788117e-06,
      "loss": 0.0152,
      "step": 1704660
    },
    {
      "epoch": 2.789746208178682,
      "grad_norm": 0.12094461917877197,
      "learning_rate": 4.449635286598183e-06,
      "loss": 0.0107,
      "step": 1704680
    },
    {
      "epoch": 2.7897789386173355,
      "grad_norm": 0.41608622670173645,
      "learning_rate": 4.449569394384666e-06,
      "loss": 0.0138,
      "step": 1704700
    },
    {
      "epoch": 2.7898116690559887,
      "grad_norm": 0.3983820080757141,
      "learning_rate": 4.449503502171149e-06,
      "loss": 0.0166,
      "step": 1704720
    },
    {
      "epoch": 2.7898443994946422,
      "grad_norm": 0.3376733660697937,
      "learning_rate": 4.449437609957631e-06,
      "loss": 0.0174,
      "step": 1704740
    },
    {
      "epoch": 2.7898771299332954,
      "grad_norm": 0.30839842557907104,
      "learning_rate": 4.449371717744115e-06,
      "loss": 0.0078,
      "step": 1704760
    },
    {
      "epoch": 2.7899098603719485,
      "grad_norm": 0.34200814366340637,
      "learning_rate": 4.449305825530598e-06,
      "loss": 0.017,
      "step": 1704780
    },
    {
      "epoch": 2.789942590810602,
      "grad_norm": 1.149430274963379,
      "learning_rate": 4.4492399333170804e-06,
      "loss": 0.0154,
      "step": 1704800
    },
    {
      "epoch": 2.7899753212492553,
      "grad_norm": 0.24361151456832886,
      "learning_rate": 4.449174041103563e-06,
      "loss": 0.0149,
      "step": 1704820
    },
    {
      "epoch": 2.790008051687909,
      "grad_norm": 1.183982253074646,
      "learning_rate": 4.449108148890046e-06,
      "loss": 0.0108,
      "step": 1704840
    },
    {
      "epoch": 2.790040782126562,
      "grad_norm": 0.5008707642555237,
      "learning_rate": 4.449042256676529e-06,
      "loss": 0.0135,
      "step": 1704860
    },
    {
      "epoch": 2.7900735125652156,
      "grad_norm": 0.43173226714134216,
      "learning_rate": 4.448976364463012e-06,
      "loss": 0.0186,
      "step": 1704880
    },
    {
      "epoch": 2.790106243003869,
      "grad_norm": 0.08752976357936859,
      "learning_rate": 4.448910472249495e-06,
      "loss": 0.0133,
      "step": 1704900
    },
    {
      "epoch": 2.790138973442522,
      "grad_norm": 0.15762324631214142,
      "learning_rate": 4.448844580035978e-06,
      "loss": 0.0186,
      "step": 1704920
    },
    {
      "epoch": 2.7901717038811755,
      "grad_norm": 1.4176305532455444,
      "learning_rate": 4.4487786878224605e-06,
      "loss": 0.0146,
      "step": 1704940
    },
    {
      "epoch": 2.7902044343198287,
      "grad_norm": 0.8980138301849365,
      "learning_rate": 4.448712795608943e-06,
      "loss": 0.0124,
      "step": 1704960
    },
    {
      "epoch": 2.790237164758482,
      "grad_norm": 0.21402813494205475,
      "learning_rate": 4.448646903395426e-06,
      "loss": 0.0143,
      "step": 1704980
    },
    {
      "epoch": 2.7902698951971354,
      "grad_norm": 0.3282226324081421,
      "learning_rate": 4.448581011181909e-06,
      "loss": 0.0199,
      "step": 1705000
    },
    {
      "epoch": 2.790302625635789,
      "grad_norm": 0.12222161144018173,
      "learning_rate": 4.4485151189683914e-06,
      "loss": 0.0114,
      "step": 1705020
    },
    {
      "epoch": 2.790335356074442,
      "grad_norm": 0.18518105149269104,
      "learning_rate": 4.448449226754874e-06,
      "loss": 0.0109,
      "step": 1705040
    },
    {
      "epoch": 2.7903680865130953,
      "grad_norm": 0.5962966680526733,
      "learning_rate": 4.448383334541358e-06,
      "loss": 0.0151,
      "step": 1705060
    },
    {
      "epoch": 2.790400816951749,
      "grad_norm": 0.12462630122900009,
      "learning_rate": 4.4483174423278405e-06,
      "loss": 0.0181,
      "step": 1705080
    },
    {
      "epoch": 2.790433547390402,
      "grad_norm": 0.09277231991291046,
      "learning_rate": 4.448251550114323e-06,
      "loss": 0.0178,
      "step": 1705100
    },
    {
      "epoch": 2.790466277829055,
      "grad_norm": 0.5506734848022461,
      "learning_rate": 4.448185657900807e-06,
      "loss": 0.0125,
      "step": 1705120
    },
    {
      "epoch": 2.790499008267709,
      "grad_norm": 0.6914402842521667,
      "learning_rate": 4.4481197656872896e-06,
      "loss": 0.0115,
      "step": 1705140
    },
    {
      "epoch": 2.7905317387063624,
      "grad_norm": 0.39620476961135864,
      "learning_rate": 4.448053873473772e-06,
      "loss": 0.0167,
      "step": 1705160
    },
    {
      "epoch": 2.7905644691450155,
      "grad_norm": 0.6145407557487488,
      "learning_rate": 4.447987981260255e-06,
      "loss": 0.0144,
      "step": 1705180
    },
    {
      "epoch": 2.7905971995836687,
      "grad_norm": 0.6523615121841431,
      "learning_rate": 4.447922089046738e-06,
      "loss": 0.0111,
      "step": 1705200
    },
    {
      "epoch": 2.7906299300223223,
      "grad_norm": 0.34902068972587585,
      "learning_rate": 4.4478561968332205e-06,
      "loss": 0.0117,
      "step": 1705220
    },
    {
      "epoch": 2.7906626604609754,
      "grad_norm": 0.12612871825695038,
      "learning_rate": 4.447790304619703e-06,
      "loss": 0.0132,
      "step": 1705240
    },
    {
      "epoch": 2.7906953908996286,
      "grad_norm": 0.17712703347206116,
      "learning_rate": 4.447724412406186e-06,
      "loss": 0.0137,
      "step": 1705260
    },
    {
      "epoch": 2.790728121338282,
      "grad_norm": 0.2443060278892517,
      "learning_rate": 4.44765852019267e-06,
      "loss": 0.0172,
      "step": 1705280
    },
    {
      "epoch": 2.790760851776936,
      "grad_norm": 0.20470374822616577,
      "learning_rate": 4.447592627979152e-06,
      "loss": 0.0156,
      "step": 1705300
    },
    {
      "epoch": 2.790793582215589,
      "grad_norm": 0.27319157123565674,
      "learning_rate": 4.447526735765635e-06,
      "loss": 0.0118,
      "step": 1705320
    },
    {
      "epoch": 2.790826312654242,
      "grad_norm": 0.28068095445632935,
      "learning_rate": 4.447460843552118e-06,
      "loss": 0.0175,
      "step": 1705340
    },
    {
      "epoch": 2.7908590430928957,
      "grad_norm": 0.25757959485054016,
      "learning_rate": 4.4473949513386005e-06,
      "loss": 0.0152,
      "step": 1705360
    },
    {
      "epoch": 2.790891773531549,
      "grad_norm": 0.6143525242805481,
      "learning_rate": 4.447329059125083e-06,
      "loss": 0.0132,
      "step": 1705380
    },
    {
      "epoch": 2.790924503970202,
      "grad_norm": 0.33029669523239136,
      "learning_rate": 4.447263166911566e-06,
      "loss": 0.0128,
      "step": 1705400
    },
    {
      "epoch": 2.7909572344088556,
      "grad_norm": 0.5913986563682556,
      "learning_rate": 4.447197274698049e-06,
      "loss": 0.0165,
      "step": 1705420
    },
    {
      "epoch": 2.790989964847509,
      "grad_norm": 0.7647857666015625,
      "learning_rate": 4.4471313824845315e-06,
      "loss": 0.0147,
      "step": 1705440
    },
    {
      "epoch": 2.7910226952861623,
      "grad_norm": 0.2614801228046417,
      "learning_rate": 4.447065490271015e-06,
      "loss": 0.018,
      "step": 1705460
    },
    {
      "epoch": 2.7910554257248155,
      "grad_norm": 0.1143132820725441,
      "learning_rate": 4.446999598057498e-06,
      "loss": 0.0173,
      "step": 1705480
    },
    {
      "epoch": 2.791088156163469,
      "grad_norm": 0.5892049074172974,
      "learning_rate": 4.4469337058439806e-06,
      "loss": 0.0125,
      "step": 1705500
    },
    {
      "epoch": 2.791120886602122,
      "grad_norm": 0.24893437325954437,
      "learning_rate": 4.446867813630464e-06,
      "loss": 0.0095,
      "step": 1705520
    },
    {
      "epoch": 2.7911536170407754,
      "grad_norm": 0.23908326029777527,
      "learning_rate": 4.446801921416947e-06,
      "loss": 0.0099,
      "step": 1705540
    },
    {
      "epoch": 2.791186347479429,
      "grad_norm": 0.18991810083389282,
      "learning_rate": 4.44673602920343e-06,
      "loss": 0.0125,
      "step": 1705560
    },
    {
      "epoch": 2.791219077918082,
      "grad_norm": 0.17728747427463531,
      "learning_rate": 4.446670136989912e-06,
      "loss": 0.0219,
      "step": 1705580
    },
    {
      "epoch": 2.7912518083567357,
      "grad_norm": 0.19697047770023346,
      "learning_rate": 4.446604244776395e-06,
      "loss": 0.0121,
      "step": 1705600
    },
    {
      "epoch": 2.791284538795389,
      "grad_norm": 0.4619561433792114,
      "learning_rate": 4.446538352562878e-06,
      "loss": 0.0107,
      "step": 1705620
    },
    {
      "epoch": 2.7913172692340424,
      "grad_norm": 0.28943684697151184,
      "learning_rate": 4.446472460349361e-06,
      "loss": 0.0129,
      "step": 1705640
    },
    {
      "epoch": 2.7913499996726956,
      "grad_norm": 0.11791519820690155,
      "learning_rate": 4.446406568135843e-06,
      "loss": 0.0155,
      "step": 1705660
    },
    {
      "epoch": 2.7913827301113487,
      "grad_norm": 0.25801506638526917,
      "learning_rate": 4.446340675922327e-06,
      "loss": 0.0097,
      "step": 1705680
    },
    {
      "epoch": 2.7914154605500023,
      "grad_norm": 0.29272472858428955,
      "learning_rate": 4.44627478370881e-06,
      "loss": 0.0118,
      "step": 1705700
    },
    {
      "epoch": 2.7914481909886555,
      "grad_norm": 0.5625393390655518,
      "learning_rate": 4.446208891495292e-06,
      "loss": 0.0178,
      "step": 1705720
    },
    {
      "epoch": 2.791480921427309,
      "grad_norm": 0.21444399654865265,
      "learning_rate": 4.446142999281775e-06,
      "loss": 0.0147,
      "step": 1705740
    },
    {
      "epoch": 2.7915136518659622,
      "grad_norm": 0.11550602316856384,
      "learning_rate": 4.446077107068258e-06,
      "loss": 0.0081,
      "step": 1705760
    },
    {
      "epoch": 2.791546382304616,
      "grad_norm": 0.6069624423980713,
      "learning_rate": 4.446011214854741e-06,
      "loss": 0.0179,
      "step": 1705780
    },
    {
      "epoch": 2.791579112743269,
      "grad_norm": 0.7492401003837585,
      "learning_rate": 4.445945322641223e-06,
      "loss": 0.0139,
      "step": 1705800
    },
    {
      "epoch": 2.791611843181922,
      "grad_norm": 0.13048055768013,
      "learning_rate": 4.445879430427707e-06,
      "loss": 0.0095,
      "step": 1705820
    },
    {
      "epoch": 2.7916445736205757,
      "grad_norm": 0.4894034266471863,
      "learning_rate": 4.44581353821419e-06,
      "loss": 0.0141,
      "step": 1705840
    },
    {
      "epoch": 2.791677304059229,
      "grad_norm": 0.09774433076381683,
      "learning_rate": 4.4457476460006724e-06,
      "loss": 0.0115,
      "step": 1705860
    },
    {
      "epoch": 2.7917100344978825,
      "grad_norm": 1.0190483331680298,
      "learning_rate": 4.445681753787155e-06,
      "loss": 0.0223,
      "step": 1705880
    },
    {
      "epoch": 2.7917427649365356,
      "grad_norm": 0.21018637716770172,
      "learning_rate": 4.445615861573639e-06,
      "loss": 0.0088,
      "step": 1705900
    },
    {
      "epoch": 2.791775495375189,
      "grad_norm": 0.156626358628273,
      "learning_rate": 4.4455499693601215e-06,
      "loss": 0.0146,
      "step": 1705920
    },
    {
      "epoch": 2.7918082258138424,
      "grad_norm": 0.759050190448761,
      "learning_rate": 4.445484077146604e-06,
      "loss": 0.0138,
      "step": 1705940
    },
    {
      "epoch": 2.7918409562524955,
      "grad_norm": 1.017014503479004,
      "learning_rate": 4.445418184933087e-06,
      "loss": 0.0152,
      "step": 1705960
    },
    {
      "epoch": 2.791873686691149,
      "grad_norm": 0.05952487140893936,
      "learning_rate": 4.44535229271957e-06,
      "loss": 0.0116,
      "step": 1705980
    },
    {
      "epoch": 2.7919064171298023,
      "grad_norm": 0.28003188967704773,
      "learning_rate": 4.4452864005060525e-06,
      "loss": 0.0128,
      "step": 1706000
    },
    {
      "epoch": 2.791939147568456,
      "grad_norm": 0.30179694294929504,
      "learning_rate": 4.445220508292535e-06,
      "loss": 0.0155,
      "step": 1706020
    },
    {
      "epoch": 2.791971878007109,
      "grad_norm": 0.1842242181301117,
      "learning_rate": 4.445154616079018e-06,
      "loss": 0.013,
      "step": 1706040
    },
    {
      "epoch": 2.7920046084457626,
      "grad_norm": 0.5698469281196594,
      "learning_rate": 4.445088723865501e-06,
      "loss": 0.0199,
      "step": 1706060
    },
    {
      "epoch": 2.7920373388844157,
      "grad_norm": 0.3475431501865387,
      "learning_rate": 4.445022831651984e-06,
      "loss": 0.0153,
      "step": 1706080
    },
    {
      "epoch": 2.792070069323069,
      "grad_norm": 0.7606238126754761,
      "learning_rate": 4.444956939438467e-06,
      "loss": 0.0165,
      "step": 1706100
    },
    {
      "epoch": 2.7921027997617225,
      "grad_norm": 0.18642538785934448,
      "learning_rate": 4.44489104722495e-06,
      "loss": 0.0155,
      "step": 1706120
    },
    {
      "epoch": 2.7921355302003756,
      "grad_norm": 0.435442715883255,
      "learning_rate": 4.4448251550114325e-06,
      "loss": 0.0149,
      "step": 1706140
    },
    {
      "epoch": 2.7921682606390292,
      "grad_norm": 0.43181952834129333,
      "learning_rate": 4.444759262797916e-06,
      "loss": 0.0135,
      "step": 1706160
    },
    {
      "epoch": 2.7922009910776824,
      "grad_norm": 0.26666247844696045,
      "learning_rate": 4.444693370584399e-06,
      "loss": 0.0116,
      "step": 1706180
    },
    {
      "epoch": 2.792233721516336,
      "grad_norm": 0.27818936109542847,
      "learning_rate": 4.4446274783708815e-06,
      "loss": 0.0157,
      "step": 1706200
    },
    {
      "epoch": 2.792266451954989,
      "grad_norm": 0.21900175511837006,
      "learning_rate": 4.444561586157364e-06,
      "loss": 0.008,
      "step": 1706220
    },
    {
      "epoch": 2.7922991823936423,
      "grad_norm": 0.5095028281211853,
      "learning_rate": 4.444495693943847e-06,
      "loss": 0.0132,
      "step": 1706240
    },
    {
      "epoch": 2.792331912832296,
      "grad_norm": 0.6067028045654297,
      "learning_rate": 4.44442980173033e-06,
      "loss": 0.009,
      "step": 1706260
    },
    {
      "epoch": 2.792364643270949,
      "grad_norm": 0.3476199209690094,
      "learning_rate": 4.4443639095168125e-06,
      "loss": 0.0104,
      "step": 1706280
    },
    {
      "epoch": 2.7923973737096026,
      "grad_norm": 0.3479312062263489,
      "learning_rate": 4.444298017303296e-06,
      "loss": 0.0134,
      "step": 1706300
    },
    {
      "epoch": 2.7924301041482558,
      "grad_norm": 0.3434271812438965,
      "learning_rate": 4.444232125089779e-06,
      "loss": 0.0146,
      "step": 1706320
    },
    {
      "epoch": 2.7924628345869094,
      "grad_norm": 0.1960846483707428,
      "learning_rate": 4.4441662328762616e-06,
      "loss": 0.0201,
      "step": 1706340
    },
    {
      "epoch": 2.7924955650255625,
      "grad_norm": 0.13650932908058167,
      "learning_rate": 4.444100340662744e-06,
      "loss": 0.0133,
      "step": 1706360
    },
    {
      "epoch": 2.7925282954642157,
      "grad_norm": 0.3082851767539978,
      "learning_rate": 4.444034448449227e-06,
      "loss": 0.014,
      "step": 1706380
    },
    {
      "epoch": 2.7925610259028693,
      "grad_norm": 0.2691373825073242,
      "learning_rate": 4.44396855623571e-06,
      "loss": 0.014,
      "step": 1706400
    },
    {
      "epoch": 2.7925937563415224,
      "grad_norm": 0.34524667263031006,
      "learning_rate": 4.4439026640221925e-06,
      "loss": 0.011,
      "step": 1706420
    },
    {
      "epoch": 2.7926264867801756,
      "grad_norm": 0.2259785383939743,
      "learning_rate": 4.443836771808675e-06,
      "loss": 0.0151,
      "step": 1706440
    },
    {
      "epoch": 2.792659217218829,
      "grad_norm": 0.10057666152715683,
      "learning_rate": 4.443770879595158e-06,
      "loss": 0.0093,
      "step": 1706460
    },
    {
      "epoch": 2.7926919476574827,
      "grad_norm": 1.009082555770874,
      "learning_rate": 4.443704987381642e-06,
      "loss": 0.0194,
      "step": 1706480
    },
    {
      "epoch": 2.792724678096136,
      "grad_norm": 0.4072173833847046,
      "learning_rate": 4.443639095168124e-06,
      "loss": 0.0146,
      "step": 1706500
    },
    {
      "epoch": 2.792757408534789,
      "grad_norm": 0.29574787616729736,
      "learning_rate": 4.443573202954607e-06,
      "loss": 0.0144,
      "step": 1706520
    },
    {
      "epoch": 2.7927901389734426,
      "grad_norm": 0.2957753539085388,
      "learning_rate": 4.443507310741091e-06,
      "loss": 0.0178,
      "step": 1706540
    },
    {
      "epoch": 2.792822869412096,
      "grad_norm": 0.342323899269104,
      "learning_rate": 4.443441418527573e-06,
      "loss": 0.0124,
      "step": 1706560
    },
    {
      "epoch": 2.792855599850749,
      "grad_norm": 0.229283407330513,
      "learning_rate": 4.443375526314056e-06,
      "loss": 0.0117,
      "step": 1706580
    },
    {
      "epoch": 2.7928883302894025,
      "grad_norm": 0.39792630076408386,
      "learning_rate": 4.443309634100539e-06,
      "loss": 0.0128,
      "step": 1706600
    },
    {
      "epoch": 2.792921060728056,
      "grad_norm": 0.21912507712841034,
      "learning_rate": 4.443243741887022e-06,
      "loss": 0.0128,
      "step": 1706620
    },
    {
      "epoch": 2.7929537911667093,
      "grad_norm": 0.5828996300697327,
      "learning_rate": 4.443177849673504e-06,
      "loss": 0.0126,
      "step": 1706640
    },
    {
      "epoch": 2.7929865216053624,
      "grad_norm": 0.762291431427002,
      "learning_rate": 4.443111957459987e-06,
      "loss": 0.0165,
      "step": 1706660
    },
    {
      "epoch": 2.793019252044016,
      "grad_norm": 0.24961045384407043,
      "learning_rate": 4.44304606524647e-06,
      "loss": 0.0107,
      "step": 1706680
    },
    {
      "epoch": 2.793051982482669,
      "grad_norm": 0.1968822330236435,
      "learning_rate": 4.4429801730329534e-06,
      "loss": 0.0166,
      "step": 1706700
    },
    {
      "epoch": 2.7930847129213223,
      "grad_norm": 0.3611457645893097,
      "learning_rate": 4.442914280819436e-06,
      "loss": 0.0162,
      "step": 1706720
    },
    {
      "epoch": 2.793117443359976,
      "grad_norm": 0.3173406720161438,
      "learning_rate": 4.442848388605919e-06,
      "loss": 0.0137,
      "step": 1706740
    },
    {
      "epoch": 2.7931501737986295,
      "grad_norm": 0.43947264552116394,
      "learning_rate": 4.442782496392402e-06,
      "loss": 0.0134,
      "step": 1706760
    },
    {
      "epoch": 2.7931829042372827,
      "grad_norm": 0.12059587240219116,
      "learning_rate": 4.442716604178884e-06,
      "loss": 0.0133,
      "step": 1706780
    },
    {
      "epoch": 2.793215634675936,
      "grad_norm": 0.48479604721069336,
      "learning_rate": 4.442650711965367e-06,
      "loss": 0.0151,
      "step": 1706800
    },
    {
      "epoch": 2.7932483651145894,
      "grad_norm": 0.31884482502937317,
      "learning_rate": 4.44258481975185e-06,
      "loss": 0.0164,
      "step": 1706820
    },
    {
      "epoch": 2.7932810955532426,
      "grad_norm": 0.28112298250198364,
      "learning_rate": 4.442518927538333e-06,
      "loss": 0.0106,
      "step": 1706840
    },
    {
      "epoch": 2.7933138259918957,
      "grad_norm": 0.1479082703590393,
      "learning_rate": 4.442453035324815e-06,
      "loss": 0.0148,
      "step": 1706860
    },
    {
      "epoch": 2.7933465564305493,
      "grad_norm": 0.07018709927797318,
      "learning_rate": 4.442387143111299e-06,
      "loss": 0.0147,
      "step": 1706880
    },
    {
      "epoch": 2.793379286869203,
      "grad_norm": 0.25685960054397583,
      "learning_rate": 4.442321250897782e-06,
      "loss": 0.019,
      "step": 1706900
    },
    {
      "epoch": 2.793412017307856,
      "grad_norm": 0.053514961153268814,
      "learning_rate": 4.442255358684264e-06,
      "loss": 0.0122,
      "step": 1706920
    },
    {
      "epoch": 2.793444747746509,
      "grad_norm": 0.13411472737789154,
      "learning_rate": 4.442189466470748e-06,
      "loss": 0.0138,
      "step": 1706940
    },
    {
      "epoch": 2.793477478185163,
      "grad_norm": 0.6740633845329285,
      "learning_rate": 4.442123574257231e-06,
      "loss": 0.0196,
      "step": 1706960
    },
    {
      "epoch": 2.793510208623816,
      "grad_norm": 0.3422238230705261,
      "learning_rate": 4.4420576820437135e-06,
      "loss": 0.0122,
      "step": 1706980
    },
    {
      "epoch": 2.793542939062469,
      "grad_norm": 0.2544625699520111,
      "learning_rate": 4.441991789830196e-06,
      "loss": 0.0101,
      "step": 1707000
    },
    {
      "epoch": 2.7935756695011227,
      "grad_norm": 0.43678754568099976,
      "learning_rate": 4.441925897616679e-06,
      "loss": 0.0176,
      "step": 1707020
    },
    {
      "epoch": 2.793608399939776,
      "grad_norm": 0.40476807951927185,
      "learning_rate": 4.441860005403162e-06,
      "loss": 0.0131,
      "step": 1707040
    },
    {
      "epoch": 2.7936411303784294,
      "grad_norm": 0.6475504040718079,
      "learning_rate": 4.4417941131896444e-06,
      "loss": 0.0113,
      "step": 1707060
    },
    {
      "epoch": 2.7936738608170826,
      "grad_norm": 0.18297052383422852,
      "learning_rate": 4.441728220976127e-06,
      "loss": 0.0148,
      "step": 1707080
    },
    {
      "epoch": 2.793706591255736,
      "grad_norm": 0.4878673255443573,
      "learning_rate": 4.441662328762611e-06,
      "loss": 0.0109,
      "step": 1707100
    },
    {
      "epoch": 2.7937393216943893,
      "grad_norm": 0.38119083642959595,
      "learning_rate": 4.4415964365490935e-06,
      "loss": 0.0179,
      "step": 1707120
    },
    {
      "epoch": 2.7937720521330425,
      "grad_norm": 0.7160647511482239,
      "learning_rate": 4.441530544335576e-06,
      "loss": 0.0182,
      "step": 1707140
    },
    {
      "epoch": 2.793804782571696,
      "grad_norm": 0.0512460395693779,
      "learning_rate": 4.441464652122059e-06,
      "loss": 0.0099,
      "step": 1707160
    },
    {
      "epoch": 2.793837513010349,
      "grad_norm": 0.285172700881958,
      "learning_rate": 4.441398759908542e-06,
      "loss": 0.0132,
      "step": 1707180
    },
    {
      "epoch": 2.793870243449003,
      "grad_norm": 0.31228286027908325,
      "learning_rate": 4.4413328676950245e-06,
      "loss": 0.0119,
      "step": 1707200
    },
    {
      "epoch": 2.793902973887656,
      "grad_norm": 0.552929699420929,
      "learning_rate": 4.441266975481508e-06,
      "loss": 0.0168,
      "step": 1707220
    },
    {
      "epoch": 2.7939357043263096,
      "grad_norm": 0.411210298538208,
      "learning_rate": 4.441201083267991e-06,
      "loss": 0.0122,
      "step": 1707240
    },
    {
      "epoch": 2.7939684347649627,
      "grad_norm": 0.4415247142314911,
      "learning_rate": 4.4411351910544735e-06,
      "loss": 0.0106,
      "step": 1707260
    },
    {
      "epoch": 2.794001165203616,
      "grad_norm": 0.6066614389419556,
      "learning_rate": 4.441069298840956e-06,
      "loss": 0.0113,
      "step": 1707280
    },
    {
      "epoch": 2.7940338956422694,
      "grad_norm": 0.1849675178527832,
      "learning_rate": 4.441003406627439e-06,
      "loss": 0.0158,
      "step": 1707300
    },
    {
      "epoch": 2.7940666260809226,
      "grad_norm": 0.7550978064537048,
      "learning_rate": 4.440937514413923e-06,
      "loss": 0.0105,
      "step": 1707320
    },
    {
      "epoch": 2.794099356519576,
      "grad_norm": 0.38076546788215637,
      "learning_rate": 4.440871622200405e-06,
      "loss": 0.0093,
      "step": 1707340
    },
    {
      "epoch": 2.7941320869582293,
      "grad_norm": 0.5176230072975159,
      "learning_rate": 4.440805729986888e-06,
      "loss": 0.0174,
      "step": 1707360
    },
    {
      "epoch": 2.794164817396883,
      "grad_norm": 0.14196088910102844,
      "learning_rate": 4.440739837773371e-06,
      "loss": 0.0091,
      "step": 1707380
    },
    {
      "epoch": 2.794197547835536,
      "grad_norm": 0.18622155487537384,
      "learning_rate": 4.4406739455598536e-06,
      "loss": 0.0109,
      "step": 1707400
    },
    {
      "epoch": 2.7942302782741892,
      "grad_norm": 0.27337193489074707,
      "learning_rate": 4.440608053346336e-06,
      "loss": 0.0147,
      "step": 1707420
    },
    {
      "epoch": 2.794263008712843,
      "grad_norm": 0.3824911415576935,
      "learning_rate": 4.440542161132819e-06,
      "loss": 0.0129,
      "step": 1707440
    },
    {
      "epoch": 2.794295739151496,
      "grad_norm": 0.19449371099472046,
      "learning_rate": 4.440476268919302e-06,
      "loss": 0.0087,
      "step": 1707460
    },
    {
      "epoch": 2.7943284695901496,
      "grad_norm": 1.011250615119934,
      "learning_rate": 4.4404103767057845e-06,
      "loss": 0.0152,
      "step": 1707480
    },
    {
      "epoch": 2.7943612000288027,
      "grad_norm": 0.295405775308609,
      "learning_rate": 4.440344484492268e-06,
      "loss": 0.0167,
      "step": 1707500
    },
    {
      "epoch": 2.7943939304674563,
      "grad_norm": 0.3410479426383972,
      "learning_rate": 4.440278592278751e-06,
      "loss": 0.0126,
      "step": 1707520
    },
    {
      "epoch": 2.7944266609061095,
      "grad_norm": 0.902957022190094,
      "learning_rate": 4.440212700065234e-06,
      "loss": 0.0183,
      "step": 1707540
    },
    {
      "epoch": 2.7944593913447626,
      "grad_norm": 0.1956481635570526,
      "learning_rate": 4.440146807851716e-06,
      "loss": 0.0146,
      "step": 1707560
    },
    {
      "epoch": 2.794492121783416,
      "grad_norm": 0.16538044810295105,
      "learning_rate": 4.4400809156382e-06,
      "loss": 0.0109,
      "step": 1707580
    },
    {
      "epoch": 2.7945248522220694,
      "grad_norm": 0.5122836828231812,
      "learning_rate": 4.440015023424683e-06,
      "loss": 0.0147,
      "step": 1707600
    },
    {
      "epoch": 2.794557582660723,
      "grad_norm": 0.23191270232200623,
      "learning_rate": 4.439949131211165e-06,
      "loss": 0.0101,
      "step": 1707620
    },
    {
      "epoch": 2.794590313099376,
      "grad_norm": 0.3614342510700226,
      "learning_rate": 4.439883238997648e-06,
      "loss": 0.0154,
      "step": 1707640
    },
    {
      "epoch": 2.7946230435380297,
      "grad_norm": 0.2552739679813385,
      "learning_rate": 4.439817346784131e-06,
      "loss": 0.0135,
      "step": 1707660
    },
    {
      "epoch": 2.794655773976683,
      "grad_norm": 0.1131168082356453,
      "learning_rate": 4.439751454570614e-06,
      "loss": 0.0155,
      "step": 1707680
    },
    {
      "epoch": 2.794688504415336,
      "grad_norm": 1.0102721452713013,
      "learning_rate": 4.439685562357096e-06,
      "loss": 0.0156,
      "step": 1707700
    },
    {
      "epoch": 2.7947212348539896,
      "grad_norm": 0.22259333729743958,
      "learning_rate": 4.43961967014358e-06,
      "loss": 0.0166,
      "step": 1707720
    },
    {
      "epoch": 2.7947539652926428,
      "grad_norm": 0.671438992023468,
      "learning_rate": 4.439553777930063e-06,
      "loss": 0.0096,
      "step": 1707740
    },
    {
      "epoch": 2.7947866957312963,
      "grad_norm": 0.30494800209999084,
      "learning_rate": 4.439487885716545e-06,
      "loss": 0.013,
      "step": 1707760
    },
    {
      "epoch": 2.7948194261699495,
      "grad_norm": 0.09252146631479263,
      "learning_rate": 4.439421993503028e-06,
      "loss": 0.0185,
      "step": 1707780
    },
    {
      "epoch": 2.794852156608603,
      "grad_norm": 0.32829299569129944,
      "learning_rate": 4.439356101289511e-06,
      "loss": 0.0195,
      "step": 1707800
    },
    {
      "epoch": 2.7948848870472562,
      "grad_norm": 0.2783955931663513,
      "learning_rate": 4.439290209075994e-06,
      "loss": 0.0168,
      "step": 1707820
    },
    {
      "epoch": 2.7949176174859094,
      "grad_norm": 0.25035560131073,
      "learning_rate": 4.439224316862476e-06,
      "loss": 0.0095,
      "step": 1707840
    },
    {
      "epoch": 2.794950347924563,
      "grad_norm": 0.2508479654788971,
      "learning_rate": 4.439158424648959e-06,
      "loss": 0.0101,
      "step": 1707860
    },
    {
      "epoch": 2.794983078363216,
      "grad_norm": 0.32204174995422363,
      "learning_rate": 4.439092532435442e-06,
      "loss": 0.0116,
      "step": 1707880
    },
    {
      "epoch": 2.7950158088018697,
      "grad_norm": 0.7996256947517395,
      "learning_rate": 4.4390266402219254e-06,
      "loss": 0.0114,
      "step": 1707900
    },
    {
      "epoch": 2.795048539240523,
      "grad_norm": 0.2000480741262436,
      "learning_rate": 4.438960748008408e-06,
      "loss": 0.0109,
      "step": 1707920
    },
    {
      "epoch": 2.7950812696791765,
      "grad_norm": 0.20020271837711334,
      "learning_rate": 4.438894855794891e-06,
      "loss": 0.0095,
      "step": 1707940
    },
    {
      "epoch": 2.7951140001178296,
      "grad_norm": 0.22814594209194183,
      "learning_rate": 4.4388289635813745e-06,
      "loss": 0.0201,
      "step": 1707960
    },
    {
      "epoch": 2.7951467305564828,
      "grad_norm": 0.40402424335479736,
      "learning_rate": 4.438763071367857e-06,
      "loss": 0.0122,
      "step": 1707980
    },
    {
      "epoch": 2.7951794609951364,
      "grad_norm": 0.2159128040075302,
      "learning_rate": 4.43869717915434e-06,
      "loss": 0.0119,
      "step": 1708000
    },
    {
      "epoch": 2.7952121914337895,
      "grad_norm": 0.1432536244392395,
      "learning_rate": 4.438631286940823e-06,
      "loss": 0.0105,
      "step": 1708020
    },
    {
      "epoch": 2.7952449218724427,
      "grad_norm": 0.07673687487840652,
      "learning_rate": 4.4385653947273055e-06,
      "loss": 0.0139,
      "step": 1708040
    },
    {
      "epoch": 2.7952776523110963,
      "grad_norm": 0.25677216053009033,
      "learning_rate": 4.438499502513788e-06,
      "loss": 0.0189,
      "step": 1708060
    },
    {
      "epoch": 2.79531038274975,
      "grad_norm": 0.2216523438692093,
      "learning_rate": 4.438433610300271e-06,
      "loss": 0.0118,
      "step": 1708080
    },
    {
      "epoch": 2.795343113188403,
      "grad_norm": 0.3001379668712616,
      "learning_rate": 4.438367718086754e-06,
      "loss": 0.0093,
      "step": 1708100
    },
    {
      "epoch": 2.795375843627056,
      "grad_norm": 0.17840273678302765,
      "learning_rate": 4.438301825873237e-06,
      "loss": 0.0178,
      "step": 1708120
    },
    {
      "epoch": 2.7954085740657098,
      "grad_norm": 0.7082235217094421,
      "learning_rate": 4.43823593365972e-06,
      "loss": 0.0124,
      "step": 1708140
    },
    {
      "epoch": 2.795441304504363,
      "grad_norm": 0.46053826808929443,
      "learning_rate": 4.438170041446203e-06,
      "loss": 0.0144,
      "step": 1708160
    },
    {
      "epoch": 2.795474034943016,
      "grad_norm": 0.09556036442518234,
      "learning_rate": 4.4381041492326855e-06,
      "loss": 0.0102,
      "step": 1708180
    },
    {
      "epoch": 2.7955067653816696,
      "grad_norm": 0.37665900588035583,
      "learning_rate": 4.438038257019168e-06,
      "loss": 0.0102,
      "step": 1708200
    },
    {
      "epoch": 2.7955394958203232,
      "grad_norm": 0.26523247361183167,
      "learning_rate": 4.437972364805651e-06,
      "loss": 0.0053,
      "step": 1708220
    },
    {
      "epoch": 2.7955722262589764,
      "grad_norm": 0.39319270849227905,
      "learning_rate": 4.437906472592134e-06,
      "loss": 0.0152,
      "step": 1708240
    },
    {
      "epoch": 2.7956049566976295,
      "grad_norm": 0.6491108536720276,
      "learning_rate": 4.4378405803786164e-06,
      "loss": 0.0133,
      "step": 1708260
    },
    {
      "epoch": 2.795637687136283,
      "grad_norm": 0.31137973070144653,
      "learning_rate": 4.4377746881651e-06,
      "loss": 0.0185,
      "step": 1708280
    },
    {
      "epoch": 2.7956704175749363,
      "grad_norm": 0.30175331234931946,
      "learning_rate": 4.437708795951583e-06,
      "loss": 0.02,
      "step": 1708300
    },
    {
      "epoch": 2.7957031480135894,
      "grad_norm": 0.18022088706493378,
      "learning_rate": 4.4376429037380655e-06,
      "loss": 0.0154,
      "step": 1708320
    },
    {
      "epoch": 2.795735878452243,
      "grad_norm": 0.44204187393188477,
      "learning_rate": 4.437577011524548e-06,
      "loss": 0.009,
      "step": 1708340
    },
    {
      "epoch": 2.7957686088908966,
      "grad_norm": 0.10483124107122421,
      "learning_rate": 4.437511119311032e-06,
      "loss": 0.012,
      "step": 1708360
    },
    {
      "epoch": 2.7958013393295498,
      "grad_norm": 0.28811413049697876,
      "learning_rate": 4.437445227097515e-06,
      "loss": 0.0097,
      "step": 1708380
    },
    {
      "epoch": 2.795834069768203,
      "grad_norm": 0.30476605892181396,
      "learning_rate": 4.437379334883997e-06,
      "loss": 0.0216,
      "step": 1708400
    },
    {
      "epoch": 2.7958668002068565,
      "grad_norm": 0.5913434624671936,
      "learning_rate": 4.43731344267048e-06,
      "loss": 0.0148,
      "step": 1708420
    },
    {
      "epoch": 2.7958995306455097,
      "grad_norm": 0.07927414029836655,
      "learning_rate": 4.437247550456963e-06,
      "loss": 0.0102,
      "step": 1708440
    },
    {
      "epoch": 2.795932261084163,
      "grad_norm": 0.30283015966415405,
      "learning_rate": 4.4371816582434455e-06,
      "loss": 0.018,
      "step": 1708460
    },
    {
      "epoch": 2.7959649915228164,
      "grad_norm": 0.22383549809455872,
      "learning_rate": 4.437115766029928e-06,
      "loss": 0.0115,
      "step": 1708480
    },
    {
      "epoch": 2.7959977219614696,
      "grad_norm": 0.43470415472984314,
      "learning_rate": 4.437049873816411e-06,
      "loss": 0.0137,
      "step": 1708500
    },
    {
      "epoch": 2.796030452400123,
      "grad_norm": 0.1524638682603836,
      "learning_rate": 4.436983981602895e-06,
      "loss": 0.0138,
      "step": 1708520
    },
    {
      "epoch": 2.7960631828387763,
      "grad_norm": 0.16795840859413147,
      "learning_rate": 4.436918089389377e-06,
      "loss": 0.0184,
      "step": 1708540
    },
    {
      "epoch": 2.79609591327743,
      "grad_norm": 0.16117924451828003,
      "learning_rate": 4.43685219717586e-06,
      "loss": 0.0101,
      "step": 1708560
    },
    {
      "epoch": 2.796128643716083,
      "grad_norm": 0.1963677555322647,
      "learning_rate": 4.436786304962343e-06,
      "loss": 0.0122,
      "step": 1708580
    },
    {
      "epoch": 2.796161374154736,
      "grad_norm": 0.6086069345474243,
      "learning_rate": 4.4367204127488256e-06,
      "loss": 0.0145,
      "step": 1708600
    },
    {
      "epoch": 2.79619410459339,
      "grad_norm": 0.13759450614452362,
      "learning_rate": 4.436654520535308e-06,
      "loss": 0.0149,
      "step": 1708620
    },
    {
      "epoch": 2.796226835032043,
      "grad_norm": 0.6905004382133484,
      "learning_rate": 4.436588628321792e-06,
      "loss": 0.0168,
      "step": 1708640
    },
    {
      "epoch": 2.7962595654706965,
      "grad_norm": 0.17877346277236938,
      "learning_rate": 4.436522736108275e-06,
      "loss": 0.0118,
      "step": 1708660
    },
    {
      "epoch": 2.7962922959093497,
      "grad_norm": 0.16594091057777405,
      "learning_rate": 4.436456843894757e-06,
      "loss": 0.0161,
      "step": 1708680
    },
    {
      "epoch": 2.7963250263480033,
      "grad_norm": 0.16548550128936768,
      "learning_rate": 4.43639095168124e-06,
      "loss": 0.0093,
      "step": 1708700
    },
    {
      "epoch": 2.7963577567866564,
      "grad_norm": 0.1493992805480957,
      "learning_rate": 4.436325059467723e-06,
      "loss": 0.0135,
      "step": 1708720
    },
    {
      "epoch": 2.7963904872253096,
      "grad_norm": 0.4222782254219055,
      "learning_rate": 4.4362591672542064e-06,
      "loss": 0.0135,
      "step": 1708740
    },
    {
      "epoch": 2.796423217663963,
      "grad_norm": 0.42901870608329773,
      "learning_rate": 4.436193275040689e-06,
      "loss": 0.0146,
      "step": 1708760
    },
    {
      "epoch": 2.7964559481026163,
      "grad_norm": 0.5376349687576294,
      "learning_rate": 4.436127382827172e-06,
      "loss": 0.0144,
      "step": 1708780
    },
    {
      "epoch": 2.79648867854127,
      "grad_norm": 0.3129834532737732,
      "learning_rate": 4.436061490613655e-06,
      "loss": 0.0205,
      "step": 1708800
    },
    {
      "epoch": 2.796521408979923,
      "grad_norm": 0.404246985912323,
      "learning_rate": 4.435995598400137e-06,
      "loss": 0.0113,
      "step": 1708820
    },
    {
      "epoch": 2.7965541394185767,
      "grad_norm": 0.7843327522277832,
      "learning_rate": 4.43592970618662e-06,
      "loss": 0.018,
      "step": 1708840
    },
    {
      "epoch": 2.79658686985723,
      "grad_norm": 0.4099092483520508,
      "learning_rate": 4.435863813973103e-06,
      "loss": 0.013,
      "step": 1708860
    },
    {
      "epoch": 2.796619600295883,
      "grad_norm": 0.12864583730697632,
      "learning_rate": 4.435797921759586e-06,
      "loss": 0.017,
      "step": 1708880
    },
    {
      "epoch": 2.7966523307345366,
      "grad_norm": 0.355497270822525,
      "learning_rate": 4.435732029546068e-06,
      "loss": 0.0199,
      "step": 1708900
    },
    {
      "epoch": 2.7966850611731897,
      "grad_norm": 0.37860947847366333,
      "learning_rate": 4.435666137332552e-06,
      "loss": 0.0091,
      "step": 1708920
    },
    {
      "epoch": 2.7967177916118433,
      "grad_norm": 0.09660635888576508,
      "learning_rate": 4.435600245119035e-06,
      "loss": 0.0174,
      "step": 1708940
    },
    {
      "epoch": 2.7967505220504965,
      "grad_norm": 0.24827693402767181,
      "learning_rate": 4.435534352905517e-06,
      "loss": 0.0183,
      "step": 1708960
    },
    {
      "epoch": 2.79678325248915,
      "grad_norm": 0.79543137550354,
      "learning_rate": 4.435468460692001e-06,
      "loss": 0.0176,
      "step": 1708980
    },
    {
      "epoch": 2.796815982927803,
      "grad_norm": 0.09231442958116531,
      "learning_rate": 4.435402568478484e-06,
      "loss": 0.0132,
      "step": 1709000
    },
    {
      "epoch": 2.7968487133664564,
      "grad_norm": 0.6794975996017456,
      "learning_rate": 4.4353366762649665e-06,
      "loss": 0.0219,
      "step": 1709020
    },
    {
      "epoch": 2.79688144380511,
      "grad_norm": 0.3082832396030426,
      "learning_rate": 4.435270784051449e-06,
      "loss": 0.0191,
      "step": 1709040
    },
    {
      "epoch": 2.796914174243763,
      "grad_norm": 0.45089253783226013,
      "learning_rate": 4.435204891837932e-06,
      "loss": 0.0135,
      "step": 1709060
    },
    {
      "epoch": 2.7969469046824167,
      "grad_norm": 0.6537244915962219,
      "learning_rate": 4.435138999624415e-06,
      "loss": 0.0167,
      "step": 1709080
    },
    {
      "epoch": 2.79697963512107,
      "grad_norm": 0.23334893584251404,
      "learning_rate": 4.4350731074108974e-06,
      "loss": 0.0107,
      "step": 1709100
    },
    {
      "epoch": 2.7970123655597234,
      "grad_norm": 0.14909319579601288,
      "learning_rate": 4.43500721519738e-06,
      "loss": 0.0185,
      "step": 1709120
    },
    {
      "epoch": 2.7970450959983766,
      "grad_norm": 0.16210712492465973,
      "learning_rate": 4.434941322983864e-06,
      "loss": 0.0211,
      "step": 1709140
    },
    {
      "epoch": 2.7970778264370297,
      "grad_norm": 0.20242024958133698,
      "learning_rate": 4.4348754307703465e-06,
      "loss": 0.01,
      "step": 1709160
    },
    {
      "epoch": 2.7971105568756833,
      "grad_norm": 0.3487327992916107,
      "learning_rate": 4.434809538556829e-06,
      "loss": 0.0133,
      "step": 1709180
    },
    {
      "epoch": 2.7971432873143365,
      "grad_norm": 0.3349546492099762,
      "learning_rate": 4.434743646343312e-06,
      "loss": 0.0099,
      "step": 1709200
    },
    {
      "epoch": 2.79717601775299,
      "grad_norm": 0.2712300717830658,
      "learning_rate": 4.434677754129795e-06,
      "loss": 0.0127,
      "step": 1709220
    },
    {
      "epoch": 2.7972087481916432,
      "grad_norm": 0.081159807741642,
      "learning_rate": 4.4346118619162775e-06,
      "loss": 0.0119,
      "step": 1709240
    },
    {
      "epoch": 2.797241478630297,
      "grad_norm": 0.3060508668422699,
      "learning_rate": 4.43454596970276e-06,
      "loss": 0.0176,
      "step": 1709260
    },
    {
      "epoch": 2.79727420906895,
      "grad_norm": 0.37955886125564575,
      "learning_rate": 4.434480077489243e-06,
      "loss": 0.0148,
      "step": 1709280
    },
    {
      "epoch": 2.797306939507603,
      "grad_norm": 0.43485027551651,
      "learning_rate": 4.434414185275726e-06,
      "loss": 0.0154,
      "step": 1709300
    },
    {
      "epoch": 2.7973396699462567,
      "grad_norm": 0.21176032721996307,
      "learning_rate": 4.434348293062209e-06,
      "loss": 0.0141,
      "step": 1709320
    },
    {
      "epoch": 2.79737240038491,
      "grad_norm": 0.2673916816711426,
      "learning_rate": 4.434282400848692e-06,
      "loss": 0.0163,
      "step": 1709340
    },
    {
      "epoch": 2.7974051308235635,
      "grad_norm": 0.918246865272522,
      "learning_rate": 4.434216508635175e-06,
      "loss": 0.0115,
      "step": 1709360
    },
    {
      "epoch": 2.7974378612622166,
      "grad_norm": 0.6142465472221375,
      "learning_rate": 4.434150616421658e-06,
      "loss": 0.0097,
      "step": 1709380
    },
    {
      "epoch": 2.79747059170087,
      "grad_norm": 0.07692550867795944,
      "learning_rate": 4.434084724208141e-06,
      "loss": 0.0136,
      "step": 1709400
    },
    {
      "epoch": 2.7975033221395234,
      "grad_norm": 0.09682192653417587,
      "learning_rate": 4.434018831994624e-06,
      "loss": 0.0135,
      "step": 1709420
    },
    {
      "epoch": 2.7975360525781765,
      "grad_norm": 0.44782590866088867,
      "learning_rate": 4.4339529397811066e-06,
      "loss": 0.012,
      "step": 1709440
    },
    {
      "epoch": 2.79756878301683,
      "grad_norm": 0.4547173082828522,
      "learning_rate": 4.433887047567589e-06,
      "loss": 0.0088,
      "step": 1709460
    },
    {
      "epoch": 2.7976015134554832,
      "grad_norm": 0.46024978160858154,
      "learning_rate": 4.433821155354072e-06,
      "loss": 0.0171,
      "step": 1709480
    },
    {
      "epoch": 2.7976342438941364,
      "grad_norm": 0.2627093195915222,
      "learning_rate": 4.433755263140555e-06,
      "loss": 0.02,
      "step": 1709500
    },
    {
      "epoch": 2.79766697433279,
      "grad_norm": 0.4227600395679474,
      "learning_rate": 4.4336893709270375e-06,
      "loss": 0.0142,
      "step": 1709520
    },
    {
      "epoch": 2.7976997047714436,
      "grad_norm": 0.310678094625473,
      "learning_rate": 4.433623478713521e-06,
      "loss": 0.0155,
      "step": 1709540
    },
    {
      "epoch": 2.7977324352100967,
      "grad_norm": 0.9510853290557861,
      "learning_rate": 4.433557586500004e-06,
      "loss": 0.0158,
      "step": 1709560
    },
    {
      "epoch": 2.79776516564875,
      "grad_norm": 0.22097758948802948,
      "learning_rate": 4.433491694286487e-06,
      "loss": 0.0126,
      "step": 1709580
    },
    {
      "epoch": 2.7977978960874035,
      "grad_norm": 0.3183043301105499,
      "learning_rate": 4.433425802072969e-06,
      "loss": 0.0121,
      "step": 1709600
    },
    {
      "epoch": 2.7978306265260566,
      "grad_norm": 0.5315079092979431,
      "learning_rate": 4.433359909859452e-06,
      "loss": 0.0137,
      "step": 1709620
    },
    {
      "epoch": 2.79786335696471,
      "grad_norm": 0.2768875062465668,
      "learning_rate": 4.433294017645935e-06,
      "loss": 0.015,
      "step": 1709640
    },
    {
      "epoch": 2.7978960874033634,
      "grad_norm": 0.34729042649269104,
      "learning_rate": 4.4332281254324175e-06,
      "loss": 0.0126,
      "step": 1709660
    },
    {
      "epoch": 2.797928817842017,
      "grad_norm": 0.2007325142621994,
      "learning_rate": 4.433162233218901e-06,
      "loss": 0.0118,
      "step": 1709680
    },
    {
      "epoch": 2.79796154828067,
      "grad_norm": 0.13750527799129486,
      "learning_rate": 4.433096341005384e-06,
      "loss": 0.0117,
      "step": 1709700
    },
    {
      "epoch": 2.7979942787193233,
      "grad_norm": 0.23584307730197906,
      "learning_rate": 4.433030448791867e-06,
      "loss": 0.0123,
      "step": 1709720
    },
    {
      "epoch": 2.798027009157977,
      "grad_norm": 1.0373640060424805,
      "learning_rate": 4.432964556578349e-06,
      "loss": 0.0155,
      "step": 1709740
    },
    {
      "epoch": 2.79805973959663,
      "grad_norm": 0.24167780578136444,
      "learning_rate": 4.432898664364832e-06,
      "loss": 0.0144,
      "step": 1709760
    },
    {
      "epoch": 2.798092470035283,
      "grad_norm": 0.9494938850402832,
      "learning_rate": 4.432832772151316e-06,
      "loss": 0.0136,
      "step": 1709780
    },
    {
      "epoch": 2.7981252004739368,
      "grad_norm": 0.22516849637031555,
      "learning_rate": 4.432766879937798e-06,
      "loss": 0.0144,
      "step": 1709800
    },
    {
      "epoch": 2.7981579309125904,
      "grad_norm": 0.4770149290561676,
      "learning_rate": 4.432700987724281e-06,
      "loss": 0.0114,
      "step": 1709820
    },
    {
      "epoch": 2.7981906613512435,
      "grad_norm": 0.1255652904510498,
      "learning_rate": 4.432635095510764e-06,
      "loss": 0.0145,
      "step": 1709840
    },
    {
      "epoch": 2.7982233917898967,
      "grad_norm": 0.5367595553398132,
      "learning_rate": 4.432569203297247e-06,
      "loss": 0.011,
      "step": 1709860
    },
    {
      "epoch": 2.7982561222285502,
      "grad_norm": 0.5644578337669373,
      "learning_rate": 4.432503311083729e-06,
      "loss": 0.0178,
      "step": 1709880
    },
    {
      "epoch": 2.7982888526672034,
      "grad_norm": 0.13532330095767975,
      "learning_rate": 4.432437418870212e-06,
      "loss": 0.0106,
      "step": 1709900
    },
    {
      "epoch": 2.7983215831058565,
      "grad_norm": 0.6805073618888855,
      "learning_rate": 4.432371526656695e-06,
      "loss": 0.0082,
      "step": 1709920
    },
    {
      "epoch": 2.79835431354451,
      "grad_norm": 0.16713201999664307,
      "learning_rate": 4.4323056344431784e-06,
      "loss": 0.0122,
      "step": 1709940
    },
    {
      "epoch": 2.7983870439831637,
      "grad_norm": 0.25681522488594055,
      "learning_rate": 4.432239742229661e-06,
      "loss": 0.0119,
      "step": 1709960
    },
    {
      "epoch": 2.798419774421817,
      "grad_norm": 0.45733681321144104,
      "learning_rate": 4.432173850016144e-06,
      "loss": 0.0127,
      "step": 1709980
    },
    {
      "epoch": 2.79845250486047,
      "grad_norm": 0.34807294607162476,
      "learning_rate": 4.432107957802627e-06,
      "loss": 0.0146,
      "step": 1710000
    },
    {
      "epoch": 2.7984852352991236,
      "grad_norm": 0.20469127595424652,
      "learning_rate": 4.432042065589109e-06,
      "loss": 0.0162,
      "step": 1710020
    },
    {
      "epoch": 2.798517965737777,
      "grad_norm": 0.12507584691047668,
      "learning_rate": 4.431976173375593e-06,
      "loss": 0.0145,
      "step": 1710040
    },
    {
      "epoch": 2.79855069617643,
      "grad_norm": 0.0812937542796135,
      "learning_rate": 4.431910281162076e-06,
      "loss": 0.0179,
      "step": 1710060
    },
    {
      "epoch": 2.7985834266150835,
      "grad_norm": 0.2635051906108856,
      "learning_rate": 4.4318443889485585e-06,
      "loss": 0.018,
      "step": 1710080
    },
    {
      "epoch": 2.7986161570537367,
      "grad_norm": 0.5321335792541504,
      "learning_rate": 4.431778496735041e-06,
      "loss": 0.0103,
      "step": 1710100
    },
    {
      "epoch": 2.7986488874923903,
      "grad_norm": 0.2816760241985321,
      "learning_rate": 4.431712604521524e-06,
      "loss": 0.0165,
      "step": 1710120
    },
    {
      "epoch": 2.7986816179310434,
      "grad_norm": 0.19509463012218475,
      "learning_rate": 4.431646712308007e-06,
      "loss": 0.0161,
      "step": 1710140
    },
    {
      "epoch": 2.798714348369697,
      "grad_norm": 0.7178540825843811,
      "learning_rate": 4.43158082009449e-06,
      "loss": 0.0164,
      "step": 1710160
    },
    {
      "epoch": 2.79874707880835,
      "grad_norm": 0.11159707605838776,
      "learning_rate": 4.431514927880973e-06,
      "loss": 0.0105,
      "step": 1710180
    },
    {
      "epoch": 2.7987798092470033,
      "grad_norm": 0.3236144781112671,
      "learning_rate": 4.431449035667456e-06,
      "loss": 0.0095,
      "step": 1710200
    },
    {
      "epoch": 2.798812539685657,
      "grad_norm": 0.4641682803630829,
      "learning_rate": 4.4313831434539385e-06,
      "loss": 0.0139,
      "step": 1710220
    },
    {
      "epoch": 2.79884527012431,
      "grad_norm": 0.5244763493537903,
      "learning_rate": 4.431317251240421e-06,
      "loss": 0.0153,
      "step": 1710240
    },
    {
      "epoch": 2.7988780005629637,
      "grad_norm": 0.3142735958099365,
      "learning_rate": 4.431251359026904e-06,
      "loss": 0.0147,
      "step": 1710260
    },
    {
      "epoch": 2.798910731001617,
      "grad_norm": 0.2540922462940216,
      "learning_rate": 4.431185466813387e-06,
      "loss": 0.0149,
      "step": 1710280
    },
    {
      "epoch": 2.7989434614402704,
      "grad_norm": 0.12959320843219757,
      "learning_rate": 4.4311195745998695e-06,
      "loss": 0.0116,
      "step": 1710300
    },
    {
      "epoch": 2.7989761918789235,
      "grad_norm": 0.0725666880607605,
      "learning_rate": 4.431053682386352e-06,
      "loss": 0.0158,
      "step": 1710320
    },
    {
      "epoch": 2.7990089223175767,
      "grad_norm": 0.4322425425052643,
      "learning_rate": 4.430987790172836e-06,
      "loss": 0.0153,
      "step": 1710340
    },
    {
      "epoch": 2.7990416527562303,
      "grad_norm": 0.8630720376968384,
      "learning_rate": 4.4309218979593185e-06,
      "loss": 0.0138,
      "step": 1710360
    },
    {
      "epoch": 2.7990743831948834,
      "grad_norm": 0.16868126392364502,
      "learning_rate": 4.430856005745801e-06,
      "loss": 0.0127,
      "step": 1710380
    },
    {
      "epoch": 2.799107113633537,
      "grad_norm": 0.352071613073349,
      "learning_rate": 4.430790113532285e-06,
      "loss": 0.0071,
      "step": 1710400
    },
    {
      "epoch": 2.79913984407219,
      "grad_norm": 0.5265167355537415,
      "learning_rate": 4.430724221318768e-06,
      "loss": 0.0136,
      "step": 1710420
    },
    {
      "epoch": 2.799172574510844,
      "grad_norm": 0.3087719678878784,
      "learning_rate": 4.43065832910525e-06,
      "loss": 0.019,
      "step": 1710440
    },
    {
      "epoch": 2.799205304949497,
      "grad_norm": 0.4821210205554962,
      "learning_rate": 4.430592436891733e-06,
      "loss": 0.013,
      "step": 1710460
    },
    {
      "epoch": 2.79923803538815,
      "grad_norm": 0.10553009808063507,
      "learning_rate": 4.430526544678216e-06,
      "loss": 0.0094,
      "step": 1710480
    },
    {
      "epoch": 2.7992707658268037,
      "grad_norm": 0.10903946310281754,
      "learning_rate": 4.4304606524646985e-06,
      "loss": 0.0161,
      "step": 1710500
    },
    {
      "epoch": 2.799303496265457,
      "grad_norm": 0.2727409899234772,
      "learning_rate": 4.430394760251181e-06,
      "loss": 0.0082,
      "step": 1710520
    },
    {
      "epoch": 2.7993362267041104,
      "grad_norm": 0.12605807185173035,
      "learning_rate": 4.430328868037664e-06,
      "loss": 0.0087,
      "step": 1710540
    },
    {
      "epoch": 2.7993689571427636,
      "grad_norm": 0.4476417601108551,
      "learning_rate": 4.430262975824148e-06,
      "loss": 0.0103,
      "step": 1710560
    },
    {
      "epoch": 2.799401687581417,
      "grad_norm": 0.24545647203922272,
      "learning_rate": 4.43019708361063e-06,
      "loss": 0.014,
      "step": 1710580
    },
    {
      "epoch": 2.7994344180200703,
      "grad_norm": 0.23799048364162445,
      "learning_rate": 4.430131191397113e-06,
      "loss": 0.0111,
      "step": 1710600
    },
    {
      "epoch": 2.7994671484587235,
      "grad_norm": 1.1268022060394287,
      "learning_rate": 4.430065299183596e-06,
      "loss": 0.0147,
      "step": 1710620
    },
    {
      "epoch": 2.799499878897377,
      "grad_norm": 0.5602276921272278,
      "learning_rate": 4.4299994069700786e-06,
      "loss": 0.0124,
      "step": 1710640
    },
    {
      "epoch": 2.79953260933603,
      "grad_norm": 0.6439642310142517,
      "learning_rate": 4.429933514756561e-06,
      "loss": 0.013,
      "step": 1710660
    },
    {
      "epoch": 2.799565339774684,
      "grad_norm": 0.1796354502439499,
      "learning_rate": 4.429867622543044e-06,
      "loss": 0.0116,
      "step": 1710680
    },
    {
      "epoch": 2.799598070213337,
      "grad_norm": 0.5080209374427795,
      "learning_rate": 4.429801730329527e-06,
      "loss": 0.0182,
      "step": 1710700
    },
    {
      "epoch": 2.7996308006519905,
      "grad_norm": 0.13978467881679535,
      "learning_rate": 4.4297358381160095e-06,
      "loss": 0.0175,
      "step": 1710720
    },
    {
      "epoch": 2.7996635310906437,
      "grad_norm": 0.45003899931907654,
      "learning_rate": 4.429669945902493e-06,
      "loss": 0.014,
      "step": 1710740
    },
    {
      "epoch": 2.799696261529297,
      "grad_norm": 0.17442719638347626,
      "learning_rate": 4.429604053688976e-06,
      "loss": 0.0138,
      "step": 1710760
    },
    {
      "epoch": 2.7997289919679504,
      "grad_norm": 0.33898627758026123,
      "learning_rate": 4.429538161475459e-06,
      "loss": 0.0222,
      "step": 1710780
    },
    {
      "epoch": 2.7997617224066036,
      "grad_norm": 0.2315150648355484,
      "learning_rate": 4.429472269261942e-06,
      "loss": 0.0149,
      "step": 1710800
    },
    {
      "epoch": 2.799794452845257,
      "grad_norm": 0.21470896899700165,
      "learning_rate": 4.429406377048425e-06,
      "loss": 0.0184,
      "step": 1710820
    },
    {
      "epoch": 2.7998271832839103,
      "grad_norm": 0.5645889043807983,
      "learning_rate": 4.429340484834908e-06,
      "loss": 0.0151,
      "step": 1710840
    },
    {
      "epoch": 2.799859913722564,
      "grad_norm": 0.1787114143371582,
      "learning_rate": 4.42927459262139e-06,
      "loss": 0.0104,
      "step": 1710860
    },
    {
      "epoch": 2.799892644161217,
      "grad_norm": 0.1482572704553604,
      "learning_rate": 4.429208700407873e-06,
      "loss": 0.0175,
      "step": 1710880
    },
    {
      "epoch": 2.7999253745998702,
      "grad_norm": 0.8616834878921509,
      "learning_rate": 4.429142808194356e-06,
      "loss": 0.0182,
      "step": 1710900
    },
    {
      "epoch": 2.799958105038524,
      "grad_norm": 1.4242312908172607,
      "learning_rate": 4.429076915980839e-06,
      "loss": 0.0165,
      "step": 1710920
    },
    {
      "epoch": 2.799990835477177,
      "grad_norm": 0.16443973779678345,
      "learning_rate": 4.429011023767321e-06,
      "loss": 0.0146,
      "step": 1710940
    },
    {
      "epoch": 2.8000235659158306,
      "grad_norm": 0.13654682040214539,
      "learning_rate": 4.428945131553805e-06,
      "loss": 0.0118,
      "step": 1710960
    },
    {
      "epoch": 2.8000562963544837,
      "grad_norm": 0.3845604658126831,
      "learning_rate": 4.428879239340288e-06,
      "loss": 0.0202,
      "step": 1710980
    },
    {
      "epoch": 2.8000890267931373,
      "grad_norm": 0.8895427584648132,
      "learning_rate": 4.4288133471267704e-06,
      "loss": 0.015,
      "step": 1711000
    },
    {
      "epoch": 2.8001217572317905,
      "grad_norm": 0.05088699609041214,
      "learning_rate": 4.428747454913253e-06,
      "loss": 0.0153,
      "step": 1711020
    },
    {
      "epoch": 2.8001544876704436,
      "grad_norm": 0.4608515501022339,
      "learning_rate": 4.428681562699736e-06,
      "loss": 0.0131,
      "step": 1711040
    },
    {
      "epoch": 2.800187218109097,
      "grad_norm": 0.22659799456596375,
      "learning_rate": 4.428615670486219e-06,
      "loss": 0.0166,
      "step": 1711060
    },
    {
      "epoch": 2.8002199485477504,
      "grad_norm": 0.13066235184669495,
      "learning_rate": 4.428549778272701e-06,
      "loss": 0.0078,
      "step": 1711080
    },
    {
      "epoch": 2.8002526789864035,
      "grad_norm": 0.1342245489358902,
      "learning_rate": 4.428483886059185e-06,
      "loss": 0.0104,
      "step": 1711100
    },
    {
      "epoch": 2.800285409425057,
      "grad_norm": 0.19401076436042786,
      "learning_rate": 4.428417993845668e-06,
      "loss": 0.0109,
      "step": 1711120
    },
    {
      "epoch": 2.8003181398637107,
      "grad_norm": 0.2267940491437912,
      "learning_rate": 4.4283521016321505e-06,
      "loss": 0.0102,
      "step": 1711140
    },
    {
      "epoch": 2.800350870302364,
      "grad_norm": 0.24311627447605133,
      "learning_rate": 4.428286209418633e-06,
      "loss": 0.0131,
      "step": 1711160
    },
    {
      "epoch": 2.800383600741017,
      "grad_norm": 0.6125907897949219,
      "learning_rate": 4.428220317205117e-06,
      "loss": 0.0152,
      "step": 1711180
    },
    {
      "epoch": 2.8004163311796706,
      "grad_norm": 1.386242389678955,
      "learning_rate": 4.4281544249915995e-06,
      "loss": 0.0131,
      "step": 1711200
    },
    {
      "epoch": 2.8004490616183237,
      "grad_norm": 0.1959274560213089,
      "learning_rate": 4.428088532778082e-06,
      "loss": 0.0134,
      "step": 1711220
    },
    {
      "epoch": 2.800481792056977,
      "grad_norm": 0.664896547794342,
      "learning_rate": 4.428022640564565e-06,
      "loss": 0.0105,
      "step": 1711240
    },
    {
      "epoch": 2.8005145224956305,
      "grad_norm": 0.3535696864128113,
      "learning_rate": 4.427956748351048e-06,
      "loss": 0.0166,
      "step": 1711260
    },
    {
      "epoch": 2.800547252934284,
      "grad_norm": 0.09752676635980606,
      "learning_rate": 4.4278908561375305e-06,
      "loss": 0.0209,
      "step": 1711280
    },
    {
      "epoch": 2.8005799833729372,
      "grad_norm": 0.14235778152942657,
      "learning_rate": 4.427824963924013e-06,
      "loss": 0.0099,
      "step": 1711300
    },
    {
      "epoch": 2.8006127138115904,
      "grad_norm": 0.4334695637226105,
      "learning_rate": 4.427759071710496e-06,
      "loss": 0.0186,
      "step": 1711320
    },
    {
      "epoch": 2.800645444250244,
      "grad_norm": 0.6136985421180725,
      "learning_rate": 4.427693179496979e-06,
      "loss": 0.0128,
      "step": 1711340
    },
    {
      "epoch": 2.800678174688897,
      "grad_norm": 0.7742792963981628,
      "learning_rate": 4.427627287283462e-06,
      "loss": 0.017,
      "step": 1711360
    },
    {
      "epoch": 2.8007109051275503,
      "grad_norm": 0.4383624494075775,
      "learning_rate": 4.427561395069945e-06,
      "loss": 0.017,
      "step": 1711380
    },
    {
      "epoch": 2.800743635566204,
      "grad_norm": 0.2506466805934906,
      "learning_rate": 4.427495502856428e-06,
      "loss": 0.0084,
      "step": 1711400
    },
    {
      "epoch": 2.8007763660048575,
      "grad_norm": 0.5265721082687378,
      "learning_rate": 4.4274296106429105e-06,
      "loss": 0.0141,
      "step": 1711420
    },
    {
      "epoch": 2.8008090964435106,
      "grad_norm": 0.2651529610157013,
      "learning_rate": 4.427363718429394e-06,
      "loss": 0.0125,
      "step": 1711440
    },
    {
      "epoch": 2.8008418268821638,
      "grad_norm": 0.5842600464820862,
      "learning_rate": 4.427297826215877e-06,
      "loss": 0.0114,
      "step": 1711460
    },
    {
      "epoch": 2.8008745573208174,
      "grad_norm": 0.47284531593322754,
      "learning_rate": 4.4272319340023596e-06,
      "loss": 0.0193,
      "step": 1711480
    },
    {
      "epoch": 2.8009072877594705,
      "grad_norm": 0.08500184118747711,
      "learning_rate": 4.427166041788842e-06,
      "loss": 0.0084,
      "step": 1711500
    },
    {
      "epoch": 2.8009400181981237,
      "grad_norm": 0.4648677408695221,
      "learning_rate": 4.427100149575325e-06,
      "loss": 0.0126,
      "step": 1711520
    },
    {
      "epoch": 2.8009727486367773,
      "grad_norm": 1.8169589042663574,
      "learning_rate": 4.427034257361808e-06,
      "loss": 0.0139,
      "step": 1711540
    },
    {
      "epoch": 2.8010054790754304,
      "grad_norm": 0.203899547457695,
      "learning_rate": 4.4269683651482905e-06,
      "loss": 0.0127,
      "step": 1711560
    },
    {
      "epoch": 2.801038209514084,
      "grad_norm": 0.2583320438861847,
      "learning_rate": 4.426902472934774e-06,
      "loss": 0.0129,
      "step": 1711580
    },
    {
      "epoch": 2.801070939952737,
      "grad_norm": 0.4435648024082184,
      "learning_rate": 4.426836580721257e-06,
      "loss": 0.0127,
      "step": 1711600
    },
    {
      "epoch": 2.8011036703913907,
      "grad_norm": 0.31813111901283264,
      "learning_rate": 4.42677068850774e-06,
      "loss": 0.01,
      "step": 1711620
    },
    {
      "epoch": 2.801136400830044,
      "grad_norm": 0.2309156358242035,
      "learning_rate": 4.426704796294222e-06,
      "loss": 0.0159,
      "step": 1711640
    },
    {
      "epoch": 2.801169131268697,
      "grad_norm": 0.15978184342384338,
      "learning_rate": 4.426638904080705e-06,
      "loss": 0.0134,
      "step": 1711660
    },
    {
      "epoch": 2.8012018617073506,
      "grad_norm": 0.22861753404140472,
      "learning_rate": 4.426573011867188e-06,
      "loss": 0.0136,
      "step": 1711680
    },
    {
      "epoch": 2.801234592146004,
      "grad_norm": 0.5603706240653992,
      "learning_rate": 4.4265071196536706e-06,
      "loss": 0.0165,
      "step": 1711700
    },
    {
      "epoch": 2.8012673225846574,
      "grad_norm": 0.17757059633731842,
      "learning_rate": 4.426441227440153e-06,
      "loss": 0.0165,
      "step": 1711720
    },
    {
      "epoch": 2.8013000530233105,
      "grad_norm": 0.4511343538761139,
      "learning_rate": 4.426375335226636e-06,
      "loss": 0.0116,
      "step": 1711740
    },
    {
      "epoch": 2.801332783461964,
      "grad_norm": 0.14004918932914734,
      "learning_rate": 4.42630944301312e-06,
      "loss": 0.0169,
      "step": 1711760
    },
    {
      "epoch": 2.8013655139006173,
      "grad_norm": 0.21769842505455017,
      "learning_rate": 4.426243550799602e-06,
      "loss": 0.0075,
      "step": 1711780
    },
    {
      "epoch": 2.8013982443392704,
      "grad_norm": 0.42127490043640137,
      "learning_rate": 4.426177658586085e-06,
      "loss": 0.0173,
      "step": 1711800
    },
    {
      "epoch": 2.801430974777924,
      "grad_norm": 0.1179109662771225,
      "learning_rate": 4.426111766372569e-06,
      "loss": 0.0179,
      "step": 1711820
    },
    {
      "epoch": 2.801463705216577,
      "grad_norm": 0.45851194858551025,
      "learning_rate": 4.4260458741590514e-06,
      "loss": 0.0151,
      "step": 1711840
    },
    {
      "epoch": 2.8014964356552308,
      "grad_norm": 2.4148311614990234,
      "learning_rate": 4.425979981945534e-06,
      "loss": 0.0123,
      "step": 1711860
    },
    {
      "epoch": 2.801529166093884,
      "grad_norm": 0.2805365324020386,
      "learning_rate": 4.425914089732017e-06,
      "loss": 0.0197,
      "step": 1711880
    },
    {
      "epoch": 2.8015618965325375,
      "grad_norm": 0.3653389811515808,
      "learning_rate": 4.4258481975185e-06,
      "loss": 0.0179,
      "step": 1711900
    },
    {
      "epoch": 2.8015946269711907,
      "grad_norm": 0.46656808257102966,
      "learning_rate": 4.425782305304982e-06,
      "loss": 0.0137,
      "step": 1711920
    },
    {
      "epoch": 2.801627357409844,
      "grad_norm": 0.08253075182437897,
      "learning_rate": 4.425716413091465e-06,
      "loss": 0.0142,
      "step": 1711940
    },
    {
      "epoch": 2.8016600878484974,
      "grad_norm": 0.3521989583969116,
      "learning_rate": 4.425650520877948e-06,
      "loss": 0.0136,
      "step": 1711960
    },
    {
      "epoch": 2.8016928182871506,
      "grad_norm": 0.30606788396835327,
      "learning_rate": 4.4255846286644315e-06,
      "loss": 0.0131,
      "step": 1711980
    },
    {
      "epoch": 2.801725548725804,
      "grad_norm": 0.07475631684064865,
      "learning_rate": 4.425518736450914e-06,
      "loss": 0.0176,
      "step": 1712000
    },
    {
      "epoch": 2.8017582791644573,
      "grad_norm": 0.6099033951759338,
      "learning_rate": 4.425452844237397e-06,
      "loss": 0.0188,
      "step": 1712020
    },
    {
      "epoch": 2.801791009603111,
      "grad_norm": 0.8895081877708435,
      "learning_rate": 4.42538695202388e-06,
      "loss": 0.0085,
      "step": 1712040
    },
    {
      "epoch": 2.801823740041764,
      "grad_norm": 0.3843924105167389,
      "learning_rate": 4.425321059810362e-06,
      "loss": 0.0185,
      "step": 1712060
    },
    {
      "epoch": 2.801856470480417,
      "grad_norm": 0.4702185392379761,
      "learning_rate": 4.425255167596845e-06,
      "loss": 0.0101,
      "step": 1712080
    },
    {
      "epoch": 2.801889200919071,
      "grad_norm": 0.49116069078445435,
      "learning_rate": 4.425189275383328e-06,
      "loss": 0.0117,
      "step": 1712100
    },
    {
      "epoch": 2.801921931357724,
      "grad_norm": 0.16803167760372162,
      "learning_rate": 4.425123383169811e-06,
      "loss": 0.0094,
      "step": 1712120
    },
    {
      "epoch": 2.8019546617963775,
      "grad_norm": 0.21959790587425232,
      "learning_rate": 4.425057490956294e-06,
      "loss": 0.0121,
      "step": 1712140
    },
    {
      "epoch": 2.8019873922350307,
      "grad_norm": 0.4421198070049286,
      "learning_rate": 4.424991598742777e-06,
      "loss": 0.0162,
      "step": 1712160
    },
    {
      "epoch": 2.8020201226736843,
      "grad_norm": 0.3144896626472473,
      "learning_rate": 4.42492570652926e-06,
      "loss": 0.0139,
      "step": 1712180
    },
    {
      "epoch": 2.8020528531123374,
      "grad_norm": 0.4975573718547821,
      "learning_rate": 4.4248598143157424e-06,
      "loss": 0.0138,
      "step": 1712200
    },
    {
      "epoch": 2.8020855835509906,
      "grad_norm": 0.7965955138206482,
      "learning_rate": 4.424793922102226e-06,
      "loss": 0.0195,
      "step": 1712220
    },
    {
      "epoch": 2.802118313989644,
      "grad_norm": 0.6012554168701172,
      "learning_rate": 4.424728029888709e-06,
      "loss": 0.0144,
      "step": 1712240
    },
    {
      "epoch": 2.8021510444282973,
      "grad_norm": 0.14081773161888123,
      "learning_rate": 4.4246621376751915e-06,
      "loss": 0.0111,
      "step": 1712260
    },
    {
      "epoch": 2.802183774866951,
      "grad_norm": 0.1263190507888794,
      "learning_rate": 4.424596245461674e-06,
      "loss": 0.0113,
      "step": 1712280
    },
    {
      "epoch": 2.802216505305604,
      "grad_norm": 0.1846945583820343,
      "learning_rate": 4.424530353248157e-06,
      "loss": 0.0136,
      "step": 1712300
    },
    {
      "epoch": 2.8022492357442577,
      "grad_norm": 0.45375746488571167,
      "learning_rate": 4.42446446103464e-06,
      "loss": 0.0161,
      "step": 1712320
    },
    {
      "epoch": 2.802281966182911,
      "grad_norm": 0.19169358909130096,
      "learning_rate": 4.4243985688211225e-06,
      "loss": 0.0111,
      "step": 1712340
    },
    {
      "epoch": 2.802314696621564,
      "grad_norm": 0.2923942506313324,
      "learning_rate": 4.424332676607605e-06,
      "loss": 0.013,
      "step": 1712360
    },
    {
      "epoch": 2.8023474270602176,
      "grad_norm": 0.3103039562702179,
      "learning_rate": 4.424266784394089e-06,
      "loss": 0.0143,
      "step": 1712380
    },
    {
      "epoch": 2.8023801574988707,
      "grad_norm": 0.34302279353141785,
      "learning_rate": 4.4242008921805715e-06,
      "loss": 0.0157,
      "step": 1712400
    },
    {
      "epoch": 2.8024128879375243,
      "grad_norm": 0.3314288258552551,
      "learning_rate": 4.424134999967054e-06,
      "loss": 0.0137,
      "step": 1712420
    },
    {
      "epoch": 2.8024456183761774,
      "grad_norm": 0.15196436643600464,
      "learning_rate": 4.424069107753537e-06,
      "loss": 0.0112,
      "step": 1712440
    },
    {
      "epoch": 2.802478348814831,
      "grad_norm": 0.3785666525363922,
      "learning_rate": 4.42400321554002e-06,
      "loss": 0.0142,
      "step": 1712460
    },
    {
      "epoch": 2.802511079253484,
      "grad_norm": 0.22281964123249054,
      "learning_rate": 4.4239373233265025e-06,
      "loss": 0.0117,
      "step": 1712480
    },
    {
      "epoch": 2.8025438096921373,
      "grad_norm": 0.13407042622566223,
      "learning_rate": 4.423871431112986e-06,
      "loss": 0.0147,
      "step": 1712500
    },
    {
      "epoch": 2.802576540130791,
      "grad_norm": 0.3156810700893402,
      "learning_rate": 4.423805538899469e-06,
      "loss": 0.0138,
      "step": 1712520
    },
    {
      "epoch": 2.802609270569444,
      "grad_norm": 0.1182035356760025,
      "learning_rate": 4.4237396466859516e-06,
      "loss": 0.0158,
      "step": 1712540
    },
    {
      "epoch": 2.8026420010080972,
      "grad_norm": 0.14055506885051727,
      "learning_rate": 4.423673754472434e-06,
      "loss": 0.0098,
      "step": 1712560
    },
    {
      "epoch": 2.802674731446751,
      "grad_norm": 0.38649794459342957,
      "learning_rate": 4.423607862258917e-06,
      "loss": 0.0126,
      "step": 1712580
    },
    {
      "epoch": 2.8027074618854044,
      "grad_norm": 0.35188812017440796,
      "learning_rate": 4.423541970045401e-06,
      "loss": 0.0121,
      "step": 1712600
    },
    {
      "epoch": 2.8027401923240576,
      "grad_norm": 0.3576256334781647,
      "learning_rate": 4.423476077831883e-06,
      "loss": 0.0156,
      "step": 1712620
    },
    {
      "epoch": 2.8027729227627107,
      "grad_norm": 0.10990229994058609,
      "learning_rate": 4.423410185618366e-06,
      "loss": 0.0124,
      "step": 1712640
    },
    {
      "epoch": 2.8028056532013643,
      "grad_norm": 0.9054323434829712,
      "learning_rate": 4.423344293404849e-06,
      "loss": 0.0103,
      "step": 1712660
    },
    {
      "epoch": 2.8028383836400175,
      "grad_norm": 0.346844345331192,
      "learning_rate": 4.423278401191332e-06,
      "loss": 0.0222,
      "step": 1712680
    },
    {
      "epoch": 2.8028711140786706,
      "grad_norm": 0.5951820015907288,
      "learning_rate": 4.423212508977814e-06,
      "loss": 0.0107,
      "step": 1712700
    },
    {
      "epoch": 2.802903844517324,
      "grad_norm": 0.6020839810371399,
      "learning_rate": 4.423146616764297e-06,
      "loss": 0.0149,
      "step": 1712720
    },
    {
      "epoch": 2.802936574955978,
      "grad_norm": 1.0971639156341553,
      "learning_rate": 4.42308072455078e-06,
      "loss": 0.0185,
      "step": 1712740
    },
    {
      "epoch": 2.802969305394631,
      "grad_norm": 2.3593318462371826,
      "learning_rate": 4.4230148323372625e-06,
      "loss": 0.0121,
      "step": 1712760
    },
    {
      "epoch": 2.803002035833284,
      "grad_norm": 1.1219695806503296,
      "learning_rate": 4.422948940123746e-06,
      "loss": 0.0109,
      "step": 1712780
    },
    {
      "epoch": 2.8030347662719377,
      "grad_norm": 0.2070249766111374,
      "learning_rate": 4.422883047910229e-06,
      "loss": 0.0163,
      "step": 1712800
    },
    {
      "epoch": 2.803067496710591,
      "grad_norm": 0.1438029408454895,
      "learning_rate": 4.422817155696712e-06,
      "loss": 0.0136,
      "step": 1712820
    },
    {
      "epoch": 2.803100227149244,
      "grad_norm": 0.2564525604248047,
      "learning_rate": 4.422751263483194e-06,
      "loss": 0.0182,
      "step": 1712840
    },
    {
      "epoch": 2.8031329575878976,
      "grad_norm": 0.22675573825836182,
      "learning_rate": 4.422685371269678e-06,
      "loss": 0.0149,
      "step": 1712860
    },
    {
      "epoch": 2.803165688026551,
      "grad_norm": 0.2292187511920929,
      "learning_rate": 4.422619479056161e-06,
      "loss": 0.0158,
      "step": 1712880
    },
    {
      "epoch": 2.8031984184652043,
      "grad_norm": 0.4957897365093231,
      "learning_rate": 4.422553586842643e-06,
      "loss": 0.0113,
      "step": 1712900
    },
    {
      "epoch": 2.8032311489038575,
      "grad_norm": 0.12063702940940857,
      "learning_rate": 4.422487694629126e-06,
      "loss": 0.0185,
      "step": 1712920
    },
    {
      "epoch": 2.803263879342511,
      "grad_norm": 0.8199653625488281,
      "learning_rate": 4.422421802415609e-06,
      "loss": 0.0196,
      "step": 1712940
    },
    {
      "epoch": 2.8032966097811642,
      "grad_norm": 0.37541764974594116,
      "learning_rate": 4.422355910202092e-06,
      "loss": 0.0116,
      "step": 1712960
    },
    {
      "epoch": 2.8033293402198174,
      "grad_norm": 0.764013409614563,
      "learning_rate": 4.422290017988574e-06,
      "loss": 0.0138,
      "step": 1712980
    },
    {
      "epoch": 2.803362070658471,
      "grad_norm": 0.665962815284729,
      "learning_rate": 4.422224125775058e-06,
      "loss": 0.0142,
      "step": 1713000
    },
    {
      "epoch": 2.8033948010971246,
      "grad_norm": 0.6939478516578674,
      "learning_rate": 4.422158233561541e-06,
      "loss": 0.0151,
      "step": 1713020
    },
    {
      "epoch": 2.8034275315357777,
      "grad_norm": 0.4677561819553375,
      "learning_rate": 4.4220923413480234e-06,
      "loss": 0.0168,
      "step": 1713040
    },
    {
      "epoch": 2.803460261974431,
      "grad_norm": 0.497660368680954,
      "learning_rate": 4.422026449134506e-06,
      "loss": 0.0146,
      "step": 1713060
    },
    {
      "epoch": 2.8034929924130845,
      "grad_norm": 0.3205140233039856,
      "learning_rate": 4.421960556920989e-06,
      "loss": 0.0165,
      "step": 1713080
    },
    {
      "epoch": 2.8035257228517376,
      "grad_norm": 0.8135414123535156,
      "learning_rate": 4.421894664707472e-06,
      "loss": 0.0186,
      "step": 1713100
    },
    {
      "epoch": 2.8035584532903908,
      "grad_norm": 0.09442202001810074,
      "learning_rate": 4.421828772493954e-06,
      "loss": 0.0127,
      "step": 1713120
    },
    {
      "epoch": 2.8035911837290444,
      "grad_norm": 0.11773116141557693,
      "learning_rate": 4.421762880280437e-06,
      "loss": 0.0162,
      "step": 1713140
    },
    {
      "epoch": 2.8036239141676975,
      "grad_norm": 0.4508926570415497,
      "learning_rate": 4.42169698806692e-06,
      "loss": 0.0182,
      "step": 1713160
    },
    {
      "epoch": 2.803656644606351,
      "grad_norm": 0.539239764213562,
      "learning_rate": 4.4216310958534035e-06,
      "loss": 0.0137,
      "step": 1713180
    },
    {
      "epoch": 2.8036893750450043,
      "grad_norm": 0.19499091804027557,
      "learning_rate": 4.421565203639886e-06,
      "loss": 0.0133,
      "step": 1713200
    },
    {
      "epoch": 2.803722105483658,
      "grad_norm": 0.313213586807251,
      "learning_rate": 4.421499311426369e-06,
      "loss": 0.0131,
      "step": 1713220
    },
    {
      "epoch": 2.803754835922311,
      "grad_norm": 0.19778922200202942,
      "learning_rate": 4.4214334192128525e-06,
      "loss": 0.0116,
      "step": 1713240
    },
    {
      "epoch": 2.803787566360964,
      "grad_norm": 0.12340372800827026,
      "learning_rate": 4.421367526999335e-06,
      "loss": 0.0126,
      "step": 1713260
    },
    {
      "epoch": 2.8038202967996178,
      "grad_norm": 0.3376021385192871,
      "learning_rate": 4.421301634785818e-06,
      "loss": 0.0138,
      "step": 1713280
    },
    {
      "epoch": 2.803853027238271,
      "grad_norm": 0.2631879150867462,
      "learning_rate": 4.421235742572301e-06,
      "loss": 0.0123,
      "step": 1713300
    },
    {
      "epoch": 2.8038857576769245,
      "grad_norm": 0.27234095335006714,
      "learning_rate": 4.4211698503587835e-06,
      "loss": 0.0122,
      "step": 1713320
    },
    {
      "epoch": 2.8039184881155776,
      "grad_norm": 0.12697310745716095,
      "learning_rate": 4.421103958145266e-06,
      "loss": 0.0129,
      "step": 1713340
    },
    {
      "epoch": 2.8039512185542312,
      "grad_norm": 0.19087807834148407,
      "learning_rate": 4.421038065931749e-06,
      "loss": 0.0175,
      "step": 1713360
    },
    {
      "epoch": 2.8039839489928844,
      "grad_norm": 0.059672292321920395,
      "learning_rate": 4.420972173718232e-06,
      "loss": 0.0149,
      "step": 1713380
    },
    {
      "epoch": 2.8040166794315375,
      "grad_norm": 0.29770350456237793,
      "learning_rate": 4.420906281504715e-06,
      "loss": 0.0147,
      "step": 1713400
    },
    {
      "epoch": 2.804049409870191,
      "grad_norm": 0.17879903316497803,
      "learning_rate": 4.420840389291198e-06,
      "loss": 0.0131,
      "step": 1713420
    },
    {
      "epoch": 2.8040821403088443,
      "grad_norm": 0.25795602798461914,
      "learning_rate": 4.420774497077681e-06,
      "loss": 0.0126,
      "step": 1713440
    },
    {
      "epoch": 2.804114870747498,
      "grad_norm": 0.09428969770669937,
      "learning_rate": 4.4207086048641635e-06,
      "loss": 0.0183,
      "step": 1713460
    },
    {
      "epoch": 2.804147601186151,
      "grad_norm": 0.37424561381340027,
      "learning_rate": 4.420642712650646e-06,
      "loss": 0.0184,
      "step": 1713480
    },
    {
      "epoch": 2.8041803316248046,
      "grad_norm": 0.47795969247817993,
      "learning_rate": 4.420576820437129e-06,
      "loss": 0.0124,
      "step": 1713500
    },
    {
      "epoch": 2.8042130620634578,
      "grad_norm": 0.1646452397108078,
      "learning_rate": 4.420510928223612e-06,
      "loss": 0.0112,
      "step": 1713520
    },
    {
      "epoch": 2.804245792502111,
      "grad_norm": 0.45674094557762146,
      "learning_rate": 4.4204450360100945e-06,
      "loss": 0.0151,
      "step": 1713540
    },
    {
      "epoch": 2.8042785229407645,
      "grad_norm": 0.43926936388015747,
      "learning_rate": 4.420379143796578e-06,
      "loss": 0.0121,
      "step": 1713560
    },
    {
      "epoch": 2.8043112533794177,
      "grad_norm": 0.20510239899158478,
      "learning_rate": 4.420313251583061e-06,
      "loss": 0.0102,
      "step": 1713580
    },
    {
      "epoch": 2.8043439838180713,
      "grad_norm": 0.2554461658000946,
      "learning_rate": 4.4202473593695435e-06,
      "loss": 0.0134,
      "step": 1713600
    },
    {
      "epoch": 2.8043767142567244,
      "grad_norm": 0.31740301847457886,
      "learning_rate": 4.420181467156026e-06,
      "loss": 0.0167,
      "step": 1713620
    },
    {
      "epoch": 2.804409444695378,
      "grad_norm": 0.5353816747665405,
      "learning_rate": 4.42011557494251e-06,
      "loss": 0.0101,
      "step": 1713640
    },
    {
      "epoch": 2.804442175134031,
      "grad_norm": 0.5897719860076904,
      "learning_rate": 4.420049682728993e-06,
      "loss": 0.0132,
      "step": 1713660
    },
    {
      "epoch": 2.8044749055726843,
      "grad_norm": 0.1627771407365799,
      "learning_rate": 4.419983790515475e-06,
      "loss": 0.0112,
      "step": 1713680
    },
    {
      "epoch": 2.804507636011338,
      "grad_norm": 5.95507287979126,
      "learning_rate": 4.419917898301958e-06,
      "loss": 0.0104,
      "step": 1713700
    },
    {
      "epoch": 2.804540366449991,
      "grad_norm": 0.36080649495124817,
      "learning_rate": 4.419852006088441e-06,
      "loss": 0.0137,
      "step": 1713720
    },
    {
      "epoch": 2.8045730968886446,
      "grad_norm": 0.19503074884414673,
      "learning_rate": 4.4197861138749236e-06,
      "loss": 0.0134,
      "step": 1713740
    },
    {
      "epoch": 2.804605827327298,
      "grad_norm": 0.34705495834350586,
      "learning_rate": 4.419720221661406e-06,
      "loss": 0.0109,
      "step": 1713760
    },
    {
      "epoch": 2.8046385577659514,
      "grad_norm": 0.3944191038608551,
      "learning_rate": 4.419654329447889e-06,
      "loss": 0.0135,
      "step": 1713780
    },
    {
      "epoch": 2.8046712882046045,
      "grad_norm": 0.3189755082130432,
      "learning_rate": 4.419588437234373e-06,
      "loss": 0.0149,
      "step": 1713800
    },
    {
      "epoch": 2.8047040186432577,
      "grad_norm": 0.1411389261484146,
      "learning_rate": 4.419522545020855e-06,
      "loss": 0.0108,
      "step": 1713820
    },
    {
      "epoch": 2.8047367490819113,
      "grad_norm": 0.3679802119731903,
      "learning_rate": 4.419456652807338e-06,
      "loss": 0.0126,
      "step": 1713840
    },
    {
      "epoch": 2.8047694795205644,
      "grad_norm": 0.16863510012626648,
      "learning_rate": 4.419390760593821e-06,
      "loss": 0.0123,
      "step": 1713860
    },
    {
      "epoch": 2.804802209959218,
      "grad_norm": 0.21090400218963623,
      "learning_rate": 4.419324868380304e-06,
      "loss": 0.0135,
      "step": 1713880
    },
    {
      "epoch": 2.804834940397871,
      "grad_norm": 0.3532800078392029,
      "learning_rate": 4.419258976166787e-06,
      "loss": 0.0168,
      "step": 1713900
    },
    {
      "epoch": 2.8048676708365248,
      "grad_norm": 0.045931458473205566,
      "learning_rate": 4.41919308395327e-06,
      "loss": 0.0135,
      "step": 1713920
    },
    {
      "epoch": 2.804900401275178,
      "grad_norm": 0.07419043779373169,
      "learning_rate": 4.419127191739753e-06,
      "loss": 0.0102,
      "step": 1713940
    },
    {
      "epoch": 2.804933131713831,
      "grad_norm": 0.4074400067329407,
      "learning_rate": 4.419061299526235e-06,
      "loss": 0.0085,
      "step": 1713960
    },
    {
      "epoch": 2.8049658621524847,
      "grad_norm": 0.32328009605407715,
      "learning_rate": 4.418995407312718e-06,
      "loss": 0.0152,
      "step": 1713980
    },
    {
      "epoch": 2.804998592591138,
      "grad_norm": 0.3123663067817688,
      "learning_rate": 4.418929515099201e-06,
      "loss": 0.0103,
      "step": 1714000
    },
    {
      "epoch": 2.805031323029791,
      "grad_norm": 0.26308387517929077,
      "learning_rate": 4.4188636228856845e-06,
      "loss": 0.0117,
      "step": 1714020
    },
    {
      "epoch": 2.8050640534684446,
      "grad_norm": 0.3821689784526825,
      "learning_rate": 4.418797730672167e-06,
      "loss": 0.0105,
      "step": 1714040
    },
    {
      "epoch": 2.805096783907098,
      "grad_norm": 0.38505658507347107,
      "learning_rate": 4.41873183845865e-06,
      "loss": 0.0125,
      "step": 1714060
    },
    {
      "epoch": 2.8051295143457513,
      "grad_norm": 0.21014566719532013,
      "learning_rate": 4.418665946245133e-06,
      "loss": 0.0134,
      "step": 1714080
    },
    {
      "epoch": 2.8051622447844045,
      "grad_norm": 0.14317956566810608,
      "learning_rate": 4.418600054031615e-06,
      "loss": 0.0145,
      "step": 1714100
    },
    {
      "epoch": 2.805194975223058,
      "grad_norm": 0.8573331236839294,
      "learning_rate": 4.418534161818098e-06,
      "loss": 0.0149,
      "step": 1714120
    },
    {
      "epoch": 2.805227705661711,
      "grad_norm": 0.4171769618988037,
      "learning_rate": 4.418468269604581e-06,
      "loss": 0.0085,
      "step": 1714140
    },
    {
      "epoch": 2.8052604361003644,
      "grad_norm": 0.6068707704544067,
      "learning_rate": 4.418402377391064e-06,
      "loss": 0.0068,
      "step": 1714160
    },
    {
      "epoch": 2.805293166539018,
      "grad_norm": 0.45788463950157166,
      "learning_rate": 4.418336485177546e-06,
      "loss": 0.0129,
      "step": 1714180
    },
    {
      "epoch": 2.8053258969776715,
      "grad_norm": 0.10014532506465912,
      "learning_rate": 4.41827059296403e-06,
      "loss": 0.0145,
      "step": 1714200
    },
    {
      "epoch": 2.8053586274163247,
      "grad_norm": 0.14923322200775146,
      "learning_rate": 4.418204700750513e-06,
      "loss": 0.0185,
      "step": 1714220
    },
    {
      "epoch": 2.805391357854978,
      "grad_norm": 0.2743220329284668,
      "learning_rate": 4.4181388085369954e-06,
      "loss": 0.0141,
      "step": 1714240
    },
    {
      "epoch": 2.8054240882936314,
      "grad_norm": 0.7215632796287537,
      "learning_rate": 4.418072916323479e-06,
      "loss": 0.0078,
      "step": 1714260
    },
    {
      "epoch": 2.8054568187322846,
      "grad_norm": 0.9565721154212952,
      "learning_rate": 4.418007024109962e-06,
      "loss": 0.0159,
      "step": 1714280
    },
    {
      "epoch": 2.8054895491709377,
      "grad_norm": 0.28140416741371155,
      "learning_rate": 4.4179411318964445e-06,
      "loss": 0.0139,
      "step": 1714300
    },
    {
      "epoch": 2.8055222796095913,
      "grad_norm": 0.3712557554244995,
      "learning_rate": 4.417875239682927e-06,
      "loss": 0.0122,
      "step": 1714320
    },
    {
      "epoch": 2.805555010048245,
      "grad_norm": 0.3059718608856201,
      "learning_rate": 4.41780934746941e-06,
      "loss": 0.0129,
      "step": 1714340
    },
    {
      "epoch": 2.805587740486898,
      "grad_norm": 0.9237450957298279,
      "learning_rate": 4.417743455255893e-06,
      "loss": 0.011,
      "step": 1714360
    },
    {
      "epoch": 2.8056204709255512,
      "grad_norm": 0.309500128030777,
      "learning_rate": 4.4176775630423755e-06,
      "loss": 0.0125,
      "step": 1714380
    },
    {
      "epoch": 2.805653201364205,
      "grad_norm": 0.09491962939500809,
      "learning_rate": 4.417611670828858e-06,
      "loss": 0.0137,
      "step": 1714400
    },
    {
      "epoch": 2.805685931802858,
      "grad_norm": 0.24623176455497742,
      "learning_rate": 4.417545778615342e-06,
      "loss": 0.0132,
      "step": 1714420
    },
    {
      "epoch": 2.805718662241511,
      "grad_norm": 0.15095612406730652,
      "learning_rate": 4.4174798864018245e-06,
      "loss": 0.014,
      "step": 1714440
    },
    {
      "epoch": 2.8057513926801647,
      "grad_norm": 0.24452878534793854,
      "learning_rate": 4.417413994188307e-06,
      "loss": 0.012,
      "step": 1714460
    },
    {
      "epoch": 2.8057841231188183,
      "grad_norm": 0.10707134008407593,
      "learning_rate": 4.41734810197479e-06,
      "loss": 0.0225,
      "step": 1714480
    },
    {
      "epoch": 2.8058168535574715,
      "grad_norm": 0.2225702702999115,
      "learning_rate": 4.417282209761273e-06,
      "loss": 0.0152,
      "step": 1714500
    },
    {
      "epoch": 2.8058495839961246,
      "grad_norm": 0.2423270046710968,
      "learning_rate": 4.4172163175477555e-06,
      "loss": 0.0099,
      "step": 1714520
    },
    {
      "epoch": 2.805882314434778,
      "grad_norm": 0.3527008593082428,
      "learning_rate": 4.417150425334238e-06,
      "loss": 0.0118,
      "step": 1714540
    },
    {
      "epoch": 2.8059150448734314,
      "grad_norm": 0.6207438111305237,
      "learning_rate": 4.417084533120721e-06,
      "loss": 0.0132,
      "step": 1714560
    },
    {
      "epoch": 2.8059477753120845,
      "grad_norm": 0.02369685471057892,
      "learning_rate": 4.417018640907204e-06,
      "loss": 0.0191,
      "step": 1714580
    },
    {
      "epoch": 2.805980505750738,
      "grad_norm": 0.17719724774360657,
      "learning_rate": 4.416952748693687e-06,
      "loss": 0.0098,
      "step": 1714600
    },
    {
      "epoch": 2.8060132361893912,
      "grad_norm": 0.10613227635622025,
      "learning_rate": 4.41688685648017e-06,
      "loss": 0.0127,
      "step": 1714620
    },
    {
      "epoch": 2.806045966628045,
      "grad_norm": 0.2996383011341095,
      "learning_rate": 4.416820964266653e-06,
      "loss": 0.0116,
      "step": 1714640
    },
    {
      "epoch": 2.806078697066698,
      "grad_norm": 0.10671564936637878,
      "learning_rate": 4.416755072053136e-06,
      "loss": 0.0166,
      "step": 1714660
    },
    {
      "epoch": 2.8061114275053516,
      "grad_norm": 0.22304874658584595,
      "learning_rate": 4.416689179839619e-06,
      "loss": 0.0188,
      "step": 1714680
    },
    {
      "epoch": 2.8061441579440047,
      "grad_norm": 0.3000696003437042,
      "learning_rate": 4.416623287626102e-06,
      "loss": 0.016,
      "step": 1714700
    },
    {
      "epoch": 2.806176888382658,
      "grad_norm": 0.3751515746116638,
      "learning_rate": 4.416557395412585e-06,
      "loss": 0.0165,
      "step": 1714720
    },
    {
      "epoch": 2.8062096188213115,
      "grad_norm": 0.6783932447433472,
      "learning_rate": 4.416491503199067e-06,
      "loss": 0.0136,
      "step": 1714740
    },
    {
      "epoch": 2.8062423492599646,
      "grad_norm": 0.5551561713218689,
      "learning_rate": 4.41642561098555e-06,
      "loss": 0.0147,
      "step": 1714760
    },
    {
      "epoch": 2.8062750796986182,
      "grad_norm": 0.1564405858516693,
      "learning_rate": 4.416359718772033e-06,
      "loss": 0.0141,
      "step": 1714780
    },
    {
      "epoch": 2.8063078101372714,
      "grad_norm": 0.5460833311080933,
      "learning_rate": 4.4162938265585155e-06,
      "loss": 0.0146,
      "step": 1714800
    },
    {
      "epoch": 2.806340540575925,
      "grad_norm": 0.4809325635433197,
      "learning_rate": 4.416227934344999e-06,
      "loss": 0.0115,
      "step": 1714820
    },
    {
      "epoch": 2.806373271014578,
      "grad_norm": 0.33450692892074585,
      "learning_rate": 4.416162042131482e-06,
      "loss": 0.0152,
      "step": 1714840
    },
    {
      "epoch": 2.8064060014532313,
      "grad_norm": 0.29867568612098694,
      "learning_rate": 4.416096149917965e-06,
      "loss": 0.0089,
      "step": 1714860
    },
    {
      "epoch": 2.806438731891885,
      "grad_norm": 0.3341449201107025,
      "learning_rate": 4.416030257704447e-06,
      "loss": 0.0163,
      "step": 1714880
    },
    {
      "epoch": 2.806471462330538,
      "grad_norm": 0.1265106052160263,
      "learning_rate": 4.41596436549093e-06,
      "loss": 0.0132,
      "step": 1714900
    },
    {
      "epoch": 2.8065041927691916,
      "grad_norm": 0.2006399929523468,
      "learning_rate": 4.415898473277413e-06,
      "loss": 0.0118,
      "step": 1714920
    },
    {
      "epoch": 2.8065369232078448,
      "grad_norm": 0.4513348340988159,
      "learning_rate": 4.4158325810638956e-06,
      "loss": 0.0144,
      "step": 1714940
    },
    {
      "epoch": 2.8065696536464984,
      "grad_norm": 0.40988337993621826,
      "learning_rate": 4.415766688850379e-06,
      "loss": 0.0105,
      "step": 1714960
    },
    {
      "epoch": 2.8066023840851515,
      "grad_norm": 0.20322206616401672,
      "learning_rate": 4.415700796636862e-06,
      "loss": 0.0172,
      "step": 1714980
    },
    {
      "epoch": 2.8066351145238047,
      "grad_norm": 0.2754141688346863,
      "learning_rate": 4.415634904423345e-06,
      "loss": 0.0112,
      "step": 1715000
    },
    {
      "epoch": 2.8066678449624582,
      "grad_norm": 0.07271961867809296,
      "learning_rate": 4.415569012209827e-06,
      "loss": 0.0069,
      "step": 1715020
    },
    {
      "epoch": 2.8067005754011114,
      "grad_norm": 0.4395994544029236,
      "learning_rate": 4.415503119996311e-06,
      "loss": 0.0164,
      "step": 1715040
    },
    {
      "epoch": 2.806733305839765,
      "grad_norm": 0.18084847927093506,
      "learning_rate": 4.415437227782794e-06,
      "loss": 0.0133,
      "step": 1715060
    },
    {
      "epoch": 2.806766036278418,
      "grad_norm": 0.06609956175088882,
      "learning_rate": 4.4153713355692764e-06,
      "loss": 0.0081,
      "step": 1715080
    },
    {
      "epoch": 2.8067987667170717,
      "grad_norm": 0.7781375050544739,
      "learning_rate": 4.415305443355759e-06,
      "loss": 0.0169,
      "step": 1715100
    },
    {
      "epoch": 2.806831497155725,
      "grad_norm": 0.16940872371196747,
      "learning_rate": 4.415239551142242e-06,
      "loss": 0.0134,
      "step": 1715120
    },
    {
      "epoch": 2.806864227594378,
      "grad_norm": 0.09686633944511414,
      "learning_rate": 4.415173658928725e-06,
      "loss": 0.0225,
      "step": 1715140
    },
    {
      "epoch": 2.8068969580330316,
      "grad_norm": 0.5653948187828064,
      "learning_rate": 4.415107766715207e-06,
      "loss": 0.0227,
      "step": 1715160
    },
    {
      "epoch": 2.806929688471685,
      "grad_norm": 0.8296601176261902,
      "learning_rate": 4.41504187450169e-06,
      "loss": 0.0108,
      "step": 1715180
    },
    {
      "epoch": 2.8069624189103384,
      "grad_norm": 0.44471612572669983,
      "learning_rate": 4.414975982288173e-06,
      "loss": 0.0112,
      "step": 1715200
    },
    {
      "epoch": 2.8069951493489915,
      "grad_norm": 0.945685625076294,
      "learning_rate": 4.4149100900746565e-06,
      "loss": 0.0176,
      "step": 1715220
    },
    {
      "epoch": 2.807027879787645,
      "grad_norm": 0.5985879302024841,
      "learning_rate": 4.414844197861139e-06,
      "loss": 0.0093,
      "step": 1715240
    },
    {
      "epoch": 2.8070606102262983,
      "grad_norm": 0.21442092955112457,
      "learning_rate": 4.414778305647622e-06,
      "loss": 0.0138,
      "step": 1715260
    },
    {
      "epoch": 2.8070933406649514,
      "grad_norm": 0.15660236775875092,
      "learning_rate": 4.414712413434105e-06,
      "loss": 0.014,
      "step": 1715280
    },
    {
      "epoch": 2.807126071103605,
      "grad_norm": 0.3074255883693695,
      "learning_rate": 4.4146465212205874e-06,
      "loss": 0.0088,
      "step": 1715300
    },
    {
      "epoch": 2.807158801542258,
      "grad_norm": 1.1884071826934814,
      "learning_rate": 4.414580629007071e-06,
      "loss": 0.0165,
      "step": 1715320
    },
    {
      "epoch": 2.8071915319809118,
      "grad_norm": 0.08642604947090149,
      "learning_rate": 4.414514736793554e-06,
      "loss": 0.0151,
      "step": 1715340
    },
    {
      "epoch": 2.807224262419565,
      "grad_norm": 0.1871531903743744,
      "learning_rate": 4.4144488445800365e-06,
      "loss": 0.0122,
      "step": 1715360
    },
    {
      "epoch": 2.8072569928582185,
      "grad_norm": 0.18268853425979614,
      "learning_rate": 4.414382952366519e-06,
      "loss": 0.0222,
      "step": 1715380
    },
    {
      "epoch": 2.8072897232968717,
      "grad_norm": 0.7116511464118958,
      "learning_rate": 4.414317060153002e-06,
      "loss": 0.0155,
      "step": 1715400
    },
    {
      "epoch": 2.807322453735525,
      "grad_norm": 0.4137687385082245,
      "learning_rate": 4.414251167939485e-06,
      "loss": 0.0132,
      "step": 1715420
    },
    {
      "epoch": 2.8073551841741784,
      "grad_norm": 0.18714642524719238,
      "learning_rate": 4.414185275725968e-06,
      "loss": 0.011,
      "step": 1715440
    },
    {
      "epoch": 2.8073879146128315,
      "grad_norm": 0.4424130618572235,
      "learning_rate": 4.414119383512451e-06,
      "loss": 0.0149,
      "step": 1715460
    },
    {
      "epoch": 2.807420645051485,
      "grad_norm": 0.8883917331695557,
      "learning_rate": 4.414053491298934e-06,
      "loss": 0.014,
      "step": 1715480
    },
    {
      "epoch": 2.8074533754901383,
      "grad_norm": 0.24857190251350403,
      "learning_rate": 4.4139875990854165e-06,
      "loss": 0.0165,
      "step": 1715500
    },
    {
      "epoch": 2.807486105928792,
      "grad_norm": 1.058038592338562,
      "learning_rate": 4.413921706871899e-06,
      "loss": 0.0117,
      "step": 1715520
    },
    {
      "epoch": 2.807518836367445,
      "grad_norm": 0.2722342312335968,
      "learning_rate": 4.413855814658382e-06,
      "loss": 0.0136,
      "step": 1715540
    },
    {
      "epoch": 2.807551566806098,
      "grad_norm": 0.22942671179771423,
      "learning_rate": 4.413789922444865e-06,
      "loss": 0.019,
      "step": 1715560
    },
    {
      "epoch": 2.807584297244752,
      "grad_norm": 1.0793439149856567,
      "learning_rate": 4.4137240302313475e-06,
      "loss": 0.0161,
      "step": 1715580
    },
    {
      "epoch": 2.807617027683405,
      "grad_norm": 0.24325093626976013,
      "learning_rate": 4.41365813801783e-06,
      "loss": 0.0227,
      "step": 1715600
    },
    {
      "epoch": 2.807649758122058,
      "grad_norm": 0.48825329542160034,
      "learning_rate": 4.413592245804314e-06,
      "loss": 0.0131,
      "step": 1715620
    },
    {
      "epoch": 2.8076824885607117,
      "grad_norm": 0.14332805573940277,
      "learning_rate": 4.4135263535907965e-06,
      "loss": 0.015,
      "step": 1715640
    },
    {
      "epoch": 2.8077152189993653,
      "grad_norm": 0.08655773848295212,
      "learning_rate": 4.413460461377279e-06,
      "loss": 0.0182,
      "step": 1715660
    },
    {
      "epoch": 2.8077479494380184,
      "grad_norm": 0.39871683716773987,
      "learning_rate": 4.413394569163763e-06,
      "loss": 0.0099,
      "step": 1715680
    },
    {
      "epoch": 2.8077806798766716,
      "grad_norm": 0.3769977390766144,
      "learning_rate": 4.413328676950246e-06,
      "loss": 0.0169,
      "step": 1715700
    },
    {
      "epoch": 2.807813410315325,
      "grad_norm": 0.29214030504226685,
      "learning_rate": 4.413262784736728e-06,
      "loss": 0.0126,
      "step": 1715720
    },
    {
      "epoch": 2.8078461407539783,
      "grad_norm": 0.2877638041973114,
      "learning_rate": 4.413196892523211e-06,
      "loss": 0.0116,
      "step": 1715740
    },
    {
      "epoch": 2.8078788711926315,
      "grad_norm": 0.17105460166931152,
      "learning_rate": 4.413131000309694e-06,
      "loss": 0.0122,
      "step": 1715760
    },
    {
      "epoch": 2.807911601631285,
      "grad_norm": 0.15756304562091827,
      "learning_rate": 4.4130651080961766e-06,
      "loss": 0.0139,
      "step": 1715780
    },
    {
      "epoch": 2.8079443320699387,
      "grad_norm": 0.25745558738708496,
      "learning_rate": 4.412999215882659e-06,
      "loss": 0.0199,
      "step": 1715800
    },
    {
      "epoch": 2.807977062508592,
      "grad_norm": 0.13963021337985992,
      "learning_rate": 4.412933323669142e-06,
      "loss": 0.0149,
      "step": 1715820
    },
    {
      "epoch": 2.808009792947245,
      "grad_norm": 0.17908267676830292,
      "learning_rate": 4.412867431455626e-06,
      "loss": 0.0148,
      "step": 1715840
    },
    {
      "epoch": 2.8080425233858985,
      "grad_norm": 0.39291614294052124,
      "learning_rate": 4.412801539242108e-06,
      "loss": 0.0152,
      "step": 1715860
    },
    {
      "epoch": 2.8080752538245517,
      "grad_norm": 0.2421635389328003,
      "learning_rate": 4.412735647028591e-06,
      "loss": 0.0142,
      "step": 1715880
    },
    {
      "epoch": 2.808107984263205,
      "grad_norm": 0.2799043655395508,
      "learning_rate": 4.412669754815074e-06,
      "loss": 0.0089,
      "step": 1715900
    },
    {
      "epoch": 2.8081407147018584,
      "grad_norm": 0.15192221105098724,
      "learning_rate": 4.412603862601557e-06,
      "loss": 0.0124,
      "step": 1715920
    },
    {
      "epoch": 2.808173445140512,
      "grad_norm": 0.7758943438529968,
      "learning_rate": 4.412537970388039e-06,
      "loss": 0.0135,
      "step": 1715940
    },
    {
      "epoch": 2.808206175579165,
      "grad_norm": 0.2944159507751465,
      "learning_rate": 4.412472078174522e-06,
      "loss": 0.0091,
      "step": 1715960
    },
    {
      "epoch": 2.8082389060178183,
      "grad_norm": 0.2599806785583496,
      "learning_rate": 4.412406185961005e-06,
      "loss": 0.0122,
      "step": 1715980
    },
    {
      "epoch": 2.808271636456472,
      "grad_norm": 0.16376613080501556,
      "learning_rate": 4.4123402937474876e-06,
      "loss": 0.0168,
      "step": 1716000
    },
    {
      "epoch": 2.808304366895125,
      "grad_norm": 0.8667117357254028,
      "learning_rate": 4.412274401533971e-06,
      "loss": 0.0126,
      "step": 1716020
    },
    {
      "epoch": 2.8083370973337782,
      "grad_norm": 0.8461139798164368,
      "learning_rate": 4.412208509320454e-06,
      "loss": 0.0186,
      "step": 1716040
    },
    {
      "epoch": 2.808369827772432,
      "grad_norm": 0.43465206027030945,
      "learning_rate": 4.412142617106937e-06,
      "loss": 0.0168,
      "step": 1716060
    },
    {
      "epoch": 2.8084025582110854,
      "grad_norm": 0.2696821689605713,
      "learning_rate": 4.41207672489342e-06,
      "loss": 0.0208,
      "step": 1716080
    },
    {
      "epoch": 2.8084352886497386,
      "grad_norm": 0.3701069951057434,
      "learning_rate": 4.412010832679903e-06,
      "loss": 0.0107,
      "step": 1716100
    },
    {
      "epoch": 2.8084680190883917,
      "grad_norm": 0.4571748673915863,
      "learning_rate": 4.411944940466386e-06,
      "loss": 0.0123,
      "step": 1716120
    },
    {
      "epoch": 2.8085007495270453,
      "grad_norm": 0.23543529212474823,
      "learning_rate": 4.411879048252868e-06,
      "loss": 0.0164,
      "step": 1716140
    },
    {
      "epoch": 2.8085334799656985,
      "grad_norm": 0.9606187343597412,
      "learning_rate": 4.411813156039351e-06,
      "loss": 0.0122,
      "step": 1716160
    },
    {
      "epoch": 2.8085662104043516,
      "grad_norm": 0.22113963961601257,
      "learning_rate": 4.411747263825834e-06,
      "loss": 0.0102,
      "step": 1716180
    },
    {
      "epoch": 2.808598940843005,
      "grad_norm": 0.2087882161140442,
      "learning_rate": 4.411681371612317e-06,
      "loss": 0.0105,
      "step": 1716200
    },
    {
      "epoch": 2.8086316712816584,
      "grad_norm": 0.14731140434741974,
      "learning_rate": 4.411615479398799e-06,
      "loss": 0.0123,
      "step": 1716220
    },
    {
      "epoch": 2.808664401720312,
      "grad_norm": 0.2931983172893524,
      "learning_rate": 4.411549587185283e-06,
      "loss": 0.0134,
      "step": 1716240
    },
    {
      "epoch": 2.808697132158965,
      "grad_norm": 0.3084896206855774,
      "learning_rate": 4.411483694971766e-06,
      "loss": 0.0129,
      "step": 1716260
    },
    {
      "epoch": 2.8087298625976187,
      "grad_norm": 0.39470231533050537,
      "learning_rate": 4.4114178027582484e-06,
      "loss": 0.015,
      "step": 1716280
    },
    {
      "epoch": 2.808762593036272,
      "grad_norm": 0.15859335660934448,
      "learning_rate": 4.411351910544731e-06,
      "loss": 0.0146,
      "step": 1716300
    },
    {
      "epoch": 2.808795323474925,
      "grad_norm": 0.38740336894989014,
      "learning_rate": 4.411286018331214e-06,
      "loss": 0.011,
      "step": 1716320
    },
    {
      "epoch": 2.8088280539135786,
      "grad_norm": 0.6071226000785828,
      "learning_rate": 4.411220126117697e-06,
      "loss": 0.0118,
      "step": 1716340
    },
    {
      "epoch": 2.8088607843522317,
      "grad_norm": 0.8672307133674622,
      "learning_rate": 4.411154233904179e-06,
      "loss": 0.0202,
      "step": 1716360
    },
    {
      "epoch": 2.8088935147908853,
      "grad_norm": 0.2194012999534607,
      "learning_rate": 4.411088341690663e-06,
      "loss": 0.011,
      "step": 1716380
    },
    {
      "epoch": 2.8089262452295385,
      "grad_norm": 0.047653183341026306,
      "learning_rate": 4.411022449477146e-06,
      "loss": 0.0126,
      "step": 1716400
    },
    {
      "epoch": 2.808958975668192,
      "grad_norm": 0.1699700802564621,
      "learning_rate": 4.4109565572636285e-06,
      "loss": 0.0177,
      "step": 1716420
    },
    {
      "epoch": 2.8089917061068452,
      "grad_norm": 0.32619211077690125,
      "learning_rate": 4.410890665050111e-06,
      "loss": 0.0158,
      "step": 1716440
    },
    {
      "epoch": 2.8090244365454984,
      "grad_norm": 0.3759007453918457,
      "learning_rate": 4.410824772836595e-06,
      "loss": 0.0144,
      "step": 1716460
    },
    {
      "epoch": 2.809057166984152,
      "grad_norm": 0.4230576753616333,
      "learning_rate": 4.4107588806230775e-06,
      "loss": 0.0113,
      "step": 1716480
    },
    {
      "epoch": 2.809089897422805,
      "grad_norm": 0.22373166680335999,
      "learning_rate": 4.41069298840956e-06,
      "loss": 0.0114,
      "step": 1716500
    },
    {
      "epoch": 2.8091226278614587,
      "grad_norm": 0.3411893844604492,
      "learning_rate": 4.410627096196043e-06,
      "loss": 0.0187,
      "step": 1716520
    },
    {
      "epoch": 2.809155358300112,
      "grad_norm": 0.6569787859916687,
      "learning_rate": 4.410561203982526e-06,
      "loss": 0.0098,
      "step": 1716540
    },
    {
      "epoch": 2.8091880887387655,
      "grad_norm": 0.5949827432632446,
      "learning_rate": 4.4104953117690085e-06,
      "loss": 0.0151,
      "step": 1716560
    },
    {
      "epoch": 2.8092208191774186,
      "grad_norm": 0.5138487815856934,
      "learning_rate": 4.410429419555491e-06,
      "loss": 0.0137,
      "step": 1716580
    },
    {
      "epoch": 2.8092535496160718,
      "grad_norm": 0.1882788985967636,
      "learning_rate": 4.410363527341974e-06,
      "loss": 0.0125,
      "step": 1716600
    },
    {
      "epoch": 2.8092862800547254,
      "grad_norm": 0.13500277698040009,
      "learning_rate": 4.410297635128457e-06,
      "loss": 0.0104,
      "step": 1716620
    },
    {
      "epoch": 2.8093190104933785,
      "grad_norm": 0.29244107007980347,
      "learning_rate": 4.41023174291494e-06,
      "loss": 0.0134,
      "step": 1716640
    },
    {
      "epoch": 2.809351740932032,
      "grad_norm": 0.17878197133541107,
      "learning_rate": 4.410165850701423e-06,
      "loss": 0.0129,
      "step": 1716660
    },
    {
      "epoch": 2.8093844713706853,
      "grad_norm": 6.316009044647217,
      "learning_rate": 4.410099958487906e-06,
      "loss": 0.0111,
      "step": 1716680
    },
    {
      "epoch": 2.809417201809339,
      "grad_norm": 0.3421652317047119,
      "learning_rate": 4.4100340662743885e-06,
      "loss": 0.0185,
      "step": 1716700
    },
    {
      "epoch": 2.809449932247992,
      "grad_norm": 0.4218827188014984,
      "learning_rate": 4.409968174060872e-06,
      "loss": 0.0114,
      "step": 1716720
    },
    {
      "epoch": 2.809482662686645,
      "grad_norm": 0.7321317791938782,
      "learning_rate": 4.409902281847355e-06,
      "loss": 0.0142,
      "step": 1716740
    },
    {
      "epoch": 2.8095153931252987,
      "grad_norm": 0.23654457926750183,
      "learning_rate": 4.409836389633838e-06,
      "loss": 0.0133,
      "step": 1716760
    },
    {
      "epoch": 2.809548123563952,
      "grad_norm": 0.1449350267648697,
      "learning_rate": 4.40977049742032e-06,
      "loss": 0.0092,
      "step": 1716780
    },
    {
      "epoch": 2.8095808540026055,
      "grad_norm": 0.29981744289398193,
      "learning_rate": 4.409704605206803e-06,
      "loss": 0.0138,
      "step": 1716800
    },
    {
      "epoch": 2.8096135844412586,
      "grad_norm": 0.6926199793815613,
      "learning_rate": 4.409638712993286e-06,
      "loss": 0.0102,
      "step": 1716820
    },
    {
      "epoch": 2.8096463148799122,
      "grad_norm": 0.24666163325309753,
      "learning_rate": 4.4095728207797685e-06,
      "loss": 0.0131,
      "step": 1716840
    },
    {
      "epoch": 2.8096790453185654,
      "grad_norm": 0.5849672555923462,
      "learning_rate": 4.409506928566252e-06,
      "loss": 0.014,
      "step": 1716860
    },
    {
      "epoch": 2.8097117757572185,
      "grad_norm": 0.12628962099552155,
      "learning_rate": 4.409441036352735e-06,
      "loss": 0.0092,
      "step": 1716880
    },
    {
      "epoch": 2.809744506195872,
      "grad_norm": 0.26981526613235474,
      "learning_rate": 4.409375144139218e-06,
      "loss": 0.014,
      "step": 1716900
    },
    {
      "epoch": 2.8097772366345253,
      "grad_norm": 1.4047582149505615,
      "learning_rate": 4.4093092519257e-06,
      "loss": 0.0168,
      "step": 1716920
    },
    {
      "epoch": 2.809809967073179,
      "grad_norm": 0.13733525574207306,
      "learning_rate": 4.409243359712183e-06,
      "loss": 0.0084,
      "step": 1716940
    },
    {
      "epoch": 2.809842697511832,
      "grad_norm": 0.522281289100647,
      "learning_rate": 4.409177467498666e-06,
      "loss": 0.0158,
      "step": 1716960
    },
    {
      "epoch": 2.8098754279504856,
      "grad_norm": 0.24498869478702545,
      "learning_rate": 4.4091115752851486e-06,
      "loss": 0.0134,
      "step": 1716980
    },
    {
      "epoch": 2.8099081583891388,
      "grad_norm": 0.6674491763114929,
      "learning_rate": 4.409045683071631e-06,
      "loss": 0.0117,
      "step": 1717000
    },
    {
      "epoch": 2.809940888827792,
      "grad_norm": 0.5228268504142761,
      "learning_rate": 4.408979790858114e-06,
      "loss": 0.0175,
      "step": 1717020
    },
    {
      "epoch": 2.8099736192664455,
      "grad_norm": 0.22484752535820007,
      "learning_rate": 4.408913898644598e-06,
      "loss": 0.0215,
      "step": 1717040
    },
    {
      "epoch": 2.8100063497050987,
      "grad_norm": 0.4959503412246704,
      "learning_rate": 4.40884800643108e-06,
      "loss": 0.0143,
      "step": 1717060
    },
    {
      "epoch": 2.810039080143752,
      "grad_norm": 0.3701442778110504,
      "learning_rate": 4.408782114217563e-06,
      "loss": 0.0128,
      "step": 1717080
    },
    {
      "epoch": 2.8100718105824054,
      "grad_norm": 0.695406973361969,
      "learning_rate": 4.408716222004047e-06,
      "loss": 0.0135,
      "step": 1717100
    },
    {
      "epoch": 2.810104541021059,
      "grad_norm": 0.4076096713542938,
      "learning_rate": 4.4086503297905294e-06,
      "loss": 0.0145,
      "step": 1717120
    },
    {
      "epoch": 2.810137271459712,
      "grad_norm": 0.47460243105888367,
      "learning_rate": 4.408584437577012e-06,
      "loss": 0.0124,
      "step": 1717140
    },
    {
      "epoch": 2.8101700018983653,
      "grad_norm": 0.20982924103736877,
      "learning_rate": 4.408518545363495e-06,
      "loss": 0.0155,
      "step": 1717160
    },
    {
      "epoch": 2.810202732337019,
      "grad_norm": 0.5925379991531372,
      "learning_rate": 4.408452653149978e-06,
      "loss": 0.0104,
      "step": 1717180
    },
    {
      "epoch": 2.810235462775672,
      "grad_norm": 0.1875869482755661,
      "learning_rate": 4.40838676093646e-06,
      "loss": 0.011,
      "step": 1717200
    },
    {
      "epoch": 2.810268193214325,
      "grad_norm": 0.3854079842567444,
      "learning_rate": 4.408320868722943e-06,
      "loss": 0.0112,
      "step": 1717220
    },
    {
      "epoch": 2.810300923652979,
      "grad_norm": 0.24926087260246277,
      "learning_rate": 4.408254976509426e-06,
      "loss": 0.0185,
      "step": 1717240
    },
    {
      "epoch": 2.8103336540916324,
      "grad_norm": 0.462993323802948,
      "learning_rate": 4.4081890842959095e-06,
      "loss": 0.0161,
      "step": 1717260
    },
    {
      "epoch": 2.8103663845302855,
      "grad_norm": 0.6637231707572937,
      "learning_rate": 4.408123192082392e-06,
      "loss": 0.0088,
      "step": 1717280
    },
    {
      "epoch": 2.8103991149689387,
      "grad_norm": 0.6476758122444153,
      "learning_rate": 4.408057299868875e-06,
      "loss": 0.0137,
      "step": 1717300
    },
    {
      "epoch": 2.8104318454075923,
      "grad_norm": 0.28028690814971924,
      "learning_rate": 4.407991407655358e-06,
      "loss": 0.0111,
      "step": 1717320
    },
    {
      "epoch": 2.8104645758462454,
      "grad_norm": 0.2558339536190033,
      "learning_rate": 4.4079255154418404e-06,
      "loss": 0.0115,
      "step": 1717340
    },
    {
      "epoch": 2.8104973062848986,
      "grad_norm": 0.21164445579051971,
      "learning_rate": 4.407859623228323e-06,
      "loss": 0.0131,
      "step": 1717360
    },
    {
      "epoch": 2.810530036723552,
      "grad_norm": 0.2295551747083664,
      "learning_rate": 4.407793731014806e-06,
      "loss": 0.0173,
      "step": 1717380
    },
    {
      "epoch": 2.8105627671622058,
      "grad_norm": 0.333504319190979,
      "learning_rate": 4.407727838801289e-06,
      "loss": 0.012,
      "step": 1717400
    },
    {
      "epoch": 2.810595497600859,
      "grad_norm": 0.4915606677532196,
      "learning_rate": 4.407661946587772e-06,
      "loss": 0.0115,
      "step": 1717420
    },
    {
      "epoch": 2.810628228039512,
      "grad_norm": 0.31697770953178406,
      "learning_rate": 4.407596054374255e-06,
      "loss": 0.0132,
      "step": 1717440
    },
    {
      "epoch": 2.8106609584781657,
      "grad_norm": 0.11898326873779297,
      "learning_rate": 4.407530162160738e-06,
      "loss": 0.0089,
      "step": 1717460
    },
    {
      "epoch": 2.810693688916819,
      "grad_norm": 1.0335620641708374,
      "learning_rate": 4.4074642699472205e-06,
      "loss": 0.0107,
      "step": 1717480
    },
    {
      "epoch": 2.810726419355472,
      "grad_norm": 0.7322537899017334,
      "learning_rate": 4.407398377733704e-06,
      "loss": 0.0198,
      "step": 1717500
    },
    {
      "epoch": 2.8107591497941256,
      "grad_norm": 0.26981255412101746,
      "learning_rate": 4.407332485520187e-06,
      "loss": 0.0113,
      "step": 1717520
    },
    {
      "epoch": 2.810791880232779,
      "grad_norm": 0.18957795202732086,
      "learning_rate": 4.4072665933066695e-06,
      "loss": 0.0125,
      "step": 1717540
    },
    {
      "epoch": 2.8108246106714323,
      "grad_norm": 0.7051098942756653,
      "learning_rate": 4.407200701093152e-06,
      "loss": 0.0167,
      "step": 1717560
    },
    {
      "epoch": 2.8108573411100854,
      "grad_norm": 1.6392279863357544,
      "learning_rate": 4.407134808879635e-06,
      "loss": 0.0127,
      "step": 1717580
    },
    {
      "epoch": 2.810890071548739,
      "grad_norm": 0.321194589138031,
      "learning_rate": 4.407068916666118e-06,
      "loss": 0.0123,
      "step": 1717600
    },
    {
      "epoch": 2.810922801987392,
      "grad_norm": 0.4079321622848511,
      "learning_rate": 4.4070030244526005e-06,
      "loss": 0.0136,
      "step": 1717620
    },
    {
      "epoch": 2.8109555324260453,
      "grad_norm": 0.12693673372268677,
      "learning_rate": 4.406937132239083e-06,
      "loss": 0.0117,
      "step": 1717640
    },
    {
      "epoch": 2.810988262864699,
      "grad_norm": 0.1734393984079361,
      "learning_rate": 4.406871240025567e-06,
      "loss": 0.0105,
      "step": 1717660
    },
    {
      "epoch": 2.811020993303352,
      "grad_norm": 0.10966550558805466,
      "learning_rate": 4.4068053478120495e-06,
      "loss": 0.0168,
      "step": 1717680
    },
    {
      "epoch": 2.8110537237420057,
      "grad_norm": 0.22403091192245483,
      "learning_rate": 4.406739455598532e-06,
      "loss": 0.0171,
      "step": 1717700
    },
    {
      "epoch": 2.811086454180659,
      "grad_norm": 0.1961040049791336,
      "learning_rate": 4.406673563385015e-06,
      "loss": 0.0147,
      "step": 1717720
    },
    {
      "epoch": 2.8111191846193124,
      "grad_norm": 0.4807260036468506,
      "learning_rate": 4.406607671171498e-06,
      "loss": 0.0099,
      "step": 1717740
    },
    {
      "epoch": 2.8111519150579656,
      "grad_norm": 0.49567800760269165,
      "learning_rate": 4.4065417789579805e-06,
      "loss": 0.0188,
      "step": 1717760
    },
    {
      "epoch": 2.8111846454966187,
      "grad_norm": 0.4044230878353119,
      "learning_rate": 4.406475886744464e-06,
      "loss": 0.0132,
      "step": 1717780
    },
    {
      "epoch": 2.8112173759352723,
      "grad_norm": 2.7582390308380127,
      "learning_rate": 4.406409994530947e-06,
      "loss": 0.0107,
      "step": 1717800
    },
    {
      "epoch": 2.8112501063739255,
      "grad_norm": 0.0752127394080162,
      "learning_rate": 4.4063441023174296e-06,
      "loss": 0.013,
      "step": 1717820
    },
    {
      "epoch": 2.811282836812579,
      "grad_norm": 0.759124755859375,
      "learning_rate": 4.406278210103912e-06,
      "loss": 0.0144,
      "step": 1717840
    },
    {
      "epoch": 2.811315567251232,
      "grad_norm": 0.23800691962242126,
      "learning_rate": 4.406212317890395e-06,
      "loss": 0.0091,
      "step": 1717860
    },
    {
      "epoch": 2.811348297689886,
      "grad_norm": 0.15025107562541962,
      "learning_rate": 4.406146425676879e-06,
      "loss": 0.0137,
      "step": 1717880
    },
    {
      "epoch": 2.811381028128539,
      "grad_norm": 0.4803844690322876,
      "learning_rate": 4.406080533463361e-06,
      "loss": 0.0165,
      "step": 1717900
    },
    {
      "epoch": 2.811413758567192,
      "grad_norm": 0.25656527280807495,
      "learning_rate": 4.406014641249844e-06,
      "loss": 0.0106,
      "step": 1717920
    },
    {
      "epoch": 2.8114464890058457,
      "grad_norm": 0.29983535408973694,
      "learning_rate": 4.405948749036327e-06,
      "loss": 0.0072,
      "step": 1717940
    },
    {
      "epoch": 2.811479219444499,
      "grad_norm": 0.5378651022911072,
      "learning_rate": 4.40588285682281e-06,
      "loss": 0.0154,
      "step": 1717960
    },
    {
      "epoch": 2.8115119498831525,
      "grad_norm": 0.4811113178730011,
      "learning_rate": 4.405816964609292e-06,
      "loss": 0.0164,
      "step": 1717980
    },
    {
      "epoch": 2.8115446803218056,
      "grad_norm": 0.4939514696598053,
      "learning_rate": 4.405751072395775e-06,
      "loss": 0.013,
      "step": 1718000
    },
    {
      "epoch": 2.811577410760459,
      "grad_norm": 0.15539216995239258,
      "learning_rate": 4.405685180182258e-06,
      "loss": 0.0126,
      "step": 1718020
    },
    {
      "epoch": 2.8116101411991123,
      "grad_norm": 0.24626770615577698,
      "learning_rate": 4.4056192879687406e-06,
      "loss": 0.0201,
      "step": 1718040
    },
    {
      "epoch": 2.8116428716377655,
      "grad_norm": 0.37570518255233765,
      "learning_rate": 4.405553395755224e-06,
      "loss": 0.0113,
      "step": 1718060
    },
    {
      "epoch": 2.811675602076419,
      "grad_norm": 0.16954809427261353,
      "learning_rate": 4.405487503541707e-06,
      "loss": 0.0162,
      "step": 1718080
    },
    {
      "epoch": 2.8117083325150722,
      "grad_norm": 0.5250110030174255,
      "learning_rate": 4.40542161132819e-06,
      "loss": 0.0174,
      "step": 1718100
    },
    {
      "epoch": 2.811741062953726,
      "grad_norm": 0.7194532752037048,
      "learning_rate": 4.405355719114672e-06,
      "loss": 0.0151,
      "step": 1718120
    },
    {
      "epoch": 2.811773793392379,
      "grad_norm": 0.4312145411968231,
      "learning_rate": 4.405289826901156e-06,
      "loss": 0.0164,
      "step": 1718140
    },
    {
      "epoch": 2.8118065238310326,
      "grad_norm": 0.7195653915405273,
      "learning_rate": 4.405223934687639e-06,
      "loss": 0.0186,
      "step": 1718160
    },
    {
      "epoch": 2.8118392542696857,
      "grad_norm": 0.5004002451896667,
      "learning_rate": 4.4051580424741214e-06,
      "loss": 0.0136,
      "step": 1718180
    },
    {
      "epoch": 2.811871984708339,
      "grad_norm": 0.26325124502182007,
      "learning_rate": 4.405092150260604e-06,
      "loss": 0.0172,
      "step": 1718200
    },
    {
      "epoch": 2.8119047151469925,
      "grad_norm": 0.13477970659732819,
      "learning_rate": 4.405026258047087e-06,
      "loss": 0.0137,
      "step": 1718220
    },
    {
      "epoch": 2.8119374455856456,
      "grad_norm": 0.28306272625923157,
      "learning_rate": 4.40496036583357e-06,
      "loss": 0.0161,
      "step": 1718240
    },
    {
      "epoch": 2.811970176024299,
      "grad_norm": 1.312567114830017,
      "learning_rate": 4.404894473620052e-06,
      "loss": 0.0204,
      "step": 1718260
    },
    {
      "epoch": 2.8120029064629524,
      "grad_norm": 0.7662696242332458,
      "learning_rate": 4.404828581406536e-06,
      "loss": 0.0145,
      "step": 1718280
    },
    {
      "epoch": 2.812035636901606,
      "grad_norm": 0.7714760899543762,
      "learning_rate": 4.404762689193019e-06,
      "loss": 0.0141,
      "step": 1718300
    },
    {
      "epoch": 2.812068367340259,
      "grad_norm": 0.8892701268196106,
      "learning_rate": 4.4046967969795015e-06,
      "loss": 0.0102,
      "step": 1718320
    },
    {
      "epoch": 2.8121010977789123,
      "grad_norm": 0.31685584783554077,
      "learning_rate": 4.404630904765984e-06,
      "loss": 0.0165,
      "step": 1718340
    },
    {
      "epoch": 2.812133828217566,
      "grad_norm": 0.4313753843307495,
      "learning_rate": 4.404565012552467e-06,
      "loss": 0.0132,
      "step": 1718360
    },
    {
      "epoch": 2.812166558656219,
      "grad_norm": 0.8927175402641296,
      "learning_rate": 4.40449912033895e-06,
      "loss": 0.0148,
      "step": 1718380
    },
    {
      "epoch": 2.8121992890948726,
      "grad_norm": 0.20236219465732574,
      "learning_rate": 4.404433228125432e-06,
      "loss": 0.0179,
      "step": 1718400
    },
    {
      "epoch": 2.8122320195335258,
      "grad_norm": 0.20495639741420746,
      "learning_rate": 4.404367335911915e-06,
      "loss": 0.0114,
      "step": 1718420
    },
    {
      "epoch": 2.8122647499721793,
      "grad_norm": 0.11716561019420624,
      "learning_rate": 4.404301443698398e-06,
      "loss": 0.0103,
      "step": 1718440
    },
    {
      "epoch": 2.8122974804108325,
      "grad_norm": 0.5943017601966858,
      "learning_rate": 4.4042355514848815e-06,
      "loss": 0.0089,
      "step": 1718460
    },
    {
      "epoch": 2.8123302108494856,
      "grad_norm": 0.1462431102991104,
      "learning_rate": 4.404169659271364e-06,
      "loss": 0.0154,
      "step": 1718480
    },
    {
      "epoch": 2.8123629412881392,
      "grad_norm": 0.5628013610839844,
      "learning_rate": 4.404103767057847e-06,
      "loss": 0.0136,
      "step": 1718500
    },
    {
      "epoch": 2.8123956717267924,
      "grad_norm": 0.32592201232910156,
      "learning_rate": 4.4040378748443305e-06,
      "loss": 0.012,
      "step": 1718520
    },
    {
      "epoch": 2.812428402165446,
      "grad_norm": 0.4019770622253418,
      "learning_rate": 4.403971982630813e-06,
      "loss": 0.0134,
      "step": 1718540
    },
    {
      "epoch": 2.812461132604099,
      "grad_norm": 0.4892861843109131,
      "learning_rate": 4.403906090417296e-06,
      "loss": 0.0149,
      "step": 1718560
    },
    {
      "epoch": 2.8124938630427527,
      "grad_norm": 0.3568935692310333,
      "learning_rate": 4.403840198203779e-06,
      "loss": 0.0145,
      "step": 1718580
    },
    {
      "epoch": 2.812526593481406,
      "grad_norm": 0.8162736296653748,
      "learning_rate": 4.4037743059902615e-06,
      "loss": 0.0124,
      "step": 1718600
    },
    {
      "epoch": 2.812559323920059,
      "grad_norm": 0.2179790586233139,
      "learning_rate": 4.403708413776744e-06,
      "loss": 0.0146,
      "step": 1718620
    },
    {
      "epoch": 2.8125920543587126,
      "grad_norm": 0.45609405636787415,
      "learning_rate": 4.403642521563227e-06,
      "loss": 0.0139,
      "step": 1718640
    },
    {
      "epoch": 2.8126247847973658,
      "grad_norm": 0.9333184361457825,
      "learning_rate": 4.40357662934971e-06,
      "loss": 0.0148,
      "step": 1718660
    },
    {
      "epoch": 2.812657515236019,
      "grad_norm": 0.14398568868637085,
      "learning_rate": 4.403510737136193e-06,
      "loss": 0.0172,
      "step": 1718680
    },
    {
      "epoch": 2.8126902456746725,
      "grad_norm": 0.1729142814874649,
      "learning_rate": 4.403444844922676e-06,
      "loss": 0.0157,
      "step": 1718700
    },
    {
      "epoch": 2.812722976113326,
      "grad_norm": 0.3171314001083374,
      "learning_rate": 4.403378952709159e-06,
      "loss": 0.0125,
      "step": 1718720
    },
    {
      "epoch": 2.8127557065519793,
      "grad_norm": 0.5582818388938904,
      "learning_rate": 4.4033130604956415e-06,
      "loss": 0.014,
      "step": 1718740
    },
    {
      "epoch": 2.8127884369906324,
      "grad_norm": 0.19854801893234253,
      "learning_rate": 4.403247168282124e-06,
      "loss": 0.0138,
      "step": 1718760
    },
    {
      "epoch": 2.812821167429286,
      "grad_norm": 0.08558986335992813,
      "learning_rate": 4.403181276068607e-06,
      "loss": 0.0146,
      "step": 1718780
    },
    {
      "epoch": 2.812853897867939,
      "grad_norm": 0.30140286684036255,
      "learning_rate": 4.40311538385509e-06,
      "loss": 0.0101,
      "step": 1718800
    },
    {
      "epoch": 2.8128866283065923,
      "grad_norm": 0.06489132344722748,
      "learning_rate": 4.4030494916415725e-06,
      "loss": 0.0109,
      "step": 1718820
    },
    {
      "epoch": 2.812919358745246,
      "grad_norm": 0.17864011228084564,
      "learning_rate": 4.402983599428056e-06,
      "loss": 0.0133,
      "step": 1718840
    },
    {
      "epoch": 2.8129520891838995,
      "grad_norm": 0.23207798600196838,
      "learning_rate": 4.402917707214539e-06,
      "loss": 0.0139,
      "step": 1718860
    },
    {
      "epoch": 2.8129848196225526,
      "grad_norm": 0.16611799597740173,
      "learning_rate": 4.4028518150010216e-06,
      "loss": 0.017,
      "step": 1718880
    },
    {
      "epoch": 2.813017550061206,
      "grad_norm": 0.3380039632320404,
      "learning_rate": 4.402785922787504e-06,
      "loss": 0.0104,
      "step": 1718900
    },
    {
      "epoch": 2.8130502804998594,
      "grad_norm": 0.22046461701393127,
      "learning_rate": 4.402720030573988e-06,
      "loss": 0.0141,
      "step": 1718920
    },
    {
      "epoch": 2.8130830109385125,
      "grad_norm": 0.4067254662513733,
      "learning_rate": 4.402654138360471e-06,
      "loss": 0.0163,
      "step": 1718940
    },
    {
      "epoch": 2.8131157413771657,
      "grad_norm": 0.454637736082077,
      "learning_rate": 4.402588246146953e-06,
      "loss": 0.0124,
      "step": 1718960
    },
    {
      "epoch": 2.8131484718158193,
      "grad_norm": 2.0166306495666504,
      "learning_rate": 4.402522353933436e-06,
      "loss": 0.0138,
      "step": 1718980
    },
    {
      "epoch": 2.813181202254473,
      "grad_norm": 0.2443464994430542,
      "learning_rate": 4.402456461719919e-06,
      "loss": 0.0122,
      "step": 1719000
    },
    {
      "epoch": 2.813213932693126,
      "grad_norm": 0.11767210811376572,
      "learning_rate": 4.402390569506402e-06,
      "loss": 0.012,
      "step": 1719020
    },
    {
      "epoch": 2.813246663131779,
      "grad_norm": 0.13861338794231415,
      "learning_rate": 4.402324677292884e-06,
      "loss": 0.0174,
      "step": 1719040
    },
    {
      "epoch": 2.8132793935704328,
      "grad_norm": 1.7355648279190063,
      "learning_rate": 4.402258785079367e-06,
      "loss": 0.0134,
      "step": 1719060
    },
    {
      "epoch": 2.813312124009086,
      "grad_norm": 0.3701608180999756,
      "learning_rate": 4.402192892865851e-06,
      "loss": 0.0142,
      "step": 1719080
    },
    {
      "epoch": 2.813344854447739,
      "grad_norm": 0.6751012802124023,
      "learning_rate": 4.402127000652333e-06,
      "loss": 0.0167,
      "step": 1719100
    },
    {
      "epoch": 2.8133775848863927,
      "grad_norm": 0.32048141956329346,
      "learning_rate": 4.402061108438816e-06,
      "loss": 0.0095,
      "step": 1719120
    },
    {
      "epoch": 2.8134103153250463,
      "grad_norm": 0.1830378770828247,
      "learning_rate": 4.401995216225299e-06,
      "loss": 0.0159,
      "step": 1719140
    },
    {
      "epoch": 2.8134430457636994,
      "grad_norm": 0.2633723020553589,
      "learning_rate": 4.401929324011782e-06,
      "loss": 0.0122,
      "step": 1719160
    },
    {
      "epoch": 2.8134757762023526,
      "grad_norm": 1.0092339515686035,
      "learning_rate": 4.401863431798265e-06,
      "loss": 0.009,
      "step": 1719180
    },
    {
      "epoch": 2.813508506641006,
      "grad_norm": 0.49076491594314575,
      "learning_rate": 4.401797539584748e-06,
      "loss": 0.0109,
      "step": 1719200
    },
    {
      "epoch": 2.8135412370796593,
      "grad_norm": 0.41441038250923157,
      "learning_rate": 4.401731647371231e-06,
      "loss": 0.0173,
      "step": 1719220
    },
    {
      "epoch": 2.8135739675183125,
      "grad_norm": 0.7572680115699768,
      "learning_rate": 4.401665755157713e-06,
      "loss": 0.0113,
      "step": 1719240
    },
    {
      "epoch": 2.813606697956966,
      "grad_norm": 0.09834142029285431,
      "learning_rate": 4.401599862944196e-06,
      "loss": 0.0115,
      "step": 1719260
    },
    {
      "epoch": 2.813639428395619,
      "grad_norm": 1.1234259605407715,
      "learning_rate": 4.401533970730679e-06,
      "loss": 0.0114,
      "step": 1719280
    },
    {
      "epoch": 2.813672158834273,
      "grad_norm": 0.5711905360221863,
      "learning_rate": 4.4014680785171625e-06,
      "loss": 0.0181,
      "step": 1719300
    },
    {
      "epoch": 2.813704889272926,
      "grad_norm": 0.2057340443134308,
      "learning_rate": 4.401402186303645e-06,
      "loss": 0.0123,
      "step": 1719320
    },
    {
      "epoch": 2.8137376197115795,
      "grad_norm": 0.16285650432109833,
      "learning_rate": 4.401336294090128e-06,
      "loss": 0.0123,
      "step": 1719340
    },
    {
      "epoch": 2.8137703501502327,
      "grad_norm": 0.21010060608386993,
      "learning_rate": 4.401270401876611e-06,
      "loss": 0.0108,
      "step": 1719360
    },
    {
      "epoch": 2.813803080588886,
      "grad_norm": 0.8244566917419434,
      "learning_rate": 4.4012045096630934e-06,
      "loss": 0.0158,
      "step": 1719380
    },
    {
      "epoch": 2.8138358110275394,
      "grad_norm": 0.7133608460426331,
      "learning_rate": 4.401138617449576e-06,
      "loss": 0.0129,
      "step": 1719400
    },
    {
      "epoch": 2.8138685414661926,
      "grad_norm": 0.2518448829650879,
      "learning_rate": 4.401072725236059e-06,
      "loss": 0.0148,
      "step": 1719420
    },
    {
      "epoch": 2.813901271904846,
      "grad_norm": 0.1435396820306778,
      "learning_rate": 4.401006833022542e-06,
      "loss": 0.0126,
      "step": 1719440
    },
    {
      "epoch": 2.8139340023434993,
      "grad_norm": 0.2958832383155823,
      "learning_rate": 4.400940940809024e-06,
      "loss": 0.0152,
      "step": 1719460
    },
    {
      "epoch": 2.813966732782153,
      "grad_norm": 1.399510145187378,
      "learning_rate": 4.400875048595508e-06,
      "loss": 0.0161,
      "step": 1719480
    },
    {
      "epoch": 2.813999463220806,
      "grad_norm": 0.26847076416015625,
      "learning_rate": 4.400809156381991e-06,
      "loss": 0.0116,
      "step": 1719500
    },
    {
      "epoch": 2.8140321936594592,
      "grad_norm": 0.3265659213066101,
      "learning_rate": 4.4007432641684735e-06,
      "loss": 0.0123,
      "step": 1719520
    },
    {
      "epoch": 2.814064924098113,
      "grad_norm": 0.7757180333137512,
      "learning_rate": 4.400677371954957e-06,
      "loss": 0.0179,
      "step": 1719540
    },
    {
      "epoch": 2.814097654536766,
      "grad_norm": 0.266808420419693,
      "learning_rate": 4.40061147974144e-06,
      "loss": 0.0114,
      "step": 1719560
    },
    {
      "epoch": 2.8141303849754196,
      "grad_norm": 0.5554044842720032,
      "learning_rate": 4.4005455875279225e-06,
      "loss": 0.0113,
      "step": 1719580
    },
    {
      "epoch": 2.8141631154140727,
      "grad_norm": 0.44378164410591125,
      "learning_rate": 4.400479695314405e-06,
      "loss": 0.008,
      "step": 1719600
    },
    {
      "epoch": 2.8141958458527263,
      "grad_norm": 0.4309898614883423,
      "learning_rate": 4.400413803100888e-06,
      "loss": 0.01,
      "step": 1719620
    },
    {
      "epoch": 2.8142285762913795,
      "grad_norm": 0.6173993945121765,
      "learning_rate": 4.400347910887371e-06,
      "loss": 0.0113,
      "step": 1719640
    },
    {
      "epoch": 2.8142613067300326,
      "grad_norm": 0.8686444163322449,
      "learning_rate": 4.4002820186738535e-06,
      "loss": 0.0106,
      "step": 1719660
    },
    {
      "epoch": 2.814294037168686,
      "grad_norm": 0.4921317994594574,
      "learning_rate": 4.400216126460336e-06,
      "loss": 0.0118,
      "step": 1719680
    },
    {
      "epoch": 2.8143267676073394,
      "grad_norm": 0.1793428212404251,
      "learning_rate": 4.40015023424682e-06,
      "loss": 0.0158,
      "step": 1719700
    },
    {
      "epoch": 2.814359498045993,
      "grad_norm": 0.47081586718559265,
      "learning_rate": 4.4000843420333026e-06,
      "loss": 0.0175,
      "step": 1719720
    },
    {
      "epoch": 2.814392228484646,
      "grad_norm": 0.11038066446781158,
      "learning_rate": 4.400018449819785e-06,
      "loss": 0.0208,
      "step": 1719740
    },
    {
      "epoch": 2.8144249589232997,
      "grad_norm": 0.44834086298942566,
      "learning_rate": 4.399952557606268e-06,
      "loss": 0.0176,
      "step": 1719760
    },
    {
      "epoch": 2.814457689361953,
      "grad_norm": 0.8636510968208313,
      "learning_rate": 4.399886665392751e-06,
      "loss": 0.0109,
      "step": 1719780
    },
    {
      "epoch": 2.814490419800606,
      "grad_norm": 0.21870064735412598,
      "learning_rate": 4.3998207731792335e-06,
      "loss": 0.0117,
      "step": 1719800
    },
    {
      "epoch": 2.8145231502392596,
      "grad_norm": 0.8636655807495117,
      "learning_rate": 4.399754880965716e-06,
      "loss": 0.0128,
      "step": 1719820
    },
    {
      "epoch": 2.8145558806779127,
      "grad_norm": 1.0135854482650757,
      "learning_rate": 4.399688988752199e-06,
      "loss": 0.0182,
      "step": 1719840
    },
    {
      "epoch": 2.8145886111165663,
      "grad_norm": 0.13483679294586182,
      "learning_rate": 4.399623096538682e-06,
      "loss": 0.0118,
      "step": 1719860
    },
    {
      "epoch": 2.8146213415552195,
      "grad_norm": 0.3703237771987915,
      "learning_rate": 4.399557204325165e-06,
      "loss": 0.0149,
      "step": 1719880
    },
    {
      "epoch": 2.814654071993873,
      "grad_norm": 0.19923318922519684,
      "learning_rate": 4.399491312111648e-06,
      "loss": 0.0164,
      "step": 1719900
    },
    {
      "epoch": 2.8146868024325262,
      "grad_norm": 0.2596265971660614,
      "learning_rate": 4.399425419898131e-06,
      "loss": 0.0118,
      "step": 1719920
    },
    {
      "epoch": 2.8147195328711794,
      "grad_norm": 0.1838494837284088,
      "learning_rate": 4.399359527684614e-06,
      "loss": 0.017,
      "step": 1719940
    },
    {
      "epoch": 2.814752263309833,
      "grad_norm": 0.07387687265872955,
      "learning_rate": 4.399293635471097e-06,
      "loss": 0.0101,
      "step": 1719960
    },
    {
      "epoch": 2.814784993748486,
      "grad_norm": 0.3008809983730316,
      "learning_rate": 4.39922774325758e-06,
      "loss": 0.0138,
      "step": 1719980
    },
    {
      "epoch": 2.8148177241871397,
      "grad_norm": 0.30289211869239807,
      "learning_rate": 4.399161851044063e-06,
      "loss": 0.0135,
      "step": 1720000
    },
    {
      "epoch": 2.814850454625793,
      "grad_norm": 0.4974198341369629,
      "learning_rate": 4.399095958830545e-06,
      "loss": 0.0174,
      "step": 1720020
    },
    {
      "epoch": 2.8148831850644465,
      "grad_norm": 0.6582648158073425,
      "learning_rate": 4.399030066617028e-06,
      "loss": 0.0125,
      "step": 1720040
    },
    {
      "epoch": 2.8149159155030996,
      "grad_norm": 0.7607945799827576,
      "learning_rate": 4.398964174403511e-06,
      "loss": 0.0145,
      "step": 1720060
    },
    {
      "epoch": 2.8149486459417528,
      "grad_norm": 0.29900190234184265,
      "learning_rate": 4.3988982821899936e-06,
      "loss": 0.0122,
      "step": 1720080
    },
    {
      "epoch": 2.8149813763804064,
      "grad_norm": 0.1289307326078415,
      "learning_rate": 4.398832389976477e-06,
      "loss": 0.0148,
      "step": 1720100
    },
    {
      "epoch": 2.8150141068190595,
      "grad_norm": 0.3952549397945404,
      "learning_rate": 4.39876649776296e-06,
      "loss": 0.0158,
      "step": 1720120
    },
    {
      "epoch": 2.8150468372577127,
      "grad_norm": 1.3030779361724854,
      "learning_rate": 4.398700605549443e-06,
      "loss": 0.0168,
      "step": 1720140
    },
    {
      "epoch": 2.8150795676963662,
      "grad_norm": 0.2460283488035202,
      "learning_rate": 4.398634713335925e-06,
      "loss": 0.0165,
      "step": 1720160
    },
    {
      "epoch": 2.81511229813502,
      "grad_norm": 0.41576385498046875,
      "learning_rate": 4.398568821122408e-06,
      "loss": 0.0192,
      "step": 1720180
    },
    {
      "epoch": 2.815145028573673,
      "grad_norm": 0.13037820160388947,
      "learning_rate": 4.398502928908891e-06,
      "loss": 0.0103,
      "step": 1720200
    },
    {
      "epoch": 2.815177759012326,
      "grad_norm": 0.4368838965892792,
      "learning_rate": 4.398437036695374e-06,
      "loss": 0.0129,
      "step": 1720220
    },
    {
      "epoch": 2.8152104894509797,
      "grad_norm": 0.22404763102531433,
      "learning_rate": 4.398371144481857e-06,
      "loss": 0.0122,
      "step": 1720240
    },
    {
      "epoch": 2.815243219889633,
      "grad_norm": 0.32193225622177124,
      "learning_rate": 4.39830525226834e-06,
      "loss": 0.0136,
      "step": 1720260
    },
    {
      "epoch": 2.815275950328286,
      "grad_norm": 0.12062272429466248,
      "learning_rate": 4.398239360054823e-06,
      "loss": 0.0125,
      "step": 1720280
    },
    {
      "epoch": 2.8153086807669396,
      "grad_norm": 0.1214161291718483,
      "learning_rate": 4.398173467841305e-06,
      "loss": 0.0116,
      "step": 1720300
    },
    {
      "epoch": 2.8153414112055932,
      "grad_norm": 0.21393026411533356,
      "learning_rate": 4.398107575627789e-06,
      "loss": 0.012,
      "step": 1720320
    },
    {
      "epoch": 2.8153741416442464,
      "grad_norm": 0.08734489977359772,
      "learning_rate": 4.398041683414272e-06,
      "loss": 0.013,
      "step": 1720340
    },
    {
      "epoch": 2.8154068720828995,
      "grad_norm": 0.17012245953083038,
      "learning_rate": 4.3979757912007545e-06,
      "loss": 0.0119,
      "step": 1720360
    },
    {
      "epoch": 2.815439602521553,
      "grad_norm": 0.497734010219574,
      "learning_rate": 4.397909898987237e-06,
      "loss": 0.0158,
      "step": 1720380
    },
    {
      "epoch": 2.8154723329602063,
      "grad_norm": 0.13096293807029724,
      "learning_rate": 4.39784400677372e-06,
      "loss": 0.0167,
      "step": 1720400
    },
    {
      "epoch": 2.8155050633988594,
      "grad_norm": 0.4389505386352539,
      "learning_rate": 4.397778114560203e-06,
      "loss": 0.0164,
      "step": 1720420
    },
    {
      "epoch": 2.815537793837513,
      "grad_norm": 0.23970180749893188,
      "learning_rate": 4.397712222346685e-06,
      "loss": 0.0104,
      "step": 1720440
    },
    {
      "epoch": 2.8155705242761666,
      "grad_norm": 0.06749925762414932,
      "learning_rate": 4.397646330133168e-06,
      "loss": 0.0098,
      "step": 1720460
    },
    {
      "epoch": 2.8156032547148198,
      "grad_norm": 0.46824347972869873,
      "learning_rate": 4.397580437919651e-06,
      "loss": 0.0129,
      "step": 1720480
    },
    {
      "epoch": 2.815635985153473,
      "grad_norm": 0.3405317962169647,
      "learning_rate": 4.3975145457061345e-06,
      "loss": 0.0124,
      "step": 1720500
    },
    {
      "epoch": 2.8156687155921265,
      "grad_norm": 0.21852460503578186,
      "learning_rate": 4.397448653492617e-06,
      "loss": 0.0137,
      "step": 1720520
    },
    {
      "epoch": 2.8157014460307797,
      "grad_norm": 0.2839182913303375,
      "learning_rate": 4.3973827612791e-06,
      "loss": 0.0142,
      "step": 1720540
    },
    {
      "epoch": 2.815734176469433,
      "grad_norm": 0.5470746159553528,
      "learning_rate": 4.397316869065583e-06,
      "loss": 0.0183,
      "step": 1720560
    },
    {
      "epoch": 2.8157669069080864,
      "grad_norm": 0.0976877510547638,
      "learning_rate": 4.3972509768520654e-06,
      "loss": 0.0107,
      "step": 1720580
    },
    {
      "epoch": 2.81579963734674,
      "grad_norm": 0.19411107897758484,
      "learning_rate": 4.397185084638549e-06,
      "loss": 0.013,
      "step": 1720600
    },
    {
      "epoch": 2.815832367785393,
      "grad_norm": 0.2575244903564453,
      "learning_rate": 4.397119192425032e-06,
      "loss": 0.0197,
      "step": 1720620
    },
    {
      "epoch": 2.8158650982240463,
      "grad_norm": 0.335309237241745,
      "learning_rate": 4.3970533002115145e-06,
      "loss": 0.0105,
      "step": 1720640
    },
    {
      "epoch": 2.8158978286627,
      "grad_norm": 0.30250081419944763,
      "learning_rate": 4.396987407997997e-06,
      "loss": 0.0114,
      "step": 1720660
    },
    {
      "epoch": 2.815930559101353,
      "grad_norm": 0.9428250789642334,
      "learning_rate": 4.39692151578448e-06,
      "loss": 0.0166,
      "step": 1720680
    },
    {
      "epoch": 2.815963289540006,
      "grad_norm": 0.5177114009857178,
      "learning_rate": 4.396855623570963e-06,
      "loss": 0.0095,
      "step": 1720700
    },
    {
      "epoch": 2.81599601997866,
      "grad_norm": 0.35438454151153564,
      "learning_rate": 4.396789731357446e-06,
      "loss": 0.0091,
      "step": 1720720
    },
    {
      "epoch": 2.816028750417313,
      "grad_norm": 0.15788140892982483,
      "learning_rate": 4.396723839143929e-06,
      "loss": 0.0077,
      "step": 1720740
    },
    {
      "epoch": 2.8160614808559665,
      "grad_norm": 0.21751004457473755,
      "learning_rate": 4.396657946930412e-06,
      "loss": 0.0164,
      "step": 1720760
    },
    {
      "epoch": 2.8160942112946197,
      "grad_norm": 0.2808228135108948,
      "learning_rate": 4.3965920547168945e-06,
      "loss": 0.0167,
      "step": 1720780
    },
    {
      "epoch": 2.8161269417332733,
      "grad_norm": 0.4359976053237915,
      "learning_rate": 4.396526162503377e-06,
      "loss": 0.0126,
      "step": 1720800
    },
    {
      "epoch": 2.8161596721719264,
      "grad_norm": 0.2370121031999588,
      "learning_rate": 4.39646027028986e-06,
      "loss": 0.0163,
      "step": 1720820
    },
    {
      "epoch": 2.8161924026105796,
      "grad_norm": 0.34074559807777405,
      "learning_rate": 4.396394378076343e-06,
      "loss": 0.0093,
      "step": 1720840
    },
    {
      "epoch": 2.816225133049233,
      "grad_norm": 0.3684089779853821,
      "learning_rate": 4.3963284858628255e-06,
      "loss": 0.0109,
      "step": 1720860
    },
    {
      "epoch": 2.8162578634878863,
      "grad_norm": 0.15774741768836975,
      "learning_rate": 4.396262593649308e-06,
      "loss": 0.0114,
      "step": 1720880
    },
    {
      "epoch": 2.81629059392654,
      "grad_norm": 0.2741880714893341,
      "learning_rate": 4.396196701435792e-06,
      "loss": 0.0096,
      "step": 1720900
    },
    {
      "epoch": 2.816323324365193,
      "grad_norm": 0.853309154510498,
      "learning_rate": 4.3961308092222746e-06,
      "loss": 0.0122,
      "step": 1720920
    },
    {
      "epoch": 2.8163560548038467,
      "grad_norm": 0.18544302880764008,
      "learning_rate": 4.396064917008757e-06,
      "loss": 0.0157,
      "step": 1720940
    },
    {
      "epoch": 2.8163887852425,
      "grad_norm": 0.19367378950119019,
      "learning_rate": 4.395999024795241e-06,
      "loss": 0.0139,
      "step": 1720960
    },
    {
      "epoch": 2.816421515681153,
      "grad_norm": 0.20815661549568176,
      "learning_rate": 4.395933132581724e-06,
      "loss": 0.0102,
      "step": 1720980
    },
    {
      "epoch": 2.8164542461198065,
      "grad_norm": 0.15240564942359924,
      "learning_rate": 4.395867240368206e-06,
      "loss": 0.0136,
      "step": 1721000
    },
    {
      "epoch": 2.8164869765584597,
      "grad_norm": 0.3833310008049011,
      "learning_rate": 4.395801348154689e-06,
      "loss": 0.0107,
      "step": 1721020
    },
    {
      "epoch": 2.8165197069971133,
      "grad_norm": 0.24205434322357178,
      "learning_rate": 4.395735455941172e-06,
      "loss": 0.0104,
      "step": 1721040
    },
    {
      "epoch": 2.8165524374357664,
      "grad_norm": 0.7990890741348267,
      "learning_rate": 4.395669563727655e-06,
      "loss": 0.0141,
      "step": 1721060
    },
    {
      "epoch": 2.81658516787442,
      "grad_norm": 0.32682865858078003,
      "learning_rate": 4.395603671514137e-06,
      "loss": 0.0115,
      "step": 1721080
    },
    {
      "epoch": 2.816617898313073,
      "grad_norm": 0.10517818480730057,
      "learning_rate": 4.39553777930062e-06,
      "loss": 0.012,
      "step": 1721100
    },
    {
      "epoch": 2.8166506287517263,
      "grad_norm": 0.7767759561538696,
      "learning_rate": 4.395471887087104e-06,
      "loss": 0.019,
      "step": 1721120
    },
    {
      "epoch": 2.81668335919038,
      "grad_norm": 0.12629805505275726,
      "learning_rate": 4.395405994873586e-06,
      "loss": 0.0116,
      "step": 1721140
    },
    {
      "epoch": 2.816716089629033,
      "grad_norm": 0.272940069437027,
      "learning_rate": 4.395340102660069e-06,
      "loss": 0.0153,
      "step": 1721160
    },
    {
      "epoch": 2.8167488200676867,
      "grad_norm": 0.5119532346725464,
      "learning_rate": 4.395274210446552e-06,
      "loss": 0.0138,
      "step": 1721180
    },
    {
      "epoch": 2.81678155050634,
      "grad_norm": 0.22781358659267426,
      "learning_rate": 4.395208318233035e-06,
      "loss": 0.0131,
      "step": 1721200
    },
    {
      "epoch": 2.8168142809449934,
      "grad_norm": 0.6997527480125427,
      "learning_rate": 4.395142426019517e-06,
      "loss": 0.0152,
      "step": 1721220
    },
    {
      "epoch": 2.8168470113836466,
      "grad_norm": 0.2373523861169815,
      "learning_rate": 4.395076533806e-06,
      "loss": 0.0144,
      "step": 1721240
    },
    {
      "epoch": 2.8168797418222997,
      "grad_norm": 0.15836167335510254,
      "learning_rate": 4.395010641592483e-06,
      "loss": 0.017,
      "step": 1721260
    },
    {
      "epoch": 2.8169124722609533,
      "grad_norm": 0.22121526300907135,
      "learning_rate": 4.3949447493789656e-06,
      "loss": 0.015,
      "step": 1721280
    },
    {
      "epoch": 2.8169452026996065,
      "grad_norm": 0.2678999900817871,
      "learning_rate": 4.394878857165449e-06,
      "loss": 0.0107,
      "step": 1721300
    },
    {
      "epoch": 2.81697793313826,
      "grad_norm": 0.21174773573875427,
      "learning_rate": 4.394812964951932e-06,
      "loss": 0.0104,
      "step": 1721320
    },
    {
      "epoch": 2.817010663576913,
      "grad_norm": 0.3988880217075348,
      "learning_rate": 4.394747072738415e-06,
      "loss": 0.0135,
      "step": 1721340
    },
    {
      "epoch": 2.817043394015567,
      "grad_norm": 0.08881527930498123,
      "learning_rate": 4.394681180524898e-06,
      "loss": 0.0164,
      "step": 1721360
    },
    {
      "epoch": 2.81707612445422,
      "grad_norm": 0.19806109368801117,
      "learning_rate": 4.394615288311381e-06,
      "loss": 0.0182,
      "step": 1721380
    },
    {
      "epoch": 2.817108854892873,
      "grad_norm": 0.682712972164154,
      "learning_rate": 4.394549396097864e-06,
      "loss": 0.0189,
      "step": 1721400
    },
    {
      "epoch": 2.8171415853315267,
      "grad_norm": 0.4283407926559448,
      "learning_rate": 4.3944835038843464e-06,
      "loss": 0.0109,
      "step": 1721420
    },
    {
      "epoch": 2.81717431577018,
      "grad_norm": 0.5049329996109009,
      "learning_rate": 4.394417611670829e-06,
      "loss": 0.0142,
      "step": 1721440
    },
    {
      "epoch": 2.8172070462088334,
      "grad_norm": 0.4123896658420563,
      "learning_rate": 4.394351719457312e-06,
      "loss": 0.0178,
      "step": 1721460
    },
    {
      "epoch": 2.8172397766474866,
      "grad_norm": 0.8147916197776794,
      "learning_rate": 4.394285827243795e-06,
      "loss": 0.0192,
      "step": 1721480
    },
    {
      "epoch": 2.81727250708614,
      "grad_norm": 0.09214700758457184,
      "learning_rate": 4.394219935030277e-06,
      "loss": 0.0145,
      "step": 1721500
    },
    {
      "epoch": 2.8173052375247933,
      "grad_norm": 0.2563316524028778,
      "learning_rate": 4.394154042816761e-06,
      "loss": 0.0134,
      "step": 1721520
    },
    {
      "epoch": 2.8173379679634465,
      "grad_norm": 0.186269611120224,
      "learning_rate": 4.394088150603244e-06,
      "loss": 0.0134,
      "step": 1721540
    },
    {
      "epoch": 2.8173706984021,
      "grad_norm": 0.378995418548584,
      "learning_rate": 4.3940222583897265e-06,
      "loss": 0.0132,
      "step": 1721560
    },
    {
      "epoch": 2.8174034288407532,
      "grad_norm": 0.0701519250869751,
      "learning_rate": 4.393956366176209e-06,
      "loss": 0.0118,
      "step": 1721580
    },
    {
      "epoch": 2.817436159279407,
      "grad_norm": 0.6358423829078674,
      "learning_rate": 4.393890473962692e-06,
      "loss": 0.0155,
      "step": 1721600
    },
    {
      "epoch": 2.81746888971806,
      "grad_norm": 6.237481117248535,
      "learning_rate": 4.393824581749175e-06,
      "loss": 0.0125,
      "step": 1721620
    },
    {
      "epoch": 2.8175016201567136,
      "grad_norm": 0.5245010852813721,
      "learning_rate": 4.393758689535658e-06,
      "loss": 0.0101,
      "step": 1721640
    },
    {
      "epoch": 2.8175343505953667,
      "grad_norm": 0.23964405059814453,
      "learning_rate": 4.393692797322141e-06,
      "loss": 0.0134,
      "step": 1721660
    },
    {
      "epoch": 2.81756708103402,
      "grad_norm": 0.36463552713394165,
      "learning_rate": 4.393626905108624e-06,
      "loss": 0.014,
      "step": 1721680
    },
    {
      "epoch": 2.8175998114726735,
      "grad_norm": 0.24926786124706268,
      "learning_rate": 4.3935610128951065e-06,
      "loss": 0.0177,
      "step": 1721700
    },
    {
      "epoch": 2.8176325419113266,
      "grad_norm": 0.08112680166959763,
      "learning_rate": 4.393495120681589e-06,
      "loss": 0.012,
      "step": 1721720
    },
    {
      "epoch": 2.8176652723499798,
      "grad_norm": 0.26284241676330566,
      "learning_rate": 4.393429228468073e-06,
      "loss": 0.0182,
      "step": 1721740
    },
    {
      "epoch": 2.8176980027886334,
      "grad_norm": 0.21504083275794983,
      "learning_rate": 4.3933633362545556e-06,
      "loss": 0.0136,
      "step": 1721760
    },
    {
      "epoch": 2.817730733227287,
      "grad_norm": 0.1949116438627243,
      "learning_rate": 4.393297444041038e-06,
      "loss": 0.0127,
      "step": 1721780
    },
    {
      "epoch": 2.81776346366594,
      "grad_norm": 0.31277957558631897,
      "learning_rate": 4.393231551827521e-06,
      "loss": 0.0161,
      "step": 1721800
    },
    {
      "epoch": 2.8177961941045933,
      "grad_norm": 0.16828513145446777,
      "learning_rate": 4.393165659614004e-06,
      "loss": 0.015,
      "step": 1721820
    },
    {
      "epoch": 2.817828924543247,
      "grad_norm": 0.42400869727134705,
      "learning_rate": 4.3930997674004865e-06,
      "loss": 0.0105,
      "step": 1721840
    },
    {
      "epoch": 2.8178616549819,
      "grad_norm": 0.21101169288158417,
      "learning_rate": 4.393033875186969e-06,
      "loss": 0.0112,
      "step": 1721860
    },
    {
      "epoch": 2.817894385420553,
      "grad_norm": 0.4973793923854828,
      "learning_rate": 4.392967982973452e-06,
      "loss": 0.0134,
      "step": 1721880
    },
    {
      "epoch": 2.8179271158592067,
      "grad_norm": 0.2563041150569916,
      "learning_rate": 4.392902090759935e-06,
      "loss": 0.0176,
      "step": 1721900
    },
    {
      "epoch": 2.8179598462978603,
      "grad_norm": 0.1353699266910553,
      "learning_rate": 4.392836198546418e-06,
      "loss": 0.013,
      "step": 1721920
    },
    {
      "epoch": 2.8179925767365135,
      "grad_norm": 0.39792847633361816,
      "learning_rate": 4.392770306332901e-06,
      "loss": 0.0126,
      "step": 1721940
    },
    {
      "epoch": 2.8180253071751666,
      "grad_norm": 0.17192547023296356,
      "learning_rate": 4.392704414119384e-06,
      "loss": 0.0126,
      "step": 1721960
    },
    {
      "epoch": 2.8180580376138202,
      "grad_norm": 0.131818026304245,
      "learning_rate": 4.3926385219058665e-06,
      "loss": 0.0092,
      "step": 1721980
    },
    {
      "epoch": 2.8180907680524734,
      "grad_norm": 0.23426282405853271,
      "learning_rate": 4.39257262969235e-06,
      "loss": 0.0169,
      "step": 1722000
    },
    {
      "epoch": 2.8181234984911265,
      "grad_norm": 0.47147616744041443,
      "learning_rate": 4.392506737478833e-06,
      "loss": 0.0124,
      "step": 1722020
    },
    {
      "epoch": 2.81815622892978,
      "grad_norm": 0.5747992992401123,
      "learning_rate": 4.392440845265316e-06,
      "loss": 0.0171,
      "step": 1722040
    },
    {
      "epoch": 2.8181889593684337,
      "grad_norm": 0.8152506351470947,
      "learning_rate": 4.392374953051798e-06,
      "loss": 0.011,
      "step": 1722060
    },
    {
      "epoch": 2.818221689807087,
      "grad_norm": 0.7696986794471741,
      "learning_rate": 4.392309060838281e-06,
      "loss": 0.0108,
      "step": 1722080
    },
    {
      "epoch": 2.81825442024574,
      "grad_norm": 1.1148606538772583,
      "learning_rate": 4.392243168624764e-06,
      "loss": 0.02,
      "step": 1722100
    },
    {
      "epoch": 2.8182871506843936,
      "grad_norm": 0.5737438201904297,
      "learning_rate": 4.3921772764112466e-06,
      "loss": 0.0133,
      "step": 1722120
    },
    {
      "epoch": 2.8183198811230468,
      "grad_norm": 0.12914389371871948,
      "learning_rate": 4.39211138419773e-06,
      "loss": 0.012,
      "step": 1722140
    },
    {
      "epoch": 2.8183526115617,
      "grad_norm": 0.09677375853061676,
      "learning_rate": 4.392045491984213e-06,
      "loss": 0.0136,
      "step": 1722160
    },
    {
      "epoch": 2.8183853420003535,
      "grad_norm": 0.38545727729797363,
      "learning_rate": 4.391979599770696e-06,
      "loss": 0.0148,
      "step": 1722180
    },
    {
      "epoch": 2.818418072439007,
      "grad_norm": 0.6240443587303162,
      "learning_rate": 4.391913707557178e-06,
      "loss": 0.0145,
      "step": 1722200
    },
    {
      "epoch": 2.8184508028776603,
      "grad_norm": 0.4214117228984833,
      "learning_rate": 4.391847815343661e-06,
      "loss": 0.0153,
      "step": 1722220
    },
    {
      "epoch": 2.8184835333163134,
      "grad_norm": 0.8921716809272766,
      "learning_rate": 4.391781923130144e-06,
      "loss": 0.014,
      "step": 1722240
    },
    {
      "epoch": 2.818516263754967,
      "grad_norm": 0.536190927028656,
      "learning_rate": 4.391716030916627e-06,
      "loss": 0.0127,
      "step": 1722260
    },
    {
      "epoch": 2.81854899419362,
      "grad_norm": 0.2923501431941986,
      "learning_rate": 4.391650138703109e-06,
      "loss": 0.0116,
      "step": 1722280
    },
    {
      "epoch": 2.8185817246322733,
      "grad_norm": 0.5358809232711792,
      "learning_rate": 4.391584246489592e-06,
      "loss": 0.0208,
      "step": 1722300
    },
    {
      "epoch": 2.818614455070927,
      "grad_norm": 0.24598123133182526,
      "learning_rate": 4.391518354276076e-06,
      "loss": 0.0136,
      "step": 1722320
    },
    {
      "epoch": 2.81864718550958,
      "grad_norm": 0.3860138952732086,
      "learning_rate": 4.391452462062558e-06,
      "loss": 0.0131,
      "step": 1722340
    },
    {
      "epoch": 2.8186799159482336,
      "grad_norm": 0.25421667098999023,
      "learning_rate": 4.391386569849041e-06,
      "loss": 0.023,
      "step": 1722360
    },
    {
      "epoch": 2.818712646386887,
      "grad_norm": 0.24507853388786316,
      "learning_rate": 4.391320677635525e-06,
      "loss": 0.0133,
      "step": 1722380
    },
    {
      "epoch": 2.8187453768255404,
      "grad_norm": 0.2137918621301651,
      "learning_rate": 4.3912547854220075e-06,
      "loss": 0.0183,
      "step": 1722400
    },
    {
      "epoch": 2.8187781072641935,
      "grad_norm": 0.24631819128990173,
      "learning_rate": 4.39118889320849e-06,
      "loss": 0.0112,
      "step": 1722420
    },
    {
      "epoch": 2.8188108377028467,
      "grad_norm": 0.20389585196971893,
      "learning_rate": 4.391123000994973e-06,
      "loss": 0.0179,
      "step": 1722440
    },
    {
      "epoch": 2.8188435681415003,
      "grad_norm": 0.4018990993499756,
      "learning_rate": 4.391057108781456e-06,
      "loss": 0.0095,
      "step": 1722460
    },
    {
      "epoch": 2.8188762985801534,
      "grad_norm": 0.31871989369392395,
      "learning_rate": 4.3909912165679384e-06,
      "loss": 0.009,
      "step": 1722480
    },
    {
      "epoch": 2.818909029018807,
      "grad_norm": 0.2957846224308014,
      "learning_rate": 4.390925324354421e-06,
      "loss": 0.0095,
      "step": 1722500
    },
    {
      "epoch": 2.81894175945746,
      "grad_norm": 0.5740848183631897,
      "learning_rate": 4.390859432140904e-06,
      "loss": 0.0131,
      "step": 1722520
    },
    {
      "epoch": 2.8189744898961138,
      "grad_norm": 0.3454400599002838,
      "learning_rate": 4.3907935399273875e-06,
      "loss": 0.0155,
      "step": 1722540
    },
    {
      "epoch": 2.819007220334767,
      "grad_norm": 0.1275002807378769,
      "learning_rate": 4.39072764771387e-06,
      "loss": 0.0114,
      "step": 1722560
    },
    {
      "epoch": 2.81903995077342,
      "grad_norm": 0.09102153033018112,
      "learning_rate": 4.390661755500353e-06,
      "loss": 0.0134,
      "step": 1722580
    },
    {
      "epoch": 2.8190726812120737,
      "grad_norm": 0.2913855016231537,
      "learning_rate": 4.390595863286836e-06,
      "loss": 0.0123,
      "step": 1722600
    },
    {
      "epoch": 2.819105411650727,
      "grad_norm": 0.33415278792381287,
      "learning_rate": 4.3905299710733185e-06,
      "loss": 0.0143,
      "step": 1722620
    },
    {
      "epoch": 2.8191381420893804,
      "grad_norm": 0.17818517982959747,
      "learning_rate": 4.390464078859801e-06,
      "loss": 0.0089,
      "step": 1722640
    },
    {
      "epoch": 2.8191708725280336,
      "grad_norm": 0.13413947820663452,
      "learning_rate": 4.390398186646284e-06,
      "loss": 0.0174,
      "step": 1722660
    },
    {
      "epoch": 2.819203602966687,
      "grad_norm": 0.12697088718414307,
      "learning_rate": 4.390332294432767e-06,
      "loss": 0.0125,
      "step": 1722680
    },
    {
      "epoch": 2.8192363334053403,
      "grad_norm": 0.1078755185008049,
      "learning_rate": 4.39026640221925e-06,
      "loss": 0.0109,
      "step": 1722700
    },
    {
      "epoch": 2.8192690638439935,
      "grad_norm": 0.19872623682022095,
      "learning_rate": 4.390200510005733e-06,
      "loss": 0.0132,
      "step": 1722720
    },
    {
      "epoch": 2.819301794282647,
      "grad_norm": 0.680769681930542,
      "learning_rate": 4.390134617792216e-06,
      "loss": 0.0165,
      "step": 1722740
    },
    {
      "epoch": 2.8193345247213,
      "grad_norm": 0.5713839530944824,
      "learning_rate": 4.3900687255786985e-06,
      "loss": 0.0169,
      "step": 1722760
    },
    {
      "epoch": 2.819367255159954,
      "grad_norm": 0.2577325403690338,
      "learning_rate": 4.390002833365182e-06,
      "loss": 0.0158,
      "step": 1722780
    },
    {
      "epoch": 2.819399985598607,
      "grad_norm": 0.13693341612815857,
      "learning_rate": 4.389936941151665e-06,
      "loss": 0.0102,
      "step": 1722800
    },
    {
      "epoch": 2.8194327160372605,
      "grad_norm": 0.24203607439994812,
      "learning_rate": 4.3898710489381475e-06,
      "loss": 0.016,
      "step": 1722820
    },
    {
      "epoch": 2.8194654464759137,
      "grad_norm": 0.33996549248695374,
      "learning_rate": 4.38980515672463e-06,
      "loss": 0.0127,
      "step": 1722840
    },
    {
      "epoch": 2.819498176914567,
      "grad_norm": 0.3101811707019806,
      "learning_rate": 4.389739264511113e-06,
      "loss": 0.0102,
      "step": 1722860
    },
    {
      "epoch": 2.8195309073532204,
      "grad_norm": 0.4550924301147461,
      "learning_rate": 4.389673372297596e-06,
      "loss": 0.0188,
      "step": 1722880
    },
    {
      "epoch": 2.8195636377918736,
      "grad_norm": 0.2114025056362152,
      "learning_rate": 4.3896074800840785e-06,
      "loss": 0.0146,
      "step": 1722900
    },
    {
      "epoch": 2.819596368230527,
      "grad_norm": 0.10627159476280212,
      "learning_rate": 4.389541587870561e-06,
      "loss": 0.0127,
      "step": 1722920
    },
    {
      "epoch": 2.8196290986691803,
      "grad_norm": 0.438571035861969,
      "learning_rate": 4.389475695657045e-06,
      "loss": 0.0202,
      "step": 1722940
    },
    {
      "epoch": 2.819661829107834,
      "grad_norm": 0.18863961100578308,
      "learning_rate": 4.3894098034435276e-06,
      "loss": 0.0157,
      "step": 1722960
    },
    {
      "epoch": 2.819694559546487,
      "grad_norm": 0.16940760612487793,
      "learning_rate": 4.38934391123001e-06,
      "loss": 0.0145,
      "step": 1722980
    },
    {
      "epoch": 2.81972728998514,
      "grad_norm": 0.2542828917503357,
      "learning_rate": 4.389278019016493e-06,
      "loss": 0.0103,
      "step": 1723000
    },
    {
      "epoch": 2.819760020423794,
      "grad_norm": 0.7554755806922913,
      "learning_rate": 4.389212126802976e-06,
      "loss": 0.0131,
      "step": 1723020
    },
    {
      "epoch": 2.819792750862447,
      "grad_norm": 0.22098782658576965,
      "learning_rate": 4.3891462345894585e-06,
      "loss": 0.0132,
      "step": 1723040
    },
    {
      "epoch": 2.8198254813011006,
      "grad_norm": 0.16485434770584106,
      "learning_rate": 4.389080342375942e-06,
      "loss": 0.0087,
      "step": 1723060
    },
    {
      "epoch": 2.8198582117397537,
      "grad_norm": 0.17442379891872406,
      "learning_rate": 4.389014450162425e-06,
      "loss": 0.0122,
      "step": 1723080
    },
    {
      "epoch": 2.8198909421784073,
      "grad_norm": 0.24663561582565308,
      "learning_rate": 4.388948557948908e-06,
      "loss": 0.0114,
      "step": 1723100
    },
    {
      "epoch": 2.8199236726170605,
      "grad_norm": 0.29261380434036255,
      "learning_rate": 4.38888266573539e-06,
      "loss": 0.0076,
      "step": 1723120
    },
    {
      "epoch": 2.8199564030557136,
      "grad_norm": 0.380575954914093,
      "learning_rate": 4.388816773521873e-06,
      "loss": 0.0134,
      "step": 1723140
    },
    {
      "epoch": 2.819989133494367,
      "grad_norm": 0.4304131269454956,
      "learning_rate": 4.388750881308357e-06,
      "loss": 0.0114,
      "step": 1723160
    },
    {
      "epoch": 2.8200218639330203,
      "grad_norm": 0.7468113899230957,
      "learning_rate": 4.388684989094839e-06,
      "loss": 0.013,
      "step": 1723180
    },
    {
      "epoch": 2.8200545943716735,
      "grad_norm": 0.15094831585884094,
      "learning_rate": 4.388619096881322e-06,
      "loss": 0.011,
      "step": 1723200
    },
    {
      "epoch": 2.820087324810327,
      "grad_norm": 0.18135830760002136,
      "learning_rate": 4.388553204667805e-06,
      "loss": 0.0117,
      "step": 1723220
    },
    {
      "epoch": 2.8201200552489807,
      "grad_norm": 0.5725855827331543,
      "learning_rate": 4.388487312454288e-06,
      "loss": 0.0159,
      "step": 1723240
    },
    {
      "epoch": 2.820152785687634,
      "grad_norm": 0.24197466671466827,
      "learning_rate": 4.38842142024077e-06,
      "loss": 0.0114,
      "step": 1723260
    },
    {
      "epoch": 2.820185516126287,
      "grad_norm": 0.6575810313224792,
      "learning_rate": 4.388355528027253e-06,
      "loss": 0.0094,
      "step": 1723280
    },
    {
      "epoch": 2.8202182465649406,
      "grad_norm": 0.138881117105484,
      "learning_rate": 4.388289635813736e-06,
      "loss": 0.0126,
      "step": 1723300
    },
    {
      "epoch": 2.8202509770035937,
      "grad_norm": 0.27184563875198364,
      "learning_rate": 4.388223743600219e-06,
      "loss": 0.0076,
      "step": 1723320
    },
    {
      "epoch": 2.820283707442247,
      "grad_norm": 0.26997828483581543,
      "learning_rate": 4.388157851386702e-06,
      "loss": 0.0108,
      "step": 1723340
    },
    {
      "epoch": 2.8203164378809005,
      "grad_norm": 0.062474507838487625,
      "learning_rate": 4.388091959173185e-06,
      "loss": 0.0132,
      "step": 1723360
    },
    {
      "epoch": 2.820349168319554,
      "grad_norm": 0.772625207901001,
      "learning_rate": 4.388026066959668e-06,
      "loss": 0.0142,
      "step": 1723380
    },
    {
      "epoch": 2.820381898758207,
      "grad_norm": 0.49173811078071594,
      "learning_rate": 4.387960174746151e-06,
      "loss": 0.0108,
      "step": 1723400
    },
    {
      "epoch": 2.8204146291968604,
      "grad_norm": 0.3726050555706024,
      "learning_rate": 4.387894282532634e-06,
      "loss": 0.0087,
      "step": 1723420
    },
    {
      "epoch": 2.820447359635514,
      "grad_norm": 0.24764108657836914,
      "learning_rate": 4.387828390319117e-06,
      "loss": 0.012,
      "step": 1723440
    },
    {
      "epoch": 2.820480090074167,
      "grad_norm": 0.4648328125476837,
      "learning_rate": 4.3877624981055995e-06,
      "loss": 0.0109,
      "step": 1723460
    },
    {
      "epoch": 2.8205128205128203,
      "grad_norm": 0.21498720347881317,
      "learning_rate": 4.387696605892082e-06,
      "loss": 0.0106,
      "step": 1723480
    },
    {
      "epoch": 2.820545550951474,
      "grad_norm": 0.3119691014289856,
      "learning_rate": 4.387630713678565e-06,
      "loss": 0.0129,
      "step": 1723500
    },
    {
      "epoch": 2.8205782813901275,
      "grad_norm": 0.2378779798746109,
      "learning_rate": 4.387564821465048e-06,
      "loss": 0.0109,
      "step": 1723520
    },
    {
      "epoch": 2.8206110118287806,
      "grad_norm": 0.34575968980789185,
      "learning_rate": 4.38749892925153e-06,
      "loss": 0.0118,
      "step": 1723540
    },
    {
      "epoch": 2.8206437422674338,
      "grad_norm": 0.5304740071296692,
      "learning_rate": 4.387433037038014e-06,
      "loss": 0.0152,
      "step": 1723560
    },
    {
      "epoch": 2.8206764727060873,
      "grad_norm": 0.3313206732273102,
      "learning_rate": 4.387367144824497e-06,
      "loss": 0.0106,
      "step": 1723580
    },
    {
      "epoch": 2.8207092031447405,
      "grad_norm": 0.8473978042602539,
      "learning_rate": 4.3873012526109795e-06,
      "loss": 0.0175,
      "step": 1723600
    },
    {
      "epoch": 2.8207419335833936,
      "grad_norm": 0.32950422167778015,
      "learning_rate": 4.387235360397462e-06,
      "loss": 0.016,
      "step": 1723620
    },
    {
      "epoch": 2.8207746640220472,
      "grad_norm": 0.1534709334373474,
      "learning_rate": 4.387169468183945e-06,
      "loss": 0.0164,
      "step": 1723640
    },
    {
      "epoch": 2.820807394460701,
      "grad_norm": 0.31433260440826416,
      "learning_rate": 4.387103575970428e-06,
      "loss": 0.0172,
      "step": 1723660
    },
    {
      "epoch": 2.820840124899354,
      "grad_norm": 0.19997356832027435,
      "learning_rate": 4.3870376837569104e-06,
      "loss": 0.0112,
      "step": 1723680
    },
    {
      "epoch": 2.820872855338007,
      "grad_norm": 0.10575602948665619,
      "learning_rate": 4.386971791543393e-06,
      "loss": 0.019,
      "step": 1723700
    },
    {
      "epoch": 2.8209055857766607,
      "grad_norm": 0.22142057120800018,
      "learning_rate": 4.386905899329876e-06,
      "loss": 0.0112,
      "step": 1723720
    },
    {
      "epoch": 2.820938316215314,
      "grad_norm": 0.7102224230766296,
      "learning_rate": 4.3868400071163595e-06,
      "loss": 0.0183,
      "step": 1723740
    },
    {
      "epoch": 2.820971046653967,
      "grad_norm": 0.08252496272325516,
      "learning_rate": 4.386774114902842e-06,
      "loss": 0.015,
      "step": 1723760
    },
    {
      "epoch": 2.8210037770926206,
      "grad_norm": 0.31574875116348267,
      "learning_rate": 4.386708222689325e-06,
      "loss": 0.0178,
      "step": 1723780
    },
    {
      "epoch": 2.8210365075312738,
      "grad_norm": 0.09922710061073303,
      "learning_rate": 4.3866423304758086e-06,
      "loss": 0.0183,
      "step": 1723800
    },
    {
      "epoch": 2.8210692379699274,
      "grad_norm": 0.5982416868209839,
      "learning_rate": 4.386576438262291e-06,
      "loss": 0.0173,
      "step": 1723820
    },
    {
      "epoch": 2.8211019684085805,
      "grad_norm": 0.15690426528453827,
      "learning_rate": 4.386510546048774e-06,
      "loss": 0.0125,
      "step": 1723840
    },
    {
      "epoch": 2.821134698847234,
      "grad_norm": 0.06703872978687286,
      "learning_rate": 4.386444653835257e-06,
      "loss": 0.0097,
      "step": 1723860
    },
    {
      "epoch": 2.8211674292858873,
      "grad_norm": 0.7132001519203186,
      "learning_rate": 4.3863787616217395e-06,
      "loss": 0.0154,
      "step": 1723880
    },
    {
      "epoch": 2.8212001597245404,
      "grad_norm": 0.7419021725654602,
      "learning_rate": 4.386312869408222e-06,
      "loss": 0.0126,
      "step": 1723900
    },
    {
      "epoch": 2.821232890163194,
      "grad_norm": 0.19690199196338654,
      "learning_rate": 4.386246977194705e-06,
      "loss": 0.016,
      "step": 1723920
    },
    {
      "epoch": 2.821265620601847,
      "grad_norm": 0.23752284049987793,
      "learning_rate": 4.386181084981188e-06,
      "loss": 0.0163,
      "step": 1723940
    },
    {
      "epoch": 2.8212983510405008,
      "grad_norm": 0.1619286984205246,
      "learning_rate": 4.386115192767671e-06,
      "loss": 0.0124,
      "step": 1723960
    },
    {
      "epoch": 2.821331081479154,
      "grad_norm": 0.6330199837684631,
      "learning_rate": 4.386049300554154e-06,
      "loss": 0.0112,
      "step": 1723980
    },
    {
      "epoch": 2.8213638119178075,
      "grad_norm": 0.16177226603031158,
      "learning_rate": 4.385983408340637e-06,
      "loss": 0.0125,
      "step": 1724000
    },
    {
      "epoch": 2.8213965423564606,
      "grad_norm": 0.5558722019195557,
      "learning_rate": 4.3859175161271196e-06,
      "loss": 0.0137,
      "step": 1724020
    },
    {
      "epoch": 2.821429272795114,
      "grad_norm": 0.4532715082168579,
      "learning_rate": 4.385851623913602e-06,
      "loss": 0.0147,
      "step": 1724040
    },
    {
      "epoch": 2.8214620032337674,
      "grad_norm": 0.41597825288772583,
      "learning_rate": 4.385785731700085e-06,
      "loss": 0.0119,
      "step": 1724060
    },
    {
      "epoch": 2.8214947336724205,
      "grad_norm": 0.7272751331329346,
      "learning_rate": 4.385719839486568e-06,
      "loss": 0.014,
      "step": 1724080
    },
    {
      "epoch": 2.821527464111074,
      "grad_norm": 0.2125542163848877,
      "learning_rate": 4.3856539472730505e-06,
      "loss": 0.0092,
      "step": 1724100
    },
    {
      "epoch": 2.8215601945497273,
      "grad_norm": 0.5696212649345398,
      "learning_rate": 4.385588055059534e-06,
      "loss": 0.0134,
      "step": 1724120
    },
    {
      "epoch": 2.821592924988381,
      "grad_norm": 0.39398446679115295,
      "learning_rate": 4.385522162846017e-06,
      "loss": 0.0178,
      "step": 1724140
    },
    {
      "epoch": 2.821625655427034,
      "grad_norm": 0.18121126294136047,
      "learning_rate": 4.3854562706325e-06,
      "loss": 0.0144,
      "step": 1724160
    },
    {
      "epoch": 2.821658385865687,
      "grad_norm": 0.34510377049446106,
      "learning_rate": 4.385390378418983e-06,
      "loss": 0.0126,
      "step": 1724180
    },
    {
      "epoch": 2.8216911163043408,
      "grad_norm": 0.0849878191947937,
      "learning_rate": 4.385324486205466e-06,
      "loss": 0.0149,
      "step": 1724200
    },
    {
      "epoch": 2.821723846742994,
      "grad_norm": 0.4176846444606781,
      "learning_rate": 4.385258593991949e-06,
      "loss": 0.0143,
      "step": 1724220
    },
    {
      "epoch": 2.8217565771816475,
      "grad_norm": 0.33965301513671875,
      "learning_rate": 4.385192701778431e-06,
      "loss": 0.0104,
      "step": 1724240
    },
    {
      "epoch": 2.8217893076203007,
      "grad_norm": 0.16872993111610413,
      "learning_rate": 4.385126809564914e-06,
      "loss": 0.007,
      "step": 1724260
    },
    {
      "epoch": 2.8218220380589543,
      "grad_norm": 0.5295690894126892,
      "learning_rate": 4.385060917351397e-06,
      "loss": 0.011,
      "step": 1724280
    },
    {
      "epoch": 2.8218547684976074,
      "grad_norm": 0.31667664647102356,
      "learning_rate": 4.38499502513788e-06,
      "loss": 0.0109,
      "step": 1724300
    },
    {
      "epoch": 2.8218874989362606,
      "grad_norm": 0.26127222180366516,
      "learning_rate": 4.384929132924362e-06,
      "loss": 0.0171,
      "step": 1724320
    },
    {
      "epoch": 2.821920229374914,
      "grad_norm": 0.3582540452480316,
      "learning_rate": 4.384863240710845e-06,
      "loss": 0.0133,
      "step": 1724340
    },
    {
      "epoch": 2.8219529598135673,
      "grad_norm": 0.19619107246398926,
      "learning_rate": 4.384797348497329e-06,
      "loss": 0.0114,
      "step": 1724360
    },
    {
      "epoch": 2.821985690252221,
      "grad_norm": 0.16638609766960144,
      "learning_rate": 4.384731456283811e-06,
      "loss": 0.0131,
      "step": 1724380
    },
    {
      "epoch": 2.822018420690874,
      "grad_norm": 0.36280110478401184,
      "learning_rate": 4.384665564070294e-06,
      "loss": 0.0131,
      "step": 1724400
    },
    {
      "epoch": 2.8220511511295276,
      "grad_norm": 0.23833051323890686,
      "learning_rate": 4.384599671856777e-06,
      "loss": 0.0149,
      "step": 1724420
    },
    {
      "epoch": 2.822083881568181,
      "grad_norm": 0.22322171926498413,
      "learning_rate": 4.38453377964326e-06,
      "loss": 0.0119,
      "step": 1724440
    },
    {
      "epoch": 2.822116612006834,
      "grad_norm": 0.2663905620574951,
      "learning_rate": 4.384467887429743e-06,
      "loss": 0.0098,
      "step": 1724460
    },
    {
      "epoch": 2.8221493424454875,
      "grad_norm": 0.3276059031486511,
      "learning_rate": 4.384401995216226e-06,
      "loss": 0.0171,
      "step": 1724480
    },
    {
      "epoch": 2.8221820728841407,
      "grad_norm": 0.1353338211774826,
      "learning_rate": 4.384336103002709e-06,
      "loss": 0.0134,
      "step": 1724500
    },
    {
      "epoch": 2.8222148033227943,
      "grad_norm": 0.3228684961795807,
      "learning_rate": 4.3842702107891914e-06,
      "loss": 0.0134,
      "step": 1724520
    },
    {
      "epoch": 2.8222475337614474,
      "grad_norm": 0.7459205985069275,
      "learning_rate": 4.384204318575674e-06,
      "loss": 0.0175,
      "step": 1724540
    },
    {
      "epoch": 2.822280264200101,
      "grad_norm": 0.7609097361564636,
      "learning_rate": 4.384138426362157e-06,
      "loss": 0.0174,
      "step": 1724560
    },
    {
      "epoch": 2.822312994638754,
      "grad_norm": 0.37613046169281006,
      "learning_rate": 4.3840725341486405e-06,
      "loss": 0.0148,
      "step": 1724580
    },
    {
      "epoch": 2.8223457250774073,
      "grad_norm": 0.31294935941696167,
      "learning_rate": 4.384006641935123e-06,
      "loss": 0.0104,
      "step": 1724600
    },
    {
      "epoch": 2.822378455516061,
      "grad_norm": 0.24220453202724457,
      "learning_rate": 4.383940749721606e-06,
      "loss": 0.01,
      "step": 1724620
    },
    {
      "epoch": 2.822411185954714,
      "grad_norm": 0.17497187852859497,
      "learning_rate": 4.383874857508089e-06,
      "loss": 0.0179,
      "step": 1724640
    },
    {
      "epoch": 2.8224439163933677,
      "grad_norm": 0.1208326518535614,
      "learning_rate": 4.3838089652945715e-06,
      "loss": 0.0132,
      "step": 1724660
    },
    {
      "epoch": 2.822476646832021,
      "grad_norm": 0.11164064705371857,
      "learning_rate": 4.383743073081054e-06,
      "loss": 0.0102,
      "step": 1724680
    },
    {
      "epoch": 2.8225093772706744,
      "grad_norm": 0.1799023151397705,
      "learning_rate": 4.383677180867537e-06,
      "loss": 0.0189,
      "step": 1724700
    },
    {
      "epoch": 2.8225421077093276,
      "grad_norm": 0.3008248805999756,
      "learning_rate": 4.38361128865402e-06,
      "loss": 0.0084,
      "step": 1724720
    },
    {
      "epoch": 2.8225748381479807,
      "grad_norm": 0.03239798545837402,
      "learning_rate": 4.383545396440502e-06,
      "loss": 0.0091,
      "step": 1724740
    },
    {
      "epoch": 2.8226075685866343,
      "grad_norm": 0.8780180215835571,
      "learning_rate": 4.383479504226986e-06,
      "loss": 0.0125,
      "step": 1724760
    },
    {
      "epoch": 2.8226402990252875,
      "grad_norm": 0.34715697169303894,
      "learning_rate": 4.383413612013469e-06,
      "loss": 0.0165,
      "step": 1724780
    },
    {
      "epoch": 2.8226730294639406,
      "grad_norm": 0.18092606961727142,
      "learning_rate": 4.3833477197999515e-06,
      "loss": 0.011,
      "step": 1724800
    },
    {
      "epoch": 2.822705759902594,
      "grad_norm": 0.11822137981653214,
      "learning_rate": 4.383281827586435e-06,
      "loss": 0.02,
      "step": 1724820
    },
    {
      "epoch": 2.822738490341248,
      "grad_norm": 0.5574772953987122,
      "learning_rate": 4.383215935372918e-06,
      "loss": 0.0202,
      "step": 1724840
    },
    {
      "epoch": 2.822771220779901,
      "grad_norm": 0.42361193895339966,
      "learning_rate": 4.3831500431594006e-06,
      "loss": 0.0125,
      "step": 1724860
    },
    {
      "epoch": 2.822803951218554,
      "grad_norm": 0.3695164918899536,
      "learning_rate": 4.383084150945883e-06,
      "loss": 0.0133,
      "step": 1724880
    },
    {
      "epoch": 2.8228366816572077,
      "grad_norm": 2.280850410461426,
      "learning_rate": 4.383018258732366e-06,
      "loss": 0.0158,
      "step": 1724900
    },
    {
      "epoch": 2.822869412095861,
      "grad_norm": 0.6234070062637329,
      "learning_rate": 4.382952366518849e-06,
      "loss": 0.0101,
      "step": 1724920
    },
    {
      "epoch": 2.822902142534514,
      "grad_norm": 0.15367981791496277,
      "learning_rate": 4.3828864743053315e-06,
      "loss": 0.0207,
      "step": 1724940
    },
    {
      "epoch": 2.8229348729731676,
      "grad_norm": 0.3094392716884613,
      "learning_rate": 4.382820582091814e-06,
      "loss": 0.0109,
      "step": 1724960
    },
    {
      "epoch": 2.822967603411821,
      "grad_norm": 0.7502604126930237,
      "learning_rate": 4.382754689878298e-06,
      "loss": 0.0162,
      "step": 1724980
    },
    {
      "epoch": 2.8230003338504743,
      "grad_norm": 0.3291541635990143,
      "learning_rate": 4.3826887976647806e-06,
      "loss": 0.0125,
      "step": 1725000
    },
    {
      "epoch": 2.8230330642891275,
      "grad_norm": 0.26479119062423706,
      "learning_rate": 4.382622905451263e-06,
      "loss": 0.0103,
      "step": 1725020
    },
    {
      "epoch": 2.823065794727781,
      "grad_norm": 0.8083062767982483,
      "learning_rate": 4.382557013237746e-06,
      "loss": 0.0144,
      "step": 1725040
    },
    {
      "epoch": 2.8230985251664342,
      "grad_norm": 0.1302127093076706,
      "learning_rate": 4.382491121024229e-06,
      "loss": 0.0191,
      "step": 1725060
    },
    {
      "epoch": 2.8231312556050874,
      "grad_norm": 0.28846731781959534,
      "learning_rate": 4.3824252288107115e-06,
      "loss": 0.01,
      "step": 1725080
    },
    {
      "epoch": 2.823163986043741,
      "grad_norm": 0.36497822403907776,
      "learning_rate": 4.382359336597194e-06,
      "loss": 0.0116,
      "step": 1725100
    },
    {
      "epoch": 2.8231967164823946,
      "grad_norm": 0.33684107661247253,
      "learning_rate": 4.382293444383677e-06,
      "loss": 0.0137,
      "step": 1725120
    },
    {
      "epoch": 2.8232294469210477,
      "grad_norm": 0.23621352016925812,
      "learning_rate": 4.38222755217016e-06,
      "loss": 0.0165,
      "step": 1725140
    },
    {
      "epoch": 2.823262177359701,
      "grad_norm": 0.3195550739765167,
      "learning_rate": 4.382161659956643e-06,
      "loss": 0.0108,
      "step": 1725160
    },
    {
      "epoch": 2.8232949077983545,
      "grad_norm": 1.1624735593795776,
      "learning_rate": 4.382095767743126e-06,
      "loss": 0.0195,
      "step": 1725180
    },
    {
      "epoch": 2.8233276382370076,
      "grad_norm": 0.23626302182674408,
      "learning_rate": 4.382029875529609e-06,
      "loss": 0.0148,
      "step": 1725200
    },
    {
      "epoch": 2.8233603686756608,
      "grad_norm": 0.4156248867511749,
      "learning_rate": 4.381963983316092e-06,
      "loss": 0.0122,
      "step": 1725220
    },
    {
      "epoch": 2.8233930991143144,
      "grad_norm": 0.2831808924674988,
      "learning_rate": 4.381898091102575e-06,
      "loss": 0.0125,
      "step": 1725240
    },
    {
      "epoch": 2.8234258295529675,
      "grad_norm": 0.45170050859451294,
      "learning_rate": 4.381832198889058e-06,
      "loss": 0.0127,
      "step": 1725260
    },
    {
      "epoch": 2.823458559991621,
      "grad_norm": 0.5310980081558228,
      "learning_rate": 4.381766306675541e-06,
      "loss": 0.0115,
      "step": 1725280
    },
    {
      "epoch": 2.8234912904302742,
      "grad_norm": 0.9449007511138916,
      "learning_rate": 4.381700414462023e-06,
      "loss": 0.0116,
      "step": 1725300
    },
    {
      "epoch": 2.823524020868928,
      "grad_norm": 1.1022899150848389,
      "learning_rate": 4.381634522248506e-06,
      "loss": 0.0174,
      "step": 1725320
    },
    {
      "epoch": 2.823556751307581,
      "grad_norm": 0.2016245424747467,
      "learning_rate": 4.381568630034989e-06,
      "loss": 0.0103,
      "step": 1725340
    },
    {
      "epoch": 2.823589481746234,
      "grad_norm": 0.1842879354953766,
      "learning_rate": 4.381502737821472e-06,
      "loss": 0.0189,
      "step": 1725360
    },
    {
      "epoch": 2.8236222121848877,
      "grad_norm": 0.4412180781364441,
      "learning_rate": 4.381436845607955e-06,
      "loss": 0.0124,
      "step": 1725380
    },
    {
      "epoch": 2.823654942623541,
      "grad_norm": 0.4720926582813263,
      "learning_rate": 4.381370953394438e-06,
      "loss": 0.0145,
      "step": 1725400
    },
    {
      "epoch": 2.8236876730621945,
      "grad_norm": 0.054755840450525284,
      "learning_rate": 4.381305061180921e-06,
      "loss": 0.0129,
      "step": 1725420
    },
    {
      "epoch": 2.8237204035008476,
      "grad_norm": 0.10895035415887833,
      "learning_rate": 4.381239168967403e-06,
      "loss": 0.0086,
      "step": 1725440
    },
    {
      "epoch": 2.8237531339395012,
      "grad_norm": 0.2847426235675812,
      "learning_rate": 4.381173276753886e-06,
      "loss": 0.0071,
      "step": 1725460
    },
    {
      "epoch": 2.8237858643781544,
      "grad_norm": 0.5011908411979675,
      "learning_rate": 4.381107384540369e-06,
      "loss": 0.0168,
      "step": 1725480
    },
    {
      "epoch": 2.8238185948168075,
      "grad_norm": 0.1639384776353836,
      "learning_rate": 4.381041492326852e-06,
      "loss": 0.0177,
      "step": 1725500
    },
    {
      "epoch": 2.823851325255461,
      "grad_norm": 0.3361241817474365,
      "learning_rate": 4.380975600113335e-06,
      "loss": 0.0126,
      "step": 1725520
    },
    {
      "epoch": 2.8238840556941143,
      "grad_norm": 0.35251182317733765,
      "learning_rate": 4.380909707899818e-06,
      "loss": 0.0101,
      "step": 1725540
    },
    {
      "epoch": 2.823916786132768,
      "grad_norm": 0.5724666118621826,
      "learning_rate": 4.380843815686301e-06,
      "loss": 0.0231,
      "step": 1725560
    },
    {
      "epoch": 2.823949516571421,
      "grad_norm": 0.9048390984535217,
      "learning_rate": 4.380777923472783e-06,
      "loss": 0.0188,
      "step": 1725580
    },
    {
      "epoch": 2.8239822470100746,
      "grad_norm": 0.35783877968788147,
      "learning_rate": 4.380712031259267e-06,
      "loss": 0.0176,
      "step": 1725600
    },
    {
      "epoch": 2.8240149774487278,
      "grad_norm": 0.5451036095619202,
      "learning_rate": 4.38064613904575e-06,
      "loss": 0.011,
      "step": 1725620
    },
    {
      "epoch": 2.824047707887381,
      "grad_norm": 0.1444249451160431,
      "learning_rate": 4.3805802468322325e-06,
      "loss": 0.0139,
      "step": 1725640
    },
    {
      "epoch": 2.8240804383260345,
      "grad_norm": 0.6819310188293457,
      "learning_rate": 4.380514354618715e-06,
      "loss": 0.0108,
      "step": 1725660
    },
    {
      "epoch": 2.8241131687646877,
      "grad_norm": 0.22547723352909088,
      "learning_rate": 4.380448462405198e-06,
      "loss": 0.0177,
      "step": 1725680
    },
    {
      "epoch": 2.8241458992033412,
      "grad_norm": 0.1355319619178772,
      "learning_rate": 4.380382570191681e-06,
      "loss": 0.0112,
      "step": 1725700
    },
    {
      "epoch": 2.8241786296419944,
      "grad_norm": 0.4777528941631317,
      "learning_rate": 4.3803166779781634e-06,
      "loss": 0.0132,
      "step": 1725720
    },
    {
      "epoch": 2.824211360080648,
      "grad_norm": 0.22559839487075806,
      "learning_rate": 4.380250785764646e-06,
      "loss": 0.016,
      "step": 1725740
    },
    {
      "epoch": 2.824244090519301,
      "grad_norm": 0.8079037666320801,
      "learning_rate": 4.380184893551129e-06,
      "loss": 0.0182,
      "step": 1725760
    },
    {
      "epoch": 2.8242768209579543,
      "grad_norm": 0.3933740258216858,
      "learning_rate": 4.3801190013376125e-06,
      "loss": 0.0164,
      "step": 1725780
    },
    {
      "epoch": 2.824309551396608,
      "grad_norm": 0.29227331280708313,
      "learning_rate": 4.380053109124095e-06,
      "loss": 0.0121,
      "step": 1725800
    },
    {
      "epoch": 2.824342281835261,
      "grad_norm": 0.6389546990394592,
      "learning_rate": 4.379987216910578e-06,
      "loss": 0.0158,
      "step": 1725820
    },
    {
      "epoch": 2.8243750122739146,
      "grad_norm": 0.13078173995018005,
      "learning_rate": 4.379921324697061e-06,
      "loss": 0.0135,
      "step": 1725840
    },
    {
      "epoch": 2.824407742712568,
      "grad_norm": 0.26510700583457947,
      "learning_rate": 4.3798554324835435e-06,
      "loss": 0.0196,
      "step": 1725860
    },
    {
      "epoch": 2.8244404731512214,
      "grad_norm": 0.49675285816192627,
      "learning_rate": 4.379789540270027e-06,
      "loss": 0.0118,
      "step": 1725880
    },
    {
      "epoch": 2.8244732035898745,
      "grad_norm": 0.33886492252349854,
      "learning_rate": 4.37972364805651e-06,
      "loss": 0.0117,
      "step": 1725900
    },
    {
      "epoch": 2.8245059340285277,
      "grad_norm": 0.8153707981109619,
      "learning_rate": 4.3796577558429925e-06,
      "loss": 0.0134,
      "step": 1725920
    },
    {
      "epoch": 2.8245386644671813,
      "grad_norm": 0.10654886811971664,
      "learning_rate": 4.379591863629475e-06,
      "loss": 0.0128,
      "step": 1725940
    },
    {
      "epoch": 2.8245713949058344,
      "grad_norm": 0.28180932998657227,
      "learning_rate": 4.379525971415958e-06,
      "loss": 0.0088,
      "step": 1725960
    },
    {
      "epoch": 2.824604125344488,
      "grad_norm": 0.28758418560028076,
      "learning_rate": 4.379460079202441e-06,
      "loss": 0.015,
      "step": 1725980
    },
    {
      "epoch": 2.824636855783141,
      "grad_norm": 0.18313972651958466,
      "learning_rate": 4.379394186988924e-06,
      "loss": 0.0135,
      "step": 1726000
    },
    {
      "epoch": 2.8246695862217948,
      "grad_norm": 0.18762250244617462,
      "learning_rate": 4.379328294775407e-06,
      "loss": 0.0088,
      "step": 1726020
    },
    {
      "epoch": 2.824702316660448,
      "grad_norm": 0.2650280296802521,
      "learning_rate": 4.37926240256189e-06,
      "loss": 0.0106,
      "step": 1726040
    },
    {
      "epoch": 2.824735047099101,
      "grad_norm": 1.7859302759170532,
      "learning_rate": 4.3791965103483726e-06,
      "loss": 0.0118,
      "step": 1726060
    },
    {
      "epoch": 2.8247677775377547,
      "grad_norm": 0.8197173476219177,
      "learning_rate": 4.379130618134855e-06,
      "loss": 0.0175,
      "step": 1726080
    },
    {
      "epoch": 2.824800507976408,
      "grad_norm": 0.26918214559555054,
      "learning_rate": 4.379064725921338e-06,
      "loss": 0.0104,
      "step": 1726100
    },
    {
      "epoch": 2.8248332384150614,
      "grad_norm": 0.08442064374685287,
      "learning_rate": 4.378998833707821e-06,
      "loss": 0.0146,
      "step": 1726120
    },
    {
      "epoch": 2.8248659688537145,
      "grad_norm": 0.5764051079750061,
      "learning_rate": 4.3789329414943035e-06,
      "loss": 0.0138,
      "step": 1726140
    },
    {
      "epoch": 2.824898699292368,
      "grad_norm": 0.6185629963874817,
      "learning_rate": 4.378867049280786e-06,
      "loss": 0.0107,
      "step": 1726160
    },
    {
      "epoch": 2.8249314297310213,
      "grad_norm": 0.6067783236503601,
      "learning_rate": 4.37880115706727e-06,
      "loss": 0.0116,
      "step": 1726180
    },
    {
      "epoch": 2.8249641601696744,
      "grad_norm": 0.6136802434921265,
      "learning_rate": 4.378735264853753e-06,
      "loss": 0.0119,
      "step": 1726200
    },
    {
      "epoch": 2.824996890608328,
      "grad_norm": 0.4120555520057678,
      "learning_rate": 4.378669372640235e-06,
      "loss": 0.0144,
      "step": 1726220
    },
    {
      "epoch": 2.825029621046981,
      "grad_norm": 0.08278800547122955,
      "learning_rate": 4.378603480426719e-06,
      "loss": 0.0102,
      "step": 1726240
    },
    {
      "epoch": 2.8250623514856343,
      "grad_norm": 0.29080599546432495,
      "learning_rate": 4.378537588213202e-06,
      "loss": 0.0152,
      "step": 1726260
    },
    {
      "epoch": 2.825095081924288,
      "grad_norm": 0.264413982629776,
      "learning_rate": 4.378471695999684e-06,
      "loss": 0.0177,
      "step": 1726280
    },
    {
      "epoch": 2.8251278123629415,
      "grad_norm": 0.2045935094356537,
      "learning_rate": 4.378405803786167e-06,
      "loss": 0.0167,
      "step": 1726300
    },
    {
      "epoch": 2.8251605428015947,
      "grad_norm": 0.13559244573116302,
      "learning_rate": 4.37833991157265e-06,
      "loss": 0.0135,
      "step": 1726320
    },
    {
      "epoch": 2.825193273240248,
      "grad_norm": 0.12761488556861877,
      "learning_rate": 4.378274019359133e-06,
      "loss": 0.0114,
      "step": 1726340
    },
    {
      "epoch": 2.8252260036789014,
      "grad_norm": 0.5609874129295349,
      "learning_rate": 4.378208127145615e-06,
      "loss": 0.0154,
      "step": 1726360
    },
    {
      "epoch": 2.8252587341175546,
      "grad_norm": 0.29724815487861633,
      "learning_rate": 4.378142234932098e-06,
      "loss": 0.0123,
      "step": 1726380
    },
    {
      "epoch": 2.8252914645562077,
      "grad_norm": 0.36082127690315247,
      "learning_rate": 4.378076342718582e-06,
      "loss": 0.016,
      "step": 1726400
    },
    {
      "epoch": 2.8253241949948613,
      "grad_norm": 0.5342503190040588,
      "learning_rate": 4.378010450505064e-06,
      "loss": 0.0176,
      "step": 1726420
    },
    {
      "epoch": 2.825356925433515,
      "grad_norm": 0.20963525772094727,
      "learning_rate": 4.377944558291547e-06,
      "loss": 0.0145,
      "step": 1726440
    },
    {
      "epoch": 2.825389655872168,
      "grad_norm": 0.2350643128156662,
      "learning_rate": 4.37787866607803e-06,
      "loss": 0.0103,
      "step": 1726460
    },
    {
      "epoch": 2.825422386310821,
      "grad_norm": 0.1270586997270584,
      "learning_rate": 4.377812773864513e-06,
      "loss": 0.0197,
      "step": 1726480
    },
    {
      "epoch": 2.825455116749475,
      "grad_norm": 0.04897642508149147,
      "learning_rate": 4.377746881650995e-06,
      "loss": 0.01,
      "step": 1726500
    },
    {
      "epoch": 2.825487847188128,
      "grad_norm": 0.6609030961990356,
      "learning_rate": 4.377680989437478e-06,
      "loss": 0.0085,
      "step": 1726520
    },
    {
      "epoch": 2.825520577626781,
      "grad_norm": 0.8787686824798584,
      "learning_rate": 4.377615097223961e-06,
      "loss": 0.0243,
      "step": 1726540
    },
    {
      "epoch": 2.8255533080654347,
      "grad_norm": 0.5758225321769714,
      "learning_rate": 4.377549205010444e-06,
      "loss": 0.0106,
      "step": 1726560
    },
    {
      "epoch": 2.8255860385040883,
      "grad_norm": 0.24341073632240295,
      "learning_rate": 4.377483312796927e-06,
      "loss": 0.011,
      "step": 1726580
    },
    {
      "epoch": 2.8256187689427414,
      "grad_norm": 0.4840429127216339,
      "learning_rate": 4.37741742058341e-06,
      "loss": 0.0217,
      "step": 1726600
    },
    {
      "epoch": 2.8256514993813946,
      "grad_norm": 0.4319210946559906,
      "learning_rate": 4.377351528369893e-06,
      "loss": 0.0131,
      "step": 1726620
    },
    {
      "epoch": 2.825684229820048,
      "grad_norm": 0.4463678300380707,
      "learning_rate": 4.377285636156376e-06,
      "loss": 0.0134,
      "step": 1726640
    },
    {
      "epoch": 2.8257169602587013,
      "grad_norm": 1.1804347038269043,
      "learning_rate": 4.377219743942859e-06,
      "loss": 0.0137,
      "step": 1726660
    },
    {
      "epoch": 2.8257496906973545,
      "grad_norm": 0.15548409521579742,
      "learning_rate": 4.377153851729342e-06,
      "loss": 0.011,
      "step": 1726680
    },
    {
      "epoch": 2.825782421136008,
      "grad_norm": 0.30575647950172424,
      "learning_rate": 4.3770879595158245e-06,
      "loss": 0.0117,
      "step": 1726700
    },
    {
      "epoch": 2.8258151515746617,
      "grad_norm": 0.1947469860315323,
      "learning_rate": 4.377022067302307e-06,
      "loss": 0.0161,
      "step": 1726720
    },
    {
      "epoch": 2.825847882013315,
      "grad_norm": 0.2446376234292984,
      "learning_rate": 4.37695617508879e-06,
      "loss": 0.0119,
      "step": 1726740
    },
    {
      "epoch": 2.825880612451968,
      "grad_norm": 0.37313205003738403,
      "learning_rate": 4.376890282875273e-06,
      "loss": 0.0149,
      "step": 1726760
    },
    {
      "epoch": 2.8259133428906216,
      "grad_norm": 0.751440167427063,
      "learning_rate": 4.3768243906617554e-06,
      "loss": 0.0125,
      "step": 1726780
    },
    {
      "epoch": 2.8259460733292747,
      "grad_norm": 0.09897332638502121,
      "learning_rate": 4.376758498448239e-06,
      "loss": 0.0159,
      "step": 1726800
    },
    {
      "epoch": 2.825978803767928,
      "grad_norm": 0.3430664539337158,
      "learning_rate": 4.376692606234722e-06,
      "loss": 0.013,
      "step": 1726820
    },
    {
      "epoch": 2.8260115342065815,
      "grad_norm": 0.20207376778125763,
      "learning_rate": 4.3766267140212045e-06,
      "loss": 0.0121,
      "step": 1726840
    },
    {
      "epoch": 2.8260442646452346,
      "grad_norm": 0.3441656827926636,
      "learning_rate": 4.376560821807687e-06,
      "loss": 0.0129,
      "step": 1726860
    },
    {
      "epoch": 2.826076995083888,
      "grad_norm": 0.4697423577308655,
      "learning_rate": 4.37649492959417e-06,
      "loss": 0.0137,
      "step": 1726880
    },
    {
      "epoch": 2.8261097255225414,
      "grad_norm": 1.8351595401763916,
      "learning_rate": 4.376429037380653e-06,
      "loss": 0.0165,
      "step": 1726900
    },
    {
      "epoch": 2.826142455961195,
      "grad_norm": 0.7213435769081116,
      "learning_rate": 4.376363145167136e-06,
      "loss": 0.0094,
      "step": 1726920
    },
    {
      "epoch": 2.826175186399848,
      "grad_norm": 0.477548748254776,
      "learning_rate": 4.376297252953619e-06,
      "loss": 0.0147,
      "step": 1726940
    },
    {
      "epoch": 2.8262079168385013,
      "grad_norm": 0.9485946297645569,
      "learning_rate": 4.376231360740102e-06,
      "loss": 0.0179,
      "step": 1726960
    },
    {
      "epoch": 2.826240647277155,
      "grad_norm": 0.22869694232940674,
      "learning_rate": 4.3761654685265845e-06,
      "loss": 0.0161,
      "step": 1726980
    },
    {
      "epoch": 2.826273377715808,
      "grad_norm": 0.2777106463909149,
      "learning_rate": 4.376099576313067e-06,
      "loss": 0.0143,
      "step": 1727000
    },
    {
      "epoch": 2.8263061081544616,
      "grad_norm": 0.38646572828292847,
      "learning_rate": 4.376033684099551e-06,
      "loss": 0.008,
      "step": 1727020
    },
    {
      "epoch": 2.8263388385931147,
      "grad_norm": 0.2679712176322937,
      "learning_rate": 4.375967791886034e-06,
      "loss": 0.0102,
      "step": 1727040
    },
    {
      "epoch": 2.8263715690317683,
      "grad_norm": 0.2763747572898865,
      "learning_rate": 4.375901899672516e-06,
      "loss": 0.0199,
      "step": 1727060
    },
    {
      "epoch": 2.8264042994704215,
      "grad_norm": 0.3976784646511078,
      "learning_rate": 4.375836007458999e-06,
      "loss": 0.0186,
      "step": 1727080
    },
    {
      "epoch": 2.8264370299090746,
      "grad_norm": 0.1532890945672989,
      "learning_rate": 4.375770115245482e-06,
      "loss": 0.0088,
      "step": 1727100
    },
    {
      "epoch": 2.8264697603477282,
      "grad_norm": 0.33428603410720825,
      "learning_rate": 4.3757042230319645e-06,
      "loss": 0.0133,
      "step": 1727120
    },
    {
      "epoch": 2.8265024907863814,
      "grad_norm": 0.32320845127105713,
      "learning_rate": 4.375638330818447e-06,
      "loss": 0.0102,
      "step": 1727140
    },
    {
      "epoch": 2.826535221225035,
      "grad_norm": 0.1295531690120697,
      "learning_rate": 4.37557243860493e-06,
      "loss": 0.0101,
      "step": 1727160
    },
    {
      "epoch": 2.826567951663688,
      "grad_norm": 0.5150257349014282,
      "learning_rate": 4.375506546391413e-06,
      "loss": 0.0206,
      "step": 1727180
    },
    {
      "epoch": 2.8266006821023417,
      "grad_norm": 0.21311312913894653,
      "learning_rate": 4.375440654177896e-06,
      "loss": 0.0106,
      "step": 1727200
    },
    {
      "epoch": 2.826633412540995,
      "grad_norm": 0.2475632280111313,
      "learning_rate": 4.375374761964379e-06,
      "loss": 0.0133,
      "step": 1727220
    },
    {
      "epoch": 2.826666142979648,
      "grad_norm": 0.15588364005088806,
      "learning_rate": 4.375308869750862e-06,
      "loss": 0.0137,
      "step": 1727240
    },
    {
      "epoch": 2.8266988734183016,
      "grad_norm": 0.25169748067855835,
      "learning_rate": 4.3752429775373446e-06,
      "loss": 0.0122,
      "step": 1727260
    },
    {
      "epoch": 2.8267316038569548,
      "grad_norm": 1.4624223709106445,
      "learning_rate": 4.375177085323828e-06,
      "loss": 0.015,
      "step": 1727280
    },
    {
      "epoch": 2.8267643342956084,
      "grad_norm": 0.15304802358150482,
      "learning_rate": 4.375111193110311e-06,
      "loss": 0.0113,
      "step": 1727300
    },
    {
      "epoch": 2.8267970647342615,
      "grad_norm": 0.27770793437957764,
      "learning_rate": 4.375045300896794e-06,
      "loss": 0.0122,
      "step": 1727320
    },
    {
      "epoch": 2.826829795172915,
      "grad_norm": 1.1251972913742065,
      "learning_rate": 4.374979408683276e-06,
      "loss": 0.0134,
      "step": 1727340
    },
    {
      "epoch": 2.8268625256115683,
      "grad_norm": 0.12009915709495544,
      "learning_rate": 4.374913516469759e-06,
      "loss": 0.0069,
      "step": 1727360
    },
    {
      "epoch": 2.8268952560502214,
      "grad_norm": 0.416579008102417,
      "learning_rate": 4.374847624256242e-06,
      "loss": 0.0188,
      "step": 1727380
    },
    {
      "epoch": 2.826927986488875,
      "grad_norm": 0.7708466649055481,
      "learning_rate": 4.374781732042725e-06,
      "loss": 0.0179,
      "step": 1727400
    },
    {
      "epoch": 2.826960716927528,
      "grad_norm": 0.8690760135650635,
      "learning_rate": 4.374715839829208e-06,
      "loss": 0.018,
      "step": 1727420
    },
    {
      "epoch": 2.8269934473661817,
      "grad_norm": 0.25361403822898865,
      "learning_rate": 4.374649947615691e-06,
      "loss": 0.0113,
      "step": 1727440
    },
    {
      "epoch": 2.827026177804835,
      "grad_norm": 0.3685081899166107,
      "learning_rate": 4.374584055402174e-06,
      "loss": 0.0155,
      "step": 1727460
    },
    {
      "epoch": 2.8270589082434885,
      "grad_norm": 0.3325287699699402,
      "learning_rate": 4.374518163188656e-06,
      "loss": 0.0095,
      "step": 1727480
    },
    {
      "epoch": 2.8270916386821416,
      "grad_norm": 0.6584179997444153,
      "learning_rate": 4.374452270975139e-06,
      "loss": 0.0157,
      "step": 1727500
    },
    {
      "epoch": 2.827124369120795,
      "grad_norm": 0.5224483609199524,
      "learning_rate": 4.374386378761622e-06,
      "loss": 0.0206,
      "step": 1727520
    },
    {
      "epoch": 2.8271570995594484,
      "grad_norm": 0.5483836531639099,
      "learning_rate": 4.374320486548105e-06,
      "loss": 0.0104,
      "step": 1727540
    },
    {
      "epoch": 2.8271898299981015,
      "grad_norm": 0.3750442862510681,
      "learning_rate": 4.374254594334587e-06,
      "loss": 0.0131,
      "step": 1727560
    },
    {
      "epoch": 2.827222560436755,
      "grad_norm": 0.49033787846565247,
      "learning_rate": 4.37418870212107e-06,
      "loss": 0.0224,
      "step": 1727580
    },
    {
      "epoch": 2.8272552908754083,
      "grad_norm": 1.6400381326675415,
      "learning_rate": 4.374122809907554e-06,
      "loss": 0.0117,
      "step": 1727600
    },
    {
      "epoch": 2.827288021314062,
      "grad_norm": 0.40716564655303955,
      "learning_rate": 4.374056917694036e-06,
      "loss": 0.0218,
      "step": 1727620
    },
    {
      "epoch": 2.827320751752715,
      "grad_norm": 0.3646138906478882,
      "learning_rate": 4.373991025480519e-06,
      "loss": 0.0109,
      "step": 1727640
    },
    {
      "epoch": 2.827353482191368,
      "grad_norm": 0.39233165979385376,
      "learning_rate": 4.373925133267003e-06,
      "loss": 0.0191,
      "step": 1727660
    },
    {
      "epoch": 2.8273862126300218,
      "grad_norm": 0.5695505142211914,
      "learning_rate": 4.3738592410534855e-06,
      "loss": 0.0144,
      "step": 1727680
    },
    {
      "epoch": 2.827418943068675,
      "grad_norm": 0.47382843494415283,
      "learning_rate": 4.373793348839968e-06,
      "loss": 0.0211,
      "step": 1727700
    },
    {
      "epoch": 2.8274516735073285,
      "grad_norm": 0.35988369584083557,
      "learning_rate": 4.373727456626451e-06,
      "loss": 0.0091,
      "step": 1727720
    },
    {
      "epoch": 2.8274844039459817,
      "grad_norm": 0.6162322163581848,
      "learning_rate": 4.373661564412934e-06,
      "loss": 0.0235,
      "step": 1727740
    },
    {
      "epoch": 2.8275171343846353,
      "grad_norm": 0.24496851861476898,
      "learning_rate": 4.3735956721994164e-06,
      "loss": 0.0115,
      "step": 1727760
    },
    {
      "epoch": 2.8275498648232884,
      "grad_norm": 0.48548707365989685,
      "learning_rate": 4.373529779985899e-06,
      "loss": 0.0123,
      "step": 1727780
    },
    {
      "epoch": 2.8275825952619416,
      "grad_norm": 0.5088622570037842,
      "learning_rate": 4.373463887772382e-06,
      "loss": 0.0137,
      "step": 1727800
    },
    {
      "epoch": 2.827615325700595,
      "grad_norm": 0.46531710028648376,
      "learning_rate": 4.3733979955588655e-06,
      "loss": 0.0166,
      "step": 1727820
    },
    {
      "epoch": 2.8276480561392483,
      "grad_norm": 0.06752905249595642,
      "learning_rate": 4.373332103345348e-06,
      "loss": 0.0119,
      "step": 1727840
    },
    {
      "epoch": 2.8276807865779015,
      "grad_norm": 0.30147403478622437,
      "learning_rate": 4.373266211131831e-06,
      "loss": 0.0125,
      "step": 1727860
    },
    {
      "epoch": 2.827713517016555,
      "grad_norm": 0.5186787247657776,
      "learning_rate": 4.373200318918314e-06,
      "loss": 0.0141,
      "step": 1727880
    },
    {
      "epoch": 2.8277462474552086,
      "grad_norm": 0.47908642888069153,
      "learning_rate": 4.3731344267047965e-06,
      "loss": 0.0141,
      "step": 1727900
    },
    {
      "epoch": 2.827778977893862,
      "grad_norm": 0.8945856690406799,
      "learning_rate": 4.373068534491279e-06,
      "loss": 0.0123,
      "step": 1727920
    },
    {
      "epoch": 2.827811708332515,
      "grad_norm": 0.7620335817337036,
      "learning_rate": 4.373002642277762e-06,
      "loss": 0.0169,
      "step": 1727940
    },
    {
      "epoch": 2.8278444387711685,
      "grad_norm": 0.1687678098678589,
      "learning_rate": 4.372936750064245e-06,
      "loss": 0.0096,
      "step": 1727960
    },
    {
      "epoch": 2.8278771692098217,
      "grad_norm": 0.08925095945596695,
      "learning_rate": 4.372870857850728e-06,
      "loss": 0.0128,
      "step": 1727980
    },
    {
      "epoch": 2.827909899648475,
      "grad_norm": 0.5637811422348022,
      "learning_rate": 4.372804965637211e-06,
      "loss": 0.0155,
      "step": 1728000
    },
    {
      "epoch": 2.8279426300871284,
      "grad_norm": 0.3273763954639435,
      "learning_rate": 4.372739073423694e-06,
      "loss": 0.0119,
      "step": 1728020
    },
    {
      "epoch": 2.827975360525782,
      "grad_norm": 0.9316684007644653,
      "learning_rate": 4.3726731812101765e-06,
      "loss": 0.0133,
      "step": 1728040
    },
    {
      "epoch": 2.828008090964435,
      "grad_norm": 0.5184918642044067,
      "learning_rate": 4.37260728899666e-06,
      "loss": 0.0183,
      "step": 1728060
    },
    {
      "epoch": 2.8280408214030883,
      "grad_norm": 0.34273067116737366,
      "learning_rate": 4.372541396783143e-06,
      "loss": 0.0132,
      "step": 1728080
    },
    {
      "epoch": 2.828073551841742,
      "grad_norm": 0.2563776671886444,
      "learning_rate": 4.3724755045696256e-06,
      "loss": 0.0104,
      "step": 1728100
    },
    {
      "epoch": 2.828106282280395,
      "grad_norm": 0.1773252636194229,
      "learning_rate": 4.372409612356108e-06,
      "loss": 0.0136,
      "step": 1728120
    },
    {
      "epoch": 2.828139012719048,
      "grad_norm": 0.08324132859706879,
      "learning_rate": 4.372343720142591e-06,
      "loss": 0.0077,
      "step": 1728140
    },
    {
      "epoch": 2.828171743157702,
      "grad_norm": 1.0308679342269897,
      "learning_rate": 4.372277827929074e-06,
      "loss": 0.0164,
      "step": 1728160
    },
    {
      "epoch": 2.8282044735963554,
      "grad_norm": 0.21219255030155182,
      "learning_rate": 4.3722119357155565e-06,
      "loss": 0.013,
      "step": 1728180
    },
    {
      "epoch": 2.8282372040350086,
      "grad_norm": 0.4560849070549011,
      "learning_rate": 4.372146043502039e-06,
      "loss": 0.0101,
      "step": 1728200
    },
    {
      "epoch": 2.8282699344736617,
      "grad_norm": 0.11606671661138535,
      "learning_rate": 4.372080151288523e-06,
      "loss": 0.0139,
      "step": 1728220
    },
    {
      "epoch": 2.8283026649123153,
      "grad_norm": 0.7577313780784607,
      "learning_rate": 4.372014259075006e-06,
      "loss": 0.0139,
      "step": 1728240
    },
    {
      "epoch": 2.8283353953509685,
      "grad_norm": 0.7419033646583557,
      "learning_rate": 4.371948366861488e-06,
      "loss": 0.019,
      "step": 1728260
    },
    {
      "epoch": 2.8283681257896216,
      "grad_norm": 0.3984854519367218,
      "learning_rate": 4.371882474647971e-06,
      "loss": 0.009,
      "step": 1728280
    },
    {
      "epoch": 2.828400856228275,
      "grad_norm": 0.43985190987586975,
      "learning_rate": 4.371816582434454e-06,
      "loss": 0.0139,
      "step": 1728300
    },
    {
      "epoch": 2.8284335866669283,
      "grad_norm": 0.2050270438194275,
      "learning_rate": 4.3717506902209365e-06,
      "loss": 0.0099,
      "step": 1728320
    },
    {
      "epoch": 2.828466317105582,
      "grad_norm": 0.6373745799064636,
      "learning_rate": 4.37168479800742e-06,
      "loss": 0.0182,
      "step": 1728340
    },
    {
      "epoch": 2.828499047544235,
      "grad_norm": 0.3789437413215637,
      "learning_rate": 4.371618905793903e-06,
      "loss": 0.0116,
      "step": 1728360
    },
    {
      "epoch": 2.8285317779828887,
      "grad_norm": 0.5197843313217163,
      "learning_rate": 4.371553013580386e-06,
      "loss": 0.021,
      "step": 1728380
    },
    {
      "epoch": 2.828564508421542,
      "grad_norm": 0.08406271785497665,
      "learning_rate": 4.371487121366868e-06,
      "loss": 0.0121,
      "step": 1728400
    },
    {
      "epoch": 2.828597238860195,
      "grad_norm": 1.3737692832946777,
      "learning_rate": 4.371421229153351e-06,
      "loss": 0.0166,
      "step": 1728420
    },
    {
      "epoch": 2.8286299692988486,
      "grad_norm": 0.11523714661598206,
      "learning_rate": 4.371355336939835e-06,
      "loss": 0.0159,
      "step": 1728440
    },
    {
      "epoch": 2.8286626997375017,
      "grad_norm": 0.4991722106933594,
      "learning_rate": 4.371289444726317e-06,
      "loss": 0.0138,
      "step": 1728460
    },
    {
      "epoch": 2.8286954301761553,
      "grad_norm": 0.6444640755653381,
      "learning_rate": 4.3712235525128e-06,
      "loss": 0.0136,
      "step": 1728480
    },
    {
      "epoch": 2.8287281606148085,
      "grad_norm": 0.19149146974086761,
      "learning_rate": 4.371157660299283e-06,
      "loss": 0.0161,
      "step": 1728500
    },
    {
      "epoch": 2.828760891053462,
      "grad_norm": 0.3103043735027313,
      "learning_rate": 4.371091768085766e-06,
      "loss": 0.0143,
      "step": 1728520
    },
    {
      "epoch": 2.828793621492115,
      "grad_norm": 0.35764268040657043,
      "learning_rate": 4.371025875872248e-06,
      "loss": 0.0161,
      "step": 1728540
    },
    {
      "epoch": 2.8288263519307684,
      "grad_norm": 0.3155769407749176,
      "learning_rate": 4.370959983658731e-06,
      "loss": 0.0149,
      "step": 1728560
    },
    {
      "epoch": 2.828859082369422,
      "grad_norm": 0.21561600267887115,
      "learning_rate": 4.370894091445214e-06,
      "loss": 0.009,
      "step": 1728580
    },
    {
      "epoch": 2.828891812808075,
      "grad_norm": 0.3957199454307556,
      "learning_rate": 4.370828199231697e-06,
      "loss": 0.0119,
      "step": 1728600
    },
    {
      "epoch": 2.8289245432467287,
      "grad_norm": 0.3685694932937622,
      "learning_rate": 4.37076230701818e-06,
      "loss": 0.0154,
      "step": 1728620
    },
    {
      "epoch": 2.828957273685382,
      "grad_norm": 0.427785187959671,
      "learning_rate": 4.370696414804663e-06,
      "loss": 0.0233,
      "step": 1728640
    },
    {
      "epoch": 2.8289900041240355,
      "grad_norm": 0.4503869414329529,
      "learning_rate": 4.370630522591146e-06,
      "loss": 0.0087,
      "step": 1728660
    },
    {
      "epoch": 2.8290227345626886,
      "grad_norm": 0.4566051661968231,
      "learning_rate": 4.370564630377629e-06,
      "loss": 0.0148,
      "step": 1728680
    },
    {
      "epoch": 2.8290554650013418,
      "grad_norm": 0.32356375455856323,
      "learning_rate": 4.370498738164112e-06,
      "loss": 0.0162,
      "step": 1728700
    },
    {
      "epoch": 2.8290881954399953,
      "grad_norm": 0.6614434123039246,
      "learning_rate": 4.370432845950595e-06,
      "loss": 0.022,
      "step": 1728720
    },
    {
      "epoch": 2.8291209258786485,
      "grad_norm": 0.6102551221847534,
      "learning_rate": 4.3703669537370775e-06,
      "loss": 0.0121,
      "step": 1728740
    },
    {
      "epoch": 2.829153656317302,
      "grad_norm": 0.4395444691181183,
      "learning_rate": 4.37030106152356e-06,
      "loss": 0.0183,
      "step": 1728760
    },
    {
      "epoch": 2.8291863867559552,
      "grad_norm": 0.4336654841899872,
      "learning_rate": 4.370235169310043e-06,
      "loss": 0.0109,
      "step": 1728780
    },
    {
      "epoch": 2.829219117194609,
      "grad_norm": 0.47611096501350403,
      "learning_rate": 4.370169277096526e-06,
      "loss": 0.0144,
      "step": 1728800
    },
    {
      "epoch": 2.829251847633262,
      "grad_norm": 0.35002467036247253,
      "learning_rate": 4.3701033848830084e-06,
      "loss": 0.0097,
      "step": 1728820
    },
    {
      "epoch": 2.829284578071915,
      "grad_norm": 0.07239780575037003,
      "learning_rate": 4.370037492669492e-06,
      "loss": 0.0122,
      "step": 1728840
    },
    {
      "epoch": 2.8293173085105687,
      "grad_norm": 0.4589889943599701,
      "learning_rate": 4.369971600455975e-06,
      "loss": 0.0092,
      "step": 1728860
    },
    {
      "epoch": 2.829350038949222,
      "grad_norm": 0.2126331776380539,
      "learning_rate": 4.3699057082424575e-06,
      "loss": 0.0171,
      "step": 1728880
    },
    {
      "epoch": 2.8293827693878755,
      "grad_norm": 0.6034623980522156,
      "learning_rate": 4.36983981602894e-06,
      "loss": 0.0121,
      "step": 1728900
    },
    {
      "epoch": 2.8294154998265286,
      "grad_norm": 0.08691348880529404,
      "learning_rate": 4.369773923815423e-06,
      "loss": 0.0159,
      "step": 1728920
    },
    {
      "epoch": 2.829448230265182,
      "grad_norm": 0.6147800087928772,
      "learning_rate": 4.369708031601906e-06,
      "loss": 0.0108,
      "step": 1728940
    },
    {
      "epoch": 2.8294809607038354,
      "grad_norm": 0.5611850023269653,
      "learning_rate": 4.3696421393883885e-06,
      "loss": 0.0146,
      "step": 1728960
    },
    {
      "epoch": 2.8295136911424885,
      "grad_norm": 0.14473141729831696,
      "learning_rate": 4.369576247174871e-06,
      "loss": 0.0067,
      "step": 1728980
    },
    {
      "epoch": 2.829546421581142,
      "grad_norm": 0.37970155477523804,
      "learning_rate": 4.369510354961354e-06,
      "loss": 0.0127,
      "step": 1729000
    },
    {
      "epoch": 2.8295791520197953,
      "grad_norm": 0.45665794610977173,
      "learning_rate": 4.3694444627478375e-06,
      "loss": 0.0147,
      "step": 1729020
    },
    {
      "epoch": 2.829611882458449,
      "grad_norm": 1.1608498096466064,
      "learning_rate": 4.36937857053432e-06,
      "loss": 0.0114,
      "step": 1729040
    },
    {
      "epoch": 2.829644612897102,
      "grad_norm": 0.31198737025260925,
      "learning_rate": 4.369312678320803e-06,
      "loss": 0.0144,
      "step": 1729060
    },
    {
      "epoch": 2.8296773433357556,
      "grad_norm": 0.3865714371204376,
      "learning_rate": 4.369246786107287e-06,
      "loss": 0.012,
      "step": 1729080
    },
    {
      "epoch": 2.8297100737744088,
      "grad_norm": 0.5872706770896912,
      "learning_rate": 4.369180893893769e-06,
      "loss": 0.0113,
      "step": 1729100
    },
    {
      "epoch": 2.829742804213062,
      "grad_norm": 0.38905924558639526,
      "learning_rate": 4.369115001680252e-06,
      "loss": 0.0108,
      "step": 1729120
    },
    {
      "epoch": 2.8297755346517155,
      "grad_norm": 0.19303883612155914,
      "learning_rate": 4.369049109466735e-06,
      "loss": 0.0133,
      "step": 1729140
    },
    {
      "epoch": 2.8298082650903686,
      "grad_norm": 0.30160510540008545,
      "learning_rate": 4.3689832172532175e-06,
      "loss": 0.0135,
      "step": 1729160
    },
    {
      "epoch": 2.8298409955290222,
      "grad_norm": 0.07508789747953415,
      "learning_rate": 4.3689173250397e-06,
      "loss": 0.01,
      "step": 1729180
    },
    {
      "epoch": 2.8298737259676754,
      "grad_norm": 0.5038198232650757,
      "learning_rate": 4.368851432826183e-06,
      "loss": 0.0127,
      "step": 1729200
    },
    {
      "epoch": 2.829906456406329,
      "grad_norm": 0.08651755750179291,
      "learning_rate": 4.368785540612666e-06,
      "loss": 0.0103,
      "step": 1729220
    },
    {
      "epoch": 2.829939186844982,
      "grad_norm": 0.59904545545578,
      "learning_rate": 4.368719648399149e-06,
      "loss": 0.0166,
      "step": 1729240
    },
    {
      "epoch": 2.8299719172836353,
      "grad_norm": 0.2455117255449295,
      "learning_rate": 4.368653756185632e-06,
      "loss": 0.0179,
      "step": 1729260
    },
    {
      "epoch": 2.830004647722289,
      "grad_norm": 0.06303601711988449,
      "learning_rate": 4.368587863972115e-06,
      "loss": 0.0147,
      "step": 1729280
    },
    {
      "epoch": 2.830037378160942,
      "grad_norm": 0.23949739336967468,
      "learning_rate": 4.3685219717585976e-06,
      "loss": 0.0223,
      "step": 1729300
    },
    {
      "epoch": 2.830070108599595,
      "grad_norm": 0.13833226263523102,
      "learning_rate": 4.36845607954508e-06,
      "loss": 0.0114,
      "step": 1729320
    },
    {
      "epoch": 2.8301028390382488,
      "grad_norm": 0.19295769929885864,
      "learning_rate": 4.368390187331563e-06,
      "loss": 0.0159,
      "step": 1729340
    },
    {
      "epoch": 2.8301355694769024,
      "grad_norm": 0.5321600437164307,
      "learning_rate": 4.368324295118046e-06,
      "loss": 0.0101,
      "step": 1729360
    },
    {
      "epoch": 2.8301682999155555,
      "grad_norm": 0.5010690689086914,
      "learning_rate": 4.368258402904529e-06,
      "loss": 0.0151,
      "step": 1729380
    },
    {
      "epoch": 2.8302010303542087,
      "grad_norm": 0.38125911355018616,
      "learning_rate": 4.368192510691012e-06,
      "loss": 0.0142,
      "step": 1729400
    },
    {
      "epoch": 2.8302337607928623,
      "grad_norm": 0.37720635533332825,
      "learning_rate": 4.368126618477495e-06,
      "loss": 0.0125,
      "step": 1729420
    },
    {
      "epoch": 2.8302664912315154,
      "grad_norm": 0.41355496644973755,
      "learning_rate": 4.368060726263978e-06,
      "loss": 0.0192,
      "step": 1729440
    },
    {
      "epoch": 2.8302992216701686,
      "grad_norm": 0.3792963922023773,
      "learning_rate": 4.367994834050461e-06,
      "loss": 0.0135,
      "step": 1729460
    },
    {
      "epoch": 2.830331952108822,
      "grad_norm": 0.7643067836761475,
      "learning_rate": 4.367928941836944e-06,
      "loss": 0.0218,
      "step": 1729480
    },
    {
      "epoch": 2.8303646825474758,
      "grad_norm": 0.17240016162395477,
      "learning_rate": 4.367863049623427e-06,
      "loss": 0.0093,
      "step": 1729500
    },
    {
      "epoch": 2.830397412986129,
      "grad_norm": 0.2453186810016632,
      "learning_rate": 4.367797157409909e-06,
      "loss": 0.0116,
      "step": 1729520
    },
    {
      "epoch": 2.830430143424782,
      "grad_norm": 0.34479090571403503,
      "learning_rate": 4.367731265196392e-06,
      "loss": 0.0157,
      "step": 1729540
    },
    {
      "epoch": 2.8304628738634356,
      "grad_norm": 0.30934539437294006,
      "learning_rate": 4.367665372982875e-06,
      "loss": 0.0148,
      "step": 1729560
    },
    {
      "epoch": 2.830495604302089,
      "grad_norm": 0.22802378237247467,
      "learning_rate": 4.367599480769358e-06,
      "loss": 0.0121,
      "step": 1729580
    },
    {
      "epoch": 2.830528334740742,
      "grad_norm": 0.31520432233810425,
      "learning_rate": 4.36753358855584e-06,
      "loss": 0.0144,
      "step": 1729600
    },
    {
      "epoch": 2.8305610651793955,
      "grad_norm": 0.5981683731079102,
      "learning_rate": 4.367467696342323e-06,
      "loss": 0.0144,
      "step": 1729620
    },
    {
      "epoch": 2.830593795618049,
      "grad_norm": 0.3589858412742615,
      "learning_rate": 4.367401804128807e-06,
      "loss": 0.0197,
      "step": 1729640
    },
    {
      "epoch": 2.8306265260567023,
      "grad_norm": 0.25600185990333557,
      "learning_rate": 4.3673359119152894e-06,
      "loss": 0.0128,
      "step": 1729660
    },
    {
      "epoch": 2.8306592564953554,
      "grad_norm": 0.49449169635772705,
      "learning_rate": 4.367270019701772e-06,
      "loss": 0.0133,
      "step": 1729680
    },
    {
      "epoch": 2.830691986934009,
      "grad_norm": 0.07911506295204163,
      "learning_rate": 4.367204127488255e-06,
      "loss": 0.0113,
      "step": 1729700
    },
    {
      "epoch": 2.830724717372662,
      "grad_norm": 0.3271588683128357,
      "learning_rate": 4.367138235274738e-06,
      "loss": 0.011,
      "step": 1729720
    },
    {
      "epoch": 2.8307574478113153,
      "grad_norm": 0.523597776889801,
      "learning_rate": 4.367072343061221e-06,
      "loss": 0.0102,
      "step": 1729740
    },
    {
      "epoch": 2.830790178249969,
      "grad_norm": 0.47082069516181946,
      "learning_rate": 4.367006450847704e-06,
      "loss": 0.0148,
      "step": 1729760
    },
    {
      "epoch": 2.8308229086886225,
      "grad_norm": 0.4205264449119568,
      "learning_rate": 4.366940558634187e-06,
      "loss": 0.0124,
      "step": 1729780
    },
    {
      "epoch": 2.8308556391272757,
      "grad_norm": 0.10756252706050873,
      "learning_rate": 4.3668746664206695e-06,
      "loss": 0.0157,
      "step": 1729800
    },
    {
      "epoch": 2.830888369565929,
      "grad_norm": 0.3475015461444855,
      "learning_rate": 4.366808774207152e-06,
      "loss": 0.014,
      "step": 1729820
    },
    {
      "epoch": 2.8309211000045824,
      "grad_norm": 0.3498685657978058,
      "learning_rate": 4.366742881993635e-06,
      "loss": 0.0079,
      "step": 1729840
    },
    {
      "epoch": 2.8309538304432356,
      "grad_norm": 0.2094074785709381,
      "learning_rate": 4.3666769897801185e-06,
      "loss": 0.0133,
      "step": 1729860
    },
    {
      "epoch": 2.8309865608818887,
      "grad_norm": 0.5598214864730835,
      "learning_rate": 4.366611097566601e-06,
      "loss": 0.0113,
      "step": 1729880
    },
    {
      "epoch": 2.8310192913205423,
      "grad_norm": 0.6547952890396118,
      "learning_rate": 4.366545205353084e-06,
      "loss": 0.0213,
      "step": 1729900
    },
    {
      "epoch": 2.8310520217591955,
      "grad_norm": 0.23782509565353394,
      "learning_rate": 4.366479313139567e-06,
      "loss": 0.0114,
      "step": 1729920
    },
    {
      "epoch": 2.831084752197849,
      "grad_norm": 0.27699756622314453,
      "learning_rate": 4.3664134209260495e-06,
      "loss": 0.0185,
      "step": 1729940
    },
    {
      "epoch": 2.831117482636502,
      "grad_norm": 0.2622106671333313,
      "learning_rate": 4.366347528712532e-06,
      "loss": 0.0108,
      "step": 1729960
    },
    {
      "epoch": 2.831150213075156,
      "grad_norm": 0.22882528603076935,
      "learning_rate": 4.366281636499015e-06,
      "loss": 0.0179,
      "step": 1729980
    },
    {
      "epoch": 2.831182943513809,
      "grad_norm": 0.10273070633411407,
      "learning_rate": 4.366215744285498e-06,
      "loss": 0.015,
      "step": 1730000
    },
    {
      "epoch": 2.831215673952462,
      "grad_norm": 0.1820906102657318,
      "learning_rate": 4.3661498520719804e-06,
      "loss": 0.0127,
      "step": 1730020
    },
    {
      "epoch": 2.8312484043911157,
      "grad_norm": 0.1416022777557373,
      "learning_rate": 4.366083959858464e-06,
      "loss": 0.0153,
      "step": 1730040
    },
    {
      "epoch": 2.831281134829769,
      "grad_norm": 0.17345991730690002,
      "learning_rate": 4.366018067644947e-06,
      "loss": 0.0113,
      "step": 1730060
    },
    {
      "epoch": 2.8313138652684224,
      "grad_norm": 0.982566773891449,
      "learning_rate": 4.3659521754314295e-06,
      "loss": 0.0148,
      "step": 1730080
    },
    {
      "epoch": 2.8313465957070756,
      "grad_norm": 0.2137010246515274,
      "learning_rate": 4.365886283217913e-06,
      "loss": 0.0129,
      "step": 1730100
    },
    {
      "epoch": 2.831379326145729,
      "grad_norm": 0.08495427668094635,
      "learning_rate": 4.365820391004396e-06,
      "loss": 0.012,
      "step": 1730120
    },
    {
      "epoch": 2.8314120565843823,
      "grad_norm": 0.24376259744167328,
      "learning_rate": 4.3657544987908786e-06,
      "loss": 0.0126,
      "step": 1730140
    },
    {
      "epoch": 2.8314447870230355,
      "grad_norm": 0.2801535725593567,
      "learning_rate": 4.365688606577361e-06,
      "loss": 0.0095,
      "step": 1730160
    },
    {
      "epoch": 2.831477517461689,
      "grad_norm": 0.07905874401330948,
      "learning_rate": 4.365622714363844e-06,
      "loss": 0.0099,
      "step": 1730180
    },
    {
      "epoch": 2.8315102479003422,
      "grad_norm": 0.6087698936462402,
      "learning_rate": 4.365556822150327e-06,
      "loss": 0.0122,
      "step": 1730200
    },
    {
      "epoch": 2.831542978338996,
      "grad_norm": 1.453831434249878,
      "learning_rate": 4.3654909299368095e-06,
      "loss": 0.0123,
      "step": 1730220
    },
    {
      "epoch": 2.831575708777649,
      "grad_norm": 0.4991576075553894,
      "learning_rate": 4.365425037723292e-06,
      "loss": 0.0107,
      "step": 1730240
    },
    {
      "epoch": 2.8316084392163026,
      "grad_norm": 0.38689321279525757,
      "learning_rate": 4.365359145509776e-06,
      "loss": 0.0112,
      "step": 1730260
    },
    {
      "epoch": 2.8316411696549557,
      "grad_norm": 0.38661786913871765,
      "learning_rate": 4.365293253296259e-06,
      "loss": 0.0118,
      "step": 1730280
    },
    {
      "epoch": 2.831673900093609,
      "grad_norm": 0.21215461194515228,
      "learning_rate": 4.365227361082741e-06,
      "loss": 0.0142,
      "step": 1730300
    },
    {
      "epoch": 2.8317066305322625,
      "grad_norm": 0.2615528404712677,
      "learning_rate": 4.365161468869224e-06,
      "loss": 0.0119,
      "step": 1730320
    },
    {
      "epoch": 2.8317393609709156,
      "grad_norm": 0.36200183629989624,
      "learning_rate": 4.365095576655707e-06,
      "loss": 0.0133,
      "step": 1730340
    },
    {
      "epoch": 2.831772091409569,
      "grad_norm": 0.18297551572322845,
      "learning_rate": 4.3650296844421896e-06,
      "loss": 0.0117,
      "step": 1730360
    },
    {
      "epoch": 2.8318048218482224,
      "grad_norm": 0.6575949788093567,
      "learning_rate": 4.364963792228672e-06,
      "loss": 0.0147,
      "step": 1730380
    },
    {
      "epoch": 2.831837552286876,
      "grad_norm": 0.14824692904949188,
      "learning_rate": 4.364897900015155e-06,
      "loss": 0.0109,
      "step": 1730400
    },
    {
      "epoch": 2.831870282725529,
      "grad_norm": 0.40090203285217285,
      "learning_rate": 4.364832007801638e-06,
      "loss": 0.0153,
      "step": 1730420
    },
    {
      "epoch": 2.8319030131641822,
      "grad_norm": 0.35347020626068115,
      "learning_rate": 4.364766115588121e-06,
      "loss": 0.0129,
      "step": 1730440
    },
    {
      "epoch": 2.831935743602836,
      "grad_norm": 0.29202619194984436,
      "learning_rate": 4.364700223374604e-06,
      "loss": 0.0086,
      "step": 1730460
    },
    {
      "epoch": 2.831968474041489,
      "grad_norm": 0.17026236653327942,
      "learning_rate": 4.364634331161087e-06,
      "loss": 0.0158,
      "step": 1730480
    },
    {
      "epoch": 2.8320012044801426,
      "grad_norm": 0.1795409768819809,
      "learning_rate": 4.3645684389475704e-06,
      "loss": 0.0118,
      "step": 1730500
    },
    {
      "epoch": 2.8320339349187957,
      "grad_norm": 0.3956523537635803,
      "learning_rate": 4.364502546734053e-06,
      "loss": 0.018,
      "step": 1730520
    },
    {
      "epoch": 2.8320666653574493,
      "grad_norm": 0.31966105103492737,
      "learning_rate": 4.364436654520536e-06,
      "loss": 0.0183,
      "step": 1730540
    },
    {
      "epoch": 2.8320993957961025,
      "grad_norm": 0.3700625002384186,
      "learning_rate": 4.364370762307019e-06,
      "loss": 0.0142,
      "step": 1730560
    },
    {
      "epoch": 2.8321321262347556,
      "grad_norm": 0.23487332463264465,
      "learning_rate": 4.364304870093501e-06,
      "loss": 0.0161,
      "step": 1730580
    },
    {
      "epoch": 2.8321648566734092,
      "grad_norm": 0.5818773508071899,
      "learning_rate": 4.364238977879984e-06,
      "loss": 0.0158,
      "step": 1730600
    },
    {
      "epoch": 2.8321975871120624,
      "grad_norm": 0.2708183526992798,
      "learning_rate": 4.364173085666467e-06,
      "loss": 0.0113,
      "step": 1730620
    },
    {
      "epoch": 2.832230317550716,
      "grad_norm": 0.4638481140136719,
      "learning_rate": 4.36410719345295e-06,
      "loss": 0.0146,
      "step": 1730640
    },
    {
      "epoch": 2.832263047989369,
      "grad_norm": 0.16086681187152863,
      "learning_rate": 4.364041301239433e-06,
      "loss": 0.0131,
      "step": 1730660
    },
    {
      "epoch": 2.8322957784280227,
      "grad_norm": 0.28709742426872253,
      "learning_rate": 4.363975409025916e-06,
      "loss": 0.0151,
      "step": 1730680
    },
    {
      "epoch": 2.832328508866676,
      "grad_norm": 0.8117577433586121,
      "learning_rate": 4.363909516812399e-06,
      "loss": 0.019,
      "step": 1730700
    },
    {
      "epoch": 2.832361239305329,
      "grad_norm": 0.2788870632648468,
      "learning_rate": 4.363843624598881e-06,
      "loss": 0.0084,
      "step": 1730720
    },
    {
      "epoch": 2.8323939697439826,
      "grad_norm": 0.2311490923166275,
      "learning_rate": 4.363777732385364e-06,
      "loss": 0.0155,
      "step": 1730740
    },
    {
      "epoch": 2.8324267001826358,
      "grad_norm": 0.7517320513725281,
      "learning_rate": 4.363711840171847e-06,
      "loss": 0.0193,
      "step": 1730760
    },
    {
      "epoch": 2.832459430621289,
      "grad_norm": 0.14880487322807312,
      "learning_rate": 4.36364594795833e-06,
      "loss": 0.018,
      "step": 1730780
    },
    {
      "epoch": 2.8324921610599425,
      "grad_norm": 0.318966805934906,
      "learning_rate": 4.363580055744813e-06,
      "loss": 0.0109,
      "step": 1730800
    },
    {
      "epoch": 2.832524891498596,
      "grad_norm": 0.11989682912826538,
      "learning_rate": 4.363514163531296e-06,
      "loss": 0.0096,
      "step": 1730820
    },
    {
      "epoch": 2.8325576219372492,
      "grad_norm": 0.12064722925424576,
      "learning_rate": 4.363448271317779e-06,
      "loss": 0.0142,
      "step": 1730840
    },
    {
      "epoch": 2.8325903523759024,
      "grad_norm": 0.4247993528842926,
      "learning_rate": 4.3633823791042614e-06,
      "loss": 0.0183,
      "step": 1730860
    },
    {
      "epoch": 2.832623082814556,
      "grad_norm": 0.31557556986808777,
      "learning_rate": 4.363316486890745e-06,
      "loss": 0.0106,
      "step": 1730880
    },
    {
      "epoch": 2.832655813253209,
      "grad_norm": 0.1427306979894638,
      "learning_rate": 4.363250594677228e-06,
      "loss": 0.0167,
      "step": 1730900
    },
    {
      "epoch": 2.8326885436918623,
      "grad_norm": 0.6083322763442993,
      "learning_rate": 4.3631847024637105e-06,
      "loss": 0.0151,
      "step": 1730920
    },
    {
      "epoch": 2.832721274130516,
      "grad_norm": 0.33735111355781555,
      "learning_rate": 4.363118810250193e-06,
      "loss": 0.0205,
      "step": 1730940
    },
    {
      "epoch": 2.8327540045691695,
      "grad_norm": 0.20882758498191833,
      "learning_rate": 4.363052918036676e-06,
      "loss": 0.0139,
      "step": 1730960
    },
    {
      "epoch": 2.8327867350078226,
      "grad_norm": 0.1071491464972496,
      "learning_rate": 4.362987025823159e-06,
      "loss": 0.0142,
      "step": 1730980
    },
    {
      "epoch": 2.832819465446476,
      "grad_norm": 0.3198995888233185,
      "learning_rate": 4.3629211336096415e-06,
      "loss": 0.0101,
      "step": 1731000
    },
    {
      "epoch": 2.8328521958851294,
      "grad_norm": 0.16832520067691803,
      "learning_rate": 4.362855241396124e-06,
      "loss": 0.0151,
      "step": 1731020
    },
    {
      "epoch": 2.8328849263237825,
      "grad_norm": 0.48387786746025085,
      "learning_rate": 4.362789349182607e-06,
      "loss": 0.0172,
      "step": 1731040
    },
    {
      "epoch": 2.8329176567624357,
      "grad_norm": 0.13368703424930573,
      "learning_rate": 4.3627234569690905e-06,
      "loss": 0.0072,
      "step": 1731060
    },
    {
      "epoch": 2.8329503872010893,
      "grad_norm": 0.8303149938583374,
      "learning_rate": 4.362657564755573e-06,
      "loss": 0.0101,
      "step": 1731080
    },
    {
      "epoch": 2.832983117639743,
      "grad_norm": 0.2350228875875473,
      "learning_rate": 4.362591672542056e-06,
      "loss": 0.0083,
      "step": 1731100
    },
    {
      "epoch": 2.833015848078396,
      "grad_norm": 0.5138136148452759,
      "learning_rate": 4.362525780328539e-06,
      "loss": 0.009,
      "step": 1731120
    },
    {
      "epoch": 2.833048578517049,
      "grad_norm": 0.34488970041275024,
      "learning_rate": 4.362459888115022e-06,
      "loss": 0.0154,
      "step": 1731140
    },
    {
      "epoch": 2.8330813089557028,
      "grad_norm": 0.17290721833705902,
      "learning_rate": 4.362393995901505e-06,
      "loss": 0.0104,
      "step": 1731160
    },
    {
      "epoch": 2.833114039394356,
      "grad_norm": 0.26242563128471375,
      "learning_rate": 4.362328103687988e-06,
      "loss": 0.0106,
      "step": 1731180
    },
    {
      "epoch": 2.833146769833009,
      "grad_norm": 0.32072946429252625,
      "learning_rate": 4.3622622114744706e-06,
      "loss": 0.0194,
      "step": 1731200
    },
    {
      "epoch": 2.8331795002716627,
      "grad_norm": 0.06274465471506119,
      "learning_rate": 4.362196319260953e-06,
      "loss": 0.0121,
      "step": 1731220
    },
    {
      "epoch": 2.8332122307103162,
      "grad_norm": 0.24364829063415527,
      "learning_rate": 4.362130427047436e-06,
      "loss": 0.0184,
      "step": 1731240
    },
    {
      "epoch": 2.8332449611489694,
      "grad_norm": 0.3459666967391968,
      "learning_rate": 4.362064534833919e-06,
      "loss": 0.0118,
      "step": 1731260
    },
    {
      "epoch": 2.8332776915876225,
      "grad_norm": 0.12012675404548645,
      "learning_rate": 4.361998642620402e-06,
      "loss": 0.0102,
      "step": 1731280
    },
    {
      "epoch": 2.833310422026276,
      "grad_norm": 0.08517150580883026,
      "learning_rate": 4.361932750406885e-06,
      "loss": 0.0172,
      "step": 1731300
    },
    {
      "epoch": 2.8333431524649293,
      "grad_norm": 1.2877016067504883,
      "learning_rate": 4.361866858193368e-06,
      "loss": 0.0142,
      "step": 1731320
    },
    {
      "epoch": 2.8333758829035824,
      "grad_norm": 0.3051324188709259,
      "learning_rate": 4.361800965979851e-06,
      "loss": 0.0083,
      "step": 1731340
    },
    {
      "epoch": 2.833408613342236,
      "grad_norm": 0.09261692315340042,
      "learning_rate": 4.361735073766333e-06,
      "loss": 0.0131,
      "step": 1731360
    },
    {
      "epoch": 2.833441343780889,
      "grad_norm": 0.329233318567276,
      "learning_rate": 4.361669181552816e-06,
      "loss": 0.011,
      "step": 1731380
    },
    {
      "epoch": 2.833474074219543,
      "grad_norm": 0.20877836644649506,
      "learning_rate": 4.361603289339299e-06,
      "loss": 0.0115,
      "step": 1731400
    },
    {
      "epoch": 2.833506804658196,
      "grad_norm": 0.0960266962647438,
      "learning_rate": 4.3615373971257815e-06,
      "loss": 0.0093,
      "step": 1731420
    },
    {
      "epoch": 2.8335395350968495,
      "grad_norm": 0.3210916817188263,
      "learning_rate": 4.361471504912264e-06,
      "loss": 0.0178,
      "step": 1731440
    },
    {
      "epoch": 2.8335722655355027,
      "grad_norm": 0.2302381843328476,
      "learning_rate": 4.361405612698748e-06,
      "loss": 0.0122,
      "step": 1731460
    },
    {
      "epoch": 2.833604995974156,
      "grad_norm": 0.5689265131950378,
      "learning_rate": 4.361339720485231e-06,
      "loss": 0.0104,
      "step": 1731480
    },
    {
      "epoch": 2.8336377264128094,
      "grad_norm": 0.21787405014038086,
      "learning_rate": 4.361273828271713e-06,
      "loss": 0.0129,
      "step": 1731500
    },
    {
      "epoch": 2.8336704568514626,
      "grad_norm": 0.2565961480140686,
      "learning_rate": 4.361207936058197e-06,
      "loss": 0.0136,
      "step": 1731520
    },
    {
      "epoch": 2.833703187290116,
      "grad_norm": 0.2504225969314575,
      "learning_rate": 4.36114204384468e-06,
      "loss": 0.0179,
      "step": 1731540
    },
    {
      "epoch": 2.8337359177287693,
      "grad_norm": 0.3205050230026245,
      "learning_rate": 4.361076151631162e-06,
      "loss": 0.0109,
      "step": 1731560
    },
    {
      "epoch": 2.833768648167423,
      "grad_norm": 0.12764962017536163,
      "learning_rate": 4.361010259417645e-06,
      "loss": 0.0119,
      "step": 1731580
    },
    {
      "epoch": 2.833801378606076,
      "grad_norm": 1.4801069498062134,
      "learning_rate": 4.360944367204128e-06,
      "loss": 0.0127,
      "step": 1731600
    },
    {
      "epoch": 2.833834109044729,
      "grad_norm": 0.38668256998062134,
      "learning_rate": 4.360878474990611e-06,
      "loss": 0.0126,
      "step": 1731620
    },
    {
      "epoch": 2.833866839483383,
      "grad_norm": 0.10627280175685883,
      "learning_rate": 4.360812582777093e-06,
      "loss": 0.0091,
      "step": 1731640
    },
    {
      "epoch": 2.833899569922036,
      "grad_norm": 0.7549755573272705,
      "learning_rate": 4.360746690563576e-06,
      "loss": 0.0188,
      "step": 1731660
    },
    {
      "epoch": 2.8339323003606895,
      "grad_norm": 0.12407071143388748,
      "learning_rate": 4.36068079835006e-06,
      "loss": 0.0153,
      "step": 1731680
    },
    {
      "epoch": 2.8339650307993427,
      "grad_norm": 0.5356428027153015,
      "learning_rate": 4.3606149061365424e-06,
      "loss": 0.013,
      "step": 1731700
    },
    {
      "epoch": 2.8339977612379963,
      "grad_norm": 0.06949128955602646,
      "learning_rate": 4.360549013923025e-06,
      "loss": 0.0165,
      "step": 1731720
    },
    {
      "epoch": 2.8340304916766494,
      "grad_norm": 0.08166862279176712,
      "learning_rate": 4.360483121709508e-06,
      "loss": 0.0168,
      "step": 1731740
    },
    {
      "epoch": 2.8340632221153026,
      "grad_norm": 0.2348124235868454,
      "learning_rate": 4.360417229495991e-06,
      "loss": 0.0142,
      "step": 1731760
    },
    {
      "epoch": 2.834095952553956,
      "grad_norm": 0.5050572752952576,
      "learning_rate": 4.360351337282473e-06,
      "loss": 0.0116,
      "step": 1731780
    },
    {
      "epoch": 2.8341286829926093,
      "grad_norm": 0.5486915707588196,
      "learning_rate": 4.360285445068956e-06,
      "loss": 0.0169,
      "step": 1731800
    },
    {
      "epoch": 2.834161413431263,
      "grad_norm": 0.5515746474266052,
      "learning_rate": 4.360219552855439e-06,
      "loss": 0.0099,
      "step": 1731820
    },
    {
      "epoch": 2.834194143869916,
      "grad_norm": 0.12620170414447784,
      "learning_rate": 4.360153660641922e-06,
      "loss": 0.0142,
      "step": 1731840
    },
    {
      "epoch": 2.8342268743085697,
      "grad_norm": 0.4104847311973572,
      "learning_rate": 4.360087768428405e-06,
      "loss": 0.0167,
      "step": 1731860
    },
    {
      "epoch": 2.834259604747223,
      "grad_norm": 0.07176829874515533,
      "learning_rate": 4.360021876214888e-06,
      "loss": 0.0223,
      "step": 1731880
    },
    {
      "epoch": 2.834292335185876,
      "grad_norm": 0.08420293033123016,
      "learning_rate": 4.359955984001371e-06,
      "loss": 0.0161,
      "step": 1731900
    },
    {
      "epoch": 2.8343250656245296,
      "grad_norm": 0.9389227032661438,
      "learning_rate": 4.359890091787854e-06,
      "loss": 0.0117,
      "step": 1731920
    },
    {
      "epoch": 2.8343577960631827,
      "grad_norm": 0.3075438141822815,
      "learning_rate": 4.359824199574337e-06,
      "loss": 0.0135,
      "step": 1731940
    },
    {
      "epoch": 2.8343905265018363,
      "grad_norm": 0.4016208350658417,
      "learning_rate": 4.35975830736082e-06,
      "loss": 0.0153,
      "step": 1731960
    },
    {
      "epoch": 2.8344232569404895,
      "grad_norm": 0.4680408835411072,
      "learning_rate": 4.3596924151473025e-06,
      "loss": 0.0195,
      "step": 1731980
    },
    {
      "epoch": 2.834455987379143,
      "grad_norm": 0.4132566452026367,
      "learning_rate": 4.359626522933785e-06,
      "loss": 0.0119,
      "step": 1732000
    },
    {
      "epoch": 2.834488717817796,
      "grad_norm": 0.2632029354572296,
      "learning_rate": 4.359560630720268e-06,
      "loss": 0.0114,
      "step": 1732020
    },
    {
      "epoch": 2.8345214482564494,
      "grad_norm": 0.26453542709350586,
      "learning_rate": 4.359494738506751e-06,
      "loss": 0.0109,
      "step": 1732040
    },
    {
      "epoch": 2.834554178695103,
      "grad_norm": 0.1566021740436554,
      "learning_rate": 4.3594288462932334e-06,
      "loss": 0.0127,
      "step": 1732060
    },
    {
      "epoch": 2.834586909133756,
      "grad_norm": 0.31803736090660095,
      "learning_rate": 4.359362954079717e-06,
      "loss": 0.0099,
      "step": 1732080
    },
    {
      "epoch": 2.8346196395724097,
      "grad_norm": 0.6583141684532166,
      "learning_rate": 4.3592970618662e-06,
      "loss": 0.0132,
      "step": 1732100
    },
    {
      "epoch": 2.834652370011063,
      "grad_norm": 0.2725836932659149,
      "learning_rate": 4.3592311696526825e-06,
      "loss": 0.0138,
      "step": 1732120
    },
    {
      "epoch": 2.8346851004497164,
      "grad_norm": 0.9011287093162537,
      "learning_rate": 4.359165277439165e-06,
      "loss": 0.0169,
      "step": 1732140
    },
    {
      "epoch": 2.8347178308883696,
      "grad_norm": 0.40859630703926086,
      "learning_rate": 4.359099385225648e-06,
      "loss": 0.0149,
      "step": 1732160
    },
    {
      "epoch": 2.8347505613270227,
      "grad_norm": 0.4419334828853607,
      "learning_rate": 4.359033493012131e-06,
      "loss": 0.0142,
      "step": 1732180
    },
    {
      "epoch": 2.8347832917656763,
      "grad_norm": 0.9698541164398193,
      "learning_rate": 4.358967600798614e-06,
      "loss": 0.0177,
      "step": 1732200
    },
    {
      "epoch": 2.8348160222043295,
      "grad_norm": 0.18730315566062927,
      "learning_rate": 4.358901708585097e-06,
      "loss": 0.01,
      "step": 1732220
    },
    {
      "epoch": 2.834848752642983,
      "grad_norm": 0.5809102654457092,
      "learning_rate": 4.35883581637158e-06,
      "loss": 0.0124,
      "step": 1732240
    },
    {
      "epoch": 2.8348814830816362,
      "grad_norm": 0.7369373440742493,
      "learning_rate": 4.3587699241580625e-06,
      "loss": 0.0161,
      "step": 1732260
    },
    {
      "epoch": 2.83491421352029,
      "grad_norm": 0.3509107828140259,
      "learning_rate": 4.358704031944545e-06,
      "loss": 0.0142,
      "step": 1732280
    },
    {
      "epoch": 2.834946943958943,
      "grad_norm": 0.22943587601184845,
      "learning_rate": 4.358638139731029e-06,
      "loss": 0.0127,
      "step": 1732300
    },
    {
      "epoch": 2.834979674397596,
      "grad_norm": 0.6523584723472595,
      "learning_rate": 4.358572247517512e-06,
      "loss": 0.0149,
      "step": 1732320
    },
    {
      "epoch": 2.8350124048362497,
      "grad_norm": 0.4184006154537201,
      "learning_rate": 4.358506355303994e-06,
      "loss": 0.0165,
      "step": 1732340
    },
    {
      "epoch": 2.835045135274903,
      "grad_norm": 0.17221254110336304,
      "learning_rate": 4.358440463090477e-06,
      "loss": 0.016,
      "step": 1732360
    },
    {
      "epoch": 2.835077865713556,
      "grad_norm": 0.23176933825016022,
      "learning_rate": 4.35837457087696e-06,
      "loss": 0.012,
      "step": 1732380
    },
    {
      "epoch": 2.8351105961522096,
      "grad_norm": 0.2773086130619049,
      "learning_rate": 4.3583086786634426e-06,
      "loss": 0.0117,
      "step": 1732400
    },
    {
      "epoch": 2.835143326590863,
      "grad_norm": 0.5003999471664429,
      "learning_rate": 4.358242786449925e-06,
      "loss": 0.0196,
      "step": 1732420
    },
    {
      "epoch": 2.8351760570295164,
      "grad_norm": 0.420774906873703,
      "learning_rate": 4.358176894236408e-06,
      "loss": 0.0103,
      "step": 1732440
    },
    {
      "epoch": 2.8352087874681695,
      "grad_norm": 1.1277278661727905,
      "learning_rate": 4.358111002022891e-06,
      "loss": 0.0137,
      "step": 1732460
    },
    {
      "epoch": 2.835241517906823,
      "grad_norm": 0.30223217606544495,
      "learning_rate": 4.358045109809374e-06,
      "loss": 0.0105,
      "step": 1732480
    },
    {
      "epoch": 2.8352742483454763,
      "grad_norm": 0.2898695766925812,
      "learning_rate": 4.357979217595857e-06,
      "loss": 0.0207,
      "step": 1732500
    },
    {
      "epoch": 2.8353069787841294,
      "grad_norm": 0.39868587255477905,
      "learning_rate": 4.35791332538234e-06,
      "loss": 0.0153,
      "step": 1732520
    },
    {
      "epoch": 2.835339709222783,
      "grad_norm": 0.2090299129486084,
      "learning_rate": 4.357847433168823e-06,
      "loss": 0.0088,
      "step": 1732540
    },
    {
      "epoch": 2.8353724396614366,
      "grad_norm": 0.057555146515369415,
      "learning_rate": 4.357781540955306e-06,
      "loss": 0.0135,
      "step": 1732560
    },
    {
      "epoch": 2.8354051701000897,
      "grad_norm": 0.37837737798690796,
      "learning_rate": 4.357715648741789e-06,
      "loss": 0.012,
      "step": 1732580
    },
    {
      "epoch": 2.835437900538743,
      "grad_norm": 0.29424864053726196,
      "learning_rate": 4.357649756528272e-06,
      "loss": 0.0132,
      "step": 1732600
    },
    {
      "epoch": 2.8354706309773965,
      "grad_norm": 0.7764633893966675,
      "learning_rate": 4.357583864314754e-06,
      "loss": 0.0174,
      "step": 1732620
    },
    {
      "epoch": 2.8355033614160496,
      "grad_norm": 0.3775133490562439,
      "learning_rate": 4.357517972101237e-06,
      "loss": 0.0114,
      "step": 1732640
    },
    {
      "epoch": 2.835536091854703,
      "grad_norm": 0.2605126202106476,
      "learning_rate": 4.35745207988772e-06,
      "loss": 0.0103,
      "step": 1732660
    },
    {
      "epoch": 2.8355688222933564,
      "grad_norm": 0.18017372488975525,
      "learning_rate": 4.357386187674203e-06,
      "loss": 0.0133,
      "step": 1732680
    },
    {
      "epoch": 2.83560155273201,
      "grad_norm": 0.2235678732395172,
      "learning_rate": 4.357320295460686e-06,
      "loss": 0.0095,
      "step": 1732700
    },
    {
      "epoch": 2.835634283170663,
      "grad_norm": 1.4847301244735718,
      "learning_rate": 4.357254403247169e-06,
      "loss": 0.0173,
      "step": 1732720
    },
    {
      "epoch": 2.8356670136093163,
      "grad_norm": 0.4570077955722809,
      "learning_rate": 4.357188511033652e-06,
      "loss": 0.0146,
      "step": 1732740
    },
    {
      "epoch": 2.83569974404797,
      "grad_norm": 0.205550417304039,
      "learning_rate": 4.357122618820134e-06,
      "loss": 0.0163,
      "step": 1732760
    },
    {
      "epoch": 2.835732474486623,
      "grad_norm": 0.5538720488548279,
      "learning_rate": 4.357056726606617e-06,
      "loss": 0.0127,
      "step": 1732780
    },
    {
      "epoch": 2.835765204925276,
      "grad_norm": 0.23725160956382751,
      "learning_rate": 4.3569908343931e-06,
      "loss": 0.0177,
      "step": 1732800
    },
    {
      "epoch": 2.8357979353639298,
      "grad_norm": 0.5358778834342957,
      "learning_rate": 4.356924942179583e-06,
      "loss": 0.0182,
      "step": 1732820
    },
    {
      "epoch": 2.8358306658025834,
      "grad_norm": 0.16896453499794006,
      "learning_rate": 4.356859049966065e-06,
      "loss": 0.0112,
      "step": 1732840
    },
    {
      "epoch": 2.8358633962412365,
      "grad_norm": 0.12829506397247314,
      "learning_rate": 4.356793157752548e-06,
      "loss": 0.014,
      "step": 1732860
    },
    {
      "epoch": 2.8358961266798897,
      "grad_norm": 0.2616417706012726,
      "learning_rate": 4.356727265539032e-06,
      "loss": 0.0156,
      "step": 1732880
    },
    {
      "epoch": 2.8359288571185433,
      "grad_norm": 0.47645655274391174,
      "learning_rate": 4.3566613733255144e-06,
      "loss": 0.0092,
      "step": 1732900
    },
    {
      "epoch": 2.8359615875571964,
      "grad_norm": 1.0741218328475952,
      "learning_rate": 4.356595481111997e-06,
      "loss": 0.0123,
      "step": 1732920
    },
    {
      "epoch": 2.8359943179958496,
      "grad_norm": 4.849648952484131,
      "learning_rate": 4.356529588898481e-06,
      "loss": 0.014,
      "step": 1732940
    },
    {
      "epoch": 2.836027048434503,
      "grad_norm": 0.4073013365268707,
      "learning_rate": 4.3564636966849635e-06,
      "loss": 0.0112,
      "step": 1732960
    },
    {
      "epoch": 2.8360597788731563,
      "grad_norm": 0.60508793592453,
      "learning_rate": 4.356397804471446e-06,
      "loss": 0.0176,
      "step": 1732980
    },
    {
      "epoch": 2.83609250931181,
      "grad_norm": 0.3265608549118042,
      "learning_rate": 4.356331912257929e-06,
      "loss": 0.0105,
      "step": 1733000
    },
    {
      "epoch": 2.836125239750463,
      "grad_norm": 0.4031110405921936,
      "learning_rate": 4.356266020044412e-06,
      "loss": 0.0109,
      "step": 1733020
    },
    {
      "epoch": 2.8361579701891166,
      "grad_norm": 0.21364715695381165,
      "learning_rate": 4.3562001278308945e-06,
      "loss": 0.0146,
      "step": 1733040
    },
    {
      "epoch": 2.83619070062777,
      "grad_norm": 0.23588930070400238,
      "learning_rate": 4.356134235617377e-06,
      "loss": 0.0122,
      "step": 1733060
    },
    {
      "epoch": 2.836223431066423,
      "grad_norm": 0.1842849850654602,
      "learning_rate": 4.35606834340386e-06,
      "loss": 0.0102,
      "step": 1733080
    },
    {
      "epoch": 2.8362561615050765,
      "grad_norm": 0.1963503658771515,
      "learning_rate": 4.3560024511903435e-06,
      "loss": 0.0211,
      "step": 1733100
    },
    {
      "epoch": 2.8362888919437297,
      "grad_norm": 0.4492592215538025,
      "learning_rate": 4.355936558976826e-06,
      "loss": 0.0112,
      "step": 1733120
    },
    {
      "epoch": 2.8363216223823833,
      "grad_norm": 0.2222636491060257,
      "learning_rate": 4.355870666763309e-06,
      "loss": 0.0105,
      "step": 1733140
    },
    {
      "epoch": 2.8363543528210364,
      "grad_norm": 0.6566314697265625,
      "learning_rate": 4.355804774549792e-06,
      "loss": 0.0176,
      "step": 1733160
    },
    {
      "epoch": 2.83638708325969,
      "grad_norm": 0.6944140791893005,
      "learning_rate": 4.3557388823362745e-06,
      "loss": 0.0173,
      "step": 1733180
    },
    {
      "epoch": 2.836419813698343,
      "grad_norm": 0.3060929477214813,
      "learning_rate": 4.355672990122757e-06,
      "loss": 0.013,
      "step": 1733200
    },
    {
      "epoch": 2.8364525441369963,
      "grad_norm": 1.0689071416854858,
      "learning_rate": 4.35560709790924e-06,
      "loss": 0.0138,
      "step": 1733220
    },
    {
      "epoch": 2.83648527457565,
      "grad_norm": 0.5305376052856445,
      "learning_rate": 4.355541205695723e-06,
      "loss": 0.0134,
      "step": 1733240
    },
    {
      "epoch": 2.836518005014303,
      "grad_norm": 0.5244871377944946,
      "learning_rate": 4.355475313482206e-06,
      "loss": 0.0151,
      "step": 1733260
    },
    {
      "epoch": 2.8365507354529567,
      "grad_norm": 0.31850457191467285,
      "learning_rate": 4.355409421268689e-06,
      "loss": 0.0132,
      "step": 1733280
    },
    {
      "epoch": 2.83658346589161,
      "grad_norm": 0.14285019040107727,
      "learning_rate": 4.355343529055172e-06,
      "loss": 0.0114,
      "step": 1733300
    },
    {
      "epoch": 2.8366161963302634,
      "grad_norm": 0.31993505358695984,
      "learning_rate": 4.3552776368416545e-06,
      "loss": 0.0185,
      "step": 1733320
    },
    {
      "epoch": 2.8366489267689166,
      "grad_norm": 0.23405036330223083,
      "learning_rate": 4.355211744628138e-06,
      "loss": 0.0167,
      "step": 1733340
    },
    {
      "epoch": 2.8366816572075697,
      "grad_norm": 0.49312707781791687,
      "learning_rate": 4.355145852414621e-06,
      "loss": 0.0113,
      "step": 1733360
    },
    {
      "epoch": 2.8367143876462233,
      "grad_norm": 1.0365568399429321,
      "learning_rate": 4.355079960201104e-06,
      "loss": 0.0164,
      "step": 1733380
    },
    {
      "epoch": 2.8367471180848765,
      "grad_norm": 1.8276780843734741,
      "learning_rate": 4.355014067987586e-06,
      "loss": 0.0192,
      "step": 1733400
    },
    {
      "epoch": 2.83677984852353,
      "grad_norm": 0.4151681363582611,
      "learning_rate": 4.354948175774069e-06,
      "loss": 0.0149,
      "step": 1733420
    },
    {
      "epoch": 2.836812578962183,
      "grad_norm": 0.495218425989151,
      "learning_rate": 4.354882283560552e-06,
      "loss": 0.0117,
      "step": 1733440
    },
    {
      "epoch": 2.836845309400837,
      "grad_norm": 0.2563410699367523,
      "learning_rate": 4.3548163913470345e-06,
      "loss": 0.0246,
      "step": 1733460
    },
    {
      "epoch": 2.83687803983949,
      "grad_norm": 0.15829649567604065,
      "learning_rate": 4.354750499133517e-06,
      "loss": 0.0171,
      "step": 1733480
    },
    {
      "epoch": 2.836910770278143,
      "grad_norm": 0.3178000748157501,
      "learning_rate": 4.354684606920001e-06,
      "loss": 0.0141,
      "step": 1733500
    },
    {
      "epoch": 2.8369435007167967,
      "grad_norm": 0.2026779055595398,
      "learning_rate": 4.354618714706484e-06,
      "loss": 0.0094,
      "step": 1733520
    },
    {
      "epoch": 2.83697623115545,
      "grad_norm": 0.16839879751205444,
      "learning_rate": 4.354552822492966e-06,
      "loss": 0.0106,
      "step": 1733540
    },
    {
      "epoch": 2.8370089615941034,
      "grad_norm": 0.414035826921463,
      "learning_rate": 4.354486930279449e-06,
      "loss": 0.0123,
      "step": 1733560
    },
    {
      "epoch": 2.8370416920327566,
      "grad_norm": 0.7366691827774048,
      "learning_rate": 4.354421038065932e-06,
      "loss": 0.0095,
      "step": 1733580
    },
    {
      "epoch": 2.83707442247141,
      "grad_norm": 0.4988747835159302,
      "learning_rate": 4.3543551458524146e-06,
      "loss": 0.0128,
      "step": 1733600
    },
    {
      "epoch": 2.8371071529100633,
      "grad_norm": 0.13328975439071655,
      "learning_rate": 4.354289253638898e-06,
      "loss": 0.0142,
      "step": 1733620
    },
    {
      "epoch": 2.8371398833487165,
      "grad_norm": 0.2656572163105011,
      "learning_rate": 4.354223361425381e-06,
      "loss": 0.0205,
      "step": 1733640
    },
    {
      "epoch": 2.83717261378737,
      "grad_norm": 0.18346096575260162,
      "learning_rate": 4.354157469211864e-06,
      "loss": 0.0129,
      "step": 1733660
    },
    {
      "epoch": 2.837205344226023,
      "grad_norm": 0.4136989116668701,
      "learning_rate": 4.354091576998346e-06,
      "loss": 0.019,
      "step": 1733680
    },
    {
      "epoch": 2.837238074664677,
      "grad_norm": 0.11235962808132172,
      "learning_rate": 4.354025684784829e-06,
      "loss": 0.0121,
      "step": 1733700
    },
    {
      "epoch": 2.83727080510333,
      "grad_norm": 0.2480478733778,
      "learning_rate": 4.353959792571313e-06,
      "loss": 0.0128,
      "step": 1733720
    },
    {
      "epoch": 2.8373035355419836,
      "grad_norm": 0.6393311619758606,
      "learning_rate": 4.3538939003577954e-06,
      "loss": 0.017,
      "step": 1733740
    },
    {
      "epoch": 2.8373362659806367,
      "grad_norm": 0.5085979104042053,
      "learning_rate": 4.353828008144278e-06,
      "loss": 0.0159,
      "step": 1733760
    },
    {
      "epoch": 2.83736899641929,
      "grad_norm": 0.4162415564060211,
      "learning_rate": 4.353762115930761e-06,
      "loss": 0.0163,
      "step": 1733780
    },
    {
      "epoch": 2.8374017268579435,
      "grad_norm": 0.5250546932220459,
      "learning_rate": 4.353696223717244e-06,
      "loss": 0.0148,
      "step": 1733800
    },
    {
      "epoch": 2.8374344572965966,
      "grad_norm": 0.5393399596214294,
      "learning_rate": 4.353630331503726e-06,
      "loss": 0.0128,
      "step": 1733820
    },
    {
      "epoch": 2.8374671877352498,
      "grad_norm": 0.15633238852024078,
      "learning_rate": 4.353564439290209e-06,
      "loss": 0.0102,
      "step": 1733840
    },
    {
      "epoch": 2.8374999181739033,
      "grad_norm": 0.20535854995250702,
      "learning_rate": 4.353498547076692e-06,
      "loss": 0.0118,
      "step": 1733860
    },
    {
      "epoch": 2.837532648612557,
      "grad_norm": 0.2764800488948822,
      "learning_rate": 4.353432654863175e-06,
      "loss": 0.0144,
      "step": 1733880
    },
    {
      "epoch": 2.83756537905121,
      "grad_norm": 0.3127530515193939,
      "learning_rate": 4.353366762649658e-06,
      "loss": 0.0137,
      "step": 1733900
    },
    {
      "epoch": 2.8375981094898632,
      "grad_norm": 0.3494531214237213,
      "learning_rate": 4.353300870436141e-06,
      "loss": 0.0302,
      "step": 1733920
    },
    {
      "epoch": 2.837630839928517,
      "grad_norm": 0.38592204451560974,
      "learning_rate": 4.353234978222624e-06,
      "loss": 0.0114,
      "step": 1733940
    },
    {
      "epoch": 2.83766357036717,
      "grad_norm": 0.9540917873382568,
      "learning_rate": 4.353169086009107e-06,
      "loss": 0.0172,
      "step": 1733960
    },
    {
      "epoch": 2.837696300805823,
      "grad_norm": 0.4249180555343628,
      "learning_rate": 4.35310319379559e-06,
      "loss": 0.0106,
      "step": 1733980
    },
    {
      "epoch": 2.8377290312444767,
      "grad_norm": 0.9085434675216675,
      "learning_rate": 4.353037301582073e-06,
      "loss": 0.0141,
      "step": 1734000
    },
    {
      "epoch": 2.8377617616831303,
      "grad_norm": 0.21162539720535278,
      "learning_rate": 4.3529714093685555e-06,
      "loss": 0.0102,
      "step": 1734020
    },
    {
      "epoch": 2.8377944921217835,
      "grad_norm": 0.6851134300231934,
      "learning_rate": 4.352905517155038e-06,
      "loss": 0.0137,
      "step": 1734040
    },
    {
      "epoch": 2.8378272225604366,
      "grad_norm": 0.5212956666946411,
      "learning_rate": 4.352839624941521e-06,
      "loss": 0.0157,
      "step": 1734060
    },
    {
      "epoch": 2.83785995299909,
      "grad_norm": 0.22086066007614136,
      "learning_rate": 4.352773732728004e-06,
      "loss": 0.0147,
      "step": 1734080
    },
    {
      "epoch": 2.8378926834377434,
      "grad_norm": 0.3093338906764984,
      "learning_rate": 4.3527078405144865e-06,
      "loss": 0.0127,
      "step": 1734100
    },
    {
      "epoch": 2.8379254138763965,
      "grad_norm": 0.40869012475013733,
      "learning_rate": 4.35264194830097e-06,
      "loss": 0.0116,
      "step": 1734120
    },
    {
      "epoch": 2.83795814431505,
      "grad_norm": 0.40420210361480713,
      "learning_rate": 4.352576056087453e-06,
      "loss": 0.0097,
      "step": 1734140
    },
    {
      "epoch": 2.8379908747537037,
      "grad_norm": 0.09144672006368637,
      "learning_rate": 4.3525101638739355e-06,
      "loss": 0.0111,
      "step": 1734160
    },
    {
      "epoch": 2.838023605192357,
      "grad_norm": 0.10145550966262817,
      "learning_rate": 4.352444271660418e-06,
      "loss": 0.0169,
      "step": 1734180
    },
    {
      "epoch": 2.83805633563101,
      "grad_norm": 0.3006357252597809,
      "learning_rate": 4.352378379446901e-06,
      "loss": 0.0137,
      "step": 1734200
    },
    {
      "epoch": 2.8380890660696636,
      "grad_norm": 0.36081963777542114,
      "learning_rate": 4.352312487233384e-06,
      "loss": 0.0174,
      "step": 1734220
    },
    {
      "epoch": 2.8381217965083168,
      "grad_norm": 0.2200145125389099,
      "learning_rate": 4.3522465950198665e-06,
      "loss": 0.019,
      "step": 1734240
    },
    {
      "epoch": 2.83815452694697,
      "grad_norm": 0.318948894739151,
      "learning_rate": 4.352180702806349e-06,
      "loss": 0.0119,
      "step": 1734260
    },
    {
      "epoch": 2.8381872573856235,
      "grad_norm": 0.38189661502838135,
      "learning_rate": 4.352114810592832e-06,
      "loss": 0.0138,
      "step": 1734280
    },
    {
      "epoch": 2.838219987824277,
      "grad_norm": 0.34156936407089233,
      "learning_rate": 4.3520489183793155e-06,
      "loss": 0.0197,
      "step": 1734300
    },
    {
      "epoch": 2.8382527182629302,
      "grad_norm": 0.947693943977356,
      "learning_rate": 4.351983026165798e-06,
      "loss": 0.015,
      "step": 1734320
    },
    {
      "epoch": 2.8382854487015834,
      "grad_norm": 0.707318902015686,
      "learning_rate": 4.351917133952281e-06,
      "loss": 0.0094,
      "step": 1734340
    },
    {
      "epoch": 2.838318179140237,
      "grad_norm": 0.31915533542633057,
      "learning_rate": 4.351851241738765e-06,
      "loss": 0.0131,
      "step": 1734360
    },
    {
      "epoch": 2.83835090957889,
      "grad_norm": 0.14367352426052094,
      "learning_rate": 4.351785349525247e-06,
      "loss": 0.0122,
      "step": 1734380
    },
    {
      "epoch": 2.8383836400175433,
      "grad_norm": 0.30557703971862793,
      "learning_rate": 4.35171945731173e-06,
      "loss": 0.0156,
      "step": 1734400
    },
    {
      "epoch": 2.838416370456197,
      "grad_norm": 0.534662663936615,
      "learning_rate": 4.351653565098213e-06,
      "loss": 0.0141,
      "step": 1734420
    },
    {
      "epoch": 2.83844910089485,
      "grad_norm": 0.20470815896987915,
      "learning_rate": 4.3515876728846956e-06,
      "loss": 0.0155,
      "step": 1734440
    },
    {
      "epoch": 2.8384818313335036,
      "grad_norm": 0.5610672831535339,
      "learning_rate": 4.351521780671178e-06,
      "loss": 0.014,
      "step": 1734460
    },
    {
      "epoch": 2.8385145617721568,
      "grad_norm": 0.2872673571109772,
      "learning_rate": 4.351455888457661e-06,
      "loss": 0.0164,
      "step": 1734480
    },
    {
      "epoch": 2.8385472922108104,
      "grad_norm": 0.4865036606788635,
      "learning_rate": 4.351389996244144e-06,
      "loss": 0.0162,
      "step": 1734500
    },
    {
      "epoch": 2.8385800226494635,
      "grad_norm": 0.3892146050930023,
      "learning_rate": 4.351324104030627e-06,
      "loss": 0.0126,
      "step": 1734520
    },
    {
      "epoch": 2.8386127530881167,
      "grad_norm": 0.23091262578964233,
      "learning_rate": 4.35125821181711e-06,
      "loss": 0.0112,
      "step": 1734540
    },
    {
      "epoch": 2.8386454835267703,
      "grad_norm": 0.4362475275993347,
      "learning_rate": 4.351192319603593e-06,
      "loss": 0.012,
      "step": 1734560
    },
    {
      "epoch": 2.8386782139654234,
      "grad_norm": 0.2572607398033142,
      "learning_rate": 4.351126427390076e-06,
      "loss": 0.0105,
      "step": 1734580
    },
    {
      "epoch": 2.838710944404077,
      "grad_norm": 0.11409536749124527,
      "learning_rate": 4.351060535176558e-06,
      "loss": 0.0177,
      "step": 1734600
    },
    {
      "epoch": 2.83874367484273,
      "grad_norm": 0.07391861081123352,
      "learning_rate": 4.350994642963041e-06,
      "loss": 0.0183,
      "step": 1734620
    },
    {
      "epoch": 2.8387764052813838,
      "grad_norm": 0.278191477060318,
      "learning_rate": 4.350928750749524e-06,
      "loss": 0.0086,
      "step": 1734640
    },
    {
      "epoch": 2.838809135720037,
      "grad_norm": 0.4713626503944397,
      "learning_rate": 4.350862858536007e-06,
      "loss": 0.0135,
      "step": 1734660
    },
    {
      "epoch": 2.83884186615869,
      "grad_norm": 0.09464184939861298,
      "learning_rate": 4.35079696632249e-06,
      "loss": 0.0124,
      "step": 1734680
    },
    {
      "epoch": 2.8388745965973436,
      "grad_norm": 0.44216904044151306,
      "learning_rate": 4.350731074108973e-06,
      "loss": 0.0142,
      "step": 1734700
    },
    {
      "epoch": 2.838907327035997,
      "grad_norm": 1.5607597827911377,
      "learning_rate": 4.350665181895456e-06,
      "loss": 0.02,
      "step": 1734720
    },
    {
      "epoch": 2.8389400574746504,
      "grad_norm": 0.2738758325576782,
      "learning_rate": 4.350599289681939e-06,
      "loss": 0.0159,
      "step": 1734740
    },
    {
      "epoch": 2.8389727879133035,
      "grad_norm": 0.3546983003616333,
      "learning_rate": 4.350533397468422e-06,
      "loss": 0.014,
      "step": 1734760
    },
    {
      "epoch": 2.839005518351957,
      "grad_norm": 0.40622881054878235,
      "learning_rate": 4.350467505254905e-06,
      "loss": 0.0136,
      "step": 1734780
    },
    {
      "epoch": 2.8390382487906103,
      "grad_norm": 0.6477662324905396,
      "learning_rate": 4.3504016130413874e-06,
      "loss": 0.0095,
      "step": 1734800
    },
    {
      "epoch": 2.8390709792292634,
      "grad_norm": 0.9730944633483887,
      "learning_rate": 4.35033572082787e-06,
      "loss": 0.0145,
      "step": 1734820
    },
    {
      "epoch": 2.839103709667917,
      "grad_norm": 0.0862433910369873,
      "learning_rate": 4.350269828614353e-06,
      "loss": 0.0161,
      "step": 1734840
    },
    {
      "epoch": 2.83913644010657,
      "grad_norm": 0.4099312424659729,
      "learning_rate": 4.350203936400836e-06,
      "loss": 0.0189,
      "step": 1734860
    },
    {
      "epoch": 2.8391691705452238,
      "grad_norm": 0.13840581476688385,
      "learning_rate": 4.350138044187318e-06,
      "loss": 0.0119,
      "step": 1734880
    },
    {
      "epoch": 2.839201900983877,
      "grad_norm": 0.37240490317344666,
      "learning_rate": 4.350072151973801e-06,
      "loss": 0.0149,
      "step": 1734900
    },
    {
      "epoch": 2.8392346314225305,
      "grad_norm": 0.7567481994628906,
      "learning_rate": 4.350006259760285e-06,
      "loss": 0.0152,
      "step": 1734920
    },
    {
      "epoch": 2.8392673618611837,
      "grad_norm": 1.271497130393982,
      "learning_rate": 4.3499403675467675e-06,
      "loss": 0.0124,
      "step": 1734940
    },
    {
      "epoch": 2.839300092299837,
      "grad_norm": 0.3296946585178375,
      "learning_rate": 4.34987447533325e-06,
      "loss": 0.016,
      "step": 1734960
    },
    {
      "epoch": 2.8393328227384904,
      "grad_norm": 0.17660440504550934,
      "learning_rate": 4.349808583119733e-06,
      "loss": 0.0109,
      "step": 1734980
    },
    {
      "epoch": 2.8393655531771436,
      "grad_norm": 0.36314138770103455,
      "learning_rate": 4.349742690906216e-06,
      "loss": 0.014,
      "step": 1735000
    },
    {
      "epoch": 2.839398283615797,
      "grad_norm": 0.8427808284759521,
      "learning_rate": 4.349676798692699e-06,
      "loss": 0.0185,
      "step": 1735020
    },
    {
      "epoch": 2.8394310140544503,
      "grad_norm": 0.23070743680000305,
      "learning_rate": 4.349610906479182e-06,
      "loss": 0.0151,
      "step": 1735040
    },
    {
      "epoch": 2.839463744493104,
      "grad_norm": 0.0869552344083786,
      "learning_rate": 4.349545014265665e-06,
      "loss": 0.0131,
      "step": 1735060
    },
    {
      "epoch": 2.839496474931757,
      "grad_norm": 0.258147656917572,
      "learning_rate": 4.3494791220521475e-06,
      "loss": 0.0135,
      "step": 1735080
    },
    {
      "epoch": 2.83952920537041,
      "grad_norm": 0.20848800241947174,
      "learning_rate": 4.34941322983863e-06,
      "loss": 0.0188,
      "step": 1735100
    },
    {
      "epoch": 2.839561935809064,
      "grad_norm": 0.41822120547294617,
      "learning_rate": 4.349347337625113e-06,
      "loss": 0.0075,
      "step": 1735120
    },
    {
      "epoch": 2.839594666247717,
      "grad_norm": 0.29236409068107605,
      "learning_rate": 4.3492814454115965e-06,
      "loss": 0.0128,
      "step": 1735140
    },
    {
      "epoch": 2.8396273966863705,
      "grad_norm": 0.19956377148628235,
      "learning_rate": 4.349215553198079e-06,
      "loss": 0.0192,
      "step": 1735160
    },
    {
      "epoch": 2.8396601271250237,
      "grad_norm": 0.223117858171463,
      "learning_rate": 4.349149660984562e-06,
      "loss": 0.0132,
      "step": 1735180
    },
    {
      "epoch": 2.8396928575636773,
      "grad_norm": 0.30323630571365356,
      "learning_rate": 4.349083768771045e-06,
      "loss": 0.0105,
      "step": 1735200
    },
    {
      "epoch": 2.8397255880023304,
      "grad_norm": 0.8321934938430786,
      "learning_rate": 4.3490178765575275e-06,
      "loss": 0.0117,
      "step": 1735220
    },
    {
      "epoch": 2.8397583184409836,
      "grad_norm": 0.24315547943115234,
      "learning_rate": 4.34895198434401e-06,
      "loss": 0.0157,
      "step": 1735240
    },
    {
      "epoch": 2.839791048879637,
      "grad_norm": 0.5001906752586365,
      "learning_rate": 4.348886092130493e-06,
      "loss": 0.0144,
      "step": 1735260
    },
    {
      "epoch": 2.8398237793182903,
      "grad_norm": 0.17955388128757477,
      "learning_rate": 4.348820199916976e-06,
      "loss": 0.0133,
      "step": 1735280
    },
    {
      "epoch": 2.839856509756944,
      "grad_norm": 0.5323085784912109,
      "learning_rate": 4.3487543077034585e-06,
      "loss": 0.0109,
      "step": 1735300
    },
    {
      "epoch": 2.839889240195597,
      "grad_norm": 0.23695112764835358,
      "learning_rate": 4.348688415489942e-06,
      "loss": 0.0168,
      "step": 1735320
    },
    {
      "epoch": 2.8399219706342507,
      "grad_norm": 0.2637065351009369,
      "learning_rate": 4.348622523276425e-06,
      "loss": 0.0099,
      "step": 1735340
    },
    {
      "epoch": 2.839954701072904,
      "grad_norm": 0.16950903832912445,
      "learning_rate": 4.3485566310629075e-06,
      "loss": 0.0164,
      "step": 1735360
    },
    {
      "epoch": 2.839987431511557,
      "grad_norm": 1.217437744140625,
      "learning_rate": 4.348490738849391e-06,
      "loss": 0.0144,
      "step": 1735380
    },
    {
      "epoch": 2.8400201619502106,
      "grad_norm": 0.6165484189987183,
      "learning_rate": 4.348424846635874e-06,
      "loss": 0.0182,
      "step": 1735400
    },
    {
      "epoch": 2.8400528923888637,
      "grad_norm": 0.05904155969619751,
      "learning_rate": 4.348358954422357e-06,
      "loss": 0.0119,
      "step": 1735420
    },
    {
      "epoch": 2.840085622827517,
      "grad_norm": 0.33997583389282227,
      "learning_rate": 4.348293062208839e-06,
      "loss": 0.0153,
      "step": 1735440
    },
    {
      "epoch": 2.8401183532661705,
      "grad_norm": 0.5722445845603943,
      "learning_rate": 4.348227169995322e-06,
      "loss": 0.0129,
      "step": 1735460
    },
    {
      "epoch": 2.840151083704824,
      "grad_norm": 0.4842972159385681,
      "learning_rate": 4.348161277781805e-06,
      "loss": 0.0101,
      "step": 1735480
    },
    {
      "epoch": 2.840183814143477,
      "grad_norm": 0.46610409021377563,
      "learning_rate": 4.3480953855682876e-06,
      "loss": 0.0105,
      "step": 1735500
    },
    {
      "epoch": 2.8402165445821304,
      "grad_norm": 0.18047168850898743,
      "learning_rate": 4.34802949335477e-06,
      "loss": 0.0142,
      "step": 1735520
    },
    {
      "epoch": 2.840249275020784,
      "grad_norm": 0.6366031169891357,
      "learning_rate": 4.347963601141254e-06,
      "loss": 0.0154,
      "step": 1735540
    },
    {
      "epoch": 2.840282005459437,
      "grad_norm": 0.14110606908798218,
      "learning_rate": 4.347897708927737e-06,
      "loss": 0.0121,
      "step": 1735560
    },
    {
      "epoch": 2.8403147358980902,
      "grad_norm": 0.0769810751080513,
      "learning_rate": 4.347831816714219e-06,
      "loss": 0.013,
      "step": 1735580
    },
    {
      "epoch": 2.840347466336744,
      "grad_norm": 0.373643696308136,
      "learning_rate": 4.347765924500702e-06,
      "loss": 0.0178,
      "step": 1735600
    },
    {
      "epoch": 2.8403801967753974,
      "grad_norm": 0.8392797112464905,
      "learning_rate": 4.347700032287185e-06,
      "loss": 0.0193,
      "step": 1735620
    },
    {
      "epoch": 2.8404129272140506,
      "grad_norm": 0.2149788737297058,
      "learning_rate": 4.347634140073668e-06,
      "loss": 0.0125,
      "step": 1735640
    },
    {
      "epoch": 2.8404456576527037,
      "grad_norm": 0.11441539973020554,
      "learning_rate": 4.34756824786015e-06,
      "loss": 0.0131,
      "step": 1735660
    },
    {
      "epoch": 2.8404783880913573,
      "grad_norm": 0.09729554504156113,
      "learning_rate": 4.347502355646633e-06,
      "loss": 0.0134,
      "step": 1735680
    },
    {
      "epoch": 2.8405111185300105,
      "grad_norm": 0.09518676996231079,
      "learning_rate": 4.347436463433116e-06,
      "loss": 0.0128,
      "step": 1735700
    },
    {
      "epoch": 2.8405438489686636,
      "grad_norm": 0.5459197163581848,
      "learning_rate": 4.347370571219599e-06,
      "loss": 0.009,
      "step": 1735720
    },
    {
      "epoch": 2.8405765794073172,
      "grad_norm": 0.14842911064624786,
      "learning_rate": 4.347304679006082e-06,
      "loss": 0.0123,
      "step": 1735740
    },
    {
      "epoch": 2.840609309845971,
      "grad_norm": 0.3417501449584961,
      "learning_rate": 4.347238786792565e-06,
      "loss": 0.0092,
      "step": 1735760
    },
    {
      "epoch": 2.840642040284624,
      "grad_norm": 0.26475897431373596,
      "learning_rate": 4.3471728945790484e-06,
      "loss": 0.0119,
      "step": 1735780
    },
    {
      "epoch": 2.840674770723277,
      "grad_norm": 0.35374021530151367,
      "learning_rate": 4.347107002365531e-06,
      "loss": 0.0179,
      "step": 1735800
    },
    {
      "epoch": 2.8407075011619307,
      "grad_norm": 0.4111541509628296,
      "learning_rate": 4.347041110152014e-06,
      "loss": 0.014,
      "step": 1735820
    },
    {
      "epoch": 2.840740231600584,
      "grad_norm": 0.10359865427017212,
      "learning_rate": 4.346975217938497e-06,
      "loss": 0.0126,
      "step": 1735840
    },
    {
      "epoch": 2.840772962039237,
      "grad_norm": 0.14484116435050964,
      "learning_rate": 4.346909325724979e-06,
      "loss": 0.0127,
      "step": 1735860
    },
    {
      "epoch": 2.8408056924778906,
      "grad_norm": 0.18475346267223358,
      "learning_rate": 4.346843433511462e-06,
      "loss": 0.0169,
      "step": 1735880
    },
    {
      "epoch": 2.840838422916544,
      "grad_norm": 0.3864089846611023,
      "learning_rate": 4.346777541297945e-06,
      "loss": 0.0216,
      "step": 1735900
    },
    {
      "epoch": 2.8408711533551974,
      "grad_norm": 0.44422072172164917,
      "learning_rate": 4.346711649084428e-06,
      "loss": 0.015,
      "step": 1735920
    },
    {
      "epoch": 2.8409038837938505,
      "grad_norm": 1.634312629699707,
      "learning_rate": 4.346645756870911e-06,
      "loss": 0.0147,
      "step": 1735940
    },
    {
      "epoch": 2.840936614232504,
      "grad_norm": 0.507415771484375,
      "learning_rate": 4.346579864657394e-06,
      "loss": 0.0155,
      "step": 1735960
    },
    {
      "epoch": 2.8409693446711572,
      "grad_norm": 0.2701282203197479,
      "learning_rate": 4.346513972443877e-06,
      "loss": 0.0175,
      "step": 1735980
    },
    {
      "epoch": 2.8410020751098104,
      "grad_norm": 0.14886696636676788,
      "learning_rate": 4.3464480802303594e-06,
      "loss": 0.0208,
      "step": 1736000
    },
    {
      "epoch": 2.841034805548464,
      "grad_norm": 0.20914249122142792,
      "learning_rate": 4.346382188016842e-06,
      "loss": 0.01,
      "step": 1736020
    },
    {
      "epoch": 2.841067535987117,
      "grad_norm": 0.5827035307884216,
      "learning_rate": 4.346316295803325e-06,
      "loss": 0.0154,
      "step": 1736040
    },
    {
      "epoch": 2.8411002664257707,
      "grad_norm": 0.32603076100349426,
      "learning_rate": 4.346250403589808e-06,
      "loss": 0.0126,
      "step": 1736060
    },
    {
      "epoch": 2.841132996864424,
      "grad_norm": 0.531818687915802,
      "learning_rate": 4.346184511376291e-06,
      "loss": 0.0161,
      "step": 1736080
    },
    {
      "epoch": 2.8411657273030775,
      "grad_norm": 0.3399464190006256,
      "learning_rate": 4.346118619162774e-06,
      "loss": 0.015,
      "step": 1736100
    },
    {
      "epoch": 2.8411984577417306,
      "grad_norm": 0.4871618449687958,
      "learning_rate": 4.346052726949257e-06,
      "loss": 0.0146,
      "step": 1736120
    },
    {
      "epoch": 2.841231188180384,
      "grad_norm": 0.10288424044847488,
      "learning_rate": 4.3459868347357395e-06,
      "loss": 0.0121,
      "step": 1736140
    },
    {
      "epoch": 2.8412639186190374,
      "grad_norm": 0.1257970929145813,
      "learning_rate": 4.345920942522223e-06,
      "loss": 0.0096,
      "step": 1736160
    },
    {
      "epoch": 2.8412966490576905,
      "grad_norm": 0.8648987412452698,
      "learning_rate": 4.345855050308706e-06,
      "loss": 0.0105,
      "step": 1736180
    },
    {
      "epoch": 2.841329379496344,
      "grad_norm": 0.22911995649337769,
      "learning_rate": 4.3457891580951885e-06,
      "loss": 0.0125,
      "step": 1736200
    },
    {
      "epoch": 2.8413621099349973,
      "grad_norm": 0.2536453604698181,
      "learning_rate": 4.345723265881671e-06,
      "loss": 0.018,
      "step": 1736220
    },
    {
      "epoch": 2.841394840373651,
      "grad_norm": 0.8616971373558044,
      "learning_rate": 4.345657373668154e-06,
      "loss": 0.0187,
      "step": 1736240
    },
    {
      "epoch": 2.841427570812304,
      "grad_norm": 0.2618030905723572,
      "learning_rate": 4.345591481454637e-06,
      "loss": 0.0162,
      "step": 1736260
    },
    {
      "epoch": 2.841460301250957,
      "grad_norm": 0.33787983655929565,
      "learning_rate": 4.3455255892411195e-06,
      "loss": 0.0115,
      "step": 1736280
    },
    {
      "epoch": 2.8414930316896108,
      "grad_norm": 0.14485134184360504,
      "learning_rate": 4.345459697027602e-06,
      "loss": 0.014,
      "step": 1736300
    },
    {
      "epoch": 2.841525762128264,
      "grad_norm": 0.16288356482982635,
      "learning_rate": 4.345393804814085e-06,
      "loss": 0.0095,
      "step": 1736320
    },
    {
      "epoch": 2.8415584925669175,
      "grad_norm": 0.5058900713920593,
      "learning_rate": 4.3453279126005686e-06,
      "loss": 0.0115,
      "step": 1736340
    },
    {
      "epoch": 2.8415912230055707,
      "grad_norm": 0.2423781007528305,
      "learning_rate": 4.345262020387051e-06,
      "loss": 0.0202,
      "step": 1736360
    },
    {
      "epoch": 2.8416239534442242,
      "grad_norm": 0.2589879631996155,
      "learning_rate": 4.345196128173534e-06,
      "loss": 0.0129,
      "step": 1736380
    },
    {
      "epoch": 2.8416566838828774,
      "grad_norm": 0.37437739968299866,
      "learning_rate": 4.345130235960017e-06,
      "loss": 0.0092,
      "step": 1736400
    },
    {
      "epoch": 2.8416894143215305,
      "grad_norm": 1.2042275667190552,
      "learning_rate": 4.3450643437465e-06,
      "loss": 0.0204,
      "step": 1736420
    },
    {
      "epoch": 2.841722144760184,
      "grad_norm": 0.173145592212677,
      "learning_rate": 4.344998451532983e-06,
      "loss": 0.0163,
      "step": 1736440
    },
    {
      "epoch": 2.8417548751988373,
      "grad_norm": 0.7676421999931335,
      "learning_rate": 4.344932559319466e-06,
      "loss": 0.0163,
      "step": 1736460
    },
    {
      "epoch": 2.841787605637491,
      "grad_norm": 0.21028485894203186,
      "learning_rate": 4.3448666671059486e-06,
      "loss": 0.0116,
      "step": 1736480
    },
    {
      "epoch": 2.841820336076144,
      "grad_norm": 0.18873842060565948,
      "learning_rate": 4.344800774892431e-06,
      "loss": 0.0127,
      "step": 1736500
    },
    {
      "epoch": 2.8418530665147976,
      "grad_norm": 0.2922735810279846,
      "learning_rate": 4.344734882678914e-06,
      "loss": 0.0146,
      "step": 1736520
    },
    {
      "epoch": 2.841885796953451,
      "grad_norm": 0.9082972407341003,
      "learning_rate": 4.344668990465397e-06,
      "loss": 0.0126,
      "step": 1736540
    },
    {
      "epoch": 2.841918527392104,
      "grad_norm": 0.37595948576927185,
      "learning_rate": 4.34460309825188e-06,
      "loss": 0.0174,
      "step": 1736560
    },
    {
      "epoch": 2.8419512578307575,
      "grad_norm": 0.28909042477607727,
      "learning_rate": 4.344537206038363e-06,
      "loss": 0.0192,
      "step": 1736580
    },
    {
      "epoch": 2.8419839882694107,
      "grad_norm": 0.28482455015182495,
      "learning_rate": 4.344471313824846e-06,
      "loss": 0.0109,
      "step": 1736600
    },
    {
      "epoch": 2.8420167187080643,
      "grad_norm": 0.5518263578414917,
      "learning_rate": 4.344405421611329e-06,
      "loss": 0.0085,
      "step": 1736620
    },
    {
      "epoch": 2.8420494491467174,
      "grad_norm": 0.2982795238494873,
      "learning_rate": 4.344339529397811e-06,
      "loss": 0.007,
      "step": 1736640
    },
    {
      "epoch": 2.842082179585371,
      "grad_norm": 0.966009795665741,
      "learning_rate": 4.344273637184294e-06,
      "loss": 0.017,
      "step": 1736660
    },
    {
      "epoch": 2.842114910024024,
      "grad_norm": 0.23339258134365082,
      "learning_rate": 4.344207744970777e-06,
      "loss": 0.0144,
      "step": 1736680
    },
    {
      "epoch": 2.8421476404626773,
      "grad_norm": 1.0400006771087646,
      "learning_rate": 4.3441418527572596e-06,
      "loss": 0.0187,
      "step": 1736700
    },
    {
      "epoch": 2.842180370901331,
      "grad_norm": 0.23925305902957916,
      "learning_rate": 4.344075960543742e-06,
      "loss": 0.0174,
      "step": 1736720
    },
    {
      "epoch": 2.842213101339984,
      "grad_norm": 0.29804471135139465,
      "learning_rate": 4.344010068330226e-06,
      "loss": 0.0187,
      "step": 1736740
    },
    {
      "epoch": 2.8422458317786377,
      "grad_norm": 0.35098960995674133,
      "learning_rate": 4.343944176116709e-06,
      "loss": 0.0181,
      "step": 1736760
    },
    {
      "epoch": 2.842278562217291,
      "grad_norm": 0.578526496887207,
      "learning_rate": 4.343878283903191e-06,
      "loss": 0.013,
      "step": 1736780
    },
    {
      "epoch": 2.8423112926559444,
      "grad_norm": 0.15419499576091766,
      "learning_rate": 4.343812391689675e-06,
      "loss": 0.0154,
      "step": 1736800
    },
    {
      "epoch": 2.8423440230945975,
      "grad_norm": 0.19902940094470978,
      "learning_rate": 4.343746499476158e-06,
      "loss": 0.009,
      "step": 1736820
    },
    {
      "epoch": 2.8423767535332507,
      "grad_norm": 0.27156293392181396,
      "learning_rate": 4.3436806072626404e-06,
      "loss": 0.013,
      "step": 1736840
    },
    {
      "epoch": 2.8424094839719043,
      "grad_norm": 0.3715200126171112,
      "learning_rate": 4.343614715049123e-06,
      "loss": 0.0094,
      "step": 1736860
    },
    {
      "epoch": 2.8424422144105574,
      "grad_norm": 0.7902148962020874,
      "learning_rate": 4.343548822835606e-06,
      "loss": 0.0094,
      "step": 1736880
    },
    {
      "epoch": 2.8424749448492106,
      "grad_norm": 0.2093244194984436,
      "learning_rate": 4.343482930622089e-06,
      "loss": 0.0118,
      "step": 1736900
    },
    {
      "epoch": 2.842507675287864,
      "grad_norm": 0.10816581547260284,
      "learning_rate": 4.343417038408571e-06,
      "loss": 0.0082,
      "step": 1736920
    },
    {
      "epoch": 2.842540405726518,
      "grad_norm": 0.06892596185207367,
      "learning_rate": 4.343351146195054e-06,
      "loss": 0.0107,
      "step": 1736940
    },
    {
      "epoch": 2.842573136165171,
      "grad_norm": 0.44308820366859436,
      "learning_rate": 4.343285253981538e-06,
      "loss": 0.015,
      "step": 1736960
    },
    {
      "epoch": 2.842605866603824,
      "grad_norm": 0.13316652178764343,
      "learning_rate": 4.3432193617680205e-06,
      "loss": 0.0108,
      "step": 1736980
    },
    {
      "epoch": 2.8426385970424777,
      "grad_norm": 0.13056157529354095,
      "learning_rate": 4.343153469554503e-06,
      "loss": 0.0106,
      "step": 1737000
    },
    {
      "epoch": 2.842671327481131,
      "grad_norm": 0.2034430056810379,
      "learning_rate": 4.343087577340986e-06,
      "loss": 0.0198,
      "step": 1737020
    },
    {
      "epoch": 2.842704057919784,
      "grad_norm": 0.07396271824836731,
      "learning_rate": 4.343021685127469e-06,
      "loss": 0.0072,
      "step": 1737040
    },
    {
      "epoch": 2.8427367883584376,
      "grad_norm": 0.142389178276062,
      "learning_rate": 4.342955792913951e-06,
      "loss": 0.0092,
      "step": 1737060
    },
    {
      "epoch": 2.842769518797091,
      "grad_norm": 2.307996988296509,
      "learning_rate": 4.342889900700434e-06,
      "loss": 0.0218,
      "step": 1737080
    },
    {
      "epoch": 2.8428022492357443,
      "grad_norm": 0.08280666172504425,
      "learning_rate": 4.342824008486917e-06,
      "loss": 0.0167,
      "step": 1737100
    },
    {
      "epoch": 2.8428349796743975,
      "grad_norm": 0.10333431512117386,
      "learning_rate": 4.3427581162734005e-06,
      "loss": 0.0164,
      "step": 1737120
    },
    {
      "epoch": 2.842867710113051,
      "grad_norm": 0.2998461127281189,
      "learning_rate": 4.342692224059883e-06,
      "loss": 0.0143,
      "step": 1737140
    },
    {
      "epoch": 2.842900440551704,
      "grad_norm": 0.39312678575515747,
      "learning_rate": 4.342626331846366e-06,
      "loss": 0.01,
      "step": 1737160
    },
    {
      "epoch": 2.8429331709903574,
      "grad_norm": 0.5960712432861328,
      "learning_rate": 4.342560439632849e-06,
      "loss": 0.011,
      "step": 1737180
    },
    {
      "epoch": 2.842965901429011,
      "grad_norm": 0.22654114663600922,
      "learning_rate": 4.342494547419332e-06,
      "loss": 0.0133,
      "step": 1737200
    },
    {
      "epoch": 2.8429986318676645,
      "grad_norm": 0.13051174581050873,
      "learning_rate": 4.342428655205815e-06,
      "loss": 0.0113,
      "step": 1737220
    },
    {
      "epoch": 2.8430313623063177,
      "grad_norm": 0.4293130040168762,
      "learning_rate": 4.342362762992298e-06,
      "loss": 0.0087,
      "step": 1737240
    },
    {
      "epoch": 2.843064092744971,
      "grad_norm": 0.2090831845998764,
      "learning_rate": 4.3422968707787805e-06,
      "loss": 0.0128,
      "step": 1737260
    },
    {
      "epoch": 2.8430968231836244,
      "grad_norm": 0.6289525032043457,
      "learning_rate": 4.342230978565263e-06,
      "loss": 0.0154,
      "step": 1737280
    },
    {
      "epoch": 2.8431295536222776,
      "grad_norm": 0.3019574284553528,
      "learning_rate": 4.342165086351746e-06,
      "loss": 0.009,
      "step": 1737300
    },
    {
      "epoch": 2.8431622840609307,
      "grad_norm": 0.29388853907585144,
      "learning_rate": 4.342099194138229e-06,
      "loss": 0.0236,
      "step": 1737320
    },
    {
      "epoch": 2.8431950144995843,
      "grad_norm": 0.4761209189891815,
      "learning_rate": 4.3420333019247115e-06,
      "loss": 0.0107,
      "step": 1737340
    },
    {
      "epoch": 2.843227744938238,
      "grad_norm": 0.4760726988315582,
      "learning_rate": 4.341967409711195e-06,
      "loss": 0.0194,
      "step": 1737360
    },
    {
      "epoch": 2.843260475376891,
      "grad_norm": 0.6288160085678101,
      "learning_rate": 4.341901517497678e-06,
      "loss": 0.0116,
      "step": 1737380
    },
    {
      "epoch": 2.8432932058155442,
      "grad_norm": 0.3193734586238861,
      "learning_rate": 4.3418356252841605e-06,
      "loss": 0.0113,
      "step": 1737400
    },
    {
      "epoch": 2.843325936254198,
      "grad_norm": 0.3452938497066498,
      "learning_rate": 4.341769733070643e-06,
      "loss": 0.0155,
      "step": 1737420
    },
    {
      "epoch": 2.843358666692851,
      "grad_norm": 0.16639593243598938,
      "learning_rate": 4.341703840857126e-06,
      "loss": 0.0157,
      "step": 1737440
    },
    {
      "epoch": 2.843391397131504,
      "grad_norm": 0.06690679490566254,
      "learning_rate": 4.341637948643609e-06,
      "loss": 0.0176,
      "step": 1737460
    },
    {
      "epoch": 2.8434241275701577,
      "grad_norm": 0.9202098846435547,
      "learning_rate": 4.341572056430092e-06,
      "loss": 0.0186,
      "step": 1737480
    },
    {
      "epoch": 2.843456858008811,
      "grad_norm": 0.4748265743255615,
      "learning_rate": 4.341506164216575e-06,
      "loss": 0.0207,
      "step": 1737500
    },
    {
      "epoch": 2.8434895884474645,
      "grad_norm": 0.1819249391555786,
      "learning_rate": 4.341440272003058e-06,
      "loss": 0.0112,
      "step": 1737520
    },
    {
      "epoch": 2.8435223188861176,
      "grad_norm": 0.31477898359298706,
      "learning_rate": 4.3413743797895406e-06,
      "loss": 0.0148,
      "step": 1737540
    },
    {
      "epoch": 2.843555049324771,
      "grad_norm": 0.43724849820137024,
      "learning_rate": 4.341308487576023e-06,
      "loss": 0.0173,
      "step": 1737560
    },
    {
      "epoch": 2.8435877797634244,
      "grad_norm": 0.7083009481430054,
      "learning_rate": 4.341242595362507e-06,
      "loss": 0.0104,
      "step": 1737580
    },
    {
      "epoch": 2.8436205102020775,
      "grad_norm": 0.35912734270095825,
      "learning_rate": 4.34117670314899e-06,
      "loss": 0.0109,
      "step": 1737600
    },
    {
      "epoch": 2.843653240640731,
      "grad_norm": 0.8655497431755066,
      "learning_rate": 4.341110810935472e-06,
      "loss": 0.0137,
      "step": 1737620
    },
    {
      "epoch": 2.8436859710793843,
      "grad_norm": 0.2448730319738388,
      "learning_rate": 4.341044918721955e-06,
      "loss": 0.0149,
      "step": 1737640
    },
    {
      "epoch": 2.843718701518038,
      "grad_norm": 0.7121551632881165,
      "learning_rate": 4.340979026508438e-06,
      "loss": 0.0103,
      "step": 1737660
    },
    {
      "epoch": 2.843751431956691,
      "grad_norm": 0.6592643857002258,
      "learning_rate": 4.340913134294921e-06,
      "loss": 0.0115,
      "step": 1737680
    },
    {
      "epoch": 2.8437841623953446,
      "grad_norm": 0.04548617824912071,
      "learning_rate": 4.340847242081403e-06,
      "loss": 0.009,
      "step": 1737700
    },
    {
      "epoch": 2.8438168928339977,
      "grad_norm": 0.37114280462265015,
      "learning_rate": 4.340781349867886e-06,
      "loss": 0.0169,
      "step": 1737720
    },
    {
      "epoch": 2.843849623272651,
      "grad_norm": 0.09776663035154343,
      "learning_rate": 4.340715457654369e-06,
      "loss": 0.013,
      "step": 1737740
    },
    {
      "epoch": 2.8438823537113045,
      "grad_norm": 0.094724141061306,
      "learning_rate": 4.340649565440852e-06,
      "loss": 0.0165,
      "step": 1737760
    },
    {
      "epoch": 2.8439150841499576,
      "grad_norm": 0.2463984489440918,
      "learning_rate": 4.340583673227335e-06,
      "loss": 0.0101,
      "step": 1737780
    },
    {
      "epoch": 2.8439478145886112,
      "grad_norm": 0.47703224420547485,
      "learning_rate": 4.340517781013818e-06,
      "loss": 0.0116,
      "step": 1737800
    },
    {
      "epoch": 2.8439805450272644,
      "grad_norm": 0.6490415334701538,
      "learning_rate": 4.340451888800301e-06,
      "loss": 0.0156,
      "step": 1737820
    },
    {
      "epoch": 2.844013275465918,
      "grad_norm": 0.19270211458206177,
      "learning_rate": 4.340385996586784e-06,
      "loss": 0.0133,
      "step": 1737840
    },
    {
      "epoch": 2.844046005904571,
      "grad_norm": 0.36677253246307373,
      "learning_rate": 4.340320104373267e-06,
      "loss": 0.0174,
      "step": 1737860
    },
    {
      "epoch": 2.8440787363432243,
      "grad_norm": 0.3025287687778473,
      "learning_rate": 4.34025421215975e-06,
      "loss": 0.0179,
      "step": 1737880
    },
    {
      "epoch": 2.844111466781878,
      "grad_norm": 0.24045352637767792,
      "learning_rate": 4.340188319946232e-06,
      "loss": 0.0123,
      "step": 1737900
    },
    {
      "epoch": 2.844144197220531,
      "grad_norm": 0.18376494944095612,
      "learning_rate": 4.340122427732715e-06,
      "loss": 0.0138,
      "step": 1737920
    },
    {
      "epoch": 2.8441769276591846,
      "grad_norm": 0.26122695207595825,
      "learning_rate": 4.340056535519198e-06,
      "loss": 0.0138,
      "step": 1737940
    },
    {
      "epoch": 2.8442096580978378,
      "grad_norm": 0.07982762902975082,
      "learning_rate": 4.339990643305681e-06,
      "loss": 0.0104,
      "step": 1737960
    },
    {
      "epoch": 2.8442423885364914,
      "grad_norm": 0.1691969931125641,
      "learning_rate": 4.339924751092164e-06,
      "loss": 0.0138,
      "step": 1737980
    },
    {
      "epoch": 2.8442751189751445,
      "grad_norm": 0.17915894091129303,
      "learning_rate": 4.339858858878647e-06,
      "loss": 0.0122,
      "step": 1738000
    },
    {
      "epoch": 2.8443078494137977,
      "grad_norm": 0.3436148464679718,
      "learning_rate": 4.33979296666513e-06,
      "loss": 0.0146,
      "step": 1738020
    },
    {
      "epoch": 2.8443405798524513,
      "grad_norm": 0.13188955187797546,
      "learning_rate": 4.3397270744516124e-06,
      "loss": 0.0098,
      "step": 1738040
    },
    {
      "epoch": 2.8443733102911044,
      "grad_norm": 0.047691088169813156,
      "learning_rate": 4.339661182238095e-06,
      "loss": 0.0086,
      "step": 1738060
    },
    {
      "epoch": 2.844406040729758,
      "grad_norm": 0.2730758488178253,
      "learning_rate": 4.339595290024578e-06,
      "loss": 0.0134,
      "step": 1738080
    },
    {
      "epoch": 2.844438771168411,
      "grad_norm": 0.5711995363235474,
      "learning_rate": 4.339529397811061e-06,
      "loss": 0.011,
      "step": 1738100
    },
    {
      "epoch": 2.8444715016070647,
      "grad_norm": 0.14326253533363342,
      "learning_rate": 4.339463505597543e-06,
      "loss": 0.0089,
      "step": 1738120
    },
    {
      "epoch": 2.844504232045718,
      "grad_norm": 0.21965107321739197,
      "learning_rate": 4.339397613384026e-06,
      "loss": 0.0163,
      "step": 1738140
    },
    {
      "epoch": 2.844536962484371,
      "grad_norm": 0.8310217261314392,
      "learning_rate": 4.33933172117051e-06,
      "loss": 0.0136,
      "step": 1738160
    },
    {
      "epoch": 2.8445696929230246,
      "grad_norm": 0.10103341192007065,
      "learning_rate": 4.3392658289569925e-06,
      "loss": 0.0128,
      "step": 1738180
    },
    {
      "epoch": 2.844602423361678,
      "grad_norm": 0.27820757031440735,
      "learning_rate": 4.339199936743475e-06,
      "loss": 0.0148,
      "step": 1738200
    },
    {
      "epoch": 2.8446351538003314,
      "grad_norm": 0.4328514337539673,
      "learning_rate": 4.339134044529959e-06,
      "loss": 0.0094,
      "step": 1738220
    },
    {
      "epoch": 2.8446678842389845,
      "grad_norm": 0.18765120208263397,
      "learning_rate": 4.3390681523164415e-06,
      "loss": 0.02,
      "step": 1738240
    },
    {
      "epoch": 2.844700614677638,
      "grad_norm": 0.3937324285507202,
      "learning_rate": 4.339002260102924e-06,
      "loss": 0.0104,
      "step": 1738260
    },
    {
      "epoch": 2.8447333451162913,
      "grad_norm": 0.19748221337795258,
      "learning_rate": 4.338936367889407e-06,
      "loss": 0.0128,
      "step": 1738280
    },
    {
      "epoch": 2.8447660755549444,
      "grad_norm": 0.37523558735847473,
      "learning_rate": 4.33887047567589e-06,
      "loss": 0.0142,
      "step": 1738300
    },
    {
      "epoch": 2.844798805993598,
      "grad_norm": 0.2960677742958069,
      "learning_rate": 4.3388045834623725e-06,
      "loss": 0.0094,
      "step": 1738320
    },
    {
      "epoch": 2.844831536432251,
      "grad_norm": 0.42767754197120667,
      "learning_rate": 4.338738691248855e-06,
      "loss": 0.0114,
      "step": 1738340
    },
    {
      "epoch": 2.8448642668709048,
      "grad_norm": 0.06918730586767197,
      "learning_rate": 4.338672799035338e-06,
      "loss": 0.01,
      "step": 1738360
    },
    {
      "epoch": 2.844896997309558,
      "grad_norm": 0.2698128819465637,
      "learning_rate": 4.3386069068218216e-06,
      "loss": 0.0175,
      "step": 1738380
    },
    {
      "epoch": 2.8449297277482115,
      "grad_norm": 0.15280142426490784,
      "learning_rate": 4.338541014608304e-06,
      "loss": 0.0125,
      "step": 1738400
    },
    {
      "epoch": 2.8449624581868647,
      "grad_norm": 0.37237662076950073,
      "learning_rate": 4.338475122394787e-06,
      "loss": 0.0146,
      "step": 1738420
    },
    {
      "epoch": 2.844995188625518,
      "grad_norm": 0.5717209577560425,
      "learning_rate": 4.33840923018127e-06,
      "loss": 0.0097,
      "step": 1738440
    },
    {
      "epoch": 2.8450279190641714,
      "grad_norm": 0.24652285873889923,
      "learning_rate": 4.3383433379677525e-06,
      "loss": 0.0163,
      "step": 1738460
    },
    {
      "epoch": 2.8450606495028246,
      "grad_norm": 0.09035822749137878,
      "learning_rate": 4.338277445754235e-06,
      "loss": 0.0116,
      "step": 1738480
    },
    {
      "epoch": 2.8450933799414777,
      "grad_norm": 0.27607765793800354,
      "learning_rate": 4.338211553540718e-06,
      "loss": 0.0112,
      "step": 1738500
    },
    {
      "epoch": 2.8451261103801313,
      "grad_norm": 0.21745868027210236,
      "learning_rate": 4.338145661327201e-06,
      "loss": 0.0143,
      "step": 1738520
    },
    {
      "epoch": 2.845158840818785,
      "grad_norm": 0.36903390288352966,
      "learning_rate": 4.338079769113684e-06,
      "loss": 0.0113,
      "step": 1738540
    },
    {
      "epoch": 2.845191571257438,
      "grad_norm": 0.42601993680000305,
      "learning_rate": 4.338013876900167e-06,
      "loss": 0.0196,
      "step": 1738560
    },
    {
      "epoch": 2.845224301696091,
      "grad_norm": 0.27237483859062195,
      "learning_rate": 4.33794798468665e-06,
      "loss": 0.0126,
      "step": 1738580
    },
    {
      "epoch": 2.845257032134745,
      "grad_norm": 0.6062273383140564,
      "learning_rate": 4.337882092473133e-06,
      "loss": 0.019,
      "step": 1738600
    },
    {
      "epoch": 2.845289762573398,
      "grad_norm": 0.09230749309062958,
      "learning_rate": 4.337816200259616e-06,
      "loss": 0.0115,
      "step": 1738620
    },
    {
      "epoch": 2.845322493012051,
      "grad_norm": 0.5373454093933105,
      "learning_rate": 4.337750308046099e-06,
      "loss": 0.016,
      "step": 1738640
    },
    {
      "epoch": 2.8453552234507047,
      "grad_norm": 0.44815436005592346,
      "learning_rate": 4.337684415832582e-06,
      "loss": 0.0147,
      "step": 1738660
    },
    {
      "epoch": 2.8453879538893583,
      "grad_norm": 0.3814573287963867,
      "learning_rate": 4.337618523619064e-06,
      "loss": 0.0124,
      "step": 1738680
    },
    {
      "epoch": 2.8454206843280114,
      "grad_norm": 0.12518607079982758,
      "learning_rate": 4.337552631405547e-06,
      "loss": 0.0131,
      "step": 1738700
    },
    {
      "epoch": 2.8454534147666646,
      "grad_norm": 0.3821369409561157,
      "learning_rate": 4.33748673919203e-06,
      "loss": 0.0124,
      "step": 1738720
    },
    {
      "epoch": 2.845486145205318,
      "grad_norm": 0.19472713768482208,
      "learning_rate": 4.3374208469785126e-06,
      "loss": 0.0144,
      "step": 1738740
    },
    {
      "epoch": 2.8455188756439713,
      "grad_norm": 0.2185758352279663,
      "learning_rate": 4.337354954764995e-06,
      "loss": 0.0107,
      "step": 1738760
    },
    {
      "epoch": 2.8455516060826245,
      "grad_norm": 0.17833004891872406,
      "learning_rate": 4.337289062551479e-06,
      "loss": 0.0091,
      "step": 1738780
    },
    {
      "epoch": 2.845584336521278,
      "grad_norm": 0.24131962656974792,
      "learning_rate": 4.337223170337962e-06,
      "loss": 0.0102,
      "step": 1738800
    },
    {
      "epoch": 2.8456170669599317,
      "grad_norm": 0.4417133331298828,
      "learning_rate": 4.337157278124444e-06,
      "loss": 0.0162,
      "step": 1738820
    },
    {
      "epoch": 2.845649797398585,
      "grad_norm": 0.888176441192627,
      "learning_rate": 4.337091385910927e-06,
      "loss": 0.0089,
      "step": 1738840
    },
    {
      "epoch": 2.845682527837238,
      "grad_norm": 0.5569881200790405,
      "learning_rate": 4.33702549369741e-06,
      "loss": 0.0127,
      "step": 1738860
    },
    {
      "epoch": 2.8457152582758916,
      "grad_norm": 0.690413236618042,
      "learning_rate": 4.3369596014838934e-06,
      "loss": 0.0144,
      "step": 1738880
    },
    {
      "epoch": 2.8457479887145447,
      "grad_norm": 0.3552349805831909,
      "learning_rate": 4.336893709270376e-06,
      "loss": 0.0135,
      "step": 1738900
    },
    {
      "epoch": 2.845780719153198,
      "grad_norm": 0.183817058801651,
      "learning_rate": 4.336827817056859e-06,
      "loss": 0.0164,
      "step": 1738920
    },
    {
      "epoch": 2.8458134495918515,
      "grad_norm": 0.8237040638923645,
      "learning_rate": 4.336761924843342e-06,
      "loss": 0.0119,
      "step": 1738940
    },
    {
      "epoch": 2.845846180030505,
      "grad_norm": 1.2916005849838257,
      "learning_rate": 4.336696032629824e-06,
      "loss": 0.0177,
      "step": 1738960
    },
    {
      "epoch": 2.845878910469158,
      "grad_norm": 0.14455083012580872,
      "learning_rate": 4.336630140416307e-06,
      "loss": 0.0109,
      "step": 1738980
    },
    {
      "epoch": 2.8459116409078113,
      "grad_norm": 0.18708671629428864,
      "learning_rate": 4.336564248202791e-06,
      "loss": 0.0161,
      "step": 1739000
    },
    {
      "epoch": 2.845944371346465,
      "grad_norm": 0.9337113499641418,
      "learning_rate": 4.3364983559892735e-06,
      "loss": 0.025,
      "step": 1739020
    },
    {
      "epoch": 2.845977101785118,
      "grad_norm": 0.13764041662216187,
      "learning_rate": 4.336432463775756e-06,
      "loss": 0.0156,
      "step": 1739040
    },
    {
      "epoch": 2.8460098322237712,
      "grad_norm": 0.08602793514728546,
      "learning_rate": 4.336366571562239e-06,
      "loss": 0.0102,
      "step": 1739060
    },
    {
      "epoch": 2.846042562662425,
      "grad_norm": 0.22097042202949524,
      "learning_rate": 4.336300679348722e-06,
      "loss": 0.0092,
      "step": 1739080
    },
    {
      "epoch": 2.846075293101078,
      "grad_norm": 0.5561110377311707,
      "learning_rate": 4.3362347871352044e-06,
      "loss": 0.0125,
      "step": 1739100
    },
    {
      "epoch": 2.8461080235397316,
      "grad_norm": 0.5419268012046814,
      "learning_rate": 4.336168894921687e-06,
      "loss": 0.0116,
      "step": 1739120
    },
    {
      "epoch": 2.8461407539783847,
      "grad_norm": 0.6218054294586182,
      "learning_rate": 4.33610300270817e-06,
      "loss": 0.0124,
      "step": 1739140
    },
    {
      "epoch": 2.8461734844170383,
      "grad_norm": 0.41861340403556824,
      "learning_rate": 4.336037110494653e-06,
      "loss": 0.0198,
      "step": 1739160
    },
    {
      "epoch": 2.8462062148556915,
      "grad_norm": 0.1535523533821106,
      "learning_rate": 4.335971218281136e-06,
      "loss": 0.0128,
      "step": 1739180
    },
    {
      "epoch": 2.8462389452943446,
      "grad_norm": 0.42092397809028625,
      "learning_rate": 4.335905326067619e-06,
      "loss": 0.0149,
      "step": 1739200
    },
    {
      "epoch": 2.846271675732998,
      "grad_norm": 0.6163967847824097,
      "learning_rate": 4.335839433854102e-06,
      "loss": 0.0197,
      "step": 1739220
    },
    {
      "epoch": 2.8463044061716514,
      "grad_norm": 0.144632488489151,
      "learning_rate": 4.335773541640585e-06,
      "loss": 0.01,
      "step": 1739240
    },
    {
      "epoch": 2.846337136610305,
      "grad_norm": 0.23205967247486115,
      "learning_rate": 4.335707649427068e-06,
      "loss": 0.012,
      "step": 1739260
    },
    {
      "epoch": 2.846369867048958,
      "grad_norm": 0.3259555995464325,
      "learning_rate": 4.335641757213551e-06,
      "loss": 0.0174,
      "step": 1739280
    },
    {
      "epoch": 2.8464025974876117,
      "grad_norm": 0.17281770706176758,
      "learning_rate": 4.3355758650000335e-06,
      "loss": 0.0151,
      "step": 1739300
    },
    {
      "epoch": 2.846435327926265,
      "grad_norm": 0.16516439616680145,
      "learning_rate": 4.335509972786516e-06,
      "loss": 0.0171,
      "step": 1739320
    },
    {
      "epoch": 2.846468058364918,
      "grad_norm": 0.22600679099559784,
      "learning_rate": 4.335444080572999e-06,
      "loss": 0.008,
      "step": 1739340
    },
    {
      "epoch": 2.8465007888035716,
      "grad_norm": 0.45622408390045166,
      "learning_rate": 4.335378188359482e-06,
      "loss": 0.0153,
      "step": 1739360
    },
    {
      "epoch": 2.8465335192422248,
      "grad_norm": 0.053537964820861816,
      "learning_rate": 4.3353122961459645e-06,
      "loss": 0.0099,
      "step": 1739380
    },
    {
      "epoch": 2.8465662496808783,
      "grad_norm": 0.17649418115615845,
      "learning_rate": 4.335246403932448e-06,
      "loss": 0.0169,
      "step": 1739400
    },
    {
      "epoch": 2.8465989801195315,
      "grad_norm": 0.19998668134212494,
      "learning_rate": 4.335180511718931e-06,
      "loss": 0.0153,
      "step": 1739420
    },
    {
      "epoch": 2.846631710558185,
      "grad_norm": 0.17003633081912994,
      "learning_rate": 4.3351146195054135e-06,
      "loss": 0.0092,
      "step": 1739440
    },
    {
      "epoch": 2.8466644409968382,
      "grad_norm": 0.3752453029155731,
      "learning_rate": 4.335048727291896e-06,
      "loss": 0.011,
      "step": 1739460
    },
    {
      "epoch": 2.8466971714354914,
      "grad_norm": 0.6955599188804626,
      "learning_rate": 4.334982835078379e-06,
      "loss": 0.0145,
      "step": 1739480
    },
    {
      "epoch": 2.846729901874145,
      "grad_norm": 0.8229631185531616,
      "learning_rate": 4.334916942864862e-06,
      "loss": 0.014,
      "step": 1739500
    },
    {
      "epoch": 2.846762632312798,
      "grad_norm": 0.715165913105011,
      "learning_rate": 4.3348510506513445e-06,
      "loss": 0.0125,
      "step": 1739520
    },
    {
      "epoch": 2.8467953627514517,
      "grad_norm": 0.2668074667453766,
      "learning_rate": 4.334785158437827e-06,
      "loss": 0.0109,
      "step": 1739540
    },
    {
      "epoch": 2.846828093190105,
      "grad_norm": 0.14369824528694153,
      "learning_rate": 4.33471926622431e-06,
      "loss": 0.013,
      "step": 1739560
    },
    {
      "epoch": 2.8468608236287585,
      "grad_norm": 0.11879073083400726,
      "learning_rate": 4.3346533740107936e-06,
      "loss": 0.013,
      "step": 1739580
    },
    {
      "epoch": 2.8468935540674116,
      "grad_norm": 0.2558482587337494,
      "learning_rate": 4.334587481797276e-06,
      "loss": 0.0117,
      "step": 1739600
    },
    {
      "epoch": 2.8469262845060648,
      "grad_norm": 0.19887787103652954,
      "learning_rate": 4.334521589583759e-06,
      "loss": 0.0151,
      "step": 1739620
    },
    {
      "epoch": 2.8469590149447184,
      "grad_norm": 0.6160089373588562,
      "learning_rate": 4.334455697370243e-06,
      "loss": 0.0132,
      "step": 1739640
    },
    {
      "epoch": 2.8469917453833715,
      "grad_norm": 0.09386162459850311,
      "learning_rate": 4.334389805156725e-06,
      "loss": 0.0112,
      "step": 1739660
    },
    {
      "epoch": 2.847024475822025,
      "grad_norm": 0.3707038462162018,
      "learning_rate": 4.334323912943208e-06,
      "loss": 0.0127,
      "step": 1739680
    },
    {
      "epoch": 2.8470572062606783,
      "grad_norm": 0.774587869644165,
      "learning_rate": 4.334258020729691e-06,
      "loss": 0.0124,
      "step": 1739700
    },
    {
      "epoch": 2.847089936699332,
      "grad_norm": 0.23099902272224426,
      "learning_rate": 4.334192128516174e-06,
      "loss": 0.0135,
      "step": 1739720
    },
    {
      "epoch": 2.847122667137985,
      "grad_norm": 0.3637726306915283,
      "learning_rate": 4.334126236302656e-06,
      "loss": 0.0153,
      "step": 1739740
    },
    {
      "epoch": 2.847155397576638,
      "grad_norm": 0.11822012811899185,
      "learning_rate": 4.334060344089139e-06,
      "loss": 0.013,
      "step": 1739760
    },
    {
      "epoch": 2.8471881280152918,
      "grad_norm": 0.4061947166919708,
      "learning_rate": 4.333994451875622e-06,
      "loss": 0.0143,
      "step": 1739780
    },
    {
      "epoch": 2.847220858453945,
      "grad_norm": 0.29166027903556824,
      "learning_rate": 4.333928559662105e-06,
      "loss": 0.0167,
      "step": 1739800
    },
    {
      "epoch": 2.8472535888925985,
      "grad_norm": 0.18638905882835388,
      "learning_rate": 4.333862667448588e-06,
      "loss": 0.0122,
      "step": 1739820
    },
    {
      "epoch": 2.8472863193312516,
      "grad_norm": 0.34356316924095154,
      "learning_rate": 4.333796775235071e-06,
      "loss": 0.0156,
      "step": 1739840
    },
    {
      "epoch": 2.8473190497699052,
      "grad_norm": 0.22212548553943634,
      "learning_rate": 4.333730883021554e-06,
      "loss": 0.0139,
      "step": 1739860
    },
    {
      "epoch": 2.8473517802085584,
      "grad_norm": 0.1453714370727539,
      "learning_rate": 4.333664990808036e-06,
      "loss": 0.0129,
      "step": 1739880
    },
    {
      "epoch": 2.8473845106472115,
      "grad_norm": 0.43080198764801025,
      "learning_rate": 4.333599098594519e-06,
      "loss": 0.0142,
      "step": 1739900
    },
    {
      "epoch": 2.847417241085865,
      "grad_norm": 0.3039008378982544,
      "learning_rate": 4.333533206381002e-06,
      "loss": 0.0147,
      "step": 1739920
    },
    {
      "epoch": 2.8474499715245183,
      "grad_norm": 0.7170483469963074,
      "learning_rate": 4.333467314167485e-06,
      "loss": 0.0184,
      "step": 1739940
    },
    {
      "epoch": 2.8474827019631714,
      "grad_norm": 0.13421952724456787,
      "learning_rate": 4.333401421953968e-06,
      "loss": 0.0147,
      "step": 1739960
    },
    {
      "epoch": 2.847515432401825,
      "grad_norm": 0.055505648255348206,
      "learning_rate": 4.333335529740451e-06,
      "loss": 0.0166,
      "step": 1739980
    },
    {
      "epoch": 2.8475481628404786,
      "grad_norm": 0.4989625811576843,
      "learning_rate": 4.333269637526934e-06,
      "loss": 0.0131,
      "step": 1740000
    },
    {
      "epoch": 2.8475808932791318,
      "grad_norm": 0.19337637722492218,
      "learning_rate": 4.333203745313417e-06,
      "loss": 0.0099,
      "step": 1740020
    },
    {
      "epoch": 2.847613623717785,
      "grad_norm": 0.2807971239089966,
      "learning_rate": 4.3331378530999e-06,
      "loss": 0.0153,
      "step": 1740040
    },
    {
      "epoch": 2.8476463541564385,
      "grad_norm": 0.22955912351608276,
      "learning_rate": 4.333071960886383e-06,
      "loss": 0.0106,
      "step": 1740060
    },
    {
      "epoch": 2.8476790845950917,
      "grad_norm": 0.33731895685195923,
      "learning_rate": 4.3330060686728654e-06,
      "loss": 0.0121,
      "step": 1740080
    },
    {
      "epoch": 2.847711815033745,
      "grad_norm": 0.4176378548145294,
      "learning_rate": 4.332940176459348e-06,
      "loss": 0.0102,
      "step": 1740100
    },
    {
      "epoch": 2.8477445454723984,
      "grad_norm": 0.13228225708007812,
      "learning_rate": 4.332874284245831e-06,
      "loss": 0.014,
      "step": 1740120
    },
    {
      "epoch": 2.847777275911052,
      "grad_norm": 1.0005184412002563,
      "learning_rate": 4.332808392032314e-06,
      "loss": 0.0136,
      "step": 1740140
    },
    {
      "epoch": 2.847810006349705,
      "grad_norm": 0.16614803671836853,
      "learning_rate": 4.332742499818796e-06,
      "loss": 0.0119,
      "step": 1740160
    },
    {
      "epoch": 2.8478427367883583,
      "grad_norm": 0.21019360423088074,
      "learning_rate": 4.332676607605279e-06,
      "loss": 0.0134,
      "step": 1740180
    },
    {
      "epoch": 2.847875467227012,
      "grad_norm": 0.3085007965564728,
      "learning_rate": 4.332610715391763e-06,
      "loss": 0.0109,
      "step": 1740200
    },
    {
      "epoch": 2.847908197665665,
      "grad_norm": 0.11420758813619614,
      "learning_rate": 4.3325448231782455e-06,
      "loss": 0.0121,
      "step": 1740220
    },
    {
      "epoch": 2.847940928104318,
      "grad_norm": 0.34259968996047974,
      "learning_rate": 4.332478930964728e-06,
      "loss": 0.0086,
      "step": 1740240
    },
    {
      "epoch": 2.847973658542972,
      "grad_norm": 0.8225462436676025,
      "learning_rate": 4.332413038751211e-06,
      "loss": 0.0122,
      "step": 1740260
    },
    {
      "epoch": 2.8480063889816254,
      "grad_norm": 0.649386465549469,
      "learning_rate": 4.332347146537694e-06,
      "loss": 0.0176,
      "step": 1740280
    },
    {
      "epoch": 2.8480391194202785,
      "grad_norm": 0.34333536028862,
      "learning_rate": 4.332281254324177e-06,
      "loss": 0.0117,
      "step": 1740300
    },
    {
      "epoch": 2.8480718498589317,
      "grad_norm": 0.19872379302978516,
      "learning_rate": 4.33221536211066e-06,
      "loss": 0.0177,
      "step": 1740320
    },
    {
      "epoch": 2.8481045802975853,
      "grad_norm": 0.12608204782009125,
      "learning_rate": 4.332149469897143e-06,
      "loss": 0.0151,
      "step": 1740340
    },
    {
      "epoch": 2.8481373107362384,
      "grad_norm": 0.36468565464019775,
      "learning_rate": 4.3320835776836255e-06,
      "loss": 0.014,
      "step": 1740360
    },
    {
      "epoch": 2.8481700411748916,
      "grad_norm": 0.4035651683807373,
      "learning_rate": 4.332017685470108e-06,
      "loss": 0.0255,
      "step": 1740380
    },
    {
      "epoch": 2.848202771613545,
      "grad_norm": 0.08472642302513123,
      "learning_rate": 4.331951793256591e-06,
      "loss": 0.0116,
      "step": 1740400
    },
    {
      "epoch": 2.8482355020521988,
      "grad_norm": 0.23791226744651794,
      "learning_rate": 4.3318859010430746e-06,
      "loss": 0.0112,
      "step": 1740420
    },
    {
      "epoch": 2.848268232490852,
      "grad_norm": 0.20138530433177948,
      "learning_rate": 4.331820008829557e-06,
      "loss": 0.0086,
      "step": 1740440
    },
    {
      "epoch": 2.848300962929505,
      "grad_norm": 1.0469889640808105,
      "learning_rate": 4.33175411661604e-06,
      "loss": 0.0141,
      "step": 1740460
    },
    {
      "epoch": 2.8483336933681587,
      "grad_norm": 1.3098376989364624,
      "learning_rate": 4.331688224402523e-06,
      "loss": 0.0111,
      "step": 1740480
    },
    {
      "epoch": 2.848366423806812,
      "grad_norm": 0.24659433960914612,
      "learning_rate": 4.3316223321890055e-06,
      "loss": 0.013,
      "step": 1740500
    },
    {
      "epoch": 2.848399154245465,
      "grad_norm": 1.3793495893478394,
      "learning_rate": 4.331556439975488e-06,
      "loss": 0.015,
      "step": 1740520
    },
    {
      "epoch": 2.8484318846841186,
      "grad_norm": 0.18770474195480347,
      "learning_rate": 4.331490547761971e-06,
      "loss": 0.0137,
      "step": 1740540
    },
    {
      "epoch": 2.8484646151227717,
      "grad_norm": 0.10436800867319107,
      "learning_rate": 4.331424655548454e-06,
      "loss": 0.0133,
      "step": 1740560
    },
    {
      "epoch": 2.8484973455614253,
      "grad_norm": 0.5166215896606445,
      "learning_rate": 4.3313587633349365e-06,
      "loss": 0.0119,
      "step": 1740580
    },
    {
      "epoch": 2.8485300760000785,
      "grad_norm": 0.3297314941883087,
      "learning_rate": 4.33129287112142e-06,
      "loss": 0.0172,
      "step": 1740600
    },
    {
      "epoch": 2.848562806438732,
      "grad_norm": 0.4598761796951294,
      "learning_rate": 4.331226978907903e-06,
      "loss": 0.0108,
      "step": 1740620
    },
    {
      "epoch": 2.848595536877385,
      "grad_norm": 0.2467302531003952,
      "learning_rate": 4.3311610866943855e-06,
      "loss": 0.0151,
      "step": 1740640
    },
    {
      "epoch": 2.8486282673160384,
      "grad_norm": 0.45615512132644653,
      "learning_rate": 4.331095194480869e-06,
      "loss": 0.0126,
      "step": 1740660
    },
    {
      "epoch": 2.848660997754692,
      "grad_norm": 0.284780889749527,
      "learning_rate": 4.331029302267352e-06,
      "loss": 0.0136,
      "step": 1740680
    },
    {
      "epoch": 2.848693728193345,
      "grad_norm": 0.3939640522003174,
      "learning_rate": 4.330963410053835e-06,
      "loss": 0.0153,
      "step": 1740700
    },
    {
      "epoch": 2.8487264586319987,
      "grad_norm": 1.351461410522461,
      "learning_rate": 4.330897517840317e-06,
      "loss": 0.0191,
      "step": 1740720
    },
    {
      "epoch": 2.848759189070652,
      "grad_norm": 0.0590810552239418,
      "learning_rate": 4.3308316256268e-06,
      "loss": 0.012,
      "step": 1740740
    },
    {
      "epoch": 2.8487919195093054,
      "grad_norm": 0.2681500017642975,
      "learning_rate": 4.330765733413283e-06,
      "loss": 0.0154,
      "step": 1740760
    },
    {
      "epoch": 2.8488246499479586,
      "grad_norm": 0.2434023916721344,
      "learning_rate": 4.3306998411997656e-06,
      "loss": 0.0124,
      "step": 1740780
    },
    {
      "epoch": 2.8488573803866117,
      "grad_norm": 0.1323508322238922,
      "learning_rate": 4.330633948986248e-06,
      "loss": 0.0106,
      "step": 1740800
    },
    {
      "epoch": 2.8488901108252653,
      "grad_norm": 0.3752440810203552,
      "learning_rate": 4.330568056772732e-06,
      "loss": 0.0134,
      "step": 1740820
    },
    {
      "epoch": 2.8489228412639185,
      "grad_norm": 1.1621299982070923,
      "learning_rate": 4.330502164559215e-06,
      "loss": 0.0152,
      "step": 1740840
    },
    {
      "epoch": 2.848955571702572,
      "grad_norm": 1.3612229824066162,
      "learning_rate": 4.330436272345697e-06,
      "loss": 0.018,
      "step": 1740860
    },
    {
      "epoch": 2.8489883021412252,
      "grad_norm": 0.28583037853240967,
      "learning_rate": 4.33037038013218e-06,
      "loss": 0.0083,
      "step": 1740880
    },
    {
      "epoch": 2.849021032579879,
      "grad_norm": 0.7295106649398804,
      "learning_rate": 4.330304487918663e-06,
      "loss": 0.0179,
      "step": 1740900
    },
    {
      "epoch": 2.849053763018532,
      "grad_norm": 0.1354662925004959,
      "learning_rate": 4.330238595705146e-06,
      "loss": 0.0162,
      "step": 1740920
    },
    {
      "epoch": 2.849086493457185,
      "grad_norm": 0.5064471960067749,
      "learning_rate": 4.330172703491628e-06,
      "loss": 0.0214,
      "step": 1740940
    },
    {
      "epoch": 2.8491192238958387,
      "grad_norm": 0.05779271572828293,
      "learning_rate": 4.330106811278111e-06,
      "loss": 0.0226,
      "step": 1740960
    },
    {
      "epoch": 2.849151954334492,
      "grad_norm": 0.5475521087646484,
      "learning_rate": 4.330040919064594e-06,
      "loss": 0.0123,
      "step": 1740980
    },
    {
      "epoch": 2.8491846847731455,
      "grad_norm": 0.28817465901374817,
      "learning_rate": 4.329975026851077e-06,
      "loss": 0.0127,
      "step": 1741000
    },
    {
      "epoch": 2.8492174152117986,
      "grad_norm": 0.17638444900512695,
      "learning_rate": 4.32990913463756e-06,
      "loss": 0.0166,
      "step": 1741020
    },
    {
      "epoch": 2.849250145650452,
      "grad_norm": 0.2575978636741638,
      "learning_rate": 4.329843242424043e-06,
      "loss": 0.013,
      "step": 1741040
    },
    {
      "epoch": 2.8492828760891054,
      "grad_norm": 0.21821652352809906,
      "learning_rate": 4.3297773502105265e-06,
      "loss": 0.0112,
      "step": 1741060
    },
    {
      "epoch": 2.8493156065277585,
      "grad_norm": 0.0782415047287941,
      "learning_rate": 4.329711457997009e-06,
      "loss": 0.0118,
      "step": 1741080
    },
    {
      "epoch": 2.849348336966412,
      "grad_norm": 1.4234751462936401,
      "learning_rate": 4.329645565783492e-06,
      "loss": 0.0139,
      "step": 1741100
    },
    {
      "epoch": 2.8493810674050652,
      "grad_norm": 0.765860915184021,
      "learning_rate": 4.329579673569975e-06,
      "loss": 0.0102,
      "step": 1741120
    },
    {
      "epoch": 2.849413797843719,
      "grad_norm": 0.7052228450775146,
      "learning_rate": 4.3295137813564574e-06,
      "loss": 0.0088,
      "step": 1741140
    },
    {
      "epoch": 2.849446528282372,
      "grad_norm": 0.11982694268226624,
      "learning_rate": 4.32944788914294e-06,
      "loss": 0.0129,
      "step": 1741160
    },
    {
      "epoch": 2.8494792587210256,
      "grad_norm": 0.26274675130844116,
      "learning_rate": 4.329381996929423e-06,
      "loss": 0.0142,
      "step": 1741180
    },
    {
      "epoch": 2.8495119891596787,
      "grad_norm": 0.17109519243240356,
      "learning_rate": 4.329316104715906e-06,
      "loss": 0.0141,
      "step": 1741200
    },
    {
      "epoch": 2.849544719598332,
      "grad_norm": 0.5547359585762024,
      "learning_rate": 4.329250212502389e-06,
      "loss": 0.009,
      "step": 1741220
    },
    {
      "epoch": 2.8495774500369855,
      "grad_norm": 0.44264042377471924,
      "learning_rate": 4.329184320288872e-06,
      "loss": 0.0105,
      "step": 1741240
    },
    {
      "epoch": 2.8496101804756386,
      "grad_norm": 0.6590240001678467,
      "learning_rate": 4.329118428075355e-06,
      "loss": 0.016,
      "step": 1741260
    },
    {
      "epoch": 2.8496429109142922,
      "grad_norm": 0.11121499538421631,
      "learning_rate": 4.3290525358618375e-06,
      "loss": 0.0148,
      "step": 1741280
    },
    {
      "epoch": 2.8496756413529454,
      "grad_norm": 0.4623332619667053,
      "learning_rate": 4.32898664364832e-06,
      "loss": 0.0134,
      "step": 1741300
    },
    {
      "epoch": 2.849708371791599,
      "grad_norm": 0.29516157507896423,
      "learning_rate": 4.328920751434803e-06,
      "loss": 0.0142,
      "step": 1741320
    },
    {
      "epoch": 2.849741102230252,
      "grad_norm": 0.4778435528278351,
      "learning_rate": 4.328854859221286e-06,
      "loss": 0.0144,
      "step": 1741340
    },
    {
      "epoch": 2.8497738326689053,
      "grad_norm": 0.10104668885469437,
      "learning_rate": 4.328788967007769e-06,
      "loss": 0.0131,
      "step": 1741360
    },
    {
      "epoch": 2.849806563107559,
      "grad_norm": 0.3539782762527466,
      "learning_rate": 4.328723074794252e-06,
      "loss": 0.0127,
      "step": 1741380
    },
    {
      "epoch": 2.849839293546212,
      "grad_norm": 0.10981426388025284,
      "learning_rate": 4.328657182580735e-06,
      "loss": 0.0271,
      "step": 1741400
    },
    {
      "epoch": 2.8498720239848656,
      "grad_norm": 0.14112384617328644,
      "learning_rate": 4.3285912903672175e-06,
      "loss": 0.017,
      "step": 1741420
    },
    {
      "epoch": 2.8499047544235188,
      "grad_norm": 0.6470407247543335,
      "learning_rate": 4.328525398153701e-06,
      "loss": 0.014,
      "step": 1741440
    },
    {
      "epoch": 2.8499374848621724,
      "grad_norm": 0.33680278062820435,
      "learning_rate": 4.328459505940184e-06,
      "loss": 0.0163,
      "step": 1741460
    },
    {
      "epoch": 2.8499702153008255,
      "grad_norm": 0.5608583092689514,
      "learning_rate": 4.3283936137266665e-06,
      "loss": 0.0167,
      "step": 1741480
    },
    {
      "epoch": 2.8500029457394787,
      "grad_norm": 0.09579342603683472,
      "learning_rate": 4.328327721513149e-06,
      "loss": 0.012,
      "step": 1741500
    },
    {
      "epoch": 2.8500356761781322,
      "grad_norm": 0.33006754517555237,
      "learning_rate": 4.328261829299632e-06,
      "loss": 0.0135,
      "step": 1741520
    },
    {
      "epoch": 2.8500684066167854,
      "grad_norm": 0.06299089640378952,
      "learning_rate": 4.328195937086115e-06,
      "loss": 0.01,
      "step": 1741540
    },
    {
      "epoch": 2.8501011370554385,
      "grad_norm": 0.9074851870536804,
      "learning_rate": 4.3281300448725975e-06,
      "loss": 0.0132,
      "step": 1741560
    },
    {
      "epoch": 2.850133867494092,
      "grad_norm": 0.17174895107746124,
      "learning_rate": 4.32806415265908e-06,
      "loss": 0.0118,
      "step": 1741580
    },
    {
      "epoch": 2.8501665979327457,
      "grad_norm": 0.3743078410625458,
      "learning_rate": 4.327998260445563e-06,
      "loss": 0.0199,
      "step": 1741600
    },
    {
      "epoch": 2.850199328371399,
      "grad_norm": 0.19335228204727173,
      "learning_rate": 4.3279323682320466e-06,
      "loss": 0.0131,
      "step": 1741620
    },
    {
      "epoch": 2.850232058810052,
      "grad_norm": 0.9030726552009583,
      "learning_rate": 4.327866476018529e-06,
      "loss": 0.0162,
      "step": 1741640
    },
    {
      "epoch": 2.8502647892487056,
      "grad_norm": 0.21393810212612152,
      "learning_rate": 4.327800583805012e-06,
      "loss": 0.014,
      "step": 1741660
    },
    {
      "epoch": 2.850297519687359,
      "grad_norm": 0.28635942935943604,
      "learning_rate": 4.327734691591495e-06,
      "loss": 0.0159,
      "step": 1741680
    },
    {
      "epoch": 2.850330250126012,
      "grad_norm": 0.3870326280593872,
      "learning_rate": 4.327668799377978e-06,
      "loss": 0.0107,
      "step": 1741700
    },
    {
      "epoch": 2.8503629805646655,
      "grad_norm": 0.9328256845474243,
      "learning_rate": 4.327602907164461e-06,
      "loss": 0.0173,
      "step": 1741720
    },
    {
      "epoch": 2.850395711003319,
      "grad_norm": 0.638507068157196,
      "learning_rate": 4.327537014950944e-06,
      "loss": 0.0132,
      "step": 1741740
    },
    {
      "epoch": 2.8504284414419723,
      "grad_norm": 0.4106496274471283,
      "learning_rate": 4.327471122737427e-06,
      "loss": 0.0143,
      "step": 1741760
    },
    {
      "epoch": 2.8504611718806254,
      "grad_norm": 0.05857187882065773,
      "learning_rate": 4.327405230523909e-06,
      "loss": 0.0103,
      "step": 1741780
    },
    {
      "epoch": 2.850493902319279,
      "grad_norm": 0.43531346321105957,
      "learning_rate": 4.327339338310392e-06,
      "loss": 0.0162,
      "step": 1741800
    },
    {
      "epoch": 2.850526632757932,
      "grad_norm": 1.110899567604065,
      "learning_rate": 4.327273446096875e-06,
      "loss": 0.0117,
      "step": 1741820
    },
    {
      "epoch": 2.8505593631965853,
      "grad_norm": 0.1602592170238495,
      "learning_rate": 4.327207553883358e-06,
      "loss": 0.0136,
      "step": 1741840
    },
    {
      "epoch": 2.850592093635239,
      "grad_norm": 0.14870868623256683,
      "learning_rate": 4.327141661669841e-06,
      "loss": 0.009,
      "step": 1741860
    },
    {
      "epoch": 2.8506248240738925,
      "grad_norm": 0.13343384861946106,
      "learning_rate": 4.327075769456324e-06,
      "loss": 0.0152,
      "step": 1741880
    },
    {
      "epoch": 2.8506575545125457,
      "grad_norm": 0.13307254016399384,
      "learning_rate": 4.327009877242807e-06,
      "loss": 0.0212,
      "step": 1741900
    },
    {
      "epoch": 2.850690284951199,
      "grad_norm": 0.16463615000247955,
      "learning_rate": 4.326943985029289e-06,
      "loss": 0.0119,
      "step": 1741920
    },
    {
      "epoch": 2.8507230153898524,
      "grad_norm": 0.2189740538597107,
      "learning_rate": 4.326878092815772e-06,
      "loss": 0.0131,
      "step": 1741940
    },
    {
      "epoch": 2.8507557458285055,
      "grad_norm": 0.46352824568748474,
      "learning_rate": 4.326812200602255e-06,
      "loss": 0.0156,
      "step": 1741960
    },
    {
      "epoch": 2.8507884762671587,
      "grad_norm": 0.17382904887199402,
      "learning_rate": 4.326746308388738e-06,
      "loss": 0.0112,
      "step": 1741980
    },
    {
      "epoch": 2.8508212067058123,
      "grad_norm": 0.9423137903213501,
      "learning_rate": 4.32668041617522e-06,
      "loss": 0.0142,
      "step": 1742000
    },
    {
      "epoch": 2.8508539371444654,
      "grad_norm": 0.15618892014026642,
      "learning_rate": 4.326614523961704e-06,
      "loss": 0.0155,
      "step": 1742020
    },
    {
      "epoch": 2.850886667583119,
      "grad_norm": 0.3156106770038605,
      "learning_rate": 4.326548631748187e-06,
      "loss": 0.0097,
      "step": 1742040
    },
    {
      "epoch": 2.850919398021772,
      "grad_norm": 0.42469120025634766,
      "learning_rate": 4.326482739534669e-06,
      "loss": 0.0091,
      "step": 1742060
    },
    {
      "epoch": 2.850952128460426,
      "grad_norm": 0.3601832091808319,
      "learning_rate": 4.326416847321153e-06,
      "loss": 0.0148,
      "step": 1742080
    },
    {
      "epoch": 2.850984858899079,
      "grad_norm": 0.2711733281612396,
      "learning_rate": 4.326350955107636e-06,
      "loss": 0.0144,
      "step": 1742100
    },
    {
      "epoch": 2.851017589337732,
      "grad_norm": 0.5398470759391785,
      "learning_rate": 4.3262850628941185e-06,
      "loss": 0.0147,
      "step": 1742120
    },
    {
      "epoch": 2.8510503197763857,
      "grad_norm": 0.46423617005348206,
      "learning_rate": 4.326219170680601e-06,
      "loss": 0.0171,
      "step": 1742140
    },
    {
      "epoch": 2.851083050215039,
      "grad_norm": 0.9955984354019165,
      "learning_rate": 4.326153278467084e-06,
      "loss": 0.0093,
      "step": 1742160
    },
    {
      "epoch": 2.8511157806536924,
      "grad_norm": 0.3428042232990265,
      "learning_rate": 4.326087386253567e-06,
      "loss": 0.0142,
      "step": 1742180
    },
    {
      "epoch": 2.8511485110923456,
      "grad_norm": 0.09432175755500793,
      "learning_rate": 4.326021494040049e-06,
      "loss": 0.01,
      "step": 1742200
    },
    {
      "epoch": 2.851181241530999,
      "grad_norm": 0.15297266840934753,
      "learning_rate": 4.325955601826532e-06,
      "loss": 0.0144,
      "step": 1742220
    },
    {
      "epoch": 2.8512139719696523,
      "grad_norm": 0.24473440647125244,
      "learning_rate": 4.325889709613016e-06,
      "loss": 0.0161,
      "step": 1742240
    },
    {
      "epoch": 2.8512467024083055,
      "grad_norm": 0.4897414743900299,
      "learning_rate": 4.3258238173994985e-06,
      "loss": 0.0144,
      "step": 1742260
    },
    {
      "epoch": 2.851279432846959,
      "grad_norm": 0.8097198605537415,
      "learning_rate": 4.325757925185981e-06,
      "loss": 0.0137,
      "step": 1742280
    },
    {
      "epoch": 2.851312163285612,
      "grad_norm": 0.11588386446237564,
      "learning_rate": 4.325692032972464e-06,
      "loss": 0.0222,
      "step": 1742300
    },
    {
      "epoch": 2.851344893724266,
      "grad_norm": 0.18101149797439575,
      "learning_rate": 4.325626140758947e-06,
      "loss": 0.0089,
      "step": 1742320
    },
    {
      "epoch": 2.851377624162919,
      "grad_norm": 1.3593555688858032,
      "learning_rate": 4.3255602485454294e-06,
      "loss": 0.0143,
      "step": 1742340
    },
    {
      "epoch": 2.8514103546015725,
      "grad_norm": 0.2245534360408783,
      "learning_rate": 4.325494356331912e-06,
      "loss": 0.0188,
      "step": 1742360
    },
    {
      "epoch": 2.8514430850402257,
      "grad_norm": 0.364946573972702,
      "learning_rate": 4.325428464118395e-06,
      "loss": 0.0168,
      "step": 1742380
    },
    {
      "epoch": 2.851475815478879,
      "grad_norm": 0.11287946254014969,
      "learning_rate": 4.3253625719048785e-06,
      "loss": 0.0141,
      "step": 1742400
    },
    {
      "epoch": 2.8515085459175324,
      "grad_norm": 0.32795098423957825,
      "learning_rate": 4.325296679691361e-06,
      "loss": 0.0202,
      "step": 1742420
    },
    {
      "epoch": 2.8515412763561856,
      "grad_norm": 0.5475580096244812,
      "learning_rate": 4.325230787477844e-06,
      "loss": 0.0135,
      "step": 1742440
    },
    {
      "epoch": 2.851574006794839,
      "grad_norm": 1.2292476892471313,
      "learning_rate": 4.325164895264327e-06,
      "loss": 0.0184,
      "step": 1742460
    },
    {
      "epoch": 2.8516067372334923,
      "grad_norm": 0.275900661945343,
      "learning_rate": 4.32509900305081e-06,
      "loss": 0.013,
      "step": 1742480
    },
    {
      "epoch": 2.851639467672146,
      "grad_norm": 0.23366020619869232,
      "learning_rate": 4.325033110837293e-06,
      "loss": 0.0097,
      "step": 1742500
    },
    {
      "epoch": 2.851672198110799,
      "grad_norm": 0.34372055530548096,
      "learning_rate": 4.324967218623776e-06,
      "loss": 0.0119,
      "step": 1742520
    },
    {
      "epoch": 2.8517049285494522,
      "grad_norm": 0.527757465839386,
      "learning_rate": 4.3249013264102585e-06,
      "loss": 0.0123,
      "step": 1742540
    },
    {
      "epoch": 2.851737658988106,
      "grad_norm": 0.19433985650539398,
      "learning_rate": 4.324835434196741e-06,
      "loss": 0.0213,
      "step": 1742560
    },
    {
      "epoch": 2.851770389426759,
      "grad_norm": 0.3317295014858246,
      "learning_rate": 4.324769541983224e-06,
      "loss": 0.0156,
      "step": 1742580
    },
    {
      "epoch": 2.8518031198654126,
      "grad_norm": 0.33650362491607666,
      "learning_rate": 4.324703649769707e-06,
      "loss": 0.0109,
      "step": 1742600
    },
    {
      "epoch": 2.8518358503040657,
      "grad_norm": 0.22992128133773804,
      "learning_rate": 4.3246377575561895e-06,
      "loss": 0.0124,
      "step": 1742620
    },
    {
      "epoch": 2.8518685807427193,
      "grad_norm": 0.5532218217849731,
      "learning_rate": 4.324571865342673e-06,
      "loss": 0.0129,
      "step": 1742640
    },
    {
      "epoch": 2.8519013111813725,
      "grad_norm": 0.07901465147733688,
      "learning_rate": 4.324505973129156e-06,
      "loss": 0.0134,
      "step": 1742660
    },
    {
      "epoch": 2.8519340416200256,
      "grad_norm": 0.6232296228408813,
      "learning_rate": 4.3244400809156386e-06,
      "loss": 0.0152,
      "step": 1742680
    },
    {
      "epoch": 2.851966772058679,
      "grad_norm": 0.4890267252922058,
      "learning_rate": 4.324374188702121e-06,
      "loss": 0.0097,
      "step": 1742700
    },
    {
      "epoch": 2.8519995024973324,
      "grad_norm": 0.16295574605464935,
      "learning_rate": 4.324308296488604e-06,
      "loss": 0.0074,
      "step": 1742720
    },
    {
      "epoch": 2.852032232935986,
      "grad_norm": 0.15426123142242432,
      "learning_rate": 4.324242404275087e-06,
      "loss": 0.0133,
      "step": 1742740
    },
    {
      "epoch": 2.852064963374639,
      "grad_norm": 0.15857502818107605,
      "learning_rate": 4.32417651206157e-06,
      "loss": 0.011,
      "step": 1742760
    },
    {
      "epoch": 2.8520976938132927,
      "grad_norm": 0.3599041700363159,
      "learning_rate": 4.324110619848053e-06,
      "loss": 0.0122,
      "step": 1742780
    },
    {
      "epoch": 2.852130424251946,
      "grad_norm": 0.6718965768814087,
      "learning_rate": 4.324044727634536e-06,
      "loss": 0.0177,
      "step": 1742800
    },
    {
      "epoch": 2.852163154690599,
      "grad_norm": 0.49225884675979614,
      "learning_rate": 4.323978835421019e-06,
      "loss": 0.0148,
      "step": 1742820
    },
    {
      "epoch": 2.8521958851292526,
      "grad_norm": 0.2398253232240677,
      "learning_rate": 4.323912943207501e-06,
      "loss": 0.018,
      "step": 1742840
    },
    {
      "epoch": 2.8522286155679057,
      "grad_norm": 0.2916719615459442,
      "learning_rate": 4.323847050993985e-06,
      "loss": 0.0133,
      "step": 1742860
    },
    {
      "epoch": 2.8522613460065593,
      "grad_norm": 0.4221230149269104,
      "learning_rate": 4.323781158780468e-06,
      "loss": 0.0176,
      "step": 1742880
    },
    {
      "epoch": 2.8522940764452125,
      "grad_norm": 0.22625060379505157,
      "learning_rate": 4.32371526656695e-06,
      "loss": 0.0116,
      "step": 1742900
    },
    {
      "epoch": 2.852326806883866,
      "grad_norm": 0.15696105360984802,
      "learning_rate": 4.323649374353433e-06,
      "loss": 0.015,
      "step": 1742920
    },
    {
      "epoch": 2.8523595373225192,
      "grad_norm": 0.38624536991119385,
      "learning_rate": 4.323583482139916e-06,
      "loss": 0.0147,
      "step": 1742940
    },
    {
      "epoch": 2.8523922677611724,
      "grad_norm": 0.16418154537677765,
      "learning_rate": 4.323517589926399e-06,
      "loss": 0.0172,
      "step": 1742960
    },
    {
      "epoch": 2.852424998199826,
      "grad_norm": 0.24513740837574005,
      "learning_rate": 4.323451697712881e-06,
      "loss": 0.0132,
      "step": 1742980
    },
    {
      "epoch": 2.852457728638479,
      "grad_norm": 0.15861240029335022,
      "learning_rate": 4.323385805499364e-06,
      "loss": 0.0127,
      "step": 1743000
    },
    {
      "epoch": 2.8524904590771323,
      "grad_norm": 0.9456281661987305,
      "learning_rate": 4.323319913285847e-06,
      "loss": 0.0185,
      "step": 1743020
    },
    {
      "epoch": 2.852523189515786,
      "grad_norm": 0.5268593430519104,
      "learning_rate": 4.32325402107233e-06,
      "loss": 0.0143,
      "step": 1743040
    },
    {
      "epoch": 2.8525559199544395,
      "grad_norm": 0.9805175065994263,
      "learning_rate": 4.323188128858813e-06,
      "loss": 0.0085,
      "step": 1743060
    },
    {
      "epoch": 2.8525886503930926,
      "grad_norm": 0.1942087560892105,
      "learning_rate": 4.323122236645296e-06,
      "loss": 0.0179,
      "step": 1743080
    },
    {
      "epoch": 2.8526213808317458,
      "grad_norm": 0.19417017698287964,
      "learning_rate": 4.323056344431779e-06,
      "loss": 0.0161,
      "step": 1743100
    },
    {
      "epoch": 2.8526541112703994,
      "grad_norm": 2.2691681385040283,
      "learning_rate": 4.322990452218262e-06,
      "loss": 0.014,
      "step": 1743120
    },
    {
      "epoch": 2.8526868417090525,
      "grad_norm": 0.20101621747016907,
      "learning_rate": 4.322924560004745e-06,
      "loss": 0.0091,
      "step": 1743140
    },
    {
      "epoch": 2.8527195721477057,
      "grad_norm": 0.2548498213291168,
      "learning_rate": 4.322858667791228e-06,
      "loss": 0.0185,
      "step": 1743160
    },
    {
      "epoch": 2.8527523025863593,
      "grad_norm": 0.3438771069049835,
      "learning_rate": 4.3227927755777104e-06,
      "loss": 0.0148,
      "step": 1743180
    },
    {
      "epoch": 2.852785033025013,
      "grad_norm": 0.29651233553886414,
      "learning_rate": 4.322726883364193e-06,
      "loss": 0.0109,
      "step": 1743200
    },
    {
      "epoch": 2.852817763463666,
      "grad_norm": 0.502449095249176,
      "learning_rate": 4.322660991150676e-06,
      "loss": 0.0156,
      "step": 1743220
    },
    {
      "epoch": 2.852850493902319,
      "grad_norm": 0.31935691833496094,
      "learning_rate": 4.322595098937159e-06,
      "loss": 0.0138,
      "step": 1743240
    },
    {
      "epoch": 2.8528832243409727,
      "grad_norm": 0.23089155554771423,
      "learning_rate": 4.322529206723642e-06,
      "loss": 0.0161,
      "step": 1743260
    },
    {
      "epoch": 2.852915954779626,
      "grad_norm": 0.15713892877101898,
      "learning_rate": 4.322463314510125e-06,
      "loss": 0.0132,
      "step": 1743280
    },
    {
      "epoch": 2.852948685218279,
      "grad_norm": 0.2994137704372406,
      "learning_rate": 4.322397422296608e-06,
      "loss": 0.0115,
      "step": 1743300
    },
    {
      "epoch": 2.8529814156569326,
      "grad_norm": 0.8992346525192261,
      "learning_rate": 4.3223315300830905e-06,
      "loss": 0.0109,
      "step": 1743320
    },
    {
      "epoch": 2.8530141460955862,
      "grad_norm": 0.522698163986206,
      "learning_rate": 4.322265637869573e-06,
      "loss": 0.0119,
      "step": 1743340
    },
    {
      "epoch": 2.8530468765342394,
      "grad_norm": 2.6085116863250732,
      "learning_rate": 4.322199745656056e-06,
      "loss": 0.017,
      "step": 1743360
    },
    {
      "epoch": 2.8530796069728925,
      "grad_norm": 0.28106629848480225,
      "learning_rate": 4.322133853442539e-06,
      "loss": 0.0122,
      "step": 1743380
    },
    {
      "epoch": 2.853112337411546,
      "grad_norm": 0.09036995470523834,
      "learning_rate": 4.322067961229021e-06,
      "loss": 0.0167,
      "step": 1743400
    },
    {
      "epoch": 2.8531450678501993,
      "grad_norm": 0.7298551201820374,
      "learning_rate": 4.322002069015504e-06,
      "loss": 0.0151,
      "step": 1743420
    },
    {
      "epoch": 2.8531777982888524,
      "grad_norm": 0.5430260300636292,
      "learning_rate": 4.321936176801988e-06,
      "loss": 0.0129,
      "step": 1743440
    },
    {
      "epoch": 2.853210528727506,
      "grad_norm": 0.375791072845459,
      "learning_rate": 4.3218702845884705e-06,
      "loss": 0.0125,
      "step": 1743460
    },
    {
      "epoch": 2.8532432591661596,
      "grad_norm": 0.515669047832489,
      "learning_rate": 4.321804392374953e-06,
      "loss": 0.0112,
      "step": 1743480
    },
    {
      "epoch": 2.8532759896048128,
      "grad_norm": 0.7820332646369934,
      "learning_rate": 4.321738500161437e-06,
      "loss": 0.0128,
      "step": 1743500
    },
    {
      "epoch": 2.853308720043466,
      "grad_norm": 2.366891860961914,
      "learning_rate": 4.3216726079479196e-06,
      "loss": 0.0096,
      "step": 1743520
    },
    {
      "epoch": 2.8533414504821195,
      "grad_norm": 0.3643934428691864,
      "learning_rate": 4.321606715734402e-06,
      "loss": 0.0098,
      "step": 1743540
    },
    {
      "epoch": 2.8533741809207727,
      "grad_norm": 0.16593290865421295,
      "learning_rate": 4.321540823520885e-06,
      "loss": 0.0123,
      "step": 1743560
    },
    {
      "epoch": 2.853406911359426,
      "grad_norm": 0.257371187210083,
      "learning_rate": 4.321474931307368e-06,
      "loss": 0.0175,
      "step": 1743580
    },
    {
      "epoch": 2.8534396417980794,
      "grad_norm": 0.3162866532802582,
      "learning_rate": 4.3214090390938505e-06,
      "loss": 0.0143,
      "step": 1743600
    },
    {
      "epoch": 2.8534723722367326,
      "grad_norm": 0.7630677223205566,
      "learning_rate": 4.321343146880333e-06,
      "loss": 0.0089,
      "step": 1743620
    },
    {
      "epoch": 2.853505102675386,
      "grad_norm": 0.29897215962409973,
      "learning_rate": 4.321277254666816e-06,
      "loss": 0.0139,
      "step": 1743640
    },
    {
      "epoch": 2.8535378331140393,
      "grad_norm": 0.39017072319984436,
      "learning_rate": 4.3212113624533e-06,
      "loss": 0.0207,
      "step": 1743660
    },
    {
      "epoch": 2.853570563552693,
      "grad_norm": 0.1572885513305664,
      "learning_rate": 4.321145470239782e-06,
      "loss": 0.0158,
      "step": 1743680
    },
    {
      "epoch": 2.853603293991346,
      "grad_norm": 0.5401336550712585,
      "learning_rate": 4.321079578026265e-06,
      "loss": 0.0098,
      "step": 1743700
    },
    {
      "epoch": 2.853636024429999,
      "grad_norm": 0.141832172870636,
      "learning_rate": 4.321013685812748e-06,
      "loss": 0.0139,
      "step": 1743720
    },
    {
      "epoch": 2.853668754868653,
      "grad_norm": 0.9130141735076904,
      "learning_rate": 4.3209477935992305e-06,
      "loss": 0.0154,
      "step": 1743740
    },
    {
      "epoch": 2.853701485307306,
      "grad_norm": 0.11230380088090897,
      "learning_rate": 4.320881901385713e-06,
      "loss": 0.0158,
      "step": 1743760
    },
    {
      "epoch": 2.8537342157459595,
      "grad_norm": 1.4387359619140625,
      "learning_rate": 4.320816009172196e-06,
      "loss": 0.0142,
      "step": 1743780
    },
    {
      "epoch": 2.8537669461846127,
      "grad_norm": 0.2202482670545578,
      "learning_rate": 4.320750116958679e-06,
      "loss": 0.0145,
      "step": 1743800
    },
    {
      "epoch": 2.8537996766232663,
      "grad_norm": 0.37336963415145874,
      "learning_rate": 4.320684224745162e-06,
      "loss": 0.0121,
      "step": 1743820
    },
    {
      "epoch": 2.8538324070619194,
      "grad_norm": 0.19479212164878845,
      "learning_rate": 4.320618332531645e-06,
      "loss": 0.0106,
      "step": 1743840
    },
    {
      "epoch": 2.8538651375005726,
      "grad_norm": 0.4318169057369232,
      "learning_rate": 4.320552440318128e-06,
      "loss": 0.0119,
      "step": 1743860
    },
    {
      "epoch": 2.853897867939226,
      "grad_norm": 5.564185619354248,
      "learning_rate": 4.320486548104611e-06,
      "loss": 0.0131,
      "step": 1743880
    },
    {
      "epoch": 2.8539305983778793,
      "grad_norm": 1.061356544494629,
      "learning_rate": 4.320420655891094e-06,
      "loss": 0.0162,
      "step": 1743900
    },
    {
      "epoch": 2.853963328816533,
      "grad_norm": 0.34730592370033264,
      "learning_rate": 4.320354763677577e-06,
      "loss": 0.0134,
      "step": 1743920
    },
    {
      "epoch": 2.853996059255186,
      "grad_norm": 0.3831181526184082,
      "learning_rate": 4.32028887146406e-06,
      "loss": 0.0137,
      "step": 1743940
    },
    {
      "epoch": 2.8540287896938397,
      "grad_norm": 0.23722508549690247,
      "learning_rate": 4.320222979250542e-06,
      "loss": 0.0176,
      "step": 1743960
    },
    {
      "epoch": 2.854061520132493,
      "grad_norm": 0.8840737342834473,
      "learning_rate": 4.320157087037025e-06,
      "loss": 0.0182,
      "step": 1743980
    },
    {
      "epoch": 2.854094250571146,
      "grad_norm": 0.5521458387374878,
      "learning_rate": 4.320091194823508e-06,
      "loss": 0.0088,
      "step": 1744000
    },
    {
      "epoch": 2.8541269810097996,
      "grad_norm": 0.20207180082798004,
      "learning_rate": 4.320025302609991e-06,
      "loss": 0.0116,
      "step": 1744020
    },
    {
      "epoch": 2.8541597114484527,
      "grad_norm": 0.4740745723247528,
      "learning_rate": 4.319959410396473e-06,
      "loss": 0.0131,
      "step": 1744040
    },
    {
      "epoch": 2.8541924418871063,
      "grad_norm": 0.18962329626083374,
      "learning_rate": 4.319893518182957e-06,
      "loss": 0.0095,
      "step": 1744060
    },
    {
      "epoch": 2.8542251723257595,
      "grad_norm": 0.1987738013267517,
      "learning_rate": 4.31982762596944e-06,
      "loss": 0.0113,
      "step": 1744080
    },
    {
      "epoch": 2.854257902764413,
      "grad_norm": 0.5981836915016174,
      "learning_rate": 4.319761733755922e-06,
      "loss": 0.009,
      "step": 1744100
    },
    {
      "epoch": 2.854290633203066,
      "grad_norm": 0.44097644090652466,
      "learning_rate": 4.319695841542405e-06,
      "loss": 0.0133,
      "step": 1744120
    },
    {
      "epoch": 2.8543233636417193,
      "grad_norm": 0.6110021471977234,
      "learning_rate": 4.319629949328888e-06,
      "loss": 0.0125,
      "step": 1744140
    },
    {
      "epoch": 2.854356094080373,
      "grad_norm": 0.24361293017864227,
      "learning_rate": 4.3195640571153715e-06,
      "loss": 0.0139,
      "step": 1744160
    },
    {
      "epoch": 2.854388824519026,
      "grad_norm": 0.7688785791397095,
      "learning_rate": 4.319498164901854e-06,
      "loss": 0.0143,
      "step": 1744180
    },
    {
      "epoch": 2.8544215549576797,
      "grad_norm": 0.19503247737884521,
      "learning_rate": 4.319432272688337e-06,
      "loss": 0.0119,
      "step": 1744200
    },
    {
      "epoch": 2.854454285396333,
      "grad_norm": 1.010672926902771,
      "learning_rate": 4.31936638047482e-06,
      "loss": 0.0154,
      "step": 1744220
    },
    {
      "epoch": 2.8544870158349864,
      "grad_norm": 0.36343225836753845,
      "learning_rate": 4.319300488261302e-06,
      "loss": 0.0101,
      "step": 1744240
    },
    {
      "epoch": 2.8545197462736396,
      "grad_norm": 0.7238497138023376,
      "learning_rate": 4.319234596047785e-06,
      "loss": 0.0128,
      "step": 1744260
    },
    {
      "epoch": 2.8545524767122927,
      "grad_norm": 0.28218725323677063,
      "learning_rate": 4.319168703834269e-06,
      "loss": 0.0191,
      "step": 1744280
    },
    {
      "epoch": 2.8545852071509463,
      "grad_norm": 0.12858325242996216,
      "learning_rate": 4.3191028116207515e-06,
      "loss": 0.0164,
      "step": 1744300
    },
    {
      "epoch": 2.8546179375895995,
      "grad_norm": 0.2436765730381012,
      "learning_rate": 4.319036919407234e-06,
      "loss": 0.011,
      "step": 1744320
    },
    {
      "epoch": 2.854650668028253,
      "grad_norm": 0.36803561449050903,
      "learning_rate": 4.318971027193717e-06,
      "loss": 0.0121,
      "step": 1744340
    },
    {
      "epoch": 2.854683398466906,
      "grad_norm": 0.2978035509586334,
      "learning_rate": 4.3189051349802e-06,
      "loss": 0.0153,
      "step": 1744360
    },
    {
      "epoch": 2.85471612890556,
      "grad_norm": 1.302018404006958,
      "learning_rate": 4.3188392427666824e-06,
      "loss": 0.0118,
      "step": 1744380
    },
    {
      "epoch": 2.854748859344213,
      "grad_norm": 0.11153396219015121,
      "learning_rate": 4.318773350553165e-06,
      "loss": 0.0118,
      "step": 1744400
    },
    {
      "epoch": 2.854781589782866,
      "grad_norm": 0.3462645411491394,
      "learning_rate": 4.318707458339648e-06,
      "loss": 0.0093,
      "step": 1744420
    },
    {
      "epoch": 2.8548143202215197,
      "grad_norm": 0.5368784070014954,
      "learning_rate": 4.318641566126131e-06,
      "loss": 0.011,
      "step": 1744440
    },
    {
      "epoch": 2.854847050660173,
      "grad_norm": 0.22358998656272888,
      "learning_rate": 4.318575673912614e-06,
      "loss": 0.0145,
      "step": 1744460
    },
    {
      "epoch": 2.8548797810988265,
      "grad_norm": 0.14381073415279388,
      "learning_rate": 4.318509781699097e-06,
      "loss": 0.0091,
      "step": 1744480
    },
    {
      "epoch": 2.8549125115374796,
      "grad_norm": 0.9422749280929565,
      "learning_rate": 4.31844388948558e-06,
      "loss": 0.0146,
      "step": 1744500
    },
    {
      "epoch": 2.854945241976133,
      "grad_norm": 0.3086368441581726,
      "learning_rate": 4.318377997272063e-06,
      "loss": 0.0154,
      "step": 1744520
    },
    {
      "epoch": 2.8549779724147863,
      "grad_norm": 0.04702316224575043,
      "learning_rate": 4.318312105058546e-06,
      "loss": 0.0194,
      "step": 1744540
    },
    {
      "epoch": 2.8550107028534395,
      "grad_norm": 0.681300163269043,
      "learning_rate": 4.318246212845029e-06,
      "loss": 0.0164,
      "step": 1744560
    },
    {
      "epoch": 2.855043433292093,
      "grad_norm": 0.8684307932853699,
      "learning_rate": 4.3181803206315115e-06,
      "loss": 0.0126,
      "step": 1744580
    },
    {
      "epoch": 2.8550761637307462,
      "grad_norm": 0.3422645032405853,
      "learning_rate": 4.318114428417994e-06,
      "loss": 0.0168,
      "step": 1744600
    },
    {
      "epoch": 2.8551088941693994,
      "grad_norm": 0.21348926424980164,
      "learning_rate": 4.318048536204477e-06,
      "loss": 0.0118,
      "step": 1744620
    },
    {
      "epoch": 2.855141624608053,
      "grad_norm": 0.40728193521499634,
      "learning_rate": 4.31798264399096e-06,
      "loss": 0.0188,
      "step": 1744640
    },
    {
      "epoch": 2.8551743550467066,
      "grad_norm": 0.5370692014694214,
      "learning_rate": 4.3179167517774425e-06,
      "loss": 0.0164,
      "step": 1744660
    },
    {
      "epoch": 2.8552070854853597,
      "grad_norm": 0.6702713966369629,
      "learning_rate": 4.317850859563926e-06,
      "loss": 0.0163,
      "step": 1744680
    },
    {
      "epoch": 2.855239815924013,
      "grad_norm": 0.2652173936367035,
      "learning_rate": 4.317784967350409e-06,
      "loss": 0.0102,
      "step": 1744700
    },
    {
      "epoch": 2.8552725463626665,
      "grad_norm": 0.32107776403427124,
      "learning_rate": 4.3177190751368916e-06,
      "loss": 0.0221,
      "step": 1744720
    },
    {
      "epoch": 2.8553052768013196,
      "grad_norm": 0.34126725792884827,
      "learning_rate": 4.317653182923374e-06,
      "loss": 0.0085,
      "step": 1744740
    },
    {
      "epoch": 2.8553380072399728,
      "grad_norm": 0.17084959149360657,
      "learning_rate": 4.317587290709857e-06,
      "loss": 0.0104,
      "step": 1744760
    },
    {
      "epoch": 2.8553707376786264,
      "grad_norm": 0.38154104351997375,
      "learning_rate": 4.31752139849634e-06,
      "loss": 0.0172,
      "step": 1744780
    },
    {
      "epoch": 2.85540346811728,
      "grad_norm": 0.5121158957481384,
      "learning_rate": 4.3174555062828225e-06,
      "loss": 0.012,
      "step": 1744800
    },
    {
      "epoch": 2.855436198555933,
      "grad_norm": 0.22939792275428772,
      "learning_rate": 4.317389614069305e-06,
      "loss": 0.0112,
      "step": 1744820
    },
    {
      "epoch": 2.8554689289945863,
      "grad_norm": 0.1817111372947693,
      "learning_rate": 4.317323721855788e-06,
      "loss": 0.0144,
      "step": 1744840
    },
    {
      "epoch": 2.85550165943324,
      "grad_norm": 0.3325262665748596,
      "learning_rate": 4.317257829642272e-06,
      "loss": 0.0113,
      "step": 1744860
    },
    {
      "epoch": 2.855534389871893,
      "grad_norm": 0.5990676879882812,
      "learning_rate": 4.317191937428754e-06,
      "loss": 0.0143,
      "step": 1744880
    },
    {
      "epoch": 2.855567120310546,
      "grad_norm": 0.9520576596260071,
      "learning_rate": 4.317126045215237e-06,
      "loss": 0.0116,
      "step": 1744900
    },
    {
      "epoch": 2.8555998507491998,
      "grad_norm": 0.058159057050943375,
      "learning_rate": 4.317060153001721e-06,
      "loss": 0.014,
      "step": 1744920
    },
    {
      "epoch": 2.8556325811878533,
      "grad_norm": 0.11346197128295898,
      "learning_rate": 4.316994260788203e-06,
      "loss": 0.0138,
      "step": 1744940
    },
    {
      "epoch": 2.8556653116265065,
      "grad_norm": 0.35467246174812317,
      "learning_rate": 4.316928368574686e-06,
      "loss": 0.014,
      "step": 1744960
    },
    {
      "epoch": 2.8556980420651596,
      "grad_norm": 0.23482131958007812,
      "learning_rate": 4.316862476361169e-06,
      "loss": 0.0192,
      "step": 1744980
    },
    {
      "epoch": 2.8557307725038132,
      "grad_norm": 0.7891115546226501,
      "learning_rate": 4.316796584147652e-06,
      "loss": 0.0139,
      "step": 1745000
    },
    {
      "epoch": 2.8557635029424664,
      "grad_norm": 0.1865149736404419,
      "learning_rate": 4.316730691934134e-06,
      "loss": 0.0159,
      "step": 1745020
    },
    {
      "epoch": 2.8557962333811195,
      "grad_norm": 0.5466490387916565,
      "learning_rate": 4.316664799720617e-06,
      "loss": 0.0177,
      "step": 1745040
    },
    {
      "epoch": 2.855828963819773,
      "grad_norm": 0.40925636887550354,
      "learning_rate": 4.3165989075071e-06,
      "loss": 0.0108,
      "step": 1745060
    },
    {
      "epoch": 2.8558616942584263,
      "grad_norm": 0.22740601003170013,
      "learning_rate": 4.316533015293583e-06,
      "loss": 0.0097,
      "step": 1745080
    },
    {
      "epoch": 2.85589442469708,
      "grad_norm": 0.15309292078018188,
      "learning_rate": 4.316467123080066e-06,
      "loss": 0.0089,
      "step": 1745100
    },
    {
      "epoch": 2.855927155135733,
      "grad_norm": 0.5293640494346619,
      "learning_rate": 4.316401230866549e-06,
      "loss": 0.0128,
      "step": 1745120
    },
    {
      "epoch": 2.8559598855743866,
      "grad_norm": 0.5591891407966614,
      "learning_rate": 4.316335338653032e-06,
      "loss": 0.0151,
      "step": 1745140
    },
    {
      "epoch": 2.8559926160130398,
      "grad_norm": 0.4498472809791565,
      "learning_rate": 4.316269446439514e-06,
      "loss": 0.0092,
      "step": 1745160
    },
    {
      "epoch": 2.856025346451693,
      "grad_norm": 0.2752664089202881,
      "learning_rate": 4.316203554225997e-06,
      "loss": 0.0089,
      "step": 1745180
    },
    {
      "epoch": 2.8560580768903465,
      "grad_norm": 0.370908260345459,
      "learning_rate": 4.31613766201248e-06,
      "loss": 0.0151,
      "step": 1745200
    },
    {
      "epoch": 2.8560908073289997,
      "grad_norm": 0.19205211102962494,
      "learning_rate": 4.3160717697989634e-06,
      "loss": 0.0126,
      "step": 1745220
    },
    {
      "epoch": 2.8561235377676533,
      "grad_norm": 0.2228369414806366,
      "learning_rate": 4.316005877585446e-06,
      "loss": 0.0124,
      "step": 1745240
    },
    {
      "epoch": 2.8561562682063064,
      "grad_norm": 0.42711353302001953,
      "learning_rate": 4.315939985371929e-06,
      "loss": 0.0168,
      "step": 1745260
    },
    {
      "epoch": 2.85618899864496,
      "grad_norm": 0.18911714851856232,
      "learning_rate": 4.315874093158412e-06,
      "loss": 0.0117,
      "step": 1745280
    },
    {
      "epoch": 2.856221729083613,
      "grad_norm": 0.21288418769836426,
      "learning_rate": 4.315808200944895e-06,
      "loss": 0.0156,
      "step": 1745300
    },
    {
      "epoch": 2.8562544595222663,
      "grad_norm": 2.115419387817383,
      "learning_rate": 4.315742308731378e-06,
      "loss": 0.0191,
      "step": 1745320
    },
    {
      "epoch": 2.85628718996092,
      "grad_norm": 0.44268909096717834,
      "learning_rate": 4.315676416517861e-06,
      "loss": 0.0152,
      "step": 1745340
    },
    {
      "epoch": 2.856319920399573,
      "grad_norm": 0.14576353132724762,
      "learning_rate": 4.3156105243043435e-06,
      "loss": 0.0132,
      "step": 1745360
    },
    {
      "epoch": 2.8563526508382266,
      "grad_norm": 0.7689421772956848,
      "learning_rate": 4.315544632090826e-06,
      "loss": 0.0118,
      "step": 1745380
    },
    {
      "epoch": 2.85638538127688,
      "grad_norm": 0.30372002720832825,
      "learning_rate": 4.315478739877309e-06,
      "loss": 0.0108,
      "step": 1745400
    },
    {
      "epoch": 2.8564181117155334,
      "grad_norm": 0.8240489959716797,
      "learning_rate": 4.315412847663792e-06,
      "loss": 0.0124,
      "step": 1745420
    },
    {
      "epoch": 2.8564508421541865,
      "grad_norm": 0.34206050634384155,
      "learning_rate": 4.3153469554502744e-06,
      "loss": 0.0147,
      "step": 1745440
    },
    {
      "epoch": 2.8564835725928397,
      "grad_norm": 0.5997934937477112,
      "learning_rate": 4.315281063236757e-06,
      "loss": 0.0218,
      "step": 1745460
    },
    {
      "epoch": 2.8565163030314933,
      "grad_norm": 0.41643503308296204,
      "learning_rate": 4.315215171023241e-06,
      "loss": 0.0146,
      "step": 1745480
    },
    {
      "epoch": 2.8565490334701464,
      "grad_norm": 0.20299793779850006,
      "learning_rate": 4.3151492788097235e-06,
      "loss": 0.0132,
      "step": 1745500
    },
    {
      "epoch": 2.8565817639088,
      "grad_norm": 0.47585248947143555,
      "learning_rate": 4.315083386596206e-06,
      "loss": 0.0138,
      "step": 1745520
    },
    {
      "epoch": 2.856614494347453,
      "grad_norm": 0.14862743020057678,
      "learning_rate": 4.315017494382689e-06,
      "loss": 0.0128,
      "step": 1745540
    },
    {
      "epoch": 2.8566472247861068,
      "grad_norm": 0.20946872234344482,
      "learning_rate": 4.314951602169172e-06,
      "loss": 0.0155,
      "step": 1745560
    },
    {
      "epoch": 2.85667995522476,
      "grad_norm": 0.422298789024353,
      "learning_rate": 4.314885709955655e-06,
      "loss": 0.0167,
      "step": 1745580
    },
    {
      "epoch": 2.856712685663413,
      "grad_norm": 0.16031821072101593,
      "learning_rate": 4.314819817742138e-06,
      "loss": 0.0108,
      "step": 1745600
    },
    {
      "epoch": 2.8567454161020667,
      "grad_norm": 0.8064621686935425,
      "learning_rate": 4.314753925528621e-06,
      "loss": 0.016,
      "step": 1745620
    },
    {
      "epoch": 2.85677814654072,
      "grad_norm": 0.297951877117157,
      "learning_rate": 4.3146880333151035e-06,
      "loss": 0.0104,
      "step": 1745640
    },
    {
      "epoch": 2.8568108769793734,
      "grad_norm": 0.1812669038772583,
      "learning_rate": 4.314622141101586e-06,
      "loss": 0.0135,
      "step": 1745660
    },
    {
      "epoch": 2.8568436074180266,
      "grad_norm": 0.6975072622299194,
      "learning_rate": 4.314556248888069e-06,
      "loss": 0.014,
      "step": 1745680
    },
    {
      "epoch": 2.85687633785668,
      "grad_norm": 0.3430289030075073,
      "learning_rate": 4.314490356674553e-06,
      "loss": 0.0127,
      "step": 1745700
    },
    {
      "epoch": 2.8569090682953333,
      "grad_norm": 0.3399984538555145,
      "learning_rate": 4.314424464461035e-06,
      "loss": 0.0161,
      "step": 1745720
    },
    {
      "epoch": 2.8569417987339865,
      "grad_norm": 0.4864431321620941,
      "learning_rate": 4.314358572247518e-06,
      "loss": 0.0121,
      "step": 1745740
    },
    {
      "epoch": 2.85697452917264,
      "grad_norm": 0.27410995960235596,
      "learning_rate": 4.314292680034001e-06,
      "loss": 0.0155,
      "step": 1745760
    },
    {
      "epoch": 2.857007259611293,
      "grad_norm": 0.31558284163475037,
      "learning_rate": 4.3142267878204835e-06,
      "loss": 0.0176,
      "step": 1745780
    },
    {
      "epoch": 2.857039990049947,
      "grad_norm": 0.15397346019744873,
      "learning_rate": 4.314160895606966e-06,
      "loss": 0.0106,
      "step": 1745800
    },
    {
      "epoch": 2.8570727204886,
      "grad_norm": 0.47676292061805725,
      "learning_rate": 4.314095003393449e-06,
      "loss": 0.0098,
      "step": 1745820
    },
    {
      "epoch": 2.8571054509272535,
      "grad_norm": 0.3890053927898407,
      "learning_rate": 4.314029111179932e-06,
      "loss": 0.0147,
      "step": 1745840
    },
    {
      "epoch": 2.8571381813659067,
      "grad_norm": 0.2633688151836395,
      "learning_rate": 4.3139632189664145e-06,
      "loss": 0.0198,
      "step": 1745860
    },
    {
      "epoch": 2.85717091180456,
      "grad_norm": 0.3764120936393738,
      "learning_rate": 4.313897326752898e-06,
      "loss": 0.0145,
      "step": 1745880
    },
    {
      "epoch": 2.8572036422432134,
      "grad_norm": 0.1798872947692871,
      "learning_rate": 4.313831434539381e-06,
      "loss": 0.0183,
      "step": 1745900
    },
    {
      "epoch": 2.8572363726818666,
      "grad_norm": 0.3118186891078949,
      "learning_rate": 4.3137655423258636e-06,
      "loss": 0.0132,
      "step": 1745920
    },
    {
      "epoch": 2.85726910312052,
      "grad_norm": 0.661341667175293,
      "learning_rate": 4.313699650112347e-06,
      "loss": 0.0174,
      "step": 1745940
    },
    {
      "epoch": 2.8573018335591733,
      "grad_norm": 0.2474614679813385,
      "learning_rate": 4.31363375789883e-06,
      "loss": 0.0162,
      "step": 1745960
    },
    {
      "epoch": 2.857334563997827,
      "grad_norm": 0.23271581530570984,
      "learning_rate": 4.313567865685313e-06,
      "loss": 0.0176,
      "step": 1745980
    },
    {
      "epoch": 2.85736729443648,
      "grad_norm": 0.04719262570142746,
      "learning_rate": 4.313501973471795e-06,
      "loss": 0.0181,
      "step": 1746000
    },
    {
      "epoch": 2.8574000248751332,
      "grad_norm": 0.8194255232810974,
      "learning_rate": 4.313436081258278e-06,
      "loss": 0.0154,
      "step": 1746020
    },
    {
      "epoch": 2.857432755313787,
      "grad_norm": 0.9536597728729248,
      "learning_rate": 4.313370189044761e-06,
      "loss": 0.0132,
      "step": 1746040
    },
    {
      "epoch": 2.85746548575244,
      "grad_norm": 2.0238635540008545,
      "learning_rate": 4.313304296831244e-06,
      "loss": 0.013,
      "step": 1746060
    },
    {
      "epoch": 2.857498216191093,
      "grad_norm": 0.3592715859413147,
      "learning_rate": 4.313238404617726e-06,
      "loss": 0.0121,
      "step": 1746080
    },
    {
      "epoch": 2.8575309466297467,
      "grad_norm": 0.41815951466560364,
      "learning_rate": 4.31317251240421e-06,
      "loss": 0.0121,
      "step": 1746100
    },
    {
      "epoch": 2.8575636770684003,
      "grad_norm": 0.3238433599472046,
      "learning_rate": 4.313106620190693e-06,
      "loss": 0.0121,
      "step": 1746120
    },
    {
      "epoch": 2.8575964075070535,
      "grad_norm": 0.3912450969219208,
      "learning_rate": 4.313040727977175e-06,
      "loss": 0.0168,
      "step": 1746140
    },
    {
      "epoch": 2.8576291379457066,
      "grad_norm": 0.4313991367816925,
      "learning_rate": 4.312974835763658e-06,
      "loss": 0.015,
      "step": 1746160
    },
    {
      "epoch": 2.85766186838436,
      "grad_norm": 0.1422739028930664,
      "learning_rate": 4.312908943550141e-06,
      "loss": 0.0091,
      "step": 1746180
    },
    {
      "epoch": 2.8576945988230134,
      "grad_norm": 0.12495771795511246,
      "learning_rate": 4.312843051336624e-06,
      "loss": 0.0133,
      "step": 1746200
    },
    {
      "epoch": 2.8577273292616665,
      "grad_norm": 0.26614823937416077,
      "learning_rate": 4.312777159123106e-06,
      "loss": 0.0155,
      "step": 1746220
    },
    {
      "epoch": 2.85776005970032,
      "grad_norm": 0.11188321560621262,
      "learning_rate": 4.312711266909589e-06,
      "loss": 0.011,
      "step": 1746240
    },
    {
      "epoch": 2.8577927901389737,
      "grad_norm": 0.2731846868991852,
      "learning_rate": 4.312645374696072e-06,
      "loss": 0.0153,
      "step": 1746260
    },
    {
      "epoch": 2.857825520577627,
      "grad_norm": 0.23678316175937653,
      "learning_rate": 4.3125794824825554e-06,
      "loss": 0.0103,
      "step": 1746280
    },
    {
      "epoch": 2.85785825101628,
      "grad_norm": 0.36706680059432983,
      "learning_rate": 4.312513590269038e-06,
      "loss": 0.0074,
      "step": 1746300
    },
    {
      "epoch": 2.8578909814549336,
      "grad_norm": 0.5563664436340332,
      "learning_rate": 4.312447698055521e-06,
      "loss": 0.0105,
      "step": 1746320
    },
    {
      "epoch": 2.8579237118935867,
      "grad_norm": 0.1672125607728958,
      "learning_rate": 4.3123818058420045e-06,
      "loss": 0.011,
      "step": 1746340
    },
    {
      "epoch": 2.85795644233224,
      "grad_norm": 0.3273845613002777,
      "learning_rate": 4.312315913628487e-06,
      "loss": 0.009,
      "step": 1746360
    },
    {
      "epoch": 2.8579891727708935,
      "grad_norm": 0.332274466753006,
      "learning_rate": 4.31225002141497e-06,
      "loss": 0.0113,
      "step": 1746380
    },
    {
      "epoch": 2.858021903209547,
      "grad_norm": 0.35314884781837463,
      "learning_rate": 4.312184129201453e-06,
      "loss": 0.0147,
      "step": 1746400
    },
    {
      "epoch": 2.8580546336482002,
      "grad_norm": 0.18383806943893433,
      "learning_rate": 4.3121182369879355e-06,
      "loss": 0.0117,
      "step": 1746420
    },
    {
      "epoch": 2.8580873640868534,
      "grad_norm": 0.3613533675670624,
      "learning_rate": 4.312052344774418e-06,
      "loss": 0.0152,
      "step": 1746440
    },
    {
      "epoch": 2.858120094525507,
      "grad_norm": 0.2823970317840576,
      "learning_rate": 4.311986452560901e-06,
      "loss": 0.0145,
      "step": 1746460
    },
    {
      "epoch": 2.85815282496416,
      "grad_norm": 0.46647876501083374,
      "learning_rate": 4.311920560347384e-06,
      "loss": 0.0087,
      "step": 1746480
    },
    {
      "epoch": 2.8581855554028133,
      "grad_norm": 0.142385333776474,
      "learning_rate": 4.311854668133867e-06,
      "loss": 0.0104,
      "step": 1746500
    },
    {
      "epoch": 2.858218285841467,
      "grad_norm": 0.18713217973709106,
      "learning_rate": 4.31178877592035e-06,
      "loss": 0.0153,
      "step": 1746520
    },
    {
      "epoch": 2.8582510162801205,
      "grad_norm": 0.24925771355628967,
      "learning_rate": 4.311722883706833e-06,
      "loss": 0.0116,
      "step": 1746540
    },
    {
      "epoch": 2.8582837467187736,
      "grad_norm": 0.511920690536499,
      "learning_rate": 4.3116569914933155e-06,
      "loss": 0.0176,
      "step": 1746560
    },
    {
      "epoch": 2.8583164771574268,
      "grad_norm": 0.6894045472145081,
      "learning_rate": 4.311591099279798e-06,
      "loss": 0.0139,
      "step": 1746580
    },
    {
      "epoch": 2.8583492075960804,
      "grad_norm": 0.5065536499023438,
      "learning_rate": 4.311525207066281e-06,
      "loss": 0.0154,
      "step": 1746600
    },
    {
      "epoch": 2.8583819380347335,
      "grad_norm": 0.25158995389938354,
      "learning_rate": 4.3114593148527645e-06,
      "loss": 0.0127,
      "step": 1746620
    },
    {
      "epoch": 2.8584146684733867,
      "grad_norm": 0.10517087578773499,
      "learning_rate": 4.311393422639247e-06,
      "loss": 0.0114,
      "step": 1746640
    },
    {
      "epoch": 2.8584473989120402,
      "grad_norm": 0.5705699920654297,
      "learning_rate": 4.31132753042573e-06,
      "loss": 0.0085,
      "step": 1746660
    },
    {
      "epoch": 2.8584801293506934,
      "grad_norm": 0.28011441230773926,
      "learning_rate": 4.311261638212213e-06,
      "loss": 0.0174,
      "step": 1746680
    },
    {
      "epoch": 2.858512859789347,
      "grad_norm": 0.37220504879951477,
      "learning_rate": 4.3111957459986955e-06,
      "loss": 0.0146,
      "step": 1746700
    },
    {
      "epoch": 2.858545590228,
      "grad_norm": 0.36496466398239136,
      "learning_rate": 4.311129853785179e-06,
      "loss": 0.0127,
      "step": 1746720
    },
    {
      "epoch": 2.8585783206666537,
      "grad_norm": 0.19074881076812744,
      "learning_rate": 4.311063961571662e-06,
      "loss": 0.0115,
      "step": 1746740
    },
    {
      "epoch": 2.858611051105307,
      "grad_norm": 1.3722327947616577,
      "learning_rate": 4.3109980693581446e-06,
      "loss": 0.0158,
      "step": 1746760
    },
    {
      "epoch": 2.85864378154396,
      "grad_norm": 0.15749028325080872,
      "learning_rate": 4.310932177144627e-06,
      "loss": 0.0127,
      "step": 1746780
    },
    {
      "epoch": 2.8586765119826136,
      "grad_norm": 0.29105013608932495,
      "learning_rate": 4.31086628493111e-06,
      "loss": 0.011,
      "step": 1746800
    },
    {
      "epoch": 2.858709242421267,
      "grad_norm": 0.24105854332447052,
      "learning_rate": 4.310800392717593e-06,
      "loss": 0.0125,
      "step": 1746820
    },
    {
      "epoch": 2.8587419728599204,
      "grad_norm": 0.33503544330596924,
      "learning_rate": 4.3107345005040755e-06,
      "loss": 0.0144,
      "step": 1746840
    },
    {
      "epoch": 2.8587747032985735,
      "grad_norm": 0.12200822681188583,
      "learning_rate": 4.310668608290558e-06,
      "loss": 0.012,
      "step": 1746860
    },
    {
      "epoch": 2.858807433737227,
      "grad_norm": 0.5236957669258118,
      "learning_rate": 4.310602716077041e-06,
      "loss": 0.011,
      "step": 1746880
    },
    {
      "epoch": 2.8588401641758803,
      "grad_norm": 0.22204044461250305,
      "learning_rate": 4.310536823863525e-06,
      "loss": 0.012,
      "step": 1746900
    },
    {
      "epoch": 2.8588728946145334,
      "grad_norm": 0.5903204083442688,
      "learning_rate": 4.310470931650007e-06,
      "loss": 0.0182,
      "step": 1746920
    },
    {
      "epoch": 2.858905625053187,
      "grad_norm": 0.38315942883491516,
      "learning_rate": 4.31040503943649e-06,
      "loss": 0.0093,
      "step": 1746940
    },
    {
      "epoch": 2.85893835549184,
      "grad_norm": 0.12821482121944427,
      "learning_rate": 4.310339147222973e-06,
      "loss": 0.015,
      "step": 1746960
    },
    {
      "epoch": 2.8589710859304938,
      "grad_norm": 0.24662958085536957,
      "learning_rate": 4.310273255009456e-06,
      "loss": 0.0079,
      "step": 1746980
    },
    {
      "epoch": 2.859003816369147,
      "grad_norm": 0.2148975431919098,
      "learning_rate": 4.310207362795939e-06,
      "loss": 0.0118,
      "step": 1747000
    },
    {
      "epoch": 2.8590365468078005,
      "grad_norm": 0.23170264065265656,
      "learning_rate": 4.310141470582422e-06,
      "loss": 0.0159,
      "step": 1747020
    },
    {
      "epoch": 2.8590692772464537,
      "grad_norm": 0.5039779543876648,
      "learning_rate": 4.310075578368905e-06,
      "loss": 0.0145,
      "step": 1747040
    },
    {
      "epoch": 2.859102007685107,
      "grad_norm": 2.9684715270996094,
      "learning_rate": 4.310009686155387e-06,
      "loss": 0.0115,
      "step": 1747060
    },
    {
      "epoch": 2.8591347381237604,
      "grad_norm": 0.10241150110960007,
      "learning_rate": 4.30994379394187e-06,
      "loss": 0.0188,
      "step": 1747080
    },
    {
      "epoch": 2.8591674685624135,
      "grad_norm": 0.5560650825500488,
      "learning_rate": 4.309877901728353e-06,
      "loss": 0.0181,
      "step": 1747100
    },
    {
      "epoch": 2.859200199001067,
      "grad_norm": 0.5005099177360535,
      "learning_rate": 4.3098120095148364e-06,
      "loss": 0.0104,
      "step": 1747120
    },
    {
      "epoch": 2.8592329294397203,
      "grad_norm": 0.06703861057758331,
      "learning_rate": 4.309746117301319e-06,
      "loss": 0.0124,
      "step": 1747140
    },
    {
      "epoch": 2.859265659878374,
      "grad_norm": 0.3583463728427887,
      "learning_rate": 4.309680225087802e-06,
      "loss": 0.0199,
      "step": 1747160
    },
    {
      "epoch": 2.859298390317027,
      "grad_norm": 0.8205341696739197,
      "learning_rate": 4.309614332874285e-06,
      "loss": 0.0176,
      "step": 1747180
    },
    {
      "epoch": 2.85933112075568,
      "grad_norm": 0.2955167591571808,
      "learning_rate": 4.309548440660767e-06,
      "loss": 0.0128,
      "step": 1747200
    },
    {
      "epoch": 2.859363851194334,
      "grad_norm": 0.15283729135990143,
      "learning_rate": 4.30948254844725e-06,
      "loss": 0.0104,
      "step": 1747220
    },
    {
      "epoch": 2.859396581632987,
      "grad_norm": 0.5173459649085999,
      "learning_rate": 4.309416656233733e-06,
      "loss": 0.0154,
      "step": 1747240
    },
    {
      "epoch": 2.8594293120716405,
      "grad_norm": 0.34277209639549255,
      "learning_rate": 4.309350764020216e-06,
      "loss": 0.0133,
      "step": 1747260
    },
    {
      "epoch": 2.8594620425102937,
      "grad_norm": 0.2951372265815735,
      "learning_rate": 4.309284871806698e-06,
      "loss": 0.0112,
      "step": 1747280
    },
    {
      "epoch": 2.8594947729489473,
      "grad_norm": 0.2249646782875061,
      "learning_rate": 4.309218979593182e-06,
      "loss": 0.0143,
      "step": 1747300
    },
    {
      "epoch": 2.8595275033876004,
      "grad_norm": 0.1415943056344986,
      "learning_rate": 4.309153087379665e-06,
      "loss": 0.0128,
      "step": 1747320
    },
    {
      "epoch": 2.8595602338262536,
      "grad_norm": 0.2565685212612152,
      "learning_rate": 4.309087195166147e-06,
      "loss": 0.0139,
      "step": 1747340
    },
    {
      "epoch": 2.859592964264907,
      "grad_norm": 0.6096978187561035,
      "learning_rate": 4.309021302952631e-06,
      "loss": 0.0125,
      "step": 1747360
    },
    {
      "epoch": 2.8596256947035603,
      "grad_norm": 0.8652006387710571,
      "learning_rate": 4.308955410739114e-06,
      "loss": 0.0188,
      "step": 1747380
    },
    {
      "epoch": 2.859658425142214,
      "grad_norm": 0.40539243817329407,
      "learning_rate": 4.3088895185255965e-06,
      "loss": 0.0124,
      "step": 1747400
    },
    {
      "epoch": 2.859691155580867,
      "grad_norm": 0.2688143253326416,
      "learning_rate": 4.308823626312079e-06,
      "loss": 0.0138,
      "step": 1747420
    },
    {
      "epoch": 2.8597238860195207,
      "grad_norm": 0.4794965982437134,
      "learning_rate": 4.308757734098562e-06,
      "loss": 0.0174,
      "step": 1747440
    },
    {
      "epoch": 2.859756616458174,
      "grad_norm": 0.2774421274662018,
      "learning_rate": 4.308691841885045e-06,
      "loss": 0.0152,
      "step": 1747460
    },
    {
      "epoch": 2.859789346896827,
      "grad_norm": 0.4495764374732971,
      "learning_rate": 4.3086259496715274e-06,
      "loss": 0.0108,
      "step": 1747480
    },
    {
      "epoch": 2.8598220773354806,
      "grad_norm": 0.17245517671108246,
      "learning_rate": 4.30856005745801e-06,
      "loss": 0.0067,
      "step": 1747500
    },
    {
      "epoch": 2.8598548077741337,
      "grad_norm": 0.0758962407708168,
      "learning_rate": 4.308494165244494e-06,
      "loss": 0.0114,
      "step": 1747520
    },
    {
      "epoch": 2.859887538212787,
      "grad_norm": 0.4377395510673523,
      "learning_rate": 4.3084282730309765e-06,
      "loss": 0.017,
      "step": 1747540
    },
    {
      "epoch": 2.8599202686514404,
      "grad_norm": 0.23850354552268982,
      "learning_rate": 4.308362380817459e-06,
      "loss": 0.0138,
      "step": 1747560
    },
    {
      "epoch": 2.859952999090094,
      "grad_norm": 0.32315191626548767,
      "learning_rate": 4.308296488603942e-06,
      "loss": 0.0148,
      "step": 1747580
    },
    {
      "epoch": 2.859985729528747,
      "grad_norm": 0.2992759346961975,
      "learning_rate": 4.308230596390425e-06,
      "loss": 0.012,
      "step": 1747600
    },
    {
      "epoch": 2.8600184599674003,
      "grad_norm": 0.22453682124614716,
      "learning_rate": 4.3081647041769075e-06,
      "loss": 0.0137,
      "step": 1747620
    },
    {
      "epoch": 2.860051190406054,
      "grad_norm": 0.41636034846305847,
      "learning_rate": 4.30809881196339e-06,
      "loss": 0.0128,
      "step": 1747640
    },
    {
      "epoch": 2.860083920844707,
      "grad_norm": 0.20244668424129486,
      "learning_rate": 4.308032919749873e-06,
      "loss": 0.0081,
      "step": 1747660
    },
    {
      "epoch": 2.8601166512833602,
      "grad_norm": 0.24286618828773499,
      "learning_rate": 4.3079670275363565e-06,
      "loss": 0.0123,
      "step": 1747680
    },
    {
      "epoch": 2.860149381722014,
      "grad_norm": 0.15893185138702393,
      "learning_rate": 4.307901135322839e-06,
      "loss": 0.0172,
      "step": 1747700
    },
    {
      "epoch": 2.8601821121606674,
      "grad_norm": 0.13161209225654602,
      "learning_rate": 4.307835243109322e-06,
      "loss": 0.0184,
      "step": 1747720
    },
    {
      "epoch": 2.8602148425993206,
      "grad_norm": 0.1738283783197403,
      "learning_rate": 4.307769350895806e-06,
      "loss": 0.0122,
      "step": 1747740
    },
    {
      "epoch": 2.8602475730379737,
      "grad_norm": 0.15744155645370483,
      "learning_rate": 4.307703458682288e-06,
      "loss": 0.0113,
      "step": 1747760
    },
    {
      "epoch": 2.8602803034766273,
      "grad_norm": 0.5390207767486572,
      "learning_rate": 4.307637566468771e-06,
      "loss": 0.0105,
      "step": 1747780
    },
    {
      "epoch": 2.8603130339152805,
      "grad_norm": 0.2523530125617981,
      "learning_rate": 4.307571674255254e-06,
      "loss": 0.0145,
      "step": 1747800
    },
    {
      "epoch": 2.8603457643539336,
      "grad_norm": 0.7178278565406799,
      "learning_rate": 4.3075057820417366e-06,
      "loss": 0.0129,
      "step": 1747820
    },
    {
      "epoch": 2.860378494792587,
      "grad_norm": 0.7237250804901123,
      "learning_rate": 4.307439889828219e-06,
      "loss": 0.0182,
      "step": 1747840
    },
    {
      "epoch": 2.860411225231241,
      "grad_norm": 0.11848527193069458,
      "learning_rate": 4.307373997614702e-06,
      "loss": 0.0175,
      "step": 1747860
    },
    {
      "epoch": 2.860443955669894,
      "grad_norm": 0.5875040888786316,
      "learning_rate": 4.307308105401185e-06,
      "loss": 0.0124,
      "step": 1747880
    },
    {
      "epoch": 2.860476686108547,
      "grad_norm": 0.28820332884788513,
      "learning_rate": 4.3072422131876675e-06,
      "loss": 0.0152,
      "step": 1747900
    },
    {
      "epoch": 2.8605094165472007,
      "grad_norm": 0.1047024056315422,
      "learning_rate": 4.307176320974151e-06,
      "loss": 0.0172,
      "step": 1747920
    },
    {
      "epoch": 2.860542146985854,
      "grad_norm": 0.13497668504714966,
      "learning_rate": 4.307110428760634e-06,
      "loss": 0.0206,
      "step": 1747940
    },
    {
      "epoch": 2.860574877424507,
      "grad_norm": 0.30730292201042175,
      "learning_rate": 4.3070445365471166e-06,
      "loss": 0.0127,
      "step": 1747960
    },
    {
      "epoch": 2.8606076078631606,
      "grad_norm": 0.3158087730407715,
      "learning_rate": 4.306978644333599e-06,
      "loss": 0.0132,
      "step": 1747980
    },
    {
      "epoch": 2.860640338301814,
      "grad_norm": 0.0818764939904213,
      "learning_rate": 4.306912752120082e-06,
      "loss": 0.0128,
      "step": 1748000
    },
    {
      "epoch": 2.8606730687404673,
      "grad_norm": 0.28998875617980957,
      "learning_rate": 4.306846859906565e-06,
      "loss": 0.0115,
      "step": 1748020
    },
    {
      "epoch": 2.8607057991791205,
      "grad_norm": 0.2960118353366852,
      "learning_rate": 4.306780967693048e-06,
      "loss": 0.0139,
      "step": 1748040
    },
    {
      "epoch": 2.860738529617774,
      "grad_norm": 0.25205522775650024,
      "learning_rate": 4.306715075479531e-06,
      "loss": 0.0166,
      "step": 1748060
    },
    {
      "epoch": 2.8607712600564272,
      "grad_norm": 0.18728211522102356,
      "learning_rate": 4.306649183266014e-06,
      "loss": 0.0108,
      "step": 1748080
    },
    {
      "epoch": 2.8608039904950804,
      "grad_norm": 0.2066601663827896,
      "learning_rate": 4.306583291052497e-06,
      "loss": 0.0164,
      "step": 1748100
    },
    {
      "epoch": 2.860836720933734,
      "grad_norm": 0.21012625098228455,
      "learning_rate": 4.306517398838979e-06,
      "loss": 0.0171,
      "step": 1748120
    },
    {
      "epoch": 2.860869451372387,
      "grad_norm": 0.34559065103530884,
      "learning_rate": 4.306451506625463e-06,
      "loss": 0.0131,
      "step": 1748140
    },
    {
      "epoch": 2.8609021818110407,
      "grad_norm": 0.15588918328285217,
      "learning_rate": 4.306385614411946e-06,
      "loss": 0.0124,
      "step": 1748160
    },
    {
      "epoch": 2.860934912249694,
      "grad_norm": 0.07321258634328842,
      "learning_rate": 4.306319722198428e-06,
      "loss": 0.0162,
      "step": 1748180
    },
    {
      "epoch": 2.8609676426883475,
      "grad_norm": 0.23220400512218475,
      "learning_rate": 4.306253829984911e-06,
      "loss": 0.0114,
      "step": 1748200
    },
    {
      "epoch": 2.8610003731270006,
      "grad_norm": 0.23670107126235962,
      "learning_rate": 4.306187937771394e-06,
      "loss": 0.0148,
      "step": 1748220
    },
    {
      "epoch": 2.8610331035656538,
      "grad_norm": 0.2041216790676117,
      "learning_rate": 4.306122045557877e-06,
      "loss": 0.0119,
      "step": 1748240
    },
    {
      "epoch": 2.8610658340043074,
      "grad_norm": 1.4347236156463623,
      "learning_rate": 4.306056153344359e-06,
      "loss": 0.0193,
      "step": 1748260
    },
    {
      "epoch": 2.8610985644429605,
      "grad_norm": 0.42696648836135864,
      "learning_rate": 4.305990261130842e-06,
      "loss": 0.0113,
      "step": 1748280
    },
    {
      "epoch": 2.861131294881614,
      "grad_norm": 0.5811243653297424,
      "learning_rate": 4.305924368917325e-06,
      "loss": 0.0136,
      "step": 1748300
    },
    {
      "epoch": 2.8611640253202673,
      "grad_norm": 1.5472391843795776,
      "learning_rate": 4.3058584767038084e-06,
      "loss": 0.018,
      "step": 1748320
    },
    {
      "epoch": 2.861196755758921,
      "grad_norm": 0.240275576710701,
      "learning_rate": 4.305792584490291e-06,
      "loss": 0.0129,
      "step": 1748340
    },
    {
      "epoch": 2.861229486197574,
      "grad_norm": 0.41287457942962646,
      "learning_rate": 4.305726692276774e-06,
      "loss": 0.0131,
      "step": 1748360
    },
    {
      "epoch": 2.861262216636227,
      "grad_norm": 0.14897195994853973,
      "learning_rate": 4.3056608000632575e-06,
      "loss": 0.0168,
      "step": 1748380
    },
    {
      "epoch": 2.8612949470748807,
      "grad_norm": 0.5593648552894592,
      "learning_rate": 4.30559490784974e-06,
      "loss": 0.0108,
      "step": 1748400
    },
    {
      "epoch": 2.861327677513534,
      "grad_norm": 0.16263726353645325,
      "learning_rate": 4.305529015636223e-06,
      "loss": 0.0119,
      "step": 1748420
    },
    {
      "epoch": 2.8613604079521875,
      "grad_norm": 0.24711236357688904,
      "learning_rate": 4.305463123422706e-06,
      "loss": 0.0145,
      "step": 1748440
    },
    {
      "epoch": 2.8613931383908406,
      "grad_norm": 0.32313504815101624,
      "learning_rate": 4.3053972312091885e-06,
      "loss": 0.0102,
      "step": 1748460
    },
    {
      "epoch": 2.8614258688294942,
      "grad_norm": 0.41440436244010925,
      "learning_rate": 4.305331338995671e-06,
      "loss": 0.0101,
      "step": 1748480
    },
    {
      "epoch": 2.8614585992681474,
      "grad_norm": 0.5169224739074707,
      "learning_rate": 4.305265446782154e-06,
      "loss": 0.0164,
      "step": 1748500
    },
    {
      "epoch": 2.8614913297068005,
      "grad_norm": 0.583652913570404,
      "learning_rate": 4.305199554568637e-06,
      "loss": 0.0125,
      "step": 1748520
    },
    {
      "epoch": 2.861524060145454,
      "grad_norm": 0.267082542181015,
      "learning_rate": 4.30513366235512e-06,
      "loss": 0.0172,
      "step": 1748540
    },
    {
      "epoch": 2.8615567905841073,
      "grad_norm": 1.0615308284759521,
      "learning_rate": 4.305067770141603e-06,
      "loss": 0.016,
      "step": 1748560
    },
    {
      "epoch": 2.861589521022761,
      "grad_norm": 0.3609968423843384,
      "learning_rate": 4.305001877928086e-06,
      "loss": 0.015,
      "step": 1748580
    },
    {
      "epoch": 2.861622251461414,
      "grad_norm": 0.24105434119701385,
      "learning_rate": 4.3049359857145685e-06,
      "loss": 0.013,
      "step": 1748600
    },
    {
      "epoch": 2.8616549819000676,
      "grad_norm": 0.07026113569736481,
      "learning_rate": 4.304870093501051e-06,
      "loss": 0.0136,
      "step": 1748620
    },
    {
      "epoch": 2.8616877123387208,
      "grad_norm": 0.14619527757167816,
      "learning_rate": 4.304804201287534e-06,
      "loss": 0.0137,
      "step": 1748640
    },
    {
      "epoch": 2.861720442777374,
      "grad_norm": 0.39972376823425293,
      "learning_rate": 4.304738309074017e-06,
      "loss": 0.0114,
      "step": 1748660
    },
    {
      "epoch": 2.8617531732160275,
      "grad_norm": 0.33532142639160156,
      "learning_rate": 4.3046724168604994e-06,
      "loss": 0.0194,
      "step": 1748680
    },
    {
      "epoch": 2.8617859036546807,
      "grad_norm": 0.31478428840637207,
      "learning_rate": 4.304606524646982e-06,
      "loss": 0.009,
      "step": 1748700
    },
    {
      "epoch": 2.8618186340933343,
      "grad_norm": 0.10714013129472733,
      "learning_rate": 4.304540632433466e-06,
      "loss": 0.0125,
      "step": 1748720
    },
    {
      "epoch": 2.8618513645319874,
      "grad_norm": 0.4185728430747986,
      "learning_rate": 4.3044747402199485e-06,
      "loss": 0.0099,
      "step": 1748740
    },
    {
      "epoch": 2.861884094970641,
      "grad_norm": 0.8679196238517761,
      "learning_rate": 4.304408848006431e-06,
      "loss": 0.0164,
      "step": 1748760
    },
    {
      "epoch": 2.861916825409294,
      "grad_norm": 0.746715784072876,
      "learning_rate": 4.304342955792915e-06,
      "loss": 0.0127,
      "step": 1748780
    },
    {
      "epoch": 2.8619495558479473,
      "grad_norm": 0.2925407290458679,
      "learning_rate": 4.3042770635793976e-06,
      "loss": 0.01,
      "step": 1748800
    },
    {
      "epoch": 2.861982286286601,
      "grad_norm": 0.07766074687242508,
      "learning_rate": 4.30421117136588e-06,
      "loss": 0.0113,
      "step": 1748820
    },
    {
      "epoch": 2.862015016725254,
      "grad_norm": 0.27341270446777344,
      "learning_rate": 4.304145279152363e-06,
      "loss": 0.0191,
      "step": 1748840
    },
    {
      "epoch": 2.8620477471639076,
      "grad_norm": 0.7323924899101257,
      "learning_rate": 4.304079386938846e-06,
      "loss": 0.0149,
      "step": 1748860
    },
    {
      "epoch": 2.862080477602561,
      "grad_norm": 0.3840387761592865,
      "learning_rate": 4.3040134947253285e-06,
      "loss": 0.0105,
      "step": 1748880
    },
    {
      "epoch": 2.8621132080412144,
      "grad_norm": 0.2034739851951599,
      "learning_rate": 4.303947602511811e-06,
      "loss": 0.0116,
      "step": 1748900
    },
    {
      "epoch": 2.8621459384798675,
      "grad_norm": 0.26663362979888916,
      "learning_rate": 4.303881710298294e-06,
      "loss": 0.0146,
      "step": 1748920
    },
    {
      "epoch": 2.8621786689185207,
      "grad_norm": 0.40540555119514465,
      "learning_rate": 4.303815818084778e-06,
      "loss": 0.0132,
      "step": 1748940
    },
    {
      "epoch": 2.8622113993571743,
      "grad_norm": 0.5638991594314575,
      "learning_rate": 4.30374992587126e-06,
      "loss": 0.0109,
      "step": 1748960
    },
    {
      "epoch": 2.8622441297958274,
      "grad_norm": 0.2971523404121399,
      "learning_rate": 4.303684033657743e-06,
      "loss": 0.0169,
      "step": 1748980
    },
    {
      "epoch": 2.862276860234481,
      "grad_norm": 0.3794339895248413,
      "learning_rate": 4.303618141444226e-06,
      "loss": 0.0147,
      "step": 1749000
    },
    {
      "epoch": 2.862309590673134,
      "grad_norm": 0.3798450529575348,
      "learning_rate": 4.3035522492307086e-06,
      "loss": 0.0133,
      "step": 1749020
    },
    {
      "epoch": 2.8623423211117878,
      "grad_norm": 0.31516629457473755,
      "learning_rate": 4.303486357017191e-06,
      "loss": 0.0096,
      "step": 1749040
    },
    {
      "epoch": 2.862375051550441,
      "grad_norm": 0.6051310896873474,
      "learning_rate": 4.303420464803674e-06,
      "loss": 0.0179,
      "step": 1749060
    },
    {
      "epoch": 2.862407781989094,
      "grad_norm": 0.2309834510087967,
      "learning_rate": 4.303354572590157e-06,
      "loss": 0.0152,
      "step": 1749080
    },
    {
      "epoch": 2.8624405124277477,
      "grad_norm": 0.4403647482395172,
      "learning_rate": 4.30328868037664e-06,
      "loss": 0.0103,
      "step": 1749100
    },
    {
      "epoch": 2.862473242866401,
      "grad_norm": 0.20015954971313477,
      "learning_rate": 4.303222788163123e-06,
      "loss": 0.0097,
      "step": 1749120
    },
    {
      "epoch": 2.862505973305054,
      "grad_norm": 0.2371017187833786,
      "learning_rate": 4.303156895949606e-06,
      "loss": 0.0135,
      "step": 1749140
    },
    {
      "epoch": 2.8625387037437076,
      "grad_norm": 0.2673724889755249,
      "learning_rate": 4.3030910037360894e-06,
      "loss": 0.0142,
      "step": 1749160
    },
    {
      "epoch": 2.862571434182361,
      "grad_norm": 0.27484482526779175,
      "learning_rate": 4.303025111522572e-06,
      "loss": 0.0133,
      "step": 1749180
    },
    {
      "epoch": 2.8626041646210143,
      "grad_norm": 3.234224796295166,
      "learning_rate": 4.302959219309055e-06,
      "loss": 0.0075,
      "step": 1749200
    },
    {
      "epoch": 2.8626368950596675,
      "grad_norm": 0.3160068392753601,
      "learning_rate": 4.302893327095538e-06,
      "loss": 0.011,
      "step": 1749220
    },
    {
      "epoch": 2.862669625498321,
      "grad_norm": 0.6047746539115906,
      "learning_rate": 4.30282743488202e-06,
      "loss": 0.0149,
      "step": 1749240
    },
    {
      "epoch": 2.862702355936974,
      "grad_norm": 0.08284005522727966,
      "learning_rate": 4.302761542668503e-06,
      "loss": 0.0192,
      "step": 1749260
    },
    {
      "epoch": 2.8627350863756273,
      "grad_norm": 0.38910478353500366,
      "learning_rate": 4.302695650454986e-06,
      "loss": 0.01,
      "step": 1749280
    },
    {
      "epoch": 2.862767816814281,
      "grad_norm": 0.3479537069797516,
      "learning_rate": 4.302629758241469e-06,
      "loss": 0.0126,
      "step": 1749300
    },
    {
      "epoch": 2.8628005472529345,
      "grad_norm": 0.5463902950286865,
      "learning_rate": 4.302563866027951e-06,
      "loss": 0.0138,
      "step": 1749320
    },
    {
      "epoch": 2.8628332776915877,
      "grad_norm": 0.20372137427330017,
      "learning_rate": 4.302497973814435e-06,
      "loss": 0.0166,
      "step": 1749340
    },
    {
      "epoch": 2.862866008130241,
      "grad_norm": 0.18939517438411713,
      "learning_rate": 4.302432081600918e-06,
      "loss": 0.0115,
      "step": 1749360
    },
    {
      "epoch": 2.8628987385688944,
      "grad_norm": 0.14724494516849518,
      "learning_rate": 4.3023661893874e-06,
      "loss": 0.0141,
      "step": 1749380
    },
    {
      "epoch": 2.8629314690075476,
      "grad_norm": 0.2260304093360901,
      "learning_rate": 4.302300297173883e-06,
      "loss": 0.0104,
      "step": 1749400
    },
    {
      "epoch": 2.8629641994462007,
      "grad_norm": 0.101019486784935,
      "learning_rate": 4.302234404960366e-06,
      "loss": 0.0116,
      "step": 1749420
    },
    {
      "epoch": 2.8629969298848543,
      "grad_norm": 0.24777784943580627,
      "learning_rate": 4.3021685127468495e-06,
      "loss": 0.0141,
      "step": 1749440
    },
    {
      "epoch": 2.863029660323508,
      "grad_norm": 0.07180158793926239,
      "learning_rate": 4.302102620533332e-06,
      "loss": 0.0162,
      "step": 1749460
    },
    {
      "epoch": 2.863062390762161,
      "grad_norm": 0.3527029752731323,
      "learning_rate": 4.302036728319815e-06,
      "loss": 0.0111,
      "step": 1749480
    },
    {
      "epoch": 2.863095121200814,
      "grad_norm": 0.4543263912200928,
      "learning_rate": 4.301970836106298e-06,
      "loss": 0.0151,
      "step": 1749500
    },
    {
      "epoch": 2.863127851639468,
      "grad_norm": 0.22269144654273987,
      "learning_rate": 4.3019049438927804e-06,
      "loss": 0.0103,
      "step": 1749520
    },
    {
      "epoch": 2.863160582078121,
      "grad_norm": 0.25410372018814087,
      "learning_rate": 4.301839051679263e-06,
      "loss": 0.0129,
      "step": 1749540
    },
    {
      "epoch": 2.863193312516774,
      "grad_norm": 0.3693296015262604,
      "learning_rate": 4.301773159465747e-06,
      "loss": 0.0108,
      "step": 1749560
    },
    {
      "epoch": 2.8632260429554277,
      "grad_norm": 0.20132938027381897,
      "learning_rate": 4.3017072672522295e-06,
      "loss": 0.0164,
      "step": 1749580
    },
    {
      "epoch": 2.8632587733940813,
      "grad_norm": 0.5949544906616211,
      "learning_rate": 4.301641375038712e-06,
      "loss": 0.0104,
      "step": 1749600
    },
    {
      "epoch": 2.8632915038327345,
      "grad_norm": 0.39497098326683044,
      "learning_rate": 4.301575482825195e-06,
      "loss": 0.0147,
      "step": 1749620
    },
    {
      "epoch": 2.8633242342713876,
      "grad_norm": 0.33826467394828796,
      "learning_rate": 4.301509590611678e-06,
      "loss": 0.0103,
      "step": 1749640
    },
    {
      "epoch": 2.863356964710041,
      "grad_norm": 0.25805482268333435,
      "learning_rate": 4.3014436983981605e-06,
      "loss": 0.0114,
      "step": 1749660
    },
    {
      "epoch": 2.8633896951486943,
      "grad_norm": 0.19198428094387054,
      "learning_rate": 4.301377806184643e-06,
      "loss": 0.0107,
      "step": 1749680
    },
    {
      "epoch": 2.8634224255873475,
      "grad_norm": 0.13744297623634338,
      "learning_rate": 4.301311913971126e-06,
      "loss": 0.0129,
      "step": 1749700
    },
    {
      "epoch": 2.863455156026001,
      "grad_norm": 0.13420991599559784,
      "learning_rate": 4.301246021757609e-06,
      "loss": 0.0165,
      "step": 1749720
    },
    {
      "epoch": 2.8634878864646542,
      "grad_norm": 0.4015812277793884,
      "learning_rate": 4.301180129544092e-06,
      "loss": 0.0093,
      "step": 1749740
    },
    {
      "epoch": 2.863520616903308,
      "grad_norm": 0.13699916005134583,
      "learning_rate": 4.301114237330575e-06,
      "loss": 0.0121,
      "step": 1749760
    },
    {
      "epoch": 2.863553347341961,
      "grad_norm": 0.23971298336982727,
      "learning_rate": 4.301048345117058e-06,
      "loss": 0.0109,
      "step": 1749780
    },
    {
      "epoch": 2.8635860777806146,
      "grad_norm": 0.6201772093772888,
      "learning_rate": 4.300982452903541e-06,
      "loss": 0.0142,
      "step": 1749800
    },
    {
      "epoch": 2.8636188082192677,
      "grad_norm": 0.17487123608589172,
      "learning_rate": 4.300916560690024e-06,
      "loss": 0.0119,
      "step": 1749820
    },
    {
      "epoch": 2.863651538657921,
      "grad_norm": 0.27609023451805115,
      "learning_rate": 4.300850668476507e-06,
      "loss": 0.014,
      "step": 1749840
    },
    {
      "epoch": 2.8636842690965745,
      "grad_norm": 0.40492117404937744,
      "learning_rate": 4.3007847762629896e-06,
      "loss": 0.0148,
      "step": 1749860
    },
    {
      "epoch": 2.8637169995352276,
      "grad_norm": 0.2242993861436844,
      "learning_rate": 4.300718884049472e-06,
      "loss": 0.0114,
      "step": 1749880
    },
    {
      "epoch": 2.863749729973881,
      "grad_norm": 0.29137787222862244,
      "learning_rate": 4.300652991835955e-06,
      "loss": 0.0155,
      "step": 1749900
    },
    {
      "epoch": 2.8637824604125344,
      "grad_norm": 0.14521943032741547,
      "learning_rate": 4.300587099622438e-06,
      "loss": 0.01,
      "step": 1749920
    },
    {
      "epoch": 2.863815190851188,
      "grad_norm": 0.2923494279384613,
      "learning_rate": 4.3005212074089205e-06,
      "loss": 0.0091,
      "step": 1749940
    },
    {
      "epoch": 2.863847921289841,
      "grad_norm": 0.22427517175674438,
      "learning_rate": 4.300455315195404e-06,
      "loss": 0.0164,
      "step": 1749960
    },
    {
      "epoch": 2.8638806517284943,
      "grad_norm": 0.32976672053337097,
      "learning_rate": 4.300389422981887e-06,
      "loss": 0.0129,
      "step": 1749980
    },
    {
      "epoch": 2.863913382167148,
      "grad_norm": 0.4878641664981842,
      "learning_rate": 4.30032353076837e-06,
      "loss": 0.0117,
      "step": 1750000
    },
    {
      "epoch": 2.863913382167148,
      "eval_loss": 0.007507512345910072,
      "eval_runtime": 6515.9932,
      "eval_samples_per_second": 157.744,
      "eval_steps_per_second": 15.774,
      "eval_sts-dev_pearson_cosine": 0.982777495139298,
      "eval_sts-dev_spearman_cosine": 0.8940643498619215,
      "step": 1750000
    },
    {
      "epoch": 2.863946112605801,
      "grad_norm": 0.22761985659599304,
      "learning_rate": 4.300257638554852e-06,
      "loss": 0.0116,
      "step": 1750020
    },
    {
      "epoch": 2.8639788430444546,
      "grad_norm": 0.4310489594936371,
      "learning_rate": 4.300191746341335e-06,
      "loss": 0.0129,
      "step": 1750040
    },
    {
      "epoch": 2.8640115734831078,
      "grad_norm": 0.3381883203983307,
      "learning_rate": 4.300125854127818e-06,
      "loss": 0.0204,
      "step": 1750060
    },
    {
      "epoch": 2.8640443039217613,
      "grad_norm": 0.5577631592750549,
      "learning_rate": 4.3000599619143005e-06,
      "loss": 0.0134,
      "step": 1750080
    },
    {
      "epoch": 2.8640770343604145,
      "grad_norm": 0.5523847937583923,
      "learning_rate": 4.299994069700783e-06,
      "loss": 0.0172,
      "step": 1750100
    },
    {
      "epoch": 2.8641097647990676,
      "grad_norm": 0.38246312737464905,
      "learning_rate": 4.299928177487266e-06,
      "loss": 0.0161,
      "step": 1750120
    },
    {
      "epoch": 2.8641424952377212,
      "grad_norm": 0.12523046135902405,
      "learning_rate": 4.29986228527375e-06,
      "loss": 0.0113,
      "step": 1750140
    },
    {
      "epoch": 2.8641752256763744,
      "grad_norm": 0.22125501930713654,
      "learning_rate": 4.299796393060232e-06,
      "loss": 0.0166,
      "step": 1750160
    },
    {
      "epoch": 2.864207956115028,
      "grad_norm": 0.7453182339668274,
      "learning_rate": 4.299730500846715e-06,
      "loss": 0.0125,
      "step": 1750180
    },
    {
      "epoch": 2.864240686553681,
      "grad_norm": 0.26977142691612244,
      "learning_rate": 4.299664608633199e-06,
      "loss": 0.0159,
      "step": 1750200
    },
    {
      "epoch": 2.8642734169923347,
      "grad_norm": 1.3032755851745605,
      "learning_rate": 4.299598716419681e-06,
      "loss": 0.0092,
      "step": 1750220
    },
    {
      "epoch": 2.864306147430988,
      "grad_norm": 0.559775173664093,
      "learning_rate": 4.299532824206164e-06,
      "loss": 0.0142,
      "step": 1750240
    },
    {
      "epoch": 2.864338877869641,
      "grad_norm": 0.613095223903656,
      "learning_rate": 4.299466931992647e-06,
      "loss": 0.015,
      "step": 1750260
    },
    {
      "epoch": 2.8643716083082946,
      "grad_norm": 0.10359722375869751,
      "learning_rate": 4.29940103977913e-06,
      "loss": 0.0151,
      "step": 1750280
    },
    {
      "epoch": 2.8644043387469478,
      "grad_norm": 0.2899684011936188,
      "learning_rate": 4.299335147565612e-06,
      "loss": 0.0161,
      "step": 1750300
    },
    {
      "epoch": 2.8644370691856014,
      "grad_norm": 1.1747678518295288,
      "learning_rate": 4.299269255352095e-06,
      "loss": 0.0137,
      "step": 1750320
    },
    {
      "epoch": 2.8644697996242545,
      "grad_norm": 0.40043917298316956,
      "learning_rate": 4.299203363138578e-06,
      "loss": 0.011,
      "step": 1750340
    },
    {
      "epoch": 2.864502530062908,
      "grad_norm": 0.34531354904174805,
      "learning_rate": 4.2991374709250614e-06,
      "loss": 0.0102,
      "step": 1750360
    },
    {
      "epoch": 2.8645352605015613,
      "grad_norm": 0.17618361115455627,
      "learning_rate": 4.299071578711544e-06,
      "loss": 0.0111,
      "step": 1750380
    },
    {
      "epoch": 2.8645679909402144,
      "grad_norm": 0.36548832058906555,
      "learning_rate": 4.299005686498027e-06,
      "loss": 0.0122,
      "step": 1750400
    },
    {
      "epoch": 2.864600721378868,
      "grad_norm": 0.46132194995880127,
      "learning_rate": 4.29893979428451e-06,
      "loss": 0.0111,
      "step": 1750420
    },
    {
      "epoch": 2.864633451817521,
      "grad_norm": 0.33905091881752014,
      "learning_rate": 4.298873902070992e-06,
      "loss": 0.0155,
      "step": 1750440
    },
    {
      "epoch": 2.8646661822561748,
      "grad_norm": 0.4050522446632385,
      "learning_rate": 4.298808009857475e-06,
      "loss": 0.0076,
      "step": 1750460
    },
    {
      "epoch": 2.864698912694828,
      "grad_norm": 0.13165926933288574,
      "learning_rate": 4.298742117643958e-06,
      "loss": 0.0162,
      "step": 1750480
    },
    {
      "epoch": 2.8647316431334815,
      "grad_norm": 0.5529059171676636,
      "learning_rate": 4.2986762254304415e-06,
      "loss": 0.0178,
      "step": 1750500
    },
    {
      "epoch": 2.8647643735721346,
      "grad_norm": 0.398296594619751,
      "learning_rate": 4.298610333216924e-06,
      "loss": 0.0135,
      "step": 1750520
    },
    {
      "epoch": 2.864797104010788,
      "grad_norm": 0.19670772552490234,
      "learning_rate": 4.298544441003407e-06,
      "loss": 0.0125,
      "step": 1750540
    },
    {
      "epoch": 2.8648298344494414,
      "grad_norm": 0.2769332528114319,
      "learning_rate": 4.29847854878989e-06,
      "loss": 0.012,
      "step": 1750560
    },
    {
      "epoch": 2.8648625648880945,
      "grad_norm": 0.3665620982646942,
      "learning_rate": 4.298412656576373e-06,
      "loss": 0.0126,
      "step": 1750580
    },
    {
      "epoch": 2.8648952953267477,
      "grad_norm": 0.23816251754760742,
      "learning_rate": 4.298346764362856e-06,
      "loss": 0.0155,
      "step": 1750600
    },
    {
      "epoch": 2.8649280257654013,
      "grad_norm": 0.7618870735168457,
      "learning_rate": 4.298280872149339e-06,
      "loss": 0.0107,
      "step": 1750620
    },
    {
      "epoch": 2.864960756204055,
      "grad_norm": 0.6881307363510132,
      "learning_rate": 4.2982149799358215e-06,
      "loss": 0.0113,
      "step": 1750640
    },
    {
      "epoch": 2.864993486642708,
      "grad_norm": 0.4237259328365326,
      "learning_rate": 4.298149087722304e-06,
      "loss": 0.0196,
      "step": 1750660
    },
    {
      "epoch": 2.865026217081361,
      "grad_norm": 0.43609732389450073,
      "learning_rate": 4.298083195508787e-06,
      "loss": 0.0151,
      "step": 1750680
    },
    {
      "epoch": 2.8650589475200148,
      "grad_norm": 0.49340277910232544,
      "learning_rate": 4.29801730329527e-06,
      "loss": 0.0134,
      "step": 1750700
    },
    {
      "epoch": 2.865091677958668,
      "grad_norm": 0.708233118057251,
      "learning_rate": 4.2979514110817524e-06,
      "loss": 0.0166,
      "step": 1750720
    },
    {
      "epoch": 2.865124408397321,
      "grad_norm": 0.3556700050830841,
      "learning_rate": 4.297885518868235e-06,
      "loss": 0.012,
      "step": 1750740
    },
    {
      "epoch": 2.8651571388359747,
      "grad_norm": 3.0794363021850586,
      "learning_rate": 4.297819626654719e-06,
      "loss": 0.013,
      "step": 1750760
    },
    {
      "epoch": 2.8651898692746283,
      "grad_norm": 0.3920195400714874,
      "learning_rate": 4.2977537344412015e-06,
      "loss": 0.012,
      "step": 1750780
    },
    {
      "epoch": 2.8652225997132814,
      "grad_norm": 0.42646342515945435,
      "learning_rate": 4.297687842227684e-06,
      "loss": 0.0124,
      "step": 1750800
    },
    {
      "epoch": 2.8652553301519346,
      "grad_norm": 0.19831404089927673,
      "learning_rate": 4.297621950014167e-06,
      "loss": 0.0106,
      "step": 1750820
    },
    {
      "epoch": 2.865288060590588,
      "grad_norm": 0.5702883005142212,
      "learning_rate": 4.29755605780065e-06,
      "loss": 0.0111,
      "step": 1750840
    },
    {
      "epoch": 2.8653207910292413,
      "grad_norm": 0.15189474821090698,
      "learning_rate": 4.297490165587133e-06,
      "loss": 0.0103,
      "step": 1750860
    },
    {
      "epoch": 2.8653535214678945,
      "grad_norm": 0.6168064475059509,
      "learning_rate": 4.297424273373616e-06,
      "loss": 0.015,
      "step": 1750880
    },
    {
      "epoch": 2.865386251906548,
      "grad_norm": 0.37488824129104614,
      "learning_rate": 4.297358381160099e-06,
      "loss": 0.0156,
      "step": 1750900
    },
    {
      "epoch": 2.8654189823452016,
      "grad_norm": 0.48964887857437134,
      "learning_rate": 4.2972924889465815e-06,
      "loss": 0.0125,
      "step": 1750920
    },
    {
      "epoch": 2.865451712783855,
      "grad_norm": 0.21473082900047302,
      "learning_rate": 4.297226596733064e-06,
      "loss": 0.0163,
      "step": 1750940
    },
    {
      "epoch": 2.865484443222508,
      "grad_norm": 0.43604233860969543,
      "learning_rate": 4.297160704519547e-06,
      "loss": 0.0176,
      "step": 1750960
    },
    {
      "epoch": 2.8655171736611615,
      "grad_norm": 0.22453658282756805,
      "learning_rate": 4.297094812306031e-06,
      "loss": 0.0077,
      "step": 1750980
    },
    {
      "epoch": 2.8655499040998147,
      "grad_norm": 0.22472740709781647,
      "learning_rate": 4.297028920092513e-06,
      "loss": 0.0123,
      "step": 1751000
    },
    {
      "epoch": 2.865582634538468,
      "grad_norm": 0.1556883156299591,
      "learning_rate": 4.296963027878996e-06,
      "loss": 0.0135,
      "step": 1751020
    },
    {
      "epoch": 2.8656153649771214,
      "grad_norm": 0.5996978282928467,
      "learning_rate": 4.296897135665479e-06,
      "loss": 0.0129,
      "step": 1751040
    },
    {
      "epoch": 2.865648095415775,
      "grad_norm": 0.27578499913215637,
      "learning_rate": 4.2968312434519616e-06,
      "loss": 0.0113,
      "step": 1751060
    },
    {
      "epoch": 2.865680825854428,
      "grad_norm": 0.2515324354171753,
      "learning_rate": 4.296765351238444e-06,
      "loss": 0.0222,
      "step": 1751080
    },
    {
      "epoch": 2.8657135562930813,
      "grad_norm": 0.7586936354637146,
      "learning_rate": 4.296699459024927e-06,
      "loss": 0.0155,
      "step": 1751100
    },
    {
      "epoch": 2.865746286731735,
      "grad_norm": 0.40129339694976807,
      "learning_rate": 4.29663356681141e-06,
      "loss": 0.0124,
      "step": 1751120
    },
    {
      "epoch": 2.865779017170388,
      "grad_norm": 0.12937210500240326,
      "learning_rate": 4.2965676745978925e-06,
      "loss": 0.0174,
      "step": 1751140
    },
    {
      "epoch": 2.8658117476090412,
      "grad_norm": 0.4836907386779785,
      "learning_rate": 4.296501782384376e-06,
      "loss": 0.0139,
      "step": 1751160
    },
    {
      "epoch": 2.865844478047695,
      "grad_norm": 0.35867586731910706,
      "learning_rate": 4.296435890170859e-06,
      "loss": 0.0145,
      "step": 1751180
    },
    {
      "epoch": 2.865877208486348,
      "grad_norm": 0.3283829987049103,
      "learning_rate": 4.296369997957342e-06,
      "loss": 0.0184,
      "step": 1751200
    },
    {
      "epoch": 2.8659099389250016,
      "grad_norm": 0.5301509499549866,
      "learning_rate": 4.296304105743825e-06,
      "loss": 0.0158,
      "step": 1751220
    },
    {
      "epoch": 2.8659426693636547,
      "grad_norm": 0.6895426511764526,
      "learning_rate": 4.296238213530308e-06,
      "loss": 0.0148,
      "step": 1751240
    },
    {
      "epoch": 2.8659753998023083,
      "grad_norm": 0.7758163809776306,
      "learning_rate": 4.296172321316791e-06,
      "loss": 0.0216,
      "step": 1751260
    },
    {
      "epoch": 2.8660081302409615,
      "grad_norm": 0.26470714807510376,
      "learning_rate": 4.296106429103273e-06,
      "loss": 0.0128,
      "step": 1751280
    },
    {
      "epoch": 2.8660408606796146,
      "grad_norm": 0.3511677384376526,
      "learning_rate": 4.296040536889756e-06,
      "loss": 0.0125,
      "step": 1751300
    },
    {
      "epoch": 2.866073591118268,
      "grad_norm": 0.2643929123878479,
      "learning_rate": 4.295974644676239e-06,
      "loss": 0.011,
      "step": 1751320
    },
    {
      "epoch": 2.8661063215569214,
      "grad_norm": 0.12382770329713821,
      "learning_rate": 4.295908752462722e-06,
      "loss": 0.0109,
      "step": 1751340
    },
    {
      "epoch": 2.866139051995575,
      "grad_norm": 0.6502647995948792,
      "learning_rate": 4.295842860249204e-06,
      "loss": 0.0159,
      "step": 1751360
    },
    {
      "epoch": 2.866171782434228,
      "grad_norm": 0.1910923570394516,
      "learning_rate": 4.295776968035688e-06,
      "loss": 0.0123,
      "step": 1751380
    },
    {
      "epoch": 2.8662045128728817,
      "grad_norm": 1.012371301651001,
      "learning_rate": 4.295711075822171e-06,
      "loss": 0.0102,
      "step": 1751400
    },
    {
      "epoch": 2.866237243311535,
      "grad_norm": 0.12843212485313416,
      "learning_rate": 4.295645183608653e-06,
      "loss": 0.0141,
      "step": 1751420
    },
    {
      "epoch": 2.866269973750188,
      "grad_norm": 0.20678700506687164,
      "learning_rate": 4.295579291395136e-06,
      "loss": 0.0116,
      "step": 1751440
    },
    {
      "epoch": 2.8663027041888416,
      "grad_norm": 0.15368971228599548,
      "learning_rate": 4.295513399181619e-06,
      "loss": 0.0098,
      "step": 1751460
    },
    {
      "epoch": 2.8663354346274947,
      "grad_norm": 0.3712061941623688,
      "learning_rate": 4.295447506968102e-06,
      "loss": 0.0155,
      "step": 1751480
    },
    {
      "epoch": 2.8663681650661483,
      "grad_norm": 0.243529811501503,
      "learning_rate": 4.295381614754584e-06,
      "loss": 0.0118,
      "step": 1751500
    },
    {
      "epoch": 2.8664008955048015,
      "grad_norm": 0.07776076346635818,
      "learning_rate": 4.295315722541067e-06,
      "loss": 0.0134,
      "step": 1751520
    },
    {
      "epoch": 2.866433625943455,
      "grad_norm": 0.36450809240341187,
      "learning_rate": 4.29524983032755e-06,
      "loss": 0.0159,
      "step": 1751540
    },
    {
      "epoch": 2.8664663563821082,
      "grad_norm": 0.29325753450393677,
      "learning_rate": 4.2951839381140334e-06,
      "loss": 0.0204,
      "step": 1751560
    },
    {
      "epoch": 2.8664990868207614,
      "grad_norm": 0.7602559328079224,
      "learning_rate": 4.295118045900516e-06,
      "loss": 0.0115,
      "step": 1751580
    },
    {
      "epoch": 2.866531817259415,
      "grad_norm": 0.20355837047100067,
      "learning_rate": 4.295052153686999e-06,
      "loss": 0.0139,
      "step": 1751600
    },
    {
      "epoch": 2.866564547698068,
      "grad_norm": 0.6212645173072815,
      "learning_rate": 4.2949862614734825e-06,
      "loss": 0.017,
      "step": 1751620
    },
    {
      "epoch": 2.8665972781367217,
      "grad_norm": 0.1205284595489502,
      "learning_rate": 4.294920369259965e-06,
      "loss": 0.0114,
      "step": 1751640
    },
    {
      "epoch": 2.866630008575375,
      "grad_norm": 0.13127537071704865,
      "learning_rate": 4.294854477046448e-06,
      "loss": 0.0125,
      "step": 1751660
    },
    {
      "epoch": 2.8666627390140285,
      "grad_norm": 0.11343242228031158,
      "learning_rate": 4.294788584832931e-06,
      "loss": 0.0139,
      "step": 1751680
    },
    {
      "epoch": 2.8666954694526816,
      "grad_norm": 0.3626159131526947,
      "learning_rate": 4.2947226926194135e-06,
      "loss": 0.0145,
      "step": 1751700
    },
    {
      "epoch": 2.8667281998913348,
      "grad_norm": 0.20839041471481323,
      "learning_rate": 4.294656800405896e-06,
      "loss": 0.0151,
      "step": 1751720
    },
    {
      "epoch": 2.8667609303299884,
      "grad_norm": 0.21242059767246246,
      "learning_rate": 4.294590908192379e-06,
      "loss": 0.0168,
      "step": 1751740
    },
    {
      "epoch": 2.8667936607686415,
      "grad_norm": 0.23186349868774414,
      "learning_rate": 4.294525015978862e-06,
      "loss": 0.0125,
      "step": 1751760
    },
    {
      "epoch": 2.866826391207295,
      "grad_norm": 0.6184601783752441,
      "learning_rate": 4.294459123765345e-06,
      "loss": 0.0116,
      "step": 1751780
    },
    {
      "epoch": 2.8668591216459482,
      "grad_norm": 0.15897490084171295,
      "learning_rate": 4.294393231551828e-06,
      "loss": 0.0218,
      "step": 1751800
    },
    {
      "epoch": 2.866891852084602,
      "grad_norm": 0.11011151969432831,
      "learning_rate": 4.294327339338311e-06,
      "loss": 0.0081,
      "step": 1751820
    },
    {
      "epoch": 2.866924582523255,
      "grad_norm": 0.5880951285362244,
      "learning_rate": 4.2942614471247935e-06,
      "loss": 0.0109,
      "step": 1751840
    },
    {
      "epoch": 2.866957312961908,
      "grad_norm": 0.14201252162456512,
      "learning_rate": 4.294195554911276e-06,
      "loss": 0.01,
      "step": 1751860
    },
    {
      "epoch": 2.8669900434005617,
      "grad_norm": 0.31718331575393677,
      "learning_rate": 4.294129662697759e-06,
      "loss": 0.014,
      "step": 1751880
    },
    {
      "epoch": 2.867022773839215,
      "grad_norm": 0.14749647676944733,
      "learning_rate": 4.2940637704842426e-06,
      "loss": 0.017,
      "step": 1751900
    },
    {
      "epoch": 2.8670555042778685,
      "grad_norm": 0.13602764904499054,
      "learning_rate": 4.293997878270725e-06,
      "loss": 0.0176,
      "step": 1751920
    },
    {
      "epoch": 2.8670882347165216,
      "grad_norm": 0.8852822780609131,
      "learning_rate": 4.293931986057208e-06,
      "loss": 0.0146,
      "step": 1751940
    },
    {
      "epoch": 2.8671209651551752,
      "grad_norm": 1.3779830932617188,
      "learning_rate": 4.293866093843691e-06,
      "loss": 0.0158,
      "step": 1751960
    },
    {
      "epoch": 2.8671536955938284,
      "grad_norm": 0.44615012407302856,
      "learning_rate": 4.2938002016301735e-06,
      "loss": 0.0153,
      "step": 1751980
    },
    {
      "epoch": 2.8671864260324815,
      "grad_norm": 0.33017510175704956,
      "learning_rate": 4.293734309416657e-06,
      "loss": 0.0147,
      "step": 1752000
    },
    {
      "epoch": 2.867219156471135,
      "grad_norm": 0.7707721590995789,
      "learning_rate": 4.29366841720314e-06,
      "loss": 0.0205,
      "step": 1752020
    },
    {
      "epoch": 2.8672518869097883,
      "grad_norm": 0.03123941458761692,
      "learning_rate": 4.293602524989623e-06,
      "loss": 0.0224,
      "step": 1752040
    },
    {
      "epoch": 2.867284617348442,
      "grad_norm": 0.1034073680639267,
      "learning_rate": 4.293536632776105e-06,
      "loss": 0.0105,
      "step": 1752060
    },
    {
      "epoch": 2.867317347787095,
      "grad_norm": 0.6146776080131531,
      "learning_rate": 4.293470740562588e-06,
      "loss": 0.0176,
      "step": 1752080
    },
    {
      "epoch": 2.8673500782257486,
      "grad_norm": 0.4662219285964966,
      "learning_rate": 4.293404848349071e-06,
      "loss": 0.014,
      "step": 1752100
    },
    {
      "epoch": 2.8673828086644018,
      "grad_norm": 0.1845432072877884,
      "learning_rate": 4.2933389561355535e-06,
      "loss": 0.0091,
      "step": 1752120
    },
    {
      "epoch": 2.867415539103055,
      "grad_norm": 0.3371601998806,
      "learning_rate": 4.293273063922036e-06,
      "loss": 0.0148,
      "step": 1752140
    },
    {
      "epoch": 2.8674482695417085,
      "grad_norm": 0.12166054546833038,
      "learning_rate": 4.293207171708519e-06,
      "loss": 0.0153,
      "step": 1752160
    },
    {
      "epoch": 2.8674809999803617,
      "grad_norm": 0.305046021938324,
      "learning_rate": 4.293141279495003e-06,
      "loss": 0.0173,
      "step": 1752180
    },
    {
      "epoch": 2.867513730419015,
      "grad_norm": 0.952391505241394,
      "learning_rate": 4.293075387281485e-06,
      "loss": 0.0167,
      "step": 1752200
    },
    {
      "epoch": 2.8675464608576684,
      "grad_norm": 0.32879674434661865,
      "learning_rate": 4.293009495067968e-06,
      "loss": 0.0139,
      "step": 1752220
    },
    {
      "epoch": 2.867579191296322,
      "grad_norm": 0.3429047465324402,
      "learning_rate": 4.292943602854451e-06,
      "loss": 0.0139,
      "step": 1752240
    },
    {
      "epoch": 2.867611921734975,
      "grad_norm": 1.758638620376587,
      "learning_rate": 4.292877710640934e-06,
      "loss": 0.0135,
      "step": 1752260
    },
    {
      "epoch": 2.8676446521736283,
      "grad_norm": 0.09862916171550751,
      "learning_rate": 4.292811818427417e-06,
      "loss": 0.0111,
      "step": 1752280
    },
    {
      "epoch": 2.867677382612282,
      "grad_norm": 0.16000032424926758,
      "learning_rate": 4.2927459262139e-06,
      "loss": 0.0147,
      "step": 1752300
    },
    {
      "epoch": 2.867710113050935,
      "grad_norm": 0.4273262917995453,
      "learning_rate": 4.292680034000383e-06,
      "loss": 0.0141,
      "step": 1752320
    },
    {
      "epoch": 2.867742843489588,
      "grad_norm": 0.4870937764644623,
      "learning_rate": 4.292614141786865e-06,
      "loss": 0.009,
      "step": 1752340
    },
    {
      "epoch": 2.867775573928242,
      "grad_norm": 0.19680434465408325,
      "learning_rate": 4.292548249573348e-06,
      "loss": 0.0122,
      "step": 1752360
    },
    {
      "epoch": 2.8678083043668954,
      "grad_norm": 0.664040744304657,
      "learning_rate": 4.292482357359831e-06,
      "loss": 0.0086,
      "step": 1752380
    },
    {
      "epoch": 2.8678410348055485,
      "grad_norm": 0.21218323707580566,
      "learning_rate": 4.2924164651463144e-06,
      "loss": 0.0126,
      "step": 1752400
    },
    {
      "epoch": 2.8678737652442017,
      "grad_norm": 0.3081258237361908,
      "learning_rate": 4.292350572932797e-06,
      "loss": 0.0125,
      "step": 1752420
    },
    {
      "epoch": 2.8679064956828553,
      "grad_norm": 0.5702528953552246,
      "learning_rate": 4.29228468071928e-06,
      "loss": 0.018,
      "step": 1752440
    },
    {
      "epoch": 2.8679392261215084,
      "grad_norm": 0.2835131287574768,
      "learning_rate": 4.292218788505763e-06,
      "loss": 0.0129,
      "step": 1752460
    },
    {
      "epoch": 2.8679719565601616,
      "grad_norm": 0.3830002248287201,
      "learning_rate": 4.292152896292245e-06,
      "loss": 0.0135,
      "step": 1752480
    },
    {
      "epoch": 2.868004686998815,
      "grad_norm": 1.0000475645065308,
      "learning_rate": 4.292087004078728e-06,
      "loss": 0.0101,
      "step": 1752500
    },
    {
      "epoch": 2.8680374174374688,
      "grad_norm": 0.18331944942474365,
      "learning_rate": 4.292021111865211e-06,
      "loss": 0.0119,
      "step": 1752520
    },
    {
      "epoch": 2.868070147876122,
      "grad_norm": 0.14010575413703918,
      "learning_rate": 4.291955219651694e-06,
      "loss": 0.0109,
      "step": 1752540
    },
    {
      "epoch": 2.868102878314775,
      "grad_norm": 0.4272432029247284,
      "learning_rate": 4.291889327438176e-06,
      "loss": 0.0136,
      "step": 1752560
    },
    {
      "epoch": 2.8681356087534287,
      "grad_norm": 0.37082287669181824,
      "learning_rate": 4.29182343522466e-06,
      "loss": 0.0198,
      "step": 1752580
    },
    {
      "epoch": 2.868168339192082,
      "grad_norm": 0.24086114764213562,
      "learning_rate": 4.291757543011143e-06,
      "loss": 0.012,
      "step": 1752600
    },
    {
      "epoch": 2.868201069630735,
      "grad_norm": 0.20129616558551788,
      "learning_rate": 4.2916916507976254e-06,
      "loss": 0.0168,
      "step": 1752620
    },
    {
      "epoch": 2.8682338000693886,
      "grad_norm": 0.0915595218539238,
      "learning_rate": 4.291625758584109e-06,
      "loss": 0.0092,
      "step": 1752640
    },
    {
      "epoch": 2.868266530508042,
      "grad_norm": 0.2054080069065094,
      "learning_rate": 4.291559866370592e-06,
      "loss": 0.0115,
      "step": 1752660
    },
    {
      "epoch": 2.8682992609466953,
      "grad_norm": 0.15742434561252594,
      "learning_rate": 4.2914939741570745e-06,
      "loss": 0.013,
      "step": 1752680
    },
    {
      "epoch": 2.8683319913853484,
      "grad_norm": 0.4839753806591034,
      "learning_rate": 4.291428081943557e-06,
      "loss": 0.0121,
      "step": 1752700
    },
    {
      "epoch": 2.868364721824002,
      "grad_norm": 0.3388362526893616,
      "learning_rate": 4.29136218973004e-06,
      "loss": 0.0115,
      "step": 1752720
    },
    {
      "epoch": 2.868397452262655,
      "grad_norm": 0.6799290776252747,
      "learning_rate": 4.291296297516523e-06,
      "loss": 0.0127,
      "step": 1752740
    },
    {
      "epoch": 2.8684301827013083,
      "grad_norm": 0.11485022306442261,
      "learning_rate": 4.2912304053030055e-06,
      "loss": 0.0197,
      "step": 1752760
    },
    {
      "epoch": 2.868462913139962,
      "grad_norm": 0.3168770968914032,
      "learning_rate": 4.291164513089488e-06,
      "loss": 0.0206,
      "step": 1752780
    },
    {
      "epoch": 2.868495643578615,
      "grad_norm": 0.33960455656051636,
      "learning_rate": 4.291098620875972e-06,
      "loss": 0.0125,
      "step": 1752800
    },
    {
      "epoch": 2.8685283740172687,
      "grad_norm": 0.5173277854919434,
      "learning_rate": 4.2910327286624545e-06,
      "loss": 0.0102,
      "step": 1752820
    },
    {
      "epoch": 2.868561104455922,
      "grad_norm": 0.22317911684513092,
      "learning_rate": 4.290966836448937e-06,
      "loss": 0.0164,
      "step": 1752840
    },
    {
      "epoch": 2.8685938348945754,
      "grad_norm": 0.908031165599823,
      "learning_rate": 4.29090094423542e-06,
      "loss": 0.018,
      "step": 1752860
    },
    {
      "epoch": 2.8686265653332286,
      "grad_norm": 0.1584591120481491,
      "learning_rate": 4.290835052021903e-06,
      "loss": 0.0199,
      "step": 1752880
    },
    {
      "epoch": 2.8686592957718817,
      "grad_norm": 0.12951774895191193,
      "learning_rate": 4.2907691598083855e-06,
      "loss": 0.0146,
      "step": 1752900
    },
    {
      "epoch": 2.8686920262105353,
      "grad_norm": 0.1597808301448822,
      "learning_rate": 4.290703267594868e-06,
      "loss": 0.0092,
      "step": 1752920
    },
    {
      "epoch": 2.8687247566491885,
      "grad_norm": 0.2445484846830368,
      "learning_rate": 4.290637375381351e-06,
      "loss": 0.0156,
      "step": 1752940
    },
    {
      "epoch": 2.868757487087842,
      "grad_norm": 0.7574231624603271,
      "learning_rate": 4.2905714831678345e-06,
      "loss": 0.0179,
      "step": 1752960
    },
    {
      "epoch": 2.868790217526495,
      "grad_norm": 0.07922258228063583,
      "learning_rate": 4.290505590954317e-06,
      "loss": 0.0093,
      "step": 1752980
    },
    {
      "epoch": 2.868822947965149,
      "grad_norm": 0.6824073791503906,
      "learning_rate": 4.2904396987408e-06,
      "loss": 0.0154,
      "step": 1753000
    },
    {
      "epoch": 2.868855678403802,
      "grad_norm": 0.9069284200668335,
      "learning_rate": 4.290373806527284e-06,
      "loss": 0.0126,
      "step": 1753020
    },
    {
      "epoch": 2.868888408842455,
      "grad_norm": 2.282650947570801,
      "learning_rate": 4.290307914313766e-06,
      "loss": 0.0179,
      "step": 1753040
    },
    {
      "epoch": 2.8689211392811087,
      "grad_norm": 0.4378809928894043,
      "learning_rate": 4.290242022100249e-06,
      "loss": 0.0159,
      "step": 1753060
    },
    {
      "epoch": 2.868953869719762,
      "grad_norm": 0.1401422917842865,
      "learning_rate": 4.290176129886732e-06,
      "loss": 0.013,
      "step": 1753080
    },
    {
      "epoch": 2.8689866001584154,
      "grad_norm": 0.2073768824338913,
      "learning_rate": 4.2901102376732146e-06,
      "loss": 0.0128,
      "step": 1753100
    },
    {
      "epoch": 2.8690193305970686,
      "grad_norm": 0.45205986499786377,
      "learning_rate": 4.290044345459697e-06,
      "loss": 0.013,
      "step": 1753120
    },
    {
      "epoch": 2.869052061035722,
      "grad_norm": 0.6004285216331482,
      "learning_rate": 4.28997845324618e-06,
      "loss": 0.0132,
      "step": 1753140
    },
    {
      "epoch": 2.8690847914743753,
      "grad_norm": 0.40213704109191895,
      "learning_rate": 4.289912561032663e-06,
      "loss": 0.0098,
      "step": 1753160
    },
    {
      "epoch": 2.8691175219130285,
      "grad_norm": 0.05839996784925461,
      "learning_rate": 4.2898466688191455e-06,
      "loss": 0.0129,
      "step": 1753180
    },
    {
      "epoch": 2.869150252351682,
      "grad_norm": 1.2805856466293335,
      "learning_rate": 4.289780776605629e-06,
      "loss": 0.0171,
      "step": 1753200
    },
    {
      "epoch": 2.8691829827903352,
      "grad_norm": 0.21969574689865112,
      "learning_rate": 4.289714884392112e-06,
      "loss": 0.0148,
      "step": 1753220
    },
    {
      "epoch": 2.869215713228989,
      "grad_norm": 0.5171757936477661,
      "learning_rate": 4.289648992178595e-06,
      "loss": 0.0116,
      "step": 1753240
    },
    {
      "epoch": 2.869248443667642,
      "grad_norm": 0.4151156544685364,
      "learning_rate": 4.289583099965077e-06,
      "loss": 0.0114,
      "step": 1753260
    },
    {
      "epoch": 2.8692811741062956,
      "grad_norm": 0.13466982543468475,
      "learning_rate": 4.28951720775156e-06,
      "loss": 0.0133,
      "step": 1753280
    },
    {
      "epoch": 2.8693139045449487,
      "grad_norm": 0.5046431422233582,
      "learning_rate": 4.289451315538043e-06,
      "loss": 0.0155,
      "step": 1753300
    },
    {
      "epoch": 2.869346634983602,
      "grad_norm": 0.20857995748519897,
      "learning_rate": 4.289385423324526e-06,
      "loss": 0.0217,
      "step": 1753320
    },
    {
      "epoch": 2.8693793654222555,
      "grad_norm": 0.2974226474761963,
      "learning_rate": 4.289319531111009e-06,
      "loss": 0.0092,
      "step": 1753340
    },
    {
      "epoch": 2.8694120958609086,
      "grad_norm": 0.2958325445652008,
      "learning_rate": 4.289253638897492e-06,
      "loss": 0.0167,
      "step": 1753360
    },
    {
      "epoch": 2.869444826299562,
      "grad_norm": 0.5228776335716248,
      "learning_rate": 4.289187746683975e-06,
      "loss": 0.017,
      "step": 1753380
    },
    {
      "epoch": 2.8694775567382154,
      "grad_norm": 0.11025891453027725,
      "learning_rate": 4.289121854470457e-06,
      "loss": 0.0102,
      "step": 1753400
    },
    {
      "epoch": 2.869510287176869,
      "grad_norm": 0.5333991050720215,
      "learning_rate": 4.289055962256941e-06,
      "loss": 0.013,
      "step": 1753420
    },
    {
      "epoch": 2.869543017615522,
      "grad_norm": 0.5957240462303162,
      "learning_rate": 4.288990070043424e-06,
      "loss": 0.0179,
      "step": 1753440
    },
    {
      "epoch": 2.8695757480541753,
      "grad_norm": 0.11349517852067947,
      "learning_rate": 4.2889241778299064e-06,
      "loss": 0.0156,
      "step": 1753460
    },
    {
      "epoch": 2.869608478492829,
      "grad_norm": 0.7162151336669922,
      "learning_rate": 4.288858285616389e-06,
      "loss": 0.0133,
      "step": 1753480
    },
    {
      "epoch": 2.869641208931482,
      "grad_norm": 0.3141403794288635,
      "learning_rate": 4.288792393402872e-06,
      "loss": 0.0192,
      "step": 1753500
    },
    {
      "epoch": 2.8696739393701356,
      "grad_norm": 0.31406182050704956,
      "learning_rate": 4.288726501189355e-06,
      "loss": 0.0133,
      "step": 1753520
    },
    {
      "epoch": 2.8697066698087887,
      "grad_norm": 5.57948637008667,
      "learning_rate": 4.288660608975837e-06,
      "loss": 0.0143,
      "step": 1753540
    },
    {
      "epoch": 2.8697394002474423,
      "grad_norm": 0.24146541953086853,
      "learning_rate": 4.28859471676232e-06,
      "loss": 0.0122,
      "step": 1753560
    },
    {
      "epoch": 2.8697721306860955,
      "grad_norm": 0.2640784978866577,
      "learning_rate": 4.288528824548803e-06,
      "loss": 0.0121,
      "step": 1753580
    },
    {
      "epoch": 2.8698048611247486,
      "grad_norm": 0.9372009038925171,
      "learning_rate": 4.2884629323352865e-06,
      "loss": 0.0113,
      "step": 1753600
    },
    {
      "epoch": 2.8698375915634022,
      "grad_norm": 0.20907652378082275,
      "learning_rate": 4.288397040121769e-06,
      "loss": 0.0097,
      "step": 1753620
    },
    {
      "epoch": 2.8698703220020554,
      "grad_norm": 0.24376429617404938,
      "learning_rate": 4.288331147908252e-06,
      "loss": 0.0163,
      "step": 1753640
    },
    {
      "epoch": 2.8699030524407085,
      "grad_norm": 0.3873516023159027,
      "learning_rate": 4.2882652556947355e-06,
      "loss": 0.0124,
      "step": 1753660
    },
    {
      "epoch": 2.869935782879362,
      "grad_norm": 0.23085322976112366,
      "learning_rate": 4.288199363481218e-06,
      "loss": 0.0148,
      "step": 1753680
    },
    {
      "epoch": 2.8699685133180157,
      "grad_norm": 0.4790872037410736,
      "learning_rate": 4.288133471267701e-06,
      "loss": 0.0154,
      "step": 1753700
    },
    {
      "epoch": 2.870001243756669,
      "grad_norm": 0.7705032229423523,
      "learning_rate": 4.288067579054184e-06,
      "loss": 0.0185,
      "step": 1753720
    },
    {
      "epoch": 2.870033974195322,
      "grad_norm": 0.3314429223537445,
      "learning_rate": 4.2880016868406665e-06,
      "loss": 0.011,
      "step": 1753740
    },
    {
      "epoch": 2.8700667046339756,
      "grad_norm": 0.5769588947296143,
      "learning_rate": 4.287935794627149e-06,
      "loss": 0.0118,
      "step": 1753760
    },
    {
      "epoch": 2.8700994350726288,
      "grad_norm": 0.2289852648973465,
      "learning_rate": 4.287869902413632e-06,
      "loss": 0.0087,
      "step": 1753780
    },
    {
      "epoch": 2.870132165511282,
      "grad_norm": 0.11971055716276169,
      "learning_rate": 4.287804010200115e-06,
      "loss": 0.0135,
      "step": 1753800
    },
    {
      "epoch": 2.8701648959499355,
      "grad_norm": 0.30480334162712097,
      "learning_rate": 4.287738117986598e-06,
      "loss": 0.0138,
      "step": 1753820
    },
    {
      "epoch": 2.870197626388589,
      "grad_norm": 0.09883163869380951,
      "learning_rate": 4.287672225773081e-06,
      "loss": 0.0106,
      "step": 1753840
    },
    {
      "epoch": 2.8702303568272423,
      "grad_norm": 0.3265584409236908,
      "learning_rate": 4.287606333559564e-06,
      "loss": 0.011,
      "step": 1753860
    },
    {
      "epoch": 2.8702630872658954,
      "grad_norm": 0.281281054019928,
      "learning_rate": 4.2875404413460465e-06,
      "loss": 0.0144,
      "step": 1753880
    },
    {
      "epoch": 2.870295817704549,
      "grad_norm": 0.13965082168579102,
      "learning_rate": 4.287474549132529e-06,
      "loss": 0.0126,
      "step": 1753900
    },
    {
      "epoch": 2.870328548143202,
      "grad_norm": 0.06513190269470215,
      "learning_rate": 4.287408656919012e-06,
      "loss": 0.0111,
      "step": 1753920
    },
    {
      "epoch": 2.8703612785818553,
      "grad_norm": 0.2842828929424286,
      "learning_rate": 4.287342764705495e-06,
      "loss": 0.0122,
      "step": 1753940
    },
    {
      "epoch": 2.870394009020509,
      "grad_norm": 0.141410231590271,
      "learning_rate": 4.2872768724919775e-06,
      "loss": 0.0117,
      "step": 1753960
    },
    {
      "epoch": 2.8704267394591625,
      "grad_norm": 0.18087245523929596,
      "learning_rate": 4.28721098027846e-06,
      "loss": 0.0102,
      "step": 1753980
    },
    {
      "epoch": 2.8704594698978156,
      "grad_norm": 0.6088244318962097,
      "learning_rate": 4.287145088064944e-06,
      "loss": 0.0203,
      "step": 1754000
    },
    {
      "epoch": 2.870492200336469,
      "grad_norm": 0.2994316518306732,
      "learning_rate": 4.2870791958514265e-06,
      "loss": 0.0104,
      "step": 1754020
    },
    {
      "epoch": 2.8705249307751224,
      "grad_norm": 0.24519477784633636,
      "learning_rate": 4.287013303637909e-06,
      "loss": 0.0106,
      "step": 1754040
    },
    {
      "epoch": 2.8705576612137755,
      "grad_norm": 0.0919773131608963,
      "learning_rate": 4.286947411424393e-06,
      "loss": 0.0086,
      "step": 1754060
    },
    {
      "epoch": 2.8705903916524287,
      "grad_norm": 0.17962788045406342,
      "learning_rate": 4.286881519210876e-06,
      "loss": 0.0151,
      "step": 1754080
    },
    {
      "epoch": 2.8706231220910823,
      "grad_norm": 0.7398037314414978,
      "learning_rate": 4.286815626997358e-06,
      "loss": 0.0157,
      "step": 1754100
    },
    {
      "epoch": 2.870655852529736,
      "grad_norm": 0.11137916147708893,
      "learning_rate": 4.286749734783841e-06,
      "loss": 0.0127,
      "step": 1754120
    },
    {
      "epoch": 2.870688582968389,
      "grad_norm": 0.7684920430183411,
      "learning_rate": 4.286683842570324e-06,
      "loss": 0.0129,
      "step": 1754140
    },
    {
      "epoch": 2.870721313407042,
      "grad_norm": 0.1846413016319275,
      "learning_rate": 4.2866179503568066e-06,
      "loss": 0.0128,
      "step": 1754160
    },
    {
      "epoch": 2.8707540438456958,
      "grad_norm": 0.1783728003501892,
      "learning_rate": 4.286552058143289e-06,
      "loss": 0.0101,
      "step": 1754180
    },
    {
      "epoch": 2.870786774284349,
      "grad_norm": 0.3207547962665558,
      "learning_rate": 4.286486165929772e-06,
      "loss": 0.0146,
      "step": 1754200
    },
    {
      "epoch": 2.870819504723002,
      "grad_norm": 0.26333633065223694,
      "learning_rate": 4.286420273716256e-06,
      "loss": 0.0151,
      "step": 1754220
    },
    {
      "epoch": 2.8708522351616557,
      "grad_norm": 0.2475663721561432,
      "learning_rate": 4.286354381502738e-06,
      "loss": 0.012,
      "step": 1754240
    },
    {
      "epoch": 2.870884965600309,
      "grad_norm": 0.5387585163116455,
      "learning_rate": 4.286288489289221e-06,
      "loss": 0.016,
      "step": 1754260
    },
    {
      "epoch": 2.8709176960389624,
      "grad_norm": 0.4627273380756378,
      "learning_rate": 4.286222597075704e-06,
      "loss": 0.0146,
      "step": 1754280
    },
    {
      "epoch": 2.8709504264776156,
      "grad_norm": 0.2957872748374939,
      "learning_rate": 4.286156704862187e-06,
      "loss": 0.0138,
      "step": 1754300
    },
    {
      "epoch": 2.870983156916269,
      "grad_norm": 0.41875821352005005,
      "learning_rate": 4.286090812648669e-06,
      "loss": 0.0144,
      "step": 1754320
    },
    {
      "epoch": 2.8710158873549223,
      "grad_norm": 0.4438144564628601,
      "learning_rate": 4.286024920435152e-06,
      "loss": 0.0131,
      "step": 1754340
    },
    {
      "epoch": 2.8710486177935755,
      "grad_norm": 0.1646609902381897,
      "learning_rate": 4.285959028221636e-06,
      "loss": 0.0102,
      "step": 1754360
    },
    {
      "epoch": 2.871081348232229,
      "grad_norm": 0.10814585536718369,
      "learning_rate": 4.285893136008118e-06,
      "loss": 0.0142,
      "step": 1754380
    },
    {
      "epoch": 2.871114078670882,
      "grad_norm": 0.40782731771469116,
      "learning_rate": 4.285827243794601e-06,
      "loss": 0.0146,
      "step": 1754400
    },
    {
      "epoch": 2.871146809109536,
      "grad_norm": 0.9861856698989868,
      "learning_rate": 4.285761351581084e-06,
      "loss": 0.0112,
      "step": 1754420
    },
    {
      "epoch": 2.871179539548189,
      "grad_norm": 0.3706420361995697,
      "learning_rate": 4.2856954593675675e-06,
      "loss": 0.017,
      "step": 1754440
    },
    {
      "epoch": 2.8712122699868425,
      "grad_norm": 0.2660188674926758,
      "learning_rate": 4.28562956715405e-06,
      "loss": 0.0126,
      "step": 1754460
    },
    {
      "epoch": 2.8712450004254957,
      "grad_norm": 1.0348026752471924,
      "learning_rate": 4.285563674940533e-06,
      "loss": 0.0171,
      "step": 1754480
    },
    {
      "epoch": 2.871277730864149,
      "grad_norm": 0.9189220666885376,
      "learning_rate": 4.285497782727016e-06,
      "loss": 0.0145,
      "step": 1754500
    },
    {
      "epoch": 2.8713104613028024,
      "grad_norm": 0.32841646671295166,
      "learning_rate": 4.285431890513498e-06,
      "loss": 0.0136,
      "step": 1754520
    },
    {
      "epoch": 2.8713431917414556,
      "grad_norm": 0.14073015749454498,
      "learning_rate": 4.285365998299981e-06,
      "loss": 0.0095,
      "step": 1754540
    },
    {
      "epoch": 2.871375922180109,
      "grad_norm": 0.19404587149620056,
      "learning_rate": 4.285300106086464e-06,
      "loss": 0.0137,
      "step": 1754560
    },
    {
      "epoch": 2.8714086526187623,
      "grad_norm": 0.2554800808429718,
      "learning_rate": 4.285234213872947e-06,
      "loss": 0.012,
      "step": 1754580
    },
    {
      "epoch": 2.871441383057416,
      "grad_norm": 0.4767175018787384,
      "learning_rate": 4.285168321659429e-06,
      "loss": 0.0189,
      "step": 1754600
    },
    {
      "epoch": 2.871474113496069,
      "grad_norm": 0.3353717625141144,
      "learning_rate": 4.285102429445913e-06,
      "loss": 0.0165,
      "step": 1754620
    },
    {
      "epoch": 2.871506843934722,
      "grad_norm": 0.2818413972854614,
      "learning_rate": 4.285036537232396e-06,
      "loss": 0.0089,
      "step": 1754640
    },
    {
      "epoch": 2.871539574373376,
      "grad_norm": 0.38926517963409424,
      "learning_rate": 4.2849706450188784e-06,
      "loss": 0.0172,
      "step": 1754660
    },
    {
      "epoch": 2.871572304812029,
      "grad_norm": 0.9105210900306702,
      "learning_rate": 4.284904752805361e-06,
      "loss": 0.0177,
      "step": 1754680
    },
    {
      "epoch": 2.8716050352506826,
      "grad_norm": 0.49204203486442566,
      "learning_rate": 4.284838860591844e-06,
      "loss": 0.0139,
      "step": 1754700
    },
    {
      "epoch": 2.8716377656893357,
      "grad_norm": 0.4016994833946228,
      "learning_rate": 4.2847729683783275e-06,
      "loss": 0.0143,
      "step": 1754720
    },
    {
      "epoch": 2.8716704961279893,
      "grad_norm": 0.09755232185125351,
      "learning_rate": 4.28470707616481e-06,
      "loss": 0.015,
      "step": 1754740
    },
    {
      "epoch": 2.8717032265666425,
      "grad_norm": 0.25557997822761536,
      "learning_rate": 4.284641183951293e-06,
      "loss": 0.0203,
      "step": 1754760
    },
    {
      "epoch": 2.8717359570052956,
      "grad_norm": 0.6329014301300049,
      "learning_rate": 4.284575291737776e-06,
      "loss": 0.0116,
      "step": 1754780
    },
    {
      "epoch": 2.871768687443949,
      "grad_norm": 0.18348698318004608,
      "learning_rate": 4.2845093995242585e-06,
      "loss": 0.0141,
      "step": 1754800
    },
    {
      "epoch": 2.8718014178826023,
      "grad_norm": 0.19541403651237488,
      "learning_rate": 4.284443507310741e-06,
      "loss": 0.0093,
      "step": 1754820
    },
    {
      "epoch": 2.871834148321256,
      "grad_norm": 0.13160885870456696,
      "learning_rate": 4.284377615097225e-06,
      "loss": 0.0148,
      "step": 1754840
    },
    {
      "epoch": 2.871866878759909,
      "grad_norm": 0.5702201128005981,
      "learning_rate": 4.2843117228837075e-06,
      "loss": 0.0157,
      "step": 1754860
    },
    {
      "epoch": 2.8718996091985627,
      "grad_norm": 0.8593663573265076,
      "learning_rate": 4.28424583067019e-06,
      "loss": 0.0162,
      "step": 1754880
    },
    {
      "epoch": 2.871932339637216,
      "grad_norm": 0.09945593774318695,
      "learning_rate": 4.284179938456673e-06,
      "loss": 0.0125,
      "step": 1754900
    },
    {
      "epoch": 2.871965070075869,
      "grad_norm": 0.3630436062812805,
      "learning_rate": 4.284114046243156e-06,
      "loss": 0.0168,
      "step": 1754920
    },
    {
      "epoch": 2.8719978005145226,
      "grad_norm": 1.0295395851135254,
      "learning_rate": 4.2840481540296385e-06,
      "loss": 0.0072,
      "step": 1754940
    },
    {
      "epoch": 2.8720305309531757,
      "grad_norm": 0.36209699511528015,
      "learning_rate": 4.283982261816121e-06,
      "loss": 0.0103,
      "step": 1754960
    },
    {
      "epoch": 2.8720632613918293,
      "grad_norm": 0.19340232014656067,
      "learning_rate": 4.283916369602604e-06,
      "loss": 0.0123,
      "step": 1754980
    },
    {
      "epoch": 2.8720959918304825,
      "grad_norm": 0.6370640993118286,
      "learning_rate": 4.283850477389087e-06,
      "loss": 0.0126,
      "step": 1755000
    },
    {
      "epoch": 2.872128722269136,
      "grad_norm": 0.09703181684017181,
      "learning_rate": 4.28378458517557e-06,
      "loss": 0.0114,
      "step": 1755020
    },
    {
      "epoch": 2.872161452707789,
      "grad_norm": 0.21321476995944977,
      "learning_rate": 4.283718692962053e-06,
      "loss": 0.0172,
      "step": 1755040
    },
    {
      "epoch": 2.8721941831464424,
      "grad_norm": 0.9965019226074219,
      "learning_rate": 4.283652800748536e-06,
      "loss": 0.0141,
      "step": 1755060
    },
    {
      "epoch": 2.872226913585096,
      "grad_norm": 0.5143894553184509,
      "learning_rate": 4.283586908535019e-06,
      "loss": 0.0146,
      "step": 1755080
    },
    {
      "epoch": 2.872259644023749,
      "grad_norm": 0.41549861431121826,
      "learning_rate": 4.283521016321502e-06,
      "loss": 0.0119,
      "step": 1755100
    },
    {
      "epoch": 2.8722923744624027,
      "grad_norm": 0.4093700051307678,
      "learning_rate": 4.283455124107985e-06,
      "loss": 0.0153,
      "step": 1755120
    },
    {
      "epoch": 2.872325104901056,
      "grad_norm": 0.11805152893066406,
      "learning_rate": 4.283389231894468e-06,
      "loss": 0.0101,
      "step": 1755140
    },
    {
      "epoch": 2.8723578353397095,
      "grad_norm": 0.4478532075881958,
      "learning_rate": 4.28332333968095e-06,
      "loss": 0.0111,
      "step": 1755160
    },
    {
      "epoch": 2.8723905657783626,
      "grad_norm": 0.5163971781730652,
      "learning_rate": 4.283257447467433e-06,
      "loss": 0.0177,
      "step": 1755180
    },
    {
      "epoch": 2.8724232962170158,
      "grad_norm": 0.4382765293121338,
      "learning_rate": 4.283191555253916e-06,
      "loss": 0.0111,
      "step": 1755200
    },
    {
      "epoch": 2.8724560266556693,
      "grad_norm": 0.11844801157712936,
      "learning_rate": 4.2831256630403985e-06,
      "loss": 0.0092,
      "step": 1755220
    },
    {
      "epoch": 2.8724887570943225,
      "grad_norm": 0.41915038228034973,
      "learning_rate": 4.283059770826882e-06,
      "loss": 0.0133,
      "step": 1755240
    },
    {
      "epoch": 2.8725214875329756,
      "grad_norm": 0.21025361120700836,
      "learning_rate": 4.282993878613365e-06,
      "loss": 0.0134,
      "step": 1755260
    },
    {
      "epoch": 2.8725542179716292,
      "grad_norm": 0.3640115261077881,
      "learning_rate": 4.282927986399848e-06,
      "loss": 0.0186,
      "step": 1755280
    },
    {
      "epoch": 2.872586948410283,
      "grad_norm": 0.3685325086116791,
      "learning_rate": 4.28286209418633e-06,
      "loss": 0.0137,
      "step": 1755300
    },
    {
      "epoch": 2.872619678848936,
      "grad_norm": 0.19629833102226257,
      "learning_rate": 4.282796201972813e-06,
      "loss": 0.0096,
      "step": 1755320
    },
    {
      "epoch": 2.872652409287589,
      "grad_norm": 0.3687836825847626,
      "learning_rate": 4.282730309759296e-06,
      "loss": 0.0128,
      "step": 1755340
    },
    {
      "epoch": 2.8726851397262427,
      "grad_norm": 0.4829130172729492,
      "learning_rate": 4.2826644175457786e-06,
      "loss": 0.0113,
      "step": 1755360
    },
    {
      "epoch": 2.872717870164896,
      "grad_norm": 0.30416107177734375,
      "learning_rate": 4.282598525332261e-06,
      "loss": 0.0168,
      "step": 1755380
    },
    {
      "epoch": 2.872750600603549,
      "grad_norm": 0.9696740508079529,
      "learning_rate": 4.282532633118744e-06,
      "loss": 0.016,
      "step": 1755400
    },
    {
      "epoch": 2.8727833310422026,
      "grad_norm": 0.32140660285949707,
      "learning_rate": 4.282466740905228e-06,
      "loss": 0.0112,
      "step": 1755420
    },
    {
      "epoch": 2.872816061480856,
      "grad_norm": 0.1185232475399971,
      "learning_rate": 4.28240084869171e-06,
      "loss": 0.0146,
      "step": 1755440
    },
    {
      "epoch": 2.8728487919195094,
      "grad_norm": 0.35931167006492615,
      "learning_rate": 4.282334956478193e-06,
      "loss": 0.015,
      "step": 1755460
    },
    {
      "epoch": 2.8728815223581625,
      "grad_norm": 0.8116185665130615,
      "learning_rate": 4.282269064264677e-06,
      "loss": 0.0115,
      "step": 1755480
    },
    {
      "epoch": 2.872914252796816,
      "grad_norm": 0.4735698103904724,
      "learning_rate": 4.2822031720511594e-06,
      "loss": 0.015,
      "step": 1755500
    },
    {
      "epoch": 2.8729469832354693,
      "grad_norm": 0.3025301694869995,
      "learning_rate": 4.282137279837642e-06,
      "loss": 0.011,
      "step": 1755520
    },
    {
      "epoch": 2.8729797136741224,
      "grad_norm": 0.2786652743816376,
      "learning_rate": 4.282071387624125e-06,
      "loss": 0.0129,
      "step": 1755540
    },
    {
      "epoch": 2.873012444112776,
      "grad_norm": 0.16007384657859802,
      "learning_rate": 4.282005495410608e-06,
      "loss": 0.0134,
      "step": 1755560
    },
    {
      "epoch": 2.8730451745514296,
      "grad_norm": 0.1316327452659607,
      "learning_rate": 4.28193960319709e-06,
      "loss": 0.0095,
      "step": 1755580
    },
    {
      "epoch": 2.8730779049900828,
      "grad_norm": 0.16352494060993195,
      "learning_rate": 4.281873710983573e-06,
      "loss": 0.015,
      "step": 1755600
    },
    {
      "epoch": 2.873110635428736,
      "grad_norm": 0.6486978530883789,
      "learning_rate": 4.281807818770056e-06,
      "loss": 0.0156,
      "step": 1755620
    },
    {
      "epoch": 2.8731433658673895,
      "grad_norm": 0.39137911796569824,
      "learning_rate": 4.2817419265565395e-06,
      "loss": 0.0105,
      "step": 1755640
    },
    {
      "epoch": 2.8731760963060426,
      "grad_norm": 0.567296028137207,
      "learning_rate": 4.281676034343022e-06,
      "loss": 0.0144,
      "step": 1755660
    },
    {
      "epoch": 2.873208826744696,
      "grad_norm": 0.3354150354862213,
      "learning_rate": 4.281610142129505e-06,
      "loss": 0.0167,
      "step": 1755680
    },
    {
      "epoch": 2.8732415571833494,
      "grad_norm": 0.41721072793006897,
      "learning_rate": 4.281544249915988e-06,
      "loss": 0.0192,
      "step": 1755700
    },
    {
      "epoch": 2.873274287622003,
      "grad_norm": 0.35344648361206055,
      "learning_rate": 4.28147835770247e-06,
      "loss": 0.0102,
      "step": 1755720
    },
    {
      "epoch": 2.873307018060656,
      "grad_norm": 0.6204479932785034,
      "learning_rate": 4.281412465488953e-06,
      "loss": 0.0205,
      "step": 1755740
    },
    {
      "epoch": 2.8733397484993093,
      "grad_norm": 0.20160359144210815,
      "learning_rate": 4.281346573275436e-06,
      "loss": 0.0074,
      "step": 1755760
    },
    {
      "epoch": 2.873372478937963,
      "grad_norm": 0.16620491445064545,
      "learning_rate": 4.2812806810619195e-06,
      "loss": 0.0106,
      "step": 1755780
    },
    {
      "epoch": 2.873405209376616,
      "grad_norm": 0.26896941661834717,
      "learning_rate": 4.281214788848402e-06,
      "loss": 0.0101,
      "step": 1755800
    },
    {
      "epoch": 2.873437939815269,
      "grad_norm": 0.40743955969810486,
      "learning_rate": 4.281148896634885e-06,
      "loss": 0.0141,
      "step": 1755820
    },
    {
      "epoch": 2.8734706702539228,
      "grad_norm": 0.4568824768066406,
      "learning_rate": 4.281083004421368e-06,
      "loss": 0.017,
      "step": 1755840
    },
    {
      "epoch": 2.873503400692576,
      "grad_norm": 0.1935386210680008,
      "learning_rate": 4.281017112207851e-06,
      "loss": 0.0156,
      "step": 1755860
    },
    {
      "epoch": 2.8735361311312295,
      "grad_norm": 0.0380559116601944,
      "learning_rate": 4.280951219994334e-06,
      "loss": 0.0133,
      "step": 1755880
    },
    {
      "epoch": 2.8735688615698827,
      "grad_norm": 0.11790508776903152,
      "learning_rate": 4.280885327780817e-06,
      "loss": 0.0108,
      "step": 1755900
    },
    {
      "epoch": 2.8736015920085363,
      "grad_norm": 0.20449121296405792,
      "learning_rate": 4.2808194355672995e-06,
      "loss": 0.0152,
      "step": 1755920
    },
    {
      "epoch": 2.8736343224471894,
      "grad_norm": 0.2753660976886749,
      "learning_rate": 4.280753543353782e-06,
      "loss": 0.0196,
      "step": 1755940
    },
    {
      "epoch": 2.8736670528858426,
      "grad_norm": 0.30861467123031616,
      "learning_rate": 4.280687651140265e-06,
      "loss": 0.0173,
      "step": 1755960
    },
    {
      "epoch": 2.873699783324496,
      "grad_norm": 0.9906166195869446,
      "learning_rate": 4.280621758926748e-06,
      "loss": 0.0169,
      "step": 1755980
    },
    {
      "epoch": 2.8737325137631493,
      "grad_norm": 0.5873088240623474,
      "learning_rate": 4.2805558667132305e-06,
      "loss": 0.0187,
      "step": 1756000
    },
    {
      "epoch": 2.873765244201803,
      "grad_norm": 0.5147708058357239,
      "learning_rate": 4.280489974499713e-06,
      "loss": 0.0137,
      "step": 1756020
    },
    {
      "epoch": 2.873797974640456,
      "grad_norm": 0.4309082329273224,
      "learning_rate": 4.280424082286197e-06,
      "loss": 0.0142,
      "step": 1756040
    },
    {
      "epoch": 2.8738307050791096,
      "grad_norm": 5.122408866882324,
      "learning_rate": 4.2803581900726795e-06,
      "loss": 0.0117,
      "step": 1756060
    },
    {
      "epoch": 2.873863435517763,
      "grad_norm": 0.4125305116176605,
      "learning_rate": 4.280292297859162e-06,
      "loss": 0.0171,
      "step": 1756080
    },
    {
      "epoch": 2.873896165956416,
      "grad_norm": 0.2732316553592682,
      "learning_rate": 4.280226405645645e-06,
      "loss": 0.0136,
      "step": 1756100
    },
    {
      "epoch": 2.8739288963950695,
      "grad_norm": 0.25480061769485474,
      "learning_rate": 4.280160513432129e-06,
      "loss": 0.0161,
      "step": 1756120
    },
    {
      "epoch": 2.8739616268337227,
      "grad_norm": 0.8414437174797058,
      "learning_rate": 4.280094621218611e-06,
      "loss": 0.014,
      "step": 1756140
    },
    {
      "epoch": 2.8739943572723763,
      "grad_norm": 0.08894894272089005,
      "learning_rate": 4.280028729005094e-06,
      "loss": 0.0116,
      "step": 1756160
    },
    {
      "epoch": 2.8740270877110294,
      "grad_norm": 0.4115968942642212,
      "learning_rate": 4.279962836791577e-06,
      "loss": 0.0155,
      "step": 1756180
    },
    {
      "epoch": 2.874059818149683,
      "grad_norm": 0.14394481480121613,
      "learning_rate": 4.2798969445780596e-06,
      "loss": 0.0172,
      "step": 1756200
    },
    {
      "epoch": 2.874092548588336,
      "grad_norm": 0.562164843082428,
      "learning_rate": 4.279831052364542e-06,
      "loss": 0.0225,
      "step": 1756220
    },
    {
      "epoch": 2.8741252790269893,
      "grad_norm": 0.3000716269016266,
      "learning_rate": 4.279765160151025e-06,
      "loss": 0.0123,
      "step": 1756240
    },
    {
      "epoch": 2.874158009465643,
      "grad_norm": 0.1851162612438202,
      "learning_rate": 4.279699267937509e-06,
      "loss": 0.007,
      "step": 1756260
    },
    {
      "epoch": 2.874190739904296,
      "grad_norm": 0.774784505367279,
      "learning_rate": 4.279633375723991e-06,
      "loss": 0.0173,
      "step": 1756280
    },
    {
      "epoch": 2.8742234703429497,
      "grad_norm": 0.9087032675743103,
      "learning_rate": 4.279567483510474e-06,
      "loss": 0.0208,
      "step": 1756300
    },
    {
      "epoch": 2.874256200781603,
      "grad_norm": 1.5352530479431152,
      "learning_rate": 4.279501591296957e-06,
      "loss": 0.0155,
      "step": 1756320
    },
    {
      "epoch": 2.8742889312202564,
      "grad_norm": 0.0959131270647049,
      "learning_rate": 4.27943569908344e-06,
      "loss": 0.015,
      "step": 1756340
    },
    {
      "epoch": 2.8743216616589096,
      "grad_norm": 0.6970568895339966,
      "learning_rate": 4.279369806869922e-06,
      "loss": 0.0139,
      "step": 1756360
    },
    {
      "epoch": 2.8743543920975627,
      "grad_norm": 0.10753882676362991,
      "learning_rate": 4.279303914656405e-06,
      "loss": 0.0135,
      "step": 1756380
    },
    {
      "epoch": 2.8743871225362163,
      "grad_norm": 0.2404145747423172,
      "learning_rate": 4.279238022442888e-06,
      "loss": 0.0134,
      "step": 1756400
    },
    {
      "epoch": 2.8744198529748695,
      "grad_norm": 0.407488077878952,
      "learning_rate": 4.2791721302293705e-06,
      "loss": 0.0135,
      "step": 1756420
    },
    {
      "epoch": 2.874452583413523,
      "grad_norm": 0.27174440026283264,
      "learning_rate": 4.279106238015854e-06,
      "loss": 0.0117,
      "step": 1756440
    },
    {
      "epoch": 2.874485313852176,
      "grad_norm": 0.3409385085105896,
      "learning_rate": 4.279040345802337e-06,
      "loss": 0.0196,
      "step": 1756460
    },
    {
      "epoch": 2.87451804429083,
      "grad_norm": 0.15321514010429382,
      "learning_rate": 4.27897445358882e-06,
      "loss": 0.0109,
      "step": 1756480
    },
    {
      "epoch": 2.874550774729483,
      "grad_norm": 0.4370522201061249,
      "learning_rate": 4.278908561375303e-06,
      "loss": 0.0122,
      "step": 1756500
    },
    {
      "epoch": 2.874583505168136,
      "grad_norm": 0.3634394705295563,
      "learning_rate": 4.278842669161786e-06,
      "loss": 0.0123,
      "step": 1756520
    },
    {
      "epoch": 2.8746162356067897,
      "grad_norm": 0.6508448719978333,
      "learning_rate": 4.278776776948269e-06,
      "loss": 0.0163,
      "step": 1756540
    },
    {
      "epoch": 2.874648966045443,
      "grad_norm": 0.43300661444664,
      "learning_rate": 4.278710884734751e-06,
      "loss": 0.0101,
      "step": 1756560
    },
    {
      "epoch": 2.8746816964840964,
      "grad_norm": 0.22253596782684326,
      "learning_rate": 4.278644992521234e-06,
      "loss": 0.0164,
      "step": 1756580
    },
    {
      "epoch": 2.8747144269227496,
      "grad_norm": 0.46478399634361267,
      "learning_rate": 4.278579100307717e-06,
      "loss": 0.0149,
      "step": 1756600
    },
    {
      "epoch": 2.874747157361403,
      "grad_norm": 1.3973468542099,
      "learning_rate": 4.2785132080942e-06,
      "loss": 0.0133,
      "step": 1756620
    },
    {
      "epoch": 2.8747798878000563,
      "grad_norm": 0.1994890719652176,
      "learning_rate": 4.278447315880682e-06,
      "loss": 0.0143,
      "step": 1756640
    },
    {
      "epoch": 2.8748126182387095,
      "grad_norm": 0.4298940896987915,
      "learning_rate": 4.278381423667166e-06,
      "loss": 0.0183,
      "step": 1756660
    },
    {
      "epoch": 2.874845348677363,
      "grad_norm": 0.5072382688522339,
      "learning_rate": 4.278315531453649e-06,
      "loss": 0.0132,
      "step": 1756680
    },
    {
      "epoch": 2.8748780791160162,
      "grad_norm": 0.22654737532138824,
      "learning_rate": 4.2782496392401314e-06,
      "loss": 0.0166,
      "step": 1756700
    },
    {
      "epoch": 2.8749108095546694,
      "grad_norm": 0.2354884296655655,
      "learning_rate": 4.278183747026614e-06,
      "loss": 0.015,
      "step": 1756720
    },
    {
      "epoch": 2.874943539993323,
      "grad_norm": 0.24436119198799133,
      "learning_rate": 4.278117854813097e-06,
      "loss": 0.0107,
      "step": 1756740
    },
    {
      "epoch": 2.8749762704319766,
      "grad_norm": 1.4375076293945312,
      "learning_rate": 4.27805196259958e-06,
      "loss": 0.0089,
      "step": 1756760
    },
    {
      "epoch": 2.8750090008706297,
      "grad_norm": 0.18379159271717072,
      "learning_rate": 4.277986070386062e-06,
      "loss": 0.0139,
      "step": 1756780
    },
    {
      "epoch": 2.875041731309283,
      "grad_norm": 0.40400955080986023,
      "learning_rate": 4.277920178172545e-06,
      "loss": 0.015,
      "step": 1756800
    },
    {
      "epoch": 2.8750744617479365,
      "grad_norm": 0.23263908922672272,
      "learning_rate": 4.277854285959028e-06,
      "loss": 0.0118,
      "step": 1756820
    },
    {
      "epoch": 2.8751071921865896,
      "grad_norm": 0.3621589243412018,
      "learning_rate": 4.2777883937455115e-06,
      "loss": 0.0112,
      "step": 1756840
    },
    {
      "epoch": 2.8751399226252428,
      "grad_norm": 0.43911948800086975,
      "learning_rate": 4.277722501531994e-06,
      "loss": 0.0143,
      "step": 1756860
    },
    {
      "epoch": 2.8751726530638964,
      "grad_norm": 0.18942829966545105,
      "learning_rate": 4.277656609318477e-06,
      "loss": 0.0174,
      "step": 1756880
    },
    {
      "epoch": 2.87520538350255,
      "grad_norm": 0.3293299376964569,
      "learning_rate": 4.2775907171049605e-06,
      "loss": 0.0149,
      "step": 1756900
    },
    {
      "epoch": 2.875238113941203,
      "grad_norm": 0.21436123549938202,
      "learning_rate": 4.277524824891443e-06,
      "loss": 0.0151,
      "step": 1756920
    },
    {
      "epoch": 2.8752708443798562,
      "grad_norm": 0.1591874212026596,
      "learning_rate": 4.277458932677926e-06,
      "loss": 0.0161,
      "step": 1756940
    },
    {
      "epoch": 2.87530357481851,
      "grad_norm": 0.3412478566169739,
      "learning_rate": 4.277393040464409e-06,
      "loss": 0.0075,
      "step": 1756960
    },
    {
      "epoch": 2.875336305257163,
      "grad_norm": 0.0804375633597374,
      "learning_rate": 4.2773271482508915e-06,
      "loss": 0.0114,
      "step": 1756980
    },
    {
      "epoch": 2.875369035695816,
      "grad_norm": 0.6865289807319641,
      "learning_rate": 4.277261256037374e-06,
      "loss": 0.0113,
      "step": 1757000
    },
    {
      "epoch": 2.8754017661344697,
      "grad_norm": 0.41286927461624146,
      "learning_rate": 4.277195363823857e-06,
      "loss": 0.012,
      "step": 1757020
    },
    {
      "epoch": 2.8754344965731233,
      "grad_norm": 0.2558238208293915,
      "learning_rate": 4.27712947161034e-06,
      "loss": 0.0077,
      "step": 1757040
    },
    {
      "epoch": 2.8754672270117765,
      "grad_norm": 0.7443094253540039,
      "learning_rate": 4.277063579396823e-06,
      "loss": 0.0184,
      "step": 1757060
    },
    {
      "epoch": 2.8754999574504296,
      "grad_norm": 0.08608436584472656,
      "learning_rate": 4.276997687183306e-06,
      "loss": 0.0139,
      "step": 1757080
    },
    {
      "epoch": 2.8755326878890832,
      "grad_norm": 0.24819958209991455,
      "learning_rate": 4.276931794969789e-06,
      "loss": 0.0138,
      "step": 1757100
    },
    {
      "epoch": 2.8755654183277364,
      "grad_norm": 0.5529855489730835,
      "learning_rate": 4.2768659027562715e-06,
      "loss": 0.0109,
      "step": 1757120
    },
    {
      "epoch": 2.8755981487663895,
      "grad_norm": 0.16087958216667175,
      "learning_rate": 4.276800010542754e-06,
      "loss": 0.0099,
      "step": 1757140
    },
    {
      "epoch": 2.875630879205043,
      "grad_norm": 0.23295089602470398,
      "learning_rate": 4.276734118329237e-06,
      "loss": 0.0105,
      "step": 1757160
    },
    {
      "epoch": 2.8756636096436967,
      "grad_norm": 0.49153319001197815,
      "learning_rate": 4.276668226115721e-06,
      "loss": 0.017,
      "step": 1757180
    },
    {
      "epoch": 2.87569634008235,
      "grad_norm": 0.13920612633228302,
      "learning_rate": 4.276602333902203e-06,
      "loss": 0.0109,
      "step": 1757200
    },
    {
      "epoch": 2.875729070521003,
      "grad_norm": 0.22458812594413757,
      "learning_rate": 4.276536441688686e-06,
      "loss": 0.016,
      "step": 1757220
    },
    {
      "epoch": 2.8757618009596566,
      "grad_norm": 0.17928291857242584,
      "learning_rate": 4.276470549475169e-06,
      "loss": 0.0148,
      "step": 1757240
    },
    {
      "epoch": 2.8757945313983098,
      "grad_norm": 0.12211082875728607,
      "learning_rate": 4.2764046572616515e-06,
      "loss": 0.0162,
      "step": 1757260
    },
    {
      "epoch": 2.875827261836963,
      "grad_norm": 0.19146491587162018,
      "learning_rate": 4.276338765048135e-06,
      "loss": 0.0107,
      "step": 1757280
    },
    {
      "epoch": 2.8758599922756165,
      "grad_norm": 2.9475364685058594,
      "learning_rate": 4.276272872834618e-06,
      "loss": 0.0199,
      "step": 1757300
    },
    {
      "epoch": 2.8758927227142697,
      "grad_norm": 0.10620798170566559,
      "learning_rate": 4.276206980621101e-06,
      "loss": 0.0138,
      "step": 1757320
    },
    {
      "epoch": 2.8759254531529233,
      "grad_norm": 0.5410826206207275,
      "learning_rate": 4.276141088407583e-06,
      "loss": 0.0142,
      "step": 1757340
    },
    {
      "epoch": 2.8759581835915764,
      "grad_norm": 0.2880907356739044,
      "learning_rate": 4.276075196194066e-06,
      "loss": 0.0091,
      "step": 1757360
    },
    {
      "epoch": 2.87599091403023,
      "grad_norm": 0.1947239339351654,
      "learning_rate": 4.276009303980549e-06,
      "loss": 0.0129,
      "step": 1757380
    },
    {
      "epoch": 2.876023644468883,
      "grad_norm": 0.3174506723880768,
      "learning_rate": 4.2759434117670316e-06,
      "loss": 0.0164,
      "step": 1757400
    },
    {
      "epoch": 2.8760563749075363,
      "grad_norm": 0.865244448184967,
      "learning_rate": 4.275877519553514e-06,
      "loss": 0.0134,
      "step": 1757420
    },
    {
      "epoch": 2.87608910534619,
      "grad_norm": 0.2928448021411896,
      "learning_rate": 4.275811627339997e-06,
      "loss": 0.0131,
      "step": 1757440
    },
    {
      "epoch": 2.876121835784843,
      "grad_norm": 0.5758584141731262,
      "learning_rate": 4.275745735126481e-06,
      "loss": 0.0106,
      "step": 1757460
    },
    {
      "epoch": 2.8761545662234966,
      "grad_norm": 1.2918962240219116,
      "learning_rate": 4.275679842912963e-06,
      "loss": 0.0182,
      "step": 1757480
    },
    {
      "epoch": 2.87618729666215,
      "grad_norm": 0.07947175204753876,
      "learning_rate": 4.275613950699446e-06,
      "loss": 0.0077,
      "step": 1757500
    },
    {
      "epoch": 2.8762200271008034,
      "grad_norm": 0.1674017608165741,
      "learning_rate": 4.275548058485929e-06,
      "loss": 0.0255,
      "step": 1757520
    },
    {
      "epoch": 2.8762527575394565,
      "grad_norm": 0.1190241128206253,
      "learning_rate": 4.2754821662724124e-06,
      "loss": 0.0132,
      "step": 1757540
    },
    {
      "epoch": 2.8762854879781097,
      "grad_norm": 0.1932767778635025,
      "learning_rate": 4.275416274058895e-06,
      "loss": 0.0166,
      "step": 1757560
    },
    {
      "epoch": 2.8763182184167633,
      "grad_norm": 0.2865805923938751,
      "learning_rate": 4.275350381845378e-06,
      "loss": 0.0116,
      "step": 1757580
    },
    {
      "epoch": 2.8763509488554164,
      "grad_norm": 0.4722746014595032,
      "learning_rate": 4.275284489631861e-06,
      "loss": 0.0157,
      "step": 1757600
    },
    {
      "epoch": 2.87638367929407,
      "grad_norm": 0.3859331011772156,
      "learning_rate": 4.275218597418343e-06,
      "loss": 0.0126,
      "step": 1757620
    },
    {
      "epoch": 2.876416409732723,
      "grad_norm": 0.10517890006303787,
      "learning_rate": 4.275152705204826e-06,
      "loss": 0.0137,
      "step": 1757640
    },
    {
      "epoch": 2.8764491401713768,
      "grad_norm": 0.20381496846675873,
      "learning_rate": 4.275086812991309e-06,
      "loss": 0.0101,
      "step": 1757660
    },
    {
      "epoch": 2.87648187061003,
      "grad_norm": 0.41621458530426025,
      "learning_rate": 4.2750209207777925e-06,
      "loss": 0.0188,
      "step": 1757680
    },
    {
      "epoch": 2.876514601048683,
      "grad_norm": 0.8641904592514038,
      "learning_rate": 4.274955028564275e-06,
      "loss": 0.0143,
      "step": 1757700
    },
    {
      "epoch": 2.8765473314873367,
      "grad_norm": 0.16268914937973022,
      "learning_rate": 4.274889136350758e-06,
      "loss": 0.0122,
      "step": 1757720
    },
    {
      "epoch": 2.87658006192599,
      "grad_norm": 0.26165908575057983,
      "learning_rate": 4.274823244137241e-06,
      "loss": 0.012,
      "step": 1757740
    },
    {
      "epoch": 2.8766127923646434,
      "grad_norm": 0.509952962398529,
      "learning_rate": 4.2747573519237234e-06,
      "loss": 0.0175,
      "step": 1757760
    },
    {
      "epoch": 2.8766455228032966,
      "grad_norm": 0.3586522936820984,
      "learning_rate": 4.274691459710206e-06,
      "loss": 0.0153,
      "step": 1757780
    },
    {
      "epoch": 2.87667825324195,
      "grad_norm": 0.1566343754529953,
      "learning_rate": 4.274625567496689e-06,
      "loss": 0.015,
      "step": 1757800
    },
    {
      "epoch": 2.8767109836806033,
      "grad_norm": 0.39853227138519287,
      "learning_rate": 4.274559675283172e-06,
      "loss": 0.0166,
      "step": 1757820
    },
    {
      "epoch": 2.8767437141192564,
      "grad_norm": 0.5547950863838196,
      "learning_rate": 4.274493783069654e-06,
      "loss": 0.0128,
      "step": 1757840
    },
    {
      "epoch": 2.87677644455791,
      "grad_norm": 0.3142944872379303,
      "learning_rate": 4.274427890856138e-06,
      "loss": 0.0161,
      "step": 1757860
    },
    {
      "epoch": 2.876809174996563,
      "grad_norm": 0.16568759083747864,
      "learning_rate": 4.274361998642621e-06,
      "loss": 0.0104,
      "step": 1757880
    },
    {
      "epoch": 2.876841905435217,
      "grad_norm": 0.688011884689331,
      "learning_rate": 4.2742961064291035e-06,
      "loss": 0.0165,
      "step": 1757900
    },
    {
      "epoch": 2.87687463587387,
      "grad_norm": 0.3298836648464203,
      "learning_rate": 4.274230214215587e-06,
      "loss": 0.0123,
      "step": 1757920
    },
    {
      "epoch": 2.8769073663125235,
      "grad_norm": 0.15988436341285706,
      "learning_rate": 4.27416432200207e-06,
      "loss": 0.0114,
      "step": 1757940
    },
    {
      "epoch": 2.8769400967511767,
      "grad_norm": 0.6786752343177795,
      "learning_rate": 4.2740984297885525e-06,
      "loss": 0.0108,
      "step": 1757960
    },
    {
      "epoch": 2.87697282718983,
      "grad_norm": 0.19419780373573303,
      "learning_rate": 4.274032537575035e-06,
      "loss": 0.0144,
      "step": 1757980
    },
    {
      "epoch": 2.8770055576284834,
      "grad_norm": 0.36928483843803406,
      "learning_rate": 4.273966645361518e-06,
      "loss": 0.01,
      "step": 1758000
    },
    {
      "epoch": 2.8770382880671366,
      "grad_norm": 0.10184748470783234,
      "learning_rate": 4.273900753148001e-06,
      "loss": 0.0165,
      "step": 1758020
    },
    {
      "epoch": 2.87707101850579,
      "grad_norm": 0.11820328235626221,
      "learning_rate": 4.2738348609344835e-06,
      "loss": 0.0159,
      "step": 1758040
    },
    {
      "epoch": 2.8771037489444433,
      "grad_norm": 0.10992974787950516,
      "learning_rate": 4.273768968720966e-06,
      "loss": 0.0189,
      "step": 1758060
    },
    {
      "epoch": 2.877136479383097,
      "grad_norm": 0.4092489182949066,
      "learning_rate": 4.27370307650745e-06,
      "loss": 0.013,
      "step": 1758080
    },
    {
      "epoch": 2.87716920982175,
      "grad_norm": 0.3579041659832001,
      "learning_rate": 4.2736371842939325e-06,
      "loss": 0.0161,
      "step": 1758100
    },
    {
      "epoch": 2.877201940260403,
      "grad_norm": 0.18584023416042328,
      "learning_rate": 4.273571292080415e-06,
      "loss": 0.0153,
      "step": 1758120
    },
    {
      "epoch": 2.877234670699057,
      "grad_norm": 0.5388925075531006,
      "learning_rate": 4.273505399866898e-06,
      "loss": 0.0135,
      "step": 1758140
    },
    {
      "epoch": 2.87726740113771,
      "grad_norm": 0.5249854326248169,
      "learning_rate": 4.273439507653381e-06,
      "loss": 0.0155,
      "step": 1758160
    },
    {
      "epoch": 2.8773001315763636,
      "grad_norm": 0.2719552516937256,
      "learning_rate": 4.2733736154398635e-06,
      "loss": 0.0104,
      "step": 1758180
    },
    {
      "epoch": 2.8773328620150167,
      "grad_norm": 0.3482779562473297,
      "learning_rate": 4.273307723226346e-06,
      "loss": 0.0101,
      "step": 1758200
    },
    {
      "epoch": 2.8773655924536703,
      "grad_norm": 0.22760286927223206,
      "learning_rate": 4.273241831012829e-06,
      "loss": 0.0141,
      "step": 1758220
    },
    {
      "epoch": 2.8773983228923234,
      "grad_norm": 0.35756629705429077,
      "learning_rate": 4.2731759387993126e-06,
      "loss": 0.0152,
      "step": 1758240
    },
    {
      "epoch": 2.8774310533309766,
      "grad_norm": 0.37027379870414734,
      "learning_rate": 4.273110046585795e-06,
      "loss": 0.0123,
      "step": 1758260
    },
    {
      "epoch": 2.87746378376963,
      "grad_norm": 0.32028183341026306,
      "learning_rate": 4.273044154372278e-06,
      "loss": 0.0114,
      "step": 1758280
    },
    {
      "epoch": 2.8774965142082833,
      "grad_norm": 0.7179794311523438,
      "learning_rate": 4.272978262158762e-06,
      "loss": 0.0201,
      "step": 1758300
    },
    {
      "epoch": 2.8775292446469365,
      "grad_norm": 1.1923110485076904,
      "learning_rate": 4.272912369945244e-06,
      "loss": 0.0189,
      "step": 1758320
    },
    {
      "epoch": 2.87756197508559,
      "grad_norm": 0.34177127480506897,
      "learning_rate": 4.272846477731727e-06,
      "loss": 0.0108,
      "step": 1758340
    },
    {
      "epoch": 2.8775947055242437,
      "grad_norm": 0.31075993180274963,
      "learning_rate": 4.27278058551821e-06,
      "loss": 0.0107,
      "step": 1758360
    },
    {
      "epoch": 2.877627435962897,
      "grad_norm": 0.606245756149292,
      "learning_rate": 4.272714693304693e-06,
      "loss": 0.0182,
      "step": 1758380
    },
    {
      "epoch": 2.87766016640155,
      "grad_norm": 0.4006141722202301,
      "learning_rate": 4.272648801091175e-06,
      "loss": 0.0148,
      "step": 1758400
    },
    {
      "epoch": 2.8776928968402036,
      "grad_norm": 0.8998698592185974,
      "learning_rate": 4.272582908877658e-06,
      "loss": 0.0192,
      "step": 1758420
    },
    {
      "epoch": 2.8777256272788567,
      "grad_norm": 0.2947182059288025,
      "learning_rate": 4.272517016664141e-06,
      "loss": 0.0149,
      "step": 1758440
    },
    {
      "epoch": 2.87775835771751,
      "grad_norm": 0.1843731552362442,
      "learning_rate": 4.2724511244506236e-06,
      "loss": 0.0138,
      "step": 1758460
    },
    {
      "epoch": 2.8777910881561635,
      "grad_norm": 0.2881334125995636,
      "learning_rate": 4.272385232237107e-06,
      "loss": 0.0138,
      "step": 1758480
    },
    {
      "epoch": 2.877823818594817,
      "grad_norm": 0.4018968939781189,
      "learning_rate": 4.27231934002359e-06,
      "loss": 0.0162,
      "step": 1758500
    },
    {
      "epoch": 2.87785654903347,
      "grad_norm": 0.2528289556503296,
      "learning_rate": 4.272253447810073e-06,
      "loss": 0.0069,
      "step": 1758520
    },
    {
      "epoch": 2.8778892794721234,
      "grad_norm": 0.49405232071876526,
      "learning_rate": 4.272187555596555e-06,
      "loss": 0.014,
      "step": 1758540
    },
    {
      "epoch": 2.877922009910777,
      "grad_norm": 0.1105232983827591,
      "learning_rate": 4.272121663383038e-06,
      "loss": 0.0114,
      "step": 1758560
    },
    {
      "epoch": 2.87795474034943,
      "grad_norm": 0.6000370383262634,
      "learning_rate": 4.272055771169521e-06,
      "loss": 0.0117,
      "step": 1758580
    },
    {
      "epoch": 2.8779874707880833,
      "grad_norm": 0.3857424855232239,
      "learning_rate": 4.2719898789560044e-06,
      "loss": 0.0111,
      "step": 1758600
    },
    {
      "epoch": 2.878020201226737,
      "grad_norm": 0.1975332498550415,
      "learning_rate": 4.271923986742487e-06,
      "loss": 0.0129,
      "step": 1758620
    },
    {
      "epoch": 2.8780529316653904,
      "grad_norm": 0.9172144532203674,
      "learning_rate": 4.27185809452897e-06,
      "loss": 0.012,
      "step": 1758640
    },
    {
      "epoch": 2.8780856621040436,
      "grad_norm": 0.12016566097736359,
      "learning_rate": 4.271792202315453e-06,
      "loss": 0.0179,
      "step": 1758660
    },
    {
      "epoch": 2.8781183925426967,
      "grad_norm": 0.13757479190826416,
      "learning_rate": 4.271726310101935e-06,
      "loss": 0.0168,
      "step": 1758680
    },
    {
      "epoch": 2.8781511229813503,
      "grad_norm": 0.42374828457832336,
      "learning_rate": 4.271660417888419e-06,
      "loss": 0.0153,
      "step": 1758700
    },
    {
      "epoch": 2.8781838534200035,
      "grad_norm": 0.06544796377420425,
      "learning_rate": 4.271594525674902e-06,
      "loss": 0.0068,
      "step": 1758720
    },
    {
      "epoch": 2.8782165838586566,
      "grad_norm": 0.3536730110645294,
      "learning_rate": 4.2715286334613845e-06,
      "loss": 0.0135,
      "step": 1758740
    },
    {
      "epoch": 2.8782493142973102,
      "grad_norm": 0.45524460077285767,
      "learning_rate": 4.271462741247867e-06,
      "loss": 0.015,
      "step": 1758760
    },
    {
      "epoch": 2.8782820447359634,
      "grad_norm": 0.29341670870780945,
      "learning_rate": 4.27139684903435e-06,
      "loss": 0.0105,
      "step": 1758780
    },
    {
      "epoch": 2.878314775174617,
      "grad_norm": 0.6394863724708557,
      "learning_rate": 4.271330956820833e-06,
      "loss": 0.009,
      "step": 1758800
    },
    {
      "epoch": 2.87834750561327,
      "grad_norm": 1.0702711343765259,
      "learning_rate": 4.271265064607315e-06,
      "loss": 0.0152,
      "step": 1758820
    },
    {
      "epoch": 2.8783802360519237,
      "grad_norm": 0.5036845803260803,
      "learning_rate": 4.271199172393798e-06,
      "loss": 0.0142,
      "step": 1758840
    },
    {
      "epoch": 2.878412966490577,
      "grad_norm": 0.39642229676246643,
      "learning_rate": 4.271133280180281e-06,
      "loss": 0.0114,
      "step": 1758860
    },
    {
      "epoch": 2.87844569692923,
      "grad_norm": 0.2516120672225952,
      "learning_rate": 4.2710673879667645e-06,
      "loss": 0.0204,
      "step": 1758880
    },
    {
      "epoch": 2.8784784273678836,
      "grad_norm": 1.2225600481033325,
      "learning_rate": 4.271001495753247e-06,
      "loss": 0.015,
      "step": 1758900
    },
    {
      "epoch": 2.8785111578065368,
      "grad_norm": 0.614446759223938,
      "learning_rate": 4.27093560353973e-06,
      "loss": 0.0112,
      "step": 1758920
    },
    {
      "epoch": 2.8785438882451904,
      "grad_norm": 0.3737216293811798,
      "learning_rate": 4.2708697113262135e-06,
      "loss": 0.0104,
      "step": 1758940
    },
    {
      "epoch": 2.8785766186838435,
      "grad_norm": 0.6664482951164246,
      "learning_rate": 4.270803819112696e-06,
      "loss": 0.0152,
      "step": 1758960
    },
    {
      "epoch": 2.878609349122497,
      "grad_norm": 0.3264870345592499,
      "learning_rate": 4.270737926899179e-06,
      "loss": 0.0092,
      "step": 1758980
    },
    {
      "epoch": 2.8786420795611503,
      "grad_norm": 0.3316464126110077,
      "learning_rate": 4.270672034685662e-06,
      "loss": 0.0177,
      "step": 1759000
    },
    {
      "epoch": 2.8786748099998034,
      "grad_norm": 0.9108233451843262,
      "learning_rate": 4.2706061424721445e-06,
      "loss": 0.0119,
      "step": 1759020
    },
    {
      "epoch": 2.878707540438457,
      "grad_norm": 0.46948617696762085,
      "learning_rate": 4.270540250258627e-06,
      "loss": 0.01,
      "step": 1759040
    },
    {
      "epoch": 2.87874027087711,
      "grad_norm": 0.1593603938817978,
      "learning_rate": 4.27047435804511e-06,
      "loss": 0.0086,
      "step": 1759060
    },
    {
      "epoch": 2.8787730013157637,
      "grad_norm": 0.5360938906669617,
      "learning_rate": 4.270408465831593e-06,
      "loss": 0.0119,
      "step": 1759080
    },
    {
      "epoch": 2.878805731754417,
      "grad_norm": 0.6666414737701416,
      "learning_rate": 4.270342573618076e-06,
      "loss": 0.0086,
      "step": 1759100
    },
    {
      "epoch": 2.8788384621930705,
      "grad_norm": 1.0073553323745728,
      "learning_rate": 4.270276681404559e-06,
      "loss": 0.0198,
      "step": 1759120
    },
    {
      "epoch": 2.8788711926317236,
      "grad_norm": 0.2681402862071991,
      "learning_rate": 4.270210789191042e-06,
      "loss": 0.0147,
      "step": 1759140
    },
    {
      "epoch": 2.878903923070377,
      "grad_norm": 0.35866016149520874,
      "learning_rate": 4.2701448969775245e-06,
      "loss": 0.0181,
      "step": 1759160
    },
    {
      "epoch": 2.8789366535090304,
      "grad_norm": 0.4602007567882538,
      "learning_rate": 4.270079004764007e-06,
      "loss": 0.0116,
      "step": 1759180
    },
    {
      "epoch": 2.8789693839476835,
      "grad_norm": 0.2914383113384247,
      "learning_rate": 4.27001311255049e-06,
      "loss": 0.0147,
      "step": 1759200
    },
    {
      "epoch": 2.879002114386337,
      "grad_norm": 0.49522703886032104,
      "learning_rate": 4.269947220336973e-06,
      "loss": 0.0206,
      "step": 1759220
    },
    {
      "epoch": 2.8790348448249903,
      "grad_norm": 1.6198911666870117,
      "learning_rate": 4.2698813281234555e-06,
      "loss": 0.0127,
      "step": 1759240
    },
    {
      "epoch": 2.879067575263644,
      "grad_norm": 0.6097495555877686,
      "learning_rate": 4.269815435909938e-06,
      "loss": 0.0143,
      "step": 1759260
    },
    {
      "epoch": 2.879100305702297,
      "grad_norm": 0.4495570659637451,
      "learning_rate": 4.269749543696422e-06,
      "loss": 0.0127,
      "step": 1759280
    },
    {
      "epoch": 2.87913303614095,
      "grad_norm": 0.3188875913619995,
      "learning_rate": 4.2696836514829046e-06,
      "loss": 0.0144,
      "step": 1759300
    },
    {
      "epoch": 2.8791657665796038,
      "grad_norm": 0.27132630348205566,
      "learning_rate": 4.269617759269387e-06,
      "loss": 0.016,
      "step": 1759320
    },
    {
      "epoch": 2.879198497018257,
      "grad_norm": 0.19529685378074646,
      "learning_rate": 4.269551867055871e-06,
      "loss": 0.0111,
      "step": 1759340
    },
    {
      "epoch": 2.8792312274569105,
      "grad_norm": 0.21885401010513306,
      "learning_rate": 4.269485974842354e-06,
      "loss": 0.0134,
      "step": 1759360
    },
    {
      "epoch": 2.8792639578955637,
      "grad_norm": 0.5774708986282349,
      "learning_rate": 4.269420082628836e-06,
      "loss": 0.0125,
      "step": 1759380
    },
    {
      "epoch": 2.8792966883342173,
      "grad_norm": 0.5205053091049194,
      "learning_rate": 4.269354190415319e-06,
      "loss": 0.0185,
      "step": 1759400
    },
    {
      "epoch": 2.8793294187728704,
      "grad_norm": 1.2837481498718262,
      "learning_rate": 4.269288298201802e-06,
      "loss": 0.0146,
      "step": 1759420
    },
    {
      "epoch": 2.8793621492115236,
      "grad_norm": 0.3142990469932556,
      "learning_rate": 4.2692224059882846e-06,
      "loss": 0.0127,
      "step": 1759440
    },
    {
      "epoch": 2.879394879650177,
      "grad_norm": 0.16098764538764954,
      "learning_rate": 4.269156513774767e-06,
      "loss": 0.0151,
      "step": 1759460
    },
    {
      "epoch": 2.8794276100888303,
      "grad_norm": 0.16403059661388397,
      "learning_rate": 4.26909062156125e-06,
      "loss": 0.0105,
      "step": 1759480
    },
    {
      "epoch": 2.879460340527484,
      "grad_norm": 0.4930528402328491,
      "learning_rate": 4.269024729347734e-06,
      "loss": 0.0086,
      "step": 1759500
    },
    {
      "epoch": 2.879493070966137,
      "grad_norm": 0.2796371579170227,
      "learning_rate": 4.268958837134216e-06,
      "loss": 0.021,
      "step": 1759520
    },
    {
      "epoch": 2.8795258014047906,
      "grad_norm": 0.3609482944011688,
      "learning_rate": 4.268892944920699e-06,
      "loss": 0.014,
      "step": 1759540
    },
    {
      "epoch": 2.879558531843444,
      "grad_norm": 1.1992871761322021,
      "learning_rate": 4.268827052707182e-06,
      "loss": 0.015,
      "step": 1759560
    },
    {
      "epoch": 2.879591262282097,
      "grad_norm": 0.1377892941236496,
      "learning_rate": 4.268761160493665e-06,
      "loss": 0.0115,
      "step": 1759580
    },
    {
      "epoch": 2.8796239927207505,
      "grad_norm": 0.43550461530685425,
      "learning_rate": 4.268695268280147e-06,
      "loss": 0.0197,
      "step": 1759600
    },
    {
      "epoch": 2.8796567231594037,
      "grad_norm": 3.296713352203369,
      "learning_rate": 4.26862937606663e-06,
      "loss": 0.0104,
      "step": 1759620
    },
    {
      "epoch": 2.8796894535980573,
      "grad_norm": 0.09394523501396179,
      "learning_rate": 4.268563483853114e-06,
      "loss": 0.0135,
      "step": 1759640
    },
    {
      "epoch": 2.8797221840367104,
      "grad_norm": 0.47109416127204895,
      "learning_rate": 4.268497591639596e-06,
      "loss": 0.0159,
      "step": 1759660
    },
    {
      "epoch": 2.879754914475364,
      "grad_norm": 0.16438651084899902,
      "learning_rate": 4.268431699426079e-06,
      "loss": 0.0125,
      "step": 1759680
    },
    {
      "epoch": 2.879787644914017,
      "grad_norm": 0.2125190794467926,
      "learning_rate": 4.268365807212562e-06,
      "loss": 0.0159,
      "step": 1759700
    },
    {
      "epoch": 2.8798203753526703,
      "grad_norm": 1.7589930295944214,
      "learning_rate": 4.2682999149990455e-06,
      "loss": 0.0202,
      "step": 1759720
    },
    {
      "epoch": 2.879853105791324,
      "grad_norm": 0.3940814435482025,
      "learning_rate": 4.268234022785528e-06,
      "loss": 0.0131,
      "step": 1759740
    },
    {
      "epoch": 2.879885836229977,
      "grad_norm": 0.6055051684379578,
      "learning_rate": 4.268168130572011e-06,
      "loss": 0.0116,
      "step": 1759760
    },
    {
      "epoch": 2.87991856666863,
      "grad_norm": 0.2370591014623642,
      "learning_rate": 4.268102238358494e-06,
      "loss": 0.0163,
      "step": 1759780
    },
    {
      "epoch": 2.879951297107284,
      "grad_norm": 1.0675921440124512,
      "learning_rate": 4.2680363461449764e-06,
      "loss": 0.0151,
      "step": 1759800
    },
    {
      "epoch": 2.8799840275459374,
      "grad_norm": 0.5037222504615784,
      "learning_rate": 4.267970453931459e-06,
      "loss": 0.0159,
      "step": 1759820
    },
    {
      "epoch": 2.8800167579845906,
      "grad_norm": 0.2625497281551361,
      "learning_rate": 4.267904561717942e-06,
      "loss": 0.0151,
      "step": 1759840
    },
    {
      "epoch": 2.8800494884232437,
      "grad_norm": 0.23551799356937408,
      "learning_rate": 4.267838669504425e-06,
      "loss": 0.0134,
      "step": 1759860
    },
    {
      "epoch": 2.8800822188618973,
      "grad_norm": 0.5003089308738708,
      "learning_rate": 4.267772777290907e-06,
      "loss": 0.0193,
      "step": 1759880
    },
    {
      "epoch": 2.8801149493005505,
      "grad_norm": 0.23135679960250854,
      "learning_rate": 4.267706885077391e-06,
      "loss": 0.0095,
      "step": 1759900
    },
    {
      "epoch": 2.8801476797392036,
      "grad_norm": 0.2715442478656769,
      "learning_rate": 4.267640992863874e-06,
      "loss": 0.0141,
      "step": 1759920
    },
    {
      "epoch": 2.880180410177857,
      "grad_norm": 0.11053154617547989,
      "learning_rate": 4.2675751006503565e-06,
      "loss": 0.0115,
      "step": 1759940
    },
    {
      "epoch": 2.880213140616511,
      "grad_norm": 0.6454805135726929,
      "learning_rate": 4.267509208436839e-06,
      "loss": 0.0163,
      "step": 1759960
    },
    {
      "epoch": 2.880245871055164,
      "grad_norm": 0.11798207461833954,
      "learning_rate": 4.267443316223322e-06,
      "loss": 0.0078,
      "step": 1759980
    },
    {
      "epoch": 2.880278601493817,
      "grad_norm": 0.2463378608226776,
      "learning_rate": 4.2673774240098055e-06,
      "loss": 0.0143,
      "step": 1760000
    },
    {
      "epoch": 2.8803113319324707,
      "grad_norm": 0.3783237636089325,
      "learning_rate": 4.267311531796288e-06,
      "loss": 0.0113,
      "step": 1760020
    },
    {
      "epoch": 2.880344062371124,
      "grad_norm": 0.34026458859443665,
      "learning_rate": 4.267245639582771e-06,
      "loss": 0.0102,
      "step": 1760040
    },
    {
      "epoch": 2.880376792809777,
      "grad_norm": 0.42526981234550476,
      "learning_rate": 4.267179747369254e-06,
      "loss": 0.0101,
      "step": 1760060
    },
    {
      "epoch": 2.8804095232484306,
      "grad_norm": 0.5813665390014648,
      "learning_rate": 4.2671138551557365e-06,
      "loss": 0.0095,
      "step": 1760080
    },
    {
      "epoch": 2.880442253687084,
      "grad_norm": 0.20921973884105682,
      "learning_rate": 4.267047962942219e-06,
      "loss": 0.0156,
      "step": 1760100
    },
    {
      "epoch": 2.8804749841257373,
      "grad_norm": 0.3961679935455322,
      "learning_rate": 4.266982070728703e-06,
      "loss": 0.0096,
      "step": 1760120
    },
    {
      "epoch": 2.8805077145643905,
      "grad_norm": 0.06144391745328903,
      "learning_rate": 4.2669161785151855e-06,
      "loss": 0.0099,
      "step": 1760140
    },
    {
      "epoch": 2.880540445003044,
      "grad_norm": 0.77434903383255,
      "learning_rate": 4.266850286301668e-06,
      "loss": 0.013,
      "step": 1760160
    },
    {
      "epoch": 2.880573175441697,
      "grad_norm": 0.1906374990940094,
      "learning_rate": 4.266784394088151e-06,
      "loss": 0.0105,
      "step": 1760180
    },
    {
      "epoch": 2.8806059058803504,
      "grad_norm": 0.5089936256408691,
      "learning_rate": 4.266718501874634e-06,
      "loss": 0.0208,
      "step": 1760200
    },
    {
      "epoch": 2.880638636319004,
      "grad_norm": 0.4218790829181671,
      "learning_rate": 4.2666526096611165e-06,
      "loss": 0.0123,
      "step": 1760220
    },
    {
      "epoch": 2.8806713667576576,
      "grad_norm": 0.08925928920507431,
      "learning_rate": 4.266586717447599e-06,
      "loss": 0.0232,
      "step": 1760240
    },
    {
      "epoch": 2.8807040971963107,
      "grad_norm": 0.27889859676361084,
      "learning_rate": 4.266520825234082e-06,
      "loss": 0.0114,
      "step": 1760260
    },
    {
      "epoch": 2.880736827634964,
      "grad_norm": 0.49065136909484863,
      "learning_rate": 4.266454933020565e-06,
      "loss": 0.0099,
      "step": 1760280
    },
    {
      "epoch": 2.8807695580736175,
      "grad_norm": 0.5543739199638367,
      "learning_rate": 4.266389040807048e-06,
      "loss": 0.0127,
      "step": 1760300
    },
    {
      "epoch": 2.8808022885122706,
      "grad_norm": 0.14875605702400208,
      "learning_rate": 4.266323148593531e-06,
      "loss": 0.0128,
      "step": 1760320
    },
    {
      "epoch": 2.8808350189509238,
      "grad_norm": 0.4194435179233551,
      "learning_rate": 4.266257256380014e-06,
      "loss": 0.013,
      "step": 1760340
    },
    {
      "epoch": 2.8808677493895773,
      "grad_norm": 0.22271834313869476,
      "learning_rate": 4.266191364166497e-06,
      "loss": 0.0104,
      "step": 1760360
    },
    {
      "epoch": 2.8809004798282305,
      "grad_norm": 0.644420862197876,
      "learning_rate": 4.26612547195298e-06,
      "loss": 0.0116,
      "step": 1760380
    },
    {
      "epoch": 2.880933210266884,
      "grad_norm": 0.6941128969192505,
      "learning_rate": 4.266059579739463e-06,
      "loss": 0.0146,
      "step": 1760400
    },
    {
      "epoch": 2.8809659407055372,
      "grad_norm": 0.24736465513706207,
      "learning_rate": 4.265993687525946e-06,
      "loss": 0.0085,
      "step": 1760420
    },
    {
      "epoch": 2.880998671144191,
      "grad_norm": 0.22135992348194122,
      "learning_rate": 4.265927795312428e-06,
      "loss": 0.019,
      "step": 1760440
    },
    {
      "epoch": 2.881031401582844,
      "grad_norm": 0.15075688064098358,
      "learning_rate": 4.265861903098911e-06,
      "loss": 0.0139,
      "step": 1760460
    },
    {
      "epoch": 2.881064132021497,
      "grad_norm": 0.6042725443840027,
      "learning_rate": 4.265796010885394e-06,
      "loss": 0.0124,
      "step": 1760480
    },
    {
      "epoch": 2.8810968624601507,
      "grad_norm": 0.3656529188156128,
      "learning_rate": 4.2657301186718766e-06,
      "loss": 0.0108,
      "step": 1760500
    },
    {
      "epoch": 2.881129592898804,
      "grad_norm": 0.2793270945549011,
      "learning_rate": 4.26566422645836e-06,
      "loss": 0.0135,
      "step": 1760520
    },
    {
      "epoch": 2.8811623233374575,
      "grad_norm": 0.5532371401786804,
      "learning_rate": 4.265598334244843e-06,
      "loss": 0.0126,
      "step": 1760540
    },
    {
      "epoch": 2.8811950537761106,
      "grad_norm": 0.19371315836906433,
      "learning_rate": 4.265532442031326e-06,
      "loss": 0.0111,
      "step": 1760560
    },
    {
      "epoch": 2.881227784214764,
      "grad_norm": 0.3224443197250366,
      "learning_rate": 4.265466549817808e-06,
      "loss": 0.0102,
      "step": 1760580
    },
    {
      "epoch": 2.8812605146534174,
      "grad_norm": 0.3258051574230194,
      "learning_rate": 4.265400657604291e-06,
      "loss": 0.0122,
      "step": 1760600
    },
    {
      "epoch": 2.8812932450920705,
      "grad_norm": 0.8287779688835144,
      "learning_rate": 4.265334765390774e-06,
      "loss": 0.0121,
      "step": 1760620
    },
    {
      "epoch": 2.881325975530724,
      "grad_norm": 0.1028120219707489,
      "learning_rate": 4.265268873177257e-06,
      "loss": 0.0108,
      "step": 1760640
    },
    {
      "epoch": 2.8813587059693773,
      "grad_norm": 0.38174065947532654,
      "learning_rate": 4.265202980963739e-06,
      "loss": 0.0217,
      "step": 1760660
    },
    {
      "epoch": 2.881391436408031,
      "grad_norm": 0.736734926700592,
      "learning_rate": 4.265137088750222e-06,
      "loss": 0.0124,
      "step": 1760680
    },
    {
      "epoch": 2.881424166846684,
      "grad_norm": 0.16092079877853394,
      "learning_rate": 4.265071196536706e-06,
      "loss": 0.015,
      "step": 1760700
    },
    {
      "epoch": 2.8814568972853376,
      "grad_norm": 0.2931959331035614,
      "learning_rate": 4.265005304323188e-06,
      "loss": 0.0133,
      "step": 1760720
    },
    {
      "epoch": 2.8814896277239908,
      "grad_norm": 0.2875235974788666,
      "learning_rate": 4.264939412109671e-06,
      "loss": 0.0143,
      "step": 1760740
    },
    {
      "epoch": 2.881522358162644,
      "grad_norm": 0.31357455253601074,
      "learning_rate": 4.264873519896155e-06,
      "loss": 0.0142,
      "step": 1760760
    },
    {
      "epoch": 2.8815550886012975,
      "grad_norm": 0.48660987615585327,
      "learning_rate": 4.2648076276826375e-06,
      "loss": 0.0172,
      "step": 1760780
    },
    {
      "epoch": 2.8815878190399506,
      "grad_norm": 0.4095912277698517,
      "learning_rate": 4.26474173546912e-06,
      "loss": 0.0086,
      "step": 1760800
    },
    {
      "epoch": 2.8816205494786042,
      "grad_norm": 0.535812497138977,
      "learning_rate": 4.264675843255603e-06,
      "loss": 0.0086,
      "step": 1760820
    },
    {
      "epoch": 2.8816532799172574,
      "grad_norm": 0.46520882844924927,
      "learning_rate": 4.264609951042086e-06,
      "loss": 0.0106,
      "step": 1760840
    },
    {
      "epoch": 2.881686010355911,
      "grad_norm": 0.35719063878059387,
      "learning_rate": 4.264544058828568e-06,
      "loss": 0.0164,
      "step": 1760860
    },
    {
      "epoch": 2.881718740794564,
      "grad_norm": 0.6934451460838318,
      "learning_rate": 4.264478166615051e-06,
      "loss": 0.0156,
      "step": 1760880
    },
    {
      "epoch": 2.8817514712332173,
      "grad_norm": 0.7471317052841187,
      "learning_rate": 4.264412274401534e-06,
      "loss": 0.015,
      "step": 1760900
    },
    {
      "epoch": 2.881784201671871,
      "grad_norm": 0.3779977262020111,
      "learning_rate": 4.2643463821880175e-06,
      "loss": 0.0135,
      "step": 1760920
    },
    {
      "epoch": 2.881816932110524,
      "grad_norm": 0.27301427721977234,
      "learning_rate": 4.2642804899745e-06,
      "loss": 0.0141,
      "step": 1760940
    },
    {
      "epoch": 2.8818496625491776,
      "grad_norm": 0.2907666265964508,
      "learning_rate": 4.264214597760983e-06,
      "loss": 0.0175,
      "step": 1760960
    },
    {
      "epoch": 2.8818823929878308,
      "grad_norm": 0.35320788621902466,
      "learning_rate": 4.264148705547466e-06,
      "loss": 0.0141,
      "step": 1760980
    },
    {
      "epoch": 2.8819151234264844,
      "grad_norm": 0.547199547290802,
      "learning_rate": 4.2640828133339484e-06,
      "loss": 0.0112,
      "step": 1761000
    },
    {
      "epoch": 2.8819478538651375,
      "grad_norm": 0.4492410123348236,
      "learning_rate": 4.264016921120431e-06,
      "loss": 0.013,
      "step": 1761020
    },
    {
      "epoch": 2.8819805843037907,
      "grad_norm": 0.1713259220123291,
      "learning_rate": 4.263951028906914e-06,
      "loss": 0.0127,
      "step": 1761040
    },
    {
      "epoch": 2.8820133147424443,
      "grad_norm": 0.16627265512943268,
      "learning_rate": 4.2638851366933975e-06,
      "loss": 0.0097,
      "step": 1761060
    },
    {
      "epoch": 2.8820460451810974,
      "grad_norm": 0.4910075068473816,
      "learning_rate": 4.26381924447988e-06,
      "loss": 0.0157,
      "step": 1761080
    },
    {
      "epoch": 2.882078775619751,
      "grad_norm": 0.2062729001045227,
      "learning_rate": 4.263753352266363e-06,
      "loss": 0.0108,
      "step": 1761100
    },
    {
      "epoch": 2.882111506058404,
      "grad_norm": 0.27853137254714966,
      "learning_rate": 4.263687460052846e-06,
      "loss": 0.0153,
      "step": 1761120
    },
    {
      "epoch": 2.8821442364970578,
      "grad_norm": 0.43921297788619995,
      "learning_rate": 4.263621567839329e-06,
      "loss": 0.0091,
      "step": 1761140
    },
    {
      "epoch": 2.882176966935711,
      "grad_norm": 0.19509460031986237,
      "learning_rate": 4.263555675625812e-06,
      "loss": 0.0109,
      "step": 1761160
    },
    {
      "epoch": 2.882209697374364,
      "grad_norm": 0.17736120522022247,
      "learning_rate": 4.263489783412295e-06,
      "loss": 0.0088,
      "step": 1761180
    },
    {
      "epoch": 2.8822424278130176,
      "grad_norm": 0.34586989879608154,
      "learning_rate": 4.2634238911987775e-06,
      "loss": 0.0142,
      "step": 1761200
    },
    {
      "epoch": 2.882275158251671,
      "grad_norm": 0.5630717277526855,
      "learning_rate": 4.26335799898526e-06,
      "loss": 0.0137,
      "step": 1761220
    },
    {
      "epoch": 2.8823078886903244,
      "grad_norm": 0.27556565403938293,
      "learning_rate": 4.263292106771743e-06,
      "loss": 0.0064,
      "step": 1761240
    },
    {
      "epoch": 2.8823406191289775,
      "grad_norm": 0.20541855692863464,
      "learning_rate": 4.263226214558226e-06,
      "loss": 0.014,
      "step": 1761260
    },
    {
      "epoch": 2.882373349567631,
      "grad_norm": 0.3149309754371643,
      "learning_rate": 4.2631603223447085e-06,
      "loss": 0.0113,
      "step": 1761280
    },
    {
      "epoch": 2.8824060800062843,
      "grad_norm": 0.21556614339351654,
      "learning_rate": 4.263094430131191e-06,
      "loss": 0.01,
      "step": 1761300
    },
    {
      "epoch": 2.8824388104449374,
      "grad_norm": 0.38142070174217224,
      "learning_rate": 4.263028537917675e-06,
      "loss": 0.0137,
      "step": 1761320
    },
    {
      "epoch": 2.882471540883591,
      "grad_norm": 0.4107173681259155,
      "learning_rate": 4.2629626457041576e-06,
      "loss": 0.0138,
      "step": 1761340
    },
    {
      "epoch": 2.882504271322244,
      "grad_norm": 0.32485559582710266,
      "learning_rate": 4.26289675349064e-06,
      "loss": 0.017,
      "step": 1761360
    },
    {
      "epoch": 2.8825370017608973,
      "grad_norm": 0.23580558598041534,
      "learning_rate": 4.262830861277123e-06,
      "loss": 0.0148,
      "step": 1761380
    },
    {
      "epoch": 2.882569732199551,
      "grad_norm": 0.31248360872268677,
      "learning_rate": 4.262764969063607e-06,
      "loss": 0.0111,
      "step": 1761400
    },
    {
      "epoch": 2.8826024626382045,
      "grad_norm": 0.4712117612361908,
      "learning_rate": 4.262699076850089e-06,
      "loss": 0.0125,
      "step": 1761420
    },
    {
      "epoch": 2.8826351930768577,
      "grad_norm": 0.4018586277961731,
      "learning_rate": 4.262633184636572e-06,
      "loss": 0.0113,
      "step": 1761440
    },
    {
      "epoch": 2.882667923515511,
      "grad_norm": 0.33889734745025635,
      "learning_rate": 4.262567292423055e-06,
      "loss": 0.0149,
      "step": 1761460
    },
    {
      "epoch": 2.8827006539541644,
      "grad_norm": 0.1641036868095398,
      "learning_rate": 4.262501400209538e-06,
      "loss": 0.0106,
      "step": 1761480
    },
    {
      "epoch": 2.8827333843928176,
      "grad_norm": 0.08037293702363968,
      "learning_rate": 4.26243550799602e-06,
      "loss": 0.0142,
      "step": 1761500
    },
    {
      "epoch": 2.8827661148314707,
      "grad_norm": 0.4637342095375061,
      "learning_rate": 4.262369615782503e-06,
      "loss": 0.0121,
      "step": 1761520
    },
    {
      "epoch": 2.8827988452701243,
      "grad_norm": 0.34626853466033936,
      "learning_rate": 4.262303723568987e-06,
      "loss": 0.0116,
      "step": 1761540
    },
    {
      "epoch": 2.882831575708778,
      "grad_norm": 0.20823997259140015,
      "learning_rate": 4.262237831355469e-06,
      "loss": 0.0173,
      "step": 1761560
    },
    {
      "epoch": 2.882864306147431,
      "grad_norm": 0.26842567324638367,
      "learning_rate": 4.262171939141952e-06,
      "loss": 0.0159,
      "step": 1761580
    },
    {
      "epoch": 2.882897036586084,
      "grad_norm": 0.2079639583826065,
      "learning_rate": 4.262106046928435e-06,
      "loss": 0.0143,
      "step": 1761600
    },
    {
      "epoch": 2.882929767024738,
      "grad_norm": 0.23514404892921448,
      "learning_rate": 4.262040154714918e-06,
      "loss": 0.0124,
      "step": 1761620
    },
    {
      "epoch": 2.882962497463391,
      "grad_norm": 0.24690784513950348,
      "learning_rate": 4.2619742625014e-06,
      "loss": 0.0174,
      "step": 1761640
    },
    {
      "epoch": 2.882995227902044,
      "grad_norm": 0.21557700634002686,
      "learning_rate": 4.261908370287883e-06,
      "loss": 0.0146,
      "step": 1761660
    },
    {
      "epoch": 2.8830279583406977,
      "grad_norm": 0.095138780772686,
      "learning_rate": 4.261842478074366e-06,
      "loss": 0.0128,
      "step": 1761680
    },
    {
      "epoch": 2.8830606887793513,
      "grad_norm": 0.6375165581703186,
      "learning_rate": 4.2617765858608486e-06,
      "loss": 0.0098,
      "step": 1761700
    },
    {
      "epoch": 2.8830934192180044,
      "grad_norm": 0.31637823581695557,
      "learning_rate": 4.261710693647332e-06,
      "loss": 0.0147,
      "step": 1761720
    },
    {
      "epoch": 2.8831261496566576,
      "grad_norm": 0.08181218802928925,
      "learning_rate": 4.261644801433815e-06,
      "loss": 0.014,
      "step": 1761740
    },
    {
      "epoch": 2.883158880095311,
      "grad_norm": 1.6945586204528809,
      "learning_rate": 4.261578909220298e-06,
      "loss": 0.0144,
      "step": 1761760
    },
    {
      "epoch": 2.8831916105339643,
      "grad_norm": 0.12413980066776276,
      "learning_rate": 4.261513017006781e-06,
      "loss": 0.0135,
      "step": 1761780
    },
    {
      "epoch": 2.8832243409726175,
      "grad_norm": 0.2753143310546875,
      "learning_rate": 4.261447124793264e-06,
      "loss": 0.0162,
      "step": 1761800
    },
    {
      "epoch": 2.883257071411271,
      "grad_norm": 0.7450360059738159,
      "learning_rate": 4.261381232579747e-06,
      "loss": 0.0072,
      "step": 1761820
    },
    {
      "epoch": 2.8832898018499242,
      "grad_norm": 0.9655260443687439,
      "learning_rate": 4.2613153403662294e-06,
      "loss": 0.0197,
      "step": 1761840
    },
    {
      "epoch": 2.883322532288578,
      "grad_norm": 0.07918202877044678,
      "learning_rate": 4.261249448152712e-06,
      "loss": 0.0167,
      "step": 1761860
    },
    {
      "epoch": 2.883355262727231,
      "grad_norm": 0.2271360605955124,
      "learning_rate": 4.261183555939195e-06,
      "loss": 0.0118,
      "step": 1761880
    },
    {
      "epoch": 2.8833879931658846,
      "grad_norm": 0.24342083930969238,
      "learning_rate": 4.261117663725678e-06,
      "loss": 0.011,
      "step": 1761900
    },
    {
      "epoch": 2.8834207236045377,
      "grad_norm": 0.09791938960552216,
      "learning_rate": 4.26105177151216e-06,
      "loss": 0.0129,
      "step": 1761920
    },
    {
      "epoch": 2.883453454043191,
      "grad_norm": 2.064018964767456,
      "learning_rate": 4.260985879298644e-06,
      "loss": 0.0103,
      "step": 1761940
    },
    {
      "epoch": 2.8834861844818445,
      "grad_norm": 0.3215143382549286,
      "learning_rate": 4.260919987085127e-06,
      "loss": 0.0146,
      "step": 1761960
    },
    {
      "epoch": 2.8835189149204976,
      "grad_norm": 0.21025660634040833,
      "learning_rate": 4.2608540948716095e-06,
      "loss": 0.0094,
      "step": 1761980
    },
    {
      "epoch": 2.883551645359151,
      "grad_norm": 0.1956571340560913,
      "learning_rate": 4.260788202658092e-06,
      "loss": 0.0122,
      "step": 1762000
    },
    {
      "epoch": 2.8835843757978044,
      "grad_norm": 0.6494035124778748,
      "learning_rate": 4.260722310444575e-06,
      "loss": 0.012,
      "step": 1762020
    },
    {
      "epoch": 2.883617106236458,
      "grad_norm": 0.13668282330036163,
      "learning_rate": 4.260656418231058e-06,
      "loss": 0.0116,
      "step": 1762040
    },
    {
      "epoch": 2.883649836675111,
      "grad_norm": 0.41558438539505005,
      "learning_rate": 4.2605905260175404e-06,
      "loss": 0.0183,
      "step": 1762060
    },
    {
      "epoch": 2.8836825671137643,
      "grad_norm": 0.08907375484704971,
      "learning_rate": 4.260524633804023e-06,
      "loss": 0.0145,
      "step": 1762080
    },
    {
      "epoch": 2.883715297552418,
      "grad_norm": 0.36037564277648926,
      "learning_rate": 4.260458741590507e-06,
      "loss": 0.0118,
      "step": 1762100
    },
    {
      "epoch": 2.883748027991071,
      "grad_norm": 0.150991290807724,
      "learning_rate": 4.2603928493769895e-06,
      "loss": 0.0112,
      "step": 1762120
    },
    {
      "epoch": 2.8837807584297246,
      "grad_norm": 0.2315133959054947,
      "learning_rate": 4.260326957163472e-06,
      "loss": 0.0175,
      "step": 1762140
    },
    {
      "epoch": 2.8838134888683777,
      "grad_norm": 0.9066762328147888,
      "learning_rate": 4.260261064949956e-06,
      "loss": 0.0179,
      "step": 1762160
    },
    {
      "epoch": 2.8838462193070313,
      "grad_norm": 0.12308283895254135,
      "learning_rate": 4.2601951727364386e-06,
      "loss": 0.0156,
      "step": 1762180
    },
    {
      "epoch": 2.8838789497456845,
      "grad_norm": 0.2995492219924927,
      "learning_rate": 4.260129280522921e-06,
      "loss": 0.0185,
      "step": 1762200
    },
    {
      "epoch": 2.8839116801843376,
      "grad_norm": 0.3321949541568756,
      "learning_rate": 4.260063388309404e-06,
      "loss": 0.0157,
      "step": 1762220
    },
    {
      "epoch": 2.8839444106229912,
      "grad_norm": 0.2692699432373047,
      "learning_rate": 4.259997496095887e-06,
      "loss": 0.0095,
      "step": 1762240
    },
    {
      "epoch": 2.8839771410616444,
      "grad_norm": 0.39790409803390503,
      "learning_rate": 4.2599316038823695e-06,
      "loss": 0.013,
      "step": 1762260
    },
    {
      "epoch": 2.884009871500298,
      "grad_norm": 0.5519422888755798,
      "learning_rate": 4.259865711668852e-06,
      "loss": 0.0146,
      "step": 1762280
    },
    {
      "epoch": 2.884042601938951,
      "grad_norm": 0.4559963345527649,
      "learning_rate": 4.259799819455335e-06,
      "loss": 0.0091,
      "step": 1762300
    },
    {
      "epoch": 2.8840753323776047,
      "grad_norm": 0.4511815905570984,
      "learning_rate": 4.259733927241818e-06,
      "loss": 0.0142,
      "step": 1762320
    },
    {
      "epoch": 2.884108062816258,
      "grad_norm": 0.5286181569099426,
      "learning_rate": 4.259668035028301e-06,
      "loss": 0.0174,
      "step": 1762340
    },
    {
      "epoch": 2.884140793254911,
      "grad_norm": 0.4168335795402527,
      "learning_rate": 4.259602142814784e-06,
      "loss": 0.0114,
      "step": 1762360
    },
    {
      "epoch": 2.8841735236935646,
      "grad_norm": 0.23129279911518097,
      "learning_rate": 4.259536250601267e-06,
      "loss": 0.0144,
      "step": 1762380
    },
    {
      "epoch": 2.8842062541322178,
      "grad_norm": 0.682647168636322,
      "learning_rate": 4.2594703583877495e-06,
      "loss": 0.0167,
      "step": 1762400
    },
    {
      "epoch": 2.8842389845708714,
      "grad_norm": 0.9693589210510254,
      "learning_rate": 4.259404466174232e-06,
      "loss": 0.0159,
      "step": 1762420
    },
    {
      "epoch": 2.8842717150095245,
      "grad_norm": 0.14494921267032623,
      "learning_rate": 4.259338573960715e-06,
      "loss": 0.015,
      "step": 1762440
    },
    {
      "epoch": 2.884304445448178,
      "grad_norm": 0.545972466468811,
      "learning_rate": 4.259272681747199e-06,
      "loss": 0.0141,
      "step": 1762460
    },
    {
      "epoch": 2.8843371758868313,
      "grad_norm": 0.3347611129283905,
      "learning_rate": 4.259206789533681e-06,
      "loss": 0.0089,
      "step": 1762480
    },
    {
      "epoch": 2.8843699063254844,
      "grad_norm": 0.5916540622711182,
      "learning_rate": 4.259140897320164e-06,
      "loss": 0.0161,
      "step": 1762500
    },
    {
      "epoch": 2.884402636764138,
      "grad_norm": 0.1005386933684349,
      "learning_rate": 4.259075005106647e-06,
      "loss": 0.0126,
      "step": 1762520
    },
    {
      "epoch": 2.884435367202791,
      "grad_norm": 0.27622541785240173,
      "learning_rate": 4.2590091128931296e-06,
      "loss": 0.0138,
      "step": 1762540
    },
    {
      "epoch": 2.8844680976414447,
      "grad_norm": 0.40141087770462036,
      "learning_rate": 4.258943220679613e-06,
      "loss": 0.0173,
      "step": 1762560
    },
    {
      "epoch": 2.884500828080098,
      "grad_norm": 0.42762449383735657,
      "learning_rate": 4.258877328466096e-06,
      "loss": 0.0101,
      "step": 1762580
    },
    {
      "epoch": 2.8845335585187515,
      "grad_norm": 0.08629364520311356,
      "learning_rate": 4.258811436252579e-06,
      "loss": 0.0107,
      "step": 1762600
    },
    {
      "epoch": 2.8845662889574046,
      "grad_norm": 0.22789038717746735,
      "learning_rate": 4.258745544039061e-06,
      "loss": 0.0113,
      "step": 1762620
    },
    {
      "epoch": 2.884599019396058,
      "grad_norm": 0.45806360244750977,
      "learning_rate": 4.258679651825544e-06,
      "loss": 0.0132,
      "step": 1762640
    },
    {
      "epoch": 2.8846317498347114,
      "grad_norm": 0.4600502848625183,
      "learning_rate": 4.258613759612027e-06,
      "loss": 0.0123,
      "step": 1762660
    },
    {
      "epoch": 2.8846644802733645,
      "grad_norm": 0.5775558352470398,
      "learning_rate": 4.25854786739851e-06,
      "loss": 0.0119,
      "step": 1762680
    },
    {
      "epoch": 2.884697210712018,
      "grad_norm": 0.4057812988758087,
      "learning_rate": 4.258481975184992e-06,
      "loss": 0.0182,
      "step": 1762700
    },
    {
      "epoch": 2.8847299411506713,
      "grad_norm": 0.20009754598140717,
      "learning_rate": 4.258416082971475e-06,
      "loss": 0.0117,
      "step": 1762720
    },
    {
      "epoch": 2.884762671589325,
      "grad_norm": 0.7238509058952332,
      "learning_rate": 4.258350190757959e-06,
      "loss": 0.0136,
      "step": 1762740
    },
    {
      "epoch": 2.884795402027978,
      "grad_norm": 0.24519236385822296,
      "learning_rate": 4.258284298544441e-06,
      "loss": 0.0129,
      "step": 1762760
    },
    {
      "epoch": 2.884828132466631,
      "grad_norm": 0.34249645471572876,
      "learning_rate": 4.258218406330924e-06,
      "loss": 0.0098,
      "step": 1762780
    },
    {
      "epoch": 2.8848608629052848,
      "grad_norm": 0.1891205608844757,
      "learning_rate": 4.258152514117407e-06,
      "loss": 0.0148,
      "step": 1762800
    },
    {
      "epoch": 2.884893593343938,
      "grad_norm": 0.16714434325695038,
      "learning_rate": 4.2580866219038905e-06,
      "loss": 0.0146,
      "step": 1762820
    },
    {
      "epoch": 2.884926323782591,
      "grad_norm": 0.10908093303442001,
      "learning_rate": 4.258020729690373e-06,
      "loss": 0.0139,
      "step": 1762840
    },
    {
      "epoch": 2.8849590542212447,
      "grad_norm": 0.19368192553520203,
      "learning_rate": 4.257954837476856e-06,
      "loss": 0.0139,
      "step": 1762860
    },
    {
      "epoch": 2.8849917846598983,
      "grad_norm": 0.18395978212356567,
      "learning_rate": 4.257888945263339e-06,
      "loss": 0.0178,
      "step": 1762880
    },
    {
      "epoch": 2.8850245150985514,
      "grad_norm": 0.3894297182559967,
      "learning_rate": 4.257823053049821e-06,
      "loss": 0.0154,
      "step": 1762900
    },
    {
      "epoch": 2.8850572455372046,
      "grad_norm": 0.08487837761640549,
      "learning_rate": 4.257757160836304e-06,
      "loss": 0.0118,
      "step": 1762920
    },
    {
      "epoch": 2.885089975975858,
      "grad_norm": 0.18143883347511292,
      "learning_rate": 4.257691268622787e-06,
      "loss": 0.0104,
      "step": 1762940
    },
    {
      "epoch": 2.8851227064145113,
      "grad_norm": 0.46480128169059753,
      "learning_rate": 4.2576253764092705e-06,
      "loss": 0.0196,
      "step": 1762960
    },
    {
      "epoch": 2.8851554368531644,
      "grad_norm": 0.14030875265598297,
      "learning_rate": 4.257559484195753e-06,
      "loss": 0.0155,
      "step": 1762980
    },
    {
      "epoch": 2.885188167291818,
      "grad_norm": 0.22183741629123688,
      "learning_rate": 4.257493591982236e-06,
      "loss": 0.0128,
      "step": 1763000
    },
    {
      "epoch": 2.8852208977304716,
      "grad_norm": 0.1665990799665451,
      "learning_rate": 4.257427699768719e-06,
      "loss": 0.0113,
      "step": 1763020
    },
    {
      "epoch": 2.885253628169125,
      "grad_norm": 1.573087215423584,
      "learning_rate": 4.2573618075552014e-06,
      "loss": 0.011,
      "step": 1763040
    },
    {
      "epoch": 2.885286358607778,
      "grad_norm": 0.7077526450157166,
      "learning_rate": 4.257295915341684e-06,
      "loss": 0.0118,
      "step": 1763060
    },
    {
      "epoch": 2.8853190890464315,
      "grad_norm": 0.1709185242652893,
      "learning_rate": 4.257230023128167e-06,
      "loss": 0.0085,
      "step": 1763080
    },
    {
      "epoch": 2.8853518194850847,
      "grad_norm": 0.1652751863002777,
      "learning_rate": 4.25716413091465e-06,
      "loss": 0.0113,
      "step": 1763100
    },
    {
      "epoch": 2.885384549923738,
      "grad_norm": 1.185206413269043,
      "learning_rate": 4.257098238701132e-06,
      "loss": 0.02,
      "step": 1763120
    },
    {
      "epoch": 2.8854172803623914,
      "grad_norm": 0.1826404482126236,
      "learning_rate": 4.257032346487616e-06,
      "loss": 0.0146,
      "step": 1763140
    },
    {
      "epoch": 2.885450010801045,
      "grad_norm": 0.2462845742702484,
      "learning_rate": 4.256966454274099e-06,
      "loss": 0.0066,
      "step": 1763160
    },
    {
      "epoch": 2.885482741239698,
      "grad_norm": 0.2194562703371048,
      "learning_rate": 4.2569005620605815e-06,
      "loss": 0.0103,
      "step": 1763180
    },
    {
      "epoch": 2.8855154716783513,
      "grad_norm": 0.03674764931201935,
      "learning_rate": 4.256834669847065e-06,
      "loss": 0.0132,
      "step": 1763200
    },
    {
      "epoch": 2.885548202117005,
      "grad_norm": 0.7913162112236023,
      "learning_rate": 4.256768777633548e-06,
      "loss": 0.0123,
      "step": 1763220
    },
    {
      "epoch": 2.885580932555658,
      "grad_norm": 0.1400766223669052,
      "learning_rate": 4.2567028854200305e-06,
      "loss": 0.0114,
      "step": 1763240
    },
    {
      "epoch": 2.885613662994311,
      "grad_norm": 0.28873538970947266,
      "learning_rate": 4.256636993206513e-06,
      "loss": 0.014,
      "step": 1763260
    },
    {
      "epoch": 2.885646393432965,
      "grad_norm": 0.1850983053445816,
      "learning_rate": 4.256571100992996e-06,
      "loss": 0.0108,
      "step": 1763280
    },
    {
      "epoch": 2.8856791238716184,
      "grad_norm": 0.4821837246417999,
      "learning_rate": 4.256505208779479e-06,
      "loss": 0.0114,
      "step": 1763300
    },
    {
      "epoch": 2.8857118543102716,
      "grad_norm": 0.12182004004716873,
      "learning_rate": 4.2564393165659615e-06,
      "loss": 0.0122,
      "step": 1763320
    },
    {
      "epoch": 2.8857445847489247,
      "grad_norm": 0.17863121628761292,
      "learning_rate": 4.256373424352444e-06,
      "loss": 0.0081,
      "step": 1763340
    },
    {
      "epoch": 2.8857773151875783,
      "grad_norm": 0.611559271812439,
      "learning_rate": 4.256307532138928e-06,
      "loss": 0.0136,
      "step": 1763360
    },
    {
      "epoch": 2.8858100456262314,
      "grad_norm": 0.5852864384651184,
      "learning_rate": 4.2562416399254106e-06,
      "loss": 0.0102,
      "step": 1763380
    },
    {
      "epoch": 2.8858427760648846,
      "grad_norm": 0.24111570417881012,
      "learning_rate": 4.256175747711893e-06,
      "loss": 0.0127,
      "step": 1763400
    },
    {
      "epoch": 2.885875506503538,
      "grad_norm": 0.23495060205459595,
      "learning_rate": 4.256109855498376e-06,
      "loss": 0.013,
      "step": 1763420
    },
    {
      "epoch": 2.8859082369421913,
      "grad_norm": 0.1506074219942093,
      "learning_rate": 4.256043963284859e-06,
      "loss": 0.0117,
      "step": 1763440
    },
    {
      "epoch": 2.885940967380845,
      "grad_norm": 0.06021874025464058,
      "learning_rate": 4.2559780710713415e-06,
      "loss": 0.0121,
      "step": 1763460
    },
    {
      "epoch": 2.885973697819498,
      "grad_norm": 0.18284763395786285,
      "learning_rate": 4.255912178857824e-06,
      "loss": 0.0113,
      "step": 1763480
    },
    {
      "epoch": 2.8860064282581517,
      "grad_norm": 0.16052040457725525,
      "learning_rate": 4.255846286644307e-06,
      "loss": 0.013,
      "step": 1763500
    },
    {
      "epoch": 2.886039158696805,
      "grad_norm": 0.18514618277549744,
      "learning_rate": 4.255780394430791e-06,
      "loss": 0.0106,
      "step": 1763520
    },
    {
      "epoch": 2.886071889135458,
      "grad_norm": 0.3345276117324829,
      "learning_rate": 4.255714502217273e-06,
      "loss": 0.012,
      "step": 1763540
    },
    {
      "epoch": 2.8861046195741116,
      "grad_norm": 0.2721303105354309,
      "learning_rate": 4.255648610003756e-06,
      "loss": 0.009,
      "step": 1763560
    },
    {
      "epoch": 2.8861373500127647,
      "grad_norm": 0.48790261149406433,
      "learning_rate": 4.25558271779024e-06,
      "loss": 0.0115,
      "step": 1763580
    },
    {
      "epoch": 2.8861700804514183,
      "grad_norm": 0.4004135727882385,
      "learning_rate": 4.255516825576722e-06,
      "loss": 0.0131,
      "step": 1763600
    },
    {
      "epoch": 2.8862028108900715,
      "grad_norm": 0.28446969389915466,
      "learning_rate": 4.255450933363205e-06,
      "loss": 0.0166,
      "step": 1763620
    },
    {
      "epoch": 2.886235541328725,
      "grad_norm": 0.10943391919136047,
      "learning_rate": 4.255385041149688e-06,
      "loss": 0.0144,
      "step": 1763640
    },
    {
      "epoch": 2.886268271767378,
      "grad_norm": 0.2564135789871216,
      "learning_rate": 4.255319148936171e-06,
      "loss": 0.0156,
      "step": 1763660
    },
    {
      "epoch": 2.8863010022060314,
      "grad_norm": 0.44043660163879395,
      "learning_rate": 4.255253256722653e-06,
      "loss": 0.0185,
      "step": 1763680
    },
    {
      "epoch": 2.886333732644685,
      "grad_norm": 0.29178059101104736,
      "learning_rate": 4.255187364509136e-06,
      "loss": 0.008,
      "step": 1763700
    },
    {
      "epoch": 2.886366463083338,
      "grad_norm": 0.3538551926612854,
      "learning_rate": 4.255121472295619e-06,
      "loss": 0.015,
      "step": 1763720
    },
    {
      "epoch": 2.8863991935219917,
      "grad_norm": 0.2913933992385864,
      "learning_rate": 4.2550555800821016e-06,
      "loss": 0.0107,
      "step": 1763740
    },
    {
      "epoch": 2.886431923960645,
      "grad_norm": 0.45697882771492004,
      "learning_rate": 4.254989687868585e-06,
      "loss": 0.0184,
      "step": 1763760
    },
    {
      "epoch": 2.8864646543992984,
      "grad_norm": 0.41382908821105957,
      "learning_rate": 4.254923795655068e-06,
      "loss": 0.0096,
      "step": 1763780
    },
    {
      "epoch": 2.8864973848379516,
      "grad_norm": 0.058948908001184464,
      "learning_rate": 4.254857903441551e-06,
      "loss": 0.0101,
      "step": 1763800
    },
    {
      "epoch": 2.8865301152766047,
      "grad_norm": 0.6219856142997742,
      "learning_rate": 4.254792011228033e-06,
      "loss": 0.018,
      "step": 1763820
    },
    {
      "epoch": 2.8865628457152583,
      "grad_norm": 0.20413053035736084,
      "learning_rate": 4.254726119014516e-06,
      "loss": 0.0143,
      "step": 1763840
    },
    {
      "epoch": 2.8865955761539115,
      "grad_norm": 0.16256345808506012,
      "learning_rate": 4.254660226801e-06,
      "loss": 0.0139,
      "step": 1763860
    },
    {
      "epoch": 2.886628306592565,
      "grad_norm": 0.14984263479709625,
      "learning_rate": 4.2545943345874824e-06,
      "loss": 0.0085,
      "step": 1763880
    },
    {
      "epoch": 2.8866610370312182,
      "grad_norm": 0.12054291367530823,
      "learning_rate": 4.254528442373965e-06,
      "loss": 0.0179,
      "step": 1763900
    },
    {
      "epoch": 2.886693767469872,
      "grad_norm": 0.5629218816757202,
      "learning_rate": 4.254462550160448e-06,
      "loss": 0.0121,
      "step": 1763920
    },
    {
      "epoch": 2.886726497908525,
      "grad_norm": 0.13075044751167297,
      "learning_rate": 4.254396657946931e-06,
      "loss": 0.0104,
      "step": 1763940
    },
    {
      "epoch": 2.886759228347178,
      "grad_norm": 0.41148391366004944,
      "learning_rate": 4.254330765733413e-06,
      "loss": 0.0164,
      "step": 1763960
    },
    {
      "epoch": 2.8867919587858317,
      "grad_norm": 0.2051900029182434,
      "learning_rate": 4.254264873519897e-06,
      "loss": 0.0122,
      "step": 1763980
    },
    {
      "epoch": 2.886824689224485,
      "grad_norm": 0.2655792534351349,
      "learning_rate": 4.25419898130638e-06,
      "loss": 0.0101,
      "step": 1764000
    },
    {
      "epoch": 2.8868574196631385,
      "grad_norm": 0.5247807502746582,
      "learning_rate": 4.2541330890928625e-06,
      "loss": 0.0153,
      "step": 1764020
    },
    {
      "epoch": 2.8868901501017916,
      "grad_norm": 0.6482627987861633,
      "learning_rate": 4.254067196879345e-06,
      "loss": 0.0127,
      "step": 1764040
    },
    {
      "epoch": 2.886922880540445,
      "grad_norm": 0.38910266757011414,
      "learning_rate": 4.254001304665828e-06,
      "loss": 0.0139,
      "step": 1764060
    },
    {
      "epoch": 2.8869556109790984,
      "grad_norm": 0.2770503759384155,
      "learning_rate": 4.253935412452311e-06,
      "loss": 0.0178,
      "step": 1764080
    },
    {
      "epoch": 2.8869883414177515,
      "grad_norm": 0.8937317132949829,
      "learning_rate": 4.2538695202387934e-06,
      "loss": 0.0132,
      "step": 1764100
    },
    {
      "epoch": 2.887021071856405,
      "grad_norm": 0.3877190947532654,
      "learning_rate": 4.253803628025276e-06,
      "loss": 0.0118,
      "step": 1764120
    },
    {
      "epoch": 2.8870538022950583,
      "grad_norm": 0.2935450077056885,
      "learning_rate": 4.253737735811759e-06,
      "loss": 0.0134,
      "step": 1764140
    },
    {
      "epoch": 2.887086532733712,
      "grad_norm": 0.527813196182251,
      "learning_rate": 4.2536718435982425e-06,
      "loss": 0.0201,
      "step": 1764160
    },
    {
      "epoch": 2.887119263172365,
      "grad_norm": 0.29862722754478455,
      "learning_rate": 4.253605951384725e-06,
      "loss": 0.0112,
      "step": 1764180
    },
    {
      "epoch": 2.8871519936110186,
      "grad_norm": 0.3474120795726776,
      "learning_rate": 4.253540059171208e-06,
      "loss": 0.0178,
      "step": 1764200
    },
    {
      "epoch": 2.8871847240496717,
      "grad_norm": 0.13973957300186157,
      "learning_rate": 4.2534741669576916e-06,
      "loss": 0.0183,
      "step": 1764220
    },
    {
      "epoch": 2.887217454488325,
      "grad_norm": 0.3371359705924988,
      "learning_rate": 4.253408274744174e-06,
      "loss": 0.0134,
      "step": 1764240
    },
    {
      "epoch": 2.8872501849269785,
      "grad_norm": 0.13731662929058075,
      "learning_rate": 4.253342382530657e-06,
      "loss": 0.0108,
      "step": 1764260
    },
    {
      "epoch": 2.8872829153656316,
      "grad_norm": 0.1708582490682602,
      "learning_rate": 4.25327649031714e-06,
      "loss": 0.0161,
      "step": 1764280
    },
    {
      "epoch": 2.887315645804285,
      "grad_norm": 0.2348954826593399,
      "learning_rate": 4.2532105981036225e-06,
      "loss": 0.013,
      "step": 1764300
    },
    {
      "epoch": 2.8873483762429384,
      "grad_norm": 0.17208626866340637,
      "learning_rate": 4.253144705890105e-06,
      "loss": 0.01,
      "step": 1764320
    },
    {
      "epoch": 2.887381106681592,
      "grad_norm": 0.3361874520778656,
      "learning_rate": 4.253078813676588e-06,
      "loss": 0.0082,
      "step": 1764340
    },
    {
      "epoch": 2.887413837120245,
      "grad_norm": 0.2581190764904022,
      "learning_rate": 4.253012921463071e-06,
      "loss": 0.0126,
      "step": 1764360
    },
    {
      "epoch": 2.8874465675588983,
      "grad_norm": 0.1770150512456894,
      "learning_rate": 4.252947029249554e-06,
      "loss": 0.0143,
      "step": 1764380
    },
    {
      "epoch": 2.887479297997552,
      "grad_norm": 0.18905724585056305,
      "learning_rate": 4.252881137036037e-06,
      "loss": 0.0205,
      "step": 1764400
    },
    {
      "epoch": 2.887512028436205,
      "grad_norm": 0.8520404696464539,
      "learning_rate": 4.25281524482252e-06,
      "loss": 0.0176,
      "step": 1764420
    },
    {
      "epoch": 2.887544758874858,
      "grad_norm": 0.3583984673023224,
      "learning_rate": 4.2527493526090025e-06,
      "loss": 0.0106,
      "step": 1764440
    },
    {
      "epoch": 2.8875774893135118,
      "grad_norm": 0.2875291407108307,
      "learning_rate": 4.252683460395485e-06,
      "loss": 0.0172,
      "step": 1764460
    },
    {
      "epoch": 2.8876102197521654,
      "grad_norm": 0.6931434273719788,
      "learning_rate": 4.252617568181968e-06,
      "loss": 0.0143,
      "step": 1764480
    },
    {
      "epoch": 2.8876429501908185,
      "grad_norm": 0.1769588589668274,
      "learning_rate": 4.252551675968451e-06,
      "loss": 0.0129,
      "step": 1764500
    },
    {
      "epoch": 2.8876756806294717,
      "grad_norm": 0.2727507948875427,
      "learning_rate": 4.2524857837549335e-06,
      "loss": 0.0133,
      "step": 1764520
    },
    {
      "epoch": 2.8877084110681253,
      "grad_norm": 0.0879664197564125,
      "learning_rate": 4.252419891541416e-06,
      "loss": 0.0199,
      "step": 1764540
    },
    {
      "epoch": 2.8877411415067784,
      "grad_norm": 0.23419539630413055,
      "learning_rate": 4.2523539993279e-06,
      "loss": 0.0171,
      "step": 1764560
    },
    {
      "epoch": 2.8877738719454316,
      "grad_norm": 0.1344294250011444,
      "learning_rate": 4.2522881071143826e-06,
      "loss": 0.0108,
      "step": 1764580
    },
    {
      "epoch": 2.887806602384085,
      "grad_norm": 0.6794540286064148,
      "learning_rate": 4.252222214900865e-06,
      "loss": 0.0152,
      "step": 1764600
    },
    {
      "epoch": 2.8878393328227387,
      "grad_norm": 0.22253172099590302,
      "learning_rate": 4.252156322687349e-06,
      "loss": 0.0097,
      "step": 1764620
    },
    {
      "epoch": 2.887872063261392,
      "grad_norm": 0.23566162586212158,
      "learning_rate": 4.252090430473832e-06,
      "loss": 0.0075,
      "step": 1764640
    },
    {
      "epoch": 2.887904793700045,
      "grad_norm": 0.30662548542022705,
      "learning_rate": 4.252024538260314e-06,
      "loss": 0.0079,
      "step": 1764660
    },
    {
      "epoch": 2.8879375241386986,
      "grad_norm": 0.7969793081283569,
      "learning_rate": 4.251958646046797e-06,
      "loss": 0.0183,
      "step": 1764680
    },
    {
      "epoch": 2.887970254577352,
      "grad_norm": 0.41278403997421265,
      "learning_rate": 4.25189275383328e-06,
      "loss": 0.0132,
      "step": 1764700
    },
    {
      "epoch": 2.888002985016005,
      "grad_norm": 0.4131486713886261,
      "learning_rate": 4.251826861619763e-06,
      "loss": 0.0174,
      "step": 1764720
    },
    {
      "epoch": 2.8880357154546585,
      "grad_norm": 0.44444960355758667,
      "learning_rate": 4.251760969406245e-06,
      "loss": 0.0205,
      "step": 1764740
    },
    {
      "epoch": 2.888068445893312,
      "grad_norm": 0.13990284502506256,
      "learning_rate": 4.251695077192728e-06,
      "loss": 0.0157,
      "step": 1764760
    },
    {
      "epoch": 2.8881011763319653,
      "grad_norm": 0.21842268109321594,
      "learning_rate": 4.251629184979212e-06,
      "loss": 0.0126,
      "step": 1764780
    },
    {
      "epoch": 2.8881339067706184,
      "grad_norm": 0.6064074039459229,
      "learning_rate": 4.251563292765694e-06,
      "loss": 0.0137,
      "step": 1764800
    },
    {
      "epoch": 2.888166637209272,
      "grad_norm": 0.3462405502796173,
      "learning_rate": 4.251497400552177e-06,
      "loss": 0.0126,
      "step": 1764820
    },
    {
      "epoch": 2.888199367647925,
      "grad_norm": 0.16259703040122986,
      "learning_rate": 4.25143150833866e-06,
      "loss": 0.0133,
      "step": 1764840
    },
    {
      "epoch": 2.8882320980865783,
      "grad_norm": 0.11529001593589783,
      "learning_rate": 4.251365616125143e-06,
      "loss": 0.01,
      "step": 1764860
    },
    {
      "epoch": 2.888264828525232,
      "grad_norm": 0.5943276882171631,
      "learning_rate": 4.251299723911625e-06,
      "loss": 0.0148,
      "step": 1764880
    },
    {
      "epoch": 2.888297558963885,
      "grad_norm": 0.112409308552742,
      "learning_rate": 4.251233831698108e-06,
      "loss": 0.0107,
      "step": 1764900
    },
    {
      "epoch": 2.8883302894025387,
      "grad_norm": 0.163766011595726,
      "learning_rate": 4.251167939484592e-06,
      "loss": 0.0123,
      "step": 1764920
    },
    {
      "epoch": 2.888363019841192,
      "grad_norm": 0.18234199285507202,
      "learning_rate": 4.2511020472710744e-06,
      "loss": 0.0089,
      "step": 1764940
    },
    {
      "epoch": 2.8883957502798454,
      "grad_norm": 0.28672951459884644,
      "learning_rate": 4.251036155057557e-06,
      "loss": 0.0148,
      "step": 1764960
    },
    {
      "epoch": 2.8884284807184986,
      "grad_norm": 0.8350721001625061,
      "learning_rate": 4.25097026284404e-06,
      "loss": 0.0128,
      "step": 1764980
    },
    {
      "epoch": 2.8884612111571517,
      "grad_norm": 0.38348302245140076,
      "learning_rate": 4.2509043706305235e-06,
      "loss": 0.0119,
      "step": 1765000
    },
    {
      "epoch": 2.8884939415958053,
      "grad_norm": 0.44956091046333313,
      "learning_rate": 4.250838478417006e-06,
      "loss": 0.0148,
      "step": 1765020
    },
    {
      "epoch": 2.8885266720344585,
      "grad_norm": 0.44276466965675354,
      "learning_rate": 4.250772586203489e-06,
      "loss": 0.0127,
      "step": 1765040
    },
    {
      "epoch": 2.888559402473112,
      "grad_norm": 0.22001706063747406,
      "learning_rate": 4.250706693989972e-06,
      "loss": 0.0104,
      "step": 1765060
    },
    {
      "epoch": 2.888592132911765,
      "grad_norm": 0.20293834805488586,
      "learning_rate": 4.2506408017764545e-06,
      "loss": 0.0083,
      "step": 1765080
    },
    {
      "epoch": 2.888624863350419,
      "grad_norm": 0.6854221224784851,
      "learning_rate": 4.250574909562937e-06,
      "loss": 0.0141,
      "step": 1765100
    },
    {
      "epoch": 2.888657593789072,
      "grad_norm": 0.21747827529907227,
      "learning_rate": 4.25050901734942e-06,
      "loss": 0.0115,
      "step": 1765120
    },
    {
      "epoch": 2.888690324227725,
      "grad_norm": 0.17788240313529968,
      "learning_rate": 4.250443125135903e-06,
      "loss": 0.0106,
      "step": 1765140
    },
    {
      "epoch": 2.8887230546663787,
      "grad_norm": 0.09079370647668839,
      "learning_rate": 4.250377232922385e-06,
      "loss": 0.0189,
      "step": 1765160
    },
    {
      "epoch": 2.888755785105032,
      "grad_norm": 0.5264159440994263,
      "learning_rate": 4.250311340708869e-06,
      "loss": 0.0127,
      "step": 1765180
    },
    {
      "epoch": 2.8887885155436854,
      "grad_norm": 0.8116979598999023,
      "learning_rate": 4.250245448495352e-06,
      "loss": 0.0107,
      "step": 1765200
    },
    {
      "epoch": 2.8888212459823386,
      "grad_norm": 0.19734930992126465,
      "learning_rate": 4.2501795562818345e-06,
      "loss": 0.0125,
      "step": 1765220
    },
    {
      "epoch": 2.888853976420992,
      "grad_norm": 0.18610890209674835,
      "learning_rate": 4.250113664068317e-06,
      "loss": 0.0131,
      "step": 1765240
    },
    {
      "epoch": 2.8888867068596453,
      "grad_norm": 0.15605123341083527,
      "learning_rate": 4.2500477718548e-06,
      "loss": 0.011,
      "step": 1765260
    },
    {
      "epoch": 2.8889194372982985,
      "grad_norm": 0.30424201488494873,
      "learning_rate": 4.2499818796412835e-06,
      "loss": 0.0113,
      "step": 1765280
    },
    {
      "epoch": 2.888952167736952,
      "grad_norm": 0.11801819503307343,
      "learning_rate": 4.249915987427766e-06,
      "loss": 0.0183,
      "step": 1765300
    },
    {
      "epoch": 2.888984898175605,
      "grad_norm": 0.7580180168151855,
      "learning_rate": 4.249850095214249e-06,
      "loss": 0.0153,
      "step": 1765320
    },
    {
      "epoch": 2.889017628614259,
      "grad_norm": 0.6496317982673645,
      "learning_rate": 4.249784203000732e-06,
      "loss": 0.017,
      "step": 1765340
    },
    {
      "epoch": 2.889050359052912,
      "grad_norm": 0.36299410462379456,
      "learning_rate": 4.2497183107872145e-06,
      "loss": 0.0075,
      "step": 1765360
    },
    {
      "epoch": 2.8890830894915656,
      "grad_norm": 0.3577151596546173,
      "learning_rate": 4.249652418573697e-06,
      "loss": 0.01,
      "step": 1765380
    },
    {
      "epoch": 2.8891158199302187,
      "grad_norm": 0.5182702541351318,
      "learning_rate": 4.249586526360181e-06,
      "loss": 0.015,
      "step": 1765400
    },
    {
      "epoch": 2.889148550368872,
      "grad_norm": 0.16461573541164398,
      "learning_rate": 4.2495206341466636e-06,
      "loss": 0.0151,
      "step": 1765420
    },
    {
      "epoch": 2.8891812808075255,
      "grad_norm": 0.2618565261363983,
      "learning_rate": 4.249454741933146e-06,
      "loss": 0.0148,
      "step": 1765440
    },
    {
      "epoch": 2.8892140112461786,
      "grad_norm": 0.2599775493144989,
      "learning_rate": 4.249388849719629e-06,
      "loss": 0.0187,
      "step": 1765460
    },
    {
      "epoch": 2.889246741684832,
      "grad_norm": 0.40440240502357483,
      "learning_rate": 4.249322957506112e-06,
      "loss": 0.017,
      "step": 1765480
    },
    {
      "epoch": 2.8892794721234853,
      "grad_norm": 0.449384480714798,
      "learning_rate": 4.2492570652925945e-06,
      "loss": 0.0118,
      "step": 1765500
    },
    {
      "epoch": 2.889312202562139,
      "grad_norm": 0.22461727261543274,
      "learning_rate": 4.249191173079077e-06,
      "loss": 0.0151,
      "step": 1765520
    },
    {
      "epoch": 2.889344933000792,
      "grad_norm": 0.29693514108657837,
      "learning_rate": 4.24912528086556e-06,
      "loss": 0.0123,
      "step": 1765540
    },
    {
      "epoch": 2.8893776634394452,
      "grad_norm": 0.13479837775230408,
      "learning_rate": 4.249059388652043e-06,
      "loss": 0.0147,
      "step": 1765560
    },
    {
      "epoch": 2.889410393878099,
      "grad_norm": 0.3197786808013916,
      "learning_rate": 4.248993496438526e-06,
      "loss": 0.0129,
      "step": 1765580
    },
    {
      "epoch": 2.889443124316752,
      "grad_norm": 0.44632869958877563,
      "learning_rate": 4.248927604225009e-06,
      "loss": 0.0103,
      "step": 1765600
    },
    {
      "epoch": 2.8894758547554056,
      "grad_norm": 0.3992048501968384,
      "learning_rate": 4.248861712011492e-06,
      "loss": 0.0147,
      "step": 1765620
    },
    {
      "epoch": 2.8895085851940587,
      "grad_norm": 0.3401045501232147,
      "learning_rate": 4.248795819797975e-06,
      "loss": 0.0178,
      "step": 1765640
    },
    {
      "epoch": 2.8895413156327123,
      "grad_norm": 0.439228355884552,
      "learning_rate": 4.248729927584458e-06,
      "loss": 0.0141,
      "step": 1765660
    },
    {
      "epoch": 2.8895740460713655,
      "grad_norm": 0.2523525655269623,
      "learning_rate": 4.248664035370941e-06,
      "loss": 0.0158,
      "step": 1765680
    },
    {
      "epoch": 2.8896067765100186,
      "grad_norm": 0.4669348895549774,
      "learning_rate": 4.248598143157424e-06,
      "loss": 0.0104,
      "step": 1765700
    },
    {
      "epoch": 2.889639506948672,
      "grad_norm": 0.1230708584189415,
      "learning_rate": 4.248532250943906e-06,
      "loss": 0.0157,
      "step": 1765720
    },
    {
      "epoch": 2.8896722373873254,
      "grad_norm": 0.336997926235199,
      "learning_rate": 4.248466358730389e-06,
      "loss": 0.0116,
      "step": 1765740
    },
    {
      "epoch": 2.889704967825979,
      "grad_norm": 0.42842185497283936,
      "learning_rate": 4.248400466516872e-06,
      "loss": 0.0126,
      "step": 1765760
    },
    {
      "epoch": 2.889737698264632,
      "grad_norm": 0.2540145218372345,
      "learning_rate": 4.248334574303355e-06,
      "loss": 0.0121,
      "step": 1765780
    },
    {
      "epoch": 2.8897704287032857,
      "grad_norm": 0.3178577423095703,
      "learning_rate": 4.248268682089838e-06,
      "loss": 0.016,
      "step": 1765800
    },
    {
      "epoch": 2.889803159141939,
      "grad_norm": 0.3680475950241089,
      "learning_rate": 4.248202789876321e-06,
      "loss": 0.0153,
      "step": 1765820
    },
    {
      "epoch": 2.889835889580592,
      "grad_norm": 0.25443750619888306,
      "learning_rate": 4.248136897662804e-06,
      "loss": 0.0161,
      "step": 1765840
    },
    {
      "epoch": 2.8898686200192456,
      "grad_norm": 0.15909837186336517,
      "learning_rate": 4.248071005449286e-06,
      "loss": 0.021,
      "step": 1765860
    },
    {
      "epoch": 2.8899013504578988,
      "grad_norm": 0.2623203694820404,
      "learning_rate": 4.248005113235769e-06,
      "loss": 0.0115,
      "step": 1765880
    },
    {
      "epoch": 2.889934080896552,
      "grad_norm": 0.4503393769264221,
      "learning_rate": 4.247939221022252e-06,
      "loss": 0.0136,
      "step": 1765900
    },
    {
      "epoch": 2.8899668113352055,
      "grad_norm": 2.624706983566284,
      "learning_rate": 4.247873328808735e-06,
      "loss": 0.0121,
      "step": 1765920
    },
    {
      "epoch": 2.889999541773859,
      "grad_norm": 0.3076145350933075,
      "learning_rate": 4.247807436595217e-06,
      "loss": 0.0179,
      "step": 1765940
    },
    {
      "epoch": 2.8900322722125122,
      "grad_norm": 0.3569546341896057,
      "learning_rate": 4.2477415443817e-06,
      "loss": 0.0147,
      "step": 1765960
    },
    {
      "epoch": 2.8900650026511654,
      "grad_norm": 0.3918592035770416,
      "learning_rate": 4.247675652168184e-06,
      "loss": 0.0126,
      "step": 1765980
    },
    {
      "epoch": 2.890097733089819,
      "grad_norm": 0.2946532964706421,
      "learning_rate": 4.247609759954666e-06,
      "loss": 0.0156,
      "step": 1766000
    },
    {
      "epoch": 2.890130463528472,
      "grad_norm": 0.07982444018125534,
      "learning_rate": 4.247543867741149e-06,
      "loss": 0.0156,
      "step": 1766020
    },
    {
      "epoch": 2.8901631939671253,
      "grad_norm": 0.6381218433380127,
      "learning_rate": 4.247477975527633e-06,
      "loss": 0.015,
      "step": 1766040
    },
    {
      "epoch": 2.890195924405779,
      "grad_norm": 0.12449873238801956,
      "learning_rate": 4.2474120833141155e-06,
      "loss": 0.0108,
      "step": 1766060
    },
    {
      "epoch": 2.8902286548444325,
      "grad_norm": 0.3527713716030121,
      "learning_rate": 4.247346191100598e-06,
      "loss": 0.0167,
      "step": 1766080
    },
    {
      "epoch": 2.8902613852830856,
      "grad_norm": 0.2480933964252472,
      "learning_rate": 4.247280298887081e-06,
      "loss": 0.019,
      "step": 1766100
    },
    {
      "epoch": 2.8902941157217388,
      "grad_norm": 0.8067185282707214,
      "learning_rate": 4.247214406673564e-06,
      "loss": 0.0105,
      "step": 1766120
    },
    {
      "epoch": 2.8903268461603924,
      "grad_norm": 0.7397441864013672,
      "learning_rate": 4.2471485144600464e-06,
      "loss": 0.0115,
      "step": 1766140
    },
    {
      "epoch": 2.8903595765990455,
      "grad_norm": 0.2767881453037262,
      "learning_rate": 4.247082622246529e-06,
      "loss": 0.0149,
      "step": 1766160
    },
    {
      "epoch": 2.8903923070376987,
      "grad_norm": 0.355729341506958,
      "learning_rate": 4.247016730033012e-06,
      "loss": 0.0108,
      "step": 1766180
    },
    {
      "epoch": 2.8904250374763523,
      "grad_norm": 0.536828875541687,
      "learning_rate": 4.2469508378194955e-06,
      "loss": 0.0116,
      "step": 1766200
    },
    {
      "epoch": 2.890457767915006,
      "grad_norm": 0.22212912142276764,
      "learning_rate": 4.246884945605978e-06,
      "loss": 0.0114,
      "step": 1766220
    },
    {
      "epoch": 2.890490498353659,
      "grad_norm": 0.303204745054245,
      "learning_rate": 4.246819053392461e-06,
      "loss": 0.0117,
      "step": 1766240
    },
    {
      "epoch": 2.890523228792312,
      "grad_norm": 0.17985357344150543,
      "learning_rate": 4.246753161178944e-06,
      "loss": 0.018,
      "step": 1766260
    },
    {
      "epoch": 2.8905559592309658,
      "grad_norm": 1.2448891401290894,
      "learning_rate": 4.2466872689654265e-06,
      "loss": 0.0157,
      "step": 1766280
    },
    {
      "epoch": 2.890588689669619,
      "grad_norm": 0.27573785185813904,
      "learning_rate": 4.246621376751909e-06,
      "loss": 0.0164,
      "step": 1766300
    },
    {
      "epoch": 2.890621420108272,
      "grad_norm": 0.2393975555896759,
      "learning_rate": 4.246555484538392e-06,
      "loss": 0.0106,
      "step": 1766320
    },
    {
      "epoch": 2.8906541505469256,
      "grad_norm": 0.9792258739471436,
      "learning_rate": 4.2464895923248755e-06,
      "loss": 0.016,
      "step": 1766340
    },
    {
      "epoch": 2.8906868809855792,
      "grad_norm": 0.25578901171684265,
      "learning_rate": 4.246423700111358e-06,
      "loss": 0.0112,
      "step": 1766360
    },
    {
      "epoch": 2.8907196114242324,
      "grad_norm": 0.241365447640419,
      "learning_rate": 4.246357807897841e-06,
      "loss": 0.0143,
      "step": 1766380
    },
    {
      "epoch": 2.8907523418628855,
      "grad_norm": 0.3644368350505829,
      "learning_rate": 4.246291915684324e-06,
      "loss": 0.0133,
      "step": 1766400
    },
    {
      "epoch": 2.890785072301539,
      "grad_norm": 1.1677806377410889,
      "learning_rate": 4.246226023470807e-06,
      "loss": 0.0119,
      "step": 1766420
    },
    {
      "epoch": 2.8908178027401923,
      "grad_norm": 0.5227603912353516,
      "learning_rate": 4.24616013125729e-06,
      "loss": 0.0086,
      "step": 1766440
    },
    {
      "epoch": 2.8908505331788454,
      "grad_norm": 0.602224588394165,
      "learning_rate": 4.246094239043773e-06,
      "loss": 0.0167,
      "step": 1766460
    },
    {
      "epoch": 2.890883263617499,
      "grad_norm": 1.4437545537948608,
      "learning_rate": 4.2460283468302556e-06,
      "loss": 0.016,
      "step": 1766480
    },
    {
      "epoch": 2.890915994056152,
      "grad_norm": 0.29390278458595276,
      "learning_rate": 4.245962454616738e-06,
      "loss": 0.0101,
      "step": 1766500
    },
    {
      "epoch": 2.8909487244948058,
      "grad_norm": 0.12464184314012527,
      "learning_rate": 4.245896562403221e-06,
      "loss": 0.0094,
      "step": 1766520
    },
    {
      "epoch": 2.890981454933459,
      "grad_norm": 1.047163486480713,
      "learning_rate": 4.245830670189704e-06,
      "loss": 0.0199,
      "step": 1766540
    },
    {
      "epoch": 2.8910141853721125,
      "grad_norm": 0.935009241104126,
      "learning_rate": 4.2457647779761865e-06,
      "loss": 0.0144,
      "step": 1766560
    },
    {
      "epoch": 2.8910469158107657,
      "grad_norm": 0.9420400261878967,
      "learning_rate": 4.245698885762669e-06,
      "loss": 0.0124,
      "step": 1766580
    },
    {
      "epoch": 2.891079646249419,
      "grad_norm": 0.1166691854596138,
      "learning_rate": 4.245632993549153e-06,
      "loss": 0.01,
      "step": 1766600
    },
    {
      "epoch": 2.8911123766880724,
      "grad_norm": 0.6508873701095581,
      "learning_rate": 4.245567101335636e-06,
      "loss": 0.014,
      "step": 1766620
    },
    {
      "epoch": 2.8911451071267256,
      "grad_norm": 0.4174405038356781,
      "learning_rate": 4.245501209122118e-06,
      "loss": 0.0199,
      "step": 1766640
    },
    {
      "epoch": 2.891177837565379,
      "grad_norm": 0.22331756353378296,
      "learning_rate": 4.245435316908601e-06,
      "loss": 0.013,
      "step": 1766660
    },
    {
      "epoch": 2.8912105680040323,
      "grad_norm": 0.13188718259334564,
      "learning_rate": 4.245369424695085e-06,
      "loss": 0.0104,
      "step": 1766680
    },
    {
      "epoch": 2.891243298442686,
      "grad_norm": 0.42541182041168213,
      "learning_rate": 4.245303532481567e-06,
      "loss": 0.0113,
      "step": 1766700
    },
    {
      "epoch": 2.891276028881339,
      "grad_norm": 0.2825341820716858,
      "learning_rate": 4.24523764026805e-06,
      "loss": 0.0146,
      "step": 1766720
    },
    {
      "epoch": 2.891308759319992,
      "grad_norm": 0.2523882985115051,
      "learning_rate": 4.245171748054533e-06,
      "loss": 0.0114,
      "step": 1766740
    },
    {
      "epoch": 2.891341489758646,
      "grad_norm": 0.19117045402526855,
      "learning_rate": 4.245105855841016e-06,
      "loss": 0.01,
      "step": 1766760
    },
    {
      "epoch": 2.891374220197299,
      "grad_norm": 0.466509610414505,
      "learning_rate": 4.245039963627498e-06,
      "loss": 0.0121,
      "step": 1766780
    },
    {
      "epoch": 2.8914069506359525,
      "grad_norm": 0.47679609060287476,
      "learning_rate": 4.244974071413981e-06,
      "loss": 0.0136,
      "step": 1766800
    },
    {
      "epoch": 2.8914396810746057,
      "grad_norm": 0.21785540878772736,
      "learning_rate": 4.244908179200465e-06,
      "loss": 0.0133,
      "step": 1766820
    },
    {
      "epoch": 2.8914724115132593,
      "grad_norm": 0.3005925714969635,
      "learning_rate": 4.244842286986947e-06,
      "loss": 0.0072,
      "step": 1766840
    },
    {
      "epoch": 2.8915051419519124,
      "grad_norm": 0.15978626906871796,
      "learning_rate": 4.24477639477343e-06,
      "loss": 0.0126,
      "step": 1766860
    },
    {
      "epoch": 2.8915378723905656,
      "grad_norm": 0.33889833092689514,
      "learning_rate": 4.244710502559913e-06,
      "loss": 0.0132,
      "step": 1766880
    },
    {
      "epoch": 2.891570602829219,
      "grad_norm": 0.23771755397319794,
      "learning_rate": 4.244644610346396e-06,
      "loss": 0.0088,
      "step": 1766900
    },
    {
      "epoch": 2.8916033332678723,
      "grad_norm": 0.19405010342597961,
      "learning_rate": 4.244578718132878e-06,
      "loss": 0.0208,
      "step": 1766920
    },
    {
      "epoch": 2.891636063706526,
      "grad_norm": 0.2448413074016571,
      "learning_rate": 4.244512825919361e-06,
      "loss": 0.0128,
      "step": 1766940
    },
    {
      "epoch": 2.891668794145179,
      "grad_norm": 0.09272155165672302,
      "learning_rate": 4.244446933705844e-06,
      "loss": 0.0121,
      "step": 1766960
    },
    {
      "epoch": 2.8917015245838327,
      "grad_norm": 0.3414461314678192,
      "learning_rate": 4.244381041492327e-06,
      "loss": 0.0137,
      "step": 1766980
    },
    {
      "epoch": 2.891734255022486,
      "grad_norm": 0.09803511202335358,
      "learning_rate": 4.24431514927881e-06,
      "loss": 0.0134,
      "step": 1767000
    },
    {
      "epoch": 2.891766985461139,
      "grad_norm": 0.08145496994256973,
      "learning_rate": 4.244249257065293e-06,
      "loss": 0.0164,
      "step": 1767020
    },
    {
      "epoch": 2.8917997158997926,
      "grad_norm": 1.2229957580566406,
      "learning_rate": 4.244183364851776e-06,
      "loss": 0.0167,
      "step": 1767040
    },
    {
      "epoch": 2.8918324463384457,
      "grad_norm": 0.3628710210323334,
      "learning_rate": 4.244117472638259e-06,
      "loss": 0.0145,
      "step": 1767060
    },
    {
      "epoch": 2.8918651767770993,
      "grad_norm": 0.23279006779193878,
      "learning_rate": 4.244051580424742e-06,
      "loss": 0.0134,
      "step": 1767080
    },
    {
      "epoch": 2.8918979072157525,
      "grad_norm": 0.9713179469108582,
      "learning_rate": 4.243985688211225e-06,
      "loss": 0.0147,
      "step": 1767100
    },
    {
      "epoch": 2.891930637654406,
      "grad_norm": 0.15955452620983124,
      "learning_rate": 4.2439197959977075e-06,
      "loss": 0.0126,
      "step": 1767120
    },
    {
      "epoch": 2.891963368093059,
      "grad_norm": 0.2675706148147583,
      "learning_rate": 4.24385390378419e-06,
      "loss": 0.0167,
      "step": 1767140
    },
    {
      "epoch": 2.8919960985317124,
      "grad_norm": 0.17428797483444214,
      "learning_rate": 4.243788011570673e-06,
      "loss": 0.0147,
      "step": 1767160
    },
    {
      "epoch": 2.892028828970366,
      "grad_norm": 0.21592804789543152,
      "learning_rate": 4.243722119357156e-06,
      "loss": 0.0128,
      "step": 1767180
    },
    {
      "epoch": 2.892061559409019,
      "grad_norm": 0.38993632793426514,
      "learning_rate": 4.243656227143638e-06,
      "loss": 0.0122,
      "step": 1767200
    },
    {
      "epoch": 2.8920942898476727,
      "grad_norm": 0.3909042179584503,
      "learning_rate": 4.243590334930122e-06,
      "loss": 0.0155,
      "step": 1767220
    },
    {
      "epoch": 2.892127020286326,
      "grad_norm": 0.28763577342033386,
      "learning_rate": 4.243524442716605e-06,
      "loss": 0.0129,
      "step": 1767240
    },
    {
      "epoch": 2.8921597507249794,
      "grad_norm": 0.4465397894382477,
      "learning_rate": 4.2434585505030875e-06,
      "loss": 0.0126,
      "step": 1767260
    },
    {
      "epoch": 2.8921924811636326,
      "grad_norm": 0.106620654463768,
      "learning_rate": 4.24339265828957e-06,
      "loss": 0.0127,
      "step": 1767280
    },
    {
      "epoch": 2.8922252116022857,
      "grad_norm": 0.27780881524086,
      "learning_rate": 4.243326766076053e-06,
      "loss": 0.0142,
      "step": 1767300
    },
    {
      "epoch": 2.8922579420409393,
      "grad_norm": 0.1120874285697937,
      "learning_rate": 4.243260873862536e-06,
      "loss": 0.0104,
      "step": 1767320
    },
    {
      "epoch": 2.8922906724795925,
      "grad_norm": 0.34002208709716797,
      "learning_rate": 4.2431949816490184e-06,
      "loss": 0.0223,
      "step": 1767340
    },
    {
      "epoch": 2.8923234029182456,
      "grad_norm": 0.34413185715675354,
      "learning_rate": 4.243129089435501e-06,
      "loss": 0.0076,
      "step": 1767360
    },
    {
      "epoch": 2.8923561333568992,
      "grad_norm": 0.33224114775657654,
      "learning_rate": 4.243063197221985e-06,
      "loss": 0.0148,
      "step": 1767380
    },
    {
      "epoch": 2.892388863795553,
      "grad_norm": 0.2627665102481842,
      "learning_rate": 4.2429973050084675e-06,
      "loss": 0.0115,
      "step": 1767400
    },
    {
      "epoch": 2.892421594234206,
      "grad_norm": 0.2133679986000061,
      "learning_rate": 4.24293141279495e-06,
      "loss": 0.0134,
      "step": 1767420
    },
    {
      "epoch": 2.892454324672859,
      "grad_norm": 0.181584432721138,
      "learning_rate": 4.242865520581434e-06,
      "loss": 0.0101,
      "step": 1767440
    },
    {
      "epoch": 2.8924870551115127,
      "grad_norm": 0.6174174547195435,
      "learning_rate": 4.242799628367917e-06,
      "loss": 0.02,
      "step": 1767460
    },
    {
      "epoch": 2.892519785550166,
      "grad_norm": 0.8480240106582642,
      "learning_rate": 4.242733736154399e-06,
      "loss": 0.0183,
      "step": 1767480
    },
    {
      "epoch": 2.892552515988819,
      "grad_norm": 1.6359723806381226,
      "learning_rate": 4.242667843940882e-06,
      "loss": 0.0146,
      "step": 1767500
    },
    {
      "epoch": 2.8925852464274726,
      "grad_norm": 0.12410655617713928,
      "learning_rate": 4.242601951727365e-06,
      "loss": 0.0165,
      "step": 1767520
    },
    {
      "epoch": 2.892617976866126,
      "grad_norm": 0.16613970696926117,
      "learning_rate": 4.2425360595138475e-06,
      "loss": 0.0099,
      "step": 1767540
    },
    {
      "epoch": 2.8926507073047794,
      "grad_norm": 0.3131086230278015,
      "learning_rate": 4.24247016730033e-06,
      "loss": 0.0209,
      "step": 1767560
    },
    {
      "epoch": 2.8926834377434325,
      "grad_norm": 0.42747676372528076,
      "learning_rate": 4.242404275086813e-06,
      "loss": 0.0132,
      "step": 1767580
    },
    {
      "epoch": 2.892716168182086,
      "grad_norm": 0.30292314291000366,
      "learning_rate": 4.242338382873296e-06,
      "loss": 0.0152,
      "step": 1767600
    },
    {
      "epoch": 2.8927488986207393,
      "grad_norm": 0.25757160782814026,
      "learning_rate": 4.242272490659779e-06,
      "loss": 0.017,
      "step": 1767620
    },
    {
      "epoch": 2.8927816290593924,
      "grad_norm": 0.369942307472229,
      "learning_rate": 4.242206598446262e-06,
      "loss": 0.0151,
      "step": 1767640
    },
    {
      "epoch": 2.892814359498046,
      "grad_norm": 0.4576858878135681,
      "learning_rate": 4.242140706232745e-06,
      "loss": 0.0128,
      "step": 1767660
    },
    {
      "epoch": 2.8928470899366996,
      "grad_norm": 0.23536722362041473,
      "learning_rate": 4.2420748140192276e-06,
      "loss": 0.0129,
      "step": 1767680
    },
    {
      "epoch": 2.8928798203753527,
      "grad_norm": 0.5064370632171631,
      "learning_rate": 4.24200892180571e-06,
      "loss": 0.0097,
      "step": 1767700
    },
    {
      "epoch": 2.892912550814006,
      "grad_norm": 0.4140383303165436,
      "learning_rate": 4.241943029592193e-06,
      "loss": 0.0124,
      "step": 1767720
    },
    {
      "epoch": 2.8929452812526595,
      "grad_norm": 0.4066256880760193,
      "learning_rate": 4.241877137378677e-06,
      "loss": 0.0117,
      "step": 1767740
    },
    {
      "epoch": 2.8929780116913126,
      "grad_norm": 0.4216591715812683,
      "learning_rate": 4.241811245165159e-06,
      "loss": 0.0187,
      "step": 1767760
    },
    {
      "epoch": 2.893010742129966,
      "grad_norm": 0.14704599976539612,
      "learning_rate": 4.241745352951642e-06,
      "loss": 0.0184,
      "step": 1767780
    },
    {
      "epoch": 2.8930434725686194,
      "grad_norm": 0.4448830187320709,
      "learning_rate": 4.241679460738125e-06,
      "loss": 0.0168,
      "step": 1767800
    },
    {
      "epoch": 2.893076203007273,
      "grad_norm": 0.19521212577819824,
      "learning_rate": 4.241613568524608e-06,
      "loss": 0.0123,
      "step": 1767820
    },
    {
      "epoch": 2.893108933445926,
      "grad_norm": 0.5051820874214172,
      "learning_rate": 4.241547676311091e-06,
      "loss": 0.0132,
      "step": 1767840
    },
    {
      "epoch": 2.8931416638845793,
      "grad_norm": 0.36771833896636963,
      "learning_rate": 4.241481784097574e-06,
      "loss": 0.0153,
      "step": 1767860
    },
    {
      "epoch": 2.893174394323233,
      "grad_norm": 0.6488770842552185,
      "learning_rate": 4.241415891884057e-06,
      "loss": 0.0224,
      "step": 1767880
    },
    {
      "epoch": 2.893207124761886,
      "grad_norm": 0.22702468931674957,
      "learning_rate": 4.241349999670539e-06,
      "loss": 0.0139,
      "step": 1767900
    },
    {
      "epoch": 2.893239855200539,
      "grad_norm": 0.30003735423088074,
      "learning_rate": 4.241284107457022e-06,
      "loss": 0.0161,
      "step": 1767920
    },
    {
      "epoch": 2.8932725856391928,
      "grad_norm": 0.19763445854187012,
      "learning_rate": 4.241218215243505e-06,
      "loss": 0.0132,
      "step": 1767940
    },
    {
      "epoch": 2.893305316077846,
      "grad_norm": 0.3253755569458008,
      "learning_rate": 4.241152323029988e-06,
      "loss": 0.0164,
      "step": 1767960
    },
    {
      "epoch": 2.8933380465164995,
      "grad_norm": 0.32155150175094604,
      "learning_rate": 4.24108643081647e-06,
      "loss": 0.0096,
      "step": 1767980
    },
    {
      "epoch": 2.8933707769551527,
      "grad_norm": 0.41410765051841736,
      "learning_rate": 4.241020538602953e-06,
      "loss": 0.0108,
      "step": 1768000
    },
    {
      "epoch": 2.8934035073938063,
      "grad_norm": 0.902462363243103,
      "learning_rate": 4.240954646389437e-06,
      "loss": 0.0157,
      "step": 1768020
    },
    {
      "epoch": 2.8934362378324594,
      "grad_norm": 0.23896756768226624,
      "learning_rate": 4.240888754175919e-06,
      "loss": 0.0098,
      "step": 1768040
    },
    {
      "epoch": 2.8934689682711126,
      "grad_norm": 0.18222540616989136,
      "learning_rate": 4.240822861962402e-06,
      "loss": 0.0123,
      "step": 1768060
    },
    {
      "epoch": 2.893501698709766,
      "grad_norm": 0.19512656331062317,
      "learning_rate": 4.240756969748885e-06,
      "loss": 0.0152,
      "step": 1768080
    },
    {
      "epoch": 2.8935344291484193,
      "grad_norm": 0.35398417711257935,
      "learning_rate": 4.2406910775353685e-06,
      "loss": 0.0157,
      "step": 1768100
    },
    {
      "epoch": 2.893567159587073,
      "grad_norm": 0.11316876113414764,
      "learning_rate": 4.240625185321851e-06,
      "loss": 0.0101,
      "step": 1768120
    },
    {
      "epoch": 2.893599890025726,
      "grad_norm": 0.06461000442504883,
      "learning_rate": 4.240559293108334e-06,
      "loss": 0.0143,
      "step": 1768140
    },
    {
      "epoch": 2.8936326204643796,
      "grad_norm": 0.23188237845897675,
      "learning_rate": 4.240493400894817e-06,
      "loss": 0.0115,
      "step": 1768160
    },
    {
      "epoch": 2.893665350903033,
      "grad_norm": 0.8051134943962097,
      "learning_rate": 4.2404275086812994e-06,
      "loss": 0.0121,
      "step": 1768180
    },
    {
      "epoch": 2.893698081341686,
      "grad_norm": 0.23477958142757416,
      "learning_rate": 4.240361616467782e-06,
      "loss": 0.0142,
      "step": 1768200
    },
    {
      "epoch": 2.8937308117803395,
      "grad_norm": 0.2186369150876999,
      "learning_rate": 4.240295724254265e-06,
      "loss": 0.0087,
      "step": 1768220
    },
    {
      "epoch": 2.8937635422189927,
      "grad_norm": 0.4793912172317505,
      "learning_rate": 4.2402298320407485e-06,
      "loss": 0.0137,
      "step": 1768240
    },
    {
      "epoch": 2.8937962726576463,
      "grad_norm": 0.5600684881210327,
      "learning_rate": 4.240163939827231e-06,
      "loss": 0.018,
      "step": 1768260
    },
    {
      "epoch": 2.8938290030962994,
      "grad_norm": 0.21228207647800446,
      "learning_rate": 4.240098047613714e-06,
      "loss": 0.0179,
      "step": 1768280
    },
    {
      "epoch": 2.893861733534953,
      "grad_norm": 0.15180455148220062,
      "learning_rate": 4.240032155400197e-06,
      "loss": 0.0143,
      "step": 1768300
    },
    {
      "epoch": 2.893894463973606,
      "grad_norm": 0.3504331707954407,
      "learning_rate": 4.2399662631866795e-06,
      "loss": 0.0165,
      "step": 1768320
    },
    {
      "epoch": 2.8939271944122593,
      "grad_norm": 0.7137762904167175,
      "learning_rate": 4.239900370973162e-06,
      "loss": 0.0163,
      "step": 1768340
    },
    {
      "epoch": 2.893959924850913,
      "grad_norm": 0.31299537420272827,
      "learning_rate": 4.239834478759645e-06,
      "loss": 0.0146,
      "step": 1768360
    },
    {
      "epoch": 2.893992655289566,
      "grad_norm": 0.2458510845899582,
      "learning_rate": 4.239768586546128e-06,
      "loss": 0.0195,
      "step": 1768380
    },
    {
      "epoch": 2.8940253857282197,
      "grad_norm": 0.05765927582979202,
      "learning_rate": 4.2397026943326104e-06,
      "loss": 0.0171,
      "step": 1768400
    },
    {
      "epoch": 2.894058116166873,
      "grad_norm": 0.48645180463790894,
      "learning_rate": 4.239636802119094e-06,
      "loss": 0.0136,
      "step": 1768420
    },
    {
      "epoch": 2.8940908466055264,
      "grad_norm": 1.1243690252304077,
      "learning_rate": 4.239570909905577e-06,
      "loss": 0.0158,
      "step": 1768440
    },
    {
      "epoch": 2.8941235770441796,
      "grad_norm": 1.084733247756958,
      "learning_rate": 4.2395050176920595e-06,
      "loss": 0.016,
      "step": 1768460
    },
    {
      "epoch": 2.8941563074828327,
      "grad_norm": 0.5717998147010803,
      "learning_rate": 4.239439125478543e-06,
      "loss": 0.0084,
      "step": 1768480
    },
    {
      "epoch": 2.8941890379214863,
      "grad_norm": 0.6131100058555603,
      "learning_rate": 4.239373233265026e-06,
      "loss": 0.0217,
      "step": 1768500
    },
    {
      "epoch": 2.8942217683601394,
      "grad_norm": 0.30979737639427185,
      "learning_rate": 4.2393073410515086e-06,
      "loss": 0.0136,
      "step": 1768520
    },
    {
      "epoch": 2.894254498798793,
      "grad_norm": 0.4445240795612335,
      "learning_rate": 4.239241448837991e-06,
      "loss": 0.0096,
      "step": 1768540
    },
    {
      "epoch": 2.894287229237446,
      "grad_norm": 0.051469434052705765,
      "learning_rate": 4.239175556624474e-06,
      "loss": 0.0146,
      "step": 1768560
    },
    {
      "epoch": 2.8943199596761,
      "grad_norm": 0.6712742447853088,
      "learning_rate": 4.239109664410957e-06,
      "loss": 0.0111,
      "step": 1768580
    },
    {
      "epoch": 2.894352690114753,
      "grad_norm": 0.23831868171691895,
      "learning_rate": 4.2390437721974395e-06,
      "loss": 0.0217,
      "step": 1768600
    },
    {
      "epoch": 2.894385420553406,
      "grad_norm": 0.36906328797340393,
      "learning_rate": 4.238977879983922e-06,
      "loss": 0.0165,
      "step": 1768620
    },
    {
      "epoch": 2.8944181509920597,
      "grad_norm": 0.3635982573032379,
      "learning_rate": 4.238911987770406e-06,
      "loss": 0.021,
      "step": 1768640
    },
    {
      "epoch": 2.894450881430713,
      "grad_norm": 0.27266716957092285,
      "learning_rate": 4.238846095556889e-06,
      "loss": 0.0107,
      "step": 1768660
    },
    {
      "epoch": 2.8944836118693664,
      "grad_norm": 0.13006702065467834,
      "learning_rate": 4.238780203343371e-06,
      "loss": 0.0116,
      "step": 1768680
    },
    {
      "epoch": 2.8945163423080196,
      "grad_norm": 0.3812892436981201,
      "learning_rate": 4.238714311129854e-06,
      "loss": 0.0174,
      "step": 1768700
    },
    {
      "epoch": 2.894549072746673,
      "grad_norm": 0.5592648983001709,
      "learning_rate": 4.238648418916337e-06,
      "loss": 0.012,
      "step": 1768720
    },
    {
      "epoch": 2.8945818031853263,
      "grad_norm": 0.2483856976032257,
      "learning_rate": 4.2385825267028195e-06,
      "loss": 0.0198,
      "step": 1768740
    },
    {
      "epoch": 2.8946145336239795,
      "grad_norm": 0.33081579208374023,
      "learning_rate": 4.238516634489302e-06,
      "loss": 0.0119,
      "step": 1768760
    },
    {
      "epoch": 2.894647264062633,
      "grad_norm": 0.31454020738601685,
      "learning_rate": 4.238450742275785e-06,
      "loss": 0.0091,
      "step": 1768780
    },
    {
      "epoch": 2.894679994501286,
      "grad_norm": 0.19135063886642456,
      "learning_rate": 4.238384850062269e-06,
      "loss": 0.01,
      "step": 1768800
    },
    {
      "epoch": 2.89471272493994,
      "grad_norm": 0.5793164372444153,
      "learning_rate": 4.238318957848751e-06,
      "loss": 0.0141,
      "step": 1768820
    },
    {
      "epoch": 2.894745455378593,
      "grad_norm": 0.28886404633522034,
      "learning_rate": 4.238253065635234e-06,
      "loss": 0.0138,
      "step": 1768840
    },
    {
      "epoch": 2.8947781858172466,
      "grad_norm": 0.47397634387016296,
      "learning_rate": 4.238187173421718e-06,
      "loss": 0.0131,
      "step": 1768860
    },
    {
      "epoch": 2.8948109162558997,
      "grad_norm": 0.3182995915412903,
      "learning_rate": 4.2381212812082e-06,
      "loss": 0.0139,
      "step": 1768880
    },
    {
      "epoch": 2.894843646694553,
      "grad_norm": 0.2696388363838196,
      "learning_rate": 4.238055388994683e-06,
      "loss": 0.0174,
      "step": 1768900
    },
    {
      "epoch": 2.8948763771332064,
      "grad_norm": 0.1314326673746109,
      "learning_rate": 4.237989496781166e-06,
      "loss": 0.0096,
      "step": 1768920
    },
    {
      "epoch": 2.8949091075718596,
      "grad_norm": 0.28747740387916565,
      "learning_rate": 4.237923604567649e-06,
      "loss": 0.0123,
      "step": 1768940
    },
    {
      "epoch": 2.8949418380105127,
      "grad_norm": 0.2365557849407196,
      "learning_rate": 4.237857712354131e-06,
      "loss": 0.0146,
      "step": 1768960
    },
    {
      "epoch": 2.8949745684491663,
      "grad_norm": 0.13223102688789368,
      "learning_rate": 4.237791820140614e-06,
      "loss": 0.0178,
      "step": 1768980
    },
    {
      "epoch": 2.89500729888782,
      "grad_norm": 0.370548814535141,
      "learning_rate": 4.237725927927097e-06,
      "loss": 0.0114,
      "step": 1769000
    },
    {
      "epoch": 2.895040029326473,
      "grad_norm": 0.09364514797925949,
      "learning_rate": 4.23766003571358e-06,
      "loss": 0.0124,
      "step": 1769020
    },
    {
      "epoch": 2.8950727597651262,
      "grad_norm": 0.903130829334259,
      "learning_rate": 4.237594143500063e-06,
      "loss": 0.0139,
      "step": 1769040
    },
    {
      "epoch": 2.89510549020378,
      "grad_norm": 0.4307233393192291,
      "learning_rate": 4.237528251286546e-06,
      "loss": 0.0174,
      "step": 1769060
    },
    {
      "epoch": 2.895138220642433,
      "grad_norm": 0.1375308632850647,
      "learning_rate": 4.237462359073029e-06,
      "loss": 0.0147,
      "step": 1769080
    },
    {
      "epoch": 2.895170951081086,
      "grad_norm": 0.17321035265922546,
      "learning_rate": 4.237396466859511e-06,
      "loss": 0.0115,
      "step": 1769100
    },
    {
      "epoch": 2.8952036815197397,
      "grad_norm": 0.13275007903575897,
      "learning_rate": 4.237330574645994e-06,
      "loss": 0.0114,
      "step": 1769120
    },
    {
      "epoch": 2.8952364119583933,
      "grad_norm": 0.21644650399684906,
      "learning_rate": 4.237264682432478e-06,
      "loss": 0.0083,
      "step": 1769140
    },
    {
      "epoch": 2.8952691423970465,
      "grad_norm": 0.5624980330467224,
      "learning_rate": 4.2371987902189605e-06,
      "loss": 0.0118,
      "step": 1769160
    },
    {
      "epoch": 2.8953018728356996,
      "grad_norm": 0.119879812002182,
      "learning_rate": 4.237132898005443e-06,
      "loss": 0.0109,
      "step": 1769180
    },
    {
      "epoch": 2.895334603274353,
      "grad_norm": 0.37835878133773804,
      "learning_rate": 4.237067005791926e-06,
      "loss": 0.0187,
      "step": 1769200
    },
    {
      "epoch": 2.8953673337130064,
      "grad_norm": 0.42419272661209106,
      "learning_rate": 4.237001113578409e-06,
      "loss": 0.0165,
      "step": 1769220
    },
    {
      "epoch": 2.8954000641516595,
      "grad_norm": 0.17996598780155182,
      "learning_rate": 4.2369352213648914e-06,
      "loss": 0.0238,
      "step": 1769240
    },
    {
      "epoch": 2.895432794590313,
      "grad_norm": 0.5133079886436462,
      "learning_rate": 4.236869329151375e-06,
      "loss": 0.0105,
      "step": 1769260
    },
    {
      "epoch": 2.8954655250289667,
      "grad_norm": 0.3372803330421448,
      "learning_rate": 4.236803436937858e-06,
      "loss": 0.015,
      "step": 1769280
    },
    {
      "epoch": 2.89549825546762,
      "grad_norm": 0.31327882409095764,
      "learning_rate": 4.2367375447243405e-06,
      "loss": 0.0112,
      "step": 1769300
    },
    {
      "epoch": 2.895530985906273,
      "grad_norm": 0.7212745547294617,
      "learning_rate": 4.236671652510823e-06,
      "loss": 0.0118,
      "step": 1769320
    },
    {
      "epoch": 2.8955637163449266,
      "grad_norm": 0.9603317379951477,
      "learning_rate": 4.236605760297306e-06,
      "loss": 0.0238,
      "step": 1769340
    },
    {
      "epoch": 2.8955964467835797,
      "grad_norm": 0.29699626564979553,
      "learning_rate": 4.236539868083789e-06,
      "loss": 0.0138,
      "step": 1769360
    },
    {
      "epoch": 2.895629177222233,
      "grad_norm": 0.4386528730392456,
      "learning_rate": 4.2364739758702715e-06,
      "loss": 0.0135,
      "step": 1769380
    },
    {
      "epoch": 2.8956619076608865,
      "grad_norm": 0.2707207202911377,
      "learning_rate": 4.236408083656754e-06,
      "loss": 0.009,
      "step": 1769400
    },
    {
      "epoch": 2.89569463809954,
      "grad_norm": 0.26488932967185974,
      "learning_rate": 4.236342191443237e-06,
      "loss": 0.0102,
      "step": 1769420
    },
    {
      "epoch": 2.8957273685381932,
      "grad_norm": 0.4344799220561981,
      "learning_rate": 4.2362762992297205e-06,
      "loss": 0.017,
      "step": 1769440
    },
    {
      "epoch": 2.8957600989768464,
      "grad_norm": 0.21893975138664246,
      "learning_rate": 4.236210407016203e-06,
      "loss": 0.0185,
      "step": 1769460
    },
    {
      "epoch": 2.8957928294155,
      "grad_norm": 0.5183773636817932,
      "learning_rate": 4.236144514802686e-06,
      "loss": 0.0111,
      "step": 1769480
    },
    {
      "epoch": 2.895825559854153,
      "grad_norm": 0.8669491410255432,
      "learning_rate": 4.23607862258917e-06,
      "loss": 0.0188,
      "step": 1769500
    },
    {
      "epoch": 2.8958582902928063,
      "grad_norm": 0.23847560584545135,
      "learning_rate": 4.236012730375652e-06,
      "loss": 0.0206,
      "step": 1769520
    },
    {
      "epoch": 2.89589102073146,
      "grad_norm": 0.19205982983112335,
      "learning_rate": 4.235946838162135e-06,
      "loss": 0.0169,
      "step": 1769540
    },
    {
      "epoch": 2.895923751170113,
      "grad_norm": 0.512153148651123,
      "learning_rate": 4.235880945948618e-06,
      "loss": 0.0173,
      "step": 1769560
    },
    {
      "epoch": 2.8959564816087666,
      "grad_norm": 0.2696801722049713,
      "learning_rate": 4.2358150537351005e-06,
      "loss": 0.0079,
      "step": 1769580
    },
    {
      "epoch": 2.8959892120474198,
      "grad_norm": 0.13698610663414001,
      "learning_rate": 4.235749161521583e-06,
      "loss": 0.0173,
      "step": 1769600
    },
    {
      "epoch": 2.8960219424860734,
      "grad_norm": 0.21255184710025787,
      "learning_rate": 4.235683269308066e-06,
      "loss": 0.009,
      "step": 1769620
    },
    {
      "epoch": 2.8960546729247265,
      "grad_norm": 0.4096364378929138,
      "learning_rate": 4.235617377094549e-06,
      "loss": 0.0124,
      "step": 1769640
    },
    {
      "epoch": 2.8960874033633797,
      "grad_norm": 0.25281086564064026,
      "learning_rate": 4.235551484881032e-06,
      "loss": 0.0127,
      "step": 1769660
    },
    {
      "epoch": 2.8961201338020333,
      "grad_norm": 0.1040177047252655,
      "learning_rate": 4.235485592667515e-06,
      "loss": 0.0145,
      "step": 1769680
    },
    {
      "epoch": 2.8961528642406864,
      "grad_norm": 0.4610885679721832,
      "learning_rate": 4.235419700453998e-06,
      "loss": 0.0097,
      "step": 1769700
    },
    {
      "epoch": 2.89618559467934,
      "grad_norm": 0.7447465658187866,
      "learning_rate": 4.2353538082404806e-06,
      "loss": 0.0146,
      "step": 1769720
    },
    {
      "epoch": 2.896218325117993,
      "grad_norm": 0.2709367573261261,
      "learning_rate": 4.235287916026963e-06,
      "loss": 0.0103,
      "step": 1769740
    },
    {
      "epoch": 2.8962510555566467,
      "grad_norm": 0.45687276124954224,
      "learning_rate": 4.235222023813446e-06,
      "loss": 0.0156,
      "step": 1769760
    },
    {
      "epoch": 2.8962837859953,
      "grad_norm": 0.1773243099451065,
      "learning_rate": 4.235156131599929e-06,
      "loss": 0.0116,
      "step": 1769780
    },
    {
      "epoch": 2.896316516433953,
      "grad_norm": 0.48254114389419556,
      "learning_rate": 4.2350902393864115e-06,
      "loss": 0.0207,
      "step": 1769800
    },
    {
      "epoch": 2.8963492468726066,
      "grad_norm": 0.1136920303106308,
      "learning_rate": 4.235024347172894e-06,
      "loss": 0.0093,
      "step": 1769820
    },
    {
      "epoch": 2.89638197731126,
      "grad_norm": 0.6125809550285339,
      "learning_rate": 4.234958454959378e-06,
      "loss": 0.0112,
      "step": 1769840
    },
    {
      "epoch": 2.8964147077499134,
      "grad_norm": 0.23921643197536469,
      "learning_rate": 4.234892562745861e-06,
      "loss": 0.0112,
      "step": 1769860
    },
    {
      "epoch": 2.8964474381885665,
      "grad_norm": 0.18214677274227142,
      "learning_rate": 4.234826670532343e-06,
      "loss": 0.0114,
      "step": 1769880
    },
    {
      "epoch": 2.89648016862722,
      "grad_norm": 0.21116188168525696,
      "learning_rate": 4.234760778318827e-06,
      "loss": 0.0143,
      "step": 1769900
    },
    {
      "epoch": 2.8965128990658733,
      "grad_norm": 0.1621697098016739,
      "learning_rate": 4.23469488610531e-06,
      "loss": 0.0127,
      "step": 1769920
    },
    {
      "epoch": 2.8965456295045264,
      "grad_norm": 0.46473428606987,
      "learning_rate": 4.234628993891792e-06,
      "loss": 0.0159,
      "step": 1769940
    },
    {
      "epoch": 2.89657835994318,
      "grad_norm": 1.2604700326919556,
      "learning_rate": 4.234563101678275e-06,
      "loss": 0.0177,
      "step": 1769960
    },
    {
      "epoch": 2.896611090381833,
      "grad_norm": 0.6784818768501282,
      "learning_rate": 4.234497209464758e-06,
      "loss": 0.0127,
      "step": 1769980
    },
    {
      "epoch": 2.8966438208204868,
      "grad_norm": 0.4880046248435974,
      "learning_rate": 4.234431317251241e-06,
      "loss": 0.0155,
      "step": 1770000
    },
    {
      "epoch": 2.89667655125914,
      "grad_norm": 0.38186851143836975,
      "learning_rate": 4.234365425037723e-06,
      "loss": 0.0128,
      "step": 1770020
    },
    {
      "epoch": 2.8967092816977935,
      "grad_norm": 0.3855186402797699,
      "learning_rate": 4.234299532824206e-06,
      "loss": 0.0104,
      "step": 1770040
    },
    {
      "epoch": 2.8967420121364467,
      "grad_norm": 0.4175042510032654,
      "learning_rate": 4.23423364061069e-06,
      "loss": 0.0097,
      "step": 1770060
    },
    {
      "epoch": 2.8967747425751,
      "grad_norm": 0.3102550208568573,
      "learning_rate": 4.2341677483971724e-06,
      "loss": 0.01,
      "step": 1770080
    },
    {
      "epoch": 2.8968074730137534,
      "grad_norm": 0.1667914092540741,
      "learning_rate": 4.234101856183655e-06,
      "loss": 0.0113,
      "step": 1770100
    },
    {
      "epoch": 2.8968402034524066,
      "grad_norm": 0.46741050481796265,
      "learning_rate": 4.234035963970138e-06,
      "loss": 0.0109,
      "step": 1770120
    },
    {
      "epoch": 2.89687293389106,
      "grad_norm": 0.5110148787498474,
      "learning_rate": 4.233970071756621e-06,
      "loss": 0.0109,
      "step": 1770140
    },
    {
      "epoch": 2.8969056643297133,
      "grad_norm": 0.16633503139019012,
      "learning_rate": 4.233904179543103e-06,
      "loss": 0.0081,
      "step": 1770160
    },
    {
      "epoch": 2.896938394768367,
      "grad_norm": 0.5852143168449402,
      "learning_rate": 4.233838287329586e-06,
      "loss": 0.0155,
      "step": 1770180
    },
    {
      "epoch": 2.89697112520702,
      "grad_norm": 0.5745074152946472,
      "learning_rate": 4.23377239511607e-06,
      "loss": 0.0134,
      "step": 1770200
    },
    {
      "epoch": 2.897003855645673,
      "grad_norm": 0.1821124255657196,
      "learning_rate": 4.2337065029025525e-06,
      "loss": 0.0128,
      "step": 1770220
    },
    {
      "epoch": 2.897036586084327,
      "grad_norm": 0.11133672297000885,
      "learning_rate": 4.233640610689035e-06,
      "loss": 0.0139,
      "step": 1770240
    },
    {
      "epoch": 2.89706931652298,
      "grad_norm": 1.2279974222183228,
      "learning_rate": 4.233574718475518e-06,
      "loss": 0.0176,
      "step": 1770260
    },
    {
      "epoch": 2.8971020469616335,
      "grad_norm": 0.3124682307243347,
      "learning_rate": 4.2335088262620015e-06,
      "loss": 0.0141,
      "step": 1770280
    },
    {
      "epoch": 2.8971347774002867,
      "grad_norm": 0.44915252923965454,
      "learning_rate": 4.233442934048484e-06,
      "loss": 0.0111,
      "step": 1770300
    },
    {
      "epoch": 2.8971675078389403,
      "grad_norm": 0.645618736743927,
      "learning_rate": 4.233377041834967e-06,
      "loss": 0.0139,
      "step": 1770320
    },
    {
      "epoch": 2.8972002382775934,
      "grad_norm": 0.24318134784698486,
      "learning_rate": 4.23331114962145e-06,
      "loss": 0.0146,
      "step": 1770340
    },
    {
      "epoch": 2.8972329687162466,
      "grad_norm": 0.1676090955734253,
      "learning_rate": 4.2332452574079325e-06,
      "loss": 0.0204,
      "step": 1770360
    },
    {
      "epoch": 2.8972656991549,
      "grad_norm": 0.32694607973098755,
      "learning_rate": 4.233179365194415e-06,
      "loss": 0.0138,
      "step": 1770380
    },
    {
      "epoch": 2.8972984295935533,
      "grad_norm": 0.14667311310768127,
      "learning_rate": 4.233113472980898e-06,
      "loss": 0.0144,
      "step": 1770400
    },
    {
      "epoch": 2.8973311600322065,
      "grad_norm": 0.3005545735359192,
      "learning_rate": 4.233047580767381e-06,
      "loss": 0.0162,
      "step": 1770420
    },
    {
      "epoch": 2.89736389047086,
      "grad_norm": 0.3428550064563751,
      "learning_rate": 4.2329816885538634e-06,
      "loss": 0.0098,
      "step": 1770440
    },
    {
      "epoch": 2.8973966209095137,
      "grad_norm": 0.05372588336467743,
      "learning_rate": 4.232915796340347e-06,
      "loss": 0.0127,
      "step": 1770460
    },
    {
      "epoch": 2.897429351348167,
      "grad_norm": 0.44688910245895386,
      "learning_rate": 4.23284990412683e-06,
      "loss": 0.016,
      "step": 1770480
    },
    {
      "epoch": 2.89746208178682,
      "grad_norm": 0.05588441714644432,
      "learning_rate": 4.2327840119133125e-06,
      "loss": 0.0122,
      "step": 1770500
    },
    {
      "epoch": 2.8974948122254736,
      "grad_norm": 0.32328543066978455,
      "learning_rate": 4.232718119699795e-06,
      "loss": 0.0107,
      "step": 1770520
    },
    {
      "epoch": 2.8975275426641267,
      "grad_norm": 0.07668159902095795,
      "learning_rate": 4.232652227486278e-06,
      "loss": 0.0115,
      "step": 1770540
    },
    {
      "epoch": 2.89756027310278,
      "grad_norm": 0.44153863191604614,
      "learning_rate": 4.2325863352727616e-06,
      "loss": 0.0126,
      "step": 1770560
    },
    {
      "epoch": 2.8975930035414335,
      "grad_norm": 0.3239462077617645,
      "learning_rate": 4.232520443059244e-06,
      "loss": 0.0164,
      "step": 1770580
    },
    {
      "epoch": 2.897625733980087,
      "grad_norm": 0.7044128775596619,
      "learning_rate": 4.232454550845727e-06,
      "loss": 0.0137,
      "step": 1770600
    },
    {
      "epoch": 2.89765846441874,
      "grad_norm": 0.24837283790111542,
      "learning_rate": 4.23238865863221e-06,
      "loss": 0.0125,
      "step": 1770620
    },
    {
      "epoch": 2.8976911948573933,
      "grad_norm": 0.27861863374710083,
      "learning_rate": 4.2323227664186925e-06,
      "loss": 0.0141,
      "step": 1770640
    },
    {
      "epoch": 2.897723925296047,
      "grad_norm": 0.35957100987434387,
      "learning_rate": 4.232256874205175e-06,
      "loss": 0.0148,
      "step": 1770660
    },
    {
      "epoch": 2.8977566557347,
      "grad_norm": 0.07593843340873718,
      "learning_rate": 4.232190981991659e-06,
      "loss": 0.0167,
      "step": 1770680
    },
    {
      "epoch": 2.8977893861733532,
      "grad_norm": 0.21337628364562988,
      "learning_rate": 4.232125089778142e-06,
      "loss": 0.021,
      "step": 1770700
    },
    {
      "epoch": 2.897822116612007,
      "grad_norm": 0.4646852910518646,
      "learning_rate": 4.232059197564624e-06,
      "loss": 0.011,
      "step": 1770720
    },
    {
      "epoch": 2.8978548470506604,
      "grad_norm": 0.9196181297302246,
      "learning_rate": 4.231993305351107e-06,
      "loss": 0.0137,
      "step": 1770740
    },
    {
      "epoch": 2.8978875774893136,
      "grad_norm": 0.12288511544466019,
      "learning_rate": 4.23192741313759e-06,
      "loss": 0.0139,
      "step": 1770760
    },
    {
      "epoch": 2.8979203079279667,
      "grad_norm": 0.1977674514055252,
      "learning_rate": 4.2318615209240726e-06,
      "loss": 0.0108,
      "step": 1770780
    },
    {
      "epoch": 2.8979530383666203,
      "grad_norm": 0.43986019492149353,
      "learning_rate": 4.231795628710555e-06,
      "loss": 0.0163,
      "step": 1770800
    },
    {
      "epoch": 2.8979857688052735,
      "grad_norm": 0.33971455693244934,
      "learning_rate": 4.231729736497038e-06,
      "loss": 0.0081,
      "step": 1770820
    },
    {
      "epoch": 2.8980184992439266,
      "grad_norm": 0.29948365688323975,
      "learning_rate": 4.231663844283521e-06,
      "loss": 0.0086,
      "step": 1770840
    },
    {
      "epoch": 2.89805122968258,
      "grad_norm": 0.5274748802185059,
      "learning_rate": 4.231597952070004e-06,
      "loss": 0.013,
      "step": 1770860
    },
    {
      "epoch": 2.898083960121234,
      "grad_norm": 0.5714079141616821,
      "learning_rate": 4.231532059856487e-06,
      "loss": 0.0094,
      "step": 1770880
    },
    {
      "epoch": 2.898116690559887,
      "grad_norm": 0.34610116481781006,
      "learning_rate": 4.23146616764297e-06,
      "loss": 0.0147,
      "step": 1770900
    },
    {
      "epoch": 2.89814942099854,
      "grad_norm": 0.15858031809329987,
      "learning_rate": 4.2314002754294534e-06,
      "loss": 0.0126,
      "step": 1770920
    },
    {
      "epoch": 2.8981821514371937,
      "grad_norm": 0.6760327219963074,
      "learning_rate": 4.231334383215936e-06,
      "loss": 0.0103,
      "step": 1770940
    },
    {
      "epoch": 2.898214881875847,
      "grad_norm": 0.47467684745788574,
      "learning_rate": 4.231268491002419e-06,
      "loss": 0.0205,
      "step": 1770960
    },
    {
      "epoch": 2.8982476123145,
      "grad_norm": 0.5277446508407593,
      "learning_rate": 4.231202598788902e-06,
      "loss": 0.013,
      "step": 1770980
    },
    {
      "epoch": 2.8982803427531536,
      "grad_norm": 0.33515283465385437,
      "learning_rate": 4.231136706575384e-06,
      "loss": 0.014,
      "step": 1771000
    },
    {
      "epoch": 2.8983130731918068,
      "grad_norm": 0.2814372181892395,
      "learning_rate": 4.231070814361867e-06,
      "loss": 0.0109,
      "step": 1771020
    },
    {
      "epoch": 2.8983458036304603,
      "grad_norm": 0.5500850081443787,
      "learning_rate": 4.23100492214835e-06,
      "loss": 0.0102,
      "step": 1771040
    },
    {
      "epoch": 2.8983785340691135,
      "grad_norm": 0.38485386967658997,
      "learning_rate": 4.230939029934833e-06,
      "loss": 0.0109,
      "step": 1771060
    },
    {
      "epoch": 2.898411264507767,
      "grad_norm": 0.17374169826507568,
      "learning_rate": 4.230873137721316e-06,
      "loss": 0.0111,
      "step": 1771080
    },
    {
      "epoch": 2.8984439949464202,
      "grad_norm": 0.6902248859405518,
      "learning_rate": 4.230807245507799e-06,
      "loss": 0.0123,
      "step": 1771100
    },
    {
      "epoch": 2.8984767253850734,
      "grad_norm": 0.23421400785446167,
      "learning_rate": 4.230741353294282e-06,
      "loss": 0.0096,
      "step": 1771120
    },
    {
      "epoch": 2.898509455823727,
      "grad_norm": 0.2727019488811493,
      "learning_rate": 4.230675461080764e-06,
      "loss": 0.0098,
      "step": 1771140
    },
    {
      "epoch": 2.89854218626238,
      "grad_norm": 0.13152071833610535,
      "learning_rate": 4.230609568867247e-06,
      "loss": 0.0112,
      "step": 1771160
    },
    {
      "epoch": 2.8985749167010337,
      "grad_norm": 0.2967299520969391,
      "learning_rate": 4.23054367665373e-06,
      "loss": 0.0112,
      "step": 1771180
    },
    {
      "epoch": 2.898607647139687,
      "grad_norm": 0.15238767862319946,
      "learning_rate": 4.230477784440213e-06,
      "loss": 0.0124,
      "step": 1771200
    },
    {
      "epoch": 2.8986403775783405,
      "grad_norm": 0.24097220599651337,
      "learning_rate": 4.230411892226695e-06,
      "loss": 0.0148,
      "step": 1771220
    },
    {
      "epoch": 2.8986731080169936,
      "grad_norm": 0.09998922049999237,
      "learning_rate": 4.230346000013178e-06,
      "loss": 0.0165,
      "step": 1771240
    },
    {
      "epoch": 2.8987058384556468,
      "grad_norm": 0.26391491293907166,
      "learning_rate": 4.230280107799662e-06,
      "loss": 0.0129,
      "step": 1771260
    },
    {
      "epoch": 2.8987385688943004,
      "grad_norm": 0.1693960428237915,
      "learning_rate": 4.2302142155861444e-06,
      "loss": 0.0123,
      "step": 1771280
    },
    {
      "epoch": 2.8987712993329535,
      "grad_norm": 0.4329790472984314,
      "learning_rate": 4.230148323372628e-06,
      "loss": 0.0177,
      "step": 1771300
    },
    {
      "epoch": 2.898804029771607,
      "grad_norm": 0.44241395592689514,
      "learning_rate": 4.230082431159111e-06,
      "loss": 0.0108,
      "step": 1771320
    },
    {
      "epoch": 2.8988367602102603,
      "grad_norm": 0.10294515639543533,
      "learning_rate": 4.2300165389455935e-06,
      "loss": 0.0107,
      "step": 1771340
    },
    {
      "epoch": 2.898869490648914,
      "grad_norm": 0.47378435730934143,
      "learning_rate": 4.229950646732076e-06,
      "loss": 0.0134,
      "step": 1771360
    },
    {
      "epoch": 2.898902221087567,
      "grad_norm": 0.5102283358573914,
      "learning_rate": 4.229884754518559e-06,
      "loss": 0.0146,
      "step": 1771380
    },
    {
      "epoch": 2.89893495152622,
      "grad_norm": 0.12834158539772034,
      "learning_rate": 4.229818862305042e-06,
      "loss": 0.01,
      "step": 1771400
    },
    {
      "epoch": 2.8989676819648738,
      "grad_norm": 0.1294035166501999,
      "learning_rate": 4.2297529700915245e-06,
      "loss": 0.0152,
      "step": 1771420
    },
    {
      "epoch": 2.899000412403527,
      "grad_norm": 0.6167165637016296,
      "learning_rate": 4.229687077878007e-06,
      "loss": 0.0241,
      "step": 1771440
    },
    {
      "epoch": 2.8990331428421805,
      "grad_norm": 0.15755513310432434,
      "learning_rate": 4.22962118566449e-06,
      "loss": 0.0103,
      "step": 1771460
    },
    {
      "epoch": 2.8990658732808336,
      "grad_norm": 0.3622581362724304,
      "learning_rate": 4.2295552934509735e-06,
      "loss": 0.0129,
      "step": 1771480
    },
    {
      "epoch": 2.8990986037194872,
      "grad_norm": 0.35823962092399597,
      "learning_rate": 4.229489401237456e-06,
      "loss": 0.0166,
      "step": 1771500
    },
    {
      "epoch": 2.8991313341581404,
      "grad_norm": 0.23472560942173004,
      "learning_rate": 4.229423509023939e-06,
      "loss": 0.0106,
      "step": 1771520
    },
    {
      "epoch": 2.8991640645967935,
      "grad_norm": 0.6760479807853699,
      "learning_rate": 4.229357616810422e-06,
      "loss": 0.0148,
      "step": 1771540
    },
    {
      "epoch": 2.899196795035447,
      "grad_norm": 0.5318158864974976,
      "learning_rate": 4.2292917245969045e-06,
      "loss": 0.0109,
      "step": 1771560
    },
    {
      "epoch": 2.8992295254741003,
      "grad_norm": 0.1350294053554535,
      "learning_rate": 4.229225832383387e-06,
      "loss": 0.0135,
      "step": 1771580
    },
    {
      "epoch": 2.899262255912754,
      "grad_norm": 0.33043378591537476,
      "learning_rate": 4.229159940169871e-06,
      "loss": 0.0104,
      "step": 1771600
    },
    {
      "epoch": 2.899294986351407,
      "grad_norm": 0.35748159885406494,
      "learning_rate": 4.2290940479563535e-06,
      "loss": 0.0137,
      "step": 1771620
    },
    {
      "epoch": 2.8993277167900606,
      "grad_norm": 0.20963402092456818,
      "learning_rate": 4.229028155742836e-06,
      "loss": 0.011,
      "step": 1771640
    },
    {
      "epoch": 2.8993604472287138,
      "grad_norm": 1.4612599611282349,
      "learning_rate": 4.228962263529319e-06,
      "loss": 0.0204,
      "step": 1771660
    },
    {
      "epoch": 2.899393177667367,
      "grad_norm": 7.188711643218994,
      "learning_rate": 4.228896371315802e-06,
      "loss": 0.0122,
      "step": 1771680
    },
    {
      "epoch": 2.8994259081060205,
      "grad_norm": 0.08751504123210907,
      "learning_rate": 4.228830479102285e-06,
      "loss": 0.0166,
      "step": 1771700
    },
    {
      "epoch": 2.8994586385446737,
      "grad_norm": 0.28267452120780945,
      "learning_rate": 4.228764586888768e-06,
      "loss": 0.0138,
      "step": 1771720
    },
    {
      "epoch": 2.8994913689833273,
      "grad_norm": 0.11938425898551941,
      "learning_rate": 4.228698694675251e-06,
      "loss": 0.0166,
      "step": 1771740
    },
    {
      "epoch": 2.8995240994219804,
      "grad_norm": 0.4065619111061096,
      "learning_rate": 4.2286328024617336e-06,
      "loss": 0.0168,
      "step": 1771760
    },
    {
      "epoch": 2.899556829860634,
      "grad_norm": 0.28930655121803284,
      "learning_rate": 4.228566910248216e-06,
      "loss": 0.01,
      "step": 1771780
    },
    {
      "epoch": 2.899589560299287,
      "grad_norm": 0.15963837504386902,
      "learning_rate": 4.228501018034699e-06,
      "loss": 0.0144,
      "step": 1771800
    },
    {
      "epoch": 2.8996222907379403,
      "grad_norm": 0.34580257534980774,
      "learning_rate": 4.228435125821182e-06,
      "loss": 0.0147,
      "step": 1771820
    },
    {
      "epoch": 2.899655021176594,
      "grad_norm": 0.24218544363975525,
      "learning_rate": 4.2283692336076645e-06,
      "loss": 0.0117,
      "step": 1771840
    },
    {
      "epoch": 2.899687751615247,
      "grad_norm": 0.30669617652893066,
      "learning_rate": 4.228303341394147e-06,
      "loss": 0.0131,
      "step": 1771860
    },
    {
      "epoch": 2.8997204820539006,
      "grad_norm": 0.44061046838760376,
      "learning_rate": 4.228237449180631e-06,
      "loss": 0.0153,
      "step": 1771880
    },
    {
      "epoch": 2.899753212492554,
      "grad_norm": 0.2601211667060852,
      "learning_rate": 4.228171556967114e-06,
      "loss": 0.0175,
      "step": 1771900
    },
    {
      "epoch": 2.8997859429312074,
      "grad_norm": 0.25667306780815125,
      "learning_rate": 4.228105664753596e-06,
      "loss": 0.018,
      "step": 1771920
    },
    {
      "epoch": 2.8998186733698605,
      "grad_norm": 0.05383599177002907,
      "learning_rate": 4.228039772540079e-06,
      "loss": 0.0149,
      "step": 1771940
    },
    {
      "epoch": 2.8998514038085137,
      "grad_norm": 0.08241605013608932,
      "learning_rate": 4.227973880326563e-06,
      "loss": 0.0131,
      "step": 1771960
    },
    {
      "epoch": 2.8998841342471673,
      "grad_norm": 0.4659113585948944,
      "learning_rate": 4.227907988113045e-06,
      "loss": 0.0133,
      "step": 1771980
    },
    {
      "epoch": 2.8999168646858204,
      "grad_norm": 0.07840646803379059,
      "learning_rate": 4.227842095899528e-06,
      "loss": 0.0111,
      "step": 1772000
    },
    {
      "epoch": 2.8999495951244736,
      "grad_norm": 0.5026839971542358,
      "learning_rate": 4.227776203686011e-06,
      "loss": 0.0116,
      "step": 1772020
    },
    {
      "epoch": 2.899982325563127,
      "grad_norm": 0.158115953207016,
      "learning_rate": 4.227710311472494e-06,
      "loss": 0.022,
      "step": 1772040
    },
    {
      "epoch": 2.9000150560017808,
      "grad_norm": 0.775022029876709,
      "learning_rate": 4.227644419258976e-06,
      "loss": 0.0111,
      "step": 1772060
    },
    {
      "epoch": 2.900047786440434,
      "grad_norm": 0.1751118004322052,
      "learning_rate": 4.227578527045459e-06,
      "loss": 0.0151,
      "step": 1772080
    },
    {
      "epoch": 2.900080516879087,
      "grad_norm": 0.1946025788784027,
      "learning_rate": 4.227512634831943e-06,
      "loss": 0.0095,
      "step": 1772100
    },
    {
      "epoch": 2.9001132473177407,
      "grad_norm": 0.637059211730957,
      "learning_rate": 4.2274467426184254e-06,
      "loss": 0.0134,
      "step": 1772120
    },
    {
      "epoch": 2.900145977756394,
      "grad_norm": 0.1979970633983612,
      "learning_rate": 4.227380850404908e-06,
      "loss": 0.0129,
      "step": 1772140
    },
    {
      "epoch": 2.900178708195047,
      "grad_norm": 0.21064405143260956,
      "learning_rate": 4.227314958191391e-06,
      "loss": 0.0099,
      "step": 1772160
    },
    {
      "epoch": 2.9002114386337006,
      "grad_norm": 0.2858999967575073,
      "learning_rate": 4.227249065977874e-06,
      "loss": 0.0117,
      "step": 1772180
    },
    {
      "epoch": 2.900244169072354,
      "grad_norm": 0.30620354413986206,
      "learning_rate": 4.227183173764356e-06,
      "loss": 0.0112,
      "step": 1772200
    },
    {
      "epoch": 2.9002768995110073,
      "grad_norm": 0.41086405515670776,
      "learning_rate": 4.227117281550839e-06,
      "loss": 0.0145,
      "step": 1772220
    },
    {
      "epoch": 2.9003096299496605,
      "grad_norm": 0.14672625064849854,
      "learning_rate": 4.227051389337322e-06,
      "loss": 0.0138,
      "step": 1772240
    },
    {
      "epoch": 2.900342360388314,
      "grad_norm": 0.246835395693779,
      "learning_rate": 4.226985497123805e-06,
      "loss": 0.0118,
      "step": 1772260
    },
    {
      "epoch": 2.900375090826967,
      "grad_norm": 0.22970807552337646,
      "learning_rate": 4.226919604910288e-06,
      "loss": 0.014,
      "step": 1772280
    },
    {
      "epoch": 2.9004078212656204,
      "grad_norm": 0.5492910742759705,
      "learning_rate": 4.226853712696771e-06,
      "loss": 0.017,
      "step": 1772300
    },
    {
      "epoch": 2.900440551704274,
      "grad_norm": 0.3905283510684967,
      "learning_rate": 4.226787820483254e-06,
      "loss": 0.0191,
      "step": 1772320
    },
    {
      "epoch": 2.9004732821429275,
      "grad_norm": 0.2529143989086151,
      "learning_rate": 4.226721928269737e-06,
      "loss": 0.0216,
      "step": 1772340
    },
    {
      "epoch": 2.9005060125815807,
      "grad_norm": 0.8042912483215332,
      "learning_rate": 4.22665603605622e-06,
      "loss": 0.0254,
      "step": 1772360
    },
    {
      "epoch": 2.900538743020234,
      "grad_norm": 0.4079780578613281,
      "learning_rate": 4.226590143842703e-06,
      "loss": 0.012,
      "step": 1772380
    },
    {
      "epoch": 2.9005714734588874,
      "grad_norm": 0.6791112422943115,
      "learning_rate": 4.2265242516291855e-06,
      "loss": 0.0258,
      "step": 1772400
    },
    {
      "epoch": 2.9006042038975406,
      "grad_norm": 1.339138150215149,
      "learning_rate": 4.226458359415668e-06,
      "loss": 0.0129,
      "step": 1772420
    },
    {
      "epoch": 2.9006369343361937,
      "grad_norm": 0.7600240111351013,
      "learning_rate": 4.226392467202151e-06,
      "loss": 0.012,
      "step": 1772440
    },
    {
      "epoch": 2.9006696647748473,
      "grad_norm": 0.42238572239875793,
      "learning_rate": 4.226326574988634e-06,
      "loss": 0.0105,
      "step": 1772460
    },
    {
      "epoch": 2.900702395213501,
      "grad_norm": 0.28815194964408875,
      "learning_rate": 4.2262606827751164e-06,
      "loss": 0.0125,
      "step": 1772480
    },
    {
      "epoch": 2.900735125652154,
      "grad_norm": 0.13361488282680511,
      "learning_rate": 4.2261947905616e-06,
      "loss": 0.0118,
      "step": 1772500
    },
    {
      "epoch": 2.9007678560908072,
      "grad_norm": 1.0650593042373657,
      "learning_rate": 4.226128898348083e-06,
      "loss": 0.009,
      "step": 1772520
    },
    {
      "epoch": 2.900800586529461,
      "grad_norm": 0.18432140350341797,
      "learning_rate": 4.2260630061345655e-06,
      "loss": 0.0119,
      "step": 1772540
    },
    {
      "epoch": 2.900833316968114,
      "grad_norm": 0.929995059967041,
      "learning_rate": 4.225997113921048e-06,
      "loss": 0.0115,
      "step": 1772560
    },
    {
      "epoch": 2.900866047406767,
      "grad_norm": 0.3298197388648987,
      "learning_rate": 4.225931221707531e-06,
      "loss": 0.0094,
      "step": 1772580
    },
    {
      "epoch": 2.9008987778454207,
      "grad_norm": 0.28382471203804016,
      "learning_rate": 4.225865329494014e-06,
      "loss": 0.0175,
      "step": 1772600
    },
    {
      "epoch": 2.900931508284074,
      "grad_norm": 0.7149919867515564,
      "learning_rate": 4.2257994372804965e-06,
      "loss": 0.0162,
      "step": 1772620
    },
    {
      "epoch": 2.9009642387227275,
      "grad_norm": 0.6554045677185059,
      "learning_rate": 4.225733545066979e-06,
      "loss": 0.0134,
      "step": 1772640
    },
    {
      "epoch": 2.9009969691613806,
      "grad_norm": 0.32258734107017517,
      "learning_rate": 4.225667652853463e-06,
      "loss": 0.0153,
      "step": 1772660
    },
    {
      "epoch": 2.901029699600034,
      "grad_norm": 0.35855624079704285,
      "learning_rate": 4.2256017606399455e-06,
      "loss": 0.0224,
      "step": 1772680
    },
    {
      "epoch": 2.9010624300386874,
      "grad_norm": 0.28836381435394287,
      "learning_rate": 4.225535868426428e-06,
      "loss": 0.0113,
      "step": 1772700
    },
    {
      "epoch": 2.9010951604773405,
      "grad_norm": 0.49073460698127747,
      "learning_rate": 4.225469976212912e-06,
      "loss": 0.0293,
      "step": 1772720
    },
    {
      "epoch": 2.901127890915994,
      "grad_norm": 0.6341747045516968,
      "learning_rate": 4.225404083999395e-06,
      "loss": 0.0192,
      "step": 1772740
    },
    {
      "epoch": 2.9011606213546473,
      "grad_norm": 0.38154226541519165,
      "learning_rate": 4.225338191785877e-06,
      "loss": 0.0112,
      "step": 1772760
    },
    {
      "epoch": 2.901193351793301,
      "grad_norm": 0.14047114551067352,
      "learning_rate": 4.22527229957236e-06,
      "loss": 0.0145,
      "step": 1772780
    },
    {
      "epoch": 2.901226082231954,
      "grad_norm": 0.18120601773262024,
      "learning_rate": 4.225206407358843e-06,
      "loss": 0.0143,
      "step": 1772800
    },
    {
      "epoch": 2.9012588126706076,
      "grad_norm": 0.22481656074523926,
      "learning_rate": 4.2251405151453256e-06,
      "loss": 0.0137,
      "step": 1772820
    },
    {
      "epoch": 2.9012915431092607,
      "grad_norm": 0.16818402707576752,
      "learning_rate": 4.225074622931808e-06,
      "loss": 0.0134,
      "step": 1772840
    },
    {
      "epoch": 2.901324273547914,
      "grad_norm": 0.2681289613246918,
      "learning_rate": 4.225008730718291e-06,
      "loss": 0.0177,
      "step": 1772860
    },
    {
      "epoch": 2.9013570039865675,
      "grad_norm": 0.6521521210670471,
      "learning_rate": 4.224942838504774e-06,
      "loss": 0.0122,
      "step": 1772880
    },
    {
      "epoch": 2.9013897344252206,
      "grad_norm": 0.36186715960502625,
      "learning_rate": 4.224876946291257e-06,
      "loss": 0.0093,
      "step": 1772900
    },
    {
      "epoch": 2.9014224648638742,
      "grad_norm": 0.22081740200519562,
      "learning_rate": 4.22481105407774e-06,
      "loss": 0.0141,
      "step": 1772920
    },
    {
      "epoch": 2.9014551953025274,
      "grad_norm": 1.006931185722351,
      "learning_rate": 4.224745161864223e-06,
      "loss": 0.0137,
      "step": 1772940
    },
    {
      "epoch": 2.901487925741181,
      "grad_norm": 0.25842559337615967,
      "learning_rate": 4.224679269650706e-06,
      "loss": 0.0143,
      "step": 1772960
    },
    {
      "epoch": 2.901520656179834,
      "grad_norm": 0.19326433539390564,
      "learning_rate": 4.224613377437188e-06,
      "loss": 0.0145,
      "step": 1772980
    },
    {
      "epoch": 2.9015533866184873,
      "grad_norm": 0.14598438143730164,
      "learning_rate": 4.224547485223671e-06,
      "loss": 0.0125,
      "step": 1773000
    },
    {
      "epoch": 2.901586117057141,
      "grad_norm": 0.9080979228019714,
      "learning_rate": 4.224481593010155e-06,
      "loss": 0.0176,
      "step": 1773020
    },
    {
      "epoch": 2.901618847495794,
      "grad_norm": 0.5350702404975891,
      "learning_rate": 4.224415700796637e-06,
      "loss": 0.01,
      "step": 1773040
    },
    {
      "epoch": 2.9016515779344476,
      "grad_norm": 1.4184460639953613,
      "learning_rate": 4.22434980858312e-06,
      "loss": 0.0131,
      "step": 1773060
    },
    {
      "epoch": 2.9016843083731008,
      "grad_norm": 0.2997351884841919,
      "learning_rate": 4.224283916369603e-06,
      "loss": 0.0109,
      "step": 1773080
    },
    {
      "epoch": 2.9017170388117544,
      "grad_norm": 1.3865408897399902,
      "learning_rate": 4.224218024156086e-06,
      "loss": 0.0179,
      "step": 1773100
    },
    {
      "epoch": 2.9017497692504075,
      "grad_norm": 0.3700253665447235,
      "learning_rate": 4.224152131942569e-06,
      "loss": 0.0127,
      "step": 1773120
    },
    {
      "epoch": 2.9017824996890607,
      "grad_norm": 0.15747614204883575,
      "learning_rate": 4.224086239729052e-06,
      "loss": 0.0107,
      "step": 1773140
    },
    {
      "epoch": 2.9018152301277143,
      "grad_norm": 0.16867201030254364,
      "learning_rate": 4.224020347515535e-06,
      "loss": 0.0091,
      "step": 1773160
    },
    {
      "epoch": 2.9018479605663674,
      "grad_norm": 0.5060490369796753,
      "learning_rate": 4.223954455302017e-06,
      "loss": 0.0173,
      "step": 1773180
    },
    {
      "epoch": 2.901880691005021,
      "grad_norm": 0.42602241039276123,
      "learning_rate": 4.2238885630885e-06,
      "loss": 0.0174,
      "step": 1773200
    },
    {
      "epoch": 2.901913421443674,
      "grad_norm": 0.08796396851539612,
      "learning_rate": 4.223822670874983e-06,
      "loss": 0.0111,
      "step": 1773220
    },
    {
      "epoch": 2.9019461518823277,
      "grad_norm": 0.3778625428676605,
      "learning_rate": 4.223756778661466e-06,
      "loss": 0.0178,
      "step": 1773240
    },
    {
      "epoch": 2.901978882320981,
      "grad_norm": 0.3546387851238251,
      "learning_rate": 4.223690886447948e-06,
      "loss": 0.009,
      "step": 1773260
    },
    {
      "epoch": 2.902011612759634,
      "grad_norm": 0.15933585166931152,
      "learning_rate": 4.223624994234431e-06,
      "loss": 0.0158,
      "step": 1773280
    },
    {
      "epoch": 2.9020443431982876,
      "grad_norm": 0.587218165397644,
      "learning_rate": 4.223559102020915e-06,
      "loss": 0.0123,
      "step": 1773300
    },
    {
      "epoch": 2.902077073636941,
      "grad_norm": 0.8081496953964233,
      "learning_rate": 4.2234932098073974e-06,
      "loss": 0.0133,
      "step": 1773320
    },
    {
      "epoch": 2.9021098040755944,
      "grad_norm": 0.20149162411689758,
      "learning_rate": 4.22342731759388e-06,
      "loss": 0.0113,
      "step": 1773340
    },
    {
      "epoch": 2.9021425345142475,
      "grad_norm": 0.5938361883163452,
      "learning_rate": 4.223361425380364e-06,
      "loss": 0.0121,
      "step": 1773360
    },
    {
      "epoch": 2.902175264952901,
      "grad_norm": 0.5378628969192505,
      "learning_rate": 4.2232955331668465e-06,
      "loss": 0.0132,
      "step": 1773380
    },
    {
      "epoch": 2.9022079953915543,
      "grad_norm": 0.6135883331298828,
      "learning_rate": 4.223229640953329e-06,
      "loss": 0.0116,
      "step": 1773400
    },
    {
      "epoch": 2.9022407258302074,
      "grad_norm": 0.3480897843837738,
      "learning_rate": 4.223163748739812e-06,
      "loss": 0.0158,
      "step": 1773420
    },
    {
      "epoch": 2.902273456268861,
      "grad_norm": 0.2541061043739319,
      "learning_rate": 4.223097856526295e-06,
      "loss": 0.008,
      "step": 1773440
    },
    {
      "epoch": 2.902306186707514,
      "grad_norm": 0.48171737790107727,
      "learning_rate": 4.2230319643127775e-06,
      "loss": 0.0131,
      "step": 1773460
    },
    {
      "epoch": 2.9023389171461673,
      "grad_norm": 0.3994545340538025,
      "learning_rate": 4.22296607209926e-06,
      "loss": 0.0113,
      "step": 1773480
    },
    {
      "epoch": 2.902371647584821,
      "grad_norm": 0.4268985688686371,
      "learning_rate": 4.222900179885743e-06,
      "loss": 0.0099,
      "step": 1773500
    },
    {
      "epoch": 2.9024043780234745,
      "grad_norm": 0.3052414059638977,
      "learning_rate": 4.2228342876722265e-06,
      "loss": 0.0138,
      "step": 1773520
    },
    {
      "epoch": 2.9024371084621277,
      "grad_norm": 0.29894015192985535,
      "learning_rate": 4.222768395458709e-06,
      "loss": 0.0174,
      "step": 1773540
    },
    {
      "epoch": 2.902469838900781,
      "grad_norm": 0.7325232028961182,
      "learning_rate": 4.222702503245192e-06,
      "loss": 0.0151,
      "step": 1773560
    },
    {
      "epoch": 2.9025025693394344,
      "grad_norm": 0.17603369057178497,
      "learning_rate": 4.222636611031675e-06,
      "loss": 0.0074,
      "step": 1773580
    },
    {
      "epoch": 2.9025352997780876,
      "grad_norm": 0.16342931985855103,
      "learning_rate": 4.2225707188181575e-06,
      "loss": 0.0121,
      "step": 1773600
    },
    {
      "epoch": 2.9025680302167407,
      "grad_norm": 0.12386829406023026,
      "learning_rate": 4.22250482660464e-06,
      "loss": 0.0142,
      "step": 1773620
    },
    {
      "epoch": 2.9026007606553943,
      "grad_norm": 0.16554148495197296,
      "learning_rate": 4.222438934391123e-06,
      "loss": 0.0123,
      "step": 1773640
    },
    {
      "epoch": 2.902633491094048,
      "grad_norm": 0.2163051813840866,
      "learning_rate": 4.222373042177606e-06,
      "loss": 0.0203,
      "step": 1773660
    },
    {
      "epoch": 2.902666221532701,
      "grad_norm": 0.22629687190055847,
      "learning_rate": 4.2223071499640884e-06,
      "loss": 0.0113,
      "step": 1773680
    },
    {
      "epoch": 2.902698951971354,
      "grad_norm": 0.5720859169960022,
      "learning_rate": 4.222241257750572e-06,
      "loss": 0.0145,
      "step": 1773700
    },
    {
      "epoch": 2.902731682410008,
      "grad_norm": 0.2259167581796646,
      "learning_rate": 4.222175365537055e-06,
      "loss": 0.0142,
      "step": 1773720
    },
    {
      "epoch": 2.902764412848661,
      "grad_norm": 0.2605724334716797,
      "learning_rate": 4.2221094733235375e-06,
      "loss": 0.0119,
      "step": 1773740
    },
    {
      "epoch": 2.902797143287314,
      "grad_norm": 0.22753991186618805,
      "learning_rate": 4.222043581110021e-06,
      "loss": 0.014,
      "step": 1773760
    },
    {
      "epoch": 2.9028298737259677,
      "grad_norm": 0.08520485460758209,
      "learning_rate": 4.221977688896504e-06,
      "loss": 0.0091,
      "step": 1773780
    },
    {
      "epoch": 2.9028626041646213,
      "grad_norm": 0.8301662802696228,
      "learning_rate": 4.221911796682987e-06,
      "loss": 0.0148,
      "step": 1773800
    },
    {
      "epoch": 2.9028953346032744,
      "grad_norm": 0.43039730191230774,
      "learning_rate": 4.221845904469469e-06,
      "loss": 0.0165,
      "step": 1773820
    },
    {
      "epoch": 2.9029280650419276,
      "grad_norm": 0.27473482489585876,
      "learning_rate": 4.221780012255952e-06,
      "loss": 0.0129,
      "step": 1773840
    },
    {
      "epoch": 2.902960795480581,
      "grad_norm": 0.0727185383439064,
      "learning_rate": 4.221714120042435e-06,
      "loss": 0.009,
      "step": 1773860
    },
    {
      "epoch": 2.9029935259192343,
      "grad_norm": 0.1470646858215332,
      "learning_rate": 4.2216482278289175e-06,
      "loss": 0.0129,
      "step": 1773880
    },
    {
      "epoch": 2.9030262563578875,
      "grad_norm": 0.3145364224910736,
      "learning_rate": 4.2215823356154e-06,
      "loss": 0.0151,
      "step": 1773900
    },
    {
      "epoch": 2.903058986796541,
      "grad_norm": 0.1412988156080246,
      "learning_rate": 4.221516443401884e-06,
      "loss": 0.0109,
      "step": 1773920
    },
    {
      "epoch": 2.9030917172351947,
      "grad_norm": 0.44266465306282043,
      "learning_rate": 4.221450551188367e-06,
      "loss": 0.0107,
      "step": 1773940
    },
    {
      "epoch": 2.903124447673848,
      "grad_norm": 0.4599147439002991,
      "learning_rate": 4.221384658974849e-06,
      "loss": 0.0117,
      "step": 1773960
    },
    {
      "epoch": 2.903157178112501,
      "grad_norm": 0.21674658358097076,
      "learning_rate": 4.221318766761332e-06,
      "loss": 0.0118,
      "step": 1773980
    },
    {
      "epoch": 2.9031899085511546,
      "grad_norm": 0.1110071912407875,
      "learning_rate": 4.221252874547815e-06,
      "loss": 0.0177,
      "step": 1774000
    },
    {
      "epoch": 2.9032226389898077,
      "grad_norm": 0.7929354310035706,
      "learning_rate": 4.2211869823342976e-06,
      "loss": 0.0178,
      "step": 1774020
    },
    {
      "epoch": 2.903255369428461,
      "grad_norm": 0.24476107954978943,
      "learning_rate": 4.22112109012078e-06,
      "loss": 0.0133,
      "step": 1774040
    },
    {
      "epoch": 2.9032880998671144,
      "grad_norm": 0.44511961936950684,
      "learning_rate": 4.221055197907263e-06,
      "loss": 0.0126,
      "step": 1774060
    },
    {
      "epoch": 2.9033208303057676,
      "grad_norm": 0.18644501268863678,
      "learning_rate": 4.220989305693747e-06,
      "loss": 0.0124,
      "step": 1774080
    },
    {
      "epoch": 2.903353560744421,
      "grad_norm": 0.11511784791946411,
      "learning_rate": 4.220923413480229e-06,
      "loss": 0.0137,
      "step": 1774100
    },
    {
      "epoch": 2.9033862911830743,
      "grad_norm": 0.46788647770881653,
      "learning_rate": 4.220857521266712e-06,
      "loss": 0.0188,
      "step": 1774120
    },
    {
      "epoch": 2.903419021621728,
      "grad_norm": 0.11526336520910263,
      "learning_rate": 4.220791629053196e-06,
      "loss": 0.0165,
      "step": 1774140
    },
    {
      "epoch": 2.903451752060381,
      "grad_norm": 0.18027575314044952,
      "learning_rate": 4.2207257368396784e-06,
      "loss": 0.0154,
      "step": 1774160
    },
    {
      "epoch": 2.9034844824990342,
      "grad_norm": 0.08328227698802948,
      "learning_rate": 4.220659844626161e-06,
      "loss": 0.0146,
      "step": 1774180
    },
    {
      "epoch": 2.903517212937688,
      "grad_norm": 0.29023343324661255,
      "learning_rate": 4.220593952412644e-06,
      "loss": 0.013,
      "step": 1774200
    },
    {
      "epoch": 2.903549943376341,
      "grad_norm": 0.22334814071655273,
      "learning_rate": 4.220528060199127e-06,
      "loss": 0.016,
      "step": 1774220
    },
    {
      "epoch": 2.9035826738149946,
      "grad_norm": 0.6720448136329651,
      "learning_rate": 4.220462167985609e-06,
      "loss": 0.0221,
      "step": 1774240
    },
    {
      "epoch": 2.9036154042536477,
      "grad_norm": 0.10521291941404343,
      "learning_rate": 4.220396275772092e-06,
      "loss": 0.0123,
      "step": 1774260
    },
    {
      "epoch": 2.9036481346923013,
      "grad_norm": 0.18308605253696442,
      "learning_rate": 4.220330383558575e-06,
      "loss": 0.015,
      "step": 1774280
    },
    {
      "epoch": 2.9036808651309545,
      "grad_norm": 0.15979741513729095,
      "learning_rate": 4.220264491345058e-06,
      "loss": 0.0141,
      "step": 1774300
    },
    {
      "epoch": 2.9037135955696076,
      "grad_norm": 0.6926150918006897,
      "learning_rate": 4.220198599131541e-06,
      "loss": 0.0153,
      "step": 1774320
    },
    {
      "epoch": 2.903746326008261,
      "grad_norm": 0.1954597532749176,
      "learning_rate": 4.220132706918024e-06,
      "loss": 0.0097,
      "step": 1774340
    },
    {
      "epoch": 2.9037790564469144,
      "grad_norm": 0.8640662431716919,
      "learning_rate": 4.220066814704507e-06,
      "loss": 0.0102,
      "step": 1774360
    },
    {
      "epoch": 2.903811786885568,
      "grad_norm": 0.5302554368972778,
      "learning_rate": 4.220000922490989e-06,
      "loss": 0.0167,
      "step": 1774380
    },
    {
      "epoch": 2.903844517324221,
      "grad_norm": 0.41064730286598206,
      "learning_rate": 4.219935030277472e-06,
      "loss": 0.0196,
      "step": 1774400
    },
    {
      "epoch": 2.9038772477628747,
      "grad_norm": 0.26381343603134155,
      "learning_rate": 4.219869138063956e-06,
      "loss": 0.0148,
      "step": 1774420
    },
    {
      "epoch": 2.903909978201528,
      "grad_norm": 0.20988301932811737,
      "learning_rate": 4.2198032458504385e-06,
      "loss": 0.0127,
      "step": 1774440
    },
    {
      "epoch": 2.903942708640181,
      "grad_norm": 0.3885999917984009,
      "learning_rate": 4.219737353636921e-06,
      "loss": 0.0143,
      "step": 1774460
    },
    {
      "epoch": 2.9039754390788346,
      "grad_norm": 0.3640223741531372,
      "learning_rate": 4.219671461423404e-06,
      "loss": 0.0111,
      "step": 1774480
    },
    {
      "epoch": 2.9040081695174877,
      "grad_norm": 0.08107369393110275,
      "learning_rate": 4.219605569209887e-06,
      "loss": 0.012,
      "step": 1774500
    },
    {
      "epoch": 2.9040408999561413,
      "grad_norm": 0.13873358070850372,
      "learning_rate": 4.2195396769963694e-06,
      "loss": 0.0154,
      "step": 1774520
    },
    {
      "epoch": 2.9040736303947945,
      "grad_norm": 0.9972262382507324,
      "learning_rate": 4.219473784782853e-06,
      "loss": 0.019,
      "step": 1774540
    },
    {
      "epoch": 2.904106360833448,
      "grad_norm": 0.36237621307373047,
      "learning_rate": 4.219407892569336e-06,
      "loss": 0.0127,
      "step": 1774560
    },
    {
      "epoch": 2.9041390912721012,
      "grad_norm": 0.19050787389278412,
      "learning_rate": 4.2193420003558185e-06,
      "loss": 0.0153,
      "step": 1774580
    },
    {
      "epoch": 2.9041718217107544,
      "grad_norm": 0.1898268461227417,
      "learning_rate": 4.219276108142301e-06,
      "loss": 0.0098,
      "step": 1774600
    },
    {
      "epoch": 2.904204552149408,
      "grad_norm": 0.20483005046844482,
      "learning_rate": 4.219210215928784e-06,
      "loss": 0.0158,
      "step": 1774620
    },
    {
      "epoch": 2.904237282588061,
      "grad_norm": 0.23230740427970886,
      "learning_rate": 4.219144323715267e-06,
      "loss": 0.0152,
      "step": 1774640
    },
    {
      "epoch": 2.9042700130267147,
      "grad_norm": 0.9097381830215454,
      "learning_rate": 4.2190784315017495e-06,
      "loss": 0.0157,
      "step": 1774660
    },
    {
      "epoch": 2.904302743465368,
      "grad_norm": 0.40435922145843506,
      "learning_rate": 4.219012539288232e-06,
      "loss": 0.0108,
      "step": 1774680
    },
    {
      "epoch": 2.9043354739040215,
      "grad_norm": 0.1917242854833603,
      "learning_rate": 4.218946647074715e-06,
      "loss": 0.0134,
      "step": 1774700
    },
    {
      "epoch": 2.9043682043426746,
      "grad_norm": 0.44516369700431824,
      "learning_rate": 4.2188807548611985e-06,
      "loss": 0.0148,
      "step": 1774720
    },
    {
      "epoch": 2.9044009347813278,
      "grad_norm": 0.6625407934188843,
      "learning_rate": 4.218814862647681e-06,
      "loss": 0.0175,
      "step": 1774740
    },
    {
      "epoch": 2.9044336652199814,
      "grad_norm": 0.20334157347679138,
      "learning_rate": 4.218748970434164e-06,
      "loss": 0.0125,
      "step": 1774760
    },
    {
      "epoch": 2.9044663956586345,
      "grad_norm": 0.5451380014419556,
      "learning_rate": 4.218683078220648e-06,
      "loss": 0.0178,
      "step": 1774780
    },
    {
      "epoch": 2.904499126097288,
      "grad_norm": 0.5696233510971069,
      "learning_rate": 4.21861718600713e-06,
      "loss": 0.0102,
      "step": 1774800
    },
    {
      "epoch": 2.9045318565359413,
      "grad_norm": 0.7482143044471741,
      "learning_rate": 4.218551293793613e-06,
      "loss": 0.0116,
      "step": 1774820
    },
    {
      "epoch": 2.904564586974595,
      "grad_norm": 0.2952948212623596,
      "learning_rate": 4.218485401580096e-06,
      "loss": 0.019,
      "step": 1774840
    },
    {
      "epoch": 2.904597317413248,
      "grad_norm": 0.10993020981550217,
      "learning_rate": 4.2184195093665786e-06,
      "loss": 0.0164,
      "step": 1774860
    },
    {
      "epoch": 2.904630047851901,
      "grad_norm": 0.19896003603935242,
      "learning_rate": 4.218353617153061e-06,
      "loss": 0.0148,
      "step": 1774880
    },
    {
      "epoch": 2.9046627782905547,
      "grad_norm": 0.10306660085916519,
      "learning_rate": 4.218287724939544e-06,
      "loss": 0.0161,
      "step": 1774900
    },
    {
      "epoch": 2.904695508729208,
      "grad_norm": 0.284629762172699,
      "learning_rate": 4.218221832726027e-06,
      "loss": 0.0192,
      "step": 1774920
    },
    {
      "epoch": 2.9047282391678615,
      "grad_norm": 0.10665830969810486,
      "learning_rate": 4.21815594051251e-06,
      "loss": 0.0068,
      "step": 1774940
    },
    {
      "epoch": 2.9047609696065146,
      "grad_norm": 0.28178176283836365,
      "learning_rate": 4.218090048298993e-06,
      "loss": 0.0174,
      "step": 1774960
    },
    {
      "epoch": 2.9047937000451682,
      "grad_norm": 0.4976242184638977,
      "learning_rate": 4.218024156085476e-06,
      "loss": 0.0094,
      "step": 1774980
    },
    {
      "epoch": 2.9048264304838214,
      "grad_norm": 0.1469365507364273,
      "learning_rate": 4.217958263871959e-06,
      "loss": 0.0121,
      "step": 1775000
    },
    {
      "epoch": 2.9048591609224745,
      "grad_norm": 0.23489733040332794,
      "learning_rate": 4.217892371658441e-06,
      "loss": 0.0209,
      "step": 1775020
    },
    {
      "epoch": 2.904891891361128,
      "grad_norm": 0.25745484232902527,
      "learning_rate": 4.217826479444924e-06,
      "loss": 0.0172,
      "step": 1775040
    },
    {
      "epoch": 2.9049246217997813,
      "grad_norm": 0.3834177851676941,
      "learning_rate": 4.217760587231407e-06,
      "loss": 0.0135,
      "step": 1775060
    },
    {
      "epoch": 2.9049573522384344,
      "grad_norm": 0.15155652165412903,
      "learning_rate": 4.2176946950178895e-06,
      "loss": 0.0107,
      "step": 1775080
    },
    {
      "epoch": 2.904990082677088,
      "grad_norm": 0.309903621673584,
      "learning_rate": 4.217628802804372e-06,
      "loss": 0.0158,
      "step": 1775100
    },
    {
      "epoch": 2.9050228131157416,
      "grad_norm": 0.38188329339027405,
      "learning_rate": 4.217562910590856e-06,
      "loss": 0.0183,
      "step": 1775120
    },
    {
      "epoch": 2.9050555435543948,
      "grad_norm": 0.430814266204834,
      "learning_rate": 4.217497018377339e-06,
      "loss": 0.0098,
      "step": 1775140
    },
    {
      "epoch": 2.905088273993048,
      "grad_norm": 0.13577505946159363,
      "learning_rate": 4.217431126163821e-06,
      "loss": 0.0131,
      "step": 1775160
    },
    {
      "epoch": 2.9051210044317015,
      "grad_norm": 0.21432790160179138,
      "learning_rate": 4.217365233950305e-06,
      "loss": 0.0099,
      "step": 1775180
    },
    {
      "epoch": 2.9051537348703547,
      "grad_norm": 0.6235021352767944,
      "learning_rate": 4.217299341736788e-06,
      "loss": 0.0148,
      "step": 1775200
    },
    {
      "epoch": 2.905186465309008,
      "grad_norm": 0.7336583137512207,
      "learning_rate": 4.21723344952327e-06,
      "loss": 0.0222,
      "step": 1775220
    },
    {
      "epoch": 2.9052191957476614,
      "grad_norm": 0.321111261844635,
      "learning_rate": 4.217167557309753e-06,
      "loss": 0.0141,
      "step": 1775240
    },
    {
      "epoch": 2.905251926186315,
      "grad_norm": 0.4001268148422241,
      "learning_rate": 4.217101665096236e-06,
      "loss": 0.0204,
      "step": 1775260
    },
    {
      "epoch": 2.905284656624968,
      "grad_norm": 0.8568488359451294,
      "learning_rate": 4.217035772882719e-06,
      "loss": 0.0202,
      "step": 1775280
    },
    {
      "epoch": 2.9053173870636213,
      "grad_norm": 0.7365453839302063,
      "learning_rate": 4.216969880669201e-06,
      "loss": 0.0124,
      "step": 1775300
    },
    {
      "epoch": 2.905350117502275,
      "grad_norm": 0.5550649166107178,
      "learning_rate": 4.216903988455684e-06,
      "loss": 0.0118,
      "step": 1775320
    },
    {
      "epoch": 2.905382847940928,
      "grad_norm": 0.05121210590004921,
      "learning_rate": 4.216838096242168e-06,
      "loss": 0.0099,
      "step": 1775340
    },
    {
      "epoch": 2.905415578379581,
      "grad_norm": 0.36960458755493164,
      "learning_rate": 4.2167722040286504e-06,
      "loss": 0.0105,
      "step": 1775360
    },
    {
      "epoch": 2.905448308818235,
      "grad_norm": 0.7597998976707458,
      "learning_rate": 4.216706311815133e-06,
      "loss": 0.0153,
      "step": 1775380
    },
    {
      "epoch": 2.9054810392568884,
      "grad_norm": 0.03780818358063698,
      "learning_rate": 4.216640419601616e-06,
      "loss": 0.0164,
      "step": 1775400
    },
    {
      "epoch": 2.9055137696955415,
      "grad_norm": 0.07795154303312302,
      "learning_rate": 4.216574527388099e-06,
      "loss": 0.011,
      "step": 1775420
    },
    {
      "epoch": 2.9055465001341947,
      "grad_norm": 0.6063669919967651,
      "learning_rate": 4.216508635174581e-06,
      "loss": 0.0137,
      "step": 1775440
    },
    {
      "epoch": 2.9055792305728483,
      "grad_norm": 0.10441005975008011,
      "learning_rate": 4.216442742961064e-06,
      "loss": 0.0115,
      "step": 1775460
    },
    {
      "epoch": 2.9056119610115014,
      "grad_norm": 0.22515957057476044,
      "learning_rate": 4.216376850747548e-06,
      "loss": 0.0166,
      "step": 1775480
    },
    {
      "epoch": 2.9056446914501546,
      "grad_norm": 0.37164396047592163,
      "learning_rate": 4.2163109585340305e-06,
      "loss": 0.0103,
      "step": 1775500
    },
    {
      "epoch": 2.905677421888808,
      "grad_norm": 0.16221454739570618,
      "learning_rate": 4.216245066320513e-06,
      "loss": 0.0145,
      "step": 1775520
    },
    {
      "epoch": 2.9057101523274613,
      "grad_norm": 0.07033032923936844,
      "learning_rate": 4.216179174106996e-06,
      "loss": 0.01,
      "step": 1775540
    },
    {
      "epoch": 2.905742882766115,
      "grad_norm": 0.3099265694618225,
      "learning_rate": 4.2161132818934795e-06,
      "loss": 0.0162,
      "step": 1775560
    },
    {
      "epoch": 2.905775613204768,
      "grad_norm": 0.5526067614555359,
      "learning_rate": 4.216047389679962e-06,
      "loss": 0.0148,
      "step": 1775580
    },
    {
      "epoch": 2.9058083436434217,
      "grad_norm": 0.12165788561105728,
      "learning_rate": 4.215981497466445e-06,
      "loss": 0.0156,
      "step": 1775600
    },
    {
      "epoch": 2.905841074082075,
      "grad_norm": 0.4799152910709381,
      "learning_rate": 4.215915605252928e-06,
      "loss": 0.0132,
      "step": 1775620
    },
    {
      "epoch": 2.905873804520728,
      "grad_norm": 0.21997134387493134,
      "learning_rate": 4.2158497130394105e-06,
      "loss": 0.0136,
      "step": 1775640
    },
    {
      "epoch": 2.9059065349593816,
      "grad_norm": 0.8704251646995544,
      "learning_rate": 4.215783820825893e-06,
      "loss": 0.0186,
      "step": 1775660
    },
    {
      "epoch": 2.9059392653980347,
      "grad_norm": 0.42071065306663513,
      "learning_rate": 4.215717928612376e-06,
      "loss": 0.0109,
      "step": 1775680
    },
    {
      "epoch": 2.9059719958366883,
      "grad_norm": 0.7251749038696289,
      "learning_rate": 4.215652036398859e-06,
      "loss": 0.0154,
      "step": 1775700
    },
    {
      "epoch": 2.9060047262753415,
      "grad_norm": 0.21296562254428864,
      "learning_rate": 4.2155861441853415e-06,
      "loss": 0.0086,
      "step": 1775720
    },
    {
      "epoch": 2.906037456713995,
      "grad_norm": 0.1883711963891983,
      "learning_rate": 4.215520251971825e-06,
      "loss": 0.0125,
      "step": 1775740
    },
    {
      "epoch": 2.906070187152648,
      "grad_norm": 0.8050827980041504,
      "learning_rate": 4.215454359758308e-06,
      "loss": 0.0171,
      "step": 1775760
    },
    {
      "epoch": 2.9061029175913013,
      "grad_norm": 0.27613434195518494,
      "learning_rate": 4.2153884675447905e-06,
      "loss": 0.0097,
      "step": 1775780
    },
    {
      "epoch": 2.906135648029955,
      "grad_norm": 1.4471964836120605,
      "learning_rate": 4.215322575331273e-06,
      "loss": 0.0199,
      "step": 1775800
    },
    {
      "epoch": 2.906168378468608,
      "grad_norm": 0.4314524531364441,
      "learning_rate": 4.215256683117756e-06,
      "loss": 0.015,
      "step": 1775820
    },
    {
      "epoch": 2.9062011089072617,
      "grad_norm": 0.24309924244880676,
      "learning_rate": 4.21519079090424e-06,
      "loss": 0.0143,
      "step": 1775840
    },
    {
      "epoch": 2.906233839345915,
      "grad_norm": 0.23926852643489838,
      "learning_rate": 4.215124898690722e-06,
      "loss": 0.0129,
      "step": 1775860
    },
    {
      "epoch": 2.9062665697845684,
      "grad_norm": 0.2620919942855835,
      "learning_rate": 4.215059006477205e-06,
      "loss": 0.0145,
      "step": 1775880
    },
    {
      "epoch": 2.9062993002232216,
      "grad_norm": 0.2586037218570709,
      "learning_rate": 4.214993114263688e-06,
      "loss": 0.0142,
      "step": 1775900
    },
    {
      "epoch": 2.9063320306618747,
      "grad_norm": 0.3439045548439026,
      "learning_rate": 4.2149272220501705e-06,
      "loss": 0.0158,
      "step": 1775920
    },
    {
      "epoch": 2.9063647611005283,
      "grad_norm": 0.2896091043949127,
      "learning_rate": 4.214861329836653e-06,
      "loss": 0.0157,
      "step": 1775940
    },
    {
      "epoch": 2.9063974915391815,
      "grad_norm": 0.7265344262123108,
      "learning_rate": 4.214795437623137e-06,
      "loss": 0.0108,
      "step": 1775960
    },
    {
      "epoch": 2.906430221977835,
      "grad_norm": 0.16208094358444214,
      "learning_rate": 4.21472954540962e-06,
      "loss": 0.0148,
      "step": 1775980
    },
    {
      "epoch": 2.906462952416488,
      "grad_norm": 0.3438364565372467,
      "learning_rate": 4.214663653196102e-06,
      "loss": 0.0136,
      "step": 1776000
    },
    {
      "epoch": 2.906495682855142,
      "grad_norm": 0.18470321595668793,
      "learning_rate": 4.214597760982585e-06,
      "loss": 0.0121,
      "step": 1776020
    },
    {
      "epoch": 2.906528413293795,
      "grad_norm": 0.31011539697647095,
      "learning_rate": 4.214531868769068e-06,
      "loss": 0.0102,
      "step": 1776040
    },
    {
      "epoch": 2.906561143732448,
      "grad_norm": 0.3340495228767395,
      "learning_rate": 4.2144659765555506e-06,
      "loss": 0.0098,
      "step": 1776060
    },
    {
      "epoch": 2.9065938741711017,
      "grad_norm": 0.18212255835533142,
      "learning_rate": 4.214400084342033e-06,
      "loss": 0.0107,
      "step": 1776080
    },
    {
      "epoch": 2.906626604609755,
      "grad_norm": 0.3771403431892395,
      "learning_rate": 4.214334192128516e-06,
      "loss": 0.0137,
      "step": 1776100
    },
    {
      "epoch": 2.9066593350484085,
      "grad_norm": 0.4868481457233429,
      "learning_rate": 4.214268299914999e-06,
      "loss": 0.0146,
      "step": 1776120
    },
    {
      "epoch": 2.9066920654870616,
      "grad_norm": 0.2451273649930954,
      "learning_rate": 4.214202407701482e-06,
      "loss": 0.0092,
      "step": 1776140
    },
    {
      "epoch": 2.906724795925715,
      "grad_norm": 0.32998794317245483,
      "learning_rate": 4.214136515487965e-06,
      "loss": 0.0196,
      "step": 1776160
    },
    {
      "epoch": 2.9067575263643683,
      "grad_norm": 0.42966002225875854,
      "learning_rate": 4.214070623274448e-06,
      "loss": 0.0139,
      "step": 1776180
    },
    {
      "epoch": 2.9067902568030215,
      "grad_norm": 0.552630603313446,
      "learning_rate": 4.2140047310609314e-06,
      "loss": 0.0121,
      "step": 1776200
    },
    {
      "epoch": 2.906822987241675,
      "grad_norm": 0.40072834491729736,
      "learning_rate": 4.213938838847414e-06,
      "loss": 0.0082,
      "step": 1776220
    },
    {
      "epoch": 2.9068557176803282,
      "grad_norm": 0.13406513631343842,
      "learning_rate": 4.213872946633897e-06,
      "loss": 0.0138,
      "step": 1776240
    },
    {
      "epoch": 2.906888448118982,
      "grad_norm": 0.1762380748987198,
      "learning_rate": 4.21380705442038e-06,
      "loss": 0.0136,
      "step": 1776260
    },
    {
      "epoch": 2.906921178557635,
      "grad_norm": 0.29926732182502747,
      "learning_rate": 4.213741162206862e-06,
      "loss": 0.0201,
      "step": 1776280
    },
    {
      "epoch": 2.9069539089962886,
      "grad_norm": 0.07241034507751465,
      "learning_rate": 4.213675269993345e-06,
      "loss": 0.0148,
      "step": 1776300
    },
    {
      "epoch": 2.9069866394349417,
      "grad_norm": 0.507294237613678,
      "learning_rate": 4.213609377779828e-06,
      "loss": 0.0103,
      "step": 1776320
    },
    {
      "epoch": 2.907019369873595,
      "grad_norm": 0.6188467144966125,
      "learning_rate": 4.213543485566311e-06,
      "loss": 0.0093,
      "step": 1776340
    },
    {
      "epoch": 2.9070521003122485,
      "grad_norm": 0.25616252422332764,
      "learning_rate": 4.213477593352794e-06,
      "loss": 0.0199,
      "step": 1776360
    },
    {
      "epoch": 2.9070848307509016,
      "grad_norm": 0.6928125619888306,
      "learning_rate": 4.213411701139277e-06,
      "loss": 0.0159,
      "step": 1776380
    },
    {
      "epoch": 2.907117561189555,
      "grad_norm": 0.18018753826618195,
      "learning_rate": 4.21334580892576e-06,
      "loss": 0.015,
      "step": 1776400
    },
    {
      "epoch": 2.9071502916282084,
      "grad_norm": 0.30960988998413086,
      "learning_rate": 4.2132799167122424e-06,
      "loss": 0.011,
      "step": 1776420
    },
    {
      "epoch": 2.907183022066862,
      "grad_norm": 0.7262066006660461,
      "learning_rate": 4.213214024498725e-06,
      "loss": 0.0184,
      "step": 1776440
    },
    {
      "epoch": 2.907215752505515,
      "grad_norm": 0.16130481660366058,
      "learning_rate": 4.213148132285208e-06,
      "loss": 0.0157,
      "step": 1776460
    },
    {
      "epoch": 2.9072484829441683,
      "grad_norm": 0.898281455039978,
      "learning_rate": 4.213082240071691e-06,
      "loss": 0.0188,
      "step": 1776480
    },
    {
      "epoch": 2.907281213382822,
      "grad_norm": 0.5211381912231445,
      "learning_rate": 4.213016347858173e-06,
      "loss": 0.0122,
      "step": 1776500
    },
    {
      "epoch": 2.907313943821475,
      "grad_norm": 0.07933329790830612,
      "learning_rate": 4.212950455644656e-06,
      "loss": 0.0167,
      "step": 1776520
    },
    {
      "epoch": 2.907346674260128,
      "grad_norm": 0.4853573441505432,
      "learning_rate": 4.21288456343114e-06,
      "loss": 0.0134,
      "step": 1776540
    },
    {
      "epoch": 2.9073794046987818,
      "grad_norm": 0.2534591853618622,
      "learning_rate": 4.2128186712176225e-06,
      "loss": 0.0142,
      "step": 1776560
    },
    {
      "epoch": 2.9074121351374353,
      "grad_norm": 0.27696913480758667,
      "learning_rate": 4.212752779004106e-06,
      "loss": 0.0115,
      "step": 1776580
    },
    {
      "epoch": 2.9074448655760885,
      "grad_norm": 0.2655508816242218,
      "learning_rate": 4.212686886790589e-06,
      "loss": 0.0095,
      "step": 1776600
    },
    {
      "epoch": 2.9074775960147416,
      "grad_norm": 0.7952902317047119,
      "learning_rate": 4.2126209945770715e-06,
      "loss": 0.0111,
      "step": 1776620
    },
    {
      "epoch": 2.9075103264533952,
      "grad_norm": 0.6301617622375488,
      "learning_rate": 4.212555102363554e-06,
      "loss": 0.0083,
      "step": 1776640
    },
    {
      "epoch": 2.9075430568920484,
      "grad_norm": 0.35179978609085083,
      "learning_rate": 4.212489210150037e-06,
      "loss": 0.0111,
      "step": 1776660
    },
    {
      "epoch": 2.9075757873307015,
      "grad_norm": 0.39765915274620056,
      "learning_rate": 4.21242331793652e-06,
      "loss": 0.0109,
      "step": 1776680
    },
    {
      "epoch": 2.907608517769355,
      "grad_norm": 0.11670655757188797,
      "learning_rate": 4.2123574257230025e-06,
      "loss": 0.0139,
      "step": 1776700
    },
    {
      "epoch": 2.9076412482080087,
      "grad_norm": 0.366048663854599,
      "learning_rate": 4.212291533509485e-06,
      "loss": 0.0158,
      "step": 1776720
    },
    {
      "epoch": 2.907673978646662,
      "grad_norm": 0.21267656981945038,
      "learning_rate": 4.212225641295968e-06,
      "loss": 0.0107,
      "step": 1776740
    },
    {
      "epoch": 2.907706709085315,
      "grad_norm": 0.5282946228981018,
      "learning_rate": 4.2121597490824515e-06,
      "loss": 0.0144,
      "step": 1776760
    },
    {
      "epoch": 2.9077394395239686,
      "grad_norm": 0.173064723610878,
      "learning_rate": 4.212093856868934e-06,
      "loss": 0.0142,
      "step": 1776780
    },
    {
      "epoch": 2.9077721699626218,
      "grad_norm": 0.4787139892578125,
      "learning_rate": 4.212027964655417e-06,
      "loss": 0.0212,
      "step": 1776800
    },
    {
      "epoch": 2.907804900401275,
      "grad_norm": 0.23819082975387573,
      "learning_rate": 4.2119620724419e-06,
      "loss": 0.0107,
      "step": 1776820
    },
    {
      "epoch": 2.9078376308399285,
      "grad_norm": 0.7334052920341492,
      "learning_rate": 4.2118961802283825e-06,
      "loss": 0.0121,
      "step": 1776840
    },
    {
      "epoch": 2.907870361278582,
      "grad_norm": 0.26265114545822144,
      "learning_rate": 4.211830288014865e-06,
      "loss": 0.0126,
      "step": 1776860
    },
    {
      "epoch": 2.9079030917172353,
      "grad_norm": 0.5773788094520569,
      "learning_rate": 4.211764395801349e-06,
      "loss": 0.022,
      "step": 1776880
    },
    {
      "epoch": 2.9079358221558884,
      "grad_norm": 0.22978082299232483,
      "learning_rate": 4.2116985035878316e-06,
      "loss": 0.0138,
      "step": 1776900
    },
    {
      "epoch": 2.907968552594542,
      "grad_norm": 0.46924442052841187,
      "learning_rate": 4.211632611374314e-06,
      "loss": 0.0101,
      "step": 1776920
    },
    {
      "epoch": 2.908001283033195,
      "grad_norm": 0.8521338701248169,
      "learning_rate": 4.211566719160797e-06,
      "loss": 0.0177,
      "step": 1776940
    },
    {
      "epoch": 2.9080340134718483,
      "grad_norm": 0.39762210845947266,
      "learning_rate": 4.21150082694728e-06,
      "loss": 0.0104,
      "step": 1776960
    },
    {
      "epoch": 2.908066743910502,
      "grad_norm": 0.22204594314098358,
      "learning_rate": 4.211434934733763e-06,
      "loss": 0.0157,
      "step": 1776980
    },
    {
      "epoch": 2.9080994743491555,
      "grad_norm": 0.2393883615732193,
      "learning_rate": 4.211369042520246e-06,
      "loss": 0.015,
      "step": 1777000
    },
    {
      "epoch": 2.9081322047878087,
      "grad_norm": 0.3498496413230896,
      "learning_rate": 4.211303150306729e-06,
      "loss": 0.0134,
      "step": 1777020
    },
    {
      "epoch": 2.908164935226462,
      "grad_norm": 1.0015532970428467,
      "learning_rate": 4.211237258093212e-06,
      "loss": 0.0097,
      "step": 1777040
    },
    {
      "epoch": 2.9081976656651154,
      "grad_norm": 0.09103470295667648,
      "learning_rate": 4.211171365879694e-06,
      "loss": 0.0103,
      "step": 1777060
    },
    {
      "epoch": 2.9082303961037685,
      "grad_norm": 0.4523817300796509,
      "learning_rate": 4.211105473666177e-06,
      "loss": 0.0187,
      "step": 1777080
    },
    {
      "epoch": 2.9082631265424217,
      "grad_norm": 0.27551722526550293,
      "learning_rate": 4.21103958145266e-06,
      "loss": 0.0259,
      "step": 1777100
    },
    {
      "epoch": 2.9082958569810753,
      "grad_norm": 0.34198135137557983,
      "learning_rate": 4.2109736892391426e-06,
      "loss": 0.007,
      "step": 1777120
    },
    {
      "epoch": 2.9083285874197284,
      "grad_norm": 0.21789118647575378,
      "learning_rate": 4.210907797025625e-06,
      "loss": 0.0141,
      "step": 1777140
    },
    {
      "epoch": 2.908361317858382,
      "grad_norm": 0.7437898516654968,
      "learning_rate": 4.210841904812109e-06,
      "loss": 0.0151,
      "step": 1777160
    },
    {
      "epoch": 2.908394048297035,
      "grad_norm": 0.43497878313064575,
      "learning_rate": 4.210776012598592e-06,
      "loss": 0.0101,
      "step": 1777180
    },
    {
      "epoch": 2.908426778735689,
      "grad_norm": 0.13140451908111572,
      "learning_rate": 4.210710120385074e-06,
      "loss": 0.0103,
      "step": 1777200
    },
    {
      "epoch": 2.908459509174342,
      "grad_norm": 0.17494578659534454,
      "learning_rate": 4.210644228171557e-06,
      "loss": 0.0196,
      "step": 1777220
    },
    {
      "epoch": 2.908492239612995,
      "grad_norm": 0.18862341344356537,
      "learning_rate": 4.210578335958041e-06,
      "loss": 0.0168,
      "step": 1777240
    },
    {
      "epoch": 2.9085249700516487,
      "grad_norm": 0.11108121275901794,
      "learning_rate": 4.2105124437445234e-06,
      "loss": 0.0093,
      "step": 1777260
    },
    {
      "epoch": 2.908557700490302,
      "grad_norm": 1.3170273303985596,
      "learning_rate": 4.210446551531006e-06,
      "loss": 0.0192,
      "step": 1777280
    },
    {
      "epoch": 2.9085904309289554,
      "grad_norm": 0.38626304268836975,
      "learning_rate": 4.210380659317489e-06,
      "loss": 0.0121,
      "step": 1777300
    },
    {
      "epoch": 2.9086231613676086,
      "grad_norm": 0.31198248267173767,
      "learning_rate": 4.210314767103972e-06,
      "loss": 0.0159,
      "step": 1777320
    },
    {
      "epoch": 2.908655891806262,
      "grad_norm": 0.6374187469482422,
      "learning_rate": 4.210248874890454e-06,
      "loss": 0.0151,
      "step": 1777340
    },
    {
      "epoch": 2.9086886222449153,
      "grad_norm": 0.18397095799446106,
      "learning_rate": 4.210182982676937e-06,
      "loss": 0.0157,
      "step": 1777360
    },
    {
      "epoch": 2.9087213526835685,
      "grad_norm": 1.007546067237854,
      "learning_rate": 4.210117090463421e-06,
      "loss": 0.0113,
      "step": 1777380
    },
    {
      "epoch": 2.908754083122222,
      "grad_norm": 0.2597202956676483,
      "learning_rate": 4.2100511982499035e-06,
      "loss": 0.0143,
      "step": 1777400
    },
    {
      "epoch": 2.908786813560875,
      "grad_norm": 0.33724692463874817,
      "learning_rate": 4.209985306036386e-06,
      "loss": 0.0109,
      "step": 1777420
    },
    {
      "epoch": 2.908819543999529,
      "grad_norm": 0.23983635008335114,
      "learning_rate": 4.209919413822869e-06,
      "loss": 0.011,
      "step": 1777440
    },
    {
      "epoch": 2.908852274438182,
      "grad_norm": 0.11148866266012192,
      "learning_rate": 4.209853521609352e-06,
      "loss": 0.0122,
      "step": 1777460
    },
    {
      "epoch": 2.9088850048768355,
      "grad_norm": 0.4622950255870819,
      "learning_rate": 4.209787629395834e-06,
      "loss": 0.0127,
      "step": 1777480
    },
    {
      "epoch": 2.9089177353154887,
      "grad_norm": 0.42785143852233887,
      "learning_rate": 4.209721737182317e-06,
      "loss": 0.0186,
      "step": 1777500
    },
    {
      "epoch": 2.908950465754142,
      "grad_norm": 0.38615867495536804,
      "learning_rate": 4.2096558449688e-06,
      "loss": 0.009,
      "step": 1777520
    },
    {
      "epoch": 2.9089831961927954,
      "grad_norm": 0.31455352902412415,
      "learning_rate": 4.209589952755283e-06,
      "loss": 0.0124,
      "step": 1777540
    },
    {
      "epoch": 2.9090159266314486,
      "grad_norm": 1.4389402866363525,
      "learning_rate": 4.209524060541766e-06,
      "loss": 0.0115,
      "step": 1777560
    },
    {
      "epoch": 2.909048657070102,
      "grad_norm": 1.2567354440689087,
      "learning_rate": 4.209458168328249e-06,
      "loss": 0.0152,
      "step": 1777580
    },
    {
      "epoch": 2.9090813875087553,
      "grad_norm": 0.14061236381530762,
      "learning_rate": 4.209392276114732e-06,
      "loss": 0.0133,
      "step": 1777600
    },
    {
      "epoch": 2.909114117947409,
      "grad_norm": 0.6026883125305176,
      "learning_rate": 4.209326383901215e-06,
      "loss": 0.013,
      "step": 1777620
    },
    {
      "epoch": 2.909146848386062,
      "grad_norm": 0.3133995831012726,
      "learning_rate": 4.209260491687698e-06,
      "loss": 0.0158,
      "step": 1777640
    },
    {
      "epoch": 2.9091795788247152,
      "grad_norm": 0.1298641413450241,
      "learning_rate": 4.209194599474181e-06,
      "loss": 0.0152,
      "step": 1777660
    },
    {
      "epoch": 2.909212309263369,
      "grad_norm": 0.40033432841300964,
      "learning_rate": 4.2091287072606635e-06,
      "loss": 0.0146,
      "step": 1777680
    },
    {
      "epoch": 2.909245039702022,
      "grad_norm": 0.3394564092159271,
      "learning_rate": 4.209062815047146e-06,
      "loss": 0.0149,
      "step": 1777700
    },
    {
      "epoch": 2.9092777701406756,
      "grad_norm": 0.3909298777580261,
      "learning_rate": 4.208996922833629e-06,
      "loss": 0.0122,
      "step": 1777720
    },
    {
      "epoch": 2.9093105005793287,
      "grad_norm": 0.18370190262794495,
      "learning_rate": 4.208931030620112e-06,
      "loss": 0.0105,
      "step": 1777740
    },
    {
      "epoch": 2.9093432310179823,
      "grad_norm": 0.11793933808803558,
      "learning_rate": 4.2088651384065945e-06,
      "loss": 0.013,
      "step": 1777760
    },
    {
      "epoch": 2.9093759614566355,
      "grad_norm": 0.2428913563489914,
      "learning_rate": 4.208799246193078e-06,
      "loss": 0.0098,
      "step": 1777780
    },
    {
      "epoch": 2.9094086918952886,
      "grad_norm": 0.29612088203430176,
      "learning_rate": 4.208733353979561e-06,
      "loss": 0.0129,
      "step": 1777800
    },
    {
      "epoch": 2.909441422333942,
      "grad_norm": 0.2655535042285919,
      "learning_rate": 4.2086674617660435e-06,
      "loss": 0.0134,
      "step": 1777820
    },
    {
      "epoch": 2.9094741527725954,
      "grad_norm": 0.11147457361221313,
      "learning_rate": 4.208601569552526e-06,
      "loss": 0.0123,
      "step": 1777840
    },
    {
      "epoch": 2.909506883211249,
      "grad_norm": 0.08118066936731339,
      "learning_rate": 4.208535677339009e-06,
      "loss": 0.0097,
      "step": 1777860
    },
    {
      "epoch": 2.909539613649902,
      "grad_norm": 0.4605623185634613,
      "learning_rate": 4.208469785125492e-06,
      "loss": 0.0117,
      "step": 1777880
    },
    {
      "epoch": 2.9095723440885557,
      "grad_norm": 0.5304422378540039,
      "learning_rate": 4.2084038929119745e-06,
      "loss": 0.0122,
      "step": 1777900
    },
    {
      "epoch": 2.909605074527209,
      "grad_norm": 0.30850881338119507,
      "learning_rate": 4.208338000698457e-06,
      "loss": 0.0201,
      "step": 1777920
    },
    {
      "epoch": 2.909637804965862,
      "grad_norm": 0.13362374901771545,
      "learning_rate": 4.208272108484941e-06,
      "loss": 0.0146,
      "step": 1777940
    },
    {
      "epoch": 2.9096705354045156,
      "grad_norm": 0.38438868522644043,
      "learning_rate": 4.2082062162714236e-06,
      "loss": 0.01,
      "step": 1777960
    },
    {
      "epoch": 2.9097032658431687,
      "grad_norm": 0.12669281661510468,
      "learning_rate": 4.208140324057906e-06,
      "loss": 0.0118,
      "step": 1777980
    },
    {
      "epoch": 2.9097359962818223,
      "grad_norm": 0.34817585349082947,
      "learning_rate": 4.20807443184439e-06,
      "loss": 0.0146,
      "step": 1778000
    },
    {
      "epoch": 2.9097687267204755,
      "grad_norm": 0.49087923765182495,
      "learning_rate": 4.208008539630873e-06,
      "loss": 0.0101,
      "step": 1778020
    },
    {
      "epoch": 2.909801457159129,
      "grad_norm": 0.49221330881118774,
      "learning_rate": 4.207942647417355e-06,
      "loss": 0.0159,
      "step": 1778040
    },
    {
      "epoch": 2.9098341875977822,
      "grad_norm": 0.5970100164413452,
      "learning_rate": 4.207876755203838e-06,
      "loss": 0.014,
      "step": 1778060
    },
    {
      "epoch": 2.9098669180364354,
      "grad_norm": 0.2360910326242447,
      "learning_rate": 4.207810862990321e-06,
      "loss": 0.0116,
      "step": 1778080
    },
    {
      "epoch": 2.909899648475089,
      "grad_norm": 0.4566745460033417,
      "learning_rate": 4.207744970776804e-06,
      "loss": 0.0198,
      "step": 1778100
    },
    {
      "epoch": 2.909932378913742,
      "grad_norm": 0.3508566617965698,
      "learning_rate": 4.207679078563286e-06,
      "loss": 0.0093,
      "step": 1778120
    },
    {
      "epoch": 2.9099651093523953,
      "grad_norm": 1.1789796352386475,
      "learning_rate": 4.207613186349769e-06,
      "loss": 0.0132,
      "step": 1778140
    },
    {
      "epoch": 2.909997839791049,
      "grad_norm": 1.0419237613677979,
      "learning_rate": 4.207547294136252e-06,
      "loss": 0.0135,
      "step": 1778160
    },
    {
      "epoch": 2.9100305702297025,
      "grad_norm": 0.37867388129234314,
      "learning_rate": 4.207481401922735e-06,
      "loss": 0.0179,
      "step": 1778180
    },
    {
      "epoch": 2.9100633006683556,
      "grad_norm": 0.20748619735240936,
      "learning_rate": 4.207415509709218e-06,
      "loss": 0.0139,
      "step": 1778200
    },
    {
      "epoch": 2.9100960311070088,
      "grad_norm": 0.15135912597179413,
      "learning_rate": 4.207349617495701e-06,
      "loss": 0.0106,
      "step": 1778220
    },
    {
      "epoch": 2.9101287615456624,
      "grad_norm": 0.4043232798576355,
      "learning_rate": 4.207283725282184e-06,
      "loss": 0.0165,
      "step": 1778240
    },
    {
      "epoch": 2.9101614919843155,
      "grad_norm": 0.4645816385746002,
      "learning_rate": 4.207217833068666e-06,
      "loss": 0.0128,
      "step": 1778260
    },
    {
      "epoch": 2.9101942224229687,
      "grad_norm": 0.1596885472536087,
      "learning_rate": 4.207151940855149e-06,
      "loss": 0.0116,
      "step": 1778280
    },
    {
      "epoch": 2.9102269528616223,
      "grad_norm": 0.03202984109520912,
      "learning_rate": 4.207086048641633e-06,
      "loss": 0.0149,
      "step": 1778300
    },
    {
      "epoch": 2.910259683300276,
      "grad_norm": 0.15564686059951782,
      "learning_rate": 4.207020156428115e-06,
      "loss": 0.0107,
      "step": 1778320
    },
    {
      "epoch": 2.910292413738929,
      "grad_norm": 0.20675481855869293,
      "learning_rate": 4.206954264214598e-06,
      "loss": 0.0168,
      "step": 1778340
    },
    {
      "epoch": 2.910325144177582,
      "grad_norm": 0.4740353226661682,
      "learning_rate": 4.206888372001081e-06,
      "loss": 0.0143,
      "step": 1778360
    },
    {
      "epoch": 2.9103578746162357,
      "grad_norm": 0.21518509089946747,
      "learning_rate": 4.206822479787564e-06,
      "loss": 0.0118,
      "step": 1778380
    },
    {
      "epoch": 2.910390605054889,
      "grad_norm": 0.25644686818122864,
      "learning_rate": 4.206756587574047e-06,
      "loss": 0.0127,
      "step": 1778400
    },
    {
      "epoch": 2.910423335493542,
      "grad_norm": 0.19743035733699799,
      "learning_rate": 4.20669069536053e-06,
      "loss": 0.0095,
      "step": 1778420
    },
    {
      "epoch": 2.9104560659321956,
      "grad_norm": 3.0904903411865234,
      "learning_rate": 4.206624803147013e-06,
      "loss": 0.0111,
      "step": 1778440
    },
    {
      "epoch": 2.9104887963708492,
      "grad_norm": 0.23382797837257385,
      "learning_rate": 4.2065589109334954e-06,
      "loss": 0.011,
      "step": 1778460
    },
    {
      "epoch": 2.9105215268095024,
      "grad_norm": 0.4076288044452667,
      "learning_rate": 4.206493018719978e-06,
      "loss": 0.013,
      "step": 1778480
    },
    {
      "epoch": 2.9105542572481555,
      "grad_norm": 0.36446791887283325,
      "learning_rate": 4.206427126506461e-06,
      "loss": 0.0112,
      "step": 1778500
    },
    {
      "epoch": 2.910586987686809,
      "grad_norm": 1.3878602981567383,
      "learning_rate": 4.206361234292944e-06,
      "loss": 0.0131,
      "step": 1778520
    },
    {
      "epoch": 2.9106197181254623,
      "grad_norm": 0.3374440371990204,
      "learning_rate": 4.206295342079426e-06,
      "loss": 0.0169,
      "step": 1778540
    },
    {
      "epoch": 2.9106524485641154,
      "grad_norm": 0.14144401252269745,
      "learning_rate": 4.206229449865909e-06,
      "loss": 0.0132,
      "step": 1778560
    },
    {
      "epoch": 2.910685179002769,
      "grad_norm": 0.5180776119232178,
      "learning_rate": 4.206163557652393e-06,
      "loss": 0.0097,
      "step": 1778580
    },
    {
      "epoch": 2.910717909441422,
      "grad_norm": 0.28922906517982483,
      "learning_rate": 4.2060976654388755e-06,
      "loss": 0.0111,
      "step": 1778600
    },
    {
      "epoch": 2.9107506398800758,
      "grad_norm": 0.7485259175300598,
      "learning_rate": 4.206031773225358e-06,
      "loss": 0.0144,
      "step": 1778620
    },
    {
      "epoch": 2.910783370318729,
      "grad_norm": 0.233860045671463,
      "learning_rate": 4.205965881011842e-06,
      "loss": 0.0132,
      "step": 1778640
    },
    {
      "epoch": 2.9108161007573825,
      "grad_norm": 0.4491313397884369,
      "learning_rate": 4.2058999887983245e-06,
      "loss": 0.0134,
      "step": 1778660
    },
    {
      "epoch": 2.9108488311960357,
      "grad_norm": 0.23803651332855225,
      "learning_rate": 4.205834096584807e-06,
      "loss": 0.0091,
      "step": 1778680
    },
    {
      "epoch": 2.910881561634689,
      "grad_norm": 0.38736486434936523,
      "learning_rate": 4.20576820437129e-06,
      "loss": 0.0124,
      "step": 1778700
    },
    {
      "epoch": 2.9109142920733424,
      "grad_norm": 0.08249061554670334,
      "learning_rate": 4.205702312157773e-06,
      "loss": 0.0162,
      "step": 1778720
    },
    {
      "epoch": 2.9109470225119956,
      "grad_norm": 0.43147513270378113,
      "learning_rate": 4.2056364199442555e-06,
      "loss": 0.0156,
      "step": 1778740
    },
    {
      "epoch": 2.910979752950649,
      "grad_norm": 0.119548499584198,
      "learning_rate": 4.205570527730738e-06,
      "loss": 0.019,
      "step": 1778760
    },
    {
      "epoch": 2.9110124833893023,
      "grad_norm": 0.41339772939682007,
      "learning_rate": 4.205504635517221e-06,
      "loss": 0.0148,
      "step": 1778780
    },
    {
      "epoch": 2.911045213827956,
      "grad_norm": 0.3221149146556854,
      "learning_rate": 4.2054387433037046e-06,
      "loss": 0.0114,
      "step": 1778800
    },
    {
      "epoch": 2.911077944266609,
      "grad_norm": 0.410114586353302,
      "learning_rate": 4.205372851090187e-06,
      "loss": 0.0119,
      "step": 1778820
    },
    {
      "epoch": 2.911110674705262,
      "grad_norm": 0.14303435385227203,
      "learning_rate": 4.20530695887667e-06,
      "loss": 0.0173,
      "step": 1778840
    },
    {
      "epoch": 2.911143405143916,
      "grad_norm": 0.3122827410697937,
      "learning_rate": 4.205241066663153e-06,
      "loss": 0.0093,
      "step": 1778860
    },
    {
      "epoch": 2.911176135582569,
      "grad_norm": 0.1643538773059845,
      "learning_rate": 4.2051751744496355e-06,
      "loss": 0.0134,
      "step": 1778880
    },
    {
      "epoch": 2.9112088660212225,
      "grad_norm": 0.14635054767131805,
      "learning_rate": 4.205109282236118e-06,
      "loss": 0.0107,
      "step": 1778900
    },
    {
      "epoch": 2.9112415964598757,
      "grad_norm": 0.23814678192138672,
      "learning_rate": 4.205043390022601e-06,
      "loss": 0.0136,
      "step": 1778920
    },
    {
      "epoch": 2.9112743268985293,
      "grad_norm": 0.4626951217651367,
      "learning_rate": 4.204977497809084e-06,
      "loss": 0.0172,
      "step": 1778940
    },
    {
      "epoch": 2.9113070573371824,
      "grad_norm": 0.7990897297859192,
      "learning_rate": 4.2049116055955665e-06,
      "loss": 0.0193,
      "step": 1778960
    },
    {
      "epoch": 2.9113397877758356,
      "grad_norm": 0.7114030122756958,
      "learning_rate": 4.20484571338205e-06,
      "loss": 0.0141,
      "step": 1778980
    },
    {
      "epoch": 2.911372518214489,
      "grad_norm": 0.11825668811798096,
      "learning_rate": 4.204779821168533e-06,
      "loss": 0.0093,
      "step": 1779000
    },
    {
      "epoch": 2.9114052486531423,
      "grad_norm": 0.4657571017742157,
      "learning_rate": 4.2047139289550155e-06,
      "loss": 0.0142,
      "step": 1779020
    },
    {
      "epoch": 2.911437979091796,
      "grad_norm": 0.26532042026519775,
      "learning_rate": 4.204648036741499e-06,
      "loss": 0.0145,
      "step": 1779040
    },
    {
      "epoch": 2.911470709530449,
      "grad_norm": 1.7550872564315796,
      "learning_rate": 4.204582144527982e-06,
      "loss": 0.015,
      "step": 1779060
    },
    {
      "epoch": 2.9115034399691027,
      "grad_norm": 0.3803263008594513,
      "learning_rate": 4.204516252314465e-06,
      "loss": 0.0142,
      "step": 1779080
    },
    {
      "epoch": 2.911536170407756,
      "grad_norm": 0.20883776247501373,
      "learning_rate": 4.204450360100947e-06,
      "loss": 0.012,
      "step": 1779100
    },
    {
      "epoch": 2.911568900846409,
      "grad_norm": 0.7587526440620422,
      "learning_rate": 4.20438446788743e-06,
      "loss": 0.017,
      "step": 1779120
    },
    {
      "epoch": 2.9116016312850626,
      "grad_norm": 0.27111881971359253,
      "learning_rate": 4.204318575673913e-06,
      "loss": 0.0115,
      "step": 1779140
    },
    {
      "epoch": 2.9116343617237157,
      "grad_norm": 0.17013132572174072,
      "learning_rate": 4.2042526834603956e-06,
      "loss": 0.0137,
      "step": 1779160
    },
    {
      "epoch": 2.9116670921623693,
      "grad_norm": 0.13777273893356323,
      "learning_rate": 4.204186791246878e-06,
      "loss": 0.0126,
      "step": 1779180
    },
    {
      "epoch": 2.9116998226010224,
      "grad_norm": 0.1685493141412735,
      "learning_rate": 4.204120899033362e-06,
      "loss": 0.0106,
      "step": 1779200
    },
    {
      "epoch": 2.911732553039676,
      "grad_norm": 0.512624204158783,
      "learning_rate": 4.204055006819845e-06,
      "loss": 0.0139,
      "step": 1779220
    },
    {
      "epoch": 2.911765283478329,
      "grad_norm": 0.10977265983819962,
      "learning_rate": 4.203989114606327e-06,
      "loss": 0.0186,
      "step": 1779240
    },
    {
      "epoch": 2.9117980139169823,
      "grad_norm": 0.14554092288017273,
      "learning_rate": 4.20392322239281e-06,
      "loss": 0.0232,
      "step": 1779260
    },
    {
      "epoch": 2.911830744355636,
      "grad_norm": 0.5107442140579224,
      "learning_rate": 4.203857330179293e-06,
      "loss": 0.0166,
      "step": 1779280
    },
    {
      "epoch": 2.911863474794289,
      "grad_norm": 0.22149130702018738,
      "learning_rate": 4.203791437965776e-06,
      "loss": 0.0149,
      "step": 1779300
    },
    {
      "epoch": 2.9118962052329427,
      "grad_norm": 0.4187540113925934,
      "learning_rate": 4.203725545752258e-06,
      "loss": 0.0176,
      "step": 1779320
    },
    {
      "epoch": 2.911928935671596,
      "grad_norm": 0.26510125398635864,
      "learning_rate": 4.203659653538742e-06,
      "loss": 0.0098,
      "step": 1779340
    },
    {
      "epoch": 2.9119616661102494,
      "grad_norm": 0.15820905566215515,
      "learning_rate": 4.203593761325225e-06,
      "loss": 0.0153,
      "step": 1779360
    },
    {
      "epoch": 2.9119943965489026,
      "grad_norm": 0.5808587670326233,
      "learning_rate": 4.203527869111707e-06,
      "loss": 0.0145,
      "step": 1779380
    },
    {
      "epoch": 2.9120271269875557,
      "grad_norm": 0.37856489419937134,
      "learning_rate": 4.20346197689819e-06,
      "loss": 0.0132,
      "step": 1779400
    },
    {
      "epoch": 2.9120598574262093,
      "grad_norm": 0.5443952083587646,
      "learning_rate": 4.203396084684674e-06,
      "loss": 0.0158,
      "step": 1779420
    },
    {
      "epoch": 2.9120925878648625,
      "grad_norm": 0.5493021011352539,
      "learning_rate": 4.2033301924711565e-06,
      "loss": 0.0135,
      "step": 1779440
    },
    {
      "epoch": 2.912125318303516,
      "grad_norm": 0.4471661150455475,
      "learning_rate": 4.203264300257639e-06,
      "loss": 0.0201,
      "step": 1779460
    },
    {
      "epoch": 2.912158048742169,
      "grad_norm": 0.4718972146511078,
      "learning_rate": 4.203198408044122e-06,
      "loss": 0.013,
      "step": 1779480
    },
    {
      "epoch": 2.912190779180823,
      "grad_norm": 0.5991425514221191,
      "learning_rate": 4.203132515830605e-06,
      "loss": 0.0157,
      "step": 1779500
    },
    {
      "epoch": 2.912223509619476,
      "grad_norm": 0.5947785377502441,
      "learning_rate": 4.203066623617087e-06,
      "loss": 0.0105,
      "step": 1779520
    },
    {
      "epoch": 2.912256240058129,
      "grad_norm": 0.18449392914772034,
      "learning_rate": 4.20300073140357e-06,
      "loss": 0.0094,
      "step": 1779540
    },
    {
      "epoch": 2.9122889704967827,
      "grad_norm": 0.27469122409820557,
      "learning_rate": 4.202934839190053e-06,
      "loss": 0.0183,
      "step": 1779560
    },
    {
      "epoch": 2.912321700935436,
      "grad_norm": 0.15754976868629456,
      "learning_rate": 4.202868946976536e-06,
      "loss": 0.0122,
      "step": 1779580
    },
    {
      "epoch": 2.912354431374089,
      "grad_norm": 0.32172781229019165,
      "learning_rate": 4.202803054763019e-06,
      "loss": 0.0168,
      "step": 1779600
    },
    {
      "epoch": 2.9123871618127426,
      "grad_norm": 0.33596330881118774,
      "learning_rate": 4.202737162549502e-06,
      "loss": 0.0148,
      "step": 1779620
    },
    {
      "epoch": 2.912419892251396,
      "grad_norm": 0.30296453833580017,
      "learning_rate": 4.202671270335985e-06,
      "loss": 0.0144,
      "step": 1779640
    },
    {
      "epoch": 2.9124526226900493,
      "grad_norm": 0.2848530411720276,
      "learning_rate": 4.2026053781224674e-06,
      "loss": 0.0133,
      "step": 1779660
    },
    {
      "epoch": 2.9124853531287025,
      "grad_norm": 0.5853686332702637,
      "learning_rate": 4.20253948590895e-06,
      "loss": 0.0103,
      "step": 1779680
    },
    {
      "epoch": 2.912518083567356,
      "grad_norm": 0.2991202771663666,
      "learning_rate": 4.202473593695434e-06,
      "loss": 0.0127,
      "step": 1779700
    },
    {
      "epoch": 2.9125508140060092,
      "grad_norm": 0.20107756555080414,
      "learning_rate": 4.2024077014819165e-06,
      "loss": 0.0152,
      "step": 1779720
    },
    {
      "epoch": 2.9125835444446624,
      "grad_norm": 0.062194954603910446,
      "learning_rate": 4.202341809268399e-06,
      "loss": 0.0107,
      "step": 1779740
    },
    {
      "epoch": 2.912616274883316,
      "grad_norm": 0.19588865339756012,
      "learning_rate": 4.202275917054882e-06,
      "loss": 0.0212,
      "step": 1779760
    },
    {
      "epoch": 2.9126490053219696,
      "grad_norm": 0.3606906235218048,
      "learning_rate": 4.202210024841365e-06,
      "loss": 0.0147,
      "step": 1779780
    },
    {
      "epoch": 2.9126817357606227,
      "grad_norm": 1.1431430578231812,
      "learning_rate": 4.2021441326278475e-06,
      "loss": 0.0135,
      "step": 1779800
    },
    {
      "epoch": 2.912714466199276,
      "grad_norm": 0.07860106974840164,
      "learning_rate": 4.202078240414331e-06,
      "loss": 0.0134,
      "step": 1779820
    },
    {
      "epoch": 2.9127471966379295,
      "grad_norm": 0.28953367471694946,
      "learning_rate": 4.202012348200814e-06,
      "loss": 0.0106,
      "step": 1779840
    },
    {
      "epoch": 2.9127799270765826,
      "grad_norm": 0.29746395349502563,
      "learning_rate": 4.2019464559872965e-06,
      "loss": 0.0105,
      "step": 1779860
    },
    {
      "epoch": 2.9128126575152358,
      "grad_norm": 0.4260150194168091,
      "learning_rate": 4.201880563773779e-06,
      "loss": 0.0119,
      "step": 1779880
    },
    {
      "epoch": 2.9128453879538894,
      "grad_norm": 0.637596845626831,
      "learning_rate": 4.201814671560262e-06,
      "loss": 0.0146,
      "step": 1779900
    },
    {
      "epoch": 2.912878118392543,
      "grad_norm": 0.29446718096733093,
      "learning_rate": 4.201748779346745e-06,
      "loss": 0.0111,
      "step": 1779920
    },
    {
      "epoch": 2.912910848831196,
      "grad_norm": 0.2892690896987915,
      "learning_rate": 4.2016828871332275e-06,
      "loss": 0.0074,
      "step": 1779940
    },
    {
      "epoch": 2.9129435792698493,
      "grad_norm": 0.12314582616090775,
      "learning_rate": 4.20161699491971e-06,
      "loss": 0.0187,
      "step": 1779960
    },
    {
      "epoch": 2.912976309708503,
      "grad_norm": 0.29111573100090027,
      "learning_rate": 4.201551102706193e-06,
      "loss": 0.0164,
      "step": 1779980
    },
    {
      "epoch": 2.913009040147156,
      "grad_norm": 0.2412029206752777,
      "learning_rate": 4.2014852104926766e-06,
      "loss": 0.01,
      "step": 1780000
    },
    {
      "epoch": 2.913041770585809,
      "grad_norm": 0.7742682099342346,
      "learning_rate": 4.201419318279159e-06,
      "loss": 0.0142,
      "step": 1780020
    },
    {
      "epoch": 2.9130745010244627,
      "grad_norm": 0.36233463883399963,
      "learning_rate": 4.201353426065642e-06,
      "loss": 0.0153,
      "step": 1780040
    },
    {
      "epoch": 2.9131072314631163,
      "grad_norm": 0.6144742965698242,
      "learning_rate": 4.201287533852126e-06,
      "loss": 0.0143,
      "step": 1780060
    },
    {
      "epoch": 2.9131399619017695,
      "grad_norm": 0.6180171966552734,
      "learning_rate": 4.201221641638608e-06,
      "loss": 0.0125,
      "step": 1780080
    },
    {
      "epoch": 2.9131726923404226,
      "grad_norm": 0.3995908498764038,
      "learning_rate": 4.201155749425091e-06,
      "loss": 0.0195,
      "step": 1780100
    },
    {
      "epoch": 2.9132054227790762,
      "grad_norm": 0.18345512449741364,
      "learning_rate": 4.201089857211574e-06,
      "loss": 0.0158,
      "step": 1780120
    },
    {
      "epoch": 2.9132381532177294,
      "grad_norm": 0.16236263513565063,
      "learning_rate": 4.201023964998057e-06,
      "loss": 0.0085,
      "step": 1780140
    },
    {
      "epoch": 2.9132708836563825,
      "grad_norm": 0.10329200327396393,
      "learning_rate": 4.200958072784539e-06,
      "loss": 0.0134,
      "step": 1780160
    },
    {
      "epoch": 2.913303614095036,
      "grad_norm": 0.44979438185691833,
      "learning_rate": 4.200892180571022e-06,
      "loss": 0.0156,
      "step": 1780180
    },
    {
      "epoch": 2.9133363445336893,
      "grad_norm": 0.19344623386859894,
      "learning_rate": 4.200826288357505e-06,
      "loss": 0.0136,
      "step": 1780200
    },
    {
      "epoch": 2.913369074972343,
      "grad_norm": 0.5505489706993103,
      "learning_rate": 4.200760396143988e-06,
      "loss": 0.0122,
      "step": 1780220
    },
    {
      "epoch": 2.913401805410996,
      "grad_norm": 0.8707811832427979,
      "learning_rate": 4.200694503930471e-06,
      "loss": 0.0116,
      "step": 1780240
    },
    {
      "epoch": 2.9134345358496496,
      "grad_norm": 0.7350608110427856,
      "learning_rate": 4.200628611716954e-06,
      "loss": 0.0111,
      "step": 1780260
    },
    {
      "epoch": 2.9134672662883028,
      "grad_norm": 0.16635212302207947,
      "learning_rate": 4.200562719503437e-06,
      "loss": 0.0102,
      "step": 1780280
    },
    {
      "epoch": 2.913499996726956,
      "grad_norm": 0.10360120981931686,
      "learning_rate": 4.200496827289919e-06,
      "loss": 0.0133,
      "step": 1780300
    },
    {
      "epoch": 2.9135327271656095,
      "grad_norm": 0.4872262477874756,
      "learning_rate": 4.200430935076402e-06,
      "loss": 0.0127,
      "step": 1780320
    },
    {
      "epoch": 2.9135654576042627,
      "grad_norm": 0.3161880671977997,
      "learning_rate": 4.200365042862885e-06,
      "loss": 0.0081,
      "step": 1780340
    },
    {
      "epoch": 2.9135981880429163,
      "grad_norm": 0.05726448819041252,
      "learning_rate": 4.2002991506493676e-06,
      "loss": 0.0121,
      "step": 1780360
    },
    {
      "epoch": 2.9136309184815694,
      "grad_norm": 0.41795048117637634,
      "learning_rate": 4.20023325843585e-06,
      "loss": 0.0166,
      "step": 1780380
    },
    {
      "epoch": 2.913663648920223,
      "grad_norm": 0.5131523609161377,
      "learning_rate": 4.200167366222334e-06,
      "loss": 0.0177,
      "step": 1780400
    },
    {
      "epoch": 2.913696379358876,
      "grad_norm": 0.6342772841453552,
      "learning_rate": 4.200101474008817e-06,
      "loss": 0.0164,
      "step": 1780420
    },
    {
      "epoch": 2.9137291097975293,
      "grad_norm": 0.2090921700000763,
      "learning_rate": 4.200035581795299e-06,
      "loss": 0.014,
      "step": 1780440
    },
    {
      "epoch": 2.913761840236183,
      "grad_norm": 0.2304760366678238,
      "learning_rate": 4.199969689581783e-06,
      "loss": 0.016,
      "step": 1780460
    },
    {
      "epoch": 2.913794570674836,
      "grad_norm": 0.2129501849412918,
      "learning_rate": 4.199903797368266e-06,
      "loss": 0.0164,
      "step": 1780480
    },
    {
      "epoch": 2.9138273011134896,
      "grad_norm": 0.09790728986263275,
      "learning_rate": 4.1998379051547484e-06,
      "loss": 0.0115,
      "step": 1780500
    },
    {
      "epoch": 2.913860031552143,
      "grad_norm": 0.16449755430221558,
      "learning_rate": 4.199772012941231e-06,
      "loss": 0.0134,
      "step": 1780520
    },
    {
      "epoch": 2.9138927619907964,
      "grad_norm": 0.18096251785755157,
      "learning_rate": 4.199706120727714e-06,
      "loss": 0.0177,
      "step": 1780540
    },
    {
      "epoch": 2.9139254924294495,
      "grad_norm": 0.802746057510376,
      "learning_rate": 4.199640228514197e-06,
      "loss": 0.0198,
      "step": 1780560
    },
    {
      "epoch": 2.9139582228681027,
      "grad_norm": 0.1458195000886917,
      "learning_rate": 4.199574336300679e-06,
      "loss": 0.017,
      "step": 1780580
    },
    {
      "epoch": 2.9139909533067563,
      "grad_norm": 0.25777241587638855,
      "learning_rate": 4.199508444087162e-06,
      "loss": 0.0107,
      "step": 1780600
    },
    {
      "epoch": 2.9140236837454094,
      "grad_norm": 0.14653676748275757,
      "learning_rate": 4.199442551873646e-06,
      "loss": 0.0115,
      "step": 1780620
    },
    {
      "epoch": 2.914056414184063,
      "grad_norm": 0.27753520011901855,
      "learning_rate": 4.1993766596601285e-06,
      "loss": 0.0152,
      "step": 1780640
    },
    {
      "epoch": 2.914089144622716,
      "grad_norm": 0.22157490253448486,
      "learning_rate": 4.199310767446611e-06,
      "loss": 0.0127,
      "step": 1780660
    },
    {
      "epoch": 2.9141218750613698,
      "grad_norm": 0.28939488530158997,
      "learning_rate": 4.199244875233094e-06,
      "loss": 0.0088,
      "step": 1780680
    },
    {
      "epoch": 2.914154605500023,
      "grad_norm": 0.09199224412441254,
      "learning_rate": 4.199178983019577e-06,
      "loss": 0.0184,
      "step": 1780700
    },
    {
      "epoch": 2.914187335938676,
      "grad_norm": 0.2398117035627365,
      "learning_rate": 4.1991130908060594e-06,
      "loss": 0.0162,
      "step": 1780720
    },
    {
      "epoch": 2.9142200663773297,
      "grad_norm": 0.29097893834114075,
      "learning_rate": 4.199047198592542e-06,
      "loss": 0.0129,
      "step": 1780740
    },
    {
      "epoch": 2.914252796815983,
      "grad_norm": 0.5528895854949951,
      "learning_rate": 4.198981306379026e-06,
      "loss": 0.0085,
      "step": 1780760
    },
    {
      "epoch": 2.9142855272546364,
      "grad_norm": 0.3295646011829376,
      "learning_rate": 4.1989154141655085e-06,
      "loss": 0.0201,
      "step": 1780780
    },
    {
      "epoch": 2.9143182576932896,
      "grad_norm": 0.3777395486831665,
      "learning_rate": 4.198849521951991e-06,
      "loss": 0.0198,
      "step": 1780800
    },
    {
      "epoch": 2.914350988131943,
      "grad_norm": 0.31857144832611084,
      "learning_rate": 4.198783629738474e-06,
      "loss": 0.0139,
      "step": 1780820
    },
    {
      "epoch": 2.9143837185705963,
      "grad_norm": 0.42009735107421875,
      "learning_rate": 4.1987177375249576e-06,
      "loss": 0.0163,
      "step": 1780840
    },
    {
      "epoch": 2.9144164490092495,
      "grad_norm": 0.5115525126457214,
      "learning_rate": 4.19865184531144e-06,
      "loss": 0.0194,
      "step": 1780860
    },
    {
      "epoch": 2.914449179447903,
      "grad_norm": 1.6846086978912354,
      "learning_rate": 4.198585953097923e-06,
      "loss": 0.0193,
      "step": 1780880
    },
    {
      "epoch": 2.914481909886556,
      "grad_norm": 0.11729902029037476,
      "learning_rate": 4.198520060884406e-06,
      "loss": 0.0098,
      "step": 1780900
    },
    {
      "epoch": 2.91451464032521,
      "grad_norm": 0.3747401237487793,
      "learning_rate": 4.1984541686708885e-06,
      "loss": 0.0128,
      "step": 1780920
    },
    {
      "epoch": 2.914547370763863,
      "grad_norm": 0.26763007044792175,
      "learning_rate": 4.198388276457371e-06,
      "loss": 0.0148,
      "step": 1780940
    },
    {
      "epoch": 2.9145801012025165,
      "grad_norm": 0.3068593442440033,
      "learning_rate": 4.198322384243854e-06,
      "loss": 0.0184,
      "step": 1780960
    },
    {
      "epoch": 2.9146128316411697,
      "grad_norm": 0.8008392453193665,
      "learning_rate": 4.198256492030337e-06,
      "loss": 0.013,
      "step": 1780980
    },
    {
      "epoch": 2.914645562079823,
      "grad_norm": 0.07901529967784882,
      "learning_rate": 4.1981905998168195e-06,
      "loss": 0.0121,
      "step": 1781000
    },
    {
      "epoch": 2.9146782925184764,
      "grad_norm": 0.16834980249404907,
      "learning_rate": 4.198124707603303e-06,
      "loss": 0.0085,
      "step": 1781020
    },
    {
      "epoch": 2.9147110229571296,
      "grad_norm": 0.37909525632858276,
      "learning_rate": 4.198058815389786e-06,
      "loss": 0.016,
      "step": 1781040
    },
    {
      "epoch": 2.914743753395783,
      "grad_norm": 0.3143589496612549,
      "learning_rate": 4.1979929231762685e-06,
      "loss": 0.0083,
      "step": 1781060
    },
    {
      "epoch": 2.9147764838344363,
      "grad_norm": 0.09274853765964508,
      "learning_rate": 4.197927030962751e-06,
      "loss": 0.0089,
      "step": 1781080
    },
    {
      "epoch": 2.91480921427309,
      "grad_norm": 0.2553883492946625,
      "learning_rate": 4.197861138749235e-06,
      "loss": 0.0159,
      "step": 1781100
    },
    {
      "epoch": 2.914841944711743,
      "grad_norm": 0.426865816116333,
      "learning_rate": 4.197795246535718e-06,
      "loss": 0.0138,
      "step": 1781120
    },
    {
      "epoch": 2.914874675150396,
      "grad_norm": 0.11995155364274979,
      "learning_rate": 4.1977293543222e-06,
      "loss": 0.0154,
      "step": 1781140
    },
    {
      "epoch": 2.91490740558905,
      "grad_norm": 0.40559712052345276,
      "learning_rate": 4.197663462108683e-06,
      "loss": 0.0182,
      "step": 1781160
    },
    {
      "epoch": 2.914940136027703,
      "grad_norm": 0.1588449478149414,
      "learning_rate": 4.197597569895166e-06,
      "loss": 0.0157,
      "step": 1781180
    },
    {
      "epoch": 2.914972866466356,
      "grad_norm": 0.20269732177257538,
      "learning_rate": 4.1975316776816486e-06,
      "loss": 0.012,
      "step": 1781200
    },
    {
      "epoch": 2.9150055969050097,
      "grad_norm": 0.3614550530910492,
      "learning_rate": 4.197465785468131e-06,
      "loss": 0.015,
      "step": 1781220
    },
    {
      "epoch": 2.9150383273436633,
      "grad_norm": 0.5519113540649414,
      "learning_rate": 4.197399893254615e-06,
      "loss": 0.0134,
      "step": 1781240
    },
    {
      "epoch": 2.9150710577823165,
      "grad_norm": 0.1819925308227539,
      "learning_rate": 4.197334001041098e-06,
      "loss": 0.0167,
      "step": 1781260
    },
    {
      "epoch": 2.9151037882209696,
      "grad_norm": 0.4221590757369995,
      "learning_rate": 4.19726810882758e-06,
      "loss": 0.0134,
      "step": 1781280
    },
    {
      "epoch": 2.915136518659623,
      "grad_norm": 0.1483728587627411,
      "learning_rate": 4.197202216614063e-06,
      "loss": 0.0092,
      "step": 1781300
    },
    {
      "epoch": 2.9151692490982763,
      "grad_norm": 0.8874109387397766,
      "learning_rate": 4.197136324400546e-06,
      "loss": 0.0144,
      "step": 1781320
    },
    {
      "epoch": 2.9152019795369295,
      "grad_norm": 0.3861340284347534,
      "learning_rate": 4.197070432187029e-06,
      "loss": 0.0124,
      "step": 1781340
    },
    {
      "epoch": 2.915234709975583,
      "grad_norm": 0.12224169820547104,
      "learning_rate": 4.197004539973511e-06,
      "loss": 0.0122,
      "step": 1781360
    },
    {
      "epoch": 2.9152674404142367,
      "grad_norm": 0.7944061160087585,
      "learning_rate": 4.196938647759994e-06,
      "loss": 0.0129,
      "step": 1781380
    },
    {
      "epoch": 2.91530017085289,
      "grad_norm": 2.8100738525390625,
      "learning_rate": 4.196872755546477e-06,
      "loss": 0.0137,
      "step": 1781400
    },
    {
      "epoch": 2.915332901291543,
      "grad_norm": 0.301426500082016,
      "learning_rate": 4.19680686333296e-06,
      "loss": 0.0114,
      "step": 1781420
    },
    {
      "epoch": 2.9153656317301966,
      "grad_norm": 0.2296004742383957,
      "learning_rate": 4.196740971119443e-06,
      "loss": 0.0141,
      "step": 1781440
    },
    {
      "epoch": 2.9153983621688497,
      "grad_norm": 0.40449589490890503,
      "learning_rate": 4.196675078905926e-06,
      "loss": 0.0157,
      "step": 1781460
    },
    {
      "epoch": 2.915431092607503,
      "grad_norm": 0.1983097642660141,
      "learning_rate": 4.1966091866924095e-06,
      "loss": 0.0092,
      "step": 1781480
    },
    {
      "epoch": 2.9154638230461565,
      "grad_norm": 0.14297670125961304,
      "learning_rate": 4.196543294478892e-06,
      "loss": 0.0147,
      "step": 1781500
    },
    {
      "epoch": 2.91549655348481,
      "grad_norm": 0.18157510459423065,
      "learning_rate": 4.196477402265375e-06,
      "loss": 0.0162,
      "step": 1781520
    },
    {
      "epoch": 2.915529283923463,
      "grad_norm": 0.34512120485305786,
      "learning_rate": 4.196411510051858e-06,
      "loss": 0.0142,
      "step": 1781540
    },
    {
      "epoch": 2.9155620143621164,
      "grad_norm": 0.36197543144226074,
      "learning_rate": 4.1963456178383404e-06,
      "loss": 0.0253,
      "step": 1781560
    },
    {
      "epoch": 2.91559474480077,
      "grad_norm": 0.11134933680295944,
      "learning_rate": 4.196279725624823e-06,
      "loss": 0.0105,
      "step": 1781580
    },
    {
      "epoch": 2.915627475239423,
      "grad_norm": 0.47194501757621765,
      "learning_rate": 4.196213833411306e-06,
      "loss": 0.0179,
      "step": 1781600
    },
    {
      "epoch": 2.9156602056780763,
      "grad_norm": 0.534797728061676,
      "learning_rate": 4.196147941197789e-06,
      "loss": 0.0174,
      "step": 1781620
    },
    {
      "epoch": 2.91569293611673,
      "grad_norm": 0.43035370111465454,
      "learning_rate": 4.196082048984272e-06,
      "loss": 0.0116,
      "step": 1781640
    },
    {
      "epoch": 2.915725666555383,
      "grad_norm": 0.13741187751293182,
      "learning_rate": 4.196016156770755e-06,
      "loss": 0.015,
      "step": 1781660
    },
    {
      "epoch": 2.9157583969940366,
      "grad_norm": 0.3237842619419098,
      "learning_rate": 4.195950264557238e-06,
      "loss": 0.0162,
      "step": 1781680
    },
    {
      "epoch": 2.9157911274326898,
      "grad_norm": 0.4768409729003906,
      "learning_rate": 4.1958843723437205e-06,
      "loss": 0.0145,
      "step": 1781700
    },
    {
      "epoch": 2.9158238578713433,
      "grad_norm": 0.7248929142951965,
      "learning_rate": 4.195818480130203e-06,
      "loss": 0.0117,
      "step": 1781720
    },
    {
      "epoch": 2.9158565883099965,
      "grad_norm": 0.14748916029930115,
      "learning_rate": 4.195752587916686e-06,
      "loss": 0.0162,
      "step": 1781740
    },
    {
      "epoch": 2.9158893187486497,
      "grad_norm": 0.15911507606506348,
      "learning_rate": 4.195686695703169e-06,
      "loss": 0.0139,
      "step": 1781760
    },
    {
      "epoch": 2.9159220491873032,
      "grad_norm": 0.18020519614219666,
      "learning_rate": 4.195620803489651e-06,
      "loss": 0.0162,
      "step": 1781780
    },
    {
      "epoch": 2.9159547796259564,
      "grad_norm": 0.3543972969055176,
      "learning_rate": 4.195554911276134e-06,
      "loss": 0.0142,
      "step": 1781800
    },
    {
      "epoch": 2.91598751006461,
      "grad_norm": 0.4850477874279022,
      "learning_rate": 4.195489019062618e-06,
      "loss": 0.0124,
      "step": 1781820
    },
    {
      "epoch": 2.916020240503263,
      "grad_norm": 0.45094043016433716,
      "learning_rate": 4.1954231268491005e-06,
      "loss": 0.0146,
      "step": 1781840
    },
    {
      "epoch": 2.9160529709419167,
      "grad_norm": 0.7402362823486328,
      "learning_rate": 4.195357234635584e-06,
      "loss": 0.0155,
      "step": 1781860
    },
    {
      "epoch": 2.91608570138057,
      "grad_norm": 0.20124216377735138,
      "learning_rate": 4.195291342422067e-06,
      "loss": 0.0172,
      "step": 1781880
    },
    {
      "epoch": 2.916118431819223,
      "grad_norm": 0.8894156813621521,
      "learning_rate": 4.1952254502085495e-06,
      "loss": 0.0137,
      "step": 1781900
    },
    {
      "epoch": 2.9161511622578766,
      "grad_norm": 0.3635910749435425,
      "learning_rate": 4.195159557995032e-06,
      "loss": 0.0135,
      "step": 1781920
    },
    {
      "epoch": 2.91618389269653,
      "grad_norm": 0.10560809820890427,
      "learning_rate": 4.195093665781515e-06,
      "loss": 0.0159,
      "step": 1781940
    },
    {
      "epoch": 2.9162166231351834,
      "grad_norm": 0.6382506489753723,
      "learning_rate": 4.195027773567998e-06,
      "loss": 0.0131,
      "step": 1781960
    },
    {
      "epoch": 2.9162493535738365,
      "grad_norm": 0.20465394854545593,
      "learning_rate": 4.1949618813544805e-06,
      "loss": 0.0164,
      "step": 1781980
    },
    {
      "epoch": 2.91628208401249,
      "grad_norm": 0.5751181840896606,
      "learning_rate": 4.194895989140963e-06,
      "loss": 0.0153,
      "step": 1782000
    },
    {
      "epoch": 2.9163148144511433,
      "grad_norm": 0.29118743538856506,
      "learning_rate": 4.194830096927446e-06,
      "loss": 0.0126,
      "step": 1782020
    },
    {
      "epoch": 2.9163475448897964,
      "grad_norm": 0.1988401859998703,
      "learning_rate": 4.1947642047139296e-06,
      "loss": 0.0147,
      "step": 1782040
    },
    {
      "epoch": 2.91638027532845,
      "grad_norm": 0.10286237299442291,
      "learning_rate": 4.194698312500412e-06,
      "loss": 0.0122,
      "step": 1782060
    },
    {
      "epoch": 2.916413005767103,
      "grad_norm": 0.7162845134735107,
      "learning_rate": 4.194632420286895e-06,
      "loss": 0.0135,
      "step": 1782080
    },
    {
      "epoch": 2.9164457362057568,
      "grad_norm": 0.22442865371704102,
      "learning_rate": 4.194566528073378e-06,
      "loss": 0.0179,
      "step": 1782100
    },
    {
      "epoch": 2.91647846664441,
      "grad_norm": 0.3684748411178589,
      "learning_rate": 4.1945006358598605e-06,
      "loss": 0.0142,
      "step": 1782120
    },
    {
      "epoch": 2.9165111970830635,
      "grad_norm": 0.6245233416557312,
      "learning_rate": 4.194434743646343e-06,
      "loss": 0.0099,
      "step": 1782140
    },
    {
      "epoch": 2.9165439275217167,
      "grad_norm": 0.36056190729141235,
      "learning_rate": 4.194368851432827e-06,
      "loss": 0.0173,
      "step": 1782160
    },
    {
      "epoch": 2.91657665796037,
      "grad_norm": 0.19362764060497284,
      "learning_rate": 4.19430295921931e-06,
      "loss": 0.0082,
      "step": 1782180
    },
    {
      "epoch": 2.9166093883990234,
      "grad_norm": 0.17383511364459991,
      "learning_rate": 4.194237067005792e-06,
      "loss": 0.0093,
      "step": 1782200
    },
    {
      "epoch": 2.9166421188376765,
      "grad_norm": 0.17324693500995636,
      "learning_rate": 4.194171174792275e-06,
      "loss": 0.0095,
      "step": 1782220
    },
    {
      "epoch": 2.91667484927633,
      "grad_norm": 0.3448905348777771,
      "learning_rate": 4.194105282578758e-06,
      "loss": 0.0084,
      "step": 1782240
    },
    {
      "epoch": 2.9167075797149833,
      "grad_norm": 0.2688504457473755,
      "learning_rate": 4.194039390365241e-06,
      "loss": 0.0129,
      "step": 1782260
    },
    {
      "epoch": 2.916740310153637,
      "grad_norm": 0.21466805040836334,
      "learning_rate": 4.193973498151724e-06,
      "loss": 0.011,
      "step": 1782280
    },
    {
      "epoch": 2.91677304059229,
      "grad_norm": 1.238243579864502,
      "learning_rate": 4.193907605938207e-06,
      "loss": 0.0179,
      "step": 1782300
    },
    {
      "epoch": 2.916805771030943,
      "grad_norm": 0.2170596420764923,
      "learning_rate": 4.19384171372469e-06,
      "loss": 0.0133,
      "step": 1782320
    },
    {
      "epoch": 2.916838501469597,
      "grad_norm": 1.080826997756958,
      "learning_rate": 4.193775821511172e-06,
      "loss": 0.0145,
      "step": 1782340
    },
    {
      "epoch": 2.91687123190825,
      "grad_norm": 0.4275989830493927,
      "learning_rate": 4.193709929297655e-06,
      "loss": 0.0174,
      "step": 1782360
    },
    {
      "epoch": 2.9169039623469035,
      "grad_norm": 0.28134724497795105,
      "learning_rate": 4.193644037084138e-06,
      "loss": 0.0104,
      "step": 1782380
    },
    {
      "epoch": 2.9169366927855567,
      "grad_norm": 0.2903808653354645,
      "learning_rate": 4.193578144870621e-06,
      "loss": 0.0177,
      "step": 1782400
    },
    {
      "epoch": 2.9169694232242103,
      "grad_norm": 0.17941339313983917,
      "learning_rate": 4.193512252657103e-06,
      "loss": 0.0148,
      "step": 1782420
    },
    {
      "epoch": 2.9170021536628634,
      "grad_norm": 0.2542291581630707,
      "learning_rate": 4.193446360443587e-06,
      "loss": 0.0133,
      "step": 1782440
    },
    {
      "epoch": 2.9170348841015166,
      "grad_norm": 0.3547022342681885,
      "learning_rate": 4.19338046823007e-06,
      "loss": 0.0163,
      "step": 1782460
    },
    {
      "epoch": 2.91706761454017,
      "grad_norm": 0.33773210644721985,
      "learning_rate": 4.193314576016552e-06,
      "loss": 0.0162,
      "step": 1782480
    },
    {
      "epoch": 2.9171003449788233,
      "grad_norm": 0.4342353641986847,
      "learning_rate": 4.193248683803035e-06,
      "loss": 0.0114,
      "step": 1782500
    },
    {
      "epoch": 2.917133075417477,
      "grad_norm": 0.2607230842113495,
      "learning_rate": 4.193182791589519e-06,
      "loss": 0.0131,
      "step": 1782520
    },
    {
      "epoch": 2.91716580585613,
      "grad_norm": 0.12365086376667023,
      "learning_rate": 4.1931168993760014e-06,
      "loss": 0.0125,
      "step": 1782540
    },
    {
      "epoch": 2.9171985362947837,
      "grad_norm": 1.1686501502990723,
      "learning_rate": 4.193051007162484e-06,
      "loss": 0.0156,
      "step": 1782560
    },
    {
      "epoch": 2.917231266733437,
      "grad_norm": 0.17366550862789154,
      "learning_rate": 4.192985114948967e-06,
      "loss": 0.0112,
      "step": 1782580
    },
    {
      "epoch": 2.91726399717209,
      "grad_norm": 0.9561330080032349,
      "learning_rate": 4.19291922273545e-06,
      "loss": 0.0125,
      "step": 1782600
    },
    {
      "epoch": 2.9172967276107435,
      "grad_norm": 0.33334803581237793,
      "learning_rate": 4.192853330521932e-06,
      "loss": 0.012,
      "step": 1782620
    },
    {
      "epoch": 2.9173294580493967,
      "grad_norm": 0.34400251507759094,
      "learning_rate": 4.192787438308415e-06,
      "loss": 0.0079,
      "step": 1782640
    },
    {
      "epoch": 2.91736218848805,
      "grad_norm": 0.6692676544189453,
      "learning_rate": 4.192721546094899e-06,
      "loss": 0.012,
      "step": 1782660
    },
    {
      "epoch": 2.9173949189267034,
      "grad_norm": 0.09943326562643051,
      "learning_rate": 4.1926556538813815e-06,
      "loss": 0.0124,
      "step": 1782680
    },
    {
      "epoch": 2.917427649365357,
      "grad_norm": 0.18407201766967773,
      "learning_rate": 4.192589761667864e-06,
      "loss": 0.0189,
      "step": 1782700
    },
    {
      "epoch": 2.91746037980401,
      "grad_norm": 0.40758323669433594,
      "learning_rate": 4.192523869454347e-06,
      "loss": 0.0177,
      "step": 1782720
    },
    {
      "epoch": 2.9174931102426633,
      "grad_norm": 0.712502121925354,
      "learning_rate": 4.19245797724083e-06,
      "loss": 0.0077,
      "step": 1782740
    },
    {
      "epoch": 2.917525840681317,
      "grad_norm": 0.3151521384716034,
      "learning_rate": 4.1923920850273124e-06,
      "loss": 0.0168,
      "step": 1782760
    },
    {
      "epoch": 2.91755857111997,
      "grad_norm": 0.3001646101474762,
      "learning_rate": 4.192326192813795e-06,
      "loss": 0.0173,
      "step": 1782780
    },
    {
      "epoch": 2.9175913015586232,
      "grad_norm": 0.5334627628326416,
      "learning_rate": 4.192260300600278e-06,
      "loss": 0.0159,
      "step": 1782800
    },
    {
      "epoch": 2.917624031997277,
      "grad_norm": 0.4177047908306122,
      "learning_rate": 4.192194408386761e-06,
      "loss": 0.0127,
      "step": 1782820
    },
    {
      "epoch": 2.9176567624359304,
      "grad_norm": 0.37739449739456177,
      "learning_rate": 4.192128516173244e-06,
      "loss": 0.018,
      "step": 1782840
    },
    {
      "epoch": 2.9176894928745836,
      "grad_norm": 0.2223694771528244,
      "learning_rate": 4.192062623959727e-06,
      "loss": 0.0123,
      "step": 1782860
    },
    {
      "epoch": 2.9177222233132367,
      "grad_norm": 0.7444509267807007,
      "learning_rate": 4.19199673174621e-06,
      "loss": 0.0166,
      "step": 1782880
    },
    {
      "epoch": 2.9177549537518903,
      "grad_norm": 1.1366037130355835,
      "learning_rate": 4.191930839532693e-06,
      "loss": 0.0115,
      "step": 1782900
    },
    {
      "epoch": 2.9177876841905435,
      "grad_norm": 0.19501200318336487,
      "learning_rate": 4.191864947319176e-06,
      "loss": 0.0138,
      "step": 1782920
    },
    {
      "epoch": 2.9178204146291966,
      "grad_norm": 0.21101489663124084,
      "learning_rate": 4.191799055105659e-06,
      "loss": 0.0096,
      "step": 1782940
    },
    {
      "epoch": 2.91785314506785,
      "grad_norm": 0.6278448104858398,
      "learning_rate": 4.1917331628921415e-06,
      "loss": 0.0132,
      "step": 1782960
    },
    {
      "epoch": 2.917885875506504,
      "grad_norm": 0.33887621760368347,
      "learning_rate": 4.191667270678624e-06,
      "loss": 0.0149,
      "step": 1782980
    },
    {
      "epoch": 2.917918605945157,
      "grad_norm": 0.6542844176292419,
      "learning_rate": 4.191601378465107e-06,
      "loss": 0.0182,
      "step": 1783000
    },
    {
      "epoch": 2.91795133638381,
      "grad_norm": 0.2408216893672943,
      "learning_rate": 4.19153548625159e-06,
      "loss": 0.0123,
      "step": 1783020
    },
    {
      "epoch": 2.9179840668224637,
      "grad_norm": 0.1939871609210968,
      "learning_rate": 4.1914695940380725e-06,
      "loss": 0.0104,
      "step": 1783040
    },
    {
      "epoch": 2.918016797261117,
      "grad_norm": 0.4925703704357147,
      "learning_rate": 4.191403701824556e-06,
      "loss": 0.0157,
      "step": 1783060
    },
    {
      "epoch": 2.91804952769977,
      "grad_norm": 0.1721958965063095,
      "learning_rate": 4.191337809611039e-06,
      "loss": 0.0084,
      "step": 1783080
    },
    {
      "epoch": 2.9180822581384236,
      "grad_norm": 0.5933972597122192,
      "learning_rate": 4.1912719173975216e-06,
      "loss": 0.0128,
      "step": 1783100
    },
    {
      "epoch": 2.918114988577077,
      "grad_norm": 0.10214024782180786,
      "learning_rate": 4.191206025184004e-06,
      "loss": 0.0092,
      "step": 1783120
    },
    {
      "epoch": 2.9181477190157303,
      "grad_norm": 0.517587423324585,
      "learning_rate": 4.191140132970487e-06,
      "loss": 0.0173,
      "step": 1783140
    },
    {
      "epoch": 2.9181804494543835,
      "grad_norm": 0.2952963411808014,
      "learning_rate": 4.19107424075697e-06,
      "loss": 0.0103,
      "step": 1783160
    },
    {
      "epoch": 2.918213179893037,
      "grad_norm": 0.4913173317909241,
      "learning_rate": 4.1910083485434525e-06,
      "loss": 0.0147,
      "step": 1783180
    },
    {
      "epoch": 2.9182459103316902,
      "grad_norm": 0.20474092662334442,
      "learning_rate": 4.190942456329935e-06,
      "loss": 0.0189,
      "step": 1783200
    },
    {
      "epoch": 2.9182786407703434,
      "grad_norm": 0.3579270541667938,
      "learning_rate": 4.190876564116419e-06,
      "loss": 0.0092,
      "step": 1783220
    },
    {
      "epoch": 2.918311371208997,
      "grad_norm": 0.18178626894950867,
      "learning_rate": 4.1908106719029016e-06,
      "loss": 0.0158,
      "step": 1783240
    },
    {
      "epoch": 2.91834410164765,
      "grad_norm": 0.25367629528045654,
      "learning_rate": 4.190744779689384e-06,
      "loss": 0.0094,
      "step": 1783260
    },
    {
      "epoch": 2.9183768320863037,
      "grad_norm": 0.3901791572570801,
      "learning_rate": 4.190678887475868e-06,
      "loss": 0.0133,
      "step": 1783280
    },
    {
      "epoch": 2.918409562524957,
      "grad_norm": 0.4353705942630768,
      "learning_rate": 4.190612995262351e-06,
      "loss": 0.0111,
      "step": 1783300
    },
    {
      "epoch": 2.9184422929636105,
      "grad_norm": 0.8892282843589783,
      "learning_rate": 4.190547103048833e-06,
      "loss": 0.0149,
      "step": 1783320
    },
    {
      "epoch": 2.9184750234022636,
      "grad_norm": 0.4186854958534241,
      "learning_rate": 4.190481210835316e-06,
      "loss": 0.0142,
      "step": 1783340
    },
    {
      "epoch": 2.9185077538409168,
      "grad_norm": 0.49837806820869446,
      "learning_rate": 4.190415318621799e-06,
      "loss": 0.0131,
      "step": 1783360
    },
    {
      "epoch": 2.9185404842795704,
      "grad_norm": 0.24182139337062836,
      "learning_rate": 4.190349426408282e-06,
      "loss": 0.0166,
      "step": 1783380
    },
    {
      "epoch": 2.9185732147182235,
      "grad_norm": 0.23637302219867706,
      "learning_rate": 4.190283534194764e-06,
      "loss": 0.0118,
      "step": 1783400
    },
    {
      "epoch": 2.918605945156877,
      "grad_norm": 0.4888017475605011,
      "learning_rate": 4.190217641981247e-06,
      "loss": 0.0109,
      "step": 1783420
    },
    {
      "epoch": 2.9186386755955303,
      "grad_norm": 0.31138545274734497,
      "learning_rate": 4.19015174976773e-06,
      "loss": 0.0118,
      "step": 1783440
    },
    {
      "epoch": 2.918671406034184,
      "grad_norm": 0.20262157917022705,
      "learning_rate": 4.190085857554213e-06,
      "loss": 0.0138,
      "step": 1783460
    },
    {
      "epoch": 2.918704136472837,
      "grad_norm": 0.14320386946201324,
      "learning_rate": 4.190019965340696e-06,
      "loss": 0.0143,
      "step": 1783480
    },
    {
      "epoch": 2.91873686691149,
      "grad_norm": 0.15681298077106476,
      "learning_rate": 4.189954073127179e-06,
      "loss": 0.0166,
      "step": 1783500
    },
    {
      "epoch": 2.9187695973501437,
      "grad_norm": 0.507304847240448,
      "learning_rate": 4.189888180913662e-06,
      "loss": 0.0139,
      "step": 1783520
    },
    {
      "epoch": 2.918802327788797,
      "grad_norm": 0.7539709806442261,
      "learning_rate": 4.189822288700144e-06,
      "loss": 0.0128,
      "step": 1783540
    },
    {
      "epoch": 2.9188350582274505,
      "grad_norm": 0.24428418278694153,
      "learning_rate": 4.189756396486627e-06,
      "loss": 0.0104,
      "step": 1783560
    },
    {
      "epoch": 2.9188677886661036,
      "grad_norm": 0.3780364990234375,
      "learning_rate": 4.189690504273111e-06,
      "loss": 0.0145,
      "step": 1783580
    },
    {
      "epoch": 2.9189005191047572,
      "grad_norm": 0.10708487778902054,
      "learning_rate": 4.1896246120595934e-06,
      "loss": 0.011,
      "step": 1783600
    },
    {
      "epoch": 2.9189332495434104,
      "grad_norm": 0.1724211722612381,
      "learning_rate": 4.189558719846076e-06,
      "loss": 0.0122,
      "step": 1783620
    },
    {
      "epoch": 2.9189659799820635,
      "grad_norm": 0.42600470781326294,
      "learning_rate": 4.189492827632559e-06,
      "loss": 0.0129,
      "step": 1783640
    },
    {
      "epoch": 2.918998710420717,
      "grad_norm": 0.29764312505722046,
      "learning_rate": 4.189426935419042e-06,
      "loss": 0.0123,
      "step": 1783660
    },
    {
      "epoch": 2.9190314408593703,
      "grad_norm": 0.2778506577014923,
      "learning_rate": 4.189361043205525e-06,
      "loss": 0.0152,
      "step": 1783680
    },
    {
      "epoch": 2.919064171298024,
      "grad_norm": 0.4811263084411621,
      "learning_rate": 4.189295150992008e-06,
      "loss": 0.0168,
      "step": 1783700
    },
    {
      "epoch": 2.919096901736677,
      "grad_norm": 0.23843584954738617,
      "learning_rate": 4.189229258778491e-06,
      "loss": 0.0252,
      "step": 1783720
    },
    {
      "epoch": 2.9191296321753306,
      "grad_norm": 0.4266532063484192,
      "learning_rate": 4.1891633665649735e-06,
      "loss": 0.0198,
      "step": 1783740
    },
    {
      "epoch": 2.9191623626139838,
      "grad_norm": 0.06455866247415543,
      "learning_rate": 4.189097474351456e-06,
      "loss": 0.0126,
      "step": 1783760
    },
    {
      "epoch": 2.919195093052637,
      "grad_norm": 0.38259357213974,
      "learning_rate": 4.189031582137939e-06,
      "loss": 0.0092,
      "step": 1783780
    },
    {
      "epoch": 2.9192278234912905,
      "grad_norm": 0.05511965975165367,
      "learning_rate": 4.188965689924422e-06,
      "loss": 0.0108,
      "step": 1783800
    },
    {
      "epoch": 2.9192605539299437,
      "grad_norm": 0.4316916763782501,
      "learning_rate": 4.188899797710904e-06,
      "loss": 0.0095,
      "step": 1783820
    },
    {
      "epoch": 2.9192932843685973,
      "grad_norm": 0.2773662209510803,
      "learning_rate": 4.188833905497387e-06,
      "loss": 0.018,
      "step": 1783840
    },
    {
      "epoch": 2.9193260148072504,
      "grad_norm": 0.8130611777305603,
      "learning_rate": 4.188768013283871e-06,
      "loss": 0.0111,
      "step": 1783860
    },
    {
      "epoch": 2.919358745245904,
      "grad_norm": 0.11374849081039429,
      "learning_rate": 4.1887021210703535e-06,
      "loss": 0.015,
      "step": 1783880
    },
    {
      "epoch": 2.919391475684557,
      "grad_norm": 0.20080864429473877,
      "learning_rate": 4.188636228856836e-06,
      "loss": 0.0093,
      "step": 1783900
    },
    {
      "epoch": 2.9194242061232103,
      "grad_norm": 0.34283164143562317,
      "learning_rate": 4.18857033664332e-06,
      "loss": 0.0109,
      "step": 1783920
    },
    {
      "epoch": 2.919456936561864,
      "grad_norm": 0.23268447816371918,
      "learning_rate": 4.1885044444298025e-06,
      "loss": 0.0125,
      "step": 1783940
    },
    {
      "epoch": 2.919489667000517,
      "grad_norm": 0.12082727998495102,
      "learning_rate": 4.188438552216285e-06,
      "loss": 0.0175,
      "step": 1783960
    },
    {
      "epoch": 2.9195223974391706,
      "grad_norm": 0.2768738865852356,
      "learning_rate": 4.188372660002768e-06,
      "loss": 0.0155,
      "step": 1783980
    },
    {
      "epoch": 2.919555127877824,
      "grad_norm": 0.569843590259552,
      "learning_rate": 4.188306767789251e-06,
      "loss": 0.0119,
      "step": 1784000
    },
    {
      "epoch": 2.9195878583164774,
      "grad_norm": 0.27195021510124207,
      "learning_rate": 4.1882408755757335e-06,
      "loss": 0.0119,
      "step": 1784020
    },
    {
      "epoch": 2.9196205887551305,
      "grad_norm": 0.458522230386734,
      "learning_rate": 4.188174983362216e-06,
      "loss": 0.0125,
      "step": 1784040
    },
    {
      "epoch": 2.9196533191937837,
      "grad_norm": 0.3654523193836212,
      "learning_rate": 4.188109091148699e-06,
      "loss": 0.0117,
      "step": 1784060
    },
    {
      "epoch": 2.9196860496324373,
      "grad_norm": 0.29427796602249146,
      "learning_rate": 4.1880431989351826e-06,
      "loss": 0.0122,
      "step": 1784080
    },
    {
      "epoch": 2.9197187800710904,
      "grad_norm": 0.2210300862789154,
      "learning_rate": 4.187977306721665e-06,
      "loss": 0.0136,
      "step": 1784100
    },
    {
      "epoch": 2.9197515105097436,
      "grad_norm": 0.2057771533727646,
      "learning_rate": 4.187911414508148e-06,
      "loss": 0.0136,
      "step": 1784120
    },
    {
      "epoch": 2.919784240948397,
      "grad_norm": 0.20188525319099426,
      "learning_rate": 4.187845522294631e-06,
      "loss": 0.0114,
      "step": 1784140
    },
    {
      "epoch": 2.9198169713870508,
      "grad_norm": 0.1088828295469284,
      "learning_rate": 4.1877796300811135e-06,
      "loss": 0.0103,
      "step": 1784160
    },
    {
      "epoch": 2.919849701825704,
      "grad_norm": 0.34158092737197876,
      "learning_rate": 4.187713737867596e-06,
      "loss": 0.0095,
      "step": 1784180
    },
    {
      "epoch": 2.919882432264357,
      "grad_norm": 1.6516778469085693,
      "learning_rate": 4.187647845654079e-06,
      "loss": 0.0201,
      "step": 1784200
    },
    {
      "epoch": 2.9199151627030107,
      "grad_norm": 0.4303613305091858,
      "learning_rate": 4.187581953440562e-06,
      "loss": 0.0075,
      "step": 1784220
    },
    {
      "epoch": 2.919947893141664,
      "grad_norm": 0.3516009449958801,
      "learning_rate": 4.1875160612270445e-06,
      "loss": 0.0112,
      "step": 1784240
    },
    {
      "epoch": 2.919980623580317,
      "grad_norm": 0.2528732419013977,
      "learning_rate": 4.187450169013528e-06,
      "loss": 0.0108,
      "step": 1784260
    },
    {
      "epoch": 2.9200133540189706,
      "grad_norm": 0.6793666481971741,
      "learning_rate": 4.187384276800011e-06,
      "loss": 0.0134,
      "step": 1784280
    },
    {
      "epoch": 2.920046084457624,
      "grad_norm": 0.71908038854599,
      "learning_rate": 4.1873183845864936e-06,
      "loss": 0.0099,
      "step": 1784300
    },
    {
      "epoch": 2.9200788148962773,
      "grad_norm": 0.19002807140350342,
      "learning_rate": 4.187252492372977e-06,
      "loss": 0.016,
      "step": 1784320
    },
    {
      "epoch": 2.9201115453349304,
      "grad_norm": 0.11353914439678192,
      "learning_rate": 4.18718660015946e-06,
      "loss": 0.0075,
      "step": 1784340
    },
    {
      "epoch": 2.920144275773584,
      "grad_norm": 0.7577897906303406,
      "learning_rate": 4.187120707945943e-06,
      "loss": 0.0183,
      "step": 1784360
    },
    {
      "epoch": 2.920177006212237,
      "grad_norm": 0.3043482005596161,
      "learning_rate": 4.187054815732425e-06,
      "loss": 0.0196,
      "step": 1784380
    },
    {
      "epoch": 2.9202097366508903,
      "grad_norm": 0.45071741938591003,
      "learning_rate": 4.186988923518908e-06,
      "loss": 0.0122,
      "step": 1784400
    },
    {
      "epoch": 2.920242467089544,
      "grad_norm": 0.49812790751457214,
      "learning_rate": 4.186923031305391e-06,
      "loss": 0.0153,
      "step": 1784420
    },
    {
      "epoch": 2.9202751975281975,
      "grad_norm": 0.630676805973053,
      "learning_rate": 4.186857139091874e-06,
      "loss": 0.0116,
      "step": 1784440
    },
    {
      "epoch": 2.9203079279668507,
      "grad_norm": 0.330702543258667,
      "learning_rate": 4.186791246878356e-06,
      "loss": 0.015,
      "step": 1784460
    },
    {
      "epoch": 2.920340658405504,
      "grad_norm": 0.17313411831855774,
      "learning_rate": 4.18672535466484e-06,
      "loss": 0.0134,
      "step": 1784480
    },
    {
      "epoch": 2.9203733888441574,
      "grad_norm": 0.3500036299228668,
      "learning_rate": 4.186659462451323e-06,
      "loss": 0.0119,
      "step": 1784500
    },
    {
      "epoch": 2.9204061192828106,
      "grad_norm": 0.2637309730052948,
      "learning_rate": 4.186593570237805e-06,
      "loss": 0.0153,
      "step": 1784520
    },
    {
      "epoch": 2.9204388497214637,
      "grad_norm": 0.24525165557861328,
      "learning_rate": 4.186527678024288e-06,
      "loss": 0.0147,
      "step": 1784540
    },
    {
      "epoch": 2.9204715801601173,
      "grad_norm": 0.1905091106891632,
      "learning_rate": 4.186461785810771e-06,
      "loss": 0.0068,
      "step": 1784560
    },
    {
      "epoch": 2.920504310598771,
      "grad_norm": 0.1197035163640976,
      "learning_rate": 4.186395893597254e-06,
      "loss": 0.0179,
      "step": 1784580
    },
    {
      "epoch": 2.920537041037424,
      "grad_norm": 0.2851349115371704,
      "learning_rate": 4.186330001383736e-06,
      "loss": 0.0157,
      "step": 1784600
    },
    {
      "epoch": 2.920569771476077,
      "grad_norm": 0.22673986852169037,
      "learning_rate": 4.18626410917022e-06,
      "loss": 0.0144,
      "step": 1784620
    },
    {
      "epoch": 2.920602501914731,
      "grad_norm": 0.3794754445552826,
      "learning_rate": 4.186198216956703e-06,
      "loss": 0.0168,
      "step": 1784640
    },
    {
      "epoch": 2.920635232353384,
      "grad_norm": 0.09869647026062012,
      "learning_rate": 4.186132324743185e-06,
      "loss": 0.014,
      "step": 1784660
    },
    {
      "epoch": 2.920667962792037,
      "grad_norm": 0.18834352493286133,
      "learning_rate": 4.186066432529668e-06,
      "loss": 0.0134,
      "step": 1784680
    },
    {
      "epoch": 2.9207006932306907,
      "grad_norm": 0.9416751861572266,
      "learning_rate": 4.186000540316152e-06,
      "loss": 0.0116,
      "step": 1784700
    },
    {
      "epoch": 2.920733423669344,
      "grad_norm": 0.3593582808971405,
      "learning_rate": 4.1859346481026345e-06,
      "loss": 0.0155,
      "step": 1784720
    },
    {
      "epoch": 2.9207661541079974,
      "grad_norm": 0.5033482909202576,
      "learning_rate": 4.185868755889117e-06,
      "loss": 0.0116,
      "step": 1784740
    },
    {
      "epoch": 2.9207988845466506,
      "grad_norm": 0.6365482807159424,
      "learning_rate": 4.1858028636756e-06,
      "loss": 0.0131,
      "step": 1784760
    },
    {
      "epoch": 2.920831614985304,
      "grad_norm": 0.12945277988910675,
      "learning_rate": 4.185736971462083e-06,
      "loss": 0.0124,
      "step": 1784780
    },
    {
      "epoch": 2.9208643454239573,
      "grad_norm": 0.24702203273773193,
      "learning_rate": 4.1856710792485654e-06,
      "loss": 0.0132,
      "step": 1784800
    },
    {
      "epoch": 2.9208970758626105,
      "grad_norm": 0.2656295597553253,
      "learning_rate": 4.185605187035048e-06,
      "loss": 0.0135,
      "step": 1784820
    },
    {
      "epoch": 2.920929806301264,
      "grad_norm": 0.2863740622997284,
      "learning_rate": 4.185539294821531e-06,
      "loss": 0.0122,
      "step": 1784840
    },
    {
      "epoch": 2.9209625367399172,
      "grad_norm": 0.186947301030159,
      "learning_rate": 4.185473402608014e-06,
      "loss": 0.0135,
      "step": 1784860
    },
    {
      "epoch": 2.920995267178571,
      "grad_norm": 0.47215956449508667,
      "learning_rate": 4.185407510394497e-06,
      "loss": 0.0154,
      "step": 1784880
    },
    {
      "epoch": 2.921027997617224,
      "grad_norm": 1.066989541053772,
      "learning_rate": 4.18534161818098e-06,
      "loss": 0.0153,
      "step": 1784900
    },
    {
      "epoch": 2.9210607280558776,
      "grad_norm": 0.48736023902893066,
      "learning_rate": 4.185275725967463e-06,
      "loss": 0.0158,
      "step": 1784920
    },
    {
      "epoch": 2.9210934584945307,
      "grad_norm": 0.49236324429512024,
      "learning_rate": 4.1852098337539455e-06,
      "loss": 0.0173,
      "step": 1784940
    },
    {
      "epoch": 2.921126188933184,
      "grad_norm": 0.5846564173698425,
      "learning_rate": 4.185143941540428e-06,
      "loss": 0.0183,
      "step": 1784960
    },
    {
      "epoch": 2.9211589193718375,
      "grad_norm": 0.34552934765815735,
      "learning_rate": 4.185078049326912e-06,
      "loss": 0.0112,
      "step": 1784980
    },
    {
      "epoch": 2.9211916498104906,
      "grad_norm": 0.4259784519672394,
      "learning_rate": 4.1850121571133945e-06,
      "loss": 0.015,
      "step": 1785000
    },
    {
      "epoch": 2.921224380249144,
      "grad_norm": 0.22136008739471436,
      "learning_rate": 4.184946264899877e-06,
      "loss": 0.0117,
      "step": 1785020
    },
    {
      "epoch": 2.9212571106877974,
      "grad_norm": 0.20592960715293884,
      "learning_rate": 4.18488037268636e-06,
      "loss": 0.0114,
      "step": 1785040
    },
    {
      "epoch": 2.921289841126451,
      "grad_norm": 0.5155588984489441,
      "learning_rate": 4.184814480472843e-06,
      "loss": 0.0118,
      "step": 1785060
    },
    {
      "epoch": 2.921322571565104,
      "grad_norm": 0.6999818682670593,
      "learning_rate": 4.1847485882593255e-06,
      "loss": 0.0203,
      "step": 1785080
    },
    {
      "epoch": 2.9213553020037573,
      "grad_norm": 0.25908881425857544,
      "learning_rate": 4.184682696045809e-06,
      "loss": 0.0108,
      "step": 1785100
    },
    {
      "epoch": 2.921388032442411,
      "grad_norm": 0.6288278698921204,
      "learning_rate": 4.184616803832292e-06,
      "loss": 0.0108,
      "step": 1785120
    },
    {
      "epoch": 2.921420762881064,
      "grad_norm": 0.11034532636404037,
      "learning_rate": 4.1845509116187746e-06,
      "loss": 0.0114,
      "step": 1785140
    },
    {
      "epoch": 2.9214534933197176,
      "grad_norm": 0.20810338854789734,
      "learning_rate": 4.184485019405257e-06,
      "loss": 0.0078,
      "step": 1785160
    },
    {
      "epoch": 2.9214862237583707,
      "grad_norm": 0.4595070779323578,
      "learning_rate": 4.18441912719174e-06,
      "loss": 0.0197,
      "step": 1785180
    },
    {
      "epoch": 2.9215189541970243,
      "grad_norm": 0.4727858901023865,
      "learning_rate": 4.184353234978223e-06,
      "loss": 0.0174,
      "step": 1785200
    },
    {
      "epoch": 2.9215516846356775,
      "grad_norm": 0.5370036363601685,
      "learning_rate": 4.1842873427647055e-06,
      "loss": 0.0095,
      "step": 1785220
    },
    {
      "epoch": 2.9215844150743306,
      "grad_norm": 0.4766846299171448,
      "learning_rate": 4.184221450551188e-06,
      "loss": 0.0109,
      "step": 1785240
    },
    {
      "epoch": 2.9216171455129842,
      "grad_norm": 0.4958396255970001,
      "learning_rate": 4.184155558337671e-06,
      "loss": 0.0153,
      "step": 1785260
    },
    {
      "epoch": 2.9216498759516374,
      "grad_norm": 1.2414538860321045,
      "learning_rate": 4.184089666124155e-06,
      "loss": 0.0193,
      "step": 1785280
    },
    {
      "epoch": 2.921682606390291,
      "grad_norm": 0.12226201593875885,
      "learning_rate": 4.184023773910637e-06,
      "loss": 0.0157,
      "step": 1785300
    },
    {
      "epoch": 2.921715336828944,
      "grad_norm": 0.28033483028411865,
      "learning_rate": 4.18395788169712e-06,
      "loss": 0.0106,
      "step": 1785320
    },
    {
      "epoch": 2.9217480672675977,
      "grad_norm": 0.7279047966003418,
      "learning_rate": 4.183891989483604e-06,
      "loss": 0.0144,
      "step": 1785340
    },
    {
      "epoch": 2.921780797706251,
      "grad_norm": 0.3251136243343353,
      "learning_rate": 4.183826097270086e-06,
      "loss": 0.015,
      "step": 1785360
    },
    {
      "epoch": 2.921813528144904,
      "grad_norm": 0.2802712321281433,
      "learning_rate": 4.183760205056569e-06,
      "loss": 0.0093,
      "step": 1785380
    },
    {
      "epoch": 2.9218462585835576,
      "grad_norm": 0.430759996175766,
      "learning_rate": 4.183694312843052e-06,
      "loss": 0.0113,
      "step": 1785400
    },
    {
      "epoch": 2.9218789890222108,
      "grad_norm": 0.3964177668094635,
      "learning_rate": 4.183628420629535e-06,
      "loss": 0.0132,
      "step": 1785420
    },
    {
      "epoch": 2.9219117194608644,
      "grad_norm": 0.08725561946630478,
      "learning_rate": 4.183562528416017e-06,
      "loss": 0.0093,
      "step": 1785440
    },
    {
      "epoch": 2.9219444498995175,
      "grad_norm": 0.19978435337543488,
      "learning_rate": 4.1834966362025e-06,
      "loss": 0.0083,
      "step": 1785460
    },
    {
      "epoch": 2.921977180338171,
      "grad_norm": 0.3094119727611542,
      "learning_rate": 4.183430743988983e-06,
      "loss": 0.0205,
      "step": 1785480
    },
    {
      "epoch": 2.9220099107768243,
      "grad_norm": 0.2902311682701111,
      "learning_rate": 4.183364851775466e-06,
      "loss": 0.0172,
      "step": 1785500
    },
    {
      "epoch": 2.9220426412154774,
      "grad_norm": 0.177719846367836,
      "learning_rate": 4.183298959561949e-06,
      "loss": 0.0123,
      "step": 1785520
    },
    {
      "epoch": 2.922075371654131,
      "grad_norm": 0.18187662959098816,
      "learning_rate": 4.183233067348432e-06,
      "loss": 0.0143,
      "step": 1785540
    },
    {
      "epoch": 2.922108102092784,
      "grad_norm": 0.8236603736877441,
      "learning_rate": 4.183167175134915e-06,
      "loss": 0.015,
      "step": 1785560
    },
    {
      "epoch": 2.9221408325314377,
      "grad_norm": 0.6069349646568298,
      "learning_rate": 4.183101282921397e-06,
      "loss": 0.0133,
      "step": 1785580
    },
    {
      "epoch": 2.922173562970091,
      "grad_norm": 0.23945416510105133,
      "learning_rate": 4.18303539070788e-06,
      "loss": 0.0133,
      "step": 1785600
    },
    {
      "epoch": 2.9222062934087445,
      "grad_norm": 0.07392796874046326,
      "learning_rate": 4.182969498494363e-06,
      "loss": 0.0134,
      "step": 1785620
    },
    {
      "epoch": 2.9222390238473976,
      "grad_norm": 0.8222244381904602,
      "learning_rate": 4.182903606280846e-06,
      "loss": 0.0117,
      "step": 1785640
    },
    {
      "epoch": 2.922271754286051,
      "grad_norm": 0.1190829947590828,
      "learning_rate": 4.182837714067328e-06,
      "loss": 0.0142,
      "step": 1785660
    },
    {
      "epoch": 2.9223044847247044,
      "grad_norm": 0.518292248249054,
      "learning_rate": 4.182771821853812e-06,
      "loss": 0.0179,
      "step": 1785680
    },
    {
      "epoch": 2.9223372151633575,
      "grad_norm": 0.3387697637081146,
      "learning_rate": 4.182705929640295e-06,
      "loss": 0.0153,
      "step": 1785700
    },
    {
      "epoch": 2.9223699456020107,
      "grad_norm": 0.25069236755371094,
      "learning_rate": 4.182640037426778e-06,
      "loss": 0.013,
      "step": 1785720
    },
    {
      "epoch": 2.9224026760406643,
      "grad_norm": 0.11861004680395126,
      "learning_rate": 4.182574145213261e-06,
      "loss": 0.013,
      "step": 1785740
    },
    {
      "epoch": 2.922435406479318,
      "grad_norm": 0.10830798000097275,
      "learning_rate": 4.182508252999744e-06,
      "loss": 0.0109,
      "step": 1785760
    },
    {
      "epoch": 2.922468136917971,
      "grad_norm": 0.24528852105140686,
      "learning_rate": 4.1824423607862265e-06,
      "loss": 0.0115,
      "step": 1785780
    },
    {
      "epoch": 2.922500867356624,
      "grad_norm": 0.21718907356262207,
      "learning_rate": 4.182376468572709e-06,
      "loss": 0.0099,
      "step": 1785800
    },
    {
      "epoch": 2.9225335977952778,
      "grad_norm": 0.6159029603004456,
      "learning_rate": 4.182310576359192e-06,
      "loss": 0.0151,
      "step": 1785820
    },
    {
      "epoch": 2.922566328233931,
      "grad_norm": 0.2446395605802536,
      "learning_rate": 4.182244684145675e-06,
      "loss": 0.0154,
      "step": 1785840
    },
    {
      "epoch": 2.922599058672584,
      "grad_norm": 0.4083067774772644,
      "learning_rate": 4.182178791932157e-06,
      "loss": 0.009,
      "step": 1785860
    },
    {
      "epoch": 2.9226317891112377,
      "grad_norm": 0.4110250174999237,
      "learning_rate": 4.18211289971864e-06,
      "loss": 0.0147,
      "step": 1785880
    },
    {
      "epoch": 2.9226645195498913,
      "grad_norm": 0.15458816289901733,
      "learning_rate": 4.182047007505124e-06,
      "loss": 0.0121,
      "step": 1785900
    },
    {
      "epoch": 2.9226972499885444,
      "grad_norm": 0.4433061480522156,
      "learning_rate": 4.1819811152916065e-06,
      "loss": 0.012,
      "step": 1785920
    },
    {
      "epoch": 2.9227299804271976,
      "grad_norm": 0.7482021450996399,
      "learning_rate": 4.181915223078089e-06,
      "loss": 0.012,
      "step": 1785940
    },
    {
      "epoch": 2.922762710865851,
      "grad_norm": 0.6384840607643127,
      "learning_rate": 4.181849330864572e-06,
      "loss": 0.0121,
      "step": 1785960
    },
    {
      "epoch": 2.9227954413045043,
      "grad_norm": 0.516314685344696,
      "learning_rate": 4.181783438651055e-06,
      "loss": 0.0099,
      "step": 1785980
    },
    {
      "epoch": 2.9228281717431575,
      "grad_norm": 0.22914807498455048,
      "learning_rate": 4.1817175464375374e-06,
      "loss": 0.0084,
      "step": 1786000
    },
    {
      "epoch": 2.922860902181811,
      "grad_norm": 0.40518465638160706,
      "learning_rate": 4.18165165422402e-06,
      "loss": 0.0134,
      "step": 1786020
    },
    {
      "epoch": 2.9228936326204646,
      "grad_norm": 0.616602897644043,
      "learning_rate": 4.181585762010504e-06,
      "loss": 0.0168,
      "step": 1786040
    },
    {
      "epoch": 2.922926363059118,
      "grad_norm": 0.47555479407310486,
      "learning_rate": 4.1815198697969865e-06,
      "loss": 0.0151,
      "step": 1786060
    },
    {
      "epoch": 2.922959093497771,
      "grad_norm": 0.24688833951950073,
      "learning_rate": 4.181453977583469e-06,
      "loss": 0.0146,
      "step": 1786080
    },
    {
      "epoch": 2.9229918239364245,
      "grad_norm": 0.18849876523017883,
      "learning_rate": 4.181388085369952e-06,
      "loss": 0.0137,
      "step": 1786100
    },
    {
      "epoch": 2.9230245543750777,
      "grad_norm": 0.12328912317752838,
      "learning_rate": 4.181322193156436e-06,
      "loss": 0.0085,
      "step": 1786120
    },
    {
      "epoch": 2.923057284813731,
      "grad_norm": 0.20180170238018036,
      "learning_rate": 4.181256300942918e-06,
      "loss": 0.0167,
      "step": 1786140
    },
    {
      "epoch": 2.9230900152523844,
      "grad_norm": 0.24913261830806732,
      "learning_rate": 4.181190408729401e-06,
      "loss": 0.012,
      "step": 1786160
    },
    {
      "epoch": 2.923122745691038,
      "grad_norm": 0.6430767774581909,
      "learning_rate": 4.181124516515884e-06,
      "loss": 0.0179,
      "step": 1786180
    },
    {
      "epoch": 2.923155476129691,
      "grad_norm": 0.2797941267490387,
      "learning_rate": 4.1810586243023665e-06,
      "loss": 0.0158,
      "step": 1786200
    },
    {
      "epoch": 2.9231882065683443,
      "grad_norm": 0.40369439125061035,
      "learning_rate": 4.180992732088849e-06,
      "loss": 0.0157,
      "step": 1786220
    },
    {
      "epoch": 2.923220937006998,
      "grad_norm": 0.2621534466743469,
      "learning_rate": 4.180926839875332e-06,
      "loss": 0.0111,
      "step": 1786240
    },
    {
      "epoch": 2.923253667445651,
      "grad_norm": 0.3369009494781494,
      "learning_rate": 4.180860947661815e-06,
      "loss": 0.0173,
      "step": 1786260
    },
    {
      "epoch": 2.923286397884304,
      "grad_norm": 0.950329601764679,
      "learning_rate": 4.1807950554482975e-06,
      "loss": 0.0108,
      "step": 1786280
    },
    {
      "epoch": 2.923319128322958,
      "grad_norm": 0.5604227185249329,
      "learning_rate": 4.180729163234781e-06,
      "loss": 0.0123,
      "step": 1786300
    },
    {
      "epoch": 2.923351858761611,
      "grad_norm": 0.2193985879421234,
      "learning_rate": 4.180663271021264e-06,
      "loss": 0.0114,
      "step": 1786320
    },
    {
      "epoch": 2.9233845892002646,
      "grad_norm": 0.4489893317222595,
      "learning_rate": 4.1805973788077466e-06,
      "loss": 0.0139,
      "step": 1786340
    },
    {
      "epoch": 2.9234173196389177,
      "grad_norm": 0.1633676290512085,
      "learning_rate": 4.180531486594229e-06,
      "loss": 0.0125,
      "step": 1786360
    },
    {
      "epoch": 2.9234500500775713,
      "grad_norm": 0.12095242738723755,
      "learning_rate": 4.180465594380713e-06,
      "loss": 0.0105,
      "step": 1786380
    },
    {
      "epoch": 2.9234827805162245,
      "grad_norm": 0.5907931923866272,
      "learning_rate": 4.180399702167196e-06,
      "loss": 0.0144,
      "step": 1786400
    },
    {
      "epoch": 2.9235155109548776,
      "grad_norm": 0.3608636260032654,
      "learning_rate": 4.180333809953678e-06,
      "loss": 0.0126,
      "step": 1786420
    },
    {
      "epoch": 2.923548241393531,
      "grad_norm": 0.46765950322151184,
      "learning_rate": 4.180267917740161e-06,
      "loss": 0.0126,
      "step": 1786440
    },
    {
      "epoch": 2.9235809718321843,
      "grad_norm": 0.1910180002450943,
      "learning_rate": 4.180202025526644e-06,
      "loss": 0.0116,
      "step": 1786460
    },
    {
      "epoch": 2.923613702270838,
      "grad_norm": 0.3660251498222351,
      "learning_rate": 4.180136133313127e-06,
      "loss": 0.0121,
      "step": 1786480
    },
    {
      "epoch": 2.923646432709491,
      "grad_norm": 0.34528812766075134,
      "learning_rate": 4.180070241099609e-06,
      "loss": 0.0151,
      "step": 1786500
    },
    {
      "epoch": 2.9236791631481447,
      "grad_norm": 0.54942387342453,
      "learning_rate": 4.180004348886093e-06,
      "loss": 0.0108,
      "step": 1786520
    },
    {
      "epoch": 2.923711893586798,
      "grad_norm": 0.6689001321792603,
      "learning_rate": 4.179938456672576e-06,
      "loss": 0.0163,
      "step": 1786540
    },
    {
      "epoch": 2.923744624025451,
      "grad_norm": 0.2832534909248352,
      "learning_rate": 4.179872564459058e-06,
      "loss": 0.0167,
      "step": 1786560
    },
    {
      "epoch": 2.9237773544641046,
      "grad_norm": 0.563193142414093,
      "learning_rate": 4.179806672245541e-06,
      "loss": 0.0168,
      "step": 1786580
    },
    {
      "epoch": 2.9238100849027577,
      "grad_norm": 0.21704304218292236,
      "learning_rate": 4.179740780032024e-06,
      "loss": 0.0175,
      "step": 1786600
    },
    {
      "epoch": 2.9238428153414113,
      "grad_norm": 0.4346061646938324,
      "learning_rate": 4.179674887818507e-06,
      "loss": 0.0112,
      "step": 1786620
    },
    {
      "epoch": 2.9238755457800645,
      "grad_norm": 0.15549638867378235,
      "learning_rate": 4.179608995604989e-06,
      "loss": 0.0095,
      "step": 1786640
    },
    {
      "epoch": 2.923908276218718,
      "grad_norm": 0.3917368948459625,
      "learning_rate": 4.179543103391472e-06,
      "loss": 0.0197,
      "step": 1786660
    },
    {
      "epoch": 2.9239410066573712,
      "grad_norm": 0.5831327438354492,
      "learning_rate": 4.179477211177955e-06,
      "loss": 0.0209,
      "step": 1786680
    },
    {
      "epoch": 2.9239737370960244,
      "grad_norm": 0.13136479258537292,
      "learning_rate": 4.179411318964438e-06,
      "loss": 0.022,
      "step": 1786700
    },
    {
      "epoch": 2.924006467534678,
      "grad_norm": 0.03828493133187294,
      "learning_rate": 4.179345426750921e-06,
      "loss": 0.0168,
      "step": 1786720
    },
    {
      "epoch": 2.924039197973331,
      "grad_norm": 0.21310856938362122,
      "learning_rate": 4.179279534537404e-06,
      "loss": 0.0157,
      "step": 1786740
    },
    {
      "epoch": 2.9240719284119847,
      "grad_norm": Infinity,
      "learning_rate": 4.1792136423238875e-06,
      "loss": 0.0132,
      "step": 1786760
    },
    {
      "epoch": 2.924104658850638,
      "grad_norm": 0.9278591275215149,
      "learning_rate": 4.17914775011037e-06,
      "loss": 0.0133,
      "step": 1786780
    },
    {
      "epoch": 2.9241373892892915,
      "grad_norm": 0.05779479444026947,
      "learning_rate": 4.179081857896853e-06,
      "loss": 0.0136,
      "step": 1786800
    },
    {
      "epoch": 2.9241701197279446,
      "grad_norm": 0.5135089159011841,
      "learning_rate": 4.179015965683336e-06,
      "loss": 0.0093,
      "step": 1786820
    },
    {
      "epoch": 2.9242028501665978,
      "grad_norm": 0.4238077700138092,
      "learning_rate": 4.1789500734698184e-06,
      "loss": 0.0137,
      "step": 1786840
    },
    {
      "epoch": 2.9242355806052514,
      "grad_norm": 0.5928794145584106,
      "learning_rate": 4.178884181256301e-06,
      "loss": 0.0101,
      "step": 1786860
    },
    {
      "epoch": 2.9242683110439045,
      "grad_norm": 0.5088955760002136,
      "learning_rate": 4.178818289042784e-06,
      "loss": 0.0176,
      "step": 1786880
    },
    {
      "epoch": 2.924301041482558,
      "grad_norm": 0.46737033128738403,
      "learning_rate": 4.178752396829267e-06,
      "loss": 0.0216,
      "step": 1786900
    },
    {
      "epoch": 2.9243337719212112,
      "grad_norm": 0.30863696336746216,
      "learning_rate": 4.17868650461575e-06,
      "loss": 0.0142,
      "step": 1786920
    },
    {
      "epoch": 2.924366502359865,
      "grad_norm": 0.5551717281341553,
      "learning_rate": 4.178620612402233e-06,
      "loss": 0.0122,
      "step": 1786940
    },
    {
      "epoch": 2.924399232798518,
      "grad_norm": 0.22531816363334656,
      "learning_rate": 4.178554720188716e-06,
      "loss": 0.0134,
      "step": 1786960
    },
    {
      "epoch": 2.924431963237171,
      "grad_norm": 0.24053658545017242,
      "learning_rate": 4.1784888279751985e-06,
      "loss": 0.0108,
      "step": 1786980
    },
    {
      "epoch": 2.9244646936758247,
      "grad_norm": 0.04848194122314453,
      "learning_rate": 4.178422935761681e-06,
      "loss": 0.0107,
      "step": 1787000
    },
    {
      "epoch": 2.924497424114478,
      "grad_norm": 0.1391669362783432,
      "learning_rate": 4.178357043548164e-06,
      "loss": 0.0115,
      "step": 1787020
    },
    {
      "epoch": 2.9245301545531315,
      "grad_norm": 0.22362671792507172,
      "learning_rate": 4.178291151334647e-06,
      "loss": 0.0057,
      "step": 1787040
    },
    {
      "epoch": 2.9245628849917846,
      "grad_norm": 0.20107580721378326,
      "learning_rate": 4.1782252591211294e-06,
      "loss": 0.0156,
      "step": 1787060
    },
    {
      "epoch": 2.9245956154304382,
      "grad_norm": 0.19830110669136047,
      "learning_rate": 4.178159366907613e-06,
      "loss": 0.0143,
      "step": 1787080
    },
    {
      "epoch": 2.9246283458690914,
      "grad_norm": 0.354402631521225,
      "learning_rate": 4.178093474694096e-06,
      "loss": 0.0121,
      "step": 1787100
    },
    {
      "epoch": 2.9246610763077445,
      "grad_norm": 0.2625587284564972,
      "learning_rate": 4.1780275824805785e-06,
      "loss": 0.0112,
      "step": 1787120
    },
    {
      "epoch": 2.924693806746398,
      "grad_norm": 0.38460463285446167,
      "learning_rate": 4.177961690267062e-06,
      "loss": 0.0082,
      "step": 1787140
    },
    {
      "epoch": 2.9247265371850513,
      "grad_norm": 0.41393017768859863,
      "learning_rate": 4.177895798053545e-06,
      "loss": 0.0159,
      "step": 1787160
    },
    {
      "epoch": 2.9247592676237044,
      "grad_norm": 0.02337373048067093,
      "learning_rate": 4.1778299058400276e-06,
      "loss": 0.0116,
      "step": 1787180
    },
    {
      "epoch": 2.924791998062358,
      "grad_norm": 0.17700894176959991,
      "learning_rate": 4.17776401362651e-06,
      "loss": 0.0112,
      "step": 1787200
    },
    {
      "epoch": 2.9248247285010116,
      "grad_norm": 0.4040015935897827,
      "learning_rate": 4.177698121412993e-06,
      "loss": 0.0167,
      "step": 1787220
    },
    {
      "epoch": 2.9248574589396648,
      "grad_norm": 0.04184595122933388,
      "learning_rate": 4.177632229199476e-06,
      "loss": 0.014,
      "step": 1787240
    },
    {
      "epoch": 2.924890189378318,
      "grad_norm": 0.24315550923347473,
      "learning_rate": 4.1775663369859585e-06,
      "loss": 0.0111,
      "step": 1787260
    },
    {
      "epoch": 2.9249229198169715,
      "grad_norm": 0.3227182924747467,
      "learning_rate": 4.177500444772441e-06,
      "loss": 0.013,
      "step": 1787280
    },
    {
      "epoch": 2.9249556502556247,
      "grad_norm": 0.06843654066324234,
      "learning_rate": 4.177434552558924e-06,
      "loss": 0.0094,
      "step": 1787300
    },
    {
      "epoch": 2.924988380694278,
      "grad_norm": 0.36542999744415283,
      "learning_rate": 4.177368660345408e-06,
      "loss": 0.0144,
      "step": 1787320
    },
    {
      "epoch": 2.9250211111329314,
      "grad_norm": 0.42912521958351135,
      "learning_rate": 4.17730276813189e-06,
      "loss": 0.0151,
      "step": 1787340
    },
    {
      "epoch": 2.925053841571585,
      "grad_norm": 0.2818131148815155,
      "learning_rate": 4.177236875918373e-06,
      "loss": 0.0111,
      "step": 1787360
    },
    {
      "epoch": 2.925086572010238,
      "grad_norm": 0.21041321754455566,
      "learning_rate": 4.177170983704856e-06,
      "loss": 0.0114,
      "step": 1787380
    },
    {
      "epoch": 2.9251193024488913,
      "grad_norm": 0.3128718137741089,
      "learning_rate": 4.1771050914913385e-06,
      "loss": 0.0148,
      "step": 1787400
    },
    {
      "epoch": 2.925152032887545,
      "grad_norm": 0.2911287844181061,
      "learning_rate": 4.177039199277821e-06,
      "loss": 0.0136,
      "step": 1787420
    },
    {
      "epoch": 2.925184763326198,
      "grad_norm": 0.5842868685722351,
      "learning_rate": 4.176973307064305e-06,
      "loss": 0.0194,
      "step": 1787440
    },
    {
      "epoch": 2.925217493764851,
      "grad_norm": 0.6020945310592651,
      "learning_rate": 4.176907414850788e-06,
      "loss": 0.0128,
      "step": 1787460
    },
    {
      "epoch": 2.925250224203505,
      "grad_norm": 0.4010666012763977,
      "learning_rate": 4.17684152263727e-06,
      "loss": 0.0128,
      "step": 1787480
    },
    {
      "epoch": 2.9252829546421584,
      "grad_norm": 0.817215085029602,
      "learning_rate": 4.176775630423753e-06,
      "loss": 0.0121,
      "step": 1787500
    },
    {
      "epoch": 2.9253156850808115,
      "grad_norm": 0.4798949062824249,
      "learning_rate": 4.176709738210236e-06,
      "loss": 0.0125,
      "step": 1787520
    },
    {
      "epoch": 2.9253484155194647,
      "grad_norm": 0.8627563714981079,
      "learning_rate": 4.176643845996719e-06,
      "loss": 0.0153,
      "step": 1787540
    },
    {
      "epoch": 2.9253811459581183,
      "grad_norm": 0.35159924626350403,
      "learning_rate": 4.176577953783202e-06,
      "loss": 0.0092,
      "step": 1787560
    },
    {
      "epoch": 2.9254138763967714,
      "grad_norm": 0.21197426319122314,
      "learning_rate": 4.176512061569685e-06,
      "loss": 0.0123,
      "step": 1787580
    },
    {
      "epoch": 2.9254466068354246,
      "grad_norm": 0.8495975732803345,
      "learning_rate": 4.176446169356168e-06,
      "loss": 0.0182,
      "step": 1787600
    },
    {
      "epoch": 2.925479337274078,
      "grad_norm": 0.6308068037033081,
      "learning_rate": 4.17638027714265e-06,
      "loss": 0.0144,
      "step": 1787620
    },
    {
      "epoch": 2.9255120677127318,
      "grad_norm": 0.2847950756549835,
      "learning_rate": 4.176314384929133e-06,
      "loss": 0.0153,
      "step": 1787640
    },
    {
      "epoch": 2.925544798151385,
      "grad_norm": 0.3321116268634796,
      "learning_rate": 4.176248492715616e-06,
      "loss": 0.0161,
      "step": 1787660
    },
    {
      "epoch": 2.925577528590038,
      "grad_norm": 0.3480375111103058,
      "learning_rate": 4.176182600502099e-06,
      "loss": 0.0137,
      "step": 1787680
    },
    {
      "epoch": 2.9256102590286917,
      "grad_norm": 0.5726408958435059,
      "learning_rate": 4.176116708288581e-06,
      "loss": 0.0173,
      "step": 1787700
    },
    {
      "epoch": 2.925642989467345,
      "grad_norm": 0.21999205648899078,
      "learning_rate": 4.176050816075065e-06,
      "loss": 0.0193,
      "step": 1787720
    },
    {
      "epoch": 2.925675719905998,
      "grad_norm": 0.13455672562122345,
      "learning_rate": 4.175984923861548e-06,
      "loss": 0.0098,
      "step": 1787740
    },
    {
      "epoch": 2.9257084503446515,
      "grad_norm": 0.48929306864738464,
      "learning_rate": 4.17591903164803e-06,
      "loss": 0.0155,
      "step": 1787760
    },
    {
      "epoch": 2.9257411807833047,
      "grad_norm": 0.1819426417350769,
      "learning_rate": 4.175853139434513e-06,
      "loss": 0.0153,
      "step": 1787780
    },
    {
      "epoch": 2.9257739112219583,
      "grad_norm": 0.3241082429885864,
      "learning_rate": 4.175787247220997e-06,
      "loss": 0.018,
      "step": 1787800
    },
    {
      "epoch": 2.9258066416606114,
      "grad_norm": 0.6293792128562927,
      "learning_rate": 4.1757213550074795e-06,
      "loss": 0.0186,
      "step": 1787820
    },
    {
      "epoch": 2.925839372099265,
      "grad_norm": 0.2368062138557434,
      "learning_rate": 4.175655462793962e-06,
      "loss": 0.0104,
      "step": 1787840
    },
    {
      "epoch": 2.925872102537918,
      "grad_norm": 0.121371790766716,
      "learning_rate": 4.175589570580445e-06,
      "loss": 0.0082,
      "step": 1787860
    },
    {
      "epoch": 2.9259048329765713,
      "grad_norm": 0.8603407144546509,
      "learning_rate": 4.175523678366928e-06,
      "loss": 0.0095,
      "step": 1787880
    },
    {
      "epoch": 2.925937563415225,
      "grad_norm": 0.2962249517440796,
      "learning_rate": 4.1754577861534104e-06,
      "loss": 0.0143,
      "step": 1787900
    },
    {
      "epoch": 2.925970293853878,
      "grad_norm": 0.17661583423614502,
      "learning_rate": 4.175391893939893e-06,
      "loss": 0.0107,
      "step": 1787920
    },
    {
      "epoch": 2.9260030242925317,
      "grad_norm": 0.31102174520492554,
      "learning_rate": 4.175326001726377e-06,
      "loss": 0.012,
      "step": 1787940
    },
    {
      "epoch": 2.926035754731185,
      "grad_norm": 0.35311469435691833,
      "learning_rate": 4.1752601095128595e-06,
      "loss": 0.014,
      "step": 1787960
    },
    {
      "epoch": 2.9260684851698384,
      "grad_norm": 0.2601105272769928,
      "learning_rate": 4.175194217299342e-06,
      "loss": 0.0071,
      "step": 1787980
    },
    {
      "epoch": 2.9261012156084916,
      "grad_norm": 0.35767242312431335,
      "learning_rate": 4.175128325085825e-06,
      "loss": 0.012,
      "step": 1788000
    },
    {
      "epoch": 2.9261339460471447,
      "grad_norm": 0.5796496868133545,
      "learning_rate": 4.175062432872308e-06,
      "loss": 0.0152,
      "step": 1788020
    },
    {
      "epoch": 2.9261666764857983,
      "grad_norm": 0.30802038311958313,
      "learning_rate": 4.1749965406587905e-06,
      "loss": 0.0156,
      "step": 1788040
    },
    {
      "epoch": 2.9261994069244515,
      "grad_norm": 0.3177538812160492,
      "learning_rate": 4.174930648445273e-06,
      "loss": 0.0158,
      "step": 1788060
    },
    {
      "epoch": 2.926232137363105,
      "grad_norm": 0.27000147104263306,
      "learning_rate": 4.174864756231756e-06,
      "loss": 0.0145,
      "step": 1788080
    },
    {
      "epoch": 2.926264867801758,
      "grad_norm": 0.269500195980072,
      "learning_rate": 4.174798864018239e-06,
      "loss": 0.0137,
      "step": 1788100
    },
    {
      "epoch": 2.926297598240412,
      "grad_norm": 0.42365074157714844,
      "learning_rate": 4.174732971804722e-06,
      "loss": 0.0112,
      "step": 1788120
    },
    {
      "epoch": 2.926330328679065,
      "grad_norm": 0.7344752550125122,
      "learning_rate": 4.174667079591205e-06,
      "loss": 0.0113,
      "step": 1788140
    },
    {
      "epoch": 2.926363059117718,
      "grad_norm": 0.0930238887667656,
      "learning_rate": 4.174601187377688e-06,
      "loss": 0.0121,
      "step": 1788160
    },
    {
      "epoch": 2.9263957895563717,
      "grad_norm": 0.7842876315116882,
      "learning_rate": 4.174535295164171e-06,
      "loss": 0.0129,
      "step": 1788180
    },
    {
      "epoch": 2.926428519995025,
      "grad_norm": 0.7458771467208862,
      "learning_rate": 4.174469402950654e-06,
      "loss": 0.0101,
      "step": 1788200
    },
    {
      "epoch": 2.9264612504336784,
      "grad_norm": 0.22393906116485596,
      "learning_rate": 4.174403510737137e-06,
      "loss": 0.0175,
      "step": 1788220
    },
    {
      "epoch": 2.9264939808723316,
      "grad_norm": 0.5388919711112976,
      "learning_rate": 4.1743376185236195e-06,
      "loss": 0.0147,
      "step": 1788240
    },
    {
      "epoch": 2.926526711310985,
      "grad_norm": 0.478550523519516,
      "learning_rate": 4.174271726310102e-06,
      "loss": 0.0107,
      "step": 1788260
    },
    {
      "epoch": 2.9265594417496383,
      "grad_norm": 0.0819844901561737,
      "learning_rate": 4.174205834096585e-06,
      "loss": 0.0107,
      "step": 1788280
    },
    {
      "epoch": 2.9265921721882915,
      "grad_norm": 0.5007773041725159,
      "learning_rate": 4.174139941883068e-06,
      "loss": 0.0124,
      "step": 1788300
    },
    {
      "epoch": 2.926624902626945,
      "grad_norm": 0.2745918035507202,
      "learning_rate": 4.1740740496695505e-06,
      "loss": 0.02,
      "step": 1788320
    },
    {
      "epoch": 2.9266576330655982,
      "grad_norm": 0.41566702723503113,
      "learning_rate": 4.174008157456034e-06,
      "loss": 0.0103,
      "step": 1788340
    },
    {
      "epoch": 2.926690363504252,
      "grad_norm": 0.21640047430992126,
      "learning_rate": 4.173942265242517e-06,
      "loss": 0.016,
      "step": 1788360
    },
    {
      "epoch": 2.926723093942905,
      "grad_norm": 0.2852228879928589,
      "learning_rate": 4.1738763730289996e-06,
      "loss": 0.0135,
      "step": 1788380
    },
    {
      "epoch": 2.9267558243815586,
      "grad_norm": 0.3480822443962097,
      "learning_rate": 4.173810480815482e-06,
      "loss": 0.0101,
      "step": 1788400
    },
    {
      "epoch": 2.9267885548202117,
      "grad_norm": 0.11787071079015732,
      "learning_rate": 4.173744588601965e-06,
      "loss": 0.0134,
      "step": 1788420
    },
    {
      "epoch": 2.926821285258865,
      "grad_norm": 0.554817259311676,
      "learning_rate": 4.173678696388448e-06,
      "loss": 0.0151,
      "step": 1788440
    },
    {
      "epoch": 2.9268540156975185,
      "grad_norm": 0.17118123173713684,
      "learning_rate": 4.1736128041749305e-06,
      "loss": 0.018,
      "step": 1788460
    },
    {
      "epoch": 2.9268867461361716,
      "grad_norm": 0.32106268405914307,
      "learning_rate": 4.173546911961413e-06,
      "loss": 0.0135,
      "step": 1788480
    },
    {
      "epoch": 2.926919476574825,
      "grad_norm": 0.47292453050613403,
      "learning_rate": 4.173481019747897e-06,
      "loss": 0.0125,
      "step": 1788500
    },
    {
      "epoch": 2.9269522070134784,
      "grad_norm": 0.23765507340431213,
      "learning_rate": 4.17341512753438e-06,
      "loss": 0.0119,
      "step": 1788520
    },
    {
      "epoch": 2.926984937452132,
      "grad_norm": 0.5004513263702393,
      "learning_rate": 4.173349235320862e-06,
      "loss": 0.0142,
      "step": 1788540
    },
    {
      "epoch": 2.927017667890785,
      "grad_norm": 0.2517893612384796,
      "learning_rate": 4.173283343107346e-06,
      "loss": 0.0122,
      "step": 1788560
    },
    {
      "epoch": 2.9270503983294383,
      "grad_norm": 0.794694721698761,
      "learning_rate": 4.173217450893829e-06,
      "loss": 0.0273,
      "step": 1788580
    },
    {
      "epoch": 2.927083128768092,
      "grad_norm": 0.43053027987480164,
      "learning_rate": 4.173151558680311e-06,
      "loss": 0.011,
      "step": 1788600
    },
    {
      "epoch": 2.927115859206745,
      "grad_norm": 0.10849151760339737,
      "learning_rate": 4.173085666466794e-06,
      "loss": 0.0115,
      "step": 1788620
    },
    {
      "epoch": 2.9271485896453986,
      "grad_norm": 0.19260552525520325,
      "learning_rate": 4.173019774253277e-06,
      "loss": 0.0129,
      "step": 1788640
    },
    {
      "epoch": 2.9271813200840517,
      "grad_norm": 0.7481960654258728,
      "learning_rate": 4.17295388203976e-06,
      "loss": 0.0115,
      "step": 1788660
    },
    {
      "epoch": 2.9272140505227053,
      "grad_norm": 0.5415032505989075,
      "learning_rate": 4.172887989826242e-06,
      "loss": 0.0123,
      "step": 1788680
    },
    {
      "epoch": 2.9272467809613585,
      "grad_norm": 0.40139535069465637,
      "learning_rate": 4.172822097612725e-06,
      "loss": 0.0171,
      "step": 1788700
    },
    {
      "epoch": 2.9272795114000116,
      "grad_norm": 0.8774615526199341,
      "learning_rate": 4.172756205399208e-06,
      "loss": 0.011,
      "step": 1788720
    },
    {
      "epoch": 2.9273122418386652,
      "grad_norm": 0.09006313979625702,
      "learning_rate": 4.1726903131856914e-06,
      "loss": 0.0126,
      "step": 1788740
    },
    {
      "epoch": 2.9273449722773184,
      "grad_norm": 0.09345951676368713,
      "learning_rate": 4.172624420972174e-06,
      "loss": 0.0099,
      "step": 1788760
    },
    {
      "epoch": 2.9273777027159715,
      "grad_norm": 0.2310323864221573,
      "learning_rate": 4.172558528758657e-06,
      "loss": 0.0108,
      "step": 1788780
    },
    {
      "epoch": 2.927410433154625,
      "grad_norm": 0.7750471830368042,
      "learning_rate": 4.17249263654514e-06,
      "loss": 0.0177,
      "step": 1788800
    },
    {
      "epoch": 2.9274431635932787,
      "grad_norm": 0.6927977800369263,
      "learning_rate": 4.172426744331622e-06,
      "loss": 0.0124,
      "step": 1788820
    },
    {
      "epoch": 2.927475894031932,
      "grad_norm": 0.17964768409729004,
      "learning_rate": 4.172360852118106e-06,
      "loss": 0.0174,
      "step": 1788840
    },
    {
      "epoch": 2.927508624470585,
      "grad_norm": 0.8119052648544312,
      "learning_rate": 4.172294959904589e-06,
      "loss": 0.0149,
      "step": 1788860
    },
    {
      "epoch": 2.9275413549092386,
      "grad_norm": 0.8933792114257812,
      "learning_rate": 4.1722290676910715e-06,
      "loss": 0.0169,
      "step": 1788880
    },
    {
      "epoch": 2.9275740853478918,
      "grad_norm": 0.4739718735218048,
      "learning_rate": 4.172163175477554e-06,
      "loss": 0.0202,
      "step": 1788900
    },
    {
      "epoch": 2.927606815786545,
      "grad_norm": 0.24314725399017334,
      "learning_rate": 4.172097283264037e-06,
      "loss": 0.0162,
      "step": 1788920
    },
    {
      "epoch": 2.9276395462251985,
      "grad_norm": 0.1467524617910385,
      "learning_rate": 4.17203139105052e-06,
      "loss": 0.0144,
      "step": 1788940
    },
    {
      "epoch": 2.927672276663852,
      "grad_norm": 0.12026266008615494,
      "learning_rate": 4.171965498837003e-06,
      "loss": 0.0091,
      "step": 1788960
    },
    {
      "epoch": 2.9277050071025053,
      "grad_norm": 0.4278196692466736,
      "learning_rate": 4.171899606623486e-06,
      "loss": 0.0135,
      "step": 1788980
    },
    {
      "epoch": 2.9277377375411584,
      "grad_norm": 0.21041451394557953,
      "learning_rate": 4.171833714409969e-06,
      "loss": 0.0153,
      "step": 1789000
    },
    {
      "epoch": 2.927770467979812,
      "grad_norm": 0.1463213860988617,
      "learning_rate": 4.1717678221964515e-06,
      "loss": 0.0191,
      "step": 1789020
    },
    {
      "epoch": 2.927803198418465,
      "grad_norm": 0.17294852435588837,
      "learning_rate": 4.171701929982934e-06,
      "loss": 0.0081,
      "step": 1789040
    },
    {
      "epoch": 2.9278359288571183,
      "grad_norm": 0.18149340152740479,
      "learning_rate": 4.171636037769417e-06,
      "loss": 0.0172,
      "step": 1789060
    },
    {
      "epoch": 2.927868659295772,
      "grad_norm": 0.30500322580337524,
      "learning_rate": 4.1715701455559e-06,
      "loss": 0.0185,
      "step": 1789080
    },
    {
      "epoch": 2.9279013897344255,
      "grad_norm": 0.8044227361679077,
      "learning_rate": 4.1715042533423824e-06,
      "loss": 0.0146,
      "step": 1789100
    },
    {
      "epoch": 2.9279341201730786,
      "grad_norm": 0.515292763710022,
      "learning_rate": 4.171438361128865e-06,
      "loss": 0.0128,
      "step": 1789120
    },
    {
      "epoch": 2.927966850611732,
      "grad_norm": 0.5449249744415283,
      "learning_rate": 4.171372468915349e-06,
      "loss": 0.0183,
      "step": 1789140
    },
    {
      "epoch": 2.9279995810503854,
      "grad_norm": 0.14589761197566986,
      "learning_rate": 4.1713065767018315e-06,
      "loss": 0.0126,
      "step": 1789160
    },
    {
      "epoch": 2.9280323114890385,
      "grad_norm": 0.16229239106178284,
      "learning_rate": 4.171240684488314e-06,
      "loss": 0.0106,
      "step": 1789180
    },
    {
      "epoch": 2.9280650419276917,
      "grad_norm": 0.32777008414268494,
      "learning_rate": 4.171174792274798e-06,
      "loss": 0.0138,
      "step": 1789200
    },
    {
      "epoch": 2.9280977723663453,
      "grad_norm": 0.0752965435385704,
      "learning_rate": 4.1711089000612806e-06,
      "loss": 0.0148,
      "step": 1789220
    },
    {
      "epoch": 2.928130502804999,
      "grad_norm": 0.43977290391921997,
      "learning_rate": 4.171043007847763e-06,
      "loss": 0.0146,
      "step": 1789240
    },
    {
      "epoch": 2.928163233243652,
      "grad_norm": 1.1015273332595825,
      "learning_rate": 4.170977115634246e-06,
      "loss": 0.0109,
      "step": 1789260
    },
    {
      "epoch": 2.928195963682305,
      "grad_norm": 0.22790566086769104,
      "learning_rate": 4.170911223420729e-06,
      "loss": 0.0108,
      "step": 1789280
    },
    {
      "epoch": 2.9282286941209588,
      "grad_norm": 0.26473987102508545,
      "learning_rate": 4.1708453312072115e-06,
      "loss": 0.0162,
      "step": 1789300
    },
    {
      "epoch": 2.928261424559612,
      "grad_norm": 0.45274969935417175,
      "learning_rate": 4.170779438993694e-06,
      "loss": 0.0102,
      "step": 1789320
    },
    {
      "epoch": 2.928294154998265,
      "grad_norm": 0.3446923494338989,
      "learning_rate": 4.170713546780177e-06,
      "loss": 0.0124,
      "step": 1789340
    },
    {
      "epoch": 2.9283268854369187,
      "grad_norm": 0.4274761974811554,
      "learning_rate": 4.170647654566661e-06,
      "loss": 0.0144,
      "step": 1789360
    },
    {
      "epoch": 2.928359615875572,
      "grad_norm": 0.1096692904829979,
      "learning_rate": 4.170581762353143e-06,
      "loss": 0.0095,
      "step": 1789380
    },
    {
      "epoch": 2.9283923463142254,
      "grad_norm": 0.43338143825531006,
      "learning_rate": 4.170515870139626e-06,
      "loss": 0.0185,
      "step": 1789400
    },
    {
      "epoch": 2.9284250767528786,
      "grad_norm": 0.06591258198022842,
      "learning_rate": 4.170449977926109e-06,
      "loss": 0.012,
      "step": 1789420
    },
    {
      "epoch": 2.928457807191532,
      "grad_norm": 0.032103970646858215,
      "learning_rate": 4.1703840857125916e-06,
      "loss": 0.0168,
      "step": 1789440
    },
    {
      "epoch": 2.9284905376301853,
      "grad_norm": 0.4379720091819763,
      "learning_rate": 4.170318193499074e-06,
      "loss": 0.014,
      "step": 1789460
    },
    {
      "epoch": 2.9285232680688384,
      "grad_norm": 0.17666995525360107,
      "learning_rate": 4.170252301285557e-06,
      "loss": 0.009,
      "step": 1789480
    },
    {
      "epoch": 2.928555998507492,
      "grad_norm": 0.259956419467926,
      "learning_rate": 4.17018640907204e-06,
      "loss": 0.0127,
      "step": 1789500
    },
    {
      "epoch": 2.928588728946145,
      "grad_norm": 0.24770426750183105,
      "learning_rate": 4.1701205168585225e-06,
      "loss": 0.0098,
      "step": 1789520
    },
    {
      "epoch": 2.928621459384799,
      "grad_norm": 0.432560533285141,
      "learning_rate": 4.170054624645006e-06,
      "loss": 0.0182,
      "step": 1789540
    },
    {
      "epoch": 2.928654189823452,
      "grad_norm": 0.8362064361572266,
      "learning_rate": 4.169988732431489e-06,
      "loss": 0.0184,
      "step": 1789560
    },
    {
      "epoch": 2.9286869202621055,
      "grad_norm": 0.3206040859222412,
      "learning_rate": 4.169922840217972e-06,
      "loss": 0.0132,
      "step": 1789580
    },
    {
      "epoch": 2.9287196507007587,
      "grad_norm": 0.4221882224082947,
      "learning_rate": 4.169856948004455e-06,
      "loss": 0.0123,
      "step": 1789600
    },
    {
      "epoch": 2.928752381139412,
      "grad_norm": 0.4176272749900818,
      "learning_rate": 4.169791055790938e-06,
      "loss": 0.0141,
      "step": 1789620
    },
    {
      "epoch": 2.9287851115780654,
      "grad_norm": 0.2739432454109192,
      "learning_rate": 4.169725163577421e-06,
      "loss": 0.0136,
      "step": 1789640
    },
    {
      "epoch": 2.9288178420167186,
      "grad_norm": 0.16270014643669128,
      "learning_rate": 4.169659271363903e-06,
      "loss": 0.0084,
      "step": 1789660
    },
    {
      "epoch": 2.928850572455372,
      "grad_norm": 0.08184933662414551,
      "learning_rate": 4.169593379150386e-06,
      "loss": 0.0134,
      "step": 1789680
    },
    {
      "epoch": 2.9288833028940253,
      "grad_norm": 0.39043182134628296,
      "learning_rate": 4.169527486936869e-06,
      "loss": 0.0177,
      "step": 1789700
    },
    {
      "epoch": 2.928916033332679,
      "grad_norm": 0.17304371297359467,
      "learning_rate": 4.169461594723352e-06,
      "loss": 0.0153,
      "step": 1789720
    },
    {
      "epoch": 2.928948763771332,
      "grad_norm": 1.0953006744384766,
      "learning_rate": 4.169395702509834e-06,
      "loss": 0.0155,
      "step": 1789740
    },
    {
      "epoch": 2.928981494209985,
      "grad_norm": 0.266968309879303,
      "learning_rate": 4.169329810296318e-06,
      "loss": 0.0214,
      "step": 1789760
    },
    {
      "epoch": 2.929014224648639,
      "grad_norm": 0.30460411310195923,
      "learning_rate": 4.169263918082801e-06,
      "loss": 0.0167,
      "step": 1789780
    },
    {
      "epoch": 2.929046955087292,
      "grad_norm": 0.1865803450345993,
      "learning_rate": 4.169198025869283e-06,
      "loss": 0.0109,
      "step": 1789800
    },
    {
      "epoch": 2.9290796855259456,
      "grad_norm": 0.32814908027648926,
      "learning_rate": 4.169132133655766e-06,
      "loss": 0.0141,
      "step": 1789820
    },
    {
      "epoch": 2.9291124159645987,
      "grad_norm": 0.2958190143108368,
      "learning_rate": 4.169066241442249e-06,
      "loss": 0.0127,
      "step": 1789840
    },
    {
      "epoch": 2.9291451464032523,
      "grad_norm": 0.28516441583633423,
      "learning_rate": 4.169000349228732e-06,
      "loss": 0.0163,
      "step": 1789860
    },
    {
      "epoch": 2.9291778768419054,
      "grad_norm": 0.5572090744972229,
      "learning_rate": 4.168934457015214e-06,
      "loss": 0.0098,
      "step": 1789880
    },
    {
      "epoch": 2.9292106072805586,
      "grad_norm": 0.884329617023468,
      "learning_rate": 4.168868564801698e-06,
      "loss": 0.0112,
      "step": 1789900
    },
    {
      "epoch": 2.929243337719212,
      "grad_norm": 0.1892126351594925,
      "learning_rate": 4.168802672588181e-06,
      "loss": 0.0132,
      "step": 1789920
    },
    {
      "epoch": 2.9292760681578653,
      "grad_norm": 0.3855481743812561,
      "learning_rate": 4.1687367803746634e-06,
      "loss": 0.0181,
      "step": 1789940
    },
    {
      "epoch": 2.929308798596519,
      "grad_norm": 0.21287627518177032,
      "learning_rate": 4.168670888161146e-06,
      "loss": 0.0097,
      "step": 1789960
    },
    {
      "epoch": 2.929341529035172,
      "grad_norm": 0.4526638388633728,
      "learning_rate": 4.16860499594763e-06,
      "loss": 0.0159,
      "step": 1789980
    },
    {
      "epoch": 2.9293742594738257,
      "grad_norm": 0.05460277944803238,
      "learning_rate": 4.1685391037341125e-06,
      "loss": 0.0122,
      "step": 1790000
    },
    {
      "epoch": 2.929406989912479,
      "grad_norm": 0.28709492087364197,
      "learning_rate": 4.168473211520595e-06,
      "loss": 0.0096,
      "step": 1790020
    },
    {
      "epoch": 2.929439720351132,
      "grad_norm": 0.12613077461719513,
      "learning_rate": 4.168407319307078e-06,
      "loss": 0.0111,
      "step": 1790040
    },
    {
      "epoch": 2.9294724507897856,
      "grad_norm": 0.218802809715271,
      "learning_rate": 4.168341427093561e-06,
      "loss": 0.0109,
      "step": 1790060
    },
    {
      "epoch": 2.9295051812284387,
      "grad_norm": 0.17620106041431427,
      "learning_rate": 4.1682755348800435e-06,
      "loss": 0.0132,
      "step": 1790080
    },
    {
      "epoch": 2.9295379116670923,
      "grad_norm": 0.45070913434028625,
      "learning_rate": 4.168209642666526e-06,
      "loss": 0.0154,
      "step": 1790100
    },
    {
      "epoch": 2.9295706421057455,
      "grad_norm": 0.07500249147415161,
      "learning_rate": 4.168143750453009e-06,
      "loss": 0.009,
      "step": 1790120
    },
    {
      "epoch": 2.929603372544399,
      "grad_norm": 0.4814572334289551,
      "learning_rate": 4.168077858239492e-06,
      "loss": 0.0099,
      "step": 1790140
    },
    {
      "epoch": 2.929636102983052,
      "grad_norm": 0.3034047484397888,
      "learning_rate": 4.168011966025975e-06,
      "loss": 0.0126,
      "step": 1790160
    },
    {
      "epoch": 2.9296688334217054,
      "grad_norm": 0.14684905111789703,
      "learning_rate": 4.167946073812458e-06,
      "loss": 0.0171,
      "step": 1790180
    },
    {
      "epoch": 2.929701563860359,
      "grad_norm": 0.1273966282606125,
      "learning_rate": 4.167880181598941e-06,
      "loss": 0.0153,
      "step": 1790200
    },
    {
      "epoch": 2.929734294299012,
      "grad_norm": 0.3939167261123657,
      "learning_rate": 4.1678142893854235e-06,
      "loss": 0.015,
      "step": 1790220
    },
    {
      "epoch": 2.9297670247376653,
      "grad_norm": 0.49858522415161133,
      "learning_rate": 4.167748397171906e-06,
      "loss": 0.0172,
      "step": 1790240
    },
    {
      "epoch": 2.929799755176319,
      "grad_norm": 1.0931633710861206,
      "learning_rate": 4.16768250495839e-06,
      "loss": 0.0143,
      "step": 1790260
    },
    {
      "epoch": 2.9298324856149724,
      "grad_norm": 0.2871171236038208,
      "learning_rate": 4.1676166127448726e-06,
      "loss": 0.0108,
      "step": 1790280
    },
    {
      "epoch": 2.9298652160536256,
      "grad_norm": 0.36650487780570984,
      "learning_rate": 4.167550720531355e-06,
      "loss": 0.017,
      "step": 1790300
    },
    {
      "epoch": 2.9298979464922787,
      "grad_norm": 0.4117238223552704,
      "learning_rate": 4.167484828317838e-06,
      "loss": 0.0158,
      "step": 1790320
    },
    {
      "epoch": 2.9299306769309323,
      "grad_norm": 0.6051660776138306,
      "learning_rate": 4.167418936104321e-06,
      "loss": 0.0136,
      "step": 1790340
    },
    {
      "epoch": 2.9299634073695855,
      "grad_norm": 0.40111637115478516,
      "learning_rate": 4.1673530438908035e-06,
      "loss": 0.014,
      "step": 1790360
    },
    {
      "epoch": 2.9299961378082386,
      "grad_norm": 0.2767622768878937,
      "learning_rate": 4.167287151677287e-06,
      "loss": 0.0153,
      "step": 1790380
    },
    {
      "epoch": 2.9300288682468922,
      "grad_norm": 0.20327426493167877,
      "learning_rate": 4.16722125946377e-06,
      "loss": 0.0144,
      "step": 1790400
    },
    {
      "epoch": 2.930061598685546,
      "grad_norm": 0.19721363484859467,
      "learning_rate": 4.167155367250253e-06,
      "loss": 0.0113,
      "step": 1790420
    },
    {
      "epoch": 2.930094329124199,
      "grad_norm": 0.5901485085487366,
      "learning_rate": 4.167089475036735e-06,
      "loss": 0.0098,
      "step": 1790440
    },
    {
      "epoch": 2.930127059562852,
      "grad_norm": 0.8764855265617371,
      "learning_rate": 4.167023582823218e-06,
      "loss": 0.0147,
      "step": 1790460
    },
    {
      "epoch": 2.9301597900015057,
      "grad_norm": 0.460820734500885,
      "learning_rate": 4.166957690609701e-06,
      "loss": 0.0125,
      "step": 1790480
    },
    {
      "epoch": 2.930192520440159,
      "grad_norm": 0.27636420726776123,
      "learning_rate": 4.1668917983961835e-06,
      "loss": 0.0113,
      "step": 1790500
    },
    {
      "epoch": 2.930225250878812,
      "grad_norm": 0.17147120833396912,
      "learning_rate": 4.166825906182666e-06,
      "loss": 0.0144,
      "step": 1790520
    },
    {
      "epoch": 2.9302579813174656,
      "grad_norm": 0.5394638776779175,
      "learning_rate": 4.166760013969149e-06,
      "loss": 0.0133,
      "step": 1790540
    },
    {
      "epoch": 2.930290711756119,
      "grad_norm": 0.13640089333057404,
      "learning_rate": 4.166694121755633e-06,
      "loss": 0.0114,
      "step": 1790560
    },
    {
      "epoch": 2.9303234421947724,
      "grad_norm": 0.375156432390213,
      "learning_rate": 4.166628229542115e-06,
      "loss": 0.018,
      "step": 1790580
    },
    {
      "epoch": 2.9303561726334255,
      "grad_norm": 0.7490076422691345,
      "learning_rate": 4.166562337328598e-06,
      "loss": 0.0122,
      "step": 1790600
    },
    {
      "epoch": 2.930388903072079,
      "grad_norm": 0.39587828516960144,
      "learning_rate": 4.166496445115082e-06,
      "loss": 0.0152,
      "step": 1790620
    },
    {
      "epoch": 2.9304216335107323,
      "grad_norm": 0.08177625387907028,
      "learning_rate": 4.166430552901564e-06,
      "loss": 0.0155,
      "step": 1790640
    },
    {
      "epoch": 2.9304543639493854,
      "grad_norm": 0.12417653948068619,
      "learning_rate": 4.166364660688047e-06,
      "loss": 0.0116,
      "step": 1790660
    },
    {
      "epoch": 2.930487094388039,
      "grad_norm": 0.16965068876743317,
      "learning_rate": 4.16629876847453e-06,
      "loss": 0.0137,
      "step": 1790680
    },
    {
      "epoch": 2.9305198248266926,
      "grad_norm": 0.3167063295841217,
      "learning_rate": 4.166232876261013e-06,
      "loss": 0.0166,
      "step": 1790700
    },
    {
      "epoch": 2.9305525552653457,
      "grad_norm": 0.4870035648345947,
      "learning_rate": 4.166166984047495e-06,
      "loss": 0.0113,
      "step": 1790720
    },
    {
      "epoch": 2.930585285703999,
      "grad_norm": 0.38135674595832825,
      "learning_rate": 4.166101091833978e-06,
      "loss": 0.0158,
      "step": 1790740
    },
    {
      "epoch": 2.9306180161426525,
      "grad_norm": 0.21197134256362915,
      "learning_rate": 4.166035199620461e-06,
      "loss": 0.0162,
      "step": 1790760
    },
    {
      "epoch": 2.9306507465813056,
      "grad_norm": 0.32839277386665344,
      "learning_rate": 4.1659693074069444e-06,
      "loss": 0.0163,
      "step": 1790780
    },
    {
      "epoch": 2.930683477019959,
      "grad_norm": 0.20868390798568726,
      "learning_rate": 4.165903415193427e-06,
      "loss": 0.0127,
      "step": 1790800
    },
    {
      "epoch": 2.9307162074586124,
      "grad_norm": 0.17133387923240662,
      "learning_rate": 4.16583752297991e-06,
      "loss": 0.0198,
      "step": 1790820
    },
    {
      "epoch": 2.9307489378972655,
      "grad_norm": 1.0618630647659302,
      "learning_rate": 4.165771630766393e-06,
      "loss": 0.0115,
      "step": 1790840
    },
    {
      "epoch": 2.930781668335919,
      "grad_norm": 0.5460805296897888,
      "learning_rate": 4.165705738552875e-06,
      "loss": 0.0097,
      "step": 1790860
    },
    {
      "epoch": 2.9308143987745723,
      "grad_norm": 0.23475322127342224,
      "learning_rate": 4.165639846339358e-06,
      "loss": 0.0115,
      "step": 1790880
    },
    {
      "epoch": 2.930847129213226,
      "grad_norm": 0.11913049966096878,
      "learning_rate": 4.165573954125841e-06,
      "loss": 0.0102,
      "step": 1790900
    },
    {
      "epoch": 2.930879859651879,
      "grad_norm": 0.2742147445678711,
      "learning_rate": 4.165508061912324e-06,
      "loss": 0.0101,
      "step": 1790920
    },
    {
      "epoch": 2.930912590090532,
      "grad_norm": 0.46301931142807007,
      "learning_rate": 4.165442169698806e-06,
      "loss": 0.0104,
      "step": 1790940
    },
    {
      "epoch": 2.9309453205291858,
      "grad_norm": 0.11018181592226028,
      "learning_rate": 4.16537627748529e-06,
      "loss": 0.0113,
      "step": 1790960
    },
    {
      "epoch": 2.930978050967839,
      "grad_norm": 0.7289062738418579,
      "learning_rate": 4.165310385271773e-06,
      "loss": 0.0134,
      "step": 1790980
    },
    {
      "epoch": 2.9310107814064925,
      "grad_norm": 0.23639576137065887,
      "learning_rate": 4.165244493058256e-06,
      "loss": 0.0135,
      "step": 1791000
    },
    {
      "epoch": 2.9310435118451457,
      "grad_norm": 0.5894859433174133,
      "learning_rate": 4.165178600844739e-06,
      "loss": 0.0149,
      "step": 1791020
    },
    {
      "epoch": 2.9310762422837993,
      "grad_norm": 0.22134479880332947,
      "learning_rate": 4.165112708631222e-06,
      "loss": 0.0083,
      "step": 1791040
    },
    {
      "epoch": 2.9311089727224524,
      "grad_norm": 0.49762028455734253,
      "learning_rate": 4.1650468164177045e-06,
      "loss": 0.0106,
      "step": 1791060
    },
    {
      "epoch": 2.9311417031611056,
      "grad_norm": 0.7885658144950867,
      "learning_rate": 4.164980924204187e-06,
      "loss": 0.0104,
      "step": 1791080
    },
    {
      "epoch": 2.931174433599759,
      "grad_norm": 0.09248064458370209,
      "learning_rate": 4.16491503199067e-06,
      "loss": 0.0096,
      "step": 1791100
    },
    {
      "epoch": 2.9312071640384123,
      "grad_norm": 0.17516133189201355,
      "learning_rate": 4.164849139777153e-06,
      "loss": 0.0164,
      "step": 1791120
    },
    {
      "epoch": 2.931239894477066,
      "grad_norm": 0.2926770746707916,
      "learning_rate": 4.1647832475636354e-06,
      "loss": 0.0103,
      "step": 1791140
    },
    {
      "epoch": 2.931272624915719,
      "grad_norm": 0.2964579164981842,
      "learning_rate": 4.164717355350118e-06,
      "loss": 0.0104,
      "step": 1791160
    },
    {
      "epoch": 2.9313053553543726,
      "grad_norm": 0.3715931177139282,
      "learning_rate": 4.164651463136602e-06,
      "loss": 0.0122,
      "step": 1791180
    },
    {
      "epoch": 2.931338085793026,
      "grad_norm": 0.16237427294254303,
      "learning_rate": 4.1645855709230845e-06,
      "loss": 0.0153,
      "step": 1791200
    },
    {
      "epoch": 2.931370816231679,
      "grad_norm": 0.7132739424705505,
      "learning_rate": 4.164519678709567e-06,
      "loss": 0.0264,
      "step": 1791220
    },
    {
      "epoch": 2.9314035466703325,
      "grad_norm": 1.1796503067016602,
      "learning_rate": 4.16445378649605e-06,
      "loss": 0.0141,
      "step": 1791240
    },
    {
      "epoch": 2.9314362771089857,
      "grad_norm": 0.6099585890769958,
      "learning_rate": 4.164387894282533e-06,
      "loss": 0.009,
      "step": 1791260
    },
    {
      "epoch": 2.9314690075476393,
      "grad_norm": 0.8579277992248535,
      "learning_rate": 4.1643220020690155e-06,
      "loss": 0.0185,
      "step": 1791280
    },
    {
      "epoch": 2.9315017379862924,
      "grad_norm": 0.20426751673221588,
      "learning_rate": 4.164256109855498e-06,
      "loss": 0.0258,
      "step": 1791300
    },
    {
      "epoch": 2.931534468424946,
      "grad_norm": 0.11654926091432571,
      "learning_rate": 4.164190217641982e-06,
      "loss": 0.0183,
      "step": 1791320
    },
    {
      "epoch": 2.931567198863599,
      "grad_norm": 0.6444019675254822,
      "learning_rate": 4.1641243254284645e-06,
      "loss": 0.0146,
      "step": 1791340
    },
    {
      "epoch": 2.9315999293022523,
      "grad_norm": 0.3206949532032013,
      "learning_rate": 4.164058433214947e-06,
      "loss": 0.0106,
      "step": 1791360
    },
    {
      "epoch": 2.931632659740906,
      "grad_norm": 0.5131592750549316,
      "learning_rate": 4.16399254100143e-06,
      "loss": 0.0153,
      "step": 1791380
    },
    {
      "epoch": 2.931665390179559,
      "grad_norm": 0.4386744797229767,
      "learning_rate": 4.163926648787914e-06,
      "loss": 0.0122,
      "step": 1791400
    },
    {
      "epoch": 2.9316981206182127,
      "grad_norm": 0.4034675657749176,
      "learning_rate": 4.163860756574396e-06,
      "loss": 0.0133,
      "step": 1791420
    },
    {
      "epoch": 2.931730851056866,
      "grad_norm": 0.3286249041557312,
      "learning_rate": 4.163794864360879e-06,
      "loss": 0.0106,
      "step": 1791440
    },
    {
      "epoch": 2.9317635814955194,
      "grad_norm": 0.13537563383579254,
      "learning_rate": 4.163728972147362e-06,
      "loss": 0.0154,
      "step": 1791460
    },
    {
      "epoch": 2.9317963119341726,
      "grad_norm": 0.1657096892595291,
      "learning_rate": 4.1636630799338446e-06,
      "loss": 0.0226,
      "step": 1791480
    },
    {
      "epoch": 2.9318290423728257,
      "grad_norm": 0.3673715591430664,
      "learning_rate": 4.163597187720327e-06,
      "loss": 0.0126,
      "step": 1791500
    },
    {
      "epoch": 2.9318617728114793,
      "grad_norm": 0.20748531818389893,
      "learning_rate": 4.16353129550681e-06,
      "loss": 0.0139,
      "step": 1791520
    },
    {
      "epoch": 2.9318945032501325,
      "grad_norm": 0.43548035621643066,
      "learning_rate": 4.163465403293293e-06,
      "loss": 0.0093,
      "step": 1791540
    },
    {
      "epoch": 2.931927233688786,
      "grad_norm": 0.28697994351387024,
      "learning_rate": 4.1633995110797755e-06,
      "loss": 0.0152,
      "step": 1791560
    },
    {
      "epoch": 2.931959964127439,
      "grad_norm": 0.4528893828392029,
      "learning_rate": 4.163333618866259e-06,
      "loss": 0.012,
      "step": 1791580
    },
    {
      "epoch": 2.931992694566093,
      "grad_norm": 0.5807957649230957,
      "learning_rate": 4.163267726652742e-06,
      "loss": 0.0093,
      "step": 1791600
    },
    {
      "epoch": 2.932025425004746,
      "grad_norm": 0.14311429858207703,
      "learning_rate": 4.163201834439225e-06,
      "loss": 0.0189,
      "step": 1791620
    },
    {
      "epoch": 2.932058155443399,
      "grad_norm": 0.24112921953201294,
      "learning_rate": 4.163135942225707e-06,
      "loss": 0.0132,
      "step": 1791640
    },
    {
      "epoch": 2.9320908858820527,
      "grad_norm": 0.30603334307670593,
      "learning_rate": 4.163070050012191e-06,
      "loss": 0.0126,
      "step": 1791660
    },
    {
      "epoch": 2.932123616320706,
      "grad_norm": 0.15951451659202576,
      "learning_rate": 4.163004157798674e-06,
      "loss": 0.0142,
      "step": 1791680
    },
    {
      "epoch": 2.9321563467593594,
      "grad_norm": 0.16721504926681519,
      "learning_rate": 4.162938265585156e-06,
      "loss": 0.0162,
      "step": 1791700
    },
    {
      "epoch": 2.9321890771980126,
      "grad_norm": 0.30201733112335205,
      "learning_rate": 4.162872373371639e-06,
      "loss": 0.0241,
      "step": 1791720
    },
    {
      "epoch": 2.932221807636666,
      "grad_norm": 0.3110806345939636,
      "learning_rate": 4.162806481158122e-06,
      "loss": 0.0118,
      "step": 1791740
    },
    {
      "epoch": 2.9322545380753193,
      "grad_norm": 0.2640995383262634,
      "learning_rate": 4.162740588944605e-06,
      "loss": 0.0118,
      "step": 1791760
    },
    {
      "epoch": 2.9322872685139725,
      "grad_norm": 0.2438238114118576,
      "learning_rate": 4.162674696731087e-06,
      "loss": 0.0129,
      "step": 1791780
    },
    {
      "epoch": 2.932319998952626,
      "grad_norm": 0.7626849412918091,
      "learning_rate": 4.162608804517571e-06,
      "loss": 0.0165,
      "step": 1791800
    },
    {
      "epoch": 2.9323527293912792,
      "grad_norm": 0.38292962312698364,
      "learning_rate": 4.162542912304054e-06,
      "loss": 0.0168,
      "step": 1791820
    },
    {
      "epoch": 2.9323854598299324,
      "grad_norm": 0.3468363881111145,
      "learning_rate": 4.162477020090536e-06,
      "loss": 0.0183,
      "step": 1791840
    },
    {
      "epoch": 2.932418190268586,
      "grad_norm": 0.5156484246253967,
      "learning_rate": 4.162411127877019e-06,
      "loss": 0.0096,
      "step": 1791860
    },
    {
      "epoch": 2.9324509207072396,
      "grad_norm": 0.38848379254341125,
      "learning_rate": 4.162345235663502e-06,
      "loss": 0.0137,
      "step": 1791880
    },
    {
      "epoch": 2.9324836511458927,
      "grad_norm": 0.21122057735919952,
      "learning_rate": 4.162279343449985e-06,
      "loss": 0.0093,
      "step": 1791900
    },
    {
      "epoch": 2.932516381584546,
      "grad_norm": 0.20823654532432556,
      "learning_rate": 4.162213451236467e-06,
      "loss": 0.01,
      "step": 1791920
    },
    {
      "epoch": 2.9325491120231995,
      "grad_norm": 0.30312374234199524,
      "learning_rate": 4.16214755902295e-06,
      "loss": 0.0155,
      "step": 1791940
    },
    {
      "epoch": 2.9325818424618526,
      "grad_norm": 0.5246509313583374,
      "learning_rate": 4.162081666809433e-06,
      "loss": 0.0216,
      "step": 1791960
    },
    {
      "epoch": 2.9326145729005058,
      "grad_norm": 0.13891997933387756,
      "learning_rate": 4.1620157745959164e-06,
      "loss": 0.0115,
      "step": 1791980
    },
    {
      "epoch": 2.9326473033391594,
      "grad_norm": 0.2978437542915344,
      "learning_rate": 4.161949882382399e-06,
      "loss": 0.0114,
      "step": 1792000
    },
    {
      "epoch": 2.932680033777813,
      "grad_norm": 0.22338008880615234,
      "learning_rate": 4.161883990168882e-06,
      "loss": 0.0099,
      "step": 1792020
    },
    {
      "epoch": 2.932712764216466,
      "grad_norm": 0.38668161630630493,
      "learning_rate": 4.1618180979553655e-06,
      "loss": 0.0137,
      "step": 1792040
    },
    {
      "epoch": 2.9327454946551192,
      "grad_norm": 0.2571355104446411,
      "learning_rate": 4.161752205741848e-06,
      "loss": 0.0156,
      "step": 1792060
    },
    {
      "epoch": 2.932778225093773,
      "grad_norm": 0.2806606590747833,
      "learning_rate": 4.161686313528331e-06,
      "loss": 0.0099,
      "step": 1792080
    },
    {
      "epoch": 2.932810955532426,
      "grad_norm": 0.650070071220398,
      "learning_rate": 4.161620421314814e-06,
      "loss": 0.0123,
      "step": 1792100
    },
    {
      "epoch": 2.932843685971079,
      "grad_norm": 0.19307644665241241,
      "learning_rate": 4.1615545291012965e-06,
      "loss": 0.0157,
      "step": 1792120
    },
    {
      "epoch": 2.9328764164097327,
      "grad_norm": 0.042058445513248444,
      "learning_rate": 4.161488636887779e-06,
      "loss": 0.0094,
      "step": 1792140
    },
    {
      "epoch": 2.9329091468483863,
      "grad_norm": 0.35824844241142273,
      "learning_rate": 4.161422744674262e-06,
      "loss": 0.0127,
      "step": 1792160
    },
    {
      "epoch": 2.9329418772870395,
      "grad_norm": 1.2340232133865356,
      "learning_rate": 4.161356852460745e-06,
      "loss": 0.014,
      "step": 1792180
    },
    {
      "epoch": 2.9329746077256926,
      "grad_norm": 0.10584354400634766,
      "learning_rate": 4.161290960247228e-06,
      "loss": 0.009,
      "step": 1792200
    },
    {
      "epoch": 2.9330073381643462,
      "grad_norm": 0.19784073531627655,
      "learning_rate": 4.161225068033711e-06,
      "loss": 0.0195,
      "step": 1792220
    },
    {
      "epoch": 2.9330400686029994,
      "grad_norm": 0.11159581691026688,
      "learning_rate": 4.161159175820194e-06,
      "loss": 0.0123,
      "step": 1792240
    },
    {
      "epoch": 2.9330727990416525,
      "grad_norm": 0.560266375541687,
      "learning_rate": 4.1610932836066765e-06,
      "loss": 0.0135,
      "step": 1792260
    },
    {
      "epoch": 2.933105529480306,
      "grad_norm": 0.1431792825460434,
      "learning_rate": 4.161027391393159e-06,
      "loss": 0.0154,
      "step": 1792280
    },
    {
      "epoch": 2.9331382599189593,
      "grad_norm": 3.3509559631347656,
      "learning_rate": 4.160961499179642e-06,
      "loss": 0.0126,
      "step": 1792300
    },
    {
      "epoch": 2.933170990357613,
      "grad_norm": 0.35900893807411194,
      "learning_rate": 4.160895606966125e-06,
      "loss": 0.0118,
      "step": 1792320
    },
    {
      "epoch": 2.933203720796266,
      "grad_norm": 0.5800464749336243,
      "learning_rate": 4.1608297147526075e-06,
      "loss": 0.0117,
      "step": 1792340
    },
    {
      "epoch": 2.9332364512349196,
      "grad_norm": 1.0341322422027588,
      "learning_rate": 4.160763822539091e-06,
      "loss": 0.0152,
      "step": 1792360
    },
    {
      "epoch": 2.9332691816735728,
      "grad_norm": 0.1541193723678589,
      "learning_rate": 4.160697930325574e-06,
      "loss": 0.009,
      "step": 1792380
    },
    {
      "epoch": 2.933301912112226,
      "grad_norm": 0.38929861783981323,
      "learning_rate": 4.1606320381120565e-06,
      "loss": 0.0129,
      "step": 1792400
    },
    {
      "epoch": 2.9333346425508795,
      "grad_norm": 0.2866596281528473,
      "learning_rate": 4.16056614589854e-06,
      "loss": 0.0136,
      "step": 1792420
    },
    {
      "epoch": 2.9333673729895327,
      "grad_norm": 0.4285122752189636,
      "learning_rate": 4.160500253685023e-06,
      "loss": 0.0185,
      "step": 1792440
    },
    {
      "epoch": 2.9334001034281862,
      "grad_norm": 0.21318835020065308,
      "learning_rate": 4.160434361471506e-06,
      "loss": 0.0184,
      "step": 1792460
    },
    {
      "epoch": 2.9334328338668394,
      "grad_norm": 0.2826997935771942,
      "learning_rate": 4.160368469257988e-06,
      "loss": 0.011,
      "step": 1792480
    },
    {
      "epoch": 2.933465564305493,
      "grad_norm": 0.5660220384597778,
      "learning_rate": 4.160302577044471e-06,
      "loss": 0.0171,
      "step": 1792500
    },
    {
      "epoch": 2.933498294744146,
      "grad_norm": 0.45484504103660583,
      "learning_rate": 4.160236684830954e-06,
      "loss": 0.0133,
      "step": 1792520
    },
    {
      "epoch": 2.9335310251827993,
      "grad_norm": 0.3010309040546417,
      "learning_rate": 4.1601707926174365e-06,
      "loss": 0.01,
      "step": 1792540
    },
    {
      "epoch": 2.933563755621453,
      "grad_norm": 0.46488338708877563,
      "learning_rate": 4.160104900403919e-06,
      "loss": 0.009,
      "step": 1792560
    },
    {
      "epoch": 2.933596486060106,
      "grad_norm": 0.2431032508611679,
      "learning_rate": 4.160039008190402e-06,
      "loss": 0.0142,
      "step": 1792580
    },
    {
      "epoch": 2.9336292164987596,
      "grad_norm": 0.22516481578350067,
      "learning_rate": 4.159973115976886e-06,
      "loss": 0.0156,
      "step": 1792600
    },
    {
      "epoch": 2.933661946937413,
      "grad_norm": 0.27680307626724243,
      "learning_rate": 4.159907223763368e-06,
      "loss": 0.0123,
      "step": 1792620
    },
    {
      "epoch": 2.9336946773760664,
      "grad_norm": 0.31950557231903076,
      "learning_rate": 4.159841331549851e-06,
      "loss": 0.0095,
      "step": 1792640
    },
    {
      "epoch": 2.9337274078147195,
      "grad_norm": 0.3663095533847809,
      "learning_rate": 4.159775439336334e-06,
      "loss": 0.0116,
      "step": 1792660
    },
    {
      "epoch": 2.9337601382533727,
      "grad_norm": 0.3178923726081848,
      "learning_rate": 4.1597095471228166e-06,
      "loss": 0.0144,
      "step": 1792680
    },
    {
      "epoch": 2.9337928686920263,
      "grad_norm": 0.12253687530755997,
      "learning_rate": 4.159643654909299e-06,
      "loss": 0.0165,
      "step": 1792700
    },
    {
      "epoch": 2.9338255991306794,
      "grad_norm": 0.13531985878944397,
      "learning_rate": 4.159577762695783e-06,
      "loss": 0.0109,
      "step": 1792720
    },
    {
      "epoch": 2.933858329569333,
      "grad_norm": 0.30796125531196594,
      "learning_rate": 4.159511870482266e-06,
      "loss": 0.0104,
      "step": 1792740
    },
    {
      "epoch": 2.933891060007986,
      "grad_norm": 0.3614431619644165,
      "learning_rate": 4.159445978268748e-06,
      "loss": 0.0116,
      "step": 1792760
    },
    {
      "epoch": 2.9339237904466398,
      "grad_norm": 0.2685522437095642,
      "learning_rate": 4.159380086055231e-06,
      "loss": 0.0121,
      "step": 1792780
    },
    {
      "epoch": 2.933956520885293,
      "grad_norm": 0.12472915649414062,
      "learning_rate": 4.159314193841714e-06,
      "loss": 0.0113,
      "step": 1792800
    },
    {
      "epoch": 2.933989251323946,
      "grad_norm": 0.2042294144630432,
      "learning_rate": 4.1592483016281974e-06,
      "loss": 0.0132,
      "step": 1792820
    },
    {
      "epoch": 2.9340219817625997,
      "grad_norm": 0.24750065803527832,
      "learning_rate": 4.15918240941468e-06,
      "loss": 0.0107,
      "step": 1792840
    },
    {
      "epoch": 2.934054712201253,
      "grad_norm": 0.158747598528862,
      "learning_rate": 4.159116517201163e-06,
      "loss": 0.0123,
      "step": 1792860
    },
    {
      "epoch": 2.9340874426399064,
      "grad_norm": 0.2982930541038513,
      "learning_rate": 4.159050624987646e-06,
      "loss": 0.0129,
      "step": 1792880
    },
    {
      "epoch": 2.9341201730785595,
      "grad_norm": 0.4486228823661804,
      "learning_rate": 4.158984732774128e-06,
      "loss": 0.009,
      "step": 1792900
    },
    {
      "epoch": 2.934152903517213,
      "grad_norm": 0.1590697020292282,
      "learning_rate": 4.158918840560611e-06,
      "loss": 0.0188,
      "step": 1792920
    },
    {
      "epoch": 2.9341856339558663,
      "grad_norm": 0.49939751625061035,
      "learning_rate": 4.158852948347094e-06,
      "loss": 0.0149,
      "step": 1792940
    },
    {
      "epoch": 2.9342183643945194,
      "grad_norm": 0.4183630347251892,
      "learning_rate": 4.158787056133577e-06,
      "loss": 0.0091,
      "step": 1792960
    },
    {
      "epoch": 2.934251094833173,
      "grad_norm": 0.1109861433506012,
      "learning_rate": 4.158721163920059e-06,
      "loss": 0.0138,
      "step": 1792980
    },
    {
      "epoch": 2.934283825271826,
      "grad_norm": 0.44339579343795776,
      "learning_rate": 4.158655271706543e-06,
      "loss": 0.0162,
      "step": 1793000
    },
    {
      "epoch": 2.93431655571048,
      "grad_norm": 0.07887928932905197,
      "learning_rate": 4.158589379493026e-06,
      "loss": 0.0083,
      "step": 1793020
    },
    {
      "epoch": 2.934349286149133,
      "grad_norm": 0.40899136662483215,
      "learning_rate": 4.1585234872795084e-06,
      "loss": 0.0191,
      "step": 1793040
    },
    {
      "epoch": 2.9343820165877865,
      "grad_norm": 0.2684769034385681,
      "learning_rate": 4.158457595065991e-06,
      "loss": 0.0114,
      "step": 1793060
    },
    {
      "epoch": 2.9344147470264397,
      "grad_norm": 0.7351349592208862,
      "learning_rate": 4.158391702852475e-06,
      "loss": 0.0136,
      "step": 1793080
    },
    {
      "epoch": 2.934447477465093,
      "grad_norm": 0.5102216005325317,
      "learning_rate": 4.1583258106389575e-06,
      "loss": 0.0142,
      "step": 1793100
    },
    {
      "epoch": 2.9344802079037464,
      "grad_norm": 0.12682867050170898,
      "learning_rate": 4.15825991842544e-06,
      "loss": 0.0094,
      "step": 1793120
    },
    {
      "epoch": 2.9345129383423996,
      "grad_norm": 0.46170857548713684,
      "learning_rate": 4.158194026211923e-06,
      "loss": 0.0069,
      "step": 1793140
    },
    {
      "epoch": 2.934545668781053,
      "grad_norm": 0.0721806213259697,
      "learning_rate": 4.158128133998406e-06,
      "loss": 0.0104,
      "step": 1793160
    },
    {
      "epoch": 2.9345783992197063,
      "grad_norm": 0.25760364532470703,
      "learning_rate": 4.1580622417848885e-06,
      "loss": 0.0164,
      "step": 1793180
    },
    {
      "epoch": 2.93461112965836,
      "grad_norm": 0.5180652737617493,
      "learning_rate": 4.157996349571371e-06,
      "loss": 0.0136,
      "step": 1793200
    },
    {
      "epoch": 2.934643860097013,
      "grad_norm": 0.574203610420227,
      "learning_rate": 4.157930457357855e-06,
      "loss": 0.0127,
      "step": 1793220
    },
    {
      "epoch": 2.934676590535666,
      "grad_norm": 0.45305413007736206,
      "learning_rate": 4.1578645651443375e-06,
      "loss": 0.0079,
      "step": 1793240
    },
    {
      "epoch": 2.93470932097432,
      "grad_norm": 0.27458229660987854,
      "learning_rate": 4.15779867293082e-06,
      "loss": 0.0116,
      "step": 1793260
    },
    {
      "epoch": 2.934742051412973,
      "grad_norm": 0.3897574841976166,
      "learning_rate": 4.157732780717303e-06,
      "loss": 0.0118,
      "step": 1793280
    },
    {
      "epoch": 2.934774781851626,
      "grad_norm": 0.22348928451538086,
      "learning_rate": 4.157666888503786e-06,
      "loss": 0.0202,
      "step": 1793300
    },
    {
      "epoch": 2.9348075122902797,
      "grad_norm": 0.8236591815948486,
      "learning_rate": 4.1576009962902685e-06,
      "loss": 0.0178,
      "step": 1793320
    },
    {
      "epoch": 2.9348402427289333,
      "grad_norm": 0.37095943093299866,
      "learning_rate": 4.157535104076751e-06,
      "loss": 0.0136,
      "step": 1793340
    },
    {
      "epoch": 2.9348729731675864,
      "grad_norm": 0.5857434868812561,
      "learning_rate": 4.157469211863234e-06,
      "loss": 0.0077,
      "step": 1793360
    },
    {
      "epoch": 2.9349057036062396,
      "grad_norm": 0.2534545063972473,
      "learning_rate": 4.157403319649717e-06,
      "loss": 0.0132,
      "step": 1793380
    },
    {
      "epoch": 2.934938434044893,
      "grad_norm": 0.1996898651123047,
      "learning_rate": 4.1573374274362e-06,
      "loss": 0.0122,
      "step": 1793400
    },
    {
      "epoch": 2.9349711644835463,
      "grad_norm": 0.22925063967704773,
      "learning_rate": 4.157271535222683e-06,
      "loss": 0.0097,
      "step": 1793420
    },
    {
      "epoch": 2.9350038949221995,
      "grad_norm": 0.10975245386362076,
      "learning_rate": 4.157205643009166e-06,
      "loss": 0.014,
      "step": 1793440
    },
    {
      "epoch": 2.935036625360853,
      "grad_norm": 3.3810434341430664,
      "learning_rate": 4.157139750795649e-06,
      "loss": 0.0159,
      "step": 1793460
    },
    {
      "epoch": 2.9350693557995067,
      "grad_norm": 0.09896479547023773,
      "learning_rate": 4.157073858582132e-06,
      "loss": 0.007,
      "step": 1793480
    },
    {
      "epoch": 2.93510208623816,
      "grad_norm": 0.39200669527053833,
      "learning_rate": 4.157007966368615e-06,
      "loss": 0.0173,
      "step": 1793500
    },
    {
      "epoch": 2.935134816676813,
      "grad_norm": 0.19571593403816223,
      "learning_rate": 4.1569420741550976e-06,
      "loss": 0.0103,
      "step": 1793520
    },
    {
      "epoch": 2.9351675471154666,
      "grad_norm": 0.8258281946182251,
      "learning_rate": 4.15687618194158e-06,
      "loss": 0.0161,
      "step": 1793540
    },
    {
      "epoch": 2.9352002775541197,
      "grad_norm": 0.20672957599163055,
      "learning_rate": 4.156810289728063e-06,
      "loss": 0.0079,
      "step": 1793560
    },
    {
      "epoch": 2.935233007992773,
      "grad_norm": 0.2563517093658447,
      "learning_rate": 4.156744397514546e-06,
      "loss": 0.0122,
      "step": 1793580
    },
    {
      "epoch": 2.9352657384314265,
      "grad_norm": 1.2756116390228271,
      "learning_rate": 4.1566785053010285e-06,
      "loss": 0.0123,
      "step": 1793600
    },
    {
      "epoch": 2.93529846887008,
      "grad_norm": 0.4301312267780304,
      "learning_rate": 4.156612613087512e-06,
      "loss": 0.0118,
      "step": 1793620
    },
    {
      "epoch": 2.935331199308733,
      "grad_norm": 0.1673518270254135,
      "learning_rate": 4.156546720873995e-06,
      "loss": 0.0176,
      "step": 1793640
    },
    {
      "epoch": 2.9353639297473864,
      "grad_norm": 0.4636816382408142,
      "learning_rate": 4.156480828660478e-06,
      "loss": 0.0096,
      "step": 1793660
    },
    {
      "epoch": 2.93539666018604,
      "grad_norm": 0.09897036105394363,
      "learning_rate": 4.15641493644696e-06,
      "loss": 0.0121,
      "step": 1793680
    },
    {
      "epoch": 2.935429390624693,
      "grad_norm": 0.2702401280403137,
      "learning_rate": 4.156349044233443e-06,
      "loss": 0.014,
      "step": 1793700
    },
    {
      "epoch": 2.9354621210633463,
      "grad_norm": 0.3226566016674042,
      "learning_rate": 4.156283152019926e-06,
      "loss": 0.0166,
      "step": 1793720
    },
    {
      "epoch": 2.935494851502,
      "grad_norm": 0.2307121306657791,
      "learning_rate": 4.1562172598064086e-06,
      "loss": 0.0157,
      "step": 1793740
    },
    {
      "epoch": 2.9355275819406534,
      "grad_norm": 0.1268320381641388,
      "learning_rate": 4.156151367592891e-06,
      "loss": 0.0115,
      "step": 1793760
    },
    {
      "epoch": 2.9355603123793066,
      "grad_norm": 0.18755985796451569,
      "learning_rate": 4.156085475379375e-06,
      "loss": 0.0126,
      "step": 1793780
    },
    {
      "epoch": 2.9355930428179597,
      "grad_norm": 0.14388006925582886,
      "learning_rate": 4.156019583165858e-06,
      "loss": 0.0084,
      "step": 1793800
    },
    {
      "epoch": 2.9356257732566133,
      "grad_norm": 0.31699925661087036,
      "learning_rate": 4.15595369095234e-06,
      "loss": 0.0137,
      "step": 1793820
    },
    {
      "epoch": 2.9356585036952665,
      "grad_norm": 0.7956520318984985,
      "learning_rate": 4.155887798738824e-06,
      "loss": 0.0167,
      "step": 1793840
    },
    {
      "epoch": 2.9356912341339196,
      "grad_norm": 0.5171158909797668,
      "learning_rate": 4.155821906525307e-06,
      "loss": 0.0211,
      "step": 1793860
    },
    {
      "epoch": 2.9357239645725732,
      "grad_norm": 0.40595582127571106,
      "learning_rate": 4.1557560143117894e-06,
      "loss": 0.0112,
      "step": 1793880
    },
    {
      "epoch": 2.9357566950112264,
      "grad_norm": 0.2286863476037979,
      "learning_rate": 4.155690122098272e-06,
      "loss": 0.0105,
      "step": 1793900
    },
    {
      "epoch": 2.93578942544988,
      "grad_norm": 0.19726969301700592,
      "learning_rate": 4.155624229884755e-06,
      "loss": 0.0159,
      "step": 1793920
    },
    {
      "epoch": 2.935822155888533,
      "grad_norm": 0.31725332140922546,
      "learning_rate": 4.155558337671238e-06,
      "loss": 0.0113,
      "step": 1793940
    },
    {
      "epoch": 2.9358548863271867,
      "grad_norm": 0.10259443521499634,
      "learning_rate": 4.15549244545772e-06,
      "loss": 0.0147,
      "step": 1793960
    },
    {
      "epoch": 2.93588761676584,
      "grad_norm": 0.3890610337257385,
      "learning_rate": 4.155426553244203e-06,
      "loss": 0.0116,
      "step": 1793980
    },
    {
      "epoch": 2.935920347204493,
      "grad_norm": 0.8244636058807373,
      "learning_rate": 4.155360661030686e-06,
      "loss": 0.0126,
      "step": 1794000
    },
    {
      "epoch": 2.9359530776431466,
      "grad_norm": 1.3403691053390503,
      "learning_rate": 4.1552947688171694e-06,
      "loss": 0.0153,
      "step": 1794020
    },
    {
      "epoch": 2.9359858080817998,
      "grad_norm": 0.15468546748161316,
      "learning_rate": 4.155228876603652e-06,
      "loss": 0.0143,
      "step": 1794040
    },
    {
      "epoch": 2.9360185385204534,
      "grad_norm": 0.5315791964530945,
      "learning_rate": 4.155162984390135e-06,
      "loss": 0.011,
      "step": 1794060
    },
    {
      "epoch": 2.9360512689591065,
      "grad_norm": 0.2626805603504181,
      "learning_rate": 4.155097092176618e-06,
      "loss": 0.0088,
      "step": 1794080
    },
    {
      "epoch": 2.93608399939776,
      "grad_norm": 0.09467741847038269,
      "learning_rate": 4.1550311999631e-06,
      "loss": 0.0149,
      "step": 1794100
    },
    {
      "epoch": 2.9361167298364133,
      "grad_norm": 0.3007325530052185,
      "learning_rate": 4.154965307749584e-06,
      "loss": 0.0194,
      "step": 1794120
    },
    {
      "epoch": 2.9361494602750664,
      "grad_norm": 0.1397840976715088,
      "learning_rate": 4.154899415536067e-06,
      "loss": 0.0115,
      "step": 1794140
    },
    {
      "epoch": 2.93618219071372,
      "grad_norm": 0.09610418230295181,
      "learning_rate": 4.1548335233225495e-06,
      "loss": 0.0066,
      "step": 1794160
    },
    {
      "epoch": 2.936214921152373,
      "grad_norm": 0.198730006814003,
      "learning_rate": 4.154767631109032e-06,
      "loss": 0.0105,
      "step": 1794180
    },
    {
      "epoch": 2.9362476515910267,
      "grad_norm": 0.12148474156856537,
      "learning_rate": 4.154701738895515e-06,
      "loss": 0.0143,
      "step": 1794200
    },
    {
      "epoch": 2.93628038202968,
      "grad_norm": 0.07739242166280746,
      "learning_rate": 4.154635846681998e-06,
      "loss": 0.0109,
      "step": 1794220
    },
    {
      "epoch": 2.9363131124683335,
      "grad_norm": 1.0412187576293945,
      "learning_rate": 4.154569954468481e-06,
      "loss": 0.0114,
      "step": 1794240
    },
    {
      "epoch": 2.9363458429069866,
      "grad_norm": 0.1752597689628601,
      "learning_rate": 4.154504062254964e-06,
      "loss": 0.0129,
      "step": 1794260
    },
    {
      "epoch": 2.93637857334564,
      "grad_norm": 0.7274672389030457,
      "learning_rate": 4.154438170041447e-06,
      "loss": 0.0203,
      "step": 1794280
    },
    {
      "epoch": 2.9364113037842934,
      "grad_norm": 0.18024243414402008,
      "learning_rate": 4.1543722778279295e-06,
      "loss": 0.0115,
      "step": 1794300
    },
    {
      "epoch": 2.9364440342229465,
      "grad_norm": 1.1044201850891113,
      "learning_rate": 4.154306385614412e-06,
      "loss": 0.017,
      "step": 1794320
    },
    {
      "epoch": 2.9364767646616,
      "grad_norm": 0.35175350308418274,
      "learning_rate": 4.154240493400895e-06,
      "loss": 0.0283,
      "step": 1794340
    },
    {
      "epoch": 2.9365094951002533,
      "grad_norm": 0.30623793601989746,
      "learning_rate": 4.154174601187378e-06,
      "loss": 0.0142,
      "step": 1794360
    },
    {
      "epoch": 2.936542225538907,
      "grad_norm": 0.31726643443107605,
      "learning_rate": 4.1541087089738605e-06,
      "loss": 0.0118,
      "step": 1794380
    },
    {
      "epoch": 2.93657495597756,
      "grad_norm": 0.7653710842132568,
      "learning_rate": 4.154042816760343e-06,
      "loss": 0.0094,
      "step": 1794400
    },
    {
      "epoch": 2.936607686416213,
      "grad_norm": 0.06857823580503464,
      "learning_rate": 4.153976924546827e-06,
      "loss": 0.0112,
      "step": 1794420
    },
    {
      "epoch": 2.9366404168548668,
      "grad_norm": 0.501950740814209,
      "learning_rate": 4.1539110323333095e-06,
      "loss": 0.0111,
      "step": 1794440
    },
    {
      "epoch": 2.93667314729352,
      "grad_norm": 0.35957321524620056,
      "learning_rate": 4.153845140119792e-06,
      "loss": 0.0108,
      "step": 1794460
    },
    {
      "epoch": 2.9367058777321735,
      "grad_norm": 0.27612271904945374,
      "learning_rate": 4.153779247906276e-06,
      "loss": 0.0191,
      "step": 1794480
    },
    {
      "epoch": 2.9367386081708267,
      "grad_norm": 0.337584912776947,
      "learning_rate": 4.153713355692759e-06,
      "loss": 0.0128,
      "step": 1794500
    },
    {
      "epoch": 2.9367713386094803,
      "grad_norm": 0.018508097156882286,
      "learning_rate": 4.153647463479241e-06,
      "loss": 0.0144,
      "step": 1794520
    },
    {
      "epoch": 2.9368040690481334,
      "grad_norm": 0.22066469490528107,
      "learning_rate": 4.153581571265724e-06,
      "loss": 0.0112,
      "step": 1794540
    },
    {
      "epoch": 2.9368367994867866,
      "grad_norm": 0.12441808730363846,
      "learning_rate": 4.153515679052207e-06,
      "loss": 0.0099,
      "step": 1794560
    },
    {
      "epoch": 2.93686952992544,
      "grad_norm": 0.09620369225740433,
      "learning_rate": 4.1534497868386896e-06,
      "loss": 0.0124,
      "step": 1794580
    },
    {
      "epoch": 2.9369022603640933,
      "grad_norm": 0.0814894586801529,
      "learning_rate": 4.153383894625172e-06,
      "loss": 0.0142,
      "step": 1794600
    },
    {
      "epoch": 2.936934990802747,
      "grad_norm": 1.30929696559906,
      "learning_rate": 4.153318002411655e-06,
      "loss": 0.0153,
      "step": 1794620
    },
    {
      "epoch": 2.9369677212414,
      "grad_norm": 1.4774690866470337,
      "learning_rate": 4.153252110198139e-06,
      "loss": 0.0146,
      "step": 1794640
    },
    {
      "epoch": 2.9370004516800536,
      "grad_norm": 0.1226736307144165,
      "learning_rate": 4.153186217984621e-06,
      "loss": 0.0116,
      "step": 1794660
    },
    {
      "epoch": 2.937033182118707,
      "grad_norm": 0.5077716708183289,
      "learning_rate": 4.153120325771104e-06,
      "loss": 0.0125,
      "step": 1794680
    },
    {
      "epoch": 2.93706591255736,
      "grad_norm": 0.4241155683994293,
      "learning_rate": 4.153054433557587e-06,
      "loss": 0.0179,
      "step": 1794700
    },
    {
      "epoch": 2.9370986429960135,
      "grad_norm": 0.10467258840799332,
      "learning_rate": 4.1529885413440696e-06,
      "loss": 0.0119,
      "step": 1794720
    },
    {
      "epoch": 2.9371313734346667,
      "grad_norm": 0.32514727115631104,
      "learning_rate": 4.152922649130552e-06,
      "loss": 0.0138,
      "step": 1794740
    },
    {
      "epoch": 2.9371641038733203,
      "grad_norm": 0.09529360383749008,
      "learning_rate": 4.152856756917035e-06,
      "loss": 0.0146,
      "step": 1794760
    },
    {
      "epoch": 2.9371968343119734,
      "grad_norm": 0.380948543548584,
      "learning_rate": 4.152790864703518e-06,
      "loss": 0.0092,
      "step": 1794780
    },
    {
      "epoch": 2.937229564750627,
      "grad_norm": 0.17257632315158844,
      "learning_rate": 4.1527249724900005e-06,
      "loss": 0.0118,
      "step": 1794800
    },
    {
      "epoch": 2.93726229518928,
      "grad_norm": 0.9817175269126892,
      "learning_rate": 4.152659080276484e-06,
      "loss": 0.0097,
      "step": 1794820
    },
    {
      "epoch": 2.9372950256279333,
      "grad_norm": 0.42618340253829956,
      "learning_rate": 4.152593188062967e-06,
      "loss": 0.0172,
      "step": 1794840
    },
    {
      "epoch": 2.937327756066587,
      "grad_norm": 0.40770140290260315,
      "learning_rate": 4.15252729584945e-06,
      "loss": 0.0114,
      "step": 1794860
    },
    {
      "epoch": 2.93736048650524,
      "grad_norm": 0.20202259719371796,
      "learning_rate": 4.152461403635933e-06,
      "loss": 0.0106,
      "step": 1794880
    },
    {
      "epoch": 2.937393216943893,
      "grad_norm": 0.3945249319076538,
      "learning_rate": 4.152395511422416e-06,
      "loss": 0.013,
      "step": 1794900
    },
    {
      "epoch": 2.937425947382547,
      "grad_norm": 0.5237337350845337,
      "learning_rate": 4.152329619208899e-06,
      "loss": 0.0125,
      "step": 1794920
    },
    {
      "epoch": 2.9374586778212004,
      "grad_norm": 0.7109944820404053,
      "learning_rate": 4.152263726995381e-06,
      "loss": 0.0087,
      "step": 1794940
    },
    {
      "epoch": 2.9374914082598536,
      "grad_norm": 0.25725215673446655,
      "learning_rate": 4.152197834781864e-06,
      "loss": 0.0118,
      "step": 1794960
    },
    {
      "epoch": 2.9375241386985067,
      "grad_norm": 0.1597049981355667,
      "learning_rate": 4.152131942568347e-06,
      "loss": 0.0162,
      "step": 1794980
    },
    {
      "epoch": 2.9375568691371603,
      "grad_norm": 0.9110780358314514,
      "learning_rate": 4.15206605035483e-06,
      "loss": 0.0171,
      "step": 1795000
    },
    {
      "epoch": 2.9375895995758134,
      "grad_norm": 0.7280508875846863,
      "learning_rate": 4.152000158141312e-06,
      "loss": 0.014,
      "step": 1795020
    },
    {
      "epoch": 2.9376223300144666,
      "grad_norm": 0.2721408009529114,
      "learning_rate": 4.151934265927796e-06,
      "loss": 0.0097,
      "step": 1795040
    },
    {
      "epoch": 2.93765506045312,
      "grad_norm": 0.414004385471344,
      "learning_rate": 4.151868373714279e-06,
      "loss": 0.0115,
      "step": 1795060
    },
    {
      "epoch": 2.937687790891774,
      "grad_norm": 4.711311340332031,
      "learning_rate": 4.1518024815007614e-06,
      "loss": 0.0191,
      "step": 1795080
    },
    {
      "epoch": 2.937720521330427,
      "grad_norm": 0.13999558985233307,
      "learning_rate": 4.151736589287244e-06,
      "loss": 0.0107,
      "step": 1795100
    },
    {
      "epoch": 2.93775325176908,
      "grad_norm": 0.40195462107658386,
      "learning_rate": 4.151670697073727e-06,
      "loss": 0.0151,
      "step": 1795120
    },
    {
      "epoch": 2.9377859822077337,
      "grad_norm": 0.9771286249160767,
      "learning_rate": 4.15160480486021e-06,
      "loss": 0.0128,
      "step": 1795140
    },
    {
      "epoch": 2.937818712646387,
      "grad_norm": 0.32351845502853394,
      "learning_rate": 4.151538912646692e-06,
      "loss": 0.0086,
      "step": 1795160
    },
    {
      "epoch": 2.93785144308504,
      "grad_norm": 0.2653174102306366,
      "learning_rate": 4.151473020433176e-06,
      "loss": 0.0088,
      "step": 1795180
    },
    {
      "epoch": 2.9378841735236936,
      "grad_norm": 0.3517560362815857,
      "learning_rate": 4.151407128219659e-06,
      "loss": 0.0174,
      "step": 1795200
    },
    {
      "epoch": 2.937916903962347,
      "grad_norm": 0.3956294059753418,
      "learning_rate": 4.1513412360061415e-06,
      "loss": 0.0168,
      "step": 1795220
    },
    {
      "epoch": 2.9379496344010003,
      "grad_norm": 0.07585802674293518,
      "learning_rate": 4.151275343792624e-06,
      "loss": 0.0086,
      "step": 1795240
    },
    {
      "epoch": 2.9379823648396535,
      "grad_norm": 0.2462553083896637,
      "learning_rate": 4.151209451579108e-06,
      "loss": 0.0109,
      "step": 1795260
    },
    {
      "epoch": 2.938015095278307,
      "grad_norm": 0.2007942795753479,
      "learning_rate": 4.1511435593655905e-06,
      "loss": 0.011,
      "step": 1795280
    },
    {
      "epoch": 2.93804782571696,
      "grad_norm": 0.2677648961544037,
      "learning_rate": 4.151077667152073e-06,
      "loss": 0.0148,
      "step": 1795300
    },
    {
      "epoch": 2.9380805561556134,
      "grad_norm": 0.8152246475219727,
      "learning_rate": 4.151011774938556e-06,
      "loss": 0.0141,
      "step": 1795320
    },
    {
      "epoch": 2.938113286594267,
      "grad_norm": 0.3293890655040741,
      "learning_rate": 4.150945882725039e-06,
      "loss": 0.0136,
      "step": 1795340
    },
    {
      "epoch": 2.93814601703292,
      "grad_norm": 0.6902227401733398,
      "learning_rate": 4.1508799905115215e-06,
      "loss": 0.0176,
      "step": 1795360
    },
    {
      "epoch": 2.9381787474715737,
      "grad_norm": 0.09998518228530884,
      "learning_rate": 4.150814098298004e-06,
      "loss": 0.0089,
      "step": 1795380
    },
    {
      "epoch": 2.938211477910227,
      "grad_norm": 0.19322344660758972,
      "learning_rate": 4.150748206084487e-06,
      "loss": 0.0147,
      "step": 1795400
    },
    {
      "epoch": 2.9382442083488804,
      "grad_norm": 0.4280693531036377,
      "learning_rate": 4.15068231387097e-06,
      "loss": 0.0118,
      "step": 1795420
    },
    {
      "epoch": 2.9382769387875336,
      "grad_norm": 0.4463633894920349,
      "learning_rate": 4.150616421657453e-06,
      "loss": 0.0143,
      "step": 1795440
    },
    {
      "epoch": 2.9383096692261867,
      "grad_norm": 0.15645954012870789,
      "learning_rate": 4.150550529443936e-06,
      "loss": 0.0096,
      "step": 1795460
    },
    {
      "epoch": 2.9383423996648403,
      "grad_norm": 0.16980940103530884,
      "learning_rate": 4.150484637230419e-06,
      "loss": 0.0104,
      "step": 1795480
    },
    {
      "epoch": 2.9383751301034935,
      "grad_norm": 0.2059805691242218,
      "learning_rate": 4.1504187450169015e-06,
      "loss": 0.0118,
      "step": 1795500
    },
    {
      "epoch": 2.938407860542147,
      "grad_norm": 0.08164720237255096,
      "learning_rate": 4.150352852803384e-06,
      "loss": 0.0125,
      "step": 1795520
    },
    {
      "epoch": 2.9384405909808002,
      "grad_norm": 0.134370356798172,
      "learning_rate": 4.150286960589868e-06,
      "loss": 0.0095,
      "step": 1795540
    },
    {
      "epoch": 2.938473321419454,
      "grad_norm": 0.7160460948944092,
      "learning_rate": 4.1502210683763506e-06,
      "loss": 0.0157,
      "step": 1795560
    },
    {
      "epoch": 2.938506051858107,
      "grad_norm": 0.6855741143226624,
      "learning_rate": 4.150155176162833e-06,
      "loss": 0.0185,
      "step": 1795580
    },
    {
      "epoch": 2.93853878229676,
      "grad_norm": 0.7055765390396118,
      "learning_rate": 4.150089283949316e-06,
      "loss": 0.0163,
      "step": 1795600
    },
    {
      "epoch": 2.9385715127354137,
      "grad_norm": 0.24456779658794403,
      "learning_rate": 4.150023391735799e-06,
      "loss": 0.0109,
      "step": 1795620
    },
    {
      "epoch": 2.938604243174067,
      "grad_norm": 0.16009297966957092,
      "learning_rate": 4.1499574995222815e-06,
      "loss": 0.0131,
      "step": 1795640
    },
    {
      "epoch": 2.9386369736127205,
      "grad_norm": 0.2145516574382782,
      "learning_rate": 4.149891607308765e-06,
      "loss": 0.0103,
      "step": 1795660
    },
    {
      "epoch": 2.9386697040513736,
      "grad_norm": 0.23908597230911255,
      "learning_rate": 4.149825715095248e-06,
      "loss": 0.0176,
      "step": 1795680
    },
    {
      "epoch": 2.938702434490027,
      "grad_norm": 0.2868030369281769,
      "learning_rate": 4.149759822881731e-06,
      "loss": 0.0108,
      "step": 1795700
    },
    {
      "epoch": 2.9387351649286804,
      "grad_norm": 0.08880985528230667,
      "learning_rate": 4.149693930668213e-06,
      "loss": 0.0082,
      "step": 1795720
    },
    {
      "epoch": 2.9387678953673335,
      "grad_norm": 0.2918877899646759,
      "learning_rate": 4.149628038454696e-06,
      "loss": 0.0133,
      "step": 1795740
    },
    {
      "epoch": 2.938800625805987,
      "grad_norm": 0.2601790726184845,
      "learning_rate": 4.149562146241179e-06,
      "loss": 0.0167,
      "step": 1795760
    },
    {
      "epoch": 2.9388333562446403,
      "grad_norm": 0.9147237539291382,
      "learning_rate": 4.1494962540276616e-06,
      "loss": 0.0178,
      "step": 1795780
    },
    {
      "epoch": 2.938866086683294,
      "grad_norm": 0.16872721910476685,
      "learning_rate": 4.149430361814144e-06,
      "loss": 0.0122,
      "step": 1795800
    },
    {
      "epoch": 2.938898817121947,
      "grad_norm": 0.23123160004615784,
      "learning_rate": 4.149364469600627e-06,
      "loss": 0.0126,
      "step": 1795820
    },
    {
      "epoch": 2.9389315475606006,
      "grad_norm": 0.09496820718050003,
      "learning_rate": 4.149298577387111e-06,
      "loss": 0.0129,
      "step": 1795840
    },
    {
      "epoch": 2.9389642779992537,
      "grad_norm": 0.27778810262680054,
      "learning_rate": 4.149232685173593e-06,
      "loss": 0.0137,
      "step": 1795860
    },
    {
      "epoch": 2.938997008437907,
      "grad_norm": 0.6337826251983643,
      "learning_rate": 4.149166792960076e-06,
      "loss": 0.0178,
      "step": 1795880
    },
    {
      "epoch": 2.9390297388765605,
      "grad_norm": 0.30957719683647156,
      "learning_rate": 4.14910090074656e-06,
      "loss": 0.0163,
      "step": 1795900
    },
    {
      "epoch": 2.9390624693152136,
      "grad_norm": 0.1688329130411148,
      "learning_rate": 4.1490350085330424e-06,
      "loss": 0.0106,
      "step": 1795920
    },
    {
      "epoch": 2.9390951997538672,
      "grad_norm": 0.31264224648475647,
      "learning_rate": 4.148969116319525e-06,
      "loss": 0.0122,
      "step": 1795940
    },
    {
      "epoch": 2.9391279301925204,
      "grad_norm": 0.08240769058465958,
      "learning_rate": 4.148903224106008e-06,
      "loss": 0.0152,
      "step": 1795960
    },
    {
      "epoch": 2.939160660631174,
      "grad_norm": 0.3385780155658722,
      "learning_rate": 4.148837331892491e-06,
      "loss": 0.0161,
      "step": 1795980
    },
    {
      "epoch": 2.939193391069827,
      "grad_norm": 0.12743741273880005,
      "learning_rate": 4.148771439678973e-06,
      "loss": 0.0096,
      "step": 1796000
    },
    {
      "epoch": 2.9392261215084803,
      "grad_norm": 0.23325973749160767,
      "learning_rate": 4.148705547465456e-06,
      "loss": 0.0152,
      "step": 1796020
    },
    {
      "epoch": 2.939258851947134,
      "grad_norm": 0.10066097229719162,
      "learning_rate": 4.148639655251939e-06,
      "loss": 0.0126,
      "step": 1796040
    },
    {
      "epoch": 2.939291582385787,
      "grad_norm": 0.31395021080970764,
      "learning_rate": 4.1485737630384225e-06,
      "loss": 0.0122,
      "step": 1796060
    },
    {
      "epoch": 2.9393243128244406,
      "grad_norm": 0.2562360167503357,
      "learning_rate": 4.148507870824905e-06,
      "loss": 0.0096,
      "step": 1796080
    },
    {
      "epoch": 2.9393570432630938,
      "grad_norm": 0.4799705743789673,
      "learning_rate": 4.148441978611388e-06,
      "loss": 0.008,
      "step": 1796100
    },
    {
      "epoch": 2.9393897737017474,
      "grad_norm": 0.6548044681549072,
      "learning_rate": 4.148376086397871e-06,
      "loss": 0.0208,
      "step": 1796120
    },
    {
      "epoch": 2.9394225041404005,
      "grad_norm": 0.4713805615901947,
      "learning_rate": 4.148310194184353e-06,
      "loss": 0.0243,
      "step": 1796140
    },
    {
      "epoch": 2.9394552345790537,
      "grad_norm": 0.30325260758399963,
      "learning_rate": 4.148244301970836e-06,
      "loss": 0.0104,
      "step": 1796160
    },
    {
      "epoch": 2.9394879650177073,
      "grad_norm": 0.2094404101371765,
      "learning_rate": 4.148178409757319e-06,
      "loss": 0.0095,
      "step": 1796180
    },
    {
      "epoch": 2.9395206954563604,
      "grad_norm": 0.11454826593399048,
      "learning_rate": 4.148112517543802e-06,
      "loss": 0.0142,
      "step": 1796200
    },
    {
      "epoch": 2.939553425895014,
      "grad_norm": 0.4535897672176361,
      "learning_rate": 4.148046625330284e-06,
      "loss": 0.0124,
      "step": 1796220
    },
    {
      "epoch": 2.939586156333667,
      "grad_norm": 0.12504930794239044,
      "learning_rate": 4.147980733116768e-06,
      "loss": 0.0128,
      "step": 1796240
    },
    {
      "epoch": 2.9396188867723207,
      "grad_norm": 0.3085273802280426,
      "learning_rate": 4.147914840903251e-06,
      "loss": 0.0151,
      "step": 1796260
    },
    {
      "epoch": 2.939651617210974,
      "grad_norm": 0.26985153555870056,
      "learning_rate": 4.147848948689734e-06,
      "loss": 0.0089,
      "step": 1796280
    },
    {
      "epoch": 2.939684347649627,
      "grad_norm": 0.6224632859230042,
      "learning_rate": 4.147783056476217e-06,
      "loss": 0.0143,
      "step": 1796300
    },
    {
      "epoch": 2.9397170780882806,
      "grad_norm": 0.2640511393547058,
      "learning_rate": 4.1477171642627e-06,
      "loss": 0.0148,
      "step": 1796320
    },
    {
      "epoch": 2.939749808526934,
      "grad_norm": 0.21524101495742798,
      "learning_rate": 4.1476512720491825e-06,
      "loss": 0.0177,
      "step": 1796340
    },
    {
      "epoch": 2.939782538965587,
      "grad_norm": 0.10940110683441162,
      "learning_rate": 4.147585379835665e-06,
      "loss": 0.0152,
      "step": 1796360
    },
    {
      "epoch": 2.9398152694042405,
      "grad_norm": 0.4711238443851471,
      "learning_rate": 4.147519487622148e-06,
      "loss": 0.0146,
      "step": 1796380
    },
    {
      "epoch": 2.939847999842894,
      "grad_norm": 0.4015573263168335,
      "learning_rate": 4.147453595408631e-06,
      "loss": 0.0107,
      "step": 1796400
    },
    {
      "epoch": 2.9398807302815473,
      "grad_norm": 0.20082619786262512,
      "learning_rate": 4.1473877031951135e-06,
      "loss": 0.0118,
      "step": 1796420
    },
    {
      "epoch": 2.9399134607202004,
      "grad_norm": 0.764236569404602,
      "learning_rate": 4.147321810981596e-06,
      "loss": 0.0134,
      "step": 1796440
    },
    {
      "epoch": 2.939946191158854,
      "grad_norm": 0.398910254240036,
      "learning_rate": 4.14725591876808e-06,
      "loss": 0.0105,
      "step": 1796460
    },
    {
      "epoch": 2.939978921597507,
      "grad_norm": 0.12144306302070618,
      "learning_rate": 4.1471900265545625e-06,
      "loss": 0.0163,
      "step": 1796480
    },
    {
      "epoch": 2.9400116520361603,
      "grad_norm": 0.2360435426235199,
      "learning_rate": 4.147124134341045e-06,
      "loss": 0.0122,
      "step": 1796500
    },
    {
      "epoch": 2.940044382474814,
      "grad_norm": 0.1710810363292694,
      "learning_rate": 4.147058242127528e-06,
      "loss": 0.0142,
      "step": 1796520
    },
    {
      "epoch": 2.9400771129134675,
      "grad_norm": 0.31157657504081726,
      "learning_rate": 4.146992349914011e-06,
      "loss": 0.0138,
      "step": 1796540
    },
    {
      "epoch": 2.9401098433521207,
      "grad_norm": 0.11470112949609756,
      "learning_rate": 4.1469264577004935e-06,
      "loss": 0.01,
      "step": 1796560
    },
    {
      "epoch": 2.940142573790774,
      "grad_norm": 0.23805586993694305,
      "learning_rate": 4.146860565486977e-06,
      "loss": 0.0183,
      "step": 1796580
    },
    {
      "epoch": 2.9401753042294274,
      "grad_norm": 0.17505627870559692,
      "learning_rate": 4.14679467327346e-06,
      "loss": 0.0104,
      "step": 1796600
    },
    {
      "epoch": 2.9402080346680806,
      "grad_norm": 0.20906810462474823,
      "learning_rate": 4.1467287810599426e-06,
      "loss": 0.0111,
      "step": 1796620
    },
    {
      "epoch": 2.9402407651067337,
      "grad_norm": 0.39184343814849854,
      "learning_rate": 4.146662888846425e-06,
      "loss": 0.0156,
      "step": 1796640
    },
    {
      "epoch": 2.9402734955453873,
      "grad_norm": 0.1207781434059143,
      "learning_rate": 4.146596996632908e-06,
      "loss": 0.0128,
      "step": 1796660
    },
    {
      "epoch": 2.940306225984041,
      "grad_norm": 0.6166145205497742,
      "learning_rate": 4.146531104419392e-06,
      "loss": 0.0099,
      "step": 1796680
    },
    {
      "epoch": 2.940338956422694,
      "grad_norm": 0.21454979479312897,
      "learning_rate": 4.146465212205874e-06,
      "loss": 0.0107,
      "step": 1796700
    },
    {
      "epoch": 2.940371686861347,
      "grad_norm": 0.4567776024341583,
      "learning_rate": 4.146399319992357e-06,
      "loss": 0.0143,
      "step": 1796720
    },
    {
      "epoch": 2.940404417300001,
      "grad_norm": 0.34988123178482056,
      "learning_rate": 4.14633342777884e-06,
      "loss": 0.011,
      "step": 1796740
    },
    {
      "epoch": 2.940437147738654,
      "grad_norm": 0.33749809861183167,
      "learning_rate": 4.146267535565323e-06,
      "loss": 0.0203,
      "step": 1796760
    },
    {
      "epoch": 2.940469878177307,
      "grad_norm": 1.5797791481018066,
      "learning_rate": 4.146201643351805e-06,
      "loss": 0.0147,
      "step": 1796780
    },
    {
      "epoch": 2.9405026086159607,
      "grad_norm": 0.44422200322151184,
      "learning_rate": 4.146135751138288e-06,
      "loss": 0.0186,
      "step": 1796800
    },
    {
      "epoch": 2.9405353390546143,
      "grad_norm": 0.8100081086158752,
      "learning_rate": 4.146069858924771e-06,
      "loss": 0.0125,
      "step": 1796820
    },
    {
      "epoch": 2.9405680694932674,
      "grad_norm": 1.1686294078826904,
      "learning_rate": 4.1460039667112535e-06,
      "loss": 0.0115,
      "step": 1796840
    },
    {
      "epoch": 2.9406007999319206,
      "grad_norm": 0.2631499171257019,
      "learning_rate": 4.145938074497737e-06,
      "loss": 0.0109,
      "step": 1796860
    },
    {
      "epoch": 2.940633530370574,
      "grad_norm": 0.10875628888607025,
      "learning_rate": 4.14587218228422e-06,
      "loss": 0.0129,
      "step": 1796880
    },
    {
      "epoch": 2.9406662608092273,
      "grad_norm": 0.36379504203796387,
      "learning_rate": 4.145806290070703e-06,
      "loss": 0.0171,
      "step": 1796900
    },
    {
      "epoch": 2.9406989912478805,
      "grad_norm": 0.3046330511569977,
      "learning_rate": 4.145740397857185e-06,
      "loss": 0.0174,
      "step": 1796920
    },
    {
      "epoch": 2.940731721686534,
      "grad_norm": 0.30628129839897156,
      "learning_rate": 4.145674505643669e-06,
      "loss": 0.0149,
      "step": 1796940
    },
    {
      "epoch": 2.9407644521251872,
      "grad_norm": 0.09411036968231201,
      "learning_rate": 4.145608613430152e-06,
      "loss": 0.0173,
      "step": 1796960
    },
    {
      "epoch": 2.940797182563841,
      "grad_norm": 0.4947623908519745,
      "learning_rate": 4.145542721216634e-06,
      "loss": 0.0113,
      "step": 1796980
    },
    {
      "epoch": 2.940829913002494,
      "grad_norm": 0.250815749168396,
      "learning_rate": 4.145476829003117e-06,
      "loss": 0.0105,
      "step": 1797000
    },
    {
      "epoch": 2.9408626434411476,
      "grad_norm": 0.19321386516094208,
      "learning_rate": 4.1454109367896e-06,
      "loss": 0.0086,
      "step": 1797020
    },
    {
      "epoch": 2.9408953738798007,
      "grad_norm": 0.14684510231018066,
      "learning_rate": 4.145345044576083e-06,
      "loss": 0.0162,
      "step": 1797040
    },
    {
      "epoch": 2.940928104318454,
      "grad_norm": 0.08022665232419968,
      "learning_rate": 4.145279152362565e-06,
      "loss": 0.0136,
      "step": 1797060
    },
    {
      "epoch": 2.9409608347571075,
      "grad_norm": 0.1406579613685608,
      "learning_rate": 4.145213260149049e-06,
      "loss": 0.0128,
      "step": 1797080
    },
    {
      "epoch": 2.9409935651957606,
      "grad_norm": 0.3672710955142975,
      "learning_rate": 4.145147367935532e-06,
      "loss": 0.0106,
      "step": 1797100
    },
    {
      "epoch": 2.941026295634414,
      "grad_norm": 0.31608492136001587,
      "learning_rate": 4.1450814757220144e-06,
      "loss": 0.007,
      "step": 1797120
    },
    {
      "epoch": 2.9410590260730674,
      "grad_norm": 0.23156702518463135,
      "learning_rate": 4.145015583508497e-06,
      "loss": 0.012,
      "step": 1797140
    },
    {
      "epoch": 2.941091756511721,
      "grad_norm": 0.29758569598197937,
      "learning_rate": 4.14494969129498e-06,
      "loss": 0.0133,
      "step": 1797160
    },
    {
      "epoch": 2.941124486950374,
      "grad_norm": 0.13274067640304565,
      "learning_rate": 4.144883799081463e-06,
      "loss": 0.0104,
      "step": 1797180
    },
    {
      "epoch": 2.9411572173890272,
      "grad_norm": 0.24803714454174042,
      "learning_rate": 4.144817906867945e-06,
      "loss": 0.0101,
      "step": 1797200
    },
    {
      "epoch": 2.941189947827681,
      "grad_norm": 0.4321816563606262,
      "learning_rate": 4.144752014654428e-06,
      "loss": 0.0099,
      "step": 1797220
    },
    {
      "epoch": 2.941222678266334,
      "grad_norm": 0.14095932245254517,
      "learning_rate": 4.144686122440911e-06,
      "loss": 0.0147,
      "step": 1797240
    },
    {
      "epoch": 2.9412554087049876,
      "grad_norm": 0.21369118988513947,
      "learning_rate": 4.1446202302273945e-06,
      "loss": 0.0105,
      "step": 1797260
    },
    {
      "epoch": 2.9412881391436407,
      "grad_norm": 0.12182405591011047,
      "learning_rate": 4.144554338013877e-06,
      "loss": 0.0107,
      "step": 1797280
    },
    {
      "epoch": 2.9413208695822943,
      "grad_norm": 0.09908361732959747,
      "learning_rate": 4.14448844580036e-06,
      "loss": 0.0078,
      "step": 1797300
    },
    {
      "epoch": 2.9413536000209475,
      "grad_norm": 0.15831473469734192,
      "learning_rate": 4.1444225535868435e-06,
      "loss": 0.017,
      "step": 1797320
    },
    {
      "epoch": 2.9413863304596006,
      "grad_norm": 0.4868263602256775,
      "learning_rate": 4.144356661373326e-06,
      "loss": 0.0121,
      "step": 1797340
    },
    {
      "epoch": 2.9414190608982542,
      "grad_norm": 0.3742178678512573,
      "learning_rate": 4.144290769159809e-06,
      "loss": 0.0122,
      "step": 1797360
    },
    {
      "epoch": 2.9414517913369074,
      "grad_norm": 0.2534238398075104,
      "learning_rate": 4.144224876946292e-06,
      "loss": 0.0133,
      "step": 1797380
    },
    {
      "epoch": 2.941484521775561,
      "grad_norm": 0.14388394355773926,
      "learning_rate": 4.1441589847327745e-06,
      "loss": 0.0077,
      "step": 1797400
    },
    {
      "epoch": 2.941517252214214,
      "grad_norm": 0.15676431357860565,
      "learning_rate": 4.144093092519257e-06,
      "loss": 0.0171,
      "step": 1797420
    },
    {
      "epoch": 2.9415499826528677,
      "grad_norm": 0.7122833728790283,
      "learning_rate": 4.14402720030574e-06,
      "loss": 0.0145,
      "step": 1797440
    },
    {
      "epoch": 2.941582713091521,
      "grad_norm": 0.08077777177095413,
      "learning_rate": 4.143961308092223e-06,
      "loss": 0.0113,
      "step": 1797460
    },
    {
      "epoch": 2.941615443530174,
      "grad_norm": 0.6761759519577026,
      "learning_rate": 4.143895415878706e-06,
      "loss": 0.0128,
      "step": 1797480
    },
    {
      "epoch": 2.9416481739688276,
      "grad_norm": 0.19525715708732605,
      "learning_rate": 4.143829523665189e-06,
      "loss": 0.011,
      "step": 1797500
    },
    {
      "epoch": 2.9416809044074808,
      "grad_norm": 0.5931494832038879,
      "learning_rate": 4.143763631451672e-06,
      "loss": 0.0145,
      "step": 1797520
    },
    {
      "epoch": 2.9417136348461344,
      "grad_norm": 0.7423866987228394,
      "learning_rate": 4.1436977392381545e-06,
      "loss": 0.019,
      "step": 1797540
    },
    {
      "epoch": 2.9417463652847875,
      "grad_norm": 0.6674707531929016,
      "learning_rate": 4.143631847024637e-06,
      "loss": 0.0141,
      "step": 1797560
    },
    {
      "epoch": 2.941779095723441,
      "grad_norm": 0.23638254404067993,
      "learning_rate": 4.14356595481112e-06,
      "loss": 0.0139,
      "step": 1797580
    },
    {
      "epoch": 2.9418118261620942,
      "grad_norm": 0.4599020481109619,
      "learning_rate": 4.143500062597603e-06,
      "loss": 0.0135,
      "step": 1797600
    },
    {
      "epoch": 2.9418445566007474,
      "grad_norm": 0.18221090734004974,
      "learning_rate": 4.1434341703840855e-06,
      "loss": 0.0107,
      "step": 1797620
    },
    {
      "epoch": 2.941877287039401,
      "grad_norm": 0.08371584117412567,
      "learning_rate": 4.143368278170569e-06,
      "loss": 0.0115,
      "step": 1797640
    },
    {
      "epoch": 2.941910017478054,
      "grad_norm": 0.28877052664756775,
      "learning_rate": 4.143302385957052e-06,
      "loss": 0.0097,
      "step": 1797660
    },
    {
      "epoch": 2.9419427479167077,
      "grad_norm": 0.079078309237957,
      "learning_rate": 4.1432364937435345e-06,
      "loss": 0.0113,
      "step": 1797680
    },
    {
      "epoch": 2.941975478355361,
      "grad_norm": 0.14109614491462708,
      "learning_rate": 4.143170601530018e-06,
      "loss": 0.0104,
      "step": 1797700
    },
    {
      "epoch": 2.9420082087940145,
      "grad_norm": 0.3617173731327057,
      "learning_rate": 4.143104709316501e-06,
      "loss": 0.0149,
      "step": 1797720
    },
    {
      "epoch": 2.9420409392326676,
      "grad_norm": 0.29044583439826965,
      "learning_rate": 4.143038817102984e-06,
      "loss": 0.0148,
      "step": 1797740
    },
    {
      "epoch": 2.942073669671321,
      "grad_norm": 0.20212988555431366,
      "learning_rate": 4.142972924889466e-06,
      "loss": 0.0164,
      "step": 1797760
    },
    {
      "epoch": 2.9421064001099744,
      "grad_norm": 0.1560700386762619,
      "learning_rate": 4.142907032675949e-06,
      "loss": 0.0084,
      "step": 1797780
    },
    {
      "epoch": 2.9421391305486275,
      "grad_norm": 0.5869592428207397,
      "learning_rate": 4.142841140462432e-06,
      "loss": 0.0166,
      "step": 1797800
    },
    {
      "epoch": 2.942171860987281,
      "grad_norm": 0.33540695905685425,
      "learning_rate": 4.1427752482489146e-06,
      "loss": 0.0167,
      "step": 1797820
    },
    {
      "epoch": 2.9422045914259343,
      "grad_norm": 0.1574101448059082,
      "learning_rate": 4.142709356035397e-06,
      "loss": 0.0102,
      "step": 1797840
    },
    {
      "epoch": 2.942237321864588,
      "grad_norm": 0.35758957266807556,
      "learning_rate": 4.14264346382188e-06,
      "loss": 0.014,
      "step": 1797860
    },
    {
      "epoch": 2.942270052303241,
      "grad_norm": 0.13046519458293915,
      "learning_rate": 4.142577571608364e-06,
      "loss": 0.0157,
      "step": 1797880
    },
    {
      "epoch": 2.942302782741894,
      "grad_norm": 0.4406355619430542,
      "learning_rate": 4.142511679394846e-06,
      "loss": 0.01,
      "step": 1797900
    },
    {
      "epoch": 2.9423355131805478,
      "grad_norm": 0.8859590888023376,
      "learning_rate": 4.142445787181329e-06,
      "loss": 0.01,
      "step": 1797920
    },
    {
      "epoch": 2.942368243619201,
      "grad_norm": 0.20750056207180023,
      "learning_rate": 4.142379894967812e-06,
      "loss": 0.0105,
      "step": 1797940
    },
    {
      "epoch": 2.942400974057854,
      "grad_norm": 0.23578302562236786,
      "learning_rate": 4.142314002754295e-06,
      "loss": 0.0117,
      "step": 1797960
    },
    {
      "epoch": 2.9424337044965077,
      "grad_norm": 0.3825177848339081,
      "learning_rate": 4.142248110540777e-06,
      "loss": 0.0133,
      "step": 1797980
    },
    {
      "epoch": 2.9424664349351612,
      "grad_norm": 0.9208609461784363,
      "learning_rate": 4.142182218327261e-06,
      "loss": 0.0175,
      "step": 1798000
    },
    {
      "epoch": 2.9424991653738144,
      "grad_norm": 0.12601329386234283,
      "learning_rate": 4.142116326113744e-06,
      "loss": 0.0101,
      "step": 1798020
    },
    {
      "epoch": 2.9425318958124675,
      "grad_norm": 0.3782523572444916,
      "learning_rate": 4.142050433900226e-06,
      "loss": 0.0129,
      "step": 1798040
    },
    {
      "epoch": 2.942564626251121,
      "grad_norm": 0.197942852973938,
      "learning_rate": 4.141984541686709e-06,
      "loss": 0.014,
      "step": 1798060
    },
    {
      "epoch": 2.9425973566897743,
      "grad_norm": 0.3708757758140564,
      "learning_rate": 4.141918649473192e-06,
      "loss": 0.0097,
      "step": 1798080
    },
    {
      "epoch": 2.9426300871284274,
      "grad_norm": 0.47354641556739807,
      "learning_rate": 4.1418527572596755e-06,
      "loss": 0.0163,
      "step": 1798100
    },
    {
      "epoch": 2.942662817567081,
      "grad_norm": 0.4000568687915802,
      "learning_rate": 4.141786865046158e-06,
      "loss": 0.0113,
      "step": 1798120
    },
    {
      "epoch": 2.9426955480057346,
      "grad_norm": 0.4718233644962311,
      "learning_rate": 4.141720972832641e-06,
      "loss": 0.0138,
      "step": 1798140
    },
    {
      "epoch": 2.942728278444388,
      "grad_norm": 0.18013417720794678,
      "learning_rate": 4.141655080619124e-06,
      "loss": 0.0152,
      "step": 1798160
    },
    {
      "epoch": 2.942761008883041,
      "grad_norm": 0.13776841759681702,
      "learning_rate": 4.141589188405606e-06,
      "loss": 0.0143,
      "step": 1798180
    },
    {
      "epoch": 2.9427937393216945,
      "grad_norm": 0.29563865065574646,
      "learning_rate": 4.141523296192089e-06,
      "loss": 0.0113,
      "step": 1798200
    },
    {
      "epoch": 2.9428264697603477,
      "grad_norm": 0.2836052179336548,
      "learning_rate": 4.141457403978572e-06,
      "loss": 0.0105,
      "step": 1798220
    },
    {
      "epoch": 2.942859200199001,
      "grad_norm": 0.7950935363769531,
      "learning_rate": 4.141391511765055e-06,
      "loss": 0.0132,
      "step": 1798240
    },
    {
      "epoch": 2.9428919306376544,
      "grad_norm": 0.12434013187885284,
      "learning_rate": 4.141325619551537e-06,
      "loss": 0.0101,
      "step": 1798260
    },
    {
      "epoch": 2.942924661076308,
      "grad_norm": 0.14047542214393616,
      "learning_rate": 4.141259727338021e-06,
      "loss": 0.0163,
      "step": 1798280
    },
    {
      "epoch": 2.942957391514961,
      "grad_norm": 0.338358074426651,
      "learning_rate": 4.141193835124504e-06,
      "loss": 0.0174,
      "step": 1798300
    },
    {
      "epoch": 2.9429901219536143,
      "grad_norm": 0.43358054757118225,
      "learning_rate": 4.1411279429109864e-06,
      "loss": 0.0183,
      "step": 1798320
    },
    {
      "epoch": 2.943022852392268,
      "grad_norm": 0.3698464035987854,
      "learning_rate": 4.14106205069747e-06,
      "loss": 0.0118,
      "step": 1798340
    },
    {
      "epoch": 2.943055582830921,
      "grad_norm": 0.21198002994060516,
      "learning_rate": 4.140996158483953e-06,
      "loss": 0.0167,
      "step": 1798360
    },
    {
      "epoch": 2.943088313269574,
      "grad_norm": 0.8471655249595642,
      "learning_rate": 4.1409302662704355e-06,
      "loss": 0.0134,
      "step": 1798380
    },
    {
      "epoch": 2.943121043708228,
      "grad_norm": 0.5459766983985901,
      "learning_rate": 4.140864374056918e-06,
      "loss": 0.0217,
      "step": 1798400
    },
    {
      "epoch": 2.943153774146881,
      "grad_norm": 0.9624146819114685,
      "learning_rate": 4.140798481843401e-06,
      "loss": 0.0166,
      "step": 1798420
    },
    {
      "epoch": 2.9431865045855345,
      "grad_norm": 0.33252185583114624,
      "learning_rate": 4.140732589629884e-06,
      "loss": 0.0143,
      "step": 1798440
    },
    {
      "epoch": 2.9432192350241877,
      "grad_norm": 0.19736215472221375,
      "learning_rate": 4.1406666974163665e-06,
      "loss": 0.0119,
      "step": 1798460
    },
    {
      "epoch": 2.9432519654628413,
      "grad_norm": 0.4903048574924469,
      "learning_rate": 4.140600805202849e-06,
      "loss": 0.0133,
      "step": 1798480
    },
    {
      "epoch": 2.9432846959014944,
      "grad_norm": 0.41446566581726074,
      "learning_rate": 4.140534912989333e-06,
      "loss": 0.0132,
      "step": 1798500
    },
    {
      "epoch": 2.9433174263401476,
      "grad_norm": 0.14875413477420807,
      "learning_rate": 4.1404690207758155e-06,
      "loss": 0.011,
      "step": 1798520
    },
    {
      "epoch": 2.943350156778801,
      "grad_norm": 0.6350253224372864,
      "learning_rate": 4.140403128562298e-06,
      "loss": 0.0136,
      "step": 1798540
    },
    {
      "epoch": 2.9433828872174543,
      "grad_norm": 0.31050994992256165,
      "learning_rate": 4.140337236348781e-06,
      "loss": 0.0108,
      "step": 1798560
    },
    {
      "epoch": 2.943415617656108,
      "grad_norm": 0.3965955674648285,
      "learning_rate": 4.140271344135264e-06,
      "loss": 0.007,
      "step": 1798580
    },
    {
      "epoch": 2.943448348094761,
      "grad_norm": 0.10008972138166428,
      "learning_rate": 4.1402054519217465e-06,
      "loss": 0.0111,
      "step": 1798600
    },
    {
      "epoch": 2.9434810785334147,
      "grad_norm": 0.03882114961743355,
      "learning_rate": 4.140139559708229e-06,
      "loss": 0.0137,
      "step": 1798620
    },
    {
      "epoch": 2.943513808972068,
      "grad_norm": 0.3088494837284088,
      "learning_rate": 4.140073667494712e-06,
      "loss": 0.013,
      "step": 1798640
    },
    {
      "epoch": 2.943546539410721,
      "grad_norm": 0.21942636370658875,
      "learning_rate": 4.140007775281195e-06,
      "loss": 0.0122,
      "step": 1798660
    },
    {
      "epoch": 2.9435792698493746,
      "grad_norm": 0.22391068935394287,
      "learning_rate": 4.139941883067678e-06,
      "loss": 0.0118,
      "step": 1798680
    },
    {
      "epoch": 2.9436120002880277,
      "grad_norm": 0.12066749483346939,
      "learning_rate": 4.139875990854161e-06,
      "loss": 0.0099,
      "step": 1798700
    },
    {
      "epoch": 2.9436447307266813,
      "grad_norm": 0.24964390695095062,
      "learning_rate": 4.139810098640644e-06,
      "loss": 0.0082,
      "step": 1798720
    },
    {
      "epoch": 2.9436774611653345,
      "grad_norm": 0.16867783665657043,
      "learning_rate": 4.139744206427127e-06,
      "loss": 0.0095,
      "step": 1798740
    },
    {
      "epoch": 2.943710191603988,
      "grad_norm": 0.16333763301372528,
      "learning_rate": 4.13967831421361e-06,
      "loss": 0.0108,
      "step": 1798760
    },
    {
      "epoch": 2.943742922042641,
      "grad_norm": 0.9520021677017212,
      "learning_rate": 4.139612422000093e-06,
      "loss": 0.0173,
      "step": 1798780
    },
    {
      "epoch": 2.9437756524812944,
      "grad_norm": 0.22434523701667786,
      "learning_rate": 4.139546529786576e-06,
      "loss": 0.0118,
      "step": 1798800
    },
    {
      "epoch": 2.943808382919948,
      "grad_norm": 0.7492084503173828,
      "learning_rate": 4.139480637573058e-06,
      "loss": 0.0117,
      "step": 1798820
    },
    {
      "epoch": 2.943841113358601,
      "grad_norm": 0.39184367656707764,
      "learning_rate": 4.139414745359541e-06,
      "loss": 0.0131,
      "step": 1798840
    },
    {
      "epoch": 2.9438738437972547,
      "grad_norm": 0.7299474477767944,
      "learning_rate": 4.139348853146024e-06,
      "loss": 0.0203,
      "step": 1798860
    },
    {
      "epoch": 2.943906574235908,
      "grad_norm": 0.21731682121753693,
      "learning_rate": 4.1392829609325065e-06,
      "loss": 0.0173,
      "step": 1798880
    },
    {
      "epoch": 2.9439393046745614,
      "grad_norm": 0.060562863945961,
      "learning_rate": 4.13921706871899e-06,
      "loss": 0.009,
      "step": 1798900
    },
    {
      "epoch": 2.9439720351132146,
      "grad_norm": 0.10094618797302246,
      "learning_rate": 4.139151176505473e-06,
      "loss": 0.0109,
      "step": 1798920
    },
    {
      "epoch": 2.9440047655518677,
      "grad_norm": 0.13868597149848938,
      "learning_rate": 4.139085284291956e-06,
      "loss": 0.0167,
      "step": 1798940
    },
    {
      "epoch": 2.9440374959905213,
      "grad_norm": 0.21085569262504578,
      "learning_rate": 4.139019392078438e-06,
      "loss": 0.0135,
      "step": 1798960
    },
    {
      "epoch": 2.9440702264291745,
      "grad_norm": 0.3928163945674896,
      "learning_rate": 4.138953499864921e-06,
      "loss": 0.0179,
      "step": 1798980
    },
    {
      "epoch": 2.944102956867828,
      "grad_norm": 0.08214690536260605,
      "learning_rate": 4.138887607651404e-06,
      "loss": 0.0173,
      "step": 1799000
    },
    {
      "epoch": 2.9441356873064812,
      "grad_norm": 2.58227276802063,
      "learning_rate": 4.1388217154378866e-06,
      "loss": 0.0096,
      "step": 1799020
    },
    {
      "epoch": 2.944168417745135,
      "grad_norm": 0.3994671702384949,
      "learning_rate": 4.138755823224369e-06,
      "loss": 0.0176,
      "step": 1799040
    },
    {
      "epoch": 2.944201148183788,
      "grad_norm": 0.40194612741470337,
      "learning_rate": 4.138689931010853e-06,
      "loss": 0.0135,
      "step": 1799060
    },
    {
      "epoch": 2.944233878622441,
      "grad_norm": 0.3395316004753113,
      "learning_rate": 4.138624038797336e-06,
      "loss": 0.0126,
      "step": 1799080
    },
    {
      "epoch": 2.9442666090610947,
      "grad_norm": 0.5132019519805908,
      "learning_rate": 4.138558146583818e-06,
      "loss": 0.0089,
      "step": 1799100
    },
    {
      "epoch": 2.944299339499748,
      "grad_norm": 0.6947460770606995,
      "learning_rate": 4.138492254370302e-06,
      "loss": 0.0159,
      "step": 1799120
    },
    {
      "epoch": 2.9443320699384015,
      "grad_norm": 0.29697296023368835,
      "learning_rate": 4.138426362156785e-06,
      "loss": 0.0112,
      "step": 1799140
    },
    {
      "epoch": 2.9443648003770546,
      "grad_norm": 0.5689277648925781,
      "learning_rate": 4.1383604699432674e-06,
      "loss": 0.0123,
      "step": 1799160
    },
    {
      "epoch": 2.944397530815708,
      "grad_norm": 0.3490296006202698,
      "learning_rate": 4.13829457772975e-06,
      "loss": 0.0097,
      "step": 1799180
    },
    {
      "epoch": 2.9444302612543614,
      "grad_norm": 1.1098254919052124,
      "learning_rate": 4.138228685516233e-06,
      "loss": 0.0113,
      "step": 1799200
    },
    {
      "epoch": 2.9444629916930145,
      "grad_norm": 0.2465399205684662,
      "learning_rate": 4.138162793302716e-06,
      "loss": 0.0146,
      "step": 1799220
    },
    {
      "epoch": 2.944495722131668,
      "grad_norm": 0.28429675102233887,
      "learning_rate": 4.138096901089198e-06,
      "loss": 0.01,
      "step": 1799240
    },
    {
      "epoch": 2.9445284525703213,
      "grad_norm": 0.5058789849281311,
      "learning_rate": 4.138031008875681e-06,
      "loss": 0.0119,
      "step": 1799260
    },
    {
      "epoch": 2.944561183008975,
      "grad_norm": 0.2651875615119934,
      "learning_rate": 4.137965116662164e-06,
      "loss": 0.01,
      "step": 1799280
    },
    {
      "epoch": 2.944593913447628,
      "grad_norm": 0.5915402770042419,
      "learning_rate": 4.1378992244486475e-06,
      "loss": 0.0082,
      "step": 1799300
    },
    {
      "epoch": 2.9446266438862816,
      "grad_norm": 0.4080050587654114,
      "learning_rate": 4.13783333223513e-06,
      "loss": 0.0129,
      "step": 1799320
    },
    {
      "epoch": 2.9446593743249347,
      "grad_norm": 0.3530479669570923,
      "learning_rate": 4.137767440021613e-06,
      "loss": 0.0069,
      "step": 1799340
    },
    {
      "epoch": 2.944692104763588,
      "grad_norm": 0.23400694131851196,
      "learning_rate": 4.137701547808096e-06,
      "loss": 0.0122,
      "step": 1799360
    },
    {
      "epoch": 2.9447248352022415,
      "grad_norm": 1.016860008239746,
      "learning_rate": 4.1376356555945784e-06,
      "loss": 0.0123,
      "step": 1799380
    },
    {
      "epoch": 2.9447575656408946,
      "grad_norm": 0.30325552821159363,
      "learning_rate": 4.137569763381062e-06,
      "loss": 0.0154,
      "step": 1799400
    },
    {
      "epoch": 2.944790296079548,
      "grad_norm": 0.09610393643379211,
      "learning_rate": 4.137503871167545e-06,
      "loss": 0.01,
      "step": 1799420
    },
    {
      "epoch": 2.9448230265182014,
      "grad_norm": 0.5660906434059143,
      "learning_rate": 4.1374379789540275e-06,
      "loss": 0.0151,
      "step": 1799440
    },
    {
      "epoch": 2.944855756956855,
      "grad_norm": 0.331067830324173,
      "learning_rate": 4.13737208674051e-06,
      "loss": 0.0078,
      "step": 1799460
    },
    {
      "epoch": 2.944888487395508,
      "grad_norm": 0.28110355138778687,
      "learning_rate": 4.137306194526993e-06,
      "loss": 0.0171,
      "step": 1799480
    },
    {
      "epoch": 2.9449212178341613,
      "grad_norm": 0.24279747903347015,
      "learning_rate": 4.137240302313476e-06,
      "loss": 0.0122,
      "step": 1799500
    },
    {
      "epoch": 2.944953948272815,
      "grad_norm": 0.7259983420372009,
      "learning_rate": 4.137174410099959e-06,
      "loss": 0.0111,
      "step": 1799520
    },
    {
      "epoch": 2.944986678711468,
      "grad_norm": 0.1850617229938507,
      "learning_rate": 4.137108517886442e-06,
      "loss": 0.0135,
      "step": 1799540
    },
    {
      "epoch": 2.945019409150121,
      "grad_norm": 0.2104552537202835,
      "learning_rate": 4.137042625672925e-06,
      "loss": 0.0125,
      "step": 1799560
    },
    {
      "epoch": 2.9450521395887748,
      "grad_norm": 0.9142403602600098,
      "learning_rate": 4.1369767334594075e-06,
      "loss": 0.011,
      "step": 1799580
    },
    {
      "epoch": 2.9450848700274284,
      "grad_norm": 0.11818447709083557,
      "learning_rate": 4.13691084124589e-06,
      "loss": 0.0133,
      "step": 1799600
    },
    {
      "epoch": 2.9451176004660815,
      "grad_norm": 0.6596555709838867,
      "learning_rate": 4.136844949032373e-06,
      "loss": 0.015,
      "step": 1799620
    },
    {
      "epoch": 2.9451503309047347,
      "grad_norm": 1.0529557466506958,
      "learning_rate": 4.136779056818856e-06,
      "loss": 0.0108,
      "step": 1799640
    },
    {
      "epoch": 2.9451830613433883,
      "grad_norm": 0.9714435338973999,
      "learning_rate": 4.1367131646053385e-06,
      "loss": 0.014,
      "step": 1799660
    },
    {
      "epoch": 2.9452157917820414,
      "grad_norm": 0.6130732893943787,
      "learning_rate": 4.136647272391821e-06,
      "loss": 0.0093,
      "step": 1799680
    },
    {
      "epoch": 2.9452485222206946,
      "grad_norm": 0.4932153820991516,
      "learning_rate": 4.136581380178305e-06,
      "loss": 0.0133,
      "step": 1799700
    },
    {
      "epoch": 2.945281252659348,
      "grad_norm": 0.249437153339386,
      "learning_rate": 4.1365154879647875e-06,
      "loss": 0.0232,
      "step": 1799720
    },
    {
      "epoch": 2.9453139830980017,
      "grad_norm": 0.06209064647555351,
      "learning_rate": 4.13644959575127e-06,
      "loss": 0.0152,
      "step": 1799740
    },
    {
      "epoch": 2.945346713536655,
      "grad_norm": 0.13069939613342285,
      "learning_rate": 4.136383703537754e-06,
      "loss": 0.0108,
      "step": 1799760
    },
    {
      "epoch": 2.945379443975308,
      "grad_norm": 0.32763999700546265,
      "learning_rate": 4.136317811324237e-06,
      "loss": 0.0176,
      "step": 1799780
    },
    {
      "epoch": 2.9454121744139616,
      "grad_norm": 0.0629236251115799,
      "learning_rate": 4.136251919110719e-06,
      "loss": 0.0118,
      "step": 1799800
    },
    {
      "epoch": 2.945444904852615,
      "grad_norm": 0.18420375883579254,
      "learning_rate": 4.136186026897202e-06,
      "loss": 0.0128,
      "step": 1799820
    },
    {
      "epoch": 2.945477635291268,
      "grad_norm": 0.27068910002708435,
      "learning_rate": 4.136120134683685e-06,
      "loss": 0.0099,
      "step": 1799840
    },
    {
      "epoch": 2.9455103657299215,
      "grad_norm": 0.2530468702316284,
      "learning_rate": 4.1360542424701676e-06,
      "loss": 0.0129,
      "step": 1799860
    },
    {
      "epoch": 2.945543096168575,
      "grad_norm": 0.09011843055486679,
      "learning_rate": 4.13598835025665e-06,
      "loss": 0.0071,
      "step": 1799880
    },
    {
      "epoch": 2.9455758266072283,
      "grad_norm": 0.19352512061595917,
      "learning_rate": 4.135922458043133e-06,
      "loss": 0.0132,
      "step": 1799900
    },
    {
      "epoch": 2.9456085570458814,
      "grad_norm": 0.14903409779071808,
      "learning_rate": 4.135856565829617e-06,
      "loss": 0.0126,
      "step": 1799920
    },
    {
      "epoch": 2.945641287484535,
      "grad_norm": 0.21652714908123016,
      "learning_rate": 4.135790673616099e-06,
      "loss": 0.0087,
      "step": 1799940
    },
    {
      "epoch": 2.945674017923188,
      "grad_norm": 0.25614121556282043,
      "learning_rate": 4.135724781402582e-06,
      "loss": 0.0161,
      "step": 1799960
    },
    {
      "epoch": 2.9457067483618413,
      "grad_norm": 0.40684521198272705,
      "learning_rate": 4.135658889189065e-06,
      "loss": 0.0152,
      "step": 1799980
    },
    {
      "epoch": 2.945739478800495,
      "grad_norm": 0.3220960795879364,
      "learning_rate": 4.135592996975548e-06,
      "loss": 0.0109,
      "step": 1800000
    },
    {
      "epoch": 2.945739478800495,
      "eval_loss": 0.00739293685182929,
      "eval_runtime": 6506.8554,
      "eval_samples_per_second": 157.965,
      "eval_steps_per_second": 15.797,
      "eval_sts-dev_pearson_cosine": 0.9830641544319169,
      "eval_sts-dev_spearman_cosine": 0.8944039089352983,
      "step": 1800000
    },
    {
      "epoch": 2.945772209239148,
      "grad_norm": 0.3524416387081146,
      "learning_rate": 4.13552710476203e-06,
      "loss": 0.0173,
      "step": 1800020
    },
    {
      "epoch": 2.9458049396778017,
      "grad_norm": 0.4297627806663513,
      "learning_rate": 4.135461212548513e-06,
      "loss": 0.0125,
      "step": 1800040
    },
    {
      "epoch": 2.945837670116455,
      "grad_norm": 0.3309861719608307,
      "learning_rate": 4.135395320334996e-06,
      "loss": 0.0161,
      "step": 1800060
    },
    {
      "epoch": 2.9458704005551084,
      "grad_norm": 0.27719393372535706,
      "learning_rate": 4.1353294281214786e-06,
      "loss": 0.0136,
      "step": 1800080
    },
    {
      "epoch": 2.9459031309937616,
      "grad_norm": 0.4859699606895447,
      "learning_rate": 4.135263535907962e-06,
      "loss": 0.0123,
      "step": 1800100
    },
    {
      "epoch": 2.9459358614324147,
      "grad_norm": 0.1363830864429474,
      "learning_rate": 4.135197643694445e-06,
      "loss": 0.0179,
      "step": 1800120
    },
    {
      "epoch": 2.9459685918710683,
      "grad_norm": 0.5495046973228455,
      "learning_rate": 4.1351317514809285e-06,
      "loss": 0.013,
      "step": 1800140
    },
    {
      "epoch": 2.9460013223097214,
      "grad_norm": 0.5337380170822144,
      "learning_rate": 4.135065859267411e-06,
      "loss": 0.0081,
      "step": 1800160
    },
    {
      "epoch": 2.946034052748375,
      "grad_norm": 0.5972341895103455,
      "learning_rate": 4.134999967053894e-06,
      "loss": 0.0126,
      "step": 1800180
    },
    {
      "epoch": 2.946066783187028,
      "grad_norm": 0.34701716899871826,
      "learning_rate": 4.134934074840377e-06,
      "loss": 0.0112,
      "step": 1800200
    },
    {
      "epoch": 2.946099513625682,
      "grad_norm": 0.15150444209575653,
      "learning_rate": 4.1348681826268594e-06,
      "loss": 0.0102,
      "step": 1800220
    },
    {
      "epoch": 2.946132244064335,
      "grad_norm": 0.26381614804267883,
      "learning_rate": 4.134802290413342e-06,
      "loss": 0.0125,
      "step": 1800240
    },
    {
      "epoch": 2.946164974502988,
      "grad_norm": 0.4502934217453003,
      "learning_rate": 4.134736398199825e-06,
      "loss": 0.0122,
      "step": 1800260
    },
    {
      "epoch": 2.9461977049416417,
      "grad_norm": 0.30414536595344543,
      "learning_rate": 4.134670505986308e-06,
      "loss": 0.0111,
      "step": 1800280
    },
    {
      "epoch": 2.946230435380295,
      "grad_norm": 0.10644721239805222,
      "learning_rate": 4.13460461377279e-06,
      "loss": 0.0089,
      "step": 1800300
    },
    {
      "epoch": 2.9462631658189484,
      "grad_norm": 0.4679360091686249,
      "learning_rate": 4.134538721559274e-06,
      "loss": 0.0149,
      "step": 1800320
    },
    {
      "epoch": 2.9462958962576016,
      "grad_norm": 0.3415485620498657,
      "learning_rate": 4.134472829345757e-06,
      "loss": 0.0139,
      "step": 1800340
    },
    {
      "epoch": 2.946328626696255,
      "grad_norm": 0.14364482462406158,
      "learning_rate": 4.1344069371322395e-06,
      "loss": 0.0107,
      "step": 1800360
    },
    {
      "epoch": 2.9463613571349083,
      "grad_norm": 0.7248654365539551,
      "learning_rate": 4.134341044918722e-06,
      "loss": 0.0176,
      "step": 1800380
    },
    {
      "epoch": 2.9463940875735615,
      "grad_norm": 0.6513463258743286,
      "learning_rate": 4.134275152705205e-06,
      "loss": 0.0158,
      "step": 1800400
    },
    {
      "epoch": 2.946426818012215,
      "grad_norm": 0.5945164561271667,
      "learning_rate": 4.134209260491688e-06,
      "loss": 0.014,
      "step": 1800420
    },
    {
      "epoch": 2.946459548450868,
      "grad_norm": 0.1696024388074875,
      "learning_rate": 4.13414336827817e-06,
      "loss": 0.0143,
      "step": 1800440
    },
    {
      "epoch": 2.946492278889522,
      "grad_norm": 0.63919997215271,
      "learning_rate": 4.134077476064654e-06,
      "loss": 0.0107,
      "step": 1800460
    },
    {
      "epoch": 2.946525009328175,
      "grad_norm": 0.11969491094350815,
      "learning_rate": 4.134011583851137e-06,
      "loss": 0.0098,
      "step": 1800480
    },
    {
      "epoch": 2.9465577397668286,
      "grad_norm": 0.3485872149467468,
      "learning_rate": 4.1339456916376195e-06,
      "loss": 0.0198,
      "step": 1800500
    },
    {
      "epoch": 2.9465904702054817,
      "grad_norm": 0.9651497006416321,
      "learning_rate": 4.133879799424102e-06,
      "loss": 0.0147,
      "step": 1800520
    },
    {
      "epoch": 2.946623200644135,
      "grad_norm": 0.23156864941120148,
      "learning_rate": 4.133813907210586e-06,
      "loss": 0.0143,
      "step": 1800540
    },
    {
      "epoch": 2.9466559310827884,
      "grad_norm": 0.15577176213264465,
      "learning_rate": 4.1337480149970685e-06,
      "loss": 0.0123,
      "step": 1800560
    },
    {
      "epoch": 2.9466886615214416,
      "grad_norm": 0.6146914958953857,
      "learning_rate": 4.133682122783551e-06,
      "loss": 0.0159,
      "step": 1800580
    },
    {
      "epoch": 2.946721391960095,
      "grad_norm": 0.2278139293193817,
      "learning_rate": 4.133616230570034e-06,
      "loss": 0.0115,
      "step": 1800600
    },
    {
      "epoch": 2.9467541223987483,
      "grad_norm": 0.11484067887067795,
      "learning_rate": 4.133550338356517e-06,
      "loss": 0.0142,
      "step": 1800620
    },
    {
      "epoch": 2.946786852837402,
      "grad_norm": 0.07101182639598846,
      "learning_rate": 4.1334844461429995e-06,
      "loss": 0.0093,
      "step": 1800640
    },
    {
      "epoch": 2.946819583276055,
      "grad_norm": 0.361361026763916,
      "learning_rate": 4.133418553929482e-06,
      "loss": 0.0195,
      "step": 1800660
    },
    {
      "epoch": 2.9468523137147082,
      "grad_norm": 0.25300976634025574,
      "learning_rate": 4.133352661715965e-06,
      "loss": 0.016,
      "step": 1800680
    },
    {
      "epoch": 2.946885044153362,
      "grad_norm": 0.32415884733200073,
      "learning_rate": 4.133286769502448e-06,
      "loss": 0.0176,
      "step": 1800700
    },
    {
      "epoch": 2.946917774592015,
      "grad_norm": 0.29380807280540466,
      "learning_rate": 4.133220877288931e-06,
      "loss": 0.0102,
      "step": 1800720
    },
    {
      "epoch": 2.9469505050306686,
      "grad_norm": 0.5761180520057678,
      "learning_rate": 4.133154985075414e-06,
      "loss": 0.0224,
      "step": 1800740
    },
    {
      "epoch": 2.9469832354693217,
      "grad_norm": 0.26883384585380554,
      "learning_rate": 4.133089092861897e-06,
      "loss": 0.009,
      "step": 1800760
    },
    {
      "epoch": 2.9470159659079753,
      "grad_norm": 0.37058696150779724,
      "learning_rate": 4.1330232006483795e-06,
      "loss": 0.013,
      "step": 1800780
    },
    {
      "epoch": 2.9470486963466285,
      "grad_norm": 0.6424694061279297,
      "learning_rate": 4.132957308434862e-06,
      "loss": 0.0119,
      "step": 1800800
    },
    {
      "epoch": 2.9470814267852816,
      "grad_norm": 0.3608928918838501,
      "learning_rate": 4.132891416221346e-06,
      "loss": 0.0083,
      "step": 1800820
    },
    {
      "epoch": 2.947114157223935,
      "grad_norm": 0.49027618765830994,
      "learning_rate": 4.132825524007829e-06,
      "loss": 0.0122,
      "step": 1800840
    },
    {
      "epoch": 2.9471468876625884,
      "grad_norm": 0.4951537847518921,
      "learning_rate": 4.132759631794311e-06,
      "loss": 0.0149,
      "step": 1800860
    },
    {
      "epoch": 2.9471796181012415,
      "grad_norm": 0.09727706015110016,
      "learning_rate": 4.132693739580794e-06,
      "loss": 0.0169,
      "step": 1800880
    },
    {
      "epoch": 2.947212348539895,
      "grad_norm": 0.03731709346175194,
      "learning_rate": 4.132627847367277e-06,
      "loss": 0.0146,
      "step": 1800900
    },
    {
      "epoch": 2.9472450789785487,
      "grad_norm": 0.6614097952842712,
      "learning_rate": 4.1325619551537596e-06,
      "loss": 0.0198,
      "step": 1800920
    },
    {
      "epoch": 2.947277809417202,
      "grad_norm": 0.8222209811210632,
      "learning_rate": 4.132496062940243e-06,
      "loss": 0.0085,
      "step": 1800940
    },
    {
      "epoch": 2.947310539855855,
      "grad_norm": 0.5813026428222656,
      "learning_rate": 4.132430170726726e-06,
      "loss": 0.0136,
      "step": 1800960
    },
    {
      "epoch": 2.9473432702945086,
      "grad_norm": 0.39889585971832275,
      "learning_rate": 4.132364278513209e-06,
      "loss": 0.0106,
      "step": 1800980
    },
    {
      "epoch": 2.9473760007331617,
      "grad_norm": 0.14464305341243744,
      "learning_rate": 4.132298386299691e-06,
      "loss": 0.0197,
      "step": 1801000
    },
    {
      "epoch": 2.947408731171815,
      "grad_norm": 0.44153907895088196,
      "learning_rate": 4.132232494086174e-06,
      "loss": 0.0122,
      "step": 1801020
    },
    {
      "epoch": 2.9474414616104685,
      "grad_norm": 0.42382365465164185,
      "learning_rate": 4.132166601872657e-06,
      "loss": 0.0147,
      "step": 1801040
    },
    {
      "epoch": 2.947474192049122,
      "grad_norm": 0.21380119025707245,
      "learning_rate": 4.13210070965914e-06,
      "loss": 0.0102,
      "step": 1801060
    },
    {
      "epoch": 2.9475069224877752,
      "grad_norm": 0.2228870391845703,
      "learning_rate": 4.132034817445622e-06,
      "loss": 0.0085,
      "step": 1801080
    },
    {
      "epoch": 2.9475396529264284,
      "grad_norm": 0.25488194823265076,
      "learning_rate": 4.131968925232105e-06,
      "loss": 0.0104,
      "step": 1801100
    },
    {
      "epoch": 2.947572383365082,
      "grad_norm": 0.31901615858078003,
      "learning_rate": 4.131903033018589e-06,
      "loss": 0.0135,
      "step": 1801120
    },
    {
      "epoch": 2.947605113803735,
      "grad_norm": 0.0972510352730751,
      "learning_rate": 4.131837140805071e-06,
      "loss": 0.0144,
      "step": 1801140
    },
    {
      "epoch": 2.9476378442423883,
      "grad_norm": 0.2798575758934021,
      "learning_rate": 4.131771248591554e-06,
      "loss": 0.0165,
      "step": 1801160
    },
    {
      "epoch": 2.947670574681042,
      "grad_norm": 0.48905205726623535,
      "learning_rate": 4.131705356378038e-06,
      "loss": 0.0141,
      "step": 1801180
    },
    {
      "epoch": 2.9477033051196955,
      "grad_norm": 0.13964883983135223,
      "learning_rate": 4.1316394641645205e-06,
      "loss": 0.0144,
      "step": 1801200
    },
    {
      "epoch": 2.9477360355583486,
      "grad_norm": 0.1451319456100464,
      "learning_rate": 4.131573571951003e-06,
      "loss": 0.0188,
      "step": 1801220
    },
    {
      "epoch": 2.9477687659970018,
      "grad_norm": 0.1913043111562729,
      "learning_rate": 4.131507679737486e-06,
      "loss": 0.012,
      "step": 1801240
    },
    {
      "epoch": 2.9478014964356554,
      "grad_norm": 0.4438169300556183,
      "learning_rate": 4.131441787523969e-06,
      "loss": 0.0177,
      "step": 1801260
    },
    {
      "epoch": 2.9478342268743085,
      "grad_norm": 0.14167703688144684,
      "learning_rate": 4.131375895310451e-06,
      "loss": 0.021,
      "step": 1801280
    },
    {
      "epoch": 2.9478669573129617,
      "grad_norm": 0.3492535650730133,
      "learning_rate": 4.131310003096934e-06,
      "loss": 0.0135,
      "step": 1801300
    },
    {
      "epoch": 2.9478996877516153,
      "grad_norm": 0.6973274946212769,
      "learning_rate": 4.131244110883417e-06,
      "loss": 0.0204,
      "step": 1801320
    },
    {
      "epoch": 2.947932418190269,
      "grad_norm": 0.11225671321153641,
      "learning_rate": 4.1311782186699005e-06,
      "loss": 0.0132,
      "step": 1801340
    },
    {
      "epoch": 2.947965148628922,
      "grad_norm": 0.08575652539730072,
      "learning_rate": 4.131112326456383e-06,
      "loss": 0.0083,
      "step": 1801360
    },
    {
      "epoch": 2.947997879067575,
      "grad_norm": 0.20035520195960999,
      "learning_rate": 4.131046434242866e-06,
      "loss": 0.0188,
      "step": 1801380
    },
    {
      "epoch": 2.9480306095062287,
      "grad_norm": 0.06351480633020401,
      "learning_rate": 4.130980542029349e-06,
      "loss": 0.0116,
      "step": 1801400
    },
    {
      "epoch": 2.948063339944882,
      "grad_norm": 0.23128551244735718,
      "learning_rate": 4.1309146498158314e-06,
      "loss": 0.0166,
      "step": 1801420
    },
    {
      "epoch": 2.948096070383535,
      "grad_norm": 0.11544216424226761,
      "learning_rate": 4.130848757602314e-06,
      "loss": 0.0126,
      "step": 1801440
    },
    {
      "epoch": 2.9481288008221886,
      "grad_norm": 0.6698800325393677,
      "learning_rate": 4.130782865388797e-06,
      "loss": 0.0196,
      "step": 1801460
    },
    {
      "epoch": 2.948161531260842,
      "grad_norm": 0.3153635561466217,
      "learning_rate": 4.13071697317528e-06,
      "loss": 0.0144,
      "step": 1801480
    },
    {
      "epoch": 2.9481942616994954,
      "grad_norm": 0.4968706965446472,
      "learning_rate": 4.130651080961762e-06,
      "loss": 0.0126,
      "step": 1801500
    },
    {
      "epoch": 2.9482269921381485,
      "grad_norm": 0.42420703172683716,
      "learning_rate": 4.130585188748246e-06,
      "loss": 0.0121,
      "step": 1801520
    },
    {
      "epoch": 2.948259722576802,
      "grad_norm": 0.5973204970359802,
      "learning_rate": 4.130519296534729e-06,
      "loss": 0.0099,
      "step": 1801540
    },
    {
      "epoch": 2.9482924530154553,
      "grad_norm": 0.2503199279308319,
      "learning_rate": 4.130453404321212e-06,
      "loss": 0.0132,
      "step": 1801560
    },
    {
      "epoch": 2.9483251834541084,
      "grad_norm": 0.18800000846385956,
      "learning_rate": 4.130387512107695e-06,
      "loss": 0.0138,
      "step": 1801580
    },
    {
      "epoch": 2.948357913892762,
      "grad_norm": 0.19252680242061615,
      "learning_rate": 4.130321619894178e-06,
      "loss": 0.0131,
      "step": 1801600
    },
    {
      "epoch": 2.948390644331415,
      "grad_norm": 0.3779357373714447,
      "learning_rate": 4.1302557276806605e-06,
      "loss": 0.0137,
      "step": 1801620
    },
    {
      "epoch": 2.9484233747700688,
      "grad_norm": 0.06797898560762405,
      "learning_rate": 4.130189835467143e-06,
      "loss": 0.0108,
      "step": 1801640
    },
    {
      "epoch": 2.948456105208722,
      "grad_norm": 0.4805946946144104,
      "learning_rate": 4.130123943253626e-06,
      "loss": 0.0108,
      "step": 1801660
    },
    {
      "epoch": 2.9484888356473755,
      "grad_norm": 0.33006587624549866,
      "learning_rate": 4.130058051040109e-06,
      "loss": 0.0173,
      "step": 1801680
    },
    {
      "epoch": 2.9485215660860287,
      "grad_norm": 0.31877708435058594,
      "learning_rate": 4.1299921588265915e-06,
      "loss": 0.0083,
      "step": 1801700
    },
    {
      "epoch": 2.948554296524682,
      "grad_norm": 0.20560459792613983,
      "learning_rate": 4.129926266613074e-06,
      "loss": 0.0123,
      "step": 1801720
    },
    {
      "epoch": 2.9485870269633354,
      "grad_norm": 0.14108647406101227,
      "learning_rate": 4.129860374399558e-06,
      "loss": 0.0124,
      "step": 1801740
    },
    {
      "epoch": 2.9486197574019886,
      "grad_norm": 0.8188865780830383,
      "learning_rate": 4.1297944821860406e-06,
      "loss": 0.012,
      "step": 1801760
    },
    {
      "epoch": 2.948652487840642,
      "grad_norm": 0.5740010738372803,
      "learning_rate": 4.129728589972523e-06,
      "loss": 0.0157,
      "step": 1801780
    },
    {
      "epoch": 2.9486852182792953,
      "grad_norm": 0.19150884449481964,
      "learning_rate": 4.129662697759006e-06,
      "loss": 0.0115,
      "step": 1801800
    },
    {
      "epoch": 2.948717948717949,
      "grad_norm": 0.8609386682510376,
      "learning_rate": 4.129596805545489e-06,
      "loss": 0.0181,
      "step": 1801820
    },
    {
      "epoch": 2.948750679156602,
      "grad_norm": 0.3496800363063812,
      "learning_rate": 4.1295309133319715e-06,
      "loss": 0.0125,
      "step": 1801840
    },
    {
      "epoch": 2.948783409595255,
      "grad_norm": 0.6014847755432129,
      "learning_rate": 4.129465021118455e-06,
      "loss": 0.0132,
      "step": 1801860
    },
    {
      "epoch": 2.948816140033909,
      "grad_norm": 0.5468330383300781,
      "learning_rate": 4.129399128904938e-06,
      "loss": 0.0128,
      "step": 1801880
    },
    {
      "epoch": 2.948848870472562,
      "grad_norm": 0.2369827926158905,
      "learning_rate": 4.129333236691421e-06,
      "loss": 0.0096,
      "step": 1801900
    },
    {
      "epoch": 2.9488816009112155,
      "grad_norm": 0.673344075679779,
      "learning_rate": 4.129267344477903e-06,
      "loss": 0.0129,
      "step": 1801920
    },
    {
      "epoch": 2.9489143313498687,
      "grad_norm": 0.3352636992931366,
      "learning_rate": 4.129201452264386e-06,
      "loss": 0.0085,
      "step": 1801940
    },
    {
      "epoch": 2.9489470617885223,
      "grad_norm": 0.8831291198730469,
      "learning_rate": 4.12913556005087e-06,
      "loss": 0.0093,
      "step": 1801960
    },
    {
      "epoch": 2.9489797922271754,
      "grad_norm": 0.36887145042419434,
      "learning_rate": 4.129069667837352e-06,
      "loss": 0.0152,
      "step": 1801980
    },
    {
      "epoch": 2.9490125226658286,
      "grad_norm": 0.5956034064292908,
      "learning_rate": 4.129003775623835e-06,
      "loss": 0.0129,
      "step": 1802000
    },
    {
      "epoch": 2.949045253104482,
      "grad_norm": 0.2524702548980713,
      "learning_rate": 4.128937883410318e-06,
      "loss": 0.0154,
      "step": 1802020
    },
    {
      "epoch": 2.9490779835431353,
      "grad_norm": 0.1553838700056076,
      "learning_rate": 4.128871991196801e-06,
      "loss": 0.0103,
      "step": 1802040
    },
    {
      "epoch": 2.949110713981789,
      "grad_norm": 0.3051685690879822,
      "learning_rate": 4.128806098983283e-06,
      "loss": 0.0144,
      "step": 1802060
    },
    {
      "epoch": 2.949143444420442,
      "grad_norm": 0.12293387949466705,
      "learning_rate": 4.128740206769766e-06,
      "loss": 0.0182,
      "step": 1802080
    },
    {
      "epoch": 2.9491761748590957,
      "grad_norm": 0.07984801381826401,
      "learning_rate": 4.128674314556249e-06,
      "loss": 0.0091,
      "step": 1802100
    },
    {
      "epoch": 2.949208905297749,
      "grad_norm": 0.3965425193309784,
      "learning_rate": 4.1286084223427316e-06,
      "loss": 0.0154,
      "step": 1802120
    },
    {
      "epoch": 2.949241635736402,
      "grad_norm": 0.25605064630508423,
      "learning_rate": 4.128542530129215e-06,
      "loss": 0.007,
      "step": 1802140
    },
    {
      "epoch": 2.9492743661750556,
      "grad_norm": 0.2981839179992676,
      "learning_rate": 4.128476637915698e-06,
      "loss": 0.0104,
      "step": 1802160
    },
    {
      "epoch": 2.9493070966137087,
      "grad_norm": 0.1773620843887329,
      "learning_rate": 4.128410745702181e-06,
      "loss": 0.0079,
      "step": 1802180
    },
    {
      "epoch": 2.9493398270523623,
      "grad_norm": 1.6631810665130615,
      "learning_rate": 4.128344853488663e-06,
      "loss": 0.0163,
      "step": 1802200
    },
    {
      "epoch": 2.9493725574910155,
      "grad_norm": 0.8980333805084229,
      "learning_rate": 4.128278961275147e-06,
      "loss": 0.0149,
      "step": 1802220
    },
    {
      "epoch": 2.949405287929669,
      "grad_norm": 0.4433954656124115,
      "learning_rate": 4.12821306906163e-06,
      "loss": 0.0127,
      "step": 1802240
    },
    {
      "epoch": 2.949438018368322,
      "grad_norm": 0.31937235593795776,
      "learning_rate": 4.1281471768481124e-06,
      "loss": 0.0128,
      "step": 1802260
    },
    {
      "epoch": 2.9494707488069754,
      "grad_norm": 0.6760601997375488,
      "learning_rate": 4.128081284634595e-06,
      "loss": 0.0107,
      "step": 1802280
    },
    {
      "epoch": 2.949503479245629,
      "grad_norm": 0.24643440544605255,
      "learning_rate": 4.128015392421078e-06,
      "loss": 0.0081,
      "step": 1802300
    },
    {
      "epoch": 2.949536209684282,
      "grad_norm": 0.33678916096687317,
      "learning_rate": 4.127949500207561e-06,
      "loss": 0.0092,
      "step": 1802320
    },
    {
      "epoch": 2.9495689401229357,
      "grad_norm": 0.271967351436615,
      "learning_rate": 4.127883607994043e-06,
      "loss": 0.011,
      "step": 1802340
    },
    {
      "epoch": 2.949601670561589,
      "grad_norm": 0.14164111018180847,
      "learning_rate": 4.127817715780527e-06,
      "loss": 0.0109,
      "step": 1802360
    },
    {
      "epoch": 2.9496344010002424,
      "grad_norm": 0.10038720071315765,
      "learning_rate": 4.12775182356701e-06,
      "loss": 0.0096,
      "step": 1802380
    },
    {
      "epoch": 2.9496671314388956,
      "grad_norm": 0.3358253538608551,
      "learning_rate": 4.1276859313534925e-06,
      "loss": 0.0092,
      "step": 1802400
    },
    {
      "epoch": 2.9496998618775487,
      "grad_norm": 0.6235957145690918,
      "learning_rate": 4.127620039139975e-06,
      "loss": 0.0138,
      "step": 1802420
    },
    {
      "epoch": 2.9497325923162023,
      "grad_norm": 0.0989324077963829,
      "learning_rate": 4.127554146926458e-06,
      "loss": 0.0124,
      "step": 1802440
    },
    {
      "epoch": 2.9497653227548555,
      "grad_norm": 0.26680928468704224,
      "learning_rate": 4.127488254712941e-06,
      "loss": 0.0102,
      "step": 1802460
    },
    {
      "epoch": 2.9497980531935086,
      "grad_norm": 0.21828962862491608,
      "learning_rate": 4.127422362499423e-06,
      "loss": 0.0102,
      "step": 1802480
    },
    {
      "epoch": 2.9498307836321622,
      "grad_norm": 0.2990378439426422,
      "learning_rate": 4.127356470285906e-06,
      "loss": 0.0157,
      "step": 1802500
    },
    {
      "epoch": 2.949863514070816,
      "grad_norm": 0.28940725326538086,
      "learning_rate": 4.127290578072389e-06,
      "loss": 0.0118,
      "step": 1802520
    },
    {
      "epoch": 2.949896244509469,
      "grad_norm": 0.24203021824359894,
      "learning_rate": 4.1272246858588725e-06,
      "loss": 0.0094,
      "step": 1802540
    },
    {
      "epoch": 2.949928974948122,
      "grad_norm": 0.39987558126449585,
      "learning_rate": 4.127158793645355e-06,
      "loss": 0.0155,
      "step": 1802560
    },
    {
      "epoch": 2.9499617053867757,
      "grad_norm": 0.18207979202270508,
      "learning_rate": 4.127092901431838e-06,
      "loss": 0.0126,
      "step": 1802580
    },
    {
      "epoch": 2.949994435825429,
      "grad_norm": 0.41661253571510315,
      "learning_rate": 4.1270270092183216e-06,
      "loss": 0.0121,
      "step": 1802600
    },
    {
      "epoch": 2.950027166264082,
      "grad_norm": 0.277881383895874,
      "learning_rate": 4.126961117004804e-06,
      "loss": 0.0148,
      "step": 1802620
    },
    {
      "epoch": 2.9500598967027356,
      "grad_norm": 0.3032427430152893,
      "learning_rate": 4.126895224791287e-06,
      "loss": 0.0185,
      "step": 1802640
    },
    {
      "epoch": 2.950092627141389,
      "grad_norm": 0.2698791027069092,
      "learning_rate": 4.12682933257777e-06,
      "loss": 0.0177,
      "step": 1802660
    },
    {
      "epoch": 2.9501253575800424,
      "grad_norm": 0.21759827435016632,
      "learning_rate": 4.1267634403642525e-06,
      "loss": 0.0118,
      "step": 1802680
    },
    {
      "epoch": 2.9501580880186955,
      "grad_norm": 0.10598459094762802,
      "learning_rate": 4.126697548150735e-06,
      "loss": 0.0148,
      "step": 1802700
    },
    {
      "epoch": 2.950190818457349,
      "grad_norm": 0.17815735936164856,
      "learning_rate": 4.126631655937218e-06,
      "loss": 0.0092,
      "step": 1802720
    },
    {
      "epoch": 2.9502235488960022,
      "grad_norm": 0.3886551260948181,
      "learning_rate": 4.126565763723701e-06,
      "loss": 0.0119,
      "step": 1802740
    },
    {
      "epoch": 2.9502562793346554,
      "grad_norm": 0.5797805786132812,
      "learning_rate": 4.126499871510184e-06,
      "loss": 0.0098,
      "step": 1802760
    },
    {
      "epoch": 2.950289009773309,
      "grad_norm": 0.11417175829410553,
      "learning_rate": 4.126433979296667e-06,
      "loss": 0.014,
      "step": 1802780
    },
    {
      "epoch": 2.9503217402119626,
      "grad_norm": 0.10771038383245468,
      "learning_rate": 4.12636808708315e-06,
      "loss": 0.0142,
      "step": 1802800
    },
    {
      "epoch": 2.9503544706506157,
      "grad_norm": 0.4977889358997345,
      "learning_rate": 4.1263021948696325e-06,
      "loss": 0.0173,
      "step": 1802820
    },
    {
      "epoch": 2.950387201089269,
      "grad_norm": 0.4446587562561035,
      "learning_rate": 4.126236302656115e-06,
      "loss": 0.0183,
      "step": 1802840
    },
    {
      "epoch": 2.9504199315279225,
      "grad_norm": 0.40606996417045593,
      "learning_rate": 4.126170410442598e-06,
      "loss": 0.018,
      "step": 1802860
    },
    {
      "epoch": 2.9504526619665756,
      "grad_norm": 0.4751969277858734,
      "learning_rate": 4.126104518229081e-06,
      "loss": 0.0117,
      "step": 1802880
    },
    {
      "epoch": 2.950485392405229,
      "grad_norm": 0.45885610580444336,
      "learning_rate": 4.1260386260155635e-06,
      "loss": 0.013,
      "step": 1802900
    },
    {
      "epoch": 2.9505181228438824,
      "grad_norm": 0.4417172372341156,
      "learning_rate": 4.125972733802047e-06,
      "loss": 0.0124,
      "step": 1802920
    },
    {
      "epoch": 2.950550853282536,
      "grad_norm": 0.1647811084985733,
      "learning_rate": 4.12590684158853e-06,
      "loss": 0.0124,
      "step": 1802940
    },
    {
      "epoch": 2.950583583721189,
      "grad_norm": 0.958899974822998,
      "learning_rate": 4.1258409493750126e-06,
      "loss": 0.0127,
      "step": 1802960
    },
    {
      "epoch": 2.9506163141598423,
      "grad_norm": 0.4265304505825043,
      "learning_rate": 4.125775057161496e-06,
      "loss": 0.0091,
      "step": 1802980
    },
    {
      "epoch": 2.950649044598496,
      "grad_norm": 0.09442782402038574,
      "learning_rate": 4.125709164947979e-06,
      "loss": 0.0189,
      "step": 1803000
    },
    {
      "epoch": 2.950681775037149,
      "grad_norm": 0.4312871992588043,
      "learning_rate": 4.125643272734462e-06,
      "loss": 0.0152,
      "step": 1803020
    },
    {
      "epoch": 2.950714505475802,
      "grad_norm": 0.6744643449783325,
      "learning_rate": 4.125577380520944e-06,
      "loss": 0.0186,
      "step": 1803040
    },
    {
      "epoch": 2.9507472359144558,
      "grad_norm": 0.16527490317821503,
      "learning_rate": 4.125511488307427e-06,
      "loss": 0.0115,
      "step": 1803060
    },
    {
      "epoch": 2.950779966353109,
      "grad_norm": 0.41980165243148804,
      "learning_rate": 4.12544559609391e-06,
      "loss": 0.0115,
      "step": 1803080
    },
    {
      "epoch": 2.9508126967917625,
      "grad_norm": 0.6064548492431641,
      "learning_rate": 4.125379703880393e-06,
      "loss": 0.0144,
      "step": 1803100
    },
    {
      "epoch": 2.9508454272304157,
      "grad_norm": 0.2724819481372833,
      "learning_rate": 4.125313811666875e-06,
      "loss": 0.0163,
      "step": 1803120
    },
    {
      "epoch": 2.9508781576690692,
      "grad_norm": 0.5287975072860718,
      "learning_rate": 4.125247919453358e-06,
      "loss": 0.0148,
      "step": 1803140
    },
    {
      "epoch": 2.9509108881077224,
      "grad_norm": 0.13151825964450836,
      "learning_rate": 4.125182027239842e-06,
      "loss": 0.011,
      "step": 1803160
    },
    {
      "epoch": 2.9509436185463755,
      "grad_norm": 0.0803457573056221,
      "learning_rate": 4.125116135026324e-06,
      "loss": 0.0161,
      "step": 1803180
    },
    {
      "epoch": 2.950976348985029,
      "grad_norm": 0.20032444596290588,
      "learning_rate": 4.125050242812807e-06,
      "loss": 0.0094,
      "step": 1803200
    },
    {
      "epoch": 2.9510090794236823,
      "grad_norm": 0.0869918093085289,
      "learning_rate": 4.12498435059929e-06,
      "loss": 0.0105,
      "step": 1803220
    },
    {
      "epoch": 2.951041809862336,
      "grad_norm": 0.7580469846725464,
      "learning_rate": 4.124918458385773e-06,
      "loss": 0.0222,
      "step": 1803240
    },
    {
      "epoch": 2.951074540300989,
      "grad_norm": 0.501098096370697,
      "learning_rate": 4.124852566172255e-06,
      "loss": 0.0121,
      "step": 1803260
    },
    {
      "epoch": 2.9511072707396426,
      "grad_norm": 0.15268170833587646,
      "learning_rate": 4.124786673958739e-06,
      "loss": 0.0124,
      "step": 1803280
    },
    {
      "epoch": 2.951140001178296,
      "grad_norm": 0.2425413727760315,
      "learning_rate": 4.124720781745222e-06,
      "loss": 0.0124,
      "step": 1803300
    },
    {
      "epoch": 2.951172731616949,
      "grad_norm": 0.3095395863056183,
      "learning_rate": 4.124654889531704e-06,
      "loss": 0.0132,
      "step": 1803320
    },
    {
      "epoch": 2.9512054620556025,
      "grad_norm": 0.12608814239501953,
      "learning_rate": 4.124588997318187e-06,
      "loss": 0.0113,
      "step": 1803340
    },
    {
      "epoch": 2.9512381924942557,
      "grad_norm": 0.35060712695121765,
      "learning_rate": 4.12452310510467e-06,
      "loss": 0.0162,
      "step": 1803360
    },
    {
      "epoch": 2.9512709229329093,
      "grad_norm": 1.1322224140167236,
      "learning_rate": 4.1244572128911535e-06,
      "loss": 0.0161,
      "step": 1803380
    },
    {
      "epoch": 2.9513036533715624,
      "grad_norm": 0.518936038017273,
      "learning_rate": 4.124391320677636e-06,
      "loss": 0.0078,
      "step": 1803400
    },
    {
      "epoch": 2.951336383810216,
      "grad_norm": 0.6871524453163147,
      "learning_rate": 4.124325428464119e-06,
      "loss": 0.0127,
      "step": 1803420
    },
    {
      "epoch": 2.951369114248869,
      "grad_norm": 1.0185495615005493,
      "learning_rate": 4.124259536250602e-06,
      "loss": 0.0117,
      "step": 1803440
    },
    {
      "epoch": 2.9514018446875223,
      "grad_norm": 0.10180124640464783,
      "learning_rate": 4.1241936440370844e-06,
      "loss": 0.0108,
      "step": 1803460
    },
    {
      "epoch": 2.951434575126176,
      "grad_norm": 0.5400585532188416,
      "learning_rate": 4.124127751823567e-06,
      "loss": 0.0109,
      "step": 1803480
    },
    {
      "epoch": 2.951467305564829,
      "grad_norm": 1.8127620220184326,
      "learning_rate": 4.12406185961005e-06,
      "loss": 0.0116,
      "step": 1803500
    },
    {
      "epoch": 2.9515000360034827,
      "grad_norm": 0.5610337257385254,
      "learning_rate": 4.123995967396533e-06,
      "loss": 0.0158,
      "step": 1803520
    },
    {
      "epoch": 2.951532766442136,
      "grad_norm": 0.1376141756772995,
      "learning_rate": 4.123930075183015e-06,
      "loss": 0.0124,
      "step": 1803540
    },
    {
      "epoch": 2.9515654968807894,
      "grad_norm": 0.13925907015800476,
      "learning_rate": 4.123864182969499e-06,
      "loss": 0.0173,
      "step": 1803560
    },
    {
      "epoch": 2.9515982273194425,
      "grad_norm": 0.3462602496147156,
      "learning_rate": 4.123798290755982e-06,
      "loss": 0.0094,
      "step": 1803580
    },
    {
      "epoch": 2.9516309577580957,
      "grad_norm": 0.5251242518424988,
      "learning_rate": 4.1237323985424645e-06,
      "loss": 0.0101,
      "step": 1803600
    },
    {
      "epoch": 2.9516636881967493,
      "grad_norm": 0.33073312044143677,
      "learning_rate": 4.123666506328948e-06,
      "loss": 0.0105,
      "step": 1803620
    },
    {
      "epoch": 2.9516964186354024,
      "grad_norm": 0.3896169066429138,
      "learning_rate": 4.123600614115431e-06,
      "loss": 0.0174,
      "step": 1803640
    },
    {
      "epoch": 2.951729149074056,
      "grad_norm": 0.6212261915206909,
      "learning_rate": 4.1235347219019135e-06,
      "loss": 0.0107,
      "step": 1803660
    },
    {
      "epoch": 2.951761879512709,
      "grad_norm": 0.31229931116104126,
      "learning_rate": 4.123468829688396e-06,
      "loss": 0.0155,
      "step": 1803680
    },
    {
      "epoch": 2.951794609951363,
      "grad_norm": 0.4608602225780487,
      "learning_rate": 4.123402937474879e-06,
      "loss": 0.0162,
      "step": 1803700
    },
    {
      "epoch": 2.951827340390016,
      "grad_norm": 0.24454563856124878,
      "learning_rate": 4.123337045261362e-06,
      "loss": 0.0106,
      "step": 1803720
    },
    {
      "epoch": 2.951860070828669,
      "grad_norm": 0.29660600423812866,
      "learning_rate": 4.1232711530478445e-06,
      "loss": 0.0093,
      "step": 1803740
    },
    {
      "epoch": 2.9518928012673227,
      "grad_norm": 0.8474451303482056,
      "learning_rate": 4.123205260834327e-06,
      "loss": 0.0172,
      "step": 1803760
    },
    {
      "epoch": 2.951925531705976,
      "grad_norm": 0.14114601910114288,
      "learning_rate": 4.123139368620811e-06,
      "loss": 0.0111,
      "step": 1803780
    },
    {
      "epoch": 2.9519582621446294,
      "grad_norm": 0.4094422459602356,
      "learning_rate": 4.1230734764072936e-06,
      "loss": 0.0137,
      "step": 1803800
    },
    {
      "epoch": 2.9519909925832826,
      "grad_norm": 0.26479634642601013,
      "learning_rate": 4.123007584193776e-06,
      "loss": 0.0073,
      "step": 1803820
    },
    {
      "epoch": 2.952023723021936,
      "grad_norm": 0.5528708696365356,
      "learning_rate": 4.122941691980259e-06,
      "loss": 0.0101,
      "step": 1803840
    },
    {
      "epoch": 2.9520564534605893,
      "grad_norm": 0.3105921447277069,
      "learning_rate": 4.122875799766742e-06,
      "loss": 0.0128,
      "step": 1803860
    },
    {
      "epoch": 2.9520891838992425,
      "grad_norm": 0.5266913175582886,
      "learning_rate": 4.1228099075532245e-06,
      "loss": 0.0114,
      "step": 1803880
    },
    {
      "epoch": 2.952121914337896,
      "grad_norm": 0.636330246925354,
      "learning_rate": 4.122744015339707e-06,
      "loss": 0.0122,
      "step": 1803900
    },
    {
      "epoch": 2.952154644776549,
      "grad_norm": 0.47060275077819824,
      "learning_rate": 4.12267812312619e-06,
      "loss": 0.0204,
      "step": 1803920
    },
    {
      "epoch": 2.9521873752152024,
      "grad_norm": 0.11403826624155045,
      "learning_rate": 4.122612230912673e-06,
      "loss": 0.0149,
      "step": 1803940
    },
    {
      "epoch": 2.952220105653856,
      "grad_norm": 0.22793956100940704,
      "learning_rate": 4.122546338699156e-06,
      "loss": 0.0192,
      "step": 1803960
    },
    {
      "epoch": 2.9522528360925095,
      "grad_norm": 0.4216454327106476,
      "learning_rate": 4.122480446485639e-06,
      "loss": 0.0127,
      "step": 1803980
    },
    {
      "epoch": 2.9522855665311627,
      "grad_norm": 0.1776670217514038,
      "learning_rate": 4.122414554272122e-06,
      "loss": 0.0137,
      "step": 1804000
    },
    {
      "epoch": 2.952318296969816,
      "grad_norm": 0.5523748993873596,
      "learning_rate": 4.122348662058605e-06,
      "loss": 0.0127,
      "step": 1804020
    },
    {
      "epoch": 2.9523510274084694,
      "grad_norm": 0.37591123580932617,
      "learning_rate": 4.122282769845088e-06,
      "loss": 0.0099,
      "step": 1804040
    },
    {
      "epoch": 2.9523837578471226,
      "grad_norm": 0.44084909558296204,
      "learning_rate": 4.122216877631571e-06,
      "loss": 0.0118,
      "step": 1804060
    },
    {
      "epoch": 2.9524164882857757,
      "grad_norm": 0.36089208722114563,
      "learning_rate": 4.122150985418054e-06,
      "loss": 0.0163,
      "step": 1804080
    },
    {
      "epoch": 2.9524492187244293,
      "grad_norm": 0.5395875573158264,
      "learning_rate": 4.122085093204536e-06,
      "loss": 0.0148,
      "step": 1804100
    },
    {
      "epoch": 2.952481949163083,
      "grad_norm": 0.43440502882003784,
      "learning_rate": 4.122019200991019e-06,
      "loss": 0.0127,
      "step": 1804120
    },
    {
      "epoch": 2.952514679601736,
      "grad_norm": 0.8524567484855652,
      "learning_rate": 4.121953308777502e-06,
      "loss": 0.0135,
      "step": 1804140
    },
    {
      "epoch": 2.9525474100403892,
      "grad_norm": 0.9349452257156372,
      "learning_rate": 4.1218874165639846e-06,
      "loss": 0.0159,
      "step": 1804160
    },
    {
      "epoch": 2.952580140479043,
      "grad_norm": 0.2826339602470398,
      "learning_rate": 4.121821524350468e-06,
      "loss": 0.0167,
      "step": 1804180
    },
    {
      "epoch": 2.952612870917696,
      "grad_norm": 0.17742550373077393,
      "learning_rate": 4.121755632136951e-06,
      "loss": 0.0091,
      "step": 1804200
    },
    {
      "epoch": 2.952645601356349,
      "grad_norm": 0.5305030345916748,
      "learning_rate": 4.121689739923434e-06,
      "loss": 0.012,
      "step": 1804220
    },
    {
      "epoch": 2.9526783317950027,
      "grad_norm": 0.48792192339897156,
      "learning_rate": 4.121623847709916e-06,
      "loss": 0.0159,
      "step": 1804240
    },
    {
      "epoch": 2.9527110622336563,
      "grad_norm": 0.9284898638725281,
      "learning_rate": 4.121557955496399e-06,
      "loss": 0.0149,
      "step": 1804260
    },
    {
      "epoch": 2.9527437926723095,
      "grad_norm": 0.3170110285282135,
      "learning_rate": 4.121492063282882e-06,
      "loss": 0.0137,
      "step": 1804280
    },
    {
      "epoch": 2.9527765231109626,
      "grad_norm": 0.5294642448425293,
      "learning_rate": 4.121426171069365e-06,
      "loss": 0.0117,
      "step": 1804300
    },
    {
      "epoch": 2.952809253549616,
      "grad_norm": 1.8299814462661743,
      "learning_rate": 4.121360278855848e-06,
      "loss": 0.0151,
      "step": 1804320
    },
    {
      "epoch": 2.9528419839882694,
      "grad_norm": 0.45279937982559204,
      "learning_rate": 4.121294386642331e-06,
      "loss": 0.0198,
      "step": 1804340
    },
    {
      "epoch": 2.9528747144269225,
      "grad_norm": 0.1629684865474701,
      "learning_rate": 4.121228494428814e-06,
      "loss": 0.0094,
      "step": 1804360
    },
    {
      "epoch": 2.952907444865576,
      "grad_norm": 0.952423632144928,
      "learning_rate": 4.121162602215296e-06,
      "loss": 0.0139,
      "step": 1804380
    },
    {
      "epoch": 2.9529401753042297,
      "grad_norm": 0.21919508278369904,
      "learning_rate": 4.12109671000178e-06,
      "loss": 0.0157,
      "step": 1804400
    },
    {
      "epoch": 2.952972905742883,
      "grad_norm": 0.4166923761367798,
      "learning_rate": 4.121030817788263e-06,
      "loss": 0.0104,
      "step": 1804420
    },
    {
      "epoch": 2.953005636181536,
      "grad_norm": 0.40273430943489075,
      "learning_rate": 4.1209649255747455e-06,
      "loss": 0.0136,
      "step": 1804440
    },
    {
      "epoch": 2.9530383666201896,
      "grad_norm": 0.27878203988075256,
      "learning_rate": 4.120899033361228e-06,
      "loss": 0.0194,
      "step": 1804460
    },
    {
      "epoch": 2.9530710970588427,
      "grad_norm": 0.22868555784225464,
      "learning_rate": 4.120833141147711e-06,
      "loss": 0.0155,
      "step": 1804480
    },
    {
      "epoch": 2.953103827497496,
      "grad_norm": 0.2141479104757309,
      "learning_rate": 4.120767248934194e-06,
      "loss": 0.0105,
      "step": 1804500
    },
    {
      "epoch": 2.9531365579361495,
      "grad_norm": 0.0829949602484703,
      "learning_rate": 4.1207013567206764e-06,
      "loss": 0.0102,
      "step": 1804520
    },
    {
      "epoch": 2.9531692883748026,
      "grad_norm": 0.15663324296474457,
      "learning_rate": 4.120635464507159e-06,
      "loss": 0.0132,
      "step": 1804540
    },
    {
      "epoch": 2.9532020188134562,
      "grad_norm": 0.307934045791626,
      "learning_rate": 4.120569572293642e-06,
      "loss": 0.0165,
      "step": 1804560
    },
    {
      "epoch": 2.9532347492521094,
      "grad_norm": 0.5916528701782227,
      "learning_rate": 4.1205036800801255e-06,
      "loss": 0.0159,
      "step": 1804580
    },
    {
      "epoch": 2.953267479690763,
      "grad_norm": 0.32910650968551636,
      "learning_rate": 4.120437787866608e-06,
      "loss": 0.012,
      "step": 1804600
    },
    {
      "epoch": 2.953300210129416,
      "grad_norm": 0.4823998212814331,
      "learning_rate": 4.120371895653091e-06,
      "loss": 0.0123,
      "step": 1804620
    },
    {
      "epoch": 2.9533329405680693,
      "grad_norm": 0.22038201987743378,
      "learning_rate": 4.120306003439574e-06,
      "loss": 0.0188,
      "step": 1804640
    },
    {
      "epoch": 2.953365671006723,
      "grad_norm": 0.23058907687664032,
      "learning_rate": 4.1202401112260565e-06,
      "loss": 0.0107,
      "step": 1804660
    },
    {
      "epoch": 2.953398401445376,
      "grad_norm": 0.3246893286705017,
      "learning_rate": 4.12017421901254e-06,
      "loss": 0.0097,
      "step": 1804680
    },
    {
      "epoch": 2.9534311318840296,
      "grad_norm": 0.26957136392593384,
      "learning_rate": 4.120108326799023e-06,
      "loss": 0.0191,
      "step": 1804700
    },
    {
      "epoch": 2.9534638623226828,
      "grad_norm": 0.22110632061958313,
      "learning_rate": 4.1200424345855055e-06,
      "loss": 0.013,
      "step": 1804720
    },
    {
      "epoch": 2.9534965927613364,
      "grad_norm": 0.5377861857414246,
      "learning_rate": 4.119976542371988e-06,
      "loss": 0.0132,
      "step": 1804740
    },
    {
      "epoch": 2.9535293231999895,
      "grad_norm": 0.42364877462387085,
      "learning_rate": 4.119910650158471e-06,
      "loss": 0.0181,
      "step": 1804760
    },
    {
      "epoch": 2.9535620536386427,
      "grad_norm": 0.14885058999061584,
      "learning_rate": 4.119844757944954e-06,
      "loss": 0.0125,
      "step": 1804780
    },
    {
      "epoch": 2.9535947840772963,
      "grad_norm": 1.0807291269302368,
      "learning_rate": 4.119778865731437e-06,
      "loss": 0.016,
      "step": 1804800
    },
    {
      "epoch": 2.9536275145159494,
      "grad_norm": 0.4700329601764679,
      "learning_rate": 4.11971297351792e-06,
      "loss": 0.0161,
      "step": 1804820
    },
    {
      "epoch": 2.953660244954603,
      "grad_norm": 0.3851785957813263,
      "learning_rate": 4.119647081304403e-06,
      "loss": 0.0112,
      "step": 1804840
    },
    {
      "epoch": 2.953692975393256,
      "grad_norm": 0.07908132672309875,
      "learning_rate": 4.1195811890908855e-06,
      "loss": 0.0082,
      "step": 1804860
    },
    {
      "epoch": 2.9537257058319097,
      "grad_norm": 0.1576838195323944,
      "learning_rate": 4.119515296877368e-06,
      "loss": 0.0129,
      "step": 1804880
    },
    {
      "epoch": 2.953758436270563,
      "grad_norm": 0.3753383159637451,
      "learning_rate": 4.119449404663851e-06,
      "loss": 0.0128,
      "step": 1804900
    },
    {
      "epoch": 2.953791166709216,
      "grad_norm": 0.15319319069385529,
      "learning_rate": 4.119383512450334e-06,
      "loss": 0.0154,
      "step": 1804920
    },
    {
      "epoch": 2.9538238971478696,
      "grad_norm": 0.19870610535144806,
      "learning_rate": 4.1193176202368165e-06,
      "loss": 0.0129,
      "step": 1804940
    },
    {
      "epoch": 2.953856627586523,
      "grad_norm": 0.2766171395778656,
      "learning_rate": 4.119251728023299e-06,
      "loss": 0.0151,
      "step": 1804960
    },
    {
      "epoch": 2.9538893580251764,
      "grad_norm": 0.27733227610588074,
      "learning_rate": 4.119185835809783e-06,
      "loss": 0.0127,
      "step": 1804980
    },
    {
      "epoch": 2.9539220884638295,
      "grad_norm": 0.3094328045845032,
      "learning_rate": 4.1191199435962656e-06,
      "loss": 0.0133,
      "step": 1805000
    },
    {
      "epoch": 2.953954818902483,
      "grad_norm": 0.45165231823921204,
      "learning_rate": 4.119054051382748e-06,
      "loss": 0.0136,
      "step": 1805020
    },
    {
      "epoch": 2.9539875493411363,
      "grad_norm": 0.2856351137161255,
      "learning_rate": 4.118988159169232e-06,
      "loss": 0.0124,
      "step": 1805040
    },
    {
      "epoch": 2.9540202797797894,
      "grad_norm": 0.32870376110076904,
      "learning_rate": 4.118922266955715e-06,
      "loss": 0.0119,
      "step": 1805060
    },
    {
      "epoch": 2.954053010218443,
      "grad_norm": 0.32755452394485474,
      "learning_rate": 4.118856374742197e-06,
      "loss": 0.0143,
      "step": 1805080
    },
    {
      "epoch": 2.954085740657096,
      "grad_norm": 0.2480122148990631,
      "learning_rate": 4.11879048252868e-06,
      "loss": 0.0161,
      "step": 1805100
    },
    {
      "epoch": 2.9541184710957498,
      "grad_norm": 0.21998952329158783,
      "learning_rate": 4.118724590315163e-06,
      "loss": 0.0146,
      "step": 1805120
    },
    {
      "epoch": 2.954151201534403,
      "grad_norm": 0.505183756351471,
      "learning_rate": 4.118658698101646e-06,
      "loss": 0.0172,
      "step": 1805140
    },
    {
      "epoch": 2.9541839319730565,
      "grad_norm": 0.16818350553512573,
      "learning_rate": 4.118592805888128e-06,
      "loss": 0.0155,
      "step": 1805160
    },
    {
      "epoch": 2.9542166624117097,
      "grad_norm": 0.5218289494514465,
      "learning_rate": 4.118526913674611e-06,
      "loss": 0.0161,
      "step": 1805180
    },
    {
      "epoch": 2.954249392850363,
      "grad_norm": 0.5003170371055603,
      "learning_rate": 4.118461021461095e-06,
      "loss": 0.0126,
      "step": 1805200
    },
    {
      "epoch": 2.9542821232890164,
      "grad_norm": 0.17460763454437256,
      "learning_rate": 4.118395129247577e-06,
      "loss": 0.0116,
      "step": 1805220
    },
    {
      "epoch": 2.9543148537276696,
      "grad_norm": 0.19274109601974487,
      "learning_rate": 4.11832923703406e-06,
      "loss": 0.0226,
      "step": 1805240
    },
    {
      "epoch": 2.954347584166323,
      "grad_norm": 0.5610008835792542,
      "learning_rate": 4.118263344820543e-06,
      "loss": 0.0138,
      "step": 1805260
    },
    {
      "epoch": 2.9543803146049763,
      "grad_norm": 0.6845584511756897,
      "learning_rate": 4.118197452607026e-06,
      "loss": 0.0171,
      "step": 1805280
    },
    {
      "epoch": 2.95441304504363,
      "grad_norm": 0.14301538467407227,
      "learning_rate": 4.118131560393508e-06,
      "loss": 0.0111,
      "step": 1805300
    },
    {
      "epoch": 2.954445775482283,
      "grad_norm": 0.6133694648742676,
      "learning_rate": 4.118065668179991e-06,
      "loss": 0.0093,
      "step": 1805320
    },
    {
      "epoch": 2.954478505920936,
      "grad_norm": 0.3200087547302246,
      "learning_rate": 4.117999775966474e-06,
      "loss": 0.0187,
      "step": 1805340
    },
    {
      "epoch": 2.95451123635959,
      "grad_norm": 0.20427796244621277,
      "learning_rate": 4.117933883752957e-06,
      "loss": 0.017,
      "step": 1805360
    },
    {
      "epoch": 2.954543966798243,
      "grad_norm": 0.4339572489261627,
      "learning_rate": 4.11786799153944e-06,
      "loss": 0.0117,
      "step": 1805380
    },
    {
      "epoch": 2.9545766972368965,
      "grad_norm": 0.26886075735092163,
      "learning_rate": 4.117802099325923e-06,
      "loss": 0.0123,
      "step": 1805400
    },
    {
      "epoch": 2.9546094276755497,
      "grad_norm": 0.438522070646286,
      "learning_rate": 4.1177362071124065e-06,
      "loss": 0.0106,
      "step": 1805420
    },
    {
      "epoch": 2.9546421581142033,
      "grad_norm": 0.09133835881948471,
      "learning_rate": 4.117670314898889e-06,
      "loss": 0.0094,
      "step": 1805440
    },
    {
      "epoch": 2.9546748885528564,
      "grad_norm": 0.3001656234264374,
      "learning_rate": 4.117604422685372e-06,
      "loss": 0.0172,
      "step": 1805460
    },
    {
      "epoch": 2.9547076189915096,
      "grad_norm": 0.398296594619751,
      "learning_rate": 4.117538530471855e-06,
      "loss": 0.0154,
      "step": 1805480
    },
    {
      "epoch": 2.954740349430163,
      "grad_norm": 0.24653205275535583,
      "learning_rate": 4.1174726382583374e-06,
      "loss": 0.0159,
      "step": 1805500
    },
    {
      "epoch": 2.9547730798688163,
      "grad_norm": 0.28064075112342834,
      "learning_rate": 4.11740674604482e-06,
      "loss": 0.015,
      "step": 1805520
    },
    {
      "epoch": 2.9548058103074695,
      "grad_norm": 0.1613120436668396,
      "learning_rate": 4.117340853831303e-06,
      "loss": 0.0108,
      "step": 1805540
    },
    {
      "epoch": 2.954838540746123,
      "grad_norm": 0.40867504477500916,
      "learning_rate": 4.117274961617786e-06,
      "loss": 0.013,
      "step": 1805560
    },
    {
      "epoch": 2.9548712711847767,
      "grad_norm": 0.5910707116127014,
      "learning_rate": 4.117209069404268e-06,
      "loss": 0.0145,
      "step": 1805580
    },
    {
      "epoch": 2.95490400162343,
      "grad_norm": 0.5721312761306763,
      "learning_rate": 4.117143177190752e-06,
      "loss": 0.0146,
      "step": 1805600
    },
    {
      "epoch": 2.954936732062083,
      "grad_norm": 0.6347916126251221,
      "learning_rate": 4.117077284977235e-06,
      "loss": 0.0089,
      "step": 1805620
    },
    {
      "epoch": 2.9549694625007366,
      "grad_norm": 0.6924389600753784,
      "learning_rate": 4.1170113927637175e-06,
      "loss": 0.0111,
      "step": 1805640
    },
    {
      "epoch": 2.9550021929393897,
      "grad_norm": 0.5052056908607483,
      "learning_rate": 4.1169455005502e-06,
      "loss": 0.022,
      "step": 1805660
    },
    {
      "epoch": 2.955034923378043,
      "grad_norm": 0.1925235539674759,
      "learning_rate": 4.116879608336683e-06,
      "loss": 0.0111,
      "step": 1805680
    },
    {
      "epoch": 2.9550676538166964,
      "grad_norm": 0.2446979582309723,
      "learning_rate": 4.116813716123166e-06,
      "loss": 0.0151,
      "step": 1805700
    },
    {
      "epoch": 2.95510038425535,
      "grad_norm": 0.7892528176307678,
      "learning_rate": 4.1167478239096484e-06,
      "loss": 0.0149,
      "step": 1805720
    },
    {
      "epoch": 2.955133114694003,
      "grad_norm": 0.16829532384872437,
      "learning_rate": 4.116681931696132e-06,
      "loss": 0.0142,
      "step": 1805740
    },
    {
      "epoch": 2.9551658451326563,
      "grad_norm": 0.24574603140354156,
      "learning_rate": 4.116616039482615e-06,
      "loss": 0.0178,
      "step": 1805760
    },
    {
      "epoch": 2.95519857557131,
      "grad_norm": 0.21383751928806305,
      "learning_rate": 4.1165501472690975e-06,
      "loss": 0.0119,
      "step": 1805780
    },
    {
      "epoch": 2.955231306009963,
      "grad_norm": 0.08492162823677063,
      "learning_rate": 4.11648425505558e-06,
      "loss": 0.0081,
      "step": 1805800
    },
    {
      "epoch": 2.9552640364486162,
      "grad_norm": 0.17038816213607788,
      "learning_rate": 4.116418362842064e-06,
      "loss": 0.0101,
      "step": 1805820
    },
    {
      "epoch": 2.95529676688727,
      "grad_norm": 0.26527565717697144,
      "learning_rate": 4.1163524706285466e-06,
      "loss": 0.0071,
      "step": 1805840
    },
    {
      "epoch": 2.9553294973259234,
      "grad_norm": 0.3560521602630615,
      "learning_rate": 4.116286578415029e-06,
      "loss": 0.0152,
      "step": 1805860
    },
    {
      "epoch": 2.9553622277645766,
      "grad_norm": 6.92692756652832,
      "learning_rate": 4.116220686201512e-06,
      "loss": 0.009,
      "step": 1805880
    },
    {
      "epoch": 2.9553949582032297,
      "grad_norm": 0.09658597409725189,
      "learning_rate": 4.116154793987995e-06,
      "loss": 0.0175,
      "step": 1805900
    },
    {
      "epoch": 2.9554276886418833,
      "grad_norm": 0.5989220142364502,
      "learning_rate": 4.1160889017744775e-06,
      "loss": 0.0169,
      "step": 1805920
    },
    {
      "epoch": 2.9554604190805365,
      "grad_norm": 0.08347871899604797,
      "learning_rate": 4.11602300956096e-06,
      "loss": 0.0127,
      "step": 1805940
    },
    {
      "epoch": 2.9554931495191896,
      "grad_norm": 0.18631315231323242,
      "learning_rate": 4.115957117347443e-06,
      "loss": 0.0122,
      "step": 1805960
    },
    {
      "epoch": 2.955525879957843,
      "grad_norm": 0.11587151139974594,
      "learning_rate": 4.115891225133926e-06,
      "loss": 0.0099,
      "step": 1805980
    },
    {
      "epoch": 2.955558610396497,
      "grad_norm": 0.15026183426380157,
      "learning_rate": 4.115825332920409e-06,
      "loss": 0.0142,
      "step": 1806000
    },
    {
      "epoch": 2.95559134083515,
      "grad_norm": 0.5457901954650879,
      "learning_rate": 4.115759440706892e-06,
      "loss": 0.0139,
      "step": 1806020
    },
    {
      "epoch": 2.955624071273803,
      "grad_norm": 0.14181949198246002,
      "learning_rate": 4.115693548493375e-06,
      "loss": 0.0114,
      "step": 1806040
    },
    {
      "epoch": 2.9556568017124567,
      "grad_norm": 0.25891056656837463,
      "learning_rate": 4.1156276562798576e-06,
      "loss": 0.0139,
      "step": 1806060
    },
    {
      "epoch": 2.95568953215111,
      "grad_norm": 0.11785908043384552,
      "learning_rate": 4.115561764066341e-06,
      "loss": 0.0104,
      "step": 1806080
    },
    {
      "epoch": 2.955722262589763,
      "grad_norm": 0.16431544721126556,
      "learning_rate": 4.115495871852824e-06,
      "loss": 0.0172,
      "step": 1806100
    },
    {
      "epoch": 2.9557549930284166,
      "grad_norm": 0.12139686942100525,
      "learning_rate": 4.115429979639307e-06,
      "loss": 0.0096,
      "step": 1806120
    },
    {
      "epoch": 2.9557877234670697,
      "grad_norm": 0.38836535811424255,
      "learning_rate": 4.115364087425789e-06,
      "loss": 0.0085,
      "step": 1806140
    },
    {
      "epoch": 2.9558204539057233,
      "grad_norm": 0.4101393520832062,
      "learning_rate": 4.115298195212272e-06,
      "loss": 0.0098,
      "step": 1806160
    },
    {
      "epoch": 2.9558531843443765,
      "grad_norm": 0.1817486733198166,
      "learning_rate": 4.115232302998755e-06,
      "loss": 0.0211,
      "step": 1806180
    },
    {
      "epoch": 2.95588591478303,
      "grad_norm": 0.060486625880002975,
      "learning_rate": 4.1151664107852376e-06,
      "loss": 0.0141,
      "step": 1806200
    },
    {
      "epoch": 2.9559186452216832,
      "grad_norm": 0.7484152913093567,
      "learning_rate": 4.115100518571721e-06,
      "loss": 0.0137,
      "step": 1806220
    },
    {
      "epoch": 2.9559513756603364,
      "grad_norm": 0.8968496918678284,
      "learning_rate": 4.115034626358204e-06,
      "loss": 0.0086,
      "step": 1806240
    },
    {
      "epoch": 2.95598410609899,
      "grad_norm": 0.6626549363136292,
      "learning_rate": 4.114968734144687e-06,
      "loss": 0.0138,
      "step": 1806260
    },
    {
      "epoch": 2.956016836537643,
      "grad_norm": 0.2132822424173355,
      "learning_rate": 4.114902841931169e-06,
      "loss": 0.0116,
      "step": 1806280
    },
    {
      "epoch": 2.9560495669762967,
      "grad_norm": 0.20622195303440094,
      "learning_rate": 4.114836949717652e-06,
      "loss": 0.0142,
      "step": 1806300
    },
    {
      "epoch": 2.95608229741495,
      "grad_norm": 0.2494620680809021,
      "learning_rate": 4.114771057504135e-06,
      "loss": 0.0109,
      "step": 1806320
    },
    {
      "epoch": 2.9561150278536035,
      "grad_norm": 0.20296084880828857,
      "learning_rate": 4.114705165290618e-06,
      "loss": 0.0149,
      "step": 1806340
    },
    {
      "epoch": 2.9561477582922566,
      "grad_norm": 0.762444019317627,
      "learning_rate": 4.1146392730771e-06,
      "loss": 0.0142,
      "step": 1806360
    },
    {
      "epoch": 2.9561804887309098,
      "grad_norm": 0.3146299123764038,
      "learning_rate": 4.114573380863583e-06,
      "loss": 0.0095,
      "step": 1806380
    },
    {
      "epoch": 2.9562132191695634,
      "grad_norm": 0.5355970859527588,
      "learning_rate": 4.114507488650067e-06,
      "loss": 0.0109,
      "step": 1806400
    },
    {
      "epoch": 2.9562459496082165,
      "grad_norm": 0.19788217544555664,
      "learning_rate": 4.114441596436549e-06,
      "loss": 0.0129,
      "step": 1806420
    },
    {
      "epoch": 2.95627868004687,
      "grad_norm": 0.6002064347267151,
      "learning_rate": 4.114375704223032e-06,
      "loss": 0.0089,
      "step": 1806440
    },
    {
      "epoch": 2.9563114104855233,
      "grad_norm": 0.35447025299072266,
      "learning_rate": 4.114309812009516e-06,
      "loss": 0.0071,
      "step": 1806460
    },
    {
      "epoch": 2.956344140924177,
      "grad_norm": 0.39458540081977844,
      "learning_rate": 4.1142439197959985e-06,
      "loss": 0.0146,
      "step": 1806480
    },
    {
      "epoch": 2.95637687136283,
      "grad_norm": 0.21707390248775482,
      "learning_rate": 4.114178027582481e-06,
      "loss": 0.016,
      "step": 1806500
    },
    {
      "epoch": 2.956409601801483,
      "grad_norm": 0.6237785220146179,
      "learning_rate": 4.114112135368964e-06,
      "loss": 0.0152,
      "step": 1806520
    },
    {
      "epoch": 2.9564423322401367,
      "grad_norm": 0.18037782609462738,
      "learning_rate": 4.114046243155447e-06,
      "loss": 0.015,
      "step": 1806540
    },
    {
      "epoch": 2.95647506267879,
      "grad_norm": 0.35833725333213806,
      "learning_rate": 4.1139803509419294e-06,
      "loss": 0.0169,
      "step": 1806560
    },
    {
      "epoch": 2.9565077931174435,
      "grad_norm": 0.3080962896347046,
      "learning_rate": 4.113914458728412e-06,
      "loss": 0.0139,
      "step": 1806580
    },
    {
      "epoch": 2.9565405235560966,
      "grad_norm": 0.36973947286605835,
      "learning_rate": 4.113848566514895e-06,
      "loss": 0.015,
      "step": 1806600
    },
    {
      "epoch": 2.9565732539947502,
      "grad_norm": 0.5957580208778381,
      "learning_rate": 4.1137826743013785e-06,
      "loss": 0.0141,
      "step": 1806620
    },
    {
      "epoch": 2.9566059844334034,
      "grad_norm": 0.17067518830299377,
      "learning_rate": 4.113716782087861e-06,
      "loss": 0.0108,
      "step": 1806640
    },
    {
      "epoch": 2.9566387148720565,
      "grad_norm": 1.1084468364715576,
      "learning_rate": 4.113650889874344e-06,
      "loss": 0.0115,
      "step": 1806660
    },
    {
      "epoch": 2.95667144531071,
      "grad_norm": 0.12955132126808167,
      "learning_rate": 4.113584997660827e-06,
      "loss": 0.0111,
      "step": 1806680
    },
    {
      "epoch": 2.9567041757493633,
      "grad_norm": 0.45612597465515137,
      "learning_rate": 4.1135191054473095e-06,
      "loss": 0.0105,
      "step": 1806700
    },
    {
      "epoch": 2.956736906188017,
      "grad_norm": 0.09341150522232056,
      "learning_rate": 4.113453213233792e-06,
      "loss": 0.011,
      "step": 1806720
    },
    {
      "epoch": 2.95676963662667,
      "grad_norm": 0.13568122684955597,
      "learning_rate": 4.113387321020275e-06,
      "loss": 0.0096,
      "step": 1806740
    },
    {
      "epoch": 2.9568023670653236,
      "grad_norm": 0.7472578883171082,
      "learning_rate": 4.113321428806758e-06,
      "loss": 0.0106,
      "step": 1806760
    },
    {
      "epoch": 2.9568350975039768,
      "grad_norm": 0.12756070494651794,
      "learning_rate": 4.11325553659324e-06,
      "loss": 0.0105,
      "step": 1806780
    },
    {
      "epoch": 2.95686782794263,
      "grad_norm": 0.5305782556533813,
      "learning_rate": 4.113189644379724e-06,
      "loss": 0.0168,
      "step": 1806800
    },
    {
      "epoch": 2.9569005583812835,
      "grad_norm": 0.07854729890823364,
      "learning_rate": 4.113123752166207e-06,
      "loss": 0.0164,
      "step": 1806820
    },
    {
      "epoch": 2.9569332888199367,
      "grad_norm": 0.29513469338417053,
      "learning_rate": 4.11305785995269e-06,
      "loss": 0.0172,
      "step": 1806840
    },
    {
      "epoch": 2.9569660192585903,
      "grad_norm": 0.3214956223964691,
      "learning_rate": 4.112991967739173e-06,
      "loss": 0.0111,
      "step": 1806860
    },
    {
      "epoch": 2.9569987496972434,
      "grad_norm": 0.210491344332695,
      "learning_rate": 4.112926075525656e-06,
      "loss": 0.0108,
      "step": 1806880
    },
    {
      "epoch": 2.957031480135897,
      "grad_norm": 0.15149885416030884,
      "learning_rate": 4.1128601833121385e-06,
      "loss": 0.0139,
      "step": 1806900
    },
    {
      "epoch": 2.95706421057455,
      "grad_norm": 0.3590739965438843,
      "learning_rate": 4.112794291098621e-06,
      "loss": 0.0166,
      "step": 1806920
    },
    {
      "epoch": 2.9570969410132033,
      "grad_norm": 1.1979390382766724,
      "learning_rate": 4.112728398885104e-06,
      "loss": 0.0099,
      "step": 1806940
    },
    {
      "epoch": 2.957129671451857,
      "grad_norm": 0.5877086520195007,
      "learning_rate": 4.112662506671587e-06,
      "loss": 0.0132,
      "step": 1806960
    },
    {
      "epoch": 2.95716240189051,
      "grad_norm": 0.289146363735199,
      "learning_rate": 4.1125966144580695e-06,
      "loss": 0.0107,
      "step": 1806980
    },
    {
      "epoch": 2.957195132329163,
      "grad_norm": 0.2474912852048874,
      "learning_rate": 4.112530722244552e-06,
      "loss": 0.0078,
      "step": 1807000
    },
    {
      "epoch": 2.957227862767817,
      "grad_norm": 0.13880480825901031,
      "learning_rate": 4.112464830031036e-06,
      "loss": 0.0143,
      "step": 1807020
    },
    {
      "epoch": 2.9572605932064704,
      "grad_norm": 0.35067006945610046,
      "learning_rate": 4.1123989378175186e-06,
      "loss": 0.0111,
      "step": 1807040
    },
    {
      "epoch": 2.9572933236451235,
      "grad_norm": 0.5080698728561401,
      "learning_rate": 4.112333045604001e-06,
      "loss": 0.0144,
      "step": 1807060
    },
    {
      "epoch": 2.9573260540837767,
      "grad_norm": 0.549444317817688,
      "learning_rate": 4.112267153390484e-06,
      "loss": 0.0136,
      "step": 1807080
    },
    {
      "epoch": 2.9573587845224303,
      "grad_norm": 0.21065755188465118,
      "learning_rate": 4.112201261176967e-06,
      "loss": 0.0075,
      "step": 1807100
    },
    {
      "epoch": 2.9573915149610834,
      "grad_norm": 0.36219513416290283,
      "learning_rate": 4.1121353689634495e-06,
      "loss": 0.0125,
      "step": 1807120
    },
    {
      "epoch": 2.9574242453997366,
      "grad_norm": 0.12920710444450378,
      "learning_rate": 4.112069476749933e-06,
      "loss": 0.0129,
      "step": 1807140
    },
    {
      "epoch": 2.95745697583839,
      "grad_norm": 0.32659241557121277,
      "learning_rate": 4.112003584536416e-06,
      "loss": 0.0205,
      "step": 1807160
    },
    {
      "epoch": 2.9574897062770438,
      "grad_norm": 0.280527800321579,
      "learning_rate": 4.111937692322899e-06,
      "loss": 0.0129,
      "step": 1807180
    },
    {
      "epoch": 2.957522436715697,
      "grad_norm": 0.123018778860569,
      "learning_rate": 4.111871800109381e-06,
      "loss": 0.0143,
      "step": 1807200
    },
    {
      "epoch": 2.95755516715435,
      "grad_norm": 0.20064185559749603,
      "learning_rate": 4.111805907895864e-06,
      "loss": 0.0071,
      "step": 1807220
    },
    {
      "epoch": 2.9575878975930037,
      "grad_norm": 0.6221766471862793,
      "learning_rate": 4.111740015682348e-06,
      "loss": 0.0145,
      "step": 1807240
    },
    {
      "epoch": 2.957620628031657,
      "grad_norm": 0.1602310687303543,
      "learning_rate": 4.11167412346883e-06,
      "loss": 0.0135,
      "step": 1807260
    },
    {
      "epoch": 2.95765335847031,
      "grad_norm": 0.3565766513347626,
      "learning_rate": 4.111608231255313e-06,
      "loss": 0.0172,
      "step": 1807280
    },
    {
      "epoch": 2.9576860889089636,
      "grad_norm": 0.3057114779949188,
      "learning_rate": 4.111542339041796e-06,
      "loss": 0.0118,
      "step": 1807300
    },
    {
      "epoch": 2.957718819347617,
      "grad_norm": 0.15762826800346375,
      "learning_rate": 4.111476446828279e-06,
      "loss": 0.0172,
      "step": 1807320
    },
    {
      "epoch": 2.9577515497862703,
      "grad_norm": 0.08908753097057343,
      "learning_rate": 4.111410554614761e-06,
      "loss": 0.015,
      "step": 1807340
    },
    {
      "epoch": 2.9577842802249235,
      "grad_norm": 0.3728289008140564,
      "learning_rate": 4.111344662401244e-06,
      "loss": 0.0164,
      "step": 1807360
    },
    {
      "epoch": 2.957817010663577,
      "grad_norm": 1.1676594018936157,
      "learning_rate": 4.111278770187727e-06,
      "loss": 0.0128,
      "step": 1807380
    },
    {
      "epoch": 2.95784974110223,
      "grad_norm": 0.2369731068611145,
      "learning_rate": 4.11121287797421e-06,
      "loss": 0.0111,
      "step": 1807400
    },
    {
      "epoch": 2.9578824715408834,
      "grad_norm": 0.397491991519928,
      "learning_rate": 4.111146985760693e-06,
      "loss": 0.0143,
      "step": 1807420
    },
    {
      "epoch": 2.957915201979537,
      "grad_norm": 0.18436139822006226,
      "learning_rate": 4.111081093547176e-06,
      "loss": 0.0086,
      "step": 1807440
    },
    {
      "epoch": 2.9579479324181905,
      "grad_norm": 0.2605574131011963,
      "learning_rate": 4.111015201333659e-06,
      "loss": 0.012,
      "step": 1807460
    },
    {
      "epoch": 2.9579806628568437,
      "grad_norm": 0.7613222599029541,
      "learning_rate": 4.110949309120141e-06,
      "loss": 0.0139,
      "step": 1807480
    },
    {
      "epoch": 2.958013393295497,
      "grad_norm": 0.34063825011253357,
      "learning_rate": 4.110883416906625e-06,
      "loss": 0.0179,
      "step": 1807500
    },
    {
      "epoch": 2.9580461237341504,
      "grad_norm": 0.138950914144516,
      "learning_rate": 4.110817524693108e-06,
      "loss": 0.0155,
      "step": 1807520
    },
    {
      "epoch": 2.9580788541728036,
      "grad_norm": 0.24776338040828705,
      "learning_rate": 4.1107516324795905e-06,
      "loss": 0.0206,
      "step": 1807540
    },
    {
      "epoch": 2.9581115846114567,
      "grad_norm": 0.3581242859363556,
      "learning_rate": 4.110685740266073e-06,
      "loss": 0.0104,
      "step": 1807560
    },
    {
      "epoch": 2.9581443150501103,
      "grad_norm": 0.18757185339927673,
      "learning_rate": 4.110619848052556e-06,
      "loss": 0.0124,
      "step": 1807580
    },
    {
      "epoch": 2.9581770454887635,
      "grad_norm": 0.7841128706932068,
      "learning_rate": 4.110553955839039e-06,
      "loss": 0.0113,
      "step": 1807600
    },
    {
      "epoch": 2.958209775927417,
      "grad_norm": 0.32529914379119873,
      "learning_rate": 4.110488063625521e-06,
      "loss": 0.0125,
      "step": 1807620
    },
    {
      "epoch": 2.9582425063660702,
      "grad_norm": 0.1490294635295868,
      "learning_rate": 4.110422171412005e-06,
      "loss": 0.0177,
      "step": 1807640
    },
    {
      "epoch": 2.958275236804724,
      "grad_norm": 0.20907041430473328,
      "learning_rate": 4.110356279198488e-06,
      "loss": 0.0159,
      "step": 1807660
    },
    {
      "epoch": 2.958307967243377,
      "grad_norm": 0.3759591281414032,
      "learning_rate": 4.1102903869849705e-06,
      "loss": 0.0111,
      "step": 1807680
    },
    {
      "epoch": 2.95834069768203,
      "grad_norm": 0.2946203947067261,
      "learning_rate": 4.110224494771453e-06,
      "loss": 0.0092,
      "step": 1807700
    },
    {
      "epoch": 2.9583734281206837,
      "grad_norm": 0.5895365476608276,
      "learning_rate": 4.110158602557936e-06,
      "loss": 0.0193,
      "step": 1807720
    },
    {
      "epoch": 2.958406158559337,
      "grad_norm": 0.381845623254776,
      "learning_rate": 4.110092710344419e-06,
      "loss": 0.012,
      "step": 1807740
    },
    {
      "epoch": 2.9584388889979905,
      "grad_norm": 0.039813291281461716,
      "learning_rate": 4.1100268181309014e-06,
      "loss": 0.0165,
      "step": 1807760
    },
    {
      "epoch": 2.9584716194366436,
      "grad_norm": 0.7789846658706665,
      "learning_rate": 4.109960925917384e-06,
      "loss": 0.0131,
      "step": 1807780
    },
    {
      "epoch": 2.958504349875297,
      "grad_norm": 0.18547357618808746,
      "learning_rate": 4.109895033703867e-06,
      "loss": 0.0123,
      "step": 1807800
    },
    {
      "epoch": 2.9585370803139504,
      "grad_norm": 0.17371606826782227,
      "learning_rate": 4.1098291414903505e-06,
      "loss": 0.0135,
      "step": 1807820
    },
    {
      "epoch": 2.9585698107526035,
      "grad_norm": 0.22998429834842682,
      "learning_rate": 4.109763249276833e-06,
      "loss": 0.02,
      "step": 1807840
    },
    {
      "epoch": 2.958602541191257,
      "grad_norm": 0.19533218443393707,
      "learning_rate": 4.109697357063316e-06,
      "loss": 0.0095,
      "step": 1807860
    },
    {
      "epoch": 2.9586352716299102,
      "grad_norm": 0.17185859382152557,
      "learning_rate": 4.1096314648497996e-06,
      "loss": 0.0107,
      "step": 1807880
    },
    {
      "epoch": 2.958668002068564,
      "grad_norm": 0.3298235237598419,
      "learning_rate": 4.109565572636282e-06,
      "loss": 0.0077,
      "step": 1807900
    },
    {
      "epoch": 2.958700732507217,
      "grad_norm": 0.24206238985061646,
      "learning_rate": 4.109499680422765e-06,
      "loss": 0.009,
      "step": 1807920
    },
    {
      "epoch": 2.9587334629458706,
      "grad_norm": 0.4262487590312958,
      "learning_rate": 4.109433788209248e-06,
      "loss": 0.0183,
      "step": 1807940
    },
    {
      "epoch": 2.9587661933845237,
      "grad_norm": 0.13570043444633484,
      "learning_rate": 4.1093678959957305e-06,
      "loss": 0.0124,
      "step": 1807960
    },
    {
      "epoch": 2.958798923823177,
      "grad_norm": 0.6197572946548462,
      "learning_rate": 4.109302003782213e-06,
      "loss": 0.0149,
      "step": 1807980
    },
    {
      "epoch": 2.9588316542618305,
      "grad_norm": 0.23336870968341827,
      "learning_rate": 4.109236111568696e-06,
      "loss": 0.0095,
      "step": 1808000
    },
    {
      "epoch": 2.9588643847004836,
      "grad_norm": 0.22518552839756012,
      "learning_rate": 4.109170219355179e-06,
      "loss": 0.0183,
      "step": 1808020
    },
    {
      "epoch": 2.9588971151391372,
      "grad_norm": 0.18762333691120148,
      "learning_rate": 4.109104327141662e-06,
      "loss": 0.0155,
      "step": 1808040
    },
    {
      "epoch": 2.9589298455777904,
      "grad_norm": 0.22602878510951996,
      "learning_rate": 4.109038434928145e-06,
      "loss": 0.0122,
      "step": 1808060
    },
    {
      "epoch": 2.958962576016444,
      "grad_norm": 0.12139856815338135,
      "learning_rate": 4.108972542714628e-06,
      "loss": 0.0151,
      "step": 1808080
    },
    {
      "epoch": 2.958995306455097,
      "grad_norm": 0.4806516468524933,
      "learning_rate": 4.1089066505011106e-06,
      "loss": 0.009,
      "step": 1808100
    },
    {
      "epoch": 2.9590280368937503,
      "grad_norm": 0.2622280418872833,
      "learning_rate": 4.108840758287593e-06,
      "loss": 0.0189,
      "step": 1808120
    },
    {
      "epoch": 2.959060767332404,
      "grad_norm": 0.2183668613433838,
      "learning_rate": 4.108774866074076e-06,
      "loss": 0.0203,
      "step": 1808140
    },
    {
      "epoch": 2.959093497771057,
      "grad_norm": 0.5177434682846069,
      "learning_rate": 4.108708973860559e-06,
      "loss": 0.0113,
      "step": 1808160
    },
    {
      "epoch": 2.9591262282097106,
      "grad_norm": 0.8141350746154785,
      "learning_rate": 4.1086430816470415e-06,
      "loss": 0.0135,
      "step": 1808180
    },
    {
      "epoch": 2.9591589586483638,
      "grad_norm": 0.1505727469921112,
      "learning_rate": 4.108577189433525e-06,
      "loss": 0.0126,
      "step": 1808200
    },
    {
      "epoch": 2.9591916890870174,
      "grad_norm": 0.48532557487487793,
      "learning_rate": 4.108511297220008e-06,
      "loss": 0.0134,
      "step": 1808220
    },
    {
      "epoch": 2.9592244195256705,
      "grad_norm": 0.600466251373291,
      "learning_rate": 4.108445405006491e-06,
      "loss": 0.0186,
      "step": 1808240
    },
    {
      "epoch": 2.9592571499643237,
      "grad_norm": 0.17033825814723969,
      "learning_rate": 4.108379512792974e-06,
      "loss": 0.0154,
      "step": 1808260
    },
    {
      "epoch": 2.9592898804029772,
      "grad_norm": 0.1432923823595047,
      "learning_rate": 4.108313620579457e-06,
      "loss": 0.01,
      "step": 1808280
    },
    {
      "epoch": 2.9593226108416304,
      "grad_norm": 0.1704995185136795,
      "learning_rate": 4.10824772836594e-06,
      "loss": 0.0107,
      "step": 1808300
    },
    {
      "epoch": 2.959355341280284,
      "grad_norm": 0.10326162725687027,
      "learning_rate": 4.108181836152422e-06,
      "loss": 0.0121,
      "step": 1808320
    },
    {
      "epoch": 2.959388071718937,
      "grad_norm": 0.5719053149223328,
      "learning_rate": 4.108115943938905e-06,
      "loss": 0.0132,
      "step": 1808340
    },
    {
      "epoch": 2.9594208021575907,
      "grad_norm": 0.1666792929172516,
      "learning_rate": 4.108050051725388e-06,
      "loss": 0.0111,
      "step": 1808360
    },
    {
      "epoch": 2.959453532596244,
      "grad_norm": 0.3231465220451355,
      "learning_rate": 4.107984159511871e-06,
      "loss": 0.0203,
      "step": 1808380
    },
    {
      "epoch": 2.959486263034897,
      "grad_norm": 0.8555207252502441,
      "learning_rate": 4.107918267298353e-06,
      "loss": 0.016,
      "step": 1808400
    },
    {
      "epoch": 2.9595189934735506,
      "grad_norm": 0.40450727939605713,
      "learning_rate": 4.107852375084836e-06,
      "loss": 0.0106,
      "step": 1808420
    },
    {
      "epoch": 2.959551723912204,
      "grad_norm": 0.21471579372882843,
      "learning_rate": 4.10778648287132e-06,
      "loss": 0.0198,
      "step": 1808440
    },
    {
      "epoch": 2.9595844543508574,
      "grad_norm": 0.23763173818588257,
      "learning_rate": 4.107720590657802e-06,
      "loss": 0.0139,
      "step": 1808460
    },
    {
      "epoch": 2.9596171847895105,
      "grad_norm": 0.11451613903045654,
      "learning_rate": 4.107654698444285e-06,
      "loss": 0.0124,
      "step": 1808480
    },
    {
      "epoch": 2.959649915228164,
      "grad_norm": 0.2509554326534271,
      "learning_rate": 4.107588806230768e-06,
      "loss": 0.0101,
      "step": 1808500
    },
    {
      "epoch": 2.9596826456668173,
      "grad_norm": 0.4143271744251251,
      "learning_rate": 4.107522914017251e-06,
      "loss": 0.0136,
      "step": 1808520
    },
    {
      "epoch": 2.9597153761054704,
      "grad_norm": 0.5426110029220581,
      "learning_rate": 4.107457021803733e-06,
      "loss": 0.0191,
      "step": 1808540
    },
    {
      "epoch": 2.959748106544124,
      "grad_norm": 0.364849716424942,
      "learning_rate": 4.107391129590217e-06,
      "loss": 0.0107,
      "step": 1808560
    },
    {
      "epoch": 2.959780836982777,
      "grad_norm": 0.2036498636007309,
      "learning_rate": 4.1073252373767e-06,
      "loss": 0.0107,
      "step": 1808580
    },
    {
      "epoch": 2.9598135674214303,
      "grad_norm": 0.23458756506443024,
      "learning_rate": 4.1072593451631824e-06,
      "loss": 0.0126,
      "step": 1808600
    },
    {
      "epoch": 2.959846297860084,
      "grad_norm": 0.07819055765867233,
      "learning_rate": 4.107193452949665e-06,
      "loss": 0.0124,
      "step": 1808620
    },
    {
      "epoch": 2.9598790282987375,
      "grad_norm": 0.36757394671440125,
      "learning_rate": 4.107127560736148e-06,
      "loss": 0.0173,
      "step": 1808640
    },
    {
      "epoch": 2.9599117587373907,
      "grad_norm": 0.29912450909614563,
      "learning_rate": 4.1070616685226315e-06,
      "loss": 0.0105,
      "step": 1808660
    },
    {
      "epoch": 2.959944489176044,
      "grad_norm": 0.30158159136772156,
      "learning_rate": 4.106995776309114e-06,
      "loss": 0.0107,
      "step": 1808680
    },
    {
      "epoch": 2.9599772196146974,
      "grad_norm": 0.6440159678459167,
      "learning_rate": 4.106929884095597e-06,
      "loss": 0.013,
      "step": 1808700
    },
    {
      "epoch": 2.9600099500533505,
      "grad_norm": 0.21919864416122437,
      "learning_rate": 4.10686399188208e-06,
      "loss": 0.0091,
      "step": 1808720
    },
    {
      "epoch": 2.9600426804920037,
      "grad_norm": 0.16852138936519623,
      "learning_rate": 4.1067980996685625e-06,
      "loss": 0.0082,
      "step": 1808740
    },
    {
      "epoch": 2.9600754109306573,
      "grad_norm": 0.39060303568840027,
      "learning_rate": 4.106732207455045e-06,
      "loss": 0.0165,
      "step": 1808760
    },
    {
      "epoch": 2.960108141369311,
      "grad_norm": 0.17477725446224213,
      "learning_rate": 4.106666315241528e-06,
      "loss": 0.0102,
      "step": 1808780
    },
    {
      "epoch": 2.960140871807964,
      "grad_norm": 0.2742270231246948,
      "learning_rate": 4.106600423028011e-06,
      "loss": 0.0128,
      "step": 1808800
    },
    {
      "epoch": 2.960173602246617,
      "grad_norm": 0.08052094280719757,
      "learning_rate": 4.106534530814493e-06,
      "loss": 0.0204,
      "step": 1808820
    },
    {
      "epoch": 2.960206332685271,
      "grad_norm": 0.4122776985168457,
      "learning_rate": 4.106468638600977e-06,
      "loss": 0.0137,
      "step": 1808840
    },
    {
      "epoch": 2.960239063123924,
      "grad_norm": 0.12505985796451569,
      "learning_rate": 4.10640274638746e-06,
      "loss": 0.0078,
      "step": 1808860
    },
    {
      "epoch": 2.960271793562577,
      "grad_norm": 0.22230099141597748,
      "learning_rate": 4.1063368541739425e-06,
      "loss": 0.0131,
      "step": 1808880
    },
    {
      "epoch": 2.9603045240012307,
      "grad_norm": 0.13960866630077362,
      "learning_rate": 4.106270961960426e-06,
      "loss": 0.0123,
      "step": 1808900
    },
    {
      "epoch": 2.9603372544398843,
      "grad_norm": 0.1610168218612671,
      "learning_rate": 4.106205069746909e-06,
      "loss": 0.0129,
      "step": 1808920
    },
    {
      "epoch": 2.9603699848785374,
      "grad_norm": 0.38374119997024536,
      "learning_rate": 4.1061391775333916e-06,
      "loss": 0.0134,
      "step": 1808940
    },
    {
      "epoch": 2.9604027153171906,
      "grad_norm": 0.49199140071868896,
      "learning_rate": 4.106073285319874e-06,
      "loss": 0.0143,
      "step": 1808960
    },
    {
      "epoch": 2.960435445755844,
      "grad_norm": 0.21144086122512817,
      "learning_rate": 4.106007393106357e-06,
      "loss": 0.0104,
      "step": 1808980
    },
    {
      "epoch": 2.9604681761944973,
      "grad_norm": 0.698233425617218,
      "learning_rate": 4.10594150089284e-06,
      "loss": 0.0108,
      "step": 1809000
    },
    {
      "epoch": 2.9605009066331505,
      "grad_norm": 0.130376935005188,
      "learning_rate": 4.1058756086793225e-06,
      "loss": 0.0097,
      "step": 1809020
    },
    {
      "epoch": 2.960533637071804,
      "grad_norm": 0.257394403219223,
      "learning_rate": 4.105809716465805e-06,
      "loss": 0.0123,
      "step": 1809040
    },
    {
      "epoch": 2.9605663675104577,
      "grad_norm": 0.1897452026605606,
      "learning_rate": 4.105743824252289e-06,
      "loss": 0.012,
      "step": 1809060
    },
    {
      "epoch": 2.960599097949111,
      "grad_norm": 0.13281111419200897,
      "learning_rate": 4.105677932038772e-06,
      "loss": 0.015,
      "step": 1809080
    },
    {
      "epoch": 2.960631828387764,
      "grad_norm": 0.05643470957875252,
      "learning_rate": 4.105612039825254e-06,
      "loss": 0.0177,
      "step": 1809100
    },
    {
      "epoch": 2.9606645588264175,
      "grad_norm": 0.42813730239868164,
      "learning_rate": 4.105546147611737e-06,
      "loss": 0.0103,
      "step": 1809120
    },
    {
      "epoch": 2.9606972892650707,
      "grad_norm": 0.2460644245147705,
      "learning_rate": 4.10548025539822e-06,
      "loss": 0.0141,
      "step": 1809140
    },
    {
      "epoch": 2.960730019703724,
      "grad_norm": 0.4266251027584076,
      "learning_rate": 4.1054143631847025e-06,
      "loss": 0.0103,
      "step": 1809160
    },
    {
      "epoch": 2.9607627501423774,
      "grad_norm": 0.20705075562000275,
      "learning_rate": 4.105348470971185e-06,
      "loss": 0.0143,
      "step": 1809180
    },
    {
      "epoch": 2.9607954805810306,
      "grad_norm": 0.4064615070819855,
      "learning_rate": 4.105282578757668e-06,
      "loss": 0.017,
      "step": 1809200
    },
    {
      "epoch": 2.960828211019684,
      "grad_norm": 0.412090539932251,
      "learning_rate": 4.105216686544151e-06,
      "loss": 0.0114,
      "step": 1809220
    },
    {
      "epoch": 2.9608609414583373,
      "grad_norm": 0.6845029592514038,
      "learning_rate": 4.105150794330634e-06,
      "loss": 0.0122,
      "step": 1809240
    },
    {
      "epoch": 2.960893671896991,
      "grad_norm": 0.3108241558074951,
      "learning_rate": 4.105084902117117e-06,
      "loss": 0.0127,
      "step": 1809260
    },
    {
      "epoch": 2.960926402335644,
      "grad_norm": 0.09985838830471039,
      "learning_rate": 4.105019009903601e-06,
      "loss": 0.0146,
      "step": 1809280
    },
    {
      "epoch": 2.9609591327742972,
      "grad_norm": 0.1458805650472641,
      "learning_rate": 4.104953117690083e-06,
      "loss": 0.0106,
      "step": 1809300
    },
    {
      "epoch": 2.960991863212951,
      "grad_norm": 0.38346847891807556,
      "learning_rate": 4.104887225476566e-06,
      "loss": 0.0158,
      "step": 1809320
    },
    {
      "epoch": 2.961024593651604,
      "grad_norm": 0.18954631686210632,
      "learning_rate": 4.104821333263049e-06,
      "loss": 0.0164,
      "step": 1809340
    },
    {
      "epoch": 2.9610573240902576,
      "grad_norm": 0.16501855850219727,
      "learning_rate": 4.104755441049532e-06,
      "loss": 0.0156,
      "step": 1809360
    },
    {
      "epoch": 2.9610900545289107,
      "grad_norm": 1.1811903715133667,
      "learning_rate": 4.104689548836014e-06,
      "loss": 0.0214,
      "step": 1809380
    },
    {
      "epoch": 2.9611227849675643,
      "grad_norm": 0.15427199006080627,
      "learning_rate": 4.104623656622497e-06,
      "loss": 0.0149,
      "step": 1809400
    },
    {
      "epoch": 2.9611555154062175,
      "grad_norm": 0.048739928752183914,
      "learning_rate": 4.10455776440898e-06,
      "loss": 0.0108,
      "step": 1809420
    },
    {
      "epoch": 2.9611882458448706,
      "grad_norm": 0.09810379892587662,
      "learning_rate": 4.104491872195463e-06,
      "loss": 0.0156,
      "step": 1809440
    },
    {
      "epoch": 2.961220976283524,
      "grad_norm": 0.3559485673904419,
      "learning_rate": 4.104425979981946e-06,
      "loss": 0.0174,
      "step": 1809460
    },
    {
      "epoch": 2.9612537067221774,
      "grad_norm": 0.5370156764984131,
      "learning_rate": 4.104360087768429e-06,
      "loss": 0.0178,
      "step": 1809480
    },
    {
      "epoch": 2.961286437160831,
      "grad_norm": 0.7280139923095703,
      "learning_rate": 4.104294195554912e-06,
      "loss": 0.0155,
      "step": 1809500
    },
    {
      "epoch": 2.961319167599484,
      "grad_norm": 0.17528489232063293,
      "learning_rate": 4.104228303341394e-06,
      "loss": 0.0157,
      "step": 1809520
    },
    {
      "epoch": 2.9613518980381377,
      "grad_norm": 0.195574551820755,
      "learning_rate": 4.104162411127877e-06,
      "loss": 0.011,
      "step": 1809540
    },
    {
      "epoch": 2.961384628476791,
      "grad_norm": 0.5535017848014832,
      "learning_rate": 4.10409651891436e-06,
      "loss": 0.0106,
      "step": 1809560
    },
    {
      "epoch": 2.961417358915444,
      "grad_norm": 0.36073896288871765,
      "learning_rate": 4.104030626700843e-06,
      "loss": 0.0201,
      "step": 1809580
    },
    {
      "epoch": 2.9614500893540976,
      "grad_norm": 0.9818183779716492,
      "learning_rate": 4.103964734487326e-06,
      "loss": 0.0112,
      "step": 1809600
    },
    {
      "epoch": 2.9614828197927507,
      "grad_norm": 0.8056238889694214,
      "learning_rate": 4.103898842273809e-06,
      "loss": 0.0121,
      "step": 1809620
    },
    {
      "epoch": 2.9615155502314043,
      "grad_norm": 0.6284075379371643,
      "learning_rate": 4.103832950060292e-06,
      "loss": 0.0145,
      "step": 1809640
    },
    {
      "epoch": 2.9615482806700575,
      "grad_norm": 0.2763404846191406,
      "learning_rate": 4.103767057846774e-06,
      "loss": 0.0132,
      "step": 1809660
    },
    {
      "epoch": 2.961581011108711,
      "grad_norm": 0.09775155037641525,
      "learning_rate": 4.103701165633258e-06,
      "loss": 0.0143,
      "step": 1809680
    },
    {
      "epoch": 2.9616137415473642,
      "grad_norm": 0.5541028380393982,
      "learning_rate": 4.103635273419741e-06,
      "loss": 0.0142,
      "step": 1809700
    },
    {
      "epoch": 2.9616464719860174,
      "grad_norm": 0.14579527080059052,
      "learning_rate": 4.1035693812062235e-06,
      "loss": 0.0163,
      "step": 1809720
    },
    {
      "epoch": 2.961679202424671,
      "grad_norm": 0.5227693319320679,
      "learning_rate": 4.103503488992706e-06,
      "loss": 0.009,
      "step": 1809740
    },
    {
      "epoch": 2.961711932863324,
      "grad_norm": 0.35317370295524597,
      "learning_rate": 4.103437596779189e-06,
      "loss": 0.0172,
      "step": 1809760
    },
    {
      "epoch": 2.9617446633019777,
      "grad_norm": 0.4392125606536865,
      "learning_rate": 4.103371704565672e-06,
      "loss": 0.0129,
      "step": 1809780
    },
    {
      "epoch": 2.961777393740631,
      "grad_norm": 0.27355077862739563,
      "learning_rate": 4.1033058123521544e-06,
      "loss": 0.0211,
      "step": 1809800
    },
    {
      "epoch": 2.9618101241792845,
      "grad_norm": 0.461241215467453,
      "learning_rate": 4.103239920138637e-06,
      "loss": 0.0127,
      "step": 1809820
    },
    {
      "epoch": 2.9618428546179376,
      "grad_norm": 0.484225869178772,
      "learning_rate": 4.10317402792512e-06,
      "loss": 0.0119,
      "step": 1809840
    },
    {
      "epoch": 2.9618755850565908,
      "grad_norm": 0.7047913074493408,
      "learning_rate": 4.1031081357116035e-06,
      "loss": 0.0087,
      "step": 1809860
    },
    {
      "epoch": 2.9619083154952444,
      "grad_norm": 0.19993142783641815,
      "learning_rate": 4.103042243498086e-06,
      "loss": 0.0112,
      "step": 1809880
    },
    {
      "epoch": 2.9619410459338975,
      "grad_norm": 0.2293570637702942,
      "learning_rate": 4.102976351284569e-06,
      "loss": 0.0141,
      "step": 1809900
    },
    {
      "epoch": 2.961973776372551,
      "grad_norm": 0.8053439855575562,
      "learning_rate": 4.102910459071052e-06,
      "loss": 0.0151,
      "step": 1809920
    },
    {
      "epoch": 2.9620065068112043,
      "grad_norm": 0.15545053780078888,
      "learning_rate": 4.1028445668575345e-06,
      "loss": 0.0136,
      "step": 1809940
    },
    {
      "epoch": 2.962039237249858,
      "grad_norm": 0.18838699162006378,
      "learning_rate": 4.102778674644018e-06,
      "loss": 0.018,
      "step": 1809960
    },
    {
      "epoch": 2.962071967688511,
      "grad_norm": 0.22350651025772095,
      "learning_rate": 4.102712782430501e-06,
      "loss": 0.0122,
      "step": 1809980
    },
    {
      "epoch": 2.962104698127164,
      "grad_norm": 0.4159281253814697,
      "learning_rate": 4.1026468902169835e-06,
      "loss": 0.0074,
      "step": 1810000
    },
    {
      "epoch": 2.9621374285658177,
      "grad_norm": 0.05663663521409035,
      "learning_rate": 4.102580998003466e-06,
      "loss": 0.0125,
      "step": 1810020
    },
    {
      "epoch": 2.962170159004471,
      "grad_norm": 0.1966439038515091,
      "learning_rate": 4.102515105789949e-06,
      "loss": 0.0142,
      "step": 1810040
    },
    {
      "epoch": 2.962202889443124,
      "grad_norm": 0.2661772668361664,
      "learning_rate": 4.102449213576432e-06,
      "loss": 0.0186,
      "step": 1810060
    },
    {
      "epoch": 2.9622356198817776,
      "grad_norm": 0.29315295815467834,
      "learning_rate": 4.102383321362915e-06,
      "loss": 0.0226,
      "step": 1810080
    },
    {
      "epoch": 2.9622683503204312,
      "grad_norm": 0.247210294008255,
      "learning_rate": 4.102317429149398e-06,
      "loss": 0.0126,
      "step": 1810100
    },
    {
      "epoch": 2.9623010807590844,
      "grad_norm": 0.4598335921764374,
      "learning_rate": 4.102251536935881e-06,
      "loss": 0.0137,
      "step": 1810120
    },
    {
      "epoch": 2.9623338111977375,
      "grad_norm": 0.343368262052536,
      "learning_rate": 4.1021856447223636e-06,
      "loss": 0.0119,
      "step": 1810140
    },
    {
      "epoch": 2.962366541636391,
      "grad_norm": 0.1599467694759369,
      "learning_rate": 4.102119752508846e-06,
      "loss": 0.0189,
      "step": 1810160
    },
    {
      "epoch": 2.9623992720750443,
      "grad_norm": 0.19148220121860504,
      "learning_rate": 4.102053860295329e-06,
      "loss": 0.0101,
      "step": 1810180
    },
    {
      "epoch": 2.9624320025136974,
      "grad_norm": 0.43123194575309753,
      "learning_rate": 4.101987968081812e-06,
      "loss": 0.0167,
      "step": 1810200
    },
    {
      "epoch": 2.962464732952351,
      "grad_norm": 0.6065373420715332,
      "learning_rate": 4.1019220758682945e-06,
      "loss": 0.0157,
      "step": 1810220
    },
    {
      "epoch": 2.9624974633910046,
      "grad_norm": 0.10554784536361694,
      "learning_rate": 4.101856183654777e-06,
      "loss": 0.0109,
      "step": 1810240
    },
    {
      "epoch": 2.9625301938296578,
      "grad_norm": 0.37505361437797546,
      "learning_rate": 4.101790291441261e-06,
      "loss": 0.0216,
      "step": 1810260
    },
    {
      "epoch": 2.962562924268311,
      "grad_norm": 0.48339134454727173,
      "learning_rate": 4.101724399227744e-06,
      "loss": 0.0072,
      "step": 1810280
    },
    {
      "epoch": 2.9625956547069645,
      "grad_norm": 0.18179439008235931,
      "learning_rate": 4.101658507014226e-06,
      "loss": 0.0093,
      "step": 1810300
    },
    {
      "epoch": 2.9626283851456177,
      "grad_norm": 0.374162495136261,
      "learning_rate": 4.10159261480071e-06,
      "loss": 0.0115,
      "step": 1810320
    },
    {
      "epoch": 2.962661115584271,
      "grad_norm": 0.1736423224210739,
      "learning_rate": 4.101526722587193e-06,
      "loss": 0.0196,
      "step": 1810340
    },
    {
      "epoch": 2.9626938460229244,
      "grad_norm": 0.32650068402290344,
      "learning_rate": 4.101460830373675e-06,
      "loss": 0.0112,
      "step": 1810360
    },
    {
      "epoch": 2.962726576461578,
      "grad_norm": 0.3381901979446411,
      "learning_rate": 4.101394938160158e-06,
      "loss": 0.0125,
      "step": 1810380
    },
    {
      "epoch": 2.962759306900231,
      "grad_norm": 0.28851479291915894,
      "learning_rate": 4.101329045946641e-06,
      "loss": 0.0088,
      "step": 1810400
    },
    {
      "epoch": 2.9627920373388843,
      "grad_norm": 0.3953903317451477,
      "learning_rate": 4.101263153733124e-06,
      "loss": 0.0121,
      "step": 1810420
    },
    {
      "epoch": 2.962824767777538,
      "grad_norm": 0.8713378310203552,
      "learning_rate": 4.101197261519606e-06,
      "loss": 0.0138,
      "step": 1810440
    },
    {
      "epoch": 2.962857498216191,
      "grad_norm": 0.4473907947540283,
      "learning_rate": 4.101131369306089e-06,
      "loss": 0.0165,
      "step": 1810460
    },
    {
      "epoch": 2.962890228654844,
      "grad_norm": 0.2625576853752136,
      "learning_rate": 4.101065477092573e-06,
      "loss": 0.0187,
      "step": 1810480
    },
    {
      "epoch": 2.962922959093498,
      "grad_norm": 0.35569247603416443,
      "learning_rate": 4.100999584879055e-06,
      "loss": 0.0131,
      "step": 1810500
    },
    {
      "epoch": 2.9629556895321514,
      "grad_norm": 0.6024053692817688,
      "learning_rate": 4.100933692665538e-06,
      "loss": 0.0141,
      "step": 1810520
    },
    {
      "epoch": 2.9629884199708045,
      "grad_norm": 0.15489952266216278,
      "learning_rate": 4.100867800452021e-06,
      "loss": 0.0159,
      "step": 1810540
    },
    {
      "epoch": 2.9630211504094577,
      "grad_norm": 0.1919083297252655,
      "learning_rate": 4.100801908238504e-06,
      "loss": 0.0078,
      "step": 1810560
    },
    {
      "epoch": 2.9630538808481113,
      "grad_norm": 0.15395788848400116,
      "learning_rate": 4.100736016024986e-06,
      "loss": 0.0173,
      "step": 1810580
    },
    {
      "epoch": 2.9630866112867644,
      "grad_norm": 0.06872522085905075,
      "learning_rate": 4.100670123811469e-06,
      "loss": 0.0163,
      "step": 1810600
    },
    {
      "epoch": 2.9631193417254176,
      "grad_norm": 0.27309736609458923,
      "learning_rate": 4.100604231597952e-06,
      "loss": 0.0106,
      "step": 1810620
    },
    {
      "epoch": 2.963152072164071,
      "grad_norm": 0.19069521129131317,
      "learning_rate": 4.100538339384435e-06,
      "loss": 0.0136,
      "step": 1810640
    },
    {
      "epoch": 2.9631848026027243,
      "grad_norm": 0.3458068072795868,
      "learning_rate": 4.100472447170918e-06,
      "loss": 0.0126,
      "step": 1810660
    },
    {
      "epoch": 2.963217533041378,
      "grad_norm": 1.0913865566253662,
      "learning_rate": 4.100406554957401e-06,
      "loss": 0.0115,
      "step": 1810680
    },
    {
      "epoch": 2.963250263480031,
      "grad_norm": 0.19433455169200897,
      "learning_rate": 4.1003406627438845e-06,
      "loss": 0.0115,
      "step": 1810700
    },
    {
      "epoch": 2.9632829939186847,
      "grad_norm": 0.29586926102638245,
      "learning_rate": 4.100274770530367e-06,
      "loss": 0.0185,
      "step": 1810720
    },
    {
      "epoch": 2.963315724357338,
      "grad_norm": 0.42268651723861694,
      "learning_rate": 4.10020887831685e-06,
      "loss": 0.0112,
      "step": 1810740
    },
    {
      "epoch": 2.963348454795991,
      "grad_norm": 0.13360662758350372,
      "learning_rate": 4.100142986103333e-06,
      "loss": 0.0094,
      "step": 1810760
    },
    {
      "epoch": 2.9633811852346446,
      "grad_norm": 0.25198376178741455,
      "learning_rate": 4.1000770938898155e-06,
      "loss": 0.0144,
      "step": 1810780
    },
    {
      "epoch": 2.9634139156732977,
      "grad_norm": 1.3581622838974,
      "learning_rate": 4.100011201676298e-06,
      "loss": 0.0166,
      "step": 1810800
    },
    {
      "epoch": 2.9634466461119513,
      "grad_norm": 0.1117338240146637,
      "learning_rate": 4.099945309462781e-06,
      "loss": 0.0182,
      "step": 1810820
    },
    {
      "epoch": 2.9634793765506044,
      "grad_norm": 0.26643696427345276,
      "learning_rate": 4.099879417249264e-06,
      "loss": 0.0098,
      "step": 1810840
    },
    {
      "epoch": 2.963512106989258,
      "grad_norm": 0.9758217930793762,
      "learning_rate": 4.0998135250357464e-06,
      "loss": 0.0111,
      "step": 1810860
    },
    {
      "epoch": 2.963544837427911,
      "grad_norm": 0.2267346978187561,
      "learning_rate": 4.09974763282223e-06,
      "loss": 0.0169,
      "step": 1810880
    },
    {
      "epoch": 2.9635775678665643,
      "grad_norm": 0.2725892961025238,
      "learning_rate": 4.099681740608713e-06,
      "loss": 0.0125,
      "step": 1810900
    },
    {
      "epoch": 2.963610298305218,
      "grad_norm": 0.21174506843090057,
      "learning_rate": 4.0996158483951955e-06,
      "loss": 0.0194,
      "step": 1810920
    },
    {
      "epoch": 2.963643028743871,
      "grad_norm": 0.2830233573913574,
      "learning_rate": 4.099549956181678e-06,
      "loss": 0.0118,
      "step": 1810940
    },
    {
      "epoch": 2.9636757591825247,
      "grad_norm": 0.21382859349250793,
      "learning_rate": 4.099484063968161e-06,
      "loss": 0.0119,
      "step": 1810960
    },
    {
      "epoch": 2.963708489621178,
      "grad_norm": 0.12275390326976776,
      "learning_rate": 4.099418171754644e-06,
      "loss": 0.0119,
      "step": 1810980
    },
    {
      "epoch": 2.9637412200598314,
      "grad_norm": 0.21642214059829712,
      "learning_rate": 4.0993522795411265e-06,
      "loss": 0.0094,
      "step": 1811000
    },
    {
      "epoch": 2.9637739504984846,
      "grad_norm": 0.08799255639314651,
      "learning_rate": 4.09928638732761e-06,
      "loss": 0.0149,
      "step": 1811020
    },
    {
      "epoch": 2.9638066809371377,
      "grad_norm": 0.6791307926177979,
      "learning_rate": 4.099220495114093e-06,
      "loss": 0.0115,
      "step": 1811040
    },
    {
      "epoch": 2.9638394113757913,
      "grad_norm": 0.5070331692695618,
      "learning_rate": 4.0991546029005755e-06,
      "loss": 0.0184,
      "step": 1811060
    },
    {
      "epoch": 2.9638721418144445,
      "grad_norm": 0.14805366098880768,
      "learning_rate": 4.099088710687058e-06,
      "loss": 0.0095,
      "step": 1811080
    },
    {
      "epoch": 2.963904872253098,
      "grad_norm": 0.1804506480693817,
      "learning_rate": 4.099022818473542e-06,
      "loss": 0.0098,
      "step": 1811100
    },
    {
      "epoch": 2.963937602691751,
      "grad_norm": 0.2571297585964203,
      "learning_rate": 4.098956926260025e-06,
      "loss": 0.0107,
      "step": 1811120
    },
    {
      "epoch": 2.963970333130405,
      "grad_norm": 0.689289927482605,
      "learning_rate": 4.098891034046507e-06,
      "loss": 0.0153,
      "step": 1811140
    },
    {
      "epoch": 2.964003063569058,
      "grad_norm": 0.04517088830471039,
      "learning_rate": 4.09882514183299e-06,
      "loss": 0.0162,
      "step": 1811160
    },
    {
      "epoch": 2.964035794007711,
      "grad_norm": 0.3391176462173462,
      "learning_rate": 4.098759249619473e-06,
      "loss": 0.0143,
      "step": 1811180
    },
    {
      "epoch": 2.9640685244463647,
      "grad_norm": 0.7840768694877625,
      "learning_rate": 4.0986933574059555e-06,
      "loss": 0.0198,
      "step": 1811200
    },
    {
      "epoch": 2.964101254885018,
      "grad_norm": 0.3701276481151581,
      "learning_rate": 4.098627465192438e-06,
      "loss": 0.0123,
      "step": 1811220
    },
    {
      "epoch": 2.9641339853236714,
      "grad_norm": 0.3845885992050171,
      "learning_rate": 4.098561572978921e-06,
      "loss": 0.0138,
      "step": 1811240
    },
    {
      "epoch": 2.9641667157623246,
      "grad_norm": 0.10040479153394699,
      "learning_rate": 4.098495680765404e-06,
      "loss": 0.0128,
      "step": 1811260
    },
    {
      "epoch": 2.964199446200978,
      "grad_norm": 0.5934813618659973,
      "learning_rate": 4.098429788551887e-06,
      "loss": 0.0205,
      "step": 1811280
    },
    {
      "epoch": 2.9642321766396313,
      "grad_norm": 0.41326069831848145,
      "learning_rate": 4.09836389633837e-06,
      "loss": 0.0164,
      "step": 1811300
    },
    {
      "epoch": 2.9642649070782845,
      "grad_norm": 0.6910611987113953,
      "learning_rate": 4.098298004124853e-06,
      "loss": 0.0157,
      "step": 1811320
    },
    {
      "epoch": 2.964297637516938,
      "grad_norm": 0.4971415400505066,
      "learning_rate": 4.0982321119113356e-06,
      "loss": 0.01,
      "step": 1811340
    },
    {
      "epoch": 2.9643303679555912,
      "grad_norm": 0.3091789186000824,
      "learning_rate": 4.098166219697819e-06,
      "loss": 0.0153,
      "step": 1811360
    },
    {
      "epoch": 2.964363098394245,
      "grad_norm": 0.31390878558158875,
      "learning_rate": 4.098100327484302e-06,
      "loss": 0.0171,
      "step": 1811380
    },
    {
      "epoch": 2.964395828832898,
      "grad_norm": 0.20323728024959564,
      "learning_rate": 4.098034435270785e-06,
      "loss": 0.011,
      "step": 1811400
    },
    {
      "epoch": 2.9644285592715516,
      "grad_norm": 0.4775564968585968,
      "learning_rate": 4.097968543057267e-06,
      "loss": 0.0088,
      "step": 1811420
    },
    {
      "epoch": 2.9644612897102047,
      "grad_norm": 0.19690638780593872,
      "learning_rate": 4.09790265084375e-06,
      "loss": 0.0093,
      "step": 1811440
    },
    {
      "epoch": 2.964494020148858,
      "grad_norm": 0.7550210952758789,
      "learning_rate": 4.097836758630233e-06,
      "loss": 0.0096,
      "step": 1811460
    },
    {
      "epoch": 2.9645267505875115,
      "grad_norm": 0.8896446228027344,
      "learning_rate": 4.097770866416716e-06,
      "loss": 0.0171,
      "step": 1811480
    },
    {
      "epoch": 2.9645594810261646,
      "grad_norm": 0.7273123860359192,
      "learning_rate": 4.097704974203199e-06,
      "loss": 0.0147,
      "step": 1811500
    },
    {
      "epoch": 2.964592211464818,
      "grad_norm": 0.26727887988090515,
      "learning_rate": 4.097639081989682e-06,
      "loss": 0.0116,
      "step": 1811520
    },
    {
      "epoch": 2.9646249419034714,
      "grad_norm": 0.6424776315689087,
      "learning_rate": 4.097573189776165e-06,
      "loss": 0.0174,
      "step": 1811540
    },
    {
      "epoch": 2.964657672342125,
      "grad_norm": 0.7059694528579712,
      "learning_rate": 4.097507297562647e-06,
      "loss": 0.015,
      "step": 1811560
    },
    {
      "epoch": 2.964690402780778,
      "grad_norm": 0.15484736859798431,
      "learning_rate": 4.09744140534913e-06,
      "loss": 0.0121,
      "step": 1811580
    },
    {
      "epoch": 2.9647231332194313,
      "grad_norm": 0.753788948059082,
      "learning_rate": 4.097375513135613e-06,
      "loss": 0.0132,
      "step": 1811600
    },
    {
      "epoch": 2.964755863658085,
      "grad_norm": 0.19770485162734985,
      "learning_rate": 4.097309620922096e-06,
      "loss": 0.0163,
      "step": 1811620
    },
    {
      "epoch": 2.964788594096738,
      "grad_norm": 0.06371833384037018,
      "learning_rate": 4.097243728708578e-06,
      "loss": 0.0082,
      "step": 1811640
    },
    {
      "epoch": 2.964821324535391,
      "grad_norm": 0.26183733344078064,
      "learning_rate": 4.097177836495061e-06,
      "loss": 0.0099,
      "step": 1811660
    },
    {
      "epoch": 2.9648540549740448,
      "grad_norm": 0.3561922013759613,
      "learning_rate": 4.097111944281545e-06,
      "loss": 0.0134,
      "step": 1811680
    },
    {
      "epoch": 2.9648867854126983,
      "grad_norm": 0.23163002729415894,
      "learning_rate": 4.0970460520680274e-06,
      "loss": 0.0126,
      "step": 1811700
    },
    {
      "epoch": 2.9649195158513515,
      "grad_norm": 0.1244347020983696,
      "learning_rate": 4.09698015985451e-06,
      "loss": 0.0144,
      "step": 1811720
    },
    {
      "epoch": 2.9649522462900046,
      "grad_norm": 0.6518651843070984,
      "learning_rate": 4.096914267640994e-06,
      "loss": 0.0145,
      "step": 1811740
    },
    {
      "epoch": 2.9649849767286582,
      "grad_norm": 0.5418365597724915,
      "learning_rate": 4.0968483754274765e-06,
      "loss": 0.0131,
      "step": 1811760
    },
    {
      "epoch": 2.9650177071673114,
      "grad_norm": 0.22627833485603333,
      "learning_rate": 4.096782483213959e-06,
      "loss": 0.0127,
      "step": 1811780
    },
    {
      "epoch": 2.9650504376059645,
      "grad_norm": 1.2030079364776611,
      "learning_rate": 4.096716591000442e-06,
      "loss": 0.0092,
      "step": 1811800
    },
    {
      "epoch": 2.965083168044618,
      "grad_norm": 0.2779104709625244,
      "learning_rate": 4.096650698786925e-06,
      "loss": 0.0133,
      "step": 1811820
    },
    {
      "epoch": 2.9651158984832717,
      "grad_norm": 0.26563313603401184,
      "learning_rate": 4.0965848065734075e-06,
      "loss": 0.0096,
      "step": 1811840
    },
    {
      "epoch": 2.965148628921925,
      "grad_norm": 0.29491373896598816,
      "learning_rate": 4.09651891435989e-06,
      "loss": 0.0201,
      "step": 1811860
    },
    {
      "epoch": 2.965181359360578,
      "grad_norm": 0.7013726234436035,
      "learning_rate": 4.096453022146373e-06,
      "loss": 0.0125,
      "step": 1811880
    },
    {
      "epoch": 2.9652140897992316,
      "grad_norm": 1.0177754163742065,
      "learning_rate": 4.0963871299328565e-06,
      "loss": 0.0139,
      "step": 1811900
    },
    {
      "epoch": 2.9652468202378848,
      "grad_norm": 0.6185624599456787,
      "learning_rate": 4.096321237719339e-06,
      "loss": 0.01,
      "step": 1811920
    },
    {
      "epoch": 2.965279550676538,
      "grad_norm": 0.7184900641441345,
      "learning_rate": 4.096255345505822e-06,
      "loss": 0.01,
      "step": 1811940
    },
    {
      "epoch": 2.9653122811151915,
      "grad_norm": 0.0734807699918747,
      "learning_rate": 4.096189453292305e-06,
      "loss": 0.0123,
      "step": 1811960
    },
    {
      "epoch": 2.965345011553845,
      "grad_norm": 0.40820083022117615,
      "learning_rate": 4.0961235610787875e-06,
      "loss": 0.0156,
      "step": 1811980
    },
    {
      "epoch": 2.9653777419924983,
      "grad_norm": 0.7008665800094604,
      "learning_rate": 4.09605766886527e-06,
      "loss": 0.0131,
      "step": 1812000
    },
    {
      "epoch": 2.9654104724311514,
      "grad_norm": 0.1507614701986313,
      "learning_rate": 4.095991776651753e-06,
      "loss": 0.0146,
      "step": 1812020
    },
    {
      "epoch": 2.965443202869805,
      "grad_norm": 0.040011536329984665,
      "learning_rate": 4.095925884438236e-06,
      "loss": 0.011,
      "step": 1812040
    },
    {
      "epoch": 2.965475933308458,
      "grad_norm": 0.2761162519454956,
      "learning_rate": 4.095859992224719e-06,
      "loss": 0.0104,
      "step": 1812060
    },
    {
      "epoch": 2.9655086637471113,
      "grad_norm": 0.3731444478034973,
      "learning_rate": 4.095794100011202e-06,
      "loss": 0.0155,
      "step": 1812080
    },
    {
      "epoch": 2.965541394185765,
      "grad_norm": 0.4332059919834137,
      "learning_rate": 4.095728207797685e-06,
      "loss": 0.0201,
      "step": 1812100
    },
    {
      "epoch": 2.965574124624418,
      "grad_norm": 0.90709388256073,
      "learning_rate": 4.095662315584168e-06,
      "loss": 0.011,
      "step": 1812120
    },
    {
      "epoch": 2.9656068550630716,
      "grad_norm": 0.19898834824562073,
      "learning_rate": 4.095596423370651e-06,
      "loss": 0.0135,
      "step": 1812140
    },
    {
      "epoch": 2.965639585501725,
      "grad_norm": 0.3347052037715912,
      "learning_rate": 4.095530531157134e-06,
      "loss": 0.0147,
      "step": 1812160
    },
    {
      "epoch": 2.9656723159403784,
      "grad_norm": 0.3863053619861603,
      "learning_rate": 4.0954646389436166e-06,
      "loss": 0.0138,
      "step": 1812180
    },
    {
      "epoch": 2.9657050463790315,
      "grad_norm": 0.2722559869289398,
      "learning_rate": 4.095398746730099e-06,
      "loss": 0.014,
      "step": 1812200
    },
    {
      "epoch": 2.9657377768176847,
      "grad_norm": 0.4258089065551758,
      "learning_rate": 4.095332854516582e-06,
      "loss": 0.0124,
      "step": 1812220
    },
    {
      "epoch": 2.9657705072563383,
      "grad_norm": 0.35715949535369873,
      "learning_rate": 4.095266962303065e-06,
      "loss": 0.0133,
      "step": 1812240
    },
    {
      "epoch": 2.9658032376949914,
      "grad_norm": 0.172811821103096,
      "learning_rate": 4.0952010700895475e-06,
      "loss": 0.0119,
      "step": 1812260
    },
    {
      "epoch": 2.965835968133645,
      "grad_norm": 0.39724814891815186,
      "learning_rate": 4.09513517787603e-06,
      "loss": 0.0126,
      "step": 1812280
    },
    {
      "epoch": 2.965868698572298,
      "grad_norm": 0.25724947452545166,
      "learning_rate": 4.095069285662514e-06,
      "loss": 0.0088,
      "step": 1812300
    },
    {
      "epoch": 2.9659014290109518,
      "grad_norm": 0.260222852230072,
      "learning_rate": 4.095003393448997e-06,
      "loss": 0.0121,
      "step": 1812320
    },
    {
      "epoch": 2.965934159449605,
      "grad_norm": 0.3311096727848053,
      "learning_rate": 4.094937501235479e-06,
      "loss": 0.0096,
      "step": 1812340
    },
    {
      "epoch": 2.965966889888258,
      "grad_norm": 0.09363310039043427,
      "learning_rate": 4.094871609021962e-06,
      "loss": 0.0117,
      "step": 1812360
    },
    {
      "epoch": 2.9659996203269117,
      "grad_norm": 0.4870954155921936,
      "learning_rate": 4.094805716808445e-06,
      "loss": 0.0152,
      "step": 1812380
    },
    {
      "epoch": 2.966032350765565,
      "grad_norm": 0.33692607283592224,
      "learning_rate": 4.0947398245949276e-06,
      "loss": 0.0123,
      "step": 1812400
    },
    {
      "epoch": 2.9660650812042184,
      "grad_norm": 0.5164482593536377,
      "learning_rate": 4.094673932381411e-06,
      "loss": 0.0151,
      "step": 1812420
    },
    {
      "epoch": 2.9660978116428716,
      "grad_norm": 0.26700571179389954,
      "learning_rate": 4.094608040167894e-06,
      "loss": 0.0117,
      "step": 1812440
    },
    {
      "epoch": 2.966130542081525,
      "grad_norm": 0.23707978427410126,
      "learning_rate": 4.094542147954377e-06,
      "loss": 0.0112,
      "step": 1812460
    },
    {
      "epoch": 2.9661632725201783,
      "grad_norm": 0.16883468627929688,
      "learning_rate": 4.094476255740859e-06,
      "loss": 0.0155,
      "step": 1812480
    },
    {
      "epoch": 2.9661960029588315,
      "grad_norm": 0.2715499699115753,
      "learning_rate": 4.094410363527342e-06,
      "loss": 0.0122,
      "step": 1812500
    },
    {
      "epoch": 2.966228733397485,
      "grad_norm": 0.25362151861190796,
      "learning_rate": 4.094344471313826e-06,
      "loss": 0.0158,
      "step": 1812520
    },
    {
      "epoch": 2.966261463836138,
      "grad_norm": 0.14664986729621887,
      "learning_rate": 4.0942785791003084e-06,
      "loss": 0.0126,
      "step": 1812540
    },
    {
      "epoch": 2.966294194274792,
      "grad_norm": 0.40944159030914307,
      "learning_rate": 4.094212686886791e-06,
      "loss": 0.0168,
      "step": 1812560
    },
    {
      "epoch": 2.966326924713445,
      "grad_norm": 0.4006009101867676,
      "learning_rate": 4.094146794673274e-06,
      "loss": 0.0128,
      "step": 1812580
    },
    {
      "epoch": 2.9663596551520985,
      "grad_norm": 0.35287779569625854,
      "learning_rate": 4.094080902459757e-06,
      "loss": 0.0114,
      "step": 1812600
    },
    {
      "epoch": 2.9663923855907517,
      "grad_norm": 0.8574317693710327,
      "learning_rate": 4.094015010246239e-06,
      "loss": 0.0137,
      "step": 1812620
    },
    {
      "epoch": 2.966425116029405,
      "grad_norm": 0.11131981760263443,
      "learning_rate": 4.093949118032722e-06,
      "loss": 0.0143,
      "step": 1812640
    },
    {
      "epoch": 2.9664578464680584,
      "grad_norm": 0.5442534685134888,
      "learning_rate": 4.093883225819205e-06,
      "loss": 0.0093,
      "step": 1812660
    },
    {
      "epoch": 2.9664905769067116,
      "grad_norm": 0.6282036900520325,
      "learning_rate": 4.093817333605688e-06,
      "loss": 0.0204,
      "step": 1812680
    },
    {
      "epoch": 2.966523307345365,
      "grad_norm": 0.039401065558195114,
      "learning_rate": 4.093751441392171e-06,
      "loss": 0.0143,
      "step": 1812700
    },
    {
      "epoch": 2.9665560377840183,
      "grad_norm": 0.16804122924804688,
      "learning_rate": 4.093685549178654e-06,
      "loss": 0.0093,
      "step": 1812720
    },
    {
      "epoch": 2.966588768222672,
      "grad_norm": 1.2532440423965454,
      "learning_rate": 4.093619656965137e-06,
      "loss": 0.0107,
      "step": 1812740
    },
    {
      "epoch": 2.966621498661325,
      "grad_norm": 0.4248957335948944,
      "learning_rate": 4.093553764751619e-06,
      "loss": 0.0155,
      "step": 1812760
    },
    {
      "epoch": 2.9666542290999782,
      "grad_norm": 0.31498193740844727,
      "learning_rate": 4.093487872538103e-06,
      "loss": 0.0083,
      "step": 1812780
    },
    {
      "epoch": 2.966686959538632,
      "grad_norm": 0.5113030672073364,
      "learning_rate": 4.093421980324586e-06,
      "loss": 0.0135,
      "step": 1812800
    },
    {
      "epoch": 2.966719689977285,
      "grad_norm": 0.8201339244842529,
      "learning_rate": 4.0933560881110685e-06,
      "loss": 0.0151,
      "step": 1812820
    },
    {
      "epoch": 2.9667524204159386,
      "grad_norm": 0.6180959939956665,
      "learning_rate": 4.093290195897551e-06,
      "loss": 0.0105,
      "step": 1812840
    },
    {
      "epoch": 2.9667851508545917,
      "grad_norm": 0.8015777468681335,
      "learning_rate": 4.093224303684034e-06,
      "loss": 0.0144,
      "step": 1812860
    },
    {
      "epoch": 2.9668178812932453,
      "grad_norm": 0.2904644012451172,
      "learning_rate": 4.093158411470517e-06,
      "loss": 0.0141,
      "step": 1812880
    },
    {
      "epoch": 2.9668506117318985,
      "grad_norm": 0.4403446614742279,
      "learning_rate": 4.0930925192569994e-06,
      "loss": 0.0108,
      "step": 1812900
    },
    {
      "epoch": 2.9668833421705516,
      "grad_norm": 0.23867902159690857,
      "learning_rate": 4.093026627043483e-06,
      "loss": 0.0156,
      "step": 1812920
    },
    {
      "epoch": 2.966916072609205,
      "grad_norm": 0.4733210504055023,
      "learning_rate": 4.092960734829966e-06,
      "loss": 0.0204,
      "step": 1812940
    },
    {
      "epoch": 2.9669488030478584,
      "grad_norm": 0.13204874098300934,
      "learning_rate": 4.0928948426164485e-06,
      "loss": 0.0183,
      "step": 1812960
    },
    {
      "epoch": 2.966981533486512,
      "grad_norm": 0.1722552627325058,
      "learning_rate": 4.092828950402931e-06,
      "loss": 0.0163,
      "step": 1812980
    },
    {
      "epoch": 2.967014263925165,
      "grad_norm": 0.19267231225967407,
      "learning_rate": 4.092763058189414e-06,
      "loss": 0.0121,
      "step": 1813000
    },
    {
      "epoch": 2.9670469943638187,
      "grad_norm": 0.050180863589048386,
      "learning_rate": 4.092697165975897e-06,
      "loss": 0.0141,
      "step": 1813020
    },
    {
      "epoch": 2.967079724802472,
      "grad_norm": 0.22801610827445984,
      "learning_rate": 4.0926312737623795e-06,
      "loss": 0.0124,
      "step": 1813040
    },
    {
      "epoch": 2.967112455241125,
      "grad_norm": 0.3864196538925171,
      "learning_rate": 4.092565381548862e-06,
      "loss": 0.0129,
      "step": 1813060
    },
    {
      "epoch": 2.9671451856797786,
      "grad_norm": 0.17778466641902924,
      "learning_rate": 4.092499489335345e-06,
      "loss": 0.0115,
      "step": 1813080
    },
    {
      "epoch": 2.9671779161184317,
      "grad_norm": 0.10229181498289108,
      "learning_rate": 4.0924335971218285e-06,
      "loss": 0.0112,
      "step": 1813100
    },
    {
      "epoch": 2.967210646557085,
      "grad_norm": 0.362970232963562,
      "learning_rate": 4.092367704908311e-06,
      "loss": 0.0095,
      "step": 1813120
    },
    {
      "epoch": 2.9672433769957385,
      "grad_norm": 0.498217910528183,
      "learning_rate": 4.092301812694794e-06,
      "loss": 0.0137,
      "step": 1813140
    },
    {
      "epoch": 2.967276107434392,
      "grad_norm": 0.08578856289386749,
      "learning_rate": 4.092235920481278e-06,
      "loss": 0.0103,
      "step": 1813160
    },
    {
      "epoch": 2.9673088378730452,
      "grad_norm": 0.3096187114715576,
      "learning_rate": 4.09217002826776e-06,
      "loss": 0.0115,
      "step": 1813180
    },
    {
      "epoch": 2.9673415683116984,
      "grad_norm": 0.4976685643196106,
      "learning_rate": 4.092104136054243e-06,
      "loss": 0.0111,
      "step": 1813200
    },
    {
      "epoch": 2.967374298750352,
      "grad_norm": 0.35828685760498047,
      "learning_rate": 4.092038243840726e-06,
      "loss": 0.0136,
      "step": 1813220
    },
    {
      "epoch": 2.967407029189005,
      "grad_norm": 0.5695841312408447,
      "learning_rate": 4.0919723516272086e-06,
      "loss": 0.0165,
      "step": 1813240
    },
    {
      "epoch": 2.9674397596276583,
      "grad_norm": 0.31292101740837097,
      "learning_rate": 4.091906459413691e-06,
      "loss": 0.0133,
      "step": 1813260
    },
    {
      "epoch": 2.967472490066312,
      "grad_norm": 0.13505969941616058,
      "learning_rate": 4.091840567200174e-06,
      "loss": 0.0168,
      "step": 1813280
    },
    {
      "epoch": 2.9675052205049655,
      "grad_norm": 0.2867771089076996,
      "learning_rate": 4.091774674986657e-06,
      "loss": 0.0164,
      "step": 1813300
    },
    {
      "epoch": 2.9675379509436186,
      "grad_norm": 0.82574462890625,
      "learning_rate": 4.09170878277314e-06,
      "loss": 0.0176,
      "step": 1813320
    },
    {
      "epoch": 2.9675706813822718,
      "grad_norm": 0.5036848187446594,
      "learning_rate": 4.091642890559623e-06,
      "loss": 0.0144,
      "step": 1813340
    },
    {
      "epoch": 2.9676034118209254,
      "grad_norm": 0.13081860542297363,
      "learning_rate": 4.091576998346106e-06,
      "loss": 0.0126,
      "step": 1813360
    },
    {
      "epoch": 2.9676361422595785,
      "grad_norm": 0.24485981464385986,
      "learning_rate": 4.091511106132589e-06,
      "loss": 0.0106,
      "step": 1813380
    },
    {
      "epoch": 2.9676688726982317,
      "grad_norm": 0.47147324681282043,
      "learning_rate": 4.091445213919071e-06,
      "loss": 0.0112,
      "step": 1813400
    },
    {
      "epoch": 2.9677016031368852,
      "grad_norm": 0.3698934018611908,
      "learning_rate": 4.091379321705554e-06,
      "loss": 0.0117,
      "step": 1813420
    },
    {
      "epoch": 2.967734333575539,
      "grad_norm": 0.3329917788505554,
      "learning_rate": 4.091313429492037e-06,
      "loss": 0.0132,
      "step": 1813440
    },
    {
      "epoch": 2.967767064014192,
      "grad_norm": 0.1543588787317276,
      "learning_rate": 4.0912475372785195e-06,
      "loss": 0.0173,
      "step": 1813460
    },
    {
      "epoch": 2.967799794452845,
      "grad_norm": 0.29125145077705383,
      "learning_rate": 4.091181645065003e-06,
      "loss": 0.0164,
      "step": 1813480
    },
    {
      "epoch": 2.9678325248914987,
      "grad_norm": 0.27675944566726685,
      "learning_rate": 4.091115752851486e-06,
      "loss": 0.0137,
      "step": 1813500
    },
    {
      "epoch": 2.967865255330152,
      "grad_norm": 0.7549135088920593,
      "learning_rate": 4.091049860637969e-06,
      "loss": 0.0142,
      "step": 1813520
    },
    {
      "epoch": 2.967897985768805,
      "grad_norm": 0.24685029685497284,
      "learning_rate": 4.090983968424452e-06,
      "loss": 0.0143,
      "step": 1813540
    },
    {
      "epoch": 2.9679307162074586,
      "grad_norm": 0.2917267978191376,
      "learning_rate": 4.090918076210935e-06,
      "loss": 0.0199,
      "step": 1813560
    },
    {
      "epoch": 2.9679634466461122,
      "grad_norm": 0.3633066713809967,
      "learning_rate": 4.090852183997418e-06,
      "loss": 0.0102,
      "step": 1813580
    },
    {
      "epoch": 2.9679961770847654,
      "grad_norm": 0.47578248381614685,
      "learning_rate": 4.0907862917839e-06,
      "loss": 0.0171,
      "step": 1813600
    },
    {
      "epoch": 2.9680289075234185,
      "grad_norm": 0.12862694263458252,
      "learning_rate": 4.090720399570383e-06,
      "loss": 0.0096,
      "step": 1813620
    },
    {
      "epoch": 2.968061637962072,
      "grad_norm": 0.35441094636917114,
      "learning_rate": 4.090654507356866e-06,
      "loss": 0.0117,
      "step": 1813640
    },
    {
      "epoch": 2.9680943684007253,
      "grad_norm": 0.2798958718776703,
      "learning_rate": 4.090588615143349e-06,
      "loss": 0.015,
      "step": 1813660
    },
    {
      "epoch": 2.9681270988393784,
      "grad_norm": 0.19948598742485046,
      "learning_rate": 4.090522722929831e-06,
      "loss": 0.0105,
      "step": 1813680
    },
    {
      "epoch": 2.968159829278032,
      "grad_norm": 0.5177370309829712,
      "learning_rate": 4.090456830716314e-06,
      "loss": 0.0107,
      "step": 1813700
    },
    {
      "epoch": 2.968192559716685,
      "grad_norm": 0.16605624556541443,
      "learning_rate": 4.090390938502798e-06,
      "loss": 0.0171,
      "step": 1813720
    },
    {
      "epoch": 2.9682252901553388,
      "grad_norm": 0.49539682269096375,
      "learning_rate": 4.0903250462892804e-06,
      "loss": 0.0184,
      "step": 1813740
    },
    {
      "epoch": 2.968258020593992,
      "grad_norm": 1.1793797016143799,
      "learning_rate": 4.090259154075763e-06,
      "loss": 0.0152,
      "step": 1813760
    },
    {
      "epoch": 2.9682907510326455,
      "grad_norm": 0.10967029631137848,
      "learning_rate": 4.090193261862246e-06,
      "loss": 0.0097,
      "step": 1813780
    },
    {
      "epoch": 2.9683234814712987,
      "grad_norm": 0.2854577302932739,
      "learning_rate": 4.090127369648729e-06,
      "loss": 0.0102,
      "step": 1813800
    },
    {
      "epoch": 2.968356211909952,
      "grad_norm": 0.2112867832183838,
      "learning_rate": 4.090061477435212e-06,
      "loss": 0.0131,
      "step": 1813820
    },
    {
      "epoch": 2.9683889423486054,
      "grad_norm": 0.18627607822418213,
      "learning_rate": 4.089995585221695e-06,
      "loss": 0.0104,
      "step": 1813840
    },
    {
      "epoch": 2.9684216727872585,
      "grad_norm": 0.6106991171836853,
      "learning_rate": 4.089929693008178e-06,
      "loss": 0.0097,
      "step": 1813860
    },
    {
      "epoch": 2.968454403225912,
      "grad_norm": 0.0438501350581646,
      "learning_rate": 4.0898638007946605e-06,
      "loss": 0.0115,
      "step": 1813880
    },
    {
      "epoch": 2.9684871336645653,
      "grad_norm": 0.3178788423538208,
      "learning_rate": 4.089797908581143e-06,
      "loss": 0.0137,
      "step": 1813900
    },
    {
      "epoch": 2.968519864103219,
      "grad_norm": 0.3463093936443329,
      "learning_rate": 4.089732016367626e-06,
      "loss": 0.0138,
      "step": 1813920
    },
    {
      "epoch": 2.968552594541872,
      "grad_norm": 0.2327313870191574,
      "learning_rate": 4.0896661241541095e-06,
      "loss": 0.0122,
      "step": 1813940
    },
    {
      "epoch": 2.968585324980525,
      "grad_norm": 0.1916074901819229,
      "learning_rate": 4.089600231940592e-06,
      "loss": 0.0107,
      "step": 1813960
    },
    {
      "epoch": 2.968618055419179,
      "grad_norm": 0.18122230470180511,
      "learning_rate": 4.089534339727075e-06,
      "loss": 0.0124,
      "step": 1813980
    },
    {
      "epoch": 2.968650785857832,
      "grad_norm": 0.08304447680711746,
      "learning_rate": 4.089468447513558e-06,
      "loss": 0.0165,
      "step": 1814000
    },
    {
      "epoch": 2.9686835162964855,
      "grad_norm": 0.3212468922138214,
      "learning_rate": 4.0894025553000405e-06,
      "loss": 0.0132,
      "step": 1814020
    },
    {
      "epoch": 2.9687162467351387,
      "grad_norm": 0.34637248516082764,
      "learning_rate": 4.089336663086523e-06,
      "loss": 0.0132,
      "step": 1814040
    },
    {
      "epoch": 2.9687489771737923,
      "grad_norm": 0.25922971963882446,
      "learning_rate": 4.089270770873006e-06,
      "loss": 0.0141,
      "step": 1814060
    },
    {
      "epoch": 2.9687817076124454,
      "grad_norm": 0.11517694592475891,
      "learning_rate": 4.089204878659489e-06,
      "loss": 0.0162,
      "step": 1814080
    },
    {
      "epoch": 2.9688144380510986,
      "grad_norm": 0.37607428431510925,
      "learning_rate": 4.0891389864459714e-06,
      "loss": 0.0122,
      "step": 1814100
    },
    {
      "epoch": 2.968847168489752,
      "grad_norm": 0.8526782989501953,
      "learning_rate": 4.089073094232455e-06,
      "loss": 0.0179,
      "step": 1814120
    },
    {
      "epoch": 2.9688798989284053,
      "grad_norm": 0.4817083477973938,
      "learning_rate": 4.089007202018938e-06,
      "loss": 0.0108,
      "step": 1814140
    },
    {
      "epoch": 2.968912629367059,
      "grad_norm": 0.539361298084259,
      "learning_rate": 4.0889413098054205e-06,
      "loss": 0.0178,
      "step": 1814160
    },
    {
      "epoch": 2.968945359805712,
      "grad_norm": 0.603110671043396,
      "learning_rate": 4.088875417591904e-06,
      "loss": 0.0161,
      "step": 1814180
    },
    {
      "epoch": 2.9689780902443657,
      "grad_norm": 0.41667816042900085,
      "learning_rate": 4.088809525378387e-06,
      "loss": 0.0099,
      "step": 1814200
    },
    {
      "epoch": 2.969010820683019,
      "grad_norm": 0.3855830430984497,
      "learning_rate": 4.08874363316487e-06,
      "loss": 0.013,
      "step": 1814220
    },
    {
      "epoch": 2.969043551121672,
      "grad_norm": 0.7124976515769958,
      "learning_rate": 4.088677740951352e-06,
      "loss": 0.0156,
      "step": 1814240
    },
    {
      "epoch": 2.9690762815603255,
      "grad_norm": 0.2776973247528076,
      "learning_rate": 4.088611848737835e-06,
      "loss": 0.0125,
      "step": 1814260
    },
    {
      "epoch": 2.9691090119989787,
      "grad_norm": 0.18177461624145508,
      "learning_rate": 4.088545956524318e-06,
      "loss": 0.0132,
      "step": 1814280
    },
    {
      "epoch": 2.9691417424376323,
      "grad_norm": 0.399592787027359,
      "learning_rate": 4.0884800643108005e-06,
      "loss": 0.0134,
      "step": 1814300
    },
    {
      "epoch": 2.9691744728762854,
      "grad_norm": 0.5223777890205383,
      "learning_rate": 4.088414172097283e-06,
      "loss": 0.0135,
      "step": 1814320
    },
    {
      "epoch": 2.969207203314939,
      "grad_norm": 0.6724047064781189,
      "learning_rate": 4.088348279883767e-06,
      "loss": 0.0109,
      "step": 1814340
    },
    {
      "epoch": 2.969239933753592,
      "grad_norm": 0.2090434581041336,
      "learning_rate": 4.08828238767025e-06,
      "loss": 0.0144,
      "step": 1814360
    },
    {
      "epoch": 2.9692726641922453,
      "grad_norm": 0.09995948523283005,
      "learning_rate": 4.088216495456732e-06,
      "loss": 0.015,
      "step": 1814380
    },
    {
      "epoch": 2.969305394630899,
      "grad_norm": 0.6742810010910034,
      "learning_rate": 4.088150603243215e-06,
      "loss": 0.0109,
      "step": 1814400
    },
    {
      "epoch": 2.969338125069552,
      "grad_norm": 0.28352513909339905,
      "learning_rate": 4.088084711029698e-06,
      "loss": 0.0147,
      "step": 1814420
    },
    {
      "epoch": 2.9693708555082057,
      "grad_norm": 0.6286301612854004,
      "learning_rate": 4.0880188188161806e-06,
      "loss": 0.0171,
      "step": 1814440
    },
    {
      "epoch": 2.969403585946859,
      "grad_norm": 0.22787201404571533,
      "learning_rate": 4.087952926602663e-06,
      "loss": 0.0087,
      "step": 1814460
    },
    {
      "epoch": 2.9694363163855124,
      "grad_norm": 0.380416601896286,
      "learning_rate": 4.087887034389146e-06,
      "loss": 0.0138,
      "step": 1814480
    },
    {
      "epoch": 2.9694690468241656,
      "grad_norm": 0.3882041871547699,
      "learning_rate": 4.087821142175629e-06,
      "loss": 0.0109,
      "step": 1814500
    },
    {
      "epoch": 2.9695017772628187,
      "grad_norm": 0.26697635650634766,
      "learning_rate": 4.087755249962112e-06,
      "loss": 0.0205,
      "step": 1814520
    },
    {
      "epoch": 2.9695345077014723,
      "grad_norm": 0.4254051148891449,
      "learning_rate": 4.087689357748595e-06,
      "loss": 0.0131,
      "step": 1814540
    },
    {
      "epoch": 2.9695672381401255,
      "grad_norm": 0.34758228063583374,
      "learning_rate": 4.087623465535079e-06,
      "loss": 0.0142,
      "step": 1814560
    },
    {
      "epoch": 2.969599968578779,
      "grad_norm": 0.7506115436553955,
      "learning_rate": 4.0875575733215614e-06,
      "loss": 0.015,
      "step": 1814580
    },
    {
      "epoch": 2.969632699017432,
      "grad_norm": 0.49482956528663635,
      "learning_rate": 4.087491681108044e-06,
      "loss": 0.0125,
      "step": 1814600
    },
    {
      "epoch": 2.969665429456086,
      "grad_norm": 0.40435609221458435,
      "learning_rate": 4.087425788894527e-06,
      "loss": 0.0142,
      "step": 1814620
    },
    {
      "epoch": 2.969698159894739,
      "grad_norm": 0.4633826017379761,
      "learning_rate": 4.08735989668101e-06,
      "loss": 0.0088,
      "step": 1814640
    },
    {
      "epoch": 2.969730890333392,
      "grad_norm": 0.5059661269187927,
      "learning_rate": 4.087294004467492e-06,
      "loss": 0.0175,
      "step": 1814660
    },
    {
      "epoch": 2.9697636207720457,
      "grad_norm": 0.14119382202625275,
      "learning_rate": 4.087228112253975e-06,
      "loss": 0.0091,
      "step": 1814680
    },
    {
      "epoch": 2.969796351210699,
      "grad_norm": 0.17628826200962067,
      "learning_rate": 4.087162220040458e-06,
      "loss": 0.0175,
      "step": 1814700
    },
    {
      "epoch": 2.969829081649352,
      "grad_norm": 0.4441928565502167,
      "learning_rate": 4.087096327826941e-06,
      "loss": 0.0175,
      "step": 1814720
    },
    {
      "epoch": 2.9698618120880056,
      "grad_norm": 0.772490918636322,
      "learning_rate": 4.087030435613424e-06,
      "loss": 0.0103,
      "step": 1814740
    },
    {
      "epoch": 2.969894542526659,
      "grad_norm": 0.6962829232215881,
      "learning_rate": 4.086964543399907e-06,
      "loss": 0.018,
      "step": 1814760
    },
    {
      "epoch": 2.9699272729653123,
      "grad_norm": 0.77045738697052,
      "learning_rate": 4.08689865118639e-06,
      "loss": 0.0146,
      "step": 1814780
    },
    {
      "epoch": 2.9699600034039655,
      "grad_norm": 0.4340004026889801,
      "learning_rate": 4.086832758972872e-06,
      "loss": 0.0147,
      "step": 1814800
    },
    {
      "epoch": 2.969992733842619,
      "grad_norm": 0.7084284424781799,
      "learning_rate": 4.086766866759355e-06,
      "loss": 0.0141,
      "step": 1814820
    },
    {
      "epoch": 2.9700254642812722,
      "grad_norm": 0.506406843662262,
      "learning_rate": 4.086700974545838e-06,
      "loss": 0.0164,
      "step": 1814840
    },
    {
      "epoch": 2.9700581947199254,
      "grad_norm": 0.09767598658800125,
      "learning_rate": 4.086635082332321e-06,
      "loss": 0.0109,
      "step": 1814860
    },
    {
      "epoch": 2.970090925158579,
      "grad_norm": 0.5461215972900391,
      "learning_rate": 4.086569190118804e-06,
      "loss": 0.0182,
      "step": 1814880
    },
    {
      "epoch": 2.9701236555972326,
      "grad_norm": 0.5104952454566956,
      "learning_rate": 4.086503297905287e-06,
      "loss": 0.0167,
      "step": 1814900
    },
    {
      "epoch": 2.9701563860358857,
      "grad_norm": 0.29330718517303467,
      "learning_rate": 4.08643740569177e-06,
      "loss": 0.0101,
      "step": 1814920
    },
    {
      "epoch": 2.970189116474539,
      "grad_norm": 1.1279590129852295,
      "learning_rate": 4.0863715134782524e-06,
      "loss": 0.0172,
      "step": 1814940
    },
    {
      "epoch": 2.9702218469131925,
      "grad_norm": 0.7328122854232788,
      "learning_rate": 4.086305621264736e-06,
      "loss": 0.015,
      "step": 1814960
    },
    {
      "epoch": 2.9702545773518456,
      "grad_norm": 0.5918243527412415,
      "learning_rate": 4.086239729051219e-06,
      "loss": 0.0172,
      "step": 1814980
    },
    {
      "epoch": 2.9702873077904988,
      "grad_norm": 0.10038096457719803,
      "learning_rate": 4.0861738368377015e-06,
      "loss": 0.0127,
      "step": 1815000
    },
    {
      "epoch": 2.9703200382291524,
      "grad_norm": 0.3684583306312561,
      "learning_rate": 4.086107944624184e-06,
      "loss": 0.0118,
      "step": 1815020
    },
    {
      "epoch": 2.970352768667806,
      "grad_norm": 0.4242836534976959,
      "learning_rate": 4.086042052410667e-06,
      "loss": 0.0158,
      "step": 1815040
    },
    {
      "epoch": 2.970385499106459,
      "grad_norm": 0.27854737639427185,
      "learning_rate": 4.08597616019715e-06,
      "loss": 0.0137,
      "step": 1815060
    },
    {
      "epoch": 2.9704182295451123,
      "grad_norm": 0.5493617057800293,
      "learning_rate": 4.0859102679836325e-06,
      "loss": 0.0193,
      "step": 1815080
    },
    {
      "epoch": 2.970450959983766,
      "grad_norm": 0.4235503673553467,
      "learning_rate": 4.085844375770115e-06,
      "loss": 0.0155,
      "step": 1815100
    },
    {
      "epoch": 2.970483690422419,
      "grad_norm": 0.7015621662139893,
      "learning_rate": 4.085778483556598e-06,
      "loss": 0.0157,
      "step": 1815120
    },
    {
      "epoch": 2.970516420861072,
      "grad_norm": 0.08147764950990677,
      "learning_rate": 4.0857125913430815e-06,
      "loss": 0.0158,
      "step": 1815140
    },
    {
      "epoch": 2.9705491512997257,
      "grad_norm": 0.22991500794887543,
      "learning_rate": 4.085646699129564e-06,
      "loss": 0.0136,
      "step": 1815160
    },
    {
      "epoch": 2.970581881738379,
      "grad_norm": 0.2186676412820816,
      "learning_rate": 4.085580806916047e-06,
      "loss": 0.0166,
      "step": 1815180
    },
    {
      "epoch": 2.9706146121770325,
      "grad_norm": 0.07882896810770035,
      "learning_rate": 4.08551491470253e-06,
      "loss": 0.0127,
      "step": 1815200
    },
    {
      "epoch": 2.9706473426156856,
      "grad_norm": 0.20938265323638916,
      "learning_rate": 4.0854490224890125e-06,
      "loss": 0.0107,
      "step": 1815220
    },
    {
      "epoch": 2.9706800730543392,
      "grad_norm": 0.5724930763244629,
      "learning_rate": 4.085383130275496e-06,
      "loss": 0.0194,
      "step": 1815240
    },
    {
      "epoch": 2.9707128034929924,
      "grad_norm": 0.03365893289446831,
      "learning_rate": 4.085317238061979e-06,
      "loss": 0.0085,
      "step": 1815260
    },
    {
      "epoch": 2.9707455339316455,
      "grad_norm": 0.0513262115418911,
      "learning_rate": 4.0852513458484616e-06,
      "loss": 0.0129,
      "step": 1815280
    },
    {
      "epoch": 2.970778264370299,
      "grad_norm": 0.15246011316776276,
      "learning_rate": 4.085185453634944e-06,
      "loss": 0.0118,
      "step": 1815300
    },
    {
      "epoch": 2.9708109948089523,
      "grad_norm": 0.7056844830513,
      "learning_rate": 4.085119561421427e-06,
      "loss": 0.0165,
      "step": 1815320
    },
    {
      "epoch": 2.970843725247606,
      "grad_norm": 0.2228003740310669,
      "learning_rate": 4.08505366920791e-06,
      "loss": 0.013,
      "step": 1815340
    },
    {
      "epoch": 2.970876455686259,
      "grad_norm": 0.19390061497688293,
      "learning_rate": 4.084987776994393e-06,
      "loss": 0.0107,
      "step": 1815360
    },
    {
      "epoch": 2.9709091861249126,
      "grad_norm": 0.547845721244812,
      "learning_rate": 4.084921884780876e-06,
      "loss": 0.0115,
      "step": 1815380
    },
    {
      "epoch": 2.9709419165635658,
      "grad_norm": 0.8741148114204407,
      "learning_rate": 4.084855992567359e-06,
      "loss": 0.0135,
      "step": 1815400
    },
    {
      "epoch": 2.970974647002219,
      "grad_norm": 0.2883181571960449,
      "learning_rate": 4.084790100353842e-06,
      "loss": 0.0119,
      "step": 1815420
    },
    {
      "epoch": 2.9710073774408725,
      "grad_norm": 0.468295693397522,
      "learning_rate": 4.084724208140324e-06,
      "loss": 0.0152,
      "step": 1815440
    },
    {
      "epoch": 2.9710401078795257,
      "grad_norm": 0.13056309521198273,
      "learning_rate": 4.084658315926807e-06,
      "loss": 0.0121,
      "step": 1815460
    },
    {
      "epoch": 2.9710728383181793,
      "grad_norm": 0.1366606056690216,
      "learning_rate": 4.08459242371329e-06,
      "loss": 0.012,
      "step": 1815480
    },
    {
      "epoch": 2.9711055687568324,
      "grad_norm": 0.2984316051006317,
      "learning_rate": 4.0845265314997725e-06,
      "loss": 0.0136,
      "step": 1815500
    },
    {
      "epoch": 2.971138299195486,
      "grad_norm": 0.30805686116218567,
      "learning_rate": 4.084460639286255e-06,
      "loss": 0.0187,
      "step": 1815520
    },
    {
      "epoch": 2.971171029634139,
      "grad_norm": 0.1544034332036972,
      "learning_rate": 4.084394747072739e-06,
      "loss": 0.0134,
      "step": 1815540
    },
    {
      "epoch": 2.9712037600727923,
      "grad_norm": 0.4472302198410034,
      "learning_rate": 4.084328854859222e-06,
      "loss": 0.0152,
      "step": 1815560
    },
    {
      "epoch": 2.971236490511446,
      "grad_norm": 0.08010295033454895,
      "learning_rate": 4.084262962645704e-06,
      "loss": 0.0154,
      "step": 1815580
    },
    {
      "epoch": 2.971269220950099,
      "grad_norm": 0.9108453989028931,
      "learning_rate": 4.084197070432188e-06,
      "loss": 0.0092,
      "step": 1815600
    },
    {
      "epoch": 2.9713019513887526,
      "grad_norm": 0.4321337342262268,
      "learning_rate": 4.084131178218671e-06,
      "loss": 0.0127,
      "step": 1815620
    },
    {
      "epoch": 2.971334681827406,
      "grad_norm": 0.22907549142837524,
      "learning_rate": 4.084065286005153e-06,
      "loss": 0.0091,
      "step": 1815640
    },
    {
      "epoch": 2.9713674122660594,
      "grad_norm": 0.9213780760765076,
      "learning_rate": 4.083999393791636e-06,
      "loss": 0.016,
      "step": 1815660
    },
    {
      "epoch": 2.9714001427047125,
      "grad_norm": 0.1637296974658966,
      "learning_rate": 4.083933501578119e-06,
      "loss": 0.0113,
      "step": 1815680
    },
    {
      "epoch": 2.9714328731433657,
      "grad_norm": 0.52353835105896,
      "learning_rate": 4.083867609364602e-06,
      "loss": 0.0152,
      "step": 1815700
    },
    {
      "epoch": 2.9714656035820193,
      "grad_norm": 0.12393029034137726,
      "learning_rate": 4.083801717151084e-06,
      "loss": 0.0081,
      "step": 1815720
    },
    {
      "epoch": 2.9714983340206724,
      "grad_norm": 0.2568120062351227,
      "learning_rate": 4.083735824937567e-06,
      "loss": 0.0182,
      "step": 1815740
    },
    {
      "epoch": 2.971531064459326,
      "grad_norm": 0.10853786766529083,
      "learning_rate": 4.083669932724051e-06,
      "loss": 0.0169,
      "step": 1815760
    },
    {
      "epoch": 2.971563794897979,
      "grad_norm": 0.2836565375328064,
      "learning_rate": 4.0836040405105334e-06,
      "loss": 0.0189,
      "step": 1815780
    },
    {
      "epoch": 2.9715965253366328,
      "grad_norm": 0.3005199432373047,
      "learning_rate": 4.083538148297016e-06,
      "loss": 0.0163,
      "step": 1815800
    },
    {
      "epoch": 2.971629255775286,
      "grad_norm": 0.39356598258018494,
      "learning_rate": 4.083472256083499e-06,
      "loss": 0.011,
      "step": 1815820
    },
    {
      "epoch": 2.971661986213939,
      "grad_norm": 0.24623171985149384,
      "learning_rate": 4.083406363869982e-06,
      "loss": 0.0093,
      "step": 1815840
    },
    {
      "epoch": 2.9716947166525927,
      "grad_norm": 0.10225610435009003,
      "learning_rate": 4.083340471656464e-06,
      "loss": 0.0101,
      "step": 1815860
    },
    {
      "epoch": 2.971727447091246,
      "grad_norm": 0.10323302447795868,
      "learning_rate": 4.083274579442947e-06,
      "loss": 0.0109,
      "step": 1815880
    },
    {
      "epoch": 2.9717601775298994,
      "grad_norm": 0.4705057442188263,
      "learning_rate": 4.08320868722943e-06,
      "loss": 0.0138,
      "step": 1815900
    },
    {
      "epoch": 2.9717929079685526,
      "grad_norm": 0.6691318154335022,
      "learning_rate": 4.083142795015913e-06,
      "loss": 0.0112,
      "step": 1815920
    },
    {
      "epoch": 2.971825638407206,
      "grad_norm": 0.1515943706035614,
      "learning_rate": 4.083076902802396e-06,
      "loss": 0.0101,
      "step": 1815940
    },
    {
      "epoch": 2.9718583688458593,
      "grad_norm": 0.27302417159080505,
      "learning_rate": 4.083011010588879e-06,
      "loss": 0.0137,
      "step": 1815960
    },
    {
      "epoch": 2.9718910992845124,
      "grad_norm": 0.4159078001976013,
      "learning_rate": 4.0829451183753625e-06,
      "loss": 0.0111,
      "step": 1815980
    },
    {
      "epoch": 2.971923829723166,
      "grad_norm": 0.6860592365264893,
      "learning_rate": 4.082879226161845e-06,
      "loss": 0.0117,
      "step": 1816000
    },
    {
      "epoch": 2.971956560161819,
      "grad_norm": 0.3707215189933777,
      "learning_rate": 4.082813333948328e-06,
      "loss": 0.0093,
      "step": 1816020
    },
    {
      "epoch": 2.971989290600473,
      "grad_norm": 0.19000722467899323,
      "learning_rate": 4.082747441734811e-06,
      "loss": 0.0154,
      "step": 1816040
    },
    {
      "epoch": 2.972022021039126,
      "grad_norm": 0.2357131540775299,
      "learning_rate": 4.0826815495212935e-06,
      "loss": 0.0206,
      "step": 1816060
    },
    {
      "epoch": 2.9720547514777795,
      "grad_norm": 0.18558228015899658,
      "learning_rate": 4.082615657307776e-06,
      "loss": 0.0091,
      "step": 1816080
    },
    {
      "epoch": 2.9720874819164327,
      "grad_norm": 0.23231513798236847,
      "learning_rate": 4.082549765094259e-06,
      "loss": 0.0167,
      "step": 1816100
    },
    {
      "epoch": 2.972120212355086,
      "grad_norm": 0.20406551659107208,
      "learning_rate": 4.082483872880742e-06,
      "loss": 0.0109,
      "step": 1816120
    },
    {
      "epoch": 2.9721529427937394,
      "grad_norm": 0.43166694045066833,
      "learning_rate": 4.0824179806672245e-06,
      "loss": 0.0144,
      "step": 1816140
    },
    {
      "epoch": 2.9721856732323926,
      "grad_norm": 0.20130608975887299,
      "learning_rate": 4.082352088453708e-06,
      "loss": 0.0102,
      "step": 1816160
    },
    {
      "epoch": 2.9722184036710457,
      "grad_norm": 0.8105440139770508,
      "learning_rate": 4.082286196240191e-06,
      "loss": 0.01,
      "step": 1816180
    },
    {
      "epoch": 2.9722511341096993,
      "grad_norm": 0.6350113749504089,
      "learning_rate": 4.0822203040266735e-06,
      "loss": 0.0111,
      "step": 1816200
    },
    {
      "epoch": 2.972283864548353,
      "grad_norm": 0.17704994976520538,
      "learning_rate": 4.082154411813156e-06,
      "loss": 0.0154,
      "step": 1816220
    },
    {
      "epoch": 2.972316594987006,
      "grad_norm": 0.12459857016801834,
      "learning_rate": 4.082088519599639e-06,
      "loss": 0.0151,
      "step": 1816240
    },
    {
      "epoch": 2.972349325425659,
      "grad_norm": 0.6193901300430298,
      "learning_rate": 4.082022627386122e-06,
      "loss": 0.0092,
      "step": 1816260
    },
    {
      "epoch": 2.972382055864313,
      "grad_norm": 0.15214720368385315,
      "learning_rate": 4.0819567351726045e-06,
      "loss": 0.011,
      "step": 1816280
    },
    {
      "epoch": 2.972414786302966,
      "grad_norm": 0.30676811933517456,
      "learning_rate": 4.081890842959088e-06,
      "loss": 0.0204,
      "step": 1816300
    },
    {
      "epoch": 2.972447516741619,
      "grad_norm": 0.18829238414764404,
      "learning_rate": 4.081824950745571e-06,
      "loss": 0.0191,
      "step": 1816320
    },
    {
      "epoch": 2.9724802471802727,
      "grad_norm": 1.5021617412567139,
      "learning_rate": 4.0817590585320535e-06,
      "loss": 0.0171,
      "step": 1816340
    },
    {
      "epoch": 2.9725129776189263,
      "grad_norm": 0.459898442029953,
      "learning_rate": 4.081693166318536e-06,
      "loss": 0.0135,
      "step": 1816360
    },
    {
      "epoch": 2.9725457080575794,
      "grad_norm": 0.15375858545303345,
      "learning_rate": 4.08162727410502e-06,
      "loss": 0.0119,
      "step": 1816380
    },
    {
      "epoch": 2.9725784384962326,
      "grad_norm": 0.4024347960948944,
      "learning_rate": 4.081561381891503e-06,
      "loss": 0.0156,
      "step": 1816400
    },
    {
      "epoch": 2.972611168934886,
      "grad_norm": 0.28269246220588684,
      "learning_rate": 4.081495489677985e-06,
      "loss": 0.0131,
      "step": 1816420
    },
    {
      "epoch": 2.9726438993735393,
      "grad_norm": 0.4595879018306732,
      "learning_rate": 4.081429597464468e-06,
      "loss": 0.0181,
      "step": 1816440
    },
    {
      "epoch": 2.9726766298121925,
      "grad_norm": 0.1569913923740387,
      "learning_rate": 4.081363705250951e-06,
      "loss": 0.0118,
      "step": 1816460
    },
    {
      "epoch": 2.972709360250846,
      "grad_norm": 0.16372057795524597,
      "learning_rate": 4.0812978130374336e-06,
      "loss": 0.0113,
      "step": 1816480
    },
    {
      "epoch": 2.9727420906894997,
      "grad_norm": 0.2207358479499817,
      "learning_rate": 4.081231920823916e-06,
      "loss": 0.0106,
      "step": 1816500
    },
    {
      "epoch": 2.972774821128153,
      "grad_norm": 0.14450982213020325,
      "learning_rate": 4.081166028610399e-06,
      "loss": 0.0114,
      "step": 1816520
    },
    {
      "epoch": 2.972807551566806,
      "grad_norm": 0.15037107467651367,
      "learning_rate": 4.081100136396882e-06,
      "loss": 0.0113,
      "step": 1816540
    },
    {
      "epoch": 2.9728402820054596,
      "grad_norm": 0.1276729255914688,
      "learning_rate": 4.081034244183365e-06,
      "loss": 0.0119,
      "step": 1816560
    },
    {
      "epoch": 2.9728730124441127,
      "grad_norm": 0.22031717002391815,
      "learning_rate": 4.080968351969848e-06,
      "loss": 0.0094,
      "step": 1816580
    },
    {
      "epoch": 2.972905742882766,
      "grad_norm": 0.21638762950897217,
      "learning_rate": 4.080902459756331e-06,
      "loss": 0.0086,
      "step": 1816600
    },
    {
      "epoch": 2.9729384733214195,
      "grad_norm": 0.07898352295160294,
      "learning_rate": 4.080836567542814e-06,
      "loss": 0.0229,
      "step": 1816620
    },
    {
      "epoch": 2.972971203760073,
      "grad_norm": 0.29203805327415466,
      "learning_rate": 4.080770675329297e-06,
      "loss": 0.0173,
      "step": 1816640
    },
    {
      "epoch": 2.973003934198726,
      "grad_norm": 0.11663149297237396,
      "learning_rate": 4.08070478311578e-06,
      "loss": 0.0165,
      "step": 1816660
    },
    {
      "epoch": 2.9730366646373794,
      "grad_norm": 0.18999803066253662,
      "learning_rate": 4.080638890902263e-06,
      "loss": 0.0151,
      "step": 1816680
    },
    {
      "epoch": 2.973069395076033,
      "grad_norm": 0.06851723045110703,
      "learning_rate": 4.080572998688745e-06,
      "loss": 0.0135,
      "step": 1816700
    },
    {
      "epoch": 2.973102125514686,
      "grad_norm": 0.2501737177371979,
      "learning_rate": 4.080507106475228e-06,
      "loss": 0.0098,
      "step": 1816720
    },
    {
      "epoch": 2.9731348559533393,
      "grad_norm": 0.1349894255399704,
      "learning_rate": 4.080441214261711e-06,
      "loss": 0.01,
      "step": 1816740
    },
    {
      "epoch": 2.973167586391993,
      "grad_norm": 0.43951427936553955,
      "learning_rate": 4.080375322048194e-06,
      "loss": 0.0156,
      "step": 1816760
    },
    {
      "epoch": 2.973200316830646,
      "grad_norm": 0.1688964068889618,
      "learning_rate": 4.080309429834677e-06,
      "loss": 0.0096,
      "step": 1816780
    },
    {
      "epoch": 2.9732330472692996,
      "grad_norm": 0.46267393231391907,
      "learning_rate": 4.08024353762116e-06,
      "loss": 0.0168,
      "step": 1816800
    },
    {
      "epoch": 2.9732657777079528,
      "grad_norm": 0.36349353194236755,
      "learning_rate": 4.080177645407643e-06,
      "loss": 0.0201,
      "step": 1816820
    },
    {
      "epoch": 2.9732985081466063,
      "grad_norm": 0.22254934906959534,
      "learning_rate": 4.0801117531941254e-06,
      "loss": 0.0149,
      "step": 1816840
    },
    {
      "epoch": 2.9733312385852595,
      "grad_norm": 0.6109257340431213,
      "learning_rate": 4.080045860980608e-06,
      "loss": 0.0131,
      "step": 1816860
    },
    {
      "epoch": 2.9733639690239126,
      "grad_norm": 0.7546631693840027,
      "learning_rate": 4.079979968767091e-06,
      "loss": 0.0123,
      "step": 1816880
    },
    {
      "epoch": 2.9733966994625662,
      "grad_norm": 0.21913903951644897,
      "learning_rate": 4.079914076553574e-06,
      "loss": 0.0097,
      "step": 1816900
    },
    {
      "epoch": 2.9734294299012194,
      "grad_norm": 0.0990961343050003,
      "learning_rate": 4.079848184340056e-06,
      "loss": 0.0117,
      "step": 1816920
    },
    {
      "epoch": 2.973462160339873,
      "grad_norm": 0.33397358655929565,
      "learning_rate": 4.079782292126539e-06,
      "loss": 0.008,
      "step": 1816940
    },
    {
      "epoch": 2.973494890778526,
      "grad_norm": 0.051787443459033966,
      "learning_rate": 4.079716399913023e-06,
      "loss": 0.0157,
      "step": 1816960
    },
    {
      "epoch": 2.9735276212171797,
      "grad_norm": 0.5397896766662598,
      "learning_rate": 4.0796505076995054e-06,
      "loss": 0.0146,
      "step": 1816980
    },
    {
      "epoch": 2.973560351655833,
      "grad_norm": 0.6200757026672363,
      "learning_rate": 4.079584615485988e-06,
      "loss": 0.0115,
      "step": 1817000
    },
    {
      "epoch": 2.973593082094486,
      "grad_norm": 0.09946074336767197,
      "learning_rate": 4.079518723272472e-06,
      "loss": 0.0095,
      "step": 1817020
    },
    {
      "epoch": 2.9736258125331396,
      "grad_norm": 0.25185999274253845,
      "learning_rate": 4.0794528310589545e-06,
      "loss": 0.0126,
      "step": 1817040
    },
    {
      "epoch": 2.9736585429717928,
      "grad_norm": 0.37053385376930237,
      "learning_rate": 4.079386938845437e-06,
      "loss": 0.0101,
      "step": 1817060
    },
    {
      "epoch": 2.9736912734104464,
      "grad_norm": 0.2666924297809601,
      "learning_rate": 4.07932104663192e-06,
      "loss": 0.0094,
      "step": 1817080
    },
    {
      "epoch": 2.9737240038490995,
      "grad_norm": 0.29688847064971924,
      "learning_rate": 4.079255154418403e-06,
      "loss": 0.0119,
      "step": 1817100
    },
    {
      "epoch": 2.973756734287753,
      "grad_norm": 1.157699704170227,
      "learning_rate": 4.0791892622048855e-06,
      "loss": 0.0208,
      "step": 1817120
    },
    {
      "epoch": 2.9737894647264063,
      "grad_norm": 0.5960601568222046,
      "learning_rate": 4.079123369991368e-06,
      "loss": 0.014,
      "step": 1817140
    },
    {
      "epoch": 2.9738221951650594,
      "grad_norm": 0.6964330077171326,
      "learning_rate": 4.079057477777851e-06,
      "loss": 0.0121,
      "step": 1817160
    },
    {
      "epoch": 2.973854925603713,
      "grad_norm": 0.24170562624931335,
      "learning_rate": 4.0789915855643345e-06,
      "loss": 0.0121,
      "step": 1817180
    },
    {
      "epoch": 2.973887656042366,
      "grad_norm": 1.3322280645370483,
      "learning_rate": 4.078925693350817e-06,
      "loss": 0.0182,
      "step": 1817200
    },
    {
      "epoch": 2.9739203864810198,
      "grad_norm": 0.4992746114730835,
      "learning_rate": 4.0788598011373e-06,
      "loss": 0.0145,
      "step": 1817220
    },
    {
      "epoch": 2.973953116919673,
      "grad_norm": 0.6460182070732117,
      "learning_rate": 4.078793908923783e-06,
      "loss": 0.0138,
      "step": 1817240
    },
    {
      "epoch": 2.9739858473583265,
      "grad_norm": 0.7257114052772522,
      "learning_rate": 4.0787280167102655e-06,
      "loss": 0.0156,
      "step": 1817260
    },
    {
      "epoch": 2.9740185777969796,
      "grad_norm": 0.21777385473251343,
      "learning_rate": 4.078662124496748e-06,
      "loss": 0.0123,
      "step": 1817280
    },
    {
      "epoch": 2.974051308235633,
      "grad_norm": 0.6319276690483093,
      "learning_rate": 4.078596232283231e-06,
      "loss": 0.015,
      "step": 1817300
    },
    {
      "epoch": 2.9740840386742864,
      "grad_norm": 0.12357291579246521,
      "learning_rate": 4.078530340069714e-06,
      "loss": 0.0126,
      "step": 1817320
    },
    {
      "epoch": 2.9741167691129395,
      "grad_norm": 0.34042689204216003,
      "learning_rate": 4.078464447856197e-06,
      "loss": 0.0118,
      "step": 1817340
    },
    {
      "epoch": 2.974149499551593,
      "grad_norm": 1.6844247579574585,
      "learning_rate": 4.07839855564268e-06,
      "loss": 0.0123,
      "step": 1817360
    },
    {
      "epoch": 2.9741822299902463,
      "grad_norm": 0.9303013682365417,
      "learning_rate": 4.078332663429163e-06,
      "loss": 0.0163,
      "step": 1817380
    },
    {
      "epoch": 2.9742149604289,
      "grad_norm": 0.14196977019309998,
      "learning_rate": 4.078266771215646e-06,
      "loss": 0.0107,
      "step": 1817400
    },
    {
      "epoch": 2.974247690867553,
      "grad_norm": 0.174089252948761,
      "learning_rate": 4.078200879002129e-06,
      "loss": 0.0128,
      "step": 1817420
    },
    {
      "epoch": 2.974280421306206,
      "grad_norm": 0.1944892257452011,
      "learning_rate": 4.078134986788612e-06,
      "loss": 0.0166,
      "step": 1817440
    },
    {
      "epoch": 2.9743131517448598,
      "grad_norm": 0.40584877133369446,
      "learning_rate": 4.078069094575095e-06,
      "loss": 0.0147,
      "step": 1817460
    },
    {
      "epoch": 2.974345882183513,
      "grad_norm": 1.2663376331329346,
      "learning_rate": 4.078003202361577e-06,
      "loss": 0.0171,
      "step": 1817480
    },
    {
      "epoch": 2.9743786126221665,
      "grad_norm": 0.5635864734649658,
      "learning_rate": 4.07793731014806e-06,
      "loss": 0.0115,
      "step": 1817500
    },
    {
      "epoch": 2.9744113430608197,
      "grad_norm": 0.3664729595184326,
      "learning_rate": 4.077871417934543e-06,
      "loss": 0.0106,
      "step": 1817520
    },
    {
      "epoch": 2.9744440734994733,
      "grad_norm": 0.3133569359779358,
      "learning_rate": 4.0778055257210256e-06,
      "loss": 0.0111,
      "step": 1817540
    },
    {
      "epoch": 2.9744768039381264,
      "grad_norm": 0.09505580365657806,
      "learning_rate": 4.077739633507508e-06,
      "loss": 0.0095,
      "step": 1817560
    },
    {
      "epoch": 2.9745095343767796,
      "grad_norm": 0.4297436475753784,
      "learning_rate": 4.077673741293992e-06,
      "loss": 0.0176,
      "step": 1817580
    },
    {
      "epoch": 2.974542264815433,
      "grad_norm": 0.4620380103588104,
      "learning_rate": 4.077607849080475e-06,
      "loss": 0.0199,
      "step": 1817600
    },
    {
      "epoch": 2.9745749952540863,
      "grad_norm": 0.3816574513912201,
      "learning_rate": 4.077541956866957e-06,
      "loss": 0.0115,
      "step": 1817620
    },
    {
      "epoch": 2.9746077256927395,
      "grad_norm": 0.46213066577911377,
      "learning_rate": 4.07747606465344e-06,
      "loss": 0.0165,
      "step": 1817640
    },
    {
      "epoch": 2.974640456131393,
      "grad_norm": 0.14188560843467712,
      "learning_rate": 4.077410172439923e-06,
      "loss": 0.0134,
      "step": 1817660
    },
    {
      "epoch": 2.9746731865700466,
      "grad_norm": 0.14865528047084808,
      "learning_rate": 4.0773442802264056e-06,
      "loss": 0.0159,
      "step": 1817680
    },
    {
      "epoch": 2.9747059170087,
      "grad_norm": 0.16124548017978668,
      "learning_rate": 4.077278388012889e-06,
      "loss": 0.0101,
      "step": 1817700
    },
    {
      "epoch": 2.974738647447353,
      "grad_norm": 0.33922290802001953,
      "learning_rate": 4.077212495799372e-06,
      "loss": 0.0122,
      "step": 1817720
    },
    {
      "epoch": 2.9747713778860065,
      "grad_norm": 0.33613231778144836,
      "learning_rate": 4.077146603585855e-06,
      "loss": 0.0107,
      "step": 1817740
    },
    {
      "epoch": 2.9748041083246597,
      "grad_norm": 0.35745593905448914,
      "learning_rate": 4.077080711372337e-06,
      "loss": 0.0095,
      "step": 1817760
    },
    {
      "epoch": 2.974836838763313,
      "grad_norm": 0.8511754870414734,
      "learning_rate": 4.07701481915882e-06,
      "loss": 0.0099,
      "step": 1817780
    },
    {
      "epoch": 2.9748695692019664,
      "grad_norm": 0.21305270493030548,
      "learning_rate": 4.076948926945304e-06,
      "loss": 0.0156,
      "step": 1817800
    },
    {
      "epoch": 2.97490229964062,
      "grad_norm": 0.41506120562553406,
      "learning_rate": 4.0768830347317864e-06,
      "loss": 0.0156,
      "step": 1817820
    },
    {
      "epoch": 2.974935030079273,
      "grad_norm": 0.4318011701107025,
      "learning_rate": 4.076817142518269e-06,
      "loss": 0.0185,
      "step": 1817840
    },
    {
      "epoch": 2.9749677605179263,
      "grad_norm": 0.44019943475723267,
      "learning_rate": 4.076751250304752e-06,
      "loss": 0.0134,
      "step": 1817860
    },
    {
      "epoch": 2.97500049095658,
      "grad_norm": 0.35443365573883057,
      "learning_rate": 4.076685358091235e-06,
      "loss": 0.0118,
      "step": 1817880
    },
    {
      "epoch": 2.975033221395233,
      "grad_norm": 0.3990464210510254,
      "learning_rate": 4.076619465877717e-06,
      "loss": 0.0134,
      "step": 1817900
    },
    {
      "epoch": 2.9750659518338862,
      "grad_norm": 1.4894598722457886,
      "learning_rate": 4.0765535736642e-06,
      "loss": 0.0124,
      "step": 1817920
    },
    {
      "epoch": 2.97509868227254,
      "grad_norm": 0.40761637687683105,
      "learning_rate": 4.076487681450683e-06,
      "loss": 0.0126,
      "step": 1817940
    },
    {
      "epoch": 2.9751314127111934,
      "grad_norm": 0.23678356409072876,
      "learning_rate": 4.076421789237166e-06,
      "loss": 0.0136,
      "step": 1817960
    },
    {
      "epoch": 2.9751641431498466,
      "grad_norm": 0.12542492151260376,
      "learning_rate": 4.076355897023649e-06,
      "loss": 0.0205,
      "step": 1817980
    },
    {
      "epoch": 2.9751968735884997,
      "grad_norm": 0.07641716301441193,
      "learning_rate": 4.076290004810132e-06,
      "loss": 0.012,
      "step": 1818000
    },
    {
      "epoch": 2.9752296040271533,
      "grad_norm": 0.5155311822891235,
      "learning_rate": 4.076224112596615e-06,
      "loss": 0.0097,
      "step": 1818020
    },
    {
      "epoch": 2.9752623344658065,
      "grad_norm": 0.18013827502727509,
      "learning_rate": 4.0761582203830974e-06,
      "loss": 0.0152,
      "step": 1818040
    },
    {
      "epoch": 2.9752950649044596,
      "grad_norm": 0.3399427831172943,
      "learning_rate": 4.076092328169581e-06,
      "loss": 0.0113,
      "step": 1818060
    },
    {
      "epoch": 2.975327795343113,
      "grad_norm": 0.2560679018497467,
      "learning_rate": 4.076026435956064e-06,
      "loss": 0.014,
      "step": 1818080
    },
    {
      "epoch": 2.975360525781767,
      "grad_norm": 0.1278229057788849,
      "learning_rate": 4.0759605437425465e-06,
      "loss": 0.0139,
      "step": 1818100
    },
    {
      "epoch": 2.97539325622042,
      "grad_norm": 0.10409008711576462,
      "learning_rate": 4.075894651529029e-06,
      "loss": 0.0093,
      "step": 1818120
    },
    {
      "epoch": 2.975425986659073,
      "grad_norm": 0.21070009469985962,
      "learning_rate": 4.075828759315512e-06,
      "loss": 0.0123,
      "step": 1818140
    },
    {
      "epoch": 2.9754587170977267,
      "grad_norm": 0.30606475472450256,
      "learning_rate": 4.075762867101995e-06,
      "loss": 0.0159,
      "step": 1818160
    },
    {
      "epoch": 2.97549144753638,
      "grad_norm": 0.2522544264793396,
      "learning_rate": 4.0756969748884775e-06,
      "loss": 0.0157,
      "step": 1818180
    },
    {
      "epoch": 2.975524177975033,
      "grad_norm": 0.25305065512657166,
      "learning_rate": 4.075631082674961e-06,
      "loss": 0.0075,
      "step": 1818200
    },
    {
      "epoch": 2.9755569084136866,
      "grad_norm": 0.6751834154129028,
      "learning_rate": 4.075565190461444e-06,
      "loss": 0.0147,
      "step": 1818220
    },
    {
      "epoch": 2.9755896388523397,
      "grad_norm": 0.35348016023635864,
      "learning_rate": 4.0754992982479265e-06,
      "loss": 0.01,
      "step": 1818240
    },
    {
      "epoch": 2.9756223692909933,
      "grad_norm": 0.17371197044849396,
      "learning_rate": 4.075433406034409e-06,
      "loss": 0.0135,
      "step": 1818260
    },
    {
      "epoch": 2.9756550997296465,
      "grad_norm": 0.27227240800857544,
      "learning_rate": 4.075367513820892e-06,
      "loss": 0.015,
      "step": 1818280
    },
    {
      "epoch": 2.9756878301683,
      "grad_norm": 0.8411583304405212,
      "learning_rate": 4.075301621607375e-06,
      "loss": 0.0155,
      "step": 1818300
    },
    {
      "epoch": 2.9757205606069532,
      "grad_norm": 0.6217051148414612,
      "learning_rate": 4.0752357293938575e-06,
      "loss": 0.0123,
      "step": 1818320
    },
    {
      "epoch": 2.9757532910456064,
      "grad_norm": 0.436733603477478,
      "learning_rate": 4.07516983718034e-06,
      "loss": 0.0155,
      "step": 1818340
    },
    {
      "epoch": 2.97578602148426,
      "grad_norm": 0.2336398810148239,
      "learning_rate": 4.075103944966823e-06,
      "loss": 0.0117,
      "step": 1818360
    },
    {
      "epoch": 2.975818751922913,
      "grad_norm": 0.31371286511421204,
      "learning_rate": 4.0750380527533065e-06,
      "loss": 0.008,
      "step": 1818380
    },
    {
      "epoch": 2.9758514823615667,
      "grad_norm": 1.0926766395568848,
      "learning_rate": 4.074972160539789e-06,
      "loss": 0.0136,
      "step": 1818400
    },
    {
      "epoch": 2.97588421280022,
      "grad_norm": 0.3346303105354309,
      "learning_rate": 4.074906268326272e-06,
      "loss": 0.0131,
      "step": 1818420
    },
    {
      "epoch": 2.9759169432388735,
      "grad_norm": 0.2396371066570282,
      "learning_rate": 4.074840376112756e-06,
      "loss": 0.0117,
      "step": 1818440
    },
    {
      "epoch": 2.9759496736775266,
      "grad_norm": 0.17971189320087433,
      "learning_rate": 4.074774483899238e-06,
      "loss": 0.0146,
      "step": 1818460
    },
    {
      "epoch": 2.9759824041161798,
      "grad_norm": 0.0760316550731659,
      "learning_rate": 4.074708591685721e-06,
      "loss": 0.0153,
      "step": 1818480
    },
    {
      "epoch": 2.9760151345548334,
      "grad_norm": 0.2416829615831375,
      "learning_rate": 4.074642699472204e-06,
      "loss": 0.0189,
      "step": 1818500
    },
    {
      "epoch": 2.9760478649934865,
      "grad_norm": 0.27667704224586487,
      "learning_rate": 4.0745768072586866e-06,
      "loss": 0.0126,
      "step": 1818520
    },
    {
      "epoch": 2.97608059543214,
      "grad_norm": 0.08088028430938721,
      "learning_rate": 4.074510915045169e-06,
      "loss": 0.0153,
      "step": 1818540
    },
    {
      "epoch": 2.9761133258707932,
      "grad_norm": 0.5196701884269714,
      "learning_rate": 4.074445022831652e-06,
      "loss": 0.0147,
      "step": 1818560
    },
    {
      "epoch": 2.976146056309447,
      "grad_norm": 0.1964956820011139,
      "learning_rate": 4.074379130618135e-06,
      "loss": 0.0164,
      "step": 1818580
    },
    {
      "epoch": 2.9761787867481,
      "grad_norm": 0.5485240817070007,
      "learning_rate": 4.074313238404618e-06,
      "loss": 0.017,
      "step": 1818600
    },
    {
      "epoch": 2.976211517186753,
      "grad_norm": 0.21855910122394562,
      "learning_rate": 4.074247346191101e-06,
      "loss": 0.0115,
      "step": 1818620
    },
    {
      "epoch": 2.9762442476254067,
      "grad_norm": 0.3426990807056427,
      "learning_rate": 4.074181453977584e-06,
      "loss": 0.0176,
      "step": 1818640
    },
    {
      "epoch": 2.97627697806406,
      "grad_norm": 0.355464905500412,
      "learning_rate": 4.074115561764067e-06,
      "loss": 0.0111,
      "step": 1818660
    },
    {
      "epoch": 2.9763097085027135,
      "grad_norm": 0.19705374538898468,
      "learning_rate": 4.074049669550549e-06,
      "loss": 0.012,
      "step": 1818680
    },
    {
      "epoch": 2.9763424389413666,
      "grad_norm": 0.45830366015434265,
      "learning_rate": 4.073983777337032e-06,
      "loss": 0.0145,
      "step": 1818700
    },
    {
      "epoch": 2.9763751693800202,
      "grad_norm": 0.3736811876296997,
      "learning_rate": 4.073917885123515e-06,
      "loss": 0.0135,
      "step": 1818720
    },
    {
      "epoch": 2.9764078998186734,
      "grad_norm": 0.311227023601532,
      "learning_rate": 4.0738519929099976e-06,
      "loss": 0.0148,
      "step": 1818740
    },
    {
      "epoch": 2.9764406302573265,
      "grad_norm": 0.5425919890403748,
      "learning_rate": 4.073786100696481e-06,
      "loss": 0.0157,
      "step": 1818760
    },
    {
      "epoch": 2.97647336069598,
      "grad_norm": 0.33697399497032166,
      "learning_rate": 4.073720208482964e-06,
      "loss": 0.0134,
      "step": 1818780
    },
    {
      "epoch": 2.9765060911346333,
      "grad_norm": 0.11585375666618347,
      "learning_rate": 4.073654316269447e-06,
      "loss": 0.0089,
      "step": 1818800
    },
    {
      "epoch": 2.976538821573287,
      "grad_norm": 0.2830902636051178,
      "learning_rate": 4.07358842405593e-06,
      "loss": 0.0163,
      "step": 1818820
    },
    {
      "epoch": 2.97657155201194,
      "grad_norm": 0.6690572500228882,
      "learning_rate": 4.073522531842413e-06,
      "loss": 0.0157,
      "step": 1818840
    },
    {
      "epoch": 2.9766042824505936,
      "grad_norm": 0.3008936643600464,
      "learning_rate": 4.073456639628896e-06,
      "loss": 0.0135,
      "step": 1818860
    },
    {
      "epoch": 2.9766370128892468,
      "grad_norm": 0.40496689081192017,
      "learning_rate": 4.0733907474153784e-06,
      "loss": 0.0198,
      "step": 1818880
    },
    {
      "epoch": 2.9766697433279,
      "grad_norm": 0.06284299492835999,
      "learning_rate": 4.073324855201861e-06,
      "loss": 0.0135,
      "step": 1818900
    },
    {
      "epoch": 2.9767024737665535,
      "grad_norm": 0.22922663390636444,
      "learning_rate": 4.073258962988344e-06,
      "loss": 0.0136,
      "step": 1818920
    },
    {
      "epoch": 2.9767352042052067,
      "grad_norm": 0.1523810774087906,
      "learning_rate": 4.073193070774827e-06,
      "loss": 0.0178,
      "step": 1818940
    },
    {
      "epoch": 2.9767679346438602,
      "grad_norm": 0.21096442639827728,
      "learning_rate": 4.073127178561309e-06,
      "loss": 0.0101,
      "step": 1818960
    },
    {
      "epoch": 2.9768006650825134,
      "grad_norm": 0.42763739824295044,
      "learning_rate": 4.073061286347792e-06,
      "loss": 0.0176,
      "step": 1818980
    },
    {
      "epoch": 2.976833395521167,
      "grad_norm": 0.27839842438697815,
      "learning_rate": 4.072995394134276e-06,
      "loss": 0.0101,
      "step": 1819000
    },
    {
      "epoch": 2.97686612595982,
      "grad_norm": 0.7978717088699341,
      "learning_rate": 4.0729295019207585e-06,
      "loss": 0.0176,
      "step": 1819020
    },
    {
      "epoch": 2.9768988563984733,
      "grad_norm": 0.4867665469646454,
      "learning_rate": 4.072863609707241e-06,
      "loss": 0.01,
      "step": 1819040
    },
    {
      "epoch": 2.976931586837127,
      "grad_norm": 0.5596013069152832,
      "learning_rate": 4.072797717493724e-06,
      "loss": 0.0127,
      "step": 1819060
    },
    {
      "epoch": 2.97696431727578,
      "grad_norm": 0.2361305207014084,
      "learning_rate": 4.072731825280207e-06,
      "loss": 0.0116,
      "step": 1819080
    },
    {
      "epoch": 2.9769970477144336,
      "grad_norm": 0.3352912664413452,
      "learning_rate": 4.07266593306669e-06,
      "loss": 0.0146,
      "step": 1819100
    },
    {
      "epoch": 2.977029778153087,
      "grad_norm": 0.6657060384750366,
      "learning_rate": 4.072600040853173e-06,
      "loss": 0.0138,
      "step": 1819120
    },
    {
      "epoch": 2.9770625085917404,
      "grad_norm": 0.35545235872268677,
      "learning_rate": 4.072534148639656e-06,
      "loss": 0.0148,
      "step": 1819140
    },
    {
      "epoch": 2.9770952390303935,
      "grad_norm": 0.41975921392440796,
      "learning_rate": 4.0724682564261385e-06,
      "loss": 0.013,
      "step": 1819160
    },
    {
      "epoch": 2.9771279694690467,
      "grad_norm": 0.1558108627796173,
      "learning_rate": 4.072402364212621e-06,
      "loss": 0.0099,
      "step": 1819180
    },
    {
      "epoch": 2.9771606999077003,
      "grad_norm": 0.6919044852256775,
      "learning_rate": 4.072336471999104e-06,
      "loss": 0.0114,
      "step": 1819200
    },
    {
      "epoch": 2.9771934303463534,
      "grad_norm": 0.6014009118080139,
      "learning_rate": 4.0722705797855875e-06,
      "loss": 0.0096,
      "step": 1819220
    },
    {
      "epoch": 2.9772261607850066,
      "grad_norm": 0.39271557331085205,
      "learning_rate": 4.07220468757207e-06,
      "loss": 0.018,
      "step": 1819240
    },
    {
      "epoch": 2.97725889122366,
      "grad_norm": 0.5891130566596985,
      "learning_rate": 4.072138795358553e-06,
      "loss": 0.0146,
      "step": 1819260
    },
    {
      "epoch": 2.9772916216623138,
      "grad_norm": 0.46989282965660095,
      "learning_rate": 4.072072903145036e-06,
      "loss": 0.0154,
      "step": 1819280
    },
    {
      "epoch": 2.977324352100967,
      "grad_norm": 0.17276912927627563,
      "learning_rate": 4.0720070109315185e-06,
      "loss": 0.0162,
      "step": 1819300
    },
    {
      "epoch": 2.97735708253962,
      "grad_norm": 0.12079910933971405,
      "learning_rate": 4.071941118718001e-06,
      "loss": 0.0096,
      "step": 1819320
    },
    {
      "epoch": 2.9773898129782737,
      "grad_norm": 0.7196792960166931,
      "learning_rate": 4.071875226504484e-06,
      "loss": 0.0102,
      "step": 1819340
    },
    {
      "epoch": 2.977422543416927,
      "grad_norm": 0.4109395742416382,
      "learning_rate": 4.071809334290967e-06,
      "loss": 0.0148,
      "step": 1819360
    },
    {
      "epoch": 2.97745527385558,
      "grad_norm": 0.7074503898620605,
      "learning_rate": 4.0717434420774495e-06,
      "loss": 0.0231,
      "step": 1819380
    },
    {
      "epoch": 2.9774880042942335,
      "grad_norm": 0.4710334241390228,
      "learning_rate": 4.071677549863933e-06,
      "loss": 0.017,
      "step": 1819400
    },
    {
      "epoch": 2.977520734732887,
      "grad_norm": 0.3732863664627075,
      "learning_rate": 4.071611657650416e-06,
      "loss": 0.0193,
      "step": 1819420
    },
    {
      "epoch": 2.9775534651715403,
      "grad_norm": 0.13368700444698334,
      "learning_rate": 4.0715457654368985e-06,
      "loss": 0.0121,
      "step": 1819440
    },
    {
      "epoch": 2.9775861956101934,
      "grad_norm": 0.8658037781715393,
      "learning_rate": 4.071479873223382e-06,
      "loss": 0.0106,
      "step": 1819460
    },
    {
      "epoch": 2.977618926048847,
      "grad_norm": 0.27742865681648254,
      "learning_rate": 4.071413981009865e-06,
      "loss": 0.0088,
      "step": 1819480
    },
    {
      "epoch": 2.9776516564875,
      "grad_norm": 0.05203873664140701,
      "learning_rate": 4.071348088796348e-06,
      "loss": 0.0122,
      "step": 1819500
    },
    {
      "epoch": 2.9776843869261533,
      "grad_norm": 0.7528497576713562,
      "learning_rate": 4.07128219658283e-06,
      "loss": 0.0156,
      "step": 1819520
    },
    {
      "epoch": 2.977717117364807,
      "grad_norm": 0.351214736700058,
      "learning_rate": 4.071216304369313e-06,
      "loss": 0.0152,
      "step": 1819540
    },
    {
      "epoch": 2.9777498478034605,
      "grad_norm": 0.2387997806072235,
      "learning_rate": 4.071150412155796e-06,
      "loss": 0.01,
      "step": 1819560
    },
    {
      "epoch": 2.9777825782421137,
      "grad_norm": 0.4134913682937622,
      "learning_rate": 4.0710845199422786e-06,
      "loss": 0.0152,
      "step": 1819580
    },
    {
      "epoch": 2.977815308680767,
      "grad_norm": 0.6306293606758118,
      "learning_rate": 4.071018627728761e-06,
      "loss": 0.0144,
      "step": 1819600
    },
    {
      "epoch": 2.9778480391194204,
      "grad_norm": 0.6491580605506897,
      "learning_rate": 4.070952735515245e-06,
      "loss": 0.0138,
      "step": 1819620
    },
    {
      "epoch": 2.9778807695580736,
      "grad_norm": 0.7818936705589294,
      "learning_rate": 4.070886843301728e-06,
      "loss": 0.016,
      "step": 1819640
    },
    {
      "epoch": 2.9779134999967267,
      "grad_norm": 0.2057144045829773,
      "learning_rate": 4.07082095108821e-06,
      "loss": 0.0126,
      "step": 1819660
    },
    {
      "epoch": 2.9779462304353803,
      "grad_norm": 0.2642519474029541,
      "learning_rate": 4.070755058874693e-06,
      "loss": 0.0097,
      "step": 1819680
    },
    {
      "epoch": 2.977978960874034,
      "grad_norm": 0.41819411516189575,
      "learning_rate": 4.070689166661176e-06,
      "loss": 0.0134,
      "step": 1819700
    },
    {
      "epoch": 2.978011691312687,
      "grad_norm": 0.8546285033226013,
      "learning_rate": 4.070623274447659e-06,
      "loss": 0.0129,
      "step": 1819720
    },
    {
      "epoch": 2.97804442175134,
      "grad_norm": 0.21549633145332336,
      "learning_rate": 4.070557382234141e-06,
      "loss": 0.0087,
      "step": 1819740
    },
    {
      "epoch": 2.978077152189994,
      "grad_norm": 0.4526675343513489,
      "learning_rate": 4.070491490020624e-06,
      "loss": 0.0119,
      "step": 1819760
    },
    {
      "epoch": 2.978109882628647,
      "grad_norm": 0.18306758999824524,
      "learning_rate": 4.070425597807107e-06,
      "loss": 0.0095,
      "step": 1819780
    },
    {
      "epoch": 2.9781426130673,
      "grad_norm": 0.1462993323802948,
      "learning_rate": 4.07035970559359e-06,
      "loss": 0.0089,
      "step": 1819800
    },
    {
      "epoch": 2.9781753435059537,
      "grad_norm": 0.43180665373802185,
      "learning_rate": 4.070293813380073e-06,
      "loss": 0.0137,
      "step": 1819820
    },
    {
      "epoch": 2.978208073944607,
      "grad_norm": 0.3802727460861206,
      "learning_rate": 4.070227921166557e-06,
      "loss": 0.0142,
      "step": 1819840
    },
    {
      "epoch": 2.9782408043832604,
      "grad_norm": 0.493193656206131,
      "learning_rate": 4.0701620289530395e-06,
      "loss": 0.01,
      "step": 1819860
    },
    {
      "epoch": 2.9782735348219136,
      "grad_norm": 0.12421626597642899,
      "learning_rate": 4.070096136739522e-06,
      "loss": 0.0113,
      "step": 1819880
    },
    {
      "epoch": 2.978306265260567,
      "grad_norm": 0.47479182481765747,
      "learning_rate": 4.070030244526005e-06,
      "loss": 0.0127,
      "step": 1819900
    },
    {
      "epoch": 2.9783389956992203,
      "grad_norm": 0.23191671073436737,
      "learning_rate": 4.069964352312488e-06,
      "loss": 0.0116,
      "step": 1819920
    },
    {
      "epoch": 2.9783717261378735,
      "grad_norm": 0.4601483643054962,
      "learning_rate": 4.06989846009897e-06,
      "loss": 0.0162,
      "step": 1819940
    },
    {
      "epoch": 2.978404456576527,
      "grad_norm": 0.6817887425422668,
      "learning_rate": 4.069832567885453e-06,
      "loss": 0.0146,
      "step": 1819960
    },
    {
      "epoch": 2.9784371870151802,
      "grad_norm": 1.2963387966156006,
      "learning_rate": 4.069766675671936e-06,
      "loss": 0.0107,
      "step": 1819980
    },
    {
      "epoch": 2.978469917453834,
      "grad_norm": 0.4502962827682495,
      "learning_rate": 4.069700783458419e-06,
      "loss": 0.0186,
      "step": 1820000
    },
    {
      "epoch": 2.978502647892487,
      "grad_norm": 0.7135195732116699,
      "learning_rate": 4.069634891244902e-06,
      "loss": 0.0088,
      "step": 1820020
    },
    {
      "epoch": 2.9785353783311406,
      "grad_norm": 0.4172138571739197,
      "learning_rate": 4.069568999031385e-06,
      "loss": 0.0124,
      "step": 1820040
    },
    {
      "epoch": 2.9785681087697937,
      "grad_norm": 0.262808233499527,
      "learning_rate": 4.069503106817868e-06,
      "loss": 0.009,
      "step": 1820060
    },
    {
      "epoch": 2.978600839208447,
      "grad_norm": 0.19990621507167816,
      "learning_rate": 4.0694372146043504e-06,
      "loss": 0.016,
      "step": 1820080
    },
    {
      "epoch": 2.9786335696471005,
      "grad_norm": 0.38481566309928894,
      "learning_rate": 4.069371322390833e-06,
      "loss": 0.0108,
      "step": 1820100
    },
    {
      "epoch": 2.9786663000857536,
      "grad_norm": 0.5242482423782349,
      "learning_rate": 4.069305430177316e-06,
      "loss": 0.0261,
      "step": 1820120
    },
    {
      "epoch": 2.978699030524407,
      "grad_norm": 0.22579969465732574,
      "learning_rate": 4.069239537963799e-06,
      "loss": 0.0132,
      "step": 1820140
    },
    {
      "epoch": 2.9787317609630604,
      "grad_norm": 0.45625388622283936,
      "learning_rate": 4.069173645750282e-06,
      "loss": 0.0125,
      "step": 1820160
    },
    {
      "epoch": 2.978764491401714,
      "grad_norm": 0.10074355453252792,
      "learning_rate": 4.069107753536765e-06,
      "loss": 0.0082,
      "step": 1820180
    },
    {
      "epoch": 2.978797221840367,
      "grad_norm": 0.7085924744606018,
      "learning_rate": 4.069041861323248e-06,
      "loss": 0.0157,
      "step": 1820200
    },
    {
      "epoch": 2.9788299522790203,
      "grad_norm": 1.209343671798706,
      "learning_rate": 4.0689759691097305e-06,
      "loss": 0.0147,
      "step": 1820220
    },
    {
      "epoch": 2.978862682717674,
      "grad_norm": 0.20828475058078766,
      "learning_rate": 4.068910076896214e-06,
      "loss": 0.02,
      "step": 1820240
    },
    {
      "epoch": 2.978895413156327,
      "grad_norm": 0.6653943657875061,
      "learning_rate": 4.068844184682697e-06,
      "loss": 0.0196,
      "step": 1820260
    },
    {
      "epoch": 2.9789281435949806,
      "grad_norm": 0.09812436252832413,
      "learning_rate": 4.0687782924691795e-06,
      "loss": 0.0112,
      "step": 1820280
    },
    {
      "epoch": 2.9789608740336337,
      "grad_norm": 0.43193304538726807,
      "learning_rate": 4.068712400255662e-06,
      "loss": 0.0114,
      "step": 1820300
    },
    {
      "epoch": 2.9789936044722873,
      "grad_norm": 0.1812358796596527,
      "learning_rate": 4.068646508042145e-06,
      "loss": 0.0215,
      "step": 1820320
    },
    {
      "epoch": 2.9790263349109405,
      "grad_norm": 0.7140554785728455,
      "learning_rate": 4.068580615828628e-06,
      "loss": 0.0131,
      "step": 1820340
    },
    {
      "epoch": 2.9790590653495936,
      "grad_norm": 0.14502355456352234,
      "learning_rate": 4.0685147236151105e-06,
      "loss": 0.0127,
      "step": 1820360
    },
    {
      "epoch": 2.9790917957882472,
      "grad_norm": 0.4809193015098572,
      "learning_rate": 4.068448831401593e-06,
      "loss": 0.0223,
      "step": 1820380
    },
    {
      "epoch": 2.9791245262269004,
      "grad_norm": 0.42890259623527527,
      "learning_rate": 4.068382939188076e-06,
      "loss": 0.0229,
      "step": 1820400
    },
    {
      "epoch": 2.979157256665554,
      "grad_norm": 0.40723058581352234,
      "learning_rate": 4.0683170469745596e-06,
      "loss": 0.0131,
      "step": 1820420
    },
    {
      "epoch": 2.979189987104207,
      "grad_norm": 0.6910918354988098,
      "learning_rate": 4.068251154761042e-06,
      "loss": 0.0118,
      "step": 1820440
    },
    {
      "epoch": 2.9792227175428607,
      "grad_norm": 0.31014904379844666,
      "learning_rate": 4.068185262547525e-06,
      "loss": 0.011,
      "step": 1820460
    },
    {
      "epoch": 2.979255447981514,
      "grad_norm": 0.5388277173042297,
      "learning_rate": 4.068119370334008e-06,
      "loss": 0.0154,
      "step": 1820480
    },
    {
      "epoch": 2.979288178420167,
      "grad_norm": 0.49543362855911255,
      "learning_rate": 4.0680534781204905e-06,
      "loss": 0.0133,
      "step": 1820500
    },
    {
      "epoch": 2.9793209088588206,
      "grad_norm": 0.6199216842651367,
      "learning_rate": 4.067987585906974e-06,
      "loss": 0.017,
      "step": 1820520
    },
    {
      "epoch": 2.9793536392974738,
      "grad_norm": 0.2813262343406677,
      "learning_rate": 4.067921693693457e-06,
      "loss": 0.0108,
      "step": 1820540
    },
    {
      "epoch": 2.9793863697361274,
      "grad_norm": 0.3120292127132416,
      "learning_rate": 4.06785580147994e-06,
      "loss": 0.011,
      "step": 1820560
    },
    {
      "epoch": 2.9794191001747805,
      "grad_norm": 0.7118369340896606,
      "learning_rate": 4.067789909266422e-06,
      "loss": 0.0164,
      "step": 1820580
    },
    {
      "epoch": 2.979451830613434,
      "grad_norm": 0.422504723072052,
      "learning_rate": 4.067724017052905e-06,
      "loss": 0.0115,
      "step": 1820600
    },
    {
      "epoch": 2.9794845610520873,
      "grad_norm": 0.15199494361877441,
      "learning_rate": 4.067658124839388e-06,
      "loss": 0.0161,
      "step": 1820620
    },
    {
      "epoch": 2.9795172914907404,
      "grad_norm": 0.0879596397280693,
      "learning_rate": 4.067592232625871e-06,
      "loss": 0.0134,
      "step": 1820640
    },
    {
      "epoch": 2.979550021929394,
      "grad_norm": 0.42881014943122864,
      "learning_rate": 4.067526340412354e-06,
      "loss": 0.0134,
      "step": 1820660
    },
    {
      "epoch": 2.979582752368047,
      "grad_norm": 0.3429590165615082,
      "learning_rate": 4.067460448198837e-06,
      "loss": 0.0161,
      "step": 1820680
    },
    {
      "epoch": 2.9796154828067003,
      "grad_norm": 0.14396773278713226,
      "learning_rate": 4.06739455598532e-06,
      "loss": 0.0118,
      "step": 1820700
    },
    {
      "epoch": 2.979648213245354,
      "grad_norm": 0.7670310735702515,
      "learning_rate": 4.067328663771802e-06,
      "loss": 0.0142,
      "step": 1820720
    },
    {
      "epoch": 2.9796809436840075,
      "grad_norm": 0.26466405391693115,
      "learning_rate": 4.067262771558285e-06,
      "loss": 0.007,
      "step": 1820740
    },
    {
      "epoch": 2.9797136741226606,
      "grad_norm": 0.6036426424980164,
      "learning_rate": 4.067196879344768e-06,
      "loss": 0.0136,
      "step": 1820760
    },
    {
      "epoch": 2.979746404561314,
      "grad_norm": 0.8415553569793701,
      "learning_rate": 4.0671309871312506e-06,
      "loss": 0.0114,
      "step": 1820780
    },
    {
      "epoch": 2.9797791349999674,
      "grad_norm": 0.46074357628822327,
      "learning_rate": 4.067065094917733e-06,
      "loss": 0.0107,
      "step": 1820800
    },
    {
      "epoch": 2.9798118654386205,
      "grad_norm": 0.07744349539279938,
      "learning_rate": 4.066999202704217e-06,
      "loss": 0.0129,
      "step": 1820820
    },
    {
      "epoch": 2.9798445958772737,
      "grad_norm": 0.14402951300144196,
      "learning_rate": 4.0669333104907e-06,
      "loss": 0.0134,
      "step": 1820840
    },
    {
      "epoch": 2.9798773263159273,
      "grad_norm": 0.9603344202041626,
      "learning_rate": 4.066867418277182e-06,
      "loss": 0.0117,
      "step": 1820860
    },
    {
      "epoch": 2.979910056754581,
      "grad_norm": 0.8521335124969482,
      "learning_rate": 4.066801526063666e-06,
      "loss": 0.0181,
      "step": 1820880
    },
    {
      "epoch": 2.979942787193234,
      "grad_norm": 0.8639400005340576,
      "learning_rate": 4.066735633850149e-06,
      "loss": 0.0159,
      "step": 1820900
    },
    {
      "epoch": 2.979975517631887,
      "grad_norm": 0.3284636437892914,
      "learning_rate": 4.0666697416366314e-06,
      "loss": 0.0144,
      "step": 1820920
    },
    {
      "epoch": 2.9800082480705408,
      "grad_norm": 0.24191859364509583,
      "learning_rate": 4.066603849423114e-06,
      "loss": 0.018,
      "step": 1820940
    },
    {
      "epoch": 2.980040978509194,
      "grad_norm": 0.28794151544570923,
      "learning_rate": 4.066537957209597e-06,
      "loss": 0.0121,
      "step": 1820960
    },
    {
      "epoch": 2.980073708947847,
      "grad_norm": 0.48511460423469543,
      "learning_rate": 4.06647206499608e-06,
      "loss": 0.0181,
      "step": 1820980
    },
    {
      "epoch": 2.9801064393865007,
      "grad_norm": 0.17230892181396484,
      "learning_rate": 4.066406172782562e-06,
      "loss": 0.011,
      "step": 1821000
    },
    {
      "epoch": 2.9801391698251543,
      "grad_norm": 0.22139222919940948,
      "learning_rate": 4.066340280569045e-06,
      "loss": 0.0117,
      "step": 1821020
    },
    {
      "epoch": 2.9801719002638074,
      "grad_norm": 0.23272696137428284,
      "learning_rate": 4.066274388355529e-06,
      "loss": 0.0166,
      "step": 1821040
    },
    {
      "epoch": 2.9802046307024606,
      "grad_norm": 0.09524470567703247,
      "learning_rate": 4.0662084961420115e-06,
      "loss": 0.0092,
      "step": 1821060
    },
    {
      "epoch": 2.980237361141114,
      "grad_norm": 0.21851395070552826,
      "learning_rate": 4.066142603928494e-06,
      "loss": 0.0101,
      "step": 1821080
    },
    {
      "epoch": 2.9802700915797673,
      "grad_norm": 0.7657685279846191,
      "learning_rate": 4.066076711714977e-06,
      "loss": 0.0165,
      "step": 1821100
    },
    {
      "epoch": 2.9803028220184204,
      "grad_norm": 0.363004207611084,
      "learning_rate": 4.06601081950146e-06,
      "loss": 0.0156,
      "step": 1821120
    },
    {
      "epoch": 2.980335552457074,
      "grad_norm": 0.2775437533855438,
      "learning_rate": 4.065944927287942e-06,
      "loss": 0.0197,
      "step": 1821140
    },
    {
      "epoch": 2.9803682828957276,
      "grad_norm": 0.16013431549072266,
      "learning_rate": 4.065879035074425e-06,
      "loss": 0.0104,
      "step": 1821160
    },
    {
      "epoch": 2.980401013334381,
      "grad_norm": 0.1464633196592331,
      "learning_rate": 4.065813142860908e-06,
      "loss": 0.0142,
      "step": 1821180
    },
    {
      "epoch": 2.980433743773034,
      "grad_norm": 1.019904613494873,
      "learning_rate": 4.065747250647391e-06,
      "loss": 0.0154,
      "step": 1821200
    },
    {
      "epoch": 2.9804664742116875,
      "grad_norm": 0.4356001019477844,
      "learning_rate": 4.065681358433874e-06,
      "loss": 0.0089,
      "step": 1821220
    },
    {
      "epoch": 2.9804992046503407,
      "grad_norm": 0.5412189364433289,
      "learning_rate": 4.065615466220357e-06,
      "loss": 0.0136,
      "step": 1821240
    },
    {
      "epoch": 2.980531935088994,
      "grad_norm": 0.26295945048332214,
      "learning_rate": 4.0655495740068406e-06,
      "loss": 0.0196,
      "step": 1821260
    },
    {
      "epoch": 2.9805646655276474,
      "grad_norm": 0.6717678904533386,
      "learning_rate": 4.065483681793323e-06,
      "loss": 0.0124,
      "step": 1821280
    },
    {
      "epoch": 2.9805973959663006,
      "grad_norm": 0.6675727367401123,
      "learning_rate": 4.065417789579806e-06,
      "loss": 0.0088,
      "step": 1821300
    },
    {
      "epoch": 2.980630126404954,
      "grad_norm": 1.0763875246047974,
      "learning_rate": 4.065351897366289e-06,
      "loss": 0.0182,
      "step": 1821320
    },
    {
      "epoch": 2.9806628568436073,
      "grad_norm": 0.18912477791309357,
      "learning_rate": 4.0652860051527715e-06,
      "loss": 0.0136,
      "step": 1821340
    },
    {
      "epoch": 2.980695587282261,
      "grad_norm": 0.38895362615585327,
      "learning_rate": 4.065220112939254e-06,
      "loss": 0.0099,
      "step": 1821360
    },
    {
      "epoch": 2.980728317720914,
      "grad_norm": 0.2160995900630951,
      "learning_rate": 4.065154220725737e-06,
      "loss": 0.0166,
      "step": 1821380
    },
    {
      "epoch": 2.980761048159567,
      "grad_norm": 0.6499944925308228,
      "learning_rate": 4.06508832851222e-06,
      "loss": 0.019,
      "step": 1821400
    },
    {
      "epoch": 2.980793778598221,
      "grad_norm": 0.419097363948822,
      "learning_rate": 4.0650224362987025e-06,
      "loss": 0.0119,
      "step": 1821420
    },
    {
      "epoch": 2.980826509036874,
      "grad_norm": 0.22865022718906403,
      "learning_rate": 4.064956544085186e-06,
      "loss": 0.0149,
      "step": 1821440
    },
    {
      "epoch": 2.9808592394755276,
      "grad_norm": 0.36673882603645325,
      "learning_rate": 4.064890651871669e-06,
      "loss": 0.0168,
      "step": 1821460
    },
    {
      "epoch": 2.9808919699141807,
      "grad_norm": 0.20611059665679932,
      "learning_rate": 4.0648247596581515e-06,
      "loss": 0.0093,
      "step": 1821480
    },
    {
      "epoch": 2.9809247003528343,
      "grad_norm": 1.1100804805755615,
      "learning_rate": 4.064758867444634e-06,
      "loss": 0.012,
      "step": 1821500
    },
    {
      "epoch": 2.9809574307914875,
      "grad_norm": 0.3192610442638397,
      "learning_rate": 4.064692975231117e-06,
      "loss": 0.0208,
      "step": 1821520
    },
    {
      "epoch": 2.9809901612301406,
      "grad_norm": 0.19006109237670898,
      "learning_rate": 4.0646270830176e-06,
      "loss": 0.0187,
      "step": 1821540
    },
    {
      "epoch": 2.981022891668794,
      "grad_norm": 0.2857533395290375,
      "learning_rate": 4.064561190804083e-06,
      "loss": 0.0107,
      "step": 1821560
    },
    {
      "epoch": 2.9810556221074473,
      "grad_norm": 0.19581162929534912,
      "learning_rate": 4.064495298590566e-06,
      "loss": 0.0112,
      "step": 1821580
    },
    {
      "epoch": 2.981088352546101,
      "grad_norm": 0.2673994302749634,
      "learning_rate": 4.064429406377049e-06,
      "loss": 0.0092,
      "step": 1821600
    },
    {
      "epoch": 2.981121082984754,
      "grad_norm": 0.118096724152565,
      "learning_rate": 4.0643635141635316e-06,
      "loss": 0.0113,
      "step": 1821620
    },
    {
      "epoch": 2.9811538134234077,
      "grad_norm": 0.1716030240058899,
      "learning_rate": 4.064297621950014e-06,
      "loss": 0.0196,
      "step": 1821640
    },
    {
      "epoch": 2.981186543862061,
      "grad_norm": 0.3840106725692749,
      "learning_rate": 4.064231729736498e-06,
      "loss": 0.0111,
      "step": 1821660
    },
    {
      "epoch": 2.981219274300714,
      "grad_norm": 0.209600567817688,
      "learning_rate": 4.064165837522981e-06,
      "loss": 0.0132,
      "step": 1821680
    },
    {
      "epoch": 2.9812520047393676,
      "grad_norm": 0.14272062480449677,
      "learning_rate": 4.064099945309463e-06,
      "loss": 0.01,
      "step": 1821700
    },
    {
      "epoch": 2.9812847351780207,
      "grad_norm": 0.28592947125434875,
      "learning_rate": 4.064034053095946e-06,
      "loss": 0.0142,
      "step": 1821720
    },
    {
      "epoch": 2.9813174656166743,
      "grad_norm": 0.9104464054107666,
      "learning_rate": 4.063968160882429e-06,
      "loss": 0.011,
      "step": 1821740
    },
    {
      "epoch": 2.9813501960553275,
      "grad_norm": 0.19047963619232178,
      "learning_rate": 4.063902268668912e-06,
      "loss": 0.0095,
      "step": 1821760
    },
    {
      "epoch": 2.981382926493981,
      "grad_norm": 0.19099196791648865,
      "learning_rate": 4.063836376455394e-06,
      "loss": 0.012,
      "step": 1821780
    },
    {
      "epoch": 2.981415656932634,
      "grad_norm": 0.22219280898571014,
      "learning_rate": 4.063770484241877e-06,
      "loss": 0.0189,
      "step": 1821800
    },
    {
      "epoch": 2.9814483873712874,
      "grad_norm": 0.731640636920929,
      "learning_rate": 4.06370459202836e-06,
      "loss": 0.0157,
      "step": 1821820
    },
    {
      "epoch": 2.981481117809941,
      "grad_norm": 1.4379384517669678,
      "learning_rate": 4.063638699814843e-06,
      "loss": 0.0157,
      "step": 1821840
    },
    {
      "epoch": 2.981513848248594,
      "grad_norm": 0.08916132152080536,
      "learning_rate": 4.063572807601326e-06,
      "loss": 0.0165,
      "step": 1821860
    },
    {
      "epoch": 2.9815465786872477,
      "grad_norm": 0.3299511969089508,
      "learning_rate": 4.063506915387809e-06,
      "loss": 0.0117,
      "step": 1821880
    },
    {
      "epoch": 2.981579309125901,
      "grad_norm": 0.18474553525447845,
      "learning_rate": 4.063441023174292e-06,
      "loss": 0.0143,
      "step": 1821900
    },
    {
      "epoch": 2.9816120395645545,
      "grad_norm": 0.4982541799545288,
      "learning_rate": 4.063375130960775e-06,
      "loss": 0.0125,
      "step": 1821920
    },
    {
      "epoch": 2.9816447700032076,
      "grad_norm": 0.17588850855827332,
      "learning_rate": 4.063309238747258e-06,
      "loss": 0.012,
      "step": 1821940
    },
    {
      "epoch": 2.9816775004418608,
      "grad_norm": 0.18199965357780457,
      "learning_rate": 4.063243346533741e-06,
      "loss": 0.0129,
      "step": 1821960
    },
    {
      "epoch": 2.9817102308805143,
      "grad_norm": 0.17035134136676788,
      "learning_rate": 4.063177454320223e-06,
      "loss": 0.0178,
      "step": 1821980
    },
    {
      "epoch": 2.9817429613191675,
      "grad_norm": 0.17813415825366974,
      "learning_rate": 4.063111562106706e-06,
      "loss": 0.015,
      "step": 1822000
    },
    {
      "epoch": 2.981775691757821,
      "grad_norm": 0.0919560045003891,
      "learning_rate": 4.063045669893189e-06,
      "loss": 0.0161,
      "step": 1822020
    },
    {
      "epoch": 2.9818084221964742,
      "grad_norm": 0.5633732676506042,
      "learning_rate": 4.062979777679672e-06,
      "loss": 0.0115,
      "step": 1822040
    },
    {
      "epoch": 2.981841152635128,
      "grad_norm": 0.4075637757778168,
      "learning_rate": 4.062913885466155e-06,
      "loss": 0.0142,
      "step": 1822060
    },
    {
      "epoch": 2.981873883073781,
      "grad_norm": 0.3794052302837372,
      "learning_rate": 4.062847993252638e-06,
      "loss": 0.0082,
      "step": 1822080
    },
    {
      "epoch": 2.981906613512434,
      "grad_norm": 0.2593853175640106,
      "learning_rate": 4.062782101039121e-06,
      "loss": 0.0153,
      "step": 1822100
    },
    {
      "epoch": 2.9819393439510877,
      "grad_norm": 0.1544589251279831,
      "learning_rate": 4.0627162088256034e-06,
      "loss": 0.0106,
      "step": 1822120
    },
    {
      "epoch": 2.981972074389741,
      "grad_norm": 0.2404394894838333,
      "learning_rate": 4.062650316612086e-06,
      "loss": 0.0132,
      "step": 1822140
    },
    {
      "epoch": 2.9820048048283945,
      "grad_norm": 0.7520897388458252,
      "learning_rate": 4.062584424398569e-06,
      "loss": 0.0134,
      "step": 1822160
    },
    {
      "epoch": 2.9820375352670476,
      "grad_norm": 0.24691928923130035,
      "learning_rate": 4.062518532185052e-06,
      "loss": 0.0173,
      "step": 1822180
    },
    {
      "epoch": 2.982070265705701,
      "grad_norm": 0.551127016544342,
      "learning_rate": 4.062452639971534e-06,
      "loss": 0.0091,
      "step": 1822200
    },
    {
      "epoch": 2.9821029961443544,
      "grad_norm": 0.4215531647205353,
      "learning_rate": 4.062386747758017e-06,
      "loss": 0.014,
      "step": 1822220
    },
    {
      "epoch": 2.9821357265830075,
      "grad_norm": 0.40555891394615173,
      "learning_rate": 4.062320855544501e-06,
      "loss": 0.0114,
      "step": 1822240
    },
    {
      "epoch": 2.982168457021661,
      "grad_norm": 0.1968240886926651,
      "learning_rate": 4.0622549633309835e-06,
      "loss": 0.0107,
      "step": 1822260
    },
    {
      "epoch": 2.9822011874603143,
      "grad_norm": 0.336274117231369,
      "learning_rate": 4.062189071117466e-06,
      "loss": 0.0149,
      "step": 1822280
    },
    {
      "epoch": 2.9822339178989674,
      "grad_norm": 0.16795368492603302,
      "learning_rate": 4.06212317890395e-06,
      "loss": 0.0166,
      "step": 1822300
    },
    {
      "epoch": 2.982266648337621,
      "grad_norm": 0.2976450026035309,
      "learning_rate": 4.0620572866904325e-06,
      "loss": 0.0113,
      "step": 1822320
    },
    {
      "epoch": 2.9822993787762746,
      "grad_norm": 0.10998086631298065,
      "learning_rate": 4.061991394476915e-06,
      "loss": 0.0114,
      "step": 1822340
    },
    {
      "epoch": 2.9823321092149278,
      "grad_norm": 0.6479083299636841,
      "learning_rate": 4.061925502263398e-06,
      "loss": 0.0128,
      "step": 1822360
    },
    {
      "epoch": 2.982364839653581,
      "grad_norm": 0.336349755525589,
      "learning_rate": 4.061859610049881e-06,
      "loss": 0.015,
      "step": 1822380
    },
    {
      "epoch": 2.9823975700922345,
      "grad_norm": 0.5966946482658386,
      "learning_rate": 4.0617937178363635e-06,
      "loss": 0.0094,
      "step": 1822400
    },
    {
      "epoch": 2.9824303005308876,
      "grad_norm": 0.3638154864311218,
      "learning_rate": 4.061727825622846e-06,
      "loss": 0.0085,
      "step": 1822420
    },
    {
      "epoch": 2.982463030969541,
      "grad_norm": 0.49307700991630554,
      "learning_rate": 4.061661933409329e-06,
      "loss": 0.0102,
      "step": 1822440
    },
    {
      "epoch": 2.9824957614081944,
      "grad_norm": 0.34236690402030945,
      "learning_rate": 4.0615960411958126e-06,
      "loss": 0.0106,
      "step": 1822460
    },
    {
      "epoch": 2.982528491846848,
      "grad_norm": 0.33214157819747925,
      "learning_rate": 4.061530148982295e-06,
      "loss": 0.012,
      "step": 1822480
    },
    {
      "epoch": 2.982561222285501,
      "grad_norm": 0.45856207609176636,
      "learning_rate": 4.061464256768778e-06,
      "loss": 0.0178,
      "step": 1822500
    },
    {
      "epoch": 2.9825939527241543,
      "grad_norm": 0.12179544568061829,
      "learning_rate": 4.061398364555261e-06,
      "loss": 0.0136,
      "step": 1822520
    },
    {
      "epoch": 2.982626683162808,
      "grad_norm": 0.21282921731472015,
      "learning_rate": 4.0613324723417435e-06,
      "loss": 0.0175,
      "step": 1822540
    },
    {
      "epoch": 2.982659413601461,
      "grad_norm": 0.7305921316146851,
      "learning_rate": 4.061266580128226e-06,
      "loss": 0.0118,
      "step": 1822560
    },
    {
      "epoch": 2.982692144040114,
      "grad_norm": 0.2230999767780304,
      "learning_rate": 4.061200687914709e-06,
      "loss": 0.0121,
      "step": 1822580
    },
    {
      "epoch": 2.9827248744787678,
      "grad_norm": 0.4619891941547394,
      "learning_rate": 4.061134795701192e-06,
      "loss": 0.0124,
      "step": 1822600
    },
    {
      "epoch": 2.9827576049174214,
      "grad_norm": 0.651805579662323,
      "learning_rate": 4.061068903487675e-06,
      "loss": 0.0203,
      "step": 1822620
    },
    {
      "epoch": 2.9827903353560745,
      "grad_norm": 0.4599398076534271,
      "learning_rate": 4.061003011274158e-06,
      "loss": 0.0142,
      "step": 1822640
    },
    {
      "epoch": 2.9828230657947277,
      "grad_norm": 0.5972052216529846,
      "learning_rate": 4.060937119060641e-06,
      "loss": 0.0133,
      "step": 1822660
    },
    {
      "epoch": 2.9828557962333813,
      "grad_norm": 0.5608586072921753,
      "learning_rate": 4.060871226847124e-06,
      "loss": 0.0168,
      "step": 1822680
    },
    {
      "epoch": 2.9828885266720344,
      "grad_norm": 0.6191319227218628,
      "learning_rate": 4.060805334633607e-06,
      "loss": 0.0115,
      "step": 1822700
    },
    {
      "epoch": 2.9829212571106876,
      "grad_norm": 4.927525520324707,
      "learning_rate": 4.06073944242009e-06,
      "loss": 0.0117,
      "step": 1822720
    },
    {
      "epoch": 2.982953987549341,
      "grad_norm": 0.46511512994766235,
      "learning_rate": 4.060673550206573e-06,
      "loss": 0.0144,
      "step": 1822740
    },
    {
      "epoch": 2.9829867179879948,
      "grad_norm": 0.9074001908302307,
      "learning_rate": 4.060607657993055e-06,
      "loss": 0.0118,
      "step": 1822760
    },
    {
      "epoch": 2.983019448426648,
      "grad_norm": 0.26203426718711853,
      "learning_rate": 4.060541765779538e-06,
      "loss": 0.01,
      "step": 1822780
    },
    {
      "epoch": 2.983052178865301,
      "grad_norm": 3.6115360260009766,
      "learning_rate": 4.060475873566021e-06,
      "loss": 0.0169,
      "step": 1822800
    },
    {
      "epoch": 2.9830849093039546,
      "grad_norm": 0.22775453329086304,
      "learning_rate": 4.0604099813525036e-06,
      "loss": 0.0105,
      "step": 1822820
    },
    {
      "epoch": 2.983117639742608,
      "grad_norm": 0.86381995677948,
      "learning_rate": 4.060344089138986e-06,
      "loss": 0.0122,
      "step": 1822840
    },
    {
      "epoch": 2.983150370181261,
      "grad_norm": 0.2744314968585968,
      "learning_rate": 4.06027819692547e-06,
      "loss": 0.0187,
      "step": 1822860
    },
    {
      "epoch": 2.9831831006199145,
      "grad_norm": 0.5292660593986511,
      "learning_rate": 4.060212304711953e-06,
      "loss": 0.0184,
      "step": 1822880
    },
    {
      "epoch": 2.9832158310585677,
      "grad_norm": 0.17280744016170502,
      "learning_rate": 4.060146412498435e-06,
      "loss": 0.0153,
      "step": 1822900
    },
    {
      "epoch": 2.9832485614972213,
      "grad_norm": 0.02890383079648018,
      "learning_rate": 4.060080520284918e-06,
      "loss": 0.0083,
      "step": 1822920
    },
    {
      "epoch": 2.9832812919358744,
      "grad_norm": 0.0761890634894371,
      "learning_rate": 4.060014628071401e-06,
      "loss": 0.01,
      "step": 1822940
    },
    {
      "epoch": 2.983314022374528,
      "grad_norm": 0.19155500829219818,
      "learning_rate": 4.059948735857884e-06,
      "loss": 0.0099,
      "step": 1822960
    },
    {
      "epoch": 2.983346752813181,
      "grad_norm": 0.4806410074234009,
      "learning_rate": 4.059882843644367e-06,
      "loss": 0.0106,
      "step": 1822980
    },
    {
      "epoch": 2.9833794832518343,
      "grad_norm": 1.0364762544631958,
      "learning_rate": 4.05981695143085e-06,
      "loss": 0.0127,
      "step": 1823000
    },
    {
      "epoch": 2.983412213690488,
      "grad_norm": 0.2788075804710388,
      "learning_rate": 4.059751059217333e-06,
      "loss": 0.0156,
      "step": 1823020
    },
    {
      "epoch": 2.983444944129141,
      "grad_norm": 0.7797693014144897,
      "learning_rate": 4.059685167003815e-06,
      "loss": 0.0091,
      "step": 1823040
    },
    {
      "epoch": 2.9834776745677947,
      "grad_norm": 0.639275848865509,
      "learning_rate": 4.059619274790298e-06,
      "loss": 0.0173,
      "step": 1823060
    },
    {
      "epoch": 2.983510405006448,
      "grad_norm": 0.2238064855337143,
      "learning_rate": 4.059553382576782e-06,
      "loss": 0.0156,
      "step": 1823080
    },
    {
      "epoch": 2.9835431354451014,
      "grad_norm": 0.49682146310806274,
      "learning_rate": 4.0594874903632645e-06,
      "loss": 0.01,
      "step": 1823100
    },
    {
      "epoch": 2.9835758658837546,
      "grad_norm": 0.8026619553565979,
      "learning_rate": 4.059421598149747e-06,
      "loss": 0.0135,
      "step": 1823120
    },
    {
      "epoch": 2.9836085963224077,
      "grad_norm": 0.1966305673122406,
      "learning_rate": 4.05935570593623e-06,
      "loss": 0.0091,
      "step": 1823140
    },
    {
      "epoch": 2.9836413267610613,
      "grad_norm": 0.27642694115638733,
      "learning_rate": 4.059289813722713e-06,
      "loss": 0.0149,
      "step": 1823160
    },
    {
      "epoch": 2.9836740571997145,
      "grad_norm": 0.33313918113708496,
      "learning_rate": 4.0592239215091954e-06,
      "loss": 0.0165,
      "step": 1823180
    },
    {
      "epoch": 2.983706787638368,
      "grad_norm": 0.2936679422855377,
      "learning_rate": 4.059158029295678e-06,
      "loss": 0.0148,
      "step": 1823200
    },
    {
      "epoch": 2.983739518077021,
      "grad_norm": 0.3260834813117981,
      "learning_rate": 4.059092137082161e-06,
      "loss": 0.0095,
      "step": 1823220
    },
    {
      "epoch": 2.983772248515675,
      "grad_norm": 0.15592490136623383,
      "learning_rate": 4.059026244868644e-06,
      "loss": 0.0153,
      "step": 1823240
    },
    {
      "epoch": 2.983804978954328,
      "grad_norm": 0.15435649454593658,
      "learning_rate": 4.058960352655127e-06,
      "loss": 0.0127,
      "step": 1823260
    },
    {
      "epoch": 2.983837709392981,
      "grad_norm": 0.11820893734693527,
      "learning_rate": 4.05889446044161e-06,
      "loss": 0.0167,
      "step": 1823280
    },
    {
      "epoch": 2.9838704398316347,
      "grad_norm": 0.1998048573732376,
      "learning_rate": 4.058828568228093e-06,
      "loss": 0.0154,
      "step": 1823300
    },
    {
      "epoch": 2.983903170270288,
      "grad_norm": 0.19268232583999634,
      "learning_rate": 4.058762676014576e-06,
      "loss": 0.0138,
      "step": 1823320
    },
    {
      "epoch": 2.9839359007089414,
      "grad_norm": 0.28846582770347595,
      "learning_rate": 4.058696783801059e-06,
      "loss": 0.0153,
      "step": 1823340
    },
    {
      "epoch": 2.9839686311475946,
      "grad_norm": 0.1771571934223175,
      "learning_rate": 4.058630891587542e-06,
      "loss": 0.0132,
      "step": 1823360
    },
    {
      "epoch": 2.984001361586248,
      "grad_norm": 0.20011119544506073,
      "learning_rate": 4.0585649993740245e-06,
      "loss": 0.0139,
      "step": 1823380
    },
    {
      "epoch": 2.9840340920249013,
      "grad_norm": 0.38800686597824097,
      "learning_rate": 4.058499107160507e-06,
      "loss": 0.0069,
      "step": 1823400
    },
    {
      "epoch": 2.9840668224635545,
      "grad_norm": 0.2445390224456787,
      "learning_rate": 4.05843321494699e-06,
      "loss": 0.0123,
      "step": 1823420
    },
    {
      "epoch": 2.984099552902208,
      "grad_norm": 0.4752888083457947,
      "learning_rate": 4.058367322733473e-06,
      "loss": 0.013,
      "step": 1823440
    },
    {
      "epoch": 2.9841322833408612,
      "grad_norm": 0.15582363307476044,
      "learning_rate": 4.0583014305199555e-06,
      "loss": 0.0163,
      "step": 1823460
    },
    {
      "epoch": 2.984165013779515,
      "grad_norm": 0.38983798027038574,
      "learning_rate": 4.058235538306439e-06,
      "loss": 0.0116,
      "step": 1823480
    },
    {
      "epoch": 2.984197744218168,
      "grad_norm": 0.3697882294654846,
      "learning_rate": 4.058169646092922e-06,
      "loss": 0.0131,
      "step": 1823500
    },
    {
      "epoch": 2.9842304746568216,
      "grad_norm": 0.17185942828655243,
      "learning_rate": 4.0581037538794045e-06,
      "loss": 0.0124,
      "step": 1823520
    },
    {
      "epoch": 2.9842632050954747,
      "grad_norm": 0.28652262687683105,
      "learning_rate": 4.058037861665887e-06,
      "loss": 0.0112,
      "step": 1823540
    },
    {
      "epoch": 2.984295935534128,
      "grad_norm": 0.6546831727027893,
      "learning_rate": 4.05797196945237e-06,
      "loss": 0.0137,
      "step": 1823560
    },
    {
      "epoch": 2.9843286659727815,
      "grad_norm": 0.6416552066802979,
      "learning_rate": 4.057906077238853e-06,
      "loss": 0.0158,
      "step": 1823580
    },
    {
      "epoch": 2.9843613964114346,
      "grad_norm": 0.16020460426807404,
      "learning_rate": 4.0578401850253355e-06,
      "loss": 0.0131,
      "step": 1823600
    },
    {
      "epoch": 2.984394126850088,
      "grad_norm": 0.24054788053035736,
      "learning_rate": 4.057774292811818e-06,
      "loss": 0.0183,
      "step": 1823620
    },
    {
      "epoch": 2.9844268572887414,
      "grad_norm": 0.5777165293693542,
      "learning_rate": 4.057708400598301e-06,
      "loss": 0.0129,
      "step": 1823640
    },
    {
      "epoch": 2.984459587727395,
      "grad_norm": 0.7932609915733337,
      "learning_rate": 4.0576425083847846e-06,
      "loss": 0.0125,
      "step": 1823660
    },
    {
      "epoch": 2.984492318166048,
      "grad_norm": 0.29287493228912354,
      "learning_rate": 4.057576616171267e-06,
      "loss": 0.0151,
      "step": 1823680
    },
    {
      "epoch": 2.9845250486047012,
      "grad_norm": 0.1274116337299347,
      "learning_rate": 4.057510723957751e-06,
      "loss": 0.0118,
      "step": 1823700
    },
    {
      "epoch": 2.984557779043355,
      "grad_norm": 0.8478858470916748,
      "learning_rate": 4.057444831744234e-06,
      "loss": 0.0129,
      "step": 1823720
    },
    {
      "epoch": 2.984590509482008,
      "grad_norm": 0.5906266570091248,
      "learning_rate": 4.057378939530716e-06,
      "loss": 0.014,
      "step": 1823740
    },
    {
      "epoch": 2.984623239920661,
      "grad_norm": 0.25557762384414673,
      "learning_rate": 4.057313047317199e-06,
      "loss": 0.0202,
      "step": 1823760
    },
    {
      "epoch": 2.9846559703593147,
      "grad_norm": 0.18204446136951447,
      "learning_rate": 4.057247155103682e-06,
      "loss": 0.0128,
      "step": 1823780
    },
    {
      "epoch": 2.9846887007979683,
      "grad_norm": 0.6087046265602112,
      "learning_rate": 4.057181262890165e-06,
      "loss": 0.0111,
      "step": 1823800
    },
    {
      "epoch": 2.9847214312366215,
      "grad_norm": 0.3565448522567749,
      "learning_rate": 4.057115370676647e-06,
      "loss": 0.0124,
      "step": 1823820
    },
    {
      "epoch": 2.9847541616752746,
      "grad_norm": 0.29596197605133057,
      "learning_rate": 4.05704947846313e-06,
      "loss": 0.011,
      "step": 1823840
    },
    {
      "epoch": 2.9847868921139282,
      "grad_norm": 0.7876362800598145,
      "learning_rate": 4.056983586249613e-06,
      "loss": 0.012,
      "step": 1823860
    },
    {
      "epoch": 2.9848196225525814,
      "grad_norm": 0.4391535520553589,
      "learning_rate": 4.056917694036096e-06,
      "loss": 0.0137,
      "step": 1823880
    },
    {
      "epoch": 2.9848523529912345,
      "grad_norm": 0.3260049819946289,
      "learning_rate": 4.056851801822579e-06,
      "loss": 0.0109,
      "step": 1823900
    },
    {
      "epoch": 2.984885083429888,
      "grad_norm": 0.5490807890892029,
      "learning_rate": 4.056785909609062e-06,
      "loss": 0.0112,
      "step": 1823920
    },
    {
      "epoch": 2.9849178138685417,
      "grad_norm": 0.6097089648246765,
      "learning_rate": 4.056720017395545e-06,
      "loss": 0.0107,
      "step": 1823940
    },
    {
      "epoch": 2.984950544307195,
      "grad_norm": 0.31271079182624817,
      "learning_rate": 4.056654125182027e-06,
      "loss": 0.0142,
      "step": 1823960
    },
    {
      "epoch": 2.984983274745848,
      "grad_norm": 0.24764038622379303,
      "learning_rate": 4.05658823296851e-06,
      "loss": 0.0173,
      "step": 1823980
    },
    {
      "epoch": 2.9850160051845016,
      "grad_norm": 0.15427114069461823,
      "learning_rate": 4.056522340754993e-06,
      "loss": 0.0133,
      "step": 1824000
    },
    {
      "epoch": 2.9850487356231548,
      "grad_norm": 0.2290635108947754,
      "learning_rate": 4.056456448541476e-06,
      "loss": 0.011,
      "step": 1824020
    },
    {
      "epoch": 2.985081466061808,
      "grad_norm": 0.4625815451145172,
      "learning_rate": 4.056390556327959e-06,
      "loss": 0.0111,
      "step": 1824040
    },
    {
      "epoch": 2.9851141965004615,
      "grad_norm": 0.4135371148586273,
      "learning_rate": 4.056324664114442e-06,
      "loss": 0.0135,
      "step": 1824060
    },
    {
      "epoch": 2.985146926939115,
      "grad_norm": 0.17351269721984863,
      "learning_rate": 4.056258771900925e-06,
      "loss": 0.0141,
      "step": 1824080
    },
    {
      "epoch": 2.9851796573777682,
      "grad_norm": 0.260118305683136,
      "learning_rate": 4.056192879687408e-06,
      "loss": 0.0145,
      "step": 1824100
    },
    {
      "epoch": 2.9852123878164214,
      "grad_norm": 0.2867409586906433,
      "learning_rate": 4.056126987473891e-06,
      "loss": 0.0126,
      "step": 1824120
    },
    {
      "epoch": 2.985245118255075,
      "grad_norm": 0.1707703173160553,
      "learning_rate": 4.056061095260374e-06,
      "loss": 0.0128,
      "step": 1824140
    },
    {
      "epoch": 2.985277848693728,
      "grad_norm": 0.9145499467849731,
      "learning_rate": 4.0559952030468565e-06,
      "loss": 0.0181,
      "step": 1824160
    },
    {
      "epoch": 2.9853105791323813,
      "grad_norm": 0.2464679777622223,
      "learning_rate": 4.055929310833339e-06,
      "loss": 0.012,
      "step": 1824180
    },
    {
      "epoch": 2.985343309571035,
      "grad_norm": 0.15334664285182953,
      "learning_rate": 4.055863418619822e-06,
      "loss": 0.0113,
      "step": 1824200
    },
    {
      "epoch": 2.9853760400096885,
      "grad_norm": 0.20766179263591766,
      "learning_rate": 4.055797526406305e-06,
      "loss": 0.0132,
      "step": 1824220
    },
    {
      "epoch": 2.9854087704483416,
      "grad_norm": 0.2933107614517212,
      "learning_rate": 4.055731634192787e-06,
      "loss": 0.0143,
      "step": 1824240
    },
    {
      "epoch": 2.985441500886995,
      "grad_norm": 0.33600348234176636,
      "learning_rate": 4.05566574197927e-06,
      "loss": 0.0136,
      "step": 1824260
    },
    {
      "epoch": 2.9854742313256484,
      "grad_norm": 0.2942391633987427,
      "learning_rate": 4.055599849765754e-06,
      "loss": 0.0074,
      "step": 1824280
    },
    {
      "epoch": 2.9855069617643015,
      "grad_norm": 0.26700475811958313,
      "learning_rate": 4.0555339575522365e-06,
      "loss": 0.0074,
      "step": 1824300
    },
    {
      "epoch": 2.9855396922029547,
      "grad_norm": 0.20800825953483582,
      "learning_rate": 4.055468065338719e-06,
      "loss": 0.0134,
      "step": 1824320
    },
    {
      "epoch": 2.9855724226416083,
      "grad_norm": 0.346944659948349,
      "learning_rate": 4.055402173125202e-06,
      "loss": 0.0136,
      "step": 1824340
    },
    {
      "epoch": 2.9856051530802614,
      "grad_norm": 0.20642422139644623,
      "learning_rate": 4.055336280911685e-06,
      "loss": 0.0118,
      "step": 1824360
    },
    {
      "epoch": 2.985637883518915,
      "grad_norm": 1.1160975694656372,
      "learning_rate": 4.055270388698168e-06,
      "loss": 0.0164,
      "step": 1824380
    },
    {
      "epoch": 2.985670613957568,
      "grad_norm": 0.23962126672267914,
      "learning_rate": 4.055204496484651e-06,
      "loss": 0.017,
      "step": 1824400
    },
    {
      "epoch": 2.9857033443962218,
      "grad_norm": 0.09370562434196472,
      "learning_rate": 4.055138604271134e-06,
      "loss": 0.0121,
      "step": 1824420
    },
    {
      "epoch": 2.985736074834875,
      "grad_norm": 0.33639854192733765,
      "learning_rate": 4.0550727120576165e-06,
      "loss": 0.0107,
      "step": 1824440
    },
    {
      "epoch": 2.985768805273528,
      "grad_norm": 0.16747036576271057,
      "learning_rate": 4.055006819844099e-06,
      "loss": 0.013,
      "step": 1824460
    },
    {
      "epoch": 2.9858015357121817,
      "grad_norm": 0.09635631740093231,
      "learning_rate": 4.054940927630582e-06,
      "loss": 0.0143,
      "step": 1824480
    },
    {
      "epoch": 2.985834266150835,
      "grad_norm": 0.09661691635847092,
      "learning_rate": 4.0548750354170656e-06,
      "loss": 0.0102,
      "step": 1824500
    },
    {
      "epoch": 2.9858669965894884,
      "grad_norm": 0.21364957094192505,
      "learning_rate": 4.054809143203548e-06,
      "loss": 0.0213,
      "step": 1824520
    },
    {
      "epoch": 2.9858997270281415,
      "grad_norm": 0.20383600890636444,
      "learning_rate": 4.054743250990031e-06,
      "loss": 0.0068,
      "step": 1824540
    },
    {
      "epoch": 2.985932457466795,
      "grad_norm": 0.16456344723701477,
      "learning_rate": 4.054677358776514e-06,
      "loss": 0.0137,
      "step": 1824560
    },
    {
      "epoch": 2.9859651879054483,
      "grad_norm": 0.7393693327903748,
      "learning_rate": 4.0546114665629965e-06,
      "loss": 0.0124,
      "step": 1824580
    },
    {
      "epoch": 2.9859979183441014,
      "grad_norm": 0.22574812173843384,
      "learning_rate": 4.054545574349479e-06,
      "loss": 0.0104,
      "step": 1824600
    },
    {
      "epoch": 2.986030648782755,
      "grad_norm": 0.20427455008029938,
      "learning_rate": 4.054479682135962e-06,
      "loss": 0.0183,
      "step": 1824620
    },
    {
      "epoch": 2.986063379221408,
      "grad_norm": 0.4113937318325043,
      "learning_rate": 4.054413789922445e-06,
      "loss": 0.0112,
      "step": 1824640
    },
    {
      "epoch": 2.986096109660062,
      "grad_norm": 0.10697346925735474,
      "learning_rate": 4.0543478977089275e-06,
      "loss": 0.0105,
      "step": 1824660
    },
    {
      "epoch": 2.986128840098715,
      "grad_norm": 0.5174089074134827,
      "learning_rate": 4.054282005495411e-06,
      "loss": 0.0097,
      "step": 1824680
    },
    {
      "epoch": 2.9861615705373685,
      "grad_norm": 0.3465329706668854,
      "learning_rate": 4.054216113281894e-06,
      "loss": 0.0089,
      "step": 1824700
    },
    {
      "epoch": 2.9861943009760217,
      "grad_norm": 0.31255194544792175,
      "learning_rate": 4.0541502210683766e-06,
      "loss": 0.0093,
      "step": 1824720
    },
    {
      "epoch": 2.986227031414675,
      "grad_norm": 0.22371652722358704,
      "learning_rate": 4.05408432885486e-06,
      "loss": 0.0151,
      "step": 1824740
    },
    {
      "epoch": 2.9862597618533284,
      "grad_norm": 0.23191431164741516,
      "learning_rate": 4.054018436641343e-06,
      "loss": 0.0129,
      "step": 1824760
    },
    {
      "epoch": 2.9862924922919816,
      "grad_norm": 1.0875238180160522,
      "learning_rate": 4.053952544427826e-06,
      "loss": 0.0185,
      "step": 1824780
    },
    {
      "epoch": 2.986325222730635,
      "grad_norm": 0.5053040981292725,
      "learning_rate": 4.053886652214308e-06,
      "loss": 0.0115,
      "step": 1824800
    },
    {
      "epoch": 2.9863579531692883,
      "grad_norm": 0.5002503395080566,
      "learning_rate": 4.053820760000791e-06,
      "loss": 0.014,
      "step": 1824820
    },
    {
      "epoch": 2.986390683607942,
      "grad_norm": 0.2882176637649536,
      "learning_rate": 4.053754867787274e-06,
      "loss": 0.0125,
      "step": 1824840
    },
    {
      "epoch": 2.986423414046595,
      "grad_norm": 0.11261111497879028,
      "learning_rate": 4.053688975573757e-06,
      "loss": 0.0123,
      "step": 1824860
    },
    {
      "epoch": 2.986456144485248,
      "grad_norm": 0.6156505346298218,
      "learning_rate": 4.053623083360239e-06,
      "loss": 0.0173,
      "step": 1824880
    },
    {
      "epoch": 2.986488874923902,
      "grad_norm": 0.12352380156517029,
      "learning_rate": 4.053557191146723e-06,
      "loss": 0.0138,
      "step": 1824900
    },
    {
      "epoch": 2.986521605362555,
      "grad_norm": 0.5417686104774475,
      "learning_rate": 4.053491298933206e-06,
      "loss": 0.012,
      "step": 1824920
    },
    {
      "epoch": 2.9865543358012085,
      "grad_norm": 0.16694188117980957,
      "learning_rate": 4.053425406719688e-06,
      "loss": 0.0134,
      "step": 1824940
    },
    {
      "epoch": 2.9865870662398617,
      "grad_norm": 0.22882893681526184,
      "learning_rate": 4.053359514506171e-06,
      "loss": 0.0107,
      "step": 1824960
    },
    {
      "epoch": 2.9866197966785153,
      "grad_norm": 0.3891335129737854,
      "learning_rate": 4.053293622292654e-06,
      "loss": 0.0113,
      "step": 1824980
    },
    {
      "epoch": 2.9866525271171684,
      "grad_norm": 0.21935781836509705,
      "learning_rate": 4.053227730079137e-06,
      "loss": 0.0123,
      "step": 1825000
    },
    {
      "epoch": 2.9866852575558216,
      "grad_norm": 0.25635337829589844,
      "learning_rate": 4.053161837865619e-06,
      "loss": 0.0142,
      "step": 1825020
    },
    {
      "epoch": 2.986717987994475,
      "grad_norm": 0.6435047388076782,
      "learning_rate": 4.053095945652102e-06,
      "loss": 0.0095,
      "step": 1825040
    },
    {
      "epoch": 2.9867507184331283,
      "grad_norm": 0.3038892149925232,
      "learning_rate": 4.053030053438585e-06,
      "loss": 0.0097,
      "step": 1825060
    },
    {
      "epoch": 2.986783448871782,
      "grad_norm": 0.10144372284412384,
      "learning_rate": 4.052964161225068e-06,
      "loss": 0.0121,
      "step": 1825080
    },
    {
      "epoch": 2.986816179310435,
      "grad_norm": 1.6550495624542236,
      "learning_rate": 4.052898269011551e-06,
      "loss": 0.0138,
      "step": 1825100
    },
    {
      "epoch": 2.9868489097490887,
      "grad_norm": 0.05317721515893936,
      "learning_rate": 4.052832376798035e-06,
      "loss": 0.0207,
      "step": 1825120
    },
    {
      "epoch": 2.986881640187742,
      "grad_norm": 0.2141777127981186,
      "learning_rate": 4.0527664845845175e-06,
      "loss": 0.0172,
      "step": 1825140
    },
    {
      "epoch": 2.986914370626395,
      "grad_norm": 0.33164533972740173,
      "learning_rate": 4.052700592371e-06,
      "loss": 0.0123,
      "step": 1825160
    },
    {
      "epoch": 2.9869471010650486,
      "grad_norm": 0.17090797424316406,
      "learning_rate": 4.052634700157483e-06,
      "loss": 0.0106,
      "step": 1825180
    },
    {
      "epoch": 2.9869798315037017,
      "grad_norm": 0.28316977620124817,
      "learning_rate": 4.052568807943966e-06,
      "loss": 0.0127,
      "step": 1825200
    },
    {
      "epoch": 2.9870125619423553,
      "grad_norm": 0.7001391053199768,
      "learning_rate": 4.0525029157304484e-06,
      "loss": 0.011,
      "step": 1825220
    },
    {
      "epoch": 2.9870452923810085,
      "grad_norm": 0.486709862947464,
      "learning_rate": 4.052437023516931e-06,
      "loss": 0.0146,
      "step": 1825240
    },
    {
      "epoch": 2.987078022819662,
      "grad_norm": 0.2470730096101761,
      "learning_rate": 4.052371131303414e-06,
      "loss": 0.0085,
      "step": 1825260
    },
    {
      "epoch": 2.987110753258315,
      "grad_norm": 0.6169056296348572,
      "learning_rate": 4.052305239089897e-06,
      "loss": 0.0135,
      "step": 1825280
    },
    {
      "epoch": 2.9871434836969684,
      "grad_norm": 0.3186657130718231,
      "learning_rate": 4.05223934687638e-06,
      "loss": 0.0096,
      "step": 1825300
    },
    {
      "epoch": 2.987176214135622,
      "grad_norm": 0.11760028451681137,
      "learning_rate": 4.052173454662863e-06,
      "loss": 0.0139,
      "step": 1825320
    },
    {
      "epoch": 2.987208944574275,
      "grad_norm": 0.3419165015220642,
      "learning_rate": 4.052107562449346e-06,
      "loss": 0.0162,
      "step": 1825340
    },
    {
      "epoch": 2.9872416750129283,
      "grad_norm": 0.23447859287261963,
      "learning_rate": 4.0520416702358285e-06,
      "loss": 0.0106,
      "step": 1825360
    },
    {
      "epoch": 2.987274405451582,
      "grad_norm": 0.8166794180870056,
      "learning_rate": 4.051975778022311e-06,
      "loss": 0.0155,
      "step": 1825380
    },
    {
      "epoch": 2.9873071358902354,
      "grad_norm": 0.6093191504478455,
      "learning_rate": 4.051909885808794e-06,
      "loss": 0.0118,
      "step": 1825400
    },
    {
      "epoch": 2.9873398663288886,
      "grad_norm": 1.1909117698669434,
      "learning_rate": 4.051843993595277e-06,
      "loss": 0.008,
      "step": 1825420
    },
    {
      "epoch": 2.9873725967675417,
      "grad_norm": 0.16205725073814392,
      "learning_rate": 4.05177810138176e-06,
      "loss": 0.0145,
      "step": 1825440
    },
    {
      "epoch": 2.9874053272061953,
      "grad_norm": 0.07615572959184647,
      "learning_rate": 4.051712209168243e-06,
      "loss": 0.0139,
      "step": 1825460
    },
    {
      "epoch": 2.9874380576448485,
      "grad_norm": 0.6963377594947815,
      "learning_rate": 4.051646316954726e-06,
      "loss": 0.0108,
      "step": 1825480
    },
    {
      "epoch": 2.9874707880835016,
      "grad_norm": 0.09136773645877838,
      "learning_rate": 4.0515804247412085e-06,
      "loss": 0.0148,
      "step": 1825500
    },
    {
      "epoch": 2.9875035185221552,
      "grad_norm": 0.33086302876472473,
      "learning_rate": 4.051514532527692e-06,
      "loss": 0.0133,
      "step": 1825520
    },
    {
      "epoch": 2.987536248960809,
      "grad_norm": 0.622490644454956,
      "learning_rate": 4.051448640314175e-06,
      "loss": 0.0125,
      "step": 1825540
    },
    {
      "epoch": 2.987568979399462,
      "grad_norm": 0.10334846377372742,
      "learning_rate": 4.0513827481006576e-06,
      "loss": 0.0116,
      "step": 1825560
    },
    {
      "epoch": 2.987601709838115,
      "grad_norm": 0.2458772361278534,
      "learning_rate": 4.05131685588714e-06,
      "loss": 0.0093,
      "step": 1825580
    },
    {
      "epoch": 2.9876344402767687,
      "grad_norm": 0.1972568929195404,
      "learning_rate": 4.051250963673623e-06,
      "loss": 0.0147,
      "step": 1825600
    },
    {
      "epoch": 2.987667170715422,
      "grad_norm": 0.32435664534568787,
      "learning_rate": 4.051185071460106e-06,
      "loss": 0.0135,
      "step": 1825620
    },
    {
      "epoch": 2.987699901154075,
      "grad_norm": 0.3749598562717438,
      "learning_rate": 4.0511191792465885e-06,
      "loss": 0.0101,
      "step": 1825640
    },
    {
      "epoch": 2.9877326315927286,
      "grad_norm": 0.37895941734313965,
      "learning_rate": 4.051053287033071e-06,
      "loss": 0.0103,
      "step": 1825660
    },
    {
      "epoch": 2.987765362031382,
      "grad_norm": 0.2760273516178131,
      "learning_rate": 4.050987394819554e-06,
      "loss": 0.0146,
      "step": 1825680
    },
    {
      "epoch": 2.9877980924700354,
      "grad_norm": 0.41568806767463684,
      "learning_rate": 4.050921502606038e-06,
      "loss": 0.0119,
      "step": 1825700
    },
    {
      "epoch": 2.9878308229086885,
      "grad_norm": 0.3208782374858856,
      "learning_rate": 4.05085561039252e-06,
      "loss": 0.0133,
      "step": 1825720
    },
    {
      "epoch": 2.987863553347342,
      "grad_norm": 0.07211555540561676,
      "learning_rate": 4.050789718179003e-06,
      "loss": 0.0117,
      "step": 1825740
    },
    {
      "epoch": 2.9878962837859953,
      "grad_norm": 0.10444159805774689,
      "learning_rate": 4.050723825965486e-06,
      "loss": 0.0134,
      "step": 1825760
    },
    {
      "epoch": 2.9879290142246484,
      "grad_norm": 0.5671256184577942,
      "learning_rate": 4.0506579337519685e-06,
      "loss": 0.0088,
      "step": 1825780
    },
    {
      "epoch": 2.987961744663302,
      "grad_norm": 0.6445857882499695,
      "learning_rate": 4.050592041538452e-06,
      "loss": 0.0158,
      "step": 1825800
    },
    {
      "epoch": 2.9879944751019556,
      "grad_norm": 0.1802569329738617,
      "learning_rate": 4.050526149324935e-06,
      "loss": 0.0152,
      "step": 1825820
    },
    {
      "epoch": 2.9880272055406087,
      "grad_norm": 0.18763533234596252,
      "learning_rate": 4.050460257111418e-06,
      "loss": 0.0137,
      "step": 1825840
    },
    {
      "epoch": 2.988059935979262,
      "grad_norm": 0.10597369819879532,
      "learning_rate": 4.0503943648979e-06,
      "loss": 0.0089,
      "step": 1825860
    },
    {
      "epoch": 2.9880926664179155,
      "grad_norm": 0.8642940521240234,
      "learning_rate": 4.050328472684383e-06,
      "loss": 0.0104,
      "step": 1825880
    },
    {
      "epoch": 2.9881253968565686,
      "grad_norm": 0.026296336203813553,
      "learning_rate": 4.050262580470866e-06,
      "loss": 0.0106,
      "step": 1825900
    },
    {
      "epoch": 2.988158127295222,
      "grad_norm": 0.18187157809734344,
      "learning_rate": 4.050196688257349e-06,
      "loss": 0.0122,
      "step": 1825920
    },
    {
      "epoch": 2.9881908577338754,
      "grad_norm": 0.424873024225235,
      "learning_rate": 4.050130796043832e-06,
      "loss": 0.0196,
      "step": 1825940
    },
    {
      "epoch": 2.9882235881725285,
      "grad_norm": 0.855472207069397,
      "learning_rate": 4.050064903830315e-06,
      "loss": 0.0167,
      "step": 1825960
    },
    {
      "epoch": 2.988256318611182,
      "grad_norm": 0.19669866561889648,
      "learning_rate": 4.049999011616798e-06,
      "loss": 0.0126,
      "step": 1825980
    },
    {
      "epoch": 2.9882890490498353,
      "grad_norm": 0.3821769058704376,
      "learning_rate": 4.04993311940328e-06,
      "loss": 0.0084,
      "step": 1826000
    },
    {
      "epoch": 2.988321779488489,
      "grad_norm": 0.20298060774803162,
      "learning_rate": 4.049867227189763e-06,
      "loss": 0.0126,
      "step": 1826020
    },
    {
      "epoch": 2.988354509927142,
      "grad_norm": 0.9355258941650391,
      "learning_rate": 4.049801334976246e-06,
      "loss": 0.0123,
      "step": 1826040
    },
    {
      "epoch": 2.988387240365795,
      "grad_norm": 0.2039029896259308,
      "learning_rate": 4.049735442762729e-06,
      "loss": 0.0132,
      "step": 1826060
    },
    {
      "epoch": 2.9884199708044488,
      "grad_norm": 0.37875866889953613,
      "learning_rate": 4.049669550549211e-06,
      "loss": 0.0158,
      "step": 1826080
    },
    {
      "epoch": 2.988452701243102,
      "grad_norm": 0.16292428970336914,
      "learning_rate": 4.049603658335695e-06,
      "loss": 0.0176,
      "step": 1826100
    },
    {
      "epoch": 2.9884854316817555,
      "grad_norm": 0.13522905111312866,
      "learning_rate": 4.049537766122178e-06,
      "loss": 0.017,
      "step": 1826120
    },
    {
      "epoch": 2.9885181621204087,
      "grad_norm": 0.41843852400779724,
      "learning_rate": 4.04947187390866e-06,
      "loss": 0.0129,
      "step": 1826140
    },
    {
      "epoch": 2.9885508925590623,
      "grad_norm": 0.2600345015525818,
      "learning_rate": 4.049405981695144e-06,
      "loss": 0.0135,
      "step": 1826160
    },
    {
      "epoch": 2.9885836229977154,
      "grad_norm": 0.16359922289848328,
      "learning_rate": 4.049340089481627e-06,
      "loss": 0.0109,
      "step": 1826180
    },
    {
      "epoch": 2.9886163534363686,
      "grad_norm": 0.4108530879020691,
      "learning_rate": 4.0492741972681095e-06,
      "loss": 0.0124,
      "step": 1826200
    },
    {
      "epoch": 2.988649083875022,
      "grad_norm": 0.09928988665342331,
      "learning_rate": 4.049208305054592e-06,
      "loss": 0.0115,
      "step": 1826220
    },
    {
      "epoch": 2.9886818143136753,
      "grad_norm": 0.10400345176458359,
      "learning_rate": 4.049142412841075e-06,
      "loss": 0.0125,
      "step": 1826240
    },
    {
      "epoch": 2.988714544752329,
      "grad_norm": 0.5664816498756409,
      "learning_rate": 4.049076520627558e-06,
      "loss": 0.0135,
      "step": 1826260
    },
    {
      "epoch": 2.988747275190982,
      "grad_norm": 0.4427448809146881,
      "learning_rate": 4.04901062841404e-06,
      "loss": 0.0136,
      "step": 1826280
    },
    {
      "epoch": 2.9887800056296356,
      "grad_norm": 0.43913593888282776,
      "learning_rate": 4.048944736200523e-06,
      "loss": 0.014,
      "step": 1826300
    },
    {
      "epoch": 2.988812736068289,
      "grad_norm": 0.6812916994094849,
      "learning_rate": 4.048878843987007e-06,
      "loss": 0.0102,
      "step": 1826320
    },
    {
      "epoch": 2.988845466506942,
      "grad_norm": 0.11817022413015366,
      "learning_rate": 4.0488129517734895e-06,
      "loss": 0.0176,
      "step": 1826340
    },
    {
      "epoch": 2.9888781969455955,
      "grad_norm": 0.1644107848405838,
      "learning_rate": 4.048747059559972e-06,
      "loss": 0.0107,
      "step": 1826360
    },
    {
      "epoch": 2.9889109273842487,
      "grad_norm": 0.3195008933544159,
      "learning_rate": 4.048681167346455e-06,
      "loss": 0.0117,
      "step": 1826380
    },
    {
      "epoch": 2.9889436578229023,
      "grad_norm": 0.27325722575187683,
      "learning_rate": 4.048615275132938e-06,
      "loss": 0.0104,
      "step": 1826400
    },
    {
      "epoch": 2.9889763882615554,
      "grad_norm": 1.1251717805862427,
      "learning_rate": 4.0485493829194204e-06,
      "loss": 0.0151,
      "step": 1826420
    },
    {
      "epoch": 2.989009118700209,
      "grad_norm": 0.21734970808029175,
      "learning_rate": 4.048483490705903e-06,
      "loss": 0.0146,
      "step": 1826440
    },
    {
      "epoch": 2.989041849138862,
      "grad_norm": 0.5331710577011108,
      "learning_rate": 4.048417598492386e-06,
      "loss": 0.0096,
      "step": 1826460
    },
    {
      "epoch": 2.9890745795775153,
      "grad_norm": 1.22161066532135,
      "learning_rate": 4.0483517062788695e-06,
      "loss": 0.0171,
      "step": 1826480
    },
    {
      "epoch": 2.989107310016169,
      "grad_norm": 0.10739414393901825,
      "learning_rate": 4.048285814065352e-06,
      "loss": 0.0169,
      "step": 1826500
    },
    {
      "epoch": 2.989140040454822,
      "grad_norm": 0.4228024482727051,
      "learning_rate": 4.048219921851835e-06,
      "loss": 0.0104,
      "step": 1826520
    },
    {
      "epoch": 2.9891727708934757,
      "grad_norm": 0.34379905462265015,
      "learning_rate": 4.0481540296383186e-06,
      "loss": 0.0091,
      "step": 1826540
    },
    {
      "epoch": 2.989205501332129,
      "grad_norm": 0.4060730040073395,
      "learning_rate": 4.048088137424801e-06,
      "loss": 0.0147,
      "step": 1826560
    },
    {
      "epoch": 2.9892382317707824,
      "grad_norm": 0.5422187447547913,
      "learning_rate": 4.048022245211284e-06,
      "loss": 0.0109,
      "step": 1826580
    },
    {
      "epoch": 2.9892709622094356,
      "grad_norm": 0.3322436809539795,
      "learning_rate": 4.047956352997767e-06,
      "loss": 0.0132,
      "step": 1826600
    },
    {
      "epoch": 2.9893036926480887,
      "grad_norm": 0.3920140266418457,
      "learning_rate": 4.0478904607842495e-06,
      "loss": 0.0112,
      "step": 1826620
    },
    {
      "epoch": 2.9893364230867423,
      "grad_norm": 1.0193430185317993,
      "learning_rate": 4.047824568570732e-06,
      "loss": 0.0116,
      "step": 1826640
    },
    {
      "epoch": 2.9893691535253955,
      "grad_norm": 0.2274191528558731,
      "learning_rate": 4.047758676357215e-06,
      "loss": 0.0171,
      "step": 1826660
    },
    {
      "epoch": 2.989401883964049,
      "grad_norm": 0.2464049905538559,
      "learning_rate": 4.047692784143698e-06,
      "loss": 0.0119,
      "step": 1826680
    },
    {
      "epoch": 2.989434614402702,
      "grad_norm": 0.4028857946395874,
      "learning_rate": 4.0476268919301805e-06,
      "loss": 0.0127,
      "step": 1826700
    },
    {
      "epoch": 2.989467344841356,
      "grad_norm": 0.42168232798576355,
      "learning_rate": 4.047560999716664e-06,
      "loss": 0.0085,
      "step": 1826720
    },
    {
      "epoch": 2.989500075280009,
      "grad_norm": 0.10327587276697159,
      "learning_rate": 4.047495107503147e-06,
      "loss": 0.0131,
      "step": 1826740
    },
    {
      "epoch": 2.989532805718662,
      "grad_norm": 0.16781695187091827,
      "learning_rate": 4.0474292152896296e-06,
      "loss": 0.0133,
      "step": 1826760
    },
    {
      "epoch": 2.9895655361573157,
      "grad_norm": 0.6415258049964905,
      "learning_rate": 4.047363323076112e-06,
      "loss": 0.0148,
      "step": 1826780
    },
    {
      "epoch": 2.989598266595969,
      "grad_norm": 0.6947892904281616,
      "learning_rate": 4.047297430862595e-06,
      "loss": 0.0164,
      "step": 1826800
    },
    {
      "epoch": 2.989630997034622,
      "grad_norm": 0.23545119166374207,
      "learning_rate": 4.047231538649078e-06,
      "loss": 0.0235,
      "step": 1826820
    },
    {
      "epoch": 2.9896637274732756,
      "grad_norm": 0.15801894664764404,
      "learning_rate": 4.047165646435561e-06,
      "loss": 0.017,
      "step": 1826840
    },
    {
      "epoch": 2.989696457911929,
      "grad_norm": 0.11458403617143631,
      "learning_rate": 4.047099754222044e-06,
      "loss": 0.0112,
      "step": 1826860
    },
    {
      "epoch": 2.9897291883505823,
      "grad_norm": 0.21799176931381226,
      "learning_rate": 4.047033862008527e-06,
      "loss": 0.0139,
      "step": 1826880
    },
    {
      "epoch": 2.9897619187892355,
      "grad_norm": 0.2596508264541626,
      "learning_rate": 4.04696796979501e-06,
      "loss": 0.0081,
      "step": 1826900
    },
    {
      "epoch": 2.989794649227889,
      "grad_norm": 0.8013526797294617,
      "learning_rate": 4.046902077581492e-06,
      "loss": 0.0159,
      "step": 1826920
    },
    {
      "epoch": 2.989827379666542,
      "grad_norm": 0.4173179268836975,
      "learning_rate": 4.046836185367976e-06,
      "loss": 0.012,
      "step": 1826940
    },
    {
      "epoch": 2.9898601101051954,
      "grad_norm": 0.7655555009841919,
      "learning_rate": 4.046770293154459e-06,
      "loss": 0.018,
      "step": 1826960
    },
    {
      "epoch": 2.989892840543849,
      "grad_norm": 0.32980725169181824,
      "learning_rate": 4.046704400940941e-06,
      "loss": 0.0071,
      "step": 1826980
    },
    {
      "epoch": 2.9899255709825026,
      "grad_norm": 0.15413078665733337,
      "learning_rate": 4.046638508727424e-06,
      "loss": 0.0151,
      "step": 1827000
    },
    {
      "epoch": 2.9899583014211557,
      "grad_norm": 0.1174510195851326,
      "learning_rate": 4.046572616513907e-06,
      "loss": 0.0099,
      "step": 1827020
    },
    {
      "epoch": 2.989991031859809,
      "grad_norm": 0.13412970304489136,
      "learning_rate": 4.04650672430039e-06,
      "loss": 0.0094,
      "step": 1827040
    },
    {
      "epoch": 2.9900237622984625,
      "grad_norm": 0.4961579442024231,
      "learning_rate": 4.046440832086872e-06,
      "loss": 0.0108,
      "step": 1827060
    },
    {
      "epoch": 2.9900564927371156,
      "grad_norm": 0.22845059633255005,
      "learning_rate": 4.046374939873355e-06,
      "loss": 0.0121,
      "step": 1827080
    },
    {
      "epoch": 2.9900892231757688,
      "grad_norm": 0.17964155972003937,
      "learning_rate": 4.046309047659838e-06,
      "loss": 0.0118,
      "step": 1827100
    },
    {
      "epoch": 2.9901219536144223,
      "grad_norm": 0.406186044216156,
      "learning_rate": 4.046243155446321e-06,
      "loss": 0.0191,
      "step": 1827120
    },
    {
      "epoch": 2.990154684053076,
      "grad_norm": 0.17240016162395477,
      "learning_rate": 4.046177263232804e-06,
      "loss": 0.0099,
      "step": 1827140
    },
    {
      "epoch": 2.990187414491729,
      "grad_norm": 0.3240184783935547,
      "learning_rate": 4.046111371019287e-06,
      "loss": 0.0099,
      "step": 1827160
    },
    {
      "epoch": 2.9902201449303822,
      "grad_norm": 0.4637742340564728,
      "learning_rate": 4.04604547880577e-06,
      "loss": 0.0107,
      "step": 1827180
    },
    {
      "epoch": 2.990252875369036,
      "grad_norm": 0.2690310478210449,
      "learning_rate": 4.045979586592253e-06,
      "loss": 0.0131,
      "step": 1827200
    },
    {
      "epoch": 2.990285605807689,
      "grad_norm": 0.22757843136787415,
      "learning_rate": 4.045913694378736e-06,
      "loss": 0.0139,
      "step": 1827220
    },
    {
      "epoch": 2.990318336246342,
      "grad_norm": 0.2600742280483246,
      "learning_rate": 4.045847802165219e-06,
      "loss": 0.0178,
      "step": 1827240
    },
    {
      "epoch": 2.9903510666849957,
      "grad_norm": 0.6868214011192322,
      "learning_rate": 4.0457819099517014e-06,
      "loss": 0.0123,
      "step": 1827260
    },
    {
      "epoch": 2.9903837971236493,
      "grad_norm": 0.29252877831459045,
      "learning_rate": 4.045716017738184e-06,
      "loss": 0.0169,
      "step": 1827280
    },
    {
      "epoch": 2.9904165275623025,
      "grad_norm": 0.14953026175498962,
      "learning_rate": 4.045650125524667e-06,
      "loss": 0.0143,
      "step": 1827300
    },
    {
      "epoch": 2.9904492580009556,
      "grad_norm": 0.7892537713050842,
      "learning_rate": 4.04558423331115e-06,
      "loss": 0.0174,
      "step": 1827320
    },
    {
      "epoch": 2.990481988439609,
      "grad_norm": 0.9885402917861938,
      "learning_rate": 4.045518341097633e-06,
      "loss": 0.0206,
      "step": 1827340
    },
    {
      "epoch": 2.9905147188782624,
      "grad_norm": 0.06185387447476387,
      "learning_rate": 4.045452448884116e-06,
      "loss": 0.008,
      "step": 1827360
    },
    {
      "epoch": 2.9905474493169155,
      "grad_norm": 0.20526434481143951,
      "learning_rate": 4.045386556670599e-06,
      "loss": 0.0108,
      "step": 1827380
    },
    {
      "epoch": 2.990580179755569,
      "grad_norm": 0.16423456370830536,
      "learning_rate": 4.0453206644570815e-06,
      "loss": 0.0142,
      "step": 1827400
    },
    {
      "epoch": 2.9906129101942223,
      "grad_norm": 0.33684149384498596,
      "learning_rate": 4.045254772243564e-06,
      "loss": 0.0142,
      "step": 1827420
    },
    {
      "epoch": 2.990645640632876,
      "grad_norm": 0.30089759826660156,
      "learning_rate": 4.045188880030047e-06,
      "loss": 0.0112,
      "step": 1827440
    },
    {
      "epoch": 2.990678371071529,
      "grad_norm": 0.26619917154312134,
      "learning_rate": 4.04512298781653e-06,
      "loss": 0.0102,
      "step": 1827460
    },
    {
      "epoch": 2.9907111015101826,
      "grad_norm": 0.28518831729888916,
      "learning_rate": 4.0450570956030124e-06,
      "loss": 0.0123,
      "step": 1827480
    },
    {
      "epoch": 2.9907438319488358,
      "grad_norm": 0.13109220564365387,
      "learning_rate": 4.044991203389495e-06,
      "loss": 0.0146,
      "step": 1827500
    },
    {
      "epoch": 2.990776562387489,
      "grad_norm": 0.10578446835279465,
      "learning_rate": 4.044925311175979e-06,
      "loss": 0.0221,
      "step": 1827520
    },
    {
      "epoch": 2.9908092928261425,
      "grad_norm": 0.14220720529556274,
      "learning_rate": 4.0448594189624615e-06,
      "loss": 0.0151,
      "step": 1827540
    },
    {
      "epoch": 2.9908420232647956,
      "grad_norm": 0.1933683305978775,
      "learning_rate": 4.044793526748944e-06,
      "loss": 0.019,
      "step": 1827560
    },
    {
      "epoch": 2.9908747537034492,
      "grad_norm": 0.09712941199541092,
      "learning_rate": 4.044727634535428e-06,
      "loss": 0.0118,
      "step": 1827580
    },
    {
      "epoch": 2.9909074841421024,
      "grad_norm": 1.2990326881408691,
      "learning_rate": 4.0446617423219106e-06,
      "loss": 0.0172,
      "step": 1827600
    },
    {
      "epoch": 2.990940214580756,
      "grad_norm": 0.36815986037254333,
      "learning_rate": 4.044595850108393e-06,
      "loss": 0.0134,
      "step": 1827620
    },
    {
      "epoch": 2.990972945019409,
      "grad_norm": 0.5368801355361938,
      "learning_rate": 4.044529957894876e-06,
      "loss": 0.0169,
      "step": 1827640
    },
    {
      "epoch": 2.9910056754580623,
      "grad_norm": 0.07782953232526779,
      "learning_rate": 4.044464065681359e-06,
      "loss": 0.0078,
      "step": 1827660
    },
    {
      "epoch": 2.991038405896716,
      "grad_norm": 0.1441696286201477,
      "learning_rate": 4.0443981734678415e-06,
      "loss": 0.0124,
      "step": 1827680
    },
    {
      "epoch": 2.991071136335369,
      "grad_norm": 0.10208802670240402,
      "learning_rate": 4.044332281254324e-06,
      "loss": 0.0087,
      "step": 1827700
    },
    {
      "epoch": 2.9911038667740226,
      "grad_norm": 0.5840904712677002,
      "learning_rate": 4.044266389040807e-06,
      "loss": 0.0126,
      "step": 1827720
    },
    {
      "epoch": 2.9911365972126758,
      "grad_norm": 0.1971060037612915,
      "learning_rate": 4.044200496827291e-06,
      "loss": 0.0128,
      "step": 1827740
    },
    {
      "epoch": 2.9911693276513294,
      "grad_norm": 0.38846126198768616,
      "learning_rate": 4.044134604613773e-06,
      "loss": 0.0104,
      "step": 1827760
    },
    {
      "epoch": 2.9912020580899825,
      "grad_norm": 0.12457045167684555,
      "learning_rate": 4.044068712400256e-06,
      "loss": 0.0092,
      "step": 1827780
    },
    {
      "epoch": 2.9912347885286357,
      "grad_norm": 0.24944479763507843,
      "learning_rate": 4.044002820186739e-06,
      "loss": 0.0102,
      "step": 1827800
    },
    {
      "epoch": 2.9912675189672893,
      "grad_norm": 0.5557823181152344,
      "learning_rate": 4.0439369279732215e-06,
      "loss": 0.0192,
      "step": 1827820
    },
    {
      "epoch": 2.9913002494059424,
      "grad_norm": 0.16874094307422638,
      "learning_rate": 4.043871035759704e-06,
      "loss": 0.0167,
      "step": 1827840
    },
    {
      "epoch": 2.991332979844596,
      "grad_norm": 0.5985658168792725,
      "learning_rate": 4.043805143546187e-06,
      "loss": 0.0098,
      "step": 1827860
    },
    {
      "epoch": 2.991365710283249,
      "grad_norm": 0.1817290186882019,
      "learning_rate": 4.04373925133267e-06,
      "loss": 0.0148,
      "step": 1827880
    },
    {
      "epoch": 2.9913984407219028,
      "grad_norm": 0.09669084846973419,
      "learning_rate": 4.043673359119153e-06,
      "loss": 0.0081,
      "step": 1827900
    },
    {
      "epoch": 2.991431171160556,
      "grad_norm": 0.10124202072620392,
      "learning_rate": 4.043607466905636e-06,
      "loss": 0.0157,
      "step": 1827920
    },
    {
      "epoch": 2.991463901599209,
      "grad_norm": 0.3316936194896698,
      "learning_rate": 4.043541574692119e-06,
      "loss": 0.0133,
      "step": 1827940
    },
    {
      "epoch": 2.9914966320378626,
      "grad_norm": 0.08438292890787125,
      "learning_rate": 4.043475682478602e-06,
      "loss": 0.0141,
      "step": 1827960
    },
    {
      "epoch": 2.991529362476516,
      "grad_norm": 0.13453510403633118,
      "learning_rate": 4.043409790265085e-06,
      "loss": 0.0179,
      "step": 1827980
    },
    {
      "epoch": 2.9915620929151694,
      "grad_norm": 0.17344598472118378,
      "learning_rate": 4.043343898051568e-06,
      "loss": 0.0119,
      "step": 1828000
    },
    {
      "epoch": 2.9915948233538225,
      "grad_norm": 0.4276147186756134,
      "learning_rate": 4.043278005838051e-06,
      "loss": 0.0163,
      "step": 1828020
    },
    {
      "epoch": 2.991627553792476,
      "grad_norm": 0.12055805325508118,
      "learning_rate": 4.043212113624533e-06,
      "loss": 0.0121,
      "step": 1828040
    },
    {
      "epoch": 2.9916602842311293,
      "grad_norm": 0.29203280806541443,
      "learning_rate": 4.043146221411016e-06,
      "loss": 0.0123,
      "step": 1828060
    },
    {
      "epoch": 2.9916930146697824,
      "grad_norm": 0.17980603873729706,
      "learning_rate": 4.043080329197499e-06,
      "loss": 0.0252,
      "step": 1828080
    },
    {
      "epoch": 2.991725745108436,
      "grad_norm": 0.08485796302556992,
      "learning_rate": 4.043014436983982e-06,
      "loss": 0.0087,
      "step": 1828100
    },
    {
      "epoch": 2.991758475547089,
      "grad_norm": 0.8690175414085388,
      "learning_rate": 4.042948544770464e-06,
      "loss": 0.0157,
      "step": 1828120
    },
    {
      "epoch": 2.9917912059857428,
      "grad_norm": 0.5305155515670776,
      "learning_rate": 4.042882652556948e-06,
      "loss": 0.0142,
      "step": 1828140
    },
    {
      "epoch": 2.991823936424396,
      "grad_norm": 0.34345465898513794,
      "learning_rate": 4.042816760343431e-06,
      "loss": 0.0105,
      "step": 1828160
    },
    {
      "epoch": 2.9918566668630495,
      "grad_norm": 0.4983421564102173,
      "learning_rate": 4.042750868129913e-06,
      "loss": 0.0189,
      "step": 1828180
    },
    {
      "epoch": 2.9918893973017027,
      "grad_norm": 0.3033961057662964,
      "learning_rate": 4.042684975916396e-06,
      "loss": 0.0114,
      "step": 1828200
    },
    {
      "epoch": 2.991922127740356,
      "grad_norm": 0.28157755732536316,
      "learning_rate": 4.042619083702879e-06,
      "loss": 0.0075,
      "step": 1828220
    },
    {
      "epoch": 2.9919548581790094,
      "grad_norm": 0.5053398609161377,
      "learning_rate": 4.042553191489362e-06,
      "loss": 0.0136,
      "step": 1828240
    },
    {
      "epoch": 2.9919875886176626,
      "grad_norm": 0.23211196064949036,
      "learning_rate": 4.042487299275845e-06,
      "loss": 0.0118,
      "step": 1828260
    },
    {
      "epoch": 2.992020319056316,
      "grad_norm": 0.04686189070343971,
      "learning_rate": 4.042421407062328e-06,
      "loss": 0.0092,
      "step": 1828280
    },
    {
      "epoch": 2.9920530494949693,
      "grad_norm": 0.6019389629364014,
      "learning_rate": 4.042355514848811e-06,
      "loss": 0.0136,
      "step": 1828300
    },
    {
      "epoch": 2.992085779933623,
      "grad_norm": 0.8915169835090637,
      "learning_rate": 4.0422896226352934e-06,
      "loss": 0.0197,
      "step": 1828320
    },
    {
      "epoch": 2.992118510372276,
      "grad_norm": 0.25930356979370117,
      "learning_rate": 4.042223730421776e-06,
      "loss": 0.012,
      "step": 1828340
    },
    {
      "epoch": 2.992151240810929,
      "grad_norm": 1.8455227613449097,
      "learning_rate": 4.04215783820826e-06,
      "loss": 0.013,
      "step": 1828360
    },
    {
      "epoch": 2.992183971249583,
      "grad_norm": 0.405005544424057,
      "learning_rate": 4.0420919459947425e-06,
      "loss": 0.0119,
      "step": 1828380
    },
    {
      "epoch": 2.992216701688236,
      "grad_norm": 0.2643601894378662,
      "learning_rate": 4.042026053781225e-06,
      "loss": 0.0117,
      "step": 1828400
    },
    {
      "epoch": 2.992249432126889,
      "grad_norm": 0.33286789059638977,
      "learning_rate": 4.041960161567708e-06,
      "loss": 0.0124,
      "step": 1828420
    },
    {
      "epoch": 2.9922821625655427,
      "grad_norm": 1.7820276021957397,
      "learning_rate": 4.041894269354191e-06,
      "loss": 0.0153,
      "step": 1828440
    },
    {
      "epoch": 2.9923148930041963,
      "grad_norm": 0.5491428375244141,
      "learning_rate": 4.0418283771406734e-06,
      "loss": 0.0153,
      "step": 1828460
    },
    {
      "epoch": 2.9923476234428494,
      "grad_norm": 0.13568733632564545,
      "learning_rate": 4.041762484927156e-06,
      "loss": 0.0204,
      "step": 1828480
    },
    {
      "epoch": 2.9923803538815026,
      "grad_norm": 0.1758263260126114,
      "learning_rate": 4.041696592713639e-06,
      "loss": 0.0149,
      "step": 1828500
    },
    {
      "epoch": 2.992413084320156,
      "grad_norm": 1.096847414970398,
      "learning_rate": 4.041630700500122e-06,
      "loss": 0.0175,
      "step": 1828520
    },
    {
      "epoch": 2.9924458147588093,
      "grad_norm": 0.5986827611923218,
      "learning_rate": 4.041564808286605e-06,
      "loss": 0.0097,
      "step": 1828540
    },
    {
      "epoch": 2.9924785451974625,
      "grad_norm": 0.4481852948665619,
      "learning_rate": 4.041498916073088e-06,
      "loss": 0.0106,
      "step": 1828560
    },
    {
      "epoch": 2.992511275636116,
      "grad_norm": 0.21322347223758698,
      "learning_rate": 4.041433023859571e-06,
      "loss": 0.0157,
      "step": 1828580
    },
    {
      "epoch": 2.9925440060747697,
      "grad_norm": 0.19100354611873627,
      "learning_rate": 4.041367131646054e-06,
      "loss": 0.0142,
      "step": 1828600
    },
    {
      "epoch": 2.992576736513423,
      "grad_norm": 0.4533817172050476,
      "learning_rate": 4.041301239432537e-06,
      "loss": 0.0111,
      "step": 1828620
    },
    {
      "epoch": 2.992609466952076,
      "grad_norm": 0.23151849210262299,
      "learning_rate": 4.04123534721902e-06,
      "loss": 0.0146,
      "step": 1828640
    },
    {
      "epoch": 2.9926421973907296,
      "grad_norm": 0.4042724072933197,
      "learning_rate": 4.0411694550055025e-06,
      "loss": 0.0086,
      "step": 1828660
    },
    {
      "epoch": 2.9926749278293827,
      "grad_norm": 0.8595430254936218,
      "learning_rate": 4.041103562791985e-06,
      "loss": 0.0095,
      "step": 1828680
    },
    {
      "epoch": 2.992707658268036,
      "grad_norm": 0.1628781408071518,
      "learning_rate": 4.041037670578468e-06,
      "loss": 0.0116,
      "step": 1828700
    },
    {
      "epoch": 2.9927403887066895,
      "grad_norm": 0.1430625319480896,
      "learning_rate": 4.040971778364951e-06,
      "loss": 0.0138,
      "step": 1828720
    },
    {
      "epoch": 2.992773119145343,
      "grad_norm": 0.18651337921619415,
      "learning_rate": 4.0409058861514335e-06,
      "loss": 0.0138,
      "step": 1828740
    },
    {
      "epoch": 2.992805849583996,
      "grad_norm": 0.4161907136440277,
      "learning_rate": 4.040839993937917e-06,
      "loss": 0.0164,
      "step": 1828760
    },
    {
      "epoch": 2.9928385800226494,
      "grad_norm": 0.45325520634651184,
      "learning_rate": 4.0407741017244e-06,
      "loss": 0.0157,
      "step": 1828780
    },
    {
      "epoch": 2.992871310461303,
      "grad_norm": 0.33311691880226135,
      "learning_rate": 4.0407082095108826e-06,
      "loss": 0.0139,
      "step": 1828800
    },
    {
      "epoch": 2.992904040899956,
      "grad_norm": 0.4437718093395233,
      "learning_rate": 4.040642317297365e-06,
      "loss": 0.0145,
      "step": 1828820
    },
    {
      "epoch": 2.9929367713386092,
      "grad_norm": 0.39722543954849243,
      "learning_rate": 4.040576425083848e-06,
      "loss": 0.0117,
      "step": 1828840
    },
    {
      "epoch": 2.992969501777263,
      "grad_norm": 0.41293564438819885,
      "learning_rate": 4.040510532870331e-06,
      "loss": 0.0067,
      "step": 1828860
    },
    {
      "epoch": 2.993002232215916,
      "grad_norm": 0.5591164827346802,
      "learning_rate": 4.0404446406568135e-06,
      "loss": 0.0259,
      "step": 1828880
    },
    {
      "epoch": 2.9930349626545696,
      "grad_norm": 0.4929773807525635,
      "learning_rate": 4.040378748443296e-06,
      "loss": 0.0112,
      "step": 1828900
    },
    {
      "epoch": 2.9930676930932227,
      "grad_norm": 0.17414745688438416,
      "learning_rate": 4.040312856229779e-06,
      "loss": 0.012,
      "step": 1828920
    },
    {
      "epoch": 2.9931004235318763,
      "grad_norm": 0.44658198952674866,
      "learning_rate": 4.040246964016263e-06,
      "loss": 0.0163,
      "step": 1828940
    },
    {
      "epoch": 2.9931331539705295,
      "grad_norm": 0.646594226360321,
      "learning_rate": 4.040181071802745e-06,
      "loss": 0.0124,
      "step": 1828960
    },
    {
      "epoch": 2.9931658844091826,
      "grad_norm": 1.5097146034240723,
      "learning_rate": 4.040115179589229e-06,
      "loss": 0.0114,
      "step": 1828980
    },
    {
      "epoch": 2.9931986148478362,
      "grad_norm": 0.15279890596866608,
      "learning_rate": 4.040049287375712e-06,
      "loss": 0.0115,
      "step": 1829000
    },
    {
      "epoch": 2.9932313452864894,
      "grad_norm": 0.1433127224445343,
      "learning_rate": 4.039983395162194e-06,
      "loss": 0.0088,
      "step": 1829020
    },
    {
      "epoch": 2.993264075725143,
      "grad_norm": 0.10510992258787155,
      "learning_rate": 4.039917502948677e-06,
      "loss": 0.0139,
      "step": 1829040
    },
    {
      "epoch": 2.993296806163796,
      "grad_norm": 0.4055863618850708,
      "learning_rate": 4.03985161073516e-06,
      "loss": 0.015,
      "step": 1829060
    },
    {
      "epoch": 2.9933295366024497,
      "grad_norm": 0.49618345499038696,
      "learning_rate": 4.039785718521643e-06,
      "loss": 0.0121,
      "step": 1829080
    },
    {
      "epoch": 2.993362267041103,
      "grad_norm": 0.32180219888687134,
      "learning_rate": 4.039719826308125e-06,
      "loss": 0.0139,
      "step": 1829100
    },
    {
      "epoch": 2.993394997479756,
      "grad_norm": 0.08040261268615723,
      "learning_rate": 4.039653934094608e-06,
      "loss": 0.0111,
      "step": 1829120
    },
    {
      "epoch": 2.9934277279184096,
      "grad_norm": 0.3164448142051697,
      "learning_rate": 4.039588041881091e-06,
      "loss": 0.0154,
      "step": 1829140
    },
    {
      "epoch": 2.9934604583570628,
      "grad_norm": 0.14314182102680206,
      "learning_rate": 4.039522149667574e-06,
      "loss": 0.0091,
      "step": 1829160
    },
    {
      "epoch": 2.9934931887957164,
      "grad_norm": 0.569290280342102,
      "learning_rate": 4.039456257454057e-06,
      "loss": 0.0183,
      "step": 1829180
    },
    {
      "epoch": 2.9935259192343695,
      "grad_norm": 0.28052157163619995,
      "learning_rate": 4.03939036524054e-06,
      "loss": 0.0105,
      "step": 1829200
    },
    {
      "epoch": 2.993558649673023,
      "grad_norm": 0.3755739629268646,
      "learning_rate": 4.039324473027023e-06,
      "loss": 0.0164,
      "step": 1829220
    },
    {
      "epoch": 2.9935913801116762,
      "grad_norm": 0.115486741065979,
      "learning_rate": 4.039258580813505e-06,
      "loss": 0.0129,
      "step": 1829240
    },
    {
      "epoch": 2.9936241105503294,
      "grad_norm": 0.21106386184692383,
      "learning_rate": 4.039192688599988e-06,
      "loss": 0.0103,
      "step": 1829260
    },
    {
      "epoch": 2.993656840988983,
      "grad_norm": 0.2917384207248688,
      "learning_rate": 4.039126796386471e-06,
      "loss": 0.0121,
      "step": 1829280
    },
    {
      "epoch": 2.993689571427636,
      "grad_norm": 0.15351328253746033,
      "learning_rate": 4.0390609041729544e-06,
      "loss": 0.0175,
      "step": 1829300
    },
    {
      "epoch": 2.9937223018662897,
      "grad_norm": 0.608204185962677,
      "learning_rate": 4.038995011959437e-06,
      "loss": 0.0181,
      "step": 1829320
    },
    {
      "epoch": 2.993755032304943,
      "grad_norm": 0.179181307554245,
      "learning_rate": 4.03892911974592e-06,
      "loss": 0.0098,
      "step": 1829340
    },
    {
      "epoch": 2.9937877627435965,
      "grad_norm": 0.29888996481895447,
      "learning_rate": 4.038863227532403e-06,
      "loss": 0.0108,
      "step": 1829360
    },
    {
      "epoch": 2.9938204931822496,
      "grad_norm": 0.3876825273036957,
      "learning_rate": 4.038797335318886e-06,
      "loss": 0.0142,
      "step": 1829380
    },
    {
      "epoch": 2.993853223620903,
      "grad_norm": 0.1904909312725067,
      "learning_rate": 4.038731443105369e-06,
      "loss": 0.0142,
      "step": 1829400
    },
    {
      "epoch": 2.9938859540595564,
      "grad_norm": 0.3384942412376404,
      "learning_rate": 4.038665550891852e-06,
      "loss": 0.01,
      "step": 1829420
    },
    {
      "epoch": 2.9939186844982095,
      "grad_norm": 0.15532980859279633,
      "learning_rate": 4.0385996586783345e-06,
      "loss": 0.0127,
      "step": 1829440
    },
    {
      "epoch": 2.993951414936863,
      "grad_norm": 0.48441219329833984,
      "learning_rate": 4.038533766464817e-06,
      "loss": 0.0111,
      "step": 1829460
    },
    {
      "epoch": 2.9939841453755163,
      "grad_norm": 0.3327777683734894,
      "learning_rate": 4.0384678742513e-06,
      "loss": 0.0145,
      "step": 1829480
    },
    {
      "epoch": 2.99401687581417,
      "grad_norm": 0.14439107477664948,
      "learning_rate": 4.038401982037783e-06,
      "loss": 0.0138,
      "step": 1829500
    },
    {
      "epoch": 2.994049606252823,
      "grad_norm": 0.1826523244380951,
      "learning_rate": 4.0383360898242654e-06,
      "loss": 0.0097,
      "step": 1829520
    },
    {
      "epoch": 2.994082336691476,
      "grad_norm": 0.1475030481815338,
      "learning_rate": 4.038270197610748e-06,
      "loss": 0.011,
      "step": 1829540
    },
    {
      "epoch": 2.9941150671301298,
      "grad_norm": 0.2324737310409546,
      "learning_rate": 4.038204305397232e-06,
      "loss": 0.0085,
      "step": 1829560
    },
    {
      "epoch": 2.994147797568783,
      "grad_norm": 0.2658803462982178,
      "learning_rate": 4.0381384131837145e-06,
      "loss": 0.0105,
      "step": 1829580
    },
    {
      "epoch": 2.9941805280074365,
      "grad_norm": 0.1834973692893982,
      "learning_rate": 4.038072520970197e-06,
      "loss": 0.0099,
      "step": 1829600
    },
    {
      "epoch": 2.9942132584460897,
      "grad_norm": 0.26705437898635864,
      "learning_rate": 4.03800662875668e-06,
      "loss": 0.0134,
      "step": 1829620
    },
    {
      "epoch": 2.9942459888847432,
      "grad_norm": 0.2846337854862213,
      "learning_rate": 4.037940736543163e-06,
      "loss": 0.0099,
      "step": 1829640
    },
    {
      "epoch": 2.9942787193233964,
      "grad_norm": 0.5011658668518066,
      "learning_rate": 4.037874844329646e-06,
      "loss": 0.0146,
      "step": 1829660
    },
    {
      "epoch": 2.9943114497620495,
      "grad_norm": 0.06615381687879562,
      "learning_rate": 4.037808952116129e-06,
      "loss": 0.0142,
      "step": 1829680
    },
    {
      "epoch": 2.994344180200703,
      "grad_norm": 0.13266967236995697,
      "learning_rate": 4.037743059902612e-06,
      "loss": 0.0123,
      "step": 1829700
    },
    {
      "epoch": 2.9943769106393563,
      "grad_norm": 0.3502114713191986,
      "learning_rate": 4.0376771676890945e-06,
      "loss": 0.0139,
      "step": 1829720
    },
    {
      "epoch": 2.99440964107801,
      "grad_norm": 0.17518092691898346,
      "learning_rate": 4.037611275475577e-06,
      "loss": 0.0123,
      "step": 1829740
    },
    {
      "epoch": 2.994442371516663,
      "grad_norm": 0.3572525978088379,
      "learning_rate": 4.03754538326206e-06,
      "loss": 0.0115,
      "step": 1829760
    },
    {
      "epoch": 2.9944751019553166,
      "grad_norm": 0.4120355248451233,
      "learning_rate": 4.037479491048544e-06,
      "loss": 0.0156,
      "step": 1829780
    },
    {
      "epoch": 2.99450783239397,
      "grad_norm": 0.20283637940883636,
      "learning_rate": 4.037413598835026e-06,
      "loss": 0.0145,
      "step": 1829800
    },
    {
      "epoch": 2.994540562832623,
      "grad_norm": 0.17077210545539856,
      "learning_rate": 4.037347706621509e-06,
      "loss": 0.0102,
      "step": 1829820
    },
    {
      "epoch": 2.9945732932712765,
      "grad_norm": 0.13582664728164673,
      "learning_rate": 4.037281814407992e-06,
      "loss": 0.0093,
      "step": 1829840
    },
    {
      "epoch": 2.9946060237099297,
      "grad_norm": 0.38237905502319336,
      "learning_rate": 4.0372159221944745e-06,
      "loss": 0.0214,
      "step": 1829860
    },
    {
      "epoch": 2.994638754148583,
      "grad_norm": 0.3718082904815674,
      "learning_rate": 4.037150029980957e-06,
      "loss": 0.0113,
      "step": 1829880
    },
    {
      "epoch": 2.9946714845872364,
      "grad_norm": 0.4550894796848297,
      "learning_rate": 4.03708413776744e-06,
      "loss": 0.0176,
      "step": 1829900
    },
    {
      "epoch": 2.99470421502589,
      "grad_norm": 0.3326462507247925,
      "learning_rate": 4.037018245553923e-06,
      "loss": 0.016,
      "step": 1829920
    },
    {
      "epoch": 2.994736945464543,
      "grad_norm": 0.23167367279529572,
      "learning_rate": 4.0369523533404055e-06,
      "loss": 0.0152,
      "step": 1829940
    },
    {
      "epoch": 2.9947696759031963,
      "grad_norm": 0.31315889954566956,
      "learning_rate": 4.036886461126889e-06,
      "loss": 0.0127,
      "step": 1829960
    },
    {
      "epoch": 2.99480240634185,
      "grad_norm": 0.675480306148529,
      "learning_rate": 4.036820568913372e-06,
      "loss": 0.0087,
      "step": 1829980
    },
    {
      "epoch": 2.994835136780503,
      "grad_norm": 0.3002796769142151,
      "learning_rate": 4.0367546766998546e-06,
      "loss": 0.0123,
      "step": 1830000
    },
    {
      "epoch": 2.994867867219156,
      "grad_norm": 0.5419713258743286,
      "learning_rate": 4.036688784486338e-06,
      "loss": 0.0084,
      "step": 1830020
    },
    {
      "epoch": 2.99490059765781,
      "grad_norm": 0.2197502702474594,
      "learning_rate": 4.036622892272821e-06,
      "loss": 0.0116,
      "step": 1830040
    },
    {
      "epoch": 2.9949333280964634,
      "grad_norm": 0.16626107692718506,
      "learning_rate": 4.036557000059304e-06,
      "loss": 0.0214,
      "step": 1830060
    },
    {
      "epoch": 2.9949660585351165,
      "grad_norm": 0.5983031988143921,
      "learning_rate": 4.036491107845786e-06,
      "loss": 0.0098,
      "step": 1830080
    },
    {
      "epoch": 2.9949987889737697,
      "grad_norm": 0.442779004573822,
      "learning_rate": 4.036425215632269e-06,
      "loss": 0.0142,
      "step": 1830100
    },
    {
      "epoch": 2.9950315194124233,
      "grad_norm": 0.6449417471885681,
      "learning_rate": 4.036359323418752e-06,
      "loss": 0.0155,
      "step": 1830120
    },
    {
      "epoch": 2.9950642498510764,
      "grad_norm": 0.284776896238327,
      "learning_rate": 4.036293431205235e-06,
      "loss": 0.0118,
      "step": 1830140
    },
    {
      "epoch": 2.9950969802897296,
      "grad_norm": 0.15731003880500793,
      "learning_rate": 4.036227538991717e-06,
      "loss": 0.0108,
      "step": 1830160
    },
    {
      "epoch": 2.995129710728383,
      "grad_norm": 0.4357425272464752,
      "learning_rate": 4.036161646778201e-06,
      "loss": 0.0162,
      "step": 1830180
    },
    {
      "epoch": 2.995162441167037,
      "grad_norm": 0.23802979290485382,
      "learning_rate": 4.036095754564684e-06,
      "loss": 0.0115,
      "step": 1830200
    },
    {
      "epoch": 2.99519517160569,
      "grad_norm": 0.7783313989639282,
      "learning_rate": 4.036029862351166e-06,
      "loss": 0.0128,
      "step": 1830220
    },
    {
      "epoch": 2.995227902044343,
      "grad_norm": 0.5002195835113525,
      "learning_rate": 4.035963970137649e-06,
      "loss": 0.0207,
      "step": 1830240
    },
    {
      "epoch": 2.9952606324829967,
      "grad_norm": 1.087191104888916,
      "learning_rate": 4.035898077924132e-06,
      "loss": 0.0087,
      "step": 1830260
    },
    {
      "epoch": 2.99529336292165,
      "grad_norm": 0.06481199711561203,
      "learning_rate": 4.035832185710615e-06,
      "loss": 0.0152,
      "step": 1830280
    },
    {
      "epoch": 2.995326093360303,
      "grad_norm": 0.10880854725837708,
      "learning_rate": 4.035766293497097e-06,
      "loss": 0.0108,
      "step": 1830300
    },
    {
      "epoch": 2.9953588237989566,
      "grad_norm": 0.1349335014820099,
      "learning_rate": 4.03570040128358e-06,
      "loss": 0.0139,
      "step": 1830320
    },
    {
      "epoch": 2.99539155423761,
      "grad_norm": 1.507735013961792,
      "learning_rate": 4.035634509070063e-06,
      "loss": 0.0187,
      "step": 1830340
    },
    {
      "epoch": 2.9954242846762633,
      "grad_norm": 1.2662904262542725,
      "learning_rate": 4.0355686168565464e-06,
      "loss": 0.0114,
      "step": 1830360
    },
    {
      "epoch": 2.9954570151149165,
      "grad_norm": 2.1117875576019287,
      "learning_rate": 4.035502724643029e-06,
      "loss": 0.0189,
      "step": 1830380
    },
    {
      "epoch": 2.99548974555357,
      "grad_norm": 0.2981560230255127,
      "learning_rate": 4.035436832429513e-06,
      "loss": 0.0097,
      "step": 1830400
    },
    {
      "epoch": 2.995522475992223,
      "grad_norm": 0.21093061566352844,
      "learning_rate": 4.0353709402159955e-06,
      "loss": 0.0213,
      "step": 1830420
    },
    {
      "epoch": 2.9955552064308764,
      "grad_norm": 0.20978060364723206,
      "learning_rate": 4.035305048002478e-06,
      "loss": 0.0139,
      "step": 1830440
    },
    {
      "epoch": 2.99558793686953,
      "grad_norm": 0.0926850214600563,
      "learning_rate": 4.035239155788961e-06,
      "loss": 0.0099,
      "step": 1830460
    },
    {
      "epoch": 2.995620667308183,
      "grad_norm": 0.5322550535202026,
      "learning_rate": 4.035173263575444e-06,
      "loss": 0.0125,
      "step": 1830480
    },
    {
      "epoch": 2.9956533977468367,
      "grad_norm": 0.9011234641075134,
      "learning_rate": 4.0351073713619265e-06,
      "loss": 0.0112,
      "step": 1830500
    },
    {
      "epoch": 2.99568612818549,
      "grad_norm": 0.22506919503211975,
      "learning_rate": 4.035041479148409e-06,
      "loss": 0.0105,
      "step": 1830520
    },
    {
      "epoch": 2.9957188586241434,
      "grad_norm": 0.3183557689189911,
      "learning_rate": 4.034975586934892e-06,
      "loss": 0.0149,
      "step": 1830540
    },
    {
      "epoch": 2.9957515890627966,
      "grad_norm": 0.48588788509368896,
      "learning_rate": 4.034909694721375e-06,
      "loss": 0.0153,
      "step": 1830560
    },
    {
      "epoch": 2.9957843195014497,
      "grad_norm": 0.13169342279434204,
      "learning_rate": 4.034843802507858e-06,
      "loss": 0.0114,
      "step": 1830580
    },
    {
      "epoch": 2.9958170499401033,
      "grad_norm": 0.3163814842700958,
      "learning_rate": 4.034777910294341e-06,
      "loss": 0.0156,
      "step": 1830600
    },
    {
      "epoch": 2.9958497803787565,
      "grad_norm": 0.7244777679443359,
      "learning_rate": 4.034712018080824e-06,
      "loss": 0.0104,
      "step": 1830620
    },
    {
      "epoch": 2.99588251081741,
      "grad_norm": 0.3452768325805664,
      "learning_rate": 4.0346461258673065e-06,
      "loss": 0.0083,
      "step": 1830640
    },
    {
      "epoch": 2.9959152412560632,
      "grad_norm": 0.3616108000278473,
      "learning_rate": 4.034580233653789e-06,
      "loss": 0.0144,
      "step": 1830660
    },
    {
      "epoch": 2.995947971694717,
      "grad_norm": 0.3051603138446808,
      "learning_rate": 4.034514341440272e-06,
      "loss": 0.0151,
      "step": 1830680
    },
    {
      "epoch": 2.99598070213337,
      "grad_norm": 0.18691720068454742,
      "learning_rate": 4.034448449226755e-06,
      "loss": 0.0103,
      "step": 1830700
    },
    {
      "epoch": 2.996013432572023,
      "grad_norm": 0.3439836800098419,
      "learning_rate": 4.034382557013238e-06,
      "loss": 0.0122,
      "step": 1830720
    },
    {
      "epoch": 2.9960461630106767,
      "grad_norm": 0.09245381504297256,
      "learning_rate": 4.034316664799721e-06,
      "loss": 0.0086,
      "step": 1830740
    },
    {
      "epoch": 2.99607889344933,
      "grad_norm": 4.773160934448242,
      "learning_rate": 4.034250772586204e-06,
      "loss": 0.0131,
      "step": 1830760
    },
    {
      "epoch": 2.9961116238879835,
      "grad_norm": 0.4312346875667572,
      "learning_rate": 4.0341848803726865e-06,
      "loss": 0.015,
      "step": 1830780
    },
    {
      "epoch": 2.9961443543266366,
      "grad_norm": 0.6518782377243042,
      "learning_rate": 4.03411898815917e-06,
      "loss": 0.0117,
      "step": 1830800
    },
    {
      "epoch": 2.99617708476529,
      "grad_norm": 0.4489160180091858,
      "learning_rate": 4.034053095945653e-06,
      "loss": 0.0122,
      "step": 1830820
    },
    {
      "epoch": 2.9962098152039434,
      "grad_norm": 0.49191349744796753,
      "learning_rate": 4.0339872037321356e-06,
      "loss": 0.0112,
      "step": 1830840
    },
    {
      "epoch": 2.9962425456425965,
      "grad_norm": 0.4493805170059204,
      "learning_rate": 4.033921311518618e-06,
      "loss": 0.0132,
      "step": 1830860
    },
    {
      "epoch": 2.99627527608125,
      "grad_norm": 0.854245662689209,
      "learning_rate": 4.033855419305101e-06,
      "loss": 0.0108,
      "step": 1830880
    },
    {
      "epoch": 2.9963080065199033,
      "grad_norm": 0.4002753794193268,
      "learning_rate": 4.033789527091584e-06,
      "loss": 0.012,
      "step": 1830900
    },
    {
      "epoch": 2.996340736958557,
      "grad_norm": 0.20939797163009644,
      "learning_rate": 4.0337236348780665e-06,
      "loss": 0.011,
      "step": 1830920
    },
    {
      "epoch": 2.99637346739721,
      "grad_norm": 0.7327240705490112,
      "learning_rate": 4.033657742664549e-06,
      "loss": 0.0104,
      "step": 1830940
    },
    {
      "epoch": 2.9964061978358636,
      "grad_norm": 0.5617132782936096,
      "learning_rate": 4.033591850451032e-06,
      "loss": 0.0151,
      "step": 1830960
    },
    {
      "epoch": 2.9964389282745167,
      "grad_norm": 0.304840624332428,
      "learning_rate": 4.033525958237516e-06,
      "loss": 0.0076,
      "step": 1830980
    },
    {
      "epoch": 2.99647165871317,
      "grad_norm": 0.2022743970155716,
      "learning_rate": 4.033460066023998e-06,
      "loss": 0.0136,
      "step": 1831000
    },
    {
      "epoch": 2.9965043891518235,
      "grad_norm": 0.6600459218025208,
      "learning_rate": 4.033394173810481e-06,
      "loss": 0.0175,
      "step": 1831020
    },
    {
      "epoch": 2.9965371195904766,
      "grad_norm": 0.6529305577278137,
      "learning_rate": 4.033328281596964e-06,
      "loss": 0.0182,
      "step": 1831040
    },
    {
      "epoch": 2.9965698500291302,
      "grad_norm": 0.419116735458374,
      "learning_rate": 4.033262389383447e-06,
      "loss": 0.0096,
      "step": 1831060
    },
    {
      "epoch": 2.9966025804677834,
      "grad_norm": 0.08374372124671936,
      "learning_rate": 4.03319649716993e-06,
      "loss": 0.0124,
      "step": 1831080
    },
    {
      "epoch": 2.996635310906437,
      "grad_norm": 0.23442955315113068,
      "learning_rate": 4.033130604956413e-06,
      "loss": 0.0142,
      "step": 1831100
    },
    {
      "epoch": 2.99666804134509,
      "grad_norm": 0.5183407664299011,
      "learning_rate": 4.033064712742896e-06,
      "loss": 0.0128,
      "step": 1831120
    },
    {
      "epoch": 2.9967007717837433,
      "grad_norm": 0.4992654323577881,
      "learning_rate": 4.032998820529378e-06,
      "loss": 0.0138,
      "step": 1831140
    },
    {
      "epoch": 2.996733502222397,
      "grad_norm": 0.34221383929252625,
      "learning_rate": 4.032932928315861e-06,
      "loss": 0.01,
      "step": 1831160
    },
    {
      "epoch": 2.99676623266105,
      "grad_norm": 0.43678227066993713,
      "learning_rate": 4.032867036102344e-06,
      "loss": 0.0114,
      "step": 1831180
    },
    {
      "epoch": 2.9967989630997036,
      "grad_norm": 0.3274117410182953,
      "learning_rate": 4.0328011438888274e-06,
      "loss": 0.0116,
      "step": 1831200
    },
    {
      "epoch": 2.9968316935383568,
      "grad_norm": 0.1369892805814743,
      "learning_rate": 4.03273525167531e-06,
      "loss": 0.0149,
      "step": 1831220
    },
    {
      "epoch": 2.9968644239770104,
      "grad_norm": 0.5497426986694336,
      "learning_rate": 4.032669359461793e-06,
      "loss": 0.0132,
      "step": 1831240
    },
    {
      "epoch": 2.9968971544156635,
      "grad_norm": 0.6562260389328003,
      "learning_rate": 4.032603467248276e-06,
      "loss": 0.0099,
      "step": 1831260
    },
    {
      "epoch": 2.9969298848543167,
      "grad_norm": 0.056951865553855896,
      "learning_rate": 4.032537575034758e-06,
      "loss": 0.0147,
      "step": 1831280
    },
    {
      "epoch": 2.9969626152929703,
      "grad_norm": 0.07376809418201447,
      "learning_rate": 4.032471682821241e-06,
      "loss": 0.0155,
      "step": 1831300
    },
    {
      "epoch": 2.9969953457316234,
      "grad_norm": 0.35033416748046875,
      "learning_rate": 4.032405790607724e-06,
      "loss": 0.0163,
      "step": 1831320
    },
    {
      "epoch": 2.997028076170277,
      "grad_norm": 0.20555830001831055,
      "learning_rate": 4.032339898394207e-06,
      "loss": 0.0125,
      "step": 1831340
    },
    {
      "epoch": 2.99706080660893,
      "grad_norm": 0.15661092102527618,
      "learning_rate": 4.032274006180689e-06,
      "loss": 0.0131,
      "step": 1831360
    },
    {
      "epoch": 2.9970935370475837,
      "grad_norm": 0.338931143283844,
      "learning_rate": 4.032208113967173e-06,
      "loss": 0.0122,
      "step": 1831380
    },
    {
      "epoch": 2.997126267486237,
      "grad_norm": 0.2307305485010147,
      "learning_rate": 4.032142221753656e-06,
      "loss": 0.0131,
      "step": 1831400
    },
    {
      "epoch": 2.99715899792489,
      "grad_norm": 0.8374437689781189,
      "learning_rate": 4.032076329540138e-06,
      "loss": 0.0097,
      "step": 1831420
    },
    {
      "epoch": 2.9971917283635436,
      "grad_norm": 0.6685141921043396,
      "learning_rate": 4.032010437326622e-06,
      "loss": 0.0159,
      "step": 1831440
    },
    {
      "epoch": 2.997224458802197,
      "grad_norm": 2.937195301055908,
      "learning_rate": 4.031944545113105e-06,
      "loss": 0.0149,
      "step": 1831460
    },
    {
      "epoch": 2.99725718924085,
      "grad_norm": 0.18991416692733765,
      "learning_rate": 4.0318786528995875e-06,
      "loss": 0.0096,
      "step": 1831480
    },
    {
      "epoch": 2.9972899196795035,
      "grad_norm": 0.2845539152622223,
      "learning_rate": 4.03181276068607e-06,
      "loss": 0.0151,
      "step": 1831500
    },
    {
      "epoch": 2.997322650118157,
      "grad_norm": 0.4046129882335663,
      "learning_rate": 4.031746868472553e-06,
      "loss": 0.0097,
      "step": 1831520
    },
    {
      "epoch": 2.9973553805568103,
      "grad_norm": 0.8073639869689941,
      "learning_rate": 4.031680976259036e-06,
      "loss": 0.0158,
      "step": 1831540
    },
    {
      "epoch": 2.9973881109954634,
      "grad_norm": 0.26347315311431885,
      "learning_rate": 4.0316150840455184e-06,
      "loss": 0.0181,
      "step": 1831560
    },
    {
      "epoch": 2.997420841434117,
      "grad_norm": 0.5362319946289062,
      "learning_rate": 4.031549191832001e-06,
      "loss": 0.0137,
      "step": 1831580
    },
    {
      "epoch": 2.99745357187277,
      "grad_norm": 0.06699944287538528,
      "learning_rate": 4.031483299618485e-06,
      "loss": 0.011,
      "step": 1831600
    },
    {
      "epoch": 2.9974863023114233,
      "grad_norm": 0.269873708486557,
      "learning_rate": 4.0314174074049675e-06,
      "loss": 0.0103,
      "step": 1831620
    },
    {
      "epoch": 2.997519032750077,
      "grad_norm": 0.511088490486145,
      "learning_rate": 4.03135151519145e-06,
      "loss": 0.0147,
      "step": 1831640
    },
    {
      "epoch": 2.9975517631887305,
      "grad_norm": 0.3731996715068817,
      "learning_rate": 4.031285622977933e-06,
      "loss": 0.0108,
      "step": 1831660
    },
    {
      "epoch": 2.9975844936273837,
      "grad_norm": 0.3732951581478119,
      "learning_rate": 4.031219730764416e-06,
      "loss": 0.0089,
      "step": 1831680
    },
    {
      "epoch": 2.997617224066037,
      "grad_norm": 0.9499589204788208,
      "learning_rate": 4.0311538385508985e-06,
      "loss": 0.0102,
      "step": 1831700
    },
    {
      "epoch": 2.9976499545046904,
      "grad_norm": 0.5781645774841309,
      "learning_rate": 4.031087946337381e-06,
      "loss": 0.0157,
      "step": 1831720
    },
    {
      "epoch": 2.9976826849433436,
      "grad_norm": 0.3264457881450653,
      "learning_rate": 4.031022054123864e-06,
      "loss": 0.0172,
      "step": 1831740
    },
    {
      "epoch": 2.9977154153819967,
      "grad_norm": 0.13658970594406128,
      "learning_rate": 4.0309561619103475e-06,
      "loss": 0.0107,
      "step": 1831760
    },
    {
      "epoch": 2.9977481458206503,
      "grad_norm": 0.13926206529140472,
      "learning_rate": 4.03089026969683e-06,
      "loss": 0.0153,
      "step": 1831780
    },
    {
      "epoch": 2.997780876259304,
      "grad_norm": 0.3524072468280792,
      "learning_rate": 4.030824377483313e-06,
      "loss": 0.0099,
      "step": 1831800
    },
    {
      "epoch": 2.997813606697957,
      "grad_norm": 0.4242851734161377,
      "learning_rate": 4.030758485269797e-06,
      "loss": 0.0173,
      "step": 1831820
    },
    {
      "epoch": 2.99784633713661,
      "grad_norm": 0.25836995244026184,
      "learning_rate": 4.030692593056279e-06,
      "loss": 0.0139,
      "step": 1831840
    },
    {
      "epoch": 2.997879067575264,
      "grad_norm": 0.5746896862983704,
      "learning_rate": 4.030626700842762e-06,
      "loss": 0.0104,
      "step": 1831860
    },
    {
      "epoch": 2.997911798013917,
      "grad_norm": 0.524049699306488,
      "learning_rate": 4.030560808629245e-06,
      "loss": 0.0135,
      "step": 1831880
    },
    {
      "epoch": 2.99794452845257,
      "grad_norm": 0.2556280493736267,
      "learning_rate": 4.0304949164157276e-06,
      "loss": 0.0144,
      "step": 1831900
    },
    {
      "epoch": 2.9979772588912237,
      "grad_norm": 0.2516677677631378,
      "learning_rate": 4.03042902420221e-06,
      "loss": 0.0102,
      "step": 1831920
    },
    {
      "epoch": 2.998009989329877,
      "grad_norm": 0.40838971734046936,
      "learning_rate": 4.030363131988693e-06,
      "loss": 0.0187,
      "step": 1831940
    },
    {
      "epoch": 2.9980427197685304,
      "grad_norm": 1.2633767127990723,
      "learning_rate": 4.030297239775176e-06,
      "loss": 0.0179,
      "step": 1831960
    },
    {
      "epoch": 2.9980754502071836,
      "grad_norm": 0.27759987115859985,
      "learning_rate": 4.0302313475616585e-06,
      "loss": 0.0141,
      "step": 1831980
    },
    {
      "epoch": 2.998108180645837,
      "grad_norm": 0.13553501665592194,
      "learning_rate": 4.030165455348142e-06,
      "loss": 0.0115,
      "step": 1832000
    },
    {
      "epoch": 2.9981409110844903,
      "grad_norm": 0.4380134046077728,
      "learning_rate": 4.030099563134625e-06,
      "loss": 0.0111,
      "step": 1832020
    },
    {
      "epoch": 2.9981736415231435,
      "grad_norm": 0.289532870054245,
      "learning_rate": 4.030033670921108e-06,
      "loss": 0.0107,
      "step": 1832040
    },
    {
      "epoch": 2.998206371961797,
      "grad_norm": 0.5161228179931641,
      "learning_rate": 4.02996777870759e-06,
      "loss": 0.0114,
      "step": 1832060
    },
    {
      "epoch": 2.99823910240045,
      "grad_norm": 0.43454307317733765,
      "learning_rate": 4.029901886494073e-06,
      "loss": 0.0116,
      "step": 1832080
    },
    {
      "epoch": 2.998271832839104,
      "grad_norm": 0.1405036598443985,
      "learning_rate": 4.029835994280556e-06,
      "loss": 0.015,
      "step": 1832100
    },
    {
      "epoch": 2.998304563277757,
      "grad_norm": 0.5575229525566101,
      "learning_rate": 4.029770102067039e-06,
      "loss": 0.0131,
      "step": 1832120
    },
    {
      "epoch": 2.9983372937164106,
      "grad_norm": 0.09568746387958527,
      "learning_rate": 4.029704209853522e-06,
      "loss": 0.0128,
      "step": 1832140
    },
    {
      "epoch": 2.9983700241550637,
      "grad_norm": 0.49592530727386475,
      "learning_rate": 4.029638317640005e-06,
      "loss": 0.0088,
      "step": 1832160
    },
    {
      "epoch": 2.998402754593717,
      "grad_norm": 1.0953084230422974,
      "learning_rate": 4.029572425426488e-06,
      "loss": 0.014,
      "step": 1832180
    },
    {
      "epoch": 2.9984354850323705,
      "grad_norm": 1.0033854246139526,
      "learning_rate": 4.02950653321297e-06,
      "loss": 0.014,
      "step": 1832200
    },
    {
      "epoch": 2.9984682154710236,
      "grad_norm": 0.28472641110420227,
      "learning_rate": 4.029440640999454e-06,
      "loss": 0.0164,
      "step": 1832220
    },
    {
      "epoch": 2.998500945909677,
      "grad_norm": 0.23565563559532166,
      "learning_rate": 4.029374748785937e-06,
      "loss": 0.0079,
      "step": 1832240
    },
    {
      "epoch": 2.9985336763483303,
      "grad_norm": 0.26562589406967163,
      "learning_rate": 4.029308856572419e-06,
      "loss": 0.0101,
      "step": 1832260
    },
    {
      "epoch": 2.998566406786984,
      "grad_norm": 0.18171939253807068,
      "learning_rate": 4.029242964358902e-06,
      "loss": 0.0143,
      "step": 1832280
    },
    {
      "epoch": 2.998599137225637,
      "grad_norm": 0.260149210691452,
      "learning_rate": 4.029177072145385e-06,
      "loss": 0.0097,
      "step": 1832300
    },
    {
      "epoch": 2.9986318676642902,
      "grad_norm": 0.18098601698875427,
      "learning_rate": 4.029111179931868e-06,
      "loss": 0.0121,
      "step": 1832320
    },
    {
      "epoch": 2.998664598102944,
      "grad_norm": 1.1556702852249146,
      "learning_rate": 4.02904528771835e-06,
      "loss": 0.0105,
      "step": 1832340
    },
    {
      "epoch": 2.998697328541597,
      "grad_norm": 0.28763386607170105,
      "learning_rate": 4.028979395504833e-06,
      "loss": 0.0144,
      "step": 1832360
    },
    {
      "epoch": 2.9987300589802506,
      "grad_norm": 0.9133034944534302,
      "learning_rate": 4.028913503291316e-06,
      "loss": 0.0111,
      "step": 1832380
    },
    {
      "epoch": 2.9987627894189037,
      "grad_norm": 0.3924609422683716,
      "learning_rate": 4.0288476110777994e-06,
      "loss": 0.0126,
      "step": 1832400
    },
    {
      "epoch": 2.9987955198575573,
      "grad_norm": 0.2515013813972473,
      "learning_rate": 4.028781718864282e-06,
      "loss": 0.0117,
      "step": 1832420
    },
    {
      "epoch": 2.9988282502962105,
      "grad_norm": 0.11200820654630661,
      "learning_rate": 4.028715826650765e-06,
      "loss": 0.0091,
      "step": 1832440
    },
    {
      "epoch": 2.9988609807348636,
      "grad_norm": 0.23398517072200775,
      "learning_rate": 4.028649934437248e-06,
      "loss": 0.0116,
      "step": 1832460
    },
    {
      "epoch": 2.998893711173517,
      "grad_norm": 0.14702802896499634,
      "learning_rate": 4.028584042223731e-06,
      "loss": 0.0121,
      "step": 1832480
    },
    {
      "epoch": 2.9989264416121704,
      "grad_norm": 0.23526331782341003,
      "learning_rate": 4.028518150010214e-06,
      "loss": 0.011,
      "step": 1832500
    },
    {
      "epoch": 2.998959172050824,
      "grad_norm": 0.12482375651597977,
      "learning_rate": 4.028452257796697e-06,
      "loss": 0.021,
      "step": 1832520
    },
    {
      "epoch": 2.998991902489477,
      "grad_norm": 0.4872736632823944,
      "learning_rate": 4.0283863655831795e-06,
      "loss": 0.0098,
      "step": 1832540
    },
    {
      "epoch": 2.9990246329281307,
      "grad_norm": 0.16504895687103271,
      "learning_rate": 4.028320473369662e-06,
      "loss": 0.0092,
      "step": 1832560
    },
    {
      "epoch": 2.999057363366784,
      "grad_norm": 0.15200689435005188,
      "learning_rate": 4.028254581156145e-06,
      "loss": 0.0115,
      "step": 1832580
    },
    {
      "epoch": 2.999090093805437,
      "grad_norm": 0.240073561668396,
      "learning_rate": 4.028188688942628e-06,
      "loss": 0.0121,
      "step": 1832600
    },
    {
      "epoch": 2.9991228242440906,
      "grad_norm": 1.0453311204910278,
      "learning_rate": 4.028122796729111e-06,
      "loss": 0.0123,
      "step": 1832620
    },
    {
      "epoch": 2.9991555546827438,
      "grad_norm": 0.3941737413406372,
      "learning_rate": 4.028056904515594e-06,
      "loss": 0.0086,
      "step": 1832640
    },
    {
      "epoch": 2.9991882851213973,
      "grad_norm": 0.2659394443035126,
      "learning_rate": 4.027991012302077e-06,
      "loss": 0.0118,
      "step": 1832660
    },
    {
      "epoch": 2.9992210155600505,
      "grad_norm": 0.0717419907450676,
      "learning_rate": 4.0279251200885595e-06,
      "loss": 0.0139,
      "step": 1832680
    },
    {
      "epoch": 2.999253745998704,
      "grad_norm": 0.5798114538192749,
      "learning_rate": 4.027859227875042e-06,
      "loss": 0.018,
      "step": 1832700
    },
    {
      "epoch": 2.9992864764373572,
      "grad_norm": 0.5536496043205261,
      "learning_rate": 4.027793335661525e-06,
      "loss": 0.0139,
      "step": 1832720
    },
    {
      "epoch": 2.9993192068760104,
      "grad_norm": 0.17366111278533936,
      "learning_rate": 4.027727443448008e-06,
      "loss": 0.0091,
      "step": 1832740
    },
    {
      "epoch": 2.999351937314664,
      "grad_norm": 0.27472981810569763,
      "learning_rate": 4.0276615512344904e-06,
      "loss": 0.0132,
      "step": 1832760
    },
    {
      "epoch": 2.999384667753317,
      "grad_norm": 0.3675121068954468,
      "learning_rate": 4.027595659020973e-06,
      "loss": 0.02,
      "step": 1832780
    },
    {
      "epoch": 2.9994173981919707,
      "grad_norm": 0.2714650630950928,
      "learning_rate": 4.027529766807457e-06,
      "loss": 0.0073,
      "step": 1832800
    },
    {
      "epoch": 2.999450128630624,
      "grad_norm": 0.7647008299827576,
      "learning_rate": 4.0274638745939395e-06,
      "loss": 0.0114,
      "step": 1832820
    },
    {
      "epoch": 2.9994828590692775,
      "grad_norm": 0.8622974753379822,
      "learning_rate": 4.027397982380423e-06,
      "loss": 0.0119,
      "step": 1832840
    },
    {
      "epoch": 2.9995155895079306,
      "grad_norm": 0.20466136932373047,
      "learning_rate": 4.027332090166906e-06,
      "loss": 0.0125,
      "step": 1832860
    },
    {
      "epoch": 2.9995483199465838,
      "grad_norm": 0.1702132374048233,
      "learning_rate": 4.027266197953389e-06,
      "loss": 0.01,
      "step": 1832880
    },
    {
      "epoch": 2.9995810503852374,
      "grad_norm": 0.2039656937122345,
      "learning_rate": 4.027200305739871e-06,
      "loss": 0.0136,
      "step": 1832900
    },
    {
      "epoch": 2.9996137808238905,
      "grad_norm": 0.44400593638420105,
      "learning_rate": 4.027134413526354e-06,
      "loss": 0.0136,
      "step": 1832920
    },
    {
      "epoch": 2.9996465112625437,
      "grad_norm": 0.06198553368449211,
      "learning_rate": 4.027068521312837e-06,
      "loss": 0.0103,
      "step": 1832940
    },
    {
      "epoch": 2.9996792417011973,
      "grad_norm": 0.48976293206214905,
      "learning_rate": 4.0270026290993195e-06,
      "loss": 0.0183,
      "step": 1832960
    },
    {
      "epoch": 2.999711972139851,
      "grad_norm": 0.28212839365005493,
      "learning_rate": 4.026936736885802e-06,
      "loss": 0.0133,
      "step": 1832980
    },
    {
      "epoch": 2.999744702578504,
      "grad_norm": 0.3247843086719513,
      "learning_rate": 4.026870844672285e-06,
      "loss": 0.0107,
      "step": 1833000
    },
    {
      "epoch": 2.999777433017157,
      "grad_norm": 0.732522189617157,
      "learning_rate": 4.026804952458769e-06,
      "loss": 0.015,
      "step": 1833020
    },
    {
      "epoch": 2.9998101634558108,
      "grad_norm": 0.12437491118907928,
      "learning_rate": 4.026739060245251e-06,
      "loss": 0.0092,
      "step": 1833040
    },
    {
      "epoch": 2.999842893894464,
      "grad_norm": 0.2168656885623932,
      "learning_rate": 4.026673168031734e-06,
      "loss": 0.0153,
      "step": 1833060
    },
    {
      "epoch": 2.999875624333117,
      "grad_norm": 0.28681135177612305,
      "learning_rate": 4.026607275818217e-06,
      "loss": 0.0118,
      "step": 1833080
    },
    {
      "epoch": 2.9999083547717706,
      "grad_norm": 0.1625949740409851,
      "learning_rate": 4.0265413836046996e-06,
      "loss": 0.0128,
      "step": 1833100
    },
    {
      "epoch": 2.9999410852104242,
      "grad_norm": 0.41755521297454834,
      "learning_rate": 4.026475491391182e-06,
      "loss": 0.0222,
      "step": 1833120
    },
    {
      "epoch": 2.9999738156490774,
      "grad_norm": 0.16721799969673157,
      "learning_rate": 4.026409599177665e-06,
      "loss": 0.0084,
      "step": 1833140
    },
    {
      "epoch": 3.0000065460877305,
      "grad_norm": 0.25257837772369385,
      "learning_rate": 4.026343706964148e-06,
      "loss": 0.0113,
      "step": 1833160
    },
    {
      "epoch": 3.000039276526384,
      "grad_norm": 0.17059582471847534,
      "learning_rate": 4.026277814750631e-06,
      "loss": 0.015,
      "step": 1833180
    },
    {
      "epoch": 3.0000720069650373,
      "grad_norm": 0.10067356377840042,
      "learning_rate": 4.026211922537114e-06,
      "loss": 0.0098,
      "step": 1833200
    },
    {
      "epoch": 3.000104737403691,
      "grad_norm": 0.24767684936523438,
      "learning_rate": 4.026146030323597e-06,
      "loss": 0.0115,
      "step": 1833220
    },
    {
      "epoch": 3.000137467842344,
      "grad_norm": 0.2126089632511139,
      "learning_rate": 4.0260801381100804e-06,
      "loss": 0.0199,
      "step": 1833240
    },
    {
      "epoch": 3.000170198280997,
      "grad_norm": 0.13634854555130005,
      "learning_rate": 4.026014245896563e-06,
      "loss": 0.0103,
      "step": 1833260
    },
    {
      "epoch": 3.0002029287196508,
      "grad_norm": 0.6788968443870544,
      "learning_rate": 4.025948353683046e-06,
      "loss": 0.0151,
      "step": 1833280
    },
    {
      "epoch": 3.000235659158304,
      "grad_norm": 0.2383037805557251,
      "learning_rate": 4.025882461469529e-06,
      "loss": 0.01,
      "step": 1833300
    },
    {
      "epoch": 3.0002683895969575,
      "grad_norm": 0.11788219958543777,
      "learning_rate": 4.025816569256011e-06,
      "loss": 0.0198,
      "step": 1833320
    },
    {
      "epoch": 3.0003011200356107,
      "grad_norm": 0.3736536502838135,
      "learning_rate": 4.025750677042494e-06,
      "loss": 0.0177,
      "step": 1833340
    },
    {
      "epoch": 3.0003338504742643,
      "grad_norm": 0.2798103094100952,
      "learning_rate": 4.025684784828977e-06,
      "loss": 0.0169,
      "step": 1833360
    },
    {
      "epoch": 3.0003665809129174,
      "grad_norm": 0.26726922392845154,
      "learning_rate": 4.02561889261546e-06,
      "loss": 0.0111,
      "step": 1833380
    },
    {
      "epoch": 3.0003993113515706,
      "grad_norm": 0.45538726449012756,
      "learning_rate": 4.025553000401942e-06,
      "loss": 0.0131,
      "step": 1833400
    },
    {
      "epoch": 3.000432041790224,
      "grad_norm": 0.1268181949853897,
      "learning_rate": 4.025487108188426e-06,
      "loss": 0.0093,
      "step": 1833420
    },
    {
      "epoch": 3.0004647722288773,
      "grad_norm": 0.1661701202392578,
      "learning_rate": 4.025421215974909e-06,
      "loss": 0.0129,
      "step": 1833440
    },
    {
      "epoch": 3.000497502667531,
      "grad_norm": 0.2850368618965149,
      "learning_rate": 4.025355323761391e-06,
      "loss": 0.0137,
      "step": 1833460
    },
    {
      "epoch": 3.000530233106184,
      "grad_norm": 0.07921145856380463,
      "learning_rate": 4.025289431547874e-06,
      "loss": 0.0144,
      "step": 1833480
    },
    {
      "epoch": 3.0005629635448376,
      "grad_norm": 0.6702582240104675,
      "learning_rate": 4.025223539334357e-06,
      "loss": 0.0098,
      "step": 1833500
    },
    {
      "epoch": 3.000595693983491,
      "grad_norm": 0.4201146066188812,
      "learning_rate": 4.02515764712084e-06,
      "loss": 0.0208,
      "step": 1833520
    },
    {
      "epoch": 3.000628424422144,
      "grad_norm": 0.30671143531799316,
      "learning_rate": 4.025091754907323e-06,
      "loss": 0.0101,
      "step": 1833540
    },
    {
      "epoch": 3.0006611548607975,
      "grad_norm": 0.565636932849884,
      "learning_rate": 4.025025862693806e-06,
      "loss": 0.0111,
      "step": 1833560
    },
    {
      "epoch": 3.0006938852994507,
      "grad_norm": 0.047389257699251175,
      "learning_rate": 4.024959970480289e-06,
      "loss": 0.0111,
      "step": 1833580
    },
    {
      "epoch": 3.0007266157381043,
      "grad_norm": 0.18168751895427704,
      "learning_rate": 4.0248940782667714e-06,
      "loss": 0.0128,
      "step": 1833600
    },
    {
      "epoch": 3.0007593461767574,
      "grad_norm": 0.18240460753440857,
      "learning_rate": 4.024828186053254e-06,
      "loss": 0.0112,
      "step": 1833620
    },
    {
      "epoch": 3.0007920766154106,
      "grad_norm": 0.36898574233055115,
      "learning_rate": 4.024762293839738e-06,
      "loss": 0.0137,
      "step": 1833640
    },
    {
      "epoch": 3.000824807054064,
      "grad_norm": 0.15130198001861572,
      "learning_rate": 4.0246964016262205e-06,
      "loss": 0.0124,
      "step": 1833660
    },
    {
      "epoch": 3.0008575374927173,
      "grad_norm": 0.11353613436222076,
      "learning_rate": 4.024630509412703e-06,
      "loss": 0.0097,
      "step": 1833680
    },
    {
      "epoch": 3.000890267931371,
      "grad_norm": 0.29469674825668335,
      "learning_rate": 4.024564617199186e-06,
      "loss": 0.012,
      "step": 1833700
    },
    {
      "epoch": 3.000922998370024,
      "grad_norm": 0.0692140981554985,
      "learning_rate": 4.024498724985669e-06,
      "loss": 0.0104,
      "step": 1833720
    },
    {
      "epoch": 3.0009557288086777,
      "grad_norm": 0.48769378662109375,
      "learning_rate": 4.0244328327721515e-06,
      "loss": 0.0106,
      "step": 1833740
    },
    {
      "epoch": 3.000988459247331,
      "grad_norm": 0.25036197900772095,
      "learning_rate": 4.024366940558634e-06,
      "loss": 0.0079,
      "step": 1833760
    },
    {
      "epoch": 3.001021189685984,
      "grad_norm": 0.14784254133701324,
      "learning_rate": 4.024301048345117e-06,
      "loss": 0.0114,
      "step": 1833780
    },
    {
      "epoch": 3.0010539201246376,
      "grad_norm": 0.20325978100299835,
      "learning_rate": 4.0242351561316e-06,
      "loss": 0.006,
      "step": 1833800
    },
    {
      "epoch": 3.0010866505632907,
      "grad_norm": 0.3503570854663849,
      "learning_rate": 4.024169263918083e-06,
      "loss": 0.0117,
      "step": 1833820
    },
    {
      "epoch": 3.0011193810019443,
      "grad_norm": 0.22086414694786072,
      "learning_rate": 4.024103371704566e-06,
      "loss": 0.0092,
      "step": 1833840
    },
    {
      "epoch": 3.0011521114405975,
      "grad_norm": 0.5281339883804321,
      "learning_rate": 4.024037479491049e-06,
      "loss": 0.0141,
      "step": 1833860
    },
    {
      "epoch": 3.001184841879251,
      "grad_norm": 0.22029952704906464,
      "learning_rate": 4.023971587277532e-06,
      "loss": 0.0112,
      "step": 1833880
    },
    {
      "epoch": 3.001217572317904,
      "grad_norm": 0.5840945243835449,
      "learning_rate": 4.023905695064015e-06,
      "loss": 0.0124,
      "step": 1833900
    },
    {
      "epoch": 3.0012503027565574,
      "grad_norm": 0.29369792342185974,
      "learning_rate": 4.023839802850498e-06,
      "loss": 0.0081,
      "step": 1833920
    },
    {
      "epoch": 3.001283033195211,
      "grad_norm": 0.31087106466293335,
      "learning_rate": 4.0237739106369806e-06,
      "loss": 0.0071,
      "step": 1833940
    },
    {
      "epoch": 3.001315763633864,
      "grad_norm": 0.07239646464586258,
      "learning_rate": 4.023708018423463e-06,
      "loss": 0.01,
      "step": 1833960
    },
    {
      "epoch": 3.0013484940725177,
      "grad_norm": 0.17490717768669128,
      "learning_rate": 4.023642126209946e-06,
      "loss": 0.015,
      "step": 1833980
    },
    {
      "epoch": 3.001381224511171,
      "grad_norm": 0.3794824481010437,
      "learning_rate": 4.023576233996429e-06,
      "loss": 0.0087,
      "step": 1834000
    },
    {
      "epoch": 3.0014139549498244,
      "grad_norm": 0.45651042461395264,
      "learning_rate": 4.0235103417829115e-06,
      "loss": 0.0097,
      "step": 1834020
    },
    {
      "epoch": 3.0014466853884776,
      "grad_norm": 0.6247910857200623,
      "learning_rate": 4.023444449569395e-06,
      "loss": 0.0186,
      "step": 1834040
    },
    {
      "epoch": 3.0014794158271307,
      "grad_norm": 0.15614762902259827,
      "learning_rate": 4.023378557355878e-06,
      "loss": 0.0098,
      "step": 1834060
    },
    {
      "epoch": 3.0015121462657843,
      "grad_norm": 0.1415977030992508,
      "learning_rate": 4.023312665142361e-06,
      "loss": 0.0126,
      "step": 1834080
    },
    {
      "epoch": 3.0015448767044375,
      "grad_norm": 0.22472912073135376,
      "learning_rate": 4.023246772928843e-06,
      "loss": 0.0112,
      "step": 1834100
    },
    {
      "epoch": 3.001577607143091,
      "grad_norm": 0.23886099457740784,
      "learning_rate": 4.023180880715326e-06,
      "loss": 0.0121,
      "step": 1834120
    },
    {
      "epoch": 3.0016103375817442,
      "grad_norm": 0.2623845040798187,
      "learning_rate": 4.023114988501809e-06,
      "loss": 0.0074,
      "step": 1834140
    },
    {
      "epoch": 3.001643068020398,
      "grad_norm": 0.200670063495636,
      "learning_rate": 4.0230490962882915e-06,
      "loss": 0.0105,
      "step": 1834160
    },
    {
      "epoch": 3.001675798459051,
      "grad_norm": 0.13395297527313232,
      "learning_rate": 4.022983204074774e-06,
      "loss": 0.0116,
      "step": 1834180
    },
    {
      "epoch": 3.001708528897704,
      "grad_norm": 0.41080647706985474,
      "learning_rate": 4.022917311861257e-06,
      "loss": 0.0097,
      "step": 1834200
    },
    {
      "epoch": 3.0017412593363577,
      "grad_norm": 0.44586247205734253,
      "learning_rate": 4.022851419647741e-06,
      "loss": 0.0099,
      "step": 1834220
    },
    {
      "epoch": 3.001773989775011,
      "grad_norm": 0.44978001713752747,
      "learning_rate": 4.022785527434223e-06,
      "loss": 0.0131,
      "step": 1834240
    },
    {
      "epoch": 3.0018067202136645,
      "grad_norm": 0.2750394344329834,
      "learning_rate": 4.022719635220707e-06,
      "loss": 0.0064,
      "step": 1834260
    },
    {
      "epoch": 3.0018394506523176,
      "grad_norm": 0.14833614230155945,
      "learning_rate": 4.02265374300719e-06,
      "loss": 0.0144,
      "step": 1834280
    },
    {
      "epoch": 3.001872181090971,
      "grad_norm": 0.30523914098739624,
      "learning_rate": 4.022587850793672e-06,
      "loss": 0.0119,
      "step": 1834300
    },
    {
      "epoch": 3.0019049115296244,
      "grad_norm": 0.2816614508628845,
      "learning_rate": 4.022521958580155e-06,
      "loss": 0.0125,
      "step": 1834320
    },
    {
      "epoch": 3.0019376419682775,
      "grad_norm": 0.28115183115005493,
      "learning_rate": 4.022456066366638e-06,
      "loss": 0.0073,
      "step": 1834340
    },
    {
      "epoch": 3.001970372406931,
      "grad_norm": 0.20802845060825348,
      "learning_rate": 4.022390174153121e-06,
      "loss": 0.0142,
      "step": 1834360
    },
    {
      "epoch": 3.0020031028455842,
      "grad_norm": 0.27586203813552856,
      "learning_rate": 4.022324281939603e-06,
      "loss": 0.01,
      "step": 1834380
    },
    {
      "epoch": 3.002035833284238,
      "grad_norm": 0.2103743851184845,
      "learning_rate": 4.022258389726086e-06,
      "loss": 0.011,
      "step": 1834400
    },
    {
      "epoch": 3.002068563722891,
      "grad_norm": 0.3644236624240875,
      "learning_rate": 4.022192497512569e-06,
      "loss": 0.0112,
      "step": 1834420
    },
    {
      "epoch": 3.002101294161544,
      "grad_norm": 0.2657850384712219,
      "learning_rate": 4.0221266052990524e-06,
      "loss": 0.0093,
      "step": 1834440
    },
    {
      "epoch": 3.0021340246001977,
      "grad_norm": 0.26346665620803833,
      "learning_rate": 4.022060713085535e-06,
      "loss": 0.0109,
      "step": 1834460
    },
    {
      "epoch": 3.002166755038851,
      "grad_norm": 0.11027424037456512,
      "learning_rate": 4.021994820872018e-06,
      "loss": 0.0068,
      "step": 1834480
    },
    {
      "epoch": 3.0021994854775045,
      "grad_norm": 0.18650712072849274,
      "learning_rate": 4.021928928658501e-06,
      "loss": 0.0116,
      "step": 1834500
    },
    {
      "epoch": 3.0022322159161576,
      "grad_norm": 0.34317925572395325,
      "learning_rate": 4.021863036444983e-06,
      "loss": 0.0141,
      "step": 1834520
    },
    {
      "epoch": 3.0022649463548112,
      "grad_norm": 0.16184301674365997,
      "learning_rate": 4.021797144231466e-06,
      "loss": 0.0189,
      "step": 1834540
    },
    {
      "epoch": 3.0022976767934644,
      "grad_norm": 0.3049198389053345,
      "learning_rate": 4.021731252017949e-06,
      "loss": 0.013,
      "step": 1834560
    },
    {
      "epoch": 3.0023304072321175,
      "grad_norm": 0.12594982981681824,
      "learning_rate": 4.0216653598044325e-06,
      "loss": 0.0098,
      "step": 1834580
    },
    {
      "epoch": 3.002363137670771,
      "grad_norm": 0.23221373558044434,
      "learning_rate": 4.021599467590915e-06,
      "loss": 0.0155,
      "step": 1834600
    },
    {
      "epoch": 3.0023958681094243,
      "grad_norm": 0.2644160985946655,
      "learning_rate": 4.021533575377398e-06,
      "loss": 0.0097,
      "step": 1834620
    },
    {
      "epoch": 3.002428598548078,
      "grad_norm": 0.09625094383955002,
      "learning_rate": 4.021467683163881e-06,
      "loss": 0.0081,
      "step": 1834640
    },
    {
      "epoch": 3.002461328986731,
      "grad_norm": 0.1682693213224411,
      "learning_rate": 4.021401790950364e-06,
      "loss": 0.0072,
      "step": 1834660
    },
    {
      "epoch": 3.0024940594253846,
      "grad_norm": 0.48473140597343445,
      "learning_rate": 4.021335898736847e-06,
      "loss": 0.0138,
      "step": 1834680
    },
    {
      "epoch": 3.0025267898640378,
      "grad_norm": 0.6766295433044434,
      "learning_rate": 4.02127000652333e-06,
      "loss": 0.0104,
      "step": 1834700
    },
    {
      "epoch": 3.002559520302691,
      "grad_norm": 0.7170583605766296,
      "learning_rate": 4.0212041143098125e-06,
      "loss": 0.0134,
      "step": 1834720
    },
    {
      "epoch": 3.0025922507413445,
      "grad_norm": 0.27908653020858765,
      "learning_rate": 4.021138222096295e-06,
      "loss": 0.0102,
      "step": 1834740
    },
    {
      "epoch": 3.0026249811799977,
      "grad_norm": 0.4553402364253998,
      "learning_rate": 4.021072329882778e-06,
      "loss": 0.0118,
      "step": 1834760
    },
    {
      "epoch": 3.0026577116186512,
      "grad_norm": 0.10727912187576294,
      "learning_rate": 4.021006437669261e-06,
      "loss": 0.0098,
      "step": 1834780
    },
    {
      "epoch": 3.0026904420573044,
      "grad_norm": 0.12439840286970139,
      "learning_rate": 4.0209405454557435e-06,
      "loss": 0.011,
      "step": 1834800
    },
    {
      "epoch": 3.002723172495958,
      "grad_norm": 0.2659848928451538,
      "learning_rate": 4.020874653242226e-06,
      "loss": 0.009,
      "step": 1834820
    },
    {
      "epoch": 3.002755902934611,
      "grad_norm": 0.1372702568769455,
      "learning_rate": 4.02080876102871e-06,
      "loss": 0.0119,
      "step": 1834840
    },
    {
      "epoch": 3.0027886333732643,
      "grad_norm": 0.24385282397270203,
      "learning_rate": 4.0207428688151925e-06,
      "loss": 0.0116,
      "step": 1834860
    },
    {
      "epoch": 3.002821363811918,
      "grad_norm": 0.2886965572834015,
      "learning_rate": 4.020676976601675e-06,
      "loss": 0.0092,
      "step": 1834880
    },
    {
      "epoch": 3.002854094250571,
      "grad_norm": 0.16973188519477844,
      "learning_rate": 4.020611084388158e-06,
      "loss": 0.0113,
      "step": 1834900
    },
    {
      "epoch": 3.0028868246892246,
      "grad_norm": 0.4362407624721527,
      "learning_rate": 4.020545192174641e-06,
      "loss": 0.012,
      "step": 1834920
    },
    {
      "epoch": 3.002919555127878,
      "grad_norm": 0.26355162262916565,
      "learning_rate": 4.020479299961124e-06,
      "loss": 0.0159,
      "step": 1834940
    },
    {
      "epoch": 3.0029522855665314,
      "grad_norm": 0.18481998145580292,
      "learning_rate": 4.020413407747607e-06,
      "loss": 0.0102,
      "step": 1834960
    },
    {
      "epoch": 3.0029850160051845,
      "grad_norm": 0.2000817209482193,
      "learning_rate": 4.02034751553409e-06,
      "loss": 0.0141,
      "step": 1834980
    },
    {
      "epoch": 3.0030177464438377,
      "grad_norm": 0.2895300090312958,
      "learning_rate": 4.0202816233205725e-06,
      "loss": 0.0109,
      "step": 1835000
    },
    {
      "epoch": 3.0030504768824913,
      "grad_norm": 0.4563390612602234,
      "learning_rate": 4.020215731107055e-06,
      "loss": 0.0127,
      "step": 1835020
    },
    {
      "epoch": 3.0030832073211444,
      "grad_norm": 0.41983404755592346,
      "learning_rate": 4.020149838893538e-06,
      "loss": 0.0082,
      "step": 1835040
    },
    {
      "epoch": 3.003115937759798,
      "grad_norm": 0.22664502263069153,
      "learning_rate": 4.020083946680022e-06,
      "loss": 0.0112,
      "step": 1835060
    },
    {
      "epoch": 3.003148668198451,
      "grad_norm": 0.1398112177848816,
      "learning_rate": 4.020018054466504e-06,
      "loss": 0.0161,
      "step": 1835080
    },
    {
      "epoch": 3.0031813986371043,
      "grad_norm": 0.16383205354213715,
      "learning_rate": 4.019952162252987e-06,
      "loss": 0.0092,
      "step": 1835100
    },
    {
      "epoch": 3.003214129075758,
      "grad_norm": 0.299345999956131,
      "learning_rate": 4.01988627003947e-06,
      "loss": 0.0096,
      "step": 1835120
    },
    {
      "epoch": 3.003246859514411,
      "grad_norm": 0.18787169456481934,
      "learning_rate": 4.0198203778259526e-06,
      "loss": 0.0121,
      "step": 1835140
    },
    {
      "epoch": 3.0032795899530647,
      "grad_norm": 0.23272843658924103,
      "learning_rate": 4.019754485612435e-06,
      "loss": 0.0145,
      "step": 1835160
    },
    {
      "epoch": 3.003312320391718,
      "grad_norm": 0.2232714295387268,
      "learning_rate": 4.019688593398918e-06,
      "loss": 0.0133,
      "step": 1835180
    },
    {
      "epoch": 3.0033450508303714,
      "grad_norm": 0.2956087291240692,
      "learning_rate": 4.019622701185401e-06,
      "loss": 0.0067,
      "step": 1835200
    },
    {
      "epoch": 3.0033777812690245,
      "grad_norm": 0.3336525559425354,
      "learning_rate": 4.0195568089718835e-06,
      "loss": 0.0112,
      "step": 1835220
    },
    {
      "epoch": 3.0034105117076777,
      "grad_norm": 0.19136367738246918,
      "learning_rate": 4.019490916758367e-06,
      "loss": 0.0102,
      "step": 1835240
    },
    {
      "epoch": 3.0034432421463313,
      "grad_norm": 0.5946841835975647,
      "learning_rate": 4.01942502454485e-06,
      "loss": 0.0168,
      "step": 1835260
    },
    {
      "epoch": 3.0034759725849844,
      "grad_norm": 0.1445424109697342,
      "learning_rate": 4.019359132331333e-06,
      "loss": 0.0095,
      "step": 1835280
    },
    {
      "epoch": 3.003508703023638,
      "grad_norm": 0.05900920182466507,
      "learning_rate": 4.019293240117816e-06,
      "loss": 0.0134,
      "step": 1835300
    },
    {
      "epoch": 3.003541433462291,
      "grad_norm": 0.22562158107757568,
      "learning_rate": 4.019227347904299e-06,
      "loss": 0.0128,
      "step": 1835320
    },
    {
      "epoch": 3.003574163900945,
      "grad_norm": 0.22012674808502197,
      "learning_rate": 4.019161455690782e-06,
      "loss": 0.0077,
      "step": 1835340
    },
    {
      "epoch": 3.003606894339598,
      "grad_norm": 0.24166396260261536,
      "learning_rate": 4.019095563477264e-06,
      "loss": 0.0097,
      "step": 1835360
    },
    {
      "epoch": 3.003639624778251,
      "grad_norm": 0.314924418926239,
      "learning_rate": 4.019029671263747e-06,
      "loss": 0.0096,
      "step": 1835380
    },
    {
      "epoch": 3.0036723552169047,
      "grad_norm": 0.14558418095111847,
      "learning_rate": 4.01896377905023e-06,
      "loss": 0.0107,
      "step": 1835400
    },
    {
      "epoch": 3.003705085655558,
      "grad_norm": 0.4050233066082001,
      "learning_rate": 4.018897886836713e-06,
      "loss": 0.0109,
      "step": 1835420
    },
    {
      "epoch": 3.0037378160942114,
      "grad_norm": 0.27425530552864075,
      "learning_rate": 4.018831994623195e-06,
      "loss": 0.0118,
      "step": 1835440
    },
    {
      "epoch": 3.0037705465328646,
      "grad_norm": 0.37003692984580994,
      "learning_rate": 4.018766102409679e-06,
      "loss": 0.0104,
      "step": 1835460
    },
    {
      "epoch": 3.003803276971518,
      "grad_norm": 0.12555687129497528,
      "learning_rate": 4.018700210196162e-06,
      "loss": 0.0105,
      "step": 1835480
    },
    {
      "epoch": 3.0038360074101713,
      "grad_norm": 0.2652253806591034,
      "learning_rate": 4.0186343179826444e-06,
      "loss": 0.0078,
      "step": 1835500
    },
    {
      "epoch": 3.0038687378488245,
      "grad_norm": 0.46753615140914917,
      "learning_rate": 4.018568425769127e-06,
      "loss": 0.0105,
      "step": 1835520
    },
    {
      "epoch": 3.003901468287478,
      "grad_norm": 0.42003804445266724,
      "learning_rate": 4.01850253355561e-06,
      "loss": 0.0167,
      "step": 1835540
    },
    {
      "epoch": 3.003934198726131,
      "grad_norm": 0.5368241667747498,
      "learning_rate": 4.018436641342093e-06,
      "loss": 0.0097,
      "step": 1835560
    },
    {
      "epoch": 3.003966929164785,
      "grad_norm": 0.2776637673377991,
      "learning_rate": 4.018370749128575e-06,
      "loss": 0.0113,
      "step": 1835580
    },
    {
      "epoch": 3.003999659603438,
      "grad_norm": 0.5023194551467896,
      "learning_rate": 4.018304856915058e-06,
      "loss": 0.0122,
      "step": 1835600
    },
    {
      "epoch": 3.0040323900420915,
      "grad_norm": 0.48765838146209717,
      "learning_rate": 4.018238964701542e-06,
      "loss": 0.0108,
      "step": 1835620
    },
    {
      "epoch": 3.0040651204807447,
      "grad_norm": 0.2651703357696533,
      "learning_rate": 4.0181730724880245e-06,
      "loss": 0.0129,
      "step": 1835640
    },
    {
      "epoch": 3.004097850919398,
      "grad_norm": 0.2576735317707062,
      "learning_rate": 4.018107180274507e-06,
      "loss": 0.0087,
      "step": 1835660
    },
    {
      "epoch": 3.0041305813580514,
      "grad_norm": 0.3592540919780731,
      "learning_rate": 4.018041288060991e-06,
      "loss": 0.0104,
      "step": 1835680
    },
    {
      "epoch": 3.0041633117967046,
      "grad_norm": 0.3093215525150299,
      "learning_rate": 4.0179753958474735e-06,
      "loss": 0.0168,
      "step": 1835700
    },
    {
      "epoch": 3.004196042235358,
      "grad_norm": 0.17705649137496948,
      "learning_rate": 4.017909503633956e-06,
      "loss": 0.0086,
      "step": 1835720
    },
    {
      "epoch": 3.0042287726740113,
      "grad_norm": 0.11969583481550217,
      "learning_rate": 4.017843611420439e-06,
      "loss": 0.0125,
      "step": 1835740
    },
    {
      "epoch": 3.004261503112665,
      "grad_norm": 0.4205111861228943,
      "learning_rate": 4.017777719206922e-06,
      "loss": 0.0146,
      "step": 1835760
    },
    {
      "epoch": 3.004294233551318,
      "grad_norm": 0.48627451062202454,
      "learning_rate": 4.0177118269934045e-06,
      "loss": 0.0207,
      "step": 1835780
    },
    {
      "epoch": 3.0043269639899712,
      "grad_norm": 0.3508908450603485,
      "learning_rate": 4.017645934779887e-06,
      "loss": 0.0123,
      "step": 1835800
    },
    {
      "epoch": 3.004359694428625,
      "grad_norm": 0.18769004940986633,
      "learning_rate": 4.01758004256637e-06,
      "loss": 0.021,
      "step": 1835820
    },
    {
      "epoch": 3.004392424867278,
      "grad_norm": 0.5573145747184753,
      "learning_rate": 4.017514150352853e-06,
      "loss": 0.0126,
      "step": 1835840
    },
    {
      "epoch": 3.0044251553059316,
      "grad_norm": 0.8364638090133667,
      "learning_rate": 4.017448258139336e-06,
      "loss": 0.0122,
      "step": 1835860
    },
    {
      "epoch": 3.0044578857445847,
      "grad_norm": 0.9076282978057861,
      "learning_rate": 4.017382365925819e-06,
      "loss": 0.0172,
      "step": 1835880
    },
    {
      "epoch": 3.004490616183238,
      "grad_norm": 0.2727949023246765,
      "learning_rate": 4.017316473712302e-06,
      "loss": 0.0103,
      "step": 1835900
    },
    {
      "epoch": 3.0045233466218915,
      "grad_norm": 0.5627322793006897,
      "learning_rate": 4.0172505814987845e-06,
      "loss": 0.0103,
      "step": 1835920
    },
    {
      "epoch": 3.0045560770605446,
      "grad_norm": 0.16333019733428955,
      "learning_rate": 4.017184689285267e-06,
      "loss": 0.0114,
      "step": 1835940
    },
    {
      "epoch": 3.004588807499198,
      "grad_norm": 0.11474749445915222,
      "learning_rate": 4.01711879707175e-06,
      "loss": 0.0105,
      "step": 1835960
    },
    {
      "epoch": 3.0046215379378514,
      "grad_norm": 0.13364188373088837,
      "learning_rate": 4.017052904858233e-06,
      "loss": 0.0116,
      "step": 1835980
    },
    {
      "epoch": 3.004654268376505,
      "grad_norm": 0.1388094574213028,
      "learning_rate": 4.016987012644716e-06,
      "loss": 0.0116,
      "step": 1836000
    },
    {
      "epoch": 3.004686998815158,
      "grad_norm": 0.07996074110269547,
      "learning_rate": 4.016921120431199e-06,
      "loss": 0.0069,
      "step": 1836020
    },
    {
      "epoch": 3.0047197292538113,
      "grad_norm": 0.5747204422950745,
      "learning_rate": 4.016855228217682e-06,
      "loss": 0.0096,
      "step": 1836040
    },
    {
      "epoch": 3.004752459692465,
      "grad_norm": 0.6848069429397583,
      "learning_rate": 4.0167893360041645e-06,
      "loss": 0.01,
      "step": 1836060
    },
    {
      "epoch": 3.004785190131118,
      "grad_norm": 0.2840576171875,
      "learning_rate": 4.016723443790648e-06,
      "loss": 0.0082,
      "step": 1836080
    },
    {
      "epoch": 3.0048179205697716,
      "grad_norm": 0.11553501337766647,
      "learning_rate": 4.016657551577131e-06,
      "loss": 0.0094,
      "step": 1836100
    },
    {
      "epoch": 3.0048506510084247,
      "grad_norm": 0.5101653337478638,
      "learning_rate": 4.016591659363614e-06,
      "loss": 0.0133,
      "step": 1836120
    },
    {
      "epoch": 3.0048833814470783,
      "grad_norm": 0.5761990547180176,
      "learning_rate": 4.016525767150096e-06,
      "loss": 0.016,
      "step": 1836140
    },
    {
      "epoch": 3.0049161118857315,
      "grad_norm": 0.14883320033550262,
      "learning_rate": 4.016459874936579e-06,
      "loss": 0.0087,
      "step": 1836160
    },
    {
      "epoch": 3.0049488423243846,
      "grad_norm": 0.25504595041275024,
      "learning_rate": 4.016393982723062e-06,
      "loss": 0.0081,
      "step": 1836180
    },
    {
      "epoch": 3.0049815727630382,
      "grad_norm": 0.367972195148468,
      "learning_rate": 4.0163280905095446e-06,
      "loss": 0.0096,
      "step": 1836200
    },
    {
      "epoch": 3.0050143032016914,
      "grad_norm": 0.17942704260349274,
      "learning_rate": 4.016262198296027e-06,
      "loss": 0.0093,
      "step": 1836220
    },
    {
      "epoch": 3.005047033640345,
      "grad_norm": 0.3911038041114807,
      "learning_rate": 4.01619630608251e-06,
      "loss": 0.0183,
      "step": 1836240
    },
    {
      "epoch": 3.005079764078998,
      "grad_norm": 0.46568039059638977,
      "learning_rate": 4.016130413868994e-06,
      "loss": 0.011,
      "step": 1836260
    },
    {
      "epoch": 3.0051124945176517,
      "grad_norm": 0.17818336188793182,
      "learning_rate": 4.016064521655476e-06,
      "loss": 0.0105,
      "step": 1836280
    },
    {
      "epoch": 3.005145224956305,
      "grad_norm": 0.06232582405209541,
      "learning_rate": 4.015998629441959e-06,
      "loss": 0.0086,
      "step": 1836300
    },
    {
      "epoch": 3.005177955394958,
      "grad_norm": 0.11313265562057495,
      "learning_rate": 4.015932737228442e-06,
      "loss": 0.0087,
      "step": 1836320
    },
    {
      "epoch": 3.0052106858336116,
      "grad_norm": 0.7071481943130493,
      "learning_rate": 4.0158668450149254e-06,
      "loss": 0.0168,
      "step": 1836340
    },
    {
      "epoch": 3.0052434162722648,
      "grad_norm": 0.13919538259506226,
      "learning_rate": 4.015800952801408e-06,
      "loss": 0.0083,
      "step": 1836360
    },
    {
      "epoch": 3.0052761467109184,
      "grad_norm": 0.15001074969768524,
      "learning_rate": 4.015735060587891e-06,
      "loss": 0.0091,
      "step": 1836380
    },
    {
      "epoch": 3.0053088771495715,
      "grad_norm": 0.38698023557662964,
      "learning_rate": 4.015669168374374e-06,
      "loss": 0.0126,
      "step": 1836400
    },
    {
      "epoch": 3.005341607588225,
      "grad_norm": 0.38970106840133667,
      "learning_rate": 4.015603276160856e-06,
      "loss": 0.0096,
      "step": 1836420
    },
    {
      "epoch": 3.0053743380268783,
      "grad_norm": 0.16910505294799805,
      "learning_rate": 4.015537383947339e-06,
      "loss": 0.0149,
      "step": 1836440
    },
    {
      "epoch": 3.0054070684655314,
      "grad_norm": 0.07457008957862854,
      "learning_rate": 4.015471491733822e-06,
      "loss": 0.0111,
      "step": 1836460
    },
    {
      "epoch": 3.005439798904185,
      "grad_norm": 0.11535003781318665,
      "learning_rate": 4.0154055995203055e-06,
      "loss": 0.0062,
      "step": 1836480
    },
    {
      "epoch": 3.005472529342838,
      "grad_norm": 0.34239503741264343,
      "learning_rate": 4.015339707306788e-06,
      "loss": 0.0069,
      "step": 1836500
    },
    {
      "epoch": 3.0055052597814917,
      "grad_norm": 0.22350551187992096,
      "learning_rate": 4.015273815093271e-06,
      "loss": 0.0127,
      "step": 1836520
    },
    {
      "epoch": 3.005537990220145,
      "grad_norm": 0.27996253967285156,
      "learning_rate": 4.015207922879754e-06,
      "loss": 0.012,
      "step": 1836540
    },
    {
      "epoch": 3.0055707206587985,
      "grad_norm": 0.6105581521987915,
      "learning_rate": 4.015142030666236e-06,
      "loss": 0.0152,
      "step": 1836560
    },
    {
      "epoch": 3.0056034510974516,
      "grad_norm": 0.12226823717355728,
      "learning_rate": 4.015076138452719e-06,
      "loss": 0.0064,
      "step": 1836580
    },
    {
      "epoch": 3.005636181536105,
      "grad_norm": 0.4806016683578491,
      "learning_rate": 4.015010246239202e-06,
      "loss": 0.0121,
      "step": 1836600
    },
    {
      "epoch": 3.0056689119747584,
      "grad_norm": 0.09628090262413025,
      "learning_rate": 4.014944354025685e-06,
      "loss": 0.0137,
      "step": 1836620
    },
    {
      "epoch": 3.0057016424134115,
      "grad_norm": 0.09459137171506882,
      "learning_rate": 4.014878461812167e-06,
      "loss": 0.0118,
      "step": 1836640
    },
    {
      "epoch": 3.005734372852065,
      "grad_norm": 1.2223814725875854,
      "learning_rate": 4.014812569598651e-06,
      "loss": 0.0079,
      "step": 1836660
    },
    {
      "epoch": 3.0057671032907183,
      "grad_norm": 0.27561748027801514,
      "learning_rate": 4.014746677385134e-06,
      "loss": 0.0121,
      "step": 1836680
    },
    {
      "epoch": 3.0057998337293714,
      "grad_norm": 0.4054611623287201,
      "learning_rate": 4.0146807851716164e-06,
      "loss": 0.0076,
      "step": 1836700
    },
    {
      "epoch": 3.005832564168025,
      "grad_norm": 0.1729590892791748,
      "learning_rate": 4.0146148929581e-06,
      "loss": 0.013,
      "step": 1836720
    },
    {
      "epoch": 3.005865294606678,
      "grad_norm": 0.3636440634727478,
      "learning_rate": 4.014549000744583e-06,
      "loss": 0.0146,
      "step": 1836740
    },
    {
      "epoch": 3.0058980250453318,
      "grad_norm": 0.42587193846702576,
      "learning_rate": 4.0144831085310655e-06,
      "loss": 0.0136,
      "step": 1836760
    },
    {
      "epoch": 3.005930755483985,
      "grad_norm": 0.9104633331298828,
      "learning_rate": 4.014417216317548e-06,
      "loss": 0.0151,
      "step": 1836780
    },
    {
      "epoch": 3.0059634859226385,
      "grad_norm": 0.10954524576663971,
      "learning_rate": 4.014351324104031e-06,
      "loss": 0.0148,
      "step": 1836800
    },
    {
      "epoch": 3.0059962163612917,
      "grad_norm": 0.17184266448020935,
      "learning_rate": 4.014285431890514e-06,
      "loss": 0.0093,
      "step": 1836820
    },
    {
      "epoch": 3.006028946799945,
      "grad_norm": 0.2032099813222885,
      "learning_rate": 4.0142195396769965e-06,
      "loss": 0.0073,
      "step": 1836840
    },
    {
      "epoch": 3.0060616772385984,
      "grad_norm": 1.0502703189849854,
      "learning_rate": 4.014153647463479e-06,
      "loss": 0.0145,
      "step": 1836860
    },
    {
      "epoch": 3.0060944076772516,
      "grad_norm": 0.5711371898651123,
      "learning_rate": 4.014087755249963e-06,
      "loss": 0.0087,
      "step": 1836880
    },
    {
      "epoch": 3.006127138115905,
      "grad_norm": 0.43583646416664124,
      "learning_rate": 4.0140218630364455e-06,
      "loss": 0.0165,
      "step": 1836900
    },
    {
      "epoch": 3.0061598685545583,
      "grad_norm": 0.2609255313873291,
      "learning_rate": 4.013955970822928e-06,
      "loss": 0.011,
      "step": 1836920
    },
    {
      "epoch": 3.006192598993212,
      "grad_norm": 0.53626948595047,
      "learning_rate": 4.013890078609411e-06,
      "loss": 0.0063,
      "step": 1836940
    },
    {
      "epoch": 3.006225329431865,
      "grad_norm": 0.1732034832239151,
      "learning_rate": 4.013824186395894e-06,
      "loss": 0.011,
      "step": 1836960
    },
    {
      "epoch": 3.006258059870518,
      "grad_norm": 0.6420511603355408,
      "learning_rate": 4.0137582941823765e-06,
      "loss": 0.0098,
      "step": 1836980
    },
    {
      "epoch": 3.006290790309172,
      "grad_norm": 0.0717211589217186,
      "learning_rate": 4.013692401968859e-06,
      "loss": 0.0094,
      "step": 1837000
    },
    {
      "epoch": 3.006323520747825,
      "grad_norm": 1.8623888492584229,
      "learning_rate": 4.013626509755342e-06,
      "loss": 0.0188,
      "step": 1837020
    },
    {
      "epoch": 3.0063562511864785,
      "grad_norm": 0.2564188539981842,
      "learning_rate": 4.0135606175418256e-06,
      "loss": 0.0095,
      "step": 1837040
    },
    {
      "epoch": 3.0063889816251317,
      "grad_norm": 0.1153600737452507,
      "learning_rate": 4.013494725328308e-06,
      "loss": 0.0111,
      "step": 1837060
    },
    {
      "epoch": 3.0064217120637853,
      "grad_norm": 0.1775544434785843,
      "learning_rate": 4.013428833114791e-06,
      "loss": 0.0179,
      "step": 1837080
    },
    {
      "epoch": 3.0064544425024384,
      "grad_norm": 0.6371268630027771,
      "learning_rate": 4.013362940901275e-06,
      "loss": 0.0148,
      "step": 1837100
    },
    {
      "epoch": 3.0064871729410916,
      "grad_norm": 0.4054744243621826,
      "learning_rate": 4.013297048687757e-06,
      "loss": 0.0126,
      "step": 1837120
    },
    {
      "epoch": 3.006519903379745,
      "grad_norm": 0.13875006139278412,
      "learning_rate": 4.01323115647424e-06,
      "loss": 0.0117,
      "step": 1837140
    },
    {
      "epoch": 3.0065526338183983,
      "grad_norm": 0.1757482886314392,
      "learning_rate": 4.013165264260723e-06,
      "loss": 0.0118,
      "step": 1837160
    },
    {
      "epoch": 3.006585364257052,
      "grad_norm": 0.26795947551727295,
      "learning_rate": 4.013099372047206e-06,
      "loss": 0.009,
      "step": 1837180
    },
    {
      "epoch": 3.006618094695705,
      "grad_norm": 0.14602507650852203,
      "learning_rate": 4.013033479833688e-06,
      "loss": 0.0149,
      "step": 1837200
    },
    {
      "epoch": 3.0066508251343587,
      "grad_norm": 0.22450190782546997,
      "learning_rate": 4.012967587620171e-06,
      "loss": 0.0099,
      "step": 1837220
    },
    {
      "epoch": 3.006683555573012,
      "grad_norm": 0.10805395990610123,
      "learning_rate": 4.012901695406654e-06,
      "loss": 0.0105,
      "step": 1837240
    },
    {
      "epoch": 3.006716286011665,
      "grad_norm": 0.31130123138427734,
      "learning_rate": 4.0128358031931365e-06,
      "loss": 0.0107,
      "step": 1837260
    },
    {
      "epoch": 3.0067490164503186,
      "grad_norm": 0.9483778476715088,
      "learning_rate": 4.01276991097962e-06,
      "loss": 0.0125,
      "step": 1837280
    },
    {
      "epoch": 3.0067817468889717,
      "grad_norm": 0.23296092450618744,
      "learning_rate": 4.012704018766103e-06,
      "loss": 0.0144,
      "step": 1837300
    },
    {
      "epoch": 3.0068144773276253,
      "grad_norm": 1.1588724851608276,
      "learning_rate": 4.012638126552586e-06,
      "loss": 0.0144,
      "step": 1837320
    },
    {
      "epoch": 3.0068472077662785,
      "grad_norm": 0.407128244638443,
      "learning_rate": 4.012572234339068e-06,
      "loss": 0.0115,
      "step": 1837340
    },
    {
      "epoch": 3.0068799382049316,
      "grad_norm": 0.1741369068622589,
      "learning_rate": 4.012506342125551e-06,
      "loss": 0.0168,
      "step": 1837360
    },
    {
      "epoch": 3.006912668643585,
      "grad_norm": 0.8024141788482666,
      "learning_rate": 4.012440449912034e-06,
      "loss": 0.0122,
      "step": 1837380
    },
    {
      "epoch": 3.0069453990822383,
      "grad_norm": 0.1912626177072525,
      "learning_rate": 4.012374557698517e-06,
      "loss": 0.0133,
      "step": 1837400
    },
    {
      "epoch": 3.006978129520892,
      "grad_norm": 0.16226401925086975,
      "learning_rate": 4.012308665485e-06,
      "loss": 0.0122,
      "step": 1837420
    },
    {
      "epoch": 3.007010859959545,
      "grad_norm": 0.21503585577011108,
      "learning_rate": 4.012242773271483e-06,
      "loss": 0.0114,
      "step": 1837440
    },
    {
      "epoch": 3.0070435903981987,
      "grad_norm": 0.2809886634349823,
      "learning_rate": 4.012176881057966e-06,
      "loss": 0.0134,
      "step": 1837460
    },
    {
      "epoch": 3.007076320836852,
      "grad_norm": 0.16652525961399078,
      "learning_rate": 4.012110988844448e-06,
      "loss": 0.0084,
      "step": 1837480
    },
    {
      "epoch": 3.007109051275505,
      "grad_norm": 0.21048131585121155,
      "learning_rate": 4.012045096630932e-06,
      "loss": 0.0135,
      "step": 1837500
    },
    {
      "epoch": 3.0071417817141586,
      "grad_norm": 0.4421013593673706,
      "learning_rate": 4.011979204417415e-06,
      "loss": 0.0145,
      "step": 1837520
    },
    {
      "epoch": 3.0071745121528117,
      "grad_norm": 0.15691298246383667,
      "learning_rate": 4.0119133122038974e-06,
      "loss": 0.0129,
      "step": 1837540
    },
    {
      "epoch": 3.0072072425914653,
      "grad_norm": 0.1491883397102356,
      "learning_rate": 4.01184741999038e-06,
      "loss": 0.0108,
      "step": 1837560
    },
    {
      "epoch": 3.0072399730301185,
      "grad_norm": 0.2045518010854721,
      "learning_rate": 4.011781527776863e-06,
      "loss": 0.0127,
      "step": 1837580
    },
    {
      "epoch": 3.007272703468772,
      "grad_norm": 0.4636695086956024,
      "learning_rate": 4.011715635563346e-06,
      "loss": 0.0134,
      "step": 1837600
    },
    {
      "epoch": 3.007305433907425,
      "grad_norm": 0.5349385738372803,
      "learning_rate": 4.011649743349828e-06,
      "loss": 0.0119,
      "step": 1837620
    },
    {
      "epoch": 3.0073381643460784,
      "grad_norm": 0.5734817385673523,
      "learning_rate": 4.011583851136311e-06,
      "loss": 0.0143,
      "step": 1837640
    },
    {
      "epoch": 3.007370894784732,
      "grad_norm": 0.1350679248571396,
      "learning_rate": 4.011517958922794e-06,
      "loss": 0.0108,
      "step": 1837660
    },
    {
      "epoch": 3.007403625223385,
      "grad_norm": 0.2756738066673279,
      "learning_rate": 4.0114520667092775e-06,
      "loss": 0.0106,
      "step": 1837680
    },
    {
      "epoch": 3.0074363556620387,
      "grad_norm": 0.16218537092208862,
      "learning_rate": 4.01138617449576e-06,
      "loss": 0.0111,
      "step": 1837700
    },
    {
      "epoch": 3.007469086100692,
      "grad_norm": 0.4354689121246338,
      "learning_rate": 4.011320282282243e-06,
      "loss": 0.0159,
      "step": 1837720
    },
    {
      "epoch": 3.0075018165393455,
      "grad_norm": 0.23052948713302612,
      "learning_rate": 4.011254390068726e-06,
      "loss": 0.0125,
      "step": 1837740
    },
    {
      "epoch": 3.0075345469779986,
      "grad_norm": 0.35880863666534424,
      "learning_rate": 4.011188497855209e-06,
      "loss": 0.0104,
      "step": 1837760
    },
    {
      "epoch": 3.0075672774166518,
      "grad_norm": 0.3423875868320465,
      "learning_rate": 4.011122605641692e-06,
      "loss": 0.0105,
      "step": 1837780
    },
    {
      "epoch": 3.0076000078553053,
      "grad_norm": 0.32563474774360657,
      "learning_rate": 4.011056713428175e-06,
      "loss": 0.0134,
      "step": 1837800
    },
    {
      "epoch": 3.0076327382939585,
      "grad_norm": 0.1857467144727707,
      "learning_rate": 4.0109908212146575e-06,
      "loss": 0.0088,
      "step": 1837820
    },
    {
      "epoch": 3.007665468732612,
      "grad_norm": 0.09275937080383301,
      "learning_rate": 4.01092492900114e-06,
      "loss": 0.01,
      "step": 1837840
    },
    {
      "epoch": 3.0076981991712652,
      "grad_norm": 0.23872461915016174,
      "learning_rate": 4.010859036787623e-06,
      "loss": 0.0119,
      "step": 1837860
    },
    {
      "epoch": 3.007730929609919,
      "grad_norm": 0.04091718792915344,
      "learning_rate": 4.010793144574106e-06,
      "loss": 0.0107,
      "step": 1837880
    },
    {
      "epoch": 3.007763660048572,
      "grad_norm": 0.15063464641571045,
      "learning_rate": 4.010727252360589e-06,
      "loss": 0.0151,
      "step": 1837900
    },
    {
      "epoch": 3.007796390487225,
      "grad_norm": 0.7369277477264404,
      "learning_rate": 4.010661360147072e-06,
      "loss": 0.0118,
      "step": 1837920
    },
    {
      "epoch": 3.0078291209258787,
      "grad_norm": 0.6650683283805847,
      "learning_rate": 4.010595467933555e-06,
      "loss": 0.0139,
      "step": 1837940
    },
    {
      "epoch": 3.007861851364532,
      "grad_norm": 0.38930410146713257,
      "learning_rate": 4.0105295757200375e-06,
      "loss": 0.0123,
      "step": 1837960
    },
    {
      "epoch": 3.0078945818031855,
      "grad_norm": 0.2526567578315735,
      "learning_rate": 4.01046368350652e-06,
      "loss": 0.0096,
      "step": 1837980
    },
    {
      "epoch": 3.0079273122418386,
      "grad_norm": 0.18854908645153046,
      "learning_rate": 4.010397791293003e-06,
      "loss": 0.0128,
      "step": 1838000
    },
    {
      "epoch": 3.007960042680492,
      "grad_norm": 0.3105561137199402,
      "learning_rate": 4.010331899079486e-06,
      "loss": 0.0119,
      "step": 1838020
    },
    {
      "epoch": 3.0079927731191454,
      "grad_norm": 0.16437168419361115,
      "learning_rate": 4.0102660068659685e-06,
      "loss": 0.0166,
      "step": 1838040
    },
    {
      "epoch": 3.0080255035577985,
      "grad_norm": 0.1279476284980774,
      "learning_rate": 4.010200114652451e-06,
      "loss": 0.0142,
      "step": 1838060
    },
    {
      "epoch": 3.008058233996452,
      "grad_norm": 0.10902929306030273,
      "learning_rate": 4.010134222438935e-06,
      "loss": 0.0129,
      "step": 1838080
    },
    {
      "epoch": 3.0080909644351053,
      "grad_norm": 0.2367403358221054,
      "learning_rate": 4.0100683302254175e-06,
      "loss": 0.0097,
      "step": 1838100
    },
    {
      "epoch": 3.008123694873759,
      "grad_norm": 0.10714495182037354,
      "learning_rate": 4.010002438011901e-06,
      "loss": 0.0107,
      "step": 1838120
    },
    {
      "epoch": 3.008156425312412,
      "grad_norm": 0.7610074281692505,
      "learning_rate": 4.009936545798384e-06,
      "loss": 0.0099,
      "step": 1838140
    },
    {
      "epoch": 3.008189155751065,
      "grad_norm": 0.5001960396766663,
      "learning_rate": 4.009870653584867e-06,
      "loss": 0.0152,
      "step": 1838160
    },
    {
      "epoch": 3.0082218861897188,
      "grad_norm": 0.32268816232681274,
      "learning_rate": 4.009804761371349e-06,
      "loss": 0.0101,
      "step": 1838180
    },
    {
      "epoch": 3.008254616628372,
      "grad_norm": 0.26361900568008423,
      "learning_rate": 4.009738869157832e-06,
      "loss": 0.0133,
      "step": 1838200
    },
    {
      "epoch": 3.0082873470670255,
      "grad_norm": 0.32093581557273865,
      "learning_rate": 4.009672976944315e-06,
      "loss": 0.0108,
      "step": 1838220
    },
    {
      "epoch": 3.0083200775056786,
      "grad_norm": 0.08851755410432816,
      "learning_rate": 4.0096070847307976e-06,
      "loss": 0.0094,
      "step": 1838240
    },
    {
      "epoch": 3.0083528079443322,
      "grad_norm": 0.25552964210510254,
      "learning_rate": 4.00954119251728e-06,
      "loss": 0.0101,
      "step": 1838260
    },
    {
      "epoch": 3.0083855383829854,
      "grad_norm": 0.1496138721704483,
      "learning_rate": 4.009475300303763e-06,
      "loss": 0.0125,
      "step": 1838280
    },
    {
      "epoch": 3.0084182688216385,
      "grad_norm": 0.6387414336204529,
      "learning_rate": 4.009409408090247e-06,
      "loss": 0.0197,
      "step": 1838300
    },
    {
      "epoch": 3.008450999260292,
      "grad_norm": 0.4386827349662781,
      "learning_rate": 4.009343515876729e-06,
      "loss": 0.0125,
      "step": 1838320
    },
    {
      "epoch": 3.0084837296989453,
      "grad_norm": 0.1896134316921234,
      "learning_rate": 4.009277623663212e-06,
      "loss": 0.0095,
      "step": 1838340
    },
    {
      "epoch": 3.008516460137599,
      "grad_norm": 0.12741021811962128,
      "learning_rate": 4.009211731449695e-06,
      "loss": 0.0104,
      "step": 1838360
    },
    {
      "epoch": 3.008549190576252,
      "grad_norm": 0.30854836106300354,
      "learning_rate": 4.009145839236178e-06,
      "loss": 0.0114,
      "step": 1838380
    },
    {
      "epoch": 3.0085819210149056,
      "grad_norm": 0.8591457605361938,
      "learning_rate": 4.00907994702266e-06,
      "loss": 0.0126,
      "step": 1838400
    },
    {
      "epoch": 3.0086146514535588,
      "grad_norm": 0.08417000621557236,
      "learning_rate": 4.009014054809143e-06,
      "loss": 0.0145,
      "step": 1838420
    },
    {
      "epoch": 3.008647381892212,
      "grad_norm": 0.16641399264335632,
      "learning_rate": 4.008948162595626e-06,
      "loss": 0.0055,
      "step": 1838440
    },
    {
      "epoch": 3.0086801123308655,
      "grad_norm": 0.09485644102096558,
      "learning_rate": 4.008882270382109e-06,
      "loss": 0.0103,
      "step": 1838460
    },
    {
      "epoch": 3.0087128427695187,
      "grad_norm": 0.08060674369335175,
      "learning_rate": 4.008816378168592e-06,
      "loss": 0.0077,
      "step": 1838480
    },
    {
      "epoch": 3.0087455732081723,
      "grad_norm": 0.2957504391670227,
      "learning_rate": 4.008750485955075e-06,
      "loss": 0.0091,
      "step": 1838500
    },
    {
      "epoch": 3.0087783036468254,
      "grad_norm": 0.05940951034426689,
      "learning_rate": 4.0086845937415585e-06,
      "loss": 0.0088,
      "step": 1838520
    },
    {
      "epoch": 3.008811034085479,
      "grad_norm": 0.2205599695444107,
      "learning_rate": 4.008618701528041e-06,
      "loss": 0.0095,
      "step": 1838540
    },
    {
      "epoch": 3.008843764524132,
      "grad_norm": 0.08525478839874268,
      "learning_rate": 4.008552809314524e-06,
      "loss": 0.007,
      "step": 1838560
    },
    {
      "epoch": 3.0088764949627853,
      "grad_norm": 0.6580289602279663,
      "learning_rate": 4.008486917101007e-06,
      "loss": 0.0173,
      "step": 1838580
    },
    {
      "epoch": 3.008909225401439,
      "grad_norm": 0.1314067393541336,
      "learning_rate": 4.008421024887489e-06,
      "loss": 0.0139,
      "step": 1838600
    },
    {
      "epoch": 3.008941955840092,
      "grad_norm": 0.14968466758728027,
      "learning_rate": 4.008355132673972e-06,
      "loss": 0.0116,
      "step": 1838620
    },
    {
      "epoch": 3.0089746862787456,
      "grad_norm": 0.5007389187812805,
      "learning_rate": 4.008289240460455e-06,
      "loss": 0.0097,
      "step": 1838640
    },
    {
      "epoch": 3.009007416717399,
      "grad_norm": 0.14339010417461395,
      "learning_rate": 4.008223348246938e-06,
      "loss": 0.01,
      "step": 1838660
    },
    {
      "epoch": 3.0090401471560524,
      "grad_norm": 0.23659925162792206,
      "learning_rate": 4.00815745603342e-06,
      "loss": 0.0174,
      "step": 1838680
    },
    {
      "epoch": 3.0090728775947055,
      "grad_norm": 0.45016059279441833,
      "learning_rate": 4.008091563819904e-06,
      "loss": 0.0111,
      "step": 1838700
    },
    {
      "epoch": 3.0091056080333587,
      "grad_norm": 0.3803277611732483,
      "learning_rate": 4.008025671606387e-06,
      "loss": 0.0083,
      "step": 1838720
    },
    {
      "epoch": 3.0091383384720123,
      "grad_norm": 0.2547498941421509,
      "learning_rate": 4.0079597793928694e-06,
      "loss": 0.0077,
      "step": 1838740
    },
    {
      "epoch": 3.0091710689106654,
      "grad_norm": 0.4583128094673157,
      "learning_rate": 4.007893887179352e-06,
      "loss": 0.0141,
      "step": 1838760
    },
    {
      "epoch": 3.009203799349319,
      "grad_norm": 0.15137332677841187,
      "learning_rate": 4.007827994965835e-06,
      "loss": 0.0101,
      "step": 1838780
    },
    {
      "epoch": 3.009236529787972,
      "grad_norm": 0.08659133315086365,
      "learning_rate": 4.0077621027523185e-06,
      "loss": 0.0123,
      "step": 1838800
    },
    {
      "epoch": 3.0092692602266258,
      "grad_norm": 1.2986255884170532,
      "learning_rate": 4.007696210538801e-06,
      "loss": 0.012,
      "step": 1838820
    },
    {
      "epoch": 3.009301990665279,
      "grad_norm": 0.16501237452030182,
      "learning_rate": 4.007630318325284e-06,
      "loss": 0.0099,
      "step": 1838840
    },
    {
      "epoch": 3.009334721103932,
      "grad_norm": 0.09725120663642883,
      "learning_rate": 4.007564426111767e-06,
      "loss": 0.0076,
      "step": 1838860
    },
    {
      "epoch": 3.0093674515425857,
      "grad_norm": 0.5502399802207947,
      "learning_rate": 4.0074985338982495e-06,
      "loss": 0.0103,
      "step": 1838880
    },
    {
      "epoch": 3.009400181981239,
      "grad_norm": 0.22465090453624725,
      "learning_rate": 4.007432641684732e-06,
      "loss": 0.0113,
      "step": 1838900
    },
    {
      "epoch": 3.0094329124198924,
      "grad_norm": 0.11846612393856049,
      "learning_rate": 4.007366749471216e-06,
      "loss": 0.0128,
      "step": 1838920
    },
    {
      "epoch": 3.0094656428585456,
      "grad_norm": 0.24478848278522491,
      "learning_rate": 4.0073008572576985e-06,
      "loss": 0.0137,
      "step": 1838940
    },
    {
      "epoch": 3.0094983732971987,
      "grad_norm": 0.3574107587337494,
      "learning_rate": 4.007234965044181e-06,
      "loss": 0.0172,
      "step": 1838960
    },
    {
      "epoch": 3.0095311037358523,
      "grad_norm": 0.17825527489185333,
      "learning_rate": 4.007169072830664e-06,
      "loss": 0.0114,
      "step": 1838980
    },
    {
      "epoch": 3.0095638341745055,
      "grad_norm": 0.24856685101985931,
      "learning_rate": 4.007103180617147e-06,
      "loss": 0.0096,
      "step": 1839000
    },
    {
      "epoch": 3.009596564613159,
      "grad_norm": 0.08612911403179169,
      "learning_rate": 4.0070372884036295e-06,
      "loss": 0.0152,
      "step": 1839020
    },
    {
      "epoch": 3.009629295051812,
      "grad_norm": 0.4485425651073456,
      "learning_rate": 4.006971396190112e-06,
      "loss": 0.0114,
      "step": 1839040
    },
    {
      "epoch": 3.009662025490466,
      "grad_norm": 0.3866812586784363,
      "learning_rate": 4.006905503976595e-06,
      "loss": 0.0086,
      "step": 1839060
    },
    {
      "epoch": 3.009694755929119,
      "grad_norm": 0.45959943532943726,
      "learning_rate": 4.006839611763078e-06,
      "loss": 0.0106,
      "step": 1839080
    },
    {
      "epoch": 3.009727486367772,
      "grad_norm": 0.1796698123216629,
      "learning_rate": 4.006773719549561e-06,
      "loss": 0.0156,
      "step": 1839100
    },
    {
      "epoch": 3.0097602168064257,
      "grad_norm": 0.18151363730430603,
      "learning_rate": 4.006707827336044e-06,
      "loss": 0.0103,
      "step": 1839120
    },
    {
      "epoch": 3.009792947245079,
      "grad_norm": 0.5462206602096558,
      "learning_rate": 4.006641935122527e-06,
      "loss": 0.01,
      "step": 1839140
    },
    {
      "epoch": 3.0098256776837324,
      "grad_norm": 0.5898176431655884,
      "learning_rate": 4.00657604290901e-06,
      "loss": 0.0126,
      "step": 1839160
    },
    {
      "epoch": 3.0098584081223856,
      "grad_norm": 0.06145622953772545,
      "learning_rate": 4.006510150695493e-06,
      "loss": 0.0153,
      "step": 1839180
    },
    {
      "epoch": 3.009891138561039,
      "grad_norm": 0.30867379903793335,
      "learning_rate": 4.006444258481976e-06,
      "loss": 0.0128,
      "step": 1839200
    },
    {
      "epoch": 3.0099238689996923,
      "grad_norm": 0.7040164470672607,
      "learning_rate": 4.006378366268459e-06,
      "loss": 0.0111,
      "step": 1839220
    },
    {
      "epoch": 3.0099565994383455,
      "grad_norm": 0.20137517154216766,
      "learning_rate": 4.006312474054941e-06,
      "loss": 0.0085,
      "step": 1839240
    },
    {
      "epoch": 3.009989329876999,
      "grad_norm": 0.07648196071386337,
      "learning_rate": 4.006246581841424e-06,
      "loss": 0.0121,
      "step": 1839260
    },
    {
      "epoch": 3.0100220603156522,
      "grad_norm": 0.1557336151599884,
      "learning_rate": 4.006180689627907e-06,
      "loss": 0.0139,
      "step": 1839280
    },
    {
      "epoch": 3.010054790754306,
      "grad_norm": 0.14287365972995758,
      "learning_rate": 4.0061147974143895e-06,
      "loss": 0.0155,
      "step": 1839300
    },
    {
      "epoch": 3.010087521192959,
      "grad_norm": 0.16687937080860138,
      "learning_rate": 4.006048905200873e-06,
      "loss": 0.0111,
      "step": 1839320
    },
    {
      "epoch": 3.0101202516316126,
      "grad_norm": 0.14785857498645782,
      "learning_rate": 4.005983012987356e-06,
      "loss": 0.0157,
      "step": 1839340
    },
    {
      "epoch": 3.0101529820702657,
      "grad_norm": 0.5985525250434875,
      "learning_rate": 4.005917120773839e-06,
      "loss": 0.0128,
      "step": 1839360
    },
    {
      "epoch": 3.010185712508919,
      "grad_norm": 0.18581731617450714,
      "learning_rate": 4.005851228560321e-06,
      "loss": 0.0096,
      "step": 1839380
    },
    {
      "epoch": 3.0102184429475725,
      "grad_norm": 0.09105847775936127,
      "learning_rate": 4.005785336346804e-06,
      "loss": 0.0146,
      "step": 1839400
    },
    {
      "epoch": 3.0102511733862256,
      "grad_norm": 0.11416560411453247,
      "learning_rate": 4.005719444133287e-06,
      "loss": 0.0111,
      "step": 1839420
    },
    {
      "epoch": 3.010283903824879,
      "grad_norm": 0.294846773147583,
      "learning_rate": 4.0056535519197696e-06,
      "loss": 0.0128,
      "step": 1839440
    },
    {
      "epoch": 3.0103166342635324,
      "grad_norm": 0.10813948512077332,
      "learning_rate": 4.005587659706252e-06,
      "loss": 0.0074,
      "step": 1839460
    },
    {
      "epoch": 3.010349364702186,
      "grad_norm": 0.21610473096370697,
      "learning_rate": 4.005521767492735e-06,
      "loss": 0.0094,
      "step": 1839480
    },
    {
      "epoch": 3.010382095140839,
      "grad_norm": 0.5762506723403931,
      "learning_rate": 4.005455875279219e-06,
      "loss": 0.0218,
      "step": 1839500
    },
    {
      "epoch": 3.0104148255794922,
      "grad_norm": 0.6830448508262634,
      "learning_rate": 4.005389983065701e-06,
      "loss": 0.016,
      "step": 1839520
    },
    {
      "epoch": 3.010447556018146,
      "grad_norm": 0.3859805166721344,
      "learning_rate": 4.005324090852185e-06,
      "loss": 0.0127,
      "step": 1839540
    },
    {
      "epoch": 3.010480286456799,
      "grad_norm": 0.2882595956325531,
      "learning_rate": 4.005258198638668e-06,
      "loss": 0.0154,
      "step": 1839560
    },
    {
      "epoch": 3.0105130168954526,
      "grad_norm": 0.32985445857048035,
      "learning_rate": 4.0051923064251504e-06,
      "loss": 0.01,
      "step": 1839580
    },
    {
      "epoch": 3.0105457473341057,
      "grad_norm": 0.10133641213178635,
      "learning_rate": 4.005126414211633e-06,
      "loss": 0.0184,
      "step": 1839600
    },
    {
      "epoch": 3.0105784777727593,
      "grad_norm": 0.1544637531042099,
      "learning_rate": 4.005060521998116e-06,
      "loss": 0.0083,
      "step": 1839620
    },
    {
      "epoch": 3.0106112082114125,
      "grad_norm": 0.7530070543289185,
      "learning_rate": 4.004994629784599e-06,
      "loss": 0.0103,
      "step": 1839640
    },
    {
      "epoch": 3.0106439386500656,
      "grad_norm": 0.4363078474998474,
      "learning_rate": 4.004928737571081e-06,
      "loss": 0.0137,
      "step": 1839660
    },
    {
      "epoch": 3.0106766690887192,
      "grad_norm": 0.2854553163051605,
      "learning_rate": 4.004862845357564e-06,
      "loss": 0.01,
      "step": 1839680
    },
    {
      "epoch": 3.0107093995273724,
      "grad_norm": 0.06542938202619553,
      "learning_rate": 4.004796953144047e-06,
      "loss": 0.0109,
      "step": 1839700
    },
    {
      "epoch": 3.010742129966026,
      "grad_norm": 0.04565593972802162,
      "learning_rate": 4.0047310609305305e-06,
      "loss": 0.0109,
      "step": 1839720
    },
    {
      "epoch": 3.010774860404679,
      "grad_norm": 0.30727794766426086,
      "learning_rate": 4.004665168717013e-06,
      "loss": 0.0151,
      "step": 1839740
    },
    {
      "epoch": 3.0108075908433323,
      "grad_norm": 0.12714354693889618,
      "learning_rate": 4.004599276503496e-06,
      "loss": 0.0064,
      "step": 1839760
    },
    {
      "epoch": 3.010840321281986,
      "grad_norm": 0.6916824579238892,
      "learning_rate": 4.004533384289979e-06,
      "loss": 0.0121,
      "step": 1839780
    },
    {
      "epoch": 3.010873051720639,
      "grad_norm": 0.7768074870109558,
      "learning_rate": 4.0044674920764614e-06,
      "loss": 0.0134,
      "step": 1839800
    },
    {
      "epoch": 3.0109057821592926,
      "grad_norm": 0.3114388585090637,
      "learning_rate": 4.004401599862944e-06,
      "loss": 0.016,
      "step": 1839820
    },
    {
      "epoch": 3.0109385125979458,
      "grad_norm": 0.13244803249835968,
      "learning_rate": 4.004335707649427e-06,
      "loss": 0.0107,
      "step": 1839840
    },
    {
      "epoch": 3.0109712430365994,
      "grad_norm": 0.3171120882034302,
      "learning_rate": 4.0042698154359105e-06,
      "loss": 0.0127,
      "step": 1839860
    },
    {
      "epoch": 3.0110039734752525,
      "grad_norm": 0.26171132922172546,
      "learning_rate": 4.004203923222393e-06,
      "loss": 0.0109,
      "step": 1839880
    },
    {
      "epoch": 3.0110367039139057,
      "grad_norm": 0.19674374163150787,
      "learning_rate": 4.004138031008876e-06,
      "loss": 0.0081,
      "step": 1839900
    },
    {
      "epoch": 3.0110694343525592,
      "grad_norm": 0.7665444612503052,
      "learning_rate": 4.004072138795359e-06,
      "loss": 0.0096,
      "step": 1839920
    },
    {
      "epoch": 3.0111021647912124,
      "grad_norm": 0.28807884454727173,
      "learning_rate": 4.004006246581842e-06,
      "loss": 0.0096,
      "step": 1839940
    },
    {
      "epoch": 3.011134895229866,
      "grad_norm": 0.1315879225730896,
      "learning_rate": 4.003940354368325e-06,
      "loss": 0.012,
      "step": 1839960
    },
    {
      "epoch": 3.011167625668519,
      "grad_norm": 0.2747613787651062,
      "learning_rate": 4.003874462154808e-06,
      "loss": 0.0111,
      "step": 1839980
    },
    {
      "epoch": 3.0112003561071727,
      "grad_norm": 0.3139532804489136,
      "learning_rate": 4.0038085699412905e-06,
      "loss": 0.0097,
      "step": 1840000
    },
    {
      "epoch": 3.011233086545826,
      "grad_norm": 0.31034529209136963,
      "learning_rate": 4.003742677727773e-06,
      "loss": 0.011,
      "step": 1840020
    },
    {
      "epoch": 3.011265816984479,
      "grad_norm": 0.4871232807636261,
      "learning_rate": 4.003676785514256e-06,
      "loss": 0.0089,
      "step": 1840040
    },
    {
      "epoch": 3.0112985474231326,
      "grad_norm": 0.6088114976882935,
      "learning_rate": 4.003610893300739e-06,
      "loss": 0.0108,
      "step": 1840060
    },
    {
      "epoch": 3.011331277861786,
      "grad_norm": 0.6122811436653137,
      "learning_rate": 4.0035450010872215e-06,
      "loss": 0.0089,
      "step": 1840080
    },
    {
      "epoch": 3.0113640083004394,
      "grad_norm": 0.3079156279563904,
      "learning_rate": 4.003479108873704e-06,
      "loss": 0.0099,
      "step": 1840100
    },
    {
      "epoch": 3.0113967387390925,
      "grad_norm": 0.05995211377739906,
      "learning_rate": 4.003413216660188e-06,
      "loss": 0.0066,
      "step": 1840120
    },
    {
      "epoch": 3.011429469177746,
      "grad_norm": 0.18253090977668762,
      "learning_rate": 4.0033473244466705e-06,
      "loss": 0.0124,
      "step": 1840140
    },
    {
      "epoch": 3.0114621996163993,
      "grad_norm": 0.4497947096824646,
      "learning_rate": 4.003281432233153e-06,
      "loss": 0.0159,
      "step": 1840160
    },
    {
      "epoch": 3.0114949300550524,
      "grad_norm": 0.3744290769100189,
      "learning_rate": 4.003215540019636e-06,
      "loss": 0.0088,
      "step": 1840180
    },
    {
      "epoch": 3.011527660493706,
      "grad_norm": 0.23012909293174744,
      "learning_rate": 4.003149647806119e-06,
      "loss": 0.0168,
      "step": 1840200
    },
    {
      "epoch": 3.011560390932359,
      "grad_norm": 0.25213807821273804,
      "learning_rate": 4.003083755592602e-06,
      "loss": 0.0075,
      "step": 1840220
    },
    {
      "epoch": 3.0115931213710128,
      "grad_norm": 0.3673990070819855,
      "learning_rate": 4.003017863379085e-06,
      "loss": 0.0087,
      "step": 1840240
    },
    {
      "epoch": 3.011625851809666,
      "grad_norm": 0.5007103085517883,
      "learning_rate": 4.002951971165568e-06,
      "loss": 0.0144,
      "step": 1840260
    },
    {
      "epoch": 3.0116585822483195,
      "grad_norm": 0.4425774812698364,
      "learning_rate": 4.0028860789520506e-06,
      "loss": 0.0114,
      "step": 1840280
    },
    {
      "epoch": 3.0116913126869727,
      "grad_norm": 0.08231952786445618,
      "learning_rate": 4.002820186738533e-06,
      "loss": 0.0079,
      "step": 1840300
    },
    {
      "epoch": 3.011724043125626,
      "grad_norm": 0.19339703023433685,
      "learning_rate": 4.002754294525016e-06,
      "loss": 0.0117,
      "step": 1840320
    },
    {
      "epoch": 3.0117567735642794,
      "grad_norm": 0.2738177180290222,
      "learning_rate": 4.0026884023115e-06,
      "loss": 0.0121,
      "step": 1840340
    },
    {
      "epoch": 3.0117895040029325,
      "grad_norm": 0.2700446844100952,
      "learning_rate": 4.002622510097982e-06,
      "loss": 0.0097,
      "step": 1840360
    },
    {
      "epoch": 3.011822234441586,
      "grad_norm": 0.5792842507362366,
      "learning_rate": 4.002556617884465e-06,
      "loss": 0.0148,
      "step": 1840380
    },
    {
      "epoch": 3.0118549648802393,
      "grad_norm": 0.15487104654312134,
      "learning_rate": 4.002490725670948e-06,
      "loss": 0.0126,
      "step": 1840400
    },
    {
      "epoch": 3.0118876953188924,
      "grad_norm": 0.6394785642623901,
      "learning_rate": 4.002424833457431e-06,
      "loss": 0.0097,
      "step": 1840420
    },
    {
      "epoch": 3.011920425757546,
      "grad_norm": 0.3530155420303345,
      "learning_rate": 4.002358941243913e-06,
      "loss": 0.0097,
      "step": 1840440
    },
    {
      "epoch": 3.011953156196199,
      "grad_norm": 0.3140771687030792,
      "learning_rate": 4.002293049030396e-06,
      "loss": 0.0123,
      "step": 1840460
    },
    {
      "epoch": 3.011985886634853,
      "grad_norm": 1.4112274646759033,
      "learning_rate": 4.002227156816879e-06,
      "loss": 0.014,
      "step": 1840480
    },
    {
      "epoch": 3.012018617073506,
      "grad_norm": 0.5573118329048157,
      "learning_rate": 4.0021612646033616e-06,
      "loss": 0.0116,
      "step": 1840500
    },
    {
      "epoch": 3.0120513475121595,
      "grad_norm": 0.2363700419664383,
      "learning_rate": 4.002095372389845e-06,
      "loss": 0.0116,
      "step": 1840520
    },
    {
      "epoch": 3.0120840779508127,
      "grad_norm": 0.24633412063121796,
      "learning_rate": 4.002029480176328e-06,
      "loss": 0.0118,
      "step": 1840540
    },
    {
      "epoch": 3.012116808389466,
      "grad_norm": 0.06397804617881775,
      "learning_rate": 4.001963587962811e-06,
      "loss": 0.0077,
      "step": 1840560
    },
    {
      "epoch": 3.0121495388281194,
      "grad_norm": 0.25585678219795227,
      "learning_rate": 4.001897695749294e-06,
      "loss": 0.0104,
      "step": 1840580
    },
    {
      "epoch": 3.0121822692667726,
      "grad_norm": 0.6993990540504456,
      "learning_rate": 4.001831803535777e-06,
      "loss": 0.0125,
      "step": 1840600
    },
    {
      "epoch": 3.012214999705426,
      "grad_norm": 0.31903180480003357,
      "learning_rate": 4.00176591132226e-06,
      "loss": 0.0113,
      "step": 1840620
    },
    {
      "epoch": 3.0122477301440793,
      "grad_norm": 0.6081601977348328,
      "learning_rate": 4.001700019108742e-06,
      "loss": 0.0101,
      "step": 1840640
    },
    {
      "epoch": 3.012280460582733,
      "grad_norm": 0.408180296421051,
      "learning_rate": 4.001634126895225e-06,
      "loss": 0.0088,
      "step": 1840660
    },
    {
      "epoch": 3.012313191021386,
      "grad_norm": 0.3182677924633026,
      "learning_rate": 4.001568234681708e-06,
      "loss": 0.0195,
      "step": 1840680
    },
    {
      "epoch": 3.012345921460039,
      "grad_norm": 0.10741358995437622,
      "learning_rate": 4.001502342468191e-06,
      "loss": 0.0149,
      "step": 1840700
    },
    {
      "epoch": 3.012378651898693,
      "grad_norm": 0.43300631642341614,
      "learning_rate": 4.001436450254673e-06,
      "loss": 0.0153,
      "step": 1840720
    },
    {
      "epoch": 3.012411382337346,
      "grad_norm": 0.3708956837654114,
      "learning_rate": 4.001370558041157e-06,
      "loss": 0.0136,
      "step": 1840740
    },
    {
      "epoch": 3.0124441127759995,
      "grad_norm": 0.249888613820076,
      "learning_rate": 4.00130466582764e-06,
      "loss": 0.0094,
      "step": 1840760
    },
    {
      "epoch": 3.0124768432146527,
      "grad_norm": 0.3483031392097473,
      "learning_rate": 4.0012387736141224e-06,
      "loss": 0.0082,
      "step": 1840780
    },
    {
      "epoch": 3.0125095736533063,
      "grad_norm": 0.18295209109783173,
      "learning_rate": 4.001172881400605e-06,
      "loss": 0.0171,
      "step": 1840800
    },
    {
      "epoch": 3.0125423040919594,
      "grad_norm": 0.48354265093803406,
      "learning_rate": 4.001106989187088e-06,
      "loss": 0.015,
      "step": 1840820
    },
    {
      "epoch": 3.0125750345306126,
      "grad_norm": 0.2097005397081375,
      "learning_rate": 4.001041096973571e-06,
      "loss": 0.0062,
      "step": 1840840
    },
    {
      "epoch": 3.012607764969266,
      "grad_norm": 0.11197054386138916,
      "learning_rate": 4.000975204760053e-06,
      "loss": 0.0118,
      "step": 1840860
    },
    {
      "epoch": 3.0126404954079193,
      "grad_norm": 0.3886941373348236,
      "learning_rate": 4.000909312546536e-06,
      "loss": 0.0138,
      "step": 1840880
    },
    {
      "epoch": 3.012673225846573,
      "grad_norm": 0.22328545153141022,
      "learning_rate": 4.00084342033302e-06,
      "loss": 0.0071,
      "step": 1840900
    },
    {
      "epoch": 3.012705956285226,
      "grad_norm": 0.2892340123653412,
      "learning_rate": 4.0007775281195025e-06,
      "loss": 0.0071,
      "step": 1840920
    },
    {
      "epoch": 3.0127386867238797,
      "grad_norm": 0.1854712963104248,
      "learning_rate": 4.000711635905985e-06,
      "loss": 0.0126,
      "step": 1840940
    },
    {
      "epoch": 3.012771417162533,
      "grad_norm": 0.1334255337715149,
      "learning_rate": 4.000645743692469e-06,
      "loss": 0.0089,
      "step": 1840960
    },
    {
      "epoch": 3.012804147601186,
      "grad_norm": 0.26954057812690735,
      "learning_rate": 4.0005798514789515e-06,
      "loss": 0.0136,
      "step": 1840980
    },
    {
      "epoch": 3.0128368780398396,
      "grad_norm": 0.4424997866153717,
      "learning_rate": 4.000513959265434e-06,
      "loss": 0.0127,
      "step": 1841000
    },
    {
      "epoch": 3.0128696084784927,
      "grad_norm": 0.18841125071048737,
      "learning_rate": 4.000448067051917e-06,
      "loss": 0.0127,
      "step": 1841020
    },
    {
      "epoch": 3.0129023389171463,
      "grad_norm": 0.5398075580596924,
      "learning_rate": 4.0003821748384e-06,
      "loss": 0.0107,
      "step": 1841040
    },
    {
      "epoch": 3.0129350693557995,
      "grad_norm": 0.15749399363994598,
      "learning_rate": 4.0003162826248825e-06,
      "loss": 0.0116,
      "step": 1841060
    },
    {
      "epoch": 3.012967799794453,
      "grad_norm": 0.07247688621282578,
      "learning_rate": 4.000250390411365e-06,
      "loss": 0.0107,
      "step": 1841080
    },
    {
      "epoch": 3.013000530233106,
      "grad_norm": 0.08985970914363861,
      "learning_rate": 4.000184498197848e-06,
      "loss": 0.0086,
      "step": 1841100
    },
    {
      "epoch": 3.0130332606717594,
      "grad_norm": 0.13373343646526337,
      "learning_rate": 4.000118605984331e-06,
      "loss": 0.0108,
      "step": 1841120
    },
    {
      "epoch": 3.013065991110413,
      "grad_norm": 0.38337433338165283,
      "learning_rate": 4.000052713770814e-06,
      "loss": 0.0115,
      "step": 1841140
    },
    {
      "epoch": 3.013098721549066,
      "grad_norm": 0.4110257029533386,
      "learning_rate": 3.999986821557297e-06,
      "loss": 0.0108,
      "step": 1841160
    },
    {
      "epoch": 3.0131314519877197,
      "grad_norm": 0.2231549769639969,
      "learning_rate": 3.99992092934378e-06,
      "loss": 0.0091,
      "step": 1841180
    },
    {
      "epoch": 3.013164182426373,
      "grad_norm": 0.3065418601036072,
      "learning_rate": 3.9998550371302625e-06,
      "loss": 0.0115,
      "step": 1841200
    },
    {
      "epoch": 3.013196912865026,
      "grad_norm": 0.16787290573120117,
      "learning_rate": 3.999789144916745e-06,
      "loss": 0.0113,
      "step": 1841220
    },
    {
      "epoch": 3.0132296433036796,
      "grad_norm": 0.07313922792673111,
      "learning_rate": 3.999723252703228e-06,
      "loss": 0.014,
      "step": 1841240
    },
    {
      "epoch": 3.0132623737423327,
      "grad_norm": 0.26560118794441223,
      "learning_rate": 3.999657360489711e-06,
      "loss": 0.0084,
      "step": 1841260
    },
    {
      "epoch": 3.0132951041809863,
      "grad_norm": 0.279677152633667,
      "learning_rate": 3.999591468276194e-06,
      "loss": 0.0147,
      "step": 1841280
    },
    {
      "epoch": 3.0133278346196395,
      "grad_norm": 0.4916031062602997,
      "learning_rate": 3.999525576062677e-06,
      "loss": 0.0114,
      "step": 1841300
    },
    {
      "epoch": 3.013360565058293,
      "grad_norm": 0.20313672721385956,
      "learning_rate": 3.99945968384916e-06,
      "loss": 0.0168,
      "step": 1841320
    },
    {
      "epoch": 3.0133932954969462,
      "grad_norm": 0.2722736895084381,
      "learning_rate": 3.9993937916356425e-06,
      "loss": 0.0157,
      "step": 1841340
    },
    {
      "epoch": 3.0134260259355994,
      "grad_norm": 0.2153972089290619,
      "learning_rate": 3.999327899422126e-06,
      "loss": 0.0074,
      "step": 1841360
    },
    {
      "epoch": 3.013458756374253,
      "grad_norm": 0.26631975173950195,
      "learning_rate": 3.999262007208609e-06,
      "loss": 0.0068,
      "step": 1841380
    },
    {
      "epoch": 3.013491486812906,
      "grad_norm": 0.14895443618297577,
      "learning_rate": 3.999196114995092e-06,
      "loss": 0.0135,
      "step": 1841400
    },
    {
      "epoch": 3.0135242172515597,
      "grad_norm": 0.41284433007240295,
      "learning_rate": 3.999130222781574e-06,
      "loss": 0.023,
      "step": 1841420
    },
    {
      "epoch": 3.013556947690213,
      "grad_norm": 0.26552489399909973,
      "learning_rate": 3.999064330568057e-06,
      "loss": 0.0092,
      "step": 1841440
    },
    {
      "epoch": 3.0135896781288665,
      "grad_norm": 0.6113641262054443,
      "learning_rate": 3.99899843835454e-06,
      "loss": 0.0116,
      "step": 1841460
    },
    {
      "epoch": 3.0136224085675196,
      "grad_norm": 0.3099983334541321,
      "learning_rate": 3.9989325461410226e-06,
      "loss": 0.0171,
      "step": 1841480
    },
    {
      "epoch": 3.0136551390061728,
      "grad_norm": 1.2758054733276367,
      "learning_rate": 3.998866653927505e-06,
      "loss": 0.0112,
      "step": 1841500
    },
    {
      "epoch": 3.0136878694448264,
      "grad_norm": 0.7142248749732971,
      "learning_rate": 3.998800761713988e-06,
      "loss": 0.0141,
      "step": 1841520
    },
    {
      "epoch": 3.0137205998834795,
      "grad_norm": 0.10916801542043686,
      "learning_rate": 3.998734869500472e-06,
      "loss": 0.0075,
      "step": 1841540
    },
    {
      "epoch": 3.013753330322133,
      "grad_norm": 0.05667924880981445,
      "learning_rate": 3.998668977286954e-06,
      "loss": 0.0097,
      "step": 1841560
    },
    {
      "epoch": 3.0137860607607863,
      "grad_norm": 0.5944315195083618,
      "learning_rate": 3.998603085073437e-06,
      "loss": 0.0083,
      "step": 1841580
    },
    {
      "epoch": 3.01381879119944,
      "grad_norm": 0.25993412733078003,
      "learning_rate": 3.99853719285992e-06,
      "loss": 0.0127,
      "step": 1841600
    },
    {
      "epoch": 3.013851521638093,
      "grad_norm": 0.5276423692703247,
      "learning_rate": 3.9984713006464034e-06,
      "loss": 0.0135,
      "step": 1841620
    },
    {
      "epoch": 3.013884252076746,
      "grad_norm": 0.4335145652294159,
      "learning_rate": 3.998405408432886e-06,
      "loss": 0.016,
      "step": 1841640
    },
    {
      "epoch": 3.0139169825153997,
      "grad_norm": 0.0991547703742981,
      "learning_rate": 3.998339516219369e-06,
      "loss": 0.013,
      "step": 1841660
    },
    {
      "epoch": 3.013949712954053,
      "grad_norm": 0.3881610929965973,
      "learning_rate": 3.998273624005852e-06,
      "loss": 0.0087,
      "step": 1841680
    },
    {
      "epoch": 3.0139824433927065,
      "grad_norm": 0.30785152316093445,
      "learning_rate": 3.998207731792334e-06,
      "loss": 0.0151,
      "step": 1841700
    },
    {
      "epoch": 3.0140151738313596,
      "grad_norm": 0.2085602879524231,
      "learning_rate": 3.998141839578817e-06,
      "loss": 0.0165,
      "step": 1841720
    },
    {
      "epoch": 3.0140479042700132,
      "grad_norm": 0.22133125364780426,
      "learning_rate": 3.9980759473653e-06,
      "loss": 0.0071,
      "step": 1841740
    },
    {
      "epoch": 3.0140806347086664,
      "grad_norm": 0.6026594042778015,
      "learning_rate": 3.9980100551517835e-06,
      "loss": 0.0153,
      "step": 1841760
    },
    {
      "epoch": 3.0141133651473195,
      "grad_norm": 0.41969409584999084,
      "learning_rate": 3.997944162938266e-06,
      "loss": 0.0113,
      "step": 1841780
    },
    {
      "epoch": 3.014146095585973,
      "grad_norm": 0.426209419965744,
      "learning_rate": 3.997878270724749e-06,
      "loss": 0.0145,
      "step": 1841800
    },
    {
      "epoch": 3.0141788260246263,
      "grad_norm": 0.28658434748649597,
      "learning_rate": 3.997812378511232e-06,
      "loss": 0.0071,
      "step": 1841820
    },
    {
      "epoch": 3.01421155646328,
      "grad_norm": 0.19200091063976288,
      "learning_rate": 3.9977464862977144e-06,
      "loss": 0.0132,
      "step": 1841840
    },
    {
      "epoch": 3.014244286901933,
      "grad_norm": 0.5177412033081055,
      "learning_rate": 3.997680594084197e-06,
      "loss": 0.0115,
      "step": 1841860
    },
    {
      "epoch": 3.0142770173405866,
      "grad_norm": 1.9200705289840698,
      "learning_rate": 3.99761470187068e-06,
      "loss": 0.0161,
      "step": 1841880
    },
    {
      "epoch": 3.0143097477792398,
      "grad_norm": 0.2710264325141907,
      "learning_rate": 3.997548809657163e-06,
      "loss": 0.0093,
      "step": 1841900
    },
    {
      "epoch": 3.014342478217893,
      "grad_norm": 0.34598565101623535,
      "learning_rate": 3.997482917443645e-06,
      "loss": 0.0103,
      "step": 1841920
    },
    {
      "epoch": 3.0143752086565465,
      "grad_norm": 0.2100706249475479,
      "learning_rate": 3.997417025230129e-06,
      "loss": 0.0081,
      "step": 1841940
    },
    {
      "epoch": 3.0144079390951997,
      "grad_norm": 0.2656291425228119,
      "learning_rate": 3.997351133016612e-06,
      "loss": 0.0104,
      "step": 1841960
    },
    {
      "epoch": 3.0144406695338533,
      "grad_norm": 0.07536904513835907,
      "learning_rate": 3.9972852408030945e-06,
      "loss": 0.0129,
      "step": 1841980
    },
    {
      "epoch": 3.0144733999725064,
      "grad_norm": 0.25014588236808777,
      "learning_rate": 3.997219348589578e-06,
      "loss": 0.011,
      "step": 1842000
    },
    {
      "epoch": 3.0145061304111596,
      "grad_norm": 0.44064095616340637,
      "learning_rate": 3.997153456376061e-06,
      "loss": 0.0097,
      "step": 1842020
    },
    {
      "epoch": 3.014538860849813,
      "grad_norm": 0.1639215648174286,
      "learning_rate": 3.9970875641625435e-06,
      "loss": 0.0094,
      "step": 1842040
    },
    {
      "epoch": 3.0145715912884663,
      "grad_norm": 0.3549768030643463,
      "learning_rate": 3.997021671949026e-06,
      "loss": 0.01,
      "step": 1842060
    },
    {
      "epoch": 3.01460432172712,
      "grad_norm": 0.24262593686580658,
      "learning_rate": 3.996955779735509e-06,
      "loss": 0.0103,
      "step": 1842080
    },
    {
      "epoch": 3.014637052165773,
      "grad_norm": 0.31322914361953735,
      "learning_rate": 3.996889887521992e-06,
      "loss": 0.0085,
      "step": 1842100
    },
    {
      "epoch": 3.0146697826044266,
      "grad_norm": 0.2482220083475113,
      "learning_rate": 3.9968239953084745e-06,
      "loss": 0.0102,
      "step": 1842120
    },
    {
      "epoch": 3.01470251304308,
      "grad_norm": 0.5700355172157288,
      "learning_rate": 3.996758103094957e-06,
      "loss": 0.0131,
      "step": 1842140
    },
    {
      "epoch": 3.014735243481733,
      "grad_norm": 0.5225533843040466,
      "learning_rate": 3.996692210881441e-06,
      "loss": 0.0147,
      "step": 1842160
    },
    {
      "epoch": 3.0147679739203865,
      "grad_norm": 0.31440427899360657,
      "learning_rate": 3.9966263186679235e-06,
      "loss": 0.0136,
      "step": 1842180
    },
    {
      "epoch": 3.0148007043590397,
      "grad_norm": 0.18654070794582367,
      "learning_rate": 3.996560426454406e-06,
      "loss": 0.01,
      "step": 1842200
    },
    {
      "epoch": 3.0148334347976933,
      "grad_norm": 0.716349720954895,
      "learning_rate": 3.996494534240889e-06,
      "loss": 0.0109,
      "step": 1842220
    },
    {
      "epoch": 3.0148661652363464,
      "grad_norm": 0.07319584488868713,
      "learning_rate": 3.996428642027372e-06,
      "loss": 0.0132,
      "step": 1842240
    },
    {
      "epoch": 3.014898895675,
      "grad_norm": 0.44397929310798645,
      "learning_rate": 3.9963627498138545e-06,
      "loss": 0.0083,
      "step": 1842260
    },
    {
      "epoch": 3.014931626113653,
      "grad_norm": 0.8475536108016968,
      "learning_rate": 3.996296857600337e-06,
      "loss": 0.0133,
      "step": 1842280
    },
    {
      "epoch": 3.0149643565523063,
      "grad_norm": 0.4549793601036072,
      "learning_rate": 3.99623096538682e-06,
      "loss": 0.0064,
      "step": 1842300
    },
    {
      "epoch": 3.01499708699096,
      "grad_norm": 0.42841827869415283,
      "learning_rate": 3.9961650731733036e-06,
      "loss": 0.0123,
      "step": 1842320
    },
    {
      "epoch": 3.015029817429613,
      "grad_norm": 0.42578017711639404,
      "learning_rate": 3.996099180959786e-06,
      "loss": 0.0145,
      "step": 1842340
    },
    {
      "epoch": 3.0150625478682667,
      "grad_norm": 0.29485008120536804,
      "learning_rate": 3.996033288746269e-06,
      "loss": 0.0124,
      "step": 1842360
    },
    {
      "epoch": 3.01509527830692,
      "grad_norm": 0.06412310898303986,
      "learning_rate": 3.995967396532753e-06,
      "loss": 0.0123,
      "step": 1842380
    },
    {
      "epoch": 3.0151280087455734,
      "grad_norm": 0.1831493377685547,
      "learning_rate": 3.995901504319235e-06,
      "loss": 0.0117,
      "step": 1842400
    },
    {
      "epoch": 3.0151607391842266,
      "grad_norm": 0.46027472615242004,
      "learning_rate": 3.995835612105718e-06,
      "loss": 0.0118,
      "step": 1842420
    },
    {
      "epoch": 3.0151934696228797,
      "grad_norm": 0.4353799819946289,
      "learning_rate": 3.995769719892201e-06,
      "loss": 0.0125,
      "step": 1842440
    },
    {
      "epoch": 3.0152262000615333,
      "grad_norm": 0.11178658902645111,
      "learning_rate": 3.995703827678684e-06,
      "loss": 0.0124,
      "step": 1842460
    },
    {
      "epoch": 3.0152589305001865,
      "grad_norm": 0.07561355829238892,
      "learning_rate": 3.995637935465166e-06,
      "loss": 0.0093,
      "step": 1842480
    },
    {
      "epoch": 3.01529166093884,
      "grad_norm": 0.3319075405597687,
      "learning_rate": 3.995572043251649e-06,
      "loss": 0.0063,
      "step": 1842500
    },
    {
      "epoch": 3.015324391377493,
      "grad_norm": 0.29603540897369385,
      "learning_rate": 3.995506151038132e-06,
      "loss": 0.0113,
      "step": 1842520
    },
    {
      "epoch": 3.015357121816147,
      "grad_norm": 0.2982846796512604,
      "learning_rate": 3.9954402588246146e-06,
      "loss": 0.0106,
      "step": 1842540
    },
    {
      "epoch": 3.0153898522548,
      "grad_norm": 0.3514340817928314,
      "learning_rate": 3.995374366611098e-06,
      "loss": 0.0129,
      "step": 1842560
    },
    {
      "epoch": 3.015422582693453,
      "grad_norm": 0.3017507791519165,
      "learning_rate": 3.995308474397581e-06,
      "loss": 0.013,
      "step": 1842580
    },
    {
      "epoch": 3.0154553131321067,
      "grad_norm": 0.4062849283218384,
      "learning_rate": 3.995242582184064e-06,
      "loss": 0.0155,
      "step": 1842600
    },
    {
      "epoch": 3.01548804357076,
      "grad_norm": 0.2764860987663269,
      "learning_rate": 3.995176689970546e-06,
      "loss": 0.0093,
      "step": 1842620
    },
    {
      "epoch": 3.0155207740094134,
      "grad_norm": 0.3766271471977234,
      "learning_rate": 3.995110797757029e-06,
      "loss": 0.0087,
      "step": 1842640
    },
    {
      "epoch": 3.0155535044480666,
      "grad_norm": 0.5607405304908752,
      "learning_rate": 3.995044905543512e-06,
      "loss": 0.0205,
      "step": 1842660
    },
    {
      "epoch": 3.0155862348867197,
      "grad_norm": 0.21258017420768738,
      "learning_rate": 3.9949790133299954e-06,
      "loss": 0.0122,
      "step": 1842680
    },
    {
      "epoch": 3.0156189653253733,
      "grad_norm": 0.6436302661895752,
      "learning_rate": 3.994913121116478e-06,
      "loss": 0.0119,
      "step": 1842700
    },
    {
      "epoch": 3.0156516957640265,
      "grad_norm": 0.3184891939163208,
      "learning_rate": 3.994847228902961e-06,
      "loss": 0.017,
      "step": 1842720
    },
    {
      "epoch": 3.01568442620268,
      "grad_norm": 0.6318677067756653,
      "learning_rate": 3.994781336689444e-06,
      "loss": 0.01,
      "step": 1842740
    },
    {
      "epoch": 3.015717156641333,
      "grad_norm": 0.47143447399139404,
      "learning_rate": 3.994715444475926e-06,
      "loss": 0.0101,
      "step": 1842760
    },
    {
      "epoch": 3.015749887079987,
      "grad_norm": 0.2770083248615265,
      "learning_rate": 3.99464955226241e-06,
      "loss": 0.0123,
      "step": 1842780
    },
    {
      "epoch": 3.01578261751864,
      "grad_norm": 0.11196688562631607,
      "learning_rate": 3.994583660048893e-06,
      "loss": 0.012,
      "step": 1842800
    },
    {
      "epoch": 3.015815347957293,
      "grad_norm": 0.24489697813987732,
      "learning_rate": 3.9945177678353755e-06,
      "loss": 0.0111,
      "step": 1842820
    },
    {
      "epoch": 3.0158480783959467,
      "grad_norm": 0.4503069818019867,
      "learning_rate": 3.994451875621858e-06,
      "loss": 0.0083,
      "step": 1842840
    },
    {
      "epoch": 3.0158808088346,
      "grad_norm": 0.12173318862915039,
      "learning_rate": 3.994385983408341e-06,
      "loss": 0.0094,
      "step": 1842860
    },
    {
      "epoch": 3.0159135392732535,
      "grad_norm": 0.36059483885765076,
      "learning_rate": 3.994320091194824e-06,
      "loss": 0.0167,
      "step": 1842880
    },
    {
      "epoch": 3.0159462697119066,
      "grad_norm": 0.07686857879161835,
      "learning_rate": 3.994254198981306e-06,
      "loss": 0.0088,
      "step": 1842900
    },
    {
      "epoch": 3.01597900015056,
      "grad_norm": 0.09016099572181702,
      "learning_rate": 3.994188306767789e-06,
      "loss": 0.0099,
      "step": 1842920
    },
    {
      "epoch": 3.0160117305892133,
      "grad_norm": 0.5089447498321533,
      "learning_rate": 3.994122414554272e-06,
      "loss": 0.0093,
      "step": 1842940
    },
    {
      "epoch": 3.0160444610278665,
      "grad_norm": 0.25281283259391785,
      "learning_rate": 3.9940565223407555e-06,
      "loss": 0.0118,
      "step": 1842960
    },
    {
      "epoch": 3.01607719146652,
      "grad_norm": 0.1934225708246231,
      "learning_rate": 3.993990630127238e-06,
      "loss": 0.0136,
      "step": 1842980
    },
    {
      "epoch": 3.0161099219051732,
      "grad_norm": 0.21273213624954224,
      "learning_rate": 3.993924737913721e-06,
      "loss": 0.0117,
      "step": 1843000
    },
    {
      "epoch": 3.016142652343827,
      "grad_norm": 0.19303521513938904,
      "learning_rate": 3.993858845700204e-06,
      "loss": 0.0069,
      "step": 1843020
    },
    {
      "epoch": 3.01617538278248,
      "grad_norm": 0.12150730937719345,
      "learning_rate": 3.993792953486687e-06,
      "loss": 0.0174,
      "step": 1843040
    },
    {
      "epoch": 3.0162081132211336,
      "grad_norm": 0.08426463603973389,
      "learning_rate": 3.99372706127317e-06,
      "loss": 0.0106,
      "step": 1843060
    },
    {
      "epoch": 3.0162408436597867,
      "grad_norm": 0.18236742913722992,
      "learning_rate": 3.993661169059653e-06,
      "loss": 0.0119,
      "step": 1843080
    },
    {
      "epoch": 3.01627357409844,
      "grad_norm": 0.24687078595161438,
      "learning_rate": 3.9935952768461355e-06,
      "loss": 0.0093,
      "step": 1843100
    },
    {
      "epoch": 3.0163063045370935,
      "grad_norm": 0.7826604247093201,
      "learning_rate": 3.993529384632618e-06,
      "loss": 0.0091,
      "step": 1843120
    },
    {
      "epoch": 3.0163390349757466,
      "grad_norm": 0.3266407549381256,
      "learning_rate": 3.993463492419101e-06,
      "loss": 0.0088,
      "step": 1843140
    },
    {
      "epoch": 3.0163717654144,
      "grad_norm": 0.35462501645088196,
      "learning_rate": 3.993397600205584e-06,
      "loss": 0.0104,
      "step": 1843160
    },
    {
      "epoch": 3.0164044958530534,
      "grad_norm": 0.5791781544685364,
      "learning_rate": 3.993331707992067e-06,
      "loss": 0.0143,
      "step": 1843180
    },
    {
      "epoch": 3.016437226291707,
      "grad_norm": 0.3172231912612915,
      "learning_rate": 3.99326581577855e-06,
      "loss": 0.0154,
      "step": 1843200
    },
    {
      "epoch": 3.01646995673036,
      "grad_norm": 0.4826712906360626,
      "learning_rate": 3.993199923565033e-06,
      "loss": 0.0089,
      "step": 1843220
    },
    {
      "epoch": 3.0165026871690133,
      "grad_norm": 0.12170245498418808,
      "learning_rate": 3.9931340313515155e-06,
      "loss": 0.0126,
      "step": 1843240
    },
    {
      "epoch": 3.016535417607667,
      "grad_norm": 0.43508180975914,
      "learning_rate": 3.993068139137998e-06,
      "loss": 0.0165,
      "step": 1843260
    },
    {
      "epoch": 3.01656814804632,
      "grad_norm": 0.42904922366142273,
      "learning_rate": 3.993002246924481e-06,
      "loss": 0.0114,
      "step": 1843280
    },
    {
      "epoch": 3.0166008784849736,
      "grad_norm": 0.25253981351852417,
      "learning_rate": 3.992936354710964e-06,
      "loss": 0.0101,
      "step": 1843300
    },
    {
      "epoch": 3.0166336089236268,
      "grad_norm": 0.3496265113353729,
      "learning_rate": 3.9928704624974465e-06,
      "loss": 0.0082,
      "step": 1843320
    },
    {
      "epoch": 3.0166663393622803,
      "grad_norm": 0.08270792663097382,
      "learning_rate": 3.992804570283929e-06,
      "loss": 0.0154,
      "step": 1843340
    },
    {
      "epoch": 3.0166990698009335,
      "grad_norm": 0.3241647183895111,
      "learning_rate": 3.992738678070413e-06,
      "loss": 0.0116,
      "step": 1843360
    },
    {
      "epoch": 3.0167318002395866,
      "grad_norm": 0.3106011748313904,
      "learning_rate": 3.9926727858568956e-06,
      "loss": 0.0095,
      "step": 1843380
    },
    {
      "epoch": 3.0167645306782402,
      "grad_norm": 0.20248644053936005,
      "learning_rate": 3.992606893643379e-06,
      "loss": 0.006,
      "step": 1843400
    },
    {
      "epoch": 3.0167972611168934,
      "grad_norm": 0.3089991509914398,
      "learning_rate": 3.992541001429862e-06,
      "loss": 0.0119,
      "step": 1843420
    },
    {
      "epoch": 3.016829991555547,
      "grad_norm": 0.2812332212924957,
      "learning_rate": 3.992475109216345e-06,
      "loss": 0.0112,
      "step": 1843440
    },
    {
      "epoch": 3.0168627219942,
      "grad_norm": 0.9195430874824524,
      "learning_rate": 3.992409217002827e-06,
      "loss": 0.021,
      "step": 1843460
    },
    {
      "epoch": 3.0168954524328533,
      "grad_norm": 0.5623316168785095,
      "learning_rate": 3.99234332478931e-06,
      "loss": 0.0092,
      "step": 1843480
    },
    {
      "epoch": 3.016928182871507,
      "grad_norm": 0.2394063025712967,
      "learning_rate": 3.992277432575793e-06,
      "loss": 0.0102,
      "step": 1843500
    },
    {
      "epoch": 3.01696091331016,
      "grad_norm": 0.18271023035049438,
      "learning_rate": 3.992211540362276e-06,
      "loss": 0.0092,
      "step": 1843520
    },
    {
      "epoch": 3.0169936437488136,
      "grad_norm": 0.17994868755340576,
      "learning_rate": 3.992145648148758e-06,
      "loss": 0.0115,
      "step": 1843540
    },
    {
      "epoch": 3.0170263741874668,
      "grad_norm": 0.15556202828884125,
      "learning_rate": 3.992079755935241e-06,
      "loss": 0.0097,
      "step": 1843560
    },
    {
      "epoch": 3.0170591046261204,
      "grad_norm": 0.08941637724637985,
      "learning_rate": 3.992013863721725e-06,
      "loss": 0.0102,
      "step": 1843580
    },
    {
      "epoch": 3.0170918350647735,
      "grad_norm": 0.6010528802871704,
      "learning_rate": 3.991947971508207e-06,
      "loss": 0.0087,
      "step": 1843600
    },
    {
      "epoch": 3.0171245655034267,
      "grad_norm": 0.14098650217056274,
      "learning_rate": 3.99188207929469e-06,
      "loss": 0.0092,
      "step": 1843620
    },
    {
      "epoch": 3.0171572959420803,
      "grad_norm": 0.4689249098300934,
      "learning_rate": 3.991816187081173e-06,
      "loss": 0.0079,
      "step": 1843640
    },
    {
      "epoch": 3.0171900263807334,
      "grad_norm": 0.21553805470466614,
      "learning_rate": 3.991750294867656e-06,
      "loss": 0.0138,
      "step": 1843660
    },
    {
      "epoch": 3.017222756819387,
      "grad_norm": 0.10974158346652985,
      "learning_rate": 3.991684402654138e-06,
      "loss": 0.0132,
      "step": 1843680
    },
    {
      "epoch": 3.01725548725804,
      "grad_norm": 0.49299898743629456,
      "learning_rate": 3.991618510440621e-06,
      "loss": 0.0162,
      "step": 1843700
    },
    {
      "epoch": 3.0172882176966938,
      "grad_norm": 0.722361147403717,
      "learning_rate": 3.991552618227104e-06,
      "loss": 0.0112,
      "step": 1843720
    },
    {
      "epoch": 3.017320948135347,
      "grad_norm": 0.24910415709018707,
      "learning_rate": 3.991486726013587e-06,
      "loss": 0.0105,
      "step": 1843740
    },
    {
      "epoch": 3.017353678574,
      "grad_norm": 0.17871147394180298,
      "learning_rate": 3.99142083380007e-06,
      "loss": 0.0114,
      "step": 1843760
    },
    {
      "epoch": 3.0173864090126536,
      "grad_norm": 0.37422096729278564,
      "learning_rate": 3.991354941586553e-06,
      "loss": 0.0159,
      "step": 1843780
    },
    {
      "epoch": 3.017419139451307,
      "grad_norm": 0.16095107793807983,
      "learning_rate": 3.9912890493730365e-06,
      "loss": 0.0106,
      "step": 1843800
    },
    {
      "epoch": 3.0174518698899604,
      "grad_norm": 0.4763820171356201,
      "learning_rate": 3.991223157159519e-06,
      "loss": 0.0101,
      "step": 1843820
    },
    {
      "epoch": 3.0174846003286135,
      "grad_norm": 0.141134575009346,
      "learning_rate": 3.991157264946002e-06,
      "loss": 0.0114,
      "step": 1843840
    },
    {
      "epoch": 3.017517330767267,
      "grad_norm": 0.1728385090827942,
      "learning_rate": 3.991091372732485e-06,
      "loss": 0.0149,
      "step": 1843860
    },
    {
      "epoch": 3.0175500612059203,
      "grad_norm": 0.4081304669380188,
      "learning_rate": 3.9910254805189674e-06,
      "loss": 0.0132,
      "step": 1843880
    },
    {
      "epoch": 3.0175827916445734,
      "grad_norm": 0.544973611831665,
      "learning_rate": 3.99095958830545e-06,
      "loss": 0.0122,
      "step": 1843900
    },
    {
      "epoch": 3.017615522083227,
      "grad_norm": 0.16397657990455627,
      "learning_rate": 3.990893696091933e-06,
      "loss": 0.009,
      "step": 1843920
    },
    {
      "epoch": 3.01764825252188,
      "grad_norm": 0.08529841154813766,
      "learning_rate": 3.990827803878416e-06,
      "loss": 0.0125,
      "step": 1843940
    },
    {
      "epoch": 3.0176809829605338,
      "grad_norm": 0.4305875301361084,
      "learning_rate": 3.990761911664898e-06,
      "loss": 0.0173,
      "step": 1843960
    },
    {
      "epoch": 3.017713713399187,
      "grad_norm": 0.19967295229434967,
      "learning_rate": 3.990696019451382e-06,
      "loss": 0.011,
      "step": 1843980
    },
    {
      "epoch": 3.0177464438378405,
      "grad_norm": 0.24895665049552917,
      "learning_rate": 3.990630127237865e-06,
      "loss": 0.0109,
      "step": 1844000
    },
    {
      "epoch": 3.0177791742764937,
      "grad_norm": 0.13365326821804047,
      "learning_rate": 3.9905642350243475e-06,
      "loss": 0.0108,
      "step": 1844020
    },
    {
      "epoch": 3.017811904715147,
      "grad_norm": 0.2572520673274994,
      "learning_rate": 3.99049834281083e-06,
      "loss": 0.0058,
      "step": 1844040
    },
    {
      "epoch": 3.0178446351538004,
      "grad_norm": 0.2622697949409485,
      "learning_rate": 3.990432450597313e-06,
      "loss": 0.0164,
      "step": 1844060
    },
    {
      "epoch": 3.0178773655924536,
      "grad_norm": 0.09537705779075623,
      "learning_rate": 3.9903665583837965e-06,
      "loss": 0.0052,
      "step": 1844080
    },
    {
      "epoch": 3.017910096031107,
      "grad_norm": 0.21251140534877777,
      "learning_rate": 3.990300666170279e-06,
      "loss": 0.0124,
      "step": 1844100
    },
    {
      "epoch": 3.0179428264697603,
      "grad_norm": 0.21202076971530914,
      "learning_rate": 3.990234773956762e-06,
      "loss": 0.0065,
      "step": 1844120
    },
    {
      "epoch": 3.017975556908414,
      "grad_norm": 0.402898371219635,
      "learning_rate": 3.990168881743245e-06,
      "loss": 0.0081,
      "step": 1844140
    },
    {
      "epoch": 3.018008287347067,
      "grad_norm": 0.9531287550926208,
      "learning_rate": 3.9901029895297275e-06,
      "loss": 0.0125,
      "step": 1844160
    },
    {
      "epoch": 3.01804101778572,
      "grad_norm": 0.3812873661518097,
      "learning_rate": 3.99003709731621e-06,
      "loss": 0.0103,
      "step": 1844180
    },
    {
      "epoch": 3.018073748224374,
      "grad_norm": 0.03480134531855583,
      "learning_rate": 3.989971205102694e-06,
      "loss": 0.009,
      "step": 1844200
    },
    {
      "epoch": 3.018106478663027,
      "grad_norm": 0.20558492839336395,
      "learning_rate": 3.9899053128891766e-06,
      "loss": 0.0069,
      "step": 1844220
    },
    {
      "epoch": 3.0181392091016805,
      "grad_norm": 0.2750154733657837,
      "learning_rate": 3.989839420675659e-06,
      "loss": 0.0103,
      "step": 1844240
    },
    {
      "epoch": 3.0181719395403337,
      "grad_norm": 0.33689233660697937,
      "learning_rate": 3.989773528462142e-06,
      "loss": 0.012,
      "step": 1844260
    },
    {
      "epoch": 3.018204669978987,
      "grad_norm": 0.2710922658443451,
      "learning_rate": 3.989707636248625e-06,
      "loss": 0.0133,
      "step": 1844280
    },
    {
      "epoch": 3.0182374004176404,
      "grad_norm": 0.2912832498550415,
      "learning_rate": 3.9896417440351075e-06,
      "loss": 0.0117,
      "step": 1844300
    },
    {
      "epoch": 3.0182701308562936,
      "grad_norm": 0.6483832001686096,
      "learning_rate": 3.98957585182159e-06,
      "loss": 0.012,
      "step": 1844320
    },
    {
      "epoch": 3.018302861294947,
      "grad_norm": 0.18169493973255157,
      "learning_rate": 3.989509959608073e-06,
      "loss": 0.0134,
      "step": 1844340
    },
    {
      "epoch": 3.0183355917336003,
      "grad_norm": 0.5275125503540039,
      "learning_rate": 3.989444067394556e-06,
      "loss": 0.0125,
      "step": 1844360
    },
    {
      "epoch": 3.018368322172254,
      "grad_norm": 0.08194680511951447,
      "learning_rate": 3.989378175181039e-06,
      "loss": 0.019,
      "step": 1844380
    },
    {
      "epoch": 3.018401052610907,
      "grad_norm": 0.5374543070793152,
      "learning_rate": 3.989312282967522e-06,
      "loss": 0.0115,
      "step": 1844400
    },
    {
      "epoch": 3.0184337830495602,
      "grad_norm": 0.2918698787689209,
      "learning_rate": 3.989246390754005e-06,
      "loss": 0.0109,
      "step": 1844420
    },
    {
      "epoch": 3.018466513488214,
      "grad_norm": 0.19069954752922058,
      "learning_rate": 3.989180498540488e-06,
      "loss": 0.0092,
      "step": 1844440
    },
    {
      "epoch": 3.018499243926867,
      "grad_norm": 0.23780259490013123,
      "learning_rate": 3.989114606326971e-06,
      "loss": 0.0126,
      "step": 1844460
    },
    {
      "epoch": 3.0185319743655206,
      "grad_norm": 0.14150866866111755,
      "learning_rate": 3.989048714113454e-06,
      "loss": 0.0131,
      "step": 1844480
    },
    {
      "epoch": 3.0185647048041737,
      "grad_norm": 0.21351492404937744,
      "learning_rate": 3.988982821899937e-06,
      "loss": 0.011,
      "step": 1844500
    },
    {
      "epoch": 3.0185974352428273,
      "grad_norm": 0.2839854061603546,
      "learning_rate": 3.988916929686419e-06,
      "loss": 0.0173,
      "step": 1844520
    },
    {
      "epoch": 3.0186301656814805,
      "grad_norm": 0.09769173711538315,
      "learning_rate": 3.988851037472902e-06,
      "loss": 0.0135,
      "step": 1844540
    },
    {
      "epoch": 3.0186628961201336,
      "grad_norm": 0.18900150060653687,
      "learning_rate": 3.988785145259385e-06,
      "loss": 0.0116,
      "step": 1844560
    },
    {
      "epoch": 3.018695626558787,
      "grad_norm": 0.128711998462677,
      "learning_rate": 3.9887192530458676e-06,
      "loss": 0.0114,
      "step": 1844580
    },
    {
      "epoch": 3.0187283569974404,
      "grad_norm": 0.3855845630168915,
      "learning_rate": 3.988653360832351e-06,
      "loss": 0.0137,
      "step": 1844600
    },
    {
      "epoch": 3.018761087436094,
      "grad_norm": 0.6100257039070129,
      "learning_rate": 3.988587468618834e-06,
      "loss": 0.0145,
      "step": 1844620
    },
    {
      "epoch": 3.018793817874747,
      "grad_norm": 0.3334537744522095,
      "learning_rate": 3.988521576405317e-06,
      "loss": 0.0111,
      "step": 1844640
    },
    {
      "epoch": 3.0188265483134007,
      "grad_norm": 0.3098321855068207,
      "learning_rate": 3.988455684191799e-06,
      "loss": 0.0131,
      "step": 1844660
    },
    {
      "epoch": 3.018859278752054,
      "grad_norm": 0.15414734184741974,
      "learning_rate": 3.988389791978282e-06,
      "loss": 0.0096,
      "step": 1844680
    },
    {
      "epoch": 3.018892009190707,
      "grad_norm": 0.2684248387813568,
      "learning_rate": 3.988323899764765e-06,
      "loss": 0.0078,
      "step": 1844700
    },
    {
      "epoch": 3.0189247396293606,
      "grad_norm": 0.7977919578552246,
      "learning_rate": 3.988258007551248e-06,
      "loss": 0.0199,
      "step": 1844720
    },
    {
      "epoch": 3.0189574700680137,
      "grad_norm": 0.20193345844745636,
      "learning_rate": 3.98819211533773e-06,
      "loss": 0.0093,
      "step": 1844740
    },
    {
      "epoch": 3.0189902005066673,
      "grad_norm": 0.5883517861366272,
      "learning_rate": 3.988126223124213e-06,
      "loss": 0.0146,
      "step": 1844760
    },
    {
      "epoch": 3.0190229309453205,
      "grad_norm": 0.07355936616659164,
      "learning_rate": 3.988060330910697e-06,
      "loss": 0.0105,
      "step": 1844780
    },
    {
      "epoch": 3.019055661383974,
      "grad_norm": 0.49027109146118164,
      "learning_rate": 3.987994438697179e-06,
      "loss": 0.0138,
      "step": 1844800
    },
    {
      "epoch": 3.0190883918226272,
      "grad_norm": 0.5363237261772156,
      "learning_rate": 3.987928546483663e-06,
      "loss": 0.0149,
      "step": 1844820
    },
    {
      "epoch": 3.0191211222612804,
      "grad_norm": 0.3341999351978302,
      "learning_rate": 3.987862654270146e-06,
      "loss": 0.0116,
      "step": 1844840
    },
    {
      "epoch": 3.019153852699934,
      "grad_norm": 0.08303746581077576,
      "learning_rate": 3.9877967620566285e-06,
      "loss": 0.013,
      "step": 1844860
    },
    {
      "epoch": 3.019186583138587,
      "grad_norm": 0.2440686970949173,
      "learning_rate": 3.987730869843111e-06,
      "loss": 0.0089,
      "step": 1844880
    },
    {
      "epoch": 3.0192193135772407,
      "grad_norm": 0.12584154307842255,
      "learning_rate": 3.987664977629594e-06,
      "loss": 0.0092,
      "step": 1844900
    },
    {
      "epoch": 3.019252044015894,
      "grad_norm": 0.13059449195861816,
      "learning_rate": 3.987599085416077e-06,
      "loss": 0.0108,
      "step": 1844920
    },
    {
      "epoch": 3.0192847744545475,
      "grad_norm": 0.5843644738197327,
      "learning_rate": 3.987533193202559e-06,
      "loss": 0.0111,
      "step": 1844940
    },
    {
      "epoch": 3.0193175048932006,
      "grad_norm": 0.33746469020843506,
      "learning_rate": 3.987467300989042e-06,
      "loss": 0.0098,
      "step": 1844960
    },
    {
      "epoch": 3.0193502353318538,
      "grad_norm": 0.028924763202667236,
      "learning_rate": 3.987401408775525e-06,
      "loss": 0.0135,
      "step": 1844980
    },
    {
      "epoch": 3.0193829657705074,
      "grad_norm": 0.22906358540058136,
      "learning_rate": 3.9873355165620085e-06,
      "loss": 0.0122,
      "step": 1845000
    },
    {
      "epoch": 3.0194156962091605,
      "grad_norm": 0.21382252871990204,
      "learning_rate": 3.987269624348491e-06,
      "loss": 0.0114,
      "step": 1845020
    },
    {
      "epoch": 3.019448426647814,
      "grad_norm": 0.24665097892284393,
      "learning_rate": 3.987203732134974e-06,
      "loss": 0.0072,
      "step": 1845040
    },
    {
      "epoch": 3.0194811570864672,
      "grad_norm": 0.8761159181594849,
      "learning_rate": 3.987137839921457e-06,
      "loss": 0.0082,
      "step": 1845060
    },
    {
      "epoch": 3.0195138875251204,
      "grad_norm": 0.3643554747104645,
      "learning_rate": 3.9870719477079394e-06,
      "loss": 0.0109,
      "step": 1845080
    },
    {
      "epoch": 3.019546617963774,
      "grad_norm": 0.13587237894535065,
      "learning_rate": 3.987006055494422e-06,
      "loss": 0.0135,
      "step": 1845100
    },
    {
      "epoch": 3.019579348402427,
      "grad_norm": 0.9088932871818542,
      "learning_rate": 3.986940163280905e-06,
      "loss": 0.0102,
      "step": 1845120
    },
    {
      "epoch": 3.0196120788410807,
      "grad_norm": 0.10201291739940643,
      "learning_rate": 3.9868742710673885e-06,
      "loss": 0.0078,
      "step": 1845140
    },
    {
      "epoch": 3.019644809279734,
      "grad_norm": 0.1404297649860382,
      "learning_rate": 3.986808378853871e-06,
      "loss": 0.0135,
      "step": 1845160
    },
    {
      "epoch": 3.0196775397183875,
      "grad_norm": 0.3253970444202423,
      "learning_rate": 3.986742486640354e-06,
      "loss": 0.0101,
      "step": 1845180
    },
    {
      "epoch": 3.0197102701570406,
      "grad_norm": 0.18000830709934235,
      "learning_rate": 3.986676594426837e-06,
      "loss": 0.0113,
      "step": 1845200
    },
    {
      "epoch": 3.019743000595694,
      "grad_norm": 0.44622600078582764,
      "learning_rate": 3.98661070221332e-06,
      "loss": 0.0086,
      "step": 1845220
    },
    {
      "epoch": 3.0197757310343474,
      "grad_norm": 0.2844204008579254,
      "learning_rate": 3.986544809999803e-06,
      "loss": 0.0155,
      "step": 1845240
    },
    {
      "epoch": 3.0198084614730005,
      "grad_norm": 0.17784236371517181,
      "learning_rate": 3.986478917786286e-06,
      "loss": 0.0098,
      "step": 1845260
    },
    {
      "epoch": 3.019841191911654,
      "grad_norm": 0.2292499989271164,
      "learning_rate": 3.9864130255727685e-06,
      "loss": 0.0183,
      "step": 1845280
    },
    {
      "epoch": 3.0198739223503073,
      "grad_norm": 0.1113487109541893,
      "learning_rate": 3.986347133359251e-06,
      "loss": 0.012,
      "step": 1845300
    },
    {
      "epoch": 3.019906652788961,
      "grad_norm": 0.25917941331863403,
      "learning_rate": 3.986281241145734e-06,
      "loss": 0.0134,
      "step": 1845320
    },
    {
      "epoch": 3.019939383227614,
      "grad_norm": 0.17540769279003143,
      "learning_rate": 3.986215348932217e-06,
      "loss": 0.0117,
      "step": 1845340
    },
    {
      "epoch": 3.019972113666267,
      "grad_norm": 0.6849256157875061,
      "learning_rate": 3.9861494567186995e-06,
      "loss": 0.0079,
      "step": 1845360
    },
    {
      "epoch": 3.0200048441049208,
      "grad_norm": 0.24525965750217438,
      "learning_rate": 3.986083564505182e-06,
      "loss": 0.01,
      "step": 1845380
    },
    {
      "epoch": 3.020037574543574,
      "grad_norm": 0.1654372215270996,
      "learning_rate": 3.986017672291666e-06,
      "loss": 0.012,
      "step": 1845400
    },
    {
      "epoch": 3.0200703049822275,
      "grad_norm": 0.1876937747001648,
      "learning_rate": 3.9859517800781486e-06,
      "loss": 0.0103,
      "step": 1845420
    },
    {
      "epoch": 3.0201030354208807,
      "grad_norm": 0.0826725959777832,
      "learning_rate": 3.985885887864631e-06,
      "loss": 0.0099,
      "step": 1845440
    },
    {
      "epoch": 3.0201357658595342,
      "grad_norm": 0.08493376523256302,
      "learning_rate": 3.985819995651114e-06,
      "loss": 0.0122,
      "step": 1845460
    },
    {
      "epoch": 3.0201684962981874,
      "grad_norm": 0.6443952918052673,
      "learning_rate": 3.985754103437597e-06,
      "loss": 0.0094,
      "step": 1845480
    },
    {
      "epoch": 3.0202012267368405,
      "grad_norm": 0.37074220180511475,
      "learning_rate": 3.98568821122408e-06,
      "loss": 0.0093,
      "step": 1845500
    },
    {
      "epoch": 3.020233957175494,
      "grad_norm": 0.2211548388004303,
      "learning_rate": 3.985622319010563e-06,
      "loss": 0.0143,
      "step": 1845520
    },
    {
      "epoch": 3.0202666876141473,
      "grad_norm": 0.3197893500328064,
      "learning_rate": 3.985556426797046e-06,
      "loss": 0.0145,
      "step": 1845540
    },
    {
      "epoch": 3.020299418052801,
      "grad_norm": 0.42288923263549805,
      "learning_rate": 3.985490534583529e-06,
      "loss": 0.0092,
      "step": 1845560
    },
    {
      "epoch": 3.020332148491454,
      "grad_norm": 0.2883462905883789,
      "learning_rate": 3.985424642370011e-06,
      "loss": 0.0118,
      "step": 1845580
    },
    {
      "epoch": 3.0203648789301076,
      "grad_norm": 0.35202333331108093,
      "learning_rate": 3.985358750156494e-06,
      "loss": 0.0158,
      "step": 1845600
    },
    {
      "epoch": 3.020397609368761,
      "grad_norm": 0.18468943238258362,
      "learning_rate": 3.985292857942978e-06,
      "loss": 0.0071,
      "step": 1845620
    },
    {
      "epoch": 3.020430339807414,
      "grad_norm": 0.26904433965682983,
      "learning_rate": 3.98522696572946e-06,
      "loss": 0.014,
      "step": 1845640
    },
    {
      "epoch": 3.0204630702460675,
      "grad_norm": 0.732734739780426,
      "learning_rate": 3.985161073515943e-06,
      "loss": 0.0141,
      "step": 1845660
    },
    {
      "epoch": 3.0204958006847207,
      "grad_norm": 0.2736639976501465,
      "learning_rate": 3.985095181302426e-06,
      "loss": 0.0131,
      "step": 1845680
    },
    {
      "epoch": 3.0205285311233743,
      "grad_norm": 0.22843335568904877,
      "learning_rate": 3.985029289088909e-06,
      "loss": 0.0129,
      "step": 1845700
    },
    {
      "epoch": 3.0205612615620274,
      "grad_norm": 0.7710230946540833,
      "learning_rate": 3.984963396875391e-06,
      "loss": 0.0108,
      "step": 1845720
    },
    {
      "epoch": 3.0205939920006806,
      "grad_norm": 0.5103808641433716,
      "learning_rate": 3.984897504661874e-06,
      "loss": 0.0094,
      "step": 1845740
    },
    {
      "epoch": 3.020626722439334,
      "grad_norm": 0.9571344256401062,
      "learning_rate": 3.984831612448357e-06,
      "loss": 0.0218,
      "step": 1845760
    },
    {
      "epoch": 3.0206594528779873,
      "grad_norm": 0.10542207956314087,
      "learning_rate": 3.9847657202348396e-06,
      "loss": 0.0137,
      "step": 1845780
    },
    {
      "epoch": 3.020692183316641,
      "grad_norm": 0.30044662952423096,
      "learning_rate": 3.984699828021323e-06,
      "loss": 0.0103,
      "step": 1845800
    },
    {
      "epoch": 3.020724913755294,
      "grad_norm": 0.35212448239326477,
      "learning_rate": 3.984633935807806e-06,
      "loss": 0.0121,
      "step": 1845820
    },
    {
      "epoch": 3.0207576441939477,
      "grad_norm": 0.31410935521125793,
      "learning_rate": 3.984568043594289e-06,
      "loss": 0.0081,
      "step": 1845840
    },
    {
      "epoch": 3.020790374632601,
      "grad_norm": 0.9381953477859497,
      "learning_rate": 3.984502151380772e-06,
      "loss": 0.0081,
      "step": 1845860
    },
    {
      "epoch": 3.020823105071254,
      "grad_norm": 0.12374328076839447,
      "learning_rate": 3.984436259167255e-06,
      "loss": 0.0083,
      "step": 1845880
    },
    {
      "epoch": 3.0208558355099075,
      "grad_norm": 0.2162785828113556,
      "learning_rate": 3.984370366953738e-06,
      "loss": 0.0063,
      "step": 1845900
    },
    {
      "epoch": 3.0208885659485607,
      "grad_norm": 0.42894959449768066,
      "learning_rate": 3.9843044747402204e-06,
      "loss": 0.0126,
      "step": 1845920
    },
    {
      "epoch": 3.0209212963872143,
      "grad_norm": 0.121222123503685,
      "learning_rate": 3.984238582526703e-06,
      "loss": 0.0112,
      "step": 1845940
    },
    {
      "epoch": 3.0209540268258674,
      "grad_norm": 0.15614038705825806,
      "learning_rate": 3.984172690313186e-06,
      "loss": 0.009,
      "step": 1845960
    },
    {
      "epoch": 3.020986757264521,
      "grad_norm": 0.6802165508270264,
      "learning_rate": 3.984106798099669e-06,
      "loss": 0.013,
      "step": 1845980
    },
    {
      "epoch": 3.021019487703174,
      "grad_norm": 0.054048873484134674,
      "learning_rate": 3.984040905886151e-06,
      "loss": 0.0089,
      "step": 1846000
    },
    {
      "epoch": 3.0210522181418273,
      "grad_norm": 0.48779210448265076,
      "learning_rate": 3.983975013672635e-06,
      "loss": 0.0104,
      "step": 1846020
    },
    {
      "epoch": 3.021084948580481,
      "grad_norm": 0.7274675965309143,
      "learning_rate": 3.983909121459118e-06,
      "loss": 0.0138,
      "step": 1846040
    },
    {
      "epoch": 3.021117679019134,
      "grad_norm": 0.3234328329563141,
      "learning_rate": 3.9838432292456005e-06,
      "loss": 0.0134,
      "step": 1846060
    },
    {
      "epoch": 3.0211504094577877,
      "grad_norm": 0.17207208275794983,
      "learning_rate": 3.983777337032083e-06,
      "loss": 0.009,
      "step": 1846080
    },
    {
      "epoch": 3.021183139896441,
      "grad_norm": 0.33045873045921326,
      "learning_rate": 3.983711444818566e-06,
      "loss": 0.0153,
      "step": 1846100
    },
    {
      "epoch": 3.0212158703350944,
      "grad_norm": 0.1500011682510376,
      "learning_rate": 3.983645552605049e-06,
      "loss": 0.0143,
      "step": 1846120
    },
    {
      "epoch": 3.0212486007737476,
      "grad_norm": 0.16588419675827026,
      "learning_rate": 3.9835796603915314e-06,
      "loss": 0.0088,
      "step": 1846140
    },
    {
      "epoch": 3.0212813312124007,
      "grad_norm": 0.42538389563560486,
      "learning_rate": 3.983513768178014e-06,
      "loss": 0.0078,
      "step": 1846160
    },
    {
      "epoch": 3.0213140616510543,
      "grad_norm": 0.4025096595287323,
      "learning_rate": 3.983447875964498e-06,
      "loss": 0.0113,
      "step": 1846180
    },
    {
      "epoch": 3.0213467920897075,
      "grad_norm": 0.12321814149618149,
      "learning_rate": 3.9833819837509805e-06,
      "loss": 0.0113,
      "step": 1846200
    },
    {
      "epoch": 3.021379522528361,
      "grad_norm": 0.1569165587425232,
      "learning_rate": 3.983316091537463e-06,
      "loss": 0.0109,
      "step": 1846220
    },
    {
      "epoch": 3.021412252967014,
      "grad_norm": 0.35561153292655945,
      "learning_rate": 3.983250199323947e-06,
      "loss": 0.0158,
      "step": 1846240
    },
    {
      "epoch": 3.021444983405668,
      "grad_norm": 0.11414133757352829,
      "learning_rate": 3.9831843071104296e-06,
      "loss": 0.0119,
      "step": 1846260
    },
    {
      "epoch": 3.021477713844321,
      "grad_norm": 0.22748561203479767,
      "learning_rate": 3.983118414896912e-06,
      "loss": 0.011,
      "step": 1846280
    },
    {
      "epoch": 3.021510444282974,
      "grad_norm": 0.4211941659450531,
      "learning_rate": 3.983052522683395e-06,
      "loss": 0.0116,
      "step": 1846300
    },
    {
      "epoch": 3.0215431747216277,
      "grad_norm": 0.1463915854692459,
      "learning_rate": 3.982986630469878e-06,
      "loss": 0.011,
      "step": 1846320
    },
    {
      "epoch": 3.021575905160281,
      "grad_norm": 0.19434744119644165,
      "learning_rate": 3.9829207382563605e-06,
      "loss": 0.0147,
      "step": 1846340
    },
    {
      "epoch": 3.0216086355989344,
      "grad_norm": 0.5790651440620422,
      "learning_rate": 3.982854846042843e-06,
      "loss": 0.0149,
      "step": 1846360
    },
    {
      "epoch": 3.0216413660375876,
      "grad_norm": 0.5997707843780518,
      "learning_rate": 3.982788953829326e-06,
      "loss": 0.0085,
      "step": 1846380
    },
    {
      "epoch": 3.021674096476241,
      "grad_norm": 0.13414278626441956,
      "learning_rate": 3.982723061615809e-06,
      "loss": 0.013,
      "step": 1846400
    },
    {
      "epoch": 3.0217068269148943,
      "grad_norm": 0.7007831335067749,
      "learning_rate": 3.982657169402292e-06,
      "loss": 0.0134,
      "step": 1846420
    },
    {
      "epoch": 3.0217395573535475,
      "grad_norm": 0.21015699207782745,
      "learning_rate": 3.982591277188775e-06,
      "loss": 0.0086,
      "step": 1846440
    },
    {
      "epoch": 3.021772287792201,
      "grad_norm": 0.4270145297050476,
      "learning_rate": 3.982525384975258e-06,
      "loss": 0.0101,
      "step": 1846460
    },
    {
      "epoch": 3.0218050182308542,
      "grad_norm": 0.1652551293373108,
      "learning_rate": 3.9824594927617405e-06,
      "loss": 0.0118,
      "step": 1846480
    },
    {
      "epoch": 3.021837748669508,
      "grad_norm": 0.4202612340450287,
      "learning_rate": 3.982393600548223e-06,
      "loss": 0.0165,
      "step": 1846500
    },
    {
      "epoch": 3.021870479108161,
      "grad_norm": 0.07453431189060211,
      "learning_rate": 3.982327708334706e-06,
      "loss": 0.013,
      "step": 1846520
    },
    {
      "epoch": 3.021903209546814,
      "grad_norm": 0.17070211470127106,
      "learning_rate": 3.98226181612119e-06,
      "loss": 0.0102,
      "step": 1846540
    },
    {
      "epoch": 3.0219359399854677,
      "grad_norm": 0.24586781859397888,
      "learning_rate": 3.982195923907672e-06,
      "loss": 0.0088,
      "step": 1846560
    },
    {
      "epoch": 3.021968670424121,
      "grad_norm": 0.5210888385772705,
      "learning_rate": 3.982130031694155e-06,
      "loss": 0.0145,
      "step": 1846580
    },
    {
      "epoch": 3.0220014008627745,
      "grad_norm": 0.3948000371456146,
      "learning_rate": 3.982064139480638e-06,
      "loss": 0.0118,
      "step": 1846600
    },
    {
      "epoch": 3.0220341313014276,
      "grad_norm": 0.1213025227189064,
      "learning_rate": 3.9819982472671206e-06,
      "loss": 0.0087,
      "step": 1846620
    },
    {
      "epoch": 3.022066861740081,
      "grad_norm": 0.11009742319583893,
      "learning_rate": 3.981932355053604e-06,
      "loss": 0.0101,
      "step": 1846640
    },
    {
      "epoch": 3.0220995921787344,
      "grad_norm": 0.9168699979782104,
      "learning_rate": 3.981866462840087e-06,
      "loss": 0.0151,
      "step": 1846660
    },
    {
      "epoch": 3.0221323226173875,
      "grad_norm": 0.38809579610824585,
      "learning_rate": 3.98180057062657e-06,
      "loss": 0.0081,
      "step": 1846680
    },
    {
      "epoch": 3.022165053056041,
      "grad_norm": 0.563190758228302,
      "learning_rate": 3.981734678413052e-06,
      "loss": 0.0162,
      "step": 1846700
    },
    {
      "epoch": 3.0221977834946943,
      "grad_norm": 0.08650955557823181,
      "learning_rate": 3.981668786199535e-06,
      "loss": 0.0118,
      "step": 1846720
    },
    {
      "epoch": 3.022230513933348,
      "grad_norm": 0.5191096067428589,
      "learning_rate": 3.981602893986018e-06,
      "loss": 0.0153,
      "step": 1846740
    },
    {
      "epoch": 3.022263244372001,
      "grad_norm": 0.08842751383781433,
      "learning_rate": 3.981537001772501e-06,
      "loss": 0.0115,
      "step": 1846760
    },
    {
      "epoch": 3.0222959748106546,
      "grad_norm": 0.11369007080793381,
      "learning_rate": 3.981471109558983e-06,
      "loss": 0.0088,
      "step": 1846780
    },
    {
      "epoch": 3.0223287052493077,
      "grad_norm": 0.12357854098081589,
      "learning_rate": 3.981405217345466e-06,
      "loss": 0.0114,
      "step": 1846800
    },
    {
      "epoch": 3.022361435687961,
      "grad_norm": 0.2713358998298645,
      "learning_rate": 3.98133932513195e-06,
      "loss": 0.0064,
      "step": 1846820
    },
    {
      "epoch": 3.0223941661266145,
      "grad_norm": 0.11039294302463531,
      "learning_rate": 3.981273432918432e-06,
      "loss": 0.0172,
      "step": 1846840
    },
    {
      "epoch": 3.0224268965652676,
      "grad_norm": 0.30376216769218445,
      "learning_rate": 3.981207540704915e-06,
      "loss": 0.0123,
      "step": 1846860
    },
    {
      "epoch": 3.0224596270039212,
      "grad_norm": 0.4249629080295563,
      "learning_rate": 3.981141648491398e-06,
      "loss": 0.0075,
      "step": 1846880
    },
    {
      "epoch": 3.0224923574425744,
      "grad_norm": 0.25630658864974976,
      "learning_rate": 3.9810757562778815e-06,
      "loss": 0.0148,
      "step": 1846900
    },
    {
      "epoch": 3.022525087881228,
      "grad_norm": 0.14775559306144714,
      "learning_rate": 3.981009864064364e-06,
      "loss": 0.0129,
      "step": 1846920
    },
    {
      "epoch": 3.022557818319881,
      "grad_norm": 0.6775596141815186,
      "learning_rate": 3.980943971850847e-06,
      "loss": 0.0111,
      "step": 1846940
    },
    {
      "epoch": 3.0225905487585343,
      "grad_norm": 0.34058570861816406,
      "learning_rate": 3.98087807963733e-06,
      "loss": 0.0092,
      "step": 1846960
    },
    {
      "epoch": 3.022623279197188,
      "grad_norm": 0.1949981302022934,
      "learning_rate": 3.9808121874238124e-06,
      "loss": 0.0118,
      "step": 1846980
    },
    {
      "epoch": 3.022656009635841,
      "grad_norm": 0.2341233491897583,
      "learning_rate": 3.980746295210295e-06,
      "loss": 0.0076,
      "step": 1847000
    },
    {
      "epoch": 3.0226887400744946,
      "grad_norm": 0.11817260086536407,
      "learning_rate": 3.980680402996778e-06,
      "loss": 0.0121,
      "step": 1847020
    },
    {
      "epoch": 3.0227214705131478,
      "grad_norm": 0.5267739295959473,
      "learning_rate": 3.9806145107832615e-06,
      "loss": 0.0167,
      "step": 1847040
    },
    {
      "epoch": 3.0227542009518014,
      "grad_norm": 0.3630824685096741,
      "learning_rate": 3.980548618569744e-06,
      "loss": 0.0103,
      "step": 1847060
    },
    {
      "epoch": 3.0227869313904545,
      "grad_norm": 0.3520744740962982,
      "learning_rate": 3.980482726356227e-06,
      "loss": 0.0106,
      "step": 1847080
    },
    {
      "epoch": 3.0228196618291077,
      "grad_norm": 0.5127277374267578,
      "learning_rate": 3.98041683414271e-06,
      "loss": 0.013,
      "step": 1847100
    },
    {
      "epoch": 3.0228523922677613,
      "grad_norm": 0.28489363193511963,
      "learning_rate": 3.9803509419291925e-06,
      "loss": 0.0108,
      "step": 1847120
    },
    {
      "epoch": 3.0228851227064144,
      "grad_norm": 0.2595527470111847,
      "learning_rate": 3.980285049715675e-06,
      "loss": 0.008,
      "step": 1847140
    },
    {
      "epoch": 3.022917853145068,
      "grad_norm": 0.8009995222091675,
      "learning_rate": 3.980219157502158e-06,
      "loss": 0.0118,
      "step": 1847160
    },
    {
      "epoch": 3.022950583583721,
      "grad_norm": 0.6267959475517273,
      "learning_rate": 3.980153265288641e-06,
      "loss": 0.0086,
      "step": 1847180
    },
    {
      "epoch": 3.0229833140223747,
      "grad_norm": 0.5278940796852112,
      "learning_rate": 3.980087373075123e-06,
      "loss": 0.0098,
      "step": 1847200
    },
    {
      "epoch": 3.023016044461028,
      "grad_norm": 0.31795012950897217,
      "learning_rate": 3.980021480861607e-06,
      "loss": 0.0145,
      "step": 1847220
    },
    {
      "epoch": 3.023048774899681,
      "grad_norm": 0.18263530731201172,
      "learning_rate": 3.97995558864809e-06,
      "loss": 0.0115,
      "step": 1847240
    },
    {
      "epoch": 3.0230815053383346,
      "grad_norm": 0.43515875935554504,
      "learning_rate": 3.979889696434573e-06,
      "loss": 0.0095,
      "step": 1847260
    },
    {
      "epoch": 3.023114235776988,
      "grad_norm": 0.6540042757987976,
      "learning_rate": 3.979823804221056e-06,
      "loss": 0.012,
      "step": 1847280
    },
    {
      "epoch": 3.0231469662156414,
      "grad_norm": 0.6016075015068054,
      "learning_rate": 3.979757912007539e-06,
      "loss": 0.0117,
      "step": 1847300
    },
    {
      "epoch": 3.0231796966542945,
      "grad_norm": 0.2693791389465332,
      "learning_rate": 3.9796920197940215e-06,
      "loss": 0.013,
      "step": 1847320
    },
    {
      "epoch": 3.0232124270929477,
      "grad_norm": 0.13410741090774536,
      "learning_rate": 3.979626127580504e-06,
      "loss": 0.0088,
      "step": 1847340
    },
    {
      "epoch": 3.0232451575316013,
      "grad_norm": 0.44727593660354614,
      "learning_rate": 3.979560235366987e-06,
      "loss": 0.0075,
      "step": 1847360
    },
    {
      "epoch": 3.0232778879702544,
      "grad_norm": 0.3096437454223633,
      "learning_rate": 3.97949434315347e-06,
      "loss": 0.012,
      "step": 1847380
    },
    {
      "epoch": 3.023310618408908,
      "grad_norm": 0.1834428906440735,
      "learning_rate": 3.9794284509399525e-06,
      "loss": 0.0122,
      "step": 1847400
    },
    {
      "epoch": 3.023343348847561,
      "grad_norm": 0.2894422113895416,
      "learning_rate": 3.979362558726435e-06,
      "loss": 0.0108,
      "step": 1847420
    },
    {
      "epoch": 3.0233760792862148,
      "grad_norm": 0.32307344675064087,
      "learning_rate": 3.979296666512919e-06,
      "loss": 0.009,
      "step": 1847440
    },
    {
      "epoch": 3.023408809724868,
      "grad_norm": 0.03469531238079071,
      "learning_rate": 3.9792307742994016e-06,
      "loss": 0.0085,
      "step": 1847460
    },
    {
      "epoch": 3.023441540163521,
      "grad_norm": 0.4087255895137787,
      "learning_rate": 3.979164882085884e-06,
      "loss": 0.0106,
      "step": 1847480
    },
    {
      "epoch": 3.0234742706021747,
      "grad_norm": 0.14475463330745697,
      "learning_rate": 3.979098989872367e-06,
      "loss": 0.0073,
      "step": 1847500
    },
    {
      "epoch": 3.023507001040828,
      "grad_norm": 0.42235639691352844,
      "learning_rate": 3.97903309765885e-06,
      "loss": 0.0174,
      "step": 1847520
    },
    {
      "epoch": 3.0235397314794814,
      "grad_norm": 0.09797954559326172,
      "learning_rate": 3.9789672054453325e-06,
      "loss": 0.0096,
      "step": 1847540
    },
    {
      "epoch": 3.0235724619181346,
      "grad_norm": 0.2745164930820465,
      "learning_rate": 3.978901313231815e-06,
      "loss": 0.0109,
      "step": 1847560
    },
    {
      "epoch": 3.023605192356788,
      "grad_norm": 0.47021907567977905,
      "learning_rate": 3.978835421018298e-06,
      "loss": 0.0105,
      "step": 1847580
    },
    {
      "epoch": 3.0236379227954413,
      "grad_norm": 0.2472112625837326,
      "learning_rate": 3.978769528804782e-06,
      "loss": 0.0078,
      "step": 1847600
    },
    {
      "epoch": 3.0236706532340945,
      "grad_norm": 0.3156295120716095,
      "learning_rate": 3.978703636591264e-06,
      "loss": 0.0169,
      "step": 1847620
    },
    {
      "epoch": 3.023703383672748,
      "grad_norm": 0.4757022559642792,
      "learning_rate": 3.978637744377747e-06,
      "loss": 0.0072,
      "step": 1847640
    },
    {
      "epoch": 3.023736114111401,
      "grad_norm": 0.6711083650588989,
      "learning_rate": 3.978571852164231e-06,
      "loss": 0.0172,
      "step": 1847660
    },
    {
      "epoch": 3.023768844550055,
      "grad_norm": 0.8095806241035461,
      "learning_rate": 3.978505959950713e-06,
      "loss": 0.011,
      "step": 1847680
    },
    {
      "epoch": 3.023801574988708,
      "grad_norm": 0.06842231750488281,
      "learning_rate": 3.978440067737196e-06,
      "loss": 0.015,
      "step": 1847700
    },
    {
      "epoch": 3.0238343054273615,
      "grad_norm": 0.43153026700019836,
      "learning_rate": 3.978374175523679e-06,
      "loss": 0.014,
      "step": 1847720
    },
    {
      "epoch": 3.0238670358660147,
      "grad_norm": 0.09114936739206314,
      "learning_rate": 3.978308283310162e-06,
      "loss": 0.0103,
      "step": 1847740
    },
    {
      "epoch": 3.023899766304668,
      "grad_norm": 0.10657454282045364,
      "learning_rate": 3.978242391096644e-06,
      "loss": 0.0073,
      "step": 1847760
    },
    {
      "epoch": 3.0239324967433214,
      "grad_norm": 0.22404024004936218,
      "learning_rate": 3.978176498883127e-06,
      "loss": 0.0173,
      "step": 1847780
    },
    {
      "epoch": 3.0239652271819746,
      "grad_norm": 0.3114546239376068,
      "learning_rate": 3.97811060666961e-06,
      "loss": 0.0079,
      "step": 1847800
    },
    {
      "epoch": 3.023997957620628,
      "grad_norm": 0.1817663460969925,
      "learning_rate": 3.978044714456093e-06,
      "loss": 0.0137,
      "step": 1847820
    },
    {
      "epoch": 3.0240306880592813,
      "grad_norm": 0.23596711456775665,
      "learning_rate": 3.977978822242576e-06,
      "loss": 0.0113,
      "step": 1847840
    },
    {
      "epoch": 3.024063418497935,
      "grad_norm": 0.1666286736726761,
      "learning_rate": 3.977912930029059e-06,
      "loss": 0.0125,
      "step": 1847860
    },
    {
      "epoch": 3.024096148936588,
      "grad_norm": 0.2272217869758606,
      "learning_rate": 3.977847037815542e-06,
      "loss": 0.0119,
      "step": 1847880
    },
    {
      "epoch": 3.024128879375241,
      "grad_norm": 0.5293225049972534,
      "learning_rate": 3.977781145602024e-06,
      "loss": 0.0136,
      "step": 1847900
    },
    {
      "epoch": 3.024161609813895,
      "grad_norm": 0.36223098635673523,
      "learning_rate": 3.977715253388507e-06,
      "loss": 0.0093,
      "step": 1847920
    },
    {
      "epoch": 3.024194340252548,
      "grad_norm": 0.1707601249217987,
      "learning_rate": 3.97764936117499e-06,
      "loss": 0.0136,
      "step": 1847940
    },
    {
      "epoch": 3.0242270706912016,
      "grad_norm": 0.052967581897974014,
      "learning_rate": 3.9775834689614735e-06,
      "loss": 0.0131,
      "step": 1847960
    },
    {
      "epoch": 3.0242598011298547,
      "grad_norm": 0.08726012706756592,
      "learning_rate": 3.977517576747956e-06,
      "loss": 0.0111,
      "step": 1847980
    },
    {
      "epoch": 3.0242925315685083,
      "grad_norm": 0.0808180570602417,
      "learning_rate": 3.977451684534439e-06,
      "loss": 0.0108,
      "step": 1848000
    },
    {
      "epoch": 3.0243252620071615,
      "grad_norm": 0.4370694160461426,
      "learning_rate": 3.977385792320922e-06,
      "loss": 0.0119,
      "step": 1848020
    },
    {
      "epoch": 3.0243579924458146,
      "grad_norm": 0.35853686928749084,
      "learning_rate": 3.977319900107404e-06,
      "loss": 0.0179,
      "step": 1848040
    },
    {
      "epoch": 3.024390722884468,
      "grad_norm": 0.14835737645626068,
      "learning_rate": 3.977254007893888e-06,
      "loss": 0.0133,
      "step": 1848060
    },
    {
      "epoch": 3.0244234533231213,
      "grad_norm": 0.34513020515441895,
      "learning_rate": 3.977188115680371e-06,
      "loss": 0.0105,
      "step": 1848080
    },
    {
      "epoch": 3.024456183761775,
      "grad_norm": 0.45819246768951416,
      "learning_rate": 3.9771222234668535e-06,
      "loss": 0.015,
      "step": 1848100
    },
    {
      "epoch": 3.024488914200428,
      "grad_norm": 0.31487318873405457,
      "learning_rate": 3.977056331253336e-06,
      "loss": 0.0127,
      "step": 1848120
    },
    {
      "epoch": 3.0245216446390812,
      "grad_norm": 0.9736067056655884,
      "learning_rate": 3.976990439039819e-06,
      "loss": 0.009,
      "step": 1848140
    },
    {
      "epoch": 3.024554375077735,
      "grad_norm": 0.3204578459262848,
      "learning_rate": 3.976924546826302e-06,
      "loss": 0.0161,
      "step": 1848160
    },
    {
      "epoch": 3.024587105516388,
      "grad_norm": 0.16427560150623322,
      "learning_rate": 3.9768586546127844e-06,
      "loss": 0.0103,
      "step": 1848180
    },
    {
      "epoch": 3.0246198359550416,
      "grad_norm": 0.5882955193519592,
      "learning_rate": 3.976792762399267e-06,
      "loss": 0.0072,
      "step": 1848200
    },
    {
      "epoch": 3.0246525663936947,
      "grad_norm": 0.5226317048072815,
      "learning_rate": 3.97672687018575e-06,
      "loss": 0.0155,
      "step": 1848220
    },
    {
      "epoch": 3.0246852968323483,
      "grad_norm": 0.3126659691333771,
      "learning_rate": 3.9766609779722335e-06,
      "loss": 0.0126,
      "step": 1848240
    },
    {
      "epoch": 3.0247180272710015,
      "grad_norm": 0.31296035647392273,
      "learning_rate": 3.976595085758716e-06,
      "loss": 0.0101,
      "step": 1848260
    },
    {
      "epoch": 3.0247507577096546,
      "grad_norm": 0.3511905074119568,
      "learning_rate": 3.976529193545199e-06,
      "loss": 0.007,
      "step": 1848280
    },
    {
      "epoch": 3.024783488148308,
      "grad_norm": 0.3412109613418579,
      "learning_rate": 3.9764633013316826e-06,
      "loss": 0.0058,
      "step": 1848300
    },
    {
      "epoch": 3.0248162185869614,
      "grad_norm": 0.24177001416683197,
      "learning_rate": 3.976397409118165e-06,
      "loss": 0.0175,
      "step": 1848320
    },
    {
      "epoch": 3.024848949025615,
      "grad_norm": 0.4239458441734314,
      "learning_rate": 3.976331516904648e-06,
      "loss": 0.0107,
      "step": 1848340
    },
    {
      "epoch": 3.024881679464268,
      "grad_norm": 0.3350251615047455,
      "learning_rate": 3.976265624691131e-06,
      "loss": 0.0109,
      "step": 1848360
    },
    {
      "epoch": 3.0249144099029217,
      "grad_norm": 0.32358166575431824,
      "learning_rate": 3.9761997324776135e-06,
      "loss": 0.0104,
      "step": 1848380
    },
    {
      "epoch": 3.024947140341575,
      "grad_norm": 0.13716638088226318,
      "learning_rate": 3.976133840264096e-06,
      "loss": 0.0102,
      "step": 1848400
    },
    {
      "epoch": 3.024979870780228,
      "grad_norm": 0.7846043109893799,
      "learning_rate": 3.976067948050579e-06,
      "loss": 0.0139,
      "step": 1848420
    },
    {
      "epoch": 3.0250126012188816,
      "grad_norm": 0.464041531085968,
      "learning_rate": 3.976002055837062e-06,
      "loss": 0.0077,
      "step": 1848440
    },
    {
      "epoch": 3.0250453316575348,
      "grad_norm": 0.1461711823940277,
      "learning_rate": 3.975936163623545e-06,
      "loss": 0.0103,
      "step": 1848460
    },
    {
      "epoch": 3.0250780620961883,
      "grad_norm": 0.13145498931407928,
      "learning_rate": 3.975870271410028e-06,
      "loss": 0.0117,
      "step": 1848480
    },
    {
      "epoch": 3.0251107925348415,
      "grad_norm": 0.11531173437833786,
      "learning_rate": 3.975804379196511e-06,
      "loss": 0.0107,
      "step": 1848500
    },
    {
      "epoch": 3.025143522973495,
      "grad_norm": 0.4600743055343628,
      "learning_rate": 3.9757384869829936e-06,
      "loss": 0.0171,
      "step": 1848520
    },
    {
      "epoch": 3.0251762534121482,
      "grad_norm": 0.19806808233261108,
      "learning_rate": 3.975672594769476e-06,
      "loss": 0.0107,
      "step": 1848540
    },
    {
      "epoch": 3.0252089838508014,
      "grad_norm": 0.243615984916687,
      "learning_rate": 3.975606702555959e-06,
      "loss": 0.0081,
      "step": 1848560
    },
    {
      "epoch": 3.025241714289455,
      "grad_norm": 0.1918681114912033,
      "learning_rate": 3.975540810342442e-06,
      "loss": 0.0129,
      "step": 1848580
    },
    {
      "epoch": 3.025274444728108,
      "grad_norm": 0.1363317370414734,
      "learning_rate": 3.9754749181289245e-06,
      "loss": 0.01,
      "step": 1848600
    },
    {
      "epoch": 3.0253071751667617,
      "grad_norm": 0.4105530083179474,
      "learning_rate": 3.975409025915407e-06,
      "loss": 0.0132,
      "step": 1848620
    },
    {
      "epoch": 3.025339905605415,
      "grad_norm": 0.16061264276504517,
      "learning_rate": 3.975343133701891e-06,
      "loss": 0.0141,
      "step": 1848640
    },
    {
      "epoch": 3.0253726360440685,
      "grad_norm": 0.4094794988632202,
      "learning_rate": 3.975277241488374e-06,
      "loss": 0.0104,
      "step": 1848660
    },
    {
      "epoch": 3.0254053664827216,
      "grad_norm": 0.28317922353744507,
      "learning_rate": 3.975211349274857e-06,
      "loss": 0.0128,
      "step": 1848680
    },
    {
      "epoch": 3.0254380969213748,
      "grad_norm": 0.409598171710968,
      "learning_rate": 3.97514545706134e-06,
      "loss": 0.0158,
      "step": 1848700
    },
    {
      "epoch": 3.0254708273600284,
      "grad_norm": 0.24406692385673523,
      "learning_rate": 3.975079564847823e-06,
      "loss": 0.0142,
      "step": 1848720
    },
    {
      "epoch": 3.0255035577986815,
      "grad_norm": 0.05194110795855522,
      "learning_rate": 3.975013672634305e-06,
      "loss": 0.0097,
      "step": 1848740
    },
    {
      "epoch": 3.025536288237335,
      "grad_norm": 0.038615696132183075,
      "learning_rate": 3.974947780420788e-06,
      "loss": 0.0157,
      "step": 1848760
    },
    {
      "epoch": 3.0255690186759883,
      "grad_norm": 0.37147873640060425,
      "learning_rate": 3.974881888207271e-06,
      "loss": 0.0117,
      "step": 1848780
    },
    {
      "epoch": 3.0256017491146414,
      "grad_norm": 0.6430518627166748,
      "learning_rate": 3.974815995993754e-06,
      "loss": 0.0186,
      "step": 1848800
    },
    {
      "epoch": 3.025634479553295,
      "grad_norm": 0.4962948262691498,
      "learning_rate": 3.974750103780236e-06,
      "loss": 0.0093,
      "step": 1848820
    },
    {
      "epoch": 3.025667209991948,
      "grad_norm": 0.4934770166873932,
      "learning_rate": 3.974684211566719e-06,
      "loss": 0.0078,
      "step": 1848840
    },
    {
      "epoch": 3.0256999404306018,
      "grad_norm": 0.23821572959423065,
      "learning_rate": 3.974618319353203e-06,
      "loss": 0.0071,
      "step": 1848860
    },
    {
      "epoch": 3.025732670869255,
      "grad_norm": 0.553401529788971,
      "learning_rate": 3.974552427139685e-06,
      "loss": 0.0142,
      "step": 1848880
    },
    {
      "epoch": 3.0257654013079085,
      "grad_norm": 0.3950171172618866,
      "learning_rate": 3.974486534926168e-06,
      "loss": 0.0086,
      "step": 1848900
    },
    {
      "epoch": 3.0257981317465616,
      "grad_norm": 0.24709101021289825,
      "learning_rate": 3.974420642712651e-06,
      "loss": 0.0147,
      "step": 1848920
    },
    {
      "epoch": 3.025830862185215,
      "grad_norm": 0.2684931755065918,
      "learning_rate": 3.974354750499134e-06,
      "loss": 0.0122,
      "step": 1848940
    },
    {
      "epoch": 3.0258635926238684,
      "grad_norm": 1.3917584419250488,
      "learning_rate": 3.974288858285616e-06,
      "loss": 0.0086,
      "step": 1848960
    },
    {
      "epoch": 3.0258963230625215,
      "grad_norm": 0.27571937441825867,
      "learning_rate": 3.974222966072099e-06,
      "loss": 0.0185,
      "step": 1848980
    },
    {
      "epoch": 3.025929053501175,
      "grad_norm": 0.19827453792095184,
      "learning_rate": 3.974157073858582e-06,
      "loss": 0.0131,
      "step": 1849000
    },
    {
      "epoch": 3.0259617839398283,
      "grad_norm": 0.2976042330265045,
      "learning_rate": 3.9740911816450654e-06,
      "loss": 0.0101,
      "step": 1849020
    },
    {
      "epoch": 3.025994514378482,
      "grad_norm": 0.2504560351371765,
      "learning_rate": 3.974025289431548e-06,
      "loss": 0.0079,
      "step": 1849040
    },
    {
      "epoch": 3.026027244817135,
      "grad_norm": 0.5479939579963684,
      "learning_rate": 3.973959397218031e-06,
      "loss": 0.013,
      "step": 1849060
    },
    {
      "epoch": 3.026059975255788,
      "grad_norm": 0.48373353481292725,
      "learning_rate": 3.9738935050045145e-06,
      "loss": 0.0137,
      "step": 1849080
    },
    {
      "epoch": 3.0260927056944418,
      "grad_norm": 0.12341898679733276,
      "learning_rate": 3.973827612790997e-06,
      "loss": 0.0097,
      "step": 1849100
    },
    {
      "epoch": 3.026125436133095,
      "grad_norm": 0.2578497529029846,
      "learning_rate": 3.97376172057748e-06,
      "loss": 0.0089,
      "step": 1849120
    },
    {
      "epoch": 3.0261581665717485,
      "grad_norm": 0.6312592029571533,
      "learning_rate": 3.973695828363963e-06,
      "loss": 0.0145,
      "step": 1849140
    },
    {
      "epoch": 3.0261908970104017,
      "grad_norm": 0.304518461227417,
      "learning_rate": 3.9736299361504455e-06,
      "loss": 0.014,
      "step": 1849160
    },
    {
      "epoch": 3.0262236274490553,
      "grad_norm": 0.23291568458080292,
      "learning_rate": 3.973564043936928e-06,
      "loss": 0.008,
      "step": 1849180
    },
    {
      "epoch": 3.0262563578877084,
      "grad_norm": 0.18493972718715668,
      "learning_rate": 3.973498151723411e-06,
      "loss": 0.0069,
      "step": 1849200
    },
    {
      "epoch": 3.0262890883263616,
      "grad_norm": 0.5460203289985657,
      "learning_rate": 3.973432259509894e-06,
      "loss": 0.0094,
      "step": 1849220
    },
    {
      "epoch": 3.026321818765015,
      "grad_norm": 0.12975648045539856,
      "learning_rate": 3.973366367296376e-06,
      "loss": 0.0108,
      "step": 1849240
    },
    {
      "epoch": 3.0263545492036683,
      "grad_norm": 0.6366339921951294,
      "learning_rate": 3.97330047508286e-06,
      "loss": 0.0086,
      "step": 1849260
    },
    {
      "epoch": 3.026387279642322,
      "grad_norm": 0.7031404972076416,
      "learning_rate": 3.973234582869343e-06,
      "loss": 0.0123,
      "step": 1849280
    },
    {
      "epoch": 3.026420010080975,
      "grad_norm": 0.1364261656999588,
      "learning_rate": 3.9731686906558255e-06,
      "loss": 0.0115,
      "step": 1849300
    },
    {
      "epoch": 3.0264527405196286,
      "grad_norm": 0.1454840898513794,
      "learning_rate": 3.973102798442308e-06,
      "loss": 0.0089,
      "step": 1849320
    },
    {
      "epoch": 3.026485470958282,
      "grad_norm": 0.048669662326574326,
      "learning_rate": 3.973036906228791e-06,
      "loss": 0.0077,
      "step": 1849340
    },
    {
      "epoch": 3.026518201396935,
      "grad_norm": 0.27307799458503723,
      "learning_rate": 3.9729710140152746e-06,
      "loss": 0.0089,
      "step": 1849360
    },
    {
      "epoch": 3.0265509318355885,
      "grad_norm": 0.42795416712760925,
      "learning_rate": 3.972905121801757e-06,
      "loss": 0.0114,
      "step": 1849380
    },
    {
      "epoch": 3.0265836622742417,
      "grad_norm": 0.5487643480300903,
      "learning_rate": 3.97283922958824e-06,
      "loss": 0.0136,
      "step": 1849400
    },
    {
      "epoch": 3.0266163927128953,
      "grad_norm": 0.13163796067237854,
      "learning_rate": 3.972773337374723e-06,
      "loss": 0.0161,
      "step": 1849420
    },
    {
      "epoch": 3.0266491231515484,
      "grad_norm": 0.26850050687789917,
      "learning_rate": 3.9727074451612055e-06,
      "loss": 0.0153,
      "step": 1849440
    },
    {
      "epoch": 3.026681853590202,
      "grad_norm": 0.06181318685412407,
      "learning_rate": 3.972641552947688e-06,
      "loss": 0.0113,
      "step": 1849460
    },
    {
      "epoch": 3.026714584028855,
      "grad_norm": 0.43587055802345276,
      "learning_rate": 3.972575660734172e-06,
      "loss": 0.0139,
      "step": 1849480
    },
    {
      "epoch": 3.0267473144675083,
      "grad_norm": 0.3645227551460266,
      "learning_rate": 3.9725097685206546e-06,
      "loss": 0.0127,
      "step": 1849500
    },
    {
      "epoch": 3.026780044906162,
      "grad_norm": 0.18113715946674347,
      "learning_rate": 3.972443876307137e-06,
      "loss": 0.0126,
      "step": 1849520
    },
    {
      "epoch": 3.026812775344815,
      "grad_norm": 0.4035693407058716,
      "learning_rate": 3.97237798409362e-06,
      "loss": 0.013,
      "step": 1849540
    },
    {
      "epoch": 3.0268455057834687,
      "grad_norm": 0.16968415677547455,
      "learning_rate": 3.972312091880103e-06,
      "loss": 0.0127,
      "step": 1849560
    },
    {
      "epoch": 3.026878236222122,
      "grad_norm": 0.047421928495168686,
      "learning_rate": 3.9722461996665855e-06,
      "loss": 0.0067,
      "step": 1849580
    },
    {
      "epoch": 3.026910966660775,
      "grad_norm": 0.07213598489761353,
      "learning_rate": 3.972180307453068e-06,
      "loss": 0.0054,
      "step": 1849600
    },
    {
      "epoch": 3.0269436970994286,
      "grad_norm": 0.37616270780563354,
      "learning_rate": 3.972114415239551e-06,
      "loss": 0.0126,
      "step": 1849620
    },
    {
      "epoch": 3.0269764275380817,
      "grad_norm": 0.3463261127471924,
      "learning_rate": 3.972048523026034e-06,
      "loss": 0.0114,
      "step": 1849640
    },
    {
      "epoch": 3.0270091579767353,
      "grad_norm": 0.10430821031332016,
      "learning_rate": 3.971982630812517e-06,
      "loss": 0.0124,
      "step": 1849660
    },
    {
      "epoch": 3.0270418884153885,
      "grad_norm": 0.10863537341356277,
      "learning_rate": 3.971916738599e-06,
      "loss": 0.011,
      "step": 1849680
    },
    {
      "epoch": 3.027074618854042,
      "grad_norm": 0.12885352969169617,
      "learning_rate": 3.971850846385483e-06,
      "loss": 0.01,
      "step": 1849700
    },
    {
      "epoch": 3.027107349292695,
      "grad_norm": 0.3586205542087555,
      "learning_rate": 3.971784954171966e-06,
      "loss": 0.0097,
      "step": 1849720
    },
    {
      "epoch": 3.0271400797313484,
      "grad_norm": 0.2491896152496338,
      "learning_rate": 3.971719061958449e-06,
      "loss": 0.0164,
      "step": 1849740
    },
    {
      "epoch": 3.027172810170002,
      "grad_norm": 0.4213182032108307,
      "learning_rate": 3.971653169744932e-06,
      "loss": 0.0147,
      "step": 1849760
    },
    {
      "epoch": 3.027205540608655,
      "grad_norm": 0.2184402346611023,
      "learning_rate": 3.971587277531415e-06,
      "loss": 0.0142,
      "step": 1849780
    },
    {
      "epoch": 3.0272382710473087,
      "grad_norm": 0.09318004548549652,
      "learning_rate": 3.971521385317897e-06,
      "loss": 0.0138,
      "step": 1849800
    },
    {
      "epoch": 3.027271001485962,
      "grad_norm": 0.07363040745258331,
      "learning_rate": 3.97145549310438e-06,
      "loss": 0.0136,
      "step": 1849820
    },
    {
      "epoch": 3.0273037319246154,
      "grad_norm": 0.42789241671562195,
      "learning_rate": 3.971389600890863e-06,
      "loss": 0.0125,
      "step": 1849840
    },
    {
      "epoch": 3.0273364623632686,
      "grad_norm": 0.6621493101119995,
      "learning_rate": 3.971323708677346e-06,
      "loss": 0.0096,
      "step": 1849860
    },
    {
      "epoch": 3.0273691928019217,
      "grad_norm": 0.29832011461257935,
      "learning_rate": 3.971257816463829e-06,
      "loss": 0.0075,
      "step": 1849880
    },
    {
      "epoch": 3.0274019232405753,
      "grad_norm": 0.20913544297218323,
      "learning_rate": 3.971191924250312e-06,
      "loss": 0.0084,
      "step": 1849900
    },
    {
      "epoch": 3.0274346536792285,
      "grad_norm": 0.09661131352186203,
      "learning_rate": 3.971126032036795e-06,
      "loss": 0.0112,
      "step": 1849920
    },
    {
      "epoch": 3.027467384117882,
      "grad_norm": 0.1643485277891159,
      "learning_rate": 3.971060139823277e-06,
      "loss": 0.0127,
      "step": 1849940
    },
    {
      "epoch": 3.0275001145565352,
      "grad_norm": 0.4701911509037018,
      "learning_rate": 3.97099424760976e-06,
      "loss": 0.0076,
      "step": 1849960
    },
    {
      "epoch": 3.027532844995189,
      "grad_norm": 0.16596892476081848,
      "learning_rate": 3.970928355396243e-06,
      "loss": 0.0076,
      "step": 1849980
    },
    {
      "epoch": 3.027565575433842,
      "grad_norm": 0.30765363574028015,
      "learning_rate": 3.970862463182726e-06,
      "loss": 0.0065,
      "step": 1850000
    },
    {
      "epoch": 3.027565575433842,
      "eval_loss": 0.007260154001414776,
      "eval_runtime": 6520.2063,
      "eval_samples_per_second": 157.642,
      "eval_steps_per_second": 15.764,
      "eval_sts-dev_pearson_cosine": 0.9834058613746733,
      "eval_sts-dev_spearman_cosine": 0.8945155935784537,
      "step": 1850000
    },
    {
      "epoch": 3.027598305872495,
      "grad_norm": 0.1271951049566269,
      "learning_rate": 3.970796570969208e-06,
      "loss": 0.0128,
      "step": 1850020
    },
    {
      "epoch": 3.0276310363111487,
      "grad_norm": 0.22641165554523468,
      "learning_rate": 3.970730678755692e-06,
      "loss": 0.0083,
      "step": 1850040
    },
    {
      "epoch": 3.027663766749802,
      "grad_norm": 0.417682021856308,
      "learning_rate": 3.970664786542175e-06,
      "loss": 0.0158,
      "step": 1850060
    },
    {
      "epoch": 3.0276964971884555,
      "grad_norm": 0.21147532761096954,
      "learning_rate": 3.970598894328657e-06,
      "loss": 0.0114,
      "step": 1850080
    },
    {
      "epoch": 3.0277292276271086,
      "grad_norm": 0.4806188642978668,
      "learning_rate": 3.970533002115141e-06,
      "loss": 0.0137,
      "step": 1850100
    },
    {
      "epoch": 3.027761958065762,
      "grad_norm": 0.1593388319015503,
      "learning_rate": 3.970467109901624e-06,
      "loss": 0.0133,
      "step": 1850120
    },
    {
      "epoch": 3.0277946885044154,
      "grad_norm": 0.10285674035549164,
      "learning_rate": 3.9704012176881065e-06,
      "loss": 0.0095,
      "step": 1850140
    },
    {
      "epoch": 3.0278274189430685,
      "grad_norm": 0.30410829186439514,
      "learning_rate": 3.970335325474589e-06,
      "loss": 0.0088,
      "step": 1850160
    },
    {
      "epoch": 3.027860149381722,
      "grad_norm": 0.4157903492450714,
      "learning_rate": 3.970269433261072e-06,
      "loss": 0.0156,
      "step": 1850180
    },
    {
      "epoch": 3.0278928798203752,
      "grad_norm": 0.2342420071363449,
      "learning_rate": 3.970203541047555e-06,
      "loss": 0.0136,
      "step": 1850200
    },
    {
      "epoch": 3.027925610259029,
      "grad_norm": 0.12853094935417175,
      "learning_rate": 3.9701376488340374e-06,
      "loss": 0.0106,
      "step": 1850220
    },
    {
      "epoch": 3.027958340697682,
      "grad_norm": 0.1934777796268463,
      "learning_rate": 3.97007175662052e-06,
      "loss": 0.0138,
      "step": 1850240
    },
    {
      "epoch": 3.0279910711363356,
      "grad_norm": 0.49445584416389465,
      "learning_rate": 3.970005864407003e-06,
      "loss": 0.0099,
      "step": 1850260
    },
    {
      "epoch": 3.0280238015749887,
      "grad_norm": 0.48258426785469055,
      "learning_rate": 3.9699399721934865e-06,
      "loss": 0.0127,
      "step": 1850280
    },
    {
      "epoch": 3.028056532013642,
      "grad_norm": 0.5371736884117126,
      "learning_rate": 3.969874079979969e-06,
      "loss": 0.0073,
      "step": 1850300
    },
    {
      "epoch": 3.0280892624522955,
      "grad_norm": 0.12321455776691437,
      "learning_rate": 3.969808187766452e-06,
      "loss": 0.0131,
      "step": 1850320
    },
    {
      "epoch": 3.0281219928909486,
      "grad_norm": 0.06319725513458252,
      "learning_rate": 3.969742295552935e-06,
      "loss": 0.0111,
      "step": 1850340
    },
    {
      "epoch": 3.0281547233296022,
      "grad_norm": 0.15551875531673431,
      "learning_rate": 3.9696764033394175e-06,
      "loss": 0.0105,
      "step": 1850360
    },
    {
      "epoch": 3.0281874537682554,
      "grad_norm": 0.3412908911705017,
      "learning_rate": 3.9696105111259e-06,
      "loss": 0.0185,
      "step": 1850380
    },
    {
      "epoch": 3.0282201842069085,
      "grad_norm": 0.10173307359218597,
      "learning_rate": 3.969544618912383e-06,
      "loss": 0.0066,
      "step": 1850400
    },
    {
      "epoch": 3.028252914645562,
      "grad_norm": 0.20619142055511475,
      "learning_rate": 3.9694787266988665e-06,
      "loss": 0.0124,
      "step": 1850420
    },
    {
      "epoch": 3.0282856450842153,
      "grad_norm": 0.5840218663215637,
      "learning_rate": 3.969412834485349e-06,
      "loss": 0.0108,
      "step": 1850440
    },
    {
      "epoch": 3.028318375522869,
      "grad_norm": 0.08720166236162186,
      "learning_rate": 3.969346942271832e-06,
      "loss": 0.0164,
      "step": 1850460
    },
    {
      "epoch": 3.028351105961522,
      "grad_norm": 0.2239774912595749,
      "learning_rate": 3.969281050058315e-06,
      "loss": 0.0123,
      "step": 1850480
    },
    {
      "epoch": 3.0283838364001756,
      "grad_norm": 0.1935742050409317,
      "learning_rate": 3.969215157844798e-06,
      "loss": 0.0122,
      "step": 1850500
    },
    {
      "epoch": 3.0284165668388288,
      "grad_norm": 0.3667004108428955,
      "learning_rate": 3.969149265631281e-06,
      "loss": 0.0147,
      "step": 1850520
    },
    {
      "epoch": 3.028449297277482,
      "grad_norm": 0.31223034858703613,
      "learning_rate": 3.969083373417764e-06,
      "loss": 0.0095,
      "step": 1850540
    },
    {
      "epoch": 3.0284820277161355,
      "grad_norm": 0.3872078061103821,
      "learning_rate": 3.9690174812042466e-06,
      "loss": 0.013,
      "step": 1850560
    },
    {
      "epoch": 3.0285147581547887,
      "grad_norm": 0.048684027045965195,
      "learning_rate": 3.968951588990729e-06,
      "loss": 0.0105,
      "step": 1850580
    },
    {
      "epoch": 3.0285474885934422,
      "grad_norm": 0.18180382251739502,
      "learning_rate": 3.968885696777212e-06,
      "loss": 0.0098,
      "step": 1850600
    },
    {
      "epoch": 3.0285802190320954,
      "grad_norm": 0.21578961610794067,
      "learning_rate": 3.968819804563695e-06,
      "loss": 0.0123,
      "step": 1850620
    },
    {
      "epoch": 3.028612949470749,
      "grad_norm": 0.7765569686889648,
      "learning_rate": 3.9687539123501775e-06,
      "loss": 0.0148,
      "step": 1850640
    },
    {
      "epoch": 3.028645679909402,
      "grad_norm": 0.24187906086444855,
      "learning_rate": 3.96868802013666e-06,
      "loss": 0.0138,
      "step": 1850660
    },
    {
      "epoch": 3.0286784103480553,
      "grad_norm": 1.0724031925201416,
      "learning_rate": 3.968622127923144e-06,
      "loss": 0.0112,
      "step": 1850680
    },
    {
      "epoch": 3.028711140786709,
      "grad_norm": 0.32493799924850464,
      "learning_rate": 3.968556235709627e-06,
      "loss": 0.0137,
      "step": 1850700
    },
    {
      "epoch": 3.028743871225362,
      "grad_norm": 0.8379276990890503,
      "learning_rate": 3.968490343496109e-06,
      "loss": 0.0136,
      "step": 1850720
    },
    {
      "epoch": 3.0287766016640156,
      "grad_norm": 0.4863860011100769,
      "learning_rate": 3.968424451282592e-06,
      "loss": 0.0097,
      "step": 1850740
    },
    {
      "epoch": 3.028809332102669,
      "grad_norm": 0.4888942837715149,
      "learning_rate": 3.968358559069075e-06,
      "loss": 0.0151,
      "step": 1850760
    },
    {
      "epoch": 3.0288420625413224,
      "grad_norm": 0.6384084224700928,
      "learning_rate": 3.968292666855558e-06,
      "loss": 0.01,
      "step": 1850780
    },
    {
      "epoch": 3.0288747929799755,
      "grad_norm": 0.3494099974632263,
      "learning_rate": 3.968226774642041e-06,
      "loss": 0.0104,
      "step": 1850800
    },
    {
      "epoch": 3.0289075234186287,
      "grad_norm": 0.23007169365882874,
      "learning_rate": 3.968160882428524e-06,
      "loss": 0.0116,
      "step": 1850820
    },
    {
      "epoch": 3.0289402538572823,
      "grad_norm": 0.15886290371418,
      "learning_rate": 3.968094990215007e-06,
      "loss": 0.0148,
      "step": 1850840
    },
    {
      "epoch": 3.0289729842959354,
      "grad_norm": 0.5632207989692688,
      "learning_rate": 3.968029098001489e-06,
      "loss": 0.0125,
      "step": 1850860
    },
    {
      "epoch": 3.029005714734589,
      "grad_norm": 0.13854601979255676,
      "learning_rate": 3.967963205787972e-06,
      "loss": 0.0121,
      "step": 1850880
    },
    {
      "epoch": 3.029038445173242,
      "grad_norm": 0.402898371219635,
      "learning_rate": 3.967897313574456e-06,
      "loss": 0.0103,
      "step": 1850900
    },
    {
      "epoch": 3.0290711756118958,
      "grad_norm": 0.31878164410591125,
      "learning_rate": 3.967831421360938e-06,
      "loss": 0.0101,
      "step": 1850920
    },
    {
      "epoch": 3.029103906050549,
      "grad_norm": 0.35694119334220886,
      "learning_rate": 3.967765529147421e-06,
      "loss": 0.0091,
      "step": 1850940
    },
    {
      "epoch": 3.029136636489202,
      "grad_norm": 0.17540957033634186,
      "learning_rate": 3.967699636933904e-06,
      "loss": 0.0093,
      "step": 1850960
    },
    {
      "epoch": 3.0291693669278557,
      "grad_norm": 0.5639163851737976,
      "learning_rate": 3.967633744720387e-06,
      "loss": 0.0142,
      "step": 1850980
    },
    {
      "epoch": 3.029202097366509,
      "grad_norm": 1.6355005502700806,
      "learning_rate": 3.967567852506869e-06,
      "loss": 0.011,
      "step": 1851000
    },
    {
      "epoch": 3.0292348278051624,
      "grad_norm": 0.22757850587368011,
      "learning_rate": 3.967501960293352e-06,
      "loss": 0.015,
      "step": 1851020
    },
    {
      "epoch": 3.0292675582438156,
      "grad_norm": 0.30424007773399353,
      "learning_rate": 3.967436068079835e-06,
      "loss": 0.0116,
      "step": 1851040
    },
    {
      "epoch": 3.029300288682469,
      "grad_norm": 0.22665362060070038,
      "learning_rate": 3.967370175866318e-06,
      "loss": 0.0099,
      "step": 1851060
    },
    {
      "epoch": 3.0293330191211223,
      "grad_norm": 1.2668724060058594,
      "learning_rate": 3.967304283652801e-06,
      "loss": 0.0083,
      "step": 1851080
    },
    {
      "epoch": 3.0293657495597754,
      "grad_norm": 0.15283407270908356,
      "learning_rate": 3.967238391439284e-06,
      "loss": 0.0084,
      "step": 1851100
    },
    {
      "epoch": 3.029398479998429,
      "grad_norm": 0.19725415110588074,
      "learning_rate": 3.967172499225767e-06,
      "loss": 0.0103,
      "step": 1851120
    },
    {
      "epoch": 3.029431210437082,
      "grad_norm": 0.24751000106334686,
      "learning_rate": 3.96710660701225e-06,
      "loss": 0.0074,
      "step": 1851140
    },
    {
      "epoch": 3.029463940875736,
      "grad_norm": 0.13850373029708862,
      "learning_rate": 3.967040714798733e-06,
      "loss": 0.0096,
      "step": 1851160
    },
    {
      "epoch": 3.029496671314389,
      "grad_norm": 0.2128993570804596,
      "learning_rate": 3.966974822585216e-06,
      "loss": 0.0106,
      "step": 1851180
    },
    {
      "epoch": 3.029529401753042,
      "grad_norm": 0.2899051308631897,
      "learning_rate": 3.9669089303716985e-06,
      "loss": 0.0129,
      "step": 1851200
    },
    {
      "epoch": 3.0295621321916957,
      "grad_norm": 0.5608288049697876,
      "learning_rate": 3.966843038158181e-06,
      "loss": 0.0109,
      "step": 1851220
    },
    {
      "epoch": 3.029594862630349,
      "grad_norm": 0.145658940076828,
      "learning_rate": 3.966777145944664e-06,
      "loss": 0.0113,
      "step": 1851240
    },
    {
      "epoch": 3.0296275930690024,
      "grad_norm": 0.5750526785850525,
      "learning_rate": 3.966711253731147e-06,
      "loss": 0.0122,
      "step": 1851260
    },
    {
      "epoch": 3.0296603235076556,
      "grad_norm": 0.2626539468765259,
      "learning_rate": 3.9666453615176294e-06,
      "loss": 0.0111,
      "step": 1851280
    },
    {
      "epoch": 3.029693053946309,
      "grad_norm": 0.16238558292388916,
      "learning_rate": 3.966579469304113e-06,
      "loss": 0.0093,
      "step": 1851300
    },
    {
      "epoch": 3.0297257843849623,
      "grad_norm": 0.5871137976646423,
      "learning_rate": 3.966513577090596e-06,
      "loss": 0.0153,
      "step": 1851320
    },
    {
      "epoch": 3.0297585148236155,
      "grad_norm": 0.03143048286437988,
      "learning_rate": 3.9664476848770785e-06,
      "loss": 0.0076,
      "step": 1851340
    },
    {
      "epoch": 3.029791245262269,
      "grad_norm": 0.1704602986574173,
      "learning_rate": 3.966381792663561e-06,
      "loss": 0.0117,
      "step": 1851360
    },
    {
      "epoch": 3.029823975700922,
      "grad_norm": 0.1788053661584854,
      "learning_rate": 3.966315900450044e-06,
      "loss": 0.0078,
      "step": 1851380
    },
    {
      "epoch": 3.029856706139576,
      "grad_norm": 0.4120425581932068,
      "learning_rate": 3.966250008236527e-06,
      "loss": 0.0106,
      "step": 1851400
    },
    {
      "epoch": 3.029889436578229,
      "grad_norm": 0.3852071464061737,
      "learning_rate": 3.9661841160230094e-06,
      "loss": 0.0113,
      "step": 1851420
    },
    {
      "epoch": 3.0299221670168826,
      "grad_norm": 0.19263750314712524,
      "learning_rate": 3.966118223809492e-06,
      "loss": 0.0123,
      "step": 1851440
    },
    {
      "epoch": 3.0299548974555357,
      "grad_norm": 0.2861308157444,
      "learning_rate": 3.966052331595976e-06,
      "loss": 0.0094,
      "step": 1851460
    },
    {
      "epoch": 3.029987627894189,
      "grad_norm": 0.5878912806510925,
      "learning_rate": 3.9659864393824585e-06,
      "loss": 0.0136,
      "step": 1851480
    },
    {
      "epoch": 3.0300203583328424,
      "grad_norm": 0.8983539342880249,
      "learning_rate": 3.965920547168941e-06,
      "loss": 0.0126,
      "step": 1851500
    },
    {
      "epoch": 3.0300530887714956,
      "grad_norm": 0.2211981862783432,
      "learning_rate": 3.965854654955425e-06,
      "loss": 0.0136,
      "step": 1851520
    },
    {
      "epoch": 3.030085819210149,
      "grad_norm": 0.2811269462108612,
      "learning_rate": 3.965788762741908e-06,
      "loss": 0.0116,
      "step": 1851540
    },
    {
      "epoch": 3.0301185496488023,
      "grad_norm": 0.41934823989868164,
      "learning_rate": 3.96572287052839e-06,
      "loss": 0.011,
      "step": 1851560
    },
    {
      "epoch": 3.030151280087456,
      "grad_norm": 0.093244768679142,
      "learning_rate": 3.965656978314873e-06,
      "loss": 0.01,
      "step": 1851580
    },
    {
      "epoch": 3.030184010526109,
      "grad_norm": 0.14654120802879333,
      "learning_rate": 3.965591086101356e-06,
      "loss": 0.0104,
      "step": 1851600
    },
    {
      "epoch": 3.0302167409647622,
      "grad_norm": 0.405766099691391,
      "learning_rate": 3.9655251938878385e-06,
      "loss": 0.0118,
      "step": 1851620
    },
    {
      "epoch": 3.030249471403416,
      "grad_norm": 0.5639515519142151,
      "learning_rate": 3.965459301674321e-06,
      "loss": 0.0164,
      "step": 1851640
    },
    {
      "epoch": 3.030282201842069,
      "grad_norm": 0.2132038027048111,
      "learning_rate": 3.965393409460804e-06,
      "loss": 0.0163,
      "step": 1851660
    },
    {
      "epoch": 3.0303149322807226,
      "grad_norm": 0.23304502665996552,
      "learning_rate": 3.965327517247287e-06,
      "loss": 0.0098,
      "step": 1851680
    },
    {
      "epoch": 3.0303476627193757,
      "grad_norm": 0.8269738554954529,
      "learning_rate": 3.96526162503377e-06,
      "loss": 0.0121,
      "step": 1851700
    },
    {
      "epoch": 3.0303803931580293,
      "grad_norm": 0.0375053696334362,
      "learning_rate": 3.965195732820253e-06,
      "loss": 0.0145,
      "step": 1851720
    },
    {
      "epoch": 3.0304131235966825,
      "grad_norm": 0.26057565212249756,
      "learning_rate": 3.965129840606736e-06,
      "loss": 0.0133,
      "step": 1851740
    },
    {
      "epoch": 3.0304458540353356,
      "grad_norm": 0.13397589325904846,
      "learning_rate": 3.9650639483932186e-06,
      "loss": 0.008,
      "step": 1851760
    },
    {
      "epoch": 3.030478584473989,
      "grad_norm": 0.3685038983821869,
      "learning_rate": 3.964998056179701e-06,
      "loss": 0.0106,
      "step": 1851780
    },
    {
      "epoch": 3.0305113149126424,
      "grad_norm": 0.13442111015319824,
      "learning_rate": 3.964932163966184e-06,
      "loss": 0.0066,
      "step": 1851800
    },
    {
      "epoch": 3.030544045351296,
      "grad_norm": 0.4485322833061218,
      "learning_rate": 3.964866271752668e-06,
      "loss": 0.0112,
      "step": 1851820
    },
    {
      "epoch": 3.030576775789949,
      "grad_norm": 0.32339000701904297,
      "learning_rate": 3.96480037953915e-06,
      "loss": 0.0119,
      "step": 1851840
    },
    {
      "epoch": 3.0306095062286023,
      "grad_norm": 0.2962590157985687,
      "learning_rate": 3.964734487325633e-06,
      "loss": 0.0119,
      "step": 1851860
    },
    {
      "epoch": 3.030642236667256,
      "grad_norm": 0.18251660466194153,
      "learning_rate": 3.964668595112116e-06,
      "loss": 0.0093,
      "step": 1851880
    },
    {
      "epoch": 3.030674967105909,
      "grad_norm": 0.2658432424068451,
      "learning_rate": 3.964602702898599e-06,
      "loss": 0.016,
      "step": 1851900
    },
    {
      "epoch": 3.0307076975445626,
      "grad_norm": 0.2842836081981659,
      "learning_rate": 3.964536810685082e-06,
      "loss": 0.0138,
      "step": 1851920
    },
    {
      "epoch": 3.0307404279832157,
      "grad_norm": 0.1434628963470459,
      "learning_rate": 3.964470918471565e-06,
      "loss": 0.0121,
      "step": 1851940
    },
    {
      "epoch": 3.0307731584218693,
      "grad_norm": 0.14860254526138306,
      "learning_rate": 3.964405026258048e-06,
      "loss": 0.0097,
      "step": 1851960
    },
    {
      "epoch": 3.0308058888605225,
      "grad_norm": 0.6308533549308777,
      "learning_rate": 3.96433913404453e-06,
      "loss": 0.0129,
      "step": 1851980
    },
    {
      "epoch": 3.0308386192991756,
      "grad_norm": 0.33140134811401367,
      "learning_rate": 3.964273241831013e-06,
      "loss": 0.0085,
      "step": 1852000
    },
    {
      "epoch": 3.0308713497378292,
      "grad_norm": 0.16805778443813324,
      "learning_rate": 3.964207349617496e-06,
      "loss": 0.0077,
      "step": 1852020
    },
    {
      "epoch": 3.0309040801764824,
      "grad_norm": 0.1826152354478836,
      "learning_rate": 3.964141457403979e-06,
      "loss": 0.0143,
      "step": 1852040
    },
    {
      "epoch": 3.030936810615136,
      "grad_norm": 0.2478834092617035,
      "learning_rate": 3.964075565190461e-06,
      "loss": 0.0109,
      "step": 1852060
    },
    {
      "epoch": 3.030969541053789,
      "grad_norm": 0.1343514770269394,
      "learning_rate": 3.964009672976944e-06,
      "loss": 0.0101,
      "step": 1852080
    },
    {
      "epoch": 3.0310022714924427,
      "grad_norm": 0.40273910760879517,
      "learning_rate": 3.963943780763428e-06,
      "loss": 0.0126,
      "step": 1852100
    },
    {
      "epoch": 3.031035001931096,
      "grad_norm": 0.26879310607910156,
      "learning_rate": 3.96387788854991e-06,
      "loss": 0.0094,
      "step": 1852120
    },
    {
      "epoch": 3.031067732369749,
      "grad_norm": 0.25439971685409546,
      "learning_rate": 3.963811996336393e-06,
      "loss": 0.0109,
      "step": 1852140
    },
    {
      "epoch": 3.0311004628084026,
      "grad_norm": 0.2502839267253876,
      "learning_rate": 3.963746104122876e-06,
      "loss": 0.0133,
      "step": 1852160
    },
    {
      "epoch": 3.0311331932470558,
      "grad_norm": 0.13280628621578217,
      "learning_rate": 3.9636802119093595e-06,
      "loss": 0.0128,
      "step": 1852180
    },
    {
      "epoch": 3.0311659236857094,
      "grad_norm": 0.3595450520515442,
      "learning_rate": 3.963614319695842e-06,
      "loss": 0.0089,
      "step": 1852200
    },
    {
      "epoch": 3.0311986541243625,
      "grad_norm": 0.8872010707855225,
      "learning_rate": 3.963548427482325e-06,
      "loss": 0.0078,
      "step": 1852220
    },
    {
      "epoch": 3.031231384563016,
      "grad_norm": 0.1525646150112152,
      "learning_rate": 3.963482535268808e-06,
      "loss": 0.0094,
      "step": 1852240
    },
    {
      "epoch": 3.0312641150016693,
      "grad_norm": 0.8282542824745178,
      "learning_rate": 3.9634166430552904e-06,
      "loss": 0.016,
      "step": 1852260
    },
    {
      "epoch": 3.0312968454403224,
      "grad_norm": 0.30489736795425415,
      "learning_rate": 3.963350750841773e-06,
      "loss": 0.0114,
      "step": 1852280
    },
    {
      "epoch": 3.031329575878976,
      "grad_norm": 0.06308574229478836,
      "learning_rate": 3.963284858628256e-06,
      "loss": 0.0161,
      "step": 1852300
    },
    {
      "epoch": 3.031362306317629,
      "grad_norm": 0.3590037524700165,
      "learning_rate": 3.9632189664147395e-06,
      "loss": 0.0099,
      "step": 1852320
    },
    {
      "epoch": 3.0313950367562827,
      "grad_norm": 0.22698380053043365,
      "learning_rate": 3.963153074201222e-06,
      "loss": 0.0125,
      "step": 1852340
    },
    {
      "epoch": 3.031427767194936,
      "grad_norm": 0.2342948168516159,
      "learning_rate": 3.963087181987705e-06,
      "loss": 0.0094,
      "step": 1852360
    },
    {
      "epoch": 3.0314604976335895,
      "grad_norm": 0.1967957466840744,
      "learning_rate": 3.963021289774188e-06,
      "loss": 0.0086,
      "step": 1852380
    },
    {
      "epoch": 3.0314932280722426,
      "grad_norm": 0.253937304019928,
      "learning_rate": 3.9629553975606705e-06,
      "loss": 0.009,
      "step": 1852400
    },
    {
      "epoch": 3.031525958510896,
      "grad_norm": 0.44395682215690613,
      "learning_rate": 3.962889505347153e-06,
      "loss": 0.0175,
      "step": 1852420
    },
    {
      "epoch": 3.0315586889495494,
      "grad_norm": 0.28379929065704346,
      "learning_rate": 3.962823613133636e-06,
      "loss": 0.0131,
      "step": 1852440
    },
    {
      "epoch": 3.0315914193882025,
      "grad_norm": 0.29571428894996643,
      "learning_rate": 3.962757720920119e-06,
      "loss": 0.0135,
      "step": 1852460
    },
    {
      "epoch": 3.031624149826856,
      "grad_norm": 0.35186514258384705,
      "learning_rate": 3.9626918287066014e-06,
      "loss": 0.0102,
      "step": 1852480
    },
    {
      "epoch": 3.0316568802655093,
      "grad_norm": 0.30702677369117737,
      "learning_rate": 3.962625936493085e-06,
      "loss": 0.014,
      "step": 1852500
    },
    {
      "epoch": 3.031689610704163,
      "grad_norm": 0.12638308107852936,
      "learning_rate": 3.962560044279568e-06,
      "loss": 0.0208,
      "step": 1852520
    },
    {
      "epoch": 3.031722341142816,
      "grad_norm": 0.7286103367805481,
      "learning_rate": 3.962494152066051e-06,
      "loss": 0.0145,
      "step": 1852540
    },
    {
      "epoch": 3.031755071581469,
      "grad_norm": 0.3233882784843445,
      "learning_rate": 3.962428259852534e-06,
      "loss": 0.0102,
      "step": 1852560
    },
    {
      "epoch": 3.0317878020201228,
      "grad_norm": 0.5242286920547485,
      "learning_rate": 3.962362367639017e-06,
      "loss": 0.0116,
      "step": 1852580
    },
    {
      "epoch": 3.031820532458776,
      "grad_norm": 0.2985987067222595,
      "learning_rate": 3.9622964754254996e-06,
      "loss": 0.0123,
      "step": 1852600
    },
    {
      "epoch": 3.0318532628974295,
      "grad_norm": 0.1490175426006317,
      "learning_rate": 3.962230583211982e-06,
      "loss": 0.0081,
      "step": 1852620
    },
    {
      "epoch": 3.0318859933360827,
      "grad_norm": 0.7215936183929443,
      "learning_rate": 3.962164690998465e-06,
      "loss": 0.0081,
      "step": 1852640
    },
    {
      "epoch": 3.031918723774736,
      "grad_norm": 0.23652924597263336,
      "learning_rate": 3.962098798784948e-06,
      "loss": 0.0127,
      "step": 1852660
    },
    {
      "epoch": 3.0319514542133894,
      "grad_norm": 0.1489126980304718,
      "learning_rate": 3.9620329065714305e-06,
      "loss": 0.0128,
      "step": 1852680
    },
    {
      "epoch": 3.0319841846520426,
      "grad_norm": 0.6356171369552612,
      "learning_rate": 3.961967014357913e-06,
      "loss": 0.0099,
      "step": 1852700
    },
    {
      "epoch": 3.032016915090696,
      "grad_norm": 0.44689059257507324,
      "learning_rate": 3.961901122144397e-06,
      "loss": 0.0118,
      "step": 1852720
    },
    {
      "epoch": 3.0320496455293493,
      "grad_norm": 0.20105934143066406,
      "learning_rate": 3.96183522993088e-06,
      "loss": 0.0082,
      "step": 1852740
    },
    {
      "epoch": 3.032082375968003,
      "grad_norm": 0.3067080080509186,
      "learning_rate": 3.961769337717362e-06,
      "loss": 0.0134,
      "step": 1852760
    },
    {
      "epoch": 3.032115106406656,
      "grad_norm": 0.21974340081214905,
      "learning_rate": 3.961703445503845e-06,
      "loss": 0.009,
      "step": 1852780
    },
    {
      "epoch": 3.032147836845309,
      "grad_norm": 0.3337433934211731,
      "learning_rate": 3.961637553290328e-06,
      "loss": 0.0128,
      "step": 1852800
    },
    {
      "epoch": 3.032180567283963,
      "grad_norm": 0.21412579715251923,
      "learning_rate": 3.9615716610768105e-06,
      "loss": 0.0136,
      "step": 1852820
    },
    {
      "epoch": 3.032213297722616,
      "grad_norm": 0.34106501936912537,
      "learning_rate": 3.961505768863293e-06,
      "loss": 0.0099,
      "step": 1852840
    },
    {
      "epoch": 3.0322460281612695,
      "grad_norm": 0.10646422207355499,
      "learning_rate": 3.961439876649776e-06,
      "loss": 0.0097,
      "step": 1852860
    },
    {
      "epoch": 3.0322787585999227,
      "grad_norm": 0.44769421219825745,
      "learning_rate": 3.96137398443626e-06,
      "loss": 0.0157,
      "step": 1852880
    },
    {
      "epoch": 3.0323114890385763,
      "grad_norm": 0.08830751478672028,
      "learning_rate": 3.961308092222742e-06,
      "loss": 0.0094,
      "step": 1852900
    },
    {
      "epoch": 3.0323442194772294,
      "grad_norm": 0.34344953298568726,
      "learning_rate": 3.961242200009225e-06,
      "loss": 0.0128,
      "step": 1852920
    },
    {
      "epoch": 3.0323769499158826,
      "grad_norm": 0.1817447990179062,
      "learning_rate": 3.961176307795709e-06,
      "loss": 0.0158,
      "step": 1852940
    },
    {
      "epoch": 3.032409680354536,
      "grad_norm": 0.353831946849823,
      "learning_rate": 3.961110415582191e-06,
      "loss": 0.009,
      "step": 1852960
    },
    {
      "epoch": 3.0324424107931893,
      "grad_norm": 0.358913779258728,
      "learning_rate": 3.961044523368674e-06,
      "loss": 0.0143,
      "step": 1852980
    },
    {
      "epoch": 3.032475141231843,
      "grad_norm": 0.5222735404968262,
      "learning_rate": 3.960978631155157e-06,
      "loss": 0.0086,
      "step": 1853000
    },
    {
      "epoch": 3.032507871670496,
      "grad_norm": 0.1326172649860382,
      "learning_rate": 3.96091273894164e-06,
      "loss": 0.0094,
      "step": 1853020
    },
    {
      "epoch": 3.0325406021091497,
      "grad_norm": 0.16102050244808197,
      "learning_rate": 3.960846846728122e-06,
      "loss": 0.0108,
      "step": 1853040
    },
    {
      "epoch": 3.032573332547803,
      "grad_norm": 0.19271127879619598,
      "learning_rate": 3.960780954514605e-06,
      "loss": 0.0092,
      "step": 1853060
    },
    {
      "epoch": 3.032606062986456,
      "grad_norm": 0.15982191264629364,
      "learning_rate": 3.960715062301088e-06,
      "loss": 0.015,
      "step": 1853080
    },
    {
      "epoch": 3.0326387934251096,
      "grad_norm": 0.4029548764228821,
      "learning_rate": 3.960649170087571e-06,
      "loss": 0.0122,
      "step": 1853100
    },
    {
      "epoch": 3.0326715238637627,
      "grad_norm": 0.6776719093322754,
      "learning_rate": 3.960583277874054e-06,
      "loss": 0.012,
      "step": 1853120
    },
    {
      "epoch": 3.0327042543024163,
      "grad_norm": 0.3727145195007324,
      "learning_rate": 3.960517385660537e-06,
      "loss": 0.013,
      "step": 1853140
    },
    {
      "epoch": 3.0327369847410695,
      "grad_norm": 0.2999328076839447,
      "learning_rate": 3.96045149344702e-06,
      "loss": 0.0168,
      "step": 1853160
    },
    {
      "epoch": 3.032769715179723,
      "grad_norm": 0.5004924535751343,
      "learning_rate": 3.960385601233502e-06,
      "loss": 0.0128,
      "step": 1853180
    },
    {
      "epoch": 3.032802445618376,
      "grad_norm": 0.5684617757797241,
      "learning_rate": 3.960319709019985e-06,
      "loss": 0.0149,
      "step": 1853200
    },
    {
      "epoch": 3.0328351760570293,
      "grad_norm": 0.19632185995578766,
      "learning_rate": 3.960253816806468e-06,
      "loss": 0.014,
      "step": 1853220
    },
    {
      "epoch": 3.032867906495683,
      "grad_norm": 0.41156888008117676,
      "learning_rate": 3.9601879245929515e-06,
      "loss": 0.0106,
      "step": 1853240
    },
    {
      "epoch": 3.032900636934336,
      "grad_norm": 0.163556307554245,
      "learning_rate": 3.960122032379434e-06,
      "loss": 0.0135,
      "step": 1853260
    },
    {
      "epoch": 3.0329333673729897,
      "grad_norm": 0.20024354755878448,
      "learning_rate": 3.960056140165917e-06,
      "loss": 0.0106,
      "step": 1853280
    },
    {
      "epoch": 3.032966097811643,
      "grad_norm": 0.059548959136009216,
      "learning_rate": 3.9599902479524e-06,
      "loss": 0.0128,
      "step": 1853300
    },
    {
      "epoch": 3.032998828250296,
      "grad_norm": 0.6506186723709106,
      "learning_rate": 3.9599243557388824e-06,
      "loss": 0.011,
      "step": 1853320
    },
    {
      "epoch": 3.0330315586889496,
      "grad_norm": 0.6904352307319641,
      "learning_rate": 3.959858463525366e-06,
      "loss": 0.0134,
      "step": 1853340
    },
    {
      "epoch": 3.0330642891276027,
      "grad_norm": 0.12479754537343979,
      "learning_rate": 3.959792571311849e-06,
      "loss": 0.0128,
      "step": 1853360
    },
    {
      "epoch": 3.0330970195662563,
      "grad_norm": 0.41522783041000366,
      "learning_rate": 3.9597266790983315e-06,
      "loss": 0.0135,
      "step": 1853380
    },
    {
      "epoch": 3.0331297500049095,
      "grad_norm": 0.16378463804721832,
      "learning_rate": 3.959660786884814e-06,
      "loss": 0.0116,
      "step": 1853400
    },
    {
      "epoch": 3.033162480443563,
      "grad_norm": 0.6744353771209717,
      "learning_rate": 3.959594894671297e-06,
      "loss": 0.0165,
      "step": 1853420
    },
    {
      "epoch": 3.033195210882216,
      "grad_norm": 0.10936708003282547,
      "learning_rate": 3.95952900245778e-06,
      "loss": 0.0093,
      "step": 1853440
    },
    {
      "epoch": 3.0332279413208694,
      "grad_norm": 0.16277259588241577,
      "learning_rate": 3.9594631102442625e-06,
      "loss": 0.0073,
      "step": 1853460
    },
    {
      "epoch": 3.033260671759523,
      "grad_norm": 0.13821463286876678,
      "learning_rate": 3.959397218030745e-06,
      "loss": 0.0073,
      "step": 1853480
    },
    {
      "epoch": 3.033293402198176,
      "grad_norm": 0.9501696825027466,
      "learning_rate": 3.959331325817228e-06,
      "loss": 0.0144,
      "step": 1853500
    },
    {
      "epoch": 3.0333261326368297,
      "grad_norm": 0.26233935356140137,
      "learning_rate": 3.9592654336037115e-06,
      "loss": 0.0111,
      "step": 1853520
    },
    {
      "epoch": 3.033358863075483,
      "grad_norm": 0.31597277522087097,
      "learning_rate": 3.959199541390194e-06,
      "loss": 0.0137,
      "step": 1853540
    },
    {
      "epoch": 3.0333915935141365,
      "grad_norm": 0.4193085730075836,
      "learning_rate": 3.959133649176677e-06,
      "loss": 0.0129,
      "step": 1853560
    },
    {
      "epoch": 3.0334243239527896,
      "grad_norm": 0.126713365316391,
      "learning_rate": 3.959067756963161e-06,
      "loss": 0.0119,
      "step": 1853580
    },
    {
      "epoch": 3.0334570543914428,
      "grad_norm": 0.23875291645526886,
      "learning_rate": 3.959001864749643e-06,
      "loss": 0.0106,
      "step": 1853600
    },
    {
      "epoch": 3.0334897848300963,
      "grad_norm": 0.12763682007789612,
      "learning_rate": 3.958935972536126e-06,
      "loss": 0.0099,
      "step": 1853620
    },
    {
      "epoch": 3.0335225152687495,
      "grad_norm": 0.5114750266075134,
      "learning_rate": 3.958870080322609e-06,
      "loss": 0.0107,
      "step": 1853640
    },
    {
      "epoch": 3.033555245707403,
      "grad_norm": 0.2660793960094452,
      "learning_rate": 3.9588041881090915e-06,
      "loss": 0.0181,
      "step": 1853660
    },
    {
      "epoch": 3.0335879761460562,
      "grad_norm": 0.18430130183696747,
      "learning_rate": 3.958738295895574e-06,
      "loss": 0.0103,
      "step": 1853680
    },
    {
      "epoch": 3.03362070658471,
      "grad_norm": 0.1876002848148346,
      "learning_rate": 3.958672403682057e-06,
      "loss": 0.0125,
      "step": 1853700
    },
    {
      "epoch": 3.033653437023363,
      "grad_norm": 0.370986670255661,
      "learning_rate": 3.95860651146854e-06,
      "loss": 0.0109,
      "step": 1853720
    },
    {
      "epoch": 3.033686167462016,
      "grad_norm": 0.5457940101623535,
      "learning_rate": 3.958540619255023e-06,
      "loss": 0.0136,
      "step": 1853740
    },
    {
      "epoch": 3.0337188979006697,
      "grad_norm": 0.23765002191066742,
      "learning_rate": 3.958474727041506e-06,
      "loss": 0.0117,
      "step": 1853760
    },
    {
      "epoch": 3.033751628339323,
      "grad_norm": 0.3681284189224243,
      "learning_rate": 3.958408834827989e-06,
      "loss": 0.0112,
      "step": 1853780
    },
    {
      "epoch": 3.0337843587779765,
      "grad_norm": 0.04155417159199715,
      "learning_rate": 3.9583429426144716e-06,
      "loss": 0.0085,
      "step": 1853800
    },
    {
      "epoch": 3.0338170892166296,
      "grad_norm": 0.13224729895591736,
      "learning_rate": 3.958277050400954e-06,
      "loss": 0.0095,
      "step": 1853820
    },
    {
      "epoch": 3.033849819655283,
      "grad_norm": 0.6352738738059998,
      "learning_rate": 3.958211158187437e-06,
      "loss": 0.0123,
      "step": 1853840
    },
    {
      "epoch": 3.0338825500939364,
      "grad_norm": 0.15394724905490875,
      "learning_rate": 3.95814526597392e-06,
      "loss": 0.0095,
      "step": 1853860
    },
    {
      "epoch": 3.0339152805325895,
      "grad_norm": 0.09421433508396149,
      "learning_rate": 3.9580793737604025e-06,
      "loss": 0.0149,
      "step": 1853880
    },
    {
      "epoch": 3.033948010971243,
      "grad_norm": 0.7461302280426025,
      "learning_rate": 3.958013481546885e-06,
      "loss": 0.0108,
      "step": 1853900
    },
    {
      "epoch": 3.0339807414098963,
      "grad_norm": 0.12278125435113907,
      "learning_rate": 3.957947589333369e-06,
      "loss": 0.0147,
      "step": 1853920
    },
    {
      "epoch": 3.03401347184855,
      "grad_norm": 0.6998348832130432,
      "learning_rate": 3.957881697119852e-06,
      "loss": 0.0137,
      "step": 1853940
    },
    {
      "epoch": 3.034046202287203,
      "grad_norm": 0.7636720538139343,
      "learning_rate": 3.957815804906335e-06,
      "loss": 0.0139,
      "step": 1853960
    },
    {
      "epoch": 3.0340789327258566,
      "grad_norm": 0.1310770958662033,
      "learning_rate": 3.957749912692818e-06,
      "loss": 0.0105,
      "step": 1853980
    },
    {
      "epoch": 3.0341116631645098,
      "grad_norm": 0.18967196345329285,
      "learning_rate": 3.957684020479301e-06,
      "loss": 0.0097,
      "step": 1854000
    },
    {
      "epoch": 3.034144393603163,
      "grad_norm": 0.21571685373783112,
      "learning_rate": 3.957618128265783e-06,
      "loss": 0.0075,
      "step": 1854020
    },
    {
      "epoch": 3.0341771240418165,
      "grad_norm": 0.266385018825531,
      "learning_rate": 3.957552236052266e-06,
      "loss": 0.008,
      "step": 1854040
    },
    {
      "epoch": 3.0342098544804696,
      "grad_norm": 0.3210696578025818,
      "learning_rate": 3.957486343838749e-06,
      "loss": 0.0137,
      "step": 1854060
    },
    {
      "epoch": 3.0342425849191232,
      "grad_norm": 0.05516817420721054,
      "learning_rate": 3.957420451625232e-06,
      "loss": 0.0109,
      "step": 1854080
    },
    {
      "epoch": 3.0342753153577764,
      "grad_norm": 0.34806692600250244,
      "learning_rate": 3.957354559411714e-06,
      "loss": 0.0127,
      "step": 1854100
    },
    {
      "epoch": 3.03430804579643,
      "grad_norm": 0.3744409680366516,
      "learning_rate": 3.957288667198197e-06,
      "loss": 0.0133,
      "step": 1854120
    },
    {
      "epoch": 3.034340776235083,
      "grad_norm": 0.062332358211278915,
      "learning_rate": 3.957222774984681e-06,
      "loss": 0.0113,
      "step": 1854140
    },
    {
      "epoch": 3.0343735066737363,
      "grad_norm": 0.4331313967704773,
      "learning_rate": 3.9571568827711634e-06,
      "loss": 0.01,
      "step": 1854160
    },
    {
      "epoch": 3.03440623711239,
      "grad_norm": 0.06107022985816002,
      "learning_rate": 3.957090990557646e-06,
      "loss": 0.0157,
      "step": 1854180
    },
    {
      "epoch": 3.034438967551043,
      "grad_norm": 0.4102311134338379,
      "learning_rate": 3.957025098344129e-06,
      "loss": 0.009,
      "step": 1854200
    },
    {
      "epoch": 3.0344716979896966,
      "grad_norm": 0.35673367977142334,
      "learning_rate": 3.956959206130612e-06,
      "loss": 0.0098,
      "step": 1854220
    },
    {
      "epoch": 3.0345044284283498,
      "grad_norm": 0.43239089846611023,
      "learning_rate": 3.956893313917094e-06,
      "loss": 0.0088,
      "step": 1854240
    },
    {
      "epoch": 3.034537158867003,
      "grad_norm": 0.2738635838031769,
      "learning_rate": 3.956827421703577e-06,
      "loss": 0.0136,
      "step": 1854260
    },
    {
      "epoch": 3.0345698893056565,
      "grad_norm": 0.5368230938911438,
      "learning_rate": 3.956761529490061e-06,
      "loss": 0.0112,
      "step": 1854280
    },
    {
      "epoch": 3.0346026197443097,
      "grad_norm": 0.26006993651390076,
      "learning_rate": 3.9566956372765435e-06,
      "loss": 0.0121,
      "step": 1854300
    },
    {
      "epoch": 3.0346353501829633,
      "grad_norm": 0.07717809826135635,
      "learning_rate": 3.956629745063026e-06,
      "loss": 0.0106,
      "step": 1854320
    },
    {
      "epoch": 3.0346680806216164,
      "grad_norm": 0.26637086272239685,
      "learning_rate": 3.956563852849509e-06,
      "loss": 0.0118,
      "step": 1854340
    },
    {
      "epoch": 3.03470081106027,
      "grad_norm": 0.1009811982512474,
      "learning_rate": 3.9564979606359925e-06,
      "loss": 0.0101,
      "step": 1854360
    },
    {
      "epoch": 3.034733541498923,
      "grad_norm": 0.11410822719335556,
      "learning_rate": 3.956432068422475e-06,
      "loss": 0.0132,
      "step": 1854380
    },
    {
      "epoch": 3.0347662719375763,
      "grad_norm": 0.040116362273693085,
      "learning_rate": 3.956366176208958e-06,
      "loss": 0.0119,
      "step": 1854400
    },
    {
      "epoch": 3.03479900237623,
      "grad_norm": 0.2865797281265259,
      "learning_rate": 3.956300283995441e-06,
      "loss": 0.0088,
      "step": 1854420
    },
    {
      "epoch": 3.034831732814883,
      "grad_norm": 0.18850520253181458,
      "learning_rate": 3.9562343917819235e-06,
      "loss": 0.0094,
      "step": 1854440
    },
    {
      "epoch": 3.0348644632535366,
      "grad_norm": 0.6354866623878479,
      "learning_rate": 3.956168499568406e-06,
      "loss": 0.0126,
      "step": 1854460
    },
    {
      "epoch": 3.03489719369219,
      "grad_norm": 0.30561086535453796,
      "learning_rate": 3.956102607354889e-06,
      "loss": 0.0125,
      "step": 1854480
    },
    {
      "epoch": 3.0349299241308434,
      "grad_norm": 0.24756398797035217,
      "learning_rate": 3.956036715141372e-06,
      "loss": 0.0102,
      "step": 1854500
    },
    {
      "epoch": 3.0349626545694965,
      "grad_norm": 1.0016462802886963,
      "learning_rate": 3.9559708229278544e-06,
      "loss": 0.0123,
      "step": 1854520
    },
    {
      "epoch": 3.0349953850081497,
      "grad_norm": 0.4494993984699249,
      "learning_rate": 3.955904930714338e-06,
      "loss": 0.0101,
      "step": 1854540
    },
    {
      "epoch": 3.0350281154468033,
      "grad_norm": 0.3317141532897949,
      "learning_rate": 3.955839038500821e-06,
      "loss": 0.0118,
      "step": 1854560
    },
    {
      "epoch": 3.0350608458854564,
      "grad_norm": 0.2825119197368622,
      "learning_rate": 3.9557731462873035e-06,
      "loss": 0.0133,
      "step": 1854580
    },
    {
      "epoch": 3.03509357632411,
      "grad_norm": 0.26546961069107056,
      "learning_rate": 3.955707254073786e-06,
      "loss": 0.0132,
      "step": 1854600
    },
    {
      "epoch": 3.035126306762763,
      "grad_norm": 0.15275174379348755,
      "learning_rate": 3.955641361860269e-06,
      "loss": 0.011,
      "step": 1854620
    },
    {
      "epoch": 3.0351590372014168,
      "grad_norm": 0.2464989870786667,
      "learning_rate": 3.9555754696467526e-06,
      "loss": 0.0143,
      "step": 1854640
    },
    {
      "epoch": 3.03519176764007,
      "grad_norm": 1.2127246856689453,
      "learning_rate": 3.955509577433235e-06,
      "loss": 0.0078,
      "step": 1854660
    },
    {
      "epoch": 3.035224498078723,
      "grad_norm": 0.17454062402248383,
      "learning_rate": 3.955443685219718e-06,
      "loss": 0.0125,
      "step": 1854680
    },
    {
      "epoch": 3.0352572285173767,
      "grad_norm": 0.36061033606529236,
      "learning_rate": 3.955377793006201e-06,
      "loss": 0.0138,
      "step": 1854700
    },
    {
      "epoch": 3.03528995895603,
      "grad_norm": 0.21436138451099396,
      "learning_rate": 3.9553119007926835e-06,
      "loss": 0.0135,
      "step": 1854720
    },
    {
      "epoch": 3.0353226893946834,
      "grad_norm": 0.6704086065292358,
      "learning_rate": 3.955246008579166e-06,
      "loss": 0.0131,
      "step": 1854740
    },
    {
      "epoch": 3.0353554198333366,
      "grad_norm": 0.16994182765483856,
      "learning_rate": 3.95518011636565e-06,
      "loss": 0.0113,
      "step": 1854760
    },
    {
      "epoch": 3.03538815027199,
      "grad_norm": 0.34468457102775574,
      "learning_rate": 3.955114224152133e-06,
      "loss": 0.0147,
      "step": 1854780
    },
    {
      "epoch": 3.0354208807106433,
      "grad_norm": 1.0323947668075562,
      "learning_rate": 3.955048331938615e-06,
      "loss": 0.0172,
      "step": 1854800
    },
    {
      "epoch": 3.0354536111492965,
      "grad_norm": 0.43205732107162476,
      "learning_rate": 3.954982439725098e-06,
      "loss": 0.0163,
      "step": 1854820
    },
    {
      "epoch": 3.03548634158795,
      "grad_norm": 0.2440742701292038,
      "learning_rate": 3.954916547511581e-06,
      "loss": 0.0131,
      "step": 1854840
    },
    {
      "epoch": 3.035519072026603,
      "grad_norm": 0.2594943046569824,
      "learning_rate": 3.9548506552980636e-06,
      "loss": 0.0089,
      "step": 1854860
    },
    {
      "epoch": 3.035551802465257,
      "grad_norm": 0.06469113379716873,
      "learning_rate": 3.954784763084546e-06,
      "loss": 0.0113,
      "step": 1854880
    },
    {
      "epoch": 3.03558453290391,
      "grad_norm": 0.35550418496131897,
      "learning_rate": 3.954718870871029e-06,
      "loss": 0.0148,
      "step": 1854900
    },
    {
      "epoch": 3.035617263342563,
      "grad_norm": 0.10904604196548462,
      "learning_rate": 3.954652978657512e-06,
      "loss": 0.0094,
      "step": 1854920
    },
    {
      "epoch": 3.0356499937812167,
      "grad_norm": 0.07649634033441544,
      "learning_rate": 3.954587086443995e-06,
      "loss": 0.0129,
      "step": 1854940
    },
    {
      "epoch": 3.03568272421987,
      "grad_norm": 0.22159111499786377,
      "learning_rate": 3.954521194230478e-06,
      "loss": 0.0241,
      "step": 1854960
    },
    {
      "epoch": 3.0357154546585234,
      "grad_norm": 0.7098327279090881,
      "learning_rate": 3.954455302016961e-06,
      "loss": 0.008,
      "step": 1854980
    },
    {
      "epoch": 3.0357481850971766,
      "grad_norm": 0.15034174919128418,
      "learning_rate": 3.9543894098034444e-06,
      "loss": 0.0144,
      "step": 1855000
    },
    {
      "epoch": 3.03578091553583,
      "grad_norm": 0.4248493015766144,
      "learning_rate": 3.954323517589927e-06,
      "loss": 0.0158,
      "step": 1855020
    },
    {
      "epoch": 3.0358136459744833,
      "grad_norm": 0.15437744557857513,
      "learning_rate": 3.95425762537641e-06,
      "loss": 0.009,
      "step": 1855040
    },
    {
      "epoch": 3.0358463764131365,
      "grad_norm": 0.5222524404525757,
      "learning_rate": 3.954191733162893e-06,
      "loss": 0.0137,
      "step": 1855060
    },
    {
      "epoch": 3.03587910685179,
      "grad_norm": 0.2716795802116394,
      "learning_rate": 3.954125840949375e-06,
      "loss": 0.0166,
      "step": 1855080
    },
    {
      "epoch": 3.0359118372904432,
      "grad_norm": 0.23445706069469452,
      "learning_rate": 3.954059948735858e-06,
      "loss": 0.0133,
      "step": 1855100
    },
    {
      "epoch": 3.035944567729097,
      "grad_norm": 0.1689123511314392,
      "learning_rate": 3.953994056522341e-06,
      "loss": 0.011,
      "step": 1855120
    },
    {
      "epoch": 3.03597729816775,
      "grad_norm": 0.31423643231391907,
      "learning_rate": 3.953928164308824e-06,
      "loss": 0.0164,
      "step": 1855140
    },
    {
      "epoch": 3.0360100286064036,
      "grad_norm": 0.8130115270614624,
      "learning_rate": 3.953862272095307e-06,
      "loss": 0.0139,
      "step": 1855160
    },
    {
      "epoch": 3.0360427590450567,
      "grad_norm": 0.5843213796615601,
      "learning_rate": 3.95379637988179e-06,
      "loss": 0.0142,
      "step": 1855180
    },
    {
      "epoch": 3.03607548948371,
      "grad_norm": 0.49571868777275085,
      "learning_rate": 3.953730487668273e-06,
      "loss": 0.0116,
      "step": 1855200
    },
    {
      "epoch": 3.0361082199223635,
      "grad_norm": 0.38779768347740173,
      "learning_rate": 3.953664595454755e-06,
      "loss": 0.012,
      "step": 1855220
    },
    {
      "epoch": 3.0361409503610166,
      "grad_norm": 0.26695072650909424,
      "learning_rate": 3.953598703241238e-06,
      "loss": 0.0082,
      "step": 1855240
    },
    {
      "epoch": 3.03617368079967,
      "grad_norm": 0.4378817081451416,
      "learning_rate": 3.953532811027721e-06,
      "loss": 0.0134,
      "step": 1855260
    },
    {
      "epoch": 3.0362064112383234,
      "grad_norm": 0.4587648808956146,
      "learning_rate": 3.953466918814204e-06,
      "loss": 0.008,
      "step": 1855280
    },
    {
      "epoch": 3.036239141676977,
      "grad_norm": 0.22560444474220276,
      "learning_rate": 3.953401026600686e-06,
      "loss": 0.0141,
      "step": 1855300
    },
    {
      "epoch": 3.03627187211563,
      "grad_norm": 0.11246325820684433,
      "learning_rate": 3.95333513438717e-06,
      "loss": 0.0105,
      "step": 1855320
    },
    {
      "epoch": 3.0363046025542832,
      "grad_norm": 0.3035435676574707,
      "learning_rate": 3.953269242173653e-06,
      "loss": 0.0088,
      "step": 1855340
    },
    {
      "epoch": 3.036337332992937,
      "grad_norm": 0.10512899607419968,
      "learning_rate": 3.9532033499601354e-06,
      "loss": 0.024,
      "step": 1855360
    },
    {
      "epoch": 3.03637006343159,
      "grad_norm": 0.14920887351036072,
      "learning_rate": 3.953137457746619e-06,
      "loss": 0.0187,
      "step": 1855380
    },
    {
      "epoch": 3.0364027938702436,
      "grad_norm": 0.32737210392951965,
      "learning_rate": 3.953071565533102e-06,
      "loss": 0.0144,
      "step": 1855400
    },
    {
      "epoch": 3.0364355243088967,
      "grad_norm": 0.40968525409698486,
      "learning_rate": 3.9530056733195845e-06,
      "loss": 0.0207,
      "step": 1855420
    },
    {
      "epoch": 3.0364682547475503,
      "grad_norm": 1.2704195976257324,
      "learning_rate": 3.952939781106067e-06,
      "loss": 0.0178,
      "step": 1855440
    },
    {
      "epoch": 3.0365009851862035,
      "grad_norm": 1.06308114528656,
      "learning_rate": 3.95287388889255e-06,
      "loss": 0.0139,
      "step": 1855460
    },
    {
      "epoch": 3.0365337156248566,
      "grad_norm": 0.3379257917404175,
      "learning_rate": 3.952807996679033e-06,
      "loss": 0.0126,
      "step": 1855480
    },
    {
      "epoch": 3.0365664460635102,
      "grad_norm": 0.08094018697738647,
      "learning_rate": 3.9527421044655155e-06,
      "loss": 0.0115,
      "step": 1855500
    },
    {
      "epoch": 3.0365991765021634,
      "grad_norm": 0.2771725356578827,
      "learning_rate": 3.952676212251998e-06,
      "loss": 0.0152,
      "step": 1855520
    },
    {
      "epoch": 3.036631906940817,
      "grad_norm": 0.348982036113739,
      "learning_rate": 3.952610320038481e-06,
      "loss": 0.0087,
      "step": 1855540
    },
    {
      "epoch": 3.03666463737947,
      "grad_norm": 0.10126546770334244,
      "learning_rate": 3.9525444278249645e-06,
      "loss": 0.0099,
      "step": 1855560
    },
    {
      "epoch": 3.0366973678181237,
      "grad_norm": 0.08129950612783432,
      "learning_rate": 3.952478535611447e-06,
      "loss": 0.0225,
      "step": 1855580
    },
    {
      "epoch": 3.036730098256777,
      "grad_norm": 0.12259639799594879,
      "learning_rate": 3.95241264339793e-06,
      "loss": 0.0119,
      "step": 1855600
    },
    {
      "epoch": 3.03676282869543,
      "grad_norm": 0.35266953706741333,
      "learning_rate": 3.952346751184413e-06,
      "loss": 0.0111,
      "step": 1855620
    },
    {
      "epoch": 3.0367955591340836,
      "grad_norm": 0.8974499106407166,
      "learning_rate": 3.9522808589708955e-06,
      "loss": 0.0121,
      "step": 1855640
    },
    {
      "epoch": 3.0368282895727368,
      "grad_norm": 0.6267909407615662,
      "learning_rate": 3.952214966757378e-06,
      "loss": 0.0093,
      "step": 1855660
    },
    {
      "epoch": 3.0368610200113904,
      "grad_norm": 0.3885102868080139,
      "learning_rate": 3.952149074543861e-06,
      "loss": 0.0134,
      "step": 1855680
    },
    {
      "epoch": 3.0368937504500435,
      "grad_norm": 0.2586251497268677,
      "learning_rate": 3.9520831823303446e-06,
      "loss": 0.0091,
      "step": 1855700
    },
    {
      "epoch": 3.0369264808886967,
      "grad_norm": 0.18706579506397247,
      "learning_rate": 3.952017290116827e-06,
      "loss": 0.012,
      "step": 1855720
    },
    {
      "epoch": 3.0369592113273502,
      "grad_norm": 0.07817114144563675,
      "learning_rate": 3.95195139790331e-06,
      "loss": 0.0103,
      "step": 1855740
    },
    {
      "epoch": 3.0369919417660034,
      "grad_norm": 0.13238359987735748,
      "learning_rate": 3.951885505689793e-06,
      "loss": 0.0118,
      "step": 1855760
    },
    {
      "epoch": 3.037024672204657,
      "grad_norm": 0.43288829922676086,
      "learning_rate": 3.951819613476276e-06,
      "loss": 0.0123,
      "step": 1855780
    },
    {
      "epoch": 3.03705740264331,
      "grad_norm": 0.4536120593547821,
      "learning_rate": 3.951753721262759e-06,
      "loss": 0.0119,
      "step": 1855800
    },
    {
      "epoch": 3.0370901330819637,
      "grad_norm": 0.16837799549102783,
      "learning_rate": 3.951687829049242e-06,
      "loss": 0.0173,
      "step": 1855820
    },
    {
      "epoch": 3.037122863520617,
      "grad_norm": 0.42519739270210266,
      "learning_rate": 3.951621936835725e-06,
      "loss": 0.0116,
      "step": 1855840
    },
    {
      "epoch": 3.03715559395927,
      "grad_norm": 0.3545878529548645,
      "learning_rate": 3.951556044622207e-06,
      "loss": 0.0146,
      "step": 1855860
    },
    {
      "epoch": 3.0371883243979236,
      "grad_norm": 0.36462023854255676,
      "learning_rate": 3.95149015240869e-06,
      "loss": 0.0097,
      "step": 1855880
    },
    {
      "epoch": 3.037221054836577,
      "grad_norm": 0.6218274831771851,
      "learning_rate": 3.951424260195173e-06,
      "loss": 0.0108,
      "step": 1855900
    },
    {
      "epoch": 3.0372537852752304,
      "grad_norm": 0.39063888788223267,
      "learning_rate": 3.9513583679816555e-06,
      "loss": 0.0125,
      "step": 1855920
    },
    {
      "epoch": 3.0372865157138835,
      "grad_norm": 0.5474622249603271,
      "learning_rate": 3.951292475768138e-06,
      "loss": 0.013,
      "step": 1855940
    },
    {
      "epoch": 3.037319246152537,
      "grad_norm": 0.38704821467399597,
      "learning_rate": 3.951226583554622e-06,
      "loss": 0.0121,
      "step": 1855960
    },
    {
      "epoch": 3.0373519765911903,
      "grad_norm": 0.3043302893638611,
      "learning_rate": 3.951160691341105e-06,
      "loss": 0.0168,
      "step": 1855980
    },
    {
      "epoch": 3.0373847070298434,
      "grad_norm": 0.3908501863479614,
      "learning_rate": 3.951094799127587e-06,
      "loss": 0.0154,
      "step": 1856000
    },
    {
      "epoch": 3.037417437468497,
      "grad_norm": 0.19839482009410858,
      "learning_rate": 3.95102890691407e-06,
      "loss": 0.0146,
      "step": 1856020
    },
    {
      "epoch": 3.03745016790715,
      "grad_norm": 0.07535331696271896,
      "learning_rate": 3.950963014700554e-06,
      "loss": 0.0128,
      "step": 1856040
    },
    {
      "epoch": 3.0374828983458038,
      "grad_norm": 0.36256495118141174,
      "learning_rate": 3.950897122487036e-06,
      "loss": 0.0099,
      "step": 1856060
    },
    {
      "epoch": 3.037515628784457,
      "grad_norm": 0.6935782432556152,
      "learning_rate": 3.950831230273519e-06,
      "loss": 0.0103,
      "step": 1856080
    },
    {
      "epoch": 3.0375483592231105,
      "grad_norm": 0.3208297789096832,
      "learning_rate": 3.950765338060002e-06,
      "loss": 0.0096,
      "step": 1856100
    },
    {
      "epoch": 3.0375810896617637,
      "grad_norm": 0.17238999903202057,
      "learning_rate": 3.950699445846485e-06,
      "loss": 0.0099,
      "step": 1856120
    },
    {
      "epoch": 3.037613820100417,
      "grad_norm": 0.2952556610107422,
      "learning_rate": 3.950633553632967e-06,
      "loss": 0.0084,
      "step": 1856140
    },
    {
      "epoch": 3.0376465505390704,
      "grad_norm": 0.34881603717803955,
      "learning_rate": 3.95056766141945e-06,
      "loss": 0.0069,
      "step": 1856160
    },
    {
      "epoch": 3.0376792809777236,
      "grad_norm": 1.203796148300171,
      "learning_rate": 3.950501769205934e-06,
      "loss": 0.0143,
      "step": 1856180
    },
    {
      "epoch": 3.037712011416377,
      "grad_norm": null,
      "learning_rate": 3.9504358769924164e-06,
      "loss": 0.0143,
      "step": 1856200
    },
    {
      "epoch": 3.0377447418550303,
      "grad_norm": 0.3818414509296417,
      "learning_rate": 3.950369984778899e-06,
      "loss": 0.0131,
      "step": 1856220
    },
    {
      "epoch": 3.037777472293684,
      "grad_norm": 0.3447268307209015,
      "learning_rate": 3.950304092565382e-06,
      "loss": 0.0073,
      "step": 1856240
    },
    {
      "epoch": 3.037810202732337,
      "grad_norm": 0.22920195758342743,
      "learning_rate": 3.950238200351865e-06,
      "loss": 0.0088,
      "step": 1856260
    },
    {
      "epoch": 3.03784293317099,
      "grad_norm": 0.2679685652256012,
      "learning_rate": 3.950172308138347e-06,
      "loss": 0.0135,
      "step": 1856280
    },
    {
      "epoch": 3.037875663609644,
      "grad_norm": 0.18932507932186127,
      "learning_rate": 3.95010641592483e-06,
      "loss": 0.013,
      "step": 1856300
    },
    {
      "epoch": 3.037908394048297,
      "grad_norm": 0.1323467344045639,
      "learning_rate": 3.950040523711313e-06,
      "loss": 0.0066,
      "step": 1856320
    },
    {
      "epoch": 3.0379411244869505,
      "grad_norm": 0.15284278988838196,
      "learning_rate": 3.949974631497796e-06,
      "loss": 0.0138,
      "step": 1856340
    },
    {
      "epoch": 3.0379738549256037,
      "grad_norm": 0.5498979091644287,
      "learning_rate": 3.949908739284279e-06,
      "loss": 0.0099,
      "step": 1856360
    },
    {
      "epoch": 3.038006585364257,
      "grad_norm": 1.2161544561386108,
      "learning_rate": 3.949842847070762e-06,
      "loss": 0.0131,
      "step": 1856380
    },
    {
      "epoch": 3.0380393158029104,
      "grad_norm": 0.41448554396629333,
      "learning_rate": 3.9497769548572455e-06,
      "loss": 0.0131,
      "step": 1856400
    },
    {
      "epoch": 3.0380720462415636,
      "grad_norm": 0.2915964424610138,
      "learning_rate": 3.949711062643728e-06,
      "loss": 0.0112,
      "step": 1856420
    },
    {
      "epoch": 3.038104776680217,
      "grad_norm": 0.1526404470205307,
      "learning_rate": 3.949645170430211e-06,
      "loss": 0.01,
      "step": 1856440
    },
    {
      "epoch": 3.0381375071188703,
      "grad_norm": 1.1027624607086182,
      "learning_rate": 3.949579278216694e-06,
      "loss": 0.0113,
      "step": 1856460
    },
    {
      "epoch": 3.038170237557524,
      "grad_norm": 0.2778811752796173,
      "learning_rate": 3.9495133860031765e-06,
      "loss": 0.0086,
      "step": 1856480
    },
    {
      "epoch": 3.038202967996177,
      "grad_norm": 0.5017201900482178,
      "learning_rate": 3.949447493789659e-06,
      "loss": 0.0127,
      "step": 1856500
    },
    {
      "epoch": 3.03823569843483,
      "grad_norm": 0.23249118030071259,
      "learning_rate": 3.949381601576142e-06,
      "loss": 0.0111,
      "step": 1856520
    },
    {
      "epoch": 3.038268428873484,
      "grad_norm": 0.3029215633869171,
      "learning_rate": 3.949315709362625e-06,
      "loss": 0.0084,
      "step": 1856540
    },
    {
      "epoch": 3.038301159312137,
      "grad_norm": 0.23969903588294983,
      "learning_rate": 3.9492498171491074e-06,
      "loss": 0.011,
      "step": 1856560
    },
    {
      "epoch": 3.0383338897507906,
      "grad_norm": 0.23086829483509064,
      "learning_rate": 3.949183924935591e-06,
      "loss": 0.0107,
      "step": 1856580
    },
    {
      "epoch": 3.0383666201894437,
      "grad_norm": 1.587479591369629,
      "learning_rate": 3.949118032722074e-06,
      "loss": 0.0096,
      "step": 1856600
    },
    {
      "epoch": 3.0383993506280973,
      "grad_norm": 0.21761083602905273,
      "learning_rate": 3.9490521405085565e-06,
      "loss": 0.0127,
      "step": 1856620
    },
    {
      "epoch": 3.0384320810667504,
      "grad_norm": 0.15533681213855743,
      "learning_rate": 3.948986248295039e-06,
      "loss": 0.012,
      "step": 1856640
    },
    {
      "epoch": 3.0384648115054036,
      "grad_norm": 0.2680470943450928,
      "learning_rate": 3.948920356081522e-06,
      "loss": 0.0138,
      "step": 1856660
    },
    {
      "epoch": 3.038497541944057,
      "grad_norm": 1.3106948137283325,
      "learning_rate": 3.948854463868005e-06,
      "loss": 0.0115,
      "step": 1856680
    },
    {
      "epoch": 3.0385302723827103,
      "grad_norm": 0.1388878971338272,
      "learning_rate": 3.9487885716544875e-06,
      "loss": 0.0188,
      "step": 1856700
    },
    {
      "epoch": 3.038563002821364,
      "grad_norm": 0.08877917379140854,
      "learning_rate": 3.94872267944097e-06,
      "loss": 0.0114,
      "step": 1856720
    },
    {
      "epoch": 3.038595733260017,
      "grad_norm": 0.23467622697353363,
      "learning_rate": 3.948656787227454e-06,
      "loss": 0.0108,
      "step": 1856740
    },
    {
      "epoch": 3.0386284636986707,
      "grad_norm": 0.32488787174224854,
      "learning_rate": 3.9485908950139365e-06,
      "loss": 0.0094,
      "step": 1856760
    },
    {
      "epoch": 3.038661194137324,
      "grad_norm": 0.17067185044288635,
      "learning_rate": 3.948525002800419e-06,
      "loss": 0.0099,
      "step": 1856780
    },
    {
      "epoch": 3.038693924575977,
      "grad_norm": 0.17925919592380524,
      "learning_rate": 3.948459110586903e-06,
      "loss": 0.0101,
      "step": 1856800
    },
    {
      "epoch": 3.0387266550146306,
      "grad_norm": 0.1682501882314682,
      "learning_rate": 3.948393218373386e-06,
      "loss": 0.0092,
      "step": 1856820
    },
    {
      "epoch": 3.0387593854532837,
      "grad_norm": 0.4493093192577362,
      "learning_rate": 3.948327326159868e-06,
      "loss": 0.0151,
      "step": 1856840
    },
    {
      "epoch": 3.0387921158919373,
      "grad_norm": 0.6020143628120422,
      "learning_rate": 3.948261433946351e-06,
      "loss": 0.0131,
      "step": 1856860
    },
    {
      "epoch": 3.0388248463305905,
      "grad_norm": 0.16188853979110718,
      "learning_rate": 3.948195541732834e-06,
      "loss": 0.0137,
      "step": 1856880
    },
    {
      "epoch": 3.038857576769244,
      "grad_norm": 0.14982549846172333,
      "learning_rate": 3.9481296495193166e-06,
      "loss": 0.0099,
      "step": 1856900
    },
    {
      "epoch": 3.038890307207897,
      "grad_norm": 0.20635271072387695,
      "learning_rate": 3.948063757305799e-06,
      "loss": 0.0123,
      "step": 1856920
    },
    {
      "epoch": 3.0389230376465504,
      "grad_norm": 0.20301853120326996,
      "learning_rate": 3.947997865092282e-06,
      "loss": 0.0099,
      "step": 1856940
    },
    {
      "epoch": 3.038955768085204,
      "grad_norm": 0.43858447670936584,
      "learning_rate": 3.947931972878765e-06,
      "loss": 0.0114,
      "step": 1856960
    },
    {
      "epoch": 3.038988498523857,
      "grad_norm": 0.12162739783525467,
      "learning_rate": 3.947866080665248e-06,
      "loss": 0.0114,
      "step": 1856980
    },
    {
      "epoch": 3.0390212289625107,
      "grad_norm": 0.0639026015996933,
      "learning_rate": 3.947800188451731e-06,
      "loss": 0.0126,
      "step": 1857000
    },
    {
      "epoch": 3.039053959401164,
      "grad_norm": 0.45868730545043945,
      "learning_rate": 3.947734296238214e-06,
      "loss": 0.0085,
      "step": 1857020
    },
    {
      "epoch": 3.0390866898398174,
      "grad_norm": 0.6381092667579651,
      "learning_rate": 3.947668404024697e-06,
      "loss": 0.0115,
      "step": 1857040
    },
    {
      "epoch": 3.0391194202784706,
      "grad_norm": 0.6253182291984558,
      "learning_rate": 3.947602511811179e-06,
      "loss": 0.0166,
      "step": 1857060
    },
    {
      "epoch": 3.0391521507171237,
      "grad_norm": 0.40505900979042053,
      "learning_rate": 3.947536619597662e-06,
      "loss": 0.0093,
      "step": 1857080
    },
    {
      "epoch": 3.0391848811557773,
      "grad_norm": 0.16900327801704407,
      "learning_rate": 3.947470727384146e-06,
      "loss": 0.0132,
      "step": 1857100
    },
    {
      "epoch": 3.0392176115944305,
      "grad_norm": 0.1241074651479721,
      "learning_rate": 3.947404835170628e-06,
      "loss": 0.0119,
      "step": 1857120
    },
    {
      "epoch": 3.039250342033084,
      "grad_norm": 0.24752391874790192,
      "learning_rate": 3.947338942957111e-06,
      "loss": 0.0075,
      "step": 1857140
    },
    {
      "epoch": 3.0392830724717372,
      "grad_norm": 0.1564810425043106,
      "learning_rate": 3.947273050743594e-06,
      "loss": 0.0145,
      "step": 1857160
    },
    {
      "epoch": 3.039315802910391,
      "grad_norm": 0.25241777300834656,
      "learning_rate": 3.947207158530077e-06,
      "loss": 0.0084,
      "step": 1857180
    },
    {
      "epoch": 3.039348533349044,
      "grad_norm": 0.5051990747451782,
      "learning_rate": 3.94714126631656e-06,
      "loss": 0.0089,
      "step": 1857200
    },
    {
      "epoch": 3.039381263787697,
      "grad_norm": 0.3123065233230591,
      "learning_rate": 3.947075374103043e-06,
      "loss": 0.009,
      "step": 1857220
    },
    {
      "epoch": 3.0394139942263507,
      "grad_norm": 0.17182406783103943,
      "learning_rate": 3.947009481889526e-06,
      "loss": 0.006,
      "step": 1857240
    },
    {
      "epoch": 3.039446724665004,
      "grad_norm": 0.09873484820127487,
      "learning_rate": 3.946943589676008e-06,
      "loss": 0.0122,
      "step": 1857260
    },
    {
      "epoch": 3.0394794551036575,
      "grad_norm": 0.06633401662111282,
      "learning_rate": 3.946877697462491e-06,
      "loss": 0.0069,
      "step": 1857280
    },
    {
      "epoch": 3.0395121855423106,
      "grad_norm": 0.3307817280292511,
      "learning_rate": 3.946811805248974e-06,
      "loss": 0.0105,
      "step": 1857300
    },
    {
      "epoch": 3.0395449159809638,
      "grad_norm": 0.15374639630317688,
      "learning_rate": 3.946745913035457e-06,
      "loss": 0.0119,
      "step": 1857320
    },
    {
      "epoch": 3.0395776464196174,
      "grad_norm": 0.1639581322669983,
      "learning_rate": 3.946680020821939e-06,
      "loss": 0.0133,
      "step": 1857340
    },
    {
      "epoch": 3.0396103768582705,
      "grad_norm": 0.47684329748153687,
      "learning_rate": 3.946614128608422e-06,
      "loss": 0.0082,
      "step": 1857360
    },
    {
      "epoch": 3.039643107296924,
      "grad_norm": 0.3904305100440979,
      "learning_rate": 3.946548236394906e-06,
      "loss": 0.0129,
      "step": 1857380
    },
    {
      "epoch": 3.0396758377355773,
      "grad_norm": 0.19788630306720734,
      "learning_rate": 3.9464823441813884e-06,
      "loss": 0.0082,
      "step": 1857400
    },
    {
      "epoch": 3.039708568174231,
      "grad_norm": 0.5893893837928772,
      "learning_rate": 3.946416451967871e-06,
      "loss": 0.0113,
      "step": 1857420
    },
    {
      "epoch": 3.039741298612884,
      "grad_norm": 0.274356484413147,
      "learning_rate": 3.946350559754354e-06,
      "loss": 0.0073,
      "step": 1857440
    },
    {
      "epoch": 3.039774029051537,
      "grad_norm": 0.1554798036813736,
      "learning_rate": 3.9462846675408375e-06,
      "loss": 0.0082,
      "step": 1857460
    },
    {
      "epoch": 3.0398067594901907,
      "grad_norm": 0.19565041363239288,
      "learning_rate": 3.94621877532732e-06,
      "loss": 0.0066,
      "step": 1857480
    },
    {
      "epoch": 3.039839489928844,
      "grad_norm": 0.18341009318828583,
      "learning_rate": 3.946152883113803e-06,
      "loss": 0.0102,
      "step": 1857500
    },
    {
      "epoch": 3.0398722203674975,
      "grad_norm": 0.36967501044273376,
      "learning_rate": 3.946086990900286e-06,
      "loss": 0.0093,
      "step": 1857520
    },
    {
      "epoch": 3.0399049508061506,
      "grad_norm": 0.4521501660346985,
      "learning_rate": 3.9460210986867685e-06,
      "loss": 0.0212,
      "step": 1857540
    },
    {
      "epoch": 3.0399376812448042,
      "grad_norm": 0.2610008716583252,
      "learning_rate": 3.945955206473251e-06,
      "loss": 0.0134,
      "step": 1857560
    },
    {
      "epoch": 3.0399704116834574,
      "grad_norm": 0.5894491672515869,
      "learning_rate": 3.945889314259734e-06,
      "loss": 0.013,
      "step": 1857580
    },
    {
      "epoch": 3.0400031421221105,
      "grad_norm": 0.25121384859085083,
      "learning_rate": 3.9458234220462175e-06,
      "loss": 0.0092,
      "step": 1857600
    },
    {
      "epoch": 3.040035872560764,
      "grad_norm": 0.4285668730735779,
      "learning_rate": 3.9457575298327e-06,
      "loss": 0.012,
      "step": 1857620
    },
    {
      "epoch": 3.0400686029994173,
      "grad_norm": 0.20636533200740814,
      "learning_rate": 3.945691637619183e-06,
      "loss": 0.0138,
      "step": 1857640
    },
    {
      "epoch": 3.040101333438071,
      "grad_norm": 0.4091535806655884,
      "learning_rate": 3.945625745405666e-06,
      "loss": 0.0136,
      "step": 1857660
    },
    {
      "epoch": 3.040134063876724,
      "grad_norm": 0.5548306703567505,
      "learning_rate": 3.9455598531921485e-06,
      "loss": 0.0211,
      "step": 1857680
    },
    {
      "epoch": 3.0401667943153776,
      "grad_norm": 0.34396159648895264,
      "learning_rate": 3.945493960978631e-06,
      "loss": 0.0089,
      "step": 1857700
    },
    {
      "epoch": 3.0401995247540308,
      "grad_norm": 0.2298017144203186,
      "learning_rate": 3.945428068765114e-06,
      "loss": 0.0148,
      "step": 1857720
    },
    {
      "epoch": 3.040232255192684,
      "grad_norm": 0.13111916184425354,
      "learning_rate": 3.945362176551597e-06,
      "loss": 0.0058,
      "step": 1857740
    },
    {
      "epoch": 3.0402649856313375,
      "grad_norm": 1.0252209901809692,
      "learning_rate": 3.9452962843380795e-06,
      "loss": 0.0142,
      "step": 1857760
    },
    {
      "epoch": 3.0402977160699907,
      "grad_norm": 0.21682903170585632,
      "learning_rate": 3.945230392124563e-06,
      "loss": 0.0109,
      "step": 1857780
    },
    {
      "epoch": 3.0403304465086443,
      "grad_norm": 0.16856396198272705,
      "learning_rate": 3.945164499911046e-06,
      "loss": 0.0103,
      "step": 1857800
    },
    {
      "epoch": 3.0403631769472974,
      "grad_norm": 0.11454900354146957,
      "learning_rate": 3.945098607697529e-06,
      "loss": 0.0126,
      "step": 1857820
    },
    {
      "epoch": 3.040395907385951,
      "grad_norm": 0.5713180899620056,
      "learning_rate": 3.945032715484012e-06,
      "loss": 0.0157,
      "step": 1857840
    },
    {
      "epoch": 3.040428637824604,
      "grad_norm": 0.668241560459137,
      "learning_rate": 3.944966823270495e-06,
      "loss": 0.0142,
      "step": 1857860
    },
    {
      "epoch": 3.0404613682632573,
      "grad_norm": 0.1408250778913498,
      "learning_rate": 3.944900931056978e-06,
      "loss": 0.011,
      "step": 1857880
    },
    {
      "epoch": 3.040494098701911,
      "grad_norm": 0.0986744835972786,
      "learning_rate": 3.94483503884346e-06,
      "loss": 0.0169,
      "step": 1857900
    },
    {
      "epoch": 3.040526829140564,
      "grad_norm": 0.6611611843109131,
      "learning_rate": 3.944769146629943e-06,
      "loss": 0.0146,
      "step": 1857920
    },
    {
      "epoch": 3.0405595595792176,
      "grad_norm": 0.1972673088312149,
      "learning_rate": 3.944703254416426e-06,
      "loss": 0.0101,
      "step": 1857940
    },
    {
      "epoch": 3.040592290017871,
      "grad_norm": 0.2465285211801529,
      "learning_rate": 3.9446373622029085e-06,
      "loss": 0.0134,
      "step": 1857960
    },
    {
      "epoch": 3.040625020456524,
      "grad_norm": 0.4461763799190521,
      "learning_rate": 3.944571469989391e-06,
      "loss": 0.0146,
      "step": 1857980
    },
    {
      "epoch": 3.0406577508951775,
      "grad_norm": 0.9013317227363586,
      "learning_rate": 3.944505577775875e-06,
      "loss": 0.0136,
      "step": 1858000
    },
    {
      "epoch": 3.0406904813338307,
      "grad_norm": 0.23647594451904297,
      "learning_rate": 3.944439685562358e-06,
      "loss": 0.0143,
      "step": 1858020
    },
    {
      "epoch": 3.0407232117724843,
      "grad_norm": 0.16771574318408966,
      "learning_rate": 3.94437379334884e-06,
      "loss": 0.0095,
      "step": 1858040
    },
    {
      "epoch": 3.0407559422111374,
      "grad_norm": 0.1268632858991623,
      "learning_rate": 3.944307901135323e-06,
      "loss": 0.0087,
      "step": 1858060
    },
    {
      "epoch": 3.040788672649791,
      "grad_norm": 0.14207474887371063,
      "learning_rate": 3.944242008921806e-06,
      "loss": 0.0126,
      "step": 1858080
    },
    {
      "epoch": 3.040821403088444,
      "grad_norm": 0.22972244024276733,
      "learning_rate": 3.9441761167082886e-06,
      "loss": 0.0164,
      "step": 1858100
    },
    {
      "epoch": 3.0408541335270973,
      "grad_norm": 0.5353816151618958,
      "learning_rate": 3.944110224494771e-06,
      "loss": 0.011,
      "step": 1858120
    },
    {
      "epoch": 3.040886863965751,
      "grad_norm": 0.47816985845565796,
      "learning_rate": 3.944044332281254e-06,
      "loss": 0.0094,
      "step": 1858140
    },
    {
      "epoch": 3.040919594404404,
      "grad_norm": 1.1042118072509766,
      "learning_rate": 3.943978440067738e-06,
      "loss": 0.0114,
      "step": 1858160
    },
    {
      "epoch": 3.0409523248430577,
      "grad_norm": 0.18245179951190948,
      "learning_rate": 3.94391254785422e-06,
      "loss": 0.0109,
      "step": 1858180
    },
    {
      "epoch": 3.040985055281711,
      "grad_norm": 0.14963796734809875,
      "learning_rate": 3.943846655640703e-06,
      "loss": 0.0086,
      "step": 1858200
    },
    {
      "epoch": 3.0410177857203644,
      "grad_norm": 0.07824729382991791,
      "learning_rate": 3.943780763427187e-06,
      "loss": 0.0103,
      "step": 1858220
    },
    {
      "epoch": 3.0410505161590176,
      "grad_norm": 0.5647605657577515,
      "learning_rate": 3.9437148712136694e-06,
      "loss": 0.0122,
      "step": 1858240
    },
    {
      "epoch": 3.0410832465976707,
      "grad_norm": 0.8034032583236694,
      "learning_rate": 3.943648979000152e-06,
      "loss": 0.0168,
      "step": 1858260
    },
    {
      "epoch": 3.0411159770363243,
      "grad_norm": 0.26522836089134216,
      "learning_rate": 3.943583086786635e-06,
      "loss": 0.0084,
      "step": 1858280
    },
    {
      "epoch": 3.0411487074749775,
      "grad_norm": 0.32730036973953247,
      "learning_rate": 3.943517194573118e-06,
      "loss": 0.0122,
      "step": 1858300
    },
    {
      "epoch": 3.041181437913631,
      "grad_norm": 0.30201011896133423,
      "learning_rate": 3.9434513023596e-06,
      "loss": 0.011,
      "step": 1858320
    },
    {
      "epoch": 3.041214168352284,
      "grad_norm": 0.39453181624412537,
      "learning_rate": 3.943385410146083e-06,
      "loss": 0.0106,
      "step": 1858340
    },
    {
      "epoch": 3.041246898790938,
      "grad_norm": 0.23274114727973938,
      "learning_rate": 3.943319517932566e-06,
      "loss": 0.0131,
      "step": 1858360
    },
    {
      "epoch": 3.041279629229591,
      "grad_norm": 0.9228208065032959,
      "learning_rate": 3.943253625719049e-06,
      "loss": 0.0183,
      "step": 1858380
    },
    {
      "epoch": 3.041312359668244,
      "grad_norm": 0.6194624900817871,
      "learning_rate": 3.943187733505532e-06,
      "loss": 0.0126,
      "step": 1858400
    },
    {
      "epoch": 3.0413450901068977,
      "grad_norm": 0.1955351084470749,
      "learning_rate": 3.943121841292015e-06,
      "loss": 0.0132,
      "step": 1858420
    },
    {
      "epoch": 3.041377820545551,
      "grad_norm": 0.06493795663118362,
      "learning_rate": 3.943055949078498e-06,
      "loss": 0.0108,
      "step": 1858440
    },
    {
      "epoch": 3.0414105509842044,
      "grad_norm": 0.3471814692020416,
      "learning_rate": 3.9429900568649804e-06,
      "loss": 0.0114,
      "step": 1858460
    },
    {
      "epoch": 3.0414432814228576,
      "grad_norm": 0.4519747793674469,
      "learning_rate": 3.942924164651463e-06,
      "loss": 0.01,
      "step": 1858480
    },
    {
      "epoch": 3.041476011861511,
      "grad_norm": 0.51119065284729,
      "learning_rate": 3.942858272437946e-06,
      "loss": 0.0106,
      "step": 1858500
    },
    {
      "epoch": 3.0415087423001643,
      "grad_norm": 0.382464200258255,
      "learning_rate": 3.9427923802244295e-06,
      "loss": 0.0118,
      "step": 1858520
    },
    {
      "epoch": 3.0415414727388175,
      "grad_norm": 0.15003551542758942,
      "learning_rate": 3.942726488010912e-06,
      "loss": 0.0177,
      "step": 1858540
    },
    {
      "epoch": 3.041574203177471,
      "grad_norm": 0.14247971773147583,
      "learning_rate": 3.942660595797395e-06,
      "loss": 0.0064,
      "step": 1858560
    },
    {
      "epoch": 3.041606933616124,
      "grad_norm": 0.03976866230368614,
      "learning_rate": 3.942594703583878e-06,
      "loss": 0.0076,
      "step": 1858580
    },
    {
      "epoch": 3.041639664054778,
      "grad_norm": 0.2237316370010376,
      "learning_rate": 3.9425288113703605e-06,
      "loss": 0.0154,
      "step": 1858600
    },
    {
      "epoch": 3.041672394493431,
      "grad_norm": 0.20023293793201447,
      "learning_rate": 3.942462919156844e-06,
      "loss": 0.0096,
      "step": 1858620
    },
    {
      "epoch": 3.0417051249320846,
      "grad_norm": 0.06917686015367508,
      "learning_rate": 3.942397026943327e-06,
      "loss": 0.0089,
      "step": 1858640
    },
    {
      "epoch": 3.0417378553707377,
      "grad_norm": 0.3108345866203308,
      "learning_rate": 3.9423311347298095e-06,
      "loss": 0.0081,
      "step": 1858660
    },
    {
      "epoch": 3.041770585809391,
      "grad_norm": 0.5026493072509766,
      "learning_rate": 3.942265242516292e-06,
      "loss": 0.0112,
      "step": 1858680
    },
    {
      "epoch": 3.0418033162480445,
      "grad_norm": 0.7271220088005066,
      "learning_rate": 3.942199350302775e-06,
      "loss": 0.01,
      "step": 1858700
    },
    {
      "epoch": 3.0418360466866976,
      "grad_norm": 0.503993034362793,
      "learning_rate": 3.942133458089258e-06,
      "loss": 0.0093,
      "step": 1858720
    },
    {
      "epoch": 3.041868777125351,
      "grad_norm": 0.196508526802063,
      "learning_rate": 3.9420675658757405e-06,
      "loss": 0.0135,
      "step": 1858740
    },
    {
      "epoch": 3.0419015075640043,
      "grad_norm": 0.1274344027042389,
      "learning_rate": 3.942001673662223e-06,
      "loss": 0.0084,
      "step": 1858760
    },
    {
      "epoch": 3.0419342380026575,
      "grad_norm": 0.42784345149993896,
      "learning_rate": 3.941935781448706e-06,
      "loss": 0.0124,
      "step": 1858780
    },
    {
      "epoch": 3.041966968441311,
      "grad_norm": 0.837814450263977,
      "learning_rate": 3.9418698892351895e-06,
      "loss": 0.0136,
      "step": 1858800
    },
    {
      "epoch": 3.0419996988799642,
      "grad_norm": 0.16121727228164673,
      "learning_rate": 3.941803997021672e-06,
      "loss": 0.0124,
      "step": 1858820
    },
    {
      "epoch": 3.042032429318618,
      "grad_norm": 0.28144514560699463,
      "learning_rate": 3.941738104808155e-06,
      "loss": 0.0129,
      "step": 1858840
    },
    {
      "epoch": 3.042065159757271,
      "grad_norm": 0.15244252979755402,
      "learning_rate": 3.941672212594639e-06,
      "loss": 0.0119,
      "step": 1858860
    },
    {
      "epoch": 3.0420978901959246,
      "grad_norm": 0.1842789202928543,
      "learning_rate": 3.941606320381121e-06,
      "loss": 0.0117,
      "step": 1858880
    },
    {
      "epoch": 3.0421306206345777,
      "grad_norm": 0.21548135578632355,
      "learning_rate": 3.941540428167604e-06,
      "loss": 0.0135,
      "step": 1858900
    },
    {
      "epoch": 3.042163351073231,
      "grad_norm": 0.23019644618034363,
      "learning_rate": 3.941474535954087e-06,
      "loss": 0.0095,
      "step": 1858920
    },
    {
      "epoch": 3.0421960815118845,
      "grad_norm": 0.07466303557157516,
      "learning_rate": 3.9414086437405696e-06,
      "loss": 0.0111,
      "step": 1858940
    },
    {
      "epoch": 3.0422288119505376,
      "grad_norm": 1.1220715045928955,
      "learning_rate": 3.941342751527052e-06,
      "loss": 0.0108,
      "step": 1858960
    },
    {
      "epoch": 3.042261542389191,
      "grad_norm": 0.3098296523094177,
      "learning_rate": 3.941276859313535e-06,
      "loss": 0.01,
      "step": 1858980
    },
    {
      "epoch": 3.0422942728278444,
      "grad_norm": 0.11616455018520355,
      "learning_rate": 3.941210967100018e-06,
      "loss": 0.01,
      "step": 1859000
    },
    {
      "epoch": 3.042327003266498,
      "grad_norm": 0.2978671193122864,
      "learning_rate": 3.941145074886501e-06,
      "loss": 0.0154,
      "step": 1859020
    },
    {
      "epoch": 3.042359733705151,
      "grad_norm": 0.6150475740432739,
      "learning_rate": 3.941079182672984e-06,
      "loss": 0.0128,
      "step": 1859040
    },
    {
      "epoch": 3.0423924641438043,
      "grad_norm": 0.6900640726089478,
      "learning_rate": 3.941013290459467e-06,
      "loss": 0.0128,
      "step": 1859060
    },
    {
      "epoch": 3.042425194582458,
      "grad_norm": 0.41927510499954224,
      "learning_rate": 3.94094739824595e-06,
      "loss": 0.0083,
      "step": 1859080
    },
    {
      "epoch": 3.042457925021111,
      "grad_norm": 0.42635712027549744,
      "learning_rate": 3.940881506032432e-06,
      "loss": 0.02,
      "step": 1859100
    },
    {
      "epoch": 3.0424906554597646,
      "grad_norm": 0.22031375765800476,
      "learning_rate": 3.940815613818915e-06,
      "loss": 0.012,
      "step": 1859120
    },
    {
      "epoch": 3.0425233858984178,
      "grad_norm": 1.9534482955932617,
      "learning_rate": 3.940749721605398e-06,
      "loss": 0.0136,
      "step": 1859140
    },
    {
      "epoch": 3.0425561163370713,
      "grad_norm": 0.3524514138698578,
      "learning_rate": 3.9406838293918806e-06,
      "loss": 0.0155,
      "step": 1859160
    },
    {
      "epoch": 3.0425888467757245,
      "grad_norm": 0.09166786819696426,
      "learning_rate": 3.940617937178364e-06,
      "loss": 0.0129,
      "step": 1859180
    },
    {
      "epoch": 3.0426215772143776,
      "grad_norm": 0.11490056663751602,
      "learning_rate": 3.940552044964847e-06,
      "loss": 0.0093,
      "step": 1859200
    },
    {
      "epoch": 3.0426543076530312,
      "grad_norm": 0.32241639494895935,
      "learning_rate": 3.94048615275133e-06,
      "loss": 0.0117,
      "step": 1859220
    },
    {
      "epoch": 3.0426870380916844,
      "grad_norm": 0.13185225427150726,
      "learning_rate": 3.940420260537813e-06,
      "loss": 0.0153,
      "step": 1859240
    },
    {
      "epoch": 3.042719768530338,
      "grad_norm": 0.6030184030532837,
      "learning_rate": 3.940354368324296e-06,
      "loss": 0.0091,
      "step": 1859260
    },
    {
      "epoch": 3.042752498968991,
      "grad_norm": 0.09599191695451736,
      "learning_rate": 3.940288476110779e-06,
      "loss": 0.012,
      "step": 1859280
    },
    {
      "epoch": 3.0427852294076447,
      "grad_norm": 0.10078851878643036,
      "learning_rate": 3.9402225838972614e-06,
      "loss": 0.0114,
      "step": 1859300
    },
    {
      "epoch": 3.042817959846298,
      "grad_norm": 0.11218151450157166,
      "learning_rate": 3.940156691683744e-06,
      "loss": 0.0071,
      "step": 1859320
    },
    {
      "epoch": 3.042850690284951,
      "grad_norm": 0.3232158124446869,
      "learning_rate": 3.940090799470227e-06,
      "loss": 0.0073,
      "step": 1859340
    },
    {
      "epoch": 3.0428834207236046,
      "grad_norm": 0.31985288858413696,
      "learning_rate": 3.94002490725671e-06,
      "loss": 0.0107,
      "step": 1859360
    },
    {
      "epoch": 3.0429161511622578,
      "grad_norm": 0.05628703534603119,
      "learning_rate": 3.939959015043192e-06,
      "loss": 0.0073,
      "step": 1859380
    },
    {
      "epoch": 3.0429488816009114,
      "grad_norm": 0.4041648209095001,
      "learning_rate": 3.939893122829675e-06,
      "loss": 0.0058,
      "step": 1859400
    },
    {
      "epoch": 3.0429816120395645,
      "grad_norm": 0.3548884093761444,
      "learning_rate": 3.939827230616159e-06,
      "loss": 0.0125,
      "step": 1859420
    },
    {
      "epoch": 3.0430143424782177,
      "grad_norm": 1.4383736848831177,
      "learning_rate": 3.9397613384026415e-06,
      "loss": 0.0113,
      "step": 1859440
    },
    {
      "epoch": 3.0430470729168713,
      "grad_norm": 0.2987741231918335,
      "learning_rate": 3.939695446189124e-06,
      "loss": 0.0126,
      "step": 1859460
    },
    {
      "epoch": 3.0430798033555244,
      "grad_norm": 0.1992729902267456,
      "learning_rate": 3.939629553975607e-06,
      "loss": 0.0106,
      "step": 1859480
    },
    {
      "epoch": 3.043112533794178,
      "grad_norm": 0.04088418185710907,
      "learning_rate": 3.93956366176209e-06,
      "loss": 0.0122,
      "step": 1859500
    },
    {
      "epoch": 3.043145264232831,
      "grad_norm": 0.8459049463272095,
      "learning_rate": 3.939497769548572e-06,
      "loss": 0.0137,
      "step": 1859520
    },
    {
      "epoch": 3.0431779946714848,
      "grad_norm": 0.21474894881248474,
      "learning_rate": 3.939431877335055e-06,
      "loss": 0.0099,
      "step": 1859540
    },
    {
      "epoch": 3.043210725110138,
      "grad_norm": 0.615209698677063,
      "learning_rate": 3.939365985121539e-06,
      "loss": 0.0135,
      "step": 1859560
    },
    {
      "epoch": 3.043243455548791,
      "grad_norm": 0.16247998178005219,
      "learning_rate": 3.9393000929080215e-06,
      "loss": 0.0111,
      "step": 1859580
    },
    {
      "epoch": 3.0432761859874446,
      "grad_norm": 0.14843900501728058,
      "learning_rate": 3.939234200694504e-06,
      "loss": 0.0066,
      "step": 1859600
    },
    {
      "epoch": 3.043308916426098,
      "grad_norm": 0.24157299101352692,
      "learning_rate": 3.939168308480987e-06,
      "loss": 0.0143,
      "step": 1859620
    },
    {
      "epoch": 3.0433416468647514,
      "grad_norm": 0.1274610012769699,
      "learning_rate": 3.9391024162674705e-06,
      "loss": 0.0102,
      "step": 1859640
    },
    {
      "epoch": 3.0433743773034045,
      "grad_norm": 0.17322833836078644,
      "learning_rate": 3.939036524053953e-06,
      "loss": 0.016,
      "step": 1859660
    },
    {
      "epoch": 3.043407107742058,
      "grad_norm": 0.22389006614685059,
      "learning_rate": 3.938970631840436e-06,
      "loss": 0.0099,
      "step": 1859680
    },
    {
      "epoch": 3.0434398381807113,
      "grad_norm": 0.21750935912132263,
      "learning_rate": 3.938904739626919e-06,
      "loss": 0.0126,
      "step": 1859700
    },
    {
      "epoch": 3.0434725686193644,
      "grad_norm": 0.7435833215713501,
      "learning_rate": 3.9388388474134015e-06,
      "loss": 0.0099,
      "step": 1859720
    },
    {
      "epoch": 3.043505299058018,
      "grad_norm": 0.5487762689590454,
      "learning_rate": 3.938772955199884e-06,
      "loss": 0.0137,
      "step": 1859740
    },
    {
      "epoch": 3.043538029496671,
      "grad_norm": 0.0827384814620018,
      "learning_rate": 3.938707062986367e-06,
      "loss": 0.0105,
      "step": 1859760
    },
    {
      "epoch": 3.0435707599353248,
      "grad_norm": 0.7047200202941895,
      "learning_rate": 3.93864117077285e-06,
      "loss": 0.0165,
      "step": 1859780
    },
    {
      "epoch": 3.043603490373978,
      "grad_norm": 0.15790894627571106,
      "learning_rate": 3.9385752785593325e-06,
      "loss": 0.0096,
      "step": 1859800
    },
    {
      "epoch": 3.0436362208126315,
      "grad_norm": 0.2683273255825043,
      "learning_rate": 3.938509386345816e-06,
      "loss": 0.0155,
      "step": 1859820
    },
    {
      "epoch": 3.0436689512512847,
      "grad_norm": 0.34539830684661865,
      "learning_rate": 3.938443494132299e-06,
      "loss": 0.015,
      "step": 1859840
    },
    {
      "epoch": 3.043701681689938,
      "grad_norm": 0.3022409677505493,
      "learning_rate": 3.9383776019187815e-06,
      "loss": 0.0123,
      "step": 1859860
    },
    {
      "epoch": 3.0437344121285914,
      "grad_norm": 0.04909820854663849,
      "learning_rate": 3.938311709705264e-06,
      "loss": 0.0064,
      "step": 1859880
    },
    {
      "epoch": 3.0437671425672446,
      "grad_norm": 0.2745964825153351,
      "learning_rate": 3.938245817491747e-06,
      "loss": 0.007,
      "step": 1859900
    },
    {
      "epoch": 3.043799873005898,
      "grad_norm": 0.3805198073387146,
      "learning_rate": 3.938179925278231e-06,
      "loss": 0.0113,
      "step": 1859920
    },
    {
      "epoch": 3.0438326034445513,
      "grad_norm": 0.20977304875850677,
      "learning_rate": 3.938114033064713e-06,
      "loss": 0.0119,
      "step": 1859940
    },
    {
      "epoch": 3.043865333883205,
      "grad_norm": 0.4143083691596985,
      "learning_rate": 3.938048140851196e-06,
      "loss": 0.0083,
      "step": 1859960
    },
    {
      "epoch": 3.043898064321858,
      "grad_norm": 0.22745177149772644,
      "learning_rate": 3.937982248637679e-06,
      "loss": 0.0084,
      "step": 1859980
    },
    {
      "epoch": 3.043930794760511,
      "grad_norm": 0.09484708309173584,
      "learning_rate": 3.9379163564241616e-06,
      "loss": 0.0136,
      "step": 1860000
    },
    {
      "epoch": 3.043963525199165,
      "grad_norm": 0.23393091559410095,
      "learning_rate": 3.937850464210644e-06,
      "loss": 0.0103,
      "step": 1860020
    },
    {
      "epoch": 3.043996255637818,
      "grad_norm": 0.4791499674320221,
      "learning_rate": 3.937784571997128e-06,
      "loss": 0.0088,
      "step": 1860040
    },
    {
      "epoch": 3.0440289860764715,
      "grad_norm": 0.1819898635149002,
      "learning_rate": 3.937718679783611e-06,
      "loss": 0.0108,
      "step": 1860060
    },
    {
      "epoch": 3.0440617165151247,
      "grad_norm": 0.08803480863571167,
      "learning_rate": 3.937652787570093e-06,
      "loss": 0.0136,
      "step": 1860080
    },
    {
      "epoch": 3.0440944469537783,
      "grad_norm": 0.430946946144104,
      "learning_rate": 3.937586895356576e-06,
      "loss": 0.0115,
      "step": 1860100
    },
    {
      "epoch": 3.0441271773924314,
      "grad_norm": 0.5323582887649536,
      "learning_rate": 3.937521003143059e-06,
      "loss": 0.0116,
      "step": 1860120
    },
    {
      "epoch": 3.0441599078310846,
      "grad_norm": 0.24969840049743652,
      "learning_rate": 3.937455110929542e-06,
      "loss": 0.0124,
      "step": 1860140
    },
    {
      "epoch": 3.044192638269738,
      "grad_norm": 0.07277149707078934,
      "learning_rate": 3.937389218716024e-06,
      "loss": 0.0103,
      "step": 1860160
    },
    {
      "epoch": 3.0442253687083913,
      "grad_norm": 0.2715805768966675,
      "learning_rate": 3.937323326502507e-06,
      "loss": 0.0107,
      "step": 1860180
    },
    {
      "epoch": 3.044258099147045,
      "grad_norm": 0.44193920493125916,
      "learning_rate": 3.93725743428899e-06,
      "loss": 0.0112,
      "step": 1860200
    },
    {
      "epoch": 3.044290829585698,
      "grad_norm": 0.25891607999801636,
      "learning_rate": 3.937191542075473e-06,
      "loss": 0.0116,
      "step": 1860220
    },
    {
      "epoch": 3.0443235600243517,
      "grad_norm": 0.28842297196388245,
      "learning_rate": 3.937125649861956e-06,
      "loss": 0.0104,
      "step": 1860240
    },
    {
      "epoch": 3.044356290463005,
      "grad_norm": 0.26273655891418457,
      "learning_rate": 3.937059757648439e-06,
      "loss": 0.0119,
      "step": 1860260
    },
    {
      "epoch": 3.044389020901658,
      "grad_norm": 0.47459930181503296,
      "learning_rate": 3.9369938654349224e-06,
      "loss": 0.0131,
      "step": 1860280
    },
    {
      "epoch": 3.0444217513403116,
      "grad_norm": 0.0856218934059143,
      "learning_rate": 3.936927973221405e-06,
      "loss": 0.0063,
      "step": 1860300
    },
    {
      "epoch": 3.0444544817789647,
      "grad_norm": 0.09744304418563843,
      "learning_rate": 3.936862081007888e-06,
      "loss": 0.0095,
      "step": 1860320
    },
    {
      "epoch": 3.0444872122176183,
      "grad_norm": 0.3487114906311035,
      "learning_rate": 3.936796188794371e-06,
      "loss": 0.0105,
      "step": 1860340
    },
    {
      "epoch": 3.0445199426562715,
      "grad_norm": 0.7079557180404663,
      "learning_rate": 3.936730296580853e-06,
      "loss": 0.013,
      "step": 1860360
    },
    {
      "epoch": 3.0445526730949246,
      "grad_norm": 0.7492108345031738,
      "learning_rate": 3.936664404367336e-06,
      "loss": 0.011,
      "step": 1860380
    },
    {
      "epoch": 3.044585403533578,
      "grad_norm": 0.23024019598960876,
      "learning_rate": 3.936598512153819e-06,
      "loss": 0.0143,
      "step": 1860400
    },
    {
      "epoch": 3.0446181339722314,
      "grad_norm": 0.8655999302864075,
      "learning_rate": 3.936532619940302e-06,
      "loss": 0.0101,
      "step": 1860420
    },
    {
      "epoch": 3.044650864410885,
      "grad_norm": 0.09430699050426483,
      "learning_rate": 3.936466727726785e-06,
      "loss": 0.0145,
      "step": 1860440
    },
    {
      "epoch": 3.044683594849538,
      "grad_norm": 0.30036410689353943,
      "learning_rate": 3.936400835513268e-06,
      "loss": 0.0106,
      "step": 1860460
    },
    {
      "epoch": 3.0447163252881917,
      "grad_norm": 0.3396628201007843,
      "learning_rate": 3.936334943299751e-06,
      "loss": 0.0146,
      "step": 1860480
    },
    {
      "epoch": 3.044749055726845,
      "grad_norm": 0.27437013387680054,
      "learning_rate": 3.9362690510862334e-06,
      "loss": 0.0095,
      "step": 1860500
    },
    {
      "epoch": 3.044781786165498,
      "grad_norm": 0.20712509751319885,
      "learning_rate": 3.936203158872716e-06,
      "loss": 0.0089,
      "step": 1860520
    },
    {
      "epoch": 3.0448145166041516,
      "grad_norm": 0.2312747985124588,
      "learning_rate": 3.936137266659199e-06,
      "loss": 0.0154,
      "step": 1860540
    },
    {
      "epoch": 3.0448472470428047,
      "grad_norm": 0.24308277666568756,
      "learning_rate": 3.936071374445682e-06,
      "loss": 0.0114,
      "step": 1860560
    },
    {
      "epoch": 3.0448799774814583,
      "grad_norm": 0.3799661099910736,
      "learning_rate": 3.936005482232164e-06,
      "loss": 0.0071,
      "step": 1860580
    },
    {
      "epoch": 3.0449127079201115,
      "grad_norm": 0.191549152135849,
      "learning_rate": 3.935939590018648e-06,
      "loss": 0.017,
      "step": 1860600
    },
    {
      "epoch": 3.044945438358765,
      "grad_norm": 0.15648765861988068,
      "learning_rate": 3.935873697805131e-06,
      "loss": 0.01,
      "step": 1860620
    },
    {
      "epoch": 3.0449781687974182,
      "grad_norm": 0.32018041610717773,
      "learning_rate": 3.9358078055916135e-06,
      "loss": 0.0118,
      "step": 1860640
    },
    {
      "epoch": 3.0450108992360714,
      "grad_norm": 0.6937215924263,
      "learning_rate": 3.935741913378097e-06,
      "loss": 0.0122,
      "step": 1860660
    },
    {
      "epoch": 3.045043629674725,
      "grad_norm": 0.1063469871878624,
      "learning_rate": 3.93567602116458e-06,
      "loss": 0.0103,
      "step": 1860680
    },
    {
      "epoch": 3.045076360113378,
      "grad_norm": 0.18306715786457062,
      "learning_rate": 3.9356101289510625e-06,
      "loss": 0.0111,
      "step": 1860700
    },
    {
      "epoch": 3.0451090905520317,
      "grad_norm": 1.3601715564727783,
      "learning_rate": 3.935544236737545e-06,
      "loss": 0.0127,
      "step": 1860720
    },
    {
      "epoch": 3.045141820990685,
      "grad_norm": 0.8202089071273804,
      "learning_rate": 3.935478344524028e-06,
      "loss": 0.0154,
      "step": 1860740
    },
    {
      "epoch": 3.0451745514293385,
      "grad_norm": 0.24765248596668243,
      "learning_rate": 3.935412452310511e-06,
      "loss": 0.015,
      "step": 1860760
    },
    {
      "epoch": 3.0452072818679916,
      "grad_norm": 0.19816845655441284,
      "learning_rate": 3.9353465600969935e-06,
      "loss": 0.0077,
      "step": 1860780
    },
    {
      "epoch": 3.0452400123066448,
      "grad_norm": 0.19366925954818726,
      "learning_rate": 3.935280667883476e-06,
      "loss": 0.014,
      "step": 1860800
    },
    {
      "epoch": 3.0452727427452984,
      "grad_norm": 0.3124770522117615,
      "learning_rate": 3.935214775669959e-06,
      "loss": 0.0118,
      "step": 1860820
    },
    {
      "epoch": 3.0453054731839515,
      "grad_norm": 0.7328392267227173,
      "learning_rate": 3.9351488834564426e-06,
      "loss": 0.0102,
      "step": 1860840
    },
    {
      "epoch": 3.045338203622605,
      "grad_norm": 0.09617370367050171,
      "learning_rate": 3.935082991242925e-06,
      "loss": 0.0105,
      "step": 1860860
    },
    {
      "epoch": 3.0453709340612583,
      "grad_norm": 0.04894913360476494,
      "learning_rate": 3.935017099029408e-06,
      "loss": 0.0127,
      "step": 1860880
    },
    {
      "epoch": 3.045403664499912,
      "grad_norm": 0.3249727189540863,
      "learning_rate": 3.934951206815891e-06,
      "loss": 0.0103,
      "step": 1860900
    },
    {
      "epoch": 3.045436394938565,
      "grad_norm": 0.5575857162475586,
      "learning_rate": 3.9348853146023735e-06,
      "loss": 0.0147,
      "step": 1860920
    },
    {
      "epoch": 3.045469125377218,
      "grad_norm": 0.4709576368331909,
      "learning_rate": 3.934819422388856e-06,
      "loss": 0.0084,
      "step": 1860940
    },
    {
      "epoch": 3.0455018558158717,
      "grad_norm": 0.5337667465209961,
      "learning_rate": 3.934753530175339e-06,
      "loss": 0.0152,
      "step": 1860960
    },
    {
      "epoch": 3.045534586254525,
      "grad_norm": 0.46667683124542236,
      "learning_rate": 3.9346876379618226e-06,
      "loss": 0.0118,
      "step": 1860980
    },
    {
      "epoch": 3.0455673166931785,
      "grad_norm": 0.7426719069480896,
      "learning_rate": 3.934621745748305e-06,
      "loss": 0.0136,
      "step": 1861000
    },
    {
      "epoch": 3.0456000471318316,
      "grad_norm": 0.23010791838169098,
      "learning_rate": 3.934555853534788e-06,
      "loss": 0.0131,
      "step": 1861020
    },
    {
      "epoch": 3.045632777570485,
      "grad_norm": 0.39357900619506836,
      "learning_rate": 3.934489961321271e-06,
      "loss": 0.0157,
      "step": 1861040
    },
    {
      "epoch": 3.0456655080091384,
      "grad_norm": 0.2808243930339813,
      "learning_rate": 3.934424069107754e-06,
      "loss": 0.01,
      "step": 1861060
    },
    {
      "epoch": 3.0456982384477915,
      "grad_norm": 0.27433401346206665,
      "learning_rate": 3.934358176894237e-06,
      "loss": 0.0125,
      "step": 1861080
    },
    {
      "epoch": 3.045730968886445,
      "grad_norm": 0.1985996514558792,
      "learning_rate": 3.93429228468072e-06,
      "loss": 0.012,
      "step": 1861100
    },
    {
      "epoch": 3.0457636993250983,
      "grad_norm": 0.3974519371986389,
      "learning_rate": 3.934226392467203e-06,
      "loss": 0.0169,
      "step": 1861120
    },
    {
      "epoch": 3.045796429763752,
      "grad_norm": 0.24513036012649536,
      "learning_rate": 3.934160500253685e-06,
      "loss": 0.0136,
      "step": 1861140
    },
    {
      "epoch": 3.045829160202405,
      "grad_norm": 0.1722218245267868,
      "learning_rate": 3.934094608040168e-06,
      "loss": 0.0138,
      "step": 1861160
    },
    {
      "epoch": 3.045861890641058,
      "grad_norm": 0.5159986615180969,
      "learning_rate": 3.934028715826651e-06,
      "loss": 0.0084,
      "step": 1861180
    },
    {
      "epoch": 3.0458946210797118,
      "grad_norm": 0.15315750241279602,
      "learning_rate": 3.9339628236131336e-06,
      "loss": 0.0168,
      "step": 1861200
    },
    {
      "epoch": 3.045927351518365,
      "grad_norm": 0.1850612759590149,
      "learning_rate": 3.933896931399616e-06,
      "loss": 0.0083,
      "step": 1861220
    },
    {
      "epoch": 3.0459600819570185,
      "grad_norm": 0.5333862900733948,
      "learning_rate": 3.9338310391861e-06,
      "loss": 0.0121,
      "step": 1861240
    },
    {
      "epoch": 3.0459928123956717,
      "grad_norm": 0.2773991525173187,
      "learning_rate": 3.933765146972583e-06,
      "loss": 0.0127,
      "step": 1861260
    },
    {
      "epoch": 3.0460255428343253,
      "grad_norm": 0.2662341892719269,
      "learning_rate": 3.933699254759065e-06,
      "loss": 0.0106,
      "step": 1861280
    },
    {
      "epoch": 3.0460582732729784,
      "grad_norm": 0.3196234405040741,
      "learning_rate": 3.933633362545548e-06,
      "loss": 0.0085,
      "step": 1861300
    },
    {
      "epoch": 3.0460910037116316,
      "grad_norm": 0.22348687052726746,
      "learning_rate": 3.933567470332032e-06,
      "loss": 0.0084,
      "step": 1861320
    },
    {
      "epoch": 3.046123734150285,
      "grad_norm": 0.20234332978725433,
      "learning_rate": 3.9335015781185144e-06,
      "loss": 0.0124,
      "step": 1861340
    },
    {
      "epoch": 3.0461564645889383,
      "grad_norm": 0.15716971457004547,
      "learning_rate": 3.933435685904997e-06,
      "loss": 0.0147,
      "step": 1861360
    },
    {
      "epoch": 3.046189195027592,
      "grad_norm": 0.10270040482282639,
      "learning_rate": 3.93336979369148e-06,
      "loss": 0.0089,
      "step": 1861380
    },
    {
      "epoch": 3.046221925466245,
      "grad_norm": 0.28322961926460266,
      "learning_rate": 3.933303901477963e-06,
      "loss": 0.0125,
      "step": 1861400
    },
    {
      "epoch": 3.0462546559048986,
      "grad_norm": 0.26990893483161926,
      "learning_rate": 3.933238009264445e-06,
      "loss": 0.0114,
      "step": 1861420
    },
    {
      "epoch": 3.046287386343552,
      "grad_norm": 0.469747930765152,
      "learning_rate": 3.933172117050928e-06,
      "loss": 0.0123,
      "step": 1861440
    },
    {
      "epoch": 3.046320116782205,
      "grad_norm": 0.5415199398994446,
      "learning_rate": 3.933106224837412e-06,
      "loss": 0.0124,
      "step": 1861460
    },
    {
      "epoch": 3.0463528472208585,
      "grad_norm": 0.2391737550497055,
      "learning_rate": 3.9330403326238945e-06,
      "loss": 0.0092,
      "step": 1861480
    },
    {
      "epoch": 3.0463855776595117,
      "grad_norm": 0.39656078815460205,
      "learning_rate": 3.932974440410377e-06,
      "loss": 0.0126,
      "step": 1861500
    },
    {
      "epoch": 3.0464183080981653,
      "grad_norm": 0.19621559977531433,
      "learning_rate": 3.93290854819686e-06,
      "loss": 0.0109,
      "step": 1861520
    },
    {
      "epoch": 3.0464510385368184,
      "grad_norm": 0.3065471053123474,
      "learning_rate": 3.932842655983343e-06,
      "loss": 0.0115,
      "step": 1861540
    },
    {
      "epoch": 3.046483768975472,
      "grad_norm": 0.16150102019309998,
      "learning_rate": 3.932776763769825e-06,
      "loss": 0.0141,
      "step": 1861560
    },
    {
      "epoch": 3.046516499414125,
      "grad_norm": 0.19384995102882385,
      "learning_rate": 3.932710871556308e-06,
      "loss": 0.0122,
      "step": 1861580
    },
    {
      "epoch": 3.0465492298527783,
      "grad_norm": 0.3090810179710388,
      "learning_rate": 3.932644979342791e-06,
      "loss": 0.0107,
      "step": 1861600
    },
    {
      "epoch": 3.046581960291432,
      "grad_norm": 0.08520693331956863,
      "learning_rate": 3.932579087129274e-06,
      "loss": 0.0131,
      "step": 1861620
    },
    {
      "epoch": 3.046614690730085,
      "grad_norm": 0.10920797288417816,
      "learning_rate": 3.932513194915757e-06,
      "loss": 0.012,
      "step": 1861640
    },
    {
      "epoch": 3.0466474211687387,
      "grad_norm": 0.06070176139473915,
      "learning_rate": 3.93244730270224e-06,
      "loss": 0.0121,
      "step": 1861660
    },
    {
      "epoch": 3.046680151607392,
      "grad_norm": 0.437360018491745,
      "learning_rate": 3.9323814104887235e-06,
      "loss": 0.0117,
      "step": 1861680
    },
    {
      "epoch": 3.0467128820460454,
      "grad_norm": 0.5865855813026428,
      "learning_rate": 3.932315518275206e-06,
      "loss": 0.0079,
      "step": 1861700
    },
    {
      "epoch": 3.0467456124846986,
      "grad_norm": 0.23428158462047577,
      "learning_rate": 3.932249626061689e-06,
      "loss": 0.0117,
      "step": 1861720
    },
    {
      "epoch": 3.0467783429233517,
      "grad_norm": 0.38589444756507874,
      "learning_rate": 3.932183733848172e-06,
      "loss": 0.0121,
      "step": 1861740
    },
    {
      "epoch": 3.0468110733620053,
      "grad_norm": 0.3326377272605896,
      "learning_rate": 3.9321178416346545e-06,
      "loss": 0.0141,
      "step": 1861760
    },
    {
      "epoch": 3.0468438038006584,
      "grad_norm": 0.3396153151988983,
      "learning_rate": 3.932051949421137e-06,
      "loss": 0.0113,
      "step": 1861780
    },
    {
      "epoch": 3.046876534239312,
      "grad_norm": 0.44902682304382324,
      "learning_rate": 3.93198605720762e-06,
      "loss": 0.0143,
      "step": 1861800
    },
    {
      "epoch": 3.046909264677965,
      "grad_norm": 0.30792319774627686,
      "learning_rate": 3.931920164994103e-06,
      "loss": 0.0098,
      "step": 1861820
    },
    {
      "epoch": 3.0469419951166183,
      "grad_norm": 0.3286028504371643,
      "learning_rate": 3.9318542727805855e-06,
      "loss": 0.0142,
      "step": 1861840
    },
    {
      "epoch": 3.046974725555272,
      "grad_norm": 0.10461457073688507,
      "learning_rate": 3.931788380567069e-06,
      "loss": 0.0118,
      "step": 1861860
    },
    {
      "epoch": 3.047007455993925,
      "grad_norm": 0.6421524286270142,
      "learning_rate": 3.931722488353552e-06,
      "loss": 0.0093,
      "step": 1861880
    },
    {
      "epoch": 3.0470401864325787,
      "grad_norm": 0.26046818494796753,
      "learning_rate": 3.9316565961400345e-06,
      "loss": 0.0089,
      "step": 1861900
    },
    {
      "epoch": 3.047072916871232,
      "grad_norm": 0.24869874119758606,
      "learning_rate": 3.931590703926517e-06,
      "loss": 0.0103,
      "step": 1861920
    },
    {
      "epoch": 3.0471056473098854,
      "grad_norm": 0.2553750276565552,
      "learning_rate": 3.931524811713e-06,
      "loss": 0.0088,
      "step": 1861940
    },
    {
      "epoch": 3.0471383777485386,
      "grad_norm": 0.09472403675317764,
      "learning_rate": 3.931458919499483e-06,
      "loss": 0.0084,
      "step": 1861960
    },
    {
      "epoch": 3.0471711081871917,
      "grad_norm": 0.17646238207817078,
      "learning_rate": 3.9313930272859655e-06,
      "loss": 0.0125,
      "step": 1861980
    },
    {
      "epoch": 3.0472038386258453,
      "grad_norm": 0.38543516397476196,
      "learning_rate": 3.931327135072448e-06,
      "loss": 0.0065,
      "step": 1862000
    },
    {
      "epoch": 3.0472365690644985,
      "grad_norm": 0.19034384191036224,
      "learning_rate": 3.931261242858932e-06,
      "loss": 0.0109,
      "step": 1862020
    },
    {
      "epoch": 3.047269299503152,
      "grad_norm": 0.22391600906848907,
      "learning_rate": 3.9311953506454146e-06,
      "loss": 0.0101,
      "step": 1862040
    },
    {
      "epoch": 3.047302029941805,
      "grad_norm": 0.13903528451919556,
      "learning_rate": 3.931129458431897e-06,
      "loss": 0.0125,
      "step": 1862060
    },
    {
      "epoch": 3.047334760380459,
      "grad_norm": 0.14756189286708832,
      "learning_rate": 3.931063566218381e-06,
      "loss": 0.012,
      "step": 1862080
    },
    {
      "epoch": 3.047367490819112,
      "grad_norm": 0.2482336461544037,
      "learning_rate": 3.930997674004864e-06,
      "loss": 0.0108,
      "step": 1862100
    },
    {
      "epoch": 3.047400221257765,
      "grad_norm": 0.13246475160121918,
      "learning_rate": 3.930931781791346e-06,
      "loss": 0.0151,
      "step": 1862120
    },
    {
      "epoch": 3.0474329516964187,
      "grad_norm": 0.26468291878700256,
      "learning_rate": 3.930865889577829e-06,
      "loss": 0.012,
      "step": 1862140
    },
    {
      "epoch": 3.047465682135072,
      "grad_norm": 0.3816402852535248,
      "learning_rate": 3.930799997364312e-06,
      "loss": 0.0131,
      "step": 1862160
    },
    {
      "epoch": 3.0474984125737254,
      "grad_norm": 0.34020063281059265,
      "learning_rate": 3.930734105150795e-06,
      "loss": 0.0129,
      "step": 1862180
    },
    {
      "epoch": 3.0475311430123786,
      "grad_norm": 0.11821337044239044,
      "learning_rate": 3.930668212937277e-06,
      "loss": 0.008,
      "step": 1862200
    },
    {
      "epoch": 3.047563873451032,
      "grad_norm": 0.2855512201786041,
      "learning_rate": 3.93060232072376e-06,
      "loss": 0.0188,
      "step": 1862220
    },
    {
      "epoch": 3.0475966038896853,
      "grad_norm": 0.3669930696487427,
      "learning_rate": 3.930536428510243e-06,
      "loss": 0.0094,
      "step": 1862240
    },
    {
      "epoch": 3.0476293343283385,
      "grad_norm": 0.14305981993675232,
      "learning_rate": 3.930470536296726e-06,
      "loss": 0.0122,
      "step": 1862260
    },
    {
      "epoch": 3.047662064766992,
      "grad_norm": 0.4368721842765808,
      "learning_rate": 3.930404644083209e-06,
      "loss": 0.0145,
      "step": 1862280
    },
    {
      "epoch": 3.0476947952056452,
      "grad_norm": 0.23211796581745148,
      "learning_rate": 3.930338751869692e-06,
      "loss": 0.0122,
      "step": 1862300
    },
    {
      "epoch": 3.047727525644299,
      "grad_norm": 0.47633814811706543,
      "learning_rate": 3.930272859656175e-06,
      "loss": 0.0078,
      "step": 1862320
    },
    {
      "epoch": 3.047760256082952,
      "grad_norm": 0.46656325459480286,
      "learning_rate": 3.930206967442657e-06,
      "loss": 0.0073,
      "step": 1862340
    },
    {
      "epoch": 3.0477929865216056,
      "grad_norm": 0.2348594069480896,
      "learning_rate": 3.93014107522914e-06,
      "loss": 0.0113,
      "step": 1862360
    },
    {
      "epoch": 3.0478257169602587,
      "grad_norm": 0.12756921350955963,
      "learning_rate": 3.930075183015624e-06,
      "loss": 0.0105,
      "step": 1862380
    },
    {
      "epoch": 3.047858447398912,
      "grad_norm": 0.0925137922167778,
      "learning_rate": 3.930009290802106e-06,
      "loss": 0.0104,
      "step": 1862400
    },
    {
      "epoch": 3.0478911778375655,
      "grad_norm": 0.24147959053516388,
      "learning_rate": 3.929943398588589e-06,
      "loss": 0.0127,
      "step": 1862420
    },
    {
      "epoch": 3.0479239082762186,
      "grad_norm": 0.3648335337638855,
      "learning_rate": 3.929877506375072e-06,
      "loss": 0.014,
      "step": 1862440
    },
    {
      "epoch": 3.047956638714872,
      "grad_norm": 0.11903848499059677,
      "learning_rate": 3.929811614161555e-06,
      "loss": 0.0069,
      "step": 1862460
    },
    {
      "epoch": 3.0479893691535254,
      "grad_norm": 0.24041207134723663,
      "learning_rate": 3.929745721948038e-06,
      "loss": 0.0118,
      "step": 1862480
    },
    {
      "epoch": 3.0480220995921785,
      "grad_norm": 0.25558289885520935,
      "learning_rate": 3.929679829734521e-06,
      "loss": 0.0174,
      "step": 1862500
    },
    {
      "epoch": 3.048054830030832,
      "grad_norm": 0.3163025975227356,
      "learning_rate": 3.929613937521004e-06,
      "loss": 0.0103,
      "step": 1862520
    },
    {
      "epoch": 3.0480875604694853,
      "grad_norm": 0.23129378259181976,
      "learning_rate": 3.9295480453074864e-06,
      "loss": 0.0132,
      "step": 1862540
    },
    {
      "epoch": 3.048120290908139,
      "grad_norm": 0.32355016469955444,
      "learning_rate": 3.929482153093969e-06,
      "loss": 0.0116,
      "step": 1862560
    },
    {
      "epoch": 3.048153021346792,
      "grad_norm": 0.7343614101409912,
      "learning_rate": 3.929416260880452e-06,
      "loss": 0.0131,
      "step": 1862580
    },
    {
      "epoch": 3.0481857517854456,
      "grad_norm": 0.3075945973396301,
      "learning_rate": 3.929350368666935e-06,
      "loss": 0.0101,
      "step": 1862600
    },
    {
      "epoch": 3.0482184822240987,
      "grad_norm": 0.14368699491024017,
      "learning_rate": 3.929284476453417e-06,
      "loss": 0.0096,
      "step": 1862620
    },
    {
      "epoch": 3.048251212662752,
      "grad_norm": 0.14483687281608582,
      "learning_rate": 3.9292185842399e-06,
      "loss": 0.0104,
      "step": 1862640
    },
    {
      "epoch": 3.0482839431014055,
      "grad_norm": 0.16086222231388092,
      "learning_rate": 3.929152692026384e-06,
      "loss": 0.009,
      "step": 1862660
    },
    {
      "epoch": 3.0483166735400586,
      "grad_norm": 0.2272139936685562,
      "learning_rate": 3.9290867998128665e-06,
      "loss": 0.0156,
      "step": 1862680
    },
    {
      "epoch": 3.0483494039787122,
      "grad_norm": 0.5740697979927063,
      "learning_rate": 3.929020907599349e-06,
      "loss": 0.0142,
      "step": 1862700
    },
    {
      "epoch": 3.0483821344173654,
      "grad_norm": 0.04662936180830002,
      "learning_rate": 3.928955015385832e-06,
      "loss": 0.0131,
      "step": 1862720
    },
    {
      "epoch": 3.048414864856019,
      "grad_norm": 0.4706633388996124,
      "learning_rate": 3.9288891231723155e-06,
      "loss": 0.0131,
      "step": 1862740
    },
    {
      "epoch": 3.048447595294672,
      "grad_norm": 0.08606672286987305,
      "learning_rate": 3.928823230958798e-06,
      "loss": 0.0109,
      "step": 1862760
    },
    {
      "epoch": 3.0484803257333253,
      "grad_norm": 0.29822900891304016,
      "learning_rate": 3.928757338745281e-06,
      "loss": 0.0154,
      "step": 1862780
    },
    {
      "epoch": 3.048513056171979,
      "grad_norm": 0.2766371965408325,
      "learning_rate": 3.928691446531764e-06,
      "loss": 0.0187,
      "step": 1862800
    },
    {
      "epoch": 3.048545786610632,
      "grad_norm": 0.2304191142320633,
      "learning_rate": 3.9286255543182465e-06,
      "loss": 0.0081,
      "step": 1862820
    },
    {
      "epoch": 3.0485785170492856,
      "grad_norm": 0.12516546249389648,
      "learning_rate": 3.928559662104729e-06,
      "loss": 0.0091,
      "step": 1862840
    },
    {
      "epoch": 3.0486112474879388,
      "grad_norm": 0.6334003210067749,
      "learning_rate": 3.928493769891212e-06,
      "loss": 0.0102,
      "step": 1862860
    },
    {
      "epoch": 3.0486439779265924,
      "grad_norm": 0.450115442276001,
      "learning_rate": 3.9284278776776956e-06,
      "loss": 0.0179,
      "step": 1862880
    },
    {
      "epoch": 3.0486767083652455,
      "grad_norm": 0.4026837646961212,
      "learning_rate": 3.928361985464178e-06,
      "loss": 0.0155,
      "step": 1862900
    },
    {
      "epoch": 3.0487094388038987,
      "grad_norm": 0.12207172065973282,
      "learning_rate": 3.928296093250661e-06,
      "loss": 0.012,
      "step": 1862920
    },
    {
      "epoch": 3.0487421692425523,
      "grad_norm": 0.5949299335479736,
      "learning_rate": 3.928230201037144e-06,
      "loss": 0.0143,
      "step": 1862940
    },
    {
      "epoch": 3.0487748996812054,
      "grad_norm": 0.17037831246852875,
      "learning_rate": 3.9281643088236265e-06,
      "loss": 0.0138,
      "step": 1862960
    },
    {
      "epoch": 3.048807630119859,
      "grad_norm": 0.31484454870224,
      "learning_rate": 3.928098416610109e-06,
      "loss": 0.0144,
      "step": 1862980
    },
    {
      "epoch": 3.048840360558512,
      "grad_norm": 0.14665983617305756,
      "learning_rate": 3.928032524396592e-06,
      "loss": 0.0091,
      "step": 1863000
    },
    {
      "epoch": 3.0488730909971657,
      "grad_norm": 0.42776980996131897,
      "learning_rate": 3.927966632183075e-06,
      "loss": 0.0098,
      "step": 1863020
    },
    {
      "epoch": 3.048905821435819,
      "grad_norm": 0.27069637179374695,
      "learning_rate": 3.9279007399695575e-06,
      "loss": 0.0177,
      "step": 1863040
    },
    {
      "epoch": 3.048938551874472,
      "grad_norm": 0.22090256214141846,
      "learning_rate": 3.927834847756041e-06,
      "loss": 0.0093,
      "step": 1863060
    },
    {
      "epoch": 3.0489712823131256,
      "grad_norm": 0.3237206041812897,
      "learning_rate": 3.927768955542524e-06,
      "loss": 0.0137,
      "step": 1863080
    },
    {
      "epoch": 3.049004012751779,
      "grad_norm": 0.13560669124126434,
      "learning_rate": 3.927703063329007e-06,
      "loss": 0.0119,
      "step": 1863100
    },
    {
      "epoch": 3.0490367431904324,
      "grad_norm": 0.0893549770116806,
      "learning_rate": 3.92763717111549e-06,
      "loss": 0.0107,
      "step": 1863120
    },
    {
      "epoch": 3.0490694736290855,
      "grad_norm": 0.28466829657554626,
      "learning_rate": 3.927571278901973e-06,
      "loss": 0.0097,
      "step": 1863140
    },
    {
      "epoch": 3.049102204067739,
      "grad_norm": 0.5113202333450317,
      "learning_rate": 3.927505386688456e-06,
      "loss": 0.0142,
      "step": 1863160
    },
    {
      "epoch": 3.0491349345063923,
      "grad_norm": 0.3768683969974518,
      "learning_rate": 3.927439494474938e-06,
      "loss": 0.0133,
      "step": 1863180
    },
    {
      "epoch": 3.0491676649450454,
      "grad_norm": 0.34226855635643005,
      "learning_rate": 3.927373602261421e-06,
      "loss": 0.0172,
      "step": 1863200
    },
    {
      "epoch": 3.049200395383699,
      "grad_norm": 0.3710300922393799,
      "learning_rate": 3.927307710047904e-06,
      "loss": 0.008,
      "step": 1863220
    },
    {
      "epoch": 3.049233125822352,
      "grad_norm": 0.26449915766716003,
      "learning_rate": 3.9272418178343866e-06,
      "loss": 0.0126,
      "step": 1863240
    },
    {
      "epoch": 3.0492658562610058,
      "grad_norm": 0.3926280736923218,
      "learning_rate": 3.927175925620869e-06,
      "loss": 0.0095,
      "step": 1863260
    },
    {
      "epoch": 3.049298586699659,
      "grad_norm": 0.2549624741077423,
      "learning_rate": 3.927110033407353e-06,
      "loss": 0.0078,
      "step": 1863280
    },
    {
      "epoch": 3.049331317138312,
      "grad_norm": 0.9211443066596985,
      "learning_rate": 3.927044141193836e-06,
      "loss": 0.0212,
      "step": 1863300
    },
    {
      "epoch": 3.0493640475769657,
      "grad_norm": 0.971190333366394,
      "learning_rate": 3.926978248980318e-06,
      "loss": 0.0142,
      "step": 1863320
    },
    {
      "epoch": 3.049396778015619,
      "grad_norm": 0.048713285475969315,
      "learning_rate": 3.926912356766801e-06,
      "loss": 0.0065,
      "step": 1863340
    },
    {
      "epoch": 3.0494295084542724,
      "grad_norm": 0.04942115768790245,
      "learning_rate": 3.926846464553284e-06,
      "loss": 0.0106,
      "step": 1863360
    },
    {
      "epoch": 3.0494622388929256,
      "grad_norm": 0.09291622042655945,
      "learning_rate": 3.926780572339767e-06,
      "loss": 0.0135,
      "step": 1863380
    },
    {
      "epoch": 3.049494969331579,
      "grad_norm": 0.3875989615917206,
      "learning_rate": 3.926714680126249e-06,
      "loss": 0.0124,
      "step": 1863400
    },
    {
      "epoch": 3.0495276997702323,
      "grad_norm": 0.10017406195402145,
      "learning_rate": 3.926648787912732e-06,
      "loss": 0.0116,
      "step": 1863420
    },
    {
      "epoch": 3.0495604302088855,
      "grad_norm": 0.3778877854347229,
      "learning_rate": 3.926582895699216e-06,
      "loss": 0.0092,
      "step": 1863440
    },
    {
      "epoch": 3.049593160647539,
      "grad_norm": 0.2258114218711853,
      "learning_rate": 3.926517003485698e-06,
      "loss": 0.0127,
      "step": 1863460
    },
    {
      "epoch": 3.049625891086192,
      "grad_norm": 0.14486603438854218,
      "learning_rate": 3.926451111272181e-06,
      "loss": 0.0086,
      "step": 1863480
    },
    {
      "epoch": 3.049658621524846,
      "grad_norm": 0.20262493193149567,
      "learning_rate": 3.926385219058665e-06,
      "loss": 0.0113,
      "step": 1863500
    },
    {
      "epoch": 3.049691351963499,
      "grad_norm": 0.25096702575683594,
      "learning_rate": 3.9263193268451475e-06,
      "loss": 0.0068,
      "step": 1863520
    },
    {
      "epoch": 3.0497240824021525,
      "grad_norm": 0.30553925037384033,
      "learning_rate": 3.92625343463163e-06,
      "loss": 0.0105,
      "step": 1863540
    },
    {
      "epoch": 3.0497568128408057,
      "grad_norm": 0.6645941734313965,
      "learning_rate": 3.926187542418113e-06,
      "loss": 0.0139,
      "step": 1863560
    },
    {
      "epoch": 3.049789543279459,
      "grad_norm": 0.7548800706863403,
      "learning_rate": 3.926121650204596e-06,
      "loss": 0.0128,
      "step": 1863580
    },
    {
      "epoch": 3.0498222737181124,
      "grad_norm": 0.6039351224899292,
      "learning_rate": 3.926055757991078e-06,
      "loss": 0.011,
      "step": 1863600
    },
    {
      "epoch": 3.0498550041567656,
      "grad_norm": 0.14853450655937195,
      "learning_rate": 3.925989865777561e-06,
      "loss": 0.0106,
      "step": 1863620
    },
    {
      "epoch": 3.049887734595419,
      "grad_norm": 0.36002930998802185,
      "learning_rate": 3.925923973564044e-06,
      "loss": 0.0124,
      "step": 1863640
    },
    {
      "epoch": 3.0499204650340723,
      "grad_norm": 0.261629194021225,
      "learning_rate": 3.925858081350527e-06,
      "loss": 0.0147,
      "step": 1863660
    },
    {
      "epoch": 3.049953195472726,
      "grad_norm": 0.31831246614456177,
      "learning_rate": 3.92579218913701e-06,
      "loss": 0.0099,
      "step": 1863680
    },
    {
      "epoch": 3.049985925911379,
      "grad_norm": 0.9267076849937439,
      "learning_rate": 3.925726296923493e-06,
      "loss": 0.0108,
      "step": 1863700
    },
    {
      "epoch": 3.050018656350032,
      "grad_norm": 0.37377598881721497,
      "learning_rate": 3.925660404709976e-06,
      "loss": 0.0188,
      "step": 1863720
    },
    {
      "epoch": 3.050051386788686,
      "grad_norm": 0.24735741317272186,
      "learning_rate": 3.9255945124964584e-06,
      "loss": 0.0121,
      "step": 1863740
    },
    {
      "epoch": 3.050084117227339,
      "grad_norm": 0.13009947538375854,
      "learning_rate": 3.925528620282941e-06,
      "loss": 0.0111,
      "step": 1863760
    },
    {
      "epoch": 3.0501168476659926,
      "grad_norm": 0.42721787095069885,
      "learning_rate": 3.925462728069425e-06,
      "loss": 0.0129,
      "step": 1863780
    },
    {
      "epoch": 3.0501495781046457,
      "grad_norm": 0.35106655955314636,
      "learning_rate": 3.9253968358559075e-06,
      "loss": 0.0122,
      "step": 1863800
    },
    {
      "epoch": 3.0501823085432993,
      "grad_norm": 0.25492072105407715,
      "learning_rate": 3.92533094364239e-06,
      "loss": 0.0126,
      "step": 1863820
    },
    {
      "epoch": 3.0502150389819525,
      "grad_norm": 0.9772111773490906,
      "learning_rate": 3.925265051428873e-06,
      "loss": 0.0144,
      "step": 1863840
    },
    {
      "epoch": 3.0502477694206056,
      "grad_norm": 0.6359803080558777,
      "learning_rate": 3.925199159215356e-06,
      "loss": 0.0177,
      "step": 1863860
    },
    {
      "epoch": 3.050280499859259,
      "grad_norm": 0.4745279550552368,
      "learning_rate": 3.9251332670018385e-06,
      "loss": 0.0159,
      "step": 1863880
    },
    {
      "epoch": 3.0503132302979123,
      "grad_norm": 0.8175570964813232,
      "learning_rate": 3.925067374788322e-06,
      "loss": 0.0121,
      "step": 1863900
    },
    {
      "epoch": 3.050345960736566,
      "grad_norm": 0.18290439248085022,
      "learning_rate": 3.925001482574805e-06,
      "loss": 0.0139,
      "step": 1863920
    },
    {
      "epoch": 3.050378691175219,
      "grad_norm": 0.7131936550140381,
      "learning_rate": 3.9249355903612875e-06,
      "loss": 0.0128,
      "step": 1863940
    },
    {
      "epoch": 3.0504114216138727,
      "grad_norm": 0.3133101165294647,
      "learning_rate": 3.92486969814777e-06,
      "loss": 0.0091,
      "step": 1863960
    },
    {
      "epoch": 3.050444152052526,
      "grad_norm": 0.331555038690567,
      "learning_rate": 3.924803805934253e-06,
      "loss": 0.0085,
      "step": 1863980
    },
    {
      "epoch": 3.050476882491179,
      "grad_norm": 0.6066673398017883,
      "learning_rate": 3.924737913720736e-06,
      "loss": 0.0171,
      "step": 1864000
    },
    {
      "epoch": 3.0505096129298326,
      "grad_norm": 0.1360941380262375,
      "learning_rate": 3.9246720215072185e-06,
      "loss": 0.0109,
      "step": 1864020
    },
    {
      "epoch": 3.0505423433684857,
      "grad_norm": 0.11459076404571533,
      "learning_rate": 3.924606129293701e-06,
      "loss": 0.01,
      "step": 1864040
    },
    {
      "epoch": 3.0505750738071393,
      "grad_norm": 1.2881672382354736,
      "learning_rate": 3.924540237080184e-06,
      "loss": 0.0107,
      "step": 1864060
    },
    {
      "epoch": 3.0506078042457925,
      "grad_norm": 0.21703268587589264,
      "learning_rate": 3.9244743448666676e-06,
      "loss": 0.0111,
      "step": 1864080
    },
    {
      "epoch": 3.0506405346844456,
      "grad_norm": 0.4241306185722351,
      "learning_rate": 3.92440845265315e-06,
      "loss": 0.012,
      "step": 1864100
    },
    {
      "epoch": 3.050673265123099,
      "grad_norm": 0.3792983889579773,
      "learning_rate": 3.924342560439633e-06,
      "loss": 0.0188,
      "step": 1864120
    },
    {
      "epoch": 3.0507059955617524,
      "grad_norm": 0.1694715917110443,
      "learning_rate": 3.924276668226117e-06,
      "loss": 0.015,
      "step": 1864140
    },
    {
      "epoch": 3.050738726000406,
      "grad_norm": 0.8621122241020203,
      "learning_rate": 3.924210776012599e-06,
      "loss": 0.0192,
      "step": 1864160
    },
    {
      "epoch": 3.050771456439059,
      "grad_norm": 0.300642192363739,
      "learning_rate": 3.924144883799082e-06,
      "loss": 0.0105,
      "step": 1864180
    },
    {
      "epoch": 3.0508041868777127,
      "grad_norm": 0.20062486827373505,
      "learning_rate": 3.924078991585565e-06,
      "loss": 0.0098,
      "step": 1864200
    },
    {
      "epoch": 3.050836917316366,
      "grad_norm": 0.723820149898529,
      "learning_rate": 3.924013099372048e-06,
      "loss": 0.0091,
      "step": 1864220
    },
    {
      "epoch": 3.050869647755019,
      "grad_norm": 0.09480903297662735,
      "learning_rate": 3.92394720715853e-06,
      "loss": 0.0094,
      "step": 1864240
    },
    {
      "epoch": 3.0509023781936726,
      "grad_norm": 0.07626409083604813,
      "learning_rate": 3.923881314945013e-06,
      "loss": 0.0101,
      "step": 1864260
    },
    {
      "epoch": 3.0509351086323258,
      "grad_norm": 0.13625240325927734,
      "learning_rate": 3.923815422731496e-06,
      "loss": 0.0143,
      "step": 1864280
    },
    {
      "epoch": 3.0509678390709793,
      "grad_norm": 0.31499338150024414,
      "learning_rate": 3.923749530517979e-06,
      "loss": 0.0125,
      "step": 1864300
    },
    {
      "epoch": 3.0510005695096325,
      "grad_norm": 0.5011507272720337,
      "learning_rate": 3.923683638304462e-06,
      "loss": 0.0162,
      "step": 1864320
    },
    {
      "epoch": 3.051033299948286,
      "grad_norm": 0.27821144461631775,
      "learning_rate": 3.923617746090945e-06,
      "loss": 0.0117,
      "step": 1864340
    },
    {
      "epoch": 3.0510660303869392,
      "grad_norm": 0.31659260392189026,
      "learning_rate": 3.923551853877428e-06,
      "loss": 0.0081,
      "step": 1864360
    },
    {
      "epoch": 3.0510987608255924,
      "grad_norm": 0.29778560996055603,
      "learning_rate": 3.92348596166391e-06,
      "loss": 0.006,
      "step": 1864380
    },
    {
      "epoch": 3.051131491264246,
      "grad_norm": 0.36721324920654297,
      "learning_rate": 3.923420069450393e-06,
      "loss": 0.0155,
      "step": 1864400
    },
    {
      "epoch": 3.051164221702899,
      "grad_norm": 1.401789665222168,
      "learning_rate": 3.923354177236876e-06,
      "loss": 0.0117,
      "step": 1864420
    },
    {
      "epoch": 3.0511969521415527,
      "grad_norm": 0.2138347625732422,
      "learning_rate": 3.9232882850233586e-06,
      "loss": 0.0116,
      "step": 1864440
    },
    {
      "epoch": 3.051229682580206,
      "grad_norm": 0.31146708130836487,
      "learning_rate": 3.923222392809842e-06,
      "loss": 0.0166,
      "step": 1864460
    },
    {
      "epoch": 3.0512624130188595,
      "grad_norm": 0.6364186406135559,
      "learning_rate": 3.923156500596325e-06,
      "loss": 0.0164,
      "step": 1864480
    },
    {
      "epoch": 3.0512951434575126,
      "grad_norm": 0.6193792819976807,
      "learning_rate": 3.923090608382808e-06,
      "loss": 0.0076,
      "step": 1864500
    },
    {
      "epoch": 3.0513278738961658,
      "grad_norm": 0.20481397211551666,
      "learning_rate": 3.923024716169291e-06,
      "loss": 0.0081,
      "step": 1864520
    },
    {
      "epoch": 3.0513606043348194,
      "grad_norm": 0.2999437153339386,
      "learning_rate": 3.922958823955774e-06,
      "loss": 0.0157,
      "step": 1864540
    },
    {
      "epoch": 3.0513933347734725,
      "grad_norm": 0.1621493399143219,
      "learning_rate": 3.922892931742257e-06,
      "loss": 0.0082,
      "step": 1864560
    },
    {
      "epoch": 3.051426065212126,
      "grad_norm": 0.21631988883018494,
      "learning_rate": 3.9228270395287394e-06,
      "loss": 0.0082,
      "step": 1864580
    },
    {
      "epoch": 3.0514587956507793,
      "grad_norm": 0.1303088217973709,
      "learning_rate": 3.922761147315222e-06,
      "loss": 0.0139,
      "step": 1864600
    },
    {
      "epoch": 3.051491526089433,
      "grad_norm": 2.4420506954193115,
      "learning_rate": 3.922695255101705e-06,
      "loss": 0.0159,
      "step": 1864620
    },
    {
      "epoch": 3.051524256528086,
      "grad_norm": 0.3096175491809845,
      "learning_rate": 3.922629362888188e-06,
      "loss": 0.0092,
      "step": 1864640
    },
    {
      "epoch": 3.051556986966739,
      "grad_norm": 0.16719290614128113,
      "learning_rate": 3.92256347067467e-06,
      "loss": 0.0131,
      "step": 1864660
    },
    {
      "epoch": 3.0515897174053928,
      "grad_norm": 0.041976913809776306,
      "learning_rate": 3.922497578461153e-06,
      "loss": 0.0077,
      "step": 1864680
    },
    {
      "epoch": 3.051622447844046,
      "grad_norm": 0.16111120581626892,
      "learning_rate": 3.922431686247637e-06,
      "loss": 0.0115,
      "step": 1864700
    },
    {
      "epoch": 3.0516551782826995,
      "grad_norm": 0.25032633543014526,
      "learning_rate": 3.9223657940341195e-06,
      "loss": 0.0093,
      "step": 1864720
    },
    {
      "epoch": 3.0516879087213526,
      "grad_norm": 0.18771034479141235,
      "learning_rate": 3.922299901820602e-06,
      "loss": 0.0115,
      "step": 1864740
    },
    {
      "epoch": 3.0517206391600062,
      "grad_norm": 0.13684020936489105,
      "learning_rate": 3.922234009607085e-06,
      "loss": 0.0128,
      "step": 1864760
    },
    {
      "epoch": 3.0517533695986594,
      "grad_norm": 0.12077704071998596,
      "learning_rate": 3.922168117393568e-06,
      "loss": 0.0271,
      "step": 1864780
    },
    {
      "epoch": 3.0517861000373125,
      "grad_norm": 0.4056764841079712,
      "learning_rate": 3.9221022251800504e-06,
      "loss": 0.013,
      "step": 1864800
    },
    {
      "epoch": 3.051818830475966,
      "grad_norm": 0.5492175221443176,
      "learning_rate": 3.922036332966533e-06,
      "loss": 0.0106,
      "step": 1864820
    },
    {
      "epoch": 3.0518515609146193,
      "grad_norm": 0.6849202513694763,
      "learning_rate": 3.921970440753017e-06,
      "loss": 0.0126,
      "step": 1864840
    },
    {
      "epoch": 3.051884291353273,
      "grad_norm": 0.21023252606391907,
      "learning_rate": 3.9219045485394995e-06,
      "loss": 0.0155,
      "step": 1864860
    },
    {
      "epoch": 3.051917021791926,
      "grad_norm": 0.2575337886810303,
      "learning_rate": 3.921838656325982e-06,
      "loss": 0.0118,
      "step": 1864880
    },
    {
      "epoch": 3.051949752230579,
      "grad_norm": 0.6738651990890503,
      "learning_rate": 3.921772764112465e-06,
      "loss": 0.0113,
      "step": 1864900
    },
    {
      "epoch": 3.0519824826692328,
      "grad_norm": 0.1419912576675415,
      "learning_rate": 3.9217068718989486e-06,
      "loss": 0.0092,
      "step": 1864920
    },
    {
      "epoch": 3.052015213107886,
      "grad_norm": 0.2677110731601715,
      "learning_rate": 3.921640979685431e-06,
      "loss": 0.0113,
      "step": 1864940
    },
    {
      "epoch": 3.0520479435465395,
      "grad_norm": 0.2918924391269684,
      "learning_rate": 3.921575087471914e-06,
      "loss": 0.0127,
      "step": 1864960
    },
    {
      "epoch": 3.0520806739851927,
      "grad_norm": 0.11433147639036179,
      "learning_rate": 3.921509195258397e-06,
      "loss": 0.0117,
      "step": 1864980
    },
    {
      "epoch": 3.0521134044238463,
      "grad_norm": 0.10753971338272095,
      "learning_rate": 3.9214433030448795e-06,
      "loss": 0.0196,
      "step": 1865000
    },
    {
      "epoch": 3.0521461348624994,
      "grad_norm": 0.2579255700111389,
      "learning_rate": 3.921377410831362e-06,
      "loss": 0.0161,
      "step": 1865020
    },
    {
      "epoch": 3.0521788653011526,
      "grad_norm": 0.3317582905292511,
      "learning_rate": 3.921311518617845e-06,
      "loss": 0.01,
      "step": 1865040
    },
    {
      "epoch": 3.052211595739806,
      "grad_norm": 0.4267899990081787,
      "learning_rate": 3.921245626404328e-06,
      "loss": 0.0172,
      "step": 1865060
    },
    {
      "epoch": 3.0522443261784593,
      "grad_norm": 1.278079867362976,
      "learning_rate": 3.9211797341908105e-06,
      "loss": 0.0137,
      "step": 1865080
    },
    {
      "epoch": 3.052277056617113,
      "grad_norm": 0.2501300573348999,
      "learning_rate": 3.921113841977294e-06,
      "loss": 0.0101,
      "step": 1865100
    },
    {
      "epoch": 3.052309787055766,
      "grad_norm": 0.45320549607276917,
      "learning_rate": 3.921047949763777e-06,
      "loss": 0.0104,
      "step": 1865120
    },
    {
      "epoch": 3.0523425174944196,
      "grad_norm": 0.3125351071357727,
      "learning_rate": 3.9209820575502595e-06,
      "loss": 0.0164,
      "step": 1865140
    },
    {
      "epoch": 3.052375247933073,
      "grad_norm": 0.5822694301605225,
      "learning_rate": 3.920916165336742e-06,
      "loss": 0.0118,
      "step": 1865160
    },
    {
      "epoch": 3.052407978371726,
      "grad_norm": 0.2684701383113861,
      "learning_rate": 3.920850273123225e-06,
      "loss": 0.0182,
      "step": 1865180
    },
    {
      "epoch": 3.0524407088103795,
      "grad_norm": 0.9471738338470459,
      "learning_rate": 3.920784380909709e-06,
      "loss": 0.0162,
      "step": 1865200
    },
    {
      "epoch": 3.0524734392490327,
      "grad_norm": 0.11436394602060318,
      "learning_rate": 3.920718488696191e-06,
      "loss": 0.0133,
      "step": 1865220
    },
    {
      "epoch": 3.0525061696876863,
      "grad_norm": 0.3909308612346649,
      "learning_rate": 3.920652596482674e-06,
      "loss": 0.0076,
      "step": 1865240
    },
    {
      "epoch": 3.0525389001263394,
      "grad_norm": 0.5322562456130981,
      "learning_rate": 3.920586704269157e-06,
      "loss": 0.0134,
      "step": 1865260
    },
    {
      "epoch": 3.052571630564993,
      "grad_norm": 0.34852662682533264,
      "learning_rate": 3.9205208120556396e-06,
      "loss": 0.0119,
      "step": 1865280
    },
    {
      "epoch": 3.052604361003646,
      "grad_norm": 0.1562308669090271,
      "learning_rate": 3.920454919842122e-06,
      "loss": 0.0091,
      "step": 1865300
    },
    {
      "epoch": 3.0526370914422993,
      "grad_norm": 0.3815619945526123,
      "learning_rate": 3.920389027628606e-06,
      "loss": 0.0081,
      "step": 1865320
    },
    {
      "epoch": 3.052669821880953,
      "grad_norm": 0.23351752758026123,
      "learning_rate": 3.920323135415089e-06,
      "loss": 0.0107,
      "step": 1865340
    },
    {
      "epoch": 3.052702552319606,
      "grad_norm": 0.13488158583641052,
      "learning_rate": 3.920257243201571e-06,
      "loss": 0.0093,
      "step": 1865360
    },
    {
      "epoch": 3.0527352827582597,
      "grad_norm": 0.3154049813747406,
      "learning_rate": 3.920191350988054e-06,
      "loss": 0.0089,
      "step": 1865380
    },
    {
      "epoch": 3.052768013196913,
      "grad_norm": 0.7046347260475159,
      "learning_rate": 3.920125458774537e-06,
      "loss": 0.0111,
      "step": 1865400
    },
    {
      "epoch": 3.0528007436355664,
      "grad_norm": 0.6984214186668396,
      "learning_rate": 3.92005956656102e-06,
      "loss": 0.0142,
      "step": 1865420
    },
    {
      "epoch": 3.0528334740742196,
      "grad_norm": 0.07968258112668991,
      "learning_rate": 3.919993674347502e-06,
      "loss": 0.013,
      "step": 1865440
    },
    {
      "epoch": 3.0528662045128727,
      "grad_norm": 0.2712765336036682,
      "learning_rate": 3.919927782133985e-06,
      "loss": 0.0152,
      "step": 1865460
    },
    {
      "epoch": 3.0528989349515263,
      "grad_norm": 0.2157779335975647,
      "learning_rate": 3.919861889920468e-06,
      "loss": 0.0063,
      "step": 1865480
    },
    {
      "epoch": 3.0529316653901795,
      "grad_norm": 0.1743142157793045,
      "learning_rate": 3.919795997706951e-06,
      "loss": 0.0093,
      "step": 1865500
    },
    {
      "epoch": 3.052964395828833,
      "grad_norm": 0.25180718302726746,
      "learning_rate": 3.919730105493434e-06,
      "loss": 0.0144,
      "step": 1865520
    },
    {
      "epoch": 3.052997126267486,
      "grad_norm": 0.23089735209941864,
      "learning_rate": 3.919664213279917e-06,
      "loss": 0.0086,
      "step": 1865540
    },
    {
      "epoch": 3.0530298567061394,
      "grad_norm": 0.35470789670944214,
      "learning_rate": 3.9195983210664005e-06,
      "loss": 0.0086,
      "step": 1865560
    },
    {
      "epoch": 3.053062587144793,
      "grad_norm": 0.38248059153556824,
      "learning_rate": 3.919532428852883e-06,
      "loss": 0.0089,
      "step": 1865580
    },
    {
      "epoch": 3.053095317583446,
      "grad_norm": 0.3090589940547943,
      "learning_rate": 3.919466536639366e-06,
      "loss": 0.0133,
      "step": 1865600
    },
    {
      "epoch": 3.0531280480220997,
      "grad_norm": 0.30387449264526367,
      "learning_rate": 3.919400644425849e-06,
      "loss": 0.0092,
      "step": 1865620
    },
    {
      "epoch": 3.053160778460753,
      "grad_norm": 0.18495577573776245,
      "learning_rate": 3.9193347522123314e-06,
      "loss": 0.0088,
      "step": 1865640
    },
    {
      "epoch": 3.0531935088994064,
      "grad_norm": 0.43940308690071106,
      "learning_rate": 3.919268859998814e-06,
      "loss": 0.0154,
      "step": 1865660
    },
    {
      "epoch": 3.0532262393380596,
      "grad_norm": 0.6037890911102295,
      "learning_rate": 3.919202967785297e-06,
      "loss": 0.0152,
      "step": 1865680
    },
    {
      "epoch": 3.0532589697767127,
      "grad_norm": 0.1349402517080307,
      "learning_rate": 3.91913707557178e-06,
      "loss": 0.0097,
      "step": 1865700
    },
    {
      "epoch": 3.0532917002153663,
      "grad_norm": 0.22510936856269836,
      "learning_rate": 3.919071183358263e-06,
      "loss": 0.0102,
      "step": 1865720
    },
    {
      "epoch": 3.0533244306540195,
      "grad_norm": 0.26655885577201843,
      "learning_rate": 3.919005291144746e-06,
      "loss": 0.0112,
      "step": 1865740
    },
    {
      "epoch": 3.053357161092673,
      "grad_norm": 0.3115260601043701,
      "learning_rate": 3.918939398931229e-06,
      "loss": 0.0114,
      "step": 1865760
    },
    {
      "epoch": 3.0533898915313262,
      "grad_norm": 0.3127656877040863,
      "learning_rate": 3.9188735067177115e-06,
      "loss": 0.0168,
      "step": 1865780
    },
    {
      "epoch": 3.05342262196998,
      "grad_norm": 0.2519429326057434,
      "learning_rate": 3.918807614504194e-06,
      "loss": 0.0072,
      "step": 1865800
    },
    {
      "epoch": 3.053455352408633,
      "grad_norm": 0.28002893924713135,
      "learning_rate": 3.918741722290677e-06,
      "loss": 0.0186,
      "step": 1865820
    },
    {
      "epoch": 3.053488082847286,
      "grad_norm": 0.5293073654174805,
      "learning_rate": 3.91867583007716e-06,
      "loss": 0.0067,
      "step": 1865840
    },
    {
      "epoch": 3.0535208132859397,
      "grad_norm": 0.6016597747802734,
      "learning_rate": 3.918609937863642e-06,
      "loss": 0.0108,
      "step": 1865860
    },
    {
      "epoch": 3.053553543724593,
      "grad_norm": 0.32918867468833923,
      "learning_rate": 3.918544045650126e-06,
      "loss": 0.0163,
      "step": 1865880
    },
    {
      "epoch": 3.0535862741632465,
      "grad_norm": 0.25161507725715637,
      "learning_rate": 3.918478153436609e-06,
      "loss": 0.0132,
      "step": 1865900
    },
    {
      "epoch": 3.0536190046018996,
      "grad_norm": 0.23037299513816833,
      "learning_rate": 3.9184122612230915e-06,
      "loss": 0.0097,
      "step": 1865920
    },
    {
      "epoch": 3.053651735040553,
      "grad_norm": 0.0956762358546257,
      "learning_rate": 3.918346369009575e-06,
      "loss": 0.0074,
      "step": 1865940
    },
    {
      "epoch": 3.0536844654792064,
      "grad_norm": 0.2950192391872406,
      "learning_rate": 3.918280476796058e-06,
      "loss": 0.0145,
      "step": 1865960
    },
    {
      "epoch": 3.0537171959178595,
      "grad_norm": 0.4469982087612152,
      "learning_rate": 3.9182145845825405e-06,
      "loss": 0.011,
      "step": 1865980
    },
    {
      "epoch": 3.053749926356513,
      "grad_norm": 0.18058659136295319,
      "learning_rate": 3.918148692369023e-06,
      "loss": 0.0109,
      "step": 1866000
    },
    {
      "epoch": 3.0537826567951663,
      "grad_norm": 0.3015754520893097,
      "learning_rate": 3.918082800155506e-06,
      "loss": 0.0181,
      "step": 1866020
    },
    {
      "epoch": 3.05381538723382,
      "grad_norm": 0.3091007173061371,
      "learning_rate": 3.918016907941989e-06,
      "loss": 0.0125,
      "step": 1866040
    },
    {
      "epoch": 3.053848117672473,
      "grad_norm": 1.0725566148757935,
      "learning_rate": 3.9179510157284715e-06,
      "loss": 0.0139,
      "step": 1866060
    },
    {
      "epoch": 3.0538808481111266,
      "grad_norm": 0.16188447177410126,
      "learning_rate": 3.917885123514954e-06,
      "loss": 0.0108,
      "step": 1866080
    },
    {
      "epoch": 3.0539135785497797,
      "grad_norm": 0.1395472139120102,
      "learning_rate": 3.917819231301437e-06,
      "loss": 0.0091,
      "step": 1866100
    },
    {
      "epoch": 3.053946308988433,
      "grad_norm": 0.42482656240463257,
      "learning_rate": 3.9177533390879206e-06,
      "loss": 0.0098,
      "step": 1866120
    },
    {
      "epoch": 3.0539790394270865,
      "grad_norm": 0.21230098605155945,
      "learning_rate": 3.917687446874403e-06,
      "loss": 0.0092,
      "step": 1866140
    },
    {
      "epoch": 3.0540117698657396,
      "grad_norm": 0.4968777894973755,
      "learning_rate": 3.917621554660886e-06,
      "loss": 0.015,
      "step": 1866160
    },
    {
      "epoch": 3.0540445003043932,
      "grad_norm": 0.46491917967796326,
      "learning_rate": 3.917555662447369e-06,
      "loss": 0.0118,
      "step": 1866180
    },
    {
      "epoch": 3.0540772307430464,
      "grad_norm": 0.2575926184654236,
      "learning_rate": 3.9174897702338515e-06,
      "loss": 0.0123,
      "step": 1866200
    },
    {
      "epoch": 3.0541099611817,
      "grad_norm": 0.5010199546813965,
      "learning_rate": 3.917423878020334e-06,
      "loss": 0.0088,
      "step": 1866220
    },
    {
      "epoch": 3.054142691620353,
      "grad_norm": 0.2028685361146927,
      "learning_rate": 3.917357985806817e-06,
      "loss": 0.012,
      "step": 1866240
    },
    {
      "epoch": 3.0541754220590063,
      "grad_norm": 0.048040926456451416,
      "learning_rate": 3.917292093593301e-06,
      "loss": 0.0149,
      "step": 1866260
    },
    {
      "epoch": 3.05420815249766,
      "grad_norm": 0.34404176473617554,
      "learning_rate": 3.917226201379783e-06,
      "loss": 0.0083,
      "step": 1866280
    },
    {
      "epoch": 3.054240882936313,
      "grad_norm": 0.21132558584213257,
      "learning_rate": 3.917160309166266e-06,
      "loss": 0.0109,
      "step": 1866300
    },
    {
      "epoch": 3.0542736133749666,
      "grad_norm": 0.6352128386497498,
      "learning_rate": 3.917094416952749e-06,
      "loss": 0.0164,
      "step": 1866320
    },
    {
      "epoch": 3.0543063438136198,
      "grad_norm": 0.5121869444847107,
      "learning_rate": 3.917028524739232e-06,
      "loss": 0.0129,
      "step": 1866340
    },
    {
      "epoch": 3.054339074252273,
      "grad_norm": 0.21399103105068207,
      "learning_rate": 3.916962632525715e-06,
      "loss": 0.0164,
      "step": 1866360
    },
    {
      "epoch": 3.0543718046909265,
      "grad_norm": 0.2277461141347885,
      "learning_rate": 3.916896740312198e-06,
      "loss": 0.0114,
      "step": 1866380
    },
    {
      "epoch": 3.0544045351295797,
      "grad_norm": 0.16049213707447052,
      "learning_rate": 3.916830848098681e-06,
      "loss": 0.0101,
      "step": 1866400
    },
    {
      "epoch": 3.0544372655682333,
      "grad_norm": 0.1782507747411728,
      "learning_rate": 3.916764955885163e-06,
      "loss": 0.0068,
      "step": 1866420
    },
    {
      "epoch": 3.0544699960068864,
      "grad_norm": 0.32579365372657776,
      "learning_rate": 3.916699063671646e-06,
      "loss": 0.0115,
      "step": 1866440
    },
    {
      "epoch": 3.05450272644554,
      "grad_norm": 0.19936616718769073,
      "learning_rate": 3.916633171458129e-06,
      "loss": 0.0082,
      "step": 1866460
    },
    {
      "epoch": 3.054535456884193,
      "grad_norm": 0.2179574966430664,
      "learning_rate": 3.916567279244612e-06,
      "loss": 0.009,
      "step": 1866480
    },
    {
      "epoch": 3.0545681873228463,
      "grad_norm": 0.38674265146255493,
      "learning_rate": 3.916501387031094e-06,
      "loss": 0.0076,
      "step": 1866500
    },
    {
      "epoch": 3.0546009177615,
      "grad_norm": 0.37662744522094727,
      "learning_rate": 3.916435494817578e-06,
      "loss": 0.0123,
      "step": 1866520
    },
    {
      "epoch": 3.054633648200153,
      "grad_norm": 0.45662426948547363,
      "learning_rate": 3.916369602604061e-06,
      "loss": 0.0124,
      "step": 1866540
    },
    {
      "epoch": 3.0546663786388066,
      "grad_norm": 0.3073441982269287,
      "learning_rate": 3.916303710390543e-06,
      "loss": 0.0137,
      "step": 1866560
    },
    {
      "epoch": 3.05469910907746,
      "grad_norm": 0.5412284731864929,
      "learning_rate": 3.916237818177026e-06,
      "loss": 0.009,
      "step": 1866580
    },
    {
      "epoch": 3.0547318395161134,
      "grad_norm": 0.17160208523273468,
      "learning_rate": 3.91617192596351e-06,
      "loss": 0.0149,
      "step": 1866600
    },
    {
      "epoch": 3.0547645699547665,
      "grad_norm": 0.33092308044433594,
      "learning_rate": 3.9161060337499925e-06,
      "loss": 0.0102,
      "step": 1866620
    },
    {
      "epoch": 3.0547973003934197,
      "grad_norm": 0.14769481122493744,
      "learning_rate": 3.916040141536475e-06,
      "loss": 0.0141,
      "step": 1866640
    },
    {
      "epoch": 3.0548300308320733,
      "grad_norm": 0.15914234519004822,
      "learning_rate": 3.915974249322958e-06,
      "loss": 0.024,
      "step": 1866660
    },
    {
      "epoch": 3.0548627612707264,
      "grad_norm": 0.18588115274906158,
      "learning_rate": 3.915908357109441e-06,
      "loss": 0.0065,
      "step": 1866680
    },
    {
      "epoch": 3.05489549170938,
      "grad_norm": 0.32180163264274597,
      "learning_rate": 3.915842464895923e-06,
      "loss": 0.0083,
      "step": 1866700
    },
    {
      "epoch": 3.054928222148033,
      "grad_norm": 0.37744179368019104,
      "learning_rate": 3.915776572682406e-06,
      "loss": 0.0131,
      "step": 1866720
    },
    {
      "epoch": 3.0549609525866868,
      "grad_norm": 0.2800605297088623,
      "learning_rate": 3.91571068046889e-06,
      "loss": 0.016,
      "step": 1866740
    },
    {
      "epoch": 3.05499368302534,
      "grad_norm": 0.20400398969650269,
      "learning_rate": 3.9156447882553725e-06,
      "loss": 0.0063,
      "step": 1866760
    },
    {
      "epoch": 3.055026413463993,
      "grad_norm": 0.22487489879131317,
      "learning_rate": 3.915578896041855e-06,
      "loss": 0.0097,
      "step": 1866780
    },
    {
      "epoch": 3.0550591439026467,
      "grad_norm": 0.14020848274230957,
      "learning_rate": 3.915513003828338e-06,
      "loss": 0.0108,
      "step": 1866800
    },
    {
      "epoch": 3.0550918743413,
      "grad_norm": 0.5766971111297607,
      "learning_rate": 3.915447111614821e-06,
      "loss": 0.0133,
      "step": 1866820
    },
    {
      "epoch": 3.0551246047799534,
      "grad_norm": 0.15493813157081604,
      "learning_rate": 3.9153812194013034e-06,
      "loss": 0.0095,
      "step": 1866840
    },
    {
      "epoch": 3.0551573352186066,
      "grad_norm": 0.32336947321891785,
      "learning_rate": 3.915315327187786e-06,
      "loss": 0.013,
      "step": 1866860
    },
    {
      "epoch": 3.05519006565726,
      "grad_norm": 0.17350855469703674,
      "learning_rate": 3.915249434974269e-06,
      "loss": 0.0156,
      "step": 1866880
    },
    {
      "epoch": 3.0552227960959133,
      "grad_norm": 0.08848340809345245,
      "learning_rate": 3.915183542760752e-06,
      "loss": 0.0159,
      "step": 1866900
    },
    {
      "epoch": 3.0552555265345664,
      "grad_norm": 0.5149508714675903,
      "learning_rate": 3.915117650547235e-06,
      "loss": 0.0113,
      "step": 1866920
    },
    {
      "epoch": 3.05528825697322,
      "grad_norm": 0.652719259262085,
      "learning_rate": 3.915051758333718e-06,
      "loss": 0.0129,
      "step": 1866940
    },
    {
      "epoch": 3.055320987411873,
      "grad_norm": 0.21096111834049225,
      "learning_rate": 3.9149858661202016e-06,
      "loss": 0.0144,
      "step": 1866960
    },
    {
      "epoch": 3.055353717850527,
      "grad_norm": 0.3640443682670593,
      "learning_rate": 3.914919973906684e-06,
      "loss": 0.0177,
      "step": 1866980
    },
    {
      "epoch": 3.05538644828918,
      "grad_norm": 0.5423175096511841,
      "learning_rate": 3.914854081693167e-06,
      "loss": 0.0108,
      "step": 1867000
    },
    {
      "epoch": 3.055419178727833,
      "grad_norm": 0.765903890132904,
      "learning_rate": 3.91478818947965e-06,
      "loss": 0.0122,
      "step": 1867020
    },
    {
      "epoch": 3.0554519091664867,
      "grad_norm": 0.3775448799133301,
      "learning_rate": 3.9147222972661325e-06,
      "loss": 0.0122,
      "step": 1867040
    },
    {
      "epoch": 3.05548463960514,
      "grad_norm": 0.13145793974399567,
      "learning_rate": 3.914656405052615e-06,
      "loss": 0.0116,
      "step": 1867060
    },
    {
      "epoch": 3.0555173700437934,
      "grad_norm": 0.20454873144626617,
      "learning_rate": 3.914590512839098e-06,
      "loss": 0.0149,
      "step": 1867080
    },
    {
      "epoch": 3.0555501004824466,
      "grad_norm": 0.32214632630348206,
      "learning_rate": 3.914524620625581e-06,
      "loss": 0.0095,
      "step": 1867100
    },
    {
      "epoch": 3.0555828309211,
      "grad_norm": 0.08963055908679962,
      "learning_rate": 3.9144587284120635e-06,
      "loss": 0.0071,
      "step": 1867120
    },
    {
      "epoch": 3.0556155613597533,
      "grad_norm": 0.31662988662719727,
      "learning_rate": 3.914392836198547e-06,
      "loss": 0.0087,
      "step": 1867140
    },
    {
      "epoch": 3.0556482917984065,
      "grad_norm": 0.135196715593338,
      "learning_rate": 3.91432694398503e-06,
      "loss": 0.0079,
      "step": 1867160
    },
    {
      "epoch": 3.05568102223706,
      "grad_norm": 0.3249995708465576,
      "learning_rate": 3.9142610517715126e-06,
      "loss": 0.0129,
      "step": 1867180
    },
    {
      "epoch": 3.055713752675713,
      "grad_norm": 0.49043530225753784,
      "learning_rate": 3.914195159557995e-06,
      "loss": 0.0095,
      "step": 1867200
    },
    {
      "epoch": 3.055746483114367,
      "grad_norm": 0.2882058620452881,
      "learning_rate": 3.914129267344478e-06,
      "loss": 0.0137,
      "step": 1867220
    },
    {
      "epoch": 3.05577921355302,
      "grad_norm": 0.23609334230422974,
      "learning_rate": 3.914063375130961e-06,
      "loss": 0.0124,
      "step": 1867240
    },
    {
      "epoch": 3.0558119439916736,
      "grad_norm": 0.20258426666259766,
      "learning_rate": 3.9139974829174435e-06,
      "loss": 0.0078,
      "step": 1867260
    },
    {
      "epoch": 3.0558446744303267,
      "grad_norm": 0.29781535267829895,
      "learning_rate": 3.913931590703926e-06,
      "loss": 0.0164,
      "step": 1867280
    },
    {
      "epoch": 3.05587740486898,
      "grad_norm": 0.38088372349739075,
      "learning_rate": 3.91386569849041e-06,
      "loss": 0.0161,
      "step": 1867300
    },
    {
      "epoch": 3.0559101353076334,
      "grad_norm": 0.4536055028438568,
      "learning_rate": 3.913799806276893e-06,
      "loss": 0.0144,
      "step": 1867320
    },
    {
      "epoch": 3.0559428657462866,
      "grad_norm": 0.512641191482544,
      "learning_rate": 3.913733914063375e-06,
      "loss": 0.0121,
      "step": 1867340
    },
    {
      "epoch": 3.05597559618494,
      "grad_norm": 1.0566595792770386,
      "learning_rate": 3.913668021849859e-06,
      "loss": 0.0103,
      "step": 1867360
    },
    {
      "epoch": 3.0560083266235933,
      "grad_norm": 0.276870459318161,
      "learning_rate": 3.913602129636342e-06,
      "loss": 0.0103,
      "step": 1867380
    },
    {
      "epoch": 3.056041057062247,
      "grad_norm": 0.6234961152076721,
      "learning_rate": 3.913536237422824e-06,
      "loss": 0.0156,
      "step": 1867400
    },
    {
      "epoch": 3.0560737875009,
      "grad_norm": 0.15202273428440094,
      "learning_rate": 3.913470345209307e-06,
      "loss": 0.0095,
      "step": 1867420
    },
    {
      "epoch": 3.0561065179395532,
      "grad_norm": 1.5810617208480835,
      "learning_rate": 3.91340445299579e-06,
      "loss": 0.0124,
      "step": 1867440
    },
    {
      "epoch": 3.056139248378207,
      "grad_norm": 0.08621259778738022,
      "learning_rate": 3.913338560782273e-06,
      "loss": 0.008,
      "step": 1867460
    },
    {
      "epoch": 3.05617197881686,
      "grad_norm": 0.7676152586936951,
      "learning_rate": 3.913272668568755e-06,
      "loss": 0.0141,
      "step": 1867480
    },
    {
      "epoch": 3.0562047092555136,
      "grad_norm": 0.10326565057039261,
      "learning_rate": 3.913206776355238e-06,
      "loss": 0.0099,
      "step": 1867500
    },
    {
      "epoch": 3.0562374396941667,
      "grad_norm": 0.10591906309127808,
      "learning_rate": 3.913140884141721e-06,
      "loss": 0.0081,
      "step": 1867520
    },
    {
      "epoch": 3.0562701701328203,
      "grad_norm": 0.43450891971588135,
      "learning_rate": 3.913074991928204e-06,
      "loss": 0.0157,
      "step": 1867540
    },
    {
      "epoch": 3.0563029005714735,
      "grad_norm": 0.09610437601804733,
      "learning_rate": 3.913009099714687e-06,
      "loss": 0.0125,
      "step": 1867560
    },
    {
      "epoch": 3.0563356310101266,
      "grad_norm": 0.3675581216812134,
      "learning_rate": 3.91294320750117e-06,
      "loss": 0.0163,
      "step": 1867580
    },
    {
      "epoch": 3.05636836144878,
      "grad_norm": 0.2518581449985504,
      "learning_rate": 3.912877315287653e-06,
      "loss": 0.0124,
      "step": 1867600
    },
    {
      "epoch": 3.0564010918874334,
      "grad_norm": 0.19641870260238647,
      "learning_rate": 3.912811423074135e-06,
      "loss": 0.0083,
      "step": 1867620
    },
    {
      "epoch": 3.056433822326087,
      "grad_norm": 0.09377450495958328,
      "learning_rate": 3.912745530860618e-06,
      "loss": 0.0145,
      "step": 1867640
    },
    {
      "epoch": 3.05646655276474,
      "grad_norm": 0.18518424034118652,
      "learning_rate": 3.912679638647102e-06,
      "loss": 0.0097,
      "step": 1867660
    },
    {
      "epoch": 3.0564992832033937,
      "grad_norm": 0.16698379814624786,
      "learning_rate": 3.9126137464335844e-06,
      "loss": 0.0146,
      "step": 1867680
    },
    {
      "epoch": 3.056532013642047,
      "grad_norm": 0.2749702036380768,
      "learning_rate": 3.912547854220067e-06,
      "loss": 0.0099,
      "step": 1867700
    },
    {
      "epoch": 3.0565647440807,
      "grad_norm": 0.2260366678237915,
      "learning_rate": 3.91248196200655e-06,
      "loss": 0.0076,
      "step": 1867720
    },
    {
      "epoch": 3.0565974745193536,
      "grad_norm": 0.2207559496164322,
      "learning_rate": 3.912416069793033e-06,
      "loss": 0.0148,
      "step": 1867740
    },
    {
      "epoch": 3.0566302049580067,
      "grad_norm": 0.3082878589630127,
      "learning_rate": 3.912350177579516e-06,
      "loss": 0.0128,
      "step": 1867760
    },
    {
      "epoch": 3.0566629353966603,
      "grad_norm": 0.34406495094299316,
      "learning_rate": 3.912284285365999e-06,
      "loss": 0.0087,
      "step": 1867780
    },
    {
      "epoch": 3.0566956658353135,
      "grad_norm": 0.041783008724451065,
      "learning_rate": 3.912218393152482e-06,
      "loss": 0.0064,
      "step": 1867800
    },
    {
      "epoch": 3.056728396273967,
      "grad_norm": 0.19186073541641235,
      "learning_rate": 3.9121525009389645e-06,
      "loss": 0.0093,
      "step": 1867820
    },
    {
      "epoch": 3.0567611267126202,
      "grad_norm": 0.2715548574924469,
      "learning_rate": 3.912086608725447e-06,
      "loss": 0.0098,
      "step": 1867840
    },
    {
      "epoch": 3.0567938571512734,
      "grad_norm": 0.2582390606403351,
      "learning_rate": 3.91202071651193e-06,
      "loss": 0.0079,
      "step": 1867860
    },
    {
      "epoch": 3.056826587589927,
      "grad_norm": 0.15848922729492188,
      "learning_rate": 3.911954824298413e-06,
      "loss": 0.0091,
      "step": 1867880
    },
    {
      "epoch": 3.05685931802858,
      "grad_norm": 0.29642701148986816,
      "learning_rate": 3.911888932084895e-06,
      "loss": 0.0114,
      "step": 1867900
    },
    {
      "epoch": 3.0568920484672337,
      "grad_norm": 0.07331410050392151,
      "learning_rate": 3.911823039871378e-06,
      "loss": 0.0084,
      "step": 1867920
    },
    {
      "epoch": 3.056924778905887,
      "grad_norm": 0.31219661235809326,
      "learning_rate": 3.911757147657862e-06,
      "loss": 0.0107,
      "step": 1867940
    },
    {
      "epoch": 3.05695750934454,
      "grad_norm": 0.1235838383436203,
      "learning_rate": 3.9116912554443445e-06,
      "loss": 0.0158,
      "step": 1867960
    },
    {
      "epoch": 3.0569902397831936,
      "grad_norm": 0.11631357669830322,
      "learning_rate": 3.911625363230827e-06,
      "loss": 0.0091,
      "step": 1867980
    },
    {
      "epoch": 3.0570229702218468,
      "grad_norm": 0.3528890609741211,
      "learning_rate": 3.91155947101731e-06,
      "loss": 0.008,
      "step": 1868000
    },
    {
      "epoch": 3.0570557006605004,
      "grad_norm": 0.26686573028564453,
      "learning_rate": 3.9114935788037936e-06,
      "loss": 0.0128,
      "step": 1868020
    },
    {
      "epoch": 3.0570884310991535,
      "grad_norm": 0.23132432997226715,
      "learning_rate": 3.911427686590276e-06,
      "loss": 0.0144,
      "step": 1868040
    },
    {
      "epoch": 3.057121161537807,
      "grad_norm": 0.28550222516059875,
      "learning_rate": 3.911361794376759e-06,
      "loss": 0.0112,
      "step": 1868060
    },
    {
      "epoch": 3.0571538919764603,
      "grad_norm": 0.18453529477119446,
      "learning_rate": 3.911295902163242e-06,
      "loss": 0.0112,
      "step": 1868080
    },
    {
      "epoch": 3.0571866224151134,
      "grad_norm": 0.6656744480133057,
      "learning_rate": 3.9112300099497245e-06,
      "loss": 0.0089,
      "step": 1868100
    },
    {
      "epoch": 3.057219352853767,
      "grad_norm": 0.18520011007785797,
      "learning_rate": 3.911164117736207e-06,
      "loss": 0.0155,
      "step": 1868120
    },
    {
      "epoch": 3.05725208329242,
      "grad_norm": 0.09234727919101715,
      "learning_rate": 3.91109822552269e-06,
      "loss": 0.0107,
      "step": 1868140
    },
    {
      "epoch": 3.0572848137310737,
      "grad_norm": 0.23099066317081451,
      "learning_rate": 3.911032333309174e-06,
      "loss": 0.0115,
      "step": 1868160
    },
    {
      "epoch": 3.057317544169727,
      "grad_norm": 0.20993924140930176,
      "learning_rate": 3.910966441095656e-06,
      "loss": 0.0165,
      "step": 1868180
    },
    {
      "epoch": 3.0573502746083805,
      "grad_norm": 0.49105384945869446,
      "learning_rate": 3.910900548882139e-06,
      "loss": 0.0131,
      "step": 1868200
    },
    {
      "epoch": 3.0573830050470336,
      "grad_norm": 0.07972720265388489,
      "learning_rate": 3.910834656668622e-06,
      "loss": 0.0093,
      "step": 1868220
    },
    {
      "epoch": 3.057415735485687,
      "grad_norm": 0.21539711952209473,
      "learning_rate": 3.9107687644551045e-06,
      "loss": 0.0126,
      "step": 1868240
    },
    {
      "epoch": 3.0574484659243404,
      "grad_norm": 0.29128459095954895,
      "learning_rate": 3.910702872241587e-06,
      "loss": 0.012,
      "step": 1868260
    },
    {
      "epoch": 3.0574811963629935,
      "grad_norm": 0.06911402940750122,
      "learning_rate": 3.91063698002807e-06,
      "loss": 0.013,
      "step": 1868280
    },
    {
      "epoch": 3.057513926801647,
      "grad_norm": 0.11725226789712906,
      "learning_rate": 3.910571087814553e-06,
      "loss": 0.0138,
      "step": 1868300
    },
    {
      "epoch": 3.0575466572403003,
      "grad_norm": 0.20817433297634125,
      "learning_rate": 3.9105051956010355e-06,
      "loss": 0.0081,
      "step": 1868320
    },
    {
      "epoch": 3.057579387678954,
      "grad_norm": 0.20309312641620636,
      "learning_rate": 3.910439303387519e-06,
      "loss": 0.0114,
      "step": 1868340
    },
    {
      "epoch": 3.057612118117607,
      "grad_norm": 0.20064778625965118,
      "learning_rate": 3.910373411174002e-06,
      "loss": 0.0099,
      "step": 1868360
    },
    {
      "epoch": 3.05764484855626,
      "grad_norm": 0.20284727215766907,
      "learning_rate": 3.910307518960485e-06,
      "loss": 0.0153,
      "step": 1868380
    },
    {
      "epoch": 3.0576775789949138,
      "grad_norm": 0.41940125823020935,
      "learning_rate": 3.910241626746968e-06,
      "loss": 0.0106,
      "step": 1868400
    },
    {
      "epoch": 3.057710309433567,
      "grad_norm": 0.41534361243247986,
      "learning_rate": 3.910175734533451e-06,
      "loss": 0.006,
      "step": 1868420
    },
    {
      "epoch": 3.0577430398722205,
      "grad_norm": 0.20580872893333435,
      "learning_rate": 3.910109842319934e-06,
      "loss": 0.0111,
      "step": 1868440
    },
    {
      "epoch": 3.0577757703108737,
      "grad_norm": 0.18767264485359192,
      "learning_rate": 3.910043950106416e-06,
      "loss": 0.0096,
      "step": 1868460
    },
    {
      "epoch": 3.0578085007495273,
      "grad_norm": 0.1709935963153839,
      "learning_rate": 3.909978057892899e-06,
      "loss": 0.0145,
      "step": 1868480
    },
    {
      "epoch": 3.0578412311881804,
      "grad_norm": 0.7365773320198059,
      "learning_rate": 3.909912165679382e-06,
      "loss": 0.0093,
      "step": 1868500
    },
    {
      "epoch": 3.0578739616268336,
      "grad_norm": 0.13206687569618225,
      "learning_rate": 3.909846273465865e-06,
      "loss": 0.0153,
      "step": 1868520
    },
    {
      "epoch": 3.057906692065487,
      "grad_norm": 0.3405173420906067,
      "learning_rate": 3.909780381252347e-06,
      "loss": 0.0112,
      "step": 1868540
    },
    {
      "epoch": 3.0579394225041403,
      "grad_norm": 0.19659988582134247,
      "learning_rate": 3.909714489038831e-06,
      "loss": 0.0096,
      "step": 1868560
    },
    {
      "epoch": 3.057972152942794,
      "grad_norm": 0.22577014565467834,
      "learning_rate": 3.909648596825314e-06,
      "loss": 0.0078,
      "step": 1868580
    },
    {
      "epoch": 3.058004883381447,
      "grad_norm": 1.1750802993774414,
      "learning_rate": 3.909582704611796e-06,
      "loss": 0.0182,
      "step": 1868600
    },
    {
      "epoch": 3.0580376138201,
      "grad_norm": 0.2700038254261017,
      "learning_rate": 3.909516812398279e-06,
      "loss": 0.0127,
      "step": 1868620
    },
    {
      "epoch": 3.058070344258754,
      "grad_norm": 0.22670941054821014,
      "learning_rate": 3.909450920184762e-06,
      "loss": 0.0087,
      "step": 1868640
    },
    {
      "epoch": 3.058103074697407,
      "grad_norm": 0.46549057960510254,
      "learning_rate": 3.909385027971245e-06,
      "loss": 0.0128,
      "step": 1868660
    },
    {
      "epoch": 3.0581358051360605,
      "grad_norm": 0.5759416818618774,
      "learning_rate": 3.909319135757727e-06,
      "loss": 0.0104,
      "step": 1868680
    },
    {
      "epoch": 3.0581685355747137,
      "grad_norm": 0.21458329260349274,
      "learning_rate": 3.90925324354421e-06,
      "loss": 0.0058,
      "step": 1868700
    },
    {
      "epoch": 3.0582012660133673,
      "grad_norm": 0.21327410638332367,
      "learning_rate": 3.909187351330694e-06,
      "loss": 0.0115,
      "step": 1868720
    },
    {
      "epoch": 3.0582339964520204,
      "grad_norm": 0.3257886469364166,
      "learning_rate": 3.909121459117176e-06,
      "loss": 0.0141,
      "step": 1868740
    },
    {
      "epoch": 3.0582667268906736,
      "grad_norm": 0.3681911528110504,
      "learning_rate": 3.909055566903659e-06,
      "loss": 0.0107,
      "step": 1868760
    },
    {
      "epoch": 3.058299457329327,
      "grad_norm": 0.258949875831604,
      "learning_rate": 3.908989674690143e-06,
      "loss": 0.0094,
      "step": 1868780
    },
    {
      "epoch": 3.0583321877679803,
      "grad_norm": 0.1447506695985794,
      "learning_rate": 3.9089237824766255e-06,
      "loss": 0.0114,
      "step": 1868800
    },
    {
      "epoch": 3.058364918206634,
      "grad_norm": 0.42026573419570923,
      "learning_rate": 3.908857890263108e-06,
      "loss": 0.0094,
      "step": 1868820
    },
    {
      "epoch": 3.058397648645287,
      "grad_norm": 0.37310314178466797,
      "learning_rate": 3.908791998049591e-06,
      "loss": 0.0126,
      "step": 1868840
    },
    {
      "epoch": 3.0584303790839407,
      "grad_norm": 0.7948110699653625,
      "learning_rate": 3.908726105836074e-06,
      "loss": 0.0125,
      "step": 1868860
    },
    {
      "epoch": 3.058463109522594,
      "grad_norm": 0.27187639474868774,
      "learning_rate": 3.9086602136225564e-06,
      "loss": 0.0126,
      "step": 1868880
    },
    {
      "epoch": 3.058495839961247,
      "grad_norm": 0.4623987376689911,
      "learning_rate": 3.908594321409039e-06,
      "loss": 0.009,
      "step": 1868900
    },
    {
      "epoch": 3.0585285703999006,
      "grad_norm": 0.35924336314201355,
      "learning_rate": 3.908528429195522e-06,
      "loss": 0.0103,
      "step": 1868920
    },
    {
      "epoch": 3.0585613008385537,
      "grad_norm": 0.09056302160024643,
      "learning_rate": 3.908462536982005e-06,
      "loss": 0.0076,
      "step": 1868940
    },
    {
      "epoch": 3.0585940312772073,
      "grad_norm": 0.48541176319122314,
      "learning_rate": 3.908396644768488e-06,
      "loss": 0.009,
      "step": 1868960
    },
    {
      "epoch": 3.0586267617158605,
      "grad_norm": 0.24158431589603424,
      "learning_rate": 3.908330752554971e-06,
      "loss": 0.0083,
      "step": 1868980
    },
    {
      "epoch": 3.058659492154514,
      "grad_norm": 0.28362175822257996,
      "learning_rate": 3.908264860341454e-06,
      "loss": 0.0095,
      "step": 1869000
    },
    {
      "epoch": 3.058692222593167,
      "grad_norm": 0.4308145344257355,
      "learning_rate": 3.9081989681279365e-06,
      "loss": 0.0114,
      "step": 1869020
    },
    {
      "epoch": 3.0587249530318203,
      "grad_norm": 0.2232733964920044,
      "learning_rate": 3.908133075914419e-06,
      "loss": 0.0111,
      "step": 1869040
    },
    {
      "epoch": 3.058757683470474,
      "grad_norm": 0.5057460069656372,
      "learning_rate": 3.908067183700903e-06,
      "loss": 0.0097,
      "step": 1869060
    },
    {
      "epoch": 3.058790413909127,
      "grad_norm": 0.3354988098144531,
      "learning_rate": 3.9080012914873855e-06,
      "loss": 0.0125,
      "step": 1869080
    },
    {
      "epoch": 3.0588231443477807,
      "grad_norm": 0.2438124567270279,
      "learning_rate": 3.907935399273868e-06,
      "loss": 0.0089,
      "step": 1869100
    },
    {
      "epoch": 3.058855874786434,
      "grad_norm": 0.2639463543891907,
      "learning_rate": 3.907869507060351e-06,
      "loss": 0.0095,
      "step": 1869120
    },
    {
      "epoch": 3.0588886052250874,
      "grad_norm": 0.06290267407894135,
      "learning_rate": 3.907803614846834e-06,
      "loss": 0.0111,
      "step": 1869140
    },
    {
      "epoch": 3.0589213356637406,
      "grad_norm": 0.4158270061016083,
      "learning_rate": 3.9077377226333165e-06,
      "loss": 0.0164,
      "step": 1869160
    },
    {
      "epoch": 3.0589540661023937,
      "grad_norm": 0.15139858424663544,
      "learning_rate": 3.9076718304198e-06,
      "loss": 0.0177,
      "step": 1869180
    },
    {
      "epoch": 3.0589867965410473,
      "grad_norm": 0.2015226185321808,
      "learning_rate": 3.907605938206283e-06,
      "loss": 0.0128,
      "step": 1869200
    },
    {
      "epoch": 3.0590195269797005,
      "grad_norm": 0.6781410574913025,
      "learning_rate": 3.9075400459927656e-06,
      "loss": 0.0129,
      "step": 1869220
    },
    {
      "epoch": 3.059052257418354,
      "grad_norm": 0.0698433667421341,
      "learning_rate": 3.907474153779248e-06,
      "loss": 0.0095,
      "step": 1869240
    },
    {
      "epoch": 3.059084987857007,
      "grad_norm": 0.6067920923233032,
      "learning_rate": 3.907408261565731e-06,
      "loss": 0.0156,
      "step": 1869260
    },
    {
      "epoch": 3.059117718295661,
      "grad_norm": 0.18324197828769684,
      "learning_rate": 3.907342369352214e-06,
      "loss": 0.0069,
      "step": 1869280
    },
    {
      "epoch": 3.059150448734314,
      "grad_norm": 0.11256352066993713,
      "learning_rate": 3.9072764771386965e-06,
      "loss": 0.0103,
      "step": 1869300
    },
    {
      "epoch": 3.059183179172967,
      "grad_norm": 0.14310322701931,
      "learning_rate": 3.907210584925179e-06,
      "loss": 0.0104,
      "step": 1869320
    },
    {
      "epoch": 3.0592159096116207,
      "grad_norm": 0.23953032493591309,
      "learning_rate": 3.907144692711662e-06,
      "loss": 0.0069,
      "step": 1869340
    },
    {
      "epoch": 3.059248640050274,
      "grad_norm": 0.20743830502033234,
      "learning_rate": 3.907078800498146e-06,
      "loss": 0.0103,
      "step": 1869360
    },
    {
      "epoch": 3.0592813704889275,
      "grad_norm": 0.3756380081176758,
      "learning_rate": 3.907012908284628e-06,
      "loss": 0.0162,
      "step": 1869380
    },
    {
      "epoch": 3.0593141009275806,
      "grad_norm": 0.32793164253234863,
      "learning_rate": 3.906947016071111e-06,
      "loss": 0.0125,
      "step": 1869400
    },
    {
      "epoch": 3.0593468313662338,
      "grad_norm": 0.340964138507843,
      "learning_rate": 3.906881123857595e-06,
      "loss": 0.0085,
      "step": 1869420
    },
    {
      "epoch": 3.0593795618048873,
      "grad_norm": 0.4586142301559448,
      "learning_rate": 3.906815231644077e-06,
      "loss": 0.0107,
      "step": 1869440
    },
    {
      "epoch": 3.0594122922435405,
      "grad_norm": 0.23122869431972504,
      "learning_rate": 3.90674933943056e-06,
      "loss": 0.0093,
      "step": 1869460
    },
    {
      "epoch": 3.059445022682194,
      "grad_norm": 0.0877855196595192,
      "learning_rate": 3.906683447217043e-06,
      "loss": 0.012,
      "step": 1869480
    },
    {
      "epoch": 3.0594777531208472,
      "grad_norm": 0.2066429853439331,
      "learning_rate": 3.906617555003526e-06,
      "loss": 0.0101,
      "step": 1869500
    },
    {
      "epoch": 3.059510483559501,
      "grad_norm": 0.3033895194530487,
      "learning_rate": 3.906551662790008e-06,
      "loss": 0.0146,
      "step": 1869520
    },
    {
      "epoch": 3.059543213998154,
      "grad_norm": 0.053102292120456696,
      "learning_rate": 3.906485770576491e-06,
      "loss": 0.0099,
      "step": 1869540
    },
    {
      "epoch": 3.059575944436807,
      "grad_norm": 0.5938929915428162,
      "learning_rate": 3.906419878362974e-06,
      "loss": 0.0108,
      "step": 1869560
    },
    {
      "epoch": 3.0596086748754607,
      "grad_norm": 0.11241883039474487,
      "learning_rate": 3.906353986149457e-06,
      "loss": 0.0154,
      "step": 1869580
    },
    {
      "epoch": 3.059641405314114,
      "grad_norm": 0.06880328059196472,
      "learning_rate": 3.90628809393594e-06,
      "loss": 0.0083,
      "step": 1869600
    },
    {
      "epoch": 3.0596741357527675,
      "grad_norm": 0.10091321915388107,
      "learning_rate": 3.906222201722423e-06,
      "loss": 0.0092,
      "step": 1869620
    },
    {
      "epoch": 3.0597068661914206,
      "grad_norm": 0.4126010239124298,
      "learning_rate": 3.906156309508906e-06,
      "loss": 0.0095,
      "step": 1869640
    },
    {
      "epoch": 3.059739596630074,
      "grad_norm": 0.23674653470516205,
      "learning_rate": 3.906090417295388e-06,
      "loss": 0.0103,
      "step": 1869660
    },
    {
      "epoch": 3.0597723270687274,
      "grad_norm": 0.14979048073291779,
      "learning_rate": 3.906024525081871e-06,
      "loss": 0.0078,
      "step": 1869680
    },
    {
      "epoch": 3.0598050575073805,
      "grad_norm": 0.12817032635211945,
      "learning_rate": 3.905958632868354e-06,
      "loss": 0.0154,
      "step": 1869700
    },
    {
      "epoch": 3.059837787946034,
      "grad_norm": 0.11184199154376984,
      "learning_rate": 3.905892740654837e-06,
      "loss": 0.0106,
      "step": 1869720
    },
    {
      "epoch": 3.0598705183846873,
      "grad_norm": 0.690067708492279,
      "learning_rate": 3.90582684844132e-06,
      "loss": 0.0135,
      "step": 1869740
    },
    {
      "epoch": 3.059903248823341,
      "grad_norm": 0.4573006331920624,
      "learning_rate": 3.905760956227803e-06,
      "loss": 0.0122,
      "step": 1869760
    },
    {
      "epoch": 3.059935979261994,
      "grad_norm": 0.4105490446090698,
      "learning_rate": 3.905695064014286e-06,
      "loss": 0.0144,
      "step": 1869780
    },
    {
      "epoch": 3.0599687097006476,
      "grad_norm": 1.1264644861221313,
      "learning_rate": 3.905629171800769e-06,
      "loss": 0.0116,
      "step": 1869800
    },
    {
      "epoch": 3.0600014401393008,
      "grad_norm": 0.13778239488601685,
      "learning_rate": 3.905563279587252e-06,
      "loss": 0.0093,
      "step": 1869820
    },
    {
      "epoch": 3.060034170577954,
      "grad_norm": 0.1589585691690445,
      "learning_rate": 3.905497387373735e-06,
      "loss": 0.0121,
      "step": 1869840
    },
    {
      "epoch": 3.0600669010166075,
      "grad_norm": 0.3299814462661743,
      "learning_rate": 3.9054314951602175e-06,
      "loss": 0.0095,
      "step": 1869860
    },
    {
      "epoch": 3.0600996314552606,
      "grad_norm": 0.244212806224823,
      "learning_rate": 3.9053656029467e-06,
      "loss": 0.0258,
      "step": 1869880
    },
    {
      "epoch": 3.0601323618939142,
      "grad_norm": 1.31679105758667,
      "learning_rate": 3.905299710733183e-06,
      "loss": 0.0141,
      "step": 1869900
    },
    {
      "epoch": 3.0601650923325674,
      "grad_norm": 0.12556788325309753,
      "learning_rate": 3.905233818519666e-06,
      "loss": 0.0136,
      "step": 1869920
    },
    {
      "epoch": 3.060197822771221,
      "grad_norm": 0.48349907994270325,
      "learning_rate": 3.9051679263061484e-06,
      "loss": 0.0226,
      "step": 1869940
    },
    {
      "epoch": 3.060230553209874,
      "grad_norm": 0.3391910195350647,
      "learning_rate": 3.905102034092631e-06,
      "loss": 0.0106,
      "step": 1869960
    },
    {
      "epoch": 3.0602632836485273,
      "grad_norm": 0.46152275800704956,
      "learning_rate": 3.905036141879115e-06,
      "loss": 0.012,
      "step": 1869980
    },
    {
      "epoch": 3.060296014087181,
      "grad_norm": 0.3991076350212097,
      "learning_rate": 3.9049702496655975e-06,
      "loss": 0.0089,
      "step": 1870000
    },
    {
      "epoch": 3.060328744525834,
      "grad_norm": 0.4130169749259949,
      "learning_rate": 3.90490435745208e-06,
      "loss": 0.0147,
      "step": 1870020
    },
    {
      "epoch": 3.0603614749644876,
      "grad_norm": 0.832125723361969,
      "learning_rate": 3.904838465238563e-06,
      "loss": 0.0119,
      "step": 1870040
    },
    {
      "epoch": 3.0603942054031408,
      "grad_norm": 0.43866464495658875,
      "learning_rate": 3.904772573025046e-06,
      "loss": 0.0096,
      "step": 1870060
    },
    {
      "epoch": 3.060426935841794,
      "grad_norm": 1.6632131338119507,
      "learning_rate": 3.9047066808115285e-06,
      "loss": 0.0164,
      "step": 1870080
    },
    {
      "epoch": 3.0604596662804475,
      "grad_norm": 0.4665241241455078,
      "learning_rate": 3.904640788598011e-06,
      "loss": 0.0141,
      "step": 1870100
    },
    {
      "epoch": 3.0604923967191007,
      "grad_norm": 0.2076660692691803,
      "learning_rate": 3.904574896384495e-06,
      "loss": 0.0081,
      "step": 1870120
    },
    {
      "epoch": 3.0605251271577543,
      "grad_norm": 0.4984419345855713,
      "learning_rate": 3.9045090041709775e-06,
      "loss": 0.013,
      "step": 1870140
    },
    {
      "epoch": 3.0605578575964074,
      "grad_norm": 0.06441265344619751,
      "learning_rate": 3.90444311195746e-06,
      "loss": 0.0088,
      "step": 1870160
    },
    {
      "epoch": 3.060590588035061,
      "grad_norm": 0.20969700813293457,
      "learning_rate": 3.904377219743943e-06,
      "loss": 0.0076,
      "step": 1870180
    },
    {
      "epoch": 3.060623318473714,
      "grad_norm": 0.10557648539543152,
      "learning_rate": 3.904311327530427e-06,
      "loss": 0.0133,
      "step": 1870200
    },
    {
      "epoch": 3.0606560489123673,
      "grad_norm": 0.26713359355926514,
      "learning_rate": 3.904245435316909e-06,
      "loss": 0.0086,
      "step": 1870220
    },
    {
      "epoch": 3.060688779351021,
      "grad_norm": 0.4989575743675232,
      "learning_rate": 3.904179543103392e-06,
      "loss": 0.0129,
      "step": 1870240
    },
    {
      "epoch": 3.060721509789674,
      "grad_norm": 0.25216957926750183,
      "learning_rate": 3.904113650889875e-06,
      "loss": 0.0107,
      "step": 1870260
    },
    {
      "epoch": 3.0607542402283276,
      "grad_norm": 0.32503193616867065,
      "learning_rate": 3.9040477586763575e-06,
      "loss": 0.0089,
      "step": 1870280
    },
    {
      "epoch": 3.060786970666981,
      "grad_norm": 0.27820703387260437,
      "learning_rate": 3.90398186646284e-06,
      "loss": 0.0133,
      "step": 1870300
    },
    {
      "epoch": 3.0608197011056344,
      "grad_norm": 0.14327701926231384,
      "learning_rate": 3.903915974249323e-06,
      "loss": 0.0127,
      "step": 1870320
    },
    {
      "epoch": 3.0608524315442875,
      "grad_norm": 0.08028849959373474,
      "learning_rate": 3.903850082035806e-06,
      "loss": 0.0145,
      "step": 1870340
    },
    {
      "epoch": 3.0608851619829407,
      "grad_norm": 0.21896134316921234,
      "learning_rate": 3.9037841898222885e-06,
      "loss": 0.0159,
      "step": 1870360
    },
    {
      "epoch": 3.0609178924215943,
      "grad_norm": 0.48709291219711304,
      "learning_rate": 3.903718297608772e-06,
      "loss": 0.0113,
      "step": 1870380
    },
    {
      "epoch": 3.0609506228602474,
      "grad_norm": 0.5244449377059937,
      "learning_rate": 3.903652405395255e-06,
      "loss": 0.0073,
      "step": 1870400
    },
    {
      "epoch": 3.060983353298901,
      "grad_norm": 0.7428314685821533,
      "learning_rate": 3.9035865131817376e-06,
      "loss": 0.0119,
      "step": 1870420
    },
    {
      "epoch": 3.061016083737554,
      "grad_norm": 0.6988013982772827,
      "learning_rate": 3.90352062096822e-06,
      "loss": 0.0099,
      "step": 1870440
    },
    {
      "epoch": 3.0610488141762078,
      "grad_norm": 0.13334892690181732,
      "learning_rate": 3.903454728754703e-06,
      "loss": 0.0149,
      "step": 1870460
    },
    {
      "epoch": 3.061081544614861,
      "grad_norm": 0.20275242626667023,
      "learning_rate": 3.903388836541187e-06,
      "loss": 0.0103,
      "step": 1870480
    },
    {
      "epoch": 3.061114275053514,
      "grad_norm": 0.2599702477455139,
      "learning_rate": 3.903322944327669e-06,
      "loss": 0.0087,
      "step": 1870500
    },
    {
      "epoch": 3.0611470054921677,
      "grad_norm": 0.21602340042591095,
      "learning_rate": 3.903257052114152e-06,
      "loss": 0.0089,
      "step": 1870520
    },
    {
      "epoch": 3.061179735930821,
      "grad_norm": 0.2060203105211258,
      "learning_rate": 3.903191159900635e-06,
      "loss": 0.0072,
      "step": 1870540
    },
    {
      "epoch": 3.0612124663694744,
      "grad_norm": 0.05841468647122383,
      "learning_rate": 3.903125267687118e-06,
      "loss": 0.0102,
      "step": 1870560
    },
    {
      "epoch": 3.0612451968081276,
      "grad_norm": 0.22874319553375244,
      "learning_rate": 3.9030593754736e-06,
      "loss": 0.0085,
      "step": 1870580
    },
    {
      "epoch": 3.061277927246781,
      "grad_norm": 0.3050835132598877,
      "learning_rate": 3.902993483260084e-06,
      "loss": 0.0112,
      "step": 1870600
    },
    {
      "epoch": 3.0613106576854343,
      "grad_norm": 0.23684902489185333,
      "learning_rate": 3.902927591046567e-06,
      "loss": 0.0089,
      "step": 1870620
    },
    {
      "epoch": 3.0613433881240875,
      "grad_norm": 0.1617536097764969,
      "learning_rate": 3.902861698833049e-06,
      "loss": 0.0072,
      "step": 1870640
    },
    {
      "epoch": 3.061376118562741,
      "grad_norm": 0.2140110582113266,
      "learning_rate": 3.902795806619532e-06,
      "loss": 0.0092,
      "step": 1870660
    },
    {
      "epoch": 3.061408849001394,
      "grad_norm": 0.13520421087741852,
      "learning_rate": 3.902729914406015e-06,
      "loss": 0.014,
      "step": 1870680
    },
    {
      "epoch": 3.061441579440048,
      "grad_norm": 0.463159441947937,
      "learning_rate": 3.902664022192498e-06,
      "loss": 0.0133,
      "step": 1870700
    },
    {
      "epoch": 3.061474309878701,
      "grad_norm": 0.5418708920478821,
      "learning_rate": 3.90259812997898e-06,
      "loss": 0.0134,
      "step": 1870720
    },
    {
      "epoch": 3.0615070403173545,
      "grad_norm": 0.5983135104179382,
      "learning_rate": 3.902532237765463e-06,
      "loss": 0.0136,
      "step": 1870740
    },
    {
      "epoch": 3.0615397707560077,
      "grad_norm": 0.21283979713916779,
      "learning_rate": 3.902466345551946e-06,
      "loss": 0.0078,
      "step": 1870760
    },
    {
      "epoch": 3.061572501194661,
      "grad_norm": 0.7591607570648193,
      "learning_rate": 3.9024004533384294e-06,
      "loss": 0.0107,
      "step": 1870780
    },
    {
      "epoch": 3.0616052316333144,
      "grad_norm": 0.724016547203064,
      "learning_rate": 3.902334561124912e-06,
      "loss": 0.0177,
      "step": 1870800
    },
    {
      "epoch": 3.0616379620719676,
      "grad_norm": 0.16236717998981476,
      "learning_rate": 3.902268668911396e-06,
      "loss": 0.0123,
      "step": 1870820
    },
    {
      "epoch": 3.061670692510621,
      "grad_norm": 0.16457773745059967,
      "learning_rate": 3.9022027766978785e-06,
      "loss": 0.0093,
      "step": 1870840
    },
    {
      "epoch": 3.0617034229492743,
      "grad_norm": 0.7938358783721924,
      "learning_rate": 3.902136884484361e-06,
      "loss": 0.0165,
      "step": 1870860
    },
    {
      "epoch": 3.061736153387928,
      "grad_norm": 0.13283675909042358,
      "learning_rate": 3.902070992270844e-06,
      "loss": 0.011,
      "step": 1870880
    },
    {
      "epoch": 3.061768883826581,
      "grad_norm": 0.1391841024160385,
      "learning_rate": 3.902005100057327e-06,
      "loss": 0.0114,
      "step": 1870900
    },
    {
      "epoch": 3.0618016142652342,
      "grad_norm": 0.12091906368732452,
      "learning_rate": 3.9019392078438095e-06,
      "loss": 0.0076,
      "step": 1870920
    },
    {
      "epoch": 3.061834344703888,
      "grad_norm": 0.33700650930404663,
      "learning_rate": 3.901873315630292e-06,
      "loss": 0.0111,
      "step": 1870940
    },
    {
      "epoch": 3.061867075142541,
      "grad_norm": 0.28941142559051514,
      "learning_rate": 3.901807423416775e-06,
      "loss": 0.0134,
      "step": 1870960
    },
    {
      "epoch": 3.0618998055811946,
      "grad_norm": 0.7389898896217346,
      "learning_rate": 3.901741531203258e-06,
      "loss": 0.0105,
      "step": 1870980
    },
    {
      "epoch": 3.0619325360198477,
      "grad_norm": 0.17240215837955475,
      "learning_rate": 3.901675638989741e-06,
      "loss": 0.0109,
      "step": 1871000
    },
    {
      "epoch": 3.061965266458501,
      "grad_norm": 0.630658745765686,
      "learning_rate": 3.901609746776224e-06,
      "loss": 0.0084,
      "step": 1871020
    },
    {
      "epoch": 3.0619979968971545,
      "grad_norm": 0.17788457870483398,
      "learning_rate": 3.901543854562707e-06,
      "loss": 0.0108,
      "step": 1871040
    },
    {
      "epoch": 3.0620307273358076,
      "grad_norm": 0.7613242268562317,
      "learning_rate": 3.9014779623491895e-06,
      "loss": 0.0156,
      "step": 1871060
    },
    {
      "epoch": 3.062063457774461,
      "grad_norm": 0.22842448949813843,
      "learning_rate": 3.901412070135672e-06,
      "loss": 0.0095,
      "step": 1871080
    },
    {
      "epoch": 3.0620961882131144,
      "grad_norm": 0.06835374981164932,
      "learning_rate": 3.901346177922155e-06,
      "loss": 0.0101,
      "step": 1871100
    },
    {
      "epoch": 3.062128918651768,
      "grad_norm": 0.6076014041900635,
      "learning_rate": 3.901280285708638e-06,
      "loss": 0.0146,
      "step": 1871120
    },
    {
      "epoch": 3.062161649090421,
      "grad_norm": 0.22850853204727173,
      "learning_rate": 3.9012143934951204e-06,
      "loss": 0.0114,
      "step": 1871140
    },
    {
      "epoch": 3.0621943795290743,
      "grad_norm": 0.08959591388702393,
      "learning_rate": 3.901148501281604e-06,
      "loss": 0.0094,
      "step": 1871160
    },
    {
      "epoch": 3.062227109967728,
      "grad_norm": 0.17933233082294464,
      "learning_rate": 3.901082609068087e-06,
      "loss": 0.0176,
      "step": 1871180
    },
    {
      "epoch": 3.062259840406381,
      "grad_norm": 0.31138336658477783,
      "learning_rate": 3.9010167168545695e-06,
      "loss": 0.0074,
      "step": 1871200
    },
    {
      "epoch": 3.0622925708450346,
      "grad_norm": 0.4931463301181793,
      "learning_rate": 3.900950824641053e-06,
      "loss": 0.0183,
      "step": 1871220
    },
    {
      "epoch": 3.0623253012836877,
      "grad_norm": 0.21579959988594055,
      "learning_rate": 3.900884932427536e-06,
      "loss": 0.0174,
      "step": 1871240
    },
    {
      "epoch": 3.0623580317223413,
      "grad_norm": 0.13388489186763763,
      "learning_rate": 3.9008190402140186e-06,
      "loss": 0.0096,
      "step": 1871260
    },
    {
      "epoch": 3.0623907621609945,
      "grad_norm": 0.2361477166414261,
      "learning_rate": 3.900753148000501e-06,
      "loss": 0.0089,
      "step": 1871280
    },
    {
      "epoch": 3.0624234925996476,
      "grad_norm": 0.19352422654628754,
      "learning_rate": 3.900687255786984e-06,
      "loss": 0.0128,
      "step": 1871300
    },
    {
      "epoch": 3.0624562230383012,
      "grad_norm": 0.2768021523952484,
      "learning_rate": 3.900621363573467e-06,
      "loss": 0.0134,
      "step": 1871320
    },
    {
      "epoch": 3.0624889534769544,
      "grad_norm": 0.14480184018611908,
      "learning_rate": 3.9005554713599495e-06,
      "loss": 0.0123,
      "step": 1871340
    },
    {
      "epoch": 3.062521683915608,
      "grad_norm": 0.11773200333118439,
      "learning_rate": 3.900489579146432e-06,
      "loss": 0.0094,
      "step": 1871360
    },
    {
      "epoch": 3.062554414354261,
      "grad_norm": 0.39745011925697327,
      "learning_rate": 3.900423686932915e-06,
      "loss": 0.0106,
      "step": 1871380
    },
    {
      "epoch": 3.0625871447929147,
      "grad_norm": 0.1674344688653946,
      "learning_rate": 3.900357794719399e-06,
      "loss": 0.013,
      "step": 1871400
    },
    {
      "epoch": 3.062619875231568,
      "grad_norm": 0.21075846254825592,
      "learning_rate": 3.900291902505881e-06,
      "loss": 0.011,
      "step": 1871420
    },
    {
      "epoch": 3.062652605670221,
      "grad_norm": 0.2450537234544754,
      "learning_rate": 3.900226010292364e-06,
      "loss": 0.0108,
      "step": 1871440
    },
    {
      "epoch": 3.0626853361088746,
      "grad_norm": 0.12834955751895905,
      "learning_rate": 3.900160118078847e-06,
      "loss": 0.0102,
      "step": 1871460
    },
    {
      "epoch": 3.0627180665475278,
      "grad_norm": 0.6503484845161438,
      "learning_rate": 3.9000942258653296e-06,
      "loss": 0.0114,
      "step": 1871480
    },
    {
      "epoch": 3.0627507969861814,
      "grad_norm": 0.16961055994033813,
      "learning_rate": 3.900028333651812e-06,
      "loss": 0.01,
      "step": 1871500
    },
    {
      "epoch": 3.0627835274248345,
      "grad_norm": 0.15109087526798248,
      "learning_rate": 3.899962441438296e-06,
      "loss": 0.0091,
      "step": 1871520
    },
    {
      "epoch": 3.062816257863488,
      "grad_norm": 0.4249444007873535,
      "learning_rate": 3.899896549224779e-06,
      "loss": 0.0121,
      "step": 1871540
    },
    {
      "epoch": 3.0628489883021413,
      "grad_norm": 0.5229130983352661,
      "learning_rate": 3.899830657011261e-06,
      "loss": 0.0128,
      "step": 1871560
    },
    {
      "epoch": 3.0628817187407944,
      "grad_norm": 0.16982613503932953,
      "learning_rate": 3.899764764797744e-06,
      "loss": 0.0076,
      "step": 1871580
    },
    {
      "epoch": 3.062914449179448,
      "grad_norm": 0.07934301346540451,
      "learning_rate": 3.899698872584227e-06,
      "loss": 0.0096,
      "step": 1871600
    },
    {
      "epoch": 3.062947179618101,
      "grad_norm": 0.46476811170578003,
      "learning_rate": 3.8996329803707104e-06,
      "loss": 0.0203,
      "step": 1871620
    },
    {
      "epoch": 3.0629799100567547,
      "grad_norm": 0.38441020250320435,
      "learning_rate": 3.899567088157193e-06,
      "loss": 0.0122,
      "step": 1871640
    },
    {
      "epoch": 3.063012640495408,
      "grad_norm": 0.06950423121452332,
      "learning_rate": 3.899501195943676e-06,
      "loss": 0.0108,
      "step": 1871660
    },
    {
      "epoch": 3.063045370934061,
      "grad_norm": 0.20428305864334106,
      "learning_rate": 3.899435303730159e-06,
      "loss": 0.0149,
      "step": 1871680
    },
    {
      "epoch": 3.0630781013727146,
      "grad_norm": 0.1417386829853058,
      "learning_rate": 3.899369411516641e-06,
      "loss": 0.0153,
      "step": 1871700
    },
    {
      "epoch": 3.063110831811368,
      "grad_norm": 0.33558717370033264,
      "learning_rate": 3.899303519303124e-06,
      "loss": 0.0101,
      "step": 1871720
    },
    {
      "epoch": 3.0631435622500214,
      "grad_norm": 0.36055058240890503,
      "learning_rate": 3.899237627089607e-06,
      "loss": 0.0148,
      "step": 1871740
    },
    {
      "epoch": 3.0631762926886745,
      "grad_norm": 0.6030104160308838,
      "learning_rate": 3.89917173487609e-06,
      "loss": 0.0118,
      "step": 1871760
    },
    {
      "epoch": 3.063209023127328,
      "grad_norm": 0.18879450857639313,
      "learning_rate": 3.899105842662572e-06,
      "loss": 0.0094,
      "step": 1871780
    },
    {
      "epoch": 3.0632417535659813,
      "grad_norm": 0.45924997329711914,
      "learning_rate": 3.899039950449056e-06,
      "loss": 0.0126,
      "step": 1871800
    },
    {
      "epoch": 3.0632744840046344,
      "grad_norm": 0.32555171847343445,
      "learning_rate": 3.898974058235539e-06,
      "loss": 0.01,
      "step": 1871820
    },
    {
      "epoch": 3.063307214443288,
      "grad_norm": 0.7538012266159058,
      "learning_rate": 3.898908166022021e-06,
      "loss": 0.0106,
      "step": 1871840
    },
    {
      "epoch": 3.063339944881941,
      "grad_norm": 0.1633729487657547,
      "learning_rate": 3.898842273808504e-06,
      "loss": 0.0111,
      "step": 1871860
    },
    {
      "epoch": 3.0633726753205948,
      "grad_norm": 0.43321049213409424,
      "learning_rate": 3.898776381594988e-06,
      "loss": 0.0154,
      "step": 1871880
    },
    {
      "epoch": 3.063405405759248,
      "grad_norm": 0.4779670834541321,
      "learning_rate": 3.8987104893814705e-06,
      "loss": 0.0157,
      "step": 1871900
    },
    {
      "epoch": 3.0634381361979015,
      "grad_norm": 0.33708059787750244,
      "learning_rate": 3.898644597167953e-06,
      "loss": 0.0106,
      "step": 1871920
    },
    {
      "epoch": 3.0634708666365547,
      "grad_norm": 0.19308780133724213,
      "learning_rate": 3.898578704954436e-06,
      "loss": 0.0121,
      "step": 1871940
    },
    {
      "epoch": 3.063503597075208,
      "grad_norm": 0.21832862496376038,
      "learning_rate": 3.898512812740919e-06,
      "loss": 0.0128,
      "step": 1871960
    },
    {
      "epoch": 3.0635363275138614,
      "grad_norm": 1.0156803131103516,
      "learning_rate": 3.8984469205274014e-06,
      "loss": 0.018,
      "step": 1871980
    },
    {
      "epoch": 3.0635690579525146,
      "grad_norm": 0.32410579919815063,
      "learning_rate": 3.898381028313884e-06,
      "loss": 0.0125,
      "step": 1872000
    },
    {
      "epoch": 3.063601788391168,
      "grad_norm": 0.29925790429115295,
      "learning_rate": 3.898315136100368e-06,
      "loss": 0.0087,
      "step": 1872020
    },
    {
      "epoch": 3.0636345188298213,
      "grad_norm": 0.4160052239894867,
      "learning_rate": 3.8982492438868505e-06,
      "loss": 0.0091,
      "step": 1872040
    },
    {
      "epoch": 3.063667249268475,
      "grad_norm": 0.17689500749111176,
      "learning_rate": 3.898183351673333e-06,
      "loss": 0.0115,
      "step": 1872060
    },
    {
      "epoch": 3.063699979707128,
      "grad_norm": 0.18207858502864838,
      "learning_rate": 3.898117459459816e-06,
      "loss": 0.0096,
      "step": 1872080
    },
    {
      "epoch": 3.063732710145781,
      "grad_norm": 0.38082098960876465,
      "learning_rate": 3.898051567246299e-06,
      "loss": 0.0126,
      "step": 1872100
    },
    {
      "epoch": 3.063765440584435,
      "grad_norm": 1.5393946170806885,
      "learning_rate": 3.8979856750327815e-06,
      "loss": 0.0101,
      "step": 1872120
    },
    {
      "epoch": 3.063798171023088,
      "grad_norm": 0.6854410171508789,
      "learning_rate": 3.897919782819264e-06,
      "loss": 0.0117,
      "step": 1872140
    },
    {
      "epoch": 3.0638309014617415,
      "grad_norm": 0.570019006729126,
      "learning_rate": 3.897853890605747e-06,
      "loss": 0.0086,
      "step": 1872160
    },
    {
      "epoch": 3.0638636319003947,
      "grad_norm": 0.11845757067203522,
      "learning_rate": 3.89778799839223e-06,
      "loss": 0.0091,
      "step": 1872180
    },
    {
      "epoch": 3.0638963623390483,
      "grad_norm": 0.19479437172412872,
      "learning_rate": 3.897722106178713e-06,
      "loss": 0.0078,
      "step": 1872200
    },
    {
      "epoch": 3.0639290927777014,
      "grad_norm": 0.2033255249261856,
      "learning_rate": 3.897656213965196e-06,
      "loss": 0.0104,
      "step": 1872220
    },
    {
      "epoch": 3.0639618232163546,
      "grad_norm": 0.41691264510154724,
      "learning_rate": 3.89759032175168e-06,
      "loss": 0.0171,
      "step": 1872240
    },
    {
      "epoch": 3.063994553655008,
      "grad_norm": 0.23285017907619476,
      "learning_rate": 3.897524429538162e-06,
      "loss": 0.0125,
      "step": 1872260
    },
    {
      "epoch": 3.0640272840936613,
      "grad_norm": 0.27060386538505554,
      "learning_rate": 3.897458537324645e-06,
      "loss": 0.0123,
      "step": 1872280
    },
    {
      "epoch": 3.064060014532315,
      "grad_norm": 0.12667766213417053,
      "learning_rate": 3.897392645111128e-06,
      "loss": 0.0141,
      "step": 1872300
    },
    {
      "epoch": 3.064092744970968,
      "grad_norm": 0.1925288438796997,
      "learning_rate": 3.8973267528976106e-06,
      "loss": 0.0138,
      "step": 1872320
    },
    {
      "epoch": 3.0641254754096217,
      "grad_norm": 0.41943836212158203,
      "learning_rate": 3.897260860684093e-06,
      "loss": 0.0169,
      "step": 1872340
    },
    {
      "epoch": 3.064158205848275,
      "grad_norm": 0.18734005093574524,
      "learning_rate": 3.897194968470576e-06,
      "loss": 0.0116,
      "step": 1872360
    },
    {
      "epoch": 3.064190936286928,
      "grad_norm": 0.4200570285320282,
      "learning_rate": 3.897129076257059e-06,
      "loss": 0.0143,
      "step": 1872380
    },
    {
      "epoch": 3.0642236667255816,
      "grad_norm": 0.3614893853664398,
      "learning_rate": 3.8970631840435415e-06,
      "loss": 0.0131,
      "step": 1872400
    },
    {
      "epoch": 3.0642563971642347,
      "grad_norm": 0.46990451216697693,
      "learning_rate": 3.896997291830025e-06,
      "loss": 0.0158,
      "step": 1872420
    },
    {
      "epoch": 3.0642891276028883,
      "grad_norm": 0.22109422087669373,
      "learning_rate": 3.896931399616508e-06,
      "loss": 0.0087,
      "step": 1872440
    },
    {
      "epoch": 3.0643218580415414,
      "grad_norm": 0.2537526786327362,
      "learning_rate": 3.8968655074029906e-06,
      "loss": 0.0094,
      "step": 1872460
    },
    {
      "epoch": 3.0643545884801946,
      "grad_norm": 0.15620283782482147,
      "learning_rate": 3.896799615189473e-06,
      "loss": 0.0111,
      "step": 1872480
    },
    {
      "epoch": 3.064387318918848,
      "grad_norm": 0.47902539372444153,
      "learning_rate": 3.896733722975956e-06,
      "loss": 0.0163,
      "step": 1872500
    },
    {
      "epoch": 3.0644200493575013,
      "grad_norm": 0.11534940451383591,
      "learning_rate": 3.896667830762439e-06,
      "loss": 0.0121,
      "step": 1872520
    },
    {
      "epoch": 3.064452779796155,
      "grad_norm": 0.17714941501617432,
      "learning_rate": 3.8966019385489215e-06,
      "loss": 0.016,
      "step": 1872540
    },
    {
      "epoch": 3.064485510234808,
      "grad_norm": 0.3355475962162018,
      "learning_rate": 3.896536046335404e-06,
      "loss": 0.006,
      "step": 1872560
    },
    {
      "epoch": 3.0645182406734617,
      "grad_norm": 0.3946157395839691,
      "learning_rate": 3.896470154121888e-06,
      "loss": 0.0167,
      "step": 1872580
    },
    {
      "epoch": 3.064550971112115,
      "grad_norm": 0.19022703170776367,
      "learning_rate": 3.896404261908371e-06,
      "loss": 0.0124,
      "step": 1872600
    },
    {
      "epoch": 3.064583701550768,
      "grad_norm": 0.3349504768848419,
      "learning_rate": 3.896338369694853e-06,
      "loss": 0.0108,
      "step": 1872620
    },
    {
      "epoch": 3.0646164319894216,
      "grad_norm": 0.23767229914665222,
      "learning_rate": 3.896272477481337e-06,
      "loss": 0.0131,
      "step": 1872640
    },
    {
      "epoch": 3.0646491624280747,
      "grad_norm": 0.2908186912536621,
      "learning_rate": 3.89620658526782e-06,
      "loss": 0.0158,
      "step": 1872660
    },
    {
      "epoch": 3.0646818928667283,
      "grad_norm": 0.1923680603504181,
      "learning_rate": 3.896140693054302e-06,
      "loss": 0.009,
      "step": 1872680
    },
    {
      "epoch": 3.0647146233053815,
      "grad_norm": 0.19018039107322693,
      "learning_rate": 3.896074800840785e-06,
      "loss": 0.0106,
      "step": 1872700
    },
    {
      "epoch": 3.064747353744035,
      "grad_norm": 0.08996211737394333,
      "learning_rate": 3.896008908627268e-06,
      "loss": 0.0118,
      "step": 1872720
    },
    {
      "epoch": 3.064780084182688,
      "grad_norm": 0.25810232758522034,
      "learning_rate": 3.895943016413751e-06,
      "loss": 0.0132,
      "step": 1872740
    },
    {
      "epoch": 3.0648128146213414,
      "grad_norm": 0.3120914101600647,
      "learning_rate": 3.895877124200233e-06,
      "loss": 0.0144,
      "step": 1872760
    },
    {
      "epoch": 3.064845545059995,
      "grad_norm": 0.34410151839256287,
      "learning_rate": 3.895811231986716e-06,
      "loss": 0.0148,
      "step": 1872780
    },
    {
      "epoch": 3.064878275498648,
      "grad_norm": 0.18789783120155334,
      "learning_rate": 3.895745339773199e-06,
      "loss": 0.0077,
      "step": 1872800
    },
    {
      "epoch": 3.0649110059373017,
      "grad_norm": 0.3061777353286743,
      "learning_rate": 3.8956794475596824e-06,
      "loss": 0.0075,
      "step": 1872820
    },
    {
      "epoch": 3.064943736375955,
      "grad_norm": 0.1384255737066269,
      "learning_rate": 3.895613555346165e-06,
      "loss": 0.0132,
      "step": 1872840
    },
    {
      "epoch": 3.0649764668146084,
      "grad_norm": 0.3678017556667328,
      "learning_rate": 3.895547663132648e-06,
      "loss": 0.0119,
      "step": 1872860
    },
    {
      "epoch": 3.0650091972532616,
      "grad_norm": 0.18725642561912537,
      "learning_rate": 3.895481770919131e-06,
      "loss": 0.0075,
      "step": 1872880
    },
    {
      "epoch": 3.0650419276919147,
      "grad_norm": 0.332044392824173,
      "learning_rate": 3.895415878705613e-06,
      "loss": 0.0108,
      "step": 1872900
    },
    {
      "epoch": 3.0650746581305683,
      "grad_norm": 0.22631427645683289,
      "learning_rate": 3.895349986492096e-06,
      "loss": 0.0149,
      "step": 1872920
    },
    {
      "epoch": 3.0651073885692215,
      "grad_norm": 0.19065611064434052,
      "learning_rate": 3.89528409427858e-06,
      "loss": 0.012,
      "step": 1872940
    },
    {
      "epoch": 3.065140119007875,
      "grad_norm": 0.15996648371219635,
      "learning_rate": 3.8952182020650625e-06,
      "loss": 0.0125,
      "step": 1872960
    },
    {
      "epoch": 3.0651728494465282,
      "grad_norm": 0.1266295313835144,
      "learning_rate": 3.895152309851545e-06,
      "loss": 0.0117,
      "step": 1872980
    },
    {
      "epoch": 3.065205579885182,
      "grad_norm": 0.18767140805721283,
      "learning_rate": 3.895086417638028e-06,
      "loss": 0.0125,
      "step": 1873000
    },
    {
      "epoch": 3.065238310323835,
      "grad_norm": 0.2043369561433792,
      "learning_rate": 3.895020525424511e-06,
      "loss": 0.0101,
      "step": 1873020
    },
    {
      "epoch": 3.065271040762488,
      "grad_norm": 0.5047414898872375,
      "learning_rate": 3.894954633210994e-06,
      "loss": 0.0089,
      "step": 1873040
    },
    {
      "epoch": 3.0653037712011417,
      "grad_norm": 0.1331505924463272,
      "learning_rate": 3.894888740997477e-06,
      "loss": 0.0136,
      "step": 1873060
    },
    {
      "epoch": 3.065336501639795,
      "grad_norm": 0.10392775386571884,
      "learning_rate": 3.89482284878396e-06,
      "loss": 0.0142,
      "step": 1873080
    },
    {
      "epoch": 3.0653692320784485,
      "grad_norm": 0.08137775212526321,
      "learning_rate": 3.8947569565704425e-06,
      "loss": 0.0098,
      "step": 1873100
    },
    {
      "epoch": 3.0654019625171016,
      "grad_norm": 0.6375195980072021,
      "learning_rate": 3.894691064356925e-06,
      "loss": 0.017,
      "step": 1873120
    },
    {
      "epoch": 3.0654346929557548,
      "grad_norm": 0.13896097242832184,
      "learning_rate": 3.894625172143408e-06,
      "loss": 0.0118,
      "step": 1873140
    },
    {
      "epoch": 3.0654674233944084,
      "grad_norm": 0.2505725622177124,
      "learning_rate": 3.894559279929891e-06,
      "loss": 0.0157,
      "step": 1873160
    },
    {
      "epoch": 3.0655001538330615,
      "grad_norm": 0.18980415165424347,
      "learning_rate": 3.8944933877163734e-06,
      "loss": 0.0081,
      "step": 1873180
    },
    {
      "epoch": 3.065532884271715,
      "grad_norm": 0.6908071041107178,
      "learning_rate": 3.894427495502856e-06,
      "loss": 0.0156,
      "step": 1873200
    },
    {
      "epoch": 3.0655656147103683,
      "grad_norm": 0.2365199774503708,
      "learning_rate": 3.89436160328934e-06,
      "loss": 0.0087,
      "step": 1873220
    },
    {
      "epoch": 3.065598345149022,
      "grad_norm": 0.24599500000476837,
      "learning_rate": 3.8942957110758225e-06,
      "loss": 0.0175,
      "step": 1873240
    },
    {
      "epoch": 3.065631075587675,
      "grad_norm": 0.5425105690956116,
      "learning_rate": 3.894229818862305e-06,
      "loss": 0.0174,
      "step": 1873260
    },
    {
      "epoch": 3.065663806026328,
      "grad_norm": 0.15079519152641296,
      "learning_rate": 3.894163926648789e-06,
      "loss": 0.0105,
      "step": 1873280
    },
    {
      "epoch": 3.0656965364649817,
      "grad_norm": 0.33118078112602234,
      "learning_rate": 3.8940980344352716e-06,
      "loss": 0.0104,
      "step": 1873300
    },
    {
      "epoch": 3.065729266903635,
      "grad_norm": 0.2669590711593628,
      "learning_rate": 3.894032142221754e-06,
      "loss": 0.0118,
      "step": 1873320
    },
    {
      "epoch": 3.0657619973422885,
      "grad_norm": 0.23655346035957336,
      "learning_rate": 3.893966250008237e-06,
      "loss": 0.0088,
      "step": 1873340
    },
    {
      "epoch": 3.0657947277809416,
      "grad_norm": 0.17698688805103302,
      "learning_rate": 3.89390035779472e-06,
      "loss": 0.0105,
      "step": 1873360
    },
    {
      "epoch": 3.0658274582195952,
      "grad_norm": 0.47305911779403687,
      "learning_rate": 3.8938344655812025e-06,
      "loss": 0.0117,
      "step": 1873380
    },
    {
      "epoch": 3.0658601886582484,
      "grad_norm": 0.23802445828914642,
      "learning_rate": 3.893768573367685e-06,
      "loss": 0.0084,
      "step": 1873400
    },
    {
      "epoch": 3.0658929190969015,
      "grad_norm": 0.29455652832984924,
      "learning_rate": 3.893702681154168e-06,
      "loss": 0.0114,
      "step": 1873420
    },
    {
      "epoch": 3.065925649535555,
      "grad_norm": 0.20278093218803406,
      "learning_rate": 3.893636788940652e-06,
      "loss": 0.0122,
      "step": 1873440
    },
    {
      "epoch": 3.0659583799742083,
      "grad_norm": 0.08183779567480087,
      "learning_rate": 3.893570896727134e-06,
      "loss": 0.0098,
      "step": 1873460
    },
    {
      "epoch": 3.065991110412862,
      "grad_norm": 0.2868090569972992,
      "learning_rate": 3.893505004513617e-06,
      "loss": 0.0072,
      "step": 1873480
    },
    {
      "epoch": 3.066023840851515,
      "grad_norm": 0.2481311410665512,
      "learning_rate": 3.8934391123001e-06,
      "loss": 0.0123,
      "step": 1873500
    },
    {
      "epoch": 3.0660565712901686,
      "grad_norm": 0.24564631283283234,
      "learning_rate": 3.8933732200865826e-06,
      "loss": 0.0099,
      "step": 1873520
    },
    {
      "epoch": 3.0660893017288218,
      "grad_norm": 0.16241587698459625,
      "learning_rate": 3.893307327873065e-06,
      "loss": 0.0073,
      "step": 1873540
    },
    {
      "epoch": 3.066122032167475,
      "grad_norm": 0.42221856117248535,
      "learning_rate": 3.893241435659548e-06,
      "loss": 0.0121,
      "step": 1873560
    },
    {
      "epoch": 3.0661547626061285,
      "grad_norm": 0.14742165803909302,
      "learning_rate": 3.893175543446031e-06,
      "loss": 0.0086,
      "step": 1873580
    },
    {
      "epoch": 3.0661874930447817,
      "grad_norm": 0.10129227489233017,
      "learning_rate": 3.893109651232514e-06,
      "loss": 0.0106,
      "step": 1873600
    },
    {
      "epoch": 3.0662202234834353,
      "grad_norm": 0.32557860016822815,
      "learning_rate": 3.893043759018997e-06,
      "loss": 0.0085,
      "step": 1873620
    },
    {
      "epoch": 3.0662529539220884,
      "grad_norm": 0.34938520193099976,
      "learning_rate": 3.89297786680548e-06,
      "loss": 0.0116,
      "step": 1873640
    },
    {
      "epoch": 3.066285684360742,
      "grad_norm": 0.0851803794503212,
      "learning_rate": 3.8929119745919634e-06,
      "loss": 0.008,
      "step": 1873660
    },
    {
      "epoch": 3.066318414799395,
      "grad_norm": 0.5854329466819763,
      "learning_rate": 3.892846082378446e-06,
      "loss": 0.0129,
      "step": 1873680
    },
    {
      "epoch": 3.0663511452380483,
      "grad_norm": 0.48044446110725403,
      "learning_rate": 3.892780190164929e-06,
      "loss": 0.01,
      "step": 1873700
    },
    {
      "epoch": 3.066383875676702,
      "grad_norm": 0.49238309264183044,
      "learning_rate": 3.892714297951412e-06,
      "loss": 0.0128,
      "step": 1873720
    },
    {
      "epoch": 3.066416606115355,
      "grad_norm": 0.4364241659641266,
      "learning_rate": 3.892648405737894e-06,
      "loss": 0.01,
      "step": 1873740
    },
    {
      "epoch": 3.0664493365540086,
      "grad_norm": 0.155767560005188,
      "learning_rate": 3.892582513524377e-06,
      "loss": 0.0106,
      "step": 1873760
    },
    {
      "epoch": 3.066482066992662,
      "grad_norm": 0.08348778635263443,
      "learning_rate": 3.89251662131086e-06,
      "loss": 0.0109,
      "step": 1873780
    },
    {
      "epoch": 3.0665147974313154,
      "grad_norm": 0.18251323699951172,
      "learning_rate": 3.892450729097343e-06,
      "loss": 0.0106,
      "step": 1873800
    },
    {
      "epoch": 3.0665475278699685,
      "grad_norm": 0.2768222987651825,
      "learning_rate": 3.892384836883825e-06,
      "loss": 0.0131,
      "step": 1873820
    },
    {
      "epoch": 3.0665802583086217,
      "grad_norm": 0.9131230115890503,
      "learning_rate": 3.892318944670309e-06,
      "loss": 0.0121,
      "step": 1873840
    },
    {
      "epoch": 3.0666129887472753,
      "grad_norm": 0.4944453239440918,
      "learning_rate": 3.892253052456792e-06,
      "loss": 0.0129,
      "step": 1873860
    },
    {
      "epoch": 3.0666457191859284,
      "grad_norm": 0.2347993105649948,
      "learning_rate": 3.892187160243274e-06,
      "loss": 0.014,
      "step": 1873880
    },
    {
      "epoch": 3.066678449624582,
      "grad_norm": 0.2964465022087097,
      "learning_rate": 3.892121268029757e-06,
      "loss": 0.0063,
      "step": 1873900
    },
    {
      "epoch": 3.066711180063235,
      "grad_norm": 0.55070561170578,
      "learning_rate": 3.89205537581624e-06,
      "loss": 0.012,
      "step": 1873920
    },
    {
      "epoch": 3.0667439105018888,
      "grad_norm": 0.3401770293712616,
      "learning_rate": 3.891989483602723e-06,
      "loss": 0.0137,
      "step": 1873940
    },
    {
      "epoch": 3.066776640940542,
      "grad_norm": 0.3606475293636322,
      "learning_rate": 3.891923591389205e-06,
      "loss": 0.018,
      "step": 1873960
    },
    {
      "epoch": 3.066809371379195,
      "grad_norm": 0.08319570869207382,
      "learning_rate": 3.891857699175688e-06,
      "loss": 0.0116,
      "step": 1873980
    },
    {
      "epoch": 3.0668421018178487,
      "grad_norm": 0.37238165736198425,
      "learning_rate": 3.891791806962172e-06,
      "loss": 0.0081,
      "step": 1874000
    },
    {
      "epoch": 3.066874832256502,
      "grad_norm": 0.22810187935829163,
      "learning_rate": 3.8917259147486544e-06,
      "loss": 0.0093,
      "step": 1874020
    },
    {
      "epoch": 3.0669075626951554,
      "grad_norm": 0.22035998106002808,
      "learning_rate": 3.891660022535137e-06,
      "loss": 0.0088,
      "step": 1874040
    },
    {
      "epoch": 3.0669402931338086,
      "grad_norm": 0.12833717465400696,
      "learning_rate": 3.891594130321621e-06,
      "loss": 0.0109,
      "step": 1874060
    },
    {
      "epoch": 3.0669730235724617,
      "grad_norm": 0.16026245057582855,
      "learning_rate": 3.8915282381081035e-06,
      "loss": 0.0099,
      "step": 1874080
    },
    {
      "epoch": 3.0670057540111153,
      "grad_norm": 0.17691665887832642,
      "learning_rate": 3.891462345894586e-06,
      "loss": 0.017,
      "step": 1874100
    },
    {
      "epoch": 3.0670384844497685,
      "grad_norm": 0.3392051160335541,
      "learning_rate": 3.891396453681069e-06,
      "loss": 0.0132,
      "step": 1874120
    },
    {
      "epoch": 3.067071214888422,
      "grad_norm": 0.19641649723052979,
      "learning_rate": 3.891330561467552e-06,
      "loss": 0.0146,
      "step": 1874140
    },
    {
      "epoch": 3.067103945327075,
      "grad_norm": 1.6408989429473877,
      "learning_rate": 3.8912646692540345e-06,
      "loss": 0.0177,
      "step": 1874160
    },
    {
      "epoch": 3.067136675765729,
      "grad_norm": 0.5133549571037292,
      "learning_rate": 3.891198777040517e-06,
      "loss": 0.0144,
      "step": 1874180
    },
    {
      "epoch": 3.067169406204382,
      "grad_norm": 0.23620076477527618,
      "learning_rate": 3.891132884827e-06,
      "loss": 0.0106,
      "step": 1874200
    },
    {
      "epoch": 3.067202136643035,
      "grad_norm": 0.09861059486865997,
      "learning_rate": 3.891066992613483e-06,
      "loss": 0.0123,
      "step": 1874220
    },
    {
      "epoch": 3.0672348670816887,
      "grad_norm": 0.7269899845123291,
      "learning_rate": 3.891001100399966e-06,
      "loss": 0.0148,
      "step": 1874240
    },
    {
      "epoch": 3.067267597520342,
      "grad_norm": 0.28230196237564087,
      "learning_rate": 3.890935208186449e-06,
      "loss": 0.0111,
      "step": 1874260
    },
    {
      "epoch": 3.0673003279589954,
      "grad_norm": 0.11353279650211334,
      "learning_rate": 3.890869315972932e-06,
      "loss": 0.0095,
      "step": 1874280
    },
    {
      "epoch": 3.0673330583976486,
      "grad_norm": 0.46227332949638367,
      "learning_rate": 3.8908034237594145e-06,
      "loss": 0.0134,
      "step": 1874300
    },
    {
      "epoch": 3.067365788836302,
      "grad_norm": 0.17338888347148895,
      "learning_rate": 3.890737531545897e-06,
      "loss": 0.0089,
      "step": 1874320
    },
    {
      "epoch": 3.0673985192749553,
      "grad_norm": 0.2932419180870056,
      "learning_rate": 3.890671639332381e-06,
      "loss": 0.0067,
      "step": 1874340
    },
    {
      "epoch": 3.0674312497136085,
      "grad_norm": 0.24957653880119324,
      "learning_rate": 3.8906057471188636e-06,
      "loss": 0.0118,
      "step": 1874360
    },
    {
      "epoch": 3.067463980152262,
      "grad_norm": 0.1363602578639984,
      "learning_rate": 3.890539854905346e-06,
      "loss": 0.0062,
      "step": 1874380
    },
    {
      "epoch": 3.067496710590915,
      "grad_norm": 0.2661762535572052,
      "learning_rate": 3.890473962691829e-06,
      "loss": 0.0131,
      "step": 1874400
    },
    {
      "epoch": 3.067529441029569,
      "grad_norm": 0.28353872895240784,
      "learning_rate": 3.890408070478312e-06,
      "loss": 0.0121,
      "step": 1874420
    },
    {
      "epoch": 3.067562171468222,
      "grad_norm": 0.23344804346561432,
      "learning_rate": 3.8903421782647945e-06,
      "loss": 0.0122,
      "step": 1874440
    },
    {
      "epoch": 3.0675949019068756,
      "grad_norm": 0.272631973028183,
      "learning_rate": 3.890276286051278e-06,
      "loss": 0.015,
      "step": 1874460
    },
    {
      "epoch": 3.0676276323455287,
      "grad_norm": 0.10994625091552734,
      "learning_rate": 3.890210393837761e-06,
      "loss": 0.0098,
      "step": 1874480
    },
    {
      "epoch": 3.067660362784182,
      "grad_norm": 0.1263434737920761,
      "learning_rate": 3.890144501624244e-06,
      "loss": 0.0142,
      "step": 1874500
    },
    {
      "epoch": 3.0676930932228355,
      "grad_norm": 0.13762949407100677,
      "learning_rate": 3.890078609410726e-06,
      "loss": 0.0094,
      "step": 1874520
    },
    {
      "epoch": 3.0677258236614886,
      "grad_norm": 0.30669647455215454,
      "learning_rate": 3.890012717197209e-06,
      "loss": 0.009,
      "step": 1874540
    },
    {
      "epoch": 3.067758554100142,
      "grad_norm": 0.17378434538841248,
      "learning_rate": 3.889946824983692e-06,
      "loss": 0.0134,
      "step": 1874560
    },
    {
      "epoch": 3.0677912845387953,
      "grad_norm": 0.27928030490875244,
      "learning_rate": 3.8898809327701745e-06,
      "loss": 0.013,
      "step": 1874580
    },
    {
      "epoch": 3.0678240149774485,
      "grad_norm": 0.4370073974132538,
      "learning_rate": 3.889815040556657e-06,
      "loss": 0.0096,
      "step": 1874600
    },
    {
      "epoch": 3.067856745416102,
      "grad_norm": 0.23221386969089508,
      "learning_rate": 3.88974914834314e-06,
      "loss": 0.0114,
      "step": 1874620
    },
    {
      "epoch": 3.0678894758547552,
      "grad_norm": 0.5464701652526855,
      "learning_rate": 3.889683256129624e-06,
      "loss": 0.0115,
      "step": 1874640
    },
    {
      "epoch": 3.067922206293409,
      "grad_norm": 0.07488112151622772,
      "learning_rate": 3.889617363916106e-06,
      "loss": 0.0138,
      "step": 1874660
    },
    {
      "epoch": 3.067954936732062,
      "grad_norm": 0.5390161871910095,
      "learning_rate": 3.889551471702589e-06,
      "loss": 0.0133,
      "step": 1874680
    },
    {
      "epoch": 3.0679876671707156,
      "grad_norm": 0.618938148021698,
      "learning_rate": 3.889485579489073e-06,
      "loss": 0.013,
      "step": 1874700
    },
    {
      "epoch": 3.0680203976093687,
      "grad_norm": 1.4717286825180054,
      "learning_rate": 3.889419687275555e-06,
      "loss": 0.013,
      "step": 1874720
    },
    {
      "epoch": 3.068053128048022,
      "grad_norm": 0.429345041513443,
      "learning_rate": 3.889353795062038e-06,
      "loss": 0.0086,
      "step": 1874740
    },
    {
      "epoch": 3.0680858584866755,
      "grad_norm": 0.06577923148870468,
      "learning_rate": 3.889287902848521e-06,
      "loss": 0.0085,
      "step": 1874760
    },
    {
      "epoch": 3.0681185889253286,
      "grad_norm": 0.4491615295410156,
      "learning_rate": 3.889222010635004e-06,
      "loss": 0.0102,
      "step": 1874780
    },
    {
      "epoch": 3.068151319363982,
      "grad_norm": 0.5211291909217834,
      "learning_rate": 3.889156118421486e-06,
      "loss": 0.0113,
      "step": 1874800
    },
    {
      "epoch": 3.0681840498026354,
      "grad_norm": 0.1552966833114624,
      "learning_rate": 3.889090226207969e-06,
      "loss": 0.0101,
      "step": 1874820
    },
    {
      "epoch": 3.068216780241289,
      "grad_norm": 0.33587560057640076,
      "learning_rate": 3.889024333994452e-06,
      "loss": 0.0092,
      "step": 1874840
    },
    {
      "epoch": 3.068249510679942,
      "grad_norm": 0.49772337079048157,
      "learning_rate": 3.8889584417809354e-06,
      "loss": 0.0085,
      "step": 1874860
    },
    {
      "epoch": 3.0682822411185953,
      "grad_norm": 0.48029670119285583,
      "learning_rate": 3.888892549567418e-06,
      "loss": 0.0163,
      "step": 1874880
    },
    {
      "epoch": 3.068314971557249,
      "grad_norm": 0.3707251250743866,
      "learning_rate": 3.888826657353901e-06,
      "loss": 0.0132,
      "step": 1874900
    },
    {
      "epoch": 3.068347701995902,
      "grad_norm": 0.13598619401454926,
      "learning_rate": 3.888760765140384e-06,
      "loss": 0.0132,
      "step": 1874920
    },
    {
      "epoch": 3.0683804324345556,
      "grad_norm": 0.2304617166519165,
      "learning_rate": 3.888694872926866e-06,
      "loss": 0.0082,
      "step": 1874940
    },
    {
      "epoch": 3.0684131628732088,
      "grad_norm": 0.11902176588773727,
      "learning_rate": 3.888628980713349e-06,
      "loss": 0.0189,
      "step": 1874960
    },
    {
      "epoch": 3.0684458933118623,
      "grad_norm": 0.4843798279762268,
      "learning_rate": 3.888563088499832e-06,
      "loss": 0.0093,
      "step": 1874980
    },
    {
      "epoch": 3.0684786237505155,
      "grad_norm": 0.34781375527381897,
      "learning_rate": 3.888497196286315e-06,
      "loss": 0.0122,
      "step": 1875000
    },
    {
      "epoch": 3.0685113541891686,
      "grad_norm": 0.17583425343036652,
      "learning_rate": 3.888431304072798e-06,
      "loss": 0.0127,
      "step": 1875020
    },
    {
      "epoch": 3.0685440846278222,
      "grad_norm": 0.6180002689361572,
      "learning_rate": 3.888365411859281e-06,
      "loss": 0.0088,
      "step": 1875040
    },
    {
      "epoch": 3.0685768150664754,
      "grad_norm": 0.23800234496593475,
      "learning_rate": 3.888299519645764e-06,
      "loss": 0.0095,
      "step": 1875060
    },
    {
      "epoch": 3.068609545505129,
      "grad_norm": 0.16027933359146118,
      "learning_rate": 3.888233627432247e-06,
      "loss": 0.0091,
      "step": 1875080
    },
    {
      "epoch": 3.068642275943782,
      "grad_norm": 0.19174544513225555,
      "learning_rate": 3.88816773521873e-06,
      "loss": 0.0114,
      "step": 1875100
    },
    {
      "epoch": 3.0686750063824357,
      "grad_norm": 0.5397921204566956,
      "learning_rate": 3.888101843005213e-06,
      "loss": 0.0089,
      "step": 1875120
    },
    {
      "epoch": 3.068707736821089,
      "grad_norm": 0.3522287607192993,
      "learning_rate": 3.8880359507916955e-06,
      "loss": 0.0137,
      "step": 1875140
    },
    {
      "epoch": 3.068740467259742,
      "grad_norm": 0.2746991515159607,
      "learning_rate": 3.887970058578178e-06,
      "loss": 0.0125,
      "step": 1875160
    },
    {
      "epoch": 3.0687731976983956,
      "grad_norm": 0.06905797123908997,
      "learning_rate": 3.887904166364661e-06,
      "loss": 0.0137,
      "step": 1875180
    },
    {
      "epoch": 3.0688059281370488,
      "grad_norm": 0.32028621435165405,
      "learning_rate": 3.887838274151144e-06,
      "loss": 0.0093,
      "step": 1875200
    },
    {
      "epoch": 3.0688386585757024,
      "grad_norm": 0.6640032529830933,
      "learning_rate": 3.8877723819376264e-06,
      "loss": 0.0153,
      "step": 1875220
    },
    {
      "epoch": 3.0688713890143555,
      "grad_norm": 0.5379658341407776,
      "learning_rate": 3.887706489724109e-06,
      "loss": 0.0152,
      "step": 1875240
    },
    {
      "epoch": 3.068904119453009,
      "grad_norm": 0.1664925068616867,
      "learning_rate": 3.887640597510593e-06,
      "loss": 0.0157,
      "step": 1875260
    },
    {
      "epoch": 3.0689368498916623,
      "grad_norm": 2.537205696105957,
      "learning_rate": 3.8875747052970755e-06,
      "loss": 0.0098,
      "step": 1875280
    },
    {
      "epoch": 3.0689695803303154,
      "grad_norm": 0.07127994298934937,
      "learning_rate": 3.887508813083558e-06,
      "loss": 0.0147,
      "step": 1875300
    },
    {
      "epoch": 3.069002310768969,
      "grad_norm": 0.48715996742248535,
      "learning_rate": 3.887442920870041e-06,
      "loss": 0.0115,
      "step": 1875320
    },
    {
      "epoch": 3.069035041207622,
      "grad_norm": 0.34278586506843567,
      "learning_rate": 3.887377028656524e-06,
      "loss": 0.0074,
      "step": 1875340
    },
    {
      "epoch": 3.0690677716462758,
      "grad_norm": 0.30743932723999023,
      "learning_rate": 3.8873111364430065e-06,
      "loss": 0.0127,
      "step": 1875360
    },
    {
      "epoch": 3.069100502084929,
      "grad_norm": 0.09655554592609406,
      "learning_rate": 3.887245244229489e-06,
      "loss": 0.0116,
      "step": 1875380
    },
    {
      "epoch": 3.0691332325235825,
      "grad_norm": 0.1680341213941574,
      "learning_rate": 3.887179352015973e-06,
      "loss": 0.0115,
      "step": 1875400
    },
    {
      "epoch": 3.0691659629622356,
      "grad_norm": 0.16176356375217438,
      "learning_rate": 3.8871134598024555e-06,
      "loss": 0.0115,
      "step": 1875420
    },
    {
      "epoch": 3.069198693400889,
      "grad_norm": 0.14263848960399628,
      "learning_rate": 3.887047567588938e-06,
      "loss": 0.0114,
      "step": 1875440
    },
    {
      "epoch": 3.0692314238395424,
      "grad_norm": 0.18017351627349854,
      "learning_rate": 3.886981675375421e-06,
      "loss": 0.0133,
      "step": 1875460
    },
    {
      "epoch": 3.0692641542781955,
      "grad_norm": 0.6983484625816345,
      "learning_rate": 3.886915783161905e-06,
      "loss": 0.0106,
      "step": 1875480
    },
    {
      "epoch": 3.069296884716849,
      "grad_norm": 0.30550575256347656,
      "learning_rate": 3.886849890948387e-06,
      "loss": 0.0229,
      "step": 1875500
    },
    {
      "epoch": 3.0693296151555023,
      "grad_norm": 0.24408960342407227,
      "learning_rate": 3.88678399873487e-06,
      "loss": 0.013,
      "step": 1875520
    },
    {
      "epoch": 3.0693623455941554,
      "grad_norm": 0.3698174059391022,
      "learning_rate": 3.886718106521353e-06,
      "loss": 0.0144,
      "step": 1875540
    },
    {
      "epoch": 3.069395076032809,
      "grad_norm": 0.3539227843284607,
      "learning_rate": 3.8866522143078356e-06,
      "loss": 0.0088,
      "step": 1875560
    },
    {
      "epoch": 3.069427806471462,
      "grad_norm": 0.1167762354016304,
      "learning_rate": 3.886586322094318e-06,
      "loss": 0.01,
      "step": 1875580
    },
    {
      "epoch": 3.0694605369101158,
      "grad_norm": 0.2871069610118866,
      "learning_rate": 3.886520429880801e-06,
      "loss": 0.0104,
      "step": 1875600
    },
    {
      "epoch": 3.069493267348769,
      "grad_norm": 0.24254277348518372,
      "learning_rate": 3.886454537667284e-06,
      "loss": 0.0077,
      "step": 1875620
    },
    {
      "epoch": 3.0695259977874225,
      "grad_norm": 0.37327662110328674,
      "learning_rate": 3.8863886454537665e-06,
      "loss": 0.0153,
      "step": 1875640
    },
    {
      "epoch": 3.0695587282260757,
      "grad_norm": 0.2535798251628876,
      "learning_rate": 3.88632275324025e-06,
      "loss": 0.0131,
      "step": 1875660
    },
    {
      "epoch": 3.069591458664729,
      "grad_norm": 0.24792370200157166,
      "learning_rate": 3.886256861026733e-06,
      "loss": 0.0102,
      "step": 1875680
    },
    {
      "epoch": 3.0696241891033824,
      "grad_norm": 0.1915757656097412,
      "learning_rate": 3.886190968813216e-06,
      "loss": 0.0158,
      "step": 1875700
    },
    {
      "epoch": 3.0696569195420356,
      "grad_norm": 0.44869109988212585,
      "learning_rate": 3.886125076599698e-06,
      "loss": 0.0113,
      "step": 1875720
    },
    {
      "epoch": 3.069689649980689,
      "grad_norm": 0.068476103246212,
      "learning_rate": 3.886059184386181e-06,
      "loss": 0.0085,
      "step": 1875740
    },
    {
      "epoch": 3.0697223804193423,
      "grad_norm": 0.48420873284339905,
      "learning_rate": 3.885993292172665e-06,
      "loss": 0.0234,
      "step": 1875760
    },
    {
      "epoch": 3.069755110857996,
      "grad_norm": 0.4859113395214081,
      "learning_rate": 3.885927399959147e-06,
      "loss": 0.0098,
      "step": 1875780
    },
    {
      "epoch": 3.069787841296649,
      "grad_norm": 0.1510172188282013,
      "learning_rate": 3.88586150774563e-06,
      "loss": 0.0108,
      "step": 1875800
    },
    {
      "epoch": 3.069820571735302,
      "grad_norm": 0.49953335523605347,
      "learning_rate": 3.885795615532113e-06,
      "loss": 0.0121,
      "step": 1875820
    },
    {
      "epoch": 3.069853302173956,
      "grad_norm": 0.15038655698299408,
      "learning_rate": 3.885729723318596e-06,
      "loss": 0.0095,
      "step": 1875840
    },
    {
      "epoch": 3.069886032612609,
      "grad_norm": 0.22174152731895447,
      "learning_rate": 3.885663831105078e-06,
      "loss": 0.01,
      "step": 1875860
    },
    {
      "epoch": 3.0699187630512625,
      "grad_norm": 0.4851328432559967,
      "learning_rate": 3.885597938891562e-06,
      "loss": 0.0152,
      "step": 1875880
    },
    {
      "epoch": 3.0699514934899157,
      "grad_norm": 0.2706417739391327,
      "learning_rate": 3.885532046678045e-06,
      "loss": 0.0141,
      "step": 1875900
    },
    {
      "epoch": 3.0699842239285693,
      "grad_norm": 0.559960126876831,
      "learning_rate": 3.885466154464527e-06,
      "loss": 0.0181,
      "step": 1875920
    },
    {
      "epoch": 3.0700169543672224,
      "grad_norm": 0.11511044204235077,
      "learning_rate": 3.88540026225101e-06,
      "loss": 0.0078,
      "step": 1875940
    },
    {
      "epoch": 3.0700496848058756,
      "grad_norm": 0.2542594075202942,
      "learning_rate": 3.885334370037493e-06,
      "loss": 0.0097,
      "step": 1875960
    },
    {
      "epoch": 3.070082415244529,
      "grad_norm": 0.30500856041908264,
      "learning_rate": 3.885268477823976e-06,
      "loss": 0.0126,
      "step": 1875980
    },
    {
      "epoch": 3.0701151456831823,
      "grad_norm": 0.2046528458595276,
      "learning_rate": 3.885202585610458e-06,
      "loss": 0.0118,
      "step": 1876000
    },
    {
      "epoch": 3.070147876121836,
      "grad_norm": 0.1512228101491928,
      "learning_rate": 3.885136693396941e-06,
      "loss": 0.0097,
      "step": 1876020
    },
    {
      "epoch": 3.070180606560489,
      "grad_norm": 0.2756156921386719,
      "learning_rate": 3.885070801183424e-06,
      "loss": 0.0092,
      "step": 1876040
    },
    {
      "epoch": 3.0702133369991427,
      "grad_norm": 0.20355430245399475,
      "learning_rate": 3.8850049089699074e-06,
      "loss": 0.0109,
      "step": 1876060
    },
    {
      "epoch": 3.070246067437796,
      "grad_norm": 0.11513438820838928,
      "learning_rate": 3.88493901675639e-06,
      "loss": 0.0145,
      "step": 1876080
    },
    {
      "epoch": 3.070278797876449,
      "grad_norm": 0.5193799138069153,
      "learning_rate": 3.884873124542874e-06,
      "loss": 0.019,
      "step": 1876100
    },
    {
      "epoch": 3.0703115283151026,
      "grad_norm": 0.3564838469028473,
      "learning_rate": 3.8848072323293565e-06,
      "loss": 0.0101,
      "step": 1876120
    },
    {
      "epoch": 3.0703442587537557,
      "grad_norm": 1.226586103439331,
      "learning_rate": 3.884741340115839e-06,
      "loss": 0.0099,
      "step": 1876140
    },
    {
      "epoch": 3.0703769891924093,
      "grad_norm": 0.2149754762649536,
      "learning_rate": 3.884675447902322e-06,
      "loss": 0.0116,
      "step": 1876160
    },
    {
      "epoch": 3.0704097196310625,
      "grad_norm": 0.3082236051559448,
      "learning_rate": 3.884609555688805e-06,
      "loss": 0.0113,
      "step": 1876180
    },
    {
      "epoch": 3.0704424500697156,
      "grad_norm": 0.4619333744049072,
      "learning_rate": 3.8845436634752875e-06,
      "loss": 0.0129,
      "step": 1876200
    },
    {
      "epoch": 3.070475180508369,
      "grad_norm": 0.6502541303634644,
      "learning_rate": 3.88447777126177e-06,
      "loss": 0.0126,
      "step": 1876220
    },
    {
      "epoch": 3.0705079109470224,
      "grad_norm": 0.206141859292984,
      "learning_rate": 3.884411879048253e-06,
      "loss": 0.0124,
      "step": 1876240
    },
    {
      "epoch": 3.070540641385676,
      "grad_norm": 0.52991783618927,
      "learning_rate": 3.884345986834736e-06,
      "loss": 0.0095,
      "step": 1876260
    },
    {
      "epoch": 3.070573371824329,
      "grad_norm": 0.2619691789150238,
      "learning_rate": 3.884280094621219e-06,
      "loss": 0.0117,
      "step": 1876280
    },
    {
      "epoch": 3.0706061022629827,
      "grad_norm": 0.5871158838272095,
      "learning_rate": 3.884214202407702e-06,
      "loss": 0.0152,
      "step": 1876300
    },
    {
      "epoch": 3.070638832701636,
      "grad_norm": 0.6845752000808716,
      "learning_rate": 3.884148310194185e-06,
      "loss": 0.0142,
      "step": 1876320
    },
    {
      "epoch": 3.070671563140289,
      "grad_norm": 0.25505927205085754,
      "learning_rate": 3.8840824179806675e-06,
      "loss": 0.0106,
      "step": 1876340
    },
    {
      "epoch": 3.0707042935789426,
      "grad_norm": 0.3097776472568512,
      "learning_rate": 3.88401652576715e-06,
      "loss": 0.0109,
      "step": 1876360
    },
    {
      "epoch": 3.0707370240175957,
      "grad_norm": 0.1311589777469635,
      "learning_rate": 3.883950633553633e-06,
      "loss": 0.0132,
      "step": 1876380
    },
    {
      "epoch": 3.0707697544562493,
      "grad_norm": 0.5667203068733215,
      "learning_rate": 3.883884741340116e-06,
      "loss": 0.0203,
      "step": 1876400
    },
    {
      "epoch": 3.0708024848949025,
      "grad_norm": 0.2680477797985077,
      "learning_rate": 3.8838188491265985e-06,
      "loss": 0.0154,
      "step": 1876420
    },
    {
      "epoch": 3.070835215333556,
      "grad_norm": 0.7020863890647888,
      "learning_rate": 3.883752956913082e-06,
      "loss": 0.0102,
      "step": 1876440
    },
    {
      "epoch": 3.0708679457722092,
      "grad_norm": 0.4446598291397095,
      "learning_rate": 3.883687064699565e-06,
      "loss": 0.0139,
      "step": 1876460
    },
    {
      "epoch": 3.0709006762108624,
      "grad_norm": 0.21338807046413422,
      "learning_rate": 3.8836211724860475e-06,
      "loss": 0.0125,
      "step": 1876480
    },
    {
      "epoch": 3.070933406649516,
      "grad_norm": 0.7414094805717468,
      "learning_rate": 3.883555280272531e-06,
      "loss": 0.0209,
      "step": 1876500
    },
    {
      "epoch": 3.070966137088169,
      "grad_norm": 0.3616024851799011,
      "learning_rate": 3.883489388059014e-06,
      "loss": 0.014,
      "step": 1876520
    },
    {
      "epoch": 3.0709988675268227,
      "grad_norm": 0.18729685246944427,
      "learning_rate": 3.883423495845497e-06,
      "loss": 0.0124,
      "step": 1876540
    },
    {
      "epoch": 3.071031597965476,
      "grad_norm": 0.46353432536125183,
      "learning_rate": 3.883357603631979e-06,
      "loss": 0.0102,
      "step": 1876560
    },
    {
      "epoch": 3.0710643284041295,
      "grad_norm": 0.17013345658779144,
      "learning_rate": 3.883291711418462e-06,
      "loss": 0.0114,
      "step": 1876580
    },
    {
      "epoch": 3.0710970588427826,
      "grad_norm": 0.6440556049346924,
      "learning_rate": 3.883225819204945e-06,
      "loss": 0.0083,
      "step": 1876600
    },
    {
      "epoch": 3.0711297892814358,
      "grad_norm": 0.4530413746833801,
      "learning_rate": 3.8831599269914275e-06,
      "loss": 0.0088,
      "step": 1876620
    },
    {
      "epoch": 3.0711625197200894,
      "grad_norm": 0.20072250068187714,
      "learning_rate": 3.88309403477791e-06,
      "loss": 0.017,
      "step": 1876640
    },
    {
      "epoch": 3.0711952501587425,
      "grad_norm": 0.38582146167755127,
      "learning_rate": 3.883028142564393e-06,
      "loss": 0.0142,
      "step": 1876660
    },
    {
      "epoch": 3.071227980597396,
      "grad_norm": 0.14976723492145538,
      "learning_rate": 3.882962250350877e-06,
      "loss": 0.0125,
      "step": 1876680
    },
    {
      "epoch": 3.0712607110360493,
      "grad_norm": 0.3109387755393982,
      "learning_rate": 3.882896358137359e-06,
      "loss": 0.0079,
      "step": 1876700
    },
    {
      "epoch": 3.071293441474703,
      "grad_norm": 0.8679628968238831,
      "learning_rate": 3.882830465923842e-06,
      "loss": 0.0142,
      "step": 1876720
    },
    {
      "epoch": 3.071326171913356,
      "grad_norm": 0.8504238128662109,
      "learning_rate": 3.882764573710325e-06,
      "loss": 0.0126,
      "step": 1876740
    },
    {
      "epoch": 3.071358902352009,
      "grad_norm": 0.16790813207626343,
      "learning_rate": 3.8826986814968076e-06,
      "loss": 0.011,
      "step": 1876760
    },
    {
      "epoch": 3.0713916327906627,
      "grad_norm": 0.5044171810150146,
      "learning_rate": 3.88263278928329e-06,
      "loss": 0.0126,
      "step": 1876780
    },
    {
      "epoch": 3.071424363229316,
      "grad_norm": 0.13788172602653503,
      "learning_rate": 3.882566897069774e-06,
      "loss": 0.0153,
      "step": 1876800
    },
    {
      "epoch": 3.0714570936679695,
      "grad_norm": 0.37476345896720886,
      "learning_rate": 3.882501004856257e-06,
      "loss": 0.0092,
      "step": 1876820
    },
    {
      "epoch": 3.0714898241066226,
      "grad_norm": 0.1989758461713791,
      "learning_rate": 3.882435112642739e-06,
      "loss": 0.0105,
      "step": 1876840
    },
    {
      "epoch": 3.0715225545452762,
      "grad_norm": 0.1931375414133072,
      "learning_rate": 3.882369220429222e-06,
      "loss": 0.0126,
      "step": 1876860
    },
    {
      "epoch": 3.0715552849839294,
      "grad_norm": 0.28441691398620605,
      "learning_rate": 3.882303328215705e-06,
      "loss": 0.0167,
      "step": 1876880
    },
    {
      "epoch": 3.0715880154225825,
      "grad_norm": 0.47905653715133667,
      "learning_rate": 3.8822374360021884e-06,
      "loss": 0.014,
      "step": 1876900
    },
    {
      "epoch": 3.071620745861236,
      "grad_norm": 0.17688846588134766,
      "learning_rate": 3.882171543788671e-06,
      "loss": 0.015,
      "step": 1876920
    },
    {
      "epoch": 3.0716534762998893,
      "grad_norm": 0.5718627572059631,
      "learning_rate": 3.882105651575154e-06,
      "loss": 0.0103,
      "step": 1876940
    },
    {
      "epoch": 3.071686206738543,
      "grad_norm": 0.31841230392456055,
      "learning_rate": 3.882039759361637e-06,
      "loss": 0.0093,
      "step": 1876960
    },
    {
      "epoch": 3.071718937177196,
      "grad_norm": 0.20513801276683807,
      "learning_rate": 3.881973867148119e-06,
      "loss": 0.0128,
      "step": 1876980
    },
    {
      "epoch": 3.0717516676158496,
      "grad_norm": 0.30826514959335327,
      "learning_rate": 3.881907974934602e-06,
      "loss": 0.0122,
      "step": 1877000
    },
    {
      "epoch": 3.0717843980545028,
      "grad_norm": 0.30799850821495056,
      "learning_rate": 3.881842082721085e-06,
      "loss": 0.013,
      "step": 1877020
    },
    {
      "epoch": 3.071817128493156,
      "grad_norm": 0.5062735080718994,
      "learning_rate": 3.881776190507568e-06,
      "loss": 0.0178,
      "step": 1877040
    },
    {
      "epoch": 3.0718498589318095,
      "grad_norm": 0.1640552431344986,
      "learning_rate": 3.88171029829405e-06,
      "loss": 0.0092,
      "step": 1877060
    },
    {
      "epoch": 3.0718825893704627,
      "grad_norm": 0.16379816830158234,
      "learning_rate": 3.881644406080534e-06,
      "loss": 0.012,
      "step": 1877080
    },
    {
      "epoch": 3.0719153198091163,
      "grad_norm": 0.49246856570243835,
      "learning_rate": 3.881578513867017e-06,
      "loss": 0.0104,
      "step": 1877100
    },
    {
      "epoch": 3.0719480502477694,
      "grad_norm": 0.2040787935256958,
      "learning_rate": 3.8815126216534994e-06,
      "loss": 0.0089,
      "step": 1877120
    },
    {
      "epoch": 3.0719807806864226,
      "grad_norm": 0.15119823813438416,
      "learning_rate": 3.881446729439982e-06,
      "loss": 0.0071,
      "step": 1877140
    },
    {
      "epoch": 3.072013511125076,
      "grad_norm": 0.32386842370033264,
      "learning_rate": 3.881380837226466e-06,
      "loss": 0.0171,
      "step": 1877160
    },
    {
      "epoch": 3.0720462415637293,
      "grad_norm": 0.03183608874678612,
      "learning_rate": 3.8813149450129485e-06,
      "loss": 0.0164,
      "step": 1877180
    },
    {
      "epoch": 3.072078972002383,
      "grad_norm": 0.8121675848960876,
      "learning_rate": 3.881249052799431e-06,
      "loss": 0.0123,
      "step": 1877200
    },
    {
      "epoch": 3.072111702441036,
      "grad_norm": 0.20277710258960724,
      "learning_rate": 3.881183160585914e-06,
      "loss": 0.0073,
      "step": 1877220
    },
    {
      "epoch": 3.0721444328796896,
      "grad_norm": 0.27335724234580994,
      "learning_rate": 3.881117268372397e-06,
      "loss": 0.009,
      "step": 1877240
    },
    {
      "epoch": 3.072177163318343,
      "grad_norm": 0.5017637014389038,
      "learning_rate": 3.8810513761588795e-06,
      "loss": 0.0138,
      "step": 1877260
    },
    {
      "epoch": 3.072209893756996,
      "grad_norm": 0.12852706015110016,
      "learning_rate": 3.880985483945362e-06,
      "loss": 0.0136,
      "step": 1877280
    },
    {
      "epoch": 3.0722426241956495,
      "grad_norm": 1.9726831912994385,
      "learning_rate": 3.880919591731846e-06,
      "loss": 0.0084,
      "step": 1877300
    },
    {
      "epoch": 3.0722753546343027,
      "grad_norm": 0.21092170476913452,
      "learning_rate": 3.8808536995183285e-06,
      "loss": 0.0103,
      "step": 1877320
    },
    {
      "epoch": 3.0723080850729563,
      "grad_norm": 0.6289653182029724,
      "learning_rate": 3.880787807304811e-06,
      "loss": 0.0151,
      "step": 1877340
    },
    {
      "epoch": 3.0723408155116094,
      "grad_norm": 0.3337337076663971,
      "learning_rate": 3.880721915091294e-06,
      "loss": 0.0116,
      "step": 1877360
    },
    {
      "epoch": 3.072373545950263,
      "grad_norm": 0.37992870807647705,
      "learning_rate": 3.880656022877777e-06,
      "loss": 0.0152,
      "step": 1877380
    },
    {
      "epoch": 3.072406276388916,
      "grad_norm": 0.4394855797290802,
      "learning_rate": 3.8805901306642595e-06,
      "loss": 0.0136,
      "step": 1877400
    },
    {
      "epoch": 3.0724390068275693,
      "grad_norm": 0.42243731021881104,
      "learning_rate": 3.880524238450742e-06,
      "loss": 0.0126,
      "step": 1877420
    },
    {
      "epoch": 3.072471737266223,
      "grad_norm": 0.2826988995075226,
      "learning_rate": 3.880458346237225e-06,
      "loss": 0.0115,
      "step": 1877440
    },
    {
      "epoch": 3.072504467704876,
      "grad_norm": 0.13061800599098206,
      "learning_rate": 3.880392454023708e-06,
      "loss": 0.0224,
      "step": 1877460
    },
    {
      "epoch": 3.0725371981435297,
      "grad_norm": 0.3171289265155792,
      "learning_rate": 3.880326561810191e-06,
      "loss": 0.0085,
      "step": 1877480
    },
    {
      "epoch": 3.072569928582183,
      "grad_norm": 0.04807080700993538,
      "learning_rate": 3.880260669596674e-06,
      "loss": 0.0116,
      "step": 1877500
    },
    {
      "epoch": 3.0726026590208364,
      "grad_norm": 0.17662698030471802,
      "learning_rate": 3.880194777383158e-06,
      "loss": 0.0108,
      "step": 1877520
    },
    {
      "epoch": 3.0726353894594896,
      "grad_norm": 0.32824239134788513,
      "learning_rate": 3.88012888516964e-06,
      "loss": 0.0101,
      "step": 1877540
    },
    {
      "epoch": 3.0726681198981427,
      "grad_norm": 0.28675204515457153,
      "learning_rate": 3.880062992956123e-06,
      "loss": 0.0114,
      "step": 1877560
    },
    {
      "epoch": 3.0727008503367963,
      "grad_norm": 0.5996170043945312,
      "learning_rate": 3.879997100742606e-06,
      "loss": 0.0127,
      "step": 1877580
    },
    {
      "epoch": 3.0727335807754494,
      "grad_norm": 0.26153701543807983,
      "learning_rate": 3.8799312085290886e-06,
      "loss": 0.0074,
      "step": 1877600
    },
    {
      "epoch": 3.072766311214103,
      "grad_norm": 0.11510784178972244,
      "learning_rate": 3.879865316315571e-06,
      "loss": 0.0111,
      "step": 1877620
    },
    {
      "epoch": 3.072799041652756,
      "grad_norm": 0.47779256105422974,
      "learning_rate": 3.879799424102054e-06,
      "loss": 0.0112,
      "step": 1877640
    },
    {
      "epoch": 3.0728317720914093,
      "grad_norm": 0.24024662375450134,
      "learning_rate": 3.879733531888537e-06,
      "loss": 0.0102,
      "step": 1877660
    },
    {
      "epoch": 3.072864502530063,
      "grad_norm": 0.04054177552461624,
      "learning_rate": 3.8796676396750195e-06,
      "loss": 0.0129,
      "step": 1877680
    },
    {
      "epoch": 3.072897232968716,
      "grad_norm": 0.8664406538009644,
      "learning_rate": 3.879601747461503e-06,
      "loss": 0.0163,
      "step": 1877700
    },
    {
      "epoch": 3.0729299634073697,
      "grad_norm": 1.9689048528671265,
      "learning_rate": 3.879535855247986e-06,
      "loss": 0.0124,
      "step": 1877720
    },
    {
      "epoch": 3.072962693846023,
      "grad_norm": 0.10155811160802841,
      "learning_rate": 3.879469963034469e-06,
      "loss": 0.0135,
      "step": 1877740
    },
    {
      "epoch": 3.0729954242846764,
      "grad_norm": 0.35922762751579285,
      "learning_rate": 3.879404070820951e-06,
      "loss": 0.0105,
      "step": 1877760
    },
    {
      "epoch": 3.0730281547233296,
      "grad_norm": 0.16959470510482788,
      "learning_rate": 3.879338178607434e-06,
      "loss": 0.0128,
      "step": 1877780
    },
    {
      "epoch": 3.0730608851619827,
      "grad_norm": 0.7159709930419922,
      "learning_rate": 3.879272286393917e-06,
      "loss": 0.0193,
      "step": 1877800
    },
    {
      "epoch": 3.0730936156006363,
      "grad_norm": 0.19170643389225006,
      "learning_rate": 3.8792063941803996e-06,
      "loss": 0.0107,
      "step": 1877820
    },
    {
      "epoch": 3.0731263460392895,
      "grad_norm": 0.33036524057388306,
      "learning_rate": 3.879140501966882e-06,
      "loss": 0.0148,
      "step": 1877840
    },
    {
      "epoch": 3.073159076477943,
      "grad_norm": 0.9779608249664307,
      "learning_rate": 3.879074609753366e-06,
      "loss": 0.015,
      "step": 1877860
    },
    {
      "epoch": 3.073191806916596,
      "grad_norm": 0.15780790150165558,
      "learning_rate": 3.879008717539849e-06,
      "loss": 0.0145,
      "step": 1877880
    },
    {
      "epoch": 3.07322453735525,
      "grad_norm": 0.4445526599884033,
      "learning_rate": 3.878942825326331e-06,
      "loss": 0.0137,
      "step": 1877900
    },
    {
      "epoch": 3.073257267793903,
      "grad_norm": 0.7469474673271179,
      "learning_rate": 3.878876933112815e-06,
      "loss": 0.0153,
      "step": 1877920
    },
    {
      "epoch": 3.073289998232556,
      "grad_norm": 1.9790838956832886,
      "learning_rate": 3.878811040899298e-06,
      "loss": 0.009,
      "step": 1877940
    },
    {
      "epoch": 3.0733227286712097,
      "grad_norm": 0.21259310841560364,
      "learning_rate": 3.8787451486857804e-06,
      "loss": 0.0107,
      "step": 1877960
    },
    {
      "epoch": 3.073355459109863,
      "grad_norm": 0.2613354027271271,
      "learning_rate": 3.878679256472263e-06,
      "loss": 0.0075,
      "step": 1877980
    },
    {
      "epoch": 3.0733881895485164,
      "grad_norm": 0.2516132891178131,
      "learning_rate": 3.878613364258746e-06,
      "loss": 0.0104,
      "step": 1878000
    },
    {
      "epoch": 3.0734209199871696,
      "grad_norm": 0.15953747928142548,
      "learning_rate": 3.878547472045229e-06,
      "loss": 0.0137,
      "step": 1878020
    },
    {
      "epoch": 3.073453650425823,
      "grad_norm": 0.20136243104934692,
      "learning_rate": 3.878481579831711e-06,
      "loss": 0.0176,
      "step": 1878040
    },
    {
      "epoch": 3.0734863808644763,
      "grad_norm": 0.3151146471500397,
      "learning_rate": 3.878415687618194e-06,
      "loss": 0.0109,
      "step": 1878060
    },
    {
      "epoch": 3.0735191113031295,
      "grad_norm": 0.1252504140138626,
      "learning_rate": 3.878349795404677e-06,
      "loss": 0.0133,
      "step": 1878080
    },
    {
      "epoch": 3.073551841741783,
      "grad_norm": 0.3080759346485138,
      "learning_rate": 3.8782839031911605e-06,
      "loss": 0.0176,
      "step": 1878100
    },
    {
      "epoch": 3.0735845721804362,
      "grad_norm": 0.15437789261341095,
      "learning_rate": 3.878218010977643e-06,
      "loss": 0.0118,
      "step": 1878120
    },
    {
      "epoch": 3.07361730261909,
      "grad_norm": 0.568858802318573,
      "learning_rate": 3.878152118764126e-06,
      "loss": 0.0117,
      "step": 1878140
    },
    {
      "epoch": 3.073650033057743,
      "grad_norm": 0.13110610842704773,
      "learning_rate": 3.878086226550609e-06,
      "loss": 0.0094,
      "step": 1878160
    },
    {
      "epoch": 3.0736827634963966,
      "grad_norm": 0.339176207780838,
      "learning_rate": 3.878020334337091e-06,
      "loss": 0.0143,
      "step": 1878180
    },
    {
      "epoch": 3.0737154939350497,
      "grad_norm": 0.3418709635734558,
      "learning_rate": 3.877954442123574e-06,
      "loss": 0.0098,
      "step": 1878200
    },
    {
      "epoch": 3.073748224373703,
      "grad_norm": 0.6161789298057556,
      "learning_rate": 3.877888549910058e-06,
      "loss": 0.0119,
      "step": 1878220
    },
    {
      "epoch": 3.0737809548123565,
      "grad_norm": 0.5851722955703735,
      "learning_rate": 3.8778226576965405e-06,
      "loss": 0.0125,
      "step": 1878240
    },
    {
      "epoch": 3.0738136852510096,
      "grad_norm": 0.6479900479316711,
      "learning_rate": 3.877756765483023e-06,
      "loss": 0.0118,
      "step": 1878260
    },
    {
      "epoch": 3.073846415689663,
      "grad_norm": 0.2001248151063919,
      "learning_rate": 3.877690873269506e-06,
      "loss": 0.0128,
      "step": 1878280
    },
    {
      "epoch": 3.0738791461283164,
      "grad_norm": 0.4199882745742798,
      "learning_rate": 3.877624981055989e-06,
      "loss": 0.0164,
      "step": 1878300
    },
    {
      "epoch": 3.07391187656697,
      "grad_norm": 0.1284913271665573,
      "learning_rate": 3.877559088842472e-06,
      "loss": 0.0109,
      "step": 1878320
    },
    {
      "epoch": 3.073944607005623,
      "grad_norm": 0.2676759958267212,
      "learning_rate": 3.877493196628955e-06,
      "loss": 0.0125,
      "step": 1878340
    },
    {
      "epoch": 3.0739773374442763,
      "grad_norm": 0.4747481048107147,
      "learning_rate": 3.877427304415438e-06,
      "loss": 0.0091,
      "step": 1878360
    },
    {
      "epoch": 3.07401006788293,
      "grad_norm": 0.026842691004276276,
      "learning_rate": 3.8773614122019205e-06,
      "loss": 0.0105,
      "step": 1878380
    },
    {
      "epoch": 3.074042798321583,
      "grad_norm": 0.5986719727516174,
      "learning_rate": 3.877295519988403e-06,
      "loss": 0.0097,
      "step": 1878400
    },
    {
      "epoch": 3.0740755287602366,
      "grad_norm": 0.45447811484336853,
      "learning_rate": 3.877229627774886e-06,
      "loss": 0.0174,
      "step": 1878420
    },
    {
      "epoch": 3.0741082591988897,
      "grad_norm": 0.14498473703861237,
      "learning_rate": 3.877163735561369e-06,
      "loss": 0.0176,
      "step": 1878440
    },
    {
      "epoch": 3.0741409896375433,
      "grad_norm": 0.35484960675239563,
      "learning_rate": 3.8770978433478515e-06,
      "loss": 0.0098,
      "step": 1878460
    },
    {
      "epoch": 3.0741737200761965,
      "grad_norm": 0.40400630235671997,
      "learning_rate": 3.877031951134334e-06,
      "loss": 0.0126,
      "step": 1878480
    },
    {
      "epoch": 3.0742064505148496,
      "grad_norm": 0.2616535723209381,
      "learning_rate": 3.876966058920818e-06,
      "loss": 0.006,
      "step": 1878500
    },
    {
      "epoch": 3.0742391809535032,
      "grad_norm": 0.6197804808616638,
      "learning_rate": 3.8769001667073005e-06,
      "loss": 0.0128,
      "step": 1878520
    },
    {
      "epoch": 3.0742719113921564,
      "grad_norm": 0.32975447177886963,
      "learning_rate": 3.876834274493783e-06,
      "loss": 0.0072,
      "step": 1878540
    },
    {
      "epoch": 3.07430464183081,
      "grad_norm": 0.2659263014793396,
      "learning_rate": 3.876768382280267e-06,
      "loss": 0.0144,
      "step": 1878560
    },
    {
      "epoch": 3.074337372269463,
      "grad_norm": 0.11952239274978638,
      "learning_rate": 3.87670249006675e-06,
      "loss": 0.0103,
      "step": 1878580
    },
    {
      "epoch": 3.0743701027081163,
      "grad_norm": 0.16404099762439728,
      "learning_rate": 3.876636597853232e-06,
      "loss": 0.0109,
      "step": 1878600
    },
    {
      "epoch": 3.07440283314677,
      "grad_norm": 0.4289045035839081,
      "learning_rate": 3.876570705639715e-06,
      "loss": 0.0089,
      "step": 1878620
    },
    {
      "epoch": 3.074435563585423,
      "grad_norm": 0.435784250497818,
      "learning_rate": 3.876504813426198e-06,
      "loss": 0.0115,
      "step": 1878640
    },
    {
      "epoch": 3.0744682940240766,
      "grad_norm": 0.5147138833999634,
      "learning_rate": 3.8764389212126806e-06,
      "loss": 0.0153,
      "step": 1878660
    },
    {
      "epoch": 3.0745010244627298,
      "grad_norm": 0.13230419158935547,
      "learning_rate": 3.876373028999163e-06,
      "loss": 0.0075,
      "step": 1878680
    },
    {
      "epoch": 3.0745337549013834,
      "grad_norm": 0.1997925490140915,
      "learning_rate": 3.876307136785646e-06,
      "loss": 0.0098,
      "step": 1878700
    },
    {
      "epoch": 3.0745664853400365,
      "grad_norm": 0.9925434589385986,
      "learning_rate": 3.87624124457213e-06,
      "loss": 0.0137,
      "step": 1878720
    },
    {
      "epoch": 3.0745992157786897,
      "grad_norm": 0.22760775685310364,
      "learning_rate": 3.876175352358612e-06,
      "loss": 0.008,
      "step": 1878740
    },
    {
      "epoch": 3.0746319462173433,
      "grad_norm": 0.4780546724796295,
      "learning_rate": 3.876109460145095e-06,
      "loss": 0.0152,
      "step": 1878760
    },
    {
      "epoch": 3.0746646766559964,
      "grad_norm": 0.29139813780784607,
      "learning_rate": 3.876043567931578e-06,
      "loss": 0.0085,
      "step": 1878780
    },
    {
      "epoch": 3.07469740709465,
      "grad_norm": 0.2590196430683136,
      "learning_rate": 3.875977675718061e-06,
      "loss": 0.0103,
      "step": 1878800
    },
    {
      "epoch": 3.074730137533303,
      "grad_norm": 0.08497975021600723,
      "learning_rate": 3.875911783504543e-06,
      "loss": 0.0083,
      "step": 1878820
    },
    {
      "epoch": 3.0747628679719567,
      "grad_norm": 0.20550265908241272,
      "learning_rate": 3.875845891291026e-06,
      "loss": 0.0074,
      "step": 1878840
    },
    {
      "epoch": 3.07479559841061,
      "grad_norm": 0.16943906247615814,
      "learning_rate": 3.875779999077509e-06,
      "loss": 0.0112,
      "step": 1878860
    },
    {
      "epoch": 3.074828328849263,
      "grad_norm": 0.12200284004211426,
      "learning_rate": 3.875714106863992e-06,
      "loss": 0.0103,
      "step": 1878880
    },
    {
      "epoch": 3.0748610592879166,
      "grad_norm": 0.7295234799385071,
      "learning_rate": 3.875648214650475e-06,
      "loss": 0.015,
      "step": 1878900
    },
    {
      "epoch": 3.07489378972657,
      "grad_norm": 0.22167694568634033,
      "learning_rate": 3.875582322436958e-06,
      "loss": 0.0091,
      "step": 1878920
    },
    {
      "epoch": 3.0749265201652234,
      "grad_norm": 0.13953764736652374,
      "learning_rate": 3.8755164302234415e-06,
      "loss": 0.0084,
      "step": 1878940
    },
    {
      "epoch": 3.0749592506038765,
      "grad_norm": 0.6318092942237854,
      "learning_rate": 3.875450538009924e-06,
      "loss": 0.013,
      "step": 1878960
    },
    {
      "epoch": 3.07499198104253,
      "grad_norm": 0.17876969277858734,
      "learning_rate": 3.875384645796407e-06,
      "loss": 0.0125,
      "step": 1878980
    },
    {
      "epoch": 3.0750247114811833,
      "grad_norm": 0.5916732549667358,
      "learning_rate": 3.87531875358289e-06,
      "loss": 0.0112,
      "step": 1879000
    },
    {
      "epoch": 3.0750574419198364,
      "grad_norm": 0.15161412954330444,
      "learning_rate": 3.875252861369372e-06,
      "loss": 0.0146,
      "step": 1879020
    },
    {
      "epoch": 3.07509017235849,
      "grad_norm": 0.2692939341068268,
      "learning_rate": 3.875186969155855e-06,
      "loss": 0.0127,
      "step": 1879040
    },
    {
      "epoch": 3.075122902797143,
      "grad_norm": 0.44498276710510254,
      "learning_rate": 3.875121076942338e-06,
      "loss": 0.0109,
      "step": 1879060
    },
    {
      "epoch": 3.0751556332357968,
      "grad_norm": 0.24563026428222656,
      "learning_rate": 3.875055184728821e-06,
      "loss": 0.0112,
      "step": 1879080
    },
    {
      "epoch": 3.07518836367445,
      "grad_norm": 0.5259124040603638,
      "learning_rate": 3.874989292515303e-06,
      "loss": 0.0128,
      "step": 1879100
    },
    {
      "epoch": 3.0752210941131035,
      "grad_norm": 0.2547888159751892,
      "learning_rate": 3.874923400301787e-06,
      "loss": 0.0093,
      "step": 1879120
    },
    {
      "epoch": 3.0752538245517567,
      "grad_norm": 0.2620089054107666,
      "learning_rate": 3.87485750808827e-06,
      "loss": 0.0167,
      "step": 1879140
    },
    {
      "epoch": 3.07528655499041,
      "grad_norm": 0.5843559503555298,
      "learning_rate": 3.8747916158747524e-06,
      "loss": 0.0167,
      "step": 1879160
    },
    {
      "epoch": 3.0753192854290634,
      "grad_norm": 0.13105899095535278,
      "learning_rate": 3.874725723661235e-06,
      "loss": 0.0106,
      "step": 1879180
    },
    {
      "epoch": 3.0753520158677166,
      "grad_norm": 0.30017897486686707,
      "learning_rate": 3.874659831447718e-06,
      "loss": 0.0131,
      "step": 1879200
    },
    {
      "epoch": 3.07538474630637,
      "grad_norm": 0.19026808440685272,
      "learning_rate": 3.874593939234201e-06,
      "loss": 0.0067,
      "step": 1879220
    },
    {
      "epoch": 3.0754174767450233,
      "grad_norm": 0.11922968178987503,
      "learning_rate": 3.874528047020683e-06,
      "loss": 0.0139,
      "step": 1879240
    },
    {
      "epoch": 3.0754502071836765,
      "grad_norm": 0.1741102784872055,
      "learning_rate": 3.874462154807167e-06,
      "loss": 0.0161,
      "step": 1879260
    },
    {
      "epoch": 3.07548293762233,
      "grad_norm": 4.243701934814453,
      "learning_rate": 3.87439626259365e-06,
      "loss": 0.0132,
      "step": 1879280
    },
    {
      "epoch": 3.075515668060983,
      "grad_norm": 0.1847107708454132,
      "learning_rate": 3.8743303703801325e-06,
      "loss": 0.0099,
      "step": 1879300
    },
    {
      "epoch": 3.075548398499637,
      "grad_norm": 0.2406691461801529,
      "learning_rate": 3.874264478166615e-06,
      "loss": 0.012,
      "step": 1879320
    },
    {
      "epoch": 3.07558112893829,
      "grad_norm": 0.17551247775554657,
      "learning_rate": 3.874198585953099e-06,
      "loss": 0.0117,
      "step": 1879340
    },
    {
      "epoch": 3.0756138593769435,
      "grad_norm": 0.7270354628562927,
      "learning_rate": 3.8741326937395815e-06,
      "loss": 0.0127,
      "step": 1879360
    },
    {
      "epoch": 3.0756465898155967,
      "grad_norm": 0.1489219069480896,
      "learning_rate": 3.874066801526064e-06,
      "loss": 0.0076,
      "step": 1879380
    },
    {
      "epoch": 3.07567932025425,
      "grad_norm": 0.2252187579870224,
      "learning_rate": 3.874000909312547e-06,
      "loss": 0.0123,
      "step": 1879400
    },
    {
      "epoch": 3.0757120506929034,
      "grad_norm": 0.3902367651462555,
      "learning_rate": 3.87393501709903e-06,
      "loss": 0.0121,
      "step": 1879420
    },
    {
      "epoch": 3.0757447811315566,
      "grad_norm": 0.21449467539787292,
      "learning_rate": 3.8738691248855125e-06,
      "loss": 0.0152,
      "step": 1879440
    },
    {
      "epoch": 3.07577751157021,
      "grad_norm": 0.47764185070991516,
      "learning_rate": 3.873803232671995e-06,
      "loss": 0.0118,
      "step": 1879460
    },
    {
      "epoch": 3.0758102420088633,
      "grad_norm": 0.5279173254966736,
      "learning_rate": 3.873737340458478e-06,
      "loss": 0.0112,
      "step": 1879480
    },
    {
      "epoch": 3.075842972447517,
      "grad_norm": 0.47567349672317505,
      "learning_rate": 3.873671448244961e-06,
      "loss": 0.0145,
      "step": 1879500
    },
    {
      "epoch": 3.07587570288617,
      "grad_norm": 0.8178057074546814,
      "learning_rate": 3.873605556031444e-06,
      "loss": 0.0105,
      "step": 1879520
    },
    {
      "epoch": 3.075908433324823,
      "grad_norm": 0.15830698609352112,
      "learning_rate": 3.873539663817927e-06,
      "loss": 0.0081,
      "step": 1879540
    },
    {
      "epoch": 3.075941163763477,
      "grad_norm": 0.4092009961605072,
      "learning_rate": 3.87347377160441e-06,
      "loss": 0.0089,
      "step": 1879560
    },
    {
      "epoch": 3.07597389420213,
      "grad_norm": 0.04479716345667839,
      "learning_rate": 3.8734078793908925e-06,
      "loss": 0.0095,
      "step": 1879580
    },
    {
      "epoch": 3.0760066246407836,
      "grad_norm": 0.16221925616264343,
      "learning_rate": 3.873341987177375e-06,
      "loss": 0.0128,
      "step": 1879600
    },
    {
      "epoch": 3.0760393550794367,
      "grad_norm": 0.2480117231607437,
      "learning_rate": 3.873276094963859e-06,
      "loss": 0.0075,
      "step": 1879620
    },
    {
      "epoch": 3.0760720855180903,
      "grad_norm": 0.32417023181915283,
      "learning_rate": 3.873210202750342e-06,
      "loss": 0.0115,
      "step": 1879640
    },
    {
      "epoch": 3.0761048159567435,
      "grad_norm": 0.17266449332237244,
      "learning_rate": 3.873144310536824e-06,
      "loss": 0.0122,
      "step": 1879660
    },
    {
      "epoch": 3.0761375463953966,
      "grad_norm": 0.12054967880249023,
      "learning_rate": 3.873078418323307e-06,
      "loss": 0.0093,
      "step": 1879680
    },
    {
      "epoch": 3.07617027683405,
      "grad_norm": 0.6157376170158386,
      "learning_rate": 3.87301252610979e-06,
      "loss": 0.0118,
      "step": 1879700
    },
    {
      "epoch": 3.0762030072727033,
      "grad_norm": 0.9666551947593689,
      "learning_rate": 3.8729466338962725e-06,
      "loss": 0.0154,
      "step": 1879720
    },
    {
      "epoch": 3.076235737711357,
      "grad_norm": 1.3151332139968872,
      "learning_rate": 3.872880741682756e-06,
      "loss": 0.0114,
      "step": 1879740
    },
    {
      "epoch": 3.07626846815001,
      "grad_norm": 0.15687164664268494,
      "learning_rate": 3.872814849469239e-06,
      "loss": 0.0094,
      "step": 1879760
    },
    {
      "epoch": 3.0763011985886637,
      "grad_norm": 0.2573307752609253,
      "learning_rate": 3.872748957255722e-06,
      "loss": 0.0137,
      "step": 1879780
    },
    {
      "epoch": 3.076333929027317,
      "grad_norm": 0.21694891154766083,
      "learning_rate": 3.872683065042204e-06,
      "loss": 0.0107,
      "step": 1879800
    },
    {
      "epoch": 3.07636665946597,
      "grad_norm": 0.900101900100708,
      "learning_rate": 3.872617172828687e-06,
      "loss": 0.0115,
      "step": 1879820
    },
    {
      "epoch": 3.0763993899046236,
      "grad_norm": 0.4239347577095032,
      "learning_rate": 3.87255128061517e-06,
      "loss": 0.0153,
      "step": 1879840
    },
    {
      "epoch": 3.0764321203432767,
      "grad_norm": 0.23739014565944672,
      "learning_rate": 3.8724853884016526e-06,
      "loss": 0.0115,
      "step": 1879860
    },
    {
      "epoch": 3.0764648507819303,
      "grad_norm": 0.6524049639701843,
      "learning_rate": 3.872419496188135e-06,
      "loss": 0.0128,
      "step": 1879880
    },
    {
      "epoch": 3.0764975812205835,
      "grad_norm": 0.8201763033866882,
      "learning_rate": 3.872353603974618e-06,
      "loss": 0.0086,
      "step": 1879900
    },
    {
      "epoch": 3.076530311659237,
      "grad_norm": 0.12312474101781845,
      "learning_rate": 3.872287711761102e-06,
      "loss": 0.0091,
      "step": 1879920
    },
    {
      "epoch": 3.07656304209789,
      "grad_norm": 0.2385278344154358,
      "learning_rate": 3.872221819547584e-06,
      "loss": 0.0106,
      "step": 1879940
    },
    {
      "epoch": 3.0765957725365434,
      "grad_norm": 0.5367743968963623,
      "learning_rate": 3.872155927334067e-06,
      "loss": 0.0132,
      "step": 1879960
    },
    {
      "epoch": 3.076628502975197,
      "grad_norm": 0.15602514147758484,
      "learning_rate": 3.872090035120551e-06,
      "loss": 0.0133,
      "step": 1879980
    },
    {
      "epoch": 3.07666123341385,
      "grad_norm": 0.2907622754573822,
      "learning_rate": 3.8720241429070334e-06,
      "loss": 0.0134,
      "step": 1880000
    },
    {
      "epoch": 3.0766939638525037,
      "grad_norm": 0.14678220450878143,
      "learning_rate": 3.871958250693516e-06,
      "loss": 0.0097,
      "step": 1880020
    },
    {
      "epoch": 3.076726694291157,
      "grad_norm": 0.21723519265651703,
      "learning_rate": 3.871892358479999e-06,
      "loss": 0.0114,
      "step": 1880040
    },
    {
      "epoch": 3.0767594247298105,
      "grad_norm": 0.5741099715232849,
      "learning_rate": 3.871826466266482e-06,
      "loss": 0.014,
      "step": 1880060
    },
    {
      "epoch": 3.0767921551684636,
      "grad_norm": 0.21484975516796112,
      "learning_rate": 3.871760574052964e-06,
      "loss": 0.0082,
      "step": 1880080
    },
    {
      "epoch": 3.0768248856071168,
      "grad_norm": 0.42621198296546936,
      "learning_rate": 3.871694681839447e-06,
      "loss": 0.0122,
      "step": 1880100
    },
    {
      "epoch": 3.0768576160457703,
      "grad_norm": 0.08077206462621689,
      "learning_rate": 3.87162878962593e-06,
      "loss": 0.0111,
      "step": 1880120
    },
    {
      "epoch": 3.0768903464844235,
      "grad_norm": 0.4419668912887573,
      "learning_rate": 3.8715628974124135e-06,
      "loss": 0.0088,
      "step": 1880140
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 0.37982508540153503,
      "learning_rate": 3.871497005198896e-06,
      "loss": 0.0077,
      "step": 1880160
    },
    {
      "epoch": 3.0769558073617302,
      "grad_norm": 0.2999485433101654,
      "learning_rate": 3.871431112985379e-06,
      "loss": 0.0128,
      "step": 1880180
    },
    {
      "epoch": 3.0769885378003834,
      "grad_norm": 0.49159136414527893,
      "learning_rate": 3.871365220771862e-06,
      "loss": 0.013,
      "step": 1880200
    },
    {
      "epoch": 3.077021268239037,
      "grad_norm": 0.2272675633430481,
      "learning_rate": 3.871299328558344e-06,
      "loss": 0.0091,
      "step": 1880220
    },
    {
      "epoch": 3.07705399867769,
      "grad_norm": 0.20204097032546997,
      "learning_rate": 3.871233436344827e-06,
      "loss": 0.0148,
      "step": 1880240
    },
    {
      "epoch": 3.0770867291163437,
      "grad_norm": 0.4060905873775482,
      "learning_rate": 3.87116754413131e-06,
      "loss": 0.0136,
      "step": 1880260
    },
    {
      "epoch": 3.077119459554997,
      "grad_norm": 0.4034789800643921,
      "learning_rate": 3.871101651917793e-06,
      "loss": 0.016,
      "step": 1880280
    },
    {
      "epoch": 3.0771521899936505,
      "grad_norm": 0.09056400507688522,
      "learning_rate": 3.871035759704276e-06,
      "loss": 0.0121,
      "step": 1880300
    },
    {
      "epoch": 3.0771849204323036,
      "grad_norm": 0.12214955687522888,
      "learning_rate": 3.870969867490759e-06,
      "loss": 0.0067,
      "step": 1880320
    },
    {
      "epoch": 3.0772176508709568,
      "grad_norm": 0.2524060606956482,
      "learning_rate": 3.870903975277242e-06,
      "loss": 0.0142,
      "step": 1880340
    },
    {
      "epoch": 3.0772503813096104,
      "grad_norm": 0.3894587755203247,
      "learning_rate": 3.870838083063725e-06,
      "loss": 0.0127,
      "step": 1880360
    },
    {
      "epoch": 3.0772831117482635,
      "grad_norm": 0.3193740248680115,
      "learning_rate": 3.870772190850208e-06,
      "loss": 0.0092,
      "step": 1880380
    },
    {
      "epoch": 3.077315842186917,
      "grad_norm": 0.636438250541687,
      "learning_rate": 3.870706298636691e-06,
      "loss": 0.0105,
      "step": 1880400
    },
    {
      "epoch": 3.0773485726255703,
      "grad_norm": 0.40907740592956543,
      "learning_rate": 3.8706404064231735e-06,
      "loss": 0.0152,
      "step": 1880420
    },
    {
      "epoch": 3.077381303064224,
      "grad_norm": 0.20709435641765594,
      "learning_rate": 3.870574514209656e-06,
      "loss": 0.0084,
      "step": 1880440
    },
    {
      "epoch": 3.077414033502877,
      "grad_norm": 0.4576124846935272,
      "learning_rate": 3.870508621996139e-06,
      "loss": 0.0107,
      "step": 1880460
    },
    {
      "epoch": 3.07744676394153,
      "grad_norm": 0.24617941677570343,
      "learning_rate": 3.870442729782622e-06,
      "loss": 0.0074,
      "step": 1880480
    },
    {
      "epoch": 3.0774794943801838,
      "grad_norm": 0.34608253836631775,
      "learning_rate": 3.8703768375691045e-06,
      "loss": 0.012,
      "step": 1880500
    },
    {
      "epoch": 3.077512224818837,
      "grad_norm": 0.14892542362213135,
      "learning_rate": 3.870310945355587e-06,
      "loss": 0.0096,
      "step": 1880520
    },
    {
      "epoch": 3.0775449552574905,
      "grad_norm": 0.1428593397140503,
      "learning_rate": 3.870245053142071e-06,
      "loss": 0.0076,
      "step": 1880540
    },
    {
      "epoch": 3.0775776856961437,
      "grad_norm": 0.3231910765171051,
      "learning_rate": 3.8701791609285535e-06,
      "loss": 0.0144,
      "step": 1880560
    },
    {
      "epoch": 3.0776104161347972,
      "grad_norm": 0.14830808341503143,
      "learning_rate": 3.870113268715036e-06,
      "loss": 0.0096,
      "step": 1880580
    },
    {
      "epoch": 3.0776431465734504,
      "grad_norm": 0.25922298431396484,
      "learning_rate": 3.870047376501519e-06,
      "loss": 0.0093,
      "step": 1880600
    },
    {
      "epoch": 3.0776758770121035,
      "grad_norm": 0.07016805559396744,
      "learning_rate": 3.869981484288002e-06,
      "loss": 0.0115,
      "step": 1880620
    },
    {
      "epoch": 3.077708607450757,
      "grad_norm": 0.2469155639410019,
      "learning_rate": 3.8699155920744845e-06,
      "loss": 0.0143,
      "step": 1880640
    },
    {
      "epoch": 3.0777413378894103,
      "grad_norm": 0.18957939743995667,
      "learning_rate": 3.869849699860967e-06,
      "loss": 0.0096,
      "step": 1880660
    },
    {
      "epoch": 3.077774068328064,
      "grad_norm": 0.28981906175613403,
      "learning_rate": 3.869783807647451e-06,
      "loss": 0.0068,
      "step": 1880680
    },
    {
      "epoch": 3.077806798766717,
      "grad_norm": 0.1295921951532364,
      "learning_rate": 3.8697179154339336e-06,
      "loss": 0.0107,
      "step": 1880700
    },
    {
      "epoch": 3.07783952920537,
      "grad_norm": 0.10472959280014038,
      "learning_rate": 3.869652023220416e-06,
      "loss": 0.0111,
      "step": 1880720
    },
    {
      "epoch": 3.077872259644024,
      "grad_norm": 0.18119138479232788,
      "learning_rate": 3.869586131006899e-06,
      "loss": 0.0148,
      "step": 1880740
    },
    {
      "epoch": 3.077904990082677,
      "grad_norm": 0.36589181423187256,
      "learning_rate": 3.869520238793383e-06,
      "loss": 0.0087,
      "step": 1880760
    },
    {
      "epoch": 3.0779377205213305,
      "grad_norm": 0.1023152768611908,
      "learning_rate": 3.869454346579865e-06,
      "loss": 0.0096,
      "step": 1880780
    },
    {
      "epoch": 3.0779704509599837,
      "grad_norm": 0.21552209556102753,
      "learning_rate": 3.869388454366348e-06,
      "loss": 0.017,
      "step": 1880800
    },
    {
      "epoch": 3.0780031813986373,
      "grad_norm": 0.28035861253738403,
      "learning_rate": 3.869322562152831e-06,
      "loss": 0.0112,
      "step": 1880820
    },
    {
      "epoch": 3.0780359118372904,
      "grad_norm": 0.18594513833522797,
      "learning_rate": 3.869256669939314e-06,
      "loss": 0.0096,
      "step": 1880840
    },
    {
      "epoch": 3.0780686422759436,
      "grad_norm": 0.28547757863998413,
      "learning_rate": 3.869190777725796e-06,
      "loss": 0.0108,
      "step": 1880860
    },
    {
      "epoch": 3.078101372714597,
      "grad_norm": 0.05172139033675194,
      "learning_rate": 3.869124885512279e-06,
      "loss": 0.0103,
      "step": 1880880
    },
    {
      "epoch": 3.0781341031532503,
      "grad_norm": 1.0046203136444092,
      "learning_rate": 3.869058993298762e-06,
      "loss": 0.0153,
      "step": 1880900
    },
    {
      "epoch": 3.078166833591904,
      "grad_norm": 0.08172410726547241,
      "learning_rate": 3.8689931010852445e-06,
      "loss": 0.0164,
      "step": 1880920
    },
    {
      "epoch": 3.078199564030557,
      "grad_norm": 0.14994846284389496,
      "learning_rate": 3.868927208871728e-06,
      "loss": 0.0092,
      "step": 1880940
    },
    {
      "epoch": 3.0782322944692107,
      "grad_norm": 0.18922698497772217,
      "learning_rate": 3.868861316658211e-06,
      "loss": 0.0132,
      "step": 1880960
    },
    {
      "epoch": 3.078265024907864,
      "grad_norm": 0.46020829677581787,
      "learning_rate": 3.868795424444694e-06,
      "loss": 0.0096,
      "step": 1880980
    },
    {
      "epoch": 3.078297755346517,
      "grad_norm": 0.11614980548620224,
      "learning_rate": 3.868729532231176e-06,
      "loss": 0.0077,
      "step": 1881000
    },
    {
      "epoch": 3.0783304857851705,
      "grad_norm": 0.1406201422214508,
      "learning_rate": 3.86866364001766e-06,
      "loss": 0.0091,
      "step": 1881020
    },
    {
      "epoch": 3.0783632162238237,
      "grad_norm": 0.3989434540271759,
      "learning_rate": 3.868597747804143e-06,
      "loss": 0.0159,
      "step": 1881040
    },
    {
      "epoch": 3.0783959466624773,
      "grad_norm": 0.4109811782836914,
      "learning_rate": 3.868531855590625e-06,
      "loss": 0.0148,
      "step": 1881060
    },
    {
      "epoch": 3.0784286771011304,
      "grad_norm": 0.46518316864967346,
      "learning_rate": 3.868465963377108e-06,
      "loss": 0.0115,
      "step": 1881080
    },
    {
      "epoch": 3.078461407539784,
      "grad_norm": 0.32816651463508606,
      "learning_rate": 3.868400071163591e-06,
      "loss": 0.009,
      "step": 1881100
    },
    {
      "epoch": 3.078494137978437,
      "grad_norm": 0.4481279253959656,
      "learning_rate": 3.868334178950074e-06,
      "loss": 0.0135,
      "step": 1881120
    },
    {
      "epoch": 3.0785268684170903,
      "grad_norm": 0.684252917766571,
      "learning_rate": 3.868268286736556e-06,
      "loss": 0.0124,
      "step": 1881140
    },
    {
      "epoch": 3.078559598855744,
      "grad_norm": 0.33107882738113403,
      "learning_rate": 3.86820239452304e-06,
      "loss": 0.0094,
      "step": 1881160
    },
    {
      "epoch": 3.078592329294397,
      "grad_norm": 0.33336499333381653,
      "learning_rate": 3.868136502309523e-06,
      "loss": 0.0128,
      "step": 1881180
    },
    {
      "epoch": 3.0786250597330507,
      "grad_norm": 0.26538899540901184,
      "learning_rate": 3.8680706100960054e-06,
      "loss": 0.0134,
      "step": 1881200
    },
    {
      "epoch": 3.078657790171704,
      "grad_norm": 0.1060481071472168,
      "learning_rate": 3.868004717882488e-06,
      "loss": 0.0075,
      "step": 1881220
    },
    {
      "epoch": 3.0786905206103574,
      "grad_norm": 0.025644993409514427,
      "learning_rate": 3.867938825668971e-06,
      "loss": 0.0081,
      "step": 1881240
    },
    {
      "epoch": 3.0787232510490106,
      "grad_norm": 0.41542476415634155,
      "learning_rate": 3.867872933455454e-06,
      "loss": 0.0079,
      "step": 1881260
    },
    {
      "epoch": 3.0787559814876637,
      "grad_norm": 0.18948954343795776,
      "learning_rate": 3.867807041241936e-06,
      "loss": 0.0124,
      "step": 1881280
    },
    {
      "epoch": 3.0787887119263173,
      "grad_norm": 0.5833313465118408,
      "learning_rate": 3.867741149028419e-06,
      "loss": 0.0171,
      "step": 1881300
    },
    {
      "epoch": 3.0788214423649705,
      "grad_norm": 0.5451391935348511,
      "learning_rate": 3.867675256814902e-06,
      "loss": 0.011,
      "step": 1881320
    },
    {
      "epoch": 3.078854172803624,
      "grad_norm": 0.45675036311149597,
      "learning_rate": 3.8676093646013855e-06,
      "loss": 0.0139,
      "step": 1881340
    },
    {
      "epoch": 3.078886903242277,
      "grad_norm": 0.23777860403060913,
      "learning_rate": 3.867543472387868e-06,
      "loss": 0.017,
      "step": 1881360
    },
    {
      "epoch": 3.078919633680931,
      "grad_norm": 0.6580966114997864,
      "learning_rate": 3.867477580174352e-06,
      "loss": 0.0108,
      "step": 1881380
    },
    {
      "epoch": 3.078952364119584,
      "grad_norm": 0.3855418264865875,
      "learning_rate": 3.8674116879608345e-06,
      "loss": 0.0122,
      "step": 1881400
    },
    {
      "epoch": 3.078985094558237,
      "grad_norm": 0.2098693698644638,
      "learning_rate": 3.867345795747317e-06,
      "loss": 0.0151,
      "step": 1881420
    },
    {
      "epoch": 3.0790178249968907,
      "grad_norm": 0.5378009080886841,
      "learning_rate": 3.8672799035338e-06,
      "loss": 0.0147,
      "step": 1881440
    },
    {
      "epoch": 3.079050555435544,
      "grad_norm": 0.16788527369499207,
      "learning_rate": 3.867214011320283e-06,
      "loss": 0.0086,
      "step": 1881460
    },
    {
      "epoch": 3.0790832858741974,
      "grad_norm": 0.5402773022651672,
      "learning_rate": 3.8671481191067655e-06,
      "loss": 0.0116,
      "step": 1881480
    },
    {
      "epoch": 3.0791160163128506,
      "grad_norm": 0.6723416447639465,
      "learning_rate": 3.867082226893248e-06,
      "loss": 0.0118,
      "step": 1881500
    },
    {
      "epoch": 3.079148746751504,
      "grad_norm": 0.18227076530456543,
      "learning_rate": 3.867016334679731e-06,
      "loss": 0.0076,
      "step": 1881520
    },
    {
      "epoch": 3.0791814771901573,
      "grad_norm": 0.1982109397649765,
      "learning_rate": 3.866950442466214e-06,
      "loss": 0.0152,
      "step": 1881540
    },
    {
      "epoch": 3.0792142076288105,
      "grad_norm": 0.09221380949020386,
      "learning_rate": 3.866884550252697e-06,
      "loss": 0.0098,
      "step": 1881560
    },
    {
      "epoch": 3.079246938067464,
      "grad_norm": 0.38081738352775574,
      "learning_rate": 3.86681865803918e-06,
      "loss": 0.0122,
      "step": 1881580
    },
    {
      "epoch": 3.0792796685061172,
      "grad_norm": 0.5043469071388245,
      "learning_rate": 3.866752765825663e-06,
      "loss": 0.0109,
      "step": 1881600
    },
    {
      "epoch": 3.079312398944771,
      "grad_norm": 0.08867242187261581,
      "learning_rate": 3.8666868736121455e-06,
      "loss": 0.0113,
      "step": 1881620
    },
    {
      "epoch": 3.079345129383424,
      "grad_norm": 0.10065200924873352,
      "learning_rate": 3.866620981398628e-06,
      "loss": 0.0147,
      "step": 1881640
    },
    {
      "epoch": 3.079377859822077,
      "grad_norm": 0.15977272391319275,
      "learning_rate": 3.866555089185111e-06,
      "loss": 0.0124,
      "step": 1881660
    },
    {
      "epoch": 3.0794105902607307,
      "grad_norm": 0.28720521926879883,
      "learning_rate": 3.866489196971594e-06,
      "loss": 0.0106,
      "step": 1881680
    },
    {
      "epoch": 3.079443320699384,
      "grad_norm": 0.30247634649276733,
      "learning_rate": 3.8664233047580765e-06,
      "loss": 0.0134,
      "step": 1881700
    },
    {
      "epoch": 3.0794760511380375,
      "grad_norm": 0.5907198786735535,
      "learning_rate": 3.86635741254456e-06,
      "loss": 0.0159,
      "step": 1881720
    },
    {
      "epoch": 3.0795087815766906,
      "grad_norm": 0.43401816487312317,
      "learning_rate": 3.866291520331043e-06,
      "loss": 0.0123,
      "step": 1881740
    },
    {
      "epoch": 3.079541512015344,
      "grad_norm": 0.15362733602523804,
      "learning_rate": 3.8662256281175255e-06,
      "loss": 0.0134,
      "step": 1881760
    },
    {
      "epoch": 3.0795742424539974,
      "grad_norm": 0.3832054138183594,
      "learning_rate": 3.866159735904009e-06,
      "loss": 0.0153,
      "step": 1881780
    },
    {
      "epoch": 3.0796069728926505,
      "grad_norm": 0.3307208716869354,
      "learning_rate": 3.866093843690492e-06,
      "loss": 0.0137,
      "step": 1881800
    },
    {
      "epoch": 3.079639703331304,
      "grad_norm": 0.23745736479759216,
      "learning_rate": 3.866027951476975e-06,
      "loss": 0.0082,
      "step": 1881820
    },
    {
      "epoch": 3.0796724337699573,
      "grad_norm": 0.5012588500976562,
      "learning_rate": 3.865962059263457e-06,
      "loss": 0.0149,
      "step": 1881840
    },
    {
      "epoch": 3.079705164208611,
      "grad_norm": 0.23429574072360992,
      "learning_rate": 3.86589616704994e-06,
      "loss": 0.0133,
      "step": 1881860
    },
    {
      "epoch": 3.079737894647264,
      "grad_norm": 0.17261365056037903,
      "learning_rate": 3.865830274836423e-06,
      "loss": 0.0092,
      "step": 1881880
    },
    {
      "epoch": 3.0797706250859176,
      "grad_norm": 0.28080686926841736,
      "learning_rate": 3.8657643826229056e-06,
      "loss": 0.0139,
      "step": 1881900
    },
    {
      "epoch": 3.0798033555245707,
      "grad_norm": 0.3350066840648651,
      "learning_rate": 3.865698490409388e-06,
      "loss": 0.0085,
      "step": 1881920
    },
    {
      "epoch": 3.079836085963224,
      "grad_norm": 0.1970585137605667,
      "learning_rate": 3.865632598195871e-06,
      "loss": 0.0112,
      "step": 1881940
    },
    {
      "epoch": 3.0798688164018775,
      "grad_norm": 0.17177115380764008,
      "learning_rate": 3.865566705982355e-06,
      "loss": 0.0178,
      "step": 1881960
    },
    {
      "epoch": 3.0799015468405306,
      "grad_norm": 0.0608409084379673,
      "learning_rate": 3.865500813768837e-06,
      "loss": 0.0093,
      "step": 1881980
    },
    {
      "epoch": 3.0799342772791842,
      "grad_norm": 0.46165934205055237,
      "learning_rate": 3.86543492155532e-06,
      "loss": 0.0135,
      "step": 1882000
    },
    {
      "epoch": 3.0799670077178374,
      "grad_norm": 0.2337338924407959,
      "learning_rate": 3.865369029341803e-06,
      "loss": 0.0107,
      "step": 1882020
    },
    {
      "epoch": 3.079999738156491,
      "grad_norm": 0.10752031207084656,
      "learning_rate": 3.865303137128286e-06,
      "loss": 0.0084,
      "step": 1882040
    },
    {
      "epoch": 3.080032468595144,
      "grad_norm": 0.4838516414165497,
      "learning_rate": 3.865237244914768e-06,
      "loss": 0.0174,
      "step": 1882060
    },
    {
      "epoch": 3.0800651990337973,
      "grad_norm": 0.14506873488426208,
      "learning_rate": 3.865171352701252e-06,
      "loss": 0.0186,
      "step": 1882080
    },
    {
      "epoch": 3.080097929472451,
      "grad_norm": 0.25408312678337097,
      "learning_rate": 3.865105460487735e-06,
      "loss": 0.0102,
      "step": 1882100
    },
    {
      "epoch": 3.080130659911104,
      "grad_norm": 0.45179077982902527,
      "learning_rate": 3.865039568274217e-06,
      "loss": 0.0151,
      "step": 1882120
    },
    {
      "epoch": 3.0801633903497576,
      "grad_norm": 0.15103381872177124,
      "learning_rate": 3.8649736760607e-06,
      "loss": 0.0112,
      "step": 1882140
    },
    {
      "epoch": 3.0801961207884108,
      "grad_norm": 0.2660459578037262,
      "learning_rate": 3.864907783847183e-06,
      "loss": 0.0102,
      "step": 1882160
    },
    {
      "epoch": 3.0802288512270644,
      "grad_norm": 0.33891773223876953,
      "learning_rate": 3.8648418916336665e-06,
      "loss": 0.0091,
      "step": 1882180
    },
    {
      "epoch": 3.0802615816657175,
      "grad_norm": 0.4057445228099823,
      "learning_rate": 3.864775999420149e-06,
      "loss": 0.0099,
      "step": 1882200
    },
    {
      "epoch": 3.0802943121043707,
      "grad_norm": 0.7924843430519104,
      "learning_rate": 3.864710107206632e-06,
      "loss": 0.0092,
      "step": 1882220
    },
    {
      "epoch": 3.0803270425430243,
      "grad_norm": 0.3241218626499176,
      "learning_rate": 3.864644214993115e-06,
      "loss": 0.0119,
      "step": 1882240
    },
    {
      "epoch": 3.0803597729816774,
      "grad_norm": 0.6574310064315796,
      "learning_rate": 3.8645783227795974e-06,
      "loss": 0.0074,
      "step": 1882260
    },
    {
      "epoch": 3.080392503420331,
      "grad_norm": 0.23985274136066437,
      "learning_rate": 3.86451243056608e-06,
      "loss": 0.0101,
      "step": 1882280
    },
    {
      "epoch": 3.080425233858984,
      "grad_norm": 1.183803915977478,
      "learning_rate": 3.864446538352563e-06,
      "loss": 0.0151,
      "step": 1882300
    },
    {
      "epoch": 3.0804579642976373,
      "grad_norm": 0.3009848892688751,
      "learning_rate": 3.864380646139046e-06,
      "loss": 0.0088,
      "step": 1882320
    },
    {
      "epoch": 3.080490694736291,
      "grad_norm": 0.15912923216819763,
      "learning_rate": 3.864314753925528e-06,
      "loss": 0.0122,
      "step": 1882340
    },
    {
      "epoch": 3.080523425174944,
      "grad_norm": 0.1359604001045227,
      "learning_rate": 3.864248861712012e-06,
      "loss": 0.008,
      "step": 1882360
    },
    {
      "epoch": 3.0805561556135976,
      "grad_norm": 0.49080267548561096,
      "learning_rate": 3.864182969498495e-06,
      "loss": 0.0102,
      "step": 1882380
    },
    {
      "epoch": 3.080588886052251,
      "grad_norm": 0.5771922469139099,
      "learning_rate": 3.8641170772849775e-06,
      "loss": 0.0142,
      "step": 1882400
    },
    {
      "epoch": 3.0806216164909044,
      "grad_norm": 0.17077019810676575,
      "learning_rate": 3.86405118507146e-06,
      "loss": 0.0116,
      "step": 1882420
    },
    {
      "epoch": 3.0806543469295575,
      "grad_norm": 0.35975557565689087,
      "learning_rate": 3.863985292857944e-06,
      "loss": 0.0117,
      "step": 1882440
    },
    {
      "epoch": 3.0806870773682107,
      "grad_norm": 0.8635715842247009,
      "learning_rate": 3.8639194006444265e-06,
      "loss": 0.0082,
      "step": 1882460
    },
    {
      "epoch": 3.0807198078068643,
      "grad_norm": 0.29624277353286743,
      "learning_rate": 3.863853508430909e-06,
      "loss": 0.0079,
      "step": 1882480
    },
    {
      "epoch": 3.0807525382455174,
      "grad_norm": 0.2323143035173416,
      "learning_rate": 3.863787616217392e-06,
      "loss": 0.0125,
      "step": 1882500
    },
    {
      "epoch": 3.080785268684171,
      "grad_norm": 0.2586124539375305,
      "learning_rate": 3.863721724003875e-06,
      "loss": 0.008,
      "step": 1882520
    },
    {
      "epoch": 3.080817999122824,
      "grad_norm": 0.0955498069524765,
      "learning_rate": 3.8636558317903575e-06,
      "loss": 0.01,
      "step": 1882540
    },
    {
      "epoch": 3.0808507295614778,
      "grad_norm": 0.36392661929130554,
      "learning_rate": 3.86358993957684e-06,
      "loss": 0.0122,
      "step": 1882560
    },
    {
      "epoch": 3.080883460000131,
      "grad_norm": 0.12211442738771439,
      "learning_rate": 3.863524047363324e-06,
      "loss": 0.0122,
      "step": 1882580
    },
    {
      "epoch": 3.080916190438784,
      "grad_norm": 0.17846092581748962,
      "learning_rate": 3.8634581551498065e-06,
      "loss": 0.009,
      "step": 1882600
    },
    {
      "epoch": 3.0809489208774377,
      "grad_norm": 0.061044663190841675,
      "learning_rate": 3.863392262936289e-06,
      "loss": 0.0072,
      "step": 1882620
    },
    {
      "epoch": 3.080981651316091,
      "grad_norm": 0.9719878435134888,
      "learning_rate": 3.863326370722772e-06,
      "loss": 0.0115,
      "step": 1882640
    },
    {
      "epoch": 3.0810143817547444,
      "grad_norm": 0.6136903166770935,
      "learning_rate": 3.863260478509255e-06,
      "loss": 0.0119,
      "step": 1882660
    },
    {
      "epoch": 3.0810471121933976,
      "grad_norm": 0.20016515254974365,
      "learning_rate": 3.8631945862957375e-06,
      "loss": 0.0118,
      "step": 1882680
    },
    {
      "epoch": 3.081079842632051,
      "grad_norm": 0.35991111397743225,
      "learning_rate": 3.86312869408222e-06,
      "loss": 0.0129,
      "step": 1882700
    },
    {
      "epoch": 3.0811125730707043,
      "grad_norm": 0.44164153933525085,
      "learning_rate": 3.863062801868703e-06,
      "loss": 0.0105,
      "step": 1882720
    },
    {
      "epoch": 3.0811453035093574,
      "grad_norm": 0.34324222803115845,
      "learning_rate": 3.8629969096551866e-06,
      "loss": 0.0099,
      "step": 1882740
    },
    {
      "epoch": 3.081178033948011,
      "grad_norm": 0.4688011407852173,
      "learning_rate": 3.862931017441669e-06,
      "loss": 0.012,
      "step": 1882760
    },
    {
      "epoch": 3.081210764386664,
      "grad_norm": 0.25707873702049255,
      "learning_rate": 3.862865125228152e-06,
      "loss": 0.0145,
      "step": 1882780
    },
    {
      "epoch": 3.081243494825318,
      "grad_norm": 0.47605860233306885,
      "learning_rate": 3.862799233014636e-06,
      "loss": 0.0065,
      "step": 1882800
    },
    {
      "epoch": 3.081276225263971,
      "grad_norm": 0.25814464688301086,
      "learning_rate": 3.862733340801118e-06,
      "loss": 0.0137,
      "step": 1882820
    },
    {
      "epoch": 3.0813089557026245,
      "grad_norm": 0.2532672882080078,
      "learning_rate": 3.862667448587601e-06,
      "loss": 0.0139,
      "step": 1882840
    },
    {
      "epoch": 3.0813416861412777,
      "grad_norm": 0.13278838992118835,
      "learning_rate": 3.862601556374084e-06,
      "loss": 0.0086,
      "step": 1882860
    },
    {
      "epoch": 3.081374416579931,
      "grad_norm": 0.6164872646331787,
      "learning_rate": 3.862535664160567e-06,
      "loss": 0.015,
      "step": 1882880
    },
    {
      "epoch": 3.0814071470185844,
      "grad_norm": 0.8190088868141174,
      "learning_rate": 3.862469771947049e-06,
      "loss": 0.0159,
      "step": 1882900
    },
    {
      "epoch": 3.0814398774572376,
      "grad_norm": 0.2404295653104782,
      "learning_rate": 3.862403879733532e-06,
      "loss": 0.0117,
      "step": 1882920
    },
    {
      "epoch": 3.081472607895891,
      "grad_norm": 0.33936095237731934,
      "learning_rate": 3.862337987520015e-06,
      "loss": 0.0226,
      "step": 1882940
    },
    {
      "epoch": 3.0815053383345443,
      "grad_norm": 0.2234765887260437,
      "learning_rate": 3.8622720953064976e-06,
      "loss": 0.0085,
      "step": 1882960
    },
    {
      "epoch": 3.081538068773198,
      "grad_norm": 0.26491424441337585,
      "learning_rate": 3.862206203092981e-06,
      "loss": 0.0117,
      "step": 1882980
    },
    {
      "epoch": 3.081570799211851,
      "grad_norm": 0.2921586036682129,
      "learning_rate": 3.862140310879464e-06,
      "loss": 0.0112,
      "step": 1883000
    },
    {
      "epoch": 3.081603529650504,
      "grad_norm": 0.5659013390541077,
      "learning_rate": 3.862074418665947e-06,
      "loss": 0.0093,
      "step": 1883020
    },
    {
      "epoch": 3.081636260089158,
      "grad_norm": 0.649377703666687,
      "learning_rate": 3.862008526452429e-06,
      "loss": 0.0111,
      "step": 1883040
    },
    {
      "epoch": 3.081668990527811,
      "grad_norm": 0.04524584114551544,
      "learning_rate": 3.861942634238912e-06,
      "loss": 0.0086,
      "step": 1883060
    },
    {
      "epoch": 3.0817017209664646,
      "grad_norm": 0.2231912910938263,
      "learning_rate": 3.861876742025395e-06,
      "loss": 0.0109,
      "step": 1883080
    },
    {
      "epoch": 3.0817344514051177,
      "grad_norm": 0.13718117773532867,
      "learning_rate": 3.861810849811878e-06,
      "loss": 0.0095,
      "step": 1883100
    },
    {
      "epoch": 3.0817671818437713,
      "grad_norm": 0.22003693878650665,
      "learning_rate": 3.86174495759836e-06,
      "loss": 0.0149,
      "step": 1883120
    },
    {
      "epoch": 3.0817999122824244,
      "grad_norm": 0.36214083433151245,
      "learning_rate": 3.861679065384844e-06,
      "loss": 0.0126,
      "step": 1883140
    },
    {
      "epoch": 3.0818326427210776,
      "grad_norm": 0.7278038859367371,
      "learning_rate": 3.861613173171327e-06,
      "loss": 0.0088,
      "step": 1883160
    },
    {
      "epoch": 3.081865373159731,
      "grad_norm": 0.13948200643062592,
      "learning_rate": 3.861547280957809e-06,
      "loss": 0.0105,
      "step": 1883180
    },
    {
      "epoch": 3.0818981035983843,
      "grad_norm": 0.30100017786026,
      "learning_rate": 3.861481388744293e-06,
      "loss": 0.0181,
      "step": 1883200
    },
    {
      "epoch": 3.081930834037038,
      "grad_norm": 0.17821498215198517,
      "learning_rate": 3.861415496530776e-06,
      "loss": 0.0125,
      "step": 1883220
    },
    {
      "epoch": 3.081963564475691,
      "grad_norm": 0.16385774314403534,
      "learning_rate": 3.8613496043172584e-06,
      "loss": 0.0151,
      "step": 1883240
    },
    {
      "epoch": 3.0819962949143442,
      "grad_norm": 0.6669268608093262,
      "learning_rate": 3.861283712103741e-06,
      "loss": 0.0133,
      "step": 1883260
    },
    {
      "epoch": 3.082029025352998,
      "grad_norm": 0.2308562994003296,
      "learning_rate": 3.861217819890224e-06,
      "loss": 0.0103,
      "step": 1883280
    },
    {
      "epoch": 3.082061755791651,
      "grad_norm": 0.3754725754261017,
      "learning_rate": 3.861151927676707e-06,
      "loss": 0.0118,
      "step": 1883300
    },
    {
      "epoch": 3.0820944862303046,
      "grad_norm": 0.6496874094009399,
      "learning_rate": 3.861086035463189e-06,
      "loss": 0.0125,
      "step": 1883320
    },
    {
      "epoch": 3.0821272166689577,
      "grad_norm": 0.19367340207099915,
      "learning_rate": 3.861020143249672e-06,
      "loss": 0.0128,
      "step": 1883340
    },
    {
      "epoch": 3.0821599471076113,
      "grad_norm": 0.49770447611808777,
      "learning_rate": 3.860954251036155e-06,
      "loss": 0.0143,
      "step": 1883360
    },
    {
      "epoch": 3.0821926775462645,
      "grad_norm": 0.12990012764930725,
      "learning_rate": 3.8608883588226385e-06,
      "loss": 0.0138,
      "step": 1883380
    },
    {
      "epoch": 3.0822254079849176,
      "grad_norm": 0.1052638441324234,
      "learning_rate": 3.860822466609121e-06,
      "loss": 0.0089,
      "step": 1883400
    },
    {
      "epoch": 3.082258138423571,
      "grad_norm": 0.4280073642730713,
      "learning_rate": 3.860756574395604e-06,
      "loss": 0.0194,
      "step": 1883420
    },
    {
      "epoch": 3.0822908688622244,
      "grad_norm": 0.5599350929260254,
      "learning_rate": 3.860690682182087e-06,
      "loss": 0.0116,
      "step": 1883440
    },
    {
      "epoch": 3.082323599300878,
      "grad_norm": 0.2657633423805237,
      "learning_rate": 3.8606247899685694e-06,
      "loss": 0.0137,
      "step": 1883460
    },
    {
      "epoch": 3.082356329739531,
      "grad_norm": 0.11219105124473572,
      "learning_rate": 3.860558897755052e-06,
      "loss": 0.0081,
      "step": 1883480
    },
    {
      "epoch": 3.0823890601781847,
      "grad_norm": 0.46289700269699097,
      "learning_rate": 3.860493005541536e-06,
      "loss": 0.0132,
      "step": 1883500
    },
    {
      "epoch": 3.082421790616838,
      "grad_norm": 0.09202098101377487,
      "learning_rate": 3.8604271133280185e-06,
      "loss": 0.0134,
      "step": 1883520
    },
    {
      "epoch": 3.082454521055491,
      "grad_norm": 0.7094011902809143,
      "learning_rate": 3.860361221114501e-06,
      "loss": 0.0164,
      "step": 1883540
    },
    {
      "epoch": 3.0824872514941446,
      "grad_norm": 0.34585440158843994,
      "learning_rate": 3.860295328900984e-06,
      "loss": 0.0132,
      "step": 1883560
    },
    {
      "epoch": 3.0825199819327977,
      "grad_norm": 0.1503131091594696,
      "learning_rate": 3.860229436687467e-06,
      "loss": 0.0085,
      "step": 1883580
    },
    {
      "epoch": 3.0825527123714513,
      "grad_norm": 0.3546605408191681,
      "learning_rate": 3.86016354447395e-06,
      "loss": 0.0129,
      "step": 1883600
    },
    {
      "epoch": 3.0825854428101045,
      "grad_norm": 0.13371118903160095,
      "learning_rate": 3.860097652260433e-06,
      "loss": 0.0097,
      "step": 1883620
    },
    {
      "epoch": 3.082618173248758,
      "grad_norm": 0.3784264326095581,
      "learning_rate": 3.860031760046916e-06,
      "loss": 0.0137,
      "step": 1883640
    },
    {
      "epoch": 3.0826509036874112,
      "grad_norm": 0.15475137531757355,
      "learning_rate": 3.8599658678333985e-06,
      "loss": 0.0098,
      "step": 1883660
    },
    {
      "epoch": 3.0826836341260644,
      "grad_norm": 0.12761825323104858,
      "learning_rate": 3.859899975619881e-06,
      "loss": 0.0137,
      "step": 1883680
    },
    {
      "epoch": 3.082716364564718,
      "grad_norm": 0.13734449446201324,
      "learning_rate": 3.859834083406364e-06,
      "loss": 0.0112,
      "step": 1883700
    },
    {
      "epoch": 3.082749095003371,
      "grad_norm": 0.3031270205974579,
      "learning_rate": 3.859768191192847e-06,
      "loss": 0.0119,
      "step": 1883720
    },
    {
      "epoch": 3.0827818254420247,
      "grad_norm": 0.1214623972773552,
      "learning_rate": 3.8597022989793295e-06,
      "loss": 0.0098,
      "step": 1883740
    },
    {
      "epoch": 3.082814555880678,
      "grad_norm": 0.563041090965271,
      "learning_rate": 3.859636406765812e-06,
      "loss": 0.0118,
      "step": 1883760
    },
    {
      "epoch": 3.082847286319331,
      "grad_norm": 0.3097325265407562,
      "learning_rate": 3.859570514552296e-06,
      "loss": 0.0077,
      "step": 1883780
    },
    {
      "epoch": 3.0828800167579846,
      "grad_norm": 0.1944737583398819,
      "learning_rate": 3.8595046223387786e-06,
      "loss": 0.013,
      "step": 1883800
    },
    {
      "epoch": 3.0829127471966378,
      "grad_norm": 0.3727406859397888,
      "learning_rate": 3.859438730125261e-06,
      "loss": 0.0076,
      "step": 1883820
    },
    {
      "epoch": 3.0829454776352914,
      "grad_norm": 1.8594932556152344,
      "learning_rate": 3.859372837911745e-06,
      "loss": 0.0201,
      "step": 1883840
    },
    {
      "epoch": 3.0829782080739445,
      "grad_norm": 0.8721657395362854,
      "learning_rate": 3.859306945698228e-06,
      "loss": 0.0117,
      "step": 1883860
    },
    {
      "epoch": 3.083010938512598,
      "grad_norm": 0.08006806671619415,
      "learning_rate": 3.85924105348471e-06,
      "loss": 0.0084,
      "step": 1883880
    },
    {
      "epoch": 3.0830436689512513,
      "grad_norm": 0.3596799075603485,
      "learning_rate": 3.859175161271193e-06,
      "loss": 0.0147,
      "step": 1883900
    },
    {
      "epoch": 3.0830763993899044,
      "grad_norm": 0.18529529869556427,
      "learning_rate": 3.859109269057676e-06,
      "loss": 0.0088,
      "step": 1883920
    },
    {
      "epoch": 3.083109129828558,
      "grad_norm": 0.11446981132030487,
      "learning_rate": 3.8590433768441586e-06,
      "loss": 0.0136,
      "step": 1883940
    },
    {
      "epoch": 3.083141860267211,
      "grad_norm": 0.2741067111492157,
      "learning_rate": 3.858977484630641e-06,
      "loss": 0.0149,
      "step": 1883960
    },
    {
      "epoch": 3.0831745907058647,
      "grad_norm": 0.16104088723659515,
      "learning_rate": 3.858911592417124e-06,
      "loss": 0.0149,
      "step": 1883980
    },
    {
      "epoch": 3.083207321144518,
      "grad_norm": 2.054137945175171,
      "learning_rate": 3.858845700203608e-06,
      "loss": 0.0114,
      "step": 1884000
    },
    {
      "epoch": 3.0832400515831715,
      "grad_norm": 0.08865976333618164,
      "learning_rate": 3.85877980799009e-06,
      "loss": 0.0102,
      "step": 1884020
    },
    {
      "epoch": 3.0832727820218246,
      "grad_norm": 0.5367445945739746,
      "learning_rate": 3.858713915776573e-06,
      "loss": 0.0085,
      "step": 1884040
    },
    {
      "epoch": 3.083305512460478,
      "grad_norm": 0.4431508481502533,
      "learning_rate": 3.858648023563056e-06,
      "loss": 0.013,
      "step": 1884060
    },
    {
      "epoch": 3.0833382428991314,
      "grad_norm": 0.06700079143047333,
      "learning_rate": 3.858582131349539e-06,
      "loss": 0.0114,
      "step": 1884080
    },
    {
      "epoch": 3.0833709733377845,
      "grad_norm": 0.14796869456768036,
      "learning_rate": 3.858516239136021e-06,
      "loss": 0.0104,
      "step": 1884100
    },
    {
      "epoch": 3.083403703776438,
      "grad_norm": 0.22315633296966553,
      "learning_rate": 3.858450346922504e-06,
      "loss": 0.0141,
      "step": 1884120
    },
    {
      "epoch": 3.0834364342150913,
      "grad_norm": 0.42993640899658203,
      "learning_rate": 3.858384454708987e-06,
      "loss": 0.0175,
      "step": 1884140
    },
    {
      "epoch": 3.083469164653745,
      "grad_norm": 0.4525611400604248,
      "learning_rate": 3.85831856249547e-06,
      "loss": 0.0101,
      "step": 1884160
    },
    {
      "epoch": 3.083501895092398,
      "grad_norm": 0.16845525801181793,
      "learning_rate": 3.858252670281953e-06,
      "loss": 0.0091,
      "step": 1884180
    },
    {
      "epoch": 3.083534625531051,
      "grad_norm": 0.25681397318840027,
      "learning_rate": 3.858186778068436e-06,
      "loss": 0.0126,
      "step": 1884200
    },
    {
      "epoch": 3.0835673559697048,
      "grad_norm": 0.5804116725921631,
      "learning_rate": 3.8581208858549195e-06,
      "loss": 0.0123,
      "step": 1884220
    },
    {
      "epoch": 3.083600086408358,
      "grad_norm": 0.22616122663021088,
      "learning_rate": 3.858054993641402e-06,
      "loss": 0.0108,
      "step": 1884240
    },
    {
      "epoch": 3.0836328168470115,
      "grad_norm": 0.3002389669418335,
      "learning_rate": 3.857989101427885e-06,
      "loss": 0.0083,
      "step": 1884260
    },
    {
      "epoch": 3.0836655472856647,
      "grad_norm": 0.4029420018196106,
      "learning_rate": 3.857923209214368e-06,
      "loss": 0.0102,
      "step": 1884280
    },
    {
      "epoch": 3.0836982777243183,
      "grad_norm": 0.1774177849292755,
      "learning_rate": 3.8578573170008504e-06,
      "loss": 0.0103,
      "step": 1884300
    },
    {
      "epoch": 3.0837310081629714,
      "grad_norm": 0.1320231556892395,
      "learning_rate": 3.857791424787333e-06,
      "loss": 0.0117,
      "step": 1884320
    },
    {
      "epoch": 3.0837637386016246,
      "grad_norm": 0.2497911900281906,
      "learning_rate": 3.857725532573816e-06,
      "loss": 0.0108,
      "step": 1884340
    },
    {
      "epoch": 3.083796469040278,
      "grad_norm": 0.31209784746170044,
      "learning_rate": 3.857659640360299e-06,
      "loss": 0.0145,
      "step": 1884360
    },
    {
      "epoch": 3.0838291994789313,
      "grad_norm": 0.5393385291099548,
      "learning_rate": 3.857593748146781e-06,
      "loss": 0.0088,
      "step": 1884380
    },
    {
      "epoch": 3.083861929917585,
      "grad_norm": 0.21201859414577484,
      "learning_rate": 3.857527855933265e-06,
      "loss": 0.0117,
      "step": 1884400
    },
    {
      "epoch": 3.083894660356238,
      "grad_norm": 0.12978225946426392,
      "learning_rate": 3.857461963719748e-06,
      "loss": 0.0093,
      "step": 1884420
    },
    {
      "epoch": 3.0839273907948916,
      "grad_norm": 0.2931744456291199,
      "learning_rate": 3.8573960715062305e-06,
      "loss": 0.0112,
      "step": 1884440
    },
    {
      "epoch": 3.083960121233545,
      "grad_norm": 0.21837572753429413,
      "learning_rate": 3.857330179292713e-06,
      "loss": 0.0118,
      "step": 1884460
    },
    {
      "epoch": 3.083992851672198,
      "grad_norm": 0.15478229522705078,
      "learning_rate": 3.857264287079196e-06,
      "loss": 0.0116,
      "step": 1884480
    },
    {
      "epoch": 3.0840255821108515,
      "grad_norm": 0.5010243058204651,
      "learning_rate": 3.857198394865679e-06,
      "loss": 0.0107,
      "step": 1884500
    },
    {
      "epoch": 3.0840583125495047,
      "grad_norm": 0.24150392413139343,
      "learning_rate": 3.857132502652161e-06,
      "loss": 0.0119,
      "step": 1884520
    },
    {
      "epoch": 3.0840910429881583,
      "grad_norm": 0.31211981177330017,
      "learning_rate": 3.857066610438645e-06,
      "loss": 0.011,
      "step": 1884540
    },
    {
      "epoch": 3.0841237734268114,
      "grad_norm": 0.1552821397781372,
      "learning_rate": 3.857000718225128e-06,
      "loss": 0.0102,
      "step": 1884560
    },
    {
      "epoch": 3.084156503865465,
      "grad_norm": 0.3805156648159027,
      "learning_rate": 3.8569348260116105e-06,
      "loss": 0.0163,
      "step": 1884580
    },
    {
      "epoch": 3.084189234304118,
      "grad_norm": 0.16472749412059784,
      "learning_rate": 3.856868933798093e-06,
      "loss": 0.0122,
      "step": 1884600
    },
    {
      "epoch": 3.0842219647427713,
      "grad_norm": 0.13824249804019928,
      "learning_rate": 3.856803041584577e-06,
      "loss": 0.0085,
      "step": 1884620
    },
    {
      "epoch": 3.084254695181425,
      "grad_norm": 0.4111892282962799,
      "learning_rate": 3.8567371493710595e-06,
      "loss": 0.0132,
      "step": 1884640
    },
    {
      "epoch": 3.084287425620078,
      "grad_norm": 0.18651175498962402,
      "learning_rate": 3.856671257157542e-06,
      "loss": 0.0105,
      "step": 1884660
    },
    {
      "epoch": 3.0843201560587317,
      "grad_norm": 0.15252099931240082,
      "learning_rate": 3.856605364944025e-06,
      "loss": 0.0134,
      "step": 1884680
    },
    {
      "epoch": 3.084352886497385,
      "grad_norm": 0.24695676565170288,
      "learning_rate": 3.856539472730508e-06,
      "loss": 0.0113,
      "step": 1884700
    },
    {
      "epoch": 3.084385616936038,
      "grad_norm": 0.19151820242404938,
      "learning_rate": 3.8564735805169905e-06,
      "loss": 0.0085,
      "step": 1884720
    },
    {
      "epoch": 3.0844183473746916,
      "grad_norm": 0.2157786637544632,
      "learning_rate": 3.856407688303473e-06,
      "loss": 0.0111,
      "step": 1884740
    },
    {
      "epoch": 3.0844510778133447,
      "grad_norm": 0.1517825871706009,
      "learning_rate": 3.856341796089956e-06,
      "loss": 0.0133,
      "step": 1884760
    },
    {
      "epoch": 3.0844838082519983,
      "grad_norm": 0.3525709807872772,
      "learning_rate": 3.856275903876439e-06,
      "loss": 0.0094,
      "step": 1884780
    },
    {
      "epoch": 3.0845165386906515,
      "grad_norm": 0.1857760101556778,
      "learning_rate": 3.856210011662922e-06,
      "loss": 0.0107,
      "step": 1884800
    },
    {
      "epoch": 3.084549269129305,
      "grad_norm": 0.12842966616153717,
      "learning_rate": 3.856144119449405e-06,
      "loss": 0.0126,
      "step": 1884820
    },
    {
      "epoch": 3.084581999567958,
      "grad_norm": 0.06241967901587486,
      "learning_rate": 3.856078227235888e-06,
      "loss": 0.0094,
      "step": 1884840
    },
    {
      "epoch": 3.0846147300066113,
      "grad_norm": 0.20808130502700806,
      "learning_rate": 3.8560123350223705e-06,
      "loss": 0.0136,
      "step": 1884860
    },
    {
      "epoch": 3.084647460445265,
      "grad_norm": 0.2738244831562042,
      "learning_rate": 3.855946442808853e-06,
      "loss": 0.0149,
      "step": 1884880
    },
    {
      "epoch": 3.084680190883918,
      "grad_norm": 0.5763400197029114,
      "learning_rate": 3.855880550595337e-06,
      "loss": 0.0114,
      "step": 1884900
    },
    {
      "epoch": 3.0847129213225717,
      "grad_norm": 0.3640008866786957,
      "learning_rate": 3.85581465838182e-06,
      "loss": 0.0129,
      "step": 1884920
    },
    {
      "epoch": 3.084745651761225,
      "grad_norm": 0.292260080575943,
      "learning_rate": 3.855748766168302e-06,
      "loss": 0.0118,
      "step": 1884940
    },
    {
      "epoch": 3.0847783821998784,
      "grad_norm": 0.18924281001091003,
      "learning_rate": 3.855682873954785e-06,
      "loss": 0.0111,
      "step": 1884960
    },
    {
      "epoch": 3.0848111126385316,
      "grad_norm": 0.6129529476165771,
      "learning_rate": 3.855616981741268e-06,
      "loss": 0.0115,
      "step": 1884980
    },
    {
      "epoch": 3.0848438430771847,
      "grad_norm": 0.24223807454109192,
      "learning_rate": 3.8555510895277506e-06,
      "loss": 0.0114,
      "step": 1885000
    },
    {
      "epoch": 3.0848765735158383,
      "grad_norm": 0.20112758874893188,
      "learning_rate": 3.855485197314234e-06,
      "loss": 0.0114,
      "step": 1885020
    },
    {
      "epoch": 3.0849093039544915,
      "grad_norm": 0.6916741132736206,
      "learning_rate": 3.855419305100717e-06,
      "loss": 0.0115,
      "step": 1885040
    },
    {
      "epoch": 3.084942034393145,
      "grad_norm": 0.12745897471904755,
      "learning_rate": 3.8553534128872e-06,
      "loss": 0.0109,
      "step": 1885060
    },
    {
      "epoch": 3.084974764831798,
      "grad_norm": 0.5602886080741882,
      "learning_rate": 3.855287520673682e-06,
      "loss": 0.0135,
      "step": 1885080
    },
    {
      "epoch": 3.085007495270452,
      "grad_norm": 0.4278579354286194,
      "learning_rate": 3.855221628460165e-06,
      "loss": 0.0129,
      "step": 1885100
    },
    {
      "epoch": 3.085040225709105,
      "grad_norm": 1.207900881767273,
      "learning_rate": 3.855155736246648e-06,
      "loss": 0.011,
      "step": 1885120
    },
    {
      "epoch": 3.085072956147758,
      "grad_norm": 0.0874815359711647,
      "learning_rate": 3.855089844033131e-06,
      "loss": 0.008,
      "step": 1885140
    },
    {
      "epoch": 3.0851056865864117,
      "grad_norm": 0.20440228283405304,
      "learning_rate": 3.855023951819613e-06,
      "loss": 0.0071,
      "step": 1885160
    },
    {
      "epoch": 3.085138417025065,
      "grad_norm": 1.1117030382156372,
      "learning_rate": 3.854958059606096e-06,
      "loss": 0.011,
      "step": 1885180
    },
    {
      "epoch": 3.0851711474637185,
      "grad_norm": 0.2241036295890808,
      "learning_rate": 3.85489216739258e-06,
      "loss": 0.0085,
      "step": 1885200
    },
    {
      "epoch": 3.0852038779023716,
      "grad_norm": 0.47253715991973877,
      "learning_rate": 3.854826275179062e-06,
      "loss": 0.0174,
      "step": 1885220
    },
    {
      "epoch": 3.085236608341025,
      "grad_norm": 0.12243158370256424,
      "learning_rate": 3.854760382965545e-06,
      "loss": 0.0141,
      "step": 1885240
    },
    {
      "epoch": 3.0852693387796783,
      "grad_norm": 0.149872824549675,
      "learning_rate": 3.854694490752029e-06,
      "loss": 0.0108,
      "step": 1885260
    },
    {
      "epoch": 3.0853020692183315,
      "grad_norm": 0.4239124655723572,
      "learning_rate": 3.8546285985385115e-06,
      "loss": 0.0108,
      "step": 1885280
    },
    {
      "epoch": 3.085334799656985,
      "grad_norm": 0.550365686416626,
      "learning_rate": 3.854562706324994e-06,
      "loss": 0.0068,
      "step": 1885300
    },
    {
      "epoch": 3.0853675300956382,
      "grad_norm": 0.14372751116752625,
      "learning_rate": 3.854496814111477e-06,
      "loss": 0.0124,
      "step": 1885320
    },
    {
      "epoch": 3.085400260534292,
      "grad_norm": 0.1754092574119568,
      "learning_rate": 3.85443092189796e-06,
      "loss": 0.013,
      "step": 1885340
    },
    {
      "epoch": 3.085432990972945,
      "grad_norm": 0.9474477767944336,
      "learning_rate": 3.854365029684442e-06,
      "loss": 0.0104,
      "step": 1885360
    },
    {
      "epoch": 3.085465721411598,
      "grad_norm": 0.3007963001728058,
      "learning_rate": 3.854299137470925e-06,
      "loss": 0.0087,
      "step": 1885380
    },
    {
      "epoch": 3.0854984518502517,
      "grad_norm": 0.11651396006345749,
      "learning_rate": 3.854233245257408e-06,
      "loss": 0.0161,
      "step": 1885400
    },
    {
      "epoch": 3.085531182288905,
      "grad_norm": 0.3199857473373413,
      "learning_rate": 3.8541673530438915e-06,
      "loss": 0.0082,
      "step": 1885420
    },
    {
      "epoch": 3.0855639127275585,
      "grad_norm": 0.3372609615325928,
      "learning_rate": 3.854101460830374e-06,
      "loss": 0.0152,
      "step": 1885440
    },
    {
      "epoch": 3.0855966431662116,
      "grad_norm": 0.20758609473705292,
      "learning_rate": 3.854035568616857e-06,
      "loss": 0.0148,
      "step": 1885460
    },
    {
      "epoch": 3.0856293736048652,
      "grad_norm": 0.1567685604095459,
      "learning_rate": 3.85396967640334e-06,
      "loss": 0.0171,
      "step": 1885480
    },
    {
      "epoch": 3.0856621040435184,
      "grad_norm": 0.375531941652298,
      "learning_rate": 3.8539037841898224e-06,
      "loss": 0.0092,
      "step": 1885500
    },
    {
      "epoch": 3.0856948344821715,
      "grad_norm": 0.2694301903247833,
      "learning_rate": 3.853837891976305e-06,
      "loss": 0.0134,
      "step": 1885520
    },
    {
      "epoch": 3.085727564920825,
      "grad_norm": 0.1745644509792328,
      "learning_rate": 3.853771999762788e-06,
      "loss": 0.017,
      "step": 1885540
    },
    {
      "epoch": 3.0857602953594783,
      "grad_norm": 0.1041707694530487,
      "learning_rate": 3.853706107549271e-06,
      "loss": 0.0109,
      "step": 1885560
    },
    {
      "epoch": 3.085793025798132,
      "grad_norm": 0.1816302239894867,
      "learning_rate": 3.853640215335754e-06,
      "loss": 0.011,
      "step": 1885580
    },
    {
      "epoch": 3.085825756236785,
      "grad_norm": 0.1338125318288803,
      "learning_rate": 3.853574323122237e-06,
      "loss": 0.0079,
      "step": 1885600
    },
    {
      "epoch": 3.0858584866754386,
      "grad_norm": 0.2013450413942337,
      "learning_rate": 3.85350843090872e-06,
      "loss": 0.012,
      "step": 1885620
    },
    {
      "epoch": 3.0858912171140918,
      "grad_norm": 0.2263394296169281,
      "learning_rate": 3.853442538695203e-06,
      "loss": 0.0117,
      "step": 1885640
    },
    {
      "epoch": 3.085923947552745,
      "grad_norm": 0.325766384601593,
      "learning_rate": 3.853376646481686e-06,
      "loss": 0.0108,
      "step": 1885660
    },
    {
      "epoch": 3.0859566779913985,
      "grad_norm": 0.24620309472084045,
      "learning_rate": 3.853310754268169e-06,
      "loss": 0.0092,
      "step": 1885680
    },
    {
      "epoch": 3.0859894084300517,
      "grad_norm": 0.23907212913036346,
      "learning_rate": 3.8532448620546515e-06,
      "loss": 0.0101,
      "step": 1885700
    },
    {
      "epoch": 3.0860221388687052,
      "grad_norm": 0.19816097617149353,
      "learning_rate": 3.853178969841134e-06,
      "loss": 0.0088,
      "step": 1885720
    },
    {
      "epoch": 3.0860548693073584,
      "grad_norm": 0.2795428931713104,
      "learning_rate": 3.853113077627617e-06,
      "loss": 0.01,
      "step": 1885740
    },
    {
      "epoch": 3.086087599746012,
      "grad_norm": 0.4666479527950287,
      "learning_rate": 3.8530471854141e-06,
      "loss": 0.0071,
      "step": 1885760
    },
    {
      "epoch": 3.086120330184665,
      "grad_norm": 0.10631061345338821,
      "learning_rate": 3.8529812932005825e-06,
      "loss": 0.0128,
      "step": 1885780
    },
    {
      "epoch": 3.0861530606233183,
      "grad_norm": 0.13959993422031403,
      "learning_rate": 3.852915400987065e-06,
      "loss": 0.0106,
      "step": 1885800
    },
    {
      "epoch": 3.086185791061972,
      "grad_norm": 0.17084404826164246,
      "learning_rate": 3.852849508773549e-06,
      "loss": 0.0076,
      "step": 1885820
    },
    {
      "epoch": 3.086218521500625,
      "grad_norm": 0.34636256098747253,
      "learning_rate": 3.8527836165600316e-06,
      "loss": 0.0115,
      "step": 1885840
    },
    {
      "epoch": 3.0862512519392786,
      "grad_norm": 0.6908614635467529,
      "learning_rate": 3.852717724346514e-06,
      "loss": 0.0074,
      "step": 1885860
    },
    {
      "epoch": 3.086283982377932,
      "grad_norm": 0.40288713574409485,
      "learning_rate": 3.852651832132997e-06,
      "loss": 0.0096,
      "step": 1885880
    },
    {
      "epoch": 3.0863167128165854,
      "grad_norm": 0.4032236933708191,
      "learning_rate": 3.85258593991948e-06,
      "loss": 0.0109,
      "step": 1885900
    },
    {
      "epoch": 3.0863494432552385,
      "grad_norm": 0.13982917368412018,
      "learning_rate": 3.8525200477059625e-06,
      "loss": 0.012,
      "step": 1885920
    },
    {
      "epoch": 3.0863821736938917,
      "grad_norm": 0.21220281720161438,
      "learning_rate": 3.852454155492445e-06,
      "loss": 0.0187,
      "step": 1885940
    },
    {
      "epoch": 3.0864149041325453,
      "grad_norm": 0.13980697095394135,
      "learning_rate": 3.852388263278929e-06,
      "loss": 0.0119,
      "step": 1885960
    },
    {
      "epoch": 3.0864476345711984,
      "grad_norm": 0.5837393999099731,
      "learning_rate": 3.852322371065412e-06,
      "loss": 0.0102,
      "step": 1885980
    },
    {
      "epoch": 3.086480365009852,
      "grad_norm": 0.14723695814609528,
      "learning_rate": 3.852256478851894e-06,
      "loss": 0.0096,
      "step": 1886000
    },
    {
      "epoch": 3.086513095448505,
      "grad_norm": 0.14245283603668213,
      "learning_rate": 3.852190586638377e-06,
      "loss": 0.0134,
      "step": 1886020
    },
    {
      "epoch": 3.0865458258871588,
      "grad_norm": 0.21231122314929962,
      "learning_rate": 3.852124694424861e-06,
      "loss": 0.0141,
      "step": 1886040
    },
    {
      "epoch": 3.086578556325812,
      "grad_norm": 0.7324755191802979,
      "learning_rate": 3.852058802211343e-06,
      "loss": 0.0114,
      "step": 1886060
    },
    {
      "epoch": 3.086611286764465,
      "grad_norm": 0.06025267764925957,
      "learning_rate": 3.851992909997826e-06,
      "loss": 0.0091,
      "step": 1886080
    },
    {
      "epoch": 3.0866440172031187,
      "grad_norm": 0.2678583264350891,
      "learning_rate": 3.851927017784309e-06,
      "loss": 0.0197,
      "step": 1886100
    },
    {
      "epoch": 3.086676747641772,
      "grad_norm": 0.2858680188655853,
      "learning_rate": 3.851861125570792e-06,
      "loss": 0.01,
      "step": 1886120
    },
    {
      "epoch": 3.0867094780804254,
      "grad_norm": 0.3745644688606262,
      "learning_rate": 3.851795233357274e-06,
      "loss": 0.0146,
      "step": 1886140
    },
    {
      "epoch": 3.0867422085190785,
      "grad_norm": 0.3985692858695984,
      "learning_rate": 3.851729341143757e-06,
      "loss": 0.0132,
      "step": 1886160
    },
    {
      "epoch": 3.086774938957732,
      "grad_norm": 0.2169138491153717,
      "learning_rate": 3.85166344893024e-06,
      "loss": 0.0065,
      "step": 1886180
    },
    {
      "epoch": 3.0868076693963853,
      "grad_norm": 0.4456329643726349,
      "learning_rate": 3.8515975567167226e-06,
      "loss": 0.0115,
      "step": 1886200
    },
    {
      "epoch": 3.0868403998350384,
      "grad_norm": 0.3590357005596161,
      "learning_rate": 3.851531664503206e-06,
      "loss": 0.0104,
      "step": 1886220
    },
    {
      "epoch": 3.086873130273692,
      "grad_norm": 0.0451730340719223,
      "learning_rate": 3.851465772289689e-06,
      "loss": 0.0073,
      "step": 1886240
    },
    {
      "epoch": 3.086905860712345,
      "grad_norm": 0.9500489830970764,
      "learning_rate": 3.851399880076172e-06,
      "loss": 0.0091,
      "step": 1886260
    },
    {
      "epoch": 3.086938591150999,
      "grad_norm": 0.14184613525867462,
      "learning_rate": 3.851333987862654e-06,
      "loss": 0.009,
      "step": 1886280
    },
    {
      "epoch": 3.086971321589652,
      "grad_norm": 0.46382254362106323,
      "learning_rate": 3.851268095649138e-06,
      "loss": 0.0102,
      "step": 1886300
    },
    {
      "epoch": 3.087004052028305,
      "grad_norm": 0.2891196012496948,
      "learning_rate": 3.851202203435621e-06,
      "loss": 0.0138,
      "step": 1886320
    },
    {
      "epoch": 3.0870367824669587,
      "grad_norm": 0.5186395645141602,
      "learning_rate": 3.8511363112221034e-06,
      "loss": 0.0165,
      "step": 1886340
    },
    {
      "epoch": 3.087069512905612,
      "grad_norm": 0.33471328020095825,
      "learning_rate": 3.851070419008586e-06,
      "loss": 0.0092,
      "step": 1886360
    },
    {
      "epoch": 3.0871022433442654,
      "grad_norm": 0.1976422667503357,
      "learning_rate": 3.851004526795069e-06,
      "loss": 0.0105,
      "step": 1886380
    },
    {
      "epoch": 3.0871349737829186,
      "grad_norm": 0.1641678661108017,
      "learning_rate": 3.850938634581552e-06,
      "loss": 0.0088,
      "step": 1886400
    },
    {
      "epoch": 3.087167704221572,
      "grad_norm": 0.18188372254371643,
      "learning_rate": 3.850872742368034e-06,
      "loss": 0.0115,
      "step": 1886420
    },
    {
      "epoch": 3.0872004346602253,
      "grad_norm": 0.38292229175567627,
      "learning_rate": 3.850806850154518e-06,
      "loss": 0.0172,
      "step": 1886440
    },
    {
      "epoch": 3.0872331650988785,
      "grad_norm": 0.44290992617607117,
      "learning_rate": 3.850740957941001e-06,
      "loss": 0.0188,
      "step": 1886460
    },
    {
      "epoch": 3.087265895537532,
      "grad_norm": 0.11171086877584457,
      "learning_rate": 3.8506750657274835e-06,
      "loss": 0.0179,
      "step": 1886480
    },
    {
      "epoch": 3.087298625976185,
      "grad_norm": 0.14059704542160034,
      "learning_rate": 3.850609173513966e-06,
      "loss": 0.0106,
      "step": 1886500
    },
    {
      "epoch": 3.087331356414839,
      "grad_norm": 0.25133219361305237,
      "learning_rate": 3.850543281300449e-06,
      "loss": 0.0101,
      "step": 1886520
    },
    {
      "epoch": 3.087364086853492,
      "grad_norm": 0.5032861232757568,
      "learning_rate": 3.850477389086932e-06,
      "loss": 0.0122,
      "step": 1886540
    },
    {
      "epoch": 3.0873968172921455,
      "grad_norm": 0.29679974913597107,
      "learning_rate": 3.8504114968734144e-06,
      "loss": 0.0158,
      "step": 1886560
    },
    {
      "epoch": 3.0874295477307987,
      "grad_norm": 0.7460426688194275,
      "learning_rate": 3.850345604659897e-06,
      "loss": 0.0134,
      "step": 1886580
    },
    {
      "epoch": 3.087462278169452,
      "grad_norm": 0.332689493894577,
      "learning_rate": 3.85027971244638e-06,
      "loss": 0.0163,
      "step": 1886600
    },
    {
      "epoch": 3.0874950086081054,
      "grad_norm": 0.32900702953338623,
      "learning_rate": 3.8502138202328635e-06,
      "loss": 0.0114,
      "step": 1886620
    },
    {
      "epoch": 3.0875277390467586,
      "grad_norm": 0.7530601024627686,
      "learning_rate": 3.850147928019346e-06,
      "loss": 0.0086,
      "step": 1886640
    },
    {
      "epoch": 3.087560469485412,
      "grad_norm": 0.09038781374692917,
      "learning_rate": 3.85008203580583e-06,
      "loss": 0.0104,
      "step": 1886660
    },
    {
      "epoch": 3.0875931999240653,
      "grad_norm": 0.3168651759624481,
      "learning_rate": 3.8500161435923126e-06,
      "loss": 0.0126,
      "step": 1886680
    },
    {
      "epoch": 3.087625930362719,
      "grad_norm": 0.3764292895793915,
      "learning_rate": 3.849950251378795e-06,
      "loss": 0.0134,
      "step": 1886700
    },
    {
      "epoch": 3.087658660801372,
      "grad_norm": 0.45192813873291016,
      "learning_rate": 3.849884359165278e-06,
      "loss": 0.0081,
      "step": 1886720
    },
    {
      "epoch": 3.0876913912400252,
      "grad_norm": 0.26428329944610596,
      "learning_rate": 3.849818466951761e-06,
      "loss": 0.0125,
      "step": 1886740
    },
    {
      "epoch": 3.087724121678679,
      "grad_norm": 0.271761417388916,
      "learning_rate": 3.8497525747382435e-06,
      "loss": 0.0107,
      "step": 1886760
    },
    {
      "epoch": 3.087756852117332,
      "grad_norm": 0.3968043923377991,
      "learning_rate": 3.849686682524726e-06,
      "loss": 0.0123,
      "step": 1886780
    },
    {
      "epoch": 3.0877895825559856,
      "grad_norm": 0.05040628835558891,
      "learning_rate": 3.849620790311209e-06,
      "loss": 0.0155,
      "step": 1886800
    },
    {
      "epoch": 3.0878223129946387,
      "grad_norm": 0.20508091151714325,
      "learning_rate": 3.849554898097692e-06,
      "loss": 0.0121,
      "step": 1886820
    },
    {
      "epoch": 3.087855043433292,
      "grad_norm": 0.42321452498435974,
      "learning_rate": 3.849489005884175e-06,
      "loss": 0.0106,
      "step": 1886840
    },
    {
      "epoch": 3.0878877738719455,
      "grad_norm": 0.09619022160768509,
      "learning_rate": 3.849423113670658e-06,
      "loss": 0.0115,
      "step": 1886860
    },
    {
      "epoch": 3.0879205043105986,
      "grad_norm": 0.15897229313850403,
      "learning_rate": 3.849357221457141e-06,
      "loss": 0.0134,
      "step": 1886880
    },
    {
      "epoch": 3.087953234749252,
      "grad_norm": 0.6082220077514648,
      "learning_rate": 3.8492913292436235e-06,
      "loss": 0.0106,
      "step": 1886900
    },
    {
      "epoch": 3.0879859651879054,
      "grad_norm": 0.18125629425048828,
      "learning_rate": 3.849225437030106e-06,
      "loss": 0.0139,
      "step": 1886920
    },
    {
      "epoch": 3.088018695626559,
      "grad_norm": 0.3070419728755951,
      "learning_rate": 3.849159544816589e-06,
      "loss": 0.0118,
      "step": 1886940
    },
    {
      "epoch": 3.088051426065212,
      "grad_norm": 0.07668288052082062,
      "learning_rate": 3.849093652603072e-06,
      "loss": 0.0121,
      "step": 1886960
    },
    {
      "epoch": 3.0880841565038653,
      "grad_norm": 0.2636198103427887,
      "learning_rate": 3.8490277603895545e-06,
      "loss": 0.0105,
      "step": 1886980
    },
    {
      "epoch": 3.088116886942519,
      "grad_norm": 0.6243125796318054,
      "learning_rate": 3.848961868176038e-06,
      "loss": 0.0139,
      "step": 1887000
    },
    {
      "epoch": 3.088149617381172,
      "grad_norm": 0.24593240022659302,
      "learning_rate": 3.848895975962521e-06,
      "loss": 0.0106,
      "step": 1887020
    },
    {
      "epoch": 3.0881823478198256,
      "grad_norm": 0.7321066856384277,
      "learning_rate": 3.8488300837490036e-06,
      "loss": 0.0154,
      "step": 1887040
    },
    {
      "epoch": 3.0882150782584787,
      "grad_norm": 0.6388044953346252,
      "learning_rate": 3.848764191535487e-06,
      "loss": 0.0109,
      "step": 1887060
    },
    {
      "epoch": 3.0882478086971323,
      "grad_norm": 0.13751304149627686,
      "learning_rate": 3.84869829932197e-06,
      "loss": 0.013,
      "step": 1887080
    },
    {
      "epoch": 3.0882805391357855,
      "grad_norm": 0.2193613499403,
      "learning_rate": 3.848632407108453e-06,
      "loss": 0.0071,
      "step": 1887100
    },
    {
      "epoch": 3.0883132695744386,
      "grad_norm": 0.31691280007362366,
      "learning_rate": 3.848566514894935e-06,
      "loss": 0.0137,
      "step": 1887120
    },
    {
      "epoch": 3.0883460000130922,
      "grad_norm": 0.887874960899353,
      "learning_rate": 3.848500622681418e-06,
      "loss": 0.0181,
      "step": 1887140
    },
    {
      "epoch": 3.0883787304517454,
      "grad_norm": 0.3311917185783386,
      "learning_rate": 3.848434730467901e-06,
      "loss": 0.0113,
      "step": 1887160
    },
    {
      "epoch": 3.088411460890399,
      "grad_norm": 0.1573784500360489,
      "learning_rate": 3.848368838254384e-06,
      "loss": 0.0109,
      "step": 1887180
    },
    {
      "epoch": 3.088444191329052,
      "grad_norm": 0.4960172474384308,
      "learning_rate": 3.848302946040866e-06,
      "loss": 0.012,
      "step": 1887200
    },
    {
      "epoch": 3.0884769217677057,
      "grad_norm": 0.5513953566551208,
      "learning_rate": 3.848237053827349e-06,
      "loss": 0.0147,
      "step": 1887220
    },
    {
      "epoch": 3.088509652206359,
      "grad_norm": 0.21023808419704437,
      "learning_rate": 3.848171161613833e-06,
      "loss": 0.0134,
      "step": 1887240
    },
    {
      "epoch": 3.088542382645012,
      "grad_norm": 0.6707645654678345,
      "learning_rate": 3.848105269400315e-06,
      "loss": 0.013,
      "step": 1887260
    },
    {
      "epoch": 3.0885751130836656,
      "grad_norm": 0.3899635672569275,
      "learning_rate": 3.848039377186798e-06,
      "loss": 0.0117,
      "step": 1887280
    },
    {
      "epoch": 3.0886078435223188,
      "grad_norm": 0.18915164470672607,
      "learning_rate": 3.847973484973281e-06,
      "loss": 0.0105,
      "step": 1887300
    },
    {
      "epoch": 3.0886405739609724,
      "grad_norm": 0.13797923922538757,
      "learning_rate": 3.847907592759764e-06,
      "loss": 0.0106,
      "step": 1887320
    },
    {
      "epoch": 3.0886733043996255,
      "grad_norm": 0.8344686031341553,
      "learning_rate": 3.847841700546246e-06,
      "loss": 0.0146,
      "step": 1887340
    },
    {
      "epoch": 3.088706034838279,
      "grad_norm": 0.11222574859857559,
      "learning_rate": 3.84777580833273e-06,
      "loss": 0.0097,
      "step": 1887360
    },
    {
      "epoch": 3.0887387652769323,
      "grad_norm": 0.6451337933540344,
      "learning_rate": 3.847709916119213e-06,
      "loss": 0.0096,
      "step": 1887380
    },
    {
      "epoch": 3.0887714957155854,
      "grad_norm": 0.4414202868938446,
      "learning_rate": 3.847644023905695e-06,
      "loss": 0.0106,
      "step": 1887400
    },
    {
      "epoch": 3.088804226154239,
      "grad_norm": 0.146893709897995,
      "learning_rate": 3.847578131692178e-06,
      "loss": 0.0132,
      "step": 1887420
    },
    {
      "epoch": 3.088836956592892,
      "grad_norm": 0.07944092899560928,
      "learning_rate": 3.847512239478661e-06,
      "loss": 0.0101,
      "step": 1887440
    },
    {
      "epoch": 3.0888696870315457,
      "grad_norm": 0.14078989624977112,
      "learning_rate": 3.8474463472651445e-06,
      "loss": 0.0105,
      "step": 1887460
    },
    {
      "epoch": 3.088902417470199,
      "grad_norm": 0.5437176823616028,
      "learning_rate": 3.847380455051627e-06,
      "loss": 0.0122,
      "step": 1887480
    },
    {
      "epoch": 3.0889351479088525,
      "grad_norm": 0.23885652422904968,
      "learning_rate": 3.84731456283811e-06,
      "loss": 0.0145,
      "step": 1887500
    },
    {
      "epoch": 3.0889678783475056,
      "grad_norm": 0.17874407768249512,
      "learning_rate": 3.847248670624593e-06,
      "loss": 0.0066,
      "step": 1887520
    },
    {
      "epoch": 3.089000608786159,
      "grad_norm": 0.23603366315364838,
      "learning_rate": 3.8471827784110754e-06,
      "loss": 0.0078,
      "step": 1887540
    },
    {
      "epoch": 3.0890333392248124,
      "grad_norm": 0.1316019743680954,
      "learning_rate": 3.847116886197558e-06,
      "loss": 0.0083,
      "step": 1887560
    },
    {
      "epoch": 3.0890660696634655,
      "grad_norm": 0.6422346234321594,
      "learning_rate": 3.847050993984041e-06,
      "loss": 0.0131,
      "step": 1887580
    },
    {
      "epoch": 3.089098800102119,
      "grad_norm": 0.4622011184692383,
      "learning_rate": 3.846985101770524e-06,
      "loss": 0.0091,
      "step": 1887600
    },
    {
      "epoch": 3.0891315305407723,
      "grad_norm": 0.16819818317890167,
      "learning_rate": 3.846919209557006e-06,
      "loss": 0.0105,
      "step": 1887620
    },
    {
      "epoch": 3.089164260979426,
      "grad_norm": 0.5266693830490112,
      "learning_rate": 3.84685331734349e-06,
      "loss": 0.013,
      "step": 1887640
    },
    {
      "epoch": 3.089196991418079,
      "grad_norm": 0.3281445801258087,
      "learning_rate": 3.846787425129973e-06,
      "loss": 0.0134,
      "step": 1887660
    },
    {
      "epoch": 3.089229721856732,
      "grad_norm": 0.1500244438648224,
      "learning_rate": 3.8467215329164555e-06,
      "loss": 0.0104,
      "step": 1887680
    },
    {
      "epoch": 3.0892624522953858,
      "grad_norm": 0.20237255096435547,
      "learning_rate": 3.846655640702938e-06,
      "loss": 0.0089,
      "step": 1887700
    },
    {
      "epoch": 3.089295182734039,
      "grad_norm": 0.7517326474189758,
      "learning_rate": 3.846589748489422e-06,
      "loss": 0.0132,
      "step": 1887720
    },
    {
      "epoch": 3.0893279131726925,
      "grad_norm": 0.25675103068351746,
      "learning_rate": 3.8465238562759045e-06,
      "loss": 0.0091,
      "step": 1887740
    },
    {
      "epoch": 3.0893606436113457,
      "grad_norm": 0.4441640079021454,
      "learning_rate": 3.846457964062387e-06,
      "loss": 0.0141,
      "step": 1887760
    },
    {
      "epoch": 3.089393374049999,
      "grad_norm": 0.49112430214881897,
      "learning_rate": 3.84639207184887e-06,
      "loss": 0.0149,
      "step": 1887780
    },
    {
      "epoch": 3.0894261044886524,
      "grad_norm": 0.10071475058794022,
      "learning_rate": 3.846326179635353e-06,
      "loss": 0.0129,
      "step": 1887800
    },
    {
      "epoch": 3.0894588349273056,
      "grad_norm": 0.3563857674598694,
      "learning_rate": 3.8462602874218355e-06,
      "loss": 0.0081,
      "step": 1887820
    },
    {
      "epoch": 3.089491565365959,
      "grad_norm": 0.5050610303878784,
      "learning_rate": 3.846194395208318e-06,
      "loss": 0.0107,
      "step": 1887840
    },
    {
      "epoch": 3.0895242958046123,
      "grad_norm": 0.3519890308380127,
      "learning_rate": 3.846128502994802e-06,
      "loss": 0.0114,
      "step": 1887860
    },
    {
      "epoch": 3.089557026243266,
      "grad_norm": 0.1992550492286682,
      "learning_rate": 3.8460626107812846e-06,
      "loss": 0.0171,
      "step": 1887880
    },
    {
      "epoch": 3.089589756681919,
      "grad_norm": 0.3759327530860901,
      "learning_rate": 3.845996718567767e-06,
      "loss": 0.0115,
      "step": 1887900
    },
    {
      "epoch": 3.089622487120572,
      "grad_norm": 0.25918418169021606,
      "learning_rate": 3.84593082635425e-06,
      "loss": 0.0108,
      "step": 1887920
    },
    {
      "epoch": 3.089655217559226,
      "grad_norm": 0.4945181608200073,
      "learning_rate": 3.845864934140733e-06,
      "loss": 0.0168,
      "step": 1887940
    },
    {
      "epoch": 3.089687947997879,
      "grad_norm": 0.2686445415019989,
      "learning_rate": 3.8457990419272155e-06,
      "loss": 0.0086,
      "step": 1887960
    },
    {
      "epoch": 3.0897206784365325,
      "grad_norm": 0.13984805345535278,
      "learning_rate": 3.845733149713698e-06,
      "loss": 0.0124,
      "step": 1887980
    },
    {
      "epoch": 3.0897534088751857,
      "grad_norm": 0.201584130525589,
      "learning_rate": 3.845667257500181e-06,
      "loss": 0.0097,
      "step": 1888000
    },
    {
      "epoch": 3.0897861393138393,
      "grad_norm": 0.2549610435962677,
      "learning_rate": 3.845601365286665e-06,
      "loss": 0.0136,
      "step": 1888020
    },
    {
      "epoch": 3.0898188697524924,
      "grad_norm": 0.23250705003738403,
      "learning_rate": 3.845535473073147e-06,
      "loss": 0.0121,
      "step": 1888040
    },
    {
      "epoch": 3.0898516001911456,
      "grad_norm": 0.2687532901763916,
      "learning_rate": 3.84546958085963e-06,
      "loss": 0.01,
      "step": 1888060
    },
    {
      "epoch": 3.089884330629799,
      "grad_norm": 0.822260856628418,
      "learning_rate": 3.845403688646114e-06,
      "loss": 0.0105,
      "step": 1888080
    },
    {
      "epoch": 3.0899170610684523,
      "grad_norm": 0.20888684689998627,
      "learning_rate": 3.845337796432596e-06,
      "loss": 0.0151,
      "step": 1888100
    },
    {
      "epoch": 3.089949791507106,
      "grad_norm": 0.40725383162498474,
      "learning_rate": 3.845271904219079e-06,
      "loss": 0.0101,
      "step": 1888120
    },
    {
      "epoch": 3.089982521945759,
      "grad_norm": 0.12796229124069214,
      "learning_rate": 3.845206012005562e-06,
      "loss": 0.0088,
      "step": 1888140
    },
    {
      "epoch": 3.0900152523844127,
      "grad_norm": 0.6019597053527832,
      "learning_rate": 3.845140119792045e-06,
      "loss": 0.014,
      "step": 1888160
    },
    {
      "epoch": 3.090047982823066,
      "grad_norm": 0.2563631236553192,
      "learning_rate": 3.845074227578527e-06,
      "loss": 0.011,
      "step": 1888180
    },
    {
      "epoch": 3.090080713261719,
      "grad_norm": 0.09898639470338821,
      "learning_rate": 3.84500833536501e-06,
      "loss": 0.0094,
      "step": 1888200
    },
    {
      "epoch": 3.0901134437003726,
      "grad_norm": 0.09407219290733337,
      "learning_rate": 3.844942443151493e-06,
      "loss": 0.0145,
      "step": 1888220
    },
    {
      "epoch": 3.0901461741390257,
      "grad_norm": 0.5701452493667603,
      "learning_rate": 3.8448765509379756e-06,
      "loss": 0.0176,
      "step": 1888240
    },
    {
      "epoch": 3.0901789045776793,
      "grad_norm": 0.3566288650035858,
      "learning_rate": 3.844810658724459e-06,
      "loss": 0.0162,
      "step": 1888260
    },
    {
      "epoch": 3.0902116350163324,
      "grad_norm": 0.6391816735267639,
      "learning_rate": 3.844744766510942e-06,
      "loss": 0.0102,
      "step": 1888280
    },
    {
      "epoch": 3.090244365454986,
      "grad_norm": 0.19567914307117462,
      "learning_rate": 3.844678874297425e-06,
      "loss": 0.0112,
      "step": 1888300
    },
    {
      "epoch": 3.090277095893639,
      "grad_norm": 0.3772391676902771,
      "learning_rate": 3.844612982083907e-06,
      "loss": 0.0082,
      "step": 1888320
    },
    {
      "epoch": 3.0903098263322923,
      "grad_norm": 0.661173939704895,
      "learning_rate": 3.84454708987039e-06,
      "loss": 0.013,
      "step": 1888340
    },
    {
      "epoch": 3.090342556770946,
      "grad_norm": 0.18540243804454803,
      "learning_rate": 3.844481197656873e-06,
      "loss": 0.0107,
      "step": 1888360
    },
    {
      "epoch": 3.090375287209599,
      "grad_norm": 0.16830188035964966,
      "learning_rate": 3.844415305443356e-06,
      "loss": 0.0094,
      "step": 1888380
    },
    {
      "epoch": 3.0904080176482527,
      "grad_norm": 0.06047957018017769,
      "learning_rate": 3.844349413229838e-06,
      "loss": 0.0118,
      "step": 1888400
    },
    {
      "epoch": 3.090440748086906,
      "grad_norm": 0.1273721307516098,
      "learning_rate": 3.844283521016322e-06,
      "loss": 0.0135,
      "step": 1888420
    },
    {
      "epoch": 3.090473478525559,
      "grad_norm": 0.3773207664489746,
      "learning_rate": 3.844217628802805e-06,
      "loss": 0.0074,
      "step": 1888440
    },
    {
      "epoch": 3.0905062089642126,
      "grad_norm": 0.1334313005208969,
      "learning_rate": 3.844151736589287e-06,
      "loss": 0.0143,
      "step": 1888460
    },
    {
      "epoch": 3.0905389394028657,
      "grad_norm": 0.13375888764858246,
      "learning_rate": 3.844085844375771e-06,
      "loss": 0.0064,
      "step": 1888480
    },
    {
      "epoch": 3.0905716698415193,
      "grad_norm": 0.18685786426067352,
      "learning_rate": 3.844019952162254e-06,
      "loss": 0.0127,
      "step": 1888500
    },
    {
      "epoch": 3.0906044002801725,
      "grad_norm": 1.628293514251709,
      "learning_rate": 3.8439540599487365e-06,
      "loss": 0.0139,
      "step": 1888520
    },
    {
      "epoch": 3.090637130718826,
      "grad_norm": 0.18693575263023376,
      "learning_rate": 3.843888167735219e-06,
      "loss": 0.0087,
      "step": 1888540
    },
    {
      "epoch": 3.090669861157479,
      "grad_norm": 0.7274987697601318,
      "learning_rate": 3.843822275521702e-06,
      "loss": 0.0138,
      "step": 1888560
    },
    {
      "epoch": 3.0907025915961324,
      "grad_norm": 0.28973278403282166,
      "learning_rate": 3.843756383308185e-06,
      "loss": 0.0101,
      "step": 1888580
    },
    {
      "epoch": 3.090735322034786,
      "grad_norm": 0.39524713158607483,
      "learning_rate": 3.8436904910946674e-06,
      "loss": 0.0077,
      "step": 1888600
    },
    {
      "epoch": 3.090768052473439,
      "grad_norm": 0.3763819634914398,
      "learning_rate": 3.84362459888115e-06,
      "loss": 0.0112,
      "step": 1888620
    },
    {
      "epoch": 3.0908007829120927,
      "grad_norm": 0.3395390808582306,
      "learning_rate": 3.843558706667633e-06,
      "loss": 0.0199,
      "step": 1888640
    },
    {
      "epoch": 3.090833513350746,
      "grad_norm": 0.6353240609169006,
      "learning_rate": 3.8434928144541165e-06,
      "loss": 0.0096,
      "step": 1888660
    },
    {
      "epoch": 3.0908662437893994,
      "grad_norm": 0.6828413605690002,
      "learning_rate": 3.843426922240599e-06,
      "loss": 0.0104,
      "step": 1888680
    },
    {
      "epoch": 3.0908989742280526,
      "grad_norm": 0.11276090145111084,
      "learning_rate": 3.843361030027082e-06,
      "loss": 0.013,
      "step": 1888700
    },
    {
      "epoch": 3.0909317046667057,
      "grad_norm": 0.40944766998291016,
      "learning_rate": 3.843295137813565e-06,
      "loss": 0.0097,
      "step": 1888720
    },
    {
      "epoch": 3.0909644351053593,
      "grad_norm": 0.07941475510597229,
      "learning_rate": 3.8432292456000475e-06,
      "loss": 0.0108,
      "step": 1888740
    },
    {
      "epoch": 3.0909971655440125,
      "grad_norm": 0.3359982967376709,
      "learning_rate": 3.843163353386531e-06,
      "loss": 0.0114,
      "step": 1888760
    },
    {
      "epoch": 3.091029895982666,
      "grad_norm": 0.36175885796546936,
      "learning_rate": 3.843097461173014e-06,
      "loss": 0.0113,
      "step": 1888780
    },
    {
      "epoch": 3.0910626264213192,
      "grad_norm": 0.08222966641187668,
      "learning_rate": 3.8430315689594965e-06,
      "loss": 0.0081,
      "step": 1888800
    },
    {
      "epoch": 3.091095356859973,
      "grad_norm": 0.3753948211669922,
      "learning_rate": 3.842965676745979e-06,
      "loss": 0.0125,
      "step": 1888820
    },
    {
      "epoch": 3.091128087298626,
      "grad_norm": 0.31935274600982666,
      "learning_rate": 3.842899784532462e-06,
      "loss": 0.0101,
      "step": 1888840
    },
    {
      "epoch": 3.091160817737279,
      "grad_norm": 0.0461859405040741,
      "learning_rate": 3.842833892318945e-06,
      "loss": 0.0131,
      "step": 1888860
    },
    {
      "epoch": 3.0911935481759327,
      "grad_norm": 0.17649869620800018,
      "learning_rate": 3.842768000105428e-06,
      "loss": 0.0101,
      "step": 1888880
    },
    {
      "epoch": 3.091226278614586,
      "grad_norm": 0.4831326901912689,
      "learning_rate": 3.842702107891911e-06,
      "loss": 0.0093,
      "step": 1888900
    },
    {
      "epoch": 3.0912590090532395,
      "grad_norm": 6.476202011108398,
      "learning_rate": 3.842636215678394e-06,
      "loss": 0.0083,
      "step": 1888920
    },
    {
      "epoch": 3.0912917394918926,
      "grad_norm": 0.11644219607114792,
      "learning_rate": 3.8425703234648765e-06,
      "loss": 0.011,
      "step": 1888940
    },
    {
      "epoch": 3.091324469930546,
      "grad_norm": 0.1605469137430191,
      "learning_rate": 3.842504431251359e-06,
      "loss": 0.0114,
      "step": 1888960
    },
    {
      "epoch": 3.0913572003691994,
      "grad_norm": 0.2874082922935486,
      "learning_rate": 3.842438539037842e-06,
      "loss": 0.0093,
      "step": 1888980
    },
    {
      "epoch": 3.0913899308078525,
      "grad_norm": 0.09364723414182663,
      "learning_rate": 3.842372646824325e-06,
      "loss": 0.0144,
      "step": 1889000
    },
    {
      "epoch": 3.091422661246506,
      "grad_norm": 0.09435275197029114,
      "learning_rate": 3.8423067546108075e-06,
      "loss": 0.0076,
      "step": 1889020
    },
    {
      "epoch": 3.0914553916851593,
      "grad_norm": 0.13168632984161377,
      "learning_rate": 3.84224086239729e-06,
      "loss": 0.0103,
      "step": 1889040
    },
    {
      "epoch": 3.091488122123813,
      "grad_norm": 0.35339972376823425,
      "learning_rate": 3.842174970183774e-06,
      "loss": 0.0088,
      "step": 1889060
    },
    {
      "epoch": 3.091520852562466,
      "grad_norm": 7.461597919464111,
      "learning_rate": 3.8421090779702566e-06,
      "loss": 0.0101,
      "step": 1889080
    },
    {
      "epoch": 3.0915535830011196,
      "grad_norm": 0.40517085790634155,
      "learning_rate": 3.842043185756739e-06,
      "loss": 0.0115,
      "step": 1889100
    },
    {
      "epoch": 3.0915863134397727,
      "grad_norm": 0.4036751091480255,
      "learning_rate": 3.841977293543223e-06,
      "loss": 0.0113,
      "step": 1889120
    },
    {
      "epoch": 3.091619043878426,
      "grad_norm": 0.48542138934135437,
      "learning_rate": 3.841911401329706e-06,
      "loss": 0.0145,
      "step": 1889140
    },
    {
      "epoch": 3.0916517743170795,
      "grad_norm": 0.5912864804267883,
      "learning_rate": 3.841845509116188e-06,
      "loss": 0.0137,
      "step": 1889160
    },
    {
      "epoch": 3.0916845047557326,
      "grad_norm": 0.6412245035171509,
      "learning_rate": 3.841779616902671e-06,
      "loss": 0.0125,
      "step": 1889180
    },
    {
      "epoch": 3.0917172351943862,
      "grad_norm": 0.37680307030677795,
      "learning_rate": 3.841713724689154e-06,
      "loss": 0.0134,
      "step": 1889200
    },
    {
      "epoch": 3.0917499656330394,
      "grad_norm": 0.20646165311336517,
      "learning_rate": 3.841647832475637e-06,
      "loss": 0.0138,
      "step": 1889220
    },
    {
      "epoch": 3.0917826960716925,
      "grad_norm": 0.9116540551185608,
      "learning_rate": 3.841581940262119e-06,
      "loss": 0.0117,
      "step": 1889240
    },
    {
      "epoch": 3.091815426510346,
      "grad_norm": 0.23256425559520721,
      "learning_rate": 3.841516048048602e-06,
      "loss": 0.0083,
      "step": 1889260
    },
    {
      "epoch": 3.0918481569489993,
      "grad_norm": 0.270736426115036,
      "learning_rate": 3.841450155835086e-06,
      "loss": 0.0084,
      "step": 1889280
    },
    {
      "epoch": 3.091880887387653,
      "grad_norm": 2.6990785598754883,
      "learning_rate": 3.841384263621568e-06,
      "loss": 0.0128,
      "step": 1889300
    },
    {
      "epoch": 3.091913617826306,
      "grad_norm": 0.3025757968425751,
      "learning_rate": 3.841318371408051e-06,
      "loss": 0.0108,
      "step": 1889320
    },
    {
      "epoch": 3.0919463482649596,
      "grad_norm": 0.5608528852462769,
      "learning_rate": 3.841252479194534e-06,
      "loss": 0.0097,
      "step": 1889340
    },
    {
      "epoch": 3.0919790787036128,
      "grad_norm": 0.23016850650310516,
      "learning_rate": 3.841186586981017e-06,
      "loss": 0.0107,
      "step": 1889360
    },
    {
      "epoch": 3.092011809142266,
      "grad_norm": 0.472744345664978,
      "learning_rate": 3.841120694767499e-06,
      "loss": 0.0107,
      "step": 1889380
    },
    {
      "epoch": 3.0920445395809195,
      "grad_norm": 0.12000300735235214,
      "learning_rate": 3.841054802553982e-06,
      "loss": 0.0106,
      "step": 1889400
    },
    {
      "epoch": 3.0920772700195727,
      "grad_norm": 1.6933168172836304,
      "learning_rate": 3.840988910340465e-06,
      "loss": 0.0099,
      "step": 1889420
    },
    {
      "epoch": 3.0921100004582263,
      "grad_norm": 0.3614081144332886,
      "learning_rate": 3.8409230181269484e-06,
      "loss": 0.0139,
      "step": 1889440
    },
    {
      "epoch": 3.0921427308968794,
      "grad_norm": 0.151077538728714,
      "learning_rate": 3.840857125913431e-06,
      "loss": 0.0066,
      "step": 1889460
    },
    {
      "epoch": 3.092175461335533,
      "grad_norm": 0.4544669985771179,
      "learning_rate": 3.840791233699914e-06,
      "loss": 0.0087,
      "step": 1889480
    },
    {
      "epoch": 3.092208191774186,
      "grad_norm": 0.03907374292612076,
      "learning_rate": 3.8407253414863975e-06,
      "loss": 0.0123,
      "step": 1889500
    },
    {
      "epoch": 3.0922409222128393,
      "grad_norm": 0.31297653913497925,
      "learning_rate": 3.84065944927288e-06,
      "loss": 0.0118,
      "step": 1889520
    },
    {
      "epoch": 3.092273652651493,
      "grad_norm": 0.6419710516929626,
      "learning_rate": 3.840593557059363e-06,
      "loss": 0.0097,
      "step": 1889540
    },
    {
      "epoch": 3.092306383090146,
      "grad_norm": 0.1517738252878189,
      "learning_rate": 3.840527664845846e-06,
      "loss": 0.0098,
      "step": 1889560
    },
    {
      "epoch": 3.0923391135287996,
      "grad_norm": 0.8477452993392944,
      "learning_rate": 3.8404617726323285e-06,
      "loss": 0.0103,
      "step": 1889580
    },
    {
      "epoch": 3.092371843967453,
      "grad_norm": 0.39470818638801575,
      "learning_rate": 3.840395880418811e-06,
      "loss": 0.0134,
      "step": 1889600
    },
    {
      "epoch": 3.0924045744061064,
      "grad_norm": 0.19907839596271515,
      "learning_rate": 3.840329988205294e-06,
      "loss": 0.0084,
      "step": 1889620
    },
    {
      "epoch": 3.0924373048447595,
      "grad_norm": 0.4208645224571228,
      "learning_rate": 3.840264095991777e-06,
      "loss": 0.0149,
      "step": 1889640
    },
    {
      "epoch": 3.0924700352834127,
      "grad_norm": 0.3470454216003418,
      "learning_rate": 3.840198203778259e-06,
      "loss": 0.0135,
      "step": 1889660
    },
    {
      "epoch": 3.0925027657220663,
      "grad_norm": 0.20469340682029724,
      "learning_rate": 3.840132311564743e-06,
      "loss": 0.0128,
      "step": 1889680
    },
    {
      "epoch": 3.0925354961607194,
      "grad_norm": 0.23247498273849487,
      "learning_rate": 3.840066419351226e-06,
      "loss": 0.011,
      "step": 1889700
    },
    {
      "epoch": 3.092568226599373,
      "grad_norm": 0.5204443335533142,
      "learning_rate": 3.8400005271377085e-06,
      "loss": 0.0123,
      "step": 1889720
    },
    {
      "epoch": 3.092600957038026,
      "grad_norm": 0.38203632831573486,
      "learning_rate": 3.839934634924191e-06,
      "loss": 0.0133,
      "step": 1889740
    },
    {
      "epoch": 3.0926336874766798,
      "grad_norm": 0.2900491952896118,
      "learning_rate": 3.839868742710674e-06,
      "loss": 0.0135,
      "step": 1889760
    },
    {
      "epoch": 3.092666417915333,
      "grad_norm": 0.15861822664737701,
      "learning_rate": 3.839802850497157e-06,
      "loss": 0.0155,
      "step": 1889780
    },
    {
      "epoch": 3.092699148353986,
      "grad_norm": 0.8053066730499268,
      "learning_rate": 3.8397369582836394e-06,
      "loss": 0.0127,
      "step": 1889800
    },
    {
      "epoch": 3.0927318787926397,
      "grad_norm": 0.2577402591705322,
      "learning_rate": 3.839671066070123e-06,
      "loss": 0.0088,
      "step": 1889820
    },
    {
      "epoch": 3.092764609231293,
      "grad_norm": 0.3062569200992584,
      "learning_rate": 3.839605173856606e-06,
      "loss": 0.013,
      "step": 1889840
    },
    {
      "epoch": 3.0927973396699464,
      "grad_norm": 0.2686961591243744,
      "learning_rate": 3.8395392816430885e-06,
      "loss": 0.0112,
      "step": 1889860
    },
    {
      "epoch": 3.0928300701085996,
      "grad_norm": 0.11805343627929688,
      "learning_rate": 3.839473389429571e-06,
      "loss": 0.0105,
      "step": 1889880
    },
    {
      "epoch": 3.0928628005472527,
      "grad_norm": 0.20949243009090424,
      "learning_rate": 3.839407497216055e-06,
      "loss": 0.0117,
      "step": 1889900
    },
    {
      "epoch": 3.0928955309859063,
      "grad_norm": 0.23282280564308167,
      "learning_rate": 3.8393416050025376e-06,
      "loss": 0.0099,
      "step": 1889920
    },
    {
      "epoch": 3.0929282614245595,
      "grad_norm": 0.17288212478160858,
      "learning_rate": 3.83927571278902e-06,
      "loss": 0.0164,
      "step": 1889940
    },
    {
      "epoch": 3.092960991863213,
      "grad_norm": 0.4857127368450165,
      "learning_rate": 3.839209820575503e-06,
      "loss": 0.0098,
      "step": 1889960
    },
    {
      "epoch": 3.092993722301866,
      "grad_norm": 0.23395222425460815,
      "learning_rate": 3.839143928361986e-06,
      "loss": 0.0111,
      "step": 1889980
    },
    {
      "epoch": 3.09302645274052,
      "grad_norm": 0.17010465264320374,
      "learning_rate": 3.8390780361484685e-06,
      "loss": 0.0143,
      "step": 1890000
    },
    {
      "epoch": 3.093059183179173,
      "grad_norm": 0.18412676453590393,
      "learning_rate": 3.839012143934951e-06,
      "loss": 0.0134,
      "step": 1890020
    },
    {
      "epoch": 3.093091913617826,
      "grad_norm": 0.8955902457237244,
      "learning_rate": 3.838946251721434e-06,
      "loss": 0.0098,
      "step": 1890040
    },
    {
      "epoch": 3.0931246440564797,
      "grad_norm": 0.10569902509450912,
      "learning_rate": 3.838880359507917e-06,
      "loss": 0.0102,
      "step": 1890060
    },
    {
      "epoch": 3.093157374495133,
      "grad_norm": 0.3026477098464966,
      "learning_rate": 3.8388144672944e-06,
      "loss": 0.011,
      "step": 1890080
    },
    {
      "epoch": 3.0931901049337864,
      "grad_norm": 2.553739547729492,
      "learning_rate": 3.838748575080883e-06,
      "loss": 0.0149,
      "step": 1890100
    },
    {
      "epoch": 3.0932228353724396,
      "grad_norm": 0.18785065412521362,
      "learning_rate": 3.838682682867366e-06,
      "loss": 0.0126,
      "step": 1890120
    },
    {
      "epoch": 3.093255565811093,
      "grad_norm": 0.13191844522953033,
      "learning_rate": 3.8386167906538486e-06,
      "loss": 0.009,
      "step": 1890140
    },
    {
      "epoch": 3.0932882962497463,
      "grad_norm": 0.3473479747772217,
      "learning_rate": 3.838550898440331e-06,
      "loss": 0.0092,
      "step": 1890160
    },
    {
      "epoch": 3.0933210266883995,
      "grad_norm": 0.23121687769889832,
      "learning_rate": 3.838485006226815e-06,
      "loss": 0.0082,
      "step": 1890180
    },
    {
      "epoch": 3.093353757127053,
      "grad_norm": 0.5467744469642639,
      "learning_rate": 3.838419114013298e-06,
      "loss": 0.0157,
      "step": 1890200
    },
    {
      "epoch": 3.0933864875657062,
      "grad_norm": 0.1435142159461975,
      "learning_rate": 3.83835322179978e-06,
      "loss": 0.0097,
      "step": 1890220
    },
    {
      "epoch": 3.09341921800436,
      "grad_norm": 0.24024048447608948,
      "learning_rate": 3.838287329586263e-06,
      "loss": 0.012,
      "step": 1890240
    },
    {
      "epoch": 3.093451948443013,
      "grad_norm": 0.3839678466320038,
      "learning_rate": 3.838221437372746e-06,
      "loss": 0.02,
      "step": 1890260
    },
    {
      "epoch": 3.0934846788816666,
      "grad_norm": 0.45942145586013794,
      "learning_rate": 3.838155545159229e-06,
      "loss": 0.0128,
      "step": 1890280
    },
    {
      "epoch": 3.0935174093203197,
      "grad_norm": 0.25591179728507996,
      "learning_rate": 3.838089652945712e-06,
      "loss": 0.0106,
      "step": 1890300
    },
    {
      "epoch": 3.093550139758973,
      "grad_norm": 0.5353685021400452,
      "learning_rate": 3.838023760732195e-06,
      "loss": 0.0087,
      "step": 1890320
    },
    {
      "epoch": 3.0935828701976265,
      "grad_norm": 0.45790597796440125,
      "learning_rate": 3.837957868518678e-06,
      "loss": 0.0136,
      "step": 1890340
    },
    {
      "epoch": 3.0936156006362796,
      "grad_norm": 0.14100489020347595,
      "learning_rate": 3.83789197630516e-06,
      "loss": 0.008,
      "step": 1890360
    },
    {
      "epoch": 3.093648331074933,
      "grad_norm": 0.30676907300949097,
      "learning_rate": 3.837826084091643e-06,
      "loss": 0.0116,
      "step": 1890380
    },
    {
      "epoch": 3.0936810615135864,
      "grad_norm": 0.5698707103729248,
      "learning_rate": 3.837760191878126e-06,
      "loss": 0.0109,
      "step": 1890400
    },
    {
      "epoch": 3.09371379195224,
      "grad_norm": 0.15570186078548431,
      "learning_rate": 3.837694299664609e-06,
      "loss": 0.0128,
      "step": 1890420
    },
    {
      "epoch": 3.093746522390893,
      "grad_norm": 0.234737828373909,
      "learning_rate": 3.837628407451091e-06,
      "loss": 0.0092,
      "step": 1890440
    },
    {
      "epoch": 3.0937792528295462,
      "grad_norm": 0.26239654421806335,
      "learning_rate": 3.837562515237574e-06,
      "loss": 0.0123,
      "step": 1890460
    },
    {
      "epoch": 3.0938119832682,
      "grad_norm": 0.4584645628929138,
      "learning_rate": 3.837496623024058e-06,
      "loss": 0.0091,
      "step": 1890480
    },
    {
      "epoch": 3.093844713706853,
      "grad_norm": 0.769739031791687,
      "learning_rate": 3.83743073081054e-06,
      "loss": 0.0122,
      "step": 1890500
    },
    {
      "epoch": 3.0938774441455066,
      "grad_norm": 0.20600447058677673,
      "learning_rate": 3.837364838597024e-06,
      "loss": 0.0149,
      "step": 1890520
    },
    {
      "epoch": 3.0939101745841597,
      "grad_norm": 0.28912901878356934,
      "learning_rate": 3.837298946383507e-06,
      "loss": 0.0131,
      "step": 1890540
    },
    {
      "epoch": 3.0939429050228133,
      "grad_norm": 0.18268616497516632,
      "learning_rate": 3.8372330541699895e-06,
      "loss": 0.0147,
      "step": 1890560
    },
    {
      "epoch": 3.0939756354614665,
      "grad_norm": 0.39009734988212585,
      "learning_rate": 3.837167161956472e-06,
      "loss": 0.0126,
      "step": 1890580
    },
    {
      "epoch": 3.0940083659001196,
      "grad_norm": 0.5485233068466187,
      "learning_rate": 3.837101269742955e-06,
      "loss": 0.015,
      "step": 1890600
    },
    {
      "epoch": 3.0940410963387732,
      "grad_norm": 0.11883576959371567,
      "learning_rate": 3.837035377529438e-06,
      "loss": 0.0071,
      "step": 1890620
    },
    {
      "epoch": 3.0940738267774264,
      "grad_norm": 0.05059234797954559,
      "learning_rate": 3.8369694853159204e-06,
      "loss": 0.0103,
      "step": 1890640
    },
    {
      "epoch": 3.09410655721608,
      "grad_norm": 0.18420805037021637,
      "learning_rate": 3.836903593102403e-06,
      "loss": 0.0167,
      "step": 1890660
    },
    {
      "epoch": 3.094139287654733,
      "grad_norm": 0.21923092007637024,
      "learning_rate": 3.836837700888886e-06,
      "loss": 0.0181,
      "step": 1890680
    },
    {
      "epoch": 3.0941720180933867,
      "grad_norm": 0.13339416682720184,
      "learning_rate": 3.8367718086753695e-06,
      "loss": 0.0196,
      "step": 1890700
    },
    {
      "epoch": 3.09420474853204,
      "grad_norm": 0.6522998809814453,
      "learning_rate": 3.836705916461852e-06,
      "loss": 0.0144,
      "step": 1890720
    },
    {
      "epoch": 3.094237478970693,
      "grad_norm": 0.24515540897846222,
      "learning_rate": 3.836640024248335e-06,
      "loss": 0.0105,
      "step": 1890740
    },
    {
      "epoch": 3.0942702094093466,
      "grad_norm": 0.23838447034358978,
      "learning_rate": 3.836574132034818e-06,
      "loss": 0.0113,
      "step": 1890760
    },
    {
      "epoch": 3.0943029398479998,
      "grad_norm": 0.6783024668693542,
      "learning_rate": 3.8365082398213005e-06,
      "loss": 0.0083,
      "step": 1890780
    },
    {
      "epoch": 3.0943356702866534,
      "grad_norm": 0.2605959177017212,
      "learning_rate": 3.836442347607783e-06,
      "loss": 0.0172,
      "step": 1890800
    },
    {
      "epoch": 3.0943684007253065,
      "grad_norm": 0.3497640788555145,
      "learning_rate": 3.836376455394266e-06,
      "loss": 0.0123,
      "step": 1890820
    },
    {
      "epoch": 3.0944011311639597,
      "grad_norm": 0.5543955564498901,
      "learning_rate": 3.836310563180749e-06,
      "loss": 0.0132,
      "step": 1890840
    },
    {
      "epoch": 3.0944338616026132,
      "grad_norm": 0.3675574064254761,
      "learning_rate": 3.836244670967232e-06,
      "loss": 0.0121,
      "step": 1890860
    },
    {
      "epoch": 3.0944665920412664,
      "grad_norm": 0.3993537425994873,
      "learning_rate": 3.836178778753715e-06,
      "loss": 0.0133,
      "step": 1890880
    },
    {
      "epoch": 3.09449932247992,
      "grad_norm": 0.10690838098526001,
      "learning_rate": 3.836112886540198e-06,
      "loss": 0.0133,
      "step": 1890900
    },
    {
      "epoch": 3.094532052918573,
      "grad_norm": 0.713777482509613,
      "learning_rate": 3.836046994326681e-06,
      "loss": 0.0119,
      "step": 1890920
    },
    {
      "epoch": 3.0945647833572267,
      "grad_norm": 0.20844638347625732,
      "learning_rate": 3.835981102113164e-06,
      "loss": 0.0108,
      "step": 1890940
    },
    {
      "epoch": 3.09459751379588,
      "grad_norm": 0.42536336183547974,
      "learning_rate": 3.835915209899647e-06,
      "loss": 0.0142,
      "step": 1890960
    },
    {
      "epoch": 3.094630244234533,
      "grad_norm": 0.733950674533844,
      "learning_rate": 3.8358493176861296e-06,
      "loss": 0.0102,
      "step": 1890980
    },
    {
      "epoch": 3.0946629746731866,
      "grad_norm": 0.576404333114624,
      "learning_rate": 3.835783425472612e-06,
      "loss": 0.015,
      "step": 1891000
    },
    {
      "epoch": 3.09469570511184,
      "grad_norm": 0.2595742344856262,
      "learning_rate": 3.835717533259095e-06,
      "loss": 0.016,
      "step": 1891020
    },
    {
      "epoch": 3.0947284355504934,
      "grad_norm": 0.17467647790908813,
      "learning_rate": 3.835651641045578e-06,
      "loss": 0.0135,
      "step": 1891040
    },
    {
      "epoch": 3.0947611659891465,
      "grad_norm": 0.12496989220380783,
      "learning_rate": 3.8355857488320605e-06,
      "loss": 0.0127,
      "step": 1891060
    },
    {
      "epoch": 3.0947938964278,
      "grad_norm": 0.36141639947891235,
      "learning_rate": 3.835519856618543e-06,
      "loss": 0.0123,
      "step": 1891080
    },
    {
      "epoch": 3.0948266268664533,
      "grad_norm": 0.35225826501846313,
      "learning_rate": 3.835453964405027e-06,
      "loss": 0.0082,
      "step": 1891100
    },
    {
      "epoch": 3.0948593573051064,
      "grad_norm": 0.19392187893390656,
      "learning_rate": 3.83538807219151e-06,
      "loss": 0.008,
      "step": 1891120
    },
    {
      "epoch": 3.09489208774376,
      "grad_norm": 0.1779387891292572,
      "learning_rate": 3.835322179977992e-06,
      "loss": 0.0077,
      "step": 1891140
    },
    {
      "epoch": 3.094924818182413,
      "grad_norm": 0.30465394258499146,
      "learning_rate": 3.835256287764475e-06,
      "loss": 0.014,
      "step": 1891160
    },
    {
      "epoch": 3.0949575486210668,
      "grad_norm": 0.5102576017379761,
      "learning_rate": 3.835190395550958e-06,
      "loss": 0.0135,
      "step": 1891180
    },
    {
      "epoch": 3.09499027905972,
      "grad_norm": 0.5610806941986084,
      "learning_rate": 3.8351245033374405e-06,
      "loss": 0.0112,
      "step": 1891200
    },
    {
      "epoch": 3.0950230094983735,
      "grad_norm": 0.1858617663383484,
      "learning_rate": 3.835058611123923e-06,
      "loss": 0.011,
      "step": 1891220
    },
    {
      "epoch": 3.0950557399370267,
      "grad_norm": 0.11347697675228119,
      "learning_rate": 3.834992718910407e-06,
      "loss": 0.0124,
      "step": 1891240
    },
    {
      "epoch": 3.09508847037568,
      "grad_norm": 0.5629985332489014,
      "learning_rate": 3.83492682669689e-06,
      "loss": 0.0141,
      "step": 1891260
    },
    {
      "epoch": 3.0951212008143334,
      "grad_norm": 0.9855223894119263,
      "learning_rate": 3.834860934483372e-06,
      "loss": 0.0124,
      "step": 1891280
    },
    {
      "epoch": 3.0951539312529865,
      "grad_norm": 0.1688123196363449,
      "learning_rate": 3.834795042269855e-06,
      "loss": 0.0144,
      "step": 1891300
    },
    {
      "epoch": 3.09518666169164,
      "grad_norm": 0.18819227814674377,
      "learning_rate": 3.834729150056339e-06,
      "loss": 0.0145,
      "step": 1891320
    },
    {
      "epoch": 3.0952193921302933,
      "grad_norm": 0.5838536024093628,
      "learning_rate": 3.834663257842821e-06,
      "loss": 0.0095,
      "step": 1891340
    },
    {
      "epoch": 3.0952521225689464,
      "grad_norm": 0.16642050445079803,
      "learning_rate": 3.834597365629304e-06,
      "loss": 0.0128,
      "step": 1891360
    },
    {
      "epoch": 3.0952848530076,
      "grad_norm": 0.237268328666687,
      "learning_rate": 3.834531473415787e-06,
      "loss": 0.0094,
      "step": 1891380
    },
    {
      "epoch": 3.095317583446253,
      "grad_norm": 0.5737010836601257,
      "learning_rate": 3.83446558120227e-06,
      "loss": 0.012,
      "step": 1891400
    },
    {
      "epoch": 3.095350313884907,
      "grad_norm": 0.6204633712768555,
      "learning_rate": 3.834399688988752e-06,
      "loss": 0.0079,
      "step": 1891420
    },
    {
      "epoch": 3.09538304432356,
      "grad_norm": 0.16611282527446747,
      "learning_rate": 3.834333796775235e-06,
      "loss": 0.013,
      "step": 1891440
    },
    {
      "epoch": 3.0954157747622135,
      "grad_norm": 0.5181066989898682,
      "learning_rate": 3.834267904561718e-06,
      "loss": 0.0113,
      "step": 1891460
    },
    {
      "epoch": 3.0954485052008667,
      "grad_norm": 0.7105087637901306,
      "learning_rate": 3.834202012348201e-06,
      "loss": 0.0136,
      "step": 1891480
    },
    {
      "epoch": 3.09548123563952,
      "grad_norm": 0.3856180012226105,
      "learning_rate": 3.834136120134684e-06,
      "loss": 0.0117,
      "step": 1891500
    },
    {
      "epoch": 3.0955139660781734,
      "grad_norm": 0.09797894954681396,
      "learning_rate": 3.834070227921167e-06,
      "loss": 0.0175,
      "step": 1891520
    },
    {
      "epoch": 3.0955466965168266,
      "grad_norm": 0.1251561939716339,
      "learning_rate": 3.83400433570765e-06,
      "loss": 0.0081,
      "step": 1891540
    },
    {
      "epoch": 3.09557942695548,
      "grad_norm": 0.2690683901309967,
      "learning_rate": 3.833938443494132e-06,
      "loss": 0.0111,
      "step": 1891560
    },
    {
      "epoch": 3.0956121573941333,
      "grad_norm": 0.4541792571544647,
      "learning_rate": 3.833872551280616e-06,
      "loss": 0.0189,
      "step": 1891580
    },
    {
      "epoch": 3.095644887832787,
      "grad_norm": 0.0795864537358284,
      "learning_rate": 3.833806659067099e-06,
      "loss": 0.0071,
      "step": 1891600
    },
    {
      "epoch": 3.09567761827144,
      "grad_norm": 0.4118666350841522,
      "learning_rate": 3.8337407668535815e-06,
      "loss": 0.0112,
      "step": 1891620
    },
    {
      "epoch": 3.095710348710093,
      "grad_norm": 0.10294456034898758,
      "learning_rate": 3.833674874640064e-06,
      "loss": 0.0124,
      "step": 1891640
    },
    {
      "epoch": 3.095743079148747,
      "grad_norm": 0.04842250794172287,
      "learning_rate": 3.833608982426547e-06,
      "loss": 0.0089,
      "step": 1891660
    },
    {
      "epoch": 3.0957758095874,
      "grad_norm": 0.18318185210227966,
      "learning_rate": 3.83354309021303e-06,
      "loss": 0.0149,
      "step": 1891680
    },
    {
      "epoch": 3.0958085400260535,
      "grad_norm": 0.4436229169368744,
      "learning_rate": 3.833477197999512e-06,
      "loss": 0.0117,
      "step": 1891700
    },
    {
      "epoch": 3.0958412704647067,
      "grad_norm": 0.3984323740005493,
      "learning_rate": 3.833411305785996e-06,
      "loss": 0.0075,
      "step": 1891720
    },
    {
      "epoch": 3.0958740009033603,
      "grad_norm": 0.24866291880607605,
      "learning_rate": 3.833345413572479e-06,
      "loss": 0.0135,
      "step": 1891740
    },
    {
      "epoch": 3.0959067313420134,
      "grad_norm": 0.8282508254051208,
      "learning_rate": 3.8332795213589615e-06,
      "loss": 0.0114,
      "step": 1891760
    },
    {
      "epoch": 3.0959394617806666,
      "grad_norm": 0.49300482869148254,
      "learning_rate": 3.833213629145444e-06,
      "loss": 0.0087,
      "step": 1891780
    },
    {
      "epoch": 3.09597219221932,
      "grad_norm": 0.40395864844322205,
      "learning_rate": 3.833147736931927e-06,
      "loss": 0.0155,
      "step": 1891800
    },
    {
      "epoch": 3.0960049226579733,
      "grad_norm": 1.0519317388534546,
      "learning_rate": 3.83308184471841e-06,
      "loss": 0.0155,
      "step": 1891820
    },
    {
      "epoch": 3.096037653096627,
      "grad_norm": 0.3542332649230957,
      "learning_rate": 3.8330159525048924e-06,
      "loss": 0.0172,
      "step": 1891840
    },
    {
      "epoch": 3.09607038353528,
      "grad_norm": 0.10419803112745285,
      "learning_rate": 3.832950060291375e-06,
      "loss": 0.0092,
      "step": 1891860
    },
    {
      "epoch": 3.0961031139739337,
      "grad_norm": 0.12627369165420532,
      "learning_rate": 3.832884168077858e-06,
      "loss": 0.0138,
      "step": 1891880
    },
    {
      "epoch": 3.096135844412587,
      "grad_norm": 0.22419124841690063,
      "learning_rate": 3.8328182758643415e-06,
      "loss": 0.0178,
      "step": 1891900
    },
    {
      "epoch": 3.09616857485124,
      "grad_norm": 0.15338757634162903,
      "learning_rate": 3.832752383650824e-06,
      "loss": 0.0095,
      "step": 1891920
    },
    {
      "epoch": 3.0962013052898936,
      "grad_norm": 0.3468751907348633,
      "learning_rate": 3.832686491437308e-06,
      "loss": 0.0092,
      "step": 1891940
    },
    {
      "epoch": 3.0962340357285467,
      "grad_norm": 0.3004414141178131,
      "learning_rate": 3.832620599223791e-06,
      "loss": 0.0075,
      "step": 1891960
    },
    {
      "epoch": 3.0962667661672003,
      "grad_norm": 0.18281367421150208,
      "learning_rate": 3.832554707010273e-06,
      "loss": 0.0129,
      "step": 1891980
    },
    {
      "epoch": 3.0962994966058535,
      "grad_norm": 0.24088117480278015,
      "learning_rate": 3.832488814796756e-06,
      "loss": 0.0109,
      "step": 1892000
    },
    {
      "epoch": 3.096332227044507,
      "grad_norm": 0.20088878273963928,
      "learning_rate": 3.832422922583239e-06,
      "loss": 0.013,
      "step": 1892020
    },
    {
      "epoch": 3.09636495748316,
      "grad_norm": 0.48758456110954285,
      "learning_rate": 3.8323570303697215e-06,
      "loss": 0.0107,
      "step": 1892040
    },
    {
      "epoch": 3.0963976879218134,
      "grad_norm": 0.27034813165664673,
      "learning_rate": 3.832291138156204e-06,
      "loss": 0.0103,
      "step": 1892060
    },
    {
      "epoch": 3.096430418360467,
      "grad_norm": 0.2886604368686676,
      "learning_rate": 3.832225245942687e-06,
      "loss": 0.0098,
      "step": 1892080
    },
    {
      "epoch": 3.09646314879912,
      "grad_norm": 0.0987928956747055,
      "learning_rate": 3.83215935372917e-06,
      "loss": 0.01,
      "step": 1892100
    },
    {
      "epoch": 3.0964958792377737,
      "grad_norm": 0.3345857560634613,
      "learning_rate": 3.832093461515653e-06,
      "loss": 0.0107,
      "step": 1892120
    },
    {
      "epoch": 3.096528609676427,
      "grad_norm": 0.7252264022827148,
      "learning_rate": 3.832027569302136e-06,
      "loss": 0.0092,
      "step": 1892140
    },
    {
      "epoch": 3.0965613401150804,
      "grad_norm": 0.37195250391960144,
      "learning_rate": 3.831961677088619e-06,
      "loss": 0.01,
      "step": 1892160
    },
    {
      "epoch": 3.0965940705537336,
      "grad_norm": 0.4327651262283325,
      "learning_rate": 3.8318957848751016e-06,
      "loss": 0.0129,
      "step": 1892180
    },
    {
      "epoch": 3.0966268009923867,
      "grad_norm": 0.28461015224456787,
      "learning_rate": 3.831829892661584e-06,
      "loss": 0.0076,
      "step": 1892200
    },
    {
      "epoch": 3.0966595314310403,
      "grad_norm": 0.3748694658279419,
      "learning_rate": 3.831764000448067e-06,
      "loss": 0.0089,
      "step": 1892220
    },
    {
      "epoch": 3.0966922618696935,
      "grad_norm": 0.31434202194213867,
      "learning_rate": 3.83169810823455e-06,
      "loss": 0.0197,
      "step": 1892240
    },
    {
      "epoch": 3.096724992308347,
      "grad_norm": 1.1485638618469238,
      "learning_rate": 3.8316322160210325e-06,
      "loss": 0.0127,
      "step": 1892260
    },
    {
      "epoch": 3.0967577227470002,
      "grad_norm": 0.09277968108654022,
      "learning_rate": 3.831566323807516e-06,
      "loss": 0.0109,
      "step": 1892280
    },
    {
      "epoch": 3.0967904531856534,
      "grad_norm": 0.4062590003013611,
      "learning_rate": 3.831500431593999e-06,
      "loss": 0.0179,
      "step": 1892300
    },
    {
      "epoch": 3.096823183624307,
      "grad_norm": 0.186367467045784,
      "learning_rate": 3.831434539380482e-06,
      "loss": 0.0103,
      "step": 1892320
    },
    {
      "epoch": 3.09685591406296,
      "grad_norm": 0.37585899233818054,
      "learning_rate": 3.831368647166965e-06,
      "loss": 0.0125,
      "step": 1892340
    },
    {
      "epoch": 3.0968886445016137,
      "grad_norm": 0.25029057264328003,
      "learning_rate": 3.831302754953448e-06,
      "loss": 0.0177,
      "step": 1892360
    },
    {
      "epoch": 3.096921374940267,
      "grad_norm": 0.11441199481487274,
      "learning_rate": 3.831236862739931e-06,
      "loss": 0.0118,
      "step": 1892380
    },
    {
      "epoch": 3.0969541053789205,
      "grad_norm": 0.1054743304848671,
      "learning_rate": 3.831170970526413e-06,
      "loss": 0.0116,
      "step": 1892400
    },
    {
      "epoch": 3.0969868358175736,
      "grad_norm": 0.07684686779975891,
      "learning_rate": 3.831105078312896e-06,
      "loss": 0.0109,
      "step": 1892420
    },
    {
      "epoch": 3.0970195662562268,
      "grad_norm": 0.12936855852603912,
      "learning_rate": 3.831039186099379e-06,
      "loss": 0.0101,
      "step": 1892440
    },
    {
      "epoch": 3.0970522966948804,
      "grad_norm": 0.36267051100730896,
      "learning_rate": 3.830973293885862e-06,
      "loss": 0.0095,
      "step": 1892460
    },
    {
      "epoch": 3.0970850271335335,
      "grad_norm": 0.4397236406803131,
      "learning_rate": 3.830907401672344e-06,
      "loss": 0.0182,
      "step": 1892480
    },
    {
      "epoch": 3.097117757572187,
      "grad_norm": 0.25940048694610596,
      "learning_rate": 3.830841509458827e-06,
      "loss": 0.0091,
      "step": 1892500
    },
    {
      "epoch": 3.0971504880108403,
      "grad_norm": 0.7614717483520508,
      "learning_rate": 3.830775617245311e-06,
      "loss": 0.01,
      "step": 1892520
    },
    {
      "epoch": 3.097183218449494,
      "grad_norm": 0.2685161232948303,
      "learning_rate": 3.830709725031793e-06,
      "loss": 0.0126,
      "step": 1892540
    },
    {
      "epoch": 3.097215948888147,
      "grad_norm": 0.274841845035553,
      "learning_rate": 3.830643832818276e-06,
      "loss": 0.0082,
      "step": 1892560
    },
    {
      "epoch": 3.0972486793268,
      "grad_norm": 0.4834490716457367,
      "learning_rate": 3.830577940604759e-06,
      "loss": 0.01,
      "step": 1892580
    },
    {
      "epoch": 3.0972814097654537,
      "grad_norm": 0.1796661913394928,
      "learning_rate": 3.830512048391242e-06,
      "loss": 0.0154,
      "step": 1892600
    },
    {
      "epoch": 3.097314140204107,
      "grad_norm": 0.43650805950164795,
      "learning_rate": 3.830446156177724e-06,
      "loss": 0.0118,
      "step": 1892620
    },
    {
      "epoch": 3.0973468706427605,
      "grad_norm": 0.18629416823387146,
      "learning_rate": 3.830380263964208e-06,
      "loss": 0.0138,
      "step": 1892640
    },
    {
      "epoch": 3.0973796010814136,
      "grad_norm": 0.2540828585624695,
      "learning_rate": 3.830314371750691e-06,
      "loss": 0.0069,
      "step": 1892660
    },
    {
      "epoch": 3.0974123315200672,
      "grad_norm": 0.3719983696937561,
      "learning_rate": 3.8302484795371734e-06,
      "loss": 0.0118,
      "step": 1892680
    },
    {
      "epoch": 3.0974450619587204,
      "grad_norm": 0.22920100390911102,
      "learning_rate": 3.830182587323656e-06,
      "loss": 0.0098,
      "step": 1892700
    },
    {
      "epoch": 3.0974777923973735,
      "grad_norm": 0.5612807869911194,
      "learning_rate": 3.830116695110139e-06,
      "loss": 0.0127,
      "step": 1892720
    },
    {
      "epoch": 3.097510522836027,
      "grad_norm": 0.1871931254863739,
      "learning_rate": 3.8300508028966225e-06,
      "loss": 0.0067,
      "step": 1892740
    },
    {
      "epoch": 3.0975432532746803,
      "grad_norm": 0.09223593771457672,
      "learning_rate": 3.829984910683105e-06,
      "loss": 0.0105,
      "step": 1892760
    },
    {
      "epoch": 3.097575983713334,
      "grad_norm": 0.22606410086154938,
      "learning_rate": 3.829919018469588e-06,
      "loss": 0.0161,
      "step": 1892780
    },
    {
      "epoch": 3.097608714151987,
      "grad_norm": 0.3644823133945465,
      "learning_rate": 3.829853126256071e-06,
      "loss": 0.0086,
      "step": 1892800
    },
    {
      "epoch": 3.0976414445906406,
      "grad_norm": 0.286234587430954,
      "learning_rate": 3.8297872340425535e-06,
      "loss": 0.0101,
      "step": 1892820
    },
    {
      "epoch": 3.0976741750292938,
      "grad_norm": 0.19562973082065582,
      "learning_rate": 3.829721341829036e-06,
      "loss": 0.007,
      "step": 1892840
    },
    {
      "epoch": 3.097706905467947,
      "grad_norm": 0.24340149760246277,
      "learning_rate": 3.829655449615519e-06,
      "loss": 0.0142,
      "step": 1892860
    },
    {
      "epoch": 3.0977396359066005,
      "grad_norm": 0.7229722738265991,
      "learning_rate": 3.829589557402002e-06,
      "loss": 0.0138,
      "step": 1892880
    },
    {
      "epoch": 3.0977723663452537,
      "grad_norm": 0.3190643787384033,
      "learning_rate": 3.8295236651884844e-06,
      "loss": 0.0151,
      "step": 1892900
    },
    {
      "epoch": 3.0978050967839073,
      "grad_norm": 0.34517377614974976,
      "learning_rate": 3.829457772974968e-06,
      "loss": 0.0119,
      "step": 1892920
    },
    {
      "epoch": 3.0978378272225604,
      "grad_norm": 0.7676481008529663,
      "learning_rate": 3.829391880761451e-06,
      "loss": 0.0109,
      "step": 1892940
    },
    {
      "epoch": 3.0978705576612136,
      "grad_norm": 0.23181410133838654,
      "learning_rate": 3.8293259885479335e-06,
      "loss": 0.0116,
      "step": 1892960
    },
    {
      "epoch": 3.097903288099867,
      "grad_norm": 1.2360972166061401,
      "learning_rate": 3.829260096334416e-06,
      "loss": 0.0142,
      "step": 1892980
    },
    {
      "epoch": 3.0979360185385203,
      "grad_norm": 0.5509672164916992,
      "learning_rate": 3.8291942041209e-06,
      "loss": 0.0101,
      "step": 1893000
    },
    {
      "epoch": 3.097968748977174,
      "grad_norm": 0.21380466222763062,
      "learning_rate": 3.8291283119073826e-06,
      "loss": 0.0091,
      "step": 1893020
    },
    {
      "epoch": 3.098001479415827,
      "grad_norm": 0.08480554819107056,
      "learning_rate": 3.829062419693865e-06,
      "loss": 0.0068,
      "step": 1893040
    },
    {
      "epoch": 3.0980342098544806,
      "grad_norm": 0.33972591161727905,
      "learning_rate": 3.828996527480348e-06,
      "loss": 0.0122,
      "step": 1893060
    },
    {
      "epoch": 3.098066940293134,
      "grad_norm": 0.3249838352203369,
      "learning_rate": 3.828930635266831e-06,
      "loss": 0.0197,
      "step": 1893080
    },
    {
      "epoch": 3.098099670731787,
      "grad_norm": 0.14668820798397064,
      "learning_rate": 3.8288647430533135e-06,
      "loss": 0.0109,
      "step": 1893100
    },
    {
      "epoch": 3.0981324011704405,
      "grad_norm": 0.3433489203453064,
      "learning_rate": 3.828798850839796e-06,
      "loss": 0.011,
      "step": 1893120
    },
    {
      "epoch": 3.0981651316090937,
      "grad_norm": 0.17263509333133698,
      "learning_rate": 3.82873295862628e-06,
      "loss": 0.0106,
      "step": 1893140
    },
    {
      "epoch": 3.0981978620477473,
      "grad_norm": 0.5010445713996887,
      "learning_rate": 3.828667066412763e-06,
      "loss": 0.01,
      "step": 1893160
    },
    {
      "epoch": 3.0982305924864004,
      "grad_norm": 0.2007272094488144,
      "learning_rate": 3.828601174199245e-06,
      "loss": 0.0097,
      "step": 1893180
    },
    {
      "epoch": 3.098263322925054,
      "grad_norm": 0.1566542387008667,
      "learning_rate": 3.828535281985728e-06,
      "loss": 0.0111,
      "step": 1893200
    },
    {
      "epoch": 3.098296053363707,
      "grad_norm": 0.21664254367351532,
      "learning_rate": 3.828469389772211e-06,
      "loss": 0.0102,
      "step": 1893220
    },
    {
      "epoch": 3.0983287838023603,
      "grad_norm": 0.39837759733200073,
      "learning_rate": 3.8284034975586935e-06,
      "loss": 0.0117,
      "step": 1893240
    },
    {
      "epoch": 3.098361514241014,
      "grad_norm": 0.15381592512130737,
      "learning_rate": 3.828337605345176e-06,
      "loss": 0.0161,
      "step": 1893260
    },
    {
      "epoch": 3.098394244679667,
      "grad_norm": 0.07976742833852768,
      "learning_rate": 3.828271713131659e-06,
      "loss": 0.0157,
      "step": 1893280
    },
    {
      "epoch": 3.0984269751183207,
      "grad_norm": 0.27939510345458984,
      "learning_rate": 3.828205820918143e-06,
      "loss": 0.0091,
      "step": 1893300
    },
    {
      "epoch": 3.098459705556974,
      "grad_norm": 0.4048919677734375,
      "learning_rate": 3.828139928704625e-06,
      "loss": 0.0132,
      "step": 1893320
    },
    {
      "epoch": 3.0984924359956274,
      "grad_norm": 0.32217052578926086,
      "learning_rate": 3.828074036491108e-06,
      "loss": 0.0086,
      "step": 1893340
    },
    {
      "epoch": 3.0985251664342806,
      "grad_norm": 0.4843266010284424,
      "learning_rate": 3.828008144277592e-06,
      "loss": 0.0118,
      "step": 1893360
    },
    {
      "epoch": 3.0985578968729337,
      "grad_norm": 0.26303595304489136,
      "learning_rate": 3.827942252064074e-06,
      "loss": 0.0104,
      "step": 1893380
    },
    {
      "epoch": 3.0985906273115873,
      "grad_norm": 0.18338951468467712,
      "learning_rate": 3.827876359850557e-06,
      "loss": 0.0112,
      "step": 1893400
    },
    {
      "epoch": 3.0986233577502404,
      "grad_norm": 0.06694239377975464,
      "learning_rate": 3.82781046763704e-06,
      "loss": 0.0099,
      "step": 1893420
    },
    {
      "epoch": 3.098656088188894,
      "grad_norm": 0.105329729616642,
      "learning_rate": 3.827744575423523e-06,
      "loss": 0.0101,
      "step": 1893440
    },
    {
      "epoch": 3.098688818627547,
      "grad_norm": 0.27669546008110046,
      "learning_rate": 3.827678683210005e-06,
      "loss": 0.007,
      "step": 1893460
    },
    {
      "epoch": 3.098721549066201,
      "grad_norm": 0.1524471640586853,
      "learning_rate": 3.827612790996488e-06,
      "loss": 0.0143,
      "step": 1893480
    },
    {
      "epoch": 3.098754279504854,
      "grad_norm": 0.21033230423927307,
      "learning_rate": 3.827546898782971e-06,
      "loss": 0.0116,
      "step": 1893500
    },
    {
      "epoch": 3.098787009943507,
      "grad_norm": 0.09597685933113098,
      "learning_rate": 3.827481006569454e-06,
      "loss": 0.0078,
      "step": 1893520
    },
    {
      "epoch": 3.0988197403821607,
      "grad_norm": 0.1999637633562088,
      "learning_rate": 3.827415114355937e-06,
      "loss": 0.0096,
      "step": 1893540
    },
    {
      "epoch": 3.098852470820814,
      "grad_norm": 0.38928475975990295,
      "learning_rate": 3.82734922214242e-06,
      "loss": 0.0149,
      "step": 1893560
    },
    {
      "epoch": 3.0988852012594674,
      "grad_norm": 0.30597469210624695,
      "learning_rate": 3.827283329928903e-06,
      "loss": 0.0111,
      "step": 1893580
    },
    {
      "epoch": 3.0989179316981206,
      "grad_norm": 0.47347596287727356,
      "learning_rate": 3.827217437715385e-06,
      "loss": 0.0155,
      "step": 1893600
    },
    {
      "epoch": 3.098950662136774,
      "grad_norm": 0.46038496494293213,
      "learning_rate": 3.827151545501868e-06,
      "loss": 0.0125,
      "step": 1893620
    },
    {
      "epoch": 3.0989833925754273,
      "grad_norm": 0.18188847601413727,
      "learning_rate": 3.827085653288351e-06,
      "loss": 0.0063,
      "step": 1893640
    },
    {
      "epoch": 3.0990161230140805,
      "grad_norm": 0.195058211684227,
      "learning_rate": 3.827019761074834e-06,
      "loss": 0.0169,
      "step": 1893660
    },
    {
      "epoch": 3.099048853452734,
      "grad_norm": 0.4021555781364441,
      "learning_rate": 3.826953868861316e-06,
      "loss": 0.0161,
      "step": 1893680
    },
    {
      "epoch": 3.099081583891387,
      "grad_norm": 0.1728169023990631,
      "learning_rate": 3.8268879766478e-06,
      "loss": 0.0095,
      "step": 1893700
    },
    {
      "epoch": 3.099114314330041,
      "grad_norm": 0.6343292593955994,
      "learning_rate": 3.826822084434283e-06,
      "loss": 0.0095,
      "step": 1893720
    },
    {
      "epoch": 3.099147044768694,
      "grad_norm": 0.28404659032821655,
      "learning_rate": 3.8267561922207654e-06,
      "loss": 0.0098,
      "step": 1893740
    },
    {
      "epoch": 3.0991797752073476,
      "grad_norm": 0.10008891671895981,
      "learning_rate": 3.826690300007249e-06,
      "loss": 0.0113,
      "step": 1893760
    },
    {
      "epoch": 3.0992125056460007,
      "grad_norm": 0.19794556498527527,
      "learning_rate": 3.826624407793732e-06,
      "loss": 0.0121,
      "step": 1893780
    },
    {
      "epoch": 3.099245236084654,
      "grad_norm": 1.0150169134140015,
      "learning_rate": 3.8265585155802145e-06,
      "loss": 0.0127,
      "step": 1893800
    },
    {
      "epoch": 3.0992779665233074,
      "grad_norm": 0.17113395035266876,
      "learning_rate": 3.826492623366697e-06,
      "loss": 0.0115,
      "step": 1893820
    },
    {
      "epoch": 3.0993106969619606,
      "grad_norm": 0.20496557652950287,
      "learning_rate": 3.82642673115318e-06,
      "loss": 0.0153,
      "step": 1893840
    },
    {
      "epoch": 3.099343427400614,
      "grad_norm": 0.9009115099906921,
      "learning_rate": 3.826360838939663e-06,
      "loss": 0.0161,
      "step": 1893860
    },
    {
      "epoch": 3.0993761578392673,
      "grad_norm": 0.0964605063199997,
      "learning_rate": 3.8262949467261455e-06,
      "loss": 0.0097,
      "step": 1893880
    },
    {
      "epoch": 3.0994088882779205,
      "grad_norm": 0.1723102629184723,
      "learning_rate": 3.826229054512628e-06,
      "loss": 0.0094,
      "step": 1893900
    },
    {
      "epoch": 3.099441618716574,
      "grad_norm": 0.13909170031547546,
      "learning_rate": 3.826163162299111e-06,
      "loss": 0.008,
      "step": 1893920
    },
    {
      "epoch": 3.0994743491552272,
      "grad_norm": 0.5781615972518921,
      "learning_rate": 3.8260972700855945e-06,
      "loss": 0.0107,
      "step": 1893940
    },
    {
      "epoch": 3.099507079593881,
      "grad_norm": 0.1506306231021881,
      "learning_rate": 3.826031377872077e-06,
      "loss": 0.0146,
      "step": 1893960
    },
    {
      "epoch": 3.099539810032534,
      "grad_norm": 0.40291890501976013,
      "learning_rate": 3.82596548565856e-06,
      "loss": 0.0134,
      "step": 1893980
    },
    {
      "epoch": 3.0995725404711876,
      "grad_norm": 0.1562906950712204,
      "learning_rate": 3.825899593445043e-06,
      "loss": 0.008,
      "step": 1894000
    },
    {
      "epoch": 3.0996052709098407,
      "grad_norm": 0.2582680583000183,
      "learning_rate": 3.8258337012315255e-06,
      "loss": 0.0187,
      "step": 1894020
    },
    {
      "epoch": 3.099638001348494,
      "grad_norm": 0.2063910961151123,
      "learning_rate": 3.825767809018009e-06,
      "loss": 0.0147,
      "step": 1894040
    },
    {
      "epoch": 3.0996707317871475,
      "grad_norm": 0.493945837020874,
      "learning_rate": 3.825701916804492e-06,
      "loss": 0.0094,
      "step": 1894060
    },
    {
      "epoch": 3.0997034622258006,
      "grad_norm": 0.26307958364486694,
      "learning_rate": 3.8256360245909745e-06,
      "loss": 0.0107,
      "step": 1894080
    },
    {
      "epoch": 3.099736192664454,
      "grad_norm": 0.29192060232162476,
      "learning_rate": 3.825570132377457e-06,
      "loss": 0.0082,
      "step": 1894100
    },
    {
      "epoch": 3.0997689231031074,
      "grad_norm": 0.06869630515575409,
      "learning_rate": 3.82550424016394e-06,
      "loss": 0.0108,
      "step": 1894120
    },
    {
      "epoch": 3.099801653541761,
      "grad_norm": 0.538152813911438,
      "learning_rate": 3.825438347950423e-06,
      "loss": 0.013,
      "step": 1894140
    },
    {
      "epoch": 3.099834383980414,
      "grad_norm": 0.42708858847618103,
      "learning_rate": 3.825372455736906e-06,
      "loss": 0.0105,
      "step": 1894160
    },
    {
      "epoch": 3.0998671144190673,
      "grad_norm": 0.27498143911361694,
      "learning_rate": 3.825306563523389e-06,
      "loss": 0.0097,
      "step": 1894180
    },
    {
      "epoch": 3.099899844857721,
      "grad_norm": 0.12919259071350098,
      "learning_rate": 3.825240671309872e-06,
      "loss": 0.0177,
      "step": 1894200
    },
    {
      "epoch": 3.099932575296374,
      "grad_norm": 0.19623874127864838,
      "learning_rate": 3.8251747790963546e-06,
      "loss": 0.0083,
      "step": 1894220
    },
    {
      "epoch": 3.0999653057350276,
      "grad_norm": 0.10185269266366959,
      "learning_rate": 3.825108886882837e-06,
      "loss": 0.0108,
      "step": 1894240
    },
    {
      "epoch": 3.0999980361736807,
      "grad_norm": 0.458233505487442,
      "learning_rate": 3.82504299466932e-06,
      "loss": 0.0151,
      "step": 1894260
    },
    {
      "epoch": 3.1000307666123343,
      "grad_norm": 0.3800611197948456,
      "learning_rate": 3.824977102455803e-06,
      "loss": 0.0141,
      "step": 1894280
    },
    {
      "epoch": 3.1000634970509875,
      "grad_norm": 0.5629119873046875,
      "learning_rate": 3.8249112102422855e-06,
      "loss": 0.0136,
      "step": 1894300
    },
    {
      "epoch": 3.1000962274896406,
      "grad_norm": 0.5796542167663574,
      "learning_rate": 3.824845318028768e-06,
      "loss": 0.0128,
      "step": 1894320
    },
    {
      "epoch": 3.1001289579282942,
      "grad_norm": 0.1700090765953064,
      "learning_rate": 3.824779425815252e-06,
      "loss": 0.0114,
      "step": 1894340
    },
    {
      "epoch": 3.1001616883669474,
      "grad_norm": 0.6931671500205994,
      "learning_rate": 3.824713533601735e-06,
      "loss": 0.0114,
      "step": 1894360
    },
    {
      "epoch": 3.100194418805601,
      "grad_norm": 0.2569577991962433,
      "learning_rate": 3.824647641388217e-06,
      "loss": 0.0144,
      "step": 1894380
    },
    {
      "epoch": 3.100227149244254,
      "grad_norm": 0.1887483447790146,
      "learning_rate": 3.824581749174701e-06,
      "loss": 0.0085,
      "step": 1894400
    },
    {
      "epoch": 3.1002598796829073,
      "grad_norm": 0.828032910823822,
      "learning_rate": 3.824515856961184e-06,
      "loss": 0.0081,
      "step": 1894420
    },
    {
      "epoch": 3.100292610121561,
      "grad_norm": 0.14855656027793884,
      "learning_rate": 3.824449964747666e-06,
      "loss": 0.0125,
      "step": 1894440
    },
    {
      "epoch": 3.100325340560214,
      "grad_norm": 0.24396303296089172,
      "learning_rate": 3.824384072534149e-06,
      "loss": 0.0114,
      "step": 1894460
    },
    {
      "epoch": 3.1003580709988676,
      "grad_norm": 0.06232773885130882,
      "learning_rate": 3.824318180320632e-06,
      "loss": 0.0133,
      "step": 1894480
    },
    {
      "epoch": 3.1003908014375208,
      "grad_norm": 0.20405542850494385,
      "learning_rate": 3.824252288107115e-06,
      "loss": 0.0165,
      "step": 1894500
    },
    {
      "epoch": 3.1004235318761744,
      "grad_norm": 0.1714777946472168,
      "learning_rate": 3.824186395893597e-06,
      "loss": 0.0107,
      "step": 1894520
    },
    {
      "epoch": 3.1004562623148275,
      "grad_norm": 0.27177608013153076,
      "learning_rate": 3.82412050368008e-06,
      "loss": 0.0114,
      "step": 1894540
    },
    {
      "epoch": 3.1004889927534807,
      "grad_norm": 0.07868154346942902,
      "learning_rate": 3.824054611466564e-06,
      "loss": 0.0154,
      "step": 1894560
    },
    {
      "epoch": 3.1005217231921343,
      "grad_norm": 0.023323042318224907,
      "learning_rate": 3.8239887192530464e-06,
      "loss": 0.0106,
      "step": 1894580
    },
    {
      "epoch": 3.1005544536307874,
      "grad_norm": 0.1830340474843979,
      "learning_rate": 3.823922827039529e-06,
      "loss": 0.0165,
      "step": 1894600
    },
    {
      "epoch": 3.100587184069441,
      "grad_norm": 0.07446993887424469,
      "learning_rate": 3.823856934826012e-06,
      "loss": 0.0137,
      "step": 1894620
    },
    {
      "epoch": 3.100619914508094,
      "grad_norm": 0.12194892764091492,
      "learning_rate": 3.823791042612495e-06,
      "loss": 0.0105,
      "step": 1894640
    },
    {
      "epoch": 3.1006526449467477,
      "grad_norm": 0.2129039317369461,
      "learning_rate": 3.823725150398977e-06,
      "loss": 0.009,
      "step": 1894660
    },
    {
      "epoch": 3.100685375385401,
      "grad_norm": 0.2513638436794281,
      "learning_rate": 3.82365925818546e-06,
      "loss": 0.0074,
      "step": 1894680
    },
    {
      "epoch": 3.100718105824054,
      "grad_norm": 0.34076234698295593,
      "learning_rate": 3.823593365971943e-06,
      "loss": 0.0121,
      "step": 1894700
    },
    {
      "epoch": 3.1007508362627076,
      "grad_norm": 0.20627780258655548,
      "learning_rate": 3.8235274737584265e-06,
      "loss": 0.0082,
      "step": 1894720
    },
    {
      "epoch": 3.100783566701361,
      "grad_norm": 0.12135924398899078,
      "learning_rate": 3.823461581544909e-06,
      "loss": 0.0123,
      "step": 1894740
    },
    {
      "epoch": 3.1008162971400144,
      "grad_norm": 0.4688090980052948,
      "learning_rate": 3.823395689331392e-06,
      "loss": 0.0088,
      "step": 1894760
    },
    {
      "epoch": 3.1008490275786675,
      "grad_norm": 0.2734726071357727,
      "learning_rate": 3.8233297971178755e-06,
      "loss": 0.0122,
      "step": 1894780
    },
    {
      "epoch": 3.100881758017321,
      "grad_norm": 0.0753573402762413,
      "learning_rate": 3.823263904904358e-06,
      "loss": 0.0101,
      "step": 1894800
    },
    {
      "epoch": 3.1009144884559743,
      "grad_norm": 0.24234028160572052,
      "learning_rate": 3.823198012690841e-06,
      "loss": 0.0097,
      "step": 1894820
    },
    {
      "epoch": 3.1009472188946274,
      "grad_norm": 0.04427075758576393,
      "learning_rate": 3.823132120477324e-06,
      "loss": 0.0164,
      "step": 1894840
    },
    {
      "epoch": 3.100979949333281,
      "grad_norm": 0.3132832944393158,
      "learning_rate": 3.8230662282638065e-06,
      "loss": 0.0189,
      "step": 1894860
    },
    {
      "epoch": 3.101012679771934,
      "grad_norm": 0.129497230052948,
      "learning_rate": 3.823000336050289e-06,
      "loss": 0.0123,
      "step": 1894880
    },
    {
      "epoch": 3.1010454102105878,
      "grad_norm": 0.4079395830631256,
      "learning_rate": 3.822934443836772e-06,
      "loss": 0.0095,
      "step": 1894900
    },
    {
      "epoch": 3.101078140649241,
      "grad_norm": 0.21327681839466095,
      "learning_rate": 3.822868551623255e-06,
      "loss": 0.0132,
      "step": 1894920
    },
    {
      "epoch": 3.1011108710878945,
      "grad_norm": 0.13621295988559723,
      "learning_rate": 3.8228026594097374e-06,
      "loss": 0.0092,
      "step": 1894940
    },
    {
      "epoch": 3.1011436015265477,
      "grad_norm": 0.09401337802410126,
      "learning_rate": 3.822736767196221e-06,
      "loss": 0.0138,
      "step": 1894960
    },
    {
      "epoch": 3.101176331965201,
      "grad_norm": 0.1304377317428589,
      "learning_rate": 3.822670874982704e-06,
      "loss": 0.0075,
      "step": 1894980
    },
    {
      "epoch": 3.1012090624038544,
      "grad_norm": 0.15730486810207367,
      "learning_rate": 3.8226049827691865e-06,
      "loss": 0.01,
      "step": 1895000
    },
    {
      "epoch": 3.1012417928425076,
      "grad_norm": 0.6464842557907104,
      "learning_rate": 3.822539090555669e-06,
      "loss": 0.0161,
      "step": 1895020
    },
    {
      "epoch": 3.101274523281161,
      "grad_norm": 0.31405144929885864,
      "learning_rate": 3.822473198342152e-06,
      "loss": 0.0131,
      "step": 1895040
    },
    {
      "epoch": 3.1013072537198143,
      "grad_norm": 0.3267705738544464,
      "learning_rate": 3.822407306128635e-06,
      "loss": 0.0164,
      "step": 1895060
    },
    {
      "epoch": 3.101339984158468,
      "grad_norm": 0.12636606395244598,
      "learning_rate": 3.8223414139151175e-06,
      "loss": 0.0143,
      "step": 1895080
    },
    {
      "epoch": 3.101372714597121,
      "grad_norm": 0.11635292321443558,
      "learning_rate": 3.822275521701601e-06,
      "loss": 0.0133,
      "step": 1895100
    },
    {
      "epoch": 3.101405445035774,
      "grad_norm": 0.32324931025505066,
      "learning_rate": 3.822209629488084e-06,
      "loss": 0.0128,
      "step": 1895120
    },
    {
      "epoch": 3.101438175474428,
      "grad_norm": 0.2623494267463684,
      "learning_rate": 3.8221437372745665e-06,
      "loss": 0.0111,
      "step": 1895140
    },
    {
      "epoch": 3.101470905913081,
      "grad_norm": 0.13449445366859436,
      "learning_rate": 3.822077845061049e-06,
      "loss": 0.009,
      "step": 1895160
    },
    {
      "epoch": 3.1015036363517345,
      "grad_norm": 0.07887963950634003,
      "learning_rate": 3.822011952847533e-06,
      "loss": 0.0126,
      "step": 1895180
    },
    {
      "epoch": 3.1015363667903877,
      "grad_norm": 0.4287864565849304,
      "learning_rate": 3.821946060634016e-06,
      "loss": 0.016,
      "step": 1895200
    },
    {
      "epoch": 3.1015690972290413,
      "grad_norm": 0.13694234192371368,
      "learning_rate": 3.821880168420498e-06,
      "loss": 0.0105,
      "step": 1895220
    },
    {
      "epoch": 3.1016018276676944,
      "grad_norm": 0.1062098890542984,
      "learning_rate": 3.821814276206981e-06,
      "loss": 0.0096,
      "step": 1895240
    },
    {
      "epoch": 3.1016345581063476,
      "grad_norm": 0.21659086644649506,
      "learning_rate": 3.821748383993464e-06,
      "loss": 0.0088,
      "step": 1895260
    },
    {
      "epoch": 3.101667288545001,
      "grad_norm": 0.23512256145477295,
      "learning_rate": 3.8216824917799466e-06,
      "loss": 0.0117,
      "step": 1895280
    },
    {
      "epoch": 3.1017000189836543,
      "grad_norm": 0.9432736039161682,
      "learning_rate": 3.821616599566429e-06,
      "loss": 0.0084,
      "step": 1895300
    },
    {
      "epoch": 3.101732749422308,
      "grad_norm": 0.5053597092628479,
      "learning_rate": 3.821550707352912e-06,
      "loss": 0.0079,
      "step": 1895320
    },
    {
      "epoch": 3.101765479860961,
      "grad_norm": 0.2120460420846939,
      "learning_rate": 3.821484815139395e-06,
      "loss": 0.0144,
      "step": 1895340
    },
    {
      "epoch": 3.1017982102996142,
      "grad_norm": 0.21495851874351501,
      "learning_rate": 3.821418922925878e-06,
      "loss": 0.009,
      "step": 1895360
    },
    {
      "epoch": 3.101830940738268,
      "grad_norm": 0.20157456398010254,
      "learning_rate": 3.821353030712361e-06,
      "loss": 0.0101,
      "step": 1895380
    },
    {
      "epoch": 3.101863671176921,
      "grad_norm": 0.6674579977989197,
      "learning_rate": 3.821287138498844e-06,
      "loss": 0.0136,
      "step": 1895400
    },
    {
      "epoch": 3.1018964016155746,
      "grad_norm": 0.7390972971916199,
      "learning_rate": 3.8212212462853266e-06,
      "loss": 0.0144,
      "step": 1895420
    },
    {
      "epoch": 3.1019291320542277,
      "grad_norm": 0.22161711752414703,
      "learning_rate": 3.821155354071809e-06,
      "loss": 0.0116,
      "step": 1895440
    },
    {
      "epoch": 3.1019618624928813,
      "grad_norm": 0.6373482346534729,
      "learning_rate": 3.821089461858293e-06,
      "loss": 0.0106,
      "step": 1895460
    },
    {
      "epoch": 3.1019945929315345,
      "grad_norm": 0.1881224364042282,
      "learning_rate": 3.821023569644776e-06,
      "loss": 0.0096,
      "step": 1895480
    },
    {
      "epoch": 3.1020273233701876,
      "grad_norm": 0.21859408915042877,
      "learning_rate": 3.820957677431258e-06,
      "loss": 0.0107,
      "step": 1895500
    },
    {
      "epoch": 3.102060053808841,
      "grad_norm": 0.11944855749607086,
      "learning_rate": 3.820891785217741e-06,
      "loss": 0.0127,
      "step": 1895520
    },
    {
      "epoch": 3.1020927842474944,
      "grad_norm": 0.2882460951805115,
      "learning_rate": 3.820825893004224e-06,
      "loss": 0.0157,
      "step": 1895540
    },
    {
      "epoch": 3.102125514686148,
      "grad_norm": 0.20825724303722382,
      "learning_rate": 3.820760000790707e-06,
      "loss": 0.0107,
      "step": 1895560
    },
    {
      "epoch": 3.102158245124801,
      "grad_norm": 0.42945989966392517,
      "learning_rate": 3.82069410857719e-06,
      "loss": 0.0102,
      "step": 1895580
    },
    {
      "epoch": 3.1021909755634547,
      "grad_norm": 0.33459508419036865,
      "learning_rate": 3.820628216363673e-06,
      "loss": 0.0117,
      "step": 1895600
    },
    {
      "epoch": 3.102223706002108,
      "grad_norm": 0.06184284761548042,
      "learning_rate": 3.820562324150156e-06,
      "loss": 0.0088,
      "step": 1895620
    },
    {
      "epoch": 3.102256436440761,
      "grad_norm": 0.1668347269296646,
      "learning_rate": 3.820496431936638e-06,
      "loss": 0.0122,
      "step": 1895640
    },
    {
      "epoch": 3.1022891668794146,
      "grad_norm": 0.5817461609840393,
      "learning_rate": 3.820430539723121e-06,
      "loss": 0.0167,
      "step": 1895660
    },
    {
      "epoch": 3.1023218973180677,
      "grad_norm": 0.1041734367609024,
      "learning_rate": 3.820364647509604e-06,
      "loss": 0.0205,
      "step": 1895680
    },
    {
      "epoch": 3.1023546277567213,
      "grad_norm": 0.46776899695396423,
      "learning_rate": 3.820298755296087e-06,
      "loss": 0.01,
      "step": 1895700
    },
    {
      "epoch": 3.1023873581953745,
      "grad_norm": 0.19601960480213165,
      "learning_rate": 3.820232863082569e-06,
      "loss": 0.0134,
      "step": 1895720
    },
    {
      "epoch": 3.102420088634028,
      "grad_norm": 0.15761211514472961,
      "learning_rate": 3.820166970869052e-06,
      "loss": 0.0113,
      "step": 1895740
    },
    {
      "epoch": 3.1024528190726812,
      "grad_norm": 0.24110615253448486,
      "learning_rate": 3.820101078655536e-06,
      "loss": 0.0152,
      "step": 1895760
    },
    {
      "epoch": 3.1024855495113344,
      "grad_norm": 0.13885100185871124,
      "learning_rate": 3.8200351864420184e-06,
      "loss": 0.0108,
      "step": 1895780
    },
    {
      "epoch": 3.102518279949988,
      "grad_norm": 0.4996457099914551,
      "learning_rate": 3.819969294228502e-06,
      "loss": 0.0071,
      "step": 1895800
    },
    {
      "epoch": 3.102551010388641,
      "grad_norm": 1.1935276985168457,
      "learning_rate": 3.819903402014985e-06,
      "loss": 0.016,
      "step": 1895820
    },
    {
      "epoch": 3.1025837408272947,
      "grad_norm": 0.17546816170215607,
      "learning_rate": 3.8198375098014675e-06,
      "loss": 0.0127,
      "step": 1895840
    },
    {
      "epoch": 3.102616471265948,
      "grad_norm": 0.2527354955673218,
      "learning_rate": 3.81977161758795e-06,
      "loss": 0.0148,
      "step": 1895860
    },
    {
      "epoch": 3.1026492017046015,
      "grad_norm": 0.327932208776474,
      "learning_rate": 3.819705725374433e-06,
      "loss": 0.0152,
      "step": 1895880
    },
    {
      "epoch": 3.1026819321432546,
      "grad_norm": 0.22958897054195404,
      "learning_rate": 3.819639833160916e-06,
      "loss": 0.0091,
      "step": 1895900
    },
    {
      "epoch": 3.1027146625819078,
      "grad_norm": 0.4098210334777832,
      "learning_rate": 3.8195739409473985e-06,
      "loss": 0.0089,
      "step": 1895920
    },
    {
      "epoch": 3.1027473930205614,
      "grad_norm": 0.28809377551078796,
      "learning_rate": 3.819508048733881e-06,
      "loss": 0.0145,
      "step": 1895940
    },
    {
      "epoch": 3.1027801234592145,
      "grad_norm": 0.4284285306930542,
      "learning_rate": 3.819442156520364e-06,
      "loss": 0.0168,
      "step": 1895960
    },
    {
      "epoch": 3.102812853897868,
      "grad_norm": 1.2398542165756226,
      "learning_rate": 3.8193762643068475e-06,
      "loss": 0.0138,
      "step": 1895980
    },
    {
      "epoch": 3.1028455843365212,
      "grad_norm": 0.3753076195716858,
      "learning_rate": 3.81931037209333e-06,
      "loss": 0.0092,
      "step": 1896000
    },
    {
      "epoch": 3.1028783147751744,
      "grad_norm": 0.40435680747032166,
      "learning_rate": 3.819244479879813e-06,
      "loss": 0.0083,
      "step": 1896020
    },
    {
      "epoch": 3.102911045213828,
      "grad_norm": 0.5154527425765991,
      "learning_rate": 3.819178587666296e-06,
      "loss": 0.0147,
      "step": 1896040
    },
    {
      "epoch": 3.102943775652481,
      "grad_norm": 0.13744264841079712,
      "learning_rate": 3.8191126954527785e-06,
      "loss": 0.0081,
      "step": 1896060
    },
    {
      "epoch": 3.1029765060911347,
      "grad_norm": 0.14744466543197632,
      "learning_rate": 3.819046803239261e-06,
      "loss": 0.0109,
      "step": 1896080
    },
    {
      "epoch": 3.103009236529788,
      "grad_norm": 0.23422107100486755,
      "learning_rate": 3.818980911025744e-06,
      "loss": 0.0093,
      "step": 1896100
    },
    {
      "epoch": 3.1030419669684415,
      "grad_norm": 0.43443921208381653,
      "learning_rate": 3.818915018812227e-06,
      "loss": 0.0117,
      "step": 1896120
    },
    {
      "epoch": 3.1030746974070946,
      "grad_norm": 0.08946594595909119,
      "learning_rate": 3.81884912659871e-06,
      "loss": 0.0134,
      "step": 1896140
    },
    {
      "epoch": 3.103107427845748,
      "grad_norm": 0.20853720605373383,
      "learning_rate": 3.818783234385193e-06,
      "loss": 0.0183,
      "step": 1896160
    },
    {
      "epoch": 3.1031401582844014,
      "grad_norm": 0.36152732372283936,
      "learning_rate": 3.818717342171676e-06,
      "loss": 0.0117,
      "step": 1896180
    },
    {
      "epoch": 3.1031728887230545,
      "grad_norm": 0.1983284205198288,
      "learning_rate": 3.818651449958159e-06,
      "loss": 0.0108,
      "step": 1896200
    },
    {
      "epoch": 3.103205619161708,
      "grad_norm": 0.3142194151878357,
      "learning_rate": 3.818585557744642e-06,
      "loss": 0.0099,
      "step": 1896220
    },
    {
      "epoch": 3.1032383496003613,
      "grad_norm": 0.2548868954181671,
      "learning_rate": 3.818519665531125e-06,
      "loss": 0.0134,
      "step": 1896240
    },
    {
      "epoch": 3.103271080039015,
      "grad_norm": 0.21031630039215088,
      "learning_rate": 3.8184537733176076e-06,
      "loss": 0.01,
      "step": 1896260
    },
    {
      "epoch": 3.103303810477668,
      "grad_norm": 0.13325557112693787,
      "learning_rate": 3.81838788110409e-06,
      "loss": 0.0086,
      "step": 1896280
    },
    {
      "epoch": 3.103336540916321,
      "grad_norm": 0.21323499083518982,
      "learning_rate": 3.818321988890573e-06,
      "loss": 0.0084,
      "step": 1896300
    },
    {
      "epoch": 3.1033692713549748,
      "grad_norm": 0.3887846767902374,
      "learning_rate": 3.818256096677056e-06,
      "loss": 0.0105,
      "step": 1896320
    },
    {
      "epoch": 3.103402001793628,
      "grad_norm": 2.6155076026916504,
      "learning_rate": 3.8181902044635385e-06,
      "loss": 0.0154,
      "step": 1896340
    },
    {
      "epoch": 3.1034347322322815,
      "grad_norm": 0.15921229124069214,
      "learning_rate": 3.818124312250021e-06,
      "loss": 0.0085,
      "step": 1896360
    },
    {
      "epoch": 3.1034674626709347,
      "grad_norm": 0.32122868299484253,
      "learning_rate": 3.818058420036505e-06,
      "loss": 0.0118,
      "step": 1896380
    },
    {
      "epoch": 3.1035001931095882,
      "grad_norm": 0.136679008603096,
      "learning_rate": 3.817992527822988e-06,
      "loss": 0.0142,
      "step": 1896400
    },
    {
      "epoch": 3.1035329235482414,
      "grad_norm": 0.21367765963077545,
      "learning_rate": 3.81792663560947e-06,
      "loss": 0.0127,
      "step": 1896420
    },
    {
      "epoch": 3.1035656539868945,
      "grad_norm": 0.17211464047431946,
      "learning_rate": 3.817860743395953e-06,
      "loss": 0.0157,
      "step": 1896440
    },
    {
      "epoch": 3.103598384425548,
      "grad_norm": 0.2393275797367096,
      "learning_rate": 3.817794851182436e-06,
      "loss": 0.0121,
      "step": 1896460
    },
    {
      "epoch": 3.1036311148642013,
      "grad_norm": 0.30806753039360046,
      "learning_rate": 3.8177289589689186e-06,
      "loss": 0.0107,
      "step": 1896480
    },
    {
      "epoch": 3.103663845302855,
      "grad_norm": 0.7309103608131409,
      "learning_rate": 3.817663066755402e-06,
      "loss": 0.0169,
      "step": 1896500
    },
    {
      "epoch": 3.103696575741508,
      "grad_norm": 0.3038508892059326,
      "learning_rate": 3.817597174541885e-06,
      "loss": 0.0083,
      "step": 1896520
    },
    {
      "epoch": 3.1037293061801616,
      "grad_norm": 0.12629278004169464,
      "learning_rate": 3.817531282328368e-06,
      "loss": 0.0114,
      "step": 1896540
    },
    {
      "epoch": 3.103762036618815,
      "grad_norm": 0.6655513048171997,
      "learning_rate": 3.81746539011485e-06,
      "loss": 0.0104,
      "step": 1896560
    },
    {
      "epoch": 3.103794767057468,
      "grad_norm": 0.2641633450984955,
      "learning_rate": 3.817399497901333e-06,
      "loss": 0.0142,
      "step": 1896580
    },
    {
      "epoch": 3.1038274974961215,
      "grad_norm": 0.12415110319852829,
      "learning_rate": 3.817333605687817e-06,
      "loss": 0.0116,
      "step": 1896600
    },
    {
      "epoch": 3.1038602279347747,
      "grad_norm": 0.19032864272594452,
      "learning_rate": 3.8172677134742994e-06,
      "loss": 0.0101,
      "step": 1896620
    },
    {
      "epoch": 3.1038929583734283,
      "grad_norm": 0.28356119990348816,
      "learning_rate": 3.817201821260782e-06,
      "loss": 0.0125,
      "step": 1896640
    },
    {
      "epoch": 3.1039256888120814,
      "grad_norm": 0.24328087270259857,
      "learning_rate": 3.817135929047265e-06,
      "loss": 0.0133,
      "step": 1896660
    },
    {
      "epoch": 3.103958419250735,
      "grad_norm": 0.4885031580924988,
      "learning_rate": 3.817070036833748e-06,
      "loss": 0.0096,
      "step": 1896680
    },
    {
      "epoch": 3.103991149689388,
      "grad_norm": 0.10328461229801178,
      "learning_rate": 3.81700414462023e-06,
      "loss": 0.0129,
      "step": 1896700
    },
    {
      "epoch": 3.1040238801280413,
      "grad_norm": 0.14135944843292236,
      "learning_rate": 3.816938252406713e-06,
      "loss": 0.0136,
      "step": 1896720
    },
    {
      "epoch": 3.104056610566695,
      "grad_norm": 1.4143264293670654,
      "learning_rate": 3.816872360193196e-06,
      "loss": 0.0164,
      "step": 1896740
    },
    {
      "epoch": 3.104089341005348,
      "grad_norm": 0.1950640082359314,
      "learning_rate": 3.816806467979679e-06,
      "loss": 0.0114,
      "step": 1896760
    },
    {
      "epoch": 3.1041220714440017,
      "grad_norm": 0.4885001480579376,
      "learning_rate": 3.816740575766162e-06,
      "loss": 0.0092,
      "step": 1896780
    },
    {
      "epoch": 3.104154801882655,
      "grad_norm": 0.14401009678840637,
      "learning_rate": 3.816674683552645e-06,
      "loss": 0.0137,
      "step": 1896800
    },
    {
      "epoch": 3.1041875323213084,
      "grad_norm": 0.21048450469970703,
      "learning_rate": 3.816608791339128e-06,
      "loss": 0.0077,
      "step": 1896820
    },
    {
      "epoch": 3.1042202627599615,
      "grad_norm": 0.24631576240062714,
      "learning_rate": 3.81654289912561e-06,
      "loss": 0.0116,
      "step": 1896840
    },
    {
      "epoch": 3.1042529931986147,
      "grad_norm": 0.2524675130844116,
      "learning_rate": 3.816477006912094e-06,
      "loss": 0.0137,
      "step": 1896860
    },
    {
      "epoch": 3.1042857236372683,
      "grad_norm": 0.6057306528091431,
      "learning_rate": 3.816411114698577e-06,
      "loss": 0.0129,
      "step": 1896880
    },
    {
      "epoch": 3.1043184540759214,
      "grad_norm": 0.22522680461406708,
      "learning_rate": 3.8163452224850595e-06,
      "loss": 0.0088,
      "step": 1896900
    },
    {
      "epoch": 3.104351184514575,
      "grad_norm": 0.12413807958364487,
      "learning_rate": 3.816279330271542e-06,
      "loss": 0.0115,
      "step": 1896920
    },
    {
      "epoch": 3.104383914953228,
      "grad_norm": 0.2858532667160034,
      "learning_rate": 3.816213438058025e-06,
      "loss": 0.0125,
      "step": 1896940
    },
    {
      "epoch": 3.1044166453918813,
      "grad_norm": 0.5831730365753174,
      "learning_rate": 3.816147545844508e-06,
      "loss": 0.0148,
      "step": 1896960
    },
    {
      "epoch": 3.104449375830535,
      "grad_norm": 0.3635869026184082,
      "learning_rate": 3.8160816536309904e-06,
      "loss": 0.0102,
      "step": 1896980
    },
    {
      "epoch": 3.104482106269188,
      "grad_norm": 0.10919129848480225,
      "learning_rate": 3.816015761417474e-06,
      "loss": 0.0121,
      "step": 1897000
    },
    {
      "epoch": 3.1045148367078417,
      "grad_norm": 0.2538580298423767,
      "learning_rate": 3.815949869203957e-06,
      "loss": 0.0189,
      "step": 1897020
    },
    {
      "epoch": 3.104547567146495,
      "grad_norm": 0.6000559329986572,
      "learning_rate": 3.8158839769904395e-06,
      "loss": 0.0098,
      "step": 1897040
    },
    {
      "epoch": 3.1045802975851484,
      "grad_norm": 0.16879476606845856,
      "learning_rate": 3.815818084776922e-06,
      "loss": 0.0127,
      "step": 1897060
    },
    {
      "epoch": 3.1046130280238016,
      "grad_norm": 0.10856457054615021,
      "learning_rate": 3.815752192563405e-06,
      "loss": 0.0103,
      "step": 1897080
    },
    {
      "epoch": 3.1046457584624547,
      "grad_norm": 0.1599876880645752,
      "learning_rate": 3.815686300349888e-06,
      "loss": 0.0176,
      "step": 1897100
    },
    {
      "epoch": 3.1046784889011083,
      "grad_norm": 0.2713403105735779,
      "learning_rate": 3.8156204081363705e-06,
      "loss": 0.0119,
      "step": 1897120
    },
    {
      "epoch": 3.1047112193397615,
      "grad_norm": 0.20416344702243805,
      "learning_rate": 3.815554515922853e-06,
      "loss": 0.0094,
      "step": 1897140
    },
    {
      "epoch": 3.104743949778415,
      "grad_norm": 0.6218462586402893,
      "learning_rate": 3.815488623709337e-06,
      "loss": 0.008,
      "step": 1897160
    },
    {
      "epoch": 3.104776680217068,
      "grad_norm": 0.7483724355697632,
      "learning_rate": 3.8154227314958195e-06,
      "loss": 0.0119,
      "step": 1897180
    },
    {
      "epoch": 3.104809410655722,
      "grad_norm": 0.3727181553840637,
      "learning_rate": 3.815356839282302e-06,
      "loss": 0.0142,
      "step": 1897200
    },
    {
      "epoch": 3.104842141094375,
      "grad_norm": 0.37392058968544006,
      "learning_rate": 3.815290947068786e-06,
      "loss": 0.0114,
      "step": 1897220
    },
    {
      "epoch": 3.104874871533028,
      "grad_norm": 0.12949609756469727,
      "learning_rate": 3.815225054855269e-06,
      "loss": 0.0083,
      "step": 1897240
    },
    {
      "epoch": 3.1049076019716817,
      "grad_norm": 0.40256229043006897,
      "learning_rate": 3.815159162641751e-06,
      "loss": 0.0143,
      "step": 1897260
    },
    {
      "epoch": 3.104940332410335,
      "grad_norm": 0.3535837233066559,
      "learning_rate": 3.815093270428234e-06,
      "loss": 0.01,
      "step": 1897280
    },
    {
      "epoch": 3.1049730628489884,
      "grad_norm": 0.3589337170124054,
      "learning_rate": 3.815027378214717e-06,
      "loss": 0.0137,
      "step": 1897300
    },
    {
      "epoch": 3.1050057932876416,
      "grad_norm": 0.3779328763484955,
      "learning_rate": 3.8149614860011996e-06,
      "loss": 0.0125,
      "step": 1897320
    },
    {
      "epoch": 3.105038523726295,
      "grad_norm": 0.3870457708835602,
      "learning_rate": 3.814895593787682e-06,
      "loss": 0.0134,
      "step": 1897340
    },
    {
      "epoch": 3.1050712541649483,
      "grad_norm": 0.30992186069488525,
      "learning_rate": 3.814829701574165e-06,
      "loss": 0.0141,
      "step": 1897360
    },
    {
      "epoch": 3.1051039846036015,
      "grad_norm": 0.10265728086233139,
      "learning_rate": 3.814763809360648e-06,
      "loss": 0.0164,
      "step": 1897380
    },
    {
      "epoch": 3.105136715042255,
      "grad_norm": 0.5490120053291321,
      "learning_rate": 3.814697917147131e-06,
      "loss": 0.0128,
      "step": 1897400
    },
    {
      "epoch": 3.1051694454809082,
      "grad_norm": 0.3230058550834656,
      "learning_rate": 3.8146320249336137e-06,
      "loss": 0.0163,
      "step": 1897420
    },
    {
      "epoch": 3.105202175919562,
      "grad_norm": 0.591307520866394,
      "learning_rate": 3.814566132720097e-06,
      "loss": 0.0136,
      "step": 1897440
    },
    {
      "epoch": 3.105234906358215,
      "grad_norm": 0.5678036212921143,
      "learning_rate": 3.8145002405065796e-06,
      "loss": 0.0106,
      "step": 1897460
    },
    {
      "epoch": 3.105267636796868,
      "grad_norm": 0.47307726740837097,
      "learning_rate": 3.8144343482930623e-06,
      "loss": 0.0092,
      "step": 1897480
    },
    {
      "epoch": 3.1053003672355217,
      "grad_norm": 0.12289560586214066,
      "learning_rate": 3.814368456079545e-06,
      "loss": 0.0098,
      "step": 1897500
    },
    {
      "epoch": 3.105333097674175,
      "grad_norm": 0.1827610433101654,
      "learning_rate": 3.8143025638660282e-06,
      "loss": 0.0124,
      "step": 1897520
    },
    {
      "epoch": 3.1053658281128285,
      "grad_norm": 0.24713842570781708,
      "learning_rate": 3.814236671652511e-06,
      "loss": 0.01,
      "step": 1897540
    },
    {
      "epoch": 3.1053985585514816,
      "grad_norm": 0.4937455654144287,
      "learning_rate": 3.814170779438994e-06,
      "loss": 0.0116,
      "step": 1897560
    },
    {
      "epoch": 3.105431288990135,
      "grad_norm": 0.8669884204864502,
      "learning_rate": 3.8141048872254773e-06,
      "loss": 0.0135,
      "step": 1897580
    },
    {
      "epoch": 3.1054640194287884,
      "grad_norm": 0.42332959175109863,
      "learning_rate": 3.81403899501196e-06,
      "loss": 0.0163,
      "step": 1897600
    },
    {
      "epoch": 3.1054967498674415,
      "grad_norm": 0.22091782093048096,
      "learning_rate": 3.8139731027984428e-06,
      "loss": 0.0112,
      "step": 1897620
    },
    {
      "epoch": 3.105529480306095,
      "grad_norm": 0.4370179772377014,
      "learning_rate": 3.8139072105849255e-06,
      "loss": 0.012,
      "step": 1897640
    },
    {
      "epoch": 3.1055622107447483,
      "grad_norm": 0.3352366089820862,
      "learning_rate": 3.8138413183714087e-06,
      "loss": 0.0148,
      "step": 1897660
    },
    {
      "epoch": 3.105594941183402,
      "grad_norm": 0.30020958185195923,
      "learning_rate": 3.8137754261578914e-06,
      "loss": 0.0127,
      "step": 1897680
    },
    {
      "epoch": 3.105627671622055,
      "grad_norm": 0.29404139518737793,
      "learning_rate": 3.813709533944374e-06,
      "loss": 0.0111,
      "step": 1897700
    },
    {
      "epoch": 3.1056604020607086,
      "grad_norm": 0.3047461211681366,
      "learning_rate": 3.813643641730857e-06,
      "loss": 0.0126,
      "step": 1897720
    },
    {
      "epoch": 3.1056931324993617,
      "grad_norm": 0.22963620722293854,
      "learning_rate": 3.8135777495173396e-06,
      "loss": 0.009,
      "step": 1897740
    },
    {
      "epoch": 3.105725862938015,
      "grad_norm": 0.18051020801067352,
      "learning_rate": 3.813511857303823e-06,
      "loss": 0.0112,
      "step": 1897760
    },
    {
      "epoch": 3.1057585933766685,
      "grad_norm": 0.18042117357254028,
      "learning_rate": 3.8134459650903055e-06,
      "loss": 0.0117,
      "step": 1897780
    },
    {
      "epoch": 3.1057913238153216,
      "grad_norm": 0.2564958930015564,
      "learning_rate": 3.8133800728767883e-06,
      "loss": 0.0099,
      "step": 1897800
    },
    {
      "epoch": 3.1058240542539752,
      "grad_norm": 0.5179235339164734,
      "learning_rate": 3.813314180663271e-06,
      "loss": 0.0114,
      "step": 1897820
    },
    {
      "epoch": 3.1058567846926284,
      "grad_norm": 0.19566041231155396,
      "learning_rate": 3.813248288449754e-06,
      "loss": 0.0096,
      "step": 1897840
    },
    {
      "epoch": 3.105889515131282,
      "grad_norm": 0.1611776500940323,
      "learning_rate": 3.813182396236237e-06,
      "loss": 0.0166,
      "step": 1897860
    },
    {
      "epoch": 3.105922245569935,
      "grad_norm": 0.28553837537765503,
      "learning_rate": 3.8131165040227197e-06,
      "loss": 0.0103,
      "step": 1897880
    },
    {
      "epoch": 3.1059549760085883,
      "grad_norm": 0.27190059423446655,
      "learning_rate": 3.8130506118092024e-06,
      "loss": 0.0113,
      "step": 1897900
    },
    {
      "epoch": 3.105987706447242,
      "grad_norm": 0.5618581175804138,
      "learning_rate": 3.812984719595686e-06,
      "loss": 0.0084,
      "step": 1897920
    },
    {
      "epoch": 3.106020436885895,
      "grad_norm": 0.09593112766742706,
      "learning_rate": 3.8129188273821687e-06,
      "loss": 0.0111,
      "step": 1897940
    },
    {
      "epoch": 3.1060531673245486,
      "grad_norm": 0.2866082489490509,
      "learning_rate": 3.8128529351686515e-06,
      "loss": 0.0094,
      "step": 1897960
    },
    {
      "epoch": 3.1060858977632018,
      "grad_norm": 0.29551613330841064,
      "learning_rate": 3.8127870429551346e-06,
      "loss": 0.0127,
      "step": 1897980
    },
    {
      "epoch": 3.1061186282018554,
      "grad_norm": 0.14219340682029724,
      "learning_rate": 3.8127211507416174e-06,
      "loss": 0.0069,
      "step": 1898000
    },
    {
      "epoch": 3.1061513586405085,
      "grad_norm": 0.4587197005748749,
      "learning_rate": 3.8126552585281e-06,
      "loss": 0.0115,
      "step": 1898020
    },
    {
      "epoch": 3.1061840890791617,
      "grad_norm": 0.8872317671775818,
      "learning_rate": 3.812589366314583e-06,
      "loss": 0.015,
      "step": 1898040
    },
    {
      "epoch": 3.1062168195178153,
      "grad_norm": 0.6877561807632446,
      "learning_rate": 3.812523474101066e-06,
      "loss": 0.0138,
      "step": 1898060
    },
    {
      "epoch": 3.1062495499564684,
      "grad_norm": 0.23286160826683044,
      "learning_rate": 3.8124575818875488e-06,
      "loss": 0.0093,
      "step": 1898080
    },
    {
      "epoch": 3.106282280395122,
      "grad_norm": 0.1314951479434967,
      "learning_rate": 3.8123916896740315e-06,
      "loss": 0.0123,
      "step": 1898100
    },
    {
      "epoch": 3.106315010833775,
      "grad_norm": 0.3907932639122009,
      "learning_rate": 3.8123257974605142e-06,
      "loss": 0.0111,
      "step": 1898120
    },
    {
      "epoch": 3.1063477412724287,
      "grad_norm": 0.30622363090515137,
      "learning_rate": 3.8122599052469974e-06,
      "loss": 0.0099,
      "step": 1898140
    },
    {
      "epoch": 3.106380471711082,
      "grad_norm": 0.0842461809515953,
      "learning_rate": 3.81219401303348e-06,
      "loss": 0.0075,
      "step": 1898160
    },
    {
      "epoch": 3.106413202149735,
      "grad_norm": 0.12641295790672302,
      "learning_rate": 3.812128120819963e-06,
      "loss": 0.0111,
      "step": 1898180
    },
    {
      "epoch": 3.1064459325883886,
      "grad_norm": 1.0110986232757568,
      "learning_rate": 3.8120622286064456e-06,
      "loss": 0.0162,
      "step": 1898200
    },
    {
      "epoch": 3.106478663027042,
      "grad_norm": 0.33700186014175415,
      "learning_rate": 3.8119963363929284e-06,
      "loss": 0.0149,
      "step": 1898220
    },
    {
      "epoch": 3.1065113934656954,
      "grad_norm": 0.7430469989776611,
      "learning_rate": 3.8119304441794115e-06,
      "loss": 0.0084,
      "step": 1898240
    },
    {
      "epoch": 3.1065441239043485,
      "grad_norm": 0.0976182371377945,
      "learning_rate": 3.8118645519658947e-06,
      "loss": 0.0084,
      "step": 1898260
    },
    {
      "epoch": 3.106576854343002,
      "grad_norm": 0.2233465015888214,
      "learning_rate": 3.8117986597523774e-06,
      "loss": 0.0119,
      "step": 1898280
    },
    {
      "epoch": 3.1066095847816553,
      "grad_norm": 0.383393257856369,
      "learning_rate": 3.8117327675388606e-06,
      "loss": 0.0124,
      "step": 1898300
    },
    {
      "epoch": 3.1066423152203084,
      "grad_norm": 0.11061963438987732,
      "learning_rate": 3.8116668753253433e-06,
      "loss": 0.0108,
      "step": 1898320
    },
    {
      "epoch": 3.106675045658962,
      "grad_norm": 0.14607194066047668,
      "learning_rate": 3.811600983111826e-06,
      "loss": 0.0123,
      "step": 1898340
    },
    {
      "epoch": 3.106707776097615,
      "grad_norm": 0.6979863047599792,
      "learning_rate": 3.811535090898309e-06,
      "loss": 0.0136,
      "step": 1898360
    },
    {
      "epoch": 3.1067405065362688,
      "grad_norm": 0.31627899408340454,
      "learning_rate": 3.811469198684792e-06,
      "loss": 0.0147,
      "step": 1898380
    },
    {
      "epoch": 3.106773236974922,
      "grad_norm": 0.11453229933977127,
      "learning_rate": 3.8114033064712747e-06,
      "loss": 0.0151,
      "step": 1898400
    },
    {
      "epoch": 3.106805967413575,
      "grad_norm": 0.37947162985801697,
      "learning_rate": 3.8113374142577574e-06,
      "loss": 0.0071,
      "step": 1898420
    },
    {
      "epoch": 3.1068386978522287,
      "grad_norm": 0.18203715980052948,
      "learning_rate": 3.81127152204424e-06,
      "loss": 0.0117,
      "step": 1898440
    },
    {
      "epoch": 3.106871428290882,
      "grad_norm": 0.27123185992240906,
      "learning_rate": 3.8112056298307233e-06,
      "loss": 0.0139,
      "step": 1898460
    },
    {
      "epoch": 3.1069041587295354,
      "grad_norm": 0.3281254172325134,
      "learning_rate": 3.811139737617206e-06,
      "loss": 0.0098,
      "step": 1898480
    },
    {
      "epoch": 3.1069368891681886,
      "grad_norm": 0.3185562491416931,
      "learning_rate": 3.811073845403689e-06,
      "loss": 0.0116,
      "step": 1898500
    },
    {
      "epoch": 3.106969619606842,
      "grad_norm": 0.234712615609169,
      "learning_rate": 3.8110079531901716e-06,
      "loss": 0.0088,
      "step": 1898520
    },
    {
      "epoch": 3.1070023500454953,
      "grad_norm": 0.06933038681745529,
      "learning_rate": 3.8109420609766547e-06,
      "loss": 0.0099,
      "step": 1898540
    },
    {
      "epoch": 3.1070350804841484,
      "grad_norm": 0.382004052400589,
      "learning_rate": 3.8108761687631375e-06,
      "loss": 0.012,
      "step": 1898560
    },
    {
      "epoch": 3.107067810922802,
      "grad_norm": 0.13113243877887726,
      "learning_rate": 3.81081027654962e-06,
      "loss": 0.0143,
      "step": 1898580
    },
    {
      "epoch": 3.107100541361455,
      "grad_norm": 0.5818862915039062,
      "learning_rate": 3.810744384336103e-06,
      "loss": 0.0078,
      "step": 1898600
    },
    {
      "epoch": 3.107133271800109,
      "grad_norm": 0.10934015363454819,
      "learning_rate": 3.8106784921225865e-06,
      "loss": 0.0108,
      "step": 1898620
    },
    {
      "epoch": 3.107166002238762,
      "grad_norm": 0.3345501124858856,
      "learning_rate": 3.8106125999090693e-06,
      "loss": 0.009,
      "step": 1898640
    },
    {
      "epoch": 3.1071987326774155,
      "grad_norm": 0.3458871841430664,
      "learning_rate": 3.810546707695552e-06,
      "loss": 0.0097,
      "step": 1898660
    },
    {
      "epoch": 3.1072314631160687,
      "grad_norm": 0.24549600481987,
      "learning_rate": 3.810480815482035e-06,
      "loss": 0.0126,
      "step": 1898680
    },
    {
      "epoch": 3.107264193554722,
      "grad_norm": 0.2916679382324219,
      "learning_rate": 3.810414923268518e-06,
      "loss": 0.0119,
      "step": 1898700
    },
    {
      "epoch": 3.1072969239933754,
      "grad_norm": 0.774547815322876,
      "learning_rate": 3.8103490310550007e-06,
      "loss": 0.009,
      "step": 1898720
    },
    {
      "epoch": 3.1073296544320286,
      "grad_norm": 0.15676763653755188,
      "learning_rate": 3.8102831388414834e-06,
      "loss": 0.0106,
      "step": 1898740
    },
    {
      "epoch": 3.107362384870682,
      "grad_norm": 0.540260910987854,
      "learning_rate": 3.810217246627966e-06,
      "loss": 0.0126,
      "step": 1898760
    },
    {
      "epoch": 3.1073951153093353,
      "grad_norm": 0.14752162992954254,
      "learning_rate": 3.8101513544144493e-06,
      "loss": 0.0168,
      "step": 1898780
    },
    {
      "epoch": 3.107427845747989,
      "grad_norm": 0.6236618161201477,
      "learning_rate": 3.810085462200932e-06,
      "loss": 0.0136,
      "step": 1898800
    },
    {
      "epoch": 3.107460576186642,
      "grad_norm": 0.32701921463012695,
      "learning_rate": 3.8100195699874148e-06,
      "loss": 0.0101,
      "step": 1898820
    },
    {
      "epoch": 3.107493306625295,
      "grad_norm": 0.0660841092467308,
      "learning_rate": 3.8099536777738975e-06,
      "loss": 0.0075,
      "step": 1898840
    },
    {
      "epoch": 3.107526037063949,
      "grad_norm": 0.2884107530117035,
      "learning_rate": 3.8098877855603807e-06,
      "loss": 0.0106,
      "step": 1898860
    },
    {
      "epoch": 3.107558767502602,
      "grad_norm": 0.12458234280347824,
      "learning_rate": 3.8098218933468634e-06,
      "loss": 0.0121,
      "step": 1898880
    },
    {
      "epoch": 3.1075914979412556,
      "grad_norm": 0.12538893520832062,
      "learning_rate": 3.809756001133346e-06,
      "loss": 0.0097,
      "step": 1898900
    },
    {
      "epoch": 3.1076242283799087,
      "grad_norm": 0.6998812556266785,
      "learning_rate": 3.809690108919829e-06,
      "loss": 0.0093,
      "step": 1898920
    },
    {
      "epoch": 3.1076569588185623,
      "grad_norm": 0.2728017568588257,
      "learning_rate": 3.809624216706312e-06,
      "loss": 0.0078,
      "step": 1898940
    },
    {
      "epoch": 3.1076896892572154,
      "grad_norm": 0.30969715118408203,
      "learning_rate": 3.809558324492795e-06,
      "loss": 0.0206,
      "step": 1898960
    },
    {
      "epoch": 3.1077224196958686,
      "grad_norm": 0.5241594314575195,
      "learning_rate": 3.809492432279278e-06,
      "loss": 0.0086,
      "step": 1898980
    },
    {
      "epoch": 3.107755150134522,
      "grad_norm": 0.20505821704864502,
      "learning_rate": 3.809426540065761e-06,
      "loss": 0.0139,
      "step": 1899000
    },
    {
      "epoch": 3.1077878805731753,
      "grad_norm": 0.5506244897842407,
      "learning_rate": 3.809360647852244e-06,
      "loss": 0.0148,
      "step": 1899020
    },
    {
      "epoch": 3.107820611011829,
      "grad_norm": 0.09710949659347534,
      "learning_rate": 3.8092947556387266e-06,
      "loss": 0.0108,
      "step": 1899040
    },
    {
      "epoch": 3.107853341450482,
      "grad_norm": 0.34803709387779236,
      "learning_rate": 3.8092288634252094e-06,
      "loss": 0.0097,
      "step": 1899060
    },
    {
      "epoch": 3.1078860718891352,
      "grad_norm": 0.1865948587656021,
      "learning_rate": 3.8091629712116925e-06,
      "loss": 0.0136,
      "step": 1899080
    },
    {
      "epoch": 3.107918802327789,
      "grad_norm": 0.21710267663002014,
      "learning_rate": 3.8090970789981753e-06,
      "loss": 0.0145,
      "step": 1899100
    },
    {
      "epoch": 3.107951532766442,
      "grad_norm": 0.467876136302948,
      "learning_rate": 3.809031186784658e-06,
      "loss": 0.0115,
      "step": 1899120
    },
    {
      "epoch": 3.1079842632050956,
      "grad_norm": 0.20898035168647766,
      "learning_rate": 3.8089652945711407e-06,
      "loss": 0.0138,
      "step": 1899140
    },
    {
      "epoch": 3.1080169936437487,
      "grad_norm": 0.24605795741081238,
      "learning_rate": 3.8088994023576235e-06,
      "loss": 0.0131,
      "step": 1899160
    },
    {
      "epoch": 3.1080497240824023,
      "grad_norm": 0.3045884370803833,
      "learning_rate": 3.8088335101441066e-06,
      "loss": 0.0104,
      "step": 1899180
    },
    {
      "epoch": 3.1080824545210555,
      "grad_norm": 0.30775532126426697,
      "learning_rate": 3.8087676179305894e-06,
      "loss": 0.0099,
      "step": 1899200
    },
    {
      "epoch": 3.1081151849597086,
      "grad_norm": 0.45710423588752747,
      "learning_rate": 3.808701725717072e-06,
      "loss": 0.0156,
      "step": 1899220
    },
    {
      "epoch": 3.108147915398362,
      "grad_norm": 0.09486973285675049,
      "learning_rate": 3.808635833503555e-06,
      "loss": 0.0154,
      "step": 1899240
    },
    {
      "epoch": 3.1081806458370154,
      "grad_norm": 0.38623830676078796,
      "learning_rate": 3.808569941290038e-06,
      "loss": 0.0131,
      "step": 1899260
    },
    {
      "epoch": 3.108213376275669,
      "grad_norm": 0.2390894889831543,
      "learning_rate": 3.8085040490765208e-06,
      "loss": 0.0132,
      "step": 1899280
    },
    {
      "epoch": 3.108246106714322,
      "grad_norm": 0.8504111170768738,
      "learning_rate": 3.8084381568630035e-06,
      "loss": 0.0177,
      "step": 1899300
    },
    {
      "epoch": 3.1082788371529757,
      "grad_norm": 0.34855425357818604,
      "learning_rate": 3.808372264649487e-06,
      "loss": 0.0131,
      "step": 1899320
    },
    {
      "epoch": 3.108311567591629,
      "grad_norm": 0.596758246421814,
      "learning_rate": 3.80830637243597e-06,
      "loss": 0.0128,
      "step": 1899340
    },
    {
      "epoch": 3.108344298030282,
      "grad_norm": 0.7591213583946228,
      "learning_rate": 3.8082404802224526e-06,
      "loss": 0.0115,
      "step": 1899360
    },
    {
      "epoch": 3.1083770284689356,
      "grad_norm": 0.3972572684288025,
      "learning_rate": 3.8081745880089353e-06,
      "loss": 0.0099,
      "step": 1899380
    },
    {
      "epoch": 3.1084097589075887,
      "grad_norm": 0.722158670425415,
      "learning_rate": 3.8081086957954185e-06,
      "loss": 0.0139,
      "step": 1899400
    },
    {
      "epoch": 3.1084424893462423,
      "grad_norm": 0.8414104580879211,
      "learning_rate": 3.808042803581901e-06,
      "loss": 0.0248,
      "step": 1899420
    },
    {
      "epoch": 3.1084752197848955,
      "grad_norm": 0.3085392713546753,
      "learning_rate": 3.807976911368384e-06,
      "loss": 0.015,
      "step": 1899440
    },
    {
      "epoch": 3.108507950223549,
      "grad_norm": 0.35715505480766296,
      "learning_rate": 3.8079110191548667e-06,
      "loss": 0.0129,
      "step": 1899460
    },
    {
      "epoch": 3.1085406806622022,
      "grad_norm": 0.3920069932937622,
      "learning_rate": 3.80784512694135e-06,
      "loss": 0.013,
      "step": 1899480
    },
    {
      "epoch": 3.1085734111008554,
      "grad_norm": 1.0823661088943481,
      "learning_rate": 3.8077792347278326e-06,
      "loss": 0.0147,
      "step": 1899500
    },
    {
      "epoch": 3.108606141539509,
      "grad_norm": 0.27045801281929016,
      "learning_rate": 3.8077133425143153e-06,
      "loss": 0.0132,
      "step": 1899520
    },
    {
      "epoch": 3.108638871978162,
      "grad_norm": 0.2103627771139145,
      "learning_rate": 3.807647450300798e-06,
      "loss": 0.0141,
      "step": 1899540
    },
    {
      "epoch": 3.1086716024168157,
      "grad_norm": 0.20272527635097504,
      "learning_rate": 3.8075815580872812e-06,
      "loss": 0.0149,
      "step": 1899560
    },
    {
      "epoch": 3.108704332855469,
      "grad_norm": 0.43330085277557373,
      "learning_rate": 3.807515665873764e-06,
      "loss": 0.0113,
      "step": 1899580
    },
    {
      "epoch": 3.1087370632941225,
      "grad_norm": 0.1769675612449646,
      "learning_rate": 3.8074497736602467e-06,
      "loss": 0.0122,
      "step": 1899600
    },
    {
      "epoch": 3.1087697937327756,
      "grad_norm": 0.1669209897518158,
      "learning_rate": 3.8073838814467295e-06,
      "loss": 0.0081,
      "step": 1899620
    },
    {
      "epoch": 3.1088025241714288,
      "grad_norm": 0.16522078216075897,
      "learning_rate": 3.807317989233212e-06,
      "loss": 0.0142,
      "step": 1899640
    },
    {
      "epoch": 3.1088352546100824,
      "grad_norm": 0.41695454716682434,
      "learning_rate": 3.8072520970196954e-06,
      "loss": 0.014,
      "step": 1899660
    },
    {
      "epoch": 3.1088679850487355,
      "grad_norm": 0.45215028524398804,
      "learning_rate": 3.8071862048061785e-06,
      "loss": 0.0063,
      "step": 1899680
    },
    {
      "epoch": 3.108900715487389,
      "grad_norm": 0.4289076328277588,
      "learning_rate": 3.8071203125926613e-06,
      "loss": 0.0156,
      "step": 1899700
    },
    {
      "epoch": 3.1089334459260423,
      "grad_norm": 0.14297395944595337,
      "learning_rate": 3.8070544203791444e-06,
      "loss": 0.0124,
      "step": 1899720
    },
    {
      "epoch": 3.108966176364696,
      "grad_norm": 0.13647706806659698,
      "learning_rate": 3.806988528165627e-06,
      "loss": 0.0104,
      "step": 1899740
    },
    {
      "epoch": 3.108998906803349,
      "grad_norm": 0.5455942749977112,
      "learning_rate": 3.80692263595211e-06,
      "loss": 0.0114,
      "step": 1899760
    },
    {
      "epoch": 3.109031637242002,
      "grad_norm": 0.19000476598739624,
      "learning_rate": 3.8068567437385926e-06,
      "loss": 0.0116,
      "step": 1899780
    },
    {
      "epoch": 3.1090643676806557,
      "grad_norm": 0.309264600276947,
      "learning_rate": 3.806790851525076e-06,
      "loss": 0.0096,
      "step": 1899800
    },
    {
      "epoch": 3.109097098119309,
      "grad_norm": 0.7077932953834534,
      "learning_rate": 3.8067249593115585e-06,
      "loss": 0.0078,
      "step": 1899820
    },
    {
      "epoch": 3.1091298285579625,
      "grad_norm": 0.216048002243042,
      "learning_rate": 3.8066590670980413e-06,
      "loss": 0.0075,
      "step": 1899840
    },
    {
      "epoch": 3.1091625589966156,
      "grad_norm": 0.2598699629306793,
      "learning_rate": 3.806593174884524e-06,
      "loss": 0.0139,
      "step": 1899860
    },
    {
      "epoch": 3.1091952894352692,
      "grad_norm": 0.11020027101039886,
      "learning_rate": 3.806527282671007e-06,
      "loss": 0.0119,
      "step": 1899880
    },
    {
      "epoch": 3.1092280198739224,
      "grad_norm": 0.9065190553665161,
      "learning_rate": 3.80646139045749e-06,
      "loss": 0.0096,
      "step": 1899900
    },
    {
      "epoch": 3.1092607503125755,
      "grad_norm": 0.33768776059150696,
      "learning_rate": 3.8063954982439727e-06,
      "loss": 0.016,
      "step": 1899920
    },
    {
      "epoch": 3.109293480751229,
      "grad_norm": 0.5041691660881042,
      "learning_rate": 3.8063296060304554e-06,
      "loss": 0.0133,
      "step": 1899940
    },
    {
      "epoch": 3.1093262111898823,
      "grad_norm": 0.20233483612537384,
      "learning_rate": 3.8062637138169386e-06,
      "loss": 0.0136,
      "step": 1899960
    },
    {
      "epoch": 3.109358941628536,
      "grad_norm": 0.40910762548446655,
      "learning_rate": 3.8061978216034213e-06,
      "loss": 0.0079,
      "step": 1899980
    },
    {
      "epoch": 3.109391672067189,
      "grad_norm": 0.49942201375961304,
      "learning_rate": 3.806131929389904e-06,
      "loss": 0.0152,
      "step": 1900000
    },
    {
      "epoch": 3.109391672067189,
      "eval_loss": 0.007153215352445841,
      "eval_runtime": 6534.4077,
      "eval_samples_per_second": 157.299,
      "eval_steps_per_second": 15.73,
      "eval_sts-dev_pearson_cosine": 0.9836459772359298,
      "eval_sts-dev_spearman_cosine": 0.8947059985176853,
      "step": 1900000
    },
    {
      "epoch": 3.109424402505842,
      "grad_norm": 0.3964711129665375,
      "learning_rate": 3.8060660371763876e-06,
      "loss": 0.0107,
      "step": 1900020
    },
    {
      "epoch": 3.1094571329444958,
      "grad_norm": 0.6475272178649902,
      "learning_rate": 3.8060001449628704e-06,
      "loss": 0.0089,
      "step": 1900040
    },
    {
      "epoch": 3.109489863383149,
      "grad_norm": 0.2445119023323059,
      "learning_rate": 3.805934252749353e-06,
      "loss": 0.0196,
      "step": 1900060
    },
    {
      "epoch": 3.1095225938218025,
      "grad_norm": 0.2210947722196579,
      "learning_rate": 3.805868360535836e-06,
      "loss": 0.0079,
      "step": 1900080
    },
    {
      "epoch": 3.1095553242604557,
      "grad_norm": 0.28808876872062683,
      "learning_rate": 3.805802468322319e-06,
      "loss": 0.0076,
      "step": 1900100
    },
    {
      "epoch": 3.1095880546991093,
      "grad_norm": 0.2714710235595703,
      "learning_rate": 3.8057365761088018e-06,
      "loss": 0.0118,
      "step": 1900120
    },
    {
      "epoch": 3.1096207851377624,
      "grad_norm": 0.2045421004295349,
      "learning_rate": 3.8056706838952845e-06,
      "loss": 0.0106,
      "step": 1900140
    },
    {
      "epoch": 3.1096535155764156,
      "grad_norm": 0.6180629730224609,
      "learning_rate": 3.8056047916817672e-06,
      "loss": 0.007,
      "step": 1900160
    },
    {
      "epoch": 3.109686246015069,
      "grad_norm": 0.110615573823452,
      "learning_rate": 3.80553889946825e-06,
      "loss": 0.0139,
      "step": 1900180
    },
    {
      "epoch": 3.1097189764537223,
      "grad_norm": 0.21371206641197205,
      "learning_rate": 3.805473007254733e-06,
      "loss": 0.0115,
      "step": 1900200
    },
    {
      "epoch": 3.109751706892376,
      "grad_norm": 0.2086198627948761,
      "learning_rate": 3.805407115041216e-06,
      "loss": 0.015,
      "step": 1900220
    },
    {
      "epoch": 3.109784437331029,
      "grad_norm": 0.23596973717212677,
      "learning_rate": 3.8053412228276986e-06,
      "loss": 0.0137,
      "step": 1900240
    },
    {
      "epoch": 3.1098171677696826,
      "grad_norm": 0.27718988060951233,
      "learning_rate": 3.8052753306141814e-06,
      "loss": 0.0148,
      "step": 1900260
    },
    {
      "epoch": 3.109849898208336,
      "grad_norm": 0.34803104400634766,
      "learning_rate": 3.8052094384006645e-06,
      "loss": 0.0123,
      "step": 1900280
    },
    {
      "epoch": 3.109882628646989,
      "grad_norm": 0.1338663250207901,
      "learning_rate": 3.8051435461871473e-06,
      "loss": 0.0077,
      "step": 1900300
    },
    {
      "epoch": 3.1099153590856425,
      "grad_norm": 0.07422997057437897,
      "learning_rate": 3.80507765397363e-06,
      "loss": 0.0064,
      "step": 1900320
    },
    {
      "epoch": 3.1099480895242957,
      "grad_norm": 0.48129868507385254,
      "learning_rate": 3.8050117617601127e-06,
      "loss": 0.0104,
      "step": 1900340
    },
    {
      "epoch": 3.1099808199629493,
      "grad_norm": 0.59763503074646,
      "learning_rate": 3.804945869546596e-06,
      "loss": 0.0069,
      "step": 1900360
    },
    {
      "epoch": 3.1100135504016024,
      "grad_norm": 0.3453509509563446,
      "learning_rate": 3.804879977333079e-06,
      "loss": 0.0134,
      "step": 1900380
    },
    {
      "epoch": 3.110046280840256,
      "grad_norm": 0.21462327241897583,
      "learning_rate": 3.804814085119562e-06,
      "loss": 0.0083,
      "step": 1900400
    },
    {
      "epoch": 3.110079011278909,
      "grad_norm": 0.18624255061149597,
      "learning_rate": 3.804748192906045e-06,
      "loss": 0.0073,
      "step": 1900420
    },
    {
      "epoch": 3.1101117417175623,
      "grad_norm": 0.49662142992019653,
      "learning_rate": 3.8046823006925277e-06,
      "loss": 0.0107,
      "step": 1900440
    },
    {
      "epoch": 3.110144472156216,
      "grad_norm": 0.08230923116207123,
      "learning_rate": 3.8046164084790105e-06,
      "loss": 0.0078,
      "step": 1900460
    },
    {
      "epoch": 3.110177202594869,
      "grad_norm": 0.1170182004570961,
      "learning_rate": 3.804550516265493e-06,
      "loss": 0.0214,
      "step": 1900480
    },
    {
      "epoch": 3.1102099330335227,
      "grad_norm": 0.3057064116001129,
      "learning_rate": 3.8044846240519764e-06,
      "loss": 0.007,
      "step": 1900500
    },
    {
      "epoch": 3.110242663472176,
      "grad_norm": 0.871188759803772,
      "learning_rate": 3.804418731838459e-06,
      "loss": 0.0106,
      "step": 1900520
    },
    {
      "epoch": 3.110275393910829,
      "grad_norm": 0.4575713276863098,
      "learning_rate": 3.804352839624942e-06,
      "loss": 0.0113,
      "step": 1900540
    },
    {
      "epoch": 3.1103081243494826,
      "grad_norm": 0.1884998083114624,
      "learning_rate": 3.8042869474114246e-06,
      "loss": 0.0106,
      "step": 1900560
    },
    {
      "epoch": 3.1103408547881357,
      "grad_norm": 0.6551701426506042,
      "learning_rate": 3.8042210551979073e-06,
      "loss": 0.0074,
      "step": 1900580
    },
    {
      "epoch": 3.1103735852267893,
      "grad_norm": 0.039167370647192,
      "learning_rate": 3.8041551629843905e-06,
      "loss": 0.007,
      "step": 1900600
    },
    {
      "epoch": 3.1104063156654425,
      "grad_norm": 0.19782301783561707,
      "learning_rate": 3.8040892707708732e-06,
      "loss": 0.0071,
      "step": 1900620
    },
    {
      "epoch": 3.110439046104096,
      "grad_norm": 0.2156011462211609,
      "learning_rate": 3.804023378557356e-06,
      "loss": 0.016,
      "step": 1900640
    },
    {
      "epoch": 3.110471776542749,
      "grad_norm": 0.1365821212530136,
      "learning_rate": 3.8039574863438387e-06,
      "loss": 0.0104,
      "step": 1900660
    },
    {
      "epoch": 3.1105045069814024,
      "grad_norm": 0.2517892122268677,
      "learning_rate": 3.803891594130322e-06,
      "loss": 0.0127,
      "step": 1900680
    },
    {
      "epoch": 3.110537237420056,
      "grad_norm": 0.4140036702156067,
      "learning_rate": 3.8038257019168046e-06,
      "loss": 0.0137,
      "step": 1900700
    },
    {
      "epoch": 3.110569967858709,
      "grad_norm": 0.577124834060669,
      "learning_rate": 3.8037598097032873e-06,
      "loss": 0.0206,
      "step": 1900720
    },
    {
      "epoch": 3.1106026982973627,
      "grad_norm": 0.13704292476177216,
      "learning_rate": 3.803693917489771e-06,
      "loss": 0.0122,
      "step": 1900740
    },
    {
      "epoch": 3.110635428736016,
      "grad_norm": 0.04514183849096298,
      "learning_rate": 3.8036280252762537e-06,
      "loss": 0.0113,
      "step": 1900760
    },
    {
      "epoch": 3.1106681591746694,
      "grad_norm": 0.3727768361568451,
      "learning_rate": 3.8035621330627364e-06,
      "loss": 0.0138,
      "step": 1900780
    },
    {
      "epoch": 3.1107008896133226,
      "grad_norm": 0.09040225297212601,
      "learning_rate": 3.803496240849219e-06,
      "loss": 0.0129,
      "step": 1900800
    },
    {
      "epoch": 3.1107336200519757,
      "grad_norm": 0.25503093004226685,
      "learning_rate": 3.8034303486357023e-06,
      "loss": 0.0095,
      "step": 1900820
    },
    {
      "epoch": 3.1107663504906293,
      "grad_norm": 0.24105769395828247,
      "learning_rate": 3.803364456422185e-06,
      "loss": 0.0106,
      "step": 1900840
    },
    {
      "epoch": 3.1107990809292825,
      "grad_norm": 0.19834455847740173,
      "learning_rate": 3.8032985642086678e-06,
      "loss": 0.014,
      "step": 1900860
    },
    {
      "epoch": 3.110831811367936,
      "grad_norm": 0.37651878595352173,
      "learning_rate": 3.8032326719951505e-06,
      "loss": 0.0186,
      "step": 1900880
    },
    {
      "epoch": 3.1108645418065892,
      "grad_norm": 0.602425754070282,
      "learning_rate": 3.8031667797816337e-06,
      "loss": 0.0149,
      "step": 1900900
    },
    {
      "epoch": 3.110897272245243,
      "grad_norm": 0.6348960995674133,
      "learning_rate": 3.8031008875681164e-06,
      "loss": 0.0132,
      "step": 1900920
    },
    {
      "epoch": 3.110930002683896,
      "grad_norm": 0.1096695214509964,
      "learning_rate": 3.803034995354599e-06,
      "loss": 0.0069,
      "step": 1900940
    },
    {
      "epoch": 3.110962733122549,
      "grad_norm": 0.13393443822860718,
      "learning_rate": 3.802969103141082e-06,
      "loss": 0.008,
      "step": 1900960
    },
    {
      "epoch": 3.1109954635612027,
      "grad_norm": 0.28609856963157654,
      "learning_rate": 3.802903210927565e-06,
      "loss": 0.011,
      "step": 1900980
    },
    {
      "epoch": 3.111028193999856,
      "grad_norm": 0.23982207477092743,
      "learning_rate": 3.802837318714048e-06,
      "loss": 0.0127,
      "step": 1901000
    },
    {
      "epoch": 3.1110609244385095,
      "grad_norm": 0.5100603699684143,
      "learning_rate": 3.8027714265005306e-06,
      "loss": 0.0132,
      "step": 1901020
    },
    {
      "epoch": 3.1110936548771626,
      "grad_norm": 0.03115321695804596,
      "learning_rate": 3.8027055342870133e-06,
      "loss": 0.0121,
      "step": 1901040
    },
    {
      "epoch": 3.111126385315816,
      "grad_norm": 0.4064856469631195,
      "learning_rate": 3.802639642073496e-06,
      "loss": 0.0101,
      "step": 1901060
    },
    {
      "epoch": 3.1111591157544694,
      "grad_norm": 0.17775481939315796,
      "learning_rate": 3.8025737498599796e-06,
      "loss": 0.0088,
      "step": 1901080
    },
    {
      "epoch": 3.1111918461931225,
      "grad_norm": 0.28011247515678406,
      "learning_rate": 3.8025078576464624e-06,
      "loss": 0.0101,
      "step": 1901100
    },
    {
      "epoch": 3.111224576631776,
      "grad_norm": 0.329097718000412,
      "learning_rate": 3.802441965432945e-06,
      "loss": 0.0097,
      "step": 1901120
    },
    {
      "epoch": 3.1112573070704292,
      "grad_norm": 1.6157559156417847,
      "learning_rate": 3.8023760732194283e-06,
      "loss": 0.0132,
      "step": 1901140
    },
    {
      "epoch": 3.111290037509083,
      "grad_norm": 0.273034006357193,
      "learning_rate": 3.802310181005911e-06,
      "loss": 0.0123,
      "step": 1901160
    },
    {
      "epoch": 3.111322767947736,
      "grad_norm": 0.19741643965244293,
      "learning_rate": 3.8022442887923937e-06,
      "loss": 0.0094,
      "step": 1901180
    },
    {
      "epoch": 3.1113554983863896,
      "grad_norm": 0.06900019198656082,
      "learning_rate": 3.8021783965788765e-06,
      "loss": 0.0123,
      "step": 1901200
    },
    {
      "epoch": 3.1113882288250427,
      "grad_norm": 0.17870545387268066,
      "learning_rate": 3.8021125043653596e-06,
      "loss": 0.0139,
      "step": 1901220
    },
    {
      "epoch": 3.111420959263696,
      "grad_norm": 0.1447010338306427,
      "learning_rate": 3.8020466121518424e-06,
      "loss": 0.0143,
      "step": 1901240
    },
    {
      "epoch": 3.1114536897023495,
      "grad_norm": 0.34885162115097046,
      "learning_rate": 3.801980719938325e-06,
      "loss": 0.0108,
      "step": 1901260
    },
    {
      "epoch": 3.1114864201410026,
      "grad_norm": 0.1342480480670929,
      "learning_rate": 3.801914827724808e-06,
      "loss": 0.0129,
      "step": 1901280
    },
    {
      "epoch": 3.1115191505796562,
      "grad_norm": 0.2094380408525467,
      "learning_rate": 3.801848935511291e-06,
      "loss": 0.012,
      "step": 1901300
    },
    {
      "epoch": 3.1115518810183094,
      "grad_norm": 0.2623637318611145,
      "learning_rate": 3.8017830432977738e-06,
      "loss": 0.0089,
      "step": 1901320
    },
    {
      "epoch": 3.111584611456963,
      "grad_norm": 0.4753667116165161,
      "learning_rate": 3.8017171510842565e-06,
      "loss": 0.0083,
      "step": 1901340
    },
    {
      "epoch": 3.111617341895616,
      "grad_norm": 0.29862093925476074,
      "learning_rate": 3.8016512588707392e-06,
      "loss": 0.0095,
      "step": 1901360
    },
    {
      "epoch": 3.1116500723342693,
      "grad_norm": 0.5903351902961731,
      "learning_rate": 3.8015853666572224e-06,
      "loss": 0.0098,
      "step": 1901380
    },
    {
      "epoch": 3.111682802772923,
      "grad_norm": 0.2088501900434494,
      "learning_rate": 3.801519474443705e-06,
      "loss": 0.0089,
      "step": 1901400
    },
    {
      "epoch": 3.111715533211576,
      "grad_norm": 0.10984328389167786,
      "learning_rate": 3.801453582230188e-06,
      "loss": 0.0132,
      "step": 1901420
    },
    {
      "epoch": 3.1117482636502296,
      "grad_norm": 0.2643013596534729,
      "learning_rate": 3.8013876900166715e-06,
      "loss": 0.0129,
      "step": 1901440
    },
    {
      "epoch": 3.1117809940888828,
      "grad_norm": 0.21542172133922577,
      "learning_rate": 3.8013217978031542e-06,
      "loss": 0.0121,
      "step": 1901460
    },
    {
      "epoch": 3.111813724527536,
      "grad_norm": 0.8260660171508789,
      "learning_rate": 3.801255905589637e-06,
      "loss": 0.012,
      "step": 1901480
    },
    {
      "epoch": 3.1118464549661895,
      "grad_norm": 0.24536457657814026,
      "learning_rate": 3.8011900133761197e-06,
      "loss": 0.0116,
      "step": 1901500
    },
    {
      "epoch": 3.1118791854048427,
      "grad_norm": 0.6476077437400818,
      "learning_rate": 3.801124121162603e-06,
      "loss": 0.0117,
      "step": 1901520
    },
    {
      "epoch": 3.1119119158434962,
      "grad_norm": 0.5252401828765869,
      "learning_rate": 3.8010582289490856e-06,
      "loss": 0.0162,
      "step": 1901540
    },
    {
      "epoch": 3.1119446462821494,
      "grad_norm": 0.11208183318376541,
      "learning_rate": 3.8009923367355683e-06,
      "loss": 0.0129,
      "step": 1901560
    },
    {
      "epoch": 3.111977376720803,
      "grad_norm": 0.6838605999946594,
      "learning_rate": 3.800926444522051e-06,
      "loss": 0.0146,
      "step": 1901580
    },
    {
      "epoch": 3.112010107159456,
      "grad_norm": 0.5848289132118225,
      "learning_rate": 3.800860552308534e-06,
      "loss": 0.023,
      "step": 1901600
    },
    {
      "epoch": 3.1120428375981093,
      "grad_norm": 0.35965171456336975,
      "learning_rate": 3.800794660095017e-06,
      "loss": 0.013,
      "step": 1901620
    },
    {
      "epoch": 3.112075568036763,
      "grad_norm": 0.21317726373672485,
      "learning_rate": 3.8007287678814997e-06,
      "loss": 0.008,
      "step": 1901640
    },
    {
      "epoch": 3.112108298475416,
      "grad_norm": 0.4710768163204193,
      "learning_rate": 3.8006628756679825e-06,
      "loss": 0.0188,
      "step": 1901660
    },
    {
      "epoch": 3.1121410289140696,
      "grad_norm": 0.27017849683761597,
      "learning_rate": 3.800596983454465e-06,
      "loss": 0.0139,
      "step": 1901680
    },
    {
      "epoch": 3.112173759352723,
      "grad_norm": 0.14363311231136322,
      "learning_rate": 3.8005310912409484e-06,
      "loss": 0.0074,
      "step": 1901700
    },
    {
      "epoch": 3.1122064897913764,
      "grad_norm": 0.37282806634902954,
      "learning_rate": 3.800465199027431e-06,
      "loss": 0.0094,
      "step": 1901720
    },
    {
      "epoch": 3.1122392202300295,
      "grad_norm": 0.4703425168991089,
      "learning_rate": 3.800399306813914e-06,
      "loss": 0.0139,
      "step": 1901740
    },
    {
      "epoch": 3.1122719506686827,
      "grad_norm": 0.2540163993835449,
      "learning_rate": 3.8003334146003966e-06,
      "loss": 0.0132,
      "step": 1901760
    },
    {
      "epoch": 3.1123046811073363,
      "grad_norm": 0.19386743009090424,
      "learning_rate": 3.80026752238688e-06,
      "loss": 0.0126,
      "step": 1901780
    },
    {
      "epoch": 3.1123374115459894,
      "grad_norm": 0.4623521566390991,
      "learning_rate": 3.800201630173363e-06,
      "loss": 0.0165,
      "step": 1901800
    },
    {
      "epoch": 3.112370141984643,
      "grad_norm": 0.21010245382785797,
      "learning_rate": 3.8001357379598456e-06,
      "loss": 0.0109,
      "step": 1901820
    },
    {
      "epoch": 3.112402872423296,
      "grad_norm": 0.33049464225769043,
      "learning_rate": 3.800069845746329e-06,
      "loss": 0.0071,
      "step": 1901840
    },
    {
      "epoch": 3.1124356028619498,
      "grad_norm": 0.4869536757469177,
      "learning_rate": 3.8000039535328115e-06,
      "loss": 0.016,
      "step": 1901860
    },
    {
      "epoch": 3.112468333300603,
      "grad_norm": 0.18421807885169983,
      "learning_rate": 3.7999380613192943e-06,
      "loss": 0.0122,
      "step": 1901880
    },
    {
      "epoch": 3.112501063739256,
      "grad_norm": 0.5558807253837585,
      "learning_rate": 3.799872169105777e-06,
      "loss": 0.0174,
      "step": 1901900
    },
    {
      "epoch": 3.1125337941779097,
      "grad_norm": 0.2828119993209839,
      "learning_rate": 3.79980627689226e-06,
      "loss": 0.0193,
      "step": 1901920
    },
    {
      "epoch": 3.112566524616563,
      "grad_norm": 0.4222191572189331,
      "learning_rate": 3.799740384678743e-06,
      "loss": 0.0149,
      "step": 1901940
    },
    {
      "epoch": 3.1125992550552164,
      "grad_norm": 0.5011020302772522,
      "learning_rate": 3.7996744924652257e-06,
      "loss": 0.0084,
      "step": 1901960
    },
    {
      "epoch": 3.1126319854938695,
      "grad_norm": 0.3217800259590149,
      "learning_rate": 3.7996086002517084e-06,
      "loss": 0.0111,
      "step": 1901980
    },
    {
      "epoch": 3.112664715932523,
      "grad_norm": 0.3102225661277771,
      "learning_rate": 3.799542708038191e-06,
      "loss": 0.0124,
      "step": 1902000
    },
    {
      "epoch": 3.1126974463711763,
      "grad_norm": 0.6173469424247742,
      "learning_rate": 3.7994768158246743e-06,
      "loss": 0.0138,
      "step": 1902020
    },
    {
      "epoch": 3.1127301768098294,
      "grad_norm": 0.4247557818889618,
      "learning_rate": 3.799410923611157e-06,
      "loss": 0.0159,
      "step": 1902040
    },
    {
      "epoch": 3.112762907248483,
      "grad_norm": 0.1541966050863266,
      "learning_rate": 3.79934503139764e-06,
      "loss": 0.0081,
      "step": 1902060
    },
    {
      "epoch": 3.112795637687136,
      "grad_norm": 0.7144539952278137,
      "learning_rate": 3.7992791391841225e-06,
      "loss": 0.0117,
      "step": 1902080
    },
    {
      "epoch": 3.11282836812579,
      "grad_norm": 0.18848003447055817,
      "learning_rate": 3.7992132469706057e-06,
      "loss": 0.0101,
      "step": 1902100
    },
    {
      "epoch": 3.112861098564443,
      "grad_norm": 0.1748739331960678,
      "learning_rate": 3.7991473547570884e-06,
      "loss": 0.0114,
      "step": 1902120
    },
    {
      "epoch": 3.112893829003096,
      "grad_norm": 0.2630976736545563,
      "learning_rate": 3.7990814625435716e-06,
      "loss": 0.0081,
      "step": 1902140
    },
    {
      "epoch": 3.1129265594417497,
      "grad_norm": 0.41849058866500854,
      "learning_rate": 3.7990155703300548e-06,
      "loss": 0.0141,
      "step": 1902160
    },
    {
      "epoch": 3.112959289880403,
      "grad_norm": 0.30112892389297485,
      "learning_rate": 3.7989496781165375e-06,
      "loss": 0.0089,
      "step": 1902180
    },
    {
      "epoch": 3.1129920203190564,
      "grad_norm": 1.841970443725586,
      "learning_rate": 3.7988837859030202e-06,
      "loss": 0.0167,
      "step": 1902200
    },
    {
      "epoch": 3.1130247507577096,
      "grad_norm": 0.385548859834671,
      "learning_rate": 3.798817893689503e-06,
      "loss": 0.0133,
      "step": 1902220
    },
    {
      "epoch": 3.113057481196363,
      "grad_norm": 0.17401082813739777,
      "learning_rate": 3.798752001475986e-06,
      "loss": 0.0116,
      "step": 1902240
    },
    {
      "epoch": 3.1130902116350163,
      "grad_norm": 0.08409371227025986,
      "learning_rate": 3.798686109262469e-06,
      "loss": 0.0152,
      "step": 1902260
    },
    {
      "epoch": 3.1131229420736695,
      "grad_norm": 0.23444578051567078,
      "learning_rate": 3.7986202170489516e-06,
      "loss": 0.011,
      "step": 1902280
    },
    {
      "epoch": 3.113155672512323,
      "grad_norm": 0.5103635191917419,
      "learning_rate": 3.7985543248354344e-06,
      "loss": 0.0127,
      "step": 1902300
    },
    {
      "epoch": 3.113188402950976,
      "grad_norm": 0.3288516700267792,
      "learning_rate": 3.7984884326219175e-06,
      "loss": 0.0085,
      "step": 1902320
    },
    {
      "epoch": 3.11322113338963,
      "grad_norm": 0.26870134472846985,
      "learning_rate": 3.7984225404084003e-06,
      "loss": 0.0081,
      "step": 1902340
    },
    {
      "epoch": 3.113253863828283,
      "grad_norm": 0.11626847088336945,
      "learning_rate": 3.798356648194883e-06,
      "loss": 0.0088,
      "step": 1902360
    },
    {
      "epoch": 3.1132865942669365,
      "grad_norm": 0.2156238853931427,
      "learning_rate": 3.7982907559813657e-06,
      "loss": 0.0133,
      "step": 1902380
    },
    {
      "epoch": 3.1133193247055897,
      "grad_norm": 0.4805509150028229,
      "learning_rate": 3.798224863767849e-06,
      "loss": 0.0139,
      "step": 1902400
    },
    {
      "epoch": 3.113352055144243,
      "grad_norm": 0.6811047196388245,
      "learning_rate": 3.7981589715543317e-06,
      "loss": 0.0099,
      "step": 1902420
    },
    {
      "epoch": 3.1133847855828964,
      "grad_norm": 0.12395580857992172,
      "learning_rate": 3.7980930793408144e-06,
      "loss": 0.011,
      "step": 1902440
    },
    {
      "epoch": 3.1134175160215496,
      "grad_norm": 0.5541117787361145,
      "learning_rate": 3.798027187127297e-06,
      "loss": 0.0089,
      "step": 1902460
    },
    {
      "epoch": 3.113450246460203,
      "grad_norm": 0.9048829674720764,
      "learning_rate": 3.79796129491378e-06,
      "loss": 0.0098,
      "step": 1902480
    },
    {
      "epoch": 3.1134829768988563,
      "grad_norm": 0.29747143387794495,
      "learning_rate": 3.7978954027002635e-06,
      "loss": 0.012,
      "step": 1902500
    },
    {
      "epoch": 3.11351570733751,
      "grad_norm": 0.44507884979248047,
      "learning_rate": 3.797829510486746e-06,
      "loss": 0.0106,
      "step": 1902520
    },
    {
      "epoch": 3.113548437776163,
      "grad_norm": 0.33699291944503784,
      "learning_rate": 3.797763618273229e-06,
      "loss": 0.0091,
      "step": 1902540
    },
    {
      "epoch": 3.1135811682148162,
      "grad_norm": 0.27247676253318787,
      "learning_rate": 3.797697726059712e-06,
      "loss": 0.0118,
      "step": 1902560
    },
    {
      "epoch": 3.11361389865347,
      "grad_norm": 0.5558423399925232,
      "learning_rate": 3.797631833846195e-06,
      "loss": 0.0124,
      "step": 1902580
    },
    {
      "epoch": 3.113646629092123,
      "grad_norm": 0.15555426478385925,
      "learning_rate": 3.7975659416326776e-06,
      "loss": 0.0167,
      "step": 1902600
    },
    {
      "epoch": 3.1136793595307766,
      "grad_norm": 0.20849162340164185,
      "learning_rate": 3.7975000494191603e-06,
      "loss": 0.014,
      "step": 1902620
    },
    {
      "epoch": 3.1137120899694297,
      "grad_norm": 0.5898510217666626,
      "learning_rate": 3.7974341572056435e-06,
      "loss": 0.0152,
      "step": 1902640
    },
    {
      "epoch": 3.1137448204080833,
      "grad_norm": 0.9126307964324951,
      "learning_rate": 3.7973682649921262e-06,
      "loss": 0.0146,
      "step": 1902660
    },
    {
      "epoch": 3.1137775508467365,
      "grad_norm": 0.18450231850147247,
      "learning_rate": 3.797302372778609e-06,
      "loss": 0.0136,
      "step": 1902680
    },
    {
      "epoch": 3.1138102812853896,
      "grad_norm": 0.1439303606748581,
      "learning_rate": 3.7972364805650917e-06,
      "loss": 0.0112,
      "step": 1902700
    },
    {
      "epoch": 3.113843011724043,
      "grad_norm": 0.31147581338882446,
      "learning_rate": 3.797170588351575e-06,
      "loss": 0.0124,
      "step": 1902720
    },
    {
      "epoch": 3.1138757421626964,
      "grad_norm": 0.11046404391527176,
      "learning_rate": 3.7971046961380576e-06,
      "loss": 0.0105,
      "step": 1902740
    },
    {
      "epoch": 3.11390847260135,
      "grad_norm": 0.2409471720457077,
      "learning_rate": 3.7970388039245403e-06,
      "loss": 0.009,
      "step": 1902760
    },
    {
      "epoch": 3.113941203040003,
      "grad_norm": 0.28200817108154297,
      "learning_rate": 3.796972911711023e-06,
      "loss": 0.0115,
      "step": 1902780
    },
    {
      "epoch": 3.1139739334786567,
      "grad_norm": 0.35938459634780884,
      "learning_rate": 3.7969070194975062e-06,
      "loss": 0.0065,
      "step": 1902800
    },
    {
      "epoch": 3.11400666391731,
      "grad_norm": 0.2287757843732834,
      "learning_rate": 3.796841127283989e-06,
      "loss": 0.0069,
      "step": 1902820
    },
    {
      "epoch": 3.114039394355963,
      "grad_norm": 0.5474060773849487,
      "learning_rate": 3.796775235070472e-06,
      "loss": 0.0132,
      "step": 1902840
    },
    {
      "epoch": 3.1140721247946166,
      "grad_norm": 0.4641171097755432,
      "learning_rate": 3.7967093428569553e-06,
      "loss": 0.0104,
      "step": 1902860
    },
    {
      "epoch": 3.1141048552332697,
      "grad_norm": 0.23155082762241364,
      "learning_rate": 3.796643450643438e-06,
      "loss": 0.0133,
      "step": 1902880
    },
    {
      "epoch": 3.1141375856719233,
      "grad_norm": 0.5426353812217712,
      "learning_rate": 3.796577558429921e-06,
      "loss": 0.0126,
      "step": 1902900
    },
    {
      "epoch": 3.1141703161105765,
      "grad_norm": 0.32810354232788086,
      "learning_rate": 3.7965116662164035e-06,
      "loss": 0.0113,
      "step": 1902920
    },
    {
      "epoch": 3.11420304654923,
      "grad_norm": 0.0838087871670723,
      "learning_rate": 3.7964457740028867e-06,
      "loss": 0.0121,
      "step": 1902940
    },
    {
      "epoch": 3.1142357769878832,
      "grad_norm": 0.6694374084472656,
      "learning_rate": 3.7963798817893694e-06,
      "loss": 0.0102,
      "step": 1902960
    },
    {
      "epoch": 3.1142685074265364,
      "grad_norm": 0.26758578419685364,
      "learning_rate": 3.796313989575852e-06,
      "loss": 0.0095,
      "step": 1902980
    },
    {
      "epoch": 3.11430123786519,
      "grad_norm": 0.3295990526676178,
      "learning_rate": 3.796248097362335e-06,
      "loss": 0.0154,
      "step": 1903000
    },
    {
      "epoch": 3.114333968303843,
      "grad_norm": 0.272543340921402,
      "learning_rate": 3.7961822051488177e-06,
      "loss": 0.0093,
      "step": 1903020
    },
    {
      "epoch": 3.1143666987424967,
      "grad_norm": 0.6707121133804321,
      "learning_rate": 3.796116312935301e-06,
      "loss": 0.0094,
      "step": 1903040
    },
    {
      "epoch": 3.11439942918115,
      "grad_norm": 0.3327474892139435,
      "learning_rate": 3.7960504207217836e-06,
      "loss": 0.0097,
      "step": 1903060
    },
    {
      "epoch": 3.114432159619803,
      "grad_norm": 0.054330166429281235,
      "learning_rate": 3.7959845285082663e-06,
      "loss": 0.009,
      "step": 1903080
    },
    {
      "epoch": 3.1144648900584566,
      "grad_norm": 0.9007052183151245,
      "learning_rate": 3.795918636294749e-06,
      "loss": 0.0119,
      "step": 1903100
    },
    {
      "epoch": 3.1144976204971098,
      "grad_norm": 0.3601033389568329,
      "learning_rate": 3.795852744081232e-06,
      "loss": 0.0092,
      "step": 1903120
    },
    {
      "epoch": 3.1145303509357634,
      "grad_norm": 0.40175488591194153,
      "learning_rate": 3.795786851867715e-06,
      "loss": 0.0099,
      "step": 1903140
    },
    {
      "epoch": 3.1145630813744165,
      "grad_norm": 0.17346608638763428,
      "learning_rate": 3.7957209596541977e-06,
      "loss": 0.011,
      "step": 1903160
    },
    {
      "epoch": 3.11459581181307,
      "grad_norm": 0.3929625451564789,
      "learning_rate": 3.7956550674406804e-06,
      "loss": 0.0121,
      "step": 1903180
    },
    {
      "epoch": 3.1146285422517233,
      "grad_norm": 0.5021842122077942,
      "learning_rate": 3.795589175227164e-06,
      "loss": 0.0111,
      "step": 1903200
    },
    {
      "epoch": 3.1146612726903764,
      "grad_norm": 0.35568690299987793,
      "learning_rate": 3.7955232830136467e-06,
      "loss": 0.0083,
      "step": 1903220
    },
    {
      "epoch": 3.11469400312903,
      "grad_norm": 0.3774939179420471,
      "learning_rate": 3.7954573908001295e-06,
      "loss": 0.0116,
      "step": 1903240
    },
    {
      "epoch": 3.114726733567683,
      "grad_norm": 0.18623638153076172,
      "learning_rate": 3.7953914985866126e-06,
      "loss": 0.0087,
      "step": 1903260
    },
    {
      "epoch": 3.1147594640063367,
      "grad_norm": 0.10528810322284698,
      "learning_rate": 3.7953256063730954e-06,
      "loss": 0.0079,
      "step": 1903280
    },
    {
      "epoch": 3.11479219444499,
      "grad_norm": 0.4968266487121582,
      "learning_rate": 3.795259714159578e-06,
      "loss": 0.0101,
      "step": 1903300
    },
    {
      "epoch": 3.1148249248836435,
      "grad_norm": 0.12504471838474274,
      "learning_rate": 3.795193821946061e-06,
      "loss": 0.0091,
      "step": 1903320
    },
    {
      "epoch": 3.1148576553222966,
      "grad_norm": 0.09942659735679626,
      "learning_rate": 3.795127929732544e-06,
      "loss": 0.0098,
      "step": 1903340
    },
    {
      "epoch": 3.11489038576095,
      "grad_norm": 0.24205921590328217,
      "learning_rate": 3.7950620375190268e-06,
      "loss": 0.0213,
      "step": 1903360
    },
    {
      "epoch": 3.1149231161996034,
      "grad_norm": 0.4913538694381714,
      "learning_rate": 3.7949961453055095e-06,
      "loss": 0.0146,
      "step": 1903380
    },
    {
      "epoch": 3.1149558466382565,
      "grad_norm": 0.21577470004558563,
      "learning_rate": 3.7949302530919923e-06,
      "loss": 0.0102,
      "step": 1903400
    },
    {
      "epoch": 3.11498857707691,
      "grad_norm": 0.05241864174604416,
      "learning_rate": 3.7948643608784754e-06,
      "loss": 0.015,
      "step": 1903420
    },
    {
      "epoch": 3.1150213075155633,
      "grad_norm": 0.25474226474761963,
      "learning_rate": 3.794798468664958e-06,
      "loss": 0.0126,
      "step": 1903440
    },
    {
      "epoch": 3.115054037954217,
      "grad_norm": 0.28541550040245056,
      "learning_rate": 3.794732576451441e-06,
      "loss": 0.0101,
      "step": 1903460
    },
    {
      "epoch": 3.11508676839287,
      "grad_norm": 0.5246005058288574,
      "learning_rate": 3.7946666842379236e-06,
      "loss": 0.0104,
      "step": 1903480
    },
    {
      "epoch": 3.115119498831523,
      "grad_norm": 0.19849315285682678,
      "learning_rate": 3.7946007920244064e-06,
      "loss": 0.0139,
      "step": 1903500
    },
    {
      "epoch": 3.1151522292701768,
      "grad_norm": 0.05286276340484619,
      "learning_rate": 3.7945348998108895e-06,
      "loss": 0.01,
      "step": 1903520
    },
    {
      "epoch": 3.11518495970883,
      "grad_norm": 0.24465574324131012,
      "learning_rate": 3.7944690075973727e-06,
      "loss": 0.0118,
      "step": 1903540
    },
    {
      "epoch": 3.1152176901474835,
      "grad_norm": 0.3016040623188019,
      "learning_rate": 3.7944031153838554e-06,
      "loss": 0.0106,
      "step": 1903560
    },
    {
      "epoch": 3.1152504205861367,
      "grad_norm": 0.19137220084667206,
      "learning_rate": 3.7943372231703386e-06,
      "loss": 0.0121,
      "step": 1903580
    },
    {
      "epoch": 3.11528315102479,
      "grad_norm": 0.4354611039161682,
      "learning_rate": 3.7942713309568213e-06,
      "loss": 0.0096,
      "step": 1903600
    },
    {
      "epoch": 3.1153158814634434,
      "grad_norm": 0.27425533533096313,
      "learning_rate": 3.794205438743304e-06,
      "loss": 0.0112,
      "step": 1903620
    },
    {
      "epoch": 3.1153486119020966,
      "grad_norm": 0.3085033893585205,
      "learning_rate": 3.794139546529787e-06,
      "loss": 0.0124,
      "step": 1903640
    },
    {
      "epoch": 3.11538134234075,
      "grad_norm": 0.047541722655296326,
      "learning_rate": 3.79407365431627e-06,
      "loss": 0.0115,
      "step": 1903660
    },
    {
      "epoch": 3.1154140727794033,
      "grad_norm": 0.6352429389953613,
      "learning_rate": 3.7940077621027527e-06,
      "loss": 0.013,
      "step": 1903680
    },
    {
      "epoch": 3.115446803218057,
      "grad_norm": 0.43398427963256836,
      "learning_rate": 3.7939418698892355e-06,
      "loss": 0.0117,
      "step": 1903700
    },
    {
      "epoch": 3.11547953365671,
      "grad_norm": 0.21818400919437408,
      "learning_rate": 3.793875977675718e-06,
      "loss": 0.0082,
      "step": 1903720
    },
    {
      "epoch": 3.115512264095363,
      "grad_norm": 0.14758674800395966,
      "learning_rate": 3.7938100854622014e-06,
      "loss": 0.013,
      "step": 1903740
    },
    {
      "epoch": 3.115544994534017,
      "grad_norm": 0.26923972368240356,
      "learning_rate": 3.793744193248684e-06,
      "loss": 0.0106,
      "step": 1903760
    },
    {
      "epoch": 3.11557772497267,
      "grad_norm": 0.4940553605556488,
      "learning_rate": 3.793678301035167e-06,
      "loss": 0.0096,
      "step": 1903780
    },
    {
      "epoch": 3.1156104554113235,
      "grad_norm": 0.1949368417263031,
      "learning_rate": 3.7936124088216496e-06,
      "loss": 0.0098,
      "step": 1903800
    },
    {
      "epoch": 3.1156431858499767,
      "grad_norm": 0.318705677986145,
      "learning_rate": 3.7935465166081328e-06,
      "loss": 0.0127,
      "step": 1903820
    },
    {
      "epoch": 3.1156759162886303,
      "grad_norm": 0.19809184968471527,
      "learning_rate": 3.7934806243946155e-06,
      "loss": 0.0168,
      "step": 1903840
    },
    {
      "epoch": 3.1157086467272834,
      "grad_norm": 0.12892358005046844,
      "learning_rate": 3.7934147321810982e-06,
      "loss": 0.0079,
      "step": 1903860
    },
    {
      "epoch": 3.1157413771659366,
      "grad_norm": 0.34380680322647095,
      "learning_rate": 3.793348839967581e-06,
      "loss": 0.004,
      "step": 1903880
    },
    {
      "epoch": 3.11577410760459,
      "grad_norm": 0.4961600601673126,
      "learning_rate": 3.7932829477540646e-06,
      "loss": 0.0121,
      "step": 1903900
    },
    {
      "epoch": 3.1158068380432433,
      "grad_norm": 0.24910308420658112,
      "learning_rate": 3.7932170555405473e-06,
      "loss": 0.0145,
      "step": 1903920
    },
    {
      "epoch": 3.115839568481897,
      "grad_norm": 0.07179014384746552,
      "learning_rate": 3.79315116332703e-06,
      "loss": 0.0109,
      "step": 1903940
    },
    {
      "epoch": 3.11587229892055,
      "grad_norm": 0.7052276134490967,
      "learning_rate": 3.793085271113513e-06,
      "loss": 0.0102,
      "step": 1903960
    },
    {
      "epoch": 3.1159050293592037,
      "grad_norm": 0.09160680323839188,
      "learning_rate": 3.793019378899996e-06,
      "loss": 0.0113,
      "step": 1903980
    },
    {
      "epoch": 3.115937759797857,
      "grad_norm": 0.20028150081634521,
      "learning_rate": 3.7929534866864787e-06,
      "loss": 0.0145,
      "step": 1904000
    },
    {
      "epoch": 3.11597049023651,
      "grad_norm": 0.5475139021873474,
      "learning_rate": 3.7928875944729614e-06,
      "loss": 0.0105,
      "step": 1904020
    },
    {
      "epoch": 3.1160032206751636,
      "grad_norm": 0.3713502585887909,
      "learning_rate": 3.792821702259444e-06,
      "loss": 0.0142,
      "step": 1904040
    },
    {
      "epoch": 3.1160359511138167,
      "grad_norm": 0.3301991820335388,
      "learning_rate": 3.7927558100459273e-06,
      "loss": 0.0084,
      "step": 1904060
    },
    {
      "epoch": 3.1160686815524703,
      "grad_norm": 0.8365678787231445,
      "learning_rate": 3.79268991783241e-06,
      "loss": 0.0109,
      "step": 1904080
    },
    {
      "epoch": 3.1161014119911234,
      "grad_norm": 0.15966947376728058,
      "learning_rate": 3.792624025618893e-06,
      "loss": 0.0121,
      "step": 1904100
    },
    {
      "epoch": 3.116134142429777,
      "grad_norm": 0.32632142305374146,
      "learning_rate": 3.7925581334053755e-06,
      "loss": 0.0147,
      "step": 1904120
    },
    {
      "epoch": 3.11616687286843,
      "grad_norm": 0.35121721029281616,
      "learning_rate": 3.7924922411918587e-06,
      "loss": 0.0134,
      "step": 1904140
    },
    {
      "epoch": 3.1161996033070833,
      "grad_norm": 0.4013631343841553,
      "learning_rate": 3.7924263489783414e-06,
      "loss": 0.0177,
      "step": 1904160
    },
    {
      "epoch": 3.116232333745737,
      "grad_norm": 0.17626851797103882,
      "learning_rate": 3.792360456764824e-06,
      "loss": 0.0106,
      "step": 1904180
    },
    {
      "epoch": 3.11626506418439,
      "grad_norm": 0.30415791273117065,
      "learning_rate": 3.792294564551307e-06,
      "loss": 0.0088,
      "step": 1904200
    },
    {
      "epoch": 3.1162977946230437,
      "grad_norm": 0.3225734829902649,
      "learning_rate": 3.79222867233779e-06,
      "loss": 0.0149,
      "step": 1904220
    },
    {
      "epoch": 3.116330525061697,
      "grad_norm": 0.834479808807373,
      "learning_rate": 3.7921627801242732e-06,
      "loss": 0.0126,
      "step": 1904240
    },
    {
      "epoch": 3.1163632555003504,
      "grad_norm": 0.6989322900772095,
      "learning_rate": 3.792096887910756e-06,
      "loss": 0.0092,
      "step": 1904260
    },
    {
      "epoch": 3.1163959859390036,
      "grad_norm": 0.24080342054367065,
      "learning_rate": 3.792030995697239e-06,
      "loss": 0.0085,
      "step": 1904280
    },
    {
      "epoch": 3.1164287163776567,
      "grad_norm": 0.4909026324748993,
      "learning_rate": 3.791965103483722e-06,
      "loss": 0.0111,
      "step": 1904300
    },
    {
      "epoch": 3.1164614468163103,
      "grad_norm": 0.29539111256599426,
      "learning_rate": 3.7918992112702046e-06,
      "loss": 0.0091,
      "step": 1904320
    },
    {
      "epoch": 3.1164941772549635,
      "grad_norm": 0.440378338098526,
      "learning_rate": 3.7918333190566874e-06,
      "loss": 0.016,
      "step": 1904340
    },
    {
      "epoch": 3.116526907693617,
      "grad_norm": 0.3935238718986511,
      "learning_rate": 3.7917674268431705e-06,
      "loss": 0.0087,
      "step": 1904360
    },
    {
      "epoch": 3.11655963813227,
      "grad_norm": 0.1697826385498047,
      "learning_rate": 3.7917015346296533e-06,
      "loss": 0.0111,
      "step": 1904380
    },
    {
      "epoch": 3.116592368570924,
      "grad_norm": 0.11146792769432068,
      "learning_rate": 3.791635642416136e-06,
      "loss": 0.0101,
      "step": 1904400
    },
    {
      "epoch": 3.116625099009577,
      "grad_norm": 0.4893966019153595,
      "learning_rate": 3.7915697502026188e-06,
      "loss": 0.0124,
      "step": 1904420
    },
    {
      "epoch": 3.11665782944823,
      "grad_norm": 0.30109602212905884,
      "learning_rate": 3.7915038579891015e-06,
      "loss": 0.012,
      "step": 1904440
    },
    {
      "epoch": 3.1166905598868837,
      "grad_norm": 0.17367321252822876,
      "learning_rate": 3.7914379657755847e-06,
      "loss": 0.0121,
      "step": 1904460
    },
    {
      "epoch": 3.116723290325537,
      "grad_norm": 0.298942506313324,
      "learning_rate": 3.7913720735620674e-06,
      "loss": 0.0146,
      "step": 1904480
    },
    {
      "epoch": 3.1167560207641904,
      "grad_norm": 0.5359691381454468,
      "learning_rate": 3.79130618134855e-06,
      "loss": 0.0176,
      "step": 1904500
    },
    {
      "epoch": 3.1167887512028436,
      "grad_norm": 0.4867396056652069,
      "learning_rate": 3.791240289135033e-06,
      "loss": 0.0127,
      "step": 1904520
    },
    {
      "epoch": 3.1168214816414967,
      "grad_norm": 0.25077909231185913,
      "learning_rate": 3.791174396921516e-06,
      "loss": 0.0134,
      "step": 1904540
    },
    {
      "epoch": 3.1168542120801503,
      "grad_norm": 0.15312393009662628,
      "learning_rate": 3.7911085047079988e-06,
      "loss": 0.0137,
      "step": 1904560
    },
    {
      "epoch": 3.1168869425188035,
      "grad_norm": 0.3673524260520935,
      "learning_rate": 3.7910426124944815e-06,
      "loss": 0.013,
      "step": 1904580
    },
    {
      "epoch": 3.116919672957457,
      "grad_norm": 0.29653608798980713,
      "learning_rate": 3.790976720280965e-06,
      "loss": 0.0144,
      "step": 1904600
    },
    {
      "epoch": 3.1169524033961102,
      "grad_norm": 0.21840022504329681,
      "learning_rate": 3.790910828067448e-06,
      "loss": 0.0087,
      "step": 1904620
    },
    {
      "epoch": 3.116985133834764,
      "grad_norm": 0.18738609552383423,
      "learning_rate": 3.7908449358539306e-06,
      "loss": 0.0093,
      "step": 1904640
    },
    {
      "epoch": 3.117017864273417,
      "grad_norm": 0.41266095638275146,
      "learning_rate": 3.7907790436404133e-06,
      "loss": 0.0109,
      "step": 1904660
    },
    {
      "epoch": 3.11705059471207,
      "grad_norm": 0.44285497069358826,
      "learning_rate": 3.7907131514268965e-06,
      "loss": 0.0116,
      "step": 1904680
    },
    {
      "epoch": 3.1170833251507237,
      "grad_norm": 0.31356993317604065,
      "learning_rate": 3.7906472592133792e-06,
      "loss": 0.0112,
      "step": 1904700
    },
    {
      "epoch": 3.117116055589377,
      "grad_norm": 0.28310707211494446,
      "learning_rate": 3.790581366999862e-06,
      "loss": 0.0157,
      "step": 1904720
    },
    {
      "epoch": 3.1171487860280305,
      "grad_norm": 0.24947351217269897,
      "learning_rate": 3.7905154747863447e-06,
      "loss": 0.0115,
      "step": 1904740
    },
    {
      "epoch": 3.1171815164666836,
      "grad_norm": 0.2700260579586029,
      "learning_rate": 3.790449582572828e-06,
      "loss": 0.0108,
      "step": 1904760
    },
    {
      "epoch": 3.117214246905337,
      "grad_norm": 0.4402683973312378,
      "learning_rate": 3.7903836903593106e-06,
      "loss": 0.0117,
      "step": 1904780
    },
    {
      "epoch": 3.1172469773439904,
      "grad_norm": 0.24409520626068115,
      "learning_rate": 3.7903177981457934e-06,
      "loss": 0.0097,
      "step": 1904800
    },
    {
      "epoch": 3.1172797077826435,
      "grad_norm": 0.27005505561828613,
      "learning_rate": 3.790251905932276e-06,
      "loss": 0.0106,
      "step": 1904820
    },
    {
      "epoch": 3.117312438221297,
      "grad_norm": 0.12214459478855133,
      "learning_rate": 3.7901860137187593e-06,
      "loss": 0.0088,
      "step": 1904840
    },
    {
      "epoch": 3.1173451686599503,
      "grad_norm": 0.1975659281015396,
      "learning_rate": 3.790120121505242e-06,
      "loss": 0.0125,
      "step": 1904860
    },
    {
      "epoch": 3.117377899098604,
      "grad_norm": 0.10355681926012039,
      "learning_rate": 3.7900542292917247e-06,
      "loss": 0.0111,
      "step": 1904880
    },
    {
      "epoch": 3.117410629537257,
      "grad_norm": 0.23249122500419617,
      "learning_rate": 3.7899883370782075e-06,
      "loss": 0.0131,
      "step": 1904900
    },
    {
      "epoch": 3.1174433599759106,
      "grad_norm": 0.2557607591152191,
      "learning_rate": 3.7899224448646902e-06,
      "loss": 0.0076,
      "step": 1904920
    },
    {
      "epoch": 3.1174760904145637,
      "grad_norm": 0.09091305732727051,
      "learning_rate": 3.7898565526511734e-06,
      "loss": 0.0086,
      "step": 1904940
    },
    {
      "epoch": 3.117508820853217,
      "grad_norm": 0.5795274972915649,
      "learning_rate": 3.7897906604376565e-06,
      "loss": 0.0079,
      "step": 1904960
    },
    {
      "epoch": 3.1175415512918705,
      "grad_norm": 0.39038002490997314,
      "learning_rate": 3.7897247682241393e-06,
      "loss": 0.0086,
      "step": 1904980
    },
    {
      "epoch": 3.1175742817305236,
      "grad_norm": 0.17511014640331268,
      "learning_rate": 3.7896588760106224e-06,
      "loss": 0.0121,
      "step": 1905000
    },
    {
      "epoch": 3.1176070121691772,
      "grad_norm": 0.33032095432281494,
      "learning_rate": 3.789592983797105e-06,
      "loss": 0.0106,
      "step": 1905020
    },
    {
      "epoch": 3.1176397426078304,
      "grad_norm": 0.2505805492401123,
      "learning_rate": 3.789527091583588e-06,
      "loss": 0.0126,
      "step": 1905040
    },
    {
      "epoch": 3.117672473046484,
      "grad_norm": 0.16242104768753052,
      "learning_rate": 3.7894611993700707e-06,
      "loss": 0.0124,
      "step": 1905060
    },
    {
      "epoch": 3.117705203485137,
      "grad_norm": 0.10585235059261322,
      "learning_rate": 3.789395307156554e-06,
      "loss": 0.0078,
      "step": 1905080
    },
    {
      "epoch": 3.1177379339237903,
      "grad_norm": 0.17888271808624268,
      "learning_rate": 3.7893294149430366e-06,
      "loss": 0.0085,
      "step": 1905100
    },
    {
      "epoch": 3.117770664362444,
      "grad_norm": 0.5900053977966309,
      "learning_rate": 3.7892635227295193e-06,
      "loss": 0.0122,
      "step": 1905120
    },
    {
      "epoch": 3.117803394801097,
      "grad_norm": 0.450818806886673,
      "learning_rate": 3.789197630516002e-06,
      "loss": 0.0135,
      "step": 1905140
    },
    {
      "epoch": 3.1178361252397506,
      "grad_norm": 0.298380970954895,
      "learning_rate": 3.789131738302485e-06,
      "loss": 0.012,
      "step": 1905160
    },
    {
      "epoch": 3.1178688556784038,
      "grad_norm": 0.1923784613609314,
      "learning_rate": 3.789065846088968e-06,
      "loss": 0.0115,
      "step": 1905180
    },
    {
      "epoch": 3.117901586117057,
      "grad_norm": 0.2785048186779022,
      "learning_rate": 3.7889999538754507e-06,
      "loss": 0.011,
      "step": 1905200
    },
    {
      "epoch": 3.1179343165557105,
      "grad_norm": 0.1852455735206604,
      "learning_rate": 3.7889340616619334e-06,
      "loss": 0.016,
      "step": 1905220
    },
    {
      "epoch": 3.1179670469943637,
      "grad_norm": 0.05799191817641258,
      "learning_rate": 3.7888681694484166e-06,
      "loss": 0.0116,
      "step": 1905240
    },
    {
      "epoch": 3.1179997774330173,
      "grad_norm": 0.2664200961589813,
      "learning_rate": 3.7888022772348993e-06,
      "loss": 0.0124,
      "step": 1905260
    },
    {
      "epoch": 3.1180325078716704,
      "grad_norm": 0.07130815833806992,
      "learning_rate": 3.788736385021382e-06,
      "loss": 0.0109,
      "step": 1905280
    },
    {
      "epoch": 3.118065238310324,
      "grad_norm": 0.29733142256736755,
      "learning_rate": 3.7886704928078657e-06,
      "loss": 0.0113,
      "step": 1905300
    },
    {
      "epoch": 3.118097968748977,
      "grad_norm": 0.35590389370918274,
      "learning_rate": 3.7886046005943484e-06,
      "loss": 0.0072,
      "step": 1905320
    },
    {
      "epoch": 3.1181306991876303,
      "grad_norm": 0.22433798015117645,
      "learning_rate": 3.788538708380831e-06,
      "loss": 0.018,
      "step": 1905340
    },
    {
      "epoch": 3.118163429626284,
      "grad_norm": 1.1141477823257446,
      "learning_rate": 3.788472816167314e-06,
      "loss": 0.0149,
      "step": 1905360
    },
    {
      "epoch": 3.118196160064937,
      "grad_norm": 0.546025812625885,
      "learning_rate": 3.788406923953797e-06,
      "loss": 0.0122,
      "step": 1905380
    },
    {
      "epoch": 3.1182288905035906,
      "grad_norm": 0.20729707181453705,
      "learning_rate": 3.7883410317402798e-06,
      "loss": 0.0115,
      "step": 1905400
    },
    {
      "epoch": 3.118261620942244,
      "grad_norm": 0.0909373015165329,
      "learning_rate": 3.7882751395267625e-06,
      "loss": 0.0144,
      "step": 1905420
    },
    {
      "epoch": 3.1182943513808974,
      "grad_norm": 0.20426757633686066,
      "learning_rate": 3.7882092473132453e-06,
      "loss": 0.0095,
      "step": 1905440
    },
    {
      "epoch": 3.1183270818195505,
      "grad_norm": 0.059824138879776,
      "learning_rate": 3.788143355099728e-06,
      "loss": 0.0104,
      "step": 1905460
    },
    {
      "epoch": 3.1183598122582037,
      "grad_norm": 0.5059290528297424,
      "learning_rate": 3.788077462886211e-06,
      "loss": 0.0171,
      "step": 1905480
    },
    {
      "epoch": 3.1183925426968573,
      "grad_norm": 0.24355410039424896,
      "learning_rate": 3.788011570672694e-06,
      "loss": 0.0126,
      "step": 1905500
    },
    {
      "epoch": 3.1184252731355104,
      "grad_norm": 0.533976674079895,
      "learning_rate": 3.7879456784591766e-06,
      "loss": 0.0119,
      "step": 1905520
    },
    {
      "epoch": 3.118458003574164,
      "grad_norm": 0.17646458745002747,
      "learning_rate": 3.7878797862456594e-06,
      "loss": 0.0107,
      "step": 1905540
    },
    {
      "epoch": 3.118490734012817,
      "grad_norm": 0.07927995175123215,
      "learning_rate": 3.7878138940321425e-06,
      "loss": 0.0121,
      "step": 1905560
    },
    {
      "epoch": 3.1185234644514708,
      "grad_norm": 0.27153876423835754,
      "learning_rate": 3.7877480018186253e-06,
      "loss": 0.0081,
      "step": 1905580
    },
    {
      "epoch": 3.118556194890124,
      "grad_norm": 0.4159347116947174,
      "learning_rate": 3.787682109605108e-06,
      "loss": 0.0123,
      "step": 1905600
    },
    {
      "epoch": 3.118588925328777,
      "grad_norm": 0.8424372673034668,
      "learning_rate": 3.7876162173915908e-06,
      "loss": 0.0124,
      "step": 1905620
    },
    {
      "epoch": 3.1186216557674307,
      "grad_norm": 0.17092998325824738,
      "learning_rate": 3.787550325178074e-06,
      "loss": 0.008,
      "step": 1905640
    },
    {
      "epoch": 3.118654386206084,
      "grad_norm": 0.4402710795402527,
      "learning_rate": 3.787484432964557e-06,
      "loss": 0.0138,
      "step": 1905660
    },
    {
      "epoch": 3.1186871166447374,
      "grad_norm": 0.12073011696338654,
      "learning_rate": 3.78741854075104e-06,
      "loss": 0.0094,
      "step": 1905680
    },
    {
      "epoch": 3.1187198470833906,
      "grad_norm": 0.12396042793989182,
      "learning_rate": 3.787352648537523e-06,
      "loss": 0.0142,
      "step": 1905700
    },
    {
      "epoch": 3.118752577522044,
      "grad_norm": 0.20333944261074066,
      "learning_rate": 3.7872867563240057e-06,
      "loss": 0.0121,
      "step": 1905720
    },
    {
      "epoch": 3.1187853079606973,
      "grad_norm": 0.3316054046154022,
      "learning_rate": 3.7872208641104885e-06,
      "loss": 0.0164,
      "step": 1905740
    },
    {
      "epoch": 3.1188180383993505,
      "grad_norm": 0.1056697741150856,
      "learning_rate": 3.787154971896971e-06,
      "loss": 0.0074,
      "step": 1905760
    },
    {
      "epoch": 3.118850768838004,
      "grad_norm": 0.21975773572921753,
      "learning_rate": 3.7870890796834544e-06,
      "loss": 0.0118,
      "step": 1905780
    },
    {
      "epoch": 3.118883499276657,
      "grad_norm": 0.43373188376426697,
      "learning_rate": 3.787023187469937e-06,
      "loss": 0.0101,
      "step": 1905800
    },
    {
      "epoch": 3.118916229715311,
      "grad_norm": 0.10322914272546768,
      "learning_rate": 3.78695729525642e-06,
      "loss": 0.0117,
      "step": 1905820
    },
    {
      "epoch": 3.118948960153964,
      "grad_norm": 0.1627970188856125,
      "learning_rate": 3.7868914030429026e-06,
      "loss": 0.0115,
      "step": 1905840
    },
    {
      "epoch": 3.1189816905926175,
      "grad_norm": 0.8315900564193726,
      "learning_rate": 3.7868255108293853e-06,
      "loss": 0.0148,
      "step": 1905860
    },
    {
      "epoch": 3.1190144210312707,
      "grad_norm": 0.1307382583618164,
      "learning_rate": 3.7867596186158685e-06,
      "loss": 0.0163,
      "step": 1905880
    },
    {
      "epoch": 3.119047151469924,
      "grad_norm": 0.08589635789394379,
      "learning_rate": 3.7866937264023512e-06,
      "loss": 0.0117,
      "step": 1905900
    },
    {
      "epoch": 3.1190798819085774,
      "grad_norm": 0.2707758843898773,
      "learning_rate": 3.786627834188834e-06,
      "loss": 0.0068,
      "step": 1905920
    },
    {
      "epoch": 3.1191126123472306,
      "grad_norm": 0.06723736226558685,
      "learning_rate": 3.7865619419753167e-06,
      "loss": 0.0106,
      "step": 1905940
    },
    {
      "epoch": 3.119145342785884,
      "grad_norm": 0.1622830331325531,
      "learning_rate": 3.7864960497618e-06,
      "loss": 0.0122,
      "step": 1905960
    },
    {
      "epoch": 3.1191780732245373,
      "grad_norm": 0.18926116824150085,
      "learning_rate": 3.7864301575482826e-06,
      "loss": 0.0079,
      "step": 1905980
    },
    {
      "epoch": 3.119210803663191,
      "grad_norm": 0.7743198871612549,
      "learning_rate": 3.7863642653347658e-06,
      "loss": 0.01,
      "step": 1906000
    },
    {
      "epoch": 3.119243534101844,
      "grad_norm": 0.3492136597633362,
      "learning_rate": 3.786298373121249e-06,
      "loss": 0.0081,
      "step": 1906020
    },
    {
      "epoch": 3.1192762645404972,
      "grad_norm": 0.3382273316383362,
      "learning_rate": 3.7862324809077317e-06,
      "loss": 0.0186,
      "step": 1906040
    },
    {
      "epoch": 3.119308994979151,
      "grad_norm": 0.1282021701335907,
      "learning_rate": 3.7861665886942144e-06,
      "loss": 0.0139,
      "step": 1906060
    },
    {
      "epoch": 3.119341725417804,
      "grad_norm": 0.3038434386253357,
      "learning_rate": 3.786100696480697e-06,
      "loss": 0.0131,
      "step": 1906080
    },
    {
      "epoch": 3.1193744558564576,
      "grad_norm": 0.3790197968482971,
      "learning_rate": 3.7860348042671803e-06,
      "loss": 0.0122,
      "step": 1906100
    },
    {
      "epoch": 3.1194071862951107,
      "grad_norm": 0.29665377736091614,
      "learning_rate": 3.785968912053663e-06,
      "loss": 0.0167,
      "step": 1906120
    },
    {
      "epoch": 3.119439916733764,
      "grad_norm": 0.1045699417591095,
      "learning_rate": 3.785903019840146e-06,
      "loss": 0.0086,
      "step": 1906140
    },
    {
      "epoch": 3.1194726471724175,
      "grad_norm": 0.7562993764877319,
      "learning_rate": 3.7858371276266285e-06,
      "loss": 0.0124,
      "step": 1906160
    },
    {
      "epoch": 3.1195053776110706,
      "grad_norm": 0.35857710242271423,
      "learning_rate": 3.7857712354131117e-06,
      "loss": 0.0112,
      "step": 1906180
    },
    {
      "epoch": 3.119538108049724,
      "grad_norm": 0.13656266033649445,
      "learning_rate": 3.7857053431995945e-06,
      "loss": 0.013,
      "step": 1906200
    },
    {
      "epoch": 3.1195708384883774,
      "grad_norm": 0.3866090476512909,
      "learning_rate": 3.785639450986077e-06,
      "loss": 0.0103,
      "step": 1906220
    },
    {
      "epoch": 3.119603568927031,
      "grad_norm": 0.1779536008834839,
      "learning_rate": 3.78557355877256e-06,
      "loss": 0.0182,
      "step": 1906240
    },
    {
      "epoch": 3.119636299365684,
      "grad_norm": 0.5870111584663391,
      "learning_rate": 3.785507666559043e-06,
      "loss": 0.0129,
      "step": 1906260
    },
    {
      "epoch": 3.1196690298043372,
      "grad_norm": 0.20949266850948334,
      "learning_rate": 3.785441774345526e-06,
      "loss": 0.0114,
      "step": 1906280
    },
    {
      "epoch": 3.119701760242991,
      "grad_norm": 0.14322364330291748,
      "learning_rate": 3.7853758821320086e-06,
      "loss": 0.0128,
      "step": 1906300
    },
    {
      "epoch": 3.119734490681644,
      "grad_norm": 0.33314669132232666,
      "learning_rate": 3.7853099899184913e-06,
      "loss": 0.0108,
      "step": 1906320
    },
    {
      "epoch": 3.1197672211202976,
      "grad_norm": 0.18719717860221863,
      "learning_rate": 3.785244097704974e-06,
      "loss": 0.0116,
      "step": 1906340
    },
    {
      "epoch": 3.1197999515589507,
      "grad_norm": 0.5602101683616638,
      "learning_rate": 3.7851782054914576e-06,
      "loss": 0.0156,
      "step": 1906360
    },
    {
      "epoch": 3.1198326819976043,
      "grad_norm": 0.4956776797771454,
      "learning_rate": 3.7851123132779404e-06,
      "loss": 0.01,
      "step": 1906380
    },
    {
      "epoch": 3.1198654124362575,
      "grad_norm": 0.07368577271699905,
      "learning_rate": 3.785046421064423e-06,
      "loss": 0.0099,
      "step": 1906400
    },
    {
      "epoch": 3.1198981428749106,
      "grad_norm": 0.19214968383312225,
      "learning_rate": 3.7849805288509063e-06,
      "loss": 0.0087,
      "step": 1906420
    },
    {
      "epoch": 3.1199308733135642,
      "grad_norm": 0.19405391812324524,
      "learning_rate": 3.784914636637389e-06,
      "loss": 0.0136,
      "step": 1906440
    },
    {
      "epoch": 3.1199636037522174,
      "grad_norm": 0.17830589413642883,
      "learning_rate": 3.7848487444238718e-06,
      "loss": 0.0136,
      "step": 1906460
    },
    {
      "epoch": 3.119996334190871,
      "grad_norm": 0.09040916711091995,
      "learning_rate": 3.7847828522103545e-06,
      "loss": 0.0103,
      "step": 1906480
    },
    {
      "epoch": 3.120029064629524,
      "grad_norm": 0.09354160726070404,
      "learning_rate": 3.7847169599968377e-06,
      "loss": 0.0138,
      "step": 1906500
    },
    {
      "epoch": 3.1200617950681777,
      "grad_norm": 0.24589966237545013,
      "learning_rate": 3.7846510677833204e-06,
      "loss": 0.0217,
      "step": 1906520
    },
    {
      "epoch": 3.120094525506831,
      "grad_norm": 0.10239367932081223,
      "learning_rate": 3.784585175569803e-06,
      "loss": 0.0121,
      "step": 1906540
    },
    {
      "epoch": 3.120127255945484,
      "grad_norm": 0.3327113389968872,
      "learning_rate": 3.784519283356286e-06,
      "loss": 0.0103,
      "step": 1906560
    },
    {
      "epoch": 3.1201599863841376,
      "grad_norm": 0.9464364051818848,
      "learning_rate": 3.784453391142769e-06,
      "loss": 0.0151,
      "step": 1906580
    },
    {
      "epoch": 3.1201927168227908,
      "grad_norm": 0.5468369126319885,
      "learning_rate": 3.7843874989292518e-06,
      "loss": 0.0105,
      "step": 1906600
    },
    {
      "epoch": 3.1202254472614444,
      "grad_norm": 0.09738080948591232,
      "learning_rate": 3.7843216067157345e-06,
      "loss": 0.0094,
      "step": 1906620
    },
    {
      "epoch": 3.1202581777000975,
      "grad_norm": 0.2938087284564972,
      "learning_rate": 3.7842557145022173e-06,
      "loss": 0.0099,
      "step": 1906640
    },
    {
      "epoch": 3.1202909081387507,
      "grad_norm": 0.2417316436767578,
      "learning_rate": 3.7841898222887004e-06,
      "loss": 0.0109,
      "step": 1906660
    },
    {
      "epoch": 3.1203236385774042,
      "grad_norm": 0.13608868420124054,
      "learning_rate": 3.784123930075183e-06,
      "loss": 0.0098,
      "step": 1906680
    },
    {
      "epoch": 3.1203563690160574,
      "grad_norm": 0.18046674132347107,
      "learning_rate": 3.784058037861666e-06,
      "loss": 0.0125,
      "step": 1906700
    },
    {
      "epoch": 3.120389099454711,
      "grad_norm": 0.4213161766529083,
      "learning_rate": 3.7839921456481495e-06,
      "loss": 0.0123,
      "step": 1906720
    },
    {
      "epoch": 3.120421829893364,
      "grad_norm": 0.25115886330604553,
      "learning_rate": 3.7839262534346322e-06,
      "loss": 0.0093,
      "step": 1906740
    },
    {
      "epoch": 3.1204545603320177,
      "grad_norm": 0.6098323464393616,
      "learning_rate": 3.783860361221115e-06,
      "loss": 0.0197,
      "step": 1906760
    },
    {
      "epoch": 3.120487290770671,
      "grad_norm": 0.2296973615884781,
      "learning_rate": 3.7837944690075977e-06,
      "loss": 0.0092,
      "step": 1906780
    },
    {
      "epoch": 3.120520021209324,
      "grad_norm": 0.2566972076892853,
      "learning_rate": 3.783728576794081e-06,
      "loss": 0.0082,
      "step": 1906800
    },
    {
      "epoch": 3.1205527516479776,
      "grad_norm": 0.10289932787418365,
      "learning_rate": 3.7836626845805636e-06,
      "loss": 0.0122,
      "step": 1906820
    },
    {
      "epoch": 3.120585482086631,
      "grad_norm": 1.882304072380066,
      "learning_rate": 3.7835967923670464e-06,
      "loss": 0.0133,
      "step": 1906840
    },
    {
      "epoch": 3.1206182125252844,
      "grad_norm": 0.27877870202064514,
      "learning_rate": 3.783530900153529e-06,
      "loss": 0.0148,
      "step": 1906860
    },
    {
      "epoch": 3.1206509429639375,
      "grad_norm": 0.1038113385438919,
      "learning_rate": 3.783465007940012e-06,
      "loss": 0.0108,
      "step": 1906880
    },
    {
      "epoch": 3.120683673402591,
      "grad_norm": 0.23660482466220856,
      "learning_rate": 3.783399115726495e-06,
      "loss": 0.0098,
      "step": 1906900
    },
    {
      "epoch": 3.1207164038412443,
      "grad_norm": 0.1637500375509262,
      "learning_rate": 3.7833332235129777e-06,
      "loss": 0.0097,
      "step": 1906920
    },
    {
      "epoch": 3.1207491342798974,
      "grad_norm": 0.7697739005088806,
      "learning_rate": 3.7832673312994605e-06,
      "loss": 0.0127,
      "step": 1906940
    },
    {
      "epoch": 3.120781864718551,
      "grad_norm": 0.22731554508209229,
      "learning_rate": 3.7832014390859432e-06,
      "loss": 0.0139,
      "step": 1906960
    },
    {
      "epoch": 3.120814595157204,
      "grad_norm": 0.24423933029174805,
      "learning_rate": 3.7831355468724264e-06,
      "loss": 0.0125,
      "step": 1906980
    },
    {
      "epoch": 3.1208473255958578,
      "grad_norm": 0.24968023598194122,
      "learning_rate": 3.783069654658909e-06,
      "loss": 0.0124,
      "step": 1907000
    },
    {
      "epoch": 3.120880056034511,
      "grad_norm": 0.17437340319156647,
      "learning_rate": 3.783003762445392e-06,
      "loss": 0.0161,
      "step": 1907020
    },
    {
      "epoch": 3.1209127864731645,
      "grad_norm": 0.1699887067079544,
      "learning_rate": 3.7829378702318746e-06,
      "loss": 0.0124,
      "step": 1907040
    },
    {
      "epoch": 3.1209455169118177,
      "grad_norm": 0.7549902200698853,
      "learning_rate": 3.782871978018358e-06,
      "loss": 0.0106,
      "step": 1907060
    },
    {
      "epoch": 3.120978247350471,
      "grad_norm": 0.17183561623096466,
      "learning_rate": 3.782806085804841e-06,
      "loss": 0.0123,
      "step": 1907080
    },
    {
      "epoch": 3.1210109777891244,
      "grad_norm": 0.1304028481245041,
      "learning_rate": 3.7827401935913237e-06,
      "loss": 0.0104,
      "step": 1907100
    },
    {
      "epoch": 3.1210437082277775,
      "grad_norm": 0.24829554557800293,
      "learning_rate": 3.782674301377807e-06,
      "loss": 0.0144,
      "step": 1907120
    },
    {
      "epoch": 3.121076438666431,
      "grad_norm": 0.13126198947429657,
      "learning_rate": 3.7826084091642896e-06,
      "loss": 0.0096,
      "step": 1907140
    },
    {
      "epoch": 3.1211091691050843,
      "grad_norm": 0.25111040472984314,
      "learning_rate": 3.7825425169507723e-06,
      "loss": 0.0144,
      "step": 1907160
    },
    {
      "epoch": 3.121141899543738,
      "grad_norm": 0.5540726184844971,
      "learning_rate": 3.782476624737255e-06,
      "loss": 0.017,
      "step": 1907180
    },
    {
      "epoch": 3.121174629982391,
      "grad_norm": 0.12545245885849,
      "learning_rate": 3.7824107325237382e-06,
      "loss": 0.0115,
      "step": 1907200
    },
    {
      "epoch": 3.121207360421044,
      "grad_norm": 0.2812860310077667,
      "learning_rate": 3.782344840310221e-06,
      "loss": 0.0209,
      "step": 1907220
    },
    {
      "epoch": 3.121240090859698,
      "grad_norm": 0.05780864506959915,
      "learning_rate": 3.7822789480967037e-06,
      "loss": 0.0079,
      "step": 1907240
    },
    {
      "epoch": 3.121272821298351,
      "grad_norm": 0.17188110947608948,
      "learning_rate": 3.7822130558831864e-06,
      "loss": 0.0167,
      "step": 1907260
    },
    {
      "epoch": 3.1213055517370045,
      "grad_norm": 0.5003947615623474,
      "learning_rate": 3.7821471636696696e-06,
      "loss": 0.0098,
      "step": 1907280
    },
    {
      "epoch": 3.1213382821756577,
      "grad_norm": 0.09417201578617096,
      "learning_rate": 3.7820812714561523e-06,
      "loss": 0.0088,
      "step": 1907300
    },
    {
      "epoch": 3.1213710126143113,
      "grad_norm": 0.11795645952224731,
      "learning_rate": 3.782015379242635e-06,
      "loss": 0.0114,
      "step": 1907320
    },
    {
      "epoch": 3.1214037430529644,
      "grad_norm": 1.2462794780731201,
      "learning_rate": 3.781949487029118e-06,
      "loss": 0.0157,
      "step": 1907340
    },
    {
      "epoch": 3.1214364734916176,
      "grad_norm": 0.11030014604330063,
      "learning_rate": 3.7818835948156006e-06,
      "loss": 0.0106,
      "step": 1907360
    },
    {
      "epoch": 3.121469203930271,
      "grad_norm": 1.0041605234146118,
      "learning_rate": 3.7818177026020837e-06,
      "loss": 0.014,
      "step": 1907380
    },
    {
      "epoch": 3.1215019343689243,
      "grad_norm": 0.9320477843284607,
      "learning_rate": 3.7817518103885665e-06,
      "loss": 0.0097,
      "step": 1907400
    },
    {
      "epoch": 3.121534664807578,
      "grad_norm": 0.6264239549636841,
      "learning_rate": 3.7816859181750496e-06,
      "loss": 0.0146,
      "step": 1907420
    },
    {
      "epoch": 3.121567395246231,
      "grad_norm": 0.14503760635852814,
      "learning_rate": 3.7816200259615328e-06,
      "loss": 0.0138,
      "step": 1907440
    },
    {
      "epoch": 3.1216001256848847,
      "grad_norm": 0.2426532804965973,
      "learning_rate": 3.7815541337480155e-06,
      "loss": 0.0093,
      "step": 1907460
    },
    {
      "epoch": 3.121632856123538,
      "grad_norm": 0.2279987931251526,
      "learning_rate": 3.7814882415344983e-06,
      "loss": 0.0097,
      "step": 1907480
    },
    {
      "epoch": 3.121665586562191,
      "grad_norm": 0.2445725053548813,
      "learning_rate": 3.781422349320981e-06,
      "loss": 0.0097,
      "step": 1907500
    },
    {
      "epoch": 3.1216983170008445,
      "grad_norm": 0.5323309898376465,
      "learning_rate": 3.781356457107464e-06,
      "loss": 0.014,
      "step": 1907520
    },
    {
      "epoch": 3.1217310474394977,
      "grad_norm": 0.26797133684158325,
      "learning_rate": 3.781290564893947e-06,
      "loss": 0.0122,
      "step": 1907540
    },
    {
      "epoch": 3.1217637778781513,
      "grad_norm": 0.24218015372753143,
      "learning_rate": 3.7812246726804296e-06,
      "loss": 0.0109,
      "step": 1907560
    },
    {
      "epoch": 3.1217965083168044,
      "grad_norm": 0.4409843385219574,
      "learning_rate": 3.7811587804669124e-06,
      "loss": 0.0115,
      "step": 1907580
    },
    {
      "epoch": 3.1218292387554576,
      "grad_norm": 0.3786911368370056,
      "learning_rate": 3.7810928882533955e-06,
      "loss": 0.0186,
      "step": 1907600
    },
    {
      "epoch": 3.121861969194111,
      "grad_norm": 0.290834903717041,
      "learning_rate": 3.7810269960398783e-06,
      "loss": 0.0113,
      "step": 1907620
    },
    {
      "epoch": 3.1218946996327643,
      "grad_norm": 0.21105727553367615,
      "learning_rate": 3.780961103826361e-06,
      "loss": 0.0137,
      "step": 1907640
    },
    {
      "epoch": 3.121927430071418,
      "grad_norm": 0.1780957579612732,
      "learning_rate": 3.7808952116128438e-06,
      "loss": 0.0103,
      "step": 1907660
    },
    {
      "epoch": 3.121960160510071,
      "grad_norm": 0.3779147267341614,
      "learning_rate": 3.780829319399327e-06,
      "loss": 0.0129,
      "step": 1907680
    },
    {
      "epoch": 3.1219928909487247,
      "grad_norm": 0.24543046951293945,
      "learning_rate": 3.7807634271858097e-06,
      "loss": 0.0148,
      "step": 1907700
    },
    {
      "epoch": 3.122025621387378,
      "grad_norm": 0.854374349117279,
      "learning_rate": 3.7806975349722924e-06,
      "loss": 0.0143,
      "step": 1907720
    },
    {
      "epoch": 3.122058351826031,
      "grad_norm": 0.23002472519874573,
      "learning_rate": 3.780631642758775e-06,
      "loss": 0.0146,
      "step": 1907740
    },
    {
      "epoch": 3.1220910822646846,
      "grad_norm": 0.39885085821151733,
      "learning_rate": 3.7805657505452587e-06,
      "loss": 0.0105,
      "step": 1907760
    },
    {
      "epoch": 3.1221238127033377,
      "grad_norm": 0.241773784160614,
      "learning_rate": 3.7804998583317415e-06,
      "loss": 0.0107,
      "step": 1907780
    },
    {
      "epoch": 3.1221565431419913,
      "grad_norm": 0.2617703676223755,
      "learning_rate": 3.7804339661182242e-06,
      "loss": 0.0104,
      "step": 1907800
    },
    {
      "epoch": 3.1221892735806445,
      "grad_norm": 0.31855592131614685,
      "learning_rate": 3.7803680739047074e-06,
      "loss": 0.0145,
      "step": 1907820
    },
    {
      "epoch": 3.122222004019298,
      "grad_norm": 0.23920176923274994,
      "learning_rate": 3.78030218169119e-06,
      "loss": 0.0102,
      "step": 1907840
    },
    {
      "epoch": 3.122254734457951,
      "grad_norm": 0.47747543454170227,
      "learning_rate": 3.780236289477673e-06,
      "loss": 0.0134,
      "step": 1907860
    },
    {
      "epoch": 3.1222874648966044,
      "grad_norm": 0.1961299031972885,
      "learning_rate": 3.7801703972641556e-06,
      "loss": 0.0085,
      "step": 1907880
    },
    {
      "epoch": 3.122320195335258,
      "grad_norm": 0.13186956942081451,
      "learning_rate": 3.7801045050506383e-06,
      "loss": 0.0098,
      "step": 1907900
    },
    {
      "epoch": 3.122352925773911,
      "grad_norm": 0.40145373344421387,
      "learning_rate": 3.7800386128371215e-06,
      "loss": 0.0092,
      "step": 1907920
    },
    {
      "epoch": 3.1223856562125647,
      "grad_norm": 0.1437806338071823,
      "learning_rate": 3.7799727206236042e-06,
      "loss": 0.0102,
      "step": 1907940
    },
    {
      "epoch": 3.122418386651218,
      "grad_norm": 0.15389958024024963,
      "learning_rate": 3.779906828410087e-06,
      "loss": 0.0084,
      "step": 1907960
    },
    {
      "epoch": 3.1224511170898714,
      "grad_norm": 0.10706279426813126,
      "learning_rate": 3.7798409361965697e-06,
      "loss": 0.0092,
      "step": 1907980
    },
    {
      "epoch": 3.1224838475285246,
      "grad_norm": 0.2841850817203522,
      "learning_rate": 3.779775043983053e-06,
      "loss": 0.0151,
      "step": 1908000
    },
    {
      "epoch": 3.1225165779671777,
      "grad_norm": 0.28187626600265503,
      "learning_rate": 3.7797091517695356e-06,
      "loss": 0.0096,
      "step": 1908020
    },
    {
      "epoch": 3.1225493084058313,
      "grad_norm": 0.1573714017868042,
      "learning_rate": 3.7796432595560184e-06,
      "loss": 0.0086,
      "step": 1908040
    },
    {
      "epoch": 3.1225820388444845,
      "grad_norm": 0.1143379807472229,
      "learning_rate": 3.779577367342501e-06,
      "loss": 0.0142,
      "step": 1908060
    },
    {
      "epoch": 3.122614769283138,
      "grad_norm": 0.09978969395160675,
      "learning_rate": 3.7795114751289843e-06,
      "loss": 0.0137,
      "step": 1908080
    },
    {
      "epoch": 3.1226474997217912,
      "grad_norm": 0.1477556973695755,
      "learning_rate": 3.779445582915467e-06,
      "loss": 0.0069,
      "step": 1908100
    },
    {
      "epoch": 3.122680230160445,
      "grad_norm": 0.2002132087945938,
      "learning_rate": 3.77937969070195e-06,
      "loss": 0.0138,
      "step": 1908120
    },
    {
      "epoch": 3.122712960599098,
      "grad_norm": 0.07017142325639725,
      "learning_rate": 3.7793137984884333e-06,
      "loss": 0.0122,
      "step": 1908140
    },
    {
      "epoch": 3.122745691037751,
      "grad_norm": 0.19017018377780914,
      "learning_rate": 3.779247906274916e-06,
      "loss": 0.0093,
      "step": 1908160
    },
    {
      "epoch": 3.1227784214764047,
      "grad_norm": 0.4848286807537079,
      "learning_rate": 3.779182014061399e-06,
      "loss": 0.0132,
      "step": 1908180
    },
    {
      "epoch": 3.122811151915058,
      "grad_norm": 0.2566075623035431,
      "learning_rate": 3.7791161218478816e-06,
      "loss": 0.0079,
      "step": 1908200
    },
    {
      "epoch": 3.1228438823537115,
      "grad_norm": 0.2032216638326645,
      "learning_rate": 3.7790502296343647e-06,
      "loss": 0.0104,
      "step": 1908220
    },
    {
      "epoch": 3.1228766127923646,
      "grad_norm": 0.4213656783103943,
      "learning_rate": 3.7789843374208475e-06,
      "loss": 0.0153,
      "step": 1908240
    },
    {
      "epoch": 3.1229093432310178,
      "grad_norm": 0.24856603145599365,
      "learning_rate": 3.77891844520733e-06,
      "loss": 0.0107,
      "step": 1908260
    },
    {
      "epoch": 3.1229420736696714,
      "grad_norm": 1.492846131324768,
      "learning_rate": 3.778852552993813e-06,
      "loss": 0.0111,
      "step": 1908280
    },
    {
      "epoch": 3.1229748041083245,
      "grad_norm": 0.4499625265598297,
      "learning_rate": 3.7787866607802957e-06,
      "loss": 0.0162,
      "step": 1908300
    },
    {
      "epoch": 3.123007534546978,
      "grad_norm": 1.1421030759811401,
      "learning_rate": 3.778720768566779e-06,
      "loss": 0.0232,
      "step": 1908320
    },
    {
      "epoch": 3.1230402649856313,
      "grad_norm": 0.3657771348953247,
      "learning_rate": 3.7786548763532616e-06,
      "loss": 0.0101,
      "step": 1908340
    },
    {
      "epoch": 3.123072995424285,
      "grad_norm": 0.15152199566364288,
      "learning_rate": 3.7785889841397443e-06,
      "loss": 0.0092,
      "step": 1908360
    },
    {
      "epoch": 3.123105725862938,
      "grad_norm": 0.35994091629981995,
      "learning_rate": 3.778523091926227e-06,
      "loss": 0.0098,
      "step": 1908380
    },
    {
      "epoch": 3.123138456301591,
      "grad_norm": 0.18393930792808533,
      "learning_rate": 3.7784571997127102e-06,
      "loss": 0.0103,
      "step": 1908400
    },
    {
      "epoch": 3.1231711867402447,
      "grad_norm": 0.1383543759584427,
      "learning_rate": 3.778391307499193e-06,
      "loss": 0.0087,
      "step": 1908420
    },
    {
      "epoch": 3.123203917178898,
      "grad_norm": 0.4009944796562195,
      "learning_rate": 3.7783254152856757e-06,
      "loss": 0.0119,
      "step": 1908440
    },
    {
      "epoch": 3.1232366476175515,
      "grad_norm": 0.1929289549589157,
      "learning_rate": 3.7782595230721584e-06,
      "loss": 0.0129,
      "step": 1908460
    },
    {
      "epoch": 3.1232693780562046,
      "grad_norm": 1.4195853471755981,
      "learning_rate": 3.778193630858642e-06,
      "loss": 0.0167,
      "step": 1908480
    },
    {
      "epoch": 3.1233021084948582,
      "grad_norm": 0.0872814729809761,
      "learning_rate": 3.7781277386451248e-06,
      "loss": 0.016,
      "step": 1908500
    },
    {
      "epoch": 3.1233348389335114,
      "grad_norm": 0.8362349271774292,
      "learning_rate": 3.7780618464316075e-06,
      "loss": 0.0137,
      "step": 1908520
    },
    {
      "epoch": 3.1233675693721645,
      "grad_norm": 0.26040732860565186,
      "learning_rate": 3.7779959542180907e-06,
      "loss": 0.015,
      "step": 1908540
    },
    {
      "epoch": 3.123400299810818,
      "grad_norm": 0.12468057870864868,
      "learning_rate": 3.7779300620045734e-06,
      "loss": 0.0128,
      "step": 1908560
    },
    {
      "epoch": 3.1234330302494713,
      "grad_norm": 0.0968453511595726,
      "learning_rate": 3.777864169791056e-06,
      "loss": 0.0128,
      "step": 1908580
    },
    {
      "epoch": 3.123465760688125,
      "grad_norm": 0.16312164068222046,
      "learning_rate": 3.777798277577539e-06,
      "loss": 0.0138,
      "step": 1908600
    },
    {
      "epoch": 3.123498491126778,
      "grad_norm": 0.2616256773471832,
      "learning_rate": 3.777732385364022e-06,
      "loss": 0.0132,
      "step": 1908620
    },
    {
      "epoch": 3.1235312215654316,
      "grad_norm": 0.3572191298007965,
      "learning_rate": 3.777666493150505e-06,
      "loss": 0.0158,
      "step": 1908640
    },
    {
      "epoch": 3.1235639520040848,
      "grad_norm": 0.23729784786701202,
      "learning_rate": 3.7776006009369875e-06,
      "loss": 0.0099,
      "step": 1908660
    },
    {
      "epoch": 3.123596682442738,
      "grad_norm": 0.3611600697040558,
      "learning_rate": 3.7775347087234703e-06,
      "loss": 0.0078,
      "step": 1908680
    },
    {
      "epoch": 3.1236294128813915,
      "grad_norm": 0.6674425005912781,
      "learning_rate": 3.7774688165099534e-06,
      "loss": 0.0139,
      "step": 1908700
    },
    {
      "epoch": 3.1236621433200447,
      "grad_norm": 0.05966586619615555,
      "learning_rate": 3.777402924296436e-06,
      "loss": 0.0087,
      "step": 1908720
    },
    {
      "epoch": 3.1236948737586983,
      "grad_norm": 0.4292173981666565,
      "learning_rate": 3.777337032082919e-06,
      "loss": 0.0108,
      "step": 1908740
    },
    {
      "epoch": 3.1237276041973514,
      "grad_norm": 0.42943739891052246,
      "learning_rate": 3.7772711398694017e-06,
      "loss": 0.0146,
      "step": 1908760
    },
    {
      "epoch": 3.123760334636005,
      "grad_norm": 0.19466550648212433,
      "learning_rate": 3.7772052476558844e-06,
      "loss": 0.0126,
      "step": 1908780
    },
    {
      "epoch": 3.123793065074658,
      "grad_norm": 0.09596103429794312,
      "learning_rate": 3.7771393554423676e-06,
      "loss": 0.0088,
      "step": 1908800
    },
    {
      "epoch": 3.1238257955133113,
      "grad_norm": 0.5329672694206238,
      "learning_rate": 3.7770734632288507e-06,
      "loss": 0.0143,
      "step": 1908820
    },
    {
      "epoch": 3.123858525951965,
      "grad_norm": 0.15325911343097687,
      "learning_rate": 3.7770075710153335e-06,
      "loss": 0.0101,
      "step": 1908840
    },
    {
      "epoch": 3.123891256390618,
      "grad_norm": 0.15145516395568848,
      "learning_rate": 3.7769416788018166e-06,
      "loss": 0.0153,
      "step": 1908860
    },
    {
      "epoch": 3.1239239868292716,
      "grad_norm": 0.12254014611244202,
      "learning_rate": 3.7768757865882994e-06,
      "loss": 0.0153,
      "step": 1908880
    },
    {
      "epoch": 3.123956717267925,
      "grad_norm": 0.7378790974617004,
      "learning_rate": 3.776809894374782e-06,
      "loss": 0.0164,
      "step": 1908900
    },
    {
      "epoch": 3.1239894477065784,
      "grad_norm": 0.12040572613477707,
      "learning_rate": 3.776744002161265e-06,
      "loss": 0.0118,
      "step": 1908920
    },
    {
      "epoch": 3.1240221781452315,
      "grad_norm": 0.8162044286727905,
      "learning_rate": 3.776678109947748e-06,
      "loss": 0.0146,
      "step": 1908940
    },
    {
      "epoch": 3.1240549085838847,
      "grad_norm": 0.5975159406661987,
      "learning_rate": 3.7766122177342307e-06,
      "loss": 0.0088,
      "step": 1908960
    },
    {
      "epoch": 3.1240876390225383,
      "grad_norm": 0.13344621658325195,
      "learning_rate": 3.7765463255207135e-06,
      "loss": 0.0074,
      "step": 1908980
    },
    {
      "epoch": 3.1241203694611914,
      "grad_norm": 0.7993806004524231,
      "learning_rate": 3.7764804333071962e-06,
      "loss": 0.0095,
      "step": 1909000
    },
    {
      "epoch": 3.124153099899845,
      "grad_norm": 0.38446739315986633,
      "learning_rate": 3.7764145410936794e-06,
      "loss": 0.009,
      "step": 1909020
    },
    {
      "epoch": 3.124185830338498,
      "grad_norm": 0.4183422327041626,
      "learning_rate": 3.776348648880162e-06,
      "loss": 0.0077,
      "step": 1909040
    },
    {
      "epoch": 3.1242185607771513,
      "grad_norm": 0.5264101028442383,
      "learning_rate": 3.776282756666645e-06,
      "loss": 0.0163,
      "step": 1909060
    },
    {
      "epoch": 3.124251291215805,
      "grad_norm": 0.07715978473424911,
      "learning_rate": 3.7762168644531276e-06,
      "loss": 0.0115,
      "step": 1909080
    },
    {
      "epoch": 3.124284021654458,
      "grad_norm": 0.31798505783081055,
      "learning_rate": 3.7761509722396108e-06,
      "loss": 0.0103,
      "step": 1909100
    },
    {
      "epoch": 3.1243167520931117,
      "grad_norm": 0.20818375051021576,
      "learning_rate": 3.7760850800260935e-06,
      "loss": 0.0106,
      "step": 1909120
    },
    {
      "epoch": 3.124349482531765,
      "grad_norm": 0.22356845438480377,
      "learning_rate": 3.7760191878125763e-06,
      "loss": 0.0108,
      "step": 1909140
    },
    {
      "epoch": 3.1243822129704184,
      "grad_norm": 0.5038233399391174,
      "learning_rate": 3.775953295599059e-06,
      "loss": 0.016,
      "step": 1909160
    },
    {
      "epoch": 3.1244149434090716,
      "grad_norm": 0.3223015367984772,
      "learning_rate": 3.7758874033855426e-06,
      "loss": 0.0126,
      "step": 1909180
    },
    {
      "epoch": 3.1244476738477247,
      "grad_norm": 0.23210810124874115,
      "learning_rate": 3.7758215111720253e-06,
      "loss": 0.0111,
      "step": 1909200
    },
    {
      "epoch": 3.1244804042863783,
      "grad_norm": 0.184634268283844,
      "learning_rate": 3.775755618958508e-06,
      "loss": 0.0128,
      "step": 1909220
    },
    {
      "epoch": 3.1245131347250314,
      "grad_norm": 0.08606802672147751,
      "learning_rate": 3.7756897267449912e-06,
      "loss": 0.0134,
      "step": 1909240
    },
    {
      "epoch": 3.124545865163685,
      "grad_norm": 0.14958420395851135,
      "learning_rate": 3.775623834531474e-06,
      "loss": 0.0089,
      "step": 1909260
    },
    {
      "epoch": 3.124578595602338,
      "grad_norm": 0.2263043224811554,
      "learning_rate": 3.7755579423179567e-06,
      "loss": 0.0156,
      "step": 1909280
    },
    {
      "epoch": 3.124611326040992,
      "grad_norm": 0.11290960013866425,
      "learning_rate": 3.7754920501044394e-06,
      "loss": 0.0097,
      "step": 1909300
    },
    {
      "epoch": 3.124644056479645,
      "grad_norm": 0.11498448997735977,
      "learning_rate": 3.775426157890922e-06,
      "loss": 0.0108,
      "step": 1909320
    },
    {
      "epoch": 3.124676786918298,
      "grad_norm": 0.11735613644123077,
      "learning_rate": 3.7753602656774053e-06,
      "loss": 0.014,
      "step": 1909340
    },
    {
      "epoch": 3.1247095173569517,
      "grad_norm": 0.495389461517334,
      "learning_rate": 3.775294373463888e-06,
      "loss": 0.0173,
      "step": 1909360
    },
    {
      "epoch": 3.124742247795605,
      "grad_norm": 0.08233505487442017,
      "learning_rate": 3.775228481250371e-06,
      "loss": 0.0144,
      "step": 1909380
    },
    {
      "epoch": 3.1247749782342584,
      "grad_norm": 0.35721346735954285,
      "learning_rate": 3.7751625890368536e-06,
      "loss": 0.0087,
      "step": 1909400
    },
    {
      "epoch": 3.1248077086729116,
      "grad_norm": 0.7373842000961304,
      "learning_rate": 3.7750966968233367e-06,
      "loss": 0.0093,
      "step": 1909420
    },
    {
      "epoch": 3.124840439111565,
      "grad_norm": 0.522739052772522,
      "learning_rate": 3.7750308046098195e-06,
      "loss": 0.011,
      "step": 1909440
    },
    {
      "epoch": 3.1248731695502183,
      "grad_norm": 0.5766745209693909,
      "learning_rate": 3.774964912396302e-06,
      "loss": 0.0162,
      "step": 1909460
    },
    {
      "epoch": 3.1249058999888715,
      "grad_norm": 0.08220747113227844,
      "learning_rate": 3.774899020182785e-06,
      "loss": 0.0171,
      "step": 1909480
    },
    {
      "epoch": 3.124938630427525,
      "grad_norm": 0.30561473965644836,
      "learning_rate": 3.774833127969268e-06,
      "loss": 0.014,
      "step": 1909500
    },
    {
      "epoch": 3.124971360866178,
      "grad_norm": 0.36794036626815796,
      "learning_rate": 3.7747672357557513e-06,
      "loss": 0.0164,
      "step": 1909520
    },
    {
      "epoch": 3.125004091304832,
      "grad_norm": 0.19686326384544373,
      "learning_rate": 3.774701343542234e-06,
      "loss": 0.013,
      "step": 1909540
    },
    {
      "epoch": 3.125036821743485,
      "grad_norm": 0.3358200490474701,
      "learning_rate": 3.774635451328717e-06,
      "loss": 0.0085,
      "step": 1909560
    },
    {
      "epoch": 3.125069552182138,
      "grad_norm": 0.6388205289840698,
      "learning_rate": 3.7745695591152e-06,
      "loss": 0.011,
      "step": 1909580
    },
    {
      "epoch": 3.1251022826207917,
      "grad_norm": 0.148843914270401,
      "learning_rate": 3.7745036669016827e-06,
      "loss": 0.0141,
      "step": 1909600
    },
    {
      "epoch": 3.125135013059445,
      "grad_norm": 0.1942826807498932,
      "learning_rate": 3.7744377746881654e-06,
      "loss": 0.0066,
      "step": 1909620
    },
    {
      "epoch": 3.1251677434980984,
      "grad_norm": 0.230834499001503,
      "learning_rate": 3.7743718824746486e-06,
      "loss": 0.0107,
      "step": 1909640
    },
    {
      "epoch": 3.1252004739367516,
      "grad_norm": 0.3293646574020386,
      "learning_rate": 3.7743059902611313e-06,
      "loss": 0.0173,
      "step": 1909660
    },
    {
      "epoch": 3.125233204375405,
      "grad_norm": 0.207596093416214,
      "learning_rate": 3.774240098047614e-06,
      "loss": 0.0083,
      "step": 1909680
    },
    {
      "epoch": 3.1252659348140583,
      "grad_norm": 0.2440449297428131,
      "learning_rate": 3.7741742058340968e-06,
      "loss": 0.0087,
      "step": 1909700
    },
    {
      "epoch": 3.1252986652527115,
      "grad_norm": 0.23900489509105682,
      "learning_rate": 3.7741083136205795e-06,
      "loss": 0.009,
      "step": 1909720
    },
    {
      "epoch": 3.125331395691365,
      "grad_norm": 0.9596730470657349,
      "learning_rate": 3.7740424214070627e-06,
      "loss": 0.0094,
      "step": 1909740
    },
    {
      "epoch": 3.1253641261300182,
      "grad_norm": 0.08697985857725143,
      "learning_rate": 3.7739765291935454e-06,
      "loss": 0.0117,
      "step": 1909760
    },
    {
      "epoch": 3.125396856568672,
      "grad_norm": 0.3489290177822113,
      "learning_rate": 3.773910636980028e-06,
      "loss": 0.0156,
      "step": 1909780
    },
    {
      "epoch": 3.125429587007325,
      "grad_norm": 0.6631454229354858,
      "learning_rate": 3.773844744766511e-06,
      "loss": 0.0129,
      "step": 1909800
    },
    {
      "epoch": 3.1254623174459786,
      "grad_norm": 0.34264111518859863,
      "learning_rate": 3.773778852552994e-06,
      "loss": 0.0123,
      "step": 1909820
    },
    {
      "epoch": 3.1254950478846317,
      "grad_norm": 0.24168629944324493,
      "learning_rate": 3.773712960339477e-06,
      "loss": 0.0122,
      "step": 1909840
    },
    {
      "epoch": 3.125527778323285,
      "grad_norm": 0.1117681935429573,
      "learning_rate": 3.7736470681259595e-06,
      "loss": 0.0093,
      "step": 1909860
    },
    {
      "epoch": 3.1255605087619385,
      "grad_norm": 0.3317273259162903,
      "learning_rate": 3.773581175912443e-06,
      "loss": 0.0122,
      "step": 1909880
    },
    {
      "epoch": 3.1255932392005916,
      "grad_norm": 0.3131501078605652,
      "learning_rate": 3.773515283698926e-06,
      "loss": 0.0098,
      "step": 1909900
    },
    {
      "epoch": 3.125625969639245,
      "grad_norm": 0.30622243881225586,
      "learning_rate": 3.7734493914854086e-06,
      "loss": 0.0132,
      "step": 1909920
    },
    {
      "epoch": 3.1256587000778984,
      "grad_norm": 0.43413519859313965,
      "learning_rate": 3.7733834992718913e-06,
      "loss": 0.0108,
      "step": 1909940
    },
    {
      "epoch": 3.125691430516552,
      "grad_norm": 1.8281934261322021,
      "learning_rate": 3.7733176070583745e-06,
      "loss": 0.0099,
      "step": 1909960
    },
    {
      "epoch": 3.125724160955205,
      "grad_norm": 0.10961499810218811,
      "learning_rate": 3.7732517148448572e-06,
      "loss": 0.0055,
      "step": 1909980
    },
    {
      "epoch": 3.1257568913938583,
      "grad_norm": 0.23763053119182587,
      "learning_rate": 3.77318582263134e-06,
      "loss": 0.0062,
      "step": 1910000
    },
    {
      "epoch": 3.125789621832512,
      "grad_norm": 0.22104598581790924,
      "learning_rate": 3.7731199304178227e-06,
      "loss": 0.0134,
      "step": 1910020
    },
    {
      "epoch": 3.125822352271165,
      "grad_norm": 0.33459845185279846,
      "learning_rate": 3.773054038204306e-06,
      "loss": 0.0094,
      "step": 1910040
    },
    {
      "epoch": 3.1258550827098186,
      "grad_norm": 0.5325661301612854,
      "learning_rate": 3.7729881459907886e-06,
      "loss": 0.012,
      "step": 1910060
    },
    {
      "epoch": 3.1258878131484717,
      "grad_norm": 0.20537598431110382,
      "learning_rate": 3.7729222537772714e-06,
      "loss": 0.0133,
      "step": 1910080
    },
    {
      "epoch": 3.1259205435871253,
      "grad_norm": 0.06023555248975754,
      "learning_rate": 3.772856361563754e-06,
      "loss": 0.013,
      "step": 1910100
    },
    {
      "epoch": 3.1259532740257785,
      "grad_norm": 0.14457066357135773,
      "learning_rate": 3.7727904693502373e-06,
      "loss": 0.0163,
      "step": 1910120
    },
    {
      "epoch": 3.1259860044644316,
      "grad_norm": 0.19033509492874146,
      "learning_rate": 3.77272457713672e-06,
      "loss": 0.0089,
      "step": 1910140
    },
    {
      "epoch": 3.1260187349030852,
      "grad_norm": 0.11816107481718063,
      "learning_rate": 3.7726586849232028e-06,
      "loss": 0.0093,
      "step": 1910160
    },
    {
      "epoch": 3.1260514653417384,
      "grad_norm": 0.5225459933280945,
      "learning_rate": 3.7725927927096855e-06,
      "loss": 0.011,
      "step": 1910180
    },
    {
      "epoch": 3.126084195780392,
      "grad_norm": 0.20318737626075745,
      "learning_rate": 3.7725269004961682e-06,
      "loss": 0.0122,
      "step": 1910200
    },
    {
      "epoch": 3.126116926219045,
      "grad_norm": 0.276666522026062,
      "learning_rate": 3.7724610082826514e-06,
      "loss": 0.0112,
      "step": 1910220
    },
    {
      "epoch": 3.1261496566576987,
      "grad_norm": 0.5565241575241089,
      "learning_rate": 3.7723951160691346e-06,
      "loss": 0.0141,
      "step": 1910240
    },
    {
      "epoch": 3.126182387096352,
      "grad_norm": 0.20705126225948334,
      "learning_rate": 3.7723292238556173e-06,
      "loss": 0.0088,
      "step": 1910260
    },
    {
      "epoch": 3.126215117535005,
      "grad_norm": 0.2963073253631592,
      "learning_rate": 3.7722633316421005e-06,
      "loss": 0.0123,
      "step": 1910280
    },
    {
      "epoch": 3.1262478479736586,
      "grad_norm": 0.2449120581150055,
      "learning_rate": 3.772197439428583e-06,
      "loss": 0.0136,
      "step": 1910300
    },
    {
      "epoch": 3.1262805784123118,
      "grad_norm": 0.16340626776218414,
      "learning_rate": 3.772131547215066e-06,
      "loss": 0.0121,
      "step": 1910320
    },
    {
      "epoch": 3.1263133088509654,
      "grad_norm": 0.16389691829681396,
      "learning_rate": 3.7720656550015487e-06,
      "loss": 0.0103,
      "step": 1910340
    },
    {
      "epoch": 3.1263460392896185,
      "grad_norm": 0.4183109402656555,
      "learning_rate": 3.771999762788032e-06,
      "loss": 0.0098,
      "step": 1910360
    },
    {
      "epoch": 3.126378769728272,
      "grad_norm": 0.09995152056217194,
      "learning_rate": 3.7719338705745146e-06,
      "loss": 0.0099,
      "step": 1910380
    },
    {
      "epoch": 3.1264115001669253,
      "grad_norm": 0.38086625933647156,
      "learning_rate": 3.7718679783609973e-06,
      "loss": 0.0154,
      "step": 1910400
    },
    {
      "epoch": 3.1264442306055784,
      "grad_norm": 0.3456852436065674,
      "learning_rate": 3.77180208614748e-06,
      "loss": 0.0117,
      "step": 1910420
    },
    {
      "epoch": 3.126476961044232,
      "grad_norm": 0.241152822971344,
      "learning_rate": 3.7717361939339632e-06,
      "loss": 0.015,
      "step": 1910440
    },
    {
      "epoch": 3.126509691482885,
      "grad_norm": 0.14564397931098938,
      "learning_rate": 3.771670301720446e-06,
      "loss": 0.0082,
      "step": 1910460
    },
    {
      "epoch": 3.1265424219215388,
      "grad_norm": 0.18498773872852325,
      "learning_rate": 3.7716044095069287e-06,
      "loss": 0.0114,
      "step": 1910480
    },
    {
      "epoch": 3.126575152360192,
      "grad_norm": 0.22491255402565002,
      "learning_rate": 3.7715385172934114e-06,
      "loss": 0.0118,
      "step": 1910500
    },
    {
      "epoch": 3.1266078827988455,
      "grad_norm": 0.20318733155727386,
      "learning_rate": 3.7714726250798946e-06,
      "loss": 0.0086,
      "step": 1910520
    },
    {
      "epoch": 3.1266406132374986,
      "grad_norm": 0.39070025086402893,
      "learning_rate": 3.7714067328663774e-06,
      "loss": 0.0164,
      "step": 1910540
    },
    {
      "epoch": 3.126673343676152,
      "grad_norm": 2.1578640937805176,
      "learning_rate": 3.77134084065286e-06,
      "loss": 0.0133,
      "step": 1910560
    },
    {
      "epoch": 3.1267060741148054,
      "grad_norm": 0.19460971653461456,
      "learning_rate": 3.7712749484393437e-06,
      "loss": 0.0136,
      "step": 1910580
    },
    {
      "epoch": 3.1267388045534585,
      "grad_norm": 0.6654863357543945,
      "learning_rate": 3.7712090562258264e-06,
      "loss": 0.0076,
      "step": 1910600
    },
    {
      "epoch": 3.126771534992112,
      "grad_norm": 0.41753822565078735,
      "learning_rate": 3.771143164012309e-06,
      "loss": 0.0117,
      "step": 1910620
    },
    {
      "epoch": 3.1268042654307653,
      "grad_norm": 0.22223053872585297,
      "learning_rate": 3.771077271798792e-06,
      "loss": 0.0136,
      "step": 1910640
    },
    {
      "epoch": 3.1268369958694184,
      "grad_norm": 0.3741367757320404,
      "learning_rate": 3.771011379585275e-06,
      "loss": 0.0096,
      "step": 1910660
    },
    {
      "epoch": 3.126869726308072,
      "grad_norm": 0.2741287350654602,
      "learning_rate": 3.770945487371758e-06,
      "loss": 0.0126,
      "step": 1910680
    },
    {
      "epoch": 3.126902456746725,
      "grad_norm": 0.3724302351474762,
      "learning_rate": 3.7708795951582405e-06,
      "loss": 0.0136,
      "step": 1910700
    },
    {
      "epoch": 3.1269351871853788,
      "grad_norm": 0.1616017073392868,
      "learning_rate": 3.7708137029447233e-06,
      "loss": 0.0109,
      "step": 1910720
    },
    {
      "epoch": 3.126967917624032,
      "grad_norm": 0.4038184583187103,
      "learning_rate": 3.770747810731206e-06,
      "loss": 0.0085,
      "step": 1910740
    },
    {
      "epoch": 3.1270006480626855,
      "grad_norm": 0.16206663846969604,
      "learning_rate": 3.770681918517689e-06,
      "loss": 0.0147,
      "step": 1910760
    },
    {
      "epoch": 3.1270333785013387,
      "grad_norm": 0.08421050757169724,
      "learning_rate": 3.770616026304172e-06,
      "loss": 0.0119,
      "step": 1910780
    },
    {
      "epoch": 3.127066108939992,
      "grad_norm": 0.0761364996433258,
      "learning_rate": 3.7705501340906547e-06,
      "loss": 0.0109,
      "step": 1910800
    },
    {
      "epoch": 3.1270988393786454,
      "grad_norm": 0.31259334087371826,
      "learning_rate": 3.7704842418771374e-06,
      "loss": 0.013,
      "step": 1910820
    },
    {
      "epoch": 3.1271315698172986,
      "grad_norm": 0.5671115517616272,
      "learning_rate": 3.7704183496636206e-06,
      "loss": 0.0134,
      "step": 1910840
    },
    {
      "epoch": 3.127164300255952,
      "grad_norm": 0.21287420392036438,
      "learning_rate": 3.7703524574501033e-06,
      "loss": 0.0109,
      "step": 1910860
    },
    {
      "epoch": 3.1271970306946053,
      "grad_norm": 0.48308265209198,
      "learning_rate": 3.770286565236586e-06,
      "loss": 0.0116,
      "step": 1910880
    },
    {
      "epoch": 3.127229761133259,
      "grad_norm": 0.28095147013664246,
      "learning_rate": 3.7702206730230688e-06,
      "loss": 0.017,
      "step": 1910900
    },
    {
      "epoch": 3.127262491571912,
      "grad_norm": 0.9668232798576355,
      "learning_rate": 3.770154780809552e-06,
      "loss": 0.0123,
      "step": 1910920
    },
    {
      "epoch": 3.127295222010565,
      "grad_norm": 0.18735544383525848,
      "learning_rate": 3.770088888596035e-06,
      "loss": 0.0109,
      "step": 1910940
    },
    {
      "epoch": 3.127327952449219,
      "grad_norm": 0.34933769702911377,
      "learning_rate": 3.770022996382518e-06,
      "loss": 0.0123,
      "step": 1910960
    },
    {
      "epoch": 3.127360682887872,
      "grad_norm": 0.22059252858161926,
      "learning_rate": 3.769957104169001e-06,
      "loss": 0.0105,
      "step": 1910980
    },
    {
      "epoch": 3.1273934133265255,
      "grad_norm": 0.31662890315055847,
      "learning_rate": 3.7698912119554838e-06,
      "loss": 0.0091,
      "step": 1911000
    },
    {
      "epoch": 3.1274261437651787,
      "grad_norm": 0.7410299181938171,
      "learning_rate": 3.7698253197419665e-06,
      "loss": 0.0153,
      "step": 1911020
    },
    {
      "epoch": 3.1274588742038323,
      "grad_norm": 0.24392738938331604,
      "learning_rate": 3.7697594275284492e-06,
      "loss": 0.0134,
      "step": 1911040
    },
    {
      "epoch": 3.1274916046424854,
      "grad_norm": 0.37104296684265137,
      "learning_rate": 3.7696935353149324e-06,
      "loss": 0.0137,
      "step": 1911060
    },
    {
      "epoch": 3.1275243350811386,
      "grad_norm": 0.11618878692388535,
      "learning_rate": 3.769627643101415e-06,
      "loss": 0.0116,
      "step": 1911080
    },
    {
      "epoch": 3.127557065519792,
      "grad_norm": 0.16853390634059906,
      "learning_rate": 3.769561750887898e-06,
      "loss": 0.0081,
      "step": 1911100
    },
    {
      "epoch": 3.1275897959584453,
      "grad_norm": 0.09963884204626083,
      "learning_rate": 3.7694958586743806e-06,
      "loss": 0.0071,
      "step": 1911120
    },
    {
      "epoch": 3.127622526397099,
      "grad_norm": 0.08936427533626556,
      "learning_rate": 3.7694299664608634e-06,
      "loss": 0.0124,
      "step": 1911140
    },
    {
      "epoch": 3.127655256835752,
      "grad_norm": 0.43180301785469055,
      "learning_rate": 3.7693640742473465e-06,
      "loss": 0.0175,
      "step": 1911160
    },
    {
      "epoch": 3.1276879872744052,
      "grad_norm": 0.08887840807437897,
      "learning_rate": 3.7692981820338293e-06,
      "loss": 0.0129,
      "step": 1911180
    },
    {
      "epoch": 3.127720717713059,
      "grad_norm": 0.4825258255004883,
      "learning_rate": 3.769232289820312e-06,
      "loss": 0.012,
      "step": 1911200
    },
    {
      "epoch": 3.127753448151712,
      "grad_norm": 0.2263738363981247,
      "learning_rate": 3.7691663976067947e-06,
      "loss": 0.0129,
      "step": 1911220
    },
    {
      "epoch": 3.1277861785903656,
      "grad_norm": 0.5673844218254089,
      "learning_rate": 3.769100505393278e-06,
      "loss": 0.0123,
      "step": 1911240
    },
    {
      "epoch": 3.1278189090290187,
      "grad_norm": 0.40232279896736145,
      "learning_rate": 3.7690346131797606e-06,
      "loss": 0.0121,
      "step": 1911260
    },
    {
      "epoch": 3.1278516394676723,
      "grad_norm": 0.10502415895462036,
      "learning_rate": 3.768968720966244e-06,
      "loss": 0.012,
      "step": 1911280
    },
    {
      "epoch": 3.1278843699063255,
      "grad_norm": 0.15547628700733185,
      "learning_rate": 3.768902828752727e-06,
      "loss": 0.0113,
      "step": 1911300
    },
    {
      "epoch": 3.1279171003449786,
      "grad_norm": 0.1672155261039734,
      "learning_rate": 3.7688369365392097e-06,
      "loss": 0.0116,
      "step": 1911320
    },
    {
      "epoch": 3.127949830783632,
      "grad_norm": 0.22387272119522095,
      "learning_rate": 3.7687710443256924e-06,
      "loss": 0.0155,
      "step": 1911340
    },
    {
      "epoch": 3.1279825612222854,
      "grad_norm": 0.24438151717185974,
      "learning_rate": 3.768705152112175e-06,
      "loss": 0.0126,
      "step": 1911360
    },
    {
      "epoch": 3.128015291660939,
      "grad_norm": 0.3183087408542633,
      "learning_rate": 3.7686392598986583e-06,
      "loss": 0.0076,
      "step": 1911380
    },
    {
      "epoch": 3.128048022099592,
      "grad_norm": 0.15779423713684082,
      "learning_rate": 3.768573367685141e-06,
      "loss": 0.0135,
      "step": 1911400
    },
    {
      "epoch": 3.1280807525382457,
      "grad_norm": 0.223977193236351,
      "learning_rate": 3.768507475471624e-06,
      "loss": 0.0122,
      "step": 1911420
    },
    {
      "epoch": 3.128113482976899,
      "grad_norm": 0.3921653628349304,
      "learning_rate": 3.7684415832581066e-06,
      "loss": 0.0072,
      "step": 1911440
    },
    {
      "epoch": 3.128146213415552,
      "grad_norm": 0.3928893208503723,
      "learning_rate": 3.7683756910445897e-06,
      "loss": 0.0143,
      "step": 1911460
    },
    {
      "epoch": 3.1281789438542056,
      "grad_norm": 0.24322666227817535,
      "learning_rate": 3.7683097988310725e-06,
      "loss": 0.0098,
      "step": 1911480
    },
    {
      "epoch": 3.1282116742928587,
      "grad_norm": 0.03892707824707031,
      "learning_rate": 3.768243906617555e-06,
      "loss": 0.013,
      "step": 1911500
    },
    {
      "epoch": 3.1282444047315123,
      "grad_norm": 0.6168862581253052,
      "learning_rate": 3.768178014404038e-06,
      "loss": 0.009,
      "step": 1911520
    },
    {
      "epoch": 3.1282771351701655,
      "grad_norm": 0.22101782262325287,
      "learning_rate": 3.768112122190521e-06,
      "loss": 0.015,
      "step": 1911540
    },
    {
      "epoch": 3.128309865608819,
      "grad_norm": 1.1517364978790283,
      "learning_rate": 3.768046229977004e-06,
      "loss": 0.0115,
      "step": 1911560
    },
    {
      "epoch": 3.1283425960474722,
      "grad_norm": 0.14547604322433472,
      "learning_rate": 3.7679803377634866e-06,
      "loss": 0.012,
      "step": 1911580
    },
    {
      "epoch": 3.1283753264861254,
      "grad_norm": 0.15601825714111328,
      "learning_rate": 3.7679144455499693e-06,
      "loss": 0.0088,
      "step": 1911600
    },
    {
      "epoch": 3.128408056924779,
      "grad_norm": 0.40881434082984924,
      "learning_rate": 3.767848553336452e-06,
      "loss": 0.0099,
      "step": 1911620
    },
    {
      "epoch": 3.128440787363432,
      "grad_norm": 0.21119201183319092,
      "learning_rate": 3.7677826611229357e-06,
      "loss": 0.0083,
      "step": 1911640
    },
    {
      "epoch": 3.1284735178020857,
      "grad_norm": 0.07456132769584656,
      "learning_rate": 3.7677167689094184e-06,
      "loss": 0.0099,
      "step": 1911660
    },
    {
      "epoch": 3.128506248240739,
      "grad_norm": 0.21820947527885437,
      "learning_rate": 3.767650876695901e-06,
      "loss": 0.0138,
      "step": 1911680
    },
    {
      "epoch": 3.1285389786793925,
      "grad_norm": 0.12502962350845337,
      "learning_rate": 3.7675849844823843e-06,
      "loss": 0.0098,
      "step": 1911700
    },
    {
      "epoch": 3.1285717091180456,
      "grad_norm": 0.39106911420822144,
      "learning_rate": 3.767519092268867e-06,
      "loss": 0.0105,
      "step": 1911720
    },
    {
      "epoch": 3.1286044395566988,
      "grad_norm": 0.0549057312309742,
      "learning_rate": 3.7674532000553498e-06,
      "loss": 0.0151,
      "step": 1911740
    },
    {
      "epoch": 3.1286371699953524,
      "grad_norm": 0.7524694204330444,
      "learning_rate": 3.7673873078418325e-06,
      "loss": 0.0144,
      "step": 1911760
    },
    {
      "epoch": 3.1286699004340055,
      "grad_norm": 0.40805110335350037,
      "learning_rate": 3.7673214156283157e-06,
      "loss": 0.0116,
      "step": 1911780
    },
    {
      "epoch": 3.128702630872659,
      "grad_norm": 0.3846528232097626,
      "learning_rate": 3.7672555234147984e-06,
      "loss": 0.0138,
      "step": 1911800
    },
    {
      "epoch": 3.1287353613113122,
      "grad_norm": 0.1271553635597229,
      "learning_rate": 3.767189631201281e-06,
      "loss": 0.0098,
      "step": 1911820
    },
    {
      "epoch": 3.128768091749966,
      "grad_norm": 0.530865490436554,
      "learning_rate": 3.767123738987764e-06,
      "loss": 0.0136,
      "step": 1911840
    },
    {
      "epoch": 3.128800822188619,
      "grad_norm": 0.19171559810638428,
      "learning_rate": 3.767057846774247e-06,
      "loss": 0.0132,
      "step": 1911860
    },
    {
      "epoch": 3.128833552627272,
      "grad_norm": 0.47178325057029724,
      "learning_rate": 3.76699195456073e-06,
      "loss": 0.0122,
      "step": 1911880
    },
    {
      "epoch": 3.1288662830659257,
      "grad_norm": 0.35396865010261536,
      "learning_rate": 3.7669260623472125e-06,
      "loss": 0.0127,
      "step": 1911900
    },
    {
      "epoch": 3.128899013504579,
      "grad_norm": 0.10961677134037018,
      "learning_rate": 3.7668601701336953e-06,
      "loss": 0.0115,
      "step": 1911920
    },
    {
      "epoch": 3.1289317439432325,
      "grad_norm": 1.0424010753631592,
      "learning_rate": 3.7667942779201785e-06,
      "loss": 0.011,
      "step": 1911940
    },
    {
      "epoch": 3.1289644743818856,
      "grad_norm": 0.09297686070203781,
      "learning_rate": 3.766728385706661e-06,
      "loss": 0.0101,
      "step": 1911960
    },
    {
      "epoch": 3.1289972048205392,
      "grad_norm": 0.5277705788612366,
      "learning_rate": 3.7666624934931444e-06,
      "loss": 0.0136,
      "step": 1911980
    },
    {
      "epoch": 3.1290299352591924,
      "grad_norm": 0.3396393358707428,
      "learning_rate": 3.7665966012796275e-06,
      "loss": 0.0183,
      "step": 1912000
    },
    {
      "epoch": 3.1290626656978455,
      "grad_norm": 0.5450220108032227,
      "learning_rate": 3.7665307090661103e-06,
      "loss": 0.0103,
      "step": 1912020
    },
    {
      "epoch": 3.129095396136499,
      "grad_norm": 0.3572424352169037,
      "learning_rate": 3.766464816852593e-06,
      "loss": 0.0132,
      "step": 1912040
    },
    {
      "epoch": 3.1291281265751523,
      "grad_norm": 0.42044734954833984,
      "learning_rate": 3.7663989246390757e-06,
      "loss": 0.0112,
      "step": 1912060
    },
    {
      "epoch": 3.129160857013806,
      "grad_norm": 0.12552186846733093,
      "learning_rate": 3.766333032425559e-06,
      "loss": 0.0151,
      "step": 1912080
    },
    {
      "epoch": 3.129193587452459,
      "grad_norm": 0.08331337571144104,
      "learning_rate": 3.7662671402120416e-06,
      "loss": 0.0092,
      "step": 1912100
    },
    {
      "epoch": 3.1292263178911126,
      "grad_norm": 0.24104005098342896,
      "learning_rate": 3.7662012479985244e-06,
      "loss": 0.0096,
      "step": 1912120
    },
    {
      "epoch": 3.1292590483297658,
      "grad_norm": 0.16674186289310455,
      "learning_rate": 3.766135355785007e-06,
      "loss": 0.011,
      "step": 1912140
    },
    {
      "epoch": 3.129291778768419,
      "grad_norm": 0.07411916553974152,
      "learning_rate": 3.76606946357149e-06,
      "loss": 0.0128,
      "step": 1912160
    },
    {
      "epoch": 3.1293245092070725,
      "grad_norm": 0.125414177775383,
      "learning_rate": 3.766003571357973e-06,
      "loss": 0.008,
      "step": 1912180
    },
    {
      "epoch": 3.1293572396457257,
      "grad_norm": 0.3146275579929352,
      "learning_rate": 3.7659376791444558e-06,
      "loss": 0.0097,
      "step": 1912200
    },
    {
      "epoch": 3.1293899700843792,
      "grad_norm": 0.1368093192577362,
      "learning_rate": 3.7658717869309385e-06,
      "loss": 0.0095,
      "step": 1912220
    },
    {
      "epoch": 3.1294227005230324,
      "grad_norm": 0.34453296661376953,
      "learning_rate": 3.7658058947174212e-06,
      "loss": 0.0147,
      "step": 1912240
    },
    {
      "epoch": 3.1294554309616855,
      "grad_norm": 0.0989527627825737,
      "learning_rate": 3.7657400025039044e-06,
      "loss": 0.0179,
      "step": 1912260
    },
    {
      "epoch": 3.129488161400339,
      "grad_norm": 0.38477393984794617,
      "learning_rate": 3.765674110290387e-06,
      "loss": 0.012,
      "step": 1912280
    },
    {
      "epoch": 3.1295208918389923,
      "grad_norm": 0.2698708772659302,
      "learning_rate": 3.76560821807687e-06,
      "loss": 0.01,
      "step": 1912300
    },
    {
      "epoch": 3.129553622277646,
      "grad_norm": 0.31635597348213196,
      "learning_rate": 3.7655423258633526e-06,
      "loss": 0.0129,
      "step": 1912320
    },
    {
      "epoch": 3.129586352716299,
      "grad_norm": 0.21105723083019257,
      "learning_rate": 3.765476433649836e-06,
      "loss": 0.0121,
      "step": 1912340
    },
    {
      "epoch": 3.1296190831549526,
      "grad_norm": 0.3514181077480316,
      "learning_rate": 3.765410541436319e-06,
      "loss": 0.0211,
      "step": 1912360
    },
    {
      "epoch": 3.129651813593606,
      "grad_norm": 0.239718496799469,
      "learning_rate": 3.7653446492228017e-06,
      "loss": 0.0113,
      "step": 1912380
    },
    {
      "epoch": 3.129684544032259,
      "grad_norm": 0.4144436717033386,
      "learning_rate": 3.765278757009285e-06,
      "loss": 0.013,
      "step": 1912400
    },
    {
      "epoch": 3.1297172744709125,
      "grad_norm": 0.172665074467659,
      "learning_rate": 3.7652128647957676e-06,
      "loss": 0.0094,
      "step": 1912420
    },
    {
      "epoch": 3.1297500049095657,
      "grad_norm": 0.21602818369865417,
      "learning_rate": 3.7651469725822503e-06,
      "loss": 0.0145,
      "step": 1912440
    },
    {
      "epoch": 3.1297827353482193,
      "grad_norm": 0.4175173044204712,
      "learning_rate": 3.765081080368733e-06,
      "loss": 0.0111,
      "step": 1912460
    },
    {
      "epoch": 3.1298154657868724,
      "grad_norm": 0.28525081276893616,
      "learning_rate": 3.7650151881552162e-06,
      "loss": 0.0121,
      "step": 1912480
    },
    {
      "epoch": 3.129848196225526,
      "grad_norm": 0.10310088843107224,
      "learning_rate": 3.764949295941699e-06,
      "loss": 0.0107,
      "step": 1912500
    },
    {
      "epoch": 3.129880926664179,
      "grad_norm": 0.27506768703460693,
      "learning_rate": 3.7648834037281817e-06,
      "loss": 0.0102,
      "step": 1912520
    },
    {
      "epoch": 3.1299136571028323,
      "grad_norm": 0.3804450035095215,
      "learning_rate": 3.7648175115146645e-06,
      "loss": 0.0107,
      "step": 1912540
    },
    {
      "epoch": 3.129946387541486,
      "grad_norm": 0.3222998082637787,
      "learning_rate": 3.7647516193011476e-06,
      "loss": 0.0133,
      "step": 1912560
    },
    {
      "epoch": 3.129979117980139,
      "grad_norm": 0.5505216121673584,
      "learning_rate": 3.7646857270876304e-06,
      "loss": 0.009,
      "step": 1912580
    },
    {
      "epoch": 3.1300118484187927,
      "grad_norm": 0.9154672622680664,
      "learning_rate": 3.764619834874113e-06,
      "loss": 0.0115,
      "step": 1912600
    },
    {
      "epoch": 3.130044578857446,
      "grad_norm": 0.5291357040405273,
      "learning_rate": 3.764553942660596e-06,
      "loss": 0.0104,
      "step": 1912620
    },
    {
      "epoch": 3.130077309296099,
      "grad_norm": 0.4732816815376282,
      "learning_rate": 3.7644880504470786e-06,
      "loss": 0.0099,
      "step": 1912640
    },
    {
      "epoch": 3.1301100397347525,
      "grad_norm": 0.4482942223548889,
      "learning_rate": 3.7644221582335617e-06,
      "loss": 0.0135,
      "step": 1912660
    },
    {
      "epoch": 3.1301427701734057,
      "grad_norm": 0.4672229290008545,
      "learning_rate": 3.7643562660200445e-06,
      "loss": 0.0119,
      "step": 1912680
    },
    {
      "epoch": 3.1301755006120593,
      "grad_norm": 0.1979323774576187,
      "learning_rate": 3.7642903738065276e-06,
      "loss": 0.0107,
      "step": 1912700
    },
    {
      "epoch": 3.1302082310507124,
      "grad_norm": 0.10437481850385666,
      "learning_rate": 3.764224481593011e-06,
      "loss": 0.0108,
      "step": 1912720
    },
    {
      "epoch": 3.130240961489366,
      "grad_norm": 0.6757577657699585,
      "learning_rate": 3.7641585893794935e-06,
      "loss": 0.0077,
      "step": 1912740
    },
    {
      "epoch": 3.130273691928019,
      "grad_norm": 0.06058710068464279,
      "learning_rate": 3.7640926971659763e-06,
      "loss": 0.0112,
      "step": 1912760
    },
    {
      "epoch": 3.1303064223666723,
      "grad_norm": 0.8986286520957947,
      "learning_rate": 3.764026804952459e-06,
      "loss": 0.0132,
      "step": 1912780
    },
    {
      "epoch": 3.130339152805326,
      "grad_norm": 0.5499366521835327,
      "learning_rate": 3.763960912738942e-06,
      "loss": 0.0104,
      "step": 1912800
    },
    {
      "epoch": 3.130371883243979,
      "grad_norm": 1.255711317062378,
      "learning_rate": 3.763895020525425e-06,
      "loss": 0.0171,
      "step": 1912820
    },
    {
      "epoch": 3.1304046136826327,
      "grad_norm": 0.22764094173908234,
      "learning_rate": 3.7638291283119077e-06,
      "loss": 0.0133,
      "step": 1912840
    },
    {
      "epoch": 3.130437344121286,
      "grad_norm": 0.18190151453018188,
      "learning_rate": 3.7637632360983904e-06,
      "loss": 0.0071,
      "step": 1912860
    },
    {
      "epoch": 3.1304700745599394,
      "grad_norm": 0.13929598033428192,
      "learning_rate": 3.7636973438848736e-06,
      "loss": 0.0124,
      "step": 1912880
    },
    {
      "epoch": 3.1305028049985926,
      "grad_norm": 0.18399678170681,
      "learning_rate": 3.7636314516713563e-06,
      "loss": 0.0082,
      "step": 1912900
    },
    {
      "epoch": 3.1305355354372457,
      "grad_norm": 0.27865859866142273,
      "learning_rate": 3.763565559457839e-06,
      "loss": 0.0092,
      "step": 1912920
    },
    {
      "epoch": 3.1305682658758993,
      "grad_norm": 0.1963752657175064,
      "learning_rate": 3.763499667244322e-06,
      "loss": 0.0148,
      "step": 1912940
    },
    {
      "epoch": 3.1306009963145525,
      "grad_norm": 0.5620291233062744,
      "learning_rate": 3.763433775030805e-06,
      "loss": 0.0133,
      "step": 1912960
    },
    {
      "epoch": 3.130633726753206,
      "grad_norm": 0.09267240017652512,
      "learning_rate": 3.7633678828172877e-06,
      "loss": 0.0075,
      "step": 1912980
    },
    {
      "epoch": 3.130666457191859,
      "grad_norm": 0.2501831650733948,
      "learning_rate": 3.7633019906037704e-06,
      "loss": 0.017,
      "step": 1913000
    },
    {
      "epoch": 3.130699187630513,
      "grad_norm": 0.30644461512565613,
      "learning_rate": 3.763236098390253e-06,
      "loss": 0.0108,
      "step": 1913020
    },
    {
      "epoch": 3.130731918069166,
      "grad_norm": 0.8231185674667358,
      "learning_rate": 3.7631702061767368e-06,
      "loss": 0.0108,
      "step": 1913040
    },
    {
      "epoch": 3.130764648507819,
      "grad_norm": 0.4792110025882721,
      "learning_rate": 3.7631043139632195e-06,
      "loss": 0.0109,
      "step": 1913060
    },
    {
      "epoch": 3.1307973789464727,
      "grad_norm": 0.4303736090660095,
      "learning_rate": 3.7630384217497022e-06,
      "loss": 0.0216,
      "step": 1913080
    },
    {
      "epoch": 3.130830109385126,
      "grad_norm": 0.16510428488254547,
      "learning_rate": 3.7629725295361854e-06,
      "loss": 0.0105,
      "step": 1913100
    },
    {
      "epoch": 3.1308628398237794,
      "grad_norm": 0.44088172912597656,
      "learning_rate": 3.762906637322668e-06,
      "loss": 0.0138,
      "step": 1913120
    },
    {
      "epoch": 3.1308955702624326,
      "grad_norm": 0.19160015881061554,
      "learning_rate": 3.762840745109151e-06,
      "loss": 0.0134,
      "step": 1913140
    },
    {
      "epoch": 3.130928300701086,
      "grad_norm": 0.25059202313423157,
      "learning_rate": 3.7627748528956336e-06,
      "loss": 0.0121,
      "step": 1913160
    },
    {
      "epoch": 3.1309610311397393,
      "grad_norm": 0.6533254981040955,
      "learning_rate": 3.7627089606821164e-06,
      "loss": 0.0113,
      "step": 1913180
    },
    {
      "epoch": 3.1309937615783925,
      "grad_norm": 0.4794773459434509,
      "learning_rate": 3.7626430684685995e-06,
      "loss": 0.0082,
      "step": 1913200
    },
    {
      "epoch": 3.131026492017046,
      "grad_norm": 0.4170224666595459,
      "learning_rate": 3.7625771762550823e-06,
      "loss": 0.0097,
      "step": 1913220
    },
    {
      "epoch": 3.1310592224556992,
      "grad_norm": 0.30971378087997437,
      "learning_rate": 3.762511284041565e-06,
      "loss": 0.0085,
      "step": 1913240
    },
    {
      "epoch": 3.131091952894353,
      "grad_norm": 0.14780017733573914,
      "learning_rate": 3.7624453918280477e-06,
      "loss": 0.0097,
      "step": 1913260
    },
    {
      "epoch": 3.131124683333006,
      "grad_norm": 0.32317328453063965,
      "learning_rate": 3.762379499614531e-06,
      "loss": 0.0094,
      "step": 1913280
    },
    {
      "epoch": 3.1311574137716596,
      "grad_norm": 0.22037385404109955,
      "learning_rate": 3.7623136074010136e-06,
      "loss": 0.0101,
      "step": 1913300
    },
    {
      "epoch": 3.1311901442103127,
      "grad_norm": 0.13181450963020325,
      "learning_rate": 3.7622477151874964e-06,
      "loss": 0.0133,
      "step": 1913320
    },
    {
      "epoch": 3.131222874648966,
      "grad_norm": 0.11519937962293625,
      "learning_rate": 3.762181822973979e-06,
      "loss": 0.0125,
      "step": 1913340
    },
    {
      "epoch": 3.1312556050876195,
      "grad_norm": 0.13101479411125183,
      "learning_rate": 3.7621159307604623e-06,
      "loss": 0.009,
      "step": 1913360
    },
    {
      "epoch": 3.1312883355262726,
      "grad_norm": 0.16757583618164062,
      "learning_rate": 3.762050038546945e-06,
      "loss": 0.0135,
      "step": 1913380
    },
    {
      "epoch": 3.131321065964926,
      "grad_norm": 0.27228063344955444,
      "learning_rate": 3.761984146333428e-06,
      "loss": 0.013,
      "step": 1913400
    },
    {
      "epoch": 3.1313537964035794,
      "grad_norm": 0.5076123476028442,
      "learning_rate": 3.7619182541199114e-06,
      "loss": 0.0105,
      "step": 1913420
    },
    {
      "epoch": 3.131386526842233,
      "grad_norm": 0.25234684348106384,
      "learning_rate": 3.761852361906394e-06,
      "loss": 0.0106,
      "step": 1913440
    },
    {
      "epoch": 3.131419257280886,
      "grad_norm": 0.244531512260437,
      "learning_rate": 3.761786469692877e-06,
      "loss": 0.0134,
      "step": 1913460
    },
    {
      "epoch": 3.1314519877195393,
      "grad_norm": 0.46732380986213684,
      "learning_rate": 3.7617205774793596e-06,
      "loss": 0.0156,
      "step": 1913480
    },
    {
      "epoch": 3.131484718158193,
      "grad_norm": 0.2670668959617615,
      "learning_rate": 3.7616546852658427e-06,
      "loss": 0.0139,
      "step": 1913500
    },
    {
      "epoch": 3.131517448596846,
      "grad_norm": 0.42695656418800354,
      "learning_rate": 3.7615887930523255e-06,
      "loss": 0.01,
      "step": 1913520
    },
    {
      "epoch": 3.1315501790354996,
      "grad_norm": 0.27885353565216064,
      "learning_rate": 3.7615229008388082e-06,
      "loss": 0.0197,
      "step": 1913540
    },
    {
      "epoch": 3.1315829094741527,
      "grad_norm": 0.16142834722995758,
      "learning_rate": 3.761457008625291e-06,
      "loss": 0.0083,
      "step": 1913560
    },
    {
      "epoch": 3.1316156399128063,
      "grad_norm": 0.495191752910614,
      "learning_rate": 3.7613911164117737e-06,
      "loss": 0.0102,
      "step": 1913580
    },
    {
      "epoch": 3.1316483703514595,
      "grad_norm": 0.24523231387138367,
      "learning_rate": 3.761325224198257e-06,
      "loss": 0.0141,
      "step": 1913600
    },
    {
      "epoch": 3.1316811007901126,
      "grad_norm": 0.22597041726112366,
      "learning_rate": 3.7612593319847396e-06,
      "loss": 0.0131,
      "step": 1913620
    },
    {
      "epoch": 3.1317138312287662,
      "grad_norm": 0.3591528832912445,
      "learning_rate": 3.7611934397712223e-06,
      "loss": 0.0125,
      "step": 1913640
    },
    {
      "epoch": 3.1317465616674194,
      "grad_norm": 0.7747789621353149,
      "learning_rate": 3.761127547557705e-06,
      "loss": 0.0165,
      "step": 1913660
    },
    {
      "epoch": 3.131779292106073,
      "grad_norm": 0.3860425055027008,
      "learning_rate": 3.7610616553441882e-06,
      "loss": 0.0152,
      "step": 1913680
    },
    {
      "epoch": 3.131812022544726,
      "grad_norm": 0.511612057685852,
      "learning_rate": 3.760995763130671e-06,
      "loss": 0.0172,
      "step": 1913700
    },
    {
      "epoch": 3.1318447529833793,
      "grad_norm": 0.05191347002983093,
      "learning_rate": 3.7609298709171537e-06,
      "loss": 0.0084,
      "step": 1913720
    },
    {
      "epoch": 3.131877483422033,
      "grad_norm": 0.12111619114875793,
      "learning_rate": 3.7608639787036373e-06,
      "loss": 0.0161,
      "step": 1913740
    },
    {
      "epoch": 3.131910213860686,
      "grad_norm": 0.23666687309741974,
      "learning_rate": 3.76079808649012e-06,
      "loss": 0.0121,
      "step": 1913760
    },
    {
      "epoch": 3.1319429442993396,
      "grad_norm": 0.12860479950904846,
      "learning_rate": 3.760732194276603e-06,
      "loss": 0.0109,
      "step": 1913780
    },
    {
      "epoch": 3.1319756747379928,
      "grad_norm": 0.1307767778635025,
      "learning_rate": 3.7606663020630855e-06,
      "loss": 0.0087,
      "step": 1913800
    },
    {
      "epoch": 3.1320084051766464,
      "grad_norm": 0.09919452667236328,
      "learning_rate": 3.7606004098495687e-06,
      "loss": 0.007,
      "step": 1913820
    },
    {
      "epoch": 3.1320411356152995,
      "grad_norm": 0.27906766533851624,
      "learning_rate": 3.7605345176360514e-06,
      "loss": 0.0138,
      "step": 1913840
    },
    {
      "epoch": 3.1320738660539527,
      "grad_norm": 1.0210049152374268,
      "learning_rate": 3.760468625422534e-06,
      "loss": 0.0169,
      "step": 1913860
    },
    {
      "epoch": 3.1321065964926063,
      "grad_norm": 0.42661216855049133,
      "learning_rate": 3.760402733209017e-06,
      "loss": 0.0107,
      "step": 1913880
    },
    {
      "epoch": 3.1321393269312594,
      "grad_norm": 0.08876296877861023,
      "learning_rate": 3.7603368409955e-06,
      "loss": 0.0125,
      "step": 1913900
    },
    {
      "epoch": 3.132172057369913,
      "grad_norm": 2.384934663772583,
      "learning_rate": 3.760270948781983e-06,
      "loss": 0.0127,
      "step": 1913920
    },
    {
      "epoch": 3.132204787808566,
      "grad_norm": 0.19514226913452148,
      "learning_rate": 3.7602050565684656e-06,
      "loss": 0.0112,
      "step": 1913940
    },
    {
      "epoch": 3.1322375182472197,
      "grad_norm": 0.7737302780151367,
      "learning_rate": 3.7601391643549483e-06,
      "loss": 0.0122,
      "step": 1913960
    },
    {
      "epoch": 3.132270248685873,
      "grad_norm": 0.457448810338974,
      "learning_rate": 3.7600732721414315e-06,
      "loss": 0.011,
      "step": 1913980
    },
    {
      "epoch": 3.132302979124526,
      "grad_norm": 0.27505549788475037,
      "learning_rate": 3.760007379927914e-06,
      "loss": 0.0132,
      "step": 1914000
    },
    {
      "epoch": 3.1323357095631796,
      "grad_norm": 0.13977515697479248,
      "learning_rate": 3.759941487714397e-06,
      "loss": 0.0105,
      "step": 1914020
    },
    {
      "epoch": 3.132368440001833,
      "grad_norm": 0.2683275640010834,
      "learning_rate": 3.7598755955008797e-06,
      "loss": 0.0177,
      "step": 1914040
    },
    {
      "epoch": 3.1324011704404864,
      "grad_norm": 0.18409410119056702,
      "learning_rate": 3.7598097032873624e-06,
      "loss": 0.009,
      "step": 1914060
    },
    {
      "epoch": 3.1324339008791395,
      "grad_norm": 0.6990180015563965,
      "learning_rate": 3.7597438110738456e-06,
      "loss": 0.0149,
      "step": 1914080
    },
    {
      "epoch": 3.132466631317793,
      "grad_norm": 0.0912790521979332,
      "learning_rate": 3.7596779188603287e-06,
      "loss": 0.0149,
      "step": 1914100
    },
    {
      "epoch": 3.1324993617564463,
      "grad_norm": 0.5270887017250061,
      "learning_rate": 3.7596120266468115e-06,
      "loss": 0.0111,
      "step": 1914120
    },
    {
      "epoch": 3.1325320921950994,
      "grad_norm": 0.6931852698326111,
      "learning_rate": 3.7595461344332946e-06,
      "loss": 0.0107,
      "step": 1914140
    },
    {
      "epoch": 3.132564822633753,
      "grad_norm": 0.4592131972312927,
      "learning_rate": 3.7594802422197774e-06,
      "loss": 0.0118,
      "step": 1914160
    },
    {
      "epoch": 3.132597553072406,
      "grad_norm": 0.15581239759922028,
      "learning_rate": 3.75941435000626e-06,
      "loss": 0.0165,
      "step": 1914180
    },
    {
      "epoch": 3.1326302835110598,
      "grad_norm": 0.2010141909122467,
      "learning_rate": 3.759348457792743e-06,
      "loss": 0.0153,
      "step": 1914200
    },
    {
      "epoch": 3.132663013949713,
      "grad_norm": 0.26176849007606506,
      "learning_rate": 3.759282565579226e-06,
      "loss": 0.0132,
      "step": 1914220
    },
    {
      "epoch": 3.132695744388366,
      "grad_norm": 0.24522365629673004,
      "learning_rate": 3.7592166733657088e-06,
      "loss": 0.0086,
      "step": 1914240
    },
    {
      "epoch": 3.1327284748270197,
      "grad_norm": 0.33746597170829773,
      "learning_rate": 3.7591507811521915e-06,
      "loss": 0.0125,
      "step": 1914260
    },
    {
      "epoch": 3.132761205265673,
      "grad_norm": 0.1973012089729309,
      "learning_rate": 3.7590848889386742e-06,
      "loss": 0.0079,
      "step": 1914280
    },
    {
      "epoch": 3.1327939357043264,
      "grad_norm": 0.24406510591506958,
      "learning_rate": 3.7590189967251574e-06,
      "loss": 0.0101,
      "step": 1914300
    },
    {
      "epoch": 3.1328266661429796,
      "grad_norm": 0.1830320805311203,
      "learning_rate": 3.75895310451164e-06,
      "loss": 0.01,
      "step": 1914320
    },
    {
      "epoch": 3.132859396581633,
      "grad_norm": 0.7786570191383362,
      "learning_rate": 3.758887212298123e-06,
      "loss": 0.0148,
      "step": 1914340
    },
    {
      "epoch": 3.1328921270202863,
      "grad_norm": 0.32207539677619934,
      "learning_rate": 3.7588213200846056e-06,
      "loss": 0.0129,
      "step": 1914360
    },
    {
      "epoch": 3.1329248574589394,
      "grad_norm": 0.2445036619901657,
      "learning_rate": 3.758755427871089e-06,
      "loss": 0.013,
      "step": 1914380
    },
    {
      "epoch": 3.132957587897593,
      "grad_norm": 0.25835737586021423,
      "learning_rate": 3.7586895356575715e-06,
      "loss": 0.0079,
      "step": 1914400
    },
    {
      "epoch": 3.132990318336246,
      "grad_norm": 0.26290324330329895,
      "learning_rate": 3.7586236434440543e-06,
      "loss": 0.0122,
      "step": 1914420
    },
    {
      "epoch": 3.1330230487749,
      "grad_norm": 0.6203029751777649,
      "learning_rate": 3.758557751230537e-06,
      "loss": 0.0143,
      "step": 1914440
    },
    {
      "epoch": 3.133055779213553,
      "grad_norm": 0.2386111468076706,
      "learning_rate": 3.7584918590170206e-06,
      "loss": 0.0093,
      "step": 1914460
    },
    {
      "epoch": 3.1330885096522065,
      "grad_norm": 0.2908734977245331,
      "learning_rate": 3.7584259668035033e-06,
      "loss": 0.0117,
      "step": 1914480
    },
    {
      "epoch": 3.1331212400908597,
      "grad_norm": 0.34854117035865784,
      "learning_rate": 3.758360074589986e-06,
      "loss": 0.0137,
      "step": 1914500
    },
    {
      "epoch": 3.133153970529513,
      "grad_norm": 0.163004070520401,
      "learning_rate": 3.7582941823764692e-06,
      "loss": 0.0167,
      "step": 1914520
    },
    {
      "epoch": 3.1331867009681664,
      "grad_norm": 0.12844014167785645,
      "learning_rate": 3.758228290162952e-06,
      "loss": 0.0103,
      "step": 1914540
    },
    {
      "epoch": 3.1332194314068196,
      "grad_norm": 0.3489450514316559,
      "learning_rate": 3.7581623979494347e-06,
      "loss": 0.0139,
      "step": 1914560
    },
    {
      "epoch": 3.133252161845473,
      "grad_norm": 0.1295401155948639,
      "learning_rate": 3.7580965057359175e-06,
      "loss": 0.0099,
      "step": 1914580
    },
    {
      "epoch": 3.1332848922841263,
      "grad_norm": 0.15497711300849915,
      "learning_rate": 3.7580306135224e-06,
      "loss": 0.0129,
      "step": 1914600
    },
    {
      "epoch": 3.13331762272278,
      "grad_norm": 0.5064632892608643,
      "learning_rate": 3.7579647213088834e-06,
      "loss": 0.0062,
      "step": 1914620
    },
    {
      "epoch": 3.133350353161433,
      "grad_norm": 0.5336123704910278,
      "learning_rate": 3.757898829095366e-06,
      "loss": 0.0141,
      "step": 1914640
    },
    {
      "epoch": 3.133383083600086,
      "grad_norm": 0.3636701703071594,
      "learning_rate": 3.757832936881849e-06,
      "loss": 0.009,
      "step": 1914660
    },
    {
      "epoch": 3.13341581403874,
      "grad_norm": 0.1296471208333969,
      "learning_rate": 3.7577670446683316e-06,
      "loss": 0.0129,
      "step": 1914680
    },
    {
      "epoch": 3.133448544477393,
      "grad_norm": 0.27613869309425354,
      "learning_rate": 3.7577011524548147e-06,
      "loss": 0.0079,
      "step": 1914700
    },
    {
      "epoch": 3.1334812749160466,
      "grad_norm": 0.3173533082008362,
      "learning_rate": 3.7576352602412975e-06,
      "loss": 0.0085,
      "step": 1914720
    },
    {
      "epoch": 3.1335140053546997,
      "grad_norm": 0.3497666120529175,
      "learning_rate": 3.7575693680277802e-06,
      "loss": 0.0106,
      "step": 1914740
    },
    {
      "epoch": 3.1335467357933533,
      "grad_norm": 0.30427083373069763,
      "learning_rate": 3.757503475814263e-06,
      "loss": 0.012,
      "step": 1914760
    },
    {
      "epoch": 3.1335794662320064,
      "grad_norm": 0.09695913642644882,
      "learning_rate": 3.757437583600746e-06,
      "loss": 0.0067,
      "step": 1914780
    },
    {
      "epoch": 3.1336121966706596,
      "grad_norm": 0.26645636558532715,
      "learning_rate": 3.7573716913872293e-06,
      "loss": 0.0113,
      "step": 1914800
    },
    {
      "epoch": 3.133644927109313,
      "grad_norm": 0.21780048310756683,
      "learning_rate": 3.757305799173712e-06,
      "loss": 0.0162,
      "step": 1914820
    },
    {
      "epoch": 3.1336776575479663,
      "grad_norm": 0.6178960800170898,
      "learning_rate": 3.757239906960195e-06,
      "loss": 0.013,
      "step": 1914840
    },
    {
      "epoch": 3.13371038798662,
      "grad_norm": 0.16415132582187653,
      "learning_rate": 3.757174014746678e-06,
      "loss": 0.0086,
      "step": 1914860
    },
    {
      "epoch": 3.133743118425273,
      "grad_norm": 0.20463791489601135,
      "learning_rate": 3.7571081225331607e-06,
      "loss": 0.0114,
      "step": 1914880
    },
    {
      "epoch": 3.1337758488639267,
      "grad_norm": 0.18908153474330902,
      "learning_rate": 3.7570422303196434e-06,
      "loss": 0.0093,
      "step": 1914900
    },
    {
      "epoch": 3.13380857930258,
      "grad_norm": 0.3324750065803528,
      "learning_rate": 3.7569763381061266e-06,
      "loss": 0.0079,
      "step": 1914920
    },
    {
      "epoch": 3.133841309741233,
      "grad_norm": 0.1165957897901535,
      "learning_rate": 3.7569104458926093e-06,
      "loss": 0.0131,
      "step": 1914940
    },
    {
      "epoch": 3.1338740401798866,
      "grad_norm": 0.47682633996009827,
      "learning_rate": 3.756844553679092e-06,
      "loss": 0.0115,
      "step": 1914960
    },
    {
      "epoch": 3.1339067706185397,
      "grad_norm": 0.48497679829597473,
      "learning_rate": 3.756778661465575e-06,
      "loss": 0.0067,
      "step": 1914980
    },
    {
      "epoch": 3.1339395010571933,
      "grad_norm": 0.3389289081096649,
      "learning_rate": 3.7567127692520575e-06,
      "loss": 0.0116,
      "step": 1915000
    },
    {
      "epoch": 3.1339722314958465,
      "grad_norm": 0.39754942059516907,
      "learning_rate": 3.7566468770385407e-06,
      "loss": 0.0086,
      "step": 1915020
    },
    {
      "epoch": 3.1340049619345,
      "grad_norm": 0.29106006026268005,
      "learning_rate": 3.7565809848250234e-06,
      "loss": 0.0109,
      "step": 1915040
    },
    {
      "epoch": 3.134037692373153,
      "grad_norm": 0.39117082953453064,
      "learning_rate": 3.756515092611506e-06,
      "loss": 0.0099,
      "step": 1915060
    },
    {
      "epoch": 3.1340704228118064,
      "grad_norm": 0.17532311379909515,
      "learning_rate": 3.756449200397989e-06,
      "loss": 0.0158,
      "step": 1915080
    },
    {
      "epoch": 3.13410315325046,
      "grad_norm": 0.34227102994918823,
      "learning_rate": 3.756383308184472e-06,
      "loss": 0.0105,
      "step": 1915100
    },
    {
      "epoch": 3.134135883689113,
      "grad_norm": 0.28067904710769653,
      "learning_rate": 3.756317415970955e-06,
      "loss": 0.0147,
      "step": 1915120
    },
    {
      "epoch": 3.1341686141277667,
      "grad_norm": 0.1486320048570633,
      "learning_rate": 3.7562515237574376e-06,
      "loss": 0.0091,
      "step": 1915140
    },
    {
      "epoch": 3.13420134456642,
      "grad_norm": 0.05238364264369011,
      "learning_rate": 3.756185631543921e-06,
      "loss": 0.0112,
      "step": 1915160
    },
    {
      "epoch": 3.1342340750050734,
      "grad_norm": 1.0804017782211304,
      "learning_rate": 3.756119739330404e-06,
      "loss": 0.0129,
      "step": 1915180
    },
    {
      "epoch": 3.1342668054437266,
      "grad_norm": 0.5082582831382751,
      "learning_rate": 3.7560538471168866e-06,
      "loss": 0.0132,
      "step": 1915200
    },
    {
      "epoch": 3.1342995358823798,
      "grad_norm": 0.2263699620962143,
      "learning_rate": 3.7559879549033694e-06,
      "loss": 0.0094,
      "step": 1915220
    },
    {
      "epoch": 3.1343322663210333,
      "grad_norm": 0.9865599274635315,
      "learning_rate": 3.7559220626898525e-06,
      "loss": 0.0135,
      "step": 1915240
    },
    {
      "epoch": 3.1343649967596865,
      "grad_norm": 0.4085375666618347,
      "learning_rate": 3.7558561704763353e-06,
      "loss": 0.0072,
      "step": 1915260
    },
    {
      "epoch": 3.13439772719834,
      "grad_norm": 0.218002587556839,
      "learning_rate": 3.755790278262818e-06,
      "loss": 0.0154,
      "step": 1915280
    },
    {
      "epoch": 3.1344304576369932,
      "grad_norm": 0.1869335025548935,
      "learning_rate": 3.7557243860493008e-06,
      "loss": 0.012,
      "step": 1915300
    },
    {
      "epoch": 3.1344631880756464,
      "grad_norm": 0.1435272991657257,
      "learning_rate": 3.755658493835784e-06,
      "loss": 0.0149,
      "step": 1915320
    },
    {
      "epoch": 3.1344959185143,
      "grad_norm": 0.2124655395746231,
      "learning_rate": 3.7555926016222667e-06,
      "loss": 0.0083,
      "step": 1915340
    },
    {
      "epoch": 3.134528648952953,
      "grad_norm": 0.25062450766563416,
      "learning_rate": 3.7555267094087494e-06,
      "loss": 0.0138,
      "step": 1915360
    },
    {
      "epoch": 3.1345613793916067,
      "grad_norm": 0.4702698886394501,
      "learning_rate": 3.755460817195232e-06,
      "loss": 0.0118,
      "step": 1915380
    },
    {
      "epoch": 3.13459410983026,
      "grad_norm": 0.23371919989585876,
      "learning_rate": 3.7553949249817153e-06,
      "loss": 0.0096,
      "step": 1915400
    },
    {
      "epoch": 3.1346268402689135,
      "grad_norm": 0.20917029678821564,
      "learning_rate": 3.755329032768198e-06,
      "loss": 0.0131,
      "step": 1915420
    },
    {
      "epoch": 3.1346595707075666,
      "grad_norm": 0.45271173119544983,
      "learning_rate": 3.7552631405546808e-06,
      "loss": 0.0088,
      "step": 1915440
    },
    {
      "epoch": 3.1346923011462198,
      "grad_norm": 0.42962953448295593,
      "learning_rate": 3.7551972483411635e-06,
      "loss": 0.0122,
      "step": 1915460
    },
    {
      "epoch": 3.1347250315848734,
      "grad_norm": 0.11477779597043991,
      "learning_rate": 3.7551313561276463e-06,
      "loss": 0.0101,
      "step": 1915480
    },
    {
      "epoch": 3.1347577620235265,
      "grad_norm": 0.21832479536533356,
      "learning_rate": 3.75506546391413e-06,
      "loss": 0.0115,
      "step": 1915500
    },
    {
      "epoch": 3.13479049246218,
      "grad_norm": 0.22101841866970062,
      "learning_rate": 3.7549995717006126e-06,
      "loss": 0.0082,
      "step": 1915520
    },
    {
      "epoch": 3.1348232229008333,
      "grad_norm": 0.6864392161369324,
      "learning_rate": 3.7549336794870953e-06,
      "loss": 0.0089,
      "step": 1915540
    },
    {
      "epoch": 3.134855953339487,
      "grad_norm": 0.36129531264305115,
      "learning_rate": 3.7548677872735785e-06,
      "loss": 0.0099,
      "step": 1915560
    },
    {
      "epoch": 3.13488868377814,
      "grad_norm": 0.20757712423801422,
      "learning_rate": 3.7548018950600612e-06,
      "loss": 0.0099,
      "step": 1915580
    },
    {
      "epoch": 3.134921414216793,
      "grad_norm": 0.2211412936449051,
      "learning_rate": 3.754736002846544e-06,
      "loss": 0.0095,
      "step": 1915600
    },
    {
      "epoch": 3.1349541446554468,
      "grad_norm": 0.1708465814590454,
      "learning_rate": 3.7546701106330267e-06,
      "loss": 0.0101,
      "step": 1915620
    },
    {
      "epoch": 3.1349868750941,
      "grad_norm": 0.12555307149887085,
      "learning_rate": 3.75460421841951e-06,
      "loss": 0.0136,
      "step": 1915640
    },
    {
      "epoch": 3.1350196055327535,
      "grad_norm": 0.2618981897830963,
      "learning_rate": 3.7545383262059926e-06,
      "loss": 0.0088,
      "step": 1915660
    },
    {
      "epoch": 3.1350523359714066,
      "grad_norm": 0.23281055688858032,
      "learning_rate": 3.7544724339924753e-06,
      "loss": 0.0178,
      "step": 1915680
    },
    {
      "epoch": 3.13508506641006,
      "grad_norm": 0.4113355576992035,
      "learning_rate": 3.754406541778958e-06,
      "loss": 0.009,
      "step": 1915700
    },
    {
      "epoch": 3.1351177968487134,
      "grad_norm": 1.1325393915176392,
      "learning_rate": 3.7543406495654412e-06,
      "loss": 0.0108,
      "step": 1915720
    },
    {
      "epoch": 3.1351505272873665,
      "grad_norm": 0.3073832094669342,
      "learning_rate": 3.754274757351924e-06,
      "loss": 0.0115,
      "step": 1915740
    },
    {
      "epoch": 3.13518325772602,
      "grad_norm": 0.40473467111587524,
      "learning_rate": 3.7542088651384067e-06,
      "loss": 0.0189,
      "step": 1915760
    },
    {
      "epoch": 3.1352159881646733,
      "grad_norm": 0.41201239824295044,
      "learning_rate": 3.7541429729248895e-06,
      "loss": 0.0115,
      "step": 1915780
    },
    {
      "epoch": 3.135248718603327,
      "grad_norm": 0.32968392968177795,
      "learning_rate": 3.7540770807113726e-06,
      "loss": 0.0122,
      "step": 1915800
    },
    {
      "epoch": 3.13528144904198,
      "grad_norm": 0.3626217246055603,
      "learning_rate": 3.7540111884978554e-06,
      "loss": 0.0077,
      "step": 1915820
    },
    {
      "epoch": 3.135314179480633,
      "grad_norm": 0.3115050792694092,
      "learning_rate": 3.753945296284338e-06,
      "loss": 0.0073,
      "step": 1915840
    },
    {
      "epoch": 3.1353469099192868,
      "grad_norm": 0.17266911268234253,
      "learning_rate": 3.7538794040708217e-06,
      "loss": 0.0095,
      "step": 1915860
    },
    {
      "epoch": 3.13537964035794,
      "grad_norm": 0.22298774123191833,
      "learning_rate": 3.7538135118573044e-06,
      "loss": 0.0177,
      "step": 1915880
    },
    {
      "epoch": 3.1354123707965935,
      "grad_norm": 0.13852140307426453,
      "learning_rate": 3.753747619643787e-06,
      "loss": 0.0098,
      "step": 1915900
    },
    {
      "epoch": 3.1354451012352467,
      "grad_norm": 0.3584457337856293,
      "learning_rate": 3.75368172743027e-06,
      "loss": 0.0098,
      "step": 1915920
    },
    {
      "epoch": 3.1354778316739003,
      "grad_norm": 0.1072554737329483,
      "learning_rate": 3.753615835216753e-06,
      "loss": 0.0111,
      "step": 1915940
    },
    {
      "epoch": 3.1355105621125534,
      "grad_norm": 0.12001606076955795,
      "learning_rate": 3.753549943003236e-06,
      "loss": 0.0116,
      "step": 1915960
    },
    {
      "epoch": 3.1355432925512066,
      "grad_norm": 0.3493903875350952,
      "learning_rate": 3.7534840507897186e-06,
      "loss": 0.0136,
      "step": 1915980
    },
    {
      "epoch": 3.13557602298986,
      "grad_norm": 1.1416202783584595,
      "learning_rate": 3.7534181585762013e-06,
      "loss": 0.0103,
      "step": 1916000
    },
    {
      "epoch": 3.1356087534285133,
      "grad_norm": 1.0041731595993042,
      "learning_rate": 3.753352266362684e-06,
      "loss": 0.0093,
      "step": 1916020
    },
    {
      "epoch": 3.135641483867167,
      "grad_norm": 0.6927198171615601,
      "learning_rate": 3.753286374149167e-06,
      "loss": 0.011,
      "step": 1916040
    },
    {
      "epoch": 3.13567421430582,
      "grad_norm": 0.20005232095718384,
      "learning_rate": 3.75322048193565e-06,
      "loss": 0.0093,
      "step": 1916060
    },
    {
      "epoch": 3.1357069447444736,
      "grad_norm": 0.36987102031707764,
      "learning_rate": 3.7531545897221327e-06,
      "loss": 0.0104,
      "step": 1916080
    },
    {
      "epoch": 3.135739675183127,
      "grad_norm": 0.2664262652397156,
      "learning_rate": 3.7530886975086154e-06,
      "loss": 0.0109,
      "step": 1916100
    },
    {
      "epoch": 3.13577240562178,
      "grad_norm": 0.7978907227516174,
      "learning_rate": 3.7530228052950986e-06,
      "loss": 0.0121,
      "step": 1916120
    },
    {
      "epoch": 3.1358051360604335,
      "grad_norm": 0.5778892636299133,
      "learning_rate": 3.7529569130815813e-06,
      "loss": 0.011,
      "step": 1916140
    },
    {
      "epoch": 3.1358378664990867,
      "grad_norm": 0.43466717004776,
      "learning_rate": 3.752891020868064e-06,
      "loss": 0.0078,
      "step": 1916160
    },
    {
      "epoch": 3.1358705969377403,
      "grad_norm": 0.41226693987846375,
      "learning_rate": 3.752825128654547e-06,
      "loss": 0.0113,
      "step": 1916180
    },
    {
      "epoch": 3.1359033273763934,
      "grad_norm": 0.29034045338630676,
      "learning_rate": 3.75275923644103e-06,
      "loss": 0.0112,
      "step": 1916200
    },
    {
      "epoch": 3.135936057815047,
      "grad_norm": 0.12442044913768768,
      "learning_rate": 3.752693344227513e-06,
      "loss": 0.0102,
      "step": 1916220
    },
    {
      "epoch": 3.1359687882537,
      "grad_norm": 0.10218903422355652,
      "learning_rate": 3.752627452013996e-06,
      "loss": 0.0177,
      "step": 1916240
    },
    {
      "epoch": 3.1360015186923533,
      "grad_norm": 0.14374688267707825,
      "learning_rate": 3.752561559800479e-06,
      "loss": 0.0096,
      "step": 1916260
    },
    {
      "epoch": 3.136034249131007,
      "grad_norm": 0.48141056299209595,
      "learning_rate": 3.7524956675869618e-06,
      "loss": 0.0108,
      "step": 1916280
    },
    {
      "epoch": 3.13606697956966,
      "grad_norm": 0.07586174458265305,
      "learning_rate": 3.7524297753734445e-06,
      "loss": 0.0085,
      "step": 1916300
    },
    {
      "epoch": 3.1360997100083137,
      "grad_norm": 0.35304296016693115,
      "learning_rate": 3.7523638831599273e-06,
      "loss": 0.0103,
      "step": 1916320
    },
    {
      "epoch": 3.136132440446967,
      "grad_norm": 0.1027432456612587,
      "learning_rate": 3.7522979909464104e-06,
      "loss": 0.0137,
      "step": 1916340
    },
    {
      "epoch": 3.1361651708856204,
      "grad_norm": 0.09846526384353638,
      "learning_rate": 3.752232098732893e-06,
      "loss": 0.0064,
      "step": 1916360
    },
    {
      "epoch": 3.1361979013242736,
      "grad_norm": 0.5595632195472717,
      "learning_rate": 3.752166206519376e-06,
      "loss": 0.0088,
      "step": 1916380
    },
    {
      "epoch": 3.1362306317629267,
      "grad_norm": 0.2610980272293091,
      "learning_rate": 3.7521003143058586e-06,
      "loss": 0.0121,
      "step": 1916400
    },
    {
      "epoch": 3.1362633622015803,
      "grad_norm": 0.329549103975296,
      "learning_rate": 3.752034422092342e-06,
      "loss": 0.0187,
      "step": 1916420
    },
    {
      "epoch": 3.1362960926402335,
      "grad_norm": 0.15778748691082,
      "learning_rate": 3.7519685298788245e-06,
      "loss": 0.0154,
      "step": 1916440
    },
    {
      "epoch": 3.136328823078887,
      "grad_norm": 0.27968329191207886,
      "learning_rate": 3.7519026376653073e-06,
      "loss": 0.0091,
      "step": 1916460
    },
    {
      "epoch": 3.13636155351754,
      "grad_norm": 0.5652228593826294,
      "learning_rate": 3.75183674545179e-06,
      "loss": 0.013,
      "step": 1916480
    },
    {
      "epoch": 3.136394283956194,
      "grad_norm": 0.17400479316711426,
      "learning_rate": 3.7517708532382728e-06,
      "loss": 0.0124,
      "step": 1916500
    },
    {
      "epoch": 3.136427014394847,
      "grad_norm": 0.0772663876414299,
      "learning_rate": 3.751704961024756e-06,
      "loss": 0.0097,
      "step": 1916520
    },
    {
      "epoch": 3.1364597448335,
      "grad_norm": 0.16129927337169647,
      "learning_rate": 3.7516390688112387e-06,
      "loss": 0.0106,
      "step": 1916540
    },
    {
      "epoch": 3.1364924752721537,
      "grad_norm": 0.16931772232055664,
      "learning_rate": 3.751573176597722e-06,
      "loss": 0.0103,
      "step": 1916560
    },
    {
      "epoch": 3.136525205710807,
      "grad_norm": 0.20758917927742004,
      "learning_rate": 3.751507284384205e-06,
      "loss": 0.0114,
      "step": 1916580
    },
    {
      "epoch": 3.1365579361494604,
      "grad_norm": 0.2849547266960144,
      "learning_rate": 3.7514413921706877e-06,
      "loss": 0.0084,
      "step": 1916600
    },
    {
      "epoch": 3.1365906665881136,
      "grad_norm": 0.5898125767707825,
      "learning_rate": 3.7513754999571705e-06,
      "loss": 0.0149,
      "step": 1916620
    },
    {
      "epoch": 3.136623397026767,
      "grad_norm": 0.15555383265018463,
      "learning_rate": 3.751309607743653e-06,
      "loss": 0.0099,
      "step": 1916640
    },
    {
      "epoch": 3.1366561274654203,
      "grad_norm": 0.3826630711555481,
      "learning_rate": 3.7512437155301364e-06,
      "loss": 0.0149,
      "step": 1916660
    },
    {
      "epoch": 3.1366888579040735,
      "grad_norm": 0.4903457462787628,
      "learning_rate": 3.751177823316619e-06,
      "loss": 0.0122,
      "step": 1916680
    },
    {
      "epoch": 3.136721588342727,
      "grad_norm": 0.9762744903564453,
      "learning_rate": 3.751111931103102e-06,
      "loss": 0.0127,
      "step": 1916700
    },
    {
      "epoch": 3.1367543187813802,
      "grad_norm": 0.21216723322868347,
      "learning_rate": 3.7510460388895846e-06,
      "loss": 0.0108,
      "step": 1916720
    },
    {
      "epoch": 3.136787049220034,
      "grad_norm": 0.6121431589126587,
      "learning_rate": 3.7509801466760678e-06,
      "loss": 0.0104,
      "step": 1916740
    },
    {
      "epoch": 3.136819779658687,
      "grad_norm": 0.8648529648780823,
      "learning_rate": 3.7509142544625505e-06,
      "loss": 0.0106,
      "step": 1916760
    },
    {
      "epoch": 3.13685251009734,
      "grad_norm": 0.6028760671615601,
      "learning_rate": 3.7508483622490332e-06,
      "loss": 0.0124,
      "step": 1916780
    },
    {
      "epoch": 3.1368852405359937,
      "grad_norm": 0.19617131352424622,
      "learning_rate": 3.750782470035516e-06,
      "loss": 0.0159,
      "step": 1916800
    },
    {
      "epoch": 3.136917970974647,
      "grad_norm": 0.317461222410202,
      "learning_rate": 3.750716577821999e-06,
      "loss": 0.0123,
      "step": 1916820
    },
    {
      "epoch": 3.1369507014133005,
      "grad_norm": 0.8994231820106506,
      "learning_rate": 3.750650685608482e-06,
      "loss": 0.014,
      "step": 1916840
    },
    {
      "epoch": 3.1369834318519536,
      "grad_norm": 0.393546998500824,
      "learning_rate": 3.7505847933949646e-06,
      "loss": 0.0067,
      "step": 1916860
    },
    {
      "epoch": 3.137016162290607,
      "grad_norm": 0.3512417674064636,
      "learning_rate": 3.7505189011814474e-06,
      "loss": 0.0097,
      "step": 1916880
    },
    {
      "epoch": 3.1370488927292604,
      "grad_norm": 0.3712140917778015,
      "learning_rate": 3.75045300896793e-06,
      "loss": 0.011,
      "step": 1916900
    },
    {
      "epoch": 3.1370816231679135,
      "grad_norm": 0.30388587713241577,
      "learning_rate": 3.7503871167544137e-06,
      "loss": 0.0137,
      "step": 1916920
    },
    {
      "epoch": 3.137114353606567,
      "grad_norm": 0.7839566469192505,
      "learning_rate": 3.7503212245408964e-06,
      "loss": 0.0128,
      "step": 1916940
    },
    {
      "epoch": 3.1371470840452202,
      "grad_norm": 0.25112634897232056,
      "learning_rate": 3.750255332327379e-06,
      "loss": 0.0133,
      "step": 1916960
    },
    {
      "epoch": 3.137179814483874,
      "grad_norm": 0.24139735102653503,
      "learning_rate": 3.7501894401138623e-06,
      "loss": 0.0083,
      "step": 1916980
    },
    {
      "epoch": 3.137212544922527,
      "grad_norm": 0.19056646525859833,
      "learning_rate": 3.750123547900345e-06,
      "loss": 0.0127,
      "step": 1917000
    },
    {
      "epoch": 3.1372452753611806,
      "grad_norm": 0.17139670252799988,
      "learning_rate": 3.750057655686828e-06,
      "loss": 0.0117,
      "step": 1917020
    },
    {
      "epoch": 3.1372780057998337,
      "grad_norm": 0.2643080949783325,
      "learning_rate": 3.7499917634733105e-06,
      "loss": 0.0094,
      "step": 1917040
    },
    {
      "epoch": 3.137310736238487,
      "grad_norm": 0.24883408844470978,
      "learning_rate": 3.7499258712597937e-06,
      "loss": 0.0109,
      "step": 1917060
    },
    {
      "epoch": 3.1373434666771405,
      "grad_norm": 0.4171271324157715,
      "learning_rate": 3.7498599790462764e-06,
      "loss": 0.0125,
      "step": 1917080
    },
    {
      "epoch": 3.1373761971157936,
      "grad_norm": 0.13823086023330688,
      "learning_rate": 3.749794086832759e-06,
      "loss": 0.0091,
      "step": 1917100
    },
    {
      "epoch": 3.1374089275544472,
      "grad_norm": 0.12427537888288498,
      "learning_rate": 3.749728194619242e-06,
      "loss": 0.009,
      "step": 1917120
    },
    {
      "epoch": 3.1374416579931004,
      "grad_norm": 0.28003624081611633,
      "learning_rate": 3.749662302405725e-06,
      "loss": 0.0093,
      "step": 1917140
    },
    {
      "epoch": 3.137474388431754,
      "grad_norm": 0.6047942042350769,
      "learning_rate": 3.749596410192208e-06,
      "loss": 0.022,
      "step": 1917160
    },
    {
      "epoch": 3.137507118870407,
      "grad_norm": 0.2319924682378769,
      "learning_rate": 3.7495305179786906e-06,
      "loss": 0.0119,
      "step": 1917180
    },
    {
      "epoch": 3.1375398493090603,
      "grad_norm": 0.6083058714866638,
      "learning_rate": 3.7494646257651733e-06,
      "loss": 0.0146,
      "step": 1917200
    },
    {
      "epoch": 3.137572579747714,
      "grad_norm": 0.36953815817832947,
      "learning_rate": 3.7493987335516565e-06,
      "loss": 0.0143,
      "step": 1917220
    },
    {
      "epoch": 3.137605310186367,
      "grad_norm": 0.37467294931411743,
      "learning_rate": 3.749332841338139e-06,
      "loss": 0.0219,
      "step": 1917240
    },
    {
      "epoch": 3.1376380406250206,
      "grad_norm": 0.1733105182647705,
      "learning_rate": 3.7492669491246224e-06,
      "loss": 0.008,
      "step": 1917260
    },
    {
      "epoch": 3.1376707710636738,
      "grad_norm": 0.37826302647590637,
      "learning_rate": 3.7492010569111055e-06,
      "loss": 0.0133,
      "step": 1917280
    },
    {
      "epoch": 3.137703501502327,
      "grad_norm": 0.2685765326023102,
      "learning_rate": 3.7491351646975883e-06,
      "loss": 0.0077,
      "step": 1917300
    },
    {
      "epoch": 3.1377362319409805,
      "grad_norm": 0.1808657944202423,
      "learning_rate": 3.749069272484071e-06,
      "loss": 0.0117,
      "step": 1917320
    },
    {
      "epoch": 3.1377689623796337,
      "grad_norm": 0.6416913270950317,
      "learning_rate": 3.7490033802705538e-06,
      "loss": 0.0128,
      "step": 1917340
    },
    {
      "epoch": 3.1378016928182872,
      "grad_norm": 0.36216798424720764,
      "learning_rate": 3.748937488057037e-06,
      "loss": 0.0126,
      "step": 1917360
    },
    {
      "epoch": 3.1378344232569404,
      "grad_norm": 0.5684828758239746,
      "learning_rate": 3.7488715958435197e-06,
      "loss": 0.0132,
      "step": 1917380
    },
    {
      "epoch": 3.137867153695594,
      "grad_norm": 0.17982147634029388,
      "learning_rate": 3.7488057036300024e-06,
      "loss": 0.0122,
      "step": 1917400
    },
    {
      "epoch": 3.137899884134247,
      "grad_norm": 0.5496368408203125,
      "learning_rate": 3.748739811416485e-06,
      "loss": 0.0084,
      "step": 1917420
    },
    {
      "epoch": 3.1379326145729003,
      "grad_norm": 0.13215653598308563,
      "learning_rate": 3.748673919202968e-06,
      "loss": 0.0132,
      "step": 1917440
    },
    {
      "epoch": 3.137965345011554,
      "grad_norm": 0.401012659072876,
      "learning_rate": 3.748608026989451e-06,
      "loss": 0.0141,
      "step": 1917460
    },
    {
      "epoch": 3.137998075450207,
      "grad_norm": 0.14423784613609314,
      "learning_rate": 3.7485421347759338e-06,
      "loss": 0.0096,
      "step": 1917480
    },
    {
      "epoch": 3.1380308058888606,
      "grad_norm": 0.1819831281900406,
      "learning_rate": 3.7484762425624165e-06,
      "loss": 0.0157,
      "step": 1917500
    },
    {
      "epoch": 3.138063536327514,
      "grad_norm": 0.20809179544448853,
      "learning_rate": 3.7484103503488993e-06,
      "loss": 0.0112,
      "step": 1917520
    },
    {
      "epoch": 3.1380962667661674,
      "grad_norm": 0.15849560499191284,
      "learning_rate": 3.7483444581353824e-06,
      "loss": 0.0136,
      "step": 1917540
    },
    {
      "epoch": 3.1381289972048205,
      "grad_norm": 0.15401124954223633,
      "learning_rate": 3.748278565921865e-06,
      "loss": 0.0125,
      "step": 1917560
    },
    {
      "epoch": 3.1381617276434737,
      "grad_norm": 1.8137656450271606,
      "learning_rate": 3.748212673708348e-06,
      "loss": 0.0148,
      "step": 1917580
    },
    {
      "epoch": 3.1381944580821273,
      "grad_norm": 0.3690454065799713,
      "learning_rate": 3.7481467814948306e-06,
      "loss": 0.0112,
      "step": 1917600
    },
    {
      "epoch": 3.1382271885207804,
      "grad_norm": 0.47916075587272644,
      "learning_rate": 3.7480808892813142e-06,
      "loss": 0.0187,
      "step": 1917620
    },
    {
      "epoch": 3.138259918959434,
      "grad_norm": 0.12502214312553406,
      "learning_rate": 3.748014997067797e-06,
      "loss": 0.0113,
      "step": 1917640
    },
    {
      "epoch": 3.138292649398087,
      "grad_norm": 0.22947075963020325,
      "learning_rate": 3.7479491048542797e-06,
      "loss": 0.0066,
      "step": 1917660
    },
    {
      "epoch": 3.1383253798367408,
      "grad_norm": 0.4160691797733307,
      "learning_rate": 3.747883212640763e-06,
      "loss": 0.0087,
      "step": 1917680
    },
    {
      "epoch": 3.138358110275394,
      "grad_norm": 0.4308490753173828,
      "learning_rate": 3.7478173204272456e-06,
      "loss": 0.0134,
      "step": 1917700
    },
    {
      "epoch": 3.138390840714047,
      "grad_norm": 0.2653101682662964,
      "learning_rate": 3.7477514282137284e-06,
      "loss": 0.0133,
      "step": 1917720
    },
    {
      "epoch": 3.1384235711527007,
      "grad_norm": 0.650083065032959,
      "learning_rate": 3.747685536000211e-06,
      "loss": 0.0084,
      "step": 1917740
    },
    {
      "epoch": 3.138456301591354,
      "grad_norm": 0.16927769780158997,
      "learning_rate": 3.7476196437866943e-06,
      "loss": 0.0116,
      "step": 1917760
    },
    {
      "epoch": 3.1384890320300074,
      "grad_norm": 0.2133306860923767,
      "learning_rate": 3.747553751573177e-06,
      "loss": 0.0101,
      "step": 1917780
    },
    {
      "epoch": 3.1385217624686605,
      "grad_norm": 0.174177348613739,
      "learning_rate": 3.7474878593596597e-06,
      "loss": 0.0103,
      "step": 1917800
    },
    {
      "epoch": 3.138554492907314,
      "grad_norm": 0.8021581768989563,
      "learning_rate": 3.7474219671461425e-06,
      "loss": 0.0109,
      "step": 1917820
    },
    {
      "epoch": 3.1385872233459673,
      "grad_norm": 0.3561568558216095,
      "learning_rate": 3.7473560749326256e-06,
      "loss": 0.0114,
      "step": 1917840
    },
    {
      "epoch": 3.1386199537846204,
      "grad_norm": 0.45232075452804565,
      "learning_rate": 3.7472901827191084e-06,
      "loss": 0.0125,
      "step": 1917860
    },
    {
      "epoch": 3.138652684223274,
      "grad_norm": 0.22873438894748688,
      "learning_rate": 3.747224290505591e-06,
      "loss": 0.0119,
      "step": 1917880
    },
    {
      "epoch": 3.138685414661927,
      "grad_norm": 0.31021901965141296,
      "learning_rate": 3.747158398292074e-06,
      "loss": 0.0114,
      "step": 1917900
    },
    {
      "epoch": 3.138718145100581,
      "grad_norm": 0.056222155690193176,
      "learning_rate": 3.7470925060785566e-06,
      "loss": 0.0113,
      "step": 1917920
    },
    {
      "epoch": 3.138750875539234,
      "grad_norm": 0.38510435819625854,
      "learning_rate": 3.7470266138650398e-06,
      "loss": 0.0164,
      "step": 1917940
    },
    {
      "epoch": 3.1387836059778875,
      "grad_norm": 0.3672129809856415,
      "learning_rate": 3.7469607216515225e-06,
      "loss": 0.0087,
      "step": 1917960
    },
    {
      "epoch": 3.1388163364165407,
      "grad_norm": 0.4025755822658539,
      "learning_rate": 3.7468948294380057e-06,
      "loss": 0.009,
      "step": 1917980
    },
    {
      "epoch": 3.138849066855194,
      "grad_norm": 0.682804524898529,
      "learning_rate": 3.746828937224489e-06,
      "loss": 0.0139,
      "step": 1918000
    },
    {
      "epoch": 3.1388817972938474,
      "grad_norm": 0.24303701519966125,
      "learning_rate": 3.7467630450109716e-06,
      "loss": 0.0113,
      "step": 1918020
    },
    {
      "epoch": 3.1389145277325006,
      "grad_norm": 0.3574605882167816,
      "learning_rate": 3.7466971527974543e-06,
      "loss": 0.0118,
      "step": 1918040
    },
    {
      "epoch": 3.138947258171154,
      "grad_norm": 0.32174625992774963,
      "learning_rate": 3.746631260583937e-06,
      "loss": 0.0139,
      "step": 1918060
    },
    {
      "epoch": 3.1389799886098073,
      "grad_norm": 0.18151956796646118,
      "learning_rate": 3.74656536837042e-06,
      "loss": 0.0149,
      "step": 1918080
    },
    {
      "epoch": 3.139012719048461,
      "grad_norm": 0.16008619964122772,
      "learning_rate": 3.746499476156903e-06,
      "loss": 0.0073,
      "step": 1918100
    },
    {
      "epoch": 3.139045449487114,
      "grad_norm": 0.081549733877182,
      "learning_rate": 3.7464335839433857e-06,
      "loss": 0.013,
      "step": 1918120
    },
    {
      "epoch": 3.139078179925767,
      "grad_norm": 0.08622673153877258,
      "learning_rate": 3.7463676917298684e-06,
      "loss": 0.0062,
      "step": 1918140
    },
    {
      "epoch": 3.139110910364421,
      "grad_norm": 0.13597874343395233,
      "learning_rate": 3.7463017995163516e-06,
      "loss": 0.0162,
      "step": 1918160
    },
    {
      "epoch": 3.139143640803074,
      "grad_norm": 0.16085924208164215,
      "learning_rate": 3.7462359073028343e-06,
      "loss": 0.0106,
      "step": 1918180
    },
    {
      "epoch": 3.1391763712417275,
      "grad_norm": 0.17227482795715332,
      "learning_rate": 3.746170015089317e-06,
      "loss": 0.015,
      "step": 1918200
    },
    {
      "epoch": 3.1392091016803807,
      "grad_norm": 0.27026551961898804,
      "learning_rate": 3.7461041228758e-06,
      "loss": 0.0107,
      "step": 1918220
    },
    {
      "epoch": 3.1392418321190343,
      "grad_norm": 0.2910151779651642,
      "learning_rate": 3.746038230662283e-06,
      "loss": 0.0153,
      "step": 1918240
    },
    {
      "epoch": 3.1392745625576874,
      "grad_norm": 0.2607547640800476,
      "learning_rate": 3.7459723384487657e-06,
      "loss": 0.0109,
      "step": 1918260
    },
    {
      "epoch": 3.1393072929963406,
      "grad_norm": 0.16992305219173431,
      "learning_rate": 3.7459064462352485e-06,
      "loss": 0.0102,
      "step": 1918280
    },
    {
      "epoch": 3.139340023434994,
      "grad_norm": 0.4855148196220398,
      "learning_rate": 3.745840554021731e-06,
      "loss": 0.0138,
      "step": 1918300
    },
    {
      "epoch": 3.1393727538736473,
      "grad_norm": 1.1318625211715698,
      "learning_rate": 3.7457746618082148e-06,
      "loss": 0.0146,
      "step": 1918320
    },
    {
      "epoch": 3.139405484312301,
      "grad_norm": 0.27985531091690063,
      "learning_rate": 3.7457087695946975e-06,
      "loss": 0.0134,
      "step": 1918340
    },
    {
      "epoch": 3.139438214750954,
      "grad_norm": 0.5274888873100281,
      "learning_rate": 3.7456428773811803e-06,
      "loss": 0.0095,
      "step": 1918360
    },
    {
      "epoch": 3.1394709451896072,
      "grad_norm": 0.9698609113693237,
      "learning_rate": 3.7455769851676634e-06,
      "loss": 0.0162,
      "step": 1918380
    },
    {
      "epoch": 3.139503675628261,
      "grad_norm": 0.06715689599514008,
      "learning_rate": 3.745511092954146e-06,
      "loss": 0.0112,
      "step": 1918400
    },
    {
      "epoch": 3.139536406066914,
      "grad_norm": 0.2364439070224762,
      "learning_rate": 3.745445200740629e-06,
      "loss": 0.0075,
      "step": 1918420
    },
    {
      "epoch": 3.1395691365055676,
      "grad_norm": 0.32632383704185486,
      "learning_rate": 3.7453793085271116e-06,
      "loss": 0.0144,
      "step": 1918440
    },
    {
      "epoch": 3.1396018669442207,
      "grad_norm": 0.25706517696380615,
      "learning_rate": 3.7453134163135944e-06,
      "loss": 0.0186,
      "step": 1918460
    },
    {
      "epoch": 3.1396345973828743,
      "grad_norm": 0.232273131608963,
      "learning_rate": 3.7452475241000775e-06,
      "loss": 0.0152,
      "step": 1918480
    },
    {
      "epoch": 3.1396673278215275,
      "grad_norm": 0.3499964773654938,
      "learning_rate": 3.7451816318865603e-06,
      "loss": 0.0167,
      "step": 1918500
    },
    {
      "epoch": 3.1397000582601806,
      "grad_norm": 0.617082417011261,
      "learning_rate": 3.745115739673043e-06,
      "loss": 0.0079,
      "step": 1918520
    },
    {
      "epoch": 3.139732788698834,
      "grad_norm": 0.1410032957792282,
      "learning_rate": 3.7450498474595258e-06,
      "loss": 0.0143,
      "step": 1918540
    },
    {
      "epoch": 3.1397655191374874,
      "grad_norm": 0.48721352219581604,
      "learning_rate": 3.744983955246009e-06,
      "loss": 0.0118,
      "step": 1918560
    },
    {
      "epoch": 3.139798249576141,
      "grad_norm": 0.867236316204071,
      "learning_rate": 3.7449180630324917e-06,
      "loss": 0.0132,
      "step": 1918580
    },
    {
      "epoch": 3.139830980014794,
      "grad_norm": 0.2854880690574646,
      "learning_rate": 3.7448521708189744e-06,
      "loss": 0.0153,
      "step": 1918600
    },
    {
      "epoch": 3.1398637104534477,
      "grad_norm": 0.290160208940506,
      "learning_rate": 3.744786278605457e-06,
      "loss": 0.0122,
      "step": 1918620
    },
    {
      "epoch": 3.139896440892101,
      "grad_norm": 0.5972468852996826,
      "learning_rate": 3.7447203863919403e-06,
      "loss": 0.0116,
      "step": 1918640
    },
    {
      "epoch": 3.139929171330754,
      "grad_norm": 0.12237769365310669,
      "learning_rate": 3.744654494178423e-06,
      "loss": 0.0126,
      "step": 1918660
    },
    {
      "epoch": 3.1399619017694076,
      "grad_norm": 0.9814544320106506,
      "learning_rate": 3.7445886019649062e-06,
      "loss": 0.0143,
      "step": 1918680
    },
    {
      "epoch": 3.1399946322080607,
      "grad_norm": 0.5760840773582458,
      "learning_rate": 3.7445227097513894e-06,
      "loss": 0.0153,
      "step": 1918700
    },
    {
      "epoch": 3.1400273626467143,
      "grad_norm": 0.15649648010730743,
      "learning_rate": 3.744456817537872e-06,
      "loss": 0.0138,
      "step": 1918720
    },
    {
      "epoch": 3.1400600930853675,
      "grad_norm": 0.40051141381263733,
      "learning_rate": 3.744390925324355e-06,
      "loss": 0.0106,
      "step": 1918740
    },
    {
      "epoch": 3.1400928235240206,
      "grad_norm": 0.2441728115081787,
      "learning_rate": 3.7443250331108376e-06,
      "loss": 0.0077,
      "step": 1918760
    },
    {
      "epoch": 3.1401255539626742,
      "grad_norm": 0.21646766364574432,
      "learning_rate": 3.7442591408973208e-06,
      "loss": 0.0072,
      "step": 1918780
    },
    {
      "epoch": 3.1401582844013274,
      "grad_norm": 0.2710948884487152,
      "learning_rate": 3.7441932486838035e-06,
      "loss": 0.0084,
      "step": 1918800
    },
    {
      "epoch": 3.140191014839981,
      "grad_norm": 0.10096070170402527,
      "learning_rate": 3.7441273564702862e-06,
      "loss": 0.0152,
      "step": 1918820
    },
    {
      "epoch": 3.140223745278634,
      "grad_norm": 0.6109950542449951,
      "learning_rate": 3.744061464256769e-06,
      "loss": 0.0099,
      "step": 1918840
    },
    {
      "epoch": 3.1402564757172877,
      "grad_norm": 0.29273366928100586,
      "learning_rate": 3.7439955720432517e-06,
      "loss": 0.0085,
      "step": 1918860
    },
    {
      "epoch": 3.140289206155941,
      "grad_norm": 0.46695172786712646,
      "learning_rate": 3.743929679829735e-06,
      "loss": 0.018,
      "step": 1918880
    },
    {
      "epoch": 3.140321936594594,
      "grad_norm": 0.3218126893043518,
      "learning_rate": 3.7438637876162176e-06,
      "loss": 0.0118,
      "step": 1918900
    },
    {
      "epoch": 3.1403546670332476,
      "grad_norm": 0.15916863083839417,
      "learning_rate": 3.7437978954027004e-06,
      "loss": 0.0119,
      "step": 1918920
    },
    {
      "epoch": 3.1403873974719008,
      "grad_norm": 0.26124659180641174,
      "learning_rate": 3.743732003189183e-06,
      "loss": 0.0112,
      "step": 1918940
    },
    {
      "epoch": 3.1404201279105544,
      "grad_norm": 0.3472420275211334,
      "learning_rate": 3.7436661109756663e-06,
      "loss": 0.0141,
      "step": 1918960
    },
    {
      "epoch": 3.1404528583492075,
      "grad_norm": 0.20528587698936462,
      "learning_rate": 3.743600218762149e-06,
      "loss": 0.0109,
      "step": 1918980
    },
    {
      "epoch": 3.140485588787861,
      "grad_norm": 0.8899691700935364,
      "learning_rate": 3.7435343265486317e-06,
      "loss": 0.0114,
      "step": 1919000
    },
    {
      "epoch": 3.1405183192265143,
      "grad_norm": 0.3814256489276886,
      "learning_rate": 3.7434684343351153e-06,
      "loss": 0.0111,
      "step": 1919020
    },
    {
      "epoch": 3.1405510496651674,
      "grad_norm": 0.19634796679019928,
      "learning_rate": 3.743402542121598e-06,
      "loss": 0.0107,
      "step": 1919040
    },
    {
      "epoch": 3.140583780103821,
      "grad_norm": 0.24412019550800323,
      "learning_rate": 3.743336649908081e-06,
      "loss": 0.0094,
      "step": 1919060
    },
    {
      "epoch": 3.140616510542474,
      "grad_norm": 0.19920523464679718,
      "learning_rate": 3.7432707576945635e-06,
      "loss": 0.0116,
      "step": 1919080
    },
    {
      "epoch": 3.1406492409811277,
      "grad_norm": 0.10468234121799469,
      "learning_rate": 3.7432048654810467e-06,
      "loss": 0.0087,
      "step": 1919100
    },
    {
      "epoch": 3.140681971419781,
      "grad_norm": 0.05977051332592964,
      "learning_rate": 3.7431389732675295e-06,
      "loss": 0.0137,
      "step": 1919120
    },
    {
      "epoch": 3.1407147018584345,
      "grad_norm": 0.09518403559923172,
      "learning_rate": 3.743073081054012e-06,
      "loss": 0.0179,
      "step": 1919140
    },
    {
      "epoch": 3.1407474322970876,
      "grad_norm": 0.9016176462173462,
      "learning_rate": 3.743007188840495e-06,
      "loss": 0.0122,
      "step": 1919160
    },
    {
      "epoch": 3.140780162735741,
      "grad_norm": 0.15715374052524567,
      "learning_rate": 3.742941296626978e-06,
      "loss": 0.0077,
      "step": 1919180
    },
    {
      "epoch": 3.1408128931743944,
      "grad_norm": 0.14352332055568695,
      "learning_rate": 3.742875404413461e-06,
      "loss": 0.014,
      "step": 1919200
    },
    {
      "epoch": 3.1408456236130475,
      "grad_norm": 0.31100544333457947,
      "learning_rate": 3.7428095121999436e-06,
      "loss": 0.0084,
      "step": 1919220
    },
    {
      "epoch": 3.140878354051701,
      "grad_norm": 0.3441959321498871,
      "learning_rate": 3.7427436199864263e-06,
      "loss": 0.0173,
      "step": 1919240
    },
    {
      "epoch": 3.1409110844903543,
      "grad_norm": 0.22455905377864838,
      "learning_rate": 3.7426777277729095e-06,
      "loss": 0.0105,
      "step": 1919260
    },
    {
      "epoch": 3.140943814929008,
      "grad_norm": 4.6094183921813965,
      "learning_rate": 3.7426118355593922e-06,
      "loss": 0.0088,
      "step": 1919280
    },
    {
      "epoch": 3.140976545367661,
      "grad_norm": 0.32593318819999695,
      "learning_rate": 3.742545943345875e-06,
      "loss": 0.0139,
      "step": 1919300
    },
    {
      "epoch": 3.141009275806314,
      "grad_norm": 0.08997905254364014,
      "learning_rate": 3.7424800511323577e-06,
      "loss": 0.0094,
      "step": 1919320
    },
    {
      "epoch": 3.1410420062449678,
      "grad_norm": 0.7052068114280701,
      "learning_rate": 3.7424141589188404e-06,
      "loss": 0.0118,
      "step": 1919340
    },
    {
      "epoch": 3.141074736683621,
      "grad_norm": 0.33262935280799866,
      "learning_rate": 3.7423482667053236e-06,
      "loss": 0.0076,
      "step": 1919360
    },
    {
      "epoch": 3.1411074671222745,
      "grad_norm": 0.2454620599746704,
      "learning_rate": 3.7422823744918068e-06,
      "loss": 0.0113,
      "step": 1919380
    },
    {
      "epoch": 3.1411401975609277,
      "grad_norm": 0.10808973759412766,
      "learning_rate": 3.7422164822782895e-06,
      "loss": 0.0186,
      "step": 1919400
    },
    {
      "epoch": 3.1411729279995813,
      "grad_norm": 0.2500477731227875,
      "learning_rate": 3.7421505900647727e-06,
      "loss": 0.0163,
      "step": 1919420
    },
    {
      "epoch": 3.1412056584382344,
      "grad_norm": 0.2511789798736572,
      "learning_rate": 3.7420846978512554e-06,
      "loss": 0.0083,
      "step": 1919440
    },
    {
      "epoch": 3.1412383888768876,
      "grad_norm": 0.03385190665721893,
      "learning_rate": 3.742018805637738e-06,
      "loss": 0.0088,
      "step": 1919460
    },
    {
      "epoch": 3.141271119315541,
      "grad_norm": 0.38708552718162537,
      "learning_rate": 3.741952913424221e-06,
      "loss": 0.0103,
      "step": 1919480
    },
    {
      "epoch": 3.1413038497541943,
      "grad_norm": 0.049016114324331284,
      "learning_rate": 3.741887021210704e-06,
      "loss": 0.0083,
      "step": 1919500
    },
    {
      "epoch": 3.141336580192848,
      "grad_norm": 1.1482940912246704,
      "learning_rate": 3.741821128997187e-06,
      "loss": 0.0097,
      "step": 1919520
    },
    {
      "epoch": 3.141369310631501,
      "grad_norm": 0.07847141474485397,
      "learning_rate": 3.7417552367836695e-06,
      "loss": 0.0118,
      "step": 1919540
    },
    {
      "epoch": 3.1414020410701546,
      "grad_norm": 0.2817167043685913,
      "learning_rate": 3.7416893445701523e-06,
      "loss": 0.0144,
      "step": 1919560
    },
    {
      "epoch": 3.141434771508808,
      "grad_norm": 0.3017803728580475,
      "learning_rate": 3.7416234523566354e-06,
      "loss": 0.0078,
      "step": 1919580
    },
    {
      "epoch": 3.141467501947461,
      "grad_norm": 0.28393465280532837,
      "learning_rate": 3.741557560143118e-06,
      "loss": 0.0128,
      "step": 1919600
    },
    {
      "epoch": 3.1415002323861145,
      "grad_norm": 0.673096776008606,
      "learning_rate": 3.741491667929601e-06,
      "loss": 0.0153,
      "step": 1919620
    },
    {
      "epoch": 3.1415329628247677,
      "grad_norm": 0.42182838916778564,
      "learning_rate": 3.7414257757160837e-06,
      "loss": 0.0111,
      "step": 1919640
    },
    {
      "epoch": 3.1415656932634213,
      "grad_norm": 0.3701179325580597,
      "learning_rate": 3.741359883502567e-06,
      "loss": 0.0137,
      "step": 1919660
    },
    {
      "epoch": 3.1415984237020744,
      "grad_norm": 1.2801673412322998,
      "learning_rate": 3.7412939912890496e-06,
      "loss": 0.0096,
      "step": 1919680
    },
    {
      "epoch": 3.141631154140728,
      "grad_norm": 0.3535563051700592,
      "learning_rate": 3.7412280990755323e-06,
      "loss": 0.0114,
      "step": 1919700
    },
    {
      "epoch": 3.141663884579381,
      "grad_norm": 0.2763024866580963,
      "learning_rate": 3.741162206862015e-06,
      "loss": 0.0187,
      "step": 1919720
    },
    {
      "epoch": 3.1416966150180343,
      "grad_norm": 0.355295866727829,
      "learning_rate": 3.7410963146484986e-06,
      "loss": 0.0056,
      "step": 1919740
    },
    {
      "epoch": 3.141729345456688,
      "grad_norm": 0.16511164605617523,
      "learning_rate": 3.7410304224349814e-06,
      "loss": 0.0129,
      "step": 1919760
    },
    {
      "epoch": 3.141762075895341,
      "grad_norm": 0.05019916966557503,
      "learning_rate": 3.740964530221464e-06,
      "loss": 0.0095,
      "step": 1919780
    },
    {
      "epoch": 3.1417948063339947,
      "grad_norm": 0.11236928403377533,
      "learning_rate": 3.7408986380079473e-06,
      "loss": 0.0085,
      "step": 1919800
    },
    {
      "epoch": 3.141827536772648,
      "grad_norm": 0.380716472864151,
      "learning_rate": 3.74083274579443e-06,
      "loss": 0.0096,
      "step": 1919820
    },
    {
      "epoch": 3.141860267211301,
      "grad_norm": 0.2944825291633606,
      "learning_rate": 3.7407668535809127e-06,
      "loss": 0.0151,
      "step": 1919840
    },
    {
      "epoch": 3.1418929976499546,
      "grad_norm": 0.37907645106315613,
      "learning_rate": 3.7407009613673955e-06,
      "loss": 0.0146,
      "step": 1919860
    },
    {
      "epoch": 3.1419257280886077,
      "grad_norm": 0.755403995513916,
      "learning_rate": 3.7406350691538782e-06,
      "loss": 0.0123,
      "step": 1919880
    },
    {
      "epoch": 3.1419584585272613,
      "grad_norm": 0.41581326723098755,
      "learning_rate": 3.7405691769403614e-06,
      "loss": 0.0108,
      "step": 1919900
    },
    {
      "epoch": 3.1419911889659144,
      "grad_norm": 0.10853295028209686,
      "learning_rate": 3.740503284726844e-06,
      "loss": 0.014,
      "step": 1919920
    },
    {
      "epoch": 3.142023919404568,
      "grad_norm": 0.12948772311210632,
      "learning_rate": 3.740437392513327e-06,
      "loss": 0.0127,
      "step": 1919940
    },
    {
      "epoch": 3.142056649843221,
      "grad_norm": 0.1418619155883789,
      "learning_rate": 3.7403715002998096e-06,
      "loss": 0.008,
      "step": 1919960
    },
    {
      "epoch": 3.1420893802818743,
      "grad_norm": 0.14945439994335175,
      "learning_rate": 3.7403056080862928e-06,
      "loss": 0.0138,
      "step": 1919980
    },
    {
      "epoch": 3.142122110720528,
      "grad_norm": 0.46219176054000854,
      "learning_rate": 3.7402397158727755e-06,
      "loss": 0.0115,
      "step": 1920000
    },
    {
      "epoch": 3.142154841159181,
      "grad_norm": 0.1940804272890091,
      "learning_rate": 3.7401738236592582e-06,
      "loss": 0.0094,
      "step": 1920020
    },
    {
      "epoch": 3.1421875715978347,
      "grad_norm": 0.1730027198791504,
      "learning_rate": 3.740107931445741e-06,
      "loss": 0.0104,
      "step": 1920040
    },
    {
      "epoch": 3.142220302036488,
      "grad_norm": 0.10914131999015808,
      "learning_rate": 3.740042039232224e-06,
      "loss": 0.0135,
      "step": 1920060
    },
    {
      "epoch": 3.1422530324751414,
      "grad_norm": 0.13664332032203674,
      "learning_rate": 3.7399761470187073e-06,
      "loss": 0.0133,
      "step": 1920080
    },
    {
      "epoch": 3.1422857629137946,
      "grad_norm": 0.1335470825433731,
      "learning_rate": 3.73991025480519e-06,
      "loss": 0.0129,
      "step": 1920100
    },
    {
      "epoch": 3.1423184933524477,
      "grad_norm": 0.10160281509160995,
      "learning_rate": 3.7398443625916732e-06,
      "loss": 0.0085,
      "step": 1920120
    },
    {
      "epoch": 3.1423512237911013,
      "grad_norm": 0.48839208483695984,
      "learning_rate": 3.739778470378156e-06,
      "loss": 0.0142,
      "step": 1920140
    },
    {
      "epoch": 3.1423839542297545,
      "grad_norm": 0.368226021528244,
      "learning_rate": 3.7397125781646387e-06,
      "loss": 0.0107,
      "step": 1920160
    },
    {
      "epoch": 3.142416684668408,
      "grad_norm": 0.232844278216362,
      "learning_rate": 3.7396466859511214e-06,
      "loss": 0.0141,
      "step": 1920180
    },
    {
      "epoch": 3.142449415107061,
      "grad_norm": 0.2444998323917389,
      "learning_rate": 3.7395807937376046e-06,
      "loss": 0.0165,
      "step": 1920200
    },
    {
      "epoch": 3.142482145545715,
      "grad_norm": 0.09386464208364487,
      "learning_rate": 3.7395149015240873e-06,
      "loss": 0.0157,
      "step": 1920220
    },
    {
      "epoch": 3.142514875984368,
      "grad_norm": 0.2266400009393692,
      "learning_rate": 3.73944900931057e-06,
      "loss": 0.0141,
      "step": 1920240
    },
    {
      "epoch": 3.142547606423021,
      "grad_norm": 0.05632665380835533,
      "learning_rate": 3.739383117097053e-06,
      "loss": 0.0133,
      "step": 1920260
    },
    {
      "epoch": 3.1425803368616747,
      "grad_norm": 1.9958840608596802,
      "learning_rate": 3.7393172248835356e-06,
      "loss": 0.0136,
      "step": 1920280
    },
    {
      "epoch": 3.142613067300328,
      "grad_norm": 0.5364426970481873,
      "learning_rate": 3.7392513326700187e-06,
      "loss": 0.0118,
      "step": 1920300
    },
    {
      "epoch": 3.1426457977389815,
      "grad_norm": 0.3984893262386322,
      "learning_rate": 3.7391854404565015e-06,
      "loss": 0.0114,
      "step": 1920320
    },
    {
      "epoch": 3.1426785281776346,
      "grad_norm": 0.28637367486953735,
      "learning_rate": 3.739119548242984e-06,
      "loss": 0.016,
      "step": 1920340
    },
    {
      "epoch": 3.1427112586162878,
      "grad_norm": 0.5005215406417847,
      "learning_rate": 3.739053656029467e-06,
      "loss": 0.0164,
      "step": 1920360
    },
    {
      "epoch": 3.1427439890549413,
      "grad_norm": 0.09178178757429123,
      "learning_rate": 3.73898776381595e-06,
      "loss": 0.0151,
      "step": 1920380
    },
    {
      "epoch": 3.1427767194935945,
      "grad_norm": 0.18524986505508423,
      "learning_rate": 3.738921871602433e-06,
      "loss": 0.0154,
      "step": 1920400
    },
    {
      "epoch": 3.142809449932248,
      "grad_norm": 0.38901984691619873,
      "learning_rate": 3.7388559793889156e-06,
      "loss": 0.0154,
      "step": 1920420
    },
    {
      "epoch": 3.1428421803709012,
      "grad_norm": 0.0868668332695961,
      "learning_rate": 3.738790087175399e-06,
      "loss": 0.0111,
      "step": 1920440
    },
    {
      "epoch": 3.142874910809555,
      "grad_norm": 0.22075684368610382,
      "learning_rate": 3.738724194961882e-06,
      "loss": 0.0139,
      "step": 1920460
    },
    {
      "epoch": 3.142907641248208,
      "grad_norm": 0.46944624185562134,
      "learning_rate": 3.7386583027483646e-06,
      "loss": 0.0122,
      "step": 1920480
    },
    {
      "epoch": 3.142940371686861,
      "grad_norm": 0.17832455039024353,
      "learning_rate": 3.7385924105348474e-06,
      "loss": 0.015,
      "step": 1920500
    },
    {
      "epoch": 3.1429731021255147,
      "grad_norm": 1.0768574476242065,
      "learning_rate": 3.7385265183213306e-06,
      "loss": 0.0185,
      "step": 1920520
    },
    {
      "epoch": 3.143005832564168,
      "grad_norm": 0.2562313675880432,
      "learning_rate": 3.7384606261078133e-06,
      "loss": 0.0141,
      "step": 1920540
    },
    {
      "epoch": 3.1430385630028215,
      "grad_norm": 0.18968689441680908,
      "learning_rate": 3.738394733894296e-06,
      "loss": 0.0099,
      "step": 1920560
    },
    {
      "epoch": 3.1430712934414746,
      "grad_norm": 0.28639835119247437,
      "learning_rate": 3.7383288416807788e-06,
      "loss": 0.0148,
      "step": 1920580
    },
    {
      "epoch": 3.143104023880128,
      "grad_norm": 0.3760656714439392,
      "learning_rate": 3.738262949467262e-06,
      "loss": 0.0074,
      "step": 1920600
    },
    {
      "epoch": 3.1431367543187814,
      "grad_norm": 0.1399942934513092,
      "learning_rate": 3.7381970572537447e-06,
      "loss": 0.0118,
      "step": 1920620
    },
    {
      "epoch": 3.1431694847574345,
      "grad_norm": 1.5628732442855835,
      "learning_rate": 3.7381311650402274e-06,
      "loss": 0.0124,
      "step": 1920640
    },
    {
      "epoch": 3.143202215196088,
      "grad_norm": 0.4640091061592102,
      "learning_rate": 3.73806527282671e-06,
      "loss": 0.0131,
      "step": 1920660
    },
    {
      "epoch": 3.1432349456347413,
      "grad_norm": 0.24661029875278473,
      "learning_rate": 3.7379993806131933e-06,
      "loss": 0.0123,
      "step": 1920680
    },
    {
      "epoch": 3.143267676073395,
      "grad_norm": 0.06531935930252075,
      "learning_rate": 3.737933488399676e-06,
      "loss": 0.0116,
      "step": 1920700
    },
    {
      "epoch": 3.143300406512048,
      "grad_norm": 0.19983531534671783,
      "learning_rate": 3.737867596186159e-06,
      "loss": 0.0085,
      "step": 1920720
    },
    {
      "epoch": 3.1433331369507016,
      "grad_norm": 0.2989270091056824,
      "learning_rate": 3.7378017039726415e-06,
      "loss": 0.011,
      "step": 1920740
    },
    {
      "epoch": 3.1433658673893548,
      "grad_norm": 0.267145037651062,
      "learning_rate": 3.7377358117591243e-06,
      "loss": 0.0136,
      "step": 1920760
    },
    {
      "epoch": 3.143398597828008,
      "grad_norm": 0.20279984176158905,
      "learning_rate": 3.737669919545608e-06,
      "loss": 0.0083,
      "step": 1920780
    },
    {
      "epoch": 3.1434313282666615,
      "grad_norm": 0.3907221555709839,
      "learning_rate": 3.7376040273320906e-06,
      "loss": 0.0149,
      "step": 1920800
    },
    {
      "epoch": 3.1434640587053146,
      "grad_norm": 0.2577707767486572,
      "learning_rate": 3.7375381351185733e-06,
      "loss": 0.0117,
      "step": 1920820
    },
    {
      "epoch": 3.1434967891439682,
      "grad_norm": 0.3197046220302582,
      "learning_rate": 3.7374722429050565e-06,
      "loss": 0.0123,
      "step": 1920840
    },
    {
      "epoch": 3.1435295195826214,
      "grad_norm": 0.299672931432724,
      "learning_rate": 3.7374063506915392e-06,
      "loss": 0.0121,
      "step": 1920860
    },
    {
      "epoch": 3.143562250021275,
      "grad_norm": 0.315280020236969,
      "learning_rate": 3.737340458478022e-06,
      "loss": 0.0092,
      "step": 1920880
    },
    {
      "epoch": 3.143594980459928,
      "grad_norm": 0.14054180681705475,
      "learning_rate": 3.7372745662645047e-06,
      "loss": 0.0081,
      "step": 1920900
    },
    {
      "epoch": 3.1436277108985813,
      "grad_norm": 0.20899982750415802,
      "learning_rate": 3.737208674050988e-06,
      "loss": 0.0087,
      "step": 1920920
    },
    {
      "epoch": 3.143660441337235,
      "grad_norm": 0.2254284769296646,
      "learning_rate": 3.7371427818374706e-06,
      "loss": 0.0119,
      "step": 1920940
    },
    {
      "epoch": 3.143693171775888,
      "grad_norm": 0.6363528370857239,
      "learning_rate": 3.7370768896239534e-06,
      "loss": 0.0079,
      "step": 1920960
    },
    {
      "epoch": 3.1437259022145416,
      "grad_norm": 0.5147839188575745,
      "learning_rate": 3.737010997410436e-06,
      "loss": 0.0147,
      "step": 1920980
    },
    {
      "epoch": 3.1437586326531948,
      "grad_norm": 0.2727881669998169,
      "learning_rate": 3.7369451051969193e-06,
      "loss": 0.0065,
      "step": 1921000
    },
    {
      "epoch": 3.1437913630918484,
      "grad_norm": 0.5798503756523132,
      "learning_rate": 3.736879212983402e-06,
      "loss": 0.0115,
      "step": 1921020
    },
    {
      "epoch": 3.1438240935305015,
      "grad_norm": 0.5699864625930786,
      "learning_rate": 3.7368133207698848e-06,
      "loss": 0.0151,
      "step": 1921040
    },
    {
      "epoch": 3.1438568239691547,
      "grad_norm": 0.43290746212005615,
      "learning_rate": 3.7367474285563675e-06,
      "loss": 0.0056,
      "step": 1921060
    },
    {
      "epoch": 3.1438895544078083,
      "grad_norm": 0.4585755169391632,
      "learning_rate": 3.7366815363428507e-06,
      "loss": 0.0128,
      "step": 1921080
    },
    {
      "epoch": 3.1439222848464614,
      "grad_norm": 0.33653131127357483,
      "learning_rate": 3.7366156441293334e-06,
      "loss": 0.0128,
      "step": 1921100
    },
    {
      "epoch": 3.143955015285115,
      "grad_norm": 0.4059774577617645,
      "learning_rate": 3.736549751915816e-06,
      "loss": 0.0101,
      "step": 1921120
    },
    {
      "epoch": 3.143987745723768,
      "grad_norm": 1.047481894493103,
      "learning_rate": 3.7364838597022997e-06,
      "loss": 0.0113,
      "step": 1921140
    },
    {
      "epoch": 3.1440204761624218,
      "grad_norm": 0.24741066992282867,
      "learning_rate": 3.7364179674887825e-06,
      "loss": 0.01,
      "step": 1921160
    },
    {
      "epoch": 3.144053206601075,
      "grad_norm": 2.625913381576538,
      "learning_rate": 3.736352075275265e-06,
      "loss": 0.0117,
      "step": 1921180
    },
    {
      "epoch": 3.144085937039728,
      "grad_norm": 0.12846024334430695,
      "learning_rate": 3.736286183061748e-06,
      "loss": 0.013,
      "step": 1921200
    },
    {
      "epoch": 3.1441186674783816,
      "grad_norm": 0.1757339984178543,
      "learning_rate": 3.736220290848231e-06,
      "loss": 0.0124,
      "step": 1921220
    },
    {
      "epoch": 3.144151397917035,
      "grad_norm": 0.3447723984718323,
      "learning_rate": 3.736154398634714e-06,
      "loss": 0.0146,
      "step": 1921240
    },
    {
      "epoch": 3.1441841283556884,
      "grad_norm": 0.16124092042446136,
      "learning_rate": 3.7360885064211966e-06,
      "loss": 0.02,
      "step": 1921260
    },
    {
      "epoch": 3.1442168587943415,
      "grad_norm": 0.27317118644714355,
      "learning_rate": 3.7360226142076793e-06,
      "loss": 0.0111,
      "step": 1921280
    },
    {
      "epoch": 3.144249589232995,
      "grad_norm": 0.22767990827560425,
      "learning_rate": 3.735956721994162e-06,
      "loss": 0.016,
      "step": 1921300
    },
    {
      "epoch": 3.1442823196716483,
      "grad_norm": 0.19477581977844238,
      "learning_rate": 3.7358908297806452e-06,
      "loss": 0.0155,
      "step": 1921320
    },
    {
      "epoch": 3.1443150501103014,
      "grad_norm": 0.0968048945069313,
      "learning_rate": 3.735824937567128e-06,
      "loss": 0.0083,
      "step": 1921340
    },
    {
      "epoch": 3.144347780548955,
      "grad_norm": 0.20670448243618011,
      "learning_rate": 3.7357590453536107e-06,
      "loss": 0.0133,
      "step": 1921360
    },
    {
      "epoch": 3.144380510987608,
      "grad_norm": 0.35095569491386414,
      "learning_rate": 3.7356931531400934e-06,
      "loss": 0.0129,
      "step": 1921380
    },
    {
      "epoch": 3.1444132414262618,
      "grad_norm": 0.340067058801651,
      "learning_rate": 3.7356272609265766e-06,
      "loss": 0.0118,
      "step": 1921400
    },
    {
      "epoch": 3.144445971864915,
      "grad_norm": 0.25535255670547485,
      "learning_rate": 3.7355613687130593e-06,
      "loss": 0.009,
      "step": 1921420
    },
    {
      "epoch": 3.144478702303568,
      "grad_norm": 0.6444596648216248,
      "learning_rate": 3.735495476499542e-06,
      "loss": 0.0146,
      "step": 1921440
    },
    {
      "epoch": 3.1445114327422217,
      "grad_norm": 0.40407446026802063,
      "learning_rate": 3.735429584286025e-06,
      "loss": 0.0096,
      "step": 1921460
    },
    {
      "epoch": 3.144544163180875,
      "grad_norm": 0.09456676244735718,
      "learning_rate": 3.7353636920725084e-06,
      "loss": 0.0088,
      "step": 1921480
    },
    {
      "epoch": 3.1445768936195284,
      "grad_norm": 0.19648653268814087,
      "learning_rate": 3.735297799858991e-06,
      "loss": 0.0099,
      "step": 1921500
    },
    {
      "epoch": 3.1446096240581816,
      "grad_norm": 0.08038979768753052,
      "learning_rate": 3.735231907645474e-06,
      "loss": 0.0096,
      "step": 1921520
    },
    {
      "epoch": 3.144642354496835,
      "grad_norm": 0.12826381623744965,
      "learning_rate": 3.735166015431957e-06,
      "loss": 0.0127,
      "step": 1921540
    },
    {
      "epoch": 3.1446750849354883,
      "grad_norm": 0.2213742882013321,
      "learning_rate": 3.73510012321844e-06,
      "loss": 0.0172,
      "step": 1921560
    },
    {
      "epoch": 3.1447078153741415,
      "grad_norm": 0.1567123532295227,
      "learning_rate": 3.7350342310049225e-06,
      "loss": 0.011,
      "step": 1921580
    },
    {
      "epoch": 3.144740545812795,
      "grad_norm": 0.20371945202350616,
      "learning_rate": 3.7349683387914053e-06,
      "loss": 0.0085,
      "step": 1921600
    },
    {
      "epoch": 3.144773276251448,
      "grad_norm": 1.3174456357955933,
      "learning_rate": 3.7349024465778884e-06,
      "loss": 0.0129,
      "step": 1921620
    },
    {
      "epoch": 3.144806006690102,
      "grad_norm": 0.24099355936050415,
      "learning_rate": 3.734836554364371e-06,
      "loss": 0.011,
      "step": 1921640
    },
    {
      "epoch": 3.144838737128755,
      "grad_norm": 0.5976872444152832,
      "learning_rate": 3.734770662150854e-06,
      "loss": 0.0128,
      "step": 1921660
    },
    {
      "epoch": 3.1448714675674085,
      "grad_norm": 0.4676327705383301,
      "learning_rate": 3.7347047699373367e-06,
      "loss": 0.0106,
      "step": 1921680
    },
    {
      "epoch": 3.1449041980060617,
      "grad_norm": 0.24095505475997925,
      "learning_rate": 3.73463887772382e-06,
      "loss": 0.0172,
      "step": 1921700
    },
    {
      "epoch": 3.144936928444715,
      "grad_norm": 0.29003453254699707,
      "learning_rate": 3.7345729855103026e-06,
      "loss": 0.0099,
      "step": 1921720
    },
    {
      "epoch": 3.1449696588833684,
      "grad_norm": 0.3061341643333435,
      "learning_rate": 3.7345070932967853e-06,
      "loss": 0.0132,
      "step": 1921740
    },
    {
      "epoch": 3.1450023893220216,
      "grad_norm": 0.6704312562942505,
      "learning_rate": 3.734441201083268e-06,
      "loss": 0.0119,
      "step": 1921760
    },
    {
      "epoch": 3.145035119760675,
      "grad_norm": 1.9924298524856567,
      "learning_rate": 3.7343753088697508e-06,
      "loss": 0.0101,
      "step": 1921780
    },
    {
      "epoch": 3.1450678501993283,
      "grad_norm": 0.07476600259542465,
      "learning_rate": 3.734309416656234e-06,
      "loss": 0.0061,
      "step": 1921800
    },
    {
      "epoch": 3.1451005806379815,
      "grad_norm": 0.4600462317466736,
      "learning_rate": 3.7342435244427167e-06,
      "loss": 0.0113,
      "step": 1921820
    },
    {
      "epoch": 3.145133311076635,
      "grad_norm": 0.19186456501483917,
      "learning_rate": 3.7341776322292e-06,
      "loss": 0.0108,
      "step": 1921840
    },
    {
      "epoch": 3.1451660415152882,
      "grad_norm": 0.19473610818386078,
      "learning_rate": 3.734111740015683e-06,
      "loss": 0.0077,
      "step": 1921860
    },
    {
      "epoch": 3.145198771953942,
      "grad_norm": 0.24818789958953857,
      "learning_rate": 3.7340458478021657e-06,
      "loss": 0.0162,
      "step": 1921880
    },
    {
      "epoch": 3.145231502392595,
      "grad_norm": 0.6206369400024414,
      "learning_rate": 3.7339799555886485e-06,
      "loss": 0.0096,
      "step": 1921900
    },
    {
      "epoch": 3.1452642328312486,
      "grad_norm": 0.1577504575252533,
      "learning_rate": 3.7339140633751312e-06,
      "loss": 0.0106,
      "step": 1921920
    },
    {
      "epoch": 3.1452969632699017,
      "grad_norm": 0.19281543791294098,
      "learning_rate": 3.7338481711616144e-06,
      "loss": 0.0095,
      "step": 1921940
    },
    {
      "epoch": 3.145329693708555,
      "grad_norm": 0.3687383532524109,
      "learning_rate": 3.733782278948097e-06,
      "loss": 0.0113,
      "step": 1921960
    },
    {
      "epoch": 3.1453624241472085,
      "grad_norm": 0.28133878111839294,
      "learning_rate": 3.73371638673458e-06,
      "loss": 0.0121,
      "step": 1921980
    },
    {
      "epoch": 3.1453951545858616,
      "grad_norm": 0.4680613577365875,
      "learning_rate": 3.7336504945210626e-06,
      "loss": 0.0124,
      "step": 1922000
    },
    {
      "epoch": 3.145427885024515,
      "grad_norm": 0.3526293635368347,
      "learning_rate": 3.7335846023075458e-06,
      "loss": 0.0116,
      "step": 1922020
    },
    {
      "epoch": 3.1454606154631684,
      "grad_norm": 0.4443676173686981,
      "learning_rate": 3.7335187100940285e-06,
      "loss": 0.0106,
      "step": 1922040
    },
    {
      "epoch": 3.145493345901822,
      "grad_norm": 0.6773539781570435,
      "learning_rate": 3.7334528178805113e-06,
      "loss": 0.0123,
      "step": 1922060
    },
    {
      "epoch": 3.145526076340475,
      "grad_norm": 0.438760370016098,
      "learning_rate": 3.733386925666994e-06,
      "loss": 0.0129,
      "step": 1922080
    },
    {
      "epoch": 3.1455588067791282,
      "grad_norm": 0.27692800760269165,
      "learning_rate": 3.733321033453477e-06,
      "loss": 0.0163,
      "step": 1922100
    },
    {
      "epoch": 3.145591537217782,
      "grad_norm": 0.4577905535697937,
      "learning_rate": 3.73325514123996e-06,
      "loss": 0.0166,
      "step": 1922120
    },
    {
      "epoch": 3.145624267656435,
      "grad_norm": 0.17847885191440582,
      "learning_rate": 3.7331892490264426e-06,
      "loss": 0.0076,
      "step": 1922140
    },
    {
      "epoch": 3.1456569980950886,
      "grad_norm": 0.1542658656835556,
      "learning_rate": 3.7331233568129254e-06,
      "loss": 0.0136,
      "step": 1922160
    },
    {
      "epoch": 3.1456897285337417,
      "grad_norm": 0.3011520504951477,
      "learning_rate": 3.733057464599408e-06,
      "loss": 0.0081,
      "step": 1922180
    },
    {
      "epoch": 3.1457224589723953,
      "grad_norm": 0.2434782236814499,
      "learning_rate": 3.7329915723858917e-06,
      "loss": 0.014,
      "step": 1922200
    },
    {
      "epoch": 3.1457551894110485,
      "grad_norm": 0.16270166635513306,
      "learning_rate": 3.7329256801723744e-06,
      "loss": 0.013,
      "step": 1922220
    },
    {
      "epoch": 3.1457879198497016,
      "grad_norm": 0.08101857453584671,
      "learning_rate": 3.7328597879588576e-06,
      "loss": 0.0115,
      "step": 1922240
    },
    {
      "epoch": 3.1458206502883552,
      "grad_norm": 0.29263994097709656,
      "learning_rate": 3.7327938957453403e-06,
      "loss": 0.0157,
      "step": 1922260
    },
    {
      "epoch": 3.1458533807270084,
      "grad_norm": 0.12302887439727783,
      "learning_rate": 3.732728003531823e-06,
      "loss": 0.0109,
      "step": 1922280
    },
    {
      "epoch": 3.145886111165662,
      "grad_norm": 0.2815174162387848,
      "learning_rate": 3.732662111318306e-06,
      "loss": 0.0104,
      "step": 1922300
    },
    {
      "epoch": 3.145918841604315,
      "grad_norm": 0.025246962904930115,
      "learning_rate": 3.7325962191047886e-06,
      "loss": 0.0103,
      "step": 1922320
    },
    {
      "epoch": 3.1459515720429687,
      "grad_norm": 0.10536913573741913,
      "learning_rate": 3.7325303268912717e-06,
      "loss": 0.0113,
      "step": 1922340
    },
    {
      "epoch": 3.145984302481622,
      "grad_norm": 0.1922658532857895,
      "learning_rate": 3.7324644346777545e-06,
      "loss": 0.0121,
      "step": 1922360
    },
    {
      "epoch": 3.146017032920275,
      "grad_norm": 0.050676941871643066,
      "learning_rate": 3.732398542464237e-06,
      "loss": 0.0103,
      "step": 1922380
    },
    {
      "epoch": 3.1460497633589286,
      "grad_norm": 0.3790489137172699,
      "learning_rate": 3.73233265025072e-06,
      "loss": 0.0111,
      "step": 1922400
    },
    {
      "epoch": 3.1460824937975818,
      "grad_norm": 0.14531724154949188,
      "learning_rate": 3.732266758037203e-06,
      "loss": 0.0174,
      "step": 1922420
    },
    {
      "epoch": 3.1461152242362354,
      "grad_norm": 0.3271680772304535,
      "learning_rate": 3.732200865823686e-06,
      "loss": 0.0109,
      "step": 1922440
    },
    {
      "epoch": 3.1461479546748885,
      "grad_norm": 0.2323051393032074,
      "learning_rate": 3.7321349736101686e-06,
      "loss": 0.0167,
      "step": 1922460
    },
    {
      "epoch": 3.146180685113542,
      "grad_norm": 0.8403530716896057,
      "learning_rate": 3.7320690813966513e-06,
      "loss": 0.0106,
      "step": 1922480
    },
    {
      "epoch": 3.1462134155521952,
      "grad_norm": 0.20508569478988647,
      "learning_rate": 3.7320031891831345e-06,
      "loss": 0.0125,
      "step": 1922500
    },
    {
      "epoch": 3.1462461459908484,
      "grad_norm": 0.2331201136112213,
      "learning_rate": 3.7319372969696172e-06,
      "loss": 0.0134,
      "step": 1922520
    },
    {
      "epoch": 3.146278876429502,
      "grad_norm": 0.3473442792892456,
      "learning_rate": 3.7318714047561004e-06,
      "loss": 0.0074,
      "step": 1922540
    },
    {
      "epoch": 3.146311606868155,
      "grad_norm": 0.20088908076286316,
      "learning_rate": 3.7318055125425836e-06,
      "loss": 0.0124,
      "step": 1922560
    },
    {
      "epoch": 3.1463443373068087,
      "grad_norm": 0.23556755483150482,
      "learning_rate": 3.7317396203290663e-06,
      "loss": 0.0124,
      "step": 1922580
    },
    {
      "epoch": 3.146377067745462,
      "grad_norm": 0.14629153907299042,
      "learning_rate": 3.731673728115549e-06,
      "loss": 0.0112,
      "step": 1922600
    },
    {
      "epoch": 3.1464097981841155,
      "grad_norm": 0.1703125387430191,
      "learning_rate": 3.7316078359020318e-06,
      "loss": 0.0114,
      "step": 1922620
    },
    {
      "epoch": 3.1464425286227686,
      "grad_norm": 0.29264092445373535,
      "learning_rate": 3.731541943688515e-06,
      "loss": 0.0124,
      "step": 1922640
    },
    {
      "epoch": 3.146475259061422,
      "grad_norm": 1.3053416013717651,
      "learning_rate": 3.7314760514749977e-06,
      "loss": 0.0134,
      "step": 1922660
    },
    {
      "epoch": 3.1465079895000754,
      "grad_norm": 0.40639859437942505,
      "learning_rate": 3.7314101592614804e-06,
      "loss": 0.0127,
      "step": 1922680
    },
    {
      "epoch": 3.1465407199387285,
      "grad_norm": 2.740226984024048,
      "learning_rate": 3.731344267047963e-06,
      "loss": 0.0098,
      "step": 1922700
    },
    {
      "epoch": 3.146573450377382,
      "grad_norm": 0.2704216539859772,
      "learning_rate": 3.731278374834446e-06,
      "loss": 0.0065,
      "step": 1922720
    },
    {
      "epoch": 3.1466061808160353,
      "grad_norm": 0.42683738470077515,
      "learning_rate": 3.731212482620929e-06,
      "loss": 0.0092,
      "step": 1922740
    },
    {
      "epoch": 3.146638911254689,
      "grad_norm": 0.15837521851062775,
      "learning_rate": 3.731146590407412e-06,
      "loss": 0.0123,
      "step": 1922760
    },
    {
      "epoch": 3.146671641693342,
      "grad_norm": 0.17772066593170166,
      "learning_rate": 3.7310806981938945e-06,
      "loss": 0.0128,
      "step": 1922780
    },
    {
      "epoch": 3.146704372131995,
      "grad_norm": 0.14260560274124146,
      "learning_rate": 3.7310148059803773e-06,
      "loss": 0.0113,
      "step": 1922800
    },
    {
      "epoch": 3.1467371025706488,
      "grad_norm": 0.24310199916362762,
      "learning_rate": 3.7309489137668604e-06,
      "loss": 0.009,
      "step": 1922820
    },
    {
      "epoch": 3.146769833009302,
      "grad_norm": 0.5922158360481262,
      "learning_rate": 3.730883021553343e-06,
      "loss": 0.0085,
      "step": 1922840
    },
    {
      "epoch": 3.1468025634479555,
      "grad_norm": 0.10326794534921646,
      "learning_rate": 3.730817129339826e-06,
      "loss": 0.012,
      "step": 1922860
    },
    {
      "epoch": 3.1468352938866087,
      "grad_norm": 0.1614644080400467,
      "learning_rate": 3.7307512371263087e-06,
      "loss": 0.0185,
      "step": 1922880
    },
    {
      "epoch": 3.146868024325262,
      "grad_norm": 0.14434204995632172,
      "learning_rate": 3.7306853449127923e-06,
      "loss": 0.0141,
      "step": 1922900
    },
    {
      "epoch": 3.1469007547639154,
      "grad_norm": 0.27123287320137024,
      "learning_rate": 3.730619452699275e-06,
      "loss": 0.0134,
      "step": 1922920
    },
    {
      "epoch": 3.1469334852025685,
      "grad_norm": 0.8933037519454956,
      "learning_rate": 3.7305535604857577e-06,
      "loss": 0.0113,
      "step": 1922940
    },
    {
      "epoch": 3.146966215641222,
      "grad_norm": 0.10485149919986725,
      "learning_rate": 3.730487668272241e-06,
      "loss": 0.0098,
      "step": 1922960
    },
    {
      "epoch": 3.1469989460798753,
      "grad_norm": 0.9629470109939575,
      "learning_rate": 3.7304217760587236e-06,
      "loss": 0.0132,
      "step": 1922980
    },
    {
      "epoch": 3.147031676518529,
      "grad_norm": 0.3101692795753479,
      "learning_rate": 3.7303558838452064e-06,
      "loss": 0.0141,
      "step": 1923000
    },
    {
      "epoch": 3.147064406957182,
      "grad_norm": 0.1044306606054306,
      "learning_rate": 3.730289991631689e-06,
      "loss": 0.0145,
      "step": 1923020
    },
    {
      "epoch": 3.147097137395835,
      "grad_norm": 0.14119353890419006,
      "learning_rate": 3.7302240994181723e-06,
      "loss": 0.0108,
      "step": 1923040
    },
    {
      "epoch": 3.147129867834489,
      "grad_norm": 0.08822262287139893,
      "learning_rate": 3.730158207204655e-06,
      "loss": 0.0078,
      "step": 1923060
    },
    {
      "epoch": 3.147162598273142,
      "grad_norm": 0.22109602391719818,
      "learning_rate": 3.7300923149911378e-06,
      "loss": 0.0094,
      "step": 1923080
    },
    {
      "epoch": 3.1471953287117955,
      "grad_norm": 0.12965475022792816,
      "learning_rate": 3.7300264227776205e-06,
      "loss": 0.01,
      "step": 1923100
    },
    {
      "epoch": 3.1472280591504487,
      "grad_norm": 1.2756257057189941,
      "learning_rate": 3.7299605305641037e-06,
      "loss": 0.0097,
      "step": 1923120
    },
    {
      "epoch": 3.1472607895891023,
      "grad_norm": 0.37157416343688965,
      "learning_rate": 3.7298946383505864e-06,
      "loss": 0.0104,
      "step": 1923140
    },
    {
      "epoch": 3.1472935200277554,
      "grad_norm": 0.23324504494667053,
      "learning_rate": 3.729828746137069e-06,
      "loss": 0.0116,
      "step": 1923160
    },
    {
      "epoch": 3.1473262504664086,
      "grad_norm": 0.1571066677570343,
      "learning_rate": 3.729762853923552e-06,
      "loss": 0.0095,
      "step": 1923180
    },
    {
      "epoch": 3.147358980905062,
      "grad_norm": 0.2036566585302353,
      "learning_rate": 3.7296969617100346e-06,
      "loss": 0.0173,
      "step": 1923200
    },
    {
      "epoch": 3.1473917113437153,
      "grad_norm": 0.1566629856824875,
      "learning_rate": 3.7296310694965178e-06,
      "loss": 0.0134,
      "step": 1923220
    },
    {
      "epoch": 3.147424441782369,
      "grad_norm": 0.22326277196407318,
      "learning_rate": 3.729565177283001e-06,
      "loss": 0.0171,
      "step": 1923240
    },
    {
      "epoch": 3.147457172221022,
      "grad_norm": 0.1683807522058487,
      "learning_rate": 3.7294992850694837e-06,
      "loss": 0.0164,
      "step": 1923260
    },
    {
      "epoch": 3.147489902659675,
      "grad_norm": 0.09611227363348007,
      "learning_rate": 3.729433392855967e-06,
      "loss": 0.014,
      "step": 1923280
    },
    {
      "epoch": 3.147522633098329,
      "grad_norm": 0.2641400098800659,
      "learning_rate": 3.7293675006424496e-06,
      "loss": 0.013,
      "step": 1923300
    },
    {
      "epoch": 3.147555363536982,
      "grad_norm": 0.24810998141765594,
      "learning_rate": 3.7293016084289323e-06,
      "loss": 0.0088,
      "step": 1923320
    },
    {
      "epoch": 3.1475880939756355,
      "grad_norm": 0.22964297235012054,
      "learning_rate": 3.729235716215415e-06,
      "loss": 0.0112,
      "step": 1923340
    },
    {
      "epoch": 3.1476208244142887,
      "grad_norm": 0.19108553230762482,
      "learning_rate": 3.7291698240018982e-06,
      "loss": 0.0081,
      "step": 1923360
    },
    {
      "epoch": 3.1476535548529423,
      "grad_norm": 0.24711431562900543,
      "learning_rate": 3.729103931788381e-06,
      "loss": 0.0102,
      "step": 1923380
    },
    {
      "epoch": 3.1476862852915954,
      "grad_norm": 0.3834126889705658,
      "learning_rate": 3.7290380395748637e-06,
      "loss": 0.0121,
      "step": 1923400
    },
    {
      "epoch": 3.1477190157302486,
      "grad_norm": 0.5154266953468323,
      "learning_rate": 3.7289721473613465e-06,
      "loss": 0.0164,
      "step": 1923420
    },
    {
      "epoch": 3.147751746168902,
      "grad_norm": 0.22873543202877045,
      "learning_rate": 3.7289062551478296e-06,
      "loss": 0.012,
      "step": 1923440
    },
    {
      "epoch": 3.1477844766075553,
      "grad_norm": 1.0010792016983032,
      "learning_rate": 3.7288403629343124e-06,
      "loss": 0.0082,
      "step": 1923460
    },
    {
      "epoch": 3.147817207046209,
      "grad_norm": 0.30457350611686707,
      "learning_rate": 3.728774470720795e-06,
      "loss": 0.0094,
      "step": 1923480
    },
    {
      "epoch": 3.147849937484862,
      "grad_norm": 0.33223316073417664,
      "learning_rate": 3.728708578507278e-06,
      "loss": 0.0155,
      "step": 1923500
    },
    {
      "epoch": 3.1478826679235157,
      "grad_norm": 0.10413762181997299,
      "learning_rate": 3.728642686293761e-06,
      "loss": 0.0099,
      "step": 1923520
    },
    {
      "epoch": 3.147915398362169,
      "grad_norm": 0.11531803011894226,
      "learning_rate": 3.7285767940802437e-06,
      "loss": 0.0107,
      "step": 1923540
    },
    {
      "epoch": 3.147948128800822,
      "grad_norm": 0.18850037455558777,
      "learning_rate": 3.7285109018667265e-06,
      "loss": 0.008,
      "step": 1923560
    },
    {
      "epoch": 3.1479808592394756,
      "grad_norm": 1.7836066484451294,
      "learning_rate": 3.7284450096532092e-06,
      "loss": 0.0112,
      "step": 1923580
    },
    {
      "epoch": 3.1480135896781287,
      "grad_norm": 0.13420610129833221,
      "learning_rate": 3.728379117439693e-06,
      "loss": 0.0107,
      "step": 1923600
    },
    {
      "epoch": 3.1480463201167823,
      "grad_norm": 0.32382524013519287,
      "learning_rate": 3.7283132252261755e-06,
      "loss": 0.0119,
      "step": 1923620
    },
    {
      "epoch": 3.1480790505554355,
      "grad_norm": 0.08160237222909927,
      "learning_rate": 3.7282473330126583e-06,
      "loss": 0.0098,
      "step": 1923640
    },
    {
      "epoch": 3.148111780994089,
      "grad_norm": 0.5211453437805176,
      "learning_rate": 3.7281814407991414e-06,
      "loss": 0.0088,
      "step": 1923660
    },
    {
      "epoch": 3.148144511432742,
      "grad_norm": 0.26020216941833496,
      "learning_rate": 3.728115548585624e-06,
      "loss": 0.0092,
      "step": 1923680
    },
    {
      "epoch": 3.1481772418713954,
      "grad_norm": 0.24695564806461334,
      "learning_rate": 3.728049656372107e-06,
      "loss": 0.0127,
      "step": 1923700
    },
    {
      "epoch": 3.148209972310049,
      "grad_norm": 0.3524583876132965,
      "learning_rate": 3.7279837641585897e-06,
      "loss": 0.0094,
      "step": 1923720
    },
    {
      "epoch": 3.148242702748702,
      "grad_norm": 0.36983826756477356,
      "learning_rate": 3.7279178719450724e-06,
      "loss": 0.0118,
      "step": 1923740
    },
    {
      "epoch": 3.1482754331873557,
      "grad_norm": 0.18054239451885223,
      "learning_rate": 3.7278519797315556e-06,
      "loss": 0.0127,
      "step": 1923760
    },
    {
      "epoch": 3.148308163626009,
      "grad_norm": 0.48650145530700684,
      "learning_rate": 3.7277860875180383e-06,
      "loss": 0.0086,
      "step": 1923780
    },
    {
      "epoch": 3.1483408940646624,
      "grad_norm": 0.14989039301872253,
      "learning_rate": 3.727720195304521e-06,
      "loss": 0.0156,
      "step": 1923800
    },
    {
      "epoch": 3.1483736245033156,
      "grad_norm": 0.4809193015098572,
      "learning_rate": 3.7276543030910038e-06,
      "loss": 0.0107,
      "step": 1923820
    },
    {
      "epoch": 3.1484063549419687,
      "grad_norm": 0.062240611761808395,
      "learning_rate": 3.727588410877487e-06,
      "loss": 0.0146,
      "step": 1923840
    },
    {
      "epoch": 3.1484390853806223,
      "grad_norm": 0.279765248298645,
      "learning_rate": 3.7275225186639697e-06,
      "loss": 0.0081,
      "step": 1923860
    },
    {
      "epoch": 3.1484718158192755,
      "grad_norm": 0.48644742369651794,
      "learning_rate": 3.7274566264504524e-06,
      "loss": 0.0125,
      "step": 1923880
    },
    {
      "epoch": 3.148504546257929,
      "grad_norm": 0.3648759126663208,
      "learning_rate": 3.727390734236935e-06,
      "loss": 0.0106,
      "step": 1923900
    },
    {
      "epoch": 3.1485372766965822,
      "grad_norm": 0.21220600605010986,
      "learning_rate": 3.7273248420234183e-06,
      "loss": 0.007,
      "step": 1923920
    },
    {
      "epoch": 3.148570007135236,
      "grad_norm": 0.7195844650268555,
      "learning_rate": 3.727258949809901e-06,
      "loss": 0.0134,
      "step": 1923940
    },
    {
      "epoch": 3.148602737573889,
      "grad_norm": 0.35830143094062805,
      "learning_rate": 3.7271930575963842e-06,
      "loss": 0.0103,
      "step": 1923960
    },
    {
      "epoch": 3.148635468012542,
      "grad_norm": 0.11463184654712677,
      "learning_rate": 3.7271271653828674e-06,
      "loss": 0.0098,
      "step": 1923980
    },
    {
      "epoch": 3.1486681984511957,
      "grad_norm": 0.08441643416881561,
      "learning_rate": 3.72706127316935e-06,
      "loss": 0.0087,
      "step": 1924000
    },
    {
      "epoch": 3.148700928889849,
      "grad_norm": 0.11651713401079178,
      "learning_rate": 3.726995380955833e-06,
      "loss": 0.0104,
      "step": 1924020
    },
    {
      "epoch": 3.1487336593285025,
      "grad_norm": 0.32114318013191223,
      "learning_rate": 3.7269294887423156e-06,
      "loss": 0.0105,
      "step": 1924040
    },
    {
      "epoch": 3.1487663897671556,
      "grad_norm": 0.3575108051300049,
      "learning_rate": 3.7268635965287988e-06,
      "loss": 0.0082,
      "step": 1924060
    },
    {
      "epoch": 3.148799120205809,
      "grad_norm": 0.23207947611808777,
      "learning_rate": 3.7267977043152815e-06,
      "loss": 0.0149,
      "step": 1924080
    },
    {
      "epoch": 3.1488318506444624,
      "grad_norm": 0.2417958676815033,
      "learning_rate": 3.7267318121017643e-06,
      "loss": 0.0151,
      "step": 1924100
    },
    {
      "epoch": 3.1488645810831155,
      "grad_norm": 0.27496570348739624,
      "learning_rate": 3.726665919888247e-06,
      "loss": 0.0127,
      "step": 1924120
    },
    {
      "epoch": 3.148897311521769,
      "grad_norm": 0.22246405482292175,
      "learning_rate": 3.7266000276747297e-06,
      "loss": 0.0093,
      "step": 1924140
    },
    {
      "epoch": 3.1489300419604223,
      "grad_norm": 0.3061963617801666,
      "learning_rate": 3.726534135461213e-06,
      "loss": 0.0102,
      "step": 1924160
    },
    {
      "epoch": 3.148962772399076,
      "grad_norm": 0.3625969886779785,
      "learning_rate": 3.7264682432476956e-06,
      "loss": 0.0112,
      "step": 1924180
    },
    {
      "epoch": 3.148995502837729,
      "grad_norm": 3.2288224697113037,
      "learning_rate": 3.7264023510341784e-06,
      "loss": 0.0074,
      "step": 1924200
    },
    {
      "epoch": 3.1490282332763826,
      "grad_norm": 0.597385048866272,
      "learning_rate": 3.726336458820661e-06,
      "loss": 0.0164,
      "step": 1924220
    },
    {
      "epoch": 3.1490609637150357,
      "grad_norm": 0.3514634966850281,
      "learning_rate": 3.7262705666071443e-06,
      "loss": 0.0111,
      "step": 1924240
    },
    {
      "epoch": 3.149093694153689,
      "grad_norm": 0.7686677575111389,
      "learning_rate": 3.726204674393627e-06,
      "loss": 0.0142,
      "step": 1924260
    },
    {
      "epoch": 3.1491264245923425,
      "grad_norm": 0.6210513114929199,
      "learning_rate": 3.7261387821801098e-06,
      "loss": 0.0101,
      "step": 1924280
    },
    {
      "epoch": 3.1491591550309956,
      "grad_norm": 0.0759740099310875,
      "learning_rate": 3.7260728899665934e-06,
      "loss": 0.0136,
      "step": 1924300
    },
    {
      "epoch": 3.1491918854696492,
      "grad_norm": 0.9983124732971191,
      "learning_rate": 3.726006997753076e-06,
      "loss": 0.0132,
      "step": 1924320
    },
    {
      "epoch": 3.1492246159083024,
      "grad_norm": 0.34987473487854004,
      "learning_rate": 3.725941105539559e-06,
      "loss": 0.0133,
      "step": 1924340
    },
    {
      "epoch": 3.149257346346956,
      "grad_norm": 0.574876606464386,
      "learning_rate": 3.7258752133260416e-06,
      "loss": 0.0131,
      "step": 1924360
    },
    {
      "epoch": 3.149290076785609,
      "grad_norm": 0.1974456012248993,
      "learning_rate": 3.7258093211125247e-06,
      "loss": 0.017,
      "step": 1924380
    },
    {
      "epoch": 3.1493228072242623,
      "grad_norm": 0.6628038287162781,
      "learning_rate": 3.7257434288990075e-06,
      "loss": 0.0086,
      "step": 1924400
    },
    {
      "epoch": 3.149355537662916,
      "grad_norm": 0.9713791012763977,
      "learning_rate": 3.7256775366854902e-06,
      "loss": 0.0097,
      "step": 1924420
    },
    {
      "epoch": 3.149388268101569,
      "grad_norm": 0.1316877007484436,
      "learning_rate": 3.725611644471973e-06,
      "loss": 0.011,
      "step": 1924440
    },
    {
      "epoch": 3.1494209985402226,
      "grad_norm": 1.3524940013885498,
      "learning_rate": 3.725545752258456e-06,
      "loss": 0.0129,
      "step": 1924460
    },
    {
      "epoch": 3.1494537289788758,
      "grad_norm": 0.5087388157844543,
      "learning_rate": 3.725479860044939e-06,
      "loss": 0.0092,
      "step": 1924480
    },
    {
      "epoch": 3.149486459417529,
      "grad_norm": 0.22982317209243774,
      "learning_rate": 3.7254139678314216e-06,
      "loss": 0.0093,
      "step": 1924500
    },
    {
      "epoch": 3.1495191898561825,
      "grad_norm": 0.44391268491744995,
      "learning_rate": 3.7253480756179043e-06,
      "loss": 0.0106,
      "step": 1924520
    },
    {
      "epoch": 3.1495519202948357,
      "grad_norm": 0.39811962842941284,
      "learning_rate": 3.7252821834043875e-06,
      "loss": 0.0164,
      "step": 1924540
    },
    {
      "epoch": 3.1495846507334893,
      "grad_norm": 0.41825106739997864,
      "learning_rate": 3.7252162911908702e-06,
      "loss": 0.0105,
      "step": 1924560
    },
    {
      "epoch": 3.1496173811721424,
      "grad_norm": 0.21823398768901825,
      "learning_rate": 3.725150398977353e-06,
      "loss": 0.0169,
      "step": 1924580
    },
    {
      "epoch": 3.149650111610796,
      "grad_norm": 0.11206488311290741,
      "learning_rate": 3.7250845067638357e-06,
      "loss": 0.01,
      "step": 1924600
    },
    {
      "epoch": 3.149682842049449,
      "grad_norm": 0.48382309079170227,
      "learning_rate": 3.7250186145503185e-06,
      "loss": 0.012,
      "step": 1924620
    },
    {
      "epoch": 3.1497155724881023,
      "grad_norm": 0.32845109701156616,
      "learning_rate": 3.7249527223368016e-06,
      "loss": 0.0104,
      "step": 1924640
    },
    {
      "epoch": 3.149748302926756,
      "grad_norm": 0.3758266270160675,
      "learning_rate": 3.7248868301232848e-06,
      "loss": 0.0141,
      "step": 1924660
    },
    {
      "epoch": 3.149781033365409,
      "grad_norm": 0.16428174078464508,
      "learning_rate": 3.7248209379097675e-06,
      "loss": 0.0099,
      "step": 1924680
    },
    {
      "epoch": 3.1498137638040626,
      "grad_norm": 2.3130581378936768,
      "learning_rate": 3.7247550456962507e-06,
      "loss": 0.0147,
      "step": 1924700
    },
    {
      "epoch": 3.149846494242716,
      "grad_norm": 0.5741351842880249,
      "learning_rate": 3.7246891534827334e-06,
      "loss": 0.012,
      "step": 1924720
    },
    {
      "epoch": 3.1498792246813694,
      "grad_norm": 0.4038220942020416,
      "learning_rate": 3.724623261269216e-06,
      "loss": 0.012,
      "step": 1924740
    },
    {
      "epoch": 3.1499119551200225,
      "grad_norm": 0.15226460993289948,
      "learning_rate": 3.724557369055699e-06,
      "loss": 0.0076,
      "step": 1924760
    },
    {
      "epoch": 3.1499446855586757,
      "grad_norm": 0.37869635224342346,
      "learning_rate": 3.724491476842182e-06,
      "loss": 0.0101,
      "step": 1924780
    },
    {
      "epoch": 3.1499774159973293,
      "grad_norm": 0.3799130916595459,
      "learning_rate": 3.724425584628665e-06,
      "loss": 0.0118,
      "step": 1924800
    },
    {
      "epoch": 3.1500101464359824,
      "grad_norm": 0.245377779006958,
      "learning_rate": 3.7243596924151476e-06,
      "loss": 0.0106,
      "step": 1924820
    },
    {
      "epoch": 3.150042876874636,
      "grad_norm": 0.21188026666641235,
      "learning_rate": 3.7242938002016303e-06,
      "loss": 0.0073,
      "step": 1924840
    },
    {
      "epoch": 3.150075607313289,
      "grad_norm": 0.14777155220508575,
      "learning_rate": 3.7242279079881135e-06,
      "loss": 0.0079,
      "step": 1924860
    },
    {
      "epoch": 3.1501083377519423,
      "grad_norm": 0.6759167313575745,
      "learning_rate": 3.724162015774596e-06,
      "loss": 0.0091,
      "step": 1924880
    },
    {
      "epoch": 3.150141068190596,
      "grad_norm": 0.303046315908432,
      "learning_rate": 3.724096123561079e-06,
      "loss": 0.0088,
      "step": 1924900
    },
    {
      "epoch": 3.150173798629249,
      "grad_norm": 0.3690112829208374,
      "learning_rate": 3.7240302313475617e-06,
      "loss": 0.009,
      "step": 1924920
    },
    {
      "epoch": 3.1502065290679027,
      "grad_norm": 0.5547389984130859,
      "learning_rate": 3.723964339134045e-06,
      "loss": 0.0099,
      "step": 1924940
    },
    {
      "epoch": 3.150239259506556,
      "grad_norm": 0.12654557824134827,
      "learning_rate": 3.7238984469205276e-06,
      "loss": 0.0098,
      "step": 1924960
    },
    {
      "epoch": 3.1502719899452094,
      "grad_norm": 0.9887586236000061,
      "learning_rate": 3.7238325547070103e-06,
      "loss": 0.0105,
      "step": 1924980
    },
    {
      "epoch": 3.1503047203838626,
      "grad_norm": 0.22227743268013,
      "learning_rate": 3.723766662493494e-06,
      "loss": 0.0094,
      "step": 1925000
    },
    {
      "epoch": 3.1503374508225157,
      "grad_norm": 0.34134969115257263,
      "learning_rate": 3.7237007702799766e-06,
      "loss": 0.017,
      "step": 1925020
    },
    {
      "epoch": 3.1503701812611693,
      "grad_norm": 0.3313875198364258,
      "learning_rate": 3.7236348780664594e-06,
      "loss": 0.0135,
      "step": 1925040
    },
    {
      "epoch": 3.1504029116998225,
      "grad_norm": 0.18814058601856232,
      "learning_rate": 3.723568985852942e-06,
      "loss": 0.0149,
      "step": 1925060
    },
    {
      "epoch": 3.150435642138476,
      "grad_norm": 0.5656672120094299,
      "learning_rate": 3.7235030936394253e-06,
      "loss": 0.0098,
      "step": 1925080
    },
    {
      "epoch": 3.150468372577129,
      "grad_norm": 0.37477460503578186,
      "learning_rate": 3.723437201425908e-06,
      "loss": 0.0117,
      "step": 1925100
    },
    {
      "epoch": 3.150501103015783,
      "grad_norm": 0.11872155964374542,
      "learning_rate": 3.7233713092123908e-06,
      "loss": 0.0076,
      "step": 1925120
    },
    {
      "epoch": 3.150533833454436,
      "grad_norm": 0.14521461725234985,
      "learning_rate": 3.7233054169988735e-06,
      "loss": 0.0087,
      "step": 1925140
    },
    {
      "epoch": 3.150566563893089,
      "grad_norm": 0.9227138757705688,
      "learning_rate": 3.7232395247853562e-06,
      "loss": 0.0154,
      "step": 1925160
    },
    {
      "epoch": 3.1505992943317427,
      "grad_norm": 0.43642282485961914,
      "learning_rate": 3.7231736325718394e-06,
      "loss": 0.0145,
      "step": 1925180
    },
    {
      "epoch": 3.150632024770396,
      "grad_norm": 0.1868221014738083,
      "learning_rate": 3.723107740358322e-06,
      "loss": 0.0095,
      "step": 1925200
    },
    {
      "epoch": 3.1506647552090494,
      "grad_norm": 0.10667689144611359,
      "learning_rate": 3.723041848144805e-06,
      "loss": 0.01,
      "step": 1925220
    },
    {
      "epoch": 3.1506974856477026,
      "grad_norm": 0.6741929054260254,
      "learning_rate": 3.7229759559312876e-06,
      "loss": 0.0109,
      "step": 1925240
    },
    {
      "epoch": 3.150730216086356,
      "grad_norm": 0.29192671179771423,
      "learning_rate": 3.722910063717771e-06,
      "loss": 0.0097,
      "step": 1925260
    },
    {
      "epoch": 3.1507629465250093,
      "grad_norm": 0.27316048741340637,
      "learning_rate": 3.7228441715042535e-06,
      "loss": 0.0118,
      "step": 1925280
    },
    {
      "epoch": 3.1507956769636625,
      "grad_norm": 0.2655380666255951,
      "learning_rate": 3.7227782792907363e-06,
      "loss": 0.016,
      "step": 1925300
    },
    {
      "epoch": 3.150828407402316,
      "grad_norm": 0.1774703562259674,
      "learning_rate": 3.722712387077219e-06,
      "loss": 0.0099,
      "step": 1925320
    },
    {
      "epoch": 3.150861137840969,
      "grad_norm": 0.40182968974113464,
      "learning_rate": 3.722646494863702e-06,
      "loss": 0.0136,
      "step": 1925340
    },
    {
      "epoch": 3.150893868279623,
      "grad_norm": 0.19067884981632233,
      "learning_rate": 3.7225806026501853e-06,
      "loss": 0.0062,
      "step": 1925360
    },
    {
      "epoch": 3.150926598718276,
      "grad_norm": 0.4625840187072754,
      "learning_rate": 3.722514710436668e-06,
      "loss": 0.014,
      "step": 1925380
    },
    {
      "epoch": 3.1509593291569296,
      "grad_norm": 0.0716843456029892,
      "learning_rate": 3.7224488182231512e-06,
      "loss": 0.0147,
      "step": 1925400
    },
    {
      "epoch": 3.1509920595955827,
      "grad_norm": 0.3284439146518707,
      "learning_rate": 3.722382926009634e-06,
      "loss": 0.0152,
      "step": 1925420
    },
    {
      "epoch": 3.151024790034236,
      "grad_norm": 0.0802474096417427,
      "learning_rate": 3.7223170337961167e-06,
      "loss": 0.0108,
      "step": 1925440
    },
    {
      "epoch": 3.1510575204728895,
      "grad_norm": 0.2305222898721695,
      "learning_rate": 3.7222511415825995e-06,
      "loss": 0.0116,
      "step": 1925460
    },
    {
      "epoch": 3.1510902509115426,
      "grad_norm": 0.3048447370529175,
      "learning_rate": 3.7221852493690826e-06,
      "loss": 0.0108,
      "step": 1925480
    },
    {
      "epoch": 3.151122981350196,
      "grad_norm": 0.3107801675796509,
      "learning_rate": 3.7221193571555654e-06,
      "loss": 0.0067,
      "step": 1925500
    },
    {
      "epoch": 3.1511557117888493,
      "grad_norm": 1.9895826578140259,
      "learning_rate": 3.722053464942048e-06,
      "loss": 0.009,
      "step": 1925520
    },
    {
      "epoch": 3.151188442227503,
      "grad_norm": 0.22386734187602997,
      "learning_rate": 3.721987572728531e-06,
      "loss": 0.0101,
      "step": 1925540
    },
    {
      "epoch": 3.151221172666156,
      "grad_norm": 0.5283432006835938,
      "learning_rate": 3.7219216805150136e-06,
      "loss": 0.0102,
      "step": 1925560
    },
    {
      "epoch": 3.1512539031048092,
      "grad_norm": 0.11268597096204758,
      "learning_rate": 3.7218557883014967e-06,
      "loss": 0.0095,
      "step": 1925580
    },
    {
      "epoch": 3.151286633543463,
      "grad_norm": 0.2716131806373596,
      "learning_rate": 3.7217898960879795e-06,
      "loss": 0.0118,
      "step": 1925600
    },
    {
      "epoch": 3.151319363982116,
      "grad_norm": 0.6751088500022888,
      "learning_rate": 3.7217240038744622e-06,
      "loss": 0.0132,
      "step": 1925620
    },
    {
      "epoch": 3.1513520944207696,
      "grad_norm": 0.4843848645687103,
      "learning_rate": 3.721658111660945e-06,
      "loss": 0.0094,
      "step": 1925640
    },
    {
      "epoch": 3.1513848248594227,
      "grad_norm": 0.13588106632232666,
      "learning_rate": 3.721592219447428e-06,
      "loss": 0.0094,
      "step": 1925660
    },
    {
      "epoch": 3.1514175552980763,
      "grad_norm": 0.24207554757595062,
      "learning_rate": 3.721526327233911e-06,
      "loss": 0.0154,
      "step": 1925680
    },
    {
      "epoch": 3.1514502857367295,
      "grad_norm": 0.2069527804851532,
      "learning_rate": 3.7214604350203936e-06,
      "loss": 0.0153,
      "step": 1925700
    },
    {
      "epoch": 3.1514830161753826,
      "grad_norm": 0.3670830726623535,
      "learning_rate": 3.721394542806877e-06,
      "loss": 0.0111,
      "step": 1925720
    },
    {
      "epoch": 3.151515746614036,
      "grad_norm": 0.38356783986091614,
      "learning_rate": 3.72132865059336e-06,
      "loss": 0.0139,
      "step": 1925740
    },
    {
      "epoch": 3.1515484770526894,
      "grad_norm": 0.1764804720878601,
      "learning_rate": 3.7212627583798427e-06,
      "loss": 0.0128,
      "step": 1925760
    },
    {
      "epoch": 3.151581207491343,
      "grad_norm": 0.2270769625902176,
      "learning_rate": 3.7211968661663254e-06,
      "loss": 0.0134,
      "step": 1925780
    },
    {
      "epoch": 3.151613937929996,
      "grad_norm": 1.001222014427185,
      "learning_rate": 3.7211309739528086e-06,
      "loss": 0.0201,
      "step": 1925800
    },
    {
      "epoch": 3.1516466683686497,
      "grad_norm": 0.2188555896282196,
      "learning_rate": 3.7210650817392913e-06,
      "loss": 0.0105,
      "step": 1925820
    },
    {
      "epoch": 3.151679398807303,
      "grad_norm": 0.38273027539253235,
      "learning_rate": 3.720999189525774e-06,
      "loss": 0.0104,
      "step": 1925840
    },
    {
      "epoch": 3.151712129245956,
      "grad_norm": 0.11088968813419342,
      "learning_rate": 3.720933297312257e-06,
      "loss": 0.0065,
      "step": 1925860
    },
    {
      "epoch": 3.1517448596846096,
      "grad_norm": 0.2988148033618927,
      "learning_rate": 3.72086740509874e-06,
      "loss": 0.0134,
      "step": 1925880
    },
    {
      "epoch": 3.1517775901232628,
      "grad_norm": 0.4312446415424347,
      "learning_rate": 3.7208015128852227e-06,
      "loss": 0.0141,
      "step": 1925900
    },
    {
      "epoch": 3.1518103205619163,
      "grad_norm": 0.2616202235221863,
      "learning_rate": 3.7207356206717054e-06,
      "loss": 0.0082,
      "step": 1925920
    },
    {
      "epoch": 3.1518430510005695,
      "grad_norm": 0.22799618542194366,
      "learning_rate": 3.720669728458188e-06,
      "loss": 0.0106,
      "step": 1925940
    },
    {
      "epoch": 3.1518757814392226,
      "grad_norm": 0.44706761837005615,
      "learning_rate": 3.7206038362446713e-06,
      "loss": 0.012,
      "step": 1925960
    },
    {
      "epoch": 3.1519085118778762,
      "grad_norm": 0.31583958864212036,
      "learning_rate": 3.720537944031154e-06,
      "loss": 0.0121,
      "step": 1925980
    },
    {
      "epoch": 3.1519412423165294,
      "grad_norm": 0.22735418379306793,
      "learning_rate": 3.720472051817637e-06,
      "loss": 0.0129,
      "step": 1926000
    },
    {
      "epoch": 3.151973972755183,
      "grad_norm": 0.29694753885269165,
      "learning_rate": 3.7204061596041196e-06,
      "loss": 0.0108,
      "step": 1926020
    },
    {
      "epoch": 3.152006703193836,
      "grad_norm": 0.27908065915107727,
      "learning_rate": 3.7203402673906023e-06,
      "loss": 0.0152,
      "step": 1926040
    },
    {
      "epoch": 3.1520394336324897,
      "grad_norm": 0.22557982802391052,
      "learning_rate": 3.720274375177086e-06,
      "loss": 0.0126,
      "step": 1926060
    },
    {
      "epoch": 3.152072164071143,
      "grad_norm": 0.09269776195287704,
      "learning_rate": 3.7202084829635686e-06,
      "loss": 0.0133,
      "step": 1926080
    },
    {
      "epoch": 3.152104894509796,
      "grad_norm": 0.4892706274986267,
      "learning_rate": 3.7201425907500514e-06,
      "loss": 0.0074,
      "step": 1926100
    },
    {
      "epoch": 3.1521376249484496,
      "grad_norm": 0.2840869426727295,
      "learning_rate": 3.7200766985365345e-06,
      "loss": 0.0084,
      "step": 1926120
    },
    {
      "epoch": 3.1521703553871028,
      "grad_norm": 0.5451359152793884,
      "learning_rate": 3.7200108063230173e-06,
      "loss": 0.0142,
      "step": 1926140
    },
    {
      "epoch": 3.1522030858257564,
      "grad_norm": 0.34182804822921753,
      "learning_rate": 3.7199449141095e-06,
      "loss": 0.0125,
      "step": 1926160
    },
    {
      "epoch": 3.1522358162644095,
      "grad_norm": 0.12824204564094543,
      "learning_rate": 3.7198790218959827e-06,
      "loss": 0.0099,
      "step": 1926180
    },
    {
      "epoch": 3.152268546703063,
      "grad_norm": 0.2935434877872467,
      "learning_rate": 3.719813129682466e-06,
      "loss": 0.0067,
      "step": 1926200
    },
    {
      "epoch": 3.1523012771417163,
      "grad_norm": 2.2369298934936523,
      "learning_rate": 3.7197472374689486e-06,
      "loss": 0.0143,
      "step": 1926220
    },
    {
      "epoch": 3.1523340075803694,
      "grad_norm": 0.7635018229484558,
      "learning_rate": 3.7196813452554314e-06,
      "loss": 0.0121,
      "step": 1926240
    },
    {
      "epoch": 3.152366738019023,
      "grad_norm": 0.31798213720321655,
      "learning_rate": 3.719615453041914e-06,
      "loss": 0.0169,
      "step": 1926260
    },
    {
      "epoch": 3.152399468457676,
      "grad_norm": 0.16720418632030487,
      "learning_rate": 3.7195495608283973e-06,
      "loss": 0.0102,
      "step": 1926280
    },
    {
      "epoch": 3.1524321988963298,
      "grad_norm": 0.20060960948467255,
      "learning_rate": 3.71948366861488e-06,
      "loss": 0.0146,
      "step": 1926300
    },
    {
      "epoch": 3.152464929334983,
      "grad_norm": 0.19168496131896973,
      "learning_rate": 3.7194177764013628e-06,
      "loss": 0.0061,
      "step": 1926320
    },
    {
      "epoch": 3.152497659773636,
      "grad_norm": 0.2768796384334564,
      "learning_rate": 3.7193518841878455e-06,
      "loss": 0.0086,
      "step": 1926340
    },
    {
      "epoch": 3.1525303902122896,
      "grad_norm": 0.16490454971790314,
      "learning_rate": 3.7192859919743287e-06,
      "loss": 0.0103,
      "step": 1926360
    },
    {
      "epoch": 3.152563120650943,
      "grad_norm": 0.25231799483299255,
      "learning_rate": 3.7192200997608114e-06,
      "loss": 0.0096,
      "step": 1926380
    },
    {
      "epoch": 3.1525958510895964,
      "grad_norm": 0.8813009858131409,
      "learning_rate": 3.719154207547294e-06,
      "loss": 0.01,
      "step": 1926400
    },
    {
      "epoch": 3.1526285815282495,
      "grad_norm": 0.11294249445199966,
      "learning_rate": 3.7190883153337777e-06,
      "loss": 0.0094,
      "step": 1926420
    },
    {
      "epoch": 3.152661311966903,
      "grad_norm": 0.3516397476196289,
      "learning_rate": 3.7190224231202605e-06,
      "loss": 0.0105,
      "step": 1926440
    },
    {
      "epoch": 3.1526940424055563,
      "grad_norm": 0.744060218334198,
      "learning_rate": 3.7189565309067432e-06,
      "loss": 0.0121,
      "step": 1926460
    },
    {
      "epoch": 3.1527267728442094,
      "grad_norm": 0.19657392799854279,
      "learning_rate": 3.718890638693226e-06,
      "loss": 0.0098,
      "step": 1926480
    },
    {
      "epoch": 3.152759503282863,
      "grad_norm": 0.42772847414016724,
      "learning_rate": 3.718824746479709e-06,
      "loss": 0.0126,
      "step": 1926500
    },
    {
      "epoch": 3.152792233721516,
      "grad_norm": 0.48263368010520935,
      "learning_rate": 3.718758854266192e-06,
      "loss": 0.0169,
      "step": 1926520
    },
    {
      "epoch": 3.1528249641601698,
      "grad_norm": 0.4800766110420227,
      "learning_rate": 3.7186929620526746e-06,
      "loss": 0.0108,
      "step": 1926540
    },
    {
      "epoch": 3.152857694598823,
      "grad_norm": 0.20309098064899445,
      "learning_rate": 3.7186270698391573e-06,
      "loss": 0.0134,
      "step": 1926560
    },
    {
      "epoch": 3.1528904250374765,
      "grad_norm": 0.21779246628284454,
      "learning_rate": 3.71856117762564e-06,
      "loss": 0.0077,
      "step": 1926580
    },
    {
      "epoch": 3.1529231554761297,
      "grad_norm": 0.30024486780166626,
      "learning_rate": 3.7184952854121232e-06,
      "loss": 0.0088,
      "step": 1926600
    },
    {
      "epoch": 3.152955885914783,
      "grad_norm": 0.47390422224998474,
      "learning_rate": 3.718429393198606e-06,
      "loss": 0.0116,
      "step": 1926620
    },
    {
      "epoch": 3.1529886163534364,
      "grad_norm": 0.35438159108161926,
      "learning_rate": 3.7183635009850887e-06,
      "loss": 0.0138,
      "step": 1926640
    },
    {
      "epoch": 3.1530213467920896,
      "grad_norm": 1.0140846967697144,
      "learning_rate": 3.7182976087715715e-06,
      "loss": 0.0141,
      "step": 1926660
    },
    {
      "epoch": 3.153054077230743,
      "grad_norm": 0.2388782948255539,
      "learning_rate": 3.7182317165580546e-06,
      "loss": 0.0107,
      "step": 1926680
    },
    {
      "epoch": 3.1530868076693963,
      "grad_norm": 0.4911002218723297,
      "learning_rate": 3.7181658243445374e-06,
      "loss": 0.0158,
      "step": 1926700
    },
    {
      "epoch": 3.15311953810805,
      "grad_norm": 0.7897176146507263,
      "learning_rate": 3.71809993213102e-06,
      "loss": 0.0148,
      "step": 1926720
    },
    {
      "epoch": 3.153152268546703,
      "grad_norm": 0.15184108912944794,
      "learning_rate": 3.718034039917503e-06,
      "loss": 0.0106,
      "step": 1926740
    },
    {
      "epoch": 3.153184998985356,
      "grad_norm": 0.18637004494667053,
      "learning_rate": 3.7179681477039864e-06,
      "loss": 0.0144,
      "step": 1926760
    },
    {
      "epoch": 3.15321772942401,
      "grad_norm": 0.10821427404880524,
      "learning_rate": 3.717902255490469e-06,
      "loss": 0.0114,
      "step": 1926780
    },
    {
      "epoch": 3.153250459862663,
      "grad_norm": 0.20152603089809418,
      "learning_rate": 3.717836363276952e-06,
      "loss": 0.0117,
      "step": 1926800
    },
    {
      "epoch": 3.1532831903013165,
      "grad_norm": 0.039717528969049454,
      "learning_rate": 3.717770471063435e-06,
      "loss": 0.0118,
      "step": 1926820
    },
    {
      "epoch": 3.1533159207399697,
      "grad_norm": 0.12773574888706207,
      "learning_rate": 3.717704578849918e-06,
      "loss": 0.0143,
      "step": 1926840
    },
    {
      "epoch": 3.1533486511786233,
      "grad_norm": 0.294877290725708,
      "learning_rate": 3.7176386866364006e-06,
      "loss": 0.0143,
      "step": 1926860
    },
    {
      "epoch": 3.1533813816172764,
      "grad_norm": 0.10129132121801376,
      "learning_rate": 3.7175727944228833e-06,
      "loss": 0.0101,
      "step": 1926880
    },
    {
      "epoch": 3.1534141120559296,
      "grad_norm": 0.16172672808170319,
      "learning_rate": 3.7175069022093665e-06,
      "loss": 0.0115,
      "step": 1926900
    },
    {
      "epoch": 3.153446842494583,
      "grad_norm": 0.5744615197181702,
      "learning_rate": 3.717441009995849e-06,
      "loss": 0.0142,
      "step": 1926920
    },
    {
      "epoch": 3.1534795729332363,
      "grad_norm": 0.2031971961259842,
      "learning_rate": 3.717375117782332e-06,
      "loss": 0.0142,
      "step": 1926940
    },
    {
      "epoch": 3.15351230337189,
      "grad_norm": 0.2216326892375946,
      "learning_rate": 3.7173092255688147e-06,
      "loss": 0.0085,
      "step": 1926960
    },
    {
      "epoch": 3.153545033810543,
      "grad_norm": 0.30939269065856934,
      "learning_rate": 3.717243333355298e-06,
      "loss": 0.0134,
      "step": 1926980
    },
    {
      "epoch": 3.1535777642491967,
      "grad_norm": 0.07309295237064362,
      "learning_rate": 3.7171774411417806e-06,
      "loss": 0.0093,
      "step": 1927000
    },
    {
      "epoch": 3.15361049468785,
      "grad_norm": 0.37110117077827454,
      "learning_rate": 3.7171115489282633e-06,
      "loss": 0.0139,
      "step": 1927020
    },
    {
      "epoch": 3.153643225126503,
      "grad_norm": 0.3441486358642578,
      "learning_rate": 3.717045656714746e-06,
      "loss": 0.0106,
      "step": 1927040
    },
    {
      "epoch": 3.1536759555651566,
      "grad_norm": 0.6427123546600342,
      "learning_rate": 3.716979764501229e-06,
      "loss": 0.0116,
      "step": 1927060
    },
    {
      "epoch": 3.1537086860038097,
      "grad_norm": 0.40180906653404236,
      "learning_rate": 3.716913872287712e-06,
      "loss": 0.0076,
      "step": 1927080
    },
    {
      "epoch": 3.1537414164424633,
      "grad_norm": 0.20127634704113007,
      "learning_rate": 3.7168479800741947e-06,
      "loss": 0.0121,
      "step": 1927100
    },
    {
      "epoch": 3.1537741468811165,
      "grad_norm": 0.19172513484954834,
      "learning_rate": 3.716782087860678e-06,
      "loss": 0.0115,
      "step": 1927120
    },
    {
      "epoch": 3.15380687731977,
      "grad_norm": 0.29268866777420044,
      "learning_rate": 3.716716195647161e-06,
      "loss": 0.0151,
      "step": 1927140
    },
    {
      "epoch": 3.153839607758423,
      "grad_norm": 0.1823393851518631,
      "learning_rate": 3.7166503034336438e-06,
      "loss": 0.0166,
      "step": 1927160
    },
    {
      "epoch": 3.1538723381970764,
      "grad_norm": 0.17968769371509552,
      "learning_rate": 3.7165844112201265e-06,
      "loss": 0.0104,
      "step": 1927180
    },
    {
      "epoch": 3.15390506863573,
      "grad_norm": 0.2272915095090866,
      "learning_rate": 3.7165185190066092e-06,
      "loss": 0.0168,
      "step": 1927200
    },
    {
      "epoch": 3.153937799074383,
      "grad_norm": 0.5053814649581909,
      "learning_rate": 3.7164526267930924e-06,
      "loss": 0.0092,
      "step": 1927220
    },
    {
      "epoch": 3.1539705295130367,
      "grad_norm": 0.44636061787605286,
      "learning_rate": 3.716386734579575e-06,
      "loss": 0.0127,
      "step": 1927240
    },
    {
      "epoch": 3.15400325995169,
      "grad_norm": 0.23024845123291016,
      "learning_rate": 3.716320842366058e-06,
      "loss": 0.0125,
      "step": 1927260
    },
    {
      "epoch": 3.1540359903903434,
      "grad_norm": 0.981860339641571,
      "learning_rate": 3.7162549501525406e-06,
      "loss": 0.014,
      "step": 1927280
    },
    {
      "epoch": 3.1540687208289966,
      "grad_norm": 0.30237919092178345,
      "learning_rate": 3.716189057939024e-06,
      "loss": 0.0069,
      "step": 1927300
    },
    {
      "epoch": 3.1541014512676497,
      "grad_norm": 0.43427592515945435,
      "learning_rate": 3.7161231657255065e-06,
      "loss": 0.0089,
      "step": 1927320
    },
    {
      "epoch": 3.1541341817063033,
      "grad_norm": 0.2766474783420563,
      "learning_rate": 3.7160572735119893e-06,
      "loss": 0.0119,
      "step": 1927340
    },
    {
      "epoch": 3.1541669121449565,
      "grad_norm": 0.19864337146282196,
      "learning_rate": 3.715991381298472e-06,
      "loss": 0.0114,
      "step": 1927360
    },
    {
      "epoch": 3.15419964258361,
      "grad_norm": 0.32659250497817993,
      "learning_rate": 3.715925489084955e-06,
      "loss": 0.0122,
      "step": 1927380
    },
    {
      "epoch": 3.1542323730222632,
      "grad_norm": 0.3929522633552551,
      "learning_rate": 3.715859596871438e-06,
      "loss": 0.0086,
      "step": 1927400
    },
    {
      "epoch": 3.1542651034609164,
      "grad_norm": 0.2043890804052353,
      "learning_rate": 3.7157937046579207e-06,
      "loss": 0.0107,
      "step": 1927420
    },
    {
      "epoch": 3.15429783389957,
      "grad_norm": 0.07657646387815475,
      "learning_rate": 3.7157278124444034e-06,
      "loss": 0.0095,
      "step": 1927440
    },
    {
      "epoch": 3.154330564338223,
      "grad_norm": 0.27008718252182007,
      "learning_rate": 3.715661920230886e-06,
      "loss": 0.0077,
      "step": 1927460
    },
    {
      "epoch": 3.1543632947768767,
      "grad_norm": 0.04966236278414726,
      "learning_rate": 3.7155960280173697e-06,
      "loss": 0.0091,
      "step": 1927480
    },
    {
      "epoch": 3.15439602521553,
      "grad_norm": 0.7390084862709045,
      "learning_rate": 3.7155301358038525e-06,
      "loss": 0.0122,
      "step": 1927500
    },
    {
      "epoch": 3.1544287556541835,
      "grad_norm": 0.3967314660549164,
      "learning_rate": 3.7154642435903356e-06,
      "loss": 0.0154,
      "step": 1927520
    },
    {
      "epoch": 3.1544614860928366,
      "grad_norm": 0.27558785676956177,
      "learning_rate": 3.7153983513768184e-06,
      "loss": 0.0086,
      "step": 1927540
    },
    {
      "epoch": 3.1544942165314898,
      "grad_norm": 0.3759263753890991,
      "learning_rate": 3.715332459163301e-06,
      "loss": 0.0099,
      "step": 1927560
    },
    {
      "epoch": 3.1545269469701434,
      "grad_norm": 0.4449039399623871,
      "learning_rate": 3.715266566949784e-06,
      "loss": 0.0109,
      "step": 1927580
    },
    {
      "epoch": 3.1545596774087965,
      "grad_norm": 0.3608798384666443,
      "learning_rate": 3.7152006747362666e-06,
      "loss": 0.0145,
      "step": 1927600
    },
    {
      "epoch": 3.15459240784745,
      "grad_norm": 0.4156724512577057,
      "learning_rate": 3.7151347825227497e-06,
      "loss": 0.0107,
      "step": 1927620
    },
    {
      "epoch": 3.1546251382861032,
      "grad_norm": 0.2547829747200012,
      "learning_rate": 3.7150688903092325e-06,
      "loss": 0.0123,
      "step": 1927640
    },
    {
      "epoch": 3.154657868724757,
      "grad_norm": 0.6408008933067322,
      "learning_rate": 3.7150029980957152e-06,
      "loss": 0.0122,
      "step": 1927660
    },
    {
      "epoch": 3.15469059916341,
      "grad_norm": 0.12219563871622086,
      "learning_rate": 3.714937105882198e-06,
      "loss": 0.0124,
      "step": 1927680
    },
    {
      "epoch": 3.154723329602063,
      "grad_norm": 0.3006250262260437,
      "learning_rate": 3.714871213668681e-06,
      "loss": 0.0115,
      "step": 1927700
    },
    {
      "epoch": 3.1547560600407167,
      "grad_norm": 0.2255689948797226,
      "learning_rate": 3.714805321455164e-06,
      "loss": 0.0113,
      "step": 1927720
    },
    {
      "epoch": 3.15478879047937,
      "grad_norm": 0.30958589911460876,
      "learning_rate": 3.7147394292416466e-06,
      "loss": 0.0193,
      "step": 1927740
    },
    {
      "epoch": 3.1548215209180235,
      "grad_norm": 0.19005829095840454,
      "learning_rate": 3.7146735370281294e-06,
      "loss": 0.01,
      "step": 1927760
    },
    {
      "epoch": 3.1548542513566766,
      "grad_norm": 0.21731288731098175,
      "learning_rate": 3.7146076448146125e-06,
      "loss": 0.009,
      "step": 1927780
    },
    {
      "epoch": 3.1548869817953302,
      "grad_norm": 0.3066169321537018,
      "learning_rate": 3.7145417526010953e-06,
      "loss": 0.0143,
      "step": 1927800
    },
    {
      "epoch": 3.1549197122339834,
      "grad_norm": 0.22795678675174713,
      "learning_rate": 3.7144758603875784e-06,
      "loss": 0.0127,
      "step": 1927820
    },
    {
      "epoch": 3.1549524426726365,
      "grad_norm": 0.24586673080921173,
      "learning_rate": 3.7144099681740616e-06,
      "loss": 0.0101,
      "step": 1927840
    },
    {
      "epoch": 3.15498517311129,
      "grad_norm": 0.46114784479141235,
      "learning_rate": 3.7143440759605443e-06,
      "loss": 0.0144,
      "step": 1927860
    },
    {
      "epoch": 3.1550179035499433,
      "grad_norm": 0.19037789106369019,
      "learning_rate": 3.714278183747027e-06,
      "loss": 0.0082,
      "step": 1927880
    },
    {
      "epoch": 3.155050633988597,
      "grad_norm": 0.2167079895734787,
      "learning_rate": 3.71421229153351e-06,
      "loss": 0.0154,
      "step": 1927900
    },
    {
      "epoch": 3.15508336442725,
      "grad_norm": 1.4347385168075562,
      "learning_rate": 3.714146399319993e-06,
      "loss": 0.0135,
      "step": 1927920
    },
    {
      "epoch": 3.155116094865903,
      "grad_norm": 0.8508316874504089,
      "learning_rate": 3.7140805071064757e-06,
      "loss": 0.0093,
      "step": 1927940
    },
    {
      "epoch": 3.1551488253045568,
      "grad_norm": 0.2192092090845108,
      "learning_rate": 3.7140146148929584e-06,
      "loss": 0.0099,
      "step": 1927960
    },
    {
      "epoch": 3.15518155574321,
      "grad_norm": 0.10374762862920761,
      "learning_rate": 3.713948722679441e-06,
      "loss": 0.0155,
      "step": 1927980
    },
    {
      "epoch": 3.1552142861818635,
      "grad_norm": 0.6390129327774048,
      "learning_rate": 3.713882830465924e-06,
      "loss": 0.0141,
      "step": 1928000
    },
    {
      "epoch": 3.1552470166205167,
      "grad_norm": 0.18312165141105652,
      "learning_rate": 3.713816938252407e-06,
      "loss": 0.0121,
      "step": 1928020
    },
    {
      "epoch": 3.1552797470591702,
      "grad_norm": 0.5453817248344421,
      "learning_rate": 3.71375104603889e-06,
      "loss": 0.0109,
      "step": 1928040
    },
    {
      "epoch": 3.1553124774978234,
      "grad_norm": 0.3304371237754822,
      "learning_rate": 3.7136851538253726e-06,
      "loss": 0.0086,
      "step": 1928060
    },
    {
      "epoch": 3.1553452079364765,
      "grad_norm": 0.24145950376987457,
      "learning_rate": 3.7136192616118553e-06,
      "loss": 0.0116,
      "step": 1928080
    },
    {
      "epoch": 3.15537793837513,
      "grad_norm": 0.2091234177350998,
      "learning_rate": 3.7135533693983385e-06,
      "loss": 0.0105,
      "step": 1928100
    },
    {
      "epoch": 3.1554106688137833,
      "grad_norm": 0.2127753347158432,
      "learning_rate": 3.713487477184821e-06,
      "loss": 0.0146,
      "step": 1928120
    },
    {
      "epoch": 3.155443399252437,
      "grad_norm": 0.36119744181632996,
      "learning_rate": 3.713421584971304e-06,
      "loss": 0.0105,
      "step": 1928140
    },
    {
      "epoch": 3.15547612969109,
      "grad_norm": 0.3547927141189575,
      "learning_rate": 3.7133556927577867e-06,
      "loss": 0.0106,
      "step": 1928160
    },
    {
      "epoch": 3.1555088601297436,
      "grad_norm": 0.3455306887626648,
      "learning_rate": 3.7132898005442703e-06,
      "loss": 0.0141,
      "step": 1928180
    },
    {
      "epoch": 3.155541590568397,
      "grad_norm": 0.7967915534973145,
      "learning_rate": 3.713223908330753e-06,
      "loss": 0.0153,
      "step": 1928200
    },
    {
      "epoch": 3.15557432100705,
      "grad_norm": 0.4682658314704895,
      "learning_rate": 3.7131580161172358e-06,
      "loss": 0.011,
      "step": 1928220
    },
    {
      "epoch": 3.1556070514457035,
      "grad_norm": 0.04428695887327194,
      "learning_rate": 3.713092123903719e-06,
      "loss": 0.0128,
      "step": 1928240
    },
    {
      "epoch": 3.1556397818843567,
      "grad_norm": 0.4946064352989197,
      "learning_rate": 3.7130262316902017e-06,
      "loss": 0.0129,
      "step": 1928260
    },
    {
      "epoch": 3.1556725123230103,
      "grad_norm": 0.9466979503631592,
      "learning_rate": 3.7129603394766844e-06,
      "loss": 0.0121,
      "step": 1928280
    },
    {
      "epoch": 3.1557052427616634,
      "grad_norm": 0.20463377237319946,
      "learning_rate": 3.712894447263167e-06,
      "loss": 0.0119,
      "step": 1928300
    },
    {
      "epoch": 3.155737973200317,
      "grad_norm": 0.08946927636861801,
      "learning_rate": 3.7128285550496503e-06,
      "loss": 0.0153,
      "step": 1928320
    },
    {
      "epoch": 3.15577070363897,
      "grad_norm": 0.1609504222869873,
      "learning_rate": 3.712762662836133e-06,
      "loss": 0.0127,
      "step": 1928340
    },
    {
      "epoch": 3.1558034340776233,
      "grad_norm": 0.24187998473644257,
      "learning_rate": 3.7126967706226158e-06,
      "loss": 0.0082,
      "step": 1928360
    },
    {
      "epoch": 3.155836164516277,
      "grad_norm": 0.2068682461977005,
      "learning_rate": 3.7126308784090985e-06,
      "loss": 0.0087,
      "step": 1928380
    },
    {
      "epoch": 3.15586889495493,
      "grad_norm": 0.286655068397522,
      "learning_rate": 3.7125649861955817e-06,
      "loss": 0.009,
      "step": 1928400
    },
    {
      "epoch": 3.1559016253935837,
      "grad_norm": 0.045551881194114685,
      "learning_rate": 3.7124990939820644e-06,
      "loss": 0.0057,
      "step": 1928420
    },
    {
      "epoch": 3.155934355832237,
      "grad_norm": 0.18798215687274933,
      "learning_rate": 3.712433201768547e-06,
      "loss": 0.0169,
      "step": 1928440
    },
    {
      "epoch": 3.1559670862708904,
      "grad_norm": 1.1221544742584229,
      "learning_rate": 3.71236730955503e-06,
      "loss": 0.0121,
      "step": 1928460
    },
    {
      "epoch": 3.1559998167095435,
      "grad_norm": 0.2536565065383911,
      "learning_rate": 3.7123014173415126e-06,
      "loss": 0.0114,
      "step": 1928480
    },
    {
      "epoch": 3.1560325471481967,
      "grad_norm": 0.13301828503608704,
      "learning_rate": 3.712235525127996e-06,
      "loss": 0.0112,
      "step": 1928500
    },
    {
      "epoch": 3.1560652775868503,
      "grad_norm": 0.0800221860408783,
      "learning_rate": 3.712169632914479e-06,
      "loss": 0.0103,
      "step": 1928520
    },
    {
      "epoch": 3.1560980080255034,
      "grad_norm": 0.36559200286865234,
      "learning_rate": 3.7121037407009617e-06,
      "loss": 0.0142,
      "step": 1928540
    },
    {
      "epoch": 3.156130738464157,
      "grad_norm": 0.4102272093296051,
      "learning_rate": 3.712037848487445e-06,
      "loss": 0.009,
      "step": 1928560
    },
    {
      "epoch": 3.15616346890281,
      "grad_norm": 0.4566989839076996,
      "learning_rate": 3.7119719562739276e-06,
      "loss": 0.0121,
      "step": 1928580
    },
    {
      "epoch": 3.156196199341464,
      "grad_norm": 0.13181757926940918,
      "learning_rate": 3.7119060640604103e-06,
      "loss": 0.0099,
      "step": 1928600
    },
    {
      "epoch": 3.156228929780117,
      "grad_norm": 0.43525558710098267,
      "learning_rate": 3.711840171846893e-06,
      "loss": 0.021,
      "step": 1928620
    },
    {
      "epoch": 3.15626166021877,
      "grad_norm": 1.9707825183868408,
      "learning_rate": 3.7117742796333763e-06,
      "loss": 0.0118,
      "step": 1928640
    },
    {
      "epoch": 3.1562943906574237,
      "grad_norm": 0.24911117553710938,
      "learning_rate": 3.711708387419859e-06,
      "loss": 0.0118,
      "step": 1928660
    },
    {
      "epoch": 3.156327121096077,
      "grad_norm": 0.8694677948951721,
      "learning_rate": 3.7116424952063417e-06,
      "loss": 0.0165,
      "step": 1928680
    },
    {
      "epoch": 3.1563598515347304,
      "grad_norm": 0.35227468609809875,
      "learning_rate": 3.7115766029928245e-06,
      "loss": 0.0126,
      "step": 1928700
    },
    {
      "epoch": 3.1563925819733836,
      "grad_norm": 0.36298882961273193,
      "learning_rate": 3.7115107107793076e-06,
      "loss": 0.0094,
      "step": 1928720
    },
    {
      "epoch": 3.156425312412037,
      "grad_norm": 0.10895339399576187,
      "learning_rate": 3.7114448185657904e-06,
      "loss": 0.0091,
      "step": 1928740
    },
    {
      "epoch": 3.1564580428506903,
      "grad_norm": 0.15639929473400116,
      "learning_rate": 3.711378926352273e-06,
      "loss": 0.0107,
      "step": 1928760
    },
    {
      "epoch": 3.1564907732893435,
      "grad_norm": 0.7230513691902161,
      "learning_rate": 3.711313034138756e-06,
      "loss": 0.0097,
      "step": 1928780
    },
    {
      "epoch": 3.156523503727997,
      "grad_norm": 0.2705746293067932,
      "learning_rate": 3.711247141925239e-06,
      "loss": 0.0133,
      "step": 1928800
    },
    {
      "epoch": 3.15655623416665,
      "grad_norm": 0.2287997156381607,
      "learning_rate": 3.7111812497117218e-06,
      "loss": 0.0114,
      "step": 1928820
    },
    {
      "epoch": 3.156588964605304,
      "grad_norm": 0.176482155919075,
      "learning_rate": 3.7111153574982045e-06,
      "loss": 0.0167,
      "step": 1928840
    },
    {
      "epoch": 3.156621695043957,
      "grad_norm": 0.37536728382110596,
      "learning_rate": 3.7110494652846872e-06,
      "loss": 0.0102,
      "step": 1928860
    },
    {
      "epoch": 3.1566544254826105,
      "grad_norm": 0.3185179531574249,
      "learning_rate": 3.710983573071171e-06,
      "loss": 0.0109,
      "step": 1928880
    },
    {
      "epoch": 3.1566871559212637,
      "grad_norm": 0.32422935962677,
      "learning_rate": 3.7109176808576536e-06,
      "loss": 0.0135,
      "step": 1928900
    },
    {
      "epoch": 3.156719886359917,
      "grad_norm": 0.13484330475330353,
      "learning_rate": 3.7108517886441363e-06,
      "loss": 0.0191,
      "step": 1928920
    },
    {
      "epoch": 3.1567526167985704,
      "grad_norm": 0.28799885511398315,
      "learning_rate": 3.7107858964306195e-06,
      "loss": 0.0175,
      "step": 1928940
    },
    {
      "epoch": 3.1567853472372236,
      "grad_norm": 0.17911940813064575,
      "learning_rate": 3.710720004217102e-06,
      "loss": 0.0103,
      "step": 1928960
    },
    {
      "epoch": 3.156818077675877,
      "grad_norm": 0.2923136353492737,
      "learning_rate": 3.710654112003585e-06,
      "loss": 0.0077,
      "step": 1928980
    },
    {
      "epoch": 3.1568508081145303,
      "grad_norm": 0.16410283744335175,
      "learning_rate": 3.7105882197900677e-06,
      "loss": 0.0093,
      "step": 1929000
    },
    {
      "epoch": 3.1568835385531835,
      "grad_norm": 0.15767772495746613,
      "learning_rate": 3.7105223275765504e-06,
      "loss": 0.0149,
      "step": 1929020
    },
    {
      "epoch": 3.156916268991837,
      "grad_norm": 0.09225600212812424,
      "learning_rate": 3.7104564353630336e-06,
      "loss": 0.0136,
      "step": 1929040
    },
    {
      "epoch": 3.1569489994304902,
      "grad_norm": 0.9994087815284729,
      "learning_rate": 3.7103905431495163e-06,
      "loss": 0.0155,
      "step": 1929060
    },
    {
      "epoch": 3.156981729869144,
      "grad_norm": 0.07278969138860703,
      "learning_rate": 3.710324650935999e-06,
      "loss": 0.0107,
      "step": 1929080
    },
    {
      "epoch": 3.157014460307797,
      "grad_norm": 0.25234806537628174,
      "learning_rate": 3.710258758722482e-06,
      "loss": 0.0104,
      "step": 1929100
    },
    {
      "epoch": 3.1570471907464506,
      "grad_norm": 0.2832283675670624,
      "learning_rate": 3.710192866508965e-06,
      "loss": 0.0111,
      "step": 1929120
    },
    {
      "epoch": 3.1570799211851037,
      "grad_norm": 0.7915051579475403,
      "learning_rate": 3.7101269742954477e-06,
      "loss": 0.0092,
      "step": 1929140
    },
    {
      "epoch": 3.157112651623757,
      "grad_norm": 0.4504125416278839,
      "learning_rate": 3.7100610820819305e-06,
      "loss": 0.0129,
      "step": 1929160
    },
    {
      "epoch": 3.1571453820624105,
      "grad_norm": 0.19501608610153198,
      "learning_rate": 3.709995189868413e-06,
      "loss": 0.0082,
      "step": 1929180
    },
    {
      "epoch": 3.1571781125010636,
      "grad_norm": 0.42629605531692505,
      "learning_rate": 3.7099292976548964e-06,
      "loss": 0.0111,
      "step": 1929200
    },
    {
      "epoch": 3.157210842939717,
      "grad_norm": 0.054716143757104874,
      "learning_rate": 3.7098634054413795e-06,
      "loss": 0.0095,
      "step": 1929220
    },
    {
      "epoch": 3.1572435733783704,
      "grad_norm": 0.12621402740478516,
      "learning_rate": 3.7097975132278623e-06,
      "loss": 0.0126,
      "step": 1929240
    },
    {
      "epoch": 3.157276303817024,
      "grad_norm": 0.9907152056694031,
      "learning_rate": 3.7097316210143454e-06,
      "loss": 0.0079,
      "step": 1929260
    },
    {
      "epoch": 3.157309034255677,
      "grad_norm": 0.3802896738052368,
      "learning_rate": 3.709665728800828e-06,
      "loss": 0.0115,
      "step": 1929280
    },
    {
      "epoch": 3.1573417646943303,
      "grad_norm": 0.6048456430435181,
      "learning_rate": 3.709599836587311e-06,
      "loss": 0.0127,
      "step": 1929300
    },
    {
      "epoch": 3.157374495132984,
      "grad_norm": 0.18225154280662537,
      "learning_rate": 3.7095339443737936e-06,
      "loss": 0.013,
      "step": 1929320
    },
    {
      "epoch": 3.157407225571637,
      "grad_norm": 0.5615873336791992,
      "learning_rate": 3.709468052160277e-06,
      "loss": 0.0131,
      "step": 1929340
    },
    {
      "epoch": 3.1574399560102906,
      "grad_norm": 0.41176047921180725,
      "learning_rate": 3.7094021599467595e-06,
      "loss": 0.0075,
      "step": 1929360
    },
    {
      "epoch": 3.1574726864489437,
      "grad_norm": 0.2500695288181305,
      "learning_rate": 3.7093362677332423e-06,
      "loss": 0.0098,
      "step": 1929380
    },
    {
      "epoch": 3.157505416887597,
      "grad_norm": 0.27241235971450806,
      "learning_rate": 3.709270375519725e-06,
      "loss": 0.0147,
      "step": 1929400
    },
    {
      "epoch": 3.1575381473262505,
      "grad_norm": 0.9981719851493835,
      "learning_rate": 3.7092044833062078e-06,
      "loss": 0.0157,
      "step": 1929420
    },
    {
      "epoch": 3.1575708777649036,
      "grad_norm": 0.17312413454055786,
      "learning_rate": 3.709138591092691e-06,
      "loss": 0.0126,
      "step": 1929440
    },
    {
      "epoch": 3.1576036082035572,
      "grad_norm": 0.33019202947616577,
      "learning_rate": 3.7090726988791737e-06,
      "loss": 0.0142,
      "step": 1929460
    },
    {
      "epoch": 3.1576363386422104,
      "grad_norm": 0.18591149151325226,
      "learning_rate": 3.7090068066656564e-06,
      "loss": 0.0094,
      "step": 1929480
    },
    {
      "epoch": 3.157669069080864,
      "grad_norm": 0.5344260931015015,
      "learning_rate": 3.708940914452139e-06,
      "loss": 0.0101,
      "step": 1929500
    },
    {
      "epoch": 3.157701799519517,
      "grad_norm": 0.3446241021156311,
      "learning_rate": 3.7088750222386223e-06,
      "loss": 0.0164,
      "step": 1929520
    },
    {
      "epoch": 3.1577345299581703,
      "grad_norm": 0.3635014593601227,
      "learning_rate": 3.708809130025105e-06,
      "loss": 0.0147,
      "step": 1929540
    },
    {
      "epoch": 3.157767260396824,
      "grad_norm": 0.5420202016830444,
      "learning_rate": 3.7087432378115878e-06,
      "loss": 0.0082,
      "step": 1929560
    },
    {
      "epoch": 3.157799990835477,
      "grad_norm": 0.22007937729358673,
      "learning_rate": 3.7086773455980714e-06,
      "loss": 0.0161,
      "step": 1929580
    },
    {
      "epoch": 3.1578327212741306,
      "grad_norm": 0.18540841341018677,
      "learning_rate": 3.708611453384554e-06,
      "loss": 0.0141,
      "step": 1929600
    },
    {
      "epoch": 3.1578654517127838,
      "grad_norm": 0.21435140073299408,
      "learning_rate": 3.708545561171037e-06,
      "loss": 0.0172,
      "step": 1929620
    },
    {
      "epoch": 3.1578981821514374,
      "grad_norm": 0.3218208849430084,
      "learning_rate": 3.7084796689575196e-06,
      "loss": 0.0158,
      "step": 1929640
    },
    {
      "epoch": 3.1579309125900905,
      "grad_norm": 0.3160630166530609,
      "learning_rate": 3.7084137767440028e-06,
      "loss": 0.0097,
      "step": 1929660
    },
    {
      "epoch": 3.1579636430287437,
      "grad_norm": 0.3789258599281311,
      "learning_rate": 3.7083478845304855e-06,
      "loss": 0.0113,
      "step": 1929680
    },
    {
      "epoch": 3.1579963734673973,
      "grad_norm": 0.5153667330741882,
      "learning_rate": 3.7082819923169682e-06,
      "loss": 0.0157,
      "step": 1929700
    },
    {
      "epoch": 3.1580291039060504,
      "grad_norm": 0.31032922863960266,
      "learning_rate": 3.708216100103451e-06,
      "loss": 0.012,
      "step": 1929720
    },
    {
      "epoch": 3.158061834344704,
      "grad_norm": 0.2083771526813507,
      "learning_rate": 3.708150207889934e-06,
      "loss": 0.0101,
      "step": 1929740
    },
    {
      "epoch": 3.158094564783357,
      "grad_norm": 0.40054118633270264,
      "learning_rate": 3.708084315676417e-06,
      "loss": 0.0106,
      "step": 1929760
    },
    {
      "epoch": 3.1581272952220107,
      "grad_norm": 0.6579245924949646,
      "learning_rate": 3.7080184234628996e-06,
      "loss": 0.0104,
      "step": 1929780
    },
    {
      "epoch": 3.158160025660664,
      "grad_norm": 0.10682851821184158,
      "learning_rate": 3.7079525312493824e-06,
      "loss": 0.0073,
      "step": 1929800
    },
    {
      "epoch": 3.158192756099317,
      "grad_norm": 0.1682966947555542,
      "learning_rate": 3.7078866390358655e-06,
      "loss": 0.0116,
      "step": 1929820
    },
    {
      "epoch": 3.1582254865379706,
      "grad_norm": 1.0342196226119995,
      "learning_rate": 3.7078207468223483e-06,
      "loss": 0.0158,
      "step": 1929840
    },
    {
      "epoch": 3.158258216976624,
      "grad_norm": 0.13007588684558868,
      "learning_rate": 3.707754854608831e-06,
      "loss": 0.0106,
      "step": 1929860
    },
    {
      "epoch": 3.1582909474152774,
      "grad_norm": 0.06737363338470459,
      "learning_rate": 3.7076889623953137e-06,
      "loss": 0.011,
      "step": 1929880
    },
    {
      "epoch": 3.1583236778539305,
      "grad_norm": 0.1510101705789566,
      "learning_rate": 3.7076230701817965e-06,
      "loss": 0.0121,
      "step": 1929900
    },
    {
      "epoch": 3.158356408292584,
      "grad_norm": 0.7224509716033936,
      "learning_rate": 3.7075571779682796e-06,
      "loss": 0.016,
      "step": 1929920
    },
    {
      "epoch": 3.1583891387312373,
      "grad_norm": 0.15546917915344238,
      "learning_rate": 3.707491285754763e-06,
      "loss": 0.009,
      "step": 1929940
    },
    {
      "epoch": 3.1584218691698904,
      "grad_norm": 0.25290024280548096,
      "learning_rate": 3.7074253935412455e-06,
      "loss": 0.0153,
      "step": 1929960
    },
    {
      "epoch": 3.158454599608544,
      "grad_norm": 0.5269757509231567,
      "learning_rate": 3.7073595013277287e-06,
      "loss": 0.0118,
      "step": 1929980
    },
    {
      "epoch": 3.158487330047197,
      "grad_norm": 0.5537257194519043,
      "learning_rate": 3.7072936091142114e-06,
      "loss": 0.012,
      "step": 1930000
    },
    {
      "epoch": 3.1585200604858508,
      "grad_norm": 0.14994613826274872,
      "learning_rate": 3.707227716900694e-06,
      "loss": 0.0157,
      "step": 1930020
    },
    {
      "epoch": 3.158552790924504,
      "grad_norm": 0.1679421067237854,
      "learning_rate": 3.707161824687177e-06,
      "loss": 0.0078,
      "step": 1930040
    },
    {
      "epoch": 3.1585855213631575,
      "grad_norm": 0.5777953863143921,
      "learning_rate": 3.70709593247366e-06,
      "loss": 0.015,
      "step": 1930060
    },
    {
      "epoch": 3.1586182518018107,
      "grad_norm": 0.18601417541503906,
      "learning_rate": 3.707030040260143e-06,
      "loss": 0.0142,
      "step": 1930080
    },
    {
      "epoch": 3.158650982240464,
      "grad_norm": 0.4121963679790497,
      "learning_rate": 3.7069641480466256e-06,
      "loss": 0.0096,
      "step": 1930100
    },
    {
      "epoch": 3.1586837126791174,
      "grad_norm": 2.141911506652832,
      "learning_rate": 3.7068982558331083e-06,
      "loss": 0.0131,
      "step": 1930120
    },
    {
      "epoch": 3.1587164431177706,
      "grad_norm": 0.6453670263290405,
      "learning_rate": 3.7068323636195915e-06,
      "loss": 0.0136,
      "step": 1930140
    },
    {
      "epoch": 3.158749173556424,
      "grad_norm": 0.12056630849838257,
      "learning_rate": 3.7067664714060742e-06,
      "loss": 0.0075,
      "step": 1930160
    },
    {
      "epoch": 3.1587819039950773,
      "grad_norm": 0.10634313523769379,
      "learning_rate": 3.706700579192557e-06,
      "loss": 0.0107,
      "step": 1930180
    },
    {
      "epoch": 3.158814634433731,
      "grad_norm": 0.4338822364807129,
      "learning_rate": 3.7066346869790397e-06,
      "loss": 0.0102,
      "step": 1930200
    },
    {
      "epoch": 3.158847364872384,
      "grad_norm": 0.44235342741012573,
      "learning_rate": 3.706568794765523e-06,
      "loss": 0.0155,
      "step": 1930220
    },
    {
      "epoch": 3.158880095311037,
      "grad_norm": 1.6678749322891235,
      "learning_rate": 3.7065029025520056e-06,
      "loss": 0.011,
      "step": 1930240
    },
    {
      "epoch": 3.158912825749691,
      "grad_norm": 0.37230244278907776,
      "learning_rate": 3.7064370103384883e-06,
      "loss": 0.0131,
      "step": 1930260
    },
    {
      "epoch": 3.158945556188344,
      "grad_norm": 0.9633674621582031,
      "learning_rate": 3.706371118124972e-06,
      "loss": 0.0126,
      "step": 1930280
    },
    {
      "epoch": 3.1589782866269975,
      "grad_norm": 0.5815249681472778,
      "learning_rate": 3.7063052259114547e-06,
      "loss": 0.0126,
      "step": 1930300
    },
    {
      "epoch": 3.1590110170656507,
      "grad_norm": 0.10929703712463379,
      "learning_rate": 3.7062393336979374e-06,
      "loss": 0.0186,
      "step": 1930320
    },
    {
      "epoch": 3.1590437475043043,
      "grad_norm": 0.4062032103538513,
      "learning_rate": 3.70617344148442e-06,
      "loss": 0.0123,
      "step": 1930340
    },
    {
      "epoch": 3.1590764779429574,
      "grad_norm": 0.6972095370292664,
      "learning_rate": 3.7061075492709033e-06,
      "loss": 0.0075,
      "step": 1930360
    },
    {
      "epoch": 3.1591092083816106,
      "grad_norm": 1.014851689338684,
      "learning_rate": 3.706041657057386e-06,
      "loss": 0.013,
      "step": 1930380
    },
    {
      "epoch": 3.159141938820264,
      "grad_norm": 0.23336085677146912,
      "learning_rate": 3.7059757648438688e-06,
      "loss": 0.0114,
      "step": 1930400
    },
    {
      "epoch": 3.1591746692589173,
      "grad_norm": 1.2100541591644287,
      "learning_rate": 3.7059098726303515e-06,
      "loss": 0.0105,
      "step": 1930420
    },
    {
      "epoch": 3.159207399697571,
      "grad_norm": 0.19402751326560974,
      "learning_rate": 3.7058439804168343e-06,
      "loss": 0.0144,
      "step": 1930440
    },
    {
      "epoch": 3.159240130136224,
      "grad_norm": 1.3478233814239502,
      "learning_rate": 3.7057780882033174e-06,
      "loss": 0.0186,
      "step": 1930460
    },
    {
      "epoch": 3.159272860574877,
      "grad_norm": 0.34667298197746277,
      "learning_rate": 3.7057121959898e-06,
      "loss": 0.0121,
      "step": 1930480
    },
    {
      "epoch": 3.159305591013531,
      "grad_norm": 0.4189973771572113,
      "learning_rate": 3.705646303776283e-06,
      "loss": 0.0112,
      "step": 1930500
    },
    {
      "epoch": 3.159338321452184,
      "grad_norm": 0.16229920089244843,
      "learning_rate": 3.7055804115627656e-06,
      "loss": 0.0092,
      "step": 1930520
    },
    {
      "epoch": 3.1593710518908376,
      "grad_norm": 0.12231577187776566,
      "learning_rate": 3.705514519349249e-06,
      "loss": 0.0123,
      "step": 1930540
    },
    {
      "epoch": 3.1594037823294907,
      "grad_norm": 0.7575622200965881,
      "learning_rate": 3.7054486271357316e-06,
      "loss": 0.0141,
      "step": 1930560
    },
    {
      "epoch": 3.1594365127681443,
      "grad_norm": 0.16960471868515015,
      "learning_rate": 3.7053827349222143e-06,
      "loss": 0.0094,
      "step": 1930580
    },
    {
      "epoch": 3.1594692432067975,
      "grad_norm": 0.8561308979988098,
      "learning_rate": 3.705316842708697e-06,
      "loss": 0.015,
      "step": 1930600
    },
    {
      "epoch": 3.1595019736454506,
      "grad_norm": 0.30179911851882935,
      "learning_rate": 3.70525095049518e-06,
      "loss": 0.0102,
      "step": 1930620
    },
    {
      "epoch": 3.159534704084104,
      "grad_norm": 0.2136445790529251,
      "learning_rate": 3.7051850582816634e-06,
      "loss": 0.0127,
      "step": 1930640
    },
    {
      "epoch": 3.1595674345227573,
      "grad_norm": 0.25077304244041443,
      "learning_rate": 3.705119166068146e-06,
      "loss": 0.0154,
      "step": 1930660
    },
    {
      "epoch": 3.159600164961411,
      "grad_norm": 0.2550668716430664,
      "learning_rate": 3.7050532738546293e-06,
      "loss": 0.0128,
      "step": 1930680
    },
    {
      "epoch": 3.159632895400064,
      "grad_norm": 0.4563639760017395,
      "learning_rate": 3.704987381641112e-06,
      "loss": 0.0105,
      "step": 1930700
    },
    {
      "epoch": 3.1596656258387177,
      "grad_norm": 0.13770146667957306,
      "learning_rate": 3.7049214894275947e-06,
      "loss": 0.0098,
      "step": 1930720
    },
    {
      "epoch": 3.159698356277371,
      "grad_norm": 0.12617354094982147,
      "learning_rate": 3.7048555972140775e-06,
      "loss": 0.0198,
      "step": 1930740
    },
    {
      "epoch": 3.159731086716024,
      "grad_norm": 0.17679348587989807,
      "learning_rate": 3.7047897050005606e-06,
      "loss": 0.0121,
      "step": 1930760
    },
    {
      "epoch": 3.1597638171546776,
      "grad_norm": 0.36510536074638367,
      "learning_rate": 3.7047238127870434e-06,
      "loss": 0.0097,
      "step": 1930780
    },
    {
      "epoch": 3.1597965475933307,
      "grad_norm": 0.29535621404647827,
      "learning_rate": 3.704657920573526e-06,
      "loss": 0.0076,
      "step": 1930800
    },
    {
      "epoch": 3.1598292780319843,
      "grad_norm": 0.22347573935985565,
      "learning_rate": 3.704592028360009e-06,
      "loss": 0.0117,
      "step": 1930820
    },
    {
      "epoch": 3.1598620084706375,
      "grad_norm": 0.14583119750022888,
      "learning_rate": 3.704526136146492e-06,
      "loss": 0.0114,
      "step": 1930840
    },
    {
      "epoch": 3.159894738909291,
      "grad_norm": 0.5425729155540466,
      "learning_rate": 3.7044602439329748e-06,
      "loss": 0.0119,
      "step": 1930860
    },
    {
      "epoch": 3.159927469347944,
      "grad_norm": 0.35034194588661194,
      "learning_rate": 3.7043943517194575e-06,
      "loss": 0.0113,
      "step": 1930880
    },
    {
      "epoch": 3.1599601997865974,
      "grad_norm": 0.3557986617088318,
      "learning_rate": 3.7043284595059402e-06,
      "loss": 0.0134,
      "step": 1930900
    },
    {
      "epoch": 3.159992930225251,
      "grad_norm": 0.4097057580947876,
      "learning_rate": 3.704262567292423e-06,
      "loss": 0.0126,
      "step": 1930920
    },
    {
      "epoch": 3.160025660663904,
      "grad_norm": 3.791766881942749,
      "learning_rate": 3.704196675078906e-06,
      "loss": 0.0081,
      "step": 1930940
    },
    {
      "epoch": 3.1600583911025577,
      "grad_norm": 0.37677422165870667,
      "learning_rate": 3.704130782865389e-06,
      "loss": 0.0073,
      "step": 1930960
    },
    {
      "epoch": 3.160091121541211,
      "grad_norm": 0.24096818268299103,
      "learning_rate": 3.704064890651872e-06,
      "loss": 0.0151,
      "step": 1930980
    },
    {
      "epoch": 3.160123851979864,
      "grad_norm": 0.39692944288253784,
      "learning_rate": 3.7039989984383552e-06,
      "loss": 0.0113,
      "step": 1931000
    },
    {
      "epoch": 3.1601565824185176,
      "grad_norm": 0.5590885877609253,
      "learning_rate": 3.703933106224838e-06,
      "loss": 0.0123,
      "step": 1931020
    },
    {
      "epoch": 3.1601893128571708,
      "grad_norm": 0.5536267757415771,
      "learning_rate": 3.7038672140113207e-06,
      "loss": 0.0102,
      "step": 1931040
    },
    {
      "epoch": 3.1602220432958243,
      "grad_norm": 0.2324804663658142,
      "learning_rate": 3.7038013217978034e-06,
      "loss": 0.0094,
      "step": 1931060
    },
    {
      "epoch": 3.1602547737344775,
      "grad_norm": 0.25568535923957825,
      "learning_rate": 3.7037354295842866e-06,
      "loss": 0.0106,
      "step": 1931080
    },
    {
      "epoch": 3.160287504173131,
      "grad_norm": 0.16661903262138367,
      "learning_rate": 3.7036695373707693e-06,
      "loss": 0.0116,
      "step": 1931100
    },
    {
      "epoch": 3.1603202346117842,
      "grad_norm": 0.7667255401611328,
      "learning_rate": 3.703603645157252e-06,
      "loss": 0.0105,
      "step": 1931120
    },
    {
      "epoch": 3.1603529650504374,
      "grad_norm": 0.5486006140708923,
      "learning_rate": 3.703537752943735e-06,
      "loss": 0.0086,
      "step": 1931140
    },
    {
      "epoch": 3.160385695489091,
      "grad_norm": 0.9416364431381226,
      "learning_rate": 3.703471860730218e-06,
      "loss": 0.0137,
      "step": 1931160
    },
    {
      "epoch": 3.160418425927744,
      "grad_norm": 0.33066579699516296,
      "learning_rate": 3.7034059685167007e-06,
      "loss": 0.0151,
      "step": 1931180
    },
    {
      "epoch": 3.1604511563663977,
      "grad_norm": 0.4773959517478943,
      "learning_rate": 3.7033400763031835e-06,
      "loss": 0.0114,
      "step": 1931200
    },
    {
      "epoch": 3.160483886805051,
      "grad_norm": 0.14870357513427734,
      "learning_rate": 3.703274184089666e-06,
      "loss": 0.0082,
      "step": 1931220
    },
    {
      "epoch": 3.1605166172437045,
      "grad_norm": 0.2581239640712738,
      "learning_rate": 3.7032082918761494e-06,
      "loss": 0.0178,
      "step": 1931240
    },
    {
      "epoch": 3.1605493476823576,
      "grad_norm": 0.3360513746738434,
      "learning_rate": 3.703142399662632e-06,
      "loss": 0.0077,
      "step": 1931260
    },
    {
      "epoch": 3.1605820781210108,
      "grad_norm": 0.169677734375,
      "learning_rate": 3.703076507449115e-06,
      "loss": 0.0111,
      "step": 1931280
    },
    {
      "epoch": 3.1606148085596644,
      "grad_norm": 0.3228853642940521,
      "learning_rate": 3.7030106152355976e-06,
      "loss": 0.0118,
      "step": 1931300
    },
    {
      "epoch": 3.1606475389983175,
      "grad_norm": 0.3599458932876587,
      "learning_rate": 3.7029447230220803e-06,
      "loss": 0.0063,
      "step": 1931320
    },
    {
      "epoch": 3.160680269436971,
      "grad_norm": 0.21039611101150513,
      "learning_rate": 3.702878830808564e-06,
      "loss": 0.0165,
      "step": 1931340
    },
    {
      "epoch": 3.1607129998756243,
      "grad_norm": 1.0384501218795776,
      "learning_rate": 3.7028129385950466e-06,
      "loss": 0.0188,
      "step": 1931360
    },
    {
      "epoch": 3.160745730314278,
      "grad_norm": 0.10343877971172333,
      "learning_rate": 3.70274704638153e-06,
      "loss": 0.0107,
      "step": 1931380
    },
    {
      "epoch": 3.160778460752931,
      "grad_norm": 0.12146024405956268,
      "learning_rate": 3.7026811541680125e-06,
      "loss": 0.0142,
      "step": 1931400
    },
    {
      "epoch": 3.160811191191584,
      "grad_norm": 0.17394427955150604,
      "learning_rate": 3.7026152619544953e-06,
      "loss": 0.0156,
      "step": 1931420
    },
    {
      "epoch": 3.1608439216302378,
      "grad_norm": 0.29508304595947266,
      "learning_rate": 3.702549369740978e-06,
      "loss": 0.0129,
      "step": 1931440
    },
    {
      "epoch": 3.160876652068891,
      "grad_norm": 0.35221967101097107,
      "learning_rate": 3.7024834775274608e-06,
      "loss": 0.0102,
      "step": 1931460
    },
    {
      "epoch": 3.1609093825075445,
      "grad_norm": 0.34879153966903687,
      "learning_rate": 3.702417585313944e-06,
      "loss": 0.0106,
      "step": 1931480
    },
    {
      "epoch": 3.1609421129461976,
      "grad_norm": 0.8369009494781494,
      "learning_rate": 3.7023516931004267e-06,
      "loss": 0.0131,
      "step": 1931500
    },
    {
      "epoch": 3.1609748433848512,
      "grad_norm": 0.31348875164985657,
      "learning_rate": 3.7022858008869094e-06,
      "loss": 0.0125,
      "step": 1931520
    },
    {
      "epoch": 3.1610075738235044,
      "grad_norm": 0.48139071464538574,
      "learning_rate": 3.702219908673392e-06,
      "loss": 0.0117,
      "step": 1931540
    },
    {
      "epoch": 3.1610403042621575,
      "grad_norm": 0.22575870156288147,
      "learning_rate": 3.7021540164598753e-06,
      "loss": 0.014,
      "step": 1931560
    },
    {
      "epoch": 3.161073034700811,
      "grad_norm": 0.07512398809194565,
      "learning_rate": 3.702088124246358e-06,
      "loss": 0.0095,
      "step": 1931580
    },
    {
      "epoch": 3.1611057651394643,
      "grad_norm": 0.6949628591537476,
      "learning_rate": 3.702022232032841e-06,
      "loss": 0.0132,
      "step": 1931600
    },
    {
      "epoch": 3.161138495578118,
      "grad_norm": 0.17635294795036316,
      "learning_rate": 3.7019563398193235e-06,
      "loss": 0.012,
      "step": 1931620
    },
    {
      "epoch": 3.161171226016771,
      "grad_norm": 0.28015828132629395,
      "learning_rate": 3.7018904476058067e-06,
      "loss": 0.0139,
      "step": 1931640
    },
    {
      "epoch": 3.1612039564554246,
      "grad_norm": 0.4535692036151886,
      "learning_rate": 3.7018245553922894e-06,
      "loss": 0.0137,
      "step": 1931660
    },
    {
      "epoch": 3.1612366868940778,
      "grad_norm": 0.09641377627849579,
      "learning_rate": 3.701758663178772e-06,
      "loss": 0.0123,
      "step": 1931680
    },
    {
      "epoch": 3.161269417332731,
      "grad_norm": 0.0744093731045723,
      "learning_rate": 3.7016927709652558e-06,
      "loss": 0.013,
      "step": 1931700
    },
    {
      "epoch": 3.1613021477713845,
      "grad_norm": 0.37782058119773865,
      "learning_rate": 3.7016268787517385e-06,
      "loss": 0.0123,
      "step": 1931720
    },
    {
      "epoch": 3.1613348782100377,
      "grad_norm": 0.17472852766513824,
      "learning_rate": 3.7015609865382212e-06,
      "loss": 0.009,
      "step": 1931740
    },
    {
      "epoch": 3.1613676086486913,
      "grad_norm": 0.07504593580961227,
      "learning_rate": 3.701495094324704e-06,
      "loss": 0.0123,
      "step": 1931760
    },
    {
      "epoch": 3.1614003390873444,
      "grad_norm": 0.4300592243671417,
      "learning_rate": 3.701429202111187e-06,
      "loss": 0.0109,
      "step": 1931780
    },
    {
      "epoch": 3.161433069525998,
      "grad_norm": 0.27828583121299744,
      "learning_rate": 3.70136330989767e-06,
      "loss": 0.0057,
      "step": 1931800
    },
    {
      "epoch": 3.161465799964651,
      "grad_norm": 0.4929862320423126,
      "learning_rate": 3.7012974176841526e-06,
      "loss": 0.0106,
      "step": 1931820
    },
    {
      "epoch": 3.1614985304033043,
      "grad_norm": 0.3933110237121582,
      "learning_rate": 3.7012315254706354e-06,
      "loss": 0.0109,
      "step": 1931840
    },
    {
      "epoch": 3.161531260841958,
      "grad_norm": 0.11978432536125183,
      "learning_rate": 3.701165633257118e-06,
      "loss": 0.0111,
      "step": 1931860
    },
    {
      "epoch": 3.161563991280611,
      "grad_norm": 0.3445752263069153,
      "learning_rate": 3.7010997410436013e-06,
      "loss": 0.0089,
      "step": 1931880
    },
    {
      "epoch": 3.1615967217192646,
      "grad_norm": 0.14881132543087006,
      "learning_rate": 3.701033848830084e-06,
      "loss": 0.0116,
      "step": 1931900
    },
    {
      "epoch": 3.161629452157918,
      "grad_norm": 0.3411714732646942,
      "learning_rate": 3.7009679566165667e-06,
      "loss": 0.009,
      "step": 1931920
    },
    {
      "epoch": 3.1616621825965714,
      "grad_norm": 0.6183654069900513,
      "learning_rate": 3.7009020644030495e-06,
      "loss": 0.0136,
      "step": 1931940
    },
    {
      "epoch": 3.1616949130352245,
      "grad_norm": 1.2549407482147217,
      "learning_rate": 3.7008361721895326e-06,
      "loss": 0.0105,
      "step": 1931960
    },
    {
      "epoch": 3.1617276434738777,
      "grad_norm": 0.08990605175495148,
      "learning_rate": 3.7007702799760154e-06,
      "loss": 0.0126,
      "step": 1931980
    },
    {
      "epoch": 3.1617603739125313,
      "grad_norm": 0.3134889006614685,
      "learning_rate": 3.700704387762498e-06,
      "loss": 0.0094,
      "step": 1932000
    },
    {
      "epoch": 3.1617931043511844,
      "grad_norm": 0.3265003263950348,
      "learning_rate": 3.700638495548981e-06,
      "loss": 0.0123,
      "step": 1932020
    },
    {
      "epoch": 3.161825834789838,
      "grad_norm": 0.5697067975997925,
      "learning_rate": 3.7005726033354645e-06,
      "loss": 0.0123,
      "step": 1932040
    },
    {
      "epoch": 3.161858565228491,
      "grad_norm": 0.2400629222393036,
      "learning_rate": 3.700506711121947e-06,
      "loss": 0.0168,
      "step": 1932060
    },
    {
      "epoch": 3.1618912956671443,
      "grad_norm": 0.8969123959541321,
      "learning_rate": 3.70044081890843e-06,
      "loss": 0.0128,
      "step": 1932080
    },
    {
      "epoch": 3.161924026105798,
      "grad_norm": 0.1274801641702652,
      "learning_rate": 3.700374926694913e-06,
      "loss": 0.0164,
      "step": 1932100
    },
    {
      "epoch": 3.161956756544451,
      "grad_norm": 0.1615563929080963,
      "learning_rate": 3.700309034481396e-06,
      "loss": 0.0105,
      "step": 1932120
    },
    {
      "epoch": 3.1619894869831047,
      "grad_norm": 0.6143431067466736,
      "learning_rate": 3.7002431422678786e-06,
      "loss": 0.0122,
      "step": 1932140
    },
    {
      "epoch": 3.162022217421758,
      "grad_norm": 0.26025521755218506,
      "learning_rate": 3.7001772500543613e-06,
      "loss": 0.0176,
      "step": 1932160
    },
    {
      "epoch": 3.1620549478604114,
      "grad_norm": 0.32148978114128113,
      "learning_rate": 3.7001113578408445e-06,
      "loss": 0.0115,
      "step": 1932180
    },
    {
      "epoch": 3.1620876782990646,
      "grad_norm": 0.25583383440971375,
      "learning_rate": 3.7000454656273272e-06,
      "loss": 0.0088,
      "step": 1932200
    },
    {
      "epoch": 3.1621204087377177,
      "grad_norm": 0.3884854316711426,
      "learning_rate": 3.69997957341381e-06,
      "loss": 0.0081,
      "step": 1932220
    },
    {
      "epoch": 3.1621531391763713,
      "grad_norm": 0.34461134672164917,
      "learning_rate": 3.6999136812002927e-06,
      "loss": 0.0096,
      "step": 1932240
    },
    {
      "epoch": 3.1621858696150245,
      "grad_norm": 0.44978994131088257,
      "learning_rate": 3.699847788986776e-06,
      "loss": 0.015,
      "step": 1932260
    },
    {
      "epoch": 3.162218600053678,
      "grad_norm": 0.08427978307008743,
      "learning_rate": 3.6997818967732586e-06,
      "loss": 0.01,
      "step": 1932280
    },
    {
      "epoch": 3.162251330492331,
      "grad_norm": 0.33120638132095337,
      "learning_rate": 3.6997160045597413e-06,
      "loss": 0.0112,
      "step": 1932300
    },
    {
      "epoch": 3.162284060930985,
      "grad_norm": 0.1534246951341629,
      "learning_rate": 3.699650112346224e-06,
      "loss": 0.0097,
      "step": 1932320
    },
    {
      "epoch": 3.162316791369638,
      "grad_norm": 0.10209149122238159,
      "learning_rate": 3.699584220132707e-06,
      "loss": 0.0128,
      "step": 1932340
    },
    {
      "epoch": 3.162349521808291,
      "grad_norm": 0.6430858373641968,
      "learning_rate": 3.69951832791919e-06,
      "loss": 0.0097,
      "step": 1932360
    },
    {
      "epoch": 3.1623822522469447,
      "grad_norm": 0.5212708711624146,
      "learning_rate": 3.6994524357056727e-06,
      "loss": 0.0117,
      "step": 1932380
    },
    {
      "epoch": 3.162414982685598,
      "grad_norm": 0.12372097373008728,
      "learning_rate": 3.699386543492156e-06,
      "loss": 0.0095,
      "step": 1932400
    },
    {
      "epoch": 3.1624477131242514,
      "grad_norm": 0.4457260072231293,
      "learning_rate": 3.699320651278639e-06,
      "loss": 0.0156,
      "step": 1932420
    },
    {
      "epoch": 3.1624804435629046,
      "grad_norm": 0.15176533162593842,
      "learning_rate": 3.699254759065122e-06,
      "loss": 0.0095,
      "step": 1932440
    },
    {
      "epoch": 3.1625131740015577,
      "grad_norm": 0.3384909927845001,
      "learning_rate": 3.6991888668516045e-06,
      "loss": 0.0154,
      "step": 1932460
    },
    {
      "epoch": 3.1625459044402113,
      "grad_norm": 0.2840923070907593,
      "learning_rate": 3.6991229746380873e-06,
      "loss": 0.0131,
      "step": 1932480
    },
    {
      "epoch": 3.1625786348788645,
      "grad_norm": 0.14359129965305328,
      "learning_rate": 3.6990570824245704e-06,
      "loss": 0.0095,
      "step": 1932500
    },
    {
      "epoch": 3.162611365317518,
      "grad_norm": 0.08808502554893494,
      "learning_rate": 3.698991190211053e-06,
      "loss": 0.0133,
      "step": 1932520
    },
    {
      "epoch": 3.1626440957561712,
      "grad_norm": 0.48623397946357727,
      "learning_rate": 3.698925297997536e-06,
      "loss": 0.0104,
      "step": 1932540
    },
    {
      "epoch": 3.162676826194825,
      "grad_norm": 0.2742750644683838,
      "learning_rate": 3.6988594057840187e-06,
      "loss": 0.0123,
      "step": 1932560
    },
    {
      "epoch": 3.162709556633478,
      "grad_norm": 0.23944248259067535,
      "learning_rate": 3.698793513570502e-06,
      "loss": 0.0111,
      "step": 1932580
    },
    {
      "epoch": 3.162742287072131,
      "grad_norm": 0.1962897777557373,
      "learning_rate": 3.6987276213569846e-06,
      "loss": 0.0092,
      "step": 1932600
    },
    {
      "epoch": 3.1627750175107847,
      "grad_norm": 0.14395006000995636,
      "learning_rate": 3.6986617291434673e-06,
      "loss": 0.0174,
      "step": 1932620
    },
    {
      "epoch": 3.162807747949438,
      "grad_norm": 0.11447878926992416,
      "learning_rate": 3.69859583692995e-06,
      "loss": 0.0103,
      "step": 1932640
    },
    {
      "epoch": 3.1628404783880915,
      "grad_norm": 0.5535325407981873,
      "learning_rate": 3.698529944716433e-06,
      "loss": 0.0099,
      "step": 1932660
    },
    {
      "epoch": 3.1628732088267446,
      "grad_norm": 0.12474387884140015,
      "learning_rate": 3.698464052502916e-06,
      "loss": 0.0082,
      "step": 1932680
    },
    {
      "epoch": 3.162905939265398,
      "grad_norm": 0.17625044286251068,
      "learning_rate": 3.6983981602893987e-06,
      "loss": 0.0081,
      "step": 1932700
    },
    {
      "epoch": 3.1629386697040514,
      "grad_norm": 0.2162950187921524,
      "learning_rate": 3.6983322680758814e-06,
      "loss": 0.0118,
      "step": 1932720
    },
    {
      "epoch": 3.1629714001427045,
      "grad_norm": 0.057464491575956345,
      "learning_rate": 3.698266375862365e-06,
      "loss": 0.0084,
      "step": 1932740
    },
    {
      "epoch": 3.163004130581358,
      "grad_norm": 0.1458410918712616,
      "learning_rate": 3.6982004836488477e-06,
      "loss": 0.0102,
      "step": 1932760
    },
    {
      "epoch": 3.1630368610200112,
      "grad_norm": 0.2393314242362976,
      "learning_rate": 3.6981345914353305e-06,
      "loss": 0.0118,
      "step": 1932780
    },
    {
      "epoch": 3.163069591458665,
      "grad_norm": 1.459682822227478,
      "learning_rate": 3.6980686992218136e-06,
      "loss": 0.0076,
      "step": 1932800
    },
    {
      "epoch": 3.163102321897318,
      "grad_norm": 0.18780653178691864,
      "learning_rate": 3.6980028070082964e-06,
      "loss": 0.0162,
      "step": 1932820
    },
    {
      "epoch": 3.1631350523359716,
      "grad_norm": 0.772724986076355,
      "learning_rate": 3.697936914794779e-06,
      "loss": 0.0122,
      "step": 1932840
    },
    {
      "epoch": 3.1631677827746247,
      "grad_norm": 0.3001725971698761,
      "learning_rate": 3.697871022581262e-06,
      "loss": 0.0069,
      "step": 1932860
    },
    {
      "epoch": 3.163200513213278,
      "grad_norm": 0.20941472053527832,
      "learning_rate": 3.6978051303677446e-06,
      "loss": 0.0153,
      "step": 1932880
    },
    {
      "epoch": 3.1632332436519315,
      "grad_norm": 0.8835034370422363,
      "learning_rate": 3.6977392381542278e-06,
      "loss": 0.01,
      "step": 1932900
    },
    {
      "epoch": 3.1632659740905846,
      "grad_norm": 0.5315470695495605,
      "learning_rate": 3.6976733459407105e-06,
      "loss": 0.0112,
      "step": 1932920
    },
    {
      "epoch": 3.1632987045292382,
      "grad_norm": 0.34042173624038696,
      "learning_rate": 3.6976074537271933e-06,
      "loss": 0.0086,
      "step": 1932940
    },
    {
      "epoch": 3.1633314349678914,
      "grad_norm": 0.10053994506597519,
      "learning_rate": 3.697541561513676e-06,
      "loss": 0.0157,
      "step": 1932960
    },
    {
      "epoch": 3.163364165406545,
      "grad_norm": 0.4258037209510803,
      "learning_rate": 3.697475669300159e-06,
      "loss": 0.0158,
      "step": 1932980
    },
    {
      "epoch": 3.163396895845198,
      "grad_norm": 0.05414522811770439,
      "learning_rate": 3.697409777086642e-06,
      "loss": 0.009,
      "step": 1933000
    },
    {
      "epoch": 3.1634296262838513,
      "grad_norm": 0.34603139758110046,
      "learning_rate": 3.6973438848731246e-06,
      "loss": 0.0112,
      "step": 1933020
    },
    {
      "epoch": 3.163462356722505,
      "grad_norm": 0.3171898126602173,
      "learning_rate": 3.6972779926596074e-06,
      "loss": 0.0104,
      "step": 1933040
    },
    {
      "epoch": 3.163495087161158,
      "grad_norm": 0.8734516501426697,
      "learning_rate": 3.6972121004460905e-06,
      "loss": 0.0148,
      "step": 1933060
    },
    {
      "epoch": 3.1635278175998116,
      "grad_norm": 0.12338203191757202,
      "learning_rate": 3.6971462082325733e-06,
      "loss": 0.0148,
      "step": 1933080
    },
    {
      "epoch": 3.1635605480384648,
      "grad_norm": 0.7689895033836365,
      "learning_rate": 3.6970803160190564e-06,
      "loss": 0.0153,
      "step": 1933100
    },
    {
      "epoch": 3.1635932784771184,
      "grad_norm": 0.12850035727024078,
      "learning_rate": 3.6970144238055396e-06,
      "loss": 0.015,
      "step": 1933120
    },
    {
      "epoch": 3.1636260089157715,
      "grad_norm": 0.5001266002655029,
      "learning_rate": 3.6969485315920223e-06,
      "loss": 0.0114,
      "step": 1933140
    },
    {
      "epoch": 3.1636587393544247,
      "grad_norm": 0.29594653844833374,
      "learning_rate": 3.696882639378505e-06,
      "loss": 0.0082,
      "step": 1933160
    },
    {
      "epoch": 3.1636914697930782,
      "grad_norm": 0.4920041561126709,
      "learning_rate": 3.696816747164988e-06,
      "loss": 0.0137,
      "step": 1933180
    },
    {
      "epoch": 3.1637242002317314,
      "grad_norm": 0.16636592149734497,
      "learning_rate": 3.696750854951471e-06,
      "loss": 0.0108,
      "step": 1933200
    },
    {
      "epoch": 3.163756930670385,
      "grad_norm": 0.5346140265464783,
      "learning_rate": 3.6966849627379537e-06,
      "loss": 0.0164,
      "step": 1933220
    },
    {
      "epoch": 3.163789661109038,
      "grad_norm": 0.42630070447921753,
      "learning_rate": 3.6966190705244365e-06,
      "loss": 0.0125,
      "step": 1933240
    },
    {
      "epoch": 3.1638223915476917,
      "grad_norm": 0.8253999352455139,
      "learning_rate": 3.696553178310919e-06,
      "loss": 0.0088,
      "step": 1933260
    },
    {
      "epoch": 3.163855121986345,
      "grad_norm": 0.2683315873146057,
      "learning_rate": 3.696487286097402e-06,
      "loss": 0.0141,
      "step": 1933280
    },
    {
      "epoch": 3.163887852424998,
      "grad_norm": 0.3724774420261383,
      "learning_rate": 3.696421393883885e-06,
      "loss": 0.0093,
      "step": 1933300
    },
    {
      "epoch": 3.1639205828636516,
      "grad_norm": 0.31051725149154663,
      "learning_rate": 3.696355501670368e-06,
      "loss": 0.0153,
      "step": 1933320
    },
    {
      "epoch": 3.163953313302305,
      "grad_norm": 0.07633985579013824,
      "learning_rate": 3.6962896094568506e-06,
      "loss": 0.0129,
      "step": 1933340
    },
    {
      "epoch": 3.1639860437409584,
      "grad_norm": 0.2636387050151825,
      "learning_rate": 3.6962237172433333e-06,
      "loss": 0.0096,
      "step": 1933360
    },
    {
      "epoch": 3.1640187741796115,
      "grad_norm": 0.7409586310386658,
      "learning_rate": 3.6961578250298165e-06,
      "loss": 0.0129,
      "step": 1933380
    },
    {
      "epoch": 3.164051504618265,
      "grad_norm": 0.520498514175415,
      "learning_rate": 3.6960919328162992e-06,
      "loss": 0.02,
      "step": 1933400
    },
    {
      "epoch": 3.1640842350569183,
      "grad_norm": 0.7143491506576538,
      "learning_rate": 3.696026040602782e-06,
      "loss": 0.013,
      "step": 1933420
    },
    {
      "epoch": 3.1641169654955714,
      "grad_norm": 0.23179681599140167,
      "learning_rate": 3.6959601483892647e-06,
      "loss": 0.0131,
      "step": 1933440
    },
    {
      "epoch": 3.164149695934225,
      "grad_norm": 0.4003283679485321,
      "learning_rate": 3.6958942561757483e-06,
      "loss": 0.0112,
      "step": 1933460
    },
    {
      "epoch": 3.164182426372878,
      "grad_norm": 0.13544808328151703,
      "learning_rate": 3.695828363962231e-06,
      "loss": 0.0177,
      "step": 1933480
    },
    {
      "epoch": 3.1642151568115318,
      "grad_norm": 0.25484561920166016,
      "learning_rate": 3.6957624717487138e-06,
      "loss": 0.0144,
      "step": 1933500
    },
    {
      "epoch": 3.164247887250185,
      "grad_norm": 0.1279301941394806,
      "learning_rate": 3.695696579535197e-06,
      "loss": 0.0086,
      "step": 1933520
    },
    {
      "epoch": 3.164280617688838,
      "grad_norm": 0.21451309323310852,
      "learning_rate": 3.6956306873216797e-06,
      "loss": 0.0146,
      "step": 1933540
    },
    {
      "epoch": 3.1643133481274917,
      "grad_norm": 0.7441139221191406,
      "learning_rate": 3.6955647951081624e-06,
      "loss": 0.0097,
      "step": 1933560
    },
    {
      "epoch": 3.164346078566145,
      "grad_norm": 0.18477843701839447,
      "learning_rate": 3.695498902894645e-06,
      "loss": 0.0161,
      "step": 1933580
    },
    {
      "epoch": 3.1643788090047984,
      "grad_norm": 0.21219034492969513,
      "learning_rate": 3.6954330106811283e-06,
      "loss": 0.0125,
      "step": 1933600
    },
    {
      "epoch": 3.1644115394434515,
      "grad_norm": 0.32808974385261536,
      "learning_rate": 3.695367118467611e-06,
      "loss": 0.0139,
      "step": 1933620
    },
    {
      "epoch": 3.164444269882105,
      "grad_norm": 0.1516861766576767,
      "learning_rate": 3.695301226254094e-06,
      "loss": 0.0093,
      "step": 1933640
    },
    {
      "epoch": 3.1644770003207583,
      "grad_norm": 0.12284747511148453,
      "learning_rate": 3.6952353340405765e-06,
      "loss": 0.01,
      "step": 1933660
    },
    {
      "epoch": 3.1645097307594114,
      "grad_norm": 0.23508906364440918,
      "learning_rate": 3.6951694418270597e-06,
      "loss": 0.0102,
      "step": 1933680
    },
    {
      "epoch": 3.164542461198065,
      "grad_norm": 0.1113380640745163,
      "learning_rate": 3.6951035496135424e-06,
      "loss": 0.0085,
      "step": 1933700
    },
    {
      "epoch": 3.164575191636718,
      "grad_norm": 0.18156811594963074,
      "learning_rate": 3.695037657400025e-06,
      "loss": 0.01,
      "step": 1933720
    },
    {
      "epoch": 3.164607922075372,
      "grad_norm": 0.7183080315589905,
      "learning_rate": 3.694971765186508e-06,
      "loss": 0.0165,
      "step": 1933740
    },
    {
      "epoch": 3.164640652514025,
      "grad_norm": 0.374737411737442,
      "learning_rate": 3.6949058729729907e-06,
      "loss": 0.009,
      "step": 1933760
    },
    {
      "epoch": 3.1646733829526785,
      "grad_norm": 0.04432883486151695,
      "learning_rate": 3.694839980759474e-06,
      "loss": 0.0086,
      "step": 1933780
    },
    {
      "epoch": 3.1647061133913317,
      "grad_norm": 0.3675551414489746,
      "learning_rate": 3.694774088545957e-06,
      "loss": 0.0097,
      "step": 1933800
    },
    {
      "epoch": 3.164738843829985,
      "grad_norm": 0.09717355668544769,
      "learning_rate": 3.6947081963324397e-06,
      "loss": 0.0072,
      "step": 1933820
    },
    {
      "epoch": 3.1647715742686384,
      "grad_norm": 0.21353337168693542,
      "learning_rate": 3.694642304118923e-06,
      "loss": 0.0103,
      "step": 1933840
    },
    {
      "epoch": 3.1648043047072916,
      "grad_norm": 0.11576168239116669,
      "learning_rate": 3.6945764119054056e-06,
      "loss": 0.0076,
      "step": 1933860
    },
    {
      "epoch": 3.164837035145945,
      "grad_norm": 0.2016763687133789,
      "learning_rate": 3.6945105196918884e-06,
      "loss": 0.0113,
      "step": 1933880
    },
    {
      "epoch": 3.1648697655845983,
      "grad_norm": 0.10843387246131897,
      "learning_rate": 3.694444627478371e-06,
      "loss": 0.0144,
      "step": 1933900
    },
    {
      "epoch": 3.164902496023252,
      "grad_norm": 0.42230913043022156,
      "learning_rate": 3.6943787352648543e-06,
      "loss": 0.0189,
      "step": 1933920
    },
    {
      "epoch": 3.164935226461905,
      "grad_norm": 0.3160044252872467,
      "learning_rate": 3.694312843051337e-06,
      "loss": 0.0104,
      "step": 1933940
    },
    {
      "epoch": 3.164967956900558,
      "grad_norm": 0.0904451459646225,
      "learning_rate": 3.6942469508378198e-06,
      "loss": 0.0085,
      "step": 1933960
    },
    {
      "epoch": 3.165000687339212,
      "grad_norm": 0.17551249265670776,
      "learning_rate": 3.6941810586243025e-06,
      "loss": 0.0104,
      "step": 1933980
    },
    {
      "epoch": 3.165033417777865,
      "grad_norm": 0.20775455236434937,
      "learning_rate": 3.6941151664107857e-06,
      "loss": 0.0118,
      "step": 1934000
    },
    {
      "epoch": 3.1650661482165185,
      "grad_norm": 0.15101493895053864,
      "learning_rate": 3.6940492741972684e-06,
      "loss": 0.0114,
      "step": 1934020
    },
    {
      "epoch": 3.1650988786551717,
      "grad_norm": 0.22590294480323792,
      "learning_rate": 3.693983381983751e-06,
      "loss": 0.0107,
      "step": 1934040
    },
    {
      "epoch": 3.165131609093825,
      "grad_norm": 0.3345796465873718,
      "learning_rate": 3.693917489770234e-06,
      "loss": 0.0102,
      "step": 1934060
    },
    {
      "epoch": 3.1651643395324784,
      "grad_norm": 0.06493973731994629,
      "learning_rate": 3.693851597556717e-06,
      "loss": 0.014,
      "step": 1934080
    },
    {
      "epoch": 3.1651970699711316,
      "grad_norm": 0.2320690006017685,
      "learning_rate": 3.6937857053431998e-06,
      "loss": 0.0132,
      "step": 1934100
    },
    {
      "epoch": 3.165229800409785,
      "grad_norm": 0.8262726664543152,
      "learning_rate": 3.6937198131296825e-06,
      "loss": 0.0111,
      "step": 1934120
    },
    {
      "epoch": 3.1652625308484383,
      "grad_norm": 0.19695593416690826,
      "learning_rate": 3.6936539209161653e-06,
      "loss": 0.0105,
      "step": 1934140
    },
    {
      "epoch": 3.165295261287092,
      "grad_norm": 0.8320111632347107,
      "learning_rate": 3.693588028702649e-06,
      "loss": 0.0108,
      "step": 1934160
    },
    {
      "epoch": 3.165327991725745,
      "grad_norm": 0.15030241012573242,
      "learning_rate": 3.6935221364891316e-06,
      "loss": 0.0158,
      "step": 1934180
    },
    {
      "epoch": 3.1653607221643982,
      "grad_norm": 0.11081212759017944,
      "learning_rate": 3.6934562442756143e-06,
      "loss": 0.0119,
      "step": 1934200
    },
    {
      "epoch": 3.165393452603052,
      "grad_norm": 0.324691504240036,
      "learning_rate": 3.6933903520620975e-06,
      "loss": 0.0147,
      "step": 1934220
    },
    {
      "epoch": 3.165426183041705,
      "grad_norm": 0.14088863134384155,
      "learning_rate": 3.6933244598485802e-06,
      "loss": 0.0112,
      "step": 1934240
    },
    {
      "epoch": 3.1654589134803586,
      "grad_norm": 0.17564401030540466,
      "learning_rate": 3.693258567635063e-06,
      "loss": 0.0085,
      "step": 1934260
    },
    {
      "epoch": 3.1654916439190117,
      "grad_norm": 0.20568890869617462,
      "learning_rate": 3.6931926754215457e-06,
      "loss": 0.0122,
      "step": 1934280
    },
    {
      "epoch": 3.1655243743576653,
      "grad_norm": 0.21120762825012207,
      "learning_rate": 3.6931267832080284e-06,
      "loss": 0.0175,
      "step": 1934300
    },
    {
      "epoch": 3.1655571047963185,
      "grad_norm": 0.28689929842948914,
      "learning_rate": 3.6930608909945116e-06,
      "loss": 0.0107,
      "step": 1934320
    },
    {
      "epoch": 3.1655898352349716,
      "grad_norm": 0.1012483462691307,
      "learning_rate": 3.6929949987809943e-06,
      "loss": 0.0104,
      "step": 1934340
    },
    {
      "epoch": 3.165622565673625,
      "grad_norm": 0.29073581099510193,
      "learning_rate": 3.692929106567477e-06,
      "loss": 0.0098,
      "step": 1934360
    },
    {
      "epoch": 3.1656552961122784,
      "grad_norm": 0.24876061081886292,
      "learning_rate": 3.69286321435396e-06,
      "loss": 0.016,
      "step": 1934380
    },
    {
      "epoch": 3.165688026550932,
      "grad_norm": 0.30897706747055054,
      "learning_rate": 3.692797322140443e-06,
      "loss": 0.0167,
      "step": 1934400
    },
    {
      "epoch": 3.165720756989585,
      "grad_norm": 0.19532038271427155,
      "learning_rate": 3.6927314299269257e-06,
      "loss": 0.0099,
      "step": 1934420
    },
    {
      "epoch": 3.1657534874282387,
      "grad_norm": 0.19740687310695648,
      "learning_rate": 3.6926655377134085e-06,
      "loss": 0.0134,
      "step": 1934440
    },
    {
      "epoch": 3.165786217866892,
      "grad_norm": 0.7566042542457581,
      "learning_rate": 3.692599645499891e-06,
      "loss": 0.0104,
      "step": 1934460
    },
    {
      "epoch": 3.165818948305545,
      "grad_norm": 0.51079922914505,
      "learning_rate": 3.6925337532863744e-06,
      "loss": 0.0076,
      "step": 1934480
    },
    {
      "epoch": 3.1658516787441986,
      "grad_norm": 0.14328676462173462,
      "learning_rate": 3.6924678610728575e-06,
      "loss": 0.0071,
      "step": 1934500
    },
    {
      "epoch": 3.1658844091828517,
      "grad_norm": 0.743432879447937,
      "learning_rate": 3.6924019688593403e-06,
      "loss": 0.0142,
      "step": 1934520
    },
    {
      "epoch": 3.1659171396215053,
      "grad_norm": 0.09649263322353363,
      "learning_rate": 3.6923360766458234e-06,
      "loss": 0.0093,
      "step": 1934540
    },
    {
      "epoch": 3.1659498700601585,
      "grad_norm": 0.865301251411438,
      "learning_rate": 3.692270184432306e-06,
      "loss": 0.0147,
      "step": 1934560
    },
    {
      "epoch": 3.165982600498812,
      "grad_norm": 0.6046203374862671,
      "learning_rate": 3.692204292218789e-06,
      "loss": 0.0101,
      "step": 1934580
    },
    {
      "epoch": 3.1660153309374652,
      "grad_norm": 0.3297710716724396,
      "learning_rate": 3.6921384000052717e-06,
      "loss": 0.0132,
      "step": 1934600
    },
    {
      "epoch": 3.1660480613761184,
      "grad_norm": 0.1232457086443901,
      "learning_rate": 3.692072507791755e-06,
      "loss": 0.011,
      "step": 1934620
    },
    {
      "epoch": 3.166080791814772,
      "grad_norm": 0.5344876050949097,
      "learning_rate": 3.6920066155782376e-06,
      "loss": 0.0091,
      "step": 1934640
    },
    {
      "epoch": 3.166113522253425,
      "grad_norm": 0.23994533717632294,
      "learning_rate": 3.6919407233647203e-06,
      "loss": 0.012,
      "step": 1934660
    },
    {
      "epoch": 3.1661462526920787,
      "grad_norm": 3.757244110107422,
      "learning_rate": 3.691874831151203e-06,
      "loss": 0.0091,
      "step": 1934680
    },
    {
      "epoch": 3.166178983130732,
      "grad_norm": 0.40167224407196045,
      "learning_rate": 3.6918089389376858e-06,
      "loss": 0.0106,
      "step": 1934700
    },
    {
      "epoch": 3.1662117135693855,
      "grad_norm": 0.194718599319458,
      "learning_rate": 3.691743046724169e-06,
      "loss": 0.0106,
      "step": 1934720
    },
    {
      "epoch": 3.1662444440080386,
      "grad_norm": 0.18846260011196136,
      "learning_rate": 3.6916771545106517e-06,
      "loss": 0.0145,
      "step": 1934740
    },
    {
      "epoch": 3.1662771744466918,
      "grad_norm": 0.30587175488471985,
      "learning_rate": 3.6916112622971344e-06,
      "loss": 0.0097,
      "step": 1934760
    },
    {
      "epoch": 3.1663099048853454,
      "grad_norm": 0.2220962792634964,
      "learning_rate": 3.691545370083617e-06,
      "loss": 0.0094,
      "step": 1934780
    },
    {
      "epoch": 3.1663426353239985,
      "grad_norm": 0.09447620809078217,
      "learning_rate": 3.6914794778701003e-06,
      "loss": 0.0084,
      "step": 1934800
    },
    {
      "epoch": 3.166375365762652,
      "grad_norm": 0.3003777861595154,
      "learning_rate": 3.691413585656583e-06,
      "loss": 0.0127,
      "step": 1934820
    },
    {
      "epoch": 3.1664080962013053,
      "grad_norm": 0.9601084589958191,
      "learning_rate": 3.691347693443066e-06,
      "loss": 0.0128,
      "step": 1934840
    },
    {
      "epoch": 3.166440826639959,
      "grad_norm": 0.21785850822925568,
      "learning_rate": 3.6912818012295494e-06,
      "loss": 0.0105,
      "step": 1934860
    },
    {
      "epoch": 3.166473557078612,
      "grad_norm": 0.33563339710235596,
      "learning_rate": 3.691215909016032e-06,
      "loss": 0.0088,
      "step": 1934880
    },
    {
      "epoch": 3.166506287517265,
      "grad_norm": 0.15573659539222717,
      "learning_rate": 3.691150016802515e-06,
      "loss": 0.0109,
      "step": 1934900
    },
    {
      "epoch": 3.1665390179559187,
      "grad_norm": 0.31954219937324524,
      "learning_rate": 3.6910841245889976e-06,
      "loss": 0.0085,
      "step": 1934920
    },
    {
      "epoch": 3.166571748394572,
      "grad_norm": 0.2570319175720215,
      "learning_rate": 3.6910182323754808e-06,
      "loss": 0.011,
      "step": 1934940
    },
    {
      "epoch": 3.1666044788332255,
      "grad_norm": 0.0811116173863411,
      "learning_rate": 3.6909523401619635e-06,
      "loss": 0.0083,
      "step": 1934960
    },
    {
      "epoch": 3.1666372092718786,
      "grad_norm": 0.43533000349998474,
      "learning_rate": 3.6908864479484463e-06,
      "loss": 0.0153,
      "step": 1934980
    },
    {
      "epoch": 3.1666699397105322,
      "grad_norm": 0.3292190134525299,
      "learning_rate": 3.690820555734929e-06,
      "loss": 0.0133,
      "step": 1935000
    },
    {
      "epoch": 3.1667026701491854,
      "grad_norm": 0.8202628493309021,
      "learning_rate": 3.690754663521412e-06,
      "loss": 0.0119,
      "step": 1935020
    },
    {
      "epoch": 3.1667354005878385,
      "grad_norm": 0.06704115122556686,
      "learning_rate": 3.690688771307895e-06,
      "loss": 0.0131,
      "step": 1935040
    },
    {
      "epoch": 3.166768131026492,
      "grad_norm": 0.3566510081291199,
      "learning_rate": 3.6906228790943776e-06,
      "loss": 0.0181,
      "step": 1935060
    },
    {
      "epoch": 3.1668008614651453,
      "grad_norm": 0.19784219563007355,
      "learning_rate": 3.6905569868808604e-06,
      "loss": 0.009,
      "step": 1935080
    },
    {
      "epoch": 3.166833591903799,
      "grad_norm": 0.3234696090221405,
      "learning_rate": 3.6904910946673435e-06,
      "loss": 0.0073,
      "step": 1935100
    },
    {
      "epoch": 3.166866322342452,
      "grad_norm": 0.15401026606559753,
      "learning_rate": 3.6904252024538263e-06,
      "loss": 0.0158,
      "step": 1935120
    },
    {
      "epoch": 3.166899052781105,
      "grad_norm": 0.36163875460624695,
      "learning_rate": 3.690359310240309e-06,
      "loss": 0.0104,
      "step": 1935140
    },
    {
      "epoch": 3.1669317832197588,
      "grad_norm": 0.12962371110916138,
      "learning_rate": 3.6902934180267918e-06,
      "loss": 0.011,
      "step": 1935160
    },
    {
      "epoch": 3.166964513658412,
      "grad_norm": 0.2928546965122223,
      "learning_rate": 3.6902275258132745e-06,
      "loss": 0.0137,
      "step": 1935180
    },
    {
      "epoch": 3.1669972440970655,
      "grad_norm": 0.18052498996257782,
      "learning_rate": 3.6901616335997577e-06,
      "loss": 0.0174,
      "step": 1935200
    },
    {
      "epoch": 3.1670299745357187,
      "grad_norm": 0.1586020141839981,
      "learning_rate": 3.690095741386241e-06,
      "loss": 0.0108,
      "step": 1935220
    },
    {
      "epoch": 3.1670627049743723,
      "grad_norm": 0.1505410224199295,
      "learning_rate": 3.6900298491727236e-06,
      "loss": 0.0075,
      "step": 1935240
    },
    {
      "epoch": 3.1670954354130254,
      "grad_norm": 0.3160995841026306,
      "learning_rate": 3.6899639569592067e-06,
      "loss": 0.0108,
      "step": 1935260
    },
    {
      "epoch": 3.1671281658516786,
      "grad_norm": 0.1297956258058548,
      "learning_rate": 3.6898980647456895e-06,
      "loss": 0.0138,
      "step": 1935280
    },
    {
      "epoch": 3.167160896290332,
      "grad_norm": 0.22806623578071594,
      "learning_rate": 3.689832172532172e-06,
      "loss": 0.0098,
      "step": 1935300
    },
    {
      "epoch": 3.1671936267289853,
      "grad_norm": 0.4041281044483185,
      "learning_rate": 3.689766280318655e-06,
      "loss": 0.0162,
      "step": 1935320
    },
    {
      "epoch": 3.167226357167639,
      "grad_norm": 0.21392199397087097,
      "learning_rate": 3.689700388105138e-06,
      "loss": 0.0139,
      "step": 1935340
    },
    {
      "epoch": 3.167259087606292,
      "grad_norm": 0.19655682146549225,
      "learning_rate": 3.689634495891621e-06,
      "loss": 0.0151,
      "step": 1935360
    },
    {
      "epoch": 3.1672918180449456,
      "grad_norm": 0.7028048634529114,
      "learning_rate": 3.6895686036781036e-06,
      "loss": 0.0081,
      "step": 1935380
    },
    {
      "epoch": 3.167324548483599,
      "grad_norm": 0.14598673582077026,
      "learning_rate": 3.6895027114645863e-06,
      "loss": 0.0109,
      "step": 1935400
    },
    {
      "epoch": 3.167357278922252,
      "grad_norm": 0.18229727447032928,
      "learning_rate": 3.6894368192510695e-06,
      "loss": 0.0077,
      "step": 1935420
    },
    {
      "epoch": 3.1673900093609055,
      "grad_norm": 0.18651355803012848,
      "learning_rate": 3.6893709270375522e-06,
      "loss": 0.0148,
      "step": 1935440
    },
    {
      "epoch": 3.1674227397995587,
      "grad_norm": 0.5586630702018738,
      "learning_rate": 3.689305034824035e-06,
      "loss": 0.0125,
      "step": 1935460
    },
    {
      "epoch": 3.1674554702382123,
      "grad_norm": 0.560519278049469,
      "learning_rate": 3.6892391426105177e-06,
      "loss": 0.0117,
      "step": 1935480
    },
    {
      "epoch": 3.1674882006768654,
      "grad_norm": 0.954534649848938,
      "learning_rate": 3.689173250397001e-06,
      "loss": 0.0126,
      "step": 1935500
    },
    {
      "epoch": 3.1675209311155186,
      "grad_norm": 0.308992862701416,
      "learning_rate": 3.6891073581834836e-06,
      "loss": 0.0102,
      "step": 1935520
    },
    {
      "epoch": 3.167553661554172,
      "grad_norm": 0.7383038997650146,
      "learning_rate": 3.6890414659699664e-06,
      "loss": 0.0119,
      "step": 1935540
    },
    {
      "epoch": 3.1675863919928253,
      "grad_norm": 0.055153731256723404,
      "learning_rate": 3.68897557375645e-06,
      "loss": 0.0121,
      "step": 1935560
    },
    {
      "epoch": 3.167619122431479,
      "grad_norm": 0.25225892663002014,
      "learning_rate": 3.6889096815429327e-06,
      "loss": 0.0137,
      "step": 1935580
    },
    {
      "epoch": 3.167651852870132,
      "grad_norm": 0.3283769488334656,
      "learning_rate": 3.6888437893294154e-06,
      "loss": 0.0116,
      "step": 1935600
    },
    {
      "epoch": 3.1676845833087857,
      "grad_norm": 0.3116358816623688,
      "learning_rate": 3.688777897115898e-06,
      "loss": 0.015,
      "step": 1935620
    },
    {
      "epoch": 3.167717313747439,
      "grad_norm": 0.7105507254600525,
      "learning_rate": 3.6887120049023813e-06,
      "loss": 0.0149,
      "step": 1935640
    },
    {
      "epoch": 3.167750044186092,
      "grad_norm": 0.26602426171302795,
      "learning_rate": 3.688646112688864e-06,
      "loss": 0.0102,
      "step": 1935660
    },
    {
      "epoch": 3.1677827746247456,
      "grad_norm": 0.2337339222431183,
      "learning_rate": 3.688580220475347e-06,
      "loss": 0.0159,
      "step": 1935680
    },
    {
      "epoch": 3.1678155050633987,
      "grad_norm": 0.1642068326473236,
      "learning_rate": 3.6885143282618295e-06,
      "loss": 0.0144,
      "step": 1935700
    },
    {
      "epoch": 3.1678482355020523,
      "grad_norm": 0.2720205783843994,
      "learning_rate": 3.6884484360483123e-06,
      "loss": 0.0125,
      "step": 1935720
    },
    {
      "epoch": 3.1678809659407055,
      "grad_norm": 0.27640801668167114,
      "learning_rate": 3.6883825438347954e-06,
      "loss": 0.0104,
      "step": 1935740
    },
    {
      "epoch": 3.167913696379359,
      "grad_norm": 0.22569118440151215,
      "learning_rate": 3.688316651621278e-06,
      "loss": 0.0083,
      "step": 1935760
    },
    {
      "epoch": 3.167946426818012,
      "grad_norm": 0.13299937546253204,
      "learning_rate": 3.688250759407761e-06,
      "loss": 0.0141,
      "step": 1935780
    },
    {
      "epoch": 3.1679791572566653,
      "grad_norm": 0.10454957187175751,
      "learning_rate": 3.6881848671942437e-06,
      "loss": 0.0086,
      "step": 1935800
    },
    {
      "epoch": 3.168011887695319,
      "grad_norm": 0.2229015976190567,
      "learning_rate": 3.688118974980727e-06,
      "loss": 0.0074,
      "step": 1935820
    },
    {
      "epoch": 3.168044618133972,
      "grad_norm": 0.33355799317359924,
      "learning_rate": 3.6880530827672096e-06,
      "loss": 0.0085,
      "step": 1935840
    },
    {
      "epoch": 3.1680773485726257,
      "grad_norm": 0.27517732977867126,
      "learning_rate": 3.6879871905536923e-06,
      "loss": 0.0102,
      "step": 1935860
    },
    {
      "epoch": 3.168110079011279,
      "grad_norm": 0.31116798520088196,
      "learning_rate": 3.687921298340175e-06,
      "loss": 0.0115,
      "step": 1935880
    },
    {
      "epoch": 3.1681428094499324,
      "grad_norm": 0.20005710422992706,
      "learning_rate": 3.6878554061266582e-06,
      "loss": 0.0138,
      "step": 1935900
    },
    {
      "epoch": 3.1681755398885856,
      "grad_norm": 0.12441757321357727,
      "learning_rate": 3.6877895139131414e-06,
      "loss": 0.0135,
      "step": 1935920
    },
    {
      "epoch": 3.1682082703272387,
      "grad_norm": 1.5592719316482544,
      "learning_rate": 3.687723621699624e-06,
      "loss": 0.0232,
      "step": 1935940
    },
    {
      "epoch": 3.1682410007658923,
      "grad_norm": 0.3819003701210022,
      "learning_rate": 3.6876577294861073e-06,
      "loss": 0.0155,
      "step": 1935960
    },
    {
      "epoch": 3.1682737312045455,
      "grad_norm": 0.6498056054115295,
      "learning_rate": 3.68759183727259e-06,
      "loss": 0.0149,
      "step": 1935980
    },
    {
      "epoch": 3.168306461643199,
      "grad_norm": 1.3459237813949585,
      "learning_rate": 3.6875259450590728e-06,
      "loss": 0.0146,
      "step": 1936000
    },
    {
      "epoch": 3.168339192081852,
      "grad_norm": 0.47354814410209656,
      "learning_rate": 3.6874600528455555e-06,
      "loss": 0.0069,
      "step": 1936020
    },
    {
      "epoch": 3.168371922520506,
      "grad_norm": 0.17938721179962158,
      "learning_rate": 3.6873941606320387e-06,
      "loss": 0.0134,
      "step": 1936040
    },
    {
      "epoch": 3.168404652959159,
      "grad_norm": 0.12805894017219543,
      "learning_rate": 3.6873282684185214e-06,
      "loss": 0.0146,
      "step": 1936060
    },
    {
      "epoch": 3.168437383397812,
      "grad_norm": 0.23754343390464783,
      "learning_rate": 3.687262376205004e-06,
      "loss": 0.0132,
      "step": 1936080
    },
    {
      "epoch": 3.1684701138364657,
      "grad_norm": 0.4544052481651306,
      "learning_rate": 3.687196483991487e-06,
      "loss": 0.0103,
      "step": 1936100
    },
    {
      "epoch": 3.168502844275119,
      "grad_norm": 0.25655561685562134,
      "learning_rate": 3.68713059177797e-06,
      "loss": 0.0138,
      "step": 1936120
    },
    {
      "epoch": 3.1685355747137725,
      "grad_norm": 0.19571895897388458,
      "learning_rate": 3.6870646995644528e-06,
      "loss": 0.0131,
      "step": 1936140
    },
    {
      "epoch": 3.1685683051524256,
      "grad_norm": 0.2594902217388153,
      "learning_rate": 3.6869988073509355e-06,
      "loss": 0.0081,
      "step": 1936160
    },
    {
      "epoch": 3.168601035591079,
      "grad_norm": 0.10246685892343521,
      "learning_rate": 3.6869329151374183e-06,
      "loss": 0.0077,
      "step": 1936180
    },
    {
      "epoch": 3.1686337660297323,
      "grad_norm": 0.2618553340435028,
      "learning_rate": 3.686867022923901e-06,
      "loss": 0.0129,
      "step": 1936200
    },
    {
      "epoch": 3.1686664964683855,
      "grad_norm": 0.2874801456928253,
      "learning_rate": 3.686801130710384e-06,
      "loss": 0.0117,
      "step": 1936220
    },
    {
      "epoch": 3.168699226907039,
      "grad_norm": 0.5201463103294373,
      "learning_rate": 3.686735238496867e-06,
      "loss": 0.0111,
      "step": 1936240
    },
    {
      "epoch": 3.1687319573456922,
      "grad_norm": 0.4056144654750824,
      "learning_rate": 3.68666934628335e-06,
      "loss": 0.0142,
      "step": 1936260
    },
    {
      "epoch": 3.168764687784346,
      "grad_norm": 0.15616276860237122,
      "learning_rate": 3.6866034540698332e-06,
      "loss": 0.0086,
      "step": 1936280
    },
    {
      "epoch": 3.168797418222999,
      "grad_norm": 0.3889528214931488,
      "learning_rate": 3.686537561856316e-06,
      "loss": 0.0172,
      "step": 1936300
    },
    {
      "epoch": 3.1688301486616526,
      "grad_norm": 0.25012314319610596,
      "learning_rate": 3.6864716696427987e-06,
      "loss": 0.0141,
      "step": 1936320
    },
    {
      "epoch": 3.1688628791003057,
      "grad_norm": 0.41459977626800537,
      "learning_rate": 3.6864057774292815e-06,
      "loss": 0.0144,
      "step": 1936340
    },
    {
      "epoch": 3.168895609538959,
      "grad_norm": 0.34961801767349243,
      "learning_rate": 3.6863398852157646e-06,
      "loss": 0.0128,
      "step": 1936360
    },
    {
      "epoch": 3.1689283399776125,
      "grad_norm": 0.08878042548894882,
      "learning_rate": 3.6862739930022474e-06,
      "loss": 0.013,
      "step": 1936380
    },
    {
      "epoch": 3.1689610704162656,
      "grad_norm": 0.2232833206653595,
      "learning_rate": 3.68620810078873e-06,
      "loss": 0.0084,
      "step": 1936400
    },
    {
      "epoch": 3.168993800854919,
      "grad_norm": 0.08524385094642639,
      "learning_rate": 3.686142208575213e-06,
      "loss": 0.0107,
      "step": 1936420
    },
    {
      "epoch": 3.1690265312935724,
      "grad_norm": 0.5741367936134338,
      "learning_rate": 3.686076316361696e-06,
      "loss": 0.0091,
      "step": 1936440
    },
    {
      "epoch": 3.169059261732226,
      "grad_norm": 0.6238442659378052,
      "learning_rate": 3.6860104241481787e-06,
      "loss": 0.0126,
      "step": 1936460
    },
    {
      "epoch": 3.169091992170879,
      "grad_norm": 0.36049649119377136,
      "learning_rate": 3.6859445319346615e-06,
      "loss": 0.0109,
      "step": 1936480
    },
    {
      "epoch": 3.1691247226095323,
      "grad_norm": 0.3034583628177643,
      "learning_rate": 3.6858786397211442e-06,
      "loss": 0.0104,
      "step": 1936500
    },
    {
      "epoch": 3.169157453048186,
      "grad_norm": 0.07247160375118256,
      "learning_rate": 3.6858127475076274e-06,
      "loss": 0.0126,
      "step": 1936520
    },
    {
      "epoch": 3.169190183486839,
      "grad_norm": 0.34939420223236084,
      "learning_rate": 3.68574685529411e-06,
      "loss": 0.0114,
      "step": 1936540
    },
    {
      "epoch": 3.1692229139254926,
      "grad_norm": 0.17111603915691376,
      "learning_rate": 3.685680963080593e-06,
      "loss": 0.01,
      "step": 1936560
    },
    {
      "epoch": 3.1692556443641458,
      "grad_norm": 0.18552662432193756,
      "learning_rate": 3.6856150708670756e-06,
      "loss": 0.0113,
      "step": 1936580
    },
    {
      "epoch": 3.169288374802799,
      "grad_norm": 2.2337377071380615,
      "learning_rate": 3.6855491786535583e-06,
      "loss": 0.0128,
      "step": 1936600
    },
    {
      "epoch": 3.1693211052414525,
      "grad_norm": 0.4061930179595947,
      "learning_rate": 3.685483286440042e-06,
      "loss": 0.0104,
      "step": 1936620
    },
    {
      "epoch": 3.1693538356801056,
      "grad_norm": 0.12279658764600754,
      "learning_rate": 3.6854173942265247e-06,
      "loss": 0.0101,
      "step": 1936640
    },
    {
      "epoch": 3.1693865661187592,
      "grad_norm": 0.2501491904258728,
      "learning_rate": 3.685351502013008e-06,
      "loss": 0.0084,
      "step": 1936660
    },
    {
      "epoch": 3.1694192965574124,
      "grad_norm": 0.22229339182376862,
      "learning_rate": 3.6852856097994906e-06,
      "loss": 0.0079,
      "step": 1936680
    },
    {
      "epoch": 3.169452026996066,
      "grad_norm": 0.26139891147613525,
      "learning_rate": 3.6852197175859733e-06,
      "loss": 0.0134,
      "step": 1936700
    },
    {
      "epoch": 3.169484757434719,
      "grad_norm": 0.11177562177181244,
      "learning_rate": 3.685153825372456e-06,
      "loss": 0.0089,
      "step": 1936720
    },
    {
      "epoch": 3.1695174878733723,
      "grad_norm": 0.35948696732521057,
      "learning_rate": 3.685087933158939e-06,
      "loss": 0.0099,
      "step": 1936740
    },
    {
      "epoch": 3.169550218312026,
      "grad_norm": 0.46872878074645996,
      "learning_rate": 3.685022040945422e-06,
      "loss": 0.0078,
      "step": 1936760
    },
    {
      "epoch": 3.169582948750679,
      "grad_norm": 0.16435423493385315,
      "learning_rate": 3.6849561487319047e-06,
      "loss": 0.0157,
      "step": 1936780
    },
    {
      "epoch": 3.1696156791893326,
      "grad_norm": 0.26614946126937866,
      "learning_rate": 3.6848902565183874e-06,
      "loss": 0.0077,
      "step": 1936800
    },
    {
      "epoch": 3.1696484096279858,
      "grad_norm": 0.12241919338703156,
      "learning_rate": 3.68482436430487e-06,
      "loss": 0.0139,
      "step": 1936820
    },
    {
      "epoch": 3.1696811400666394,
      "grad_norm": 0.39614471793174744,
      "learning_rate": 3.6847584720913533e-06,
      "loss": 0.01,
      "step": 1936840
    },
    {
      "epoch": 3.1697138705052925,
      "grad_norm": 0.19857603311538696,
      "learning_rate": 3.684692579877836e-06,
      "loss": 0.0128,
      "step": 1936860
    },
    {
      "epoch": 3.1697466009439457,
      "grad_norm": 0.1065920889377594,
      "learning_rate": 3.684626687664319e-06,
      "loss": 0.0109,
      "step": 1936880
    },
    {
      "epoch": 3.1697793313825993,
      "grad_norm": 0.2587191164493561,
      "learning_rate": 3.6845607954508016e-06,
      "loss": 0.0108,
      "step": 1936900
    },
    {
      "epoch": 3.1698120618212524,
      "grad_norm": 0.23009085655212402,
      "learning_rate": 3.6844949032372847e-06,
      "loss": 0.0105,
      "step": 1936920
    },
    {
      "epoch": 3.169844792259906,
      "grad_norm": 0.5115227699279785,
      "learning_rate": 3.6844290110237675e-06,
      "loss": 0.0131,
      "step": 1936940
    },
    {
      "epoch": 3.169877522698559,
      "grad_norm": 0.09671347588300705,
      "learning_rate": 3.68436311881025e-06,
      "loss": 0.0138,
      "step": 1936960
    },
    {
      "epoch": 3.1699102531372128,
      "grad_norm": 0.24346482753753662,
      "learning_rate": 3.6842972265967338e-06,
      "loss": 0.0061,
      "step": 1936980
    },
    {
      "epoch": 3.169942983575866,
      "grad_norm": 0.35561227798461914,
      "learning_rate": 3.6842313343832165e-06,
      "loss": 0.0076,
      "step": 1937000
    },
    {
      "epoch": 3.169975714014519,
      "grad_norm": 0.09291177242994308,
      "learning_rate": 3.6841654421696993e-06,
      "loss": 0.0075,
      "step": 1937020
    },
    {
      "epoch": 3.1700084444531726,
      "grad_norm": 0.1009448990225792,
      "learning_rate": 3.684099549956182e-06,
      "loss": 0.0097,
      "step": 1937040
    },
    {
      "epoch": 3.170041174891826,
      "grad_norm": 0.1921994537115097,
      "learning_rate": 3.684033657742665e-06,
      "loss": 0.0115,
      "step": 1937060
    },
    {
      "epoch": 3.1700739053304794,
      "grad_norm": 0.25760072469711304,
      "learning_rate": 3.683967765529148e-06,
      "loss": 0.0097,
      "step": 1937080
    },
    {
      "epoch": 3.1701066357691325,
      "grad_norm": 0.3750964105129242,
      "learning_rate": 3.6839018733156306e-06,
      "loss": 0.0113,
      "step": 1937100
    },
    {
      "epoch": 3.1701393662077857,
      "grad_norm": 0.27033838629722595,
      "learning_rate": 3.6838359811021134e-06,
      "loss": 0.0107,
      "step": 1937120
    },
    {
      "epoch": 3.1701720966464393,
      "grad_norm": 0.406339555978775,
      "learning_rate": 3.683770088888596e-06,
      "loss": 0.012,
      "step": 1937140
    },
    {
      "epoch": 3.1702048270850924,
      "grad_norm": 0.21987128257751465,
      "learning_rate": 3.6837041966750793e-06,
      "loss": 0.0083,
      "step": 1937160
    },
    {
      "epoch": 3.170237557523746,
      "grad_norm": 0.4416017532348633,
      "learning_rate": 3.683638304461562e-06,
      "loss": 0.0119,
      "step": 1937180
    },
    {
      "epoch": 3.170270287962399,
      "grad_norm": 0.4511902332305908,
      "learning_rate": 3.6835724122480448e-06,
      "loss": 0.0148,
      "step": 1937200
    },
    {
      "epoch": 3.1703030184010528,
      "grad_norm": 0.23472300171852112,
      "learning_rate": 3.6835065200345275e-06,
      "loss": 0.0118,
      "step": 1937220
    },
    {
      "epoch": 3.170335748839706,
      "grad_norm": 0.20844024419784546,
      "learning_rate": 3.6834406278210107e-06,
      "loss": 0.0109,
      "step": 1937240
    },
    {
      "epoch": 3.170368479278359,
      "grad_norm": 0.678122878074646,
      "learning_rate": 3.6833747356074934e-06,
      "loss": 0.0178,
      "step": 1937260
    },
    {
      "epoch": 3.1704012097170127,
      "grad_norm": 1.6072168350219727,
      "learning_rate": 3.683308843393976e-06,
      "loss": 0.0115,
      "step": 1937280
    },
    {
      "epoch": 3.170433940155666,
      "grad_norm": 0.1085609495639801,
      "learning_rate": 3.683242951180459e-06,
      "loss": 0.0102,
      "step": 1937300
    },
    {
      "epoch": 3.1704666705943194,
      "grad_norm": 0.8990331292152405,
      "learning_rate": 3.6831770589669425e-06,
      "loss": 0.0145,
      "step": 1937320
    },
    {
      "epoch": 3.1704994010329726,
      "grad_norm": 0.24448587000370026,
      "learning_rate": 3.6831111667534252e-06,
      "loss": 0.014,
      "step": 1937340
    },
    {
      "epoch": 3.170532131471626,
      "grad_norm": 0.2096518725156784,
      "learning_rate": 3.683045274539908e-06,
      "loss": 0.0128,
      "step": 1937360
    },
    {
      "epoch": 3.1705648619102793,
      "grad_norm": 0.4259292185306549,
      "learning_rate": 3.682979382326391e-06,
      "loss": 0.0122,
      "step": 1937380
    },
    {
      "epoch": 3.1705975923489325,
      "grad_norm": 0.060557398945093155,
      "learning_rate": 3.682913490112874e-06,
      "loss": 0.0162,
      "step": 1937400
    },
    {
      "epoch": 3.170630322787586,
      "grad_norm": 0.5126145482063293,
      "learning_rate": 3.6828475978993566e-06,
      "loss": 0.0125,
      "step": 1937420
    },
    {
      "epoch": 3.170663053226239,
      "grad_norm": 0.8856601715087891,
      "learning_rate": 3.6827817056858393e-06,
      "loss": 0.0111,
      "step": 1937440
    },
    {
      "epoch": 3.170695783664893,
      "grad_norm": 0.47437039017677307,
      "learning_rate": 3.6827158134723225e-06,
      "loss": 0.0138,
      "step": 1937460
    },
    {
      "epoch": 3.170728514103546,
      "grad_norm": 0.2701050341129303,
      "learning_rate": 3.6826499212588052e-06,
      "loss": 0.0155,
      "step": 1937480
    },
    {
      "epoch": 3.1707612445421995,
      "grad_norm": 0.1869172304868698,
      "learning_rate": 3.682584029045288e-06,
      "loss": 0.009,
      "step": 1937500
    },
    {
      "epoch": 3.1707939749808527,
      "grad_norm": 0.4317227900028229,
      "learning_rate": 3.6825181368317707e-06,
      "loss": 0.0113,
      "step": 1937520
    },
    {
      "epoch": 3.170826705419506,
      "grad_norm": 0.14938773214817047,
      "learning_rate": 3.682452244618254e-06,
      "loss": 0.013,
      "step": 1937540
    },
    {
      "epoch": 3.1708594358581594,
      "grad_norm": 0.1336122304201126,
      "learning_rate": 3.6823863524047366e-06,
      "loss": 0.0153,
      "step": 1937560
    },
    {
      "epoch": 3.1708921662968126,
      "grad_norm": 0.48282530903816223,
      "learning_rate": 3.6823204601912194e-06,
      "loss": 0.011,
      "step": 1937580
    },
    {
      "epoch": 3.170924896735466,
      "grad_norm": 0.5710877776145935,
      "learning_rate": 3.682254567977702e-06,
      "loss": 0.0138,
      "step": 1937600
    },
    {
      "epoch": 3.1709576271741193,
      "grad_norm": 0.39247044920921326,
      "learning_rate": 3.682188675764185e-06,
      "loss": 0.0114,
      "step": 1937620
    },
    {
      "epoch": 3.170990357612773,
      "grad_norm": 0.283992737531662,
      "learning_rate": 3.682122783550668e-06,
      "loss": 0.012,
      "step": 1937640
    },
    {
      "epoch": 3.171023088051426,
      "grad_norm": 0.14837203919887543,
      "learning_rate": 3.6820568913371507e-06,
      "loss": 0.0073,
      "step": 1937660
    },
    {
      "epoch": 3.1710558184900792,
      "grad_norm": 0.2223147749900818,
      "learning_rate": 3.681990999123634e-06,
      "loss": 0.0105,
      "step": 1937680
    },
    {
      "epoch": 3.171088548928733,
      "grad_norm": 0.418836385011673,
      "learning_rate": 3.681925106910117e-06,
      "loss": 0.012,
      "step": 1937700
    },
    {
      "epoch": 3.171121279367386,
      "grad_norm": 0.3213772177696228,
      "learning_rate": 3.6818592146966e-06,
      "loss": 0.0106,
      "step": 1937720
    },
    {
      "epoch": 3.1711540098060396,
      "grad_norm": 0.2934841513633728,
      "learning_rate": 3.6817933224830826e-06,
      "loss": 0.0247,
      "step": 1937740
    },
    {
      "epoch": 3.1711867402446927,
      "grad_norm": 0.15130120515823364,
      "learning_rate": 3.6817274302695653e-06,
      "loss": 0.0121,
      "step": 1937760
    },
    {
      "epoch": 3.1712194706833463,
      "grad_norm": 0.6740331649780273,
      "learning_rate": 3.6816615380560485e-06,
      "loss": 0.009,
      "step": 1937780
    },
    {
      "epoch": 3.1712522011219995,
      "grad_norm": 0.383186012506485,
      "learning_rate": 3.681595645842531e-06,
      "loss": 0.0136,
      "step": 1937800
    },
    {
      "epoch": 3.1712849315606526,
      "grad_norm": 0.5432948470115662,
      "learning_rate": 3.681529753629014e-06,
      "loss": 0.0174,
      "step": 1937820
    },
    {
      "epoch": 3.171317661999306,
      "grad_norm": 0.16846486926078796,
      "learning_rate": 3.6814638614154967e-06,
      "loss": 0.0089,
      "step": 1937840
    },
    {
      "epoch": 3.1713503924379594,
      "grad_norm": 0.06375047564506531,
      "learning_rate": 3.68139796920198e-06,
      "loss": 0.01,
      "step": 1937860
    },
    {
      "epoch": 3.171383122876613,
      "grad_norm": 0.17541684210300446,
      "learning_rate": 3.6813320769884626e-06,
      "loss": 0.0114,
      "step": 1937880
    },
    {
      "epoch": 3.171415853315266,
      "grad_norm": 0.5647204518318176,
      "learning_rate": 3.6812661847749453e-06,
      "loss": 0.0097,
      "step": 1937900
    },
    {
      "epoch": 3.1714485837539197,
      "grad_norm": 0.18630918860435486,
      "learning_rate": 3.681200292561428e-06,
      "loss": 0.0136,
      "step": 1937920
    },
    {
      "epoch": 3.171481314192573,
      "grad_norm": 0.5301377773284912,
      "learning_rate": 3.6811344003479112e-06,
      "loss": 0.014,
      "step": 1937940
    },
    {
      "epoch": 3.171514044631226,
      "grad_norm": 0.07101219147443771,
      "learning_rate": 3.681068508134394e-06,
      "loss": 0.0094,
      "step": 1937960
    },
    {
      "epoch": 3.1715467750698796,
      "grad_norm": 0.0908847376704216,
      "learning_rate": 3.6810026159208767e-06,
      "loss": 0.0131,
      "step": 1937980
    },
    {
      "epoch": 3.1715795055085327,
      "grad_norm": 0.05968696251511574,
      "learning_rate": 3.6809367237073594e-06,
      "loss": 0.0099,
      "step": 1938000
    },
    {
      "epoch": 3.1716122359471863,
      "grad_norm": 0.30031073093414307,
      "learning_rate": 3.680870831493843e-06,
      "loss": 0.017,
      "step": 1938020
    },
    {
      "epoch": 3.1716449663858395,
      "grad_norm": 0.33425748348236084,
      "learning_rate": 3.6808049392803258e-06,
      "loss": 0.0092,
      "step": 1938040
    },
    {
      "epoch": 3.171677696824493,
      "grad_norm": 0.192418172955513,
      "learning_rate": 3.6807390470668085e-06,
      "loss": 0.0104,
      "step": 1938060
    },
    {
      "epoch": 3.1717104272631462,
      "grad_norm": 0.10161035507917404,
      "learning_rate": 3.6806731548532917e-06,
      "loss": 0.0146,
      "step": 1938080
    },
    {
      "epoch": 3.1717431577017994,
      "grad_norm": 0.5513771772384644,
      "learning_rate": 3.6806072626397744e-06,
      "loss": 0.0102,
      "step": 1938100
    },
    {
      "epoch": 3.171775888140453,
      "grad_norm": 0.0973697155714035,
      "learning_rate": 3.680541370426257e-06,
      "loss": 0.0093,
      "step": 1938120
    },
    {
      "epoch": 3.171808618579106,
      "grad_norm": 0.6933717131614685,
      "learning_rate": 3.68047547821274e-06,
      "loss": 0.0128,
      "step": 1938140
    },
    {
      "epoch": 3.1718413490177597,
      "grad_norm": 0.37862256169319153,
      "learning_rate": 3.6804095859992226e-06,
      "loss": 0.0149,
      "step": 1938160
    },
    {
      "epoch": 3.171874079456413,
      "grad_norm": 1.77536940574646,
      "learning_rate": 3.680343693785706e-06,
      "loss": 0.0127,
      "step": 1938180
    },
    {
      "epoch": 3.171906809895066,
      "grad_norm": 0.42552968859672546,
      "learning_rate": 3.6802778015721885e-06,
      "loss": 0.0132,
      "step": 1938200
    },
    {
      "epoch": 3.1719395403337196,
      "grad_norm": 1.0596848726272583,
      "learning_rate": 3.6802119093586713e-06,
      "loss": 0.0148,
      "step": 1938220
    },
    {
      "epoch": 3.1719722707723728,
      "grad_norm": 0.48392587900161743,
      "learning_rate": 3.680146017145154e-06,
      "loss": 0.0158,
      "step": 1938240
    },
    {
      "epoch": 3.1720050012110264,
      "grad_norm": 0.26212745904922485,
      "learning_rate": 3.680080124931637e-06,
      "loss": 0.0105,
      "step": 1938260
    },
    {
      "epoch": 3.1720377316496795,
      "grad_norm": 0.566120982170105,
      "learning_rate": 3.68001423271812e-06,
      "loss": 0.01,
      "step": 1938280
    },
    {
      "epoch": 3.172070462088333,
      "grad_norm": 0.40496358275413513,
      "learning_rate": 3.6799483405046027e-06,
      "loss": 0.0079,
      "step": 1938300
    },
    {
      "epoch": 3.1721031925269862,
      "grad_norm": 0.24650858342647552,
      "learning_rate": 3.6798824482910854e-06,
      "loss": 0.0092,
      "step": 1938320
    },
    {
      "epoch": 3.1721359229656394,
      "grad_norm": 0.4418483376502991,
      "learning_rate": 3.6798165560775686e-06,
      "loss": 0.0099,
      "step": 1938340
    },
    {
      "epoch": 3.172168653404293,
      "grad_norm": 0.5003127455711365,
      "learning_rate": 3.6797506638640513e-06,
      "loss": 0.0098,
      "step": 1938360
    },
    {
      "epoch": 3.172201383842946,
      "grad_norm": 0.21814098954200745,
      "learning_rate": 3.6796847716505345e-06,
      "loss": 0.0121,
      "step": 1938380
    },
    {
      "epoch": 3.1722341142815997,
      "grad_norm": 0.4503324329853058,
      "learning_rate": 3.6796188794370176e-06,
      "loss": 0.01,
      "step": 1938400
    },
    {
      "epoch": 3.172266844720253,
      "grad_norm": 0.1170518770813942,
      "learning_rate": 3.6795529872235004e-06,
      "loss": 0.0122,
      "step": 1938420
    },
    {
      "epoch": 3.1722995751589065,
      "grad_norm": 0.6899383664131165,
      "learning_rate": 3.679487095009983e-06,
      "loss": 0.0135,
      "step": 1938440
    },
    {
      "epoch": 3.1723323055975596,
      "grad_norm": 0.3286570906639099,
      "learning_rate": 3.679421202796466e-06,
      "loss": 0.0105,
      "step": 1938460
    },
    {
      "epoch": 3.172365036036213,
      "grad_norm": 0.9606730341911316,
      "learning_rate": 3.679355310582949e-06,
      "loss": 0.0103,
      "step": 1938480
    },
    {
      "epoch": 3.1723977664748664,
      "grad_norm": 0.1750364750623703,
      "learning_rate": 3.6792894183694317e-06,
      "loss": 0.0116,
      "step": 1938500
    },
    {
      "epoch": 3.1724304969135195,
      "grad_norm": 0.7002624273300171,
      "learning_rate": 3.6792235261559145e-06,
      "loss": 0.0084,
      "step": 1938520
    },
    {
      "epoch": 3.172463227352173,
      "grad_norm": 0.20320188999176025,
      "learning_rate": 3.6791576339423972e-06,
      "loss": 0.0091,
      "step": 1938540
    },
    {
      "epoch": 3.1724959577908263,
      "grad_norm": 0.179612398147583,
      "learning_rate": 3.67909174172888e-06,
      "loss": 0.0102,
      "step": 1938560
    },
    {
      "epoch": 3.1725286882294794,
      "grad_norm": 0.15707699954509735,
      "learning_rate": 3.679025849515363e-06,
      "loss": 0.0106,
      "step": 1938580
    },
    {
      "epoch": 3.172561418668133,
      "grad_norm": 0.31088247895240784,
      "learning_rate": 3.678959957301846e-06,
      "loss": 0.0152,
      "step": 1938600
    },
    {
      "epoch": 3.172594149106786,
      "grad_norm": 0.733748733997345,
      "learning_rate": 3.6788940650883286e-06,
      "loss": 0.0168,
      "step": 1938620
    },
    {
      "epoch": 3.1726268795454398,
      "grad_norm": 0.6534844040870667,
      "learning_rate": 3.6788281728748113e-06,
      "loss": 0.0161,
      "step": 1938640
    },
    {
      "epoch": 3.172659609984093,
      "grad_norm": 0.15103381872177124,
      "learning_rate": 3.6787622806612945e-06,
      "loss": 0.0097,
      "step": 1938660
    },
    {
      "epoch": 3.1726923404227465,
      "grad_norm": 0.36553001403808594,
      "learning_rate": 3.6786963884477773e-06,
      "loss": 0.0162,
      "step": 1938680
    },
    {
      "epoch": 3.1727250708613997,
      "grad_norm": 0.3554416596889496,
      "learning_rate": 3.67863049623426e-06,
      "loss": 0.0159,
      "step": 1938700
    },
    {
      "epoch": 3.172757801300053,
      "grad_norm": 0.23804403841495514,
      "learning_rate": 3.6785646040207436e-06,
      "loss": 0.0151,
      "step": 1938720
    },
    {
      "epoch": 3.1727905317387064,
      "grad_norm": 0.3054487109184265,
      "learning_rate": 3.6784987118072263e-06,
      "loss": 0.0106,
      "step": 1938740
    },
    {
      "epoch": 3.1728232621773595,
      "grad_norm": 0.3732307553291321,
      "learning_rate": 3.678432819593709e-06,
      "loss": 0.0163,
      "step": 1938760
    },
    {
      "epoch": 3.172855992616013,
      "grad_norm": 0.10198052227497101,
      "learning_rate": 3.678366927380192e-06,
      "loss": 0.0091,
      "step": 1938780
    },
    {
      "epoch": 3.1728887230546663,
      "grad_norm": 0.3108413517475128,
      "learning_rate": 3.678301035166675e-06,
      "loss": 0.014,
      "step": 1938800
    },
    {
      "epoch": 3.17292145349332,
      "grad_norm": 0.525497317314148,
      "learning_rate": 3.6782351429531577e-06,
      "loss": 0.0147,
      "step": 1938820
    },
    {
      "epoch": 3.172954183931973,
      "grad_norm": 0.6162603497505188,
      "learning_rate": 3.6781692507396404e-06,
      "loss": 0.0131,
      "step": 1938840
    },
    {
      "epoch": 3.172986914370626,
      "grad_norm": 0.17994150519371033,
      "learning_rate": 3.678103358526123e-06,
      "loss": 0.0074,
      "step": 1938860
    },
    {
      "epoch": 3.17301964480928,
      "grad_norm": 0.7246402502059937,
      "learning_rate": 3.6780374663126063e-06,
      "loss": 0.0174,
      "step": 1938880
    },
    {
      "epoch": 3.173052375247933,
      "grad_norm": 0.10267367213964462,
      "learning_rate": 3.677971574099089e-06,
      "loss": 0.0057,
      "step": 1938900
    },
    {
      "epoch": 3.1730851056865865,
      "grad_norm": 0.4379185140132904,
      "learning_rate": 3.677905681885572e-06,
      "loss": 0.0133,
      "step": 1938920
    },
    {
      "epoch": 3.1731178361252397,
      "grad_norm": 0.21975034475326538,
      "learning_rate": 3.6778397896720546e-06,
      "loss": 0.0088,
      "step": 1938940
    },
    {
      "epoch": 3.1731505665638933,
      "grad_norm": 0.2800863981246948,
      "learning_rate": 3.6777738974585377e-06,
      "loss": 0.012,
      "step": 1938960
    },
    {
      "epoch": 3.1731832970025464,
      "grad_norm": 0.3520771861076355,
      "learning_rate": 3.6777080052450205e-06,
      "loss": 0.0136,
      "step": 1938980
    },
    {
      "epoch": 3.1732160274411996,
      "grad_norm": 0.1611834168434143,
      "learning_rate": 3.677642113031503e-06,
      "loss": 0.0135,
      "step": 1939000
    },
    {
      "epoch": 3.173248757879853,
      "grad_norm": 0.23607027530670166,
      "learning_rate": 3.677576220817986e-06,
      "loss": 0.0125,
      "step": 1939020
    },
    {
      "epoch": 3.1732814883185063,
      "grad_norm": 0.21810103952884674,
      "learning_rate": 3.6775103286044687e-06,
      "loss": 0.0106,
      "step": 1939040
    },
    {
      "epoch": 3.17331421875716,
      "grad_norm": 0.1844417303800583,
      "learning_rate": 3.677444436390952e-06,
      "loss": 0.0122,
      "step": 1939060
    },
    {
      "epoch": 3.173346949195813,
      "grad_norm": 0.15961599349975586,
      "learning_rate": 3.677378544177435e-06,
      "loss": 0.0069,
      "step": 1939080
    },
    {
      "epoch": 3.1733796796344667,
      "grad_norm": 0.10359826683998108,
      "learning_rate": 3.6773126519639177e-06,
      "loss": 0.009,
      "step": 1939100
    },
    {
      "epoch": 3.17341241007312,
      "grad_norm": 0.5100622177124023,
      "learning_rate": 3.677246759750401e-06,
      "loss": 0.0182,
      "step": 1939120
    },
    {
      "epoch": 3.173445140511773,
      "grad_norm": 0.32735347747802734,
      "learning_rate": 3.6771808675368837e-06,
      "loss": 0.0138,
      "step": 1939140
    },
    {
      "epoch": 3.1734778709504265,
      "grad_norm": 1.2703770399093628,
      "learning_rate": 3.6771149753233664e-06,
      "loss": 0.0125,
      "step": 1939160
    },
    {
      "epoch": 3.1735106013890797,
      "grad_norm": 0.474926620721817,
      "learning_rate": 3.677049083109849e-06,
      "loss": 0.0134,
      "step": 1939180
    },
    {
      "epoch": 3.1735433318277333,
      "grad_norm": 0.5324528217315674,
      "learning_rate": 3.6769831908963323e-06,
      "loss": 0.0126,
      "step": 1939200
    },
    {
      "epoch": 3.1735760622663864,
      "grad_norm": 0.29455775022506714,
      "learning_rate": 3.676917298682815e-06,
      "loss": 0.0097,
      "step": 1939220
    },
    {
      "epoch": 3.17360879270504,
      "grad_norm": 0.5507360696792603,
      "learning_rate": 3.6768514064692978e-06,
      "loss": 0.0109,
      "step": 1939240
    },
    {
      "epoch": 3.173641523143693,
      "grad_norm": 0.41463571786880493,
      "learning_rate": 3.6767855142557805e-06,
      "loss": 0.0093,
      "step": 1939260
    },
    {
      "epoch": 3.1736742535823463,
      "grad_norm": 0.07179373502731323,
      "learning_rate": 3.6767196220422637e-06,
      "loss": 0.0086,
      "step": 1939280
    },
    {
      "epoch": 3.173706984021,
      "grad_norm": 0.6936435699462891,
      "learning_rate": 3.6766537298287464e-06,
      "loss": 0.0138,
      "step": 1939300
    },
    {
      "epoch": 3.173739714459653,
      "grad_norm": 0.3726249933242798,
      "learning_rate": 3.676587837615229e-06,
      "loss": 0.0147,
      "step": 1939320
    },
    {
      "epoch": 3.1737724448983067,
      "grad_norm": 0.2711639106273651,
      "learning_rate": 3.676521945401712e-06,
      "loss": 0.0136,
      "step": 1939340
    },
    {
      "epoch": 3.17380517533696,
      "grad_norm": 0.12310363352298737,
      "learning_rate": 3.676456053188195e-06,
      "loss": 0.0058,
      "step": 1939360
    },
    {
      "epoch": 3.1738379057756134,
      "grad_norm": 0.20140883326530457,
      "learning_rate": 3.676390160974678e-06,
      "loss": 0.0171,
      "step": 1939380
    },
    {
      "epoch": 3.1738706362142666,
      "grad_norm": 0.13187864422798157,
      "learning_rate": 3.6763242687611605e-06,
      "loss": 0.012,
      "step": 1939400
    },
    {
      "epoch": 3.1739033666529197,
      "grad_norm": 0.17544208467006683,
      "learning_rate": 3.6762583765476433e-06,
      "loss": 0.0126,
      "step": 1939420
    },
    {
      "epoch": 3.1739360970915733,
      "grad_norm": 0.5006489753723145,
      "learning_rate": 3.676192484334127e-06,
      "loss": 0.0137,
      "step": 1939440
    },
    {
      "epoch": 3.1739688275302265,
      "grad_norm": 0.4644469618797302,
      "learning_rate": 3.6761265921206096e-06,
      "loss": 0.0141,
      "step": 1939460
    },
    {
      "epoch": 3.17400155796888,
      "grad_norm": 0.38829728960990906,
      "learning_rate": 3.6760606999070923e-06,
      "loss": 0.0113,
      "step": 1939480
    },
    {
      "epoch": 3.174034288407533,
      "grad_norm": 0.32485270500183105,
      "learning_rate": 3.6759948076935755e-06,
      "loss": 0.0109,
      "step": 1939500
    },
    {
      "epoch": 3.174067018846187,
      "grad_norm": 0.3456536829471588,
      "learning_rate": 3.6759289154800582e-06,
      "loss": 0.0101,
      "step": 1939520
    },
    {
      "epoch": 3.17409974928484,
      "grad_norm": 0.32301849126815796,
      "learning_rate": 3.675863023266541e-06,
      "loss": 0.014,
      "step": 1939540
    },
    {
      "epoch": 3.174132479723493,
      "grad_norm": 0.08332044631242752,
      "learning_rate": 3.6757971310530237e-06,
      "loss": 0.0103,
      "step": 1939560
    },
    {
      "epoch": 3.1741652101621467,
      "grad_norm": 0.5531101822853088,
      "learning_rate": 3.6757312388395065e-06,
      "loss": 0.0137,
      "step": 1939580
    },
    {
      "epoch": 3.1741979406008,
      "grad_norm": 0.24268104135990143,
      "learning_rate": 3.6756653466259896e-06,
      "loss": 0.0115,
      "step": 1939600
    },
    {
      "epoch": 3.1742306710394534,
      "grad_norm": 0.19621659815311432,
      "learning_rate": 3.6755994544124724e-06,
      "loss": 0.0105,
      "step": 1939620
    },
    {
      "epoch": 3.1742634014781066,
      "grad_norm": 0.43227073550224304,
      "learning_rate": 3.675533562198955e-06,
      "loss": 0.012,
      "step": 1939640
    },
    {
      "epoch": 3.1742961319167597,
      "grad_norm": 0.25668904185295105,
      "learning_rate": 3.675467669985438e-06,
      "loss": 0.0165,
      "step": 1939660
    },
    {
      "epoch": 3.1743288623554133,
      "grad_norm": 0.24868831038475037,
      "learning_rate": 3.675401777771921e-06,
      "loss": 0.0088,
      "step": 1939680
    },
    {
      "epoch": 3.1743615927940665,
      "grad_norm": 0.49873673915863037,
      "learning_rate": 3.6753358855584038e-06,
      "loss": 0.0181,
      "step": 1939700
    },
    {
      "epoch": 3.17439432323272,
      "grad_norm": 0.3840058147907257,
      "learning_rate": 3.6752699933448865e-06,
      "loss": 0.008,
      "step": 1939720
    },
    {
      "epoch": 3.1744270536713732,
      "grad_norm": 0.1441904604434967,
      "learning_rate": 3.6752041011313692e-06,
      "loss": 0.0103,
      "step": 1939740
    },
    {
      "epoch": 3.174459784110027,
      "grad_norm": 0.26084306836128235,
      "learning_rate": 3.6751382089178524e-06,
      "loss": 0.0094,
      "step": 1939760
    },
    {
      "epoch": 3.17449251454868,
      "grad_norm": 0.47591856122016907,
      "learning_rate": 3.6750723167043356e-06,
      "loss": 0.0107,
      "step": 1939780
    },
    {
      "epoch": 3.174525244987333,
      "grad_norm": 0.1786007434129715,
      "learning_rate": 3.6750064244908183e-06,
      "loss": 0.0101,
      "step": 1939800
    },
    {
      "epoch": 3.1745579754259867,
      "grad_norm": 1.107337236404419,
      "learning_rate": 3.6749405322773015e-06,
      "loss": 0.0097,
      "step": 1939820
    },
    {
      "epoch": 3.17459070586464,
      "grad_norm": 0.33909958600997925,
      "learning_rate": 3.674874640063784e-06,
      "loss": 0.0092,
      "step": 1939840
    },
    {
      "epoch": 3.1746234363032935,
      "grad_norm": 0.3590717911720276,
      "learning_rate": 3.674808747850267e-06,
      "loss": 0.0136,
      "step": 1939860
    },
    {
      "epoch": 3.1746561667419466,
      "grad_norm": 0.7626486420631409,
      "learning_rate": 3.6747428556367497e-06,
      "loss": 0.016,
      "step": 1939880
    },
    {
      "epoch": 3.1746888971806,
      "grad_norm": 0.6187663674354553,
      "learning_rate": 3.674676963423233e-06,
      "loss": 0.0136,
      "step": 1939900
    },
    {
      "epoch": 3.1747216276192534,
      "grad_norm": 1.3351876735687256,
      "learning_rate": 3.6746110712097156e-06,
      "loss": 0.0143,
      "step": 1939920
    },
    {
      "epoch": 3.1747543580579065,
      "grad_norm": 0.6981229186058044,
      "learning_rate": 3.6745451789961983e-06,
      "loss": 0.0125,
      "step": 1939940
    },
    {
      "epoch": 3.17478708849656,
      "grad_norm": 0.08150322735309601,
      "learning_rate": 3.674479286782681e-06,
      "loss": 0.0134,
      "step": 1939960
    },
    {
      "epoch": 3.1748198189352133,
      "grad_norm": 0.24984867870807648,
      "learning_rate": 3.6744133945691642e-06,
      "loss": 0.0115,
      "step": 1939980
    },
    {
      "epoch": 3.174852549373867,
      "grad_norm": 0.12819813191890717,
      "learning_rate": 3.674347502355647e-06,
      "loss": 0.0114,
      "step": 1940000
    },
    {
      "epoch": 3.17488527981252,
      "grad_norm": 0.0759183019399643,
      "learning_rate": 3.6742816101421297e-06,
      "loss": 0.0078,
      "step": 1940020
    },
    {
      "epoch": 3.174918010251173,
      "grad_norm": 0.2525036931037903,
      "learning_rate": 3.6742157179286124e-06,
      "loss": 0.0096,
      "step": 1940040
    },
    {
      "epoch": 3.1749507406898267,
      "grad_norm": 1.1957356929779053,
      "learning_rate": 3.674149825715095e-06,
      "loss": 0.0199,
      "step": 1940060
    },
    {
      "epoch": 3.17498347112848,
      "grad_norm": 0.4547630846500397,
      "learning_rate": 3.6740839335015783e-06,
      "loss": 0.0109,
      "step": 1940080
    },
    {
      "epoch": 3.1750162015671335,
      "grad_norm": 0.0782853215932846,
      "learning_rate": 3.674018041288061e-06,
      "loss": 0.0106,
      "step": 1940100
    },
    {
      "epoch": 3.1750489320057866,
      "grad_norm": 0.2510716915130615,
      "learning_rate": 3.673952149074544e-06,
      "loss": 0.0121,
      "step": 1940120
    },
    {
      "epoch": 3.1750816624444402,
      "grad_norm": 0.2864173948764801,
      "learning_rate": 3.6738862568610274e-06,
      "loss": 0.008,
      "step": 1940140
    },
    {
      "epoch": 3.1751143928830934,
      "grad_norm": 0.24333059787750244,
      "learning_rate": 3.67382036464751e-06,
      "loss": 0.0133,
      "step": 1940160
    },
    {
      "epoch": 3.1751471233217465,
      "grad_norm": 0.18835929036140442,
      "learning_rate": 3.673754472433993e-06,
      "loss": 0.015,
      "step": 1940180
    },
    {
      "epoch": 3.1751798537604,
      "grad_norm": 0.28236961364746094,
      "learning_rate": 3.6736885802204756e-06,
      "loss": 0.0107,
      "step": 1940200
    },
    {
      "epoch": 3.1752125841990533,
      "grad_norm": 0.2064857929944992,
      "learning_rate": 3.673622688006959e-06,
      "loss": 0.0118,
      "step": 1940220
    },
    {
      "epoch": 3.175245314637707,
      "grad_norm": 0.4789522886276245,
      "learning_rate": 3.6735567957934415e-06,
      "loss": 0.0142,
      "step": 1940240
    },
    {
      "epoch": 3.17527804507636,
      "grad_norm": 1.0750279426574707,
      "learning_rate": 3.6734909035799243e-06,
      "loss": 0.0254,
      "step": 1940260
    },
    {
      "epoch": 3.1753107755150136,
      "grad_norm": 0.18768157064914703,
      "learning_rate": 3.673425011366407e-06,
      "loss": 0.0087,
      "step": 1940280
    },
    {
      "epoch": 3.1753435059536668,
      "grad_norm": 0.22893662750720978,
      "learning_rate": 3.67335911915289e-06,
      "loss": 0.0149,
      "step": 1940300
    },
    {
      "epoch": 3.17537623639232,
      "grad_norm": 0.34497547149658203,
      "learning_rate": 3.673293226939373e-06,
      "loss": 0.0101,
      "step": 1940320
    },
    {
      "epoch": 3.1754089668309735,
      "grad_norm": 0.2897031903266907,
      "learning_rate": 3.6732273347258557e-06,
      "loss": 0.0129,
      "step": 1940340
    },
    {
      "epoch": 3.1754416972696267,
      "grad_norm": 0.13296400010585785,
      "learning_rate": 3.6731614425123384e-06,
      "loss": 0.0077,
      "step": 1940360
    },
    {
      "epoch": 3.1754744277082803,
      "grad_norm": 0.2846340537071228,
      "learning_rate": 3.6730955502988216e-06,
      "loss": 0.0139,
      "step": 1940380
    },
    {
      "epoch": 3.1755071581469334,
      "grad_norm": 0.21647846698760986,
      "learning_rate": 3.6730296580853043e-06,
      "loss": 0.0085,
      "step": 1940400
    },
    {
      "epoch": 3.175539888585587,
      "grad_norm": 0.2080511450767517,
      "learning_rate": 3.672963765871787e-06,
      "loss": 0.012,
      "step": 1940420
    },
    {
      "epoch": 3.17557261902424,
      "grad_norm": 0.36845535039901733,
      "learning_rate": 3.6728978736582698e-06,
      "loss": 0.011,
      "step": 1940440
    },
    {
      "epoch": 3.1756053494628933,
      "grad_norm": 0.35962626338005066,
      "learning_rate": 3.6728319814447525e-06,
      "loss": 0.0098,
      "step": 1940460
    },
    {
      "epoch": 3.175638079901547,
      "grad_norm": 0.14912188053131104,
      "learning_rate": 3.672766089231236e-06,
      "loss": 0.0101,
      "step": 1940480
    },
    {
      "epoch": 3.1756708103402,
      "grad_norm": 0.4421219527721405,
      "learning_rate": 3.672700197017719e-06,
      "loss": 0.0103,
      "step": 1940500
    },
    {
      "epoch": 3.1757035407788536,
      "grad_norm": 1.12601900100708,
      "learning_rate": 3.6726343048042016e-06,
      "loss": 0.014,
      "step": 1940520
    },
    {
      "epoch": 3.175736271217507,
      "grad_norm": 0.36378833651542664,
      "learning_rate": 3.6725684125906848e-06,
      "loss": 0.012,
      "step": 1940540
    },
    {
      "epoch": 3.1757690016561604,
      "grad_norm": 0.3877772390842438,
      "learning_rate": 3.6725025203771675e-06,
      "loss": 0.009,
      "step": 1940560
    },
    {
      "epoch": 3.1758017320948135,
      "grad_norm": 0.16414090991020203,
      "learning_rate": 3.6724366281636502e-06,
      "loss": 0.0126,
      "step": 1940580
    },
    {
      "epoch": 3.1758344625334667,
      "grad_norm": 0.46530449390411377,
      "learning_rate": 3.672370735950133e-06,
      "loss": 0.0123,
      "step": 1940600
    },
    {
      "epoch": 3.1758671929721203,
      "grad_norm": 0.43181145191192627,
      "learning_rate": 3.672304843736616e-06,
      "loss": 0.0092,
      "step": 1940620
    },
    {
      "epoch": 3.1758999234107734,
      "grad_norm": 0.20749510824680328,
      "learning_rate": 3.672238951523099e-06,
      "loss": 0.0082,
      "step": 1940640
    },
    {
      "epoch": 3.175932653849427,
      "grad_norm": 0.40876534581184387,
      "learning_rate": 3.6721730593095816e-06,
      "loss": 0.012,
      "step": 1940660
    },
    {
      "epoch": 3.17596538428808,
      "grad_norm": 0.33295416831970215,
      "learning_rate": 3.6721071670960644e-06,
      "loss": 0.0093,
      "step": 1940680
    },
    {
      "epoch": 3.1759981147267338,
      "grad_norm": 0.3950481116771698,
      "learning_rate": 3.6720412748825475e-06,
      "loss": 0.0127,
      "step": 1940700
    },
    {
      "epoch": 3.176030845165387,
      "grad_norm": 1.0005344152450562,
      "learning_rate": 3.6719753826690303e-06,
      "loss": 0.01,
      "step": 1940720
    },
    {
      "epoch": 3.17606357560404,
      "grad_norm": 0.13161198794841766,
      "learning_rate": 3.671909490455513e-06,
      "loss": 0.0119,
      "step": 1940740
    },
    {
      "epoch": 3.1760963060426937,
      "grad_norm": 0.45292142033576965,
      "learning_rate": 3.6718435982419957e-06,
      "loss": 0.0107,
      "step": 1940760
    },
    {
      "epoch": 3.176129036481347,
      "grad_norm": 0.2225799411535263,
      "learning_rate": 3.671777706028479e-06,
      "loss": 0.012,
      "step": 1940780
    },
    {
      "epoch": 3.1761617669200004,
      "grad_norm": 0.053276024758815765,
      "learning_rate": 3.6717118138149616e-06,
      "loss": 0.0107,
      "step": 1940800
    },
    {
      "epoch": 3.1761944973586536,
      "grad_norm": 0.2566247284412384,
      "learning_rate": 3.6716459216014444e-06,
      "loss": 0.0106,
      "step": 1940820
    },
    {
      "epoch": 3.176227227797307,
      "grad_norm": 0.5264362692832947,
      "learning_rate": 3.671580029387928e-06,
      "loss": 0.0127,
      "step": 1940840
    },
    {
      "epoch": 3.1762599582359603,
      "grad_norm": 0.13824236392974854,
      "learning_rate": 3.6715141371744107e-06,
      "loss": 0.0085,
      "step": 1940860
    },
    {
      "epoch": 3.1762926886746135,
      "grad_norm": 0.304439514875412,
      "learning_rate": 3.6714482449608934e-06,
      "loss": 0.0124,
      "step": 1940880
    },
    {
      "epoch": 3.176325419113267,
      "grad_norm": 1.041609764099121,
      "learning_rate": 3.671382352747376e-06,
      "loss": 0.0109,
      "step": 1940900
    },
    {
      "epoch": 3.17635814955192,
      "grad_norm": 0.2604747414588928,
      "learning_rate": 3.6713164605338593e-06,
      "loss": 0.0084,
      "step": 1940920
    },
    {
      "epoch": 3.176390879990574,
      "grad_norm": 0.21349890530109406,
      "learning_rate": 3.671250568320342e-06,
      "loss": 0.0091,
      "step": 1940940
    },
    {
      "epoch": 3.176423610429227,
      "grad_norm": 0.29185667634010315,
      "learning_rate": 3.671184676106825e-06,
      "loss": 0.0115,
      "step": 1940960
    },
    {
      "epoch": 3.1764563408678805,
      "grad_norm": 0.057549700140953064,
      "learning_rate": 3.6711187838933076e-06,
      "loss": 0.0124,
      "step": 1940980
    },
    {
      "epoch": 3.1764890713065337,
      "grad_norm": 0.41661593317985535,
      "learning_rate": 3.6710528916797903e-06,
      "loss": 0.0091,
      "step": 1941000
    },
    {
      "epoch": 3.176521801745187,
      "grad_norm": 0.2823144197463989,
      "learning_rate": 3.6709869994662735e-06,
      "loss": 0.013,
      "step": 1941020
    },
    {
      "epoch": 3.1765545321838404,
      "grad_norm": 0.763457179069519,
      "learning_rate": 3.670921107252756e-06,
      "loss": 0.0123,
      "step": 1941040
    },
    {
      "epoch": 3.1765872626224936,
      "grad_norm": 0.467722624540329,
      "learning_rate": 3.670855215039239e-06,
      "loss": 0.013,
      "step": 1941060
    },
    {
      "epoch": 3.176619993061147,
      "grad_norm": 0.569316565990448,
      "learning_rate": 3.6707893228257217e-06,
      "loss": 0.01,
      "step": 1941080
    },
    {
      "epoch": 3.1766527234998003,
      "grad_norm": 0.42569172382354736,
      "learning_rate": 3.670723430612205e-06,
      "loss": 0.0128,
      "step": 1941100
    },
    {
      "epoch": 3.176685453938454,
      "grad_norm": 0.0769757553935051,
      "learning_rate": 3.6706575383986876e-06,
      "loss": 0.0128,
      "step": 1941120
    },
    {
      "epoch": 3.176718184377107,
      "grad_norm": 0.2396581768989563,
      "learning_rate": 3.6705916461851703e-06,
      "loss": 0.0054,
      "step": 1941140
    },
    {
      "epoch": 3.17675091481576,
      "grad_norm": 0.6319209337234497,
      "learning_rate": 3.670525753971653e-06,
      "loss": 0.0069,
      "step": 1941160
    },
    {
      "epoch": 3.176783645254414,
      "grad_norm": 0.7628490328788757,
      "learning_rate": 3.6704598617581362e-06,
      "loss": 0.0085,
      "step": 1941180
    },
    {
      "epoch": 3.176816375693067,
      "grad_norm": 0.37961849570274353,
      "learning_rate": 3.6703939695446194e-06,
      "loss": 0.0071,
      "step": 1941200
    },
    {
      "epoch": 3.1768491061317206,
      "grad_norm": 0.34850773215293884,
      "learning_rate": 3.670328077331102e-06,
      "loss": 0.009,
      "step": 1941220
    },
    {
      "epoch": 3.1768818365703737,
      "grad_norm": 0.15230612456798553,
      "learning_rate": 3.6702621851175853e-06,
      "loss": 0.0114,
      "step": 1941240
    },
    {
      "epoch": 3.176914567009027,
      "grad_norm": 0.11904606968164444,
      "learning_rate": 3.670196292904068e-06,
      "loss": 0.0169,
      "step": 1941260
    },
    {
      "epoch": 3.1769472974476805,
      "grad_norm": 0.20794665813446045,
      "learning_rate": 3.6701304006905508e-06,
      "loss": 0.0074,
      "step": 1941280
    },
    {
      "epoch": 3.1769800278863336,
      "grad_norm": 0.2776425778865814,
      "learning_rate": 3.6700645084770335e-06,
      "loss": 0.008,
      "step": 1941300
    },
    {
      "epoch": 3.177012758324987,
      "grad_norm": 0.22133195400238037,
      "learning_rate": 3.6699986162635167e-06,
      "loss": 0.0085,
      "step": 1941320
    },
    {
      "epoch": 3.1770454887636403,
      "grad_norm": 0.06719482690095901,
      "learning_rate": 3.6699327240499994e-06,
      "loss": 0.0124,
      "step": 1941340
    },
    {
      "epoch": 3.177078219202294,
      "grad_norm": 0.12100399285554886,
      "learning_rate": 3.669866831836482e-06,
      "loss": 0.0084,
      "step": 1941360
    },
    {
      "epoch": 3.177110949640947,
      "grad_norm": 0.08963525295257568,
      "learning_rate": 3.669800939622965e-06,
      "loss": 0.007,
      "step": 1941380
    },
    {
      "epoch": 3.1771436800796002,
      "grad_norm": 0.49931690096855164,
      "learning_rate": 3.669735047409448e-06,
      "loss": 0.0095,
      "step": 1941400
    },
    {
      "epoch": 3.177176410518254,
      "grad_norm": 0.3375181555747986,
      "learning_rate": 3.669669155195931e-06,
      "loss": 0.0064,
      "step": 1941420
    },
    {
      "epoch": 3.177209140956907,
      "grad_norm": 0.2270442396402359,
      "learning_rate": 3.6696032629824135e-06,
      "loss": 0.0096,
      "step": 1941440
    },
    {
      "epoch": 3.1772418713955606,
      "grad_norm": 0.11148297786712646,
      "learning_rate": 3.6695373707688963e-06,
      "loss": 0.0098,
      "step": 1941460
    },
    {
      "epoch": 3.1772746018342137,
      "grad_norm": 0.2041577696800232,
      "learning_rate": 3.669471478555379e-06,
      "loss": 0.0099,
      "step": 1941480
    },
    {
      "epoch": 3.1773073322728673,
      "grad_norm": 0.508511483669281,
      "learning_rate": 3.669405586341862e-06,
      "loss": 0.012,
      "step": 1941500
    },
    {
      "epoch": 3.1773400627115205,
      "grad_norm": 0.4953097701072693,
      "learning_rate": 3.669339694128345e-06,
      "loss": 0.0101,
      "step": 1941520
    },
    {
      "epoch": 3.1773727931501736,
      "grad_norm": 0.22500577569007874,
      "learning_rate": 3.669273801914828e-06,
      "loss": 0.0092,
      "step": 1941540
    },
    {
      "epoch": 3.177405523588827,
      "grad_norm": 0.9627678394317627,
      "learning_rate": 3.6692079097013113e-06,
      "loss": 0.0172,
      "step": 1941560
    },
    {
      "epoch": 3.1774382540274804,
      "grad_norm": 0.3556439280509949,
      "learning_rate": 3.669142017487794e-06,
      "loss": 0.0165,
      "step": 1941580
    },
    {
      "epoch": 3.177470984466134,
      "grad_norm": 0.15679462254047394,
      "learning_rate": 3.6690761252742767e-06,
      "loss": 0.013,
      "step": 1941600
    },
    {
      "epoch": 3.177503714904787,
      "grad_norm": 0.14524959027767181,
      "learning_rate": 3.6690102330607595e-06,
      "loss": 0.0177,
      "step": 1941620
    },
    {
      "epoch": 3.1775364453434403,
      "grad_norm": 0.1051868349313736,
      "learning_rate": 3.6689443408472426e-06,
      "loss": 0.0097,
      "step": 1941640
    },
    {
      "epoch": 3.177569175782094,
      "grad_norm": 0.705903172492981,
      "learning_rate": 3.6688784486337254e-06,
      "loss": 0.0156,
      "step": 1941660
    },
    {
      "epoch": 3.177601906220747,
      "grad_norm": 0.626760721206665,
      "learning_rate": 3.668812556420208e-06,
      "loss": 0.0103,
      "step": 1941680
    },
    {
      "epoch": 3.1776346366594006,
      "grad_norm": 0.31851258873939514,
      "learning_rate": 3.668746664206691e-06,
      "loss": 0.0117,
      "step": 1941700
    },
    {
      "epoch": 3.1776673670980538,
      "grad_norm": 0.12146732211112976,
      "learning_rate": 3.668680771993174e-06,
      "loss": 0.0146,
      "step": 1941720
    },
    {
      "epoch": 3.1777000975367073,
      "grad_norm": 0.10218686610460281,
      "learning_rate": 3.6686148797796568e-06,
      "loss": 0.0092,
      "step": 1941740
    },
    {
      "epoch": 3.1777328279753605,
      "grad_norm": 0.2288537621498108,
      "learning_rate": 3.6685489875661395e-06,
      "loss": 0.0113,
      "step": 1941760
    },
    {
      "epoch": 3.1777655584140136,
      "grad_norm": 0.1258399784564972,
      "learning_rate": 3.6684830953526222e-06,
      "loss": 0.0101,
      "step": 1941780
    },
    {
      "epoch": 3.1777982888526672,
      "grad_norm": 0.10082964599132538,
      "learning_rate": 3.6684172031391054e-06,
      "loss": 0.0123,
      "step": 1941800
    },
    {
      "epoch": 3.1778310192913204,
      "grad_norm": 0.47308555245399475,
      "learning_rate": 3.668351310925588e-06,
      "loss": 0.0165,
      "step": 1941820
    },
    {
      "epoch": 3.177863749729974,
      "grad_norm": 0.3163067698478699,
      "learning_rate": 3.668285418712071e-06,
      "loss": 0.0085,
      "step": 1941840
    },
    {
      "epoch": 3.177896480168627,
      "grad_norm": 0.1853042095899582,
      "learning_rate": 3.6682195264985536e-06,
      "loss": 0.0099,
      "step": 1941860
    },
    {
      "epoch": 3.1779292106072807,
      "grad_norm": 0.21746620535850525,
      "learning_rate": 3.6681536342850364e-06,
      "loss": 0.0094,
      "step": 1941880
    },
    {
      "epoch": 3.177961941045934,
      "grad_norm": 0.3942663371562958,
      "learning_rate": 3.66808774207152e-06,
      "loss": 0.0116,
      "step": 1941900
    },
    {
      "epoch": 3.177994671484587,
      "grad_norm": 0.07246153801679611,
      "learning_rate": 3.6680218498580027e-06,
      "loss": 0.006,
      "step": 1941920
    },
    {
      "epoch": 3.1780274019232406,
      "grad_norm": 0.2095155417919159,
      "learning_rate": 3.667955957644486e-06,
      "loss": 0.0138,
      "step": 1941940
    },
    {
      "epoch": 3.1780601323618938,
      "grad_norm": 0.0843619629740715,
      "learning_rate": 3.6678900654309686e-06,
      "loss": 0.0084,
      "step": 1941960
    },
    {
      "epoch": 3.1780928628005474,
      "grad_norm": 0.3802211582660675,
      "learning_rate": 3.6678241732174513e-06,
      "loss": 0.0122,
      "step": 1941980
    },
    {
      "epoch": 3.1781255932392005,
      "grad_norm": 0.25377458333969116,
      "learning_rate": 3.667758281003934e-06,
      "loss": 0.0198,
      "step": 1942000
    },
    {
      "epoch": 3.178158323677854,
      "grad_norm": 0.4378551244735718,
      "learning_rate": 3.667692388790417e-06,
      "loss": 0.011,
      "step": 1942020
    },
    {
      "epoch": 3.1781910541165073,
      "grad_norm": 0.3025210201740265,
      "learning_rate": 3.6676264965769e-06,
      "loss": 0.0145,
      "step": 1942040
    },
    {
      "epoch": 3.1782237845551604,
      "grad_norm": 0.5762673616409302,
      "learning_rate": 3.6675606043633827e-06,
      "loss": 0.0152,
      "step": 1942060
    },
    {
      "epoch": 3.178256514993814,
      "grad_norm": 0.435494601726532,
      "learning_rate": 3.6674947121498655e-06,
      "loss": 0.0202,
      "step": 1942080
    },
    {
      "epoch": 3.178289245432467,
      "grad_norm": 0.09231852740049362,
      "learning_rate": 3.667428819936348e-06,
      "loss": 0.0127,
      "step": 1942100
    },
    {
      "epoch": 3.1783219758711208,
      "grad_norm": 0.34289994835853577,
      "learning_rate": 3.6673629277228314e-06,
      "loss": 0.0096,
      "step": 1942120
    },
    {
      "epoch": 3.178354706309774,
      "grad_norm": 0.708741307258606,
      "learning_rate": 3.667297035509314e-06,
      "loss": 0.0146,
      "step": 1942140
    },
    {
      "epoch": 3.1783874367484275,
      "grad_norm": 0.3659757077693939,
      "learning_rate": 3.667231143295797e-06,
      "loss": 0.0186,
      "step": 1942160
    },
    {
      "epoch": 3.1784201671870806,
      "grad_norm": 0.2843902111053467,
      "learning_rate": 3.6671652510822796e-06,
      "loss": 0.0138,
      "step": 1942180
    },
    {
      "epoch": 3.178452897625734,
      "grad_norm": 0.6572840213775635,
      "learning_rate": 3.6670993588687627e-06,
      "loss": 0.0103,
      "step": 1942200
    },
    {
      "epoch": 3.1784856280643874,
      "grad_norm": 0.20285271108150482,
      "learning_rate": 3.6670334666552455e-06,
      "loss": 0.0135,
      "step": 1942220
    },
    {
      "epoch": 3.1785183585030405,
      "grad_norm": 0.21898359060287476,
      "learning_rate": 3.6669675744417286e-06,
      "loss": 0.0124,
      "step": 1942240
    },
    {
      "epoch": 3.178551088941694,
      "grad_norm": 0.383064866065979,
      "learning_rate": 3.666901682228212e-06,
      "loss": 0.0165,
      "step": 1942260
    },
    {
      "epoch": 3.1785838193803473,
      "grad_norm": 0.8994923830032349,
      "learning_rate": 3.6668357900146945e-06,
      "loss": 0.0183,
      "step": 1942280
    },
    {
      "epoch": 3.178616549819001,
      "grad_norm": 0.5122485756874084,
      "learning_rate": 3.6667698978011773e-06,
      "loss": 0.0083,
      "step": 1942300
    },
    {
      "epoch": 3.178649280257654,
      "grad_norm": 0.2061787098646164,
      "learning_rate": 3.66670400558766e-06,
      "loss": 0.0151,
      "step": 1942320
    },
    {
      "epoch": 3.178682010696307,
      "grad_norm": 0.2700277864933014,
      "learning_rate": 3.666638113374143e-06,
      "loss": 0.0146,
      "step": 1942340
    },
    {
      "epoch": 3.1787147411349608,
      "grad_norm": 0.12428899854421616,
      "learning_rate": 3.666572221160626e-06,
      "loss": 0.0118,
      "step": 1942360
    },
    {
      "epoch": 3.178747471573614,
      "grad_norm": 0.35932305455207825,
      "learning_rate": 3.6665063289471087e-06,
      "loss": 0.0108,
      "step": 1942380
    },
    {
      "epoch": 3.1787802020122675,
      "grad_norm": 0.09471599012613297,
      "learning_rate": 3.6664404367335914e-06,
      "loss": 0.0094,
      "step": 1942400
    },
    {
      "epoch": 3.1788129324509207,
      "grad_norm": 0.20750883221626282,
      "learning_rate": 3.666374544520074e-06,
      "loss": 0.0165,
      "step": 1942420
    },
    {
      "epoch": 3.1788456628895743,
      "grad_norm": 0.2126641720533371,
      "learning_rate": 3.6663086523065573e-06,
      "loss": 0.009,
      "step": 1942440
    },
    {
      "epoch": 3.1788783933282274,
      "grad_norm": 0.07346895337104797,
      "learning_rate": 3.66624276009304e-06,
      "loss": 0.0105,
      "step": 1942460
    },
    {
      "epoch": 3.1789111237668806,
      "grad_norm": 0.3882688581943512,
      "learning_rate": 3.666176867879523e-06,
      "loss": 0.0143,
      "step": 1942480
    },
    {
      "epoch": 3.178943854205534,
      "grad_norm": 0.3458939492702484,
      "learning_rate": 3.6661109756660055e-06,
      "loss": 0.0113,
      "step": 1942500
    },
    {
      "epoch": 3.1789765846441873,
      "grad_norm": 0.3833538293838501,
      "learning_rate": 3.6660450834524887e-06,
      "loss": 0.0108,
      "step": 1942520
    },
    {
      "epoch": 3.179009315082841,
      "grad_norm": 0.3775322437286377,
      "learning_rate": 3.6659791912389714e-06,
      "loss": 0.0136,
      "step": 1942540
    },
    {
      "epoch": 3.179042045521494,
      "grad_norm": 0.15398840606212616,
      "learning_rate": 3.665913299025454e-06,
      "loss": 0.0119,
      "step": 1942560
    },
    {
      "epoch": 3.1790747759601476,
      "grad_norm": 1.00083327293396,
      "learning_rate": 3.665847406811937e-06,
      "loss": 0.0123,
      "step": 1942580
    },
    {
      "epoch": 3.179107506398801,
      "grad_norm": 0.2370694875717163,
      "learning_rate": 3.6657815145984205e-06,
      "loss": 0.0127,
      "step": 1942600
    },
    {
      "epoch": 3.179140236837454,
      "grad_norm": 0.14628814160823822,
      "learning_rate": 3.6657156223849032e-06,
      "loss": 0.0138,
      "step": 1942620
    },
    {
      "epoch": 3.1791729672761075,
      "grad_norm": 0.2654325067996979,
      "learning_rate": 3.665649730171386e-06,
      "loss": 0.0099,
      "step": 1942640
    },
    {
      "epoch": 3.1792056977147607,
      "grad_norm": 0.25880900025367737,
      "learning_rate": 3.665583837957869e-06,
      "loss": 0.0096,
      "step": 1942660
    },
    {
      "epoch": 3.1792384281534143,
      "grad_norm": 0.30068686604499817,
      "learning_rate": 3.665517945744352e-06,
      "loss": 0.0077,
      "step": 1942680
    },
    {
      "epoch": 3.1792711585920674,
      "grad_norm": 0.1052865982055664,
      "learning_rate": 3.6654520535308346e-06,
      "loss": 0.0148,
      "step": 1942700
    },
    {
      "epoch": 3.1793038890307206,
      "grad_norm": 0.5924834609031677,
      "learning_rate": 3.6653861613173174e-06,
      "loss": 0.0106,
      "step": 1942720
    },
    {
      "epoch": 3.179336619469374,
      "grad_norm": 0.2536129355430603,
      "learning_rate": 3.6653202691038005e-06,
      "loss": 0.0067,
      "step": 1942740
    },
    {
      "epoch": 3.1793693499080273,
      "grad_norm": 0.17596535384655,
      "learning_rate": 3.6652543768902833e-06,
      "loss": 0.0143,
      "step": 1942760
    },
    {
      "epoch": 3.179402080346681,
      "grad_norm": 0.9393377900123596,
      "learning_rate": 3.665188484676766e-06,
      "loss": 0.0086,
      "step": 1942780
    },
    {
      "epoch": 3.179434810785334,
      "grad_norm": 0.271747887134552,
      "learning_rate": 3.6651225924632487e-06,
      "loss": 0.0124,
      "step": 1942800
    },
    {
      "epoch": 3.1794675412239877,
      "grad_norm": 0.1847163736820221,
      "learning_rate": 3.665056700249732e-06,
      "loss": 0.0109,
      "step": 1942820
    },
    {
      "epoch": 3.179500271662641,
      "grad_norm": 0.20947735011577606,
      "learning_rate": 3.6649908080362146e-06,
      "loss": 0.0152,
      "step": 1942840
    },
    {
      "epoch": 3.179533002101294,
      "grad_norm": 0.37227863073349,
      "learning_rate": 3.6649249158226974e-06,
      "loss": 0.0159,
      "step": 1942860
    },
    {
      "epoch": 3.1795657325399476,
      "grad_norm": 0.40161067247390747,
      "learning_rate": 3.66485902360918e-06,
      "loss": 0.0094,
      "step": 1942880
    },
    {
      "epoch": 3.1795984629786007,
      "grad_norm": 0.15551525354385376,
      "learning_rate": 3.664793131395663e-06,
      "loss": 0.0144,
      "step": 1942900
    },
    {
      "epoch": 3.1796311934172543,
      "grad_norm": 0.1737397313117981,
      "learning_rate": 3.664727239182146e-06,
      "loss": 0.0083,
      "step": 1942920
    },
    {
      "epoch": 3.1796639238559075,
      "grad_norm": 0.1872476190328598,
      "learning_rate": 3.6646613469686288e-06,
      "loss": 0.0128,
      "step": 1942940
    },
    {
      "epoch": 3.179696654294561,
      "grad_norm": 0.3567858934402466,
      "learning_rate": 3.664595454755112e-06,
      "loss": 0.016,
      "step": 1942960
    },
    {
      "epoch": 3.179729384733214,
      "grad_norm": 0.17827734351158142,
      "learning_rate": 3.664529562541595e-06,
      "loss": 0.0096,
      "step": 1942980
    },
    {
      "epoch": 3.1797621151718674,
      "grad_norm": 0.2459399551153183,
      "learning_rate": 3.664463670328078e-06,
      "loss": 0.015,
      "step": 1943000
    },
    {
      "epoch": 3.179794845610521,
      "grad_norm": 0.4223952293395996,
      "learning_rate": 3.6643977781145606e-06,
      "loss": 0.0142,
      "step": 1943020
    },
    {
      "epoch": 3.179827576049174,
      "grad_norm": 0.15433968603610992,
      "learning_rate": 3.6643318859010433e-06,
      "loss": 0.0141,
      "step": 1943040
    },
    {
      "epoch": 3.1798603064878277,
      "grad_norm": 0.3078429102897644,
      "learning_rate": 3.6642659936875265e-06,
      "loss": 0.0113,
      "step": 1943060
    },
    {
      "epoch": 3.179893036926481,
      "grad_norm": 0.1306915581226349,
      "learning_rate": 3.6642001014740092e-06,
      "loss": 0.0103,
      "step": 1943080
    },
    {
      "epoch": 3.179925767365134,
      "grad_norm": 0.5187882781028748,
      "learning_rate": 3.664134209260492e-06,
      "loss": 0.0134,
      "step": 1943100
    },
    {
      "epoch": 3.1799584978037876,
      "grad_norm": 0.15719838440418243,
      "learning_rate": 3.6640683170469747e-06,
      "loss": 0.0104,
      "step": 1943120
    },
    {
      "epoch": 3.1799912282424407,
      "grad_norm": 0.28288888931274414,
      "learning_rate": 3.664002424833458e-06,
      "loss": 0.009,
      "step": 1943140
    },
    {
      "epoch": 3.1800239586810943,
      "grad_norm": 0.08714563399553299,
      "learning_rate": 3.6639365326199406e-06,
      "loss": 0.0131,
      "step": 1943160
    },
    {
      "epoch": 3.1800566891197475,
      "grad_norm": 0.5681692361831665,
      "learning_rate": 3.6638706404064233e-06,
      "loss": 0.0115,
      "step": 1943180
    },
    {
      "epoch": 3.180089419558401,
      "grad_norm": 0.15880845487117767,
      "learning_rate": 3.663804748192906e-06,
      "loss": 0.0097,
      "step": 1943200
    },
    {
      "epoch": 3.1801221499970542,
      "grad_norm": 0.2634863555431366,
      "learning_rate": 3.6637388559793892e-06,
      "loss": 0.0102,
      "step": 1943220
    },
    {
      "epoch": 3.1801548804357074,
      "grad_norm": 0.09226623922586441,
      "learning_rate": 3.663672963765872e-06,
      "loss": 0.0094,
      "step": 1943240
    },
    {
      "epoch": 3.180187610874361,
      "grad_norm": 0.7671657800674438,
      "learning_rate": 3.6636070715523547e-06,
      "loss": 0.0166,
      "step": 1943260
    },
    {
      "epoch": 3.180220341313014,
      "grad_norm": 0.16176335513591766,
      "learning_rate": 3.6635411793388375e-06,
      "loss": 0.0138,
      "step": 1943280
    },
    {
      "epoch": 3.1802530717516677,
      "grad_norm": 0.3376900255680084,
      "learning_rate": 3.663475287125321e-06,
      "loss": 0.01,
      "step": 1943300
    },
    {
      "epoch": 3.180285802190321,
      "grad_norm": 0.5744916796684265,
      "learning_rate": 3.6634093949118038e-06,
      "loss": 0.0198,
      "step": 1943320
    },
    {
      "epoch": 3.1803185326289745,
      "grad_norm": 0.04458329454064369,
      "learning_rate": 3.6633435026982865e-06,
      "loss": 0.0101,
      "step": 1943340
    },
    {
      "epoch": 3.1803512630676276,
      "grad_norm": 0.13333261013031006,
      "learning_rate": 3.6632776104847697e-06,
      "loss": 0.0135,
      "step": 1943360
    },
    {
      "epoch": 3.1803839935062808,
      "grad_norm": 0.4078453481197357,
      "learning_rate": 3.6632117182712524e-06,
      "loss": 0.0114,
      "step": 1943380
    },
    {
      "epoch": 3.1804167239449344,
      "grad_norm": 0.5856363773345947,
      "learning_rate": 3.663145826057735e-06,
      "loss": 0.0134,
      "step": 1943400
    },
    {
      "epoch": 3.1804494543835875,
      "grad_norm": 0.26145708560943604,
      "learning_rate": 3.663079933844218e-06,
      "loss": 0.0168,
      "step": 1943420
    },
    {
      "epoch": 3.180482184822241,
      "grad_norm": 0.31900787353515625,
      "learning_rate": 3.6630140416307006e-06,
      "loss": 0.0075,
      "step": 1943440
    },
    {
      "epoch": 3.1805149152608942,
      "grad_norm": 0.15356434881687164,
      "learning_rate": 3.662948149417184e-06,
      "loss": 0.0114,
      "step": 1943460
    },
    {
      "epoch": 3.180547645699548,
      "grad_norm": 0.7017359137535095,
      "learning_rate": 3.6628822572036666e-06,
      "loss": 0.0096,
      "step": 1943480
    },
    {
      "epoch": 3.180580376138201,
      "grad_norm": 0.3267953395843506,
      "learning_rate": 3.6628163649901493e-06,
      "loss": 0.0156,
      "step": 1943500
    },
    {
      "epoch": 3.180613106576854,
      "grad_norm": 0.3187527358531952,
      "learning_rate": 3.662750472776632e-06,
      "loss": 0.0126,
      "step": 1943520
    },
    {
      "epoch": 3.1806458370155077,
      "grad_norm": 0.22174258530139923,
      "learning_rate": 3.662684580563115e-06,
      "loss": 0.0138,
      "step": 1943540
    },
    {
      "epoch": 3.180678567454161,
      "grad_norm": 0.3953808844089508,
      "learning_rate": 3.662618688349598e-06,
      "loss": 0.01,
      "step": 1943560
    },
    {
      "epoch": 3.1807112978928145,
      "grad_norm": 0.14779935777187347,
      "learning_rate": 3.6625527961360807e-06,
      "loss": 0.0119,
      "step": 1943580
    },
    {
      "epoch": 3.1807440283314676,
      "grad_norm": 0.7898769974708557,
      "learning_rate": 3.6624869039225634e-06,
      "loss": 0.0188,
      "step": 1943600
    },
    {
      "epoch": 3.1807767587701212,
      "grad_norm": 0.4169294536113739,
      "learning_rate": 3.6624210117090466e-06,
      "loss": 0.0101,
      "step": 1943620
    },
    {
      "epoch": 3.1808094892087744,
      "grad_norm": 0.46949347853660583,
      "learning_rate": 3.6623551194955293e-06,
      "loss": 0.007,
      "step": 1943640
    },
    {
      "epoch": 3.1808422196474275,
      "grad_norm": 0.18873856961727142,
      "learning_rate": 3.6622892272820125e-06,
      "loss": 0.0113,
      "step": 1943660
    },
    {
      "epoch": 3.180874950086081,
      "grad_norm": 0.1600562483072281,
      "learning_rate": 3.6622233350684956e-06,
      "loss": 0.0088,
      "step": 1943680
    },
    {
      "epoch": 3.1809076805247343,
      "grad_norm": 0.32529133558273315,
      "learning_rate": 3.6621574428549784e-06,
      "loss": 0.0112,
      "step": 1943700
    },
    {
      "epoch": 3.180940410963388,
      "grad_norm": 0.5819548964500427,
      "learning_rate": 3.662091550641461e-06,
      "loss": 0.0148,
      "step": 1943720
    },
    {
      "epoch": 3.180973141402041,
      "grad_norm": 0.4495609700679779,
      "learning_rate": 3.662025658427944e-06,
      "loss": 0.0116,
      "step": 1943740
    },
    {
      "epoch": 3.1810058718406946,
      "grad_norm": 0.25873082876205444,
      "learning_rate": 3.661959766214427e-06,
      "loss": 0.0074,
      "step": 1943760
    },
    {
      "epoch": 3.1810386022793478,
      "grad_norm": 0.2566823959350586,
      "learning_rate": 3.6618938740009098e-06,
      "loss": 0.0124,
      "step": 1943780
    },
    {
      "epoch": 3.181071332718001,
      "grad_norm": 0.27671554684638977,
      "learning_rate": 3.6618279817873925e-06,
      "loss": 0.0099,
      "step": 1943800
    },
    {
      "epoch": 3.1811040631566545,
      "grad_norm": 0.949824869632721,
      "learning_rate": 3.6617620895738752e-06,
      "loss": 0.0164,
      "step": 1943820
    },
    {
      "epoch": 3.1811367935953077,
      "grad_norm": 0.07896032929420471,
      "learning_rate": 3.661696197360358e-06,
      "loss": 0.013,
      "step": 1943840
    },
    {
      "epoch": 3.1811695240339612,
      "grad_norm": 0.6272587180137634,
      "learning_rate": 3.661630305146841e-06,
      "loss": 0.0128,
      "step": 1943860
    },
    {
      "epoch": 3.1812022544726144,
      "grad_norm": 0.342377245426178,
      "learning_rate": 3.661564412933324e-06,
      "loss": 0.0151,
      "step": 1943880
    },
    {
      "epoch": 3.181234984911268,
      "grad_norm": 0.2907080054283142,
      "learning_rate": 3.6614985207198066e-06,
      "loss": 0.0123,
      "step": 1943900
    },
    {
      "epoch": 3.181267715349921,
      "grad_norm": 0.10364672541618347,
      "learning_rate": 3.6614326285062894e-06,
      "loss": 0.0073,
      "step": 1943920
    },
    {
      "epoch": 3.1813004457885743,
      "grad_norm": 0.06666792184114456,
      "learning_rate": 3.6613667362927725e-06,
      "loss": 0.0077,
      "step": 1943940
    },
    {
      "epoch": 3.181333176227228,
      "grad_norm": 0.32696598768234253,
      "learning_rate": 3.6613008440792553e-06,
      "loss": 0.0079,
      "step": 1943960
    },
    {
      "epoch": 3.181365906665881,
      "grad_norm": 0.2933853566646576,
      "learning_rate": 3.661234951865738e-06,
      "loss": 0.0123,
      "step": 1943980
    },
    {
      "epoch": 3.1813986371045346,
      "grad_norm": 0.5931915044784546,
      "learning_rate": 3.6611690596522216e-06,
      "loss": 0.0129,
      "step": 1944000
    },
    {
      "epoch": 3.181431367543188,
      "grad_norm": 0.43225324153900146,
      "learning_rate": 3.6611031674387043e-06,
      "loss": 0.0132,
      "step": 1944020
    },
    {
      "epoch": 3.1814640979818414,
      "grad_norm": 0.3008480966091156,
      "learning_rate": 3.661037275225187e-06,
      "loss": 0.0115,
      "step": 1944040
    },
    {
      "epoch": 3.1814968284204945,
      "grad_norm": 0.22263352572917938,
      "learning_rate": 3.66097138301167e-06,
      "loss": 0.0115,
      "step": 1944060
    },
    {
      "epoch": 3.1815295588591477,
      "grad_norm": 0.7573183178901672,
      "learning_rate": 3.660905490798153e-06,
      "loss": 0.0162,
      "step": 1944080
    },
    {
      "epoch": 3.1815622892978013,
      "grad_norm": 0.2119549661874771,
      "learning_rate": 3.6608395985846357e-06,
      "loss": 0.0123,
      "step": 1944100
    },
    {
      "epoch": 3.1815950197364544,
      "grad_norm": 0.14790107309818268,
      "learning_rate": 3.6607737063711185e-06,
      "loss": 0.0128,
      "step": 1944120
    },
    {
      "epoch": 3.181627750175108,
      "grad_norm": 0.3150538504123688,
      "learning_rate": 3.660707814157601e-06,
      "loss": 0.0117,
      "step": 1944140
    },
    {
      "epoch": 3.181660480613761,
      "grad_norm": 0.18801456689834595,
      "learning_rate": 3.6606419219440844e-06,
      "loss": 0.0122,
      "step": 1944160
    },
    {
      "epoch": 3.1816932110524148,
      "grad_norm": 0.6146238446235657,
      "learning_rate": 3.660576029730567e-06,
      "loss": 0.0148,
      "step": 1944180
    },
    {
      "epoch": 3.181725941491068,
      "grad_norm": 0.5370252728462219,
      "learning_rate": 3.66051013751705e-06,
      "loss": 0.0121,
      "step": 1944200
    },
    {
      "epoch": 3.181758671929721,
      "grad_norm": 0.1990540474653244,
      "learning_rate": 3.6604442453035326e-06,
      "loss": 0.0119,
      "step": 1944220
    },
    {
      "epoch": 3.1817914023683747,
      "grad_norm": 0.3943302035331726,
      "learning_rate": 3.6603783530900157e-06,
      "loss": 0.0122,
      "step": 1944240
    },
    {
      "epoch": 3.181824132807028,
      "grad_norm": 0.46029210090637207,
      "learning_rate": 3.6603124608764985e-06,
      "loss": 0.0138,
      "step": 1944260
    },
    {
      "epoch": 3.1818568632456814,
      "grad_norm": 0.24666030704975128,
      "learning_rate": 3.6602465686629812e-06,
      "loss": 0.0138,
      "step": 1944280
    },
    {
      "epoch": 3.1818895936843345,
      "grad_norm": 0.19670577347278595,
      "learning_rate": 3.660180676449464e-06,
      "loss": 0.0106,
      "step": 1944300
    },
    {
      "epoch": 3.1819223241229877,
      "grad_norm": 0.2786427438259125,
      "learning_rate": 3.6601147842359467e-06,
      "loss": 0.0103,
      "step": 1944320
    },
    {
      "epoch": 3.1819550545616413,
      "grad_norm": 0.315849244594574,
      "learning_rate": 3.66004889202243e-06,
      "loss": 0.0125,
      "step": 1944340
    },
    {
      "epoch": 3.1819877850002944,
      "grad_norm": 0.5738359093666077,
      "learning_rate": 3.659982999808913e-06,
      "loss": 0.0127,
      "step": 1944360
    },
    {
      "epoch": 3.182020515438948,
      "grad_norm": 0.14568153023719788,
      "learning_rate": 3.6599171075953958e-06,
      "loss": 0.0088,
      "step": 1944380
    },
    {
      "epoch": 3.182053245877601,
      "grad_norm": 0.2184736281633377,
      "learning_rate": 3.659851215381879e-06,
      "loss": 0.01,
      "step": 1944400
    },
    {
      "epoch": 3.182085976316255,
      "grad_norm": 0.4429018199443817,
      "learning_rate": 3.6597853231683617e-06,
      "loss": 0.0084,
      "step": 1944420
    },
    {
      "epoch": 3.182118706754908,
      "grad_norm": 1.0874758958816528,
      "learning_rate": 3.6597194309548444e-06,
      "loss": 0.0154,
      "step": 1944440
    },
    {
      "epoch": 3.182151437193561,
      "grad_norm": 0.32336515188217163,
      "learning_rate": 3.659653538741327e-06,
      "loss": 0.0083,
      "step": 1944460
    },
    {
      "epoch": 3.1821841676322147,
      "grad_norm": 0.06117357313632965,
      "learning_rate": 3.6595876465278103e-06,
      "loss": 0.0092,
      "step": 1944480
    },
    {
      "epoch": 3.182216898070868,
      "grad_norm": 0.09032994508743286,
      "learning_rate": 3.659521754314293e-06,
      "loss": 0.0111,
      "step": 1944500
    },
    {
      "epoch": 3.1822496285095214,
      "grad_norm": 0.3524787127971649,
      "learning_rate": 3.659455862100776e-06,
      "loss": 0.0173,
      "step": 1944520
    },
    {
      "epoch": 3.1822823589481746,
      "grad_norm": 0.29421013593673706,
      "learning_rate": 3.6593899698872585e-06,
      "loss": 0.0077,
      "step": 1944540
    },
    {
      "epoch": 3.182315089386828,
      "grad_norm": 1.1271438598632812,
      "learning_rate": 3.6593240776737417e-06,
      "loss": 0.0079,
      "step": 1944560
    },
    {
      "epoch": 3.1823478198254813,
      "grad_norm": 0.1541041135787964,
      "learning_rate": 3.6592581854602244e-06,
      "loss": 0.0169,
      "step": 1944580
    },
    {
      "epoch": 3.1823805502641345,
      "grad_norm": 0.3461429178714752,
      "learning_rate": 3.659192293246707e-06,
      "loss": 0.0108,
      "step": 1944600
    },
    {
      "epoch": 3.182413280702788,
      "grad_norm": 0.43032100796699524,
      "learning_rate": 3.65912640103319e-06,
      "loss": 0.0134,
      "step": 1944620
    },
    {
      "epoch": 3.182446011141441,
      "grad_norm": 0.3448014557361603,
      "learning_rate": 3.659060508819673e-06,
      "loss": 0.0136,
      "step": 1944640
    },
    {
      "epoch": 3.182478741580095,
      "grad_norm": 0.9460451006889343,
      "learning_rate": 3.658994616606156e-06,
      "loss": 0.0112,
      "step": 1944660
    },
    {
      "epoch": 3.182511472018748,
      "grad_norm": 0.4117222726345062,
      "learning_rate": 3.6589287243926386e-06,
      "loss": 0.012,
      "step": 1944680
    },
    {
      "epoch": 3.182544202457401,
      "grad_norm": 0.5589735507965088,
      "learning_rate": 3.6588628321791213e-06,
      "loss": 0.0149,
      "step": 1944700
    },
    {
      "epoch": 3.1825769328960547,
      "grad_norm": 0.12897197902202606,
      "learning_rate": 3.658796939965605e-06,
      "loss": 0.0127,
      "step": 1944720
    },
    {
      "epoch": 3.182609663334708,
      "grad_norm": 0.7555233836174011,
      "learning_rate": 3.6587310477520876e-06,
      "loss": 0.011,
      "step": 1944740
    },
    {
      "epoch": 3.1826423937733614,
      "grad_norm": 0.05941305682063103,
      "learning_rate": 3.6586651555385704e-06,
      "loss": 0.0083,
      "step": 1944760
    },
    {
      "epoch": 3.1826751242120146,
      "grad_norm": 0.22148272395133972,
      "learning_rate": 3.6585992633250535e-06,
      "loss": 0.0132,
      "step": 1944780
    },
    {
      "epoch": 3.182707854650668,
      "grad_norm": 0.22975808382034302,
      "learning_rate": 3.6585333711115363e-06,
      "loss": 0.0092,
      "step": 1944800
    },
    {
      "epoch": 3.1827405850893213,
      "grad_norm": 0.44166824221611023,
      "learning_rate": 3.658467478898019e-06,
      "loss": 0.0165,
      "step": 1944820
    },
    {
      "epoch": 3.1827733155279745,
      "grad_norm": 0.40971365571022034,
      "learning_rate": 3.6584015866845017e-06,
      "loss": 0.0111,
      "step": 1944840
    },
    {
      "epoch": 3.182806045966628,
      "grad_norm": 0.1317523568868637,
      "learning_rate": 3.6583356944709845e-06,
      "loss": 0.0184,
      "step": 1944860
    },
    {
      "epoch": 3.1828387764052812,
      "grad_norm": 0.4210568070411682,
      "learning_rate": 3.6582698022574677e-06,
      "loss": 0.0106,
      "step": 1944880
    },
    {
      "epoch": 3.182871506843935,
      "grad_norm": 0.14969967305660248,
      "learning_rate": 3.6582039100439504e-06,
      "loss": 0.0081,
      "step": 1944900
    },
    {
      "epoch": 3.182904237282588,
      "grad_norm": 0.15340180695056915,
      "learning_rate": 3.658138017830433e-06,
      "loss": 0.0108,
      "step": 1944920
    },
    {
      "epoch": 3.1829369677212416,
      "grad_norm": 0.20432685315608978,
      "learning_rate": 3.658072125616916e-06,
      "loss": 0.0128,
      "step": 1944940
    },
    {
      "epoch": 3.1829696981598947,
      "grad_norm": 0.3130912482738495,
      "learning_rate": 3.658006233403399e-06,
      "loss": 0.0135,
      "step": 1944960
    },
    {
      "epoch": 3.183002428598548,
      "grad_norm": 0.17064103484153748,
      "learning_rate": 3.6579403411898818e-06,
      "loss": 0.0124,
      "step": 1944980
    },
    {
      "epoch": 3.1830351590372015,
      "grad_norm": 0.20569376647472382,
      "learning_rate": 3.6578744489763645e-06,
      "loss": 0.0107,
      "step": 1945000
    },
    {
      "epoch": 3.1830678894758546,
      "grad_norm": 0.07869651168584824,
      "learning_rate": 3.6578085567628473e-06,
      "loss": 0.0105,
      "step": 1945020
    },
    {
      "epoch": 3.183100619914508,
      "grad_norm": 0.6473709344863892,
      "learning_rate": 3.6577426645493304e-06,
      "loss": 0.012,
      "step": 1945040
    },
    {
      "epoch": 3.1831333503531614,
      "grad_norm": 0.14670085906982422,
      "learning_rate": 3.6576767723358136e-06,
      "loss": 0.0085,
      "step": 1945060
    },
    {
      "epoch": 3.183166080791815,
      "grad_norm": 0.2226153016090393,
      "learning_rate": 3.6576108801222963e-06,
      "loss": 0.0124,
      "step": 1945080
    },
    {
      "epoch": 3.183198811230468,
      "grad_norm": 0.10007977485656738,
      "learning_rate": 3.6575449879087795e-06,
      "loss": 0.0122,
      "step": 1945100
    },
    {
      "epoch": 3.1832315416691213,
      "grad_norm": 0.31173357367515564,
      "learning_rate": 3.6574790956952622e-06,
      "loss": 0.0097,
      "step": 1945120
    },
    {
      "epoch": 3.183264272107775,
      "grad_norm": 0.11923792213201523,
      "learning_rate": 3.657413203481745e-06,
      "loss": 0.0092,
      "step": 1945140
    },
    {
      "epoch": 3.183297002546428,
      "grad_norm": 0.47894543409347534,
      "learning_rate": 3.6573473112682277e-06,
      "loss": 0.0103,
      "step": 1945160
    },
    {
      "epoch": 3.1833297329850816,
      "grad_norm": 0.40753304958343506,
      "learning_rate": 3.657281419054711e-06,
      "loss": 0.018,
      "step": 1945180
    },
    {
      "epoch": 3.1833624634237347,
      "grad_norm": 0.3277587890625,
      "learning_rate": 3.6572155268411936e-06,
      "loss": 0.0132,
      "step": 1945200
    },
    {
      "epoch": 3.1833951938623883,
      "grad_norm": 0.217838317155838,
      "learning_rate": 3.6571496346276763e-06,
      "loss": 0.0093,
      "step": 1945220
    },
    {
      "epoch": 3.1834279243010415,
      "grad_norm": 0.32549136877059937,
      "learning_rate": 3.657083742414159e-06,
      "loss": 0.0116,
      "step": 1945240
    },
    {
      "epoch": 3.1834606547396946,
      "grad_norm": 0.2519260048866272,
      "learning_rate": 3.6570178502006422e-06,
      "loss": 0.0088,
      "step": 1945260
    },
    {
      "epoch": 3.1834933851783482,
      "grad_norm": 0.04050208255648613,
      "learning_rate": 3.656951957987125e-06,
      "loss": 0.009,
      "step": 1945280
    },
    {
      "epoch": 3.1835261156170014,
      "grad_norm": 0.1128373071551323,
      "learning_rate": 3.6568860657736077e-06,
      "loss": 0.0112,
      "step": 1945300
    },
    {
      "epoch": 3.183558846055655,
      "grad_norm": 0.26375147700309753,
      "learning_rate": 3.6568201735600905e-06,
      "loss": 0.0075,
      "step": 1945320
    },
    {
      "epoch": 3.183591576494308,
      "grad_norm": 0.5513752102851868,
      "learning_rate": 3.656754281346573e-06,
      "loss": 0.0145,
      "step": 1945340
    },
    {
      "epoch": 3.1836243069329617,
      "grad_norm": 0.071273572742939,
      "learning_rate": 3.6566883891330564e-06,
      "loss": 0.0116,
      "step": 1945360
    },
    {
      "epoch": 3.183657037371615,
      "grad_norm": 0.41494041681289673,
      "learning_rate": 3.656622496919539e-06,
      "loss": 0.0082,
      "step": 1945380
    },
    {
      "epoch": 3.183689767810268,
      "grad_norm": 0.2920733690261841,
      "learning_rate": 3.656556604706022e-06,
      "loss": 0.0188,
      "step": 1945400
    },
    {
      "epoch": 3.1837224982489216,
      "grad_norm": 0.1811603456735611,
      "learning_rate": 3.6564907124925054e-06,
      "loss": 0.0163,
      "step": 1945420
    },
    {
      "epoch": 3.1837552286875748,
      "grad_norm": 0.7239280939102173,
      "learning_rate": 3.656424820278988e-06,
      "loss": 0.0177,
      "step": 1945440
    },
    {
      "epoch": 3.1837879591262284,
      "grad_norm": 0.3795344829559326,
      "learning_rate": 3.656358928065471e-06,
      "loss": 0.0108,
      "step": 1945460
    },
    {
      "epoch": 3.1838206895648815,
      "grad_norm": 0.24070651829242706,
      "learning_rate": 3.6562930358519537e-06,
      "loss": 0.0143,
      "step": 1945480
    },
    {
      "epoch": 3.183853420003535,
      "grad_norm": 0.402817040681839,
      "learning_rate": 3.656227143638437e-06,
      "loss": 0.0125,
      "step": 1945500
    },
    {
      "epoch": 3.1838861504421883,
      "grad_norm": 0.25803372263908386,
      "learning_rate": 3.6561612514249196e-06,
      "loss": 0.0169,
      "step": 1945520
    },
    {
      "epoch": 3.1839188808808414,
      "grad_norm": 0.8692820072174072,
      "learning_rate": 3.6560953592114023e-06,
      "loss": 0.0082,
      "step": 1945540
    },
    {
      "epoch": 3.183951611319495,
      "grad_norm": 0.23746070265769958,
      "learning_rate": 3.656029466997885e-06,
      "loss": 0.0132,
      "step": 1945560
    },
    {
      "epoch": 3.183984341758148,
      "grad_norm": 0.3556641936302185,
      "learning_rate": 3.655963574784368e-06,
      "loss": 0.0095,
      "step": 1945580
    },
    {
      "epoch": 3.1840170721968017,
      "grad_norm": 0.30092474818229675,
      "learning_rate": 3.655897682570851e-06,
      "loss": 0.0138,
      "step": 1945600
    },
    {
      "epoch": 3.184049802635455,
      "grad_norm": 0.37264373898506165,
      "learning_rate": 3.6558317903573337e-06,
      "loss": 0.0131,
      "step": 1945620
    },
    {
      "epoch": 3.1840825330741085,
      "grad_norm": 0.09850041568279266,
      "learning_rate": 3.6557658981438164e-06,
      "loss": 0.0118,
      "step": 1945640
    },
    {
      "epoch": 3.1841152635127616,
      "grad_norm": 0.1485816091299057,
      "learning_rate": 3.6557000059302996e-06,
      "loss": 0.0092,
      "step": 1945660
    },
    {
      "epoch": 3.184147993951415,
      "grad_norm": 0.2816724181175232,
      "learning_rate": 3.6556341137167823e-06,
      "loss": 0.0117,
      "step": 1945680
    },
    {
      "epoch": 3.1841807243900684,
      "grad_norm": 0.530570387840271,
      "learning_rate": 3.655568221503265e-06,
      "loss": 0.008,
      "step": 1945700
    },
    {
      "epoch": 3.1842134548287215,
      "grad_norm": 0.04260808974504471,
      "learning_rate": 3.655502329289748e-06,
      "loss": 0.0056,
      "step": 1945720
    },
    {
      "epoch": 3.184246185267375,
      "grad_norm": 0.4374910891056061,
      "learning_rate": 3.6554364370762305e-06,
      "loss": 0.01,
      "step": 1945740
    },
    {
      "epoch": 3.1842789157060283,
      "grad_norm": 0.37947478890419006,
      "learning_rate": 3.655370544862714e-06,
      "loss": 0.0121,
      "step": 1945760
    },
    {
      "epoch": 3.1843116461446814,
      "grad_norm": 0.3952324688434601,
      "learning_rate": 3.655304652649197e-06,
      "loss": 0.0097,
      "step": 1945780
    },
    {
      "epoch": 3.184344376583335,
      "grad_norm": 0.14444434642791748,
      "learning_rate": 3.65523876043568e-06,
      "loss": 0.0091,
      "step": 1945800
    },
    {
      "epoch": 3.184377107021988,
      "grad_norm": 0.36861947178840637,
      "learning_rate": 3.6551728682221628e-06,
      "loss": 0.0103,
      "step": 1945820
    },
    {
      "epoch": 3.1844098374606418,
      "grad_norm": 0.05291958898305893,
      "learning_rate": 3.6551069760086455e-06,
      "loss": 0.0091,
      "step": 1945840
    },
    {
      "epoch": 3.184442567899295,
      "grad_norm": 0.38537827134132385,
      "learning_rate": 3.6550410837951283e-06,
      "loss": 0.0102,
      "step": 1945860
    },
    {
      "epoch": 3.1844752983379485,
      "grad_norm": 0.6609034538269043,
      "learning_rate": 3.654975191581611e-06,
      "loss": 0.009,
      "step": 1945880
    },
    {
      "epoch": 3.1845080287766017,
      "grad_norm": 0.16946709156036377,
      "learning_rate": 3.654909299368094e-06,
      "loss": 0.0079,
      "step": 1945900
    },
    {
      "epoch": 3.184540759215255,
      "grad_norm": 0.4460918605327606,
      "learning_rate": 3.654843407154577e-06,
      "loss": 0.0114,
      "step": 1945920
    },
    {
      "epoch": 3.1845734896539084,
      "grad_norm": 0.21219122409820557,
      "learning_rate": 3.6547775149410596e-06,
      "loss": 0.0092,
      "step": 1945940
    },
    {
      "epoch": 3.1846062200925616,
      "grad_norm": 0.21160507202148438,
      "learning_rate": 3.6547116227275424e-06,
      "loss": 0.0166,
      "step": 1945960
    },
    {
      "epoch": 3.184638950531215,
      "grad_norm": 0.2403436005115509,
      "learning_rate": 3.6546457305140255e-06,
      "loss": 0.0078,
      "step": 1945980
    },
    {
      "epoch": 3.1846716809698683,
      "grad_norm": 0.5053282380104065,
      "learning_rate": 3.6545798383005083e-06,
      "loss": 0.0104,
      "step": 1946000
    },
    {
      "epoch": 3.184704411408522,
      "grad_norm": 0.043839965015649796,
      "learning_rate": 3.654513946086991e-06,
      "loss": 0.0107,
      "step": 1946020
    },
    {
      "epoch": 3.184737141847175,
      "grad_norm": 0.17823456227779388,
      "learning_rate": 3.6544480538734738e-06,
      "loss": 0.0175,
      "step": 1946040
    },
    {
      "epoch": 3.184769872285828,
      "grad_norm": 0.4757488965988159,
      "learning_rate": 3.654382161659957e-06,
      "loss": 0.0087,
      "step": 1946060
    },
    {
      "epoch": 3.184802602724482,
      "grad_norm": 0.47763901948928833,
      "learning_rate": 3.6543162694464397e-06,
      "loss": 0.0168,
      "step": 1946080
    },
    {
      "epoch": 3.184835333163135,
      "grad_norm": 0.35969439148902893,
      "learning_rate": 3.6542503772329224e-06,
      "loss": 0.0106,
      "step": 1946100
    },
    {
      "epoch": 3.1848680636017885,
      "grad_norm": 0.2578252851963043,
      "learning_rate": 3.654184485019406e-06,
      "loss": 0.0104,
      "step": 1946120
    },
    {
      "epoch": 3.1849007940404417,
      "grad_norm": 0.05816793069243431,
      "learning_rate": 3.6541185928058887e-06,
      "loss": 0.0101,
      "step": 1946140
    },
    {
      "epoch": 3.184933524479095,
      "grad_norm": 0.6998894214630127,
      "learning_rate": 3.6540527005923715e-06,
      "loss": 0.0185,
      "step": 1946160
    },
    {
      "epoch": 3.1849662549177484,
      "grad_norm": 0.5131332278251648,
      "learning_rate": 3.653986808378854e-06,
      "loss": 0.0121,
      "step": 1946180
    },
    {
      "epoch": 3.1849989853564016,
      "grad_norm": 0.42219144105911255,
      "learning_rate": 3.6539209161653374e-06,
      "loss": 0.0132,
      "step": 1946200
    },
    {
      "epoch": 3.185031715795055,
      "grad_norm": 0.149438738822937,
      "learning_rate": 3.65385502395182e-06,
      "loss": 0.0192,
      "step": 1946220
    },
    {
      "epoch": 3.1850644462337083,
      "grad_norm": 0.1863064467906952,
      "learning_rate": 3.653789131738303e-06,
      "loss": 0.0133,
      "step": 1946240
    },
    {
      "epoch": 3.185097176672362,
      "grad_norm": 0.2309177815914154,
      "learning_rate": 3.6537232395247856e-06,
      "loss": 0.0092,
      "step": 1946260
    },
    {
      "epoch": 3.185129907111015,
      "grad_norm": 0.17856274545192719,
      "learning_rate": 3.6536573473112683e-06,
      "loss": 0.0125,
      "step": 1946280
    },
    {
      "epoch": 3.185162637549668,
      "grad_norm": 0.27766966819763184,
      "learning_rate": 3.6535914550977515e-06,
      "loss": 0.0122,
      "step": 1946300
    },
    {
      "epoch": 3.185195367988322,
      "grad_norm": 0.2615678310394287,
      "learning_rate": 3.6535255628842342e-06,
      "loss": 0.0115,
      "step": 1946320
    },
    {
      "epoch": 3.185228098426975,
      "grad_norm": 0.12161903828382492,
      "learning_rate": 3.653459670670717e-06,
      "loss": 0.016,
      "step": 1946340
    },
    {
      "epoch": 3.1852608288656286,
      "grad_norm": 0.39788392186164856,
      "learning_rate": 3.6533937784571997e-06,
      "loss": 0.0088,
      "step": 1946360
    },
    {
      "epoch": 3.1852935593042817,
      "grad_norm": 0.12811803817749023,
      "learning_rate": 3.653327886243683e-06,
      "loss": 0.0163,
      "step": 1946380
    },
    {
      "epoch": 3.1853262897429353,
      "grad_norm": 0.49301812052726746,
      "learning_rate": 3.6532619940301656e-06,
      "loss": 0.0136,
      "step": 1946400
    },
    {
      "epoch": 3.1853590201815885,
      "grad_norm": 0.5952073335647583,
      "learning_rate": 3.6531961018166484e-06,
      "loss": 0.0113,
      "step": 1946420
    },
    {
      "epoch": 3.1853917506202416,
      "grad_norm": 0.2752639353275299,
      "learning_rate": 3.653130209603131e-06,
      "loss": 0.009,
      "step": 1946440
    },
    {
      "epoch": 3.185424481058895,
      "grad_norm": 0.1454431265592575,
      "learning_rate": 3.6530643173896147e-06,
      "loss": 0.0096,
      "step": 1946460
    },
    {
      "epoch": 3.1854572114975483,
      "grad_norm": 0.46608754992485046,
      "learning_rate": 3.6529984251760974e-06,
      "loss": 0.0148,
      "step": 1946480
    },
    {
      "epoch": 3.185489941936202,
      "grad_norm": 0.21319958567619324,
      "learning_rate": 3.65293253296258e-06,
      "loss": 0.0103,
      "step": 1946500
    },
    {
      "epoch": 3.185522672374855,
      "grad_norm": 0.3461281955242157,
      "learning_rate": 3.6528666407490633e-06,
      "loss": 0.0146,
      "step": 1946520
    },
    {
      "epoch": 3.1855554028135087,
      "grad_norm": 0.3585737645626068,
      "learning_rate": 3.652800748535546e-06,
      "loss": 0.009,
      "step": 1946540
    },
    {
      "epoch": 3.185588133252162,
      "grad_norm": 0.2748296558856964,
      "learning_rate": 3.652734856322029e-06,
      "loss": 0.0111,
      "step": 1946560
    },
    {
      "epoch": 3.185620863690815,
      "grad_norm": 0.200198695063591,
      "learning_rate": 3.6526689641085115e-06,
      "loss": 0.0128,
      "step": 1946580
    },
    {
      "epoch": 3.1856535941294686,
      "grad_norm": 0.36023059487342834,
      "learning_rate": 3.6526030718949947e-06,
      "loss": 0.0101,
      "step": 1946600
    },
    {
      "epoch": 3.1856863245681217,
      "grad_norm": 0.7010200619697571,
      "learning_rate": 3.6525371796814774e-06,
      "loss": 0.0114,
      "step": 1946620
    },
    {
      "epoch": 3.1857190550067753,
      "grad_norm": 0.4253086447715759,
      "learning_rate": 3.65247128746796e-06,
      "loss": 0.009,
      "step": 1946640
    },
    {
      "epoch": 3.1857517854454285,
      "grad_norm": 0.16552391648292542,
      "learning_rate": 3.652405395254443e-06,
      "loss": 0.0224,
      "step": 1946660
    },
    {
      "epoch": 3.185784515884082,
      "grad_norm": 0.12660762667655945,
      "learning_rate": 3.652339503040926e-06,
      "loss": 0.0081,
      "step": 1946680
    },
    {
      "epoch": 3.185817246322735,
      "grad_norm": 0.4642574191093445,
      "learning_rate": 3.652273610827409e-06,
      "loss": 0.013,
      "step": 1946700
    },
    {
      "epoch": 3.1858499767613884,
      "grad_norm": 0.078068807721138,
      "learning_rate": 3.6522077186138916e-06,
      "loss": 0.0086,
      "step": 1946720
    },
    {
      "epoch": 3.185882707200042,
      "grad_norm": 0.6120539903640747,
      "learning_rate": 3.6521418264003743e-06,
      "loss": 0.0126,
      "step": 1946740
    },
    {
      "epoch": 3.185915437638695,
      "grad_norm": 0.38078200817108154,
      "learning_rate": 3.652075934186857e-06,
      "loss": 0.0146,
      "step": 1946760
    },
    {
      "epoch": 3.1859481680773487,
      "grad_norm": 0.5875462293624878,
      "learning_rate": 3.65201004197334e-06,
      "loss": 0.0084,
      "step": 1946780
    },
    {
      "epoch": 3.185980898516002,
      "grad_norm": 0.38476109504699707,
      "learning_rate": 3.651944149759823e-06,
      "loss": 0.0127,
      "step": 1946800
    },
    {
      "epoch": 3.1860136289546555,
      "grad_norm": 0.08947520703077316,
      "learning_rate": 3.651878257546306e-06,
      "loss": 0.0143,
      "step": 1946820
    },
    {
      "epoch": 3.1860463593933086,
      "grad_norm": 0.363995760679245,
      "learning_rate": 3.6518123653327893e-06,
      "loss": 0.0139,
      "step": 1946840
    },
    {
      "epoch": 3.1860790898319618,
      "grad_norm": 0.18496547639369965,
      "learning_rate": 3.651746473119272e-06,
      "loss": 0.0128,
      "step": 1946860
    },
    {
      "epoch": 3.1861118202706153,
      "grad_norm": 0.22342009842395782,
      "learning_rate": 3.6516805809057548e-06,
      "loss": 0.0122,
      "step": 1946880
    },
    {
      "epoch": 3.1861445507092685,
      "grad_norm": 0.1720658391714096,
      "learning_rate": 3.6516146886922375e-06,
      "loss": 0.0109,
      "step": 1946900
    },
    {
      "epoch": 3.186177281147922,
      "grad_norm": 0.2600005865097046,
      "learning_rate": 3.6515487964787207e-06,
      "loss": 0.0105,
      "step": 1946920
    },
    {
      "epoch": 3.1862100115865752,
      "grad_norm": 0.1939813643693924,
      "learning_rate": 3.6514829042652034e-06,
      "loss": 0.0128,
      "step": 1946940
    },
    {
      "epoch": 3.186242742025229,
      "grad_norm": 0.2582995891571045,
      "learning_rate": 3.651417012051686e-06,
      "loss": 0.0075,
      "step": 1946960
    },
    {
      "epoch": 3.186275472463882,
      "grad_norm": 0.18881961703300476,
      "learning_rate": 3.651351119838169e-06,
      "loss": 0.0108,
      "step": 1946980
    },
    {
      "epoch": 3.186308202902535,
      "grad_norm": 0.25865989923477173,
      "learning_rate": 3.651285227624652e-06,
      "loss": 0.0075,
      "step": 1947000
    },
    {
      "epoch": 3.1863409333411887,
      "grad_norm": 0.3112901449203491,
      "learning_rate": 3.6512193354111348e-06,
      "loss": 0.0081,
      "step": 1947020
    },
    {
      "epoch": 3.186373663779842,
      "grad_norm": 0.3039547801017761,
      "learning_rate": 3.6511534431976175e-06,
      "loss": 0.0131,
      "step": 1947040
    },
    {
      "epoch": 3.1864063942184955,
      "grad_norm": 0.3734196126461029,
      "learning_rate": 3.6510875509841003e-06,
      "loss": 0.0095,
      "step": 1947060
    },
    {
      "epoch": 3.1864391246571486,
      "grad_norm": 0.4109598994255066,
      "learning_rate": 3.6510216587705834e-06,
      "loss": 0.0177,
      "step": 1947080
    },
    {
      "epoch": 3.186471855095802,
      "grad_norm": 0.18778139352798462,
      "learning_rate": 3.650955766557066e-06,
      "loss": 0.0146,
      "step": 1947100
    },
    {
      "epoch": 3.1865045855344554,
      "grad_norm": 0.3708546757698059,
      "learning_rate": 3.650889874343549e-06,
      "loss": 0.0125,
      "step": 1947120
    },
    {
      "epoch": 3.1865373159731085,
      "grad_norm": 0.25710323452949524,
      "learning_rate": 3.6508239821300316e-06,
      "loss": 0.0127,
      "step": 1947140
    },
    {
      "epoch": 3.186570046411762,
      "grad_norm": 0.09850681573152542,
      "learning_rate": 3.6507580899165144e-06,
      "loss": 0.01,
      "step": 1947160
    },
    {
      "epoch": 3.1866027768504153,
      "grad_norm": 0.27457481622695923,
      "learning_rate": 3.650692197702998e-06,
      "loss": 0.0126,
      "step": 1947180
    },
    {
      "epoch": 3.186635507289069,
      "grad_norm": 0.21009168028831482,
      "learning_rate": 3.6506263054894807e-06,
      "loss": 0.0109,
      "step": 1947200
    },
    {
      "epoch": 3.186668237727722,
      "grad_norm": 0.17630572617053986,
      "learning_rate": 3.650560413275964e-06,
      "loss": 0.014,
      "step": 1947220
    },
    {
      "epoch": 3.186700968166375,
      "grad_norm": 0.33554336428642273,
      "learning_rate": 3.6504945210624466e-06,
      "loss": 0.0141,
      "step": 1947240
    },
    {
      "epoch": 3.1867336986050288,
      "grad_norm": 0.3050884008407593,
      "learning_rate": 3.6504286288489294e-06,
      "loss": 0.0114,
      "step": 1947260
    },
    {
      "epoch": 3.186766429043682,
      "grad_norm": 0.12445051968097687,
      "learning_rate": 3.650362736635412e-06,
      "loss": 0.0149,
      "step": 1947280
    },
    {
      "epoch": 3.1867991594823355,
      "grad_norm": 0.5217300057411194,
      "learning_rate": 3.650296844421895e-06,
      "loss": 0.0105,
      "step": 1947300
    },
    {
      "epoch": 3.1868318899209886,
      "grad_norm": 0.34716692566871643,
      "learning_rate": 3.650230952208378e-06,
      "loss": 0.0113,
      "step": 1947320
    },
    {
      "epoch": 3.1868646203596422,
      "grad_norm": 0.28317564725875854,
      "learning_rate": 3.6501650599948607e-06,
      "loss": 0.011,
      "step": 1947340
    },
    {
      "epoch": 3.1868973507982954,
      "grad_norm": 0.08166661858558655,
      "learning_rate": 3.6500991677813435e-06,
      "loss": 0.014,
      "step": 1947360
    },
    {
      "epoch": 3.1869300812369485,
      "grad_norm": 0.29752838611602783,
      "learning_rate": 3.6500332755678262e-06,
      "loss": 0.0072,
      "step": 1947380
    },
    {
      "epoch": 3.186962811675602,
      "grad_norm": 0.4158582091331482,
      "learning_rate": 3.6499673833543094e-06,
      "loss": 0.0198,
      "step": 1947400
    },
    {
      "epoch": 3.1869955421142553,
      "grad_norm": 0.10975213348865509,
      "learning_rate": 3.649901491140792e-06,
      "loss": 0.0124,
      "step": 1947420
    },
    {
      "epoch": 3.187028272552909,
      "grad_norm": 0.10402857512235641,
      "learning_rate": 3.649835598927275e-06,
      "loss": 0.0093,
      "step": 1947440
    },
    {
      "epoch": 3.187061002991562,
      "grad_norm": 0.35584336519241333,
      "learning_rate": 3.6497697067137576e-06,
      "loss": 0.0058,
      "step": 1947460
    },
    {
      "epoch": 3.1870937334302156,
      "grad_norm": 1.0920530557632446,
      "learning_rate": 3.6497038145002408e-06,
      "loss": 0.012,
      "step": 1947480
    },
    {
      "epoch": 3.1871264638688688,
      "grad_norm": 0.387478768825531,
      "learning_rate": 3.6496379222867235e-06,
      "loss": 0.0108,
      "step": 1947500
    },
    {
      "epoch": 3.187159194307522,
      "grad_norm": 0.28750428557395935,
      "learning_rate": 3.6495720300732067e-06,
      "loss": 0.0122,
      "step": 1947520
    },
    {
      "epoch": 3.1871919247461755,
      "grad_norm": 0.19454284012317657,
      "learning_rate": 3.64950613785969e-06,
      "loss": 0.0103,
      "step": 1947540
    },
    {
      "epoch": 3.1872246551848287,
      "grad_norm": 0.4069737493991852,
      "learning_rate": 3.6494402456461726e-06,
      "loss": 0.0095,
      "step": 1947560
    },
    {
      "epoch": 3.1872573856234823,
      "grad_norm": 0.18474464118480682,
      "learning_rate": 3.6493743534326553e-06,
      "loss": 0.0093,
      "step": 1947580
    },
    {
      "epoch": 3.1872901160621354,
      "grad_norm": 0.1928011178970337,
      "learning_rate": 3.649308461219138e-06,
      "loss": 0.0109,
      "step": 1947600
    },
    {
      "epoch": 3.187322846500789,
      "grad_norm": 0.1737380474805832,
      "learning_rate": 3.649242569005621e-06,
      "loss": 0.0104,
      "step": 1947620
    },
    {
      "epoch": 3.187355576939442,
      "grad_norm": 0.4706156849861145,
      "learning_rate": 3.649176676792104e-06,
      "loss": 0.0101,
      "step": 1947640
    },
    {
      "epoch": 3.1873883073780953,
      "grad_norm": 0.12870655953884125,
      "learning_rate": 3.6491107845785867e-06,
      "loss": 0.0113,
      "step": 1947660
    },
    {
      "epoch": 3.187421037816749,
      "grad_norm": 0.19435186684131622,
      "learning_rate": 3.6490448923650694e-06,
      "loss": 0.0123,
      "step": 1947680
    },
    {
      "epoch": 3.187453768255402,
      "grad_norm": 0.5056590437889099,
      "learning_rate": 3.648979000151552e-06,
      "loss": 0.0107,
      "step": 1947700
    },
    {
      "epoch": 3.1874864986940556,
      "grad_norm": 0.1422896534204483,
      "learning_rate": 3.6489131079380353e-06,
      "loss": 0.0116,
      "step": 1947720
    },
    {
      "epoch": 3.187519229132709,
      "grad_norm": 0.47148874402046204,
      "learning_rate": 3.648847215724518e-06,
      "loss": 0.0181,
      "step": 1947740
    },
    {
      "epoch": 3.187551959571362,
      "grad_norm": 0.3817805051803589,
      "learning_rate": 3.648781323511001e-06,
      "loss": 0.0068,
      "step": 1947760
    },
    {
      "epoch": 3.1875846900100155,
      "grad_norm": 0.8745242357254028,
      "learning_rate": 3.6487154312974836e-06,
      "loss": 0.0115,
      "step": 1947780
    },
    {
      "epoch": 3.1876174204486687,
      "grad_norm": 0.19936150312423706,
      "learning_rate": 3.6486495390839667e-06,
      "loss": 0.0092,
      "step": 1947800
    },
    {
      "epoch": 3.1876501508873223,
      "grad_norm": 0.1434943526983261,
      "learning_rate": 3.6485836468704495e-06,
      "loss": 0.0127,
      "step": 1947820
    },
    {
      "epoch": 3.1876828813259754,
      "grad_norm": 0.11095835268497467,
      "learning_rate": 3.648517754656932e-06,
      "loss": 0.012,
      "step": 1947840
    },
    {
      "epoch": 3.187715611764629,
      "grad_norm": 0.34619176387786865,
      "learning_rate": 3.648451862443415e-06,
      "loss": 0.0225,
      "step": 1947860
    },
    {
      "epoch": 3.187748342203282,
      "grad_norm": 0.15432773530483246,
      "learning_rate": 3.6483859702298985e-06,
      "loss": 0.0121,
      "step": 1947880
    },
    {
      "epoch": 3.1877810726419353,
      "grad_norm": 0.6889492869377136,
      "learning_rate": 3.6483200780163813e-06,
      "loss": 0.0173,
      "step": 1947900
    },
    {
      "epoch": 3.187813803080589,
      "grad_norm": 0.2795642018318176,
      "learning_rate": 3.648254185802864e-06,
      "loss": 0.0123,
      "step": 1947920
    },
    {
      "epoch": 3.187846533519242,
      "grad_norm": 0.09962552040815353,
      "learning_rate": 3.648188293589347e-06,
      "loss": 0.0139,
      "step": 1947940
    },
    {
      "epoch": 3.1878792639578957,
      "grad_norm": 0.42150333523750305,
      "learning_rate": 3.64812240137583e-06,
      "loss": 0.0073,
      "step": 1947960
    },
    {
      "epoch": 3.187911994396549,
      "grad_norm": 0.26420554518699646,
      "learning_rate": 3.6480565091623126e-06,
      "loss": 0.0116,
      "step": 1947980
    },
    {
      "epoch": 3.1879447248352024,
      "grad_norm": 1.9077067375183105,
      "learning_rate": 3.6479906169487954e-06,
      "loss": 0.0148,
      "step": 1948000
    },
    {
      "epoch": 3.1879774552738556,
      "grad_norm": 0.07225323468446732,
      "learning_rate": 3.6479247247352785e-06,
      "loss": 0.0094,
      "step": 1948020
    },
    {
      "epoch": 3.1880101857125087,
      "grad_norm": 0.11227072030305862,
      "learning_rate": 3.6478588325217613e-06,
      "loss": 0.0109,
      "step": 1948040
    },
    {
      "epoch": 3.1880429161511623,
      "grad_norm": 0.47751596570014954,
      "learning_rate": 3.647792940308244e-06,
      "loss": 0.0119,
      "step": 1948060
    },
    {
      "epoch": 3.1880756465898155,
      "grad_norm": 0.5051789879798889,
      "learning_rate": 3.6477270480947268e-06,
      "loss": 0.0136,
      "step": 1948080
    },
    {
      "epoch": 3.188108377028469,
      "grad_norm": 0.22258560359477997,
      "learning_rate": 3.64766115588121e-06,
      "loss": 0.011,
      "step": 1948100
    },
    {
      "epoch": 3.188141107467122,
      "grad_norm": 0.14397946000099182,
      "learning_rate": 3.6475952636676927e-06,
      "loss": 0.0131,
      "step": 1948120
    },
    {
      "epoch": 3.188173837905776,
      "grad_norm": 0.6061395406723022,
      "learning_rate": 3.6475293714541754e-06,
      "loss": 0.0102,
      "step": 1948140
    },
    {
      "epoch": 3.188206568344429,
      "grad_norm": 0.2404438853263855,
      "learning_rate": 3.647463479240658e-06,
      "loss": 0.0099,
      "step": 1948160
    },
    {
      "epoch": 3.188239298783082,
      "grad_norm": 0.4004625678062439,
      "learning_rate": 3.647397587027141e-06,
      "loss": 0.0106,
      "step": 1948180
    },
    {
      "epoch": 3.1882720292217357,
      "grad_norm": 0.1620776653289795,
      "learning_rate": 3.647331694813624e-06,
      "loss": 0.0153,
      "step": 1948200
    },
    {
      "epoch": 3.188304759660389,
      "grad_norm": 0.18943609297275543,
      "learning_rate": 3.6472658026001072e-06,
      "loss": 0.0152,
      "step": 1948220
    },
    {
      "epoch": 3.1883374900990424,
      "grad_norm": 0.08981061726808548,
      "learning_rate": 3.64719991038659e-06,
      "loss": 0.0126,
      "step": 1948240
    },
    {
      "epoch": 3.1883702205376956,
      "grad_norm": 0.3005129396915436,
      "learning_rate": 3.647134018173073e-06,
      "loss": 0.0136,
      "step": 1948260
    },
    {
      "epoch": 3.188402950976349,
      "grad_norm": 0.10167142748832703,
      "learning_rate": 3.647068125959556e-06,
      "loss": 0.0091,
      "step": 1948280
    },
    {
      "epoch": 3.1884356814150023,
      "grad_norm": 0.1442616581916809,
      "learning_rate": 3.6470022337460386e-06,
      "loss": 0.0103,
      "step": 1948300
    },
    {
      "epoch": 3.1884684118536555,
      "grad_norm": 0.20770786702632904,
      "learning_rate": 3.6469363415325213e-06,
      "loss": 0.0121,
      "step": 1948320
    },
    {
      "epoch": 3.188501142292309,
      "grad_norm": 0.3617496192455292,
      "learning_rate": 3.6468704493190045e-06,
      "loss": 0.0125,
      "step": 1948340
    },
    {
      "epoch": 3.1885338727309622,
      "grad_norm": 0.10883410274982452,
      "learning_rate": 3.6468045571054872e-06,
      "loss": 0.0109,
      "step": 1948360
    },
    {
      "epoch": 3.188566603169616,
      "grad_norm": 0.045513346791267395,
      "learning_rate": 3.64673866489197e-06,
      "loss": 0.0119,
      "step": 1948380
    },
    {
      "epoch": 3.188599333608269,
      "grad_norm": 0.35701805353164673,
      "learning_rate": 3.6466727726784527e-06,
      "loss": 0.0109,
      "step": 1948400
    },
    {
      "epoch": 3.1886320640469226,
      "grad_norm": 0.26438847184181213,
      "learning_rate": 3.646606880464936e-06,
      "loss": 0.011,
      "step": 1948420
    },
    {
      "epoch": 3.1886647944855757,
      "grad_norm": 0.20416930317878723,
      "learning_rate": 3.6465409882514186e-06,
      "loss": 0.0101,
      "step": 1948440
    },
    {
      "epoch": 3.188697524924229,
      "grad_norm": 0.3832487165927887,
      "learning_rate": 3.6464750960379014e-06,
      "loss": 0.0116,
      "step": 1948460
    },
    {
      "epoch": 3.1887302553628825,
      "grad_norm": 0.2741212248802185,
      "learning_rate": 3.646409203824384e-06,
      "loss": 0.0094,
      "step": 1948480
    },
    {
      "epoch": 3.1887629858015356,
      "grad_norm": 0.5637410879135132,
      "learning_rate": 3.6463433116108673e-06,
      "loss": 0.0129,
      "step": 1948500
    },
    {
      "epoch": 3.188795716240189,
      "grad_norm": 0.4434507191181183,
      "learning_rate": 3.64627741939735e-06,
      "loss": 0.0094,
      "step": 1948520
    },
    {
      "epoch": 3.1888284466788424,
      "grad_norm": 0.5359565615653992,
      "learning_rate": 3.6462115271838327e-06,
      "loss": 0.0105,
      "step": 1948540
    },
    {
      "epoch": 3.188861177117496,
      "grad_norm": 0.10280134528875351,
      "learning_rate": 3.6461456349703155e-06,
      "loss": 0.014,
      "step": 1948560
    },
    {
      "epoch": 3.188893907556149,
      "grad_norm": 0.32628437876701355,
      "learning_rate": 3.646079742756799e-06,
      "loss": 0.0128,
      "step": 1948580
    },
    {
      "epoch": 3.1889266379948022,
      "grad_norm": 0.17875482141971588,
      "learning_rate": 3.646013850543282e-06,
      "loss": 0.011,
      "step": 1948600
    },
    {
      "epoch": 3.188959368433456,
      "grad_norm": 0.07820416241884232,
      "learning_rate": 3.6459479583297645e-06,
      "loss": 0.0098,
      "step": 1948620
    },
    {
      "epoch": 3.188992098872109,
      "grad_norm": 0.294689804315567,
      "learning_rate": 3.6458820661162477e-06,
      "loss": 0.0147,
      "step": 1948640
    },
    {
      "epoch": 3.1890248293107626,
      "grad_norm": 0.4598517417907715,
      "learning_rate": 3.6458161739027305e-06,
      "loss": 0.0082,
      "step": 1948660
    },
    {
      "epoch": 3.1890575597494157,
      "grad_norm": 0.10970757156610489,
      "learning_rate": 3.645750281689213e-06,
      "loss": 0.0097,
      "step": 1948680
    },
    {
      "epoch": 3.1890902901880693,
      "grad_norm": 0.37822791934013367,
      "learning_rate": 3.645684389475696e-06,
      "loss": 0.0121,
      "step": 1948700
    },
    {
      "epoch": 3.1891230206267225,
      "grad_norm": 0.05999099835753441,
      "learning_rate": 3.6456184972621787e-06,
      "loss": 0.0112,
      "step": 1948720
    },
    {
      "epoch": 3.1891557510653756,
      "grad_norm": 0.39446187019348145,
      "learning_rate": 3.645552605048662e-06,
      "loss": 0.0168,
      "step": 1948740
    },
    {
      "epoch": 3.1891884815040292,
      "grad_norm": 0.2235272079706192,
      "learning_rate": 3.6454867128351446e-06,
      "loss": 0.0114,
      "step": 1948760
    },
    {
      "epoch": 3.1892212119426824,
      "grad_norm": 0.5193189978599548,
      "learning_rate": 3.6454208206216273e-06,
      "loss": 0.0075,
      "step": 1948780
    },
    {
      "epoch": 3.189253942381336,
      "grad_norm": 0.3604370057582855,
      "learning_rate": 3.64535492840811e-06,
      "loss": 0.0097,
      "step": 1948800
    },
    {
      "epoch": 3.189286672819989,
      "grad_norm": 0.6825451254844666,
      "learning_rate": 3.6452890361945932e-06,
      "loss": 0.0159,
      "step": 1948820
    },
    {
      "epoch": 3.1893194032586423,
      "grad_norm": 0.23715424537658691,
      "learning_rate": 3.645223143981076e-06,
      "loss": 0.0116,
      "step": 1948840
    },
    {
      "epoch": 3.189352133697296,
      "grad_norm": 0.22937358915805817,
      "learning_rate": 3.6451572517675587e-06,
      "loss": 0.0115,
      "step": 1948860
    },
    {
      "epoch": 3.189384864135949,
      "grad_norm": 0.523453414440155,
      "learning_rate": 3.6450913595540414e-06,
      "loss": 0.0118,
      "step": 1948880
    },
    {
      "epoch": 3.1894175945746026,
      "grad_norm": 0.5324106216430664,
      "learning_rate": 3.6450254673405246e-06,
      "loss": 0.0114,
      "step": 1948900
    },
    {
      "epoch": 3.1894503250132558,
      "grad_norm": 0.3771224319934845,
      "learning_rate": 3.6449595751270073e-06,
      "loss": 0.0181,
      "step": 1948920
    },
    {
      "epoch": 3.1894830554519094,
      "grad_norm": 0.38083136081695557,
      "learning_rate": 3.6448936829134905e-06,
      "loss": 0.0116,
      "step": 1948940
    },
    {
      "epoch": 3.1895157858905625,
      "grad_norm": 0.32670390605926514,
      "learning_rate": 3.6448277906999737e-06,
      "loss": 0.0095,
      "step": 1948960
    },
    {
      "epoch": 3.1895485163292157,
      "grad_norm": 0.15674948692321777,
      "learning_rate": 3.6447618984864564e-06,
      "loss": 0.0142,
      "step": 1948980
    },
    {
      "epoch": 3.1895812467678692,
      "grad_norm": 0.28291964530944824,
      "learning_rate": 3.644696006272939e-06,
      "loss": 0.014,
      "step": 1949000
    },
    {
      "epoch": 3.1896139772065224,
      "grad_norm": 0.1914779543876648,
      "learning_rate": 3.644630114059422e-06,
      "loss": 0.0078,
      "step": 1949020
    },
    {
      "epoch": 3.189646707645176,
      "grad_norm": 0.20715972781181335,
      "learning_rate": 3.644564221845905e-06,
      "loss": 0.0134,
      "step": 1949040
    },
    {
      "epoch": 3.189679438083829,
      "grad_norm": 0.06720244884490967,
      "learning_rate": 3.6444983296323878e-06,
      "loss": 0.0147,
      "step": 1949060
    },
    {
      "epoch": 3.1897121685224827,
      "grad_norm": 0.7649067640304565,
      "learning_rate": 3.6444324374188705e-06,
      "loss": 0.011,
      "step": 1949080
    },
    {
      "epoch": 3.189744898961136,
      "grad_norm": 0.5223735570907593,
      "learning_rate": 3.6443665452053533e-06,
      "loss": 0.014,
      "step": 1949100
    },
    {
      "epoch": 3.189777629399789,
      "grad_norm": 0.8453068137168884,
      "learning_rate": 3.644300652991836e-06,
      "loss": 0.0103,
      "step": 1949120
    },
    {
      "epoch": 3.1898103598384426,
      "grad_norm": 0.3932366371154785,
      "learning_rate": 3.644234760778319e-06,
      "loss": 0.01,
      "step": 1949140
    },
    {
      "epoch": 3.189843090277096,
      "grad_norm": 0.15790890157222748,
      "learning_rate": 3.644168868564802e-06,
      "loss": 0.0068,
      "step": 1949160
    },
    {
      "epoch": 3.1898758207157494,
      "grad_norm": 0.1776271015405655,
      "learning_rate": 3.6441029763512846e-06,
      "loss": 0.0085,
      "step": 1949180
    },
    {
      "epoch": 3.1899085511544025,
      "grad_norm": 0.2156112939119339,
      "learning_rate": 3.6440370841377674e-06,
      "loss": 0.011,
      "step": 1949200
    },
    {
      "epoch": 3.1899412815930557,
      "grad_norm": 0.5799176096916199,
      "learning_rate": 3.6439711919242506e-06,
      "loss": 0.0098,
      "step": 1949220
    },
    {
      "epoch": 3.1899740120317093,
      "grad_norm": 0.1769510954618454,
      "learning_rate": 3.6439052997107333e-06,
      "loss": 0.0094,
      "step": 1949240
    },
    {
      "epoch": 3.1900067424703624,
      "grad_norm": 0.0885421559214592,
      "learning_rate": 3.643839407497216e-06,
      "loss": 0.0094,
      "step": 1949260
    },
    {
      "epoch": 3.190039472909016,
      "grad_norm": 0.2903900146484375,
      "learning_rate": 3.6437735152836996e-06,
      "loss": 0.0079,
      "step": 1949280
    },
    {
      "epoch": 3.190072203347669,
      "grad_norm": 0.15572191774845123,
      "learning_rate": 3.6437076230701824e-06,
      "loss": 0.0112,
      "step": 1949300
    },
    {
      "epoch": 3.1901049337863228,
      "grad_norm": 0.28873300552368164,
      "learning_rate": 3.643641730856665e-06,
      "loss": 0.0101,
      "step": 1949320
    },
    {
      "epoch": 3.190137664224976,
      "grad_norm": 0.6638376712799072,
      "learning_rate": 3.643575838643148e-06,
      "loss": 0.0112,
      "step": 1949340
    },
    {
      "epoch": 3.190170394663629,
      "grad_norm": 0.3534335196018219,
      "learning_rate": 3.643509946429631e-06,
      "loss": 0.0122,
      "step": 1949360
    },
    {
      "epoch": 3.1902031251022827,
      "grad_norm": 0.14673589169979095,
      "learning_rate": 3.6434440542161137e-06,
      "loss": 0.0135,
      "step": 1949380
    },
    {
      "epoch": 3.190235855540936,
      "grad_norm": 1.161916732788086,
      "learning_rate": 3.6433781620025965e-06,
      "loss": 0.0179,
      "step": 1949400
    },
    {
      "epoch": 3.1902685859795894,
      "grad_norm": 0.16095136106014252,
      "learning_rate": 3.6433122697890792e-06,
      "loss": 0.0125,
      "step": 1949420
    },
    {
      "epoch": 3.1903013164182425,
      "grad_norm": 0.11778510361909866,
      "learning_rate": 3.6432463775755624e-06,
      "loss": 0.0111,
      "step": 1949440
    },
    {
      "epoch": 3.190334046856896,
      "grad_norm": 1.2874456644058228,
      "learning_rate": 3.643180485362045e-06,
      "loss": 0.0095,
      "step": 1949460
    },
    {
      "epoch": 3.1903667772955493,
      "grad_norm": 0.3205847144126892,
      "learning_rate": 3.643114593148528e-06,
      "loss": 0.009,
      "step": 1949480
    },
    {
      "epoch": 3.1903995077342024,
      "grad_norm": 0.4419575035572052,
      "learning_rate": 3.6430487009350106e-06,
      "loss": 0.009,
      "step": 1949500
    },
    {
      "epoch": 3.190432238172856,
      "grad_norm": 0.2695525288581848,
      "learning_rate": 3.6429828087214938e-06,
      "loss": 0.0102,
      "step": 1949520
    },
    {
      "epoch": 3.190464968611509,
      "grad_norm": 0.4958886504173279,
      "learning_rate": 3.6429169165079765e-06,
      "loss": 0.0182,
      "step": 1949540
    },
    {
      "epoch": 3.190497699050163,
      "grad_norm": 0.5950426459312439,
      "learning_rate": 3.6428510242944592e-06,
      "loss": 0.011,
      "step": 1949560
    },
    {
      "epoch": 3.190530429488816,
      "grad_norm": 0.2513783574104309,
      "learning_rate": 3.642785132080942e-06,
      "loss": 0.011,
      "step": 1949580
    },
    {
      "epoch": 3.1905631599274695,
      "grad_norm": 0.20498807728290558,
      "learning_rate": 3.6427192398674247e-06,
      "loss": 0.0071,
      "step": 1949600
    },
    {
      "epoch": 3.1905958903661227,
      "grad_norm": 0.1142946109175682,
      "learning_rate": 3.642653347653908e-06,
      "loss": 0.0129,
      "step": 1949620
    },
    {
      "epoch": 3.190628620804776,
      "grad_norm": 0.056664034724235535,
      "learning_rate": 3.642587455440391e-06,
      "loss": 0.0121,
      "step": 1949640
    },
    {
      "epoch": 3.1906613512434294,
      "grad_norm": 0.3242846131324768,
      "learning_rate": 3.642521563226874e-06,
      "loss": 0.0111,
      "step": 1949660
    },
    {
      "epoch": 3.1906940816820826,
      "grad_norm": 0.15683075785636902,
      "learning_rate": 3.642455671013357e-06,
      "loss": 0.0113,
      "step": 1949680
    },
    {
      "epoch": 3.190726812120736,
      "grad_norm": 0.6218741536140442,
      "learning_rate": 3.6423897787998397e-06,
      "loss": 0.0092,
      "step": 1949700
    },
    {
      "epoch": 3.1907595425593893,
      "grad_norm": 0.20822031795978546,
      "learning_rate": 3.6423238865863224e-06,
      "loss": 0.0107,
      "step": 1949720
    },
    {
      "epoch": 3.190792272998043,
      "grad_norm": 0.4941890835762024,
      "learning_rate": 3.642257994372805e-06,
      "loss": 0.0125,
      "step": 1949740
    },
    {
      "epoch": 3.190825003436696,
      "grad_norm": 0.5038274526596069,
      "learning_rate": 3.6421921021592883e-06,
      "loss": 0.0102,
      "step": 1949760
    },
    {
      "epoch": 3.190857733875349,
      "grad_norm": 0.7446179986000061,
      "learning_rate": 3.642126209945771e-06,
      "loss": 0.0116,
      "step": 1949780
    },
    {
      "epoch": 3.190890464314003,
      "grad_norm": 0.16887541115283966,
      "learning_rate": 3.642060317732254e-06,
      "loss": 0.0102,
      "step": 1949800
    },
    {
      "epoch": 3.190923194752656,
      "grad_norm": 0.5692664384841919,
      "learning_rate": 3.6419944255187366e-06,
      "loss": 0.0144,
      "step": 1949820
    },
    {
      "epoch": 3.1909559251913096,
      "grad_norm": 0.2784486711025238,
      "learning_rate": 3.6419285333052197e-06,
      "loss": 0.015,
      "step": 1949840
    },
    {
      "epoch": 3.1909886556299627,
      "grad_norm": 0.04613224416971207,
      "learning_rate": 3.6418626410917025e-06,
      "loss": 0.0143,
      "step": 1949860
    },
    {
      "epoch": 3.1910213860686163,
      "grad_norm": 0.27186569571495056,
      "learning_rate": 3.641796748878185e-06,
      "loss": 0.0098,
      "step": 1949880
    },
    {
      "epoch": 3.1910541165072694,
      "grad_norm": 0.309508353471756,
      "learning_rate": 3.641730856664668e-06,
      "loss": 0.0123,
      "step": 1949900
    },
    {
      "epoch": 3.1910868469459226,
      "grad_norm": 0.8203215003013611,
      "learning_rate": 3.641664964451151e-06,
      "loss": 0.0113,
      "step": 1949920
    },
    {
      "epoch": 3.191119577384576,
      "grad_norm": 0.08432231098413467,
      "learning_rate": 3.641599072237634e-06,
      "loss": 0.0088,
      "step": 1949940
    },
    {
      "epoch": 3.1911523078232293,
      "grad_norm": 0.07215183228254318,
      "learning_rate": 3.6415331800241166e-06,
      "loss": 0.0077,
      "step": 1949960
    },
    {
      "epoch": 3.191185038261883,
      "grad_norm": 0.18199463188648224,
      "learning_rate": 3.6414672878106e-06,
      "loss": 0.0078,
      "step": 1949980
    },
    {
      "epoch": 3.191217768700536,
      "grad_norm": 0.7072787880897522,
      "learning_rate": 3.641401395597083e-06,
      "loss": 0.0108,
      "step": 1950000
    },
    {
      "epoch": 3.191217768700536,
      "eval_loss": 0.007086367812007666,
      "eval_runtime": 6521.1643,
      "eval_samples_per_second": 157.619,
      "eval_steps_per_second": 15.762,
      "eval_sts-dev_pearson_cosine": 0.983780319032031,
      "eval_sts-dev_spearman_cosine": 0.8947373418807876,
      "step": 1950000
    },
    {
      "epoch": 3.1912504991391897,
      "grad_norm": 0.17445795238018036,
      "learning_rate": 3.6413355033835656e-06,
      "loss": 0.0142,
      "step": 1950020
    },
    {
      "epoch": 3.191283229577843,
      "grad_norm": 0.2569613456726074,
      "learning_rate": 3.6412696111700484e-06,
      "loss": 0.0117,
      "step": 1950040
    },
    {
      "epoch": 3.191315960016496,
      "grad_norm": 0.1629112809896469,
      "learning_rate": 3.6412037189565316e-06,
      "loss": 0.0127,
      "step": 1950060
    },
    {
      "epoch": 3.1913486904551496,
      "grad_norm": 0.3304556906223297,
      "learning_rate": 3.6411378267430143e-06,
      "loss": 0.0115,
      "step": 1950080
    },
    {
      "epoch": 3.1913814208938027,
      "grad_norm": 0.20484696328639984,
      "learning_rate": 3.641071934529497e-06,
      "loss": 0.0166,
      "step": 1950100
    },
    {
      "epoch": 3.1914141513324563,
      "grad_norm": 0.38126444816589355,
      "learning_rate": 3.6410060423159798e-06,
      "loss": 0.0089,
      "step": 1950120
    },
    {
      "epoch": 3.1914468817711095,
      "grad_norm": 0.17441028356552124,
      "learning_rate": 3.6409401501024625e-06,
      "loss": 0.0161,
      "step": 1950140
    },
    {
      "epoch": 3.191479612209763,
      "grad_norm": 0.06633615493774414,
      "learning_rate": 3.6408742578889457e-06,
      "loss": 0.0133,
      "step": 1950160
    },
    {
      "epoch": 3.191512342648416,
      "grad_norm": 0.2232396900653839,
      "learning_rate": 3.6408083656754284e-06,
      "loss": 0.0087,
      "step": 1950180
    },
    {
      "epoch": 3.1915450730870694,
      "grad_norm": 0.23518218100070953,
      "learning_rate": 3.640742473461911e-06,
      "loss": 0.0136,
      "step": 1950200
    },
    {
      "epoch": 3.191577803525723,
      "grad_norm": 0.7710690498352051,
      "learning_rate": 3.640676581248394e-06,
      "loss": 0.01,
      "step": 1950220
    },
    {
      "epoch": 3.191610533964376,
      "grad_norm": 0.27763816714286804,
      "learning_rate": 3.640610689034877e-06,
      "loss": 0.0106,
      "step": 1950240
    },
    {
      "epoch": 3.1916432644030297,
      "grad_norm": 0.5809419751167297,
      "learning_rate": 3.64054479682136e-06,
      "loss": 0.0101,
      "step": 1950260
    },
    {
      "epoch": 3.191675994841683,
      "grad_norm": 0.13882265985012054,
      "learning_rate": 3.6404789046078425e-06,
      "loss": 0.0125,
      "step": 1950280
    },
    {
      "epoch": 3.191708725280336,
      "grad_norm": 0.23305009305477142,
      "learning_rate": 3.6404130123943253e-06,
      "loss": 0.0124,
      "step": 1950300
    },
    {
      "epoch": 3.1917414557189896,
      "grad_norm": 0.4805256426334381,
      "learning_rate": 3.6403471201808084e-06,
      "loss": 0.0156,
      "step": 1950320
    },
    {
      "epoch": 3.1917741861576427,
      "grad_norm": 0.2718113958835602,
      "learning_rate": 3.6402812279672916e-06,
      "loss": 0.0181,
      "step": 1950340
    },
    {
      "epoch": 3.1918069165962963,
      "grad_norm": 0.25552991032600403,
      "learning_rate": 3.6402153357537743e-06,
      "loss": 0.0102,
      "step": 1950360
    },
    {
      "epoch": 3.1918396470349495,
      "grad_norm": 0.1876053661108017,
      "learning_rate": 3.6401494435402575e-06,
      "loss": 0.0126,
      "step": 1950380
    },
    {
      "epoch": 3.191872377473603,
      "grad_norm": 0.33290866017341614,
      "learning_rate": 3.6400835513267402e-06,
      "loss": 0.0145,
      "step": 1950400
    },
    {
      "epoch": 3.1919051079122562,
      "grad_norm": 0.2485416978597641,
      "learning_rate": 3.640017659113223e-06,
      "loss": 0.013,
      "step": 1950420
    },
    {
      "epoch": 3.1919378383509094,
      "grad_norm": 0.2001236081123352,
      "learning_rate": 3.6399517668997057e-06,
      "loss": 0.0121,
      "step": 1950440
    },
    {
      "epoch": 3.191970568789563,
      "grad_norm": 0.2146974802017212,
      "learning_rate": 3.639885874686189e-06,
      "loss": 0.0104,
      "step": 1950460
    },
    {
      "epoch": 3.192003299228216,
      "grad_norm": 0.2973759174346924,
      "learning_rate": 3.6398199824726716e-06,
      "loss": 0.0118,
      "step": 1950480
    },
    {
      "epoch": 3.1920360296668697,
      "grad_norm": 0.14350442588329315,
      "learning_rate": 3.6397540902591544e-06,
      "loss": 0.0072,
      "step": 1950500
    },
    {
      "epoch": 3.192068760105523,
      "grad_norm": 0.544057309627533,
      "learning_rate": 3.639688198045637e-06,
      "loss": 0.011,
      "step": 1950520
    },
    {
      "epoch": 3.1921014905441765,
      "grad_norm": 0.3808051347732544,
      "learning_rate": 3.6396223058321203e-06,
      "loss": 0.017,
      "step": 1950540
    },
    {
      "epoch": 3.1921342209828296,
      "grad_norm": 0.13669832050800323,
      "learning_rate": 3.639556413618603e-06,
      "loss": 0.0108,
      "step": 1950560
    },
    {
      "epoch": 3.1921669514214828,
      "grad_norm": 0.6829037666320801,
      "learning_rate": 3.6394905214050857e-06,
      "loss": 0.0151,
      "step": 1950580
    },
    {
      "epoch": 3.1921996818601364,
      "grad_norm": 0.2523590624332428,
      "learning_rate": 3.6394246291915685e-06,
      "loss": 0.0126,
      "step": 1950600
    },
    {
      "epoch": 3.1922324122987895,
      "grad_norm": 0.11496488004922867,
      "learning_rate": 3.6393587369780512e-06,
      "loss": 0.0109,
      "step": 1950620
    },
    {
      "epoch": 3.192265142737443,
      "grad_norm": 0.17839378118515015,
      "learning_rate": 3.6392928447645344e-06,
      "loss": 0.0115,
      "step": 1950640
    },
    {
      "epoch": 3.1922978731760963,
      "grad_norm": 0.18873922526836395,
      "learning_rate": 3.639226952551017e-06,
      "loss": 0.0084,
      "step": 1950660
    },
    {
      "epoch": 3.19233060361475,
      "grad_norm": 0.10429123044013977,
      "learning_rate": 3.6391610603375e-06,
      "loss": 0.0102,
      "step": 1950680
    },
    {
      "epoch": 3.192363334053403,
      "grad_norm": 2.130545139312744,
      "learning_rate": 3.6390951681239835e-06,
      "loss": 0.01,
      "step": 1950700
    },
    {
      "epoch": 3.192396064492056,
      "grad_norm": 0.2548060417175293,
      "learning_rate": 3.639029275910466e-06,
      "loss": 0.0193,
      "step": 1950720
    },
    {
      "epoch": 3.1924287949307097,
      "grad_norm": 0.5150635242462158,
      "learning_rate": 3.638963383696949e-06,
      "loss": 0.0175,
      "step": 1950740
    },
    {
      "epoch": 3.192461525369363,
      "grad_norm": 0.25309041142463684,
      "learning_rate": 3.6388974914834317e-06,
      "loss": 0.0105,
      "step": 1950760
    },
    {
      "epoch": 3.1924942558080165,
      "grad_norm": 0.3393893837928772,
      "learning_rate": 3.638831599269915e-06,
      "loss": 0.0098,
      "step": 1950780
    },
    {
      "epoch": 3.1925269862466696,
      "grad_norm": 0.21024951338768005,
      "learning_rate": 3.6387657070563976e-06,
      "loss": 0.0068,
      "step": 1950800
    },
    {
      "epoch": 3.192559716685323,
      "grad_norm": 0.37120914459228516,
      "learning_rate": 3.6386998148428803e-06,
      "loss": 0.0132,
      "step": 1950820
    },
    {
      "epoch": 3.1925924471239764,
      "grad_norm": 0.16589023172855377,
      "learning_rate": 3.638633922629363e-06,
      "loss": 0.0078,
      "step": 1950840
    },
    {
      "epoch": 3.1926251775626295,
      "grad_norm": 0.17981144785881042,
      "learning_rate": 3.6385680304158462e-06,
      "loss": 0.011,
      "step": 1950860
    },
    {
      "epoch": 3.192657908001283,
      "grad_norm": 0.395324170589447,
      "learning_rate": 3.638502138202329e-06,
      "loss": 0.0102,
      "step": 1950880
    },
    {
      "epoch": 3.1926906384399363,
      "grad_norm": 0.06095260754227638,
      "learning_rate": 3.6384362459888117e-06,
      "loss": 0.0074,
      "step": 1950900
    },
    {
      "epoch": 3.19272336887859,
      "grad_norm": 0.24783308804035187,
      "learning_rate": 3.6383703537752944e-06,
      "loss": 0.0128,
      "step": 1950920
    },
    {
      "epoch": 3.192756099317243,
      "grad_norm": 0.13993939757347107,
      "learning_rate": 3.6383044615617776e-06,
      "loss": 0.0137,
      "step": 1950940
    },
    {
      "epoch": 3.192788829755896,
      "grad_norm": 0.3486059904098511,
      "learning_rate": 3.6382385693482603e-06,
      "loss": 0.0113,
      "step": 1950960
    },
    {
      "epoch": 3.1928215601945498,
      "grad_norm": 0.35045644640922546,
      "learning_rate": 3.638172677134743e-06,
      "loss": 0.01,
      "step": 1950980
    },
    {
      "epoch": 3.192854290633203,
      "grad_norm": 0.2259567677974701,
      "learning_rate": 3.638106784921226e-06,
      "loss": 0.0103,
      "step": 1951000
    },
    {
      "epoch": 3.1928870210718565,
      "grad_norm": 0.13467761874198914,
      "learning_rate": 3.6380408927077086e-06,
      "loss": 0.0088,
      "step": 1951020
    },
    {
      "epoch": 3.1929197515105097,
      "grad_norm": 1.1147040128707886,
      "learning_rate": 3.637975000494192e-06,
      "loss": 0.014,
      "step": 1951040
    },
    {
      "epoch": 3.1929524819491633,
      "grad_norm": 0.5248647332191467,
      "learning_rate": 3.637909108280675e-06,
      "loss": 0.0135,
      "step": 1951060
    },
    {
      "epoch": 3.1929852123878164,
      "grad_norm": 0.5834996700286865,
      "learning_rate": 3.637843216067158e-06,
      "loss": 0.0143,
      "step": 1951080
    },
    {
      "epoch": 3.1930179428264696,
      "grad_norm": 0.3305821716785431,
      "learning_rate": 3.637777323853641e-06,
      "loss": 0.0117,
      "step": 1951100
    },
    {
      "epoch": 3.193050673265123,
      "grad_norm": 0.13647742569446564,
      "learning_rate": 3.6377114316401235e-06,
      "loss": 0.0074,
      "step": 1951120
    },
    {
      "epoch": 3.1930834037037763,
      "grad_norm": 0.5304767489433289,
      "learning_rate": 3.6376455394266063e-06,
      "loss": 0.0101,
      "step": 1951140
    },
    {
      "epoch": 3.19311613414243,
      "grad_norm": 0.09913600981235504,
      "learning_rate": 3.637579647213089e-06,
      "loss": 0.0104,
      "step": 1951160
    },
    {
      "epoch": 3.193148864581083,
      "grad_norm": 0.16808252036571503,
      "learning_rate": 3.637513754999572e-06,
      "loss": 0.0107,
      "step": 1951180
    },
    {
      "epoch": 3.1931815950197366,
      "grad_norm": 0.19961945712566376,
      "learning_rate": 3.637447862786055e-06,
      "loss": 0.0096,
      "step": 1951200
    },
    {
      "epoch": 3.19321432545839,
      "grad_norm": 0.1588136851787567,
      "learning_rate": 3.6373819705725377e-06,
      "loss": 0.012,
      "step": 1951220
    },
    {
      "epoch": 3.193247055897043,
      "grad_norm": 0.33531010150909424,
      "learning_rate": 3.6373160783590204e-06,
      "loss": 0.0106,
      "step": 1951240
    },
    {
      "epoch": 3.1932797863356965,
      "grad_norm": 0.14656083285808563,
      "learning_rate": 3.6372501861455036e-06,
      "loss": 0.0114,
      "step": 1951260
    },
    {
      "epoch": 3.1933125167743497,
      "grad_norm": 0.11802740395069122,
      "learning_rate": 3.6371842939319863e-06,
      "loss": 0.0092,
      "step": 1951280
    },
    {
      "epoch": 3.1933452472130033,
      "grad_norm": 0.2518709599971771,
      "learning_rate": 3.637118401718469e-06,
      "loss": 0.0125,
      "step": 1951300
    },
    {
      "epoch": 3.1933779776516564,
      "grad_norm": 0.15565164387226105,
      "learning_rate": 3.6370525095049518e-06,
      "loss": 0.0114,
      "step": 1951320
    },
    {
      "epoch": 3.19341070809031,
      "grad_norm": 0.5132642388343811,
      "learning_rate": 3.636986617291435e-06,
      "loss": 0.01,
      "step": 1951340
    },
    {
      "epoch": 3.193443438528963,
      "grad_norm": 0.2526840567588806,
      "learning_rate": 3.6369207250779177e-06,
      "loss": 0.009,
      "step": 1951360
    },
    {
      "epoch": 3.1934761689676163,
      "grad_norm": 0.11683768779039383,
      "learning_rate": 3.6368548328644004e-06,
      "loss": 0.0157,
      "step": 1951380
    },
    {
      "epoch": 3.19350889940627,
      "grad_norm": 0.0575680173933506,
      "learning_rate": 3.636788940650884e-06,
      "loss": 0.0135,
      "step": 1951400
    },
    {
      "epoch": 3.193541629844923,
      "grad_norm": 0.28354236483573914,
      "learning_rate": 3.6367230484373667e-06,
      "loss": 0.0102,
      "step": 1951420
    },
    {
      "epoch": 3.1935743602835767,
      "grad_norm": 0.04159462824463844,
      "learning_rate": 3.6366571562238495e-06,
      "loss": 0.0103,
      "step": 1951440
    },
    {
      "epoch": 3.19360709072223,
      "grad_norm": 0.22851718962192535,
      "learning_rate": 3.6365912640103322e-06,
      "loss": 0.0104,
      "step": 1951460
    },
    {
      "epoch": 3.1936398211608834,
      "grad_norm": 1.1135441064834595,
      "learning_rate": 3.6365253717968154e-06,
      "loss": 0.0096,
      "step": 1951480
    },
    {
      "epoch": 3.1936725515995366,
      "grad_norm": 0.17920374870300293,
      "learning_rate": 3.636459479583298e-06,
      "loss": 0.0077,
      "step": 1951500
    },
    {
      "epoch": 3.1937052820381897,
      "grad_norm": 0.3264829218387604,
      "learning_rate": 3.636393587369781e-06,
      "loss": 0.0123,
      "step": 1951520
    },
    {
      "epoch": 3.1937380124768433,
      "grad_norm": 0.3166673183441162,
      "learning_rate": 3.6363276951562636e-06,
      "loss": 0.01,
      "step": 1951540
    },
    {
      "epoch": 3.1937707429154965,
      "grad_norm": 0.24262170493602753,
      "learning_rate": 3.6362618029427463e-06,
      "loss": 0.0114,
      "step": 1951560
    },
    {
      "epoch": 3.19380347335415,
      "grad_norm": 0.3228082060813904,
      "learning_rate": 3.6361959107292295e-06,
      "loss": 0.0156,
      "step": 1951580
    },
    {
      "epoch": 3.193836203792803,
      "grad_norm": 0.20662760734558105,
      "learning_rate": 3.6361300185157123e-06,
      "loss": 0.0136,
      "step": 1951600
    },
    {
      "epoch": 3.193868934231457,
      "grad_norm": 0.14420095086097717,
      "learning_rate": 3.636064126302195e-06,
      "loss": 0.0093,
      "step": 1951620
    },
    {
      "epoch": 3.19390166467011,
      "grad_norm": 0.27333956956863403,
      "learning_rate": 3.6359982340886777e-06,
      "loss": 0.0181,
      "step": 1951640
    },
    {
      "epoch": 3.193934395108763,
      "grad_norm": 0.44021156430244446,
      "learning_rate": 3.635932341875161e-06,
      "loss": 0.0096,
      "step": 1951660
    },
    {
      "epoch": 3.1939671255474167,
      "grad_norm": 0.2317492812871933,
      "learning_rate": 3.6358664496616436e-06,
      "loss": 0.0142,
      "step": 1951680
    },
    {
      "epoch": 3.19399985598607,
      "grad_norm": 0.3052377998828888,
      "learning_rate": 3.6358005574481264e-06,
      "loss": 0.012,
      "step": 1951700
    },
    {
      "epoch": 3.1940325864247234,
      "grad_norm": 0.2929454743862152,
      "learning_rate": 3.635734665234609e-06,
      "loss": 0.0103,
      "step": 1951720
    },
    {
      "epoch": 3.1940653168633766,
      "grad_norm": 0.29062986373901367,
      "learning_rate": 3.6356687730210927e-06,
      "loss": 0.0083,
      "step": 1951740
    },
    {
      "epoch": 3.19409804730203,
      "grad_norm": 0.15986090898513794,
      "learning_rate": 3.6356028808075754e-06,
      "loss": 0.0146,
      "step": 1951760
    },
    {
      "epoch": 3.1941307777406833,
      "grad_norm": 0.11042828112840652,
      "learning_rate": 3.635536988594058e-06,
      "loss": 0.0122,
      "step": 1951780
    },
    {
      "epoch": 3.1941635081793365,
      "grad_norm": 0.6232302784919739,
      "learning_rate": 3.6354710963805413e-06,
      "loss": 0.0102,
      "step": 1951800
    },
    {
      "epoch": 3.19419623861799,
      "grad_norm": 0.3281078636646271,
      "learning_rate": 3.635405204167024e-06,
      "loss": 0.0126,
      "step": 1951820
    },
    {
      "epoch": 3.194228969056643,
      "grad_norm": 0.2533453702926636,
      "learning_rate": 3.635339311953507e-06,
      "loss": 0.0135,
      "step": 1951840
    },
    {
      "epoch": 3.194261699495297,
      "grad_norm": 0.16556251049041748,
      "learning_rate": 3.6352734197399896e-06,
      "loss": 0.0139,
      "step": 1951860
    },
    {
      "epoch": 3.19429442993395,
      "grad_norm": 0.3489242494106293,
      "learning_rate": 3.6352075275264727e-06,
      "loss": 0.0091,
      "step": 1951880
    },
    {
      "epoch": 3.194327160372603,
      "grad_norm": 0.39076370000839233,
      "learning_rate": 3.6351416353129555e-06,
      "loss": 0.012,
      "step": 1951900
    },
    {
      "epoch": 3.1943598908112567,
      "grad_norm": 0.22849543392658234,
      "learning_rate": 3.635075743099438e-06,
      "loss": 0.0166,
      "step": 1951920
    },
    {
      "epoch": 3.19439262124991,
      "grad_norm": 0.9989933967590332,
      "learning_rate": 3.635009850885921e-06,
      "loss": 0.0099,
      "step": 1951940
    },
    {
      "epoch": 3.1944253516885635,
      "grad_norm": 0.13688595592975616,
      "learning_rate": 3.634943958672404e-06,
      "loss": 0.0151,
      "step": 1951960
    },
    {
      "epoch": 3.1944580821272166,
      "grad_norm": 0.2225610613822937,
      "learning_rate": 3.634878066458887e-06,
      "loss": 0.0139,
      "step": 1951980
    },
    {
      "epoch": 3.19449081256587,
      "grad_norm": 0.0617632232606411,
      "learning_rate": 3.6348121742453696e-06,
      "loss": 0.0095,
      "step": 1952000
    },
    {
      "epoch": 3.1945235430045233,
      "grad_norm": 0.05625353753566742,
      "learning_rate": 3.6347462820318523e-06,
      "loss": 0.0114,
      "step": 1952020
    },
    {
      "epoch": 3.1945562734431765,
      "grad_norm": 0.3576976954936981,
      "learning_rate": 3.634680389818335e-06,
      "loss": 0.0097,
      "step": 1952040
    },
    {
      "epoch": 3.19458900388183,
      "grad_norm": 0.5254210829734802,
      "learning_rate": 3.6346144976048182e-06,
      "loss": 0.0143,
      "step": 1952060
    },
    {
      "epoch": 3.1946217343204832,
      "grad_norm": 0.25331956148147583,
      "learning_rate": 3.634548605391301e-06,
      "loss": 0.0093,
      "step": 1952080
    },
    {
      "epoch": 3.194654464759137,
      "grad_norm": 0.2166455090045929,
      "learning_rate": 3.634482713177784e-06,
      "loss": 0.0114,
      "step": 1952100
    },
    {
      "epoch": 3.19468719519779,
      "grad_norm": 0.1883159875869751,
      "learning_rate": 3.6344168209642673e-06,
      "loss": 0.011,
      "step": 1952120
    },
    {
      "epoch": 3.1947199256364436,
      "grad_norm": 0.35197314620018005,
      "learning_rate": 3.63435092875075e-06,
      "loss": 0.0097,
      "step": 1952140
    },
    {
      "epoch": 3.1947526560750967,
      "grad_norm": 0.24903073906898499,
      "learning_rate": 3.6342850365372328e-06,
      "loss": 0.0161,
      "step": 1952160
    },
    {
      "epoch": 3.19478538651375,
      "grad_norm": 0.37908923625946045,
      "learning_rate": 3.6342191443237155e-06,
      "loss": 0.0123,
      "step": 1952180
    },
    {
      "epoch": 3.1948181169524035,
      "grad_norm": 0.5682070255279541,
      "learning_rate": 3.6341532521101987e-06,
      "loss": 0.0113,
      "step": 1952200
    },
    {
      "epoch": 3.1948508473910566,
      "grad_norm": 0.3435308337211609,
      "learning_rate": 3.6340873598966814e-06,
      "loss": 0.009,
      "step": 1952220
    },
    {
      "epoch": 3.19488357782971,
      "grad_norm": 0.12215752899646759,
      "learning_rate": 3.634021467683164e-06,
      "loss": 0.0116,
      "step": 1952240
    },
    {
      "epoch": 3.1949163082683634,
      "grad_norm": 0.3054560422897339,
      "learning_rate": 3.633955575469647e-06,
      "loss": 0.0097,
      "step": 1952260
    },
    {
      "epoch": 3.1949490387070165,
      "grad_norm": 0.18406561017036438,
      "learning_rate": 3.63388968325613e-06,
      "loss": 0.0096,
      "step": 1952280
    },
    {
      "epoch": 3.19498176914567,
      "grad_norm": 1.0894219875335693,
      "learning_rate": 3.633823791042613e-06,
      "loss": 0.014,
      "step": 1952300
    },
    {
      "epoch": 3.1950144995843233,
      "grad_norm": 0.43864575028419495,
      "learning_rate": 3.6337578988290955e-06,
      "loss": 0.0125,
      "step": 1952320
    },
    {
      "epoch": 3.195047230022977,
      "grad_norm": 0.2284170240163803,
      "learning_rate": 3.6336920066155783e-06,
      "loss": 0.0187,
      "step": 1952340
    },
    {
      "epoch": 3.19507996046163,
      "grad_norm": 0.1706015169620514,
      "learning_rate": 3.6336261144020614e-06,
      "loss": 0.0161,
      "step": 1952360
    },
    {
      "epoch": 3.1951126909002836,
      "grad_norm": 0.19517101347446442,
      "learning_rate": 3.633560222188544e-06,
      "loss": 0.0153,
      "step": 1952380
    },
    {
      "epoch": 3.1951454213389368,
      "grad_norm": 0.5562692284584045,
      "learning_rate": 3.633494329975027e-06,
      "loss": 0.0123,
      "step": 1952400
    },
    {
      "epoch": 3.19517815177759,
      "grad_norm": 0.06937777251005173,
      "learning_rate": 3.6334284377615097e-06,
      "loss": 0.0129,
      "step": 1952420
    },
    {
      "epoch": 3.1952108822162435,
      "grad_norm": 0.23514334857463837,
      "learning_rate": 3.6333625455479924e-06,
      "loss": 0.014,
      "step": 1952440
    },
    {
      "epoch": 3.1952436126548966,
      "grad_norm": 0.16936825215816498,
      "learning_rate": 3.633296653334476e-06,
      "loss": 0.0133,
      "step": 1952460
    },
    {
      "epoch": 3.1952763430935502,
      "grad_norm": 0.4392711818218231,
      "learning_rate": 3.6332307611209587e-06,
      "loss": 0.0116,
      "step": 1952480
    },
    {
      "epoch": 3.1953090735322034,
      "grad_norm": 0.23611798882484436,
      "learning_rate": 3.633164868907442e-06,
      "loss": 0.009,
      "step": 1952500
    },
    {
      "epoch": 3.195341803970857,
      "grad_norm": 1.229293942451477,
      "learning_rate": 3.6330989766939246e-06,
      "loss": 0.0173,
      "step": 1952520
    },
    {
      "epoch": 3.19537453440951,
      "grad_norm": 0.25902852416038513,
      "learning_rate": 3.6330330844804074e-06,
      "loss": 0.0085,
      "step": 1952540
    },
    {
      "epoch": 3.1954072648481633,
      "grad_norm": 0.07515028864145279,
      "learning_rate": 3.63296719226689e-06,
      "loss": 0.0095,
      "step": 1952560
    },
    {
      "epoch": 3.195439995286817,
      "grad_norm": 0.5250238180160522,
      "learning_rate": 3.632901300053373e-06,
      "loss": 0.0134,
      "step": 1952580
    },
    {
      "epoch": 3.19547272572547,
      "grad_norm": 0.19270552694797516,
      "learning_rate": 3.632835407839856e-06,
      "loss": 0.012,
      "step": 1952600
    },
    {
      "epoch": 3.1955054561641236,
      "grad_norm": 0.29046133160591125,
      "learning_rate": 3.6327695156263388e-06,
      "loss": 0.0122,
      "step": 1952620
    },
    {
      "epoch": 3.1955381866027768,
      "grad_norm": 0.2475002259016037,
      "learning_rate": 3.6327036234128215e-06,
      "loss": 0.0153,
      "step": 1952640
    },
    {
      "epoch": 3.1955709170414304,
      "grad_norm": 0.11529459804296494,
      "learning_rate": 3.6326377311993042e-06,
      "loss": 0.0108,
      "step": 1952660
    },
    {
      "epoch": 3.1956036474800835,
      "grad_norm": 0.6310688853263855,
      "learning_rate": 3.6325718389857874e-06,
      "loss": 0.0136,
      "step": 1952680
    },
    {
      "epoch": 3.1956363779187367,
      "grad_norm": 0.07333222776651382,
      "learning_rate": 3.63250594677227e-06,
      "loss": 0.014,
      "step": 1952700
    },
    {
      "epoch": 3.1956691083573903,
      "grad_norm": 0.25123894214630127,
      "learning_rate": 3.632440054558753e-06,
      "loss": 0.0122,
      "step": 1952720
    },
    {
      "epoch": 3.1957018387960434,
      "grad_norm": 0.09779234230518341,
      "learning_rate": 3.6323741623452356e-06,
      "loss": 0.0142,
      "step": 1952740
    },
    {
      "epoch": 3.195734569234697,
      "grad_norm": 0.3652169704437256,
      "learning_rate": 3.6323082701317188e-06,
      "loss": 0.01,
      "step": 1952760
    },
    {
      "epoch": 3.19576729967335,
      "grad_norm": 0.1946963667869568,
      "learning_rate": 3.6322423779182015e-06,
      "loss": 0.0114,
      "step": 1952780
    },
    {
      "epoch": 3.1958000301120038,
      "grad_norm": 0.12935908138751984,
      "learning_rate": 3.6321764857046847e-06,
      "loss": 0.0191,
      "step": 1952800
    },
    {
      "epoch": 3.195832760550657,
      "grad_norm": 0.2807510495185852,
      "learning_rate": 3.632110593491168e-06,
      "loss": 0.0095,
      "step": 1952820
    },
    {
      "epoch": 3.19586549098931,
      "grad_norm": 0.2521554231643677,
      "learning_rate": 3.6320447012776506e-06,
      "loss": 0.0128,
      "step": 1952840
    },
    {
      "epoch": 3.1958982214279636,
      "grad_norm": 0.20206846296787262,
      "learning_rate": 3.6319788090641333e-06,
      "loss": 0.013,
      "step": 1952860
    },
    {
      "epoch": 3.195930951866617,
      "grad_norm": 1.0420914888381958,
      "learning_rate": 3.631912916850616e-06,
      "loss": 0.0169,
      "step": 1952880
    },
    {
      "epoch": 3.1959636823052704,
      "grad_norm": 0.572735071182251,
      "learning_rate": 3.6318470246370992e-06,
      "loss": 0.0104,
      "step": 1952900
    },
    {
      "epoch": 3.1959964127439235,
      "grad_norm": 0.32467859983444214,
      "learning_rate": 3.631781132423582e-06,
      "loss": 0.0117,
      "step": 1952920
    },
    {
      "epoch": 3.196029143182577,
      "grad_norm": 0.21154555678367615,
      "learning_rate": 3.6317152402100647e-06,
      "loss": 0.0082,
      "step": 1952940
    },
    {
      "epoch": 3.1960618736212303,
      "grad_norm": 0.19034583866596222,
      "learning_rate": 3.6316493479965474e-06,
      "loss": 0.0123,
      "step": 1952960
    },
    {
      "epoch": 3.1960946040598834,
      "grad_norm": 0.1928483545780182,
      "learning_rate": 3.63158345578303e-06,
      "loss": 0.0077,
      "step": 1952980
    },
    {
      "epoch": 3.196127334498537,
      "grad_norm": 0.1402808576822281,
      "learning_rate": 3.6315175635695134e-06,
      "loss": 0.0106,
      "step": 1953000
    },
    {
      "epoch": 3.19616006493719,
      "grad_norm": 0.47748470306396484,
      "learning_rate": 3.631451671355996e-06,
      "loss": 0.008,
      "step": 1953020
    },
    {
      "epoch": 3.1961927953758438,
      "grad_norm": 0.4095120131969452,
      "learning_rate": 3.631385779142479e-06,
      "loss": 0.0134,
      "step": 1953040
    },
    {
      "epoch": 3.196225525814497,
      "grad_norm": 0.3043205440044403,
      "learning_rate": 3.6313198869289616e-06,
      "loss": 0.0086,
      "step": 1953060
    },
    {
      "epoch": 3.1962582562531505,
      "grad_norm": 0.24470077455043793,
      "learning_rate": 3.6312539947154447e-06,
      "loss": 0.0134,
      "step": 1953080
    },
    {
      "epoch": 3.1962909866918037,
      "grad_norm": 0.2434372901916504,
      "learning_rate": 3.6311881025019275e-06,
      "loss": 0.0132,
      "step": 1953100
    },
    {
      "epoch": 3.196323717130457,
      "grad_norm": 0.07139132916927338,
      "learning_rate": 3.6311222102884102e-06,
      "loss": 0.011,
      "step": 1953120
    },
    {
      "epoch": 3.1963564475691104,
      "grad_norm": 0.3799435496330261,
      "learning_rate": 3.631056318074893e-06,
      "loss": 0.0096,
      "step": 1953140
    },
    {
      "epoch": 3.1963891780077636,
      "grad_norm": 1.8260138034820557,
      "learning_rate": 3.6309904258613765e-06,
      "loss": 0.0174,
      "step": 1953160
    },
    {
      "epoch": 3.196421908446417,
      "grad_norm": 0.15169906616210938,
      "learning_rate": 3.6309245336478593e-06,
      "loss": 0.0119,
      "step": 1953180
    },
    {
      "epoch": 3.1964546388850703,
      "grad_norm": 0.2800162136554718,
      "learning_rate": 3.630858641434342e-06,
      "loss": 0.0127,
      "step": 1953200
    },
    {
      "epoch": 3.196487369323724,
      "grad_norm": 0.16806429624557495,
      "learning_rate": 3.630792749220825e-06,
      "loss": 0.0138,
      "step": 1953220
    },
    {
      "epoch": 3.196520099762377,
      "grad_norm": 0.5398693084716797,
      "learning_rate": 3.630726857007308e-06,
      "loss": 0.0162,
      "step": 1953240
    },
    {
      "epoch": 3.19655283020103,
      "grad_norm": 0.22865711152553558,
      "learning_rate": 3.6306609647937907e-06,
      "loss": 0.0078,
      "step": 1953260
    },
    {
      "epoch": 3.196585560639684,
      "grad_norm": 0.3576163351535797,
      "learning_rate": 3.6305950725802734e-06,
      "loss": 0.0115,
      "step": 1953280
    },
    {
      "epoch": 3.196618291078337,
      "grad_norm": 0.24703386425971985,
      "learning_rate": 3.6305291803667566e-06,
      "loss": 0.0116,
      "step": 1953300
    },
    {
      "epoch": 3.1966510215169905,
      "grad_norm": 0.23900780081748962,
      "learning_rate": 3.6304632881532393e-06,
      "loss": 0.009,
      "step": 1953320
    },
    {
      "epoch": 3.1966837519556437,
      "grad_norm": 0.06265545636415482,
      "learning_rate": 3.630397395939722e-06,
      "loss": 0.0095,
      "step": 1953340
    },
    {
      "epoch": 3.196716482394297,
      "grad_norm": 0.21941734850406647,
      "learning_rate": 3.6303315037262048e-06,
      "loss": 0.007,
      "step": 1953360
    },
    {
      "epoch": 3.1967492128329504,
      "grad_norm": 0.1808439940214157,
      "learning_rate": 3.630265611512688e-06,
      "loss": 0.0098,
      "step": 1953380
    },
    {
      "epoch": 3.1967819432716036,
      "grad_norm": 0.5371379852294922,
      "learning_rate": 3.6301997192991707e-06,
      "loss": 0.0159,
      "step": 1953400
    },
    {
      "epoch": 3.196814673710257,
      "grad_norm": 0.1884709596633911,
      "learning_rate": 3.6301338270856534e-06,
      "loss": 0.0113,
      "step": 1953420
    },
    {
      "epoch": 3.1968474041489103,
      "grad_norm": 0.24163846671581268,
      "learning_rate": 3.630067934872136e-06,
      "loss": 0.0108,
      "step": 1953440
    },
    {
      "epoch": 3.196880134587564,
      "grad_norm": 0.2130778282880783,
      "learning_rate": 3.630002042658619e-06,
      "loss": 0.012,
      "step": 1953460
    },
    {
      "epoch": 3.196912865026217,
      "grad_norm": 0.6198464632034302,
      "learning_rate": 3.629936150445102e-06,
      "loss": 0.0156,
      "step": 1953480
    },
    {
      "epoch": 3.1969455954648702,
      "grad_norm": 0.126051127910614,
      "learning_rate": 3.6298702582315852e-06,
      "loss": 0.0113,
      "step": 1953500
    },
    {
      "epoch": 3.196978325903524,
      "grad_norm": 0.1738063395023346,
      "learning_rate": 3.629804366018068e-06,
      "loss": 0.0087,
      "step": 1953520
    },
    {
      "epoch": 3.197011056342177,
      "grad_norm": 1.8824810981750488,
      "learning_rate": 3.629738473804551e-06,
      "loss": 0.0147,
      "step": 1953540
    },
    {
      "epoch": 3.1970437867808306,
      "grad_norm": 0.29928475618362427,
      "learning_rate": 3.629672581591034e-06,
      "loss": 0.0119,
      "step": 1953560
    },
    {
      "epoch": 3.1970765172194837,
      "grad_norm": 0.201373890042305,
      "learning_rate": 3.6296066893775166e-06,
      "loss": 0.0078,
      "step": 1953580
    },
    {
      "epoch": 3.1971092476581373,
      "grad_norm": 0.35149243474006653,
      "learning_rate": 3.6295407971639994e-06,
      "loss": 0.0063,
      "step": 1953600
    },
    {
      "epoch": 3.1971419780967905,
      "grad_norm": 0.12407998740673065,
      "learning_rate": 3.6294749049504825e-06,
      "loss": 0.0094,
      "step": 1953620
    },
    {
      "epoch": 3.1971747085354436,
      "grad_norm": 0.27880895137786865,
      "learning_rate": 3.6294090127369653e-06,
      "loss": 0.0098,
      "step": 1953640
    },
    {
      "epoch": 3.197207438974097,
      "grad_norm": 0.5180002450942993,
      "learning_rate": 3.629343120523448e-06,
      "loss": 0.0139,
      "step": 1953660
    },
    {
      "epoch": 3.1972401694127504,
      "grad_norm": 0.546165943145752,
      "learning_rate": 3.6292772283099307e-06,
      "loss": 0.0115,
      "step": 1953680
    },
    {
      "epoch": 3.197272899851404,
      "grad_norm": 0.39256200194358826,
      "learning_rate": 3.629211336096414e-06,
      "loss": 0.0174,
      "step": 1953700
    },
    {
      "epoch": 3.197305630290057,
      "grad_norm": 0.2721894383430481,
      "learning_rate": 3.6291454438828966e-06,
      "loss": 0.0138,
      "step": 1953720
    },
    {
      "epoch": 3.1973383607287107,
      "grad_norm": 0.11755866557359695,
      "learning_rate": 3.6290795516693794e-06,
      "loss": 0.0072,
      "step": 1953740
    },
    {
      "epoch": 3.197371091167364,
      "grad_norm": 0.150796040892601,
      "learning_rate": 3.629013659455862e-06,
      "loss": 0.0119,
      "step": 1953760
    },
    {
      "epoch": 3.197403821606017,
      "grad_norm": 0.931843101978302,
      "learning_rate": 3.6289477672423453e-06,
      "loss": 0.0138,
      "step": 1953780
    },
    {
      "epoch": 3.1974365520446706,
      "grad_norm": 0.49395158886909485,
      "learning_rate": 3.628881875028828e-06,
      "loss": 0.0083,
      "step": 1953800
    },
    {
      "epoch": 3.1974692824833237,
      "grad_norm": 0.13545487821102142,
      "learning_rate": 3.6288159828153108e-06,
      "loss": 0.0121,
      "step": 1953820
    },
    {
      "epoch": 3.1975020129219773,
      "grad_norm": 0.6352779269218445,
      "learning_rate": 3.6287500906017935e-06,
      "loss": 0.0176,
      "step": 1953840
    },
    {
      "epoch": 3.1975347433606305,
      "grad_norm": 0.4761514365673065,
      "learning_rate": 3.628684198388277e-06,
      "loss": 0.0086,
      "step": 1953860
    },
    {
      "epoch": 3.1975674737992836,
      "grad_norm": 0.45151427388191223,
      "learning_rate": 3.62861830617476e-06,
      "loss": 0.0139,
      "step": 1953880
    },
    {
      "epoch": 3.1976002042379372,
      "grad_norm": 0.6603025197982788,
      "learning_rate": 3.6285524139612426e-06,
      "loss": 0.012,
      "step": 1953900
    },
    {
      "epoch": 3.1976329346765904,
      "grad_norm": 0.565542995929718,
      "learning_rate": 3.6284865217477257e-06,
      "loss": 0.0127,
      "step": 1953920
    },
    {
      "epoch": 3.197665665115244,
      "grad_norm": 0.3650187849998474,
      "learning_rate": 3.6284206295342085e-06,
      "loss": 0.0098,
      "step": 1953940
    },
    {
      "epoch": 3.197698395553897,
      "grad_norm": 0.6494781970977783,
      "learning_rate": 3.6283547373206912e-06,
      "loss": 0.0097,
      "step": 1953960
    },
    {
      "epoch": 3.1977311259925507,
      "grad_norm": 0.12221153825521469,
      "learning_rate": 3.628288845107174e-06,
      "loss": 0.0063,
      "step": 1953980
    },
    {
      "epoch": 3.197763856431204,
      "grad_norm": 0.14372965693473816,
      "learning_rate": 3.6282229528936567e-06,
      "loss": 0.0073,
      "step": 1954000
    },
    {
      "epoch": 3.197796586869857,
      "grad_norm": 0.2049262374639511,
      "learning_rate": 3.62815706068014e-06,
      "loss": 0.0129,
      "step": 1954020
    },
    {
      "epoch": 3.1978293173085106,
      "grad_norm": 0.10846787691116333,
      "learning_rate": 3.6280911684666226e-06,
      "loss": 0.0076,
      "step": 1954040
    },
    {
      "epoch": 3.1978620477471638,
      "grad_norm": 0.19446231424808502,
      "learning_rate": 3.6280252762531053e-06,
      "loss": 0.011,
      "step": 1954060
    },
    {
      "epoch": 3.1978947781858174,
      "grad_norm": 0.20687204599380493,
      "learning_rate": 3.627959384039588e-06,
      "loss": 0.0083,
      "step": 1954080
    },
    {
      "epoch": 3.1979275086244705,
      "grad_norm": 0.8082832098007202,
      "learning_rate": 3.6278934918260712e-06,
      "loss": 0.0114,
      "step": 1954100
    },
    {
      "epoch": 3.197960239063124,
      "grad_norm": 0.9199805855751038,
      "learning_rate": 3.627827599612554e-06,
      "loss": 0.0088,
      "step": 1954120
    },
    {
      "epoch": 3.1979929695017772,
      "grad_norm": 1.1454811096191406,
      "learning_rate": 3.6277617073990367e-06,
      "loss": 0.0138,
      "step": 1954140
    },
    {
      "epoch": 3.1980256999404304,
      "grad_norm": 0.065765380859375,
      "learning_rate": 3.6276958151855195e-06,
      "loss": 0.015,
      "step": 1954160
    },
    {
      "epoch": 3.198058430379084,
      "grad_norm": 0.25530123710632324,
      "learning_rate": 3.6276299229720026e-06,
      "loss": 0.0124,
      "step": 1954180
    },
    {
      "epoch": 3.198091160817737,
      "grad_norm": 0.2393798977136612,
      "learning_rate": 3.6275640307584858e-06,
      "loss": 0.0107,
      "step": 1954200
    },
    {
      "epoch": 3.1981238912563907,
      "grad_norm": 0.2791222631931305,
      "learning_rate": 3.6274981385449685e-06,
      "loss": 0.0108,
      "step": 1954220
    },
    {
      "epoch": 3.198156621695044,
      "grad_norm": 0.13163651525974274,
      "learning_rate": 3.6274322463314517e-06,
      "loss": 0.007,
      "step": 1954240
    },
    {
      "epoch": 3.1981893521336975,
      "grad_norm": 0.3396068215370178,
      "learning_rate": 3.6273663541179344e-06,
      "loss": 0.0089,
      "step": 1954260
    },
    {
      "epoch": 3.1982220825723506,
      "grad_norm": 0.1835988461971283,
      "learning_rate": 3.627300461904417e-06,
      "loss": 0.0123,
      "step": 1954280
    },
    {
      "epoch": 3.198254813011004,
      "grad_norm": 0.39173877239227295,
      "learning_rate": 3.6272345696909e-06,
      "loss": 0.0091,
      "step": 1954300
    },
    {
      "epoch": 3.1982875434496574,
      "grad_norm": 0.2978435158729553,
      "learning_rate": 3.627168677477383e-06,
      "loss": 0.0148,
      "step": 1954320
    },
    {
      "epoch": 3.1983202738883105,
      "grad_norm": 0.21159936487674713,
      "learning_rate": 3.627102785263866e-06,
      "loss": 0.0082,
      "step": 1954340
    },
    {
      "epoch": 3.198353004326964,
      "grad_norm": 0.08814948052167892,
      "learning_rate": 3.6270368930503485e-06,
      "loss": 0.0124,
      "step": 1954360
    },
    {
      "epoch": 3.1983857347656173,
      "grad_norm": 0.13360480964183807,
      "learning_rate": 3.6269710008368313e-06,
      "loss": 0.0069,
      "step": 1954380
    },
    {
      "epoch": 3.198418465204271,
      "grad_norm": 0.14867417514324188,
      "learning_rate": 3.6269051086233145e-06,
      "loss": 0.0133,
      "step": 1954400
    },
    {
      "epoch": 3.198451195642924,
      "grad_norm": 0.12735004723072052,
      "learning_rate": 3.626839216409797e-06,
      "loss": 0.0089,
      "step": 1954420
    },
    {
      "epoch": 3.198483926081577,
      "grad_norm": 0.16153046488761902,
      "learning_rate": 3.62677332419628e-06,
      "loss": 0.0077,
      "step": 1954440
    },
    {
      "epoch": 3.1985166565202308,
      "grad_norm": 0.2924796938896179,
      "learning_rate": 3.6267074319827627e-06,
      "loss": 0.0119,
      "step": 1954460
    },
    {
      "epoch": 3.198549386958884,
      "grad_norm": 0.5435996651649475,
      "learning_rate": 3.6266415397692454e-06,
      "loss": 0.0169,
      "step": 1954480
    },
    {
      "epoch": 3.1985821173975375,
      "grad_norm": 0.3195866644382477,
      "learning_rate": 3.6265756475557286e-06,
      "loss": 0.0175,
      "step": 1954500
    },
    {
      "epoch": 3.1986148478361907,
      "grad_norm": 0.44396623969078064,
      "learning_rate": 3.6265097553422113e-06,
      "loss": 0.0108,
      "step": 1954520
    },
    {
      "epoch": 3.1986475782748442,
      "grad_norm": 0.1680619865655899,
      "learning_rate": 3.626443863128694e-06,
      "loss": 0.0189,
      "step": 1954540
    },
    {
      "epoch": 3.1986803087134974,
      "grad_norm": 0.23585860431194305,
      "learning_rate": 3.6263779709151776e-06,
      "loss": 0.0144,
      "step": 1954560
    },
    {
      "epoch": 3.1987130391521506,
      "grad_norm": 0.17762671411037445,
      "learning_rate": 3.6263120787016604e-06,
      "loss": 0.0115,
      "step": 1954580
    },
    {
      "epoch": 3.198745769590804,
      "grad_norm": 0.2982679307460785,
      "learning_rate": 3.626246186488143e-06,
      "loss": 0.0107,
      "step": 1954600
    },
    {
      "epoch": 3.1987785000294573,
      "grad_norm": 0.21731209754943848,
      "learning_rate": 3.626180294274626e-06,
      "loss": 0.0129,
      "step": 1954620
    },
    {
      "epoch": 3.198811230468111,
      "grad_norm": 0.5113288760185242,
      "learning_rate": 3.626114402061109e-06,
      "loss": 0.0062,
      "step": 1954640
    },
    {
      "epoch": 3.198843960906764,
      "grad_norm": 0.12029796838760376,
      "learning_rate": 3.6260485098475918e-06,
      "loss": 0.0137,
      "step": 1954660
    },
    {
      "epoch": 3.1988766913454176,
      "grad_norm": 0.2587931156158447,
      "learning_rate": 3.6259826176340745e-06,
      "loss": 0.0098,
      "step": 1954680
    },
    {
      "epoch": 3.198909421784071,
      "grad_norm": 0.5476549863815308,
      "learning_rate": 3.6259167254205572e-06,
      "loss": 0.012,
      "step": 1954700
    },
    {
      "epoch": 3.198942152222724,
      "grad_norm": 0.5395821928977966,
      "learning_rate": 3.6258508332070404e-06,
      "loss": 0.0114,
      "step": 1954720
    },
    {
      "epoch": 3.1989748826613775,
      "grad_norm": 0.29952287673950195,
      "learning_rate": 3.625784940993523e-06,
      "loss": 0.0142,
      "step": 1954740
    },
    {
      "epoch": 3.1990076131000307,
      "grad_norm": 0.1331065446138382,
      "learning_rate": 3.625719048780006e-06,
      "loss": 0.0131,
      "step": 1954760
    },
    {
      "epoch": 3.1990403435386843,
      "grad_norm": 0.3609424829483032,
      "learning_rate": 3.6256531565664886e-06,
      "loss": 0.0132,
      "step": 1954780
    },
    {
      "epoch": 3.1990730739773374,
      "grad_norm": 0.39728114008903503,
      "learning_rate": 3.6255872643529718e-06,
      "loss": 0.0079,
      "step": 1954800
    },
    {
      "epoch": 3.199105804415991,
      "grad_norm": 0.3223569691181183,
      "learning_rate": 3.6255213721394545e-06,
      "loss": 0.0135,
      "step": 1954820
    },
    {
      "epoch": 3.199138534854644,
      "grad_norm": 0.13511335849761963,
      "learning_rate": 3.6254554799259373e-06,
      "loss": 0.0122,
      "step": 1954840
    },
    {
      "epoch": 3.1991712652932973,
      "grad_norm": 0.1184004470705986,
      "learning_rate": 3.62538958771242e-06,
      "loss": 0.0114,
      "step": 1954860
    },
    {
      "epoch": 3.199203995731951,
      "grad_norm": 0.15016888082027435,
      "learning_rate": 3.6253236954989027e-06,
      "loss": 0.015,
      "step": 1954880
    },
    {
      "epoch": 3.199236726170604,
      "grad_norm": 0.18063536286354065,
      "learning_rate": 3.625257803285386e-06,
      "loss": 0.0116,
      "step": 1954900
    },
    {
      "epoch": 3.1992694566092577,
      "grad_norm": 0.13098447024822235,
      "learning_rate": 3.625191911071869e-06,
      "loss": 0.0104,
      "step": 1954920
    },
    {
      "epoch": 3.199302187047911,
      "grad_norm": 0.1577564924955368,
      "learning_rate": 3.6251260188583522e-06,
      "loss": 0.0152,
      "step": 1954940
    },
    {
      "epoch": 3.199334917486564,
      "grad_norm": 0.1146254688501358,
      "learning_rate": 3.625060126644835e-06,
      "loss": 0.0109,
      "step": 1954960
    },
    {
      "epoch": 3.1993676479252176,
      "grad_norm": 0.1508251279592514,
      "learning_rate": 3.6249942344313177e-06,
      "loss": 0.009,
      "step": 1954980
    },
    {
      "epoch": 3.1994003783638707,
      "grad_norm": 0.4821834862232208,
      "learning_rate": 3.6249283422178005e-06,
      "loss": 0.0131,
      "step": 1955000
    },
    {
      "epoch": 3.1994331088025243,
      "grad_norm": 0.15600499510765076,
      "learning_rate": 3.624862450004283e-06,
      "loss": 0.0087,
      "step": 1955020
    },
    {
      "epoch": 3.1994658392411774,
      "grad_norm": 0.4568513333797455,
      "learning_rate": 3.6247965577907664e-06,
      "loss": 0.0139,
      "step": 1955040
    },
    {
      "epoch": 3.199498569679831,
      "grad_norm": 0.419903039932251,
      "learning_rate": 3.624730665577249e-06,
      "loss": 0.0131,
      "step": 1955060
    },
    {
      "epoch": 3.199531300118484,
      "grad_norm": 0.4087727963924408,
      "learning_rate": 3.624664773363732e-06,
      "loss": 0.0133,
      "step": 1955080
    },
    {
      "epoch": 3.1995640305571373,
      "grad_norm": 0.5525506734848022,
      "learning_rate": 3.6245988811502146e-06,
      "loss": 0.011,
      "step": 1955100
    },
    {
      "epoch": 3.199596760995791,
      "grad_norm": 0.40777304768562317,
      "learning_rate": 3.6245329889366977e-06,
      "loss": 0.0085,
      "step": 1955120
    },
    {
      "epoch": 3.199629491434444,
      "grad_norm": 0.8480636477470398,
      "learning_rate": 3.6244670967231805e-06,
      "loss": 0.0138,
      "step": 1955140
    },
    {
      "epoch": 3.1996622218730977,
      "grad_norm": 0.27172690629959106,
      "learning_rate": 3.6244012045096632e-06,
      "loss": 0.006,
      "step": 1955160
    },
    {
      "epoch": 3.199694952311751,
      "grad_norm": 0.22613197565078735,
      "learning_rate": 3.624335312296146e-06,
      "loss": 0.008,
      "step": 1955180
    },
    {
      "epoch": 3.1997276827504044,
      "grad_norm": 0.5855807065963745,
      "learning_rate": 3.624269420082629e-06,
      "loss": 0.0161,
      "step": 1955200
    },
    {
      "epoch": 3.1997604131890576,
      "grad_norm": 0.5320645570755005,
      "learning_rate": 3.624203527869112e-06,
      "loss": 0.0087,
      "step": 1955220
    },
    {
      "epoch": 3.1997931436277107,
      "grad_norm": 0.14608745276927948,
      "learning_rate": 3.6241376356555946e-06,
      "loss": 0.0118,
      "step": 1955240
    },
    {
      "epoch": 3.1998258740663643,
      "grad_norm": 0.10486835241317749,
      "learning_rate": 3.624071743442078e-06,
      "loss": 0.0131,
      "step": 1955260
    },
    {
      "epoch": 3.1998586045050175,
      "grad_norm": 0.13354749977588654,
      "learning_rate": 3.624005851228561e-06,
      "loss": 0.0145,
      "step": 1955280
    },
    {
      "epoch": 3.199891334943671,
      "grad_norm": 0.8284517526626587,
      "learning_rate": 3.6239399590150437e-06,
      "loss": 0.0121,
      "step": 1955300
    },
    {
      "epoch": 3.199924065382324,
      "grad_norm": 0.1981603503227234,
      "learning_rate": 3.6238740668015264e-06,
      "loss": 0.0073,
      "step": 1955320
    },
    {
      "epoch": 3.1999567958209774,
      "grad_norm": 0.20938339829444885,
      "learning_rate": 3.6238081745880096e-06,
      "loss": 0.0095,
      "step": 1955340
    },
    {
      "epoch": 3.199989526259631,
      "grad_norm": 0.1510215550661087,
      "learning_rate": 3.6237422823744923e-06,
      "loss": 0.0106,
      "step": 1955360
    },
    {
      "epoch": 3.200022256698284,
      "grad_norm": 0.12255833297967911,
      "learning_rate": 3.623676390160975e-06,
      "loss": 0.0111,
      "step": 1955380
    },
    {
      "epoch": 3.2000549871369377,
      "grad_norm": 0.12941816449165344,
      "learning_rate": 3.623610497947458e-06,
      "loss": 0.0081,
      "step": 1955400
    },
    {
      "epoch": 3.200087717575591,
      "grad_norm": 0.12424474209547043,
      "learning_rate": 3.6235446057339405e-06,
      "loss": 0.0129,
      "step": 1955420
    },
    {
      "epoch": 3.2001204480142444,
      "grad_norm": 0.693864643573761,
      "learning_rate": 3.6234787135204237e-06,
      "loss": 0.0154,
      "step": 1955440
    },
    {
      "epoch": 3.2001531784528976,
      "grad_norm": 0.6695079803466797,
      "learning_rate": 3.6234128213069064e-06,
      "loss": 0.0104,
      "step": 1955460
    },
    {
      "epoch": 3.2001859088915507,
      "grad_norm": 0.34608566761016846,
      "learning_rate": 3.623346929093389e-06,
      "loss": 0.0098,
      "step": 1955480
    },
    {
      "epoch": 3.2002186393302043,
      "grad_norm": 0.46094924211502075,
      "learning_rate": 3.623281036879872e-06,
      "loss": 0.0111,
      "step": 1955500
    },
    {
      "epoch": 3.2002513697688575,
      "grad_norm": 0.6172283291816711,
      "learning_rate": 3.623215144666355e-06,
      "loss": 0.017,
      "step": 1955520
    },
    {
      "epoch": 3.200284100207511,
      "grad_norm": 0.10387422144412994,
      "learning_rate": 3.623149252452838e-06,
      "loss": 0.0087,
      "step": 1955540
    },
    {
      "epoch": 3.2003168306461642,
      "grad_norm": 0.4117785394191742,
      "learning_rate": 3.6230833602393206e-06,
      "loss": 0.0088,
      "step": 1955560
    },
    {
      "epoch": 3.200349561084818,
      "grad_norm": 0.40134409070014954,
      "learning_rate": 3.6230174680258033e-06,
      "loss": 0.0147,
      "step": 1955580
    },
    {
      "epoch": 3.200382291523471,
      "grad_norm": 0.4316885471343994,
      "learning_rate": 3.6229515758122865e-06,
      "loss": 0.0112,
      "step": 1955600
    },
    {
      "epoch": 3.200415021962124,
      "grad_norm": 2.3548336029052734,
      "learning_rate": 3.6228856835987696e-06,
      "loss": 0.0192,
      "step": 1955620
    },
    {
      "epoch": 3.2004477524007777,
      "grad_norm": 2.1300723552703857,
      "learning_rate": 3.6228197913852524e-06,
      "loss": 0.0136,
      "step": 1955640
    },
    {
      "epoch": 3.200480482839431,
      "grad_norm": 0.8078148365020752,
      "learning_rate": 3.6227538991717355e-06,
      "loss": 0.0095,
      "step": 1955660
    },
    {
      "epoch": 3.2005132132780845,
      "grad_norm": 0.1659172773361206,
      "learning_rate": 3.6226880069582183e-06,
      "loss": 0.0109,
      "step": 1955680
    },
    {
      "epoch": 3.2005459437167376,
      "grad_norm": 0.20142558217048645,
      "learning_rate": 3.622622114744701e-06,
      "loss": 0.0112,
      "step": 1955700
    },
    {
      "epoch": 3.200578674155391,
      "grad_norm": 0.4064771234989166,
      "learning_rate": 3.6225562225311837e-06,
      "loss": 0.0115,
      "step": 1955720
    },
    {
      "epoch": 3.2006114045940444,
      "grad_norm": 0.15283212065696716,
      "learning_rate": 3.622490330317667e-06,
      "loss": 0.0087,
      "step": 1955740
    },
    {
      "epoch": 3.2006441350326975,
      "grad_norm": 0.06217370554804802,
      "learning_rate": 3.6224244381041496e-06,
      "loss": 0.0128,
      "step": 1955760
    },
    {
      "epoch": 3.200676865471351,
      "grad_norm": 0.19499213993549347,
      "learning_rate": 3.6223585458906324e-06,
      "loss": 0.013,
      "step": 1955780
    },
    {
      "epoch": 3.2007095959100043,
      "grad_norm": 0.5971013903617859,
      "learning_rate": 3.622292653677115e-06,
      "loss": 0.008,
      "step": 1955800
    },
    {
      "epoch": 3.200742326348658,
      "grad_norm": 0.21158896386623383,
      "learning_rate": 3.6222267614635983e-06,
      "loss": 0.0133,
      "step": 1955820
    },
    {
      "epoch": 3.200775056787311,
      "grad_norm": 0.2643996775150299,
      "learning_rate": 3.622160869250081e-06,
      "loss": 0.009,
      "step": 1955840
    },
    {
      "epoch": 3.2008077872259646,
      "grad_norm": 0.3414054214954376,
      "learning_rate": 3.6220949770365638e-06,
      "loss": 0.0089,
      "step": 1955860
    },
    {
      "epoch": 3.2008405176646177,
      "grad_norm": 0.3197036683559418,
      "learning_rate": 3.6220290848230465e-06,
      "loss": 0.0099,
      "step": 1955880
    },
    {
      "epoch": 3.200873248103271,
      "grad_norm": 0.5374742150306702,
      "learning_rate": 3.6219631926095293e-06,
      "loss": 0.019,
      "step": 1955900
    },
    {
      "epoch": 3.2009059785419245,
      "grad_norm": 0.5182688236236572,
      "learning_rate": 3.6218973003960124e-06,
      "loss": 0.0123,
      "step": 1955920
    },
    {
      "epoch": 3.2009387089805776,
      "grad_norm": 0.6557507514953613,
      "learning_rate": 3.621831408182495e-06,
      "loss": 0.0108,
      "step": 1955940
    },
    {
      "epoch": 3.2009714394192312,
      "grad_norm": 0.4523398280143738,
      "learning_rate": 3.6217655159689783e-06,
      "loss": 0.0094,
      "step": 1955960
    },
    {
      "epoch": 3.2010041698578844,
      "grad_norm": 0.5163632035255432,
      "learning_rate": 3.6216996237554615e-06,
      "loss": 0.0091,
      "step": 1955980
    },
    {
      "epoch": 3.201036900296538,
      "grad_norm": 0.4156133234500885,
      "learning_rate": 3.6216337315419442e-06,
      "loss": 0.0096,
      "step": 1956000
    },
    {
      "epoch": 3.201069630735191,
      "grad_norm": 0.4070368707180023,
      "learning_rate": 3.621567839328427e-06,
      "loss": 0.0154,
      "step": 1956020
    },
    {
      "epoch": 3.2011023611738443,
      "grad_norm": 0.12746821343898773,
      "learning_rate": 3.6215019471149097e-06,
      "loss": 0.013,
      "step": 1956040
    },
    {
      "epoch": 3.201135091612498,
      "grad_norm": 0.09741218388080597,
      "learning_rate": 3.621436054901393e-06,
      "loss": 0.0082,
      "step": 1956060
    },
    {
      "epoch": 3.201167822051151,
      "grad_norm": 0.17438241839408875,
      "learning_rate": 3.6213701626878756e-06,
      "loss": 0.0134,
      "step": 1956080
    },
    {
      "epoch": 3.2012005524898046,
      "grad_norm": 0.17591708898544312,
      "learning_rate": 3.6213042704743583e-06,
      "loss": 0.01,
      "step": 1956100
    },
    {
      "epoch": 3.2012332829284578,
      "grad_norm": 0.15167145431041718,
      "learning_rate": 3.621238378260841e-06,
      "loss": 0.0124,
      "step": 1956120
    },
    {
      "epoch": 3.2012660133671114,
      "grad_norm": 0.21255053579807281,
      "learning_rate": 3.6211724860473242e-06,
      "loss": 0.0101,
      "step": 1956140
    },
    {
      "epoch": 3.2012987438057645,
      "grad_norm": 0.28433746099472046,
      "learning_rate": 3.621106593833807e-06,
      "loss": 0.0117,
      "step": 1956160
    },
    {
      "epoch": 3.2013314742444177,
      "grad_norm": 0.3179466724395752,
      "learning_rate": 3.6210407016202897e-06,
      "loss": 0.0151,
      "step": 1956180
    },
    {
      "epoch": 3.2013642046830713,
      "grad_norm": 0.19347180426120758,
      "learning_rate": 3.6209748094067725e-06,
      "loss": 0.0139,
      "step": 1956200
    },
    {
      "epoch": 3.2013969351217244,
      "grad_norm": 0.27203884720802307,
      "learning_rate": 3.6209089171932556e-06,
      "loss": 0.0095,
      "step": 1956220
    },
    {
      "epoch": 3.201429665560378,
      "grad_norm": 0.3053396940231323,
      "learning_rate": 3.6208430249797384e-06,
      "loss": 0.0135,
      "step": 1956240
    },
    {
      "epoch": 3.201462395999031,
      "grad_norm": 3.28187894821167,
      "learning_rate": 3.620777132766221e-06,
      "loss": 0.0148,
      "step": 1956260
    },
    {
      "epoch": 3.2014951264376847,
      "grad_norm": 0.3463050425052643,
      "learning_rate": 3.620711240552704e-06,
      "loss": 0.0126,
      "step": 1956280
    },
    {
      "epoch": 3.201527856876338,
      "grad_norm": 0.29043489694595337,
      "learning_rate": 3.6206453483391866e-06,
      "loss": 0.0145,
      "step": 1956300
    },
    {
      "epoch": 3.201560587314991,
      "grad_norm": 0.029702456668019295,
      "learning_rate": 3.62057945612567e-06,
      "loss": 0.0133,
      "step": 1956320
    },
    {
      "epoch": 3.2015933177536446,
      "grad_norm": 0.14509111642837524,
      "learning_rate": 3.620513563912153e-06,
      "loss": 0.01,
      "step": 1956340
    },
    {
      "epoch": 3.201626048192298,
      "grad_norm": 0.31500744819641113,
      "learning_rate": 3.620447671698636e-06,
      "loss": 0.011,
      "step": 1956360
    },
    {
      "epoch": 3.2016587786309514,
      "grad_norm": 0.17966081202030182,
      "learning_rate": 3.620381779485119e-06,
      "loss": 0.014,
      "step": 1956380
    },
    {
      "epoch": 3.2016915090696045,
      "grad_norm": 0.10140976309776306,
      "learning_rate": 3.6203158872716016e-06,
      "loss": 0.0114,
      "step": 1956400
    },
    {
      "epoch": 3.2017242395082577,
      "grad_norm": 3.163668155670166,
      "learning_rate": 3.6202499950580843e-06,
      "loss": 0.0133,
      "step": 1956420
    },
    {
      "epoch": 3.2017569699469113,
      "grad_norm": 0.5935786366462708,
      "learning_rate": 3.620184102844567e-06,
      "loss": 0.0138,
      "step": 1956440
    },
    {
      "epoch": 3.2017897003855644,
      "grad_norm": 0.312722384929657,
      "learning_rate": 3.62011821063105e-06,
      "loss": 0.0105,
      "step": 1956460
    },
    {
      "epoch": 3.201822430824218,
      "grad_norm": 0.4124906361103058,
      "learning_rate": 3.620052318417533e-06,
      "loss": 0.0094,
      "step": 1956480
    },
    {
      "epoch": 3.201855161262871,
      "grad_norm": 0.25722017884254456,
      "learning_rate": 3.6199864262040157e-06,
      "loss": 0.0074,
      "step": 1956500
    },
    {
      "epoch": 3.2018878917015248,
      "grad_norm": 0.22729645669460297,
      "learning_rate": 3.6199205339904984e-06,
      "loss": 0.0101,
      "step": 1956520
    },
    {
      "epoch": 3.201920622140178,
      "grad_norm": 0.40052223205566406,
      "learning_rate": 3.6198546417769816e-06,
      "loss": 0.0111,
      "step": 1956540
    },
    {
      "epoch": 3.201953352578831,
      "grad_norm": 0.266467422246933,
      "learning_rate": 3.6197887495634643e-06,
      "loss": 0.0095,
      "step": 1956560
    },
    {
      "epoch": 3.2019860830174847,
      "grad_norm": 0.33580005168914795,
      "learning_rate": 3.619722857349947e-06,
      "loss": 0.0121,
      "step": 1956580
    },
    {
      "epoch": 3.202018813456138,
      "grad_norm": 0.22131264209747314,
      "learning_rate": 3.61965696513643e-06,
      "loss": 0.0114,
      "step": 1956600
    },
    {
      "epoch": 3.2020515438947914,
      "grad_norm": 0.28058138489723206,
      "learning_rate": 3.619591072922913e-06,
      "loss": 0.0155,
      "step": 1956620
    },
    {
      "epoch": 3.2020842743334446,
      "grad_norm": 0.3884337842464447,
      "learning_rate": 3.6195251807093957e-06,
      "loss": 0.0111,
      "step": 1956640
    },
    {
      "epoch": 3.202117004772098,
      "grad_norm": 0.3284865617752075,
      "learning_rate": 3.6194592884958784e-06,
      "loss": 0.0131,
      "step": 1956660
    },
    {
      "epoch": 3.2021497352107513,
      "grad_norm": 0.2358780950307846,
      "learning_rate": 3.619393396282362e-06,
      "loss": 0.0136,
      "step": 1956680
    },
    {
      "epoch": 3.2021824656494045,
      "grad_norm": 0.2116440236568451,
      "learning_rate": 3.6193275040688448e-06,
      "loss": 0.0075,
      "step": 1956700
    },
    {
      "epoch": 3.202215196088058,
      "grad_norm": 0.17936395108699799,
      "learning_rate": 3.6192616118553275e-06,
      "loss": 0.0123,
      "step": 1956720
    },
    {
      "epoch": 3.202247926526711,
      "grad_norm": 0.25161316990852356,
      "learning_rate": 3.6191957196418102e-06,
      "loss": 0.0106,
      "step": 1956740
    },
    {
      "epoch": 3.202280656965365,
      "grad_norm": 0.253852516412735,
      "learning_rate": 3.6191298274282934e-06,
      "loss": 0.0138,
      "step": 1956760
    },
    {
      "epoch": 3.202313387404018,
      "grad_norm": 0.08380851149559021,
      "learning_rate": 3.619063935214776e-06,
      "loss": 0.0067,
      "step": 1956780
    },
    {
      "epoch": 3.202346117842671,
      "grad_norm": 0.4243015944957733,
      "learning_rate": 3.618998043001259e-06,
      "loss": 0.0128,
      "step": 1956800
    },
    {
      "epoch": 3.2023788482813247,
      "grad_norm": 0.28605353832244873,
      "learning_rate": 3.6189321507877416e-06,
      "loss": 0.0098,
      "step": 1956820
    },
    {
      "epoch": 3.202411578719978,
      "grad_norm": 0.19109492003917694,
      "learning_rate": 3.6188662585742244e-06,
      "loss": 0.0096,
      "step": 1956840
    },
    {
      "epoch": 3.2024443091586314,
      "grad_norm": 0.35607582330703735,
      "learning_rate": 3.6188003663607075e-06,
      "loss": 0.0106,
      "step": 1956860
    },
    {
      "epoch": 3.2024770395972846,
      "grad_norm": 0.10943858325481415,
      "learning_rate": 3.6187344741471903e-06,
      "loss": 0.0107,
      "step": 1956880
    },
    {
      "epoch": 3.202509770035938,
      "grad_norm": 0.21194098889827728,
      "learning_rate": 3.618668581933673e-06,
      "loss": 0.0103,
      "step": 1956900
    },
    {
      "epoch": 3.2025425004745913,
      "grad_norm": 0.07588236033916473,
      "learning_rate": 3.6186026897201558e-06,
      "loss": 0.0097,
      "step": 1956920
    },
    {
      "epoch": 3.2025752309132445,
      "grad_norm": 0.6492646932601929,
      "learning_rate": 3.618536797506639e-06,
      "loss": 0.0098,
      "step": 1956940
    },
    {
      "epoch": 3.202607961351898,
      "grad_norm": 0.13371096551418304,
      "learning_rate": 3.6184709052931217e-06,
      "loss": 0.0181,
      "step": 1956960
    },
    {
      "epoch": 3.202640691790551,
      "grad_norm": 0.08787903189659119,
      "learning_rate": 3.6184050130796044e-06,
      "loss": 0.0083,
      "step": 1956980
    },
    {
      "epoch": 3.202673422229205,
      "grad_norm": 0.4479343295097351,
      "learning_rate": 3.618339120866087e-06,
      "loss": 0.0118,
      "step": 1957000
    },
    {
      "epoch": 3.202706152667858,
      "grad_norm": 0.7545298337936401,
      "learning_rate": 3.6182732286525707e-06,
      "loss": 0.0122,
      "step": 1957020
    },
    {
      "epoch": 3.2027388831065116,
      "grad_norm": 0.46710026264190674,
      "learning_rate": 3.6182073364390535e-06,
      "loss": 0.0148,
      "step": 1957040
    },
    {
      "epoch": 3.2027716135451647,
      "grad_norm": 0.5108811855316162,
      "learning_rate": 3.618141444225536e-06,
      "loss": 0.0119,
      "step": 1957060
    },
    {
      "epoch": 3.202804343983818,
      "grad_norm": 0.6456217765808105,
      "learning_rate": 3.6180755520120194e-06,
      "loss": 0.0172,
      "step": 1957080
    },
    {
      "epoch": 3.2028370744224715,
      "grad_norm": 0.27357080578804016,
      "learning_rate": 3.618009659798502e-06,
      "loss": 0.014,
      "step": 1957100
    },
    {
      "epoch": 3.2028698048611246,
      "grad_norm": 1.0063204765319824,
      "learning_rate": 3.617943767584985e-06,
      "loss": 0.0138,
      "step": 1957120
    },
    {
      "epoch": 3.202902535299778,
      "grad_norm": 0.1416166126728058,
      "learning_rate": 3.6178778753714676e-06,
      "loss": 0.0095,
      "step": 1957140
    },
    {
      "epoch": 3.2029352657384313,
      "grad_norm": 0.1133209839463234,
      "learning_rate": 3.6178119831579507e-06,
      "loss": 0.011,
      "step": 1957160
    },
    {
      "epoch": 3.202967996177085,
      "grad_norm": 0.7299235463142395,
      "learning_rate": 3.6177460909444335e-06,
      "loss": 0.0125,
      "step": 1957180
    },
    {
      "epoch": 3.203000726615738,
      "grad_norm": 0.31780609488487244,
      "learning_rate": 3.6176801987309162e-06,
      "loss": 0.0143,
      "step": 1957200
    },
    {
      "epoch": 3.2030334570543912,
      "grad_norm": 0.12307845801115036,
      "learning_rate": 3.617614306517399e-06,
      "loss": 0.0126,
      "step": 1957220
    },
    {
      "epoch": 3.203066187493045,
      "grad_norm": 0.1004413515329361,
      "learning_rate": 3.617548414303882e-06,
      "loss": 0.0097,
      "step": 1957240
    },
    {
      "epoch": 3.203098917931698,
      "grad_norm": 0.3410341143608093,
      "learning_rate": 3.617482522090365e-06,
      "loss": 0.0094,
      "step": 1957260
    },
    {
      "epoch": 3.2031316483703516,
      "grad_norm": 0.21270784735679626,
      "learning_rate": 3.6174166298768476e-06,
      "loss": 0.0071,
      "step": 1957280
    },
    {
      "epoch": 3.2031643788090047,
      "grad_norm": 0.3299616277217865,
      "learning_rate": 3.6173507376633303e-06,
      "loss": 0.0127,
      "step": 1957300
    },
    {
      "epoch": 3.2031971092476583,
      "grad_norm": 0.42991548776626587,
      "learning_rate": 3.617284845449813e-06,
      "loss": 0.0173,
      "step": 1957320
    },
    {
      "epoch": 3.2032298396863115,
      "grad_norm": 0.177837073802948,
      "learning_rate": 3.6172189532362963e-06,
      "loss": 0.0102,
      "step": 1957340
    },
    {
      "epoch": 3.2032625701249646,
      "grad_norm": 0.6334619522094727,
      "learning_rate": 3.617153061022779e-06,
      "loss": 0.0089,
      "step": 1957360
    },
    {
      "epoch": 3.203295300563618,
      "grad_norm": 0.09331002831459045,
      "learning_rate": 3.617087168809262e-06,
      "loss": 0.0133,
      "step": 1957380
    },
    {
      "epoch": 3.2033280310022714,
      "grad_norm": 0.27548256516456604,
      "learning_rate": 3.6170212765957453e-06,
      "loss": 0.0104,
      "step": 1957400
    },
    {
      "epoch": 3.203360761440925,
      "grad_norm": 2.270533323287964,
      "learning_rate": 3.616955384382228e-06,
      "loss": 0.0108,
      "step": 1957420
    },
    {
      "epoch": 3.203393491879578,
      "grad_norm": 0.18154752254486084,
      "learning_rate": 3.616889492168711e-06,
      "loss": 0.014,
      "step": 1957440
    },
    {
      "epoch": 3.2034262223182317,
      "grad_norm": 0.15838484466075897,
      "learning_rate": 3.6168235999551935e-06,
      "loss": 0.0115,
      "step": 1957460
    },
    {
      "epoch": 3.203458952756885,
      "grad_norm": 0.6412448883056641,
      "learning_rate": 3.6167577077416767e-06,
      "loss": 0.0197,
      "step": 1957480
    },
    {
      "epoch": 3.203491683195538,
      "grad_norm": 0.3367631733417511,
      "learning_rate": 3.6166918155281594e-06,
      "loss": 0.0108,
      "step": 1957500
    },
    {
      "epoch": 3.2035244136341916,
      "grad_norm": 0.47885286808013916,
      "learning_rate": 3.616625923314642e-06,
      "loss": 0.015,
      "step": 1957520
    },
    {
      "epoch": 3.2035571440728448,
      "grad_norm": 0.28970494866371155,
      "learning_rate": 3.616560031101125e-06,
      "loss": 0.012,
      "step": 1957540
    },
    {
      "epoch": 3.2035898745114983,
      "grad_norm": 0.21828489005565643,
      "learning_rate": 3.616494138887608e-06,
      "loss": 0.0091,
      "step": 1957560
    },
    {
      "epoch": 3.2036226049501515,
      "grad_norm": 0.20069250464439392,
      "learning_rate": 3.616428246674091e-06,
      "loss": 0.0114,
      "step": 1957580
    },
    {
      "epoch": 3.203655335388805,
      "grad_norm": 0.37832483649253845,
      "learning_rate": 3.6163623544605736e-06,
      "loss": 0.0115,
      "step": 1957600
    },
    {
      "epoch": 3.2036880658274582,
      "grad_norm": 0.06786718964576721,
      "learning_rate": 3.6162964622470563e-06,
      "loss": 0.0072,
      "step": 1957620
    },
    {
      "epoch": 3.2037207962661114,
      "grad_norm": 0.2853250503540039,
      "learning_rate": 3.6162305700335395e-06,
      "loss": 0.0104,
      "step": 1957640
    },
    {
      "epoch": 3.203753526704765,
      "grad_norm": 0.18938586115837097,
      "learning_rate": 3.616164677820022e-06,
      "loss": 0.0121,
      "step": 1957660
    },
    {
      "epoch": 3.203786257143418,
      "grad_norm": 0.24192427098751068,
      "learning_rate": 3.616098785606505e-06,
      "loss": 0.0085,
      "step": 1957680
    },
    {
      "epoch": 3.2038189875820717,
      "grad_norm": 0.5944936275482178,
      "learning_rate": 3.6160328933929877e-06,
      "loss": 0.0127,
      "step": 1957700
    },
    {
      "epoch": 3.203851718020725,
      "grad_norm": 0.4278969466686249,
      "learning_rate": 3.6159670011794713e-06,
      "loss": 0.0131,
      "step": 1957720
    },
    {
      "epoch": 3.2038844484593785,
      "grad_norm": 0.35063478350639343,
      "learning_rate": 3.615901108965954e-06,
      "loss": 0.0113,
      "step": 1957740
    },
    {
      "epoch": 3.2039171788980316,
      "grad_norm": 0.13129973411560059,
      "learning_rate": 3.6158352167524368e-06,
      "loss": 0.017,
      "step": 1957760
    },
    {
      "epoch": 3.2039499093366848,
      "grad_norm": 0.3182528614997864,
      "learning_rate": 3.61576932453892e-06,
      "loss": 0.0108,
      "step": 1957780
    },
    {
      "epoch": 3.2039826397753384,
      "grad_norm": 0.14656853675842285,
      "learning_rate": 3.6157034323254027e-06,
      "loss": 0.0083,
      "step": 1957800
    },
    {
      "epoch": 3.2040153702139915,
      "grad_norm": 0.39402273297309875,
      "learning_rate": 3.6156375401118854e-06,
      "loss": 0.0091,
      "step": 1957820
    },
    {
      "epoch": 3.204048100652645,
      "grad_norm": 0.13610291481018066,
      "learning_rate": 3.615571647898368e-06,
      "loss": 0.0144,
      "step": 1957840
    },
    {
      "epoch": 3.2040808310912983,
      "grad_norm": 0.11947568506002426,
      "learning_rate": 3.615505755684851e-06,
      "loss": 0.012,
      "step": 1957860
    },
    {
      "epoch": 3.204113561529952,
      "grad_norm": 0.4887436628341675,
      "learning_rate": 3.615439863471334e-06,
      "loss": 0.0103,
      "step": 1957880
    },
    {
      "epoch": 3.204146291968605,
      "grad_norm": 0.3154924213886261,
      "learning_rate": 3.6153739712578168e-06,
      "loss": 0.0069,
      "step": 1957900
    },
    {
      "epoch": 3.204179022407258,
      "grad_norm": 0.5858681797981262,
      "learning_rate": 3.6153080790442995e-06,
      "loss": 0.0111,
      "step": 1957920
    },
    {
      "epoch": 3.2042117528459118,
      "grad_norm": 0.2809859812259674,
      "learning_rate": 3.6152421868307823e-06,
      "loss": 0.0124,
      "step": 1957940
    },
    {
      "epoch": 3.204244483284565,
      "grad_norm": 0.32225942611694336,
      "learning_rate": 3.6151762946172654e-06,
      "loss": 0.011,
      "step": 1957960
    },
    {
      "epoch": 3.2042772137232185,
      "grad_norm": 0.20992013812065125,
      "learning_rate": 3.615110402403748e-06,
      "loss": 0.0119,
      "step": 1957980
    },
    {
      "epoch": 3.2043099441618716,
      "grad_norm": 0.30740886926651,
      "learning_rate": 3.615044510190231e-06,
      "loss": 0.0108,
      "step": 1958000
    },
    {
      "epoch": 3.204342674600525,
      "grad_norm": 0.5360725522041321,
      "learning_rate": 3.6149786179767136e-06,
      "loss": 0.0119,
      "step": 1958020
    },
    {
      "epoch": 3.2043754050391784,
      "grad_norm": 0.38249197602272034,
      "learning_rate": 3.614912725763197e-06,
      "loss": 0.012,
      "step": 1958040
    },
    {
      "epoch": 3.2044081354778315,
      "grad_norm": 0.5982785224914551,
      "learning_rate": 3.6148468335496795e-06,
      "loss": 0.0159,
      "step": 1958060
    },
    {
      "epoch": 3.204440865916485,
      "grad_norm": 0.14317697286605835,
      "learning_rate": 3.6147809413361627e-06,
      "loss": 0.0134,
      "step": 1958080
    },
    {
      "epoch": 3.2044735963551383,
      "grad_norm": 0.2216842621564865,
      "learning_rate": 3.614715049122646e-06,
      "loss": 0.0126,
      "step": 1958100
    },
    {
      "epoch": 3.204506326793792,
      "grad_norm": 0.1690177023410797,
      "learning_rate": 3.6146491569091286e-06,
      "loss": 0.0132,
      "step": 1958120
    },
    {
      "epoch": 3.204539057232445,
      "grad_norm": 0.15385545790195465,
      "learning_rate": 3.6145832646956113e-06,
      "loss": 0.0109,
      "step": 1958140
    },
    {
      "epoch": 3.204571787671098,
      "grad_norm": 0.2708614468574524,
      "learning_rate": 3.614517372482094e-06,
      "loss": 0.0189,
      "step": 1958160
    },
    {
      "epoch": 3.2046045181097518,
      "grad_norm": 0.3430209755897522,
      "learning_rate": 3.6144514802685773e-06,
      "loss": 0.0136,
      "step": 1958180
    },
    {
      "epoch": 3.204637248548405,
      "grad_norm": 0.39474767446517944,
      "learning_rate": 3.61438558805506e-06,
      "loss": 0.0133,
      "step": 1958200
    },
    {
      "epoch": 3.2046699789870585,
      "grad_norm": 0.20940503478050232,
      "learning_rate": 3.6143196958415427e-06,
      "loss": 0.0105,
      "step": 1958220
    },
    {
      "epoch": 3.2047027094257117,
      "grad_norm": 0.1314792037010193,
      "learning_rate": 3.6142538036280255e-06,
      "loss": 0.008,
      "step": 1958240
    },
    {
      "epoch": 3.2047354398643653,
      "grad_norm": 0.32039839029312134,
      "learning_rate": 3.614187911414508e-06,
      "loss": 0.0084,
      "step": 1958260
    },
    {
      "epoch": 3.2047681703030184,
      "grad_norm": 0.4347485601902008,
      "learning_rate": 3.6141220192009914e-06,
      "loss": 0.0125,
      "step": 1958280
    },
    {
      "epoch": 3.2048009007416716,
      "grad_norm": 0.23153887689113617,
      "learning_rate": 3.614056126987474e-06,
      "loss": 0.0088,
      "step": 1958300
    },
    {
      "epoch": 3.204833631180325,
      "grad_norm": 0.5249821543693542,
      "learning_rate": 3.613990234773957e-06,
      "loss": 0.0124,
      "step": 1958320
    },
    {
      "epoch": 3.2048663616189783,
      "grad_norm": 0.3207341134548187,
      "learning_rate": 3.6139243425604396e-06,
      "loss": 0.0112,
      "step": 1958340
    },
    {
      "epoch": 3.204899092057632,
      "grad_norm": 0.5827580690383911,
      "learning_rate": 3.6138584503469228e-06,
      "loss": 0.0154,
      "step": 1958360
    },
    {
      "epoch": 3.204931822496285,
      "grad_norm": 0.3407179117202759,
      "learning_rate": 3.6137925581334055e-06,
      "loss": 0.0121,
      "step": 1958380
    },
    {
      "epoch": 3.204964552934938,
      "grad_norm": 0.47359055280685425,
      "learning_rate": 3.6137266659198882e-06,
      "loss": 0.0115,
      "step": 1958400
    },
    {
      "epoch": 3.204997283373592,
      "grad_norm": 0.23039431869983673,
      "learning_rate": 3.613660773706371e-06,
      "loss": 0.0102,
      "step": 1958420
    },
    {
      "epoch": 3.205030013812245,
      "grad_norm": 0.21203339099884033,
      "learning_rate": 3.6135948814928546e-06,
      "loss": 0.011,
      "step": 1958440
    },
    {
      "epoch": 3.2050627442508985,
      "grad_norm": 0.2668130099773407,
      "learning_rate": 3.6135289892793373e-06,
      "loss": 0.009,
      "step": 1958460
    },
    {
      "epoch": 3.2050954746895517,
      "grad_norm": 0.24874424934387207,
      "learning_rate": 3.61346309706582e-06,
      "loss": 0.0153,
      "step": 1958480
    },
    {
      "epoch": 3.2051282051282053,
      "grad_norm": 0.15353815257549286,
      "learning_rate": 3.613397204852303e-06,
      "loss": 0.0083,
      "step": 1958500
    },
    {
      "epoch": 3.2051609355668584,
      "grad_norm": 0.19351671636104584,
      "learning_rate": 3.613331312638786e-06,
      "loss": 0.0096,
      "step": 1958520
    },
    {
      "epoch": 3.2051936660055116,
      "grad_norm": 0.39014020562171936,
      "learning_rate": 3.6132654204252687e-06,
      "loss": 0.0135,
      "step": 1958540
    },
    {
      "epoch": 3.205226396444165,
      "grad_norm": 0.1985551118850708,
      "learning_rate": 3.6131995282117514e-06,
      "loss": 0.0097,
      "step": 1958560
    },
    {
      "epoch": 3.2052591268828183,
      "grad_norm": 0.31113100051879883,
      "learning_rate": 3.6131336359982346e-06,
      "loss": 0.0076,
      "step": 1958580
    },
    {
      "epoch": 3.205291857321472,
      "grad_norm": 0.21983470022678375,
      "learning_rate": 3.6130677437847173e-06,
      "loss": 0.0156,
      "step": 1958600
    },
    {
      "epoch": 3.205324587760125,
      "grad_norm": 0.187620148062706,
      "learning_rate": 3.6130018515712e-06,
      "loss": 0.0162,
      "step": 1958620
    },
    {
      "epoch": 3.2053573181987787,
      "grad_norm": 0.22595305740833282,
      "learning_rate": 3.612935959357683e-06,
      "loss": 0.0104,
      "step": 1958640
    },
    {
      "epoch": 3.205390048637432,
      "grad_norm": 0.16088280081748962,
      "learning_rate": 3.612870067144166e-06,
      "loss": 0.0204,
      "step": 1958660
    },
    {
      "epoch": 3.205422779076085,
      "grad_norm": 0.40413591265678406,
      "learning_rate": 3.6128041749306487e-06,
      "loss": 0.0157,
      "step": 1958680
    },
    {
      "epoch": 3.2054555095147386,
      "grad_norm": 0.39495012164115906,
      "learning_rate": 3.6127382827171314e-06,
      "loss": 0.0084,
      "step": 1958700
    },
    {
      "epoch": 3.2054882399533917,
      "grad_norm": 0.16363073885440826,
      "learning_rate": 3.612672390503614e-06,
      "loss": 0.0107,
      "step": 1958720
    },
    {
      "epoch": 3.2055209703920453,
      "grad_norm": 0.12212617695331573,
      "learning_rate": 3.612606498290097e-06,
      "loss": 0.009,
      "step": 1958740
    },
    {
      "epoch": 3.2055537008306985,
      "grad_norm": 0.19979029893875122,
      "learning_rate": 3.61254060607658e-06,
      "loss": 0.0116,
      "step": 1958760
    },
    {
      "epoch": 3.205586431269352,
      "grad_norm": 0.21388034522533417,
      "learning_rate": 3.6124747138630633e-06,
      "loss": 0.0111,
      "step": 1958780
    },
    {
      "epoch": 3.205619161708005,
      "grad_norm": 0.2245660126209259,
      "learning_rate": 3.612408821649546e-06,
      "loss": 0.0095,
      "step": 1958800
    },
    {
      "epoch": 3.2056518921466584,
      "grad_norm": 0.3254614770412445,
      "learning_rate": 3.612342929436029e-06,
      "loss": 0.013,
      "step": 1958820
    },
    {
      "epoch": 3.205684622585312,
      "grad_norm": 0.3340149521827698,
      "learning_rate": 3.612277037222512e-06,
      "loss": 0.0141,
      "step": 1958840
    },
    {
      "epoch": 3.205717353023965,
      "grad_norm": 0.7352213859558105,
      "learning_rate": 3.6122111450089946e-06,
      "loss": 0.0123,
      "step": 1958860
    },
    {
      "epoch": 3.2057500834626187,
      "grad_norm": 0.8649948239326477,
      "learning_rate": 3.6121452527954774e-06,
      "loss": 0.0111,
      "step": 1958880
    },
    {
      "epoch": 3.205782813901272,
      "grad_norm": 0.35988086462020874,
      "learning_rate": 3.6120793605819605e-06,
      "loss": 0.0123,
      "step": 1958900
    },
    {
      "epoch": 3.2058155443399254,
      "grad_norm": 0.6286634206771851,
      "learning_rate": 3.6120134683684433e-06,
      "loss": 0.0158,
      "step": 1958920
    },
    {
      "epoch": 3.2058482747785786,
      "grad_norm": 0.25384604930877686,
      "learning_rate": 3.611947576154926e-06,
      "loss": 0.011,
      "step": 1958940
    },
    {
      "epoch": 3.2058810052172317,
      "grad_norm": 0.22227436304092407,
      "learning_rate": 3.6118816839414088e-06,
      "loss": 0.0088,
      "step": 1958960
    },
    {
      "epoch": 3.2059137356558853,
      "grad_norm": 0.3866155743598938,
      "learning_rate": 3.611815791727892e-06,
      "loss": 0.0138,
      "step": 1958980
    },
    {
      "epoch": 3.2059464660945385,
      "grad_norm": 0.29709574580192566,
      "learning_rate": 3.6117498995143747e-06,
      "loss": 0.0104,
      "step": 1959000
    },
    {
      "epoch": 3.205979196533192,
      "grad_norm": 0.23731842637062073,
      "learning_rate": 3.6116840073008574e-06,
      "loss": 0.0136,
      "step": 1959020
    },
    {
      "epoch": 3.2060119269718452,
      "grad_norm": 0.07121361792087555,
      "learning_rate": 3.61161811508734e-06,
      "loss": 0.0213,
      "step": 1959040
    },
    {
      "epoch": 3.206044657410499,
      "grad_norm": 0.304000586271286,
      "learning_rate": 3.6115522228738233e-06,
      "loss": 0.009,
      "step": 1959060
    },
    {
      "epoch": 3.206077387849152,
      "grad_norm": 0.1219790130853653,
      "learning_rate": 3.611486330660306e-06,
      "loss": 0.009,
      "step": 1959080
    },
    {
      "epoch": 3.206110118287805,
      "grad_norm": 0.20156866312026978,
      "learning_rate": 3.6114204384467888e-06,
      "loss": 0.0152,
      "step": 1959100
    },
    {
      "epoch": 3.2061428487264587,
      "grad_norm": 0.28354403376579285,
      "learning_rate": 3.6113545462332715e-06,
      "loss": 0.012,
      "step": 1959120
    },
    {
      "epoch": 3.206175579165112,
      "grad_norm": 0.38944029808044434,
      "learning_rate": 3.611288654019755e-06,
      "loss": 0.0119,
      "step": 1959140
    },
    {
      "epoch": 3.2062083096037655,
      "grad_norm": 0.16047805547714233,
      "learning_rate": 3.611222761806238e-06,
      "loss": 0.0139,
      "step": 1959160
    },
    {
      "epoch": 3.2062410400424186,
      "grad_norm": 0.261222779750824,
      "learning_rate": 3.6111568695927206e-06,
      "loss": 0.0122,
      "step": 1959180
    },
    {
      "epoch": 3.206273770481072,
      "grad_norm": 0.1221439316868782,
      "learning_rate": 3.6110909773792038e-06,
      "loss": 0.0139,
      "step": 1959200
    },
    {
      "epoch": 3.2063065009197254,
      "grad_norm": 0.667097806930542,
      "learning_rate": 3.6110250851656865e-06,
      "loss": 0.0156,
      "step": 1959220
    },
    {
      "epoch": 3.2063392313583785,
      "grad_norm": 0.3953688442707062,
      "learning_rate": 3.6109591929521692e-06,
      "loss": 0.0129,
      "step": 1959240
    },
    {
      "epoch": 3.206371961797032,
      "grad_norm": 0.1694246381521225,
      "learning_rate": 3.610893300738652e-06,
      "loss": 0.0148,
      "step": 1959260
    },
    {
      "epoch": 3.2064046922356852,
      "grad_norm": 0.7065152525901794,
      "learning_rate": 3.6108274085251347e-06,
      "loss": 0.0154,
      "step": 1959280
    },
    {
      "epoch": 3.206437422674339,
      "grad_norm": 0.6530973315238953,
      "learning_rate": 3.610761516311618e-06,
      "loss": 0.0112,
      "step": 1959300
    },
    {
      "epoch": 3.206470153112992,
      "grad_norm": 0.12859496474266052,
      "learning_rate": 3.6106956240981006e-06,
      "loss": 0.008,
      "step": 1959320
    },
    {
      "epoch": 3.2065028835516456,
      "grad_norm": 0.12886521220207214,
      "learning_rate": 3.6106297318845834e-06,
      "loss": 0.0121,
      "step": 1959340
    },
    {
      "epoch": 3.2065356139902987,
      "grad_norm": 0.08876290172338486,
      "learning_rate": 3.610563839671066e-06,
      "loss": 0.0109,
      "step": 1959360
    },
    {
      "epoch": 3.206568344428952,
      "grad_norm": 0.15638189017772675,
      "learning_rate": 3.6104979474575493e-06,
      "loss": 0.0138,
      "step": 1959380
    },
    {
      "epoch": 3.2066010748676055,
      "grad_norm": 0.2034558206796646,
      "learning_rate": 3.610432055244032e-06,
      "loss": 0.0175,
      "step": 1959400
    },
    {
      "epoch": 3.2066338053062586,
      "grad_norm": 0.24291589856147766,
      "learning_rate": 3.6103661630305147e-06,
      "loss": 0.0098,
      "step": 1959420
    },
    {
      "epoch": 3.2066665357449122,
      "grad_norm": 0.2603462338447571,
      "learning_rate": 3.6103002708169975e-06,
      "loss": 0.0131,
      "step": 1959440
    },
    {
      "epoch": 3.2066992661835654,
      "grad_norm": 0.19290447235107422,
      "learning_rate": 3.6102343786034806e-06,
      "loss": 0.0099,
      "step": 1959460
    },
    {
      "epoch": 3.2067319966222185,
      "grad_norm": 0.20477977395057678,
      "learning_rate": 3.610168486389964e-06,
      "loss": 0.017,
      "step": 1959480
    },
    {
      "epoch": 3.206764727060872,
      "grad_norm": 0.29982104897499084,
      "learning_rate": 3.6101025941764465e-06,
      "loss": 0.0097,
      "step": 1959500
    },
    {
      "epoch": 3.2067974574995253,
      "grad_norm": 0.12629877030849457,
      "learning_rate": 3.6100367019629297e-06,
      "loss": 0.009,
      "step": 1959520
    },
    {
      "epoch": 3.206830187938179,
      "grad_norm": 0.26147300004959106,
      "learning_rate": 3.6099708097494124e-06,
      "loss": 0.0114,
      "step": 1959540
    },
    {
      "epoch": 3.206862918376832,
      "grad_norm": 0.06272567063570023,
      "learning_rate": 3.609904917535895e-06,
      "loss": 0.0099,
      "step": 1959560
    },
    {
      "epoch": 3.2068956488154856,
      "grad_norm": 0.34351497888565063,
      "learning_rate": 3.609839025322378e-06,
      "loss": 0.0126,
      "step": 1959580
    },
    {
      "epoch": 3.2069283792541388,
      "grad_norm": 0.37185782194137573,
      "learning_rate": 3.609773133108861e-06,
      "loss": 0.0115,
      "step": 1959600
    },
    {
      "epoch": 3.206961109692792,
      "grad_norm": 0.12808731198310852,
      "learning_rate": 3.609707240895344e-06,
      "loss": 0.0161,
      "step": 1959620
    },
    {
      "epoch": 3.2069938401314455,
      "grad_norm": 0.2966964840888977,
      "learning_rate": 3.6096413486818266e-06,
      "loss": 0.0114,
      "step": 1959640
    },
    {
      "epoch": 3.2070265705700987,
      "grad_norm": 0.3424018621444702,
      "learning_rate": 3.6095754564683093e-06,
      "loss": 0.0145,
      "step": 1959660
    },
    {
      "epoch": 3.2070593010087523,
      "grad_norm": 0.14355245232582092,
      "learning_rate": 3.6095095642547925e-06,
      "loss": 0.0079,
      "step": 1959680
    },
    {
      "epoch": 3.2070920314474054,
      "grad_norm": 0.40185222029685974,
      "learning_rate": 3.6094436720412752e-06,
      "loss": 0.0104,
      "step": 1959700
    },
    {
      "epoch": 3.207124761886059,
      "grad_norm": 0.3009369671344757,
      "learning_rate": 3.609377779827758e-06,
      "loss": 0.0088,
      "step": 1959720
    },
    {
      "epoch": 3.207157492324712,
      "grad_norm": 0.3227330148220062,
      "learning_rate": 3.6093118876142407e-06,
      "loss": 0.012,
      "step": 1959740
    },
    {
      "epoch": 3.2071902227633653,
      "grad_norm": 0.1534767895936966,
      "learning_rate": 3.6092459954007234e-06,
      "loss": 0.0091,
      "step": 1959760
    },
    {
      "epoch": 3.207222953202019,
      "grad_norm": 0.2571817636489868,
      "learning_rate": 3.6091801031872066e-06,
      "loss": 0.0102,
      "step": 1959780
    },
    {
      "epoch": 3.207255683640672,
      "grad_norm": 0.238750621676445,
      "learning_rate": 3.6091142109736893e-06,
      "loss": 0.0115,
      "step": 1959800
    },
    {
      "epoch": 3.2072884140793256,
      "grad_norm": 0.261295348405838,
      "learning_rate": 3.609048318760172e-06,
      "loss": 0.013,
      "step": 1959820
    },
    {
      "epoch": 3.207321144517979,
      "grad_norm": 0.1860934942960739,
      "learning_rate": 3.6089824265466557e-06,
      "loss": 0.0103,
      "step": 1959840
    },
    {
      "epoch": 3.207353874956632,
      "grad_norm": 0.23393890261650085,
      "learning_rate": 3.6089165343331384e-06,
      "loss": 0.0122,
      "step": 1959860
    },
    {
      "epoch": 3.2073866053952855,
      "grad_norm": 0.31531786918640137,
      "learning_rate": 3.608850642119621e-06,
      "loss": 0.0112,
      "step": 1959880
    },
    {
      "epoch": 3.2074193358339387,
      "grad_norm": 0.13817395269870758,
      "learning_rate": 3.608784749906104e-06,
      "loss": 0.0118,
      "step": 1959900
    },
    {
      "epoch": 3.2074520662725923,
      "grad_norm": 0.2195625752210617,
      "learning_rate": 3.608718857692587e-06,
      "loss": 0.0105,
      "step": 1959920
    },
    {
      "epoch": 3.2074847967112454,
      "grad_norm": 0.38226357102394104,
      "learning_rate": 3.6086529654790698e-06,
      "loss": 0.0114,
      "step": 1959940
    },
    {
      "epoch": 3.207517527149899,
      "grad_norm": 0.3612484633922577,
      "learning_rate": 3.6085870732655525e-06,
      "loss": 0.0101,
      "step": 1959960
    },
    {
      "epoch": 3.207550257588552,
      "grad_norm": 0.23322820663452148,
      "learning_rate": 3.6085211810520353e-06,
      "loss": 0.0186,
      "step": 1959980
    },
    {
      "epoch": 3.2075829880272053,
      "grad_norm": 0.7433992028236389,
      "learning_rate": 3.6084552888385184e-06,
      "loss": 0.0116,
      "step": 1960000
    },
    {
      "epoch": 3.207615718465859,
      "grad_norm": 0.0891294777393341,
      "learning_rate": 3.608389396625001e-06,
      "loss": 0.0131,
      "step": 1960020
    },
    {
      "epoch": 3.207648448904512,
      "grad_norm": 0.1514296680688858,
      "learning_rate": 3.608323504411484e-06,
      "loss": 0.0099,
      "step": 1960040
    },
    {
      "epoch": 3.2076811793431657,
      "grad_norm": 0.12499634921550751,
      "learning_rate": 3.6082576121979666e-06,
      "loss": 0.0124,
      "step": 1960060
    },
    {
      "epoch": 3.207713909781819,
      "grad_norm": 1.7236472368240356,
      "learning_rate": 3.60819171998445e-06,
      "loss": 0.0095,
      "step": 1960080
    },
    {
      "epoch": 3.2077466402204724,
      "grad_norm": 0.2692566514015198,
      "learning_rate": 3.6081258277709325e-06,
      "loss": 0.0104,
      "step": 1960100
    },
    {
      "epoch": 3.2077793706591256,
      "grad_norm": 0.2558114528656006,
      "learning_rate": 3.6080599355574153e-06,
      "loss": 0.0084,
      "step": 1960120
    },
    {
      "epoch": 3.2078121010977787,
      "grad_norm": 0.15899622440338135,
      "learning_rate": 3.607994043343898e-06,
      "loss": 0.0109,
      "step": 1960140
    },
    {
      "epoch": 3.2078448315364323,
      "grad_norm": 0.3014882504940033,
      "learning_rate": 3.6079281511303808e-06,
      "loss": 0.0084,
      "step": 1960160
    },
    {
      "epoch": 3.2078775619750854,
      "grad_norm": 0.06703069061040878,
      "learning_rate": 3.607862258916864e-06,
      "loss": 0.0142,
      "step": 1960180
    },
    {
      "epoch": 3.207910292413739,
      "grad_norm": 0.6982690691947937,
      "learning_rate": 3.607796366703347e-06,
      "loss": 0.0095,
      "step": 1960200
    },
    {
      "epoch": 3.207943022852392,
      "grad_norm": 0.43078139424324036,
      "learning_rate": 3.6077304744898303e-06,
      "loss": 0.0104,
      "step": 1960220
    },
    {
      "epoch": 3.207975753291046,
      "grad_norm": 0.2594468891620636,
      "learning_rate": 3.607664582276313e-06,
      "loss": 0.0083,
      "step": 1960240
    },
    {
      "epoch": 3.208008483729699,
      "grad_norm": 0.15738432109355927,
      "learning_rate": 3.6075986900627957e-06,
      "loss": 0.0177,
      "step": 1960260
    },
    {
      "epoch": 3.208041214168352,
      "grad_norm": 0.15618376433849335,
      "learning_rate": 3.6075327978492785e-06,
      "loss": 0.013,
      "step": 1960280
    },
    {
      "epoch": 3.2080739446070057,
      "grad_norm": 0.1699676513671875,
      "learning_rate": 3.6074669056357612e-06,
      "loss": 0.0102,
      "step": 1960300
    },
    {
      "epoch": 3.208106675045659,
      "grad_norm": 0.3481869101524353,
      "learning_rate": 3.6074010134222444e-06,
      "loss": 0.0137,
      "step": 1960320
    },
    {
      "epoch": 3.2081394054843124,
      "grad_norm": 1.0963234901428223,
      "learning_rate": 3.607335121208727e-06,
      "loss": 0.0142,
      "step": 1960340
    },
    {
      "epoch": 3.2081721359229656,
      "grad_norm": 0.1377555876970291,
      "learning_rate": 3.60726922899521e-06,
      "loss": 0.0147,
      "step": 1960360
    },
    {
      "epoch": 3.208204866361619,
      "grad_norm": 0.4022088050842285,
      "learning_rate": 3.6072033367816926e-06,
      "loss": 0.0094,
      "step": 1960380
    },
    {
      "epoch": 3.2082375968002723,
      "grad_norm": 0.2780354619026184,
      "learning_rate": 3.6071374445681758e-06,
      "loss": 0.0089,
      "step": 1960400
    },
    {
      "epoch": 3.2082703272389255,
      "grad_norm": 0.5499863028526306,
      "learning_rate": 3.6070715523546585e-06,
      "loss": 0.0141,
      "step": 1960420
    },
    {
      "epoch": 3.208303057677579,
      "grad_norm": 0.15711139142513275,
      "learning_rate": 3.6070056601411412e-06,
      "loss": 0.0105,
      "step": 1960440
    },
    {
      "epoch": 3.208335788116232,
      "grad_norm": 0.17927609384059906,
      "learning_rate": 3.606939767927624e-06,
      "loss": 0.0157,
      "step": 1960460
    },
    {
      "epoch": 3.208368518554886,
      "grad_norm": 0.1390463411808014,
      "learning_rate": 3.606873875714107e-06,
      "loss": 0.0091,
      "step": 1960480
    },
    {
      "epoch": 3.208401248993539,
      "grad_norm": 0.19018933176994324,
      "learning_rate": 3.60680798350059e-06,
      "loss": 0.0126,
      "step": 1960500
    },
    {
      "epoch": 3.2084339794321926,
      "grad_norm": 0.29920801520347595,
      "learning_rate": 3.6067420912870726e-06,
      "loss": 0.016,
      "step": 1960520
    },
    {
      "epoch": 3.2084667098708457,
      "grad_norm": 0.2766268849372864,
      "learning_rate": 3.606676199073556e-06,
      "loss": 0.0059,
      "step": 1960540
    },
    {
      "epoch": 3.208499440309499,
      "grad_norm": 0.08366601169109344,
      "learning_rate": 3.606610306860039e-06,
      "loss": 0.0108,
      "step": 1960560
    },
    {
      "epoch": 3.2085321707481524,
      "grad_norm": 0.6591935753822327,
      "learning_rate": 3.6065444146465217e-06,
      "loss": 0.0108,
      "step": 1960580
    },
    {
      "epoch": 3.2085649011868056,
      "grad_norm": 0.12765786051750183,
      "learning_rate": 3.6064785224330044e-06,
      "loss": 0.0122,
      "step": 1960600
    },
    {
      "epoch": 3.208597631625459,
      "grad_norm": 0.4326799511909485,
      "learning_rate": 3.6064126302194876e-06,
      "loss": 0.0107,
      "step": 1960620
    },
    {
      "epoch": 3.2086303620641123,
      "grad_norm": 0.47815483808517456,
      "learning_rate": 3.6063467380059703e-06,
      "loss": 0.0109,
      "step": 1960640
    },
    {
      "epoch": 3.208663092502766,
      "grad_norm": 0.42766401171684265,
      "learning_rate": 3.606280845792453e-06,
      "loss": 0.0141,
      "step": 1960660
    },
    {
      "epoch": 3.208695822941419,
      "grad_norm": 0.17797933518886566,
      "learning_rate": 3.606214953578936e-06,
      "loss": 0.0089,
      "step": 1960680
    },
    {
      "epoch": 3.2087285533800722,
      "grad_norm": 1.7327691316604614,
      "learning_rate": 3.6061490613654186e-06,
      "loss": 0.0171,
      "step": 1960700
    },
    {
      "epoch": 3.208761283818726,
      "grad_norm": 0.5300979018211365,
      "learning_rate": 3.6060831691519017e-06,
      "loss": 0.0119,
      "step": 1960720
    },
    {
      "epoch": 3.208794014257379,
      "grad_norm": 0.6266838908195496,
      "learning_rate": 3.6060172769383845e-06,
      "loss": 0.0109,
      "step": 1960740
    },
    {
      "epoch": 3.2088267446960326,
      "grad_norm": 0.11602470278739929,
      "learning_rate": 3.605951384724867e-06,
      "loss": 0.0108,
      "step": 1960760
    },
    {
      "epoch": 3.2088594751346857,
      "grad_norm": 0.07940579205751419,
      "learning_rate": 3.60588549251135e-06,
      "loss": 0.0089,
      "step": 1960780
    },
    {
      "epoch": 3.2088922055733393,
      "grad_norm": 1.1611872911453247,
      "learning_rate": 3.605819600297833e-06,
      "loss": 0.0089,
      "step": 1960800
    },
    {
      "epoch": 3.2089249360119925,
      "grad_norm": 0.29943859577178955,
      "learning_rate": 3.605753708084316e-06,
      "loss": 0.0128,
      "step": 1960820
    },
    {
      "epoch": 3.2089576664506456,
      "grad_norm": 0.21968145668506622,
      "learning_rate": 3.6056878158707986e-06,
      "loss": 0.0117,
      "step": 1960840
    },
    {
      "epoch": 3.208990396889299,
      "grad_norm": 0.32357296347618103,
      "learning_rate": 3.6056219236572813e-06,
      "loss": 0.0113,
      "step": 1960860
    },
    {
      "epoch": 3.2090231273279524,
      "grad_norm": 0.12206222862005234,
      "learning_rate": 3.6055560314437645e-06,
      "loss": 0.0095,
      "step": 1960880
    },
    {
      "epoch": 3.209055857766606,
      "grad_norm": 0.19921141862869263,
      "learning_rate": 3.6054901392302476e-06,
      "loss": 0.014,
      "step": 1960900
    },
    {
      "epoch": 3.209088588205259,
      "grad_norm": 0.11925976723432541,
      "learning_rate": 3.6054242470167304e-06,
      "loss": 0.0147,
      "step": 1960920
    },
    {
      "epoch": 3.2091213186439127,
      "grad_norm": 0.2112724781036377,
      "learning_rate": 3.6053583548032135e-06,
      "loss": 0.0093,
      "step": 1960940
    },
    {
      "epoch": 3.209154049082566,
      "grad_norm": 0.15054979920387268,
      "learning_rate": 3.6052924625896963e-06,
      "loss": 0.0103,
      "step": 1960960
    },
    {
      "epoch": 3.209186779521219,
      "grad_norm": 0.13267166912555695,
      "learning_rate": 3.605226570376179e-06,
      "loss": 0.0099,
      "step": 1960980
    },
    {
      "epoch": 3.2092195099598726,
      "grad_norm": 0.4973859488964081,
      "learning_rate": 3.6051606781626618e-06,
      "loss": 0.0211,
      "step": 1961000
    },
    {
      "epoch": 3.2092522403985257,
      "grad_norm": 0.3967895805835724,
      "learning_rate": 3.605094785949145e-06,
      "loss": 0.0152,
      "step": 1961020
    },
    {
      "epoch": 3.2092849708371793,
      "grad_norm": 0.30383941531181335,
      "learning_rate": 3.6050288937356277e-06,
      "loss": 0.0112,
      "step": 1961040
    },
    {
      "epoch": 3.2093177012758325,
      "grad_norm": 0.3080465793609619,
      "learning_rate": 3.6049630015221104e-06,
      "loss": 0.0134,
      "step": 1961060
    },
    {
      "epoch": 3.2093504317144856,
      "grad_norm": 0.07280035316944122,
      "learning_rate": 3.604897109308593e-06,
      "loss": 0.0109,
      "step": 1961080
    },
    {
      "epoch": 3.2093831621531392,
      "grad_norm": 0.43256208300590515,
      "learning_rate": 3.6048312170950763e-06,
      "loss": 0.0147,
      "step": 1961100
    },
    {
      "epoch": 3.2094158925917924,
      "grad_norm": 0.1475810557603836,
      "learning_rate": 3.604765324881559e-06,
      "loss": 0.0096,
      "step": 1961120
    },
    {
      "epoch": 3.209448623030446,
      "grad_norm": 0.7548398375511169,
      "learning_rate": 3.604699432668042e-06,
      "loss": 0.0165,
      "step": 1961140
    },
    {
      "epoch": 3.209481353469099,
      "grad_norm": 0.4014521539211273,
      "learning_rate": 3.6046335404545245e-06,
      "loss": 0.0104,
      "step": 1961160
    },
    {
      "epoch": 3.2095140839077527,
      "grad_norm": 0.1483442783355713,
      "learning_rate": 3.6045676482410073e-06,
      "loss": 0.0106,
      "step": 1961180
    },
    {
      "epoch": 3.209546814346406,
      "grad_norm": 0.581091582775116,
      "learning_rate": 3.6045017560274904e-06,
      "loss": 0.0123,
      "step": 1961200
    },
    {
      "epoch": 3.209579544785059,
      "grad_norm": 0.5542946457862854,
      "learning_rate": 3.604435863813973e-06,
      "loss": 0.0137,
      "step": 1961220
    },
    {
      "epoch": 3.2096122752237126,
      "grad_norm": 0.17354929447174072,
      "learning_rate": 3.6043699716004563e-06,
      "loss": 0.0139,
      "step": 1961240
    },
    {
      "epoch": 3.2096450056623658,
      "grad_norm": 0.41250523924827576,
      "learning_rate": 3.6043040793869395e-06,
      "loss": 0.0107,
      "step": 1961260
    },
    {
      "epoch": 3.2096777361010194,
      "grad_norm": 0.2612396776676178,
      "learning_rate": 3.6042381871734222e-06,
      "loss": 0.013,
      "step": 1961280
    },
    {
      "epoch": 3.2097104665396725,
      "grad_norm": 0.1816389262676239,
      "learning_rate": 3.604172294959905e-06,
      "loss": 0.0114,
      "step": 1961300
    },
    {
      "epoch": 3.209743196978326,
      "grad_norm": 0.3039001524448395,
      "learning_rate": 3.6041064027463877e-06,
      "loss": 0.0171,
      "step": 1961320
    },
    {
      "epoch": 3.2097759274169793,
      "grad_norm": 0.3246957063674927,
      "learning_rate": 3.604040510532871e-06,
      "loss": 0.0105,
      "step": 1961340
    },
    {
      "epoch": 3.2098086578556324,
      "grad_norm": 0.24983566999435425,
      "learning_rate": 3.6039746183193536e-06,
      "loss": 0.0111,
      "step": 1961360
    },
    {
      "epoch": 3.209841388294286,
      "grad_norm": 0.8095757365226746,
      "learning_rate": 3.6039087261058364e-06,
      "loss": 0.0136,
      "step": 1961380
    },
    {
      "epoch": 3.209874118732939,
      "grad_norm": 0.07249298691749573,
      "learning_rate": 3.603842833892319e-06,
      "loss": 0.0073,
      "step": 1961400
    },
    {
      "epoch": 3.2099068491715927,
      "grad_norm": 0.3674525320529938,
      "learning_rate": 3.6037769416788023e-06,
      "loss": 0.008,
      "step": 1961420
    },
    {
      "epoch": 3.209939579610246,
      "grad_norm": 0.2752463221549988,
      "learning_rate": 3.603711049465285e-06,
      "loss": 0.0159,
      "step": 1961440
    },
    {
      "epoch": 3.209972310048899,
      "grad_norm": 0.6904041767120361,
      "learning_rate": 3.6036451572517677e-06,
      "loss": 0.0117,
      "step": 1961460
    },
    {
      "epoch": 3.2100050404875526,
      "grad_norm": 0.25913700461387634,
      "learning_rate": 3.6035792650382505e-06,
      "loss": 0.0143,
      "step": 1961480
    },
    {
      "epoch": 3.210037770926206,
      "grad_norm": 0.14378871023654938,
      "learning_rate": 3.6035133728247336e-06,
      "loss": 0.0102,
      "step": 1961500
    },
    {
      "epoch": 3.2100705013648594,
      "grad_norm": 0.25995707511901855,
      "learning_rate": 3.6034474806112164e-06,
      "loss": 0.0112,
      "step": 1961520
    },
    {
      "epoch": 3.2101032318035125,
      "grad_norm": 0.19630974531173706,
      "learning_rate": 3.603381588397699e-06,
      "loss": 0.0182,
      "step": 1961540
    },
    {
      "epoch": 3.210135962242166,
      "grad_norm": 0.4200423061847687,
      "learning_rate": 3.603315696184182e-06,
      "loss": 0.0097,
      "step": 1961560
    },
    {
      "epoch": 3.2101686926808193,
      "grad_norm": 0.33740127086639404,
      "learning_rate": 3.6032498039706646e-06,
      "loss": 0.014,
      "step": 1961580
    },
    {
      "epoch": 3.2102014231194724,
      "grad_norm": 0.2045654058456421,
      "learning_rate": 3.603183911757148e-06,
      "loss": 0.0165,
      "step": 1961600
    },
    {
      "epoch": 3.210234153558126,
      "grad_norm": 0.6712098717689514,
      "learning_rate": 3.603118019543631e-06,
      "loss": 0.0168,
      "step": 1961620
    },
    {
      "epoch": 3.210266883996779,
      "grad_norm": 0.7760983109474182,
      "learning_rate": 3.603052127330114e-06,
      "loss": 0.011,
      "step": 1961640
    },
    {
      "epoch": 3.2102996144354328,
      "grad_norm": 0.5956584215164185,
      "learning_rate": 3.602986235116597e-06,
      "loss": 0.0102,
      "step": 1961660
    },
    {
      "epoch": 3.210332344874086,
      "grad_norm": 0.6267169713973999,
      "learning_rate": 3.6029203429030796e-06,
      "loss": 0.0136,
      "step": 1961680
    },
    {
      "epoch": 3.2103650753127395,
      "grad_norm": 0.15470461547374725,
      "learning_rate": 3.6028544506895623e-06,
      "loss": 0.0095,
      "step": 1961700
    },
    {
      "epoch": 3.2103978057513927,
      "grad_norm": 0.13759839534759521,
      "learning_rate": 3.602788558476045e-06,
      "loss": 0.0168,
      "step": 1961720
    },
    {
      "epoch": 3.210430536190046,
      "grad_norm": 0.18988598883152008,
      "learning_rate": 3.6027226662625282e-06,
      "loss": 0.0122,
      "step": 1961740
    },
    {
      "epoch": 3.2104632666286994,
      "grad_norm": 0.2351251244544983,
      "learning_rate": 3.602656774049011e-06,
      "loss": 0.0114,
      "step": 1961760
    },
    {
      "epoch": 3.2104959970673526,
      "grad_norm": 0.4719454050064087,
      "learning_rate": 3.6025908818354937e-06,
      "loss": 0.0103,
      "step": 1961780
    },
    {
      "epoch": 3.210528727506006,
      "grad_norm": 0.22097964584827423,
      "learning_rate": 3.6025249896219764e-06,
      "loss": 0.0108,
      "step": 1961800
    },
    {
      "epoch": 3.2105614579446593,
      "grad_norm": 0.3779526352882385,
      "learning_rate": 3.6024590974084596e-06,
      "loss": 0.0103,
      "step": 1961820
    },
    {
      "epoch": 3.210594188383313,
      "grad_norm": 0.2415671944618225,
      "learning_rate": 3.6023932051949423e-06,
      "loss": 0.0119,
      "step": 1961840
    },
    {
      "epoch": 3.210626918821966,
      "grad_norm": 0.291157990694046,
      "learning_rate": 3.602327312981425e-06,
      "loss": 0.0139,
      "step": 1961860
    },
    {
      "epoch": 3.210659649260619,
      "grad_norm": 0.8198531270027161,
      "learning_rate": 3.602261420767908e-06,
      "loss": 0.0102,
      "step": 1961880
    },
    {
      "epoch": 3.210692379699273,
      "grad_norm": 0.42549318075180054,
      "learning_rate": 3.602195528554391e-06,
      "loss": 0.0095,
      "step": 1961900
    },
    {
      "epoch": 3.210725110137926,
      "grad_norm": 0.2229996770620346,
      "learning_rate": 3.6021296363408737e-06,
      "loss": 0.0083,
      "step": 1961920
    },
    {
      "epoch": 3.2107578405765795,
      "grad_norm": 0.45029154419898987,
      "learning_rate": 3.6020637441273565e-06,
      "loss": 0.0156,
      "step": 1961940
    },
    {
      "epoch": 3.2107905710152327,
      "grad_norm": 0.22273631393909454,
      "learning_rate": 3.60199785191384e-06,
      "loss": 0.0098,
      "step": 1961960
    },
    {
      "epoch": 3.2108233014538863,
      "grad_norm": 0.16045795381069183,
      "learning_rate": 3.601931959700323e-06,
      "loss": 0.0139,
      "step": 1961980
    },
    {
      "epoch": 3.2108560318925394,
      "grad_norm": 0.1387275755405426,
      "learning_rate": 3.6018660674868055e-06,
      "loss": 0.0127,
      "step": 1962000
    },
    {
      "epoch": 3.2108887623311926,
      "grad_norm": 0.6034635901451111,
      "learning_rate": 3.6018001752732883e-06,
      "loss": 0.011,
      "step": 1962020
    },
    {
      "epoch": 3.210921492769846,
      "grad_norm": 0.4798547327518463,
      "learning_rate": 3.6017342830597714e-06,
      "loss": 0.0094,
      "step": 1962040
    },
    {
      "epoch": 3.2109542232084993,
      "grad_norm": 0.22517645359039307,
      "learning_rate": 3.601668390846254e-06,
      "loss": 0.0131,
      "step": 1962060
    },
    {
      "epoch": 3.210986953647153,
      "grad_norm": 0.39445045590400696,
      "learning_rate": 3.601602498632737e-06,
      "loss": 0.0078,
      "step": 1962080
    },
    {
      "epoch": 3.211019684085806,
      "grad_norm": 0.19605115056037903,
      "learning_rate": 3.6015366064192197e-06,
      "loss": 0.0081,
      "step": 1962100
    },
    {
      "epoch": 3.2110524145244597,
      "grad_norm": 0.6134175658226013,
      "learning_rate": 3.6014707142057024e-06,
      "loss": 0.015,
      "step": 1962120
    },
    {
      "epoch": 3.211085144963113,
      "grad_norm": 0.13139376044273376,
      "learning_rate": 3.6014048219921856e-06,
      "loss": 0.0088,
      "step": 1962140
    },
    {
      "epoch": 3.211117875401766,
      "grad_norm": 0.23708534240722656,
      "learning_rate": 3.6013389297786683e-06,
      "loss": 0.0123,
      "step": 1962160
    },
    {
      "epoch": 3.2111506058404196,
      "grad_norm": 0.5648180842399597,
      "learning_rate": 3.601273037565151e-06,
      "loss": 0.0116,
      "step": 1962180
    },
    {
      "epoch": 3.2111833362790727,
      "grad_norm": 0.35177081823349,
      "learning_rate": 3.6012071453516338e-06,
      "loss": 0.016,
      "step": 1962200
    },
    {
      "epoch": 3.2112160667177263,
      "grad_norm": 0.16650009155273438,
      "learning_rate": 3.601141253138117e-06,
      "loss": 0.0102,
      "step": 1962220
    },
    {
      "epoch": 3.2112487971563795,
      "grad_norm": 0.07642247527837753,
      "learning_rate": 3.6010753609245997e-06,
      "loss": 0.0097,
      "step": 1962240
    },
    {
      "epoch": 3.211281527595033,
      "grad_norm": 0.35766786336898804,
      "learning_rate": 3.6010094687110824e-06,
      "loss": 0.0109,
      "step": 1962260
    },
    {
      "epoch": 3.211314258033686,
      "grad_norm": 0.09285938739776611,
      "learning_rate": 3.600943576497565e-06,
      "loss": 0.0095,
      "step": 1962280
    },
    {
      "epoch": 3.2113469884723393,
      "grad_norm": 0.37966814637184143,
      "learning_rate": 3.6008776842840487e-06,
      "loss": 0.0087,
      "step": 1962300
    },
    {
      "epoch": 3.211379718910993,
      "grad_norm": 0.14691627025604248,
      "learning_rate": 3.6008117920705315e-06,
      "loss": 0.0085,
      "step": 1962320
    },
    {
      "epoch": 3.211412449349646,
      "grad_norm": 0.17791573703289032,
      "learning_rate": 3.6007458998570142e-06,
      "loss": 0.0089,
      "step": 1962340
    },
    {
      "epoch": 3.2114451797882997,
      "grad_norm": 0.5047187805175781,
      "learning_rate": 3.6006800076434974e-06,
      "loss": 0.0119,
      "step": 1962360
    },
    {
      "epoch": 3.211477910226953,
      "grad_norm": 0.5345392227172852,
      "learning_rate": 3.60061411542998e-06,
      "loss": 0.0098,
      "step": 1962380
    },
    {
      "epoch": 3.2115106406656064,
      "grad_norm": 1.490465760231018,
      "learning_rate": 3.600548223216463e-06,
      "loss": 0.0075,
      "step": 1962400
    },
    {
      "epoch": 3.2115433711042596,
      "grad_norm": 0.35617104172706604,
      "learning_rate": 3.6004823310029456e-06,
      "loss": 0.0103,
      "step": 1962420
    },
    {
      "epoch": 3.2115761015429127,
      "grad_norm": 0.22115322947502136,
      "learning_rate": 3.6004164387894288e-06,
      "loss": 0.011,
      "step": 1962440
    },
    {
      "epoch": 3.2116088319815663,
      "grad_norm": 0.5077932476997375,
      "learning_rate": 3.6003505465759115e-06,
      "loss": 0.0179,
      "step": 1962460
    },
    {
      "epoch": 3.2116415624202195,
      "grad_norm": 0.18422289192676544,
      "learning_rate": 3.6002846543623942e-06,
      "loss": 0.0111,
      "step": 1962480
    },
    {
      "epoch": 3.211674292858873,
      "grad_norm": 0.3366224765777588,
      "learning_rate": 3.600218762148877e-06,
      "loss": 0.013,
      "step": 1962500
    },
    {
      "epoch": 3.211707023297526,
      "grad_norm": 0.18291176855564117,
      "learning_rate": 3.60015286993536e-06,
      "loss": 0.0139,
      "step": 1962520
    },
    {
      "epoch": 3.2117397537361794,
      "grad_norm": 0.5261317491531372,
      "learning_rate": 3.600086977721843e-06,
      "loss": 0.0099,
      "step": 1962540
    },
    {
      "epoch": 3.211772484174833,
      "grad_norm": 0.14275887608528137,
      "learning_rate": 3.6000210855083256e-06,
      "loss": 0.0139,
      "step": 1962560
    },
    {
      "epoch": 3.211805214613486,
      "grad_norm": 0.22728748619556427,
      "learning_rate": 3.5999551932948084e-06,
      "loss": 0.0062,
      "step": 1962580
    },
    {
      "epoch": 3.2118379450521397,
      "grad_norm": 0.39292117953300476,
      "learning_rate": 3.599889301081291e-06,
      "loss": 0.01,
      "step": 1962600
    },
    {
      "epoch": 3.211870675490793,
      "grad_norm": 0.6169218420982361,
      "learning_rate": 3.5998234088677743e-06,
      "loss": 0.0129,
      "step": 1962620
    },
    {
      "epoch": 3.2119034059294465,
      "grad_norm": 0.7252047657966614,
      "learning_rate": 3.599757516654257e-06,
      "loss": 0.0122,
      "step": 1962640
    },
    {
      "epoch": 3.2119361363680996,
      "grad_norm": 0.285656601190567,
      "learning_rate": 3.59969162444074e-06,
      "loss": 0.0115,
      "step": 1962660
    },
    {
      "epoch": 3.2119688668067528,
      "grad_norm": 0.2845114767551422,
      "learning_rate": 3.5996257322272233e-06,
      "loss": 0.0122,
      "step": 1962680
    },
    {
      "epoch": 3.2120015972454063,
      "grad_norm": 0.15208493173122406,
      "learning_rate": 3.599559840013706e-06,
      "loss": 0.0133,
      "step": 1962700
    },
    {
      "epoch": 3.2120343276840595,
      "grad_norm": 0.5969381332397461,
      "learning_rate": 3.599493947800189e-06,
      "loss": 0.0146,
      "step": 1962720
    },
    {
      "epoch": 3.212067058122713,
      "grad_norm": 0.18373502790927887,
      "learning_rate": 3.5994280555866716e-06,
      "loss": 0.0123,
      "step": 1962740
    },
    {
      "epoch": 3.2120997885613662,
      "grad_norm": 0.28234928846359253,
      "learning_rate": 3.5993621633731547e-06,
      "loss": 0.0088,
      "step": 1962760
    },
    {
      "epoch": 3.21213251900002,
      "grad_norm": 0.09091662615537643,
      "learning_rate": 3.5992962711596375e-06,
      "loss": 0.0078,
      "step": 1962780
    },
    {
      "epoch": 3.212165249438673,
      "grad_norm": 0.4681493937969208,
      "learning_rate": 3.59923037894612e-06,
      "loss": 0.0127,
      "step": 1962800
    },
    {
      "epoch": 3.212197979877326,
      "grad_norm": 0.16761720180511475,
      "learning_rate": 3.599164486732603e-06,
      "loss": 0.0138,
      "step": 1962820
    },
    {
      "epoch": 3.2122307103159797,
      "grad_norm": 0.14855101704597473,
      "learning_rate": 3.599098594519086e-06,
      "loss": 0.013,
      "step": 1962840
    },
    {
      "epoch": 3.212263440754633,
      "grad_norm": 0.5292813777923584,
      "learning_rate": 3.599032702305569e-06,
      "loss": 0.0123,
      "step": 1962860
    },
    {
      "epoch": 3.2122961711932865,
      "grad_norm": 0.4024930000305176,
      "learning_rate": 3.5989668100920516e-06,
      "loss": 0.014,
      "step": 1962880
    },
    {
      "epoch": 3.2123289016319396,
      "grad_norm": 0.35435014963150024,
      "learning_rate": 3.5989009178785343e-06,
      "loss": 0.0118,
      "step": 1962900
    },
    {
      "epoch": 3.2123616320705928,
      "grad_norm": 0.1720295548439026,
      "learning_rate": 3.5988350256650175e-06,
      "loss": 0.0139,
      "step": 1962920
    },
    {
      "epoch": 3.2123943625092464,
      "grad_norm": 0.8425527215003967,
      "learning_rate": 3.5987691334515002e-06,
      "loss": 0.0128,
      "step": 1962940
    },
    {
      "epoch": 3.2124270929478995,
      "grad_norm": 0.645786464214325,
      "learning_rate": 3.598703241237983e-06,
      "loss": 0.0154,
      "step": 1962960
    },
    {
      "epoch": 3.212459823386553,
      "grad_norm": 0.11662019789218903,
      "learning_rate": 3.5986373490244657e-06,
      "loss": 0.0095,
      "step": 1962980
    },
    {
      "epoch": 3.2124925538252063,
      "grad_norm": 0.20756858587265015,
      "learning_rate": 3.5985714568109493e-06,
      "loss": 0.0117,
      "step": 1963000
    },
    {
      "epoch": 3.21252528426386,
      "grad_norm": 0.21273721754550934,
      "learning_rate": 3.598505564597432e-06,
      "loss": 0.0163,
      "step": 1963020
    },
    {
      "epoch": 3.212558014702513,
      "grad_norm": 0.16438786685466766,
      "learning_rate": 3.5984396723839148e-06,
      "loss": 0.01,
      "step": 1963040
    },
    {
      "epoch": 3.212590745141166,
      "grad_norm": 0.42654603719711304,
      "learning_rate": 3.598373780170398e-06,
      "loss": 0.0173,
      "step": 1963060
    },
    {
      "epoch": 3.2126234755798198,
      "grad_norm": 0.11120159178972244,
      "learning_rate": 3.5983078879568807e-06,
      "loss": 0.0096,
      "step": 1963080
    },
    {
      "epoch": 3.212656206018473,
      "grad_norm": 0.1360151618719101,
      "learning_rate": 3.5982419957433634e-06,
      "loss": 0.01,
      "step": 1963100
    },
    {
      "epoch": 3.2126889364571265,
      "grad_norm": 0.6425173878669739,
      "learning_rate": 3.598176103529846e-06,
      "loss": 0.0071,
      "step": 1963120
    },
    {
      "epoch": 3.2127216668957796,
      "grad_norm": 0.22786737978458405,
      "learning_rate": 3.598110211316329e-06,
      "loss": 0.015,
      "step": 1963140
    },
    {
      "epoch": 3.2127543973344332,
      "grad_norm": 0.4985142946243286,
      "learning_rate": 3.598044319102812e-06,
      "loss": 0.0095,
      "step": 1963160
    },
    {
      "epoch": 3.2127871277730864,
      "grad_norm": 0.28292131423950195,
      "learning_rate": 3.597978426889295e-06,
      "loss": 0.0181,
      "step": 1963180
    },
    {
      "epoch": 3.2128198582117395,
      "grad_norm": 0.2414691299200058,
      "learning_rate": 3.5979125346757775e-06,
      "loss": 0.008,
      "step": 1963200
    },
    {
      "epoch": 3.212852588650393,
      "grad_norm": 0.19908034801483154,
      "learning_rate": 3.5978466424622603e-06,
      "loss": 0.0079,
      "step": 1963220
    },
    {
      "epoch": 3.2128853190890463,
      "grad_norm": 0.21477869153022766,
      "learning_rate": 3.5977807502487434e-06,
      "loss": 0.0108,
      "step": 1963240
    },
    {
      "epoch": 3.2129180495277,
      "grad_norm": 0.37108632922172546,
      "learning_rate": 3.597714858035226e-06,
      "loss": 0.0159,
      "step": 1963260
    },
    {
      "epoch": 3.212950779966353,
      "grad_norm": 0.1509833037853241,
      "learning_rate": 3.597648965821709e-06,
      "loss": 0.0091,
      "step": 1963280
    },
    {
      "epoch": 3.2129835104050066,
      "grad_norm": 0.10691843181848526,
      "learning_rate": 3.5975830736081917e-06,
      "loss": 0.0081,
      "step": 1963300
    },
    {
      "epoch": 3.2130162408436598,
      "grad_norm": 0.08415570855140686,
      "learning_rate": 3.597517181394675e-06,
      "loss": 0.0101,
      "step": 1963320
    },
    {
      "epoch": 3.213048971282313,
      "grad_norm": 0.3608005940914154,
      "learning_rate": 3.5974512891811576e-06,
      "loss": 0.0103,
      "step": 1963340
    },
    {
      "epoch": 3.2130817017209665,
      "grad_norm": 0.07293979823589325,
      "learning_rate": 3.5973853969676407e-06,
      "loss": 0.0115,
      "step": 1963360
    },
    {
      "epoch": 3.2131144321596197,
      "grad_norm": 0.1891866773366928,
      "learning_rate": 3.597319504754124e-06,
      "loss": 0.0156,
      "step": 1963380
    },
    {
      "epoch": 3.2131471625982733,
      "grad_norm": 0.1644531935453415,
      "learning_rate": 3.5972536125406066e-06,
      "loss": 0.0076,
      "step": 1963400
    },
    {
      "epoch": 3.2131798930369264,
      "grad_norm": 0.2617734372615814,
      "learning_rate": 3.5971877203270894e-06,
      "loss": 0.017,
      "step": 1963420
    },
    {
      "epoch": 3.21321262347558,
      "grad_norm": 0.08842535316944122,
      "learning_rate": 3.597121828113572e-06,
      "loss": 0.0111,
      "step": 1963440
    },
    {
      "epoch": 3.213245353914233,
      "grad_norm": 0.42870545387268066,
      "learning_rate": 3.5970559359000553e-06,
      "loss": 0.0126,
      "step": 1963460
    },
    {
      "epoch": 3.2132780843528863,
      "grad_norm": 0.3564978539943695,
      "learning_rate": 3.596990043686538e-06,
      "loss": 0.0128,
      "step": 1963480
    },
    {
      "epoch": 3.21331081479154,
      "grad_norm": 0.4484507143497467,
      "learning_rate": 3.5969241514730208e-06,
      "loss": 0.0164,
      "step": 1963500
    },
    {
      "epoch": 3.213343545230193,
      "grad_norm": 0.2215120494365692,
      "learning_rate": 3.5968582592595035e-06,
      "loss": 0.017,
      "step": 1963520
    },
    {
      "epoch": 3.2133762756688466,
      "grad_norm": 0.3009137213230133,
      "learning_rate": 3.5967923670459867e-06,
      "loss": 0.011,
      "step": 1963540
    },
    {
      "epoch": 3.2134090061075,
      "grad_norm": 0.6258372664451599,
      "learning_rate": 3.5967264748324694e-06,
      "loss": 0.015,
      "step": 1963560
    },
    {
      "epoch": 3.2134417365461534,
      "grad_norm": 0.35428187251091003,
      "learning_rate": 3.596660582618952e-06,
      "loss": 0.011,
      "step": 1963580
    },
    {
      "epoch": 3.2134744669848065,
      "grad_norm": 0.11355689913034439,
      "learning_rate": 3.596594690405435e-06,
      "loss": 0.0088,
      "step": 1963600
    },
    {
      "epoch": 3.2135071974234597,
      "grad_norm": 0.28290966153144836,
      "learning_rate": 3.5965287981919176e-06,
      "loss": 0.008,
      "step": 1963620
    },
    {
      "epoch": 3.2135399278621133,
      "grad_norm": 0.20725186169147491,
      "learning_rate": 3.5964629059784008e-06,
      "loss": 0.0099,
      "step": 1963640
    },
    {
      "epoch": 3.2135726583007664,
      "grad_norm": 0.21174483001232147,
      "learning_rate": 3.5963970137648835e-06,
      "loss": 0.0151,
      "step": 1963660
    },
    {
      "epoch": 3.21360538873942,
      "grad_norm": 0.20925338566303253,
      "learning_rate": 3.5963311215513663e-06,
      "loss": 0.0108,
      "step": 1963680
    },
    {
      "epoch": 3.213638119178073,
      "grad_norm": 0.08778346329927444,
      "learning_rate": 3.59626522933785e-06,
      "loss": 0.0138,
      "step": 1963700
    },
    {
      "epoch": 3.2136708496167268,
      "grad_norm": 0.24873846769332886,
      "learning_rate": 3.5961993371243326e-06,
      "loss": 0.0098,
      "step": 1963720
    },
    {
      "epoch": 3.21370358005538,
      "grad_norm": 0.5450690984725952,
      "learning_rate": 3.5961334449108153e-06,
      "loss": 0.0117,
      "step": 1963740
    },
    {
      "epoch": 3.213736310494033,
      "grad_norm": 0.11963319033384323,
      "learning_rate": 3.596067552697298e-06,
      "loss": 0.0106,
      "step": 1963760
    },
    {
      "epoch": 3.2137690409326867,
      "grad_norm": 0.32811543345451355,
      "learning_rate": 3.5960016604837812e-06,
      "loss": 0.0115,
      "step": 1963780
    },
    {
      "epoch": 3.21380177137134,
      "grad_norm": 0.4330233931541443,
      "learning_rate": 3.595935768270264e-06,
      "loss": 0.011,
      "step": 1963800
    },
    {
      "epoch": 3.2138345018099934,
      "grad_norm": 0.11009834706783295,
      "learning_rate": 3.5958698760567467e-06,
      "loss": 0.015,
      "step": 1963820
    },
    {
      "epoch": 3.2138672322486466,
      "grad_norm": 0.17272791266441345,
      "learning_rate": 3.5958039838432294e-06,
      "loss": 0.0101,
      "step": 1963840
    },
    {
      "epoch": 3.2138999626873,
      "grad_norm": 0.4400327503681183,
      "learning_rate": 3.5957380916297126e-06,
      "loss": 0.0128,
      "step": 1963860
    },
    {
      "epoch": 3.2139326931259533,
      "grad_norm": 0.20951269567012787,
      "learning_rate": 3.5956721994161953e-06,
      "loss": 0.0102,
      "step": 1963880
    },
    {
      "epoch": 3.2139654235646065,
      "grad_norm": 0.46282759308815,
      "learning_rate": 3.595606307202678e-06,
      "loss": 0.0089,
      "step": 1963900
    },
    {
      "epoch": 3.21399815400326,
      "grad_norm": 0.4076679050922394,
      "learning_rate": 3.595540414989161e-06,
      "loss": 0.0101,
      "step": 1963920
    },
    {
      "epoch": 3.214030884441913,
      "grad_norm": 0.20881301164627075,
      "learning_rate": 3.595474522775644e-06,
      "loss": 0.0089,
      "step": 1963940
    },
    {
      "epoch": 3.214063614880567,
      "grad_norm": 0.14086252450942993,
      "learning_rate": 3.5954086305621267e-06,
      "loss": 0.0094,
      "step": 1963960
    },
    {
      "epoch": 3.21409634531922,
      "grad_norm": 0.5328831076622009,
      "learning_rate": 3.5953427383486095e-06,
      "loss": 0.0133,
      "step": 1963980
    },
    {
      "epoch": 3.214129075757873,
      "grad_norm": 0.701653242111206,
      "learning_rate": 3.595276846135092e-06,
      "loss": 0.0149,
      "step": 1964000
    },
    {
      "epoch": 3.2141618061965267,
      "grad_norm": 0.5455754995346069,
      "learning_rate": 3.595210953921575e-06,
      "loss": 0.0106,
      "step": 1964020
    },
    {
      "epoch": 3.21419453663518,
      "grad_norm": 0.40259289741516113,
      "learning_rate": 3.595145061708058e-06,
      "loss": 0.0123,
      "step": 1964040
    },
    {
      "epoch": 3.2142272670738334,
      "grad_norm": 0.05853009223937988,
      "learning_rate": 3.5950791694945413e-06,
      "loss": 0.0093,
      "step": 1964060
    },
    {
      "epoch": 3.2142599975124866,
      "grad_norm": 0.2431071698665619,
      "learning_rate": 3.595013277281024e-06,
      "loss": 0.0114,
      "step": 1964080
    },
    {
      "epoch": 3.21429272795114,
      "grad_norm": 0.24605387449264526,
      "learning_rate": 3.594947385067507e-06,
      "loss": 0.0096,
      "step": 1964100
    },
    {
      "epoch": 3.2143254583897933,
      "grad_norm": 0.24238577485084534,
      "learning_rate": 3.59488149285399e-06,
      "loss": 0.0105,
      "step": 1964120
    },
    {
      "epoch": 3.2143581888284465,
      "grad_norm": 0.4633820056915283,
      "learning_rate": 3.5948156006404727e-06,
      "loss": 0.0155,
      "step": 1964140
    },
    {
      "epoch": 3.2143909192671,
      "grad_norm": 0.28866520524024963,
      "learning_rate": 3.5947497084269554e-06,
      "loss": 0.012,
      "step": 1964160
    },
    {
      "epoch": 3.2144236497057532,
      "grad_norm": 0.33993956446647644,
      "learning_rate": 3.5946838162134386e-06,
      "loss": 0.0134,
      "step": 1964180
    },
    {
      "epoch": 3.214456380144407,
      "grad_norm": 0.30119216442108154,
      "learning_rate": 3.5946179239999213e-06,
      "loss": 0.0097,
      "step": 1964200
    },
    {
      "epoch": 3.21448911058306,
      "grad_norm": 0.06315301358699799,
      "learning_rate": 3.594552031786404e-06,
      "loss": 0.0099,
      "step": 1964220
    },
    {
      "epoch": 3.2145218410217136,
      "grad_norm": 0.48412764072418213,
      "learning_rate": 3.5944861395728868e-06,
      "loss": 0.0075,
      "step": 1964240
    },
    {
      "epoch": 3.2145545714603667,
      "grad_norm": 0.14850860834121704,
      "learning_rate": 3.59442024735937e-06,
      "loss": 0.0142,
      "step": 1964260
    },
    {
      "epoch": 3.21458730189902,
      "grad_norm": 0.157077357172966,
      "learning_rate": 3.5943543551458527e-06,
      "loss": 0.01,
      "step": 1964280
    },
    {
      "epoch": 3.2146200323376735,
      "grad_norm": 0.6259610652923584,
      "learning_rate": 3.5942884629323354e-06,
      "loss": 0.0149,
      "step": 1964300
    },
    {
      "epoch": 3.2146527627763266,
      "grad_norm": 0.1873079538345337,
      "learning_rate": 3.594222570718818e-06,
      "loss": 0.0079,
      "step": 1964320
    },
    {
      "epoch": 3.21468549321498,
      "grad_norm": 0.43993058800697327,
      "learning_rate": 3.5941566785053013e-06,
      "loss": 0.0116,
      "step": 1964340
    },
    {
      "epoch": 3.2147182236536334,
      "grad_norm": 0.5374338626861572,
      "learning_rate": 3.594090786291784e-06,
      "loss": 0.0094,
      "step": 1964360
    },
    {
      "epoch": 3.214750954092287,
      "grad_norm": 0.2685166895389557,
      "learning_rate": 3.594024894078267e-06,
      "loss": 0.0117,
      "step": 1964380
    },
    {
      "epoch": 3.21478368453094,
      "grad_norm": 0.37327876687049866,
      "learning_rate": 3.5939590018647495e-06,
      "loss": 0.009,
      "step": 1964400
    },
    {
      "epoch": 3.2148164149695933,
      "grad_norm": 0.43396684527397156,
      "learning_rate": 3.593893109651233e-06,
      "loss": 0.0117,
      "step": 1964420
    },
    {
      "epoch": 3.214849145408247,
      "grad_norm": 0.43769538402557373,
      "learning_rate": 3.593827217437716e-06,
      "loss": 0.014,
      "step": 1964440
    },
    {
      "epoch": 3.2148818758469,
      "grad_norm": 0.08324205875396729,
      "learning_rate": 3.5937613252241986e-06,
      "loss": 0.0132,
      "step": 1964460
    },
    {
      "epoch": 3.2149146062855536,
      "grad_norm": 0.26755040884017944,
      "learning_rate": 3.5936954330106818e-06,
      "loss": 0.0121,
      "step": 1964480
    },
    {
      "epoch": 3.2149473367242067,
      "grad_norm": 0.3208881914615631,
      "learning_rate": 3.5936295407971645e-06,
      "loss": 0.0121,
      "step": 1964500
    },
    {
      "epoch": 3.21498006716286,
      "grad_norm": 0.5037731528282166,
      "learning_rate": 3.5935636485836473e-06,
      "loss": 0.0154,
      "step": 1964520
    },
    {
      "epoch": 3.2150127976015135,
      "grad_norm": 0.45947498083114624,
      "learning_rate": 3.59349775637013e-06,
      "loss": 0.0095,
      "step": 1964540
    },
    {
      "epoch": 3.2150455280401666,
      "grad_norm": 0.5303899049758911,
      "learning_rate": 3.5934318641566127e-06,
      "loss": 0.0116,
      "step": 1964560
    },
    {
      "epoch": 3.2150782584788202,
      "grad_norm": 0.4035116136074066,
      "learning_rate": 3.593365971943096e-06,
      "loss": 0.0103,
      "step": 1964580
    },
    {
      "epoch": 3.2151109889174734,
      "grad_norm": 0.1757904440164566,
      "learning_rate": 3.5933000797295786e-06,
      "loss": 0.0115,
      "step": 1964600
    },
    {
      "epoch": 3.215143719356127,
      "grad_norm": 0.33462661504745483,
      "learning_rate": 3.5932341875160614e-06,
      "loss": 0.0087,
      "step": 1964620
    },
    {
      "epoch": 3.21517644979478,
      "grad_norm": 0.379527747631073,
      "learning_rate": 3.593168295302544e-06,
      "loss": 0.0135,
      "step": 1964640
    },
    {
      "epoch": 3.2152091802334333,
      "grad_norm": 0.32183241844177246,
      "learning_rate": 3.5931024030890273e-06,
      "loss": 0.0123,
      "step": 1964660
    },
    {
      "epoch": 3.215241910672087,
      "grad_norm": 0.21636618673801422,
      "learning_rate": 3.59303651087551e-06,
      "loss": 0.0094,
      "step": 1964680
    },
    {
      "epoch": 3.21527464111074,
      "grad_norm": 0.5176313519477844,
      "learning_rate": 3.5929706186619928e-06,
      "loss": 0.0098,
      "step": 1964700
    },
    {
      "epoch": 3.2153073715493936,
      "grad_norm": 0.196618914604187,
      "learning_rate": 3.5929047264484755e-06,
      "loss": 0.0111,
      "step": 1964720
    },
    {
      "epoch": 3.2153401019880468,
      "grad_norm": 0.29372379183769226,
      "learning_rate": 3.5928388342349587e-06,
      "loss": 0.0135,
      "step": 1964740
    },
    {
      "epoch": 3.2153728324267004,
      "grad_norm": 0.24838627874851227,
      "learning_rate": 3.592772942021442e-06,
      "loss": 0.011,
      "step": 1964760
    },
    {
      "epoch": 3.2154055628653535,
      "grad_norm": 0.11783269047737122,
      "learning_rate": 3.5927070498079246e-06,
      "loss": 0.011,
      "step": 1964780
    },
    {
      "epoch": 3.2154382933040067,
      "grad_norm": 0.26127681136131287,
      "learning_rate": 3.5926411575944077e-06,
      "loss": 0.0105,
      "step": 1964800
    },
    {
      "epoch": 3.2154710237426603,
      "grad_norm": 0.14235150814056396,
      "learning_rate": 3.5925752653808905e-06,
      "loss": 0.0114,
      "step": 1964820
    },
    {
      "epoch": 3.2155037541813134,
      "grad_norm": 0.4743974804878235,
      "learning_rate": 3.592509373167373e-06,
      "loss": 0.0084,
      "step": 1964840
    },
    {
      "epoch": 3.215536484619967,
      "grad_norm": 0.8365094661712646,
      "learning_rate": 3.592443480953856e-06,
      "loss": 0.0091,
      "step": 1964860
    },
    {
      "epoch": 3.21556921505862,
      "grad_norm": 0.25140970945358276,
      "learning_rate": 3.592377588740339e-06,
      "loss": 0.014,
      "step": 1964880
    },
    {
      "epoch": 3.2156019454972737,
      "grad_norm": 0.12293846905231476,
      "learning_rate": 3.592311696526822e-06,
      "loss": 0.0093,
      "step": 1964900
    },
    {
      "epoch": 3.215634675935927,
      "grad_norm": 0.44339117407798767,
      "learning_rate": 3.5922458043133046e-06,
      "loss": 0.0085,
      "step": 1964920
    },
    {
      "epoch": 3.21566740637458,
      "grad_norm": 0.12247483432292938,
      "learning_rate": 3.5921799120997873e-06,
      "loss": 0.0132,
      "step": 1964940
    },
    {
      "epoch": 3.2157001368132336,
      "grad_norm": 0.5835811495780945,
      "learning_rate": 3.5921140198862705e-06,
      "loss": 0.0153,
      "step": 1964960
    },
    {
      "epoch": 3.215732867251887,
      "grad_norm": 0.3727706968784332,
      "learning_rate": 3.5920481276727532e-06,
      "loss": 0.0118,
      "step": 1964980
    },
    {
      "epoch": 3.2157655976905404,
      "grad_norm": 0.22855046391487122,
      "learning_rate": 3.591982235459236e-06,
      "loss": 0.0136,
      "step": 1965000
    },
    {
      "epoch": 3.2157983281291935,
      "grad_norm": 0.3251062035560608,
      "learning_rate": 3.5919163432457187e-06,
      "loss": 0.0087,
      "step": 1965020
    },
    {
      "epoch": 3.215831058567847,
      "grad_norm": 0.45216813683509827,
      "learning_rate": 3.5918504510322015e-06,
      "loss": 0.0121,
      "step": 1965040
    },
    {
      "epoch": 3.2158637890065003,
      "grad_norm": 0.5578868985176086,
      "learning_rate": 3.5917845588186846e-06,
      "loss": 0.0137,
      "step": 1965060
    },
    {
      "epoch": 3.2158965194451534,
      "grad_norm": 0.10903237015008926,
      "learning_rate": 3.5917186666051674e-06,
      "loss": 0.0103,
      "step": 1965080
    },
    {
      "epoch": 3.215929249883807,
      "grad_norm": 0.22435881197452545,
      "learning_rate": 3.59165277439165e-06,
      "loss": 0.0111,
      "step": 1965100
    },
    {
      "epoch": 3.21596198032246,
      "grad_norm": 0.15958961844444275,
      "learning_rate": 3.5915868821781337e-06,
      "loss": 0.011,
      "step": 1965120
    },
    {
      "epoch": 3.2159947107611138,
      "grad_norm": 0.3982372581958771,
      "learning_rate": 3.5915209899646164e-06,
      "loss": 0.009,
      "step": 1965140
    },
    {
      "epoch": 3.216027441199767,
      "grad_norm": 0.06063174083828926,
      "learning_rate": 3.591455097751099e-06,
      "loss": 0.0091,
      "step": 1965160
    },
    {
      "epoch": 3.2160601716384205,
      "grad_norm": 0.26379111409187317,
      "learning_rate": 3.591389205537582e-06,
      "loss": 0.0115,
      "step": 1965180
    },
    {
      "epoch": 3.2160929020770737,
      "grad_norm": 0.5835708975791931,
      "learning_rate": 3.591323313324065e-06,
      "loss": 0.012,
      "step": 1965200
    },
    {
      "epoch": 3.216125632515727,
      "grad_norm": 0.35193192958831787,
      "learning_rate": 3.591257421110548e-06,
      "loss": 0.0119,
      "step": 1965220
    },
    {
      "epoch": 3.2161583629543804,
      "grad_norm": 0.07753268629312515,
      "learning_rate": 3.5911915288970305e-06,
      "loss": 0.0112,
      "step": 1965240
    },
    {
      "epoch": 3.2161910933930336,
      "grad_norm": 0.13623690605163574,
      "learning_rate": 3.5911256366835133e-06,
      "loss": 0.0104,
      "step": 1965260
    },
    {
      "epoch": 3.216223823831687,
      "grad_norm": 0.5298526883125305,
      "learning_rate": 3.5910597444699964e-06,
      "loss": 0.0101,
      "step": 1965280
    },
    {
      "epoch": 3.2162565542703403,
      "grad_norm": 0.2143891304731369,
      "learning_rate": 3.590993852256479e-06,
      "loss": 0.009,
      "step": 1965300
    },
    {
      "epoch": 3.216289284708994,
      "grad_norm": 0.06652380526065826,
      "learning_rate": 3.590927960042962e-06,
      "loss": 0.0084,
      "step": 1965320
    },
    {
      "epoch": 3.216322015147647,
      "grad_norm": 0.21938562393188477,
      "learning_rate": 3.5908620678294447e-06,
      "loss": 0.0113,
      "step": 1965340
    },
    {
      "epoch": 3.2163547455863,
      "grad_norm": 0.10169167071580887,
      "learning_rate": 3.590796175615928e-06,
      "loss": 0.0117,
      "step": 1965360
    },
    {
      "epoch": 3.216387476024954,
      "grad_norm": 0.2001367062330246,
      "learning_rate": 3.5907302834024106e-06,
      "loss": 0.007,
      "step": 1965380
    },
    {
      "epoch": 3.216420206463607,
      "grad_norm": 0.3054842948913574,
      "learning_rate": 3.5906643911888933e-06,
      "loss": 0.0157,
      "step": 1965400
    },
    {
      "epoch": 3.2164529369022605,
      "grad_norm": 0.1811365932226181,
      "learning_rate": 3.590598498975376e-06,
      "loss": 0.0088,
      "step": 1965420
    },
    {
      "epoch": 3.2164856673409137,
      "grad_norm": 0.4268287420272827,
      "learning_rate": 3.590532606761859e-06,
      "loss": 0.0124,
      "step": 1965440
    },
    {
      "epoch": 3.2165183977795673,
      "grad_norm": 0.37321433424949646,
      "learning_rate": 3.5904667145483424e-06,
      "loss": 0.0129,
      "step": 1965460
    },
    {
      "epoch": 3.2165511282182204,
      "grad_norm": 0.21277599036693573,
      "learning_rate": 3.590400822334825e-06,
      "loss": 0.0156,
      "step": 1965480
    },
    {
      "epoch": 3.2165838586568736,
      "grad_norm": 0.7283514738082886,
      "learning_rate": 3.5903349301213083e-06,
      "loss": 0.0137,
      "step": 1965500
    },
    {
      "epoch": 3.216616589095527,
      "grad_norm": 0.2745122015476227,
      "learning_rate": 3.590269037907791e-06,
      "loss": 0.0109,
      "step": 1965520
    },
    {
      "epoch": 3.2166493195341803,
      "grad_norm": 0.8285326957702637,
      "learning_rate": 3.5902031456942738e-06,
      "loss": 0.0141,
      "step": 1965540
    },
    {
      "epoch": 3.216682049972834,
      "grad_norm": 0.19630274176597595,
      "learning_rate": 3.5901372534807565e-06,
      "loss": 0.0117,
      "step": 1965560
    },
    {
      "epoch": 3.216714780411487,
      "grad_norm": 0.58128821849823,
      "learning_rate": 3.5900713612672392e-06,
      "loss": 0.0169,
      "step": 1965580
    },
    {
      "epoch": 3.21674751085014,
      "grad_norm": 0.3972989618778229,
      "learning_rate": 3.5900054690537224e-06,
      "loss": 0.0099,
      "step": 1965600
    },
    {
      "epoch": 3.216780241288794,
      "grad_norm": 0.8767731785774231,
      "learning_rate": 3.589939576840205e-06,
      "loss": 0.0107,
      "step": 1965620
    },
    {
      "epoch": 3.216812971727447,
      "grad_norm": 0.3284904956817627,
      "learning_rate": 3.589873684626688e-06,
      "loss": 0.0089,
      "step": 1965640
    },
    {
      "epoch": 3.2168457021661006,
      "grad_norm": 0.45005878806114197,
      "learning_rate": 3.5898077924131706e-06,
      "loss": 0.0086,
      "step": 1965660
    },
    {
      "epoch": 3.2168784326047537,
      "grad_norm": 0.1408732831478119,
      "learning_rate": 3.5897419001996538e-06,
      "loss": 0.0128,
      "step": 1965680
    },
    {
      "epoch": 3.2169111630434073,
      "grad_norm": 0.3532116711139679,
      "learning_rate": 3.5896760079861365e-06,
      "loss": 0.0085,
      "step": 1965700
    },
    {
      "epoch": 3.2169438934820604,
      "grad_norm": 0.10525042563676834,
      "learning_rate": 3.5896101157726193e-06,
      "loss": 0.0115,
      "step": 1965720
    },
    {
      "epoch": 3.2169766239207136,
      "grad_norm": 0.15390510857105255,
      "learning_rate": 3.589544223559102e-06,
      "loss": 0.0134,
      "step": 1965740
    },
    {
      "epoch": 3.217009354359367,
      "grad_norm": 0.11555503308773041,
      "learning_rate": 3.589478331345585e-06,
      "loss": 0.0076,
      "step": 1965760
    },
    {
      "epoch": 3.2170420847980203,
      "grad_norm": 0.4036496579647064,
      "learning_rate": 3.589412439132068e-06,
      "loss": 0.0107,
      "step": 1965780
    },
    {
      "epoch": 3.217074815236674,
      "grad_norm": 0.9290143847465515,
      "learning_rate": 3.5893465469185506e-06,
      "loss": 0.0087,
      "step": 1965800
    },
    {
      "epoch": 3.217107545675327,
      "grad_norm": 0.06472025066614151,
      "learning_rate": 3.5892806547050342e-06,
      "loss": 0.0122,
      "step": 1965820
    },
    {
      "epoch": 3.2171402761139807,
      "grad_norm": 0.2705751955509186,
      "learning_rate": 3.589214762491517e-06,
      "loss": 0.0119,
      "step": 1965840
    },
    {
      "epoch": 3.217173006552634,
      "grad_norm": 0.1483430713415146,
      "learning_rate": 3.5891488702779997e-06,
      "loss": 0.0105,
      "step": 1965860
    },
    {
      "epoch": 3.217205736991287,
      "grad_norm": 3.9590373039245605,
      "learning_rate": 3.5890829780644825e-06,
      "loss": 0.0136,
      "step": 1965880
    },
    {
      "epoch": 3.2172384674299406,
      "grad_norm": 0.21907716989517212,
      "learning_rate": 3.5890170858509656e-06,
      "loss": 0.0102,
      "step": 1965900
    },
    {
      "epoch": 3.2172711978685937,
      "grad_norm": 0.27642714977264404,
      "learning_rate": 3.5889511936374484e-06,
      "loss": 0.0091,
      "step": 1965920
    },
    {
      "epoch": 3.2173039283072473,
      "grad_norm": 0.20319640636444092,
      "learning_rate": 3.588885301423931e-06,
      "loss": 0.0107,
      "step": 1965940
    },
    {
      "epoch": 3.2173366587459005,
      "grad_norm": 0.3932563364505768,
      "learning_rate": 3.588819409210414e-06,
      "loss": 0.0146,
      "step": 1965960
    },
    {
      "epoch": 3.2173693891845536,
      "grad_norm": 0.5263240337371826,
      "learning_rate": 3.5887535169968966e-06,
      "loss": 0.0072,
      "step": 1965980
    },
    {
      "epoch": 3.217402119623207,
      "grad_norm": 0.38350775837898254,
      "learning_rate": 3.5886876247833797e-06,
      "loss": 0.0156,
      "step": 1966000
    },
    {
      "epoch": 3.2174348500618604,
      "grad_norm": 0.07217072695493698,
      "learning_rate": 3.5886217325698625e-06,
      "loss": 0.0097,
      "step": 1966020
    },
    {
      "epoch": 3.217467580500514,
      "grad_norm": 0.30455219745635986,
      "learning_rate": 3.5885558403563452e-06,
      "loss": 0.0079,
      "step": 1966040
    },
    {
      "epoch": 3.217500310939167,
      "grad_norm": 0.2541002929210663,
      "learning_rate": 3.588489948142828e-06,
      "loss": 0.0142,
      "step": 1966060
    },
    {
      "epoch": 3.2175330413778207,
      "grad_norm": 0.24075022339820862,
      "learning_rate": 3.588424055929311e-06,
      "loss": 0.0131,
      "step": 1966080
    },
    {
      "epoch": 3.217565771816474,
      "grad_norm": 0.2635658383369446,
      "learning_rate": 3.588358163715794e-06,
      "loss": 0.0088,
      "step": 1966100
    },
    {
      "epoch": 3.217598502255127,
      "grad_norm": 0.3270889222621918,
      "learning_rate": 3.5882922715022766e-06,
      "loss": 0.013,
      "step": 1966120
    },
    {
      "epoch": 3.2176312326937806,
      "grad_norm": 0.08494650572538376,
      "learning_rate": 3.5882263792887593e-06,
      "loss": 0.0073,
      "step": 1966140
    },
    {
      "epoch": 3.2176639631324337,
      "grad_norm": 0.16363805532455444,
      "learning_rate": 3.5881604870752425e-06,
      "loss": 0.0147,
      "step": 1966160
    },
    {
      "epoch": 3.2176966935710873,
      "grad_norm": 0.5976989269256592,
      "learning_rate": 3.5880945948617257e-06,
      "loss": 0.0101,
      "step": 1966180
    },
    {
      "epoch": 3.2177294240097405,
      "grad_norm": 0.7001387476921082,
      "learning_rate": 3.5880287026482084e-06,
      "loss": 0.0155,
      "step": 1966200
    },
    {
      "epoch": 3.217762154448394,
      "grad_norm": 0.2100427895784378,
      "learning_rate": 3.5879628104346916e-06,
      "loss": 0.0095,
      "step": 1966220
    },
    {
      "epoch": 3.2177948848870472,
      "grad_norm": 0.0640062689781189,
      "learning_rate": 3.5878969182211743e-06,
      "loss": 0.0085,
      "step": 1966240
    },
    {
      "epoch": 3.2178276153257004,
      "grad_norm": 0.41987720131874084,
      "learning_rate": 3.587831026007657e-06,
      "loss": 0.011,
      "step": 1966260
    },
    {
      "epoch": 3.217860345764354,
      "grad_norm": 0.1298721432685852,
      "learning_rate": 3.5877651337941398e-06,
      "loss": 0.0161,
      "step": 1966280
    },
    {
      "epoch": 3.217893076203007,
      "grad_norm": 0.6797683238983154,
      "learning_rate": 3.587699241580623e-06,
      "loss": 0.0113,
      "step": 1966300
    },
    {
      "epoch": 3.2179258066416607,
      "grad_norm": 0.8634617924690247,
      "learning_rate": 3.5876333493671057e-06,
      "loss": 0.012,
      "step": 1966320
    },
    {
      "epoch": 3.217958537080314,
      "grad_norm": 0.29765772819519043,
      "learning_rate": 3.5875674571535884e-06,
      "loss": 0.0094,
      "step": 1966340
    },
    {
      "epoch": 3.2179912675189675,
      "grad_norm": 0.19790999591350555,
      "learning_rate": 3.587501564940071e-06,
      "loss": 0.0114,
      "step": 1966360
    },
    {
      "epoch": 3.2180239979576206,
      "grad_norm": 0.47270819544792175,
      "learning_rate": 3.5874356727265543e-06,
      "loss": 0.0119,
      "step": 1966380
    },
    {
      "epoch": 3.2180567283962738,
      "grad_norm": 0.6117483377456665,
      "learning_rate": 3.587369780513037e-06,
      "loss": 0.0142,
      "step": 1966400
    },
    {
      "epoch": 3.2180894588349274,
      "grad_norm": 0.19540885090827942,
      "learning_rate": 3.58730388829952e-06,
      "loss": 0.0151,
      "step": 1966420
    },
    {
      "epoch": 3.2181221892735805,
      "grad_norm": 0.22056321799755096,
      "learning_rate": 3.5872379960860026e-06,
      "loss": 0.0124,
      "step": 1966440
    },
    {
      "epoch": 3.218154919712234,
      "grad_norm": 1.6041010618209839,
      "learning_rate": 3.5871721038724853e-06,
      "loss": 0.0122,
      "step": 1966460
    },
    {
      "epoch": 3.2181876501508873,
      "grad_norm": 0.14256742596626282,
      "learning_rate": 3.5871062116589685e-06,
      "loss": 0.0073,
      "step": 1966480
    },
    {
      "epoch": 3.218220380589541,
      "grad_norm": 0.7613601088523865,
      "learning_rate": 3.587040319445451e-06,
      "loss": 0.008,
      "step": 1966500
    },
    {
      "epoch": 3.218253111028194,
      "grad_norm": 0.47990575432777405,
      "learning_rate": 3.5869744272319344e-06,
      "loss": 0.0155,
      "step": 1966520
    },
    {
      "epoch": 3.218285841466847,
      "grad_norm": 0.19283941388130188,
      "learning_rate": 3.5869085350184175e-06,
      "loss": 0.013,
      "step": 1966540
    },
    {
      "epoch": 3.2183185719055007,
      "grad_norm": 0.14333327114582062,
      "learning_rate": 3.5868426428049003e-06,
      "loss": 0.0105,
      "step": 1966560
    },
    {
      "epoch": 3.218351302344154,
      "grad_norm": 0.11612819135189056,
      "learning_rate": 3.586776750591383e-06,
      "loss": 0.0125,
      "step": 1966580
    },
    {
      "epoch": 3.2183840327828075,
      "grad_norm": 0.15005823969841003,
      "learning_rate": 3.5867108583778657e-06,
      "loss": 0.0108,
      "step": 1966600
    },
    {
      "epoch": 3.2184167632214606,
      "grad_norm": 0.10741659253835678,
      "learning_rate": 3.586644966164349e-06,
      "loss": 0.0125,
      "step": 1966620
    },
    {
      "epoch": 3.2184494936601142,
      "grad_norm": 0.8956706523895264,
      "learning_rate": 3.5865790739508316e-06,
      "loss": 0.0127,
      "step": 1966640
    },
    {
      "epoch": 3.2184822240987674,
      "grad_norm": 0.19992223381996155,
      "learning_rate": 3.5865131817373144e-06,
      "loss": 0.0086,
      "step": 1966660
    },
    {
      "epoch": 3.2185149545374205,
      "grad_norm": 0.47327563166618347,
      "learning_rate": 3.586447289523797e-06,
      "loss": 0.0098,
      "step": 1966680
    },
    {
      "epoch": 3.218547684976074,
      "grad_norm": 0.1295681893825531,
      "learning_rate": 3.5863813973102803e-06,
      "loss": 0.0131,
      "step": 1966700
    },
    {
      "epoch": 3.2185804154147273,
      "grad_norm": 0.14894938468933105,
      "learning_rate": 3.586315505096763e-06,
      "loss": 0.0092,
      "step": 1966720
    },
    {
      "epoch": 3.218613145853381,
      "grad_norm": 0.3826233744621277,
      "learning_rate": 3.5862496128832458e-06,
      "loss": 0.0105,
      "step": 1966740
    },
    {
      "epoch": 3.218645876292034,
      "grad_norm": 0.15129104256629944,
      "learning_rate": 3.5861837206697285e-06,
      "loss": 0.0107,
      "step": 1966760
    },
    {
      "epoch": 3.2186786067306876,
      "grad_norm": 0.3305493891239166,
      "learning_rate": 3.5861178284562117e-06,
      "loss": 0.008,
      "step": 1966780
    },
    {
      "epoch": 3.2187113371693408,
      "grad_norm": 0.38821402192115784,
      "learning_rate": 3.5860519362426944e-06,
      "loss": 0.009,
      "step": 1966800
    },
    {
      "epoch": 3.218744067607994,
      "grad_norm": 0.12079288810491562,
      "learning_rate": 3.585986044029177e-06,
      "loss": 0.0112,
      "step": 1966820
    },
    {
      "epoch": 3.2187767980466475,
      "grad_norm": 0.1546061486005783,
      "learning_rate": 3.58592015181566e-06,
      "loss": 0.0119,
      "step": 1966840
    },
    {
      "epoch": 3.2188095284853007,
      "grad_norm": 0.3695024847984314,
      "learning_rate": 3.5858542596021426e-06,
      "loss": 0.0166,
      "step": 1966860
    },
    {
      "epoch": 3.2188422589239543,
      "grad_norm": 0.20579993724822998,
      "learning_rate": 3.5857883673886262e-06,
      "loss": 0.0102,
      "step": 1966880
    },
    {
      "epoch": 3.2188749893626074,
      "grad_norm": 0.19002924859523773,
      "learning_rate": 3.585722475175109e-06,
      "loss": 0.0119,
      "step": 1966900
    },
    {
      "epoch": 3.218907719801261,
      "grad_norm": 0.5289160013198853,
      "learning_rate": 3.585656582961592e-06,
      "loss": 0.0068,
      "step": 1966920
    },
    {
      "epoch": 3.218940450239914,
      "grad_norm": 0.6888521909713745,
      "learning_rate": 3.585590690748075e-06,
      "loss": 0.0085,
      "step": 1966940
    },
    {
      "epoch": 3.2189731806785673,
      "grad_norm": 0.2215890735387802,
      "learning_rate": 3.5855247985345576e-06,
      "loss": 0.0078,
      "step": 1966960
    },
    {
      "epoch": 3.219005911117221,
      "grad_norm": 0.26147031784057617,
      "learning_rate": 3.5854589063210403e-06,
      "loss": 0.0123,
      "step": 1966980
    },
    {
      "epoch": 3.219038641555874,
      "grad_norm": 0.3536809980869293,
      "learning_rate": 3.585393014107523e-06,
      "loss": 0.0117,
      "step": 1967000
    },
    {
      "epoch": 3.2190713719945276,
      "grad_norm": 0.10756093263626099,
      "learning_rate": 3.5853271218940062e-06,
      "loss": 0.0123,
      "step": 1967020
    },
    {
      "epoch": 3.219104102433181,
      "grad_norm": 0.5241592526435852,
      "learning_rate": 3.585261229680489e-06,
      "loss": 0.0168,
      "step": 1967040
    },
    {
      "epoch": 3.219136832871834,
      "grad_norm": 0.6096934676170349,
      "learning_rate": 3.5851953374669717e-06,
      "loss": 0.0118,
      "step": 1967060
    },
    {
      "epoch": 3.2191695633104875,
      "grad_norm": 0.24389958381652832,
      "learning_rate": 3.5851294452534545e-06,
      "loss": 0.0097,
      "step": 1967080
    },
    {
      "epoch": 3.2192022937491407,
      "grad_norm": 0.24726226925849915,
      "learning_rate": 3.5850635530399376e-06,
      "loss": 0.0118,
      "step": 1967100
    },
    {
      "epoch": 3.2192350241877943,
      "grad_norm": 0.35389775037765503,
      "learning_rate": 3.5849976608264204e-06,
      "loss": 0.011,
      "step": 1967120
    },
    {
      "epoch": 3.2192677546264474,
      "grad_norm": 0.8402849435806274,
      "learning_rate": 3.584931768612903e-06,
      "loss": 0.0094,
      "step": 1967140
    },
    {
      "epoch": 3.219300485065101,
      "grad_norm": 0.6790624856948853,
      "learning_rate": 3.584865876399386e-06,
      "loss": 0.0139,
      "step": 1967160
    },
    {
      "epoch": 3.219333215503754,
      "grad_norm": 0.1632852554321289,
      "learning_rate": 3.584799984185869e-06,
      "loss": 0.0094,
      "step": 1967180
    },
    {
      "epoch": 3.2193659459424073,
      "grad_norm": 0.1409740298986435,
      "learning_rate": 3.5847340919723517e-06,
      "loss": 0.0101,
      "step": 1967200
    },
    {
      "epoch": 3.219398676381061,
      "grad_norm": 0.03690442070364952,
      "learning_rate": 3.584668199758835e-06,
      "loss": 0.0145,
      "step": 1967220
    },
    {
      "epoch": 3.219431406819714,
      "grad_norm": 0.34617677330970764,
      "learning_rate": 3.584602307545318e-06,
      "loss": 0.0069,
      "step": 1967240
    },
    {
      "epoch": 3.2194641372583677,
      "grad_norm": 0.43749135732650757,
      "learning_rate": 3.584536415331801e-06,
      "loss": 0.0133,
      "step": 1967260
    },
    {
      "epoch": 3.219496867697021,
      "grad_norm": 0.7361453771591187,
      "learning_rate": 3.5844705231182836e-06,
      "loss": 0.0092,
      "step": 1967280
    },
    {
      "epoch": 3.2195295981356744,
      "grad_norm": 0.13110119104385376,
      "learning_rate": 3.5844046309047663e-06,
      "loss": 0.0101,
      "step": 1967300
    },
    {
      "epoch": 3.2195623285743276,
      "grad_norm": 0.38467615842819214,
      "learning_rate": 3.5843387386912495e-06,
      "loss": 0.0135,
      "step": 1967320
    },
    {
      "epoch": 3.2195950590129807,
      "grad_norm": 1.3152161836624146,
      "learning_rate": 3.584272846477732e-06,
      "loss": 0.0107,
      "step": 1967340
    },
    {
      "epoch": 3.2196277894516343,
      "grad_norm": 0.11051243543624878,
      "learning_rate": 3.584206954264215e-06,
      "loss": 0.0113,
      "step": 1967360
    },
    {
      "epoch": 3.2196605198902875,
      "grad_norm": 0.3876087963581085,
      "learning_rate": 3.5841410620506977e-06,
      "loss": 0.0162,
      "step": 1967380
    },
    {
      "epoch": 3.219693250328941,
      "grad_norm": 0.07710567861795425,
      "learning_rate": 3.5840751698371804e-06,
      "loss": 0.0109,
      "step": 1967400
    },
    {
      "epoch": 3.219725980767594,
      "grad_norm": 0.22809112071990967,
      "learning_rate": 3.5840092776236636e-06,
      "loss": 0.0109,
      "step": 1967420
    },
    {
      "epoch": 3.219758711206248,
      "grad_norm": 0.2816762328147888,
      "learning_rate": 3.5839433854101463e-06,
      "loss": 0.0127,
      "step": 1967440
    },
    {
      "epoch": 3.219791441644901,
      "grad_norm": 0.26854562759399414,
      "learning_rate": 3.583877493196629e-06,
      "loss": 0.014,
      "step": 1967460
    },
    {
      "epoch": 3.219824172083554,
      "grad_norm": 0.2510439157485962,
      "learning_rate": 3.583811600983112e-06,
      "loss": 0.0111,
      "step": 1967480
    },
    {
      "epoch": 3.2198569025222077,
      "grad_norm": 0.5621578097343445,
      "learning_rate": 3.583745708769595e-06,
      "loss": 0.0154,
      "step": 1967500
    },
    {
      "epoch": 3.219889632960861,
      "grad_norm": 2.0352296829223633,
      "learning_rate": 3.5836798165560777e-06,
      "loss": 0.0163,
      "step": 1967520
    },
    {
      "epoch": 3.2199223633995144,
      "grad_norm": 0.30126678943634033,
      "learning_rate": 3.5836139243425604e-06,
      "loss": 0.0147,
      "step": 1967540
    },
    {
      "epoch": 3.2199550938381676,
      "grad_norm": 0.4611799716949463,
      "learning_rate": 3.583548032129043e-06,
      "loss": 0.0111,
      "step": 1967560
    },
    {
      "epoch": 3.2199878242768207,
      "grad_norm": 0.08200933039188385,
      "learning_rate": 3.5834821399155268e-06,
      "loss": 0.0125,
      "step": 1967580
    },
    {
      "epoch": 3.2200205547154743,
      "grad_norm": 0.15852788090705872,
      "learning_rate": 3.5834162477020095e-06,
      "loss": 0.0175,
      "step": 1967600
    },
    {
      "epoch": 3.2200532851541275,
      "grad_norm": 0.10154689848423004,
      "learning_rate": 3.5833503554884922e-06,
      "loss": 0.0105,
      "step": 1967620
    },
    {
      "epoch": 3.220086015592781,
      "grad_norm": 0.08985617756843567,
      "learning_rate": 3.5832844632749754e-06,
      "loss": 0.0112,
      "step": 1967640
    },
    {
      "epoch": 3.220118746031434,
      "grad_norm": 0.37417787313461304,
      "learning_rate": 3.583218571061458e-06,
      "loss": 0.0146,
      "step": 1967660
    },
    {
      "epoch": 3.220151476470088,
      "grad_norm": 0.1399051547050476,
      "learning_rate": 3.583152678847941e-06,
      "loss": 0.0085,
      "step": 1967680
    },
    {
      "epoch": 3.220184206908741,
      "grad_norm": 0.422571063041687,
      "learning_rate": 3.5830867866344236e-06,
      "loss": 0.0122,
      "step": 1967700
    },
    {
      "epoch": 3.220216937347394,
      "grad_norm": 0.5868669152259827,
      "learning_rate": 3.583020894420907e-06,
      "loss": 0.0138,
      "step": 1967720
    },
    {
      "epoch": 3.2202496677860477,
      "grad_norm": 0.17436648905277252,
      "learning_rate": 3.5829550022073895e-06,
      "loss": 0.0131,
      "step": 1967740
    },
    {
      "epoch": 3.220282398224701,
      "grad_norm": 0.44192951917648315,
      "learning_rate": 3.5828891099938723e-06,
      "loss": 0.0142,
      "step": 1967760
    },
    {
      "epoch": 3.2203151286633545,
      "grad_norm": 0.3894251585006714,
      "learning_rate": 3.582823217780355e-06,
      "loss": 0.0159,
      "step": 1967780
    },
    {
      "epoch": 3.2203478591020076,
      "grad_norm": 0.2028634399175644,
      "learning_rate": 3.582757325566838e-06,
      "loss": 0.0091,
      "step": 1967800
    },
    {
      "epoch": 3.220380589540661,
      "grad_norm": 0.4691672921180725,
      "learning_rate": 3.582691433353321e-06,
      "loss": 0.0093,
      "step": 1967820
    },
    {
      "epoch": 3.2204133199793143,
      "grad_norm": 0.31322118639945984,
      "learning_rate": 3.5826255411398037e-06,
      "loss": 0.0115,
      "step": 1967840
    },
    {
      "epoch": 3.2204460504179675,
      "grad_norm": 0.25526484847068787,
      "learning_rate": 3.5825596489262864e-06,
      "loss": 0.0083,
      "step": 1967860
    },
    {
      "epoch": 3.220478780856621,
      "grad_norm": 0.549009382724762,
      "learning_rate": 3.582493756712769e-06,
      "loss": 0.0155,
      "step": 1967880
    },
    {
      "epoch": 3.2205115112952742,
      "grad_norm": 0.2525689899921417,
      "learning_rate": 3.5824278644992523e-06,
      "loss": 0.0126,
      "step": 1967900
    },
    {
      "epoch": 3.220544241733928,
      "grad_norm": 0.12147451937198639,
      "learning_rate": 3.582361972285735e-06,
      "loss": 0.0101,
      "step": 1967920
    },
    {
      "epoch": 3.220576972172581,
      "grad_norm": 0.25782060623168945,
      "learning_rate": 3.582296080072218e-06,
      "loss": 0.0125,
      "step": 1967940
    },
    {
      "epoch": 3.2206097026112346,
      "grad_norm": 0.15895158052444458,
      "learning_rate": 3.5822301878587014e-06,
      "loss": 0.016,
      "step": 1967960
    },
    {
      "epoch": 3.2206424330498877,
      "grad_norm": 0.2827165722846985,
      "learning_rate": 3.582164295645184e-06,
      "loss": 0.011,
      "step": 1967980
    },
    {
      "epoch": 3.220675163488541,
      "grad_norm": 0.16703388094902039,
      "learning_rate": 3.582098403431667e-06,
      "loss": 0.0127,
      "step": 1968000
    },
    {
      "epoch": 3.2207078939271945,
      "grad_norm": 0.263714462518692,
      "learning_rate": 3.5820325112181496e-06,
      "loss": 0.0189,
      "step": 1968020
    },
    {
      "epoch": 3.2207406243658476,
      "grad_norm": 0.230008065700531,
      "learning_rate": 3.5819666190046327e-06,
      "loss": 0.0083,
      "step": 1968040
    },
    {
      "epoch": 3.220773354804501,
      "grad_norm": 0.12849773466587067,
      "learning_rate": 3.5819007267911155e-06,
      "loss": 0.0078,
      "step": 1968060
    },
    {
      "epoch": 3.2208060852431544,
      "grad_norm": 0.9273167252540588,
      "learning_rate": 3.5818348345775982e-06,
      "loss": 0.0083,
      "step": 1968080
    },
    {
      "epoch": 3.220838815681808,
      "grad_norm": 0.7629337310791016,
      "learning_rate": 3.581768942364081e-06,
      "loss": 0.0108,
      "step": 1968100
    },
    {
      "epoch": 3.220871546120461,
      "grad_norm": 0.6426106095314026,
      "learning_rate": 3.581703050150564e-06,
      "loss": 0.0135,
      "step": 1968120
    },
    {
      "epoch": 3.2209042765591143,
      "grad_norm": 0.2908037304878235,
      "learning_rate": 3.581637157937047e-06,
      "loss": 0.0088,
      "step": 1968140
    },
    {
      "epoch": 3.220937006997768,
      "grad_norm": 0.36612778902053833,
      "learning_rate": 3.5815712657235296e-06,
      "loss": 0.0163,
      "step": 1968160
    },
    {
      "epoch": 3.220969737436421,
      "grad_norm": 0.28649571537971497,
      "learning_rate": 3.5815053735100123e-06,
      "loss": 0.0054,
      "step": 1968180
    },
    {
      "epoch": 3.2210024678750746,
      "grad_norm": 0.08226549625396729,
      "learning_rate": 3.5814394812964955e-06,
      "loss": 0.0109,
      "step": 1968200
    },
    {
      "epoch": 3.2210351983137278,
      "grad_norm": 0.41954582929611206,
      "learning_rate": 3.5813735890829782e-06,
      "loss": 0.0092,
      "step": 1968220
    },
    {
      "epoch": 3.2210679287523813,
      "grad_norm": 0.5129050612449646,
      "learning_rate": 3.581307696869461e-06,
      "loss": 0.0109,
      "step": 1968240
    },
    {
      "epoch": 3.2211006591910345,
      "grad_norm": 0.16938278079032898,
      "learning_rate": 3.5812418046559437e-06,
      "loss": 0.0117,
      "step": 1968260
    },
    {
      "epoch": 3.2211333896296876,
      "grad_norm": 0.0357353575527668,
      "learning_rate": 3.5811759124424273e-06,
      "loss": 0.0133,
      "step": 1968280
    },
    {
      "epoch": 3.2211661200683412,
      "grad_norm": 0.33906784653663635,
      "learning_rate": 3.58111002022891e-06,
      "loss": 0.0109,
      "step": 1968300
    },
    {
      "epoch": 3.2211988505069944,
      "grad_norm": 0.20852866768836975,
      "learning_rate": 3.581044128015393e-06,
      "loss": 0.0125,
      "step": 1968320
    },
    {
      "epoch": 3.221231580945648,
      "grad_norm": 0.3930012583732605,
      "learning_rate": 3.580978235801876e-06,
      "loss": 0.0139,
      "step": 1968340
    },
    {
      "epoch": 3.221264311384301,
      "grad_norm": 0.31095170974731445,
      "learning_rate": 3.5809123435883587e-06,
      "loss": 0.0151,
      "step": 1968360
    },
    {
      "epoch": 3.2212970418229547,
      "grad_norm": 0.09913640469312668,
      "learning_rate": 3.5808464513748414e-06,
      "loss": 0.0141,
      "step": 1968380
    },
    {
      "epoch": 3.221329772261608,
      "grad_norm": 1.1479817628860474,
      "learning_rate": 3.580780559161324e-06,
      "loss": 0.0097,
      "step": 1968400
    },
    {
      "epoch": 3.221362502700261,
      "grad_norm": 0.33774587512016296,
      "learning_rate": 3.580714666947807e-06,
      "loss": 0.0121,
      "step": 1968420
    },
    {
      "epoch": 3.2213952331389146,
      "grad_norm": 0.5742723345756531,
      "learning_rate": 3.58064877473429e-06,
      "loss": 0.0149,
      "step": 1968440
    },
    {
      "epoch": 3.2214279635775678,
      "grad_norm": 0.5007946491241455,
      "learning_rate": 3.580582882520773e-06,
      "loss": 0.013,
      "step": 1968460
    },
    {
      "epoch": 3.2214606940162214,
      "grad_norm": 0.03182488679885864,
      "learning_rate": 3.5805169903072556e-06,
      "loss": 0.0076,
      "step": 1968480
    },
    {
      "epoch": 3.2214934244548745,
      "grad_norm": 0.42756959795951843,
      "learning_rate": 3.5804510980937383e-06,
      "loss": 0.0098,
      "step": 1968500
    },
    {
      "epoch": 3.221526154893528,
      "grad_norm": 0.2856837809085846,
      "learning_rate": 3.5803852058802215e-06,
      "loss": 0.0147,
      "step": 1968520
    },
    {
      "epoch": 3.2215588853321813,
      "grad_norm": 0.17446380853652954,
      "learning_rate": 3.580319313666704e-06,
      "loss": 0.0082,
      "step": 1968540
    },
    {
      "epoch": 3.2215916157708344,
      "grad_norm": 0.2180478572845459,
      "learning_rate": 3.580253421453187e-06,
      "loss": 0.0153,
      "step": 1968560
    },
    {
      "epoch": 3.221624346209488,
      "grad_norm": 0.6403075456619263,
      "learning_rate": 3.5801875292396697e-06,
      "loss": 0.0143,
      "step": 1968580
    },
    {
      "epoch": 3.221657076648141,
      "grad_norm": 0.32295340299606323,
      "learning_rate": 3.580121637026153e-06,
      "loss": 0.0135,
      "step": 1968600
    },
    {
      "epoch": 3.2216898070867948,
      "grad_norm": 0.7894524931907654,
      "learning_rate": 3.5800557448126356e-06,
      "loss": 0.0099,
      "step": 1968620
    },
    {
      "epoch": 3.221722537525448,
      "grad_norm": 0.26055172085762024,
      "learning_rate": 3.5799898525991187e-06,
      "loss": 0.0114,
      "step": 1968640
    },
    {
      "epoch": 3.221755267964101,
      "grad_norm": 0.1436377465724945,
      "learning_rate": 3.579923960385602e-06,
      "loss": 0.0153,
      "step": 1968660
    },
    {
      "epoch": 3.2217879984027546,
      "grad_norm": 0.6154616475105286,
      "learning_rate": 3.5798580681720847e-06,
      "loss": 0.0079,
      "step": 1968680
    },
    {
      "epoch": 3.221820728841408,
      "grad_norm": 0.21699517965316772,
      "learning_rate": 3.5797921759585674e-06,
      "loss": 0.0107,
      "step": 1968700
    },
    {
      "epoch": 3.2218534592800614,
      "grad_norm": 0.18655551970005035,
      "learning_rate": 3.57972628374505e-06,
      "loss": 0.008,
      "step": 1968720
    },
    {
      "epoch": 3.2218861897187145,
      "grad_norm": 0.1954418569803238,
      "learning_rate": 3.5796603915315333e-06,
      "loss": 0.0113,
      "step": 1968740
    },
    {
      "epoch": 3.221918920157368,
      "grad_norm": 0.6589255928993225,
      "learning_rate": 3.579594499318016e-06,
      "loss": 0.0087,
      "step": 1968760
    },
    {
      "epoch": 3.2219516505960213,
      "grad_norm": 0.13309596478939056,
      "learning_rate": 3.5795286071044988e-06,
      "loss": 0.0069,
      "step": 1968780
    },
    {
      "epoch": 3.2219843810346744,
      "grad_norm": 0.08578513562679291,
      "learning_rate": 3.5794627148909815e-06,
      "loss": 0.0127,
      "step": 1968800
    },
    {
      "epoch": 3.222017111473328,
      "grad_norm": 0.17927280068397522,
      "learning_rate": 3.5793968226774647e-06,
      "loss": 0.01,
      "step": 1968820
    },
    {
      "epoch": 3.222049841911981,
      "grad_norm": 0.521794855594635,
      "learning_rate": 3.5793309304639474e-06,
      "loss": 0.0066,
      "step": 1968840
    },
    {
      "epoch": 3.2220825723506348,
      "grad_norm": 0.1719198375940323,
      "learning_rate": 3.57926503825043e-06,
      "loss": 0.0097,
      "step": 1968860
    },
    {
      "epoch": 3.222115302789288,
      "grad_norm": 0.5901732444763184,
      "learning_rate": 3.579199146036913e-06,
      "loss": 0.0122,
      "step": 1968880
    },
    {
      "epoch": 3.2221480332279415,
      "grad_norm": 0.38136306405067444,
      "learning_rate": 3.5791332538233956e-06,
      "loss": 0.011,
      "step": 1968900
    },
    {
      "epoch": 3.2221807636665947,
      "grad_norm": 0.37428686022758484,
      "learning_rate": 3.579067361609879e-06,
      "loss": 0.0095,
      "step": 1968920
    },
    {
      "epoch": 3.222213494105248,
      "grad_norm": 0.32846754789352417,
      "learning_rate": 3.5790014693963615e-06,
      "loss": 0.0148,
      "step": 1968940
    },
    {
      "epoch": 3.2222462245439014,
      "grad_norm": 0.18136893212795258,
      "learning_rate": 3.5789355771828443e-06,
      "loss": 0.0118,
      "step": 1968960
    },
    {
      "epoch": 3.2222789549825546,
      "grad_norm": 0.1932772696018219,
      "learning_rate": 3.578869684969328e-06,
      "loss": 0.0122,
      "step": 1968980
    },
    {
      "epoch": 3.222311685421208,
      "grad_norm": 0.3958335816860199,
      "learning_rate": 3.5788037927558106e-06,
      "loss": 0.0103,
      "step": 1969000
    },
    {
      "epoch": 3.2223444158598613,
      "grad_norm": 0.5174863934516907,
      "learning_rate": 3.5787379005422933e-06,
      "loss": 0.0098,
      "step": 1969020
    },
    {
      "epoch": 3.2223771462985145,
      "grad_norm": 0.37996822595596313,
      "learning_rate": 3.578672008328776e-06,
      "loss": 0.0159,
      "step": 1969040
    },
    {
      "epoch": 3.222409876737168,
      "grad_norm": 0.43132853507995605,
      "learning_rate": 3.5786061161152592e-06,
      "loss": 0.0108,
      "step": 1969060
    },
    {
      "epoch": 3.222442607175821,
      "grad_norm": 0.25179117918014526,
      "learning_rate": 3.578540223901742e-06,
      "loss": 0.0105,
      "step": 1969080
    },
    {
      "epoch": 3.222475337614475,
      "grad_norm": 0.17525555193424225,
      "learning_rate": 3.5784743316882247e-06,
      "loss": 0.0147,
      "step": 1969100
    },
    {
      "epoch": 3.222508068053128,
      "grad_norm": 0.2533766031265259,
      "learning_rate": 3.5784084394747075e-06,
      "loss": 0.0071,
      "step": 1969120
    },
    {
      "epoch": 3.2225407984917815,
      "grad_norm": 0.2770772874355316,
      "learning_rate": 3.5783425472611906e-06,
      "loss": 0.0106,
      "step": 1969140
    },
    {
      "epoch": 3.2225735289304347,
      "grad_norm": 0.12871864438056946,
      "learning_rate": 3.5782766550476734e-06,
      "loss": 0.0138,
      "step": 1969160
    },
    {
      "epoch": 3.222606259369088,
      "grad_norm": 0.18556223809719086,
      "learning_rate": 3.578210762834156e-06,
      "loss": 0.0182,
      "step": 1969180
    },
    {
      "epoch": 3.2226389898077414,
      "grad_norm": 0.11831580847501755,
      "learning_rate": 3.578144870620639e-06,
      "loss": 0.0154,
      "step": 1969200
    },
    {
      "epoch": 3.2226717202463946,
      "grad_norm": 0.23772156238555908,
      "learning_rate": 3.578078978407122e-06,
      "loss": 0.0077,
      "step": 1969220
    },
    {
      "epoch": 3.222704450685048,
      "grad_norm": 0.19162261486053467,
      "learning_rate": 3.5780130861936048e-06,
      "loss": 0.0125,
      "step": 1969240
    },
    {
      "epoch": 3.2227371811237013,
      "grad_norm": 0.6989638805389404,
      "learning_rate": 3.5779471939800875e-06,
      "loss": 0.0152,
      "step": 1969260
    },
    {
      "epoch": 3.222769911562355,
      "grad_norm": 0.6258992552757263,
      "learning_rate": 3.5778813017665702e-06,
      "loss": 0.0142,
      "step": 1969280
    },
    {
      "epoch": 3.222802642001008,
      "grad_norm": 0.21778930723667145,
      "learning_rate": 3.577815409553053e-06,
      "loss": 0.0095,
      "step": 1969300
    },
    {
      "epoch": 3.2228353724396612,
      "grad_norm": 0.5056238770484924,
      "learning_rate": 3.577749517339536e-06,
      "loss": 0.0145,
      "step": 1969320
    },
    {
      "epoch": 3.222868102878315,
      "grad_norm": 0.1151038110256195,
      "learning_rate": 3.5776836251260193e-06,
      "loss": 0.0102,
      "step": 1969340
    },
    {
      "epoch": 3.222900833316968,
      "grad_norm": 0.4263361394405365,
      "learning_rate": 3.5776177329125025e-06,
      "loss": 0.012,
      "step": 1969360
    },
    {
      "epoch": 3.2229335637556216,
      "grad_norm": 0.14010480046272278,
      "learning_rate": 3.577551840698985e-06,
      "loss": 0.0099,
      "step": 1969380
    },
    {
      "epoch": 3.2229662941942747,
      "grad_norm": 0.289776086807251,
      "learning_rate": 3.577485948485468e-06,
      "loss": 0.0128,
      "step": 1969400
    },
    {
      "epoch": 3.2229990246329283,
      "grad_norm": 0.34789878129959106,
      "learning_rate": 3.5774200562719507e-06,
      "loss": 0.0133,
      "step": 1969420
    },
    {
      "epoch": 3.2230317550715815,
      "grad_norm": 0.4542236626148224,
      "learning_rate": 3.5773541640584334e-06,
      "loss": 0.008,
      "step": 1969440
    },
    {
      "epoch": 3.2230644855102346,
      "grad_norm": 0.1551097184419632,
      "learning_rate": 3.5772882718449166e-06,
      "loss": 0.0101,
      "step": 1969460
    },
    {
      "epoch": 3.223097215948888,
      "grad_norm": 0.08681714534759521,
      "learning_rate": 3.5772223796313993e-06,
      "loss": 0.0126,
      "step": 1969480
    },
    {
      "epoch": 3.2231299463875414,
      "grad_norm": 0.522689938545227,
      "learning_rate": 3.577156487417882e-06,
      "loss": 0.0093,
      "step": 1969500
    },
    {
      "epoch": 3.223162676826195,
      "grad_norm": 0.5394174456596375,
      "learning_rate": 3.577090595204365e-06,
      "loss": 0.0122,
      "step": 1969520
    },
    {
      "epoch": 3.223195407264848,
      "grad_norm": 0.1693418025970459,
      "learning_rate": 3.577024702990848e-06,
      "loss": 0.0105,
      "step": 1969540
    },
    {
      "epoch": 3.2232281377035017,
      "grad_norm": 0.10616930574178696,
      "learning_rate": 3.5769588107773307e-06,
      "loss": 0.01,
      "step": 1969560
    },
    {
      "epoch": 3.223260868142155,
      "grad_norm": 0.46284353733062744,
      "learning_rate": 3.5768929185638134e-06,
      "loss": 0.0126,
      "step": 1969580
    },
    {
      "epoch": 3.223293598580808,
      "grad_norm": 0.462529718875885,
      "learning_rate": 3.576827026350296e-06,
      "loss": 0.0079,
      "step": 1969600
    },
    {
      "epoch": 3.2233263290194616,
      "grad_norm": 0.3247666358947754,
      "learning_rate": 3.5767611341367793e-06,
      "loss": 0.0073,
      "step": 1969620
    },
    {
      "epoch": 3.2233590594581147,
      "grad_norm": 0.18081463873386383,
      "learning_rate": 3.576695241923262e-06,
      "loss": 0.0105,
      "step": 1969640
    },
    {
      "epoch": 3.2233917898967683,
      "grad_norm": 0.12404687702655792,
      "learning_rate": 3.576629349709745e-06,
      "loss": 0.014,
      "step": 1969660
    },
    {
      "epoch": 3.2234245203354215,
      "grad_norm": 0.5230419635772705,
      "learning_rate": 3.5765634574962276e-06,
      "loss": 0.0088,
      "step": 1969680
    },
    {
      "epoch": 3.223457250774075,
      "grad_norm": 0.13061949610710144,
      "learning_rate": 3.576497565282711e-06,
      "loss": 0.0104,
      "step": 1969700
    },
    {
      "epoch": 3.2234899812127282,
      "grad_norm": 0.4023314416408539,
      "learning_rate": 3.576431673069194e-06,
      "loss": 0.0098,
      "step": 1969720
    },
    {
      "epoch": 3.2235227116513814,
      "grad_norm": 0.057377029210329056,
      "learning_rate": 3.5763657808556766e-06,
      "loss": 0.0091,
      "step": 1969740
    },
    {
      "epoch": 3.223555442090035,
      "grad_norm": 0.20095069706439972,
      "learning_rate": 3.57629988864216e-06,
      "loss": 0.0104,
      "step": 1969760
    },
    {
      "epoch": 3.223588172528688,
      "grad_norm": 0.16594262421131134,
      "learning_rate": 3.5762339964286425e-06,
      "loss": 0.008,
      "step": 1969780
    },
    {
      "epoch": 3.2236209029673417,
      "grad_norm": 0.25073179602622986,
      "learning_rate": 3.5761681042151253e-06,
      "loss": 0.0105,
      "step": 1969800
    },
    {
      "epoch": 3.223653633405995,
      "grad_norm": 0.12384866923093796,
      "learning_rate": 3.576102212001608e-06,
      "loss": 0.0066,
      "step": 1969820
    },
    {
      "epoch": 3.2236863638446485,
      "grad_norm": 0.08783726394176483,
      "learning_rate": 3.5760363197880908e-06,
      "loss": 0.0104,
      "step": 1969840
    },
    {
      "epoch": 3.2237190942833016,
      "grad_norm": 0.596208930015564,
      "learning_rate": 3.575970427574574e-06,
      "loss": 0.0158,
      "step": 1969860
    },
    {
      "epoch": 3.2237518247219548,
      "grad_norm": 0.5804243087768555,
      "learning_rate": 3.5759045353610567e-06,
      "loss": 0.0088,
      "step": 1969880
    },
    {
      "epoch": 3.2237845551606084,
      "grad_norm": 0.14038299024105072,
      "learning_rate": 3.5758386431475394e-06,
      "loss": 0.0108,
      "step": 1969900
    },
    {
      "epoch": 3.2238172855992615,
      "grad_norm": 0.2575679421424866,
      "learning_rate": 3.575772750934022e-06,
      "loss": 0.0153,
      "step": 1969920
    },
    {
      "epoch": 3.223850016037915,
      "grad_norm": 0.26622578501701355,
      "learning_rate": 3.5757068587205053e-06,
      "loss": 0.0124,
      "step": 1969940
    },
    {
      "epoch": 3.2238827464765683,
      "grad_norm": 0.508710503578186,
      "learning_rate": 3.575640966506988e-06,
      "loss": 0.0131,
      "step": 1969960
    },
    {
      "epoch": 3.223915476915222,
      "grad_norm": 0.1460525095462799,
      "learning_rate": 3.5755750742934708e-06,
      "loss": 0.011,
      "step": 1969980
    },
    {
      "epoch": 3.223948207353875,
      "grad_norm": 0.16882111132144928,
      "learning_rate": 3.5755091820799535e-06,
      "loss": 0.0114,
      "step": 1970000
    },
    {
      "epoch": 3.223980937792528,
      "grad_norm": 0.15185926854610443,
      "learning_rate": 3.5754432898664367e-06,
      "loss": 0.0125,
      "step": 1970020
    },
    {
      "epoch": 3.2240136682311817,
      "grad_norm": 0.14557424187660217,
      "learning_rate": 3.57537739765292e-06,
      "loss": 0.0187,
      "step": 1970040
    },
    {
      "epoch": 3.224046398669835,
      "grad_norm": 0.1374225616455078,
      "learning_rate": 3.5753115054394026e-06,
      "loss": 0.012,
      "step": 1970060
    },
    {
      "epoch": 3.2240791291084885,
      "grad_norm": 0.155551478266716,
      "learning_rate": 3.5752456132258858e-06,
      "loss": 0.0153,
      "step": 1970080
    },
    {
      "epoch": 3.2241118595471416,
      "grad_norm": 0.21423006057739258,
      "learning_rate": 3.5751797210123685e-06,
      "loss": 0.0103,
      "step": 1970100
    },
    {
      "epoch": 3.224144589985795,
      "grad_norm": 0.3681449294090271,
      "learning_rate": 3.5751138287988512e-06,
      "loss": 0.0145,
      "step": 1970120
    },
    {
      "epoch": 3.2241773204244484,
      "grad_norm": 0.08713144063949585,
      "learning_rate": 3.575047936585334e-06,
      "loss": 0.0105,
      "step": 1970140
    },
    {
      "epoch": 3.2242100508631015,
      "grad_norm": 0.6073590517044067,
      "learning_rate": 3.574982044371817e-06,
      "loss": 0.0091,
      "step": 1970160
    },
    {
      "epoch": 3.224242781301755,
      "grad_norm": 0.06161179021000862,
      "learning_rate": 3.5749161521583e-06,
      "loss": 0.0155,
      "step": 1970180
    },
    {
      "epoch": 3.2242755117404083,
      "grad_norm": 0.08028562366962433,
      "learning_rate": 3.5748502599447826e-06,
      "loss": 0.0129,
      "step": 1970200
    },
    {
      "epoch": 3.224308242179062,
      "grad_norm": 0.44507572054862976,
      "learning_rate": 3.5747843677312654e-06,
      "loss": 0.0089,
      "step": 1970220
    },
    {
      "epoch": 3.224340972617715,
      "grad_norm": 0.14616483449935913,
      "learning_rate": 3.5747184755177485e-06,
      "loss": 0.0116,
      "step": 1970240
    },
    {
      "epoch": 3.224373703056368,
      "grad_norm": 0.09024523943662643,
      "learning_rate": 3.5746525833042313e-06,
      "loss": 0.0116,
      "step": 1970260
    },
    {
      "epoch": 3.2244064334950218,
      "grad_norm": 0.24418288469314575,
      "learning_rate": 3.574586691090714e-06,
      "loss": 0.0109,
      "step": 1970280
    },
    {
      "epoch": 3.224439163933675,
      "grad_norm": 0.1798134297132492,
      "learning_rate": 3.5745207988771967e-06,
      "loss": 0.0147,
      "step": 1970300
    },
    {
      "epoch": 3.2244718943723285,
      "grad_norm": 0.30381548404693604,
      "learning_rate": 3.5744549066636795e-06,
      "loss": 0.0153,
      "step": 1970320
    },
    {
      "epoch": 3.2245046248109817,
      "grad_norm": 0.18276585638523102,
      "learning_rate": 3.5743890144501626e-06,
      "loss": 0.0158,
      "step": 1970340
    },
    {
      "epoch": 3.2245373552496353,
      "grad_norm": 0.205097958445549,
      "learning_rate": 3.5743231222366454e-06,
      "loss": 0.0125,
      "step": 1970360
    },
    {
      "epoch": 3.2245700856882884,
      "grad_norm": 0.5728234648704529,
      "learning_rate": 3.574257230023128e-06,
      "loss": 0.0082,
      "step": 1970380
    },
    {
      "epoch": 3.2246028161269416,
      "grad_norm": 0.33485147356987,
      "learning_rate": 3.5741913378096117e-06,
      "loss": 0.0081,
      "step": 1970400
    },
    {
      "epoch": 3.224635546565595,
      "grad_norm": 0.22273139655590057,
      "learning_rate": 3.5741254455960944e-06,
      "loss": 0.0065,
      "step": 1970420
    },
    {
      "epoch": 3.2246682770042483,
      "grad_norm": 0.456299751996994,
      "learning_rate": 3.574059553382577e-06,
      "loss": 0.013,
      "step": 1970440
    },
    {
      "epoch": 3.224701007442902,
      "grad_norm": 0.5063912868499756,
      "learning_rate": 3.57399366116906e-06,
      "loss": 0.0115,
      "step": 1970460
    },
    {
      "epoch": 3.224733737881555,
      "grad_norm": 2.358065605163574,
      "learning_rate": 3.573927768955543e-06,
      "loss": 0.0108,
      "step": 1970480
    },
    {
      "epoch": 3.2247664683202086,
      "grad_norm": 0.09491278231143951,
      "learning_rate": 3.573861876742026e-06,
      "loss": 0.0103,
      "step": 1970500
    },
    {
      "epoch": 3.224799198758862,
      "grad_norm": 0.28034526109695435,
      "learning_rate": 3.5737959845285086e-06,
      "loss": 0.0092,
      "step": 1970520
    },
    {
      "epoch": 3.224831929197515,
      "grad_norm": 0.6083021759986877,
      "learning_rate": 3.5737300923149913e-06,
      "loss": 0.0096,
      "step": 1970540
    },
    {
      "epoch": 3.2248646596361685,
      "grad_norm": 0.4151008129119873,
      "learning_rate": 3.5736642001014745e-06,
      "loss": 0.0146,
      "step": 1970560
    },
    {
      "epoch": 3.2248973900748217,
      "grad_norm": 0.17947450280189514,
      "learning_rate": 3.573598307887957e-06,
      "loss": 0.0113,
      "step": 1970580
    },
    {
      "epoch": 3.2249301205134753,
      "grad_norm": 0.30398231744766235,
      "learning_rate": 3.57353241567444e-06,
      "loss": 0.0141,
      "step": 1970600
    },
    {
      "epoch": 3.2249628509521284,
      "grad_norm": 0.6103265881538391,
      "learning_rate": 3.5734665234609227e-06,
      "loss": 0.0132,
      "step": 1970620
    },
    {
      "epoch": 3.2249955813907816,
      "grad_norm": 0.5214627981185913,
      "learning_rate": 3.573400631247406e-06,
      "loss": 0.0126,
      "step": 1970640
    },
    {
      "epoch": 3.225028311829435,
      "grad_norm": 0.3468262851238251,
      "learning_rate": 3.5733347390338886e-06,
      "loss": 0.012,
      "step": 1970660
    },
    {
      "epoch": 3.2250610422680883,
      "grad_norm": 0.2161375880241394,
      "learning_rate": 3.5732688468203713e-06,
      "loss": 0.0106,
      "step": 1970680
    },
    {
      "epoch": 3.225093772706742,
      "grad_norm": 0.1650734692811966,
      "learning_rate": 3.573202954606854e-06,
      "loss": 0.0081,
      "step": 1970700
    },
    {
      "epoch": 3.225126503145395,
      "grad_norm": 0.29798027873039246,
      "learning_rate": 3.573137062393337e-06,
      "loss": 0.0105,
      "step": 1970720
    },
    {
      "epoch": 3.2251592335840487,
      "grad_norm": 0.7827370762825012,
      "learning_rate": 3.5730711701798204e-06,
      "loss": 0.0224,
      "step": 1970740
    },
    {
      "epoch": 3.225191964022702,
      "grad_norm": 0.2943912446498871,
      "learning_rate": 3.573005277966303e-06,
      "loss": 0.0126,
      "step": 1970760
    },
    {
      "epoch": 3.225224694461355,
      "grad_norm": 0.1388726383447647,
      "learning_rate": 3.5729393857527863e-06,
      "loss": 0.0105,
      "step": 1970780
    },
    {
      "epoch": 3.2252574249000086,
      "grad_norm": 0.27245181798934937,
      "learning_rate": 3.572873493539269e-06,
      "loss": 0.0101,
      "step": 1970800
    },
    {
      "epoch": 3.2252901553386617,
      "grad_norm": 0.010294230654835701,
      "learning_rate": 3.5728076013257518e-06,
      "loss": 0.0092,
      "step": 1970820
    },
    {
      "epoch": 3.2253228857773153,
      "grad_norm": 0.713674008846283,
      "learning_rate": 3.5727417091122345e-06,
      "loss": 0.0094,
      "step": 1970840
    },
    {
      "epoch": 3.2253556162159684,
      "grad_norm": 0.30924269556999207,
      "learning_rate": 3.5726758168987173e-06,
      "loss": 0.0081,
      "step": 1970860
    },
    {
      "epoch": 3.225388346654622,
      "grad_norm": 0.339618444442749,
      "learning_rate": 3.5726099246852004e-06,
      "loss": 0.0128,
      "step": 1970880
    },
    {
      "epoch": 3.225421077093275,
      "grad_norm": 0.31541311740875244,
      "learning_rate": 3.572544032471683e-06,
      "loss": 0.0163,
      "step": 1970900
    },
    {
      "epoch": 3.2254538075319283,
      "grad_norm": 0.3281742036342621,
      "learning_rate": 3.572478140258166e-06,
      "loss": 0.0084,
      "step": 1970920
    },
    {
      "epoch": 3.225486537970582,
      "grad_norm": 0.3529229164123535,
      "learning_rate": 3.5724122480446486e-06,
      "loss": 0.0182,
      "step": 1970940
    },
    {
      "epoch": 3.225519268409235,
      "grad_norm": 0.1779324859380722,
      "learning_rate": 3.572346355831132e-06,
      "loss": 0.015,
      "step": 1970960
    },
    {
      "epoch": 3.2255519988478887,
      "grad_norm": 0.2258318066596985,
      "learning_rate": 3.5722804636176145e-06,
      "loss": 0.0087,
      "step": 1970980
    },
    {
      "epoch": 3.225584729286542,
      "grad_norm": 0.29164692759513855,
      "learning_rate": 3.5722145714040973e-06,
      "loss": 0.0086,
      "step": 1971000
    },
    {
      "epoch": 3.2256174597251954,
      "grad_norm": 0.4462337791919708,
      "learning_rate": 3.57214867919058e-06,
      "loss": 0.0114,
      "step": 1971020
    },
    {
      "epoch": 3.2256501901638486,
      "grad_norm": 0.5017666816711426,
      "learning_rate": 3.572082786977063e-06,
      "loss": 0.0164,
      "step": 1971040
    },
    {
      "epoch": 3.2256829206025017,
      "grad_norm": 0.5911659598350525,
      "learning_rate": 3.572016894763546e-06,
      "loss": 0.0206,
      "step": 1971060
    },
    {
      "epoch": 3.2257156510411553,
      "grad_norm": 0.19860373437404633,
      "learning_rate": 3.5719510025500287e-06,
      "loss": 0.011,
      "step": 1971080
    },
    {
      "epoch": 3.2257483814798085,
      "grad_norm": 0.4380854666233063,
      "learning_rate": 3.5718851103365123e-06,
      "loss": 0.0122,
      "step": 1971100
    },
    {
      "epoch": 3.225781111918462,
      "grad_norm": 0.6813716292381287,
      "learning_rate": 3.571819218122995e-06,
      "loss": 0.015,
      "step": 1971120
    },
    {
      "epoch": 3.225813842357115,
      "grad_norm": 0.6417765021324158,
      "learning_rate": 3.5717533259094777e-06,
      "loss": 0.0118,
      "step": 1971140
    },
    {
      "epoch": 3.225846572795769,
      "grad_norm": 0.35442280769348145,
      "learning_rate": 3.5716874336959605e-06,
      "loss": 0.0063,
      "step": 1971160
    },
    {
      "epoch": 3.225879303234422,
      "grad_norm": 0.10200424492359161,
      "learning_rate": 3.5716215414824436e-06,
      "loss": 0.012,
      "step": 1971180
    },
    {
      "epoch": 3.225912033673075,
      "grad_norm": 0.22604122757911682,
      "learning_rate": 3.5715556492689264e-06,
      "loss": 0.0101,
      "step": 1971200
    },
    {
      "epoch": 3.2259447641117287,
      "grad_norm": 0.17691795527935028,
      "learning_rate": 3.571489757055409e-06,
      "loss": 0.0075,
      "step": 1971220
    },
    {
      "epoch": 3.225977494550382,
      "grad_norm": 0.29699602723121643,
      "learning_rate": 3.571423864841892e-06,
      "loss": 0.01,
      "step": 1971240
    },
    {
      "epoch": 3.2260102249890354,
      "grad_norm": 0.09845782816410065,
      "learning_rate": 3.5713579726283746e-06,
      "loss": 0.0158,
      "step": 1971260
    },
    {
      "epoch": 3.2260429554276886,
      "grad_norm": 0.28669220209121704,
      "learning_rate": 3.5712920804148578e-06,
      "loss": 0.0109,
      "step": 1971280
    },
    {
      "epoch": 3.226075685866342,
      "grad_norm": 0.07472636550664902,
      "learning_rate": 3.5712261882013405e-06,
      "loss": 0.0115,
      "step": 1971300
    },
    {
      "epoch": 3.2261084163049953,
      "grad_norm": 0.34065374732017517,
      "learning_rate": 3.5711602959878232e-06,
      "loss": 0.009,
      "step": 1971320
    },
    {
      "epoch": 3.2261411467436485,
      "grad_norm": 0.20338304340839386,
      "learning_rate": 3.571094403774306e-06,
      "loss": 0.0125,
      "step": 1971340
    },
    {
      "epoch": 3.226173877182302,
      "grad_norm": 0.14334429800510406,
      "learning_rate": 3.571028511560789e-06,
      "loss": 0.0083,
      "step": 1971360
    },
    {
      "epoch": 3.2262066076209552,
      "grad_norm": 0.18024511635303497,
      "learning_rate": 3.570962619347272e-06,
      "loss": 0.0098,
      "step": 1971380
    },
    {
      "epoch": 3.226239338059609,
      "grad_norm": 0.4753778576850891,
      "learning_rate": 3.5708967271337546e-06,
      "loss": 0.013,
      "step": 1971400
    },
    {
      "epoch": 3.226272068498262,
      "grad_norm": 0.4160796105861664,
      "learning_rate": 3.5708308349202374e-06,
      "loss": 0.0084,
      "step": 1971420
    },
    {
      "epoch": 3.2263047989369156,
      "grad_norm": 0.13774612545967102,
      "learning_rate": 3.570764942706721e-06,
      "loss": 0.0109,
      "step": 1971440
    },
    {
      "epoch": 3.2263375293755687,
      "grad_norm": 0.21383973956108093,
      "learning_rate": 3.5706990504932037e-06,
      "loss": 0.0164,
      "step": 1971460
    },
    {
      "epoch": 3.226370259814222,
      "grad_norm": 0.3773118853569031,
      "learning_rate": 3.5706331582796864e-06,
      "loss": 0.014,
      "step": 1971480
    },
    {
      "epoch": 3.2264029902528755,
      "grad_norm": 0.1089760810136795,
      "learning_rate": 3.5705672660661696e-06,
      "loss": 0.0108,
      "step": 1971500
    },
    {
      "epoch": 3.2264357206915286,
      "grad_norm": 0.7597376108169556,
      "learning_rate": 3.5705013738526523e-06,
      "loss": 0.0105,
      "step": 1971520
    },
    {
      "epoch": 3.226468451130182,
      "grad_norm": 0.27243876457214355,
      "learning_rate": 3.570435481639135e-06,
      "loss": 0.0104,
      "step": 1971540
    },
    {
      "epoch": 3.2265011815688354,
      "grad_norm": 0.46974360942840576,
      "learning_rate": 3.570369589425618e-06,
      "loss": 0.008,
      "step": 1971560
    },
    {
      "epoch": 3.226533912007489,
      "grad_norm": 0.397221177816391,
      "learning_rate": 3.570303697212101e-06,
      "loss": 0.017,
      "step": 1971580
    },
    {
      "epoch": 3.226566642446142,
      "grad_norm": 0.3064481317996979,
      "learning_rate": 3.5702378049985837e-06,
      "loss": 0.0138,
      "step": 1971600
    },
    {
      "epoch": 3.2265993728847953,
      "grad_norm": 0.19130143523216248,
      "learning_rate": 3.5701719127850665e-06,
      "loss": 0.0103,
      "step": 1971620
    },
    {
      "epoch": 3.226632103323449,
      "grad_norm": 0.15910568833351135,
      "learning_rate": 3.570106020571549e-06,
      "loss": 0.012,
      "step": 1971640
    },
    {
      "epoch": 3.226664833762102,
      "grad_norm": 0.34028390049934387,
      "learning_rate": 3.5700401283580324e-06,
      "loss": 0.014,
      "step": 1971660
    },
    {
      "epoch": 3.2266975642007556,
      "grad_norm": 0.10816122591495514,
      "learning_rate": 3.569974236144515e-06,
      "loss": 0.0118,
      "step": 1971680
    },
    {
      "epoch": 3.2267302946394087,
      "grad_norm": 0.1838657110929489,
      "learning_rate": 3.569908343930998e-06,
      "loss": 0.0102,
      "step": 1971700
    },
    {
      "epoch": 3.226763025078062,
      "grad_norm": 0.3220798671245575,
      "learning_rate": 3.5698424517174806e-06,
      "loss": 0.0198,
      "step": 1971720
    },
    {
      "epoch": 3.2267957555167155,
      "grad_norm": 0.968465268611908,
      "learning_rate": 3.5697765595039633e-06,
      "loss": 0.0129,
      "step": 1971740
    },
    {
      "epoch": 3.2268284859553686,
      "grad_norm": 0.22413258254528046,
      "learning_rate": 3.5697106672904465e-06,
      "loss": 0.0157,
      "step": 1971760
    },
    {
      "epoch": 3.2268612163940222,
      "grad_norm": 0.6216356158256531,
      "learning_rate": 3.5696447750769292e-06,
      "loss": 0.0089,
      "step": 1971780
    },
    {
      "epoch": 3.2268939468326754,
      "grad_norm": 1.1667578220367432,
      "learning_rate": 3.5695788828634124e-06,
      "loss": 0.012,
      "step": 1971800
    },
    {
      "epoch": 3.226926677271329,
      "grad_norm": 0.3047620356082916,
      "learning_rate": 3.5695129906498955e-06,
      "loss": 0.0152,
      "step": 1971820
    },
    {
      "epoch": 3.226959407709982,
      "grad_norm": 0.6829463839530945,
      "learning_rate": 3.5694470984363783e-06,
      "loss": 0.0192,
      "step": 1971840
    },
    {
      "epoch": 3.2269921381486353,
      "grad_norm": 0.28408312797546387,
      "learning_rate": 3.569381206222861e-06,
      "loss": 0.0105,
      "step": 1971860
    },
    {
      "epoch": 3.227024868587289,
      "grad_norm": 0.10631228983402252,
      "learning_rate": 3.5693153140093438e-06,
      "loss": 0.0152,
      "step": 1971880
    },
    {
      "epoch": 3.227057599025942,
      "grad_norm": 0.09461244940757751,
      "learning_rate": 3.569249421795827e-06,
      "loss": 0.0083,
      "step": 1971900
    },
    {
      "epoch": 3.2270903294645956,
      "grad_norm": 0.9396798610687256,
      "learning_rate": 3.5691835295823097e-06,
      "loss": 0.0113,
      "step": 1971920
    },
    {
      "epoch": 3.2271230599032488,
      "grad_norm": 0.24943920969963074,
      "learning_rate": 3.5691176373687924e-06,
      "loss": 0.011,
      "step": 1971940
    },
    {
      "epoch": 3.2271557903419024,
      "grad_norm": 0.2061287760734558,
      "learning_rate": 3.569051745155275e-06,
      "loss": 0.0108,
      "step": 1971960
    },
    {
      "epoch": 3.2271885207805555,
      "grad_norm": 0.2688586711883545,
      "learning_rate": 3.5689858529417583e-06,
      "loss": 0.0114,
      "step": 1971980
    },
    {
      "epoch": 3.2272212512192087,
      "grad_norm": 0.282938688993454,
      "learning_rate": 3.568919960728241e-06,
      "loss": 0.0124,
      "step": 1972000
    },
    {
      "epoch": 3.2272539816578623,
      "grad_norm": 0.05390234291553497,
      "learning_rate": 3.5688540685147238e-06,
      "loss": 0.0118,
      "step": 1972020
    },
    {
      "epoch": 3.2272867120965154,
      "grad_norm": 0.14354555308818817,
      "learning_rate": 3.5687881763012065e-06,
      "loss": 0.0117,
      "step": 1972040
    },
    {
      "epoch": 3.227319442535169,
      "grad_norm": 0.22990036010742188,
      "learning_rate": 3.5687222840876897e-06,
      "loss": 0.0129,
      "step": 1972060
    },
    {
      "epoch": 3.227352172973822,
      "grad_norm": 0.3902694880962372,
      "learning_rate": 3.5686563918741724e-06,
      "loss": 0.0105,
      "step": 1972080
    },
    {
      "epoch": 3.2273849034124753,
      "grad_norm": 0.11591095477342606,
      "learning_rate": 3.568590499660655e-06,
      "loss": 0.0107,
      "step": 1972100
    },
    {
      "epoch": 3.227417633851129,
      "grad_norm": 0.32183435559272766,
      "learning_rate": 3.568524607447138e-06,
      "loss": 0.0134,
      "step": 1972120
    },
    {
      "epoch": 3.227450364289782,
      "grad_norm": 0.20883671939373016,
      "learning_rate": 3.568458715233621e-06,
      "loss": 0.0109,
      "step": 1972140
    },
    {
      "epoch": 3.2274830947284356,
      "grad_norm": 0.20015257596969604,
      "learning_rate": 3.5683928230201042e-06,
      "loss": 0.0093,
      "step": 1972160
    },
    {
      "epoch": 3.227515825167089,
      "grad_norm": 0.12914510071277618,
      "learning_rate": 3.568326930806587e-06,
      "loss": 0.0165,
      "step": 1972180
    },
    {
      "epoch": 3.2275485556057424,
      "grad_norm": 0.44318675994873047,
      "learning_rate": 3.56826103859307e-06,
      "loss": 0.012,
      "step": 1972200
    },
    {
      "epoch": 3.2275812860443955,
      "grad_norm": 0.2802414000034332,
      "learning_rate": 3.568195146379553e-06,
      "loss": 0.0139,
      "step": 1972220
    },
    {
      "epoch": 3.2276140164830487,
      "grad_norm": 0.5051676630973816,
      "learning_rate": 3.5681292541660356e-06,
      "loss": 0.0123,
      "step": 1972240
    },
    {
      "epoch": 3.2276467469217023,
      "grad_norm": 0.439988374710083,
      "learning_rate": 3.5680633619525184e-06,
      "loss": 0.0095,
      "step": 1972260
    },
    {
      "epoch": 3.2276794773603554,
      "grad_norm": 0.18678466975688934,
      "learning_rate": 3.567997469739001e-06,
      "loss": 0.017,
      "step": 1972280
    },
    {
      "epoch": 3.227712207799009,
      "grad_norm": 0.3816342055797577,
      "learning_rate": 3.5679315775254843e-06,
      "loss": 0.0148,
      "step": 1972300
    },
    {
      "epoch": 3.227744938237662,
      "grad_norm": 0.4702089726924896,
      "learning_rate": 3.567865685311967e-06,
      "loss": 0.0159,
      "step": 1972320
    },
    {
      "epoch": 3.2277776686763158,
      "grad_norm": 0.1364356279373169,
      "learning_rate": 3.5677997930984497e-06,
      "loss": 0.0149,
      "step": 1972340
    },
    {
      "epoch": 3.227810399114969,
      "grad_norm": 0.3301278352737427,
      "learning_rate": 3.5677339008849325e-06,
      "loss": 0.0089,
      "step": 1972360
    },
    {
      "epoch": 3.227843129553622,
      "grad_norm": 0.2316863089799881,
      "learning_rate": 3.5676680086714156e-06,
      "loss": 0.0129,
      "step": 1972380
    },
    {
      "epoch": 3.2278758599922757,
      "grad_norm": 0.9092922210693359,
      "learning_rate": 3.5676021164578984e-06,
      "loss": 0.0153,
      "step": 1972400
    },
    {
      "epoch": 3.227908590430929,
      "grad_norm": 0.13803468644618988,
      "learning_rate": 3.567536224244381e-06,
      "loss": 0.0088,
      "step": 1972420
    },
    {
      "epoch": 3.2279413208695824,
      "grad_norm": 0.3746337890625,
      "learning_rate": 3.567470332030864e-06,
      "loss": 0.0133,
      "step": 1972440
    },
    {
      "epoch": 3.2279740513082356,
      "grad_norm": 0.27087023854255676,
      "learning_rate": 3.567404439817347e-06,
      "loss": 0.0159,
      "step": 1972460
    },
    {
      "epoch": 3.228006781746889,
      "grad_norm": 0.26630899310112,
      "learning_rate": 3.5673385476038298e-06,
      "loss": 0.0147,
      "step": 1972480
    },
    {
      "epoch": 3.2280395121855423,
      "grad_norm": 0.3022354543209076,
      "learning_rate": 3.567272655390313e-06,
      "loss": 0.0133,
      "step": 1972500
    },
    {
      "epoch": 3.2280722426241955,
      "grad_norm": 0.3307710886001587,
      "learning_rate": 3.567206763176796e-06,
      "loss": 0.0124,
      "step": 1972520
    },
    {
      "epoch": 3.228104973062849,
      "grad_norm": 0.23117314279079437,
      "learning_rate": 3.567140870963279e-06,
      "loss": 0.0112,
      "step": 1972540
    },
    {
      "epoch": 3.228137703501502,
      "grad_norm": 0.31446924805641174,
      "learning_rate": 3.5670749787497616e-06,
      "loss": 0.012,
      "step": 1972560
    },
    {
      "epoch": 3.228170433940156,
      "grad_norm": 0.17442888021469116,
      "learning_rate": 3.5670090865362443e-06,
      "loss": 0.0069,
      "step": 1972580
    },
    {
      "epoch": 3.228203164378809,
      "grad_norm": 0.04884716495871544,
      "learning_rate": 3.5669431943227275e-06,
      "loss": 0.0085,
      "step": 1972600
    },
    {
      "epoch": 3.2282358948174625,
      "grad_norm": 0.28965574502944946,
      "learning_rate": 3.5668773021092102e-06,
      "loss": 0.0149,
      "step": 1972620
    },
    {
      "epoch": 3.2282686252561157,
      "grad_norm": 0.09550148248672485,
      "learning_rate": 3.566811409895693e-06,
      "loss": 0.0112,
      "step": 1972640
    },
    {
      "epoch": 3.228301355694769,
      "grad_norm": 0.2558062970638275,
      "learning_rate": 3.5667455176821757e-06,
      "loss": 0.0137,
      "step": 1972660
    },
    {
      "epoch": 3.2283340861334224,
      "grad_norm": 0.5799270272254944,
      "learning_rate": 3.5666796254686584e-06,
      "loss": 0.0077,
      "step": 1972680
    },
    {
      "epoch": 3.2283668165720756,
      "grad_norm": 0.04644281044602394,
      "learning_rate": 3.5666137332551416e-06,
      "loss": 0.0143,
      "step": 1972700
    },
    {
      "epoch": 3.228399547010729,
      "grad_norm": 0.21496982872486115,
      "learning_rate": 3.5665478410416243e-06,
      "loss": 0.012,
      "step": 1972720
    },
    {
      "epoch": 3.2284322774493823,
      "grad_norm": 0.2331034392118454,
      "learning_rate": 3.566481948828107e-06,
      "loss": 0.0091,
      "step": 1972740
    },
    {
      "epoch": 3.228465007888036,
      "grad_norm": 0.3512871563434601,
      "learning_rate": 3.56641605661459e-06,
      "loss": 0.0144,
      "step": 1972760
    },
    {
      "epoch": 3.228497738326689,
      "grad_norm": 0.5698800683021545,
      "learning_rate": 3.566350164401073e-06,
      "loss": 0.0191,
      "step": 1972780
    },
    {
      "epoch": 3.228530468765342,
      "grad_norm": 0.29256051778793335,
      "learning_rate": 3.5662842721875557e-06,
      "loss": 0.0142,
      "step": 1972800
    },
    {
      "epoch": 3.228563199203996,
      "grad_norm": 0.34490907192230225,
      "learning_rate": 3.5662183799740385e-06,
      "loss": 0.0093,
      "step": 1972820
    },
    {
      "epoch": 3.228595929642649,
      "grad_norm": 0.2746194899082184,
      "learning_rate": 3.566152487760521e-06,
      "loss": 0.0097,
      "step": 1972840
    },
    {
      "epoch": 3.2286286600813026,
      "grad_norm": 0.0680607482790947,
      "learning_rate": 3.5660865955470048e-06,
      "loss": 0.0086,
      "step": 1972860
    },
    {
      "epoch": 3.2286613905199557,
      "grad_norm": 0.7073173522949219,
      "learning_rate": 3.5660207033334875e-06,
      "loss": 0.011,
      "step": 1972880
    },
    {
      "epoch": 3.2286941209586093,
      "grad_norm": 0.45147913694381714,
      "learning_rate": 3.5659548111199703e-06,
      "loss": 0.0192,
      "step": 1972900
    },
    {
      "epoch": 3.2287268513972625,
      "grad_norm": 0.26154279708862305,
      "learning_rate": 3.5658889189064534e-06,
      "loss": 0.0116,
      "step": 1972920
    },
    {
      "epoch": 3.2287595818359156,
      "grad_norm": 0.23770567774772644,
      "learning_rate": 3.565823026692936e-06,
      "loss": 0.0121,
      "step": 1972940
    },
    {
      "epoch": 3.228792312274569,
      "grad_norm": 0.31300288438796997,
      "learning_rate": 3.565757134479419e-06,
      "loss": 0.0161,
      "step": 1972960
    },
    {
      "epoch": 3.2288250427132223,
      "grad_norm": 0.2446185052394867,
      "learning_rate": 3.5656912422659016e-06,
      "loss": 0.0136,
      "step": 1972980
    },
    {
      "epoch": 3.228857773151876,
      "grad_norm": 0.07678748667240143,
      "learning_rate": 3.565625350052385e-06,
      "loss": 0.0148,
      "step": 1973000
    },
    {
      "epoch": 3.228890503590529,
      "grad_norm": 0.36791691184043884,
      "learning_rate": 3.5655594578388676e-06,
      "loss": 0.016,
      "step": 1973020
    },
    {
      "epoch": 3.2289232340291827,
      "grad_norm": 0.4942496120929718,
      "learning_rate": 3.5654935656253503e-06,
      "loss": 0.0248,
      "step": 1973040
    },
    {
      "epoch": 3.228955964467836,
      "grad_norm": 0.18470394611358643,
      "learning_rate": 3.565427673411833e-06,
      "loss": 0.0142,
      "step": 1973060
    },
    {
      "epoch": 3.228988694906489,
      "grad_norm": 0.1618385910987854,
      "learning_rate": 3.565361781198316e-06,
      "loss": 0.0132,
      "step": 1973080
    },
    {
      "epoch": 3.2290214253451426,
      "grad_norm": 0.6832188963890076,
      "learning_rate": 3.565295888984799e-06,
      "loss": 0.0128,
      "step": 1973100
    },
    {
      "epoch": 3.2290541557837957,
      "grad_norm": 0.3015437722206116,
      "learning_rate": 3.5652299967712817e-06,
      "loss": 0.0122,
      "step": 1973120
    },
    {
      "epoch": 3.2290868862224493,
      "grad_norm": 0.07982515543699265,
      "learning_rate": 3.5651641045577644e-06,
      "loss": 0.0064,
      "step": 1973140
    },
    {
      "epoch": 3.2291196166611025,
      "grad_norm": 0.3612040579319,
      "learning_rate": 3.565098212344247e-06,
      "loss": 0.0155,
      "step": 1973160
    },
    {
      "epoch": 3.2291523470997556,
      "grad_norm": 0.48900341987609863,
      "learning_rate": 3.5650323201307303e-06,
      "loss": 0.0175,
      "step": 1973180
    },
    {
      "epoch": 3.229185077538409,
      "grad_norm": 0.45439478754997253,
      "learning_rate": 3.5649664279172135e-06,
      "loss": 0.0169,
      "step": 1973200
    },
    {
      "epoch": 3.2292178079770624,
      "grad_norm": 0.5010254383087158,
      "learning_rate": 3.5649005357036962e-06,
      "loss": 0.0182,
      "step": 1973220
    },
    {
      "epoch": 3.229250538415716,
      "grad_norm": 0.18070708215236664,
      "learning_rate": 3.5648346434901794e-06,
      "loss": 0.0117,
      "step": 1973240
    },
    {
      "epoch": 3.229283268854369,
      "grad_norm": 0.10405311733484268,
      "learning_rate": 3.564768751276662e-06,
      "loss": 0.0093,
      "step": 1973260
    },
    {
      "epoch": 3.2293159992930227,
      "grad_norm": 0.12530100345611572,
      "learning_rate": 3.564702859063145e-06,
      "loss": 0.0117,
      "step": 1973280
    },
    {
      "epoch": 3.229348729731676,
      "grad_norm": 0.3840843141078949,
      "learning_rate": 3.5646369668496276e-06,
      "loss": 0.0117,
      "step": 1973300
    },
    {
      "epoch": 3.229381460170329,
      "grad_norm": 0.3612752854824066,
      "learning_rate": 3.5645710746361108e-06,
      "loss": 0.0139,
      "step": 1973320
    },
    {
      "epoch": 3.2294141906089826,
      "grad_norm": 0.3380565047264099,
      "learning_rate": 3.5645051824225935e-06,
      "loss": 0.0105,
      "step": 1973340
    },
    {
      "epoch": 3.2294469210476358,
      "grad_norm": 0.6563284397125244,
      "learning_rate": 3.5644392902090762e-06,
      "loss": 0.0128,
      "step": 1973360
    },
    {
      "epoch": 3.2294796514862893,
      "grad_norm": 0.24146051704883575,
      "learning_rate": 3.564373397995559e-06,
      "loss": 0.0072,
      "step": 1973380
    },
    {
      "epoch": 3.2295123819249425,
      "grad_norm": 0.31029748916625977,
      "learning_rate": 3.564307505782042e-06,
      "loss": 0.01,
      "step": 1973400
    },
    {
      "epoch": 3.229545112363596,
      "grad_norm": 0.08805999159812927,
      "learning_rate": 3.564241613568525e-06,
      "loss": 0.0126,
      "step": 1973420
    },
    {
      "epoch": 3.2295778428022492,
      "grad_norm": 0.7250707149505615,
      "learning_rate": 3.5641757213550076e-06,
      "loss": 0.012,
      "step": 1973440
    },
    {
      "epoch": 3.2296105732409024,
      "grad_norm": 0.1730131059885025,
      "learning_rate": 3.5641098291414904e-06,
      "loss": 0.0136,
      "step": 1973460
    },
    {
      "epoch": 3.229643303679556,
      "grad_norm": 1.3554389476776123,
      "learning_rate": 3.5640439369279735e-06,
      "loss": 0.0102,
      "step": 1973480
    },
    {
      "epoch": 3.229676034118209,
      "grad_norm": 0.30681681632995605,
      "learning_rate": 3.5639780447144563e-06,
      "loss": 0.0109,
      "step": 1973500
    },
    {
      "epoch": 3.2297087645568627,
      "grad_norm": 0.332278847694397,
      "learning_rate": 3.563912152500939e-06,
      "loss": 0.0122,
      "step": 1973520
    },
    {
      "epoch": 3.229741494995516,
      "grad_norm": 0.7127094268798828,
      "learning_rate": 3.5638462602874217e-06,
      "loss": 0.0142,
      "step": 1973540
    },
    {
      "epoch": 3.2297742254341695,
      "grad_norm": 0.2345515638589859,
      "learning_rate": 3.5637803680739053e-06,
      "loss": 0.0121,
      "step": 1973560
    },
    {
      "epoch": 3.2298069558728226,
      "grad_norm": 0.20925983786582947,
      "learning_rate": 3.563714475860388e-06,
      "loss": 0.0086,
      "step": 1973580
    },
    {
      "epoch": 3.2298396863114758,
      "grad_norm": 0.4705336093902588,
      "learning_rate": 3.563648583646871e-06,
      "loss": 0.0152,
      "step": 1973600
    },
    {
      "epoch": 3.2298724167501294,
      "grad_norm": 0.27620217204093933,
      "learning_rate": 3.563582691433354e-06,
      "loss": 0.0101,
      "step": 1973620
    },
    {
      "epoch": 3.2299051471887825,
      "grad_norm": 0.49005261063575745,
      "learning_rate": 3.5635167992198367e-06,
      "loss": 0.011,
      "step": 1973640
    },
    {
      "epoch": 3.229937877627436,
      "grad_norm": 0.9755166172981262,
      "learning_rate": 3.5634509070063195e-06,
      "loss": 0.009,
      "step": 1973660
    },
    {
      "epoch": 3.2299706080660893,
      "grad_norm": 0.2616027891635895,
      "learning_rate": 3.563385014792802e-06,
      "loss": 0.0107,
      "step": 1973680
    },
    {
      "epoch": 3.2300033385047424,
      "grad_norm": 0.1256953328847885,
      "learning_rate": 3.563319122579285e-06,
      "loss": 0.0102,
      "step": 1973700
    },
    {
      "epoch": 3.230036068943396,
      "grad_norm": 0.17649662494659424,
      "learning_rate": 3.563253230365768e-06,
      "loss": 0.0096,
      "step": 1973720
    },
    {
      "epoch": 3.230068799382049,
      "grad_norm": 0.1977977752685547,
      "learning_rate": 3.563187338152251e-06,
      "loss": 0.0063,
      "step": 1973740
    },
    {
      "epoch": 3.2301015298207028,
      "grad_norm": 0.1292780339717865,
      "learning_rate": 3.5631214459387336e-06,
      "loss": 0.01,
      "step": 1973760
    },
    {
      "epoch": 3.230134260259356,
      "grad_norm": 0.23192784190177917,
      "learning_rate": 3.5630555537252163e-06,
      "loss": 0.0157,
      "step": 1973780
    },
    {
      "epoch": 3.2301669906980095,
      "grad_norm": 0.3351373076438904,
      "learning_rate": 3.5629896615116995e-06,
      "loss": 0.0225,
      "step": 1973800
    },
    {
      "epoch": 3.2301997211366626,
      "grad_norm": 0.6559359431266785,
      "learning_rate": 3.5629237692981822e-06,
      "loss": 0.0107,
      "step": 1973820
    },
    {
      "epoch": 3.230232451575316,
      "grad_norm": 0.16679692268371582,
      "learning_rate": 3.562857877084665e-06,
      "loss": 0.0119,
      "step": 1973840
    },
    {
      "epoch": 3.2302651820139694,
      "grad_norm": 0.03921838477253914,
      "learning_rate": 3.5627919848711477e-06,
      "loss": 0.0096,
      "step": 1973860
    },
    {
      "epoch": 3.2302979124526225,
      "grad_norm": 0.16831918060779572,
      "learning_rate": 3.562726092657631e-06,
      "loss": 0.0136,
      "step": 1973880
    },
    {
      "epoch": 3.230330642891276,
      "grad_norm": 0.1348363161087036,
      "learning_rate": 3.5626602004441136e-06,
      "loss": 0.0098,
      "step": 1973900
    },
    {
      "epoch": 3.2303633733299293,
      "grad_norm": 0.35853174328804016,
      "learning_rate": 3.5625943082305968e-06,
      "loss": 0.0064,
      "step": 1973920
    },
    {
      "epoch": 3.230396103768583,
      "grad_norm": 0.3590811789035797,
      "learning_rate": 3.56252841601708e-06,
      "loss": 0.0132,
      "step": 1973940
    },
    {
      "epoch": 3.230428834207236,
      "grad_norm": 0.7168358564376831,
      "learning_rate": 3.5624625238035627e-06,
      "loss": 0.0117,
      "step": 1973960
    },
    {
      "epoch": 3.230461564645889,
      "grad_norm": 0.26629403233528137,
      "learning_rate": 3.5623966315900454e-06,
      "loss": 0.0118,
      "step": 1973980
    },
    {
      "epoch": 3.2304942950845428,
      "grad_norm": 1.1131224632263184,
      "learning_rate": 3.562330739376528e-06,
      "loss": 0.0162,
      "step": 1974000
    },
    {
      "epoch": 3.230527025523196,
      "grad_norm": 0.3467057943344116,
      "learning_rate": 3.5622648471630113e-06,
      "loss": 0.0139,
      "step": 1974020
    },
    {
      "epoch": 3.2305597559618495,
      "grad_norm": 0.21218252182006836,
      "learning_rate": 3.562198954949494e-06,
      "loss": 0.0167,
      "step": 1974040
    },
    {
      "epoch": 3.2305924864005027,
      "grad_norm": 0.836780309677124,
      "learning_rate": 3.562133062735977e-06,
      "loss": 0.014,
      "step": 1974060
    },
    {
      "epoch": 3.2306252168391563,
      "grad_norm": 0.06942381709814072,
      "learning_rate": 3.5620671705224595e-06,
      "loss": 0.0128,
      "step": 1974080
    },
    {
      "epoch": 3.2306579472778094,
      "grad_norm": 0.14922380447387695,
      "learning_rate": 3.5620012783089427e-06,
      "loss": 0.009,
      "step": 1974100
    },
    {
      "epoch": 3.2306906777164626,
      "grad_norm": 0.3258635699748993,
      "learning_rate": 3.5619353860954254e-06,
      "loss": 0.0143,
      "step": 1974120
    },
    {
      "epoch": 3.230723408155116,
      "grad_norm": 0.2977668046951294,
      "learning_rate": 3.561869493881908e-06,
      "loss": 0.0113,
      "step": 1974140
    },
    {
      "epoch": 3.2307561385937693,
      "grad_norm": 0.2011335790157318,
      "learning_rate": 3.561803601668391e-06,
      "loss": 0.0151,
      "step": 1974160
    },
    {
      "epoch": 3.230788869032423,
      "grad_norm": 0.6795802712440491,
      "learning_rate": 3.5617377094548737e-06,
      "loss": 0.0107,
      "step": 1974180
    },
    {
      "epoch": 3.230821599471076,
      "grad_norm": 0.406857430934906,
      "learning_rate": 3.561671817241357e-06,
      "loss": 0.0115,
      "step": 1974200
    },
    {
      "epoch": 3.2308543299097296,
      "grad_norm": 0.11691401153802872,
      "learning_rate": 3.5616059250278396e-06,
      "loss": 0.0114,
      "step": 1974220
    },
    {
      "epoch": 3.230887060348383,
      "grad_norm": 0.23487398028373718,
      "learning_rate": 3.5615400328143223e-06,
      "loss": 0.0063,
      "step": 1974240
    },
    {
      "epoch": 3.230919790787036,
      "grad_norm": 0.16435284912586212,
      "learning_rate": 3.561474140600806e-06,
      "loss": 0.0098,
      "step": 1974260
    },
    {
      "epoch": 3.2309525212256895,
      "grad_norm": 0.028976930305361748,
      "learning_rate": 3.5614082483872886e-06,
      "loss": 0.0109,
      "step": 1974280
    },
    {
      "epoch": 3.2309852516643427,
      "grad_norm": 0.10147274285554886,
      "learning_rate": 3.5613423561737714e-06,
      "loss": 0.0109,
      "step": 1974300
    },
    {
      "epoch": 3.2310179821029963,
      "grad_norm": 0.07392647862434387,
      "learning_rate": 3.561276463960254e-06,
      "loss": 0.0085,
      "step": 1974320
    },
    {
      "epoch": 3.2310507125416494,
      "grad_norm": 0.23466601967811584,
      "learning_rate": 3.5612105717467373e-06,
      "loss": 0.0118,
      "step": 1974340
    },
    {
      "epoch": 3.231083442980303,
      "grad_norm": 0.2883162200450897,
      "learning_rate": 3.56114467953322e-06,
      "loss": 0.0072,
      "step": 1974360
    },
    {
      "epoch": 3.231116173418956,
      "grad_norm": 0.1855415403842926,
      "learning_rate": 3.5610787873197027e-06,
      "loss": 0.0148,
      "step": 1974380
    },
    {
      "epoch": 3.2311489038576093,
      "grad_norm": 0.3980739116668701,
      "learning_rate": 3.5610128951061855e-06,
      "loss": 0.0131,
      "step": 1974400
    },
    {
      "epoch": 3.231181634296263,
      "grad_norm": 0.14514471590518951,
      "learning_rate": 3.5609470028926687e-06,
      "loss": 0.0141,
      "step": 1974420
    },
    {
      "epoch": 3.231214364734916,
      "grad_norm": 0.10609392821788788,
      "learning_rate": 3.5608811106791514e-06,
      "loss": 0.0143,
      "step": 1974440
    },
    {
      "epoch": 3.2312470951735697,
      "grad_norm": 0.1390804946422577,
      "learning_rate": 3.560815218465634e-06,
      "loss": 0.0095,
      "step": 1974460
    },
    {
      "epoch": 3.231279825612223,
      "grad_norm": 0.24311500787734985,
      "learning_rate": 3.560749326252117e-06,
      "loss": 0.014,
      "step": 1974480
    },
    {
      "epoch": 3.2313125560508764,
      "grad_norm": 0.3749503791332245,
      "learning_rate": 3.5606834340386e-06,
      "loss": 0.0121,
      "step": 1974500
    },
    {
      "epoch": 3.2313452864895296,
      "grad_norm": 0.21159496903419495,
      "learning_rate": 3.5606175418250828e-06,
      "loss": 0.0114,
      "step": 1974520
    },
    {
      "epoch": 3.2313780169281827,
      "grad_norm": 0.23216859996318817,
      "learning_rate": 3.5605516496115655e-06,
      "loss": 0.0129,
      "step": 1974540
    },
    {
      "epoch": 3.2314107473668363,
      "grad_norm": 0.211758553981781,
      "learning_rate": 3.5604857573980483e-06,
      "loss": 0.0109,
      "step": 1974560
    },
    {
      "epoch": 3.2314434778054895,
      "grad_norm": 0.6206437349319458,
      "learning_rate": 3.560419865184531e-06,
      "loss": 0.0149,
      "step": 1974580
    },
    {
      "epoch": 3.231476208244143,
      "grad_norm": 0.6097154021263123,
      "learning_rate": 3.560353972971014e-06,
      "loss": 0.0115,
      "step": 1974600
    },
    {
      "epoch": 3.231508938682796,
      "grad_norm": 0.07953833788633347,
      "learning_rate": 3.5602880807574973e-06,
      "loss": 0.0097,
      "step": 1974620
    },
    {
      "epoch": 3.23154166912145,
      "grad_norm": 0.20441831648349762,
      "learning_rate": 3.5602221885439805e-06,
      "loss": 0.007,
      "step": 1974640
    },
    {
      "epoch": 3.231574399560103,
      "grad_norm": 0.08132722228765488,
      "learning_rate": 3.5601562963304632e-06,
      "loss": 0.0098,
      "step": 1974660
    },
    {
      "epoch": 3.231607129998756,
      "grad_norm": 0.37700435519218445,
      "learning_rate": 3.560090404116946e-06,
      "loss": 0.0121,
      "step": 1974680
    },
    {
      "epoch": 3.2316398604374097,
      "grad_norm": 0.3468184769153595,
      "learning_rate": 3.5600245119034287e-06,
      "loss": 0.0126,
      "step": 1974700
    },
    {
      "epoch": 3.231672590876063,
      "grad_norm": 0.4816123843193054,
      "learning_rate": 3.5599586196899114e-06,
      "loss": 0.0096,
      "step": 1974720
    },
    {
      "epoch": 3.2317053213147164,
      "grad_norm": 0.34614309668540955,
      "learning_rate": 3.5598927274763946e-06,
      "loss": 0.0099,
      "step": 1974740
    },
    {
      "epoch": 3.2317380517533696,
      "grad_norm": 0.6853583455085754,
      "learning_rate": 3.5598268352628773e-06,
      "loss": 0.0122,
      "step": 1974760
    },
    {
      "epoch": 3.2317707821920227,
      "grad_norm": 0.044092245399951935,
      "learning_rate": 3.55976094304936e-06,
      "loss": 0.0103,
      "step": 1974780
    },
    {
      "epoch": 3.2318035126306763,
      "grad_norm": 0.7564921379089355,
      "learning_rate": 3.559695050835843e-06,
      "loss": 0.0134,
      "step": 1974800
    },
    {
      "epoch": 3.2318362430693295,
      "grad_norm": 0.11069928854703903,
      "learning_rate": 3.559629158622326e-06,
      "loss": 0.0149,
      "step": 1974820
    },
    {
      "epoch": 3.231868973507983,
      "grad_norm": 0.08615084737539291,
      "learning_rate": 3.5595632664088087e-06,
      "loss": 0.0158,
      "step": 1974840
    },
    {
      "epoch": 3.2319017039466362,
      "grad_norm": 0.43190109729766846,
      "learning_rate": 3.5594973741952915e-06,
      "loss": 0.011,
      "step": 1974860
    },
    {
      "epoch": 3.23193443438529,
      "grad_norm": 0.13346193730831146,
      "learning_rate": 3.559431481981774e-06,
      "loss": 0.0066,
      "step": 1974880
    },
    {
      "epoch": 3.231967164823943,
      "grad_norm": 1.3212335109710693,
      "learning_rate": 3.5593655897682574e-06,
      "loss": 0.0146,
      "step": 1974900
    },
    {
      "epoch": 3.231999895262596,
      "grad_norm": 0.15222208201885223,
      "learning_rate": 3.55929969755474e-06,
      "loss": 0.0119,
      "step": 1974920
    },
    {
      "epoch": 3.2320326257012497,
      "grad_norm": 0.1788758486509323,
      "learning_rate": 3.559233805341223e-06,
      "loss": 0.0101,
      "step": 1974940
    },
    {
      "epoch": 3.232065356139903,
      "grad_norm": 0.09928707033395767,
      "learning_rate": 3.5591679131277064e-06,
      "loss": 0.0156,
      "step": 1974960
    },
    {
      "epoch": 3.2320980865785565,
      "grad_norm": 0.2172802984714508,
      "learning_rate": 3.559102020914189e-06,
      "loss": 0.014,
      "step": 1974980
    },
    {
      "epoch": 3.2321308170172096,
      "grad_norm": 0.17323553562164307,
      "learning_rate": 3.559036128700672e-06,
      "loss": 0.0113,
      "step": 1975000
    },
    {
      "epoch": 3.232163547455863,
      "grad_norm": 0.1952749788761139,
      "learning_rate": 3.5589702364871547e-06,
      "loss": 0.0126,
      "step": 1975020
    },
    {
      "epoch": 3.2321962778945164,
      "grad_norm": 0.1295609474182129,
      "learning_rate": 3.558904344273638e-06,
      "loss": 0.0119,
      "step": 1975040
    },
    {
      "epoch": 3.2322290083331695,
      "grad_norm": 0.16265173256397247,
      "learning_rate": 3.5588384520601206e-06,
      "loss": 0.0103,
      "step": 1975060
    },
    {
      "epoch": 3.232261738771823,
      "grad_norm": 0.32996177673339844,
      "learning_rate": 3.5587725598466033e-06,
      "loss": 0.0163,
      "step": 1975080
    },
    {
      "epoch": 3.2322944692104763,
      "grad_norm": 0.26840487122535706,
      "learning_rate": 3.558706667633086e-06,
      "loss": 0.0108,
      "step": 1975100
    },
    {
      "epoch": 3.23232719964913,
      "grad_norm": 0.14865458011627197,
      "learning_rate": 3.5586407754195688e-06,
      "loss": 0.0097,
      "step": 1975120
    },
    {
      "epoch": 3.232359930087783,
      "grad_norm": 0.4964219927787781,
      "learning_rate": 3.558574883206052e-06,
      "loss": 0.0135,
      "step": 1975140
    },
    {
      "epoch": 3.232392660526436,
      "grad_norm": 0.5586285591125488,
      "learning_rate": 3.5585089909925347e-06,
      "loss": 0.0092,
      "step": 1975160
    },
    {
      "epoch": 3.2324253909650897,
      "grad_norm": 0.19780948758125305,
      "learning_rate": 3.5584430987790174e-06,
      "loss": 0.0144,
      "step": 1975180
    },
    {
      "epoch": 3.232458121403743,
      "grad_norm": 0.5676283836364746,
      "learning_rate": 3.5583772065655e-06,
      "loss": 0.0079,
      "step": 1975200
    },
    {
      "epoch": 3.2324908518423965,
      "grad_norm": 0.09371650964021683,
      "learning_rate": 3.5583113143519833e-06,
      "loss": 0.0082,
      "step": 1975220
    },
    {
      "epoch": 3.2325235822810496,
      "grad_norm": 0.47727280855178833,
      "learning_rate": 3.558245422138466e-06,
      "loss": 0.0154,
      "step": 1975240
    },
    {
      "epoch": 3.2325563127197032,
      "grad_norm": 0.17288970947265625,
      "learning_rate": 3.558179529924949e-06,
      "loss": 0.0085,
      "step": 1975260
    },
    {
      "epoch": 3.2325890431583564,
      "grad_norm": 0.2258695662021637,
      "learning_rate": 3.5581136377114315e-06,
      "loss": 0.0061,
      "step": 1975280
    },
    {
      "epoch": 3.2326217735970095,
      "grad_norm": 0.2661479413509369,
      "learning_rate": 3.5580477454979147e-06,
      "loss": 0.0101,
      "step": 1975300
    },
    {
      "epoch": 3.232654504035663,
      "grad_norm": 0.26952776312828064,
      "learning_rate": 3.557981853284398e-06,
      "loss": 0.0129,
      "step": 1975320
    },
    {
      "epoch": 3.2326872344743163,
      "grad_norm": 0.43260809779167175,
      "learning_rate": 3.5579159610708806e-06,
      "loss": 0.0095,
      "step": 1975340
    },
    {
      "epoch": 3.23271996491297,
      "grad_norm": 0.04839513823390007,
      "learning_rate": 3.5578500688573638e-06,
      "loss": 0.0071,
      "step": 1975360
    },
    {
      "epoch": 3.232752695351623,
      "grad_norm": 0.26283133029937744,
      "learning_rate": 3.5577841766438465e-06,
      "loss": 0.0088,
      "step": 1975380
    },
    {
      "epoch": 3.2327854257902766,
      "grad_norm": 0.2599344551563263,
      "learning_rate": 3.5577182844303293e-06,
      "loss": 0.0121,
      "step": 1975400
    },
    {
      "epoch": 3.2328181562289298,
      "grad_norm": 0.4296058118343353,
      "learning_rate": 3.557652392216812e-06,
      "loss": 0.0121,
      "step": 1975420
    },
    {
      "epoch": 3.232850886667583,
      "grad_norm": 0.3385457694530487,
      "learning_rate": 3.557586500003295e-06,
      "loss": 0.0107,
      "step": 1975440
    },
    {
      "epoch": 3.2328836171062365,
      "grad_norm": 0.08747416734695435,
      "learning_rate": 3.557520607789778e-06,
      "loss": 0.0125,
      "step": 1975460
    },
    {
      "epoch": 3.2329163475448897,
      "grad_norm": 0.324954628944397,
      "learning_rate": 3.5574547155762606e-06,
      "loss": 0.0161,
      "step": 1975480
    },
    {
      "epoch": 3.2329490779835433,
      "grad_norm": 0.8202853798866272,
      "learning_rate": 3.5573888233627434e-06,
      "loss": 0.0145,
      "step": 1975500
    },
    {
      "epoch": 3.2329818084221964,
      "grad_norm": 0.3733416795730591,
      "learning_rate": 3.5573229311492265e-06,
      "loss": 0.0134,
      "step": 1975520
    },
    {
      "epoch": 3.23301453886085,
      "grad_norm": 0.317818820476532,
      "learning_rate": 3.5572570389357093e-06,
      "loss": 0.0069,
      "step": 1975540
    },
    {
      "epoch": 3.233047269299503,
      "grad_norm": 0.06048570200800896,
      "learning_rate": 3.557191146722192e-06,
      "loss": 0.0137,
      "step": 1975560
    },
    {
      "epoch": 3.2330799997381563,
      "grad_norm": 0.15381276607513428,
      "learning_rate": 3.5571252545086748e-06,
      "loss": 0.0103,
      "step": 1975580
    },
    {
      "epoch": 3.23311273017681,
      "grad_norm": 0.13669300079345703,
      "learning_rate": 3.5570593622951575e-06,
      "loss": 0.0115,
      "step": 1975600
    },
    {
      "epoch": 3.233145460615463,
      "grad_norm": 0.17563581466674805,
      "learning_rate": 3.5569934700816407e-06,
      "loss": 0.018,
      "step": 1975620
    },
    {
      "epoch": 3.2331781910541166,
      "grad_norm": 0.5978905558586121,
      "learning_rate": 3.5569275778681234e-06,
      "loss": 0.0134,
      "step": 1975640
    },
    {
      "epoch": 3.23321092149277,
      "grad_norm": 0.30288609862327576,
      "learning_rate": 3.556861685654606e-06,
      "loss": 0.0093,
      "step": 1975660
    },
    {
      "epoch": 3.2332436519314234,
      "grad_norm": 0.2301648110151291,
      "learning_rate": 3.5567957934410897e-06,
      "loss": 0.0104,
      "step": 1975680
    },
    {
      "epoch": 3.2332763823700765,
      "grad_norm": 0.39966079592704773,
      "learning_rate": 3.5567299012275725e-06,
      "loss": 0.0099,
      "step": 1975700
    },
    {
      "epoch": 3.2333091128087297,
      "grad_norm": 0.37837862968444824,
      "learning_rate": 3.556664009014055e-06,
      "loss": 0.0109,
      "step": 1975720
    },
    {
      "epoch": 3.2333418432473833,
      "grad_norm": 0.2133658528327942,
      "learning_rate": 3.556598116800538e-06,
      "loss": 0.012,
      "step": 1975740
    },
    {
      "epoch": 3.2333745736860364,
      "grad_norm": 0.45016613602638245,
      "learning_rate": 3.556532224587021e-06,
      "loss": 0.0104,
      "step": 1975760
    },
    {
      "epoch": 3.23340730412469,
      "grad_norm": 0.11212826520204544,
      "learning_rate": 3.556466332373504e-06,
      "loss": 0.0123,
      "step": 1975780
    },
    {
      "epoch": 3.233440034563343,
      "grad_norm": 0.3156927227973938,
      "learning_rate": 3.5564004401599866e-06,
      "loss": 0.0118,
      "step": 1975800
    },
    {
      "epoch": 3.2334727650019968,
      "grad_norm": 0.7342958450317383,
      "learning_rate": 3.5563345479464693e-06,
      "loss": 0.0158,
      "step": 1975820
    },
    {
      "epoch": 3.23350549544065,
      "grad_norm": 0.3300524652004242,
      "learning_rate": 3.5562686557329525e-06,
      "loss": 0.0099,
      "step": 1975840
    },
    {
      "epoch": 3.233538225879303,
      "grad_norm": 0.48017817735671997,
      "learning_rate": 3.5562027635194352e-06,
      "loss": 0.0095,
      "step": 1975860
    },
    {
      "epoch": 3.2335709563179567,
      "grad_norm": 0.29288190603256226,
      "learning_rate": 3.556136871305918e-06,
      "loss": 0.0121,
      "step": 1975880
    },
    {
      "epoch": 3.23360368675661,
      "grad_norm": 0.2225131392478943,
      "learning_rate": 3.5560709790924007e-06,
      "loss": 0.0093,
      "step": 1975900
    },
    {
      "epoch": 3.2336364171952634,
      "grad_norm": 0.2576461732387543,
      "learning_rate": 3.556005086878884e-06,
      "loss": 0.01,
      "step": 1975920
    },
    {
      "epoch": 3.2336691476339166,
      "grad_norm": 0.4865962862968445,
      "learning_rate": 3.5559391946653666e-06,
      "loss": 0.0116,
      "step": 1975940
    },
    {
      "epoch": 3.23370187807257,
      "grad_norm": 0.2979697585105896,
      "learning_rate": 3.5558733024518494e-06,
      "loss": 0.0091,
      "step": 1975960
    },
    {
      "epoch": 3.2337346085112233,
      "grad_norm": 0.21154837310314178,
      "learning_rate": 3.555807410238332e-06,
      "loss": 0.012,
      "step": 1975980
    },
    {
      "epoch": 3.2337673389498764,
      "grad_norm": 0.21242348849773407,
      "learning_rate": 3.555741518024815e-06,
      "loss": 0.0107,
      "step": 1976000
    },
    {
      "epoch": 3.23380006938853,
      "grad_norm": 0.3030945062637329,
      "learning_rate": 3.5556756258112984e-06,
      "loss": 0.0082,
      "step": 1976020
    },
    {
      "epoch": 3.233832799827183,
      "grad_norm": 0.36156976222991943,
      "learning_rate": 3.555609733597781e-06,
      "loss": 0.0102,
      "step": 1976040
    },
    {
      "epoch": 3.233865530265837,
      "grad_norm": 0.2636485695838928,
      "learning_rate": 3.5555438413842643e-06,
      "loss": 0.0098,
      "step": 1976060
    },
    {
      "epoch": 3.23389826070449,
      "grad_norm": 0.5337539911270142,
      "learning_rate": 3.555477949170747e-06,
      "loss": 0.0125,
      "step": 1976080
    },
    {
      "epoch": 3.2339309911431435,
      "grad_norm": 0.32765042781829834,
      "learning_rate": 3.55541205695723e-06,
      "loss": 0.0078,
      "step": 1976100
    },
    {
      "epoch": 3.2339637215817967,
      "grad_norm": 0.2782146632671356,
      "learning_rate": 3.5553461647437125e-06,
      "loss": 0.0103,
      "step": 1976120
    },
    {
      "epoch": 3.23399645202045,
      "grad_norm": 0.25152868032455444,
      "learning_rate": 3.5552802725301953e-06,
      "loss": 0.0091,
      "step": 1976140
    },
    {
      "epoch": 3.2340291824591034,
      "grad_norm": 0.05619435757398605,
      "learning_rate": 3.5552143803166784e-06,
      "loss": 0.0092,
      "step": 1976160
    },
    {
      "epoch": 3.2340619128977566,
      "grad_norm": 0.04423229396343231,
      "learning_rate": 3.555148488103161e-06,
      "loss": 0.0114,
      "step": 1976180
    },
    {
      "epoch": 3.23409464333641,
      "grad_norm": 0.12890218198299408,
      "learning_rate": 3.555082595889644e-06,
      "loss": 0.0093,
      "step": 1976200
    },
    {
      "epoch": 3.2341273737750633,
      "grad_norm": 0.14054788649082184,
      "learning_rate": 3.5550167036761267e-06,
      "loss": 0.0134,
      "step": 1976220
    },
    {
      "epoch": 3.2341601042137165,
      "grad_norm": 0.46264705061912537,
      "learning_rate": 3.55495081146261e-06,
      "loss": 0.0104,
      "step": 1976240
    },
    {
      "epoch": 3.23419283465237,
      "grad_norm": 0.3235166668891907,
      "learning_rate": 3.5548849192490926e-06,
      "loss": 0.0124,
      "step": 1976260
    },
    {
      "epoch": 3.234225565091023,
      "grad_norm": 0.09256353229284286,
      "learning_rate": 3.5548190270355753e-06,
      "loss": 0.0144,
      "step": 1976280
    },
    {
      "epoch": 3.234258295529677,
      "grad_norm": 0.17535936832427979,
      "learning_rate": 3.554753134822058e-06,
      "loss": 0.0102,
      "step": 1976300
    },
    {
      "epoch": 3.23429102596833,
      "grad_norm": 0.4680229127407074,
      "learning_rate": 3.554687242608541e-06,
      "loss": 0.0092,
      "step": 1976320
    },
    {
      "epoch": 3.2343237564069836,
      "grad_norm": 0.1481424868106842,
      "learning_rate": 3.554621350395024e-06,
      "loss": 0.0102,
      "step": 1976340
    },
    {
      "epoch": 3.2343564868456367,
      "grad_norm": 0.33626896142959595,
      "learning_rate": 3.5545554581815067e-06,
      "loss": 0.015,
      "step": 1976360
    },
    {
      "epoch": 3.23438921728429,
      "grad_norm": 0.23673641681671143,
      "learning_rate": 3.5544895659679903e-06,
      "loss": 0.014,
      "step": 1976380
    },
    {
      "epoch": 3.2344219477229434,
      "grad_norm": 0.17501536011695862,
      "learning_rate": 3.554423673754473e-06,
      "loss": 0.0174,
      "step": 1976400
    },
    {
      "epoch": 3.2344546781615966,
      "grad_norm": 0.2606392800807953,
      "learning_rate": 3.5543577815409558e-06,
      "loss": 0.009,
      "step": 1976420
    },
    {
      "epoch": 3.23448740860025,
      "grad_norm": 0.21388480067253113,
      "learning_rate": 3.5542918893274385e-06,
      "loss": 0.0126,
      "step": 1976440
    },
    {
      "epoch": 3.2345201390389033,
      "grad_norm": 0.18231038749217987,
      "learning_rate": 3.5542259971139217e-06,
      "loss": 0.0105,
      "step": 1976460
    },
    {
      "epoch": 3.234552869477557,
      "grad_norm": 0.23587828874588013,
      "learning_rate": 3.5541601049004044e-06,
      "loss": 0.0073,
      "step": 1976480
    },
    {
      "epoch": 3.23458559991621,
      "grad_norm": 0.38079679012298584,
      "learning_rate": 3.554094212686887e-06,
      "loss": 0.0184,
      "step": 1976500
    },
    {
      "epoch": 3.2346183303548632,
      "grad_norm": 0.742404580116272,
      "learning_rate": 3.55402832047337e-06,
      "loss": 0.0111,
      "step": 1976520
    },
    {
      "epoch": 3.234651060793517,
      "grad_norm": 0.30124059319496155,
      "learning_rate": 3.5539624282598526e-06,
      "loss": 0.0121,
      "step": 1976540
    },
    {
      "epoch": 3.23468379123217,
      "grad_norm": 0.2753749191761017,
      "learning_rate": 3.5538965360463358e-06,
      "loss": 0.0062,
      "step": 1976560
    },
    {
      "epoch": 3.2347165216708236,
      "grad_norm": 0.3638637661933899,
      "learning_rate": 3.5538306438328185e-06,
      "loss": 0.0108,
      "step": 1976580
    },
    {
      "epoch": 3.2347492521094767,
      "grad_norm": 0.5507222414016724,
      "learning_rate": 3.5537647516193013e-06,
      "loss": 0.0112,
      "step": 1976600
    },
    {
      "epoch": 3.23478198254813,
      "grad_norm": 0.31347689032554626,
      "learning_rate": 3.553698859405784e-06,
      "loss": 0.0123,
      "step": 1976620
    },
    {
      "epoch": 3.2348147129867835,
      "grad_norm": 0.6005914807319641,
      "learning_rate": 3.553632967192267e-06,
      "loss": 0.0132,
      "step": 1976640
    },
    {
      "epoch": 3.2348474434254366,
      "grad_norm": 0.5089110136032104,
      "learning_rate": 3.55356707497875e-06,
      "loss": 0.0228,
      "step": 1976660
    },
    {
      "epoch": 3.23488017386409,
      "grad_norm": 0.5449998378753662,
      "learning_rate": 3.5535011827652326e-06,
      "loss": 0.0087,
      "step": 1976680
    },
    {
      "epoch": 3.2349129043027434,
      "grad_norm": 0.3325623571872711,
      "learning_rate": 3.5534352905517154e-06,
      "loss": 0.0123,
      "step": 1976700
    },
    {
      "epoch": 3.234945634741397,
      "grad_norm": 0.5251542329788208,
      "learning_rate": 3.553369398338199e-06,
      "loss": 0.0092,
      "step": 1976720
    },
    {
      "epoch": 3.23497836518005,
      "grad_norm": 0.14566780626773834,
      "learning_rate": 3.5533035061246817e-06,
      "loss": 0.0165,
      "step": 1976740
    },
    {
      "epoch": 3.2350110956187033,
      "grad_norm": 0.6524632573127747,
      "learning_rate": 3.5532376139111644e-06,
      "loss": 0.0142,
      "step": 1976760
    },
    {
      "epoch": 3.235043826057357,
      "grad_norm": 0.04129086062312126,
      "learning_rate": 3.5531717216976476e-06,
      "loss": 0.0149,
      "step": 1976780
    },
    {
      "epoch": 3.23507655649601,
      "grad_norm": 0.12831638753414154,
      "learning_rate": 3.5531058294841304e-06,
      "loss": 0.0072,
      "step": 1976800
    },
    {
      "epoch": 3.2351092869346636,
      "grad_norm": 0.16766202449798584,
      "learning_rate": 3.553039937270613e-06,
      "loss": 0.012,
      "step": 1976820
    },
    {
      "epoch": 3.2351420173733167,
      "grad_norm": 0.17548465728759766,
      "learning_rate": 3.552974045057096e-06,
      "loss": 0.0096,
      "step": 1976840
    },
    {
      "epoch": 3.2351747478119703,
      "grad_norm": 0.23255565762519836,
      "learning_rate": 3.552908152843579e-06,
      "loss": 0.0098,
      "step": 1976860
    },
    {
      "epoch": 3.2352074782506235,
      "grad_norm": 0.46724045276641846,
      "learning_rate": 3.5528422606300617e-06,
      "loss": 0.0117,
      "step": 1976880
    },
    {
      "epoch": 3.2352402086892766,
      "grad_norm": 0.3246192932128906,
      "learning_rate": 3.5527763684165445e-06,
      "loss": 0.0108,
      "step": 1976900
    },
    {
      "epoch": 3.2352729391279302,
      "grad_norm": 0.3304672837257385,
      "learning_rate": 3.5527104762030272e-06,
      "loss": 0.0133,
      "step": 1976920
    },
    {
      "epoch": 3.2353056695665834,
      "grad_norm": 0.25891226530075073,
      "learning_rate": 3.5526445839895104e-06,
      "loss": 0.0105,
      "step": 1976940
    },
    {
      "epoch": 3.235338400005237,
      "grad_norm": 0.252131849527359,
      "learning_rate": 3.552578691775993e-06,
      "loss": 0.0099,
      "step": 1976960
    },
    {
      "epoch": 3.23537113044389,
      "grad_norm": 0.3573034405708313,
      "learning_rate": 3.552512799562476e-06,
      "loss": 0.0094,
      "step": 1976980
    },
    {
      "epoch": 3.2354038608825437,
      "grad_norm": 0.1117214635014534,
      "learning_rate": 3.5524469073489586e-06,
      "loss": 0.0112,
      "step": 1977000
    },
    {
      "epoch": 3.235436591321197,
      "grad_norm": 0.08780498057603836,
      "learning_rate": 3.5523810151354413e-06,
      "loss": 0.0143,
      "step": 1977020
    },
    {
      "epoch": 3.23546932175985,
      "grad_norm": 0.44518107175827026,
      "learning_rate": 3.5523151229219245e-06,
      "loss": 0.0099,
      "step": 1977040
    },
    {
      "epoch": 3.2355020521985036,
      "grad_norm": 0.16781216859817505,
      "learning_rate": 3.5522492307084072e-06,
      "loss": 0.0104,
      "step": 1977060
    },
    {
      "epoch": 3.2355347826371568,
      "grad_norm": 0.12500052154064178,
      "learning_rate": 3.5521833384948904e-06,
      "loss": 0.0113,
      "step": 1977080
    },
    {
      "epoch": 3.2355675130758104,
      "grad_norm": 0.1881541609764099,
      "learning_rate": 3.5521174462813736e-06,
      "loss": 0.0116,
      "step": 1977100
    },
    {
      "epoch": 3.2356002435144635,
      "grad_norm": 0.2362469881772995,
      "learning_rate": 3.5520515540678563e-06,
      "loss": 0.0094,
      "step": 1977120
    },
    {
      "epoch": 3.235632973953117,
      "grad_norm": 0.11848264187574387,
      "learning_rate": 3.551985661854339e-06,
      "loss": 0.0083,
      "step": 1977140
    },
    {
      "epoch": 3.2356657043917703,
      "grad_norm": 0.17685236036777496,
      "learning_rate": 3.5519197696408218e-06,
      "loss": 0.0109,
      "step": 1977160
    },
    {
      "epoch": 3.2356984348304234,
      "grad_norm": 0.1442900002002716,
      "learning_rate": 3.551853877427305e-06,
      "loss": 0.0099,
      "step": 1977180
    },
    {
      "epoch": 3.235731165269077,
      "grad_norm": 0.5556603670120239,
      "learning_rate": 3.5517879852137877e-06,
      "loss": 0.0159,
      "step": 1977200
    },
    {
      "epoch": 3.23576389570773,
      "grad_norm": 0.08913636207580566,
      "learning_rate": 3.5517220930002704e-06,
      "loss": 0.0123,
      "step": 1977220
    },
    {
      "epoch": 3.2357966261463837,
      "grad_norm": 0.15452779829502106,
      "learning_rate": 3.551656200786753e-06,
      "loss": 0.0103,
      "step": 1977240
    },
    {
      "epoch": 3.235829356585037,
      "grad_norm": 0.08314800262451172,
      "learning_rate": 3.5515903085732363e-06,
      "loss": 0.0094,
      "step": 1977260
    },
    {
      "epoch": 3.2358620870236905,
      "grad_norm": 3.1771585941314697,
      "learning_rate": 3.551524416359719e-06,
      "loss": 0.0084,
      "step": 1977280
    },
    {
      "epoch": 3.2358948174623436,
      "grad_norm": 0.2904052734375,
      "learning_rate": 3.551458524146202e-06,
      "loss": 0.0128,
      "step": 1977300
    },
    {
      "epoch": 3.235927547900997,
      "grad_norm": 0.43972569704055786,
      "learning_rate": 3.5513926319326845e-06,
      "loss": 0.0199,
      "step": 1977320
    },
    {
      "epoch": 3.2359602783396504,
      "grad_norm": 0.7108876705169678,
      "learning_rate": 3.5513267397191677e-06,
      "loss": 0.0214,
      "step": 1977340
    },
    {
      "epoch": 3.2359930087783035,
      "grad_norm": 0.0977969840168953,
      "learning_rate": 3.5512608475056505e-06,
      "loss": 0.0172,
      "step": 1977360
    },
    {
      "epoch": 3.236025739216957,
      "grad_norm": 0.20699794590473175,
      "learning_rate": 3.551194955292133e-06,
      "loss": 0.0138,
      "step": 1977380
    },
    {
      "epoch": 3.2360584696556103,
      "grad_norm": 0.41960275173187256,
      "learning_rate": 3.551129063078616e-06,
      "loss": 0.0122,
      "step": 1977400
    },
    {
      "epoch": 3.236091200094264,
      "grad_norm": 0.13270114362239838,
      "learning_rate": 3.551063170865099e-06,
      "loss": 0.0063,
      "step": 1977420
    },
    {
      "epoch": 3.236123930532917,
      "grad_norm": 0.22098304331302643,
      "learning_rate": 3.5509972786515823e-06,
      "loss": 0.0142,
      "step": 1977440
    },
    {
      "epoch": 3.23615666097157,
      "grad_norm": 0.21264120936393738,
      "learning_rate": 3.550931386438065e-06,
      "loss": 0.0122,
      "step": 1977460
    },
    {
      "epoch": 3.2361893914102238,
      "grad_norm": 0.5681004524230957,
      "learning_rate": 3.550865494224548e-06,
      "loss": 0.012,
      "step": 1977480
    },
    {
      "epoch": 3.236222121848877,
      "grad_norm": 0.2288619726896286,
      "learning_rate": 3.550799602011031e-06,
      "loss": 0.0153,
      "step": 1977500
    },
    {
      "epoch": 3.2362548522875305,
      "grad_norm": 0.4619534909725189,
      "learning_rate": 3.5507337097975136e-06,
      "loss": 0.0136,
      "step": 1977520
    },
    {
      "epoch": 3.2362875827261837,
      "grad_norm": 0.11525596678256989,
      "learning_rate": 3.5506678175839964e-06,
      "loss": 0.0085,
      "step": 1977540
    },
    {
      "epoch": 3.2363203131648373,
      "grad_norm": 0.4648379981517792,
      "learning_rate": 3.550601925370479e-06,
      "loss": 0.0095,
      "step": 1977560
    },
    {
      "epoch": 3.2363530436034904,
      "grad_norm": 0.2710067331790924,
      "learning_rate": 3.5505360331569623e-06,
      "loss": 0.0141,
      "step": 1977580
    },
    {
      "epoch": 3.2363857740421436,
      "grad_norm": 0.4386245906352997,
      "learning_rate": 3.550470140943445e-06,
      "loss": 0.0132,
      "step": 1977600
    },
    {
      "epoch": 3.236418504480797,
      "grad_norm": 0.11883329600095749,
      "learning_rate": 3.5504042487299278e-06,
      "loss": 0.0115,
      "step": 1977620
    },
    {
      "epoch": 3.2364512349194503,
      "grad_norm": 0.18090440332889557,
      "learning_rate": 3.5503383565164105e-06,
      "loss": 0.0106,
      "step": 1977640
    },
    {
      "epoch": 3.236483965358104,
      "grad_norm": 1.2411465644836426,
      "learning_rate": 3.5502724643028937e-06,
      "loss": 0.0139,
      "step": 1977660
    },
    {
      "epoch": 3.236516695796757,
      "grad_norm": 0.4077710807323456,
      "learning_rate": 3.5502065720893764e-06,
      "loss": 0.01,
      "step": 1977680
    },
    {
      "epoch": 3.2365494262354106,
      "grad_norm": 0.4647243320941925,
      "learning_rate": 3.550140679875859e-06,
      "loss": 0.0086,
      "step": 1977700
    },
    {
      "epoch": 3.236582156674064,
      "grad_norm": 0.24089276790618896,
      "learning_rate": 3.550074787662342e-06,
      "loss": 0.02,
      "step": 1977720
    },
    {
      "epoch": 3.236614887112717,
      "grad_norm": 0.17104202508926392,
      "learning_rate": 3.550008895448825e-06,
      "loss": 0.0204,
      "step": 1977740
    },
    {
      "epoch": 3.2366476175513705,
      "grad_norm": 0.34750130772590637,
      "learning_rate": 3.549943003235308e-06,
      "loss": 0.0109,
      "step": 1977760
    },
    {
      "epoch": 3.2366803479900237,
      "grad_norm": 0.3640629053115845,
      "learning_rate": 3.549877111021791e-06,
      "loss": 0.009,
      "step": 1977780
    },
    {
      "epoch": 3.2367130784286773,
      "grad_norm": 0.2576322853565216,
      "learning_rate": 3.549811218808274e-06,
      "loss": 0.0117,
      "step": 1977800
    },
    {
      "epoch": 3.2367458088673304,
      "grad_norm": 0.09331808984279633,
      "learning_rate": 3.549745326594757e-06,
      "loss": 0.013,
      "step": 1977820
    },
    {
      "epoch": 3.2367785393059836,
      "grad_norm": 0.2963733375072479,
      "learning_rate": 3.5496794343812396e-06,
      "loss": 0.0147,
      "step": 1977840
    },
    {
      "epoch": 3.236811269744637,
      "grad_norm": 0.5639995336532593,
      "learning_rate": 3.5496135421677223e-06,
      "loss": 0.0129,
      "step": 1977860
    },
    {
      "epoch": 3.2368440001832903,
      "grad_norm": 0.5649426579475403,
      "learning_rate": 3.5495476499542055e-06,
      "loss": 0.016,
      "step": 1977880
    },
    {
      "epoch": 3.236876730621944,
      "grad_norm": 0.13305197656154633,
      "learning_rate": 3.5494817577406882e-06,
      "loss": 0.0115,
      "step": 1977900
    },
    {
      "epoch": 3.236909461060597,
      "grad_norm": 0.03230145201086998,
      "learning_rate": 3.549415865527171e-06,
      "loss": 0.0085,
      "step": 1977920
    },
    {
      "epoch": 3.2369421914992507,
      "grad_norm": 0.28304746747016907,
      "learning_rate": 3.5493499733136537e-06,
      "loss": 0.012,
      "step": 1977940
    },
    {
      "epoch": 3.236974921937904,
      "grad_norm": 0.7164672613143921,
      "learning_rate": 3.549284081100137e-06,
      "loss": 0.013,
      "step": 1977960
    },
    {
      "epoch": 3.237007652376557,
      "grad_norm": 0.8249239921569824,
      "learning_rate": 3.5492181888866196e-06,
      "loss": 0.0156,
      "step": 1977980
    },
    {
      "epoch": 3.2370403828152106,
      "grad_norm": 0.49616363644599915,
      "learning_rate": 3.5491522966731024e-06,
      "loss": 0.0159,
      "step": 1978000
    },
    {
      "epoch": 3.2370731132538637,
      "grad_norm": 0.6658211946487427,
      "learning_rate": 3.549086404459585e-06,
      "loss": 0.0117,
      "step": 1978020
    },
    {
      "epoch": 3.2371058436925173,
      "grad_norm": 0.33227628469467163,
      "learning_rate": 3.549020512246068e-06,
      "loss": 0.0184,
      "step": 1978040
    },
    {
      "epoch": 3.2371385741311705,
      "grad_norm": 0.24660536646842957,
      "learning_rate": 3.548954620032551e-06,
      "loss": 0.0088,
      "step": 1978060
    },
    {
      "epoch": 3.237171304569824,
      "grad_norm": 0.6371362805366516,
      "learning_rate": 3.5488887278190337e-06,
      "loss": 0.0127,
      "step": 1978080
    },
    {
      "epoch": 3.237204035008477,
      "grad_norm": 0.2554888427257538,
      "learning_rate": 3.5488228356055165e-06,
      "loss": 0.0145,
      "step": 1978100
    },
    {
      "epoch": 3.2372367654471303,
      "grad_norm": 0.170536071062088,
      "learning_rate": 3.5487569433919992e-06,
      "loss": 0.0132,
      "step": 1978120
    },
    {
      "epoch": 3.237269495885784,
      "grad_norm": 0.3152318596839905,
      "learning_rate": 3.548691051178483e-06,
      "loss": 0.0146,
      "step": 1978140
    },
    {
      "epoch": 3.237302226324437,
      "grad_norm": 0.30332043766975403,
      "learning_rate": 3.5486251589649655e-06,
      "loss": 0.0117,
      "step": 1978160
    },
    {
      "epoch": 3.2373349567630907,
      "grad_norm": 0.4567932188510895,
      "learning_rate": 3.5485592667514483e-06,
      "loss": 0.0104,
      "step": 1978180
    },
    {
      "epoch": 3.237367687201744,
      "grad_norm": 0.2898902893066406,
      "learning_rate": 3.5484933745379315e-06,
      "loss": 0.0141,
      "step": 1978200
    },
    {
      "epoch": 3.237400417640397,
      "grad_norm": 0.10165244340896606,
      "learning_rate": 3.548427482324414e-06,
      "loss": 0.0113,
      "step": 1978220
    },
    {
      "epoch": 3.2374331480790506,
      "grad_norm": 0.34657126665115356,
      "learning_rate": 3.548361590110897e-06,
      "loss": 0.0125,
      "step": 1978240
    },
    {
      "epoch": 3.2374658785177037,
      "grad_norm": 0.2902294993400574,
      "learning_rate": 3.5482956978973797e-06,
      "loss": 0.0099,
      "step": 1978260
    },
    {
      "epoch": 3.2374986089563573,
      "grad_norm": 0.08684703707695007,
      "learning_rate": 3.548229805683863e-06,
      "loss": 0.0109,
      "step": 1978280
    },
    {
      "epoch": 3.2375313393950105,
      "grad_norm": 0.2409432977437973,
      "learning_rate": 3.5481639134703456e-06,
      "loss": 0.0118,
      "step": 1978300
    },
    {
      "epoch": 3.237564069833664,
      "grad_norm": 0.4355041980743408,
      "learning_rate": 3.5480980212568283e-06,
      "loss": 0.011,
      "step": 1978320
    },
    {
      "epoch": 3.237596800272317,
      "grad_norm": 0.06123993918299675,
      "learning_rate": 3.548032129043311e-06,
      "loss": 0.0252,
      "step": 1978340
    },
    {
      "epoch": 3.2376295307109704,
      "grad_norm": 0.1602502167224884,
      "learning_rate": 3.5479662368297942e-06,
      "loss": 0.0072,
      "step": 1978360
    },
    {
      "epoch": 3.237662261149624,
      "grad_norm": 0.18691720068454742,
      "learning_rate": 3.547900344616277e-06,
      "loss": 0.0115,
      "step": 1978380
    },
    {
      "epoch": 3.237694991588277,
      "grad_norm": 0.3418589234352112,
      "learning_rate": 3.5478344524027597e-06,
      "loss": 0.0117,
      "step": 1978400
    },
    {
      "epoch": 3.2377277220269307,
      "grad_norm": 0.6624311804771423,
      "learning_rate": 3.5477685601892424e-06,
      "loss": 0.0137,
      "step": 1978420
    },
    {
      "epoch": 3.237760452465584,
      "grad_norm": 0.393769770860672,
      "learning_rate": 3.547702667975725e-06,
      "loss": 0.0107,
      "step": 1978440
    },
    {
      "epoch": 3.2377931829042375,
      "grad_norm": 0.26992693543434143,
      "learning_rate": 3.5476367757622083e-06,
      "loss": 0.0081,
      "step": 1978460
    },
    {
      "epoch": 3.2378259133428906,
      "grad_norm": 0.5322741866111755,
      "learning_rate": 3.5475708835486915e-06,
      "loss": 0.0108,
      "step": 1978480
    },
    {
      "epoch": 3.2378586437815438,
      "grad_norm": 0.20509590208530426,
      "learning_rate": 3.5475049913351742e-06,
      "loss": 0.0117,
      "step": 1978500
    },
    {
      "epoch": 3.2378913742201973,
      "grad_norm": 0.7759296298027039,
      "learning_rate": 3.5474390991216574e-06,
      "loss": 0.0104,
      "step": 1978520
    },
    {
      "epoch": 3.2379241046588505,
      "grad_norm": 0.3107367157936096,
      "learning_rate": 3.54737320690814e-06,
      "loss": 0.0104,
      "step": 1978540
    },
    {
      "epoch": 3.237956835097504,
      "grad_norm": 0.2577238380908966,
      "learning_rate": 3.547307314694623e-06,
      "loss": 0.0126,
      "step": 1978560
    },
    {
      "epoch": 3.2379895655361572,
      "grad_norm": 0.29416289925575256,
      "learning_rate": 3.5472414224811056e-06,
      "loss": 0.0086,
      "step": 1978580
    },
    {
      "epoch": 3.238022295974811,
      "grad_norm": 0.1064332127571106,
      "learning_rate": 3.5471755302675888e-06,
      "loss": 0.0172,
      "step": 1978600
    },
    {
      "epoch": 3.238055026413464,
      "grad_norm": 0.1766146421432495,
      "learning_rate": 3.5471096380540715e-06,
      "loss": 0.0067,
      "step": 1978620
    },
    {
      "epoch": 3.238087756852117,
      "grad_norm": 0.20398683845996857,
      "learning_rate": 3.5470437458405543e-06,
      "loss": 0.0128,
      "step": 1978640
    },
    {
      "epoch": 3.2381204872907707,
      "grad_norm": 0.3707045912742615,
      "learning_rate": 3.546977853627037e-06,
      "loss": 0.0095,
      "step": 1978660
    },
    {
      "epoch": 3.238153217729424,
      "grad_norm": 0.1849963217973709,
      "learning_rate": 3.54691196141352e-06,
      "loss": 0.008,
      "step": 1978680
    },
    {
      "epoch": 3.2381859481680775,
      "grad_norm": 0.44962191581726074,
      "learning_rate": 3.546846069200003e-06,
      "loss": 0.0144,
      "step": 1978700
    },
    {
      "epoch": 3.2382186786067306,
      "grad_norm": 0.22188979387283325,
      "learning_rate": 3.5467801769864856e-06,
      "loss": 0.0103,
      "step": 1978720
    },
    {
      "epoch": 3.238251409045384,
      "grad_norm": 0.413251668214798,
      "learning_rate": 3.5467142847729684e-06,
      "loss": 0.013,
      "step": 1978740
    },
    {
      "epoch": 3.2382841394840374,
      "grad_norm": 0.2990284562110901,
      "learning_rate": 3.5466483925594516e-06,
      "loss": 0.0143,
      "step": 1978760
    },
    {
      "epoch": 3.2383168699226905,
      "grad_norm": 0.2565774917602539,
      "learning_rate": 3.5465825003459343e-06,
      "loss": 0.0117,
      "step": 1978780
    },
    {
      "epoch": 3.238349600361344,
      "grad_norm": 0.6046616435050964,
      "learning_rate": 3.546516608132417e-06,
      "loss": 0.0139,
      "step": 1978800
    },
    {
      "epoch": 3.2383823307999973,
      "grad_norm": 0.06763602793216705,
      "learning_rate": 3.5464507159188998e-06,
      "loss": 0.0118,
      "step": 1978820
    },
    {
      "epoch": 3.238415061238651,
      "grad_norm": 0.36445188522338867,
      "learning_rate": 3.5463848237053834e-06,
      "loss": 0.0158,
      "step": 1978840
    },
    {
      "epoch": 3.238447791677304,
      "grad_norm": 0.7295377254486084,
      "learning_rate": 3.546318931491866e-06,
      "loss": 0.014,
      "step": 1978860
    },
    {
      "epoch": 3.2384805221159576,
      "grad_norm": 0.1272469162940979,
      "learning_rate": 3.546253039278349e-06,
      "loss": 0.0147,
      "step": 1978880
    },
    {
      "epoch": 3.2385132525546108,
      "grad_norm": 0.5369443297386169,
      "learning_rate": 3.546187147064832e-06,
      "loss": 0.0132,
      "step": 1978900
    },
    {
      "epoch": 3.238545982993264,
      "grad_norm": 0.19019602239131927,
      "learning_rate": 3.5461212548513147e-06,
      "loss": 0.0144,
      "step": 1978920
    },
    {
      "epoch": 3.2385787134319175,
      "grad_norm": 0.1942874640226364,
      "learning_rate": 3.5460553626377975e-06,
      "loss": 0.0076,
      "step": 1978940
    },
    {
      "epoch": 3.2386114438705706,
      "grad_norm": 0.2878810465335846,
      "learning_rate": 3.5459894704242802e-06,
      "loss": 0.0094,
      "step": 1978960
    },
    {
      "epoch": 3.2386441743092242,
      "grad_norm": 0.25627031922340393,
      "learning_rate": 3.545923578210763e-06,
      "loss": 0.0096,
      "step": 1978980
    },
    {
      "epoch": 3.2386769047478774,
      "grad_norm": 0.08840367943048477,
      "learning_rate": 3.545857685997246e-06,
      "loss": 0.0122,
      "step": 1979000
    },
    {
      "epoch": 3.238709635186531,
      "grad_norm": 0.19559451937675476,
      "learning_rate": 3.545791793783729e-06,
      "loss": 0.0156,
      "step": 1979020
    },
    {
      "epoch": 3.238742365625184,
      "grad_norm": 0.3049764931201935,
      "learning_rate": 3.5457259015702116e-06,
      "loss": 0.0109,
      "step": 1979040
    },
    {
      "epoch": 3.2387750960638373,
      "grad_norm": 0.3286113739013672,
      "learning_rate": 3.5456600093566943e-06,
      "loss": 0.0166,
      "step": 1979060
    },
    {
      "epoch": 3.238807826502491,
      "grad_norm": 0.332093745470047,
      "learning_rate": 3.5455941171431775e-06,
      "loss": 0.0111,
      "step": 1979080
    },
    {
      "epoch": 3.238840556941144,
      "grad_norm": 0.2886230945587158,
      "learning_rate": 3.5455282249296602e-06,
      "loss": 0.0145,
      "step": 1979100
    },
    {
      "epoch": 3.2388732873797976,
      "grad_norm": 0.12986505031585693,
      "learning_rate": 3.545462332716143e-06,
      "loss": 0.0104,
      "step": 1979120
    },
    {
      "epoch": 3.2389060178184508,
      "grad_norm": 0.7729217410087585,
      "learning_rate": 3.5453964405026257e-06,
      "loss": 0.01,
      "step": 1979140
    },
    {
      "epoch": 3.2389387482571044,
      "grad_norm": 0.6896230578422546,
      "learning_rate": 3.545330548289109e-06,
      "loss": 0.0118,
      "step": 1979160
    },
    {
      "epoch": 3.2389714786957575,
      "grad_norm": 0.6539820432662964,
      "learning_rate": 3.545264656075592e-06,
      "loss": 0.0103,
      "step": 1979180
    },
    {
      "epoch": 3.2390042091344107,
      "grad_norm": 0.24069327116012573,
      "learning_rate": 3.545198763862075e-06,
      "loss": 0.0186,
      "step": 1979200
    },
    {
      "epoch": 3.2390369395730643,
      "grad_norm": 0.15497419238090515,
      "learning_rate": 3.545132871648558e-06,
      "loss": 0.0094,
      "step": 1979220
    },
    {
      "epoch": 3.2390696700117174,
      "grad_norm": 0.13153769075870514,
      "learning_rate": 3.5450669794350407e-06,
      "loss": 0.0121,
      "step": 1979240
    },
    {
      "epoch": 3.239102400450371,
      "grad_norm": 0.4080064594745636,
      "learning_rate": 3.5450010872215234e-06,
      "loss": 0.0155,
      "step": 1979260
    },
    {
      "epoch": 3.239135130889024,
      "grad_norm": 0.20102445781230927,
      "learning_rate": 3.544935195008006e-06,
      "loss": 0.0114,
      "step": 1979280
    },
    {
      "epoch": 3.2391678613276773,
      "grad_norm": 0.21971657872200012,
      "learning_rate": 3.5448693027944893e-06,
      "loss": 0.011,
      "step": 1979300
    },
    {
      "epoch": 3.239200591766331,
      "grad_norm": 0.20273779332637787,
      "learning_rate": 3.544803410580972e-06,
      "loss": 0.0061,
      "step": 1979320
    },
    {
      "epoch": 3.239233322204984,
      "grad_norm": 0.08826396614313126,
      "learning_rate": 3.544737518367455e-06,
      "loss": 0.0139,
      "step": 1979340
    },
    {
      "epoch": 3.2392660526436377,
      "grad_norm": 0.4187101721763611,
      "learning_rate": 3.5446716261539376e-06,
      "loss": 0.0085,
      "step": 1979360
    },
    {
      "epoch": 3.239298783082291,
      "grad_norm": 0.33490893244743347,
      "learning_rate": 3.5446057339404207e-06,
      "loss": 0.0103,
      "step": 1979380
    },
    {
      "epoch": 3.2393315135209444,
      "grad_norm": 0.44705694913864136,
      "learning_rate": 3.5445398417269035e-06,
      "loss": 0.0117,
      "step": 1979400
    },
    {
      "epoch": 3.2393642439595975,
      "grad_norm": 0.48312053084373474,
      "learning_rate": 3.544473949513386e-06,
      "loss": 0.013,
      "step": 1979420
    },
    {
      "epoch": 3.2393969743982507,
      "grad_norm": 0.5626941323280334,
      "learning_rate": 3.544408057299869e-06,
      "loss": 0.011,
      "step": 1979440
    },
    {
      "epoch": 3.2394297048369043,
      "grad_norm": 0.33668237924575806,
      "learning_rate": 3.5443421650863517e-06,
      "loss": 0.0092,
      "step": 1979460
    },
    {
      "epoch": 3.2394624352755574,
      "grad_norm": 0.16743896901607513,
      "learning_rate": 3.544276272872835e-06,
      "loss": 0.0209,
      "step": 1979480
    },
    {
      "epoch": 3.239495165714211,
      "grad_norm": 0.14139984548091888,
      "learning_rate": 3.5442103806593176e-06,
      "loss": 0.0093,
      "step": 1979500
    },
    {
      "epoch": 3.239527896152864,
      "grad_norm": 0.22990508377552032,
      "learning_rate": 3.5441444884458003e-06,
      "loss": 0.0077,
      "step": 1979520
    },
    {
      "epoch": 3.2395606265915178,
      "grad_norm": 0.23775947093963623,
      "learning_rate": 3.544078596232284e-06,
      "loss": 0.0095,
      "step": 1979540
    },
    {
      "epoch": 3.239593357030171,
      "grad_norm": 0.11862782388925552,
      "learning_rate": 3.5440127040187666e-06,
      "loss": 0.0112,
      "step": 1979560
    },
    {
      "epoch": 3.239626087468824,
      "grad_norm": 0.33500275015830994,
      "learning_rate": 3.5439468118052494e-06,
      "loss": 0.0113,
      "step": 1979580
    },
    {
      "epoch": 3.2396588179074777,
      "grad_norm": 0.14201335608959198,
      "learning_rate": 3.543880919591732e-06,
      "loss": 0.0105,
      "step": 1979600
    },
    {
      "epoch": 3.239691548346131,
      "grad_norm": 0.2299376130104065,
      "learning_rate": 3.5438150273782153e-06,
      "loss": 0.0172,
      "step": 1979620
    },
    {
      "epoch": 3.2397242787847844,
      "grad_norm": 0.45819535851478577,
      "learning_rate": 3.543749135164698e-06,
      "loss": 0.0151,
      "step": 1979640
    },
    {
      "epoch": 3.2397570092234376,
      "grad_norm": 0.6235643625259399,
      "learning_rate": 3.5436832429511808e-06,
      "loss": 0.0142,
      "step": 1979660
    },
    {
      "epoch": 3.2397897396620907,
      "grad_norm": 0.2632335424423218,
      "learning_rate": 3.5436173507376635e-06,
      "loss": 0.0141,
      "step": 1979680
    },
    {
      "epoch": 3.2398224701007443,
      "grad_norm": 0.10605309903621674,
      "learning_rate": 3.5435514585241467e-06,
      "loss": 0.0093,
      "step": 1979700
    },
    {
      "epoch": 3.2398552005393975,
      "grad_norm": 1.1234444379806519,
      "learning_rate": 3.5434855663106294e-06,
      "loss": 0.0103,
      "step": 1979720
    },
    {
      "epoch": 3.239887930978051,
      "grad_norm": 0.5160371661186218,
      "learning_rate": 3.543419674097112e-06,
      "loss": 0.0124,
      "step": 1979740
    },
    {
      "epoch": 3.239920661416704,
      "grad_norm": 0.33041253685951233,
      "learning_rate": 3.543353781883595e-06,
      "loss": 0.0078,
      "step": 1979760
    },
    {
      "epoch": 3.239953391855358,
      "grad_norm": 0.0412711501121521,
      "learning_rate": 3.543287889670078e-06,
      "loss": 0.0107,
      "step": 1979780
    },
    {
      "epoch": 3.239986122294011,
      "grad_norm": 0.24248918890953064,
      "learning_rate": 3.543221997456561e-06,
      "loss": 0.0103,
      "step": 1979800
    },
    {
      "epoch": 3.240018852732664,
      "grad_norm": 0.2041536271572113,
      "learning_rate": 3.5431561052430435e-06,
      "loss": 0.0112,
      "step": 1979820
    },
    {
      "epoch": 3.2400515831713177,
      "grad_norm": 0.1531248837709427,
      "learning_rate": 3.5430902130295263e-06,
      "loss": 0.0093,
      "step": 1979840
    },
    {
      "epoch": 3.240084313609971,
      "grad_norm": 0.3557876646518707,
      "learning_rate": 3.543024320816009e-06,
      "loss": 0.0109,
      "step": 1979860
    },
    {
      "epoch": 3.2401170440486244,
      "grad_norm": 0.19922174513339996,
      "learning_rate": 3.542958428602492e-06,
      "loss": 0.0139,
      "step": 1979880
    },
    {
      "epoch": 3.2401497744872776,
      "grad_norm": 0.22928158938884735,
      "learning_rate": 3.5428925363889753e-06,
      "loss": 0.0092,
      "step": 1979900
    },
    {
      "epoch": 3.240182504925931,
      "grad_norm": 0.43399864435195923,
      "learning_rate": 3.5428266441754585e-06,
      "loss": 0.0123,
      "step": 1979920
    },
    {
      "epoch": 3.2402152353645843,
      "grad_norm": 0.09277959913015366,
      "learning_rate": 3.5427607519619412e-06,
      "loss": 0.0076,
      "step": 1979940
    },
    {
      "epoch": 3.2402479658032375,
      "grad_norm": 0.22687384486198425,
      "learning_rate": 3.542694859748424e-06,
      "loss": 0.0091,
      "step": 1979960
    },
    {
      "epoch": 3.240280696241891,
      "grad_norm": 0.3513515293598175,
      "learning_rate": 3.5426289675349067e-06,
      "loss": 0.013,
      "step": 1979980
    },
    {
      "epoch": 3.2403134266805442,
      "grad_norm": 0.6039212942123413,
      "learning_rate": 3.5425630753213895e-06,
      "loss": 0.0125,
      "step": 1980000
    },
    {
      "epoch": 3.240346157119198,
      "grad_norm": 0.2980961203575134,
      "learning_rate": 3.5424971831078726e-06,
      "loss": 0.0135,
      "step": 1980020
    },
    {
      "epoch": 3.240378887557851,
      "grad_norm": 0.16681858897209167,
      "learning_rate": 3.5424312908943554e-06,
      "loss": 0.0152,
      "step": 1980040
    },
    {
      "epoch": 3.2404116179965046,
      "grad_norm": 0.20451347529888153,
      "learning_rate": 3.542365398680838e-06,
      "loss": 0.0108,
      "step": 1980060
    },
    {
      "epoch": 3.2404443484351577,
      "grad_norm": 0.2854797840118408,
      "learning_rate": 3.542299506467321e-06,
      "loss": 0.0106,
      "step": 1980080
    },
    {
      "epoch": 3.240477078873811,
      "grad_norm": 0.2437710165977478,
      "learning_rate": 3.542233614253804e-06,
      "loss": 0.0091,
      "step": 1980100
    },
    {
      "epoch": 3.2405098093124645,
      "grad_norm": 0.3647315800189972,
      "learning_rate": 3.5421677220402867e-06,
      "loss": 0.0086,
      "step": 1980120
    },
    {
      "epoch": 3.2405425397511176,
      "grad_norm": 0.25372567772865295,
      "learning_rate": 3.5421018298267695e-06,
      "loss": 0.0082,
      "step": 1980140
    },
    {
      "epoch": 3.240575270189771,
      "grad_norm": 0.2598084807395935,
      "learning_rate": 3.5420359376132522e-06,
      "loss": 0.0122,
      "step": 1980160
    },
    {
      "epoch": 3.2406080006284244,
      "grad_norm": 0.2107275277376175,
      "learning_rate": 3.5419700453997354e-06,
      "loss": 0.0073,
      "step": 1980180
    },
    {
      "epoch": 3.240640731067078,
      "grad_norm": 0.24972863495349884,
      "learning_rate": 3.541904153186218e-06,
      "loss": 0.0069,
      "step": 1980200
    },
    {
      "epoch": 3.240673461505731,
      "grad_norm": 0.11701072752475739,
      "learning_rate": 3.541838260972701e-06,
      "loss": 0.0089,
      "step": 1980220
    },
    {
      "epoch": 3.2407061919443843,
      "grad_norm": 0.542496383190155,
      "learning_rate": 3.5417723687591845e-06,
      "loss": 0.0111,
      "step": 1980240
    },
    {
      "epoch": 3.240738922383038,
      "grad_norm": 0.4443683624267578,
      "learning_rate": 3.541706476545667e-06,
      "loss": 0.0156,
      "step": 1980260
    },
    {
      "epoch": 3.240771652821691,
      "grad_norm": 0.2198331207036972,
      "learning_rate": 3.54164058433215e-06,
      "loss": 0.012,
      "step": 1980280
    },
    {
      "epoch": 3.2408043832603446,
      "grad_norm": 0.2898441553115845,
      "learning_rate": 3.5415746921186327e-06,
      "loss": 0.0119,
      "step": 1980300
    },
    {
      "epoch": 3.2408371136989977,
      "grad_norm": 0.2875922918319702,
      "learning_rate": 3.541508799905116e-06,
      "loss": 0.0122,
      "step": 1980320
    },
    {
      "epoch": 3.2408698441376513,
      "grad_norm": 0.1305188238620758,
      "learning_rate": 3.5414429076915986e-06,
      "loss": 0.0093,
      "step": 1980340
    },
    {
      "epoch": 3.2409025745763045,
      "grad_norm": 0.09550788253545761,
      "learning_rate": 3.5413770154780813e-06,
      "loss": 0.0125,
      "step": 1980360
    },
    {
      "epoch": 3.2409353050149576,
      "grad_norm": 0.9053487181663513,
      "learning_rate": 3.541311123264564e-06,
      "loss": 0.0136,
      "step": 1980380
    },
    {
      "epoch": 3.2409680354536112,
      "grad_norm": 0.3008337914943695,
      "learning_rate": 3.541245231051047e-06,
      "loss": 0.0162,
      "step": 1980400
    },
    {
      "epoch": 3.2410007658922644,
      "grad_norm": 0.08706413954496384,
      "learning_rate": 3.54117933883753e-06,
      "loss": 0.0074,
      "step": 1980420
    },
    {
      "epoch": 3.241033496330918,
      "grad_norm": 0.2384461760520935,
      "learning_rate": 3.5411134466240127e-06,
      "loss": 0.0115,
      "step": 1980440
    },
    {
      "epoch": 3.241066226769571,
      "grad_norm": 0.5019614696502686,
      "learning_rate": 3.5410475544104954e-06,
      "loss": 0.012,
      "step": 1980460
    },
    {
      "epoch": 3.2410989572082247,
      "grad_norm": 0.35124197602272034,
      "learning_rate": 3.540981662196978e-06,
      "loss": 0.0133,
      "step": 1980480
    },
    {
      "epoch": 3.241131687646878,
      "grad_norm": 0.8630361557006836,
      "learning_rate": 3.5409157699834613e-06,
      "loss": 0.0126,
      "step": 1980500
    },
    {
      "epoch": 3.241164418085531,
      "grad_norm": 0.8763742446899414,
      "learning_rate": 3.540849877769944e-06,
      "loss": 0.0133,
      "step": 1980520
    },
    {
      "epoch": 3.2411971485241846,
      "grad_norm": 0.4562666118144989,
      "learning_rate": 3.540783985556427e-06,
      "loss": 0.0123,
      "step": 1980540
    },
    {
      "epoch": 3.2412298789628378,
      "grad_norm": 0.29334479570388794,
      "learning_rate": 3.5407180933429096e-06,
      "loss": 0.008,
      "step": 1980560
    },
    {
      "epoch": 3.2412626094014914,
      "grad_norm": 0.0832328051328659,
      "learning_rate": 3.5406522011293927e-06,
      "loss": 0.0124,
      "step": 1980580
    },
    {
      "epoch": 3.2412953398401445,
      "grad_norm": 0.9861652851104736,
      "learning_rate": 3.540586308915876e-06,
      "loss": 0.0183,
      "step": 1980600
    },
    {
      "epoch": 3.241328070278798,
      "grad_norm": 0.25497347116470337,
      "learning_rate": 3.5405204167023586e-06,
      "loss": 0.0108,
      "step": 1980620
    },
    {
      "epoch": 3.2413608007174513,
      "grad_norm": 0.06850483268499374,
      "learning_rate": 3.540454524488842e-06,
      "loss": 0.0106,
      "step": 1980640
    },
    {
      "epoch": 3.2413935311561044,
      "grad_norm": 0.39955440163612366,
      "learning_rate": 3.5403886322753245e-06,
      "loss": 0.0076,
      "step": 1980660
    },
    {
      "epoch": 3.241426261594758,
      "grad_norm": 0.589140772819519,
      "learning_rate": 3.5403227400618073e-06,
      "loss": 0.0098,
      "step": 1980680
    },
    {
      "epoch": 3.241458992033411,
      "grad_norm": 0.3542923033237457,
      "learning_rate": 3.54025684784829e-06,
      "loss": 0.0149,
      "step": 1980700
    },
    {
      "epoch": 3.2414917224720647,
      "grad_norm": 0.5417731404304504,
      "learning_rate": 3.540190955634773e-06,
      "loss": 0.0126,
      "step": 1980720
    },
    {
      "epoch": 3.241524452910718,
      "grad_norm": 0.3371575176715851,
      "learning_rate": 3.540125063421256e-06,
      "loss": 0.0084,
      "step": 1980740
    },
    {
      "epoch": 3.241557183349371,
      "grad_norm": 0.276682585477829,
      "learning_rate": 3.5400591712077387e-06,
      "loss": 0.0121,
      "step": 1980760
    },
    {
      "epoch": 3.2415899137880246,
      "grad_norm": 0.03909604623913765,
      "learning_rate": 3.5399932789942214e-06,
      "loss": 0.0174,
      "step": 1980780
    },
    {
      "epoch": 3.241622644226678,
      "grad_norm": 0.3372197151184082,
      "learning_rate": 3.5399273867807046e-06,
      "loss": 0.013,
      "step": 1980800
    },
    {
      "epoch": 3.2416553746653314,
      "grad_norm": 0.21398012340068817,
      "learning_rate": 3.5398614945671873e-06,
      "loss": 0.012,
      "step": 1980820
    },
    {
      "epoch": 3.2416881051039845,
      "grad_norm": 0.12982849776744843,
      "learning_rate": 3.53979560235367e-06,
      "loss": 0.0121,
      "step": 1980840
    },
    {
      "epoch": 3.241720835542638,
      "grad_norm": 0.16692915558815002,
      "learning_rate": 3.5397297101401528e-06,
      "loss": 0.0123,
      "step": 1980860
    },
    {
      "epoch": 3.2417535659812913,
      "grad_norm": 0.15730918943881989,
      "learning_rate": 3.5396638179266355e-06,
      "loss": 0.0112,
      "step": 1980880
    },
    {
      "epoch": 3.2417862964199444,
      "grad_norm": 0.11793298274278641,
      "learning_rate": 3.5395979257131187e-06,
      "loss": 0.0114,
      "step": 1980900
    },
    {
      "epoch": 3.241819026858598,
      "grad_norm": 0.32706284523010254,
      "learning_rate": 3.5395320334996014e-06,
      "loss": 0.0126,
      "step": 1980920
    },
    {
      "epoch": 3.241851757297251,
      "grad_norm": 0.28352877497673035,
      "learning_rate": 3.5394661412860846e-06,
      "loss": 0.0112,
      "step": 1980940
    },
    {
      "epoch": 3.2418844877359048,
      "grad_norm": 0.1947505921125412,
      "learning_rate": 3.5394002490725677e-06,
      "loss": 0.0119,
      "step": 1980960
    },
    {
      "epoch": 3.241917218174558,
      "grad_norm": 0.26062554121017456,
      "learning_rate": 3.5393343568590505e-06,
      "loss": 0.0118,
      "step": 1980980
    },
    {
      "epoch": 3.2419499486132115,
      "grad_norm": 0.21409232914447784,
      "learning_rate": 3.5392684646455332e-06,
      "loss": 0.0084,
      "step": 1981000
    },
    {
      "epoch": 3.2419826790518647,
      "grad_norm": 0.32878515124320984,
      "learning_rate": 3.539202572432016e-06,
      "loss": 0.0117,
      "step": 1981020
    },
    {
      "epoch": 3.242015409490518,
      "grad_norm": 0.4054982364177704,
      "learning_rate": 3.539136680218499e-06,
      "loss": 0.0118,
      "step": 1981040
    },
    {
      "epoch": 3.2420481399291714,
      "grad_norm": 0.3988995850086212,
      "learning_rate": 3.539070788004982e-06,
      "loss": 0.0072,
      "step": 1981060
    },
    {
      "epoch": 3.2420808703678246,
      "grad_norm": 0.7321703433990479,
      "learning_rate": 3.5390048957914646e-06,
      "loss": 0.0171,
      "step": 1981080
    },
    {
      "epoch": 3.242113600806478,
      "grad_norm": 0.14952217042446136,
      "learning_rate": 3.5389390035779473e-06,
      "loss": 0.0118,
      "step": 1981100
    },
    {
      "epoch": 3.2421463312451313,
      "grad_norm": 0.1780022829771042,
      "learning_rate": 3.5388731113644305e-06,
      "loss": 0.0117,
      "step": 1981120
    },
    {
      "epoch": 3.242179061683785,
      "grad_norm": 0.5361592173576355,
      "learning_rate": 3.5388072191509133e-06,
      "loss": 0.012,
      "step": 1981140
    },
    {
      "epoch": 3.242211792122438,
      "grad_norm": 0.5126340985298157,
      "learning_rate": 3.538741326937396e-06,
      "loss": 0.0091,
      "step": 1981160
    },
    {
      "epoch": 3.242244522561091,
      "grad_norm": 0.1647290587425232,
      "learning_rate": 3.5386754347238787e-06,
      "loss": 0.008,
      "step": 1981180
    },
    {
      "epoch": 3.242277252999745,
      "grad_norm": 0.16530150175094604,
      "learning_rate": 3.538609542510362e-06,
      "loss": 0.0117,
      "step": 1981200
    },
    {
      "epoch": 3.242309983438398,
      "grad_norm": 0.7903281450271606,
      "learning_rate": 3.5385436502968446e-06,
      "loss": 0.013,
      "step": 1981220
    },
    {
      "epoch": 3.2423427138770515,
      "grad_norm": 0.434558242559433,
      "learning_rate": 3.5384777580833274e-06,
      "loss": 0.0114,
      "step": 1981240
    },
    {
      "epoch": 3.2423754443157047,
      "grad_norm": 0.32855600118637085,
      "learning_rate": 3.53841186586981e-06,
      "loss": 0.013,
      "step": 1981260
    },
    {
      "epoch": 3.242408174754358,
      "grad_norm": 0.12460042536258698,
      "learning_rate": 3.538345973656293e-06,
      "loss": 0.0149,
      "step": 1981280
    },
    {
      "epoch": 3.2424409051930114,
      "grad_norm": 0.4774673581123352,
      "learning_rate": 3.5382800814427764e-06,
      "loss": 0.0158,
      "step": 1981300
    },
    {
      "epoch": 3.2424736356316646,
      "grad_norm": 0.7912108898162842,
      "learning_rate": 3.538214189229259e-06,
      "loss": 0.0108,
      "step": 1981320
    },
    {
      "epoch": 3.242506366070318,
      "grad_norm": 0.3747158944606781,
      "learning_rate": 3.5381482970157423e-06,
      "loss": 0.0124,
      "step": 1981340
    },
    {
      "epoch": 3.2425390965089713,
      "grad_norm": 0.2901778519153595,
      "learning_rate": 3.538082404802225e-06,
      "loss": 0.0124,
      "step": 1981360
    },
    {
      "epoch": 3.242571826947625,
      "grad_norm": 0.21069400012493134,
      "learning_rate": 3.538016512588708e-06,
      "loss": 0.0161,
      "step": 1981380
    },
    {
      "epoch": 3.242604557386278,
      "grad_norm": 0.10475032031536102,
      "learning_rate": 3.5379506203751906e-06,
      "loss": 0.0139,
      "step": 1981400
    },
    {
      "epoch": 3.242637287824931,
      "grad_norm": 0.23052974045276642,
      "learning_rate": 3.5378847281616733e-06,
      "loss": 0.0124,
      "step": 1981420
    },
    {
      "epoch": 3.242670018263585,
      "grad_norm": 0.6905727982521057,
      "learning_rate": 3.5378188359481565e-06,
      "loss": 0.0139,
      "step": 1981440
    },
    {
      "epoch": 3.242702748702238,
      "grad_norm": 0.18568629026412964,
      "learning_rate": 3.537752943734639e-06,
      "loss": 0.0119,
      "step": 1981460
    },
    {
      "epoch": 3.2427354791408916,
      "grad_norm": 1.2643955945968628,
      "learning_rate": 3.537687051521122e-06,
      "loss": 0.0099,
      "step": 1981480
    },
    {
      "epoch": 3.2427682095795447,
      "grad_norm": 0.2599319815635681,
      "learning_rate": 3.5376211593076047e-06,
      "loss": 0.0164,
      "step": 1981500
    },
    {
      "epoch": 3.2428009400181983,
      "grad_norm": 0.32817772030830383,
      "learning_rate": 3.537555267094088e-06,
      "loss": 0.0105,
      "step": 1981520
    },
    {
      "epoch": 3.2428336704568514,
      "grad_norm": 0.19672706723213196,
      "learning_rate": 3.5374893748805706e-06,
      "loss": 0.0093,
      "step": 1981540
    },
    {
      "epoch": 3.2428664008955046,
      "grad_norm": 0.2146904468536377,
      "learning_rate": 3.5374234826670533e-06,
      "loss": 0.0124,
      "step": 1981560
    },
    {
      "epoch": 3.242899131334158,
      "grad_norm": 0.2197013646364212,
      "learning_rate": 3.537357590453536e-06,
      "loss": 0.0099,
      "step": 1981580
    },
    {
      "epoch": 3.2429318617728113,
      "grad_norm": 0.2913959324359894,
      "learning_rate": 3.5372916982400192e-06,
      "loss": 0.0104,
      "step": 1981600
    },
    {
      "epoch": 3.242964592211465,
      "grad_norm": 0.19127149879932404,
      "learning_rate": 3.537225806026502e-06,
      "loss": 0.0147,
      "step": 1981620
    },
    {
      "epoch": 3.242997322650118,
      "grad_norm": 0.21141083538532257,
      "learning_rate": 3.5371599138129847e-06,
      "loss": 0.0187,
      "step": 1981640
    },
    {
      "epoch": 3.2430300530887717,
      "grad_norm": 0.0782228633761406,
      "learning_rate": 3.5370940215994683e-06,
      "loss": 0.0177,
      "step": 1981660
    },
    {
      "epoch": 3.243062783527425,
      "grad_norm": 0.39521172642707825,
      "learning_rate": 3.537028129385951e-06,
      "loss": 0.014,
      "step": 1981680
    },
    {
      "epoch": 3.243095513966078,
      "grad_norm": 0.11235842108726501,
      "learning_rate": 3.5369622371724338e-06,
      "loss": 0.008,
      "step": 1981700
    },
    {
      "epoch": 3.2431282444047316,
      "grad_norm": 0.1032082661986351,
      "learning_rate": 3.5368963449589165e-06,
      "loss": 0.0126,
      "step": 1981720
    },
    {
      "epoch": 3.2431609748433847,
      "grad_norm": 0.10936835408210754,
      "learning_rate": 3.5368304527453997e-06,
      "loss": 0.0121,
      "step": 1981740
    },
    {
      "epoch": 3.2431937052820383,
      "grad_norm": 0.4875795543193817,
      "learning_rate": 3.5367645605318824e-06,
      "loss": 0.0129,
      "step": 1981760
    },
    {
      "epoch": 3.2432264357206915,
      "grad_norm": 0.5442255139350891,
      "learning_rate": 3.536698668318365e-06,
      "loss": 0.0108,
      "step": 1981780
    },
    {
      "epoch": 3.243259166159345,
      "grad_norm": 0.3818036913871765,
      "learning_rate": 3.536632776104848e-06,
      "loss": 0.0129,
      "step": 1981800
    },
    {
      "epoch": 3.243291896597998,
      "grad_norm": 0.21413689851760864,
      "learning_rate": 3.5365668838913306e-06,
      "loss": 0.0164,
      "step": 1981820
    },
    {
      "epoch": 3.2433246270366514,
      "grad_norm": 0.2719680070877075,
      "learning_rate": 3.536500991677814e-06,
      "loss": 0.0073,
      "step": 1981840
    },
    {
      "epoch": 3.243357357475305,
      "grad_norm": 0.3019581437110901,
      "learning_rate": 3.5364350994642965e-06,
      "loss": 0.0118,
      "step": 1981860
    },
    {
      "epoch": 3.243390087913958,
      "grad_norm": 0.33145245909690857,
      "learning_rate": 3.5363692072507793e-06,
      "loss": 0.0112,
      "step": 1981880
    },
    {
      "epoch": 3.2434228183526117,
      "grad_norm": 0.3838098347187042,
      "learning_rate": 3.536303315037262e-06,
      "loss": 0.0129,
      "step": 1981900
    },
    {
      "epoch": 3.243455548791265,
      "grad_norm": 0.11191824823617935,
      "learning_rate": 3.536237422823745e-06,
      "loss": 0.0122,
      "step": 1981920
    },
    {
      "epoch": 3.2434882792299184,
      "grad_norm": 0.27099618315696716,
      "learning_rate": 3.536171530610228e-06,
      "loss": 0.007,
      "step": 1981940
    },
    {
      "epoch": 3.2435210096685716,
      "grad_norm": 0.5037453770637512,
      "learning_rate": 3.5361056383967107e-06,
      "loss": 0.0176,
      "step": 1981960
    },
    {
      "epoch": 3.2435537401072247,
      "grad_norm": 0.37500903010368347,
      "learning_rate": 3.5360397461831934e-06,
      "loss": 0.0084,
      "step": 1981980
    },
    {
      "epoch": 3.2435864705458783,
      "grad_norm": 0.4438205659389496,
      "learning_rate": 3.535973853969677e-06,
      "loss": 0.0122,
      "step": 1982000
    },
    {
      "epoch": 3.2436192009845315,
      "grad_norm": 0.6502463221549988,
      "learning_rate": 3.5359079617561597e-06,
      "loss": 0.0128,
      "step": 1982020
    },
    {
      "epoch": 3.243651931423185,
      "grad_norm": 0.22542841732501984,
      "learning_rate": 3.5358420695426425e-06,
      "loss": 0.0107,
      "step": 1982040
    },
    {
      "epoch": 3.2436846618618382,
      "grad_norm": 0.08182410895824432,
      "learning_rate": 3.5357761773291256e-06,
      "loss": 0.0132,
      "step": 1982060
    },
    {
      "epoch": 3.243717392300492,
      "grad_norm": 0.2401530146598816,
      "learning_rate": 3.5357102851156084e-06,
      "loss": 0.009,
      "step": 1982080
    },
    {
      "epoch": 3.243750122739145,
      "grad_norm": 0.2646747827529907,
      "learning_rate": 3.535644392902091e-06,
      "loss": 0.009,
      "step": 1982100
    },
    {
      "epoch": 3.243782853177798,
      "grad_norm": 0.40397170186042786,
      "learning_rate": 3.535578500688574e-06,
      "loss": 0.0161,
      "step": 1982120
    },
    {
      "epoch": 3.2438155836164517,
      "grad_norm": 0.4243687689304352,
      "learning_rate": 3.535512608475057e-06,
      "loss": 0.0108,
      "step": 1982140
    },
    {
      "epoch": 3.243848314055105,
      "grad_norm": 0.12270558625459671,
      "learning_rate": 3.5354467162615398e-06,
      "loss": 0.01,
      "step": 1982160
    },
    {
      "epoch": 3.2438810444937585,
      "grad_norm": 0.20834487676620483,
      "learning_rate": 3.5353808240480225e-06,
      "loss": 0.0083,
      "step": 1982180
    },
    {
      "epoch": 3.2439137749324116,
      "grad_norm": 0.4697439968585968,
      "learning_rate": 3.5353149318345052e-06,
      "loss": 0.0124,
      "step": 1982200
    },
    {
      "epoch": 3.243946505371065,
      "grad_norm": 0.2171650379896164,
      "learning_rate": 3.5352490396209884e-06,
      "loss": 0.0148,
      "step": 1982220
    },
    {
      "epoch": 3.2439792358097184,
      "grad_norm": 0.06051263213157654,
      "learning_rate": 3.535183147407471e-06,
      "loss": 0.014,
      "step": 1982240
    },
    {
      "epoch": 3.2440119662483715,
      "grad_norm": 0.21016626060009003,
      "learning_rate": 3.535117255193954e-06,
      "loss": 0.0132,
      "step": 1982260
    },
    {
      "epoch": 3.244044696687025,
      "grad_norm": 0.32754960656166077,
      "learning_rate": 3.5350513629804366e-06,
      "loss": 0.0119,
      "step": 1982280
    },
    {
      "epoch": 3.2440774271256783,
      "grad_norm": 0.1266859769821167,
      "learning_rate": 3.5349854707669194e-06,
      "loss": 0.015,
      "step": 1982300
    },
    {
      "epoch": 3.244110157564332,
      "grad_norm": 0.19253870844841003,
      "learning_rate": 3.5349195785534025e-06,
      "loss": 0.0129,
      "step": 1982320
    },
    {
      "epoch": 3.244142888002985,
      "grad_norm": 0.6597790122032166,
      "learning_rate": 3.5348536863398853e-06,
      "loss": 0.0165,
      "step": 1982340
    },
    {
      "epoch": 3.244175618441638,
      "grad_norm": 0.1191655844449997,
      "learning_rate": 3.5347877941263684e-06,
      "loss": 0.0094,
      "step": 1982360
    },
    {
      "epoch": 3.2442083488802917,
      "grad_norm": 0.17577748000621796,
      "learning_rate": 3.5347219019128516e-06,
      "loss": 0.0075,
      "step": 1982380
    },
    {
      "epoch": 3.244241079318945,
      "grad_norm": 0.4148979187011719,
      "learning_rate": 3.5346560096993343e-06,
      "loss": 0.0124,
      "step": 1982400
    },
    {
      "epoch": 3.2442738097575985,
      "grad_norm": 0.9321053624153137,
      "learning_rate": 3.534590117485817e-06,
      "loss": 0.0112,
      "step": 1982420
    },
    {
      "epoch": 3.2443065401962516,
      "grad_norm": 0.12942494451999664,
      "learning_rate": 3.5345242252723e-06,
      "loss": 0.0081,
      "step": 1982440
    },
    {
      "epoch": 3.2443392706349052,
      "grad_norm": 0.2953851819038391,
      "learning_rate": 3.534458333058783e-06,
      "loss": 0.0111,
      "step": 1982460
    },
    {
      "epoch": 3.2443720010735584,
      "grad_norm": 0.4238487780094147,
      "learning_rate": 3.5343924408452657e-06,
      "loss": 0.0084,
      "step": 1982480
    },
    {
      "epoch": 3.2444047315122115,
      "grad_norm": 0.2506365478038788,
      "learning_rate": 3.5343265486317484e-06,
      "loss": 0.0092,
      "step": 1982500
    },
    {
      "epoch": 3.244437461950865,
      "grad_norm": 0.3280284106731415,
      "learning_rate": 3.534260656418231e-06,
      "loss": 0.0115,
      "step": 1982520
    },
    {
      "epoch": 3.2444701923895183,
      "grad_norm": 0.49753350019454956,
      "learning_rate": 3.5341947642047144e-06,
      "loss": 0.0075,
      "step": 1982540
    },
    {
      "epoch": 3.244502922828172,
      "grad_norm": 0.6292691230773926,
      "learning_rate": 3.534128871991197e-06,
      "loss": 0.014,
      "step": 1982560
    },
    {
      "epoch": 3.244535653266825,
      "grad_norm": 0.3848240077495575,
      "learning_rate": 3.53406297977768e-06,
      "loss": 0.0117,
      "step": 1982580
    },
    {
      "epoch": 3.2445683837054786,
      "grad_norm": 0.3372848331928253,
      "learning_rate": 3.5339970875641626e-06,
      "loss": 0.0156,
      "step": 1982600
    },
    {
      "epoch": 3.2446011141441318,
      "grad_norm": 0.11357112973928452,
      "learning_rate": 3.5339311953506457e-06,
      "loss": 0.0124,
      "step": 1982620
    },
    {
      "epoch": 3.244633844582785,
      "grad_norm": 0.0578877218067646,
      "learning_rate": 3.5338653031371285e-06,
      "loss": 0.0104,
      "step": 1982640
    },
    {
      "epoch": 3.2446665750214385,
      "grad_norm": 0.28096187114715576,
      "learning_rate": 3.5337994109236112e-06,
      "loss": 0.0148,
      "step": 1982660
    },
    {
      "epoch": 3.2446993054600917,
      "grad_norm": 0.12922783195972443,
      "learning_rate": 3.533733518710094e-06,
      "loss": 0.0099,
      "step": 1982680
    },
    {
      "epoch": 3.2447320358987453,
      "grad_norm": 0.22811485826969147,
      "learning_rate": 3.5336676264965775e-06,
      "loss": 0.0103,
      "step": 1982700
    },
    {
      "epoch": 3.2447647663373984,
      "grad_norm": 0.2223937064409256,
      "learning_rate": 3.5336017342830603e-06,
      "loss": 0.0082,
      "step": 1982720
    },
    {
      "epoch": 3.2447974967760516,
      "grad_norm": 0.3226000964641571,
      "learning_rate": 3.533535842069543e-06,
      "loss": 0.012,
      "step": 1982740
    },
    {
      "epoch": 3.244830227214705,
      "grad_norm": 0.35419464111328125,
      "learning_rate": 3.533469949856026e-06,
      "loss": 0.0104,
      "step": 1982760
    },
    {
      "epoch": 3.2448629576533583,
      "grad_norm": 0.2107716053724289,
      "learning_rate": 3.533404057642509e-06,
      "loss": 0.0136,
      "step": 1982780
    },
    {
      "epoch": 3.244895688092012,
      "grad_norm": 0.09444461017847061,
      "learning_rate": 3.5333381654289917e-06,
      "loss": 0.0182,
      "step": 1982800
    },
    {
      "epoch": 3.244928418530665,
      "grad_norm": 0.09772275388240814,
      "learning_rate": 3.5332722732154744e-06,
      "loss": 0.0098,
      "step": 1982820
    },
    {
      "epoch": 3.2449611489693186,
      "grad_norm": 0.253108948469162,
      "learning_rate": 3.533206381001957e-06,
      "loss": 0.0095,
      "step": 1982840
    },
    {
      "epoch": 3.244993879407972,
      "grad_norm": 0.21619223058223724,
      "learning_rate": 3.5331404887884403e-06,
      "loss": 0.0113,
      "step": 1982860
    },
    {
      "epoch": 3.245026609846625,
      "grad_norm": 0.3711768686771393,
      "learning_rate": 3.533074596574923e-06,
      "loss": 0.0116,
      "step": 1982880
    },
    {
      "epoch": 3.2450593402852785,
      "grad_norm": 0.3050524592399597,
      "learning_rate": 3.5330087043614058e-06,
      "loss": 0.0136,
      "step": 1982900
    },
    {
      "epoch": 3.2450920707239317,
      "grad_norm": 0.22395779192447662,
      "learning_rate": 3.5329428121478885e-06,
      "loss": 0.013,
      "step": 1982920
    },
    {
      "epoch": 3.2451248011625853,
      "grad_norm": 0.19730889797210693,
      "learning_rate": 3.5328769199343717e-06,
      "loss": 0.0127,
      "step": 1982940
    },
    {
      "epoch": 3.2451575316012384,
      "grad_norm": 0.13565963506698608,
      "learning_rate": 3.5328110277208544e-06,
      "loss": 0.0124,
      "step": 1982960
    },
    {
      "epoch": 3.245190262039892,
      "grad_norm": 0.27390962839126587,
      "learning_rate": 3.532745135507337e-06,
      "loss": 0.0079,
      "step": 1982980
    },
    {
      "epoch": 3.245222992478545,
      "grad_norm": 0.25679588317871094,
      "learning_rate": 3.53267924329382e-06,
      "loss": 0.008,
      "step": 1983000
    },
    {
      "epoch": 3.2452557229171983,
      "grad_norm": 0.26095160841941833,
      "learning_rate": 3.532613351080303e-06,
      "loss": 0.014,
      "step": 1983020
    },
    {
      "epoch": 3.245288453355852,
      "grad_norm": 0.2314625084400177,
      "learning_rate": 3.532547458866786e-06,
      "loss": 0.0132,
      "step": 1983040
    },
    {
      "epoch": 3.245321183794505,
      "grad_norm": 0.04425181448459625,
      "learning_rate": 3.532481566653269e-06,
      "loss": 0.0083,
      "step": 1983060
    },
    {
      "epoch": 3.2453539142331587,
      "grad_norm": 0.3066733777523041,
      "learning_rate": 3.532415674439752e-06,
      "loss": 0.0098,
      "step": 1983080
    },
    {
      "epoch": 3.245386644671812,
      "grad_norm": 0.24734845757484436,
      "learning_rate": 3.532349782226235e-06,
      "loss": 0.0115,
      "step": 1983100
    },
    {
      "epoch": 3.2454193751104654,
      "grad_norm": 0.15211156010627747,
      "learning_rate": 3.5322838900127176e-06,
      "loss": 0.0087,
      "step": 1983120
    },
    {
      "epoch": 3.2454521055491186,
      "grad_norm": 0.20000997185707092,
      "learning_rate": 3.5322179977992004e-06,
      "loss": 0.0078,
      "step": 1983140
    },
    {
      "epoch": 3.2454848359877717,
      "grad_norm": 0.22174783051013947,
      "learning_rate": 3.5321521055856835e-06,
      "loss": 0.0127,
      "step": 1983160
    },
    {
      "epoch": 3.2455175664264253,
      "grad_norm": 0.33392709493637085,
      "learning_rate": 3.5320862133721663e-06,
      "loss": 0.0148,
      "step": 1983180
    },
    {
      "epoch": 3.2455502968650785,
      "grad_norm": 0.25910866260528564,
      "learning_rate": 3.532020321158649e-06,
      "loss": 0.0126,
      "step": 1983200
    },
    {
      "epoch": 3.245583027303732,
      "grad_norm": 0.1384071558713913,
      "learning_rate": 3.5319544289451317e-06,
      "loss": 0.0099,
      "step": 1983220
    },
    {
      "epoch": 3.245615757742385,
      "grad_norm": 1.1331813335418701,
      "learning_rate": 3.531888536731615e-06,
      "loss": 0.0118,
      "step": 1983240
    },
    {
      "epoch": 3.245648488181039,
      "grad_norm": 0.14799989759922028,
      "learning_rate": 3.5318226445180976e-06,
      "loss": 0.0068,
      "step": 1983260
    },
    {
      "epoch": 3.245681218619692,
      "grad_norm": 1.1409410238265991,
      "learning_rate": 3.5317567523045804e-06,
      "loss": 0.0181,
      "step": 1983280
    },
    {
      "epoch": 3.245713949058345,
      "grad_norm": 0.34681618213653564,
      "learning_rate": 3.531690860091063e-06,
      "loss": 0.011,
      "step": 1983300
    },
    {
      "epoch": 3.2457466794969987,
      "grad_norm": 0.2949460446834564,
      "learning_rate": 3.531624967877546e-06,
      "loss": 0.0134,
      "step": 1983320
    },
    {
      "epoch": 3.245779409935652,
      "grad_norm": 0.43269258737564087,
      "learning_rate": 3.531559075664029e-06,
      "loss": 0.0112,
      "step": 1983340
    },
    {
      "epoch": 3.2458121403743054,
      "grad_norm": 0.16843563318252563,
      "learning_rate": 3.5314931834505118e-06,
      "loss": 0.0066,
      "step": 1983360
    },
    {
      "epoch": 3.2458448708129586,
      "grad_norm": 0.40257444977760315,
      "learning_rate": 3.5314272912369945e-06,
      "loss": 0.0112,
      "step": 1983380
    },
    {
      "epoch": 3.245877601251612,
      "grad_norm": 0.32826659083366394,
      "learning_rate": 3.5313613990234772e-06,
      "loss": 0.0093,
      "step": 1983400
    },
    {
      "epoch": 3.2459103316902653,
      "grad_norm": 0.6424040198326111,
      "learning_rate": 3.531295506809961e-06,
      "loss": 0.0131,
      "step": 1983420
    },
    {
      "epoch": 3.2459430621289185,
      "grad_norm": 0.09032971411943436,
      "learning_rate": 3.5312296145964436e-06,
      "loss": 0.0106,
      "step": 1983440
    },
    {
      "epoch": 3.245975792567572,
      "grad_norm": 0.33731377124786377,
      "learning_rate": 3.5311637223829263e-06,
      "loss": 0.0092,
      "step": 1983460
    },
    {
      "epoch": 3.246008523006225,
      "grad_norm": 0.3289444148540497,
      "learning_rate": 3.5310978301694095e-06,
      "loss": 0.0102,
      "step": 1983480
    },
    {
      "epoch": 3.246041253444879,
      "grad_norm": 0.400436133146286,
      "learning_rate": 3.5310319379558922e-06,
      "loss": 0.0086,
      "step": 1983500
    },
    {
      "epoch": 3.246073983883532,
      "grad_norm": 0.25260457396507263,
      "learning_rate": 3.530966045742375e-06,
      "loss": 0.0134,
      "step": 1983520
    },
    {
      "epoch": 3.2461067143221856,
      "grad_norm": 0.1557963639497757,
      "learning_rate": 3.5309001535288577e-06,
      "loss": 0.0112,
      "step": 1983540
    },
    {
      "epoch": 3.2461394447608387,
      "grad_norm": 0.05882776528596878,
      "learning_rate": 3.530834261315341e-06,
      "loss": 0.01,
      "step": 1983560
    },
    {
      "epoch": 3.246172175199492,
      "grad_norm": 0.25240784883499146,
      "learning_rate": 3.5307683691018236e-06,
      "loss": 0.0122,
      "step": 1983580
    },
    {
      "epoch": 3.2462049056381455,
      "grad_norm": 0.29235899448394775,
      "learning_rate": 3.5307024768883063e-06,
      "loss": 0.0143,
      "step": 1983600
    },
    {
      "epoch": 3.2462376360767986,
      "grad_norm": 0.535444974899292,
      "learning_rate": 3.530636584674789e-06,
      "loss": 0.0189,
      "step": 1983620
    },
    {
      "epoch": 3.246270366515452,
      "grad_norm": 0.24226102232933044,
      "learning_rate": 3.5305706924612722e-06,
      "loss": 0.008,
      "step": 1983640
    },
    {
      "epoch": 3.2463030969541053,
      "grad_norm": 0.2613679766654968,
      "learning_rate": 3.530504800247755e-06,
      "loss": 0.0111,
      "step": 1983660
    },
    {
      "epoch": 3.246335827392759,
      "grad_norm": 1.1324344873428345,
      "learning_rate": 3.5304389080342377e-06,
      "loss": 0.01,
      "step": 1983680
    },
    {
      "epoch": 3.246368557831412,
      "grad_norm": 0.32346007227897644,
      "learning_rate": 3.5303730158207205e-06,
      "loss": 0.0156,
      "step": 1983700
    },
    {
      "epoch": 3.2464012882700652,
      "grad_norm": 0.21908150613307953,
      "learning_rate": 3.530307123607203e-06,
      "loss": 0.0075,
      "step": 1983720
    },
    {
      "epoch": 3.246434018708719,
      "grad_norm": 0.13668444752693176,
      "learning_rate": 3.5302412313936864e-06,
      "loss": 0.0111,
      "step": 1983740
    },
    {
      "epoch": 3.246466749147372,
      "grad_norm": 0.6346419453620911,
      "learning_rate": 3.5301753391801695e-06,
      "loss": 0.0148,
      "step": 1983760
    },
    {
      "epoch": 3.2464994795860256,
      "grad_norm": 0.25747066736221313,
      "learning_rate": 3.5301094469666527e-06,
      "loss": 0.0089,
      "step": 1983780
    },
    {
      "epoch": 3.2465322100246787,
      "grad_norm": 0.16215026378631592,
      "learning_rate": 3.5300435547531354e-06,
      "loss": 0.012,
      "step": 1983800
    },
    {
      "epoch": 3.246564940463332,
      "grad_norm": 0.16530050337314606,
      "learning_rate": 3.529977662539618e-06,
      "loss": 0.0082,
      "step": 1983820
    },
    {
      "epoch": 3.2465976709019855,
      "grad_norm": 0.4023439288139343,
      "learning_rate": 3.529911770326101e-06,
      "loss": 0.0123,
      "step": 1983840
    },
    {
      "epoch": 3.2466304013406386,
      "grad_norm": 0.13611100614070892,
      "learning_rate": 3.5298458781125836e-06,
      "loss": 0.0084,
      "step": 1983860
    },
    {
      "epoch": 3.246663131779292,
      "grad_norm": 0.11164452135562897,
      "learning_rate": 3.529779985899067e-06,
      "loss": 0.0111,
      "step": 1983880
    },
    {
      "epoch": 3.2466958622179454,
      "grad_norm": 0.19436784088611603,
      "learning_rate": 3.5297140936855495e-06,
      "loss": 0.0131,
      "step": 1983900
    },
    {
      "epoch": 3.246728592656599,
      "grad_norm": 0.13463027775287628,
      "learning_rate": 3.5296482014720323e-06,
      "loss": 0.0174,
      "step": 1983920
    },
    {
      "epoch": 3.246761323095252,
      "grad_norm": 0.2707030773162842,
      "learning_rate": 3.529582309258515e-06,
      "loss": 0.0071,
      "step": 1983940
    },
    {
      "epoch": 3.2467940535339053,
      "grad_norm": 0.21934407949447632,
      "learning_rate": 3.529516417044998e-06,
      "loss": 0.0164,
      "step": 1983960
    },
    {
      "epoch": 3.246826783972559,
      "grad_norm": 0.20715922117233276,
      "learning_rate": 3.529450524831481e-06,
      "loss": 0.0076,
      "step": 1983980
    },
    {
      "epoch": 3.246859514411212,
      "grad_norm": 0.24477370083332062,
      "learning_rate": 3.5293846326179637e-06,
      "loss": 0.0119,
      "step": 1984000
    },
    {
      "epoch": 3.2468922448498656,
      "grad_norm": 0.6039751172065735,
      "learning_rate": 3.5293187404044464e-06,
      "loss": 0.0127,
      "step": 1984020
    },
    {
      "epoch": 3.2469249752885188,
      "grad_norm": 0.6412703394889832,
      "learning_rate": 3.5292528481909296e-06,
      "loss": 0.0098,
      "step": 1984040
    },
    {
      "epoch": 3.2469577057271723,
      "grad_norm": 0.8182045221328735,
      "learning_rate": 3.5291869559774123e-06,
      "loss": 0.0106,
      "step": 1984060
    },
    {
      "epoch": 3.2469904361658255,
      "grad_norm": 0.34521812200546265,
      "learning_rate": 3.529121063763895e-06,
      "loss": 0.0155,
      "step": 1984080
    },
    {
      "epoch": 3.2470231666044787,
      "grad_norm": 0.56876140832901,
      "learning_rate": 3.529055171550378e-06,
      "loss": 0.0198,
      "step": 1984100
    },
    {
      "epoch": 3.2470558970431322,
      "grad_norm": 0.03857123479247093,
      "learning_rate": 3.5289892793368614e-06,
      "loss": 0.0133,
      "step": 1984120
    },
    {
      "epoch": 3.2470886274817854,
      "grad_norm": 0.18366724252700806,
      "learning_rate": 3.528923387123344e-06,
      "loss": 0.0089,
      "step": 1984140
    },
    {
      "epoch": 3.247121357920439,
      "grad_norm": 0.03425288572907448,
      "learning_rate": 3.528857494909827e-06,
      "loss": 0.0163,
      "step": 1984160
    },
    {
      "epoch": 3.247154088359092,
      "grad_norm": 0.5949863195419312,
      "learning_rate": 3.52879160269631e-06,
      "loss": 0.0126,
      "step": 1984180
    },
    {
      "epoch": 3.2471868187977457,
      "grad_norm": 0.27222368121147156,
      "learning_rate": 3.5287257104827928e-06,
      "loss": 0.0105,
      "step": 1984200
    },
    {
      "epoch": 3.247219549236399,
      "grad_norm": 0.45681238174438477,
      "learning_rate": 3.5286598182692755e-06,
      "loss": 0.0115,
      "step": 1984220
    },
    {
      "epoch": 3.247252279675052,
      "grad_norm": 0.21873673796653748,
      "learning_rate": 3.5285939260557582e-06,
      "loss": 0.012,
      "step": 1984240
    },
    {
      "epoch": 3.2472850101137056,
      "grad_norm": 0.3610958158969879,
      "learning_rate": 3.528528033842241e-06,
      "loss": 0.0101,
      "step": 1984260
    },
    {
      "epoch": 3.247317740552359,
      "grad_norm": 0.11551815271377563,
      "learning_rate": 3.528462141628724e-06,
      "loss": 0.007,
      "step": 1984280
    },
    {
      "epoch": 3.2473504709910124,
      "grad_norm": 0.2998679578304291,
      "learning_rate": 3.528396249415207e-06,
      "loss": 0.0113,
      "step": 1984300
    },
    {
      "epoch": 3.2473832014296655,
      "grad_norm": 0.2816184461116791,
      "learning_rate": 3.5283303572016896e-06,
      "loss": 0.0093,
      "step": 1984320
    },
    {
      "epoch": 3.2474159318683187,
      "grad_norm": 0.21401867270469666,
      "learning_rate": 3.5282644649881724e-06,
      "loss": 0.0102,
      "step": 1984340
    },
    {
      "epoch": 3.2474486623069723,
      "grad_norm": 0.538307249546051,
      "learning_rate": 3.5281985727746555e-06,
      "loss": 0.0145,
      "step": 1984360
    },
    {
      "epoch": 3.2474813927456254,
      "grad_norm": 0.4123006761074066,
      "learning_rate": 3.5281326805611383e-06,
      "loss": 0.0102,
      "step": 1984380
    },
    {
      "epoch": 3.247514123184279,
      "grad_norm": 0.21804045140743256,
      "learning_rate": 3.528066788347621e-06,
      "loss": 0.0083,
      "step": 1984400
    },
    {
      "epoch": 3.247546853622932,
      "grad_norm": 0.6891734600067139,
      "learning_rate": 3.5280008961341037e-06,
      "loss": 0.0204,
      "step": 1984420
    },
    {
      "epoch": 3.2475795840615858,
      "grad_norm": 0.2703191637992859,
      "learning_rate": 3.527935003920587e-06,
      "loss": 0.0144,
      "step": 1984440
    },
    {
      "epoch": 3.247612314500239,
      "grad_norm": 0.8514825105667114,
      "learning_rate": 3.52786911170707e-06,
      "loss": 0.0172,
      "step": 1984460
    },
    {
      "epoch": 3.247645044938892,
      "grad_norm": 0.3929181396961212,
      "learning_rate": 3.527803219493553e-06,
      "loss": 0.0139,
      "step": 1984480
    },
    {
      "epoch": 3.2476777753775457,
      "grad_norm": 0.11501537263393402,
      "learning_rate": 3.527737327280036e-06,
      "loss": 0.0111,
      "step": 1984500
    },
    {
      "epoch": 3.247710505816199,
      "grad_norm": 0.1455489844083786,
      "learning_rate": 3.5276714350665187e-06,
      "loss": 0.0121,
      "step": 1984520
    },
    {
      "epoch": 3.2477432362548524,
      "grad_norm": 0.19364172220230103,
      "learning_rate": 3.5276055428530015e-06,
      "loss": 0.0104,
      "step": 1984540
    },
    {
      "epoch": 3.2477759666935055,
      "grad_norm": 0.493063360452652,
      "learning_rate": 3.527539650639484e-06,
      "loss": 0.0139,
      "step": 1984560
    },
    {
      "epoch": 3.247808697132159,
      "grad_norm": 0.1739557832479477,
      "learning_rate": 3.5274737584259674e-06,
      "loss": 0.0135,
      "step": 1984580
    },
    {
      "epoch": 3.2478414275708123,
      "grad_norm": 0.18981391191482544,
      "learning_rate": 3.52740786621245e-06,
      "loss": 0.0074,
      "step": 1984600
    },
    {
      "epoch": 3.2478741580094654,
      "grad_norm": 0.149070143699646,
      "learning_rate": 3.527341973998933e-06,
      "loss": 0.0093,
      "step": 1984620
    },
    {
      "epoch": 3.247906888448119,
      "grad_norm": 0.12368715554475784,
      "learning_rate": 3.5272760817854156e-06,
      "loss": 0.0083,
      "step": 1984640
    },
    {
      "epoch": 3.247939618886772,
      "grad_norm": 0.39545539021492004,
      "learning_rate": 3.5272101895718987e-06,
      "loss": 0.0149,
      "step": 1984660
    },
    {
      "epoch": 3.247972349325426,
      "grad_norm": 0.1298706829547882,
      "learning_rate": 3.5271442973583815e-06,
      "loss": 0.0089,
      "step": 1984680
    },
    {
      "epoch": 3.248005079764079,
      "grad_norm": 0.41809239983558655,
      "learning_rate": 3.5270784051448642e-06,
      "loss": 0.0134,
      "step": 1984700
    },
    {
      "epoch": 3.2480378102027325,
      "grad_norm": 0.34117937088012695,
      "learning_rate": 3.527012512931347e-06,
      "loss": 0.0094,
      "step": 1984720
    },
    {
      "epoch": 3.2480705406413857,
      "grad_norm": 0.20975135266780853,
      "learning_rate": 3.5269466207178297e-06,
      "loss": 0.0091,
      "step": 1984740
    },
    {
      "epoch": 3.248103271080039,
      "grad_norm": 0.09880033880472183,
      "learning_rate": 3.526880728504313e-06,
      "loss": 0.0068,
      "step": 1984760
    },
    {
      "epoch": 3.2481360015186924,
      "grad_norm": 0.17699047923088074,
      "learning_rate": 3.5268148362907956e-06,
      "loss": 0.016,
      "step": 1984780
    },
    {
      "epoch": 3.2481687319573456,
      "grad_norm": 0.5898066163063049,
      "learning_rate": 3.5267489440772783e-06,
      "loss": 0.0174,
      "step": 1984800
    },
    {
      "epoch": 3.248201462395999,
      "grad_norm": 0.7005472183227539,
      "learning_rate": 3.526683051863762e-06,
      "loss": 0.0182,
      "step": 1984820
    },
    {
      "epoch": 3.2482341928346523,
      "grad_norm": 0.20650224387645721,
      "learning_rate": 3.5266171596502447e-06,
      "loss": 0.0088,
      "step": 1984840
    },
    {
      "epoch": 3.248266923273306,
      "grad_norm": 0.42390644550323486,
      "learning_rate": 3.5265512674367274e-06,
      "loss": 0.0079,
      "step": 1984860
    },
    {
      "epoch": 3.248299653711959,
      "grad_norm": 0.24713529646396637,
      "learning_rate": 3.52648537522321e-06,
      "loss": 0.0086,
      "step": 1984880
    },
    {
      "epoch": 3.248332384150612,
      "grad_norm": 0.14436621963977814,
      "learning_rate": 3.5264194830096933e-06,
      "loss": 0.0089,
      "step": 1984900
    },
    {
      "epoch": 3.248365114589266,
      "grad_norm": 0.05943938344717026,
      "learning_rate": 3.526353590796176e-06,
      "loss": 0.0139,
      "step": 1984920
    },
    {
      "epoch": 3.248397845027919,
      "grad_norm": 0.19076931476593018,
      "learning_rate": 3.526287698582659e-06,
      "loss": 0.0158,
      "step": 1984940
    },
    {
      "epoch": 3.2484305754665725,
      "grad_norm": 0.7203732132911682,
      "learning_rate": 3.5262218063691415e-06,
      "loss": 0.016,
      "step": 1984960
    },
    {
      "epoch": 3.2484633059052257,
      "grad_norm": 0.373222291469574,
      "learning_rate": 3.5261559141556247e-06,
      "loss": 0.008,
      "step": 1984980
    },
    {
      "epoch": 3.2484960363438793,
      "grad_norm": 0.3877713084220886,
      "learning_rate": 3.5260900219421074e-06,
      "loss": 0.0096,
      "step": 1985000
    },
    {
      "epoch": 3.2485287667825324,
      "grad_norm": 0.22327840328216553,
      "learning_rate": 3.52602412972859e-06,
      "loss": 0.0074,
      "step": 1985020
    },
    {
      "epoch": 3.2485614972211856,
      "grad_norm": 0.2175687700510025,
      "learning_rate": 3.525958237515073e-06,
      "loss": 0.0083,
      "step": 1985040
    },
    {
      "epoch": 3.248594227659839,
      "grad_norm": 0.6670904159545898,
      "learning_rate": 3.525892345301556e-06,
      "loss": 0.0155,
      "step": 1985060
    },
    {
      "epoch": 3.2486269580984923,
      "grad_norm": 0.2392609566450119,
      "learning_rate": 3.525826453088039e-06,
      "loss": 0.0097,
      "step": 1985080
    },
    {
      "epoch": 3.248659688537146,
      "grad_norm": 0.7052018642425537,
      "learning_rate": 3.5257605608745216e-06,
      "loss": 0.0111,
      "step": 1985100
    },
    {
      "epoch": 3.248692418975799,
      "grad_norm": 0.3218920826911926,
      "learning_rate": 3.5256946686610043e-06,
      "loss": 0.0126,
      "step": 1985120
    },
    {
      "epoch": 3.2487251494144527,
      "grad_norm": 0.07763872295618057,
      "learning_rate": 3.525628776447487e-06,
      "loss": 0.0111,
      "step": 1985140
    },
    {
      "epoch": 3.248757879853106,
      "grad_norm": 0.23333922028541565,
      "learning_rate": 3.52556288423397e-06,
      "loss": 0.0075,
      "step": 1985160
    },
    {
      "epoch": 3.248790610291759,
      "grad_norm": 0.3146587014198303,
      "learning_rate": 3.5254969920204534e-06,
      "loss": 0.0113,
      "step": 1985180
    },
    {
      "epoch": 3.2488233407304126,
      "grad_norm": 0.13147450983524323,
      "learning_rate": 3.5254310998069365e-06,
      "loss": 0.007,
      "step": 1985200
    },
    {
      "epoch": 3.2488560711690657,
      "grad_norm": 0.03394973650574684,
      "learning_rate": 3.5253652075934193e-06,
      "loss": 0.009,
      "step": 1985220
    },
    {
      "epoch": 3.2488888016077193,
      "grad_norm": 0.5300816893577576,
      "learning_rate": 3.525299315379902e-06,
      "loss": 0.0092,
      "step": 1985240
    },
    {
      "epoch": 3.2489215320463725,
      "grad_norm": 0.30601343512535095,
      "learning_rate": 3.5252334231663847e-06,
      "loss": 0.0109,
      "step": 1985260
    },
    {
      "epoch": 3.248954262485026,
      "grad_norm": 0.16557876765727997,
      "learning_rate": 3.5251675309528675e-06,
      "loss": 0.0178,
      "step": 1985280
    },
    {
      "epoch": 3.248986992923679,
      "grad_norm": 1.252563714981079,
      "learning_rate": 3.5251016387393506e-06,
      "loss": 0.015,
      "step": 1985300
    },
    {
      "epoch": 3.2490197233623324,
      "grad_norm": 0.10212031751871109,
      "learning_rate": 3.5250357465258334e-06,
      "loss": 0.0141,
      "step": 1985320
    },
    {
      "epoch": 3.249052453800986,
      "grad_norm": 0.6407527327537537,
      "learning_rate": 3.524969854312316e-06,
      "loss": 0.012,
      "step": 1985340
    },
    {
      "epoch": 3.249085184239639,
      "grad_norm": 0.30299144983291626,
      "learning_rate": 3.524903962098799e-06,
      "loss": 0.0105,
      "step": 1985360
    },
    {
      "epoch": 3.2491179146782927,
      "grad_norm": 0.35215598344802856,
      "learning_rate": 3.524838069885282e-06,
      "loss": 0.0137,
      "step": 1985380
    },
    {
      "epoch": 3.249150645116946,
      "grad_norm": 0.16654345393180847,
      "learning_rate": 3.5247721776717648e-06,
      "loss": 0.0093,
      "step": 1985400
    },
    {
      "epoch": 3.249183375555599,
      "grad_norm": 0.2196573168039322,
      "learning_rate": 3.5247062854582475e-06,
      "loss": 0.0078,
      "step": 1985420
    },
    {
      "epoch": 3.2492161059942526,
      "grad_norm": 0.16456738114356995,
      "learning_rate": 3.5246403932447302e-06,
      "loss": 0.0097,
      "step": 1985440
    },
    {
      "epoch": 3.2492488364329057,
      "grad_norm": 0.17484401166439056,
      "learning_rate": 3.5245745010312134e-06,
      "loss": 0.0107,
      "step": 1985460
    },
    {
      "epoch": 3.2492815668715593,
      "grad_norm": 0.17768387496471405,
      "learning_rate": 3.524508608817696e-06,
      "loss": 0.0099,
      "step": 1985480
    },
    {
      "epoch": 3.2493142973102125,
      "grad_norm": 0.24211831390857697,
      "learning_rate": 3.524442716604179e-06,
      "loss": 0.0119,
      "step": 1985500
    },
    {
      "epoch": 3.249347027748866,
      "grad_norm": 0.25750765204429626,
      "learning_rate": 3.5243768243906625e-06,
      "loss": 0.0139,
      "step": 1985520
    },
    {
      "epoch": 3.2493797581875192,
      "grad_norm": 0.22419404983520508,
      "learning_rate": 3.5243109321771452e-06,
      "loss": 0.0085,
      "step": 1985540
    },
    {
      "epoch": 3.2494124886261724,
      "grad_norm": 0.06748738884925842,
      "learning_rate": 3.524245039963628e-06,
      "loss": 0.0077,
      "step": 1985560
    },
    {
      "epoch": 3.249445219064826,
      "grad_norm": 0.19749611616134644,
      "learning_rate": 3.5241791477501107e-06,
      "loss": 0.0093,
      "step": 1985580
    },
    {
      "epoch": 3.249477949503479,
      "grad_norm": 0.08001768589019775,
      "learning_rate": 3.524113255536594e-06,
      "loss": 0.0092,
      "step": 1985600
    },
    {
      "epoch": 3.2495106799421327,
      "grad_norm": 0.26148033142089844,
      "learning_rate": 3.5240473633230766e-06,
      "loss": 0.0123,
      "step": 1985620
    },
    {
      "epoch": 3.249543410380786,
      "grad_norm": 0.30710723996162415,
      "learning_rate": 3.5239814711095593e-06,
      "loss": 0.01,
      "step": 1985640
    },
    {
      "epoch": 3.2495761408194395,
      "grad_norm": 0.27390149235725403,
      "learning_rate": 3.523915578896042e-06,
      "loss": 0.0122,
      "step": 1985660
    },
    {
      "epoch": 3.2496088712580926,
      "grad_norm": 0.2042195200920105,
      "learning_rate": 3.523849686682525e-06,
      "loss": 0.0111,
      "step": 1985680
    },
    {
      "epoch": 3.2496416016967458,
      "grad_norm": 0.31369730830192566,
      "learning_rate": 3.523783794469008e-06,
      "loss": 0.0078,
      "step": 1985700
    },
    {
      "epoch": 3.2496743321353994,
      "grad_norm": 0.10861530900001526,
      "learning_rate": 3.5237179022554907e-06,
      "loss": 0.0167,
      "step": 1985720
    },
    {
      "epoch": 3.2497070625740525,
      "grad_norm": 0.5321092009544373,
      "learning_rate": 3.5236520100419735e-06,
      "loss": 0.0128,
      "step": 1985740
    },
    {
      "epoch": 3.249739793012706,
      "grad_norm": 0.4547572135925293,
      "learning_rate": 3.523586117828456e-06,
      "loss": 0.0126,
      "step": 1985760
    },
    {
      "epoch": 3.2497725234513593,
      "grad_norm": 0.1752498745918274,
      "learning_rate": 3.5235202256149394e-06,
      "loss": 0.0093,
      "step": 1985780
    },
    {
      "epoch": 3.2498052538900124,
      "grad_norm": 0.6283344030380249,
      "learning_rate": 3.523454333401422e-06,
      "loss": 0.0135,
      "step": 1985800
    },
    {
      "epoch": 3.249837984328666,
      "grad_norm": 0.12316294759511948,
      "learning_rate": 3.523388441187905e-06,
      "loss": 0.0098,
      "step": 1985820
    },
    {
      "epoch": 3.249870714767319,
      "grad_norm": 0.18895146250724792,
      "learning_rate": 3.5233225489743876e-06,
      "loss": 0.0093,
      "step": 1985840
    },
    {
      "epoch": 3.2499034452059727,
      "grad_norm": 1.3561288118362427,
      "learning_rate": 3.5232566567608707e-06,
      "loss": 0.0097,
      "step": 1985860
    },
    {
      "epoch": 3.249936175644626,
      "grad_norm": 0.43963709473609924,
      "learning_rate": 3.523190764547354e-06,
      "loss": 0.0069,
      "step": 1985880
    },
    {
      "epoch": 3.2499689060832795,
      "grad_norm": 0.7830860018730164,
      "learning_rate": 3.5231248723338367e-06,
      "loss": 0.011,
      "step": 1985900
    },
    {
      "epoch": 3.2500016365219326,
      "grad_norm": 0.12229547649621964,
      "learning_rate": 3.52305898012032e-06,
      "loss": 0.0175,
      "step": 1985920
    },
    {
      "epoch": 3.250034366960586,
      "grad_norm": 0.2443300038576126,
      "learning_rate": 3.5229930879068026e-06,
      "loss": 0.0109,
      "step": 1985940
    },
    {
      "epoch": 3.2500670973992394,
      "grad_norm": 0.9336799383163452,
      "learning_rate": 3.5229271956932853e-06,
      "loss": 0.0112,
      "step": 1985960
    },
    {
      "epoch": 3.2500998278378925,
      "grad_norm": 0.3197353184223175,
      "learning_rate": 3.522861303479768e-06,
      "loss": 0.0093,
      "step": 1985980
    },
    {
      "epoch": 3.250132558276546,
      "grad_norm": 0.22984136641025543,
      "learning_rate": 3.522795411266251e-06,
      "loss": 0.0077,
      "step": 1986000
    },
    {
      "epoch": 3.2501652887151993,
      "grad_norm": 0.629848301410675,
      "learning_rate": 3.522729519052734e-06,
      "loss": 0.0154,
      "step": 1986020
    },
    {
      "epoch": 3.250198019153853,
      "grad_norm": 0.1136171966791153,
      "learning_rate": 3.5226636268392167e-06,
      "loss": 0.0177,
      "step": 1986040
    },
    {
      "epoch": 3.250230749592506,
      "grad_norm": 0.09626401215791702,
      "learning_rate": 3.5225977346256994e-06,
      "loss": 0.0137,
      "step": 1986060
    },
    {
      "epoch": 3.250263480031159,
      "grad_norm": 0.8465272784233093,
      "learning_rate": 3.5225318424121826e-06,
      "loss": 0.0159,
      "step": 1986080
    },
    {
      "epoch": 3.2502962104698128,
      "grad_norm": 0.41973888874053955,
      "learning_rate": 3.5224659501986653e-06,
      "loss": 0.011,
      "step": 1986100
    },
    {
      "epoch": 3.250328940908466,
      "grad_norm": 0.44592273235321045,
      "learning_rate": 3.522400057985148e-06,
      "loss": 0.0121,
      "step": 1986120
    },
    {
      "epoch": 3.2503616713471195,
      "grad_norm": 0.36803337931632996,
      "learning_rate": 3.522334165771631e-06,
      "loss": 0.0078,
      "step": 1986140
    },
    {
      "epoch": 3.2503944017857727,
      "grad_norm": 0.12148214876651764,
      "learning_rate": 3.5222682735581135e-06,
      "loss": 0.0113,
      "step": 1986160
    },
    {
      "epoch": 3.2504271322244263,
      "grad_norm": 0.4930921196937561,
      "learning_rate": 3.5222023813445967e-06,
      "loss": 0.0102,
      "step": 1986180
    },
    {
      "epoch": 3.2504598626630794,
      "grad_norm": 0.5574247241020203,
      "learning_rate": 3.5221364891310794e-06,
      "loss": 0.0099,
      "step": 1986200
    },
    {
      "epoch": 3.2504925931017326,
      "grad_norm": 0.32918238639831543,
      "learning_rate": 3.5220705969175626e-06,
      "loss": 0.0166,
      "step": 1986220
    },
    {
      "epoch": 3.250525323540386,
      "grad_norm": 0.3744732439517975,
      "learning_rate": 3.5220047047040458e-06,
      "loss": 0.01,
      "step": 1986240
    },
    {
      "epoch": 3.2505580539790393,
      "grad_norm": 0.1705554574728012,
      "learning_rate": 3.5219388124905285e-06,
      "loss": 0.0156,
      "step": 1986260
    },
    {
      "epoch": 3.250590784417693,
      "grad_norm": 0.3961333632469177,
      "learning_rate": 3.5218729202770112e-06,
      "loss": 0.0128,
      "step": 1986280
    },
    {
      "epoch": 3.250623514856346,
      "grad_norm": 0.5574208498001099,
      "learning_rate": 3.521807028063494e-06,
      "loss": 0.006,
      "step": 1986300
    },
    {
      "epoch": 3.2506562452949996,
      "grad_norm": 0.23478631675243378,
      "learning_rate": 3.521741135849977e-06,
      "loss": 0.0106,
      "step": 1986320
    },
    {
      "epoch": 3.250688975733653,
      "grad_norm": 0.1530274748802185,
      "learning_rate": 3.52167524363646e-06,
      "loss": 0.0127,
      "step": 1986340
    },
    {
      "epoch": 3.250721706172306,
      "grad_norm": 0.4253329038619995,
      "learning_rate": 3.5216093514229426e-06,
      "loss": 0.0142,
      "step": 1986360
    },
    {
      "epoch": 3.2507544366109595,
      "grad_norm": 0.2741680145263672,
      "learning_rate": 3.5215434592094254e-06,
      "loss": 0.0171,
      "step": 1986380
    },
    {
      "epoch": 3.2507871670496127,
      "grad_norm": 0.22450582683086395,
      "learning_rate": 3.5214775669959085e-06,
      "loss": 0.0115,
      "step": 1986400
    },
    {
      "epoch": 3.2508198974882663,
      "grad_norm": 0.18688590824604034,
      "learning_rate": 3.5214116747823913e-06,
      "loss": 0.0143,
      "step": 1986420
    },
    {
      "epoch": 3.2508526279269194,
      "grad_norm": 0.4134286344051361,
      "learning_rate": 3.521345782568874e-06,
      "loss": 0.0117,
      "step": 1986440
    },
    {
      "epoch": 3.250885358365573,
      "grad_norm": 0.6909608840942383,
      "learning_rate": 3.5212798903553568e-06,
      "loss": 0.018,
      "step": 1986460
    },
    {
      "epoch": 3.250918088804226,
      "grad_norm": 0.13169464468955994,
      "learning_rate": 3.52121399814184e-06,
      "loss": 0.0158,
      "step": 1986480
    },
    {
      "epoch": 3.2509508192428793,
      "grad_norm": 1.7914515733718872,
      "learning_rate": 3.5211481059283227e-06,
      "loss": 0.0122,
      "step": 1986500
    },
    {
      "epoch": 3.250983549681533,
      "grad_norm": 0.8786922097206116,
      "learning_rate": 3.5210822137148054e-06,
      "loss": 0.0161,
      "step": 1986520
    },
    {
      "epoch": 3.251016280120186,
      "grad_norm": 0.3830643892288208,
      "learning_rate": 3.521016321501288e-06,
      "loss": 0.0141,
      "step": 1986540
    },
    {
      "epoch": 3.2510490105588397,
      "grad_norm": 0.3260377049446106,
      "learning_rate": 3.5209504292877713e-06,
      "loss": 0.0098,
      "step": 1986560
    },
    {
      "epoch": 3.251081740997493,
      "grad_norm": 0.35603317618370056,
      "learning_rate": 3.5208845370742545e-06,
      "loss": 0.0172,
      "step": 1986580
    },
    {
      "epoch": 3.2511144714361464,
      "grad_norm": 0.1901927888393402,
      "learning_rate": 3.520818644860737e-06,
      "loss": 0.0098,
      "step": 1986600
    },
    {
      "epoch": 3.2511472018747996,
      "grad_norm": 0.12151406705379486,
      "learning_rate": 3.5207527526472204e-06,
      "loss": 0.0102,
      "step": 1986620
    },
    {
      "epoch": 3.2511799323134527,
      "grad_norm": 0.18682414293289185,
      "learning_rate": 3.520686860433703e-06,
      "loss": 0.0116,
      "step": 1986640
    },
    {
      "epoch": 3.2512126627521063,
      "grad_norm": 0.29916948080062866,
      "learning_rate": 3.520620968220186e-06,
      "loss": 0.0158,
      "step": 1986660
    },
    {
      "epoch": 3.2512453931907594,
      "grad_norm": 0.9401682615280151,
      "learning_rate": 3.5205550760066686e-06,
      "loss": 0.0119,
      "step": 1986680
    },
    {
      "epoch": 3.251278123629413,
      "grad_norm": 0.5911813378334045,
      "learning_rate": 3.5204891837931513e-06,
      "loss": 0.0112,
      "step": 1986700
    },
    {
      "epoch": 3.251310854068066,
      "grad_norm": 0.13369478285312653,
      "learning_rate": 3.5204232915796345e-06,
      "loss": 0.0081,
      "step": 1986720
    },
    {
      "epoch": 3.25134358450672,
      "grad_norm": 0.18952509760856628,
      "learning_rate": 3.5203573993661172e-06,
      "loss": 0.0074,
      "step": 1986740
    },
    {
      "epoch": 3.251376314945373,
      "grad_norm": 0.6333802938461304,
      "learning_rate": 3.5202915071526e-06,
      "loss": 0.0088,
      "step": 1986760
    },
    {
      "epoch": 3.251409045384026,
      "grad_norm": 0.20696483552455902,
      "learning_rate": 3.5202256149390827e-06,
      "loss": 0.012,
      "step": 1986780
    },
    {
      "epoch": 3.2514417758226797,
      "grad_norm": 0.5370293855667114,
      "learning_rate": 3.520159722725566e-06,
      "loss": 0.0097,
      "step": 1986800
    },
    {
      "epoch": 3.251474506261333,
      "grad_norm": 0.8383751511573792,
      "learning_rate": 3.5200938305120486e-06,
      "loss": 0.0204,
      "step": 1986820
    },
    {
      "epoch": 3.2515072366999864,
      "grad_norm": 0.4343893229961395,
      "learning_rate": 3.5200279382985313e-06,
      "loss": 0.0094,
      "step": 1986840
    },
    {
      "epoch": 3.2515399671386396,
      "grad_norm": 4.435855865478516,
      "learning_rate": 3.519962046085014e-06,
      "loss": 0.0122,
      "step": 1986860
    },
    {
      "epoch": 3.251572697577293,
      "grad_norm": 0.17658492922782898,
      "learning_rate": 3.5198961538714973e-06,
      "loss": 0.0082,
      "step": 1986880
    },
    {
      "epoch": 3.2516054280159463,
      "grad_norm": 0.2742083668708801,
      "learning_rate": 3.51983026165798e-06,
      "loss": 0.0095,
      "step": 1986900
    },
    {
      "epoch": 3.2516381584545995,
      "grad_norm": 0.2689216136932373,
      "learning_rate": 3.5197643694444627e-06,
      "loss": 0.0121,
      "step": 1986920
    },
    {
      "epoch": 3.251670888893253,
      "grad_norm": 0.20395834743976593,
      "learning_rate": 3.5196984772309463e-06,
      "loss": 0.0091,
      "step": 1986940
    },
    {
      "epoch": 3.251703619331906,
      "grad_norm": 0.33876481652259827,
      "learning_rate": 3.519632585017429e-06,
      "loss": 0.0088,
      "step": 1986960
    },
    {
      "epoch": 3.25173634977056,
      "grad_norm": 0.3366509675979614,
      "learning_rate": 3.519566692803912e-06,
      "loss": 0.0112,
      "step": 1986980
    },
    {
      "epoch": 3.251769080209213,
      "grad_norm": 0.4811306893825531,
      "learning_rate": 3.5195008005903945e-06,
      "loss": 0.0114,
      "step": 1987000
    },
    {
      "epoch": 3.251801810647866,
      "grad_norm": 0.913037121295929,
      "learning_rate": 3.5194349083768777e-06,
      "loss": 0.0163,
      "step": 1987020
    },
    {
      "epoch": 3.2518345410865197,
      "grad_norm": 0.3181779086589813,
      "learning_rate": 3.5193690161633604e-06,
      "loss": 0.0118,
      "step": 1987040
    },
    {
      "epoch": 3.251867271525173,
      "grad_norm": 1.5449738502502441,
      "learning_rate": 3.519303123949843e-06,
      "loss": 0.0112,
      "step": 1987060
    },
    {
      "epoch": 3.2519000019638264,
      "grad_norm": 0.07328910380601883,
      "learning_rate": 3.519237231736326e-06,
      "loss": 0.0088,
      "step": 1987080
    },
    {
      "epoch": 3.2519327324024796,
      "grad_norm": 0.33112820982933044,
      "learning_rate": 3.519171339522809e-06,
      "loss": 0.0091,
      "step": 1987100
    },
    {
      "epoch": 3.2519654628411327,
      "grad_norm": 0.18081608414649963,
      "learning_rate": 3.519105447309292e-06,
      "loss": 0.0096,
      "step": 1987120
    },
    {
      "epoch": 3.2519981932797863,
      "grad_norm": 0.18523919582366943,
      "learning_rate": 3.5190395550957746e-06,
      "loss": 0.014,
      "step": 1987140
    },
    {
      "epoch": 3.2520309237184395,
      "grad_norm": 0.1537148505449295,
      "learning_rate": 3.5189736628822573e-06,
      "loss": 0.0119,
      "step": 1987160
    },
    {
      "epoch": 3.252063654157093,
      "grad_norm": 0.38437405228614807,
      "learning_rate": 3.51890777066874e-06,
      "loss": 0.007,
      "step": 1987180
    },
    {
      "epoch": 3.2520963845957462,
      "grad_norm": 0.14087846875190735,
      "learning_rate": 3.518841878455223e-06,
      "loss": 0.0095,
      "step": 1987200
    },
    {
      "epoch": 3.2521291150344,
      "grad_norm": 0.25654786825180054,
      "learning_rate": 3.518775986241706e-06,
      "loss": 0.0145,
      "step": 1987220
    },
    {
      "epoch": 3.252161845473053,
      "grad_norm": 0.2883637547492981,
      "learning_rate": 3.5187100940281887e-06,
      "loss": 0.0101,
      "step": 1987240
    },
    {
      "epoch": 3.252194575911706,
      "grad_norm": 0.10377970337867737,
      "learning_rate": 3.5186442018146714e-06,
      "loss": 0.0098,
      "step": 1987260
    },
    {
      "epoch": 3.2522273063503597,
      "grad_norm": 0.3140406906604767,
      "learning_rate": 3.518578309601155e-06,
      "loss": 0.0156,
      "step": 1987280
    },
    {
      "epoch": 3.252260036789013,
      "grad_norm": 0.42459115386009216,
      "learning_rate": 3.5185124173876378e-06,
      "loss": 0.0075,
      "step": 1987300
    },
    {
      "epoch": 3.2522927672276665,
      "grad_norm": 0.30755913257598877,
      "learning_rate": 3.5184465251741205e-06,
      "loss": 0.0131,
      "step": 1987320
    },
    {
      "epoch": 3.2523254976663196,
      "grad_norm": 0.22139790654182434,
      "learning_rate": 3.5183806329606037e-06,
      "loss": 0.0139,
      "step": 1987340
    },
    {
      "epoch": 3.252358228104973,
      "grad_norm": 0.5033778548240662,
      "learning_rate": 3.5183147407470864e-06,
      "loss": 0.0116,
      "step": 1987360
    },
    {
      "epoch": 3.2523909585436264,
      "grad_norm": 0.27894407510757446,
      "learning_rate": 3.518248848533569e-06,
      "loss": 0.0154,
      "step": 1987380
    },
    {
      "epoch": 3.2524236889822795,
      "grad_norm": 0.4008443057537079,
      "learning_rate": 3.518182956320052e-06,
      "loss": 0.0117,
      "step": 1987400
    },
    {
      "epoch": 3.252456419420933,
      "grad_norm": 0.8089640736579895,
      "learning_rate": 3.518117064106535e-06,
      "loss": 0.017,
      "step": 1987420
    },
    {
      "epoch": 3.2524891498595863,
      "grad_norm": 1.2713521718978882,
      "learning_rate": 3.5180511718930178e-06,
      "loss": 0.0152,
      "step": 1987440
    },
    {
      "epoch": 3.25252188029824,
      "grad_norm": 0.12613065540790558,
      "learning_rate": 3.5179852796795005e-06,
      "loss": 0.0094,
      "step": 1987460
    },
    {
      "epoch": 3.252554610736893,
      "grad_norm": 0.333716481924057,
      "learning_rate": 3.5179193874659833e-06,
      "loss": 0.0092,
      "step": 1987480
    },
    {
      "epoch": 3.2525873411755466,
      "grad_norm": 0.5767207741737366,
      "learning_rate": 3.5178534952524664e-06,
      "loss": 0.0171,
      "step": 1987500
    },
    {
      "epoch": 3.2526200716141997,
      "grad_norm": 0.1088896244764328,
      "learning_rate": 3.517787603038949e-06,
      "loss": 0.0152,
      "step": 1987520
    },
    {
      "epoch": 3.252652802052853,
      "grad_norm": 0.22902226448059082,
      "learning_rate": 3.517721710825432e-06,
      "loss": 0.0119,
      "step": 1987540
    },
    {
      "epoch": 3.2526855324915065,
      "grad_norm": 0.1872025728225708,
      "learning_rate": 3.5176558186119146e-06,
      "loss": 0.009,
      "step": 1987560
    },
    {
      "epoch": 3.2527182629301596,
      "grad_norm": 0.28034695982933044,
      "learning_rate": 3.5175899263983974e-06,
      "loss": 0.0072,
      "step": 1987580
    },
    {
      "epoch": 3.2527509933688132,
      "grad_norm": 0.1866752803325653,
      "learning_rate": 3.5175240341848805e-06,
      "loss": 0.0098,
      "step": 1987600
    },
    {
      "epoch": 3.2527837238074664,
      "grad_norm": 0.6947690844535828,
      "learning_rate": 3.5174581419713633e-06,
      "loss": 0.0095,
      "step": 1987620
    },
    {
      "epoch": 3.25281645424612,
      "grad_norm": 0.13815709948539734,
      "learning_rate": 3.5173922497578464e-06,
      "loss": 0.0113,
      "step": 1987640
    },
    {
      "epoch": 3.252849184684773,
      "grad_norm": 0.5014504194259644,
      "learning_rate": 3.5173263575443296e-06,
      "loss": 0.0135,
      "step": 1987660
    },
    {
      "epoch": 3.2528819151234263,
      "grad_norm": 0.45607244968414307,
      "learning_rate": 3.5172604653308123e-06,
      "loss": 0.0076,
      "step": 1987680
    },
    {
      "epoch": 3.25291464556208,
      "grad_norm": 0.20284751057624817,
      "learning_rate": 3.517194573117295e-06,
      "loss": 0.0124,
      "step": 1987700
    },
    {
      "epoch": 3.252947376000733,
      "grad_norm": 0.20536310970783234,
      "learning_rate": 3.517128680903778e-06,
      "loss": 0.0114,
      "step": 1987720
    },
    {
      "epoch": 3.2529801064393866,
      "grad_norm": 0.35136881470680237,
      "learning_rate": 3.517062788690261e-06,
      "loss": 0.0107,
      "step": 1987740
    },
    {
      "epoch": 3.2530128368780398,
      "grad_norm": 0.16091051697731018,
      "learning_rate": 3.5169968964767437e-06,
      "loss": 0.0126,
      "step": 1987760
    },
    {
      "epoch": 3.2530455673166934,
      "grad_norm": 0.21123498678207397,
      "learning_rate": 3.5169310042632265e-06,
      "loss": 0.0103,
      "step": 1987780
    },
    {
      "epoch": 3.2530782977553465,
      "grad_norm": 0.24692058563232422,
      "learning_rate": 3.516865112049709e-06,
      "loss": 0.0104,
      "step": 1987800
    },
    {
      "epoch": 3.2531110281939997,
      "grad_norm": 0.28381651639938354,
      "learning_rate": 3.5167992198361924e-06,
      "loss": 0.0097,
      "step": 1987820
    },
    {
      "epoch": 3.2531437586326533,
      "grad_norm": 0.34602779150009155,
      "learning_rate": 3.516733327622675e-06,
      "loss": 0.0125,
      "step": 1987840
    },
    {
      "epoch": 3.2531764890713064,
      "grad_norm": 0.14097291231155396,
      "learning_rate": 3.516667435409158e-06,
      "loss": 0.0167,
      "step": 1987860
    },
    {
      "epoch": 3.25320921950996,
      "grad_norm": 0.7251092791557312,
      "learning_rate": 3.5166015431956406e-06,
      "loss": 0.0105,
      "step": 1987880
    },
    {
      "epoch": 3.253241949948613,
      "grad_norm": 0.27453309297561646,
      "learning_rate": 3.5165356509821238e-06,
      "loss": 0.011,
      "step": 1987900
    },
    {
      "epoch": 3.2532746803872667,
      "grad_norm": 0.3032790720462799,
      "learning_rate": 3.5164697587686065e-06,
      "loss": 0.0077,
      "step": 1987920
    },
    {
      "epoch": 3.25330741082592,
      "grad_norm": 0.35166245698928833,
      "learning_rate": 3.5164038665550892e-06,
      "loss": 0.012,
      "step": 1987940
    },
    {
      "epoch": 3.253340141264573,
      "grad_norm": 0.2392314225435257,
      "learning_rate": 3.516337974341572e-06,
      "loss": 0.0075,
      "step": 1987960
    },
    {
      "epoch": 3.2533728717032266,
      "grad_norm": 0.2751288414001465,
      "learning_rate": 3.5162720821280556e-06,
      "loss": 0.0153,
      "step": 1987980
    },
    {
      "epoch": 3.25340560214188,
      "grad_norm": 0.10400491207838058,
      "learning_rate": 3.5162061899145383e-06,
      "loss": 0.0148,
      "step": 1988000
    },
    {
      "epoch": 3.2534383325805334,
      "grad_norm": 0.6376839280128479,
      "learning_rate": 3.516140297701021e-06,
      "loss": 0.0097,
      "step": 1988020
    },
    {
      "epoch": 3.2534710630191865,
      "grad_norm": 0.305841863155365,
      "learning_rate": 3.516074405487504e-06,
      "loss": 0.0087,
      "step": 1988040
    },
    {
      "epoch": 3.25350379345784,
      "grad_norm": 0.7369772791862488,
      "learning_rate": 3.516008513273987e-06,
      "loss": 0.012,
      "step": 1988060
    },
    {
      "epoch": 3.2535365238964933,
      "grad_norm": 0.18752191960811615,
      "learning_rate": 3.5159426210604697e-06,
      "loss": 0.0132,
      "step": 1988080
    },
    {
      "epoch": 3.2535692543351464,
      "grad_norm": 0.631911039352417,
      "learning_rate": 3.5158767288469524e-06,
      "loss": 0.01,
      "step": 1988100
    },
    {
      "epoch": 3.2536019847738,
      "grad_norm": 0.4324752986431122,
      "learning_rate": 3.515810836633435e-06,
      "loss": 0.0146,
      "step": 1988120
    },
    {
      "epoch": 3.253634715212453,
      "grad_norm": 0.16448654234409332,
      "learning_rate": 3.5157449444199183e-06,
      "loss": 0.0112,
      "step": 1988140
    },
    {
      "epoch": 3.2536674456511068,
      "grad_norm": 1.0197252035140991,
      "learning_rate": 3.515679052206401e-06,
      "loss": 0.0137,
      "step": 1988160
    },
    {
      "epoch": 3.25370017608976,
      "grad_norm": 0.39480826258659363,
      "learning_rate": 3.515613159992884e-06,
      "loss": 0.0115,
      "step": 1988180
    },
    {
      "epoch": 3.2537329065284135,
      "grad_norm": 0.35613518953323364,
      "learning_rate": 3.5155472677793665e-06,
      "loss": 0.0128,
      "step": 1988200
    },
    {
      "epoch": 3.2537656369670667,
      "grad_norm": 0.3696866035461426,
      "learning_rate": 3.5154813755658497e-06,
      "loss": 0.0096,
      "step": 1988220
    },
    {
      "epoch": 3.25379836740572,
      "grad_norm": 0.20285460352897644,
      "learning_rate": 3.5154154833523324e-06,
      "loss": 0.0111,
      "step": 1988240
    },
    {
      "epoch": 3.2538310978443734,
      "grad_norm": 0.276691198348999,
      "learning_rate": 3.515349591138815e-06,
      "loss": 0.0105,
      "step": 1988260
    },
    {
      "epoch": 3.2538638282830266,
      "grad_norm": 0.21706622838974,
      "learning_rate": 3.515283698925298e-06,
      "loss": 0.0123,
      "step": 1988280
    },
    {
      "epoch": 3.25389655872168,
      "grad_norm": 0.17522303760051727,
      "learning_rate": 3.515217806711781e-06,
      "loss": 0.0128,
      "step": 1988300
    },
    {
      "epoch": 3.2539292891603333,
      "grad_norm": 0.029025493189692497,
      "learning_rate": 3.515151914498264e-06,
      "loss": 0.0088,
      "step": 1988320
    },
    {
      "epoch": 3.253962019598987,
      "grad_norm": 0.4584561288356781,
      "learning_rate": 3.515086022284747e-06,
      "loss": 0.0096,
      "step": 1988340
    },
    {
      "epoch": 3.25399475003764,
      "grad_norm": 0.2738473117351532,
      "learning_rate": 3.51502013007123e-06,
      "loss": 0.0178,
      "step": 1988360
    },
    {
      "epoch": 3.254027480476293,
      "grad_norm": 0.21867720782756805,
      "learning_rate": 3.514954237857713e-06,
      "loss": 0.0086,
      "step": 1988380
    },
    {
      "epoch": 3.254060210914947,
      "grad_norm": 0.39178988337516785,
      "learning_rate": 3.5148883456441956e-06,
      "loss": 0.0126,
      "step": 1988400
    },
    {
      "epoch": 3.2540929413536,
      "grad_norm": 0.08858368545770645,
      "learning_rate": 3.5148224534306784e-06,
      "loss": 0.0084,
      "step": 1988420
    },
    {
      "epoch": 3.2541256717922535,
      "grad_norm": 0.21212415397167206,
      "learning_rate": 3.5147565612171615e-06,
      "loss": 0.0146,
      "step": 1988440
    },
    {
      "epoch": 3.2541584022309067,
      "grad_norm": 0.400878369808197,
      "learning_rate": 3.5146906690036443e-06,
      "loss": 0.0136,
      "step": 1988460
    },
    {
      "epoch": 3.2541911326695603,
      "grad_norm": 0.2275983840227127,
      "learning_rate": 3.514624776790127e-06,
      "loss": 0.0138,
      "step": 1988480
    },
    {
      "epoch": 3.2542238631082134,
      "grad_norm": 0.5764121413230896,
      "learning_rate": 3.5145588845766098e-06,
      "loss": 0.0101,
      "step": 1988500
    },
    {
      "epoch": 3.2542565935468666,
      "grad_norm": 0.11333488672971725,
      "learning_rate": 3.514492992363093e-06,
      "loss": 0.0111,
      "step": 1988520
    },
    {
      "epoch": 3.25428932398552,
      "grad_norm": 0.16940084099769592,
      "learning_rate": 3.5144271001495757e-06,
      "loss": 0.0097,
      "step": 1988540
    },
    {
      "epoch": 3.2543220544241733,
      "grad_norm": 0.36415743827819824,
      "learning_rate": 3.5143612079360584e-06,
      "loss": 0.0126,
      "step": 1988560
    },
    {
      "epoch": 3.254354784862827,
      "grad_norm": 0.0774458795785904,
      "learning_rate": 3.514295315722541e-06,
      "loss": 0.0111,
      "step": 1988580
    },
    {
      "epoch": 3.25438751530148,
      "grad_norm": 0.3005485236644745,
      "learning_rate": 3.514229423509024e-06,
      "loss": 0.0141,
      "step": 1988600
    },
    {
      "epoch": 3.254420245740133,
      "grad_norm": 0.8334574699401855,
      "learning_rate": 3.514163531295507e-06,
      "loss": 0.012,
      "step": 1988620
    },
    {
      "epoch": 3.254452976178787,
      "grad_norm": 0.16000157594680786,
      "learning_rate": 3.5140976390819898e-06,
      "loss": 0.0073,
      "step": 1988640
    },
    {
      "epoch": 3.25448570661744,
      "grad_norm": 0.8406654000282288,
      "learning_rate": 3.5140317468684725e-06,
      "loss": 0.0138,
      "step": 1988660
    },
    {
      "epoch": 3.2545184370560936,
      "grad_norm": 0.4370242953300476,
      "learning_rate": 3.513965854654956e-06,
      "loss": 0.0125,
      "step": 1988680
    },
    {
      "epoch": 3.2545511674947467,
      "grad_norm": 0.9619523286819458,
      "learning_rate": 3.513899962441439e-06,
      "loss": 0.012,
      "step": 1988700
    },
    {
      "epoch": 3.2545838979334,
      "grad_norm": 0.32013213634490967,
      "learning_rate": 3.5138340702279216e-06,
      "loss": 0.0104,
      "step": 1988720
    },
    {
      "epoch": 3.2546166283720535,
      "grad_norm": 0.5138006806373596,
      "learning_rate": 3.5137681780144043e-06,
      "loss": 0.012,
      "step": 1988740
    },
    {
      "epoch": 3.2546493588107066,
      "grad_norm": 0.5219693779945374,
      "learning_rate": 3.5137022858008875e-06,
      "loss": 0.0131,
      "step": 1988760
    },
    {
      "epoch": 3.25468208924936,
      "grad_norm": 0.1147947907447815,
      "learning_rate": 3.5136363935873702e-06,
      "loss": 0.015,
      "step": 1988780
    },
    {
      "epoch": 3.2547148196880133,
      "grad_norm": 0.4304613173007965,
      "learning_rate": 3.513570501373853e-06,
      "loss": 0.0142,
      "step": 1988800
    },
    {
      "epoch": 3.254747550126667,
      "grad_norm": 0.18047155439853668,
      "learning_rate": 3.5135046091603357e-06,
      "loss": 0.0078,
      "step": 1988820
    },
    {
      "epoch": 3.25478028056532,
      "grad_norm": 0.22471244633197784,
      "learning_rate": 3.513438716946819e-06,
      "loss": 0.017,
      "step": 1988840
    },
    {
      "epoch": 3.2548130110039732,
      "grad_norm": 0.7309468984603882,
      "learning_rate": 3.5133728247333016e-06,
      "loss": 0.0119,
      "step": 1988860
    },
    {
      "epoch": 3.254845741442627,
      "grad_norm": 0.356092631816864,
      "learning_rate": 3.5133069325197844e-06,
      "loss": 0.0133,
      "step": 1988880
    },
    {
      "epoch": 3.25487847188128,
      "grad_norm": 0.18871569633483887,
      "learning_rate": 3.513241040306267e-06,
      "loss": 0.0123,
      "step": 1988900
    },
    {
      "epoch": 3.2549112023199336,
      "grad_norm": 0.3597884178161621,
      "learning_rate": 3.5131751480927503e-06,
      "loss": 0.0146,
      "step": 1988920
    },
    {
      "epoch": 3.2549439327585867,
      "grad_norm": 0.6113330125808716,
      "learning_rate": 3.513109255879233e-06,
      "loss": 0.0102,
      "step": 1988940
    },
    {
      "epoch": 3.2549766631972403,
      "grad_norm": 0.5192270874977112,
      "learning_rate": 3.5130433636657157e-06,
      "loss": 0.0095,
      "step": 1988960
    },
    {
      "epoch": 3.2550093936358935,
      "grad_norm": 0.15592361986637115,
      "learning_rate": 3.5129774714521985e-06,
      "loss": 0.0128,
      "step": 1988980
    },
    {
      "epoch": 3.2550421240745466,
      "grad_norm": 0.5050336122512817,
      "learning_rate": 3.5129115792386812e-06,
      "loss": 0.0084,
      "step": 1989000
    },
    {
      "epoch": 3.2550748545132002,
      "grad_norm": 0.2817836403846741,
      "learning_rate": 3.5128456870251644e-06,
      "loss": 0.0076,
      "step": 1989020
    },
    {
      "epoch": 3.2551075849518534,
      "grad_norm": 0.15637409687042236,
      "learning_rate": 3.5127797948116475e-06,
      "loss": 0.0105,
      "step": 1989040
    },
    {
      "epoch": 3.255140315390507,
      "grad_norm": 0.25076642632484436,
      "learning_rate": 3.5127139025981307e-06,
      "loss": 0.0113,
      "step": 1989060
    },
    {
      "epoch": 3.25517304582916,
      "grad_norm": 0.2114003747701645,
      "learning_rate": 3.5126480103846134e-06,
      "loss": 0.0147,
      "step": 1989080
    },
    {
      "epoch": 3.2552057762678137,
      "grad_norm": 0.965477705001831,
      "learning_rate": 3.512582118171096e-06,
      "loss": 0.0128,
      "step": 1989100
    },
    {
      "epoch": 3.255238506706467,
      "grad_norm": 0.39264237880706787,
      "learning_rate": 3.512516225957579e-06,
      "loss": 0.0088,
      "step": 1989120
    },
    {
      "epoch": 3.25527123714512,
      "grad_norm": 0.2682541012763977,
      "learning_rate": 3.5124503337440617e-06,
      "loss": 0.0177,
      "step": 1989140
    },
    {
      "epoch": 3.2553039675837736,
      "grad_norm": 0.11388775706291199,
      "learning_rate": 3.512384441530545e-06,
      "loss": 0.0092,
      "step": 1989160
    },
    {
      "epoch": 3.2553366980224268,
      "grad_norm": 0.27871572971343994,
      "learning_rate": 3.5123185493170276e-06,
      "loss": 0.0128,
      "step": 1989180
    },
    {
      "epoch": 3.2553694284610803,
      "grad_norm": 0.1725601702928543,
      "learning_rate": 3.5122526571035103e-06,
      "loss": 0.008,
      "step": 1989200
    },
    {
      "epoch": 3.2554021588997335,
      "grad_norm": 0.19214791059494019,
      "learning_rate": 3.512186764889993e-06,
      "loss": 0.0108,
      "step": 1989220
    },
    {
      "epoch": 3.255434889338387,
      "grad_norm": 0.32414036989212036,
      "learning_rate": 3.5121208726764762e-06,
      "loss": 0.0157,
      "step": 1989240
    },
    {
      "epoch": 3.2554676197770402,
      "grad_norm": 0.12550100684165955,
      "learning_rate": 3.512054980462959e-06,
      "loss": 0.0085,
      "step": 1989260
    },
    {
      "epoch": 3.2555003502156934,
      "grad_norm": 0.9843905568122864,
      "learning_rate": 3.5119890882494417e-06,
      "loss": 0.0138,
      "step": 1989280
    },
    {
      "epoch": 3.255533080654347,
      "grad_norm": 0.2586599290370941,
      "learning_rate": 3.5119231960359244e-06,
      "loss": 0.0093,
      "step": 1989300
    },
    {
      "epoch": 3.255565811093,
      "grad_norm": 0.4709298610687256,
      "learning_rate": 3.5118573038224076e-06,
      "loss": 0.0112,
      "step": 1989320
    },
    {
      "epoch": 3.2555985415316537,
      "grad_norm": 0.6140029430389404,
      "learning_rate": 3.5117914116088903e-06,
      "loss": 0.0104,
      "step": 1989340
    },
    {
      "epoch": 3.255631271970307,
      "grad_norm": 0.3822582960128784,
      "learning_rate": 3.511725519395373e-06,
      "loss": 0.0092,
      "step": 1989360
    },
    {
      "epoch": 3.2556640024089605,
      "grad_norm": 0.5399669408798218,
      "learning_rate": 3.511659627181856e-06,
      "loss": 0.0189,
      "step": 1989380
    },
    {
      "epoch": 3.2556967328476136,
      "grad_norm": 0.42432963848114014,
      "learning_rate": 3.5115937349683394e-06,
      "loss": 0.0097,
      "step": 1989400
    },
    {
      "epoch": 3.255729463286267,
      "grad_norm": 0.09791332483291626,
      "learning_rate": 3.511527842754822e-06,
      "loss": 0.0121,
      "step": 1989420
    },
    {
      "epoch": 3.2557621937249204,
      "grad_norm": 0.15285520255565643,
      "learning_rate": 3.511461950541305e-06,
      "loss": 0.0085,
      "step": 1989440
    },
    {
      "epoch": 3.2557949241635735,
      "grad_norm": 0.4021508991718292,
      "learning_rate": 3.511396058327788e-06,
      "loss": 0.0127,
      "step": 1989460
    },
    {
      "epoch": 3.255827654602227,
      "grad_norm": 0.6381613612174988,
      "learning_rate": 3.5113301661142708e-06,
      "loss": 0.0115,
      "step": 1989480
    },
    {
      "epoch": 3.2558603850408803,
      "grad_norm": 0.4333028197288513,
      "learning_rate": 3.5112642739007535e-06,
      "loss": 0.0079,
      "step": 1989500
    },
    {
      "epoch": 3.255893115479534,
      "grad_norm": 0.473398357629776,
      "learning_rate": 3.5111983816872363e-06,
      "loss": 0.0108,
      "step": 1989520
    },
    {
      "epoch": 3.255925845918187,
      "grad_norm": 0.37046653032302856,
      "learning_rate": 3.511132489473719e-06,
      "loss": 0.008,
      "step": 1989540
    },
    {
      "epoch": 3.25595857635684,
      "grad_norm": 0.25796300172805786,
      "learning_rate": 3.511066597260202e-06,
      "loss": 0.0114,
      "step": 1989560
    },
    {
      "epoch": 3.2559913067954938,
      "grad_norm": 0.4210391640663147,
      "learning_rate": 3.511000705046685e-06,
      "loss": 0.011,
      "step": 1989580
    },
    {
      "epoch": 3.256024037234147,
      "grad_norm": 0.6652052998542786,
      "learning_rate": 3.5109348128331676e-06,
      "loss": 0.0103,
      "step": 1989600
    },
    {
      "epoch": 3.2560567676728005,
      "grad_norm": 0.2953028976917267,
      "learning_rate": 3.5108689206196504e-06,
      "loss": 0.0103,
      "step": 1989620
    },
    {
      "epoch": 3.2560894981114537,
      "grad_norm": 0.3653278648853302,
      "learning_rate": 3.5108030284061335e-06,
      "loss": 0.0094,
      "step": 1989640
    },
    {
      "epoch": 3.2561222285501072,
      "grad_norm": 0.4879626929759979,
      "learning_rate": 3.5107371361926163e-06,
      "loss": 0.0081,
      "step": 1989660
    },
    {
      "epoch": 3.2561549589887604,
      "grad_norm": 0.12798552215099335,
      "learning_rate": 3.510671243979099e-06,
      "loss": 0.0099,
      "step": 1989680
    },
    {
      "epoch": 3.2561876894274135,
      "grad_norm": 0.0289768036454916,
      "learning_rate": 3.5106053517655818e-06,
      "loss": 0.0101,
      "step": 1989700
    },
    {
      "epoch": 3.256220419866067,
      "grad_norm": 0.1301886886358261,
      "learning_rate": 3.510539459552065e-06,
      "loss": 0.0113,
      "step": 1989720
    },
    {
      "epoch": 3.2562531503047203,
      "grad_norm": 0.3699822723865509,
      "learning_rate": 3.510473567338548e-06,
      "loss": 0.0143,
      "step": 1989740
    },
    {
      "epoch": 3.256285880743374,
      "grad_norm": 0.052587687969207764,
      "learning_rate": 3.510407675125031e-06,
      "loss": 0.0107,
      "step": 1989760
    },
    {
      "epoch": 3.256318611182027,
      "grad_norm": 0.27079248428344727,
      "learning_rate": 3.510341782911514e-06,
      "loss": 0.008,
      "step": 1989780
    },
    {
      "epoch": 3.2563513416206806,
      "grad_norm": 0.531302809715271,
      "learning_rate": 3.5102758906979967e-06,
      "loss": 0.0079,
      "step": 1989800
    },
    {
      "epoch": 3.256384072059334,
      "grad_norm": 0.23016029596328735,
      "learning_rate": 3.5102099984844795e-06,
      "loss": 0.0108,
      "step": 1989820
    },
    {
      "epoch": 3.256416802497987,
      "grad_norm": 0.3343561291694641,
      "learning_rate": 3.5101441062709622e-06,
      "loss": 0.0108,
      "step": 1989840
    },
    {
      "epoch": 3.2564495329366405,
      "grad_norm": 0.14372391998767853,
      "learning_rate": 3.5100782140574454e-06,
      "loss": 0.0091,
      "step": 1989860
    },
    {
      "epoch": 3.2564822633752937,
      "grad_norm": 0.3057017922401428,
      "learning_rate": 3.510012321843928e-06,
      "loss": 0.011,
      "step": 1989880
    },
    {
      "epoch": 3.2565149938139473,
      "grad_norm": 0.5177714228630066,
      "learning_rate": 3.509946429630411e-06,
      "loss": 0.0122,
      "step": 1989900
    },
    {
      "epoch": 3.2565477242526004,
      "grad_norm": 1.1538912057876587,
      "learning_rate": 3.5098805374168936e-06,
      "loss": 0.0116,
      "step": 1989920
    },
    {
      "epoch": 3.256580454691254,
      "grad_norm": 0.41769152879714966,
      "learning_rate": 3.5098146452033768e-06,
      "loss": 0.0118,
      "step": 1989940
    },
    {
      "epoch": 3.256613185129907,
      "grad_norm": 0.11340613663196564,
      "learning_rate": 3.5097487529898595e-06,
      "loss": 0.0121,
      "step": 1989960
    },
    {
      "epoch": 3.2566459155685603,
      "grad_norm": 0.2147236466407776,
      "learning_rate": 3.5096828607763422e-06,
      "loss": 0.009,
      "step": 1989980
    },
    {
      "epoch": 3.256678646007214,
      "grad_norm": 0.1274874210357666,
      "learning_rate": 3.509616968562825e-06,
      "loss": 0.0149,
      "step": 1990000
    },
    {
      "epoch": 3.256711376445867,
      "grad_norm": 0.3570331037044525,
      "learning_rate": 3.5095510763493077e-06,
      "loss": 0.0102,
      "step": 1990020
    },
    {
      "epoch": 3.2567441068845207,
      "grad_norm": 0.11332722753286362,
      "learning_rate": 3.509485184135791e-06,
      "loss": 0.0132,
      "step": 1990040
    },
    {
      "epoch": 3.256776837323174,
      "grad_norm": 2.6901237964630127,
      "learning_rate": 3.5094192919222736e-06,
      "loss": 0.0128,
      "step": 1990060
    },
    {
      "epoch": 3.256809567761827,
      "grad_norm": 0.19813553988933563,
      "learning_rate": 3.5093533997087564e-06,
      "loss": 0.0108,
      "step": 1990080
    },
    {
      "epoch": 3.2568422982004805,
      "grad_norm": 0.26858898997306824,
      "learning_rate": 3.50928750749524e-06,
      "loss": 0.0097,
      "step": 1990100
    },
    {
      "epoch": 3.2568750286391337,
      "grad_norm": 0.1830720752477646,
      "learning_rate": 3.5092216152817227e-06,
      "loss": 0.0087,
      "step": 1990120
    },
    {
      "epoch": 3.2569077590777873,
      "grad_norm": 0.24626223742961884,
      "learning_rate": 3.5091557230682054e-06,
      "loss": 0.0085,
      "step": 1990140
    },
    {
      "epoch": 3.2569404895164404,
      "grad_norm": 0.4380585253238678,
      "learning_rate": 3.509089830854688e-06,
      "loss": 0.0132,
      "step": 1990160
    },
    {
      "epoch": 3.2569732199550936,
      "grad_norm": 0.7465128302574158,
      "learning_rate": 3.5090239386411713e-06,
      "loss": 0.0115,
      "step": 1990180
    },
    {
      "epoch": 3.257005950393747,
      "grad_norm": 0.6392690539360046,
      "learning_rate": 3.508958046427654e-06,
      "loss": 0.0129,
      "step": 1990200
    },
    {
      "epoch": 3.2570386808324003,
      "grad_norm": 0.7827122211456299,
      "learning_rate": 3.508892154214137e-06,
      "loss": 0.0107,
      "step": 1990220
    },
    {
      "epoch": 3.257071411271054,
      "grad_norm": 0.3418169319629669,
      "learning_rate": 3.5088262620006196e-06,
      "loss": 0.0103,
      "step": 1990240
    },
    {
      "epoch": 3.257104141709707,
      "grad_norm": 0.11073767393827438,
      "learning_rate": 3.5087603697871027e-06,
      "loss": 0.0188,
      "step": 1990260
    },
    {
      "epoch": 3.2571368721483607,
      "grad_norm": 0.7003504037857056,
      "learning_rate": 3.5086944775735855e-06,
      "loss": 0.0107,
      "step": 1990280
    },
    {
      "epoch": 3.257169602587014,
      "grad_norm": 0.10264015942811966,
      "learning_rate": 3.508628585360068e-06,
      "loss": 0.0163,
      "step": 1990300
    },
    {
      "epoch": 3.257202333025667,
      "grad_norm": 1.2746808528900146,
      "learning_rate": 3.508562693146551e-06,
      "loss": 0.0148,
      "step": 1990320
    },
    {
      "epoch": 3.2572350634643206,
      "grad_norm": 0.2817189395427704,
      "learning_rate": 3.508496800933034e-06,
      "loss": 0.0135,
      "step": 1990340
    },
    {
      "epoch": 3.2572677939029737,
      "grad_norm": 0.3597583770751953,
      "learning_rate": 3.508430908719517e-06,
      "loss": 0.0145,
      "step": 1990360
    },
    {
      "epoch": 3.2573005243416273,
      "grad_norm": 0.06345781683921814,
      "learning_rate": 3.5083650165059996e-06,
      "loss": 0.0086,
      "step": 1990380
    },
    {
      "epoch": 3.2573332547802805,
      "grad_norm": 1.001956820487976,
      "learning_rate": 3.5082991242924823e-06,
      "loss": 0.0158,
      "step": 1990400
    },
    {
      "epoch": 3.257365985218934,
      "grad_norm": 0.279403418302536,
      "learning_rate": 3.508233232078965e-06,
      "loss": 0.0096,
      "step": 1990420
    },
    {
      "epoch": 3.257398715657587,
      "grad_norm": 0.11422169953584671,
      "learning_rate": 3.5081673398654486e-06,
      "loss": 0.0096,
      "step": 1990440
    },
    {
      "epoch": 3.2574314460962404,
      "grad_norm": 0.30645671486854553,
      "learning_rate": 3.5081014476519314e-06,
      "loss": 0.0082,
      "step": 1990460
    },
    {
      "epoch": 3.257464176534894,
      "grad_norm": 0.2231002300977707,
      "learning_rate": 3.5080355554384145e-06,
      "loss": 0.006,
      "step": 1990480
    },
    {
      "epoch": 3.257496906973547,
      "grad_norm": 0.1837828904390335,
      "learning_rate": 3.5079696632248973e-06,
      "loss": 0.0113,
      "step": 1990500
    },
    {
      "epoch": 3.2575296374122007,
      "grad_norm": 0.2988795042037964,
      "learning_rate": 3.50790377101138e-06,
      "loss": 0.0163,
      "step": 1990520
    },
    {
      "epoch": 3.257562367850854,
      "grad_norm": 0.12242689728736877,
      "learning_rate": 3.5078378787978628e-06,
      "loss": 0.0112,
      "step": 1990540
    },
    {
      "epoch": 3.2575950982895074,
      "grad_norm": 0.2805462181568146,
      "learning_rate": 3.5077719865843455e-06,
      "loss": 0.0066,
      "step": 1990560
    },
    {
      "epoch": 3.2576278287281606,
      "grad_norm": 0.12402509152889252,
      "learning_rate": 3.5077060943708287e-06,
      "loss": 0.0103,
      "step": 1990580
    },
    {
      "epoch": 3.2576605591668137,
      "grad_norm": 0.0802348181605339,
      "learning_rate": 3.5076402021573114e-06,
      "loss": 0.01,
      "step": 1990600
    },
    {
      "epoch": 3.2576932896054673,
      "grad_norm": 0.31196141242980957,
      "learning_rate": 3.507574309943794e-06,
      "loss": 0.0157,
      "step": 1990620
    },
    {
      "epoch": 3.2577260200441205,
      "grad_norm": 0.4800690710544586,
      "learning_rate": 3.507508417730277e-06,
      "loss": 0.0107,
      "step": 1990640
    },
    {
      "epoch": 3.257758750482774,
      "grad_norm": 0.33630242943763733,
      "learning_rate": 3.50744252551676e-06,
      "loss": 0.0161,
      "step": 1990660
    },
    {
      "epoch": 3.2577914809214272,
      "grad_norm": 0.40976738929748535,
      "learning_rate": 3.507376633303243e-06,
      "loss": 0.0144,
      "step": 1990680
    },
    {
      "epoch": 3.257824211360081,
      "grad_norm": 0.06988083571195602,
      "learning_rate": 3.5073107410897255e-06,
      "loss": 0.0119,
      "step": 1990700
    },
    {
      "epoch": 3.257856941798734,
      "grad_norm": 0.1055196151137352,
      "learning_rate": 3.5072448488762083e-06,
      "loss": 0.0152,
      "step": 1990720
    },
    {
      "epoch": 3.257889672237387,
      "grad_norm": 0.2596231698989868,
      "learning_rate": 3.5071789566626914e-06,
      "loss": 0.0144,
      "step": 1990740
    },
    {
      "epoch": 3.2579224026760407,
      "grad_norm": 0.143283411860466,
      "learning_rate": 3.507113064449174e-06,
      "loss": 0.0114,
      "step": 1990760
    },
    {
      "epoch": 3.257955133114694,
      "grad_norm": 0.27766749262809753,
      "learning_rate": 3.507047172235657e-06,
      "loss": 0.009,
      "step": 1990780
    },
    {
      "epoch": 3.2579878635533475,
      "grad_norm": 0.6054462790489197,
      "learning_rate": 3.5069812800221405e-06,
      "loss": 0.0107,
      "step": 1990800
    },
    {
      "epoch": 3.2580205939920006,
      "grad_norm": 0.40187275409698486,
      "learning_rate": 3.5069153878086232e-06,
      "loss": 0.0099,
      "step": 1990820
    },
    {
      "epoch": 3.258053324430654,
      "grad_norm": 0.3180459141731262,
      "learning_rate": 3.506849495595106e-06,
      "loss": 0.0119,
      "step": 1990840
    },
    {
      "epoch": 3.2580860548693074,
      "grad_norm": 0.17378531396389008,
      "learning_rate": 3.5067836033815887e-06,
      "loss": 0.0126,
      "step": 1990860
    },
    {
      "epoch": 3.2581187853079605,
      "grad_norm": 0.4481576383113861,
      "learning_rate": 3.506717711168072e-06,
      "loss": 0.0121,
      "step": 1990880
    },
    {
      "epoch": 3.258151515746614,
      "grad_norm": 0.16534800827503204,
      "learning_rate": 3.5066518189545546e-06,
      "loss": 0.0096,
      "step": 1990900
    },
    {
      "epoch": 3.2581842461852673,
      "grad_norm": 0.5569712519645691,
      "learning_rate": 3.5065859267410374e-06,
      "loss": 0.0119,
      "step": 1990920
    },
    {
      "epoch": 3.258216976623921,
      "grad_norm": 0.3521352708339691,
      "learning_rate": 3.50652003452752e-06,
      "loss": 0.0091,
      "step": 1990940
    },
    {
      "epoch": 3.258249707062574,
      "grad_norm": 0.4635085463523865,
      "learning_rate": 3.506454142314003e-06,
      "loss": 0.0094,
      "step": 1990960
    },
    {
      "epoch": 3.2582824375012276,
      "grad_norm": 0.1390494704246521,
      "learning_rate": 3.506388250100486e-06,
      "loss": 0.0108,
      "step": 1990980
    },
    {
      "epoch": 3.2583151679398807,
      "grad_norm": 0.26526546478271484,
      "learning_rate": 3.5063223578869687e-06,
      "loss": 0.0089,
      "step": 1991000
    },
    {
      "epoch": 3.258347898378534,
      "grad_norm": 0.2553902864456177,
      "learning_rate": 3.5062564656734515e-06,
      "loss": 0.0085,
      "step": 1991020
    },
    {
      "epoch": 3.2583806288171875,
      "grad_norm": 0.25060874223709106,
      "learning_rate": 3.5061905734599342e-06,
      "loss": 0.0141,
      "step": 1991040
    },
    {
      "epoch": 3.2584133592558406,
      "grad_norm": 0.2911885380744934,
      "learning_rate": 3.5061246812464174e-06,
      "loss": 0.0132,
      "step": 1991060
    },
    {
      "epoch": 3.2584460896944942,
      "grad_norm": 0.07854432612657547,
      "learning_rate": 3.5060587890329e-06,
      "loss": 0.0106,
      "step": 1991080
    },
    {
      "epoch": 3.2584788201331474,
      "grad_norm": 0.6439161896705627,
      "learning_rate": 3.505992896819383e-06,
      "loss": 0.0104,
      "step": 1991100
    },
    {
      "epoch": 3.258511550571801,
      "grad_norm": 0.5877493023872375,
      "learning_rate": 3.5059270046058656e-06,
      "loss": 0.0133,
      "step": 1991120
    },
    {
      "epoch": 3.258544281010454,
      "grad_norm": 0.26005369424819946,
      "learning_rate": 3.5058611123923488e-06,
      "loss": 0.0118,
      "step": 1991140
    },
    {
      "epoch": 3.2585770114491073,
      "grad_norm": 0.8237173557281494,
      "learning_rate": 3.505795220178832e-06,
      "loss": 0.0096,
      "step": 1991160
    },
    {
      "epoch": 3.258609741887761,
      "grad_norm": 0.19335132837295532,
      "learning_rate": 3.5057293279653147e-06,
      "loss": 0.0093,
      "step": 1991180
    },
    {
      "epoch": 3.258642472326414,
      "grad_norm": 0.781457245349884,
      "learning_rate": 3.505663435751798e-06,
      "loss": 0.0116,
      "step": 1991200
    },
    {
      "epoch": 3.2586752027650676,
      "grad_norm": 0.06907019764184952,
      "learning_rate": 3.5055975435382806e-06,
      "loss": 0.013,
      "step": 1991220
    },
    {
      "epoch": 3.2587079332037208,
      "grad_norm": 0.26661819219589233,
      "learning_rate": 3.5055316513247633e-06,
      "loss": 0.0124,
      "step": 1991240
    },
    {
      "epoch": 3.2587406636423744,
      "grad_norm": 0.3828377425670624,
      "learning_rate": 3.505465759111246e-06,
      "loss": 0.0123,
      "step": 1991260
    },
    {
      "epoch": 3.2587733940810275,
      "grad_norm": 0.0953315794467926,
      "learning_rate": 3.5053998668977292e-06,
      "loss": 0.0133,
      "step": 1991280
    },
    {
      "epoch": 3.2588061245196807,
      "grad_norm": 0.22707977890968323,
      "learning_rate": 3.505333974684212e-06,
      "loss": 0.0134,
      "step": 1991300
    },
    {
      "epoch": 3.2588388549583343,
      "grad_norm": 0.6447216272354126,
      "learning_rate": 3.5052680824706947e-06,
      "loss": 0.0089,
      "step": 1991320
    },
    {
      "epoch": 3.2588715853969874,
      "grad_norm": 0.21484732627868652,
      "learning_rate": 3.5052021902571774e-06,
      "loss": 0.0095,
      "step": 1991340
    },
    {
      "epoch": 3.258904315835641,
      "grad_norm": 0.43788665533065796,
      "learning_rate": 3.5051362980436606e-06,
      "loss": 0.013,
      "step": 1991360
    },
    {
      "epoch": 3.258937046274294,
      "grad_norm": 0.48826390504837036,
      "learning_rate": 3.5050704058301433e-06,
      "loss": 0.0137,
      "step": 1991380
    },
    {
      "epoch": 3.2589697767129477,
      "grad_norm": 0.0839608758687973,
      "learning_rate": 3.505004513616626e-06,
      "loss": 0.0135,
      "step": 1991400
    },
    {
      "epoch": 3.259002507151601,
      "grad_norm": 0.21940939128398895,
      "learning_rate": 3.504938621403109e-06,
      "loss": 0.0094,
      "step": 1991420
    },
    {
      "epoch": 3.259035237590254,
      "grad_norm": 0.5174943804740906,
      "learning_rate": 3.5048727291895916e-06,
      "loss": 0.0129,
      "step": 1991440
    },
    {
      "epoch": 3.2590679680289076,
      "grad_norm": 0.2817736268043518,
      "learning_rate": 3.5048068369760747e-06,
      "loss": 0.0162,
      "step": 1991460
    },
    {
      "epoch": 3.259100698467561,
      "grad_norm": 0.08140260726213455,
      "learning_rate": 3.5047409447625575e-06,
      "loss": 0.0125,
      "step": 1991480
    },
    {
      "epoch": 3.2591334289062144,
      "grad_norm": 0.24956583976745605,
      "learning_rate": 3.5046750525490406e-06,
      "loss": 0.0116,
      "step": 1991500
    },
    {
      "epoch": 3.2591661593448675,
      "grad_norm": 0.2726303040981293,
      "learning_rate": 3.504609160335524e-06,
      "loss": 0.0137,
      "step": 1991520
    },
    {
      "epoch": 3.259198889783521,
      "grad_norm": 0.41441842913627625,
      "learning_rate": 3.5045432681220065e-06,
      "loss": 0.0102,
      "step": 1991540
    },
    {
      "epoch": 3.2592316202221743,
      "grad_norm": 0.19625389575958252,
      "learning_rate": 3.5044773759084893e-06,
      "loss": 0.0115,
      "step": 1991560
    },
    {
      "epoch": 3.2592643506608274,
      "grad_norm": 0.2387823462486267,
      "learning_rate": 3.504411483694972e-06,
      "loss": 0.0069,
      "step": 1991580
    },
    {
      "epoch": 3.259297081099481,
      "grad_norm": 0.10617505013942719,
      "learning_rate": 3.504345591481455e-06,
      "loss": 0.0189,
      "step": 1991600
    },
    {
      "epoch": 3.259329811538134,
      "grad_norm": 0.19101347029209137,
      "learning_rate": 3.504279699267938e-06,
      "loss": 0.0134,
      "step": 1991620
    },
    {
      "epoch": 3.2593625419767878,
      "grad_norm": 0.16352631151676178,
      "learning_rate": 3.5042138070544207e-06,
      "loss": 0.0123,
      "step": 1991640
    },
    {
      "epoch": 3.259395272415441,
      "grad_norm": 0.23571929335594177,
      "learning_rate": 3.5041479148409034e-06,
      "loss": 0.0097,
      "step": 1991660
    },
    {
      "epoch": 3.259428002854094,
      "grad_norm": 0.7673593759536743,
      "learning_rate": 3.5040820226273866e-06,
      "loss": 0.0165,
      "step": 1991680
    },
    {
      "epoch": 3.2594607332927477,
      "grad_norm": 0.12591008841991425,
      "learning_rate": 3.5040161304138693e-06,
      "loss": 0.0106,
      "step": 1991700
    },
    {
      "epoch": 3.259493463731401,
      "grad_norm": 0.3369450569152832,
      "learning_rate": 3.503950238200352e-06,
      "loss": 0.0123,
      "step": 1991720
    },
    {
      "epoch": 3.2595261941700544,
      "grad_norm": 0.31294873356819153,
      "learning_rate": 3.5038843459868348e-06,
      "loss": 0.0096,
      "step": 1991740
    },
    {
      "epoch": 3.2595589246087076,
      "grad_norm": 0.11495202779769897,
      "learning_rate": 3.503818453773318e-06,
      "loss": 0.0097,
      "step": 1991760
    },
    {
      "epoch": 3.2595916550473607,
      "grad_norm": 0.12375976145267487,
      "learning_rate": 3.5037525615598007e-06,
      "loss": 0.0081,
      "step": 1991780
    },
    {
      "epoch": 3.2596243854860143,
      "grad_norm": 0.1876600980758667,
      "learning_rate": 3.5036866693462834e-06,
      "loss": 0.0144,
      "step": 1991800
    },
    {
      "epoch": 3.2596571159246674,
      "grad_norm": 0.3415091931819916,
      "learning_rate": 3.503620777132766e-06,
      "loss": 0.0111,
      "step": 1991820
    },
    {
      "epoch": 3.259689846363321,
      "grad_norm": 0.1864001452922821,
      "learning_rate": 3.5035548849192493e-06,
      "loss": 0.012,
      "step": 1991840
    },
    {
      "epoch": 3.259722576801974,
      "grad_norm": 0.27150970697402954,
      "learning_rate": 3.5034889927057325e-06,
      "loss": 0.0178,
      "step": 1991860
    },
    {
      "epoch": 3.259755307240628,
      "grad_norm": 0.06166985258460045,
      "learning_rate": 3.5034231004922152e-06,
      "loss": 0.0107,
      "step": 1991880
    },
    {
      "epoch": 3.259788037679281,
      "grad_norm": 0.3723956048488617,
      "learning_rate": 3.5033572082786984e-06,
      "loss": 0.0131,
      "step": 1991900
    },
    {
      "epoch": 3.259820768117934,
      "grad_norm": 0.1644708216190338,
      "learning_rate": 3.503291316065181e-06,
      "loss": 0.0094,
      "step": 1991920
    },
    {
      "epoch": 3.2598534985565877,
      "grad_norm": 0.15465550124645233,
      "learning_rate": 3.503225423851664e-06,
      "loss": 0.0116,
      "step": 1991940
    },
    {
      "epoch": 3.259886228995241,
      "grad_norm": 0.18056003749370575,
      "learning_rate": 3.5031595316381466e-06,
      "loss": 0.0139,
      "step": 1991960
    },
    {
      "epoch": 3.2599189594338944,
      "grad_norm": 0.13513872027397156,
      "learning_rate": 3.5030936394246293e-06,
      "loss": 0.0073,
      "step": 1991980
    },
    {
      "epoch": 3.2599516898725476,
      "grad_norm": 0.14629463851451874,
      "learning_rate": 3.5030277472111125e-06,
      "loss": 0.0091,
      "step": 1992000
    },
    {
      "epoch": 3.259984420311201,
      "grad_norm": 0.6542818546295166,
      "learning_rate": 3.5029618549975952e-06,
      "loss": 0.0107,
      "step": 1992020
    },
    {
      "epoch": 3.2600171507498543,
      "grad_norm": 0.38664862513542175,
      "learning_rate": 3.502895962784078e-06,
      "loss": 0.0157,
      "step": 1992040
    },
    {
      "epoch": 3.2600498811885075,
      "grad_norm": 0.5279338955879211,
      "learning_rate": 3.5028300705705607e-06,
      "loss": 0.0073,
      "step": 1992060
    },
    {
      "epoch": 3.260082611627161,
      "grad_norm": 0.24735993146896362,
      "learning_rate": 3.502764178357044e-06,
      "loss": 0.0114,
      "step": 1992080
    },
    {
      "epoch": 3.260115342065814,
      "grad_norm": 0.26200398802757263,
      "learning_rate": 3.5026982861435266e-06,
      "loss": 0.0181,
      "step": 1992100
    },
    {
      "epoch": 3.260148072504468,
      "grad_norm": 0.15791641175746918,
      "learning_rate": 3.5026323939300094e-06,
      "loss": 0.0114,
      "step": 1992120
    },
    {
      "epoch": 3.260180802943121,
      "grad_norm": 0.13280560076236725,
      "learning_rate": 3.502566501716492e-06,
      "loss": 0.0167,
      "step": 1992140
    },
    {
      "epoch": 3.2602135333817746,
      "grad_norm": 0.13504642248153687,
      "learning_rate": 3.5025006095029753e-06,
      "loss": 0.0114,
      "step": 1992160
    },
    {
      "epoch": 3.2602462638204277,
      "grad_norm": 0.6399039030075073,
      "learning_rate": 3.502434717289458e-06,
      "loss": 0.0107,
      "step": 1992180
    },
    {
      "epoch": 3.260278994259081,
      "grad_norm": 0.6613762378692627,
      "learning_rate": 3.502368825075941e-06,
      "loss": 0.0164,
      "step": 1992200
    },
    {
      "epoch": 3.2603117246977344,
      "grad_norm": 0.3289308547973633,
      "learning_rate": 3.5023029328624243e-06,
      "loss": 0.0136,
      "step": 1992220
    },
    {
      "epoch": 3.2603444551363876,
      "grad_norm": 0.7185954451560974,
      "learning_rate": 3.502237040648907e-06,
      "loss": 0.0164,
      "step": 1992240
    },
    {
      "epoch": 3.260377185575041,
      "grad_norm": 0.3417026698589325,
      "learning_rate": 3.50217114843539e-06,
      "loss": 0.0144,
      "step": 1992260
    },
    {
      "epoch": 3.2604099160136943,
      "grad_norm": 0.06321794539690018,
      "learning_rate": 3.5021052562218726e-06,
      "loss": 0.0097,
      "step": 1992280
    },
    {
      "epoch": 3.260442646452348,
      "grad_norm": 0.5506442189216614,
      "learning_rate": 3.5020393640083557e-06,
      "loss": 0.0153,
      "step": 1992300
    },
    {
      "epoch": 3.260475376891001,
      "grad_norm": 0.5085917711257935,
      "learning_rate": 3.5019734717948385e-06,
      "loss": 0.0077,
      "step": 1992320
    },
    {
      "epoch": 3.2605081073296542,
      "grad_norm": 0.2060087025165558,
      "learning_rate": 3.501907579581321e-06,
      "loss": 0.0108,
      "step": 1992340
    },
    {
      "epoch": 3.260540837768308,
      "grad_norm": 0.2994265556335449,
      "learning_rate": 3.501841687367804e-06,
      "loss": 0.0146,
      "step": 1992360
    },
    {
      "epoch": 3.260573568206961,
      "grad_norm": 0.276724249124527,
      "learning_rate": 3.501775795154287e-06,
      "loss": 0.0125,
      "step": 1992380
    },
    {
      "epoch": 3.2606062986456146,
      "grad_norm": 0.3073613941669464,
      "learning_rate": 3.50170990294077e-06,
      "loss": 0.0101,
      "step": 1992400
    },
    {
      "epoch": 3.2606390290842677,
      "grad_norm": 0.6844412684440613,
      "learning_rate": 3.5016440107272526e-06,
      "loss": 0.0122,
      "step": 1992420
    },
    {
      "epoch": 3.2606717595229213,
      "grad_norm": 0.5937233567237854,
      "learning_rate": 3.5015781185137353e-06,
      "loss": 0.0129,
      "step": 1992440
    },
    {
      "epoch": 3.2607044899615745,
      "grad_norm": 0.3414532244205475,
      "learning_rate": 3.501512226300218e-06,
      "loss": 0.01,
      "step": 1992460
    },
    {
      "epoch": 3.2607372204002276,
      "grad_norm": 0.42328813672065735,
      "learning_rate": 3.5014463340867012e-06,
      "loss": 0.013,
      "step": 1992480
    },
    {
      "epoch": 3.260769950838881,
      "grad_norm": 0.2641317844390869,
      "learning_rate": 3.501380441873184e-06,
      "loss": 0.007,
      "step": 1992500
    },
    {
      "epoch": 3.2608026812775344,
      "grad_norm": 0.5586681365966797,
      "learning_rate": 3.5013145496596667e-06,
      "loss": 0.01,
      "step": 1992520
    },
    {
      "epoch": 3.260835411716188,
      "grad_norm": 0.1546904444694519,
      "learning_rate": 3.5012486574461494e-06,
      "loss": 0.0084,
      "step": 1992540
    },
    {
      "epoch": 3.260868142154841,
      "grad_norm": 0.2484143227338791,
      "learning_rate": 3.501182765232633e-06,
      "loss": 0.0249,
      "step": 1992560
    },
    {
      "epoch": 3.2609008725934947,
      "grad_norm": 0.219032883644104,
      "learning_rate": 3.5011168730191158e-06,
      "loss": 0.0108,
      "step": 1992580
    },
    {
      "epoch": 3.260933603032148,
      "grad_norm": 0.4763016998767853,
      "learning_rate": 3.5010509808055985e-06,
      "loss": 0.0161,
      "step": 1992600
    },
    {
      "epoch": 3.260966333470801,
      "grad_norm": 0.46930205821990967,
      "learning_rate": 3.5009850885920817e-06,
      "loss": 0.0122,
      "step": 1992620
    },
    {
      "epoch": 3.2609990639094546,
      "grad_norm": 0.1916310042142868,
      "learning_rate": 3.5009191963785644e-06,
      "loss": 0.0104,
      "step": 1992640
    },
    {
      "epoch": 3.2610317943481077,
      "grad_norm": 0.14008283615112305,
      "learning_rate": 3.500853304165047e-06,
      "loss": 0.0155,
      "step": 1992660
    },
    {
      "epoch": 3.2610645247867613,
      "grad_norm": 0.14040011167526245,
      "learning_rate": 3.50078741195153e-06,
      "loss": 0.0085,
      "step": 1992680
    },
    {
      "epoch": 3.2610972552254145,
      "grad_norm": 0.2616882622241974,
      "learning_rate": 3.500721519738013e-06,
      "loss": 0.0183,
      "step": 1992700
    },
    {
      "epoch": 3.261129985664068,
      "grad_norm": 0.257477343082428,
      "learning_rate": 3.500655627524496e-06,
      "loss": 0.0169,
      "step": 1992720
    },
    {
      "epoch": 3.2611627161027212,
      "grad_norm": 0.43999984860420227,
      "learning_rate": 3.5005897353109785e-06,
      "loss": 0.009,
      "step": 1992740
    },
    {
      "epoch": 3.2611954465413744,
      "grad_norm": 0.4530889093875885,
      "learning_rate": 3.5005238430974613e-06,
      "loss": 0.0112,
      "step": 1992760
    },
    {
      "epoch": 3.261228176980028,
      "grad_norm": 0.16715523600578308,
      "learning_rate": 3.5004579508839444e-06,
      "loss": 0.0108,
      "step": 1992780
    },
    {
      "epoch": 3.261260907418681,
      "grad_norm": 0.20931251347064972,
      "learning_rate": 3.500392058670427e-06,
      "loss": 0.0126,
      "step": 1992800
    },
    {
      "epoch": 3.2612936378573347,
      "grad_norm": 0.6055296063423157,
      "learning_rate": 3.50032616645691e-06,
      "loss": 0.0119,
      "step": 1992820
    },
    {
      "epoch": 3.261326368295988,
      "grad_norm": 0.24027575552463531,
      "learning_rate": 3.5002602742433927e-06,
      "loss": 0.0085,
      "step": 1992840
    },
    {
      "epoch": 3.2613590987346415,
      "grad_norm": 0.07516085356473923,
      "learning_rate": 3.5001943820298754e-06,
      "loss": 0.0092,
      "step": 1992860
    },
    {
      "epoch": 3.2613918291732946,
      "grad_norm": 0.21733909845352173,
      "learning_rate": 3.5001284898163586e-06,
      "loss": 0.0134,
      "step": 1992880
    },
    {
      "epoch": 3.2614245596119478,
      "grad_norm": 1.3907302618026733,
      "learning_rate": 3.5000625976028413e-06,
      "loss": 0.012,
      "step": 1992900
    },
    {
      "epoch": 3.2614572900506014,
      "grad_norm": 0.13784049451351166,
      "learning_rate": 3.499996705389325e-06,
      "loss": 0.0135,
      "step": 1992920
    },
    {
      "epoch": 3.2614900204892545,
      "grad_norm": 0.4341030418872833,
      "learning_rate": 3.4999308131758076e-06,
      "loss": 0.0167,
      "step": 1992940
    },
    {
      "epoch": 3.261522750927908,
      "grad_norm": 0.3095657527446747,
      "learning_rate": 3.4998649209622904e-06,
      "loss": 0.0121,
      "step": 1992960
    },
    {
      "epoch": 3.2615554813665613,
      "grad_norm": 0.3545062839984894,
      "learning_rate": 3.499799028748773e-06,
      "loss": 0.0064,
      "step": 1992980
    },
    {
      "epoch": 3.261588211805215,
      "grad_norm": 0.5726559162139893,
      "learning_rate": 3.499733136535256e-06,
      "loss": 0.0082,
      "step": 1993000
    },
    {
      "epoch": 3.261620942243868,
      "grad_norm": 0.15338864922523499,
      "learning_rate": 3.499667244321739e-06,
      "loss": 0.017,
      "step": 1993020
    },
    {
      "epoch": 3.261653672682521,
      "grad_norm": 0.4703352749347687,
      "learning_rate": 3.4996013521082218e-06,
      "loss": 0.0103,
      "step": 1993040
    },
    {
      "epoch": 3.2616864031211747,
      "grad_norm": 0.10423582047224045,
      "learning_rate": 3.4995354598947045e-06,
      "loss": 0.0145,
      "step": 1993060
    },
    {
      "epoch": 3.261719133559828,
      "grad_norm": 0.49527812004089355,
      "learning_rate": 3.4994695676811872e-06,
      "loss": 0.0156,
      "step": 1993080
    },
    {
      "epoch": 3.2617518639984815,
      "grad_norm": 0.27298158407211304,
      "learning_rate": 3.4994036754676704e-06,
      "loss": 0.0106,
      "step": 1993100
    },
    {
      "epoch": 3.2617845944371346,
      "grad_norm": 0.5080158710479736,
      "learning_rate": 3.499337783254153e-06,
      "loss": 0.0095,
      "step": 1993120
    },
    {
      "epoch": 3.261817324875788,
      "grad_norm": 0.18774807453155518,
      "learning_rate": 3.499271891040636e-06,
      "loss": 0.0092,
      "step": 1993140
    },
    {
      "epoch": 3.2618500553144414,
      "grad_norm": 0.23909813165664673,
      "learning_rate": 3.4992059988271186e-06,
      "loss": 0.0095,
      "step": 1993160
    },
    {
      "epoch": 3.2618827857530945,
      "grad_norm": 0.2846830189228058,
      "learning_rate": 3.4991401066136018e-06,
      "loss": 0.013,
      "step": 1993180
    },
    {
      "epoch": 3.261915516191748,
      "grad_norm": 0.0958176851272583,
      "learning_rate": 3.4990742144000845e-06,
      "loss": 0.0138,
      "step": 1993200
    },
    {
      "epoch": 3.2619482466304013,
      "grad_norm": 0.6178853511810303,
      "learning_rate": 3.4990083221865673e-06,
      "loss": 0.008,
      "step": 1993220
    },
    {
      "epoch": 3.2619809770690544,
      "grad_norm": 0.38664868474006653,
      "learning_rate": 3.49894242997305e-06,
      "loss": 0.0128,
      "step": 1993240
    },
    {
      "epoch": 3.262013707507708,
      "grad_norm": 0.4115558862686157,
      "learning_rate": 3.4988765377595336e-06,
      "loss": 0.0102,
      "step": 1993260
    },
    {
      "epoch": 3.262046437946361,
      "grad_norm": 0.12581488490104675,
      "learning_rate": 3.4988106455460163e-06,
      "loss": 0.0089,
      "step": 1993280
    },
    {
      "epoch": 3.2620791683850148,
      "grad_norm": 1.2759442329406738,
      "learning_rate": 3.498744753332499e-06,
      "loss": 0.0087,
      "step": 1993300
    },
    {
      "epoch": 3.262111898823668,
      "grad_norm": 0.13810473680496216,
      "learning_rate": 3.4986788611189822e-06,
      "loss": 0.0087,
      "step": 1993320
    },
    {
      "epoch": 3.2621446292623215,
      "grad_norm": 0.29339030385017395,
      "learning_rate": 3.498612968905465e-06,
      "loss": 0.0117,
      "step": 1993340
    },
    {
      "epoch": 3.2621773597009747,
      "grad_norm": 0.11974789202213287,
      "learning_rate": 3.4985470766919477e-06,
      "loss": 0.0123,
      "step": 1993360
    },
    {
      "epoch": 3.262210090139628,
      "grad_norm": 0.27270618081092834,
      "learning_rate": 3.4984811844784304e-06,
      "loss": 0.0098,
      "step": 1993380
    },
    {
      "epoch": 3.2622428205782814,
      "grad_norm": 0.21920442581176758,
      "learning_rate": 3.498415292264913e-06,
      "loss": 0.0107,
      "step": 1993400
    },
    {
      "epoch": 3.2622755510169346,
      "grad_norm": 0.09483269602060318,
      "learning_rate": 3.4983494000513963e-06,
      "loss": 0.0094,
      "step": 1993420
    },
    {
      "epoch": 3.262308281455588,
      "grad_norm": 0.37033993005752563,
      "learning_rate": 3.498283507837879e-06,
      "loss": 0.0115,
      "step": 1993440
    },
    {
      "epoch": 3.2623410118942413,
      "grad_norm": 0.1237572729587555,
      "learning_rate": 3.498217615624362e-06,
      "loss": 0.0104,
      "step": 1993460
    },
    {
      "epoch": 3.262373742332895,
      "grad_norm": 0.21946273744106293,
      "learning_rate": 3.4981517234108446e-06,
      "loss": 0.0148,
      "step": 1993480
    },
    {
      "epoch": 3.262406472771548,
      "grad_norm": 0.3155686557292938,
      "learning_rate": 3.4980858311973277e-06,
      "loss": 0.011,
      "step": 1993500
    },
    {
      "epoch": 3.262439203210201,
      "grad_norm": 0.17707706987857819,
      "learning_rate": 3.4980199389838105e-06,
      "loss": 0.0121,
      "step": 1993520
    },
    {
      "epoch": 3.262471933648855,
      "grad_norm": 0.5022248029708862,
      "learning_rate": 3.497954046770293e-06,
      "loss": 0.0117,
      "step": 1993540
    },
    {
      "epoch": 3.262504664087508,
      "grad_norm": 0.1027342900633812,
      "learning_rate": 3.497888154556776e-06,
      "loss": 0.0118,
      "step": 1993560
    },
    {
      "epoch": 3.2625373945261615,
      "grad_norm": 0.2841363847255707,
      "learning_rate": 3.497822262343259e-06,
      "loss": 0.0079,
      "step": 1993580
    },
    {
      "epoch": 3.2625701249648147,
      "grad_norm": 0.1016986221075058,
      "learning_rate": 3.497756370129742e-06,
      "loss": 0.0105,
      "step": 1993600
    },
    {
      "epoch": 3.2626028554034683,
      "grad_norm": 0.08155332505702972,
      "learning_rate": 3.497690477916225e-06,
      "loss": 0.0096,
      "step": 1993620
    },
    {
      "epoch": 3.2626355858421214,
      "grad_norm": 0.1727692037820816,
      "learning_rate": 3.497624585702708e-06,
      "loss": 0.0087,
      "step": 1993640
    },
    {
      "epoch": 3.2626683162807746,
      "grad_norm": 0.14632287621498108,
      "learning_rate": 3.497558693489191e-06,
      "loss": 0.0101,
      "step": 1993660
    },
    {
      "epoch": 3.262701046719428,
      "grad_norm": 0.8212010860443115,
      "learning_rate": 3.4974928012756737e-06,
      "loss": 0.0148,
      "step": 1993680
    },
    {
      "epoch": 3.2627337771580813,
      "grad_norm": 0.40421995520591736,
      "learning_rate": 3.4974269090621564e-06,
      "loss": 0.0074,
      "step": 1993700
    },
    {
      "epoch": 3.262766507596735,
      "grad_norm": 0.03085877187550068,
      "learning_rate": 3.4973610168486396e-06,
      "loss": 0.0088,
      "step": 1993720
    },
    {
      "epoch": 3.262799238035388,
      "grad_norm": 0.2617277204990387,
      "learning_rate": 3.4972951246351223e-06,
      "loss": 0.0108,
      "step": 1993740
    },
    {
      "epoch": 3.2628319684740417,
      "grad_norm": 0.498859703540802,
      "learning_rate": 3.497229232421605e-06,
      "loss": 0.016,
      "step": 1993760
    },
    {
      "epoch": 3.262864698912695,
      "grad_norm": 0.30289825797080994,
      "learning_rate": 3.4971633402080878e-06,
      "loss": 0.0131,
      "step": 1993780
    },
    {
      "epoch": 3.262897429351348,
      "grad_norm": 0.21893885731697083,
      "learning_rate": 3.497097447994571e-06,
      "loss": 0.0182,
      "step": 1993800
    },
    {
      "epoch": 3.2629301597900016,
      "grad_norm": 0.12623703479766846,
      "learning_rate": 3.4970315557810537e-06,
      "loss": 0.01,
      "step": 1993820
    },
    {
      "epoch": 3.2629628902286547,
      "grad_norm": 0.1317894160747528,
      "learning_rate": 3.4969656635675364e-06,
      "loss": 0.0092,
      "step": 1993840
    },
    {
      "epoch": 3.2629956206673083,
      "grad_norm": 0.3916931450366974,
      "learning_rate": 3.496899771354019e-06,
      "loss": 0.0114,
      "step": 1993860
    },
    {
      "epoch": 3.2630283511059615,
      "grad_norm": 0.2460721880197525,
      "learning_rate": 3.496833879140502e-06,
      "loss": 0.0148,
      "step": 1993880
    },
    {
      "epoch": 3.263061081544615,
      "grad_norm": 0.4481997787952423,
      "learning_rate": 3.496767986926985e-06,
      "loss": 0.0181,
      "step": 1993900
    },
    {
      "epoch": 3.263093811983268,
      "grad_norm": 0.4455604553222656,
      "learning_rate": 3.496702094713468e-06,
      "loss": 0.0207,
      "step": 1993920
    },
    {
      "epoch": 3.2631265424219214,
      "grad_norm": 0.12926103174686432,
      "learning_rate": 3.4966362024999505e-06,
      "loss": 0.0148,
      "step": 1993940
    },
    {
      "epoch": 3.263159272860575,
      "grad_norm": 0.2846693694591522,
      "learning_rate": 3.496570310286434e-06,
      "loss": 0.0118,
      "step": 1993960
    },
    {
      "epoch": 3.263192003299228,
      "grad_norm": 0.11129182577133179,
      "learning_rate": 3.496504418072917e-06,
      "loss": 0.0176,
      "step": 1993980
    },
    {
      "epoch": 3.2632247337378817,
      "grad_norm": 0.12561070919036865,
      "learning_rate": 3.4964385258593996e-06,
      "loss": 0.0142,
      "step": 1994000
    },
    {
      "epoch": 3.263257464176535,
      "grad_norm": 1.402775764465332,
      "learning_rate": 3.4963726336458824e-06,
      "loss": 0.0103,
      "step": 1994020
    },
    {
      "epoch": 3.2632901946151884,
      "grad_norm": 0.502352774143219,
      "learning_rate": 3.4963067414323655e-06,
      "loss": 0.0096,
      "step": 1994040
    },
    {
      "epoch": 3.2633229250538416,
      "grad_norm": 0.7411956787109375,
      "learning_rate": 3.4962408492188483e-06,
      "loss": 0.0135,
      "step": 1994060
    },
    {
      "epoch": 3.2633556554924947,
      "grad_norm": 0.2951052188873291,
      "learning_rate": 3.496174957005331e-06,
      "loss": 0.0121,
      "step": 1994080
    },
    {
      "epoch": 3.2633883859311483,
      "grad_norm": 0.22819502651691437,
      "learning_rate": 3.4961090647918137e-06,
      "loss": 0.0126,
      "step": 1994100
    },
    {
      "epoch": 3.2634211163698015,
      "grad_norm": 0.8981527090072632,
      "learning_rate": 3.496043172578297e-06,
      "loss": 0.0103,
      "step": 1994120
    },
    {
      "epoch": 3.263453846808455,
      "grad_norm": 0.24862676858901978,
      "learning_rate": 3.4959772803647796e-06,
      "loss": 0.0161,
      "step": 1994140
    },
    {
      "epoch": 3.2634865772471082,
      "grad_norm": 0.15165159106254578,
      "learning_rate": 3.4959113881512624e-06,
      "loss": 0.0117,
      "step": 1994160
    },
    {
      "epoch": 3.263519307685762,
      "grad_norm": 0.29706868529319763,
      "learning_rate": 3.495845495937745e-06,
      "loss": 0.0072,
      "step": 1994180
    },
    {
      "epoch": 3.263552038124415,
      "grad_norm": 0.32737261056900024,
      "learning_rate": 3.4957796037242283e-06,
      "loss": 0.0184,
      "step": 1994200
    },
    {
      "epoch": 3.263584768563068,
      "grad_norm": 0.3981274664402008,
      "learning_rate": 3.495713711510711e-06,
      "loss": 0.0097,
      "step": 1994220
    },
    {
      "epoch": 3.2636174990017217,
      "grad_norm": 0.08809463679790497,
      "learning_rate": 3.4956478192971938e-06,
      "loss": 0.0142,
      "step": 1994240
    },
    {
      "epoch": 3.263650229440375,
      "grad_norm": 0.1976325660943985,
      "learning_rate": 3.4955819270836765e-06,
      "loss": 0.0101,
      "step": 1994260
    },
    {
      "epoch": 3.2636829598790285,
      "grad_norm": 0.3154209852218628,
      "learning_rate": 3.4955160348701592e-06,
      "loss": 0.0113,
      "step": 1994280
    },
    {
      "epoch": 3.2637156903176816,
      "grad_norm": 0.21132072806358337,
      "learning_rate": 3.4954501426566424e-06,
      "loss": 0.0088,
      "step": 1994300
    },
    {
      "epoch": 3.263748420756335,
      "grad_norm": 0.15351048111915588,
      "learning_rate": 3.4953842504431256e-06,
      "loss": 0.0082,
      "step": 1994320
    },
    {
      "epoch": 3.2637811511949884,
      "grad_norm": 0.3165706396102905,
      "learning_rate": 3.4953183582296087e-06,
      "loss": 0.0123,
      "step": 1994340
    },
    {
      "epoch": 3.2638138816336415,
      "grad_norm": 0.20365169644355774,
      "learning_rate": 3.4952524660160915e-06,
      "loss": 0.0104,
      "step": 1994360
    },
    {
      "epoch": 3.263846612072295,
      "grad_norm": 0.0717368870973587,
      "learning_rate": 3.495186573802574e-06,
      "loss": 0.0113,
      "step": 1994380
    },
    {
      "epoch": 3.2638793425109482,
      "grad_norm": 0.25976791977882385,
      "learning_rate": 3.495120681589057e-06,
      "loss": 0.0144,
      "step": 1994400
    },
    {
      "epoch": 3.263912072949602,
      "grad_norm": 0.16532227396965027,
      "learning_rate": 3.4950547893755397e-06,
      "loss": 0.0119,
      "step": 1994420
    },
    {
      "epoch": 3.263944803388255,
      "grad_norm": 0.47062596678733826,
      "learning_rate": 3.494988897162023e-06,
      "loss": 0.0115,
      "step": 1994440
    },
    {
      "epoch": 3.2639775338269086,
      "grad_norm": 0.1463196873664856,
      "learning_rate": 3.4949230049485056e-06,
      "loss": 0.0133,
      "step": 1994460
    },
    {
      "epoch": 3.2640102642655617,
      "grad_norm": 0.6850786209106445,
      "learning_rate": 3.4948571127349883e-06,
      "loss": 0.0136,
      "step": 1994480
    },
    {
      "epoch": 3.264042994704215,
      "grad_norm": 0.13045121729373932,
      "learning_rate": 3.494791220521471e-06,
      "loss": 0.0085,
      "step": 1994500
    },
    {
      "epoch": 3.2640757251428685,
      "grad_norm": 0.0668032169342041,
      "learning_rate": 3.4947253283079542e-06,
      "loss": 0.0095,
      "step": 1994520
    },
    {
      "epoch": 3.2641084555815216,
      "grad_norm": 0.05328715220093727,
      "learning_rate": 3.494659436094437e-06,
      "loss": 0.01,
      "step": 1994540
    },
    {
      "epoch": 3.2641411860201752,
      "grad_norm": 0.206572026014328,
      "learning_rate": 3.4945935438809197e-06,
      "loss": 0.0181,
      "step": 1994560
    },
    {
      "epoch": 3.2641739164588284,
      "grad_norm": 0.40061768889427185,
      "learning_rate": 3.4945276516674025e-06,
      "loss": 0.0116,
      "step": 1994580
    },
    {
      "epoch": 3.264206646897482,
      "grad_norm": 0.35961776971817017,
      "learning_rate": 3.4944617594538856e-06,
      "loss": 0.012,
      "step": 1994600
    },
    {
      "epoch": 3.264239377336135,
      "grad_norm": 0.35405340790748596,
      "learning_rate": 3.4943958672403684e-06,
      "loss": 0.0097,
      "step": 1994620
    },
    {
      "epoch": 3.2642721077747883,
      "grad_norm": 0.18875165283679962,
      "learning_rate": 3.494329975026851e-06,
      "loss": 0.0097,
      "step": 1994640
    },
    {
      "epoch": 3.264304838213442,
      "grad_norm": 0.336597204208374,
      "learning_rate": 3.494264082813334e-06,
      "loss": 0.0079,
      "step": 1994660
    },
    {
      "epoch": 3.264337568652095,
      "grad_norm": 0.39179620146751404,
      "learning_rate": 3.4941981905998174e-06,
      "loss": 0.0137,
      "step": 1994680
    },
    {
      "epoch": 3.2643702990907486,
      "grad_norm": 0.40833911299705505,
      "learning_rate": 3.4941322983863e-06,
      "loss": 0.0115,
      "step": 1994700
    },
    {
      "epoch": 3.2644030295294018,
      "grad_norm": 0.15559159219264984,
      "learning_rate": 3.494066406172783e-06,
      "loss": 0.01,
      "step": 1994720
    },
    {
      "epoch": 3.264435759968055,
      "grad_norm": 0.11220578849315643,
      "learning_rate": 3.494000513959266e-06,
      "loss": 0.0081,
      "step": 1994740
    },
    {
      "epoch": 3.2644684904067085,
      "grad_norm": 0.31144285202026367,
      "learning_rate": 3.493934621745749e-06,
      "loss": 0.0119,
      "step": 1994760
    },
    {
      "epoch": 3.2645012208453617,
      "grad_norm": 0.18487024307250977,
      "learning_rate": 3.4938687295322315e-06,
      "loss": 0.0092,
      "step": 1994780
    },
    {
      "epoch": 3.2645339512840152,
      "grad_norm": 0.0833015888929367,
      "learning_rate": 3.4938028373187143e-06,
      "loss": 0.0106,
      "step": 1994800
    },
    {
      "epoch": 3.2645666817226684,
      "grad_norm": 0.39512190222740173,
      "learning_rate": 3.493736945105197e-06,
      "loss": 0.0102,
      "step": 1994820
    },
    {
      "epoch": 3.2645994121613215,
      "grad_norm": 0.4818141460418701,
      "learning_rate": 3.49367105289168e-06,
      "loss": 0.0099,
      "step": 1994840
    },
    {
      "epoch": 3.264632142599975,
      "grad_norm": 0.22432346642017365,
      "learning_rate": 3.493605160678163e-06,
      "loss": 0.0079,
      "step": 1994860
    },
    {
      "epoch": 3.2646648730386283,
      "grad_norm": 0.07341806590557098,
      "learning_rate": 3.4935392684646457e-06,
      "loss": 0.0093,
      "step": 1994880
    },
    {
      "epoch": 3.264697603477282,
      "grad_norm": 0.19039838016033173,
      "learning_rate": 3.4934733762511284e-06,
      "loss": 0.0086,
      "step": 1994900
    },
    {
      "epoch": 3.264730333915935,
      "grad_norm": 0.2522384524345398,
      "learning_rate": 3.4934074840376116e-06,
      "loss": 0.0088,
      "step": 1994920
    },
    {
      "epoch": 3.2647630643545886,
      "grad_norm": 0.30132561922073364,
      "learning_rate": 3.4933415918240943e-06,
      "loss": 0.0091,
      "step": 1994940
    },
    {
      "epoch": 3.264795794793242,
      "grad_norm": 0.15947581827640533,
      "learning_rate": 3.493275699610577e-06,
      "loss": 0.0112,
      "step": 1994960
    },
    {
      "epoch": 3.264828525231895,
      "grad_norm": 0.5668554306030273,
      "learning_rate": 3.49320980739706e-06,
      "loss": 0.0141,
      "step": 1994980
    },
    {
      "epoch": 3.2648612556705485,
      "grad_norm": 0.24489977955818176,
      "learning_rate": 3.493143915183543e-06,
      "loss": 0.0145,
      "step": 1995000
    },
    {
      "epoch": 3.2648939861092017,
      "grad_norm": 0.4189222455024719,
      "learning_rate": 3.493078022970026e-06,
      "loss": 0.0104,
      "step": 1995020
    },
    {
      "epoch": 3.2649267165478553,
      "grad_norm": 0.4486076533794403,
      "learning_rate": 3.493012130756509e-06,
      "loss": 0.0117,
      "step": 1995040
    },
    {
      "epoch": 3.2649594469865084,
      "grad_norm": 0.47148311138153076,
      "learning_rate": 3.492946238542992e-06,
      "loss": 0.0081,
      "step": 1995060
    },
    {
      "epoch": 3.264992177425162,
      "grad_norm": 0.2398749440908432,
      "learning_rate": 3.4928803463294748e-06,
      "loss": 0.0108,
      "step": 1995080
    },
    {
      "epoch": 3.265024907863815,
      "grad_norm": 0.5967851877212524,
      "learning_rate": 3.4928144541159575e-06,
      "loss": 0.0139,
      "step": 1995100
    },
    {
      "epoch": 3.2650576383024683,
      "grad_norm": 0.13010072708129883,
      "learning_rate": 3.4927485619024402e-06,
      "loss": 0.013,
      "step": 1995120
    },
    {
      "epoch": 3.265090368741122,
      "grad_norm": 0.15940476953983307,
      "learning_rate": 3.4926826696889234e-06,
      "loss": 0.0155,
      "step": 1995140
    },
    {
      "epoch": 3.265123099179775,
      "grad_norm": 0.35422977805137634,
      "learning_rate": 3.492616777475406e-06,
      "loss": 0.011,
      "step": 1995160
    },
    {
      "epoch": 3.2651558296184287,
      "grad_norm": 0.0635736957192421,
      "learning_rate": 3.492550885261889e-06,
      "loss": 0.0144,
      "step": 1995180
    },
    {
      "epoch": 3.265188560057082,
      "grad_norm": 0.21119025349617004,
      "learning_rate": 3.4924849930483716e-06,
      "loss": 0.0111,
      "step": 1995200
    },
    {
      "epoch": 3.2652212904957354,
      "grad_norm": 0.7361020445823669,
      "learning_rate": 3.4924191008348548e-06,
      "loss": 0.0096,
      "step": 1995220
    },
    {
      "epoch": 3.2652540209343885,
      "grad_norm": 0.26965880393981934,
      "learning_rate": 3.4923532086213375e-06,
      "loss": 0.0078,
      "step": 1995240
    },
    {
      "epoch": 3.2652867513730417,
      "grad_norm": 0.476632684469223,
      "learning_rate": 3.4922873164078203e-06,
      "loss": 0.0144,
      "step": 1995260
    },
    {
      "epoch": 3.2653194818116953,
      "grad_norm": 0.20394998788833618,
      "learning_rate": 3.492221424194303e-06,
      "loss": 0.0077,
      "step": 1995280
    },
    {
      "epoch": 3.2653522122503484,
      "grad_norm": 0.6317522525787354,
      "learning_rate": 3.4921555319807857e-06,
      "loss": 0.0089,
      "step": 1995300
    },
    {
      "epoch": 3.265384942689002,
      "grad_norm": 0.40438616275787354,
      "learning_rate": 3.492089639767269e-06,
      "loss": 0.0135,
      "step": 1995320
    },
    {
      "epoch": 3.265417673127655,
      "grad_norm": 0.7573148012161255,
      "learning_rate": 3.4920237475537516e-06,
      "loss": 0.0093,
      "step": 1995340
    },
    {
      "epoch": 3.265450403566309,
      "grad_norm": 0.12488411366939545,
      "learning_rate": 3.4919578553402344e-06,
      "loss": 0.0102,
      "step": 1995360
    },
    {
      "epoch": 3.265483134004962,
      "grad_norm": 0.389436274766922,
      "learning_rate": 3.491891963126718e-06,
      "loss": 0.0108,
      "step": 1995380
    },
    {
      "epoch": 3.265515864443615,
      "grad_norm": 0.7480159401893616,
      "learning_rate": 3.4918260709132007e-06,
      "loss": 0.0108,
      "step": 1995400
    },
    {
      "epoch": 3.2655485948822687,
      "grad_norm": 0.15616337954998016,
      "learning_rate": 3.4917601786996835e-06,
      "loss": 0.009,
      "step": 1995420
    },
    {
      "epoch": 3.265581325320922,
      "grad_norm": 0.21707308292388916,
      "learning_rate": 3.491694286486166e-06,
      "loss": 0.0063,
      "step": 1995440
    },
    {
      "epoch": 3.2656140557595754,
      "grad_norm": 0.21261097490787506,
      "learning_rate": 3.4916283942726494e-06,
      "loss": 0.0107,
      "step": 1995460
    },
    {
      "epoch": 3.2656467861982286,
      "grad_norm": 0.08463844656944275,
      "learning_rate": 3.491562502059132e-06,
      "loss": 0.0085,
      "step": 1995480
    },
    {
      "epoch": 3.265679516636882,
      "grad_norm": 1.3475172519683838,
      "learning_rate": 3.491496609845615e-06,
      "loss": 0.0123,
      "step": 1995500
    },
    {
      "epoch": 3.2657122470755353,
      "grad_norm": 0.3643852174282074,
      "learning_rate": 3.4914307176320976e-06,
      "loss": 0.0119,
      "step": 1995520
    },
    {
      "epoch": 3.2657449775141885,
      "grad_norm": 0.2252967208623886,
      "learning_rate": 3.4913648254185807e-06,
      "loss": 0.01,
      "step": 1995540
    },
    {
      "epoch": 3.265777707952842,
      "grad_norm": 0.9944860935211182,
      "learning_rate": 3.4912989332050635e-06,
      "loss": 0.0111,
      "step": 1995560
    },
    {
      "epoch": 3.265810438391495,
      "grad_norm": 0.28568652272224426,
      "learning_rate": 3.4912330409915462e-06,
      "loss": 0.009,
      "step": 1995580
    },
    {
      "epoch": 3.265843168830149,
      "grad_norm": 0.2784157395362854,
      "learning_rate": 3.491167148778029e-06,
      "loss": 0.0106,
      "step": 1995600
    },
    {
      "epoch": 3.265875899268802,
      "grad_norm": 0.24782946705818176,
      "learning_rate": 3.491101256564512e-06,
      "loss": 0.0133,
      "step": 1995620
    },
    {
      "epoch": 3.2659086297074555,
      "grad_norm": 0.27992403507232666,
      "learning_rate": 3.491035364350995e-06,
      "loss": 0.009,
      "step": 1995640
    },
    {
      "epoch": 3.2659413601461087,
      "grad_norm": 0.46064841747283936,
      "learning_rate": 3.4909694721374776e-06,
      "loss": 0.0135,
      "step": 1995660
    },
    {
      "epoch": 3.265974090584762,
      "grad_norm": 0.5996190905570984,
      "learning_rate": 3.4909035799239603e-06,
      "loss": 0.0157,
      "step": 1995680
    },
    {
      "epoch": 3.2660068210234154,
      "grad_norm": 0.227098748087883,
      "learning_rate": 3.4908376877104435e-06,
      "loss": 0.0157,
      "step": 1995700
    },
    {
      "epoch": 3.2660395514620686,
      "grad_norm": 0.5608243346214294,
      "learning_rate": 3.4907717954969267e-06,
      "loss": 0.0131,
      "step": 1995720
    },
    {
      "epoch": 3.266072281900722,
      "grad_norm": 0.688372015953064,
      "learning_rate": 3.4907059032834094e-06,
      "loss": 0.0111,
      "step": 1995740
    },
    {
      "epoch": 3.2661050123393753,
      "grad_norm": 0.4826318919658661,
      "learning_rate": 3.4906400110698926e-06,
      "loss": 0.0114,
      "step": 1995760
    },
    {
      "epoch": 3.266137742778029,
      "grad_norm": 0.2844649851322174,
      "learning_rate": 3.4905741188563753e-06,
      "loss": 0.0113,
      "step": 1995780
    },
    {
      "epoch": 3.266170473216682,
      "grad_norm": 0.13412871956825256,
      "learning_rate": 3.490508226642858e-06,
      "loss": 0.0116,
      "step": 1995800
    },
    {
      "epoch": 3.2662032036553352,
      "grad_norm": 0.24962441623210907,
      "learning_rate": 3.4904423344293408e-06,
      "loss": 0.0145,
      "step": 1995820
    },
    {
      "epoch": 3.266235934093989,
      "grad_norm": 0.3961354196071625,
      "learning_rate": 3.4903764422158235e-06,
      "loss": 0.0191,
      "step": 1995840
    },
    {
      "epoch": 3.266268664532642,
      "grad_norm": 0.1299731582403183,
      "learning_rate": 3.4903105500023067e-06,
      "loss": 0.0116,
      "step": 1995860
    },
    {
      "epoch": 3.2663013949712956,
      "grad_norm": 0.22461439669132233,
      "learning_rate": 3.4902446577887894e-06,
      "loss": 0.0075,
      "step": 1995880
    },
    {
      "epoch": 3.2663341254099487,
      "grad_norm": 0.23054125905036926,
      "learning_rate": 3.490178765575272e-06,
      "loss": 0.0079,
      "step": 1995900
    },
    {
      "epoch": 3.2663668558486023,
      "grad_norm": 0.15346315503120422,
      "learning_rate": 3.490112873361755e-06,
      "loss": 0.0151,
      "step": 1995920
    },
    {
      "epoch": 3.2663995862872555,
      "grad_norm": 0.33185669779777527,
      "learning_rate": 3.490046981148238e-06,
      "loss": 0.0149,
      "step": 1995940
    },
    {
      "epoch": 3.2664323167259086,
      "grad_norm": 0.48143163323402405,
      "learning_rate": 3.489981088934721e-06,
      "loss": 0.0105,
      "step": 1995960
    },
    {
      "epoch": 3.266465047164562,
      "grad_norm": 0.15690070390701294,
      "learning_rate": 3.4899151967212036e-06,
      "loss": 0.0079,
      "step": 1995980
    },
    {
      "epoch": 3.2664977776032154,
      "grad_norm": 0.2801036238670349,
      "learning_rate": 3.4898493045076863e-06,
      "loss": 0.0089,
      "step": 1996000
    },
    {
      "epoch": 3.266530508041869,
      "grad_norm": 0.2554765045642853,
      "learning_rate": 3.4897834122941695e-06,
      "loss": 0.0067,
      "step": 1996020
    },
    {
      "epoch": 3.266563238480522,
      "grad_norm": 0.2593177258968353,
      "learning_rate": 3.489717520080652e-06,
      "loss": 0.0105,
      "step": 1996040
    },
    {
      "epoch": 3.2665959689191757,
      "grad_norm": 0.992538571357727,
      "learning_rate": 3.489651627867135e-06,
      "loss": 0.0096,
      "step": 1996060
    },
    {
      "epoch": 3.266628699357829,
      "grad_norm": 0.5316824316978455,
      "learning_rate": 3.4895857356536185e-06,
      "loss": 0.0121,
      "step": 1996080
    },
    {
      "epoch": 3.266661429796482,
      "grad_norm": 0.18133148550987244,
      "learning_rate": 3.4895198434401013e-06,
      "loss": 0.0104,
      "step": 1996100
    },
    {
      "epoch": 3.2666941602351356,
      "grad_norm": 0.12812075018882751,
      "learning_rate": 3.489453951226584e-06,
      "loss": 0.0117,
      "step": 1996120
    },
    {
      "epoch": 3.2667268906737887,
      "grad_norm": 0.1092187762260437,
      "learning_rate": 3.4893880590130667e-06,
      "loss": 0.0092,
      "step": 1996140
    },
    {
      "epoch": 3.2667596211124423,
      "grad_norm": 0.17504334449768066,
      "learning_rate": 3.48932216679955e-06,
      "loss": 0.0102,
      "step": 1996160
    },
    {
      "epoch": 3.2667923515510955,
      "grad_norm": 0.22186560928821564,
      "learning_rate": 3.4892562745860326e-06,
      "loss": 0.0108,
      "step": 1996180
    },
    {
      "epoch": 3.2668250819897486,
      "grad_norm": 0.5153251886367798,
      "learning_rate": 3.4891903823725154e-06,
      "loss": 0.0112,
      "step": 1996200
    },
    {
      "epoch": 3.2668578124284022,
      "grad_norm": 0.10698430985212326,
      "learning_rate": 3.489124490158998e-06,
      "loss": 0.0081,
      "step": 1996220
    },
    {
      "epoch": 3.2668905428670554,
      "grad_norm": 0.18790525197982788,
      "learning_rate": 3.489058597945481e-06,
      "loss": 0.0142,
      "step": 1996240
    },
    {
      "epoch": 3.266923273305709,
      "grad_norm": 0.5060283541679382,
      "learning_rate": 3.488992705731964e-06,
      "loss": 0.0075,
      "step": 1996260
    },
    {
      "epoch": 3.266956003744362,
      "grad_norm": 0.30179327726364136,
      "learning_rate": 3.4889268135184468e-06,
      "loss": 0.0104,
      "step": 1996280
    },
    {
      "epoch": 3.2669887341830153,
      "grad_norm": 0.3523651361465454,
      "learning_rate": 3.4888609213049295e-06,
      "loss": 0.0107,
      "step": 1996300
    },
    {
      "epoch": 3.267021464621669,
      "grad_norm": 0.24016711115837097,
      "learning_rate": 3.4887950290914122e-06,
      "loss": 0.0099,
      "step": 1996320
    },
    {
      "epoch": 3.267054195060322,
      "grad_norm": 0.2398461401462555,
      "learning_rate": 3.4887291368778954e-06,
      "loss": 0.0146,
      "step": 1996340
    },
    {
      "epoch": 3.2670869254989756,
      "grad_norm": 0.21386660635471344,
      "learning_rate": 3.488663244664378e-06,
      "loss": 0.0109,
      "step": 1996360
    },
    {
      "epoch": 3.2671196559376288,
      "grad_norm": 0.2419663965702057,
      "learning_rate": 3.488597352450861e-06,
      "loss": 0.0105,
      "step": 1996380
    },
    {
      "epoch": 3.2671523863762824,
      "grad_norm": 0.5317085385322571,
      "learning_rate": 3.4885314602373436e-06,
      "loss": 0.0152,
      "step": 1996400
    },
    {
      "epoch": 3.2671851168149355,
      "grad_norm": 0.3745638430118561,
      "learning_rate": 3.4884655680238272e-06,
      "loss": 0.0092,
      "step": 1996420
    },
    {
      "epoch": 3.2672178472535887,
      "grad_norm": 0.2438964992761612,
      "learning_rate": 3.48839967581031e-06,
      "loss": 0.0124,
      "step": 1996440
    },
    {
      "epoch": 3.2672505776922423,
      "grad_norm": 0.1387631595134735,
      "learning_rate": 3.4883337835967927e-06,
      "loss": 0.0122,
      "step": 1996460
    },
    {
      "epoch": 3.2672833081308954,
      "grad_norm": 0.32328447699546814,
      "learning_rate": 3.488267891383276e-06,
      "loss": 0.0164,
      "step": 1996480
    },
    {
      "epoch": 3.267316038569549,
      "grad_norm": 0.5268126726150513,
      "learning_rate": 3.4882019991697586e-06,
      "loss": 0.0215,
      "step": 1996500
    },
    {
      "epoch": 3.267348769008202,
      "grad_norm": 0.14949628710746765,
      "learning_rate": 3.4881361069562413e-06,
      "loss": 0.0091,
      "step": 1996520
    },
    {
      "epoch": 3.2673814994468557,
      "grad_norm": 0.3951023519039154,
      "learning_rate": 3.488070214742724e-06,
      "loss": 0.0152,
      "step": 1996540
    },
    {
      "epoch": 3.267414229885509,
      "grad_norm": 0.7207731008529663,
      "learning_rate": 3.4880043225292072e-06,
      "loss": 0.0122,
      "step": 1996560
    },
    {
      "epoch": 3.267446960324162,
      "grad_norm": 0.3618335425853729,
      "learning_rate": 3.48793843031569e-06,
      "loss": 0.0087,
      "step": 1996580
    },
    {
      "epoch": 3.2674796907628156,
      "grad_norm": 0.24721695482730865,
      "learning_rate": 3.4878725381021727e-06,
      "loss": 0.0109,
      "step": 1996600
    },
    {
      "epoch": 3.267512421201469,
      "grad_norm": 0.11240933835506439,
      "learning_rate": 3.4878066458886555e-06,
      "loss": 0.0152,
      "step": 1996620
    },
    {
      "epoch": 3.2675451516401224,
      "grad_norm": 0.2954389154911041,
      "learning_rate": 3.4877407536751386e-06,
      "loss": 0.0135,
      "step": 1996640
    },
    {
      "epoch": 3.2675778820787755,
      "grad_norm": 0.14207467436790466,
      "learning_rate": 3.4876748614616214e-06,
      "loss": 0.0118,
      "step": 1996660
    },
    {
      "epoch": 3.267610612517429,
      "grad_norm": 0.36397674679756165,
      "learning_rate": 3.487608969248104e-06,
      "loss": 0.0133,
      "step": 1996680
    },
    {
      "epoch": 3.2676433429560823,
      "grad_norm": 0.8362082242965698,
      "learning_rate": 3.487543077034587e-06,
      "loss": 0.0095,
      "step": 1996700
    },
    {
      "epoch": 3.2676760733947354,
      "grad_norm": 0.1331983059644699,
      "learning_rate": 3.4874771848210696e-06,
      "loss": 0.0081,
      "step": 1996720
    },
    {
      "epoch": 3.267708803833389,
      "grad_norm": 0.3047006130218506,
      "learning_rate": 3.4874112926075527e-06,
      "loss": 0.0138,
      "step": 1996740
    },
    {
      "epoch": 3.267741534272042,
      "grad_norm": 0.23999962210655212,
      "learning_rate": 3.4873454003940355e-06,
      "loss": 0.0124,
      "step": 1996760
    },
    {
      "epoch": 3.2677742647106958,
      "grad_norm": 0.07864422351121902,
      "learning_rate": 3.4872795081805186e-06,
      "loss": 0.0156,
      "step": 1996780
    },
    {
      "epoch": 3.267806995149349,
      "grad_norm": 0.6359258890151978,
      "learning_rate": 3.487213615967002e-06,
      "loss": 0.0113,
      "step": 1996800
    },
    {
      "epoch": 3.2678397255880025,
      "grad_norm": 0.19320468604564667,
      "learning_rate": 3.4871477237534846e-06,
      "loss": 0.0146,
      "step": 1996820
    },
    {
      "epoch": 3.2678724560266557,
      "grad_norm": 0.7528203129768372,
      "learning_rate": 3.4870818315399673e-06,
      "loss": 0.0098,
      "step": 1996840
    },
    {
      "epoch": 3.267905186465309,
      "grad_norm": 0.5511499047279358,
      "learning_rate": 3.48701593932645e-06,
      "loss": 0.0088,
      "step": 1996860
    },
    {
      "epoch": 3.2679379169039624,
      "grad_norm": 0.1122557520866394,
      "learning_rate": 3.486950047112933e-06,
      "loss": 0.0093,
      "step": 1996880
    },
    {
      "epoch": 3.2679706473426156,
      "grad_norm": 0.1776888370513916,
      "learning_rate": 3.486884154899416e-06,
      "loss": 0.0088,
      "step": 1996900
    },
    {
      "epoch": 3.268003377781269,
      "grad_norm": 0.37684905529022217,
      "learning_rate": 3.4868182626858987e-06,
      "loss": 0.0122,
      "step": 1996920
    },
    {
      "epoch": 3.2680361082199223,
      "grad_norm": 0.5293900370597839,
      "learning_rate": 3.4867523704723814e-06,
      "loss": 0.0098,
      "step": 1996940
    },
    {
      "epoch": 3.268068838658576,
      "grad_norm": 0.21767336130142212,
      "learning_rate": 3.4866864782588646e-06,
      "loss": 0.0105,
      "step": 1996960
    },
    {
      "epoch": 3.268101569097229,
      "grad_norm": 0.5509540438652039,
      "learning_rate": 3.4866205860453473e-06,
      "loss": 0.0138,
      "step": 1996980
    },
    {
      "epoch": 3.268134299535882,
      "grad_norm": 0.6338590383529663,
      "learning_rate": 3.48655469383183e-06,
      "loss": 0.0087,
      "step": 1997000
    },
    {
      "epoch": 3.268167029974536,
      "grad_norm": 0.9577692747116089,
      "learning_rate": 3.486488801618313e-06,
      "loss": 0.0143,
      "step": 1997020
    },
    {
      "epoch": 3.268199760413189,
      "grad_norm": 0.48225781321525574,
      "learning_rate": 3.486422909404796e-06,
      "loss": 0.0161,
      "step": 1997040
    },
    {
      "epoch": 3.2682324908518425,
      "grad_norm": 0.18438778817653656,
      "learning_rate": 3.4863570171912787e-06,
      "loss": 0.0089,
      "step": 1997060
    },
    {
      "epoch": 3.2682652212904957,
      "grad_norm": 0.5911172032356262,
      "learning_rate": 3.4862911249777614e-06,
      "loss": 0.0145,
      "step": 1997080
    },
    {
      "epoch": 3.2682979517291493,
      "grad_norm": 0.35765019059181213,
      "learning_rate": 3.486225232764244e-06,
      "loss": 0.007,
      "step": 1997100
    },
    {
      "epoch": 3.2683306821678024,
      "grad_norm": 0.5356588363647461,
      "learning_rate": 3.4861593405507273e-06,
      "loss": 0.013,
      "step": 1997120
    },
    {
      "epoch": 3.2683634126064556,
      "grad_norm": 0.22840341925621033,
      "learning_rate": 3.4860934483372105e-06,
      "loss": 0.0083,
      "step": 1997140
    },
    {
      "epoch": 3.268396143045109,
      "grad_norm": 0.27181193232536316,
      "learning_rate": 3.4860275561236932e-06,
      "loss": 0.021,
      "step": 1997160
    },
    {
      "epoch": 3.2684288734837623,
      "grad_norm": 0.2787111699581146,
      "learning_rate": 3.4859616639101764e-06,
      "loss": 0.0162,
      "step": 1997180
    },
    {
      "epoch": 3.268461603922416,
      "grad_norm": 0.3592417240142822,
      "learning_rate": 3.485895771696659e-06,
      "loss": 0.0126,
      "step": 1997200
    },
    {
      "epoch": 3.268494334361069,
      "grad_norm": 0.11668600887060165,
      "learning_rate": 3.485829879483142e-06,
      "loss": 0.0111,
      "step": 1997220
    },
    {
      "epoch": 3.2685270647997227,
      "grad_norm": 0.22729259729385376,
      "learning_rate": 3.4857639872696246e-06,
      "loss": 0.018,
      "step": 1997240
    },
    {
      "epoch": 3.268559795238376,
      "grad_norm": 0.26824644207954407,
      "learning_rate": 3.4856980950561074e-06,
      "loss": 0.0109,
      "step": 1997260
    },
    {
      "epoch": 3.268592525677029,
      "grad_norm": 0.14240901172161102,
      "learning_rate": 3.4856322028425905e-06,
      "loss": 0.0102,
      "step": 1997280
    },
    {
      "epoch": 3.2686252561156826,
      "grad_norm": 0.8209959864616394,
      "learning_rate": 3.4855663106290733e-06,
      "loss": 0.0144,
      "step": 1997300
    },
    {
      "epoch": 3.2686579865543357,
      "grad_norm": 0.2210605889558792,
      "learning_rate": 3.485500418415556e-06,
      "loss": 0.0171,
      "step": 1997320
    },
    {
      "epoch": 3.2686907169929893,
      "grad_norm": 0.8976080417633057,
      "learning_rate": 3.4854345262020387e-06,
      "loss": 0.0123,
      "step": 1997340
    },
    {
      "epoch": 3.2687234474316424,
      "grad_norm": 0.1010187417268753,
      "learning_rate": 3.485368633988522e-06,
      "loss": 0.0075,
      "step": 1997360
    },
    {
      "epoch": 3.268756177870296,
      "grad_norm": 0.1489085555076599,
      "learning_rate": 3.4853027417750047e-06,
      "loss": 0.0122,
      "step": 1997380
    },
    {
      "epoch": 3.268788908308949,
      "grad_norm": 0.17571425437927246,
      "learning_rate": 3.4852368495614874e-06,
      "loss": 0.0074,
      "step": 1997400
    },
    {
      "epoch": 3.2688216387476023,
      "grad_norm": 0.27732518315315247,
      "learning_rate": 3.48517095734797e-06,
      "loss": 0.0087,
      "step": 1997420
    },
    {
      "epoch": 3.268854369186256,
      "grad_norm": 0.15732043981552124,
      "learning_rate": 3.4851050651344533e-06,
      "loss": 0.0089,
      "step": 1997440
    },
    {
      "epoch": 3.268887099624909,
      "grad_norm": 0.37086614966392517,
      "learning_rate": 3.485039172920936e-06,
      "loss": 0.0124,
      "step": 1997460
    },
    {
      "epoch": 3.2689198300635627,
      "grad_norm": 0.1748111993074417,
      "learning_rate": 3.484973280707419e-06,
      "loss": 0.0083,
      "step": 1997480
    },
    {
      "epoch": 3.268952560502216,
      "grad_norm": 0.22928869724273682,
      "learning_rate": 3.4849073884939024e-06,
      "loss": 0.0085,
      "step": 1997500
    },
    {
      "epoch": 3.2689852909408694,
      "grad_norm": 0.6493290066719055,
      "learning_rate": 3.484841496280385e-06,
      "loss": 0.0121,
      "step": 1997520
    },
    {
      "epoch": 3.2690180213795226,
      "grad_norm": 0.598823606967926,
      "learning_rate": 3.484775604066868e-06,
      "loss": 0.0104,
      "step": 1997540
    },
    {
      "epoch": 3.2690507518181757,
      "grad_norm": 0.03532707691192627,
      "learning_rate": 3.4847097118533506e-06,
      "loss": 0.0071,
      "step": 1997560
    },
    {
      "epoch": 3.2690834822568293,
      "grad_norm": 0.6924671530723572,
      "learning_rate": 3.4846438196398337e-06,
      "loss": 0.0134,
      "step": 1997580
    },
    {
      "epoch": 3.2691162126954825,
      "grad_norm": 0.35446691513061523,
      "learning_rate": 3.4845779274263165e-06,
      "loss": 0.0125,
      "step": 1997600
    },
    {
      "epoch": 3.269148943134136,
      "grad_norm": 0.5992643237113953,
      "learning_rate": 3.4845120352127992e-06,
      "loss": 0.0133,
      "step": 1997620
    },
    {
      "epoch": 3.269181673572789,
      "grad_norm": 0.1455732136964798,
      "learning_rate": 3.484446142999282e-06,
      "loss": 0.0143,
      "step": 1997640
    },
    {
      "epoch": 3.269214404011443,
      "grad_norm": 0.2761631906032562,
      "learning_rate": 3.484380250785765e-06,
      "loss": 0.0105,
      "step": 1997660
    },
    {
      "epoch": 3.269247134450096,
      "grad_norm": 0.41943246126174927,
      "learning_rate": 3.484314358572248e-06,
      "loss": 0.0145,
      "step": 1997680
    },
    {
      "epoch": 3.269279864888749,
      "grad_norm": 0.3346325755119324,
      "learning_rate": 3.4842484663587306e-06,
      "loss": 0.0128,
      "step": 1997700
    },
    {
      "epoch": 3.2693125953274027,
      "grad_norm": 0.041612908244132996,
      "learning_rate": 3.4841825741452133e-06,
      "loss": 0.0139,
      "step": 1997720
    },
    {
      "epoch": 3.269345325766056,
      "grad_norm": 0.36464643478393555,
      "learning_rate": 3.484116681931696e-06,
      "loss": 0.0086,
      "step": 1997740
    },
    {
      "epoch": 3.2693780562047094,
      "grad_norm": 0.05839018523693085,
      "learning_rate": 3.4840507897181792e-06,
      "loss": 0.0106,
      "step": 1997760
    },
    {
      "epoch": 3.2694107866433626,
      "grad_norm": 0.2096119374036789,
      "learning_rate": 3.483984897504662e-06,
      "loss": 0.0103,
      "step": 1997780
    },
    {
      "epoch": 3.2694435170820157,
      "grad_norm": 0.9441692233085632,
      "learning_rate": 3.4839190052911447e-06,
      "loss": 0.0094,
      "step": 1997800
    },
    {
      "epoch": 3.2694762475206693,
      "grad_norm": 0.0534462109208107,
      "learning_rate": 3.4838531130776275e-06,
      "loss": 0.0135,
      "step": 1997820
    },
    {
      "epoch": 3.2695089779593225,
      "grad_norm": 0.22267721593379974,
      "learning_rate": 3.483787220864111e-06,
      "loss": 0.0158,
      "step": 1997840
    },
    {
      "epoch": 3.269541708397976,
      "grad_norm": 0.3054022490978241,
      "learning_rate": 3.483721328650594e-06,
      "loss": 0.0077,
      "step": 1997860
    },
    {
      "epoch": 3.2695744388366292,
      "grad_norm": 0.16955851018428802,
      "learning_rate": 3.4836554364370765e-06,
      "loss": 0.0093,
      "step": 1997880
    },
    {
      "epoch": 3.2696071692752824,
      "grad_norm": 0.1595773994922638,
      "learning_rate": 3.4835895442235597e-06,
      "loss": 0.0146,
      "step": 1997900
    },
    {
      "epoch": 3.269639899713936,
      "grad_norm": 0.2888515591621399,
      "learning_rate": 3.4835236520100424e-06,
      "loss": 0.0146,
      "step": 1997920
    },
    {
      "epoch": 3.269672630152589,
      "grad_norm": 0.27488693594932556,
      "learning_rate": 3.483457759796525e-06,
      "loss": 0.0096,
      "step": 1997940
    },
    {
      "epoch": 3.2697053605912427,
      "grad_norm": 0.3613816797733307,
      "learning_rate": 3.483391867583008e-06,
      "loss": 0.0135,
      "step": 1997960
    },
    {
      "epoch": 3.269738091029896,
      "grad_norm": Infinity,
      "learning_rate": 3.483325975369491e-06,
      "loss": 0.0187,
      "step": 1997980
    },
    {
      "epoch": 3.2697708214685495,
      "grad_norm": 0.20134837925434113,
      "learning_rate": 3.483260083155974e-06,
      "loss": 0.0116,
      "step": 1998000
    },
    {
      "epoch": 3.2698035519072026,
      "grad_norm": 0.2207144796848297,
      "learning_rate": 3.4831941909424566e-06,
      "loss": 0.008,
      "step": 1998020
    },
    {
      "epoch": 3.2698362823458558,
      "grad_norm": 0.3559960126876831,
      "learning_rate": 3.4831282987289393e-06,
      "loss": 0.0116,
      "step": 1998040
    },
    {
      "epoch": 3.2698690127845094,
      "grad_norm": 0.11056714504957199,
      "learning_rate": 3.4830624065154225e-06,
      "loss": 0.0083,
      "step": 1998060
    },
    {
      "epoch": 3.2699017432231625,
      "grad_norm": 0.21228834986686707,
      "learning_rate": 3.482996514301905e-06,
      "loss": 0.0148,
      "step": 1998080
    },
    {
      "epoch": 3.269934473661816,
      "grad_norm": 0.45125746726989746,
      "learning_rate": 3.482930622088388e-06,
      "loss": 0.0122,
      "step": 1998100
    },
    {
      "epoch": 3.2699672041004693,
      "grad_norm": 1.0684998035430908,
      "learning_rate": 3.4828647298748707e-06,
      "loss": 0.01,
      "step": 1998120
    },
    {
      "epoch": 3.269999934539123,
      "grad_norm": 0.08956132084131241,
      "learning_rate": 3.4827988376613534e-06,
      "loss": 0.0189,
      "step": 1998140
    },
    {
      "epoch": 3.270032664977776,
      "grad_norm": 0.09437151998281479,
      "learning_rate": 3.4827329454478366e-06,
      "loss": 0.0096,
      "step": 1998160
    },
    {
      "epoch": 3.270065395416429,
      "grad_norm": 0.32629308104515076,
      "learning_rate": 3.4826670532343197e-06,
      "loss": 0.0117,
      "step": 1998180
    },
    {
      "epoch": 3.2700981258550827,
      "grad_norm": 0.5339813232421875,
      "learning_rate": 3.482601161020803e-06,
      "loss": 0.0132,
      "step": 1998200
    },
    {
      "epoch": 3.270130856293736,
      "grad_norm": 0.10458187758922577,
      "learning_rate": 3.4825352688072857e-06,
      "loss": 0.0087,
      "step": 1998220
    },
    {
      "epoch": 3.2701635867323895,
      "grad_norm": 0.19159045815467834,
      "learning_rate": 3.4824693765937684e-06,
      "loss": 0.0089,
      "step": 1998240
    },
    {
      "epoch": 3.2701963171710426,
      "grad_norm": 0.28468289971351624,
      "learning_rate": 3.482403484380251e-06,
      "loss": 0.0213,
      "step": 1998260
    },
    {
      "epoch": 3.2702290476096962,
      "grad_norm": 0.22574375569820404,
      "learning_rate": 3.482337592166734e-06,
      "loss": 0.0106,
      "step": 1998280
    },
    {
      "epoch": 3.2702617780483494,
      "grad_norm": 0.760935366153717,
      "learning_rate": 3.482271699953217e-06,
      "loss": 0.012,
      "step": 1998300
    },
    {
      "epoch": 3.2702945084870025,
      "grad_norm": 0.9896336197853088,
      "learning_rate": 3.4822058077396998e-06,
      "loss": 0.0115,
      "step": 1998320
    },
    {
      "epoch": 3.270327238925656,
      "grad_norm": 0.41967257857322693,
      "learning_rate": 3.4821399155261825e-06,
      "loss": 0.0125,
      "step": 1998340
    },
    {
      "epoch": 3.2703599693643093,
      "grad_norm": 0.41424912214279175,
      "learning_rate": 3.4820740233126653e-06,
      "loss": 0.012,
      "step": 1998360
    },
    {
      "epoch": 3.270392699802963,
      "grad_norm": 0.26932820677757263,
      "learning_rate": 3.4820081310991484e-06,
      "loss": 0.0108,
      "step": 1998380
    },
    {
      "epoch": 3.270425430241616,
      "grad_norm": 0.2094302624464035,
      "learning_rate": 3.481942238885631e-06,
      "loss": 0.0118,
      "step": 1998400
    },
    {
      "epoch": 3.2704581606802696,
      "grad_norm": 0.563930094242096,
      "learning_rate": 3.481876346672114e-06,
      "loss": 0.0119,
      "step": 1998420
    },
    {
      "epoch": 3.2704908911189228,
      "grad_norm": 0.27312347292900085,
      "learning_rate": 3.4818104544585966e-06,
      "loss": 0.0101,
      "step": 1998440
    },
    {
      "epoch": 3.270523621557576,
      "grad_norm": 0.13253049552440643,
      "learning_rate": 3.48174456224508e-06,
      "loss": 0.0098,
      "step": 1998460
    },
    {
      "epoch": 3.2705563519962295,
      "grad_norm": 0.43311503529548645,
      "learning_rate": 3.4816786700315625e-06,
      "loss": 0.008,
      "step": 1998480
    },
    {
      "epoch": 3.2705890824348827,
      "grad_norm": 0.10304491221904755,
      "learning_rate": 3.4816127778180453e-06,
      "loss": 0.0172,
      "step": 1998500
    },
    {
      "epoch": 3.2706218128735363,
      "grad_norm": 0.4041067361831665,
      "learning_rate": 3.481546885604528e-06,
      "loss": 0.0103,
      "step": 1998520
    },
    {
      "epoch": 3.2706545433121894,
      "grad_norm": 0.10791575163602829,
      "learning_rate": 3.4814809933910116e-06,
      "loss": 0.014,
      "step": 1998540
    },
    {
      "epoch": 3.270687273750843,
      "grad_norm": 0.12280015647411346,
      "learning_rate": 3.4814151011774943e-06,
      "loss": 0.0171,
      "step": 1998560
    },
    {
      "epoch": 3.270720004189496,
      "grad_norm": 0.14107678830623627,
      "learning_rate": 3.481349208963977e-06,
      "loss": 0.0077,
      "step": 1998580
    },
    {
      "epoch": 3.2707527346281493,
      "grad_norm": 0.3316265642642975,
      "learning_rate": 3.4812833167504602e-06,
      "loss": 0.0083,
      "step": 1998600
    },
    {
      "epoch": 3.270785465066803,
      "grad_norm": 0.22309258580207825,
      "learning_rate": 3.481217424536943e-06,
      "loss": 0.0096,
      "step": 1998620
    },
    {
      "epoch": 3.270818195505456,
      "grad_norm": 0.4178121089935303,
      "learning_rate": 3.4811515323234257e-06,
      "loss": 0.0163,
      "step": 1998640
    },
    {
      "epoch": 3.2708509259441096,
      "grad_norm": 0.21702362596988678,
      "learning_rate": 3.4810856401099085e-06,
      "loss": 0.0092,
      "step": 1998660
    },
    {
      "epoch": 3.270883656382763,
      "grad_norm": 1.2792483568191528,
      "learning_rate": 3.481019747896391e-06,
      "loss": 0.0085,
      "step": 1998680
    },
    {
      "epoch": 3.2709163868214164,
      "grad_norm": 0.25682196021080017,
      "learning_rate": 3.4809538556828744e-06,
      "loss": 0.0115,
      "step": 1998700
    },
    {
      "epoch": 3.2709491172600695,
      "grad_norm": 0.32910263538360596,
      "learning_rate": 3.480887963469357e-06,
      "loss": 0.0082,
      "step": 1998720
    },
    {
      "epoch": 3.2709818476987227,
      "grad_norm": 0.21039944887161255,
      "learning_rate": 3.48082207125584e-06,
      "loss": 0.011,
      "step": 1998740
    },
    {
      "epoch": 3.2710145781373763,
      "grad_norm": 0.4010433554649353,
      "learning_rate": 3.4807561790423226e-06,
      "loss": 0.0118,
      "step": 1998760
    },
    {
      "epoch": 3.2710473085760294,
      "grad_norm": 0.5937690734863281,
      "learning_rate": 3.4806902868288058e-06,
      "loss": 0.0108,
      "step": 1998780
    },
    {
      "epoch": 3.271080039014683,
      "grad_norm": 0.37692323327064514,
      "learning_rate": 3.4806243946152885e-06,
      "loss": 0.01,
      "step": 1998800
    },
    {
      "epoch": 3.271112769453336,
      "grad_norm": 0.5900072455406189,
      "learning_rate": 3.4805585024017712e-06,
      "loss": 0.0121,
      "step": 1998820
    },
    {
      "epoch": 3.2711454998919898,
      "grad_norm": 0.28191155195236206,
      "learning_rate": 3.480492610188254e-06,
      "loss": 0.0088,
      "step": 1998840
    },
    {
      "epoch": 3.271178230330643,
      "grad_norm": 0.2534998953342438,
      "learning_rate": 3.480426717974737e-06,
      "loss": 0.0097,
      "step": 1998860
    },
    {
      "epoch": 3.271210960769296,
      "grad_norm": 1.213559865951538,
      "learning_rate": 3.48036082576122e-06,
      "loss": 0.0144,
      "step": 1998880
    },
    {
      "epoch": 3.2712436912079497,
      "grad_norm": 0.8830049633979797,
      "learning_rate": 3.480294933547703e-06,
      "loss": 0.0173,
      "step": 1998900
    },
    {
      "epoch": 3.271276421646603,
      "grad_norm": 0.2675941586494446,
      "learning_rate": 3.480229041334186e-06,
      "loss": 0.0142,
      "step": 1998920
    },
    {
      "epoch": 3.2713091520852564,
      "grad_norm": 0.3269471526145935,
      "learning_rate": 3.480163149120669e-06,
      "loss": 0.0123,
      "step": 1998940
    },
    {
      "epoch": 3.2713418825239096,
      "grad_norm": 0.4682636559009552,
      "learning_rate": 3.4800972569071517e-06,
      "loss": 0.0137,
      "step": 1998960
    },
    {
      "epoch": 3.271374612962563,
      "grad_norm": 0.22786906361579895,
      "learning_rate": 3.4800313646936344e-06,
      "loss": 0.0071,
      "step": 1998980
    },
    {
      "epoch": 3.2714073434012163,
      "grad_norm": 0.1757546365261078,
      "learning_rate": 3.4799654724801176e-06,
      "loss": 0.0203,
      "step": 1999000
    },
    {
      "epoch": 3.2714400738398695,
      "grad_norm": 0.32996705174446106,
      "learning_rate": 3.4798995802666003e-06,
      "loss": 0.0144,
      "step": 1999020
    },
    {
      "epoch": 3.271472804278523,
      "grad_norm": 0.3010959029197693,
      "learning_rate": 3.479833688053083e-06,
      "loss": 0.0112,
      "step": 1999040
    },
    {
      "epoch": 3.271505534717176,
      "grad_norm": 0.10369040071964264,
      "learning_rate": 3.479767795839566e-06,
      "loss": 0.0178,
      "step": 1999060
    },
    {
      "epoch": 3.27153826515583,
      "grad_norm": 0.13031592965126038,
      "learning_rate": 3.479701903626049e-06,
      "loss": 0.0096,
      "step": 1999080
    },
    {
      "epoch": 3.271570995594483,
      "grad_norm": 0.1670091301202774,
      "learning_rate": 3.4796360114125317e-06,
      "loss": 0.0137,
      "step": 1999100
    },
    {
      "epoch": 3.2716037260331365,
      "grad_norm": 0.11642012745141983,
      "learning_rate": 3.4795701191990144e-06,
      "loss": 0.008,
      "step": 1999120
    },
    {
      "epoch": 3.2716364564717897,
      "grad_norm": 0.057415641844272614,
      "learning_rate": 3.479504226985497e-06,
      "loss": 0.0142,
      "step": 1999140
    },
    {
      "epoch": 3.271669186910443,
      "grad_norm": 0.1782812774181366,
      "learning_rate": 3.47943833477198e-06,
      "loss": 0.0104,
      "step": 1999160
    },
    {
      "epoch": 3.2717019173490964,
      "grad_norm": 0.1433190256357193,
      "learning_rate": 3.479372442558463e-06,
      "loss": 0.008,
      "step": 1999180
    },
    {
      "epoch": 3.2717346477877496,
      "grad_norm": 0.11533056199550629,
      "learning_rate": 3.479306550344946e-06,
      "loss": 0.0136,
      "step": 1999200
    },
    {
      "epoch": 3.271767378226403,
      "grad_norm": 0.3095397353172302,
      "learning_rate": 3.4792406581314286e-06,
      "loss": 0.0135,
      "step": 1999220
    },
    {
      "epoch": 3.2718001086650563,
      "grad_norm": 0.10346661508083344,
      "learning_rate": 3.479174765917912e-06,
      "loss": 0.0149,
      "step": 1999240
    },
    {
      "epoch": 3.2718328391037095,
      "grad_norm": 0.974065363407135,
      "learning_rate": 3.479108873704395e-06,
      "loss": 0.0108,
      "step": 1999260
    },
    {
      "epoch": 3.271865569542363,
      "grad_norm": 0.2198619395494461,
      "learning_rate": 3.4790429814908776e-06,
      "loss": 0.0089,
      "step": 1999280
    },
    {
      "epoch": 3.2718982999810162,
      "grad_norm": 0.09294233471155167,
      "learning_rate": 3.4789770892773604e-06,
      "loss": 0.0128,
      "step": 1999300
    },
    {
      "epoch": 3.27193103041967,
      "grad_norm": 0.13924828171730042,
      "learning_rate": 3.4789111970638435e-06,
      "loss": 0.0117,
      "step": 1999320
    },
    {
      "epoch": 3.271963760858323,
      "grad_norm": 0.22225837409496307,
      "learning_rate": 3.4788453048503263e-06,
      "loss": 0.0113,
      "step": 1999340
    },
    {
      "epoch": 3.271996491296976,
      "grad_norm": 0.2383303940296173,
      "learning_rate": 3.478779412636809e-06,
      "loss": 0.0112,
      "step": 1999360
    },
    {
      "epoch": 3.2720292217356297,
      "grad_norm": 0.2666458785533905,
      "learning_rate": 3.4787135204232918e-06,
      "loss": 0.0104,
      "step": 1999380
    },
    {
      "epoch": 3.272061952174283,
      "grad_norm": 0.18862204253673553,
      "learning_rate": 3.478647628209775e-06,
      "loss": 0.007,
      "step": 1999400
    },
    {
      "epoch": 3.2720946826129365,
      "grad_norm": 0.2836971879005432,
      "learning_rate": 3.4785817359962577e-06,
      "loss": 0.0117,
      "step": 1999420
    },
    {
      "epoch": 3.2721274130515896,
      "grad_norm": 0.08935502916574478,
      "learning_rate": 3.4785158437827404e-06,
      "loss": 0.0127,
      "step": 1999440
    },
    {
      "epoch": 3.272160143490243,
      "grad_norm": 0.42036283016204834,
      "learning_rate": 3.478449951569223e-06,
      "loss": 0.0097,
      "step": 1999460
    },
    {
      "epoch": 3.2721928739288964,
      "grad_norm": 0.2675909399986267,
      "learning_rate": 3.4783840593557063e-06,
      "loss": 0.0079,
      "step": 1999480
    },
    {
      "epoch": 3.2722256043675495,
      "grad_norm": 0.1262165755033493,
      "learning_rate": 3.478318167142189e-06,
      "loss": 0.0136,
      "step": 1999500
    },
    {
      "epoch": 3.272258334806203,
      "grad_norm": 0.1427639275789261,
      "learning_rate": 3.4782522749286718e-06,
      "loss": 0.011,
      "step": 1999520
    },
    {
      "epoch": 3.2722910652448562,
      "grad_norm": 1.418857216835022,
      "learning_rate": 3.4781863827151545e-06,
      "loss": 0.014,
      "step": 1999540
    },
    {
      "epoch": 3.27232379568351,
      "grad_norm": 0.30610400438308716,
      "learning_rate": 3.4781204905016373e-06,
      "loss": 0.0131,
      "step": 1999560
    },
    {
      "epoch": 3.272356526122163,
      "grad_norm": 0.1210324838757515,
      "learning_rate": 3.4780545982881204e-06,
      "loss": 0.0114,
      "step": 1999580
    },
    {
      "epoch": 3.2723892565608166,
      "grad_norm": 0.4308500587940216,
      "learning_rate": 3.4779887060746036e-06,
      "loss": 0.0119,
      "step": 1999600
    },
    {
      "epoch": 3.2724219869994697,
      "grad_norm": 0.1173219084739685,
      "learning_rate": 3.4779228138610867e-06,
      "loss": 0.0111,
      "step": 1999620
    },
    {
      "epoch": 3.272454717438123,
      "grad_norm": 0.33791735768318176,
      "learning_rate": 3.4778569216475695e-06,
      "loss": 0.0177,
      "step": 1999640
    },
    {
      "epoch": 3.2724874478767765,
      "grad_norm": 0.32095274329185486,
      "learning_rate": 3.4777910294340522e-06,
      "loss": 0.0219,
      "step": 1999660
    },
    {
      "epoch": 3.2725201783154296,
      "grad_norm": 0.24710406363010406,
      "learning_rate": 3.477725137220535e-06,
      "loss": 0.0074,
      "step": 1999680
    },
    {
      "epoch": 3.2725529087540832,
      "grad_norm": 0.19443990290164948,
      "learning_rate": 3.4776592450070177e-06,
      "loss": 0.0083,
      "step": 1999700
    },
    {
      "epoch": 3.2725856391927364,
      "grad_norm": 0.28493914008140564,
      "learning_rate": 3.477593352793501e-06,
      "loss": 0.0117,
      "step": 1999720
    },
    {
      "epoch": 3.27261836963139,
      "grad_norm": 0.17241615056991577,
      "learning_rate": 3.4775274605799836e-06,
      "loss": 0.0103,
      "step": 1999740
    },
    {
      "epoch": 3.272651100070043,
      "grad_norm": 0.23376445472240448,
      "learning_rate": 3.4774615683664664e-06,
      "loss": 0.0108,
      "step": 1999760
    },
    {
      "epoch": 3.2726838305086963,
      "grad_norm": 0.11385773867368698,
      "learning_rate": 3.477395676152949e-06,
      "loss": 0.0077,
      "step": 1999780
    },
    {
      "epoch": 3.27271656094735,
      "grad_norm": 0.03194497525691986,
      "learning_rate": 3.4773297839394323e-06,
      "loss": 0.0083,
      "step": 1999800
    },
    {
      "epoch": 3.272749291386003,
      "grad_norm": 0.187343567609787,
      "learning_rate": 3.477263891725915e-06,
      "loss": 0.014,
      "step": 1999820
    },
    {
      "epoch": 3.2727820218246566,
      "grad_norm": 0.38210931420326233,
      "learning_rate": 3.4771979995123977e-06,
      "loss": 0.0138,
      "step": 1999840
    },
    {
      "epoch": 3.2728147522633098,
      "grad_norm": 0.1739175170660019,
      "learning_rate": 3.4771321072988805e-06,
      "loss": 0.0112,
      "step": 1999860
    },
    {
      "epoch": 3.2728474827019634,
      "grad_norm": 0.9373255968093872,
      "learning_rate": 3.4770662150853636e-06,
      "loss": 0.014,
      "step": 1999880
    },
    {
      "epoch": 3.2728802131406165,
      "grad_norm": 0.1794673651456833,
      "learning_rate": 3.4770003228718464e-06,
      "loss": 0.0093,
      "step": 1999900
    },
    {
      "epoch": 3.2729129435792697,
      "grad_norm": 0.6970983147621155,
      "learning_rate": 3.476934430658329e-06,
      "loss": 0.0113,
      "step": 1999920
    },
    {
      "epoch": 3.2729456740179232,
      "grad_norm": 0.4764751195907593,
      "learning_rate": 3.4768685384448127e-06,
      "loss": 0.0089,
      "step": 1999940
    },
    {
      "epoch": 3.2729784044565764,
      "grad_norm": 0.3302198648452759,
      "learning_rate": 3.4768026462312954e-06,
      "loss": 0.009,
      "step": 1999960
    },
    {
      "epoch": 3.27301113489523,
      "grad_norm": 0.20498403906822205,
      "learning_rate": 3.476736754017778e-06,
      "loss": 0.0072,
      "step": 1999980
    },
    {
      "epoch": 3.273043865333883,
      "grad_norm": 0.7429282069206238,
      "learning_rate": 3.476670861804261e-06,
      "loss": 0.0108,
      "step": 2000000
    },
    {
      "epoch": 3.273043865333883,
      "eval_loss": 0.006985635496675968,
      "eval_runtime": 6506.5927,
      "eval_samples_per_second": 157.972,
      "eval_steps_per_second": 15.797,
      "eval_sts-dev_pearson_cosine": 0.984058841406085,
      "eval_sts-dev_spearman_cosine": 0.8948932250403385,
      "step": 2000000
    },
    {
      "epoch": 3.2730765957725367,
      "grad_norm": 0.3856569528579712,
      "learning_rate": 3.476604969590744e-06,
      "loss": 0.0175,
      "step": 2000020
    },
    {
      "epoch": 3.27310932621119,
      "grad_norm": 1.5176968574523926,
      "learning_rate": 3.476539077377227e-06,
      "loss": 0.0144,
      "step": 2000040
    },
    {
      "epoch": 3.273142056649843,
      "grad_norm": 0.14728417992591858,
      "learning_rate": 3.4764731851637096e-06,
      "loss": 0.0102,
      "step": 2000060
    },
    {
      "epoch": 3.2731747870884966,
      "grad_norm": 0.6030598878860474,
      "learning_rate": 3.4764072929501923e-06,
      "loss": 0.0155,
      "step": 2000080
    },
    {
      "epoch": 3.27320751752715,
      "grad_norm": 0.5549412965774536,
      "learning_rate": 3.476341400736675e-06,
      "loss": 0.0125,
      "step": 2000100
    },
    {
      "epoch": 3.2732402479658034,
      "grad_norm": 0.21799390017986298,
      "learning_rate": 3.476275508523158e-06,
      "loss": 0.0109,
      "step": 2000120
    },
    {
      "epoch": 3.2732729784044565,
      "grad_norm": 0.5194177627563477,
      "learning_rate": 3.476209616309641e-06,
      "loss": 0.011,
      "step": 2000140
    },
    {
      "epoch": 3.27330570884311,
      "grad_norm": 0.33885663747787476,
      "learning_rate": 3.4761437240961237e-06,
      "loss": 0.0106,
      "step": 2000160
    },
    {
      "epoch": 3.2733384392817633,
      "grad_norm": 0.2659473419189453,
      "learning_rate": 3.4760778318826064e-06,
      "loss": 0.0153,
      "step": 2000180
    },
    {
      "epoch": 3.2733711697204164,
      "grad_norm": 0.10213790833950043,
      "learning_rate": 3.4760119396690896e-06,
      "loss": 0.0082,
      "step": 2000200
    },
    {
      "epoch": 3.27340390015907,
      "grad_norm": 0.35723888874053955,
      "learning_rate": 3.4759460474555723e-06,
      "loss": 0.0105,
      "step": 2000220
    },
    {
      "epoch": 3.273436630597723,
      "grad_norm": 0.5366680026054382,
      "learning_rate": 3.475880155242055e-06,
      "loss": 0.012,
      "step": 2000240
    },
    {
      "epoch": 3.2734693610363768,
      "grad_norm": 0.534807026386261,
      "learning_rate": 3.475814263028538e-06,
      "loss": 0.012,
      "step": 2000260
    },
    {
      "epoch": 3.27350209147503,
      "grad_norm": 0.3856423795223236,
      "learning_rate": 3.475748370815021e-06,
      "loss": 0.0167,
      "step": 2000280
    },
    {
      "epoch": 3.2735348219136835,
      "grad_norm": 0.1994275450706482,
      "learning_rate": 3.475682478601504e-06,
      "loss": 0.008,
      "step": 2000300
    },
    {
      "epoch": 3.2735675523523367,
      "grad_norm": 0.7790604829788208,
      "learning_rate": 3.475616586387987e-06,
      "loss": 0.0097,
      "step": 2000320
    },
    {
      "epoch": 3.27360028279099,
      "grad_norm": 0.07603128999471664,
      "learning_rate": 3.47555069417447e-06,
      "loss": 0.0136,
      "step": 2000340
    },
    {
      "epoch": 3.2736330132296434,
      "grad_norm": 0.29375016689300537,
      "learning_rate": 3.4754848019609528e-06,
      "loss": 0.0123,
      "step": 2000360
    },
    {
      "epoch": 3.2736657436682965,
      "grad_norm": 0.2204657793045044,
      "learning_rate": 3.4754189097474355e-06,
      "loss": 0.0108,
      "step": 2000380
    },
    {
      "epoch": 3.27369847410695,
      "grad_norm": 0.08276977390050888,
      "learning_rate": 3.4753530175339183e-06,
      "loss": 0.0085,
      "step": 2000400
    },
    {
      "epoch": 3.2737312045456033,
      "grad_norm": 0.1376011222600937,
      "learning_rate": 3.4752871253204014e-06,
      "loss": 0.0069,
      "step": 2000420
    },
    {
      "epoch": 3.273763934984257,
      "grad_norm": 0.8095881938934326,
      "learning_rate": 3.475221233106884e-06,
      "loss": 0.0279,
      "step": 2000440
    },
    {
      "epoch": 3.27379666542291,
      "grad_norm": 0.33096957206726074,
      "learning_rate": 3.475155340893367e-06,
      "loss": 0.0144,
      "step": 2000460
    },
    {
      "epoch": 3.273829395861563,
      "grad_norm": 0.13412311673164368,
      "learning_rate": 3.4750894486798496e-06,
      "loss": 0.009,
      "step": 2000480
    },
    {
      "epoch": 3.273862126300217,
      "grad_norm": 0.46391668915748596,
      "learning_rate": 3.475023556466333e-06,
      "loss": 0.0141,
      "step": 2000500
    },
    {
      "epoch": 3.27389485673887,
      "grad_norm": 0.1252317577600479,
      "learning_rate": 3.4749576642528155e-06,
      "loss": 0.0104,
      "step": 2000520
    },
    {
      "epoch": 3.2739275871775235,
      "grad_norm": 0.48128774762153625,
      "learning_rate": 3.4748917720392983e-06,
      "loss": 0.0104,
      "step": 2000540
    },
    {
      "epoch": 3.2739603176161767,
      "grad_norm": 0.7548919916152954,
      "learning_rate": 3.474825879825781e-06,
      "loss": 0.0139,
      "step": 2000560
    },
    {
      "epoch": 3.2739930480548303,
      "grad_norm": 0.25776395201683044,
      "learning_rate": 3.4747599876122638e-06,
      "loss": 0.0124,
      "step": 2000580
    },
    {
      "epoch": 3.2740257784934834,
      "grad_norm": 0.27717772126197815,
      "learning_rate": 3.474694095398747e-06,
      "loss": 0.0128,
      "step": 2000600
    },
    {
      "epoch": 3.2740585089321366,
      "grad_norm": 0.7305853962898254,
      "learning_rate": 3.4746282031852297e-06,
      "loss": 0.0096,
      "step": 2000620
    },
    {
      "epoch": 3.27409123937079,
      "grad_norm": 0.10996098816394806,
      "learning_rate": 3.4745623109717124e-06,
      "loss": 0.0108,
      "step": 2000640
    },
    {
      "epoch": 3.2741239698094433,
      "grad_norm": 0.741708517074585,
      "learning_rate": 3.474496418758196e-06,
      "loss": 0.0132,
      "step": 2000660
    },
    {
      "epoch": 3.274156700248097,
      "grad_norm": 0.10517355799674988,
      "learning_rate": 3.4744305265446787e-06,
      "loss": 0.0186,
      "step": 2000680
    },
    {
      "epoch": 3.27418943068675,
      "grad_norm": 0.25899291038513184,
      "learning_rate": 3.4743646343311615e-06,
      "loss": 0.0096,
      "step": 2000700
    },
    {
      "epoch": 3.2742221611254037,
      "grad_norm": 0.6411138772964478,
      "learning_rate": 3.4742987421176442e-06,
      "loss": 0.013,
      "step": 2000720
    },
    {
      "epoch": 3.274254891564057,
      "grad_norm": 0.10814839601516724,
      "learning_rate": 3.4742328499041274e-06,
      "loss": 0.0105,
      "step": 2000740
    },
    {
      "epoch": 3.27428762200271,
      "grad_norm": 0.13923998177051544,
      "learning_rate": 3.47416695769061e-06,
      "loss": 0.0158,
      "step": 2000760
    },
    {
      "epoch": 3.2743203524413635,
      "grad_norm": 0.09980499744415283,
      "learning_rate": 3.474101065477093e-06,
      "loss": 0.01,
      "step": 2000780
    },
    {
      "epoch": 3.2743530828800167,
      "grad_norm": 0.1094452440738678,
      "learning_rate": 3.4740351732635756e-06,
      "loss": 0.0144,
      "step": 2000800
    },
    {
      "epoch": 3.2743858133186703,
      "grad_norm": 0.20703013241291046,
      "learning_rate": 3.4739692810500588e-06,
      "loss": 0.0107,
      "step": 2000820
    },
    {
      "epoch": 3.2744185437573234,
      "grad_norm": 0.5774810314178467,
      "learning_rate": 3.4739033888365415e-06,
      "loss": 0.0124,
      "step": 2000840
    },
    {
      "epoch": 3.2744512741959766,
      "grad_norm": 0.1287423074245453,
      "learning_rate": 3.4738374966230242e-06,
      "loss": 0.0103,
      "step": 2000860
    },
    {
      "epoch": 3.27448400463463,
      "grad_norm": 0.12704898416996002,
      "learning_rate": 3.473771604409507e-06,
      "loss": 0.0109,
      "step": 2000880
    },
    {
      "epoch": 3.2745167350732833,
      "grad_norm": 0.18785150349140167,
      "learning_rate": 3.47370571219599e-06,
      "loss": 0.0088,
      "step": 2000900
    },
    {
      "epoch": 3.274549465511937,
      "grad_norm": 0.24513748288154602,
      "learning_rate": 3.473639819982473e-06,
      "loss": 0.0078,
      "step": 2000920
    },
    {
      "epoch": 3.27458219595059,
      "grad_norm": 0.10446806997060776,
      "learning_rate": 3.4735739277689556e-06,
      "loss": 0.008,
      "step": 2000940
    },
    {
      "epoch": 3.2746149263892432,
      "grad_norm": 0.40937691926956177,
      "learning_rate": 3.4735080355554384e-06,
      "loss": 0.0105,
      "step": 2000960
    },
    {
      "epoch": 3.274647656827897,
      "grad_norm": 0.6068224906921387,
      "learning_rate": 3.4734421433419215e-06,
      "loss": 0.0157,
      "step": 2000980
    },
    {
      "epoch": 3.27468038726655,
      "grad_norm": 0.2203342616558075,
      "learning_rate": 3.4733762511284047e-06,
      "loss": 0.0091,
      "step": 2001000
    },
    {
      "epoch": 3.2747131177052036,
      "grad_norm": 0.3353177309036255,
      "learning_rate": 3.4733103589148874e-06,
      "loss": 0.0126,
      "step": 2001020
    },
    {
      "epoch": 3.2747458481438567,
      "grad_norm": 0.20379064977169037,
      "learning_rate": 3.4732444667013706e-06,
      "loss": 0.01,
      "step": 2001040
    },
    {
      "epoch": 3.2747785785825103,
      "grad_norm": 0.06672465801239014,
      "learning_rate": 3.4731785744878533e-06,
      "loss": 0.0078,
      "step": 2001060
    },
    {
      "epoch": 3.2748113090211635,
      "grad_norm": 0.10583756119012833,
      "learning_rate": 3.473112682274336e-06,
      "loss": 0.0121,
      "step": 2001080
    },
    {
      "epoch": 3.2748440394598166,
      "grad_norm": 0.1175728440284729,
      "learning_rate": 3.473046790060819e-06,
      "loss": 0.0109,
      "step": 2001100
    },
    {
      "epoch": 3.27487676989847,
      "grad_norm": 0.45677295327186584,
      "learning_rate": 3.4729808978473015e-06,
      "loss": 0.0168,
      "step": 2001120
    },
    {
      "epoch": 3.2749095003371234,
      "grad_norm": 0.30555206537246704,
      "learning_rate": 3.4729150056337847e-06,
      "loss": 0.0119,
      "step": 2001140
    },
    {
      "epoch": 3.274942230775777,
      "grad_norm": 0.28925222158432007,
      "learning_rate": 3.4728491134202675e-06,
      "loss": 0.013,
      "step": 2001160
    },
    {
      "epoch": 3.27497496121443,
      "grad_norm": 0.297452837228775,
      "learning_rate": 3.47278322120675e-06,
      "loss": 0.0119,
      "step": 2001180
    },
    {
      "epoch": 3.2750076916530837,
      "grad_norm": 0.3892740309238434,
      "learning_rate": 3.472717328993233e-06,
      "loss": 0.0127,
      "step": 2001200
    },
    {
      "epoch": 3.275040422091737,
      "grad_norm": 0.17647472023963928,
      "learning_rate": 3.472651436779716e-06,
      "loss": 0.0142,
      "step": 2001220
    },
    {
      "epoch": 3.27507315253039,
      "grad_norm": 0.3948597013950348,
      "learning_rate": 3.472585544566199e-06,
      "loss": 0.0123,
      "step": 2001240
    },
    {
      "epoch": 3.2751058829690436,
      "grad_norm": 1.371678113937378,
      "learning_rate": 3.4725196523526816e-06,
      "loss": 0.0171,
      "step": 2001260
    },
    {
      "epoch": 3.2751386134076967,
      "grad_norm": 0.08617743849754333,
      "learning_rate": 3.4724537601391643e-06,
      "loss": 0.0182,
      "step": 2001280
    },
    {
      "epoch": 3.2751713438463503,
      "grad_norm": 0.2784386873245239,
      "learning_rate": 3.4723878679256475e-06,
      "loss": 0.0083,
      "step": 2001300
    },
    {
      "epoch": 3.2752040742850035,
      "grad_norm": 0.4260643720626831,
      "learning_rate": 3.4723219757121302e-06,
      "loss": 0.0128,
      "step": 2001320
    },
    {
      "epoch": 3.275236804723657,
      "grad_norm": 0.9031241536140442,
      "learning_rate": 3.472256083498613e-06,
      "loss": 0.0132,
      "step": 2001340
    },
    {
      "epoch": 3.2752695351623102,
      "grad_norm": 0.23371899127960205,
      "learning_rate": 3.4721901912850965e-06,
      "loss": 0.0129,
      "step": 2001360
    },
    {
      "epoch": 3.2753022656009634,
      "grad_norm": 0.41453778743743896,
      "learning_rate": 3.4721242990715793e-06,
      "loss": 0.0127,
      "step": 2001380
    },
    {
      "epoch": 3.275334996039617,
      "grad_norm": 0.403104692697525,
      "learning_rate": 3.472058406858062e-06,
      "loss": 0.0169,
      "step": 2001400
    },
    {
      "epoch": 3.27536772647827,
      "grad_norm": 0.11278019845485687,
      "learning_rate": 3.4719925146445448e-06,
      "loss": 0.0074,
      "step": 2001420
    },
    {
      "epoch": 3.2754004569169237,
      "grad_norm": 0.3089905381202698,
      "learning_rate": 3.471926622431028e-06,
      "loss": 0.0096,
      "step": 2001440
    },
    {
      "epoch": 3.275433187355577,
      "grad_norm": 0.3358689844608307,
      "learning_rate": 3.4718607302175107e-06,
      "loss": 0.01,
      "step": 2001460
    },
    {
      "epoch": 3.2754659177942305,
      "grad_norm": 0.43840375542640686,
      "learning_rate": 3.4717948380039934e-06,
      "loss": 0.0083,
      "step": 2001480
    },
    {
      "epoch": 3.2754986482328836,
      "grad_norm": 0.28188556432724,
      "learning_rate": 3.471728945790476e-06,
      "loss": 0.0082,
      "step": 2001500
    },
    {
      "epoch": 3.2755313786715368,
      "grad_norm": 0.17021538317203522,
      "learning_rate": 3.4716630535769593e-06,
      "loss": 0.0118,
      "step": 2001520
    },
    {
      "epoch": 3.2755641091101904,
      "grad_norm": 0.273491233587265,
      "learning_rate": 3.471597161363442e-06,
      "loss": 0.0088,
      "step": 2001540
    },
    {
      "epoch": 3.2755968395488435,
      "grad_norm": 0.30238381028175354,
      "learning_rate": 3.4715312691499248e-06,
      "loss": 0.0072,
      "step": 2001560
    },
    {
      "epoch": 3.275629569987497,
      "grad_norm": 0.1724170744419098,
      "learning_rate": 3.4714653769364075e-06,
      "loss": 0.0154,
      "step": 2001580
    },
    {
      "epoch": 3.2756623004261503,
      "grad_norm": 0.4752516746520996,
      "learning_rate": 3.4713994847228903e-06,
      "loss": 0.0119,
      "step": 2001600
    },
    {
      "epoch": 3.275695030864804,
      "grad_norm": 0.18710345029830933,
      "learning_rate": 3.4713335925093734e-06,
      "loss": 0.0123,
      "step": 2001620
    },
    {
      "epoch": 3.275727761303457,
      "grad_norm": 0.10889153182506561,
      "learning_rate": 3.471267700295856e-06,
      "loss": 0.0089,
      "step": 2001640
    },
    {
      "epoch": 3.27576049174211,
      "grad_norm": 0.2740439772605896,
      "learning_rate": 3.471201808082339e-06,
      "loss": 0.0114,
      "step": 2001660
    },
    {
      "epoch": 3.2757932221807637,
      "grad_norm": 0.31918275356292725,
      "learning_rate": 3.4711359158688216e-06,
      "loss": 0.0096,
      "step": 2001680
    },
    {
      "epoch": 3.275825952619417,
      "grad_norm": 0.1913120299577713,
      "learning_rate": 3.4710700236553052e-06,
      "loss": 0.0124,
      "step": 2001700
    },
    {
      "epoch": 3.2758586830580705,
      "grad_norm": 0.06987357884645462,
      "learning_rate": 3.471004131441788e-06,
      "loss": 0.0095,
      "step": 2001720
    },
    {
      "epoch": 3.2758914134967236,
      "grad_norm": 0.47843649983406067,
      "learning_rate": 3.4709382392282707e-06,
      "loss": 0.0118,
      "step": 2001740
    },
    {
      "epoch": 3.2759241439353772,
      "grad_norm": 0.14301694929599762,
      "learning_rate": 3.470872347014754e-06,
      "loss": 0.0128,
      "step": 2001760
    },
    {
      "epoch": 3.2759568743740304,
      "grad_norm": 0.8676267266273499,
      "learning_rate": 3.4708064548012366e-06,
      "loss": 0.0143,
      "step": 2001780
    },
    {
      "epoch": 3.2759896048126835,
      "grad_norm": 0.7605780363082886,
      "learning_rate": 3.4707405625877194e-06,
      "loss": 0.0122,
      "step": 2001800
    },
    {
      "epoch": 3.276022335251337,
      "grad_norm": 0.10692775249481201,
      "learning_rate": 3.470674670374202e-06,
      "loss": 0.0116,
      "step": 2001820
    },
    {
      "epoch": 3.2760550656899903,
      "grad_norm": 0.3543154299259186,
      "learning_rate": 3.4706087781606853e-06,
      "loss": 0.0209,
      "step": 2001840
    },
    {
      "epoch": 3.276087796128644,
      "grad_norm": 0.14649325609207153,
      "learning_rate": 3.470542885947168e-06,
      "loss": 0.0084,
      "step": 2001860
    },
    {
      "epoch": 3.276120526567297,
      "grad_norm": 0.4274350106716156,
      "learning_rate": 3.4704769937336507e-06,
      "loss": 0.0097,
      "step": 2001880
    },
    {
      "epoch": 3.2761532570059506,
      "grad_norm": 0.27388235926628113,
      "learning_rate": 3.4704111015201335e-06,
      "loss": 0.0123,
      "step": 2001900
    },
    {
      "epoch": 3.2761859874446038,
      "grad_norm": 0.4284939467906952,
      "learning_rate": 3.4703452093066166e-06,
      "loss": 0.0091,
      "step": 2001920
    },
    {
      "epoch": 3.276218717883257,
      "grad_norm": 0.4975590109825134,
      "learning_rate": 3.4702793170930994e-06,
      "loss": 0.0117,
      "step": 2001940
    },
    {
      "epoch": 3.2762514483219105,
      "grad_norm": 0.21645453572273254,
      "learning_rate": 3.470213424879582e-06,
      "loss": 0.0111,
      "step": 2001960
    },
    {
      "epoch": 3.2762841787605637,
      "grad_norm": 0.16529476642608643,
      "learning_rate": 3.470147532666065e-06,
      "loss": 0.0157,
      "step": 2001980
    },
    {
      "epoch": 3.2763169091992173,
      "grad_norm": 1.511313796043396,
      "learning_rate": 3.4700816404525476e-06,
      "loss": 0.0108,
      "step": 2002000
    },
    {
      "epoch": 3.2763496396378704,
      "grad_norm": 1.8073041439056396,
      "learning_rate": 3.4700157482390308e-06,
      "loss": 0.0108,
      "step": 2002020
    },
    {
      "epoch": 3.276382370076524,
      "grad_norm": 0.48451584577560425,
      "learning_rate": 3.4699498560255135e-06,
      "loss": 0.0108,
      "step": 2002040
    },
    {
      "epoch": 3.276415100515177,
      "grad_norm": 0.18584208190441132,
      "learning_rate": 3.4698839638119967e-06,
      "loss": 0.0094,
      "step": 2002060
    },
    {
      "epoch": 3.2764478309538303,
      "grad_norm": 0.09376166015863419,
      "learning_rate": 3.46981807159848e-06,
      "loss": 0.0128,
      "step": 2002080
    },
    {
      "epoch": 3.276480561392484,
      "grad_norm": 0.09367355704307556,
      "learning_rate": 3.4697521793849626e-06,
      "loss": 0.0096,
      "step": 2002100
    },
    {
      "epoch": 3.276513291831137,
      "grad_norm": 0.171514093875885,
      "learning_rate": 3.4696862871714453e-06,
      "loss": 0.0141,
      "step": 2002120
    },
    {
      "epoch": 3.2765460222697906,
      "grad_norm": 0.11846835911273956,
      "learning_rate": 3.469620394957928e-06,
      "loss": 0.0112,
      "step": 2002140
    },
    {
      "epoch": 3.276578752708444,
      "grad_norm": 0.18998867273330688,
      "learning_rate": 3.4695545027444112e-06,
      "loss": 0.0087,
      "step": 2002160
    },
    {
      "epoch": 3.2766114831470974,
      "grad_norm": 0.4729570746421814,
      "learning_rate": 3.469488610530894e-06,
      "loss": 0.011,
      "step": 2002180
    },
    {
      "epoch": 3.2766442135857505,
      "grad_norm": 0.19440586864948273,
      "learning_rate": 3.4694227183173767e-06,
      "loss": 0.0072,
      "step": 2002200
    },
    {
      "epoch": 3.2766769440244037,
      "grad_norm": 0.3304474949836731,
      "learning_rate": 3.4693568261038594e-06,
      "loss": 0.0132,
      "step": 2002220
    },
    {
      "epoch": 3.2767096744630573,
      "grad_norm": 0.11244753003120422,
      "learning_rate": 3.4692909338903426e-06,
      "loss": 0.0112,
      "step": 2002240
    },
    {
      "epoch": 3.2767424049017104,
      "grad_norm": 0.09729445725679398,
      "learning_rate": 3.4692250416768253e-06,
      "loss": 0.0102,
      "step": 2002260
    },
    {
      "epoch": 3.276775135340364,
      "grad_norm": 0.5192290544509888,
      "learning_rate": 3.469159149463308e-06,
      "loss": 0.0123,
      "step": 2002280
    },
    {
      "epoch": 3.276807865779017,
      "grad_norm": 0.10758647322654724,
      "learning_rate": 3.469093257249791e-06,
      "loss": 0.0095,
      "step": 2002300
    },
    {
      "epoch": 3.2768405962176703,
      "grad_norm": 0.3426401615142822,
      "learning_rate": 3.469027365036274e-06,
      "loss": 0.0098,
      "step": 2002320
    },
    {
      "epoch": 3.276873326656324,
      "grad_norm": 0.10165705531835556,
      "learning_rate": 3.4689614728227567e-06,
      "loss": 0.0134,
      "step": 2002340
    },
    {
      "epoch": 3.276906057094977,
      "grad_norm": 0.20310302078723907,
      "learning_rate": 3.4688955806092395e-06,
      "loss": 0.0127,
      "step": 2002360
    },
    {
      "epoch": 3.2769387875336307,
      "grad_norm": 0.32325369119644165,
      "learning_rate": 3.468829688395722e-06,
      "loss": 0.0091,
      "step": 2002380
    },
    {
      "epoch": 3.276971517972284,
      "grad_norm": 0.1451791524887085,
      "learning_rate": 3.4687637961822054e-06,
      "loss": 0.0086,
      "step": 2002400
    },
    {
      "epoch": 3.277004248410937,
      "grad_norm": 0.27443891763687134,
      "learning_rate": 3.4686979039686885e-06,
      "loss": 0.0157,
      "step": 2002420
    },
    {
      "epoch": 3.2770369788495906,
      "grad_norm": 0.5211496949195862,
      "learning_rate": 3.4686320117551713e-06,
      "loss": 0.0194,
      "step": 2002440
    },
    {
      "epoch": 3.2770697092882437,
      "grad_norm": 0.16167640686035156,
      "learning_rate": 3.4685661195416544e-06,
      "loss": 0.0092,
      "step": 2002460
    },
    {
      "epoch": 3.2771024397268973,
      "grad_norm": 0.32934045791625977,
      "learning_rate": 3.468500227328137e-06,
      "loss": 0.0147,
      "step": 2002480
    },
    {
      "epoch": 3.2771351701655504,
      "grad_norm": 0.9838271737098694,
      "learning_rate": 3.46843433511462e-06,
      "loss": 0.0151,
      "step": 2002500
    },
    {
      "epoch": 3.277167900604204,
      "grad_norm": 0.09225190430879593,
      "learning_rate": 3.4683684429011026e-06,
      "loss": 0.0131,
      "step": 2002520
    },
    {
      "epoch": 3.277200631042857,
      "grad_norm": 0.22242577373981476,
      "learning_rate": 3.4683025506875854e-06,
      "loss": 0.0105,
      "step": 2002540
    },
    {
      "epoch": 3.2772333614815103,
      "grad_norm": 3.681474208831787,
      "learning_rate": 3.4682366584740686e-06,
      "loss": 0.0131,
      "step": 2002560
    },
    {
      "epoch": 3.277266091920164,
      "grad_norm": 0.39074280858039856,
      "learning_rate": 3.4681707662605513e-06,
      "loss": 0.0123,
      "step": 2002580
    },
    {
      "epoch": 3.277298822358817,
      "grad_norm": 0.3745124340057373,
      "learning_rate": 3.468104874047034e-06,
      "loss": 0.0104,
      "step": 2002600
    },
    {
      "epoch": 3.2773315527974707,
      "grad_norm": 0.18132175505161285,
      "learning_rate": 3.4680389818335168e-06,
      "loss": 0.0121,
      "step": 2002620
    },
    {
      "epoch": 3.277364283236124,
      "grad_norm": 0.2583317160606384,
      "learning_rate": 3.46797308962e-06,
      "loss": 0.0123,
      "step": 2002640
    },
    {
      "epoch": 3.2773970136747774,
      "grad_norm": 0.029793936759233475,
      "learning_rate": 3.4679071974064827e-06,
      "loss": 0.0125,
      "step": 2002660
    },
    {
      "epoch": 3.2774297441134306,
      "grad_norm": 0.15200175344944,
      "learning_rate": 3.4678413051929654e-06,
      "loss": 0.0099,
      "step": 2002680
    },
    {
      "epoch": 3.2774624745520837,
      "grad_norm": 0.4852658212184906,
      "learning_rate": 3.467775412979448e-06,
      "loss": 0.0142,
      "step": 2002700
    },
    {
      "epoch": 3.2774952049907373,
      "grad_norm": 0.13457264006137848,
      "learning_rate": 3.4677095207659313e-06,
      "loss": 0.0147,
      "step": 2002720
    },
    {
      "epoch": 3.2775279354293905,
      "grad_norm": 0.2966209948062897,
      "learning_rate": 3.467643628552414e-06,
      "loss": 0.0105,
      "step": 2002740
    },
    {
      "epoch": 3.277560665868044,
      "grad_norm": 0.32103002071380615,
      "learning_rate": 3.4675777363388972e-06,
      "loss": 0.0104,
      "step": 2002760
    },
    {
      "epoch": 3.277593396306697,
      "grad_norm": 0.28732356429100037,
      "learning_rate": 3.4675118441253804e-06,
      "loss": 0.0162,
      "step": 2002780
    },
    {
      "epoch": 3.277626126745351,
      "grad_norm": 0.3031795918941498,
      "learning_rate": 3.467445951911863e-06,
      "loss": 0.0091,
      "step": 2002800
    },
    {
      "epoch": 3.277658857184004,
      "grad_norm": 0.7336798906326294,
      "learning_rate": 3.467380059698346e-06,
      "loss": 0.011,
      "step": 2002820
    },
    {
      "epoch": 3.277691587622657,
      "grad_norm": 0.4011993110179901,
      "learning_rate": 3.4673141674848286e-06,
      "loss": 0.0114,
      "step": 2002840
    },
    {
      "epoch": 3.2777243180613107,
      "grad_norm": 0.3285330832004547,
      "learning_rate": 3.4672482752713118e-06,
      "loss": 0.0133,
      "step": 2002860
    },
    {
      "epoch": 3.277757048499964,
      "grad_norm": 0.7413082122802734,
      "learning_rate": 3.4671823830577945e-06,
      "loss": 0.0115,
      "step": 2002880
    },
    {
      "epoch": 3.2777897789386174,
      "grad_norm": 0.5585685968399048,
      "learning_rate": 3.4671164908442772e-06,
      "loss": 0.0092,
      "step": 2002900
    },
    {
      "epoch": 3.2778225093772706,
      "grad_norm": 0.3315078914165497,
      "learning_rate": 3.46705059863076e-06,
      "loss": 0.0148,
      "step": 2002920
    },
    {
      "epoch": 3.277855239815924,
      "grad_norm": 0.626389741897583,
      "learning_rate": 3.466984706417243e-06,
      "loss": 0.0142,
      "step": 2002940
    },
    {
      "epoch": 3.2778879702545773,
      "grad_norm": 0.15718285739421844,
      "learning_rate": 3.466918814203726e-06,
      "loss": 0.009,
      "step": 2002960
    },
    {
      "epoch": 3.2779207006932305,
      "grad_norm": 0.09102372825145721,
      "learning_rate": 3.4668529219902086e-06,
      "loss": 0.012,
      "step": 2002980
    },
    {
      "epoch": 3.277953431131884,
      "grad_norm": 0.262692391872406,
      "learning_rate": 3.4667870297766914e-06,
      "loss": 0.0114,
      "step": 2003000
    },
    {
      "epoch": 3.2779861615705372,
      "grad_norm": 0.2710355520248413,
      "learning_rate": 3.466721137563174e-06,
      "loss": 0.0184,
      "step": 2003020
    },
    {
      "epoch": 3.278018892009191,
      "grad_norm": 0.13228344917297363,
      "learning_rate": 3.4666552453496573e-06,
      "loss": 0.0097,
      "step": 2003040
    },
    {
      "epoch": 3.278051622447844,
      "grad_norm": 0.33341115713119507,
      "learning_rate": 3.46658935313614e-06,
      "loss": 0.0065,
      "step": 2003060
    },
    {
      "epoch": 3.2780843528864976,
      "grad_norm": 0.29164671897888184,
      "learning_rate": 3.4665234609226227e-06,
      "loss": 0.0113,
      "step": 2003080
    },
    {
      "epoch": 3.2781170833251507,
      "grad_norm": 0.2035127431154251,
      "learning_rate": 3.4664575687091055e-06,
      "loss": 0.0123,
      "step": 2003100
    },
    {
      "epoch": 3.278149813763804,
      "grad_norm": 0.2527307868003845,
      "learning_rate": 3.466391676495589e-06,
      "loss": 0.0154,
      "step": 2003120
    },
    {
      "epoch": 3.2781825442024575,
      "grad_norm": 0.2773714065551758,
      "learning_rate": 3.466325784282072e-06,
      "loss": 0.0074,
      "step": 2003140
    },
    {
      "epoch": 3.2782152746411106,
      "grad_norm": 0.17017826437950134,
      "learning_rate": 3.4662598920685546e-06,
      "loss": 0.0109,
      "step": 2003160
    },
    {
      "epoch": 3.278248005079764,
      "grad_norm": 0.23879913985729218,
      "learning_rate": 3.4661939998550377e-06,
      "loss": 0.0091,
      "step": 2003180
    },
    {
      "epoch": 3.2782807355184174,
      "grad_norm": 0.288682758808136,
      "learning_rate": 3.4661281076415205e-06,
      "loss": 0.0161,
      "step": 2003200
    },
    {
      "epoch": 3.278313465957071,
      "grad_norm": 0.16634275019168854,
      "learning_rate": 3.466062215428003e-06,
      "loss": 0.0118,
      "step": 2003220
    },
    {
      "epoch": 3.278346196395724,
      "grad_norm": 0.12597040832042694,
      "learning_rate": 3.465996323214486e-06,
      "loss": 0.0138,
      "step": 2003240
    },
    {
      "epoch": 3.2783789268343773,
      "grad_norm": 0.23431672155857086,
      "learning_rate": 3.465930431000969e-06,
      "loss": 0.0111,
      "step": 2003260
    },
    {
      "epoch": 3.278411657273031,
      "grad_norm": 0.16417299211025238,
      "learning_rate": 3.465864538787452e-06,
      "loss": 0.0188,
      "step": 2003280
    },
    {
      "epoch": 3.278444387711684,
      "grad_norm": 0.3525199890136719,
      "learning_rate": 3.4657986465739346e-06,
      "loss": 0.0145,
      "step": 2003300
    },
    {
      "epoch": 3.2784771181503376,
      "grad_norm": 0.2539871037006378,
      "learning_rate": 3.4657327543604173e-06,
      "loss": 0.0121,
      "step": 2003320
    },
    {
      "epoch": 3.2785098485889907,
      "grad_norm": 0.2947569787502289,
      "learning_rate": 3.4656668621469005e-06,
      "loss": 0.0138,
      "step": 2003340
    },
    {
      "epoch": 3.2785425790276443,
      "grad_norm": 0.18822813034057617,
      "learning_rate": 3.4656009699333832e-06,
      "loss": 0.0123,
      "step": 2003360
    },
    {
      "epoch": 3.2785753094662975,
      "grad_norm": 1.409514307975769,
      "learning_rate": 3.465535077719866e-06,
      "loss": 0.0123,
      "step": 2003380
    },
    {
      "epoch": 3.2786080399049506,
      "grad_norm": 0.2715219557285309,
      "learning_rate": 3.4654691855063487e-06,
      "loss": 0.0077,
      "step": 2003400
    },
    {
      "epoch": 3.2786407703436042,
      "grad_norm": 0.41203784942626953,
      "learning_rate": 3.4654032932928314e-06,
      "loss": 0.0081,
      "step": 2003420
    },
    {
      "epoch": 3.2786735007822574,
      "grad_norm": 0.09317970275878906,
      "learning_rate": 3.4653374010793146e-06,
      "loss": 0.0102,
      "step": 2003440
    },
    {
      "epoch": 3.278706231220911,
      "grad_norm": 0.14038731157779694,
      "learning_rate": 3.4652715088657978e-06,
      "loss": 0.0106,
      "step": 2003460
    },
    {
      "epoch": 3.278738961659564,
      "grad_norm": 0.4259023070335388,
      "learning_rate": 3.465205616652281e-06,
      "loss": 0.014,
      "step": 2003480
    },
    {
      "epoch": 3.2787716920982177,
      "grad_norm": 0.42487967014312744,
      "learning_rate": 3.4651397244387637e-06,
      "loss": 0.0089,
      "step": 2003500
    },
    {
      "epoch": 3.278804422536871,
      "grad_norm": 0.2854865491390228,
      "learning_rate": 3.4650738322252464e-06,
      "loss": 0.009,
      "step": 2003520
    },
    {
      "epoch": 3.278837152975524,
      "grad_norm": 0.06670433282852173,
      "learning_rate": 3.465007940011729e-06,
      "loss": 0.0108,
      "step": 2003540
    },
    {
      "epoch": 3.2788698834141776,
      "grad_norm": 0.7412819266319275,
      "learning_rate": 3.464942047798212e-06,
      "loss": 0.0101,
      "step": 2003560
    },
    {
      "epoch": 3.2789026138528308,
      "grad_norm": 0.18784326314926147,
      "learning_rate": 3.464876155584695e-06,
      "loss": 0.0076,
      "step": 2003580
    },
    {
      "epoch": 3.2789353442914844,
      "grad_norm": 0.4378430247306824,
      "learning_rate": 3.464810263371178e-06,
      "loss": 0.0129,
      "step": 2003600
    },
    {
      "epoch": 3.2789680747301375,
      "grad_norm": 0.23858000338077545,
      "learning_rate": 3.4647443711576605e-06,
      "loss": 0.0099,
      "step": 2003620
    },
    {
      "epoch": 3.279000805168791,
      "grad_norm": 0.11914937198162079,
      "learning_rate": 3.4646784789441433e-06,
      "loss": 0.0117,
      "step": 2003640
    },
    {
      "epoch": 3.2790335356074443,
      "grad_norm": 0.16063766181468964,
      "learning_rate": 3.4646125867306264e-06,
      "loss": 0.0071,
      "step": 2003660
    },
    {
      "epoch": 3.2790662660460974,
      "grad_norm": 0.5988377928733826,
      "learning_rate": 3.464546694517109e-06,
      "loss": 0.009,
      "step": 2003680
    },
    {
      "epoch": 3.279098996484751,
      "grad_norm": 0.26887860894203186,
      "learning_rate": 3.464480802303592e-06,
      "loss": 0.0095,
      "step": 2003700
    },
    {
      "epoch": 3.279131726923404,
      "grad_norm": 0.14975832402706146,
      "learning_rate": 3.4644149100900747e-06,
      "loss": 0.011,
      "step": 2003720
    },
    {
      "epoch": 3.2791644573620577,
      "grad_norm": 0.7452273964881897,
      "learning_rate": 3.464349017876558e-06,
      "loss": 0.0134,
      "step": 2003740
    },
    {
      "epoch": 3.279197187800711,
      "grad_norm": 0.14495958387851715,
      "learning_rate": 3.4642831256630406e-06,
      "loss": 0.0137,
      "step": 2003760
    },
    {
      "epoch": 3.279229918239364,
      "grad_norm": 0.36358118057250977,
      "learning_rate": 3.4642172334495233e-06,
      "loss": 0.0101,
      "step": 2003780
    },
    {
      "epoch": 3.2792626486780176,
      "grad_norm": 1.0082178115844727,
      "learning_rate": 3.464151341236006e-06,
      "loss": 0.0119,
      "step": 2003800
    },
    {
      "epoch": 3.279295379116671,
      "grad_norm": 0.16497987508773804,
      "learning_rate": 3.4640854490224896e-06,
      "loss": 0.0102,
      "step": 2003820
    },
    {
      "epoch": 3.2793281095553244,
      "grad_norm": 0.22186246514320374,
      "learning_rate": 3.4640195568089724e-06,
      "loss": 0.0083,
      "step": 2003840
    },
    {
      "epoch": 3.2793608399939775,
      "grad_norm": 0.4026184380054474,
      "learning_rate": 3.463953664595455e-06,
      "loss": 0.0128,
      "step": 2003860
    },
    {
      "epoch": 3.2793935704326307,
      "grad_norm": 0.6076992154121399,
      "learning_rate": 3.4638877723819383e-06,
      "loss": 0.0202,
      "step": 2003880
    },
    {
      "epoch": 3.2794263008712843,
      "grad_norm": 0.24190810322761536,
      "learning_rate": 3.463821880168421e-06,
      "loss": 0.009,
      "step": 2003900
    },
    {
      "epoch": 3.2794590313099374,
      "grad_norm": 0.5475833415985107,
      "learning_rate": 3.4637559879549037e-06,
      "loss": 0.0127,
      "step": 2003920
    },
    {
      "epoch": 3.279491761748591,
      "grad_norm": 0.3285895884037018,
      "learning_rate": 3.4636900957413865e-06,
      "loss": 0.0103,
      "step": 2003940
    },
    {
      "epoch": 3.279524492187244,
      "grad_norm": 0.19195671379566193,
      "learning_rate": 3.4636242035278692e-06,
      "loss": 0.0102,
      "step": 2003960
    },
    {
      "epoch": 3.2795572226258978,
      "grad_norm": 0.6520679593086243,
      "learning_rate": 3.4635583113143524e-06,
      "loss": 0.011,
      "step": 2003980
    },
    {
      "epoch": 3.279589953064551,
      "grad_norm": 0.09806682914495468,
      "learning_rate": 3.463492419100835e-06,
      "loss": 0.013,
      "step": 2004000
    },
    {
      "epoch": 3.279622683503204,
      "grad_norm": 0.27436545491218567,
      "learning_rate": 3.463426526887318e-06,
      "loss": 0.0162,
      "step": 2004020
    },
    {
      "epoch": 3.2796554139418577,
      "grad_norm": 0.27553853392601013,
      "learning_rate": 3.4633606346738006e-06,
      "loss": 0.0112,
      "step": 2004040
    },
    {
      "epoch": 3.279688144380511,
      "grad_norm": 0.38421717286109924,
      "learning_rate": 3.4632947424602838e-06,
      "loss": 0.0157,
      "step": 2004060
    },
    {
      "epoch": 3.2797208748191644,
      "grad_norm": 0.1992010772228241,
      "learning_rate": 3.4632288502467665e-06,
      "loss": 0.0078,
      "step": 2004080
    },
    {
      "epoch": 3.2797536052578176,
      "grad_norm": 0.31208252906799316,
      "learning_rate": 3.4631629580332493e-06,
      "loss": 0.0072,
      "step": 2004100
    },
    {
      "epoch": 3.279786335696471,
      "grad_norm": 0.14941978454589844,
      "learning_rate": 3.463097065819732e-06,
      "loss": 0.0145,
      "step": 2004120
    },
    {
      "epoch": 3.2798190661351243,
      "grad_norm": 0.1901884824037552,
      "learning_rate": 3.463031173606215e-06,
      "loss": 0.0172,
      "step": 2004140
    },
    {
      "epoch": 3.2798517965737775,
      "grad_norm": 0.1528967022895813,
      "learning_rate": 3.462965281392698e-06,
      "loss": 0.0105,
      "step": 2004160
    },
    {
      "epoch": 3.279884527012431,
      "grad_norm": 0.20715582370758057,
      "learning_rate": 3.462899389179181e-06,
      "loss": 0.009,
      "step": 2004180
    },
    {
      "epoch": 3.279917257451084,
      "grad_norm": 0.38849881291389465,
      "learning_rate": 3.4628334969656642e-06,
      "loss": 0.0134,
      "step": 2004200
    },
    {
      "epoch": 3.279949987889738,
      "grad_norm": 0.22136463224887848,
      "learning_rate": 3.462767604752147e-06,
      "loss": 0.0105,
      "step": 2004220
    },
    {
      "epoch": 3.279982718328391,
      "grad_norm": 0.2418474555015564,
      "learning_rate": 3.4627017125386297e-06,
      "loss": 0.0099,
      "step": 2004240
    },
    {
      "epoch": 3.2800154487670445,
      "grad_norm": 0.5328511595726013,
      "learning_rate": 3.4626358203251124e-06,
      "loss": 0.0174,
      "step": 2004260
    },
    {
      "epoch": 3.2800481792056977,
      "grad_norm": 0.4984535276889801,
      "learning_rate": 3.4625699281115956e-06,
      "loss": 0.0121,
      "step": 2004280
    },
    {
      "epoch": 3.280080909644351,
      "grad_norm": 0.20866051316261292,
      "learning_rate": 3.4625040358980783e-06,
      "loss": 0.0117,
      "step": 2004300
    },
    {
      "epoch": 3.2801136400830044,
      "grad_norm": 0.2513587474822998,
      "learning_rate": 3.462438143684561e-06,
      "loss": 0.0095,
      "step": 2004320
    },
    {
      "epoch": 3.2801463705216576,
      "grad_norm": 0.3266979455947876,
      "learning_rate": 3.462372251471044e-06,
      "loss": 0.0124,
      "step": 2004340
    },
    {
      "epoch": 3.280179100960311,
      "grad_norm": 0.17075254023075104,
      "learning_rate": 3.462306359257527e-06,
      "loss": 0.0095,
      "step": 2004360
    },
    {
      "epoch": 3.2802118313989643,
      "grad_norm": 1.480533242225647,
      "learning_rate": 3.4622404670440097e-06,
      "loss": 0.016,
      "step": 2004380
    },
    {
      "epoch": 3.280244561837618,
      "grad_norm": 0.09706137329339981,
      "learning_rate": 3.4621745748304925e-06,
      "loss": 0.0068,
      "step": 2004400
    },
    {
      "epoch": 3.280277292276271,
      "grad_norm": 0.2482289969921112,
      "learning_rate": 3.462108682616975e-06,
      "loss": 0.0088,
      "step": 2004420
    },
    {
      "epoch": 3.2803100227149242,
      "grad_norm": 0.06838946044445038,
      "learning_rate": 3.462042790403458e-06,
      "loss": 0.0143,
      "step": 2004440
    },
    {
      "epoch": 3.280342753153578,
      "grad_norm": 0.198360875248909,
      "learning_rate": 3.461976898189941e-06,
      "loss": 0.0119,
      "step": 2004460
    },
    {
      "epoch": 3.280375483592231,
      "grad_norm": 0.10588928312063217,
      "learning_rate": 3.461911005976424e-06,
      "loss": 0.0143,
      "step": 2004480
    },
    {
      "epoch": 3.2804082140308846,
      "grad_norm": 0.2879379391670227,
      "learning_rate": 3.4618451137629066e-06,
      "loss": 0.0149,
      "step": 2004500
    },
    {
      "epoch": 3.2804409444695377,
      "grad_norm": 0.3329528570175171,
      "learning_rate": 3.46177922154939e-06,
      "loss": 0.0102,
      "step": 2004520
    },
    {
      "epoch": 3.2804736749081913,
      "grad_norm": 0.6166313290596008,
      "learning_rate": 3.461713329335873e-06,
      "loss": 0.0166,
      "step": 2004540
    },
    {
      "epoch": 3.2805064053468445,
      "grad_norm": 0.28240305185317993,
      "learning_rate": 3.4616474371223557e-06,
      "loss": 0.0104,
      "step": 2004560
    },
    {
      "epoch": 3.2805391357854976,
      "grad_norm": 0.23327401280403137,
      "learning_rate": 3.4615815449088384e-06,
      "loss": 0.0077,
      "step": 2004580
    },
    {
      "epoch": 3.280571866224151,
      "grad_norm": 0.0994342565536499,
      "learning_rate": 3.4615156526953216e-06,
      "loss": 0.0115,
      "step": 2004600
    },
    {
      "epoch": 3.2806045966628044,
      "grad_norm": 0.1422165483236313,
      "learning_rate": 3.4614497604818043e-06,
      "loss": 0.0123,
      "step": 2004620
    },
    {
      "epoch": 3.280637327101458,
      "grad_norm": 0.21471835672855377,
      "learning_rate": 3.461383868268287e-06,
      "loss": 0.01,
      "step": 2004640
    },
    {
      "epoch": 3.280670057540111,
      "grad_norm": 0.45584574341773987,
      "learning_rate": 3.4613179760547698e-06,
      "loss": 0.0096,
      "step": 2004660
    },
    {
      "epoch": 3.2807027879787647,
      "grad_norm": 0.1054314598441124,
      "learning_rate": 3.461252083841253e-06,
      "loss": 0.0063,
      "step": 2004680
    },
    {
      "epoch": 3.280735518417418,
      "grad_norm": 0.6142135858535767,
      "learning_rate": 3.4611861916277357e-06,
      "loss": 0.0117,
      "step": 2004700
    },
    {
      "epoch": 3.280768248856071,
      "grad_norm": 0.10617166012525558,
      "learning_rate": 3.4611202994142184e-06,
      "loss": 0.0112,
      "step": 2004720
    },
    {
      "epoch": 3.2808009792947246,
      "grad_norm": 0.2185547649860382,
      "learning_rate": 3.461054407200701e-06,
      "loss": 0.0135,
      "step": 2004740
    },
    {
      "epoch": 3.2808337097333777,
      "grad_norm": 1.8467344045639038,
      "learning_rate": 3.4609885149871843e-06,
      "loss": 0.0133,
      "step": 2004760
    },
    {
      "epoch": 3.2808664401720313,
      "grad_norm": 0.053642045706510544,
      "learning_rate": 3.460922622773667e-06,
      "loss": 0.0132,
      "step": 2004780
    },
    {
      "epoch": 3.2808991706106845,
      "grad_norm": 0.26872533559799194,
      "learning_rate": 3.46085673056015e-06,
      "loss": 0.0103,
      "step": 2004800
    },
    {
      "epoch": 3.280931901049338,
      "grad_norm": 0.4786910116672516,
      "learning_rate": 3.4607908383466325e-06,
      "loss": 0.0094,
      "step": 2004820
    },
    {
      "epoch": 3.2809646314879912,
      "grad_norm": 0.4884233772754669,
      "learning_rate": 3.4607249461331153e-06,
      "loss": 0.017,
      "step": 2004840
    },
    {
      "epoch": 3.2809973619266444,
      "grad_norm": 0.21450047194957733,
      "learning_rate": 3.4606590539195984e-06,
      "loss": 0.0102,
      "step": 2004860
    },
    {
      "epoch": 3.281030092365298,
      "grad_norm": 0.07564860582351685,
      "learning_rate": 3.4605931617060816e-06,
      "loss": 0.0144,
      "step": 2004880
    },
    {
      "epoch": 3.281062822803951,
      "grad_norm": 0.09100264310836792,
      "learning_rate": 3.4605272694925648e-06,
      "loss": 0.0123,
      "step": 2004900
    },
    {
      "epoch": 3.2810955532426047,
      "grad_norm": 0.42260223627090454,
      "learning_rate": 3.4604613772790475e-06,
      "loss": 0.0166,
      "step": 2004920
    },
    {
      "epoch": 3.281128283681258,
      "grad_norm": 0.7597991824150085,
      "learning_rate": 3.4603954850655303e-06,
      "loss": 0.0108,
      "step": 2004940
    },
    {
      "epoch": 3.2811610141199115,
      "grad_norm": 0.34921497106552124,
      "learning_rate": 3.460329592852013e-06,
      "loss": 0.0119,
      "step": 2004960
    },
    {
      "epoch": 3.2811937445585646,
      "grad_norm": 0.3008017838001251,
      "learning_rate": 3.4602637006384957e-06,
      "loss": 0.0177,
      "step": 2004980
    },
    {
      "epoch": 3.2812264749972178,
      "grad_norm": 0.056848686188459396,
      "learning_rate": 3.460197808424979e-06,
      "loss": 0.0123,
      "step": 2005000
    },
    {
      "epoch": 3.2812592054358714,
      "grad_norm": 0.243726909160614,
      "learning_rate": 3.4601319162114616e-06,
      "loss": 0.0104,
      "step": 2005020
    },
    {
      "epoch": 3.2812919358745245,
      "grad_norm": 0.16547873616218567,
      "learning_rate": 3.4600660239979444e-06,
      "loss": 0.0095,
      "step": 2005040
    },
    {
      "epoch": 3.281324666313178,
      "grad_norm": 0.1484151929616928,
      "learning_rate": 3.460000131784427e-06,
      "loss": 0.0132,
      "step": 2005060
    },
    {
      "epoch": 3.2813573967518312,
      "grad_norm": 0.24356643855571747,
      "learning_rate": 3.4599342395709103e-06,
      "loss": 0.0112,
      "step": 2005080
    },
    {
      "epoch": 3.281390127190485,
      "grad_norm": 0.16690923273563385,
      "learning_rate": 3.459868347357393e-06,
      "loss": 0.0073,
      "step": 2005100
    },
    {
      "epoch": 3.281422857629138,
      "grad_norm": 0.31872788071632385,
      "learning_rate": 3.4598024551438758e-06,
      "loss": 0.0126,
      "step": 2005120
    },
    {
      "epoch": 3.281455588067791,
      "grad_norm": 0.2757243812084198,
      "learning_rate": 3.4597365629303585e-06,
      "loss": 0.0077,
      "step": 2005140
    },
    {
      "epoch": 3.2814883185064447,
      "grad_norm": 0.09834327548742294,
      "learning_rate": 3.4596706707168417e-06,
      "loss": 0.008,
      "step": 2005160
    },
    {
      "epoch": 3.281521048945098,
      "grad_norm": 0.1481509506702423,
      "learning_rate": 3.4596047785033244e-06,
      "loss": 0.0097,
      "step": 2005180
    },
    {
      "epoch": 3.2815537793837515,
      "grad_norm": 0.3908454179763794,
      "learning_rate": 3.459538886289807e-06,
      "loss": 0.0128,
      "step": 2005200
    },
    {
      "epoch": 3.2815865098224046,
      "grad_norm": 0.30235525965690613,
      "learning_rate": 3.4594729940762907e-06,
      "loss": 0.012,
      "step": 2005220
    },
    {
      "epoch": 3.2816192402610582,
      "grad_norm": 0.23597432672977448,
      "learning_rate": 3.4594071018627735e-06,
      "loss": 0.008,
      "step": 2005240
    },
    {
      "epoch": 3.2816519706997114,
      "grad_norm": 0.11611853539943695,
      "learning_rate": 3.459341209649256e-06,
      "loss": 0.0088,
      "step": 2005260
    },
    {
      "epoch": 3.2816847011383645,
      "grad_norm": 0.1891963630914688,
      "learning_rate": 3.459275317435739e-06,
      "loss": 0.0096,
      "step": 2005280
    },
    {
      "epoch": 3.281717431577018,
      "grad_norm": 0.24489271640777588,
      "learning_rate": 3.459209425222222e-06,
      "loss": 0.0091,
      "step": 2005300
    },
    {
      "epoch": 3.2817501620156713,
      "grad_norm": 0.42631274461746216,
      "learning_rate": 3.459143533008705e-06,
      "loss": 0.0153,
      "step": 2005320
    },
    {
      "epoch": 3.281782892454325,
      "grad_norm": 1.2856987714767456,
      "learning_rate": 3.4590776407951876e-06,
      "loss": 0.0143,
      "step": 2005340
    },
    {
      "epoch": 3.281815622892978,
      "grad_norm": 0.09938561916351318,
      "learning_rate": 3.4590117485816703e-06,
      "loss": 0.0092,
      "step": 2005360
    },
    {
      "epoch": 3.281848353331631,
      "grad_norm": 0.19384866952896118,
      "learning_rate": 3.458945856368153e-06,
      "loss": 0.0091,
      "step": 2005380
    },
    {
      "epoch": 3.2818810837702848,
      "grad_norm": 0.1601862758398056,
      "learning_rate": 3.4588799641546362e-06,
      "loss": 0.0107,
      "step": 2005400
    },
    {
      "epoch": 3.281913814208938,
      "grad_norm": 0.44717395305633545,
      "learning_rate": 3.458814071941119e-06,
      "loss": 0.0111,
      "step": 2005420
    },
    {
      "epoch": 3.2819465446475915,
      "grad_norm": 0.45629075169563293,
      "learning_rate": 3.4587481797276017e-06,
      "loss": 0.0138,
      "step": 2005440
    },
    {
      "epoch": 3.2819792750862447,
      "grad_norm": 0.14904189109802246,
      "learning_rate": 3.4586822875140844e-06,
      "loss": 0.0085,
      "step": 2005460
    },
    {
      "epoch": 3.282012005524898,
      "grad_norm": 0.2422315925359726,
      "learning_rate": 3.4586163953005676e-06,
      "loss": 0.0087,
      "step": 2005480
    },
    {
      "epoch": 3.2820447359635514,
      "grad_norm": 0.391614705324173,
      "learning_rate": 3.4585505030870504e-06,
      "loss": 0.014,
      "step": 2005500
    },
    {
      "epoch": 3.2820774664022045,
      "grad_norm": 0.2021937072277069,
      "learning_rate": 3.458484610873533e-06,
      "loss": 0.0149,
      "step": 2005520
    },
    {
      "epoch": 3.282110196840858,
      "grad_norm": 0.37041324377059937,
      "learning_rate": 3.458418718660016e-06,
      "loss": 0.0171,
      "step": 2005540
    },
    {
      "epoch": 3.2821429272795113,
      "grad_norm": 0.18561778962612152,
      "learning_rate": 3.458352826446499e-06,
      "loss": 0.0084,
      "step": 2005560
    },
    {
      "epoch": 3.282175657718165,
      "grad_norm": 0.1409120410680771,
      "learning_rate": 3.458286934232982e-06,
      "loss": 0.0103,
      "step": 2005580
    },
    {
      "epoch": 3.282208388156818,
      "grad_norm": 0.14751917123794556,
      "learning_rate": 3.458221042019465e-06,
      "loss": 0.0119,
      "step": 2005600
    },
    {
      "epoch": 3.282241118595471,
      "grad_norm": 0.0915701612830162,
      "learning_rate": 3.458155149805948e-06,
      "loss": 0.0069,
      "step": 2005620
    },
    {
      "epoch": 3.282273849034125,
      "grad_norm": 0.14767152070999146,
      "learning_rate": 3.458089257592431e-06,
      "loss": 0.0068,
      "step": 2005640
    },
    {
      "epoch": 3.282306579472778,
      "grad_norm": 0.3572455942630768,
      "learning_rate": 3.4580233653789135e-06,
      "loss": 0.0174,
      "step": 2005660
    },
    {
      "epoch": 3.2823393099114315,
      "grad_norm": 0.23853185772895813,
      "learning_rate": 3.4579574731653963e-06,
      "loss": 0.0105,
      "step": 2005680
    },
    {
      "epoch": 3.2823720403500847,
      "grad_norm": 0.17274436354637146,
      "learning_rate": 3.4578915809518794e-06,
      "loss": 0.0078,
      "step": 2005700
    },
    {
      "epoch": 3.2824047707887383,
      "grad_norm": 0.22221693396568298,
      "learning_rate": 3.457825688738362e-06,
      "loss": 0.0107,
      "step": 2005720
    },
    {
      "epoch": 3.2824375012273914,
      "grad_norm": 0.12890444695949554,
      "learning_rate": 3.457759796524845e-06,
      "loss": 0.0144,
      "step": 2005740
    },
    {
      "epoch": 3.2824702316660446,
      "grad_norm": 0.2417302131652832,
      "learning_rate": 3.4576939043113277e-06,
      "loss": 0.0084,
      "step": 2005760
    },
    {
      "epoch": 3.282502962104698,
      "grad_norm": 0.09198040515184402,
      "learning_rate": 3.457628012097811e-06,
      "loss": 0.0105,
      "step": 2005780
    },
    {
      "epoch": 3.2825356925433513,
      "grad_norm": 0.33731991052627563,
      "learning_rate": 3.4575621198842936e-06,
      "loss": 0.0106,
      "step": 2005800
    },
    {
      "epoch": 3.282568422982005,
      "grad_norm": 0.4614817798137665,
      "learning_rate": 3.4574962276707763e-06,
      "loss": 0.0144,
      "step": 2005820
    },
    {
      "epoch": 3.282601153420658,
      "grad_norm": 0.17772464454174042,
      "learning_rate": 3.457430335457259e-06,
      "loss": 0.0118,
      "step": 2005840
    },
    {
      "epoch": 3.2826338838593117,
      "grad_norm": 0.15034526586532593,
      "learning_rate": 3.4573644432437418e-06,
      "loss": 0.0141,
      "step": 2005860
    },
    {
      "epoch": 3.282666614297965,
      "grad_norm": 0.18446823954582214,
      "learning_rate": 3.457298551030225e-06,
      "loss": 0.0082,
      "step": 2005880
    },
    {
      "epoch": 3.282699344736618,
      "grad_norm": 0.09626416116952896,
      "learning_rate": 3.4572326588167077e-06,
      "loss": 0.0098,
      "step": 2005900
    },
    {
      "epoch": 3.2827320751752715,
      "grad_norm": 0.6043082475662231,
      "learning_rate": 3.457166766603191e-06,
      "loss": 0.015,
      "step": 2005920
    },
    {
      "epoch": 3.2827648056139247,
      "grad_norm": 0.24060054123401642,
      "learning_rate": 3.457100874389674e-06,
      "loss": 0.0097,
      "step": 2005940
    },
    {
      "epoch": 3.2827975360525783,
      "grad_norm": 0.7555728554725647,
      "learning_rate": 3.4570349821761568e-06,
      "loss": 0.0089,
      "step": 2005960
    },
    {
      "epoch": 3.2828302664912314,
      "grad_norm": 0.49337413907051086,
      "learning_rate": 3.4569690899626395e-06,
      "loss": 0.0146,
      "step": 2005980
    },
    {
      "epoch": 3.282862996929885,
      "grad_norm": 0.11694302409887314,
      "learning_rate": 3.4569031977491222e-06,
      "loss": 0.0073,
      "step": 2006000
    },
    {
      "epoch": 3.282895727368538,
      "grad_norm": 0.15073111653327942,
      "learning_rate": 3.4568373055356054e-06,
      "loss": 0.01,
      "step": 2006020
    },
    {
      "epoch": 3.2829284578071913,
      "grad_norm": 0.7540492415428162,
      "learning_rate": 3.456771413322088e-06,
      "loss": 0.0115,
      "step": 2006040
    },
    {
      "epoch": 3.282961188245845,
      "grad_norm": 0.06981036067008972,
      "learning_rate": 3.456705521108571e-06,
      "loss": 0.0085,
      "step": 2006060
    },
    {
      "epoch": 3.282993918684498,
      "grad_norm": 1.0705116987228394,
      "learning_rate": 3.4566396288950536e-06,
      "loss": 0.0117,
      "step": 2006080
    },
    {
      "epoch": 3.2830266491231517,
      "grad_norm": 0.26613911986351013,
      "learning_rate": 3.4565737366815368e-06,
      "loss": 0.0086,
      "step": 2006100
    },
    {
      "epoch": 3.283059379561805,
      "grad_norm": 0.3910691440105438,
      "learning_rate": 3.4565078444680195e-06,
      "loss": 0.0095,
      "step": 2006120
    },
    {
      "epoch": 3.2830921100004584,
      "grad_norm": 0.7720690965652466,
      "learning_rate": 3.4564419522545023e-06,
      "loss": 0.0163,
      "step": 2006140
    },
    {
      "epoch": 3.2831248404391116,
      "grad_norm": 0.14796589314937592,
      "learning_rate": 3.456376060040985e-06,
      "loss": 0.0119,
      "step": 2006160
    },
    {
      "epoch": 3.2831575708777647,
      "grad_norm": 0.18579313158988953,
      "learning_rate": 3.456310167827468e-06,
      "loss": 0.015,
      "step": 2006180
    },
    {
      "epoch": 3.2831903013164183,
      "grad_norm": 0.15191328525543213,
      "learning_rate": 3.456244275613951e-06,
      "loss": 0.0092,
      "step": 2006200
    },
    {
      "epoch": 3.2832230317550715,
      "grad_norm": 0.21440890431404114,
      "learning_rate": 3.4561783834004336e-06,
      "loss": 0.0106,
      "step": 2006220
    },
    {
      "epoch": 3.283255762193725,
      "grad_norm": 0.15454412996768951,
      "learning_rate": 3.4561124911869164e-06,
      "loss": 0.0104,
      "step": 2006240
    },
    {
      "epoch": 3.283288492632378,
      "grad_norm": 0.5467708706855774,
      "learning_rate": 3.4560465989733995e-06,
      "loss": 0.0154,
      "step": 2006260
    },
    {
      "epoch": 3.283321223071032,
      "grad_norm": 0.377537339925766,
      "learning_rate": 3.4559807067598827e-06,
      "loss": 0.0112,
      "step": 2006280
    },
    {
      "epoch": 3.283353953509685,
      "grad_norm": 0.20888309180736542,
      "learning_rate": 3.4559148145463654e-06,
      "loss": 0.0098,
      "step": 2006300
    },
    {
      "epoch": 3.283386683948338,
      "grad_norm": 0.529521107673645,
      "learning_rate": 3.4558489223328486e-06,
      "loss": 0.0123,
      "step": 2006320
    },
    {
      "epoch": 3.2834194143869917,
      "grad_norm": 0.4971861243247986,
      "learning_rate": 3.4557830301193314e-06,
      "loss": 0.0138,
      "step": 2006340
    },
    {
      "epoch": 3.283452144825645,
      "grad_norm": 0.3397906720638275,
      "learning_rate": 3.455717137905814e-06,
      "loss": 0.0099,
      "step": 2006360
    },
    {
      "epoch": 3.2834848752642984,
      "grad_norm": 0.37817123532295227,
      "learning_rate": 3.455651245692297e-06,
      "loss": 0.0082,
      "step": 2006380
    },
    {
      "epoch": 3.2835176057029516,
      "grad_norm": 0.5276831388473511,
      "learning_rate": 3.4555853534787796e-06,
      "loss": 0.012,
      "step": 2006400
    },
    {
      "epoch": 3.283550336141605,
      "grad_norm": 0.2713153660297394,
      "learning_rate": 3.4555194612652627e-06,
      "loss": 0.0189,
      "step": 2006420
    },
    {
      "epoch": 3.2835830665802583,
      "grad_norm": 0.3343515694141388,
      "learning_rate": 3.4554535690517455e-06,
      "loss": 0.0158,
      "step": 2006440
    },
    {
      "epoch": 3.2836157970189115,
      "grad_norm": 0.28644365072250366,
      "learning_rate": 3.4553876768382282e-06,
      "loss": 0.0108,
      "step": 2006460
    },
    {
      "epoch": 3.283648527457565,
      "grad_norm": 0.11638199537992477,
      "learning_rate": 3.455321784624711e-06,
      "loss": 0.0149,
      "step": 2006480
    },
    {
      "epoch": 3.2836812578962182,
      "grad_norm": 0.1898447722196579,
      "learning_rate": 3.455255892411194e-06,
      "loss": 0.0108,
      "step": 2006500
    },
    {
      "epoch": 3.283713988334872,
      "grad_norm": 0.12166926264762878,
      "learning_rate": 3.455190000197677e-06,
      "loss": 0.0158,
      "step": 2006520
    },
    {
      "epoch": 3.283746718773525,
      "grad_norm": 0.19987982511520386,
      "learning_rate": 3.4551241079841596e-06,
      "loss": 0.008,
      "step": 2006540
    },
    {
      "epoch": 3.2837794492121786,
      "grad_norm": 0.17667940258979797,
      "learning_rate": 3.4550582157706423e-06,
      "loss": 0.016,
      "step": 2006560
    },
    {
      "epoch": 3.2838121796508317,
      "grad_norm": 0.26348620653152466,
      "learning_rate": 3.4549923235571255e-06,
      "loss": 0.0116,
      "step": 2006580
    },
    {
      "epoch": 3.283844910089485,
      "grad_norm": 0.49439793825149536,
      "learning_rate": 3.4549264313436082e-06,
      "loss": 0.0096,
      "step": 2006600
    },
    {
      "epoch": 3.2838776405281385,
      "grad_norm": 0.30015307664871216,
      "learning_rate": 3.454860539130091e-06,
      "loss": 0.0135,
      "step": 2006620
    },
    {
      "epoch": 3.2839103709667916,
      "grad_norm": 0.18944747745990753,
      "learning_rate": 3.4547946469165746e-06,
      "loss": 0.0121,
      "step": 2006640
    },
    {
      "epoch": 3.283943101405445,
      "grad_norm": 0.5985301733016968,
      "learning_rate": 3.4547287547030573e-06,
      "loss": 0.0103,
      "step": 2006660
    },
    {
      "epoch": 3.2839758318440984,
      "grad_norm": 0.40459975600242615,
      "learning_rate": 3.45466286248954e-06,
      "loss": 0.0149,
      "step": 2006680
    },
    {
      "epoch": 3.284008562282752,
      "grad_norm": 0.05391739308834076,
      "learning_rate": 3.4545969702760228e-06,
      "loss": 0.0119,
      "step": 2006700
    },
    {
      "epoch": 3.284041292721405,
      "grad_norm": 0.4936157464981079,
      "learning_rate": 3.454531078062506e-06,
      "loss": 0.0135,
      "step": 2006720
    },
    {
      "epoch": 3.2840740231600583,
      "grad_norm": 0.1343286633491516,
      "learning_rate": 3.4544651858489887e-06,
      "loss": 0.0147,
      "step": 2006740
    },
    {
      "epoch": 3.284106753598712,
      "grad_norm": 0.2219480723142624,
      "learning_rate": 3.4543992936354714e-06,
      "loss": 0.0106,
      "step": 2006760
    },
    {
      "epoch": 3.284139484037365,
      "grad_norm": 0.5149967670440674,
      "learning_rate": 3.454333401421954e-06,
      "loss": 0.0098,
      "step": 2006780
    },
    {
      "epoch": 3.2841722144760186,
      "grad_norm": 0.21702441573143005,
      "learning_rate": 3.4542675092084373e-06,
      "loss": 0.01,
      "step": 2006800
    },
    {
      "epoch": 3.2842049449146717,
      "grad_norm": 0.03137199208140373,
      "learning_rate": 3.45420161699492e-06,
      "loss": 0.0077,
      "step": 2006820
    },
    {
      "epoch": 3.284237675353325,
      "grad_norm": 0.5529707670211792,
      "learning_rate": 3.454135724781403e-06,
      "loss": 0.0127,
      "step": 2006840
    },
    {
      "epoch": 3.2842704057919785,
      "grad_norm": 0.1988454908132553,
      "learning_rate": 3.4540698325678855e-06,
      "loss": 0.011,
      "step": 2006860
    },
    {
      "epoch": 3.2843031362306316,
      "grad_norm": 0.5603761076927185,
      "learning_rate": 3.4540039403543683e-06,
      "loss": 0.0175,
      "step": 2006880
    },
    {
      "epoch": 3.2843358666692852,
      "grad_norm": 0.294890820980072,
      "learning_rate": 3.4539380481408515e-06,
      "loss": 0.0094,
      "step": 2006900
    },
    {
      "epoch": 3.2843685971079384,
      "grad_norm": 0.3458746075630188,
      "learning_rate": 3.453872155927334e-06,
      "loss": 0.0097,
      "step": 2006920
    },
    {
      "epoch": 3.2844013275465915,
      "grad_norm": 0.7174074053764343,
      "learning_rate": 3.453806263713817e-06,
      "loss": 0.0162,
      "step": 2006940
    },
    {
      "epoch": 3.284434057985245,
      "grad_norm": 0.06918083876371384,
      "learning_rate": 3.4537403715002997e-06,
      "loss": 0.0106,
      "step": 2006960
    },
    {
      "epoch": 3.2844667884238983,
      "grad_norm": 3.8565874099731445,
      "learning_rate": 3.4536744792867833e-06,
      "loss": 0.0125,
      "step": 2006980
    },
    {
      "epoch": 3.284499518862552,
      "grad_norm": 0.2539859414100647,
      "learning_rate": 3.453608587073266e-06,
      "loss": 0.0092,
      "step": 2007000
    },
    {
      "epoch": 3.284532249301205,
      "grad_norm": 0.11893311142921448,
      "learning_rate": 3.4535426948597487e-06,
      "loss": 0.0121,
      "step": 2007020
    },
    {
      "epoch": 3.2845649797398586,
      "grad_norm": 0.4626625180244446,
      "learning_rate": 3.453476802646232e-06,
      "loss": 0.0119,
      "step": 2007040
    },
    {
      "epoch": 3.2845977101785118,
      "grad_norm": 0.11782411485910416,
      "learning_rate": 3.4534109104327146e-06,
      "loss": 0.01,
      "step": 2007060
    },
    {
      "epoch": 3.284630440617165,
      "grad_norm": 0.37651708722114563,
      "learning_rate": 3.4533450182191974e-06,
      "loss": 0.0137,
      "step": 2007080
    },
    {
      "epoch": 3.2846631710558185,
      "grad_norm": 0.11912662535905838,
      "learning_rate": 3.45327912600568e-06,
      "loss": 0.0085,
      "step": 2007100
    },
    {
      "epoch": 3.2846959014944717,
      "grad_norm": 0.23601093888282776,
      "learning_rate": 3.4532132337921633e-06,
      "loss": 0.0133,
      "step": 2007120
    },
    {
      "epoch": 3.2847286319331253,
      "grad_norm": 0.5888233184814453,
      "learning_rate": 3.453147341578646e-06,
      "loss": 0.0119,
      "step": 2007140
    },
    {
      "epoch": 3.2847613623717784,
      "grad_norm": 0.18325351178646088,
      "learning_rate": 3.4530814493651288e-06,
      "loss": 0.0092,
      "step": 2007160
    },
    {
      "epoch": 3.284794092810432,
      "grad_norm": 0.10364015400409698,
      "learning_rate": 3.4530155571516115e-06,
      "loss": 0.0146,
      "step": 2007180
    },
    {
      "epoch": 3.284826823249085,
      "grad_norm": 0.5307177305221558,
      "learning_rate": 3.4529496649380947e-06,
      "loss": 0.012,
      "step": 2007200
    },
    {
      "epoch": 3.2848595536877383,
      "grad_norm": 0.316322386264801,
      "learning_rate": 3.4528837727245774e-06,
      "loss": 0.0119,
      "step": 2007220
    },
    {
      "epoch": 3.284892284126392,
      "grad_norm": 0.236436665058136,
      "learning_rate": 3.45281788051106e-06,
      "loss": 0.0107,
      "step": 2007240
    },
    {
      "epoch": 3.284925014565045,
      "grad_norm": 0.38518232107162476,
      "learning_rate": 3.452751988297543e-06,
      "loss": 0.0085,
      "step": 2007260
    },
    {
      "epoch": 3.2849577450036986,
      "grad_norm": 0.19903124868869781,
      "learning_rate": 3.4526860960840256e-06,
      "loss": 0.0081,
      "step": 2007280
    },
    {
      "epoch": 3.284990475442352,
      "grad_norm": 0.20364055037498474,
      "learning_rate": 3.4526202038705088e-06,
      "loss": 0.007,
      "step": 2007300
    },
    {
      "epoch": 3.2850232058810054,
      "grad_norm": 0.4949161112308502,
      "learning_rate": 3.4525543116569915e-06,
      "loss": 0.0117,
      "step": 2007320
    },
    {
      "epoch": 3.2850559363196585,
      "grad_norm": 0.48129794001579285,
      "learning_rate": 3.452488419443475e-06,
      "loss": 0.012,
      "step": 2007340
    },
    {
      "epoch": 3.2850886667583117,
      "grad_norm": 0.34245824813842773,
      "learning_rate": 3.452422527229958e-06,
      "loss": 0.013,
      "step": 2007360
    },
    {
      "epoch": 3.2851213971969653,
      "grad_norm": 0.057159651070833206,
      "learning_rate": 3.4523566350164406e-06,
      "loss": 0.01,
      "step": 2007380
    },
    {
      "epoch": 3.2851541276356184,
      "grad_norm": 0.36240488290786743,
      "learning_rate": 3.4522907428029233e-06,
      "loss": 0.0105,
      "step": 2007400
    },
    {
      "epoch": 3.285186858074272,
      "grad_norm": 0.16182941198349,
      "learning_rate": 3.452224850589406e-06,
      "loss": 0.0132,
      "step": 2007420
    },
    {
      "epoch": 3.285219588512925,
      "grad_norm": 0.19138802587985992,
      "learning_rate": 3.4521589583758892e-06,
      "loss": 0.0097,
      "step": 2007440
    },
    {
      "epoch": 3.2852523189515788,
      "grad_norm": 0.12210734933614731,
      "learning_rate": 3.452093066162372e-06,
      "loss": 0.0154,
      "step": 2007460
    },
    {
      "epoch": 3.285285049390232,
      "grad_norm": 0.11155641078948975,
      "learning_rate": 3.4520271739488547e-06,
      "loss": 0.0152,
      "step": 2007480
    },
    {
      "epoch": 3.285317779828885,
      "grad_norm": 0.2746863067150116,
      "learning_rate": 3.4519612817353375e-06,
      "loss": 0.0109,
      "step": 2007500
    },
    {
      "epoch": 3.2853505102675387,
      "grad_norm": 0.3203447461128235,
      "learning_rate": 3.4518953895218206e-06,
      "loss": 0.0103,
      "step": 2007520
    },
    {
      "epoch": 3.285383240706192,
      "grad_norm": 0.0696650817990303,
      "learning_rate": 3.4518294973083034e-06,
      "loss": 0.009,
      "step": 2007540
    },
    {
      "epoch": 3.2854159711448454,
      "grad_norm": 0.43606889247894287,
      "learning_rate": 3.451763605094786e-06,
      "loss": 0.0167,
      "step": 2007560
    },
    {
      "epoch": 3.2854487015834986,
      "grad_norm": 0.30709242820739746,
      "learning_rate": 3.451697712881269e-06,
      "loss": 0.0134,
      "step": 2007580
    },
    {
      "epoch": 3.285481432022152,
      "grad_norm": 0.8280770182609558,
      "learning_rate": 3.451631820667752e-06,
      "loss": 0.0105,
      "step": 2007600
    },
    {
      "epoch": 3.2855141624608053,
      "grad_norm": 0.29022762179374695,
      "learning_rate": 3.4515659284542347e-06,
      "loss": 0.0096,
      "step": 2007620
    },
    {
      "epoch": 3.2855468928994584,
      "grad_norm": 0.37788692116737366,
      "learning_rate": 3.4515000362407175e-06,
      "loss": 0.0098,
      "step": 2007640
    },
    {
      "epoch": 3.285579623338112,
      "grad_norm": 0.12278230488300323,
      "learning_rate": 3.4514341440272002e-06,
      "loss": 0.0211,
      "step": 2007660
    },
    {
      "epoch": 3.285612353776765,
      "grad_norm": 0.18690618872642517,
      "learning_rate": 3.451368251813684e-06,
      "loss": 0.0125,
      "step": 2007680
    },
    {
      "epoch": 3.285645084215419,
      "grad_norm": 0.18422262370586395,
      "learning_rate": 3.4513023596001665e-06,
      "loss": 0.009,
      "step": 2007700
    },
    {
      "epoch": 3.285677814654072,
      "grad_norm": 0.47139471769332886,
      "learning_rate": 3.4512364673866493e-06,
      "loss": 0.0069,
      "step": 2007720
    },
    {
      "epoch": 3.2857105450927255,
      "grad_norm": 0.4476754665374756,
      "learning_rate": 3.4511705751731324e-06,
      "loss": 0.0091,
      "step": 2007740
    },
    {
      "epoch": 3.2857432755313787,
      "grad_norm": 0.5780460834503174,
      "learning_rate": 3.451104682959615e-06,
      "loss": 0.0109,
      "step": 2007760
    },
    {
      "epoch": 3.285776005970032,
      "grad_norm": 0.2797955870628357,
      "learning_rate": 3.451038790746098e-06,
      "loss": 0.0114,
      "step": 2007780
    },
    {
      "epoch": 3.2858087364086854,
      "grad_norm": 0.2709030508995056,
      "learning_rate": 3.4509728985325807e-06,
      "loss": 0.0189,
      "step": 2007800
    },
    {
      "epoch": 3.2858414668473386,
      "grad_norm": 0.18165625631809235,
      "learning_rate": 3.4509070063190634e-06,
      "loss": 0.016,
      "step": 2007820
    },
    {
      "epoch": 3.285874197285992,
      "grad_norm": 0.522651195526123,
      "learning_rate": 3.4508411141055466e-06,
      "loss": 0.0073,
      "step": 2007840
    },
    {
      "epoch": 3.2859069277246453,
      "grad_norm": 0.40436139702796936,
      "learning_rate": 3.4507752218920293e-06,
      "loss": 0.0088,
      "step": 2007860
    },
    {
      "epoch": 3.285939658163299,
      "grad_norm": 0.2536269724369049,
      "learning_rate": 3.450709329678512e-06,
      "loss": 0.0135,
      "step": 2007880
    },
    {
      "epoch": 3.285972388601952,
      "grad_norm": 0.24704881012439728,
      "learning_rate": 3.450643437464995e-06,
      "loss": 0.0111,
      "step": 2007900
    },
    {
      "epoch": 3.286005119040605,
      "grad_norm": 0.24762514233589172,
      "learning_rate": 3.450577545251478e-06,
      "loss": 0.0081,
      "step": 2007920
    },
    {
      "epoch": 3.286037849479259,
      "grad_norm": 0.3125869929790497,
      "learning_rate": 3.4505116530379607e-06,
      "loss": 0.0066,
      "step": 2007940
    },
    {
      "epoch": 3.286070579917912,
      "grad_norm": 0.4508402645587921,
      "learning_rate": 3.4504457608244434e-06,
      "loss": 0.0085,
      "step": 2007960
    },
    {
      "epoch": 3.2861033103565656,
      "grad_norm": 0.09491283446550369,
      "learning_rate": 3.450379868610926e-06,
      "loss": 0.0125,
      "step": 2007980
    },
    {
      "epoch": 3.2861360407952187,
      "grad_norm": 0.2314530462026596,
      "learning_rate": 3.4503139763974093e-06,
      "loss": 0.0107,
      "step": 2008000
    },
    {
      "epoch": 3.2861687712338723,
      "grad_norm": 0.2158316820859909,
      "learning_rate": 3.450248084183892e-06,
      "loss": 0.0143,
      "step": 2008020
    },
    {
      "epoch": 3.2862015016725254,
      "grad_norm": 0.2743978500366211,
      "learning_rate": 3.4501821919703752e-06,
      "loss": 0.0096,
      "step": 2008040
    },
    {
      "epoch": 3.2862342321111786,
      "grad_norm": 0.10286448150873184,
      "learning_rate": 3.4501162997568584e-06,
      "loss": 0.0096,
      "step": 2008060
    },
    {
      "epoch": 3.286266962549832,
      "grad_norm": 0.20657332241535187,
      "learning_rate": 3.450050407543341e-06,
      "loss": 0.0181,
      "step": 2008080
    },
    {
      "epoch": 3.2862996929884853,
      "grad_norm": 0.12948057055473328,
      "learning_rate": 3.449984515329824e-06,
      "loss": 0.0087,
      "step": 2008100
    },
    {
      "epoch": 3.286332423427139,
      "grad_norm": 0.24793006479740143,
      "learning_rate": 3.4499186231163066e-06,
      "loss": 0.0083,
      "step": 2008120
    },
    {
      "epoch": 3.286365153865792,
      "grad_norm": 0.4951351284980774,
      "learning_rate": 3.4498527309027898e-06,
      "loss": 0.0122,
      "step": 2008140
    },
    {
      "epoch": 3.2863978843044457,
      "grad_norm": 0.202008455991745,
      "learning_rate": 3.4497868386892725e-06,
      "loss": 0.0072,
      "step": 2008160
    },
    {
      "epoch": 3.286430614743099,
      "grad_norm": 0.3791881203651428,
      "learning_rate": 3.4497209464757553e-06,
      "loss": 0.0094,
      "step": 2008180
    },
    {
      "epoch": 3.286463345181752,
      "grad_norm": 0.15901587903499603,
      "learning_rate": 3.449655054262238e-06,
      "loss": 0.0049,
      "step": 2008200
    },
    {
      "epoch": 3.2864960756204056,
      "grad_norm": 0.24415044486522675,
      "learning_rate": 3.449589162048721e-06,
      "loss": 0.0087,
      "step": 2008220
    },
    {
      "epoch": 3.2865288060590587,
      "grad_norm": 1.0599204301834106,
      "learning_rate": 3.449523269835204e-06,
      "loss": 0.0125,
      "step": 2008240
    },
    {
      "epoch": 3.2865615364977123,
      "grad_norm": 0.126313254237175,
      "learning_rate": 3.4494573776216866e-06,
      "loss": 0.0112,
      "step": 2008260
    },
    {
      "epoch": 3.2865942669363655,
      "grad_norm": 0.219833642244339,
      "learning_rate": 3.4493914854081694e-06,
      "loss": 0.0096,
      "step": 2008280
    },
    {
      "epoch": 3.286626997375019,
      "grad_norm": 0.341818243265152,
      "learning_rate": 3.449325593194652e-06,
      "loss": 0.011,
      "step": 2008300
    },
    {
      "epoch": 3.286659727813672,
      "grad_norm": 0.1449045091867447,
      "learning_rate": 3.4492597009811353e-06,
      "loss": 0.0103,
      "step": 2008320
    },
    {
      "epoch": 3.2866924582523254,
      "grad_norm": 0.2202000766992569,
      "learning_rate": 3.449193808767618e-06,
      "loss": 0.0084,
      "step": 2008340
    },
    {
      "epoch": 3.286725188690979,
      "grad_norm": 0.5455061197280884,
      "learning_rate": 3.4491279165541008e-06,
      "loss": 0.0145,
      "step": 2008360
    },
    {
      "epoch": 3.286757919129632,
      "grad_norm": 0.03407871723175049,
      "learning_rate": 3.4490620243405835e-06,
      "loss": 0.0133,
      "step": 2008380
    },
    {
      "epoch": 3.2867906495682857,
      "grad_norm": 0.22523066401481628,
      "learning_rate": 3.448996132127067e-06,
      "loss": 0.0083,
      "step": 2008400
    },
    {
      "epoch": 3.286823380006939,
      "grad_norm": 0.1552322506904602,
      "learning_rate": 3.44893023991355e-06,
      "loss": 0.0123,
      "step": 2008420
    },
    {
      "epoch": 3.286856110445592,
      "grad_norm": 0.38611316680908203,
      "learning_rate": 3.4488643477000326e-06,
      "loss": 0.0144,
      "step": 2008440
    },
    {
      "epoch": 3.2868888408842456,
      "grad_norm": 0.14366976916790009,
      "learning_rate": 3.4487984554865157e-06,
      "loss": 0.0132,
      "step": 2008460
    },
    {
      "epoch": 3.2869215713228987,
      "grad_norm": 0.2645863890647888,
      "learning_rate": 3.4487325632729985e-06,
      "loss": 0.0103,
      "step": 2008480
    },
    {
      "epoch": 3.2869543017615523,
      "grad_norm": 0.8059512376785278,
      "learning_rate": 3.4486666710594812e-06,
      "loss": 0.0089,
      "step": 2008500
    },
    {
      "epoch": 3.2869870322002055,
      "grad_norm": 0.2533476948738098,
      "learning_rate": 3.448600778845964e-06,
      "loss": 0.0138,
      "step": 2008520
    },
    {
      "epoch": 3.2870197626388586,
      "grad_norm": 0.8894701600074768,
      "learning_rate": 3.448534886632447e-06,
      "loss": 0.0134,
      "step": 2008540
    },
    {
      "epoch": 3.2870524930775122,
      "grad_norm": 0.2923409342765808,
      "learning_rate": 3.44846899441893e-06,
      "loss": 0.0114,
      "step": 2008560
    },
    {
      "epoch": 3.2870852235161654,
      "grad_norm": 0.2742941677570343,
      "learning_rate": 3.4484031022054126e-06,
      "loss": 0.0113,
      "step": 2008580
    },
    {
      "epoch": 3.287117953954819,
      "grad_norm": 0.043283626437187195,
      "learning_rate": 3.4483372099918953e-06,
      "loss": 0.0078,
      "step": 2008600
    },
    {
      "epoch": 3.287150684393472,
      "grad_norm": 0.38986408710479736,
      "learning_rate": 3.4482713177783785e-06,
      "loss": 0.0106,
      "step": 2008620
    },
    {
      "epoch": 3.2871834148321257,
      "grad_norm": 0.22123441100120544,
      "learning_rate": 3.4482054255648612e-06,
      "loss": 0.0126,
      "step": 2008640
    },
    {
      "epoch": 3.287216145270779,
      "grad_norm": 0.20252636075019836,
      "learning_rate": 3.448139533351344e-06,
      "loss": 0.0153,
      "step": 2008660
    },
    {
      "epoch": 3.287248875709432,
      "grad_norm": 0.3717375695705414,
      "learning_rate": 3.4480736411378267e-06,
      "loss": 0.0087,
      "step": 2008680
    },
    {
      "epoch": 3.2872816061480856,
      "grad_norm": 0.23513992130756378,
      "learning_rate": 3.4480077489243095e-06,
      "loss": 0.0123,
      "step": 2008700
    },
    {
      "epoch": 3.2873143365867388,
      "grad_norm": 0.7458630204200745,
      "learning_rate": 3.4479418567107926e-06,
      "loss": 0.0137,
      "step": 2008720
    },
    {
      "epoch": 3.2873470670253924,
      "grad_norm": 0.15268699824810028,
      "learning_rate": 3.447875964497276e-06,
      "loss": 0.0108,
      "step": 2008740
    },
    {
      "epoch": 3.2873797974640455,
      "grad_norm": 0.37995174527168274,
      "learning_rate": 3.447810072283759e-06,
      "loss": 0.0111,
      "step": 2008760
    },
    {
      "epoch": 3.287412527902699,
      "grad_norm": 0.18342140316963196,
      "learning_rate": 3.4477441800702417e-06,
      "loss": 0.0091,
      "step": 2008780
    },
    {
      "epoch": 3.2874452583413523,
      "grad_norm": 0.7573545575141907,
      "learning_rate": 3.4476782878567244e-06,
      "loss": 0.0124,
      "step": 2008800
    },
    {
      "epoch": 3.2874779887800054,
      "grad_norm": 0.3306097686290741,
      "learning_rate": 3.447612395643207e-06,
      "loss": 0.0085,
      "step": 2008820
    },
    {
      "epoch": 3.287510719218659,
      "grad_norm": 0.1269700676202774,
      "learning_rate": 3.44754650342969e-06,
      "loss": 0.0132,
      "step": 2008840
    },
    {
      "epoch": 3.287543449657312,
      "grad_norm": 0.3574846386909485,
      "learning_rate": 3.447480611216173e-06,
      "loss": 0.0151,
      "step": 2008860
    },
    {
      "epoch": 3.2875761800959657,
      "grad_norm": 0.23238584399223328,
      "learning_rate": 3.447414719002656e-06,
      "loss": 0.0081,
      "step": 2008880
    },
    {
      "epoch": 3.287608910534619,
      "grad_norm": 0.2090483158826828,
      "learning_rate": 3.4473488267891386e-06,
      "loss": 0.0103,
      "step": 2008900
    },
    {
      "epoch": 3.2876416409732725,
      "grad_norm": 0.19697034358978271,
      "learning_rate": 3.4472829345756213e-06,
      "loss": 0.0091,
      "step": 2008920
    },
    {
      "epoch": 3.2876743714119256,
      "grad_norm": 0.09271398186683655,
      "learning_rate": 3.4472170423621045e-06,
      "loss": 0.0137,
      "step": 2008940
    },
    {
      "epoch": 3.287707101850579,
      "grad_norm": 0.22361953556537628,
      "learning_rate": 3.447151150148587e-06,
      "loss": 0.0093,
      "step": 2008960
    },
    {
      "epoch": 3.2877398322892324,
      "grad_norm": 0.2309788167476654,
      "learning_rate": 3.44708525793507e-06,
      "loss": 0.0124,
      "step": 2008980
    },
    {
      "epoch": 3.2877725627278855,
      "grad_norm": 0.29994258284568787,
      "learning_rate": 3.4470193657215527e-06,
      "loss": 0.0114,
      "step": 2009000
    },
    {
      "epoch": 3.287805293166539,
      "grad_norm": 0.4723190367221832,
      "learning_rate": 3.446953473508036e-06,
      "loss": 0.0091,
      "step": 2009020
    },
    {
      "epoch": 3.2878380236051923,
      "grad_norm": 0.341027170419693,
      "learning_rate": 3.4468875812945186e-06,
      "loss": 0.0092,
      "step": 2009040
    },
    {
      "epoch": 3.287870754043846,
      "grad_norm": 0.2954186499118805,
      "learning_rate": 3.4468216890810013e-06,
      "loss": 0.0189,
      "step": 2009060
    },
    {
      "epoch": 3.287903484482499,
      "grad_norm": 0.2772020697593689,
      "learning_rate": 3.446755796867484e-06,
      "loss": 0.0112,
      "step": 2009080
    },
    {
      "epoch": 3.287936214921152,
      "grad_norm": 0.6930227875709534,
      "learning_rate": 3.4466899046539676e-06,
      "loss": 0.0152,
      "step": 2009100
    },
    {
      "epoch": 3.2879689453598058,
      "grad_norm": 1.4175286293029785,
      "learning_rate": 3.4466240124404504e-06,
      "loss": 0.0109,
      "step": 2009120
    },
    {
      "epoch": 3.288001675798459,
      "grad_norm": 0.18176451325416565,
      "learning_rate": 3.446558120226933e-06,
      "loss": 0.0132,
      "step": 2009140
    },
    {
      "epoch": 3.2880344062371125,
      "grad_norm": 0.37157103419303894,
      "learning_rate": 3.4464922280134163e-06,
      "loss": 0.0144,
      "step": 2009160
    },
    {
      "epoch": 3.2880671366757657,
      "grad_norm": 0.1855822205543518,
      "learning_rate": 3.446426335799899e-06,
      "loss": 0.0118,
      "step": 2009180
    },
    {
      "epoch": 3.2880998671144193,
      "grad_norm": 0.09752030670642853,
      "learning_rate": 3.4463604435863818e-06,
      "loss": 0.0097,
      "step": 2009200
    },
    {
      "epoch": 3.2881325975530724,
      "grad_norm": 0.7877837419509888,
      "learning_rate": 3.4462945513728645e-06,
      "loss": 0.0173,
      "step": 2009220
    },
    {
      "epoch": 3.2881653279917256,
      "grad_norm": 0.14199014008045197,
      "learning_rate": 3.4462286591593472e-06,
      "loss": 0.0176,
      "step": 2009240
    },
    {
      "epoch": 3.288198058430379,
      "grad_norm": 0.1931406706571579,
      "learning_rate": 3.4461627669458304e-06,
      "loss": 0.0137,
      "step": 2009260
    },
    {
      "epoch": 3.2882307888690323,
      "grad_norm": 0.21335718035697937,
      "learning_rate": 3.446096874732313e-06,
      "loss": 0.0072,
      "step": 2009280
    },
    {
      "epoch": 3.288263519307686,
      "grad_norm": 0.16226382553577423,
      "learning_rate": 3.446030982518796e-06,
      "loss": 0.0136,
      "step": 2009300
    },
    {
      "epoch": 3.288296249746339,
      "grad_norm": 0.12833039462566376,
      "learning_rate": 3.4459650903052786e-06,
      "loss": 0.0085,
      "step": 2009320
    },
    {
      "epoch": 3.2883289801849926,
      "grad_norm": 0.0750742107629776,
      "learning_rate": 3.445899198091762e-06,
      "loss": 0.0099,
      "step": 2009340
    },
    {
      "epoch": 3.288361710623646,
      "grad_norm": 0.2592954635620117,
      "learning_rate": 3.4458333058782445e-06,
      "loss": 0.0109,
      "step": 2009360
    },
    {
      "epoch": 3.288394441062299,
      "grad_norm": 0.19021141529083252,
      "learning_rate": 3.4457674136647273e-06,
      "loss": 0.0127,
      "step": 2009380
    },
    {
      "epoch": 3.2884271715009525,
      "grad_norm": 0.24771612882614136,
      "learning_rate": 3.44570152145121e-06,
      "loss": 0.0103,
      "step": 2009400
    },
    {
      "epoch": 3.2884599019396057,
      "grad_norm": 0.6791154146194458,
      "learning_rate": 3.445635629237693e-06,
      "loss": 0.0126,
      "step": 2009420
    },
    {
      "epoch": 3.2884926323782593,
      "grad_norm": 0.42346078157424927,
      "learning_rate": 3.4455697370241763e-06,
      "loss": 0.0083,
      "step": 2009440
    },
    {
      "epoch": 3.2885253628169124,
      "grad_norm": 0.20734331011772156,
      "learning_rate": 3.445503844810659e-06,
      "loss": 0.0107,
      "step": 2009460
    },
    {
      "epoch": 3.288558093255566,
      "grad_norm": 0.15245862305164337,
      "learning_rate": 3.4454379525971422e-06,
      "loss": 0.0081,
      "step": 2009480
    },
    {
      "epoch": 3.288590823694219,
      "grad_norm": 0.19661082327365875,
      "learning_rate": 3.445372060383625e-06,
      "loss": 0.0096,
      "step": 2009500
    },
    {
      "epoch": 3.2886235541328723,
      "grad_norm": 0.2583359479904175,
      "learning_rate": 3.4453061681701077e-06,
      "loss": 0.0157,
      "step": 2009520
    },
    {
      "epoch": 3.288656284571526,
      "grad_norm": 0.5468627214431763,
      "learning_rate": 3.4452402759565905e-06,
      "loss": 0.0123,
      "step": 2009540
    },
    {
      "epoch": 3.288689015010179,
      "grad_norm": 0.5767806172370911,
      "learning_rate": 3.4451743837430736e-06,
      "loss": 0.0085,
      "step": 2009560
    },
    {
      "epoch": 3.2887217454488327,
      "grad_norm": 0.18919546902179718,
      "learning_rate": 3.4451084915295564e-06,
      "loss": 0.0078,
      "step": 2009580
    },
    {
      "epoch": 3.288754475887486,
      "grad_norm": 0.2638484537601471,
      "learning_rate": 3.445042599316039e-06,
      "loss": 0.0087,
      "step": 2009600
    },
    {
      "epoch": 3.2887872063261394,
      "grad_norm": 0.30258482694625854,
      "learning_rate": 3.444976707102522e-06,
      "loss": 0.0115,
      "step": 2009620
    },
    {
      "epoch": 3.2888199367647926,
      "grad_norm": 0.5252715349197388,
      "learning_rate": 3.444910814889005e-06,
      "loss": 0.0172,
      "step": 2009640
    },
    {
      "epoch": 3.2888526672034457,
      "grad_norm": 0.4769159257411957,
      "learning_rate": 3.4448449226754877e-06,
      "loss": 0.0105,
      "step": 2009660
    },
    {
      "epoch": 3.2888853976420993,
      "grad_norm": 0.5705205798149109,
      "learning_rate": 3.4447790304619705e-06,
      "loss": 0.0161,
      "step": 2009680
    },
    {
      "epoch": 3.2889181280807525,
      "grad_norm": 0.3492754101753235,
      "learning_rate": 3.4447131382484532e-06,
      "loss": 0.0123,
      "step": 2009700
    },
    {
      "epoch": 3.288950858519406,
      "grad_norm": 0.2182604819536209,
      "learning_rate": 3.444647246034936e-06,
      "loss": 0.0098,
      "step": 2009720
    },
    {
      "epoch": 3.288983588958059,
      "grad_norm": 0.5737206339836121,
      "learning_rate": 3.444581353821419e-06,
      "loss": 0.0122,
      "step": 2009740
    },
    {
      "epoch": 3.289016319396713,
      "grad_norm": 0.15678858757019043,
      "learning_rate": 3.444515461607902e-06,
      "loss": 0.0096,
      "step": 2009760
    },
    {
      "epoch": 3.289049049835366,
      "grad_norm": 0.4180032014846802,
      "learning_rate": 3.4444495693943846e-06,
      "loss": 0.011,
      "step": 2009780
    },
    {
      "epoch": 3.289081780274019,
      "grad_norm": 0.6866428256034851,
      "learning_rate": 3.444383677180868e-06,
      "loss": 0.0126,
      "step": 2009800
    },
    {
      "epoch": 3.2891145107126727,
      "grad_norm": 0.05150280520319939,
      "learning_rate": 3.444317784967351e-06,
      "loss": 0.0129,
      "step": 2009820
    },
    {
      "epoch": 3.289147241151326,
      "grad_norm": 0.16380174458026886,
      "learning_rate": 3.4442518927538337e-06,
      "loss": 0.013,
      "step": 2009840
    },
    {
      "epoch": 3.2891799715899794,
      "grad_norm": 0.5255935788154602,
      "learning_rate": 3.4441860005403164e-06,
      "loss": 0.0113,
      "step": 2009860
    },
    {
      "epoch": 3.2892127020286326,
      "grad_norm": 0.1131313145160675,
      "learning_rate": 3.4441201083267996e-06,
      "loss": 0.0096,
      "step": 2009880
    },
    {
      "epoch": 3.2892454324672857,
      "grad_norm": 0.3072120249271393,
      "learning_rate": 3.4440542161132823e-06,
      "loss": 0.0076,
      "step": 2009900
    },
    {
      "epoch": 3.2892781629059393,
      "grad_norm": 0.41354358196258545,
      "learning_rate": 3.443988323899765e-06,
      "loss": 0.0107,
      "step": 2009920
    },
    {
      "epoch": 3.2893108933445925,
      "grad_norm": 0.32930228114128113,
      "learning_rate": 3.443922431686248e-06,
      "loss": 0.014,
      "step": 2009940
    },
    {
      "epoch": 3.289343623783246,
      "grad_norm": 0.39042922854423523,
      "learning_rate": 3.443856539472731e-06,
      "loss": 0.0089,
      "step": 2009960
    },
    {
      "epoch": 3.2893763542218992,
      "grad_norm": 0.24533887207508087,
      "learning_rate": 3.4437906472592137e-06,
      "loss": 0.0065,
      "step": 2009980
    },
    {
      "epoch": 3.2894090846605524,
      "grad_norm": 0.4563615024089813,
      "learning_rate": 3.4437247550456964e-06,
      "loss": 0.0107,
      "step": 2010000
    },
    {
      "epoch": 3.289441815099206,
      "grad_norm": 0.19941851496696472,
      "learning_rate": 3.443658862832179e-06,
      "loss": 0.0104,
      "step": 2010020
    },
    {
      "epoch": 3.289474545537859,
      "grad_norm": 0.48733192682266235,
      "learning_rate": 3.4435929706186623e-06,
      "loss": 0.0122,
      "step": 2010040
    },
    {
      "epoch": 3.2895072759765127,
      "grad_norm": 0.6044068336486816,
      "learning_rate": 3.443527078405145e-06,
      "loss": 0.0119,
      "step": 2010060
    },
    {
      "epoch": 3.289540006415166,
      "grad_norm": 0.31302183866500854,
      "learning_rate": 3.443461186191628e-06,
      "loss": 0.0119,
      "step": 2010080
    },
    {
      "epoch": 3.2895727368538195,
      "grad_norm": 0.17154043912887573,
      "learning_rate": 3.4433952939781106e-06,
      "loss": 0.0104,
      "step": 2010100
    },
    {
      "epoch": 3.2896054672924726,
      "grad_norm": 0.2902434766292572,
      "learning_rate": 3.4433294017645937e-06,
      "loss": 0.0112,
      "step": 2010120
    },
    {
      "epoch": 3.2896381977311258,
      "grad_norm": 2.6062610149383545,
      "learning_rate": 3.4432635095510765e-06,
      "loss": 0.0082,
      "step": 2010140
    },
    {
      "epoch": 3.2896709281697794,
      "grad_norm": 0.1862075924873352,
      "learning_rate": 3.4431976173375596e-06,
      "loss": 0.0084,
      "step": 2010160
    },
    {
      "epoch": 3.2897036586084325,
      "grad_norm": 0.09428713470697403,
      "learning_rate": 3.443131725124043e-06,
      "loss": 0.0083,
      "step": 2010180
    },
    {
      "epoch": 3.289736389047086,
      "grad_norm": 0.18581415712833405,
      "learning_rate": 3.4430658329105255e-06,
      "loss": 0.0076,
      "step": 2010200
    },
    {
      "epoch": 3.2897691194857392,
      "grad_norm": 0.1849897801876068,
      "learning_rate": 3.4429999406970083e-06,
      "loss": 0.0061,
      "step": 2010220
    },
    {
      "epoch": 3.289801849924393,
      "grad_norm": 0.417190283536911,
      "learning_rate": 3.442934048483491e-06,
      "loss": 0.0117,
      "step": 2010240
    },
    {
      "epoch": 3.289834580363046,
      "grad_norm": 0.14169391989707947,
      "learning_rate": 3.4428681562699738e-06,
      "loss": 0.0121,
      "step": 2010260
    },
    {
      "epoch": 3.289867310801699,
      "grad_norm": 0.3353201448917389,
      "learning_rate": 3.442802264056457e-06,
      "loss": 0.0104,
      "step": 2010280
    },
    {
      "epoch": 3.2899000412403527,
      "grad_norm": 0.6096563339233398,
      "learning_rate": 3.4427363718429397e-06,
      "loss": 0.0231,
      "step": 2010300
    },
    {
      "epoch": 3.289932771679006,
      "grad_norm": 0.432804137468338,
      "learning_rate": 3.4426704796294224e-06,
      "loss": 0.0123,
      "step": 2010320
    },
    {
      "epoch": 3.2899655021176595,
      "grad_norm": 0.49141883850097656,
      "learning_rate": 3.442604587415905e-06,
      "loss": 0.019,
      "step": 2010340
    },
    {
      "epoch": 3.2899982325563126,
      "grad_norm": 0.18079863488674164,
      "learning_rate": 3.4425386952023883e-06,
      "loss": 0.0086,
      "step": 2010360
    },
    {
      "epoch": 3.2900309629949662,
      "grad_norm": 0.29198503494262695,
      "learning_rate": 3.442472802988871e-06,
      "loss": 0.0074,
      "step": 2010380
    },
    {
      "epoch": 3.2900636934336194,
      "grad_norm": 0.19748114049434662,
      "learning_rate": 3.4424069107753538e-06,
      "loss": 0.0073,
      "step": 2010400
    },
    {
      "epoch": 3.2900964238722725,
      "grad_norm": 0.26654866337776184,
      "learning_rate": 3.4423410185618365e-06,
      "loss": 0.0104,
      "step": 2010420
    },
    {
      "epoch": 3.290129154310926,
      "grad_norm": 0.12807264924049377,
      "learning_rate": 3.4422751263483197e-06,
      "loss": 0.0094,
      "step": 2010440
    },
    {
      "epoch": 3.2901618847495793,
      "grad_norm": 0.4601621925830841,
      "learning_rate": 3.4422092341348024e-06,
      "loss": 0.0093,
      "step": 2010460
    },
    {
      "epoch": 3.290194615188233,
      "grad_norm": 0.1606605499982834,
      "learning_rate": 3.442143341921285e-06,
      "loss": 0.012,
      "step": 2010480
    },
    {
      "epoch": 3.290227345626886,
      "grad_norm": 0.10679961740970612,
      "learning_rate": 3.4420774497077687e-06,
      "loss": 0.0109,
      "step": 2010500
    },
    {
      "epoch": 3.2902600760655396,
      "grad_norm": 0.10216251760721207,
      "learning_rate": 3.4420115574942515e-06,
      "loss": 0.011,
      "step": 2010520
    },
    {
      "epoch": 3.2902928065041928,
      "grad_norm": 0.569077730178833,
      "learning_rate": 3.4419456652807342e-06,
      "loss": 0.0135,
      "step": 2010540
    },
    {
      "epoch": 3.290325536942846,
      "grad_norm": 0.11618747562170029,
      "learning_rate": 3.441879773067217e-06,
      "loss": 0.0127,
      "step": 2010560
    },
    {
      "epoch": 3.2903582673814995,
      "grad_norm": 0.47581103444099426,
      "learning_rate": 3.4418138808537e-06,
      "loss": 0.0136,
      "step": 2010580
    },
    {
      "epoch": 3.2903909978201527,
      "grad_norm": 0.23260091245174408,
      "learning_rate": 3.441747988640183e-06,
      "loss": 0.0084,
      "step": 2010600
    },
    {
      "epoch": 3.2904237282588062,
      "grad_norm": 0.3159886598587036,
      "learning_rate": 3.4416820964266656e-06,
      "loss": 0.0093,
      "step": 2010620
    },
    {
      "epoch": 3.2904564586974594,
      "grad_norm": 0.13032567501068115,
      "learning_rate": 3.4416162042131483e-06,
      "loss": 0.009,
      "step": 2010640
    },
    {
      "epoch": 3.290489189136113,
      "grad_norm": 0.6909265518188477,
      "learning_rate": 3.4415503119996315e-06,
      "loss": 0.0133,
      "step": 2010660
    },
    {
      "epoch": 3.290521919574766,
      "grad_norm": 0.26909512281417847,
      "learning_rate": 3.4414844197861143e-06,
      "loss": 0.01,
      "step": 2010680
    },
    {
      "epoch": 3.2905546500134193,
      "grad_norm": 0.14891788363456726,
      "learning_rate": 3.441418527572597e-06,
      "loss": 0.0108,
      "step": 2010700
    },
    {
      "epoch": 3.290587380452073,
      "grad_norm": 0.32019364833831787,
      "learning_rate": 3.4413526353590797e-06,
      "loss": 0.0129,
      "step": 2010720
    },
    {
      "epoch": 3.290620110890726,
      "grad_norm": 0.7809978723526001,
      "learning_rate": 3.4412867431455625e-06,
      "loss": 0.0114,
      "step": 2010740
    },
    {
      "epoch": 3.2906528413293796,
      "grad_norm": 0.13199910521507263,
      "learning_rate": 3.4412208509320456e-06,
      "loss": 0.0163,
      "step": 2010760
    },
    {
      "epoch": 3.290685571768033,
      "grad_norm": 0.40290626883506775,
      "learning_rate": 3.4411549587185284e-06,
      "loss": 0.0109,
      "step": 2010780
    },
    {
      "epoch": 3.2907183022066864,
      "grad_norm": 0.13344226777553558,
      "learning_rate": 3.441089066505011e-06,
      "loss": 0.0097,
      "step": 2010800
    },
    {
      "epoch": 3.2907510326453395,
      "grad_norm": 0.09189950674772263,
      "learning_rate": 3.441023174291494e-06,
      "loss": 0.0086,
      "step": 2010820
    },
    {
      "epoch": 3.2907837630839927,
      "grad_norm": 0.4949873387813568,
      "learning_rate": 3.440957282077977e-06,
      "loss": 0.0143,
      "step": 2010840
    },
    {
      "epoch": 3.2908164935226463,
      "grad_norm": 0.44521602988243103,
      "learning_rate": 3.44089138986446e-06,
      "loss": 0.0091,
      "step": 2010860
    },
    {
      "epoch": 3.2908492239612994,
      "grad_norm": 0.23112106323242188,
      "learning_rate": 3.440825497650943e-06,
      "loss": 0.011,
      "step": 2010880
    },
    {
      "epoch": 3.290881954399953,
      "grad_norm": 0.10925779491662979,
      "learning_rate": 3.440759605437426e-06,
      "loss": 0.0172,
      "step": 2010900
    },
    {
      "epoch": 3.290914684838606,
      "grad_norm": 0.5613342523574829,
      "learning_rate": 3.440693713223909e-06,
      "loss": 0.014,
      "step": 2010920
    },
    {
      "epoch": 3.2909474152772598,
      "grad_norm": 0.21965986490249634,
      "learning_rate": 3.4406278210103916e-06,
      "loss": 0.0113,
      "step": 2010940
    },
    {
      "epoch": 3.290980145715913,
      "grad_norm": 0.11697110533714294,
      "learning_rate": 3.4405619287968743e-06,
      "loss": 0.0127,
      "step": 2010960
    },
    {
      "epoch": 3.291012876154566,
      "grad_norm": 0.6718429327011108,
      "learning_rate": 3.4404960365833575e-06,
      "loss": 0.0105,
      "step": 2010980
    },
    {
      "epoch": 3.2910456065932197,
      "grad_norm": 0.668443500995636,
      "learning_rate": 3.44043014436984e-06,
      "loss": 0.0141,
      "step": 2011000
    },
    {
      "epoch": 3.291078337031873,
      "grad_norm": 0.5910436511039734,
      "learning_rate": 3.440364252156323e-06,
      "loss": 0.0137,
      "step": 2011020
    },
    {
      "epoch": 3.2911110674705264,
      "grad_norm": 0.5750030875205994,
      "learning_rate": 3.4402983599428057e-06,
      "loss": 0.0147,
      "step": 2011040
    },
    {
      "epoch": 3.2911437979091795,
      "grad_norm": 0.1141333356499672,
      "learning_rate": 3.440232467729289e-06,
      "loss": 0.0101,
      "step": 2011060
    },
    {
      "epoch": 3.291176528347833,
      "grad_norm": 0.08279382437467575,
      "learning_rate": 3.4401665755157716e-06,
      "loss": 0.0124,
      "step": 2011080
    },
    {
      "epoch": 3.2912092587864863,
      "grad_norm": 0.27939537167549133,
      "learning_rate": 3.4401006833022543e-06,
      "loss": 0.0141,
      "step": 2011100
    },
    {
      "epoch": 3.2912419892251394,
      "grad_norm": 0.24648132920265198,
      "learning_rate": 3.440034791088737e-06,
      "loss": 0.0096,
      "step": 2011120
    },
    {
      "epoch": 3.291274719663793,
      "grad_norm": 0.474386602640152,
      "learning_rate": 3.43996889887522e-06,
      "loss": 0.0152,
      "step": 2011140
    },
    {
      "epoch": 3.291307450102446,
      "grad_norm": 0.08199897408485413,
      "learning_rate": 3.439903006661703e-06,
      "loss": 0.01,
      "step": 2011160
    },
    {
      "epoch": 3.2913401805411,
      "grad_norm": 0.17221082746982574,
      "learning_rate": 3.4398371144481857e-06,
      "loss": 0.0123,
      "step": 2011180
    },
    {
      "epoch": 3.291372910979753,
      "grad_norm": 0.13909246027469635,
      "learning_rate": 3.439771222234669e-06,
      "loss": 0.0083,
      "step": 2011200
    },
    {
      "epoch": 3.2914056414184065,
      "grad_norm": 0.08984816074371338,
      "learning_rate": 3.439705330021152e-06,
      "loss": 0.0135,
      "step": 2011220
    },
    {
      "epoch": 3.2914383718570597,
      "grad_norm": 0.2524712085723877,
      "learning_rate": 3.4396394378076348e-06,
      "loss": 0.0074,
      "step": 2011240
    },
    {
      "epoch": 3.291471102295713,
      "grad_norm": 0.12756069004535675,
      "learning_rate": 3.4395735455941175e-06,
      "loss": 0.0097,
      "step": 2011260
    },
    {
      "epoch": 3.2915038327343664,
      "grad_norm": 0.2372106909751892,
      "learning_rate": 3.4395076533806003e-06,
      "loss": 0.009,
      "step": 2011280
    },
    {
      "epoch": 3.2915365631730196,
      "grad_norm": 0.3241812586784363,
      "learning_rate": 3.4394417611670834e-06,
      "loss": 0.0138,
      "step": 2011300
    },
    {
      "epoch": 3.291569293611673,
      "grad_norm": 0.1652788519859314,
      "learning_rate": 3.439375868953566e-06,
      "loss": 0.0153,
      "step": 2011320
    },
    {
      "epoch": 3.2916020240503263,
      "grad_norm": 0.211093008518219,
      "learning_rate": 3.439309976740049e-06,
      "loss": 0.0094,
      "step": 2011340
    },
    {
      "epoch": 3.29163475448898,
      "grad_norm": 0.12222511321306229,
      "learning_rate": 3.4392440845265316e-06,
      "loss": 0.0097,
      "step": 2011360
    },
    {
      "epoch": 3.291667484927633,
      "grad_norm": 0.2524224817752838,
      "learning_rate": 3.439178192313015e-06,
      "loss": 0.0124,
      "step": 2011380
    },
    {
      "epoch": 3.291700215366286,
      "grad_norm": 0.186613067984581,
      "learning_rate": 3.4391123000994975e-06,
      "loss": 0.0104,
      "step": 2011400
    },
    {
      "epoch": 3.29173294580494,
      "grad_norm": 0.2948891520500183,
      "learning_rate": 3.4390464078859803e-06,
      "loss": 0.0126,
      "step": 2011420
    },
    {
      "epoch": 3.291765676243593,
      "grad_norm": 0.2950621247291565,
      "learning_rate": 3.438980515672463e-06,
      "loss": 0.0139,
      "step": 2011440
    },
    {
      "epoch": 3.2917984066822465,
      "grad_norm": 0.6314998269081116,
      "learning_rate": 3.438914623458946e-06,
      "loss": 0.0193,
      "step": 2011460
    },
    {
      "epoch": 3.2918311371208997,
      "grad_norm": 0.14978794753551483,
      "learning_rate": 3.438848731245429e-06,
      "loss": 0.0118,
      "step": 2011480
    },
    {
      "epoch": 3.291863867559553,
      "grad_norm": 0.41778767108917236,
      "learning_rate": 3.4387828390319117e-06,
      "loss": 0.0123,
      "step": 2011500
    },
    {
      "epoch": 3.2918965979982064,
      "grad_norm": 3.300934076309204,
      "learning_rate": 3.4387169468183944e-06,
      "loss": 0.0157,
      "step": 2011520
    },
    {
      "epoch": 3.2919293284368596,
      "grad_norm": 0.7337265610694885,
      "learning_rate": 3.4386510546048776e-06,
      "loss": 0.0137,
      "step": 2011540
    },
    {
      "epoch": 3.291962058875513,
      "grad_norm": 0.777770459651947,
      "learning_rate": 3.4385851623913607e-06,
      "loss": 0.0091,
      "step": 2011560
    },
    {
      "epoch": 3.2919947893141663,
      "grad_norm": 0.34299859404563904,
      "learning_rate": 3.4385192701778435e-06,
      "loss": 0.0116,
      "step": 2011580
    },
    {
      "epoch": 3.2920275197528195,
      "grad_norm": 0.16122384369373322,
      "learning_rate": 3.4384533779643266e-06,
      "loss": 0.0094,
      "step": 2011600
    },
    {
      "epoch": 3.292060250191473,
      "grad_norm": 0.43779292702674866,
      "learning_rate": 3.4383874857508094e-06,
      "loss": 0.0118,
      "step": 2011620
    },
    {
      "epoch": 3.2920929806301262,
      "grad_norm": 0.10027869790792465,
      "learning_rate": 3.438321593537292e-06,
      "loss": 0.01,
      "step": 2011640
    },
    {
      "epoch": 3.29212571106878,
      "grad_norm": 0.28987935185432434,
      "learning_rate": 3.438255701323775e-06,
      "loss": 0.0107,
      "step": 2011660
    },
    {
      "epoch": 3.292158441507433,
      "grad_norm": 0.11360007524490356,
      "learning_rate": 3.4381898091102576e-06,
      "loss": 0.0167,
      "step": 2011680
    },
    {
      "epoch": 3.2921911719460866,
      "grad_norm": 0.1251760572195053,
      "learning_rate": 3.4381239168967408e-06,
      "loss": 0.014,
      "step": 2011700
    },
    {
      "epoch": 3.2922239023847397,
      "grad_norm": 0.3925051689147949,
      "learning_rate": 3.4380580246832235e-06,
      "loss": 0.0142,
      "step": 2011720
    },
    {
      "epoch": 3.292256632823393,
      "grad_norm": 0.13967549800872803,
      "learning_rate": 3.4379921324697062e-06,
      "loss": 0.0086,
      "step": 2011740
    },
    {
      "epoch": 3.2922893632620465,
      "grad_norm": 0.3282645344734192,
      "learning_rate": 3.437926240256189e-06,
      "loss": 0.0121,
      "step": 2011760
    },
    {
      "epoch": 3.2923220937006996,
      "grad_norm": 0.21068546175956726,
      "learning_rate": 3.437860348042672e-06,
      "loss": 0.0151,
      "step": 2011780
    },
    {
      "epoch": 3.292354824139353,
      "grad_norm": 0.15916472673416138,
      "learning_rate": 3.437794455829155e-06,
      "loss": 0.0083,
      "step": 2011800
    },
    {
      "epoch": 3.2923875545780064,
      "grad_norm": 0.18025840818881989,
      "learning_rate": 3.4377285636156376e-06,
      "loss": 0.0088,
      "step": 2011820
    },
    {
      "epoch": 3.29242028501666,
      "grad_norm": 0.5150374174118042,
      "learning_rate": 3.4376626714021204e-06,
      "loss": 0.0113,
      "step": 2011840
    },
    {
      "epoch": 3.292453015455313,
      "grad_norm": 0.6240016222000122,
      "learning_rate": 3.4375967791886035e-06,
      "loss": 0.0137,
      "step": 2011860
    },
    {
      "epoch": 3.2924857458939663,
      "grad_norm": 0.513914167881012,
      "learning_rate": 3.4375308869750863e-06,
      "loss": 0.0099,
      "step": 2011880
    },
    {
      "epoch": 3.29251847633262,
      "grad_norm": 0.3225363492965698,
      "learning_rate": 3.437464994761569e-06,
      "loss": 0.0086,
      "step": 2011900
    },
    {
      "epoch": 3.292551206771273,
      "grad_norm": 0.6817767024040222,
      "learning_rate": 3.4373991025480526e-06,
      "loss": 0.0176,
      "step": 2011920
    },
    {
      "epoch": 3.2925839372099266,
      "grad_norm": 0.2930394113063812,
      "learning_rate": 3.4373332103345353e-06,
      "loss": 0.0096,
      "step": 2011940
    },
    {
      "epoch": 3.2926166676485797,
      "grad_norm": 0.27422574162483215,
      "learning_rate": 3.437267318121018e-06,
      "loss": 0.0124,
      "step": 2011960
    },
    {
      "epoch": 3.2926493980872333,
      "grad_norm": 0.18081890046596527,
      "learning_rate": 3.437201425907501e-06,
      "loss": 0.0156,
      "step": 2011980
    },
    {
      "epoch": 3.2926821285258865,
      "grad_norm": 0.21259470283985138,
      "learning_rate": 3.437135533693984e-06,
      "loss": 0.0115,
      "step": 2012000
    },
    {
      "epoch": 3.2927148589645396,
      "grad_norm": 0.20778384804725647,
      "learning_rate": 3.4370696414804667e-06,
      "loss": 0.0112,
      "step": 2012020
    },
    {
      "epoch": 3.2927475894031932,
      "grad_norm": 0.470012903213501,
      "learning_rate": 3.4370037492669494e-06,
      "loss": 0.0115,
      "step": 2012040
    },
    {
      "epoch": 3.2927803198418464,
      "grad_norm": 0.16953125596046448,
      "learning_rate": 3.436937857053432e-06,
      "loss": 0.0077,
      "step": 2012060
    },
    {
      "epoch": 3.2928130502805,
      "grad_norm": 0.029290098696947098,
      "learning_rate": 3.4368719648399154e-06,
      "loss": 0.0123,
      "step": 2012080
    },
    {
      "epoch": 3.292845780719153,
      "grad_norm": 0.13491877913475037,
      "learning_rate": 3.436806072626398e-06,
      "loss": 0.013,
      "step": 2012100
    },
    {
      "epoch": 3.2928785111578067,
      "grad_norm": 0.341225802898407,
      "learning_rate": 3.436740180412881e-06,
      "loss": 0.0088,
      "step": 2012120
    },
    {
      "epoch": 3.29291124159646,
      "grad_norm": 0.36395320296287537,
      "learning_rate": 3.4366742881993636e-06,
      "loss": 0.0149,
      "step": 2012140
    },
    {
      "epoch": 3.292943972035113,
      "grad_norm": 0.24735891819000244,
      "learning_rate": 3.4366083959858463e-06,
      "loss": 0.0116,
      "step": 2012160
    },
    {
      "epoch": 3.2929767024737666,
      "grad_norm": 0.4695669412612915,
      "learning_rate": 3.4365425037723295e-06,
      "loss": 0.0095,
      "step": 2012180
    },
    {
      "epoch": 3.2930094329124198,
      "grad_norm": 0.47799140214920044,
      "learning_rate": 3.4364766115588122e-06,
      "loss": 0.0111,
      "step": 2012200
    },
    {
      "epoch": 3.2930421633510734,
      "grad_norm": 0.44523245096206665,
      "learning_rate": 3.436410719345295e-06,
      "loss": 0.0127,
      "step": 2012220
    },
    {
      "epoch": 3.2930748937897265,
      "grad_norm": 0.31824272871017456,
      "learning_rate": 3.4363448271317777e-06,
      "loss": 0.0091,
      "step": 2012240
    },
    {
      "epoch": 3.29310762422838,
      "grad_norm": 0.1700085550546646,
      "learning_rate": 3.4362789349182613e-06,
      "loss": 0.0094,
      "step": 2012260
    },
    {
      "epoch": 3.2931403546670333,
      "grad_norm": 0.20890940725803375,
      "learning_rate": 3.436213042704744e-06,
      "loss": 0.0135,
      "step": 2012280
    },
    {
      "epoch": 3.2931730851056864,
      "grad_norm": 0.6203228831291199,
      "learning_rate": 3.4361471504912268e-06,
      "loss": 0.0158,
      "step": 2012300
    },
    {
      "epoch": 3.29320581554434,
      "grad_norm": 0.25229397416114807,
      "learning_rate": 3.43608125827771e-06,
      "loss": 0.0127,
      "step": 2012320
    },
    {
      "epoch": 3.293238545982993,
      "grad_norm": 0.20504766702651978,
      "learning_rate": 3.4360153660641927e-06,
      "loss": 0.012,
      "step": 2012340
    },
    {
      "epoch": 3.2932712764216467,
      "grad_norm": 0.14688187837600708,
      "learning_rate": 3.4359494738506754e-06,
      "loss": 0.0111,
      "step": 2012360
    },
    {
      "epoch": 3.2933040068603,
      "grad_norm": 0.26099446415901184,
      "learning_rate": 3.435883581637158e-06,
      "loss": 0.0159,
      "step": 2012380
    },
    {
      "epoch": 3.2933367372989535,
      "grad_norm": 0.2440706193447113,
      "learning_rate": 3.4358176894236413e-06,
      "loss": 0.0126,
      "step": 2012400
    },
    {
      "epoch": 3.2933694677376066,
      "grad_norm": 0.1572040468454361,
      "learning_rate": 3.435751797210124e-06,
      "loss": 0.0136,
      "step": 2012420
    },
    {
      "epoch": 3.29340219817626,
      "grad_norm": 0.24208208918571472,
      "learning_rate": 3.4356859049966068e-06,
      "loss": 0.0112,
      "step": 2012440
    },
    {
      "epoch": 3.2934349286149134,
      "grad_norm": 0.16639739274978638,
      "learning_rate": 3.4356200127830895e-06,
      "loss": 0.0117,
      "step": 2012460
    },
    {
      "epoch": 3.2934676590535665,
      "grad_norm": 0.5556905269622803,
      "learning_rate": 3.4355541205695727e-06,
      "loss": 0.0105,
      "step": 2012480
    },
    {
      "epoch": 3.29350038949222,
      "grad_norm": 0.34063345193862915,
      "learning_rate": 3.4354882283560554e-06,
      "loss": 0.0118,
      "step": 2012500
    },
    {
      "epoch": 3.2935331199308733,
      "grad_norm": 0.17754901945590973,
      "learning_rate": 3.435422336142538e-06,
      "loss": 0.0158,
      "step": 2012520
    },
    {
      "epoch": 3.293565850369527,
      "grad_norm": 0.4008067548274994,
      "learning_rate": 3.435356443929021e-06,
      "loss": 0.0102,
      "step": 2012540
    },
    {
      "epoch": 3.29359858080818,
      "grad_norm": 0.3945114314556122,
      "learning_rate": 3.4352905517155036e-06,
      "loss": 0.0072,
      "step": 2012560
    },
    {
      "epoch": 3.293631311246833,
      "grad_norm": 0.3264608383178711,
      "learning_rate": 3.435224659501987e-06,
      "loss": 0.0111,
      "step": 2012580
    },
    {
      "epoch": 3.2936640416854868,
      "grad_norm": 2.3051488399505615,
      "learning_rate": 3.4351587672884695e-06,
      "loss": 0.0116,
      "step": 2012600
    },
    {
      "epoch": 3.29369677212414,
      "grad_norm": 0.40560078620910645,
      "learning_rate": 3.435092875074953e-06,
      "loss": 0.0114,
      "step": 2012620
    },
    {
      "epoch": 3.2937295025627935,
      "grad_norm": 0.6241198778152466,
      "learning_rate": 3.435026982861436e-06,
      "loss": 0.0129,
      "step": 2012640
    },
    {
      "epoch": 3.2937622330014467,
      "grad_norm": 0.3284895122051239,
      "learning_rate": 3.4349610906479186e-06,
      "loss": 0.0098,
      "step": 2012660
    },
    {
      "epoch": 3.2937949634401003,
      "grad_norm": 0.35754328966140747,
      "learning_rate": 3.4348951984344014e-06,
      "loss": 0.0103,
      "step": 2012680
    },
    {
      "epoch": 3.2938276938787534,
      "grad_norm": 0.30542290210723877,
      "learning_rate": 3.434829306220884e-06,
      "loss": 0.0127,
      "step": 2012700
    },
    {
      "epoch": 3.2938604243174066,
      "grad_norm": 0.7533419132232666,
      "learning_rate": 3.4347634140073673e-06,
      "loss": 0.0138,
      "step": 2012720
    },
    {
      "epoch": 3.29389315475606,
      "grad_norm": 0.07830736041069031,
      "learning_rate": 3.43469752179385e-06,
      "loss": 0.0107,
      "step": 2012740
    },
    {
      "epoch": 3.2939258851947133,
      "grad_norm": 1.2462927103042603,
      "learning_rate": 3.4346316295803327e-06,
      "loss": 0.0106,
      "step": 2012760
    },
    {
      "epoch": 3.293958615633367,
      "grad_norm": 0.20519940555095673,
      "learning_rate": 3.4345657373668155e-06,
      "loss": 0.0094,
      "step": 2012780
    },
    {
      "epoch": 3.29399134607202,
      "grad_norm": 0.16985192894935608,
      "learning_rate": 3.4344998451532986e-06,
      "loss": 0.0095,
      "step": 2012800
    },
    {
      "epoch": 3.2940240765106736,
      "grad_norm": 0.34340769052505493,
      "learning_rate": 3.4344339529397814e-06,
      "loss": 0.0101,
      "step": 2012820
    },
    {
      "epoch": 3.294056806949327,
      "grad_norm": 0.37084710597991943,
      "learning_rate": 3.434368060726264e-06,
      "loss": 0.0131,
      "step": 2012840
    },
    {
      "epoch": 3.29408953738798,
      "grad_norm": 0.6167177557945251,
      "learning_rate": 3.434302168512747e-06,
      "loss": 0.0104,
      "step": 2012860
    },
    {
      "epoch": 3.2941222678266335,
      "grad_norm": 0.5348813533782959,
      "learning_rate": 3.43423627629923e-06,
      "loss": 0.0123,
      "step": 2012880
    },
    {
      "epoch": 3.2941549982652867,
      "grad_norm": 0.31749147176742554,
      "learning_rate": 3.4341703840857128e-06,
      "loss": 0.0141,
      "step": 2012900
    },
    {
      "epoch": 3.2941877287039403,
      "grad_norm": 0.21795986592769623,
      "learning_rate": 3.4341044918721955e-06,
      "loss": 0.0102,
      "step": 2012920
    },
    {
      "epoch": 3.2942204591425934,
      "grad_norm": 0.05199037864804268,
      "learning_rate": 3.4340385996586782e-06,
      "loss": 0.0098,
      "step": 2012940
    },
    {
      "epoch": 3.2942531895812466,
      "grad_norm": 0.3230508863925934,
      "learning_rate": 3.433972707445162e-06,
      "loss": 0.0099,
      "step": 2012960
    },
    {
      "epoch": 3.2942859200199,
      "grad_norm": 0.13865061104297638,
      "learning_rate": 3.4339068152316446e-06,
      "loss": 0.0074,
      "step": 2012980
    },
    {
      "epoch": 3.2943186504585533,
      "grad_norm": 0.18843957781791687,
      "learning_rate": 3.4338409230181273e-06,
      "loss": 0.0135,
      "step": 2013000
    },
    {
      "epoch": 3.294351380897207,
      "grad_norm": 0.5744703412055969,
      "learning_rate": 3.4337750308046105e-06,
      "loss": 0.0075,
      "step": 2013020
    },
    {
      "epoch": 3.29438411133586,
      "grad_norm": 0.6047647595405579,
      "learning_rate": 3.433709138591093e-06,
      "loss": 0.0145,
      "step": 2013040
    },
    {
      "epoch": 3.294416841774513,
      "grad_norm": 0.1852533370256424,
      "learning_rate": 3.433643246377576e-06,
      "loss": 0.0132,
      "step": 2013060
    },
    {
      "epoch": 3.294449572213167,
      "grad_norm": 0.38425543904304504,
      "learning_rate": 3.4335773541640587e-06,
      "loss": 0.0074,
      "step": 2013080
    },
    {
      "epoch": 3.29448230265182,
      "grad_norm": 0.2130315601825714,
      "learning_rate": 3.4335114619505414e-06,
      "loss": 0.0124,
      "step": 2013100
    },
    {
      "epoch": 3.2945150330904736,
      "grad_norm": 0.4978468418121338,
      "learning_rate": 3.4334455697370246e-06,
      "loss": 0.0068,
      "step": 2013120
    },
    {
      "epoch": 3.2945477635291267,
      "grad_norm": 0.2405013144016266,
      "learning_rate": 3.4333796775235073e-06,
      "loss": 0.0104,
      "step": 2013140
    },
    {
      "epoch": 3.2945804939677803,
      "grad_norm": 0.16087275743484497,
      "learning_rate": 3.43331378530999e-06,
      "loss": 0.0109,
      "step": 2013160
    },
    {
      "epoch": 3.2946132244064334,
      "grad_norm": 0.6470813155174255,
      "learning_rate": 3.433247893096473e-06,
      "loss": 0.0137,
      "step": 2013180
    },
    {
      "epoch": 3.2946459548450866,
      "grad_norm": 0.08378595858812332,
      "learning_rate": 3.433182000882956e-06,
      "loss": 0.0096,
      "step": 2013200
    },
    {
      "epoch": 3.29467868528374,
      "grad_norm": 0.2741589844226837,
      "learning_rate": 3.4331161086694387e-06,
      "loss": 0.0106,
      "step": 2013220
    },
    {
      "epoch": 3.2947114157223933,
      "grad_norm": 0.27314189076423645,
      "learning_rate": 3.4330502164559215e-06,
      "loss": 0.0133,
      "step": 2013240
    },
    {
      "epoch": 3.294744146161047,
      "grad_norm": 0.4108709692955017,
      "learning_rate": 3.432984324242404e-06,
      "loss": 0.015,
      "step": 2013260
    },
    {
      "epoch": 3.2947768765997,
      "grad_norm": 0.24222825467586517,
      "learning_rate": 3.4329184320288874e-06,
      "loss": 0.0104,
      "step": 2013280
    },
    {
      "epoch": 3.2948096070383537,
      "grad_norm": 0.3928217589855194,
      "learning_rate": 3.43285253981537e-06,
      "loss": 0.0114,
      "step": 2013300
    },
    {
      "epoch": 3.294842337477007,
      "grad_norm": 0.1843457818031311,
      "learning_rate": 3.4327866476018533e-06,
      "loss": 0.0103,
      "step": 2013320
    },
    {
      "epoch": 3.29487506791566,
      "grad_norm": 0.5829328298568726,
      "learning_rate": 3.4327207553883364e-06,
      "loss": 0.0153,
      "step": 2013340
    },
    {
      "epoch": 3.2949077983543136,
      "grad_norm": 0.12022101134061813,
      "learning_rate": 3.432654863174819e-06,
      "loss": 0.0097,
      "step": 2013360
    },
    {
      "epoch": 3.2949405287929667,
      "grad_norm": 0.3022512197494507,
      "learning_rate": 3.432588970961302e-06,
      "loss": 0.0164,
      "step": 2013380
    },
    {
      "epoch": 3.2949732592316203,
      "grad_norm": 0.34734928607940674,
      "learning_rate": 3.4325230787477846e-06,
      "loss": 0.0116,
      "step": 2013400
    },
    {
      "epoch": 3.2950059896702735,
      "grad_norm": 0.48564061522483826,
      "learning_rate": 3.432457186534268e-06,
      "loss": 0.0097,
      "step": 2013420
    },
    {
      "epoch": 3.295038720108927,
      "grad_norm": 0.24872766435146332,
      "learning_rate": 3.4323912943207505e-06,
      "loss": 0.012,
      "step": 2013440
    },
    {
      "epoch": 3.29507145054758,
      "grad_norm": 0.42598697543144226,
      "learning_rate": 3.4323254021072333e-06,
      "loss": 0.0147,
      "step": 2013460
    },
    {
      "epoch": 3.2951041809862334,
      "grad_norm": 0.20612755417823792,
      "learning_rate": 3.432259509893716e-06,
      "loss": 0.0139,
      "step": 2013480
    },
    {
      "epoch": 3.295136911424887,
      "grad_norm": 0.5698295831680298,
      "learning_rate": 3.432193617680199e-06,
      "loss": 0.0145,
      "step": 2013500
    },
    {
      "epoch": 3.29516964186354,
      "grad_norm": 0.45508667826652527,
      "learning_rate": 3.432127725466682e-06,
      "loss": 0.0128,
      "step": 2013520
    },
    {
      "epoch": 3.2952023723021937,
      "grad_norm": 0.20307272672653198,
      "learning_rate": 3.4320618332531647e-06,
      "loss": 0.0113,
      "step": 2013540
    },
    {
      "epoch": 3.295235102740847,
      "grad_norm": 0.1789456456899643,
      "learning_rate": 3.4319959410396474e-06,
      "loss": 0.0091,
      "step": 2013560
    },
    {
      "epoch": 3.2952678331795004,
      "grad_norm": 0.17023316025733948,
      "learning_rate": 3.43193004882613e-06,
      "loss": 0.008,
      "step": 2013580
    },
    {
      "epoch": 3.2953005636181536,
      "grad_norm": 0.3732074201107025,
      "learning_rate": 3.4318641566126133e-06,
      "loss": 0.0089,
      "step": 2013600
    },
    {
      "epoch": 3.2953332940568067,
      "grad_norm": 0.7659416198730469,
      "learning_rate": 3.431798264399096e-06,
      "loss": 0.0144,
      "step": 2013620
    },
    {
      "epoch": 3.2953660244954603,
      "grad_norm": 0.22123511135578156,
      "learning_rate": 3.431732372185579e-06,
      "loss": 0.0136,
      "step": 2013640
    },
    {
      "epoch": 3.2953987549341135,
      "grad_norm": 0.2982464134693146,
      "learning_rate": 3.4316664799720624e-06,
      "loss": 0.016,
      "step": 2013660
    },
    {
      "epoch": 3.295431485372767,
      "grad_norm": 0.575227677822113,
      "learning_rate": 3.431600587758545e-06,
      "loss": 0.0153,
      "step": 2013680
    },
    {
      "epoch": 3.2954642158114202,
      "grad_norm": 0.13301411271095276,
      "learning_rate": 3.431534695545028e-06,
      "loss": 0.0113,
      "step": 2013700
    },
    {
      "epoch": 3.295496946250074,
      "grad_norm": 0.29038918018341064,
      "learning_rate": 3.4314688033315106e-06,
      "loss": 0.0145,
      "step": 2013720
    },
    {
      "epoch": 3.295529676688727,
      "grad_norm": 0.12044233083724976,
      "learning_rate": 3.4314029111179938e-06,
      "loss": 0.0108,
      "step": 2013740
    },
    {
      "epoch": 3.29556240712738,
      "grad_norm": 0.2627356946468353,
      "learning_rate": 3.4313370189044765e-06,
      "loss": 0.0148,
      "step": 2013760
    },
    {
      "epoch": 3.2955951375660337,
      "grad_norm": 0.7049527168273926,
      "learning_rate": 3.4312711266909592e-06,
      "loss": 0.0125,
      "step": 2013780
    },
    {
      "epoch": 3.295627868004687,
      "grad_norm": 0.21785719692707062,
      "learning_rate": 3.431205234477442e-06,
      "loss": 0.0071,
      "step": 2013800
    },
    {
      "epoch": 3.2956605984433405,
      "grad_norm": 0.13528120517730713,
      "learning_rate": 3.431139342263925e-06,
      "loss": 0.008,
      "step": 2013820
    },
    {
      "epoch": 3.2956933288819936,
      "grad_norm": 0.2603064775466919,
      "learning_rate": 3.431073450050408e-06,
      "loss": 0.0115,
      "step": 2013840
    },
    {
      "epoch": 3.295726059320647,
      "grad_norm": 0.400593101978302,
      "learning_rate": 3.4310075578368906e-06,
      "loss": 0.0108,
      "step": 2013860
    },
    {
      "epoch": 3.2957587897593004,
      "grad_norm": 0.4164752662181854,
      "learning_rate": 3.4309416656233734e-06,
      "loss": 0.0156,
      "step": 2013880
    },
    {
      "epoch": 3.2957915201979535,
      "grad_norm": 0.32244250178337097,
      "learning_rate": 3.4308757734098565e-06,
      "loss": 0.0145,
      "step": 2013900
    },
    {
      "epoch": 3.295824250636607,
      "grad_norm": 0.2307935655117035,
      "learning_rate": 3.4308098811963393e-06,
      "loss": 0.0075,
      "step": 2013920
    },
    {
      "epoch": 3.2958569810752603,
      "grad_norm": 0.24218116700649261,
      "learning_rate": 3.430743988982822e-06,
      "loss": 0.0094,
      "step": 2013940
    },
    {
      "epoch": 3.295889711513914,
      "grad_norm": 0.39647093415260315,
      "learning_rate": 3.4306780967693047e-06,
      "loss": 0.009,
      "step": 2013960
    },
    {
      "epoch": 3.295922441952567,
      "grad_norm": 0.24147965013980865,
      "learning_rate": 3.4306122045557875e-06,
      "loss": 0.0137,
      "step": 2013980
    },
    {
      "epoch": 3.2959551723912206,
      "grad_norm": 0.043094221502542496,
      "learning_rate": 3.4305463123422706e-06,
      "loss": 0.0076,
      "step": 2014000
    },
    {
      "epoch": 3.2959879028298738,
      "grad_norm": 0.13052070140838623,
      "learning_rate": 3.430480420128754e-06,
      "loss": 0.0136,
      "step": 2014020
    },
    {
      "epoch": 3.296020633268527,
      "grad_norm": 0.22966335713863373,
      "learning_rate": 3.430414527915237e-06,
      "loss": 0.0117,
      "step": 2014040
    },
    {
      "epoch": 3.2960533637071805,
      "grad_norm": 0.28575649857521057,
      "learning_rate": 3.4303486357017197e-06,
      "loss": 0.0223,
      "step": 2014060
    },
    {
      "epoch": 3.2960860941458336,
      "grad_norm": 0.4056742191314697,
      "learning_rate": 3.4302827434882025e-06,
      "loss": 0.0086,
      "step": 2014080
    },
    {
      "epoch": 3.2961188245844872,
      "grad_norm": 0.2980545163154602,
      "learning_rate": 3.430216851274685e-06,
      "loss": 0.0127,
      "step": 2014100
    },
    {
      "epoch": 3.2961515550231404,
      "grad_norm": 0.0648055300116539,
      "learning_rate": 3.430150959061168e-06,
      "loss": 0.0057,
      "step": 2014120
    },
    {
      "epoch": 3.296184285461794,
      "grad_norm": 0.17279180884361267,
      "learning_rate": 3.430085066847651e-06,
      "loss": 0.0116,
      "step": 2014140
    },
    {
      "epoch": 3.296217015900447,
      "grad_norm": 0.07305266708135605,
      "learning_rate": 3.430019174634134e-06,
      "loss": 0.0107,
      "step": 2014160
    },
    {
      "epoch": 3.2962497463391003,
      "grad_norm": 0.10234303772449493,
      "learning_rate": 3.4299532824206166e-06,
      "loss": 0.0117,
      "step": 2014180
    },
    {
      "epoch": 3.296282476777754,
      "grad_norm": 0.1581021100282669,
      "learning_rate": 3.4298873902070993e-06,
      "loss": 0.008,
      "step": 2014200
    },
    {
      "epoch": 3.296315207216407,
      "grad_norm": 0.26823297142982483,
      "learning_rate": 3.4298214979935825e-06,
      "loss": 0.0138,
      "step": 2014220
    },
    {
      "epoch": 3.2963479376550606,
      "grad_norm": 0.6098248958587646,
      "learning_rate": 3.4297556057800652e-06,
      "loss": 0.0108,
      "step": 2014240
    },
    {
      "epoch": 3.2963806680937138,
      "grad_norm": 0.19395434856414795,
      "learning_rate": 3.429689713566548e-06,
      "loss": 0.0099,
      "step": 2014260
    },
    {
      "epoch": 3.2964133985323674,
      "grad_norm": 0.39013615250587463,
      "learning_rate": 3.4296238213530307e-06,
      "loss": 0.0141,
      "step": 2014280
    },
    {
      "epoch": 3.2964461289710205,
      "grad_norm": 0.3629486560821533,
      "learning_rate": 3.429557929139514e-06,
      "loss": 0.0112,
      "step": 2014300
    },
    {
      "epoch": 3.2964788594096737,
      "grad_norm": 0.26854684948921204,
      "learning_rate": 3.4294920369259966e-06,
      "loss": 0.0075,
      "step": 2014320
    },
    {
      "epoch": 3.2965115898483273,
      "grad_norm": 0.28927525877952576,
      "learning_rate": 3.4294261447124793e-06,
      "loss": 0.0125,
      "step": 2014340
    },
    {
      "epoch": 3.2965443202869804,
      "grad_norm": 0.3349144160747528,
      "learning_rate": 3.429360252498962e-06,
      "loss": 0.0137,
      "step": 2014360
    },
    {
      "epoch": 3.296577050725634,
      "grad_norm": 0.19614900648593903,
      "learning_rate": 3.4292943602854457e-06,
      "loss": 0.0127,
      "step": 2014380
    },
    {
      "epoch": 3.296609781164287,
      "grad_norm": 1.5178730487823486,
      "learning_rate": 3.4292284680719284e-06,
      "loss": 0.0157,
      "step": 2014400
    },
    {
      "epoch": 3.2966425116029408,
      "grad_norm": 0.17044493556022644,
      "learning_rate": 3.429162575858411e-06,
      "loss": 0.0136,
      "step": 2014420
    },
    {
      "epoch": 3.296675242041594,
      "grad_norm": 0.19681788980960846,
      "learning_rate": 3.4290966836448943e-06,
      "loss": 0.0136,
      "step": 2014440
    },
    {
      "epoch": 3.296707972480247,
      "grad_norm": 0.2967013418674469,
      "learning_rate": 3.429030791431377e-06,
      "loss": 0.0138,
      "step": 2014460
    },
    {
      "epoch": 3.2967407029189006,
      "grad_norm": 0.10538867115974426,
      "learning_rate": 3.42896489921786e-06,
      "loss": 0.007,
      "step": 2014480
    },
    {
      "epoch": 3.296773433357554,
      "grad_norm": 0.3362548351287842,
      "learning_rate": 3.4288990070043425e-06,
      "loss": 0.0155,
      "step": 2014500
    },
    {
      "epoch": 3.2968061637962074,
      "grad_norm": 0.29255372285842896,
      "learning_rate": 3.4288331147908253e-06,
      "loss": 0.0076,
      "step": 2014520
    },
    {
      "epoch": 3.2968388942348605,
      "grad_norm": 0.36512482166290283,
      "learning_rate": 3.4287672225773084e-06,
      "loss": 0.0111,
      "step": 2014540
    },
    {
      "epoch": 3.2968716246735137,
      "grad_norm": 0.2974403202533722,
      "learning_rate": 3.428701330363791e-06,
      "loss": 0.01,
      "step": 2014560
    },
    {
      "epoch": 3.2969043551121673,
      "grad_norm": 0.08256187289953232,
      "learning_rate": 3.428635438150274e-06,
      "loss": 0.0107,
      "step": 2014580
    },
    {
      "epoch": 3.2969370855508204,
      "grad_norm": 0.33140671253204346,
      "learning_rate": 3.4285695459367567e-06,
      "loss": 0.0097,
      "step": 2014600
    },
    {
      "epoch": 3.296969815989474,
      "grad_norm": 0.46613287925720215,
      "learning_rate": 3.42850365372324e-06,
      "loss": 0.0126,
      "step": 2014620
    },
    {
      "epoch": 3.297002546428127,
      "grad_norm": 0.1401921808719635,
      "learning_rate": 3.4284377615097226e-06,
      "loss": 0.0077,
      "step": 2014640
    },
    {
      "epoch": 3.2970352768667803,
      "grad_norm": 0.4470156729221344,
      "learning_rate": 3.4283718692962053e-06,
      "loss": 0.0073,
      "step": 2014660
    },
    {
      "epoch": 3.297068007305434,
      "grad_norm": 0.5646107196807861,
      "learning_rate": 3.428305977082688e-06,
      "loss": 0.0098,
      "step": 2014680
    },
    {
      "epoch": 3.297100737744087,
      "grad_norm": 0.13481909036636353,
      "learning_rate": 3.428240084869171e-06,
      "loss": 0.0134,
      "step": 2014700
    },
    {
      "epoch": 3.2971334681827407,
      "grad_norm": 0.60395348072052,
      "learning_rate": 3.4281741926556544e-06,
      "loss": 0.0107,
      "step": 2014720
    },
    {
      "epoch": 3.297166198621394,
      "grad_norm": 0.8645091652870178,
      "learning_rate": 3.428108300442137e-06,
      "loss": 0.0146,
      "step": 2014740
    },
    {
      "epoch": 3.2971989290600474,
      "grad_norm": 0.12093426287174225,
      "learning_rate": 3.4280424082286203e-06,
      "loss": 0.0104,
      "step": 2014760
    },
    {
      "epoch": 3.2972316594987006,
      "grad_norm": 0.18454687297344208,
      "learning_rate": 3.427976516015103e-06,
      "loss": 0.0123,
      "step": 2014780
    },
    {
      "epoch": 3.2972643899373537,
      "grad_norm": 0.19121123850345612,
      "learning_rate": 3.4279106238015857e-06,
      "loss": 0.0106,
      "step": 2014800
    },
    {
      "epoch": 3.2972971203760073,
      "grad_norm": 0.204349085688591,
      "learning_rate": 3.4278447315880685e-06,
      "loss": 0.0108,
      "step": 2014820
    },
    {
      "epoch": 3.2973298508146605,
      "grad_norm": 0.3301047384738922,
      "learning_rate": 3.4277788393745516e-06,
      "loss": 0.0123,
      "step": 2014840
    },
    {
      "epoch": 3.297362581253314,
      "grad_norm": 0.1513206511735916,
      "learning_rate": 3.4277129471610344e-06,
      "loss": 0.0079,
      "step": 2014860
    },
    {
      "epoch": 3.297395311691967,
      "grad_norm": 0.17392748594284058,
      "learning_rate": 3.427647054947517e-06,
      "loss": 0.012,
      "step": 2014880
    },
    {
      "epoch": 3.297428042130621,
      "grad_norm": 0.21740947663784027,
      "learning_rate": 3.427581162734e-06,
      "loss": 0.0168,
      "step": 2014900
    },
    {
      "epoch": 3.297460772569274,
      "grad_norm": 0.6231571435928345,
      "learning_rate": 3.427515270520483e-06,
      "loss": 0.0184,
      "step": 2014920
    },
    {
      "epoch": 3.297493503007927,
      "grad_norm": 0.2897511124610901,
      "learning_rate": 3.4274493783069658e-06,
      "loss": 0.0153,
      "step": 2014940
    },
    {
      "epoch": 3.2975262334465807,
      "grad_norm": 0.3358234167098999,
      "learning_rate": 3.4273834860934485e-06,
      "loss": 0.0131,
      "step": 2014960
    },
    {
      "epoch": 3.297558963885234,
      "grad_norm": 0.10104883462190628,
      "learning_rate": 3.4273175938799312e-06,
      "loss": 0.0097,
      "step": 2014980
    },
    {
      "epoch": 3.2975916943238874,
      "grad_norm": 0.3260945677757263,
      "learning_rate": 3.427251701666414e-06,
      "loss": 0.0184,
      "step": 2015000
    },
    {
      "epoch": 3.2976244247625406,
      "grad_norm": 0.24638506770133972,
      "learning_rate": 3.427185809452897e-06,
      "loss": 0.0152,
      "step": 2015020
    },
    {
      "epoch": 3.297657155201194,
      "grad_norm": 0.3974944055080414,
      "learning_rate": 3.42711991723938e-06,
      "loss": 0.0072,
      "step": 2015040
    },
    {
      "epoch": 3.2976898856398473,
      "grad_norm": 0.279070109128952,
      "learning_rate": 3.4270540250258626e-06,
      "loss": 0.0104,
      "step": 2015060
    },
    {
      "epoch": 3.2977226160785005,
      "grad_norm": 0.8887321949005127,
      "learning_rate": 3.4269881328123462e-06,
      "loss": 0.0177,
      "step": 2015080
    },
    {
      "epoch": 3.297755346517154,
      "grad_norm": 0.2548891007900238,
      "learning_rate": 3.426922240598829e-06,
      "loss": 0.0132,
      "step": 2015100
    },
    {
      "epoch": 3.2977880769558072,
      "grad_norm": 0.18060292303562164,
      "learning_rate": 3.4268563483853117e-06,
      "loss": 0.0147,
      "step": 2015120
    },
    {
      "epoch": 3.297820807394461,
      "grad_norm": 0.2159111052751541,
      "learning_rate": 3.4267904561717944e-06,
      "loss": 0.0078,
      "step": 2015140
    },
    {
      "epoch": 3.297853537833114,
      "grad_norm": 0.38683274388313293,
      "learning_rate": 3.4267245639582776e-06,
      "loss": 0.0138,
      "step": 2015160
    },
    {
      "epoch": 3.2978862682717676,
      "grad_norm": 0.889061689376831,
      "learning_rate": 3.4266586717447603e-06,
      "loss": 0.0099,
      "step": 2015180
    },
    {
      "epoch": 3.2979189987104207,
      "grad_norm": 0.13097940385341644,
      "learning_rate": 3.426592779531243e-06,
      "loss": 0.0099,
      "step": 2015200
    },
    {
      "epoch": 3.297951729149074,
      "grad_norm": 0.4442264437675476,
      "learning_rate": 3.426526887317726e-06,
      "loss": 0.0133,
      "step": 2015220
    },
    {
      "epoch": 3.2979844595877275,
      "grad_norm": 0.34105366468429565,
      "learning_rate": 3.426460995104209e-06,
      "loss": 0.0108,
      "step": 2015240
    },
    {
      "epoch": 3.2980171900263806,
      "grad_norm": 0.10435401648283005,
      "learning_rate": 3.4263951028906917e-06,
      "loss": 0.0096,
      "step": 2015260
    },
    {
      "epoch": 3.298049920465034,
      "grad_norm": 0.1908147633075714,
      "learning_rate": 3.4263292106771745e-06,
      "loss": 0.0093,
      "step": 2015280
    },
    {
      "epoch": 3.2980826509036874,
      "grad_norm": 0.15200871229171753,
      "learning_rate": 3.426263318463657e-06,
      "loss": 0.0184,
      "step": 2015300
    },
    {
      "epoch": 3.298115381342341,
      "grad_norm": 0.36019256711006165,
      "learning_rate": 3.4261974262501404e-06,
      "loss": 0.0138,
      "step": 2015320
    },
    {
      "epoch": 3.298148111780994,
      "grad_norm": 0.10968539118766785,
      "learning_rate": 3.426131534036623e-06,
      "loss": 0.0131,
      "step": 2015340
    },
    {
      "epoch": 3.2981808422196472,
      "grad_norm": 0.1295134276151657,
      "learning_rate": 3.426065641823106e-06,
      "loss": 0.0126,
      "step": 2015360
    },
    {
      "epoch": 3.298213572658301,
      "grad_norm": 0.15606655180454254,
      "learning_rate": 3.4259997496095886e-06,
      "loss": 0.0092,
      "step": 2015380
    },
    {
      "epoch": 3.298246303096954,
      "grad_norm": 0.2713271975517273,
      "learning_rate": 3.4259338573960717e-06,
      "loss": 0.0114,
      "step": 2015400
    },
    {
      "epoch": 3.2982790335356076,
      "grad_norm": 0.22750519216060638,
      "learning_rate": 3.425867965182555e-06,
      "loss": 0.0077,
      "step": 2015420
    },
    {
      "epoch": 3.2983117639742607,
      "grad_norm": 0.27947595715522766,
      "learning_rate": 3.4258020729690377e-06,
      "loss": 0.0104,
      "step": 2015440
    },
    {
      "epoch": 3.2983444944129143,
      "grad_norm": 0.3035537898540497,
      "learning_rate": 3.425736180755521e-06,
      "loss": 0.0081,
      "step": 2015460
    },
    {
      "epoch": 3.2983772248515675,
      "grad_norm": 0.3639100193977356,
      "learning_rate": 3.4256702885420036e-06,
      "loss": 0.015,
      "step": 2015480
    },
    {
      "epoch": 3.2984099552902206,
      "grad_norm": 0.48834460973739624,
      "learning_rate": 3.4256043963284863e-06,
      "loss": 0.0145,
      "step": 2015500
    },
    {
      "epoch": 3.2984426857288742,
      "grad_norm": 0.2359749972820282,
      "learning_rate": 3.425538504114969e-06,
      "loss": 0.016,
      "step": 2015520
    },
    {
      "epoch": 3.2984754161675274,
      "grad_norm": 0.39294058084487915,
      "learning_rate": 3.4254726119014518e-06,
      "loss": 0.0131,
      "step": 2015540
    },
    {
      "epoch": 3.298508146606181,
      "grad_norm": 0.23258695006370544,
      "learning_rate": 3.425406719687935e-06,
      "loss": 0.0178,
      "step": 2015560
    },
    {
      "epoch": 3.298540877044834,
      "grad_norm": 0.10187222063541412,
      "learning_rate": 3.4253408274744177e-06,
      "loss": 0.0099,
      "step": 2015580
    },
    {
      "epoch": 3.2985736074834877,
      "grad_norm": 0.73262619972229,
      "learning_rate": 3.4252749352609004e-06,
      "loss": 0.0142,
      "step": 2015600
    },
    {
      "epoch": 3.298606337922141,
      "grad_norm": 0.34404289722442627,
      "learning_rate": 3.425209043047383e-06,
      "loss": 0.0131,
      "step": 2015620
    },
    {
      "epoch": 3.298639068360794,
      "grad_norm": 0.567573606967926,
      "learning_rate": 3.4251431508338663e-06,
      "loss": 0.0132,
      "step": 2015640
    },
    {
      "epoch": 3.2986717987994476,
      "grad_norm": 0.10147630423307419,
      "learning_rate": 3.425077258620349e-06,
      "loss": 0.0103,
      "step": 2015660
    },
    {
      "epoch": 3.2987045292381008,
      "grad_norm": 0.5255590677261353,
      "learning_rate": 3.425011366406832e-06,
      "loss": 0.0075,
      "step": 2015680
    },
    {
      "epoch": 3.2987372596767544,
      "grad_norm": 0.5027735829353333,
      "learning_rate": 3.4249454741933145e-06,
      "loss": 0.0126,
      "step": 2015700
    },
    {
      "epoch": 3.2987699901154075,
      "grad_norm": 0.3380196690559387,
      "learning_rate": 3.4248795819797977e-06,
      "loss": 0.0078,
      "step": 2015720
    },
    {
      "epoch": 3.298802720554061,
      "grad_norm": 0.646888792514801,
      "learning_rate": 3.4248136897662804e-06,
      "loss": 0.0123,
      "step": 2015740
    },
    {
      "epoch": 3.2988354509927142,
      "grad_norm": 0.08955540508031845,
      "learning_rate": 3.424747797552763e-06,
      "loss": 0.0121,
      "step": 2015760
    },
    {
      "epoch": 3.2988681814313674,
      "grad_norm": 0.3165551722049713,
      "learning_rate": 3.4246819053392468e-06,
      "loss": 0.0093,
      "step": 2015780
    },
    {
      "epoch": 3.298900911870021,
      "grad_norm": 0.36570388078689575,
      "learning_rate": 3.4246160131257295e-06,
      "loss": 0.008,
      "step": 2015800
    },
    {
      "epoch": 3.298933642308674,
      "grad_norm": 0.5459997653961182,
      "learning_rate": 3.4245501209122122e-06,
      "loss": 0.0145,
      "step": 2015820
    },
    {
      "epoch": 3.2989663727473277,
      "grad_norm": 0.11878693848848343,
      "learning_rate": 3.424484228698695e-06,
      "loss": 0.0104,
      "step": 2015840
    },
    {
      "epoch": 3.298999103185981,
      "grad_norm": 0.0878334790468216,
      "learning_rate": 3.424418336485178e-06,
      "loss": 0.0091,
      "step": 2015860
    },
    {
      "epoch": 3.2990318336246345,
      "grad_norm": 0.34273862838745117,
      "learning_rate": 3.424352444271661e-06,
      "loss": 0.0177,
      "step": 2015880
    },
    {
      "epoch": 3.2990645640632876,
      "grad_norm": 0.2284320741891861,
      "learning_rate": 3.4242865520581436e-06,
      "loss": 0.009,
      "step": 2015900
    },
    {
      "epoch": 3.299097294501941,
      "grad_norm": 0.13262395560741425,
      "learning_rate": 3.4242206598446264e-06,
      "loss": 0.0156,
      "step": 2015920
    },
    {
      "epoch": 3.2991300249405944,
      "grad_norm": 0.30931127071380615,
      "learning_rate": 3.4241547676311095e-06,
      "loss": 0.008,
      "step": 2015940
    },
    {
      "epoch": 3.2991627553792475,
      "grad_norm": 0.34097474813461304,
      "learning_rate": 3.4240888754175923e-06,
      "loss": 0.0144,
      "step": 2015960
    },
    {
      "epoch": 3.299195485817901,
      "grad_norm": 0.26656076312065125,
      "learning_rate": 3.424022983204075e-06,
      "loss": 0.0092,
      "step": 2015980
    },
    {
      "epoch": 3.2992282162565543,
      "grad_norm": 0.8239591717720032,
      "learning_rate": 3.4239570909905578e-06,
      "loss": 0.0102,
      "step": 2016000
    },
    {
      "epoch": 3.2992609466952074,
      "grad_norm": 0.1473640501499176,
      "learning_rate": 3.4238911987770405e-06,
      "loss": 0.0114,
      "step": 2016020
    },
    {
      "epoch": 3.299293677133861,
      "grad_norm": 0.06537578254938126,
      "learning_rate": 3.4238253065635237e-06,
      "loss": 0.0113,
      "step": 2016040
    },
    {
      "epoch": 3.299326407572514,
      "grad_norm": 0.2564677298069,
      "learning_rate": 3.4237594143500064e-06,
      "loss": 0.0131,
      "step": 2016060
    },
    {
      "epoch": 3.2993591380111678,
      "grad_norm": 0.14391635358333588,
      "learning_rate": 3.423693522136489e-06,
      "loss": 0.0115,
      "step": 2016080
    },
    {
      "epoch": 3.299391868449821,
      "grad_norm": 0.2673650085926056,
      "learning_rate": 3.423627629922972e-06,
      "loss": 0.0198,
      "step": 2016100
    },
    {
      "epoch": 3.299424598888474,
      "grad_norm": 0.4535899758338928,
      "learning_rate": 3.423561737709455e-06,
      "loss": 0.0117,
      "step": 2016120
    },
    {
      "epoch": 3.2994573293271277,
      "grad_norm": 0.09288166463375092,
      "learning_rate": 3.423495845495938e-06,
      "loss": 0.0145,
      "step": 2016140
    },
    {
      "epoch": 3.299490059765781,
      "grad_norm": 0.4005122184753418,
      "learning_rate": 3.423429953282421e-06,
      "loss": 0.0097,
      "step": 2016160
    },
    {
      "epoch": 3.2995227902044344,
      "grad_norm": 0.1325351744890213,
      "learning_rate": 3.423364061068904e-06,
      "loss": 0.0115,
      "step": 2016180
    },
    {
      "epoch": 3.2995555206430875,
      "grad_norm": 0.48799532651901245,
      "learning_rate": 3.423298168855387e-06,
      "loss": 0.0108,
      "step": 2016200
    },
    {
      "epoch": 3.299588251081741,
      "grad_norm": 0.26206842064857483,
      "learning_rate": 3.4232322766418696e-06,
      "loss": 0.0101,
      "step": 2016220
    },
    {
      "epoch": 3.2996209815203943,
      "grad_norm": 0.09658164530992508,
      "learning_rate": 3.4231663844283523e-06,
      "loss": 0.012,
      "step": 2016240
    },
    {
      "epoch": 3.2996537119590474,
      "grad_norm": 0.563094437122345,
      "learning_rate": 3.4231004922148355e-06,
      "loss": 0.0159,
      "step": 2016260
    },
    {
      "epoch": 3.299686442397701,
      "grad_norm": 0.041882287710905075,
      "learning_rate": 3.4230346000013182e-06,
      "loss": 0.0097,
      "step": 2016280
    },
    {
      "epoch": 3.299719172836354,
      "grad_norm": 0.09263023734092712,
      "learning_rate": 3.422968707787801e-06,
      "loss": 0.0117,
      "step": 2016300
    },
    {
      "epoch": 3.299751903275008,
      "grad_norm": 0.16791704297065735,
      "learning_rate": 3.4229028155742837e-06,
      "loss": 0.0083,
      "step": 2016320
    },
    {
      "epoch": 3.299784633713661,
      "grad_norm": 0.11519738286733627,
      "learning_rate": 3.422836923360767e-06,
      "loss": 0.0117,
      "step": 2016340
    },
    {
      "epoch": 3.2998173641523145,
      "grad_norm": 0.19821427762508392,
      "learning_rate": 3.4227710311472496e-06,
      "loss": 0.0089,
      "step": 2016360
    },
    {
      "epoch": 3.2998500945909677,
      "grad_norm": 0.1445041447877884,
      "learning_rate": 3.4227051389337323e-06,
      "loss": 0.0105,
      "step": 2016380
    },
    {
      "epoch": 3.299882825029621,
      "grad_norm": 0.168379008769989,
      "learning_rate": 3.422639246720215e-06,
      "loss": 0.011,
      "step": 2016400
    },
    {
      "epoch": 3.2999155554682744,
      "grad_norm": 0.32143184542655945,
      "learning_rate": 3.422573354506698e-06,
      "loss": 0.0095,
      "step": 2016420
    },
    {
      "epoch": 3.2999482859069276,
      "grad_norm": 0.43905994296073914,
      "learning_rate": 3.422507462293181e-06,
      "loss": 0.0185,
      "step": 2016440
    },
    {
      "epoch": 3.299981016345581,
      "grad_norm": 0.2759668231010437,
      "learning_rate": 3.4224415700796637e-06,
      "loss": 0.0157,
      "step": 2016460
    },
    {
      "epoch": 3.3000137467842343,
      "grad_norm": 0.1870466023683548,
      "learning_rate": 3.4223756778661473e-06,
      "loss": 0.0101,
      "step": 2016480
    },
    {
      "epoch": 3.300046477222888,
      "grad_norm": 0.14053647220134735,
      "learning_rate": 3.42230978565263e-06,
      "loss": 0.0106,
      "step": 2016500
    },
    {
      "epoch": 3.300079207661541,
      "grad_norm": 0.22105178236961365,
      "learning_rate": 3.422243893439113e-06,
      "loss": 0.0112,
      "step": 2016520
    },
    {
      "epoch": 3.300111938100194,
      "grad_norm": 0.18093572556972504,
      "learning_rate": 3.4221780012255955e-06,
      "loss": 0.0146,
      "step": 2016540
    },
    {
      "epoch": 3.300144668538848,
      "grad_norm": 0.24018104374408722,
      "learning_rate": 3.4221121090120783e-06,
      "loss": 0.0192,
      "step": 2016560
    },
    {
      "epoch": 3.300177398977501,
      "grad_norm": 0.1476047933101654,
      "learning_rate": 3.4220462167985614e-06,
      "loss": 0.0111,
      "step": 2016580
    },
    {
      "epoch": 3.3002101294161545,
      "grad_norm": 0.05845710262656212,
      "learning_rate": 3.421980324585044e-06,
      "loss": 0.0104,
      "step": 2016600
    },
    {
      "epoch": 3.3002428598548077,
      "grad_norm": 0.35068365931510925,
      "learning_rate": 3.421914432371527e-06,
      "loss": 0.013,
      "step": 2016620
    },
    {
      "epoch": 3.3002755902934613,
      "grad_norm": 0.2662160396575928,
      "learning_rate": 3.4218485401580097e-06,
      "loss": 0.0161,
      "step": 2016640
    },
    {
      "epoch": 3.3003083207321144,
      "grad_norm": 0.13333727419376373,
      "learning_rate": 3.421782647944493e-06,
      "loss": 0.0126,
      "step": 2016660
    },
    {
      "epoch": 3.3003410511707676,
      "grad_norm": 0.625267505645752,
      "learning_rate": 3.4217167557309756e-06,
      "loss": 0.0108,
      "step": 2016680
    },
    {
      "epoch": 3.300373781609421,
      "grad_norm": 0.14842475950717926,
      "learning_rate": 3.4216508635174583e-06,
      "loss": 0.0131,
      "step": 2016700
    },
    {
      "epoch": 3.3004065120480743,
      "grad_norm": 0.48510509729385376,
      "learning_rate": 3.421584971303941e-06,
      "loss": 0.0088,
      "step": 2016720
    },
    {
      "epoch": 3.300439242486728,
      "grad_norm": 0.08868417888879776,
      "learning_rate": 3.421519079090424e-06,
      "loss": 0.0148,
      "step": 2016740
    },
    {
      "epoch": 3.300471972925381,
      "grad_norm": 0.30928805470466614,
      "learning_rate": 3.421453186876907e-06,
      "loss": 0.0155,
      "step": 2016760
    },
    {
      "epoch": 3.3005047033640347,
      "grad_norm": 0.3250337243080139,
      "learning_rate": 3.4213872946633897e-06,
      "loss": 0.0089,
      "step": 2016780
    },
    {
      "epoch": 3.300537433802688,
      "grad_norm": 0.24309837818145752,
      "learning_rate": 3.4213214024498724e-06,
      "loss": 0.0152,
      "step": 2016800
    },
    {
      "epoch": 3.300570164241341,
      "grad_norm": 0.2256752848625183,
      "learning_rate": 3.4212555102363556e-06,
      "loss": 0.0134,
      "step": 2016820
    },
    {
      "epoch": 3.3006028946799946,
      "grad_norm": 0.29571399092674255,
      "learning_rate": 3.4211896180228388e-06,
      "loss": 0.0119,
      "step": 2016840
    },
    {
      "epoch": 3.3006356251186477,
      "grad_norm": 0.7861104011535645,
      "learning_rate": 3.4211237258093215e-06,
      "loss": 0.0127,
      "step": 2016860
    },
    {
      "epoch": 3.3006683555573013,
      "grad_norm": 0.3363732695579529,
      "learning_rate": 3.4210578335958047e-06,
      "loss": 0.0219,
      "step": 2016880
    },
    {
      "epoch": 3.3007010859959545,
      "grad_norm": 0.17643968760967255,
      "learning_rate": 3.4209919413822874e-06,
      "loss": 0.011,
      "step": 2016900
    },
    {
      "epoch": 3.300733816434608,
      "grad_norm": 0.6050949692726135,
      "learning_rate": 3.42092604916877e-06,
      "loss": 0.0125,
      "step": 2016920
    },
    {
      "epoch": 3.300766546873261,
      "grad_norm": 0.373614639043808,
      "learning_rate": 3.420860156955253e-06,
      "loss": 0.0127,
      "step": 2016940
    },
    {
      "epoch": 3.3007992773119144,
      "grad_norm": 0.05171719938516617,
      "learning_rate": 3.4207942647417356e-06,
      "loss": 0.0087,
      "step": 2016960
    },
    {
      "epoch": 3.300832007750568,
      "grad_norm": 0.15818968415260315,
      "learning_rate": 3.4207283725282188e-06,
      "loss": 0.009,
      "step": 2016980
    },
    {
      "epoch": 3.300864738189221,
      "grad_norm": 0.1842334270477295,
      "learning_rate": 3.4206624803147015e-06,
      "loss": 0.0092,
      "step": 2017000
    },
    {
      "epoch": 3.3008974686278747,
      "grad_norm": 0.1579839438199997,
      "learning_rate": 3.4205965881011843e-06,
      "loss": 0.0147,
      "step": 2017020
    },
    {
      "epoch": 3.300930199066528,
      "grad_norm": 0.3139093816280365,
      "learning_rate": 3.420530695887667e-06,
      "loss": 0.0118,
      "step": 2017040
    },
    {
      "epoch": 3.3009629295051814,
      "grad_norm": 0.21597710251808167,
      "learning_rate": 3.42046480367415e-06,
      "loss": 0.0094,
      "step": 2017060
    },
    {
      "epoch": 3.3009956599438346,
      "grad_norm": 0.6708527207374573,
      "learning_rate": 3.420398911460633e-06,
      "loss": 0.0083,
      "step": 2017080
    },
    {
      "epoch": 3.3010283903824877,
      "grad_norm": 0.3620622456073761,
      "learning_rate": 3.4203330192471156e-06,
      "loss": 0.0149,
      "step": 2017100
    },
    {
      "epoch": 3.3010611208211413,
      "grad_norm": 0.11311397701501846,
      "learning_rate": 3.4202671270335984e-06,
      "loss": 0.0142,
      "step": 2017120
    },
    {
      "epoch": 3.3010938512597945,
      "grad_norm": 0.31718116998672485,
      "learning_rate": 3.4202012348200815e-06,
      "loss": 0.0085,
      "step": 2017140
    },
    {
      "epoch": 3.301126581698448,
      "grad_norm": 0.22755688428878784,
      "learning_rate": 3.4201353426065643e-06,
      "loss": 0.012,
      "step": 2017160
    },
    {
      "epoch": 3.3011593121371012,
      "grad_norm": 0.07953547686338425,
      "learning_rate": 3.4200694503930474e-06,
      "loss": 0.0106,
      "step": 2017180
    },
    {
      "epoch": 3.301192042575755,
      "grad_norm": 0.185506671667099,
      "learning_rate": 3.4200035581795306e-06,
      "loss": 0.0069,
      "step": 2017200
    },
    {
      "epoch": 3.301224773014408,
      "grad_norm": 0.10853095352649689,
      "learning_rate": 3.4199376659660133e-06,
      "loss": 0.0124,
      "step": 2017220
    },
    {
      "epoch": 3.301257503453061,
      "grad_norm": 0.3085157573223114,
      "learning_rate": 3.419871773752496e-06,
      "loss": 0.0099,
      "step": 2017240
    },
    {
      "epoch": 3.3012902338917147,
      "grad_norm": 0.5004661679267883,
      "learning_rate": 3.419805881538979e-06,
      "loss": 0.0106,
      "step": 2017260
    },
    {
      "epoch": 3.301322964330368,
      "grad_norm": 0.4669523239135742,
      "learning_rate": 3.419739989325462e-06,
      "loss": 0.0102,
      "step": 2017280
    },
    {
      "epoch": 3.3013556947690215,
      "grad_norm": 0.2605220377445221,
      "learning_rate": 3.4196740971119447e-06,
      "loss": 0.0118,
      "step": 2017300
    },
    {
      "epoch": 3.3013884252076746,
      "grad_norm": 0.9385151267051697,
      "learning_rate": 3.4196082048984275e-06,
      "loss": 0.0106,
      "step": 2017320
    },
    {
      "epoch": 3.301421155646328,
      "grad_norm": 0.36442825198173523,
      "learning_rate": 3.41954231268491e-06,
      "loss": 0.0093,
      "step": 2017340
    },
    {
      "epoch": 3.3014538860849814,
      "grad_norm": 0.18109913170337677,
      "learning_rate": 3.4194764204713934e-06,
      "loss": 0.012,
      "step": 2017360
    },
    {
      "epoch": 3.3014866165236345,
      "grad_norm": 0.15209166705608368,
      "learning_rate": 3.419410528257876e-06,
      "loss": 0.0114,
      "step": 2017380
    },
    {
      "epoch": 3.301519346962288,
      "grad_norm": 0.6373938918113708,
      "learning_rate": 3.419344636044359e-06,
      "loss": 0.0167,
      "step": 2017400
    },
    {
      "epoch": 3.3015520774009413,
      "grad_norm": 0.18892337381839752,
      "learning_rate": 3.4192787438308416e-06,
      "loss": 0.0092,
      "step": 2017420
    },
    {
      "epoch": 3.301584807839595,
      "grad_norm": 0.15837834775447845,
      "learning_rate": 3.4192128516173243e-06,
      "loss": 0.0138,
      "step": 2017440
    },
    {
      "epoch": 3.301617538278248,
      "grad_norm": 0.3695048689842224,
      "learning_rate": 3.4191469594038075e-06,
      "loss": 0.0145,
      "step": 2017460
    },
    {
      "epoch": 3.3016502687169016,
      "grad_norm": 0.16589833796024323,
      "learning_rate": 3.4190810671902902e-06,
      "loss": 0.0115,
      "step": 2017480
    },
    {
      "epoch": 3.3016829991555547,
      "grad_norm": 0.2594253122806549,
      "learning_rate": 3.419015174976773e-06,
      "loss": 0.0151,
      "step": 2017500
    },
    {
      "epoch": 3.301715729594208,
      "grad_norm": 0.1479104608297348,
      "learning_rate": 3.4189492827632557e-06,
      "loss": 0.011,
      "step": 2017520
    },
    {
      "epoch": 3.3017484600328615,
      "grad_norm": 0.2305961549282074,
      "learning_rate": 3.4188833905497393e-06,
      "loss": 0.0086,
      "step": 2017540
    },
    {
      "epoch": 3.3017811904715146,
      "grad_norm": 0.19113148748874664,
      "learning_rate": 3.418817498336222e-06,
      "loss": 0.0077,
      "step": 2017560
    },
    {
      "epoch": 3.3018139209101682,
      "grad_norm": 0.18553045392036438,
      "learning_rate": 3.4187516061227048e-06,
      "loss": 0.0106,
      "step": 2017580
    },
    {
      "epoch": 3.3018466513488214,
      "grad_norm": 0.641560971736908,
      "learning_rate": 3.418685713909188e-06,
      "loss": 0.0132,
      "step": 2017600
    },
    {
      "epoch": 3.3018793817874745,
      "grad_norm": 0.16161152720451355,
      "learning_rate": 3.4186198216956707e-06,
      "loss": 0.0127,
      "step": 2017620
    },
    {
      "epoch": 3.301912112226128,
      "grad_norm": 0.21224553883075714,
      "learning_rate": 3.4185539294821534e-06,
      "loss": 0.0111,
      "step": 2017640
    },
    {
      "epoch": 3.3019448426647813,
      "grad_norm": 0.1113947406411171,
      "learning_rate": 3.418488037268636e-06,
      "loss": 0.0139,
      "step": 2017660
    },
    {
      "epoch": 3.301977573103435,
      "grad_norm": 0.20810005068778992,
      "learning_rate": 3.4184221450551193e-06,
      "loss": 0.0133,
      "step": 2017680
    },
    {
      "epoch": 3.302010303542088,
      "grad_norm": 0.17865513265132904,
      "learning_rate": 3.418356252841602e-06,
      "loss": 0.0103,
      "step": 2017700
    },
    {
      "epoch": 3.302043033980741,
      "grad_norm": 0.12910905480384827,
      "learning_rate": 3.418290360628085e-06,
      "loss": 0.0104,
      "step": 2017720
    },
    {
      "epoch": 3.3020757644193948,
      "grad_norm": 0.18989135324954987,
      "learning_rate": 3.4182244684145675e-06,
      "loss": 0.0089,
      "step": 2017740
    },
    {
      "epoch": 3.302108494858048,
      "grad_norm": 0.10212624818086624,
      "learning_rate": 3.4181585762010507e-06,
      "loss": 0.0091,
      "step": 2017760
    },
    {
      "epoch": 3.3021412252967015,
      "grad_norm": 0.24232593178749084,
      "learning_rate": 3.4180926839875334e-06,
      "loss": 0.0126,
      "step": 2017780
    },
    {
      "epoch": 3.3021739557353547,
      "grad_norm": 0.1450994312763214,
      "learning_rate": 3.418026791774016e-06,
      "loss": 0.011,
      "step": 2017800
    },
    {
      "epoch": 3.3022066861740083,
      "grad_norm": 0.5849074125289917,
      "learning_rate": 3.417960899560499e-06,
      "loss": 0.0118,
      "step": 2017820
    },
    {
      "epoch": 3.3022394166126614,
      "grad_norm": 0.6713942289352417,
      "learning_rate": 3.4178950073469817e-06,
      "loss": 0.0111,
      "step": 2017840
    },
    {
      "epoch": 3.3022721470513146,
      "grad_norm": 0.14923742413520813,
      "learning_rate": 3.417829115133465e-06,
      "loss": 0.0123,
      "step": 2017860
    },
    {
      "epoch": 3.302304877489968,
      "grad_norm": 0.22982104122638702,
      "learning_rate": 3.4177632229199476e-06,
      "loss": 0.0108,
      "step": 2017880
    },
    {
      "epoch": 3.3023376079286213,
      "grad_norm": 0.45765024423599243,
      "learning_rate": 3.417697330706431e-06,
      "loss": 0.0093,
      "step": 2017900
    },
    {
      "epoch": 3.302370338367275,
      "grad_norm": 0.03900182247161865,
      "learning_rate": 3.417631438492914e-06,
      "loss": 0.0098,
      "step": 2017920
    },
    {
      "epoch": 3.302403068805928,
      "grad_norm": 0.3344637155532837,
      "learning_rate": 3.4175655462793966e-06,
      "loss": 0.0107,
      "step": 2017940
    },
    {
      "epoch": 3.3024357992445816,
      "grad_norm": 0.4440937042236328,
      "learning_rate": 3.4174996540658794e-06,
      "loss": 0.0085,
      "step": 2017960
    },
    {
      "epoch": 3.302468529683235,
      "grad_norm": 0.05392957478761673,
      "learning_rate": 3.417433761852362e-06,
      "loss": 0.009,
      "step": 2017980
    },
    {
      "epoch": 3.302501260121888,
      "grad_norm": 0.22571973502635956,
      "learning_rate": 3.4173678696388453e-06,
      "loss": 0.0127,
      "step": 2018000
    },
    {
      "epoch": 3.3025339905605415,
      "grad_norm": 0.11669006198644638,
      "learning_rate": 3.417301977425328e-06,
      "loss": 0.0118,
      "step": 2018020
    },
    {
      "epoch": 3.3025667209991947,
      "grad_norm": 0.4461936354637146,
      "learning_rate": 3.4172360852118108e-06,
      "loss": 0.0132,
      "step": 2018040
    },
    {
      "epoch": 3.3025994514378483,
      "grad_norm": 1.0415117740631104,
      "learning_rate": 3.4171701929982935e-06,
      "loss": 0.006,
      "step": 2018060
    },
    {
      "epoch": 3.3026321818765014,
      "grad_norm": 0.2719036936759949,
      "learning_rate": 3.4171043007847767e-06,
      "loss": 0.0156,
      "step": 2018080
    },
    {
      "epoch": 3.302664912315155,
      "grad_norm": 0.24244682490825653,
      "learning_rate": 3.4170384085712594e-06,
      "loss": 0.0122,
      "step": 2018100
    },
    {
      "epoch": 3.302697642753808,
      "grad_norm": 0.3714558184146881,
      "learning_rate": 3.416972516357742e-06,
      "loss": 0.0124,
      "step": 2018120
    },
    {
      "epoch": 3.3027303731924613,
      "grad_norm": 0.2080146223306656,
      "learning_rate": 3.416906624144225e-06,
      "loss": 0.0093,
      "step": 2018140
    },
    {
      "epoch": 3.302763103631115,
      "grad_norm": 0.20442235469818115,
      "learning_rate": 3.416840731930708e-06,
      "loss": 0.0099,
      "step": 2018160
    },
    {
      "epoch": 3.302795834069768,
      "grad_norm": 0.2163434624671936,
      "learning_rate": 3.4167748397171908e-06,
      "loss": 0.0138,
      "step": 2018180
    },
    {
      "epoch": 3.3028285645084217,
      "grad_norm": 0.12871766090393066,
      "learning_rate": 3.4167089475036735e-06,
      "loss": 0.0147,
      "step": 2018200
    },
    {
      "epoch": 3.302861294947075,
      "grad_norm": 0.4050597846508026,
      "learning_rate": 3.4166430552901563e-06,
      "loss": 0.016,
      "step": 2018220
    },
    {
      "epoch": 3.3028940253857284,
      "grad_norm": 0.07232910394668579,
      "learning_rate": 3.41657716307664e-06,
      "loss": 0.0105,
      "step": 2018240
    },
    {
      "epoch": 3.3029267558243816,
      "grad_norm": 1.0055210590362549,
      "learning_rate": 3.4165112708631226e-06,
      "loss": 0.0131,
      "step": 2018260
    },
    {
      "epoch": 3.3029594862630347,
      "grad_norm": 0.11430902034044266,
      "learning_rate": 3.4164453786496053e-06,
      "loss": 0.0094,
      "step": 2018280
    },
    {
      "epoch": 3.3029922167016883,
      "grad_norm": 0.11113466322422028,
      "learning_rate": 3.4163794864360885e-06,
      "loss": 0.0098,
      "step": 2018300
    },
    {
      "epoch": 3.3030249471403414,
      "grad_norm": 0.3349968492984772,
      "learning_rate": 3.4163135942225712e-06,
      "loss": 0.0085,
      "step": 2018320
    },
    {
      "epoch": 3.303057677578995,
      "grad_norm": 0.10297603160142899,
      "learning_rate": 3.416247702009054e-06,
      "loss": 0.0086,
      "step": 2018340
    },
    {
      "epoch": 3.303090408017648,
      "grad_norm": 0.23317186534404755,
      "learning_rate": 3.4161818097955367e-06,
      "loss": 0.0081,
      "step": 2018360
    },
    {
      "epoch": 3.303123138456302,
      "grad_norm": 0.23848454654216766,
      "learning_rate": 3.4161159175820195e-06,
      "loss": 0.0196,
      "step": 2018380
    },
    {
      "epoch": 3.303155868894955,
      "grad_norm": 0.23431998491287231,
      "learning_rate": 3.4160500253685026e-06,
      "loss": 0.0073,
      "step": 2018400
    },
    {
      "epoch": 3.303188599333608,
      "grad_norm": 0.35156238079071045,
      "learning_rate": 3.4159841331549854e-06,
      "loss": 0.0118,
      "step": 2018420
    },
    {
      "epoch": 3.3032213297722617,
      "grad_norm": 0.10527396947145462,
      "learning_rate": 3.415918240941468e-06,
      "loss": 0.0137,
      "step": 2018440
    },
    {
      "epoch": 3.303254060210915,
      "grad_norm": 3.043405055999756,
      "learning_rate": 3.415852348727951e-06,
      "loss": 0.0135,
      "step": 2018460
    },
    {
      "epoch": 3.3032867906495684,
      "grad_norm": 0.23930417001247406,
      "learning_rate": 3.415786456514434e-06,
      "loss": 0.0127,
      "step": 2018480
    },
    {
      "epoch": 3.3033195210882216,
      "grad_norm": 0.21923716366291046,
      "learning_rate": 3.4157205643009167e-06,
      "loss": 0.0067,
      "step": 2018500
    },
    {
      "epoch": 3.303352251526875,
      "grad_norm": 0.38586410880088806,
      "learning_rate": 3.4156546720873995e-06,
      "loss": 0.0139,
      "step": 2018520
    },
    {
      "epoch": 3.3033849819655283,
      "grad_norm": 0.07630769163370132,
      "learning_rate": 3.4155887798738822e-06,
      "loss": 0.0098,
      "step": 2018540
    },
    {
      "epoch": 3.3034177124041815,
      "grad_norm": 0.5015770196914673,
      "learning_rate": 3.4155228876603654e-06,
      "loss": 0.0106,
      "step": 2018560
    },
    {
      "epoch": 3.303450442842835,
      "grad_norm": 0.26374998688697815,
      "learning_rate": 3.415456995446848e-06,
      "loss": 0.0096,
      "step": 2018580
    },
    {
      "epoch": 3.303483173281488,
      "grad_norm": 0.236382395029068,
      "learning_rate": 3.4153911032333313e-06,
      "loss": 0.01,
      "step": 2018600
    },
    {
      "epoch": 3.303515903720142,
      "grad_norm": 0.8235263824462891,
      "learning_rate": 3.4153252110198144e-06,
      "loss": 0.0133,
      "step": 2018620
    },
    {
      "epoch": 3.303548634158795,
      "grad_norm": 0.28659138083457947,
      "learning_rate": 3.415259318806297e-06,
      "loss": 0.0145,
      "step": 2018640
    },
    {
      "epoch": 3.3035813645974486,
      "grad_norm": 0.1151481568813324,
      "learning_rate": 3.41519342659278e-06,
      "loss": 0.0092,
      "step": 2018660
    },
    {
      "epoch": 3.3036140950361017,
      "grad_norm": 0.4264673888683319,
      "learning_rate": 3.4151275343792627e-06,
      "loss": 0.0118,
      "step": 2018680
    },
    {
      "epoch": 3.303646825474755,
      "grad_norm": 0.08611933141946793,
      "learning_rate": 3.415061642165746e-06,
      "loss": 0.0098,
      "step": 2018700
    },
    {
      "epoch": 3.3036795559134084,
      "grad_norm": 0.4406377375125885,
      "learning_rate": 3.4149957499522286e-06,
      "loss": 0.0111,
      "step": 2018720
    },
    {
      "epoch": 3.3037122863520616,
      "grad_norm": 1.186434030532837,
      "learning_rate": 3.4149298577387113e-06,
      "loss": 0.0128,
      "step": 2018740
    },
    {
      "epoch": 3.303745016790715,
      "grad_norm": 0.06318943202495575,
      "learning_rate": 3.414863965525194e-06,
      "loss": 0.013,
      "step": 2018760
    },
    {
      "epoch": 3.3037777472293683,
      "grad_norm": 0.09809836745262146,
      "learning_rate": 3.414798073311677e-06,
      "loss": 0.0112,
      "step": 2018780
    },
    {
      "epoch": 3.303810477668022,
      "grad_norm": 0.059778742492198944,
      "learning_rate": 3.41473218109816e-06,
      "loss": 0.0124,
      "step": 2018800
    },
    {
      "epoch": 3.303843208106675,
      "grad_norm": 0.16293086111545563,
      "learning_rate": 3.4146662888846427e-06,
      "loss": 0.0114,
      "step": 2018820
    },
    {
      "epoch": 3.3038759385453282,
      "grad_norm": 0.16755411028862,
      "learning_rate": 3.4146003966711254e-06,
      "loss": 0.0177,
      "step": 2018840
    },
    {
      "epoch": 3.303908668983982,
      "grad_norm": 0.22217954695224762,
      "learning_rate": 3.414534504457608e-06,
      "loss": 0.0143,
      "step": 2018860
    },
    {
      "epoch": 3.303941399422635,
      "grad_norm": 0.08582602441310883,
      "learning_rate": 3.4144686122440913e-06,
      "loss": 0.018,
      "step": 2018880
    },
    {
      "epoch": 3.3039741298612886,
      "grad_norm": 0.447537362575531,
      "learning_rate": 3.414402720030574e-06,
      "loss": 0.0106,
      "step": 2018900
    },
    {
      "epoch": 3.3040068602999417,
      "grad_norm": 0.3265973627567291,
      "learning_rate": 3.414336827817057e-06,
      "loss": 0.0116,
      "step": 2018920
    },
    {
      "epoch": 3.3040395907385953,
      "grad_norm": 0.08158661425113678,
      "learning_rate": 3.4142709356035404e-06,
      "loss": 0.0092,
      "step": 2018940
    },
    {
      "epoch": 3.3040723211772485,
      "grad_norm": 0.22940117120742798,
      "learning_rate": 3.414205043390023e-06,
      "loss": 0.0081,
      "step": 2018960
    },
    {
      "epoch": 3.3041050516159016,
      "grad_norm": 0.2638949751853943,
      "learning_rate": 3.414139151176506e-06,
      "loss": 0.0099,
      "step": 2018980
    },
    {
      "epoch": 3.304137782054555,
      "grad_norm": 0.2780051529407501,
      "learning_rate": 3.4140732589629886e-06,
      "loss": 0.0097,
      "step": 2019000
    },
    {
      "epoch": 3.3041705124932084,
      "grad_norm": 0.3048316538333893,
      "learning_rate": 3.4140073667494718e-06,
      "loss": 0.0081,
      "step": 2019020
    },
    {
      "epoch": 3.304203242931862,
      "grad_norm": 0.6262109875679016,
      "learning_rate": 3.4139414745359545e-06,
      "loss": 0.0121,
      "step": 2019040
    },
    {
      "epoch": 3.304235973370515,
      "grad_norm": 0.1936262845993042,
      "learning_rate": 3.4138755823224373e-06,
      "loss": 0.013,
      "step": 2019060
    },
    {
      "epoch": 3.3042687038091683,
      "grad_norm": 0.5849193334579468,
      "learning_rate": 3.41380969010892e-06,
      "loss": 0.01,
      "step": 2019080
    },
    {
      "epoch": 3.304301434247822,
      "grad_norm": 0.14909467101097107,
      "learning_rate": 3.413743797895403e-06,
      "loss": 0.0096,
      "step": 2019100
    },
    {
      "epoch": 3.304334164686475,
      "grad_norm": 0.3327588438987732,
      "learning_rate": 3.413677905681886e-06,
      "loss": 0.0115,
      "step": 2019120
    },
    {
      "epoch": 3.3043668951251286,
      "grad_norm": 0.34001249074935913,
      "learning_rate": 3.4136120134683686e-06,
      "loss": 0.0113,
      "step": 2019140
    },
    {
      "epoch": 3.3043996255637818,
      "grad_norm": 0.2137981504201889,
      "learning_rate": 3.4135461212548514e-06,
      "loss": 0.0086,
      "step": 2019160
    },
    {
      "epoch": 3.304432356002435,
      "grad_norm": 2.343327522277832,
      "learning_rate": 3.4134802290413345e-06,
      "loss": 0.0138,
      "step": 2019180
    },
    {
      "epoch": 3.3044650864410885,
      "grad_norm": 0.6546483635902405,
      "learning_rate": 3.4134143368278173e-06,
      "loss": 0.0161,
      "step": 2019200
    },
    {
      "epoch": 3.3044978168797416,
      "grad_norm": 0.2177397459745407,
      "learning_rate": 3.4133484446143e-06,
      "loss": 0.0096,
      "step": 2019220
    },
    {
      "epoch": 3.3045305473183952,
      "grad_norm": 0.293651282787323,
      "learning_rate": 3.4132825524007828e-06,
      "loss": 0.0095,
      "step": 2019240
    },
    {
      "epoch": 3.3045632777570484,
      "grad_norm": 0.33274146914482117,
      "learning_rate": 3.413216660187266e-06,
      "loss": 0.0112,
      "step": 2019260
    },
    {
      "epoch": 3.304596008195702,
      "grad_norm": 0.4086180627346039,
      "learning_rate": 3.4131507679737487e-06,
      "loss": 0.0082,
      "step": 2019280
    },
    {
      "epoch": 3.304628738634355,
      "grad_norm": 0.3549702763557434,
      "learning_rate": 3.413084875760232e-06,
      "loss": 0.0122,
      "step": 2019300
    },
    {
      "epoch": 3.3046614690730083,
      "grad_norm": 0.21381063759326935,
      "learning_rate": 3.413018983546715e-06,
      "loss": 0.013,
      "step": 2019320
    },
    {
      "epoch": 3.304694199511662,
      "grad_norm": 0.40630191564559937,
      "learning_rate": 3.4129530913331977e-06,
      "loss": 0.0124,
      "step": 2019340
    },
    {
      "epoch": 3.304726929950315,
      "grad_norm": 0.20019932091236115,
      "learning_rate": 3.4128871991196805e-06,
      "loss": 0.0078,
      "step": 2019360
    },
    {
      "epoch": 3.3047596603889686,
      "grad_norm": 0.1988248974084854,
      "learning_rate": 3.4128213069061632e-06,
      "loss": 0.0107,
      "step": 2019380
    },
    {
      "epoch": 3.3047923908276218,
      "grad_norm": 0.3759669065475464,
      "learning_rate": 3.412755414692646e-06,
      "loss": 0.0119,
      "step": 2019400
    },
    {
      "epoch": 3.3048251212662754,
      "grad_norm": 0.43357834219932556,
      "learning_rate": 3.412689522479129e-06,
      "loss": 0.0101,
      "step": 2019420
    },
    {
      "epoch": 3.3048578517049285,
      "grad_norm": 0.3275213837623596,
      "learning_rate": 3.412623630265612e-06,
      "loss": 0.0127,
      "step": 2019440
    },
    {
      "epoch": 3.3048905821435817,
      "grad_norm": 0.3418219983577728,
      "learning_rate": 3.4125577380520946e-06,
      "loss": 0.0083,
      "step": 2019460
    },
    {
      "epoch": 3.3049233125822353,
      "grad_norm": 0.5220263600349426,
      "learning_rate": 3.4124918458385773e-06,
      "loss": 0.0135,
      "step": 2019480
    },
    {
      "epoch": 3.3049560430208884,
      "grad_norm": 0.5465345978736877,
      "learning_rate": 3.4124259536250605e-06,
      "loss": 0.0086,
      "step": 2019500
    },
    {
      "epoch": 3.304988773459542,
      "grad_norm": 0.4334341883659363,
      "learning_rate": 3.4123600614115432e-06,
      "loss": 0.0143,
      "step": 2019520
    },
    {
      "epoch": 3.305021503898195,
      "grad_norm": 0.26241645216941833,
      "learning_rate": 3.412294169198026e-06,
      "loss": 0.0109,
      "step": 2019540
    },
    {
      "epoch": 3.3050542343368488,
      "grad_norm": 0.43054959177970886,
      "learning_rate": 3.4122282769845087e-06,
      "loss": 0.0123,
      "step": 2019560
    },
    {
      "epoch": 3.305086964775502,
      "grad_norm": 0.46485593914985657,
      "learning_rate": 3.412162384770992e-06,
      "loss": 0.0114,
      "step": 2019580
    },
    {
      "epoch": 3.305119695214155,
      "grad_norm": 0.4418649673461914,
      "learning_rate": 3.4120964925574746e-06,
      "loss": 0.0152,
      "step": 2019600
    },
    {
      "epoch": 3.3051524256528086,
      "grad_norm": 1.171540379524231,
      "learning_rate": 3.4120306003439574e-06,
      "loss": 0.0092,
      "step": 2019620
    },
    {
      "epoch": 3.305185156091462,
      "grad_norm": 0.2887480556964874,
      "learning_rate": 3.41196470813044e-06,
      "loss": 0.0109,
      "step": 2019640
    },
    {
      "epoch": 3.3052178865301154,
      "grad_norm": 0.3387734293937683,
      "learning_rate": 3.4118988159169237e-06,
      "loss": 0.0158,
      "step": 2019660
    },
    {
      "epoch": 3.3052506169687685,
      "grad_norm": 0.3975289463996887,
      "learning_rate": 3.4118329237034064e-06,
      "loss": 0.0108,
      "step": 2019680
    },
    {
      "epoch": 3.305283347407422,
      "grad_norm": 0.23489497601985931,
      "learning_rate": 3.411767031489889e-06,
      "loss": 0.0075,
      "step": 2019700
    },
    {
      "epoch": 3.3053160778460753,
      "grad_norm": 0.2790619432926178,
      "learning_rate": 3.4117011392763723e-06,
      "loss": 0.009,
      "step": 2019720
    },
    {
      "epoch": 3.3053488082847284,
      "grad_norm": 0.14066097140312195,
      "learning_rate": 3.411635247062855e-06,
      "loss": 0.009,
      "step": 2019740
    },
    {
      "epoch": 3.305381538723382,
      "grad_norm": 0.1889672428369522,
      "learning_rate": 3.411569354849338e-06,
      "loss": 0.0154,
      "step": 2019760
    },
    {
      "epoch": 3.305414269162035,
      "grad_norm": 0.305601567029953,
      "learning_rate": 3.4115034626358206e-06,
      "loss": 0.0115,
      "step": 2019780
    },
    {
      "epoch": 3.3054469996006888,
      "grad_norm": 0.5536128282546997,
      "learning_rate": 3.4114375704223033e-06,
      "loss": 0.0129,
      "step": 2019800
    },
    {
      "epoch": 3.305479730039342,
      "grad_norm": 0.3222578465938568,
      "learning_rate": 3.4113716782087865e-06,
      "loss": 0.013,
      "step": 2019820
    },
    {
      "epoch": 3.3055124604779955,
      "grad_norm": 0.5447092652320862,
      "learning_rate": 3.411305785995269e-06,
      "loss": 0.0097,
      "step": 2019840
    },
    {
      "epoch": 3.3055451909166487,
      "grad_norm": 0.529597818851471,
      "learning_rate": 3.411239893781752e-06,
      "loss": 0.0088,
      "step": 2019860
    },
    {
      "epoch": 3.305577921355302,
      "grad_norm": 1.3094128370285034,
      "learning_rate": 3.4111740015682347e-06,
      "loss": 0.0159,
      "step": 2019880
    },
    {
      "epoch": 3.3056106517939554,
      "grad_norm": 0.26968660950660706,
      "learning_rate": 3.411108109354718e-06,
      "loss": 0.0118,
      "step": 2019900
    },
    {
      "epoch": 3.3056433822326086,
      "grad_norm": 0.06583802402019501,
      "learning_rate": 3.4110422171412006e-06,
      "loss": 0.0129,
      "step": 2019920
    },
    {
      "epoch": 3.305676112671262,
      "grad_norm": 0.2214830368757248,
      "learning_rate": 3.4109763249276833e-06,
      "loss": 0.0079,
      "step": 2019940
    },
    {
      "epoch": 3.3057088431099153,
      "grad_norm": 0.07335461676120758,
      "learning_rate": 3.410910432714166e-06,
      "loss": 0.0079,
      "step": 2019960
    },
    {
      "epoch": 3.305741573548569,
      "grad_norm": 0.43205082416534424,
      "learning_rate": 3.4108445405006492e-06,
      "loss": 0.0096,
      "step": 2019980
    },
    {
      "epoch": 3.305774303987222,
      "grad_norm": 0.14008861780166626,
      "learning_rate": 3.4107786482871324e-06,
      "loss": 0.0128,
      "step": 2020000
    },
    {
      "epoch": 3.305807034425875,
      "grad_norm": 0.2295496165752411,
      "learning_rate": 3.410712756073615e-06,
      "loss": 0.0126,
      "step": 2020020
    },
    {
      "epoch": 3.305839764864529,
      "grad_norm": 0.5539085268974304,
      "learning_rate": 3.4106468638600983e-06,
      "loss": 0.0134,
      "step": 2020040
    },
    {
      "epoch": 3.305872495303182,
      "grad_norm": 0.16598676145076752,
      "learning_rate": 3.410580971646581e-06,
      "loss": 0.0101,
      "step": 2020060
    },
    {
      "epoch": 3.3059052257418355,
      "grad_norm": 0.21418282389640808,
      "learning_rate": 3.4105150794330638e-06,
      "loss": 0.0096,
      "step": 2020080
    },
    {
      "epoch": 3.3059379561804887,
      "grad_norm": 0.4275415539741516,
      "learning_rate": 3.4104491872195465e-06,
      "loss": 0.0161,
      "step": 2020100
    },
    {
      "epoch": 3.3059706866191423,
      "grad_norm": 0.16309447586536407,
      "learning_rate": 3.4103832950060297e-06,
      "loss": 0.011,
      "step": 2020120
    },
    {
      "epoch": 3.3060034170577954,
      "grad_norm": 0.10393097996711731,
      "learning_rate": 3.4103174027925124e-06,
      "loss": 0.0132,
      "step": 2020140
    },
    {
      "epoch": 3.3060361474964486,
      "grad_norm": 0.6315109133720398,
      "learning_rate": 3.410251510578995e-06,
      "loss": 0.0123,
      "step": 2020160
    },
    {
      "epoch": 3.306068877935102,
      "grad_norm": 0.17655856907367706,
      "learning_rate": 3.410185618365478e-06,
      "loss": 0.0101,
      "step": 2020180
    },
    {
      "epoch": 3.3061016083737553,
      "grad_norm": 0.24230031669139862,
      "learning_rate": 3.410119726151961e-06,
      "loss": 0.0106,
      "step": 2020200
    },
    {
      "epoch": 3.306134338812409,
      "grad_norm": 0.3522152602672577,
      "learning_rate": 3.410053833938444e-06,
      "loss": 0.0162,
      "step": 2020220
    },
    {
      "epoch": 3.306167069251062,
      "grad_norm": 0.2603476643562317,
      "learning_rate": 3.4099879417249265e-06,
      "loss": 0.0098,
      "step": 2020240
    },
    {
      "epoch": 3.3061997996897157,
      "grad_norm": 0.16171251237392426,
      "learning_rate": 3.4099220495114093e-06,
      "loss": 0.0138,
      "step": 2020260
    },
    {
      "epoch": 3.306232530128369,
      "grad_norm": 0.91404128074646,
      "learning_rate": 3.409856157297892e-06,
      "loss": 0.0085,
      "step": 2020280
    },
    {
      "epoch": 3.306265260567022,
      "grad_norm": 0.5309065580368042,
      "learning_rate": 3.409790265084375e-06,
      "loss": 0.0139,
      "step": 2020300
    },
    {
      "epoch": 3.3062979910056756,
      "grad_norm": 0.29763683676719666,
      "learning_rate": 3.409724372870858e-06,
      "loss": 0.0096,
      "step": 2020320
    },
    {
      "epoch": 3.3063307214443287,
      "grad_norm": 0.20092350244522095,
      "learning_rate": 3.4096584806573407e-06,
      "loss": 0.011,
      "step": 2020340
    },
    {
      "epoch": 3.3063634518829823,
      "grad_norm": 1.3525705337524414,
      "learning_rate": 3.4095925884438242e-06,
      "loss": 0.0138,
      "step": 2020360
    },
    {
      "epoch": 3.3063961823216355,
      "grad_norm": 0.8293859958648682,
      "learning_rate": 3.409526696230307e-06,
      "loss": 0.0147,
      "step": 2020380
    },
    {
      "epoch": 3.306428912760289,
      "grad_norm": 0.1684112697839737,
      "learning_rate": 3.4094608040167897e-06,
      "loss": 0.0119,
      "step": 2020400
    },
    {
      "epoch": 3.306461643198942,
      "grad_norm": 0.12875430285930634,
      "learning_rate": 3.4093949118032725e-06,
      "loss": 0.0116,
      "step": 2020420
    },
    {
      "epoch": 3.3064943736375954,
      "grad_norm": 0.5728153586387634,
      "learning_rate": 3.4093290195897556e-06,
      "loss": 0.0122,
      "step": 2020440
    },
    {
      "epoch": 3.306527104076249,
      "grad_norm": 0.5995612740516663,
      "learning_rate": 3.4092631273762384e-06,
      "loss": 0.0126,
      "step": 2020460
    },
    {
      "epoch": 3.306559834514902,
      "grad_norm": 0.20009808242321014,
      "learning_rate": 3.409197235162721e-06,
      "loss": 0.0128,
      "step": 2020480
    },
    {
      "epoch": 3.3065925649535557,
      "grad_norm": 0.1753462851047516,
      "learning_rate": 3.409131342949204e-06,
      "loss": 0.0126,
      "step": 2020500
    },
    {
      "epoch": 3.306625295392209,
      "grad_norm": 0.709262490272522,
      "learning_rate": 3.409065450735687e-06,
      "loss": 0.0195,
      "step": 2020520
    },
    {
      "epoch": 3.306658025830862,
      "grad_norm": 0.3641602098941803,
      "learning_rate": 3.4089995585221697e-06,
      "loss": 0.0076,
      "step": 2020540
    },
    {
      "epoch": 3.3066907562695156,
      "grad_norm": 0.07355806976556778,
      "learning_rate": 3.4089336663086525e-06,
      "loss": 0.0138,
      "step": 2020560
    },
    {
      "epoch": 3.3067234867081687,
      "grad_norm": 0.07203302532434464,
      "learning_rate": 3.4088677740951352e-06,
      "loss": 0.0147,
      "step": 2020580
    },
    {
      "epoch": 3.3067562171468223,
      "grad_norm": 0.2940073609352112,
      "learning_rate": 3.4088018818816184e-06,
      "loss": 0.0131,
      "step": 2020600
    },
    {
      "epoch": 3.3067889475854755,
      "grad_norm": 0.9537051916122437,
      "learning_rate": 3.408735989668101e-06,
      "loss": 0.0144,
      "step": 2020620
    },
    {
      "epoch": 3.3068216780241286,
      "grad_norm": 0.11252105981111526,
      "learning_rate": 3.408670097454584e-06,
      "loss": 0.0098,
      "step": 2020640
    },
    {
      "epoch": 3.3068544084627822,
      "grad_norm": 0.5344692468643188,
      "learning_rate": 3.4086042052410666e-06,
      "loss": 0.0126,
      "step": 2020660
    },
    {
      "epoch": 3.3068871389014354,
      "grad_norm": 0.12800262868404388,
      "learning_rate": 3.4085383130275498e-06,
      "loss": 0.009,
      "step": 2020680
    },
    {
      "epoch": 3.306919869340089,
      "grad_norm": 0.4174346327781677,
      "learning_rate": 3.408472420814033e-06,
      "loss": 0.0108,
      "step": 2020700
    },
    {
      "epoch": 3.306952599778742,
      "grad_norm": 0.4090375304222107,
      "learning_rate": 3.4084065286005157e-06,
      "loss": 0.008,
      "step": 2020720
    },
    {
      "epoch": 3.3069853302173957,
      "grad_norm": 0.24389570951461792,
      "learning_rate": 3.408340636386999e-06,
      "loss": 0.01,
      "step": 2020740
    },
    {
      "epoch": 3.307018060656049,
      "grad_norm": 0.19249817728996277,
      "learning_rate": 3.4082747441734816e-06,
      "loss": 0.0097,
      "step": 2020760
    },
    {
      "epoch": 3.307050791094702,
      "grad_norm": 0.2220989614725113,
      "learning_rate": 3.4082088519599643e-06,
      "loss": 0.0134,
      "step": 2020780
    },
    {
      "epoch": 3.3070835215333556,
      "grad_norm": 0.3048487603664398,
      "learning_rate": 3.408142959746447e-06,
      "loss": 0.0116,
      "step": 2020800
    },
    {
      "epoch": 3.3071162519720088,
      "grad_norm": 0.05701690539717674,
      "learning_rate": 3.40807706753293e-06,
      "loss": 0.0065,
      "step": 2020820
    },
    {
      "epoch": 3.3071489824106624,
      "grad_norm": 0.11593406647443771,
      "learning_rate": 3.408011175319413e-06,
      "loss": 0.0078,
      "step": 2020840
    },
    {
      "epoch": 3.3071817128493155,
      "grad_norm": 0.5087106227874756,
      "learning_rate": 3.4079452831058957e-06,
      "loss": 0.0147,
      "step": 2020860
    },
    {
      "epoch": 3.307214443287969,
      "grad_norm": 0.21860052645206451,
      "learning_rate": 3.4078793908923784e-06,
      "loss": 0.0114,
      "step": 2020880
    },
    {
      "epoch": 3.3072471737266222,
      "grad_norm": 0.5219382643699646,
      "learning_rate": 3.407813498678861e-06,
      "loss": 0.0151,
      "step": 2020900
    },
    {
      "epoch": 3.3072799041652754,
      "grad_norm": 0.21664917469024658,
      "learning_rate": 3.4077476064653443e-06,
      "loss": 0.0146,
      "step": 2020920
    },
    {
      "epoch": 3.307312634603929,
      "grad_norm": 0.1895020604133606,
      "learning_rate": 3.407681714251827e-06,
      "loss": 0.01,
      "step": 2020940
    },
    {
      "epoch": 3.307345365042582,
      "grad_norm": 0.6119685769081116,
      "learning_rate": 3.40761582203831e-06,
      "loss": 0.0121,
      "step": 2020960
    },
    {
      "epoch": 3.3073780954812357,
      "grad_norm": 0.10018295794725418,
      "learning_rate": 3.4075499298247926e-06,
      "loss": 0.0152,
      "step": 2020980
    },
    {
      "epoch": 3.307410825919889,
      "grad_norm": 0.0709674283862114,
      "learning_rate": 3.4074840376112757e-06,
      "loss": 0.0165,
      "step": 2021000
    },
    {
      "epoch": 3.3074435563585425,
      "grad_norm": 0.3074217140674591,
      "learning_rate": 3.4074181453977585e-06,
      "loss": 0.0071,
      "step": 2021020
    },
    {
      "epoch": 3.3074762867971956,
      "grad_norm": 0.12954826653003693,
      "learning_rate": 3.407352253184241e-06,
      "loss": 0.0121,
      "step": 2021040
    },
    {
      "epoch": 3.307509017235849,
      "grad_norm": 0.34862014651298523,
      "learning_rate": 3.407286360970725e-06,
      "loss": 0.0155,
      "step": 2021060
    },
    {
      "epoch": 3.3075417476745024,
      "grad_norm": 0.2813655436038971,
      "learning_rate": 3.4072204687572075e-06,
      "loss": 0.01,
      "step": 2021080
    },
    {
      "epoch": 3.3075744781131555,
      "grad_norm": 0.13324172794818878,
      "learning_rate": 3.4071545765436903e-06,
      "loss": 0.0127,
      "step": 2021100
    },
    {
      "epoch": 3.307607208551809,
      "grad_norm": 0.23311111330986023,
      "learning_rate": 3.407088684330173e-06,
      "loss": 0.0104,
      "step": 2021120
    },
    {
      "epoch": 3.3076399389904623,
      "grad_norm": 0.8331181406974792,
      "learning_rate": 3.407022792116656e-06,
      "loss": 0.0078,
      "step": 2021140
    },
    {
      "epoch": 3.307672669429116,
      "grad_norm": 0.1707153022289276,
      "learning_rate": 3.406956899903139e-06,
      "loss": 0.0144,
      "step": 2021160
    },
    {
      "epoch": 3.307705399867769,
      "grad_norm": 0.0672706738114357,
      "learning_rate": 3.4068910076896217e-06,
      "loss": 0.0098,
      "step": 2021180
    },
    {
      "epoch": 3.307738130306422,
      "grad_norm": 0.36556994915008545,
      "learning_rate": 3.4068251154761044e-06,
      "loss": 0.0111,
      "step": 2021200
    },
    {
      "epoch": 3.3077708607450758,
      "grad_norm": 0.15558715164661407,
      "learning_rate": 3.4067592232625876e-06,
      "loss": 0.0093,
      "step": 2021220
    },
    {
      "epoch": 3.307803591183729,
      "grad_norm": 0.7339414358139038,
      "learning_rate": 3.4066933310490703e-06,
      "loss": 0.0131,
      "step": 2021240
    },
    {
      "epoch": 3.3078363216223825,
      "grad_norm": 0.3443160057067871,
      "learning_rate": 3.406627438835553e-06,
      "loss": 0.0121,
      "step": 2021260
    },
    {
      "epoch": 3.3078690520610357,
      "grad_norm": 0.10857110470533371,
      "learning_rate": 3.4065615466220358e-06,
      "loss": 0.0093,
      "step": 2021280
    },
    {
      "epoch": 3.3079017824996892,
      "grad_norm": 0.07515846192836761,
      "learning_rate": 3.4064956544085185e-06,
      "loss": 0.0136,
      "step": 2021300
    },
    {
      "epoch": 3.3079345129383424,
      "grad_norm": 0.5100229382514954,
      "learning_rate": 3.4064297621950017e-06,
      "loss": 0.0138,
      "step": 2021320
    },
    {
      "epoch": 3.3079672433769955,
      "grad_norm": 0.19744084775447845,
      "learning_rate": 3.4063638699814844e-06,
      "loss": 0.0131,
      "step": 2021340
    },
    {
      "epoch": 3.307999973815649,
      "grad_norm": 0.5520387291908264,
      "learning_rate": 3.406297977767967e-06,
      "loss": 0.0137,
      "step": 2021360
    },
    {
      "epoch": 3.3080327042543023,
      "grad_norm": 0.4813541769981384,
      "learning_rate": 3.40623208555445e-06,
      "loss": 0.0064,
      "step": 2021380
    },
    {
      "epoch": 3.308065434692956,
      "grad_norm": 0.18079771101474762,
      "learning_rate": 3.4061661933409335e-06,
      "loss": 0.0097,
      "step": 2021400
    },
    {
      "epoch": 3.308098165131609,
      "grad_norm": 0.19270581007003784,
      "learning_rate": 3.4061003011274162e-06,
      "loss": 0.0089,
      "step": 2021420
    },
    {
      "epoch": 3.3081308955702626,
      "grad_norm": 0.16518788039684296,
      "learning_rate": 3.406034408913899e-06,
      "loss": 0.0057,
      "step": 2021440
    },
    {
      "epoch": 3.308163626008916,
      "grad_norm": 0.2911904454231262,
      "learning_rate": 3.405968516700382e-06,
      "loss": 0.0121,
      "step": 2021460
    },
    {
      "epoch": 3.308196356447569,
      "grad_norm": 0.502622127532959,
      "learning_rate": 3.405902624486865e-06,
      "loss": 0.0103,
      "step": 2021480
    },
    {
      "epoch": 3.3082290868862225,
      "grad_norm": 0.49872204661369324,
      "learning_rate": 3.4058367322733476e-06,
      "loss": 0.0142,
      "step": 2021500
    },
    {
      "epoch": 3.3082618173248757,
      "grad_norm": 0.1715492606163025,
      "learning_rate": 3.4057708400598303e-06,
      "loss": 0.014,
      "step": 2021520
    },
    {
      "epoch": 3.3082945477635293,
      "grad_norm": 0.23574039340019226,
      "learning_rate": 3.4057049478463135e-06,
      "loss": 0.0113,
      "step": 2021540
    },
    {
      "epoch": 3.3083272782021824,
      "grad_norm": 0.4439055621623993,
      "learning_rate": 3.4056390556327962e-06,
      "loss": 0.0103,
      "step": 2021560
    },
    {
      "epoch": 3.308360008640836,
      "grad_norm": 0.3283960819244385,
      "learning_rate": 3.405573163419279e-06,
      "loss": 0.011,
      "step": 2021580
    },
    {
      "epoch": 3.308392739079489,
      "grad_norm": 0.190672367811203,
      "learning_rate": 3.4055072712057617e-06,
      "loss": 0.0072,
      "step": 2021600
    },
    {
      "epoch": 3.3084254695181423,
      "grad_norm": 0.24160535633563995,
      "learning_rate": 3.405441378992245e-06,
      "loss": 0.0091,
      "step": 2021620
    },
    {
      "epoch": 3.308458199956796,
      "grad_norm": 0.5862231850624084,
      "learning_rate": 3.4053754867787276e-06,
      "loss": 0.0194,
      "step": 2021640
    },
    {
      "epoch": 3.308490930395449,
      "grad_norm": 0.186013862490654,
      "learning_rate": 3.4053095945652104e-06,
      "loss": 0.0089,
      "step": 2021660
    },
    {
      "epoch": 3.3085236608341027,
      "grad_norm": 0.4164552688598633,
      "learning_rate": 3.405243702351693e-06,
      "loss": 0.0071,
      "step": 2021680
    },
    {
      "epoch": 3.308556391272756,
      "grad_norm": 0.6374678015708923,
      "learning_rate": 3.405177810138176e-06,
      "loss": 0.0124,
      "step": 2021700
    },
    {
      "epoch": 3.3085891217114094,
      "grad_norm": 0.2160930037498474,
      "learning_rate": 3.405111917924659e-06,
      "loss": 0.0127,
      "step": 2021720
    },
    {
      "epoch": 3.3086218521500625,
      "grad_norm": 0.06613413244485855,
      "learning_rate": 3.4050460257111418e-06,
      "loss": 0.0094,
      "step": 2021740
    },
    {
      "epoch": 3.3086545825887157,
      "grad_norm": 0.3523952066898346,
      "learning_rate": 3.4049801334976253e-06,
      "loss": 0.0124,
      "step": 2021760
    },
    {
      "epoch": 3.3086873130273693,
      "grad_norm": 0.43318307399749756,
      "learning_rate": 3.404914241284108e-06,
      "loss": 0.0087,
      "step": 2021780
    },
    {
      "epoch": 3.3087200434660224,
      "grad_norm": 0.28112977743148804,
      "learning_rate": 3.404848349070591e-06,
      "loss": 0.0112,
      "step": 2021800
    },
    {
      "epoch": 3.308752773904676,
      "grad_norm": 0.14469414949417114,
      "learning_rate": 3.4047824568570736e-06,
      "loss": 0.0144,
      "step": 2021820
    },
    {
      "epoch": 3.308785504343329,
      "grad_norm": 0.5589229464530945,
      "learning_rate": 3.4047165646435563e-06,
      "loss": 0.0091,
      "step": 2021840
    },
    {
      "epoch": 3.308818234781983,
      "grad_norm": 0.34285372495651245,
      "learning_rate": 3.4046506724300395e-06,
      "loss": 0.0111,
      "step": 2021860
    },
    {
      "epoch": 3.308850965220636,
      "grad_norm": 0.3085438311100006,
      "learning_rate": 3.404584780216522e-06,
      "loss": 0.0083,
      "step": 2021880
    },
    {
      "epoch": 3.308883695659289,
      "grad_norm": 0.13172374665737152,
      "learning_rate": 3.404518888003005e-06,
      "loss": 0.0095,
      "step": 2021900
    },
    {
      "epoch": 3.3089164260979427,
      "grad_norm": 0.3724743723869324,
      "learning_rate": 3.4044529957894877e-06,
      "loss": 0.0101,
      "step": 2021920
    },
    {
      "epoch": 3.308949156536596,
      "grad_norm": 0.32173389196395874,
      "learning_rate": 3.404387103575971e-06,
      "loss": 0.017,
      "step": 2021940
    },
    {
      "epoch": 3.3089818869752494,
      "grad_norm": 0.8986859321594238,
      "learning_rate": 3.4043212113624536e-06,
      "loss": 0.0099,
      "step": 2021960
    },
    {
      "epoch": 3.3090146174139026,
      "grad_norm": 0.12651845812797546,
      "learning_rate": 3.4042553191489363e-06,
      "loss": 0.017,
      "step": 2021980
    },
    {
      "epoch": 3.309047347852556,
      "grad_norm": 0.08701086044311523,
      "learning_rate": 3.404189426935419e-06,
      "loss": 0.0095,
      "step": 2022000
    },
    {
      "epoch": 3.3090800782912093,
      "grad_norm": 0.9246694445610046,
      "learning_rate": 3.4041235347219022e-06,
      "loss": 0.0125,
      "step": 2022020
    },
    {
      "epoch": 3.3091128087298625,
      "grad_norm": 0.21719905734062195,
      "learning_rate": 3.404057642508385e-06,
      "loss": 0.0102,
      "step": 2022040
    },
    {
      "epoch": 3.309145539168516,
      "grad_norm": 0.12721221148967743,
      "learning_rate": 3.4039917502948677e-06,
      "loss": 0.0099,
      "step": 2022060
    },
    {
      "epoch": 3.309178269607169,
      "grad_norm": 0.24083924293518066,
      "learning_rate": 3.4039258580813504e-06,
      "loss": 0.0117,
      "step": 2022080
    },
    {
      "epoch": 3.309211000045823,
      "grad_norm": 0.12372780591249466,
      "learning_rate": 3.4038599658678336e-06,
      "loss": 0.0115,
      "step": 2022100
    },
    {
      "epoch": 3.309243730484476,
      "grad_norm": 0.3849349319934845,
      "learning_rate": 3.4037940736543168e-06,
      "loss": 0.0123,
      "step": 2022120
    },
    {
      "epoch": 3.309276460923129,
      "grad_norm": 0.18160273134708405,
      "learning_rate": 3.4037281814407995e-06,
      "loss": 0.0128,
      "step": 2022140
    },
    {
      "epoch": 3.3093091913617827,
      "grad_norm": 0.09619414061307907,
      "learning_rate": 3.4036622892272827e-06,
      "loss": 0.0108,
      "step": 2022160
    },
    {
      "epoch": 3.309341921800436,
      "grad_norm": 0.39452385902404785,
      "learning_rate": 3.4035963970137654e-06,
      "loss": 0.0103,
      "step": 2022180
    },
    {
      "epoch": 3.3093746522390894,
      "grad_norm": 0.20706792175769806,
      "learning_rate": 3.403530504800248e-06,
      "loss": 0.0134,
      "step": 2022200
    },
    {
      "epoch": 3.3094073826777426,
      "grad_norm": 0.1346873790025711,
      "learning_rate": 3.403464612586731e-06,
      "loss": 0.0135,
      "step": 2022220
    },
    {
      "epoch": 3.3094401131163957,
      "grad_norm": 0.12434064596891403,
      "learning_rate": 3.4033987203732136e-06,
      "loss": 0.0112,
      "step": 2022240
    },
    {
      "epoch": 3.3094728435550493,
      "grad_norm": 0.25902798771858215,
      "learning_rate": 3.403332828159697e-06,
      "loss": 0.0139,
      "step": 2022260
    },
    {
      "epoch": 3.3095055739937025,
      "grad_norm": 0.5304631590843201,
      "learning_rate": 3.4032669359461795e-06,
      "loss": 0.0139,
      "step": 2022280
    },
    {
      "epoch": 3.309538304432356,
      "grad_norm": 0.0610911063849926,
      "learning_rate": 3.4032010437326623e-06,
      "loss": 0.0119,
      "step": 2022300
    },
    {
      "epoch": 3.3095710348710092,
      "grad_norm": 0.41060173511505127,
      "learning_rate": 3.403135151519145e-06,
      "loss": 0.0093,
      "step": 2022320
    },
    {
      "epoch": 3.309603765309663,
      "grad_norm": 0.3230624198913574,
      "learning_rate": 3.403069259305628e-06,
      "loss": 0.0159,
      "step": 2022340
    },
    {
      "epoch": 3.309636495748316,
      "grad_norm": 0.10332739353179932,
      "learning_rate": 3.403003367092111e-06,
      "loss": 0.0108,
      "step": 2022360
    },
    {
      "epoch": 3.309669226186969,
      "grad_norm": 0.16538125276565552,
      "learning_rate": 3.4029374748785937e-06,
      "loss": 0.0121,
      "step": 2022380
    },
    {
      "epoch": 3.3097019566256227,
      "grad_norm": 0.13996407389640808,
      "learning_rate": 3.4028715826650764e-06,
      "loss": 0.0111,
      "step": 2022400
    },
    {
      "epoch": 3.309734687064276,
      "grad_norm": 0.25842753052711487,
      "learning_rate": 3.4028056904515596e-06,
      "loss": 0.0085,
      "step": 2022420
    },
    {
      "epoch": 3.3097674175029295,
      "grad_norm": 0.40445151925086975,
      "learning_rate": 3.4027397982380423e-06,
      "loss": 0.0116,
      "step": 2022440
    },
    {
      "epoch": 3.3098001479415826,
      "grad_norm": 0.3696042597293854,
      "learning_rate": 3.4026739060245255e-06,
      "loss": 0.0115,
      "step": 2022460
    },
    {
      "epoch": 3.309832878380236,
      "grad_norm": 0.23449130356311798,
      "learning_rate": 3.4026080138110086e-06,
      "loss": 0.0116,
      "step": 2022480
    },
    {
      "epoch": 3.3098656088188894,
      "grad_norm": 0.2407362014055252,
      "learning_rate": 3.4025421215974914e-06,
      "loss": 0.0103,
      "step": 2022500
    },
    {
      "epoch": 3.3098983392575425,
      "grad_norm": 0.09410767257213593,
      "learning_rate": 3.402476229383974e-06,
      "loss": 0.0135,
      "step": 2022520
    },
    {
      "epoch": 3.309931069696196,
      "grad_norm": 0.2748545706272125,
      "learning_rate": 3.402410337170457e-06,
      "loss": 0.0098,
      "step": 2022540
    },
    {
      "epoch": 3.3099638001348493,
      "grad_norm": 0.4472222924232483,
      "learning_rate": 3.40234444495694e-06,
      "loss": 0.0178,
      "step": 2022560
    },
    {
      "epoch": 3.309996530573503,
      "grad_norm": 0.5695601105690002,
      "learning_rate": 3.4022785527434228e-06,
      "loss": 0.0098,
      "step": 2022580
    },
    {
      "epoch": 3.310029261012156,
      "grad_norm": 0.11104883998632431,
      "learning_rate": 3.4022126605299055e-06,
      "loss": 0.0075,
      "step": 2022600
    },
    {
      "epoch": 3.3100619914508096,
      "grad_norm": 0.4192962646484375,
      "learning_rate": 3.4021467683163882e-06,
      "loss": 0.0164,
      "step": 2022620
    },
    {
      "epoch": 3.3100947218894627,
      "grad_norm": 0.24733228981494904,
      "learning_rate": 3.4020808761028714e-06,
      "loss": 0.0093,
      "step": 2022640
    },
    {
      "epoch": 3.310127452328116,
      "grad_norm": 0.24518491327762604,
      "learning_rate": 3.402014983889354e-06,
      "loss": 0.0095,
      "step": 2022660
    },
    {
      "epoch": 3.3101601827667695,
      "grad_norm": 0.07271750271320343,
      "learning_rate": 3.401949091675837e-06,
      "loss": 0.008,
      "step": 2022680
    },
    {
      "epoch": 3.3101929132054226,
      "grad_norm": 0.3982623815536499,
      "learning_rate": 3.4018831994623196e-06,
      "loss": 0.0116,
      "step": 2022700
    },
    {
      "epoch": 3.3102256436440762,
      "grad_norm": 0.30255478620529175,
      "learning_rate": 3.4018173072488024e-06,
      "loss": 0.0101,
      "step": 2022720
    },
    {
      "epoch": 3.3102583740827294,
      "grad_norm": 0.10687601566314697,
      "learning_rate": 3.4017514150352855e-06,
      "loss": 0.0116,
      "step": 2022740
    },
    {
      "epoch": 3.310291104521383,
      "grad_norm": 0.13077451288700104,
      "learning_rate": 3.4016855228217683e-06,
      "loss": 0.0089,
      "step": 2022760
    },
    {
      "epoch": 3.310323834960036,
      "grad_norm": 0.27863121032714844,
      "learning_rate": 3.401619630608251e-06,
      "loss": 0.008,
      "step": 2022780
    },
    {
      "epoch": 3.3103565653986893,
      "grad_norm": 0.18052661418914795,
      "learning_rate": 3.4015537383947337e-06,
      "loss": 0.0072,
      "step": 2022800
    },
    {
      "epoch": 3.310389295837343,
      "grad_norm": 0.5118232369422913,
      "learning_rate": 3.4014878461812173e-06,
      "loss": 0.0137,
      "step": 2022820
    },
    {
      "epoch": 3.310422026275996,
      "grad_norm": 0.22789528965950012,
      "learning_rate": 3.4014219539677e-06,
      "loss": 0.0121,
      "step": 2022840
    },
    {
      "epoch": 3.3104547567146496,
      "grad_norm": 0.30804145336151123,
      "learning_rate": 3.401356061754183e-06,
      "loss": 0.0082,
      "step": 2022860
    },
    {
      "epoch": 3.3104874871533028,
      "grad_norm": 0.3053741157054901,
      "learning_rate": 3.401290169540666e-06,
      "loss": 0.0093,
      "step": 2022880
    },
    {
      "epoch": 3.3105202175919564,
      "grad_norm": 1.0997169017791748,
      "learning_rate": 3.4012242773271487e-06,
      "loss": 0.0144,
      "step": 2022900
    },
    {
      "epoch": 3.3105529480306095,
      "grad_norm": 0.1359858512878418,
      "learning_rate": 3.4011583851136314e-06,
      "loss": 0.0104,
      "step": 2022920
    },
    {
      "epoch": 3.3105856784692627,
      "grad_norm": 0.6685926914215088,
      "learning_rate": 3.401092492900114e-06,
      "loss": 0.0143,
      "step": 2022940
    },
    {
      "epoch": 3.3106184089079163,
      "grad_norm": 0.20946693420410156,
      "learning_rate": 3.4010266006865973e-06,
      "loss": 0.0092,
      "step": 2022960
    },
    {
      "epoch": 3.3106511393465694,
      "grad_norm": 0.1648089438676834,
      "learning_rate": 3.40096070847308e-06,
      "loss": 0.0141,
      "step": 2022980
    },
    {
      "epoch": 3.310683869785223,
      "grad_norm": 0.3539588749408722,
      "learning_rate": 3.400894816259563e-06,
      "loss": 0.0113,
      "step": 2023000
    },
    {
      "epoch": 3.310716600223876,
      "grad_norm": 0.09530263394117355,
      "learning_rate": 3.4008289240460456e-06,
      "loss": 0.0103,
      "step": 2023020
    },
    {
      "epoch": 3.3107493306625297,
      "grad_norm": 0.5086345076560974,
      "learning_rate": 3.4007630318325287e-06,
      "loss": 0.0143,
      "step": 2023040
    },
    {
      "epoch": 3.310782061101183,
      "grad_norm": 0.08365029841661453,
      "learning_rate": 3.4006971396190115e-06,
      "loss": 0.0078,
      "step": 2023060
    },
    {
      "epoch": 3.310814791539836,
      "grad_norm": 0.20782049000263214,
      "learning_rate": 3.400631247405494e-06,
      "loss": 0.0116,
      "step": 2023080
    },
    {
      "epoch": 3.3108475219784896,
      "grad_norm": 0.14563095569610596,
      "learning_rate": 3.400565355191977e-06,
      "loss": 0.01,
      "step": 2023100
    },
    {
      "epoch": 3.310880252417143,
      "grad_norm": 0.2733771800994873,
      "learning_rate": 3.4004994629784597e-06,
      "loss": 0.019,
      "step": 2023120
    },
    {
      "epoch": 3.3109129828557964,
      "grad_norm": 0.3414951264858246,
      "learning_rate": 3.400433570764943e-06,
      "loss": 0.0074,
      "step": 2023140
    },
    {
      "epoch": 3.3109457132944495,
      "grad_norm": 0.1940997987985611,
      "learning_rate": 3.400367678551426e-06,
      "loss": 0.0099,
      "step": 2023160
    },
    {
      "epoch": 3.310978443733103,
      "grad_norm": 0.24257756769657135,
      "learning_rate": 3.400301786337909e-06,
      "loss": 0.0151,
      "step": 2023180
    },
    {
      "epoch": 3.3110111741717563,
      "grad_norm": 0.4514417350292206,
      "learning_rate": 3.400235894124392e-06,
      "loss": 0.0179,
      "step": 2023200
    },
    {
      "epoch": 3.3110439046104094,
      "grad_norm": 0.08229831606149673,
      "learning_rate": 3.4001700019108747e-06,
      "loss": 0.0079,
      "step": 2023220
    },
    {
      "epoch": 3.311076635049063,
      "grad_norm": 0.22226741909980774,
      "learning_rate": 3.4001041096973574e-06,
      "loss": 0.011,
      "step": 2023240
    },
    {
      "epoch": 3.311109365487716,
      "grad_norm": 0.34612441062927246,
      "learning_rate": 3.40003821748384e-06,
      "loss": 0.0114,
      "step": 2023260
    },
    {
      "epoch": 3.3111420959263698,
      "grad_norm": 0.11440357565879822,
      "learning_rate": 3.3999723252703233e-06,
      "loss": 0.0126,
      "step": 2023280
    },
    {
      "epoch": 3.311174826365023,
      "grad_norm": 0.20650263130664825,
      "learning_rate": 3.399906433056806e-06,
      "loss": 0.0115,
      "step": 2023300
    },
    {
      "epoch": 3.3112075568036765,
      "grad_norm": 0.5882431268692017,
      "learning_rate": 3.3998405408432888e-06,
      "loss": 0.0091,
      "step": 2023320
    },
    {
      "epoch": 3.3112402872423297,
      "grad_norm": 0.5389344692230225,
      "learning_rate": 3.3997746486297715e-06,
      "loss": 0.0127,
      "step": 2023340
    },
    {
      "epoch": 3.311273017680983,
      "grad_norm": 0.314434289932251,
      "learning_rate": 3.3997087564162547e-06,
      "loss": 0.0105,
      "step": 2023360
    },
    {
      "epoch": 3.3113057481196364,
      "grad_norm": 0.3775494396686554,
      "learning_rate": 3.3996428642027374e-06,
      "loss": 0.0085,
      "step": 2023380
    },
    {
      "epoch": 3.3113384785582896,
      "grad_norm": 0.3543933928012848,
      "learning_rate": 3.39957697198922e-06,
      "loss": 0.01,
      "step": 2023400
    },
    {
      "epoch": 3.311371208996943,
      "grad_norm": 0.18740423023700714,
      "learning_rate": 3.399511079775703e-06,
      "loss": 0.0099,
      "step": 2023420
    },
    {
      "epoch": 3.3114039394355963,
      "grad_norm": 0.37678197026252747,
      "learning_rate": 3.399445187562186e-06,
      "loss": 0.0094,
      "step": 2023440
    },
    {
      "epoch": 3.31143666987425,
      "grad_norm": 0.9022481441497803,
      "learning_rate": 3.399379295348669e-06,
      "loss": 0.0097,
      "step": 2023460
    },
    {
      "epoch": 3.311469400312903,
      "grad_norm": 0.3968425691127777,
      "learning_rate": 3.3993134031351515e-06,
      "loss": 0.0131,
      "step": 2023480
    },
    {
      "epoch": 3.311502130751556,
      "grad_norm": 0.14675232768058777,
      "learning_rate": 3.3992475109216343e-06,
      "loss": 0.0135,
      "step": 2023500
    },
    {
      "epoch": 3.31153486119021,
      "grad_norm": 0.10536488145589828,
      "learning_rate": 3.399181618708118e-06,
      "loss": 0.0108,
      "step": 2023520
    },
    {
      "epoch": 3.311567591628863,
      "grad_norm": 0.11422473937273026,
      "learning_rate": 3.3991157264946006e-06,
      "loss": 0.0111,
      "step": 2023540
    },
    {
      "epoch": 3.3116003220675165,
      "grad_norm": 0.17611776292324066,
      "learning_rate": 3.3990498342810834e-06,
      "loss": 0.0074,
      "step": 2023560
    },
    {
      "epoch": 3.3116330525061697,
      "grad_norm": 0.08982323855161667,
      "learning_rate": 3.3989839420675665e-06,
      "loss": 0.0063,
      "step": 2023580
    },
    {
      "epoch": 3.311665782944823,
      "grad_norm": 0.2476840317249298,
      "learning_rate": 3.3989180498540493e-06,
      "loss": 0.0117,
      "step": 2023600
    },
    {
      "epoch": 3.3116985133834764,
      "grad_norm": 0.19561421871185303,
      "learning_rate": 3.398852157640532e-06,
      "loss": 0.0089,
      "step": 2023620
    },
    {
      "epoch": 3.3117312438221296,
      "grad_norm": 0.2200784832239151,
      "learning_rate": 3.3987862654270147e-06,
      "loss": 0.0109,
      "step": 2023640
    },
    {
      "epoch": 3.311763974260783,
      "grad_norm": 0.21609775722026825,
      "learning_rate": 3.3987203732134975e-06,
      "loss": 0.0124,
      "step": 2023660
    },
    {
      "epoch": 3.3117967046994363,
      "grad_norm": 0.291166216135025,
      "learning_rate": 3.3986544809999806e-06,
      "loss": 0.01,
      "step": 2023680
    },
    {
      "epoch": 3.3118294351380895,
      "grad_norm": 0.4704645872116089,
      "learning_rate": 3.3985885887864634e-06,
      "loss": 0.0095,
      "step": 2023700
    },
    {
      "epoch": 3.311862165576743,
      "grad_norm": 0.13294556736946106,
      "learning_rate": 3.398522696572946e-06,
      "loss": 0.0157,
      "step": 2023720
    },
    {
      "epoch": 3.311894896015396,
      "grad_norm": 0.18642225861549377,
      "learning_rate": 3.398456804359429e-06,
      "loss": 0.0168,
      "step": 2023740
    },
    {
      "epoch": 3.31192762645405,
      "grad_norm": 0.4857216775417328,
      "learning_rate": 3.398390912145912e-06,
      "loss": 0.0132,
      "step": 2023760
    },
    {
      "epoch": 3.311960356892703,
      "grad_norm": 0.17356719076633453,
      "learning_rate": 3.3983250199323948e-06,
      "loss": 0.0145,
      "step": 2023780
    },
    {
      "epoch": 3.3119930873313566,
      "grad_norm": 0.4331912100315094,
      "learning_rate": 3.3982591277188775e-06,
      "loss": 0.0128,
      "step": 2023800
    },
    {
      "epoch": 3.3120258177700097,
      "grad_norm": 0.12977057695388794,
      "learning_rate": 3.3981932355053602e-06,
      "loss": 0.0105,
      "step": 2023820
    },
    {
      "epoch": 3.312058548208663,
      "grad_norm": 0.14788973331451416,
      "learning_rate": 3.3981273432918434e-06,
      "loss": 0.0117,
      "step": 2023840
    },
    {
      "epoch": 3.3120912786473165,
      "grad_norm": 0.24891814589500427,
      "learning_rate": 3.398061451078326e-06,
      "loss": 0.0121,
      "step": 2023860
    },
    {
      "epoch": 3.3121240090859696,
      "grad_norm": 0.48031044006347656,
      "learning_rate": 3.3979955588648093e-06,
      "loss": 0.0091,
      "step": 2023880
    },
    {
      "epoch": 3.312156739524623,
      "grad_norm": 0.41170966625213623,
      "learning_rate": 3.3979296666512925e-06,
      "loss": 0.0114,
      "step": 2023900
    },
    {
      "epoch": 3.3121894699632763,
      "grad_norm": 0.23701214790344238,
      "learning_rate": 3.397863774437775e-06,
      "loss": 0.0129,
      "step": 2023920
    },
    {
      "epoch": 3.31222220040193,
      "grad_norm": 0.22647453844547272,
      "learning_rate": 3.397797882224258e-06,
      "loss": 0.0094,
      "step": 2023940
    },
    {
      "epoch": 3.312254930840583,
      "grad_norm": 0.11776280403137207,
      "learning_rate": 3.3977319900107407e-06,
      "loss": 0.0061,
      "step": 2023960
    },
    {
      "epoch": 3.3122876612792362,
      "grad_norm": 0.2880323529243469,
      "learning_rate": 3.397666097797224e-06,
      "loss": 0.0126,
      "step": 2023980
    },
    {
      "epoch": 3.31232039171789,
      "grad_norm": 0.26834043860435486,
      "learning_rate": 3.3976002055837066e-06,
      "loss": 0.0165,
      "step": 2024000
    },
    {
      "epoch": 3.312353122156543,
      "grad_norm": 0.2630773186683655,
      "learning_rate": 3.3975343133701893e-06,
      "loss": 0.0083,
      "step": 2024020
    },
    {
      "epoch": 3.3123858525951966,
      "grad_norm": 0.4789884388446808,
      "learning_rate": 3.397468421156672e-06,
      "loss": 0.0113,
      "step": 2024040
    },
    {
      "epoch": 3.3124185830338497,
      "grad_norm": 0.17012153565883636,
      "learning_rate": 3.3974025289431552e-06,
      "loss": 0.011,
      "step": 2024060
    },
    {
      "epoch": 3.3124513134725033,
      "grad_norm": 0.688363254070282,
      "learning_rate": 3.397336636729638e-06,
      "loss": 0.0132,
      "step": 2024080
    },
    {
      "epoch": 3.3124840439111565,
      "grad_norm": 0.11838861554861069,
      "learning_rate": 3.3972707445161207e-06,
      "loss": 0.013,
      "step": 2024100
    },
    {
      "epoch": 3.3125167743498096,
      "grad_norm": 0.17229889333248138,
      "learning_rate": 3.3972048523026035e-06,
      "loss": 0.0113,
      "step": 2024120
    },
    {
      "epoch": 3.312549504788463,
      "grad_norm": 0.342847615480423,
      "learning_rate": 3.397138960089086e-06,
      "loss": 0.0104,
      "step": 2024140
    },
    {
      "epoch": 3.3125822352271164,
      "grad_norm": 0.2833121120929718,
      "learning_rate": 3.3970730678755694e-06,
      "loss": 0.0125,
      "step": 2024160
    },
    {
      "epoch": 3.31261496566577,
      "grad_norm": 0.7264397740364075,
      "learning_rate": 3.397007175662052e-06,
      "loss": 0.0152,
      "step": 2024180
    },
    {
      "epoch": 3.312647696104423,
      "grad_norm": 0.1447116583585739,
      "learning_rate": 3.396941283448535e-06,
      "loss": 0.01,
      "step": 2024200
    },
    {
      "epoch": 3.3126804265430767,
      "grad_norm": 0.1318226158618927,
      "learning_rate": 3.3968753912350184e-06,
      "loss": 0.0124,
      "step": 2024220
    },
    {
      "epoch": 3.31271315698173,
      "grad_norm": 0.1399202048778534,
      "learning_rate": 3.396809499021501e-06,
      "loss": 0.0144,
      "step": 2024240
    },
    {
      "epoch": 3.312745887420383,
      "grad_norm": 0.34160691499710083,
      "learning_rate": 3.396743606807984e-06,
      "loss": 0.0129,
      "step": 2024260
    },
    {
      "epoch": 3.3127786178590366,
      "grad_norm": 0.16412276029586792,
      "learning_rate": 3.3966777145944666e-06,
      "loss": 0.0145,
      "step": 2024280
    },
    {
      "epoch": 3.3128113482976898,
      "grad_norm": 0.2437785416841507,
      "learning_rate": 3.39661182238095e-06,
      "loss": 0.0116,
      "step": 2024300
    },
    {
      "epoch": 3.3128440787363433,
      "grad_norm": 0.6749684810638428,
      "learning_rate": 3.3965459301674325e-06,
      "loss": 0.0117,
      "step": 2024320
    },
    {
      "epoch": 3.3128768091749965,
      "grad_norm": 0.20239505171775818,
      "learning_rate": 3.3964800379539153e-06,
      "loss": 0.0095,
      "step": 2024340
    },
    {
      "epoch": 3.31290953961365,
      "grad_norm": 0.4376037120819092,
      "learning_rate": 3.396414145740398e-06,
      "loss": 0.0108,
      "step": 2024360
    },
    {
      "epoch": 3.3129422700523032,
      "grad_norm": 0.32623717188835144,
      "learning_rate": 3.396348253526881e-06,
      "loss": 0.0101,
      "step": 2024380
    },
    {
      "epoch": 3.3129750004909564,
      "grad_norm": 0.21850642561912537,
      "learning_rate": 3.396282361313364e-06,
      "loss": 0.0131,
      "step": 2024400
    },
    {
      "epoch": 3.31300773092961,
      "grad_norm": 0.15271726250648499,
      "learning_rate": 3.3962164690998467e-06,
      "loss": 0.0085,
      "step": 2024420
    },
    {
      "epoch": 3.313040461368263,
      "grad_norm": 0.34454360604286194,
      "learning_rate": 3.3961505768863294e-06,
      "loss": 0.01,
      "step": 2024440
    },
    {
      "epoch": 3.3130731918069167,
      "grad_norm": 0.06289634108543396,
      "learning_rate": 3.3960846846728126e-06,
      "loss": 0.0168,
      "step": 2024460
    },
    {
      "epoch": 3.31310592224557,
      "grad_norm": 0.8259656429290771,
      "learning_rate": 3.3960187924592953e-06,
      "loss": 0.0096,
      "step": 2024480
    },
    {
      "epoch": 3.3131386526842235,
      "grad_norm": 0.3910309672355652,
      "learning_rate": 3.395952900245778e-06,
      "loss": 0.0086,
      "step": 2024500
    },
    {
      "epoch": 3.3131713831228766,
      "grad_norm": 0.2258712202310562,
      "learning_rate": 3.3958870080322608e-06,
      "loss": 0.0095,
      "step": 2024520
    },
    {
      "epoch": 3.3132041135615298,
      "grad_norm": 0.3802133798599243,
      "learning_rate": 3.395821115818744e-06,
      "loss": 0.0122,
      "step": 2024540
    },
    {
      "epoch": 3.3132368440001834,
      "grad_norm": 0.25504592061042786,
      "learning_rate": 3.3957552236052267e-06,
      "loss": 0.0089,
      "step": 2024560
    },
    {
      "epoch": 3.3132695744388365,
      "grad_norm": 0.2781200110912323,
      "learning_rate": 3.39568933139171e-06,
      "loss": 0.0108,
      "step": 2024580
    },
    {
      "epoch": 3.31330230487749,
      "grad_norm": 0.0850929245352745,
      "learning_rate": 3.395623439178193e-06,
      "loss": 0.0094,
      "step": 2024600
    },
    {
      "epoch": 3.3133350353161433,
      "grad_norm": 0.32428160309791565,
      "learning_rate": 3.3955575469646758e-06,
      "loss": 0.0105,
      "step": 2024620
    },
    {
      "epoch": 3.313367765754797,
      "grad_norm": 0.3504560589790344,
      "learning_rate": 3.3954916547511585e-06,
      "loss": 0.0121,
      "step": 2024640
    },
    {
      "epoch": 3.31340049619345,
      "grad_norm": 0.35163721442222595,
      "learning_rate": 3.3954257625376412e-06,
      "loss": 0.016,
      "step": 2024660
    },
    {
      "epoch": 3.313433226632103,
      "grad_norm": 0.1582077592611313,
      "learning_rate": 3.395359870324124e-06,
      "loss": 0.0111,
      "step": 2024680
    },
    {
      "epoch": 3.3134659570707568,
      "grad_norm": 0.15983891487121582,
      "learning_rate": 3.395293978110607e-06,
      "loss": 0.0124,
      "step": 2024700
    },
    {
      "epoch": 3.31349868750941,
      "grad_norm": 0.3023891746997833,
      "learning_rate": 3.39522808589709e-06,
      "loss": 0.0081,
      "step": 2024720
    },
    {
      "epoch": 3.3135314179480635,
      "grad_norm": 0.551032304763794,
      "learning_rate": 3.3951621936835726e-06,
      "loss": 0.0117,
      "step": 2024740
    },
    {
      "epoch": 3.3135641483867166,
      "grad_norm": 0.429226815700531,
      "learning_rate": 3.3950963014700554e-06,
      "loss": 0.0151,
      "step": 2024760
    },
    {
      "epoch": 3.3135968788253702,
      "grad_norm": 0.8060159683227539,
      "learning_rate": 3.3950304092565385e-06,
      "loss": 0.0156,
      "step": 2024780
    },
    {
      "epoch": 3.3136296092640234,
      "grad_norm": 0.26023271679878235,
      "learning_rate": 3.3949645170430213e-06,
      "loss": 0.0116,
      "step": 2024800
    },
    {
      "epoch": 3.3136623397026765,
      "grad_norm": 0.11956636607646942,
      "learning_rate": 3.394898624829504e-06,
      "loss": 0.0098,
      "step": 2024820
    },
    {
      "epoch": 3.31369507014133,
      "grad_norm": 0.10389155894517899,
      "learning_rate": 3.3948327326159867e-06,
      "loss": 0.0119,
      "step": 2024840
    },
    {
      "epoch": 3.3137278005799833,
      "grad_norm": 0.241679847240448,
      "learning_rate": 3.39476684040247e-06,
      "loss": 0.0135,
      "step": 2024860
    },
    {
      "epoch": 3.313760531018637,
      "grad_norm": 0.4774581789970398,
      "learning_rate": 3.3947009481889526e-06,
      "loss": 0.0107,
      "step": 2024880
    },
    {
      "epoch": 3.31379326145729,
      "grad_norm": 0.10326682031154633,
      "learning_rate": 3.3946350559754354e-06,
      "loss": 0.0094,
      "step": 2024900
    },
    {
      "epoch": 3.3138259918959436,
      "grad_norm": 0.11396320164203644,
      "learning_rate": 3.394569163761919e-06,
      "loss": 0.0113,
      "step": 2024920
    },
    {
      "epoch": 3.3138587223345968,
      "grad_norm": 0.1999226212501526,
      "learning_rate": 3.3945032715484017e-06,
      "loss": 0.0115,
      "step": 2024940
    },
    {
      "epoch": 3.31389145277325,
      "grad_norm": 0.7117320895195007,
      "learning_rate": 3.3944373793348844e-06,
      "loss": 0.0159,
      "step": 2024960
    },
    {
      "epoch": 3.3139241832119035,
      "grad_norm": 0.41406702995300293,
      "learning_rate": 3.394371487121367e-06,
      "loss": 0.0119,
      "step": 2024980
    },
    {
      "epoch": 3.3139569136505567,
      "grad_norm": 0.45538315176963806,
      "learning_rate": 3.3943055949078504e-06,
      "loss": 0.0095,
      "step": 2025000
    },
    {
      "epoch": 3.3139896440892103,
      "grad_norm": 0.2064424753189087,
      "learning_rate": 3.394239702694333e-06,
      "loss": 0.0136,
      "step": 2025020
    },
    {
      "epoch": 3.3140223745278634,
      "grad_norm": 0.845045268535614,
      "learning_rate": 3.394173810480816e-06,
      "loss": 0.0227,
      "step": 2025040
    },
    {
      "epoch": 3.314055104966517,
      "grad_norm": 0.32071366906166077,
      "learning_rate": 3.3941079182672986e-06,
      "loss": 0.0089,
      "step": 2025060
    },
    {
      "epoch": 3.31408783540517,
      "grad_norm": 0.3552241623401642,
      "learning_rate": 3.3940420260537817e-06,
      "loss": 0.0134,
      "step": 2025080
    },
    {
      "epoch": 3.3141205658438233,
      "grad_norm": 0.2900419533252716,
      "learning_rate": 3.3939761338402645e-06,
      "loss": 0.0102,
      "step": 2025100
    },
    {
      "epoch": 3.314153296282477,
      "grad_norm": 0.05605119839310646,
      "learning_rate": 3.3939102416267472e-06,
      "loss": 0.009,
      "step": 2025120
    },
    {
      "epoch": 3.31418602672113,
      "grad_norm": 0.2462363988161087,
      "learning_rate": 3.39384434941323e-06,
      "loss": 0.0099,
      "step": 2025140
    },
    {
      "epoch": 3.3142187571597836,
      "grad_norm": 0.24790777266025543,
      "learning_rate": 3.3937784571997127e-06,
      "loss": 0.015,
      "step": 2025160
    },
    {
      "epoch": 3.314251487598437,
      "grad_norm": 0.23466090857982635,
      "learning_rate": 3.393712564986196e-06,
      "loss": 0.0099,
      "step": 2025180
    },
    {
      "epoch": 3.31428421803709,
      "grad_norm": 0.13039015233516693,
      "learning_rate": 3.3936466727726786e-06,
      "loss": 0.0104,
      "step": 2025200
    },
    {
      "epoch": 3.3143169484757435,
      "grad_norm": 0.15563887357711792,
      "learning_rate": 3.3935807805591613e-06,
      "loss": 0.0142,
      "step": 2025220
    },
    {
      "epoch": 3.3143496789143967,
      "grad_norm": 0.33177071809768677,
      "learning_rate": 3.393514888345644e-06,
      "loss": 0.0106,
      "step": 2025240
    },
    {
      "epoch": 3.3143824093530503,
      "grad_norm": 0.1930968314409256,
      "learning_rate": 3.3934489961321272e-06,
      "loss": 0.0112,
      "step": 2025260
    },
    {
      "epoch": 3.3144151397917034,
      "grad_norm": 0.2423839569091797,
      "learning_rate": 3.3933831039186104e-06,
      "loss": 0.0067,
      "step": 2025280
    },
    {
      "epoch": 3.3144478702303566,
      "grad_norm": 0.2759438157081604,
      "learning_rate": 3.393317211705093e-06,
      "loss": 0.0147,
      "step": 2025300
    },
    {
      "epoch": 3.31448060066901,
      "grad_norm": 0.19861143827438354,
      "learning_rate": 3.3932513194915763e-06,
      "loss": 0.0113,
      "step": 2025320
    },
    {
      "epoch": 3.3145133311076633,
      "grad_norm": 0.23612046241760254,
      "learning_rate": 3.393185427278059e-06,
      "loss": 0.0108,
      "step": 2025340
    },
    {
      "epoch": 3.314546061546317,
      "grad_norm": 0.0771535187959671,
      "learning_rate": 3.3931195350645418e-06,
      "loss": 0.0074,
      "step": 2025360
    },
    {
      "epoch": 3.31457879198497,
      "grad_norm": 0.14520008862018585,
      "learning_rate": 3.3930536428510245e-06,
      "loss": 0.0115,
      "step": 2025380
    },
    {
      "epoch": 3.3146115224236237,
      "grad_norm": 0.06824781000614166,
      "learning_rate": 3.3929877506375077e-06,
      "loss": 0.0162,
      "step": 2025400
    },
    {
      "epoch": 3.314644252862277,
      "grad_norm": 0.08363131433725357,
      "learning_rate": 3.3929218584239904e-06,
      "loss": 0.0117,
      "step": 2025420
    },
    {
      "epoch": 3.31467698330093,
      "grad_norm": 0.23919321596622467,
      "learning_rate": 3.392855966210473e-06,
      "loss": 0.007,
      "step": 2025440
    },
    {
      "epoch": 3.3147097137395836,
      "grad_norm": 0.12614423036575317,
      "learning_rate": 3.392790073996956e-06,
      "loss": 0.0142,
      "step": 2025460
    },
    {
      "epoch": 3.3147424441782367,
      "grad_norm": 0.15282011032104492,
      "learning_rate": 3.392724181783439e-06,
      "loss": 0.0117,
      "step": 2025480
    },
    {
      "epoch": 3.3147751746168903,
      "grad_norm": 0.6482459902763367,
      "learning_rate": 3.392658289569922e-06,
      "loss": 0.0112,
      "step": 2025500
    },
    {
      "epoch": 3.3148079050555435,
      "grad_norm": 0.4131953716278076,
      "learning_rate": 3.3925923973564046e-06,
      "loss": 0.015,
      "step": 2025520
    },
    {
      "epoch": 3.314840635494197,
      "grad_norm": 0.11303817480802536,
      "learning_rate": 3.3925265051428873e-06,
      "loss": 0.0161,
      "step": 2025540
    },
    {
      "epoch": 3.31487336593285,
      "grad_norm": 0.2556998133659363,
      "learning_rate": 3.39246061292937e-06,
      "loss": 0.0086,
      "step": 2025560
    },
    {
      "epoch": 3.3149060963715034,
      "grad_norm": 0.17935720086097717,
      "learning_rate": 3.392394720715853e-06,
      "loss": 0.012,
      "step": 2025580
    },
    {
      "epoch": 3.314938826810157,
      "grad_norm": 0.12390248477458954,
      "learning_rate": 3.392328828502336e-06,
      "loss": 0.0119,
      "step": 2025600
    },
    {
      "epoch": 3.31497155724881,
      "grad_norm": 0.5454447269439697,
      "learning_rate": 3.3922629362888187e-06,
      "loss": 0.0097,
      "step": 2025620
    },
    {
      "epoch": 3.3150042876874637,
      "grad_norm": 0.31731393933296204,
      "learning_rate": 3.3921970440753023e-06,
      "loss": 0.0117,
      "step": 2025640
    },
    {
      "epoch": 3.315037018126117,
      "grad_norm": 0.5749462246894836,
      "learning_rate": 3.392131151861785e-06,
      "loss": 0.0097,
      "step": 2025660
    },
    {
      "epoch": 3.3150697485647704,
      "grad_norm": 0.2491234689950943,
      "learning_rate": 3.3920652596482677e-06,
      "loss": 0.0099,
      "step": 2025680
    },
    {
      "epoch": 3.3151024790034236,
      "grad_norm": 0.3559472858905792,
      "learning_rate": 3.3919993674347505e-06,
      "loss": 0.0103,
      "step": 2025700
    },
    {
      "epoch": 3.3151352094420767,
      "grad_norm": 0.8342077136039734,
      "learning_rate": 3.3919334752212336e-06,
      "loss": 0.0125,
      "step": 2025720
    },
    {
      "epoch": 3.3151679398807303,
      "grad_norm": 0.27980291843414307,
      "learning_rate": 3.3918675830077164e-06,
      "loss": 0.0112,
      "step": 2025740
    },
    {
      "epoch": 3.3152006703193835,
      "grad_norm": 0.11443307995796204,
      "learning_rate": 3.391801690794199e-06,
      "loss": 0.0128,
      "step": 2025760
    },
    {
      "epoch": 3.315233400758037,
      "grad_norm": 0.14463359117507935,
      "learning_rate": 3.391735798580682e-06,
      "loss": 0.0096,
      "step": 2025780
    },
    {
      "epoch": 3.3152661311966902,
      "grad_norm": 0.30705755949020386,
      "learning_rate": 3.391669906367165e-06,
      "loss": 0.0138,
      "step": 2025800
    },
    {
      "epoch": 3.315298861635344,
      "grad_norm": 0.6258841753005981,
      "learning_rate": 3.3916040141536478e-06,
      "loss": 0.0161,
      "step": 2025820
    },
    {
      "epoch": 3.315331592073997,
      "grad_norm": 0.078839011490345,
      "learning_rate": 3.3915381219401305e-06,
      "loss": 0.0117,
      "step": 2025840
    },
    {
      "epoch": 3.31536432251265,
      "grad_norm": 0.8261718153953552,
      "learning_rate": 3.3914722297266132e-06,
      "loss": 0.0164,
      "step": 2025860
    },
    {
      "epoch": 3.3153970529513037,
      "grad_norm": 0.6948592662811279,
      "learning_rate": 3.3914063375130964e-06,
      "loss": 0.0127,
      "step": 2025880
    },
    {
      "epoch": 3.315429783389957,
      "grad_norm": 0.5097540616989136,
      "learning_rate": 3.391340445299579e-06,
      "loss": 0.0111,
      "step": 2025900
    },
    {
      "epoch": 3.3154625138286105,
      "grad_norm": 0.23679126799106598,
      "learning_rate": 3.391274553086062e-06,
      "loss": 0.0107,
      "step": 2025920
    },
    {
      "epoch": 3.3154952442672636,
      "grad_norm": 0.1896134316921234,
      "learning_rate": 3.3912086608725446e-06,
      "loss": 0.011,
      "step": 2025940
    },
    {
      "epoch": 3.315527974705917,
      "grad_norm": 0.48439252376556396,
      "learning_rate": 3.391142768659028e-06,
      "loss": 0.0094,
      "step": 2025960
    },
    {
      "epoch": 3.3155607051445704,
      "grad_norm": 0.15908126533031464,
      "learning_rate": 3.391076876445511e-06,
      "loss": 0.0175,
      "step": 2025980
    },
    {
      "epoch": 3.3155934355832235,
      "grad_norm": 0.12390083074569702,
      "learning_rate": 3.3910109842319937e-06,
      "loss": 0.011,
      "step": 2026000
    },
    {
      "epoch": 3.315626166021877,
      "grad_norm": 1.4158244132995605,
      "learning_rate": 3.390945092018477e-06,
      "loss": 0.0195,
      "step": 2026020
    },
    {
      "epoch": 3.3156588964605302,
      "grad_norm": 0.32147422432899475,
      "learning_rate": 3.3908791998049596e-06,
      "loss": 0.0086,
      "step": 2026040
    },
    {
      "epoch": 3.315691626899184,
      "grad_norm": 0.18778741359710693,
      "learning_rate": 3.3908133075914423e-06,
      "loss": 0.0113,
      "step": 2026060
    },
    {
      "epoch": 3.315724357337837,
      "grad_norm": 0.15183907747268677,
      "learning_rate": 3.390747415377925e-06,
      "loss": 0.0264,
      "step": 2026080
    },
    {
      "epoch": 3.3157570877764906,
      "grad_norm": 0.4014839231967926,
      "learning_rate": 3.390681523164408e-06,
      "loss": 0.0116,
      "step": 2026100
    },
    {
      "epoch": 3.3157898182151437,
      "grad_norm": 0.05825047940015793,
      "learning_rate": 3.390615630950891e-06,
      "loss": 0.0101,
      "step": 2026120
    },
    {
      "epoch": 3.315822548653797,
      "grad_norm": 0.43784594535827637,
      "learning_rate": 3.3905497387373737e-06,
      "loss": 0.0099,
      "step": 2026140
    },
    {
      "epoch": 3.3158552790924505,
      "grad_norm": 0.30397936701774597,
      "learning_rate": 3.3904838465238565e-06,
      "loss": 0.0092,
      "step": 2026160
    },
    {
      "epoch": 3.3158880095311036,
      "grad_norm": 0.2109028548002243,
      "learning_rate": 3.390417954310339e-06,
      "loss": 0.0106,
      "step": 2026180
    },
    {
      "epoch": 3.3159207399697572,
      "grad_norm": 0.17111805081367493,
      "learning_rate": 3.3903520620968224e-06,
      "loss": 0.0149,
      "step": 2026200
    },
    {
      "epoch": 3.3159534704084104,
      "grad_norm": 0.13724413514137268,
      "learning_rate": 3.390286169883305e-06,
      "loss": 0.0127,
      "step": 2026220
    },
    {
      "epoch": 3.315986200847064,
      "grad_norm": 0.11510691046714783,
      "learning_rate": 3.390220277669788e-06,
      "loss": 0.0128,
      "step": 2026240
    },
    {
      "epoch": 3.316018931285717,
      "grad_norm": 0.14091891050338745,
      "learning_rate": 3.3901543854562706e-06,
      "loss": 0.0076,
      "step": 2026260
    },
    {
      "epoch": 3.3160516617243703,
      "grad_norm": 0.614698052406311,
      "learning_rate": 3.3900884932427537e-06,
      "loss": 0.0115,
      "step": 2026280
    },
    {
      "epoch": 3.316084392163024,
      "grad_norm": 0.1717974841594696,
      "learning_rate": 3.3900226010292365e-06,
      "loss": 0.0102,
      "step": 2026300
    },
    {
      "epoch": 3.316117122601677,
      "grad_norm": 0.12588298320770264,
      "learning_rate": 3.3899567088157192e-06,
      "loss": 0.0086,
      "step": 2026320
    },
    {
      "epoch": 3.3161498530403306,
      "grad_norm": 0.11447405815124512,
      "learning_rate": 3.389890816602203e-06,
      "loss": 0.0155,
      "step": 2026340
    },
    {
      "epoch": 3.3161825834789838,
      "grad_norm": 0.130108043551445,
      "learning_rate": 3.3898249243886855e-06,
      "loss": 0.0103,
      "step": 2026360
    },
    {
      "epoch": 3.3162153139176374,
      "grad_norm": 0.3422221839427948,
      "learning_rate": 3.3897590321751683e-06,
      "loss": 0.0124,
      "step": 2026380
    },
    {
      "epoch": 3.3162480443562905,
      "grad_norm": 0.2270778864622116,
      "learning_rate": 3.389693139961651e-06,
      "loss": 0.0118,
      "step": 2026400
    },
    {
      "epoch": 3.3162807747949437,
      "grad_norm": 0.3426225483417511,
      "learning_rate": 3.389627247748134e-06,
      "loss": 0.0133,
      "step": 2026420
    },
    {
      "epoch": 3.3163135052335972,
      "grad_norm": 0.1017414927482605,
      "learning_rate": 3.389561355534617e-06,
      "loss": 0.0152,
      "step": 2026440
    },
    {
      "epoch": 3.3163462356722504,
      "grad_norm": 0.24453601241111755,
      "learning_rate": 3.3894954633210997e-06,
      "loss": 0.0103,
      "step": 2026460
    },
    {
      "epoch": 3.316378966110904,
      "grad_norm": 0.09427796304225922,
      "learning_rate": 3.3894295711075824e-06,
      "loss": 0.0101,
      "step": 2026480
    },
    {
      "epoch": 3.316411696549557,
      "grad_norm": 0.4902350604534149,
      "learning_rate": 3.3893636788940656e-06,
      "loss": 0.0177,
      "step": 2026500
    },
    {
      "epoch": 3.3164444269882107,
      "grad_norm": 1.2322312593460083,
      "learning_rate": 3.3892977866805483e-06,
      "loss": 0.0128,
      "step": 2026520
    },
    {
      "epoch": 3.316477157426864,
      "grad_norm": 0.19085180759429932,
      "learning_rate": 3.389231894467031e-06,
      "loss": 0.0166,
      "step": 2026540
    },
    {
      "epoch": 3.316509887865517,
      "grad_norm": 0.4322164058685303,
      "learning_rate": 3.389166002253514e-06,
      "loss": 0.0137,
      "step": 2026560
    },
    {
      "epoch": 3.3165426183041706,
      "grad_norm": 0.16096481680870056,
      "learning_rate": 3.3891001100399965e-06,
      "loss": 0.0101,
      "step": 2026580
    },
    {
      "epoch": 3.316575348742824,
      "grad_norm": 0.24446217715740204,
      "learning_rate": 3.3890342178264797e-06,
      "loss": 0.0094,
      "step": 2026600
    },
    {
      "epoch": 3.3166080791814774,
      "grad_norm": 0.5576757788658142,
      "learning_rate": 3.3889683256129624e-06,
      "loss": 0.0126,
      "step": 2026620
    },
    {
      "epoch": 3.3166408096201305,
      "grad_norm": 0.2444717437028885,
      "learning_rate": 3.388902433399445e-06,
      "loss": 0.0086,
      "step": 2026640
    },
    {
      "epoch": 3.3166735400587837,
      "grad_norm": 0.1602289080619812,
      "learning_rate": 3.388836541185928e-06,
      "loss": 0.0095,
      "step": 2026660
    },
    {
      "epoch": 3.3167062704974373,
      "grad_norm": 0.33123692870140076,
      "learning_rate": 3.3887706489724115e-06,
      "loss": 0.0087,
      "step": 2026680
    },
    {
      "epoch": 3.3167390009360904,
      "grad_norm": 0.15397930145263672,
      "learning_rate": 3.3887047567588942e-06,
      "loss": 0.0097,
      "step": 2026700
    },
    {
      "epoch": 3.316771731374744,
      "grad_norm": 1.0069013833999634,
      "learning_rate": 3.388638864545377e-06,
      "loss": 0.0093,
      "step": 2026720
    },
    {
      "epoch": 3.316804461813397,
      "grad_norm": 0.28791362047195435,
      "learning_rate": 3.38857297233186e-06,
      "loss": 0.0085,
      "step": 2026740
    },
    {
      "epoch": 3.3168371922520503,
      "grad_norm": 0.47366783022880554,
      "learning_rate": 3.388507080118343e-06,
      "loss": 0.0097,
      "step": 2026760
    },
    {
      "epoch": 3.316869922690704,
      "grad_norm": 0.2286507785320282,
      "learning_rate": 3.3884411879048256e-06,
      "loss": 0.01,
      "step": 2026780
    },
    {
      "epoch": 3.316902653129357,
      "grad_norm": 0.25747591257095337,
      "learning_rate": 3.3883752956913084e-06,
      "loss": 0.0154,
      "step": 2026800
    },
    {
      "epoch": 3.3169353835680107,
      "grad_norm": 0.1157698780298233,
      "learning_rate": 3.3883094034777915e-06,
      "loss": 0.0084,
      "step": 2026820
    },
    {
      "epoch": 3.316968114006664,
      "grad_norm": 0.3665519058704376,
      "learning_rate": 3.3882435112642743e-06,
      "loss": 0.0104,
      "step": 2026840
    },
    {
      "epoch": 3.3170008444453174,
      "grad_norm": 0.046244364231824875,
      "learning_rate": 3.388177619050757e-06,
      "loss": 0.0086,
      "step": 2026860
    },
    {
      "epoch": 3.3170335748839705,
      "grad_norm": 0.3381344676017761,
      "learning_rate": 3.3881117268372397e-06,
      "loss": 0.0178,
      "step": 2026880
    },
    {
      "epoch": 3.3170663053226237,
      "grad_norm": 0.12694023549556732,
      "learning_rate": 3.388045834623723e-06,
      "loss": 0.0103,
      "step": 2026900
    },
    {
      "epoch": 3.3170990357612773,
      "grad_norm": 0.24308671057224274,
      "learning_rate": 3.3879799424102057e-06,
      "loss": 0.0126,
      "step": 2026920
    },
    {
      "epoch": 3.3171317661999304,
      "grad_norm": 0.18147224187850952,
      "learning_rate": 3.3879140501966884e-06,
      "loss": 0.0096,
      "step": 2026940
    },
    {
      "epoch": 3.317164496638584,
      "grad_norm": 0.29635030031204224,
      "learning_rate": 3.387848157983171e-06,
      "loss": 0.0186,
      "step": 2026960
    },
    {
      "epoch": 3.317197227077237,
      "grad_norm": 0.21809367835521698,
      "learning_rate": 3.387782265769654e-06,
      "loss": 0.0083,
      "step": 2026980
    },
    {
      "epoch": 3.317229957515891,
      "grad_norm": 0.2982025444507599,
      "learning_rate": 3.387716373556137e-06,
      "loss": 0.008,
      "step": 2027000
    },
    {
      "epoch": 3.317262687954544,
      "grad_norm": 1.0252043008804321,
      "learning_rate": 3.3876504813426198e-06,
      "loss": 0.0136,
      "step": 2027020
    },
    {
      "epoch": 3.317295418393197,
      "grad_norm": 0.4332094192504883,
      "learning_rate": 3.3875845891291034e-06,
      "loss": 0.0091,
      "step": 2027040
    },
    {
      "epoch": 3.3173281488318507,
      "grad_norm": 0.11977402120828629,
      "learning_rate": 3.387518696915586e-06,
      "loss": 0.0132,
      "step": 2027060
    },
    {
      "epoch": 3.317360879270504,
      "grad_norm": 0.2766197919845581,
      "learning_rate": 3.387452804702069e-06,
      "loss": 0.0105,
      "step": 2027080
    },
    {
      "epoch": 3.3173936097091574,
      "grad_norm": 0.5255646109580994,
      "learning_rate": 3.3873869124885516e-06,
      "loss": 0.017,
      "step": 2027100
    },
    {
      "epoch": 3.3174263401478106,
      "grad_norm": 0.1385631412267685,
      "learning_rate": 3.3873210202750343e-06,
      "loss": 0.0055,
      "step": 2027120
    },
    {
      "epoch": 3.317459070586464,
      "grad_norm": 0.28234195709228516,
      "learning_rate": 3.3872551280615175e-06,
      "loss": 0.0163,
      "step": 2027140
    },
    {
      "epoch": 3.3174918010251173,
      "grad_norm": 0.46610885858535767,
      "learning_rate": 3.3871892358480002e-06,
      "loss": 0.011,
      "step": 2027160
    },
    {
      "epoch": 3.3175245314637705,
      "grad_norm": 0.28272250294685364,
      "learning_rate": 3.387123343634483e-06,
      "loss": 0.0109,
      "step": 2027180
    },
    {
      "epoch": 3.317557261902424,
      "grad_norm": 0.7489733695983887,
      "learning_rate": 3.3870574514209657e-06,
      "loss": 0.0124,
      "step": 2027200
    },
    {
      "epoch": 3.317589992341077,
      "grad_norm": 0.10890533775091171,
      "learning_rate": 3.386991559207449e-06,
      "loss": 0.0082,
      "step": 2027220
    },
    {
      "epoch": 3.317622722779731,
      "grad_norm": 0.19340048730373383,
      "learning_rate": 3.3869256669939316e-06,
      "loss": 0.0121,
      "step": 2027240
    },
    {
      "epoch": 3.317655453218384,
      "grad_norm": 0.22910800576210022,
      "learning_rate": 3.3868597747804143e-06,
      "loss": 0.0089,
      "step": 2027260
    },
    {
      "epoch": 3.3176881836570375,
      "grad_norm": 0.49767717719078064,
      "learning_rate": 3.386793882566897e-06,
      "loss": 0.0133,
      "step": 2027280
    },
    {
      "epoch": 3.3177209140956907,
      "grad_norm": 0.1945006549358368,
      "learning_rate": 3.3867279903533802e-06,
      "loss": 0.0096,
      "step": 2027300
    },
    {
      "epoch": 3.317753644534344,
      "grad_norm": 0.40868616104125977,
      "learning_rate": 3.386662098139863e-06,
      "loss": 0.012,
      "step": 2027320
    },
    {
      "epoch": 3.3177863749729974,
      "grad_norm": 0.21938447654247284,
      "learning_rate": 3.3865962059263457e-06,
      "loss": 0.0089,
      "step": 2027340
    },
    {
      "epoch": 3.3178191054116506,
      "grad_norm": 0.42137643694877625,
      "learning_rate": 3.3865303137128285e-06,
      "loss": 0.0076,
      "step": 2027360
    },
    {
      "epoch": 3.317851835850304,
      "grad_norm": 0.9564615488052368,
      "learning_rate": 3.3864644214993116e-06,
      "loss": 0.0164,
      "step": 2027380
    },
    {
      "epoch": 3.3178845662889573,
      "grad_norm": 0.35482820868492126,
      "learning_rate": 3.386398529285795e-06,
      "loss": 0.0167,
      "step": 2027400
    },
    {
      "epoch": 3.317917296727611,
      "grad_norm": 0.3305183947086334,
      "learning_rate": 3.3863326370722775e-06,
      "loss": 0.0152,
      "step": 2027420
    },
    {
      "epoch": 3.317950027166264,
      "grad_norm": 0.19836246967315674,
      "learning_rate": 3.3862667448587607e-06,
      "loss": 0.0122,
      "step": 2027440
    },
    {
      "epoch": 3.3179827576049172,
      "grad_norm": 0.7847838997840881,
      "learning_rate": 3.3862008526452434e-06,
      "loss": 0.011,
      "step": 2027460
    },
    {
      "epoch": 3.318015488043571,
      "grad_norm": 0.13499049842357635,
      "learning_rate": 3.386134960431726e-06,
      "loss": 0.0125,
      "step": 2027480
    },
    {
      "epoch": 3.318048218482224,
      "grad_norm": 0.11824603378772736,
      "learning_rate": 3.386069068218209e-06,
      "loss": 0.0126,
      "step": 2027500
    },
    {
      "epoch": 3.3180809489208776,
      "grad_norm": 0.3673853874206543,
      "learning_rate": 3.3860031760046917e-06,
      "loss": 0.0132,
      "step": 2027520
    },
    {
      "epoch": 3.3181136793595307,
      "grad_norm": 0.15825209021568298,
      "learning_rate": 3.385937283791175e-06,
      "loss": 0.0099,
      "step": 2027540
    },
    {
      "epoch": 3.3181464097981843,
      "grad_norm": 0.1628437340259552,
      "learning_rate": 3.3858713915776576e-06,
      "loss": 0.0064,
      "step": 2027560
    },
    {
      "epoch": 3.3181791402368375,
      "grad_norm": 0.30887946486473083,
      "learning_rate": 3.3858054993641403e-06,
      "loss": 0.0086,
      "step": 2027580
    },
    {
      "epoch": 3.3182118706754906,
      "grad_norm": 0.4946063458919525,
      "learning_rate": 3.385739607150623e-06,
      "loss": 0.0108,
      "step": 2027600
    },
    {
      "epoch": 3.318244601114144,
      "grad_norm": 0.45568200945854187,
      "learning_rate": 3.385673714937106e-06,
      "loss": 0.0144,
      "step": 2027620
    },
    {
      "epoch": 3.3182773315527974,
      "grad_norm": 0.33149170875549316,
      "learning_rate": 3.385607822723589e-06,
      "loss": 0.0099,
      "step": 2027640
    },
    {
      "epoch": 3.318310061991451,
      "grad_norm": 0.28423982858657837,
      "learning_rate": 3.3855419305100717e-06,
      "loss": 0.0163,
      "step": 2027660
    },
    {
      "epoch": 3.318342792430104,
      "grad_norm": 0.25668659806251526,
      "learning_rate": 3.3854760382965544e-06,
      "loss": 0.0102,
      "step": 2027680
    },
    {
      "epoch": 3.3183755228687577,
      "grad_norm": 1.0630953311920166,
      "learning_rate": 3.3854101460830376e-06,
      "loss": 0.011,
      "step": 2027700
    },
    {
      "epoch": 3.318408253307411,
      "grad_norm": 0.10769686847925186,
      "learning_rate": 3.3853442538695203e-06,
      "loss": 0.0094,
      "step": 2027720
    },
    {
      "epoch": 3.318440983746064,
      "grad_norm": 0.16785496473312378,
      "learning_rate": 3.3852783616560035e-06,
      "loss": 0.011,
      "step": 2027740
    },
    {
      "epoch": 3.3184737141847176,
      "grad_norm": 0.3217206597328186,
      "learning_rate": 3.3852124694424866e-06,
      "loss": 0.0141,
      "step": 2027760
    },
    {
      "epoch": 3.3185064446233707,
      "grad_norm": 0.8996809720993042,
      "learning_rate": 3.3851465772289694e-06,
      "loss": 0.0126,
      "step": 2027780
    },
    {
      "epoch": 3.3185391750620243,
      "grad_norm": 0.1735619455575943,
      "learning_rate": 3.385080685015452e-06,
      "loss": 0.0106,
      "step": 2027800
    },
    {
      "epoch": 3.3185719055006775,
      "grad_norm": 0.06106482818722725,
      "learning_rate": 3.385014792801935e-06,
      "loss": 0.0101,
      "step": 2027820
    },
    {
      "epoch": 3.318604635939331,
      "grad_norm": 0.5349324345588684,
      "learning_rate": 3.384948900588418e-06,
      "loss": 0.0114,
      "step": 2027840
    },
    {
      "epoch": 3.3186373663779842,
      "grad_norm": 0.3441406786441803,
      "learning_rate": 3.3848830083749008e-06,
      "loss": 0.0095,
      "step": 2027860
    },
    {
      "epoch": 3.3186700968166374,
      "grad_norm": 0.40888839960098267,
      "learning_rate": 3.3848171161613835e-06,
      "loss": 0.0093,
      "step": 2027880
    },
    {
      "epoch": 3.318702827255291,
      "grad_norm": 0.4633429944515228,
      "learning_rate": 3.3847512239478663e-06,
      "loss": 0.0108,
      "step": 2027900
    },
    {
      "epoch": 3.318735557693944,
      "grad_norm": 0.10927510261535645,
      "learning_rate": 3.3846853317343494e-06,
      "loss": 0.0108,
      "step": 2027920
    },
    {
      "epoch": 3.3187682881325977,
      "grad_norm": 0.3737551271915436,
      "learning_rate": 3.384619439520832e-06,
      "loss": 0.0205,
      "step": 2027940
    },
    {
      "epoch": 3.318801018571251,
      "grad_norm": 0.49493512511253357,
      "learning_rate": 3.384553547307315e-06,
      "loss": 0.0139,
      "step": 2027960
    },
    {
      "epoch": 3.3188337490099045,
      "grad_norm": 0.6030260324478149,
      "learning_rate": 3.3844876550937976e-06,
      "loss": 0.0136,
      "step": 2027980
    },
    {
      "epoch": 3.3188664794485576,
      "grad_norm": 0.1255994290113449,
      "learning_rate": 3.3844217628802804e-06,
      "loss": 0.0096,
      "step": 2028000
    },
    {
      "epoch": 3.3188992098872108,
      "grad_norm": 0.3481045067310333,
      "learning_rate": 3.3843558706667635e-06,
      "loss": 0.0116,
      "step": 2028020
    },
    {
      "epoch": 3.3189319403258644,
      "grad_norm": 0.12748821079730988,
      "learning_rate": 3.3842899784532463e-06,
      "loss": 0.0178,
      "step": 2028040
    },
    {
      "epoch": 3.3189646707645175,
      "grad_norm": 0.9476832151412964,
      "learning_rate": 3.384224086239729e-06,
      "loss": 0.0153,
      "step": 2028060
    },
    {
      "epoch": 3.318997401203171,
      "grad_norm": 0.22474417090415955,
      "learning_rate": 3.3841581940262118e-06,
      "loss": 0.0079,
      "step": 2028080
    },
    {
      "epoch": 3.3190301316418243,
      "grad_norm": 0.33033493161201477,
      "learning_rate": 3.3840923018126953e-06,
      "loss": 0.0101,
      "step": 2028100
    },
    {
      "epoch": 3.319062862080478,
      "grad_norm": 0.21460065245628357,
      "learning_rate": 3.384026409599178e-06,
      "loss": 0.0127,
      "step": 2028120
    },
    {
      "epoch": 3.319095592519131,
      "grad_norm": 0.26898083090782166,
      "learning_rate": 3.383960517385661e-06,
      "loss": 0.014,
      "step": 2028140
    },
    {
      "epoch": 3.319128322957784,
      "grad_norm": 0.11435096710920334,
      "learning_rate": 3.383894625172144e-06,
      "loss": 0.0101,
      "step": 2028160
    },
    {
      "epoch": 3.3191610533964377,
      "grad_norm": 0.18135295808315277,
      "learning_rate": 3.3838287329586267e-06,
      "loss": 0.0068,
      "step": 2028180
    },
    {
      "epoch": 3.319193783835091,
      "grad_norm": 0.14062602818012238,
      "learning_rate": 3.3837628407451095e-06,
      "loss": 0.0103,
      "step": 2028200
    },
    {
      "epoch": 3.3192265142737445,
      "grad_norm": 0.29219773411750793,
      "learning_rate": 3.383696948531592e-06,
      "loss": 0.0138,
      "step": 2028220
    },
    {
      "epoch": 3.3192592447123976,
      "grad_norm": 0.1870277374982834,
      "learning_rate": 3.3836310563180754e-06,
      "loss": 0.0104,
      "step": 2028240
    },
    {
      "epoch": 3.319291975151051,
      "grad_norm": 0.29119569063186646,
      "learning_rate": 3.383565164104558e-06,
      "loss": 0.0074,
      "step": 2028260
    },
    {
      "epoch": 3.3193247055897044,
      "grad_norm": 0.5404253602027893,
      "learning_rate": 3.383499271891041e-06,
      "loss": 0.0126,
      "step": 2028280
    },
    {
      "epoch": 3.3193574360283575,
      "grad_norm": 0.30061882734298706,
      "learning_rate": 3.3834333796775236e-06,
      "loss": 0.0135,
      "step": 2028300
    },
    {
      "epoch": 3.319390166467011,
      "grad_norm": 0.20328956842422485,
      "learning_rate": 3.3833674874640068e-06,
      "loss": 0.0106,
      "step": 2028320
    },
    {
      "epoch": 3.3194228969056643,
      "grad_norm": 0.09771843999624252,
      "learning_rate": 3.3833015952504895e-06,
      "loss": 0.0107,
      "step": 2028340
    },
    {
      "epoch": 3.3194556273443174,
      "grad_norm": 0.08136233687400818,
      "learning_rate": 3.3832357030369722e-06,
      "loss": 0.0065,
      "step": 2028360
    },
    {
      "epoch": 3.319488357782971,
      "grad_norm": 0.41855862736701965,
      "learning_rate": 3.383169810823455e-06,
      "loss": 0.0135,
      "step": 2028380
    },
    {
      "epoch": 3.319521088221624,
      "grad_norm": 0.07655113935470581,
      "learning_rate": 3.3831039186099377e-06,
      "loss": 0.0112,
      "step": 2028400
    },
    {
      "epoch": 3.3195538186602778,
      "grad_norm": 0.3418534994125366,
      "learning_rate": 3.383038026396421e-06,
      "loss": 0.0079,
      "step": 2028420
    },
    {
      "epoch": 3.319586549098931,
      "grad_norm": 0.7058440446853638,
      "learning_rate": 3.382972134182904e-06,
      "loss": 0.0137,
      "step": 2028440
    },
    {
      "epoch": 3.3196192795375845,
      "grad_norm": 0.45944103598594666,
      "learning_rate": 3.382906241969387e-06,
      "loss": 0.0116,
      "step": 2028460
    },
    {
      "epoch": 3.3196520099762377,
      "grad_norm": 0.37351301312446594,
      "learning_rate": 3.38284034975587e-06,
      "loss": 0.009,
      "step": 2028480
    },
    {
      "epoch": 3.319684740414891,
      "grad_norm": 0.46730273962020874,
      "learning_rate": 3.3827744575423527e-06,
      "loss": 0.0113,
      "step": 2028500
    },
    {
      "epoch": 3.3197174708535444,
      "grad_norm": 0.3278892934322357,
      "learning_rate": 3.3827085653288354e-06,
      "loss": 0.0071,
      "step": 2028520
    },
    {
      "epoch": 3.3197502012921976,
      "grad_norm": 0.19370600581169128,
      "learning_rate": 3.382642673115318e-06,
      "loss": 0.0094,
      "step": 2028540
    },
    {
      "epoch": 3.319782931730851,
      "grad_norm": 0.16041727364063263,
      "learning_rate": 3.3825767809018013e-06,
      "loss": 0.0066,
      "step": 2028560
    },
    {
      "epoch": 3.3198156621695043,
      "grad_norm": 0.17267687618732452,
      "learning_rate": 3.382510888688284e-06,
      "loss": 0.0094,
      "step": 2028580
    },
    {
      "epoch": 3.319848392608158,
      "grad_norm": 0.1782221645116806,
      "learning_rate": 3.382444996474767e-06,
      "loss": 0.0101,
      "step": 2028600
    },
    {
      "epoch": 3.319881123046811,
      "grad_norm": 0.22695603966712952,
      "learning_rate": 3.3823791042612495e-06,
      "loss": 0.0091,
      "step": 2028620
    },
    {
      "epoch": 3.319913853485464,
      "grad_norm": 0.1527353972196579,
      "learning_rate": 3.3823132120477327e-06,
      "loss": 0.0089,
      "step": 2028640
    },
    {
      "epoch": 3.319946583924118,
      "grad_norm": 0.17437709867954254,
      "learning_rate": 3.3822473198342154e-06,
      "loss": 0.0124,
      "step": 2028660
    },
    {
      "epoch": 3.319979314362771,
      "grad_norm": 0.12722225487232208,
      "learning_rate": 3.382181427620698e-06,
      "loss": 0.0183,
      "step": 2028680
    },
    {
      "epoch": 3.3200120448014245,
      "grad_norm": 0.14448948204517365,
      "learning_rate": 3.382115535407181e-06,
      "loss": 0.0117,
      "step": 2028700
    },
    {
      "epoch": 3.3200447752400777,
      "grad_norm": 0.3380129635334015,
      "learning_rate": 3.382049643193664e-06,
      "loss": 0.0142,
      "step": 2028720
    },
    {
      "epoch": 3.3200775056787313,
      "grad_norm": 0.22406554222106934,
      "learning_rate": 3.381983750980147e-06,
      "loss": 0.013,
      "step": 2028740
    },
    {
      "epoch": 3.3201102361173844,
      "grad_norm": 0.33604487776756287,
      "learning_rate": 3.3819178587666296e-06,
      "loss": 0.0096,
      "step": 2028760
    },
    {
      "epoch": 3.3201429665560376,
      "grad_norm": 1.518481731414795,
      "learning_rate": 3.3818519665531123e-06,
      "loss": 0.0112,
      "step": 2028780
    },
    {
      "epoch": 3.320175696994691,
      "grad_norm": 0.4693663418292999,
      "learning_rate": 3.381786074339596e-06,
      "loss": 0.0105,
      "step": 2028800
    },
    {
      "epoch": 3.3202084274333443,
      "grad_norm": 0.2322382777929306,
      "learning_rate": 3.3817201821260786e-06,
      "loss": 0.0102,
      "step": 2028820
    },
    {
      "epoch": 3.320241157871998,
      "grad_norm": 0.733954131603241,
      "learning_rate": 3.3816542899125614e-06,
      "loss": 0.0138,
      "step": 2028840
    },
    {
      "epoch": 3.320273888310651,
      "grad_norm": 0.1399887204170227,
      "learning_rate": 3.3815883976990445e-06,
      "loss": 0.0173,
      "step": 2028860
    },
    {
      "epoch": 3.3203066187493047,
      "grad_norm": 0.12393057346343994,
      "learning_rate": 3.3815225054855273e-06,
      "loss": 0.0177,
      "step": 2028880
    },
    {
      "epoch": 3.320339349187958,
      "grad_norm": 0.20651879906654358,
      "learning_rate": 3.38145661327201e-06,
      "loss": 0.0107,
      "step": 2028900
    },
    {
      "epoch": 3.320372079626611,
      "grad_norm": 0.6401481032371521,
      "learning_rate": 3.3813907210584928e-06,
      "loss": 0.0134,
      "step": 2028920
    },
    {
      "epoch": 3.3204048100652646,
      "grad_norm": 0.2977808117866516,
      "learning_rate": 3.3813248288449755e-06,
      "loss": 0.0134,
      "step": 2028940
    },
    {
      "epoch": 3.3204375405039177,
      "grad_norm": 0.2187463939189911,
      "learning_rate": 3.3812589366314587e-06,
      "loss": 0.0133,
      "step": 2028960
    },
    {
      "epoch": 3.3204702709425713,
      "grad_norm": 0.1415962427854538,
      "learning_rate": 3.3811930444179414e-06,
      "loss": 0.0156,
      "step": 2028980
    },
    {
      "epoch": 3.3205030013812245,
      "grad_norm": 0.2261987328529358,
      "learning_rate": 3.381127152204424e-06,
      "loss": 0.0128,
      "step": 2029000
    },
    {
      "epoch": 3.320535731819878,
      "grad_norm": 0.24720913171768188,
      "learning_rate": 3.381061259990907e-06,
      "loss": 0.0135,
      "step": 2029020
    },
    {
      "epoch": 3.320568462258531,
      "grad_norm": 0.4250543713569641,
      "learning_rate": 3.38099536777739e-06,
      "loss": 0.0174,
      "step": 2029040
    },
    {
      "epoch": 3.3206011926971843,
      "grad_norm": 0.4402052164077759,
      "learning_rate": 3.3809294755638728e-06,
      "loss": 0.013,
      "step": 2029060
    },
    {
      "epoch": 3.320633923135838,
      "grad_norm": 0.315278559923172,
      "learning_rate": 3.3808635833503555e-06,
      "loss": 0.0111,
      "step": 2029080
    },
    {
      "epoch": 3.320666653574491,
      "grad_norm": 0.4933103322982788,
      "learning_rate": 3.3807976911368383e-06,
      "loss": 0.0121,
      "step": 2029100
    },
    {
      "epoch": 3.3206993840131447,
      "grad_norm": 0.22936756908893585,
      "learning_rate": 3.3807317989233214e-06,
      "loss": 0.0153,
      "step": 2029120
    },
    {
      "epoch": 3.320732114451798,
      "grad_norm": 0.10564153641462326,
      "learning_rate": 3.380665906709804e-06,
      "loss": 0.0089,
      "step": 2029140
    },
    {
      "epoch": 3.3207648448904514,
      "grad_norm": 0.16120190918445587,
      "learning_rate": 3.3806000144962873e-06,
      "loss": 0.013,
      "step": 2029160
    },
    {
      "epoch": 3.3207975753291046,
      "grad_norm": 0.6786546111106873,
      "learning_rate": 3.3805341222827705e-06,
      "loss": 0.0112,
      "step": 2029180
    },
    {
      "epoch": 3.3208303057677577,
      "grad_norm": 0.2194124311208725,
      "learning_rate": 3.3804682300692532e-06,
      "loss": 0.0147,
      "step": 2029200
    },
    {
      "epoch": 3.3208630362064113,
      "grad_norm": 0.26557162404060364,
      "learning_rate": 3.380402337855736e-06,
      "loss": 0.0089,
      "step": 2029220
    },
    {
      "epoch": 3.3208957666450645,
      "grad_norm": 0.14780199527740479,
      "learning_rate": 3.3803364456422187e-06,
      "loss": 0.0114,
      "step": 2029240
    },
    {
      "epoch": 3.320928497083718,
      "grad_norm": 0.1591130644083023,
      "learning_rate": 3.380270553428702e-06,
      "loss": 0.0096,
      "step": 2029260
    },
    {
      "epoch": 3.320961227522371,
      "grad_norm": 0.3535189628601074,
      "learning_rate": 3.3802046612151846e-06,
      "loss": 0.0142,
      "step": 2029280
    },
    {
      "epoch": 3.320993957961025,
      "grad_norm": 0.12668569386005402,
      "learning_rate": 3.3801387690016674e-06,
      "loss": 0.0102,
      "step": 2029300
    },
    {
      "epoch": 3.321026688399678,
      "grad_norm": 0.50248122215271,
      "learning_rate": 3.38007287678815e-06,
      "loss": 0.0118,
      "step": 2029320
    },
    {
      "epoch": 3.321059418838331,
      "grad_norm": 0.37240979075431824,
      "learning_rate": 3.3800069845746333e-06,
      "loss": 0.0089,
      "step": 2029340
    },
    {
      "epoch": 3.3210921492769847,
      "grad_norm": 0.21709036827087402,
      "learning_rate": 3.379941092361116e-06,
      "loss": 0.0155,
      "step": 2029360
    },
    {
      "epoch": 3.321124879715638,
      "grad_norm": 0.9371151924133301,
      "learning_rate": 3.3798752001475987e-06,
      "loss": 0.0131,
      "step": 2029380
    },
    {
      "epoch": 3.3211576101542915,
      "grad_norm": 0.15821658074855804,
      "learning_rate": 3.3798093079340815e-06,
      "loss": 0.0071,
      "step": 2029400
    },
    {
      "epoch": 3.3211903405929446,
      "grad_norm": 0.11395671963691711,
      "learning_rate": 3.3797434157205642e-06,
      "loss": 0.0092,
      "step": 2029420
    },
    {
      "epoch": 3.321223071031598,
      "grad_norm": 0.4531625807285309,
      "learning_rate": 3.3796775235070474e-06,
      "loss": 0.0081,
      "step": 2029440
    },
    {
      "epoch": 3.3212558014702513,
      "grad_norm": 0.3193846642971039,
      "learning_rate": 3.37961163129353e-06,
      "loss": 0.0103,
      "step": 2029460
    },
    {
      "epoch": 3.3212885319089045,
      "grad_norm": 0.151705801486969,
      "learning_rate": 3.379545739080013e-06,
      "loss": 0.0125,
      "step": 2029480
    },
    {
      "epoch": 3.321321262347558,
      "grad_norm": 0.7802526354789734,
      "learning_rate": 3.3794798468664964e-06,
      "loss": 0.0115,
      "step": 2029500
    },
    {
      "epoch": 3.3213539927862112,
      "grad_norm": 1.23179292678833,
      "learning_rate": 3.379413954652979e-06,
      "loss": 0.0122,
      "step": 2029520
    },
    {
      "epoch": 3.321386723224865,
      "grad_norm": 0.35174691677093506,
      "learning_rate": 3.379348062439462e-06,
      "loss": 0.0125,
      "step": 2029540
    },
    {
      "epoch": 3.321419453663518,
      "grad_norm": 0.8413746953010559,
      "learning_rate": 3.3792821702259447e-06,
      "loss": 0.0131,
      "step": 2029560
    },
    {
      "epoch": 3.3214521841021716,
      "grad_norm": 0.264493465423584,
      "learning_rate": 3.379216278012428e-06,
      "loss": 0.015,
      "step": 2029580
    },
    {
      "epoch": 3.3214849145408247,
      "grad_norm": 0.27114638686180115,
      "learning_rate": 3.3791503857989106e-06,
      "loss": 0.0108,
      "step": 2029600
    },
    {
      "epoch": 3.321517644979478,
      "grad_norm": 0.24324658513069153,
      "learning_rate": 3.3790844935853933e-06,
      "loss": 0.0108,
      "step": 2029620
    },
    {
      "epoch": 3.3215503754181315,
      "grad_norm": 0.21195900440216064,
      "learning_rate": 3.379018601371876e-06,
      "loss": 0.0103,
      "step": 2029640
    },
    {
      "epoch": 3.3215831058567846,
      "grad_norm": 0.5849090814590454,
      "learning_rate": 3.378952709158359e-06,
      "loss": 0.0198,
      "step": 2029660
    },
    {
      "epoch": 3.321615836295438,
      "grad_norm": 0.5685182213783264,
      "learning_rate": 3.378886816944842e-06,
      "loss": 0.0138,
      "step": 2029680
    },
    {
      "epoch": 3.3216485667340914,
      "grad_norm": 0.1566241830587387,
      "learning_rate": 3.3788209247313247e-06,
      "loss": 0.0164,
      "step": 2029700
    },
    {
      "epoch": 3.3216812971727445,
      "grad_norm": 0.36755093932151794,
      "learning_rate": 3.3787550325178074e-06,
      "loss": 0.0102,
      "step": 2029720
    },
    {
      "epoch": 3.321714027611398,
      "grad_norm": 0.20472249388694763,
      "learning_rate": 3.3786891403042906e-06,
      "loss": 0.0125,
      "step": 2029740
    },
    {
      "epoch": 3.3217467580500513,
      "grad_norm": 0.22387667000293732,
      "learning_rate": 3.3786232480907733e-06,
      "loss": 0.01,
      "step": 2029760
    },
    {
      "epoch": 3.321779488488705,
      "grad_norm": 0.5644460320472717,
      "learning_rate": 3.378557355877256e-06,
      "loss": 0.0245,
      "step": 2029780
    },
    {
      "epoch": 3.321812218927358,
      "grad_norm": 0.25605788826942444,
      "learning_rate": 3.378491463663739e-06,
      "loss": 0.0123,
      "step": 2029800
    },
    {
      "epoch": 3.321844949366011,
      "grad_norm": 0.12174883484840393,
      "learning_rate": 3.378425571450222e-06,
      "loss": 0.0119,
      "step": 2029820
    },
    {
      "epoch": 3.3218776798046648,
      "grad_norm": 0.3148985803127289,
      "learning_rate": 3.3783596792367047e-06,
      "loss": 0.0077,
      "step": 2029840
    },
    {
      "epoch": 3.321910410243318,
      "grad_norm": 0.23873858153820038,
      "learning_rate": 3.378293787023188e-06,
      "loss": 0.0088,
      "step": 2029860
    },
    {
      "epoch": 3.3219431406819715,
      "grad_norm": 0.6756297945976257,
      "learning_rate": 3.378227894809671e-06,
      "loss": 0.0115,
      "step": 2029880
    },
    {
      "epoch": 3.3219758711206246,
      "grad_norm": 0.24156871438026428,
      "learning_rate": 3.3781620025961538e-06,
      "loss": 0.0123,
      "step": 2029900
    },
    {
      "epoch": 3.3220086015592782,
      "grad_norm": 0.1906851828098297,
      "learning_rate": 3.3780961103826365e-06,
      "loss": 0.0083,
      "step": 2029920
    },
    {
      "epoch": 3.3220413319979314,
      "grad_norm": 0.5163300037384033,
      "learning_rate": 3.3780302181691193e-06,
      "loss": 0.0129,
      "step": 2029940
    },
    {
      "epoch": 3.3220740624365845,
      "grad_norm": 0.1282714456319809,
      "learning_rate": 3.377964325955602e-06,
      "loss": 0.0118,
      "step": 2029960
    },
    {
      "epoch": 3.322106792875238,
      "grad_norm": 0.20193666219711304,
      "learning_rate": 3.377898433742085e-06,
      "loss": 0.0093,
      "step": 2029980
    },
    {
      "epoch": 3.3221395233138913,
      "grad_norm": 0.2440991997718811,
      "learning_rate": 3.377832541528568e-06,
      "loss": 0.0105,
      "step": 2030000
    },
    {
      "epoch": 3.322172253752545,
      "grad_norm": 0.12729240953922272,
      "learning_rate": 3.3777666493150506e-06,
      "loss": 0.0105,
      "step": 2030020
    },
    {
      "epoch": 3.322204984191198,
      "grad_norm": 0.17612887918949127,
      "learning_rate": 3.3777007571015334e-06,
      "loss": 0.0122,
      "step": 2030040
    },
    {
      "epoch": 3.3222377146298516,
      "grad_norm": 0.22753438353538513,
      "learning_rate": 3.3776348648880165e-06,
      "loss": 0.0102,
      "step": 2030060
    },
    {
      "epoch": 3.3222704450685048,
      "grad_norm": 0.43197059631347656,
      "learning_rate": 3.3775689726744993e-06,
      "loss": 0.0107,
      "step": 2030080
    },
    {
      "epoch": 3.322303175507158,
      "grad_norm": 0.22561490535736084,
      "learning_rate": 3.377503080460982e-06,
      "loss": 0.0079,
      "step": 2030100
    },
    {
      "epoch": 3.3223359059458115,
      "grad_norm": 0.04610297456383705,
      "learning_rate": 3.3774371882474648e-06,
      "loss": 0.0109,
      "step": 2030120
    },
    {
      "epoch": 3.3223686363844647,
      "grad_norm": 0.31002259254455566,
      "learning_rate": 3.377371296033948e-06,
      "loss": 0.0117,
      "step": 2030140
    },
    {
      "epoch": 3.3224013668231183,
      "grad_norm": 0.5566012859344482,
      "learning_rate": 3.3773054038204307e-06,
      "loss": 0.0119,
      "step": 2030160
    },
    {
      "epoch": 3.3224340972617714,
      "grad_norm": 0.3369264602661133,
      "learning_rate": 3.3772395116069134e-06,
      "loss": 0.01,
      "step": 2030180
    },
    {
      "epoch": 3.322466827700425,
      "grad_norm": 0.31475675106048584,
      "learning_rate": 3.377173619393397e-06,
      "loss": 0.0112,
      "step": 2030200
    },
    {
      "epoch": 3.322499558139078,
      "grad_norm": 0.18443404138088226,
      "learning_rate": 3.3771077271798797e-06,
      "loss": 0.0106,
      "step": 2030220
    },
    {
      "epoch": 3.3225322885777313,
      "grad_norm": 0.11516796052455902,
      "learning_rate": 3.3770418349663625e-06,
      "loss": 0.0109,
      "step": 2030240
    },
    {
      "epoch": 3.322565019016385,
      "grad_norm": 0.4304666817188263,
      "learning_rate": 3.376975942752845e-06,
      "loss": 0.0136,
      "step": 2030260
    },
    {
      "epoch": 3.322597749455038,
      "grad_norm": 0.5047832727432251,
      "learning_rate": 3.3769100505393284e-06,
      "loss": 0.0117,
      "step": 2030280
    },
    {
      "epoch": 3.3226304798936916,
      "grad_norm": 0.1376640349626541,
      "learning_rate": 3.376844158325811e-06,
      "loss": 0.0072,
      "step": 2030300
    },
    {
      "epoch": 3.322663210332345,
      "grad_norm": 0.3695974051952362,
      "learning_rate": 3.376778266112294e-06,
      "loss": 0.012,
      "step": 2030320
    },
    {
      "epoch": 3.3226959407709984,
      "grad_norm": 0.3000776171684265,
      "learning_rate": 3.3767123738987766e-06,
      "loss": 0.0086,
      "step": 2030340
    },
    {
      "epoch": 3.3227286712096515,
      "grad_norm": 0.4271462559700012,
      "learning_rate": 3.3766464816852598e-06,
      "loss": 0.0141,
      "step": 2030360
    },
    {
      "epoch": 3.3227614016483047,
      "grad_norm": 0.45703214406967163,
      "learning_rate": 3.3765805894717425e-06,
      "loss": 0.0137,
      "step": 2030380
    },
    {
      "epoch": 3.3227941320869583,
      "grad_norm": 0.37825530767440796,
      "learning_rate": 3.3765146972582252e-06,
      "loss": 0.0132,
      "step": 2030400
    },
    {
      "epoch": 3.3228268625256114,
      "grad_norm": 0.059480760246515274,
      "learning_rate": 3.376448805044708e-06,
      "loss": 0.0096,
      "step": 2030420
    },
    {
      "epoch": 3.322859592964265,
      "grad_norm": 0.22477753460407257,
      "learning_rate": 3.3763829128311907e-06,
      "loss": 0.0182,
      "step": 2030440
    },
    {
      "epoch": 3.322892323402918,
      "grad_norm": 0.16011792421340942,
      "learning_rate": 3.376317020617674e-06,
      "loss": 0.0135,
      "step": 2030460
    },
    {
      "epoch": 3.3229250538415718,
      "grad_norm": 0.050479955971241,
      "learning_rate": 3.3762511284041566e-06,
      "loss": 0.0115,
      "step": 2030480
    },
    {
      "epoch": 3.322957784280225,
      "grad_norm": 0.169973224401474,
      "learning_rate": 3.3761852361906394e-06,
      "loss": 0.0068,
      "step": 2030500
    },
    {
      "epoch": 3.322990514718878,
      "grad_norm": 0.2930242419242859,
      "learning_rate": 3.376119343977122e-06,
      "loss": 0.0103,
      "step": 2030520
    },
    {
      "epoch": 3.3230232451575317,
      "grad_norm": 0.2646294832229614,
      "learning_rate": 3.3760534517636053e-06,
      "loss": 0.0106,
      "step": 2030540
    },
    {
      "epoch": 3.323055975596185,
      "grad_norm": 0.25837334990501404,
      "learning_rate": 3.3759875595500884e-06,
      "loss": 0.0095,
      "step": 2030560
    },
    {
      "epoch": 3.3230887060348384,
      "grad_norm": 0.35189294815063477,
      "learning_rate": 3.375921667336571e-06,
      "loss": 0.0138,
      "step": 2030580
    },
    {
      "epoch": 3.3231214364734916,
      "grad_norm": 0.2123664766550064,
      "learning_rate": 3.3758557751230543e-06,
      "loss": 0.0102,
      "step": 2030600
    },
    {
      "epoch": 3.323154166912145,
      "grad_norm": 0.23377379775047302,
      "learning_rate": 3.375789882909537e-06,
      "loss": 0.0075,
      "step": 2030620
    },
    {
      "epoch": 3.3231868973507983,
      "grad_norm": 0.08454838395118713,
      "learning_rate": 3.37572399069602e-06,
      "loss": 0.0107,
      "step": 2030640
    },
    {
      "epoch": 3.3232196277894515,
      "grad_norm": 0.2713048458099365,
      "learning_rate": 3.3756580984825025e-06,
      "loss": 0.0117,
      "step": 2030660
    },
    {
      "epoch": 3.323252358228105,
      "grad_norm": 0.32609325647354126,
      "learning_rate": 3.3755922062689857e-06,
      "loss": 0.0083,
      "step": 2030680
    },
    {
      "epoch": 3.323285088666758,
      "grad_norm": 0.10603779554367065,
      "learning_rate": 3.3755263140554685e-06,
      "loss": 0.0098,
      "step": 2030700
    },
    {
      "epoch": 3.323317819105412,
      "grad_norm": 0.29301926493644714,
      "learning_rate": 3.375460421841951e-06,
      "loss": 0.0101,
      "step": 2030720
    },
    {
      "epoch": 3.323350549544065,
      "grad_norm": 0.3227287232875824,
      "learning_rate": 3.375394529628434e-06,
      "loss": 0.0114,
      "step": 2030740
    },
    {
      "epoch": 3.3233832799827185,
      "grad_norm": 0.220067098736763,
      "learning_rate": 3.375328637414917e-06,
      "loss": 0.0106,
      "step": 2030760
    },
    {
      "epoch": 3.3234160104213717,
      "grad_norm": 0.15671619772911072,
      "learning_rate": 3.3752627452014e-06,
      "loss": 0.0168,
      "step": 2030780
    },
    {
      "epoch": 3.323448740860025,
      "grad_norm": 0.1322970688343048,
      "learning_rate": 3.3751968529878826e-06,
      "loss": 0.0101,
      "step": 2030800
    },
    {
      "epoch": 3.3234814712986784,
      "grad_norm": 0.17887111008167267,
      "learning_rate": 3.3751309607743653e-06,
      "loss": 0.012,
      "step": 2030820
    },
    {
      "epoch": 3.3235142017373316,
      "grad_norm": 0.11701954901218414,
      "learning_rate": 3.375065068560848e-06,
      "loss": 0.0167,
      "step": 2030840
    },
    {
      "epoch": 3.323546932175985,
      "grad_norm": 0.20848049223423004,
      "learning_rate": 3.3749991763473312e-06,
      "loss": 0.01,
      "step": 2030860
    },
    {
      "epoch": 3.3235796626146383,
      "grad_norm": 0.10530301928520203,
      "learning_rate": 3.374933284133814e-06,
      "loss": 0.008,
      "step": 2030880
    },
    {
      "epoch": 3.323612393053292,
      "grad_norm": 0.18239417672157288,
      "learning_rate": 3.3748673919202975e-06,
      "loss": 0.0124,
      "step": 2030900
    },
    {
      "epoch": 3.323645123491945,
      "grad_norm": 0.20827001333236694,
      "learning_rate": 3.3748014997067803e-06,
      "loss": 0.0126,
      "step": 2030920
    },
    {
      "epoch": 3.3236778539305982,
      "grad_norm": 0.3208770751953125,
      "learning_rate": 3.374735607493263e-06,
      "loss": 0.0075,
      "step": 2030940
    },
    {
      "epoch": 3.323710584369252,
      "grad_norm": 0.045022860169410706,
      "learning_rate": 3.3746697152797458e-06,
      "loss": 0.009,
      "step": 2030960
    },
    {
      "epoch": 3.323743314807905,
      "grad_norm": 1.4748355150222778,
      "learning_rate": 3.3746038230662285e-06,
      "loss": 0.0122,
      "step": 2030980
    },
    {
      "epoch": 3.3237760452465586,
      "grad_norm": 0.36069658398628235,
      "learning_rate": 3.3745379308527117e-06,
      "loss": 0.0181,
      "step": 2031000
    },
    {
      "epoch": 3.3238087756852117,
      "grad_norm": 0.13888892531394958,
      "learning_rate": 3.3744720386391944e-06,
      "loss": 0.0136,
      "step": 2031020
    },
    {
      "epoch": 3.3238415061238653,
      "grad_norm": 0.7763307094573975,
      "learning_rate": 3.374406146425677e-06,
      "loss": 0.012,
      "step": 2031040
    },
    {
      "epoch": 3.3238742365625185,
      "grad_norm": 0.3950072228908539,
      "learning_rate": 3.37434025421216e-06,
      "loss": 0.0082,
      "step": 2031060
    },
    {
      "epoch": 3.3239069670011716,
      "grad_norm": 0.07737147808074951,
      "learning_rate": 3.374274361998643e-06,
      "loss": 0.0106,
      "step": 2031080
    },
    {
      "epoch": 3.323939697439825,
      "grad_norm": 0.2902134954929352,
      "learning_rate": 3.3742084697851258e-06,
      "loss": 0.0148,
      "step": 2031100
    },
    {
      "epoch": 3.3239724278784784,
      "grad_norm": 0.2188941240310669,
      "learning_rate": 3.3741425775716085e-06,
      "loss": 0.0118,
      "step": 2031120
    },
    {
      "epoch": 3.324005158317132,
      "grad_norm": 0.18422026932239532,
      "learning_rate": 3.3740766853580913e-06,
      "loss": 0.0122,
      "step": 2031140
    },
    {
      "epoch": 3.324037888755785,
      "grad_norm": 0.3144463002681732,
      "learning_rate": 3.3740107931445744e-06,
      "loss": 0.0125,
      "step": 2031160
    },
    {
      "epoch": 3.3240706191944387,
      "grad_norm": 0.4061204791069031,
      "learning_rate": 3.373944900931057e-06,
      "loss": 0.0103,
      "step": 2031180
    },
    {
      "epoch": 3.324103349633092,
      "grad_norm": 0.3450762927532196,
      "learning_rate": 3.37387900871754e-06,
      "loss": 0.0159,
      "step": 2031200
    },
    {
      "epoch": 3.324136080071745,
      "grad_norm": 3.220902681350708,
      "learning_rate": 3.3738131165040226e-06,
      "loss": 0.01,
      "step": 2031220
    },
    {
      "epoch": 3.3241688105103986,
      "grad_norm": 0.19113922119140625,
      "learning_rate": 3.373747224290506e-06,
      "loss": 0.0088,
      "step": 2031240
    },
    {
      "epoch": 3.3242015409490517,
      "grad_norm": 0.8256711363792419,
      "learning_rate": 3.373681332076989e-06,
      "loss": 0.0098,
      "step": 2031260
    },
    {
      "epoch": 3.3242342713877053,
      "grad_norm": 0.13288232684135437,
      "learning_rate": 3.3736154398634717e-06,
      "loss": 0.0127,
      "step": 2031280
    },
    {
      "epoch": 3.3242670018263585,
      "grad_norm": 0.22746871411800385,
      "learning_rate": 3.373549547649955e-06,
      "loss": 0.0071,
      "step": 2031300
    },
    {
      "epoch": 3.3242997322650116,
      "grad_norm": 0.4859601855278015,
      "learning_rate": 3.3734836554364376e-06,
      "loss": 0.0101,
      "step": 2031320
    },
    {
      "epoch": 3.3243324627036652,
      "grad_norm": 0.8159964680671692,
      "learning_rate": 3.3734177632229204e-06,
      "loss": 0.0126,
      "step": 2031340
    },
    {
      "epoch": 3.3243651931423184,
      "grad_norm": 0.1332848072052002,
      "learning_rate": 3.373351871009403e-06,
      "loss": 0.011,
      "step": 2031360
    },
    {
      "epoch": 3.324397923580972,
      "grad_norm": 0.30422693490982056,
      "learning_rate": 3.373285978795886e-06,
      "loss": 0.012,
      "step": 2031380
    },
    {
      "epoch": 3.324430654019625,
      "grad_norm": 0.36936575174331665,
      "learning_rate": 3.373220086582369e-06,
      "loss": 0.0142,
      "step": 2031400
    },
    {
      "epoch": 3.3244633844582783,
      "grad_norm": 0.544597327709198,
      "learning_rate": 3.3731541943688517e-06,
      "loss": 0.0084,
      "step": 2031420
    },
    {
      "epoch": 3.324496114896932,
      "grad_norm": 0.5127602219581604,
      "learning_rate": 3.3730883021553345e-06,
      "loss": 0.0139,
      "step": 2031440
    },
    {
      "epoch": 3.324528845335585,
      "grad_norm": 0.1531720906496048,
      "learning_rate": 3.3730224099418172e-06,
      "loss": 0.0102,
      "step": 2031460
    },
    {
      "epoch": 3.3245615757742386,
      "grad_norm": 0.15895308554172516,
      "learning_rate": 3.3729565177283004e-06,
      "loss": 0.0122,
      "step": 2031480
    },
    {
      "epoch": 3.3245943062128918,
      "grad_norm": 0.3809400200843811,
      "learning_rate": 3.372890625514783e-06,
      "loss": 0.0145,
      "step": 2031500
    },
    {
      "epoch": 3.3246270366515454,
      "grad_norm": 0.511979341506958,
      "learning_rate": 3.372824733301266e-06,
      "loss": 0.0087,
      "step": 2031520
    },
    {
      "epoch": 3.3246597670901985,
      "grad_norm": 0.21019718050956726,
      "learning_rate": 3.3727588410877486e-06,
      "loss": 0.0142,
      "step": 2031540
    },
    {
      "epoch": 3.3246924975288517,
      "grad_norm": 0.24749484658241272,
      "learning_rate": 3.3726929488742318e-06,
      "loss": 0.0142,
      "step": 2031560
    },
    {
      "epoch": 3.3247252279675052,
      "grad_norm": 0.14733891189098358,
      "learning_rate": 3.3726270566607145e-06,
      "loss": 0.0114,
      "step": 2031580
    },
    {
      "epoch": 3.3247579584061584,
      "grad_norm": 0.11859400570392609,
      "learning_rate": 3.3725611644471972e-06,
      "loss": 0.0126,
      "step": 2031600
    },
    {
      "epoch": 3.324790688844812,
      "grad_norm": 0.17641881108283997,
      "learning_rate": 3.372495272233681e-06,
      "loss": 0.0066,
      "step": 2031620
    },
    {
      "epoch": 3.324823419283465,
      "grad_norm": 0.262580543756485,
      "learning_rate": 3.3724293800201636e-06,
      "loss": 0.0133,
      "step": 2031640
    },
    {
      "epoch": 3.3248561497221187,
      "grad_norm": 0.13025164604187012,
      "learning_rate": 3.3723634878066463e-06,
      "loss": 0.008,
      "step": 2031660
    },
    {
      "epoch": 3.324888880160772,
      "grad_norm": 0.33422183990478516,
      "learning_rate": 3.372297595593129e-06,
      "loss": 0.01,
      "step": 2031680
    },
    {
      "epoch": 3.324921610599425,
      "grad_norm": 0.11857565492391586,
      "learning_rate": 3.3722317033796122e-06,
      "loss": 0.0117,
      "step": 2031700
    },
    {
      "epoch": 3.3249543410380786,
      "grad_norm": 0.6201618313789368,
      "learning_rate": 3.372165811166095e-06,
      "loss": 0.0083,
      "step": 2031720
    },
    {
      "epoch": 3.324987071476732,
      "grad_norm": 0.3227733075618744,
      "learning_rate": 3.3720999189525777e-06,
      "loss": 0.0092,
      "step": 2031740
    },
    {
      "epoch": 3.3250198019153854,
      "grad_norm": 0.29714128375053406,
      "learning_rate": 3.3720340267390604e-06,
      "loss": 0.0187,
      "step": 2031760
    },
    {
      "epoch": 3.3250525323540385,
      "grad_norm": 0.36329227685928345,
      "learning_rate": 3.3719681345255436e-06,
      "loss": 0.0213,
      "step": 2031780
    },
    {
      "epoch": 3.325085262792692,
      "grad_norm": 0.3915832042694092,
      "learning_rate": 3.3719022423120263e-06,
      "loss": 0.0088,
      "step": 2031800
    },
    {
      "epoch": 3.3251179932313453,
      "grad_norm": 0.3714838922023773,
      "learning_rate": 3.371836350098509e-06,
      "loss": 0.0089,
      "step": 2031820
    },
    {
      "epoch": 3.3251507236699984,
      "grad_norm": 0.43008360266685486,
      "learning_rate": 3.371770457884992e-06,
      "loss": 0.0115,
      "step": 2031840
    },
    {
      "epoch": 3.325183454108652,
      "grad_norm": 0.16950711607933044,
      "learning_rate": 3.3717045656714746e-06,
      "loss": 0.0104,
      "step": 2031860
    },
    {
      "epoch": 3.325216184547305,
      "grad_norm": 0.11673349142074585,
      "learning_rate": 3.3716386734579577e-06,
      "loss": 0.0101,
      "step": 2031880
    },
    {
      "epoch": 3.3252489149859588,
      "grad_norm": 1.0865147113800049,
      "learning_rate": 3.3715727812444405e-06,
      "loss": 0.0116,
      "step": 2031900
    },
    {
      "epoch": 3.325281645424612,
      "grad_norm": 0.17187918722629547,
      "learning_rate": 3.371506889030923e-06,
      "loss": 0.0114,
      "step": 2031920
    },
    {
      "epoch": 3.3253143758632655,
      "grad_norm": 0.21304982900619507,
      "learning_rate": 3.371440996817406e-06,
      "loss": 0.013,
      "step": 2031940
    },
    {
      "epoch": 3.3253471063019187,
      "grad_norm": 0.3932850956916809,
      "learning_rate": 3.3713751046038895e-06,
      "loss": 0.0086,
      "step": 2031960
    },
    {
      "epoch": 3.325379836740572,
      "grad_norm": 0.28551608324050903,
      "learning_rate": 3.3713092123903723e-06,
      "loss": 0.0129,
      "step": 2031980
    },
    {
      "epoch": 3.3254125671792254,
      "grad_norm": 0.21382419764995575,
      "learning_rate": 3.371243320176855e-06,
      "loss": 0.0098,
      "step": 2032000
    },
    {
      "epoch": 3.3254452976178785,
      "grad_norm": 0.06357311457395554,
      "learning_rate": 3.371177427963338e-06,
      "loss": 0.0101,
      "step": 2032020
    },
    {
      "epoch": 3.325478028056532,
      "grad_norm": 0.2771078050136566,
      "learning_rate": 3.371111535749821e-06,
      "loss": 0.012,
      "step": 2032040
    },
    {
      "epoch": 3.3255107584951853,
      "grad_norm": 0.28189516067504883,
      "learning_rate": 3.3710456435363036e-06,
      "loss": 0.0152,
      "step": 2032060
    },
    {
      "epoch": 3.325543488933839,
      "grad_norm": 0.14459088444709778,
      "learning_rate": 3.3709797513227864e-06,
      "loss": 0.0136,
      "step": 2032080
    },
    {
      "epoch": 3.325576219372492,
      "grad_norm": 0.37331438064575195,
      "learning_rate": 3.3709138591092695e-06,
      "loss": 0.0127,
      "step": 2032100
    },
    {
      "epoch": 3.325608949811145,
      "grad_norm": 0.062180593609809875,
      "learning_rate": 3.3708479668957523e-06,
      "loss": 0.0141,
      "step": 2032120
    },
    {
      "epoch": 3.325641680249799,
      "grad_norm": 0.16337519884109497,
      "learning_rate": 3.370782074682235e-06,
      "loss": 0.0086,
      "step": 2032140
    },
    {
      "epoch": 3.325674410688452,
      "grad_norm": 0.2588225305080414,
      "learning_rate": 3.3707161824687178e-06,
      "loss": 0.0135,
      "step": 2032160
    },
    {
      "epoch": 3.3257071411271055,
      "grad_norm": 0.38930743932724,
      "learning_rate": 3.370650290255201e-06,
      "loss": 0.0115,
      "step": 2032180
    },
    {
      "epoch": 3.3257398715657587,
      "grad_norm": 0.24712297320365906,
      "learning_rate": 3.3705843980416837e-06,
      "loss": 0.0067,
      "step": 2032200
    },
    {
      "epoch": 3.3257726020044123,
      "grad_norm": 0.20823581516742706,
      "learning_rate": 3.3705185058281664e-06,
      "loss": 0.0103,
      "step": 2032220
    },
    {
      "epoch": 3.3258053324430654,
      "grad_norm": 0.06311666965484619,
      "learning_rate": 3.370452613614649e-06,
      "loss": 0.0089,
      "step": 2032240
    },
    {
      "epoch": 3.3258380628817186,
      "grad_norm": 0.48280400037765503,
      "learning_rate": 3.370386721401132e-06,
      "loss": 0.0158,
      "step": 2032260
    },
    {
      "epoch": 3.325870793320372,
      "grad_norm": 0.17525705695152283,
      "learning_rate": 3.370320829187615e-06,
      "loss": 0.0121,
      "step": 2032280
    },
    {
      "epoch": 3.3259035237590253,
      "grad_norm": 0.12620894610881805,
      "learning_rate": 3.370254936974098e-06,
      "loss": 0.0127,
      "step": 2032300
    },
    {
      "epoch": 3.325936254197679,
      "grad_norm": 0.11040689796209335,
      "learning_rate": 3.3701890447605814e-06,
      "loss": 0.0093,
      "step": 2032320
    },
    {
      "epoch": 3.325968984636332,
      "grad_norm": 0.16221028566360474,
      "learning_rate": 3.370123152547064e-06,
      "loss": 0.0107,
      "step": 2032340
    },
    {
      "epoch": 3.3260017150749857,
      "grad_norm": 0.38514018058776855,
      "learning_rate": 3.370057260333547e-06,
      "loss": 0.0084,
      "step": 2032360
    },
    {
      "epoch": 3.326034445513639,
      "grad_norm": 0.09973343461751938,
      "learning_rate": 3.3699913681200296e-06,
      "loss": 0.0095,
      "step": 2032380
    },
    {
      "epoch": 3.326067175952292,
      "grad_norm": 0.20742247998714447,
      "learning_rate": 3.3699254759065123e-06,
      "loss": 0.0151,
      "step": 2032400
    },
    {
      "epoch": 3.3260999063909455,
      "grad_norm": 0.2646805942058563,
      "learning_rate": 3.3698595836929955e-06,
      "loss": 0.0161,
      "step": 2032420
    },
    {
      "epoch": 3.3261326368295987,
      "grad_norm": 0.30085012316703796,
      "learning_rate": 3.3697936914794782e-06,
      "loss": 0.0119,
      "step": 2032440
    },
    {
      "epoch": 3.3261653672682523,
      "grad_norm": 0.17900069057941437,
      "learning_rate": 3.369727799265961e-06,
      "loss": 0.0105,
      "step": 2032460
    },
    {
      "epoch": 3.3261980977069054,
      "grad_norm": 0.20791935920715332,
      "learning_rate": 3.3696619070524437e-06,
      "loss": 0.0148,
      "step": 2032480
    },
    {
      "epoch": 3.326230828145559,
      "grad_norm": 0.19659096002578735,
      "learning_rate": 3.369596014838927e-06,
      "loss": 0.0117,
      "step": 2032500
    },
    {
      "epoch": 3.326263558584212,
      "grad_norm": 0.3124901056289673,
      "learning_rate": 3.3695301226254096e-06,
      "loss": 0.0135,
      "step": 2032520
    },
    {
      "epoch": 3.3262962890228653,
      "grad_norm": 0.0724988579750061,
      "learning_rate": 3.3694642304118924e-06,
      "loss": 0.0135,
      "step": 2032540
    },
    {
      "epoch": 3.326329019461519,
      "grad_norm": 0.36908838152885437,
      "learning_rate": 3.369398338198375e-06,
      "loss": 0.0123,
      "step": 2032560
    },
    {
      "epoch": 3.326361749900172,
      "grad_norm": 0.2607043385505676,
      "learning_rate": 3.3693324459848583e-06,
      "loss": 0.0077,
      "step": 2032580
    },
    {
      "epoch": 3.3263944803388257,
      "grad_norm": 0.6014212965965271,
      "learning_rate": 3.369266553771341e-06,
      "loss": 0.0107,
      "step": 2032600
    },
    {
      "epoch": 3.326427210777479,
      "grad_norm": 0.2191968411207199,
      "learning_rate": 3.3692006615578237e-06,
      "loss": 0.0112,
      "step": 2032620
    },
    {
      "epoch": 3.3264599412161324,
      "grad_norm": 0.12499629706144333,
      "learning_rate": 3.3691347693443065e-06,
      "loss": 0.0152,
      "step": 2032640
    },
    {
      "epoch": 3.3264926716547856,
      "grad_norm": 0.14969708025455475,
      "learning_rate": 3.36906887713079e-06,
      "loss": 0.0133,
      "step": 2032660
    },
    {
      "epoch": 3.3265254020934387,
      "grad_norm": 0.11500012129545212,
      "learning_rate": 3.369002984917273e-06,
      "loss": 0.0077,
      "step": 2032680
    },
    {
      "epoch": 3.3265581325320923,
      "grad_norm": 0.1118687316775322,
      "learning_rate": 3.3689370927037556e-06,
      "loss": 0.007,
      "step": 2032700
    },
    {
      "epoch": 3.3265908629707455,
      "grad_norm": 0.2741575539112091,
      "learning_rate": 3.3688712004902387e-06,
      "loss": 0.0102,
      "step": 2032720
    },
    {
      "epoch": 3.326623593409399,
      "grad_norm": 0.5258426666259766,
      "learning_rate": 3.3688053082767215e-06,
      "loss": 0.0101,
      "step": 2032740
    },
    {
      "epoch": 3.326656323848052,
      "grad_norm": 0.25000718235969543,
      "learning_rate": 3.368739416063204e-06,
      "loss": 0.0068,
      "step": 2032760
    },
    {
      "epoch": 3.3266890542867054,
      "grad_norm": 0.41742339730262756,
      "learning_rate": 3.368673523849687e-06,
      "loss": 0.0114,
      "step": 2032780
    },
    {
      "epoch": 3.326721784725359,
      "grad_norm": 0.1078035831451416,
      "learning_rate": 3.3686076316361697e-06,
      "loss": 0.0125,
      "step": 2032800
    },
    {
      "epoch": 3.326754515164012,
      "grad_norm": 0.07892020046710968,
      "learning_rate": 3.368541739422653e-06,
      "loss": 0.0103,
      "step": 2032820
    },
    {
      "epoch": 3.3267872456026657,
      "grad_norm": 0.6196463108062744,
      "learning_rate": 3.3684758472091356e-06,
      "loss": 0.0142,
      "step": 2032840
    },
    {
      "epoch": 3.326819976041319,
      "grad_norm": 0.6522379517555237,
      "learning_rate": 3.3684099549956183e-06,
      "loss": 0.0137,
      "step": 2032860
    },
    {
      "epoch": 3.326852706479972,
      "grad_norm": 0.8094233870506287,
      "learning_rate": 3.368344062782101e-06,
      "loss": 0.0231,
      "step": 2032880
    },
    {
      "epoch": 3.3268854369186256,
      "grad_norm": 0.17858842015266418,
      "learning_rate": 3.3682781705685842e-06,
      "loss": 0.0127,
      "step": 2032900
    },
    {
      "epoch": 3.3269181673572787,
      "grad_norm": 0.3985660970211029,
      "learning_rate": 3.368212278355067e-06,
      "loss": 0.0109,
      "step": 2032920
    },
    {
      "epoch": 3.3269508977959323,
      "grad_norm": 0.7645916938781738,
      "learning_rate": 3.3681463861415497e-06,
      "loss": 0.0089,
      "step": 2032940
    },
    {
      "epoch": 3.3269836282345855,
      "grad_norm": 0.17686817049980164,
      "learning_rate": 3.3680804939280324e-06,
      "loss": 0.0097,
      "step": 2032960
    },
    {
      "epoch": 3.327016358673239,
      "grad_norm": 0.28579404950141907,
      "learning_rate": 3.3680146017145156e-06,
      "loss": 0.0077,
      "step": 2032980
    },
    {
      "epoch": 3.3270490891118922,
      "grad_norm": 0.3984604775905609,
      "learning_rate": 3.3679487095009983e-06,
      "loss": 0.0113,
      "step": 2033000
    },
    {
      "epoch": 3.3270818195505454,
      "grad_norm": 0.7890024185180664,
      "learning_rate": 3.3678828172874815e-06,
      "loss": 0.016,
      "step": 2033020
    },
    {
      "epoch": 3.327114549989199,
      "grad_norm": 0.4071510434150696,
      "learning_rate": 3.3678169250739647e-06,
      "loss": 0.0119,
      "step": 2033040
    },
    {
      "epoch": 3.327147280427852,
      "grad_norm": 0.1551738679409027,
      "learning_rate": 3.3677510328604474e-06,
      "loss": 0.0116,
      "step": 2033060
    },
    {
      "epoch": 3.3271800108665057,
      "grad_norm": 0.232910618185997,
      "learning_rate": 3.36768514064693e-06,
      "loss": 0.0088,
      "step": 2033080
    },
    {
      "epoch": 3.327212741305159,
      "grad_norm": 0.3271465599536896,
      "learning_rate": 3.367619248433413e-06,
      "loss": 0.0145,
      "step": 2033100
    },
    {
      "epoch": 3.3272454717438125,
      "grad_norm": 0.6141602396965027,
      "learning_rate": 3.367553356219896e-06,
      "loss": 0.0095,
      "step": 2033120
    },
    {
      "epoch": 3.3272782021824656,
      "grad_norm": 0.2737451195716858,
      "learning_rate": 3.367487464006379e-06,
      "loss": 0.0121,
      "step": 2033140
    },
    {
      "epoch": 3.3273109326211188,
      "grad_norm": 0.26211926341056824,
      "learning_rate": 3.3674215717928615e-06,
      "loss": 0.0109,
      "step": 2033160
    },
    {
      "epoch": 3.3273436630597724,
      "grad_norm": 0.4869113564491272,
      "learning_rate": 3.3673556795793443e-06,
      "loss": 0.0108,
      "step": 2033180
    },
    {
      "epoch": 3.3273763934984255,
      "grad_norm": 0.19795773923397064,
      "learning_rate": 3.3672897873658274e-06,
      "loss": 0.0119,
      "step": 2033200
    },
    {
      "epoch": 3.327409123937079,
      "grad_norm": 0.16606998443603516,
      "learning_rate": 3.36722389515231e-06,
      "loss": 0.0153,
      "step": 2033220
    },
    {
      "epoch": 3.3274418543757323,
      "grad_norm": 0.44267863035202026,
      "learning_rate": 3.367158002938793e-06,
      "loss": 0.013,
      "step": 2033240
    },
    {
      "epoch": 3.327474584814386,
      "grad_norm": 0.33598166704177856,
      "learning_rate": 3.3670921107252757e-06,
      "loss": 0.0103,
      "step": 2033260
    },
    {
      "epoch": 3.327507315253039,
      "grad_norm": 0.47618982195854187,
      "learning_rate": 3.3670262185117584e-06,
      "loss": 0.0121,
      "step": 2033280
    },
    {
      "epoch": 3.327540045691692,
      "grad_norm": 0.3390151560306549,
      "learning_rate": 3.3669603262982416e-06,
      "loss": 0.0146,
      "step": 2033300
    },
    {
      "epoch": 3.3275727761303457,
      "grad_norm": 0.7317010760307312,
      "learning_rate": 3.3668944340847243e-06,
      "loss": 0.0113,
      "step": 2033320
    },
    {
      "epoch": 3.327605506568999,
      "grad_norm": 0.1281619518995285,
      "learning_rate": 3.366828541871207e-06,
      "loss": 0.0133,
      "step": 2033340
    },
    {
      "epoch": 3.3276382370076525,
      "grad_norm": 0.6433228850364685,
      "learning_rate": 3.3667626496576898e-06,
      "loss": 0.0109,
      "step": 2033360
    },
    {
      "epoch": 3.3276709674463056,
      "grad_norm": 0.17311890423297882,
      "learning_rate": 3.3666967574441734e-06,
      "loss": 0.0202,
      "step": 2033380
    },
    {
      "epoch": 3.3277036978849592,
      "grad_norm": 0.221930131316185,
      "learning_rate": 3.366630865230656e-06,
      "loss": 0.011,
      "step": 2033400
    },
    {
      "epoch": 3.3277364283236124,
      "grad_norm": 0.09383761137723923,
      "learning_rate": 3.366564973017139e-06,
      "loss": 0.0094,
      "step": 2033420
    },
    {
      "epoch": 3.3277691587622655,
      "grad_norm": 0.12333110719919205,
      "learning_rate": 3.366499080803622e-06,
      "loss": 0.0082,
      "step": 2033440
    },
    {
      "epoch": 3.327801889200919,
      "grad_norm": 0.4143989086151123,
      "learning_rate": 3.3664331885901047e-06,
      "loss": 0.006,
      "step": 2033460
    },
    {
      "epoch": 3.3278346196395723,
      "grad_norm": 0.13365358114242554,
      "learning_rate": 3.3663672963765875e-06,
      "loss": 0.01,
      "step": 2033480
    },
    {
      "epoch": 3.327867350078226,
      "grad_norm": 0.24948915839195251,
      "learning_rate": 3.3663014041630702e-06,
      "loss": 0.0168,
      "step": 2033500
    },
    {
      "epoch": 3.327900080516879,
      "grad_norm": 0.32975277304649353,
      "learning_rate": 3.3662355119495534e-06,
      "loss": 0.0102,
      "step": 2033520
    },
    {
      "epoch": 3.3279328109555326,
      "grad_norm": 0.6523396372795105,
      "learning_rate": 3.366169619736036e-06,
      "loss": 0.0135,
      "step": 2033540
    },
    {
      "epoch": 3.3279655413941858,
      "grad_norm": 0.46994447708129883,
      "learning_rate": 3.366103727522519e-06,
      "loss": 0.0098,
      "step": 2033560
    },
    {
      "epoch": 3.327998271832839,
      "grad_norm": 0.08000432699918747,
      "learning_rate": 3.3660378353090016e-06,
      "loss": 0.0139,
      "step": 2033580
    },
    {
      "epoch": 3.3280310022714925,
      "grad_norm": 0.32247084379196167,
      "learning_rate": 3.3659719430954848e-06,
      "loss": 0.0108,
      "step": 2033600
    },
    {
      "epoch": 3.3280637327101457,
      "grad_norm": 0.18000756204128265,
      "learning_rate": 3.3659060508819675e-06,
      "loss": 0.0088,
      "step": 2033620
    },
    {
      "epoch": 3.3280964631487993,
      "grad_norm": 0.5065222382545471,
      "learning_rate": 3.3658401586684503e-06,
      "loss": 0.0138,
      "step": 2033640
    },
    {
      "epoch": 3.3281291935874524,
      "grad_norm": 0.10399101674556732,
      "learning_rate": 3.365774266454933e-06,
      "loss": 0.0089,
      "step": 2033660
    },
    {
      "epoch": 3.328161924026106,
      "grad_norm": 0.154671773314476,
      "learning_rate": 3.365708374241416e-06,
      "loss": 0.0117,
      "step": 2033680
    },
    {
      "epoch": 3.328194654464759,
      "grad_norm": 0.16395476460456848,
      "learning_rate": 3.365642482027899e-06,
      "loss": 0.0133,
      "step": 2033700
    },
    {
      "epoch": 3.3282273849034123,
      "grad_norm": 0.19211266934871674,
      "learning_rate": 3.365576589814382e-06,
      "loss": 0.0144,
      "step": 2033720
    },
    {
      "epoch": 3.328260115342066,
      "grad_norm": 0.2532685399055481,
      "learning_rate": 3.3655106976008652e-06,
      "loss": 0.0115,
      "step": 2033740
    },
    {
      "epoch": 3.328292845780719,
      "grad_norm": 0.2695252597332001,
      "learning_rate": 3.365444805387348e-06,
      "loss": 0.0112,
      "step": 2033760
    },
    {
      "epoch": 3.3283255762193726,
      "grad_norm": 0.21277278661727905,
      "learning_rate": 3.3653789131738307e-06,
      "loss": 0.0094,
      "step": 2033780
    },
    {
      "epoch": 3.328358306658026,
      "grad_norm": 0.38299596309661865,
      "learning_rate": 3.3653130209603134e-06,
      "loss": 0.0115,
      "step": 2033800
    },
    {
      "epoch": 3.3283910370966794,
      "grad_norm": 0.3870764672756195,
      "learning_rate": 3.365247128746796e-06,
      "loss": 0.0158,
      "step": 2033820
    },
    {
      "epoch": 3.3284237675353325,
      "grad_norm": 0.3699714243412018,
      "learning_rate": 3.3651812365332793e-06,
      "loss": 0.0099,
      "step": 2033840
    },
    {
      "epoch": 3.3284564979739857,
      "grad_norm": 0.07171238958835602,
      "learning_rate": 3.365115344319762e-06,
      "loss": 0.0145,
      "step": 2033860
    },
    {
      "epoch": 3.3284892284126393,
      "grad_norm": 0.508698046207428,
      "learning_rate": 3.365049452106245e-06,
      "loss": 0.0086,
      "step": 2033880
    },
    {
      "epoch": 3.3285219588512924,
      "grad_norm": 0.35463374853134155,
      "learning_rate": 3.3649835598927276e-06,
      "loss": 0.0127,
      "step": 2033900
    },
    {
      "epoch": 3.328554689289946,
      "grad_norm": 0.11382772773504257,
      "learning_rate": 3.3649176676792107e-06,
      "loss": 0.011,
      "step": 2033920
    },
    {
      "epoch": 3.328587419728599,
      "grad_norm": 0.2369292974472046,
      "learning_rate": 3.3648517754656935e-06,
      "loss": 0.0088,
      "step": 2033940
    },
    {
      "epoch": 3.3286201501672528,
      "grad_norm": 0.5884365439414978,
      "learning_rate": 3.364785883252176e-06,
      "loss": 0.0161,
      "step": 2033960
    },
    {
      "epoch": 3.328652880605906,
      "grad_norm": 0.2759331464767456,
      "learning_rate": 3.364719991038659e-06,
      "loss": 0.0152,
      "step": 2033980
    },
    {
      "epoch": 3.328685611044559,
      "grad_norm": 0.12207692116498947,
      "learning_rate": 3.364654098825142e-06,
      "loss": 0.0097,
      "step": 2034000
    },
    {
      "epoch": 3.3287183414832127,
      "grad_norm": 0.29959675669670105,
      "learning_rate": 3.364588206611625e-06,
      "loss": 0.0095,
      "step": 2034020
    },
    {
      "epoch": 3.328751071921866,
      "grad_norm": 0.32428136467933655,
      "learning_rate": 3.3645223143981076e-06,
      "loss": 0.0105,
      "step": 2034040
    },
    {
      "epoch": 3.3287838023605194,
      "grad_norm": 0.37039467692375183,
      "learning_rate": 3.3644564221845903e-06,
      "loss": 0.0123,
      "step": 2034060
    },
    {
      "epoch": 3.3288165327991726,
      "grad_norm": 0.38045555353164673,
      "learning_rate": 3.364390529971074e-06,
      "loss": 0.0116,
      "step": 2034080
    },
    {
      "epoch": 3.328849263237826,
      "grad_norm": 0.3213362693786621,
      "learning_rate": 3.3643246377575567e-06,
      "loss": 0.0129,
      "step": 2034100
    },
    {
      "epoch": 3.3288819936764793,
      "grad_norm": 0.1603558361530304,
      "learning_rate": 3.3642587455440394e-06,
      "loss": 0.0121,
      "step": 2034120
    },
    {
      "epoch": 3.3289147241151325,
      "grad_norm": 0.42647889256477356,
      "learning_rate": 3.3641928533305226e-06,
      "loss": 0.016,
      "step": 2034140
    },
    {
      "epoch": 3.328947454553786,
      "grad_norm": 0.6845114231109619,
      "learning_rate": 3.3641269611170053e-06,
      "loss": 0.0096,
      "step": 2034160
    },
    {
      "epoch": 3.328980184992439,
      "grad_norm": 0.45186105370521545,
      "learning_rate": 3.364061068903488e-06,
      "loss": 0.0118,
      "step": 2034180
    },
    {
      "epoch": 3.329012915431093,
      "grad_norm": 0.08604057133197784,
      "learning_rate": 3.3639951766899708e-06,
      "loss": 0.0104,
      "step": 2034200
    },
    {
      "epoch": 3.329045645869746,
      "grad_norm": 0.1616714894771576,
      "learning_rate": 3.3639292844764535e-06,
      "loss": 0.0172,
      "step": 2034220
    },
    {
      "epoch": 3.3290783763083995,
      "grad_norm": 0.4984694719314575,
      "learning_rate": 3.3638633922629367e-06,
      "loss": 0.0092,
      "step": 2034240
    },
    {
      "epoch": 3.3291111067470527,
      "grad_norm": 0.15753567218780518,
      "learning_rate": 3.3637975000494194e-06,
      "loss": 0.0083,
      "step": 2034260
    },
    {
      "epoch": 3.329143837185706,
      "grad_norm": 0.4111279845237732,
      "learning_rate": 3.363731607835902e-06,
      "loss": 0.0117,
      "step": 2034280
    },
    {
      "epoch": 3.3291765676243594,
      "grad_norm": 0.36184367537498474,
      "learning_rate": 3.363665715622385e-06,
      "loss": 0.0108,
      "step": 2034300
    },
    {
      "epoch": 3.3292092980630126,
      "grad_norm": 0.4781520366668701,
      "learning_rate": 3.363599823408868e-06,
      "loss": 0.0133,
      "step": 2034320
    },
    {
      "epoch": 3.329242028501666,
      "grad_norm": 0.2737802267074585,
      "learning_rate": 3.363533931195351e-06,
      "loss": 0.0094,
      "step": 2034340
    },
    {
      "epoch": 3.3292747589403193,
      "grad_norm": 0.24696019291877747,
      "learning_rate": 3.3634680389818335e-06,
      "loss": 0.011,
      "step": 2034360
    },
    {
      "epoch": 3.3293074893789725,
      "grad_norm": 0.13561494648456573,
      "learning_rate": 3.3634021467683163e-06,
      "loss": 0.0107,
      "step": 2034380
    },
    {
      "epoch": 3.329340219817626,
      "grad_norm": 0.26269271969795227,
      "learning_rate": 3.3633362545547994e-06,
      "loss": 0.0152,
      "step": 2034400
    },
    {
      "epoch": 3.329372950256279,
      "grad_norm": 0.3353218138217926,
      "learning_rate": 3.3632703623412826e-06,
      "loss": 0.0086,
      "step": 2034420
    },
    {
      "epoch": 3.329405680694933,
      "grad_norm": 0.5573252439498901,
      "learning_rate": 3.3632044701277653e-06,
      "loss": 0.0093,
      "step": 2034440
    },
    {
      "epoch": 3.329438411133586,
      "grad_norm": 0.1140686646103859,
      "learning_rate": 3.3631385779142485e-06,
      "loss": 0.0179,
      "step": 2034460
    },
    {
      "epoch": 3.329471141572239,
      "grad_norm": 0.49443671107292175,
      "learning_rate": 3.3630726857007312e-06,
      "loss": 0.0098,
      "step": 2034480
    },
    {
      "epoch": 3.3295038720108927,
      "grad_norm": 0.17908243834972382,
      "learning_rate": 3.363006793487214e-06,
      "loss": 0.0096,
      "step": 2034500
    },
    {
      "epoch": 3.329536602449546,
      "grad_norm": 0.40481385588645935,
      "learning_rate": 3.3629409012736967e-06,
      "loss": 0.0174,
      "step": 2034520
    },
    {
      "epoch": 3.3295693328881995,
      "grad_norm": 0.12865138053894043,
      "learning_rate": 3.36287500906018e-06,
      "loss": 0.0088,
      "step": 2034540
    },
    {
      "epoch": 3.3296020633268526,
      "grad_norm": 0.7636608481407166,
      "learning_rate": 3.3628091168466626e-06,
      "loss": 0.0133,
      "step": 2034560
    },
    {
      "epoch": 3.329634793765506,
      "grad_norm": 0.6788666844367981,
      "learning_rate": 3.3627432246331454e-06,
      "loss": 0.0118,
      "step": 2034580
    },
    {
      "epoch": 3.3296675242041593,
      "grad_norm": 0.1009206548333168,
      "learning_rate": 3.362677332419628e-06,
      "loss": 0.0113,
      "step": 2034600
    },
    {
      "epoch": 3.3297002546428125,
      "grad_norm": 0.4127155840396881,
      "learning_rate": 3.3626114402061113e-06,
      "loss": 0.0146,
      "step": 2034620
    },
    {
      "epoch": 3.329732985081466,
      "grad_norm": 0.6751214265823364,
      "learning_rate": 3.362545547992594e-06,
      "loss": 0.0095,
      "step": 2034640
    },
    {
      "epoch": 3.3297657155201192,
      "grad_norm": 0.1910475790500641,
      "learning_rate": 3.3624796557790768e-06,
      "loss": 0.0082,
      "step": 2034660
    },
    {
      "epoch": 3.329798445958773,
      "grad_norm": 0.12690649926662445,
      "learning_rate": 3.3624137635655595e-06,
      "loss": 0.0118,
      "step": 2034680
    },
    {
      "epoch": 3.329831176397426,
      "grad_norm": 0.2955295741558075,
      "learning_rate": 3.3623478713520422e-06,
      "loss": 0.0105,
      "step": 2034700
    },
    {
      "epoch": 3.3298639068360796,
      "grad_norm": 0.4843210279941559,
      "learning_rate": 3.3622819791385254e-06,
      "loss": 0.0126,
      "step": 2034720
    },
    {
      "epoch": 3.3298966372747327,
      "grad_norm": 0.47381046414375305,
      "learning_rate": 3.362216086925008e-06,
      "loss": 0.0149,
      "step": 2034740
    },
    {
      "epoch": 3.329929367713386,
      "grad_norm": 0.2986709773540497,
      "learning_rate": 3.362150194711491e-06,
      "loss": 0.0093,
      "step": 2034760
    },
    {
      "epoch": 3.3299620981520395,
      "grad_norm": 0.17465227842330933,
      "learning_rate": 3.3620843024979745e-06,
      "loss": 0.0094,
      "step": 2034780
    },
    {
      "epoch": 3.3299948285906926,
      "grad_norm": 0.28109225630760193,
      "learning_rate": 3.362018410284457e-06,
      "loss": 0.0121,
      "step": 2034800
    },
    {
      "epoch": 3.330027559029346,
      "grad_norm": 0.11758817732334137,
      "learning_rate": 3.36195251807094e-06,
      "loss": 0.0113,
      "step": 2034820
    },
    {
      "epoch": 3.3300602894679994,
      "grad_norm": 0.1264578253030777,
      "learning_rate": 3.3618866258574227e-06,
      "loss": 0.0099,
      "step": 2034840
    },
    {
      "epoch": 3.330093019906653,
      "grad_norm": 0.4895688593387604,
      "learning_rate": 3.361820733643906e-06,
      "loss": 0.0099,
      "step": 2034860
    },
    {
      "epoch": 3.330125750345306,
      "grad_norm": 0.2957148849964142,
      "learning_rate": 3.3617548414303886e-06,
      "loss": 0.0094,
      "step": 2034880
    },
    {
      "epoch": 3.3301584807839593,
      "grad_norm": 0.2683538496494293,
      "learning_rate": 3.3616889492168713e-06,
      "loss": 0.0118,
      "step": 2034900
    },
    {
      "epoch": 3.330191211222613,
      "grad_norm": 0.20180293917655945,
      "learning_rate": 3.361623057003354e-06,
      "loss": 0.0073,
      "step": 2034920
    },
    {
      "epoch": 3.330223941661266,
      "grad_norm": 0.14320586621761322,
      "learning_rate": 3.3615571647898372e-06,
      "loss": 0.0127,
      "step": 2034940
    },
    {
      "epoch": 3.3302566720999196,
      "grad_norm": 0.4184924364089966,
      "learning_rate": 3.36149127257632e-06,
      "loss": 0.0085,
      "step": 2034960
    },
    {
      "epoch": 3.3302894025385728,
      "grad_norm": 0.12294797599315643,
      "learning_rate": 3.3614253803628027e-06,
      "loss": 0.0105,
      "step": 2034980
    },
    {
      "epoch": 3.3303221329772263,
      "grad_norm": 1.0205481052398682,
      "learning_rate": 3.3613594881492854e-06,
      "loss": 0.0141,
      "step": 2035000
    },
    {
      "epoch": 3.3303548634158795,
      "grad_norm": 0.1639609932899475,
      "learning_rate": 3.3612935959357686e-06,
      "loss": 0.0095,
      "step": 2035020
    },
    {
      "epoch": 3.3303875938545326,
      "grad_norm": 0.24224981665611267,
      "learning_rate": 3.3612277037222514e-06,
      "loss": 0.0089,
      "step": 2035040
    },
    {
      "epoch": 3.3304203242931862,
      "grad_norm": 0.1877169907093048,
      "learning_rate": 3.361161811508734e-06,
      "loss": 0.0071,
      "step": 2035060
    },
    {
      "epoch": 3.3304530547318394,
      "grad_norm": 0.3447614014148712,
      "learning_rate": 3.361095919295217e-06,
      "loss": 0.0102,
      "step": 2035080
    },
    {
      "epoch": 3.330485785170493,
      "grad_norm": 1.0084397792816162,
      "learning_rate": 3.3610300270817e-06,
      "loss": 0.0131,
      "step": 2035100
    },
    {
      "epoch": 3.330518515609146,
      "grad_norm": 0.17877130210399628,
      "learning_rate": 3.3609641348681827e-06,
      "loss": 0.0058,
      "step": 2035120
    },
    {
      "epoch": 3.3305512460477997,
      "grad_norm": 0.8495738506317139,
      "learning_rate": 3.360898242654666e-06,
      "loss": 0.0134,
      "step": 2035140
    },
    {
      "epoch": 3.330583976486453,
      "grad_norm": 0.32827138900756836,
      "learning_rate": 3.360832350441149e-06,
      "loss": 0.0166,
      "step": 2035160
    },
    {
      "epoch": 3.330616706925106,
      "grad_norm": 0.5598879456520081,
      "learning_rate": 3.360766458227632e-06,
      "loss": 0.0124,
      "step": 2035180
    },
    {
      "epoch": 3.3306494373637596,
      "grad_norm": 0.24255067110061646,
      "learning_rate": 3.3607005660141145e-06,
      "loss": 0.012,
      "step": 2035200
    },
    {
      "epoch": 3.3306821678024128,
      "grad_norm": 0.17242377996444702,
      "learning_rate": 3.3606346738005973e-06,
      "loss": 0.0083,
      "step": 2035220
    },
    {
      "epoch": 3.3307148982410664,
      "grad_norm": 0.28489986062049866,
      "learning_rate": 3.36056878158708e-06,
      "loss": 0.0098,
      "step": 2035240
    },
    {
      "epoch": 3.3307476286797195,
      "grad_norm": 0.26331913471221924,
      "learning_rate": 3.360502889373563e-06,
      "loss": 0.0098,
      "step": 2035260
    },
    {
      "epoch": 3.330780359118373,
      "grad_norm": 0.5823757648468018,
      "learning_rate": 3.360436997160046e-06,
      "loss": 0.0167,
      "step": 2035280
    },
    {
      "epoch": 3.3308130895570263,
      "grad_norm": 0.1263236403465271,
      "learning_rate": 3.3603711049465287e-06,
      "loss": 0.0105,
      "step": 2035300
    },
    {
      "epoch": 3.3308458199956794,
      "grad_norm": 1.3888922929763794,
      "learning_rate": 3.3603052127330114e-06,
      "loss": 0.0182,
      "step": 2035320
    },
    {
      "epoch": 3.330878550434333,
      "grad_norm": 0.13224998116493225,
      "learning_rate": 3.3602393205194946e-06,
      "loss": 0.015,
      "step": 2035340
    },
    {
      "epoch": 3.330911280872986,
      "grad_norm": 0.08472181856632233,
      "learning_rate": 3.3601734283059773e-06,
      "loss": 0.0079,
      "step": 2035360
    },
    {
      "epoch": 3.3309440113116398,
      "grad_norm": 0.2839319109916687,
      "learning_rate": 3.36010753609246e-06,
      "loss": 0.0112,
      "step": 2035380
    },
    {
      "epoch": 3.330976741750293,
      "grad_norm": 0.2415318787097931,
      "learning_rate": 3.3600416438789428e-06,
      "loss": 0.0102,
      "step": 2035400
    },
    {
      "epoch": 3.3310094721889465,
      "grad_norm": 0.10928764194250107,
      "learning_rate": 3.359975751665426e-06,
      "loss": 0.0147,
      "step": 2035420
    },
    {
      "epoch": 3.3310422026275996,
      "grad_norm": 0.21759453415870667,
      "learning_rate": 3.3599098594519087e-06,
      "loss": 0.01,
      "step": 2035440
    },
    {
      "epoch": 3.331074933066253,
      "grad_norm": 0.4652571678161621,
      "learning_rate": 3.3598439672383914e-06,
      "loss": 0.0112,
      "step": 2035460
    },
    {
      "epoch": 3.3311076635049064,
      "grad_norm": 0.1166454628109932,
      "learning_rate": 3.359778075024875e-06,
      "loss": 0.0118,
      "step": 2035480
    },
    {
      "epoch": 3.3311403939435595,
      "grad_norm": 0.7795594334602356,
      "learning_rate": 3.3597121828113578e-06,
      "loss": 0.0125,
      "step": 2035500
    },
    {
      "epoch": 3.331173124382213,
      "grad_norm": 0.8460920453071594,
      "learning_rate": 3.3596462905978405e-06,
      "loss": 0.0145,
      "step": 2035520
    },
    {
      "epoch": 3.3312058548208663,
      "grad_norm": 0.26834264397621155,
      "learning_rate": 3.3595803983843232e-06,
      "loss": 0.0066,
      "step": 2035540
    },
    {
      "epoch": 3.33123858525952,
      "grad_norm": 0.3460906445980072,
      "learning_rate": 3.3595145061708064e-06,
      "loss": 0.0129,
      "step": 2035560
    },
    {
      "epoch": 3.331271315698173,
      "grad_norm": 0.9916012287139893,
      "learning_rate": 3.359448613957289e-06,
      "loss": 0.0102,
      "step": 2035580
    },
    {
      "epoch": 3.331304046136826,
      "grad_norm": 0.13009589910507202,
      "learning_rate": 3.359382721743772e-06,
      "loss": 0.0089,
      "step": 2035600
    },
    {
      "epoch": 3.3313367765754798,
      "grad_norm": 0.5242175459861755,
      "learning_rate": 3.3593168295302546e-06,
      "loss": 0.0134,
      "step": 2035620
    },
    {
      "epoch": 3.331369507014133,
      "grad_norm": 0.20683923363685608,
      "learning_rate": 3.3592509373167378e-06,
      "loss": 0.0158,
      "step": 2035640
    },
    {
      "epoch": 3.3314022374527865,
      "grad_norm": 0.2896883189678192,
      "learning_rate": 3.3591850451032205e-06,
      "loss": 0.0119,
      "step": 2035660
    },
    {
      "epoch": 3.3314349678914397,
      "grad_norm": 0.1879434436559677,
      "learning_rate": 3.3591191528897033e-06,
      "loss": 0.0139,
      "step": 2035680
    },
    {
      "epoch": 3.3314676983300933,
      "grad_norm": 0.38245221972465515,
      "learning_rate": 3.359053260676186e-06,
      "loss": 0.008,
      "step": 2035700
    },
    {
      "epoch": 3.3315004287687464,
      "grad_norm": 0.4240069091320038,
      "learning_rate": 3.3589873684626687e-06,
      "loss": 0.0133,
      "step": 2035720
    },
    {
      "epoch": 3.3315331592073996,
      "grad_norm": 0.1304471790790558,
      "learning_rate": 3.358921476249152e-06,
      "loss": 0.0124,
      "step": 2035740
    },
    {
      "epoch": 3.331565889646053,
      "grad_norm": 0.33766573667526245,
      "learning_rate": 3.3588555840356346e-06,
      "loss": 0.0121,
      "step": 2035760
    },
    {
      "epoch": 3.3315986200847063,
      "grad_norm": 0.119850754737854,
      "learning_rate": 3.3587896918221174e-06,
      "loss": 0.0084,
      "step": 2035780
    },
    {
      "epoch": 3.33163135052336,
      "grad_norm": 0.1795412003993988,
      "learning_rate": 3.3587237996086e-06,
      "loss": 0.015,
      "step": 2035800
    },
    {
      "epoch": 3.331664080962013,
      "grad_norm": 0.3745022416114807,
      "learning_rate": 3.3586579073950833e-06,
      "loss": 0.0086,
      "step": 2035820
    },
    {
      "epoch": 3.331696811400666,
      "grad_norm": 0.19868852198123932,
      "learning_rate": 3.3585920151815664e-06,
      "loss": 0.0111,
      "step": 2035840
    },
    {
      "epoch": 3.33172954183932,
      "grad_norm": 0.1241569072008133,
      "learning_rate": 3.358526122968049e-06,
      "loss": 0.0095,
      "step": 2035860
    },
    {
      "epoch": 3.331762272277973,
      "grad_norm": 0.12058824300765991,
      "learning_rate": 3.3584602307545323e-06,
      "loss": 0.0142,
      "step": 2035880
    },
    {
      "epoch": 3.3317950027166265,
      "grad_norm": 0.17908762395381927,
      "learning_rate": 3.358394338541015e-06,
      "loss": 0.0096,
      "step": 2035900
    },
    {
      "epoch": 3.3318277331552797,
      "grad_norm": 0.2914392054080963,
      "learning_rate": 3.358328446327498e-06,
      "loss": 0.0148,
      "step": 2035920
    },
    {
      "epoch": 3.331860463593933,
      "grad_norm": 0.5672932863235474,
      "learning_rate": 3.3582625541139806e-06,
      "loss": 0.0118,
      "step": 2035940
    },
    {
      "epoch": 3.3318931940325864,
      "grad_norm": 0.22313301265239716,
      "learning_rate": 3.3581966619004637e-06,
      "loss": 0.0095,
      "step": 2035960
    },
    {
      "epoch": 3.3319259244712396,
      "grad_norm": 0.45790913701057434,
      "learning_rate": 3.3581307696869465e-06,
      "loss": 0.0135,
      "step": 2035980
    },
    {
      "epoch": 3.331958654909893,
      "grad_norm": 0.229188472032547,
      "learning_rate": 3.358064877473429e-06,
      "loss": 0.0113,
      "step": 2036000
    },
    {
      "epoch": 3.3319913853485463,
      "grad_norm": 0.24197527766227722,
      "learning_rate": 3.357998985259912e-06,
      "loss": 0.0147,
      "step": 2036020
    },
    {
      "epoch": 3.3320241157872,
      "grad_norm": 0.08495738357305527,
      "learning_rate": 3.357933093046395e-06,
      "loss": 0.0108,
      "step": 2036040
    },
    {
      "epoch": 3.332056846225853,
      "grad_norm": 0.5311556458473206,
      "learning_rate": 3.357867200832878e-06,
      "loss": 0.012,
      "step": 2036060
    },
    {
      "epoch": 3.3320895766645062,
      "grad_norm": 0.06104293093085289,
      "learning_rate": 3.3578013086193606e-06,
      "loss": 0.011,
      "step": 2036080
    },
    {
      "epoch": 3.33212230710316,
      "grad_norm": 0.13132578134536743,
      "learning_rate": 3.3577354164058433e-06,
      "loss": 0.0201,
      "step": 2036100
    },
    {
      "epoch": 3.332155037541813,
      "grad_norm": 0.2474285513162613,
      "learning_rate": 3.357669524192326e-06,
      "loss": 0.009,
      "step": 2036120
    },
    {
      "epoch": 3.3321877679804666,
      "grad_norm": 0.20860137045383453,
      "learning_rate": 3.3576036319788092e-06,
      "loss": 0.0101,
      "step": 2036140
    },
    {
      "epoch": 3.3322204984191197,
      "grad_norm": 0.14478544890880585,
      "learning_rate": 3.357537739765292e-06,
      "loss": 0.0118,
      "step": 2036160
    },
    {
      "epoch": 3.3322532288577733,
      "grad_norm": 0.29449597001075745,
      "learning_rate": 3.3574718475517756e-06,
      "loss": 0.0132,
      "step": 2036180
    },
    {
      "epoch": 3.3322859592964265,
      "grad_norm": 0.13280324637889862,
      "learning_rate": 3.3574059553382583e-06,
      "loss": 0.0085,
      "step": 2036200
    },
    {
      "epoch": 3.3323186897350796,
      "grad_norm": 0.2528684735298157,
      "learning_rate": 3.357340063124741e-06,
      "loss": 0.0091,
      "step": 2036220
    },
    {
      "epoch": 3.332351420173733,
      "grad_norm": 0.1951235830783844,
      "learning_rate": 3.3572741709112238e-06,
      "loss": 0.0107,
      "step": 2036240
    },
    {
      "epoch": 3.3323841506123864,
      "grad_norm": 0.15648441016674042,
      "learning_rate": 3.3572082786977065e-06,
      "loss": 0.009,
      "step": 2036260
    },
    {
      "epoch": 3.33241688105104,
      "grad_norm": 0.10978396981954575,
      "learning_rate": 3.3571423864841897e-06,
      "loss": 0.0129,
      "step": 2036280
    },
    {
      "epoch": 3.332449611489693,
      "grad_norm": 0.2044016569852829,
      "learning_rate": 3.3570764942706724e-06,
      "loss": 0.0095,
      "step": 2036300
    },
    {
      "epoch": 3.3324823419283467,
      "grad_norm": 0.16272035241127014,
      "learning_rate": 3.357010602057155e-06,
      "loss": 0.0099,
      "step": 2036320
    },
    {
      "epoch": 3.332515072367,
      "grad_norm": 0.2214704006910324,
      "learning_rate": 3.356944709843638e-06,
      "loss": 0.0138,
      "step": 2036340
    },
    {
      "epoch": 3.332547802805653,
      "grad_norm": 0.08318410068750381,
      "learning_rate": 3.356878817630121e-06,
      "loss": 0.0124,
      "step": 2036360
    },
    {
      "epoch": 3.3325805332443066,
      "grad_norm": 0.27932682633399963,
      "learning_rate": 3.356812925416604e-06,
      "loss": 0.009,
      "step": 2036380
    },
    {
      "epoch": 3.3326132636829597,
      "grad_norm": 0.4571536183357239,
      "learning_rate": 3.3567470332030865e-06,
      "loss": 0.0079,
      "step": 2036400
    },
    {
      "epoch": 3.3326459941216133,
      "grad_norm": 0.4685607850551605,
      "learning_rate": 3.3566811409895693e-06,
      "loss": 0.0157,
      "step": 2036420
    },
    {
      "epoch": 3.3326787245602665,
      "grad_norm": 0.36935189366340637,
      "learning_rate": 3.3566152487760525e-06,
      "loss": 0.0126,
      "step": 2036440
    },
    {
      "epoch": 3.33271145499892,
      "grad_norm": 0.23010936379432678,
      "learning_rate": 3.356549356562535e-06,
      "loss": 0.0166,
      "step": 2036460
    },
    {
      "epoch": 3.3327441854375732,
      "grad_norm": 0.2877466380596161,
      "learning_rate": 3.356483464349018e-06,
      "loss": 0.0109,
      "step": 2036480
    },
    {
      "epoch": 3.3327769158762264,
      "grad_norm": 0.1234896332025528,
      "learning_rate": 3.3564175721355007e-06,
      "loss": 0.0135,
      "step": 2036500
    },
    {
      "epoch": 3.33280964631488,
      "grad_norm": 0.7093999981880188,
      "learning_rate": 3.356351679921984e-06,
      "loss": 0.0078,
      "step": 2036520
    },
    {
      "epoch": 3.332842376753533,
      "grad_norm": 0.19205382466316223,
      "learning_rate": 3.356285787708467e-06,
      "loss": 0.0138,
      "step": 2036540
    },
    {
      "epoch": 3.3328751071921867,
      "grad_norm": 0.10842736065387726,
      "learning_rate": 3.3562198954949497e-06,
      "loss": 0.0089,
      "step": 2036560
    },
    {
      "epoch": 3.33290783763084,
      "grad_norm": 0.17861728370189667,
      "learning_rate": 3.356154003281433e-06,
      "loss": 0.0131,
      "step": 2036580
    },
    {
      "epoch": 3.3329405680694935,
      "grad_norm": 0.16051283478736877,
      "learning_rate": 3.3560881110679156e-06,
      "loss": 0.0135,
      "step": 2036600
    },
    {
      "epoch": 3.3329732985081466,
      "grad_norm": 0.14802195131778717,
      "learning_rate": 3.3560222188543984e-06,
      "loss": 0.0151,
      "step": 2036620
    },
    {
      "epoch": 3.3330060289467998,
      "grad_norm": 0.2612226605415344,
      "learning_rate": 3.355956326640881e-06,
      "loss": 0.0154,
      "step": 2036640
    },
    {
      "epoch": 3.3330387593854534,
      "grad_norm": 0.40037301182746887,
      "learning_rate": 3.355890434427364e-06,
      "loss": 0.0109,
      "step": 2036660
    },
    {
      "epoch": 3.3330714898241065,
      "grad_norm": 0.2214938849210739,
      "learning_rate": 3.355824542213847e-06,
      "loss": 0.0165,
      "step": 2036680
    },
    {
      "epoch": 3.33310422026276,
      "grad_norm": 0.13707950711250305,
      "learning_rate": 3.3557586500003298e-06,
      "loss": 0.0102,
      "step": 2036700
    },
    {
      "epoch": 3.3331369507014132,
      "grad_norm": 0.39043888449668884,
      "learning_rate": 3.3556927577868125e-06,
      "loss": 0.0092,
      "step": 2036720
    },
    {
      "epoch": 3.333169681140067,
      "grad_norm": 0.3030087649822235,
      "learning_rate": 3.3556268655732952e-06,
      "loss": 0.016,
      "step": 2036740
    },
    {
      "epoch": 3.33320241157872,
      "grad_norm": 0.36836472153663635,
      "learning_rate": 3.3555609733597784e-06,
      "loss": 0.0172,
      "step": 2036760
    },
    {
      "epoch": 3.333235142017373,
      "grad_norm": 0.3226809501647949,
      "learning_rate": 3.355495081146261e-06,
      "loss": 0.0068,
      "step": 2036780
    },
    {
      "epoch": 3.3332678724560267,
      "grad_norm": 0.19186189770698547,
      "learning_rate": 3.355429188932744e-06,
      "loss": 0.01,
      "step": 2036800
    },
    {
      "epoch": 3.33330060289468,
      "grad_norm": 0.34444496035575867,
      "learning_rate": 3.3553632967192266e-06,
      "loss": 0.0155,
      "step": 2036820
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.10069254785776138,
      "learning_rate": 3.3552974045057098e-06,
      "loss": 0.0062,
      "step": 2036840
    },
    {
      "epoch": 3.3333660637719866,
      "grad_norm": 0.2735344171524048,
      "learning_rate": 3.3552315122921925e-06,
      "loss": 0.0108,
      "step": 2036860
    },
    {
      "epoch": 3.3333987942106402,
      "grad_norm": 0.12272344529628754,
      "learning_rate": 3.3551656200786753e-06,
      "loss": 0.0067,
      "step": 2036880
    },
    {
      "epoch": 3.3334315246492934,
      "grad_norm": 0.17100700736045837,
      "learning_rate": 3.355099727865159e-06,
      "loss": 0.0102,
      "step": 2036900
    },
    {
      "epoch": 3.3334642550879465,
      "grad_norm": 0.33041831851005554,
      "learning_rate": 3.3550338356516416e-06,
      "loss": 0.0103,
      "step": 2036920
    },
    {
      "epoch": 3.3334969855266,
      "grad_norm": 0.20826640725135803,
      "learning_rate": 3.3549679434381243e-06,
      "loss": 0.0123,
      "step": 2036940
    },
    {
      "epoch": 3.3335297159652533,
      "grad_norm": 0.39790377020835876,
      "learning_rate": 3.354902051224607e-06,
      "loss": 0.0123,
      "step": 2036960
    },
    {
      "epoch": 3.333562446403907,
      "grad_norm": 0.3650955855846405,
      "learning_rate": 3.3548361590110902e-06,
      "loss": 0.0108,
      "step": 2036980
    },
    {
      "epoch": 3.33359517684256,
      "grad_norm": 0.21020399034023285,
      "learning_rate": 3.354770266797573e-06,
      "loss": 0.0126,
      "step": 2037000
    },
    {
      "epoch": 3.3336279072812136,
      "grad_norm": 0.03819790482521057,
      "learning_rate": 3.3547043745840557e-06,
      "loss": 0.0114,
      "step": 2037020
    },
    {
      "epoch": 3.3336606377198668,
      "grad_norm": 1.2210670709609985,
      "learning_rate": 3.3546384823705385e-06,
      "loss": 0.0139,
      "step": 2037040
    },
    {
      "epoch": 3.33369336815852,
      "grad_norm": 0.39605632424354553,
      "learning_rate": 3.3545725901570216e-06,
      "loss": 0.0156,
      "step": 2037060
    },
    {
      "epoch": 3.3337260985971735,
      "grad_norm": 0.0871191918849945,
      "learning_rate": 3.3545066979435044e-06,
      "loss": 0.0083,
      "step": 2037080
    },
    {
      "epoch": 3.3337588290358267,
      "grad_norm": 0.32361626625061035,
      "learning_rate": 3.354440805729987e-06,
      "loss": 0.0081,
      "step": 2037100
    },
    {
      "epoch": 3.3337915594744802,
      "grad_norm": 0.4800494313240051,
      "learning_rate": 3.35437491351647e-06,
      "loss": 0.0177,
      "step": 2037120
    },
    {
      "epoch": 3.3338242899131334,
      "grad_norm": 0.5122433304786682,
      "learning_rate": 3.3543090213029526e-06,
      "loss": 0.0139,
      "step": 2037140
    },
    {
      "epoch": 3.333857020351787,
      "grad_norm": 0.20383363962173462,
      "learning_rate": 3.3542431290894357e-06,
      "loss": 0.0111,
      "step": 2037160
    },
    {
      "epoch": 3.33388975079044,
      "grad_norm": 0.12594667077064514,
      "learning_rate": 3.3541772368759185e-06,
      "loss": 0.014,
      "step": 2037180
    },
    {
      "epoch": 3.3339224812290933,
      "grad_norm": 0.498919814825058,
      "learning_rate": 3.3541113446624012e-06,
      "loss": 0.0124,
      "step": 2037200
    },
    {
      "epoch": 3.333955211667747,
      "grad_norm": 0.14232948422431946,
      "learning_rate": 3.354045452448884e-06,
      "loss": 0.0109,
      "step": 2037220
    },
    {
      "epoch": 3.3339879421064,
      "grad_norm": 0.53959721326828,
      "learning_rate": 3.3539795602353675e-06,
      "loss": 0.0158,
      "step": 2037240
    },
    {
      "epoch": 3.3340206725450536,
      "grad_norm": 0.36163392663002014,
      "learning_rate": 3.3539136680218503e-06,
      "loss": 0.0159,
      "step": 2037260
    },
    {
      "epoch": 3.334053402983707,
      "grad_norm": 0.20272447168827057,
      "learning_rate": 3.353847775808333e-06,
      "loss": 0.0085,
      "step": 2037280
    },
    {
      "epoch": 3.3340861334223604,
      "grad_norm": 0.23236516118049622,
      "learning_rate": 3.353781883594816e-06,
      "loss": 0.012,
      "step": 2037300
    },
    {
      "epoch": 3.3341188638610135,
      "grad_norm": 0.08720976859331131,
      "learning_rate": 3.353715991381299e-06,
      "loss": 0.01,
      "step": 2037320
    },
    {
      "epoch": 3.3341515942996667,
      "grad_norm": 0.7097386717796326,
      "learning_rate": 3.3536500991677817e-06,
      "loss": 0.0096,
      "step": 2037340
    },
    {
      "epoch": 3.3341843247383203,
      "grad_norm": 0.21160566806793213,
      "learning_rate": 3.3535842069542644e-06,
      "loss": 0.0077,
      "step": 2037360
    },
    {
      "epoch": 3.3342170551769734,
      "grad_norm": 0.40032240748405457,
      "learning_rate": 3.3535183147407476e-06,
      "loss": 0.0101,
      "step": 2037380
    },
    {
      "epoch": 3.3342497856156266,
      "grad_norm": 0.4549659192562103,
      "learning_rate": 3.3534524225272303e-06,
      "loss": 0.0143,
      "step": 2037400
    },
    {
      "epoch": 3.33428251605428,
      "grad_norm": 0.5375708937644958,
      "learning_rate": 3.353386530313713e-06,
      "loss": 0.0118,
      "step": 2037420
    },
    {
      "epoch": 3.3343152464929333,
      "grad_norm": 0.18518680334091187,
      "learning_rate": 3.353320638100196e-06,
      "loss": 0.0125,
      "step": 2037440
    },
    {
      "epoch": 3.334347976931587,
      "grad_norm": 0.2065521478652954,
      "learning_rate": 3.353254745886679e-06,
      "loss": 0.0103,
      "step": 2037460
    },
    {
      "epoch": 3.33438070737024,
      "grad_norm": 0.30551061034202576,
      "learning_rate": 3.3531888536731617e-06,
      "loss": 0.0127,
      "step": 2037480
    },
    {
      "epoch": 3.3344134378088937,
      "grad_norm": 0.44918087124824524,
      "learning_rate": 3.3531229614596444e-06,
      "loss": 0.0114,
      "step": 2037500
    },
    {
      "epoch": 3.334446168247547,
      "grad_norm": 0.5050519108772278,
      "learning_rate": 3.353057069246127e-06,
      "loss": 0.012,
      "step": 2037520
    },
    {
      "epoch": 3.3344788986862,
      "grad_norm": 0.23075512051582336,
      "learning_rate": 3.35299117703261e-06,
      "loss": 0.0264,
      "step": 2037540
    },
    {
      "epoch": 3.3345116291248535,
      "grad_norm": 0.3346703350543976,
      "learning_rate": 3.352925284819093e-06,
      "loss": 0.0095,
      "step": 2037560
    },
    {
      "epoch": 3.3345443595635067,
      "grad_norm": 0.28459566831588745,
      "learning_rate": 3.352859392605576e-06,
      "loss": 0.0147,
      "step": 2037580
    },
    {
      "epoch": 3.3345770900021603,
      "grad_norm": 0.6700641512870789,
      "learning_rate": 3.3527935003920594e-06,
      "loss": 0.0086,
      "step": 2037600
    },
    {
      "epoch": 3.3346098204408134,
      "grad_norm": 0.25856804847717285,
      "learning_rate": 3.352727608178542e-06,
      "loss": 0.0086,
      "step": 2037620
    },
    {
      "epoch": 3.334642550879467,
      "grad_norm": 0.3170493543148041,
      "learning_rate": 3.352661715965025e-06,
      "loss": 0.0153,
      "step": 2037640
    },
    {
      "epoch": 3.33467528131812,
      "grad_norm": 0.32995566725730896,
      "learning_rate": 3.3525958237515076e-06,
      "loss": 0.0107,
      "step": 2037660
    },
    {
      "epoch": 3.3347080117567733,
      "grad_norm": 0.10818477720022202,
      "learning_rate": 3.3525299315379904e-06,
      "loss": 0.0162,
      "step": 2037680
    },
    {
      "epoch": 3.334740742195427,
      "grad_norm": 0.7008238434791565,
      "learning_rate": 3.3524640393244735e-06,
      "loss": 0.0083,
      "step": 2037700
    },
    {
      "epoch": 3.33477347263408,
      "grad_norm": 0.32642662525177,
      "learning_rate": 3.3523981471109563e-06,
      "loss": 0.0118,
      "step": 2037720
    },
    {
      "epoch": 3.3348062030727337,
      "grad_norm": 0.14627817273139954,
      "learning_rate": 3.352332254897439e-06,
      "loss": 0.0087,
      "step": 2037740
    },
    {
      "epoch": 3.334838933511387,
      "grad_norm": 0.2060660570859909,
      "learning_rate": 3.3522663626839217e-06,
      "loss": 0.0169,
      "step": 2037760
    },
    {
      "epoch": 3.3348716639500404,
      "grad_norm": 0.2702372372150421,
      "learning_rate": 3.352200470470405e-06,
      "loss": 0.0111,
      "step": 2037780
    },
    {
      "epoch": 3.3349043943886936,
      "grad_norm": 0.3201490044593811,
      "learning_rate": 3.3521345782568876e-06,
      "loss": 0.0119,
      "step": 2037800
    },
    {
      "epoch": 3.3349371248273467,
      "grad_norm": 0.14540375769138336,
      "learning_rate": 3.3520686860433704e-06,
      "loss": 0.0097,
      "step": 2037820
    },
    {
      "epoch": 3.3349698552660003,
      "grad_norm": 0.10239800065755844,
      "learning_rate": 3.352002793829853e-06,
      "loss": 0.0158,
      "step": 2037840
    },
    {
      "epoch": 3.3350025857046535,
      "grad_norm": 0.150325208902359,
      "learning_rate": 3.3519369016163363e-06,
      "loss": 0.0138,
      "step": 2037860
    },
    {
      "epoch": 3.335035316143307,
      "grad_norm": 0.030848130583763123,
      "learning_rate": 3.351871009402819e-06,
      "loss": 0.0096,
      "step": 2037880
    },
    {
      "epoch": 3.33506804658196,
      "grad_norm": 0.14965444803237915,
      "learning_rate": 3.3518051171893018e-06,
      "loss": 0.0073,
      "step": 2037900
    },
    {
      "epoch": 3.335100777020614,
      "grad_norm": 0.33201372623443604,
      "learning_rate": 3.3517392249757845e-06,
      "loss": 0.0139,
      "step": 2037920
    },
    {
      "epoch": 3.335133507459267,
      "grad_norm": 0.4782378375530243,
      "learning_rate": 3.351673332762268e-06,
      "loss": 0.0117,
      "step": 2037940
    },
    {
      "epoch": 3.33516623789792,
      "grad_norm": 0.5279407501220703,
      "learning_rate": 3.351607440548751e-06,
      "loss": 0.0093,
      "step": 2037960
    },
    {
      "epoch": 3.3351989683365737,
      "grad_norm": 0.34640541672706604,
      "learning_rate": 3.3515415483352336e-06,
      "loss": 0.014,
      "step": 2037980
    },
    {
      "epoch": 3.335231698775227,
      "grad_norm": 0.22105377912521362,
      "learning_rate": 3.3514756561217167e-06,
      "loss": 0.0117,
      "step": 2038000
    },
    {
      "epoch": 3.3352644292138804,
      "grad_norm": 0.05084522068500519,
      "learning_rate": 3.3514097639081995e-06,
      "loss": 0.0123,
      "step": 2038020
    },
    {
      "epoch": 3.3352971596525336,
      "grad_norm": 0.1446918398141861,
      "learning_rate": 3.3513438716946822e-06,
      "loss": 0.0092,
      "step": 2038040
    },
    {
      "epoch": 3.335329890091187,
      "grad_norm": 0.0736660361289978,
      "learning_rate": 3.351277979481165e-06,
      "loss": 0.0127,
      "step": 2038060
    },
    {
      "epoch": 3.3353626205298403,
      "grad_norm": 0.25702640414237976,
      "learning_rate": 3.3512120872676477e-06,
      "loss": 0.0119,
      "step": 2038080
    },
    {
      "epoch": 3.3353953509684935,
      "grad_norm": 0.223776176571846,
      "learning_rate": 3.351146195054131e-06,
      "loss": 0.0104,
      "step": 2038100
    },
    {
      "epoch": 3.335428081407147,
      "grad_norm": 0.2929028868675232,
      "learning_rate": 3.3510803028406136e-06,
      "loss": 0.0106,
      "step": 2038120
    },
    {
      "epoch": 3.3354608118458002,
      "grad_norm": 0.3381020426750183,
      "learning_rate": 3.3510144106270963e-06,
      "loss": 0.0135,
      "step": 2038140
    },
    {
      "epoch": 3.335493542284454,
      "grad_norm": 1.5744678974151611,
      "learning_rate": 3.350948518413579e-06,
      "loss": 0.0118,
      "step": 2038160
    },
    {
      "epoch": 3.335526272723107,
      "grad_norm": 0.329125314950943,
      "learning_rate": 3.3508826262000622e-06,
      "loss": 0.0105,
      "step": 2038180
    },
    {
      "epoch": 3.3355590031617606,
      "grad_norm": 0.6261653304100037,
      "learning_rate": 3.350816733986545e-06,
      "loss": 0.0118,
      "step": 2038200
    },
    {
      "epoch": 3.3355917336004137,
      "grad_norm": 0.2841385304927826,
      "learning_rate": 3.3507508417730277e-06,
      "loss": 0.0123,
      "step": 2038220
    },
    {
      "epoch": 3.335624464039067,
      "grad_norm": 0.11475392431020737,
      "learning_rate": 3.3506849495595105e-06,
      "loss": 0.0097,
      "step": 2038240
    },
    {
      "epoch": 3.3356571944777205,
      "grad_norm": 0.7829866409301758,
      "learning_rate": 3.3506190573459936e-06,
      "loss": 0.0156,
      "step": 2038260
    },
    {
      "epoch": 3.3356899249163736,
      "grad_norm": 0.26956257224082947,
      "learning_rate": 3.3505531651324764e-06,
      "loss": 0.0093,
      "step": 2038280
    },
    {
      "epoch": 3.335722655355027,
      "grad_norm": 0.273964524269104,
      "learning_rate": 3.3504872729189595e-06,
      "loss": 0.0124,
      "step": 2038300
    },
    {
      "epoch": 3.3357553857936804,
      "grad_norm": 0.1572980135679245,
      "learning_rate": 3.3504213807054427e-06,
      "loss": 0.0077,
      "step": 2038320
    },
    {
      "epoch": 3.335788116232334,
      "grad_norm": 0.195755273103714,
      "learning_rate": 3.3503554884919254e-06,
      "loss": 0.0081,
      "step": 2038340
    },
    {
      "epoch": 3.335820846670987,
      "grad_norm": 0.5772507786750793,
      "learning_rate": 3.350289596278408e-06,
      "loss": 0.012,
      "step": 2038360
    },
    {
      "epoch": 3.3358535771096403,
      "grad_norm": 0.13756711781024933,
      "learning_rate": 3.350223704064891e-06,
      "loss": 0.0135,
      "step": 2038380
    },
    {
      "epoch": 3.335886307548294,
      "grad_norm": 0.4281133711338043,
      "learning_rate": 3.350157811851374e-06,
      "loss": 0.0102,
      "step": 2038400
    },
    {
      "epoch": 3.335919037986947,
      "grad_norm": 0.4277099668979645,
      "learning_rate": 3.350091919637857e-06,
      "loss": 0.0112,
      "step": 2038420
    },
    {
      "epoch": 3.3359517684256006,
      "grad_norm": 0.41444194316864014,
      "learning_rate": 3.3500260274243396e-06,
      "loss": 0.0113,
      "step": 2038440
    },
    {
      "epoch": 3.3359844988642537,
      "grad_norm": 0.12549522519111633,
      "learning_rate": 3.3499601352108223e-06,
      "loss": 0.0142,
      "step": 2038460
    },
    {
      "epoch": 3.3360172293029073,
      "grad_norm": 0.45007556676864624,
      "learning_rate": 3.3498942429973055e-06,
      "loss": 0.0095,
      "step": 2038480
    },
    {
      "epoch": 3.3360499597415605,
      "grad_norm": 1.5804578065872192,
      "learning_rate": 3.349828350783788e-06,
      "loss": 0.0136,
      "step": 2038500
    },
    {
      "epoch": 3.3360826901802136,
      "grad_norm": 0.05694824829697609,
      "learning_rate": 3.349762458570271e-06,
      "loss": 0.0094,
      "step": 2038520
    },
    {
      "epoch": 3.3361154206188672,
      "grad_norm": 0.12058136612176895,
      "learning_rate": 3.3496965663567537e-06,
      "loss": 0.0101,
      "step": 2038540
    },
    {
      "epoch": 3.3361481510575204,
      "grad_norm": 0.48515358567237854,
      "learning_rate": 3.3496306741432364e-06,
      "loss": 0.0162,
      "step": 2038560
    },
    {
      "epoch": 3.336180881496174,
      "grad_norm": 0.3512001633644104,
      "learning_rate": 3.3495647819297196e-06,
      "loss": 0.0102,
      "step": 2038580
    },
    {
      "epoch": 3.336213611934827,
      "grad_norm": 0.7772150635719299,
      "learning_rate": 3.3494988897162023e-06,
      "loss": 0.0103,
      "step": 2038600
    },
    {
      "epoch": 3.3362463423734807,
      "grad_norm": 0.13655973970890045,
      "learning_rate": 3.349432997502685e-06,
      "loss": 0.0109,
      "step": 2038620
    },
    {
      "epoch": 3.336279072812134,
      "grad_norm": 0.23981167376041412,
      "learning_rate": 3.3493671052891686e-06,
      "loss": 0.011,
      "step": 2038640
    },
    {
      "epoch": 3.336311803250787,
      "grad_norm": 0.23401622474193573,
      "learning_rate": 3.3493012130756514e-06,
      "loss": 0.0114,
      "step": 2038660
    },
    {
      "epoch": 3.3363445336894406,
      "grad_norm": 0.7409390211105347,
      "learning_rate": 3.349235320862134e-06,
      "loss": 0.0174,
      "step": 2038680
    },
    {
      "epoch": 3.3363772641280938,
      "grad_norm": 0.414538711309433,
      "learning_rate": 3.349169428648617e-06,
      "loss": 0.0197,
      "step": 2038700
    },
    {
      "epoch": 3.3364099945667474,
      "grad_norm": 0.7525460124015808,
      "learning_rate": 3.3491035364351e-06,
      "loss": 0.0155,
      "step": 2038720
    },
    {
      "epoch": 3.3364427250054005,
      "grad_norm": 0.21930697560310364,
      "learning_rate": 3.3490376442215828e-06,
      "loss": 0.0154,
      "step": 2038740
    },
    {
      "epoch": 3.336475455444054,
      "grad_norm": 0.40655049681663513,
      "learning_rate": 3.3489717520080655e-06,
      "loss": 0.0137,
      "step": 2038760
    },
    {
      "epoch": 3.3365081858827073,
      "grad_norm": 0.4755720794200897,
      "learning_rate": 3.3489058597945482e-06,
      "loss": 0.0112,
      "step": 2038780
    },
    {
      "epoch": 3.3365409163213604,
      "grad_norm": 0.2149025946855545,
      "learning_rate": 3.3488399675810314e-06,
      "loss": 0.0093,
      "step": 2038800
    },
    {
      "epoch": 3.336573646760014,
      "grad_norm": 0.3213662803173065,
      "learning_rate": 3.348774075367514e-06,
      "loss": 0.0098,
      "step": 2038820
    },
    {
      "epoch": 3.336606377198667,
      "grad_norm": 0.8969265818595886,
      "learning_rate": 3.348708183153997e-06,
      "loss": 0.0165,
      "step": 2038840
    },
    {
      "epoch": 3.3366391076373207,
      "grad_norm": 0.20813776552677155,
      "learning_rate": 3.3486422909404796e-06,
      "loss": 0.008,
      "step": 2038860
    },
    {
      "epoch": 3.336671838075974,
      "grad_norm": 0.21621321141719818,
      "learning_rate": 3.348576398726963e-06,
      "loss": 0.0087,
      "step": 2038880
    },
    {
      "epoch": 3.336704568514627,
      "grad_norm": 0.6359673738479614,
      "learning_rate": 3.3485105065134455e-06,
      "loss": 0.0066,
      "step": 2038900
    },
    {
      "epoch": 3.3367372989532806,
      "grad_norm": 0.12693610787391663,
      "learning_rate": 3.3484446142999283e-06,
      "loss": 0.0142,
      "step": 2038920
    },
    {
      "epoch": 3.336770029391934,
      "grad_norm": 0.1988595873117447,
      "learning_rate": 3.348378722086411e-06,
      "loss": 0.0093,
      "step": 2038940
    },
    {
      "epoch": 3.3368027598305874,
      "grad_norm": 0.08315286040306091,
      "learning_rate": 3.348312829872894e-06,
      "loss": 0.0076,
      "step": 2038960
    },
    {
      "epoch": 3.3368354902692405,
      "grad_norm": 0.20789997279644012,
      "learning_rate": 3.348246937659377e-06,
      "loss": 0.0138,
      "step": 2038980
    },
    {
      "epoch": 3.3368682207078937,
      "grad_norm": 2.055001735687256,
      "learning_rate": 3.34818104544586e-06,
      "loss": 0.0102,
      "step": 2039000
    },
    {
      "epoch": 3.3369009511465473,
      "grad_norm": 0.49135372042655945,
      "learning_rate": 3.3481151532323432e-06,
      "loss": 0.0096,
      "step": 2039020
    },
    {
      "epoch": 3.3369336815852004,
      "grad_norm": 0.3658898174762726,
      "learning_rate": 3.348049261018826e-06,
      "loss": 0.0114,
      "step": 2039040
    },
    {
      "epoch": 3.336966412023854,
      "grad_norm": 0.48401573300361633,
      "learning_rate": 3.3479833688053087e-06,
      "loss": 0.0168,
      "step": 2039060
    },
    {
      "epoch": 3.336999142462507,
      "grad_norm": 0.4176638424396515,
      "learning_rate": 3.3479174765917915e-06,
      "loss": 0.0093,
      "step": 2039080
    },
    {
      "epoch": 3.3370318729011608,
      "grad_norm": 0.13721872866153717,
      "learning_rate": 3.347851584378274e-06,
      "loss": 0.0117,
      "step": 2039100
    },
    {
      "epoch": 3.337064603339814,
      "grad_norm": 0.6525919437408447,
      "learning_rate": 3.3477856921647574e-06,
      "loss": 0.0165,
      "step": 2039120
    },
    {
      "epoch": 3.337097333778467,
      "grad_norm": 0.18987248837947845,
      "learning_rate": 3.34771979995124e-06,
      "loss": 0.0078,
      "step": 2039140
    },
    {
      "epoch": 3.3371300642171207,
      "grad_norm": 0.5677372217178345,
      "learning_rate": 3.347653907737723e-06,
      "loss": 0.0113,
      "step": 2039160
    },
    {
      "epoch": 3.337162794655774,
      "grad_norm": 0.6087029576301575,
      "learning_rate": 3.3475880155242056e-06,
      "loss": 0.0101,
      "step": 2039180
    },
    {
      "epoch": 3.3371955250944274,
      "grad_norm": 0.16890199482440948,
      "learning_rate": 3.3475221233106887e-06,
      "loss": 0.0145,
      "step": 2039200
    },
    {
      "epoch": 3.3372282555330806,
      "grad_norm": 0.5975828170776367,
      "learning_rate": 3.3474562310971715e-06,
      "loss": 0.0126,
      "step": 2039220
    },
    {
      "epoch": 3.337260985971734,
      "grad_norm": 0.24020126461982727,
      "learning_rate": 3.3473903388836542e-06,
      "loss": 0.0082,
      "step": 2039240
    },
    {
      "epoch": 3.3372937164103873,
      "grad_norm": 0.10520039498806,
      "learning_rate": 3.347324446670137e-06,
      "loss": 0.0085,
      "step": 2039260
    },
    {
      "epoch": 3.3373264468490405,
      "grad_norm": 0.7004414796829224,
      "learning_rate": 3.34725855445662e-06,
      "loss": 0.0118,
      "step": 2039280
    },
    {
      "epoch": 3.337359177287694,
      "grad_norm": 0.7272278666496277,
      "learning_rate": 3.347192662243103e-06,
      "loss": 0.0101,
      "step": 2039300
    },
    {
      "epoch": 3.337391907726347,
      "grad_norm": 0.135457843542099,
      "learning_rate": 3.3471267700295856e-06,
      "loss": 0.0112,
      "step": 2039320
    },
    {
      "epoch": 3.337424638165001,
      "grad_norm": 0.6188012957572937,
      "learning_rate": 3.3470608778160683e-06,
      "loss": 0.0214,
      "step": 2039340
    },
    {
      "epoch": 3.337457368603654,
      "grad_norm": 0.20228083431720734,
      "learning_rate": 3.346994985602552e-06,
      "loss": 0.0088,
      "step": 2039360
    },
    {
      "epoch": 3.3374900990423075,
      "grad_norm": 0.43520909547805786,
      "learning_rate": 3.3469290933890347e-06,
      "loss": 0.0088,
      "step": 2039380
    },
    {
      "epoch": 3.3375228294809607,
      "grad_norm": 0.34416767954826355,
      "learning_rate": 3.3468632011755174e-06,
      "loss": 0.0096,
      "step": 2039400
    },
    {
      "epoch": 3.337555559919614,
      "grad_norm": 0.33142828941345215,
      "learning_rate": 3.3467973089620006e-06,
      "loss": 0.0128,
      "step": 2039420
    },
    {
      "epoch": 3.3375882903582674,
      "grad_norm": 0.6298664212226868,
      "learning_rate": 3.3467314167484833e-06,
      "loss": 0.0109,
      "step": 2039440
    },
    {
      "epoch": 3.3376210207969206,
      "grad_norm": 0.2457231730222702,
      "learning_rate": 3.346665524534966e-06,
      "loss": 0.0073,
      "step": 2039460
    },
    {
      "epoch": 3.337653751235574,
      "grad_norm": 0.3672350347042084,
      "learning_rate": 3.346599632321449e-06,
      "loss": 0.0157,
      "step": 2039480
    },
    {
      "epoch": 3.3376864816742273,
      "grad_norm": 0.2720383405685425,
      "learning_rate": 3.346533740107932e-06,
      "loss": 0.0166,
      "step": 2039500
    },
    {
      "epoch": 3.337719212112881,
      "grad_norm": 0.8321451544761658,
      "learning_rate": 3.3464678478944147e-06,
      "loss": 0.0131,
      "step": 2039520
    },
    {
      "epoch": 3.337751942551534,
      "grad_norm": 0.8832903504371643,
      "learning_rate": 3.3464019556808974e-06,
      "loss": 0.0137,
      "step": 2039540
    },
    {
      "epoch": 3.337784672990187,
      "grad_norm": 0.42512452602386475,
      "learning_rate": 3.34633606346738e-06,
      "loss": 0.0119,
      "step": 2039560
    },
    {
      "epoch": 3.337817403428841,
      "grad_norm": 0.13779552280902863,
      "learning_rate": 3.346270171253863e-06,
      "loss": 0.0102,
      "step": 2039580
    },
    {
      "epoch": 3.337850133867494,
      "grad_norm": 0.3318755328655243,
      "learning_rate": 3.346204279040346e-06,
      "loss": 0.0115,
      "step": 2039600
    },
    {
      "epoch": 3.3378828643061476,
      "grad_norm": 0.15458498895168304,
      "learning_rate": 3.346138386826829e-06,
      "loss": 0.0132,
      "step": 2039620
    },
    {
      "epoch": 3.3379155947448007,
      "grad_norm": 0.1426919400691986,
      "learning_rate": 3.3460724946133116e-06,
      "loss": 0.0129,
      "step": 2039640
    },
    {
      "epoch": 3.3379483251834543,
      "grad_norm": 0.19148696959018707,
      "learning_rate": 3.3460066023997943e-06,
      "loss": 0.0104,
      "step": 2039660
    },
    {
      "epoch": 3.3379810556221075,
      "grad_norm": 0.5289770364761353,
      "learning_rate": 3.3459407101862775e-06,
      "loss": 0.015,
      "step": 2039680
    },
    {
      "epoch": 3.3380137860607606,
      "grad_norm": 0.3590053915977478,
      "learning_rate": 3.3458748179727606e-06,
      "loss": 0.0111,
      "step": 2039700
    },
    {
      "epoch": 3.338046516499414,
      "grad_norm": 0.16861297190189362,
      "learning_rate": 3.3458089257592434e-06,
      "loss": 0.015,
      "step": 2039720
    },
    {
      "epoch": 3.3380792469380673,
      "grad_norm": 0.44515177607536316,
      "learning_rate": 3.3457430335457265e-06,
      "loss": 0.0113,
      "step": 2039740
    },
    {
      "epoch": 3.338111977376721,
      "grad_norm": 0.7601001262664795,
      "learning_rate": 3.3456771413322093e-06,
      "loss": 0.0148,
      "step": 2039760
    },
    {
      "epoch": 3.338144707815374,
      "grad_norm": 0.29431357979774475,
      "learning_rate": 3.345611249118692e-06,
      "loss": 0.0134,
      "step": 2039780
    },
    {
      "epoch": 3.3381774382540277,
      "grad_norm": 0.1212901622056961,
      "learning_rate": 3.3455453569051748e-06,
      "loss": 0.0111,
      "step": 2039800
    },
    {
      "epoch": 3.338210168692681,
      "grad_norm": 0.2474849969148636,
      "learning_rate": 3.345479464691658e-06,
      "loss": 0.0227,
      "step": 2039820
    },
    {
      "epoch": 3.338242899131334,
      "grad_norm": 0.23239921033382416,
      "learning_rate": 3.3454135724781407e-06,
      "loss": 0.0141,
      "step": 2039840
    },
    {
      "epoch": 3.3382756295699876,
      "grad_norm": 0.08421354740858078,
      "learning_rate": 3.3453476802646234e-06,
      "loss": 0.0126,
      "step": 2039860
    },
    {
      "epoch": 3.3383083600086407,
      "grad_norm": 0.2772141993045807,
      "learning_rate": 3.345281788051106e-06,
      "loss": 0.0122,
      "step": 2039880
    },
    {
      "epoch": 3.3383410904472943,
      "grad_norm": 0.2927362024784088,
      "learning_rate": 3.3452158958375893e-06,
      "loss": 0.0095,
      "step": 2039900
    },
    {
      "epoch": 3.3383738208859475,
      "grad_norm": 0.37632402777671814,
      "learning_rate": 3.345150003624072e-06,
      "loss": 0.0103,
      "step": 2039920
    },
    {
      "epoch": 3.338406551324601,
      "grad_norm": 0.3732163608074188,
      "learning_rate": 3.3450841114105548e-06,
      "loss": 0.0092,
      "step": 2039940
    },
    {
      "epoch": 3.338439281763254,
      "grad_norm": 0.6372376680374146,
      "learning_rate": 3.3450182191970375e-06,
      "loss": 0.0118,
      "step": 2039960
    },
    {
      "epoch": 3.3384720122019074,
      "grad_norm": 0.14930258691310883,
      "learning_rate": 3.3449523269835203e-06,
      "loss": 0.0191,
      "step": 2039980
    },
    {
      "epoch": 3.338504742640561,
      "grad_norm": 0.16489918529987335,
      "learning_rate": 3.3448864347700034e-06,
      "loss": 0.013,
      "step": 2040000
    },
    {
      "epoch": 3.338537473079214,
      "grad_norm": 0.4572756886482239,
      "learning_rate": 3.344820542556486e-06,
      "loss": 0.0112,
      "step": 2040020
    },
    {
      "epoch": 3.3385702035178677,
      "grad_norm": 0.2217235118150711,
      "learning_rate": 3.344754650342969e-06,
      "loss": 0.0135,
      "step": 2040040
    },
    {
      "epoch": 3.338602933956521,
      "grad_norm": 0.3658222556114197,
      "learning_rate": 3.3446887581294525e-06,
      "loss": 0.013,
      "step": 2040060
    },
    {
      "epoch": 3.3386356643951745,
      "grad_norm": 0.2788638472557068,
      "learning_rate": 3.3446228659159352e-06,
      "loss": 0.0095,
      "step": 2040080
    },
    {
      "epoch": 3.3386683948338276,
      "grad_norm": 0.3547702133655548,
      "learning_rate": 3.344556973702418e-06,
      "loss": 0.0176,
      "step": 2040100
    },
    {
      "epoch": 3.3387011252724808,
      "grad_norm": 0.39011040329933167,
      "learning_rate": 3.3444910814889007e-06,
      "loss": 0.012,
      "step": 2040120
    },
    {
      "epoch": 3.3387338557111343,
      "grad_norm": 0.27794578671455383,
      "learning_rate": 3.344425189275384e-06,
      "loss": 0.0106,
      "step": 2040140
    },
    {
      "epoch": 3.3387665861497875,
      "grad_norm": 0.1639975607395172,
      "learning_rate": 3.3443592970618666e-06,
      "loss": 0.013,
      "step": 2040160
    },
    {
      "epoch": 3.338799316588441,
      "grad_norm": 0.05292043089866638,
      "learning_rate": 3.3442934048483493e-06,
      "loss": 0.0104,
      "step": 2040180
    },
    {
      "epoch": 3.3388320470270942,
      "grad_norm": 0.4979793429374695,
      "learning_rate": 3.344227512634832e-06,
      "loss": 0.0098,
      "step": 2040200
    },
    {
      "epoch": 3.338864777465748,
      "grad_norm": 0.0679272785782814,
      "learning_rate": 3.3441616204213152e-06,
      "loss": 0.0102,
      "step": 2040220
    },
    {
      "epoch": 3.338897507904401,
      "grad_norm": 0.07102864235639572,
      "learning_rate": 3.344095728207798e-06,
      "loss": 0.0123,
      "step": 2040240
    },
    {
      "epoch": 3.338930238343054,
      "grad_norm": 0.34768715500831604,
      "learning_rate": 3.3440298359942807e-06,
      "loss": 0.0173,
      "step": 2040260
    },
    {
      "epoch": 3.3389629687817077,
      "grad_norm": 0.33778154850006104,
      "learning_rate": 3.3439639437807635e-06,
      "loss": 0.0125,
      "step": 2040280
    },
    {
      "epoch": 3.338995699220361,
      "grad_norm": 0.46649736166000366,
      "learning_rate": 3.3438980515672466e-06,
      "loss": 0.0086,
      "step": 2040300
    },
    {
      "epoch": 3.3390284296590145,
      "grad_norm": 0.29025018215179443,
      "learning_rate": 3.3438321593537294e-06,
      "loss": 0.0083,
      "step": 2040320
    },
    {
      "epoch": 3.3390611600976676,
      "grad_norm": 0.5963906645774841,
      "learning_rate": 3.343766267140212e-06,
      "loss": 0.0155,
      "step": 2040340
    },
    {
      "epoch": 3.3390938905363208,
      "grad_norm": 0.17340289056301117,
      "learning_rate": 3.343700374926695e-06,
      "loss": 0.0119,
      "step": 2040360
    },
    {
      "epoch": 3.3391266209749744,
      "grad_norm": 0.5610653758049011,
      "learning_rate": 3.343634482713178e-06,
      "loss": 0.0148,
      "step": 2040380
    },
    {
      "epoch": 3.3391593514136275,
      "grad_norm": 0.8728549480438232,
      "learning_rate": 3.343568590499661e-06,
      "loss": 0.0158,
      "step": 2040400
    },
    {
      "epoch": 3.339192081852281,
      "grad_norm": 0.160783588886261,
      "learning_rate": 3.343502698286144e-06,
      "loss": 0.0106,
      "step": 2040420
    },
    {
      "epoch": 3.3392248122909343,
      "grad_norm": 0.28614309430122375,
      "learning_rate": 3.343436806072627e-06,
      "loss": 0.0137,
      "step": 2040440
    },
    {
      "epoch": 3.3392575427295874,
      "grad_norm": 1.64584219455719,
      "learning_rate": 3.34337091385911e-06,
      "loss": 0.0161,
      "step": 2040460
    },
    {
      "epoch": 3.339290273168241,
      "grad_norm": 0.3716234564781189,
      "learning_rate": 3.3433050216455926e-06,
      "loss": 0.0111,
      "step": 2040480
    },
    {
      "epoch": 3.339323003606894,
      "grad_norm": 0.42566096782684326,
      "learning_rate": 3.3432391294320753e-06,
      "loss": 0.0138,
      "step": 2040500
    },
    {
      "epoch": 3.3393557340455478,
      "grad_norm": 0.37753725051879883,
      "learning_rate": 3.343173237218558e-06,
      "loss": 0.0134,
      "step": 2040520
    },
    {
      "epoch": 3.339388464484201,
      "grad_norm": 0.21538928151130676,
      "learning_rate": 3.343107345005041e-06,
      "loss": 0.0122,
      "step": 2040540
    },
    {
      "epoch": 3.3394211949228545,
      "grad_norm": 0.24943865835666656,
      "learning_rate": 3.343041452791524e-06,
      "loss": 0.0117,
      "step": 2040560
    },
    {
      "epoch": 3.3394539253615076,
      "grad_norm": 0.10168495029211044,
      "learning_rate": 3.3429755605780067e-06,
      "loss": 0.0138,
      "step": 2040580
    },
    {
      "epoch": 3.339486655800161,
      "grad_norm": 0.25908681750297546,
      "learning_rate": 3.3429096683644894e-06,
      "loss": 0.0087,
      "step": 2040600
    },
    {
      "epoch": 3.3395193862388144,
      "grad_norm": 0.4282069802284241,
      "learning_rate": 3.3428437761509726e-06,
      "loss": 0.0071,
      "step": 2040620
    },
    {
      "epoch": 3.3395521166774675,
      "grad_norm": 0.3497428894042969,
      "learning_rate": 3.3427778839374553e-06,
      "loss": 0.0111,
      "step": 2040640
    },
    {
      "epoch": 3.339584847116121,
      "grad_norm": 0.49668189883232117,
      "learning_rate": 3.342711991723938e-06,
      "loss": 0.008,
      "step": 2040660
    },
    {
      "epoch": 3.3396175775547743,
      "grad_norm": 0.20816361904144287,
      "learning_rate": 3.342646099510421e-06,
      "loss": 0.0094,
      "step": 2040680
    },
    {
      "epoch": 3.339650307993428,
      "grad_norm": 0.2794782817363739,
      "learning_rate": 3.342580207296904e-06,
      "loss": 0.0119,
      "step": 2040700
    },
    {
      "epoch": 3.339683038432081,
      "grad_norm": 0.2899831235408783,
      "learning_rate": 3.3425143150833867e-06,
      "loss": 0.0131,
      "step": 2040720
    },
    {
      "epoch": 3.339715768870734,
      "grad_norm": 1.0328848361968994,
      "learning_rate": 3.3424484228698694e-06,
      "loss": 0.0115,
      "step": 2040740
    },
    {
      "epoch": 3.3397484993093878,
      "grad_norm": 0.22094202041625977,
      "learning_rate": 3.342382530656353e-06,
      "loss": 0.0103,
      "step": 2040760
    },
    {
      "epoch": 3.339781229748041,
      "grad_norm": 0.7640403509140015,
      "learning_rate": 3.3423166384428358e-06,
      "loss": 0.0161,
      "step": 2040780
    },
    {
      "epoch": 3.3398139601866945,
      "grad_norm": 0.2905386984348297,
      "learning_rate": 3.3422507462293185e-06,
      "loss": 0.0107,
      "step": 2040800
    },
    {
      "epoch": 3.3398466906253477,
      "grad_norm": 0.09810175001621246,
      "learning_rate": 3.3421848540158013e-06,
      "loss": 0.014,
      "step": 2040820
    },
    {
      "epoch": 3.3398794210640013,
      "grad_norm": 0.21489429473876953,
      "learning_rate": 3.3421189618022844e-06,
      "loss": 0.0114,
      "step": 2040840
    },
    {
      "epoch": 3.3399121515026544,
      "grad_norm": 0.4089536964893341,
      "learning_rate": 3.342053069588767e-06,
      "loss": 0.0101,
      "step": 2040860
    },
    {
      "epoch": 3.3399448819413076,
      "grad_norm": 0.3504682779312134,
      "learning_rate": 3.34198717737525e-06,
      "loss": 0.011,
      "step": 2040880
    },
    {
      "epoch": 3.339977612379961,
      "grad_norm": 0.2165941596031189,
      "learning_rate": 3.3419212851617326e-06,
      "loss": 0.0102,
      "step": 2040900
    },
    {
      "epoch": 3.3400103428186143,
      "grad_norm": 0.1993648260831833,
      "learning_rate": 3.341855392948216e-06,
      "loss": 0.0157,
      "step": 2040920
    },
    {
      "epoch": 3.340043073257268,
      "grad_norm": 0.13940933346748352,
      "learning_rate": 3.3417895007346985e-06,
      "loss": 0.0083,
      "step": 2040940
    },
    {
      "epoch": 3.340075803695921,
      "grad_norm": 0.799277663230896,
      "learning_rate": 3.3417236085211813e-06,
      "loss": 0.0077,
      "step": 2040960
    },
    {
      "epoch": 3.3401085341345746,
      "grad_norm": 0.7403503060340881,
      "learning_rate": 3.341657716307664e-06,
      "loss": 0.0147,
      "step": 2040980
    },
    {
      "epoch": 3.340141264573228,
      "grad_norm": 0.29763975739479065,
      "learning_rate": 3.3415918240941468e-06,
      "loss": 0.0128,
      "step": 2041000
    },
    {
      "epoch": 3.340173995011881,
      "grad_norm": 0.16506628692150116,
      "learning_rate": 3.34152593188063e-06,
      "loss": 0.0148,
      "step": 2041020
    },
    {
      "epoch": 3.3402067254505345,
      "grad_norm": 0.23274627327919006,
      "learning_rate": 3.3414600396671127e-06,
      "loss": 0.0147,
      "step": 2041040
    },
    {
      "epoch": 3.3402394558891877,
      "grad_norm": 0.49567875266075134,
      "learning_rate": 3.3413941474535954e-06,
      "loss": 0.0064,
      "step": 2041060
    },
    {
      "epoch": 3.3402721863278413,
      "grad_norm": 0.16373229026794434,
      "learning_rate": 3.341328255240078e-06,
      "loss": 0.0101,
      "step": 2041080
    },
    {
      "epoch": 3.3403049167664944,
      "grad_norm": 0.6379327774047852,
      "learning_rate": 3.3412623630265613e-06,
      "loss": 0.0155,
      "step": 2041100
    },
    {
      "epoch": 3.340337647205148,
      "grad_norm": 0.24498778581619263,
      "learning_rate": 3.3411964708130445e-06,
      "loss": 0.0119,
      "step": 2041120
    },
    {
      "epoch": 3.340370377643801,
      "grad_norm": 0.40817776322364807,
      "learning_rate": 3.341130578599527e-06,
      "loss": 0.011,
      "step": 2041140
    },
    {
      "epoch": 3.3404031080824543,
      "grad_norm": 0.16010728478431702,
      "learning_rate": 3.3410646863860104e-06,
      "loss": 0.0096,
      "step": 2041160
    },
    {
      "epoch": 3.340435838521108,
      "grad_norm": 0.18059208989143372,
      "learning_rate": 3.340998794172493e-06,
      "loss": 0.0151,
      "step": 2041180
    },
    {
      "epoch": 3.340468568959761,
      "grad_norm": 0.1545790433883667,
      "learning_rate": 3.340932901958976e-06,
      "loss": 0.0116,
      "step": 2041200
    },
    {
      "epoch": 3.3405012993984147,
      "grad_norm": 0.28320837020874023,
      "learning_rate": 3.3408670097454586e-06,
      "loss": 0.0087,
      "step": 2041220
    },
    {
      "epoch": 3.340534029837068,
      "grad_norm": 0.10687859356403351,
      "learning_rate": 3.3408011175319418e-06,
      "loss": 0.0123,
      "step": 2041240
    },
    {
      "epoch": 3.3405667602757214,
      "grad_norm": 0.270015686750412,
      "learning_rate": 3.3407352253184245e-06,
      "loss": 0.0119,
      "step": 2041260
    },
    {
      "epoch": 3.3405994907143746,
      "grad_norm": 0.7118574380874634,
      "learning_rate": 3.3406693331049072e-06,
      "loss": 0.0154,
      "step": 2041280
    },
    {
      "epoch": 3.3406322211530277,
      "grad_norm": 0.44231703877449036,
      "learning_rate": 3.34060344089139e-06,
      "loss": 0.0065,
      "step": 2041300
    },
    {
      "epoch": 3.3406649515916813,
      "grad_norm": 0.14865867793560028,
      "learning_rate": 3.340537548677873e-06,
      "loss": 0.0162,
      "step": 2041320
    },
    {
      "epoch": 3.3406976820303345,
      "grad_norm": 1.4334022998809814,
      "learning_rate": 3.340471656464356e-06,
      "loss": 0.01,
      "step": 2041340
    },
    {
      "epoch": 3.340730412468988,
      "grad_norm": 0.09119559079408646,
      "learning_rate": 3.3404057642508386e-06,
      "loss": 0.0082,
      "step": 2041360
    },
    {
      "epoch": 3.340763142907641,
      "grad_norm": 0.07415652275085449,
      "learning_rate": 3.3403398720373214e-06,
      "loss": 0.0161,
      "step": 2041380
    },
    {
      "epoch": 3.340795873346295,
      "grad_norm": 0.3287551701068878,
      "learning_rate": 3.340273979823804e-06,
      "loss": 0.0112,
      "step": 2041400
    },
    {
      "epoch": 3.340828603784948,
      "grad_norm": 0.19987964630126953,
      "learning_rate": 3.3402080876102873e-06,
      "loss": 0.0117,
      "step": 2041420
    },
    {
      "epoch": 3.340861334223601,
      "grad_norm": 0.4217127561569214,
      "learning_rate": 3.34014219539677e-06,
      "loss": 0.0092,
      "step": 2041440
    },
    {
      "epoch": 3.3408940646622547,
      "grad_norm": 0.7367849946022034,
      "learning_rate": 3.3400763031832536e-06,
      "loss": 0.0189,
      "step": 2041460
    },
    {
      "epoch": 3.340926795100908,
      "grad_norm": 0.41111305356025696,
      "learning_rate": 3.3400104109697363e-06,
      "loss": 0.0104,
      "step": 2041480
    },
    {
      "epoch": 3.3409595255395614,
      "grad_norm": 0.24330969154834747,
      "learning_rate": 3.339944518756219e-06,
      "loss": 0.0105,
      "step": 2041500
    },
    {
      "epoch": 3.3409922559782146,
      "grad_norm": 0.3292678892612457,
      "learning_rate": 3.339878626542702e-06,
      "loss": 0.0136,
      "step": 2041520
    },
    {
      "epoch": 3.341024986416868,
      "grad_norm": 0.20287539064884186,
      "learning_rate": 3.3398127343291845e-06,
      "loss": 0.0089,
      "step": 2041540
    },
    {
      "epoch": 3.3410577168555213,
      "grad_norm": 0.44811487197875977,
      "learning_rate": 3.3397468421156677e-06,
      "loss": 0.0147,
      "step": 2041560
    },
    {
      "epoch": 3.3410904472941745,
      "grad_norm": 0.09602005034685135,
      "learning_rate": 3.3396809499021504e-06,
      "loss": 0.0065,
      "step": 2041580
    },
    {
      "epoch": 3.341123177732828,
      "grad_norm": 0.19574061036109924,
      "learning_rate": 3.339615057688633e-06,
      "loss": 0.014,
      "step": 2041600
    },
    {
      "epoch": 3.3411559081714812,
      "grad_norm": 0.6045978665351868,
      "learning_rate": 3.339549165475116e-06,
      "loss": 0.0153,
      "step": 2041620
    },
    {
      "epoch": 3.341188638610135,
      "grad_norm": 0.1877434253692627,
      "learning_rate": 3.339483273261599e-06,
      "loss": 0.0089,
      "step": 2041640
    },
    {
      "epoch": 3.341221369048788,
      "grad_norm": 0.3005712628364563,
      "learning_rate": 3.339417381048082e-06,
      "loss": 0.0121,
      "step": 2041660
    },
    {
      "epoch": 3.3412540994874416,
      "grad_norm": 0.4457213580608368,
      "learning_rate": 3.3393514888345646e-06,
      "loss": 0.01,
      "step": 2041680
    },
    {
      "epoch": 3.3412868299260947,
      "grad_norm": 0.44863733649253845,
      "learning_rate": 3.3392855966210473e-06,
      "loss": 0.0125,
      "step": 2041700
    },
    {
      "epoch": 3.341319560364748,
      "grad_norm": 0.3444443643093109,
      "learning_rate": 3.3392197044075305e-06,
      "loss": 0.0079,
      "step": 2041720
    },
    {
      "epoch": 3.3413522908034015,
      "grad_norm": 0.08124727010726929,
      "learning_rate": 3.339153812194013e-06,
      "loss": 0.0176,
      "step": 2041740
    },
    {
      "epoch": 3.3413850212420546,
      "grad_norm": 0.23461726307868958,
      "learning_rate": 3.339087919980496e-06,
      "loss": 0.0068,
      "step": 2041760
    },
    {
      "epoch": 3.341417751680708,
      "grad_norm": 0.10963854193687439,
      "learning_rate": 3.3390220277669787e-06,
      "loss": 0.0071,
      "step": 2041780
    },
    {
      "epoch": 3.3414504821193614,
      "grad_norm": 0.06923019886016846,
      "learning_rate": 3.338956135553462e-06,
      "loss": 0.0116,
      "step": 2041800
    },
    {
      "epoch": 3.341483212558015,
      "grad_norm": 0.09994105994701385,
      "learning_rate": 3.338890243339945e-06,
      "loss": 0.0082,
      "step": 2041820
    },
    {
      "epoch": 3.341515942996668,
      "grad_norm": 0.07855984568595886,
      "learning_rate": 3.3388243511264278e-06,
      "loss": 0.012,
      "step": 2041840
    },
    {
      "epoch": 3.3415486734353212,
      "grad_norm": 0.20312777161598206,
      "learning_rate": 3.338758458912911e-06,
      "loss": 0.0131,
      "step": 2041860
    },
    {
      "epoch": 3.341581403873975,
      "grad_norm": 0.05725085735321045,
      "learning_rate": 3.3386925666993937e-06,
      "loss": 0.0075,
      "step": 2041880
    },
    {
      "epoch": 3.341614134312628,
      "grad_norm": 0.6906017661094666,
      "learning_rate": 3.3386266744858764e-06,
      "loss": 0.0138,
      "step": 2041900
    },
    {
      "epoch": 3.3416468647512816,
      "grad_norm": 0.3045956790447235,
      "learning_rate": 3.338560782272359e-06,
      "loss": 0.0113,
      "step": 2041920
    },
    {
      "epoch": 3.3416795951899347,
      "grad_norm": 0.1003013476729393,
      "learning_rate": 3.338494890058842e-06,
      "loss": 0.0076,
      "step": 2041940
    },
    {
      "epoch": 3.341712325628588,
      "grad_norm": 0.5771733522415161,
      "learning_rate": 3.338428997845325e-06,
      "loss": 0.0086,
      "step": 2041960
    },
    {
      "epoch": 3.3417450560672415,
      "grad_norm": 0.3266071081161499,
      "learning_rate": 3.3383631056318078e-06,
      "loss": 0.0116,
      "step": 2041980
    },
    {
      "epoch": 3.3417777865058946,
      "grad_norm": 0.12124821543693542,
      "learning_rate": 3.3382972134182905e-06,
      "loss": 0.0136,
      "step": 2042000
    },
    {
      "epoch": 3.3418105169445482,
      "grad_norm": 0.22844283282756805,
      "learning_rate": 3.3382313212047733e-06,
      "loss": 0.0174,
      "step": 2042020
    },
    {
      "epoch": 3.3418432473832014,
      "grad_norm": 0.09258726239204407,
      "learning_rate": 3.3381654289912564e-06,
      "loss": 0.0102,
      "step": 2042040
    },
    {
      "epoch": 3.3418759778218545,
      "grad_norm": 0.2902427017688751,
      "learning_rate": 3.338099536777739e-06,
      "loss": 0.0103,
      "step": 2042060
    },
    {
      "epoch": 3.341908708260508,
      "grad_norm": 0.08767206966876984,
      "learning_rate": 3.338033644564222e-06,
      "loss": 0.0139,
      "step": 2042080
    },
    {
      "epoch": 3.3419414386991613,
      "grad_norm": 0.23793557286262512,
      "learning_rate": 3.3379677523507046e-06,
      "loss": 0.0121,
      "step": 2042100
    },
    {
      "epoch": 3.341974169137815,
      "grad_norm": 0.23601526021957397,
      "learning_rate": 3.337901860137188e-06,
      "loss": 0.0117,
      "step": 2042120
    },
    {
      "epoch": 3.342006899576468,
      "grad_norm": 0.142108753323555,
      "learning_rate": 3.3378359679236705e-06,
      "loss": 0.0108,
      "step": 2042140
    },
    {
      "epoch": 3.3420396300151216,
      "grad_norm": 0.14400149881839752,
      "learning_rate": 3.3377700757101537e-06,
      "loss": 0.0139,
      "step": 2042160
    },
    {
      "epoch": 3.3420723604537748,
      "grad_norm": 0.6113151907920837,
      "learning_rate": 3.337704183496637e-06,
      "loss": 0.0159,
      "step": 2042180
    },
    {
      "epoch": 3.342105090892428,
      "grad_norm": 0.18856966495513916,
      "learning_rate": 3.3376382912831196e-06,
      "loss": 0.009,
      "step": 2042200
    },
    {
      "epoch": 3.3421378213310815,
      "grad_norm": 0.255512535572052,
      "learning_rate": 3.3375723990696024e-06,
      "loss": 0.0112,
      "step": 2042220
    },
    {
      "epoch": 3.3421705517697347,
      "grad_norm": 0.15480349957942963,
      "learning_rate": 3.337506506856085e-06,
      "loss": 0.0083,
      "step": 2042240
    },
    {
      "epoch": 3.3422032822083882,
      "grad_norm": 0.19042252004146576,
      "learning_rate": 3.3374406146425683e-06,
      "loss": 0.012,
      "step": 2042260
    },
    {
      "epoch": 3.3422360126470414,
      "grad_norm": 0.1687757670879364,
      "learning_rate": 3.337374722429051e-06,
      "loss": 0.0101,
      "step": 2042280
    },
    {
      "epoch": 3.342268743085695,
      "grad_norm": 0.37796929478645325,
      "learning_rate": 3.3373088302155337e-06,
      "loss": 0.0119,
      "step": 2042300
    },
    {
      "epoch": 3.342301473524348,
      "grad_norm": 0.2618931829929352,
      "learning_rate": 3.3372429380020165e-06,
      "loss": 0.0114,
      "step": 2042320
    },
    {
      "epoch": 3.3423342039630013,
      "grad_norm": 0.20793378353118896,
      "learning_rate": 3.3371770457884996e-06,
      "loss": 0.0143,
      "step": 2042340
    },
    {
      "epoch": 3.342366934401655,
      "grad_norm": 0.09954467415809631,
      "learning_rate": 3.3371111535749824e-06,
      "loss": 0.011,
      "step": 2042360
    },
    {
      "epoch": 3.342399664840308,
      "grad_norm": 0.07487081736326218,
      "learning_rate": 3.337045261361465e-06,
      "loss": 0.01,
      "step": 2042380
    },
    {
      "epoch": 3.3424323952789616,
      "grad_norm": 1.3326573371887207,
      "learning_rate": 3.336979369147948e-06,
      "loss": 0.01,
      "step": 2042400
    },
    {
      "epoch": 3.342465125717615,
      "grad_norm": 0.16732394695281982,
      "learning_rate": 3.3369134769344306e-06,
      "loss": 0.0144,
      "step": 2042420
    },
    {
      "epoch": 3.3424978561562684,
      "grad_norm": 0.2801230847835541,
      "learning_rate": 3.3368475847209138e-06,
      "loss": 0.0117,
      "step": 2042440
    },
    {
      "epoch": 3.3425305865949215,
      "grad_norm": 0.05555417016148567,
      "learning_rate": 3.3367816925073965e-06,
      "loss": 0.0089,
      "step": 2042460
    },
    {
      "epoch": 3.3425633170335747,
      "grad_norm": 1.486774206161499,
      "learning_rate": 3.3367158002938792e-06,
      "loss": 0.0124,
      "step": 2042480
    },
    {
      "epoch": 3.3425960474722283,
      "grad_norm": 0.22023329138755798,
      "learning_rate": 3.336649908080362e-06,
      "loss": 0.0123,
      "step": 2042500
    },
    {
      "epoch": 3.3426287779108814,
      "grad_norm": 0.11080974340438843,
      "learning_rate": 3.3365840158668456e-06,
      "loss": 0.0149,
      "step": 2042520
    },
    {
      "epoch": 3.342661508349535,
      "grad_norm": 0.47473543882369995,
      "learning_rate": 3.3365181236533283e-06,
      "loss": 0.0082,
      "step": 2042540
    },
    {
      "epoch": 3.342694238788188,
      "grad_norm": 0.0798066258430481,
      "learning_rate": 3.336452231439811e-06,
      "loss": 0.0099,
      "step": 2042560
    },
    {
      "epoch": 3.3427269692268418,
      "grad_norm": 0.5642082691192627,
      "learning_rate": 3.336386339226294e-06,
      "loss": 0.0084,
      "step": 2042580
    },
    {
      "epoch": 3.342759699665495,
      "grad_norm": 0.33443784713745117,
      "learning_rate": 3.336320447012777e-06,
      "loss": 0.0145,
      "step": 2042600
    },
    {
      "epoch": 3.342792430104148,
      "grad_norm": 0.21480199694633484,
      "learning_rate": 3.3362545547992597e-06,
      "loss": 0.012,
      "step": 2042620
    },
    {
      "epoch": 3.3428251605428017,
      "grad_norm": 0.279697060585022,
      "learning_rate": 3.3361886625857424e-06,
      "loss": 0.0111,
      "step": 2042640
    },
    {
      "epoch": 3.342857890981455,
      "grad_norm": 0.17213097214698792,
      "learning_rate": 3.3361227703722256e-06,
      "loss": 0.0085,
      "step": 2042660
    },
    {
      "epoch": 3.3428906214201084,
      "grad_norm": 0.3948778808116913,
      "learning_rate": 3.3360568781587083e-06,
      "loss": 0.0101,
      "step": 2042680
    },
    {
      "epoch": 3.3429233518587615,
      "grad_norm": 0.28012898564338684,
      "learning_rate": 3.335990985945191e-06,
      "loss": 0.0098,
      "step": 2042700
    },
    {
      "epoch": 3.342956082297415,
      "grad_norm": 0.15408587455749512,
      "learning_rate": 3.335925093731674e-06,
      "loss": 0.011,
      "step": 2042720
    },
    {
      "epoch": 3.3429888127360683,
      "grad_norm": 0.33896952867507935,
      "learning_rate": 3.335859201518157e-06,
      "loss": 0.0145,
      "step": 2042740
    },
    {
      "epoch": 3.3430215431747214,
      "grad_norm": 0.841322660446167,
      "learning_rate": 3.3357933093046397e-06,
      "loss": 0.0112,
      "step": 2042760
    },
    {
      "epoch": 3.343054273613375,
      "grad_norm": 0.8516975045204163,
      "learning_rate": 3.3357274170911225e-06,
      "loss": 0.0125,
      "step": 2042780
    },
    {
      "epoch": 3.343087004052028,
      "grad_norm": 0.26515069603919983,
      "learning_rate": 3.335661524877605e-06,
      "loss": 0.0143,
      "step": 2042800
    },
    {
      "epoch": 3.343119734490682,
      "grad_norm": 0.6158265471458435,
      "learning_rate": 3.3355956326640884e-06,
      "loss": 0.0127,
      "step": 2042820
    },
    {
      "epoch": 3.343152464929335,
      "grad_norm": 1.6876118183135986,
      "learning_rate": 3.335529740450571e-06,
      "loss": 0.0169,
      "step": 2042840
    },
    {
      "epoch": 3.3431851953679885,
      "grad_norm": 0.7005258202552795,
      "learning_rate": 3.335463848237054e-06,
      "loss": 0.0156,
      "step": 2042860
    },
    {
      "epoch": 3.3432179258066417,
      "grad_norm": 0.1337652951478958,
      "learning_rate": 3.3353979560235374e-06,
      "loss": 0.0109,
      "step": 2042880
    },
    {
      "epoch": 3.343250656245295,
      "grad_norm": 0.368166983127594,
      "learning_rate": 3.33533206381002e-06,
      "loss": 0.0118,
      "step": 2042900
    },
    {
      "epoch": 3.3432833866839484,
      "grad_norm": 0.16703857481479645,
      "learning_rate": 3.335266171596503e-06,
      "loss": 0.013,
      "step": 2042920
    },
    {
      "epoch": 3.3433161171226016,
      "grad_norm": 0.3769296705722809,
      "learning_rate": 3.3352002793829856e-06,
      "loss": 0.0113,
      "step": 2042940
    },
    {
      "epoch": 3.343348847561255,
      "grad_norm": 0.12992019951343536,
      "learning_rate": 3.3351343871694684e-06,
      "loss": 0.0122,
      "step": 2042960
    },
    {
      "epoch": 3.3433815779999083,
      "grad_norm": 0.25418779253959656,
      "learning_rate": 3.3350684949559515e-06,
      "loss": 0.0103,
      "step": 2042980
    },
    {
      "epoch": 3.343414308438562,
      "grad_norm": 0.2199072688817978,
      "learning_rate": 3.3350026027424343e-06,
      "loss": 0.0123,
      "step": 2043000
    },
    {
      "epoch": 3.343447038877215,
      "grad_norm": 0.2548789381980896,
      "learning_rate": 3.334936710528917e-06,
      "loss": 0.0081,
      "step": 2043020
    },
    {
      "epoch": 3.343479769315868,
      "grad_norm": 0.3208388388156891,
      "learning_rate": 3.3348708183153998e-06,
      "loss": 0.0147,
      "step": 2043040
    },
    {
      "epoch": 3.343512499754522,
      "grad_norm": 0.5968793034553528,
      "learning_rate": 3.334804926101883e-06,
      "loss": 0.0093,
      "step": 2043060
    },
    {
      "epoch": 3.343545230193175,
      "grad_norm": 0.3233088254928589,
      "learning_rate": 3.3347390338883657e-06,
      "loss": 0.0094,
      "step": 2043080
    },
    {
      "epoch": 3.3435779606318285,
      "grad_norm": 0.43661707639694214,
      "learning_rate": 3.3346731416748484e-06,
      "loss": 0.0107,
      "step": 2043100
    },
    {
      "epoch": 3.3436106910704817,
      "grad_norm": 0.3986964523792267,
      "learning_rate": 3.334607249461331e-06,
      "loss": 0.0087,
      "step": 2043120
    },
    {
      "epoch": 3.3436434215091353,
      "grad_norm": 0.35467055439949036,
      "learning_rate": 3.3345413572478143e-06,
      "loss": 0.0136,
      "step": 2043140
    },
    {
      "epoch": 3.3436761519477884,
      "grad_norm": 0.1285741925239563,
      "learning_rate": 3.334475465034297e-06,
      "loss": 0.0107,
      "step": 2043160
    },
    {
      "epoch": 3.3437088823864416,
      "grad_norm": 0.31311431527137756,
      "learning_rate": 3.33440957282078e-06,
      "loss": 0.0063,
      "step": 2043180
    },
    {
      "epoch": 3.343741612825095,
      "grad_norm": 0.17860932648181915,
      "learning_rate": 3.3343436806072625e-06,
      "loss": 0.0179,
      "step": 2043200
    },
    {
      "epoch": 3.3437743432637483,
      "grad_norm": 0.21142888069152832,
      "learning_rate": 3.334277788393746e-06,
      "loss": 0.009,
      "step": 2043220
    },
    {
      "epoch": 3.343807073702402,
      "grad_norm": 0.3392699062824249,
      "learning_rate": 3.334211896180229e-06,
      "loss": 0.0105,
      "step": 2043240
    },
    {
      "epoch": 3.343839804141055,
      "grad_norm": 0.1975564807653427,
      "learning_rate": 3.3341460039667116e-06,
      "loss": 0.0085,
      "step": 2043260
    },
    {
      "epoch": 3.3438725345797087,
      "grad_norm": 0.43240413069725037,
      "learning_rate": 3.3340801117531948e-06,
      "loss": 0.0062,
      "step": 2043280
    },
    {
      "epoch": 3.343905265018362,
      "grad_norm": 0.23474135994911194,
      "learning_rate": 3.3340142195396775e-06,
      "loss": 0.0094,
      "step": 2043300
    },
    {
      "epoch": 3.343937995457015,
      "grad_norm": 0.2811920642852783,
      "learning_rate": 3.3339483273261602e-06,
      "loss": 0.0127,
      "step": 2043320
    },
    {
      "epoch": 3.3439707258956686,
      "grad_norm": 0.25505489110946655,
      "learning_rate": 3.333882435112643e-06,
      "loss": 0.0164,
      "step": 2043340
    },
    {
      "epoch": 3.3440034563343217,
      "grad_norm": 0.37087273597717285,
      "learning_rate": 3.3338165428991257e-06,
      "loss": 0.0131,
      "step": 2043360
    },
    {
      "epoch": 3.3440361867729753,
      "grad_norm": 0.24652333557605743,
      "learning_rate": 3.333750650685609e-06,
      "loss": 0.0142,
      "step": 2043380
    },
    {
      "epoch": 3.3440689172116285,
      "grad_norm": 0.5044307708740234,
      "learning_rate": 3.3336847584720916e-06,
      "loss": 0.0104,
      "step": 2043400
    },
    {
      "epoch": 3.3441016476502816,
      "grad_norm": 0.6300332546234131,
      "learning_rate": 3.3336188662585744e-06,
      "loss": 0.0123,
      "step": 2043420
    },
    {
      "epoch": 3.344134378088935,
      "grad_norm": 0.13685333728790283,
      "learning_rate": 3.333552974045057e-06,
      "loss": 0.0115,
      "step": 2043440
    },
    {
      "epoch": 3.3441671085275884,
      "grad_norm": 0.28527241945266724,
      "learning_rate": 3.3334870818315403e-06,
      "loss": 0.0158,
      "step": 2043460
    },
    {
      "epoch": 3.344199838966242,
      "grad_norm": 0.2945990264415741,
      "learning_rate": 3.333421189618023e-06,
      "loss": 0.012,
      "step": 2043480
    },
    {
      "epoch": 3.344232569404895,
      "grad_norm": 0.1550149917602539,
      "learning_rate": 3.3333552974045057e-06,
      "loss": 0.0139,
      "step": 2043500
    },
    {
      "epoch": 3.3442652998435483,
      "grad_norm": 0.18938015401363373,
      "learning_rate": 3.3332894051909885e-06,
      "loss": 0.0089,
      "step": 2043520
    },
    {
      "epoch": 3.344298030282202,
      "grad_norm": 0.49448710680007935,
      "learning_rate": 3.3332235129774716e-06,
      "loss": 0.0106,
      "step": 2043540
    },
    {
      "epoch": 3.344330760720855,
      "grad_norm": 0.21679633855819702,
      "learning_rate": 3.3331576207639544e-06,
      "loss": 0.0114,
      "step": 2043560
    },
    {
      "epoch": 3.3443634911595086,
      "grad_norm": 0.33163657784461975,
      "learning_rate": 3.3330917285504375e-06,
      "loss": 0.0164,
      "step": 2043580
    },
    {
      "epoch": 3.3443962215981617,
      "grad_norm": 0.23695655167102814,
      "learning_rate": 3.3330258363369207e-06,
      "loss": 0.0088,
      "step": 2043600
    },
    {
      "epoch": 3.3444289520368153,
      "grad_norm": 0.44542109966278076,
      "learning_rate": 3.3329599441234035e-06,
      "loss": 0.0142,
      "step": 2043620
    },
    {
      "epoch": 3.3444616824754685,
      "grad_norm": 0.2232179045677185,
      "learning_rate": 3.332894051909886e-06,
      "loss": 0.016,
      "step": 2043640
    },
    {
      "epoch": 3.3444944129141216,
      "grad_norm": 0.45060670375823975,
      "learning_rate": 3.332828159696369e-06,
      "loss": 0.0123,
      "step": 2043660
    },
    {
      "epoch": 3.3445271433527752,
      "grad_norm": 0.18179483711719513,
      "learning_rate": 3.332762267482852e-06,
      "loss": 0.0119,
      "step": 2043680
    },
    {
      "epoch": 3.3445598737914284,
      "grad_norm": 0.4330611824989319,
      "learning_rate": 3.332696375269335e-06,
      "loss": 0.0107,
      "step": 2043700
    },
    {
      "epoch": 3.344592604230082,
      "grad_norm": 0.16036851704120636,
      "learning_rate": 3.3326304830558176e-06,
      "loss": 0.0108,
      "step": 2043720
    },
    {
      "epoch": 3.344625334668735,
      "grad_norm": 0.25366851687431335,
      "learning_rate": 3.3325645908423003e-06,
      "loss": 0.0156,
      "step": 2043740
    },
    {
      "epoch": 3.3446580651073887,
      "grad_norm": 0.4048280715942383,
      "learning_rate": 3.3324986986287835e-06,
      "loss": 0.0148,
      "step": 2043760
    },
    {
      "epoch": 3.344690795546042,
      "grad_norm": 1.4537960290908813,
      "learning_rate": 3.3324328064152662e-06,
      "loss": 0.0117,
      "step": 2043780
    },
    {
      "epoch": 3.344723525984695,
      "grad_norm": 0.9389923214912415,
      "learning_rate": 3.332366914201749e-06,
      "loss": 0.0194,
      "step": 2043800
    },
    {
      "epoch": 3.3447562564233486,
      "grad_norm": 0.16202811896800995,
      "learning_rate": 3.3323010219882317e-06,
      "loss": 0.0132,
      "step": 2043820
    },
    {
      "epoch": 3.3447889868620018,
      "grad_norm": 0.48458337783813477,
      "learning_rate": 3.3322351297747144e-06,
      "loss": 0.0183,
      "step": 2043840
    },
    {
      "epoch": 3.3448217173006554,
      "grad_norm": 0.40556105971336365,
      "learning_rate": 3.3321692375611976e-06,
      "loss": 0.0128,
      "step": 2043860
    },
    {
      "epoch": 3.3448544477393085,
      "grad_norm": 0.050817858427762985,
      "learning_rate": 3.3321033453476803e-06,
      "loss": 0.008,
      "step": 2043880
    },
    {
      "epoch": 3.344887178177962,
      "grad_norm": 0.5018352270126343,
      "learning_rate": 3.332037453134163e-06,
      "loss": 0.0079,
      "step": 2043900
    },
    {
      "epoch": 3.3449199086166153,
      "grad_norm": 0.33073991537094116,
      "learning_rate": 3.3319715609206467e-06,
      "loss": 0.0142,
      "step": 2043920
    },
    {
      "epoch": 3.3449526390552684,
      "grad_norm": 0.16837237775325775,
      "learning_rate": 3.3319056687071294e-06,
      "loss": 0.0128,
      "step": 2043940
    },
    {
      "epoch": 3.344985369493922,
      "grad_norm": 0.47761011123657227,
      "learning_rate": 3.331839776493612e-06,
      "loss": 0.0085,
      "step": 2043960
    },
    {
      "epoch": 3.345018099932575,
      "grad_norm": 0.39738282561302185,
      "learning_rate": 3.331773884280095e-06,
      "loss": 0.0124,
      "step": 2043980
    },
    {
      "epoch": 3.3450508303712287,
      "grad_norm": 0.2199954390525818,
      "learning_rate": 3.331707992066578e-06,
      "loss": 0.0107,
      "step": 2044000
    },
    {
      "epoch": 3.345083560809882,
      "grad_norm": 0.39382439851760864,
      "learning_rate": 3.331642099853061e-06,
      "loss": 0.0118,
      "step": 2044020
    },
    {
      "epoch": 3.3451162912485355,
      "grad_norm": 0.1958647519350052,
      "learning_rate": 3.3315762076395435e-06,
      "loss": 0.0093,
      "step": 2044040
    },
    {
      "epoch": 3.3451490216871886,
      "grad_norm": 0.0827464833855629,
      "learning_rate": 3.3315103154260263e-06,
      "loss": 0.0087,
      "step": 2044060
    },
    {
      "epoch": 3.345181752125842,
      "grad_norm": 0.4073256552219391,
      "learning_rate": 3.3314444232125094e-06,
      "loss": 0.0072,
      "step": 2044080
    },
    {
      "epoch": 3.3452144825644954,
      "grad_norm": 0.6176954507827759,
      "learning_rate": 3.331378530998992e-06,
      "loss": 0.0067,
      "step": 2044100
    },
    {
      "epoch": 3.3452472130031485,
      "grad_norm": 0.25966769456863403,
      "learning_rate": 3.331312638785475e-06,
      "loss": 0.0127,
      "step": 2044120
    },
    {
      "epoch": 3.345279943441802,
      "grad_norm": 0.09095510095357895,
      "learning_rate": 3.3312467465719577e-06,
      "loss": 0.0128,
      "step": 2044140
    },
    {
      "epoch": 3.3453126738804553,
      "grad_norm": 0.11196798831224442,
      "learning_rate": 3.331180854358441e-06,
      "loss": 0.009,
      "step": 2044160
    },
    {
      "epoch": 3.345345404319109,
      "grad_norm": 0.19484487175941467,
      "learning_rate": 3.3311149621449236e-06,
      "loss": 0.02,
      "step": 2044180
    },
    {
      "epoch": 3.345378134757762,
      "grad_norm": 0.7481219172477722,
      "learning_rate": 3.3310490699314063e-06,
      "loss": 0.011,
      "step": 2044200
    },
    {
      "epoch": 3.345410865196415,
      "grad_norm": 0.3312213718891144,
      "learning_rate": 3.330983177717889e-06,
      "loss": 0.0084,
      "step": 2044220
    },
    {
      "epoch": 3.3454435956350688,
      "grad_norm": 0.07653652131557465,
      "learning_rate": 3.330917285504372e-06,
      "loss": 0.0108,
      "step": 2044240
    },
    {
      "epoch": 3.345476326073722,
      "grad_norm": 0.1853785663843155,
      "learning_rate": 3.330851393290855e-06,
      "loss": 0.0106,
      "step": 2044260
    },
    {
      "epoch": 3.3455090565123755,
      "grad_norm": 0.41731029748916626,
      "learning_rate": 3.330785501077338e-06,
      "loss": 0.0136,
      "step": 2044280
    },
    {
      "epoch": 3.3455417869510287,
      "grad_norm": 0.6532576084136963,
      "learning_rate": 3.3307196088638213e-06,
      "loss": 0.0103,
      "step": 2044300
    },
    {
      "epoch": 3.3455745173896823,
      "grad_norm": 0.24438153207302094,
      "learning_rate": 3.330653716650304e-06,
      "loss": 0.0098,
      "step": 2044320
    },
    {
      "epoch": 3.3456072478283354,
      "grad_norm": 0.14559495449066162,
      "learning_rate": 3.3305878244367867e-06,
      "loss": 0.0131,
      "step": 2044340
    },
    {
      "epoch": 3.3456399782669886,
      "grad_norm": 0.5872657895088196,
      "learning_rate": 3.3305219322232695e-06,
      "loss": 0.01,
      "step": 2044360
    },
    {
      "epoch": 3.345672708705642,
      "grad_norm": 0.20254646241664886,
      "learning_rate": 3.3304560400097522e-06,
      "loss": 0.011,
      "step": 2044380
    },
    {
      "epoch": 3.3457054391442953,
      "grad_norm": 0.5957430005073547,
      "learning_rate": 3.3303901477962354e-06,
      "loss": 0.0086,
      "step": 2044400
    },
    {
      "epoch": 3.345738169582949,
      "grad_norm": 0.12190558761358261,
      "learning_rate": 3.330324255582718e-06,
      "loss": 0.0132,
      "step": 2044420
    },
    {
      "epoch": 3.345770900021602,
      "grad_norm": 0.2798197567462921,
      "learning_rate": 3.330258363369201e-06,
      "loss": 0.0122,
      "step": 2044440
    },
    {
      "epoch": 3.3458036304602556,
      "grad_norm": 0.44710835814476013,
      "learning_rate": 3.3301924711556836e-06,
      "loss": 0.0109,
      "step": 2044460
    },
    {
      "epoch": 3.345836360898909,
      "grad_norm": 0.41713976860046387,
      "learning_rate": 3.3301265789421668e-06,
      "loss": 0.0114,
      "step": 2044480
    },
    {
      "epoch": 3.345869091337562,
      "grad_norm": 0.09899302572011948,
      "learning_rate": 3.3300606867286495e-06,
      "loss": 0.0054,
      "step": 2044500
    },
    {
      "epoch": 3.3459018217762155,
      "grad_norm": 0.289402574300766,
      "learning_rate": 3.3299947945151322e-06,
      "loss": 0.0126,
      "step": 2044520
    },
    {
      "epoch": 3.3459345522148687,
      "grad_norm": 0.7486910820007324,
      "learning_rate": 3.329928902301615e-06,
      "loss": 0.0106,
      "step": 2044540
    },
    {
      "epoch": 3.3459672826535223,
      "grad_norm": 0.12193730473518372,
      "learning_rate": 3.329863010088098e-06,
      "loss": 0.0094,
      "step": 2044560
    },
    {
      "epoch": 3.3460000130921754,
      "grad_norm": 0.14479021728038788,
      "learning_rate": 3.329797117874581e-06,
      "loss": 0.0125,
      "step": 2044580
    },
    {
      "epoch": 3.346032743530829,
      "grad_norm": 0.5366166830062866,
      "learning_rate": 3.3297312256610636e-06,
      "loss": 0.0139,
      "step": 2044600
    },
    {
      "epoch": 3.346065473969482,
      "grad_norm": 0.03410184383392334,
      "learning_rate": 3.3296653334475464e-06,
      "loss": 0.01,
      "step": 2044620
    },
    {
      "epoch": 3.3460982044081353,
      "grad_norm": 0.1713656634092331,
      "learning_rate": 3.32959944123403e-06,
      "loss": 0.0118,
      "step": 2044640
    },
    {
      "epoch": 3.346130934846789,
      "grad_norm": 0.4786701202392578,
      "learning_rate": 3.3295335490205127e-06,
      "loss": 0.0076,
      "step": 2044660
    },
    {
      "epoch": 3.346163665285442,
      "grad_norm": 0.35898542404174805,
      "learning_rate": 3.3294676568069954e-06,
      "loss": 0.0105,
      "step": 2044680
    },
    {
      "epoch": 3.3461963957240957,
      "grad_norm": 0.1143876314163208,
      "learning_rate": 3.3294017645934786e-06,
      "loss": 0.0131,
      "step": 2044700
    },
    {
      "epoch": 3.346229126162749,
      "grad_norm": 0.264340877532959,
      "learning_rate": 3.3293358723799613e-06,
      "loss": 0.01,
      "step": 2044720
    },
    {
      "epoch": 3.3462618566014024,
      "grad_norm": 0.44875985383987427,
      "learning_rate": 3.329269980166444e-06,
      "loss": 0.0139,
      "step": 2044740
    },
    {
      "epoch": 3.3462945870400556,
      "grad_norm": 0.30983567237854004,
      "learning_rate": 3.329204087952927e-06,
      "loss": 0.0094,
      "step": 2044760
    },
    {
      "epoch": 3.3463273174787087,
      "grad_norm": 0.14235684275627136,
      "learning_rate": 3.32913819573941e-06,
      "loss": 0.0116,
      "step": 2044780
    },
    {
      "epoch": 3.3463600479173623,
      "grad_norm": 0.19458943605422974,
      "learning_rate": 3.3290723035258927e-06,
      "loss": 0.017,
      "step": 2044800
    },
    {
      "epoch": 3.3463927783560155,
      "grad_norm": 0.08918795734643936,
      "learning_rate": 3.3290064113123755e-06,
      "loss": 0.015,
      "step": 2044820
    },
    {
      "epoch": 3.346425508794669,
      "grad_norm": 0.5477683544158936,
      "learning_rate": 3.328940519098858e-06,
      "loss": 0.0113,
      "step": 2044840
    },
    {
      "epoch": 3.346458239233322,
      "grad_norm": 0.5488559007644653,
      "learning_rate": 3.328874626885341e-06,
      "loss": 0.0143,
      "step": 2044860
    },
    {
      "epoch": 3.346490969671976,
      "grad_norm": 0.3982771337032318,
      "learning_rate": 3.328808734671824e-06,
      "loss": 0.0142,
      "step": 2044880
    },
    {
      "epoch": 3.346523700110629,
      "grad_norm": 0.3782574236392975,
      "learning_rate": 3.328742842458307e-06,
      "loss": 0.0132,
      "step": 2044900
    },
    {
      "epoch": 3.346556430549282,
      "grad_norm": 0.48925042152404785,
      "learning_rate": 3.3286769502447896e-06,
      "loss": 0.0113,
      "step": 2044920
    },
    {
      "epoch": 3.3465891609879357,
      "grad_norm": 0.42195093631744385,
      "learning_rate": 3.3286110580312723e-06,
      "loss": 0.0091,
      "step": 2044940
    },
    {
      "epoch": 3.346621891426589,
      "grad_norm": 0.5250390768051147,
      "learning_rate": 3.3285451658177555e-06,
      "loss": 0.0101,
      "step": 2044960
    },
    {
      "epoch": 3.3466546218652424,
      "grad_norm": 0.18185970187187195,
      "learning_rate": 3.3284792736042386e-06,
      "loss": 0.0137,
      "step": 2044980
    },
    {
      "epoch": 3.3466873523038956,
      "grad_norm": 0.20426160097122192,
      "learning_rate": 3.3284133813907214e-06,
      "loss": 0.0103,
      "step": 2045000
    },
    {
      "epoch": 3.3467200827425487,
      "grad_norm": 2.2143261432647705,
      "learning_rate": 3.3283474891772046e-06,
      "loss": 0.0103,
      "step": 2045020
    },
    {
      "epoch": 3.3467528131812023,
      "grad_norm": 1.033203125,
      "learning_rate": 3.3282815969636873e-06,
      "loss": 0.0155,
      "step": 2045040
    },
    {
      "epoch": 3.3467855436198555,
      "grad_norm": 0.21573609113693237,
      "learning_rate": 3.32821570475017e-06,
      "loss": 0.0153,
      "step": 2045060
    },
    {
      "epoch": 3.346818274058509,
      "grad_norm": 0.27331069111824036,
      "learning_rate": 3.3281498125366528e-06,
      "loss": 0.0145,
      "step": 2045080
    },
    {
      "epoch": 3.346851004497162,
      "grad_norm": 0.2467707395553589,
      "learning_rate": 3.328083920323136e-06,
      "loss": 0.0122,
      "step": 2045100
    },
    {
      "epoch": 3.3468837349358154,
      "grad_norm": 0.6579660177230835,
      "learning_rate": 3.3280180281096187e-06,
      "loss": 0.0119,
      "step": 2045120
    },
    {
      "epoch": 3.346916465374469,
      "grad_norm": 0.2512833774089813,
      "learning_rate": 3.3279521358961014e-06,
      "loss": 0.0118,
      "step": 2045140
    },
    {
      "epoch": 3.346949195813122,
      "grad_norm": 0.24396444857120514,
      "learning_rate": 3.327886243682584e-06,
      "loss": 0.0102,
      "step": 2045160
    },
    {
      "epoch": 3.3469819262517757,
      "grad_norm": 0.3399803936481476,
      "learning_rate": 3.3278203514690673e-06,
      "loss": 0.0148,
      "step": 2045180
    },
    {
      "epoch": 3.347014656690429,
      "grad_norm": 0.37112483382225037,
      "learning_rate": 3.32775445925555e-06,
      "loss": 0.0138,
      "step": 2045200
    },
    {
      "epoch": 3.3470473871290825,
      "grad_norm": 0.14865374565124512,
      "learning_rate": 3.327688567042033e-06,
      "loss": 0.0084,
      "step": 2045220
    },
    {
      "epoch": 3.3470801175677356,
      "grad_norm": 0.5249733328819275,
      "learning_rate": 3.3276226748285155e-06,
      "loss": 0.013,
      "step": 2045240
    },
    {
      "epoch": 3.3471128480063888,
      "grad_norm": 0.23100796341896057,
      "learning_rate": 3.3275567826149983e-06,
      "loss": 0.013,
      "step": 2045260
    },
    {
      "epoch": 3.3471455784450423,
      "grad_norm": 0.2714129388332367,
      "learning_rate": 3.3274908904014814e-06,
      "loss": 0.0141,
      "step": 2045280
    },
    {
      "epoch": 3.3471783088836955,
      "grad_norm": 0.31215161085128784,
      "learning_rate": 3.327424998187964e-06,
      "loss": 0.0167,
      "step": 2045300
    },
    {
      "epoch": 3.347211039322349,
      "grad_norm": 0.3252604901790619,
      "learning_rate": 3.327359105974447e-06,
      "loss": 0.0107,
      "step": 2045320
    },
    {
      "epoch": 3.3472437697610022,
      "grad_norm": 0.21125325560569763,
      "learning_rate": 3.3272932137609305e-06,
      "loss": 0.0161,
      "step": 2045340
    },
    {
      "epoch": 3.347276500199656,
      "grad_norm": 0.7618427872657776,
      "learning_rate": 3.3272273215474132e-06,
      "loss": 0.0088,
      "step": 2045360
    },
    {
      "epoch": 3.347309230638309,
      "grad_norm": 0.3407558798789978,
      "learning_rate": 3.327161429333896e-06,
      "loss": 0.0093,
      "step": 2045380
    },
    {
      "epoch": 3.347341961076962,
      "grad_norm": 0.07683888077735901,
      "learning_rate": 3.3270955371203787e-06,
      "loss": 0.0169,
      "step": 2045400
    },
    {
      "epoch": 3.3473746915156157,
      "grad_norm": 0.15240749716758728,
      "learning_rate": 3.327029644906862e-06,
      "loss": 0.0095,
      "step": 2045420
    },
    {
      "epoch": 3.347407421954269,
      "grad_norm": 0.6579871773719788,
      "learning_rate": 3.3269637526933446e-06,
      "loss": 0.0103,
      "step": 2045440
    },
    {
      "epoch": 3.3474401523929225,
      "grad_norm": 0.39307066798210144,
      "learning_rate": 3.3268978604798274e-06,
      "loss": 0.0159,
      "step": 2045460
    },
    {
      "epoch": 3.3474728828315756,
      "grad_norm": 0.7326147556304932,
      "learning_rate": 3.32683196826631e-06,
      "loss": 0.0135,
      "step": 2045480
    },
    {
      "epoch": 3.347505613270229,
      "grad_norm": 0.24041469395160675,
      "learning_rate": 3.3267660760527933e-06,
      "loss": 0.0098,
      "step": 2045500
    },
    {
      "epoch": 3.3475383437088824,
      "grad_norm": 0.4440767467021942,
      "learning_rate": 3.326700183839276e-06,
      "loss": 0.0113,
      "step": 2045520
    },
    {
      "epoch": 3.3475710741475355,
      "grad_norm": 0.11418767273426056,
      "learning_rate": 3.3266342916257588e-06,
      "loss": 0.0123,
      "step": 2045540
    },
    {
      "epoch": 3.347603804586189,
      "grad_norm": 0.28934988379478455,
      "learning_rate": 3.3265683994122415e-06,
      "loss": 0.0183,
      "step": 2045560
    },
    {
      "epoch": 3.3476365350248423,
      "grad_norm": 0.1390714794397354,
      "learning_rate": 3.3265025071987247e-06,
      "loss": 0.0113,
      "step": 2045580
    },
    {
      "epoch": 3.347669265463496,
      "grad_norm": 0.3997720181941986,
      "learning_rate": 3.3264366149852074e-06,
      "loss": 0.0125,
      "step": 2045600
    },
    {
      "epoch": 3.347701995902149,
      "grad_norm": 1.5369597673416138,
      "learning_rate": 3.32637072277169e-06,
      "loss": 0.0129,
      "step": 2045620
    },
    {
      "epoch": 3.3477347263408026,
      "grad_norm": 0.1563173234462738,
      "learning_rate": 3.326304830558173e-06,
      "loss": 0.0141,
      "step": 2045640
    },
    {
      "epoch": 3.3477674567794558,
      "grad_norm": 0.3756802976131439,
      "learning_rate": 3.326238938344656e-06,
      "loss": 0.0115,
      "step": 2045660
    },
    {
      "epoch": 3.347800187218109,
      "grad_norm": 0.19643057882785797,
      "learning_rate": 3.326173046131139e-06,
      "loss": 0.012,
      "step": 2045680
    },
    {
      "epoch": 3.3478329176567625,
      "grad_norm": 0.29489102959632874,
      "learning_rate": 3.326107153917622e-06,
      "loss": 0.0094,
      "step": 2045700
    },
    {
      "epoch": 3.3478656480954156,
      "grad_norm": 0.3367544710636139,
      "learning_rate": 3.326041261704105e-06,
      "loss": 0.0176,
      "step": 2045720
    },
    {
      "epoch": 3.3478983785340692,
      "grad_norm": 0.43218132853507996,
      "learning_rate": 3.325975369490588e-06,
      "loss": 0.0093,
      "step": 2045740
    },
    {
      "epoch": 3.3479311089727224,
      "grad_norm": 0.31849899888038635,
      "learning_rate": 3.3259094772770706e-06,
      "loss": 0.0128,
      "step": 2045760
    },
    {
      "epoch": 3.347963839411376,
      "grad_norm": 0.27293825149536133,
      "learning_rate": 3.3258435850635533e-06,
      "loss": 0.0114,
      "step": 2045780
    },
    {
      "epoch": 3.347996569850029,
      "grad_norm": 0.18957330286502838,
      "learning_rate": 3.325777692850036e-06,
      "loss": 0.0143,
      "step": 2045800
    },
    {
      "epoch": 3.3480293002886823,
      "grad_norm": 0.2771039605140686,
      "learning_rate": 3.3257118006365192e-06,
      "loss": 0.0127,
      "step": 2045820
    },
    {
      "epoch": 3.348062030727336,
      "grad_norm": 0.49561625719070435,
      "learning_rate": 3.325645908423002e-06,
      "loss": 0.0108,
      "step": 2045840
    },
    {
      "epoch": 3.348094761165989,
      "grad_norm": 0.18900258839130402,
      "learning_rate": 3.3255800162094847e-06,
      "loss": 0.0111,
      "step": 2045860
    },
    {
      "epoch": 3.3481274916046426,
      "grad_norm": 0.16754716634750366,
      "learning_rate": 3.3255141239959674e-06,
      "loss": 0.0089,
      "step": 2045880
    },
    {
      "epoch": 3.3481602220432958,
      "grad_norm": 0.4097377061843872,
      "learning_rate": 3.3254482317824506e-06,
      "loss": 0.0114,
      "step": 2045900
    },
    {
      "epoch": 3.3481929524819494,
      "grad_norm": 0.5306411981582642,
      "learning_rate": 3.3253823395689333e-06,
      "loss": 0.0134,
      "step": 2045920
    },
    {
      "epoch": 3.3482256829206025,
      "grad_norm": 0.39718320965766907,
      "learning_rate": 3.325316447355416e-06,
      "loss": 0.0085,
      "step": 2045940
    },
    {
      "epoch": 3.3482584133592557,
      "grad_norm": 0.3472149074077606,
      "learning_rate": 3.325250555141899e-06,
      "loss": 0.0112,
      "step": 2045960
    },
    {
      "epoch": 3.3482911437979093,
      "grad_norm": 0.04021809995174408,
      "learning_rate": 3.325184662928382e-06,
      "loss": 0.0122,
      "step": 2045980
    },
    {
      "epoch": 3.3483238742365624,
      "grad_norm": 0.1267291009426117,
      "learning_rate": 3.3251187707148647e-06,
      "loss": 0.0193,
      "step": 2046000
    },
    {
      "epoch": 3.348356604675216,
      "grad_norm": 0.42544662952423096,
      "learning_rate": 3.3250528785013475e-06,
      "loss": 0.0098,
      "step": 2046020
    },
    {
      "epoch": 3.348389335113869,
      "grad_norm": 0.16959242522716522,
      "learning_rate": 3.324986986287831e-06,
      "loss": 0.0095,
      "step": 2046040
    },
    {
      "epoch": 3.3484220655525228,
      "grad_norm": 0.24233615398406982,
      "learning_rate": 3.324921094074314e-06,
      "loss": 0.0155,
      "step": 2046060
    },
    {
      "epoch": 3.348454795991176,
      "grad_norm": 1.0817208290100098,
      "learning_rate": 3.3248552018607965e-06,
      "loss": 0.0129,
      "step": 2046080
    },
    {
      "epoch": 3.348487526429829,
      "grad_norm": 0.2676544189453125,
      "learning_rate": 3.3247893096472793e-06,
      "loss": 0.0111,
      "step": 2046100
    },
    {
      "epoch": 3.3485202568684826,
      "grad_norm": 0.168411985039711,
      "learning_rate": 3.3247234174337624e-06,
      "loss": 0.0092,
      "step": 2046120
    },
    {
      "epoch": 3.348552987307136,
      "grad_norm": 0.24940013885498047,
      "learning_rate": 3.324657525220245e-06,
      "loss": 0.0091,
      "step": 2046140
    },
    {
      "epoch": 3.3485857177457894,
      "grad_norm": 0.18663500249385834,
      "learning_rate": 3.324591633006728e-06,
      "loss": 0.0171,
      "step": 2046160
    },
    {
      "epoch": 3.3486184481844425,
      "grad_norm": 0.6285797357559204,
      "learning_rate": 3.3245257407932107e-06,
      "loss": 0.012,
      "step": 2046180
    },
    {
      "epoch": 3.348651178623096,
      "grad_norm": 0.5012770891189575,
      "learning_rate": 3.324459848579694e-06,
      "loss": 0.0134,
      "step": 2046200
    },
    {
      "epoch": 3.3486839090617493,
      "grad_norm": 0.23306389153003693,
      "learning_rate": 3.3243939563661766e-06,
      "loss": 0.0174,
      "step": 2046220
    },
    {
      "epoch": 3.3487166395004024,
      "grad_norm": 0.16554421186447144,
      "learning_rate": 3.3243280641526593e-06,
      "loss": 0.0123,
      "step": 2046240
    },
    {
      "epoch": 3.348749369939056,
      "grad_norm": 0.24402344226837158,
      "learning_rate": 3.324262171939142e-06,
      "loss": 0.0144,
      "step": 2046260
    },
    {
      "epoch": 3.348782100377709,
      "grad_norm": 0.6987773776054382,
      "learning_rate": 3.3241962797256248e-06,
      "loss": 0.0121,
      "step": 2046280
    },
    {
      "epoch": 3.3488148308163628,
      "grad_norm": 0.1728091984987259,
      "learning_rate": 3.324130387512108e-06,
      "loss": 0.0094,
      "step": 2046300
    },
    {
      "epoch": 3.348847561255016,
      "grad_norm": 0.1521219164133072,
      "learning_rate": 3.3240644952985907e-06,
      "loss": 0.009,
      "step": 2046320
    },
    {
      "epoch": 3.3488802916936695,
      "grad_norm": 0.3780598044395447,
      "learning_rate": 3.3239986030850734e-06,
      "loss": 0.0103,
      "step": 2046340
    },
    {
      "epoch": 3.3489130221323227,
      "grad_norm": 0.07128067314624786,
      "learning_rate": 3.323932710871556e-06,
      "loss": 0.0127,
      "step": 2046360
    },
    {
      "epoch": 3.348945752570976,
      "grad_norm": 0.22861254215240479,
      "learning_rate": 3.3238668186580397e-06,
      "loss": 0.0099,
      "step": 2046380
    },
    {
      "epoch": 3.3489784830096294,
      "grad_norm": 0.19201844930648804,
      "learning_rate": 3.3238009264445225e-06,
      "loss": 0.0098,
      "step": 2046400
    },
    {
      "epoch": 3.3490112134482826,
      "grad_norm": 0.650252103805542,
      "learning_rate": 3.3237350342310052e-06,
      "loss": 0.0091,
      "step": 2046420
    },
    {
      "epoch": 3.349043943886936,
      "grad_norm": 0.26873594522476196,
      "learning_rate": 3.3236691420174884e-06,
      "loss": 0.007,
      "step": 2046440
    },
    {
      "epoch": 3.3490766743255893,
      "grad_norm": 0.14872083067893982,
      "learning_rate": 3.323603249803971e-06,
      "loss": 0.0075,
      "step": 2046460
    },
    {
      "epoch": 3.3491094047642425,
      "grad_norm": 0.12857672572135925,
      "learning_rate": 3.323537357590454e-06,
      "loss": 0.0091,
      "step": 2046480
    },
    {
      "epoch": 3.349142135202896,
      "grad_norm": 0.11019368469715118,
      "learning_rate": 3.3234714653769366e-06,
      "loss": 0.0081,
      "step": 2046500
    },
    {
      "epoch": 3.349174865641549,
      "grad_norm": 0.2861217260360718,
      "learning_rate": 3.3234055731634198e-06,
      "loss": 0.0113,
      "step": 2046520
    },
    {
      "epoch": 3.349207596080203,
      "grad_norm": 0.5280897617340088,
      "learning_rate": 3.3233396809499025e-06,
      "loss": 0.0131,
      "step": 2046540
    },
    {
      "epoch": 3.349240326518856,
      "grad_norm": 0.6037840247154236,
      "learning_rate": 3.3232737887363853e-06,
      "loss": 0.0151,
      "step": 2046560
    },
    {
      "epoch": 3.349273056957509,
      "grad_norm": 0.1489858776330948,
      "learning_rate": 3.323207896522868e-06,
      "loss": 0.0152,
      "step": 2046580
    },
    {
      "epoch": 3.3493057873961627,
      "grad_norm": 0.27947625517845154,
      "learning_rate": 3.323142004309351e-06,
      "loss": 0.0146,
      "step": 2046600
    },
    {
      "epoch": 3.349338517834816,
      "grad_norm": 0.2630782723426819,
      "learning_rate": 3.323076112095834e-06,
      "loss": 0.0098,
      "step": 2046620
    },
    {
      "epoch": 3.3493712482734694,
      "grad_norm": 0.7811005115509033,
      "learning_rate": 3.3230102198823166e-06,
      "loss": 0.0157,
      "step": 2046640
    },
    {
      "epoch": 3.3494039787121226,
      "grad_norm": 0.10140638798475266,
      "learning_rate": 3.3229443276687994e-06,
      "loss": 0.0121,
      "step": 2046660
    },
    {
      "epoch": 3.349436709150776,
      "grad_norm": 0.3866840600967407,
      "learning_rate": 3.322878435455282e-06,
      "loss": 0.013,
      "step": 2046680
    },
    {
      "epoch": 3.3494694395894293,
      "grad_norm": 0.11630524694919586,
      "learning_rate": 3.3228125432417653e-06,
      "loss": 0.0086,
      "step": 2046700
    },
    {
      "epoch": 3.3495021700280825,
      "grad_norm": 0.6579840779304504,
      "learning_rate": 3.322746651028248e-06,
      "loss": 0.0177,
      "step": 2046720
    },
    {
      "epoch": 3.349534900466736,
      "grad_norm": 0.07722194492816925,
      "learning_rate": 3.3226807588147316e-06,
      "loss": 0.0089,
      "step": 2046740
    },
    {
      "epoch": 3.3495676309053892,
      "grad_norm": 0.5383232831954956,
      "learning_rate": 3.3226148666012143e-06,
      "loss": 0.0147,
      "step": 2046760
    },
    {
      "epoch": 3.349600361344043,
      "grad_norm": 0.16899149119853973,
      "learning_rate": 3.322548974387697e-06,
      "loss": 0.0093,
      "step": 2046780
    },
    {
      "epoch": 3.349633091782696,
      "grad_norm": 0.09905228018760681,
      "learning_rate": 3.32248308217418e-06,
      "loss": 0.0076,
      "step": 2046800
    },
    {
      "epoch": 3.3496658222213496,
      "grad_norm": 0.08254776149988174,
      "learning_rate": 3.3224171899606626e-06,
      "loss": 0.0118,
      "step": 2046820
    },
    {
      "epoch": 3.3496985526600027,
      "grad_norm": 0.08389320224523544,
      "learning_rate": 3.3223512977471457e-06,
      "loss": 0.016,
      "step": 2046840
    },
    {
      "epoch": 3.349731283098656,
      "grad_norm": 0.4813658595085144,
      "learning_rate": 3.3222854055336285e-06,
      "loss": 0.0112,
      "step": 2046860
    },
    {
      "epoch": 3.3497640135373095,
      "grad_norm": 0.3728035092353821,
      "learning_rate": 3.322219513320111e-06,
      "loss": 0.013,
      "step": 2046880
    },
    {
      "epoch": 3.3497967439759626,
      "grad_norm": 0.18316861987113953,
      "learning_rate": 3.322153621106594e-06,
      "loss": 0.0128,
      "step": 2046900
    },
    {
      "epoch": 3.349829474414616,
      "grad_norm": 0.5365565419197083,
      "learning_rate": 3.322087728893077e-06,
      "loss": 0.0115,
      "step": 2046920
    },
    {
      "epoch": 3.3498622048532694,
      "grad_norm": 0.8293447494506836,
      "learning_rate": 3.32202183667956e-06,
      "loss": 0.0133,
      "step": 2046940
    },
    {
      "epoch": 3.349894935291923,
      "grad_norm": 0.3825809955596924,
      "learning_rate": 3.3219559444660426e-06,
      "loss": 0.01,
      "step": 2046960
    },
    {
      "epoch": 3.349927665730576,
      "grad_norm": 0.31231531500816345,
      "learning_rate": 3.3218900522525253e-06,
      "loss": 0.0101,
      "step": 2046980
    },
    {
      "epoch": 3.3499603961692292,
      "grad_norm": 0.12604345381259918,
      "learning_rate": 3.3218241600390085e-06,
      "loss": 0.0113,
      "step": 2047000
    },
    {
      "epoch": 3.349993126607883,
      "grad_norm": 0.18187002837657928,
      "learning_rate": 3.3217582678254912e-06,
      "loss": 0.0092,
      "step": 2047020
    },
    {
      "epoch": 3.350025857046536,
      "grad_norm": 0.2237284630537033,
      "learning_rate": 3.321692375611974e-06,
      "loss": 0.0119,
      "step": 2047040
    },
    {
      "epoch": 3.3500585874851896,
      "grad_norm": 0.25939980149269104,
      "learning_rate": 3.3216264833984567e-06,
      "loss": 0.0137,
      "step": 2047060
    },
    {
      "epoch": 3.3500913179238427,
      "grad_norm": 0.22820641100406647,
      "learning_rate": 3.32156059118494e-06,
      "loss": 0.0129,
      "step": 2047080
    },
    {
      "epoch": 3.3501240483624963,
      "grad_norm": 0.4029615819454193,
      "learning_rate": 3.321494698971423e-06,
      "loss": 0.0084,
      "step": 2047100
    },
    {
      "epoch": 3.3501567788011495,
      "grad_norm": 0.41792282462120056,
      "learning_rate": 3.3214288067579058e-06,
      "loss": 0.012,
      "step": 2047120
    },
    {
      "epoch": 3.3501895092398026,
      "grad_norm": 0.2953520119190216,
      "learning_rate": 3.321362914544389e-06,
      "loss": 0.0094,
      "step": 2047140
    },
    {
      "epoch": 3.3502222396784562,
      "grad_norm": 0.07781770825386047,
      "learning_rate": 3.3212970223308717e-06,
      "loss": 0.014,
      "step": 2047160
    },
    {
      "epoch": 3.3502549701171094,
      "grad_norm": 0.3460838496685028,
      "learning_rate": 3.3212311301173544e-06,
      "loss": 0.0093,
      "step": 2047180
    },
    {
      "epoch": 3.350287700555763,
      "grad_norm": 0.37531569600105286,
      "learning_rate": 3.321165237903837e-06,
      "loss": 0.0091,
      "step": 2047200
    },
    {
      "epoch": 3.350320430994416,
      "grad_norm": 0.0790075808763504,
      "learning_rate": 3.32109934569032e-06,
      "loss": 0.0138,
      "step": 2047220
    },
    {
      "epoch": 3.3503531614330697,
      "grad_norm": 0.15844732522964478,
      "learning_rate": 3.321033453476803e-06,
      "loss": 0.0108,
      "step": 2047240
    },
    {
      "epoch": 3.350385891871723,
      "grad_norm": 0.1627763956785202,
      "learning_rate": 3.320967561263286e-06,
      "loss": 0.0076,
      "step": 2047260
    },
    {
      "epoch": 3.350418622310376,
      "grad_norm": 0.15818196535110474,
      "learning_rate": 3.3209016690497685e-06,
      "loss": 0.0129,
      "step": 2047280
    },
    {
      "epoch": 3.3504513527490296,
      "grad_norm": 0.2784520089626312,
      "learning_rate": 3.3208357768362513e-06,
      "loss": 0.0089,
      "step": 2047300
    },
    {
      "epoch": 3.3504840831876828,
      "grad_norm": 0.19921912252902985,
      "learning_rate": 3.3207698846227344e-06,
      "loss": 0.0141,
      "step": 2047320
    },
    {
      "epoch": 3.3505168136263364,
      "grad_norm": 0.36312225461006165,
      "learning_rate": 3.320703992409217e-06,
      "loss": 0.0118,
      "step": 2047340
    },
    {
      "epoch": 3.3505495440649895,
      "grad_norm": 0.7495772242546082,
      "learning_rate": 3.3206381001957e-06,
      "loss": 0.0223,
      "step": 2047360
    },
    {
      "epoch": 3.350582274503643,
      "grad_norm": 0.15589480102062225,
      "learning_rate": 3.3205722079821827e-06,
      "loss": 0.0082,
      "step": 2047380
    },
    {
      "epoch": 3.3506150049422962,
      "grad_norm": 0.5897200107574463,
      "learning_rate": 3.320506315768666e-06,
      "loss": 0.0106,
      "step": 2047400
    },
    {
      "epoch": 3.3506477353809494,
      "grad_norm": 0.17486026883125305,
      "learning_rate": 3.3204404235551486e-06,
      "loss": 0.007,
      "step": 2047420
    },
    {
      "epoch": 3.350680465819603,
      "grad_norm": 0.17402467131614685,
      "learning_rate": 3.3203745313416317e-06,
      "loss": 0.011,
      "step": 2047440
    },
    {
      "epoch": 3.350713196258256,
      "grad_norm": 0.18680256605148315,
      "learning_rate": 3.320308639128115e-06,
      "loss": 0.0116,
      "step": 2047460
    },
    {
      "epoch": 3.3507459266969097,
      "grad_norm": 0.5632136464118958,
      "learning_rate": 3.3202427469145976e-06,
      "loss": 0.0085,
      "step": 2047480
    },
    {
      "epoch": 3.350778657135563,
      "grad_norm": 0.08882012218236923,
      "learning_rate": 3.3201768547010804e-06,
      "loss": 0.0097,
      "step": 2047500
    },
    {
      "epoch": 3.3508113875742165,
      "grad_norm": 0.9700998067855835,
      "learning_rate": 3.320110962487563e-06,
      "loss": 0.0116,
      "step": 2047520
    },
    {
      "epoch": 3.3508441180128696,
      "grad_norm": 0.33503833413124084,
      "learning_rate": 3.3200450702740463e-06,
      "loss": 0.0153,
      "step": 2047540
    },
    {
      "epoch": 3.350876848451523,
      "grad_norm": 0.1265379935503006,
      "learning_rate": 3.319979178060529e-06,
      "loss": 0.0093,
      "step": 2047560
    },
    {
      "epoch": 3.3509095788901764,
      "grad_norm": 0.2157706767320633,
      "learning_rate": 3.3199132858470118e-06,
      "loss": 0.006,
      "step": 2047580
    },
    {
      "epoch": 3.3509423093288295,
      "grad_norm": 0.39712363481521606,
      "learning_rate": 3.3198473936334945e-06,
      "loss": 0.0093,
      "step": 2047600
    },
    {
      "epoch": 3.350975039767483,
      "grad_norm": 0.3980512320995331,
      "learning_rate": 3.3197815014199777e-06,
      "loss": 0.0134,
      "step": 2047620
    },
    {
      "epoch": 3.3510077702061363,
      "grad_norm": 0.3751596212387085,
      "learning_rate": 3.3197156092064604e-06,
      "loss": 0.0092,
      "step": 2047640
    },
    {
      "epoch": 3.35104050064479,
      "grad_norm": 0.3461163640022278,
      "learning_rate": 3.319649716992943e-06,
      "loss": 0.012,
      "step": 2047660
    },
    {
      "epoch": 3.351073231083443,
      "grad_norm": 0.34535548090934753,
      "learning_rate": 3.319583824779426e-06,
      "loss": 0.0146,
      "step": 2047680
    },
    {
      "epoch": 3.351105961522096,
      "grad_norm": 0.1844671666622162,
      "learning_rate": 3.3195179325659086e-06,
      "loss": 0.0108,
      "step": 2047700
    },
    {
      "epoch": 3.3511386919607498,
      "grad_norm": 0.1390455961227417,
      "learning_rate": 3.3194520403523918e-06,
      "loss": 0.014,
      "step": 2047720
    },
    {
      "epoch": 3.351171422399403,
      "grad_norm": 0.24101319909095764,
      "learning_rate": 3.3193861481388745e-06,
      "loss": 0.0127,
      "step": 2047740
    },
    {
      "epoch": 3.3512041528380565,
      "grad_norm": 0.2017621546983719,
      "learning_rate": 3.3193202559253573e-06,
      "loss": 0.0168,
      "step": 2047760
    },
    {
      "epoch": 3.3512368832767097,
      "grad_norm": 0.31856560707092285,
      "learning_rate": 3.31925436371184e-06,
      "loss": 0.0097,
      "step": 2047780
    },
    {
      "epoch": 3.3512696137153632,
      "grad_norm": 0.29716646671295166,
      "learning_rate": 3.3191884714983236e-06,
      "loss": 0.0082,
      "step": 2047800
    },
    {
      "epoch": 3.3513023441540164,
      "grad_norm": 0.08741302788257599,
      "learning_rate": 3.3191225792848063e-06,
      "loss": 0.0098,
      "step": 2047820
    },
    {
      "epoch": 3.3513350745926695,
      "grad_norm": 0.10691370815038681,
      "learning_rate": 3.319056687071289e-06,
      "loss": 0.0081,
      "step": 2047840
    },
    {
      "epoch": 3.351367805031323,
      "grad_norm": 0.327962726354599,
      "learning_rate": 3.3189907948577722e-06,
      "loss": 0.0098,
      "step": 2047860
    },
    {
      "epoch": 3.3514005354699763,
      "grad_norm": 0.28237754106521606,
      "learning_rate": 3.318924902644255e-06,
      "loss": 0.0117,
      "step": 2047880
    },
    {
      "epoch": 3.35143326590863,
      "grad_norm": 0.16054607927799225,
      "learning_rate": 3.3188590104307377e-06,
      "loss": 0.0111,
      "step": 2047900
    },
    {
      "epoch": 3.351465996347283,
      "grad_norm": 0.08738391846418381,
      "learning_rate": 3.3187931182172205e-06,
      "loss": 0.0132,
      "step": 2047920
    },
    {
      "epoch": 3.3514987267859366,
      "grad_norm": 0.5667018890380859,
      "learning_rate": 3.3187272260037036e-06,
      "loss": 0.0137,
      "step": 2047940
    },
    {
      "epoch": 3.35153145722459,
      "grad_norm": 0.7394723892211914,
      "learning_rate": 3.3186613337901864e-06,
      "loss": 0.0131,
      "step": 2047960
    },
    {
      "epoch": 3.351564187663243,
      "grad_norm": 0.10469233989715576,
      "learning_rate": 3.318595441576669e-06,
      "loss": 0.0159,
      "step": 2047980
    },
    {
      "epoch": 3.3515969181018965,
      "grad_norm": 0.43504598736763,
      "learning_rate": 3.318529549363152e-06,
      "loss": 0.0106,
      "step": 2048000
    },
    {
      "epoch": 3.3516296485405497,
      "grad_norm": 0.15940846502780914,
      "learning_rate": 3.318463657149635e-06,
      "loss": 0.0144,
      "step": 2048020
    },
    {
      "epoch": 3.3516623789792033,
      "grad_norm": 0.5876433253288269,
      "learning_rate": 3.3183977649361177e-06,
      "loss": 0.0121,
      "step": 2048040
    },
    {
      "epoch": 3.3516951094178564,
      "grad_norm": 0.19447942078113556,
      "learning_rate": 3.3183318727226005e-06,
      "loss": 0.0102,
      "step": 2048060
    },
    {
      "epoch": 3.3517278398565096,
      "grad_norm": 0.04992882162332535,
      "learning_rate": 3.3182659805090832e-06,
      "loss": 0.0074,
      "step": 2048080
    },
    {
      "epoch": 3.351760570295163,
      "grad_norm": 0.1317639797925949,
      "learning_rate": 3.3182000882955664e-06,
      "loss": 0.0135,
      "step": 2048100
    },
    {
      "epoch": 3.3517933007338163,
      "grad_norm": 0.6855815649032593,
      "learning_rate": 3.318134196082049e-06,
      "loss": 0.0091,
      "step": 2048120
    },
    {
      "epoch": 3.35182603117247,
      "grad_norm": 0.20370276272296906,
      "learning_rate": 3.3180683038685323e-06,
      "loss": 0.0123,
      "step": 2048140
    },
    {
      "epoch": 3.351858761611123,
      "grad_norm": 0.2007683515548706,
      "learning_rate": 3.3180024116550154e-06,
      "loss": 0.0122,
      "step": 2048160
    },
    {
      "epoch": 3.351891492049776,
      "grad_norm": 0.1882506161928177,
      "learning_rate": 3.317936519441498e-06,
      "loss": 0.0093,
      "step": 2048180
    },
    {
      "epoch": 3.35192422248843,
      "grad_norm": 0.10231385380029678,
      "learning_rate": 3.317870627227981e-06,
      "loss": 0.012,
      "step": 2048200
    },
    {
      "epoch": 3.351956952927083,
      "grad_norm": 0.45291048288345337,
      "learning_rate": 3.3178047350144637e-06,
      "loss": 0.0102,
      "step": 2048220
    },
    {
      "epoch": 3.3519896833657365,
      "grad_norm": 0.373258113861084,
      "learning_rate": 3.3177388428009464e-06,
      "loss": 0.0092,
      "step": 2048240
    },
    {
      "epoch": 3.3520224138043897,
      "grad_norm": 0.06687675416469574,
      "learning_rate": 3.3176729505874296e-06,
      "loss": 0.0112,
      "step": 2048260
    },
    {
      "epoch": 3.3520551442430433,
      "grad_norm": 0.13749293982982635,
      "learning_rate": 3.3176070583739123e-06,
      "loss": 0.0085,
      "step": 2048280
    },
    {
      "epoch": 3.3520878746816964,
      "grad_norm": 0.31994760036468506,
      "learning_rate": 3.317541166160395e-06,
      "loss": 0.0094,
      "step": 2048300
    },
    {
      "epoch": 3.3521206051203496,
      "grad_norm": 0.19660887122154236,
      "learning_rate": 3.3174752739468778e-06,
      "loss": 0.0123,
      "step": 2048320
    },
    {
      "epoch": 3.352153335559003,
      "grad_norm": 0.1424231082201004,
      "learning_rate": 3.317409381733361e-06,
      "loss": 0.007,
      "step": 2048340
    },
    {
      "epoch": 3.3521860659976563,
      "grad_norm": 0.6621562838554382,
      "learning_rate": 3.3173434895198437e-06,
      "loss": 0.0198,
      "step": 2048360
    },
    {
      "epoch": 3.35221879643631,
      "grad_norm": 0.21042124927043915,
      "learning_rate": 3.3172775973063264e-06,
      "loss": 0.0079,
      "step": 2048380
    },
    {
      "epoch": 3.352251526874963,
      "grad_norm": 0.3911781311035156,
      "learning_rate": 3.317211705092809e-06,
      "loss": 0.0086,
      "step": 2048400
    },
    {
      "epoch": 3.3522842573136167,
      "grad_norm": 0.4656945466995239,
      "learning_rate": 3.3171458128792923e-06,
      "loss": 0.0135,
      "step": 2048420
    },
    {
      "epoch": 3.35231698775227,
      "grad_norm": 0.0508560910820961,
      "learning_rate": 3.317079920665775e-06,
      "loss": 0.0147,
      "step": 2048440
    },
    {
      "epoch": 3.352349718190923,
      "grad_norm": 0.7060964703559875,
      "learning_rate": 3.317014028452258e-06,
      "loss": 0.0105,
      "step": 2048460
    },
    {
      "epoch": 3.3523824486295766,
      "grad_norm": 0.09012803435325623,
      "learning_rate": 3.3169481362387406e-06,
      "loss": 0.0102,
      "step": 2048480
    },
    {
      "epoch": 3.3524151790682297,
      "grad_norm": 0.7901160717010498,
      "learning_rate": 3.316882244025224e-06,
      "loss": 0.0106,
      "step": 2048500
    },
    {
      "epoch": 3.3524479095068833,
      "grad_norm": 0.5137741565704346,
      "learning_rate": 3.316816351811707e-06,
      "loss": 0.0125,
      "step": 2048520
    },
    {
      "epoch": 3.3524806399455365,
      "grad_norm": 0.2717084586620331,
      "learning_rate": 3.3167504595981896e-06,
      "loss": 0.0114,
      "step": 2048540
    },
    {
      "epoch": 3.35251337038419,
      "grad_norm": 0.4160982668399811,
      "learning_rate": 3.3166845673846728e-06,
      "loss": 0.015,
      "step": 2048560
    },
    {
      "epoch": 3.352546100822843,
      "grad_norm": 0.11802135407924652,
      "learning_rate": 3.3166186751711555e-06,
      "loss": 0.0093,
      "step": 2048580
    },
    {
      "epoch": 3.3525788312614964,
      "grad_norm": 0.21735242009162903,
      "learning_rate": 3.3165527829576383e-06,
      "loss": 0.0169,
      "step": 2048600
    },
    {
      "epoch": 3.35261156170015,
      "grad_norm": 0.5062811374664307,
      "learning_rate": 3.316486890744121e-06,
      "loss": 0.0173,
      "step": 2048620
    },
    {
      "epoch": 3.352644292138803,
      "grad_norm": 0.3920291066169739,
      "learning_rate": 3.316420998530604e-06,
      "loss": 0.0155,
      "step": 2048640
    },
    {
      "epoch": 3.3526770225774567,
      "grad_norm": 0.3152454197406769,
      "learning_rate": 3.316355106317087e-06,
      "loss": 0.0094,
      "step": 2048660
    },
    {
      "epoch": 3.35270975301611,
      "grad_norm": 0.3004283607006073,
      "learning_rate": 3.3162892141035696e-06,
      "loss": 0.0131,
      "step": 2048680
    },
    {
      "epoch": 3.3527424834547634,
      "grad_norm": 0.06702830642461777,
      "learning_rate": 3.3162233218900524e-06,
      "loss": 0.0131,
      "step": 2048700
    },
    {
      "epoch": 3.3527752138934166,
      "grad_norm": 0.28040650486946106,
      "learning_rate": 3.316157429676535e-06,
      "loss": 0.0079,
      "step": 2048720
    },
    {
      "epoch": 3.3528079443320697,
      "grad_norm": 0.8445160388946533,
      "learning_rate": 3.3160915374630183e-06,
      "loss": 0.0157,
      "step": 2048740
    },
    {
      "epoch": 3.3528406747707233,
      "grad_norm": 0.2325490117073059,
      "learning_rate": 3.316025645249501e-06,
      "loss": 0.0125,
      "step": 2048760
    },
    {
      "epoch": 3.3528734052093765,
      "grad_norm": 0.2897881865501404,
      "learning_rate": 3.3159597530359838e-06,
      "loss": 0.0109,
      "step": 2048780
    },
    {
      "epoch": 3.35290613564803,
      "grad_norm": 0.23543599247932434,
      "learning_rate": 3.3158938608224665e-06,
      "loss": 0.0122,
      "step": 2048800
    },
    {
      "epoch": 3.3529388660866832,
      "grad_norm": 0.24362586438655853,
      "learning_rate": 3.3158279686089497e-06,
      "loss": 0.0098,
      "step": 2048820
    },
    {
      "epoch": 3.352971596525337,
      "grad_norm": 0.1692398339509964,
      "learning_rate": 3.3157620763954324e-06,
      "loss": 0.0122,
      "step": 2048840
    },
    {
      "epoch": 3.35300432696399,
      "grad_norm": 0.35277727246284485,
      "learning_rate": 3.3156961841819156e-06,
      "loss": 0.0118,
      "step": 2048860
    },
    {
      "epoch": 3.353037057402643,
      "grad_norm": 0.136033296585083,
      "learning_rate": 3.3156302919683987e-06,
      "loss": 0.0106,
      "step": 2048880
    },
    {
      "epoch": 3.3530697878412967,
      "grad_norm": 0.5390177369117737,
      "learning_rate": 3.3155643997548815e-06,
      "loss": 0.0138,
      "step": 2048900
    },
    {
      "epoch": 3.35310251827995,
      "grad_norm": 0.05487404391169548,
      "learning_rate": 3.3154985075413642e-06,
      "loss": 0.0132,
      "step": 2048920
    },
    {
      "epoch": 3.3531352487186035,
      "grad_norm": 0.35116028785705566,
      "learning_rate": 3.315432615327847e-06,
      "loss": 0.014,
      "step": 2048940
    },
    {
      "epoch": 3.3531679791572566,
      "grad_norm": 0.2969113886356354,
      "learning_rate": 3.31536672311433e-06,
      "loss": 0.0129,
      "step": 2048960
    },
    {
      "epoch": 3.35320070959591,
      "grad_norm": 0.17926143109798431,
      "learning_rate": 3.315300830900813e-06,
      "loss": 0.0094,
      "step": 2048980
    },
    {
      "epoch": 3.3532334400345634,
      "grad_norm": 0.29566317796707153,
      "learning_rate": 3.3152349386872956e-06,
      "loss": 0.0113,
      "step": 2049000
    },
    {
      "epoch": 3.3532661704732165,
      "grad_norm": 0.24523089826107025,
      "learning_rate": 3.3151690464737783e-06,
      "loss": 0.0086,
      "step": 2049020
    },
    {
      "epoch": 3.35329890091187,
      "grad_norm": 0.2027287632226944,
      "learning_rate": 3.3151031542602615e-06,
      "loss": 0.0089,
      "step": 2049040
    },
    {
      "epoch": 3.3533316313505233,
      "grad_norm": 0.08722357451915741,
      "learning_rate": 3.3150372620467442e-06,
      "loss": 0.0132,
      "step": 2049060
    },
    {
      "epoch": 3.353364361789177,
      "grad_norm": 0.2150801718235016,
      "learning_rate": 3.314971369833227e-06,
      "loss": 0.0116,
      "step": 2049080
    },
    {
      "epoch": 3.35339709222783,
      "grad_norm": 0.10203487426042557,
      "learning_rate": 3.3149054776197097e-06,
      "loss": 0.012,
      "step": 2049100
    },
    {
      "epoch": 3.3534298226664836,
      "grad_norm": 0.7543661594390869,
      "learning_rate": 3.3148395854061925e-06,
      "loss": 0.0134,
      "step": 2049120
    },
    {
      "epoch": 3.3534625531051367,
      "grad_norm": 0.3570433259010315,
      "learning_rate": 3.3147736931926756e-06,
      "loss": 0.01,
      "step": 2049140
    },
    {
      "epoch": 3.35349528354379,
      "grad_norm": 0.13708700239658356,
      "learning_rate": 3.3147078009791584e-06,
      "loss": 0.0094,
      "step": 2049160
    },
    {
      "epoch": 3.3535280139824435,
      "grad_norm": 0.3606891334056854,
      "learning_rate": 3.314641908765641e-06,
      "loss": 0.014,
      "step": 2049180
    },
    {
      "epoch": 3.3535607444210966,
      "grad_norm": 0.1336093544960022,
      "learning_rate": 3.3145760165521247e-06,
      "loss": 0.0077,
      "step": 2049200
    },
    {
      "epoch": 3.3535934748597502,
      "grad_norm": 0.2286885380744934,
      "learning_rate": 3.3145101243386074e-06,
      "loss": 0.0102,
      "step": 2049220
    },
    {
      "epoch": 3.3536262052984034,
      "grad_norm": 0.16103629767894745,
      "learning_rate": 3.31444423212509e-06,
      "loss": 0.014,
      "step": 2049240
    },
    {
      "epoch": 3.353658935737057,
      "grad_norm": 0.5264312028884888,
      "learning_rate": 3.314378339911573e-06,
      "loss": 0.0157,
      "step": 2049260
    },
    {
      "epoch": 3.35369166617571,
      "grad_norm": 0.6435598731040955,
      "learning_rate": 3.314312447698056e-06,
      "loss": 0.0126,
      "step": 2049280
    },
    {
      "epoch": 3.3537243966143633,
      "grad_norm": 1.101953148841858,
      "learning_rate": 3.314246555484539e-06,
      "loss": 0.0128,
      "step": 2049300
    },
    {
      "epoch": 3.353757127053017,
      "grad_norm": 0.16496875882148743,
      "learning_rate": 3.3141806632710215e-06,
      "loss": 0.0066,
      "step": 2049320
    },
    {
      "epoch": 3.35378985749167,
      "grad_norm": 0.15364329516887665,
      "learning_rate": 3.3141147710575043e-06,
      "loss": 0.013,
      "step": 2049340
    },
    {
      "epoch": 3.3538225879303236,
      "grad_norm": 0.2355334311723709,
      "learning_rate": 3.3140488788439875e-06,
      "loss": 0.0101,
      "step": 2049360
    },
    {
      "epoch": 3.3538553183689768,
      "grad_norm": 0.19977231323719025,
      "learning_rate": 3.31398298663047e-06,
      "loss": 0.0093,
      "step": 2049380
    },
    {
      "epoch": 3.3538880488076304,
      "grad_norm": 0.27486246824264526,
      "learning_rate": 3.313917094416953e-06,
      "loss": 0.0126,
      "step": 2049400
    },
    {
      "epoch": 3.3539207792462835,
      "grad_norm": 0.17283578217029572,
      "learning_rate": 3.3138512022034357e-06,
      "loss": 0.0102,
      "step": 2049420
    },
    {
      "epoch": 3.3539535096849367,
      "grad_norm": 0.07445605844259262,
      "learning_rate": 3.313785309989919e-06,
      "loss": 0.0088,
      "step": 2049440
    },
    {
      "epoch": 3.3539862401235903,
      "grad_norm": 0.3550005257129669,
      "learning_rate": 3.3137194177764016e-06,
      "loss": 0.0153,
      "step": 2049460
    },
    {
      "epoch": 3.3540189705622434,
      "grad_norm": 0.5665632486343384,
      "learning_rate": 3.3136535255628843e-06,
      "loss": 0.0196,
      "step": 2049480
    },
    {
      "epoch": 3.354051701000897,
      "grad_norm": 0.9444297552108765,
      "learning_rate": 3.313587633349367e-06,
      "loss": 0.0173,
      "step": 2049500
    },
    {
      "epoch": 3.35408443143955,
      "grad_norm": 0.27650904655456543,
      "learning_rate": 3.3135217411358502e-06,
      "loss": 0.0133,
      "step": 2049520
    },
    {
      "epoch": 3.3541171618782033,
      "grad_norm": 0.17136120796203613,
      "learning_rate": 3.313455848922333e-06,
      "loss": 0.0112,
      "step": 2049540
    },
    {
      "epoch": 3.354149892316857,
      "grad_norm": 0.15124358236789703,
      "learning_rate": 3.313389956708816e-06,
      "loss": 0.008,
      "step": 2049560
    },
    {
      "epoch": 3.35418262275551,
      "grad_norm": 0.38099637627601624,
      "learning_rate": 3.3133240644952993e-06,
      "loss": 0.0092,
      "step": 2049580
    },
    {
      "epoch": 3.3542153531941636,
      "grad_norm": 0.5712150931358337,
      "learning_rate": 3.313258172281782e-06,
      "loss": 0.0131,
      "step": 2049600
    },
    {
      "epoch": 3.354248083632817,
      "grad_norm": 0.25165051221847534,
      "learning_rate": 3.3131922800682648e-06,
      "loss": 0.0072,
      "step": 2049620
    },
    {
      "epoch": 3.35428081407147,
      "grad_norm": 0.16296549141407013,
      "learning_rate": 3.3131263878547475e-06,
      "loss": 0.0098,
      "step": 2049640
    },
    {
      "epoch": 3.3543135445101235,
      "grad_norm": 0.23103469610214233,
      "learning_rate": 3.3130604956412302e-06,
      "loss": 0.0109,
      "step": 2049660
    },
    {
      "epoch": 3.3543462749487767,
      "grad_norm": 0.5392892360687256,
      "learning_rate": 3.3129946034277134e-06,
      "loss": 0.0124,
      "step": 2049680
    },
    {
      "epoch": 3.3543790053874303,
      "grad_norm": 0.0768459364771843,
      "learning_rate": 3.312928711214196e-06,
      "loss": 0.0147,
      "step": 2049700
    },
    {
      "epoch": 3.3544117358260834,
      "grad_norm": 0.24605290591716766,
      "learning_rate": 3.312862819000679e-06,
      "loss": 0.0131,
      "step": 2049720
    },
    {
      "epoch": 3.354444466264737,
      "grad_norm": 0.2887786328792572,
      "learning_rate": 3.3127969267871616e-06,
      "loss": 0.0153,
      "step": 2049740
    },
    {
      "epoch": 3.35447719670339,
      "grad_norm": 0.23149755597114563,
      "learning_rate": 3.312731034573645e-06,
      "loss": 0.0097,
      "step": 2049760
    },
    {
      "epoch": 3.3545099271420433,
      "grad_norm": 0.20202171802520752,
      "learning_rate": 3.3126651423601275e-06,
      "loss": 0.0167,
      "step": 2049780
    },
    {
      "epoch": 3.354542657580697,
      "grad_norm": 0.3734077513217926,
      "learning_rate": 3.3125992501466103e-06,
      "loss": 0.0077,
      "step": 2049800
    },
    {
      "epoch": 3.35457538801935,
      "grad_norm": 0.5456642508506775,
      "learning_rate": 3.312533357933093e-06,
      "loss": 0.0179,
      "step": 2049820
    },
    {
      "epoch": 3.3546081184580037,
      "grad_norm": 0.1939496248960495,
      "learning_rate": 3.312467465719576e-06,
      "loss": 0.0183,
      "step": 2049840
    },
    {
      "epoch": 3.354640848896657,
      "grad_norm": 0.17141029238700867,
      "learning_rate": 3.312401573506059e-06,
      "loss": 0.0091,
      "step": 2049860
    },
    {
      "epoch": 3.3546735793353104,
      "grad_norm": 0.4053838849067688,
      "learning_rate": 3.3123356812925417e-06,
      "loss": 0.009,
      "step": 2049880
    },
    {
      "epoch": 3.3547063097739636,
      "grad_norm": 0.21508362889289856,
      "learning_rate": 3.3122697890790252e-06,
      "loss": 0.0066,
      "step": 2049900
    },
    {
      "epoch": 3.3547390402126167,
      "grad_norm": 0.23623324930667877,
      "learning_rate": 3.312203896865508e-06,
      "loss": 0.0113,
      "step": 2049920
    },
    {
      "epoch": 3.3547717706512703,
      "grad_norm": 0.1054743304848671,
      "learning_rate": 3.3121380046519907e-06,
      "loss": 0.0075,
      "step": 2049940
    },
    {
      "epoch": 3.3548045010899235,
      "grad_norm": 0.16365361213684082,
      "learning_rate": 3.3120721124384735e-06,
      "loss": 0.01,
      "step": 2049960
    },
    {
      "epoch": 3.354837231528577,
      "grad_norm": 0.06260458379983902,
      "learning_rate": 3.3120062202249566e-06,
      "loss": 0.0128,
      "step": 2049980
    },
    {
      "epoch": 3.35486996196723,
      "grad_norm": 0.14126800000667572,
      "learning_rate": 3.3119403280114394e-06,
      "loss": 0.0086,
      "step": 2050000
    },
    {
      "epoch": 3.35486996196723,
      "eval_loss": 0.0068906634114682674,
      "eval_runtime": 6530.963,
      "eval_samples_per_second": 157.382,
      "eval_steps_per_second": 15.738,
      "eval_sts-dev_pearson_cosine": 0.9843534465934086,
      "eval_sts-dev_spearman_cosine": 0.8952457587084928,
      "step": 2050000
    },
    {
      "epoch": 3.354902692405884,
      "grad_norm": 0.16312387585639954,
      "learning_rate": 3.311874435797922e-06,
      "loss": 0.0136,
      "step": 2050020
    },
    {
      "epoch": 3.354935422844537,
      "grad_norm": 0.614121675491333,
      "learning_rate": 3.311808543584405e-06,
      "loss": 0.0133,
      "step": 2050040
    },
    {
      "epoch": 3.35496815328319,
      "grad_norm": 0.13691388070583344,
      "learning_rate": 3.311742651370888e-06,
      "loss": 0.0094,
      "step": 2050060
    },
    {
      "epoch": 3.3550008837218437,
      "grad_norm": 0.6731441617012024,
      "learning_rate": 3.3116767591573707e-06,
      "loss": 0.0182,
      "step": 2050080
    },
    {
      "epoch": 3.355033614160497,
      "grad_norm": 0.1703232377767563,
      "learning_rate": 3.3116108669438535e-06,
      "loss": 0.0114,
      "step": 2050100
    },
    {
      "epoch": 3.3550663445991504,
      "grad_norm": 0.10939284414052963,
      "learning_rate": 3.3115449747303362e-06,
      "loss": 0.0117,
      "step": 2050120
    },
    {
      "epoch": 3.3550990750378036,
      "grad_norm": 0.47222182154655457,
      "learning_rate": 3.311479082516819e-06,
      "loss": 0.0123,
      "step": 2050140
    },
    {
      "epoch": 3.355131805476457,
      "grad_norm": 0.5740467309951782,
      "learning_rate": 3.311413190303302e-06,
      "loss": 0.0152,
      "step": 2050160
    },
    {
      "epoch": 3.3551645359151103,
      "grad_norm": 0.1706172078847885,
      "learning_rate": 3.311347298089785e-06,
      "loss": 0.0073,
      "step": 2050180
    },
    {
      "epoch": 3.3551972663537635,
      "grad_norm": 0.7002763748168945,
      "learning_rate": 3.3112814058762676e-06,
      "loss": 0.0101,
      "step": 2050200
    },
    {
      "epoch": 3.355229996792417,
      "grad_norm": 0.17901276051998138,
      "learning_rate": 3.3112155136627503e-06,
      "loss": 0.0071,
      "step": 2050220
    },
    {
      "epoch": 3.35526272723107,
      "grad_norm": 0.6081225872039795,
      "learning_rate": 3.3111496214492335e-06,
      "loss": 0.0131,
      "step": 2050240
    },
    {
      "epoch": 3.355295457669724,
      "grad_norm": 0.07873329520225525,
      "learning_rate": 3.3110837292357167e-06,
      "loss": 0.0122,
      "step": 2050260
    },
    {
      "epoch": 3.355328188108377,
      "grad_norm": 0.4353523850440979,
      "learning_rate": 3.3110178370221994e-06,
      "loss": 0.0099,
      "step": 2050280
    },
    {
      "epoch": 3.3553609185470306,
      "grad_norm": 0.6651865839958191,
      "learning_rate": 3.3109519448086826e-06,
      "loss": 0.0129,
      "step": 2050300
    },
    {
      "epoch": 3.3553936489856837,
      "grad_norm": 0.1899106204509735,
      "learning_rate": 3.3108860525951653e-06,
      "loss": 0.0082,
      "step": 2050320
    },
    {
      "epoch": 3.355426379424337,
      "grad_norm": 0.049114763736724854,
      "learning_rate": 3.310820160381648e-06,
      "loss": 0.0132,
      "step": 2050340
    },
    {
      "epoch": 3.3554591098629905,
      "grad_norm": 0.08211340010166168,
      "learning_rate": 3.310754268168131e-06,
      "loss": 0.0094,
      "step": 2050360
    },
    {
      "epoch": 3.3554918403016436,
      "grad_norm": 0.7693818807601929,
      "learning_rate": 3.310688375954614e-06,
      "loss": 0.0109,
      "step": 2050380
    },
    {
      "epoch": 3.355524570740297,
      "grad_norm": 0.06463714689016342,
      "learning_rate": 3.3106224837410967e-06,
      "loss": 0.01,
      "step": 2050400
    },
    {
      "epoch": 3.3555573011789503,
      "grad_norm": 0.14543472230434418,
      "learning_rate": 3.3105565915275794e-06,
      "loss": 0.0096,
      "step": 2050420
    },
    {
      "epoch": 3.355590031617604,
      "grad_norm": 0.1375611573457718,
      "learning_rate": 3.310490699314062e-06,
      "loss": 0.0089,
      "step": 2050440
    },
    {
      "epoch": 3.355622762056257,
      "grad_norm": 0.37217289209365845,
      "learning_rate": 3.3104248071005453e-06,
      "loss": 0.0081,
      "step": 2050460
    },
    {
      "epoch": 3.3556554924949102,
      "grad_norm": 0.44044750928878784,
      "learning_rate": 3.310358914887028e-06,
      "loss": 0.0133,
      "step": 2050480
    },
    {
      "epoch": 3.355688222933564,
      "grad_norm": 0.4837811291217804,
      "learning_rate": 3.310293022673511e-06,
      "loss": 0.0142,
      "step": 2050500
    },
    {
      "epoch": 3.355720953372217,
      "grad_norm": 0.16635538637638092,
      "learning_rate": 3.3102271304599936e-06,
      "loss": 0.013,
      "step": 2050520
    },
    {
      "epoch": 3.3557536838108706,
      "grad_norm": 0.5914918780326843,
      "learning_rate": 3.3101612382464763e-06,
      "loss": 0.013,
      "step": 2050540
    },
    {
      "epoch": 3.3557864142495237,
      "grad_norm": 0.12284890562295914,
      "learning_rate": 3.3100953460329595e-06,
      "loss": 0.0109,
      "step": 2050560
    },
    {
      "epoch": 3.3558191446881773,
      "grad_norm": 0.13462211191654205,
      "learning_rate": 3.310029453819442e-06,
      "loss": 0.0127,
      "step": 2050580
    },
    {
      "epoch": 3.3558518751268305,
      "grad_norm": 0.1542634218931198,
      "learning_rate": 3.309963561605925e-06,
      "loss": 0.0102,
      "step": 2050600
    },
    {
      "epoch": 3.3558846055654836,
      "grad_norm": 0.15104825794696808,
      "learning_rate": 3.3098976693924085e-06,
      "loss": 0.0091,
      "step": 2050620
    },
    {
      "epoch": 3.355917336004137,
      "grad_norm": 0.4502159655094147,
      "learning_rate": 3.3098317771788913e-06,
      "loss": 0.0126,
      "step": 2050640
    },
    {
      "epoch": 3.3559500664427904,
      "grad_norm": 0.19474473595619202,
      "learning_rate": 3.309765884965374e-06,
      "loss": 0.0128,
      "step": 2050660
    },
    {
      "epoch": 3.355982796881444,
      "grad_norm": 0.1350330114364624,
      "learning_rate": 3.3096999927518567e-06,
      "loss": 0.0138,
      "step": 2050680
    },
    {
      "epoch": 3.356015527320097,
      "grad_norm": 0.09947764128446579,
      "learning_rate": 3.30963410053834e-06,
      "loss": 0.0128,
      "step": 2050700
    },
    {
      "epoch": 3.3560482577587507,
      "grad_norm": 0.7793205380439758,
      "learning_rate": 3.3095682083248226e-06,
      "loss": 0.0178,
      "step": 2050720
    },
    {
      "epoch": 3.356080988197404,
      "grad_norm": 0.6304650902748108,
      "learning_rate": 3.3095023161113054e-06,
      "loss": 0.0113,
      "step": 2050740
    },
    {
      "epoch": 3.356113718636057,
      "grad_norm": 0.3190543055534363,
      "learning_rate": 3.309436423897788e-06,
      "loss": 0.0086,
      "step": 2050760
    },
    {
      "epoch": 3.3561464490747106,
      "grad_norm": 0.016233446076512337,
      "learning_rate": 3.3093705316842713e-06,
      "loss": 0.0096,
      "step": 2050780
    },
    {
      "epoch": 3.3561791795133638,
      "grad_norm": 0.36003217101097107,
      "learning_rate": 3.309304639470754e-06,
      "loss": 0.0118,
      "step": 2050800
    },
    {
      "epoch": 3.3562119099520173,
      "grad_norm": 0.4616965353488922,
      "learning_rate": 3.3092387472572368e-06,
      "loss": 0.0157,
      "step": 2050820
    },
    {
      "epoch": 3.3562446403906705,
      "grad_norm": 0.2722840905189514,
      "learning_rate": 3.3091728550437195e-06,
      "loss": 0.0114,
      "step": 2050840
    },
    {
      "epoch": 3.356277370829324,
      "grad_norm": 0.25362035632133484,
      "learning_rate": 3.3091069628302027e-06,
      "loss": 0.0118,
      "step": 2050860
    },
    {
      "epoch": 3.3563101012679772,
      "grad_norm": 0.5289244651794434,
      "learning_rate": 3.3090410706166854e-06,
      "loss": 0.0138,
      "step": 2050880
    },
    {
      "epoch": 3.3563428317066304,
      "grad_norm": 0.1841995120048523,
      "learning_rate": 3.308975178403168e-06,
      "loss": 0.0082,
      "step": 2050900
    },
    {
      "epoch": 3.356375562145284,
      "grad_norm": 0.7638249397277832,
      "learning_rate": 3.308909286189651e-06,
      "loss": 0.0233,
      "step": 2050920
    },
    {
      "epoch": 3.356408292583937,
      "grad_norm": 0.2821066081523895,
      "learning_rate": 3.308843393976134e-06,
      "loss": 0.0155,
      "step": 2050940
    },
    {
      "epoch": 3.3564410230225907,
      "grad_norm": 0.33843398094177246,
      "learning_rate": 3.3087775017626172e-06,
      "loss": 0.0075,
      "step": 2050960
    },
    {
      "epoch": 3.356473753461244,
      "grad_norm": 0.38088375329971313,
      "learning_rate": 3.3087116095491e-06,
      "loss": 0.0106,
      "step": 2050980
    },
    {
      "epoch": 3.3565064838998975,
      "grad_norm": 0.8674443364143372,
      "learning_rate": 3.308645717335583e-06,
      "loss": 0.0146,
      "step": 2051000
    },
    {
      "epoch": 3.3565392143385506,
      "grad_norm": 0.5486636757850647,
      "learning_rate": 3.308579825122066e-06,
      "loss": 0.0148,
      "step": 2051020
    },
    {
      "epoch": 3.3565719447772038,
      "grad_norm": 0.21253342926502228,
      "learning_rate": 3.3085139329085486e-06,
      "loss": 0.0116,
      "step": 2051040
    },
    {
      "epoch": 3.3566046752158574,
      "grad_norm": 0.5188090205192566,
      "learning_rate": 3.3084480406950313e-06,
      "loss": 0.0121,
      "step": 2051060
    },
    {
      "epoch": 3.3566374056545105,
      "grad_norm": 0.3850109577178955,
      "learning_rate": 3.308382148481514e-06,
      "loss": 0.0079,
      "step": 2051080
    },
    {
      "epoch": 3.356670136093164,
      "grad_norm": 0.1329353153705597,
      "learning_rate": 3.3083162562679972e-06,
      "loss": 0.0131,
      "step": 2051100
    },
    {
      "epoch": 3.3567028665318173,
      "grad_norm": 0.14480367302894592,
      "learning_rate": 3.30825036405448e-06,
      "loss": 0.0115,
      "step": 2051120
    },
    {
      "epoch": 3.3567355969704704,
      "grad_norm": 0.13581153750419617,
      "learning_rate": 3.3081844718409627e-06,
      "loss": 0.0082,
      "step": 2051140
    },
    {
      "epoch": 3.356768327409124,
      "grad_norm": 0.40853050351142883,
      "learning_rate": 3.3081185796274455e-06,
      "loss": 0.0138,
      "step": 2051160
    },
    {
      "epoch": 3.356801057847777,
      "grad_norm": 0.1899060606956482,
      "learning_rate": 3.3080526874139286e-06,
      "loss": 0.0127,
      "step": 2051180
    },
    {
      "epoch": 3.3568337882864308,
      "grad_norm": 0.5318729877471924,
      "learning_rate": 3.3079867952004114e-06,
      "loss": 0.0148,
      "step": 2051200
    },
    {
      "epoch": 3.356866518725084,
      "grad_norm": 0.14364807307720184,
      "learning_rate": 3.307920902986894e-06,
      "loss": 0.0118,
      "step": 2051220
    },
    {
      "epoch": 3.356899249163737,
      "grad_norm": 0.1146979108452797,
      "learning_rate": 3.307855010773377e-06,
      "loss": 0.0141,
      "step": 2051240
    },
    {
      "epoch": 3.3569319796023906,
      "grad_norm": 0.04006272554397583,
      "learning_rate": 3.30778911855986e-06,
      "loss": 0.0083,
      "step": 2051260
    },
    {
      "epoch": 3.356964710041044,
      "grad_norm": 0.298740953207016,
      "learning_rate": 3.3077232263463428e-06,
      "loss": 0.0063,
      "step": 2051280
    },
    {
      "epoch": 3.3569974404796974,
      "grad_norm": 0.349902480840683,
      "learning_rate": 3.3076573341328255e-06,
      "loss": 0.0089,
      "step": 2051300
    },
    {
      "epoch": 3.3570301709183505,
      "grad_norm": 0.1715371310710907,
      "learning_rate": 3.307591441919309e-06,
      "loss": 0.0099,
      "step": 2051320
    },
    {
      "epoch": 3.357062901357004,
      "grad_norm": 0.23418863117694855,
      "learning_rate": 3.307525549705792e-06,
      "loss": 0.0105,
      "step": 2051340
    },
    {
      "epoch": 3.3570956317956573,
      "grad_norm": 0.7092669010162354,
      "learning_rate": 3.3074596574922746e-06,
      "loss": 0.0115,
      "step": 2051360
    },
    {
      "epoch": 3.3571283622343104,
      "grad_norm": 0.2433534413576126,
      "learning_rate": 3.3073937652787573e-06,
      "loss": 0.0082,
      "step": 2051380
    },
    {
      "epoch": 3.357161092672964,
      "grad_norm": 0.2773328721523285,
      "learning_rate": 3.3073278730652405e-06,
      "loss": 0.013,
      "step": 2051400
    },
    {
      "epoch": 3.357193823111617,
      "grad_norm": 0.22160013020038605,
      "learning_rate": 3.307261980851723e-06,
      "loss": 0.0134,
      "step": 2051420
    },
    {
      "epoch": 3.3572265535502708,
      "grad_norm": 0.2930385172367096,
      "learning_rate": 3.307196088638206e-06,
      "loss": 0.0151,
      "step": 2051440
    },
    {
      "epoch": 3.357259283988924,
      "grad_norm": 0.2131703495979309,
      "learning_rate": 3.3071301964246887e-06,
      "loss": 0.011,
      "step": 2051460
    },
    {
      "epoch": 3.3572920144275775,
      "grad_norm": 0.09242366999387741,
      "learning_rate": 3.307064304211172e-06,
      "loss": 0.011,
      "step": 2051480
    },
    {
      "epoch": 3.3573247448662307,
      "grad_norm": 0.33275583386421204,
      "learning_rate": 3.3069984119976546e-06,
      "loss": 0.0084,
      "step": 2051500
    },
    {
      "epoch": 3.357357475304884,
      "grad_norm": 0.20658981800079346,
      "learning_rate": 3.3069325197841373e-06,
      "loss": 0.0076,
      "step": 2051520
    },
    {
      "epoch": 3.3573902057435374,
      "grad_norm": 0.20879438519477844,
      "learning_rate": 3.30686662757062e-06,
      "loss": 0.0162,
      "step": 2051540
    },
    {
      "epoch": 3.3574229361821906,
      "grad_norm": 0.2851227819919586,
      "learning_rate": 3.306800735357103e-06,
      "loss": 0.0087,
      "step": 2051560
    },
    {
      "epoch": 3.357455666620844,
      "grad_norm": 0.5854604244232178,
      "learning_rate": 3.306734843143586e-06,
      "loss": 0.0131,
      "step": 2051580
    },
    {
      "epoch": 3.3574883970594973,
      "grad_norm": 0.30568087100982666,
      "learning_rate": 3.3066689509300687e-06,
      "loss": 0.0086,
      "step": 2051600
    },
    {
      "epoch": 3.357521127498151,
      "grad_norm": 0.1396872103214264,
      "learning_rate": 3.3066030587165514e-06,
      "loss": 0.0076,
      "step": 2051620
    },
    {
      "epoch": 3.357553857936804,
      "grad_norm": 0.269084095954895,
      "learning_rate": 3.306537166503034e-06,
      "loss": 0.0101,
      "step": 2051640
    },
    {
      "epoch": 3.357586588375457,
      "grad_norm": 0.46609923243522644,
      "learning_rate": 3.3064712742895178e-06,
      "loss": 0.0102,
      "step": 2051660
    },
    {
      "epoch": 3.357619318814111,
      "grad_norm": 0.12409679591655731,
      "learning_rate": 3.3064053820760005e-06,
      "loss": 0.0103,
      "step": 2051680
    },
    {
      "epoch": 3.357652049252764,
      "grad_norm": 0.28047794103622437,
      "learning_rate": 3.3063394898624832e-06,
      "loss": 0.0088,
      "step": 2051700
    },
    {
      "epoch": 3.3576847796914175,
      "grad_norm": 0.22992470860481262,
      "learning_rate": 3.3062735976489664e-06,
      "loss": 0.0124,
      "step": 2051720
    },
    {
      "epoch": 3.3577175101300707,
      "grad_norm": 0.5748019218444824,
      "learning_rate": 3.306207705435449e-06,
      "loss": 0.0156,
      "step": 2051740
    },
    {
      "epoch": 3.3577502405687243,
      "grad_norm": 0.09217390418052673,
      "learning_rate": 3.306141813221932e-06,
      "loss": 0.0099,
      "step": 2051760
    },
    {
      "epoch": 3.3577829710073774,
      "grad_norm": 0.36887216567993164,
      "learning_rate": 3.3060759210084146e-06,
      "loss": 0.0143,
      "step": 2051780
    },
    {
      "epoch": 3.3578157014460306,
      "grad_norm": 0.598329484462738,
      "learning_rate": 3.306010028794898e-06,
      "loss": 0.0141,
      "step": 2051800
    },
    {
      "epoch": 3.357848431884684,
      "grad_norm": 0.07411275804042816,
      "learning_rate": 3.3059441365813805e-06,
      "loss": 0.0117,
      "step": 2051820
    },
    {
      "epoch": 3.3578811623233373,
      "grad_norm": 0.5666983723640442,
      "learning_rate": 3.3058782443678633e-06,
      "loss": 0.0119,
      "step": 2051840
    },
    {
      "epoch": 3.357913892761991,
      "grad_norm": 0.40859082341194153,
      "learning_rate": 3.305812352154346e-06,
      "loss": 0.0188,
      "step": 2051860
    },
    {
      "epoch": 3.357946623200644,
      "grad_norm": 0.3728022873401642,
      "learning_rate": 3.305746459940829e-06,
      "loss": 0.0132,
      "step": 2051880
    },
    {
      "epoch": 3.3579793536392977,
      "grad_norm": 0.5025897026062012,
      "learning_rate": 3.305680567727312e-06,
      "loss": 0.0107,
      "step": 2051900
    },
    {
      "epoch": 3.358012084077951,
      "grad_norm": 0.3040103614330292,
      "learning_rate": 3.3056146755137947e-06,
      "loss": 0.0119,
      "step": 2051920
    },
    {
      "epoch": 3.358044814516604,
      "grad_norm": 0.3903014659881592,
      "learning_rate": 3.3055487833002774e-06,
      "loss": 0.0118,
      "step": 2051940
    },
    {
      "epoch": 3.3580775449552576,
      "grad_norm": 0.3463004231452942,
      "learning_rate": 3.30548289108676e-06,
      "loss": 0.0103,
      "step": 2051960
    },
    {
      "epoch": 3.3581102753939107,
      "grad_norm": 0.11083725839853287,
      "learning_rate": 3.3054169988732433e-06,
      "loss": 0.0117,
      "step": 2051980
    },
    {
      "epoch": 3.3581430058325643,
      "grad_norm": 0.13031157851219177,
      "learning_rate": 3.305351106659726e-06,
      "loss": 0.0139,
      "step": 2052000
    },
    {
      "epoch": 3.3581757362712175,
      "grad_norm": 0.47121164202690125,
      "learning_rate": 3.3052852144462096e-06,
      "loss": 0.0101,
      "step": 2052020
    },
    {
      "epoch": 3.358208466709871,
      "grad_norm": 0.30986836552619934,
      "learning_rate": 3.3052193222326924e-06,
      "loss": 0.0124,
      "step": 2052040
    },
    {
      "epoch": 3.358241197148524,
      "grad_norm": 0.12080735713243484,
      "learning_rate": 3.305153430019175e-06,
      "loss": 0.0107,
      "step": 2052060
    },
    {
      "epoch": 3.3582739275871774,
      "grad_norm": 0.10579187422990799,
      "learning_rate": 3.305087537805658e-06,
      "loss": 0.007,
      "step": 2052080
    },
    {
      "epoch": 3.358306658025831,
      "grad_norm": 0.39633089303970337,
      "learning_rate": 3.3050216455921406e-06,
      "loss": 0.0107,
      "step": 2052100
    },
    {
      "epoch": 3.358339388464484,
      "grad_norm": 0.484809935092926,
      "learning_rate": 3.3049557533786237e-06,
      "loss": 0.0111,
      "step": 2052120
    },
    {
      "epoch": 3.3583721189031377,
      "grad_norm": 0.47052258253097534,
      "learning_rate": 3.3048898611651065e-06,
      "loss": 0.0189,
      "step": 2052140
    },
    {
      "epoch": 3.358404849341791,
      "grad_norm": 0.0573456771671772,
      "learning_rate": 3.3048239689515892e-06,
      "loss": 0.0158,
      "step": 2052160
    },
    {
      "epoch": 3.3584375797804444,
      "grad_norm": 0.15024077892303467,
      "learning_rate": 3.304758076738072e-06,
      "loss": 0.0117,
      "step": 2052180
    },
    {
      "epoch": 3.3584703102190976,
      "grad_norm": 0.2717033922672272,
      "learning_rate": 3.304692184524555e-06,
      "loss": 0.0115,
      "step": 2052200
    },
    {
      "epoch": 3.3585030406577507,
      "grad_norm": 0.10939403623342514,
      "learning_rate": 3.304626292311038e-06,
      "loss": 0.0099,
      "step": 2052220
    },
    {
      "epoch": 3.3585357710964043,
      "grad_norm": 0.5226550698280334,
      "learning_rate": 3.3045604000975206e-06,
      "loss": 0.0091,
      "step": 2052240
    },
    {
      "epoch": 3.3585685015350575,
      "grad_norm": 0.2912517189979553,
      "learning_rate": 3.3044945078840034e-06,
      "loss": 0.0098,
      "step": 2052260
    },
    {
      "epoch": 3.358601231973711,
      "grad_norm": 0.4461372196674347,
      "learning_rate": 3.3044286156704865e-06,
      "loss": 0.0128,
      "step": 2052280
    },
    {
      "epoch": 3.3586339624123642,
      "grad_norm": 0.12953230738639832,
      "learning_rate": 3.3043627234569693e-06,
      "loss": 0.0114,
      "step": 2052300
    },
    {
      "epoch": 3.358666692851018,
      "grad_norm": 0.42838162183761597,
      "learning_rate": 3.304296831243452e-06,
      "loss": 0.0121,
      "step": 2052320
    },
    {
      "epoch": 3.358699423289671,
      "grad_norm": 0.43856915831565857,
      "learning_rate": 3.3042309390299347e-06,
      "loss": 0.0145,
      "step": 2052340
    },
    {
      "epoch": 3.358732153728324,
      "grad_norm": 0.9827443361282349,
      "learning_rate": 3.304165046816418e-06,
      "loss": 0.0162,
      "step": 2052360
    },
    {
      "epoch": 3.3587648841669777,
      "grad_norm": 0.29468584060668945,
      "learning_rate": 3.304099154602901e-06,
      "loss": 0.0118,
      "step": 2052380
    },
    {
      "epoch": 3.358797614605631,
      "grad_norm": 0.5010655522346497,
      "learning_rate": 3.304033262389384e-06,
      "loss": 0.0091,
      "step": 2052400
    },
    {
      "epoch": 3.3588303450442845,
      "grad_norm": 0.38039064407348633,
      "learning_rate": 3.303967370175867e-06,
      "loss": 0.0115,
      "step": 2052420
    },
    {
      "epoch": 3.3588630754829376,
      "grad_norm": 0.06146292760968208,
      "learning_rate": 3.3039014779623497e-06,
      "loss": 0.0056,
      "step": 2052440
    },
    {
      "epoch": 3.358895805921591,
      "grad_norm": 0.16356296837329865,
      "learning_rate": 3.3038355857488324e-06,
      "loss": 0.0113,
      "step": 2052460
    },
    {
      "epoch": 3.3589285363602444,
      "grad_norm": 0.29947489500045776,
      "learning_rate": 3.303769693535315e-06,
      "loss": 0.0113,
      "step": 2052480
    },
    {
      "epoch": 3.3589612667988975,
      "grad_norm": 0.16628111898899078,
      "learning_rate": 3.303703801321798e-06,
      "loss": 0.0118,
      "step": 2052500
    },
    {
      "epoch": 3.358993997237551,
      "grad_norm": 0.27868250012397766,
      "learning_rate": 3.303637909108281e-06,
      "loss": 0.0121,
      "step": 2052520
    },
    {
      "epoch": 3.3590267276762042,
      "grad_norm": 0.5257311463356018,
      "learning_rate": 3.303572016894764e-06,
      "loss": 0.0086,
      "step": 2052540
    },
    {
      "epoch": 3.359059458114858,
      "grad_norm": 0.1747749149799347,
      "learning_rate": 3.3035061246812466e-06,
      "loss": 0.0132,
      "step": 2052560
    },
    {
      "epoch": 3.359092188553511,
      "grad_norm": 0.2879418432712555,
      "learning_rate": 3.3034402324677293e-06,
      "loss": 0.0108,
      "step": 2052580
    },
    {
      "epoch": 3.359124918992164,
      "grad_norm": 0.25110772252082825,
      "learning_rate": 3.3033743402542125e-06,
      "loss": 0.0107,
      "step": 2052600
    },
    {
      "epoch": 3.3591576494308177,
      "grad_norm": 1.1345033645629883,
      "learning_rate": 3.303308448040695e-06,
      "loss": 0.0075,
      "step": 2052620
    },
    {
      "epoch": 3.359190379869471,
      "grad_norm": 0.23120829463005066,
      "learning_rate": 3.303242555827178e-06,
      "loss": 0.0147,
      "step": 2052640
    },
    {
      "epoch": 3.3592231103081245,
      "grad_norm": 0.7232157588005066,
      "learning_rate": 3.3031766636136607e-06,
      "loss": 0.0164,
      "step": 2052660
    },
    {
      "epoch": 3.3592558407467776,
      "grad_norm": 0.13732175529003143,
      "learning_rate": 3.303110771400144e-06,
      "loss": 0.0124,
      "step": 2052680
    },
    {
      "epoch": 3.359288571185431,
      "grad_norm": 0.3338896334171295,
      "learning_rate": 3.3030448791866266e-06,
      "loss": 0.0095,
      "step": 2052700
    },
    {
      "epoch": 3.3593213016240844,
      "grad_norm": 0.2105318307876587,
      "learning_rate": 3.3029789869731098e-06,
      "loss": 0.0099,
      "step": 2052720
    },
    {
      "epoch": 3.3593540320627375,
      "grad_norm": 0.45059484243392944,
      "learning_rate": 3.302913094759593e-06,
      "loss": 0.0102,
      "step": 2052740
    },
    {
      "epoch": 3.359386762501391,
      "grad_norm": 0.2634154260158539,
      "learning_rate": 3.3028472025460757e-06,
      "loss": 0.0149,
      "step": 2052760
    },
    {
      "epoch": 3.3594194929400443,
      "grad_norm": 0.10230427980422974,
      "learning_rate": 3.3027813103325584e-06,
      "loss": 0.0102,
      "step": 2052780
    },
    {
      "epoch": 3.359452223378698,
      "grad_norm": 0.4883032441139221,
      "learning_rate": 3.302715418119041e-06,
      "loss": 0.0107,
      "step": 2052800
    },
    {
      "epoch": 3.359484953817351,
      "grad_norm": 0.22350876033306122,
      "learning_rate": 3.3026495259055243e-06,
      "loss": 0.0139,
      "step": 2052820
    },
    {
      "epoch": 3.359517684256004,
      "grad_norm": 0.07712054252624512,
      "learning_rate": 3.302583633692007e-06,
      "loss": 0.013,
      "step": 2052840
    },
    {
      "epoch": 3.3595504146946578,
      "grad_norm": 0.46558618545532227,
      "learning_rate": 3.3025177414784898e-06,
      "loss": 0.0122,
      "step": 2052860
    },
    {
      "epoch": 3.359583145133311,
      "grad_norm": 0.1976635456085205,
      "learning_rate": 3.3024518492649725e-06,
      "loss": 0.0069,
      "step": 2052880
    },
    {
      "epoch": 3.3596158755719645,
      "grad_norm": 0.35371652245521545,
      "learning_rate": 3.3023859570514557e-06,
      "loss": 0.0182,
      "step": 2052900
    },
    {
      "epoch": 3.3596486060106177,
      "grad_norm": 0.32110974192619324,
      "learning_rate": 3.3023200648379384e-06,
      "loss": 0.0072,
      "step": 2052920
    },
    {
      "epoch": 3.3596813364492712,
      "grad_norm": 0.4419720768928528,
      "learning_rate": 3.302254172624421e-06,
      "loss": 0.0106,
      "step": 2052940
    },
    {
      "epoch": 3.3597140668879244,
      "grad_norm": 0.37709441781044006,
      "learning_rate": 3.302188280410904e-06,
      "loss": 0.0217,
      "step": 2052960
    },
    {
      "epoch": 3.3597467973265775,
      "grad_norm": 0.2674804925918579,
      "learning_rate": 3.3021223881973866e-06,
      "loss": 0.0102,
      "step": 2052980
    },
    {
      "epoch": 3.359779527765231,
      "grad_norm": 0.464370995759964,
      "learning_rate": 3.30205649598387e-06,
      "loss": 0.01,
      "step": 2053000
    },
    {
      "epoch": 3.3598122582038843,
      "grad_norm": 0.5025960206985474,
      "learning_rate": 3.3019906037703525e-06,
      "loss": 0.0103,
      "step": 2053020
    },
    {
      "epoch": 3.359844988642538,
      "grad_norm": 0.19767224788665771,
      "learning_rate": 3.3019247115568353e-06,
      "loss": 0.0199,
      "step": 2053040
    },
    {
      "epoch": 3.359877719081191,
      "grad_norm": 0.11213074624538422,
      "learning_rate": 3.301858819343318e-06,
      "loss": 0.0085,
      "step": 2053060
    },
    {
      "epoch": 3.3599104495198446,
      "grad_norm": 0.25502097606658936,
      "learning_rate": 3.3017929271298016e-06,
      "loss": 0.0109,
      "step": 2053080
    },
    {
      "epoch": 3.359943179958498,
      "grad_norm": 0.41281381249427795,
      "learning_rate": 3.3017270349162843e-06,
      "loss": 0.0082,
      "step": 2053100
    },
    {
      "epoch": 3.359975910397151,
      "grad_norm": 0.3244115710258484,
      "learning_rate": 3.301661142702767e-06,
      "loss": 0.0108,
      "step": 2053120
    },
    {
      "epoch": 3.3600086408358045,
      "grad_norm": 0.3465171158313751,
      "learning_rate": 3.3015952504892503e-06,
      "loss": 0.0111,
      "step": 2053140
    },
    {
      "epoch": 3.3600413712744577,
      "grad_norm": 0.4692027270793915,
      "learning_rate": 3.301529358275733e-06,
      "loss": 0.0115,
      "step": 2053160
    },
    {
      "epoch": 3.3600741017131113,
      "grad_norm": 0.5793980360031128,
      "learning_rate": 3.3014634660622157e-06,
      "loss": 0.0149,
      "step": 2053180
    },
    {
      "epoch": 3.3601068321517644,
      "grad_norm": 0.08674747496843338,
      "learning_rate": 3.3013975738486985e-06,
      "loss": 0.0114,
      "step": 2053200
    },
    {
      "epoch": 3.360139562590418,
      "grad_norm": 0.20871451497077942,
      "learning_rate": 3.3013316816351816e-06,
      "loss": 0.0088,
      "step": 2053220
    },
    {
      "epoch": 3.360172293029071,
      "grad_norm": 0.14584538340568542,
      "learning_rate": 3.3012657894216644e-06,
      "loss": 0.0079,
      "step": 2053240
    },
    {
      "epoch": 3.3602050234677243,
      "grad_norm": 0.6102741956710815,
      "learning_rate": 3.301199897208147e-06,
      "loss": 0.0136,
      "step": 2053260
    },
    {
      "epoch": 3.360237753906378,
      "grad_norm": 0.10330618172883987,
      "learning_rate": 3.30113400499463e-06,
      "loss": 0.0067,
      "step": 2053280
    },
    {
      "epoch": 3.360270484345031,
      "grad_norm": 0.3824520707130432,
      "learning_rate": 3.301068112781113e-06,
      "loss": 0.0185,
      "step": 2053300
    },
    {
      "epoch": 3.3603032147836847,
      "grad_norm": 0.4684508144855499,
      "learning_rate": 3.3010022205675958e-06,
      "loss": 0.0142,
      "step": 2053320
    },
    {
      "epoch": 3.360335945222338,
      "grad_norm": 0.05779524892568588,
      "learning_rate": 3.3009363283540785e-06,
      "loss": 0.0137,
      "step": 2053340
    },
    {
      "epoch": 3.3603686756609914,
      "grad_norm": 0.19549527764320374,
      "learning_rate": 3.3008704361405612e-06,
      "loss": 0.0119,
      "step": 2053360
    },
    {
      "epoch": 3.3604014060996446,
      "grad_norm": 0.5152505040168762,
      "learning_rate": 3.3008045439270444e-06,
      "loss": 0.0096,
      "step": 2053380
    },
    {
      "epoch": 3.3604341365382977,
      "grad_norm": 0.3646399676799774,
      "learning_rate": 3.300738651713527e-06,
      "loss": 0.0103,
      "step": 2053400
    },
    {
      "epoch": 3.3604668669769513,
      "grad_norm": 0.3984750211238861,
      "learning_rate": 3.3006727595000103e-06,
      "loss": 0.0098,
      "step": 2053420
    },
    {
      "epoch": 3.3604995974156044,
      "grad_norm": 0.3532755672931671,
      "learning_rate": 3.3006068672864935e-06,
      "loss": 0.0107,
      "step": 2053440
    },
    {
      "epoch": 3.360532327854258,
      "grad_norm": 0.12312015146017075,
      "learning_rate": 3.300540975072976e-06,
      "loss": 0.0115,
      "step": 2053460
    },
    {
      "epoch": 3.360565058292911,
      "grad_norm": 0.2693636119365692,
      "learning_rate": 3.300475082859459e-06,
      "loss": 0.0103,
      "step": 2053480
    },
    {
      "epoch": 3.360597788731565,
      "grad_norm": 0.3123297691345215,
      "learning_rate": 3.3004091906459417e-06,
      "loss": 0.0129,
      "step": 2053500
    },
    {
      "epoch": 3.360630519170218,
      "grad_norm": 0.5123478174209595,
      "learning_rate": 3.3003432984324244e-06,
      "loss": 0.0091,
      "step": 2053520
    },
    {
      "epoch": 3.360663249608871,
      "grad_norm": 0.493941992521286,
      "learning_rate": 3.3002774062189076e-06,
      "loss": 0.0106,
      "step": 2053540
    },
    {
      "epoch": 3.3606959800475247,
      "grad_norm": 0.2854689359664917,
      "learning_rate": 3.3002115140053903e-06,
      "loss": 0.0118,
      "step": 2053560
    },
    {
      "epoch": 3.360728710486178,
      "grad_norm": 0.1539277732372284,
      "learning_rate": 3.300145621791873e-06,
      "loss": 0.0089,
      "step": 2053580
    },
    {
      "epoch": 3.3607614409248314,
      "grad_norm": 0.5824257731437683,
      "learning_rate": 3.300079729578356e-06,
      "loss": 0.0143,
      "step": 2053600
    },
    {
      "epoch": 3.3607941713634846,
      "grad_norm": 0.25239622592926025,
      "learning_rate": 3.300013837364839e-06,
      "loss": 0.015,
      "step": 2053620
    },
    {
      "epoch": 3.360826901802138,
      "grad_norm": 0.44830289483070374,
      "learning_rate": 3.2999479451513217e-06,
      "loss": 0.0198,
      "step": 2053640
    },
    {
      "epoch": 3.3608596322407913,
      "grad_norm": 0.2810509502887726,
      "learning_rate": 3.2998820529378045e-06,
      "loss": 0.0097,
      "step": 2053660
    },
    {
      "epoch": 3.3608923626794445,
      "grad_norm": 0.8750110864639282,
      "learning_rate": 3.299816160724287e-06,
      "loss": 0.0132,
      "step": 2053680
    },
    {
      "epoch": 3.360925093118098,
      "grad_norm": 0.4864048659801483,
      "learning_rate": 3.2997502685107704e-06,
      "loss": 0.0124,
      "step": 2053700
    },
    {
      "epoch": 3.360957823556751,
      "grad_norm": 0.1801038384437561,
      "learning_rate": 3.299684376297253e-06,
      "loss": 0.0096,
      "step": 2053720
    },
    {
      "epoch": 3.360990553995405,
      "grad_norm": 0.1670392006635666,
      "learning_rate": 3.299618484083736e-06,
      "loss": 0.0125,
      "step": 2053740
    },
    {
      "epoch": 3.361023284434058,
      "grad_norm": 0.6002317070960999,
      "learning_rate": 3.2995525918702186e-06,
      "loss": 0.0176,
      "step": 2053760
    },
    {
      "epoch": 3.3610560148727116,
      "grad_norm": 0.8782621622085571,
      "learning_rate": 3.299486699656702e-06,
      "loss": 0.0188,
      "step": 2053780
    },
    {
      "epoch": 3.3610887453113647,
      "grad_norm": 0.17003338038921356,
      "learning_rate": 3.299420807443185e-06,
      "loss": 0.0091,
      "step": 2053800
    },
    {
      "epoch": 3.361121475750018,
      "grad_norm": 0.3778102397918701,
      "learning_rate": 3.2993549152296676e-06,
      "loss": 0.0106,
      "step": 2053820
    },
    {
      "epoch": 3.3611542061886714,
      "grad_norm": 0.2775600254535675,
      "learning_rate": 3.299289023016151e-06,
      "loss": 0.0079,
      "step": 2053840
    },
    {
      "epoch": 3.3611869366273246,
      "grad_norm": 0.5934494137763977,
      "learning_rate": 3.2992231308026335e-06,
      "loss": 0.0123,
      "step": 2053860
    },
    {
      "epoch": 3.361219667065978,
      "grad_norm": 0.17114359140396118,
      "learning_rate": 3.2991572385891163e-06,
      "loss": 0.014,
      "step": 2053880
    },
    {
      "epoch": 3.3612523975046313,
      "grad_norm": 0.5320137739181519,
      "learning_rate": 3.299091346375599e-06,
      "loss": 0.0112,
      "step": 2053900
    },
    {
      "epoch": 3.361285127943285,
      "grad_norm": 0.31721770763397217,
      "learning_rate": 3.299025454162082e-06,
      "loss": 0.0103,
      "step": 2053920
    },
    {
      "epoch": 3.361317858381938,
      "grad_norm": 0.5040874481201172,
      "learning_rate": 3.298959561948565e-06,
      "loss": 0.0151,
      "step": 2053940
    },
    {
      "epoch": 3.3613505888205912,
      "grad_norm": 0.6848658919334412,
      "learning_rate": 3.2988936697350477e-06,
      "loss": 0.0111,
      "step": 2053960
    },
    {
      "epoch": 3.361383319259245,
      "grad_norm": 0.2475244551897049,
      "learning_rate": 3.2988277775215304e-06,
      "loss": 0.0175,
      "step": 2053980
    },
    {
      "epoch": 3.361416049697898,
      "grad_norm": 0.24940241873264313,
      "learning_rate": 3.298761885308013e-06,
      "loss": 0.0142,
      "step": 2054000
    },
    {
      "epoch": 3.3614487801365516,
      "grad_norm": 0.22163185477256775,
      "learning_rate": 3.2986959930944963e-06,
      "loss": 0.0131,
      "step": 2054020
    },
    {
      "epoch": 3.3614815105752047,
      "grad_norm": 0.3199460804462433,
      "learning_rate": 3.298630100880979e-06,
      "loss": 0.0132,
      "step": 2054040
    },
    {
      "epoch": 3.3615142410138583,
      "grad_norm": 0.14307673275470734,
      "learning_rate": 3.2985642086674618e-06,
      "loss": 0.0123,
      "step": 2054060
    },
    {
      "epoch": 3.3615469714525115,
      "grad_norm": 0.3808790445327759,
      "learning_rate": 3.2984983164539445e-06,
      "loss": 0.0103,
      "step": 2054080
    },
    {
      "epoch": 3.3615797018911646,
      "grad_norm": 0.4347347021102905,
      "learning_rate": 3.2984324242404277e-06,
      "loss": 0.0131,
      "step": 2054100
    },
    {
      "epoch": 3.361612432329818,
      "grad_norm": 0.1778491884469986,
      "learning_rate": 3.2983665320269104e-06,
      "loss": 0.0112,
      "step": 2054120
    },
    {
      "epoch": 3.3616451627684714,
      "grad_norm": 0.25830313563346863,
      "learning_rate": 3.2983006398133936e-06,
      "loss": 0.0107,
      "step": 2054140
    },
    {
      "epoch": 3.3616778932071245,
      "grad_norm": 0.7104994654655457,
      "learning_rate": 3.2982347475998768e-06,
      "loss": 0.0083,
      "step": 2054160
    },
    {
      "epoch": 3.361710623645778,
      "grad_norm": 0.27209731936454773,
      "learning_rate": 3.2981688553863595e-06,
      "loss": 0.0158,
      "step": 2054180
    },
    {
      "epoch": 3.3617433540844313,
      "grad_norm": 0.2368067353963852,
      "learning_rate": 3.2981029631728422e-06,
      "loss": 0.0154,
      "step": 2054200
    },
    {
      "epoch": 3.361776084523085,
      "grad_norm": 0.12242351472377777,
      "learning_rate": 3.298037070959325e-06,
      "loss": 0.0097,
      "step": 2054220
    },
    {
      "epoch": 3.361808814961738,
      "grad_norm": 0.36061376333236694,
      "learning_rate": 3.297971178745808e-06,
      "loss": 0.0075,
      "step": 2054240
    },
    {
      "epoch": 3.3618415454003916,
      "grad_norm": 0.12547706067562103,
      "learning_rate": 3.297905286532291e-06,
      "loss": 0.0129,
      "step": 2054260
    },
    {
      "epoch": 3.3618742758390447,
      "grad_norm": 0.3402464687824249,
      "learning_rate": 3.2978393943187736e-06,
      "loss": 0.0189,
      "step": 2054280
    },
    {
      "epoch": 3.361907006277698,
      "grad_norm": 0.34467995166778564,
      "learning_rate": 3.2977735021052564e-06,
      "loss": 0.0092,
      "step": 2054300
    },
    {
      "epoch": 3.3619397367163515,
      "grad_norm": 0.1975410133600235,
      "learning_rate": 3.2977076098917395e-06,
      "loss": 0.0101,
      "step": 2054320
    },
    {
      "epoch": 3.3619724671550046,
      "grad_norm": 0.289232075214386,
      "learning_rate": 3.2976417176782223e-06,
      "loss": 0.0124,
      "step": 2054340
    },
    {
      "epoch": 3.3620051975936582,
      "grad_norm": 0.3106507658958435,
      "learning_rate": 3.297575825464705e-06,
      "loss": 0.0157,
      "step": 2054360
    },
    {
      "epoch": 3.3620379280323114,
      "grad_norm": 0.32668718695640564,
      "learning_rate": 3.2975099332511877e-06,
      "loss": 0.0126,
      "step": 2054380
    },
    {
      "epoch": 3.362070658470965,
      "grad_norm": 0.24352312088012695,
      "learning_rate": 3.2974440410376705e-06,
      "loss": 0.0097,
      "step": 2054400
    },
    {
      "epoch": 3.362103388909618,
      "grad_norm": 0.28776636719703674,
      "learning_rate": 3.2973781488241536e-06,
      "loss": 0.0121,
      "step": 2054420
    },
    {
      "epoch": 3.3621361193482713,
      "grad_norm": 0.4104940891265869,
      "learning_rate": 3.2973122566106364e-06,
      "loss": 0.0107,
      "step": 2054440
    },
    {
      "epoch": 3.362168849786925,
      "grad_norm": 0.0998300164937973,
      "learning_rate": 3.297246364397119e-06,
      "loss": 0.013,
      "step": 2054460
    },
    {
      "epoch": 3.362201580225578,
      "grad_norm": 0.19273699820041656,
      "learning_rate": 3.2971804721836027e-06,
      "loss": 0.0124,
      "step": 2054480
    },
    {
      "epoch": 3.3622343106642316,
      "grad_norm": 0.25894808769226074,
      "learning_rate": 3.2971145799700854e-06,
      "loss": 0.0102,
      "step": 2054500
    },
    {
      "epoch": 3.3622670411028848,
      "grad_norm": 0.2173565924167633,
      "learning_rate": 3.297048687756568e-06,
      "loss": 0.016,
      "step": 2054520
    },
    {
      "epoch": 3.3622997715415384,
      "grad_norm": 0.4200478792190552,
      "learning_rate": 3.296982795543051e-06,
      "loss": 0.0109,
      "step": 2054540
    },
    {
      "epoch": 3.3623325019801915,
      "grad_norm": 0.5194980502128601,
      "learning_rate": 3.296916903329534e-06,
      "loss": 0.008,
      "step": 2054560
    },
    {
      "epoch": 3.3623652324188447,
      "grad_norm": 0.08365069329738617,
      "learning_rate": 3.296851011116017e-06,
      "loss": 0.0099,
      "step": 2054580
    },
    {
      "epoch": 3.3623979628574983,
      "grad_norm": 0.20182198286056519,
      "learning_rate": 3.2967851189024996e-06,
      "loss": 0.0139,
      "step": 2054600
    },
    {
      "epoch": 3.3624306932961514,
      "grad_norm": 0.32599711418151855,
      "learning_rate": 3.2967192266889823e-06,
      "loss": 0.0085,
      "step": 2054620
    },
    {
      "epoch": 3.362463423734805,
      "grad_norm": 0.10572101175785065,
      "learning_rate": 3.2966533344754655e-06,
      "loss": 0.0105,
      "step": 2054640
    },
    {
      "epoch": 3.362496154173458,
      "grad_norm": 0.39592304825782776,
      "learning_rate": 3.2965874422619482e-06,
      "loss": 0.0138,
      "step": 2054660
    },
    {
      "epoch": 3.3625288846121117,
      "grad_norm": 0.2212338149547577,
      "learning_rate": 3.296521550048431e-06,
      "loss": 0.0076,
      "step": 2054680
    },
    {
      "epoch": 3.362561615050765,
      "grad_norm": 0.5917101502418518,
      "learning_rate": 3.2964556578349137e-06,
      "loss": 0.0087,
      "step": 2054700
    },
    {
      "epoch": 3.362594345489418,
      "grad_norm": 0.16504105925559998,
      "learning_rate": 3.296389765621397e-06,
      "loss": 0.0109,
      "step": 2054720
    },
    {
      "epoch": 3.3626270759280716,
      "grad_norm": 0.11656579375267029,
      "learning_rate": 3.2963238734078796e-06,
      "loss": 0.007,
      "step": 2054740
    },
    {
      "epoch": 3.362659806366725,
      "grad_norm": 0.3965924084186554,
      "learning_rate": 3.2962579811943623e-06,
      "loss": 0.0133,
      "step": 2054760
    },
    {
      "epoch": 3.3626925368053784,
      "grad_norm": 1.0748249292373657,
      "learning_rate": 3.296192088980845e-06,
      "loss": 0.009,
      "step": 2054780
    },
    {
      "epoch": 3.3627252672440315,
      "grad_norm": 0.1543167233467102,
      "learning_rate": 3.2961261967673282e-06,
      "loss": 0.0082,
      "step": 2054800
    },
    {
      "epoch": 3.362757997682685,
      "grad_norm": 0.32515066862106323,
      "learning_rate": 3.296060304553811e-06,
      "loss": 0.0118,
      "step": 2054820
    },
    {
      "epoch": 3.3627907281213383,
      "grad_norm": 0.5092538595199585,
      "learning_rate": 3.295994412340294e-06,
      "loss": 0.014,
      "step": 2054840
    },
    {
      "epoch": 3.3628234585599914,
      "grad_norm": 0.32077857851982117,
      "learning_rate": 3.2959285201267773e-06,
      "loss": 0.0187,
      "step": 2054860
    },
    {
      "epoch": 3.362856188998645,
      "grad_norm": 0.5290423035621643,
      "learning_rate": 3.29586262791326e-06,
      "loss": 0.0118,
      "step": 2054880
    },
    {
      "epoch": 3.362888919437298,
      "grad_norm": 0.8746414184570312,
      "learning_rate": 3.2957967356997428e-06,
      "loss": 0.0114,
      "step": 2054900
    },
    {
      "epoch": 3.3629216498759518,
      "grad_norm": 0.10009464621543884,
      "learning_rate": 3.2957308434862255e-06,
      "loss": 0.0118,
      "step": 2054920
    },
    {
      "epoch": 3.362954380314605,
      "grad_norm": 0.36463847756385803,
      "learning_rate": 3.2956649512727083e-06,
      "loss": 0.0132,
      "step": 2054940
    },
    {
      "epoch": 3.3629871107532585,
      "grad_norm": 0.15897047519683838,
      "learning_rate": 3.2955990590591914e-06,
      "loss": 0.0113,
      "step": 2054960
    },
    {
      "epoch": 3.3630198411919117,
      "grad_norm": 0.22140058875083923,
      "learning_rate": 3.295533166845674e-06,
      "loss": 0.013,
      "step": 2054980
    },
    {
      "epoch": 3.363052571630565,
      "grad_norm": 0.2850680351257324,
      "learning_rate": 3.295467274632157e-06,
      "loss": 0.0081,
      "step": 2055000
    },
    {
      "epoch": 3.3630853020692184,
      "grad_norm": 0.2306370884180069,
      "learning_rate": 3.2954013824186396e-06,
      "loss": 0.0131,
      "step": 2055020
    },
    {
      "epoch": 3.3631180325078716,
      "grad_norm": 0.3173300325870514,
      "learning_rate": 3.295335490205123e-06,
      "loss": 0.0114,
      "step": 2055040
    },
    {
      "epoch": 3.363150762946525,
      "grad_norm": 0.14686281979084015,
      "learning_rate": 3.2952695979916055e-06,
      "loss": 0.0121,
      "step": 2055060
    },
    {
      "epoch": 3.3631834933851783,
      "grad_norm": 0.217544287443161,
      "learning_rate": 3.2952037057780883e-06,
      "loss": 0.01,
      "step": 2055080
    },
    {
      "epoch": 3.363216223823832,
      "grad_norm": 0.3578121066093445,
      "learning_rate": 3.295137813564571e-06,
      "loss": 0.0114,
      "step": 2055100
    },
    {
      "epoch": 3.363248954262485,
      "grad_norm": 0.3928358554840088,
      "learning_rate": 3.295071921351054e-06,
      "loss": 0.0124,
      "step": 2055120
    },
    {
      "epoch": 3.363281684701138,
      "grad_norm": 1.1260429620742798,
      "learning_rate": 3.295006029137537e-06,
      "loss": 0.0119,
      "step": 2055140
    },
    {
      "epoch": 3.363314415139792,
      "grad_norm": 0.19608257710933685,
      "learning_rate": 3.2949401369240197e-06,
      "loss": 0.0062,
      "step": 2055160
    },
    {
      "epoch": 3.363347145578445,
      "grad_norm": 0.40330401062965393,
      "learning_rate": 3.2948742447105033e-06,
      "loss": 0.0119,
      "step": 2055180
    },
    {
      "epoch": 3.3633798760170985,
      "grad_norm": 0.19150109589099884,
      "learning_rate": 3.294808352496986e-06,
      "loss": 0.0083,
      "step": 2055200
    },
    {
      "epoch": 3.3634126064557517,
      "grad_norm": 0.29199761152267456,
      "learning_rate": 3.2947424602834687e-06,
      "loss": 0.0103,
      "step": 2055220
    },
    {
      "epoch": 3.3634453368944053,
      "grad_norm": 0.4504966735839844,
      "learning_rate": 3.2946765680699515e-06,
      "loss": 0.0142,
      "step": 2055240
    },
    {
      "epoch": 3.3634780673330584,
      "grad_norm": 0.42897433042526245,
      "learning_rate": 3.2946106758564346e-06,
      "loss": 0.0106,
      "step": 2055260
    },
    {
      "epoch": 3.3635107977717116,
      "grad_norm": 0.36048826575279236,
      "learning_rate": 3.2945447836429174e-06,
      "loss": 0.0079,
      "step": 2055280
    },
    {
      "epoch": 3.363543528210365,
      "grad_norm": 0.18079733848571777,
      "learning_rate": 3.2944788914294e-06,
      "loss": 0.0094,
      "step": 2055300
    },
    {
      "epoch": 3.3635762586490183,
      "grad_norm": 0.4332544803619385,
      "learning_rate": 3.294412999215883e-06,
      "loss": 0.0156,
      "step": 2055320
    },
    {
      "epoch": 3.363608989087672,
      "grad_norm": 0.20210880041122437,
      "learning_rate": 3.294347107002366e-06,
      "loss": 0.008,
      "step": 2055340
    },
    {
      "epoch": 3.363641719526325,
      "grad_norm": 0.16314055025577545,
      "learning_rate": 3.2942812147888488e-06,
      "loss": 0.0107,
      "step": 2055360
    },
    {
      "epoch": 3.3636744499649787,
      "grad_norm": 0.3966710567474365,
      "learning_rate": 3.2942153225753315e-06,
      "loss": 0.0113,
      "step": 2055380
    },
    {
      "epoch": 3.363707180403632,
      "grad_norm": 0.6142321228981018,
      "learning_rate": 3.2941494303618142e-06,
      "loss": 0.0138,
      "step": 2055400
    },
    {
      "epoch": 3.363739910842285,
      "grad_norm": 0.35479816794395447,
      "learning_rate": 3.294083538148297e-06,
      "loss": 0.0138,
      "step": 2055420
    },
    {
      "epoch": 3.3637726412809386,
      "grad_norm": 0.09279181808233261,
      "learning_rate": 3.29401764593478e-06,
      "loss": 0.0083,
      "step": 2055440
    },
    {
      "epoch": 3.3638053717195917,
      "grad_norm": 0.3570317029953003,
      "learning_rate": 3.293951753721263e-06,
      "loss": 0.0092,
      "step": 2055460
    },
    {
      "epoch": 3.3638381021582453,
      "grad_norm": 0.08770928531885147,
      "learning_rate": 3.2938858615077456e-06,
      "loss": 0.0092,
      "step": 2055480
    },
    {
      "epoch": 3.3638708325968985,
      "grad_norm": 0.4032094478607178,
      "learning_rate": 3.2938199692942284e-06,
      "loss": 0.0091,
      "step": 2055500
    },
    {
      "epoch": 3.363903563035552,
      "grad_norm": 0.06779848784208298,
      "learning_rate": 3.2937540770807115e-06,
      "loss": 0.007,
      "step": 2055520
    },
    {
      "epoch": 3.363936293474205,
      "grad_norm": 0.11130386590957642,
      "learning_rate": 3.2936881848671947e-06,
      "loss": 0.0133,
      "step": 2055540
    },
    {
      "epoch": 3.3639690239128583,
      "grad_norm": 0.5564607381820679,
      "learning_rate": 3.2936222926536774e-06,
      "loss": 0.0221,
      "step": 2055560
    },
    {
      "epoch": 3.364001754351512,
      "grad_norm": 1.4674699306488037,
      "learning_rate": 3.2935564004401606e-06,
      "loss": 0.0096,
      "step": 2055580
    },
    {
      "epoch": 3.364034484790165,
      "grad_norm": 0.18610504269599915,
      "learning_rate": 3.2934905082266433e-06,
      "loss": 0.0146,
      "step": 2055600
    },
    {
      "epoch": 3.3640672152288187,
      "grad_norm": 0.3504723310470581,
      "learning_rate": 3.293424616013126e-06,
      "loss": 0.0131,
      "step": 2055620
    },
    {
      "epoch": 3.364099945667472,
      "grad_norm": 0.25759273767471313,
      "learning_rate": 3.293358723799609e-06,
      "loss": 0.01,
      "step": 2055640
    },
    {
      "epoch": 3.364132676106125,
      "grad_norm": 0.4767370820045471,
      "learning_rate": 3.293292831586092e-06,
      "loss": 0.0154,
      "step": 2055660
    },
    {
      "epoch": 3.3641654065447786,
      "grad_norm": 0.5815402865409851,
      "learning_rate": 3.2932269393725747e-06,
      "loss": 0.0104,
      "step": 2055680
    },
    {
      "epoch": 3.3641981369834317,
      "grad_norm": 0.19507387280464172,
      "learning_rate": 3.2931610471590575e-06,
      "loss": 0.0101,
      "step": 2055700
    },
    {
      "epoch": 3.3642308674220853,
      "grad_norm": 0.2933666408061981,
      "learning_rate": 3.29309515494554e-06,
      "loss": 0.0129,
      "step": 2055720
    },
    {
      "epoch": 3.3642635978607385,
      "grad_norm": 0.32573625445365906,
      "learning_rate": 3.2930292627320234e-06,
      "loss": 0.0141,
      "step": 2055740
    },
    {
      "epoch": 3.3642963282993916,
      "grad_norm": 0.4174126982688904,
      "learning_rate": 3.292963370518506e-06,
      "loss": 0.0096,
      "step": 2055760
    },
    {
      "epoch": 3.364329058738045,
      "grad_norm": 0.31173262000083923,
      "learning_rate": 3.292897478304989e-06,
      "loss": 0.0107,
      "step": 2055780
    },
    {
      "epoch": 3.3643617891766984,
      "grad_norm": 0.2545776069164276,
      "learning_rate": 3.2928315860914716e-06,
      "loss": 0.0114,
      "step": 2055800
    },
    {
      "epoch": 3.364394519615352,
      "grad_norm": 0.06059950962662697,
      "learning_rate": 3.2927656938779543e-06,
      "loss": 0.0069,
      "step": 2055820
    },
    {
      "epoch": 3.364427250054005,
      "grad_norm": 0.32845285534858704,
      "learning_rate": 3.2926998016644375e-06,
      "loss": 0.0139,
      "step": 2055840
    },
    {
      "epoch": 3.3644599804926587,
      "grad_norm": 0.07846144586801529,
      "learning_rate": 3.2926339094509202e-06,
      "loss": 0.0132,
      "step": 2055860
    },
    {
      "epoch": 3.364492710931312,
      "grad_norm": 0.41216719150543213,
      "learning_rate": 3.292568017237404e-06,
      "loss": 0.0105,
      "step": 2055880
    },
    {
      "epoch": 3.364525441369965,
      "grad_norm": 0.14890284836292267,
      "learning_rate": 3.2925021250238865e-06,
      "loss": 0.0087,
      "step": 2055900
    },
    {
      "epoch": 3.3645581718086186,
      "grad_norm": 0.3087911605834961,
      "learning_rate": 3.2924362328103693e-06,
      "loss": 0.0123,
      "step": 2055920
    },
    {
      "epoch": 3.3645909022472718,
      "grad_norm": 0.047955647110939026,
      "learning_rate": 3.292370340596852e-06,
      "loss": 0.0098,
      "step": 2055940
    },
    {
      "epoch": 3.3646236326859253,
      "grad_norm": 0.18825216591358185,
      "learning_rate": 3.2923044483833348e-06,
      "loss": 0.0126,
      "step": 2055960
    },
    {
      "epoch": 3.3646563631245785,
      "grad_norm": 0.14750787615776062,
      "learning_rate": 3.292238556169818e-06,
      "loss": 0.0086,
      "step": 2055980
    },
    {
      "epoch": 3.364689093563232,
      "grad_norm": 0.21998938918113708,
      "learning_rate": 3.2921726639563007e-06,
      "loss": 0.0153,
      "step": 2056000
    },
    {
      "epoch": 3.3647218240018852,
      "grad_norm": 0.49774202704429626,
      "learning_rate": 3.2921067717427834e-06,
      "loss": 0.0122,
      "step": 2056020
    },
    {
      "epoch": 3.3647545544405384,
      "grad_norm": 0.08398738503456116,
      "learning_rate": 3.292040879529266e-06,
      "loss": 0.0066,
      "step": 2056040
    },
    {
      "epoch": 3.364787284879192,
      "grad_norm": 0.24863223731517792,
      "learning_rate": 3.2919749873157493e-06,
      "loss": 0.0123,
      "step": 2056060
    },
    {
      "epoch": 3.364820015317845,
      "grad_norm": 0.21816615760326385,
      "learning_rate": 3.291909095102232e-06,
      "loss": 0.0147,
      "step": 2056080
    },
    {
      "epoch": 3.3648527457564987,
      "grad_norm": 0.2094743400812149,
      "learning_rate": 3.291843202888715e-06,
      "loss": 0.0114,
      "step": 2056100
    },
    {
      "epoch": 3.364885476195152,
      "grad_norm": 0.3098979890346527,
      "learning_rate": 3.2917773106751975e-06,
      "loss": 0.0097,
      "step": 2056120
    },
    {
      "epoch": 3.3649182066338055,
      "grad_norm": 0.1402927190065384,
      "learning_rate": 3.2917114184616807e-06,
      "loss": 0.0149,
      "step": 2056140
    },
    {
      "epoch": 3.3649509370724586,
      "grad_norm": 0.35283344984054565,
      "learning_rate": 3.2916455262481634e-06,
      "loss": 0.0117,
      "step": 2056160
    },
    {
      "epoch": 3.3649836675111118,
      "grad_norm": 0.21903042495250702,
      "learning_rate": 3.291579634034646e-06,
      "loss": 0.006,
      "step": 2056180
    },
    {
      "epoch": 3.3650163979497654,
      "grad_norm": 0.3955407440662384,
      "learning_rate": 3.291513741821129e-06,
      "loss": 0.0075,
      "step": 2056200
    },
    {
      "epoch": 3.3650491283884185,
      "grad_norm": 0.19888830184936523,
      "learning_rate": 3.291447849607612e-06,
      "loss": 0.012,
      "step": 2056220
    },
    {
      "epoch": 3.365081858827072,
      "grad_norm": 0.14923042058944702,
      "learning_rate": 3.2913819573940952e-06,
      "loss": 0.0104,
      "step": 2056240
    },
    {
      "epoch": 3.3651145892657253,
      "grad_norm": 0.45673221349716187,
      "learning_rate": 3.291316065180578e-06,
      "loss": 0.0111,
      "step": 2056260
    },
    {
      "epoch": 3.365147319704379,
      "grad_norm": 0.2193608582019806,
      "learning_rate": 3.291250172967061e-06,
      "loss": 0.0153,
      "step": 2056280
    },
    {
      "epoch": 3.365180050143032,
      "grad_norm": 0.8992084264755249,
      "learning_rate": 3.291184280753544e-06,
      "loss": 0.0138,
      "step": 2056300
    },
    {
      "epoch": 3.365212780581685,
      "grad_norm": 0.34230315685272217,
      "learning_rate": 3.2911183885400266e-06,
      "loss": 0.0202,
      "step": 2056320
    },
    {
      "epoch": 3.3652455110203388,
      "grad_norm": 0.5085098147392273,
      "learning_rate": 3.2910524963265094e-06,
      "loss": 0.0119,
      "step": 2056340
    },
    {
      "epoch": 3.365278241458992,
      "grad_norm": 1.5624921321868896,
      "learning_rate": 3.290986604112992e-06,
      "loss": 0.0107,
      "step": 2056360
    },
    {
      "epoch": 3.3653109718976455,
      "grad_norm": 0.1398468166589737,
      "learning_rate": 3.2909207118994753e-06,
      "loss": 0.0076,
      "step": 2056380
    },
    {
      "epoch": 3.3653437023362986,
      "grad_norm": 0.10885531455278397,
      "learning_rate": 3.290854819685958e-06,
      "loss": 0.0138,
      "step": 2056400
    },
    {
      "epoch": 3.3653764327749522,
      "grad_norm": 0.6388891339302063,
      "learning_rate": 3.2907889274724407e-06,
      "loss": 0.0079,
      "step": 2056420
    },
    {
      "epoch": 3.3654091632136054,
      "grad_norm": 0.9640414714813232,
      "learning_rate": 3.2907230352589235e-06,
      "loss": 0.0099,
      "step": 2056440
    },
    {
      "epoch": 3.3654418936522585,
      "grad_norm": 0.260856568813324,
      "learning_rate": 3.2906571430454066e-06,
      "loss": 0.0173,
      "step": 2056460
    },
    {
      "epoch": 3.365474624090912,
      "grad_norm": 0.2577035427093506,
      "learning_rate": 3.2905912508318894e-06,
      "loss": 0.0076,
      "step": 2056480
    },
    {
      "epoch": 3.3655073545295653,
      "grad_norm": 1.0224229097366333,
      "learning_rate": 3.290525358618372e-06,
      "loss": 0.0169,
      "step": 2056500
    },
    {
      "epoch": 3.365540084968219,
      "grad_norm": 0.12141208350658417,
      "learning_rate": 3.290459466404855e-06,
      "loss": 0.0106,
      "step": 2056520
    },
    {
      "epoch": 3.365572815406872,
      "grad_norm": 0.17017042636871338,
      "learning_rate": 3.290393574191338e-06,
      "loss": 0.0164,
      "step": 2056540
    },
    {
      "epoch": 3.3656055458455256,
      "grad_norm": 0.5188308358192444,
      "learning_rate": 3.2903276819778208e-06,
      "loss": 0.0094,
      "step": 2056560
    },
    {
      "epoch": 3.3656382762841788,
      "grad_norm": 0.3478139340877533,
      "learning_rate": 3.2902617897643035e-06,
      "loss": 0.0104,
      "step": 2056580
    },
    {
      "epoch": 3.365671006722832,
      "grad_norm": 0.32750675082206726,
      "learning_rate": 3.290195897550787e-06,
      "loss": 0.0176,
      "step": 2056600
    },
    {
      "epoch": 3.3657037371614855,
      "grad_norm": 0.17467094957828522,
      "learning_rate": 3.29013000533727e-06,
      "loss": 0.0079,
      "step": 2056620
    },
    {
      "epoch": 3.3657364676001387,
      "grad_norm": 0.10761848837137222,
      "learning_rate": 3.2900641131237526e-06,
      "loss": 0.0099,
      "step": 2056640
    },
    {
      "epoch": 3.3657691980387923,
      "grad_norm": 0.5310096740722656,
      "learning_rate": 3.2899982209102353e-06,
      "loss": 0.0134,
      "step": 2056660
    },
    {
      "epoch": 3.3658019284774454,
      "grad_norm": 0.48477703332901,
      "learning_rate": 3.2899323286967185e-06,
      "loss": 0.0113,
      "step": 2056680
    },
    {
      "epoch": 3.365834658916099,
      "grad_norm": 0.2982638478279114,
      "learning_rate": 3.2898664364832012e-06,
      "loss": 0.0088,
      "step": 2056700
    },
    {
      "epoch": 3.365867389354752,
      "grad_norm": 0.1794109046459198,
      "learning_rate": 3.289800544269684e-06,
      "loss": 0.0123,
      "step": 2056720
    },
    {
      "epoch": 3.3659001197934053,
      "grad_norm": 0.14594686031341553,
      "learning_rate": 3.2897346520561667e-06,
      "loss": 0.0152,
      "step": 2056740
    },
    {
      "epoch": 3.365932850232059,
      "grad_norm": 0.14936630427837372,
      "learning_rate": 3.28966875984265e-06,
      "loss": 0.0082,
      "step": 2056760
    },
    {
      "epoch": 3.365965580670712,
      "grad_norm": 0.676660418510437,
      "learning_rate": 3.2896028676291326e-06,
      "loss": 0.0146,
      "step": 2056780
    },
    {
      "epoch": 3.3659983111093656,
      "grad_norm": 0.4468492567539215,
      "learning_rate": 3.2895369754156153e-06,
      "loss": 0.0125,
      "step": 2056800
    },
    {
      "epoch": 3.366031041548019,
      "grad_norm": 0.1186763197183609,
      "learning_rate": 3.289471083202098e-06,
      "loss": 0.0083,
      "step": 2056820
    },
    {
      "epoch": 3.3660637719866724,
      "grad_norm": 0.24277779459953308,
      "learning_rate": 3.289405190988581e-06,
      "loss": 0.0118,
      "step": 2056840
    },
    {
      "epoch": 3.3660965024253255,
      "grad_norm": 0.12341036647558212,
      "learning_rate": 3.289339298775064e-06,
      "loss": 0.0164,
      "step": 2056860
    },
    {
      "epoch": 3.3661292328639787,
      "grad_norm": 0.17949293553829193,
      "learning_rate": 3.2892734065615467e-06,
      "loss": 0.0091,
      "step": 2056880
    },
    {
      "epoch": 3.3661619633026323,
      "grad_norm": 0.4219437837600708,
      "learning_rate": 3.2892075143480295e-06,
      "loss": 0.0154,
      "step": 2056900
    },
    {
      "epoch": 3.3661946937412854,
      "grad_norm": 0.3874683976173401,
      "learning_rate": 3.289141622134512e-06,
      "loss": 0.0096,
      "step": 2056920
    },
    {
      "epoch": 3.366227424179939,
      "grad_norm": 0.2426835596561432,
      "learning_rate": 3.289075729920996e-06,
      "loss": 0.0123,
      "step": 2056940
    },
    {
      "epoch": 3.366260154618592,
      "grad_norm": 0.29510587453842163,
      "learning_rate": 3.2890098377074785e-06,
      "loss": 0.0121,
      "step": 2056960
    },
    {
      "epoch": 3.3662928850572458,
      "grad_norm": 0.5553246736526489,
      "learning_rate": 3.2889439454939613e-06,
      "loss": 0.0143,
      "step": 2056980
    },
    {
      "epoch": 3.366325615495899,
      "grad_norm": 0.3499193787574768,
      "learning_rate": 3.2888780532804444e-06,
      "loss": 0.0135,
      "step": 2057000
    },
    {
      "epoch": 3.366358345934552,
      "grad_norm": 0.06986849755048752,
      "learning_rate": 3.288812161066927e-06,
      "loss": 0.007,
      "step": 2057020
    },
    {
      "epoch": 3.3663910763732057,
      "grad_norm": 0.11430246382951736,
      "learning_rate": 3.28874626885341e-06,
      "loss": 0.0096,
      "step": 2057040
    },
    {
      "epoch": 3.366423806811859,
      "grad_norm": 0.16616864502429962,
      "learning_rate": 3.2886803766398927e-06,
      "loss": 0.0089,
      "step": 2057060
    },
    {
      "epoch": 3.3664565372505124,
      "grad_norm": 0.33386075496673584,
      "learning_rate": 3.288614484426376e-06,
      "loss": 0.0102,
      "step": 2057080
    },
    {
      "epoch": 3.3664892676891656,
      "grad_norm": 0.5866636037826538,
      "learning_rate": 3.2885485922128586e-06,
      "loss": 0.0238,
      "step": 2057100
    },
    {
      "epoch": 3.3665219981278187,
      "grad_norm": 0.11257846653461456,
      "learning_rate": 3.2884826999993413e-06,
      "loss": 0.0113,
      "step": 2057120
    },
    {
      "epoch": 3.3665547285664723,
      "grad_norm": 0.165626659989357,
      "learning_rate": 3.288416807785824e-06,
      "loss": 0.012,
      "step": 2057140
    },
    {
      "epoch": 3.3665874590051255,
      "grad_norm": 0.44173404574394226,
      "learning_rate": 3.288350915572307e-06,
      "loss": 0.0114,
      "step": 2057160
    },
    {
      "epoch": 3.366620189443779,
      "grad_norm": 0.33994171023368835,
      "learning_rate": 3.28828502335879e-06,
      "loss": 0.0189,
      "step": 2057180
    },
    {
      "epoch": 3.366652919882432,
      "grad_norm": 0.9942901730537415,
      "learning_rate": 3.2882191311452727e-06,
      "loss": 0.0143,
      "step": 2057200
    },
    {
      "epoch": 3.3666856503210854,
      "grad_norm": 0.48837152123451233,
      "learning_rate": 3.2881532389317554e-06,
      "loss": 0.0148,
      "step": 2057220
    },
    {
      "epoch": 3.366718380759739,
      "grad_norm": 0.4653116762638092,
      "learning_rate": 3.2880873467182386e-06,
      "loss": 0.0119,
      "step": 2057240
    },
    {
      "epoch": 3.366751111198392,
      "grad_norm": 0.281658411026001,
      "learning_rate": 3.2880214545047213e-06,
      "loss": 0.0053,
      "step": 2057260
    },
    {
      "epoch": 3.3667838416370457,
      "grad_norm": 0.09955111891031265,
      "learning_rate": 3.287955562291204e-06,
      "loss": 0.0089,
      "step": 2057280
    },
    {
      "epoch": 3.366816572075699,
      "grad_norm": 0.1942993849515915,
      "learning_rate": 3.2878896700776876e-06,
      "loss": 0.01,
      "step": 2057300
    },
    {
      "epoch": 3.3668493025143524,
      "grad_norm": 0.10387805104255676,
      "learning_rate": 3.2878237778641704e-06,
      "loss": 0.0117,
      "step": 2057320
    },
    {
      "epoch": 3.3668820329530056,
      "grad_norm": 0.24443453550338745,
      "learning_rate": 3.287757885650653e-06,
      "loss": 0.0132,
      "step": 2057340
    },
    {
      "epoch": 3.3669147633916587,
      "grad_norm": 0.4125540852546692,
      "learning_rate": 3.287691993437136e-06,
      "loss": 0.0109,
      "step": 2057360
    },
    {
      "epoch": 3.3669474938303123,
      "grad_norm": 0.3558344542980194,
      "learning_rate": 3.2876261012236186e-06,
      "loss": 0.0107,
      "step": 2057380
    },
    {
      "epoch": 3.3669802242689655,
      "grad_norm": 0.212579146027565,
      "learning_rate": 3.2875602090101018e-06,
      "loss": 0.0137,
      "step": 2057400
    },
    {
      "epoch": 3.367012954707619,
      "grad_norm": 2.313389301300049,
      "learning_rate": 3.2874943167965845e-06,
      "loss": 0.0124,
      "step": 2057420
    },
    {
      "epoch": 3.3670456851462722,
      "grad_norm": 0.2718757390975952,
      "learning_rate": 3.2874284245830672e-06,
      "loss": 0.0109,
      "step": 2057440
    },
    {
      "epoch": 3.367078415584926,
      "grad_norm": 0.34150969982147217,
      "learning_rate": 3.28736253236955e-06,
      "loss": 0.0071,
      "step": 2057460
    },
    {
      "epoch": 3.367111146023579,
      "grad_norm": 0.39242446422576904,
      "learning_rate": 3.287296640156033e-06,
      "loss": 0.0151,
      "step": 2057480
    },
    {
      "epoch": 3.367143876462232,
      "grad_norm": 0.1886521875858307,
      "learning_rate": 3.287230747942516e-06,
      "loss": 0.0142,
      "step": 2057500
    },
    {
      "epoch": 3.3671766069008857,
      "grad_norm": 0.44725438952445984,
      "learning_rate": 3.2871648557289986e-06,
      "loss": 0.0131,
      "step": 2057520
    },
    {
      "epoch": 3.367209337339539,
      "grad_norm": 0.118307925760746,
      "learning_rate": 3.2870989635154814e-06,
      "loss": 0.0136,
      "step": 2057540
    },
    {
      "epoch": 3.3672420677781925,
      "grad_norm": 0.09751153737306595,
      "learning_rate": 3.2870330713019645e-06,
      "loss": 0.0102,
      "step": 2057560
    },
    {
      "epoch": 3.3672747982168456,
      "grad_norm": 0.07988443225622177,
      "learning_rate": 3.2869671790884473e-06,
      "loss": 0.0096,
      "step": 2057580
    },
    {
      "epoch": 3.367307528655499,
      "grad_norm": 0.14993393421173096,
      "learning_rate": 3.28690128687493e-06,
      "loss": 0.0124,
      "step": 2057600
    },
    {
      "epoch": 3.3673402590941524,
      "grad_norm": 0.20107516646385193,
      "learning_rate": 3.2868353946614128e-06,
      "loss": 0.0099,
      "step": 2057620
    },
    {
      "epoch": 3.3673729895328055,
      "grad_norm": 0.7285380959510803,
      "learning_rate": 3.2867695024478963e-06,
      "loss": 0.0114,
      "step": 2057640
    },
    {
      "epoch": 3.367405719971459,
      "grad_norm": 0.2289809137582779,
      "learning_rate": 3.286703610234379e-06,
      "loss": 0.0126,
      "step": 2057660
    },
    {
      "epoch": 3.3674384504101122,
      "grad_norm": 0.6114997267723083,
      "learning_rate": 3.286637718020862e-06,
      "loss": 0.0081,
      "step": 2057680
    },
    {
      "epoch": 3.367471180848766,
      "grad_norm": 0.1802377998828888,
      "learning_rate": 3.286571825807345e-06,
      "loss": 0.0093,
      "step": 2057700
    },
    {
      "epoch": 3.367503911287419,
      "grad_norm": 0.2256363481283188,
      "learning_rate": 3.2865059335938277e-06,
      "loss": 0.0114,
      "step": 2057720
    },
    {
      "epoch": 3.3675366417260726,
      "grad_norm": 0.2813844382762909,
      "learning_rate": 3.2864400413803105e-06,
      "loss": 0.0109,
      "step": 2057740
    },
    {
      "epoch": 3.3675693721647257,
      "grad_norm": 0.15509650111198425,
      "learning_rate": 3.286374149166793e-06,
      "loss": 0.0087,
      "step": 2057760
    },
    {
      "epoch": 3.367602102603379,
      "grad_norm": 0.10949654877185822,
      "learning_rate": 3.286308256953276e-06,
      "loss": 0.0113,
      "step": 2057780
    },
    {
      "epoch": 3.3676348330420325,
      "grad_norm": 0.38325217366218567,
      "learning_rate": 3.286242364739759e-06,
      "loss": 0.0118,
      "step": 2057800
    },
    {
      "epoch": 3.3676675634806856,
      "grad_norm": 0.18164755403995514,
      "learning_rate": 3.286176472526242e-06,
      "loss": 0.0172,
      "step": 2057820
    },
    {
      "epoch": 3.3677002939193392,
      "grad_norm": 0.32121023535728455,
      "learning_rate": 3.2861105803127246e-06,
      "loss": 0.0127,
      "step": 2057840
    },
    {
      "epoch": 3.3677330243579924,
      "grad_norm": 0.19020192325115204,
      "learning_rate": 3.2860446880992073e-06,
      "loss": 0.0098,
      "step": 2057860
    },
    {
      "epoch": 3.367765754796646,
      "grad_norm": 0.24837425351142883,
      "learning_rate": 3.2859787958856905e-06,
      "loss": 0.0201,
      "step": 2057880
    },
    {
      "epoch": 3.367798485235299,
      "grad_norm": 0.17708946764469147,
      "learning_rate": 3.2859129036721732e-06,
      "loss": 0.0079,
      "step": 2057900
    },
    {
      "epoch": 3.3678312156739523,
      "grad_norm": 0.23036189377307892,
      "learning_rate": 3.285847011458656e-06,
      "loss": 0.0132,
      "step": 2057920
    },
    {
      "epoch": 3.367863946112606,
      "grad_norm": 0.23937055468559265,
      "learning_rate": 3.2857811192451387e-06,
      "loss": 0.0116,
      "step": 2057940
    },
    {
      "epoch": 3.367896676551259,
      "grad_norm": 0.4260479509830475,
      "learning_rate": 3.285715227031622e-06,
      "loss": 0.008,
      "step": 2057960
    },
    {
      "epoch": 3.3679294069899126,
      "grad_norm": 0.16499605774879456,
      "learning_rate": 3.2856493348181046e-06,
      "loss": 0.0088,
      "step": 2057980
    },
    {
      "epoch": 3.3679621374285658,
      "grad_norm": 0.24347858130931854,
      "learning_rate": 3.2855834426045878e-06,
      "loss": 0.014,
      "step": 2058000
    },
    {
      "epoch": 3.3679948678672194,
      "grad_norm": 0.23554138839244843,
      "learning_rate": 3.285517550391071e-06,
      "loss": 0.0143,
      "step": 2058020
    },
    {
      "epoch": 3.3680275983058725,
      "grad_norm": 0.5718679428100586,
      "learning_rate": 3.2854516581775537e-06,
      "loss": 0.0105,
      "step": 2058040
    },
    {
      "epoch": 3.3680603287445257,
      "grad_norm": 0.18925148248672485,
      "learning_rate": 3.2853857659640364e-06,
      "loss": 0.0146,
      "step": 2058060
    },
    {
      "epoch": 3.3680930591831792,
      "grad_norm": 0.1401309221982956,
      "learning_rate": 3.285319873750519e-06,
      "loss": 0.0123,
      "step": 2058080
    },
    {
      "epoch": 3.3681257896218324,
      "grad_norm": 0.48201897740364075,
      "learning_rate": 3.2852539815370023e-06,
      "loss": 0.008,
      "step": 2058100
    },
    {
      "epoch": 3.368158520060486,
      "grad_norm": 0.13266155123710632,
      "learning_rate": 3.285188089323485e-06,
      "loss": 0.0102,
      "step": 2058120
    },
    {
      "epoch": 3.368191250499139,
      "grad_norm": 0.30126386880874634,
      "learning_rate": 3.285122197109968e-06,
      "loss": 0.0117,
      "step": 2058140
    },
    {
      "epoch": 3.3682239809377927,
      "grad_norm": 0.2230757176876068,
      "learning_rate": 3.2850563048964505e-06,
      "loss": 0.0143,
      "step": 2058160
    },
    {
      "epoch": 3.368256711376446,
      "grad_norm": 0.2576482594013214,
      "learning_rate": 3.2849904126829337e-06,
      "loss": 0.008,
      "step": 2058180
    },
    {
      "epoch": 3.368289441815099,
      "grad_norm": 0.07546281069517136,
      "learning_rate": 3.2849245204694164e-06,
      "loss": 0.0111,
      "step": 2058200
    },
    {
      "epoch": 3.3683221722537526,
      "grad_norm": 0.8435709476470947,
      "learning_rate": 3.284858628255899e-06,
      "loss": 0.0085,
      "step": 2058220
    },
    {
      "epoch": 3.368354902692406,
      "grad_norm": 0.08063553273677826,
      "learning_rate": 3.284792736042382e-06,
      "loss": 0.0095,
      "step": 2058240
    },
    {
      "epoch": 3.3683876331310594,
      "grad_norm": 0.6044365763664246,
      "learning_rate": 3.2847268438288647e-06,
      "loss": 0.0095,
      "step": 2058260
    },
    {
      "epoch": 3.3684203635697125,
      "grad_norm": 0.1839667707681656,
      "learning_rate": 3.284660951615348e-06,
      "loss": 0.0095,
      "step": 2058280
    },
    {
      "epoch": 3.368453094008366,
      "grad_norm": 0.15389923751354218,
      "learning_rate": 3.2845950594018306e-06,
      "loss": 0.0096,
      "step": 2058300
    },
    {
      "epoch": 3.3684858244470193,
      "grad_norm": 0.44127708673477173,
      "learning_rate": 3.2845291671883133e-06,
      "loss": 0.0102,
      "step": 2058320
    },
    {
      "epoch": 3.3685185548856724,
      "grad_norm": 0.19481755793094635,
      "learning_rate": 3.284463274974796e-06,
      "loss": 0.0111,
      "step": 2058340
    },
    {
      "epoch": 3.368551285324326,
      "grad_norm": 0.3671051561832428,
      "learning_rate": 3.2843973827612796e-06,
      "loss": 0.0108,
      "step": 2058360
    },
    {
      "epoch": 3.368584015762979,
      "grad_norm": 0.2995096445083618,
      "learning_rate": 3.2843314905477624e-06,
      "loss": 0.0114,
      "step": 2058380
    },
    {
      "epoch": 3.3686167462016328,
      "grad_norm": 0.7749899625778198,
      "learning_rate": 3.284265598334245e-06,
      "loss": 0.0187,
      "step": 2058400
    },
    {
      "epoch": 3.368649476640286,
      "grad_norm": 0.322969913482666,
      "learning_rate": 3.2841997061207283e-06,
      "loss": 0.0152,
      "step": 2058420
    },
    {
      "epoch": 3.3686822070789395,
      "grad_norm": 0.44658342003822327,
      "learning_rate": 3.284133813907211e-06,
      "loss": 0.0174,
      "step": 2058440
    },
    {
      "epoch": 3.3687149375175927,
      "grad_norm": 0.2586933970451355,
      "learning_rate": 3.2840679216936938e-06,
      "loss": 0.0102,
      "step": 2058460
    },
    {
      "epoch": 3.368747667956246,
      "grad_norm": 0.2722567021846771,
      "learning_rate": 3.2840020294801765e-06,
      "loss": 0.0101,
      "step": 2058480
    },
    {
      "epoch": 3.3687803983948994,
      "grad_norm": 0.8468477725982666,
      "learning_rate": 3.2839361372666597e-06,
      "loss": 0.0138,
      "step": 2058500
    },
    {
      "epoch": 3.3688131288335526,
      "grad_norm": 0.19585885107517242,
      "learning_rate": 3.2838702450531424e-06,
      "loss": 0.01,
      "step": 2058520
    },
    {
      "epoch": 3.368845859272206,
      "grad_norm": 0.27379682660102844,
      "learning_rate": 3.283804352839625e-06,
      "loss": 0.0135,
      "step": 2058540
    },
    {
      "epoch": 3.3688785897108593,
      "grad_norm": 0.1758047193288803,
      "learning_rate": 3.283738460626108e-06,
      "loss": 0.0162,
      "step": 2058560
    },
    {
      "epoch": 3.368911320149513,
      "grad_norm": 0.2133626937866211,
      "learning_rate": 3.283672568412591e-06,
      "loss": 0.0089,
      "step": 2058580
    },
    {
      "epoch": 3.368944050588166,
      "grad_norm": 0.2116481214761734,
      "learning_rate": 3.2836066761990738e-06,
      "loss": 0.0117,
      "step": 2058600
    },
    {
      "epoch": 3.368976781026819,
      "grad_norm": 0.34860751032829285,
      "learning_rate": 3.2835407839855565e-06,
      "loss": 0.0082,
      "step": 2058620
    },
    {
      "epoch": 3.369009511465473,
      "grad_norm": 0.08786698430776596,
      "learning_rate": 3.2834748917720393e-06,
      "loss": 0.0108,
      "step": 2058640
    },
    {
      "epoch": 3.369042241904126,
      "grad_norm": 0.2190432995557785,
      "learning_rate": 3.2834089995585224e-06,
      "loss": 0.0094,
      "step": 2058660
    },
    {
      "epoch": 3.3690749723427795,
      "grad_norm": 0.38150396943092346,
      "learning_rate": 3.283343107345005e-06,
      "loss": 0.0098,
      "step": 2058680
    },
    {
      "epoch": 3.3691077027814327,
      "grad_norm": 0.09004276990890503,
      "learning_rate": 3.2832772151314883e-06,
      "loss": 0.0078,
      "step": 2058700
    },
    {
      "epoch": 3.369140433220086,
      "grad_norm": 0.04018078371882439,
      "learning_rate": 3.2832113229179715e-06,
      "loss": 0.0146,
      "step": 2058720
    },
    {
      "epoch": 3.3691731636587394,
      "grad_norm": 0.749173104763031,
      "learning_rate": 3.2831454307044542e-06,
      "loss": 0.0127,
      "step": 2058740
    },
    {
      "epoch": 3.3692058940973926,
      "grad_norm": 0.4831186532974243,
      "learning_rate": 3.283079538490937e-06,
      "loss": 0.0097,
      "step": 2058760
    },
    {
      "epoch": 3.369238624536046,
      "grad_norm": 0.32368820905685425,
      "learning_rate": 3.2830136462774197e-06,
      "loss": 0.0154,
      "step": 2058780
    },
    {
      "epoch": 3.3692713549746993,
      "grad_norm": 0.281759113073349,
      "learning_rate": 3.2829477540639024e-06,
      "loss": 0.0067,
      "step": 2058800
    },
    {
      "epoch": 3.3693040854133525,
      "grad_norm": 0.04875829070806503,
      "learning_rate": 3.2828818618503856e-06,
      "loss": 0.0063,
      "step": 2058820
    },
    {
      "epoch": 3.369336815852006,
      "grad_norm": 0.09651809185743332,
      "learning_rate": 3.2828159696368683e-06,
      "loss": 0.0118,
      "step": 2058840
    },
    {
      "epoch": 3.369369546290659,
      "grad_norm": 0.3452463448047638,
      "learning_rate": 3.282750077423351e-06,
      "loss": 0.0123,
      "step": 2058860
    },
    {
      "epoch": 3.369402276729313,
      "grad_norm": 0.1311873346567154,
      "learning_rate": 3.282684185209834e-06,
      "loss": 0.0153,
      "step": 2058880
    },
    {
      "epoch": 3.369435007167966,
      "grad_norm": 0.40597933530807495,
      "learning_rate": 3.282618292996317e-06,
      "loss": 0.0148,
      "step": 2058900
    },
    {
      "epoch": 3.3694677376066196,
      "grad_norm": 0.13663558661937714,
      "learning_rate": 3.2825524007827997e-06,
      "loss": 0.0089,
      "step": 2058920
    },
    {
      "epoch": 3.3695004680452727,
      "grad_norm": 0.1723290979862213,
      "learning_rate": 3.2824865085692825e-06,
      "loss": 0.0143,
      "step": 2058940
    },
    {
      "epoch": 3.369533198483926,
      "grad_norm": 0.06292423605918884,
      "learning_rate": 3.282420616355765e-06,
      "loss": 0.0102,
      "step": 2058960
    },
    {
      "epoch": 3.3695659289225794,
      "grad_norm": 0.1255665272474289,
      "learning_rate": 3.2823547241422484e-06,
      "loss": 0.0075,
      "step": 2058980
    },
    {
      "epoch": 3.3695986593612326,
      "grad_norm": 0.3092789351940155,
      "learning_rate": 3.282288831928731e-06,
      "loss": 0.0144,
      "step": 2059000
    },
    {
      "epoch": 3.369631389799886,
      "grad_norm": 0.22808842360973358,
      "learning_rate": 3.282222939715214e-06,
      "loss": 0.0107,
      "step": 2059020
    },
    {
      "epoch": 3.3696641202385393,
      "grad_norm": 0.10562460124492645,
      "learning_rate": 3.2821570475016966e-06,
      "loss": 0.0114,
      "step": 2059040
    },
    {
      "epoch": 3.369696850677193,
      "grad_norm": 0.11259660869836807,
      "learning_rate": 3.28209115528818e-06,
      "loss": 0.011,
      "step": 2059060
    },
    {
      "epoch": 3.369729581115846,
      "grad_norm": 0.7230926156044006,
      "learning_rate": 3.282025263074663e-06,
      "loss": 0.0146,
      "step": 2059080
    },
    {
      "epoch": 3.3697623115544992,
      "grad_norm": 0.1517905741930008,
      "learning_rate": 3.2819593708611457e-06,
      "loss": 0.0079,
      "step": 2059100
    },
    {
      "epoch": 3.369795041993153,
      "grad_norm": 0.3799887001514435,
      "learning_rate": 3.281893478647629e-06,
      "loss": 0.0102,
      "step": 2059120
    },
    {
      "epoch": 3.369827772431806,
      "grad_norm": 0.15132325887680054,
      "learning_rate": 3.2818275864341116e-06,
      "loss": 0.0113,
      "step": 2059140
    },
    {
      "epoch": 3.3698605028704596,
      "grad_norm": 0.2243598848581314,
      "learning_rate": 3.2817616942205943e-06,
      "loss": 0.0162,
      "step": 2059160
    },
    {
      "epoch": 3.3698932333091127,
      "grad_norm": 1.0120739936828613,
      "learning_rate": 3.281695802007077e-06,
      "loss": 0.0111,
      "step": 2059180
    },
    {
      "epoch": 3.3699259637477663,
      "grad_norm": 0.2969885468482971,
      "learning_rate": 3.28162990979356e-06,
      "loss": 0.0072,
      "step": 2059200
    },
    {
      "epoch": 3.3699586941864195,
      "grad_norm": 0.21220846474170685,
      "learning_rate": 3.281564017580043e-06,
      "loss": 0.0172,
      "step": 2059220
    },
    {
      "epoch": 3.3699914246250726,
      "grad_norm": 0.22909623384475708,
      "learning_rate": 3.2814981253665257e-06,
      "loss": 0.0112,
      "step": 2059240
    },
    {
      "epoch": 3.370024155063726,
      "grad_norm": 0.261919230222702,
      "learning_rate": 3.2814322331530084e-06,
      "loss": 0.0112,
      "step": 2059260
    },
    {
      "epoch": 3.3700568855023794,
      "grad_norm": 0.5061550736427307,
      "learning_rate": 3.281366340939491e-06,
      "loss": 0.0131,
      "step": 2059280
    },
    {
      "epoch": 3.370089615941033,
      "grad_norm": 0.39412257075309753,
      "learning_rate": 3.2813004487259743e-06,
      "loss": 0.0086,
      "step": 2059300
    },
    {
      "epoch": 3.370122346379686,
      "grad_norm": 0.23754510283470154,
      "learning_rate": 3.281234556512457e-06,
      "loss": 0.009,
      "step": 2059320
    },
    {
      "epoch": 3.3701550768183397,
      "grad_norm": 0.19934852421283722,
      "learning_rate": 3.28116866429894e-06,
      "loss": 0.0106,
      "step": 2059340
    },
    {
      "epoch": 3.370187807256993,
      "grad_norm": 0.18064065277576447,
      "learning_rate": 3.2811027720854225e-06,
      "loss": 0.0086,
      "step": 2059360
    },
    {
      "epoch": 3.370220537695646,
      "grad_norm": 0.11419207602739334,
      "learning_rate": 3.2810368798719057e-06,
      "loss": 0.0106,
      "step": 2059380
    },
    {
      "epoch": 3.3702532681342996,
      "grad_norm": 0.16736294329166412,
      "learning_rate": 3.280970987658389e-06,
      "loss": 0.0099,
      "step": 2059400
    },
    {
      "epoch": 3.3702859985729527,
      "grad_norm": 0.38283082842826843,
      "learning_rate": 3.2809050954448716e-06,
      "loss": 0.0134,
      "step": 2059420
    },
    {
      "epoch": 3.3703187290116063,
      "grad_norm": 0.3328978717327118,
      "learning_rate": 3.2808392032313548e-06,
      "loss": 0.0112,
      "step": 2059440
    },
    {
      "epoch": 3.3703514594502595,
      "grad_norm": 0.13255690038204193,
      "learning_rate": 3.2807733110178375e-06,
      "loss": 0.0092,
      "step": 2059460
    },
    {
      "epoch": 3.370384189888913,
      "grad_norm": 0.1526906043291092,
      "learning_rate": 3.2807074188043203e-06,
      "loss": 0.0137,
      "step": 2059480
    },
    {
      "epoch": 3.3704169203275662,
      "grad_norm": 0.30403971672058105,
      "learning_rate": 3.280641526590803e-06,
      "loss": 0.0114,
      "step": 2059500
    },
    {
      "epoch": 3.3704496507662194,
      "grad_norm": 0.2948527932167053,
      "learning_rate": 3.280575634377286e-06,
      "loss": 0.013,
      "step": 2059520
    },
    {
      "epoch": 3.370482381204873,
      "grad_norm": 0.4188661575317383,
      "learning_rate": 3.280509742163769e-06,
      "loss": 0.0105,
      "step": 2059540
    },
    {
      "epoch": 3.370515111643526,
      "grad_norm": 0.02946525812149048,
      "learning_rate": 3.2804438499502516e-06,
      "loss": 0.0088,
      "step": 2059560
    },
    {
      "epoch": 3.3705478420821797,
      "grad_norm": 0.08365292847156525,
      "learning_rate": 3.2803779577367344e-06,
      "loss": 0.0156,
      "step": 2059580
    },
    {
      "epoch": 3.370580572520833,
      "grad_norm": 0.20568907260894775,
      "learning_rate": 3.2803120655232175e-06,
      "loss": 0.0144,
      "step": 2059600
    },
    {
      "epoch": 3.3706133029594865,
      "grad_norm": 0.3440214693546295,
      "learning_rate": 3.2802461733097003e-06,
      "loss": 0.0106,
      "step": 2059620
    },
    {
      "epoch": 3.3706460333981396,
      "grad_norm": 0.12403156608343124,
      "learning_rate": 3.280180281096183e-06,
      "loss": 0.0102,
      "step": 2059640
    },
    {
      "epoch": 3.3706787638367928,
      "grad_norm": 0.9401658177375793,
      "learning_rate": 3.2801143888826658e-06,
      "loss": 0.0139,
      "step": 2059660
    },
    {
      "epoch": 3.3707114942754464,
      "grad_norm": 0.8397180438041687,
      "learning_rate": 3.2800484966691485e-06,
      "loss": 0.0123,
      "step": 2059680
    },
    {
      "epoch": 3.3707442247140995,
      "grad_norm": 0.13797038793563843,
      "learning_rate": 3.2799826044556317e-06,
      "loss": 0.0098,
      "step": 2059700
    },
    {
      "epoch": 3.370776955152753,
      "grad_norm": 0.292606920003891,
      "learning_rate": 3.2799167122421144e-06,
      "loss": 0.0129,
      "step": 2059720
    },
    {
      "epoch": 3.3708096855914063,
      "grad_norm": 0.08126810193061829,
      "learning_rate": 3.279850820028597e-06,
      "loss": 0.0167,
      "step": 2059740
    },
    {
      "epoch": 3.37084241603006,
      "grad_norm": 0.16763275861740112,
      "learning_rate": 3.2797849278150807e-06,
      "loss": 0.0088,
      "step": 2059760
    },
    {
      "epoch": 3.370875146468713,
      "grad_norm": 0.4242807626724243,
      "learning_rate": 3.2797190356015635e-06,
      "loss": 0.0153,
      "step": 2059780
    },
    {
      "epoch": 3.370907876907366,
      "grad_norm": 0.1530187875032425,
      "learning_rate": 3.279653143388046e-06,
      "loss": 0.0099,
      "step": 2059800
    },
    {
      "epoch": 3.3709406073460197,
      "grad_norm": 0.3124838173389435,
      "learning_rate": 3.279587251174529e-06,
      "loss": 0.0071,
      "step": 2059820
    },
    {
      "epoch": 3.370973337784673,
      "grad_norm": 0.16320981085300446,
      "learning_rate": 3.279521358961012e-06,
      "loss": 0.0119,
      "step": 2059840
    },
    {
      "epoch": 3.3710060682233265,
      "grad_norm": 0.1089460626244545,
      "learning_rate": 3.279455466747495e-06,
      "loss": 0.0112,
      "step": 2059860
    },
    {
      "epoch": 3.3710387986619796,
      "grad_norm": 0.3245982527732849,
      "learning_rate": 3.2793895745339776e-06,
      "loss": 0.0144,
      "step": 2059880
    },
    {
      "epoch": 3.3710715291006332,
      "grad_norm": 0.6972867250442505,
      "learning_rate": 3.2793236823204603e-06,
      "loss": 0.0092,
      "step": 2059900
    },
    {
      "epoch": 3.3711042595392864,
      "grad_norm": 0.15496531128883362,
      "learning_rate": 3.2792577901069435e-06,
      "loss": 0.0084,
      "step": 2059920
    },
    {
      "epoch": 3.3711369899779395,
      "grad_norm": 0.2342333197593689,
      "learning_rate": 3.2791918978934262e-06,
      "loss": 0.0114,
      "step": 2059940
    },
    {
      "epoch": 3.371169720416593,
      "grad_norm": 0.32849496603012085,
      "learning_rate": 3.279126005679909e-06,
      "loss": 0.0118,
      "step": 2059960
    },
    {
      "epoch": 3.3712024508552463,
      "grad_norm": 0.2466028332710266,
      "learning_rate": 3.2790601134663917e-06,
      "loss": 0.0076,
      "step": 2059980
    },
    {
      "epoch": 3.3712351812939,
      "grad_norm": 0.23825255036354065,
      "learning_rate": 3.278994221252875e-06,
      "loss": 0.0112,
      "step": 2060000
    },
    {
      "epoch": 3.371267911732553,
      "grad_norm": 3.4629552364349365,
      "learning_rate": 3.2789283290393576e-06,
      "loss": 0.0106,
      "step": 2060020
    },
    {
      "epoch": 3.3713006421712066,
      "grad_norm": 0.23882964253425598,
      "learning_rate": 3.2788624368258404e-06,
      "loss": 0.0144,
      "step": 2060040
    },
    {
      "epoch": 3.3713333726098598,
      "grad_norm": 0.2570595145225525,
      "learning_rate": 3.278796544612323e-06,
      "loss": 0.0131,
      "step": 2060060
    },
    {
      "epoch": 3.371366103048513,
      "grad_norm": 0.3915046453475952,
      "learning_rate": 3.2787306523988063e-06,
      "loss": 0.0115,
      "step": 2060080
    },
    {
      "epoch": 3.3713988334871665,
      "grad_norm": 0.7424907684326172,
      "learning_rate": 3.278664760185289e-06,
      "loss": 0.0085,
      "step": 2060100
    },
    {
      "epoch": 3.3714315639258197,
      "grad_norm": 0.21034963428974152,
      "learning_rate": 3.278598867971772e-06,
      "loss": 0.0086,
      "step": 2060120
    },
    {
      "epoch": 3.3714642943644733,
      "grad_norm": 0.287584513425827,
      "learning_rate": 3.2785329757582553e-06,
      "loss": 0.0086,
      "step": 2060140
    },
    {
      "epoch": 3.3714970248031264,
      "grad_norm": 0.19243404269218445,
      "learning_rate": 3.278467083544738e-06,
      "loss": 0.0127,
      "step": 2060160
    },
    {
      "epoch": 3.3715297552417796,
      "grad_norm": 0.15925730764865875,
      "learning_rate": 3.278401191331221e-06,
      "loss": 0.0111,
      "step": 2060180
    },
    {
      "epoch": 3.371562485680433,
      "grad_norm": 0.19598864018917084,
      "learning_rate": 3.2783352991177035e-06,
      "loss": 0.0105,
      "step": 2060200
    },
    {
      "epoch": 3.3715952161190863,
      "grad_norm": 0.674962043762207,
      "learning_rate": 3.2782694069041863e-06,
      "loss": 0.0073,
      "step": 2060220
    },
    {
      "epoch": 3.37162794655774,
      "grad_norm": 0.31608420610427856,
      "learning_rate": 3.2782035146906694e-06,
      "loss": 0.0102,
      "step": 2060240
    },
    {
      "epoch": 3.371660676996393,
      "grad_norm": 0.3986324369907379,
      "learning_rate": 3.278137622477152e-06,
      "loss": 0.0147,
      "step": 2060260
    },
    {
      "epoch": 3.371693407435046,
      "grad_norm": 0.42218491435050964,
      "learning_rate": 3.278071730263635e-06,
      "loss": 0.0107,
      "step": 2060280
    },
    {
      "epoch": 3.3717261378737,
      "grad_norm": 0.19441181421279907,
      "learning_rate": 3.2780058380501177e-06,
      "loss": 0.0097,
      "step": 2060300
    },
    {
      "epoch": 3.371758868312353,
      "grad_norm": 0.40828073024749756,
      "learning_rate": 3.277939945836601e-06,
      "loss": 0.0157,
      "step": 2060320
    },
    {
      "epoch": 3.3717915987510065,
      "grad_norm": 0.3906368017196655,
      "learning_rate": 3.2778740536230836e-06,
      "loss": 0.0063,
      "step": 2060340
    },
    {
      "epoch": 3.3718243291896597,
      "grad_norm": 0.15960927307605743,
      "learning_rate": 3.2778081614095663e-06,
      "loss": 0.0091,
      "step": 2060360
    },
    {
      "epoch": 3.3718570596283133,
      "grad_norm": 0.6604891419410706,
      "learning_rate": 3.277742269196049e-06,
      "loss": 0.0169,
      "step": 2060380
    },
    {
      "epoch": 3.3718897900669664,
      "grad_norm": 0.2845669984817505,
      "learning_rate": 3.2776763769825322e-06,
      "loss": 0.0097,
      "step": 2060400
    },
    {
      "epoch": 3.3719225205056196,
      "grad_norm": 0.24763068556785583,
      "learning_rate": 3.277610484769015e-06,
      "loss": 0.0084,
      "step": 2060420
    },
    {
      "epoch": 3.371955250944273,
      "grad_norm": 2.427535057067871,
      "learning_rate": 3.2775445925554977e-06,
      "loss": 0.0101,
      "step": 2060440
    },
    {
      "epoch": 3.3719879813829263,
      "grad_norm": 0.4465078115463257,
      "learning_rate": 3.2774787003419813e-06,
      "loss": 0.0156,
      "step": 2060460
    },
    {
      "epoch": 3.37202071182158,
      "grad_norm": 0.5492212176322937,
      "learning_rate": 3.277412808128464e-06,
      "loss": 0.0094,
      "step": 2060480
    },
    {
      "epoch": 3.372053442260233,
      "grad_norm": 0.23580630123615265,
      "learning_rate": 3.2773469159149468e-06,
      "loss": 0.0126,
      "step": 2060500
    },
    {
      "epoch": 3.3720861726988867,
      "grad_norm": 0.11032799631357193,
      "learning_rate": 3.2772810237014295e-06,
      "loss": 0.0086,
      "step": 2060520
    },
    {
      "epoch": 3.37211890313754,
      "grad_norm": 0.7915121912956238,
      "learning_rate": 3.2772151314879127e-06,
      "loss": 0.0135,
      "step": 2060540
    },
    {
      "epoch": 3.372151633576193,
      "grad_norm": 0.041870035231113434,
      "learning_rate": 3.2771492392743954e-06,
      "loss": 0.0143,
      "step": 2060560
    },
    {
      "epoch": 3.3721843640148466,
      "grad_norm": 0.4794972538948059,
      "learning_rate": 3.277083347060878e-06,
      "loss": 0.0101,
      "step": 2060580
    },
    {
      "epoch": 3.3722170944534997,
      "grad_norm": 1.33209228515625,
      "learning_rate": 3.277017454847361e-06,
      "loss": 0.0129,
      "step": 2060600
    },
    {
      "epoch": 3.3722498248921533,
      "grad_norm": 0.5021963715553284,
      "learning_rate": 3.276951562633844e-06,
      "loss": 0.0072,
      "step": 2060620
    },
    {
      "epoch": 3.3722825553308065,
      "grad_norm": 0.1038236990571022,
      "learning_rate": 3.2768856704203268e-06,
      "loss": 0.0105,
      "step": 2060640
    },
    {
      "epoch": 3.37231528576946,
      "grad_norm": 0.46221035718917847,
      "learning_rate": 3.2768197782068095e-06,
      "loss": 0.0087,
      "step": 2060660
    },
    {
      "epoch": 3.372348016208113,
      "grad_norm": 0.6090919375419617,
      "learning_rate": 3.2767538859932923e-06,
      "loss": 0.0157,
      "step": 2060680
    },
    {
      "epoch": 3.3723807466467663,
      "grad_norm": 0.1173541322350502,
      "learning_rate": 3.276687993779775e-06,
      "loss": 0.0093,
      "step": 2060700
    },
    {
      "epoch": 3.37241347708542,
      "grad_norm": 0.22513431310653687,
      "learning_rate": 3.276622101566258e-06,
      "loss": 0.0131,
      "step": 2060720
    },
    {
      "epoch": 3.372446207524073,
      "grad_norm": 0.2347847819328308,
      "learning_rate": 3.276556209352741e-06,
      "loss": 0.0131,
      "step": 2060740
    },
    {
      "epoch": 3.3724789379627267,
      "grad_norm": 0.12089294195175171,
      "learning_rate": 3.2764903171392236e-06,
      "loss": 0.0125,
      "step": 2060760
    },
    {
      "epoch": 3.37251166840138,
      "grad_norm": 0.3931809067726135,
      "learning_rate": 3.2764244249257064e-06,
      "loss": 0.0167,
      "step": 2060780
    },
    {
      "epoch": 3.3725443988400334,
      "grad_norm": 0.4423879086971283,
      "learning_rate": 3.2763585327121895e-06,
      "loss": 0.0126,
      "step": 2060800
    },
    {
      "epoch": 3.3725771292786866,
      "grad_norm": 0.20633751153945923,
      "learning_rate": 3.2762926404986727e-06,
      "loss": 0.0101,
      "step": 2060820
    },
    {
      "epoch": 3.3726098597173397,
      "grad_norm": 0.34195396304130554,
      "learning_rate": 3.2762267482851555e-06,
      "loss": 0.0133,
      "step": 2060840
    },
    {
      "epoch": 3.3726425901559933,
      "grad_norm": 0.26527541875839233,
      "learning_rate": 3.2761608560716386e-06,
      "loss": 0.0119,
      "step": 2060860
    },
    {
      "epoch": 3.3726753205946465,
      "grad_norm": 0.16404791176319122,
      "learning_rate": 3.2760949638581214e-06,
      "loss": 0.0124,
      "step": 2060880
    },
    {
      "epoch": 3.3727080510333,
      "grad_norm": 0.12227122485637665,
      "learning_rate": 3.276029071644604e-06,
      "loss": 0.0174,
      "step": 2060900
    },
    {
      "epoch": 3.372740781471953,
      "grad_norm": 0.200786292552948,
      "learning_rate": 3.275963179431087e-06,
      "loss": 0.0134,
      "step": 2060920
    },
    {
      "epoch": 3.372773511910607,
      "grad_norm": 0.6099748015403748,
      "learning_rate": 3.27589728721757e-06,
      "loss": 0.0133,
      "step": 2060940
    },
    {
      "epoch": 3.37280624234926,
      "grad_norm": 0.13595367968082428,
      "learning_rate": 3.2758313950040527e-06,
      "loss": 0.0101,
      "step": 2060960
    },
    {
      "epoch": 3.372838972787913,
      "grad_norm": 0.4258039593696594,
      "learning_rate": 3.2757655027905355e-06,
      "loss": 0.0155,
      "step": 2060980
    },
    {
      "epoch": 3.3728717032265667,
      "grad_norm": 0.24332879483699799,
      "learning_rate": 3.2756996105770182e-06,
      "loss": 0.0112,
      "step": 2061000
    },
    {
      "epoch": 3.37290443366522,
      "grad_norm": 0.263128399848938,
      "learning_rate": 3.2756337183635014e-06,
      "loss": 0.0143,
      "step": 2061020
    },
    {
      "epoch": 3.3729371641038735,
      "grad_norm": 0.14090055227279663,
      "learning_rate": 3.275567826149984e-06,
      "loss": 0.0113,
      "step": 2061040
    },
    {
      "epoch": 3.3729698945425266,
      "grad_norm": 0.08186360448598862,
      "learning_rate": 3.275501933936467e-06,
      "loss": 0.0135,
      "step": 2061060
    },
    {
      "epoch": 3.37300262498118,
      "grad_norm": 0.49854376912117004,
      "learning_rate": 3.2754360417229496e-06,
      "loss": 0.0125,
      "step": 2061080
    },
    {
      "epoch": 3.3730353554198333,
      "grad_norm": 1.525686502456665,
      "learning_rate": 3.2753701495094323e-06,
      "loss": 0.0106,
      "step": 2061100
    },
    {
      "epoch": 3.3730680858584865,
      "grad_norm": 0.1342783421278,
      "learning_rate": 3.2753042572959155e-06,
      "loss": 0.0156,
      "step": 2061120
    },
    {
      "epoch": 3.37310081629714,
      "grad_norm": 0.5820068717002869,
      "learning_rate": 3.2752383650823982e-06,
      "loss": 0.0099,
      "step": 2061140
    },
    {
      "epoch": 3.3731335467357932,
      "grad_norm": 0.22127722203731537,
      "learning_rate": 3.275172472868882e-06,
      "loss": 0.013,
      "step": 2061160
    },
    {
      "epoch": 3.373166277174447,
      "grad_norm": 0.43933767080307007,
      "learning_rate": 3.2751065806553646e-06,
      "loss": 0.0136,
      "step": 2061180
    },
    {
      "epoch": 3.3731990076131,
      "grad_norm": 0.4511139988899231,
      "learning_rate": 3.2750406884418473e-06,
      "loss": 0.0177,
      "step": 2061200
    },
    {
      "epoch": 3.3732317380517536,
      "grad_norm": 0.6562356352806091,
      "learning_rate": 3.27497479622833e-06,
      "loss": 0.0155,
      "step": 2061220
    },
    {
      "epoch": 3.3732644684904067,
      "grad_norm": 0.3255435824394226,
      "learning_rate": 3.274908904014813e-06,
      "loss": 0.0117,
      "step": 2061240
    },
    {
      "epoch": 3.37329719892906,
      "grad_norm": 0.30284398794174194,
      "learning_rate": 3.274843011801296e-06,
      "loss": 0.0079,
      "step": 2061260
    },
    {
      "epoch": 3.3733299293677135,
      "grad_norm": 0.21194159984588623,
      "learning_rate": 3.2747771195877787e-06,
      "loss": 0.0102,
      "step": 2061280
    },
    {
      "epoch": 3.3733626598063666,
      "grad_norm": 0.35254186391830444,
      "learning_rate": 3.2747112273742614e-06,
      "loss": 0.0106,
      "step": 2061300
    },
    {
      "epoch": 3.37339539024502,
      "grad_norm": 0.2670229375362396,
      "learning_rate": 3.274645335160744e-06,
      "loss": 0.0114,
      "step": 2061320
    },
    {
      "epoch": 3.3734281206836734,
      "grad_norm": 0.19233816862106323,
      "learning_rate": 3.2745794429472273e-06,
      "loss": 0.0143,
      "step": 2061340
    },
    {
      "epoch": 3.373460851122327,
      "grad_norm": 0.15449488162994385,
      "learning_rate": 3.27451355073371e-06,
      "loss": 0.0102,
      "step": 2061360
    },
    {
      "epoch": 3.37349358156098,
      "grad_norm": 0.32002565264701843,
      "learning_rate": 3.274447658520193e-06,
      "loss": 0.0146,
      "step": 2061380
    },
    {
      "epoch": 3.3735263119996333,
      "grad_norm": 0.18570208549499512,
      "learning_rate": 3.2743817663066756e-06,
      "loss": 0.0103,
      "step": 2061400
    },
    {
      "epoch": 3.373559042438287,
      "grad_norm": 0.47982048988342285,
      "learning_rate": 3.2743158740931587e-06,
      "loss": 0.0101,
      "step": 2061420
    },
    {
      "epoch": 3.37359177287694,
      "grad_norm": 0.28131017088890076,
      "learning_rate": 3.2742499818796415e-06,
      "loss": 0.0097,
      "step": 2061440
    },
    {
      "epoch": 3.3736245033155936,
      "grad_norm": 0.585671067237854,
      "learning_rate": 3.274184089666124e-06,
      "loss": 0.0098,
      "step": 2061460
    },
    {
      "epoch": 3.3736572337542468,
      "grad_norm": 0.3349446654319763,
      "learning_rate": 3.274118197452607e-06,
      "loss": 0.0075,
      "step": 2061480
    },
    {
      "epoch": 3.3736899641929003,
      "grad_norm": 0.06726380437612534,
      "learning_rate": 3.27405230523909e-06,
      "loss": 0.0108,
      "step": 2061500
    },
    {
      "epoch": 3.3737226946315535,
      "grad_norm": 0.18892769515514374,
      "learning_rate": 3.2739864130255733e-06,
      "loss": 0.0153,
      "step": 2061520
    },
    {
      "epoch": 3.3737554250702066,
      "grad_norm": 0.2155391126871109,
      "learning_rate": 3.273920520812056e-06,
      "loss": 0.007,
      "step": 2061540
    },
    {
      "epoch": 3.3737881555088602,
      "grad_norm": 0.10338184237480164,
      "learning_rate": 3.273854628598539e-06,
      "loss": 0.0111,
      "step": 2061560
    },
    {
      "epoch": 3.3738208859475134,
      "grad_norm": 0.18536588549613953,
      "learning_rate": 3.273788736385022e-06,
      "loss": 0.0141,
      "step": 2061580
    },
    {
      "epoch": 3.373853616386167,
      "grad_norm": 0.08312933146953583,
      "learning_rate": 3.2737228441715046e-06,
      "loss": 0.011,
      "step": 2061600
    },
    {
      "epoch": 3.37388634682482,
      "grad_norm": 0.19748596847057343,
      "learning_rate": 3.2736569519579874e-06,
      "loss": 0.008,
      "step": 2061620
    },
    {
      "epoch": 3.3739190772634737,
      "grad_norm": 0.4327545762062073,
      "learning_rate": 3.27359105974447e-06,
      "loss": 0.0082,
      "step": 2061640
    },
    {
      "epoch": 3.373951807702127,
      "grad_norm": 0.4560016989707947,
      "learning_rate": 3.2735251675309533e-06,
      "loss": 0.0067,
      "step": 2061660
    },
    {
      "epoch": 3.37398453814078,
      "grad_norm": 0.07630819827318192,
      "learning_rate": 3.273459275317436e-06,
      "loss": 0.0122,
      "step": 2061680
    },
    {
      "epoch": 3.3740172685794336,
      "grad_norm": 0.14718495309352875,
      "learning_rate": 3.2733933831039188e-06,
      "loss": 0.0133,
      "step": 2061700
    },
    {
      "epoch": 3.3740499990180868,
      "grad_norm": 0.23450960218906403,
      "learning_rate": 3.2733274908904015e-06,
      "loss": 0.0117,
      "step": 2061720
    },
    {
      "epoch": 3.3740827294567404,
      "grad_norm": 0.148294597864151,
      "learning_rate": 3.2732615986768847e-06,
      "loss": 0.0096,
      "step": 2061740
    },
    {
      "epoch": 3.3741154598953935,
      "grad_norm": 0.44801411032676697,
      "learning_rate": 3.2731957064633674e-06,
      "loss": 0.0135,
      "step": 2061760
    },
    {
      "epoch": 3.3741481903340467,
      "grad_norm": 0.2614637315273285,
      "learning_rate": 3.27312981424985e-06,
      "loss": 0.0162,
      "step": 2061780
    },
    {
      "epoch": 3.3741809207727003,
      "grad_norm": 0.5654317736625671,
      "learning_rate": 3.273063922036333e-06,
      "loss": 0.0098,
      "step": 2061800
    },
    {
      "epoch": 3.3742136512113534,
      "grad_norm": 0.875934898853302,
      "learning_rate": 3.272998029822816e-06,
      "loss": 0.0105,
      "step": 2061820
    },
    {
      "epoch": 3.374246381650007,
      "grad_norm": 0.22019484639167786,
      "learning_rate": 3.272932137609299e-06,
      "loss": 0.0097,
      "step": 2061840
    },
    {
      "epoch": 3.37427911208866,
      "grad_norm": 0.11236988008022308,
      "learning_rate": 3.2728662453957815e-06,
      "loss": 0.012,
      "step": 2061860
    },
    {
      "epoch": 3.3743118425273133,
      "grad_norm": 0.519422173500061,
      "learning_rate": 3.272800353182265e-06,
      "loss": 0.011,
      "step": 2061880
    },
    {
      "epoch": 3.374344572965967,
      "grad_norm": 0.2255331128835678,
      "learning_rate": 3.272734460968748e-06,
      "loss": 0.013,
      "step": 2061900
    },
    {
      "epoch": 3.37437730340462,
      "grad_norm": 0.039734452962875366,
      "learning_rate": 3.2726685687552306e-06,
      "loss": 0.01,
      "step": 2061920
    },
    {
      "epoch": 3.3744100338432736,
      "grad_norm": 0.27486738562583923,
      "learning_rate": 3.2726026765417133e-06,
      "loss": 0.0118,
      "step": 2061940
    },
    {
      "epoch": 3.374442764281927,
      "grad_norm": 0.27486446499824524,
      "learning_rate": 3.2725367843281965e-06,
      "loss": 0.0096,
      "step": 2061960
    },
    {
      "epoch": 3.3744754947205804,
      "grad_norm": 0.1138383150100708,
      "learning_rate": 3.2724708921146792e-06,
      "loss": 0.0091,
      "step": 2061980
    },
    {
      "epoch": 3.3745082251592335,
      "grad_norm": 0.8258125185966492,
      "learning_rate": 3.272404999901162e-06,
      "loss": 0.0157,
      "step": 2062000
    },
    {
      "epoch": 3.3745409555978867,
      "grad_norm": 0.2548616826534271,
      "learning_rate": 3.2723391076876447e-06,
      "loss": 0.0086,
      "step": 2062020
    },
    {
      "epoch": 3.3745736860365403,
      "grad_norm": 0.5008785724639893,
      "learning_rate": 3.272273215474128e-06,
      "loss": 0.0147,
      "step": 2062040
    },
    {
      "epoch": 3.3746064164751934,
      "grad_norm": 0.050873953849077225,
      "learning_rate": 3.2722073232606106e-06,
      "loss": 0.0157,
      "step": 2062060
    },
    {
      "epoch": 3.374639146913847,
      "grad_norm": 0.12031480669975281,
      "learning_rate": 3.2721414310470934e-06,
      "loss": 0.0132,
      "step": 2062080
    },
    {
      "epoch": 3.3746718773525,
      "grad_norm": 0.3607737720012665,
      "learning_rate": 3.272075538833576e-06,
      "loss": 0.0114,
      "step": 2062100
    },
    {
      "epoch": 3.3747046077911538,
      "grad_norm": 0.07355808466672897,
      "learning_rate": 3.272009646620059e-06,
      "loss": 0.0123,
      "step": 2062120
    },
    {
      "epoch": 3.374737338229807,
      "grad_norm": 0.4427523612976074,
      "learning_rate": 3.271943754406542e-06,
      "loss": 0.0144,
      "step": 2062140
    },
    {
      "epoch": 3.37477006866846,
      "grad_norm": 0.5898968577384949,
      "learning_rate": 3.2718778621930247e-06,
      "loss": 0.0131,
      "step": 2062160
    },
    {
      "epoch": 3.3748027991071137,
      "grad_norm": 0.26048168540000916,
      "learning_rate": 3.2718119699795075e-06,
      "loss": 0.0123,
      "step": 2062180
    },
    {
      "epoch": 3.374835529545767,
      "grad_norm": 0.1889607012271881,
      "learning_rate": 3.2717460777659902e-06,
      "loss": 0.0123,
      "step": 2062200
    },
    {
      "epoch": 3.3748682599844204,
      "grad_norm": 0.11054610460996628,
      "learning_rate": 3.271680185552474e-06,
      "loss": 0.0074,
      "step": 2062220
    },
    {
      "epoch": 3.3749009904230736,
      "grad_norm": 0.09875837713479996,
      "learning_rate": 3.2716142933389566e-06,
      "loss": 0.0088,
      "step": 2062240
    },
    {
      "epoch": 3.374933720861727,
      "grad_norm": 0.14286312460899353,
      "learning_rate": 3.2715484011254393e-06,
      "loss": 0.0213,
      "step": 2062260
    },
    {
      "epoch": 3.3749664513003803,
      "grad_norm": 0.24822580814361572,
      "learning_rate": 3.2714825089119225e-06,
      "loss": 0.0095,
      "step": 2062280
    },
    {
      "epoch": 3.3749991817390335,
      "grad_norm": 0.27378585934638977,
      "learning_rate": 3.271416616698405e-06,
      "loss": 0.0115,
      "step": 2062300
    },
    {
      "epoch": 3.375031912177687,
      "grad_norm": 0.30164623260498047,
      "learning_rate": 3.271350724484888e-06,
      "loss": 0.0104,
      "step": 2062320
    },
    {
      "epoch": 3.37506464261634,
      "grad_norm": 0.19256281852722168,
      "learning_rate": 3.2712848322713707e-06,
      "loss": 0.0102,
      "step": 2062340
    },
    {
      "epoch": 3.375097373054994,
      "grad_norm": 0.30080893635749817,
      "learning_rate": 3.271218940057854e-06,
      "loss": 0.0135,
      "step": 2062360
    },
    {
      "epoch": 3.375130103493647,
      "grad_norm": 0.35154861211776733,
      "learning_rate": 3.2711530478443366e-06,
      "loss": 0.0143,
      "step": 2062380
    },
    {
      "epoch": 3.3751628339323005,
      "grad_norm": 0.6592747569084167,
      "learning_rate": 3.2710871556308193e-06,
      "loss": 0.0081,
      "step": 2062400
    },
    {
      "epoch": 3.3751955643709537,
      "grad_norm": 0.10182318091392517,
      "learning_rate": 3.271021263417302e-06,
      "loss": 0.0153,
      "step": 2062420
    },
    {
      "epoch": 3.375228294809607,
      "grad_norm": 0.17385514080524445,
      "learning_rate": 3.2709553712037852e-06,
      "loss": 0.0132,
      "step": 2062440
    },
    {
      "epoch": 3.3752610252482604,
      "grad_norm": 0.5709717869758606,
      "learning_rate": 3.270889478990268e-06,
      "loss": 0.0106,
      "step": 2062460
    },
    {
      "epoch": 3.3752937556869136,
      "grad_norm": 0.13139666616916656,
      "learning_rate": 3.2708235867767507e-06,
      "loss": 0.0096,
      "step": 2062480
    },
    {
      "epoch": 3.375326486125567,
      "grad_norm": 0.2992507815361023,
      "learning_rate": 3.2707576945632334e-06,
      "loss": 0.01,
      "step": 2062500
    },
    {
      "epoch": 3.3753592165642203,
      "grad_norm": 0.32340261340141296,
      "learning_rate": 3.2706918023497166e-06,
      "loss": 0.0111,
      "step": 2062520
    },
    {
      "epoch": 3.375391947002874,
      "grad_norm": 0.2988201975822449,
      "learning_rate": 3.2706259101361993e-06,
      "loss": 0.0181,
      "step": 2062540
    },
    {
      "epoch": 3.375424677441527,
      "grad_norm": 0.1647505909204483,
      "learning_rate": 3.270560017922682e-06,
      "loss": 0.0094,
      "step": 2062560
    },
    {
      "epoch": 3.3754574078801802,
      "grad_norm": 0.30674970149993896,
      "learning_rate": 3.2704941257091657e-06,
      "loss": 0.0119,
      "step": 2062580
    },
    {
      "epoch": 3.375490138318834,
      "grad_norm": 0.8956795334815979,
      "learning_rate": 3.2704282334956484e-06,
      "loss": 0.0105,
      "step": 2062600
    },
    {
      "epoch": 3.375522868757487,
      "grad_norm": 0.5241609215736389,
      "learning_rate": 3.270362341282131e-06,
      "loss": 0.0156,
      "step": 2062620
    },
    {
      "epoch": 3.3755555991961406,
      "grad_norm": 0.3501432538032532,
      "learning_rate": 3.270296449068614e-06,
      "loss": 0.0125,
      "step": 2062640
    },
    {
      "epoch": 3.3755883296347937,
      "grad_norm": 0.5451655983924866,
      "learning_rate": 3.2702305568550966e-06,
      "loss": 0.0207,
      "step": 2062660
    },
    {
      "epoch": 3.3756210600734473,
      "grad_norm": 0.4678400754928589,
      "learning_rate": 3.27016466464158e-06,
      "loss": 0.0123,
      "step": 2062680
    },
    {
      "epoch": 3.3756537905121005,
      "grad_norm": 0.2046901285648346,
      "learning_rate": 3.2700987724280625e-06,
      "loss": 0.0093,
      "step": 2062700
    },
    {
      "epoch": 3.3756865209507536,
      "grad_norm": 0.5842656493186951,
      "learning_rate": 3.2700328802145453e-06,
      "loss": 0.0155,
      "step": 2062720
    },
    {
      "epoch": 3.375719251389407,
      "grad_norm": 0.1754014641046524,
      "learning_rate": 3.269966988001028e-06,
      "loss": 0.0092,
      "step": 2062740
    },
    {
      "epoch": 3.3757519818280604,
      "grad_norm": 0.2656550705432892,
      "learning_rate": 3.269901095787511e-06,
      "loss": 0.0126,
      "step": 2062760
    },
    {
      "epoch": 3.375784712266714,
      "grad_norm": 0.4919348359107971,
      "learning_rate": 3.269835203573994e-06,
      "loss": 0.0093,
      "step": 2062780
    },
    {
      "epoch": 3.375817442705367,
      "grad_norm": 0.6698118448257446,
      "learning_rate": 3.2697693113604767e-06,
      "loss": 0.0139,
      "step": 2062800
    },
    {
      "epoch": 3.3758501731440207,
      "grad_norm": 0.2092500925064087,
      "learning_rate": 3.2697034191469594e-06,
      "loss": 0.0112,
      "step": 2062820
    },
    {
      "epoch": 3.375882903582674,
      "grad_norm": 0.16027238965034485,
      "learning_rate": 3.2696375269334426e-06,
      "loss": 0.0065,
      "step": 2062840
    },
    {
      "epoch": 3.375915634021327,
      "grad_norm": 0.2854421138763428,
      "learning_rate": 3.2695716347199253e-06,
      "loss": 0.01,
      "step": 2062860
    },
    {
      "epoch": 3.3759483644599806,
      "grad_norm": 0.06817100197076797,
      "learning_rate": 3.269505742506408e-06,
      "loss": 0.0117,
      "step": 2062880
    },
    {
      "epoch": 3.3759810948986337,
      "grad_norm": 0.21125583350658417,
      "learning_rate": 3.2694398502928908e-06,
      "loss": 0.0129,
      "step": 2062900
    },
    {
      "epoch": 3.3760138253372873,
      "grad_norm": 0.15966245532035828,
      "learning_rate": 3.2693739580793744e-06,
      "loss": 0.0142,
      "step": 2062920
    },
    {
      "epoch": 3.3760465557759405,
      "grad_norm": 0.6241286396980286,
      "learning_rate": 3.269308065865857e-06,
      "loss": 0.0086,
      "step": 2062940
    },
    {
      "epoch": 3.376079286214594,
      "grad_norm": 0.058333590626716614,
      "learning_rate": 3.26924217365234e-06,
      "loss": 0.0091,
      "step": 2062960
    },
    {
      "epoch": 3.3761120166532472,
      "grad_norm": 0.10548068583011627,
      "learning_rate": 3.269176281438823e-06,
      "loss": 0.0092,
      "step": 2062980
    },
    {
      "epoch": 3.3761447470919004,
      "grad_norm": 0.3016270399093628,
      "learning_rate": 3.2691103892253057e-06,
      "loss": 0.0108,
      "step": 2063000
    },
    {
      "epoch": 3.376177477530554,
      "grad_norm": 0.17691807448863983,
      "learning_rate": 3.2690444970117885e-06,
      "loss": 0.0125,
      "step": 2063020
    },
    {
      "epoch": 3.376210207969207,
      "grad_norm": 0.325637549161911,
      "learning_rate": 3.2689786047982712e-06,
      "loss": 0.0155,
      "step": 2063040
    },
    {
      "epoch": 3.3762429384078607,
      "grad_norm": 0.917981743812561,
      "learning_rate": 3.2689127125847544e-06,
      "loss": 0.014,
      "step": 2063060
    },
    {
      "epoch": 3.376275668846514,
      "grad_norm": 0.3715478777885437,
      "learning_rate": 3.268846820371237e-06,
      "loss": 0.0107,
      "step": 2063080
    },
    {
      "epoch": 3.3763083992851675,
      "grad_norm": 0.2351883053779602,
      "learning_rate": 3.26878092815772e-06,
      "loss": 0.0098,
      "step": 2063100
    },
    {
      "epoch": 3.3763411297238206,
      "grad_norm": 0.4191387891769409,
      "learning_rate": 3.2687150359442026e-06,
      "loss": 0.0122,
      "step": 2063120
    },
    {
      "epoch": 3.3763738601624738,
      "grad_norm": 0.22771884500980377,
      "learning_rate": 3.2686491437306853e-06,
      "loss": 0.0104,
      "step": 2063140
    },
    {
      "epoch": 3.3764065906011274,
      "grad_norm": 0.08597147464752197,
      "learning_rate": 3.2685832515171685e-06,
      "loss": 0.0095,
      "step": 2063160
    },
    {
      "epoch": 3.3764393210397805,
      "grad_norm": 0.31332898139953613,
      "learning_rate": 3.2685173593036512e-06,
      "loss": 0.009,
      "step": 2063180
    },
    {
      "epoch": 3.376472051478434,
      "grad_norm": 0.17776741087436676,
      "learning_rate": 3.268451467090134e-06,
      "loss": 0.0159,
      "step": 2063200
    },
    {
      "epoch": 3.3765047819170873,
      "grad_norm": 0.21106380224227905,
      "learning_rate": 3.2683855748766167e-06,
      "loss": 0.0155,
      "step": 2063220
    },
    {
      "epoch": 3.3765375123557404,
      "grad_norm": 0.11808495223522186,
      "learning_rate": 3.2683196826631e-06,
      "loss": 0.01,
      "step": 2063240
    },
    {
      "epoch": 3.376570242794394,
      "grad_norm": 0.8857303261756897,
      "learning_rate": 3.2682537904495826e-06,
      "loss": 0.0122,
      "step": 2063260
    },
    {
      "epoch": 3.376602973233047,
      "grad_norm": 0.06155461445450783,
      "learning_rate": 3.268187898236066e-06,
      "loss": 0.016,
      "step": 2063280
    },
    {
      "epoch": 3.3766357036717007,
      "grad_norm": 0.18672481179237366,
      "learning_rate": 3.268122006022549e-06,
      "loss": 0.0107,
      "step": 2063300
    },
    {
      "epoch": 3.376668434110354,
      "grad_norm": 0.07247168570756912,
      "learning_rate": 3.2680561138090317e-06,
      "loss": 0.0146,
      "step": 2063320
    },
    {
      "epoch": 3.376701164549007,
      "grad_norm": 0.07459314912557602,
      "learning_rate": 3.2679902215955144e-06,
      "loss": 0.0114,
      "step": 2063340
    },
    {
      "epoch": 3.3767338949876606,
      "grad_norm": 0.10226161032915115,
      "learning_rate": 3.267924329381997e-06,
      "loss": 0.0134,
      "step": 2063360
    },
    {
      "epoch": 3.376766625426314,
      "grad_norm": 0.44867488741874695,
      "learning_rate": 3.2678584371684803e-06,
      "loss": 0.0187,
      "step": 2063380
    },
    {
      "epoch": 3.3767993558649674,
      "grad_norm": 0.20554766058921814,
      "learning_rate": 3.267792544954963e-06,
      "loss": 0.0106,
      "step": 2063400
    },
    {
      "epoch": 3.3768320863036205,
      "grad_norm": 0.966708242893219,
      "learning_rate": 3.267726652741446e-06,
      "loss": 0.0156,
      "step": 2063420
    },
    {
      "epoch": 3.376864816742274,
      "grad_norm": 0.3701951205730438,
      "learning_rate": 3.2676607605279286e-06,
      "loss": 0.0095,
      "step": 2063440
    },
    {
      "epoch": 3.3768975471809273,
      "grad_norm": 0.359082967042923,
      "learning_rate": 3.2675948683144117e-06,
      "loss": 0.0109,
      "step": 2063460
    },
    {
      "epoch": 3.3769302776195804,
      "grad_norm": 0.1587272584438324,
      "learning_rate": 3.2675289761008945e-06,
      "loss": 0.0108,
      "step": 2063480
    },
    {
      "epoch": 3.376963008058234,
      "grad_norm": 0.8376166224479675,
      "learning_rate": 3.267463083887377e-06,
      "loss": 0.0098,
      "step": 2063500
    },
    {
      "epoch": 3.376995738496887,
      "grad_norm": 0.1974150836467743,
      "learning_rate": 3.26739719167386e-06,
      "loss": 0.0182,
      "step": 2063520
    },
    {
      "epoch": 3.3770284689355408,
      "grad_norm": 0.20673181116580963,
      "learning_rate": 3.2673312994603427e-06,
      "loss": 0.0107,
      "step": 2063540
    },
    {
      "epoch": 3.377061199374194,
      "grad_norm": 0.09219444543123245,
      "learning_rate": 3.267265407246826e-06,
      "loss": 0.0083,
      "step": 2063560
    },
    {
      "epoch": 3.3770939298128475,
      "grad_norm": 0.2724037766456604,
      "learning_rate": 3.2671995150333086e-06,
      "loss": 0.0109,
      "step": 2063580
    },
    {
      "epoch": 3.3771266602515007,
      "grad_norm": 0.2109338641166687,
      "learning_rate": 3.2671336228197913e-06,
      "loss": 0.0133,
      "step": 2063600
    },
    {
      "epoch": 3.377159390690154,
      "grad_norm": 0.09078076481819153,
      "learning_rate": 3.267067730606275e-06,
      "loss": 0.0163,
      "step": 2063620
    },
    {
      "epoch": 3.3771921211288074,
      "grad_norm": 0.1493457853794098,
      "learning_rate": 3.2670018383927577e-06,
      "loss": 0.0139,
      "step": 2063640
    },
    {
      "epoch": 3.3772248515674606,
      "grad_norm": 0.39269158244132996,
      "learning_rate": 3.2669359461792404e-06,
      "loss": 0.0111,
      "step": 2063660
    },
    {
      "epoch": 3.377257582006114,
      "grad_norm": 0.4094407856464386,
      "learning_rate": 3.266870053965723e-06,
      "loss": 0.013,
      "step": 2063680
    },
    {
      "epoch": 3.3772903124447673,
      "grad_norm": 0.21084387600421906,
      "learning_rate": 3.2668041617522063e-06,
      "loss": 0.0078,
      "step": 2063700
    },
    {
      "epoch": 3.377323042883421,
      "grad_norm": 0.1835060864686966,
      "learning_rate": 3.266738269538689e-06,
      "loss": 0.0115,
      "step": 2063720
    },
    {
      "epoch": 3.377355773322074,
      "grad_norm": 0.14130182564258575,
      "learning_rate": 3.2666723773251718e-06,
      "loss": 0.0101,
      "step": 2063740
    },
    {
      "epoch": 3.377388503760727,
      "grad_norm": 0.7734612822532654,
      "learning_rate": 3.2666064851116545e-06,
      "loss": 0.011,
      "step": 2063760
    },
    {
      "epoch": 3.377421234199381,
      "grad_norm": 0.31126418709754944,
      "learning_rate": 3.2665405928981377e-06,
      "loss": 0.0147,
      "step": 2063780
    },
    {
      "epoch": 3.377453964638034,
      "grad_norm": 0.31592050194740295,
      "learning_rate": 3.2664747006846204e-06,
      "loss": 0.0174,
      "step": 2063800
    },
    {
      "epoch": 3.3774866950766875,
      "grad_norm": 0.1679965853691101,
      "learning_rate": 3.266408808471103e-06,
      "loss": 0.0086,
      "step": 2063820
    },
    {
      "epoch": 3.3775194255153407,
      "grad_norm": 0.23765252530574799,
      "learning_rate": 3.266342916257586e-06,
      "loss": 0.0136,
      "step": 2063840
    },
    {
      "epoch": 3.3775521559539943,
      "grad_norm": 0.19802269339561462,
      "learning_rate": 3.266277024044069e-06,
      "loss": 0.0088,
      "step": 2063860
    },
    {
      "epoch": 3.3775848863926474,
      "grad_norm": 0.375446081161499,
      "learning_rate": 3.266211131830552e-06,
      "loss": 0.0112,
      "step": 2063880
    },
    {
      "epoch": 3.3776176168313006,
      "grad_norm": 0.6021913886070251,
      "learning_rate": 3.2661452396170345e-06,
      "loss": 0.0106,
      "step": 2063900
    },
    {
      "epoch": 3.377650347269954,
      "grad_norm": 0.07631925493478775,
      "learning_rate": 3.2660793474035173e-06,
      "loss": 0.0073,
      "step": 2063920
    },
    {
      "epoch": 3.3776830777086073,
      "grad_norm": 0.3322516083717346,
      "learning_rate": 3.2660134551900004e-06,
      "loss": 0.0108,
      "step": 2063940
    },
    {
      "epoch": 3.377715808147261,
      "grad_norm": 0.27496856451034546,
      "learning_rate": 3.265947562976483e-06,
      "loss": 0.0144,
      "step": 2063960
    },
    {
      "epoch": 3.377748538585914,
      "grad_norm": 0.4184695780277252,
      "learning_rate": 3.2658816707629663e-06,
      "loss": 0.0089,
      "step": 2063980
    },
    {
      "epoch": 3.3777812690245677,
      "grad_norm": 0.3717940151691437,
      "learning_rate": 3.2658157785494495e-06,
      "loss": 0.0116,
      "step": 2064000
    },
    {
      "epoch": 3.377813999463221,
      "grad_norm": 0.24310638010501862,
      "learning_rate": 3.2657498863359322e-06,
      "loss": 0.0099,
      "step": 2064020
    },
    {
      "epoch": 3.377846729901874,
      "grad_norm": 0.05096007138490677,
      "learning_rate": 3.265683994122415e-06,
      "loss": 0.0133,
      "step": 2064040
    },
    {
      "epoch": 3.3778794603405276,
      "grad_norm": 0.8686304092407227,
      "learning_rate": 3.2656181019088977e-06,
      "loss": 0.0156,
      "step": 2064060
    },
    {
      "epoch": 3.3779121907791807,
      "grad_norm": 0.25176113843917847,
      "learning_rate": 3.2655522096953805e-06,
      "loss": 0.0092,
      "step": 2064080
    },
    {
      "epoch": 3.3779449212178343,
      "grad_norm": 0.10208553820848465,
      "learning_rate": 3.2654863174818636e-06,
      "loss": 0.0163,
      "step": 2064100
    },
    {
      "epoch": 3.3779776516564874,
      "grad_norm": 0.22165106236934662,
      "learning_rate": 3.2654204252683464e-06,
      "loss": 0.0122,
      "step": 2064120
    },
    {
      "epoch": 3.378010382095141,
      "grad_norm": 0.1440291553735733,
      "learning_rate": 3.265354533054829e-06,
      "loss": 0.0117,
      "step": 2064140
    },
    {
      "epoch": 3.378043112533794,
      "grad_norm": 0.5454150438308716,
      "learning_rate": 3.265288640841312e-06,
      "loss": 0.0124,
      "step": 2064160
    },
    {
      "epoch": 3.3780758429724473,
      "grad_norm": 0.2931898534297943,
      "learning_rate": 3.265222748627795e-06,
      "loss": 0.0091,
      "step": 2064180
    },
    {
      "epoch": 3.378108573411101,
      "grad_norm": 0.11276107281446457,
      "learning_rate": 3.2651568564142778e-06,
      "loss": 0.01,
      "step": 2064200
    },
    {
      "epoch": 3.378141303849754,
      "grad_norm": 0.14321285486221313,
      "learning_rate": 3.2650909642007605e-06,
      "loss": 0.0108,
      "step": 2064220
    },
    {
      "epoch": 3.3781740342884077,
      "grad_norm": 0.13300061225891113,
      "learning_rate": 3.2650250719872432e-06,
      "loss": 0.0092,
      "step": 2064240
    },
    {
      "epoch": 3.378206764727061,
      "grad_norm": 0.12173891812562943,
      "learning_rate": 3.2649591797737264e-06,
      "loss": 0.0086,
      "step": 2064260
    },
    {
      "epoch": 3.3782394951657144,
      "grad_norm": 0.36852535605430603,
      "learning_rate": 3.264893287560209e-06,
      "loss": 0.0157,
      "step": 2064280
    },
    {
      "epoch": 3.3782722256043676,
      "grad_norm": 0.1657702475786209,
      "learning_rate": 3.264827395346692e-06,
      "loss": 0.0079,
      "step": 2064300
    },
    {
      "epoch": 3.3783049560430207,
      "grad_norm": 0.5010801553726196,
      "learning_rate": 3.2647615031331746e-06,
      "loss": 0.0109,
      "step": 2064320
    },
    {
      "epoch": 3.3783376864816743,
      "grad_norm": 0.32342949509620667,
      "learning_rate": 3.264695610919658e-06,
      "loss": 0.0114,
      "step": 2064340
    },
    {
      "epoch": 3.3783704169203275,
      "grad_norm": 0.25720369815826416,
      "learning_rate": 3.264629718706141e-06,
      "loss": 0.0104,
      "step": 2064360
    },
    {
      "epoch": 3.378403147358981,
      "grad_norm": 0.12472651153802872,
      "learning_rate": 3.2645638264926237e-06,
      "loss": 0.0118,
      "step": 2064380
    },
    {
      "epoch": 3.378435877797634,
      "grad_norm": 0.12233749032020569,
      "learning_rate": 3.264497934279107e-06,
      "loss": 0.0072,
      "step": 2064400
    },
    {
      "epoch": 3.378468608236288,
      "grad_norm": 0.21238206326961517,
      "learning_rate": 3.2644320420655896e-06,
      "loss": 0.0101,
      "step": 2064420
    },
    {
      "epoch": 3.378501338674941,
      "grad_norm": 0.3024698495864868,
      "learning_rate": 3.2643661498520723e-06,
      "loss": 0.0171,
      "step": 2064440
    },
    {
      "epoch": 3.378534069113594,
      "grad_norm": 0.3791714012622833,
      "learning_rate": 3.264300257638555e-06,
      "loss": 0.0102,
      "step": 2064460
    },
    {
      "epoch": 3.3785667995522477,
      "grad_norm": 0.39639973640441895,
      "learning_rate": 3.2642343654250382e-06,
      "loss": 0.0112,
      "step": 2064480
    },
    {
      "epoch": 3.378599529990901,
      "grad_norm": 0.4772351086139679,
      "learning_rate": 3.264168473211521e-06,
      "loss": 0.0104,
      "step": 2064500
    },
    {
      "epoch": 3.3786322604295544,
      "grad_norm": 0.11973311007022858,
      "learning_rate": 3.2641025809980037e-06,
      "loss": 0.0076,
      "step": 2064520
    },
    {
      "epoch": 3.3786649908682076,
      "grad_norm": 0.09767691045999527,
      "learning_rate": 3.2640366887844864e-06,
      "loss": 0.01,
      "step": 2064540
    },
    {
      "epoch": 3.378697721306861,
      "grad_norm": 0.22302278876304626,
      "learning_rate": 3.263970796570969e-06,
      "loss": 0.0087,
      "step": 2064560
    },
    {
      "epoch": 3.3787304517455143,
      "grad_norm": 0.888750433921814,
      "learning_rate": 3.2639049043574523e-06,
      "loss": 0.0166,
      "step": 2064580
    },
    {
      "epoch": 3.3787631821841675,
      "grad_norm": 0.20745348930358887,
      "learning_rate": 3.263839012143935e-06,
      "loss": 0.014,
      "step": 2064600
    },
    {
      "epoch": 3.378795912622821,
      "grad_norm": 0.04963694140315056,
      "learning_rate": 3.263773119930418e-06,
      "loss": 0.0103,
      "step": 2064620
    },
    {
      "epoch": 3.3788286430614742,
      "grad_norm": 0.606543242931366,
      "learning_rate": 3.2637072277169006e-06,
      "loss": 0.0139,
      "step": 2064640
    },
    {
      "epoch": 3.378861373500128,
      "grad_norm": 0.26691243052482605,
      "learning_rate": 3.2636413355033837e-06,
      "loss": 0.0169,
      "step": 2064660
    },
    {
      "epoch": 3.378894103938781,
      "grad_norm": 0.32345423102378845,
      "learning_rate": 3.263575443289867e-06,
      "loss": 0.0155,
      "step": 2064680
    },
    {
      "epoch": 3.3789268343774346,
      "grad_norm": 0.26574504375457764,
      "learning_rate": 3.2635095510763496e-06,
      "loss": 0.0134,
      "step": 2064700
    },
    {
      "epoch": 3.3789595648160877,
      "grad_norm": 0.2597118318080902,
      "learning_rate": 3.263443658862833e-06,
      "loss": 0.0142,
      "step": 2064720
    },
    {
      "epoch": 3.378992295254741,
      "grad_norm": 0.15767799317836761,
      "learning_rate": 3.2633777666493155e-06,
      "loss": 0.0159,
      "step": 2064740
    },
    {
      "epoch": 3.3790250256933945,
      "grad_norm": 0.17328424751758575,
      "learning_rate": 3.2633118744357983e-06,
      "loss": 0.0154,
      "step": 2064760
    },
    {
      "epoch": 3.3790577561320476,
      "grad_norm": 0.1200776919722557,
      "learning_rate": 3.263245982222281e-06,
      "loss": 0.0104,
      "step": 2064780
    },
    {
      "epoch": 3.379090486570701,
      "grad_norm": 0.48447173833847046,
      "learning_rate": 3.263180090008764e-06,
      "loss": 0.0138,
      "step": 2064800
    },
    {
      "epoch": 3.3791232170093544,
      "grad_norm": 0.24308151006698608,
      "learning_rate": 3.263114197795247e-06,
      "loss": 0.0104,
      "step": 2064820
    },
    {
      "epoch": 3.3791559474480075,
      "grad_norm": 0.17465107142925262,
      "learning_rate": 3.2630483055817297e-06,
      "loss": 0.0136,
      "step": 2064840
    },
    {
      "epoch": 3.379188677886661,
      "grad_norm": 0.06546185910701752,
      "learning_rate": 3.2629824133682124e-06,
      "loss": 0.0122,
      "step": 2064860
    },
    {
      "epoch": 3.3792214083253143,
      "grad_norm": 0.28316888213157654,
      "learning_rate": 3.2629165211546956e-06,
      "loss": 0.0191,
      "step": 2064880
    },
    {
      "epoch": 3.379254138763968,
      "grad_norm": 0.33100390434265137,
      "learning_rate": 3.2628506289411783e-06,
      "loss": 0.0139,
      "step": 2064900
    },
    {
      "epoch": 3.379286869202621,
      "grad_norm": 0.6352255940437317,
      "learning_rate": 3.262784736727661e-06,
      "loss": 0.0135,
      "step": 2064920
    },
    {
      "epoch": 3.379319599641274,
      "grad_norm": 0.10912704467773438,
      "learning_rate": 3.2627188445141438e-06,
      "loss": 0.0089,
      "step": 2064940
    },
    {
      "epoch": 3.3793523300799277,
      "grad_norm": 0.14480017125606537,
      "learning_rate": 3.2626529523006265e-06,
      "loss": 0.0132,
      "step": 2064960
    },
    {
      "epoch": 3.379385060518581,
      "grad_norm": 0.27685439586639404,
      "learning_rate": 3.2625870600871097e-06,
      "loss": 0.0118,
      "step": 2064980
    },
    {
      "epoch": 3.3794177909572345,
      "grad_norm": 0.17262955009937286,
      "learning_rate": 3.2625211678735924e-06,
      "loss": 0.0106,
      "step": 2065000
    },
    {
      "epoch": 3.3794505213958876,
      "grad_norm": 0.247397318482399,
      "learning_rate": 3.262455275660075e-06,
      "loss": 0.008,
      "step": 2065020
    },
    {
      "epoch": 3.3794832518345412,
      "grad_norm": 0.20832861959934235,
      "learning_rate": 3.2623893834465588e-06,
      "loss": 0.0113,
      "step": 2065040
    },
    {
      "epoch": 3.3795159822731944,
      "grad_norm": 0.8594783544540405,
      "learning_rate": 3.2623234912330415e-06,
      "loss": 0.0079,
      "step": 2065060
    },
    {
      "epoch": 3.3795487127118475,
      "grad_norm": 0.22710232436656952,
      "learning_rate": 3.2622575990195242e-06,
      "loss": 0.0104,
      "step": 2065080
    },
    {
      "epoch": 3.379581443150501,
      "grad_norm": 0.2613064646720886,
      "learning_rate": 3.262191706806007e-06,
      "loss": 0.0121,
      "step": 2065100
    },
    {
      "epoch": 3.3796141735891543,
      "grad_norm": 0.14583297073841095,
      "learning_rate": 3.26212581459249e-06,
      "loss": 0.0175,
      "step": 2065120
    },
    {
      "epoch": 3.379646904027808,
      "grad_norm": 0.34209147095680237,
      "learning_rate": 3.262059922378973e-06,
      "loss": 0.0129,
      "step": 2065140
    },
    {
      "epoch": 3.379679634466461,
      "grad_norm": 0.42757025361061096,
      "learning_rate": 3.2619940301654556e-06,
      "loss": 0.0168,
      "step": 2065160
    },
    {
      "epoch": 3.3797123649051146,
      "grad_norm": 0.21906079351902008,
      "learning_rate": 3.2619281379519384e-06,
      "loss": 0.0126,
      "step": 2065180
    },
    {
      "epoch": 3.3797450953437678,
      "grad_norm": 0.17101766169071198,
      "learning_rate": 3.2618622457384215e-06,
      "loss": 0.0125,
      "step": 2065200
    },
    {
      "epoch": 3.379777825782421,
      "grad_norm": 0.3857376277446747,
      "learning_rate": 3.2617963535249043e-06,
      "loss": 0.0129,
      "step": 2065220
    },
    {
      "epoch": 3.3798105562210745,
      "grad_norm": 0.4256063997745514,
      "learning_rate": 3.261730461311387e-06,
      "loss": 0.0143,
      "step": 2065240
    },
    {
      "epoch": 3.3798432866597277,
      "grad_norm": 0.3789348006248474,
      "learning_rate": 3.2616645690978697e-06,
      "loss": 0.0115,
      "step": 2065260
    },
    {
      "epoch": 3.3798760170983813,
      "grad_norm": 0.0978526920080185,
      "learning_rate": 3.261598676884353e-06,
      "loss": 0.0091,
      "step": 2065280
    },
    {
      "epoch": 3.3799087475370344,
      "grad_norm": 0.15014159679412842,
      "learning_rate": 3.2615327846708356e-06,
      "loss": 0.0081,
      "step": 2065300
    },
    {
      "epoch": 3.379941477975688,
      "grad_norm": 0.33204755187034607,
      "learning_rate": 3.2614668924573184e-06,
      "loss": 0.0161,
      "step": 2065320
    },
    {
      "epoch": 3.379974208414341,
      "grad_norm": 0.07640339434146881,
      "learning_rate": 3.261401000243801e-06,
      "loss": 0.0107,
      "step": 2065340
    },
    {
      "epoch": 3.3800069388529943,
      "grad_norm": 0.5571103692054749,
      "learning_rate": 3.2613351080302843e-06,
      "loss": 0.0111,
      "step": 2065360
    },
    {
      "epoch": 3.380039669291648,
      "grad_norm": 0.5599483847618103,
      "learning_rate": 3.2612692158167674e-06,
      "loss": 0.0137,
      "step": 2065380
    },
    {
      "epoch": 3.380072399730301,
      "grad_norm": 0.4839957058429718,
      "learning_rate": 3.26120332360325e-06,
      "loss": 0.0112,
      "step": 2065400
    },
    {
      "epoch": 3.3801051301689546,
      "grad_norm": 0.23928195238113403,
      "learning_rate": 3.2611374313897333e-06,
      "loss": 0.0129,
      "step": 2065420
    },
    {
      "epoch": 3.380137860607608,
      "grad_norm": 0.11614786833524704,
      "learning_rate": 3.261071539176216e-06,
      "loss": 0.0144,
      "step": 2065440
    },
    {
      "epoch": 3.3801705910462614,
      "grad_norm": 0.18946781754493713,
      "learning_rate": 3.261005646962699e-06,
      "loss": 0.0101,
      "step": 2065460
    },
    {
      "epoch": 3.3802033214849145,
      "grad_norm": 0.46366065740585327,
      "learning_rate": 3.2609397547491816e-06,
      "loss": 0.0166,
      "step": 2065480
    },
    {
      "epoch": 3.3802360519235677,
      "grad_norm": 0.20469187200069427,
      "learning_rate": 3.2608738625356643e-06,
      "loss": 0.0112,
      "step": 2065500
    },
    {
      "epoch": 3.3802687823622213,
      "grad_norm": 0.04394584894180298,
      "learning_rate": 3.2608079703221475e-06,
      "loss": 0.0089,
      "step": 2065520
    },
    {
      "epoch": 3.3803015128008744,
      "grad_norm": 0.34067264199256897,
      "learning_rate": 3.26074207810863e-06,
      "loss": 0.0176,
      "step": 2065540
    },
    {
      "epoch": 3.380334243239528,
      "grad_norm": 0.48784732818603516,
      "learning_rate": 3.260676185895113e-06,
      "loss": 0.0091,
      "step": 2065560
    },
    {
      "epoch": 3.380366973678181,
      "grad_norm": 0.17098456621170044,
      "learning_rate": 3.2606102936815957e-06,
      "loss": 0.0096,
      "step": 2065580
    },
    {
      "epoch": 3.3803997041168348,
      "grad_norm": 0.18750311434268951,
      "learning_rate": 3.260544401468079e-06,
      "loss": 0.0133,
      "step": 2065600
    },
    {
      "epoch": 3.380432434555488,
      "grad_norm": 0.14347244799137115,
      "learning_rate": 3.2604785092545616e-06,
      "loss": 0.0077,
      "step": 2065620
    },
    {
      "epoch": 3.380465164994141,
      "grad_norm": 0.2899819016456604,
      "learning_rate": 3.2604126170410443e-06,
      "loss": 0.0078,
      "step": 2065640
    },
    {
      "epoch": 3.3804978954327947,
      "grad_norm": 0.6959635615348816,
      "learning_rate": 3.260346724827527e-06,
      "loss": 0.0137,
      "step": 2065660
    },
    {
      "epoch": 3.380530625871448,
      "grad_norm": 0.3668009638786316,
      "learning_rate": 3.2602808326140102e-06,
      "loss": 0.0156,
      "step": 2065680
    },
    {
      "epoch": 3.3805633563101014,
      "grad_norm": 0.21839497983455658,
      "learning_rate": 3.260214940400493e-06,
      "loss": 0.0104,
      "step": 2065700
    },
    {
      "epoch": 3.3805960867487546,
      "grad_norm": 0.19510287046432495,
      "learning_rate": 3.2601490481869757e-06,
      "loss": 0.0103,
      "step": 2065720
    },
    {
      "epoch": 3.380628817187408,
      "grad_norm": 0.3195783793926239,
      "learning_rate": 3.2600831559734593e-06,
      "loss": 0.0128,
      "step": 2065740
    },
    {
      "epoch": 3.3806615476260613,
      "grad_norm": 0.1301470249891281,
      "learning_rate": 3.260017263759942e-06,
      "loss": 0.0108,
      "step": 2065760
    },
    {
      "epoch": 3.3806942780647145,
      "grad_norm": 0.5017774105072021,
      "learning_rate": 3.2599513715464248e-06,
      "loss": 0.0121,
      "step": 2065780
    },
    {
      "epoch": 3.380727008503368,
      "grad_norm": 0.20805442333221436,
      "learning_rate": 3.2598854793329075e-06,
      "loss": 0.009,
      "step": 2065800
    },
    {
      "epoch": 3.380759738942021,
      "grad_norm": 0.46350154280662537,
      "learning_rate": 3.2598195871193907e-06,
      "loss": 0.0125,
      "step": 2065820
    },
    {
      "epoch": 3.380792469380675,
      "grad_norm": 0.28819477558135986,
      "learning_rate": 3.2597536949058734e-06,
      "loss": 0.01,
      "step": 2065840
    },
    {
      "epoch": 3.380825199819328,
      "grad_norm": 0.2707197666168213,
      "learning_rate": 3.259687802692356e-06,
      "loss": 0.0083,
      "step": 2065860
    },
    {
      "epoch": 3.3808579302579815,
      "grad_norm": 0.2415749877691269,
      "learning_rate": 3.259621910478839e-06,
      "loss": 0.0079,
      "step": 2065880
    },
    {
      "epoch": 3.3808906606966347,
      "grad_norm": 0.16744588315486908,
      "learning_rate": 3.259556018265322e-06,
      "loss": 0.0086,
      "step": 2065900
    },
    {
      "epoch": 3.380923391135288,
      "grad_norm": 0.5198729634284973,
      "learning_rate": 3.259490126051805e-06,
      "loss": 0.0115,
      "step": 2065920
    },
    {
      "epoch": 3.3809561215739414,
      "grad_norm": 0.128021240234375,
      "learning_rate": 3.2594242338382875e-06,
      "loss": 0.0132,
      "step": 2065940
    },
    {
      "epoch": 3.3809888520125946,
      "grad_norm": 0.12693479657173157,
      "learning_rate": 3.2593583416247703e-06,
      "loss": 0.0117,
      "step": 2065960
    },
    {
      "epoch": 3.381021582451248,
      "grad_norm": 0.28246909379959106,
      "learning_rate": 3.259292449411253e-06,
      "loss": 0.0148,
      "step": 2065980
    },
    {
      "epoch": 3.3810543128899013,
      "grad_norm": 0.6306111812591553,
      "learning_rate": 3.259226557197736e-06,
      "loss": 0.0109,
      "step": 2066000
    },
    {
      "epoch": 3.381087043328555,
      "grad_norm": 0.1614668220281601,
      "learning_rate": 3.259160664984219e-06,
      "loss": 0.0098,
      "step": 2066020
    },
    {
      "epoch": 3.381119773767208,
      "grad_norm": 0.06002020835876465,
      "learning_rate": 3.2590947727707017e-06,
      "loss": 0.0098,
      "step": 2066040
    },
    {
      "epoch": 3.381152504205861,
      "grad_norm": 0.13086260855197906,
      "learning_rate": 3.2590288805571844e-06,
      "loss": 0.012,
      "step": 2066060
    },
    {
      "epoch": 3.381185234644515,
      "grad_norm": 0.42624303698539734,
      "learning_rate": 3.2589629883436676e-06,
      "loss": 0.0101,
      "step": 2066080
    },
    {
      "epoch": 3.381217965083168,
      "grad_norm": 0.3407289981842041,
      "learning_rate": 3.2588970961301507e-06,
      "loss": 0.0132,
      "step": 2066100
    },
    {
      "epoch": 3.3812506955218216,
      "grad_norm": 0.29335635900497437,
      "learning_rate": 3.2588312039166335e-06,
      "loss": 0.0155,
      "step": 2066120
    },
    {
      "epoch": 3.3812834259604747,
      "grad_norm": 0.20677123963832855,
      "learning_rate": 3.2587653117031166e-06,
      "loss": 0.0188,
      "step": 2066140
    },
    {
      "epoch": 3.3813161563991283,
      "grad_norm": 0.1036658063530922,
      "learning_rate": 3.2586994194895994e-06,
      "loss": 0.009,
      "step": 2066160
    },
    {
      "epoch": 3.3813488868377815,
      "grad_norm": 0.4684755504131317,
      "learning_rate": 3.258633527276082e-06,
      "loss": 0.0159,
      "step": 2066180
    },
    {
      "epoch": 3.3813816172764346,
      "grad_norm": 0.4875665605068207,
      "learning_rate": 3.258567635062565e-06,
      "loss": 0.0113,
      "step": 2066200
    },
    {
      "epoch": 3.381414347715088,
      "grad_norm": 0.3842225968837738,
      "learning_rate": 3.258501742849048e-06,
      "loss": 0.0134,
      "step": 2066220
    },
    {
      "epoch": 3.3814470781537413,
      "grad_norm": 0.22107405960559845,
      "learning_rate": 3.2584358506355308e-06,
      "loss": 0.0117,
      "step": 2066240
    },
    {
      "epoch": 3.381479808592395,
      "grad_norm": 0.2033156305551529,
      "learning_rate": 3.2583699584220135e-06,
      "loss": 0.0158,
      "step": 2066260
    },
    {
      "epoch": 3.381512539031048,
      "grad_norm": 0.06573182344436646,
      "learning_rate": 3.2583040662084962e-06,
      "loss": 0.0117,
      "step": 2066280
    },
    {
      "epoch": 3.3815452694697012,
      "grad_norm": 0.2524793744087219,
      "learning_rate": 3.2582381739949794e-06,
      "loss": 0.011,
      "step": 2066300
    },
    {
      "epoch": 3.381577999908355,
      "grad_norm": 0.0792187750339508,
      "learning_rate": 3.258172281781462e-06,
      "loss": 0.0107,
      "step": 2066320
    },
    {
      "epoch": 3.381610730347008,
      "grad_norm": 0.34237489104270935,
      "learning_rate": 3.258106389567945e-06,
      "loss": 0.0111,
      "step": 2066340
    },
    {
      "epoch": 3.3816434607856616,
      "grad_norm": 0.44162702560424805,
      "learning_rate": 3.2580404973544276e-06,
      "loss": 0.0138,
      "step": 2066360
    },
    {
      "epoch": 3.3816761912243147,
      "grad_norm": 0.49821150302886963,
      "learning_rate": 3.2579746051409108e-06,
      "loss": 0.0116,
      "step": 2066380
    },
    {
      "epoch": 3.381708921662968,
      "grad_norm": 0.3202964663505554,
      "learning_rate": 3.2579087129273935e-06,
      "loss": 0.008,
      "step": 2066400
    },
    {
      "epoch": 3.3817416521016215,
      "grad_norm": 0.5660286545753479,
      "learning_rate": 3.2578428207138763e-06,
      "loss": 0.0166,
      "step": 2066420
    },
    {
      "epoch": 3.3817743825402746,
      "grad_norm": 0.14451685547828674,
      "learning_rate": 3.25777692850036e-06,
      "loss": 0.0098,
      "step": 2066440
    },
    {
      "epoch": 3.381807112978928,
      "grad_norm": 0.1335529386997223,
      "learning_rate": 3.2577110362868426e-06,
      "loss": 0.0125,
      "step": 2066460
    },
    {
      "epoch": 3.3818398434175814,
      "grad_norm": 0.3066160976886749,
      "learning_rate": 3.2576451440733253e-06,
      "loss": 0.0111,
      "step": 2066480
    },
    {
      "epoch": 3.381872573856235,
      "grad_norm": 0.3088175058364868,
      "learning_rate": 3.257579251859808e-06,
      "loss": 0.0109,
      "step": 2066500
    },
    {
      "epoch": 3.381905304294888,
      "grad_norm": 0.2157314568758011,
      "learning_rate": 3.257513359646291e-06,
      "loss": 0.0106,
      "step": 2066520
    },
    {
      "epoch": 3.3819380347335413,
      "grad_norm": 0.179446280002594,
      "learning_rate": 3.257447467432774e-06,
      "loss": 0.0117,
      "step": 2066540
    },
    {
      "epoch": 3.381970765172195,
      "grad_norm": 0.44388917088508606,
      "learning_rate": 3.2573815752192567e-06,
      "loss": 0.0142,
      "step": 2066560
    },
    {
      "epoch": 3.382003495610848,
      "grad_norm": 0.23210127651691437,
      "learning_rate": 3.2573156830057395e-06,
      "loss": 0.0088,
      "step": 2066580
    },
    {
      "epoch": 3.3820362260495016,
      "grad_norm": 0.8913872838020325,
      "learning_rate": 3.257249790792222e-06,
      "loss": 0.0099,
      "step": 2066600
    },
    {
      "epoch": 3.3820689564881548,
      "grad_norm": 0.24183876812458038,
      "learning_rate": 3.2571838985787054e-06,
      "loss": 0.0153,
      "step": 2066620
    },
    {
      "epoch": 3.3821016869268083,
      "grad_norm": 0.20595930516719818,
      "learning_rate": 3.257118006365188e-06,
      "loss": 0.0095,
      "step": 2066640
    },
    {
      "epoch": 3.3821344173654615,
      "grad_norm": 0.367358922958374,
      "learning_rate": 3.257052114151671e-06,
      "loss": 0.0137,
      "step": 2066660
    },
    {
      "epoch": 3.3821671478041146,
      "grad_norm": 0.1716359257698059,
      "learning_rate": 3.2569862219381536e-06,
      "loss": 0.0092,
      "step": 2066680
    },
    {
      "epoch": 3.3821998782427682,
      "grad_norm": 0.29762840270996094,
      "learning_rate": 3.2569203297246367e-06,
      "loss": 0.0112,
      "step": 2066700
    },
    {
      "epoch": 3.3822326086814214,
      "grad_norm": 0.3324287533760071,
      "learning_rate": 3.2568544375111195e-06,
      "loss": 0.0148,
      "step": 2066720
    },
    {
      "epoch": 3.382265339120075,
      "grad_norm": 0.26108190417289734,
      "learning_rate": 3.2567885452976022e-06,
      "loss": 0.012,
      "step": 2066740
    },
    {
      "epoch": 3.382298069558728,
      "grad_norm": 0.3395819664001465,
      "learning_rate": 3.256722653084085e-06,
      "loss": 0.0163,
      "step": 2066760
    },
    {
      "epoch": 3.3823307999973817,
      "grad_norm": 0.08354654908180237,
      "learning_rate": 3.256656760870568e-06,
      "loss": 0.0107,
      "step": 2066780
    },
    {
      "epoch": 3.382363530436035,
      "grad_norm": 0.5342076420783997,
      "learning_rate": 3.2565908686570513e-06,
      "loss": 0.013,
      "step": 2066800
    },
    {
      "epoch": 3.382396260874688,
      "grad_norm": 0.36820557713508606,
      "learning_rate": 3.256524976443534e-06,
      "loss": 0.0097,
      "step": 2066820
    },
    {
      "epoch": 3.3824289913133416,
      "grad_norm": 0.4367864429950714,
      "learning_rate": 3.256459084230017e-06,
      "loss": 0.0112,
      "step": 2066840
    },
    {
      "epoch": 3.3824617217519948,
      "grad_norm": 0.1624971330165863,
      "learning_rate": 3.2563931920165e-06,
      "loss": 0.0124,
      "step": 2066860
    },
    {
      "epoch": 3.3824944521906484,
      "grad_norm": 0.06788907200098038,
      "learning_rate": 3.2563272998029827e-06,
      "loss": 0.0116,
      "step": 2066880
    },
    {
      "epoch": 3.3825271826293015,
      "grad_norm": 0.16511568427085876,
      "learning_rate": 3.2562614075894654e-06,
      "loss": 0.0094,
      "step": 2066900
    },
    {
      "epoch": 3.382559913067955,
      "grad_norm": 0.18367502093315125,
      "learning_rate": 3.256195515375948e-06,
      "loss": 0.009,
      "step": 2066920
    },
    {
      "epoch": 3.3825926435066083,
      "grad_norm": 0.24537578225135803,
      "learning_rate": 3.2561296231624313e-06,
      "loss": 0.0132,
      "step": 2066940
    },
    {
      "epoch": 3.3826253739452614,
      "grad_norm": 0.2894672155380249,
      "learning_rate": 3.256063730948914e-06,
      "loss": 0.0142,
      "step": 2066960
    },
    {
      "epoch": 3.382658104383915,
      "grad_norm": 0.20011378824710846,
      "learning_rate": 3.255997838735397e-06,
      "loss": 0.0104,
      "step": 2066980
    },
    {
      "epoch": 3.382690834822568,
      "grad_norm": 0.28349781036376953,
      "learning_rate": 3.2559319465218795e-06,
      "loss": 0.0104,
      "step": 2067000
    },
    {
      "epoch": 3.3827235652612218,
      "grad_norm": 0.18020865321159363,
      "learning_rate": 3.2558660543083627e-06,
      "loss": 0.0112,
      "step": 2067020
    },
    {
      "epoch": 3.382756295699875,
      "grad_norm": 0.27674543857574463,
      "learning_rate": 3.2558001620948454e-06,
      "loss": 0.01,
      "step": 2067040
    },
    {
      "epoch": 3.3827890261385285,
      "grad_norm": 0.26865649223327637,
      "learning_rate": 3.255734269881328e-06,
      "loss": 0.0135,
      "step": 2067060
    },
    {
      "epoch": 3.3828217565771816,
      "grad_norm": 1.0139087438583374,
      "learning_rate": 3.255668377667811e-06,
      "loss": 0.0137,
      "step": 2067080
    },
    {
      "epoch": 3.382854487015835,
      "grad_norm": 0.3290700316429138,
      "learning_rate": 3.255602485454294e-06,
      "loss": 0.01,
      "step": 2067100
    },
    {
      "epoch": 3.3828872174544884,
      "grad_norm": 0.6002312898635864,
      "learning_rate": 3.255536593240777e-06,
      "loss": 0.0097,
      "step": 2067120
    },
    {
      "epoch": 3.3829199478931415,
      "grad_norm": 0.39036306738853455,
      "learning_rate": 3.25547070102726e-06,
      "loss": 0.01,
      "step": 2067140
    },
    {
      "epoch": 3.382952678331795,
      "grad_norm": 0.3727550208568573,
      "learning_rate": 3.255404808813743e-06,
      "loss": 0.015,
      "step": 2067160
    },
    {
      "epoch": 3.3829854087704483,
      "grad_norm": 0.2634367048740387,
      "learning_rate": 3.255338916600226e-06,
      "loss": 0.0095,
      "step": 2067180
    },
    {
      "epoch": 3.383018139209102,
      "grad_norm": 0.0844542533159256,
      "learning_rate": 3.2552730243867086e-06,
      "loss": 0.0129,
      "step": 2067200
    },
    {
      "epoch": 3.383050869647755,
      "grad_norm": 0.21653704345226288,
      "learning_rate": 3.2552071321731914e-06,
      "loss": 0.0125,
      "step": 2067220
    },
    {
      "epoch": 3.383083600086408,
      "grad_norm": 0.4462750256061554,
      "learning_rate": 3.2551412399596745e-06,
      "loss": 0.0134,
      "step": 2067240
    },
    {
      "epoch": 3.3831163305250618,
      "grad_norm": 0.07465654611587524,
      "learning_rate": 3.2550753477461573e-06,
      "loss": 0.009,
      "step": 2067260
    },
    {
      "epoch": 3.383149060963715,
      "grad_norm": 0.6166425943374634,
      "learning_rate": 3.25500945553264e-06,
      "loss": 0.0112,
      "step": 2067280
    },
    {
      "epoch": 3.3831817914023685,
      "grad_norm": 0.48545047640800476,
      "learning_rate": 3.2549435633191227e-06,
      "loss": 0.0128,
      "step": 2067300
    },
    {
      "epoch": 3.3832145218410217,
      "grad_norm": 0.4039449393749237,
      "learning_rate": 3.254877671105606e-06,
      "loss": 0.0112,
      "step": 2067320
    },
    {
      "epoch": 3.3832472522796753,
      "grad_norm": 0.4713905155658722,
      "learning_rate": 3.2548117788920886e-06,
      "loss": 0.0148,
      "step": 2067340
    },
    {
      "epoch": 3.3832799827183284,
      "grad_norm": 0.612287163734436,
      "learning_rate": 3.2547458866785714e-06,
      "loss": 0.0152,
      "step": 2067360
    },
    {
      "epoch": 3.3833127131569816,
      "grad_norm": 0.08840687572956085,
      "learning_rate": 3.254679994465054e-06,
      "loss": 0.0095,
      "step": 2067380
    },
    {
      "epoch": 3.383345443595635,
      "grad_norm": 0.4440035820007324,
      "learning_rate": 3.254614102251537e-06,
      "loss": 0.0099,
      "step": 2067400
    },
    {
      "epoch": 3.3833781740342883,
      "grad_norm": 0.7343732714653015,
      "learning_rate": 3.25454821003802e-06,
      "loss": 0.0116,
      "step": 2067420
    },
    {
      "epoch": 3.383410904472942,
      "grad_norm": 0.10090143978595734,
      "learning_rate": 3.2544823178245028e-06,
      "loss": 0.0132,
      "step": 2067440
    },
    {
      "epoch": 3.383443634911595,
      "grad_norm": 0.176791712641716,
      "learning_rate": 3.2544164256109855e-06,
      "loss": 0.0162,
      "step": 2067460
    },
    {
      "epoch": 3.3834763653502486,
      "grad_norm": 0.23812668025493622,
      "learning_rate": 3.2543505333974682e-06,
      "loss": 0.0081,
      "step": 2067480
    },
    {
      "epoch": 3.383509095788902,
      "grad_norm": 0.5655927658081055,
      "learning_rate": 3.254284641183952e-06,
      "loss": 0.0103,
      "step": 2067500
    },
    {
      "epoch": 3.383541826227555,
      "grad_norm": 0.08545155823230743,
      "learning_rate": 3.2542187489704346e-06,
      "loss": 0.0089,
      "step": 2067520
    },
    {
      "epoch": 3.3835745566662085,
      "grad_norm": 0.14652833342552185,
      "learning_rate": 3.2541528567569173e-06,
      "loss": 0.0114,
      "step": 2067540
    },
    {
      "epoch": 3.3836072871048617,
      "grad_norm": 0.39106452465057373,
      "learning_rate": 3.2540869645434005e-06,
      "loss": 0.0098,
      "step": 2067560
    },
    {
      "epoch": 3.3836400175435153,
      "grad_norm": 0.30549347400665283,
      "learning_rate": 3.2540210723298832e-06,
      "loss": 0.0109,
      "step": 2067580
    },
    {
      "epoch": 3.3836727479821684,
      "grad_norm": 0.46984055638313293,
      "learning_rate": 3.253955180116366e-06,
      "loss": 0.0111,
      "step": 2067600
    },
    {
      "epoch": 3.383705478420822,
      "grad_norm": 0.26773473620414734,
      "learning_rate": 3.2538892879028487e-06,
      "loss": 0.0086,
      "step": 2067620
    },
    {
      "epoch": 3.383738208859475,
      "grad_norm": 0.20059798657894135,
      "learning_rate": 3.253823395689332e-06,
      "loss": 0.0119,
      "step": 2067640
    },
    {
      "epoch": 3.3837709392981283,
      "grad_norm": 0.4239756166934967,
      "learning_rate": 3.2537575034758146e-06,
      "loss": 0.0135,
      "step": 2067660
    },
    {
      "epoch": 3.383803669736782,
      "grad_norm": 0.7316476702690125,
      "learning_rate": 3.2536916112622973e-06,
      "loss": 0.0148,
      "step": 2067680
    },
    {
      "epoch": 3.383836400175435,
      "grad_norm": 0.44994890689849854,
      "learning_rate": 3.25362571904878e-06,
      "loss": 0.0071,
      "step": 2067700
    },
    {
      "epoch": 3.3838691306140887,
      "grad_norm": 0.20794734358787537,
      "learning_rate": 3.2535598268352632e-06,
      "loss": 0.0102,
      "step": 2067720
    },
    {
      "epoch": 3.383901861052742,
      "grad_norm": 0.07665163278579712,
      "learning_rate": 3.253493934621746e-06,
      "loss": 0.0114,
      "step": 2067740
    },
    {
      "epoch": 3.3839345914913954,
      "grad_norm": 0.6506105065345764,
      "learning_rate": 3.2534280424082287e-06,
      "loss": 0.0107,
      "step": 2067760
    },
    {
      "epoch": 3.3839673219300486,
      "grad_norm": 0.10837998241186142,
      "learning_rate": 3.2533621501947115e-06,
      "loss": 0.0108,
      "step": 2067780
    },
    {
      "epoch": 3.3840000523687017,
      "grad_norm": 0.14734970033168793,
      "learning_rate": 3.2532962579811946e-06,
      "loss": 0.0158,
      "step": 2067800
    },
    {
      "epoch": 3.3840327828073553,
      "grad_norm": 0.5619598031044006,
      "learning_rate": 3.2532303657676774e-06,
      "loss": 0.0191,
      "step": 2067820
    },
    {
      "epoch": 3.3840655132460085,
      "grad_norm": 0.2470177710056305,
      "learning_rate": 3.25316447355416e-06,
      "loss": 0.0119,
      "step": 2067840
    },
    {
      "epoch": 3.384098243684662,
      "grad_norm": 0.15589503943920135,
      "learning_rate": 3.2530985813406437e-06,
      "loss": 0.0119,
      "step": 2067860
    },
    {
      "epoch": 3.384130974123315,
      "grad_norm": 0.476125031709671,
      "learning_rate": 3.2530326891271264e-06,
      "loss": 0.0138,
      "step": 2067880
    },
    {
      "epoch": 3.3841637045619684,
      "grad_norm": 0.13193176686763763,
      "learning_rate": 3.252966796913609e-06,
      "loss": 0.0158,
      "step": 2067900
    },
    {
      "epoch": 3.384196435000622,
      "grad_norm": 0.4318583309650421,
      "learning_rate": 3.252900904700092e-06,
      "loss": 0.0082,
      "step": 2067920
    },
    {
      "epoch": 3.384229165439275,
      "grad_norm": 0.28128284215927124,
      "learning_rate": 3.2528350124865746e-06,
      "loss": 0.013,
      "step": 2067940
    },
    {
      "epoch": 3.3842618958779287,
      "grad_norm": 0.1091679185628891,
      "learning_rate": 3.252769120273058e-06,
      "loss": 0.0122,
      "step": 2067960
    },
    {
      "epoch": 3.384294626316582,
      "grad_norm": 0.11493130773305893,
      "learning_rate": 3.2527032280595406e-06,
      "loss": 0.0097,
      "step": 2067980
    },
    {
      "epoch": 3.384327356755235,
      "grad_norm": 0.5646732449531555,
      "learning_rate": 3.2526373358460233e-06,
      "loss": 0.0132,
      "step": 2068000
    },
    {
      "epoch": 3.3843600871938886,
      "grad_norm": 0.37604284286499023,
      "learning_rate": 3.252571443632506e-06,
      "loss": 0.0166,
      "step": 2068020
    },
    {
      "epoch": 3.3843928176325417,
      "grad_norm": 0.7092586159706116,
      "learning_rate": 3.252505551418989e-06,
      "loss": 0.0087,
      "step": 2068040
    },
    {
      "epoch": 3.3844255480711953,
      "grad_norm": 0.2759876251220703,
      "learning_rate": 3.252439659205472e-06,
      "loss": 0.0111,
      "step": 2068060
    },
    {
      "epoch": 3.3844582785098485,
      "grad_norm": 0.17991535365581512,
      "learning_rate": 3.2523737669919547e-06,
      "loss": 0.0084,
      "step": 2068080
    },
    {
      "epoch": 3.384491008948502,
      "grad_norm": 0.3742021322250366,
      "learning_rate": 3.2523078747784374e-06,
      "loss": 0.014,
      "step": 2068100
    },
    {
      "epoch": 3.3845237393871552,
      "grad_norm": 0.4457341134548187,
      "learning_rate": 3.2522419825649206e-06,
      "loss": 0.0126,
      "step": 2068120
    },
    {
      "epoch": 3.3845564698258084,
      "grad_norm": 0.520420491695404,
      "learning_rate": 3.2521760903514033e-06,
      "loss": 0.0121,
      "step": 2068140
    },
    {
      "epoch": 3.384589200264462,
      "grad_norm": 0.18827803432941437,
      "learning_rate": 3.252110198137886e-06,
      "loss": 0.0153,
      "step": 2068160
    },
    {
      "epoch": 3.384621930703115,
      "grad_norm": 0.45228445529937744,
      "learning_rate": 3.252044305924369e-06,
      "loss": 0.0099,
      "step": 2068180
    },
    {
      "epoch": 3.3846546611417687,
      "grad_norm": 0.17934508621692657,
      "learning_rate": 3.2519784137108524e-06,
      "loss": 0.0104,
      "step": 2068200
    },
    {
      "epoch": 3.384687391580422,
      "grad_norm": 0.2109772115945816,
      "learning_rate": 3.251912521497335e-06,
      "loss": 0.0094,
      "step": 2068220
    },
    {
      "epoch": 3.3847201220190755,
      "grad_norm": 0.050655264407396317,
      "learning_rate": 3.251846629283818e-06,
      "loss": 0.0063,
      "step": 2068240
    },
    {
      "epoch": 3.3847528524577286,
      "grad_norm": 0.30688297748565674,
      "learning_rate": 3.251780737070301e-06,
      "loss": 0.0118,
      "step": 2068260
    },
    {
      "epoch": 3.3847855828963818,
      "grad_norm": 0.341612309217453,
      "learning_rate": 3.2517148448567838e-06,
      "loss": 0.011,
      "step": 2068280
    },
    {
      "epoch": 3.3848183133350354,
      "grad_norm": 0.10316460579633713,
      "learning_rate": 3.2516489526432665e-06,
      "loss": 0.0094,
      "step": 2068300
    },
    {
      "epoch": 3.3848510437736885,
      "grad_norm": 0.14046283066272736,
      "learning_rate": 3.2515830604297492e-06,
      "loss": 0.0105,
      "step": 2068320
    },
    {
      "epoch": 3.384883774212342,
      "grad_norm": 0.15629041194915771,
      "learning_rate": 3.2515171682162324e-06,
      "loss": 0.0132,
      "step": 2068340
    },
    {
      "epoch": 3.3849165046509953,
      "grad_norm": 0.4444364607334137,
      "learning_rate": 3.251451276002715e-06,
      "loss": 0.0118,
      "step": 2068360
    },
    {
      "epoch": 3.384949235089649,
      "grad_norm": 0.11971721798181534,
      "learning_rate": 3.251385383789198e-06,
      "loss": 0.0095,
      "step": 2068380
    },
    {
      "epoch": 3.384981965528302,
      "grad_norm": 0.32743269205093384,
      "learning_rate": 3.2513194915756806e-06,
      "loss": 0.0093,
      "step": 2068400
    },
    {
      "epoch": 3.385014695966955,
      "grad_norm": 0.14682486653327942,
      "learning_rate": 3.2512535993621634e-06,
      "loss": 0.0089,
      "step": 2068420
    },
    {
      "epoch": 3.3850474264056087,
      "grad_norm": 0.250313937664032,
      "learning_rate": 3.2511877071486465e-06,
      "loss": 0.0095,
      "step": 2068440
    },
    {
      "epoch": 3.385080156844262,
      "grad_norm": 0.5489722490310669,
      "learning_rate": 3.2511218149351293e-06,
      "loss": 0.0115,
      "step": 2068460
    },
    {
      "epoch": 3.3851128872829155,
      "grad_norm": 0.5447409749031067,
      "learning_rate": 3.251055922721612e-06,
      "loss": 0.0101,
      "step": 2068480
    },
    {
      "epoch": 3.3851456177215686,
      "grad_norm": 0.4710496962070465,
      "learning_rate": 3.2509900305080948e-06,
      "loss": 0.0135,
      "step": 2068500
    },
    {
      "epoch": 3.3851783481602222,
      "grad_norm": 0.28238654136657715,
      "learning_rate": 3.250924138294578e-06,
      "loss": 0.0158,
      "step": 2068520
    },
    {
      "epoch": 3.3852110785988754,
      "grad_norm": 1.1750410795211792,
      "learning_rate": 3.2508582460810607e-06,
      "loss": 0.011,
      "step": 2068540
    },
    {
      "epoch": 3.3852438090375285,
      "grad_norm": 0.9622818231582642,
      "learning_rate": 3.250792353867544e-06,
      "loss": 0.017,
      "step": 2068560
    },
    {
      "epoch": 3.385276539476182,
      "grad_norm": 0.8043776154518127,
      "learning_rate": 3.250726461654027e-06,
      "loss": 0.0127,
      "step": 2068580
    },
    {
      "epoch": 3.3853092699148353,
      "grad_norm": 0.24684074521064758,
      "learning_rate": 3.2506605694405097e-06,
      "loss": 0.0092,
      "step": 2068600
    },
    {
      "epoch": 3.385342000353489,
      "grad_norm": 0.6877543926239014,
      "learning_rate": 3.2505946772269925e-06,
      "loss": 0.0096,
      "step": 2068620
    },
    {
      "epoch": 3.385374730792142,
      "grad_norm": 0.5467416644096375,
      "learning_rate": 3.250528785013475e-06,
      "loss": 0.011,
      "step": 2068640
    },
    {
      "epoch": 3.3854074612307956,
      "grad_norm": 0.18104733526706696,
      "learning_rate": 3.2504628927999584e-06,
      "loss": 0.0119,
      "step": 2068660
    },
    {
      "epoch": 3.3854401916694488,
      "grad_norm": 0.09002672880887985,
      "learning_rate": 3.250397000586441e-06,
      "loss": 0.0148,
      "step": 2068680
    },
    {
      "epoch": 3.385472922108102,
      "grad_norm": 0.7512427568435669,
      "learning_rate": 3.250331108372924e-06,
      "loss": 0.0092,
      "step": 2068700
    },
    {
      "epoch": 3.3855056525467555,
      "grad_norm": 0.2055681049823761,
      "learning_rate": 3.2502652161594066e-06,
      "loss": 0.0099,
      "step": 2068720
    },
    {
      "epoch": 3.3855383829854087,
      "grad_norm": 0.3586157262325287,
      "learning_rate": 3.2501993239458897e-06,
      "loss": 0.013,
      "step": 2068740
    },
    {
      "epoch": 3.3855711134240623,
      "grad_norm": 0.3362131714820862,
      "learning_rate": 3.2501334317323725e-06,
      "loss": 0.0118,
      "step": 2068760
    },
    {
      "epoch": 3.3856038438627154,
      "grad_norm": 0.2915828227996826,
      "learning_rate": 3.2500675395188552e-06,
      "loss": 0.0079,
      "step": 2068780
    },
    {
      "epoch": 3.385636574301369,
      "grad_norm": 0.27734628319740295,
      "learning_rate": 3.250001647305338e-06,
      "loss": 0.0135,
      "step": 2068800
    },
    {
      "epoch": 3.385669304740022,
      "grad_norm": 0.36855441331863403,
      "learning_rate": 3.2499357550918207e-06,
      "loss": 0.0122,
      "step": 2068820
    },
    {
      "epoch": 3.3857020351786753,
      "grad_norm": 0.3013194799423218,
      "learning_rate": 3.249869862878304e-06,
      "loss": 0.0126,
      "step": 2068840
    },
    {
      "epoch": 3.385734765617329,
      "grad_norm": 0.153254434466362,
      "learning_rate": 3.2498039706647866e-06,
      "loss": 0.0156,
      "step": 2068860
    },
    {
      "epoch": 3.385767496055982,
      "grad_norm": 0.24065394699573517,
      "learning_rate": 3.2497380784512693e-06,
      "loss": 0.0146,
      "step": 2068880
    },
    {
      "epoch": 3.3858002264946356,
      "grad_norm": 0.7824857234954834,
      "learning_rate": 3.249672186237753e-06,
      "loss": 0.0092,
      "step": 2068900
    },
    {
      "epoch": 3.385832956933289,
      "grad_norm": 0.12644115090370178,
      "learning_rate": 3.2496062940242357e-06,
      "loss": 0.015,
      "step": 2068920
    },
    {
      "epoch": 3.3858656873719424,
      "grad_norm": 0.17040526866912842,
      "learning_rate": 3.2495404018107184e-06,
      "loss": 0.0102,
      "step": 2068940
    },
    {
      "epoch": 3.3858984178105955,
      "grad_norm": 0.3581697344779968,
      "learning_rate": 3.249474509597201e-06,
      "loss": 0.0097,
      "step": 2068960
    },
    {
      "epoch": 3.3859311482492487,
      "grad_norm": 0.1860002875328064,
      "learning_rate": 3.2494086173836843e-06,
      "loss": 0.0106,
      "step": 2068980
    },
    {
      "epoch": 3.3859638786879023,
      "grad_norm": 0.1950446367263794,
      "learning_rate": 3.249342725170167e-06,
      "loss": 0.0101,
      "step": 2069000
    },
    {
      "epoch": 3.3859966091265554,
      "grad_norm": 0.22009745240211487,
      "learning_rate": 3.24927683295665e-06,
      "loss": 0.0103,
      "step": 2069020
    },
    {
      "epoch": 3.386029339565209,
      "grad_norm": 0.14433607459068298,
      "learning_rate": 3.2492109407431325e-06,
      "loss": 0.0158,
      "step": 2069040
    },
    {
      "epoch": 3.386062070003862,
      "grad_norm": 1.0069501399993896,
      "learning_rate": 3.2491450485296157e-06,
      "loss": 0.0118,
      "step": 2069060
    },
    {
      "epoch": 3.3860948004425158,
      "grad_norm": 0.3721854090690613,
      "learning_rate": 3.2490791563160984e-06,
      "loss": 0.0073,
      "step": 2069080
    },
    {
      "epoch": 3.386127530881169,
      "grad_norm": 0.09705840051174164,
      "learning_rate": 3.249013264102581e-06,
      "loss": 0.0133,
      "step": 2069100
    },
    {
      "epoch": 3.386160261319822,
      "grad_norm": 0.19985555112361908,
      "learning_rate": 3.248947371889064e-06,
      "loss": 0.0093,
      "step": 2069120
    },
    {
      "epoch": 3.3861929917584757,
      "grad_norm": 0.24877214431762695,
      "learning_rate": 3.248881479675547e-06,
      "loss": 0.0132,
      "step": 2069140
    },
    {
      "epoch": 3.386225722197129,
      "grad_norm": 0.6681997179985046,
      "learning_rate": 3.24881558746203e-06,
      "loss": 0.012,
      "step": 2069160
    },
    {
      "epoch": 3.3862584526357824,
      "grad_norm": 1.6642802953720093,
      "learning_rate": 3.2487496952485126e-06,
      "loss": 0.0107,
      "step": 2069180
    },
    {
      "epoch": 3.3862911830744356,
      "grad_norm": 0.17976437509059906,
      "learning_rate": 3.2486838030349953e-06,
      "loss": 0.0107,
      "step": 2069200
    },
    {
      "epoch": 3.386323913513089,
      "grad_norm": 0.5292035937309265,
      "learning_rate": 3.2486179108214785e-06,
      "loss": 0.0108,
      "step": 2069220
    },
    {
      "epoch": 3.3863566439517423,
      "grad_norm": 0.3106590211391449,
      "learning_rate": 3.248552018607961e-06,
      "loss": 0.0101,
      "step": 2069240
    },
    {
      "epoch": 3.3863893743903954,
      "grad_norm": 0.09811894595623016,
      "learning_rate": 3.2484861263944444e-06,
      "loss": 0.0099,
      "step": 2069260
    },
    {
      "epoch": 3.386422104829049,
      "grad_norm": 0.09808927029371262,
      "learning_rate": 3.2484202341809275e-06,
      "loss": 0.0084,
      "step": 2069280
    },
    {
      "epoch": 3.386454835267702,
      "grad_norm": 0.38288965821266174,
      "learning_rate": 3.2483543419674103e-06,
      "loss": 0.0124,
      "step": 2069300
    },
    {
      "epoch": 3.386487565706356,
      "grad_norm": 0.02954534813761711,
      "learning_rate": 3.248288449753893e-06,
      "loss": 0.016,
      "step": 2069320
    },
    {
      "epoch": 3.386520296145009,
      "grad_norm": 0.36628031730651855,
      "learning_rate": 3.2482225575403757e-06,
      "loss": 0.0133,
      "step": 2069340
    },
    {
      "epoch": 3.386553026583662,
      "grad_norm": 0.14350417256355286,
      "learning_rate": 3.2481566653268585e-06,
      "loss": 0.0097,
      "step": 2069360
    },
    {
      "epoch": 3.3865857570223157,
      "grad_norm": 0.22360827028751373,
      "learning_rate": 3.2480907731133417e-06,
      "loss": 0.0125,
      "step": 2069380
    },
    {
      "epoch": 3.386618487460969,
      "grad_norm": 0.4513370394706726,
      "learning_rate": 3.2480248808998244e-06,
      "loss": 0.015,
      "step": 2069400
    },
    {
      "epoch": 3.3866512178996224,
      "grad_norm": 0.2881884276866913,
      "learning_rate": 3.247958988686307e-06,
      "loss": 0.0227,
      "step": 2069420
    },
    {
      "epoch": 3.3866839483382756,
      "grad_norm": 0.26918020844459534,
      "learning_rate": 3.24789309647279e-06,
      "loss": 0.0123,
      "step": 2069440
    },
    {
      "epoch": 3.3867166787769287,
      "grad_norm": 0.2544373571872711,
      "learning_rate": 3.247827204259273e-06,
      "loss": 0.0139,
      "step": 2069460
    },
    {
      "epoch": 3.3867494092155823,
      "grad_norm": 0.10163771361112595,
      "learning_rate": 3.2477613120457558e-06,
      "loss": 0.0089,
      "step": 2069480
    },
    {
      "epoch": 3.3867821396542355,
      "grad_norm": 0.09776312857866287,
      "learning_rate": 3.2476954198322385e-06,
      "loss": 0.0085,
      "step": 2069500
    },
    {
      "epoch": 3.386814870092889,
      "grad_norm": 1.1465251445770264,
      "learning_rate": 3.2476295276187213e-06,
      "loss": 0.0149,
      "step": 2069520
    },
    {
      "epoch": 3.386847600531542,
      "grad_norm": 0.22527103126049042,
      "learning_rate": 3.2475636354052044e-06,
      "loss": 0.0116,
      "step": 2069540
    },
    {
      "epoch": 3.386880330970196,
      "grad_norm": 0.17351701855659485,
      "learning_rate": 3.247497743191687e-06,
      "loss": 0.0113,
      "step": 2069560
    },
    {
      "epoch": 3.386913061408849,
      "grad_norm": 0.4581925868988037,
      "learning_rate": 3.24743185097817e-06,
      "loss": 0.0113,
      "step": 2069580
    },
    {
      "epoch": 3.386945791847502,
      "grad_norm": 0.20180004835128784,
      "learning_rate": 3.2473659587646526e-06,
      "loss": 0.0139,
      "step": 2069600
    },
    {
      "epoch": 3.3869785222861557,
      "grad_norm": 0.2573484778404236,
      "learning_rate": 3.2473000665511362e-06,
      "loss": 0.0097,
      "step": 2069620
    },
    {
      "epoch": 3.387011252724809,
      "grad_norm": 0.09852313995361328,
      "learning_rate": 3.247234174337619e-06,
      "loss": 0.0108,
      "step": 2069640
    },
    {
      "epoch": 3.3870439831634624,
      "grad_norm": 0.343036949634552,
      "learning_rate": 3.2471682821241017e-06,
      "loss": 0.016,
      "step": 2069660
    },
    {
      "epoch": 3.3870767136021156,
      "grad_norm": 0.28298211097717285,
      "learning_rate": 3.247102389910585e-06,
      "loss": 0.0135,
      "step": 2069680
    },
    {
      "epoch": 3.387109444040769,
      "grad_norm": 0.14072227478027344,
      "learning_rate": 3.2470364976970676e-06,
      "loss": 0.0121,
      "step": 2069700
    },
    {
      "epoch": 3.3871421744794223,
      "grad_norm": 1.619238257408142,
      "learning_rate": 3.2469706054835503e-06,
      "loss": 0.0123,
      "step": 2069720
    },
    {
      "epoch": 3.3871749049180755,
      "grad_norm": 0.624445378780365,
      "learning_rate": 3.246904713270033e-06,
      "loss": 0.018,
      "step": 2069740
    },
    {
      "epoch": 3.387207635356729,
      "grad_norm": 0.23728236556053162,
      "learning_rate": 3.2468388210565162e-06,
      "loss": 0.0086,
      "step": 2069760
    },
    {
      "epoch": 3.3872403657953822,
      "grad_norm": 0.33404773473739624,
      "learning_rate": 3.246772928842999e-06,
      "loss": 0.0085,
      "step": 2069780
    },
    {
      "epoch": 3.387273096234036,
      "grad_norm": 0.45241978764533997,
      "learning_rate": 3.2467070366294817e-06,
      "loss": 0.0061,
      "step": 2069800
    },
    {
      "epoch": 3.387305826672689,
      "grad_norm": 0.05277376249432564,
      "learning_rate": 3.2466411444159645e-06,
      "loss": 0.0127,
      "step": 2069820
    },
    {
      "epoch": 3.3873385571113426,
      "grad_norm": 0.2000085413455963,
      "learning_rate": 3.246575252202447e-06,
      "loss": 0.0124,
      "step": 2069840
    },
    {
      "epoch": 3.3873712875499957,
      "grad_norm": 0.15125538408756256,
      "learning_rate": 3.2465093599889304e-06,
      "loss": 0.0167,
      "step": 2069860
    },
    {
      "epoch": 3.387404017988649,
      "grad_norm": 0.11096956580877304,
      "learning_rate": 3.246443467775413e-06,
      "loss": 0.0088,
      "step": 2069880
    },
    {
      "epoch": 3.3874367484273025,
      "grad_norm": 0.08623745292425156,
      "learning_rate": 3.246377575561896e-06,
      "loss": 0.0079,
      "step": 2069900
    },
    {
      "epoch": 3.3874694788659556,
      "grad_norm": 0.22087518870830536,
      "learning_rate": 3.2463116833483786e-06,
      "loss": 0.007,
      "step": 2069920
    },
    {
      "epoch": 3.387502209304609,
      "grad_norm": 0.1159949004650116,
      "learning_rate": 3.2462457911348618e-06,
      "loss": 0.019,
      "step": 2069940
    },
    {
      "epoch": 3.3875349397432624,
      "grad_norm": 0.5876510739326477,
      "learning_rate": 3.246179898921345e-06,
      "loss": 0.0097,
      "step": 2069960
    },
    {
      "epoch": 3.387567670181916,
      "grad_norm": 0.22702616453170776,
      "learning_rate": 3.2461140067078277e-06,
      "loss": 0.0121,
      "step": 2069980
    },
    {
      "epoch": 3.387600400620569,
      "grad_norm": 0.193526953458786,
      "learning_rate": 3.246048114494311e-06,
      "loss": 0.0131,
      "step": 2070000
    },
    {
      "epoch": 3.3876331310592223,
      "grad_norm": 0.18108902871608734,
      "learning_rate": 3.2459822222807936e-06,
      "loss": 0.0145,
      "step": 2070020
    },
    {
      "epoch": 3.387665861497876,
      "grad_norm": 0.16987666487693787,
      "learning_rate": 3.2459163300672763e-06,
      "loss": 0.0139,
      "step": 2070040
    },
    {
      "epoch": 3.387698591936529,
      "grad_norm": 0.3464086949825287,
      "learning_rate": 3.245850437853759e-06,
      "loss": 0.0125,
      "step": 2070060
    },
    {
      "epoch": 3.3877313223751826,
      "grad_norm": 0.046551913022994995,
      "learning_rate": 3.245784545640242e-06,
      "loss": 0.0153,
      "step": 2070080
    },
    {
      "epoch": 3.3877640528138357,
      "grad_norm": 0.12734933197498322,
      "learning_rate": 3.245718653426725e-06,
      "loss": 0.0069,
      "step": 2070100
    },
    {
      "epoch": 3.3877967832524893,
      "grad_norm": 0.17029277980327606,
      "learning_rate": 3.2456527612132077e-06,
      "loss": 0.0126,
      "step": 2070120
    },
    {
      "epoch": 3.3878295136911425,
      "grad_norm": 0.38604670763015747,
      "learning_rate": 3.2455868689996904e-06,
      "loss": 0.0102,
      "step": 2070140
    },
    {
      "epoch": 3.3878622441297956,
      "grad_norm": 0.1835833340883255,
      "learning_rate": 3.2455209767861736e-06,
      "loss": 0.0099,
      "step": 2070160
    },
    {
      "epoch": 3.3878949745684492,
      "grad_norm": 0.08620580285787582,
      "learning_rate": 3.2454550845726563e-06,
      "loss": 0.01,
      "step": 2070180
    },
    {
      "epoch": 3.3879277050071024,
      "grad_norm": 0.5214924812316895,
      "learning_rate": 3.245389192359139e-06,
      "loss": 0.0111,
      "step": 2070200
    },
    {
      "epoch": 3.387960435445756,
      "grad_norm": 0.42797765135765076,
      "learning_rate": 3.245323300145622e-06,
      "loss": 0.0087,
      "step": 2070220
    },
    {
      "epoch": 3.387993165884409,
      "grad_norm": 0.21414712071418762,
      "learning_rate": 3.2452574079321045e-06,
      "loss": 0.0108,
      "step": 2070240
    },
    {
      "epoch": 3.3880258963230627,
      "grad_norm": 0.13491050899028778,
      "learning_rate": 3.2451915157185877e-06,
      "loss": 0.0091,
      "step": 2070260
    },
    {
      "epoch": 3.388058626761716,
      "grad_norm": 0.5641882419586182,
      "learning_rate": 3.2451256235050704e-06,
      "loss": 0.0121,
      "step": 2070280
    },
    {
      "epoch": 3.388091357200369,
      "grad_norm": 0.4630547761917114,
      "learning_rate": 3.245059731291553e-06,
      "loss": 0.013,
      "step": 2070300
    },
    {
      "epoch": 3.3881240876390226,
      "grad_norm": 0.212661474943161,
      "learning_rate": 3.2449938390780368e-06,
      "loss": 0.0078,
      "step": 2070320
    },
    {
      "epoch": 3.3881568180776758,
      "grad_norm": 0.38126426935195923,
      "learning_rate": 3.2449279468645195e-06,
      "loss": 0.0093,
      "step": 2070340
    },
    {
      "epoch": 3.3881895485163294,
      "grad_norm": 0.3423089385032654,
      "learning_rate": 3.2448620546510023e-06,
      "loss": 0.009,
      "step": 2070360
    },
    {
      "epoch": 3.3882222789549825,
      "grad_norm": 0.3053722083568573,
      "learning_rate": 3.244796162437485e-06,
      "loss": 0.0109,
      "step": 2070380
    },
    {
      "epoch": 3.388255009393636,
      "grad_norm": 0.0436752550303936,
      "learning_rate": 3.244730270223968e-06,
      "loss": 0.015,
      "step": 2070400
    },
    {
      "epoch": 3.3882877398322893,
      "grad_norm": 0.38993921875953674,
      "learning_rate": 3.244664378010451e-06,
      "loss": 0.0104,
      "step": 2070420
    },
    {
      "epoch": 3.3883204702709424,
      "grad_norm": 0.10185171663761139,
      "learning_rate": 3.2445984857969336e-06,
      "loss": 0.0072,
      "step": 2070440
    },
    {
      "epoch": 3.388353200709596,
      "grad_norm": 0.054193418473005295,
      "learning_rate": 3.2445325935834164e-06,
      "loss": 0.0101,
      "step": 2070460
    },
    {
      "epoch": 3.388385931148249,
      "grad_norm": 0.3311351537704468,
      "learning_rate": 3.2444667013698995e-06,
      "loss": 0.0092,
      "step": 2070480
    },
    {
      "epoch": 3.3884186615869027,
      "grad_norm": 0.2582881450653076,
      "learning_rate": 3.2444008091563823e-06,
      "loss": 0.0141,
      "step": 2070500
    },
    {
      "epoch": 3.388451392025556,
      "grad_norm": 0.15231002867221832,
      "learning_rate": 3.244334916942865e-06,
      "loss": 0.0145,
      "step": 2070520
    },
    {
      "epoch": 3.3884841224642095,
      "grad_norm": 0.22432267665863037,
      "learning_rate": 3.2442690247293478e-06,
      "loss": 0.0097,
      "step": 2070540
    },
    {
      "epoch": 3.3885168529028626,
      "grad_norm": 0.18432657420635223,
      "learning_rate": 3.244203132515831e-06,
      "loss": 0.0072,
      "step": 2070560
    },
    {
      "epoch": 3.388549583341516,
      "grad_norm": 0.11788682639598846,
      "learning_rate": 3.2441372403023137e-06,
      "loss": 0.0072,
      "step": 2070580
    },
    {
      "epoch": 3.3885823137801694,
      "grad_norm": 0.17971983551979065,
      "learning_rate": 3.2440713480887964e-06,
      "loss": 0.0115,
      "step": 2070600
    },
    {
      "epoch": 3.3886150442188225,
      "grad_norm": 0.18449720740318298,
      "learning_rate": 3.244005455875279e-06,
      "loss": 0.0087,
      "step": 2070620
    },
    {
      "epoch": 3.388647774657476,
      "grad_norm": 0.1179221048951149,
      "learning_rate": 3.2439395636617623e-06,
      "loss": 0.0105,
      "step": 2070640
    },
    {
      "epoch": 3.3886805050961293,
      "grad_norm": 0.22214138507843018,
      "learning_rate": 3.2438736714482455e-06,
      "loss": 0.0097,
      "step": 2070660
    },
    {
      "epoch": 3.388713235534783,
      "grad_norm": 0.22519409656524658,
      "learning_rate": 3.243807779234728e-06,
      "loss": 0.0112,
      "step": 2070680
    },
    {
      "epoch": 3.388745965973436,
      "grad_norm": 0.14487604796886444,
      "learning_rate": 3.2437418870212114e-06,
      "loss": 0.0099,
      "step": 2070700
    },
    {
      "epoch": 3.388778696412089,
      "grad_norm": 0.3802381753921509,
      "learning_rate": 3.243675994807694e-06,
      "loss": 0.0099,
      "step": 2070720
    },
    {
      "epoch": 3.3888114268507428,
      "grad_norm": 0.19100964069366455,
      "learning_rate": 3.243610102594177e-06,
      "loss": 0.0076,
      "step": 2070740
    },
    {
      "epoch": 3.388844157289396,
      "grad_norm": 0.24405387043952942,
      "learning_rate": 3.2435442103806596e-06,
      "loss": 0.0104,
      "step": 2070760
    },
    {
      "epoch": 3.3888768877280495,
      "grad_norm": 1.0200529098510742,
      "learning_rate": 3.2434783181671423e-06,
      "loss": 0.011,
      "step": 2070780
    },
    {
      "epoch": 3.3889096181667027,
      "grad_norm": 0.14347709715366364,
      "learning_rate": 3.2434124259536255e-06,
      "loss": 0.0134,
      "step": 2070800
    },
    {
      "epoch": 3.3889423486053563,
      "grad_norm": 0.5410715937614441,
      "learning_rate": 3.2433465337401082e-06,
      "loss": 0.0136,
      "step": 2070820
    },
    {
      "epoch": 3.3889750790440094,
      "grad_norm": 0.08826041221618652,
      "learning_rate": 3.243280641526591e-06,
      "loss": 0.0105,
      "step": 2070840
    },
    {
      "epoch": 3.3890078094826626,
      "grad_norm": 0.45337429642677307,
      "learning_rate": 3.2432147493130737e-06,
      "loss": 0.0135,
      "step": 2070860
    },
    {
      "epoch": 3.389040539921316,
      "grad_norm": 0.16579653322696686,
      "learning_rate": 3.243148857099557e-06,
      "loss": 0.0114,
      "step": 2070880
    },
    {
      "epoch": 3.3890732703599693,
      "grad_norm": 0.10446041077375412,
      "learning_rate": 3.2430829648860396e-06,
      "loss": 0.0129,
      "step": 2070900
    },
    {
      "epoch": 3.3891060007986225,
      "grad_norm": 0.14023178815841675,
      "learning_rate": 3.2430170726725224e-06,
      "loss": 0.0105,
      "step": 2070920
    },
    {
      "epoch": 3.389138731237276,
      "grad_norm": 0.35576751828193665,
      "learning_rate": 3.242951180459005e-06,
      "loss": 0.0111,
      "step": 2070940
    },
    {
      "epoch": 3.389171461675929,
      "grad_norm": 0.37794315814971924,
      "learning_rate": 3.2428852882454883e-06,
      "loss": 0.0088,
      "step": 2070960
    },
    {
      "epoch": 3.389204192114583,
      "grad_norm": 0.5556667447090149,
      "learning_rate": 3.242819396031971e-06,
      "loss": 0.0116,
      "step": 2070980
    },
    {
      "epoch": 3.389236922553236,
      "grad_norm": 0.5783346891403198,
      "learning_rate": 3.2427535038184537e-06,
      "loss": 0.0117,
      "step": 2071000
    },
    {
      "epoch": 3.3892696529918895,
      "grad_norm": 0.4032716453075409,
      "learning_rate": 3.2426876116049373e-06,
      "loss": 0.0119,
      "step": 2071020
    },
    {
      "epoch": 3.3893023834305427,
      "grad_norm": 0.20246267318725586,
      "learning_rate": 3.24262171939142e-06,
      "loss": 0.0083,
      "step": 2071040
    },
    {
      "epoch": 3.389335113869196,
      "grad_norm": 0.7783786058425903,
      "learning_rate": 3.242555827177903e-06,
      "loss": 0.0088,
      "step": 2071060
    },
    {
      "epoch": 3.3893678443078494,
      "grad_norm": 0.21399064362049103,
      "learning_rate": 3.2424899349643855e-06,
      "loss": 0.0108,
      "step": 2071080
    },
    {
      "epoch": 3.3894005747465026,
      "grad_norm": 0.6626374125480652,
      "learning_rate": 3.2424240427508687e-06,
      "loss": 0.0104,
      "step": 2071100
    },
    {
      "epoch": 3.389433305185156,
      "grad_norm": 0.361260324716568,
      "learning_rate": 3.2423581505373514e-06,
      "loss": 0.0098,
      "step": 2071120
    },
    {
      "epoch": 3.3894660356238093,
      "grad_norm": 0.16796675324440002,
      "learning_rate": 3.242292258323834e-06,
      "loss": 0.0094,
      "step": 2071140
    },
    {
      "epoch": 3.389498766062463,
      "grad_norm": 0.789581298828125,
      "learning_rate": 3.242226366110317e-06,
      "loss": 0.0143,
      "step": 2071160
    },
    {
      "epoch": 3.389531496501116,
      "grad_norm": 0.09216057509183884,
      "learning_rate": 3.2421604738968e-06,
      "loss": 0.0134,
      "step": 2071180
    },
    {
      "epoch": 3.389564226939769,
      "grad_norm": 0.12232620269060135,
      "learning_rate": 3.242094581683283e-06,
      "loss": 0.0082,
      "step": 2071200
    },
    {
      "epoch": 3.389596957378423,
      "grad_norm": 0.11414933949708939,
      "learning_rate": 3.2420286894697656e-06,
      "loss": 0.0094,
      "step": 2071220
    },
    {
      "epoch": 3.389629687817076,
      "grad_norm": 0.5032145977020264,
      "learning_rate": 3.2419627972562483e-06,
      "loss": 0.0087,
      "step": 2071240
    },
    {
      "epoch": 3.3896624182557296,
      "grad_norm": 0.388290673494339,
      "learning_rate": 3.241896905042731e-06,
      "loss": 0.0121,
      "step": 2071260
    },
    {
      "epoch": 3.3896951486943827,
      "grad_norm": 0.2772996425628662,
      "learning_rate": 3.241831012829214e-06,
      "loss": 0.0088,
      "step": 2071280
    },
    {
      "epoch": 3.3897278791330363,
      "grad_norm": 0.10507436841726303,
      "learning_rate": 3.241765120615697e-06,
      "loss": 0.0142,
      "step": 2071300
    },
    {
      "epoch": 3.3897606095716895,
      "grad_norm": 0.7791433930397034,
      "learning_rate": 3.2416992284021797e-06,
      "loss": 0.0148,
      "step": 2071320
    },
    {
      "epoch": 3.3897933400103426,
      "grad_norm": 0.1473681628704071,
      "learning_rate": 3.2416333361886624e-06,
      "loss": 0.0123,
      "step": 2071340
    },
    {
      "epoch": 3.389826070448996,
      "grad_norm": 0.445094496011734,
      "learning_rate": 3.2415674439751456e-06,
      "loss": 0.0119,
      "step": 2071360
    },
    {
      "epoch": 3.3898588008876493,
      "grad_norm": 0.4548981487751007,
      "learning_rate": 3.2415015517616288e-06,
      "loss": 0.0131,
      "step": 2071380
    },
    {
      "epoch": 3.389891531326303,
      "grad_norm": 0.3075682818889618,
      "learning_rate": 3.2414356595481115e-06,
      "loss": 0.0094,
      "step": 2071400
    },
    {
      "epoch": 3.389924261764956,
      "grad_norm": 0.2920781373977661,
      "learning_rate": 3.2413697673345947e-06,
      "loss": 0.0112,
      "step": 2071420
    },
    {
      "epoch": 3.3899569922036097,
      "grad_norm": 0.7121868133544922,
      "learning_rate": 3.2413038751210774e-06,
      "loss": 0.0153,
      "step": 2071440
    },
    {
      "epoch": 3.389989722642263,
      "grad_norm": 0.12471390515565872,
      "learning_rate": 3.24123798290756e-06,
      "loss": 0.0107,
      "step": 2071460
    },
    {
      "epoch": 3.390022453080916,
      "grad_norm": 0.5602366924285889,
      "learning_rate": 3.241172090694043e-06,
      "loss": 0.0094,
      "step": 2071480
    },
    {
      "epoch": 3.3900551835195696,
      "grad_norm": 1.7703537940979004,
      "learning_rate": 3.241106198480526e-06,
      "loss": 0.0139,
      "step": 2071500
    },
    {
      "epoch": 3.3900879139582227,
      "grad_norm": 0.20114484429359436,
      "learning_rate": 3.2410403062670088e-06,
      "loss": 0.0115,
      "step": 2071520
    },
    {
      "epoch": 3.3901206443968763,
      "grad_norm": 0.2862675189971924,
      "learning_rate": 3.2409744140534915e-06,
      "loss": 0.0149,
      "step": 2071540
    },
    {
      "epoch": 3.3901533748355295,
      "grad_norm": 0.18922732770442963,
      "learning_rate": 3.2409085218399743e-06,
      "loss": 0.0079,
      "step": 2071560
    },
    {
      "epoch": 3.390186105274183,
      "grad_norm": 0.20365259051322937,
      "learning_rate": 3.2408426296264574e-06,
      "loss": 0.0104,
      "step": 2071580
    },
    {
      "epoch": 3.390218835712836,
      "grad_norm": 0.6275648474693298,
      "learning_rate": 3.24077673741294e-06,
      "loss": 0.0177,
      "step": 2071600
    },
    {
      "epoch": 3.3902515661514894,
      "grad_norm": 0.0186956524848938,
      "learning_rate": 3.240710845199423e-06,
      "loss": 0.0102,
      "step": 2071620
    },
    {
      "epoch": 3.390284296590143,
      "grad_norm": 0.1326972246170044,
      "learning_rate": 3.2406449529859056e-06,
      "loss": 0.0145,
      "step": 2071640
    },
    {
      "epoch": 3.390317027028796,
      "grad_norm": 0.5448536276817322,
      "learning_rate": 3.240579060772389e-06,
      "loss": 0.0107,
      "step": 2071660
    },
    {
      "epoch": 3.3903497574674497,
      "grad_norm": 0.3610983192920685,
      "learning_rate": 3.2405131685588715e-06,
      "loss": 0.0115,
      "step": 2071680
    },
    {
      "epoch": 3.390382487906103,
      "grad_norm": 0.24787713587284088,
      "learning_rate": 3.2404472763453543e-06,
      "loss": 0.0088,
      "step": 2071700
    },
    {
      "epoch": 3.3904152183447565,
      "grad_norm": 0.20922285318374634,
      "learning_rate": 3.240381384131838e-06,
      "loss": 0.0104,
      "step": 2071720
    },
    {
      "epoch": 3.3904479487834096,
      "grad_norm": 0.43846842646598816,
      "learning_rate": 3.2403154919183206e-06,
      "loss": 0.0084,
      "step": 2071740
    },
    {
      "epoch": 3.3904806792220628,
      "grad_norm": 0.0845087319612503,
      "learning_rate": 3.2402495997048034e-06,
      "loss": 0.0118,
      "step": 2071760
    },
    {
      "epoch": 3.3905134096607163,
      "grad_norm": 0.18539009988307953,
      "learning_rate": 3.240183707491286e-06,
      "loss": 0.0086,
      "step": 2071780
    },
    {
      "epoch": 3.3905461400993695,
      "grad_norm": 0.2453550100326538,
      "learning_rate": 3.240117815277769e-06,
      "loss": 0.0134,
      "step": 2071800
    },
    {
      "epoch": 3.390578870538023,
      "grad_norm": 0.5432434678077698,
      "learning_rate": 3.240051923064252e-06,
      "loss": 0.0145,
      "step": 2071820
    },
    {
      "epoch": 3.3906116009766762,
      "grad_norm": 0.46287772059440613,
      "learning_rate": 3.2399860308507347e-06,
      "loss": 0.0138,
      "step": 2071840
    },
    {
      "epoch": 3.39064433141533,
      "grad_norm": 0.1464666724205017,
      "learning_rate": 3.2399201386372175e-06,
      "loss": 0.0099,
      "step": 2071860
    },
    {
      "epoch": 3.390677061853983,
      "grad_norm": 0.051658254116773605,
      "learning_rate": 3.2398542464237002e-06,
      "loss": 0.0094,
      "step": 2071880
    },
    {
      "epoch": 3.390709792292636,
      "grad_norm": 0.383706659078598,
      "learning_rate": 3.2397883542101834e-06,
      "loss": 0.0095,
      "step": 2071900
    },
    {
      "epoch": 3.3907425227312897,
      "grad_norm": 0.3083610236644745,
      "learning_rate": 3.239722461996666e-06,
      "loss": 0.0171,
      "step": 2071920
    },
    {
      "epoch": 3.390775253169943,
      "grad_norm": 0.42437899112701416,
      "learning_rate": 3.239656569783149e-06,
      "loss": 0.0115,
      "step": 2071940
    },
    {
      "epoch": 3.3908079836085965,
      "grad_norm": 0.21007972955703735,
      "learning_rate": 3.2395906775696316e-06,
      "loss": 0.0095,
      "step": 2071960
    },
    {
      "epoch": 3.3908407140472496,
      "grad_norm": 0.06803469359874725,
      "learning_rate": 3.2395247853561148e-06,
      "loss": 0.0091,
      "step": 2071980
    },
    {
      "epoch": 3.390873444485903,
      "grad_norm": 0.4138075113296509,
      "learning_rate": 3.2394588931425975e-06,
      "loss": 0.0154,
      "step": 2072000
    },
    {
      "epoch": 3.3909061749245564,
      "grad_norm": 0.9423007369041443,
      "learning_rate": 3.2393930009290802e-06,
      "loss": 0.0114,
      "step": 2072020
    },
    {
      "epoch": 3.3909389053632095,
      "grad_norm": 0.20699037611484528,
      "learning_rate": 3.239327108715563e-06,
      "loss": 0.0098,
      "step": 2072040
    },
    {
      "epoch": 3.390971635801863,
      "grad_norm": 0.5529443621635437,
      "learning_rate": 3.239261216502046e-06,
      "loss": 0.0116,
      "step": 2072060
    },
    {
      "epoch": 3.3910043662405163,
      "grad_norm": 0.1272912323474884,
      "learning_rate": 3.2391953242885293e-06,
      "loss": 0.0111,
      "step": 2072080
    },
    {
      "epoch": 3.39103709667917,
      "grad_norm": 0.19751077890396118,
      "learning_rate": 3.239129432075012e-06,
      "loss": 0.0173,
      "step": 2072100
    },
    {
      "epoch": 3.391069827117823,
      "grad_norm": 0.1999601125717163,
      "learning_rate": 3.239063539861495e-06,
      "loss": 0.0101,
      "step": 2072120
    },
    {
      "epoch": 3.3911025575564766,
      "grad_norm": 0.18002289533615112,
      "learning_rate": 3.238997647647978e-06,
      "loss": 0.0097,
      "step": 2072140
    },
    {
      "epoch": 3.3911352879951298,
      "grad_norm": 0.12147880345582962,
      "learning_rate": 3.2389317554344607e-06,
      "loss": 0.0095,
      "step": 2072160
    },
    {
      "epoch": 3.391168018433783,
      "grad_norm": 0.2076754868030548,
      "learning_rate": 3.2388658632209434e-06,
      "loss": 0.0094,
      "step": 2072180
    },
    {
      "epoch": 3.3912007488724365,
      "grad_norm": 0.27886590361595154,
      "learning_rate": 3.2387999710074266e-06,
      "loss": 0.0117,
      "step": 2072200
    },
    {
      "epoch": 3.3912334793110896,
      "grad_norm": 0.1934589445590973,
      "learning_rate": 3.2387340787939093e-06,
      "loss": 0.015,
      "step": 2072220
    },
    {
      "epoch": 3.3912662097497432,
      "grad_norm": 0.19250395894050598,
      "learning_rate": 3.238668186580392e-06,
      "loss": 0.01,
      "step": 2072240
    },
    {
      "epoch": 3.3912989401883964,
      "grad_norm": 0.3425980508327484,
      "learning_rate": 3.238602294366875e-06,
      "loss": 0.0114,
      "step": 2072260
    },
    {
      "epoch": 3.39133167062705,
      "grad_norm": 0.21477682888507843,
      "learning_rate": 3.2385364021533576e-06,
      "loss": 0.0113,
      "step": 2072280
    },
    {
      "epoch": 3.391364401065703,
      "grad_norm": 0.18888165056705475,
      "learning_rate": 3.2384705099398407e-06,
      "loss": 0.0142,
      "step": 2072300
    },
    {
      "epoch": 3.3913971315043563,
      "grad_norm": 0.2939850389957428,
      "learning_rate": 3.2384046177263235e-06,
      "loss": 0.0088,
      "step": 2072320
    },
    {
      "epoch": 3.39142986194301,
      "grad_norm": 0.46466100215911865,
      "learning_rate": 3.238338725512806e-06,
      "loss": 0.011,
      "step": 2072340
    },
    {
      "epoch": 3.391462592381663,
      "grad_norm": 0.12746678292751312,
      "learning_rate": 3.238272833299289e-06,
      "loss": 0.0123,
      "step": 2072360
    },
    {
      "epoch": 3.3914953228203166,
      "grad_norm": 0.3143260180950165,
      "learning_rate": 3.238206941085772e-06,
      "loss": 0.0115,
      "step": 2072380
    },
    {
      "epoch": 3.3915280532589698,
      "grad_norm": 0.2015143185853958,
      "learning_rate": 3.238141048872255e-06,
      "loss": 0.014,
      "step": 2072400
    },
    {
      "epoch": 3.391560783697623,
      "grad_norm": 0.48100560903549194,
      "learning_rate": 3.238075156658738e-06,
      "loss": 0.0116,
      "step": 2072420
    },
    {
      "epoch": 3.3915935141362765,
      "grad_norm": 0.3044551610946655,
      "learning_rate": 3.238009264445221e-06,
      "loss": 0.0135,
      "step": 2072440
    },
    {
      "epoch": 3.3916262445749297,
      "grad_norm": 0.18688294291496277,
      "learning_rate": 3.237943372231704e-06,
      "loss": 0.0114,
      "step": 2072460
    },
    {
      "epoch": 3.3916589750135833,
      "grad_norm": 0.23577147722244263,
      "learning_rate": 3.2378774800181866e-06,
      "loss": 0.012,
      "step": 2072480
    },
    {
      "epoch": 3.3916917054522364,
      "grad_norm": 0.33208078145980835,
      "learning_rate": 3.2378115878046694e-06,
      "loss": 0.0193,
      "step": 2072500
    },
    {
      "epoch": 3.3917244358908896,
      "grad_norm": 0.1503777951002121,
      "learning_rate": 3.2377456955911525e-06,
      "loss": 0.0095,
      "step": 2072520
    },
    {
      "epoch": 3.391757166329543,
      "grad_norm": 0.4772765636444092,
      "learning_rate": 3.2376798033776353e-06,
      "loss": 0.0072,
      "step": 2072540
    },
    {
      "epoch": 3.3917898967681963,
      "grad_norm": 0.21437491476535797,
      "learning_rate": 3.237613911164118e-06,
      "loss": 0.0123,
      "step": 2072560
    },
    {
      "epoch": 3.39182262720685,
      "grad_norm": 0.27031588554382324,
      "learning_rate": 3.2375480189506008e-06,
      "loss": 0.0121,
      "step": 2072580
    },
    {
      "epoch": 3.391855357645503,
      "grad_norm": 0.1740439385175705,
      "learning_rate": 3.237482126737084e-06,
      "loss": 0.0113,
      "step": 2072600
    },
    {
      "epoch": 3.3918880880841566,
      "grad_norm": 0.2327248603105545,
      "learning_rate": 3.2374162345235667e-06,
      "loss": 0.0113,
      "step": 2072620
    },
    {
      "epoch": 3.39192081852281,
      "grad_norm": 0.1409827172756195,
      "learning_rate": 3.2373503423100494e-06,
      "loss": 0.0145,
      "step": 2072640
    },
    {
      "epoch": 3.391953548961463,
      "grad_norm": 0.2376197725534439,
      "learning_rate": 3.237284450096532e-06,
      "loss": 0.0108,
      "step": 2072660
    },
    {
      "epoch": 3.3919862794001165,
      "grad_norm": 0.5594474077224731,
      "learning_rate": 3.237218557883015e-06,
      "loss": 0.0108,
      "step": 2072680
    },
    {
      "epoch": 3.3920190098387697,
      "grad_norm": 0.3403352200984955,
      "learning_rate": 3.237152665669498e-06,
      "loss": 0.008,
      "step": 2072700
    },
    {
      "epoch": 3.3920517402774233,
      "grad_norm": 0.17443813383579254,
      "learning_rate": 3.237086773455981e-06,
      "loss": 0.0183,
      "step": 2072720
    },
    {
      "epoch": 3.3920844707160764,
      "grad_norm": 0.2828233540058136,
      "learning_rate": 3.2370208812424635e-06,
      "loss": 0.0116,
      "step": 2072740
    },
    {
      "epoch": 3.39211720115473,
      "grad_norm": 0.27027714252471924,
      "learning_rate": 3.2369549890289463e-06,
      "loss": 0.0093,
      "step": 2072760
    },
    {
      "epoch": 3.392149931593383,
      "grad_norm": 0.2725699841976166,
      "learning_rate": 3.23688909681543e-06,
      "loss": 0.0126,
      "step": 2072780
    },
    {
      "epoch": 3.3921826620320363,
      "grad_norm": 0.6874226331710815,
      "learning_rate": 3.2368232046019126e-06,
      "loss": 0.009,
      "step": 2072800
    },
    {
      "epoch": 3.39221539247069,
      "grad_norm": 0.2533436417579651,
      "learning_rate": 3.2367573123883953e-06,
      "loss": 0.008,
      "step": 2072820
    },
    {
      "epoch": 3.392248122909343,
      "grad_norm": 0.7209683656692505,
      "learning_rate": 3.2366914201748785e-06,
      "loss": 0.0105,
      "step": 2072840
    },
    {
      "epoch": 3.3922808533479967,
      "grad_norm": 0.2228330373764038,
      "learning_rate": 3.2366255279613612e-06,
      "loss": 0.0099,
      "step": 2072860
    },
    {
      "epoch": 3.39231358378665,
      "grad_norm": 0.46203291416168213,
      "learning_rate": 3.236559635747844e-06,
      "loss": 0.017,
      "step": 2072880
    },
    {
      "epoch": 3.3923463142253034,
      "grad_norm": 0.506014883518219,
      "learning_rate": 3.2364937435343267e-06,
      "loss": 0.0162,
      "step": 2072900
    },
    {
      "epoch": 3.3923790446639566,
      "grad_norm": 0.37338364124298096,
      "learning_rate": 3.23642785132081e-06,
      "loss": 0.0104,
      "step": 2072920
    },
    {
      "epoch": 3.3924117751026097,
      "grad_norm": 0.26028868556022644,
      "learning_rate": 3.2363619591072926e-06,
      "loss": 0.0091,
      "step": 2072940
    },
    {
      "epoch": 3.3924445055412633,
      "grad_norm": 0.2705174684524536,
      "learning_rate": 3.2362960668937754e-06,
      "loss": 0.0071,
      "step": 2072960
    },
    {
      "epoch": 3.3924772359799165,
      "grad_norm": 0.9113501906394958,
      "learning_rate": 3.236230174680258e-06,
      "loss": 0.0148,
      "step": 2072980
    },
    {
      "epoch": 3.39250996641857,
      "grad_norm": 0.48101377487182617,
      "learning_rate": 3.2361642824667413e-06,
      "loss": 0.011,
      "step": 2073000
    },
    {
      "epoch": 3.392542696857223,
      "grad_norm": 0.17128072679042816,
      "learning_rate": 3.236098390253224e-06,
      "loss": 0.0161,
      "step": 2073020
    },
    {
      "epoch": 3.392575427295877,
      "grad_norm": 0.27741536498069763,
      "learning_rate": 3.2360324980397067e-06,
      "loss": 0.0092,
      "step": 2073040
    },
    {
      "epoch": 3.39260815773453,
      "grad_norm": 0.2398122102022171,
      "learning_rate": 3.2359666058261895e-06,
      "loss": 0.0131,
      "step": 2073060
    },
    {
      "epoch": 3.392640888173183,
      "grad_norm": 0.23850664496421814,
      "learning_rate": 3.2359007136126726e-06,
      "loss": 0.0124,
      "step": 2073080
    },
    {
      "epoch": 3.3926736186118367,
      "grad_norm": 0.508626401424408,
      "learning_rate": 3.2358348213991554e-06,
      "loss": 0.0122,
      "step": 2073100
    },
    {
      "epoch": 3.39270634905049,
      "grad_norm": 1.1719422340393066,
      "learning_rate": 3.2357689291856385e-06,
      "loss": 0.0202,
      "step": 2073120
    },
    {
      "epoch": 3.3927390794891434,
      "grad_norm": 0.2758263349533081,
      "learning_rate": 3.2357030369721217e-06,
      "loss": 0.0134,
      "step": 2073140
    },
    {
      "epoch": 3.3927718099277966,
      "grad_norm": 0.09310798346996307,
      "learning_rate": 3.2356371447586045e-06,
      "loss": 0.0091,
      "step": 2073160
    },
    {
      "epoch": 3.39280454036645,
      "grad_norm": 0.1747918725013733,
      "learning_rate": 3.235571252545087e-06,
      "loss": 0.0087,
      "step": 2073180
    },
    {
      "epoch": 3.3928372708051033,
      "grad_norm": 0.11844576150178909,
      "learning_rate": 3.23550536033157e-06,
      "loss": 0.0132,
      "step": 2073200
    },
    {
      "epoch": 3.3928700012437565,
      "grad_norm": 0.16574804484844208,
      "learning_rate": 3.2354394681180527e-06,
      "loss": 0.0161,
      "step": 2073220
    },
    {
      "epoch": 3.39290273168241,
      "grad_norm": 0.15638290345668793,
      "learning_rate": 3.235373575904536e-06,
      "loss": 0.0137,
      "step": 2073240
    },
    {
      "epoch": 3.3929354621210632,
      "grad_norm": 0.6722050309181213,
      "learning_rate": 3.2353076836910186e-06,
      "loss": 0.012,
      "step": 2073260
    },
    {
      "epoch": 3.392968192559717,
      "grad_norm": 0.05850541964173317,
      "learning_rate": 3.2352417914775013e-06,
      "loss": 0.0114,
      "step": 2073280
    },
    {
      "epoch": 3.39300092299837,
      "grad_norm": 0.41145166754722595,
      "learning_rate": 3.235175899263984e-06,
      "loss": 0.0121,
      "step": 2073300
    },
    {
      "epoch": 3.3930336534370236,
      "grad_norm": 0.13987036049365997,
      "learning_rate": 3.2351100070504672e-06,
      "loss": 0.0146,
      "step": 2073320
    },
    {
      "epoch": 3.3930663838756767,
      "grad_norm": 0.14789409935474396,
      "learning_rate": 3.23504411483695e-06,
      "loss": 0.0077,
      "step": 2073340
    },
    {
      "epoch": 3.39309911431433,
      "grad_norm": 0.2843519151210785,
      "learning_rate": 3.2349782226234327e-06,
      "loss": 0.0108,
      "step": 2073360
    },
    {
      "epoch": 3.3931318447529835,
      "grad_norm": 0.7311112284660339,
      "learning_rate": 3.2349123304099154e-06,
      "loss": 0.0154,
      "step": 2073380
    },
    {
      "epoch": 3.3931645751916366,
      "grad_norm": 0.1057475358247757,
      "learning_rate": 3.2348464381963986e-06,
      "loss": 0.0081,
      "step": 2073400
    },
    {
      "epoch": 3.39319730563029,
      "grad_norm": 0.04644829407334328,
      "learning_rate": 3.2347805459828813e-06,
      "loss": 0.0154,
      "step": 2073420
    },
    {
      "epoch": 3.3932300360689434,
      "grad_norm": 0.5523360371589661,
      "learning_rate": 3.234714653769364e-06,
      "loss": 0.0185,
      "step": 2073440
    },
    {
      "epoch": 3.393262766507597,
      "grad_norm": 0.14215584099292755,
      "learning_rate": 3.234648761555847e-06,
      "loss": 0.0104,
      "step": 2073460
    },
    {
      "epoch": 3.39329549694625,
      "grad_norm": 0.48268941044807434,
      "learning_rate": 3.2345828693423304e-06,
      "loss": 0.0098,
      "step": 2073480
    },
    {
      "epoch": 3.3933282273849033,
      "grad_norm": 0.4367384612560272,
      "learning_rate": 3.234516977128813e-06,
      "loss": 0.0157,
      "step": 2073500
    },
    {
      "epoch": 3.393360957823557,
      "grad_norm": 0.5045498013496399,
      "learning_rate": 3.234451084915296e-06,
      "loss": 0.0189,
      "step": 2073520
    },
    {
      "epoch": 3.39339368826221,
      "grad_norm": 0.10628221929073334,
      "learning_rate": 3.234385192701779e-06,
      "loss": 0.0099,
      "step": 2073540
    },
    {
      "epoch": 3.3934264187008636,
      "grad_norm": 0.34153538942337036,
      "learning_rate": 3.2343193004882618e-06,
      "loss": 0.0074,
      "step": 2073560
    },
    {
      "epoch": 3.3934591491395167,
      "grad_norm": 0.4704210162162781,
      "learning_rate": 3.2342534082747445e-06,
      "loss": 0.0152,
      "step": 2073580
    },
    {
      "epoch": 3.3934918795781703,
      "grad_norm": 0.441697359085083,
      "learning_rate": 3.2341875160612273e-06,
      "loss": 0.0136,
      "step": 2073600
    },
    {
      "epoch": 3.3935246100168235,
      "grad_norm": 0.6113491654396057,
      "learning_rate": 3.2341216238477104e-06,
      "loss": 0.0128,
      "step": 2073620
    },
    {
      "epoch": 3.3935573404554766,
      "grad_norm": 0.09562976658344269,
      "learning_rate": 3.234055731634193e-06,
      "loss": 0.0095,
      "step": 2073640
    },
    {
      "epoch": 3.3935900708941302,
      "grad_norm": 0.10680854320526123,
      "learning_rate": 3.233989839420676e-06,
      "loss": 0.0099,
      "step": 2073660
    },
    {
      "epoch": 3.3936228013327834,
      "grad_norm": 0.46541130542755127,
      "learning_rate": 3.2339239472071586e-06,
      "loss": 0.0142,
      "step": 2073680
    },
    {
      "epoch": 3.393655531771437,
      "grad_norm": 0.2245561182498932,
      "learning_rate": 3.2338580549936414e-06,
      "loss": 0.0127,
      "step": 2073700
    },
    {
      "epoch": 3.39368826221009,
      "grad_norm": 0.14290177822113037,
      "learning_rate": 3.2337921627801246e-06,
      "loss": 0.0102,
      "step": 2073720
    },
    {
      "epoch": 3.3937209926487437,
      "grad_norm": 0.505660891532898,
      "learning_rate": 3.2337262705666073e-06,
      "loss": 0.0138,
      "step": 2073740
    },
    {
      "epoch": 3.393753723087397,
      "grad_norm": 0.7954457402229309,
      "learning_rate": 3.23366037835309e-06,
      "loss": 0.0119,
      "step": 2073760
    },
    {
      "epoch": 3.39378645352605,
      "grad_norm": 0.2694056034088135,
      "learning_rate": 3.2335944861395728e-06,
      "loss": 0.0162,
      "step": 2073780
    },
    {
      "epoch": 3.3938191839647036,
      "grad_norm": 0.19242052733898163,
      "learning_rate": 3.233528593926056e-06,
      "loss": 0.0097,
      "step": 2073800
    },
    {
      "epoch": 3.3938519144033568,
      "grad_norm": 0.22865267097949982,
      "learning_rate": 3.2334627017125387e-06,
      "loss": 0.0117,
      "step": 2073820
    },
    {
      "epoch": 3.3938846448420104,
      "grad_norm": 0.36463287472724915,
      "learning_rate": 3.233396809499022e-06,
      "loss": 0.0163,
      "step": 2073840
    },
    {
      "epoch": 3.3939173752806635,
      "grad_norm": 0.5355494618415833,
      "learning_rate": 3.233330917285505e-06,
      "loss": 0.0088,
      "step": 2073860
    },
    {
      "epoch": 3.3939501057193167,
      "grad_norm": 0.3320325016975403,
      "learning_rate": 3.2332650250719877e-06,
      "loss": 0.0119,
      "step": 2073880
    },
    {
      "epoch": 3.3939828361579703,
      "grad_norm": 0.1124795526266098,
      "learning_rate": 3.2331991328584705e-06,
      "loss": 0.0122,
      "step": 2073900
    },
    {
      "epoch": 3.3940155665966234,
      "grad_norm": 0.2641623616218567,
      "learning_rate": 3.2331332406449532e-06,
      "loss": 0.0084,
      "step": 2073920
    },
    {
      "epoch": 3.394048297035277,
      "grad_norm": 0.47203391790390015,
      "learning_rate": 3.2330673484314364e-06,
      "loss": 0.0127,
      "step": 2073940
    },
    {
      "epoch": 3.39408102747393,
      "grad_norm": 0.5034887790679932,
      "learning_rate": 3.233001456217919e-06,
      "loss": 0.0081,
      "step": 2073960
    },
    {
      "epoch": 3.3941137579125833,
      "grad_norm": 0.14773552119731903,
      "learning_rate": 3.232935564004402e-06,
      "loss": 0.0125,
      "step": 2073980
    },
    {
      "epoch": 3.394146488351237,
      "grad_norm": 0.518356204032898,
      "learning_rate": 3.2328696717908846e-06,
      "loss": 0.018,
      "step": 2074000
    },
    {
      "epoch": 3.39417921878989,
      "grad_norm": 0.28905564546585083,
      "learning_rate": 3.2328037795773678e-06,
      "loss": 0.0118,
      "step": 2074020
    },
    {
      "epoch": 3.3942119492285436,
      "grad_norm": 0.4308185875415802,
      "learning_rate": 3.2327378873638505e-06,
      "loss": 0.0055,
      "step": 2074040
    },
    {
      "epoch": 3.394244679667197,
      "grad_norm": 0.2599635422229767,
      "learning_rate": 3.2326719951503332e-06,
      "loss": 0.0108,
      "step": 2074060
    },
    {
      "epoch": 3.3942774101058504,
      "grad_norm": 0.6856082081794739,
      "learning_rate": 3.232606102936816e-06,
      "loss": 0.012,
      "step": 2074080
    },
    {
      "epoch": 3.3943101405445035,
      "grad_norm": 0.7108936309814453,
      "learning_rate": 3.2325402107232987e-06,
      "loss": 0.0113,
      "step": 2074100
    },
    {
      "epoch": 3.3943428709831567,
      "grad_norm": 0.32855531573295593,
      "learning_rate": 3.232474318509782e-06,
      "loss": 0.009,
      "step": 2074120
    },
    {
      "epoch": 3.3943756014218103,
      "grad_norm": 0.13825686275959015,
      "learning_rate": 3.2324084262962646e-06,
      "loss": 0.0114,
      "step": 2074140
    },
    {
      "epoch": 3.3944083318604634,
      "grad_norm": 0.40321558713912964,
      "learning_rate": 3.2323425340827474e-06,
      "loss": 0.0091,
      "step": 2074160
    },
    {
      "epoch": 3.394441062299117,
      "grad_norm": 0.2999025285243988,
      "learning_rate": 3.232276641869231e-06,
      "loss": 0.0101,
      "step": 2074180
    },
    {
      "epoch": 3.39447379273777,
      "grad_norm": 0.1777413785457611,
      "learning_rate": 3.2322107496557137e-06,
      "loss": 0.009,
      "step": 2074200
    },
    {
      "epoch": 3.3945065231764238,
      "grad_norm": 0.42644360661506653,
      "learning_rate": 3.2321448574421964e-06,
      "loss": 0.0104,
      "step": 2074220
    },
    {
      "epoch": 3.394539253615077,
      "grad_norm": 0.33051586151123047,
      "learning_rate": 3.232078965228679e-06,
      "loss": 0.0104,
      "step": 2074240
    },
    {
      "epoch": 3.39457198405373,
      "grad_norm": 0.19682596623897552,
      "learning_rate": 3.2320130730151623e-06,
      "loss": 0.0084,
      "step": 2074260
    },
    {
      "epoch": 3.3946047144923837,
      "grad_norm": 0.23021647334098816,
      "learning_rate": 3.231947180801645e-06,
      "loss": 0.0088,
      "step": 2074280
    },
    {
      "epoch": 3.394637444931037,
      "grad_norm": 0.2622491121292114,
      "learning_rate": 3.231881288588128e-06,
      "loss": 0.0154,
      "step": 2074300
    },
    {
      "epoch": 3.3946701753696904,
      "grad_norm": 0.13458159565925598,
      "learning_rate": 3.2318153963746106e-06,
      "loss": 0.0087,
      "step": 2074320
    },
    {
      "epoch": 3.3947029058083436,
      "grad_norm": 0.30646395683288574,
      "learning_rate": 3.2317495041610937e-06,
      "loss": 0.0075,
      "step": 2074340
    },
    {
      "epoch": 3.394735636246997,
      "grad_norm": 0.11547522991895676,
      "learning_rate": 3.2316836119475765e-06,
      "loss": 0.0093,
      "step": 2074360
    },
    {
      "epoch": 3.3947683666856503,
      "grad_norm": 0.40425431728363037,
      "learning_rate": 3.231617719734059e-06,
      "loss": 0.0104,
      "step": 2074380
    },
    {
      "epoch": 3.3948010971243034,
      "grad_norm": 0.6783257722854614,
      "learning_rate": 3.231551827520542e-06,
      "loss": 0.0152,
      "step": 2074400
    },
    {
      "epoch": 3.394833827562957,
      "grad_norm": 0.32485976815223694,
      "learning_rate": 3.231485935307025e-06,
      "loss": 0.0117,
      "step": 2074420
    },
    {
      "epoch": 3.39486655800161,
      "grad_norm": 0.645010769367218,
      "learning_rate": 3.231420043093508e-06,
      "loss": 0.012,
      "step": 2074440
    },
    {
      "epoch": 3.394899288440264,
      "grad_norm": 0.2693129777908325,
      "learning_rate": 3.2313541508799906e-06,
      "loss": 0.0097,
      "step": 2074460
    },
    {
      "epoch": 3.394932018878917,
      "grad_norm": 0.5138903260231018,
      "learning_rate": 3.2312882586664733e-06,
      "loss": 0.0116,
      "step": 2074480
    },
    {
      "epoch": 3.3949647493175705,
      "grad_norm": 0.3294345736503601,
      "learning_rate": 3.2312223664529565e-06,
      "loss": 0.0125,
      "step": 2074500
    },
    {
      "epoch": 3.3949974797562237,
      "grad_norm": 0.29465851187705994,
      "learning_rate": 3.2311564742394392e-06,
      "loss": 0.0131,
      "step": 2074520
    },
    {
      "epoch": 3.395030210194877,
      "grad_norm": 0.12360665202140808,
      "learning_rate": 3.2310905820259224e-06,
      "loss": 0.0102,
      "step": 2074540
    },
    {
      "epoch": 3.3950629406335304,
      "grad_norm": 0.038174986839294434,
      "learning_rate": 3.2310246898124056e-06,
      "loss": 0.0125,
      "step": 2074560
    },
    {
      "epoch": 3.3950956710721836,
      "grad_norm": 0.21154721081256866,
      "learning_rate": 3.2309587975988883e-06,
      "loss": 0.0074,
      "step": 2074580
    },
    {
      "epoch": 3.395128401510837,
      "grad_norm": 0.2401449829339981,
      "learning_rate": 3.230892905385371e-06,
      "loss": 0.01,
      "step": 2074600
    },
    {
      "epoch": 3.3951611319494903,
      "grad_norm": 0.8978796005249023,
      "learning_rate": 3.2308270131718538e-06,
      "loss": 0.0136,
      "step": 2074620
    },
    {
      "epoch": 3.395193862388144,
      "grad_norm": 0.05334495007991791,
      "learning_rate": 3.2307611209583365e-06,
      "loss": 0.0077,
      "step": 2074640
    },
    {
      "epoch": 3.395226592826797,
      "grad_norm": 0.10949710756540298,
      "learning_rate": 3.2306952287448197e-06,
      "loss": 0.0071,
      "step": 2074660
    },
    {
      "epoch": 3.39525932326545,
      "grad_norm": 0.23537439107894897,
      "learning_rate": 3.2306293365313024e-06,
      "loss": 0.0139,
      "step": 2074680
    },
    {
      "epoch": 3.395292053704104,
      "grad_norm": 0.2363707572221756,
      "learning_rate": 3.230563444317785e-06,
      "loss": 0.0114,
      "step": 2074700
    },
    {
      "epoch": 3.395324784142757,
      "grad_norm": 0.2514604926109314,
      "learning_rate": 3.230497552104268e-06,
      "loss": 0.0087,
      "step": 2074720
    },
    {
      "epoch": 3.3953575145814106,
      "grad_norm": 0.23960994184017181,
      "learning_rate": 3.230431659890751e-06,
      "loss": 0.0123,
      "step": 2074740
    },
    {
      "epoch": 3.3953902450200637,
      "grad_norm": 0.09048038721084595,
      "learning_rate": 3.230365767677234e-06,
      "loss": 0.0124,
      "step": 2074760
    },
    {
      "epoch": 3.3954229754587173,
      "grad_norm": 0.39262741804122925,
      "learning_rate": 3.2302998754637165e-06,
      "loss": 0.0143,
      "step": 2074780
    },
    {
      "epoch": 3.3954557058973704,
      "grad_norm": 0.6667353510856628,
      "learning_rate": 3.2302339832501993e-06,
      "loss": 0.0177,
      "step": 2074800
    },
    {
      "epoch": 3.3954884363360236,
      "grad_norm": 0.2892725467681885,
      "learning_rate": 3.2301680910366824e-06,
      "loss": 0.015,
      "step": 2074820
    },
    {
      "epoch": 3.395521166774677,
      "grad_norm": 0.3673132061958313,
      "learning_rate": 3.230102198823165e-06,
      "loss": 0.013,
      "step": 2074840
    },
    {
      "epoch": 3.3955538972133303,
      "grad_norm": 0.06739472597837448,
      "learning_rate": 3.230036306609648e-06,
      "loss": 0.0085,
      "step": 2074860
    },
    {
      "epoch": 3.395586627651984,
      "grad_norm": 0.25582507252693176,
      "learning_rate": 3.2299704143961315e-06,
      "loss": 0.0165,
      "step": 2074880
    },
    {
      "epoch": 3.395619358090637,
      "grad_norm": 0.1578192412853241,
      "learning_rate": 3.2299045221826142e-06,
      "loss": 0.0154,
      "step": 2074900
    },
    {
      "epoch": 3.3956520885292907,
      "grad_norm": 0.3176789879798889,
      "learning_rate": 3.229838629969097e-06,
      "loss": 0.0075,
      "step": 2074920
    },
    {
      "epoch": 3.395684818967944,
      "grad_norm": 0.0640946701169014,
      "learning_rate": 3.2297727377555797e-06,
      "loss": 0.0087,
      "step": 2074940
    },
    {
      "epoch": 3.395717549406597,
      "grad_norm": 0.60899817943573,
      "learning_rate": 3.229706845542063e-06,
      "loss": 0.0146,
      "step": 2074960
    },
    {
      "epoch": 3.3957502798452506,
      "grad_norm": 0.37356865406036377,
      "learning_rate": 3.2296409533285456e-06,
      "loss": 0.0097,
      "step": 2074980
    },
    {
      "epoch": 3.3957830102839037,
      "grad_norm": 0.5078158974647522,
      "learning_rate": 3.2295750611150284e-06,
      "loss": 0.0095,
      "step": 2075000
    },
    {
      "epoch": 3.3958157407225573,
      "grad_norm": 0.12917128205299377,
      "learning_rate": 3.229509168901511e-06,
      "loss": 0.0111,
      "step": 2075020
    },
    {
      "epoch": 3.3958484711612105,
      "grad_norm": 0.09174667298793793,
      "learning_rate": 3.2294432766879943e-06,
      "loss": 0.0095,
      "step": 2075040
    },
    {
      "epoch": 3.395881201599864,
      "grad_norm": 0.08435830473899841,
      "learning_rate": 3.229377384474477e-06,
      "loss": 0.01,
      "step": 2075060
    },
    {
      "epoch": 3.395913932038517,
      "grad_norm": 0.20267736911773682,
      "learning_rate": 3.2293114922609597e-06,
      "loss": 0.0122,
      "step": 2075080
    },
    {
      "epoch": 3.3959466624771704,
      "grad_norm": 0.1616371124982834,
      "learning_rate": 3.2292456000474425e-06,
      "loss": 0.0123,
      "step": 2075100
    },
    {
      "epoch": 3.395979392915824,
      "grad_norm": 0.08407393097877502,
      "learning_rate": 3.2291797078339252e-06,
      "loss": 0.0146,
      "step": 2075120
    },
    {
      "epoch": 3.396012123354477,
      "grad_norm": 0.41586461663246155,
      "learning_rate": 3.2291138156204084e-06,
      "loss": 0.0124,
      "step": 2075140
    },
    {
      "epoch": 3.3960448537931307,
      "grad_norm": 0.4733024537563324,
      "learning_rate": 3.229047923406891e-06,
      "loss": 0.0075,
      "step": 2075160
    },
    {
      "epoch": 3.396077584231784,
      "grad_norm": 0.09497857093811035,
      "learning_rate": 3.228982031193374e-06,
      "loss": 0.011,
      "step": 2075180
    },
    {
      "epoch": 3.3961103146704374,
      "grad_norm": 0.2582448422908783,
      "learning_rate": 3.2289161389798566e-06,
      "loss": 0.0076,
      "step": 2075200
    },
    {
      "epoch": 3.3961430451090906,
      "grad_norm": 0.2231575846672058,
      "learning_rate": 3.2288502467663398e-06,
      "loss": 0.0088,
      "step": 2075220
    },
    {
      "epoch": 3.3961757755477437,
      "grad_norm": 0.24517527222633362,
      "learning_rate": 3.228784354552823e-06,
      "loss": 0.0116,
      "step": 2075240
    },
    {
      "epoch": 3.3962085059863973,
      "grad_norm": 0.22546802461147308,
      "learning_rate": 3.2287184623393057e-06,
      "loss": 0.0082,
      "step": 2075260
    },
    {
      "epoch": 3.3962412364250505,
      "grad_norm": 0.22487947344779968,
      "learning_rate": 3.228652570125789e-06,
      "loss": 0.0121,
      "step": 2075280
    },
    {
      "epoch": 3.396273966863704,
      "grad_norm": 0.10343409329652786,
      "learning_rate": 3.2285866779122716e-06,
      "loss": 0.0124,
      "step": 2075300
    },
    {
      "epoch": 3.3963066973023572,
      "grad_norm": 0.04773049056529999,
      "learning_rate": 3.2285207856987543e-06,
      "loss": 0.0072,
      "step": 2075320
    },
    {
      "epoch": 3.396339427741011,
      "grad_norm": 0.2391633689403534,
      "learning_rate": 3.228454893485237e-06,
      "loss": 0.008,
      "step": 2075340
    },
    {
      "epoch": 3.396372158179664,
      "grad_norm": 0.19237908720970154,
      "learning_rate": 3.2283890012717202e-06,
      "loss": 0.0121,
      "step": 2075360
    },
    {
      "epoch": 3.396404888618317,
      "grad_norm": 0.26322507858276367,
      "learning_rate": 3.228323109058203e-06,
      "loss": 0.0104,
      "step": 2075380
    },
    {
      "epoch": 3.3964376190569707,
      "grad_norm": 0.3983124792575836,
      "learning_rate": 3.2282572168446857e-06,
      "loss": 0.0155,
      "step": 2075400
    },
    {
      "epoch": 3.396470349495624,
      "grad_norm": 0.239801287651062,
      "learning_rate": 3.2281913246311684e-06,
      "loss": 0.0114,
      "step": 2075420
    },
    {
      "epoch": 3.3965030799342775,
      "grad_norm": 0.5060182213783264,
      "learning_rate": 3.2281254324176516e-06,
      "loss": 0.0106,
      "step": 2075440
    },
    {
      "epoch": 3.3965358103729306,
      "grad_norm": 0.2829991281032562,
      "learning_rate": 3.2280595402041343e-06,
      "loss": 0.0121,
      "step": 2075460
    },
    {
      "epoch": 3.3965685408115838,
      "grad_norm": 0.20004378259181976,
      "learning_rate": 3.227993647990617e-06,
      "loss": 0.0139,
      "step": 2075480
    },
    {
      "epoch": 3.3966012712502374,
      "grad_norm": 0.7077115178108215,
      "learning_rate": 3.2279277557771e-06,
      "loss": 0.0093,
      "step": 2075500
    },
    {
      "epoch": 3.3966340016888905,
      "grad_norm": 0.3407596945762634,
      "learning_rate": 3.2278618635635826e-06,
      "loss": 0.0125,
      "step": 2075520
    },
    {
      "epoch": 3.396666732127544,
      "grad_norm": 0.1920848935842514,
      "learning_rate": 3.2277959713500657e-06,
      "loss": 0.0135,
      "step": 2075540
    },
    {
      "epoch": 3.3966994625661973,
      "grad_norm": 0.28516390919685364,
      "learning_rate": 3.2277300791365485e-06,
      "loss": 0.0106,
      "step": 2075560
    },
    {
      "epoch": 3.3967321930048504,
      "grad_norm": 0.180018350481987,
      "learning_rate": 3.227664186923031e-06,
      "loss": 0.0104,
      "step": 2075580
    },
    {
      "epoch": 3.396764923443504,
      "grad_norm": 0.263430118560791,
      "learning_rate": 3.227598294709515e-06,
      "loss": 0.014,
      "step": 2075600
    },
    {
      "epoch": 3.396797653882157,
      "grad_norm": 0.2334914654493332,
      "learning_rate": 3.2275324024959975e-06,
      "loss": 0.0087,
      "step": 2075620
    },
    {
      "epoch": 3.3968303843208107,
      "grad_norm": 0.43399617075920105,
      "learning_rate": 3.2274665102824803e-06,
      "loss": 0.009,
      "step": 2075640
    },
    {
      "epoch": 3.396863114759464,
      "grad_norm": 0.11617515981197357,
      "learning_rate": 3.227400618068963e-06,
      "loss": 0.008,
      "step": 2075660
    },
    {
      "epoch": 3.3968958451981175,
      "grad_norm": 0.32273992896080017,
      "learning_rate": 3.227334725855446e-06,
      "loss": 0.0084,
      "step": 2075680
    },
    {
      "epoch": 3.3969285756367706,
      "grad_norm": 1.3483473062515259,
      "learning_rate": 3.227268833641929e-06,
      "loss": 0.0122,
      "step": 2075700
    },
    {
      "epoch": 3.396961306075424,
      "grad_norm": 0.3649331033229828,
      "learning_rate": 3.2272029414284117e-06,
      "loss": 0.0101,
      "step": 2075720
    },
    {
      "epoch": 3.3969940365140774,
      "grad_norm": 0.7698995471000671,
      "learning_rate": 3.2271370492148944e-06,
      "loss": 0.0133,
      "step": 2075740
    },
    {
      "epoch": 3.3970267669527305,
      "grad_norm": 1.3245024681091309,
      "learning_rate": 3.2270711570013776e-06,
      "loss": 0.01,
      "step": 2075760
    },
    {
      "epoch": 3.397059497391384,
      "grad_norm": 0.2601974308490753,
      "learning_rate": 3.2270052647878603e-06,
      "loss": 0.0089,
      "step": 2075780
    },
    {
      "epoch": 3.3970922278300373,
      "grad_norm": 0.1957969218492508,
      "learning_rate": 3.226939372574343e-06,
      "loss": 0.0138,
      "step": 2075800
    },
    {
      "epoch": 3.397124958268691,
      "grad_norm": 0.20439745485782623,
      "learning_rate": 3.2268734803608258e-06,
      "loss": 0.011,
      "step": 2075820
    },
    {
      "epoch": 3.397157688707344,
      "grad_norm": 0.35988032817840576,
      "learning_rate": 3.226807588147309e-06,
      "loss": 0.0138,
      "step": 2075840
    },
    {
      "epoch": 3.397190419145997,
      "grad_norm": 0.3806096613407135,
      "learning_rate": 3.2267416959337917e-06,
      "loss": 0.0137,
      "step": 2075860
    },
    {
      "epoch": 3.3972231495846508,
      "grad_norm": 0.1570255607366562,
      "learning_rate": 3.2266758037202744e-06,
      "loss": 0.0119,
      "step": 2075880
    },
    {
      "epoch": 3.397255880023304,
      "grad_norm": 0.10568201541900635,
      "learning_rate": 3.226609911506757e-06,
      "loss": 0.0082,
      "step": 2075900
    },
    {
      "epoch": 3.3972886104619575,
      "grad_norm": 0.281261682510376,
      "learning_rate": 3.2265440192932403e-06,
      "loss": 0.0131,
      "step": 2075920
    },
    {
      "epoch": 3.3973213409006107,
      "grad_norm": 0.15974348783493042,
      "learning_rate": 3.2264781270797235e-06,
      "loss": 0.0103,
      "step": 2075940
    },
    {
      "epoch": 3.3973540713392643,
      "grad_norm": 0.26765331625938416,
      "learning_rate": 3.2264122348662062e-06,
      "loss": 0.0151,
      "step": 2075960
    },
    {
      "epoch": 3.3973868017779174,
      "grad_norm": 0.296569287776947,
      "learning_rate": 3.2263463426526894e-06,
      "loss": 0.0081,
      "step": 2075980
    },
    {
      "epoch": 3.3974195322165706,
      "grad_norm": 0.5235512852668762,
      "learning_rate": 3.226280450439172e-06,
      "loss": 0.0154,
      "step": 2076000
    },
    {
      "epoch": 3.397452262655224,
      "grad_norm": 0.40986427664756775,
      "learning_rate": 3.226214558225655e-06,
      "loss": 0.0091,
      "step": 2076020
    },
    {
      "epoch": 3.3974849930938773,
      "grad_norm": 0.05150509998202324,
      "learning_rate": 3.2261486660121376e-06,
      "loss": 0.0115,
      "step": 2076040
    },
    {
      "epoch": 3.397517723532531,
      "grad_norm": 0.2904209494590759,
      "learning_rate": 3.2260827737986203e-06,
      "loss": 0.0162,
      "step": 2076060
    },
    {
      "epoch": 3.397550453971184,
      "grad_norm": 0.1745171844959259,
      "learning_rate": 3.2260168815851035e-06,
      "loss": 0.0104,
      "step": 2076080
    },
    {
      "epoch": 3.3975831844098376,
      "grad_norm": 0.13911500573158264,
      "learning_rate": 3.2259509893715863e-06,
      "loss": 0.0132,
      "step": 2076100
    },
    {
      "epoch": 3.397615914848491,
      "grad_norm": 0.4776959717273712,
      "learning_rate": 3.225885097158069e-06,
      "loss": 0.0111,
      "step": 2076120
    },
    {
      "epoch": 3.397648645287144,
      "grad_norm": 0.40855780243873596,
      "learning_rate": 3.2258192049445517e-06,
      "loss": 0.0132,
      "step": 2076140
    },
    {
      "epoch": 3.3976813757257975,
      "grad_norm": 0.18939192593097687,
      "learning_rate": 3.225753312731035e-06,
      "loss": 0.0092,
      "step": 2076160
    },
    {
      "epoch": 3.3977141061644507,
      "grad_norm": 0.576729953289032,
      "learning_rate": 3.2256874205175176e-06,
      "loss": 0.0111,
      "step": 2076180
    },
    {
      "epoch": 3.3977468366031043,
      "grad_norm": 0.320247620344162,
      "learning_rate": 3.2256215283040004e-06,
      "loss": 0.0108,
      "step": 2076200
    },
    {
      "epoch": 3.3977795670417574,
      "grad_norm": 0.5926691889762878,
      "learning_rate": 3.225555636090483e-06,
      "loss": 0.0098,
      "step": 2076220
    },
    {
      "epoch": 3.397812297480411,
      "grad_norm": 0.09802191704511642,
      "learning_rate": 3.2254897438769663e-06,
      "loss": 0.0136,
      "step": 2076240
    },
    {
      "epoch": 3.397845027919064,
      "grad_norm": 0.3510395586490631,
      "learning_rate": 3.225423851663449e-06,
      "loss": 0.0083,
      "step": 2076260
    },
    {
      "epoch": 3.3978777583577173,
      "grad_norm": 0.25959402322769165,
      "learning_rate": 3.2253579594499318e-06,
      "loss": 0.0114,
      "step": 2076280
    },
    {
      "epoch": 3.397910488796371,
      "grad_norm": 0.2872450649738312,
      "learning_rate": 3.2252920672364153e-06,
      "loss": 0.0109,
      "step": 2076300
    },
    {
      "epoch": 3.397943219235024,
      "grad_norm": 0.2620548605918884,
      "learning_rate": 3.225226175022898e-06,
      "loss": 0.0097,
      "step": 2076320
    },
    {
      "epoch": 3.3979759496736777,
      "grad_norm": 0.16310061514377594,
      "learning_rate": 3.225160282809381e-06,
      "loss": 0.012,
      "step": 2076340
    },
    {
      "epoch": 3.398008680112331,
      "grad_norm": 0.1730635166168213,
      "learning_rate": 3.2250943905958636e-06,
      "loss": 0.0141,
      "step": 2076360
    },
    {
      "epoch": 3.3980414105509844,
      "grad_norm": 0.39933621883392334,
      "learning_rate": 3.2250284983823467e-06,
      "loss": 0.0162,
      "step": 2076380
    },
    {
      "epoch": 3.3980741409896376,
      "grad_norm": 1.0279171466827393,
      "learning_rate": 3.2249626061688295e-06,
      "loss": 0.0159,
      "step": 2076400
    },
    {
      "epoch": 3.3981068714282907,
      "grad_norm": 0.17226211726665497,
      "learning_rate": 3.224896713955312e-06,
      "loss": 0.0108,
      "step": 2076420
    },
    {
      "epoch": 3.3981396018669443,
      "grad_norm": 0.17133024334907532,
      "learning_rate": 3.224830821741795e-06,
      "loss": 0.0091,
      "step": 2076440
    },
    {
      "epoch": 3.3981723323055975,
      "grad_norm": 0.368285208940506,
      "learning_rate": 3.224764929528278e-06,
      "loss": 0.0089,
      "step": 2076460
    },
    {
      "epoch": 3.398205062744251,
      "grad_norm": 0.6249343156814575,
      "learning_rate": 3.224699037314761e-06,
      "loss": 0.0143,
      "step": 2076480
    },
    {
      "epoch": 3.398237793182904,
      "grad_norm": 0.2889619767665863,
      "learning_rate": 3.2246331451012436e-06,
      "loss": 0.0163,
      "step": 2076500
    },
    {
      "epoch": 3.398270523621558,
      "grad_norm": 0.22092697024345398,
      "learning_rate": 3.2245672528877263e-06,
      "loss": 0.0176,
      "step": 2076520
    },
    {
      "epoch": 3.398303254060211,
      "grad_norm": 0.371671199798584,
      "learning_rate": 3.224501360674209e-06,
      "loss": 0.0169,
      "step": 2076540
    },
    {
      "epoch": 3.398335984498864,
      "grad_norm": 0.1395464837551117,
      "learning_rate": 3.2244354684606922e-06,
      "loss": 0.0128,
      "step": 2076560
    },
    {
      "epoch": 3.3983687149375177,
      "grad_norm": 0.08805263042449951,
      "learning_rate": 3.224369576247175e-06,
      "loss": 0.0175,
      "step": 2076580
    },
    {
      "epoch": 3.398401445376171,
      "grad_norm": 0.5704782009124756,
      "learning_rate": 3.2243036840336577e-06,
      "loss": 0.0112,
      "step": 2076600
    },
    {
      "epoch": 3.3984341758148244,
      "grad_norm": 0.15284791588783264,
      "learning_rate": 3.2242377918201405e-06,
      "loss": 0.0078,
      "step": 2076620
    },
    {
      "epoch": 3.3984669062534776,
      "grad_norm": 0.1306374967098236,
      "learning_rate": 3.224171899606624e-06,
      "loss": 0.0109,
      "step": 2076640
    },
    {
      "epoch": 3.398499636692131,
      "grad_norm": 0.27717041969299316,
      "learning_rate": 3.2241060073931068e-06,
      "loss": 0.0101,
      "step": 2076660
    },
    {
      "epoch": 3.3985323671307843,
      "grad_norm": 0.20234611630439758,
      "learning_rate": 3.2240401151795895e-06,
      "loss": 0.0119,
      "step": 2076680
    },
    {
      "epoch": 3.3985650975694375,
      "grad_norm": 0.13638557493686676,
      "learning_rate": 3.2239742229660727e-06,
      "loss": 0.0086,
      "step": 2076700
    },
    {
      "epoch": 3.398597828008091,
      "grad_norm": 0.5917382836341858,
      "learning_rate": 3.2239083307525554e-06,
      "loss": 0.0181,
      "step": 2076720
    },
    {
      "epoch": 3.398630558446744,
      "grad_norm": 0.6456378698348999,
      "learning_rate": 3.223842438539038e-06,
      "loss": 0.0144,
      "step": 2076740
    },
    {
      "epoch": 3.398663288885398,
      "grad_norm": 1.2187769412994385,
      "learning_rate": 3.223776546325521e-06,
      "loss": 0.01,
      "step": 2076760
    },
    {
      "epoch": 3.398696019324051,
      "grad_norm": 0.23513799905776978,
      "learning_rate": 3.223710654112004e-06,
      "loss": 0.01,
      "step": 2076780
    },
    {
      "epoch": 3.3987287497627046,
      "grad_norm": 0.09744326025247574,
      "learning_rate": 3.223644761898487e-06,
      "loss": 0.0123,
      "step": 2076800
    },
    {
      "epoch": 3.3987614802013577,
      "grad_norm": 0.4812595546245575,
      "learning_rate": 3.2235788696849695e-06,
      "loss": 0.0118,
      "step": 2076820
    },
    {
      "epoch": 3.398794210640011,
      "grad_norm": 0.637147843837738,
      "learning_rate": 3.2235129774714523e-06,
      "loss": 0.0168,
      "step": 2076840
    },
    {
      "epoch": 3.3988269410786645,
      "grad_norm": 0.10601205378770828,
      "learning_rate": 3.2234470852579354e-06,
      "loss": 0.0072,
      "step": 2076860
    },
    {
      "epoch": 3.3988596715173176,
      "grad_norm": 0.2628997266292572,
      "learning_rate": 3.223381193044418e-06,
      "loss": 0.0148,
      "step": 2076880
    },
    {
      "epoch": 3.398892401955971,
      "grad_norm": 0.18183070421218872,
      "learning_rate": 3.223315300830901e-06,
      "loss": 0.0119,
      "step": 2076900
    },
    {
      "epoch": 3.3989251323946243,
      "grad_norm": 2.340641498565674,
      "learning_rate": 3.2232494086173837e-06,
      "loss": 0.0164,
      "step": 2076920
    },
    {
      "epoch": 3.3989578628332775,
      "grad_norm": 0.3801044225692749,
      "learning_rate": 3.223183516403867e-06,
      "loss": 0.0101,
      "step": 2076940
    },
    {
      "epoch": 3.398990593271931,
      "grad_norm": 0.3401159942150116,
      "learning_rate": 3.2231176241903496e-06,
      "loss": 0.0141,
      "step": 2076960
    },
    {
      "epoch": 3.3990233237105842,
      "grad_norm": 1.0171194076538086,
      "learning_rate": 3.2230517319768323e-06,
      "loss": 0.012,
      "step": 2076980
    },
    {
      "epoch": 3.399056054149238,
      "grad_norm": 0.5105088353157043,
      "learning_rate": 3.222985839763316e-06,
      "loss": 0.017,
      "step": 2077000
    },
    {
      "epoch": 3.399088784587891,
      "grad_norm": 0.31609126925468445,
      "learning_rate": 3.2229199475497986e-06,
      "loss": 0.0081,
      "step": 2077020
    },
    {
      "epoch": 3.399121515026544,
      "grad_norm": 0.4523751735687256,
      "learning_rate": 3.2228540553362814e-06,
      "loss": 0.0122,
      "step": 2077040
    },
    {
      "epoch": 3.3991542454651977,
      "grad_norm": 0.10984573513269424,
      "learning_rate": 3.222788163122764e-06,
      "loss": 0.0109,
      "step": 2077060
    },
    {
      "epoch": 3.399186975903851,
      "grad_norm": 0.18522825837135315,
      "learning_rate": 3.222722270909247e-06,
      "loss": 0.0095,
      "step": 2077080
    },
    {
      "epoch": 3.3992197063425045,
      "grad_norm": 0.15594260394573212,
      "learning_rate": 3.22265637869573e-06,
      "loss": 0.0135,
      "step": 2077100
    },
    {
      "epoch": 3.3992524367811576,
      "grad_norm": 0.34221339225769043,
      "learning_rate": 3.2225904864822128e-06,
      "loss": 0.0177,
      "step": 2077120
    },
    {
      "epoch": 3.399285167219811,
      "grad_norm": 0.47382521629333496,
      "learning_rate": 3.2225245942686955e-06,
      "loss": 0.0092,
      "step": 2077140
    },
    {
      "epoch": 3.3993178976584644,
      "grad_norm": 0.1774105429649353,
      "learning_rate": 3.2224587020551782e-06,
      "loss": 0.0113,
      "step": 2077160
    },
    {
      "epoch": 3.3993506280971175,
      "grad_norm": 0.2886711061000824,
      "learning_rate": 3.2223928098416614e-06,
      "loss": 0.0137,
      "step": 2077180
    },
    {
      "epoch": 3.399383358535771,
      "grad_norm": 0.2610425055027008,
      "learning_rate": 3.222326917628144e-06,
      "loss": 0.0109,
      "step": 2077200
    },
    {
      "epoch": 3.3994160889744243,
      "grad_norm": 0.24361708760261536,
      "learning_rate": 3.222261025414627e-06,
      "loss": 0.0119,
      "step": 2077220
    },
    {
      "epoch": 3.399448819413078,
      "grad_norm": 0.19905108213424683,
      "learning_rate": 3.2221951332011096e-06,
      "loss": 0.0137,
      "step": 2077240
    },
    {
      "epoch": 3.399481549851731,
      "grad_norm": 0.25408652424812317,
      "learning_rate": 3.2221292409875928e-06,
      "loss": 0.0166,
      "step": 2077260
    },
    {
      "epoch": 3.3995142802903846,
      "grad_norm": 0.1446591317653656,
      "learning_rate": 3.2220633487740755e-06,
      "loss": 0.0133,
      "step": 2077280
    },
    {
      "epoch": 3.3995470107290378,
      "grad_norm": 0.21927234530448914,
      "learning_rate": 3.2219974565605583e-06,
      "loss": 0.0104,
      "step": 2077300
    },
    {
      "epoch": 3.399579741167691,
      "grad_norm": 0.4363788664340973,
      "learning_rate": 3.221931564347041e-06,
      "loss": 0.0114,
      "step": 2077320
    },
    {
      "epoch": 3.3996124716063445,
      "grad_norm": 0.16295142471790314,
      "learning_rate": 3.221865672133524e-06,
      "loss": 0.0118,
      "step": 2077340
    },
    {
      "epoch": 3.3996452020449976,
      "grad_norm": 0.6494606733322144,
      "learning_rate": 3.2217997799200073e-06,
      "loss": 0.012,
      "step": 2077360
    },
    {
      "epoch": 3.3996779324836512,
      "grad_norm": 0.2855108678340912,
      "learning_rate": 3.22173388770649e-06,
      "loss": 0.0086,
      "step": 2077380
    },
    {
      "epoch": 3.3997106629223044,
      "grad_norm": 0.3774527907371521,
      "learning_rate": 3.2216679954929732e-06,
      "loss": 0.0077,
      "step": 2077400
    },
    {
      "epoch": 3.399743393360958,
      "grad_norm": 0.3300912380218506,
      "learning_rate": 3.221602103279456e-06,
      "loss": 0.0134,
      "step": 2077420
    },
    {
      "epoch": 3.399776123799611,
      "grad_norm": 0.8066499829292297,
      "learning_rate": 3.2215362110659387e-06,
      "loss": 0.0156,
      "step": 2077440
    },
    {
      "epoch": 3.3998088542382643,
      "grad_norm": 0.6006604433059692,
      "learning_rate": 3.2214703188524214e-06,
      "loss": 0.0149,
      "step": 2077460
    },
    {
      "epoch": 3.399841584676918,
      "grad_norm": 0.07509662955999374,
      "learning_rate": 3.2214044266389046e-06,
      "loss": 0.0156,
      "step": 2077480
    },
    {
      "epoch": 3.399874315115571,
      "grad_norm": 0.6286619901657104,
      "learning_rate": 3.2213385344253874e-06,
      "loss": 0.0105,
      "step": 2077500
    },
    {
      "epoch": 3.3999070455542246,
      "grad_norm": 0.2963109314441681,
      "learning_rate": 3.22127264221187e-06,
      "loss": 0.0069,
      "step": 2077520
    },
    {
      "epoch": 3.3999397759928778,
      "grad_norm": 0.2299221158027649,
      "learning_rate": 3.221206749998353e-06,
      "loss": 0.013,
      "step": 2077540
    },
    {
      "epoch": 3.3999725064315314,
      "grad_norm": 0.24794791638851166,
      "learning_rate": 3.2211408577848356e-06,
      "loss": 0.0088,
      "step": 2077560
    },
    {
      "epoch": 3.4000052368701845,
      "grad_norm": 0.2538083493709564,
      "learning_rate": 3.2210749655713187e-06,
      "loss": 0.014,
      "step": 2077580
    },
    {
      "epoch": 3.4000379673088377,
      "grad_norm": 0.06455839425325394,
      "learning_rate": 3.2210090733578015e-06,
      "loss": 0.0055,
      "step": 2077600
    },
    {
      "epoch": 3.4000706977474913,
      "grad_norm": 0.21766285598278046,
      "learning_rate": 3.2209431811442842e-06,
      "loss": 0.0105,
      "step": 2077620
    },
    {
      "epoch": 3.4001034281861444,
      "grad_norm": 0.45865383744239807,
      "learning_rate": 3.220877288930767e-06,
      "loss": 0.0118,
      "step": 2077640
    },
    {
      "epoch": 3.400136158624798,
      "grad_norm": 0.08357800543308258,
      "learning_rate": 3.22081139671725e-06,
      "loss": 0.0077,
      "step": 2077660
    },
    {
      "epoch": 3.400168889063451,
      "grad_norm": 0.3784092366695404,
      "learning_rate": 3.220745504503733e-06,
      "loss": 0.0082,
      "step": 2077680
    },
    {
      "epoch": 3.4002016195021048,
      "grad_norm": 0.2935417592525482,
      "learning_rate": 3.220679612290216e-06,
      "loss": 0.0117,
      "step": 2077700
    },
    {
      "epoch": 3.400234349940758,
      "grad_norm": 0.5258503556251526,
      "learning_rate": 3.220613720076699e-06,
      "loss": 0.0136,
      "step": 2077720
    },
    {
      "epoch": 3.400267080379411,
      "grad_norm": 0.5691949129104614,
      "learning_rate": 3.220547827863182e-06,
      "loss": 0.0202,
      "step": 2077740
    },
    {
      "epoch": 3.4002998108180646,
      "grad_norm": 0.2132972925901413,
      "learning_rate": 3.2204819356496647e-06,
      "loss": 0.0134,
      "step": 2077760
    },
    {
      "epoch": 3.400332541256718,
      "grad_norm": 0.2893565595149994,
      "learning_rate": 3.2204160434361474e-06,
      "loss": 0.0149,
      "step": 2077780
    },
    {
      "epoch": 3.4003652716953714,
      "grad_norm": 0.14520485699176788,
      "learning_rate": 3.2203501512226306e-06,
      "loss": 0.0081,
      "step": 2077800
    },
    {
      "epoch": 3.4003980021340245,
      "grad_norm": 0.2122642546892166,
      "learning_rate": 3.2202842590091133e-06,
      "loss": 0.0103,
      "step": 2077820
    },
    {
      "epoch": 3.400430732572678,
      "grad_norm": 0.35427290201187134,
      "learning_rate": 3.220218366795596e-06,
      "loss": 0.0107,
      "step": 2077840
    },
    {
      "epoch": 3.4004634630113313,
      "grad_norm": 0.23346315324306488,
      "learning_rate": 3.2201524745820788e-06,
      "loss": 0.0118,
      "step": 2077860
    },
    {
      "epoch": 3.4004961934499844,
      "grad_norm": 0.4136221408843994,
      "learning_rate": 3.220086582368562e-06,
      "loss": 0.0161,
      "step": 2077880
    },
    {
      "epoch": 3.400528923888638,
      "grad_norm": 1.5940487384796143,
      "learning_rate": 3.2200206901550447e-06,
      "loss": 0.0128,
      "step": 2077900
    },
    {
      "epoch": 3.400561654327291,
      "grad_norm": 0.2383269965648651,
      "learning_rate": 3.2199547979415274e-06,
      "loss": 0.0115,
      "step": 2077920
    },
    {
      "epoch": 3.4005943847659448,
      "grad_norm": 1.0373245477676392,
      "learning_rate": 3.21988890572801e-06,
      "loss": 0.0118,
      "step": 2077940
    },
    {
      "epoch": 3.400627115204598,
      "grad_norm": 0.13281743228435516,
      "learning_rate": 3.219823013514493e-06,
      "loss": 0.0187,
      "step": 2077960
    },
    {
      "epoch": 3.4006598456432515,
      "grad_norm": 0.3670242726802826,
      "learning_rate": 3.219757121300976e-06,
      "loss": 0.0075,
      "step": 2077980
    },
    {
      "epoch": 3.4006925760819047,
      "grad_norm": 0.05494636669754982,
      "learning_rate": 3.219691229087459e-06,
      "loss": 0.0069,
      "step": 2078000
    },
    {
      "epoch": 3.400725306520558,
      "grad_norm": 0.23189076781272888,
      "learning_rate": 3.2196253368739416e-06,
      "loss": 0.0123,
      "step": 2078020
    },
    {
      "epoch": 3.4007580369592114,
      "grad_norm": 0.16540053486824036,
      "learning_rate": 3.2195594446604243e-06,
      "loss": 0.0143,
      "step": 2078040
    },
    {
      "epoch": 3.4007907673978646,
      "grad_norm": 0.14924544095993042,
      "learning_rate": 3.219493552446908e-06,
      "loss": 0.0128,
      "step": 2078060
    },
    {
      "epoch": 3.400823497836518,
      "grad_norm": 0.21627187728881836,
      "learning_rate": 3.2194276602333906e-06,
      "loss": 0.0085,
      "step": 2078080
    },
    {
      "epoch": 3.4008562282751713,
      "grad_norm": 0.5969362854957581,
      "learning_rate": 3.2193617680198734e-06,
      "loss": 0.0105,
      "step": 2078100
    },
    {
      "epoch": 3.400888958713825,
      "grad_norm": 0.5868818163871765,
      "learning_rate": 3.2192958758063565e-06,
      "loss": 0.014,
      "step": 2078120
    },
    {
      "epoch": 3.400921689152478,
      "grad_norm": 0.4244353771209717,
      "learning_rate": 3.2192299835928393e-06,
      "loss": 0.0128,
      "step": 2078140
    },
    {
      "epoch": 3.400954419591131,
      "grad_norm": 0.4500361979007721,
      "learning_rate": 3.219164091379322e-06,
      "loss": 0.0084,
      "step": 2078160
    },
    {
      "epoch": 3.400987150029785,
      "grad_norm": 0.1383843868970871,
      "learning_rate": 3.2190981991658047e-06,
      "loss": 0.0114,
      "step": 2078180
    },
    {
      "epoch": 3.401019880468438,
      "grad_norm": 0.24976392090320587,
      "learning_rate": 3.219032306952288e-06,
      "loss": 0.0124,
      "step": 2078200
    },
    {
      "epoch": 3.4010526109070915,
      "grad_norm": 0.7760137915611267,
      "learning_rate": 3.2189664147387706e-06,
      "loss": 0.0162,
      "step": 2078220
    },
    {
      "epoch": 3.4010853413457447,
      "grad_norm": 0.22856582701206207,
      "learning_rate": 3.2189005225252534e-06,
      "loss": 0.0081,
      "step": 2078240
    },
    {
      "epoch": 3.4011180717843983,
      "grad_norm": 0.12685298919677734,
      "learning_rate": 3.218834630311736e-06,
      "loss": 0.012,
      "step": 2078260
    },
    {
      "epoch": 3.4011508022230514,
      "grad_norm": 0.6356897950172424,
      "learning_rate": 3.2187687380982193e-06,
      "loss": 0.0131,
      "step": 2078280
    },
    {
      "epoch": 3.4011835326617046,
      "grad_norm": 0.14582817256450653,
      "learning_rate": 3.218702845884702e-06,
      "loss": 0.0111,
      "step": 2078300
    },
    {
      "epoch": 3.401216263100358,
      "grad_norm": 0.9120665788650513,
      "learning_rate": 3.2186369536711848e-06,
      "loss": 0.0153,
      "step": 2078320
    },
    {
      "epoch": 3.4012489935390113,
      "grad_norm": 0.25894710421562195,
      "learning_rate": 3.2185710614576675e-06,
      "loss": 0.0111,
      "step": 2078340
    },
    {
      "epoch": 3.401281723977665,
      "grad_norm": 0.4489712119102478,
      "learning_rate": 3.2185051692441507e-06,
      "loss": 0.0121,
      "step": 2078360
    },
    {
      "epoch": 3.401314454416318,
      "grad_norm": 0.36603158712387085,
      "learning_rate": 3.2184392770306334e-06,
      "loss": 0.0107,
      "step": 2078380
    },
    {
      "epoch": 3.4013471848549717,
      "grad_norm": 0.08831697702407837,
      "learning_rate": 3.2183733848171166e-06,
      "loss": 0.0106,
      "step": 2078400
    },
    {
      "epoch": 3.401379915293625,
      "grad_norm": 0.20215412974357605,
      "learning_rate": 3.2183074926035997e-06,
      "loss": 0.0122,
      "step": 2078420
    },
    {
      "epoch": 3.401412645732278,
      "grad_norm": 0.8554315567016602,
      "learning_rate": 3.2182416003900825e-06,
      "loss": 0.0152,
      "step": 2078440
    },
    {
      "epoch": 3.4014453761709316,
      "grad_norm": 0.5257251262664795,
      "learning_rate": 3.2181757081765652e-06,
      "loss": 0.0139,
      "step": 2078460
    },
    {
      "epoch": 3.4014781066095847,
      "grad_norm": 0.3050629794597626,
      "learning_rate": 3.218109815963048e-06,
      "loss": 0.0113,
      "step": 2078480
    },
    {
      "epoch": 3.4015108370482383,
      "grad_norm": 0.10937096178531647,
      "learning_rate": 3.2180439237495307e-06,
      "loss": 0.01,
      "step": 2078500
    },
    {
      "epoch": 3.4015435674868915,
      "grad_norm": 0.14782969653606415,
      "learning_rate": 3.217978031536014e-06,
      "loss": 0.0089,
      "step": 2078520
    },
    {
      "epoch": 3.4015762979255446,
      "grad_norm": 0.4596318006515503,
      "learning_rate": 3.2179121393224966e-06,
      "loss": 0.0133,
      "step": 2078540
    },
    {
      "epoch": 3.401609028364198,
      "grad_norm": 0.12359439581632614,
      "learning_rate": 3.2178462471089793e-06,
      "loss": 0.0106,
      "step": 2078560
    },
    {
      "epoch": 3.4016417588028514,
      "grad_norm": 0.7433888912200928,
      "learning_rate": 3.217780354895462e-06,
      "loss": 0.0179,
      "step": 2078580
    },
    {
      "epoch": 3.401674489241505,
      "grad_norm": 0.11357641220092773,
      "learning_rate": 3.2177144626819452e-06,
      "loss": 0.0078,
      "step": 2078600
    },
    {
      "epoch": 3.401707219680158,
      "grad_norm": 0.5427609086036682,
      "learning_rate": 3.217648570468428e-06,
      "loss": 0.0117,
      "step": 2078620
    },
    {
      "epoch": 3.4017399501188113,
      "grad_norm": 0.5554749369621277,
      "learning_rate": 3.2175826782549107e-06,
      "loss": 0.013,
      "step": 2078640
    },
    {
      "epoch": 3.401772680557465,
      "grad_norm": 0.2857932448387146,
      "learning_rate": 3.2175167860413935e-06,
      "loss": 0.0124,
      "step": 2078660
    },
    {
      "epoch": 3.401805410996118,
      "grad_norm": 0.2666654884815216,
      "learning_rate": 3.2174508938278766e-06,
      "loss": 0.0143,
      "step": 2078680
    },
    {
      "epoch": 3.4018381414347716,
      "grad_norm": 0.22766900062561035,
      "learning_rate": 3.2173850016143594e-06,
      "loss": 0.0093,
      "step": 2078700
    },
    {
      "epoch": 3.4018708718734247,
      "grad_norm": 0.3673245906829834,
      "learning_rate": 3.217319109400842e-06,
      "loss": 0.0121,
      "step": 2078720
    },
    {
      "epoch": 3.4019036023120783,
      "grad_norm": 0.6020559072494507,
      "learning_rate": 3.217253217187325e-06,
      "loss": 0.0113,
      "step": 2078740
    },
    {
      "epoch": 3.4019363327507315,
      "grad_norm": 0.5547820925712585,
      "learning_rate": 3.2171873249738084e-06,
      "loss": 0.0121,
      "step": 2078760
    },
    {
      "epoch": 3.4019690631893846,
      "grad_norm": 0.5434008240699768,
      "learning_rate": 3.217121432760291e-06,
      "loss": 0.0109,
      "step": 2078780
    },
    {
      "epoch": 3.4020017936280382,
      "grad_norm": 0.23912622034549713,
      "learning_rate": 3.217055540546774e-06,
      "loss": 0.0123,
      "step": 2078800
    },
    {
      "epoch": 3.4020345240666914,
      "grad_norm": 2.0346102714538574,
      "learning_rate": 3.216989648333257e-06,
      "loss": 0.0204,
      "step": 2078820
    },
    {
      "epoch": 3.402067254505345,
      "grad_norm": 0.20745952427387238,
      "learning_rate": 3.21692375611974e-06,
      "loss": 0.0096,
      "step": 2078840
    },
    {
      "epoch": 3.402099984943998,
      "grad_norm": 0.43985164165496826,
      "learning_rate": 3.2168578639062225e-06,
      "loss": 0.0078,
      "step": 2078860
    },
    {
      "epoch": 3.4021327153826517,
      "grad_norm": 0.6108511686325073,
      "learning_rate": 3.2167919716927053e-06,
      "loss": 0.0098,
      "step": 2078880
    },
    {
      "epoch": 3.402165445821305,
      "grad_norm": 0.1879475861787796,
      "learning_rate": 3.2167260794791885e-06,
      "loss": 0.0105,
      "step": 2078900
    },
    {
      "epoch": 3.402198176259958,
      "grad_norm": 0.24511225521564484,
      "learning_rate": 3.216660187265671e-06,
      "loss": 0.0162,
      "step": 2078920
    },
    {
      "epoch": 3.4022309066986116,
      "grad_norm": 0.19128739833831787,
      "learning_rate": 3.216594295052154e-06,
      "loss": 0.0099,
      "step": 2078940
    },
    {
      "epoch": 3.4022636371372648,
      "grad_norm": 0.14556625485420227,
      "learning_rate": 3.2165284028386367e-06,
      "loss": 0.0095,
      "step": 2078960
    },
    {
      "epoch": 3.4022963675759184,
      "grad_norm": 0.08206077665090561,
      "learning_rate": 3.2164625106251194e-06,
      "loss": 0.0103,
      "step": 2078980
    },
    {
      "epoch": 3.4023290980145715,
      "grad_norm": 0.13924318552017212,
      "learning_rate": 3.2163966184116026e-06,
      "loss": 0.0107,
      "step": 2079000
    },
    {
      "epoch": 3.402361828453225,
      "grad_norm": 0.3288674056529999,
      "learning_rate": 3.2163307261980853e-06,
      "loss": 0.0162,
      "step": 2079020
    },
    {
      "epoch": 3.4023945588918783,
      "grad_norm": 0.27912360429763794,
      "learning_rate": 3.216264833984568e-06,
      "loss": 0.0096,
      "step": 2079040
    },
    {
      "epoch": 3.4024272893305314,
      "grad_norm": 0.20002853870391846,
      "learning_rate": 3.216198941771051e-06,
      "loss": 0.0129,
      "step": 2079060
    },
    {
      "epoch": 3.402460019769185,
      "grad_norm": 0.3162538707256317,
      "learning_rate": 3.216133049557534e-06,
      "loss": 0.0112,
      "step": 2079080
    },
    {
      "epoch": 3.402492750207838,
      "grad_norm": 0.5102848410606384,
      "learning_rate": 3.2160671573440167e-06,
      "loss": 0.0086,
      "step": 2079100
    },
    {
      "epoch": 3.4025254806464917,
      "grad_norm": 0.18434841930866241,
      "learning_rate": 3.2160012651305e-06,
      "loss": 0.0091,
      "step": 2079120
    },
    {
      "epoch": 3.402558211085145,
      "grad_norm": 0.21948422491550446,
      "learning_rate": 3.215935372916983e-06,
      "loss": 0.0114,
      "step": 2079140
    },
    {
      "epoch": 3.4025909415237985,
      "grad_norm": 0.06594570726156235,
      "learning_rate": 3.2158694807034658e-06,
      "loss": 0.0125,
      "step": 2079160
    },
    {
      "epoch": 3.4026236719624516,
      "grad_norm": 0.11064125597476959,
      "learning_rate": 3.2158035884899485e-06,
      "loss": 0.0175,
      "step": 2079180
    },
    {
      "epoch": 3.402656402401105,
      "grad_norm": 0.25937971472740173,
      "learning_rate": 3.2157376962764312e-06,
      "loss": 0.0119,
      "step": 2079200
    },
    {
      "epoch": 3.4026891328397584,
      "grad_norm": 0.1537161022424698,
      "learning_rate": 3.2156718040629144e-06,
      "loss": 0.0076,
      "step": 2079220
    },
    {
      "epoch": 3.4027218632784115,
      "grad_norm": 0.20200350880622864,
      "learning_rate": 3.215605911849397e-06,
      "loss": 0.0067,
      "step": 2079240
    },
    {
      "epoch": 3.402754593717065,
      "grad_norm": 0.3313073515892029,
      "learning_rate": 3.21554001963588e-06,
      "loss": 0.0105,
      "step": 2079260
    },
    {
      "epoch": 3.4027873241557183,
      "grad_norm": 0.5940539240837097,
      "learning_rate": 3.2154741274223626e-06,
      "loss": 0.0128,
      "step": 2079280
    },
    {
      "epoch": 3.402820054594372,
      "grad_norm": 0.2920909523963928,
      "learning_rate": 3.2154082352088458e-06,
      "loss": 0.011,
      "step": 2079300
    },
    {
      "epoch": 3.402852785033025,
      "grad_norm": 0.07325613498687744,
      "learning_rate": 3.2153423429953285e-06,
      "loss": 0.0066,
      "step": 2079320
    },
    {
      "epoch": 3.402885515471678,
      "grad_norm": 0.18228578567504883,
      "learning_rate": 3.2152764507818113e-06,
      "loss": 0.0125,
      "step": 2079340
    },
    {
      "epoch": 3.4029182459103318,
      "grad_norm": 0.6499808430671692,
      "learning_rate": 3.215210558568294e-06,
      "loss": 0.0092,
      "step": 2079360
    },
    {
      "epoch": 3.402950976348985,
      "grad_norm": 0.2964029610157013,
      "learning_rate": 3.2151446663547767e-06,
      "loss": 0.013,
      "step": 2079380
    },
    {
      "epoch": 3.4029837067876385,
      "grad_norm": 0.16229066252708435,
      "learning_rate": 3.21507877414126e-06,
      "loss": 0.0144,
      "step": 2079400
    },
    {
      "epoch": 3.4030164372262917,
      "grad_norm": 0.34640738368034363,
      "learning_rate": 3.2150128819277426e-06,
      "loss": 0.0126,
      "step": 2079420
    },
    {
      "epoch": 3.4030491676649453,
      "grad_norm": 0.2648032009601593,
      "learning_rate": 3.2149469897142254e-06,
      "loss": 0.0103,
      "step": 2079440
    },
    {
      "epoch": 3.4030818981035984,
      "grad_norm": 0.05171770602464676,
      "learning_rate": 3.214881097500709e-06,
      "loss": 0.0103,
      "step": 2079460
    },
    {
      "epoch": 3.4031146285422516,
      "grad_norm": 0.30419719219207764,
      "learning_rate": 3.2148152052871917e-06,
      "loss": 0.0121,
      "step": 2079480
    },
    {
      "epoch": 3.403147358980905,
      "grad_norm": 0.7777289748191833,
      "learning_rate": 3.2147493130736745e-06,
      "loss": 0.0111,
      "step": 2079500
    },
    {
      "epoch": 3.4031800894195583,
      "grad_norm": 0.23290657997131348,
      "learning_rate": 3.214683420860157e-06,
      "loss": 0.0138,
      "step": 2079520
    },
    {
      "epoch": 3.403212819858212,
      "grad_norm": 0.1793307363986969,
      "learning_rate": 3.2146175286466404e-06,
      "loss": 0.0085,
      "step": 2079540
    },
    {
      "epoch": 3.403245550296865,
      "grad_norm": 0.2701496481895447,
      "learning_rate": 3.214551636433123e-06,
      "loss": 0.0167,
      "step": 2079560
    },
    {
      "epoch": 3.4032782807355186,
      "grad_norm": 0.3166114091873169,
      "learning_rate": 3.214485744219606e-06,
      "loss": 0.0097,
      "step": 2079580
    },
    {
      "epoch": 3.403311011174172,
      "grad_norm": 0.24486327171325684,
      "learning_rate": 3.2144198520060886e-06,
      "loss": 0.009,
      "step": 2079600
    },
    {
      "epoch": 3.403343741612825,
      "grad_norm": 0.5347684621810913,
      "learning_rate": 3.2143539597925717e-06,
      "loss": 0.0125,
      "step": 2079620
    },
    {
      "epoch": 3.4033764720514785,
      "grad_norm": 0.09193681925535202,
      "learning_rate": 3.2142880675790545e-06,
      "loss": 0.0138,
      "step": 2079640
    },
    {
      "epoch": 3.4034092024901317,
      "grad_norm": 0.517009437084198,
      "learning_rate": 3.2142221753655372e-06,
      "loss": 0.0139,
      "step": 2079660
    },
    {
      "epoch": 3.4034419329287853,
      "grad_norm": 0.34162071347236633,
      "learning_rate": 3.21415628315202e-06,
      "loss": 0.0095,
      "step": 2079680
    },
    {
      "epoch": 3.4034746633674384,
      "grad_norm": 0.22608046233654022,
      "learning_rate": 3.214090390938503e-06,
      "loss": 0.0102,
      "step": 2079700
    },
    {
      "epoch": 3.403507393806092,
      "grad_norm": 0.10701879113912582,
      "learning_rate": 3.214024498724986e-06,
      "loss": 0.0121,
      "step": 2079720
    },
    {
      "epoch": 3.403540124244745,
      "grad_norm": 0.0924357920885086,
      "learning_rate": 3.2139586065114686e-06,
      "loss": 0.0092,
      "step": 2079740
    },
    {
      "epoch": 3.4035728546833983,
      "grad_norm": 0.23772934079170227,
      "learning_rate": 3.2138927142979513e-06,
      "loss": 0.014,
      "step": 2079760
    },
    {
      "epoch": 3.403605585122052,
      "grad_norm": 0.5035932064056396,
      "learning_rate": 3.2138268220844345e-06,
      "loss": 0.0151,
      "step": 2079780
    },
    {
      "epoch": 3.403638315560705,
      "grad_norm": 0.09005779772996902,
      "learning_rate": 3.2137609298709172e-06,
      "loss": 0.0103,
      "step": 2079800
    },
    {
      "epoch": 3.4036710459993587,
      "grad_norm": 0.5920590758323669,
      "learning_rate": 3.2136950376574004e-06,
      "loss": 0.0145,
      "step": 2079820
    },
    {
      "epoch": 3.403703776438012,
      "grad_norm": 0.18888886272907257,
      "learning_rate": 3.2136291454438836e-06,
      "loss": 0.0111,
      "step": 2079840
    },
    {
      "epoch": 3.4037365068766654,
      "grad_norm": 0.3914894759654999,
      "learning_rate": 3.2135632532303663e-06,
      "loss": 0.01,
      "step": 2079860
    },
    {
      "epoch": 3.4037692373153186,
      "grad_norm": 0.7255795001983643,
      "learning_rate": 3.213497361016849e-06,
      "loss": 0.0138,
      "step": 2079880
    },
    {
      "epoch": 3.4038019677539717,
      "grad_norm": 0.18701399862766266,
      "learning_rate": 3.213431468803332e-06,
      "loss": 0.014,
      "step": 2079900
    },
    {
      "epoch": 3.4038346981926253,
      "grad_norm": 0.15928272902965546,
      "learning_rate": 3.2133655765898145e-06,
      "loss": 0.0106,
      "step": 2079920
    },
    {
      "epoch": 3.4038674286312784,
      "grad_norm": 0.49607062339782715,
      "learning_rate": 3.2132996843762977e-06,
      "loss": 0.011,
      "step": 2079940
    },
    {
      "epoch": 3.403900159069932,
      "grad_norm": 0.07655563950538635,
      "learning_rate": 3.2132337921627804e-06,
      "loss": 0.0084,
      "step": 2079960
    },
    {
      "epoch": 3.403932889508585,
      "grad_norm": 0.15462622046470642,
      "learning_rate": 3.213167899949263e-06,
      "loss": 0.0093,
      "step": 2079980
    },
    {
      "epoch": 3.4039656199472383,
      "grad_norm": 0.5431495904922485,
      "learning_rate": 3.213102007735746e-06,
      "loss": 0.0098,
      "step": 2080000
    },
    {
      "epoch": 3.403998350385892,
      "grad_norm": 0.37671446800231934,
      "learning_rate": 3.213036115522229e-06,
      "loss": 0.0113,
      "step": 2080020
    },
    {
      "epoch": 3.404031080824545,
      "grad_norm": 0.1956898272037506,
      "learning_rate": 3.212970223308712e-06,
      "loss": 0.0134,
      "step": 2080040
    },
    {
      "epoch": 3.4040638112631987,
      "grad_norm": 0.6392951011657715,
      "learning_rate": 3.2129043310951946e-06,
      "loss": 0.0097,
      "step": 2080060
    },
    {
      "epoch": 3.404096541701852,
      "grad_norm": 0.45978260040283203,
      "learning_rate": 3.2128384388816773e-06,
      "loss": 0.0114,
      "step": 2080080
    },
    {
      "epoch": 3.404129272140505,
      "grad_norm": 0.30346909165382385,
      "learning_rate": 3.2127725466681605e-06,
      "loss": 0.0142,
      "step": 2080100
    },
    {
      "epoch": 3.4041620025791586,
      "grad_norm": 0.4991759955883026,
      "learning_rate": 3.212706654454643e-06,
      "loss": 0.0113,
      "step": 2080120
    },
    {
      "epoch": 3.4041947330178117,
      "grad_norm": 0.1309102177619934,
      "learning_rate": 3.212640762241126e-06,
      "loss": 0.0072,
      "step": 2080140
    },
    {
      "epoch": 3.4042274634564653,
      "grad_norm": 0.2384113371372223,
      "learning_rate": 3.2125748700276095e-06,
      "loss": 0.0128,
      "step": 2080160
    },
    {
      "epoch": 3.4042601938951185,
      "grad_norm": 0.11234059929847717,
      "learning_rate": 3.2125089778140923e-06,
      "loss": 0.0144,
      "step": 2080180
    },
    {
      "epoch": 3.404292924333772,
      "grad_norm": 0.26521867513656616,
      "learning_rate": 3.212443085600575e-06,
      "loss": 0.0089,
      "step": 2080200
    },
    {
      "epoch": 3.404325654772425,
      "grad_norm": 0.33447375893592834,
      "learning_rate": 3.2123771933870577e-06,
      "loss": 0.0103,
      "step": 2080220
    },
    {
      "epoch": 3.4043583852110784,
      "grad_norm": 0.1149635761976242,
      "learning_rate": 3.212311301173541e-06,
      "loss": 0.0076,
      "step": 2080240
    },
    {
      "epoch": 3.404391115649732,
      "grad_norm": 0.19304300844669342,
      "learning_rate": 3.2122454089600236e-06,
      "loss": 0.0175,
      "step": 2080260
    },
    {
      "epoch": 3.404423846088385,
      "grad_norm": 0.16872110962867737,
      "learning_rate": 3.2121795167465064e-06,
      "loss": 0.0107,
      "step": 2080280
    },
    {
      "epoch": 3.4044565765270387,
      "grad_norm": 0.3692149221897125,
      "learning_rate": 3.212113624532989e-06,
      "loss": 0.0079,
      "step": 2080300
    },
    {
      "epoch": 3.404489306965692,
      "grad_norm": 0.6816421747207642,
      "learning_rate": 3.2120477323194723e-06,
      "loss": 0.0117,
      "step": 2080320
    },
    {
      "epoch": 3.4045220374043454,
      "grad_norm": 0.3662380278110504,
      "learning_rate": 3.211981840105955e-06,
      "loss": 0.0131,
      "step": 2080340
    },
    {
      "epoch": 3.4045547678429986,
      "grad_norm": 0.37293916940689087,
      "learning_rate": 3.2119159478924378e-06,
      "loss": 0.013,
      "step": 2080360
    },
    {
      "epoch": 3.4045874982816517,
      "grad_norm": 0.1598900407552719,
      "learning_rate": 3.2118500556789205e-06,
      "loss": 0.0081,
      "step": 2080380
    },
    {
      "epoch": 3.4046202287203053,
      "grad_norm": 0.36322444677352905,
      "learning_rate": 3.2117841634654033e-06,
      "loss": 0.0127,
      "step": 2080400
    },
    {
      "epoch": 3.4046529591589585,
      "grad_norm": 0.512641966342926,
      "learning_rate": 3.2117182712518864e-06,
      "loss": 0.01,
      "step": 2080420
    },
    {
      "epoch": 3.404685689597612,
      "grad_norm": 0.8313614726066589,
      "learning_rate": 3.211652379038369e-06,
      "loss": 0.0152,
      "step": 2080440
    },
    {
      "epoch": 3.4047184200362652,
      "grad_norm": 0.10380332916975021,
      "learning_rate": 3.211586486824852e-06,
      "loss": 0.0059,
      "step": 2080460
    },
    {
      "epoch": 3.404751150474919,
      "grad_norm": 0.19652648270130157,
      "learning_rate": 3.2115205946113346e-06,
      "loss": 0.0155,
      "step": 2080480
    },
    {
      "epoch": 3.404783880913572,
      "grad_norm": 0.23699893057346344,
      "learning_rate": 3.211454702397818e-06,
      "loss": 0.0059,
      "step": 2080500
    },
    {
      "epoch": 3.404816611352225,
      "grad_norm": 0.13913610577583313,
      "learning_rate": 3.211388810184301e-06,
      "loss": 0.0067,
      "step": 2080520
    },
    {
      "epoch": 3.4048493417908787,
      "grad_norm": 0.2066536396741867,
      "learning_rate": 3.2113229179707837e-06,
      "loss": 0.0141,
      "step": 2080540
    },
    {
      "epoch": 3.404882072229532,
      "grad_norm": 0.20168186724185944,
      "learning_rate": 3.211257025757267e-06,
      "loss": 0.0142,
      "step": 2080560
    },
    {
      "epoch": 3.4049148026681855,
      "grad_norm": 0.12415686994791031,
      "learning_rate": 3.2111911335437496e-06,
      "loss": 0.0105,
      "step": 2080580
    },
    {
      "epoch": 3.4049475331068386,
      "grad_norm": 0.21406251192092896,
      "learning_rate": 3.2111252413302323e-06,
      "loss": 0.0069,
      "step": 2080600
    },
    {
      "epoch": 3.404980263545492,
      "grad_norm": 0.10883697867393494,
      "learning_rate": 3.211059349116715e-06,
      "loss": 0.0106,
      "step": 2080620
    },
    {
      "epoch": 3.4050129939841454,
      "grad_norm": 0.25306904315948486,
      "learning_rate": 3.2109934569031982e-06,
      "loss": 0.0122,
      "step": 2080640
    },
    {
      "epoch": 3.4050457244227985,
      "grad_norm": 0.5625526905059814,
      "learning_rate": 3.210927564689681e-06,
      "loss": 0.0124,
      "step": 2080660
    },
    {
      "epoch": 3.405078454861452,
      "grad_norm": 0.18203021585941315,
      "learning_rate": 3.2108616724761637e-06,
      "loss": 0.0099,
      "step": 2080680
    },
    {
      "epoch": 3.4051111853001053,
      "grad_norm": 0.19508600234985352,
      "learning_rate": 3.2107957802626465e-06,
      "loss": 0.0059,
      "step": 2080700
    },
    {
      "epoch": 3.405143915738759,
      "grad_norm": 0.14908550679683685,
      "learning_rate": 3.2107298880491296e-06,
      "loss": 0.0091,
      "step": 2080720
    },
    {
      "epoch": 3.405176646177412,
      "grad_norm": 0.5840203762054443,
      "learning_rate": 3.2106639958356124e-06,
      "loss": 0.0107,
      "step": 2080740
    },
    {
      "epoch": 3.4052093766160656,
      "grad_norm": 0.19177718460559845,
      "learning_rate": 3.210598103622095e-06,
      "loss": 0.0094,
      "step": 2080760
    },
    {
      "epoch": 3.4052421070547187,
      "grad_norm": 0.5773086547851562,
      "learning_rate": 3.210532211408578e-06,
      "loss": 0.0104,
      "step": 2080780
    },
    {
      "epoch": 3.405274837493372,
      "grad_norm": 0.2674562633037567,
      "learning_rate": 3.210466319195061e-06,
      "loss": 0.0144,
      "step": 2080800
    },
    {
      "epoch": 3.4053075679320255,
      "grad_norm": 0.3274295926094055,
      "learning_rate": 3.2104004269815437e-06,
      "loss": 0.0112,
      "step": 2080820
    },
    {
      "epoch": 3.4053402983706786,
      "grad_norm": 0.508497416973114,
      "learning_rate": 3.2103345347680265e-06,
      "loss": 0.0129,
      "step": 2080840
    },
    {
      "epoch": 3.4053730288093322,
      "grad_norm": 0.18970701098442078,
      "learning_rate": 3.21026864255451e-06,
      "loss": 0.0097,
      "step": 2080860
    },
    {
      "epoch": 3.4054057592479854,
      "grad_norm": 0.07832669466733932,
      "learning_rate": 3.210202750340993e-06,
      "loss": 0.0132,
      "step": 2080880
    },
    {
      "epoch": 3.405438489686639,
      "grad_norm": 0.6573578119277954,
      "learning_rate": 3.2101368581274756e-06,
      "loss": 0.0125,
      "step": 2080900
    },
    {
      "epoch": 3.405471220125292,
      "grad_norm": 0.9845754504203796,
      "learning_rate": 3.2100709659139583e-06,
      "loss": 0.0137,
      "step": 2080920
    },
    {
      "epoch": 3.4055039505639453,
      "grad_norm": 0.3441806137561798,
      "learning_rate": 3.210005073700441e-06,
      "loss": 0.0123,
      "step": 2080940
    },
    {
      "epoch": 3.405536681002599,
      "grad_norm": 0.5363986492156982,
      "learning_rate": 3.209939181486924e-06,
      "loss": 0.0139,
      "step": 2080960
    },
    {
      "epoch": 3.405569411441252,
      "grad_norm": 0.29740044474601746,
      "learning_rate": 3.209873289273407e-06,
      "loss": 0.0106,
      "step": 2080980
    },
    {
      "epoch": 3.4056021418799056,
      "grad_norm": 0.10716186463832855,
      "learning_rate": 3.2098073970598897e-06,
      "loss": 0.0096,
      "step": 2081000
    },
    {
      "epoch": 3.4056348723185588,
      "grad_norm": 0.055352725088596344,
      "learning_rate": 3.2097415048463724e-06,
      "loss": 0.0114,
      "step": 2081020
    },
    {
      "epoch": 3.4056676027572124,
      "grad_norm": 0.13611912727355957,
      "learning_rate": 3.2096756126328556e-06,
      "loss": 0.009,
      "step": 2081040
    },
    {
      "epoch": 3.4057003331958655,
      "grad_norm": 0.3308050036430359,
      "learning_rate": 3.2096097204193383e-06,
      "loss": 0.0125,
      "step": 2081060
    },
    {
      "epoch": 3.4057330636345187,
      "grad_norm": 0.0955868735909462,
      "learning_rate": 3.209543828205821e-06,
      "loss": 0.0099,
      "step": 2081080
    },
    {
      "epoch": 3.4057657940731723,
      "grad_norm": 0.32138729095458984,
      "learning_rate": 3.209477935992304e-06,
      "loss": 0.0123,
      "step": 2081100
    },
    {
      "epoch": 3.4057985245118254,
      "grad_norm": 0.12171465158462524,
      "learning_rate": 3.209412043778787e-06,
      "loss": 0.0098,
      "step": 2081120
    },
    {
      "epoch": 3.405831254950479,
      "grad_norm": 0.16730670630931854,
      "learning_rate": 3.2093461515652697e-06,
      "loss": 0.0075,
      "step": 2081140
    },
    {
      "epoch": 3.405863985389132,
      "grad_norm": 0.2225390523672104,
      "learning_rate": 3.2092802593517524e-06,
      "loss": 0.0127,
      "step": 2081160
    },
    {
      "epoch": 3.4058967158277857,
      "grad_norm": 0.31807664036750793,
      "learning_rate": 3.209214367138235e-06,
      "loss": 0.0139,
      "step": 2081180
    },
    {
      "epoch": 3.405929446266439,
      "grad_norm": 0.17727456986904144,
      "learning_rate": 3.2091484749247183e-06,
      "loss": 0.0116,
      "step": 2081200
    },
    {
      "epoch": 3.405962176705092,
      "grad_norm": 0.70331209897995,
      "learning_rate": 3.2090825827112015e-06,
      "loss": 0.0152,
      "step": 2081220
    },
    {
      "epoch": 3.4059949071437456,
      "grad_norm": 0.25498631596565247,
      "learning_rate": 3.2090166904976842e-06,
      "loss": 0.0136,
      "step": 2081240
    },
    {
      "epoch": 3.406027637582399,
      "grad_norm": 0.2067098170518875,
      "learning_rate": 3.2089507982841674e-06,
      "loss": 0.0068,
      "step": 2081260
    },
    {
      "epoch": 3.4060603680210524,
      "grad_norm": 0.363831490278244,
      "learning_rate": 3.20888490607065e-06,
      "loss": 0.0136,
      "step": 2081280
    },
    {
      "epoch": 3.4060930984597055,
      "grad_norm": 0.37860894203186035,
      "learning_rate": 3.208819013857133e-06,
      "loss": 0.0101,
      "step": 2081300
    },
    {
      "epoch": 3.406125828898359,
      "grad_norm": 0.06201179698109627,
      "learning_rate": 3.2087531216436156e-06,
      "loss": 0.0125,
      "step": 2081320
    },
    {
      "epoch": 3.4061585593370123,
      "grad_norm": 0.30458274483680725,
      "learning_rate": 3.2086872294300984e-06,
      "loss": 0.0092,
      "step": 2081340
    },
    {
      "epoch": 3.4061912897756654,
      "grad_norm": 0.623576283454895,
      "learning_rate": 3.2086213372165815e-06,
      "loss": 0.0145,
      "step": 2081360
    },
    {
      "epoch": 3.406224020214319,
      "grad_norm": 0.25454866886138916,
      "learning_rate": 3.2085554450030643e-06,
      "loss": 0.0147,
      "step": 2081380
    },
    {
      "epoch": 3.406256750652972,
      "grad_norm": 0.05179028958082199,
      "learning_rate": 3.208489552789547e-06,
      "loss": 0.0137,
      "step": 2081400
    },
    {
      "epoch": 3.4062894810916258,
      "grad_norm": 0.047958556562662125,
      "learning_rate": 3.2084236605760298e-06,
      "loss": 0.0099,
      "step": 2081420
    },
    {
      "epoch": 3.406322211530279,
      "grad_norm": 0.2801964282989502,
      "learning_rate": 3.208357768362513e-06,
      "loss": 0.0157,
      "step": 2081440
    },
    {
      "epoch": 3.4063549419689325,
      "grad_norm": 0.12903913855552673,
      "learning_rate": 3.2082918761489957e-06,
      "loss": 0.0131,
      "step": 2081460
    },
    {
      "epoch": 3.4063876724075857,
      "grad_norm": 0.2388363629579544,
      "learning_rate": 3.2082259839354784e-06,
      "loss": 0.0097,
      "step": 2081480
    },
    {
      "epoch": 3.406420402846239,
      "grad_norm": 0.44326743483543396,
      "learning_rate": 3.208160091721961e-06,
      "loss": 0.0126,
      "step": 2081500
    },
    {
      "epoch": 3.4064531332848924,
      "grad_norm": 0.24720843136310577,
      "learning_rate": 3.2080941995084443e-06,
      "loss": 0.0107,
      "step": 2081520
    },
    {
      "epoch": 3.4064858637235456,
      "grad_norm": 0.13187332451343536,
      "learning_rate": 3.208028307294927e-06,
      "loss": 0.0133,
      "step": 2081540
    },
    {
      "epoch": 3.406518594162199,
      "grad_norm": 0.12122943252325058,
      "learning_rate": 3.2079624150814098e-06,
      "loss": 0.0096,
      "step": 2081560
    },
    {
      "epoch": 3.4065513246008523,
      "grad_norm": 0.16618260741233826,
      "learning_rate": 3.2078965228678934e-06,
      "loss": 0.0154,
      "step": 2081580
    },
    {
      "epoch": 3.4065840550395055,
      "grad_norm": 0.3859964907169342,
      "learning_rate": 3.207830630654376e-06,
      "loss": 0.0122,
      "step": 2081600
    },
    {
      "epoch": 3.406616785478159,
      "grad_norm": 0.0734572634100914,
      "learning_rate": 3.207764738440859e-06,
      "loss": 0.0081,
      "step": 2081620
    },
    {
      "epoch": 3.406649515916812,
      "grad_norm": 0.1214371919631958,
      "learning_rate": 3.2076988462273416e-06,
      "loss": 0.0127,
      "step": 2081640
    },
    {
      "epoch": 3.406682246355466,
      "grad_norm": 0.19167454540729523,
      "learning_rate": 3.2076329540138247e-06,
      "loss": 0.0136,
      "step": 2081660
    },
    {
      "epoch": 3.406714976794119,
      "grad_norm": 0.1660277098417282,
      "learning_rate": 3.2075670618003075e-06,
      "loss": 0.0079,
      "step": 2081680
    },
    {
      "epoch": 3.406747707232772,
      "grad_norm": 0.30711519718170166,
      "learning_rate": 3.2075011695867902e-06,
      "loss": 0.0107,
      "step": 2081700
    },
    {
      "epoch": 3.4067804376714257,
      "grad_norm": 0.18495506048202515,
      "learning_rate": 3.207435277373273e-06,
      "loss": 0.0095,
      "step": 2081720
    },
    {
      "epoch": 3.406813168110079,
      "grad_norm": 0.25619229674339294,
      "learning_rate": 3.207369385159756e-06,
      "loss": 0.0124,
      "step": 2081740
    },
    {
      "epoch": 3.4068458985487324,
      "grad_norm": 0.05066158249974251,
      "learning_rate": 3.207303492946239e-06,
      "loss": 0.012,
      "step": 2081760
    },
    {
      "epoch": 3.4068786289873856,
      "grad_norm": 0.2724628746509552,
      "learning_rate": 3.2072376007327216e-06,
      "loss": 0.0098,
      "step": 2081780
    },
    {
      "epoch": 3.406911359426039,
      "grad_norm": 0.18373394012451172,
      "learning_rate": 3.2071717085192043e-06,
      "loss": 0.0122,
      "step": 2081800
    },
    {
      "epoch": 3.4069440898646923,
      "grad_norm": 0.20346058905124664,
      "learning_rate": 3.207105816305687e-06,
      "loss": 0.0118,
      "step": 2081820
    },
    {
      "epoch": 3.4069768203033455,
      "grad_norm": 0.057882532477378845,
      "learning_rate": 3.2070399240921703e-06,
      "loss": 0.007,
      "step": 2081840
    },
    {
      "epoch": 3.407009550741999,
      "grad_norm": 0.24676096439361572,
      "learning_rate": 3.206974031878653e-06,
      "loss": 0.0111,
      "step": 2081860
    },
    {
      "epoch": 3.407042281180652,
      "grad_norm": 0.1103789210319519,
      "learning_rate": 3.2069081396651357e-06,
      "loss": 0.0102,
      "step": 2081880
    },
    {
      "epoch": 3.407075011619306,
      "grad_norm": 0.3427491784095764,
      "learning_rate": 3.2068422474516185e-06,
      "loss": 0.0095,
      "step": 2081900
    },
    {
      "epoch": 3.407107742057959,
      "grad_norm": 0.16976603865623474,
      "learning_rate": 3.206776355238102e-06,
      "loss": 0.0129,
      "step": 2081920
    },
    {
      "epoch": 3.4071404724966126,
      "grad_norm": 0.320738822221756,
      "learning_rate": 3.206710463024585e-06,
      "loss": 0.0108,
      "step": 2081940
    },
    {
      "epoch": 3.4071732029352657,
      "grad_norm": 0.4297176003456116,
      "learning_rate": 3.2066445708110675e-06,
      "loss": 0.0146,
      "step": 2081960
    },
    {
      "epoch": 3.407205933373919,
      "grad_norm": 0.5184755325317383,
      "learning_rate": 3.2065786785975507e-06,
      "loss": 0.0083,
      "step": 2081980
    },
    {
      "epoch": 3.4072386638125725,
      "grad_norm": 0.20511797070503235,
      "learning_rate": 3.2065127863840334e-06,
      "loss": 0.0084,
      "step": 2082000
    },
    {
      "epoch": 3.4072713942512256,
      "grad_norm": 0.35136327147483826,
      "learning_rate": 3.206446894170516e-06,
      "loss": 0.0083,
      "step": 2082020
    },
    {
      "epoch": 3.407304124689879,
      "grad_norm": 0.13591508567333221,
      "learning_rate": 3.206381001956999e-06,
      "loss": 0.015,
      "step": 2082040
    },
    {
      "epoch": 3.4073368551285323,
      "grad_norm": 0.8670207858085632,
      "learning_rate": 3.206315109743482e-06,
      "loss": 0.0176,
      "step": 2082060
    },
    {
      "epoch": 3.407369585567186,
      "grad_norm": 0.18804849684238434,
      "learning_rate": 3.206249217529965e-06,
      "loss": 0.0089,
      "step": 2082080
    },
    {
      "epoch": 3.407402316005839,
      "grad_norm": 0.6875250935554504,
      "learning_rate": 3.2061833253164476e-06,
      "loss": 0.011,
      "step": 2082100
    },
    {
      "epoch": 3.4074350464444922,
      "grad_norm": 0.2724757492542267,
      "learning_rate": 3.2061174331029303e-06,
      "loss": 0.0108,
      "step": 2082120
    },
    {
      "epoch": 3.407467776883146,
      "grad_norm": 0.2317933887243271,
      "learning_rate": 3.2060515408894135e-06,
      "loss": 0.0153,
      "step": 2082140
    },
    {
      "epoch": 3.407500507321799,
      "grad_norm": 0.2734721899032593,
      "learning_rate": 3.205985648675896e-06,
      "loss": 0.0104,
      "step": 2082160
    },
    {
      "epoch": 3.4075332377604526,
      "grad_norm": 0.27693238854408264,
      "learning_rate": 3.205919756462379e-06,
      "loss": 0.0113,
      "step": 2082180
    },
    {
      "epoch": 3.4075659681991057,
      "grad_norm": 0.5963737964630127,
      "learning_rate": 3.2058538642488617e-06,
      "loss": 0.0126,
      "step": 2082200
    },
    {
      "epoch": 3.4075986986377593,
      "grad_norm": 0.4875824451446533,
      "learning_rate": 3.205787972035345e-06,
      "loss": 0.0103,
      "step": 2082220
    },
    {
      "epoch": 3.4076314290764125,
      "grad_norm": 0.46702566742897034,
      "learning_rate": 3.2057220798218276e-06,
      "loss": 0.0129,
      "step": 2082240
    },
    {
      "epoch": 3.4076641595150656,
      "grad_norm": 0.7539532780647278,
      "learning_rate": 3.2056561876083103e-06,
      "loss": 0.0155,
      "step": 2082260
    },
    {
      "epoch": 3.407696889953719,
      "grad_norm": 0.10565631836652756,
      "learning_rate": 3.205590295394794e-06,
      "loss": 0.0106,
      "step": 2082280
    },
    {
      "epoch": 3.4077296203923724,
      "grad_norm": 0.38894811272621155,
      "learning_rate": 3.2055244031812767e-06,
      "loss": 0.022,
      "step": 2082300
    },
    {
      "epoch": 3.407762350831026,
      "grad_norm": 0.181467205286026,
      "learning_rate": 3.2054585109677594e-06,
      "loss": 0.0107,
      "step": 2082320
    },
    {
      "epoch": 3.407795081269679,
      "grad_norm": 0.20243868231773376,
      "learning_rate": 3.205392618754242e-06,
      "loss": 0.0125,
      "step": 2082340
    },
    {
      "epoch": 3.4078278117083327,
      "grad_norm": 0.29751405119895935,
      "learning_rate": 3.205326726540725e-06,
      "loss": 0.01,
      "step": 2082360
    },
    {
      "epoch": 3.407860542146986,
      "grad_norm": 0.5196266770362854,
      "learning_rate": 3.205260834327208e-06,
      "loss": 0.0118,
      "step": 2082380
    },
    {
      "epoch": 3.407893272585639,
      "grad_norm": 0.39292773604393005,
      "learning_rate": 3.2051949421136908e-06,
      "loss": 0.008,
      "step": 2082400
    },
    {
      "epoch": 3.4079260030242926,
      "grad_norm": 0.3439538776874542,
      "learning_rate": 3.2051290499001735e-06,
      "loss": 0.008,
      "step": 2082420
    },
    {
      "epoch": 3.4079587334629458,
      "grad_norm": 0.21111305058002472,
      "learning_rate": 3.2050631576866563e-06,
      "loss": 0.0123,
      "step": 2082440
    },
    {
      "epoch": 3.4079914639015993,
      "grad_norm": 0.35401448607444763,
      "learning_rate": 3.2049972654731394e-06,
      "loss": 0.0117,
      "step": 2082460
    },
    {
      "epoch": 3.4080241943402525,
      "grad_norm": 0.428798645734787,
      "learning_rate": 3.204931373259622e-06,
      "loss": 0.0098,
      "step": 2082480
    },
    {
      "epoch": 3.408056924778906,
      "grad_norm": 0.12295008450746536,
      "learning_rate": 3.204865481046105e-06,
      "loss": 0.0073,
      "step": 2082500
    },
    {
      "epoch": 3.4080896552175592,
      "grad_norm": 0.06799928843975067,
      "learning_rate": 3.2047995888325876e-06,
      "loss": 0.0076,
      "step": 2082520
    },
    {
      "epoch": 3.4081223856562124,
      "grad_norm": 0.5536022186279297,
      "learning_rate": 3.204733696619071e-06,
      "loss": 0.0099,
      "step": 2082540
    },
    {
      "epoch": 3.408155116094866,
      "grad_norm": 0.18630798161029816,
      "learning_rate": 3.2046678044055535e-06,
      "loss": 0.0123,
      "step": 2082560
    },
    {
      "epoch": 3.408187846533519,
      "grad_norm": 0.1029956266283989,
      "learning_rate": 3.2046019121920363e-06,
      "loss": 0.0071,
      "step": 2082580
    },
    {
      "epoch": 3.4082205769721727,
      "grad_norm": 0.17854775488376617,
      "learning_rate": 3.204536019978519e-06,
      "loss": 0.0134,
      "step": 2082600
    },
    {
      "epoch": 3.408253307410826,
      "grad_norm": 0.12361956387758255,
      "learning_rate": 3.2044701277650026e-06,
      "loss": 0.0067,
      "step": 2082620
    },
    {
      "epoch": 3.4082860378494795,
      "grad_norm": 0.08667946606874466,
      "learning_rate": 3.2044042355514853e-06,
      "loss": 0.0108,
      "step": 2082640
    },
    {
      "epoch": 3.4083187682881326,
      "grad_norm": 0.1416768580675125,
      "learning_rate": 3.204338343337968e-06,
      "loss": 0.0109,
      "step": 2082660
    },
    {
      "epoch": 3.4083514987267858,
      "grad_norm": 0.08304344117641449,
      "learning_rate": 3.2042724511244513e-06,
      "loss": 0.0109,
      "step": 2082680
    },
    {
      "epoch": 3.4083842291654394,
      "grad_norm": 0.1144978404045105,
      "learning_rate": 3.204206558910934e-06,
      "loss": 0.0116,
      "step": 2082700
    },
    {
      "epoch": 3.4084169596040925,
      "grad_norm": 0.8676508069038391,
      "learning_rate": 3.2041406666974167e-06,
      "loss": 0.014,
      "step": 2082720
    },
    {
      "epoch": 3.408449690042746,
      "grad_norm": 0.26092854142189026,
      "learning_rate": 3.2040747744838995e-06,
      "loss": 0.0105,
      "step": 2082740
    },
    {
      "epoch": 3.4084824204813993,
      "grad_norm": 0.12315050512552261,
      "learning_rate": 3.2040088822703826e-06,
      "loss": 0.0141,
      "step": 2082760
    },
    {
      "epoch": 3.408515150920053,
      "grad_norm": 0.2677067816257477,
      "learning_rate": 3.2039429900568654e-06,
      "loss": 0.0101,
      "step": 2082780
    },
    {
      "epoch": 3.408547881358706,
      "grad_norm": 0.12028609216213226,
      "learning_rate": 3.203877097843348e-06,
      "loss": 0.0121,
      "step": 2082800
    },
    {
      "epoch": 3.408580611797359,
      "grad_norm": 0.5673835873603821,
      "learning_rate": 3.203811205629831e-06,
      "loss": 0.0142,
      "step": 2082820
    },
    {
      "epoch": 3.4086133422360128,
      "grad_norm": 0.15205630660057068,
      "learning_rate": 3.2037453134163136e-06,
      "loss": 0.0102,
      "step": 2082840
    },
    {
      "epoch": 3.408646072674666,
      "grad_norm": 0.5565443634986877,
      "learning_rate": 3.2036794212027968e-06,
      "loss": 0.0162,
      "step": 2082860
    },
    {
      "epoch": 3.4086788031133195,
      "grad_norm": 0.22979407012462616,
      "learning_rate": 3.2036135289892795e-06,
      "loss": 0.0103,
      "step": 2082880
    },
    {
      "epoch": 3.4087115335519727,
      "grad_norm": 0.20845980942249298,
      "learning_rate": 3.2035476367757622e-06,
      "loss": 0.0117,
      "step": 2082900
    },
    {
      "epoch": 3.4087442639906262,
      "grad_norm": 0.16160860657691956,
      "learning_rate": 3.203481744562245e-06,
      "loss": 0.0077,
      "step": 2082920
    },
    {
      "epoch": 3.4087769944292794,
      "grad_norm": 0.08222585171461105,
      "learning_rate": 3.203415852348728e-06,
      "loss": 0.0121,
      "step": 2082940
    },
    {
      "epoch": 3.4088097248679325,
      "grad_norm": 0.2942635715007782,
      "learning_rate": 3.203349960135211e-06,
      "loss": 0.0134,
      "step": 2082960
    },
    {
      "epoch": 3.408842455306586,
      "grad_norm": 0.40289488434791565,
      "learning_rate": 3.203284067921694e-06,
      "loss": 0.0098,
      "step": 2082980
    },
    {
      "epoch": 3.4088751857452393,
      "grad_norm": 0.3452332317829132,
      "learning_rate": 3.203218175708177e-06,
      "loss": 0.0107,
      "step": 2083000
    },
    {
      "epoch": 3.408907916183893,
      "grad_norm": 0.24298352003097534,
      "learning_rate": 3.20315228349466e-06,
      "loss": 0.0087,
      "step": 2083020
    },
    {
      "epoch": 3.408940646622546,
      "grad_norm": 0.1811603158712387,
      "learning_rate": 3.2030863912811427e-06,
      "loss": 0.0118,
      "step": 2083040
    },
    {
      "epoch": 3.408973377061199,
      "grad_norm": 0.6209262013435364,
      "learning_rate": 3.2030204990676254e-06,
      "loss": 0.0076,
      "step": 2083060
    },
    {
      "epoch": 3.4090061074998528,
      "grad_norm": 0.23310959339141846,
      "learning_rate": 3.2029546068541086e-06,
      "loss": 0.0126,
      "step": 2083080
    },
    {
      "epoch": 3.409038837938506,
      "grad_norm": 0.308321088552475,
      "learning_rate": 3.2028887146405913e-06,
      "loss": 0.0124,
      "step": 2083100
    },
    {
      "epoch": 3.4090715683771595,
      "grad_norm": 0.21270543336868286,
      "learning_rate": 3.202822822427074e-06,
      "loss": 0.0138,
      "step": 2083120
    },
    {
      "epoch": 3.4091042988158127,
      "grad_norm": 0.27087944746017456,
      "learning_rate": 3.202756930213557e-06,
      "loss": 0.0064,
      "step": 2083140
    },
    {
      "epoch": 3.409137029254466,
      "grad_norm": 0.24093753099441528,
      "learning_rate": 3.20269103800004e-06,
      "loss": 0.0109,
      "step": 2083160
    },
    {
      "epoch": 3.4091697596931194,
      "grad_norm": 0.5521591901779175,
      "learning_rate": 3.2026251457865227e-06,
      "loss": 0.0091,
      "step": 2083180
    },
    {
      "epoch": 3.4092024901317726,
      "grad_norm": 0.30088040232658386,
      "learning_rate": 3.2025592535730054e-06,
      "loss": 0.0089,
      "step": 2083200
    },
    {
      "epoch": 3.409235220570426,
      "grad_norm": 0.2625180780887604,
      "learning_rate": 3.202493361359488e-06,
      "loss": 0.0127,
      "step": 2083220
    },
    {
      "epoch": 3.4092679510090793,
      "grad_norm": 0.12599822878837585,
      "learning_rate": 3.202427469145971e-06,
      "loss": 0.0123,
      "step": 2083240
    },
    {
      "epoch": 3.409300681447733,
      "grad_norm": 0.1390727460384369,
      "learning_rate": 3.202361576932454e-06,
      "loss": 0.014,
      "step": 2083260
    },
    {
      "epoch": 3.409333411886386,
      "grad_norm": 0.25854694843292236,
      "learning_rate": 3.202295684718937e-06,
      "loss": 0.0174,
      "step": 2083280
    },
    {
      "epoch": 3.409366142325039,
      "grad_norm": 0.1766285002231598,
      "learning_rate": 3.2022297925054196e-06,
      "loss": 0.0077,
      "step": 2083300
    },
    {
      "epoch": 3.409398872763693,
      "grad_norm": 0.2100289911031723,
      "learning_rate": 3.2021639002919023e-06,
      "loss": 0.0068,
      "step": 2083320
    },
    {
      "epoch": 3.409431603202346,
      "grad_norm": 0.1794101893901825,
      "learning_rate": 3.202098008078386e-06,
      "loss": 0.007,
      "step": 2083340
    },
    {
      "epoch": 3.4094643336409995,
      "grad_norm": 0.8564599752426147,
      "learning_rate": 3.2020321158648686e-06,
      "loss": 0.0099,
      "step": 2083360
    },
    {
      "epoch": 3.4094970640796527,
      "grad_norm": 0.2784714698791504,
      "learning_rate": 3.2019662236513514e-06,
      "loss": 0.0099,
      "step": 2083380
    },
    {
      "epoch": 3.4095297945183063,
      "grad_norm": 0.29180434346199036,
      "learning_rate": 3.2019003314378345e-06,
      "loss": 0.0127,
      "step": 2083400
    },
    {
      "epoch": 3.4095625249569594,
      "grad_norm": 0.20538167655467987,
      "learning_rate": 3.2018344392243173e-06,
      "loss": 0.0095,
      "step": 2083420
    },
    {
      "epoch": 3.4095952553956126,
      "grad_norm": 0.876622200012207,
      "learning_rate": 3.2017685470108e-06,
      "loss": 0.0101,
      "step": 2083440
    },
    {
      "epoch": 3.409627985834266,
      "grad_norm": 0.3042515516281128,
      "learning_rate": 3.2017026547972828e-06,
      "loss": 0.011,
      "step": 2083460
    },
    {
      "epoch": 3.4096607162729193,
      "grad_norm": 0.2849982678890228,
      "learning_rate": 3.201636762583766e-06,
      "loss": 0.0107,
      "step": 2083480
    },
    {
      "epoch": 3.409693446711573,
      "grad_norm": 0.0643543004989624,
      "learning_rate": 3.2015708703702487e-06,
      "loss": 0.0119,
      "step": 2083500
    },
    {
      "epoch": 3.409726177150226,
      "grad_norm": 0.22418290376663208,
      "learning_rate": 3.2015049781567314e-06,
      "loss": 0.0135,
      "step": 2083520
    },
    {
      "epoch": 3.4097589075888797,
      "grad_norm": 0.17644287645816803,
      "learning_rate": 3.201439085943214e-06,
      "loss": 0.0087,
      "step": 2083540
    },
    {
      "epoch": 3.409791638027533,
      "grad_norm": 0.2777453660964966,
      "learning_rate": 3.2013731937296973e-06,
      "loss": 0.0091,
      "step": 2083560
    },
    {
      "epoch": 3.409824368466186,
      "grad_norm": 0.19866891205310822,
      "learning_rate": 3.20130730151618e-06,
      "loss": 0.011,
      "step": 2083580
    },
    {
      "epoch": 3.4098570989048396,
      "grad_norm": 0.45882168412208557,
      "learning_rate": 3.2012414093026628e-06,
      "loss": 0.0162,
      "step": 2083600
    },
    {
      "epoch": 3.4098898293434927,
      "grad_norm": 0.21817080676555634,
      "learning_rate": 3.2011755170891455e-06,
      "loss": 0.0073,
      "step": 2083620
    },
    {
      "epoch": 3.4099225597821463,
      "grad_norm": 0.17076806724071503,
      "learning_rate": 3.2011096248756287e-06,
      "loss": 0.0113,
      "step": 2083640
    },
    {
      "epoch": 3.4099552902207995,
      "grad_norm": 0.20510335266590118,
      "learning_rate": 3.2010437326621114e-06,
      "loss": 0.009,
      "step": 2083660
    },
    {
      "epoch": 3.409988020659453,
      "grad_norm": 0.051528893411159515,
      "learning_rate": 3.2009778404485946e-06,
      "loss": 0.0085,
      "step": 2083680
    },
    {
      "epoch": 3.410020751098106,
      "grad_norm": 0.174951434135437,
      "learning_rate": 3.2009119482350778e-06,
      "loss": 0.0129,
      "step": 2083700
    },
    {
      "epoch": 3.4100534815367594,
      "grad_norm": 0.2574903070926666,
      "learning_rate": 3.2008460560215605e-06,
      "loss": 0.0103,
      "step": 2083720
    },
    {
      "epoch": 3.410086211975413,
      "grad_norm": 0.7280523180961609,
      "learning_rate": 3.2007801638080432e-06,
      "loss": 0.0079,
      "step": 2083740
    },
    {
      "epoch": 3.410118942414066,
      "grad_norm": 0.1427372395992279,
      "learning_rate": 3.200714271594526e-06,
      "loss": 0.0128,
      "step": 2083760
    },
    {
      "epoch": 3.4101516728527197,
      "grad_norm": 0.1787782609462738,
      "learning_rate": 3.2006483793810087e-06,
      "loss": 0.0123,
      "step": 2083780
    },
    {
      "epoch": 3.410184403291373,
      "grad_norm": 0.47416213154792786,
      "learning_rate": 3.200582487167492e-06,
      "loss": 0.0145,
      "step": 2083800
    },
    {
      "epoch": 3.4102171337300264,
      "grad_norm": 0.059378426522016525,
      "learning_rate": 3.2005165949539746e-06,
      "loss": 0.0078,
      "step": 2083820
    },
    {
      "epoch": 3.4102498641686796,
      "grad_norm": 0.7421557307243347,
      "learning_rate": 3.2004507027404574e-06,
      "loss": 0.0163,
      "step": 2083840
    },
    {
      "epoch": 3.4102825946073327,
      "grad_norm": 0.25952064990997314,
      "learning_rate": 3.20038481052694e-06,
      "loss": 0.0109,
      "step": 2083860
    },
    {
      "epoch": 3.4103153250459863,
      "grad_norm": 0.36556097865104675,
      "learning_rate": 3.2003189183134233e-06,
      "loss": 0.0093,
      "step": 2083880
    },
    {
      "epoch": 3.4103480554846395,
      "grad_norm": 0.5905405879020691,
      "learning_rate": 3.200253026099906e-06,
      "loss": 0.0126,
      "step": 2083900
    },
    {
      "epoch": 3.410380785923293,
      "grad_norm": 0.15749487280845642,
      "learning_rate": 3.2001871338863887e-06,
      "loss": 0.0098,
      "step": 2083920
    },
    {
      "epoch": 3.4104135163619462,
      "grad_norm": 0.13904491066932678,
      "learning_rate": 3.2001212416728715e-06,
      "loss": 0.0127,
      "step": 2083940
    },
    {
      "epoch": 3.4104462468006,
      "grad_norm": 0.4773598611354828,
      "learning_rate": 3.2000553494593546e-06,
      "loss": 0.0098,
      "step": 2083960
    },
    {
      "epoch": 3.410478977239253,
      "grad_norm": 0.3559987246990204,
      "learning_rate": 3.1999894572458374e-06,
      "loss": 0.0097,
      "step": 2083980
    },
    {
      "epoch": 3.410511707677906,
      "grad_norm": 0.495970219373703,
      "learning_rate": 3.19992356503232e-06,
      "loss": 0.0115,
      "step": 2084000
    },
    {
      "epoch": 3.4105444381165597,
      "grad_norm": 0.19023831188678741,
      "learning_rate": 3.199857672818803e-06,
      "loss": 0.0148,
      "step": 2084020
    },
    {
      "epoch": 3.410577168555213,
      "grad_norm": 0.5635074377059937,
      "learning_rate": 3.1997917806052864e-06,
      "loss": 0.0151,
      "step": 2084040
    },
    {
      "epoch": 3.4106098989938665,
      "grad_norm": 0.3402353823184967,
      "learning_rate": 3.199725888391769e-06,
      "loss": 0.0122,
      "step": 2084060
    },
    {
      "epoch": 3.4106426294325196,
      "grad_norm": 0.15502572059631348,
      "learning_rate": 3.199659996178252e-06,
      "loss": 0.0117,
      "step": 2084080
    },
    {
      "epoch": 3.410675359871173,
      "grad_norm": 0.1334853321313858,
      "learning_rate": 3.199594103964735e-06,
      "loss": 0.0094,
      "step": 2084100
    },
    {
      "epoch": 3.4107080903098264,
      "grad_norm": 0.47589048743247986,
      "learning_rate": 3.199528211751218e-06,
      "loss": 0.014,
      "step": 2084120
    },
    {
      "epoch": 3.4107408207484795,
      "grad_norm": 0.1537708193063736,
      "learning_rate": 3.1994623195377006e-06,
      "loss": 0.0113,
      "step": 2084140
    },
    {
      "epoch": 3.410773551187133,
      "grad_norm": 0.3557424247264862,
      "learning_rate": 3.1993964273241833e-06,
      "loss": 0.0146,
      "step": 2084160
    },
    {
      "epoch": 3.4108062816257863,
      "grad_norm": 0.21635551750659943,
      "learning_rate": 3.1993305351106665e-06,
      "loss": 0.0133,
      "step": 2084180
    },
    {
      "epoch": 3.41083901206444,
      "grad_norm": 0.4328644871711731,
      "learning_rate": 3.1992646428971492e-06,
      "loss": 0.0104,
      "step": 2084200
    },
    {
      "epoch": 3.410871742503093,
      "grad_norm": 0.29361602663993835,
      "learning_rate": 3.199198750683632e-06,
      "loss": 0.0135,
      "step": 2084220
    },
    {
      "epoch": 3.4109044729417466,
      "grad_norm": 0.19019447267055511,
      "learning_rate": 3.1991328584701147e-06,
      "loss": 0.0075,
      "step": 2084240
    },
    {
      "epoch": 3.4109372033803997,
      "grad_norm": 0.17471995949745178,
      "learning_rate": 3.1990669662565974e-06,
      "loss": 0.0105,
      "step": 2084260
    },
    {
      "epoch": 3.410969933819053,
      "grad_norm": 0.32914790511131287,
      "learning_rate": 3.1990010740430806e-06,
      "loss": 0.0142,
      "step": 2084280
    },
    {
      "epoch": 3.4110026642577065,
      "grad_norm": 0.623434841632843,
      "learning_rate": 3.1989351818295633e-06,
      "loss": 0.0114,
      "step": 2084300
    },
    {
      "epoch": 3.4110353946963596,
      "grad_norm": 0.13581952452659607,
      "learning_rate": 3.198869289616046e-06,
      "loss": 0.011,
      "step": 2084320
    },
    {
      "epoch": 3.4110681251350132,
      "grad_norm": 0.0655805915594101,
      "learning_rate": 3.198803397402529e-06,
      "loss": 0.0183,
      "step": 2084340
    },
    {
      "epoch": 3.4111008555736664,
      "grad_norm": 0.20402193069458008,
      "learning_rate": 3.198737505189012e-06,
      "loss": 0.0074,
      "step": 2084360
    },
    {
      "epoch": 3.41113358601232,
      "grad_norm": 0.2279091775417328,
      "learning_rate": 3.198671612975495e-06,
      "loss": 0.0145,
      "step": 2084380
    },
    {
      "epoch": 3.411166316450973,
      "grad_norm": 0.6777601838111877,
      "learning_rate": 3.198605720761978e-06,
      "loss": 0.0141,
      "step": 2084400
    },
    {
      "epoch": 3.4111990468896263,
      "grad_norm": 0.1445198506116867,
      "learning_rate": 3.198539828548461e-06,
      "loss": 0.0132,
      "step": 2084420
    },
    {
      "epoch": 3.41123177732828,
      "grad_norm": 0.38424938917160034,
      "learning_rate": 3.1984739363349438e-06,
      "loss": 0.0108,
      "step": 2084440
    },
    {
      "epoch": 3.411264507766933,
      "grad_norm": 0.2912062704563141,
      "learning_rate": 3.1984080441214265e-06,
      "loss": 0.0117,
      "step": 2084460
    },
    {
      "epoch": 3.4112972382055866,
      "grad_norm": 0.2491929531097412,
      "learning_rate": 3.1983421519079093e-06,
      "loss": 0.0058,
      "step": 2084480
    },
    {
      "epoch": 3.4113299686442398,
      "grad_norm": 0.6305120587348938,
      "learning_rate": 3.1982762596943924e-06,
      "loss": 0.0145,
      "step": 2084500
    },
    {
      "epoch": 3.4113626990828934,
      "grad_norm": 0.19921916723251343,
      "learning_rate": 3.198210367480875e-06,
      "loss": 0.0116,
      "step": 2084520
    },
    {
      "epoch": 3.4113954295215465,
      "grad_norm": 0.20860621333122253,
      "learning_rate": 3.198144475267358e-06,
      "loss": 0.0115,
      "step": 2084540
    },
    {
      "epoch": 3.4114281599601997,
      "grad_norm": 0.06797324866056442,
      "learning_rate": 3.1980785830538406e-06,
      "loss": 0.0125,
      "step": 2084560
    },
    {
      "epoch": 3.4114608903988533,
      "grad_norm": 0.37256085872650146,
      "learning_rate": 3.198012690840324e-06,
      "loss": 0.0133,
      "step": 2084580
    },
    {
      "epoch": 3.4114936208375064,
      "grad_norm": 0.06173296272754669,
      "learning_rate": 3.1979467986268065e-06,
      "loss": 0.0099,
      "step": 2084600
    },
    {
      "epoch": 3.41152635127616,
      "grad_norm": 0.3971339464187622,
      "learning_rate": 3.1978809064132893e-06,
      "loss": 0.0129,
      "step": 2084620
    },
    {
      "epoch": 3.411559081714813,
      "grad_norm": 0.18537747859954834,
      "learning_rate": 3.197815014199772e-06,
      "loss": 0.0095,
      "step": 2084640
    },
    {
      "epoch": 3.4115918121534663,
      "grad_norm": 0.20143379271030426,
      "learning_rate": 3.1977491219862548e-06,
      "loss": 0.0079,
      "step": 2084660
    },
    {
      "epoch": 3.41162454259212,
      "grad_norm": 0.26012927293777466,
      "learning_rate": 3.197683229772738e-06,
      "loss": 0.0099,
      "step": 2084680
    },
    {
      "epoch": 3.411657273030773,
      "grad_norm": 0.4743388891220093,
      "learning_rate": 3.1976173375592207e-06,
      "loss": 0.0174,
      "step": 2084700
    },
    {
      "epoch": 3.4116900034694266,
      "grad_norm": 0.2706102728843689,
      "learning_rate": 3.1975514453457034e-06,
      "loss": 0.0073,
      "step": 2084720
    },
    {
      "epoch": 3.41172273390808,
      "grad_norm": 0.07018294930458069,
      "learning_rate": 3.197485553132187e-06,
      "loss": 0.0122,
      "step": 2084740
    },
    {
      "epoch": 3.411755464346733,
      "grad_norm": 0.1332695484161377,
      "learning_rate": 3.1974196609186697e-06,
      "loss": 0.0086,
      "step": 2084760
    },
    {
      "epoch": 3.4117881947853865,
      "grad_norm": 1.0669879913330078,
      "learning_rate": 3.1973537687051525e-06,
      "loss": 0.0123,
      "step": 2084780
    },
    {
      "epoch": 3.4118209252240397,
      "grad_norm": 0.18854887783527374,
      "learning_rate": 3.1972878764916352e-06,
      "loss": 0.0114,
      "step": 2084800
    },
    {
      "epoch": 3.4118536556626933,
      "grad_norm": 0.31925567984580994,
      "learning_rate": 3.1972219842781184e-06,
      "loss": 0.0064,
      "step": 2084820
    },
    {
      "epoch": 3.4118863861013464,
      "grad_norm": 0.1668243259191513,
      "learning_rate": 3.197156092064601e-06,
      "loss": 0.0086,
      "step": 2084840
    },
    {
      "epoch": 3.41191911654,
      "grad_norm": 0.2909846603870392,
      "learning_rate": 3.197090199851084e-06,
      "loss": 0.0149,
      "step": 2084860
    },
    {
      "epoch": 3.411951846978653,
      "grad_norm": 0.20112468302249908,
      "learning_rate": 3.1970243076375666e-06,
      "loss": 0.0118,
      "step": 2084880
    },
    {
      "epoch": 3.4119845774173063,
      "grad_norm": 0.5843186974525452,
      "learning_rate": 3.1969584154240498e-06,
      "loss": 0.0091,
      "step": 2084900
    },
    {
      "epoch": 3.41201730785596,
      "grad_norm": 1.0283421277999878,
      "learning_rate": 3.1968925232105325e-06,
      "loss": 0.0095,
      "step": 2084920
    },
    {
      "epoch": 3.412050038294613,
      "grad_norm": 0.3656842112541199,
      "learning_rate": 3.1968266309970152e-06,
      "loss": 0.0078,
      "step": 2084940
    },
    {
      "epoch": 3.4120827687332667,
      "grad_norm": 0.08463578671216965,
      "learning_rate": 3.196760738783498e-06,
      "loss": 0.0082,
      "step": 2084960
    },
    {
      "epoch": 3.41211549917192,
      "grad_norm": 0.30857622623443604,
      "learning_rate": 3.196694846569981e-06,
      "loss": 0.0137,
      "step": 2084980
    },
    {
      "epoch": 3.4121482296105734,
      "grad_norm": 0.2608127295970917,
      "learning_rate": 3.196628954356464e-06,
      "loss": 0.0083,
      "step": 2085000
    },
    {
      "epoch": 3.4121809600492266,
      "grad_norm": 0.2343967705965042,
      "learning_rate": 3.1965630621429466e-06,
      "loss": 0.0182,
      "step": 2085020
    },
    {
      "epoch": 3.4122136904878797,
      "grad_norm": 0.17283910512924194,
      "learning_rate": 3.1964971699294294e-06,
      "loss": 0.0101,
      "step": 2085040
    },
    {
      "epoch": 3.4122464209265333,
      "grad_norm": 0.9355601668357849,
      "learning_rate": 3.1964312777159125e-06,
      "loss": 0.0126,
      "step": 2085060
    },
    {
      "epoch": 3.4122791513651864,
      "grad_norm": 0.3769984841346741,
      "learning_rate": 3.1963653855023953e-06,
      "loss": 0.0089,
      "step": 2085080
    },
    {
      "epoch": 3.41231188180384,
      "grad_norm": 0.725950300693512,
      "learning_rate": 3.1962994932888784e-06,
      "loss": 0.0092,
      "step": 2085100
    },
    {
      "epoch": 3.412344612242493,
      "grad_norm": 0.21724121272563934,
      "learning_rate": 3.1962336010753616e-06,
      "loss": 0.0117,
      "step": 2085120
    },
    {
      "epoch": 3.412377342681147,
      "grad_norm": 0.22803626954555511,
      "learning_rate": 3.1961677088618443e-06,
      "loss": 0.0128,
      "step": 2085140
    },
    {
      "epoch": 3.4124100731198,
      "grad_norm": 0.9263498187065125,
      "learning_rate": 3.196101816648327e-06,
      "loss": 0.011,
      "step": 2085160
    },
    {
      "epoch": 3.412442803558453,
      "grad_norm": 0.311740905046463,
      "learning_rate": 3.19603592443481e-06,
      "loss": 0.0189,
      "step": 2085180
    },
    {
      "epoch": 3.4124755339971067,
      "grad_norm": 0.48387807607650757,
      "learning_rate": 3.1959700322212926e-06,
      "loss": 0.0095,
      "step": 2085200
    },
    {
      "epoch": 3.41250826443576,
      "grad_norm": 0.3766947388648987,
      "learning_rate": 3.1959041400077757e-06,
      "loss": 0.0072,
      "step": 2085220
    },
    {
      "epoch": 3.4125409948744134,
      "grad_norm": 0.35228872299194336,
      "learning_rate": 3.1958382477942585e-06,
      "loss": 0.0111,
      "step": 2085240
    },
    {
      "epoch": 3.4125737253130666,
      "grad_norm": 0.2037414312362671,
      "learning_rate": 3.195772355580741e-06,
      "loss": 0.008,
      "step": 2085260
    },
    {
      "epoch": 3.41260645575172,
      "grad_norm": 0.665729284286499,
      "learning_rate": 3.195706463367224e-06,
      "loss": 0.0128,
      "step": 2085280
    },
    {
      "epoch": 3.4126391861903733,
      "grad_norm": 0.16705487668514252,
      "learning_rate": 3.195640571153707e-06,
      "loss": 0.0096,
      "step": 2085300
    },
    {
      "epoch": 3.4126719166290265,
      "grad_norm": 0.4408789575099945,
      "learning_rate": 3.19557467894019e-06,
      "loss": 0.0134,
      "step": 2085320
    },
    {
      "epoch": 3.41270464706768,
      "grad_norm": 0.1479741930961609,
      "learning_rate": 3.1955087867266726e-06,
      "loss": 0.0131,
      "step": 2085340
    },
    {
      "epoch": 3.412737377506333,
      "grad_norm": 0.4851780831813812,
      "learning_rate": 3.1954428945131553e-06,
      "loss": 0.0129,
      "step": 2085360
    },
    {
      "epoch": 3.412770107944987,
      "grad_norm": 0.06739174574613571,
      "learning_rate": 3.1953770022996385e-06,
      "loss": 0.0096,
      "step": 2085380
    },
    {
      "epoch": 3.41280283838364,
      "grad_norm": 0.2634773254394531,
      "learning_rate": 3.1953111100861212e-06,
      "loss": 0.0165,
      "step": 2085400
    },
    {
      "epoch": 3.4128355688222936,
      "grad_norm": 0.22079850733280182,
      "learning_rate": 3.195245217872604e-06,
      "loss": 0.0087,
      "step": 2085420
    },
    {
      "epoch": 3.4128682992609467,
      "grad_norm": 0.579677402973175,
      "learning_rate": 3.1951793256590875e-06,
      "loss": 0.015,
      "step": 2085440
    },
    {
      "epoch": 3.4129010296996,
      "grad_norm": 0.27722784876823425,
      "learning_rate": 3.1951134334455703e-06,
      "loss": 0.0113,
      "step": 2085460
    },
    {
      "epoch": 3.4129337601382534,
      "grad_norm": 0.21477040648460388,
      "learning_rate": 3.195047541232053e-06,
      "loss": 0.0088,
      "step": 2085480
    },
    {
      "epoch": 3.4129664905769066,
      "grad_norm": 0.148965984582901,
      "learning_rate": 3.1949816490185358e-06,
      "loss": 0.0107,
      "step": 2085500
    },
    {
      "epoch": 3.41299922101556,
      "grad_norm": 0.32143455743789673,
      "learning_rate": 3.194915756805019e-06,
      "loss": 0.0124,
      "step": 2085520
    },
    {
      "epoch": 3.4130319514542133,
      "grad_norm": 0.3862978518009186,
      "learning_rate": 3.1948498645915017e-06,
      "loss": 0.0181,
      "step": 2085540
    },
    {
      "epoch": 3.413064681892867,
      "grad_norm": 0.053664740175008774,
      "learning_rate": 3.1947839723779844e-06,
      "loss": 0.0132,
      "step": 2085560
    },
    {
      "epoch": 3.41309741233152,
      "grad_norm": 0.7507526874542236,
      "learning_rate": 3.194718080164467e-06,
      "loss": 0.0085,
      "step": 2085580
    },
    {
      "epoch": 3.4131301427701732,
      "grad_norm": 1.6547319889068604,
      "learning_rate": 3.1946521879509503e-06,
      "loss": 0.0153,
      "step": 2085600
    },
    {
      "epoch": 3.413162873208827,
      "grad_norm": 0.2097996473312378,
      "learning_rate": 3.194586295737433e-06,
      "loss": 0.0106,
      "step": 2085620
    },
    {
      "epoch": 3.41319560364748,
      "grad_norm": 0.2944023013114929,
      "learning_rate": 3.194520403523916e-06,
      "loss": 0.0127,
      "step": 2085640
    },
    {
      "epoch": 3.4132283340861336,
      "grad_norm": 0.3142724335193634,
      "learning_rate": 3.1944545113103985e-06,
      "loss": 0.0082,
      "step": 2085660
    },
    {
      "epoch": 3.4132610645247867,
      "grad_norm": 0.34189102053642273,
      "learning_rate": 3.1943886190968813e-06,
      "loss": 0.0085,
      "step": 2085680
    },
    {
      "epoch": 3.4132937949634403,
      "grad_norm": 0.1491512507200241,
      "learning_rate": 3.1943227268833644e-06,
      "loss": 0.0102,
      "step": 2085700
    },
    {
      "epoch": 3.4133265254020935,
      "grad_norm": 0.2557542324066162,
      "learning_rate": 3.194256834669847e-06,
      "loss": 0.0102,
      "step": 2085720
    },
    {
      "epoch": 3.4133592558407466,
      "grad_norm": 0.09158103168010712,
      "learning_rate": 3.19419094245633e-06,
      "loss": 0.0072,
      "step": 2085740
    },
    {
      "epoch": 3.4133919862794,
      "grad_norm": 0.456988662481308,
      "learning_rate": 3.1941250502428127e-06,
      "loss": 0.0103,
      "step": 2085760
    },
    {
      "epoch": 3.4134247167180534,
      "grad_norm": 0.38447660207748413,
      "learning_rate": 3.194059158029296e-06,
      "loss": 0.0174,
      "step": 2085780
    },
    {
      "epoch": 3.413457447156707,
      "grad_norm": 0.47969844937324524,
      "learning_rate": 3.193993265815779e-06,
      "loss": 0.0101,
      "step": 2085800
    },
    {
      "epoch": 3.41349017759536,
      "grad_norm": 0.21878132224082947,
      "learning_rate": 3.1939273736022617e-06,
      "loss": 0.0083,
      "step": 2085820
    },
    {
      "epoch": 3.4135229080340137,
      "grad_norm": 0.6020137667655945,
      "learning_rate": 3.193861481388745e-06,
      "loss": 0.0117,
      "step": 2085840
    },
    {
      "epoch": 3.413555638472667,
      "grad_norm": 0.24314892292022705,
      "learning_rate": 3.1937955891752276e-06,
      "loss": 0.0121,
      "step": 2085860
    },
    {
      "epoch": 3.41358836891132,
      "grad_norm": 0.25528156757354736,
      "learning_rate": 3.1937296969617104e-06,
      "loss": 0.0137,
      "step": 2085880
    },
    {
      "epoch": 3.4136210993499736,
      "grad_norm": 0.12504364550113678,
      "learning_rate": 3.193663804748193e-06,
      "loss": 0.009,
      "step": 2085900
    },
    {
      "epoch": 3.4136538297886267,
      "grad_norm": 0.20679374039173126,
      "learning_rate": 3.1935979125346763e-06,
      "loss": 0.0078,
      "step": 2085920
    },
    {
      "epoch": 3.4136865602272803,
      "grad_norm": 0.4200393259525299,
      "learning_rate": 3.193532020321159e-06,
      "loss": 0.0093,
      "step": 2085940
    },
    {
      "epoch": 3.4137192906659335,
      "grad_norm": 0.168644517660141,
      "learning_rate": 3.1934661281076417e-06,
      "loss": 0.0127,
      "step": 2085960
    },
    {
      "epoch": 3.413752021104587,
      "grad_norm": 0.14386090636253357,
      "learning_rate": 3.1934002358941245e-06,
      "loss": 0.0104,
      "step": 2085980
    },
    {
      "epoch": 3.4137847515432402,
      "grad_norm": 0.05335623025894165,
      "learning_rate": 3.1933343436806076e-06,
      "loss": 0.0137,
      "step": 2086000
    },
    {
      "epoch": 3.4138174819818934,
      "grad_norm": 0.15950600802898407,
      "learning_rate": 3.1932684514670904e-06,
      "loss": 0.0127,
      "step": 2086020
    },
    {
      "epoch": 3.413850212420547,
      "grad_norm": 0.12592433393001556,
      "learning_rate": 3.193202559253573e-06,
      "loss": 0.0131,
      "step": 2086040
    },
    {
      "epoch": 3.4138829428592,
      "grad_norm": 0.2545606195926666,
      "learning_rate": 3.193136667040056e-06,
      "loss": 0.0097,
      "step": 2086060
    },
    {
      "epoch": 3.4139156732978537,
      "grad_norm": 0.13132710754871368,
      "learning_rate": 3.193070774826539e-06,
      "loss": 0.0142,
      "step": 2086080
    },
    {
      "epoch": 3.413948403736507,
      "grad_norm": 0.5716784000396729,
      "learning_rate": 3.1930048826130218e-06,
      "loss": 0.0153,
      "step": 2086100
    },
    {
      "epoch": 3.41398113417516,
      "grad_norm": 0.18275901675224304,
      "learning_rate": 3.1929389903995045e-06,
      "loss": 0.0103,
      "step": 2086120
    },
    {
      "epoch": 3.4140138646138136,
      "grad_norm": 0.6863877773284912,
      "learning_rate": 3.192873098185988e-06,
      "loss": 0.0184,
      "step": 2086140
    },
    {
      "epoch": 3.4140465950524668,
      "grad_norm": 0.10658225417137146,
      "learning_rate": 3.192807205972471e-06,
      "loss": 0.0128,
      "step": 2086160
    },
    {
      "epoch": 3.4140793254911204,
      "grad_norm": 0.07871782034635544,
      "learning_rate": 3.1927413137589536e-06,
      "loss": 0.0095,
      "step": 2086180
    },
    {
      "epoch": 3.4141120559297735,
      "grad_norm": 0.3125205338001251,
      "learning_rate": 3.1926754215454363e-06,
      "loss": 0.011,
      "step": 2086200
    },
    {
      "epoch": 3.4141447863684267,
      "grad_norm": 0.32193681597709656,
      "learning_rate": 3.192609529331919e-06,
      "loss": 0.0121,
      "step": 2086220
    },
    {
      "epoch": 3.4141775168070803,
      "grad_norm": 0.18435905873775482,
      "learning_rate": 3.1925436371184022e-06,
      "loss": 0.0112,
      "step": 2086240
    },
    {
      "epoch": 3.4142102472457334,
      "grad_norm": 0.1600506603717804,
      "learning_rate": 3.192477744904885e-06,
      "loss": 0.0084,
      "step": 2086260
    },
    {
      "epoch": 3.414242977684387,
      "grad_norm": 0.6343510150909424,
      "learning_rate": 3.1924118526913677e-06,
      "loss": 0.0107,
      "step": 2086280
    },
    {
      "epoch": 3.41427570812304,
      "grad_norm": 0.09423888474702835,
      "learning_rate": 3.1923459604778504e-06,
      "loss": 0.0161,
      "step": 2086300
    },
    {
      "epoch": 3.4143084385616937,
      "grad_norm": 0.3483424782752991,
      "learning_rate": 3.1922800682643336e-06,
      "loss": 0.0112,
      "step": 2086320
    },
    {
      "epoch": 3.414341169000347,
      "grad_norm": 0.5376102328300476,
      "learning_rate": 3.1922141760508163e-06,
      "loss": 0.0135,
      "step": 2086340
    },
    {
      "epoch": 3.414373899439,
      "grad_norm": 0.2453766167163849,
      "learning_rate": 3.192148283837299e-06,
      "loss": 0.0114,
      "step": 2086360
    },
    {
      "epoch": 3.4144066298776536,
      "grad_norm": 0.6351775527000427,
      "learning_rate": 3.192082391623782e-06,
      "loss": 0.0099,
      "step": 2086380
    },
    {
      "epoch": 3.414439360316307,
      "grad_norm": 0.15117326378822327,
      "learning_rate": 3.192016499410265e-06,
      "loss": 0.0095,
      "step": 2086400
    },
    {
      "epoch": 3.4144720907549604,
      "grad_norm": 0.12156417220830917,
      "learning_rate": 3.1919506071967477e-06,
      "loss": 0.0064,
      "step": 2086420
    },
    {
      "epoch": 3.4145048211936135,
      "grad_norm": 0.23352418839931488,
      "learning_rate": 3.1918847149832305e-06,
      "loss": 0.0124,
      "step": 2086440
    },
    {
      "epoch": 3.414537551632267,
      "grad_norm": 0.3012469410896301,
      "learning_rate": 3.191818822769713e-06,
      "loss": 0.0117,
      "step": 2086460
    },
    {
      "epoch": 3.4145702820709203,
      "grad_norm": 0.2927258610725403,
      "learning_rate": 3.1917529305561964e-06,
      "loss": 0.0117,
      "step": 2086480
    },
    {
      "epoch": 3.4146030125095734,
      "grad_norm": 0.21679170429706573,
      "learning_rate": 3.1916870383426795e-06,
      "loss": 0.0116,
      "step": 2086500
    },
    {
      "epoch": 3.414635742948227,
      "grad_norm": 0.5603905916213989,
      "learning_rate": 3.1916211461291623e-06,
      "loss": 0.012,
      "step": 2086520
    },
    {
      "epoch": 3.41466847338688,
      "grad_norm": 0.181290403008461,
      "learning_rate": 3.1915552539156454e-06,
      "loss": 0.0106,
      "step": 2086540
    },
    {
      "epoch": 3.4147012038255338,
      "grad_norm": 0.3864070177078247,
      "learning_rate": 3.191489361702128e-06,
      "loss": 0.01,
      "step": 2086560
    },
    {
      "epoch": 3.414733934264187,
      "grad_norm": 0.29743874073028564,
      "learning_rate": 3.191423469488611e-06,
      "loss": 0.0108,
      "step": 2086580
    },
    {
      "epoch": 3.4147666647028405,
      "grad_norm": 0.3623883128166199,
      "learning_rate": 3.1913575772750937e-06,
      "loss": 0.012,
      "step": 2086600
    },
    {
      "epoch": 3.4147993951414937,
      "grad_norm": 0.4233490228652954,
      "learning_rate": 3.191291685061577e-06,
      "loss": 0.0108,
      "step": 2086620
    },
    {
      "epoch": 3.414832125580147,
      "grad_norm": 0.3017868399620056,
      "learning_rate": 3.1912257928480596e-06,
      "loss": 0.011,
      "step": 2086640
    },
    {
      "epoch": 3.4148648560188004,
      "grad_norm": 0.2409823089838028,
      "learning_rate": 3.1911599006345423e-06,
      "loss": 0.0141,
      "step": 2086660
    },
    {
      "epoch": 3.4148975864574536,
      "grad_norm": 0.24210286140441895,
      "learning_rate": 3.191094008421025e-06,
      "loss": 0.0087,
      "step": 2086680
    },
    {
      "epoch": 3.414930316896107,
      "grad_norm": 0.3302096128463745,
      "learning_rate": 3.1910281162075078e-06,
      "loss": 0.0124,
      "step": 2086700
    },
    {
      "epoch": 3.4149630473347603,
      "grad_norm": 0.3211907148361206,
      "learning_rate": 3.190962223993991e-06,
      "loss": 0.0109,
      "step": 2086720
    },
    {
      "epoch": 3.414995777773414,
      "grad_norm": 0.44266021251678467,
      "learning_rate": 3.1908963317804737e-06,
      "loss": 0.0096,
      "step": 2086740
    },
    {
      "epoch": 3.415028508212067,
      "grad_norm": 0.16022267937660217,
      "learning_rate": 3.1908304395669564e-06,
      "loss": 0.0062,
      "step": 2086760
    },
    {
      "epoch": 3.41506123865072,
      "grad_norm": 0.5074501633644104,
      "learning_rate": 3.190764547353439e-06,
      "loss": 0.0111,
      "step": 2086780
    },
    {
      "epoch": 3.415093969089374,
      "grad_norm": 0.45016682147979736,
      "learning_rate": 3.1906986551399223e-06,
      "loss": 0.0133,
      "step": 2086800
    },
    {
      "epoch": 3.415126699528027,
      "grad_norm": 0.44785216450691223,
      "learning_rate": 3.190632762926405e-06,
      "loss": 0.0132,
      "step": 2086820
    },
    {
      "epoch": 3.4151594299666805,
      "grad_norm": 0.18328382074832916,
      "learning_rate": 3.190566870712888e-06,
      "loss": 0.0112,
      "step": 2086840
    },
    {
      "epoch": 3.4151921604053337,
      "grad_norm": 0.2941277027130127,
      "learning_rate": 3.1905009784993714e-06,
      "loss": 0.0142,
      "step": 2086860
    },
    {
      "epoch": 3.4152248908439873,
      "grad_norm": 0.029697129502892494,
      "learning_rate": 3.190435086285854e-06,
      "loss": 0.0166,
      "step": 2086880
    },
    {
      "epoch": 3.4152576212826404,
      "grad_norm": 0.23028230667114258,
      "learning_rate": 3.190369194072337e-06,
      "loss": 0.0129,
      "step": 2086900
    },
    {
      "epoch": 3.4152903517212936,
      "grad_norm": 0.1457260549068451,
      "learning_rate": 3.1903033018588196e-06,
      "loss": 0.0131,
      "step": 2086920
    },
    {
      "epoch": 3.415323082159947,
      "grad_norm": 0.27296528220176697,
      "learning_rate": 3.1902374096453028e-06,
      "loss": 0.0113,
      "step": 2086940
    },
    {
      "epoch": 3.4153558125986003,
      "grad_norm": 2.7145910263061523,
      "learning_rate": 3.1901715174317855e-06,
      "loss": 0.0116,
      "step": 2086960
    },
    {
      "epoch": 3.415388543037254,
      "grad_norm": 0.247027188539505,
      "learning_rate": 3.1901056252182682e-06,
      "loss": 0.013,
      "step": 2086980
    },
    {
      "epoch": 3.415421273475907,
      "grad_norm": 0.2291780561208725,
      "learning_rate": 3.190039733004751e-06,
      "loss": 0.0151,
      "step": 2087000
    },
    {
      "epoch": 3.4154540039145607,
      "grad_norm": 0.11875909566879272,
      "learning_rate": 3.189973840791234e-06,
      "loss": 0.0111,
      "step": 2087020
    },
    {
      "epoch": 3.415486734353214,
      "grad_norm": 0.08502981811761856,
      "learning_rate": 3.189907948577717e-06,
      "loss": 0.0119,
      "step": 2087040
    },
    {
      "epoch": 3.415519464791867,
      "grad_norm": 0.3361447751522064,
      "learning_rate": 3.1898420563641996e-06,
      "loss": 0.0189,
      "step": 2087060
    },
    {
      "epoch": 3.4155521952305206,
      "grad_norm": 0.2604120373725891,
      "learning_rate": 3.1897761641506824e-06,
      "loss": 0.0081,
      "step": 2087080
    },
    {
      "epoch": 3.4155849256691737,
      "grad_norm": 0.2482604831457138,
      "learning_rate": 3.189710271937165e-06,
      "loss": 0.0106,
      "step": 2087100
    },
    {
      "epoch": 3.4156176561078273,
      "grad_norm": 0.47518977522850037,
      "learning_rate": 3.1896443797236483e-06,
      "loss": 0.0166,
      "step": 2087120
    },
    {
      "epoch": 3.4156503865464805,
      "grad_norm": 0.11397805064916611,
      "learning_rate": 3.189578487510131e-06,
      "loss": 0.0128,
      "step": 2087140
    },
    {
      "epoch": 3.415683116985134,
      "grad_norm": 0.44408583641052246,
      "learning_rate": 3.1895125952966138e-06,
      "loss": 0.0124,
      "step": 2087160
    },
    {
      "epoch": 3.415715847423787,
      "grad_norm": 0.3499867618083954,
      "learning_rate": 3.1894467030830965e-06,
      "loss": 0.0113,
      "step": 2087180
    },
    {
      "epoch": 3.4157485778624403,
      "grad_norm": 0.21402283012866974,
      "learning_rate": 3.18938081086958e-06,
      "loss": 0.0147,
      "step": 2087200
    },
    {
      "epoch": 3.415781308301094,
      "grad_norm": 0.12336096912622452,
      "learning_rate": 3.189314918656063e-06,
      "loss": 0.0163,
      "step": 2087220
    },
    {
      "epoch": 3.415814038739747,
      "grad_norm": 0.32161253690719604,
      "learning_rate": 3.1892490264425456e-06,
      "loss": 0.0098,
      "step": 2087240
    },
    {
      "epoch": 3.4158467691784007,
      "grad_norm": 0.40049660205841064,
      "learning_rate": 3.1891831342290287e-06,
      "loss": 0.0105,
      "step": 2087260
    },
    {
      "epoch": 3.415879499617054,
      "grad_norm": 0.4753829836845398,
      "learning_rate": 3.1891172420155115e-06,
      "loss": 0.0107,
      "step": 2087280
    },
    {
      "epoch": 3.4159122300557074,
      "grad_norm": 0.10085341334342957,
      "learning_rate": 3.189051349801994e-06,
      "loss": 0.0076,
      "step": 2087300
    },
    {
      "epoch": 3.4159449604943606,
      "grad_norm": 0.15602657198905945,
      "learning_rate": 3.188985457588477e-06,
      "loss": 0.0105,
      "step": 2087320
    },
    {
      "epoch": 3.4159776909330137,
      "grad_norm": 0.30488333106040955,
      "learning_rate": 3.18891956537496e-06,
      "loss": 0.0103,
      "step": 2087340
    },
    {
      "epoch": 3.4160104213716673,
      "grad_norm": 0.23641175031661987,
      "learning_rate": 3.188853673161443e-06,
      "loss": 0.0107,
      "step": 2087360
    },
    {
      "epoch": 3.4160431518103205,
      "grad_norm": 0.28124380111694336,
      "learning_rate": 3.1887877809479256e-06,
      "loss": 0.015,
      "step": 2087380
    },
    {
      "epoch": 3.416075882248974,
      "grad_norm": 0.3507061004638672,
      "learning_rate": 3.1887218887344083e-06,
      "loss": 0.0144,
      "step": 2087400
    },
    {
      "epoch": 3.416108612687627,
      "grad_norm": 0.1462704986333847,
      "learning_rate": 3.1886559965208915e-06,
      "loss": 0.0117,
      "step": 2087420
    },
    {
      "epoch": 3.416141343126281,
      "grad_norm": 0.05804584175348282,
      "learning_rate": 3.1885901043073742e-06,
      "loss": 0.0109,
      "step": 2087440
    },
    {
      "epoch": 3.416174073564934,
      "grad_norm": 0.16348524391651154,
      "learning_rate": 3.188524212093857e-06,
      "loss": 0.0133,
      "step": 2087460
    },
    {
      "epoch": 3.416206804003587,
      "grad_norm": 0.14779677987098694,
      "learning_rate": 3.1884583198803397e-06,
      "loss": 0.0095,
      "step": 2087480
    },
    {
      "epoch": 3.4162395344422407,
      "grad_norm": 0.2572230100631714,
      "learning_rate": 3.188392427666823e-06,
      "loss": 0.0097,
      "step": 2087500
    },
    {
      "epoch": 3.416272264880894,
      "grad_norm": 0.41219210624694824,
      "learning_rate": 3.1883265354533056e-06,
      "loss": 0.0193,
      "step": 2087520
    },
    {
      "epoch": 3.4163049953195475,
      "grad_norm": 0.1324552297592163,
      "learning_rate": 3.1882606432397883e-06,
      "loss": 0.0103,
      "step": 2087540
    },
    {
      "epoch": 3.4163377257582006,
      "grad_norm": 0.9243690967559814,
      "learning_rate": 3.188194751026272e-06,
      "loss": 0.0099,
      "step": 2087560
    },
    {
      "epoch": 3.416370456196854,
      "grad_norm": 0.18733161687850952,
      "learning_rate": 3.1881288588127547e-06,
      "loss": 0.0094,
      "step": 2087580
    },
    {
      "epoch": 3.4164031866355073,
      "grad_norm": 0.3884841203689575,
      "learning_rate": 3.1880629665992374e-06,
      "loss": 0.0081,
      "step": 2087600
    },
    {
      "epoch": 3.4164359170741605,
      "grad_norm": 0.11452018469572067,
      "learning_rate": 3.18799707438572e-06,
      "loss": 0.0117,
      "step": 2087620
    },
    {
      "epoch": 3.416468647512814,
      "grad_norm": 0.19086705148220062,
      "learning_rate": 3.187931182172203e-06,
      "loss": 0.011,
      "step": 2087640
    },
    {
      "epoch": 3.4165013779514672,
      "grad_norm": 0.20463794469833374,
      "learning_rate": 3.187865289958686e-06,
      "loss": 0.0078,
      "step": 2087660
    },
    {
      "epoch": 3.416534108390121,
      "grad_norm": 0.6172857284545898,
      "learning_rate": 3.187799397745169e-06,
      "loss": 0.0094,
      "step": 2087680
    },
    {
      "epoch": 3.416566838828774,
      "grad_norm": 0.3719874322414398,
      "learning_rate": 3.1877335055316515e-06,
      "loss": 0.0081,
      "step": 2087700
    },
    {
      "epoch": 3.416599569267427,
      "grad_norm": 0.23281458020210266,
      "learning_rate": 3.1876676133181343e-06,
      "loss": 0.0103,
      "step": 2087720
    },
    {
      "epoch": 3.4166322997060807,
      "grad_norm": 0.12202231585979462,
      "learning_rate": 3.1876017211046174e-06,
      "loss": 0.011,
      "step": 2087740
    },
    {
      "epoch": 3.416665030144734,
      "grad_norm": 0.1951478272676468,
      "learning_rate": 3.1875358288911e-06,
      "loss": 0.0101,
      "step": 2087760
    },
    {
      "epoch": 3.4166977605833875,
      "grad_norm": 0.4262208342552185,
      "learning_rate": 3.187469936677583e-06,
      "loss": 0.0123,
      "step": 2087780
    },
    {
      "epoch": 3.4167304910220406,
      "grad_norm": 0.3183278441429138,
      "learning_rate": 3.1874040444640657e-06,
      "loss": 0.0131,
      "step": 2087800
    },
    {
      "epoch": 3.4167632214606938,
      "grad_norm": 0.44678157567977905,
      "learning_rate": 3.187338152250549e-06,
      "loss": 0.0132,
      "step": 2087820
    },
    {
      "epoch": 3.4167959518993474,
      "grad_norm": 0.3282214105129242,
      "learning_rate": 3.1872722600370316e-06,
      "loss": 0.0176,
      "step": 2087840
    },
    {
      "epoch": 3.4168286823380005,
      "grad_norm": 0.45708805322647095,
      "learning_rate": 3.1872063678235143e-06,
      "loss": 0.0151,
      "step": 2087860
    },
    {
      "epoch": 3.416861412776654,
      "grad_norm": 0.9124897122383118,
      "learning_rate": 3.187140475609997e-06,
      "loss": 0.0149,
      "step": 2087880
    },
    {
      "epoch": 3.4168941432153073,
      "grad_norm": 0.0983649268746376,
      "learning_rate": 3.1870745833964806e-06,
      "loss": 0.0094,
      "step": 2087900
    },
    {
      "epoch": 3.416926873653961,
      "grad_norm": 0.1893942654132843,
      "learning_rate": 3.1870086911829634e-06,
      "loss": 0.0114,
      "step": 2087920
    },
    {
      "epoch": 3.416959604092614,
      "grad_norm": 0.20604576170444489,
      "learning_rate": 3.186942798969446e-06,
      "loss": 0.0203,
      "step": 2087940
    },
    {
      "epoch": 3.416992334531267,
      "grad_norm": 0.1745351403951645,
      "learning_rate": 3.1868769067559293e-06,
      "loss": 0.0089,
      "step": 2087960
    },
    {
      "epoch": 3.4170250649699208,
      "grad_norm": 0.8200981020927429,
      "learning_rate": 3.186811014542412e-06,
      "loss": 0.0129,
      "step": 2087980
    },
    {
      "epoch": 3.417057795408574,
      "grad_norm": 0.8694753050804138,
      "learning_rate": 3.1867451223288948e-06,
      "loss": 0.009,
      "step": 2088000
    },
    {
      "epoch": 3.4170905258472275,
      "grad_norm": 0.13870924711227417,
      "learning_rate": 3.1866792301153775e-06,
      "loss": 0.0118,
      "step": 2088020
    },
    {
      "epoch": 3.4171232562858807,
      "grad_norm": 0.5288985371589661,
      "learning_rate": 3.1866133379018607e-06,
      "loss": 0.0143,
      "step": 2088040
    },
    {
      "epoch": 3.4171559867245342,
      "grad_norm": 0.8335433006286621,
      "learning_rate": 3.1865474456883434e-06,
      "loss": 0.0151,
      "step": 2088060
    },
    {
      "epoch": 3.4171887171631874,
      "grad_norm": 0.28069519996643066,
      "learning_rate": 3.186481553474826e-06,
      "loss": 0.0126,
      "step": 2088080
    },
    {
      "epoch": 3.4172214476018405,
      "grad_norm": 0.6830663681030273,
      "learning_rate": 3.186415661261309e-06,
      "loss": 0.0175,
      "step": 2088100
    },
    {
      "epoch": 3.417254178040494,
      "grad_norm": 0.49995991587638855,
      "learning_rate": 3.1863497690477916e-06,
      "loss": 0.0114,
      "step": 2088120
    },
    {
      "epoch": 3.4172869084791473,
      "grad_norm": 0.2261287122964859,
      "learning_rate": 3.1862838768342748e-06,
      "loss": 0.0084,
      "step": 2088140
    },
    {
      "epoch": 3.417319638917801,
      "grad_norm": 0.41830000281333923,
      "learning_rate": 3.1862179846207575e-06,
      "loss": 0.0102,
      "step": 2088160
    },
    {
      "epoch": 3.417352369356454,
      "grad_norm": 0.2517913579940796,
      "learning_rate": 3.1861520924072403e-06,
      "loss": 0.0089,
      "step": 2088180
    },
    {
      "epoch": 3.4173850997951076,
      "grad_norm": 0.12478157132863998,
      "learning_rate": 3.186086200193723e-06,
      "loss": 0.0126,
      "step": 2088200
    },
    {
      "epoch": 3.417417830233761,
      "grad_norm": 0.17380225658416748,
      "learning_rate": 3.186020307980206e-06,
      "loss": 0.0074,
      "step": 2088220
    },
    {
      "epoch": 3.417450560672414,
      "grad_norm": 0.2703011929988861,
      "learning_rate": 3.185954415766689e-06,
      "loss": 0.0128,
      "step": 2088240
    },
    {
      "epoch": 3.4174832911110675,
      "grad_norm": 0.361137330532074,
      "learning_rate": 3.185888523553172e-06,
      "loss": 0.0128,
      "step": 2088260
    },
    {
      "epoch": 3.4175160215497207,
      "grad_norm": 0.10946085304021835,
      "learning_rate": 3.1858226313396552e-06,
      "loss": 0.01,
      "step": 2088280
    },
    {
      "epoch": 3.4175487519883743,
      "grad_norm": 0.17206010222434998,
      "learning_rate": 3.185756739126138e-06,
      "loss": 0.0118,
      "step": 2088300
    },
    {
      "epoch": 3.4175814824270274,
      "grad_norm": 0.13432329893112183,
      "learning_rate": 3.1856908469126207e-06,
      "loss": 0.0143,
      "step": 2088320
    },
    {
      "epoch": 3.417614212865681,
      "grad_norm": 0.1985347419977188,
      "learning_rate": 3.1856249546991034e-06,
      "loss": 0.0125,
      "step": 2088340
    },
    {
      "epoch": 3.417646943304334,
      "grad_norm": 0.25467929244041443,
      "learning_rate": 3.1855590624855866e-06,
      "loss": 0.018,
      "step": 2088360
    },
    {
      "epoch": 3.4176796737429873,
      "grad_norm": 0.32155507802963257,
      "learning_rate": 3.1854931702720693e-06,
      "loss": 0.0092,
      "step": 2088380
    },
    {
      "epoch": 3.417712404181641,
      "grad_norm": 0.2486366331577301,
      "learning_rate": 3.185427278058552e-06,
      "loss": 0.0118,
      "step": 2088400
    },
    {
      "epoch": 3.417745134620294,
      "grad_norm": 0.18333612382411957,
      "learning_rate": 3.185361385845035e-06,
      "loss": 0.0085,
      "step": 2088420
    },
    {
      "epoch": 3.4177778650589477,
      "grad_norm": 0.23985520005226135,
      "learning_rate": 3.185295493631518e-06,
      "loss": 0.0087,
      "step": 2088440
    },
    {
      "epoch": 3.417810595497601,
      "grad_norm": 0.699880063533783,
      "learning_rate": 3.1852296014180007e-06,
      "loss": 0.0172,
      "step": 2088460
    },
    {
      "epoch": 3.4178433259362544,
      "grad_norm": 0.28251826763153076,
      "learning_rate": 3.1851637092044835e-06,
      "loss": 0.0094,
      "step": 2088480
    },
    {
      "epoch": 3.4178760563749075,
      "grad_norm": 0.20613761246204376,
      "learning_rate": 3.185097816990966e-06,
      "loss": 0.0111,
      "step": 2088500
    },
    {
      "epoch": 3.4179087868135607,
      "grad_norm": 0.2248232513666153,
      "learning_rate": 3.185031924777449e-06,
      "loss": 0.0078,
      "step": 2088520
    },
    {
      "epoch": 3.4179415172522143,
      "grad_norm": 0.4581976532936096,
      "learning_rate": 3.184966032563932e-06,
      "loss": 0.0087,
      "step": 2088540
    },
    {
      "epoch": 3.4179742476908674,
      "grad_norm": 0.4860403835773468,
      "learning_rate": 3.184900140350415e-06,
      "loss": 0.011,
      "step": 2088560
    },
    {
      "epoch": 3.418006978129521,
      "grad_norm": 0.5557345151901245,
      "learning_rate": 3.1848342481368976e-06,
      "loss": 0.0138,
      "step": 2088580
    },
    {
      "epoch": 3.418039708568174,
      "grad_norm": 0.3120616376399994,
      "learning_rate": 3.184768355923381e-06,
      "loss": 0.0091,
      "step": 2088600
    },
    {
      "epoch": 3.418072439006828,
      "grad_norm": 0.1469162553548813,
      "learning_rate": 3.184702463709864e-06,
      "loss": 0.0123,
      "step": 2088620
    },
    {
      "epoch": 3.418105169445481,
      "grad_norm": 0.4777653217315674,
      "learning_rate": 3.1846365714963467e-06,
      "loss": 0.0142,
      "step": 2088640
    },
    {
      "epoch": 3.418137899884134,
      "grad_norm": 0.2026541382074356,
      "learning_rate": 3.1845706792828294e-06,
      "loss": 0.0104,
      "step": 2088660
    },
    {
      "epoch": 3.4181706303227877,
      "grad_norm": 0.14085645973682404,
      "learning_rate": 3.1845047870693126e-06,
      "loss": 0.0159,
      "step": 2088680
    },
    {
      "epoch": 3.418203360761441,
      "grad_norm": 0.33097878098487854,
      "learning_rate": 3.1844388948557953e-06,
      "loss": 0.0127,
      "step": 2088700
    },
    {
      "epoch": 3.4182360912000944,
      "grad_norm": 0.27052465081214905,
      "learning_rate": 3.184373002642278e-06,
      "loss": 0.0131,
      "step": 2088720
    },
    {
      "epoch": 3.4182688216387476,
      "grad_norm": 0.1734571009874344,
      "learning_rate": 3.1843071104287608e-06,
      "loss": 0.0065,
      "step": 2088740
    },
    {
      "epoch": 3.418301552077401,
      "grad_norm": 0.14133423566818237,
      "learning_rate": 3.184241218215244e-06,
      "loss": 0.0074,
      "step": 2088760
    },
    {
      "epoch": 3.4183342825160543,
      "grad_norm": 0.18978306651115417,
      "learning_rate": 3.1841753260017267e-06,
      "loss": 0.0107,
      "step": 2088780
    },
    {
      "epoch": 3.4183670129547075,
      "grad_norm": 0.13175636529922485,
      "learning_rate": 3.1841094337882094e-06,
      "loss": 0.0112,
      "step": 2088800
    },
    {
      "epoch": 3.418399743393361,
      "grad_norm": 0.32537680864334106,
      "learning_rate": 3.184043541574692e-06,
      "loss": 0.0146,
      "step": 2088820
    },
    {
      "epoch": 3.418432473832014,
      "grad_norm": 0.5277245044708252,
      "learning_rate": 3.1839776493611753e-06,
      "loss": 0.0173,
      "step": 2088840
    },
    {
      "epoch": 3.418465204270668,
      "grad_norm": 0.2134469598531723,
      "learning_rate": 3.183911757147658e-06,
      "loss": 0.0108,
      "step": 2088860
    },
    {
      "epoch": 3.418497934709321,
      "grad_norm": 0.058914635330438614,
      "learning_rate": 3.183845864934141e-06,
      "loss": 0.0114,
      "step": 2088880
    },
    {
      "epoch": 3.4185306651479745,
      "grad_norm": 0.9170407056808472,
      "learning_rate": 3.1837799727206235e-06,
      "loss": 0.0105,
      "step": 2088900
    },
    {
      "epoch": 3.4185633955866277,
      "grad_norm": 0.3499204218387604,
      "learning_rate": 3.1837140805071067e-06,
      "loss": 0.0129,
      "step": 2088920
    },
    {
      "epoch": 3.418596126025281,
      "grad_norm": 0.31671857833862305,
      "learning_rate": 3.1836481882935894e-06,
      "loss": 0.0135,
      "step": 2088940
    },
    {
      "epoch": 3.4186288564639344,
      "grad_norm": 0.4238375723361969,
      "learning_rate": 3.1835822960800726e-06,
      "loss": 0.0113,
      "step": 2088960
    },
    {
      "epoch": 3.4186615869025876,
      "grad_norm": 0.4871883690357208,
      "learning_rate": 3.1835164038665558e-06,
      "loss": 0.0173,
      "step": 2088980
    },
    {
      "epoch": 3.418694317341241,
      "grad_norm": 0.15483389794826508,
      "learning_rate": 3.1834505116530385e-06,
      "loss": 0.0085,
      "step": 2089000
    },
    {
      "epoch": 3.4187270477798943,
      "grad_norm": 0.3078452944755554,
      "learning_rate": 3.1833846194395213e-06,
      "loss": 0.0118,
      "step": 2089020
    },
    {
      "epoch": 3.418759778218548,
      "grad_norm": 0.2525073289871216,
      "learning_rate": 3.183318727226004e-06,
      "loss": 0.0123,
      "step": 2089040
    },
    {
      "epoch": 3.418792508657201,
      "grad_norm": 0.20282280445098877,
      "learning_rate": 3.1832528350124867e-06,
      "loss": 0.0086,
      "step": 2089060
    },
    {
      "epoch": 3.4188252390958542,
      "grad_norm": 0.2015395164489746,
      "learning_rate": 3.18318694279897e-06,
      "loss": 0.0164,
      "step": 2089080
    },
    {
      "epoch": 3.418857969534508,
      "grad_norm": 0.10997606068849564,
      "learning_rate": 3.1831210505854526e-06,
      "loss": 0.0103,
      "step": 2089100
    },
    {
      "epoch": 3.418890699973161,
      "grad_norm": 0.0876908227801323,
      "learning_rate": 3.1830551583719354e-06,
      "loss": 0.0138,
      "step": 2089120
    },
    {
      "epoch": 3.4189234304118146,
      "grad_norm": 0.3999694585800171,
      "learning_rate": 3.182989266158418e-06,
      "loss": 0.0136,
      "step": 2089140
    },
    {
      "epoch": 3.4189561608504677,
      "grad_norm": 0.17015573382377625,
      "learning_rate": 3.1829233739449013e-06,
      "loss": 0.0123,
      "step": 2089160
    },
    {
      "epoch": 3.418988891289121,
      "grad_norm": 0.18124693632125854,
      "learning_rate": 3.182857481731384e-06,
      "loss": 0.009,
      "step": 2089180
    },
    {
      "epoch": 3.4190216217277745,
      "grad_norm": 0.6841201186180115,
      "learning_rate": 3.1827915895178668e-06,
      "loss": 0.011,
      "step": 2089200
    },
    {
      "epoch": 3.4190543521664276,
      "grad_norm": 0.23871001601219177,
      "learning_rate": 3.1827256973043495e-06,
      "loss": 0.0147,
      "step": 2089220
    },
    {
      "epoch": 3.419087082605081,
      "grad_norm": 0.20380741357803345,
      "learning_rate": 3.1826598050908327e-06,
      "loss": 0.0106,
      "step": 2089240
    },
    {
      "epoch": 3.4191198130437344,
      "grad_norm": 0.1473160684108734,
      "learning_rate": 3.1825939128773154e-06,
      "loss": 0.0115,
      "step": 2089260
    },
    {
      "epoch": 3.4191525434823875,
      "grad_norm": 0.829054594039917,
      "learning_rate": 3.182528020663798e-06,
      "loss": 0.0097,
      "step": 2089280
    },
    {
      "epoch": 3.419185273921041,
      "grad_norm": 0.18192487955093384,
      "learning_rate": 3.182462128450281e-06,
      "loss": 0.0133,
      "step": 2089300
    },
    {
      "epoch": 3.4192180043596943,
      "grad_norm": 0.2988102436065674,
      "learning_rate": 3.1823962362367645e-06,
      "loss": 0.0111,
      "step": 2089320
    },
    {
      "epoch": 3.419250734798348,
      "grad_norm": 0.3734343647956848,
      "learning_rate": 3.182330344023247e-06,
      "loss": 0.0114,
      "step": 2089340
    },
    {
      "epoch": 3.419283465237001,
      "grad_norm": 0.2922598123550415,
      "learning_rate": 3.18226445180973e-06,
      "loss": 0.0076,
      "step": 2089360
    },
    {
      "epoch": 3.4193161956756546,
      "grad_norm": 0.2543661594390869,
      "learning_rate": 3.182198559596213e-06,
      "loss": 0.0127,
      "step": 2089380
    },
    {
      "epoch": 3.4193489261143077,
      "grad_norm": 0.8343373537063599,
      "learning_rate": 3.182132667382696e-06,
      "loss": 0.0181,
      "step": 2089400
    },
    {
      "epoch": 3.419381656552961,
      "grad_norm": 0.3286304771900177,
      "learning_rate": 3.1820667751691786e-06,
      "loss": 0.01,
      "step": 2089420
    },
    {
      "epoch": 3.4194143869916145,
      "grad_norm": 0.1005716621875763,
      "learning_rate": 3.1820008829556613e-06,
      "loss": 0.0128,
      "step": 2089440
    },
    {
      "epoch": 3.4194471174302676,
      "grad_norm": 0.5063977241516113,
      "learning_rate": 3.1819349907421445e-06,
      "loss": 0.01,
      "step": 2089460
    },
    {
      "epoch": 3.4194798478689212,
      "grad_norm": 0.2058115303516388,
      "learning_rate": 3.1818690985286272e-06,
      "loss": 0.0106,
      "step": 2089480
    },
    {
      "epoch": 3.4195125783075744,
      "grad_norm": 0.24427875876426697,
      "learning_rate": 3.18180320631511e-06,
      "loss": 0.0085,
      "step": 2089500
    },
    {
      "epoch": 3.419545308746228,
      "grad_norm": 0.4648131728172302,
      "learning_rate": 3.1817373141015927e-06,
      "loss": 0.0094,
      "step": 2089520
    },
    {
      "epoch": 3.419578039184881,
      "grad_norm": 0.21206451952457428,
      "learning_rate": 3.1816714218880755e-06,
      "loss": 0.0084,
      "step": 2089540
    },
    {
      "epoch": 3.4196107696235343,
      "grad_norm": 0.26904726028442383,
      "learning_rate": 3.1816055296745586e-06,
      "loss": 0.0119,
      "step": 2089560
    },
    {
      "epoch": 3.419643500062188,
      "grad_norm": 0.1276332288980484,
      "learning_rate": 3.1815396374610414e-06,
      "loss": 0.0119,
      "step": 2089580
    },
    {
      "epoch": 3.419676230500841,
      "grad_norm": 0.19130517542362213,
      "learning_rate": 3.181473745247524e-06,
      "loss": 0.0119,
      "step": 2089600
    },
    {
      "epoch": 3.4197089609394946,
      "grad_norm": 0.26276928186416626,
      "learning_rate": 3.181407853034007e-06,
      "loss": 0.015,
      "step": 2089620
    },
    {
      "epoch": 3.4197416913781478,
      "grad_norm": 0.5022664666175842,
      "learning_rate": 3.18134196082049e-06,
      "loss": 0.0125,
      "step": 2089640
    },
    {
      "epoch": 3.4197744218168014,
      "grad_norm": 0.07627687603235245,
      "learning_rate": 3.181276068606973e-06,
      "loss": 0.0135,
      "step": 2089660
    },
    {
      "epoch": 3.4198071522554545,
      "grad_norm": 0.16600917279720306,
      "learning_rate": 3.181210176393456e-06,
      "loss": 0.0175,
      "step": 2089680
    },
    {
      "epoch": 3.4198398826941077,
      "grad_norm": 0.24659040570259094,
      "learning_rate": 3.181144284179939e-06,
      "loss": 0.0102,
      "step": 2089700
    },
    {
      "epoch": 3.4198726131327613,
      "grad_norm": 0.260932594537735,
      "learning_rate": 3.181078391966422e-06,
      "loss": 0.0119,
      "step": 2089720
    },
    {
      "epoch": 3.4199053435714144,
      "grad_norm": 0.20216888189315796,
      "learning_rate": 3.1810124997529045e-06,
      "loss": 0.0068,
      "step": 2089740
    },
    {
      "epoch": 3.419938074010068,
      "grad_norm": 0.2824619710445404,
      "learning_rate": 3.1809466075393873e-06,
      "loss": 0.0123,
      "step": 2089760
    },
    {
      "epoch": 3.419970804448721,
      "grad_norm": 0.12789958715438843,
      "learning_rate": 3.1808807153258704e-06,
      "loss": 0.0145,
      "step": 2089780
    },
    {
      "epoch": 3.4200035348873747,
      "grad_norm": 0.03274011239409447,
      "learning_rate": 3.180814823112353e-06,
      "loss": 0.0089,
      "step": 2089800
    },
    {
      "epoch": 3.420036265326028,
      "grad_norm": 0.8666090369224548,
      "learning_rate": 3.180748930898836e-06,
      "loss": 0.0085,
      "step": 2089820
    },
    {
      "epoch": 3.420068995764681,
      "grad_norm": 0.738249659538269,
      "learning_rate": 3.1806830386853187e-06,
      "loss": 0.011,
      "step": 2089840
    },
    {
      "epoch": 3.4201017262033346,
      "grad_norm": 0.27790382504463196,
      "learning_rate": 3.180617146471802e-06,
      "loss": 0.0083,
      "step": 2089860
    },
    {
      "epoch": 3.420134456641988,
      "grad_norm": 0.19518627226352692,
      "learning_rate": 3.1805512542582846e-06,
      "loss": 0.0161,
      "step": 2089880
    },
    {
      "epoch": 3.4201671870806414,
      "grad_norm": 0.21189218759536743,
      "learning_rate": 3.1804853620447673e-06,
      "loss": 0.0117,
      "step": 2089900
    },
    {
      "epoch": 3.4201999175192945,
      "grad_norm": 0.2683098316192627,
      "learning_rate": 3.18041946983125e-06,
      "loss": 0.0164,
      "step": 2089920
    },
    {
      "epoch": 3.420232647957948,
      "grad_norm": 0.40849727392196655,
      "learning_rate": 3.1803535776177332e-06,
      "loss": 0.0083,
      "step": 2089940
    },
    {
      "epoch": 3.4202653783966013,
      "grad_norm": 0.40857264399528503,
      "learning_rate": 3.180287685404216e-06,
      "loss": 0.0131,
      "step": 2089960
    },
    {
      "epoch": 3.4202981088352544,
      "grad_norm": 0.11994584649801254,
      "learning_rate": 3.1802217931906987e-06,
      "loss": 0.008,
      "step": 2089980
    },
    {
      "epoch": 3.420330839273908,
      "grad_norm": 0.08362565189599991,
      "learning_rate": 3.1801559009771814e-06,
      "loss": 0.0122,
      "step": 2090000
    },
    {
      "epoch": 3.420363569712561,
      "grad_norm": 0.15681931376457214,
      "learning_rate": 3.180090008763665e-06,
      "loss": 0.0179,
      "step": 2090020
    },
    {
      "epoch": 3.4203963001512148,
      "grad_norm": 0.8197628259658813,
      "learning_rate": 3.1800241165501478e-06,
      "loss": 0.0164,
      "step": 2090040
    },
    {
      "epoch": 3.420429030589868,
      "grad_norm": 0.40260857343673706,
      "learning_rate": 3.1799582243366305e-06,
      "loss": 0.012,
      "step": 2090060
    },
    {
      "epoch": 3.4204617610285215,
      "grad_norm": 0.21580828726291656,
      "learning_rate": 3.1798923321231132e-06,
      "loss": 0.0128,
      "step": 2090080
    },
    {
      "epoch": 3.4204944914671747,
      "grad_norm": 0.27099278569221497,
      "learning_rate": 3.1798264399095964e-06,
      "loss": 0.0129,
      "step": 2090100
    },
    {
      "epoch": 3.420527221905828,
      "grad_norm": 0.26072922348976135,
      "learning_rate": 3.179760547696079e-06,
      "loss": 0.0153,
      "step": 2090120
    },
    {
      "epoch": 3.4205599523444814,
      "grad_norm": 0.48180103302001953,
      "learning_rate": 3.179694655482562e-06,
      "loss": 0.0112,
      "step": 2090140
    },
    {
      "epoch": 3.4205926827831346,
      "grad_norm": 0.3208830952644348,
      "learning_rate": 3.1796287632690446e-06,
      "loss": 0.0098,
      "step": 2090160
    },
    {
      "epoch": 3.420625413221788,
      "grad_norm": 0.2964566648006439,
      "learning_rate": 3.1795628710555278e-06,
      "loss": 0.0121,
      "step": 2090180
    },
    {
      "epoch": 3.4206581436604413,
      "grad_norm": 0.31166887283325195,
      "learning_rate": 3.1794969788420105e-06,
      "loss": 0.0115,
      "step": 2090200
    },
    {
      "epoch": 3.420690874099095,
      "grad_norm": 0.355097234249115,
      "learning_rate": 3.1794310866284933e-06,
      "loss": 0.0071,
      "step": 2090220
    },
    {
      "epoch": 3.420723604537748,
      "grad_norm": 0.28463664650917053,
      "learning_rate": 3.179365194414976e-06,
      "loss": 0.0124,
      "step": 2090240
    },
    {
      "epoch": 3.420756334976401,
      "grad_norm": 0.5559177398681641,
      "learning_rate": 3.179299302201459e-06,
      "loss": 0.01,
      "step": 2090260
    },
    {
      "epoch": 3.420789065415055,
      "grad_norm": 0.15955153107643127,
      "learning_rate": 3.179233409987942e-06,
      "loss": 0.0177,
      "step": 2090280
    },
    {
      "epoch": 3.420821795853708,
      "grad_norm": 0.12692339718341827,
      "learning_rate": 3.1791675177744246e-06,
      "loss": 0.0106,
      "step": 2090300
    },
    {
      "epoch": 3.4208545262923615,
      "grad_norm": 0.16557931900024414,
      "learning_rate": 3.1791016255609074e-06,
      "loss": 0.01,
      "step": 2090320
    },
    {
      "epoch": 3.4208872567310147,
      "grad_norm": 0.2896271049976349,
      "learning_rate": 3.1790357333473905e-06,
      "loss": 0.0114,
      "step": 2090340
    },
    {
      "epoch": 3.4209199871696683,
      "grad_norm": 0.24028871953487396,
      "learning_rate": 3.1789698411338737e-06,
      "loss": 0.0131,
      "step": 2090360
    },
    {
      "epoch": 3.4209527176083214,
      "grad_norm": 0.11852258443832397,
      "learning_rate": 3.1789039489203565e-06,
      "loss": 0.0144,
      "step": 2090380
    },
    {
      "epoch": 3.4209854480469746,
      "grad_norm": 0.1998567432165146,
      "learning_rate": 3.1788380567068396e-06,
      "loss": 0.0094,
      "step": 2090400
    },
    {
      "epoch": 3.421018178485628,
      "grad_norm": 0.07197288423776627,
      "learning_rate": 3.1787721644933224e-06,
      "loss": 0.009,
      "step": 2090420
    },
    {
      "epoch": 3.4210509089242813,
      "grad_norm": 0.5665311813354492,
      "learning_rate": 3.178706272279805e-06,
      "loss": 0.0141,
      "step": 2090440
    },
    {
      "epoch": 3.421083639362935,
      "grad_norm": 0.23614539206027985,
      "learning_rate": 3.178640380066288e-06,
      "loss": 0.0125,
      "step": 2090460
    },
    {
      "epoch": 3.421116369801588,
      "grad_norm": 0.1712792068719864,
      "learning_rate": 3.1785744878527706e-06,
      "loss": 0.0112,
      "step": 2090480
    },
    {
      "epoch": 3.4211491002402417,
      "grad_norm": 0.2077697366476059,
      "learning_rate": 3.1785085956392537e-06,
      "loss": 0.0155,
      "step": 2090500
    },
    {
      "epoch": 3.421181830678895,
      "grad_norm": 0.20291750133037567,
      "learning_rate": 3.1784427034257365e-06,
      "loss": 0.0112,
      "step": 2090520
    },
    {
      "epoch": 3.421214561117548,
      "grad_norm": 0.1477695107460022,
      "learning_rate": 3.1783768112122192e-06,
      "loss": 0.0069,
      "step": 2090540
    },
    {
      "epoch": 3.4212472915562016,
      "grad_norm": 0.11522780358791351,
      "learning_rate": 3.178310918998702e-06,
      "loss": 0.0097,
      "step": 2090560
    },
    {
      "epoch": 3.4212800219948547,
      "grad_norm": 0.3212953209877014,
      "learning_rate": 3.178245026785185e-06,
      "loss": 0.0152,
      "step": 2090580
    },
    {
      "epoch": 3.4213127524335083,
      "grad_norm": 0.5794854760169983,
      "learning_rate": 3.178179134571668e-06,
      "loss": 0.0122,
      "step": 2090600
    },
    {
      "epoch": 3.4213454828721614,
      "grad_norm": 0.2481103390455246,
      "learning_rate": 3.1781132423581506e-06,
      "loss": 0.011,
      "step": 2090620
    },
    {
      "epoch": 3.4213782133108146,
      "grad_norm": 0.30066853761672974,
      "learning_rate": 3.1780473501446333e-06,
      "loss": 0.0163,
      "step": 2090640
    },
    {
      "epoch": 3.421410943749468,
      "grad_norm": 0.31529226899147034,
      "learning_rate": 3.1779814579311165e-06,
      "loss": 0.0129,
      "step": 2090660
    },
    {
      "epoch": 3.4214436741881213,
      "grad_norm": 0.3675050139427185,
      "learning_rate": 3.1779155657175992e-06,
      "loss": 0.0108,
      "step": 2090680
    },
    {
      "epoch": 3.421476404626775,
      "grad_norm": 0.2335209846496582,
      "learning_rate": 3.177849673504082e-06,
      "loss": 0.017,
      "step": 2090700
    },
    {
      "epoch": 3.421509135065428,
      "grad_norm": 0.14260467886924744,
      "learning_rate": 3.1777837812905656e-06,
      "loss": 0.0138,
      "step": 2090720
    },
    {
      "epoch": 3.4215418655040812,
      "grad_norm": 0.20945194363594055,
      "learning_rate": 3.1777178890770483e-06,
      "loss": 0.0099,
      "step": 2090740
    },
    {
      "epoch": 3.421574595942735,
      "grad_norm": 0.6329526305198669,
      "learning_rate": 3.177651996863531e-06,
      "loss": 0.0123,
      "step": 2090760
    },
    {
      "epoch": 3.421607326381388,
      "grad_norm": 0.11683118343353271,
      "learning_rate": 3.1775861046500138e-06,
      "loss": 0.0136,
      "step": 2090780
    },
    {
      "epoch": 3.4216400568200416,
      "grad_norm": 0.1414400190114975,
      "learning_rate": 3.177520212436497e-06,
      "loss": 0.0102,
      "step": 2090800
    },
    {
      "epoch": 3.4216727872586947,
      "grad_norm": 0.41531434655189514,
      "learning_rate": 3.1774543202229797e-06,
      "loss": 0.0097,
      "step": 2090820
    },
    {
      "epoch": 3.4217055176973483,
      "grad_norm": 0.10449890792369843,
      "learning_rate": 3.1773884280094624e-06,
      "loss": 0.0078,
      "step": 2090840
    },
    {
      "epoch": 3.4217382481360015,
      "grad_norm": 0.2696038782596588,
      "learning_rate": 3.177322535795945e-06,
      "loss": 0.0109,
      "step": 2090860
    },
    {
      "epoch": 3.4217709785746546,
      "grad_norm": 0.1579408347606659,
      "learning_rate": 3.1772566435824283e-06,
      "loss": 0.0084,
      "step": 2090880
    },
    {
      "epoch": 3.421803709013308,
      "grad_norm": 0.1863834261894226,
      "learning_rate": 3.177190751368911e-06,
      "loss": 0.0114,
      "step": 2090900
    },
    {
      "epoch": 3.4218364394519614,
      "grad_norm": 0.2194593846797943,
      "learning_rate": 3.177124859155394e-06,
      "loss": 0.0143,
      "step": 2090920
    },
    {
      "epoch": 3.421869169890615,
      "grad_norm": 0.5658695697784424,
      "learning_rate": 3.1770589669418766e-06,
      "loss": 0.0084,
      "step": 2090940
    },
    {
      "epoch": 3.421901900329268,
      "grad_norm": 0.40035855770111084,
      "learning_rate": 3.1769930747283593e-06,
      "loss": 0.0094,
      "step": 2090960
    },
    {
      "epoch": 3.4219346307679217,
      "grad_norm": 0.3868945240974426,
      "learning_rate": 3.1769271825148425e-06,
      "loss": 0.0105,
      "step": 2090980
    },
    {
      "epoch": 3.421967361206575,
      "grad_norm": 0.6993958353996277,
      "learning_rate": 3.176861290301325e-06,
      "loss": 0.0098,
      "step": 2091000
    },
    {
      "epoch": 3.422000091645228,
      "grad_norm": 0.27986547350883484,
      "learning_rate": 3.176795398087808e-06,
      "loss": 0.0104,
      "step": 2091020
    },
    {
      "epoch": 3.4220328220838816,
      "grad_norm": 0.20939616858959198,
      "learning_rate": 3.1767295058742907e-06,
      "loss": 0.0087,
      "step": 2091040
    },
    {
      "epoch": 3.4220655525225347,
      "grad_norm": 0.09158507734537125,
      "learning_rate": 3.176663613660774e-06,
      "loss": 0.0098,
      "step": 2091060
    },
    {
      "epoch": 3.4220982829611883,
      "grad_norm": 0.4070034623146057,
      "learning_rate": 3.176597721447257e-06,
      "loss": 0.0092,
      "step": 2091080
    },
    {
      "epoch": 3.4221310133998415,
      "grad_norm": 0.39911478757858276,
      "learning_rate": 3.1765318292337397e-06,
      "loss": 0.0157,
      "step": 2091100
    },
    {
      "epoch": 3.422163743838495,
      "grad_norm": 0.09850430488586426,
      "learning_rate": 3.176465937020223e-06,
      "loss": 0.0082,
      "step": 2091120
    },
    {
      "epoch": 3.4221964742771482,
      "grad_norm": 0.2222284972667694,
      "learning_rate": 3.1764000448067056e-06,
      "loss": 0.0096,
      "step": 2091140
    },
    {
      "epoch": 3.4222292047158014,
      "grad_norm": 0.1483234167098999,
      "learning_rate": 3.1763341525931884e-06,
      "loss": 0.0115,
      "step": 2091160
    },
    {
      "epoch": 3.422261935154455,
      "grad_norm": 0.22990427911281586,
      "learning_rate": 3.176268260379671e-06,
      "loss": 0.0084,
      "step": 2091180
    },
    {
      "epoch": 3.422294665593108,
      "grad_norm": 0.25749149918556213,
      "learning_rate": 3.1762023681661543e-06,
      "loss": 0.0128,
      "step": 2091200
    },
    {
      "epoch": 3.4223273960317617,
      "grad_norm": 0.22584296762943268,
      "learning_rate": 3.176136475952637e-06,
      "loss": 0.017,
      "step": 2091220
    },
    {
      "epoch": 3.422360126470415,
      "grad_norm": 0.31497621536254883,
      "learning_rate": 3.1760705837391198e-06,
      "loss": 0.0141,
      "step": 2091240
    },
    {
      "epoch": 3.4223928569090685,
      "grad_norm": 0.566677987575531,
      "learning_rate": 3.1760046915256025e-06,
      "loss": 0.0093,
      "step": 2091260
    },
    {
      "epoch": 3.4224255873477216,
      "grad_norm": 0.21515454351902008,
      "learning_rate": 3.1759387993120857e-06,
      "loss": 0.0127,
      "step": 2091280
    },
    {
      "epoch": 3.4224583177863748,
      "grad_norm": 0.2765372395515442,
      "learning_rate": 3.1758729070985684e-06,
      "loss": 0.0124,
      "step": 2091300
    },
    {
      "epoch": 3.4224910482250284,
      "grad_norm": 0.6884610056877136,
      "learning_rate": 3.175807014885051e-06,
      "loss": 0.012,
      "step": 2091320
    },
    {
      "epoch": 3.4225237786636815,
      "grad_norm": 0.3575061857700348,
      "learning_rate": 3.175741122671534e-06,
      "loss": 0.0099,
      "step": 2091340
    },
    {
      "epoch": 3.422556509102335,
      "grad_norm": 0.28680914640426636,
      "learning_rate": 3.175675230458017e-06,
      "loss": 0.0212,
      "step": 2091360
    },
    {
      "epoch": 3.4225892395409883,
      "grad_norm": 0.19282589852809906,
      "learning_rate": 3.1756093382445e-06,
      "loss": 0.014,
      "step": 2091380
    },
    {
      "epoch": 3.422621969979642,
      "grad_norm": 0.4651537537574768,
      "learning_rate": 3.1755434460309825e-06,
      "loss": 0.0141,
      "step": 2091400
    },
    {
      "epoch": 3.422654700418295,
      "grad_norm": 0.18008552491664886,
      "learning_rate": 3.175477553817466e-06,
      "loss": 0.0092,
      "step": 2091420
    },
    {
      "epoch": 3.422687430856948,
      "grad_norm": 0.22839877009391785,
      "learning_rate": 3.175411661603949e-06,
      "loss": 0.0125,
      "step": 2091440
    },
    {
      "epoch": 3.4227201612956017,
      "grad_norm": 0.29164254665374756,
      "learning_rate": 3.1753457693904316e-06,
      "loss": 0.0137,
      "step": 2091460
    },
    {
      "epoch": 3.422752891734255,
      "grad_norm": 0.5685385465621948,
      "learning_rate": 3.1752798771769143e-06,
      "loss": 0.0144,
      "step": 2091480
    },
    {
      "epoch": 3.4227856221729085,
      "grad_norm": 0.6217864155769348,
      "learning_rate": 3.175213984963397e-06,
      "loss": 0.0112,
      "step": 2091500
    },
    {
      "epoch": 3.4228183526115616,
      "grad_norm": 0.23740018904209137,
      "learning_rate": 3.1751480927498802e-06,
      "loss": 0.0138,
      "step": 2091520
    },
    {
      "epoch": 3.4228510830502152,
      "grad_norm": 0.09607990831136703,
      "learning_rate": 3.175082200536363e-06,
      "loss": 0.0116,
      "step": 2091540
    },
    {
      "epoch": 3.4228838134888684,
      "grad_norm": 0.15229038894176483,
      "learning_rate": 3.1750163083228457e-06,
      "loss": 0.0204,
      "step": 2091560
    },
    {
      "epoch": 3.4229165439275215,
      "grad_norm": 0.10335950553417206,
      "learning_rate": 3.1749504161093285e-06,
      "loss": 0.0116,
      "step": 2091580
    },
    {
      "epoch": 3.422949274366175,
      "grad_norm": 1.0132911205291748,
      "learning_rate": 3.1748845238958116e-06,
      "loss": 0.0135,
      "step": 2091600
    },
    {
      "epoch": 3.4229820048048283,
      "grad_norm": 0.06424197554588318,
      "learning_rate": 3.1748186316822944e-06,
      "loss": 0.0101,
      "step": 2091620
    },
    {
      "epoch": 3.423014735243482,
      "grad_norm": 0.16373705863952637,
      "learning_rate": 3.174752739468777e-06,
      "loss": 0.0133,
      "step": 2091640
    },
    {
      "epoch": 3.423047465682135,
      "grad_norm": 0.06250302493572235,
      "learning_rate": 3.17468684725526e-06,
      "loss": 0.0071,
      "step": 2091660
    },
    {
      "epoch": 3.4230801961207886,
      "grad_norm": 0.166734516620636,
      "learning_rate": 3.174620955041743e-06,
      "loss": 0.0088,
      "step": 2091680
    },
    {
      "epoch": 3.4231129265594418,
      "grad_norm": 2.274714708328247,
      "learning_rate": 3.1745550628282257e-06,
      "loss": 0.009,
      "step": 2091700
    },
    {
      "epoch": 3.423145656998095,
      "grad_norm": 0.2595365047454834,
      "learning_rate": 3.1744891706147085e-06,
      "loss": 0.0119,
      "step": 2091720
    },
    {
      "epoch": 3.4231783874367485,
      "grad_norm": 0.46085697412490845,
      "learning_rate": 3.1744232784011912e-06,
      "loss": 0.0113,
      "step": 2091740
    },
    {
      "epoch": 3.4232111178754017,
      "grad_norm": 0.10927179455757141,
      "learning_rate": 3.1743573861876744e-06,
      "loss": 0.0113,
      "step": 2091760
    },
    {
      "epoch": 3.4232438483140553,
      "grad_norm": 0.3534405529499054,
      "learning_rate": 3.1742914939741576e-06,
      "loss": 0.0137,
      "step": 2091780
    },
    {
      "epoch": 3.4232765787527084,
      "grad_norm": 0.10321062058210373,
      "learning_rate": 3.1742256017606403e-06,
      "loss": 0.0156,
      "step": 2091800
    },
    {
      "epoch": 3.423309309191362,
      "grad_norm": 0.5098949074745178,
      "learning_rate": 3.1741597095471235e-06,
      "loss": 0.0085,
      "step": 2091820
    },
    {
      "epoch": 3.423342039630015,
      "grad_norm": 0.07886910438537598,
      "learning_rate": 3.174093817333606e-06,
      "loss": 0.0112,
      "step": 2091840
    },
    {
      "epoch": 3.4233747700686683,
      "grad_norm": 0.24801968038082123,
      "learning_rate": 3.174027925120089e-06,
      "loss": 0.0147,
      "step": 2091860
    },
    {
      "epoch": 3.423407500507322,
      "grad_norm": 0.18531745672225952,
      "learning_rate": 3.1739620329065717e-06,
      "loss": 0.0133,
      "step": 2091880
    },
    {
      "epoch": 3.423440230945975,
      "grad_norm": 0.4924860894680023,
      "learning_rate": 3.173896140693055e-06,
      "loss": 0.0092,
      "step": 2091900
    },
    {
      "epoch": 3.4234729613846286,
      "grad_norm": 0.2957565188407898,
      "learning_rate": 3.1738302484795376e-06,
      "loss": 0.0122,
      "step": 2091920
    },
    {
      "epoch": 3.423505691823282,
      "grad_norm": 0.22078008949756622,
      "learning_rate": 3.1737643562660203e-06,
      "loss": 0.0093,
      "step": 2091940
    },
    {
      "epoch": 3.4235384222619354,
      "grad_norm": 0.12109451740980148,
      "learning_rate": 3.173698464052503e-06,
      "loss": 0.008,
      "step": 2091960
    },
    {
      "epoch": 3.4235711527005885,
      "grad_norm": 0.664941132068634,
      "learning_rate": 3.173632571838986e-06,
      "loss": 0.0115,
      "step": 2091980
    },
    {
      "epoch": 3.4236038831392417,
      "grad_norm": 0.6484663486480713,
      "learning_rate": 3.173566679625469e-06,
      "loss": 0.0087,
      "step": 2092000
    },
    {
      "epoch": 3.4236366135778953,
      "grad_norm": 0.14899809658527374,
      "learning_rate": 3.1735007874119517e-06,
      "loss": 0.0104,
      "step": 2092020
    },
    {
      "epoch": 3.4236693440165484,
      "grad_norm": 0.2876536250114441,
      "learning_rate": 3.1734348951984344e-06,
      "loss": 0.0135,
      "step": 2092040
    },
    {
      "epoch": 3.423702074455202,
      "grad_norm": 0.25328946113586426,
      "learning_rate": 3.173369002984917e-06,
      "loss": 0.0109,
      "step": 2092060
    },
    {
      "epoch": 3.423734804893855,
      "grad_norm": 0.39084774255752563,
      "learning_rate": 3.1733031107714003e-06,
      "loss": 0.0112,
      "step": 2092080
    },
    {
      "epoch": 3.4237675353325088,
      "grad_norm": 0.24590900540351868,
      "learning_rate": 3.173237218557883e-06,
      "loss": 0.0184,
      "step": 2092100
    },
    {
      "epoch": 3.423800265771162,
      "grad_norm": 0.6014370918273926,
      "learning_rate": 3.1731713263443662e-06,
      "loss": 0.0143,
      "step": 2092120
    },
    {
      "epoch": 3.423832996209815,
      "grad_norm": 0.3450218737125397,
      "learning_rate": 3.1731054341308494e-06,
      "loss": 0.0113,
      "step": 2092140
    },
    {
      "epoch": 3.4238657266484687,
      "grad_norm": 0.2822214663028717,
      "learning_rate": 3.173039541917332e-06,
      "loss": 0.0097,
      "step": 2092160
    },
    {
      "epoch": 3.423898457087122,
      "grad_norm": 0.28643450140953064,
      "learning_rate": 3.172973649703815e-06,
      "loss": 0.0122,
      "step": 2092180
    },
    {
      "epoch": 3.4239311875257754,
      "grad_norm": 0.1878306269645691,
      "learning_rate": 3.1729077574902976e-06,
      "loss": 0.0116,
      "step": 2092200
    },
    {
      "epoch": 3.4239639179644286,
      "grad_norm": 0.14632545411586761,
      "learning_rate": 3.172841865276781e-06,
      "loss": 0.007,
      "step": 2092220
    },
    {
      "epoch": 3.4239966484030817,
      "grad_norm": 0.5691010355949402,
      "learning_rate": 3.1727759730632635e-06,
      "loss": 0.0149,
      "step": 2092240
    },
    {
      "epoch": 3.4240293788417353,
      "grad_norm": 0.17288485169410706,
      "learning_rate": 3.1727100808497463e-06,
      "loss": 0.0092,
      "step": 2092260
    },
    {
      "epoch": 3.4240621092803885,
      "grad_norm": 0.2825373113155365,
      "learning_rate": 3.172644188636229e-06,
      "loss": 0.0133,
      "step": 2092280
    },
    {
      "epoch": 3.424094839719042,
      "grad_norm": 0.2972507178783417,
      "learning_rate": 3.172578296422712e-06,
      "loss": 0.0088,
      "step": 2092300
    },
    {
      "epoch": 3.424127570157695,
      "grad_norm": 1.13152015209198,
      "learning_rate": 3.172512404209195e-06,
      "loss": 0.0136,
      "step": 2092320
    },
    {
      "epoch": 3.4241603005963483,
      "grad_norm": 0.31075945496559143,
      "learning_rate": 3.1724465119956777e-06,
      "loss": 0.0123,
      "step": 2092340
    },
    {
      "epoch": 3.424193031035002,
      "grad_norm": 0.10666573792695999,
      "learning_rate": 3.1723806197821604e-06,
      "loss": 0.0084,
      "step": 2092360
    },
    {
      "epoch": 3.424225761473655,
      "grad_norm": 0.6237858533859253,
      "learning_rate": 3.172314727568643e-06,
      "loss": 0.0108,
      "step": 2092380
    },
    {
      "epoch": 3.4242584919123087,
      "grad_norm": 0.30177071690559387,
      "learning_rate": 3.1722488353551263e-06,
      "loss": 0.0087,
      "step": 2092400
    },
    {
      "epoch": 3.424291222350962,
      "grad_norm": 0.29273927211761475,
      "learning_rate": 3.172182943141609e-06,
      "loss": 0.0113,
      "step": 2092420
    },
    {
      "epoch": 3.4243239527896154,
      "grad_norm": 0.43353599309921265,
      "learning_rate": 3.1721170509280918e-06,
      "loss": 0.01,
      "step": 2092440
    },
    {
      "epoch": 3.4243566832282686,
      "grad_norm": 0.6378347277641296,
      "learning_rate": 3.1720511587145745e-06,
      "loss": 0.0127,
      "step": 2092460
    },
    {
      "epoch": 3.4243894136669217,
      "grad_norm": 0.12634463608264923,
      "learning_rate": 3.171985266501058e-06,
      "loss": 0.0073,
      "step": 2092480
    },
    {
      "epoch": 3.4244221441055753,
      "grad_norm": 0.11420497298240662,
      "learning_rate": 3.171919374287541e-06,
      "loss": 0.0121,
      "step": 2092500
    },
    {
      "epoch": 3.4244548745442285,
      "grad_norm": 0.40134209394454956,
      "learning_rate": 3.1718534820740236e-06,
      "loss": 0.0161,
      "step": 2092520
    },
    {
      "epoch": 3.424487604982882,
      "grad_norm": 0.8473421335220337,
      "learning_rate": 3.1717875898605067e-06,
      "loss": 0.0149,
      "step": 2092540
    },
    {
      "epoch": 3.4245203354215352,
      "grad_norm": 0.33507218956947327,
      "learning_rate": 3.1717216976469895e-06,
      "loss": 0.0074,
      "step": 2092560
    },
    {
      "epoch": 3.424553065860189,
      "grad_norm": 0.2779495120048523,
      "learning_rate": 3.1716558054334722e-06,
      "loss": 0.0087,
      "step": 2092580
    },
    {
      "epoch": 3.424585796298842,
      "grad_norm": 0.18211525678634644,
      "learning_rate": 3.171589913219955e-06,
      "loss": 0.0088,
      "step": 2092600
    },
    {
      "epoch": 3.424618526737495,
      "grad_norm": 0.08664167672395706,
      "learning_rate": 3.171524021006438e-06,
      "loss": 0.0125,
      "step": 2092620
    },
    {
      "epoch": 3.4246512571761487,
      "grad_norm": 0.1253121942281723,
      "learning_rate": 3.171458128792921e-06,
      "loss": 0.0126,
      "step": 2092640
    },
    {
      "epoch": 3.424683987614802,
      "grad_norm": 0.15663205087184906,
      "learning_rate": 3.1713922365794036e-06,
      "loss": 0.0115,
      "step": 2092660
    },
    {
      "epoch": 3.4247167180534555,
      "grad_norm": 0.4983382821083069,
      "learning_rate": 3.1713263443658863e-06,
      "loss": 0.0197,
      "step": 2092680
    },
    {
      "epoch": 3.4247494484921086,
      "grad_norm": 0.26888754963874817,
      "learning_rate": 3.1712604521523695e-06,
      "loss": 0.0205,
      "step": 2092700
    },
    {
      "epoch": 3.424782178930762,
      "grad_norm": 0.055453818291425705,
      "learning_rate": 3.1711945599388522e-06,
      "loss": 0.0161,
      "step": 2092720
    },
    {
      "epoch": 3.4248149093694153,
      "grad_norm": 0.07766611129045486,
      "learning_rate": 3.171128667725335e-06,
      "loss": 0.0125,
      "step": 2092740
    },
    {
      "epoch": 3.4248476398080685,
      "grad_norm": 0.36582645773887634,
      "learning_rate": 3.1710627755118177e-06,
      "loss": 0.0093,
      "step": 2092760
    },
    {
      "epoch": 3.424880370246722,
      "grad_norm": 0.25405991077423096,
      "learning_rate": 3.170996883298301e-06,
      "loss": 0.013,
      "step": 2092780
    },
    {
      "epoch": 3.4249131006853752,
      "grad_norm": 0.5690488815307617,
      "learning_rate": 3.1709309910847836e-06,
      "loss": 0.0096,
      "step": 2092800
    },
    {
      "epoch": 3.424945831124029,
      "grad_norm": 0.3963925838470459,
      "learning_rate": 3.1708650988712664e-06,
      "loss": 0.0106,
      "step": 2092820
    },
    {
      "epoch": 3.424978561562682,
      "grad_norm": 0.2662811279296875,
      "learning_rate": 3.17079920665775e-06,
      "loss": 0.0086,
      "step": 2092840
    },
    {
      "epoch": 3.4250112920013356,
      "grad_norm": 0.571143627166748,
      "learning_rate": 3.1707333144442327e-06,
      "loss": 0.0159,
      "step": 2092860
    },
    {
      "epoch": 3.4250440224399887,
      "grad_norm": 0.27075931429862976,
      "learning_rate": 3.1706674222307154e-06,
      "loss": 0.0131,
      "step": 2092880
    },
    {
      "epoch": 3.425076752878642,
      "grad_norm": 0.2821110486984253,
      "learning_rate": 3.170601530017198e-06,
      "loss": 0.0097,
      "step": 2092900
    },
    {
      "epoch": 3.4251094833172955,
      "grad_norm": 0.20785126090049744,
      "learning_rate": 3.170535637803681e-06,
      "loss": 0.0139,
      "step": 2092920
    },
    {
      "epoch": 3.4251422137559486,
      "grad_norm": 0.25075122714042664,
      "learning_rate": 3.170469745590164e-06,
      "loss": 0.0095,
      "step": 2092940
    },
    {
      "epoch": 3.4251749441946022,
      "grad_norm": 0.28666388988494873,
      "learning_rate": 3.170403853376647e-06,
      "loss": 0.0154,
      "step": 2092960
    },
    {
      "epoch": 3.4252076746332554,
      "grad_norm": 0.08852347731590271,
      "learning_rate": 3.1703379611631296e-06,
      "loss": 0.0117,
      "step": 2092980
    },
    {
      "epoch": 3.425240405071909,
      "grad_norm": 0.24136455357074738,
      "learning_rate": 3.1702720689496123e-06,
      "loss": 0.0102,
      "step": 2093000
    },
    {
      "epoch": 3.425273135510562,
      "grad_norm": 0.2676406800746918,
      "learning_rate": 3.1702061767360955e-06,
      "loss": 0.0153,
      "step": 2093020
    },
    {
      "epoch": 3.4253058659492153,
      "grad_norm": 0.297451913356781,
      "learning_rate": 3.170140284522578e-06,
      "loss": 0.0149,
      "step": 2093040
    },
    {
      "epoch": 3.425338596387869,
      "grad_norm": 0.43026310205459595,
      "learning_rate": 3.170074392309061e-06,
      "loss": 0.0111,
      "step": 2093060
    },
    {
      "epoch": 3.425371326826522,
      "grad_norm": 0.09279228001832962,
      "learning_rate": 3.1700085000955437e-06,
      "loss": 0.01,
      "step": 2093080
    },
    {
      "epoch": 3.4254040572651756,
      "grad_norm": 0.11460064351558685,
      "learning_rate": 3.169942607882027e-06,
      "loss": 0.0156,
      "step": 2093100
    },
    {
      "epoch": 3.4254367877038288,
      "grad_norm": 0.25553247332572937,
      "learning_rate": 3.1698767156685096e-06,
      "loss": 0.0106,
      "step": 2093120
    },
    {
      "epoch": 3.4254695181424824,
      "grad_norm": 0.49730074405670166,
      "learning_rate": 3.1698108234549923e-06,
      "loss": 0.0119,
      "step": 2093140
    },
    {
      "epoch": 3.4255022485811355,
      "grad_norm": 0.27984100580215454,
      "learning_rate": 3.169744931241475e-06,
      "loss": 0.0116,
      "step": 2093160
    },
    {
      "epoch": 3.4255349790197887,
      "grad_norm": 0.33896592259407043,
      "learning_rate": 3.1696790390279587e-06,
      "loss": 0.017,
      "step": 2093180
    },
    {
      "epoch": 3.4255677094584422,
      "grad_norm": 0.0943203717470169,
      "learning_rate": 3.1696131468144414e-06,
      "loss": 0.0079,
      "step": 2093200
    },
    {
      "epoch": 3.4256004398970954,
      "grad_norm": 0.3798106610774994,
      "learning_rate": 3.169547254600924e-06,
      "loss": 0.0108,
      "step": 2093220
    },
    {
      "epoch": 3.425633170335749,
      "grad_norm": 0.2809688150882721,
      "learning_rate": 3.1694813623874073e-06,
      "loss": 0.0159,
      "step": 2093240
    },
    {
      "epoch": 3.425665900774402,
      "grad_norm": 0.49645736813545227,
      "learning_rate": 3.16941547017389e-06,
      "loss": 0.0106,
      "step": 2093260
    },
    {
      "epoch": 3.4256986312130557,
      "grad_norm": 0.1227346733212471,
      "learning_rate": 3.1693495779603728e-06,
      "loss": 0.0111,
      "step": 2093280
    },
    {
      "epoch": 3.425731361651709,
      "grad_norm": 0.25019213557243347,
      "learning_rate": 3.1692836857468555e-06,
      "loss": 0.007,
      "step": 2093300
    },
    {
      "epoch": 3.425764092090362,
      "grad_norm": 0.38067832589149475,
      "learning_rate": 3.1692177935333387e-06,
      "loss": 0.0123,
      "step": 2093320
    },
    {
      "epoch": 3.4257968225290156,
      "grad_norm": 0.33855563402175903,
      "learning_rate": 3.1691519013198214e-06,
      "loss": 0.01,
      "step": 2093340
    },
    {
      "epoch": 3.425829552967669,
      "grad_norm": 0.8121645450592041,
      "learning_rate": 3.169086009106304e-06,
      "loss": 0.0132,
      "step": 2093360
    },
    {
      "epoch": 3.4258622834063224,
      "grad_norm": 0.3284696638584137,
      "learning_rate": 3.169020116892787e-06,
      "loss": 0.0134,
      "step": 2093380
    },
    {
      "epoch": 3.4258950138449755,
      "grad_norm": 0.17557154595851898,
      "learning_rate": 3.1689542246792696e-06,
      "loss": 0.0094,
      "step": 2093400
    },
    {
      "epoch": 3.425927744283629,
      "grad_norm": 0.1644490659236908,
      "learning_rate": 3.168888332465753e-06,
      "loss": 0.0094,
      "step": 2093420
    },
    {
      "epoch": 3.4259604747222823,
      "grad_norm": 0.4965395927429199,
      "learning_rate": 3.1688224402522355e-06,
      "loss": 0.0117,
      "step": 2093440
    },
    {
      "epoch": 3.4259932051609354,
      "grad_norm": 0.08168870210647583,
      "learning_rate": 3.1687565480387183e-06,
      "loss": 0.0148,
      "step": 2093460
    },
    {
      "epoch": 3.426025935599589,
      "grad_norm": 0.20943351089954376,
      "learning_rate": 3.168690655825201e-06,
      "loss": 0.0109,
      "step": 2093480
    },
    {
      "epoch": 3.426058666038242,
      "grad_norm": 0.15421566367149353,
      "learning_rate": 3.168624763611684e-06,
      "loss": 0.0124,
      "step": 2093500
    },
    {
      "epoch": 3.4260913964768958,
      "grad_norm": 0.1160985603928566,
      "learning_rate": 3.168558871398167e-06,
      "loss": 0.0124,
      "step": 2093520
    },
    {
      "epoch": 3.426124126915549,
      "grad_norm": 0.21822035312652588,
      "learning_rate": 3.16849297918465e-06,
      "loss": 0.0167,
      "step": 2093540
    },
    {
      "epoch": 3.4261568573542025,
      "grad_norm": 0.23923653364181519,
      "learning_rate": 3.1684270869711332e-06,
      "loss": 0.008,
      "step": 2093560
    },
    {
      "epoch": 3.4261895877928557,
      "grad_norm": 0.31134241819381714,
      "learning_rate": 3.168361194757616e-06,
      "loss": 0.0107,
      "step": 2093580
    },
    {
      "epoch": 3.426222318231509,
      "grad_norm": 0.2432355135679245,
      "learning_rate": 3.1682953025440987e-06,
      "loss": 0.0113,
      "step": 2093600
    },
    {
      "epoch": 3.4262550486701624,
      "grad_norm": 0.27872294187545776,
      "learning_rate": 3.1682294103305815e-06,
      "loss": 0.007,
      "step": 2093620
    },
    {
      "epoch": 3.4262877791088155,
      "grad_norm": 0.11793143302202225,
      "learning_rate": 3.1681635181170646e-06,
      "loss": 0.0176,
      "step": 2093640
    },
    {
      "epoch": 3.426320509547469,
      "grad_norm": 0.18577149510383606,
      "learning_rate": 3.1680976259035474e-06,
      "loss": 0.0078,
      "step": 2093660
    },
    {
      "epoch": 3.4263532399861223,
      "grad_norm": 0.3487739861011505,
      "learning_rate": 3.16803173369003e-06,
      "loss": 0.0114,
      "step": 2093680
    },
    {
      "epoch": 3.4263859704247754,
      "grad_norm": 0.371400386095047,
      "learning_rate": 3.167965841476513e-06,
      "loss": 0.0116,
      "step": 2093700
    },
    {
      "epoch": 3.426418700863429,
      "grad_norm": 0.11965185403823853,
      "learning_rate": 3.167899949262996e-06,
      "loss": 0.0127,
      "step": 2093720
    },
    {
      "epoch": 3.426451431302082,
      "grad_norm": 0.3955352008342743,
      "learning_rate": 3.1678340570494788e-06,
      "loss": 0.0135,
      "step": 2093740
    },
    {
      "epoch": 3.426484161740736,
      "grad_norm": 0.18146933615207672,
      "learning_rate": 3.1677681648359615e-06,
      "loss": 0.0136,
      "step": 2093760
    },
    {
      "epoch": 3.426516892179389,
      "grad_norm": 0.5242180228233337,
      "learning_rate": 3.1677022726224442e-06,
      "loss": 0.0098,
      "step": 2093780
    },
    {
      "epoch": 3.426549622618042,
      "grad_norm": 0.36580604314804077,
      "learning_rate": 3.167636380408927e-06,
      "loss": 0.0127,
      "step": 2093800
    },
    {
      "epoch": 3.4265823530566957,
      "grad_norm": 0.4868985414505005,
      "learning_rate": 3.16757048819541e-06,
      "loss": 0.0191,
      "step": 2093820
    },
    {
      "epoch": 3.426615083495349,
      "grad_norm": 0.256667822599411,
      "learning_rate": 3.167504595981893e-06,
      "loss": 0.0103,
      "step": 2093840
    },
    {
      "epoch": 3.4266478139340024,
      "grad_norm": 0.167350634932518,
      "learning_rate": 3.1674387037683756e-06,
      "loss": 0.0082,
      "step": 2093860
    },
    {
      "epoch": 3.4266805443726556,
      "grad_norm": 0.4420259892940521,
      "learning_rate": 3.167372811554859e-06,
      "loss": 0.0123,
      "step": 2093880
    },
    {
      "epoch": 3.426713274811309,
      "grad_norm": 0.07190918922424316,
      "learning_rate": 3.167306919341342e-06,
      "loss": 0.0078,
      "step": 2093900
    },
    {
      "epoch": 3.4267460052499623,
      "grad_norm": 0.21792474389076233,
      "learning_rate": 3.1672410271278247e-06,
      "loss": 0.0133,
      "step": 2093920
    },
    {
      "epoch": 3.4267787356886155,
      "grad_norm": 0.20658834278583527,
      "learning_rate": 3.1671751349143074e-06,
      "loss": 0.0128,
      "step": 2093940
    },
    {
      "epoch": 3.426811466127269,
      "grad_norm": 0.06136779487133026,
      "learning_rate": 3.1671092427007906e-06,
      "loss": 0.0076,
      "step": 2093960
    },
    {
      "epoch": 3.426844196565922,
      "grad_norm": 0.08455409854650497,
      "learning_rate": 3.1670433504872733e-06,
      "loss": 0.0095,
      "step": 2093980
    },
    {
      "epoch": 3.426876927004576,
      "grad_norm": 0.19294419884681702,
      "learning_rate": 3.166977458273756e-06,
      "loss": 0.0138,
      "step": 2094000
    },
    {
      "epoch": 3.426909657443229,
      "grad_norm": 0.44909822940826416,
      "learning_rate": 3.166911566060239e-06,
      "loss": 0.0097,
      "step": 2094020
    },
    {
      "epoch": 3.4269423878818825,
      "grad_norm": 1.5001236200332642,
      "learning_rate": 3.166845673846722e-06,
      "loss": 0.0224,
      "step": 2094040
    },
    {
      "epoch": 3.4269751183205357,
      "grad_norm": 0.4883432388305664,
      "learning_rate": 3.1667797816332047e-06,
      "loss": 0.0139,
      "step": 2094060
    },
    {
      "epoch": 3.427007848759189,
      "grad_norm": 0.2616960108280182,
      "learning_rate": 3.1667138894196874e-06,
      "loss": 0.0108,
      "step": 2094080
    },
    {
      "epoch": 3.4270405791978424,
      "grad_norm": 0.09190170466899872,
      "learning_rate": 3.16664799720617e-06,
      "loss": 0.0142,
      "step": 2094100
    },
    {
      "epoch": 3.4270733096364956,
      "grad_norm": 0.1351366639137268,
      "learning_rate": 3.1665821049926533e-06,
      "loss": 0.0155,
      "step": 2094120
    },
    {
      "epoch": 3.427106040075149,
      "grad_norm": 0.23093350231647491,
      "learning_rate": 3.166516212779136e-06,
      "loss": 0.0148,
      "step": 2094140
    },
    {
      "epoch": 3.4271387705138023,
      "grad_norm": 0.3025500178337097,
      "learning_rate": 3.166450320565619e-06,
      "loss": 0.0096,
      "step": 2094160
    },
    {
      "epoch": 3.427171500952456,
      "grad_norm": 0.7841485738754272,
      "learning_rate": 3.1663844283521016e-06,
      "loss": 0.0181,
      "step": 2094180
    },
    {
      "epoch": 3.427204231391109,
      "grad_norm": 0.40734684467315674,
      "learning_rate": 3.1663185361385847e-06,
      "loss": 0.0139,
      "step": 2094200
    },
    {
      "epoch": 3.4272369618297622,
      "grad_norm": 0.061644867062568665,
      "learning_rate": 3.1662526439250675e-06,
      "loss": 0.0133,
      "step": 2094220
    },
    {
      "epoch": 3.427269692268416,
      "grad_norm": 0.5090048313140869,
      "learning_rate": 3.1661867517115506e-06,
      "loss": 0.0125,
      "step": 2094240
    },
    {
      "epoch": 3.427302422707069,
      "grad_norm": 0.15540273487567902,
      "learning_rate": 3.166120859498034e-06,
      "loss": 0.0112,
      "step": 2094260
    },
    {
      "epoch": 3.4273351531457226,
      "grad_norm": 0.32277944684028625,
      "learning_rate": 3.1660549672845165e-06,
      "loss": 0.0144,
      "step": 2094280
    },
    {
      "epoch": 3.4273678835843757,
      "grad_norm": 0.4238343834877014,
      "learning_rate": 3.1659890750709993e-06,
      "loss": 0.0082,
      "step": 2094300
    },
    {
      "epoch": 3.4274006140230293,
      "grad_norm": 0.25451022386550903,
      "learning_rate": 3.165923182857482e-06,
      "loss": 0.0108,
      "step": 2094320
    },
    {
      "epoch": 3.4274333444616825,
      "grad_norm": 0.16232001781463623,
      "learning_rate": 3.1658572906439648e-06,
      "loss": 0.0094,
      "step": 2094340
    },
    {
      "epoch": 3.4274660749003356,
      "grad_norm": 0.12887755036354065,
      "learning_rate": 3.165791398430448e-06,
      "loss": 0.0144,
      "step": 2094360
    },
    {
      "epoch": 3.427498805338989,
      "grad_norm": 0.3929162919521332,
      "learning_rate": 3.1657255062169307e-06,
      "loss": 0.0115,
      "step": 2094380
    },
    {
      "epoch": 3.4275315357776424,
      "grad_norm": 0.2908134460449219,
      "learning_rate": 3.1656596140034134e-06,
      "loss": 0.0131,
      "step": 2094400
    },
    {
      "epoch": 3.427564266216296,
      "grad_norm": 0.40665918588638306,
      "learning_rate": 3.165593721789896e-06,
      "loss": 0.0113,
      "step": 2094420
    },
    {
      "epoch": 3.427596996654949,
      "grad_norm": 0.2867705821990967,
      "learning_rate": 3.1655278295763793e-06,
      "loss": 0.0093,
      "step": 2094440
    },
    {
      "epoch": 3.4276297270936027,
      "grad_norm": 0.20753440260887146,
      "learning_rate": 3.165461937362862e-06,
      "loss": 0.0095,
      "step": 2094460
    },
    {
      "epoch": 3.427662457532256,
      "grad_norm": 0.20542468130588531,
      "learning_rate": 3.1653960451493448e-06,
      "loss": 0.013,
      "step": 2094480
    },
    {
      "epoch": 3.427695187970909,
      "grad_norm": 0.20031465590000153,
      "learning_rate": 3.1653301529358275e-06,
      "loss": 0.0102,
      "step": 2094500
    },
    {
      "epoch": 3.4277279184095626,
      "grad_norm": 0.19683556258678436,
      "learning_rate": 3.1652642607223107e-06,
      "loss": 0.0085,
      "step": 2094520
    },
    {
      "epoch": 3.4277606488482157,
      "grad_norm": 0.1570335328578949,
      "learning_rate": 3.1651983685087934e-06,
      "loss": 0.0107,
      "step": 2094540
    },
    {
      "epoch": 3.4277933792868693,
      "grad_norm": 0.7421688437461853,
      "learning_rate": 3.165132476295276e-06,
      "loss": 0.0166,
      "step": 2094560
    },
    {
      "epoch": 3.4278261097255225,
      "grad_norm": 0.23924076557159424,
      "learning_rate": 3.165066584081759e-06,
      "loss": 0.0108,
      "step": 2094580
    },
    {
      "epoch": 3.427858840164176,
      "grad_norm": 0.14673827588558197,
      "learning_rate": 3.1650006918682425e-06,
      "loss": 0.0082,
      "step": 2094600
    },
    {
      "epoch": 3.4278915706028292,
      "grad_norm": 0.08090749382972717,
      "learning_rate": 3.1649347996547252e-06,
      "loss": 0.009,
      "step": 2094620
    },
    {
      "epoch": 3.4279243010414824,
      "grad_norm": 0.2777799665927887,
      "learning_rate": 3.164868907441208e-06,
      "loss": 0.0105,
      "step": 2094640
    },
    {
      "epoch": 3.427957031480136,
      "grad_norm": 0.7810580134391785,
      "learning_rate": 3.164803015227691e-06,
      "loss": 0.0126,
      "step": 2094660
    },
    {
      "epoch": 3.427989761918789,
      "grad_norm": 0.7541146874427795,
      "learning_rate": 3.164737123014174e-06,
      "loss": 0.0102,
      "step": 2094680
    },
    {
      "epoch": 3.4280224923574427,
      "grad_norm": 0.18371476233005524,
      "learning_rate": 3.1646712308006566e-06,
      "loss": 0.0111,
      "step": 2094700
    },
    {
      "epoch": 3.428055222796096,
      "grad_norm": 0.5821470618247986,
      "learning_rate": 3.1646053385871394e-06,
      "loss": 0.0162,
      "step": 2094720
    },
    {
      "epoch": 3.4280879532347495,
      "grad_norm": 0.36888745427131653,
      "learning_rate": 3.1645394463736225e-06,
      "loss": 0.008,
      "step": 2094740
    },
    {
      "epoch": 3.4281206836734026,
      "grad_norm": 0.3167611360549927,
      "learning_rate": 3.1644735541601053e-06,
      "loss": 0.015,
      "step": 2094760
    },
    {
      "epoch": 3.4281534141120558,
      "grad_norm": 0.42030104994773865,
      "learning_rate": 3.164407661946588e-06,
      "loss": 0.012,
      "step": 2094780
    },
    {
      "epoch": 3.4281861445507094,
      "grad_norm": 0.1729358583688736,
      "learning_rate": 3.1643417697330707e-06,
      "loss": 0.0132,
      "step": 2094800
    },
    {
      "epoch": 3.4282188749893625,
      "grad_norm": 0.08601212501525879,
      "learning_rate": 3.1642758775195535e-06,
      "loss": 0.0113,
      "step": 2094820
    },
    {
      "epoch": 3.428251605428016,
      "grad_norm": 0.16285274922847748,
      "learning_rate": 3.1642099853060366e-06,
      "loss": 0.011,
      "step": 2094840
    },
    {
      "epoch": 3.4282843358666693,
      "grad_norm": 0.12290827929973602,
      "learning_rate": 3.1641440930925194e-06,
      "loss": 0.0131,
      "step": 2094860
    },
    {
      "epoch": 3.428317066305323,
      "grad_norm": 0.1583058089017868,
      "learning_rate": 3.164078200879002e-06,
      "loss": 0.0092,
      "step": 2094880
    },
    {
      "epoch": 3.428349796743976,
      "grad_norm": 0.14077453315258026,
      "learning_rate": 3.164012308665485e-06,
      "loss": 0.0114,
      "step": 2094900
    },
    {
      "epoch": 3.428382527182629,
      "grad_norm": 0.199509397149086,
      "learning_rate": 3.163946416451968e-06,
      "loss": 0.0138,
      "step": 2094920
    },
    {
      "epoch": 3.4284152576212827,
      "grad_norm": 0.22965840995311737,
      "learning_rate": 3.163880524238451e-06,
      "loss": 0.0138,
      "step": 2094940
    },
    {
      "epoch": 3.428447988059936,
      "grad_norm": 0.20760421454906464,
      "learning_rate": 3.163814632024934e-06,
      "loss": 0.0107,
      "step": 2094960
    },
    {
      "epoch": 3.4284807184985895,
      "grad_norm": 0.35002565383911133,
      "learning_rate": 3.163748739811417e-06,
      "loss": 0.012,
      "step": 2094980
    },
    {
      "epoch": 3.4285134489372426,
      "grad_norm": 0.2954370081424713,
      "learning_rate": 3.1636828475979e-06,
      "loss": 0.0167,
      "step": 2095000
    },
    {
      "epoch": 3.4285461793758962,
      "grad_norm": 0.5893017053604126,
      "learning_rate": 3.1636169553843826e-06,
      "loss": 0.0118,
      "step": 2095020
    },
    {
      "epoch": 3.4285789098145494,
      "grad_norm": 0.37705233693122864,
      "learning_rate": 3.1635510631708653e-06,
      "loss": 0.0135,
      "step": 2095040
    },
    {
      "epoch": 3.4286116402532025,
      "grad_norm": 0.16684608161449432,
      "learning_rate": 3.1634851709573485e-06,
      "loss": 0.0131,
      "step": 2095060
    },
    {
      "epoch": 3.428644370691856,
      "grad_norm": 0.08289244771003723,
      "learning_rate": 3.163419278743831e-06,
      "loss": 0.0099,
      "step": 2095080
    },
    {
      "epoch": 3.4286771011305093,
      "grad_norm": 0.7361593842506409,
      "learning_rate": 3.163353386530314e-06,
      "loss": 0.0143,
      "step": 2095100
    },
    {
      "epoch": 3.428709831569163,
      "grad_norm": 0.1582871675491333,
      "learning_rate": 3.1632874943167967e-06,
      "loss": 0.0083,
      "step": 2095120
    },
    {
      "epoch": 3.428742562007816,
      "grad_norm": 0.4779403507709503,
      "learning_rate": 3.16322160210328e-06,
      "loss": 0.0098,
      "step": 2095140
    },
    {
      "epoch": 3.4287752924464696,
      "grad_norm": 0.4548293650150299,
      "learning_rate": 3.1631557098897626e-06,
      "loss": 0.0144,
      "step": 2095160
    },
    {
      "epoch": 3.4288080228851228,
      "grad_norm": 0.38485899567604065,
      "learning_rate": 3.1630898176762453e-06,
      "loss": 0.0113,
      "step": 2095180
    },
    {
      "epoch": 3.428840753323776,
      "grad_norm": 0.3437913954257965,
      "learning_rate": 3.163023925462728e-06,
      "loss": 0.0185,
      "step": 2095200
    },
    {
      "epoch": 3.4288734837624295,
      "grad_norm": 0.2190539538860321,
      "learning_rate": 3.1629580332492112e-06,
      "loss": 0.0132,
      "step": 2095220
    },
    {
      "epoch": 3.4289062142010827,
      "grad_norm": 0.09993951767683029,
      "learning_rate": 3.162892141035694e-06,
      "loss": 0.0147,
      "step": 2095240
    },
    {
      "epoch": 3.4289389446397363,
      "grad_norm": 0.27861571311950684,
      "learning_rate": 3.1628262488221767e-06,
      "loss": 0.0113,
      "step": 2095260
    },
    {
      "epoch": 3.4289716750783894,
      "grad_norm": 0.7820775508880615,
      "learning_rate": 3.1627603566086595e-06,
      "loss": 0.0155,
      "step": 2095280
    },
    {
      "epoch": 3.4290044055170426,
      "grad_norm": 0.22812142968177795,
      "learning_rate": 3.162694464395143e-06,
      "loss": 0.0151,
      "step": 2095300
    },
    {
      "epoch": 3.429037135955696,
      "grad_norm": 0.13493479788303375,
      "learning_rate": 3.1626285721816258e-06,
      "loss": 0.0113,
      "step": 2095320
    },
    {
      "epoch": 3.4290698663943493,
      "grad_norm": 0.5332934260368347,
      "learning_rate": 3.1625626799681085e-06,
      "loss": 0.0093,
      "step": 2095340
    },
    {
      "epoch": 3.429102596833003,
      "grad_norm": 0.37055060267448425,
      "learning_rate": 3.1624967877545913e-06,
      "loss": 0.0089,
      "step": 2095360
    },
    {
      "epoch": 3.429135327271656,
      "grad_norm": 0.30965662002563477,
      "learning_rate": 3.1624308955410744e-06,
      "loss": 0.0135,
      "step": 2095380
    },
    {
      "epoch": 3.429168057710309,
      "grad_norm": 0.41980770230293274,
      "learning_rate": 3.162365003327557e-06,
      "loss": 0.0105,
      "step": 2095400
    },
    {
      "epoch": 3.429200788148963,
      "grad_norm": 0.34127917885780334,
      "learning_rate": 3.16229911111404e-06,
      "loss": 0.0114,
      "step": 2095420
    },
    {
      "epoch": 3.429233518587616,
      "grad_norm": 0.07917661964893341,
      "learning_rate": 3.1622332189005226e-06,
      "loss": 0.013,
      "step": 2095440
    },
    {
      "epoch": 3.4292662490262695,
      "grad_norm": 0.49840834736824036,
      "learning_rate": 3.162167326687006e-06,
      "loss": 0.011,
      "step": 2095460
    },
    {
      "epoch": 3.4292989794649227,
      "grad_norm": 0.24725136160850525,
      "learning_rate": 3.1621014344734885e-06,
      "loss": 0.013,
      "step": 2095480
    },
    {
      "epoch": 3.4293317099035763,
      "grad_norm": 0.7531605958938599,
      "learning_rate": 3.1620355422599713e-06,
      "loss": 0.009,
      "step": 2095500
    },
    {
      "epoch": 3.4293644403422294,
      "grad_norm": 1.050209879875183,
      "learning_rate": 3.161969650046454e-06,
      "loss": 0.0143,
      "step": 2095520
    },
    {
      "epoch": 3.4293971707808826,
      "grad_norm": 0.3942902088165283,
      "learning_rate": 3.161903757832937e-06,
      "loss": 0.014,
      "step": 2095540
    },
    {
      "epoch": 3.429429901219536,
      "grad_norm": 0.25330284237861633,
      "learning_rate": 3.16183786561942e-06,
      "loss": 0.0145,
      "step": 2095560
    },
    {
      "epoch": 3.4294626316581893,
      "grad_norm": 0.1638679951429367,
      "learning_rate": 3.1617719734059027e-06,
      "loss": 0.0176,
      "step": 2095580
    },
    {
      "epoch": 3.429495362096843,
      "grad_norm": 0.22482626140117645,
      "learning_rate": 3.1617060811923854e-06,
      "loss": 0.0076,
      "step": 2095600
    },
    {
      "epoch": 3.429528092535496,
      "grad_norm": 0.22383703291416168,
      "learning_rate": 3.1616401889788686e-06,
      "loss": 0.0196,
      "step": 2095620
    },
    {
      "epoch": 3.4295608229741497,
      "grad_norm": 0.3967590928077698,
      "learning_rate": 3.1615742967653517e-06,
      "loss": 0.0089,
      "step": 2095640
    },
    {
      "epoch": 3.429593553412803,
      "grad_norm": 0.1617969572544098,
      "learning_rate": 3.1615084045518345e-06,
      "loss": 0.011,
      "step": 2095660
    },
    {
      "epoch": 3.429626283851456,
      "grad_norm": 0.1295759081840515,
      "learning_rate": 3.1614425123383176e-06,
      "loss": 0.0128,
      "step": 2095680
    },
    {
      "epoch": 3.4296590142901096,
      "grad_norm": 0.4019946753978729,
      "learning_rate": 3.1613766201248004e-06,
      "loss": 0.0074,
      "step": 2095700
    },
    {
      "epoch": 3.4296917447287627,
      "grad_norm": 0.24366682767868042,
      "learning_rate": 3.161310727911283e-06,
      "loss": 0.0095,
      "step": 2095720
    },
    {
      "epoch": 3.4297244751674163,
      "grad_norm": 0.22678905725479126,
      "learning_rate": 3.161244835697766e-06,
      "loss": 0.0167,
      "step": 2095740
    },
    {
      "epoch": 3.4297572056060694,
      "grad_norm": 0.1756197065114975,
      "learning_rate": 3.161178943484249e-06,
      "loss": 0.0138,
      "step": 2095760
    },
    {
      "epoch": 3.429789936044723,
      "grad_norm": 0.22728633880615234,
      "learning_rate": 3.1611130512707318e-06,
      "loss": 0.0133,
      "step": 2095780
    },
    {
      "epoch": 3.429822666483376,
      "grad_norm": 0.11934749037027359,
      "learning_rate": 3.1610471590572145e-06,
      "loss": 0.0119,
      "step": 2095800
    },
    {
      "epoch": 3.4298553969220293,
      "grad_norm": 0.1616203784942627,
      "learning_rate": 3.1609812668436972e-06,
      "loss": 0.0087,
      "step": 2095820
    },
    {
      "epoch": 3.429888127360683,
      "grad_norm": 0.39173585176467896,
      "learning_rate": 3.16091537463018e-06,
      "loss": 0.0112,
      "step": 2095840
    },
    {
      "epoch": 3.429920857799336,
      "grad_norm": 0.10865411162376404,
      "learning_rate": 3.160849482416663e-06,
      "loss": 0.0105,
      "step": 2095860
    },
    {
      "epoch": 3.4299535882379897,
      "grad_norm": 0.07904145866632462,
      "learning_rate": 3.160783590203146e-06,
      "loss": 0.0105,
      "step": 2095880
    },
    {
      "epoch": 3.429986318676643,
      "grad_norm": 0.6244902014732361,
      "learning_rate": 3.1607176979896286e-06,
      "loss": 0.0107,
      "step": 2095900
    },
    {
      "epoch": 3.4300190491152964,
      "grad_norm": 0.15261828899383545,
      "learning_rate": 3.1606518057761114e-06,
      "loss": 0.01,
      "step": 2095920
    },
    {
      "epoch": 3.4300517795539496,
      "grad_norm": 0.08063796162605286,
      "learning_rate": 3.1605859135625945e-06,
      "loss": 0.0116,
      "step": 2095940
    },
    {
      "epoch": 3.4300845099926027,
      "grad_norm": 0.3308314383029938,
      "learning_rate": 3.1605200213490773e-06,
      "loss": 0.0084,
      "step": 2095960
    },
    {
      "epoch": 3.4301172404312563,
      "grad_norm": 0.2756845951080322,
      "learning_rate": 3.16045412913556e-06,
      "loss": 0.0138,
      "step": 2095980
    },
    {
      "epoch": 3.4301499708699095,
      "grad_norm": 0.35565635561943054,
      "learning_rate": 3.1603882369220436e-06,
      "loss": 0.0087,
      "step": 2096000
    },
    {
      "epoch": 3.430182701308563,
      "grad_norm": 0.30256208777427673,
      "learning_rate": 3.1603223447085263e-06,
      "loss": 0.0131,
      "step": 2096020
    },
    {
      "epoch": 3.430215431747216,
      "grad_norm": 0.1767423152923584,
      "learning_rate": 3.160256452495009e-06,
      "loss": 0.0083,
      "step": 2096040
    },
    {
      "epoch": 3.43024816218587,
      "grad_norm": 0.4615490734577179,
      "learning_rate": 3.160190560281492e-06,
      "loss": 0.0101,
      "step": 2096060
    },
    {
      "epoch": 3.430280892624523,
      "grad_norm": 0.12230544537305832,
      "learning_rate": 3.160124668067975e-06,
      "loss": 0.0107,
      "step": 2096080
    },
    {
      "epoch": 3.430313623063176,
      "grad_norm": 0.36873671412467957,
      "learning_rate": 3.1600587758544577e-06,
      "loss": 0.015,
      "step": 2096100
    },
    {
      "epoch": 3.4303463535018297,
      "grad_norm": 0.35422903299331665,
      "learning_rate": 3.1599928836409405e-06,
      "loss": 0.0106,
      "step": 2096120
    },
    {
      "epoch": 3.430379083940483,
      "grad_norm": 0.41226139664649963,
      "learning_rate": 3.159926991427423e-06,
      "loss": 0.0082,
      "step": 2096140
    },
    {
      "epoch": 3.4304118143791364,
      "grad_norm": 0.24686677753925323,
      "learning_rate": 3.1598610992139064e-06,
      "loss": 0.0126,
      "step": 2096160
    },
    {
      "epoch": 3.4304445448177896,
      "grad_norm": 0.11283138394355774,
      "learning_rate": 3.159795207000389e-06,
      "loss": 0.0122,
      "step": 2096180
    },
    {
      "epoch": 3.430477275256443,
      "grad_norm": 0.6495797634124756,
      "learning_rate": 3.159729314786872e-06,
      "loss": 0.0086,
      "step": 2096200
    },
    {
      "epoch": 3.4305100056950963,
      "grad_norm": 0.21834689378738403,
      "learning_rate": 3.1596634225733546e-06,
      "loss": 0.011,
      "step": 2096220
    },
    {
      "epoch": 3.4305427361337495,
      "grad_norm": 0.29447832703590393,
      "learning_rate": 3.1595975303598373e-06,
      "loss": 0.0111,
      "step": 2096240
    },
    {
      "epoch": 3.430575466572403,
      "grad_norm": 0.1771935224533081,
      "learning_rate": 3.1595316381463205e-06,
      "loss": 0.0085,
      "step": 2096260
    },
    {
      "epoch": 3.4306081970110562,
      "grad_norm": 0.4589410126209259,
      "learning_rate": 3.1594657459328032e-06,
      "loss": 0.0112,
      "step": 2096280
    },
    {
      "epoch": 3.43064092744971,
      "grad_norm": 0.15973562002182007,
      "learning_rate": 3.159399853719286e-06,
      "loss": 0.0138,
      "step": 2096300
    },
    {
      "epoch": 3.430673657888363,
      "grad_norm": 0.3142121434211731,
      "learning_rate": 3.1593339615057687e-06,
      "loss": 0.0148,
      "step": 2096320
    },
    {
      "epoch": 3.4307063883270166,
      "grad_norm": 0.3047325015068054,
      "learning_rate": 3.159268069292252e-06,
      "loss": 0.009,
      "step": 2096340
    },
    {
      "epoch": 3.4307391187656697,
      "grad_norm": 0.13712674379348755,
      "learning_rate": 3.159202177078735e-06,
      "loss": 0.0147,
      "step": 2096360
    },
    {
      "epoch": 3.430771849204323,
      "grad_norm": 0.33309251070022583,
      "learning_rate": 3.1591362848652178e-06,
      "loss": 0.0106,
      "step": 2096380
    },
    {
      "epoch": 3.4308045796429765,
      "grad_norm": 0.4101208746433258,
      "learning_rate": 3.159070392651701e-06,
      "loss": 0.0114,
      "step": 2096400
    },
    {
      "epoch": 3.4308373100816296,
      "grad_norm": 0.16408516466617584,
      "learning_rate": 3.1590045004381837e-06,
      "loss": 0.0128,
      "step": 2096420
    },
    {
      "epoch": 3.430870040520283,
      "grad_norm": 0.8127853870391846,
      "learning_rate": 3.1589386082246664e-06,
      "loss": 0.0167,
      "step": 2096440
    },
    {
      "epoch": 3.4309027709589364,
      "grad_norm": 0.1725207269191742,
      "learning_rate": 3.158872716011149e-06,
      "loss": 0.0084,
      "step": 2096460
    },
    {
      "epoch": 3.43093550139759,
      "grad_norm": 0.04203483462333679,
      "learning_rate": 3.1588068237976323e-06,
      "loss": 0.0085,
      "step": 2096480
    },
    {
      "epoch": 3.430968231836243,
      "grad_norm": 0.17753244936466217,
      "learning_rate": 3.158740931584115e-06,
      "loss": 0.0123,
      "step": 2096500
    },
    {
      "epoch": 3.4310009622748963,
      "grad_norm": 1.1311554908752441,
      "learning_rate": 3.1586750393705978e-06,
      "loss": 0.0084,
      "step": 2096520
    },
    {
      "epoch": 3.43103369271355,
      "grad_norm": 0.2870931625366211,
      "learning_rate": 3.1586091471570805e-06,
      "loss": 0.019,
      "step": 2096540
    },
    {
      "epoch": 3.431066423152203,
      "grad_norm": 0.1740361452102661,
      "learning_rate": 3.1585432549435637e-06,
      "loss": 0.0094,
      "step": 2096560
    },
    {
      "epoch": 3.4310991535908566,
      "grad_norm": 0.25741076469421387,
      "learning_rate": 3.1584773627300464e-06,
      "loss": 0.0142,
      "step": 2096580
    },
    {
      "epoch": 3.4311318840295097,
      "grad_norm": 0.2955509126186371,
      "learning_rate": 3.158411470516529e-06,
      "loss": 0.0085,
      "step": 2096600
    },
    {
      "epoch": 3.4311646144681633,
      "grad_norm": 0.25249338150024414,
      "learning_rate": 3.158345578303012e-06,
      "loss": 0.0115,
      "step": 2096620
    },
    {
      "epoch": 3.4311973449068165,
      "grad_norm": 0.20652051270008087,
      "learning_rate": 3.158279686089495e-06,
      "loss": 0.0109,
      "step": 2096640
    },
    {
      "epoch": 3.4312300753454696,
      "grad_norm": 0.21781618893146515,
      "learning_rate": 3.158213793875978e-06,
      "loss": 0.0121,
      "step": 2096660
    },
    {
      "epoch": 3.4312628057841232,
      "grad_norm": 0.3136647939682007,
      "learning_rate": 3.1581479016624606e-06,
      "loss": 0.0073,
      "step": 2096680
    },
    {
      "epoch": 3.4312955362227764,
      "grad_norm": 0.7101194858551025,
      "learning_rate": 3.158082009448944e-06,
      "loss": 0.0115,
      "step": 2096700
    },
    {
      "epoch": 3.43132826666143,
      "grad_norm": 0.24524368345737457,
      "learning_rate": 3.158016117235427e-06,
      "loss": 0.013,
      "step": 2096720
    },
    {
      "epoch": 3.431360997100083,
      "grad_norm": 0.1848011612892151,
      "learning_rate": 3.1579502250219096e-06,
      "loss": 0.0125,
      "step": 2096740
    },
    {
      "epoch": 3.4313937275387363,
      "grad_norm": 0.48426201939582825,
      "learning_rate": 3.1578843328083924e-06,
      "loss": 0.0141,
      "step": 2096760
    },
    {
      "epoch": 3.43142645797739,
      "grad_norm": 0.7090885639190674,
      "learning_rate": 3.157818440594875e-06,
      "loss": 0.0165,
      "step": 2096780
    },
    {
      "epoch": 3.431459188416043,
      "grad_norm": 0.28031060099601746,
      "learning_rate": 3.1577525483813583e-06,
      "loss": 0.0139,
      "step": 2096800
    },
    {
      "epoch": 3.4314919188546966,
      "grad_norm": 0.45180004835128784,
      "learning_rate": 3.157686656167841e-06,
      "loss": 0.0097,
      "step": 2096820
    },
    {
      "epoch": 3.4315246492933498,
      "grad_norm": 1.02034592628479,
      "learning_rate": 3.1576207639543237e-06,
      "loss": 0.0122,
      "step": 2096840
    },
    {
      "epoch": 3.431557379732003,
      "grad_norm": 0.23314030468463898,
      "learning_rate": 3.1575548717408065e-06,
      "loss": 0.0102,
      "step": 2096860
    },
    {
      "epoch": 3.4315901101706565,
      "grad_norm": 0.26303064823150635,
      "learning_rate": 3.1574889795272896e-06,
      "loss": 0.012,
      "step": 2096880
    },
    {
      "epoch": 3.4316228406093097,
      "grad_norm": 0.18735335767269135,
      "learning_rate": 3.1574230873137724e-06,
      "loss": 0.0109,
      "step": 2096900
    },
    {
      "epoch": 3.4316555710479633,
      "grad_norm": 0.2461094707250595,
      "learning_rate": 3.157357195100255e-06,
      "loss": 0.0069,
      "step": 2096920
    },
    {
      "epoch": 3.4316883014866164,
      "grad_norm": 0.13960480690002441,
      "learning_rate": 3.157291302886738e-06,
      "loss": 0.012,
      "step": 2096940
    },
    {
      "epoch": 3.43172103192527,
      "grad_norm": 0.09243841469287872,
      "learning_rate": 3.157225410673221e-06,
      "loss": 0.0133,
      "step": 2096960
    },
    {
      "epoch": 3.431753762363923,
      "grad_norm": 0.2128313034772873,
      "learning_rate": 3.1571595184597038e-06,
      "loss": 0.0106,
      "step": 2096980
    },
    {
      "epoch": 3.4317864928025763,
      "grad_norm": 0.3443771004676819,
      "learning_rate": 3.1570936262461865e-06,
      "loss": 0.0097,
      "step": 2097000
    },
    {
      "epoch": 3.43181922324123,
      "grad_norm": 0.22087334096431732,
      "learning_rate": 3.1570277340326692e-06,
      "loss": 0.0089,
      "step": 2097020
    },
    {
      "epoch": 3.431851953679883,
      "grad_norm": 0.09410755336284637,
      "learning_rate": 3.1569618418191524e-06,
      "loss": 0.0114,
      "step": 2097040
    },
    {
      "epoch": 3.4318846841185366,
      "grad_norm": 0.2480710744857788,
      "learning_rate": 3.1568959496056356e-06,
      "loss": 0.008,
      "step": 2097060
    },
    {
      "epoch": 3.43191741455719,
      "grad_norm": 0.14739525318145752,
      "learning_rate": 3.1568300573921183e-06,
      "loss": 0.0114,
      "step": 2097080
    },
    {
      "epoch": 3.4319501449958434,
      "grad_norm": 0.051336463540792465,
      "learning_rate": 3.1567641651786015e-06,
      "loss": 0.008,
      "step": 2097100
    },
    {
      "epoch": 3.4319828754344965,
      "grad_norm": 0.530302882194519,
      "learning_rate": 3.1566982729650842e-06,
      "loss": 0.0106,
      "step": 2097120
    },
    {
      "epoch": 3.4320156058731497,
      "grad_norm": 0.17137587070465088,
      "learning_rate": 3.156632380751567e-06,
      "loss": 0.0111,
      "step": 2097140
    },
    {
      "epoch": 3.4320483363118033,
      "grad_norm": 0.1576889604330063,
      "learning_rate": 3.1565664885380497e-06,
      "loss": 0.0075,
      "step": 2097160
    },
    {
      "epoch": 3.4320810667504564,
      "grad_norm": 0.175180122256279,
      "learning_rate": 3.156500596324533e-06,
      "loss": 0.0113,
      "step": 2097180
    },
    {
      "epoch": 3.43211379718911,
      "grad_norm": 0.11679595708847046,
      "learning_rate": 3.1564347041110156e-06,
      "loss": 0.0091,
      "step": 2097200
    },
    {
      "epoch": 3.432146527627763,
      "grad_norm": 0.06967523694038391,
      "learning_rate": 3.1563688118974983e-06,
      "loss": 0.0078,
      "step": 2097220
    },
    {
      "epoch": 3.4321792580664168,
      "grad_norm": 0.10580755770206451,
      "learning_rate": 3.156302919683981e-06,
      "loss": 0.0106,
      "step": 2097240
    },
    {
      "epoch": 3.43221198850507,
      "grad_norm": 0.18008315563201904,
      "learning_rate": 3.156237027470464e-06,
      "loss": 0.0116,
      "step": 2097260
    },
    {
      "epoch": 3.432244718943723,
      "grad_norm": 0.16199056804180145,
      "learning_rate": 3.156171135256947e-06,
      "loss": 0.0107,
      "step": 2097280
    },
    {
      "epoch": 3.4322774493823767,
      "grad_norm": 0.1773889660835266,
      "learning_rate": 3.1561052430434297e-06,
      "loss": 0.0104,
      "step": 2097300
    },
    {
      "epoch": 3.43231017982103,
      "grad_norm": 0.10717538744211197,
      "learning_rate": 3.1560393508299125e-06,
      "loss": 0.0126,
      "step": 2097320
    },
    {
      "epoch": 3.4323429102596834,
      "grad_norm": 0.14995959401130676,
      "learning_rate": 3.155973458616395e-06,
      "loss": 0.0101,
      "step": 2097340
    },
    {
      "epoch": 3.4323756406983366,
      "grad_norm": 0.20021037757396698,
      "learning_rate": 3.1559075664028784e-06,
      "loss": 0.0084,
      "step": 2097360
    },
    {
      "epoch": 3.43240837113699,
      "grad_norm": 0.17762482166290283,
      "learning_rate": 3.155841674189361e-06,
      "loss": 0.0066,
      "step": 2097380
    },
    {
      "epoch": 3.4324411015756433,
      "grad_norm": 0.07654859125614166,
      "learning_rate": 3.1557757819758443e-06,
      "loss": 0.012,
      "step": 2097400
    },
    {
      "epoch": 3.4324738320142965,
      "grad_norm": 0.18922874331474304,
      "learning_rate": 3.1557098897623274e-06,
      "loss": 0.0073,
      "step": 2097420
    },
    {
      "epoch": 3.43250656245295,
      "grad_norm": 0.13278073072433472,
      "learning_rate": 3.15564399754881e-06,
      "loss": 0.0131,
      "step": 2097440
    },
    {
      "epoch": 3.432539292891603,
      "grad_norm": 0.4179898500442505,
      "learning_rate": 3.155578105335293e-06,
      "loss": 0.0065,
      "step": 2097460
    },
    {
      "epoch": 3.432572023330257,
      "grad_norm": 0.2771163284778595,
      "learning_rate": 3.1555122131217756e-06,
      "loss": 0.0097,
      "step": 2097480
    },
    {
      "epoch": 3.43260475376891,
      "grad_norm": 0.14640559256076813,
      "learning_rate": 3.155446320908259e-06,
      "loss": 0.0124,
      "step": 2097500
    },
    {
      "epoch": 3.4326374842075635,
      "grad_norm": 0.18782970309257507,
      "learning_rate": 3.1553804286947416e-06,
      "loss": 0.0127,
      "step": 2097520
    },
    {
      "epoch": 3.4326702146462167,
      "grad_norm": 0.5006056427955627,
      "learning_rate": 3.1553145364812243e-06,
      "loss": 0.0128,
      "step": 2097540
    },
    {
      "epoch": 3.43270294508487,
      "grad_norm": 0.4982650876045227,
      "learning_rate": 3.155248644267707e-06,
      "loss": 0.0105,
      "step": 2097560
    },
    {
      "epoch": 3.4327356755235234,
      "grad_norm": 1.5895992517471313,
      "learning_rate": 3.15518275205419e-06,
      "loss": 0.0146,
      "step": 2097580
    },
    {
      "epoch": 3.4327684059621766,
      "grad_norm": 0.20643207430839539,
      "learning_rate": 3.155116859840673e-06,
      "loss": 0.0077,
      "step": 2097600
    },
    {
      "epoch": 3.43280113640083,
      "grad_norm": 0.29309597611427307,
      "learning_rate": 3.1550509676271557e-06,
      "loss": 0.0163,
      "step": 2097620
    },
    {
      "epoch": 3.4328338668394833,
      "grad_norm": 0.48212260007858276,
      "learning_rate": 3.1549850754136384e-06,
      "loss": 0.0173,
      "step": 2097640
    },
    {
      "epoch": 3.432866597278137,
      "grad_norm": 0.030311129987239838,
      "learning_rate": 3.154919183200121e-06,
      "loss": 0.0087,
      "step": 2097660
    },
    {
      "epoch": 3.43289932771679,
      "grad_norm": 0.765184760093689,
      "learning_rate": 3.1548532909866043e-06,
      "loss": 0.012,
      "step": 2097680
    },
    {
      "epoch": 3.4329320581554432,
      "grad_norm": 0.24519599974155426,
      "learning_rate": 3.154787398773087e-06,
      "loss": 0.0105,
      "step": 2097700
    },
    {
      "epoch": 3.432964788594097,
      "grad_norm": 0.28884372115135193,
      "learning_rate": 3.15472150655957e-06,
      "loss": 0.011,
      "step": 2097720
    },
    {
      "epoch": 3.43299751903275,
      "grad_norm": 0.3885103762149811,
      "learning_rate": 3.1546556143460525e-06,
      "loss": 0.008,
      "step": 2097740
    },
    {
      "epoch": 3.4330302494714036,
      "grad_norm": 0.29113951325416565,
      "learning_rate": 3.154589722132536e-06,
      "loss": 0.0094,
      "step": 2097760
    },
    {
      "epoch": 3.4330629799100567,
      "grad_norm": 0.07966652512550354,
      "learning_rate": 3.154523829919019e-06,
      "loss": 0.0112,
      "step": 2097780
    },
    {
      "epoch": 3.4330957103487103,
      "grad_norm": 0.3556802272796631,
      "learning_rate": 3.1544579377055016e-06,
      "loss": 0.017,
      "step": 2097800
    },
    {
      "epoch": 3.4331284407873635,
      "grad_norm": 0.28266406059265137,
      "learning_rate": 3.1543920454919848e-06,
      "loss": 0.0061,
      "step": 2097820
    },
    {
      "epoch": 3.4331611712260166,
      "grad_norm": 0.18711085617542267,
      "learning_rate": 3.1543261532784675e-06,
      "loss": 0.0096,
      "step": 2097840
    },
    {
      "epoch": 3.43319390166467,
      "grad_norm": 0.23636296391487122,
      "learning_rate": 3.1542602610649502e-06,
      "loss": 0.0116,
      "step": 2097860
    },
    {
      "epoch": 3.4332266321033234,
      "grad_norm": 0.25540634989738464,
      "learning_rate": 3.154194368851433e-06,
      "loss": 0.0074,
      "step": 2097880
    },
    {
      "epoch": 3.433259362541977,
      "grad_norm": 0.6738852858543396,
      "learning_rate": 3.154128476637916e-06,
      "loss": 0.013,
      "step": 2097900
    },
    {
      "epoch": 3.43329209298063,
      "grad_norm": 0.28730541467666626,
      "learning_rate": 3.154062584424399e-06,
      "loss": 0.0135,
      "step": 2097920
    },
    {
      "epoch": 3.4333248234192837,
      "grad_norm": 0.26132792234420776,
      "learning_rate": 3.1539966922108816e-06,
      "loss": 0.0128,
      "step": 2097940
    },
    {
      "epoch": 3.433357553857937,
      "grad_norm": 0.24964508414268494,
      "learning_rate": 3.1539307999973644e-06,
      "loss": 0.0103,
      "step": 2097960
    },
    {
      "epoch": 3.43339028429659,
      "grad_norm": 0.17451465129852295,
      "learning_rate": 3.1538649077838475e-06,
      "loss": 0.0097,
      "step": 2097980
    },
    {
      "epoch": 3.4334230147352436,
      "grad_norm": 0.8260257840156555,
      "learning_rate": 3.1537990155703303e-06,
      "loss": 0.0096,
      "step": 2098000
    },
    {
      "epoch": 3.4334557451738967,
      "grad_norm": 0.1253415197134018,
      "learning_rate": 3.153733123356813e-06,
      "loss": 0.0105,
      "step": 2098020
    },
    {
      "epoch": 3.4334884756125503,
      "grad_norm": 0.24328064918518066,
      "learning_rate": 3.1536672311432957e-06,
      "loss": 0.0103,
      "step": 2098040
    },
    {
      "epoch": 3.4335212060512035,
      "grad_norm": 0.3272993564605713,
      "learning_rate": 3.153601338929779e-06,
      "loss": 0.0129,
      "step": 2098060
    },
    {
      "epoch": 3.433553936489857,
      "grad_norm": 0.4406353831291199,
      "learning_rate": 3.1535354467162617e-06,
      "loss": 0.0084,
      "step": 2098080
    },
    {
      "epoch": 3.4335866669285102,
      "grad_norm": 0.2334083467721939,
      "learning_rate": 3.153469554502745e-06,
      "loss": 0.0124,
      "step": 2098100
    },
    {
      "epoch": 3.4336193973671634,
      "grad_norm": 7.282833099365234,
      "learning_rate": 3.153403662289228e-06,
      "loss": 0.0131,
      "step": 2098120
    },
    {
      "epoch": 3.433652127805817,
      "grad_norm": 0.15674015879631042,
      "learning_rate": 3.1533377700757107e-06,
      "loss": 0.0104,
      "step": 2098140
    },
    {
      "epoch": 3.43368485824447,
      "grad_norm": 0.21095699071884155,
      "learning_rate": 3.1532718778621935e-06,
      "loss": 0.0148,
      "step": 2098160
    },
    {
      "epoch": 3.4337175886831237,
      "grad_norm": 0.24506163597106934,
      "learning_rate": 3.153205985648676e-06,
      "loss": 0.0117,
      "step": 2098180
    },
    {
      "epoch": 3.433750319121777,
      "grad_norm": 0.3497248888015747,
      "learning_rate": 3.153140093435159e-06,
      "loss": 0.0188,
      "step": 2098200
    },
    {
      "epoch": 3.4337830495604305,
      "grad_norm": 0.29869315028190613,
      "learning_rate": 3.153074201221642e-06,
      "loss": 0.015,
      "step": 2098220
    },
    {
      "epoch": 3.4338157799990836,
      "grad_norm": 0.23866800963878632,
      "learning_rate": 3.153008309008125e-06,
      "loss": 0.0094,
      "step": 2098240
    },
    {
      "epoch": 3.4338485104377368,
      "grad_norm": 0.3523649573326111,
      "learning_rate": 3.1529424167946076e-06,
      "loss": 0.0068,
      "step": 2098260
    },
    {
      "epoch": 3.4338812408763904,
      "grad_norm": 0.6212285161018372,
      "learning_rate": 3.1528765245810903e-06,
      "loss": 0.0177,
      "step": 2098280
    },
    {
      "epoch": 3.4339139713150435,
      "grad_norm": 0.1627507209777832,
      "learning_rate": 3.1528106323675735e-06,
      "loss": 0.0074,
      "step": 2098300
    },
    {
      "epoch": 3.433946701753697,
      "grad_norm": 0.12304742634296417,
      "learning_rate": 3.1527447401540562e-06,
      "loss": 0.0126,
      "step": 2098320
    },
    {
      "epoch": 3.4339794321923502,
      "grad_norm": 0.2539452910423279,
      "learning_rate": 3.152678847940539e-06,
      "loss": 0.01,
      "step": 2098340
    },
    {
      "epoch": 3.4340121626310034,
      "grad_norm": 0.647899329662323,
      "learning_rate": 3.1526129557270217e-06,
      "loss": 0.0095,
      "step": 2098360
    },
    {
      "epoch": 3.434044893069657,
      "grad_norm": 0.10560861974954605,
      "learning_rate": 3.152547063513505e-06,
      "loss": 0.008,
      "step": 2098380
    },
    {
      "epoch": 3.43407762350831,
      "grad_norm": 0.061093997210264206,
      "learning_rate": 3.1524811712999876e-06,
      "loss": 0.0102,
      "step": 2098400
    },
    {
      "epoch": 3.4341103539469637,
      "grad_norm": 0.1632998287677765,
      "learning_rate": 3.1524152790864703e-06,
      "loss": 0.0067,
      "step": 2098420
    },
    {
      "epoch": 3.434143084385617,
      "grad_norm": 0.5377604365348816,
      "learning_rate": 3.152349386872953e-06,
      "loss": 0.0109,
      "step": 2098440
    },
    {
      "epoch": 3.43417581482427,
      "grad_norm": 0.15755395591259003,
      "learning_rate": 3.1522834946594367e-06,
      "loss": 0.0185,
      "step": 2098460
    },
    {
      "epoch": 3.4342085452629236,
      "grad_norm": 0.23585772514343262,
      "learning_rate": 3.1522176024459194e-06,
      "loss": 0.0123,
      "step": 2098480
    },
    {
      "epoch": 3.434241275701577,
      "grad_norm": 0.15286390483379364,
      "learning_rate": 3.152151710232402e-06,
      "loss": 0.0111,
      "step": 2098500
    },
    {
      "epoch": 3.4342740061402304,
      "grad_norm": 0.13017195463180542,
      "learning_rate": 3.1520858180188853e-06,
      "loss": 0.0141,
      "step": 2098520
    },
    {
      "epoch": 3.4343067365788835,
      "grad_norm": 0.16389936208724976,
      "learning_rate": 3.152019925805368e-06,
      "loss": 0.0114,
      "step": 2098540
    },
    {
      "epoch": 3.434339467017537,
      "grad_norm": 0.10619749128818512,
      "learning_rate": 3.151954033591851e-06,
      "loss": 0.0068,
      "step": 2098560
    },
    {
      "epoch": 3.4343721974561903,
      "grad_norm": 0.19438032805919647,
      "learning_rate": 3.1518881413783335e-06,
      "loss": 0.0113,
      "step": 2098580
    },
    {
      "epoch": 3.4344049278948434,
      "grad_norm": 0.19370003044605255,
      "learning_rate": 3.1518222491648167e-06,
      "loss": 0.0124,
      "step": 2098600
    },
    {
      "epoch": 3.434437658333497,
      "grad_norm": 0.14417925477027893,
      "learning_rate": 3.1517563569512994e-06,
      "loss": 0.0117,
      "step": 2098620
    },
    {
      "epoch": 3.43447038877215,
      "grad_norm": 0.2366059273481369,
      "learning_rate": 3.151690464737782e-06,
      "loss": 0.0128,
      "step": 2098640
    },
    {
      "epoch": 3.4345031192108038,
      "grad_norm": 0.27657783031463623,
      "learning_rate": 3.151624572524265e-06,
      "loss": 0.0159,
      "step": 2098660
    },
    {
      "epoch": 3.434535849649457,
      "grad_norm": 0.0983356311917305,
      "learning_rate": 3.1515586803107477e-06,
      "loss": 0.0115,
      "step": 2098680
    },
    {
      "epoch": 3.4345685800881105,
      "grad_norm": 0.1223500594496727,
      "learning_rate": 3.151492788097231e-06,
      "loss": 0.0122,
      "step": 2098700
    },
    {
      "epoch": 3.4346013105267637,
      "grad_norm": 0.31580811738967896,
      "learning_rate": 3.1514268958837136e-06,
      "loss": 0.0139,
      "step": 2098720
    },
    {
      "epoch": 3.434634040965417,
      "grad_norm": 0.20044660568237305,
      "learning_rate": 3.1513610036701963e-06,
      "loss": 0.0081,
      "step": 2098740
    },
    {
      "epoch": 3.4346667714040704,
      "grad_norm": 0.4875465929508209,
      "learning_rate": 3.151295111456679e-06,
      "loss": 0.0102,
      "step": 2098760
    },
    {
      "epoch": 3.4346995018427235,
      "grad_norm": 0.2109459787607193,
      "learning_rate": 3.151229219243162e-06,
      "loss": 0.0187,
      "step": 2098780
    },
    {
      "epoch": 3.434732232281377,
      "grad_norm": 0.1217798963189125,
      "learning_rate": 3.151163327029645e-06,
      "loss": 0.0131,
      "step": 2098800
    },
    {
      "epoch": 3.4347649627200303,
      "grad_norm": 0.6251634359359741,
      "learning_rate": 3.151097434816128e-06,
      "loss": 0.0111,
      "step": 2098820
    },
    {
      "epoch": 3.434797693158684,
      "grad_norm": 0.18127578496932983,
      "learning_rate": 3.1510315426026113e-06,
      "loss": 0.0091,
      "step": 2098840
    },
    {
      "epoch": 3.434830423597337,
      "grad_norm": 0.3787759244441986,
      "learning_rate": 3.150965650389094e-06,
      "loss": 0.013,
      "step": 2098860
    },
    {
      "epoch": 3.43486315403599,
      "grad_norm": 0.18314510583877563,
      "learning_rate": 3.1508997581755767e-06,
      "loss": 0.0076,
      "step": 2098880
    },
    {
      "epoch": 3.434895884474644,
      "grad_norm": 1.2893061637878418,
      "learning_rate": 3.1508338659620595e-06,
      "loss": 0.0118,
      "step": 2098900
    },
    {
      "epoch": 3.434928614913297,
      "grad_norm": 0.4498104453086853,
      "learning_rate": 3.1507679737485427e-06,
      "loss": 0.0103,
      "step": 2098920
    },
    {
      "epoch": 3.4349613453519505,
      "grad_norm": 0.12383026629686356,
      "learning_rate": 3.1507020815350254e-06,
      "loss": 0.0103,
      "step": 2098940
    },
    {
      "epoch": 3.4349940757906037,
      "grad_norm": 0.1502852886915207,
      "learning_rate": 3.150636189321508e-06,
      "loss": 0.012,
      "step": 2098960
    },
    {
      "epoch": 3.4350268062292573,
      "grad_norm": 0.5999819040298462,
      "learning_rate": 3.150570297107991e-06,
      "loss": 0.0187,
      "step": 2098980
    },
    {
      "epoch": 3.4350595366679104,
      "grad_norm": 0.378703236579895,
      "learning_rate": 3.150504404894474e-06,
      "loss": 0.018,
      "step": 2099000
    },
    {
      "epoch": 3.4350922671065636,
      "grad_norm": 0.5164798498153687,
      "learning_rate": 3.1504385126809568e-06,
      "loss": 0.0125,
      "step": 2099020
    },
    {
      "epoch": 3.435124997545217,
      "grad_norm": 0.24266189336776733,
      "learning_rate": 3.1503726204674395e-06,
      "loss": 0.0093,
      "step": 2099040
    },
    {
      "epoch": 3.4351577279838703,
      "grad_norm": 0.367763489484787,
      "learning_rate": 3.1503067282539223e-06,
      "loss": 0.0105,
      "step": 2099060
    },
    {
      "epoch": 3.435190458422524,
      "grad_norm": 0.1977550983428955,
      "learning_rate": 3.150240836040405e-06,
      "loss": 0.0112,
      "step": 2099080
    },
    {
      "epoch": 3.435223188861177,
      "grad_norm": 0.17783395946025848,
      "learning_rate": 3.150174943826888e-06,
      "loss": 0.0111,
      "step": 2099100
    },
    {
      "epoch": 3.4352559192998307,
      "grad_norm": 0.33480149507522583,
      "learning_rate": 3.150109051613371e-06,
      "loss": 0.01,
      "step": 2099120
    },
    {
      "epoch": 3.435288649738484,
      "grad_norm": 0.4784914553165436,
      "learning_rate": 3.1500431593998536e-06,
      "loss": 0.0153,
      "step": 2099140
    },
    {
      "epoch": 3.435321380177137,
      "grad_norm": 0.13636483252048492,
      "learning_rate": 3.1499772671863372e-06,
      "loss": 0.0081,
      "step": 2099160
    },
    {
      "epoch": 3.4353541106157905,
      "grad_norm": 0.24207331240177155,
      "learning_rate": 3.14991137497282e-06,
      "loss": 0.0089,
      "step": 2099180
    },
    {
      "epoch": 3.4353868410544437,
      "grad_norm": 0.33738529682159424,
      "learning_rate": 3.1498454827593027e-06,
      "loss": 0.0101,
      "step": 2099200
    },
    {
      "epoch": 3.4354195714930973,
      "grad_norm": 0.09370749443769455,
      "learning_rate": 3.1497795905457854e-06,
      "loss": 0.006,
      "step": 2099220
    },
    {
      "epoch": 3.4354523019317504,
      "grad_norm": 0.23923850059509277,
      "learning_rate": 3.1497136983322686e-06,
      "loss": 0.0154,
      "step": 2099240
    },
    {
      "epoch": 3.435485032370404,
      "grad_norm": 0.20841822028160095,
      "learning_rate": 3.1496478061187513e-06,
      "loss": 0.0084,
      "step": 2099260
    },
    {
      "epoch": 3.435517762809057,
      "grad_norm": 0.1427481770515442,
      "learning_rate": 3.149581913905234e-06,
      "loss": 0.0095,
      "step": 2099280
    },
    {
      "epoch": 3.4355504932477103,
      "grad_norm": 0.5478870272636414,
      "learning_rate": 3.149516021691717e-06,
      "loss": 0.0117,
      "step": 2099300
    },
    {
      "epoch": 3.435583223686364,
      "grad_norm": 0.4092555642127991,
      "learning_rate": 3.1494501294782e-06,
      "loss": 0.0096,
      "step": 2099320
    },
    {
      "epoch": 3.435615954125017,
      "grad_norm": 0.09881005436182022,
      "learning_rate": 3.1493842372646827e-06,
      "loss": 0.0132,
      "step": 2099340
    },
    {
      "epoch": 3.4356486845636707,
      "grad_norm": 0.3031785488128662,
      "learning_rate": 3.1493183450511655e-06,
      "loss": 0.0121,
      "step": 2099360
    },
    {
      "epoch": 3.435681415002324,
      "grad_norm": 0.0791681557893753,
      "learning_rate": 3.149252452837648e-06,
      "loss": 0.0127,
      "step": 2099380
    },
    {
      "epoch": 3.4357141454409774,
      "grad_norm": 0.11234822124242783,
      "learning_rate": 3.1491865606241314e-06,
      "loss": 0.0109,
      "step": 2099400
    },
    {
      "epoch": 3.4357468758796306,
      "grad_norm": 0.7008374333381653,
      "learning_rate": 3.149120668410614e-06,
      "loss": 0.0138,
      "step": 2099420
    },
    {
      "epoch": 3.4357796063182837,
      "grad_norm": 0.5159404873847961,
      "learning_rate": 3.149054776197097e-06,
      "loss": 0.0101,
      "step": 2099440
    },
    {
      "epoch": 3.4358123367569373,
      "grad_norm": 1.1990885734558105,
      "learning_rate": 3.1489888839835796e-06,
      "loss": 0.0126,
      "step": 2099460
    },
    {
      "epoch": 3.4358450671955905,
      "grad_norm": 0.35907986760139465,
      "learning_rate": 3.1489229917700628e-06,
      "loss": 0.0106,
      "step": 2099480
    },
    {
      "epoch": 3.435877797634244,
      "grad_norm": 0.19407758116722107,
      "learning_rate": 3.1488570995565455e-06,
      "loss": 0.0108,
      "step": 2099500
    },
    {
      "epoch": 3.435910528072897,
      "grad_norm": 0.05923191457986832,
      "learning_rate": 3.1487912073430287e-06,
      "loss": 0.0067,
      "step": 2099520
    },
    {
      "epoch": 3.435943258511551,
      "grad_norm": 0.66624915599823,
      "learning_rate": 3.148725315129512e-06,
      "loss": 0.0161,
      "step": 2099540
    },
    {
      "epoch": 3.435975988950204,
      "grad_norm": 0.1745484173297882,
      "learning_rate": 3.1486594229159946e-06,
      "loss": 0.0091,
      "step": 2099560
    },
    {
      "epoch": 3.436008719388857,
      "grad_norm": 0.43699413537979126,
      "learning_rate": 3.1485935307024773e-06,
      "loss": 0.0134,
      "step": 2099580
    },
    {
      "epoch": 3.4360414498275107,
      "grad_norm": 0.14541564881801605,
      "learning_rate": 3.14852763848896e-06,
      "loss": 0.0108,
      "step": 2099600
    },
    {
      "epoch": 3.436074180266164,
      "grad_norm": 0.704502522945404,
      "learning_rate": 3.1484617462754428e-06,
      "loss": 0.0126,
      "step": 2099620
    },
    {
      "epoch": 3.4361069107048174,
      "grad_norm": 0.3881855309009552,
      "learning_rate": 3.148395854061926e-06,
      "loss": 0.0128,
      "step": 2099640
    },
    {
      "epoch": 3.4361396411434706,
      "grad_norm": 0.3334944546222687,
      "learning_rate": 3.1483299618484087e-06,
      "loss": 0.0106,
      "step": 2099660
    },
    {
      "epoch": 3.436172371582124,
      "grad_norm": 0.21748563647270203,
      "learning_rate": 3.1482640696348914e-06,
      "loss": 0.0108,
      "step": 2099680
    },
    {
      "epoch": 3.4362051020207773,
      "grad_norm": 0.4105832576751709,
      "learning_rate": 3.148198177421374e-06,
      "loss": 0.0132,
      "step": 2099700
    },
    {
      "epoch": 3.4362378324594305,
      "grad_norm": 0.2508617341518402,
      "learning_rate": 3.1481322852078573e-06,
      "loss": 0.0134,
      "step": 2099720
    },
    {
      "epoch": 3.436270562898084,
      "grad_norm": 0.7183364033699036,
      "learning_rate": 3.14806639299434e-06,
      "loss": 0.0147,
      "step": 2099740
    },
    {
      "epoch": 3.4363032933367372,
      "grad_norm": 0.3041132986545563,
      "learning_rate": 3.148000500780823e-06,
      "loss": 0.0119,
      "step": 2099760
    },
    {
      "epoch": 3.436336023775391,
      "grad_norm": 0.3352210223674774,
      "learning_rate": 3.1479346085673055e-06,
      "loss": 0.0116,
      "step": 2099780
    },
    {
      "epoch": 3.436368754214044,
      "grad_norm": 0.08107805997133255,
      "learning_rate": 3.1478687163537887e-06,
      "loss": 0.0087,
      "step": 2099800
    },
    {
      "epoch": 3.436401484652697,
      "grad_norm": 0.09996392577886581,
      "learning_rate": 3.1478028241402714e-06,
      "loss": 0.0183,
      "step": 2099820
    },
    {
      "epoch": 3.4364342150913507,
      "grad_norm": 0.15779231488704681,
      "learning_rate": 3.147736931926754e-06,
      "loss": 0.0096,
      "step": 2099840
    },
    {
      "epoch": 3.436466945530004,
      "grad_norm": 0.21856184303760529,
      "learning_rate": 3.1476710397132378e-06,
      "loss": 0.0101,
      "step": 2099860
    },
    {
      "epoch": 3.4364996759686575,
      "grad_norm": 0.2492581307888031,
      "learning_rate": 3.1476051474997205e-06,
      "loss": 0.012,
      "step": 2099880
    },
    {
      "epoch": 3.4365324064073106,
      "grad_norm": 0.6103140711784363,
      "learning_rate": 3.1475392552862033e-06,
      "loss": 0.0109,
      "step": 2099900
    },
    {
      "epoch": 3.4365651368459638,
      "grad_norm": 0.3601163923740387,
      "learning_rate": 3.147473363072686e-06,
      "loss": 0.0129,
      "step": 2099920
    },
    {
      "epoch": 3.4365978672846174,
      "grad_norm": 0.38574445247650146,
      "learning_rate": 3.147407470859169e-06,
      "loss": 0.0151,
      "step": 2099940
    },
    {
      "epoch": 3.4366305977232705,
      "grad_norm": 0.5496988296508789,
      "learning_rate": 3.147341578645652e-06,
      "loss": 0.0115,
      "step": 2099960
    },
    {
      "epoch": 3.436663328161924,
      "grad_norm": 0.2835378050804138,
      "learning_rate": 3.1472756864321346e-06,
      "loss": 0.012,
      "step": 2099980
    },
    {
      "epoch": 3.4366960586005773,
      "grad_norm": 0.07092096656560898,
      "learning_rate": 3.1472097942186174e-06,
      "loss": 0.0075,
      "step": 2100000
    },
    {
      "epoch": 3.4366960586005773,
      "eval_loss": 0.006778441369533539,
      "eval_runtime": 6511.2847,
      "eval_samples_per_second": 157.858,
      "eval_steps_per_second": 15.786,
      "eval_sts-dev_pearson_cosine": 0.9845580629825204,
      "eval_sts-dev_spearman_cosine": 0.8952763883953024,
      "step": 2100000
    },
    {
      "epoch": 3.436728789039231,
      "grad_norm": 0.8765193223953247,
      "learning_rate": 3.1471439020051005e-06,
      "loss": 0.0132,
      "step": 2100020
    },
    {
      "epoch": 3.436761519477884,
      "grad_norm": 0.6426447033882141,
      "learning_rate": 3.1470780097915833e-06,
      "loss": 0.0147,
      "step": 2100040
    },
    {
      "epoch": 3.436794249916537,
      "grad_norm": 0.557129442691803,
      "learning_rate": 3.147012117578066e-06,
      "loss": 0.0139,
      "step": 2100060
    },
    {
      "epoch": 3.4368269803551907,
      "grad_norm": 0.08456940948963165,
      "learning_rate": 3.1469462253645488e-06,
      "loss": 0.0112,
      "step": 2100080
    },
    {
      "epoch": 3.436859710793844,
      "grad_norm": 0.36209943890571594,
      "learning_rate": 3.1468803331510315e-06,
      "loss": 0.0143,
      "step": 2100100
    },
    {
      "epoch": 3.4368924412324975,
      "grad_norm": 0.1309371143579483,
      "learning_rate": 3.1468144409375147e-06,
      "loss": 0.0089,
      "step": 2100120
    },
    {
      "epoch": 3.4369251716711506,
      "grad_norm": 0.1290738880634308,
      "learning_rate": 3.1467485487239974e-06,
      "loss": 0.0111,
      "step": 2100140
    },
    {
      "epoch": 3.4369579021098042,
      "grad_norm": 0.23060445487499237,
      "learning_rate": 3.14668265651048e-06,
      "loss": 0.0153,
      "step": 2100160
    },
    {
      "epoch": 3.4369906325484574,
      "grad_norm": 0.17798011004924774,
      "learning_rate": 3.146616764296963e-06,
      "loss": 0.009,
      "step": 2100180
    },
    {
      "epoch": 3.4370233629871105,
      "grad_norm": 0.6739583015441895,
      "learning_rate": 3.146550872083446e-06,
      "loss": 0.0147,
      "step": 2100200
    },
    {
      "epoch": 3.437056093425764,
      "grad_norm": 0.4282986521720886,
      "learning_rate": 3.146484979869929e-06,
      "loss": 0.0129,
      "step": 2100220
    },
    {
      "epoch": 3.4370888238644173,
      "grad_norm": 0.2979852259159088,
      "learning_rate": 3.146419087656412e-06,
      "loss": 0.009,
      "step": 2100240
    },
    {
      "epoch": 3.437121554303071,
      "grad_norm": 0.10289555788040161,
      "learning_rate": 3.146353195442895e-06,
      "loss": 0.0065,
      "step": 2100260
    },
    {
      "epoch": 3.437154284741724,
      "grad_norm": 0.08637198805809021,
      "learning_rate": 3.146287303229378e-06,
      "loss": 0.0097,
      "step": 2100280
    },
    {
      "epoch": 3.4371870151803776,
      "grad_norm": 0.2643066644668579,
      "learning_rate": 3.1462214110158606e-06,
      "loss": 0.0093,
      "step": 2100300
    },
    {
      "epoch": 3.4372197456190308,
      "grad_norm": 0.3752516210079193,
      "learning_rate": 3.1461555188023433e-06,
      "loss": 0.0161,
      "step": 2100320
    },
    {
      "epoch": 3.437252476057684,
      "grad_norm": 0.18662691116333008,
      "learning_rate": 3.1460896265888265e-06,
      "loss": 0.0091,
      "step": 2100340
    },
    {
      "epoch": 3.4372852064963375,
      "grad_norm": 0.4239957630634308,
      "learning_rate": 3.1460237343753092e-06,
      "loss": 0.0121,
      "step": 2100360
    },
    {
      "epoch": 3.4373179369349907,
      "grad_norm": 0.1519385278224945,
      "learning_rate": 3.145957842161792e-06,
      "loss": 0.0096,
      "step": 2100380
    },
    {
      "epoch": 3.4373506673736443,
      "grad_norm": 0.16724199056625366,
      "learning_rate": 3.1458919499482747e-06,
      "loss": 0.0091,
      "step": 2100400
    },
    {
      "epoch": 3.4373833978122974,
      "grad_norm": 0.4278619587421417,
      "learning_rate": 3.145826057734758e-06,
      "loss": 0.0119,
      "step": 2100420
    },
    {
      "epoch": 3.437416128250951,
      "grad_norm": 0.31378820538520813,
      "learning_rate": 3.1457601655212406e-06,
      "loss": 0.0091,
      "step": 2100440
    },
    {
      "epoch": 3.437448858689604,
      "grad_norm": 0.9940326809883118,
      "learning_rate": 3.1456942733077234e-06,
      "loss": 0.0124,
      "step": 2100460
    },
    {
      "epoch": 3.4374815891282573,
      "grad_norm": 0.27342191338539124,
      "learning_rate": 3.145628381094206e-06,
      "loss": 0.0106,
      "step": 2100480
    },
    {
      "epoch": 3.437514319566911,
      "grad_norm": 0.12867242097854614,
      "learning_rate": 3.1455624888806893e-06,
      "loss": 0.0135,
      "step": 2100500
    },
    {
      "epoch": 3.437547050005564,
      "grad_norm": 0.12257487326860428,
      "learning_rate": 3.145496596667172e-06,
      "loss": 0.0121,
      "step": 2100520
    },
    {
      "epoch": 3.4375797804442176,
      "grad_norm": 0.0907314345240593,
      "learning_rate": 3.1454307044536547e-06,
      "loss": 0.0107,
      "step": 2100540
    },
    {
      "epoch": 3.437612510882871,
      "grad_norm": 0.26767879724502563,
      "learning_rate": 3.1453648122401375e-06,
      "loss": 0.0106,
      "step": 2100560
    },
    {
      "epoch": 3.4376452413215244,
      "grad_norm": 0.28565549850463867,
      "learning_rate": 3.145298920026621e-06,
      "loss": 0.0088,
      "step": 2100580
    },
    {
      "epoch": 3.4376779717601775,
      "grad_norm": 0.09604419767856598,
      "learning_rate": 3.145233027813104e-06,
      "loss": 0.011,
      "step": 2100600
    },
    {
      "epoch": 3.4377107021988307,
      "grad_norm": 0.7122794985771179,
      "learning_rate": 3.1451671355995865e-06,
      "loss": 0.0109,
      "step": 2100620
    },
    {
      "epoch": 3.4377434326374843,
      "grad_norm": 0.17803730070590973,
      "learning_rate": 3.1451012433860693e-06,
      "loss": 0.0127,
      "step": 2100640
    },
    {
      "epoch": 3.4377761630761374,
      "grad_norm": 0.4360233247280121,
      "learning_rate": 3.1450353511725524e-06,
      "loss": 0.0126,
      "step": 2100660
    },
    {
      "epoch": 3.437808893514791,
      "grad_norm": 0.12099999934434891,
      "learning_rate": 3.144969458959035e-06,
      "loss": 0.0196,
      "step": 2100680
    },
    {
      "epoch": 3.437841623953444,
      "grad_norm": 0.7109426259994507,
      "learning_rate": 3.144903566745518e-06,
      "loss": 0.0187,
      "step": 2100700
    },
    {
      "epoch": 3.4378743543920978,
      "grad_norm": 0.1920124739408493,
      "learning_rate": 3.1448376745320007e-06,
      "loss": 0.0089,
      "step": 2100720
    },
    {
      "epoch": 3.437907084830751,
      "grad_norm": 0.7207641005516052,
      "learning_rate": 3.144771782318484e-06,
      "loss": 0.0141,
      "step": 2100740
    },
    {
      "epoch": 3.437939815269404,
      "grad_norm": 0.1540030986070633,
      "learning_rate": 3.1447058901049666e-06,
      "loss": 0.0068,
      "step": 2100760
    },
    {
      "epoch": 3.4379725457080577,
      "grad_norm": 0.19097372889518738,
      "learning_rate": 3.1446399978914493e-06,
      "loss": 0.014,
      "step": 2100780
    },
    {
      "epoch": 3.438005276146711,
      "grad_norm": 0.23425164818763733,
      "learning_rate": 3.144574105677932e-06,
      "loss": 0.0095,
      "step": 2100800
    },
    {
      "epoch": 3.4380380065853644,
      "grad_norm": 0.11266320198774338,
      "learning_rate": 3.144508213464415e-06,
      "loss": 0.0114,
      "step": 2100820
    },
    {
      "epoch": 3.4380707370240176,
      "grad_norm": 0.19585855305194855,
      "learning_rate": 3.144442321250898e-06,
      "loss": 0.0094,
      "step": 2100840
    },
    {
      "epoch": 3.438103467462671,
      "grad_norm": 0.2797514796257019,
      "learning_rate": 3.1443764290373807e-06,
      "loss": 0.0097,
      "step": 2100860
    },
    {
      "epoch": 3.4381361979013243,
      "grad_norm": 0.2817044258117676,
      "learning_rate": 3.1443105368238634e-06,
      "loss": 0.0087,
      "step": 2100880
    },
    {
      "epoch": 3.4381689283399774,
      "grad_norm": 0.22954079508781433,
      "learning_rate": 3.1442446446103466e-06,
      "loss": 0.0116,
      "step": 2100900
    },
    {
      "epoch": 3.438201658778631,
      "grad_norm": 0.5555917620658875,
      "learning_rate": 3.1441787523968298e-06,
      "loss": 0.0091,
      "step": 2100920
    },
    {
      "epoch": 3.438234389217284,
      "grad_norm": 0.38580796122550964,
      "learning_rate": 3.1441128601833125e-06,
      "loss": 0.0088,
      "step": 2100940
    },
    {
      "epoch": 3.438267119655938,
      "grad_norm": 0.11932855099439621,
      "learning_rate": 3.1440469679697957e-06,
      "loss": 0.0129,
      "step": 2100960
    },
    {
      "epoch": 3.438299850094591,
      "grad_norm": 0.211061492562294,
      "learning_rate": 3.1439810757562784e-06,
      "loss": 0.0078,
      "step": 2100980
    },
    {
      "epoch": 3.4383325805332445,
      "grad_norm": 0.05628792941570282,
      "learning_rate": 3.143915183542761e-06,
      "loss": 0.0084,
      "step": 2101000
    },
    {
      "epoch": 3.4383653109718977,
      "grad_norm": 0.5113846659660339,
      "learning_rate": 3.143849291329244e-06,
      "loss": 0.0132,
      "step": 2101020
    },
    {
      "epoch": 3.438398041410551,
      "grad_norm": 0.12915875017642975,
      "learning_rate": 3.143783399115727e-06,
      "loss": 0.0142,
      "step": 2101040
    },
    {
      "epoch": 3.4384307718492044,
      "grad_norm": 0.18639563024044037,
      "learning_rate": 3.1437175069022098e-06,
      "loss": 0.0087,
      "step": 2101060
    },
    {
      "epoch": 3.4384635022878576,
      "grad_norm": 0.2629548907279968,
      "learning_rate": 3.1436516146886925e-06,
      "loss": 0.0134,
      "step": 2101080
    },
    {
      "epoch": 3.438496232726511,
      "grad_norm": 0.12870904803276062,
      "learning_rate": 3.1435857224751753e-06,
      "loss": 0.0072,
      "step": 2101100
    },
    {
      "epoch": 3.4385289631651643,
      "grad_norm": 0.4371577799320221,
      "learning_rate": 3.143519830261658e-06,
      "loss": 0.0122,
      "step": 2101120
    },
    {
      "epoch": 3.438561693603818,
      "grad_norm": 0.39976903796195984,
      "learning_rate": 3.143453938048141e-06,
      "loss": 0.0116,
      "step": 2101140
    },
    {
      "epoch": 3.438594424042471,
      "grad_norm": 0.09939704090356827,
      "learning_rate": 3.143388045834624e-06,
      "loss": 0.0096,
      "step": 2101160
    },
    {
      "epoch": 3.438627154481124,
      "grad_norm": 0.472650408744812,
      "learning_rate": 3.1433221536211066e-06,
      "loss": 0.0112,
      "step": 2101180
    },
    {
      "epoch": 3.438659884919778,
      "grad_norm": 0.44082802534103394,
      "learning_rate": 3.1432562614075894e-06,
      "loss": 0.0086,
      "step": 2101200
    },
    {
      "epoch": 3.438692615358431,
      "grad_norm": 0.22743451595306396,
      "learning_rate": 3.1431903691940725e-06,
      "loss": 0.0101,
      "step": 2101220
    },
    {
      "epoch": 3.4387253457970846,
      "grad_norm": 0.5033552050590515,
      "learning_rate": 3.1431244769805553e-06,
      "loss": 0.0096,
      "step": 2101240
    },
    {
      "epoch": 3.4387580762357377,
      "grad_norm": 0.16241680085659027,
      "learning_rate": 3.143058584767038e-06,
      "loss": 0.0089,
      "step": 2101260
    },
    {
      "epoch": 3.4387908066743913,
      "grad_norm": 0.42614448070526123,
      "learning_rate": 3.1429926925535216e-06,
      "loss": 0.0081,
      "step": 2101280
    },
    {
      "epoch": 3.4388235371130444,
      "grad_norm": 0.2706766724586487,
      "learning_rate": 3.1429268003400044e-06,
      "loss": 0.0103,
      "step": 2101300
    },
    {
      "epoch": 3.4388562675516976,
      "grad_norm": 0.0836002305150032,
      "learning_rate": 3.142860908126487e-06,
      "loss": 0.0083,
      "step": 2101320
    },
    {
      "epoch": 3.438888997990351,
      "grad_norm": 0.16333480179309845,
      "learning_rate": 3.14279501591297e-06,
      "loss": 0.0153,
      "step": 2101340
    },
    {
      "epoch": 3.4389217284290043,
      "grad_norm": 0.12034681439399719,
      "learning_rate": 3.142729123699453e-06,
      "loss": 0.0117,
      "step": 2101360
    },
    {
      "epoch": 3.438954458867658,
      "grad_norm": 0.09325114637613297,
      "learning_rate": 3.1426632314859357e-06,
      "loss": 0.0135,
      "step": 2101380
    },
    {
      "epoch": 3.438987189306311,
      "grad_norm": 0.25902363657951355,
      "learning_rate": 3.1425973392724185e-06,
      "loss": 0.0117,
      "step": 2101400
    },
    {
      "epoch": 3.4390199197449642,
      "grad_norm": 0.14183616638183594,
      "learning_rate": 3.1425314470589012e-06,
      "loss": 0.0098,
      "step": 2101420
    },
    {
      "epoch": 3.439052650183618,
      "grad_norm": 0.13528893887996674,
      "learning_rate": 3.1424655548453844e-06,
      "loss": 0.0098,
      "step": 2101440
    },
    {
      "epoch": 3.439085380622271,
      "grad_norm": 0.2662476897239685,
      "learning_rate": 3.142399662631867e-06,
      "loss": 0.0096,
      "step": 2101460
    },
    {
      "epoch": 3.4391181110609246,
      "grad_norm": 0.10604289919137955,
      "learning_rate": 3.14233377041835e-06,
      "loss": 0.008,
      "step": 2101480
    },
    {
      "epoch": 3.4391508414995777,
      "grad_norm": 0.20187149941921234,
      "learning_rate": 3.1422678782048326e-06,
      "loss": 0.0118,
      "step": 2101500
    },
    {
      "epoch": 3.439183571938231,
      "grad_norm": 0.18366055190563202,
      "learning_rate": 3.1422019859913153e-06,
      "loss": 0.0093,
      "step": 2101520
    },
    {
      "epoch": 3.4392163023768845,
      "grad_norm": 0.6510743498802185,
      "learning_rate": 3.1421360937777985e-06,
      "loss": 0.0159,
      "step": 2101540
    },
    {
      "epoch": 3.4392490328155376,
      "grad_norm": 0.2777448892593384,
      "learning_rate": 3.1420702015642812e-06,
      "loss": 0.0168,
      "step": 2101560
    },
    {
      "epoch": 3.439281763254191,
      "grad_norm": 0.13806171715259552,
      "learning_rate": 3.142004309350764e-06,
      "loss": 0.0135,
      "step": 2101580
    },
    {
      "epoch": 3.4393144936928444,
      "grad_norm": 0.2119666486978531,
      "learning_rate": 3.1419384171372467e-06,
      "loss": 0.0131,
      "step": 2101600
    },
    {
      "epoch": 3.439347224131498,
      "grad_norm": 0.3157298266887665,
      "learning_rate": 3.1418725249237303e-06,
      "loss": 0.0131,
      "step": 2101620
    },
    {
      "epoch": 3.439379954570151,
      "grad_norm": 0.5011858344078064,
      "learning_rate": 3.141806632710213e-06,
      "loss": 0.0126,
      "step": 2101640
    },
    {
      "epoch": 3.4394126850088043,
      "grad_norm": 0.06266696006059647,
      "learning_rate": 3.1417407404966958e-06,
      "loss": 0.012,
      "step": 2101660
    },
    {
      "epoch": 3.439445415447458,
      "grad_norm": 0.8238449096679688,
      "learning_rate": 3.141674848283179e-06,
      "loss": 0.015,
      "step": 2101680
    },
    {
      "epoch": 3.439478145886111,
      "grad_norm": 0.7810527086257935,
      "learning_rate": 3.1416089560696617e-06,
      "loss": 0.0128,
      "step": 2101700
    },
    {
      "epoch": 3.4395108763247646,
      "grad_norm": 0.6638234257698059,
      "learning_rate": 3.1415430638561444e-06,
      "loss": 0.0112,
      "step": 2101720
    },
    {
      "epoch": 3.4395436067634177,
      "grad_norm": 0.12355807423591614,
      "learning_rate": 3.141477171642627e-06,
      "loss": 0.0077,
      "step": 2101740
    },
    {
      "epoch": 3.4395763372020713,
      "grad_norm": 0.3665163218975067,
      "learning_rate": 3.1414112794291103e-06,
      "loss": 0.0143,
      "step": 2101760
    },
    {
      "epoch": 3.4396090676407245,
      "grad_norm": 0.17449522018432617,
      "learning_rate": 3.141345387215593e-06,
      "loss": 0.0102,
      "step": 2101780
    },
    {
      "epoch": 3.4396417980793776,
      "grad_norm": 0.3324141502380371,
      "learning_rate": 3.141279495002076e-06,
      "loss": 0.0137,
      "step": 2101800
    },
    {
      "epoch": 3.4396745285180312,
      "grad_norm": 0.5889692306518555,
      "learning_rate": 3.1412136027885585e-06,
      "loss": 0.013,
      "step": 2101820
    },
    {
      "epoch": 3.4397072589566844,
      "grad_norm": 0.09755466133356094,
      "learning_rate": 3.1411477105750417e-06,
      "loss": 0.0083,
      "step": 2101840
    },
    {
      "epoch": 3.439739989395338,
      "grad_norm": 0.5320062637329102,
      "learning_rate": 3.1410818183615245e-06,
      "loss": 0.01,
      "step": 2101860
    },
    {
      "epoch": 3.439772719833991,
      "grad_norm": 0.24145622551441193,
      "learning_rate": 3.141015926148007e-06,
      "loss": 0.0135,
      "step": 2101880
    },
    {
      "epoch": 3.4398054502726447,
      "grad_norm": 0.3457264006137848,
      "learning_rate": 3.14095003393449e-06,
      "loss": 0.0119,
      "step": 2101900
    },
    {
      "epoch": 3.439838180711298,
      "grad_norm": 0.22430914640426636,
      "learning_rate": 3.140884141720973e-06,
      "loss": 0.0092,
      "step": 2101920
    },
    {
      "epoch": 3.439870911149951,
      "grad_norm": 0.0786912813782692,
      "learning_rate": 3.140818249507456e-06,
      "loss": 0.0113,
      "step": 2101940
    },
    {
      "epoch": 3.4399036415886046,
      "grad_norm": 0.1389026790857315,
      "learning_rate": 3.1407523572939386e-06,
      "loss": 0.0104,
      "step": 2101960
    },
    {
      "epoch": 3.4399363720272578,
      "grad_norm": 0.17014643549919128,
      "learning_rate": 3.140686465080422e-06,
      "loss": 0.0134,
      "step": 2101980
    },
    {
      "epoch": 3.4399691024659114,
      "grad_norm": 0.10730311274528503,
      "learning_rate": 3.140620572866905e-06,
      "loss": 0.0104,
      "step": 2102000
    },
    {
      "epoch": 3.4400018329045645,
      "grad_norm": 0.1517244279384613,
      "learning_rate": 3.1405546806533876e-06,
      "loss": 0.0091,
      "step": 2102020
    },
    {
      "epoch": 3.440034563343218,
      "grad_norm": 0.3077114522457123,
      "learning_rate": 3.1404887884398704e-06,
      "loss": 0.0076,
      "step": 2102040
    },
    {
      "epoch": 3.4400672937818713,
      "grad_norm": 0.33336132764816284,
      "learning_rate": 3.140422896226353e-06,
      "loss": 0.0104,
      "step": 2102060
    },
    {
      "epoch": 3.4401000242205244,
      "grad_norm": 0.2448963075876236,
      "learning_rate": 3.1403570040128363e-06,
      "loss": 0.0103,
      "step": 2102080
    },
    {
      "epoch": 3.440132754659178,
      "grad_norm": 0.1392352283000946,
      "learning_rate": 3.140291111799319e-06,
      "loss": 0.011,
      "step": 2102100
    },
    {
      "epoch": 3.440165485097831,
      "grad_norm": 0.3234974443912506,
      "learning_rate": 3.1402252195858018e-06,
      "loss": 0.0093,
      "step": 2102120
    },
    {
      "epoch": 3.4401982155364847,
      "grad_norm": 0.18296818435192108,
      "learning_rate": 3.1401593273722845e-06,
      "loss": 0.0167,
      "step": 2102140
    },
    {
      "epoch": 3.440230945975138,
      "grad_norm": 0.4301426112651825,
      "learning_rate": 3.1400934351587677e-06,
      "loss": 0.0133,
      "step": 2102160
    },
    {
      "epoch": 3.4402636764137915,
      "grad_norm": 0.0867174044251442,
      "learning_rate": 3.1400275429452504e-06,
      "loss": 0.0073,
      "step": 2102180
    },
    {
      "epoch": 3.4402964068524446,
      "grad_norm": 0.33235085010528564,
      "learning_rate": 3.139961650731733e-06,
      "loss": 0.0136,
      "step": 2102200
    },
    {
      "epoch": 3.440329137291098,
      "grad_norm": 0.10252731293439865,
      "learning_rate": 3.139895758518216e-06,
      "loss": 0.0166,
      "step": 2102220
    },
    {
      "epoch": 3.4403618677297514,
      "grad_norm": 0.2454911321401596,
      "learning_rate": 3.139829866304699e-06,
      "loss": 0.0144,
      "step": 2102240
    },
    {
      "epoch": 3.4403945981684045,
      "grad_norm": 0.7120946645736694,
      "learning_rate": 3.1397639740911818e-06,
      "loss": 0.0174,
      "step": 2102260
    },
    {
      "epoch": 3.440427328607058,
      "grad_norm": 0.10608343034982681,
      "learning_rate": 3.1396980818776645e-06,
      "loss": 0.0097,
      "step": 2102280
    },
    {
      "epoch": 3.4404600590457113,
      "grad_norm": 0.1935052126646042,
      "learning_rate": 3.1396321896641473e-06,
      "loss": 0.008,
      "step": 2102300
    },
    {
      "epoch": 3.440492789484365,
      "grad_norm": 0.30574831366539,
      "learning_rate": 3.1395662974506304e-06,
      "loss": 0.0075,
      "step": 2102320
    },
    {
      "epoch": 3.440525519923018,
      "grad_norm": 0.1280810534954071,
      "learning_rate": 3.1395004052371136e-06,
      "loss": 0.0132,
      "step": 2102340
    },
    {
      "epoch": 3.440558250361671,
      "grad_norm": 0.20915257930755615,
      "learning_rate": 3.1394345130235963e-06,
      "loss": 0.0108,
      "step": 2102360
    },
    {
      "epoch": 3.4405909808003248,
      "grad_norm": 0.32382437586784363,
      "learning_rate": 3.1393686208100795e-06,
      "loss": 0.0109,
      "step": 2102380
    },
    {
      "epoch": 3.440623711238978,
      "grad_norm": 0.15779677033424377,
      "learning_rate": 3.1393027285965622e-06,
      "loss": 0.0132,
      "step": 2102400
    },
    {
      "epoch": 3.4406564416776315,
      "grad_norm": 0.4955083131790161,
      "learning_rate": 3.139236836383045e-06,
      "loss": 0.0126,
      "step": 2102420
    },
    {
      "epoch": 3.4406891721162847,
      "grad_norm": 0.05212787911295891,
      "learning_rate": 3.1391709441695277e-06,
      "loss": 0.0098,
      "step": 2102440
    },
    {
      "epoch": 3.4407219025549383,
      "grad_norm": 0.20594683289527893,
      "learning_rate": 3.139105051956011e-06,
      "loss": 0.0094,
      "step": 2102460
    },
    {
      "epoch": 3.4407546329935914,
      "grad_norm": 0.42230191826820374,
      "learning_rate": 3.1390391597424936e-06,
      "loss": 0.0147,
      "step": 2102480
    },
    {
      "epoch": 3.4407873634322446,
      "grad_norm": 0.7784656286239624,
      "learning_rate": 3.1389732675289764e-06,
      "loss": 0.019,
      "step": 2102500
    },
    {
      "epoch": 3.440820093870898,
      "grad_norm": 0.14427082240581512,
      "learning_rate": 3.138907375315459e-06,
      "loss": 0.0096,
      "step": 2102520
    },
    {
      "epoch": 3.4408528243095513,
      "grad_norm": 0.14108845591545105,
      "learning_rate": 3.138841483101942e-06,
      "loss": 0.0117,
      "step": 2102540
    },
    {
      "epoch": 3.440885554748205,
      "grad_norm": 0.1702824980020523,
      "learning_rate": 3.138775590888425e-06,
      "loss": 0.0117,
      "step": 2102560
    },
    {
      "epoch": 3.440918285186858,
      "grad_norm": 0.20828838646411896,
      "learning_rate": 3.1387096986749077e-06,
      "loss": 0.0155,
      "step": 2102580
    },
    {
      "epoch": 3.4409510156255116,
      "grad_norm": 0.14252741634845734,
      "learning_rate": 3.1386438064613905e-06,
      "loss": 0.0087,
      "step": 2102600
    },
    {
      "epoch": 3.440983746064165,
      "grad_norm": 0.32400017976760864,
      "learning_rate": 3.1385779142478732e-06,
      "loss": 0.0121,
      "step": 2102620
    },
    {
      "epoch": 3.441016476502818,
      "grad_norm": 0.5077268481254578,
      "learning_rate": 3.1385120220343564e-06,
      "loss": 0.0148,
      "step": 2102640
    },
    {
      "epoch": 3.4410492069414715,
      "grad_norm": 0.10755031555891037,
      "learning_rate": 3.138446129820839e-06,
      "loss": 0.0136,
      "step": 2102660
    },
    {
      "epoch": 3.4410819373801247,
      "grad_norm": 0.10264120250940323,
      "learning_rate": 3.1383802376073223e-06,
      "loss": 0.0119,
      "step": 2102680
    },
    {
      "epoch": 3.4411146678187783,
      "grad_norm": 0.17910395562648773,
      "learning_rate": 3.1383143453938055e-06,
      "loss": 0.0107,
      "step": 2102700
    },
    {
      "epoch": 3.4411473982574314,
      "grad_norm": 0.15898475050926208,
      "learning_rate": 3.138248453180288e-06,
      "loss": 0.0084,
      "step": 2102720
    },
    {
      "epoch": 3.441180128696085,
      "grad_norm": 0.06648438423871994,
      "learning_rate": 3.138182560966771e-06,
      "loss": 0.015,
      "step": 2102740
    },
    {
      "epoch": 3.441212859134738,
      "grad_norm": 0.1707930564880371,
      "learning_rate": 3.1381166687532537e-06,
      "loss": 0.0074,
      "step": 2102760
    },
    {
      "epoch": 3.4412455895733913,
      "grad_norm": 0.8197046518325806,
      "learning_rate": 3.138050776539737e-06,
      "loss": 0.0135,
      "step": 2102780
    },
    {
      "epoch": 3.441278320012045,
      "grad_norm": 0.47999492287635803,
      "learning_rate": 3.1379848843262196e-06,
      "loss": 0.0108,
      "step": 2102800
    },
    {
      "epoch": 3.441311050450698,
      "grad_norm": 0.1916399747133255,
      "learning_rate": 3.1379189921127023e-06,
      "loss": 0.0066,
      "step": 2102820
    },
    {
      "epoch": 3.4413437808893517,
      "grad_norm": 0.37779998779296875,
      "learning_rate": 3.137853099899185e-06,
      "loss": 0.0098,
      "step": 2102840
    },
    {
      "epoch": 3.441376511328005,
      "grad_norm": 0.3518749475479126,
      "learning_rate": 3.1377872076856682e-06,
      "loss": 0.0109,
      "step": 2102860
    },
    {
      "epoch": 3.441409241766658,
      "grad_norm": 0.15118789672851562,
      "learning_rate": 3.137721315472151e-06,
      "loss": 0.0089,
      "step": 2102880
    },
    {
      "epoch": 3.4414419722053116,
      "grad_norm": 0.5295916199684143,
      "learning_rate": 3.1376554232586337e-06,
      "loss": 0.0162,
      "step": 2102900
    },
    {
      "epoch": 3.4414747026439647,
      "grad_norm": 0.19692708551883698,
      "learning_rate": 3.1375895310451164e-06,
      "loss": 0.0077,
      "step": 2102920
    },
    {
      "epoch": 3.4415074330826183,
      "grad_norm": 0.12973657250404358,
      "learning_rate": 3.137523638831599e-06,
      "loss": 0.0131,
      "step": 2102940
    },
    {
      "epoch": 3.4415401635212715,
      "grad_norm": 0.17414364218711853,
      "learning_rate": 3.1374577466180823e-06,
      "loss": 0.015,
      "step": 2102960
    },
    {
      "epoch": 3.4415728939599246,
      "grad_norm": 0.1914937049150467,
      "learning_rate": 3.137391854404565e-06,
      "loss": 0.0063,
      "step": 2102980
    },
    {
      "epoch": 3.441605624398578,
      "grad_norm": 0.3831484019756317,
      "learning_rate": 3.137325962191048e-06,
      "loss": 0.0125,
      "step": 2103000
    },
    {
      "epoch": 3.4416383548372314,
      "grad_norm": 0.3868643641471863,
      "learning_rate": 3.1372600699775306e-06,
      "loss": 0.0088,
      "step": 2103020
    },
    {
      "epoch": 3.441671085275885,
      "grad_norm": 0.2913140058517456,
      "learning_rate": 3.137194177764014e-06,
      "loss": 0.0097,
      "step": 2103040
    },
    {
      "epoch": 3.441703815714538,
      "grad_norm": 0.35432785749435425,
      "learning_rate": 3.137128285550497e-06,
      "loss": 0.013,
      "step": 2103060
    },
    {
      "epoch": 3.4417365461531917,
      "grad_norm": 0.3248419165611267,
      "learning_rate": 3.1370623933369796e-06,
      "loss": 0.0114,
      "step": 2103080
    },
    {
      "epoch": 3.441769276591845,
      "grad_norm": 0.41394364833831787,
      "learning_rate": 3.1369965011234628e-06,
      "loss": 0.0151,
      "step": 2103100
    },
    {
      "epoch": 3.441802007030498,
      "grad_norm": 0.2137460857629776,
      "learning_rate": 3.1369306089099455e-06,
      "loss": 0.0182,
      "step": 2103120
    },
    {
      "epoch": 3.4418347374691516,
      "grad_norm": 1.3577862977981567,
      "learning_rate": 3.1368647166964283e-06,
      "loss": 0.0175,
      "step": 2103140
    },
    {
      "epoch": 3.4418674679078047,
      "grad_norm": 0.15320928394794464,
      "learning_rate": 3.136798824482911e-06,
      "loss": 0.0105,
      "step": 2103160
    },
    {
      "epoch": 3.4419001983464583,
      "grad_norm": 0.22769220173358917,
      "learning_rate": 3.136732932269394e-06,
      "loss": 0.011,
      "step": 2103180
    },
    {
      "epoch": 3.4419329287851115,
      "grad_norm": 0.09840185940265656,
      "learning_rate": 3.136667040055877e-06,
      "loss": 0.0151,
      "step": 2103200
    },
    {
      "epoch": 3.441965659223765,
      "grad_norm": 0.17961430549621582,
      "learning_rate": 3.1366011478423596e-06,
      "loss": 0.0083,
      "step": 2103220
    },
    {
      "epoch": 3.4419983896624182,
      "grad_norm": 0.24053162336349487,
      "learning_rate": 3.1365352556288424e-06,
      "loss": 0.0085,
      "step": 2103240
    },
    {
      "epoch": 3.4420311201010714,
      "grad_norm": 0.18377310037612915,
      "learning_rate": 3.1364693634153256e-06,
      "loss": 0.0141,
      "step": 2103260
    },
    {
      "epoch": 3.442063850539725,
      "grad_norm": 0.27026107907295227,
      "learning_rate": 3.1364034712018083e-06,
      "loss": 0.0186,
      "step": 2103280
    },
    {
      "epoch": 3.442096580978378,
      "grad_norm": 0.05741368606686592,
      "learning_rate": 3.136337578988291e-06,
      "loss": 0.0071,
      "step": 2103300
    },
    {
      "epoch": 3.4421293114170317,
      "grad_norm": 0.4278505742549896,
      "learning_rate": 3.1362716867747738e-06,
      "loss": 0.0127,
      "step": 2103320
    },
    {
      "epoch": 3.442162041855685,
      "grad_norm": 0.7750558853149414,
      "learning_rate": 3.136205794561257e-06,
      "loss": 0.0088,
      "step": 2103340
    },
    {
      "epoch": 3.4421947722943385,
      "grad_norm": 0.13574746251106262,
      "learning_rate": 3.1361399023477397e-06,
      "loss": 0.0137,
      "step": 2103360
    },
    {
      "epoch": 3.4422275027329916,
      "grad_norm": 0.17510749399662018,
      "learning_rate": 3.136074010134223e-06,
      "loss": 0.0101,
      "step": 2103380
    },
    {
      "epoch": 3.4422602331716448,
      "grad_norm": 0.3475819528102875,
      "learning_rate": 3.136008117920706e-06,
      "loss": 0.0114,
      "step": 2103400
    },
    {
      "epoch": 3.4422929636102984,
      "grad_norm": 0.0661873072385788,
      "learning_rate": 3.1359422257071887e-06,
      "loss": 0.0146,
      "step": 2103420
    },
    {
      "epoch": 3.4423256940489515,
      "grad_norm": 0.8224353790283203,
      "learning_rate": 3.1358763334936715e-06,
      "loss": 0.0126,
      "step": 2103440
    },
    {
      "epoch": 3.442358424487605,
      "grad_norm": 0.13229314982891083,
      "learning_rate": 3.1358104412801542e-06,
      "loss": 0.0126,
      "step": 2103460
    },
    {
      "epoch": 3.4423911549262582,
      "grad_norm": 0.7277002334594727,
      "learning_rate": 3.135744549066637e-06,
      "loss": 0.0082,
      "step": 2103480
    },
    {
      "epoch": 3.442423885364912,
      "grad_norm": 0.5550342798233032,
      "learning_rate": 3.13567865685312e-06,
      "loss": 0.009,
      "step": 2103500
    },
    {
      "epoch": 3.442456615803565,
      "grad_norm": 0.6353057622909546,
      "learning_rate": 3.135612764639603e-06,
      "loss": 0.0114,
      "step": 2103520
    },
    {
      "epoch": 3.442489346242218,
      "grad_norm": 0.3127157986164093,
      "learning_rate": 3.1355468724260856e-06,
      "loss": 0.0096,
      "step": 2103540
    },
    {
      "epoch": 3.4425220766808717,
      "grad_norm": 0.15749980509281158,
      "learning_rate": 3.1354809802125683e-06,
      "loss": 0.0106,
      "step": 2103560
    },
    {
      "epoch": 3.442554807119525,
      "grad_norm": 0.7274057269096375,
      "learning_rate": 3.1354150879990515e-06,
      "loss": 0.0121,
      "step": 2103580
    },
    {
      "epoch": 3.4425875375581785,
      "grad_norm": 0.052083589136600494,
      "learning_rate": 3.1353491957855342e-06,
      "loss": 0.0097,
      "step": 2103600
    },
    {
      "epoch": 3.4426202679968316,
      "grad_norm": 0.36858221888542175,
      "learning_rate": 3.135283303572017e-06,
      "loss": 0.0087,
      "step": 2103620
    },
    {
      "epoch": 3.4426529984354852,
      "grad_norm": 0.3416016101837158,
      "learning_rate": 3.1352174113584997e-06,
      "loss": 0.0145,
      "step": 2103640
    },
    {
      "epoch": 3.4426857288741384,
      "grad_norm": 0.1504736840724945,
      "learning_rate": 3.135151519144983e-06,
      "loss": 0.0108,
      "step": 2103660
    },
    {
      "epoch": 3.4427184593127915,
      "grad_norm": 0.19870242476463318,
      "learning_rate": 3.1350856269314656e-06,
      "loss": 0.0113,
      "step": 2103680
    },
    {
      "epoch": 3.442751189751445,
      "grad_norm": 0.1070137470960617,
      "learning_rate": 3.1350197347179484e-06,
      "loss": 0.0087,
      "step": 2103700
    },
    {
      "epoch": 3.4427839201900983,
      "grad_norm": 0.15094774961471558,
      "learning_rate": 3.134953842504431e-06,
      "loss": 0.0088,
      "step": 2103720
    },
    {
      "epoch": 3.442816650628752,
      "grad_norm": 0.1860482543706894,
      "learning_rate": 3.1348879502909147e-06,
      "loss": 0.0145,
      "step": 2103740
    },
    {
      "epoch": 3.442849381067405,
      "grad_norm": 0.1630610078573227,
      "learning_rate": 3.1348220580773974e-06,
      "loss": 0.0131,
      "step": 2103760
    },
    {
      "epoch": 3.4428821115060586,
      "grad_norm": 0.18979720771312714,
      "learning_rate": 3.13475616586388e-06,
      "loss": 0.0098,
      "step": 2103780
    },
    {
      "epoch": 3.4429148419447118,
      "grad_norm": 0.901526153087616,
      "learning_rate": 3.1346902736503633e-06,
      "loss": 0.0155,
      "step": 2103800
    },
    {
      "epoch": 3.442947572383365,
      "grad_norm": 0.15392670035362244,
      "learning_rate": 3.134624381436846e-06,
      "loss": 0.015,
      "step": 2103820
    },
    {
      "epoch": 3.4429803028220185,
      "grad_norm": 0.11667946726083755,
      "learning_rate": 3.134558489223329e-06,
      "loss": 0.0107,
      "step": 2103840
    },
    {
      "epoch": 3.4430130332606717,
      "grad_norm": 0.09865854680538177,
      "learning_rate": 3.1344925970098116e-06,
      "loss": 0.0103,
      "step": 2103860
    },
    {
      "epoch": 3.4430457636993252,
      "grad_norm": 0.3484744131565094,
      "learning_rate": 3.1344267047962947e-06,
      "loss": 0.0122,
      "step": 2103880
    },
    {
      "epoch": 3.4430784941379784,
      "grad_norm": 0.15042519569396973,
      "learning_rate": 3.1343608125827775e-06,
      "loss": 0.0102,
      "step": 2103900
    },
    {
      "epoch": 3.443111224576632,
      "grad_norm": 0.4489019811153412,
      "learning_rate": 3.13429492036926e-06,
      "loss": 0.0137,
      "step": 2103920
    },
    {
      "epoch": 3.443143955015285,
      "grad_norm": 0.2258484959602356,
      "learning_rate": 3.134229028155743e-06,
      "loss": 0.0137,
      "step": 2103940
    },
    {
      "epoch": 3.4431766854539383,
      "grad_norm": 0.20403318107128143,
      "learning_rate": 3.1341631359422257e-06,
      "loss": 0.0116,
      "step": 2103960
    },
    {
      "epoch": 3.443209415892592,
      "grad_norm": 0.13637270033359528,
      "learning_rate": 3.134097243728709e-06,
      "loss": 0.0158,
      "step": 2103980
    },
    {
      "epoch": 3.443242146331245,
      "grad_norm": 0.33999505639076233,
      "learning_rate": 3.1340313515151916e-06,
      "loss": 0.012,
      "step": 2104000
    },
    {
      "epoch": 3.4432748767698986,
      "grad_norm": 0.09264827519655228,
      "learning_rate": 3.1339654593016743e-06,
      "loss": 0.0121,
      "step": 2104020
    },
    {
      "epoch": 3.443307607208552,
      "grad_norm": 0.24669818580150604,
      "learning_rate": 3.133899567088157e-06,
      "loss": 0.0129,
      "step": 2104040
    },
    {
      "epoch": 3.4433403376472054,
      "grad_norm": 0.38832125067710876,
      "learning_rate": 3.1338336748746402e-06,
      "loss": 0.0073,
      "step": 2104060
    },
    {
      "epoch": 3.4433730680858585,
      "grad_norm": 0.15403933823108673,
      "learning_rate": 3.133767782661123e-06,
      "loss": 0.0158,
      "step": 2104080
    },
    {
      "epoch": 3.4434057985245117,
      "grad_norm": 0.23103977739810944,
      "learning_rate": 3.133701890447606e-06,
      "loss": 0.0109,
      "step": 2104100
    },
    {
      "epoch": 3.4434385289631653,
      "grad_norm": 0.3517979085445404,
      "learning_rate": 3.1336359982340893e-06,
      "loss": 0.0158,
      "step": 2104120
    },
    {
      "epoch": 3.4434712594018184,
      "grad_norm": 0.15391291677951813,
      "learning_rate": 3.133570106020572e-06,
      "loss": 0.0095,
      "step": 2104140
    },
    {
      "epoch": 3.443503989840472,
      "grad_norm": 0.17569701373577118,
      "learning_rate": 3.1335042138070548e-06,
      "loss": 0.0112,
      "step": 2104160
    },
    {
      "epoch": 3.443536720279125,
      "grad_norm": 0.8362315893173218,
      "learning_rate": 3.1334383215935375e-06,
      "loss": 0.0109,
      "step": 2104180
    },
    {
      "epoch": 3.4435694507177788,
      "grad_norm": 0.10433764010667801,
      "learning_rate": 3.1333724293800207e-06,
      "loss": 0.0089,
      "step": 2104200
    },
    {
      "epoch": 3.443602181156432,
      "grad_norm": 0.5788809061050415,
      "learning_rate": 3.1333065371665034e-06,
      "loss": 0.0119,
      "step": 2104220
    },
    {
      "epoch": 3.443634911595085,
      "grad_norm": 0.08189211040735245,
      "learning_rate": 3.133240644952986e-06,
      "loss": 0.0112,
      "step": 2104240
    },
    {
      "epoch": 3.4436676420337387,
      "grad_norm": 0.4221120774745941,
      "learning_rate": 3.133174752739469e-06,
      "loss": 0.0074,
      "step": 2104260
    },
    {
      "epoch": 3.443700372472392,
      "grad_norm": 0.23445084691047668,
      "learning_rate": 3.133108860525952e-06,
      "loss": 0.0092,
      "step": 2104280
    },
    {
      "epoch": 3.4437331029110454,
      "grad_norm": 0.2695387601852417,
      "learning_rate": 3.133042968312435e-06,
      "loss": 0.0095,
      "step": 2104300
    },
    {
      "epoch": 3.4437658333496985,
      "grad_norm": 0.37858232855796814,
      "learning_rate": 3.1329770760989175e-06,
      "loss": 0.0081,
      "step": 2104320
    },
    {
      "epoch": 3.443798563788352,
      "grad_norm": 0.2567425072193146,
      "learning_rate": 3.1329111838854003e-06,
      "loss": 0.013,
      "step": 2104340
    },
    {
      "epoch": 3.4438312942270053,
      "grad_norm": 0.26029711961746216,
      "learning_rate": 3.1328452916718834e-06,
      "loss": 0.0114,
      "step": 2104360
    },
    {
      "epoch": 3.4438640246656584,
      "grad_norm": 0.7991263270378113,
      "learning_rate": 3.132779399458366e-06,
      "loss": 0.0147,
      "step": 2104380
    },
    {
      "epoch": 3.443896755104312,
      "grad_norm": 0.21320606768131256,
      "learning_rate": 3.132713507244849e-06,
      "loss": 0.0132,
      "step": 2104400
    },
    {
      "epoch": 3.443929485542965,
      "grad_norm": 0.6528409719467163,
      "learning_rate": 3.1326476150313317e-06,
      "loss": 0.013,
      "step": 2104420
    },
    {
      "epoch": 3.443962215981619,
      "grad_norm": 0.1368046998977661,
      "learning_rate": 3.1325817228178152e-06,
      "loss": 0.0074,
      "step": 2104440
    },
    {
      "epoch": 3.443994946420272,
      "grad_norm": 0.18868863582611084,
      "learning_rate": 3.132515830604298e-06,
      "loss": 0.0117,
      "step": 2104460
    },
    {
      "epoch": 3.444027676858925,
      "grad_norm": 0.18351863324642181,
      "learning_rate": 3.1324499383907807e-06,
      "loss": 0.01,
      "step": 2104480
    },
    {
      "epoch": 3.4440604072975787,
      "grad_norm": 0.5355215668678284,
      "learning_rate": 3.1323840461772635e-06,
      "loss": 0.0116,
      "step": 2104500
    },
    {
      "epoch": 3.444093137736232,
      "grad_norm": 0.4380774199962616,
      "learning_rate": 3.1323181539637466e-06,
      "loss": 0.0069,
      "step": 2104520
    },
    {
      "epoch": 3.4441258681748854,
      "grad_norm": 0.1388685703277588,
      "learning_rate": 3.1322522617502294e-06,
      "loss": 0.0129,
      "step": 2104540
    },
    {
      "epoch": 3.4441585986135386,
      "grad_norm": 0.4019393026828766,
      "learning_rate": 3.132186369536712e-06,
      "loss": 0.0107,
      "step": 2104560
    },
    {
      "epoch": 3.4441913290521917,
      "grad_norm": 0.15040847659111023,
      "learning_rate": 3.132120477323195e-06,
      "loss": 0.0076,
      "step": 2104580
    },
    {
      "epoch": 3.4442240594908453,
      "grad_norm": 0.10004683583974838,
      "learning_rate": 3.132054585109678e-06,
      "loss": 0.0169,
      "step": 2104600
    },
    {
      "epoch": 3.4442567899294985,
      "grad_norm": 0.30784812569618225,
      "learning_rate": 3.1319886928961607e-06,
      "loss": 0.0148,
      "step": 2104620
    },
    {
      "epoch": 3.444289520368152,
      "grad_norm": 0.19124601781368256,
      "learning_rate": 3.1319228006826435e-06,
      "loss": 0.0111,
      "step": 2104640
    },
    {
      "epoch": 3.444322250806805,
      "grad_norm": 0.2542875111103058,
      "learning_rate": 3.1318569084691262e-06,
      "loss": 0.0107,
      "step": 2104660
    },
    {
      "epoch": 3.444354981245459,
      "grad_norm": 0.26704996824264526,
      "learning_rate": 3.1317910162556094e-06,
      "loss": 0.0131,
      "step": 2104680
    },
    {
      "epoch": 3.444387711684112,
      "grad_norm": 0.07757846266031265,
      "learning_rate": 3.131725124042092e-06,
      "loss": 0.0151,
      "step": 2104700
    },
    {
      "epoch": 3.444420442122765,
      "grad_norm": 0.11723802238702774,
      "learning_rate": 3.131659231828575e-06,
      "loss": 0.0114,
      "step": 2104720
    },
    {
      "epoch": 3.4444531725614187,
      "grad_norm": 0.18617868423461914,
      "learning_rate": 3.1315933396150576e-06,
      "loss": 0.0119,
      "step": 2104740
    },
    {
      "epoch": 3.444485903000072,
      "grad_norm": 0.1068492904305458,
      "learning_rate": 3.1315274474015408e-06,
      "loss": 0.0086,
      "step": 2104760
    },
    {
      "epoch": 3.4445186334387254,
      "grad_norm": 0.23150627315044403,
      "learning_rate": 3.1314615551880235e-06,
      "loss": 0.0166,
      "step": 2104780
    },
    {
      "epoch": 3.4445513638773786,
      "grad_norm": 0.39326703548431396,
      "learning_rate": 3.1313956629745067e-06,
      "loss": 0.0119,
      "step": 2104800
    },
    {
      "epoch": 3.444584094316032,
      "grad_norm": 0.16092707216739655,
      "learning_rate": 3.13132977076099e-06,
      "loss": 0.0088,
      "step": 2104820
    },
    {
      "epoch": 3.4446168247546853,
      "grad_norm": 0.5018993020057678,
      "learning_rate": 3.1312638785474726e-06,
      "loss": 0.0135,
      "step": 2104840
    },
    {
      "epoch": 3.4446495551933385,
      "grad_norm": 0.25396737456321716,
      "learning_rate": 3.1311979863339553e-06,
      "loss": 0.0096,
      "step": 2104860
    },
    {
      "epoch": 3.444682285631992,
      "grad_norm": 0.28139954805374146,
      "learning_rate": 3.131132094120438e-06,
      "loss": 0.0125,
      "step": 2104880
    },
    {
      "epoch": 3.4447150160706452,
      "grad_norm": 0.4762193262577057,
      "learning_rate": 3.131066201906921e-06,
      "loss": 0.0064,
      "step": 2104900
    },
    {
      "epoch": 3.444747746509299,
      "grad_norm": 0.11023752391338348,
      "learning_rate": 3.131000309693404e-06,
      "loss": 0.0123,
      "step": 2104920
    },
    {
      "epoch": 3.444780476947952,
      "grad_norm": 0.38682544231414795,
      "learning_rate": 3.1309344174798867e-06,
      "loss": 0.0096,
      "step": 2104940
    },
    {
      "epoch": 3.4448132073866056,
      "grad_norm": 0.11012288928031921,
      "learning_rate": 3.1308685252663694e-06,
      "loss": 0.0096,
      "step": 2104960
    },
    {
      "epoch": 3.4448459378252587,
      "grad_norm": 0.40789321064949036,
      "learning_rate": 3.130802633052852e-06,
      "loss": 0.0134,
      "step": 2104980
    },
    {
      "epoch": 3.444878668263912,
      "grad_norm": 0.06036575883626938,
      "learning_rate": 3.1307367408393353e-06,
      "loss": 0.0134,
      "step": 2105000
    },
    {
      "epoch": 3.4449113987025655,
      "grad_norm": 0.48449864983558655,
      "learning_rate": 3.130670848625818e-06,
      "loss": 0.0094,
      "step": 2105020
    },
    {
      "epoch": 3.4449441291412186,
      "grad_norm": 0.3728419542312622,
      "learning_rate": 3.130604956412301e-06,
      "loss": 0.0123,
      "step": 2105040
    },
    {
      "epoch": 3.444976859579872,
      "grad_norm": 0.44610485434532166,
      "learning_rate": 3.1305390641987836e-06,
      "loss": 0.0111,
      "step": 2105060
    },
    {
      "epoch": 3.4450095900185254,
      "grad_norm": 0.09204313904047012,
      "learning_rate": 3.1304731719852667e-06,
      "loss": 0.0128,
      "step": 2105080
    },
    {
      "epoch": 3.445042320457179,
      "grad_norm": 0.14801008999347687,
      "learning_rate": 3.1304072797717495e-06,
      "loss": 0.0124,
      "step": 2105100
    },
    {
      "epoch": 3.445075050895832,
      "grad_norm": 0.2816994786262512,
      "learning_rate": 3.130341387558232e-06,
      "loss": 0.009,
      "step": 2105120
    },
    {
      "epoch": 3.4451077813344853,
      "grad_norm": 0.19437582790851593,
      "learning_rate": 3.130275495344716e-06,
      "loss": 0.0165,
      "step": 2105140
    },
    {
      "epoch": 3.445140511773139,
      "grad_norm": 0.24844831228256226,
      "learning_rate": 3.1302096031311985e-06,
      "loss": 0.0122,
      "step": 2105160
    },
    {
      "epoch": 3.445173242211792,
      "grad_norm": 0.07636056840419769,
      "learning_rate": 3.1301437109176813e-06,
      "loss": 0.0095,
      "step": 2105180
    },
    {
      "epoch": 3.4452059726504456,
      "grad_norm": 0.5622062087059021,
      "learning_rate": 3.130077818704164e-06,
      "loss": 0.0134,
      "step": 2105200
    },
    {
      "epoch": 3.4452387030890987,
      "grad_norm": 0.24989955127239227,
      "learning_rate": 3.130011926490647e-06,
      "loss": 0.0128,
      "step": 2105220
    },
    {
      "epoch": 3.4452714335277523,
      "grad_norm": 0.27876943349838257,
      "learning_rate": 3.12994603427713e-06,
      "loss": 0.0091,
      "step": 2105240
    },
    {
      "epoch": 3.4453041639664055,
      "grad_norm": 0.6326500773429871,
      "learning_rate": 3.1298801420636127e-06,
      "loss": 0.0143,
      "step": 2105260
    },
    {
      "epoch": 3.4453368944050586,
      "grad_norm": 0.2987086772918701,
      "learning_rate": 3.1298142498500954e-06,
      "loss": 0.0094,
      "step": 2105280
    },
    {
      "epoch": 3.4453696248437122,
      "grad_norm": 0.5152478218078613,
      "learning_rate": 3.1297483576365786e-06,
      "loss": 0.0097,
      "step": 2105300
    },
    {
      "epoch": 3.4454023552823654,
      "grad_norm": 0.08293397724628448,
      "learning_rate": 3.1296824654230613e-06,
      "loss": 0.013,
      "step": 2105320
    },
    {
      "epoch": 3.445435085721019,
      "grad_norm": 0.34437480568885803,
      "learning_rate": 3.129616573209544e-06,
      "loss": 0.007,
      "step": 2105340
    },
    {
      "epoch": 3.445467816159672,
      "grad_norm": 0.4355623126029968,
      "learning_rate": 3.1295506809960268e-06,
      "loss": 0.0144,
      "step": 2105360
    },
    {
      "epoch": 3.4455005465983257,
      "grad_norm": 0.11010865867137909,
      "learning_rate": 3.1294847887825095e-06,
      "loss": 0.0067,
      "step": 2105380
    },
    {
      "epoch": 3.445533277036979,
      "grad_norm": 0.23506973683834076,
      "learning_rate": 3.1294188965689927e-06,
      "loss": 0.013,
      "step": 2105400
    },
    {
      "epoch": 3.445566007475632,
      "grad_norm": 0.2801937162876129,
      "learning_rate": 3.1293530043554754e-06,
      "loss": 0.0117,
      "step": 2105420
    },
    {
      "epoch": 3.4455987379142856,
      "grad_norm": 0.301525741815567,
      "learning_rate": 3.129287112141958e-06,
      "loss": 0.0141,
      "step": 2105440
    },
    {
      "epoch": 3.4456314683529388,
      "grad_norm": 0.05172405764460564,
      "learning_rate": 3.129221219928441e-06,
      "loss": 0.011,
      "step": 2105460
    },
    {
      "epoch": 3.4456641987915924,
      "grad_norm": 0.24184711277484894,
      "learning_rate": 3.129155327714924e-06,
      "loss": 0.0132,
      "step": 2105480
    },
    {
      "epoch": 3.4456969292302455,
      "grad_norm": 0.40403375029563904,
      "learning_rate": 3.1290894355014072e-06,
      "loss": 0.0127,
      "step": 2105500
    },
    {
      "epoch": 3.445729659668899,
      "grad_norm": 0.20321024954319,
      "learning_rate": 3.12902354328789e-06,
      "loss": 0.0166,
      "step": 2105520
    },
    {
      "epoch": 3.4457623901075523,
      "grad_norm": 0.2902675271034241,
      "learning_rate": 3.128957651074373e-06,
      "loss": 0.0087,
      "step": 2105540
    },
    {
      "epoch": 3.4457951205462054,
      "grad_norm": 0.31596699357032776,
      "learning_rate": 3.128891758860856e-06,
      "loss": 0.0088,
      "step": 2105560
    },
    {
      "epoch": 3.445827850984859,
      "grad_norm": 0.1153821051120758,
      "learning_rate": 3.1288258666473386e-06,
      "loss": 0.0121,
      "step": 2105580
    },
    {
      "epoch": 3.445860581423512,
      "grad_norm": 0.43691617250442505,
      "learning_rate": 3.1287599744338213e-06,
      "loss": 0.0163,
      "step": 2105600
    },
    {
      "epoch": 3.4458933118621657,
      "grad_norm": 0.27290287613868713,
      "learning_rate": 3.1286940822203045e-06,
      "loss": 0.0128,
      "step": 2105620
    },
    {
      "epoch": 3.445926042300819,
      "grad_norm": 0.38769516348838806,
      "learning_rate": 3.1286281900067873e-06,
      "loss": 0.0119,
      "step": 2105640
    },
    {
      "epoch": 3.4459587727394725,
      "grad_norm": 0.11409386247396469,
      "learning_rate": 3.12856229779327e-06,
      "loss": 0.0097,
      "step": 2105660
    },
    {
      "epoch": 3.4459915031781256,
      "grad_norm": 0.06681161373853683,
      "learning_rate": 3.1284964055797527e-06,
      "loss": 0.0074,
      "step": 2105680
    },
    {
      "epoch": 3.446024233616779,
      "grad_norm": 0.5194888710975647,
      "learning_rate": 3.128430513366236e-06,
      "loss": 0.0141,
      "step": 2105700
    },
    {
      "epoch": 3.4460569640554324,
      "grad_norm": 0.6495397090911865,
      "learning_rate": 3.1283646211527186e-06,
      "loss": 0.0102,
      "step": 2105720
    },
    {
      "epoch": 3.4460896944940855,
      "grad_norm": 0.1845068484544754,
      "learning_rate": 3.1282987289392014e-06,
      "loss": 0.0095,
      "step": 2105740
    },
    {
      "epoch": 3.446122424932739,
      "grad_norm": 0.23604877293109894,
      "learning_rate": 3.128232836725684e-06,
      "loss": 0.0144,
      "step": 2105760
    },
    {
      "epoch": 3.4461551553713923,
      "grad_norm": 0.6457679867744446,
      "learning_rate": 3.1281669445121673e-06,
      "loss": 0.0113,
      "step": 2105780
    },
    {
      "epoch": 3.446187885810046,
      "grad_norm": 0.2072944939136505,
      "learning_rate": 3.12810105229865e-06,
      "loss": 0.0117,
      "step": 2105800
    },
    {
      "epoch": 3.446220616248699,
      "grad_norm": 0.4690808057785034,
      "learning_rate": 3.1280351600851328e-06,
      "loss": 0.0124,
      "step": 2105820
    },
    {
      "epoch": 3.446253346687352,
      "grad_norm": 0.3942810297012329,
      "learning_rate": 3.1279692678716163e-06,
      "loss": 0.0108,
      "step": 2105840
    },
    {
      "epoch": 3.4462860771260058,
      "grad_norm": 0.106275275349617,
      "learning_rate": 3.127903375658099e-06,
      "loss": 0.0101,
      "step": 2105860
    },
    {
      "epoch": 3.446318807564659,
      "grad_norm": 0.7511900067329407,
      "learning_rate": 3.127837483444582e-06,
      "loss": 0.0085,
      "step": 2105880
    },
    {
      "epoch": 3.4463515380033125,
      "grad_norm": 0.18016499280929565,
      "learning_rate": 3.1277715912310646e-06,
      "loss": 0.0058,
      "step": 2105900
    },
    {
      "epoch": 3.4463842684419657,
      "grad_norm": 0.28039732575416565,
      "learning_rate": 3.1277056990175473e-06,
      "loss": 0.0167,
      "step": 2105920
    },
    {
      "epoch": 3.446416998880619,
      "grad_norm": 0.3909170627593994,
      "learning_rate": 3.1276398068040305e-06,
      "loss": 0.0143,
      "step": 2105940
    },
    {
      "epoch": 3.4464497293192724,
      "grad_norm": 0.33629629015922546,
      "learning_rate": 3.127573914590513e-06,
      "loss": 0.013,
      "step": 2105960
    },
    {
      "epoch": 3.4464824597579256,
      "grad_norm": 0.4352053701877594,
      "learning_rate": 3.127508022376996e-06,
      "loss": 0.0126,
      "step": 2105980
    },
    {
      "epoch": 3.446515190196579,
      "grad_norm": 0.6864686012268066,
      "learning_rate": 3.1274421301634787e-06,
      "loss": 0.0084,
      "step": 2106000
    },
    {
      "epoch": 3.4465479206352323,
      "grad_norm": 0.512193500995636,
      "learning_rate": 3.127376237949962e-06,
      "loss": 0.0091,
      "step": 2106020
    },
    {
      "epoch": 3.4465806510738854,
      "grad_norm": 0.16515620052814484,
      "learning_rate": 3.1273103457364446e-06,
      "loss": 0.0121,
      "step": 2106040
    },
    {
      "epoch": 3.446613381512539,
      "grad_norm": 0.4167921245098114,
      "learning_rate": 3.1272444535229273e-06,
      "loss": 0.0094,
      "step": 2106060
    },
    {
      "epoch": 3.446646111951192,
      "grad_norm": 0.33584296703338623,
      "learning_rate": 3.12717856130941e-06,
      "loss": 0.0103,
      "step": 2106080
    },
    {
      "epoch": 3.446678842389846,
      "grad_norm": 0.35357484221458435,
      "learning_rate": 3.1271126690958932e-06,
      "loss": 0.012,
      "step": 2106100
    },
    {
      "epoch": 3.446711572828499,
      "grad_norm": 0.0898294448852539,
      "learning_rate": 3.127046776882376e-06,
      "loss": 0.0141,
      "step": 2106120
    },
    {
      "epoch": 3.4467443032671525,
      "grad_norm": 0.2724582552909851,
      "learning_rate": 3.1269808846688587e-06,
      "loss": 0.0102,
      "step": 2106140
    },
    {
      "epoch": 3.4467770337058057,
      "grad_norm": 0.4449542760848999,
      "learning_rate": 3.1269149924553414e-06,
      "loss": 0.012,
      "step": 2106160
    },
    {
      "epoch": 3.446809764144459,
      "grad_norm": 0.7174807786941528,
      "learning_rate": 3.1268491002418246e-06,
      "loss": 0.0146,
      "step": 2106180
    },
    {
      "epoch": 3.4468424945831124,
      "grad_norm": 0.4603785276412964,
      "learning_rate": 3.1267832080283078e-06,
      "loss": 0.0113,
      "step": 2106200
    },
    {
      "epoch": 3.4468752250217656,
      "grad_norm": 1.2402410507202148,
      "learning_rate": 3.1267173158147905e-06,
      "loss": 0.0099,
      "step": 2106220
    },
    {
      "epoch": 3.446907955460419,
      "grad_norm": 0.2777949571609497,
      "learning_rate": 3.1266514236012737e-06,
      "loss": 0.0121,
      "step": 2106240
    },
    {
      "epoch": 3.4469406858990723,
      "grad_norm": 0.31900203227996826,
      "learning_rate": 3.1265855313877564e-06,
      "loss": 0.0097,
      "step": 2106260
    },
    {
      "epoch": 3.446973416337726,
      "grad_norm": 0.17027629911899567,
      "learning_rate": 3.126519639174239e-06,
      "loss": 0.0139,
      "step": 2106280
    },
    {
      "epoch": 3.447006146776379,
      "grad_norm": 0.3110417127609253,
      "learning_rate": 3.126453746960722e-06,
      "loss": 0.0109,
      "step": 2106300
    },
    {
      "epoch": 3.447038877215032,
      "grad_norm": 0.2854164242744446,
      "learning_rate": 3.126387854747205e-06,
      "loss": 0.013,
      "step": 2106320
    },
    {
      "epoch": 3.447071607653686,
      "grad_norm": 0.3362554907798767,
      "learning_rate": 3.126321962533688e-06,
      "loss": 0.0125,
      "step": 2106340
    },
    {
      "epoch": 3.447104338092339,
      "grad_norm": 0.06589754670858383,
      "learning_rate": 3.1262560703201705e-06,
      "loss": 0.0084,
      "step": 2106360
    },
    {
      "epoch": 3.4471370685309926,
      "grad_norm": 0.2132682353258133,
      "learning_rate": 3.1261901781066533e-06,
      "loss": 0.0137,
      "step": 2106380
    },
    {
      "epoch": 3.4471697989696457,
      "grad_norm": 0.38040101528167725,
      "learning_rate": 3.126124285893136e-06,
      "loss": 0.0129,
      "step": 2106400
    },
    {
      "epoch": 3.4472025294082993,
      "grad_norm": 0.13467460870742798,
      "learning_rate": 3.126058393679619e-06,
      "loss": 0.0135,
      "step": 2106420
    },
    {
      "epoch": 3.4472352598469524,
      "grad_norm": 0.1941842883825302,
      "learning_rate": 3.125992501466102e-06,
      "loss": 0.0113,
      "step": 2106440
    },
    {
      "epoch": 3.4472679902856056,
      "grad_norm": 0.5560234785079956,
      "learning_rate": 3.1259266092525847e-06,
      "loss": 0.0169,
      "step": 2106460
    },
    {
      "epoch": 3.447300720724259,
      "grad_norm": 0.16211971640586853,
      "learning_rate": 3.1258607170390674e-06,
      "loss": 0.0058,
      "step": 2106480
    },
    {
      "epoch": 3.4473334511629123,
      "grad_norm": 0.2444489449262619,
      "learning_rate": 3.1257948248255506e-06,
      "loss": 0.0074,
      "step": 2106500
    },
    {
      "epoch": 3.447366181601566,
      "grad_norm": 0.4067190885543823,
      "learning_rate": 3.1257289326120333e-06,
      "loss": 0.0183,
      "step": 2106520
    },
    {
      "epoch": 3.447398912040219,
      "grad_norm": 0.0772990733385086,
      "learning_rate": 3.125663040398516e-06,
      "loss": 0.0151,
      "step": 2106540
    },
    {
      "epoch": 3.4474316424788727,
      "grad_norm": 0.18711364269256592,
      "learning_rate": 3.1255971481849996e-06,
      "loss": 0.0115,
      "step": 2106560
    },
    {
      "epoch": 3.447464372917526,
      "grad_norm": 0.3069305121898651,
      "learning_rate": 3.1255312559714824e-06,
      "loss": 0.0106,
      "step": 2106580
    },
    {
      "epoch": 3.447497103356179,
      "grad_norm": 0.13069148361682892,
      "learning_rate": 3.125465363757965e-06,
      "loss": 0.0151,
      "step": 2106600
    },
    {
      "epoch": 3.4475298337948326,
      "grad_norm": 0.4360051453113556,
      "learning_rate": 3.125399471544448e-06,
      "loss": 0.0108,
      "step": 2106620
    },
    {
      "epoch": 3.4475625642334857,
      "grad_norm": 0.059102121740579605,
      "learning_rate": 3.125333579330931e-06,
      "loss": 0.0062,
      "step": 2106640
    },
    {
      "epoch": 3.4475952946721393,
      "grad_norm": 0.2880534827709198,
      "learning_rate": 3.1252676871174138e-06,
      "loss": 0.0086,
      "step": 2106660
    },
    {
      "epoch": 3.4476280251107925,
      "grad_norm": 0.22399656474590302,
      "learning_rate": 3.1252017949038965e-06,
      "loss": 0.0099,
      "step": 2106680
    },
    {
      "epoch": 3.447660755549446,
      "grad_norm": 0.33765605092048645,
      "learning_rate": 3.1251359026903792e-06,
      "loss": 0.0167,
      "step": 2106700
    },
    {
      "epoch": 3.447693485988099,
      "grad_norm": 0.08342424035072327,
      "learning_rate": 3.1250700104768624e-06,
      "loss": 0.0115,
      "step": 2106720
    },
    {
      "epoch": 3.4477262164267524,
      "grad_norm": 0.16274318099021912,
      "learning_rate": 3.125004118263345e-06,
      "loss": 0.0116,
      "step": 2106740
    },
    {
      "epoch": 3.447758946865406,
      "grad_norm": 0.6526336073875427,
      "learning_rate": 3.124938226049828e-06,
      "loss": 0.0118,
      "step": 2106760
    },
    {
      "epoch": 3.447791677304059,
      "grad_norm": 0.10161227732896805,
      "learning_rate": 3.1248723338363106e-06,
      "loss": 0.0126,
      "step": 2106780
    },
    {
      "epoch": 3.4478244077427127,
      "grad_norm": 0.24111822247505188,
      "learning_rate": 3.1248064416227934e-06,
      "loss": 0.0088,
      "step": 2106800
    },
    {
      "epoch": 3.447857138181366,
      "grad_norm": 0.2513309419155121,
      "learning_rate": 3.1247405494092765e-06,
      "loss": 0.0105,
      "step": 2106820
    },
    {
      "epoch": 3.4478898686200194,
      "grad_norm": 0.2826248109340668,
      "learning_rate": 3.1246746571957593e-06,
      "loss": 0.0072,
      "step": 2106840
    },
    {
      "epoch": 3.4479225990586726,
      "grad_norm": 0.14250069856643677,
      "learning_rate": 3.124608764982242e-06,
      "loss": 0.0097,
      "step": 2106860
    },
    {
      "epoch": 3.4479553294973257,
      "grad_norm": 0.29730290174484253,
      "learning_rate": 3.1245428727687247e-06,
      "loss": 0.0079,
      "step": 2106880
    },
    {
      "epoch": 3.4479880599359793,
      "grad_norm": 0.6646988987922668,
      "learning_rate": 3.1244769805552083e-06,
      "loss": 0.0113,
      "step": 2106900
    },
    {
      "epoch": 3.4480207903746325,
      "grad_norm": 0.7267977595329285,
      "learning_rate": 3.124411088341691e-06,
      "loss": 0.0131,
      "step": 2106920
    },
    {
      "epoch": 3.448053520813286,
      "grad_norm": 0.2528839707374573,
      "learning_rate": 3.124345196128174e-06,
      "loss": 0.0105,
      "step": 2106940
    },
    {
      "epoch": 3.4480862512519392,
      "grad_norm": 0.32537052035331726,
      "learning_rate": 3.124279303914657e-06,
      "loss": 0.0092,
      "step": 2106960
    },
    {
      "epoch": 3.448118981690593,
      "grad_norm": 0.22657890617847443,
      "learning_rate": 3.1242134117011397e-06,
      "loss": 0.0139,
      "step": 2106980
    },
    {
      "epoch": 3.448151712129246,
      "grad_norm": 0.4791296124458313,
      "learning_rate": 3.1241475194876224e-06,
      "loss": 0.0133,
      "step": 2107000
    },
    {
      "epoch": 3.448184442567899,
      "grad_norm": 0.2475079894065857,
      "learning_rate": 3.124081627274105e-06,
      "loss": 0.0093,
      "step": 2107020
    },
    {
      "epoch": 3.4482171730065527,
      "grad_norm": 0.1486055999994278,
      "learning_rate": 3.1240157350605884e-06,
      "loss": 0.0131,
      "step": 2107040
    },
    {
      "epoch": 3.448249903445206,
      "grad_norm": 0.22370663285255432,
      "learning_rate": 3.123949842847071e-06,
      "loss": 0.0093,
      "step": 2107060
    },
    {
      "epoch": 3.4482826338838595,
      "grad_norm": 0.2808769643306732,
      "learning_rate": 3.123883950633554e-06,
      "loss": 0.0131,
      "step": 2107080
    },
    {
      "epoch": 3.4483153643225126,
      "grad_norm": 0.04934054613113403,
      "learning_rate": 3.1238180584200366e-06,
      "loss": 0.0113,
      "step": 2107100
    },
    {
      "epoch": 3.448348094761166,
      "grad_norm": 0.2778324484825134,
      "learning_rate": 3.1237521662065197e-06,
      "loss": 0.0064,
      "step": 2107120
    },
    {
      "epoch": 3.4483808251998194,
      "grad_norm": 0.9746233820915222,
      "learning_rate": 3.1236862739930025e-06,
      "loss": 0.0124,
      "step": 2107140
    },
    {
      "epoch": 3.4484135556384725,
      "grad_norm": 0.6727033257484436,
      "learning_rate": 3.1236203817794852e-06,
      "loss": 0.0118,
      "step": 2107160
    },
    {
      "epoch": 3.448446286077126,
      "grad_norm": 0.3061733841896057,
      "learning_rate": 3.123554489565968e-06,
      "loss": 0.0113,
      "step": 2107180
    },
    {
      "epoch": 3.4484790165157793,
      "grad_norm": 0.1295289844274521,
      "learning_rate": 3.123488597352451e-06,
      "loss": 0.0086,
      "step": 2107200
    },
    {
      "epoch": 3.448511746954433,
      "grad_norm": 0.060571182519197464,
      "learning_rate": 3.123422705138934e-06,
      "loss": 0.0071,
      "step": 2107220
    },
    {
      "epoch": 3.448544477393086,
      "grad_norm": 0.3280470669269562,
      "learning_rate": 3.1233568129254166e-06,
      "loss": 0.0125,
      "step": 2107240
    },
    {
      "epoch": 3.4485772078317396,
      "grad_norm": 0.4527345895767212,
      "learning_rate": 3.1232909207119e-06,
      "loss": 0.01,
      "step": 2107260
    },
    {
      "epoch": 3.4486099382703927,
      "grad_norm": 0.026666486635804176,
      "learning_rate": 3.123225028498383e-06,
      "loss": 0.0125,
      "step": 2107280
    },
    {
      "epoch": 3.448642668709046,
      "grad_norm": 0.28137123584747314,
      "learning_rate": 3.1231591362848657e-06,
      "loss": 0.0093,
      "step": 2107300
    },
    {
      "epoch": 3.4486753991476995,
      "grad_norm": 0.3703129291534424,
      "learning_rate": 3.1230932440713484e-06,
      "loss": 0.0109,
      "step": 2107320
    },
    {
      "epoch": 3.4487081295863526,
      "grad_norm": 0.5320568084716797,
      "learning_rate": 3.123027351857831e-06,
      "loss": 0.0136,
      "step": 2107340
    },
    {
      "epoch": 3.4487408600250062,
      "grad_norm": 0.22437717020511627,
      "learning_rate": 3.1229614596443143e-06,
      "loss": 0.0093,
      "step": 2107360
    },
    {
      "epoch": 3.4487735904636594,
      "grad_norm": 0.09238067269325256,
      "learning_rate": 3.122895567430797e-06,
      "loss": 0.01,
      "step": 2107380
    },
    {
      "epoch": 3.4488063209023125,
      "grad_norm": 0.26781409978866577,
      "learning_rate": 3.1228296752172798e-06,
      "loss": 0.0086,
      "step": 2107400
    },
    {
      "epoch": 3.448839051340966,
      "grad_norm": 0.3861963152885437,
      "learning_rate": 3.1227637830037625e-06,
      "loss": 0.0174,
      "step": 2107420
    },
    {
      "epoch": 3.4488717817796193,
      "grad_norm": 0.7348727583885193,
      "learning_rate": 3.1226978907902457e-06,
      "loss": 0.0108,
      "step": 2107440
    },
    {
      "epoch": 3.448904512218273,
      "grad_norm": 0.3198002874851227,
      "learning_rate": 3.1226319985767284e-06,
      "loss": 0.009,
      "step": 2107460
    },
    {
      "epoch": 3.448937242656926,
      "grad_norm": 0.980675220489502,
      "learning_rate": 3.122566106363211e-06,
      "loss": 0.0167,
      "step": 2107480
    },
    {
      "epoch": 3.448969973095579,
      "grad_norm": 0.4129517674446106,
      "learning_rate": 3.122500214149694e-06,
      "loss": 0.0092,
      "step": 2107500
    },
    {
      "epoch": 3.4490027035342328,
      "grad_norm": 0.22647017240524292,
      "learning_rate": 3.122434321936177e-06,
      "loss": 0.0096,
      "step": 2107520
    },
    {
      "epoch": 3.449035433972886,
      "grad_norm": 0.22014664113521576,
      "learning_rate": 3.12236842972266e-06,
      "loss": 0.0146,
      "step": 2107540
    },
    {
      "epoch": 3.4490681644115395,
      "grad_norm": 0.10795800387859344,
      "learning_rate": 3.1223025375091425e-06,
      "loss": 0.0088,
      "step": 2107560
    },
    {
      "epoch": 3.4491008948501927,
      "grad_norm": 0.08982981741428375,
      "learning_rate": 3.1222366452956253e-06,
      "loss": 0.0085,
      "step": 2107580
    },
    {
      "epoch": 3.4491336252888463,
      "grad_norm": 0.4105777144432068,
      "learning_rate": 3.122170753082109e-06,
      "loss": 0.012,
      "step": 2107600
    },
    {
      "epoch": 3.4491663557274994,
      "grad_norm": 0.20448128879070282,
      "learning_rate": 3.1221048608685916e-06,
      "loss": 0.0098,
      "step": 2107620
    },
    {
      "epoch": 3.4491990861661526,
      "grad_norm": 0.4641309976577759,
      "learning_rate": 3.1220389686550744e-06,
      "loss": 0.0118,
      "step": 2107640
    },
    {
      "epoch": 3.449231816604806,
      "grad_norm": 0.10150375962257385,
      "learning_rate": 3.1219730764415575e-06,
      "loss": 0.0063,
      "step": 2107660
    },
    {
      "epoch": 3.4492645470434593,
      "grad_norm": 0.3854828178882599,
      "learning_rate": 3.1219071842280403e-06,
      "loss": 0.0121,
      "step": 2107680
    },
    {
      "epoch": 3.449297277482113,
      "grad_norm": 0.2554057240486145,
      "learning_rate": 3.121841292014523e-06,
      "loss": 0.013,
      "step": 2107700
    },
    {
      "epoch": 3.449330007920766,
      "grad_norm": 0.6349501013755798,
      "learning_rate": 3.1217753998010057e-06,
      "loss": 0.0117,
      "step": 2107720
    },
    {
      "epoch": 3.4493627383594196,
      "grad_norm": 0.23485274612903595,
      "learning_rate": 3.121709507587489e-06,
      "loss": 0.0118,
      "step": 2107740
    },
    {
      "epoch": 3.449395468798073,
      "grad_norm": 0.08540085703134537,
      "learning_rate": 3.1216436153739716e-06,
      "loss": 0.0101,
      "step": 2107760
    },
    {
      "epoch": 3.449428199236726,
      "grad_norm": 0.4553602933883667,
      "learning_rate": 3.1215777231604544e-06,
      "loss": 0.0122,
      "step": 2107780
    },
    {
      "epoch": 3.4494609296753795,
      "grad_norm": 0.5151848793029785,
      "learning_rate": 3.121511830946937e-06,
      "loss": 0.0081,
      "step": 2107800
    },
    {
      "epoch": 3.4494936601140327,
      "grad_norm": 0.057410452514886856,
      "learning_rate": 3.12144593873342e-06,
      "loss": 0.0131,
      "step": 2107820
    },
    {
      "epoch": 3.4495263905526863,
      "grad_norm": 0.4562198519706726,
      "learning_rate": 3.121380046519903e-06,
      "loss": 0.0167,
      "step": 2107840
    },
    {
      "epoch": 3.4495591209913394,
      "grad_norm": 0.26972150802612305,
      "learning_rate": 3.1213141543063858e-06,
      "loss": 0.0116,
      "step": 2107860
    },
    {
      "epoch": 3.449591851429993,
      "grad_norm": 0.39131632447242737,
      "learning_rate": 3.1212482620928685e-06,
      "loss": 0.0127,
      "step": 2107880
    },
    {
      "epoch": 3.449624581868646,
      "grad_norm": 0.30578818917274475,
      "learning_rate": 3.1211823698793512e-06,
      "loss": 0.0142,
      "step": 2107900
    },
    {
      "epoch": 3.4496573123072993,
      "grad_norm": 0.2239869087934494,
      "learning_rate": 3.1211164776658344e-06,
      "loss": 0.0134,
      "step": 2107920
    },
    {
      "epoch": 3.449690042745953,
      "grad_norm": 0.34109076857566833,
      "learning_rate": 3.121050585452317e-06,
      "loss": 0.0114,
      "step": 2107940
    },
    {
      "epoch": 3.449722773184606,
      "grad_norm": 0.16273701190948486,
      "learning_rate": 3.1209846932388003e-06,
      "loss": 0.0148,
      "step": 2107960
    },
    {
      "epoch": 3.4497555036232597,
      "grad_norm": 0.11455971747636795,
      "learning_rate": 3.1209188010252835e-06,
      "loss": 0.0121,
      "step": 2107980
    },
    {
      "epoch": 3.449788234061913,
      "grad_norm": 0.5523201823234558,
      "learning_rate": 3.120852908811766e-06,
      "loss": 0.0179,
      "step": 2108000
    },
    {
      "epoch": 3.4498209645005664,
      "grad_norm": 0.1624220609664917,
      "learning_rate": 3.120787016598249e-06,
      "loss": 0.0106,
      "step": 2108020
    },
    {
      "epoch": 3.4498536949392196,
      "grad_norm": 0.08969562500715256,
      "learning_rate": 3.1207211243847317e-06,
      "loss": 0.0068,
      "step": 2108040
    },
    {
      "epoch": 3.4498864253778727,
      "grad_norm": 0.14066092669963837,
      "learning_rate": 3.120655232171215e-06,
      "loss": 0.0101,
      "step": 2108060
    },
    {
      "epoch": 3.4499191558165263,
      "grad_norm": 0.30655765533447266,
      "learning_rate": 3.1205893399576976e-06,
      "loss": 0.0134,
      "step": 2108080
    },
    {
      "epoch": 3.4499518862551795,
      "grad_norm": 1.071786880493164,
      "learning_rate": 3.1205234477441803e-06,
      "loss": 0.0162,
      "step": 2108100
    },
    {
      "epoch": 3.449984616693833,
      "grad_norm": 0.7425600290298462,
      "learning_rate": 3.120457555530663e-06,
      "loss": 0.0146,
      "step": 2108120
    },
    {
      "epoch": 3.450017347132486,
      "grad_norm": 0.0853596180677414,
      "learning_rate": 3.1203916633171462e-06,
      "loss": 0.0089,
      "step": 2108140
    },
    {
      "epoch": 3.45005007757114,
      "grad_norm": 0.26754477620124817,
      "learning_rate": 3.120325771103629e-06,
      "loss": 0.0086,
      "step": 2108160
    },
    {
      "epoch": 3.450082808009793,
      "grad_norm": 0.0707770437002182,
      "learning_rate": 3.1202598788901117e-06,
      "loss": 0.0158,
      "step": 2108180
    },
    {
      "epoch": 3.450115538448446,
      "grad_norm": 0.5459054708480835,
      "learning_rate": 3.1201939866765945e-06,
      "loss": 0.0115,
      "step": 2108200
    },
    {
      "epoch": 3.4501482688870997,
      "grad_norm": 0.2213832139968872,
      "learning_rate": 3.120128094463077e-06,
      "loss": 0.0108,
      "step": 2108220
    },
    {
      "epoch": 3.450180999325753,
      "grad_norm": 0.3014172911643982,
      "learning_rate": 3.1200622022495604e-06,
      "loss": 0.0155,
      "step": 2108240
    },
    {
      "epoch": 3.4502137297644064,
      "grad_norm": 0.4950680434703827,
      "learning_rate": 3.119996310036043e-06,
      "loss": 0.0122,
      "step": 2108260
    },
    {
      "epoch": 3.4502464602030596,
      "grad_norm": 0.1475280523300171,
      "learning_rate": 3.119930417822526e-06,
      "loss": 0.0099,
      "step": 2108280
    },
    {
      "epoch": 3.450279190641713,
      "grad_norm": 0.47485965490341187,
      "learning_rate": 3.1198645256090086e-06,
      "loss": 0.0113,
      "step": 2108300
    },
    {
      "epoch": 3.4503119210803663,
      "grad_norm": 0.6593525409698486,
      "learning_rate": 3.119798633395492e-06,
      "loss": 0.0153,
      "step": 2108320
    },
    {
      "epoch": 3.4503446515190195,
      "grad_norm": 0.32998669147491455,
      "learning_rate": 3.119732741181975e-06,
      "loss": 0.0132,
      "step": 2108340
    },
    {
      "epoch": 3.450377381957673,
      "grad_norm": 0.37058472633361816,
      "learning_rate": 3.1196668489684576e-06,
      "loss": 0.0094,
      "step": 2108360
    },
    {
      "epoch": 3.4504101123963262,
      "grad_norm": 0.24020598828792572,
      "learning_rate": 3.119600956754941e-06,
      "loss": 0.0116,
      "step": 2108380
    },
    {
      "epoch": 3.45044284283498,
      "grad_norm": 0.6034207940101624,
      "learning_rate": 3.1195350645414235e-06,
      "loss": 0.01,
      "step": 2108400
    },
    {
      "epoch": 3.450475573273633,
      "grad_norm": 0.13488450646400452,
      "learning_rate": 3.1194691723279063e-06,
      "loss": 0.0133,
      "step": 2108420
    },
    {
      "epoch": 3.4505083037122866,
      "grad_norm": 0.12955769896507263,
      "learning_rate": 3.119403280114389e-06,
      "loss": 0.012,
      "step": 2108440
    },
    {
      "epoch": 3.4505410341509397,
      "grad_norm": 0.20752044022083282,
      "learning_rate": 3.119337387900872e-06,
      "loss": 0.0124,
      "step": 2108460
    },
    {
      "epoch": 3.450573764589593,
      "grad_norm": 0.19596578180789948,
      "learning_rate": 3.119271495687355e-06,
      "loss": 0.0103,
      "step": 2108480
    },
    {
      "epoch": 3.4506064950282465,
      "grad_norm": 0.3955193758010864,
      "learning_rate": 3.1192056034738377e-06,
      "loss": 0.0151,
      "step": 2108500
    },
    {
      "epoch": 3.4506392254668996,
      "grad_norm": 0.7542417645454407,
      "learning_rate": 3.1191397112603204e-06,
      "loss": 0.0202,
      "step": 2108520
    },
    {
      "epoch": 3.450671955905553,
      "grad_norm": 0.1458745002746582,
      "learning_rate": 3.1190738190468036e-06,
      "loss": 0.0118,
      "step": 2108540
    },
    {
      "epoch": 3.4507046863442064,
      "grad_norm": 0.2576577365398407,
      "learning_rate": 3.1190079268332863e-06,
      "loss": 0.0116,
      "step": 2108560
    },
    {
      "epoch": 3.45073741678286,
      "grad_norm": 0.10726796835660934,
      "learning_rate": 3.118942034619769e-06,
      "loss": 0.0116,
      "step": 2108580
    },
    {
      "epoch": 3.450770147221513,
      "grad_norm": 0.25475943088531494,
      "learning_rate": 3.118876142406252e-06,
      "loss": 0.0125,
      "step": 2108600
    },
    {
      "epoch": 3.4508028776601662,
      "grad_norm": 0.39457935094833374,
      "learning_rate": 3.118810250192735e-06,
      "loss": 0.0165,
      "step": 2108620
    },
    {
      "epoch": 3.45083560809882,
      "grad_norm": 0.12176181375980377,
      "learning_rate": 3.1187443579792177e-06,
      "loss": 0.0097,
      "step": 2108640
    },
    {
      "epoch": 3.450868338537473,
      "grad_norm": 0.38214120268821716,
      "learning_rate": 3.118678465765701e-06,
      "loss": 0.0119,
      "step": 2108660
    },
    {
      "epoch": 3.4509010689761266,
      "grad_norm": 0.10645762830972672,
      "learning_rate": 3.118612573552184e-06,
      "loss": 0.0096,
      "step": 2108680
    },
    {
      "epoch": 3.4509337994147797,
      "grad_norm": 0.16009381413459778,
      "learning_rate": 3.1185466813386668e-06,
      "loss": 0.009,
      "step": 2108700
    },
    {
      "epoch": 3.4509665298534333,
      "grad_norm": 0.46740707755088806,
      "learning_rate": 3.1184807891251495e-06,
      "loss": 0.013,
      "step": 2108720
    },
    {
      "epoch": 3.4509992602920865,
      "grad_norm": 0.3239559829235077,
      "learning_rate": 3.1184148969116322e-06,
      "loss": 0.0081,
      "step": 2108740
    },
    {
      "epoch": 3.4510319907307396,
      "grad_norm": 0.22282645106315613,
      "learning_rate": 3.118349004698115e-06,
      "loss": 0.0078,
      "step": 2108760
    },
    {
      "epoch": 3.4510647211693932,
      "grad_norm": 0.4161520004272461,
      "learning_rate": 3.118283112484598e-06,
      "loss": 0.014,
      "step": 2108780
    },
    {
      "epoch": 3.4510974516080464,
      "grad_norm": 0.2746350169181824,
      "learning_rate": 3.118217220271081e-06,
      "loss": 0.0069,
      "step": 2108800
    },
    {
      "epoch": 3.4511301820467,
      "grad_norm": 0.2626629173755646,
      "learning_rate": 3.1181513280575636e-06,
      "loss": 0.0153,
      "step": 2108820
    },
    {
      "epoch": 3.451162912485353,
      "grad_norm": 0.24639759957790375,
      "learning_rate": 3.1180854358440464e-06,
      "loss": 0.009,
      "step": 2108840
    },
    {
      "epoch": 3.4511956429240067,
      "grad_norm": 0.3199552297592163,
      "learning_rate": 3.1180195436305295e-06,
      "loss": 0.0108,
      "step": 2108860
    },
    {
      "epoch": 3.45122837336266,
      "grad_norm": 0.271857887506485,
      "learning_rate": 3.1179536514170123e-06,
      "loss": 0.0076,
      "step": 2108880
    },
    {
      "epoch": 3.451261103801313,
      "grad_norm": 0.7686949968338013,
      "learning_rate": 3.117887759203495e-06,
      "loss": 0.0089,
      "step": 2108900
    },
    {
      "epoch": 3.4512938342399666,
      "grad_norm": 0.15505847334861755,
      "learning_rate": 3.1178218669899777e-06,
      "loss": 0.0083,
      "step": 2108920
    },
    {
      "epoch": 3.4513265646786198,
      "grad_norm": 0.23314188420772552,
      "learning_rate": 3.117755974776461e-06,
      "loss": 0.0106,
      "step": 2108940
    },
    {
      "epoch": 3.4513592951172734,
      "grad_norm": 0.1669415384531021,
      "learning_rate": 3.1176900825629436e-06,
      "loss": 0.01,
      "step": 2108960
    },
    {
      "epoch": 3.4513920255559265,
      "grad_norm": 0.5416086912155151,
      "learning_rate": 3.1176241903494264e-06,
      "loss": 0.0063,
      "step": 2108980
    },
    {
      "epoch": 3.4514247559945797,
      "grad_norm": 0.24358594417572021,
      "learning_rate": 3.117558298135909e-06,
      "loss": 0.008,
      "step": 2109000
    },
    {
      "epoch": 3.4514574864332332,
      "grad_norm": 0.2421964555978775,
      "learning_rate": 3.1174924059223927e-06,
      "loss": 0.0075,
      "step": 2109020
    },
    {
      "epoch": 3.4514902168718864,
      "grad_norm": 0.23869384825229645,
      "learning_rate": 3.1174265137088755e-06,
      "loss": 0.0108,
      "step": 2109040
    },
    {
      "epoch": 3.45152294731054,
      "grad_norm": 0.218337744474411,
      "learning_rate": 3.117360621495358e-06,
      "loss": 0.009,
      "step": 2109060
    },
    {
      "epoch": 3.451555677749193,
      "grad_norm": 0.3809082806110382,
      "learning_rate": 3.1172947292818414e-06,
      "loss": 0.0088,
      "step": 2109080
    },
    {
      "epoch": 3.4515884081878463,
      "grad_norm": 0.7644992470741272,
      "learning_rate": 3.117228837068324e-06,
      "loss": 0.0101,
      "step": 2109100
    },
    {
      "epoch": 3.4516211386265,
      "grad_norm": 0.07602640986442566,
      "learning_rate": 3.117162944854807e-06,
      "loss": 0.0096,
      "step": 2109120
    },
    {
      "epoch": 3.451653869065153,
      "grad_norm": 0.23322895169258118,
      "learning_rate": 3.1170970526412896e-06,
      "loss": 0.0081,
      "step": 2109140
    },
    {
      "epoch": 3.4516865995038066,
      "grad_norm": 0.38095206022262573,
      "learning_rate": 3.1170311604277727e-06,
      "loss": 0.0105,
      "step": 2109160
    },
    {
      "epoch": 3.45171932994246,
      "grad_norm": 0.3004283308982849,
      "learning_rate": 3.1169652682142555e-06,
      "loss": 0.0079,
      "step": 2109180
    },
    {
      "epoch": 3.4517520603811134,
      "grad_norm": 0.2865380644798279,
      "learning_rate": 3.1168993760007382e-06,
      "loss": 0.0102,
      "step": 2109200
    },
    {
      "epoch": 3.4517847908197665,
      "grad_norm": 0.10707449167966843,
      "learning_rate": 3.116833483787221e-06,
      "loss": 0.009,
      "step": 2109220
    },
    {
      "epoch": 3.4518175212584197,
      "grad_norm": 0.1782398521900177,
      "learning_rate": 3.1167675915737037e-06,
      "loss": 0.0076,
      "step": 2109240
    },
    {
      "epoch": 3.4518502516970733,
      "grad_norm": 1.0617936849594116,
      "learning_rate": 3.116701699360187e-06,
      "loss": 0.0134,
      "step": 2109260
    },
    {
      "epoch": 3.4518829821357264,
      "grad_norm": 0.18362389504909515,
      "learning_rate": 3.1166358071466696e-06,
      "loss": 0.0082,
      "step": 2109280
    },
    {
      "epoch": 3.45191571257438,
      "grad_norm": 0.25931835174560547,
      "learning_rate": 3.1165699149331523e-06,
      "loss": 0.0134,
      "step": 2109300
    },
    {
      "epoch": 3.451948443013033,
      "grad_norm": 0.8119649887084961,
      "learning_rate": 3.116504022719635e-06,
      "loss": 0.0095,
      "step": 2109320
    },
    {
      "epoch": 3.4519811734516868,
      "grad_norm": 0.11881598085165024,
      "learning_rate": 3.1164381305061182e-06,
      "loss": 0.0104,
      "step": 2109340
    },
    {
      "epoch": 3.45201390389034,
      "grad_norm": 0.3101958930492401,
      "learning_rate": 3.1163722382926014e-06,
      "loss": 0.0171,
      "step": 2109360
    },
    {
      "epoch": 3.452046634328993,
      "grad_norm": 0.31162112951278687,
      "learning_rate": 3.116306346079084e-06,
      "loss": 0.016,
      "step": 2109380
    },
    {
      "epoch": 3.4520793647676467,
      "grad_norm": 0.2846463620662689,
      "learning_rate": 3.1162404538655673e-06,
      "loss": 0.0139,
      "step": 2109400
    },
    {
      "epoch": 3.4521120952063,
      "grad_norm": 0.21262292563915253,
      "learning_rate": 3.11617456165205e-06,
      "loss": 0.0098,
      "step": 2109420
    },
    {
      "epoch": 3.4521448256449534,
      "grad_norm": 0.1910540759563446,
      "learning_rate": 3.116108669438533e-06,
      "loss": 0.0155,
      "step": 2109440
    },
    {
      "epoch": 3.4521775560836065,
      "grad_norm": 0.27756041288375854,
      "learning_rate": 3.1160427772250155e-06,
      "loss": 0.0101,
      "step": 2109460
    },
    {
      "epoch": 3.45221028652226,
      "grad_norm": 0.2903907001018524,
      "learning_rate": 3.1159768850114987e-06,
      "loss": 0.011,
      "step": 2109480
    },
    {
      "epoch": 3.4522430169609133,
      "grad_norm": 0.16271939873695374,
      "learning_rate": 3.1159109927979814e-06,
      "loss": 0.0117,
      "step": 2109500
    },
    {
      "epoch": 3.4522757473995664,
      "grad_norm": 0.2687813341617584,
      "learning_rate": 3.115845100584464e-06,
      "loss": 0.0084,
      "step": 2109520
    },
    {
      "epoch": 3.45230847783822,
      "grad_norm": 0.44994211196899414,
      "learning_rate": 3.115779208370947e-06,
      "loss": 0.0186,
      "step": 2109540
    },
    {
      "epoch": 3.452341208276873,
      "grad_norm": 0.053636081516742706,
      "learning_rate": 3.11571331615743e-06,
      "loss": 0.0089,
      "step": 2109560
    },
    {
      "epoch": 3.452373938715527,
      "grad_norm": 0.2834673821926117,
      "learning_rate": 3.115647423943913e-06,
      "loss": 0.0106,
      "step": 2109580
    },
    {
      "epoch": 3.45240666915418,
      "grad_norm": 0.3382193148136139,
      "learning_rate": 3.1155815317303956e-06,
      "loss": 0.0157,
      "step": 2109600
    },
    {
      "epoch": 3.4524393995928335,
      "grad_norm": 0.7203253507614136,
      "learning_rate": 3.1155156395168783e-06,
      "loss": 0.0102,
      "step": 2109620
    },
    {
      "epoch": 3.4524721300314867,
      "grad_norm": 0.16092881560325623,
      "learning_rate": 3.1154497473033615e-06,
      "loss": 0.0075,
      "step": 2109640
    },
    {
      "epoch": 3.45250486047014,
      "grad_norm": 0.6878947615623474,
      "learning_rate": 3.115383855089844e-06,
      "loss": 0.0197,
      "step": 2109660
    },
    {
      "epoch": 3.4525375909087934,
      "grad_norm": 0.1659437119960785,
      "learning_rate": 3.115317962876327e-06,
      "loss": 0.0117,
      "step": 2109680
    },
    {
      "epoch": 3.4525703213474466,
      "grad_norm": 0.6891793608665466,
      "learning_rate": 3.1152520706628097e-06,
      "loss": 0.0159,
      "step": 2109700
    },
    {
      "epoch": 3.4526030517861,
      "grad_norm": 0.045741427689790726,
      "learning_rate": 3.1151861784492933e-06,
      "loss": 0.0101,
      "step": 2109720
    },
    {
      "epoch": 3.4526357822247533,
      "grad_norm": 0.12784282863140106,
      "learning_rate": 3.115120286235776e-06,
      "loss": 0.0104,
      "step": 2109740
    },
    {
      "epoch": 3.452668512663407,
      "grad_norm": 0.24739618599414825,
      "learning_rate": 3.1150543940222587e-06,
      "loss": 0.0115,
      "step": 2109760
    },
    {
      "epoch": 3.45270124310206,
      "grad_norm": 0.619704008102417,
      "learning_rate": 3.1149885018087415e-06,
      "loss": 0.0112,
      "step": 2109780
    },
    {
      "epoch": 3.452733973540713,
      "grad_norm": 0.45067641139030457,
      "learning_rate": 3.1149226095952246e-06,
      "loss": 0.0151,
      "step": 2109800
    },
    {
      "epoch": 3.452766703979367,
      "grad_norm": 0.1405702531337738,
      "learning_rate": 3.1148567173817074e-06,
      "loss": 0.0158,
      "step": 2109820
    },
    {
      "epoch": 3.45279943441802,
      "grad_norm": 0.4725891053676605,
      "learning_rate": 3.11479082516819e-06,
      "loss": 0.0125,
      "step": 2109840
    },
    {
      "epoch": 3.4528321648566735,
      "grad_norm": 0.3011472225189209,
      "learning_rate": 3.114724932954673e-06,
      "loss": 0.0088,
      "step": 2109860
    },
    {
      "epoch": 3.4528648952953267,
      "grad_norm": 0.1598808616399765,
      "learning_rate": 3.114659040741156e-06,
      "loss": 0.0166,
      "step": 2109880
    },
    {
      "epoch": 3.4528976257339803,
      "grad_norm": 0.20524384081363678,
      "learning_rate": 3.1145931485276388e-06,
      "loss": 0.0126,
      "step": 2109900
    },
    {
      "epoch": 3.4529303561726334,
      "grad_norm": 0.3397996723651886,
      "learning_rate": 3.1145272563141215e-06,
      "loss": 0.013,
      "step": 2109920
    },
    {
      "epoch": 3.4529630866112866,
      "grad_norm": 0.20245234668254852,
      "learning_rate": 3.1144613641006042e-06,
      "loss": 0.0152,
      "step": 2109940
    },
    {
      "epoch": 3.45299581704994,
      "grad_norm": 0.4476247727870941,
      "learning_rate": 3.1143954718870874e-06,
      "loss": 0.008,
      "step": 2109960
    },
    {
      "epoch": 3.4530285474885933,
      "grad_norm": 0.18986447155475616,
      "learning_rate": 3.11432957967357e-06,
      "loss": 0.0081,
      "step": 2109980
    },
    {
      "epoch": 3.453061277927247,
      "grad_norm": 0.23001809418201447,
      "learning_rate": 3.114263687460053e-06,
      "loss": 0.0095,
      "step": 2110000
    },
    {
      "epoch": 3.4530940083659,
      "grad_norm": 0.20052677392959595,
      "learning_rate": 3.1141977952465356e-06,
      "loss": 0.011,
      "step": 2110020
    },
    {
      "epoch": 3.4531267388045537,
      "grad_norm": 0.16899465024471283,
      "learning_rate": 3.114131903033019e-06,
      "loss": 0.0154,
      "step": 2110040
    },
    {
      "epoch": 3.453159469243207,
      "grad_norm": 0.40393033623695374,
      "learning_rate": 3.1140660108195015e-06,
      "loss": 0.0121,
      "step": 2110060
    },
    {
      "epoch": 3.45319219968186,
      "grad_norm": 0.12166208773851395,
      "learning_rate": 3.1140001186059847e-06,
      "loss": 0.011,
      "step": 2110080
    },
    {
      "epoch": 3.4532249301205136,
      "grad_norm": 0.4239373207092285,
      "learning_rate": 3.113934226392468e-06,
      "loss": 0.0073,
      "step": 2110100
    },
    {
      "epoch": 3.4532576605591667,
      "grad_norm": 0.3115043044090271,
      "learning_rate": 3.1138683341789506e-06,
      "loss": 0.0092,
      "step": 2110120
    },
    {
      "epoch": 3.4532903909978203,
      "grad_norm": 0.401570200920105,
      "learning_rate": 3.1138024419654333e-06,
      "loss": 0.018,
      "step": 2110140
    },
    {
      "epoch": 3.4533231214364735,
      "grad_norm": 0.545224130153656,
      "learning_rate": 3.113736549751916e-06,
      "loss": 0.0104,
      "step": 2110160
    },
    {
      "epoch": 3.453355851875127,
      "grad_norm": 0.28208592534065247,
      "learning_rate": 3.1136706575383992e-06,
      "loss": 0.0137,
      "step": 2110180
    },
    {
      "epoch": 3.45338858231378,
      "grad_norm": 0.3816510736942291,
      "learning_rate": 3.113604765324882e-06,
      "loss": 0.01,
      "step": 2110200
    },
    {
      "epoch": 3.4534213127524334,
      "grad_norm": 0.12600085139274597,
      "learning_rate": 3.1135388731113647e-06,
      "loss": 0.0083,
      "step": 2110220
    },
    {
      "epoch": 3.453454043191087,
      "grad_norm": 0.3510333001613617,
      "learning_rate": 3.1134729808978475e-06,
      "loss": 0.0095,
      "step": 2110240
    },
    {
      "epoch": 3.45348677362974,
      "grad_norm": 0.1650891751050949,
      "learning_rate": 3.11340708868433e-06,
      "loss": 0.0103,
      "step": 2110260
    },
    {
      "epoch": 3.4535195040683937,
      "grad_norm": 0.2138371467590332,
      "learning_rate": 3.1133411964708134e-06,
      "loss": 0.012,
      "step": 2110280
    },
    {
      "epoch": 3.453552234507047,
      "grad_norm": 0.36975952982902527,
      "learning_rate": 3.113275304257296e-06,
      "loss": 0.0081,
      "step": 2110300
    },
    {
      "epoch": 3.4535849649457004,
      "grad_norm": 0.19709135591983795,
      "learning_rate": 3.113209412043779e-06,
      "loss": 0.0109,
      "step": 2110320
    },
    {
      "epoch": 3.4536176953843536,
      "grad_norm": 0.6611773371696472,
      "learning_rate": 3.1131435198302616e-06,
      "loss": 0.0178,
      "step": 2110340
    },
    {
      "epoch": 3.4536504258230067,
      "grad_norm": 0.3887723982334137,
      "learning_rate": 3.1130776276167447e-06,
      "loss": 0.015,
      "step": 2110360
    },
    {
      "epoch": 3.4536831562616603,
      "grad_norm": 0.25506311655044556,
      "learning_rate": 3.1130117354032275e-06,
      "loss": 0.0097,
      "step": 2110380
    },
    {
      "epoch": 3.4537158867003135,
      "grad_norm": 0.3332938253879547,
      "learning_rate": 3.1129458431897102e-06,
      "loss": 0.0105,
      "step": 2110400
    },
    {
      "epoch": 3.453748617138967,
      "grad_norm": 0.12671831250190735,
      "learning_rate": 3.112879950976194e-06,
      "loss": 0.0192,
      "step": 2110420
    },
    {
      "epoch": 3.4537813475776202,
      "grad_norm": 0.7746036052703857,
      "learning_rate": 3.1128140587626766e-06,
      "loss": 0.0146,
      "step": 2110440
    },
    {
      "epoch": 3.4538140780162734,
      "grad_norm": 0.28019362688064575,
      "learning_rate": 3.1127481665491593e-06,
      "loss": 0.0134,
      "step": 2110460
    },
    {
      "epoch": 3.453846808454927,
      "grad_norm": 0.20805643498897552,
      "learning_rate": 3.112682274335642e-06,
      "loss": 0.0102,
      "step": 2110480
    },
    {
      "epoch": 3.45387953889358,
      "grad_norm": 0.27072128653526306,
      "learning_rate": 3.112616382122125e-06,
      "loss": 0.0179,
      "step": 2110500
    },
    {
      "epoch": 3.4539122693322337,
      "grad_norm": 0.21623121201992035,
      "learning_rate": 3.112550489908608e-06,
      "loss": 0.0128,
      "step": 2110520
    },
    {
      "epoch": 3.453944999770887,
      "grad_norm": 0.3185953199863434,
      "learning_rate": 3.1124845976950907e-06,
      "loss": 0.0101,
      "step": 2110540
    },
    {
      "epoch": 3.45397773020954,
      "grad_norm": 0.35418352484703064,
      "learning_rate": 3.1124187054815734e-06,
      "loss": 0.0098,
      "step": 2110560
    },
    {
      "epoch": 3.4540104606481936,
      "grad_norm": 0.5446059107780457,
      "learning_rate": 3.1123528132680566e-06,
      "loss": 0.0131,
      "step": 2110580
    },
    {
      "epoch": 3.4540431910868468,
      "grad_norm": 0.397597074508667,
      "learning_rate": 3.1122869210545393e-06,
      "loss": 0.0099,
      "step": 2110600
    },
    {
      "epoch": 3.4540759215255004,
      "grad_norm": 0.3258191645145416,
      "learning_rate": 3.112221028841022e-06,
      "loss": 0.013,
      "step": 2110620
    },
    {
      "epoch": 3.4541086519641535,
      "grad_norm": 0.446008563041687,
      "learning_rate": 3.112155136627505e-06,
      "loss": 0.0169,
      "step": 2110640
    },
    {
      "epoch": 3.454141382402807,
      "grad_norm": 0.050061579793691635,
      "learning_rate": 3.1120892444139875e-06,
      "loss": 0.0147,
      "step": 2110660
    },
    {
      "epoch": 3.4541741128414603,
      "grad_norm": 0.5514979362487793,
      "learning_rate": 3.1120233522004707e-06,
      "loss": 0.0108,
      "step": 2110680
    },
    {
      "epoch": 3.4542068432801134,
      "grad_norm": 2.265965461730957,
      "learning_rate": 3.1119574599869534e-06,
      "loss": 0.0115,
      "step": 2110700
    },
    {
      "epoch": 3.454239573718767,
      "grad_norm": 0.15242429077625275,
      "learning_rate": 3.111891567773436e-06,
      "loss": 0.0103,
      "step": 2110720
    },
    {
      "epoch": 3.45427230415742,
      "grad_norm": 0.2773161828517914,
      "learning_rate": 3.111825675559919e-06,
      "loss": 0.0147,
      "step": 2110740
    },
    {
      "epoch": 3.4543050345960737,
      "grad_norm": 0.44667381048202515,
      "learning_rate": 3.111759783346402e-06,
      "loss": 0.011,
      "step": 2110760
    },
    {
      "epoch": 3.454337765034727,
      "grad_norm": 0.057137381285429,
      "learning_rate": 3.1116938911328852e-06,
      "loss": 0.0093,
      "step": 2110780
    },
    {
      "epoch": 3.4543704954733805,
      "grad_norm": 0.2418818324804306,
      "learning_rate": 3.111627998919368e-06,
      "loss": 0.0096,
      "step": 2110800
    },
    {
      "epoch": 3.4544032259120336,
      "grad_norm": 0.22474582493305206,
      "learning_rate": 3.111562106705851e-06,
      "loss": 0.0107,
      "step": 2110820
    },
    {
      "epoch": 3.454435956350687,
      "grad_norm": 0.21925005316734314,
      "learning_rate": 3.111496214492334e-06,
      "loss": 0.0106,
      "step": 2110840
    },
    {
      "epoch": 3.4544686867893404,
      "grad_norm": 0.1564524620771408,
      "learning_rate": 3.1114303222788166e-06,
      "loss": 0.0128,
      "step": 2110860
    },
    {
      "epoch": 3.4545014172279935,
      "grad_norm": 0.0996607318520546,
      "learning_rate": 3.1113644300652994e-06,
      "loss": 0.0109,
      "step": 2110880
    },
    {
      "epoch": 3.454534147666647,
      "grad_norm": 0.13841837644577026,
      "learning_rate": 3.1112985378517825e-06,
      "loss": 0.0084,
      "step": 2110900
    },
    {
      "epoch": 3.4545668781053003,
      "grad_norm": 0.2441335767507553,
      "learning_rate": 3.1112326456382653e-06,
      "loss": 0.0103,
      "step": 2110920
    },
    {
      "epoch": 3.454599608543954,
      "grad_norm": 0.0697789341211319,
      "learning_rate": 3.111166753424748e-06,
      "loss": 0.012,
      "step": 2110940
    },
    {
      "epoch": 3.454632338982607,
      "grad_norm": 0.130978062748909,
      "learning_rate": 3.1111008612112308e-06,
      "loss": 0.0113,
      "step": 2110960
    },
    {
      "epoch": 3.45466506942126,
      "grad_norm": 0.2620065212249756,
      "learning_rate": 3.111034968997714e-06,
      "loss": 0.0097,
      "step": 2110980
    },
    {
      "epoch": 3.4546977998599138,
      "grad_norm": 0.3942229151725769,
      "learning_rate": 3.1109690767841967e-06,
      "loss": 0.0129,
      "step": 2111000
    },
    {
      "epoch": 3.454730530298567,
      "grad_norm": 0.02826519124209881,
      "learning_rate": 3.1109031845706794e-06,
      "loss": 0.012,
      "step": 2111020
    },
    {
      "epoch": 3.4547632607372205,
      "grad_norm": 0.289559543132782,
      "learning_rate": 3.110837292357162e-06,
      "loss": 0.0114,
      "step": 2111040
    },
    {
      "epoch": 3.4547959911758737,
      "grad_norm": 0.302850604057312,
      "learning_rate": 3.1107714001436453e-06,
      "loss": 0.0128,
      "step": 2111060
    },
    {
      "epoch": 3.4548287216145273,
      "grad_norm": 1.8139498233795166,
      "learning_rate": 3.110705507930128e-06,
      "loss": 0.0134,
      "step": 2111080
    },
    {
      "epoch": 3.4548614520531804,
      "grad_norm": 2.4259047508239746,
      "learning_rate": 3.1106396157166108e-06,
      "loss": 0.0088,
      "step": 2111100
    },
    {
      "epoch": 3.4548941824918336,
      "grad_norm": 0.7277313470840454,
      "learning_rate": 3.1105737235030944e-06,
      "loss": 0.0154,
      "step": 2111120
    },
    {
      "epoch": 3.454926912930487,
      "grad_norm": 1.0564591884613037,
      "learning_rate": 3.110507831289577e-06,
      "loss": 0.0154,
      "step": 2111140
    },
    {
      "epoch": 3.4549596433691403,
      "grad_norm": 0.04467282444238663,
      "learning_rate": 3.11044193907606e-06,
      "loss": 0.0105,
      "step": 2111160
    },
    {
      "epoch": 3.454992373807794,
      "grad_norm": 0.31605443358421326,
      "learning_rate": 3.1103760468625426e-06,
      "loss": 0.0104,
      "step": 2111180
    },
    {
      "epoch": 3.455025104246447,
      "grad_norm": 0.1322481334209442,
      "learning_rate": 3.1103101546490253e-06,
      "loss": 0.0165,
      "step": 2111200
    },
    {
      "epoch": 3.4550578346851006,
      "grad_norm": 0.3127562999725342,
      "learning_rate": 3.1102442624355085e-06,
      "loss": 0.0115,
      "step": 2111220
    },
    {
      "epoch": 3.455090565123754,
      "grad_norm": 0.38466575741767883,
      "learning_rate": 3.1101783702219912e-06,
      "loss": 0.0116,
      "step": 2111240
    },
    {
      "epoch": 3.455123295562407,
      "grad_norm": 0.23655639588832855,
      "learning_rate": 3.110112478008474e-06,
      "loss": 0.0102,
      "step": 2111260
    },
    {
      "epoch": 3.4551560260010605,
      "grad_norm": 0.11303990334272385,
      "learning_rate": 3.1100465857949567e-06,
      "loss": 0.0089,
      "step": 2111280
    },
    {
      "epoch": 3.4551887564397137,
      "grad_norm": 0.45676329731941223,
      "learning_rate": 3.10998069358144e-06,
      "loss": 0.017,
      "step": 2111300
    },
    {
      "epoch": 3.4552214868783673,
      "grad_norm": 0.291131854057312,
      "learning_rate": 3.1099148013679226e-06,
      "loss": 0.0126,
      "step": 2111320
    },
    {
      "epoch": 3.4552542173170204,
      "grad_norm": 0.06047890707850456,
      "learning_rate": 3.1098489091544053e-06,
      "loss": 0.0129,
      "step": 2111340
    },
    {
      "epoch": 3.455286947755674,
      "grad_norm": 0.24890108406543732,
      "learning_rate": 3.109783016940888e-06,
      "loss": 0.0073,
      "step": 2111360
    },
    {
      "epoch": 3.455319678194327,
      "grad_norm": 0.5801175832748413,
      "learning_rate": 3.1097171247273713e-06,
      "loss": 0.0128,
      "step": 2111380
    },
    {
      "epoch": 3.4553524086329803,
      "grad_norm": 0.3365526795387268,
      "learning_rate": 3.109651232513854e-06,
      "loss": 0.0114,
      "step": 2111400
    },
    {
      "epoch": 3.455385139071634,
      "grad_norm": 0.522232174873352,
      "learning_rate": 3.1095853403003367e-06,
      "loss": 0.0169,
      "step": 2111420
    },
    {
      "epoch": 3.455417869510287,
      "grad_norm": 0.09896870702505112,
      "learning_rate": 3.1095194480868195e-06,
      "loss": 0.012,
      "step": 2111440
    },
    {
      "epoch": 3.4554505999489407,
      "grad_norm": 0.30532652139663696,
      "learning_rate": 3.1094535558733026e-06,
      "loss": 0.0083,
      "step": 2111460
    },
    {
      "epoch": 3.455483330387594,
      "grad_norm": 0.4177292287349701,
      "learning_rate": 3.109387663659786e-06,
      "loss": 0.0095,
      "step": 2111480
    },
    {
      "epoch": 3.4555160608262474,
      "grad_norm": 0.10415578633546829,
      "learning_rate": 3.1093217714462685e-06,
      "loss": 0.0141,
      "step": 2111500
    },
    {
      "epoch": 3.4555487912649006,
      "grad_norm": 0.2540884017944336,
      "learning_rate": 3.1092558792327517e-06,
      "loss": 0.0089,
      "step": 2111520
    },
    {
      "epoch": 3.4555815217035537,
      "grad_norm": 0.7530688643455505,
      "learning_rate": 3.1091899870192344e-06,
      "loss": 0.0144,
      "step": 2111540
    },
    {
      "epoch": 3.4556142521422073,
      "grad_norm": 0.22927220165729523,
      "learning_rate": 3.109124094805717e-06,
      "loss": 0.0123,
      "step": 2111560
    },
    {
      "epoch": 3.4556469825808604,
      "grad_norm": 0.19241191446781158,
      "learning_rate": 3.1090582025922e-06,
      "loss": 0.0085,
      "step": 2111580
    },
    {
      "epoch": 3.455679713019514,
      "grad_norm": 0.5503504276275635,
      "learning_rate": 3.108992310378683e-06,
      "loss": 0.0114,
      "step": 2111600
    },
    {
      "epoch": 3.455712443458167,
      "grad_norm": 0.2159193605184555,
      "learning_rate": 3.108926418165166e-06,
      "loss": 0.0103,
      "step": 2111620
    },
    {
      "epoch": 3.455745173896821,
      "grad_norm": 0.5340246558189392,
      "learning_rate": 3.1088605259516486e-06,
      "loss": 0.0093,
      "step": 2111640
    },
    {
      "epoch": 3.455777904335474,
      "grad_norm": 0.6769824028015137,
      "learning_rate": 3.1087946337381313e-06,
      "loss": 0.0151,
      "step": 2111660
    },
    {
      "epoch": 3.455810634774127,
      "grad_norm": 0.3177657425403595,
      "learning_rate": 3.108728741524614e-06,
      "loss": 0.0144,
      "step": 2111680
    },
    {
      "epoch": 3.4558433652127807,
      "grad_norm": 0.7609341740608215,
      "learning_rate": 3.108662849311097e-06,
      "loss": 0.0134,
      "step": 2111700
    },
    {
      "epoch": 3.455876095651434,
      "grad_norm": 0.12416993826627731,
      "learning_rate": 3.10859695709758e-06,
      "loss": 0.0132,
      "step": 2111720
    },
    {
      "epoch": 3.4559088260900874,
      "grad_norm": 0.4347864091396332,
      "learning_rate": 3.1085310648840627e-06,
      "loss": 0.0069,
      "step": 2111740
    },
    {
      "epoch": 3.4559415565287406,
      "grad_norm": 0.05097680166363716,
      "learning_rate": 3.1084651726705454e-06,
      "loss": 0.0139,
      "step": 2111760
    },
    {
      "epoch": 3.455974286967394,
      "grad_norm": 0.2604296803474426,
      "learning_rate": 3.1083992804570286e-06,
      "loss": 0.0172,
      "step": 2111780
    },
    {
      "epoch": 3.4560070174060473,
      "grad_norm": 0.47000348567962646,
      "learning_rate": 3.1083333882435113e-06,
      "loss": 0.0162,
      "step": 2111800
    },
    {
      "epoch": 3.4560397478447005,
      "grad_norm": 0.31324291229248047,
      "learning_rate": 3.108267496029994e-06,
      "loss": 0.011,
      "step": 2111820
    },
    {
      "epoch": 3.456072478283354,
      "grad_norm": 0.5384712815284729,
      "learning_rate": 3.1082016038164777e-06,
      "loss": 0.0084,
      "step": 2111840
    },
    {
      "epoch": 3.456105208722007,
      "grad_norm": 0.24318915605545044,
      "learning_rate": 3.1081357116029604e-06,
      "loss": 0.0126,
      "step": 2111860
    },
    {
      "epoch": 3.456137939160661,
      "grad_norm": 0.14856909215450287,
      "learning_rate": 3.108069819389443e-06,
      "loss": 0.0092,
      "step": 2111880
    },
    {
      "epoch": 3.456170669599314,
      "grad_norm": 0.15554769337177277,
      "learning_rate": 3.108003927175926e-06,
      "loss": 0.0108,
      "step": 2111900
    },
    {
      "epoch": 3.4562034000379676,
      "grad_norm": 0.8050570487976074,
      "learning_rate": 3.107938034962409e-06,
      "loss": 0.0147,
      "step": 2111920
    },
    {
      "epoch": 3.4562361304766207,
      "grad_norm": 0.39474809169769287,
      "learning_rate": 3.1078721427488918e-06,
      "loss": 0.0074,
      "step": 2111940
    },
    {
      "epoch": 3.456268860915274,
      "grad_norm": 0.17882560193538666,
      "learning_rate": 3.1078062505353745e-06,
      "loss": 0.0083,
      "step": 2111960
    },
    {
      "epoch": 3.4563015913539274,
      "grad_norm": 0.4435684382915497,
      "learning_rate": 3.1077403583218573e-06,
      "loss": 0.0097,
      "step": 2111980
    },
    {
      "epoch": 3.4563343217925806,
      "grad_norm": 0.25649452209472656,
      "learning_rate": 3.1076744661083404e-06,
      "loss": 0.012,
      "step": 2112000
    },
    {
      "epoch": 3.456367052231234,
      "grad_norm": 0.22213618457317352,
      "learning_rate": 3.107608573894823e-06,
      "loss": 0.0137,
      "step": 2112020
    },
    {
      "epoch": 3.4563997826698873,
      "grad_norm": 0.19767417013645172,
      "learning_rate": 3.107542681681306e-06,
      "loss": 0.01,
      "step": 2112040
    },
    {
      "epoch": 3.4564325131085405,
      "grad_norm": 0.9509873986244202,
      "learning_rate": 3.1074767894677886e-06,
      "loss": 0.0108,
      "step": 2112060
    },
    {
      "epoch": 3.456465243547194,
      "grad_norm": 0.18762308359146118,
      "learning_rate": 3.1074108972542714e-06,
      "loss": 0.0114,
      "step": 2112080
    },
    {
      "epoch": 3.4564979739858472,
      "grad_norm": 0.621424674987793,
      "learning_rate": 3.1073450050407545e-06,
      "loss": 0.016,
      "step": 2112100
    },
    {
      "epoch": 3.456530704424501,
      "grad_norm": 0.08938255161046982,
      "learning_rate": 3.1072791128272373e-06,
      "loss": 0.0181,
      "step": 2112120
    },
    {
      "epoch": 3.456563434863154,
      "grad_norm": 0.18849293887615204,
      "learning_rate": 3.10721322061372e-06,
      "loss": 0.01,
      "step": 2112140
    },
    {
      "epoch": 3.456596165301807,
      "grad_norm": 0.49311843514442444,
      "learning_rate": 3.1071473284002028e-06,
      "loss": 0.0116,
      "step": 2112160
    },
    {
      "epoch": 3.4566288957404607,
      "grad_norm": 0.3533349931240082,
      "learning_rate": 3.1070814361866863e-06,
      "loss": 0.011,
      "step": 2112180
    },
    {
      "epoch": 3.456661626179114,
      "grad_norm": 0.3039179742336273,
      "learning_rate": 3.107015543973169e-06,
      "loss": 0.0094,
      "step": 2112200
    },
    {
      "epoch": 3.4566943566177675,
      "grad_norm": 0.13246315717697144,
      "learning_rate": 3.106949651759652e-06,
      "loss": 0.0151,
      "step": 2112220
    },
    {
      "epoch": 3.4567270870564206,
      "grad_norm": 0.21768975257873535,
      "learning_rate": 3.106883759546135e-06,
      "loss": 0.0072,
      "step": 2112240
    },
    {
      "epoch": 3.456759817495074,
      "grad_norm": 0.16654439270496368,
      "learning_rate": 3.1068178673326177e-06,
      "loss": 0.0069,
      "step": 2112260
    },
    {
      "epoch": 3.4567925479337274,
      "grad_norm": 0.38385438919067383,
      "learning_rate": 3.1067519751191005e-06,
      "loss": 0.0136,
      "step": 2112280
    },
    {
      "epoch": 3.4568252783723805,
      "grad_norm": 0.3460552394390106,
      "learning_rate": 3.106686082905583e-06,
      "loss": 0.0115,
      "step": 2112300
    },
    {
      "epoch": 3.456858008811034,
      "grad_norm": 0.19639189541339874,
      "learning_rate": 3.1066201906920664e-06,
      "loss": 0.0102,
      "step": 2112320
    },
    {
      "epoch": 3.4568907392496873,
      "grad_norm": 0.16831885278224945,
      "learning_rate": 3.106554298478549e-06,
      "loss": 0.0145,
      "step": 2112340
    },
    {
      "epoch": 3.456923469688341,
      "grad_norm": 0.708519458770752,
      "learning_rate": 3.106488406265032e-06,
      "loss": 0.0113,
      "step": 2112360
    },
    {
      "epoch": 3.456956200126994,
      "grad_norm": 2.129600763320923,
      "learning_rate": 3.1064225140515146e-06,
      "loss": 0.0095,
      "step": 2112380
    },
    {
      "epoch": 3.4569889305656476,
      "grad_norm": 0.3723071217536926,
      "learning_rate": 3.1063566218379978e-06,
      "loss": 0.0091,
      "step": 2112400
    },
    {
      "epoch": 3.4570216610043007,
      "grad_norm": 0.0978630855679512,
      "learning_rate": 3.1062907296244805e-06,
      "loss": 0.0125,
      "step": 2112420
    },
    {
      "epoch": 3.457054391442954,
      "grad_norm": 0.44839632511138916,
      "learning_rate": 3.1062248374109632e-06,
      "loss": 0.0156,
      "step": 2112440
    },
    {
      "epoch": 3.4570871218816075,
      "grad_norm": 0.3200456500053406,
      "learning_rate": 3.106158945197446e-06,
      "loss": 0.0115,
      "step": 2112460
    },
    {
      "epoch": 3.4571198523202606,
      "grad_norm": 0.2132362276315689,
      "learning_rate": 3.106093052983929e-06,
      "loss": 0.0075,
      "step": 2112480
    },
    {
      "epoch": 3.4571525827589142,
      "grad_norm": 0.060188550502061844,
      "learning_rate": 3.106027160770412e-06,
      "loss": 0.0088,
      "step": 2112500
    },
    {
      "epoch": 3.4571853131975674,
      "grad_norm": 0.774560809135437,
      "learning_rate": 3.1059612685568946e-06,
      "loss": 0.0094,
      "step": 2112520
    },
    {
      "epoch": 3.457218043636221,
      "grad_norm": 0.2726922333240509,
      "learning_rate": 3.105895376343378e-06,
      "loss": 0.0095,
      "step": 2112540
    },
    {
      "epoch": 3.457250774074874,
      "grad_norm": 0.2572921812534332,
      "learning_rate": 3.105829484129861e-06,
      "loss": 0.0108,
      "step": 2112560
    },
    {
      "epoch": 3.4572835045135273,
      "grad_norm": 0.11736670881509781,
      "learning_rate": 3.1057635919163437e-06,
      "loss": 0.0084,
      "step": 2112580
    },
    {
      "epoch": 3.457316234952181,
      "grad_norm": 0.33596348762512207,
      "learning_rate": 3.1056976997028264e-06,
      "loss": 0.0179,
      "step": 2112600
    },
    {
      "epoch": 3.457348965390834,
      "grad_norm": 0.21933268010616302,
      "learning_rate": 3.105631807489309e-06,
      "loss": 0.0113,
      "step": 2112620
    },
    {
      "epoch": 3.4573816958294876,
      "grad_norm": 0.35451728105545044,
      "learning_rate": 3.1055659152757923e-06,
      "loss": 0.0123,
      "step": 2112640
    },
    {
      "epoch": 3.4574144262681408,
      "grad_norm": 0.35908928513526917,
      "learning_rate": 3.105500023062275e-06,
      "loss": 0.0114,
      "step": 2112660
    },
    {
      "epoch": 3.4574471567067944,
      "grad_norm": 0.07286883890628815,
      "learning_rate": 3.105434130848758e-06,
      "loss": 0.0126,
      "step": 2112680
    },
    {
      "epoch": 3.4574798871454475,
      "grad_norm": 0.34428489208221436,
      "learning_rate": 3.1053682386352405e-06,
      "loss": 0.0094,
      "step": 2112700
    },
    {
      "epoch": 3.4575126175841007,
      "grad_norm": 0.3401384949684143,
      "learning_rate": 3.1053023464217237e-06,
      "loss": 0.0121,
      "step": 2112720
    },
    {
      "epoch": 3.4575453480227543,
      "grad_norm": 0.45470747351646423,
      "learning_rate": 3.1052364542082064e-06,
      "loss": 0.01,
      "step": 2112740
    },
    {
      "epoch": 3.4575780784614074,
      "grad_norm": 0.7290540337562561,
      "learning_rate": 3.105170561994689e-06,
      "loss": 0.009,
      "step": 2112760
    },
    {
      "epoch": 3.457610808900061,
      "grad_norm": 0.14678221940994263,
      "learning_rate": 3.105104669781172e-06,
      "loss": 0.0064,
      "step": 2112780
    },
    {
      "epoch": 3.457643539338714,
      "grad_norm": 0.18715450167655945,
      "learning_rate": 3.105038777567655e-06,
      "loss": 0.0118,
      "step": 2112800
    },
    {
      "epoch": 3.4576762697773678,
      "grad_norm": 0.05849481001496315,
      "learning_rate": 3.104972885354138e-06,
      "loss": 0.007,
      "step": 2112820
    },
    {
      "epoch": 3.457709000216021,
      "grad_norm": 0.4302060008049011,
      "learning_rate": 3.1049069931406206e-06,
      "loss": 0.0175,
      "step": 2112840
    },
    {
      "epoch": 3.457741730654674,
      "grad_norm": 0.3098777234554291,
      "learning_rate": 3.1048411009271033e-06,
      "loss": 0.0107,
      "step": 2112860
    },
    {
      "epoch": 3.4577744610933276,
      "grad_norm": 0.6314324140548706,
      "learning_rate": 3.104775208713587e-06,
      "loss": 0.0113,
      "step": 2112880
    },
    {
      "epoch": 3.457807191531981,
      "grad_norm": 0.4796278774738312,
      "learning_rate": 3.1047093165000696e-06,
      "loss": 0.0118,
      "step": 2112900
    },
    {
      "epoch": 3.4578399219706344,
      "grad_norm": 0.14882192015647888,
      "learning_rate": 3.1046434242865524e-06,
      "loss": 0.0084,
      "step": 2112920
    },
    {
      "epoch": 3.4578726524092875,
      "grad_norm": 0.06335300207138062,
      "learning_rate": 3.1045775320730355e-06,
      "loss": 0.0094,
      "step": 2112940
    },
    {
      "epoch": 3.457905382847941,
      "grad_norm": 0.41888803243637085,
      "learning_rate": 3.1045116398595183e-06,
      "loss": 0.0142,
      "step": 2112960
    },
    {
      "epoch": 3.4579381132865943,
      "grad_norm": 0.511120080947876,
      "learning_rate": 3.104445747646001e-06,
      "loss": 0.0062,
      "step": 2112980
    },
    {
      "epoch": 3.4579708437252474,
      "grad_norm": 0.5278005599975586,
      "learning_rate": 3.1043798554324838e-06,
      "loss": 0.0107,
      "step": 2113000
    },
    {
      "epoch": 3.458003574163901,
      "grad_norm": 0.258959025144577,
      "learning_rate": 3.104313963218967e-06,
      "loss": 0.0098,
      "step": 2113020
    },
    {
      "epoch": 3.458036304602554,
      "grad_norm": 0.1861262321472168,
      "learning_rate": 3.1042480710054497e-06,
      "loss": 0.0143,
      "step": 2113040
    },
    {
      "epoch": 3.4580690350412078,
      "grad_norm": 1.3401894569396973,
      "learning_rate": 3.1041821787919324e-06,
      "loss": 0.0107,
      "step": 2113060
    },
    {
      "epoch": 3.458101765479861,
      "grad_norm": 0.791879415512085,
      "learning_rate": 3.104116286578415e-06,
      "loss": 0.0153,
      "step": 2113080
    },
    {
      "epoch": 3.4581344959185145,
      "grad_norm": 0.104317806661129,
      "learning_rate": 3.104050394364898e-06,
      "loss": 0.0091,
      "step": 2113100
    },
    {
      "epoch": 3.4581672263571677,
      "grad_norm": 0.10762855410575867,
      "learning_rate": 3.103984502151381e-06,
      "loss": 0.0096,
      "step": 2113120
    },
    {
      "epoch": 3.458199956795821,
      "grad_norm": 0.47696632146835327,
      "learning_rate": 3.1039186099378638e-06,
      "loss": 0.0175,
      "step": 2113140
    },
    {
      "epoch": 3.4582326872344744,
      "grad_norm": 0.11057081073522568,
      "learning_rate": 3.1038527177243465e-06,
      "loss": 0.0101,
      "step": 2113160
    },
    {
      "epoch": 3.4582654176731276,
      "grad_norm": 0.5538734197616577,
      "learning_rate": 3.1037868255108293e-06,
      "loss": 0.0095,
      "step": 2113180
    },
    {
      "epoch": 3.458298148111781,
      "grad_norm": 0.36370664834976196,
      "learning_rate": 3.1037209332973124e-06,
      "loss": 0.0094,
      "step": 2113200
    },
    {
      "epoch": 3.4583308785504343,
      "grad_norm": 0.1338016390800476,
      "learning_rate": 3.103655041083795e-06,
      "loss": 0.0119,
      "step": 2113220
    },
    {
      "epoch": 3.458363608989088,
      "grad_norm": 0.13525287806987762,
      "learning_rate": 3.1035891488702783e-06,
      "loss": 0.014,
      "step": 2113240
    },
    {
      "epoch": 3.458396339427741,
      "grad_norm": 0.1543789505958557,
      "learning_rate": 3.1035232566567615e-06,
      "loss": 0.0147,
      "step": 2113260
    },
    {
      "epoch": 3.458429069866394,
      "grad_norm": 0.05151907727122307,
      "learning_rate": 3.1034573644432442e-06,
      "loss": 0.0074,
      "step": 2113280
    },
    {
      "epoch": 3.458461800305048,
      "grad_norm": 0.2007320523262024,
      "learning_rate": 3.103391472229727e-06,
      "loss": 0.0108,
      "step": 2113300
    },
    {
      "epoch": 3.458494530743701,
      "grad_norm": 0.5944044589996338,
      "learning_rate": 3.1033255800162097e-06,
      "loss": 0.0087,
      "step": 2113320
    },
    {
      "epoch": 3.4585272611823545,
      "grad_norm": 0.2258724570274353,
      "learning_rate": 3.103259687802693e-06,
      "loss": 0.0096,
      "step": 2113340
    },
    {
      "epoch": 3.4585599916210077,
      "grad_norm": 0.47744402289390564,
      "learning_rate": 3.1031937955891756e-06,
      "loss": 0.0138,
      "step": 2113360
    },
    {
      "epoch": 3.4585927220596613,
      "grad_norm": 0.23337607085704803,
      "learning_rate": 3.1031279033756584e-06,
      "loss": 0.0113,
      "step": 2113380
    },
    {
      "epoch": 3.4586254524983144,
      "grad_norm": 0.19284085929393768,
      "learning_rate": 3.103062011162141e-06,
      "loss": 0.0122,
      "step": 2113400
    },
    {
      "epoch": 3.4586581829369676,
      "grad_norm": 0.4408273994922638,
      "learning_rate": 3.1029961189486243e-06,
      "loss": 0.0091,
      "step": 2113420
    },
    {
      "epoch": 3.458690913375621,
      "grad_norm": 0.17829033732414246,
      "learning_rate": 3.102930226735107e-06,
      "loss": 0.0142,
      "step": 2113440
    },
    {
      "epoch": 3.4587236438142743,
      "grad_norm": 0.0941888689994812,
      "learning_rate": 3.1028643345215897e-06,
      "loss": 0.013,
      "step": 2113460
    },
    {
      "epoch": 3.458756374252928,
      "grad_norm": 0.2187023013830185,
      "learning_rate": 3.1027984423080725e-06,
      "loss": 0.0126,
      "step": 2113480
    },
    {
      "epoch": 3.458789104691581,
      "grad_norm": 0.5301851630210876,
      "learning_rate": 3.1027325500945552e-06,
      "loss": 0.02,
      "step": 2113500
    },
    {
      "epoch": 3.4588218351302342,
      "grad_norm": 0.7573046088218689,
      "learning_rate": 3.1026666578810384e-06,
      "loss": 0.014,
      "step": 2113520
    },
    {
      "epoch": 3.458854565568888,
      "grad_norm": 0.1736053228378296,
      "learning_rate": 3.102600765667521e-06,
      "loss": 0.0155,
      "step": 2113540
    },
    {
      "epoch": 3.458887296007541,
      "grad_norm": 0.4063728451728821,
      "learning_rate": 3.102534873454004e-06,
      "loss": 0.0084,
      "step": 2113560
    },
    {
      "epoch": 3.4589200264461946,
      "grad_norm": 0.29103899002075195,
      "learning_rate": 3.1024689812404874e-06,
      "loss": 0.0184,
      "step": 2113580
    },
    {
      "epoch": 3.4589527568848477,
      "grad_norm": 0.20329873263835907,
      "learning_rate": 3.10240308902697e-06,
      "loss": 0.0122,
      "step": 2113600
    },
    {
      "epoch": 3.458985487323501,
      "grad_norm": 0.5215600728988647,
      "learning_rate": 3.102337196813453e-06,
      "loss": 0.0122,
      "step": 2113620
    },
    {
      "epoch": 3.4590182177621545,
      "grad_norm": 0.662212073802948,
      "learning_rate": 3.1022713045999357e-06,
      "loss": 0.0124,
      "step": 2113640
    },
    {
      "epoch": 3.4590509482008076,
      "grad_norm": 0.323442280292511,
      "learning_rate": 3.102205412386419e-06,
      "loss": 0.0127,
      "step": 2113660
    },
    {
      "epoch": 3.459083678639461,
      "grad_norm": 0.3664454519748688,
      "learning_rate": 3.1021395201729016e-06,
      "loss": 0.0136,
      "step": 2113680
    },
    {
      "epoch": 3.4591164090781144,
      "grad_norm": 0.35881221294403076,
      "learning_rate": 3.1020736279593843e-06,
      "loss": 0.0108,
      "step": 2113700
    },
    {
      "epoch": 3.459149139516768,
      "grad_norm": 0.15872691571712494,
      "learning_rate": 3.102007735745867e-06,
      "loss": 0.0098,
      "step": 2113720
    },
    {
      "epoch": 3.459181869955421,
      "grad_norm": 0.04325103014707565,
      "learning_rate": 3.10194184353235e-06,
      "loss": 0.0157,
      "step": 2113740
    },
    {
      "epoch": 3.4592146003940742,
      "grad_norm": 0.18379658460617065,
      "learning_rate": 3.101875951318833e-06,
      "loss": 0.0084,
      "step": 2113760
    },
    {
      "epoch": 3.459247330832728,
      "grad_norm": 0.1045849546790123,
      "learning_rate": 3.1018100591053157e-06,
      "loss": 0.0089,
      "step": 2113780
    },
    {
      "epoch": 3.459280061271381,
      "grad_norm": 0.18522676825523376,
      "learning_rate": 3.1017441668917984e-06,
      "loss": 0.0096,
      "step": 2113800
    },
    {
      "epoch": 3.4593127917100346,
      "grad_norm": 0.1940690129995346,
      "learning_rate": 3.1016782746782816e-06,
      "loss": 0.0119,
      "step": 2113820
    },
    {
      "epoch": 3.4593455221486877,
      "grad_norm": 0.3931153118610382,
      "learning_rate": 3.1016123824647643e-06,
      "loss": 0.0071,
      "step": 2113840
    },
    {
      "epoch": 3.4593782525873413,
      "grad_norm": 0.26313668489456177,
      "learning_rate": 3.101546490251247e-06,
      "loss": 0.0076,
      "step": 2113860
    },
    {
      "epoch": 3.4594109830259945,
      "grad_norm": 0.08063904941082001,
      "learning_rate": 3.10148059803773e-06,
      "loss": 0.0148,
      "step": 2113880
    },
    {
      "epoch": 3.4594437134646476,
      "grad_norm": 0.29733869433403015,
      "learning_rate": 3.101414705824213e-06,
      "loss": 0.0095,
      "step": 2113900
    },
    {
      "epoch": 3.4594764439033012,
      "grad_norm": 0.14209997653961182,
      "learning_rate": 3.1013488136106957e-06,
      "loss": 0.0092,
      "step": 2113920
    },
    {
      "epoch": 3.4595091743419544,
      "grad_norm": 0.31921759247779846,
      "learning_rate": 3.101282921397179e-06,
      "loss": 0.0119,
      "step": 2113940
    },
    {
      "epoch": 3.459541904780608,
      "grad_norm": 0.5195763111114502,
      "learning_rate": 3.101217029183662e-06,
      "loss": 0.0116,
      "step": 2113960
    },
    {
      "epoch": 3.459574635219261,
      "grad_norm": 0.30950409173965454,
      "learning_rate": 3.1011511369701448e-06,
      "loss": 0.0095,
      "step": 2113980
    },
    {
      "epoch": 3.4596073656579147,
      "grad_norm": 0.4801703989505768,
      "learning_rate": 3.1010852447566275e-06,
      "loss": 0.0113,
      "step": 2114000
    },
    {
      "epoch": 3.459640096096568,
      "grad_norm": 0.20061640441417694,
      "learning_rate": 3.1010193525431103e-06,
      "loss": 0.0156,
      "step": 2114020
    },
    {
      "epoch": 3.459672826535221,
      "grad_norm": 0.250460147857666,
      "learning_rate": 3.100953460329593e-06,
      "loss": 0.0108,
      "step": 2114040
    },
    {
      "epoch": 3.4597055569738746,
      "grad_norm": 0.424643874168396,
      "learning_rate": 3.100887568116076e-06,
      "loss": 0.0124,
      "step": 2114060
    },
    {
      "epoch": 3.4597382874125278,
      "grad_norm": 0.32770660519599915,
      "learning_rate": 3.100821675902559e-06,
      "loss": 0.0082,
      "step": 2114080
    },
    {
      "epoch": 3.4597710178511814,
      "grad_norm": 0.2622101306915283,
      "learning_rate": 3.1007557836890416e-06,
      "loss": 0.0102,
      "step": 2114100
    },
    {
      "epoch": 3.4598037482898345,
      "grad_norm": 0.4561517536640167,
      "learning_rate": 3.1006898914755244e-06,
      "loss": 0.0133,
      "step": 2114120
    },
    {
      "epoch": 3.459836478728488,
      "grad_norm": 0.43735575675964355,
      "learning_rate": 3.1006239992620075e-06,
      "loss": 0.0097,
      "step": 2114140
    },
    {
      "epoch": 3.4598692091671412,
      "grad_norm": 0.22418373823165894,
      "learning_rate": 3.1005581070484903e-06,
      "loss": 0.011,
      "step": 2114160
    },
    {
      "epoch": 3.4599019396057944,
      "grad_norm": 0.28910163044929504,
      "learning_rate": 3.100492214834973e-06,
      "loss": 0.0073,
      "step": 2114180
    },
    {
      "epoch": 3.459934670044448,
      "grad_norm": 0.21443021297454834,
      "learning_rate": 3.1004263226214558e-06,
      "loss": 0.0129,
      "step": 2114200
    },
    {
      "epoch": 3.459967400483101,
      "grad_norm": 0.39515265822410583,
      "learning_rate": 3.100360430407939e-06,
      "loss": 0.0081,
      "step": 2114220
    },
    {
      "epoch": 3.4600001309217547,
      "grad_norm": 0.5902297496795654,
      "learning_rate": 3.1002945381944217e-06,
      "loss": 0.0105,
      "step": 2114240
    },
    {
      "epoch": 3.460032861360408,
      "grad_norm": 0.18803104758262634,
      "learning_rate": 3.1002286459809044e-06,
      "loss": 0.0139,
      "step": 2114260
    },
    {
      "epoch": 3.4600655917990615,
      "grad_norm": 0.34871065616607666,
      "learning_rate": 3.100162753767387e-06,
      "loss": 0.0092,
      "step": 2114280
    },
    {
      "epoch": 3.4600983222377146,
      "grad_norm": 0.17754718661308289,
      "learning_rate": 3.1000968615538707e-06,
      "loss": 0.0135,
      "step": 2114300
    },
    {
      "epoch": 3.460131052676368,
      "grad_norm": 0.44074851274490356,
      "learning_rate": 3.1000309693403535e-06,
      "loss": 0.0103,
      "step": 2114320
    },
    {
      "epoch": 3.4601637831150214,
      "grad_norm": 0.21165737509727478,
      "learning_rate": 3.0999650771268362e-06,
      "loss": 0.0103,
      "step": 2114340
    },
    {
      "epoch": 3.4601965135536745,
      "grad_norm": 0.1264302134513855,
      "learning_rate": 3.0998991849133194e-06,
      "loss": 0.0116,
      "step": 2114360
    },
    {
      "epoch": 3.460229243992328,
      "grad_norm": 0.5184792876243591,
      "learning_rate": 3.099833292699802e-06,
      "loss": 0.0122,
      "step": 2114380
    },
    {
      "epoch": 3.4602619744309813,
      "grad_norm": 0.2612963914871216,
      "learning_rate": 3.099767400486285e-06,
      "loss": 0.0109,
      "step": 2114400
    },
    {
      "epoch": 3.460294704869635,
      "grad_norm": 0.2677215337753296,
      "learning_rate": 3.0997015082727676e-06,
      "loss": 0.0128,
      "step": 2114420
    },
    {
      "epoch": 3.460327435308288,
      "grad_norm": 0.3393637537956238,
      "learning_rate": 3.0996356160592508e-06,
      "loss": 0.0129,
      "step": 2114440
    },
    {
      "epoch": 3.460360165746941,
      "grad_norm": 0.766650378704071,
      "learning_rate": 3.0995697238457335e-06,
      "loss": 0.0158,
      "step": 2114460
    },
    {
      "epoch": 3.4603928961855948,
      "grad_norm": 0.08046700805425644,
      "learning_rate": 3.0995038316322162e-06,
      "loss": 0.0074,
      "step": 2114480
    },
    {
      "epoch": 3.460425626624248,
      "grad_norm": 0.7677397131919861,
      "learning_rate": 3.099437939418699e-06,
      "loss": 0.0091,
      "step": 2114500
    },
    {
      "epoch": 3.4604583570629015,
      "grad_norm": 0.1846984177827835,
      "learning_rate": 3.0993720472051817e-06,
      "loss": 0.0072,
      "step": 2114520
    },
    {
      "epoch": 3.4604910875015547,
      "grad_norm": 0.4099639654159546,
      "learning_rate": 3.099306154991665e-06,
      "loss": 0.0135,
      "step": 2114540
    },
    {
      "epoch": 3.4605238179402082,
      "grad_norm": 0.2996549904346466,
      "learning_rate": 3.0992402627781476e-06,
      "loss": 0.0116,
      "step": 2114560
    },
    {
      "epoch": 3.4605565483788614,
      "grad_norm": 0.1678488701581955,
      "learning_rate": 3.0991743705646304e-06,
      "loss": 0.0113,
      "step": 2114580
    },
    {
      "epoch": 3.4605892788175145,
      "grad_norm": 0.48722192645072937,
      "learning_rate": 3.099108478351113e-06,
      "loss": 0.0103,
      "step": 2114600
    },
    {
      "epoch": 3.460622009256168,
      "grad_norm": 0.8419995903968811,
      "learning_rate": 3.0990425861375963e-06,
      "loss": 0.0116,
      "step": 2114620
    },
    {
      "epoch": 3.4606547396948213,
      "grad_norm": 0.19722002744674683,
      "learning_rate": 3.0989766939240794e-06,
      "loss": 0.009,
      "step": 2114640
    },
    {
      "epoch": 3.460687470133475,
      "grad_norm": 0.08712491393089294,
      "learning_rate": 3.098910801710562e-06,
      "loss": 0.0085,
      "step": 2114660
    },
    {
      "epoch": 3.460720200572128,
      "grad_norm": 0.21576614677906036,
      "learning_rate": 3.0988449094970453e-06,
      "loss": 0.0105,
      "step": 2114680
    },
    {
      "epoch": 3.4607529310107816,
      "grad_norm": 0.10490063577890396,
      "learning_rate": 3.098779017283528e-06,
      "loss": 0.01,
      "step": 2114700
    },
    {
      "epoch": 3.460785661449435,
      "grad_norm": 0.09002304077148438,
      "learning_rate": 3.098713125070011e-06,
      "loss": 0.0107,
      "step": 2114720
    },
    {
      "epoch": 3.460818391888088,
      "grad_norm": 0.15250104665756226,
      "learning_rate": 3.0986472328564936e-06,
      "loss": 0.0102,
      "step": 2114740
    },
    {
      "epoch": 3.4608511223267415,
      "grad_norm": 0.28588467836380005,
      "learning_rate": 3.0985813406429767e-06,
      "loss": 0.0145,
      "step": 2114760
    },
    {
      "epoch": 3.4608838527653947,
      "grad_norm": 0.23062178492546082,
      "learning_rate": 3.0985154484294595e-06,
      "loss": 0.0097,
      "step": 2114780
    },
    {
      "epoch": 3.4609165832040483,
      "grad_norm": 0.10910024493932724,
      "learning_rate": 3.098449556215942e-06,
      "loss": 0.0127,
      "step": 2114800
    },
    {
      "epoch": 3.4609493136427014,
      "grad_norm": 0.7131751179695129,
      "learning_rate": 3.098383664002425e-06,
      "loss": 0.0151,
      "step": 2114820
    },
    {
      "epoch": 3.460982044081355,
      "grad_norm": 0.13832907378673553,
      "learning_rate": 3.098317771788908e-06,
      "loss": 0.0111,
      "step": 2114840
    },
    {
      "epoch": 3.461014774520008,
      "grad_norm": 0.156517893075943,
      "learning_rate": 3.098251879575391e-06,
      "loss": 0.0128,
      "step": 2114860
    },
    {
      "epoch": 3.4610475049586613,
      "grad_norm": 0.3519940972328186,
      "learning_rate": 3.0981859873618736e-06,
      "loss": 0.01,
      "step": 2114880
    },
    {
      "epoch": 3.461080235397315,
      "grad_norm": 0.32664334774017334,
      "learning_rate": 3.0981200951483563e-06,
      "loss": 0.0089,
      "step": 2114900
    },
    {
      "epoch": 3.461112965835968,
      "grad_norm": 0.18436317145824432,
      "learning_rate": 3.0980542029348395e-06,
      "loss": 0.0114,
      "step": 2114920
    },
    {
      "epoch": 3.4611456962746217,
      "grad_norm": 0.1197739839553833,
      "learning_rate": 3.0979883107213222e-06,
      "loss": 0.0184,
      "step": 2114940
    },
    {
      "epoch": 3.461178426713275,
      "grad_norm": 0.16592121124267578,
      "learning_rate": 3.097922418507805e-06,
      "loss": 0.0064,
      "step": 2114960
    },
    {
      "epoch": 3.4612111571519284,
      "grad_norm": 0.4783953130245209,
      "learning_rate": 3.0978565262942877e-06,
      "loss": 0.0097,
      "step": 2114980
    },
    {
      "epoch": 3.4612438875905815,
      "grad_norm": 0.2321143001317978,
      "learning_rate": 3.0977906340807713e-06,
      "loss": 0.0158,
      "step": 2115000
    },
    {
      "epoch": 3.4612766180292347,
      "grad_norm": 0.5163153409957886,
      "learning_rate": 3.097724741867254e-06,
      "loss": 0.0111,
      "step": 2115020
    },
    {
      "epoch": 3.4613093484678883,
      "grad_norm": 0.10442081838846207,
      "learning_rate": 3.0976588496537368e-06,
      "loss": 0.0095,
      "step": 2115040
    },
    {
      "epoch": 3.4613420789065414,
      "grad_norm": 2.1610662937164307,
      "learning_rate": 3.0975929574402195e-06,
      "loss": 0.0184,
      "step": 2115060
    },
    {
      "epoch": 3.461374809345195,
      "grad_norm": 0.232450470328331,
      "learning_rate": 3.0975270652267027e-06,
      "loss": 0.0152,
      "step": 2115080
    },
    {
      "epoch": 3.461407539783848,
      "grad_norm": 0.07407447695732117,
      "learning_rate": 3.0974611730131854e-06,
      "loss": 0.0096,
      "step": 2115100
    },
    {
      "epoch": 3.4614402702225013,
      "grad_norm": 0.3839620053768158,
      "learning_rate": 3.097395280799668e-06,
      "loss": 0.0138,
      "step": 2115120
    },
    {
      "epoch": 3.461473000661155,
      "grad_norm": 0.23328062891960144,
      "learning_rate": 3.097329388586151e-06,
      "loss": 0.007,
      "step": 2115140
    },
    {
      "epoch": 3.461505731099808,
      "grad_norm": 0.4152524471282959,
      "learning_rate": 3.097263496372634e-06,
      "loss": 0.0144,
      "step": 2115160
    },
    {
      "epoch": 3.4615384615384617,
      "grad_norm": 0.17895135283470154,
      "learning_rate": 3.097197604159117e-06,
      "loss": 0.0077,
      "step": 2115180
    },
    {
      "epoch": 3.461571191977115,
      "grad_norm": 0.16639432311058044,
      "learning_rate": 3.0971317119455995e-06,
      "loss": 0.0095,
      "step": 2115200
    },
    {
      "epoch": 3.461603922415768,
      "grad_norm": 0.43566229939460754,
      "learning_rate": 3.0970658197320823e-06,
      "loss": 0.0102,
      "step": 2115220
    },
    {
      "epoch": 3.4616366528544216,
      "grad_norm": 0.2826394736766815,
      "learning_rate": 3.0969999275185654e-06,
      "loss": 0.015,
      "step": 2115240
    },
    {
      "epoch": 3.4616693832930747,
      "grad_norm": 0.11894702166318893,
      "learning_rate": 3.096934035305048e-06,
      "loss": 0.0091,
      "step": 2115260
    },
    {
      "epoch": 3.4617021137317283,
      "grad_norm": 0.7324851751327515,
      "learning_rate": 3.096868143091531e-06,
      "loss": 0.0074,
      "step": 2115280
    },
    {
      "epoch": 3.4617348441703815,
      "grad_norm": 0.15960605442523956,
      "learning_rate": 3.0968022508780137e-06,
      "loss": 0.0098,
      "step": 2115300
    },
    {
      "epoch": 3.461767574609035,
      "grad_norm": 0.504845380783081,
      "learning_rate": 3.096736358664497e-06,
      "loss": 0.0141,
      "step": 2115320
    },
    {
      "epoch": 3.461800305047688,
      "grad_norm": 0.6449677348136902,
      "learning_rate": 3.09667046645098e-06,
      "loss": 0.0093,
      "step": 2115340
    },
    {
      "epoch": 3.4618330354863414,
      "grad_norm": 0.07212807983160019,
      "learning_rate": 3.0966045742374627e-06,
      "loss": 0.0105,
      "step": 2115360
    },
    {
      "epoch": 3.461865765924995,
      "grad_norm": 0.21900111436843872,
      "learning_rate": 3.096538682023946e-06,
      "loss": 0.0135,
      "step": 2115380
    },
    {
      "epoch": 3.461898496363648,
      "grad_norm": 0.08288823068141937,
      "learning_rate": 3.0964727898104286e-06,
      "loss": 0.0157,
      "step": 2115400
    },
    {
      "epoch": 3.4619312268023017,
      "grad_norm": 0.09896023571491241,
      "learning_rate": 3.0964068975969114e-06,
      "loss": 0.0071,
      "step": 2115420
    },
    {
      "epoch": 3.461963957240955,
      "grad_norm": 0.2971228361129761,
      "learning_rate": 3.096341005383394e-06,
      "loss": 0.0125,
      "step": 2115440
    },
    {
      "epoch": 3.4619966876796084,
      "grad_norm": 0.20229847729206085,
      "learning_rate": 3.0962751131698773e-06,
      "loss": 0.0115,
      "step": 2115460
    },
    {
      "epoch": 3.4620294181182616,
      "grad_norm": 0.4854162633419037,
      "learning_rate": 3.09620922095636e-06,
      "loss": 0.0097,
      "step": 2115480
    },
    {
      "epoch": 3.4620621485569147,
      "grad_norm": 0.10011670738458633,
      "learning_rate": 3.0961433287428427e-06,
      "loss": 0.0076,
      "step": 2115500
    },
    {
      "epoch": 3.4620948789955683,
      "grad_norm": 0.6473625898361206,
      "learning_rate": 3.0960774365293255e-06,
      "loss": 0.0137,
      "step": 2115520
    },
    {
      "epoch": 3.4621276094342215,
      "grad_norm": 0.31477516889572144,
      "learning_rate": 3.0960115443158082e-06,
      "loss": 0.0106,
      "step": 2115540
    },
    {
      "epoch": 3.462160339872875,
      "grad_norm": 0.07374735176563263,
      "learning_rate": 3.0959456521022914e-06,
      "loss": 0.0131,
      "step": 2115560
    },
    {
      "epoch": 3.4621930703115282,
      "grad_norm": 0.2604213356971741,
      "learning_rate": 3.095879759888774e-06,
      "loss": 0.014,
      "step": 2115580
    },
    {
      "epoch": 3.462225800750182,
      "grad_norm": 0.22875499725341797,
      "learning_rate": 3.095813867675257e-06,
      "loss": 0.0071,
      "step": 2115600
    },
    {
      "epoch": 3.462258531188835,
      "grad_norm": 1.094683051109314,
      "learning_rate": 3.0957479754617396e-06,
      "loss": 0.0152,
      "step": 2115620
    },
    {
      "epoch": 3.462291261627488,
      "grad_norm": 0.19998489320278168,
      "learning_rate": 3.0956820832482228e-06,
      "loss": 0.0101,
      "step": 2115640
    },
    {
      "epoch": 3.4623239920661417,
      "grad_norm": 0.14294150471687317,
      "learning_rate": 3.0956161910347055e-06,
      "loss": 0.013,
      "step": 2115660
    },
    {
      "epoch": 3.462356722504795,
      "grad_norm": 0.15176114439964294,
      "learning_rate": 3.0955502988211882e-06,
      "loss": 0.0095,
      "step": 2115680
    },
    {
      "epoch": 3.4623894529434485,
      "grad_norm": 0.18534483015537262,
      "learning_rate": 3.095484406607672e-06,
      "loss": 0.0129,
      "step": 2115700
    },
    {
      "epoch": 3.4624221833821016,
      "grad_norm": 0.2134840041399002,
      "learning_rate": 3.0954185143941546e-06,
      "loss": 0.0123,
      "step": 2115720
    },
    {
      "epoch": 3.462454913820755,
      "grad_norm": 0.32959747314453125,
      "learning_rate": 3.0953526221806373e-06,
      "loss": 0.01,
      "step": 2115740
    },
    {
      "epoch": 3.4624876442594084,
      "grad_norm": 0.4636470079421997,
      "learning_rate": 3.09528672996712e-06,
      "loss": 0.0085,
      "step": 2115760
    },
    {
      "epoch": 3.4625203746980615,
      "grad_norm": 0.2860534191131592,
      "learning_rate": 3.0952208377536032e-06,
      "loss": 0.0137,
      "step": 2115780
    },
    {
      "epoch": 3.462553105136715,
      "grad_norm": 0.5259095430374146,
      "learning_rate": 3.095154945540086e-06,
      "loss": 0.0153,
      "step": 2115800
    },
    {
      "epoch": 3.4625858355753683,
      "grad_norm": 0.38862359523773193,
      "learning_rate": 3.0950890533265687e-06,
      "loss": 0.0094,
      "step": 2115820
    },
    {
      "epoch": 3.462618566014022,
      "grad_norm": 0.23812921345233917,
      "learning_rate": 3.0950231611130514e-06,
      "loss": 0.0094,
      "step": 2115840
    },
    {
      "epoch": 3.462651296452675,
      "grad_norm": 0.10461725294589996,
      "learning_rate": 3.0949572688995346e-06,
      "loss": 0.0089,
      "step": 2115860
    },
    {
      "epoch": 3.4626840268913286,
      "grad_norm": 0.06636849045753479,
      "learning_rate": 3.0948913766860173e-06,
      "loss": 0.0155,
      "step": 2115880
    },
    {
      "epoch": 3.4627167573299817,
      "grad_norm": 0.12887923419475555,
      "learning_rate": 3.0948254844725e-06,
      "loss": 0.0094,
      "step": 2115900
    },
    {
      "epoch": 3.462749487768635,
      "grad_norm": 0.19053222239017487,
      "learning_rate": 3.094759592258983e-06,
      "loss": 0.0077,
      "step": 2115920
    },
    {
      "epoch": 3.4627822182072885,
      "grad_norm": 0.2567901611328125,
      "learning_rate": 3.0946937000454656e-06,
      "loss": 0.0132,
      "step": 2115940
    },
    {
      "epoch": 3.4628149486459416,
      "grad_norm": 0.24772609770298004,
      "learning_rate": 3.0946278078319487e-06,
      "loss": 0.0114,
      "step": 2115960
    },
    {
      "epoch": 3.4628476790845952,
      "grad_norm": 0.2811526358127594,
      "learning_rate": 3.0945619156184315e-06,
      "loss": 0.0119,
      "step": 2115980
    },
    {
      "epoch": 3.4628804095232484,
      "grad_norm": 0.4474041759967804,
      "learning_rate": 3.094496023404914e-06,
      "loss": 0.0082,
      "step": 2116000
    },
    {
      "epoch": 3.462913139961902,
      "grad_norm": 0.4030313491821289,
      "learning_rate": 3.094430131191397e-06,
      "loss": 0.009,
      "step": 2116020
    },
    {
      "epoch": 3.462945870400555,
      "grad_norm": 0.45489463210105896,
      "learning_rate": 3.09436423897788e-06,
      "loss": 0.0147,
      "step": 2116040
    },
    {
      "epoch": 3.4629786008392083,
      "grad_norm": 0.12836894392967224,
      "learning_rate": 3.0942983467643633e-06,
      "loss": 0.0104,
      "step": 2116060
    },
    {
      "epoch": 3.463011331277862,
      "grad_norm": 0.22284933924674988,
      "learning_rate": 3.094232454550846e-06,
      "loss": 0.0141,
      "step": 2116080
    },
    {
      "epoch": 3.463044061716515,
      "grad_norm": 0.12383163720369339,
      "learning_rate": 3.094166562337329e-06,
      "loss": 0.0125,
      "step": 2116100
    },
    {
      "epoch": 3.4630767921551686,
      "grad_norm": 0.18623974919319153,
      "learning_rate": 3.094100670123812e-06,
      "loss": 0.0127,
      "step": 2116120
    },
    {
      "epoch": 3.4631095225938218,
      "grad_norm": 0.3508441150188446,
      "learning_rate": 3.0940347779102947e-06,
      "loss": 0.0091,
      "step": 2116140
    },
    {
      "epoch": 3.4631422530324754,
      "grad_norm": 0.1802215874195099,
      "learning_rate": 3.0939688856967774e-06,
      "loss": 0.0069,
      "step": 2116160
    },
    {
      "epoch": 3.4631749834711285,
      "grad_norm": 0.15035711228847504,
      "learning_rate": 3.0939029934832606e-06,
      "loss": 0.0134,
      "step": 2116180
    },
    {
      "epoch": 3.4632077139097817,
      "grad_norm": 0.5209704041481018,
      "learning_rate": 3.0938371012697433e-06,
      "loss": 0.0124,
      "step": 2116200
    },
    {
      "epoch": 3.4632404443484353,
      "grad_norm": 0.35625243186950684,
      "learning_rate": 3.093771209056226e-06,
      "loss": 0.01,
      "step": 2116220
    },
    {
      "epoch": 3.4632731747870884,
      "grad_norm": 0.7165815830230713,
      "learning_rate": 3.0937053168427088e-06,
      "loss": 0.0133,
      "step": 2116240
    },
    {
      "epoch": 3.463305905225742,
      "grad_norm": 0.18498411774635315,
      "learning_rate": 3.093639424629192e-06,
      "loss": 0.008,
      "step": 2116260
    },
    {
      "epoch": 3.463338635664395,
      "grad_norm": 0.45896396040916443,
      "learning_rate": 3.0935735324156747e-06,
      "loss": 0.0103,
      "step": 2116280
    },
    {
      "epoch": 3.4633713661030487,
      "grad_norm": 0.14142507314682007,
      "learning_rate": 3.0935076402021574e-06,
      "loss": 0.0079,
      "step": 2116300
    },
    {
      "epoch": 3.463404096541702,
      "grad_norm": 0.5837845206260681,
      "learning_rate": 3.09344174798864e-06,
      "loss": 0.0159,
      "step": 2116320
    },
    {
      "epoch": 3.463436826980355,
      "grad_norm": 0.16575215756893158,
      "learning_rate": 3.0933758557751233e-06,
      "loss": 0.013,
      "step": 2116340
    },
    {
      "epoch": 3.4634695574190086,
      "grad_norm": 0.18465322256088257,
      "learning_rate": 3.093309963561606e-06,
      "loss": 0.0098,
      "step": 2116360
    },
    {
      "epoch": 3.463502287857662,
      "grad_norm": 0.15409183502197266,
      "learning_rate": 3.093244071348089e-06,
      "loss": 0.0079,
      "step": 2116380
    },
    {
      "epoch": 3.4635350182963154,
      "grad_norm": 0.2382049262523651,
      "learning_rate": 3.0931781791345724e-06,
      "loss": 0.0098,
      "step": 2116400
    },
    {
      "epoch": 3.4635677487349685,
      "grad_norm": 0.4029623568058014,
      "learning_rate": 3.093112286921055e-06,
      "loss": 0.0102,
      "step": 2116420
    },
    {
      "epoch": 3.463600479173622,
      "grad_norm": 0.4976159930229187,
      "learning_rate": 3.093046394707538e-06,
      "loss": 0.0123,
      "step": 2116440
    },
    {
      "epoch": 3.4636332096122753,
      "grad_norm": 0.33317941427230835,
      "learning_rate": 3.0929805024940206e-06,
      "loss": 0.0133,
      "step": 2116460
    },
    {
      "epoch": 3.4636659400509284,
      "grad_norm": 1.9819482564926147,
      "learning_rate": 3.0929146102805033e-06,
      "loss": 0.018,
      "step": 2116480
    },
    {
      "epoch": 3.463698670489582,
      "grad_norm": 0.5210987329483032,
      "learning_rate": 3.0928487180669865e-06,
      "loss": 0.0092,
      "step": 2116500
    },
    {
      "epoch": 3.463731400928235,
      "grad_norm": 0.43355849385261536,
      "learning_rate": 3.0927828258534692e-06,
      "loss": 0.0124,
      "step": 2116520
    },
    {
      "epoch": 3.4637641313668888,
      "grad_norm": 0.34325477480888367,
      "learning_rate": 3.092716933639952e-06,
      "loss": 0.013,
      "step": 2116540
    },
    {
      "epoch": 3.463796861805542,
      "grad_norm": 0.2978518605232239,
      "learning_rate": 3.0926510414264347e-06,
      "loss": 0.0095,
      "step": 2116560
    },
    {
      "epoch": 3.463829592244195,
      "grad_norm": 0.2597937285900116,
      "learning_rate": 3.092585149212918e-06,
      "loss": 0.0108,
      "step": 2116580
    },
    {
      "epoch": 3.4638623226828487,
      "grad_norm": 0.2324841320514679,
      "learning_rate": 3.0925192569994006e-06,
      "loss": 0.0107,
      "step": 2116600
    },
    {
      "epoch": 3.463895053121502,
      "grad_norm": 3.3888208866119385,
      "learning_rate": 3.0924533647858834e-06,
      "loss": 0.0129,
      "step": 2116620
    },
    {
      "epoch": 3.4639277835601554,
      "grad_norm": 0.08492934703826904,
      "learning_rate": 3.092387472572366e-06,
      "loss": 0.0116,
      "step": 2116640
    },
    {
      "epoch": 3.4639605139988086,
      "grad_norm": 0.40043219923973083,
      "learning_rate": 3.0923215803588493e-06,
      "loss": 0.0082,
      "step": 2116660
    },
    {
      "epoch": 3.4639932444374617,
      "grad_norm": 0.4338838458061218,
      "learning_rate": 3.092255688145332e-06,
      "loss": 0.0192,
      "step": 2116680
    },
    {
      "epoch": 3.4640259748761153,
      "grad_norm": 0.6768133640289307,
      "learning_rate": 3.0921897959318148e-06,
      "loss": 0.0187,
      "step": 2116700
    },
    {
      "epoch": 3.4640587053147684,
      "grad_norm": 0.20629854500293732,
      "learning_rate": 3.0921239037182975e-06,
      "loss": 0.0114,
      "step": 2116720
    },
    {
      "epoch": 3.464091435753422,
      "grad_norm": 0.23105163872241974,
      "learning_rate": 3.0920580115047807e-06,
      "loss": 0.0087,
      "step": 2116740
    },
    {
      "epoch": 3.464124166192075,
      "grad_norm": 0.27333855628967285,
      "learning_rate": 3.091992119291264e-06,
      "loss": 0.0095,
      "step": 2116760
    },
    {
      "epoch": 3.464156896630729,
      "grad_norm": 0.4666365087032318,
      "learning_rate": 3.0919262270777466e-06,
      "loss": 0.015,
      "step": 2116780
    },
    {
      "epoch": 3.464189627069382,
      "grad_norm": 0.26563510298728943,
      "learning_rate": 3.0918603348642297e-06,
      "loss": 0.008,
      "step": 2116800
    },
    {
      "epoch": 3.464222357508035,
      "grad_norm": 0.24345508217811584,
      "learning_rate": 3.0917944426507125e-06,
      "loss": 0.0117,
      "step": 2116820
    },
    {
      "epoch": 3.4642550879466887,
      "grad_norm": 0.1498968005180359,
      "learning_rate": 3.091728550437195e-06,
      "loss": 0.0126,
      "step": 2116840
    },
    {
      "epoch": 3.464287818385342,
      "grad_norm": 0.24956460297107697,
      "learning_rate": 3.091662658223678e-06,
      "loss": 0.0132,
      "step": 2116860
    },
    {
      "epoch": 3.4643205488239954,
      "grad_norm": 0.2416766732931137,
      "learning_rate": 3.091596766010161e-06,
      "loss": 0.0123,
      "step": 2116880
    },
    {
      "epoch": 3.4643532792626486,
      "grad_norm": 0.09869753569364548,
      "learning_rate": 3.091530873796644e-06,
      "loss": 0.0146,
      "step": 2116900
    },
    {
      "epoch": 3.464386009701302,
      "grad_norm": 0.31577521562576294,
      "learning_rate": 3.0914649815831266e-06,
      "loss": 0.0084,
      "step": 2116920
    },
    {
      "epoch": 3.4644187401399553,
      "grad_norm": 0.13646021485328674,
      "learning_rate": 3.0913990893696093e-06,
      "loss": 0.0098,
      "step": 2116940
    },
    {
      "epoch": 3.4644514705786085,
      "grad_norm": 0.2593764662742615,
      "learning_rate": 3.091333197156092e-06,
      "loss": 0.0128,
      "step": 2116960
    },
    {
      "epoch": 3.464484201017262,
      "grad_norm": 0.21116016805171967,
      "learning_rate": 3.0912673049425752e-06,
      "loss": 0.0095,
      "step": 2116980
    },
    {
      "epoch": 3.464516931455915,
      "grad_norm": 0.3283728063106537,
      "learning_rate": 3.091201412729058e-06,
      "loss": 0.0092,
      "step": 2117000
    },
    {
      "epoch": 3.464549661894569,
      "grad_norm": 0.2771379351615906,
      "learning_rate": 3.0911355205155407e-06,
      "loss": 0.0119,
      "step": 2117020
    },
    {
      "epoch": 3.464582392333222,
      "grad_norm": 0.4363957941532135,
      "learning_rate": 3.0910696283020234e-06,
      "loss": 0.0118,
      "step": 2117040
    },
    {
      "epoch": 3.4646151227718756,
      "grad_norm": 0.09871689975261688,
      "learning_rate": 3.0910037360885066e-06,
      "loss": 0.014,
      "step": 2117060
    },
    {
      "epoch": 3.4646478532105287,
      "grad_norm": 0.08585502207279205,
      "learning_rate": 3.0909378438749893e-06,
      "loss": 0.0106,
      "step": 2117080
    },
    {
      "epoch": 3.464680583649182,
      "grad_norm": 0.1548057496547699,
      "learning_rate": 3.0908719516614725e-06,
      "loss": 0.0122,
      "step": 2117100
    },
    {
      "epoch": 3.4647133140878354,
      "grad_norm": 0.253641813993454,
      "learning_rate": 3.0908060594479557e-06,
      "loss": 0.0093,
      "step": 2117120
    },
    {
      "epoch": 3.4647460445264886,
      "grad_norm": 0.45523345470428467,
      "learning_rate": 3.0907401672344384e-06,
      "loss": 0.0103,
      "step": 2117140
    },
    {
      "epoch": 3.464778774965142,
      "grad_norm": 0.3193569481372833,
      "learning_rate": 3.090674275020921e-06,
      "loss": 0.0076,
      "step": 2117160
    },
    {
      "epoch": 3.4648115054037953,
      "grad_norm": 0.20281203091144562,
      "learning_rate": 3.090608382807404e-06,
      "loss": 0.009,
      "step": 2117180
    },
    {
      "epoch": 3.464844235842449,
      "grad_norm": 0.827200710773468,
      "learning_rate": 3.090542490593887e-06,
      "loss": 0.0157,
      "step": 2117200
    },
    {
      "epoch": 3.464876966281102,
      "grad_norm": 0.23411069810390472,
      "learning_rate": 3.09047659838037e-06,
      "loss": 0.0134,
      "step": 2117220
    },
    {
      "epoch": 3.4649096967197552,
      "grad_norm": 0.39939114451408386,
      "learning_rate": 3.0904107061668525e-06,
      "loss": 0.0108,
      "step": 2117240
    },
    {
      "epoch": 3.464942427158409,
      "grad_norm": 0.8485994338989258,
      "learning_rate": 3.0903448139533353e-06,
      "loss": 0.0136,
      "step": 2117260
    },
    {
      "epoch": 3.464975157597062,
      "grad_norm": 0.1273401528596878,
      "learning_rate": 3.0902789217398184e-06,
      "loss": 0.0125,
      "step": 2117280
    },
    {
      "epoch": 3.4650078880357156,
      "grad_norm": 0.11378587037324905,
      "learning_rate": 3.090213029526301e-06,
      "loss": 0.0105,
      "step": 2117300
    },
    {
      "epoch": 3.4650406184743687,
      "grad_norm": 0.19698090851306915,
      "learning_rate": 3.090147137312784e-06,
      "loss": 0.0114,
      "step": 2117320
    },
    {
      "epoch": 3.4650733489130223,
      "grad_norm": 0.09511301666498184,
      "learning_rate": 3.0900812450992667e-06,
      "loss": 0.0105,
      "step": 2117340
    },
    {
      "epoch": 3.4651060793516755,
      "grad_norm": 0.7792544364929199,
      "learning_rate": 3.0900153528857494e-06,
      "loss": 0.0145,
      "step": 2117360
    },
    {
      "epoch": 3.4651388097903286,
      "grad_norm": 0.1553371101617813,
      "learning_rate": 3.0899494606722326e-06,
      "loss": 0.0072,
      "step": 2117380
    },
    {
      "epoch": 3.465171540228982,
      "grad_norm": 0.21187202632427216,
      "learning_rate": 3.0898835684587153e-06,
      "loss": 0.0132,
      "step": 2117400
    },
    {
      "epoch": 3.4652042706676354,
      "grad_norm": 0.15312881767749786,
      "learning_rate": 3.089817676245198e-06,
      "loss": 0.0111,
      "step": 2117420
    },
    {
      "epoch": 3.465237001106289,
      "grad_norm": 0.47646456956863403,
      "learning_rate": 3.0897517840316808e-06,
      "loss": 0.0148,
      "step": 2117440
    },
    {
      "epoch": 3.465269731544942,
      "grad_norm": 0.2408357411623001,
      "learning_rate": 3.0896858918181644e-06,
      "loss": 0.0093,
      "step": 2117460
    },
    {
      "epoch": 3.4653024619835957,
      "grad_norm": 0.5604873895645142,
      "learning_rate": 3.089619999604647e-06,
      "loss": 0.0115,
      "step": 2117480
    },
    {
      "epoch": 3.465335192422249,
      "grad_norm": 0.31039637327194214,
      "learning_rate": 3.08955410739113e-06,
      "loss": 0.0072,
      "step": 2117500
    },
    {
      "epoch": 3.465367922860902,
      "grad_norm": 0.20703458786010742,
      "learning_rate": 3.089488215177613e-06,
      "loss": 0.0106,
      "step": 2117520
    },
    {
      "epoch": 3.4654006532995556,
      "grad_norm": 2.0987865924835205,
      "learning_rate": 3.0894223229640958e-06,
      "loss": 0.0094,
      "step": 2117540
    },
    {
      "epoch": 3.4654333837382088,
      "grad_norm": 0.24303200840950012,
      "learning_rate": 3.0893564307505785e-06,
      "loss": 0.0134,
      "step": 2117560
    },
    {
      "epoch": 3.4654661141768623,
      "grad_norm": 0.22966843843460083,
      "learning_rate": 3.0892905385370612e-06,
      "loss": 0.0156,
      "step": 2117580
    },
    {
      "epoch": 3.4654988446155155,
      "grad_norm": 0.011191761121153831,
      "learning_rate": 3.0892246463235444e-06,
      "loss": 0.0126,
      "step": 2117600
    },
    {
      "epoch": 3.465531575054169,
      "grad_norm": 0.7486616969108582,
      "learning_rate": 3.089158754110027e-06,
      "loss": 0.0104,
      "step": 2117620
    },
    {
      "epoch": 3.4655643054928222,
      "grad_norm": 0.16905902326107025,
      "learning_rate": 3.08909286189651e-06,
      "loss": 0.0127,
      "step": 2117640
    },
    {
      "epoch": 3.4655970359314754,
      "grad_norm": 0.35065385699272156,
      "learning_rate": 3.0890269696829926e-06,
      "loss": 0.0091,
      "step": 2117660
    },
    {
      "epoch": 3.465629766370129,
      "grad_norm": 0.08106208592653275,
      "learning_rate": 3.0889610774694758e-06,
      "loss": 0.0059,
      "step": 2117680
    },
    {
      "epoch": 3.465662496808782,
      "grad_norm": 0.03782450407743454,
      "learning_rate": 3.0888951852559585e-06,
      "loss": 0.0125,
      "step": 2117700
    },
    {
      "epoch": 3.4656952272474357,
      "grad_norm": 0.11817412078380585,
      "learning_rate": 3.0888292930424413e-06,
      "loss": 0.0084,
      "step": 2117720
    },
    {
      "epoch": 3.465727957686089,
      "grad_norm": 1.0682547092437744,
      "learning_rate": 3.088763400828924e-06,
      "loss": 0.0132,
      "step": 2117740
    },
    {
      "epoch": 3.4657606881247425,
      "grad_norm": 0.19791704416275024,
      "learning_rate": 3.088697508615407e-06,
      "loss": 0.0094,
      "step": 2117760
    },
    {
      "epoch": 3.4657934185633956,
      "grad_norm": 0.56840580701828,
      "learning_rate": 3.08863161640189e-06,
      "loss": 0.0108,
      "step": 2117780
    },
    {
      "epoch": 3.4658261490020488,
      "grad_norm": 0.1779690384864807,
      "learning_rate": 3.0885657241883726e-06,
      "loss": 0.0067,
      "step": 2117800
    },
    {
      "epoch": 3.4658588794407024,
      "grad_norm": 0.08694768697023392,
      "learning_rate": 3.0884998319748562e-06,
      "loss": 0.0128,
      "step": 2117820
    },
    {
      "epoch": 3.4658916098793555,
      "grad_norm": 0.1395154446363449,
      "learning_rate": 3.088433939761339e-06,
      "loss": 0.0141,
      "step": 2117840
    },
    {
      "epoch": 3.465924340318009,
      "grad_norm": 0.0769442617893219,
      "learning_rate": 3.0883680475478217e-06,
      "loss": 0.0088,
      "step": 2117860
    },
    {
      "epoch": 3.4659570707566623,
      "grad_norm": 0.09569107741117477,
      "learning_rate": 3.0883021553343044e-06,
      "loss": 0.0124,
      "step": 2117880
    },
    {
      "epoch": 3.465989801195316,
      "grad_norm": 0.14011111855506897,
      "learning_rate": 3.088236263120787e-06,
      "loss": 0.0136,
      "step": 2117900
    },
    {
      "epoch": 3.466022531633969,
      "grad_norm": 0.17926719784736633,
      "learning_rate": 3.0881703709072703e-06,
      "loss": 0.0136,
      "step": 2117920
    },
    {
      "epoch": 3.466055262072622,
      "grad_norm": 0.48561930656433105,
      "learning_rate": 3.088104478693753e-06,
      "loss": 0.0135,
      "step": 2117940
    },
    {
      "epoch": 3.4660879925112758,
      "grad_norm": 0.20191213488578796,
      "learning_rate": 3.088038586480236e-06,
      "loss": 0.0108,
      "step": 2117960
    },
    {
      "epoch": 3.466120722949929,
      "grad_norm": 0.21583740413188934,
      "learning_rate": 3.0879726942667186e-06,
      "loss": 0.0126,
      "step": 2117980
    },
    {
      "epoch": 3.4661534533885825,
      "grad_norm": 0.10440447181463242,
      "learning_rate": 3.0879068020532017e-06,
      "loss": 0.014,
      "step": 2118000
    },
    {
      "epoch": 3.4661861838272356,
      "grad_norm": 0.40175703167915344,
      "learning_rate": 3.0878409098396845e-06,
      "loss": 0.007,
      "step": 2118020
    },
    {
      "epoch": 3.4662189142658892,
      "grad_norm": 0.5750375986099243,
      "learning_rate": 3.087775017626167e-06,
      "loss": 0.0089,
      "step": 2118040
    },
    {
      "epoch": 3.4662516447045424,
      "grad_norm": 0.13061252236366272,
      "learning_rate": 3.08770912541265e-06,
      "loss": 0.0075,
      "step": 2118060
    },
    {
      "epoch": 3.4662843751431955,
      "grad_norm": 0.5095229744911194,
      "learning_rate": 3.087643233199133e-06,
      "loss": 0.0096,
      "step": 2118080
    },
    {
      "epoch": 3.466317105581849,
      "grad_norm": 0.3208800256252289,
      "learning_rate": 3.087577340985616e-06,
      "loss": 0.0115,
      "step": 2118100
    },
    {
      "epoch": 3.4663498360205023,
      "grad_norm": 0.46989765763282776,
      "learning_rate": 3.0875114487720986e-06,
      "loss": 0.0104,
      "step": 2118120
    },
    {
      "epoch": 3.466382566459156,
      "grad_norm": 0.18156881630420685,
      "learning_rate": 3.0874455565585813e-06,
      "loss": 0.0165,
      "step": 2118140
    },
    {
      "epoch": 3.466415296897809,
      "grad_norm": 0.45746248960494995,
      "learning_rate": 3.087379664345065e-06,
      "loss": 0.0086,
      "step": 2118160
    },
    {
      "epoch": 3.466448027336462,
      "grad_norm": 0.27290070056915283,
      "learning_rate": 3.0873137721315477e-06,
      "loss": 0.0155,
      "step": 2118180
    },
    {
      "epoch": 3.4664807577751158,
      "grad_norm": 0.1647641360759735,
      "learning_rate": 3.0872478799180304e-06,
      "loss": 0.0132,
      "step": 2118200
    },
    {
      "epoch": 3.466513488213769,
      "grad_norm": 0.1522553712129593,
      "learning_rate": 3.0871819877045136e-06,
      "loss": 0.0109,
      "step": 2118220
    },
    {
      "epoch": 3.4665462186524225,
      "grad_norm": 0.26775631308555603,
      "learning_rate": 3.0871160954909963e-06,
      "loss": 0.0084,
      "step": 2118240
    },
    {
      "epoch": 3.4665789490910757,
      "grad_norm": 0.17381127178668976,
      "learning_rate": 3.087050203277479e-06,
      "loss": 0.0103,
      "step": 2118260
    },
    {
      "epoch": 3.466611679529729,
      "grad_norm": 0.06945496052503586,
      "learning_rate": 3.0869843110639618e-06,
      "loss": 0.0097,
      "step": 2118280
    },
    {
      "epoch": 3.4666444099683824,
      "grad_norm": 0.5348142385482788,
      "learning_rate": 3.086918418850445e-06,
      "loss": 0.0094,
      "step": 2118300
    },
    {
      "epoch": 3.4666771404070356,
      "grad_norm": 0.1370115876197815,
      "learning_rate": 3.0868525266369277e-06,
      "loss": 0.0108,
      "step": 2118320
    },
    {
      "epoch": 3.466709870845689,
      "grad_norm": 0.08682885766029358,
      "learning_rate": 3.0867866344234104e-06,
      "loss": 0.0121,
      "step": 2118340
    },
    {
      "epoch": 3.4667426012843423,
      "grad_norm": 0.548861026763916,
      "learning_rate": 3.086720742209893e-06,
      "loss": 0.0138,
      "step": 2118360
    },
    {
      "epoch": 3.466775331722996,
      "grad_norm": 0.21226076781749725,
      "learning_rate": 3.086654849996376e-06,
      "loss": 0.0103,
      "step": 2118380
    },
    {
      "epoch": 3.466808062161649,
      "grad_norm": 0.3823178708553314,
      "learning_rate": 3.086588957782859e-06,
      "loss": 0.0128,
      "step": 2118400
    },
    {
      "epoch": 3.466840792600302,
      "grad_norm": 0.5022853016853333,
      "learning_rate": 3.086523065569342e-06,
      "loss": 0.0159,
      "step": 2118420
    },
    {
      "epoch": 3.466873523038956,
      "grad_norm": 0.1651366800069809,
      "learning_rate": 3.0864571733558245e-06,
      "loss": 0.0106,
      "step": 2118440
    },
    {
      "epoch": 3.466906253477609,
      "grad_norm": 0.38921573758125305,
      "learning_rate": 3.0863912811423073e-06,
      "loss": 0.0111,
      "step": 2118460
    },
    {
      "epoch": 3.4669389839162625,
      "grad_norm": 0.3722222149372101,
      "learning_rate": 3.0863253889287904e-06,
      "loss": 0.0141,
      "step": 2118480
    },
    {
      "epoch": 3.4669717143549157,
      "grad_norm": 0.23544374108314514,
      "learning_rate": 3.086259496715273e-06,
      "loss": 0.0128,
      "step": 2118500
    },
    {
      "epoch": 3.4670044447935693,
      "grad_norm": 0.24243395030498505,
      "learning_rate": 3.0861936045017564e-06,
      "loss": 0.0136,
      "step": 2118520
    },
    {
      "epoch": 3.4670371752322224,
      "grad_norm": 0.17430897057056427,
      "learning_rate": 3.0861277122882395e-06,
      "loss": 0.0119,
      "step": 2118540
    },
    {
      "epoch": 3.4670699056708756,
      "grad_norm": 0.4175766110420227,
      "learning_rate": 3.0860618200747223e-06,
      "loss": 0.0099,
      "step": 2118560
    },
    {
      "epoch": 3.467102636109529,
      "grad_norm": 0.4173423945903778,
      "learning_rate": 3.085995927861205e-06,
      "loss": 0.0112,
      "step": 2118580
    },
    {
      "epoch": 3.4671353665481823,
      "grad_norm": 0.31457921862602234,
      "learning_rate": 3.0859300356476877e-06,
      "loss": 0.0105,
      "step": 2118600
    },
    {
      "epoch": 3.467168096986836,
      "grad_norm": 0.47500351071357727,
      "learning_rate": 3.085864143434171e-06,
      "loss": 0.0101,
      "step": 2118620
    },
    {
      "epoch": 3.467200827425489,
      "grad_norm": 0.548468291759491,
      "learning_rate": 3.0857982512206536e-06,
      "loss": 0.0149,
      "step": 2118640
    },
    {
      "epoch": 3.4672335578641427,
      "grad_norm": 0.1426851600408554,
      "learning_rate": 3.0857323590071364e-06,
      "loss": 0.0113,
      "step": 2118660
    },
    {
      "epoch": 3.467266288302796,
      "grad_norm": 0.29302749037742615,
      "learning_rate": 3.085666466793619e-06,
      "loss": 0.0227,
      "step": 2118680
    },
    {
      "epoch": 3.467299018741449,
      "grad_norm": 0.2411796897649765,
      "learning_rate": 3.0856005745801023e-06,
      "loss": 0.0093,
      "step": 2118700
    },
    {
      "epoch": 3.4673317491801026,
      "grad_norm": 0.25942906737327576,
      "learning_rate": 3.085534682366585e-06,
      "loss": 0.0093,
      "step": 2118720
    },
    {
      "epoch": 3.4673644796187557,
      "grad_norm": 0.18224672973155975,
      "learning_rate": 3.0854687901530678e-06,
      "loss": 0.0096,
      "step": 2118740
    },
    {
      "epoch": 3.4673972100574093,
      "grad_norm": 0.13731206953525543,
      "learning_rate": 3.0854028979395505e-06,
      "loss": 0.0167,
      "step": 2118760
    },
    {
      "epoch": 3.4674299404960625,
      "grad_norm": 0.23147475719451904,
      "learning_rate": 3.0853370057260337e-06,
      "loss": 0.0086,
      "step": 2118780
    },
    {
      "epoch": 3.467462670934716,
      "grad_norm": 0.43301355838775635,
      "learning_rate": 3.0852711135125164e-06,
      "loss": 0.0117,
      "step": 2118800
    },
    {
      "epoch": 3.467495401373369,
      "grad_norm": 0.36511364579200745,
      "learning_rate": 3.085205221298999e-06,
      "loss": 0.0107,
      "step": 2118820
    },
    {
      "epoch": 3.4675281318120224,
      "grad_norm": 0.31445208191871643,
      "learning_rate": 3.085139329085482e-06,
      "loss": 0.0088,
      "step": 2118840
    },
    {
      "epoch": 3.467560862250676,
      "grad_norm": 0.5059908628463745,
      "learning_rate": 3.0850734368719655e-06,
      "loss": 0.0092,
      "step": 2118860
    },
    {
      "epoch": 3.467593592689329,
      "grad_norm": 0.38021883368492126,
      "learning_rate": 3.085007544658448e-06,
      "loss": 0.0126,
      "step": 2118880
    },
    {
      "epoch": 3.4676263231279827,
      "grad_norm": 0.44719198346138,
      "learning_rate": 3.084941652444931e-06,
      "loss": 0.0109,
      "step": 2118900
    },
    {
      "epoch": 3.467659053566636,
      "grad_norm": 0.29684334993362427,
      "learning_rate": 3.0848757602314137e-06,
      "loss": 0.0103,
      "step": 2118920
    },
    {
      "epoch": 3.4676917840052894,
      "grad_norm": 0.14172619581222534,
      "learning_rate": 3.084809868017897e-06,
      "loss": 0.0125,
      "step": 2118940
    },
    {
      "epoch": 3.4677245144439426,
      "grad_norm": 0.12651421129703522,
      "learning_rate": 3.0847439758043796e-06,
      "loss": 0.0086,
      "step": 2118960
    },
    {
      "epoch": 3.4677572448825957,
      "grad_norm": 0.23741963505744934,
      "learning_rate": 3.0846780835908623e-06,
      "loss": 0.0107,
      "step": 2118980
    },
    {
      "epoch": 3.4677899753212493,
      "grad_norm": 0.3587663173675537,
      "learning_rate": 3.084612191377345e-06,
      "loss": 0.0137,
      "step": 2119000
    },
    {
      "epoch": 3.4678227057599025,
      "grad_norm": 0.03760235756635666,
      "learning_rate": 3.0845462991638282e-06,
      "loss": 0.0143,
      "step": 2119020
    },
    {
      "epoch": 3.467855436198556,
      "grad_norm": 0.2806031107902527,
      "learning_rate": 3.084480406950311e-06,
      "loss": 0.0073,
      "step": 2119040
    },
    {
      "epoch": 3.4678881666372092,
      "grad_norm": 0.4175345301628113,
      "learning_rate": 3.0844145147367937e-06,
      "loss": 0.0096,
      "step": 2119060
    },
    {
      "epoch": 3.467920897075863,
      "grad_norm": 0.19136540591716766,
      "learning_rate": 3.0843486225232765e-06,
      "loss": 0.0083,
      "step": 2119080
    },
    {
      "epoch": 3.467953627514516,
      "grad_norm": 0.1275562047958374,
      "learning_rate": 3.0842827303097596e-06,
      "loss": 0.0105,
      "step": 2119100
    },
    {
      "epoch": 3.467986357953169,
      "grad_norm": 0.29461756348609924,
      "learning_rate": 3.0842168380962424e-06,
      "loss": 0.0114,
      "step": 2119120
    },
    {
      "epoch": 3.4680190883918227,
      "grad_norm": 0.3775783181190491,
      "learning_rate": 3.084150945882725e-06,
      "loss": 0.0095,
      "step": 2119140
    },
    {
      "epoch": 3.468051818830476,
      "grad_norm": 0.07710953801870346,
      "learning_rate": 3.084085053669208e-06,
      "loss": 0.0113,
      "step": 2119160
    },
    {
      "epoch": 3.4680845492691295,
      "grad_norm": 0.2159174531698227,
      "learning_rate": 3.084019161455691e-06,
      "loss": 0.0122,
      "step": 2119180
    },
    {
      "epoch": 3.4681172797077826,
      "grad_norm": 0.07802145928144455,
      "learning_rate": 3.0839532692421737e-06,
      "loss": 0.0118,
      "step": 2119200
    },
    {
      "epoch": 3.468150010146436,
      "grad_norm": 0.25611957907676697,
      "learning_rate": 3.083887377028657e-06,
      "loss": 0.0092,
      "step": 2119220
    },
    {
      "epoch": 3.4681827405850894,
      "grad_norm": 0.6935860514640808,
      "learning_rate": 3.08382148481514e-06,
      "loss": 0.0121,
      "step": 2119240
    },
    {
      "epoch": 3.4682154710237425,
      "grad_norm": 0.06352770328521729,
      "learning_rate": 3.083755592601623e-06,
      "loss": 0.0052,
      "step": 2119260
    },
    {
      "epoch": 3.468248201462396,
      "grad_norm": 0.14618609845638275,
      "learning_rate": 3.0836897003881055e-06,
      "loss": 0.0095,
      "step": 2119280
    },
    {
      "epoch": 3.4682809319010492,
      "grad_norm": 0.2751461863517761,
      "learning_rate": 3.0836238081745883e-06,
      "loss": 0.0178,
      "step": 2119300
    },
    {
      "epoch": 3.468313662339703,
      "grad_norm": 0.20638476312160492,
      "learning_rate": 3.0835579159610714e-06,
      "loss": 0.0063,
      "step": 2119320
    },
    {
      "epoch": 3.468346392778356,
      "grad_norm": 0.10348548740148544,
      "learning_rate": 3.083492023747554e-06,
      "loss": 0.0089,
      "step": 2119340
    },
    {
      "epoch": 3.4683791232170096,
      "grad_norm": 0.3861137926578522,
      "learning_rate": 3.083426131534037e-06,
      "loss": 0.008,
      "step": 2119360
    },
    {
      "epoch": 3.4684118536556627,
      "grad_norm": 0.061818238347768784,
      "learning_rate": 3.0833602393205197e-06,
      "loss": 0.0103,
      "step": 2119380
    },
    {
      "epoch": 3.468444584094316,
      "grad_norm": 0.4461318850517273,
      "learning_rate": 3.0832943471070024e-06,
      "loss": 0.0091,
      "step": 2119400
    },
    {
      "epoch": 3.4684773145329695,
      "grad_norm": 0.2980973720550537,
      "learning_rate": 3.0832284548934856e-06,
      "loss": 0.0096,
      "step": 2119420
    },
    {
      "epoch": 3.4685100449716226,
      "grad_norm": 0.12391737848520279,
      "learning_rate": 3.0831625626799683e-06,
      "loss": 0.0154,
      "step": 2119440
    },
    {
      "epoch": 3.4685427754102762,
      "grad_norm": 0.27973416447639465,
      "learning_rate": 3.083096670466451e-06,
      "loss": 0.0103,
      "step": 2119460
    },
    {
      "epoch": 3.4685755058489294,
      "grad_norm": 0.0893217995762825,
      "learning_rate": 3.083030778252934e-06,
      "loss": 0.0095,
      "step": 2119480
    },
    {
      "epoch": 3.468608236287583,
      "grad_norm": 0.19799327850341797,
      "learning_rate": 3.082964886039417e-06,
      "loss": 0.0116,
      "step": 2119500
    },
    {
      "epoch": 3.468640966726236,
      "grad_norm": 0.32316577434539795,
      "learning_rate": 3.0828989938258997e-06,
      "loss": 0.0139,
      "step": 2119520
    },
    {
      "epoch": 3.4686736971648893,
      "grad_norm": 0.23692527413368225,
      "learning_rate": 3.0828331016123824e-06,
      "loss": 0.0084,
      "step": 2119540
    },
    {
      "epoch": 3.468706427603543,
      "grad_norm": 0.22188803553581238,
      "learning_rate": 3.082767209398865e-06,
      "loss": 0.01,
      "step": 2119560
    },
    {
      "epoch": 3.468739158042196,
      "grad_norm": 0.311821311712265,
      "learning_rate": 3.0827013171853488e-06,
      "loss": 0.0112,
      "step": 2119580
    },
    {
      "epoch": 3.4687718884808496,
      "grad_norm": 0.8975521922111511,
      "learning_rate": 3.0826354249718315e-06,
      "loss": 0.0101,
      "step": 2119600
    },
    {
      "epoch": 3.4688046189195028,
      "grad_norm": 0.5373473763465881,
      "learning_rate": 3.0825695327583142e-06,
      "loss": 0.01,
      "step": 2119620
    },
    {
      "epoch": 3.468837349358156,
      "grad_norm": 0.14078514277935028,
      "learning_rate": 3.0825036405447974e-06,
      "loss": 0.0126,
      "step": 2119640
    },
    {
      "epoch": 3.4688700797968095,
      "grad_norm": 0.2709173560142517,
      "learning_rate": 3.08243774833128e-06,
      "loss": 0.0121,
      "step": 2119660
    },
    {
      "epoch": 3.4689028102354627,
      "grad_norm": 0.6222528219223022,
      "learning_rate": 3.082371856117763e-06,
      "loss": 0.0124,
      "step": 2119680
    },
    {
      "epoch": 3.4689355406741162,
      "grad_norm": 0.21328076720237732,
      "learning_rate": 3.0823059639042456e-06,
      "loss": 0.0152,
      "step": 2119700
    },
    {
      "epoch": 3.4689682711127694,
      "grad_norm": 0.1602848619222641,
      "learning_rate": 3.0822400716907288e-06,
      "loss": 0.0114,
      "step": 2119720
    },
    {
      "epoch": 3.4690010015514225,
      "grad_norm": 0.34279221296310425,
      "learning_rate": 3.0821741794772115e-06,
      "loss": 0.0115,
      "step": 2119740
    },
    {
      "epoch": 3.469033731990076,
      "grad_norm": 0.253410667181015,
      "learning_rate": 3.0821082872636943e-06,
      "loss": 0.0111,
      "step": 2119760
    },
    {
      "epoch": 3.4690664624287293,
      "grad_norm": 0.3825356364250183,
      "learning_rate": 3.082042395050177e-06,
      "loss": 0.0115,
      "step": 2119780
    },
    {
      "epoch": 3.469099192867383,
      "grad_norm": 0.7418500185012817,
      "learning_rate": 3.0819765028366597e-06,
      "loss": 0.0143,
      "step": 2119800
    },
    {
      "epoch": 3.469131923306036,
      "grad_norm": 0.15952348709106445,
      "learning_rate": 3.081910610623143e-06,
      "loss": 0.0086,
      "step": 2119820
    },
    {
      "epoch": 3.4691646537446896,
      "grad_norm": 0.15478315949440002,
      "learning_rate": 3.0818447184096256e-06,
      "loss": 0.0087,
      "step": 2119840
    },
    {
      "epoch": 3.469197384183343,
      "grad_norm": 0.13687142729759216,
      "learning_rate": 3.0817788261961084e-06,
      "loss": 0.0094,
      "step": 2119860
    },
    {
      "epoch": 3.469230114621996,
      "grad_norm": 0.33821871876716614,
      "learning_rate": 3.081712933982591e-06,
      "loss": 0.0141,
      "step": 2119880
    },
    {
      "epoch": 3.4692628450606495,
      "grad_norm": 0.19703428447246552,
      "learning_rate": 3.0816470417690743e-06,
      "loss": 0.0107,
      "step": 2119900
    },
    {
      "epoch": 3.4692955754993027,
      "grad_norm": 0.22313077747821808,
      "learning_rate": 3.0815811495555575e-06,
      "loss": 0.0103,
      "step": 2119920
    },
    {
      "epoch": 3.4693283059379563,
      "grad_norm": 0.33360621333122253,
      "learning_rate": 3.08151525734204e-06,
      "loss": 0.0121,
      "step": 2119940
    },
    {
      "epoch": 3.4693610363766094,
      "grad_norm": 0.5218497514724731,
      "learning_rate": 3.0814493651285234e-06,
      "loss": 0.009,
      "step": 2119960
    },
    {
      "epoch": 3.469393766815263,
      "grad_norm": 0.2215537279844284,
      "learning_rate": 3.081383472915006e-06,
      "loss": 0.0127,
      "step": 2119980
    },
    {
      "epoch": 3.469426497253916,
      "grad_norm": 1.016764521598816,
      "learning_rate": 3.081317580701489e-06,
      "loss": 0.0121,
      "step": 2120000
    },
    {
      "epoch": 3.4694592276925693,
      "grad_norm": 0.27412891387939453,
      "learning_rate": 3.0812516884879716e-06,
      "loss": 0.0084,
      "step": 2120020
    },
    {
      "epoch": 3.469491958131223,
      "grad_norm": 0.5644497871398926,
      "learning_rate": 3.0811857962744547e-06,
      "loss": 0.0158,
      "step": 2120040
    },
    {
      "epoch": 3.469524688569876,
      "grad_norm": 0.24122005701065063,
      "learning_rate": 3.0811199040609375e-06,
      "loss": 0.0103,
      "step": 2120060
    },
    {
      "epoch": 3.4695574190085297,
      "grad_norm": 0.2926497459411621,
      "learning_rate": 3.0810540118474202e-06,
      "loss": 0.0073,
      "step": 2120080
    },
    {
      "epoch": 3.469590149447183,
      "grad_norm": 0.3284511864185333,
      "learning_rate": 3.080988119633903e-06,
      "loss": 0.0123,
      "step": 2120100
    },
    {
      "epoch": 3.4696228798858364,
      "grad_norm": 0.09027861058712006,
      "learning_rate": 3.080922227420386e-06,
      "loss": 0.0149,
      "step": 2120120
    },
    {
      "epoch": 3.4696556103244895,
      "grad_norm": 0.281698614358902,
      "learning_rate": 3.080856335206869e-06,
      "loss": 0.0076,
      "step": 2120140
    },
    {
      "epoch": 3.4696883407631427,
      "grad_norm": 0.19830267131328583,
      "learning_rate": 3.0807904429933516e-06,
      "loss": 0.0105,
      "step": 2120160
    },
    {
      "epoch": 3.4697210712017963,
      "grad_norm": 0.25667843222618103,
      "learning_rate": 3.0807245507798343e-06,
      "loss": 0.0071,
      "step": 2120180
    },
    {
      "epoch": 3.4697538016404494,
      "grad_norm": 0.3154904246330261,
      "learning_rate": 3.0806586585663175e-06,
      "loss": 0.0142,
      "step": 2120200
    },
    {
      "epoch": 3.469786532079103,
      "grad_norm": 0.2584315538406372,
      "learning_rate": 3.0805927663528002e-06,
      "loss": 0.0117,
      "step": 2120220
    },
    {
      "epoch": 3.469819262517756,
      "grad_norm": 0.13250109553337097,
      "learning_rate": 3.080526874139283e-06,
      "loss": 0.0109,
      "step": 2120240
    },
    {
      "epoch": 3.46985199295641,
      "grad_norm": 0.2522537112236023,
      "learning_rate": 3.0804609819257657e-06,
      "loss": 0.0136,
      "step": 2120260
    },
    {
      "epoch": 3.469884723395063,
      "grad_norm": 0.16681131720542908,
      "learning_rate": 3.0803950897122493e-06,
      "loss": 0.0117,
      "step": 2120280
    },
    {
      "epoch": 3.469917453833716,
      "grad_norm": 0.0927526131272316,
      "learning_rate": 3.080329197498732e-06,
      "loss": 0.0156,
      "step": 2120300
    },
    {
      "epoch": 3.4699501842723697,
      "grad_norm": 0.7420349717140198,
      "learning_rate": 3.0802633052852148e-06,
      "loss": 0.0088,
      "step": 2120320
    },
    {
      "epoch": 3.469982914711023,
      "grad_norm": 0.09248661249876022,
      "learning_rate": 3.0801974130716975e-06,
      "loss": 0.0065,
      "step": 2120340
    },
    {
      "epoch": 3.4700156451496764,
      "grad_norm": 0.5911438465118408,
      "learning_rate": 3.0801315208581807e-06,
      "loss": 0.014,
      "step": 2120360
    },
    {
      "epoch": 3.4700483755883296,
      "grad_norm": 0.09091192483901978,
      "learning_rate": 3.0800656286446634e-06,
      "loss": 0.009,
      "step": 2120380
    },
    {
      "epoch": 3.470081106026983,
      "grad_norm": 0.11229733377695084,
      "learning_rate": 3.079999736431146e-06,
      "loss": 0.0117,
      "step": 2120400
    },
    {
      "epoch": 3.4701138364656363,
      "grad_norm": 0.5698788166046143,
      "learning_rate": 3.079933844217629e-06,
      "loss": 0.0157,
      "step": 2120420
    },
    {
      "epoch": 3.4701465669042895,
      "grad_norm": 0.30359145998954773,
      "learning_rate": 3.079867952004112e-06,
      "loss": 0.008,
      "step": 2120440
    },
    {
      "epoch": 3.470179297342943,
      "grad_norm": 0.11874349415302277,
      "learning_rate": 3.079802059790595e-06,
      "loss": 0.0114,
      "step": 2120460
    },
    {
      "epoch": 3.470212027781596,
      "grad_norm": 0.19052112102508545,
      "learning_rate": 3.0797361675770776e-06,
      "loss": 0.0128,
      "step": 2120480
    },
    {
      "epoch": 3.47024475822025,
      "grad_norm": 0.534085750579834,
      "learning_rate": 3.0796702753635603e-06,
      "loss": 0.0093,
      "step": 2120500
    },
    {
      "epoch": 3.470277488658903,
      "grad_norm": 0.2985387146472931,
      "learning_rate": 3.0796043831500435e-06,
      "loss": 0.01,
      "step": 2120520
    },
    {
      "epoch": 3.4703102190975565,
      "grad_norm": 0.21510900557041168,
      "learning_rate": 3.079538490936526e-06,
      "loss": 0.0061,
      "step": 2120540
    },
    {
      "epoch": 3.4703429495362097,
      "grad_norm": 0.6442261934280396,
      "learning_rate": 3.079472598723009e-06,
      "loss": 0.0155,
      "step": 2120560
    },
    {
      "epoch": 3.470375679974863,
      "grad_norm": 0.12595780193805695,
      "learning_rate": 3.0794067065094917e-06,
      "loss": 0.0112,
      "step": 2120580
    },
    {
      "epoch": 3.4704084104135164,
      "grad_norm": 1.145305871963501,
      "learning_rate": 3.079340814295975e-06,
      "loss": 0.0147,
      "step": 2120600
    },
    {
      "epoch": 3.4704411408521696,
      "grad_norm": 0.07054007798433304,
      "learning_rate": 3.079274922082458e-06,
      "loss": 0.011,
      "step": 2120620
    },
    {
      "epoch": 3.470473871290823,
      "grad_norm": 0.12199168652296066,
      "learning_rate": 3.0792090298689407e-06,
      "loss": 0.0079,
      "step": 2120640
    },
    {
      "epoch": 3.4705066017294763,
      "grad_norm": 0.3383764922618866,
      "learning_rate": 3.079143137655424e-06,
      "loss": 0.0092,
      "step": 2120660
    },
    {
      "epoch": 3.47053933216813,
      "grad_norm": 0.2306700050830841,
      "learning_rate": 3.0790772454419066e-06,
      "loss": 0.0127,
      "step": 2120680
    },
    {
      "epoch": 3.470572062606783,
      "grad_norm": 0.333065003156662,
      "learning_rate": 3.0790113532283894e-06,
      "loss": 0.0076,
      "step": 2120700
    },
    {
      "epoch": 3.4706047930454362,
      "grad_norm": 0.6160685420036316,
      "learning_rate": 3.078945461014872e-06,
      "loss": 0.0122,
      "step": 2120720
    },
    {
      "epoch": 3.47063752348409,
      "grad_norm": 0.37016499042510986,
      "learning_rate": 3.0788795688013553e-06,
      "loss": 0.0118,
      "step": 2120740
    },
    {
      "epoch": 3.470670253922743,
      "grad_norm": 0.29984843730926514,
      "learning_rate": 3.078813676587838e-06,
      "loss": 0.0139,
      "step": 2120760
    },
    {
      "epoch": 3.4707029843613966,
      "grad_norm": 0.05671018362045288,
      "learning_rate": 3.0787477843743208e-06,
      "loss": 0.0141,
      "step": 2120780
    },
    {
      "epoch": 3.4707357148000497,
      "grad_norm": 0.19648410379886627,
      "learning_rate": 3.0786818921608035e-06,
      "loss": 0.0173,
      "step": 2120800
    },
    {
      "epoch": 3.4707684452387033,
      "grad_norm": 0.34794479608535767,
      "learning_rate": 3.0786159999472862e-06,
      "loss": 0.0129,
      "step": 2120820
    },
    {
      "epoch": 3.4708011756773565,
      "grad_norm": 0.049384985119104385,
      "learning_rate": 3.0785501077337694e-06,
      "loss": 0.0116,
      "step": 2120840
    },
    {
      "epoch": 3.4708339061160096,
      "grad_norm": 0.23738525807857513,
      "learning_rate": 3.078484215520252e-06,
      "loss": 0.0104,
      "step": 2120860
    },
    {
      "epoch": 3.470866636554663,
      "grad_norm": 0.060911353677511215,
      "learning_rate": 3.078418323306735e-06,
      "loss": 0.0128,
      "step": 2120880
    },
    {
      "epoch": 3.4708993669933164,
      "grad_norm": 0.26789888739585876,
      "learning_rate": 3.0783524310932176e-06,
      "loss": 0.0119,
      "step": 2120900
    },
    {
      "epoch": 3.47093209743197,
      "grad_norm": 0.23578409850597382,
      "learning_rate": 3.078286538879701e-06,
      "loss": 0.0086,
      "step": 2120920
    },
    {
      "epoch": 3.470964827870623,
      "grad_norm": 0.318949431180954,
      "learning_rate": 3.0782206466661835e-06,
      "loss": 0.0124,
      "step": 2120940
    },
    {
      "epoch": 3.4709975583092767,
      "grad_norm": 0.9879442453384399,
      "learning_rate": 3.0781547544526663e-06,
      "loss": 0.0074,
      "step": 2120960
    },
    {
      "epoch": 3.47103028874793,
      "grad_norm": 0.4034569561481476,
      "learning_rate": 3.07808886223915e-06,
      "loss": 0.0113,
      "step": 2120980
    },
    {
      "epoch": 3.471063019186583,
      "grad_norm": 0.21542491018772125,
      "learning_rate": 3.0780229700256326e-06,
      "loss": 0.0121,
      "step": 2121000
    },
    {
      "epoch": 3.4710957496252366,
      "grad_norm": 0.37323349714279175,
      "learning_rate": 3.0779570778121153e-06,
      "loss": 0.0074,
      "step": 2121020
    },
    {
      "epoch": 3.4711284800638897,
      "grad_norm": 0.44009968638420105,
      "learning_rate": 3.077891185598598e-06,
      "loss": 0.0116,
      "step": 2121040
    },
    {
      "epoch": 3.4711612105025433,
      "grad_norm": 0.1541937291622162,
      "learning_rate": 3.0778252933850812e-06,
      "loss": 0.0068,
      "step": 2121060
    },
    {
      "epoch": 3.4711939409411965,
      "grad_norm": 0.3194747269153595,
      "learning_rate": 3.077759401171564e-06,
      "loss": 0.0114,
      "step": 2121080
    },
    {
      "epoch": 3.47122667137985,
      "grad_norm": 0.19139501452445984,
      "learning_rate": 3.0776935089580467e-06,
      "loss": 0.0097,
      "step": 2121100
    },
    {
      "epoch": 3.4712594018185032,
      "grad_norm": 0.9439898729324341,
      "learning_rate": 3.0776276167445295e-06,
      "loss": 0.0104,
      "step": 2121120
    },
    {
      "epoch": 3.4712921322571564,
      "grad_norm": 0.3462679088115692,
      "learning_rate": 3.0775617245310126e-06,
      "loss": 0.0077,
      "step": 2121140
    },
    {
      "epoch": 3.47132486269581,
      "grad_norm": 0.5699930191040039,
      "learning_rate": 3.0774958323174954e-06,
      "loss": 0.0125,
      "step": 2121160
    },
    {
      "epoch": 3.471357593134463,
      "grad_norm": 0.8915983438491821,
      "learning_rate": 3.077429940103978e-06,
      "loss": 0.0109,
      "step": 2121180
    },
    {
      "epoch": 3.4713903235731167,
      "grad_norm": 0.561371386051178,
      "learning_rate": 3.077364047890461e-06,
      "loss": 0.0139,
      "step": 2121200
    },
    {
      "epoch": 3.47142305401177,
      "grad_norm": 0.4639521539211273,
      "learning_rate": 3.0772981556769436e-06,
      "loss": 0.0114,
      "step": 2121220
    },
    {
      "epoch": 3.471455784450423,
      "grad_norm": 0.1556384265422821,
      "learning_rate": 3.0772322634634267e-06,
      "loss": 0.0112,
      "step": 2121240
    },
    {
      "epoch": 3.4714885148890766,
      "grad_norm": 0.3038959205150604,
      "learning_rate": 3.0771663712499095e-06,
      "loss": 0.0102,
      "step": 2121260
    },
    {
      "epoch": 3.4715212453277298,
      "grad_norm": 0.3695654571056366,
      "learning_rate": 3.0771004790363922e-06,
      "loss": 0.0125,
      "step": 2121280
    },
    {
      "epoch": 3.4715539757663834,
      "grad_norm": 0.2769320607185364,
      "learning_rate": 3.077034586822875e-06,
      "loss": 0.0116,
      "step": 2121300
    },
    {
      "epoch": 3.4715867062050365,
      "grad_norm": 0.6218625903129578,
      "learning_rate": 3.076968694609358e-06,
      "loss": 0.0125,
      "step": 2121320
    },
    {
      "epoch": 3.4716194366436897,
      "grad_norm": 0.11713200807571411,
      "learning_rate": 3.0769028023958413e-06,
      "loss": 0.0119,
      "step": 2121340
    },
    {
      "epoch": 3.4716521670823433,
      "grad_norm": 0.17100563645362854,
      "learning_rate": 3.076836910182324e-06,
      "loss": 0.0118,
      "step": 2121360
    },
    {
      "epoch": 3.4716848975209964,
      "grad_norm": 0.563904345035553,
      "learning_rate": 3.076771017968807e-06,
      "loss": 0.013,
      "step": 2121380
    },
    {
      "epoch": 3.47171762795965,
      "grad_norm": 0.32231202721595764,
      "learning_rate": 3.07670512575529e-06,
      "loss": 0.0142,
      "step": 2121400
    },
    {
      "epoch": 3.471750358398303,
      "grad_norm": 0.18377260863780975,
      "learning_rate": 3.0766392335417727e-06,
      "loss": 0.0083,
      "step": 2121420
    },
    {
      "epoch": 3.4717830888369567,
      "grad_norm": 0.31321942806243896,
      "learning_rate": 3.0765733413282554e-06,
      "loss": 0.0082,
      "step": 2121440
    },
    {
      "epoch": 3.47181581927561,
      "grad_norm": 0.3365790843963623,
      "learning_rate": 3.0765074491147386e-06,
      "loss": 0.0122,
      "step": 2121460
    },
    {
      "epoch": 3.471848549714263,
      "grad_norm": 0.6303203105926514,
      "learning_rate": 3.0764415569012213e-06,
      "loss": 0.0087,
      "step": 2121480
    },
    {
      "epoch": 3.4718812801529166,
      "grad_norm": 0.3815450370311737,
      "learning_rate": 3.076375664687704e-06,
      "loss": 0.0113,
      "step": 2121500
    },
    {
      "epoch": 3.47191401059157,
      "grad_norm": 0.1405668705701828,
      "learning_rate": 3.076309772474187e-06,
      "loss": 0.0105,
      "step": 2121520
    },
    {
      "epoch": 3.4719467410302234,
      "grad_norm": 0.27581992745399475,
      "learning_rate": 3.07624388026067e-06,
      "loss": 0.0111,
      "step": 2121540
    },
    {
      "epoch": 3.4719794714688765,
      "grad_norm": 0.09182646125555038,
      "learning_rate": 3.0761779880471527e-06,
      "loss": 0.0111,
      "step": 2121560
    },
    {
      "epoch": 3.47201220190753,
      "grad_norm": 0.3434322476387024,
      "learning_rate": 3.0761120958336354e-06,
      "loss": 0.0081,
      "step": 2121580
    },
    {
      "epoch": 3.4720449323461833,
      "grad_norm": 0.37831851840019226,
      "learning_rate": 3.076046203620118e-06,
      "loss": 0.0183,
      "step": 2121600
    },
    {
      "epoch": 3.4720776627848364,
      "grad_norm": 0.11540066450834274,
      "learning_rate": 3.0759803114066013e-06,
      "loss": 0.0112,
      "step": 2121620
    },
    {
      "epoch": 3.47211039322349,
      "grad_norm": 0.4878539443016052,
      "learning_rate": 3.075914419193084e-06,
      "loss": 0.0107,
      "step": 2121640
    },
    {
      "epoch": 3.472143123662143,
      "grad_norm": 0.284540593624115,
      "learning_rate": 3.075848526979567e-06,
      "loss": 0.0103,
      "step": 2121660
    },
    {
      "epoch": 3.4721758541007968,
      "grad_norm": 0.20618808269500732,
      "learning_rate": 3.0757826347660504e-06,
      "loss": 0.0074,
      "step": 2121680
    },
    {
      "epoch": 3.47220858453945,
      "grad_norm": 0.21431130170822144,
      "learning_rate": 3.075716742552533e-06,
      "loss": 0.0072,
      "step": 2121700
    },
    {
      "epoch": 3.4722413149781035,
      "grad_norm": 0.63581782579422,
      "learning_rate": 3.075650850339016e-06,
      "loss": 0.0176,
      "step": 2121720
    },
    {
      "epoch": 3.4722740454167567,
      "grad_norm": 0.27085381746292114,
      "learning_rate": 3.0755849581254986e-06,
      "loss": 0.0096,
      "step": 2121740
    },
    {
      "epoch": 3.47230677585541,
      "grad_norm": 0.11705569922924042,
      "learning_rate": 3.0755190659119814e-06,
      "loss": 0.0107,
      "step": 2121760
    },
    {
      "epoch": 3.4723395062940634,
      "grad_norm": 0.12415388971567154,
      "learning_rate": 3.0754531736984645e-06,
      "loss": 0.0092,
      "step": 2121780
    },
    {
      "epoch": 3.4723722367327166,
      "grad_norm": 0.2478645294904709,
      "learning_rate": 3.0753872814849473e-06,
      "loss": 0.0108,
      "step": 2121800
    },
    {
      "epoch": 3.47240496717137,
      "grad_norm": 0.6384916305541992,
      "learning_rate": 3.07532138927143e-06,
      "loss": 0.0097,
      "step": 2121820
    },
    {
      "epoch": 3.4724376976100233,
      "grad_norm": 0.49092334508895874,
      "learning_rate": 3.0752554970579127e-06,
      "loss": 0.0204,
      "step": 2121840
    },
    {
      "epoch": 3.472470428048677,
      "grad_norm": 0.4839078485965729,
      "learning_rate": 3.075189604844396e-06,
      "loss": 0.0087,
      "step": 2121860
    },
    {
      "epoch": 3.47250315848733,
      "grad_norm": 0.0793863832950592,
      "learning_rate": 3.0751237126308787e-06,
      "loss": 0.0119,
      "step": 2121880
    },
    {
      "epoch": 3.472535888925983,
      "grad_norm": 0.8007064461708069,
      "learning_rate": 3.0750578204173614e-06,
      "loss": 0.0172,
      "step": 2121900
    },
    {
      "epoch": 3.472568619364637,
      "grad_norm": 0.3126430809497833,
      "learning_rate": 3.074991928203844e-06,
      "loss": 0.0081,
      "step": 2121920
    },
    {
      "epoch": 3.47260134980329,
      "grad_norm": 0.36125314235687256,
      "learning_rate": 3.0749260359903273e-06,
      "loss": 0.0206,
      "step": 2121940
    },
    {
      "epoch": 3.4726340802419435,
      "grad_norm": 0.18475255370140076,
      "learning_rate": 3.07486014377681e-06,
      "loss": 0.0121,
      "step": 2121960
    },
    {
      "epoch": 3.4726668106805967,
      "grad_norm": 0.19088013470172882,
      "learning_rate": 3.0747942515632928e-06,
      "loss": 0.0076,
      "step": 2121980
    },
    {
      "epoch": 3.4726995411192503,
      "grad_norm": 0.2053879052400589,
      "learning_rate": 3.0747283593497755e-06,
      "loss": 0.0091,
      "step": 2122000
    },
    {
      "epoch": 3.4727322715579034,
      "grad_norm": 0.15189217031002045,
      "learning_rate": 3.0746624671362587e-06,
      "loss": 0.0139,
      "step": 2122020
    },
    {
      "epoch": 3.4727650019965566,
      "grad_norm": 0.2866312265396118,
      "learning_rate": 3.074596574922742e-06,
      "loss": 0.011,
      "step": 2122040
    },
    {
      "epoch": 3.47279773243521,
      "grad_norm": 0.14812426269054413,
      "learning_rate": 3.0745306827092246e-06,
      "loss": 0.0094,
      "step": 2122060
    },
    {
      "epoch": 3.4728304628738633,
      "grad_norm": 0.6293691992759705,
      "learning_rate": 3.0744647904957077e-06,
      "loss": 0.0156,
      "step": 2122080
    },
    {
      "epoch": 3.472863193312517,
      "grad_norm": 0.27759525179862976,
      "learning_rate": 3.0743988982821905e-06,
      "loss": 0.0103,
      "step": 2122100
    },
    {
      "epoch": 3.47289592375117,
      "grad_norm": 0.21668550372123718,
      "learning_rate": 3.0743330060686732e-06,
      "loss": 0.0113,
      "step": 2122120
    },
    {
      "epoch": 3.4729286541898237,
      "grad_norm": 0.16320137679576874,
      "learning_rate": 3.074267113855156e-06,
      "loss": 0.0148,
      "step": 2122140
    },
    {
      "epoch": 3.472961384628477,
      "grad_norm": 0.04678958281874657,
      "learning_rate": 3.074201221641639e-06,
      "loss": 0.0063,
      "step": 2122160
    },
    {
      "epoch": 3.47299411506713,
      "grad_norm": 0.3851824700832367,
      "learning_rate": 3.074135329428122e-06,
      "loss": 0.0176,
      "step": 2122180
    },
    {
      "epoch": 3.4730268455057836,
      "grad_norm": 0.12112364917993546,
      "learning_rate": 3.0740694372146046e-06,
      "loss": 0.0119,
      "step": 2122200
    },
    {
      "epoch": 3.4730595759444367,
      "grad_norm": 0.12472542375326157,
      "learning_rate": 3.0740035450010873e-06,
      "loss": 0.0195,
      "step": 2122220
    },
    {
      "epoch": 3.4730923063830903,
      "grad_norm": 0.12490066885948181,
      "learning_rate": 3.07393765278757e-06,
      "loss": 0.0125,
      "step": 2122240
    },
    {
      "epoch": 3.4731250368217434,
      "grad_norm": 0.1893095225095749,
      "learning_rate": 3.0738717605740532e-06,
      "loss": 0.0075,
      "step": 2122260
    },
    {
      "epoch": 3.473157767260397,
      "grad_norm": 0.1359192579984665,
      "learning_rate": 3.073805868360536e-06,
      "loss": 0.016,
      "step": 2122280
    },
    {
      "epoch": 3.47319049769905,
      "grad_norm": 0.3989664316177368,
      "learning_rate": 3.0737399761470187e-06,
      "loss": 0.0153,
      "step": 2122300
    },
    {
      "epoch": 3.4732232281377033,
      "grad_norm": 0.29071953892707825,
      "learning_rate": 3.0736740839335015e-06,
      "loss": 0.0148,
      "step": 2122320
    },
    {
      "epoch": 3.473255958576357,
      "grad_norm": 0.48408347368240356,
      "learning_rate": 3.0736081917199846e-06,
      "loss": 0.0125,
      "step": 2122340
    },
    {
      "epoch": 3.47328868901501,
      "grad_norm": 0.4868495762348175,
      "learning_rate": 3.0735422995064674e-06,
      "loss": 0.0162,
      "step": 2122360
    },
    {
      "epoch": 3.4733214194536637,
      "grad_norm": 0.14956191182136536,
      "learning_rate": 3.0734764072929505e-06,
      "loss": 0.0125,
      "step": 2122380
    },
    {
      "epoch": 3.473354149892317,
      "grad_norm": 0.2700358033180237,
      "learning_rate": 3.0734105150794337e-06,
      "loss": 0.0102,
      "step": 2122400
    },
    {
      "epoch": 3.4733868803309704,
      "grad_norm": 0.12883403897285461,
      "learning_rate": 3.0733446228659164e-06,
      "loss": 0.0152,
      "step": 2122420
    },
    {
      "epoch": 3.4734196107696236,
      "grad_norm": 0.03506721183657646,
      "learning_rate": 3.073278730652399e-06,
      "loss": 0.0109,
      "step": 2122440
    },
    {
      "epoch": 3.4734523412082767,
      "grad_norm": 0.12686410546302795,
      "learning_rate": 3.073212838438882e-06,
      "loss": 0.0114,
      "step": 2122460
    },
    {
      "epoch": 3.4734850716469303,
      "grad_norm": 0.16379304230213165,
      "learning_rate": 3.073146946225365e-06,
      "loss": 0.0079,
      "step": 2122480
    },
    {
      "epoch": 3.4735178020855835,
      "grad_norm": 0.09662953019142151,
      "learning_rate": 3.073081054011848e-06,
      "loss": 0.0058,
      "step": 2122500
    },
    {
      "epoch": 3.473550532524237,
      "grad_norm": 0.4552435576915741,
      "learning_rate": 3.0730151617983306e-06,
      "loss": 0.011,
      "step": 2122520
    },
    {
      "epoch": 3.47358326296289,
      "grad_norm": 0.7242996692657471,
      "learning_rate": 3.0729492695848133e-06,
      "loss": 0.012,
      "step": 2122540
    },
    {
      "epoch": 3.473615993401544,
      "grad_norm": 0.18451349437236786,
      "learning_rate": 3.0728833773712965e-06,
      "loss": 0.0115,
      "step": 2122560
    },
    {
      "epoch": 3.473648723840197,
      "grad_norm": 0.5500516295433044,
      "learning_rate": 3.072817485157779e-06,
      "loss": 0.0109,
      "step": 2122580
    },
    {
      "epoch": 3.47368145427885,
      "grad_norm": 0.05784853175282478,
      "learning_rate": 3.072751592944262e-06,
      "loss": 0.0095,
      "step": 2122600
    },
    {
      "epoch": 3.4737141847175037,
      "grad_norm": 0.23636218905448914,
      "learning_rate": 3.0726857007307447e-06,
      "loss": 0.014,
      "step": 2122620
    },
    {
      "epoch": 3.473746915156157,
      "grad_norm": 0.07815425097942352,
      "learning_rate": 3.0726198085172274e-06,
      "loss": 0.0104,
      "step": 2122640
    },
    {
      "epoch": 3.4737796455948105,
      "grad_norm": 0.3634563982486725,
      "learning_rate": 3.0725539163037106e-06,
      "loss": 0.0138,
      "step": 2122660
    },
    {
      "epoch": 3.4738123760334636,
      "grad_norm": 0.19735422730445862,
      "learning_rate": 3.0724880240901933e-06,
      "loss": 0.009,
      "step": 2122680
    },
    {
      "epoch": 3.4738451064721168,
      "grad_norm": 0.1432308703660965,
      "learning_rate": 3.072422131876676e-06,
      "loss": 0.0072,
      "step": 2122700
    },
    {
      "epoch": 3.4738778369107703,
      "grad_norm": 0.578645646572113,
      "learning_rate": 3.072356239663159e-06,
      "loss": 0.0102,
      "step": 2122720
    },
    {
      "epoch": 3.4739105673494235,
      "grad_norm": 0.1949552297592163,
      "learning_rate": 3.0722903474496424e-06,
      "loss": 0.0093,
      "step": 2122740
    },
    {
      "epoch": 3.473943297788077,
      "grad_norm": 0.17653100192546844,
      "learning_rate": 3.072224455236125e-06,
      "loss": 0.0089,
      "step": 2122760
    },
    {
      "epoch": 3.4739760282267302,
      "grad_norm": 0.16283197700977325,
      "learning_rate": 3.072158563022608e-06,
      "loss": 0.01,
      "step": 2122780
    },
    {
      "epoch": 3.4740087586653834,
      "grad_norm": 0.06777903437614441,
      "learning_rate": 3.072092670809091e-06,
      "loss": 0.0109,
      "step": 2122800
    },
    {
      "epoch": 3.474041489104037,
      "grad_norm": 0.2403068095445633,
      "learning_rate": 3.0720267785955738e-06,
      "loss": 0.0112,
      "step": 2122820
    },
    {
      "epoch": 3.47407421954269,
      "grad_norm": 1.1471507549285889,
      "learning_rate": 3.0719608863820565e-06,
      "loss": 0.0138,
      "step": 2122840
    },
    {
      "epoch": 3.4741069499813437,
      "grad_norm": 0.12077707052230835,
      "learning_rate": 3.0718949941685393e-06,
      "loss": 0.0115,
      "step": 2122860
    },
    {
      "epoch": 3.474139680419997,
      "grad_norm": 0.11808081716299057,
      "learning_rate": 3.0718291019550224e-06,
      "loss": 0.0173,
      "step": 2122880
    },
    {
      "epoch": 3.4741724108586505,
      "grad_norm": 0.22605912387371063,
      "learning_rate": 3.071763209741505e-06,
      "loss": 0.0101,
      "step": 2122900
    },
    {
      "epoch": 3.4742051412973036,
      "grad_norm": 0.3176092505455017,
      "learning_rate": 3.071697317527988e-06,
      "loss": 0.01,
      "step": 2122920
    },
    {
      "epoch": 3.4742378717359568,
      "grad_norm": 0.6002077460289001,
      "learning_rate": 3.0716314253144706e-06,
      "loss": 0.0118,
      "step": 2122940
    },
    {
      "epoch": 3.4742706021746104,
      "grad_norm": 0.3564337193965912,
      "learning_rate": 3.071565533100954e-06,
      "loss": 0.0163,
      "step": 2122960
    },
    {
      "epoch": 3.4743033326132635,
      "grad_norm": 0.17647317051887512,
      "learning_rate": 3.0714996408874365e-06,
      "loss": 0.0098,
      "step": 2122980
    },
    {
      "epoch": 3.474336063051917,
      "grad_norm": 0.40106433629989624,
      "learning_rate": 3.0714337486739193e-06,
      "loss": 0.0128,
      "step": 2123000
    },
    {
      "epoch": 3.4743687934905703,
      "grad_norm": 0.3941899836063385,
      "learning_rate": 3.071367856460402e-06,
      "loss": 0.0146,
      "step": 2123020
    },
    {
      "epoch": 3.474401523929224,
      "grad_norm": 0.09588094055652618,
      "learning_rate": 3.071301964246885e-06,
      "loss": 0.0116,
      "step": 2123040
    },
    {
      "epoch": 3.474434254367877,
      "grad_norm": 0.29832056164741516,
      "learning_rate": 3.071236072033368e-06,
      "loss": 0.014,
      "step": 2123060
    },
    {
      "epoch": 3.47446698480653,
      "grad_norm": 0.15954482555389404,
      "learning_rate": 3.071170179819851e-06,
      "loss": 0.0104,
      "step": 2123080
    },
    {
      "epoch": 3.4744997152451838,
      "grad_norm": 0.2648653984069824,
      "learning_rate": 3.0711042876063342e-06,
      "loss": 0.0124,
      "step": 2123100
    },
    {
      "epoch": 3.474532445683837,
      "grad_norm": 0.8776195645332336,
      "learning_rate": 3.071038395392817e-06,
      "loss": 0.0143,
      "step": 2123120
    },
    {
      "epoch": 3.4745651761224905,
      "grad_norm": 0.31038302183151245,
      "learning_rate": 3.0709725031792997e-06,
      "loss": 0.0179,
      "step": 2123140
    },
    {
      "epoch": 3.4745979065611436,
      "grad_norm": 0.15153732895851135,
      "learning_rate": 3.0709066109657825e-06,
      "loss": 0.0139,
      "step": 2123160
    },
    {
      "epoch": 3.4746306369997972,
      "grad_norm": 0.34601226449012756,
      "learning_rate": 3.070840718752265e-06,
      "loss": 0.0102,
      "step": 2123180
    },
    {
      "epoch": 3.4746633674384504,
      "grad_norm": 0.4059559106826782,
      "learning_rate": 3.0707748265387484e-06,
      "loss": 0.0109,
      "step": 2123200
    },
    {
      "epoch": 3.4746960978771035,
      "grad_norm": 0.07632423937320709,
      "learning_rate": 3.070708934325231e-06,
      "loss": 0.0077,
      "step": 2123220
    },
    {
      "epoch": 3.474728828315757,
      "grad_norm": 0.4703071117401123,
      "learning_rate": 3.070643042111714e-06,
      "loss": 0.0095,
      "step": 2123240
    },
    {
      "epoch": 3.4747615587544103,
      "grad_norm": 0.4810319244861603,
      "learning_rate": 3.0705771498981966e-06,
      "loss": 0.0114,
      "step": 2123260
    },
    {
      "epoch": 3.474794289193064,
      "grad_norm": 0.33542531728744507,
      "learning_rate": 3.0705112576846798e-06,
      "loss": 0.0161,
      "step": 2123280
    },
    {
      "epoch": 3.474827019631717,
      "grad_norm": 0.11294685304164886,
      "learning_rate": 3.0704453654711625e-06,
      "loss": 0.0084,
      "step": 2123300
    },
    {
      "epoch": 3.4748597500703706,
      "grad_norm": 0.44476160407066345,
      "learning_rate": 3.0703794732576452e-06,
      "loss": 0.0111,
      "step": 2123320
    },
    {
      "epoch": 3.4748924805090238,
      "grad_norm": 0.18485884368419647,
      "learning_rate": 3.070313581044128e-06,
      "loss": 0.0115,
      "step": 2123340
    },
    {
      "epoch": 3.474925210947677,
      "grad_norm": 0.2574026584625244,
      "learning_rate": 3.070247688830611e-06,
      "loss": 0.0087,
      "step": 2123360
    },
    {
      "epoch": 3.4749579413863305,
      "grad_norm": 0.4483432471752167,
      "learning_rate": 3.070181796617094e-06,
      "loss": 0.0097,
      "step": 2123380
    },
    {
      "epoch": 3.4749906718249837,
      "grad_norm": 0.32430410385131836,
      "learning_rate": 3.0701159044035766e-06,
      "loss": 0.0157,
      "step": 2123400
    },
    {
      "epoch": 3.4750234022636373,
      "grad_norm": 0.27976348996162415,
      "learning_rate": 3.0700500121900594e-06,
      "loss": 0.0098,
      "step": 2123420
    },
    {
      "epoch": 3.4750561327022904,
      "grad_norm": 0.43394309282302856,
      "learning_rate": 3.069984119976543e-06,
      "loss": 0.0103,
      "step": 2123440
    },
    {
      "epoch": 3.475088863140944,
      "grad_norm": 1.6279398202896118,
      "learning_rate": 3.0699182277630257e-06,
      "loss": 0.0108,
      "step": 2123460
    },
    {
      "epoch": 3.475121593579597,
      "grad_norm": 0.10988885164260864,
      "learning_rate": 3.0698523355495084e-06,
      "loss": 0.0092,
      "step": 2123480
    },
    {
      "epoch": 3.4751543240182503,
      "grad_norm": 0.24561697244644165,
      "learning_rate": 3.0697864433359916e-06,
      "loss": 0.0103,
      "step": 2123500
    },
    {
      "epoch": 3.475187054456904,
      "grad_norm": 0.36338818073272705,
      "learning_rate": 3.0697205511224743e-06,
      "loss": 0.0089,
      "step": 2123520
    },
    {
      "epoch": 3.475219784895557,
      "grad_norm": 0.2995356619358063,
      "learning_rate": 3.069654658908957e-06,
      "loss": 0.011,
      "step": 2123540
    },
    {
      "epoch": 3.4752525153342106,
      "grad_norm": 0.1538006216287613,
      "learning_rate": 3.06958876669544e-06,
      "loss": 0.0092,
      "step": 2123560
    },
    {
      "epoch": 3.475285245772864,
      "grad_norm": 0.3680516183376312,
      "learning_rate": 3.069522874481923e-06,
      "loss": 0.0127,
      "step": 2123580
    },
    {
      "epoch": 3.4753179762115174,
      "grad_norm": 0.2057994306087494,
      "learning_rate": 3.0694569822684057e-06,
      "loss": 0.0102,
      "step": 2123600
    },
    {
      "epoch": 3.4753507066501705,
      "grad_norm": 0.44462263584136963,
      "learning_rate": 3.0693910900548884e-06,
      "loss": 0.0145,
      "step": 2123620
    },
    {
      "epoch": 3.4753834370888237,
      "grad_norm": 0.2722522020339966,
      "learning_rate": 3.069325197841371e-06,
      "loss": 0.0084,
      "step": 2123640
    },
    {
      "epoch": 3.4754161675274773,
      "grad_norm": 0.1640612632036209,
      "learning_rate": 3.069259305627854e-06,
      "loss": 0.011,
      "step": 2123660
    },
    {
      "epoch": 3.4754488979661304,
      "grad_norm": 0.68822181224823,
      "learning_rate": 3.069193413414337e-06,
      "loss": 0.0135,
      "step": 2123680
    },
    {
      "epoch": 3.475481628404784,
      "grad_norm": 0.20630913972854614,
      "learning_rate": 3.06912752120082e-06,
      "loss": 0.0101,
      "step": 2123700
    },
    {
      "epoch": 3.475514358843437,
      "grad_norm": 0.4492529630661011,
      "learning_rate": 3.0690616289873026e-06,
      "loss": 0.0149,
      "step": 2123720
    },
    {
      "epoch": 3.4755470892820908,
      "grad_norm": 1.8962981700897217,
      "learning_rate": 3.0689957367737853e-06,
      "loss": 0.0104,
      "step": 2123740
    },
    {
      "epoch": 3.475579819720744,
      "grad_norm": 0.2735351622104645,
      "learning_rate": 3.0689298445602685e-06,
      "loss": 0.012,
      "step": 2123760
    },
    {
      "epoch": 3.475612550159397,
      "grad_norm": 0.190007284283638,
      "learning_rate": 3.068863952346751e-06,
      "loss": 0.0063,
      "step": 2123780
    },
    {
      "epoch": 3.4756452805980507,
      "grad_norm": 0.21651151776313782,
      "learning_rate": 3.0687980601332344e-06,
      "loss": 0.0078,
      "step": 2123800
    },
    {
      "epoch": 3.475678011036704,
      "grad_norm": 0.18275807797908783,
      "learning_rate": 3.0687321679197175e-06,
      "loss": 0.0085,
      "step": 2123820
    },
    {
      "epoch": 3.4757107414753574,
      "grad_norm": 0.5291234850883484,
      "learning_rate": 3.0686662757062003e-06,
      "loss": 0.0108,
      "step": 2123840
    },
    {
      "epoch": 3.4757434719140106,
      "grad_norm": 0.5632099509239197,
      "learning_rate": 3.068600383492683e-06,
      "loss": 0.0105,
      "step": 2123860
    },
    {
      "epoch": 3.475776202352664,
      "grad_norm": 0.18293710052967072,
      "learning_rate": 3.0685344912791658e-06,
      "loss": 0.0119,
      "step": 2123880
    },
    {
      "epoch": 3.4758089327913173,
      "grad_norm": 0.5159695148468018,
      "learning_rate": 3.068468599065649e-06,
      "loss": 0.0128,
      "step": 2123900
    },
    {
      "epoch": 3.4758416632299705,
      "grad_norm": 0.38375869393348694,
      "learning_rate": 3.0684027068521317e-06,
      "loss": 0.0108,
      "step": 2123920
    },
    {
      "epoch": 3.475874393668624,
      "grad_norm": 0.1723387986421585,
      "learning_rate": 3.0683368146386144e-06,
      "loss": 0.0107,
      "step": 2123940
    },
    {
      "epoch": 3.475907124107277,
      "grad_norm": 0.5121910572052002,
      "learning_rate": 3.068270922425097e-06,
      "loss": 0.0087,
      "step": 2123960
    },
    {
      "epoch": 3.475939854545931,
      "grad_norm": 0.5443441867828369,
      "learning_rate": 3.0682050302115803e-06,
      "loss": 0.0111,
      "step": 2123980
    },
    {
      "epoch": 3.475972584984584,
      "grad_norm": 0.4909861981868744,
      "learning_rate": 3.068139137998063e-06,
      "loss": 0.0082,
      "step": 2124000
    },
    {
      "epoch": 3.4760053154232375,
      "grad_norm": 0.11799134314060211,
      "learning_rate": 3.0680732457845458e-06,
      "loss": 0.0113,
      "step": 2124020
    },
    {
      "epoch": 3.4760380458618907,
      "grad_norm": 0.07653261721134186,
      "learning_rate": 3.0680073535710285e-06,
      "loss": 0.013,
      "step": 2124040
    },
    {
      "epoch": 3.476070776300544,
      "grad_norm": 0.13750560581684113,
      "learning_rate": 3.0679414613575117e-06,
      "loss": 0.0136,
      "step": 2124060
    },
    {
      "epoch": 3.4761035067391974,
      "grad_norm": 0.1461171656847,
      "learning_rate": 3.0678755691439944e-06,
      "loss": 0.0095,
      "step": 2124080
    },
    {
      "epoch": 3.4761362371778506,
      "grad_norm": 0.6657787561416626,
      "learning_rate": 3.067809676930477e-06,
      "loss": 0.0105,
      "step": 2124100
    },
    {
      "epoch": 3.476168967616504,
      "grad_norm": 0.1695176213979721,
      "learning_rate": 3.06774378471696e-06,
      "loss": 0.0178,
      "step": 2124120
    },
    {
      "epoch": 3.4762016980551573,
      "grad_norm": 0.07452264428138733,
      "learning_rate": 3.0676778925034435e-06,
      "loss": 0.0121,
      "step": 2124140
    },
    {
      "epoch": 3.4762344284938105,
      "grad_norm": 0.31469863653182983,
      "learning_rate": 3.0676120002899262e-06,
      "loss": 0.0137,
      "step": 2124160
    },
    {
      "epoch": 3.476267158932464,
      "grad_norm": 0.25684255361557007,
      "learning_rate": 3.067546108076409e-06,
      "loss": 0.0089,
      "step": 2124180
    },
    {
      "epoch": 3.4762998893711172,
      "grad_norm": 0.3658568263053894,
      "learning_rate": 3.0674802158628917e-06,
      "loss": 0.0093,
      "step": 2124200
    },
    {
      "epoch": 3.476332619809771,
      "grad_norm": 0.22382061183452606,
      "learning_rate": 3.067414323649375e-06,
      "loss": 0.0113,
      "step": 2124220
    },
    {
      "epoch": 3.476365350248424,
      "grad_norm": 0.32740461826324463,
      "learning_rate": 3.0673484314358576e-06,
      "loss": 0.0084,
      "step": 2124240
    },
    {
      "epoch": 3.476398080687077,
      "grad_norm": 0.3598979413509369,
      "learning_rate": 3.0672825392223404e-06,
      "loss": 0.0102,
      "step": 2124260
    },
    {
      "epoch": 3.4764308111257307,
      "grad_norm": 0.45630988478660583,
      "learning_rate": 3.067216647008823e-06,
      "loss": 0.011,
      "step": 2124280
    },
    {
      "epoch": 3.476463541564384,
      "grad_norm": 0.20100998878479004,
      "learning_rate": 3.0671507547953063e-06,
      "loss": 0.0117,
      "step": 2124300
    },
    {
      "epoch": 3.4764962720030375,
      "grad_norm": 0.07845858484506607,
      "learning_rate": 3.067084862581789e-06,
      "loss": 0.0109,
      "step": 2124320
    },
    {
      "epoch": 3.4765290024416906,
      "grad_norm": 0.15945999324321747,
      "learning_rate": 3.0670189703682717e-06,
      "loss": 0.0107,
      "step": 2124340
    },
    {
      "epoch": 3.476561732880344,
      "grad_norm": 0.6503605246543884,
      "learning_rate": 3.0669530781547545e-06,
      "loss": 0.0148,
      "step": 2124360
    },
    {
      "epoch": 3.4765944633189974,
      "grad_norm": 0.08015349507331848,
      "learning_rate": 3.0668871859412376e-06,
      "loss": 0.0112,
      "step": 2124380
    },
    {
      "epoch": 3.4766271937576505,
      "grad_norm": 0.2006201148033142,
      "learning_rate": 3.0668212937277204e-06,
      "loss": 0.0105,
      "step": 2124400
    },
    {
      "epoch": 3.476659924196304,
      "grad_norm": 0.11990094929933548,
      "learning_rate": 3.066755401514203e-06,
      "loss": 0.0076,
      "step": 2124420
    },
    {
      "epoch": 3.4766926546349572,
      "grad_norm": 0.120823934674263,
      "learning_rate": 3.066689509300686e-06,
      "loss": 0.0078,
      "step": 2124440
    },
    {
      "epoch": 3.476725385073611,
      "grad_norm": 0.28241437673568726,
      "learning_rate": 3.066623617087169e-06,
      "loss": 0.0174,
      "step": 2124460
    },
    {
      "epoch": 3.476758115512264,
      "grad_norm": 0.6234851479530334,
      "learning_rate": 3.0665577248736518e-06,
      "loss": 0.0115,
      "step": 2124480
    },
    {
      "epoch": 3.4767908459509176,
      "grad_norm": 0.33023887872695923,
      "learning_rate": 3.066491832660135e-06,
      "loss": 0.0092,
      "step": 2124500
    },
    {
      "epoch": 3.4768235763895707,
      "grad_norm": 0.045904114842414856,
      "learning_rate": 3.066425940446618e-06,
      "loss": 0.0093,
      "step": 2124520
    },
    {
      "epoch": 3.476856306828224,
      "grad_norm": 0.2360086888074875,
      "learning_rate": 3.066360048233101e-06,
      "loss": 0.0132,
      "step": 2124540
    },
    {
      "epoch": 3.4768890372668775,
      "grad_norm": 0.1397041231393814,
      "learning_rate": 3.0662941560195836e-06,
      "loss": 0.0116,
      "step": 2124560
    },
    {
      "epoch": 3.4769217677055306,
      "grad_norm": 0.1992315798997879,
      "learning_rate": 3.0662282638060663e-06,
      "loss": 0.0089,
      "step": 2124580
    },
    {
      "epoch": 3.4769544981441842,
      "grad_norm": 0.20500989258289337,
      "learning_rate": 3.0661623715925495e-06,
      "loss": 0.0082,
      "step": 2124600
    },
    {
      "epoch": 3.4769872285828374,
      "grad_norm": 0.0631466880440712,
      "learning_rate": 3.066096479379032e-06,
      "loss": 0.0134,
      "step": 2124620
    },
    {
      "epoch": 3.477019959021491,
      "grad_norm": 0.3981630504131317,
      "learning_rate": 3.066030587165515e-06,
      "loss": 0.0178,
      "step": 2124640
    },
    {
      "epoch": 3.477052689460144,
      "grad_norm": 0.13743042945861816,
      "learning_rate": 3.0659646949519977e-06,
      "loss": 0.0108,
      "step": 2124660
    },
    {
      "epoch": 3.4770854198987973,
      "grad_norm": 0.16189411282539368,
      "learning_rate": 3.0658988027384804e-06,
      "loss": 0.0078,
      "step": 2124680
    },
    {
      "epoch": 3.477118150337451,
      "grad_norm": 0.5660034418106079,
      "learning_rate": 3.0658329105249636e-06,
      "loss": 0.0117,
      "step": 2124700
    },
    {
      "epoch": 3.477150880776104,
      "grad_norm": 0.26616135239601135,
      "learning_rate": 3.0657670183114463e-06,
      "loss": 0.0079,
      "step": 2124720
    },
    {
      "epoch": 3.4771836112147576,
      "grad_norm": 0.3746054470539093,
      "learning_rate": 3.065701126097929e-06,
      "loss": 0.0097,
      "step": 2124740
    },
    {
      "epoch": 3.4772163416534108,
      "grad_norm": 0.28116852045059204,
      "learning_rate": 3.065635233884412e-06,
      "loss": 0.009,
      "step": 2124760
    },
    {
      "epoch": 3.4772490720920644,
      "grad_norm": 0.16853511333465576,
      "learning_rate": 3.065569341670895e-06,
      "loss": 0.0131,
      "step": 2124780
    },
    {
      "epoch": 3.4772818025307175,
      "grad_norm": 1.7553004026412964,
      "learning_rate": 3.0655034494573777e-06,
      "loss": 0.02,
      "step": 2124800
    },
    {
      "epoch": 3.4773145329693707,
      "grad_norm": 0.3298817574977875,
      "learning_rate": 3.0654375572438605e-06,
      "loss": 0.0064,
      "step": 2124820
    },
    {
      "epoch": 3.4773472634080242,
      "grad_norm": 0.2115458995103836,
      "learning_rate": 3.065371665030344e-06,
      "loss": 0.0117,
      "step": 2124840
    },
    {
      "epoch": 3.4773799938466774,
      "grad_norm": 0.41148641705513,
      "learning_rate": 3.0653057728168268e-06,
      "loss": 0.0095,
      "step": 2124860
    },
    {
      "epoch": 3.477412724285331,
      "grad_norm": 0.4017660617828369,
      "learning_rate": 3.0652398806033095e-06,
      "loss": 0.0118,
      "step": 2124880
    },
    {
      "epoch": 3.477445454723984,
      "grad_norm": 0.48529285192489624,
      "learning_rate": 3.0651739883897923e-06,
      "loss": 0.0111,
      "step": 2124900
    },
    {
      "epoch": 3.4774781851626377,
      "grad_norm": 0.4511626660823822,
      "learning_rate": 3.0651080961762754e-06,
      "loss": 0.0104,
      "step": 2124920
    },
    {
      "epoch": 3.477510915601291,
      "grad_norm": 0.14727063477039337,
      "learning_rate": 3.065042203962758e-06,
      "loss": 0.0126,
      "step": 2124940
    },
    {
      "epoch": 3.477543646039944,
      "grad_norm": 0.25485649704933167,
      "learning_rate": 3.064976311749241e-06,
      "loss": 0.0101,
      "step": 2124960
    },
    {
      "epoch": 3.4775763764785976,
      "grad_norm": 0.6657920479774475,
      "learning_rate": 3.0649104195357236e-06,
      "loss": 0.0147,
      "step": 2124980
    },
    {
      "epoch": 3.477609106917251,
      "grad_norm": 0.2186150699853897,
      "learning_rate": 3.064844527322207e-06,
      "loss": 0.0073,
      "step": 2125000
    },
    {
      "epoch": 3.4776418373559044,
      "grad_norm": 0.2252505123615265,
      "learning_rate": 3.0647786351086895e-06,
      "loss": 0.0093,
      "step": 2125020
    },
    {
      "epoch": 3.4776745677945575,
      "grad_norm": 0.2445818930864334,
      "learning_rate": 3.0647127428951723e-06,
      "loss": 0.0102,
      "step": 2125040
    },
    {
      "epoch": 3.477707298233211,
      "grad_norm": 0.9480276703834534,
      "learning_rate": 3.064646850681655e-06,
      "loss": 0.0114,
      "step": 2125060
    },
    {
      "epoch": 3.4777400286718643,
      "grad_norm": 0.4492628276348114,
      "learning_rate": 3.0645809584681378e-06,
      "loss": 0.0113,
      "step": 2125080
    },
    {
      "epoch": 3.4777727591105174,
      "grad_norm": 0.4704788029193878,
      "learning_rate": 3.064515066254621e-06,
      "loss": 0.0175,
      "step": 2125100
    },
    {
      "epoch": 3.477805489549171,
      "grad_norm": 0.10918989032506943,
      "learning_rate": 3.0644491740411037e-06,
      "loss": 0.0154,
      "step": 2125120
    },
    {
      "epoch": 3.477838219987824,
      "grad_norm": 0.23396174609661102,
      "learning_rate": 3.0643832818275864e-06,
      "loss": 0.0091,
      "step": 2125140
    },
    {
      "epoch": 3.4778709504264778,
      "grad_norm": 0.2569335401058197,
      "learning_rate": 3.064317389614069e-06,
      "loss": 0.0122,
      "step": 2125160
    },
    {
      "epoch": 3.477903680865131,
      "grad_norm": 1.0992424488067627,
      "learning_rate": 3.0642514974005523e-06,
      "loss": 0.0182,
      "step": 2125180
    },
    {
      "epoch": 3.4779364113037845,
      "grad_norm": 0.3119848966598511,
      "learning_rate": 3.0641856051870355e-06,
      "loss": 0.008,
      "step": 2125200
    },
    {
      "epoch": 3.4779691417424377,
      "grad_norm": 0.20665690302848816,
      "learning_rate": 3.0641197129735182e-06,
      "loss": 0.0111,
      "step": 2125220
    },
    {
      "epoch": 3.478001872181091,
      "grad_norm": 0.1728294938802719,
      "learning_rate": 3.0640538207600014e-06,
      "loss": 0.0135,
      "step": 2125240
    },
    {
      "epoch": 3.4780346026197444,
      "grad_norm": 0.06790178269147873,
      "learning_rate": 3.063987928546484e-06,
      "loss": 0.012,
      "step": 2125260
    },
    {
      "epoch": 3.4780673330583975,
      "grad_norm": 0.23141613602638245,
      "learning_rate": 3.063922036332967e-06,
      "loss": 0.0141,
      "step": 2125280
    },
    {
      "epoch": 3.478100063497051,
      "grad_norm": 0.21883638203144073,
      "learning_rate": 3.0638561441194496e-06,
      "loss": 0.0141,
      "step": 2125300
    },
    {
      "epoch": 3.4781327939357043,
      "grad_norm": 0.406126469373703,
      "learning_rate": 3.0637902519059328e-06,
      "loss": 0.0131,
      "step": 2125320
    },
    {
      "epoch": 3.478165524374358,
      "grad_norm": 0.27053555846214294,
      "learning_rate": 3.0637243596924155e-06,
      "loss": 0.0116,
      "step": 2125340
    },
    {
      "epoch": 3.478198254813011,
      "grad_norm": 0.14205899834632874,
      "learning_rate": 3.0636584674788982e-06,
      "loss": 0.0104,
      "step": 2125360
    },
    {
      "epoch": 3.478230985251664,
      "grad_norm": 0.4308267831802368,
      "learning_rate": 3.063592575265381e-06,
      "loss": 0.0234,
      "step": 2125380
    },
    {
      "epoch": 3.478263715690318,
      "grad_norm": 0.541755199432373,
      "learning_rate": 3.063526683051864e-06,
      "loss": 0.0078,
      "step": 2125400
    },
    {
      "epoch": 3.478296446128971,
      "grad_norm": 0.1841687262058258,
      "learning_rate": 3.063460790838347e-06,
      "loss": 0.0093,
      "step": 2125420
    },
    {
      "epoch": 3.4783291765676245,
      "grad_norm": 0.15382927656173706,
      "learning_rate": 3.0633948986248296e-06,
      "loss": 0.0154,
      "step": 2125440
    },
    {
      "epoch": 3.4783619070062777,
      "grad_norm": 0.07123179733753204,
      "learning_rate": 3.0633290064113124e-06,
      "loss": 0.0123,
      "step": 2125460
    },
    {
      "epoch": 3.4783946374449313,
      "grad_norm": 0.1828327775001526,
      "learning_rate": 3.0632631141977955e-06,
      "loss": 0.0099,
      "step": 2125480
    },
    {
      "epoch": 3.4784273678835844,
      "grad_norm": 0.08677756786346436,
      "learning_rate": 3.0631972219842783e-06,
      "loss": 0.0097,
      "step": 2125500
    },
    {
      "epoch": 3.4784600983222376,
      "grad_norm": 0.42079606652259827,
      "learning_rate": 3.063131329770761e-06,
      "loss": 0.0088,
      "step": 2125520
    },
    {
      "epoch": 3.478492828760891,
      "grad_norm": 0.3776194751262665,
      "learning_rate": 3.0630654375572437e-06,
      "loss": 0.0111,
      "step": 2125540
    },
    {
      "epoch": 3.4785255591995443,
      "grad_norm": 0.19796814024448395,
      "learning_rate": 3.0629995453437273e-06,
      "loss": 0.0069,
      "step": 2125560
    },
    {
      "epoch": 3.478558289638198,
      "grad_norm": 0.302397221326828,
      "learning_rate": 3.06293365313021e-06,
      "loss": 0.0134,
      "step": 2125580
    },
    {
      "epoch": 3.478591020076851,
      "grad_norm": 0.2630331814289093,
      "learning_rate": 3.062867760916693e-06,
      "loss": 0.0099,
      "step": 2125600
    },
    {
      "epoch": 3.4786237505155047,
      "grad_norm": 0.4267117977142334,
      "learning_rate": 3.0628018687031755e-06,
      "loss": 0.0178,
      "step": 2125620
    },
    {
      "epoch": 3.478656480954158,
      "grad_norm": 0.2853577435016632,
      "learning_rate": 3.0627359764896587e-06,
      "loss": 0.0106,
      "step": 2125640
    },
    {
      "epoch": 3.478689211392811,
      "grad_norm": 0.2324398010969162,
      "learning_rate": 3.0626700842761415e-06,
      "loss": 0.0161,
      "step": 2125660
    },
    {
      "epoch": 3.4787219418314645,
      "grad_norm": 0.3333657681941986,
      "learning_rate": 3.062604192062624e-06,
      "loss": 0.013,
      "step": 2125680
    },
    {
      "epoch": 3.4787546722701177,
      "grad_norm": 0.2888447344303131,
      "learning_rate": 3.062538299849107e-06,
      "loss": 0.0132,
      "step": 2125700
    },
    {
      "epoch": 3.4787874027087713,
      "grad_norm": 0.16005507111549377,
      "learning_rate": 3.06247240763559e-06,
      "loss": 0.0115,
      "step": 2125720
    },
    {
      "epoch": 3.4788201331474244,
      "grad_norm": 0.06357742846012115,
      "learning_rate": 3.062406515422073e-06,
      "loss": 0.0081,
      "step": 2125740
    },
    {
      "epoch": 3.4788528635860776,
      "grad_norm": 0.2913377285003662,
      "learning_rate": 3.0623406232085556e-06,
      "loss": 0.0135,
      "step": 2125760
    },
    {
      "epoch": 3.478885594024731,
      "grad_norm": 2.009540319442749,
      "learning_rate": 3.0622747309950383e-06,
      "loss": 0.0127,
      "step": 2125780
    },
    {
      "epoch": 3.4789183244633843,
      "grad_norm": 0.23363830149173737,
      "learning_rate": 3.0622088387815215e-06,
      "loss": 0.0094,
      "step": 2125800
    },
    {
      "epoch": 3.478951054902038,
      "grad_norm": 0.21255306899547577,
      "learning_rate": 3.0621429465680042e-06,
      "loss": 0.0146,
      "step": 2125820
    },
    {
      "epoch": 3.478983785340691,
      "grad_norm": 0.4058779776096344,
      "learning_rate": 3.062077054354487e-06,
      "loss": 0.0159,
      "step": 2125840
    },
    {
      "epoch": 3.4790165157793442,
      "grad_norm": 0.29685407876968384,
      "learning_rate": 3.0620111621409697e-06,
      "loss": 0.0105,
      "step": 2125860
    },
    {
      "epoch": 3.479049246217998,
      "grad_norm": 0.4353657364845276,
      "learning_rate": 3.061945269927453e-06,
      "loss": 0.0142,
      "step": 2125880
    },
    {
      "epoch": 3.479081976656651,
      "grad_norm": 0.19216328859329224,
      "learning_rate": 3.061879377713936e-06,
      "loss": 0.0083,
      "step": 2125900
    },
    {
      "epoch": 3.4791147070953046,
      "grad_norm": 0.20647400617599487,
      "learning_rate": 3.0618134855004188e-06,
      "loss": 0.0098,
      "step": 2125920
    },
    {
      "epoch": 3.4791474375339577,
      "grad_norm": 0.26652753353118896,
      "learning_rate": 3.061747593286902e-06,
      "loss": 0.0113,
      "step": 2125940
    },
    {
      "epoch": 3.4791801679726113,
      "grad_norm": 0.07437392324209213,
      "learning_rate": 3.0616817010733847e-06,
      "loss": 0.0089,
      "step": 2125960
    },
    {
      "epoch": 3.4792128984112645,
      "grad_norm": 0.22136174142360687,
      "learning_rate": 3.0616158088598674e-06,
      "loss": 0.0096,
      "step": 2125980
    },
    {
      "epoch": 3.4792456288499176,
      "grad_norm": 0.18947379291057587,
      "learning_rate": 3.06154991664635e-06,
      "loss": 0.0123,
      "step": 2126000
    },
    {
      "epoch": 3.479278359288571,
      "grad_norm": 0.24747717380523682,
      "learning_rate": 3.0614840244328333e-06,
      "loss": 0.0075,
      "step": 2126020
    },
    {
      "epoch": 3.4793110897272244,
      "grad_norm": 0.19745850563049316,
      "learning_rate": 3.061418132219316e-06,
      "loss": 0.0116,
      "step": 2126040
    },
    {
      "epoch": 3.479343820165878,
      "grad_norm": 0.2429327517747879,
      "learning_rate": 3.0613522400057988e-06,
      "loss": 0.0111,
      "step": 2126060
    },
    {
      "epoch": 3.479376550604531,
      "grad_norm": 0.16624867916107178,
      "learning_rate": 3.0612863477922815e-06,
      "loss": 0.0104,
      "step": 2126080
    },
    {
      "epoch": 3.4794092810431847,
      "grad_norm": 0.13877569139003754,
      "learning_rate": 3.0612204555787643e-06,
      "loss": 0.0095,
      "step": 2126100
    },
    {
      "epoch": 3.479442011481838,
      "grad_norm": 0.6528642177581787,
      "learning_rate": 3.0611545633652474e-06,
      "loss": 0.0137,
      "step": 2126120
    },
    {
      "epoch": 3.479474741920491,
      "grad_norm": 0.8075295090675354,
      "learning_rate": 3.06108867115173e-06,
      "loss": 0.0155,
      "step": 2126140
    },
    {
      "epoch": 3.4795074723591446,
      "grad_norm": 0.694259762763977,
      "learning_rate": 3.061022778938213e-06,
      "loss": 0.0093,
      "step": 2126160
    },
    {
      "epoch": 3.4795402027977977,
      "grad_norm": 0.3676910102367401,
      "learning_rate": 3.0609568867246956e-06,
      "loss": 0.0112,
      "step": 2126180
    },
    {
      "epoch": 3.4795729332364513,
      "grad_norm": 0.18486636877059937,
      "learning_rate": 3.060890994511179e-06,
      "loss": 0.0086,
      "step": 2126200
    },
    {
      "epoch": 3.4796056636751045,
      "grad_norm": 0.43326595425605774,
      "learning_rate": 3.0608251022976616e-06,
      "loss": 0.0143,
      "step": 2126220
    },
    {
      "epoch": 3.479638394113758,
      "grad_norm": 0.456434428691864,
      "learning_rate": 3.0607592100841443e-06,
      "loss": 0.0114,
      "step": 2126240
    },
    {
      "epoch": 3.4796711245524112,
      "grad_norm": 0.46067097783088684,
      "learning_rate": 3.060693317870628e-06,
      "loss": 0.0108,
      "step": 2126260
    },
    {
      "epoch": 3.4797038549910644,
      "grad_norm": 0.30698513984680176,
      "learning_rate": 3.0606274256571106e-06,
      "loss": 0.0078,
      "step": 2126280
    },
    {
      "epoch": 3.479736585429718,
      "grad_norm": 0.165339857339859,
      "learning_rate": 3.0605615334435934e-06,
      "loss": 0.0122,
      "step": 2126300
    },
    {
      "epoch": 3.479769315868371,
      "grad_norm": 0.19220110774040222,
      "learning_rate": 3.060495641230076e-06,
      "loss": 0.0135,
      "step": 2126320
    },
    {
      "epoch": 3.4798020463070247,
      "grad_norm": 0.6743454933166504,
      "learning_rate": 3.0604297490165593e-06,
      "loss": 0.0111,
      "step": 2126340
    },
    {
      "epoch": 3.479834776745678,
      "grad_norm": 0.29605644941329956,
      "learning_rate": 3.060363856803042e-06,
      "loss": 0.0177,
      "step": 2126360
    },
    {
      "epoch": 3.4798675071843315,
      "grad_norm": 0.34250888228416443,
      "learning_rate": 3.0602979645895247e-06,
      "loss": 0.0118,
      "step": 2126380
    },
    {
      "epoch": 3.4799002376229846,
      "grad_norm": 0.829456627368927,
      "learning_rate": 3.0602320723760075e-06,
      "loss": 0.0145,
      "step": 2126400
    },
    {
      "epoch": 3.4799329680616378,
      "grad_norm": 0.2051321417093277,
      "learning_rate": 3.0601661801624906e-06,
      "loss": 0.0095,
      "step": 2126420
    },
    {
      "epoch": 3.4799656985002914,
      "grad_norm": 0.7944528460502625,
      "learning_rate": 3.0601002879489734e-06,
      "loss": 0.0139,
      "step": 2126440
    },
    {
      "epoch": 3.4799984289389445,
      "grad_norm": 0.6677994728088379,
      "learning_rate": 3.060034395735456e-06,
      "loss": 0.0118,
      "step": 2126460
    },
    {
      "epoch": 3.480031159377598,
      "grad_norm": 0.18681538105010986,
      "learning_rate": 3.059968503521939e-06,
      "loss": 0.0114,
      "step": 2126480
    },
    {
      "epoch": 3.4800638898162513,
      "grad_norm": 0.16100119054317474,
      "learning_rate": 3.0599026113084216e-06,
      "loss": 0.0093,
      "step": 2126500
    },
    {
      "epoch": 3.480096620254905,
      "grad_norm": 0.20899327099323273,
      "learning_rate": 3.0598367190949048e-06,
      "loss": 0.0079,
      "step": 2126520
    },
    {
      "epoch": 3.480129350693558,
      "grad_norm": 0.16979378461837769,
      "learning_rate": 3.0597708268813875e-06,
      "loss": 0.0099,
      "step": 2126540
    },
    {
      "epoch": 3.480162081132211,
      "grad_norm": 0.25265923142433167,
      "learning_rate": 3.0597049346678702e-06,
      "loss": 0.0119,
      "step": 2126560
    },
    {
      "epoch": 3.4801948115708647,
      "grad_norm": 0.3850352168083191,
      "learning_rate": 3.059639042454353e-06,
      "loss": 0.0117,
      "step": 2126580
    },
    {
      "epoch": 3.480227542009518,
      "grad_norm": 0.21730969846248627,
      "learning_rate": 3.0595731502408366e-06,
      "loss": 0.0074,
      "step": 2126600
    },
    {
      "epoch": 3.4802602724481715,
      "grad_norm": 0.36379164457321167,
      "learning_rate": 3.0595072580273193e-06,
      "loss": 0.008,
      "step": 2126620
    },
    {
      "epoch": 3.4802930028868246,
      "grad_norm": 0.3574813902378082,
      "learning_rate": 3.059441365813802e-06,
      "loss": 0.0151,
      "step": 2126640
    },
    {
      "epoch": 3.4803257333254782,
      "grad_norm": 0.31925109028816223,
      "learning_rate": 3.0593754736002852e-06,
      "loss": 0.0127,
      "step": 2126660
    },
    {
      "epoch": 3.4803584637641314,
      "grad_norm": 0.2881118357181549,
      "learning_rate": 3.059309581386768e-06,
      "loss": 0.0105,
      "step": 2126680
    },
    {
      "epoch": 3.4803911942027845,
      "grad_norm": 0.37694495916366577,
      "learning_rate": 3.0592436891732507e-06,
      "loss": 0.013,
      "step": 2126700
    },
    {
      "epoch": 3.480423924641438,
      "grad_norm": 0.2565452456474304,
      "learning_rate": 3.0591777969597334e-06,
      "loss": 0.019,
      "step": 2126720
    },
    {
      "epoch": 3.4804566550800913,
      "grad_norm": 0.1502450853586197,
      "learning_rate": 3.0591119047462166e-06,
      "loss": 0.0086,
      "step": 2126740
    },
    {
      "epoch": 3.480489385518745,
      "grad_norm": 0.23150548338890076,
      "learning_rate": 3.0590460125326993e-06,
      "loss": 0.0092,
      "step": 2126760
    },
    {
      "epoch": 3.480522115957398,
      "grad_norm": 0.7583494782447815,
      "learning_rate": 3.058980120319182e-06,
      "loss": 0.0175,
      "step": 2126780
    },
    {
      "epoch": 3.4805548463960516,
      "grad_norm": 0.09488525241613388,
      "learning_rate": 3.058914228105665e-06,
      "loss": 0.009,
      "step": 2126800
    },
    {
      "epoch": 3.4805875768347048,
      "grad_norm": 0.44088006019592285,
      "learning_rate": 3.058848335892148e-06,
      "loss": 0.0176,
      "step": 2126820
    },
    {
      "epoch": 3.480620307273358,
      "grad_norm": 0.5758943557739258,
      "learning_rate": 3.0587824436786307e-06,
      "loss": 0.0086,
      "step": 2126840
    },
    {
      "epoch": 3.4806530377120115,
      "grad_norm": 0.1372777819633484,
      "learning_rate": 3.0587165514651135e-06,
      "loss": 0.0064,
      "step": 2126860
    },
    {
      "epoch": 3.4806857681506647,
      "grad_norm": 0.48298314213752747,
      "learning_rate": 3.058650659251596e-06,
      "loss": 0.0088,
      "step": 2126880
    },
    {
      "epoch": 3.4807184985893183,
      "grad_norm": 0.19629216194152832,
      "learning_rate": 3.0585847670380794e-06,
      "loss": 0.0078,
      "step": 2126900
    },
    {
      "epoch": 3.4807512290279714,
      "grad_norm": 0.42363861203193665,
      "learning_rate": 3.058518874824562e-06,
      "loss": 0.0101,
      "step": 2126920
    },
    {
      "epoch": 3.480783959466625,
      "grad_norm": 0.5963720083236694,
      "learning_rate": 3.058452982611045e-06,
      "loss": 0.0157,
      "step": 2126940
    },
    {
      "epoch": 3.480816689905278,
      "grad_norm": 0.22638992965221405,
      "learning_rate": 3.0583870903975284e-06,
      "loss": 0.0069,
      "step": 2126960
    },
    {
      "epoch": 3.4808494203439313,
      "grad_norm": 0.6403601765632629,
      "learning_rate": 3.058321198184011e-06,
      "loss": 0.0086,
      "step": 2126980
    },
    {
      "epoch": 3.480882150782585,
      "grad_norm": 0.24127446115016937,
      "learning_rate": 3.058255305970494e-06,
      "loss": 0.0118,
      "step": 2127000
    },
    {
      "epoch": 3.480914881221238,
      "grad_norm": 0.6510621309280396,
      "learning_rate": 3.0581894137569766e-06,
      "loss": 0.0203,
      "step": 2127020
    },
    {
      "epoch": 3.4809476116598916,
      "grad_norm": 0.19132016599178314,
      "learning_rate": 3.0581235215434594e-06,
      "loss": 0.0178,
      "step": 2127040
    },
    {
      "epoch": 3.480980342098545,
      "grad_norm": 0.3833886384963989,
      "learning_rate": 3.0580576293299426e-06,
      "loss": 0.0156,
      "step": 2127060
    },
    {
      "epoch": 3.4810130725371984,
      "grad_norm": 0.169703409075737,
      "learning_rate": 3.0579917371164253e-06,
      "loss": 0.0135,
      "step": 2127080
    },
    {
      "epoch": 3.4810458029758515,
      "grad_norm": 0.1982722133398056,
      "learning_rate": 3.057925844902908e-06,
      "loss": 0.0109,
      "step": 2127100
    },
    {
      "epoch": 3.4810785334145047,
      "grad_norm": 0.9755140542984009,
      "learning_rate": 3.0578599526893908e-06,
      "loss": 0.0106,
      "step": 2127120
    },
    {
      "epoch": 3.4811112638531583,
      "grad_norm": 0.13965915143489838,
      "learning_rate": 3.057794060475874e-06,
      "loss": 0.0157,
      "step": 2127140
    },
    {
      "epoch": 3.4811439942918114,
      "grad_norm": 0.21762891113758087,
      "learning_rate": 3.0577281682623567e-06,
      "loss": 0.0102,
      "step": 2127160
    },
    {
      "epoch": 3.481176724730465,
      "grad_norm": 0.35483410954475403,
      "learning_rate": 3.0576622760488394e-06,
      "loss": 0.0122,
      "step": 2127180
    },
    {
      "epoch": 3.481209455169118,
      "grad_norm": 0.2915060222148895,
      "learning_rate": 3.057596383835322e-06,
      "loss": 0.0087,
      "step": 2127200
    },
    {
      "epoch": 3.4812421856077713,
      "grad_norm": 0.40562427043914795,
      "learning_rate": 3.0575304916218053e-06,
      "loss": 0.0104,
      "step": 2127220
    },
    {
      "epoch": 3.481274916046425,
      "grad_norm": 0.1651744693517685,
      "learning_rate": 3.057464599408288e-06,
      "loss": 0.0106,
      "step": 2127240
    },
    {
      "epoch": 3.481307646485078,
      "grad_norm": 0.19980739057064056,
      "learning_rate": 3.057398707194771e-06,
      "loss": 0.0116,
      "step": 2127260
    },
    {
      "epoch": 3.4813403769237317,
      "grad_norm": 0.3579576015472412,
      "learning_rate": 3.0573328149812535e-06,
      "loss": 0.0117,
      "step": 2127280
    },
    {
      "epoch": 3.481373107362385,
      "grad_norm": 0.15511953830718994,
      "learning_rate": 3.0572669227677367e-06,
      "loss": 0.0147,
      "step": 2127300
    },
    {
      "epoch": 3.481405837801038,
      "grad_norm": 0.3989962339401245,
      "learning_rate": 3.05720103055422e-06,
      "loss": 0.0097,
      "step": 2127320
    },
    {
      "epoch": 3.4814385682396916,
      "grad_norm": 0.1933780461549759,
      "learning_rate": 3.0571351383407026e-06,
      "loss": 0.0114,
      "step": 2127340
    },
    {
      "epoch": 3.4814712986783447,
      "grad_norm": 0.0595402829349041,
      "learning_rate": 3.0570692461271858e-06,
      "loss": 0.0103,
      "step": 2127360
    },
    {
      "epoch": 3.4815040291169983,
      "grad_norm": 0.0214844923466444,
      "learning_rate": 3.0570033539136685e-06,
      "loss": 0.0119,
      "step": 2127380
    },
    {
      "epoch": 3.4815367595556515,
      "grad_norm": 0.34461620450019836,
      "learning_rate": 3.0569374617001512e-06,
      "loss": 0.0096,
      "step": 2127400
    },
    {
      "epoch": 3.481569489994305,
      "grad_norm": 0.30679142475128174,
      "learning_rate": 3.056871569486634e-06,
      "loss": 0.0134,
      "step": 2127420
    },
    {
      "epoch": 3.481602220432958,
      "grad_norm": 0.1945009082555771,
      "learning_rate": 3.056805677273117e-06,
      "loss": 0.0149,
      "step": 2127440
    },
    {
      "epoch": 3.4816349508716113,
      "grad_norm": 0.135198175907135,
      "learning_rate": 3.0567397850596e-06,
      "loss": 0.0067,
      "step": 2127460
    },
    {
      "epoch": 3.481667681310265,
      "grad_norm": 0.16809597611427307,
      "learning_rate": 3.0566738928460826e-06,
      "loss": 0.0128,
      "step": 2127480
    },
    {
      "epoch": 3.481700411748918,
      "grad_norm": 0.22024133801460266,
      "learning_rate": 3.0566080006325654e-06,
      "loss": 0.0062,
      "step": 2127500
    },
    {
      "epoch": 3.4817331421875717,
      "grad_norm": 0.3617832064628601,
      "learning_rate": 3.056542108419048e-06,
      "loss": 0.0124,
      "step": 2127520
    },
    {
      "epoch": 3.481765872626225,
      "grad_norm": 0.696219265460968,
      "learning_rate": 3.0564762162055313e-06,
      "loss": 0.01,
      "step": 2127540
    },
    {
      "epoch": 3.4817986030648784,
      "grad_norm": 0.21614103019237518,
      "learning_rate": 3.056410323992014e-06,
      "loss": 0.0065,
      "step": 2127560
    },
    {
      "epoch": 3.4818313335035316,
      "grad_norm": 0.3778081238269806,
      "learning_rate": 3.0563444317784967e-06,
      "loss": 0.0116,
      "step": 2127580
    },
    {
      "epoch": 3.4818640639421847,
      "grad_norm": 0.05783563107252121,
      "learning_rate": 3.0562785395649795e-06,
      "loss": 0.0114,
      "step": 2127600
    },
    {
      "epoch": 3.4818967943808383,
      "grad_norm": 0.24791571497917175,
      "learning_rate": 3.0562126473514627e-06,
      "loss": 0.0095,
      "step": 2127620
    },
    {
      "epoch": 3.4819295248194915,
      "grad_norm": 0.2569483518600464,
      "learning_rate": 3.0561467551379454e-06,
      "loss": 0.0098,
      "step": 2127640
    },
    {
      "epoch": 3.481962255258145,
      "grad_norm": 0.27186650037765503,
      "learning_rate": 3.0560808629244286e-06,
      "loss": 0.0137,
      "step": 2127660
    },
    {
      "epoch": 3.481994985696798,
      "grad_norm": 0.1510484516620636,
      "learning_rate": 3.0560149707109117e-06,
      "loss": 0.0094,
      "step": 2127680
    },
    {
      "epoch": 3.482027716135452,
      "grad_norm": 0.1346745789051056,
      "learning_rate": 3.0559490784973945e-06,
      "loss": 0.0106,
      "step": 2127700
    },
    {
      "epoch": 3.482060446574105,
      "grad_norm": 0.19196298718452454,
      "learning_rate": 3.055883186283877e-06,
      "loss": 0.012,
      "step": 2127720
    },
    {
      "epoch": 3.482093177012758,
      "grad_norm": 0.5590471029281616,
      "learning_rate": 3.05581729407036e-06,
      "loss": 0.0119,
      "step": 2127740
    },
    {
      "epoch": 3.4821259074514117,
      "grad_norm": 0.2532319128513336,
      "learning_rate": 3.055751401856843e-06,
      "loss": 0.0079,
      "step": 2127760
    },
    {
      "epoch": 3.482158637890065,
      "grad_norm": 0.11265188455581665,
      "learning_rate": 3.055685509643326e-06,
      "loss": 0.0183,
      "step": 2127780
    },
    {
      "epoch": 3.4821913683287185,
      "grad_norm": 0.18852271139621735,
      "learning_rate": 3.0556196174298086e-06,
      "loss": 0.011,
      "step": 2127800
    },
    {
      "epoch": 3.4822240987673716,
      "grad_norm": 0.12611320614814758,
      "learning_rate": 3.0555537252162913e-06,
      "loss": 0.0069,
      "step": 2127820
    },
    {
      "epoch": 3.482256829206025,
      "grad_norm": 0.40149781107902527,
      "learning_rate": 3.0554878330027745e-06,
      "loss": 0.0145,
      "step": 2127840
    },
    {
      "epoch": 3.4822895596446783,
      "grad_norm": 0.0730118453502655,
      "learning_rate": 3.0554219407892572e-06,
      "loss": 0.0077,
      "step": 2127860
    },
    {
      "epoch": 3.4823222900833315,
      "grad_norm": 0.20691536366939545,
      "learning_rate": 3.05535604857574e-06,
      "loss": 0.0118,
      "step": 2127880
    },
    {
      "epoch": 3.482355020521985,
      "grad_norm": 0.12492239475250244,
      "learning_rate": 3.0552901563622227e-06,
      "loss": 0.0087,
      "step": 2127900
    },
    {
      "epoch": 3.4823877509606382,
      "grad_norm": 0.3816177546977997,
      "learning_rate": 3.055224264148706e-06,
      "loss": 0.0134,
      "step": 2127920
    },
    {
      "epoch": 3.482420481399292,
      "grad_norm": 0.15811480581760406,
      "learning_rate": 3.0551583719351886e-06,
      "loss": 0.0165,
      "step": 2127940
    },
    {
      "epoch": 3.482453211837945,
      "grad_norm": 0.13875322043895721,
      "learning_rate": 3.0550924797216713e-06,
      "loss": 0.0106,
      "step": 2127960
    },
    {
      "epoch": 3.4824859422765986,
      "grad_norm": 0.4101105332374573,
      "learning_rate": 3.055026587508154e-06,
      "loss": 0.0117,
      "step": 2127980
    },
    {
      "epoch": 3.4825186727152517,
      "grad_norm": 0.1636486053466797,
      "learning_rate": 3.054960695294637e-06,
      "loss": 0.0074,
      "step": 2128000
    },
    {
      "epoch": 3.482551403153905,
      "grad_norm": 0.10791483521461487,
      "learning_rate": 3.0548948030811204e-06,
      "loss": 0.0156,
      "step": 2128020
    },
    {
      "epoch": 3.4825841335925585,
      "grad_norm": 0.1940537542104721,
      "learning_rate": 3.054828910867603e-06,
      "loss": 0.0192,
      "step": 2128040
    },
    {
      "epoch": 3.4826168640312116,
      "grad_norm": 0.06984540075063705,
      "learning_rate": 3.054763018654086e-06,
      "loss": 0.0129,
      "step": 2128060
    },
    {
      "epoch": 3.482649594469865,
      "grad_norm": 0.3046874403953552,
      "learning_rate": 3.054697126440569e-06,
      "loss": 0.0136,
      "step": 2128080
    },
    {
      "epoch": 3.4826823249085184,
      "grad_norm": 0.2930479645729065,
      "learning_rate": 3.054631234227052e-06,
      "loss": 0.0112,
      "step": 2128100
    },
    {
      "epoch": 3.482715055347172,
      "grad_norm": 0.24389119446277618,
      "learning_rate": 3.0545653420135345e-06,
      "loss": 0.0075,
      "step": 2128120
    },
    {
      "epoch": 3.482747785785825,
      "grad_norm": 0.23964104056358337,
      "learning_rate": 3.0544994498000173e-06,
      "loss": 0.0158,
      "step": 2128140
    },
    {
      "epoch": 3.4827805162244783,
      "grad_norm": 0.6215358376502991,
      "learning_rate": 3.0544335575865004e-06,
      "loss": 0.0104,
      "step": 2128160
    },
    {
      "epoch": 3.482813246663132,
      "grad_norm": 0.34644201397895813,
      "learning_rate": 3.054367665372983e-06,
      "loss": 0.0118,
      "step": 2128180
    },
    {
      "epoch": 3.482845977101785,
      "grad_norm": 1.2577024698257446,
      "learning_rate": 3.054301773159466e-06,
      "loss": 0.0121,
      "step": 2128200
    },
    {
      "epoch": 3.4828787075404386,
      "grad_norm": 0.9523735642433167,
      "learning_rate": 3.0542358809459487e-06,
      "loss": 0.0153,
      "step": 2128220
    },
    {
      "epoch": 3.4829114379790918,
      "grad_norm": 0.506742000579834,
      "learning_rate": 3.054169988732432e-06,
      "loss": 0.011,
      "step": 2128240
    },
    {
      "epoch": 3.4829441684177453,
      "grad_norm": 0.20694126188755035,
      "learning_rate": 3.0541040965189146e-06,
      "loss": 0.0146,
      "step": 2128260
    },
    {
      "epoch": 3.4829768988563985,
      "grad_norm": 0.2552187144756317,
      "learning_rate": 3.0540382043053973e-06,
      "loss": 0.0088,
      "step": 2128280
    },
    {
      "epoch": 3.4830096292950516,
      "grad_norm": 0.33111268281936646,
      "learning_rate": 3.05397231209188e-06,
      "loss": 0.0133,
      "step": 2128300
    },
    {
      "epoch": 3.4830423597337052,
      "grad_norm": 0.8192086219787598,
      "learning_rate": 3.053906419878363e-06,
      "loss": 0.0198,
      "step": 2128320
    },
    {
      "epoch": 3.4830750901723584,
      "grad_norm": 0.17071488499641418,
      "learning_rate": 3.053840527664846e-06,
      "loss": 0.0118,
      "step": 2128340
    },
    {
      "epoch": 3.483107820611012,
      "grad_norm": 0.4768860936164856,
      "learning_rate": 3.053774635451329e-06,
      "loss": 0.0169,
      "step": 2128360
    },
    {
      "epoch": 3.483140551049665,
      "grad_norm": 0.2703460454940796,
      "learning_rate": 3.0537087432378123e-06,
      "loss": 0.011,
      "step": 2128380
    },
    {
      "epoch": 3.4831732814883187,
      "grad_norm": 0.5158213973045349,
      "learning_rate": 3.053642851024295e-06,
      "loss": 0.0133,
      "step": 2128400
    },
    {
      "epoch": 3.483206011926972,
      "grad_norm": 0.2260330766439438,
      "learning_rate": 3.0535769588107777e-06,
      "loss": 0.0097,
      "step": 2128420
    },
    {
      "epoch": 3.483238742365625,
      "grad_norm": 0.13878712058067322,
      "learning_rate": 3.0535110665972605e-06,
      "loss": 0.013,
      "step": 2128440
    },
    {
      "epoch": 3.4832714728042786,
      "grad_norm": 0.14789365231990814,
      "learning_rate": 3.0534451743837432e-06,
      "loss": 0.0108,
      "step": 2128460
    },
    {
      "epoch": 3.4833042032429318,
      "grad_norm": 0.4102913439273834,
      "learning_rate": 3.0533792821702264e-06,
      "loss": 0.01,
      "step": 2128480
    },
    {
      "epoch": 3.4833369336815854,
      "grad_norm": 0.32510101795196533,
      "learning_rate": 3.053313389956709e-06,
      "loss": 0.0116,
      "step": 2128500
    },
    {
      "epoch": 3.4833696641202385,
      "grad_norm": 0.3190816640853882,
      "learning_rate": 3.053247497743192e-06,
      "loss": 0.0085,
      "step": 2128520
    },
    {
      "epoch": 3.483402394558892,
      "grad_norm": 0.2553003132343292,
      "learning_rate": 3.0531816055296746e-06,
      "loss": 0.0084,
      "step": 2128540
    },
    {
      "epoch": 3.4834351249975453,
      "grad_norm": 0.5280604362487793,
      "learning_rate": 3.0531157133161578e-06,
      "loss": 0.0085,
      "step": 2128560
    },
    {
      "epoch": 3.4834678554361984,
      "grad_norm": 0.13917091488838196,
      "learning_rate": 3.0530498211026405e-06,
      "loss": 0.0127,
      "step": 2128580
    },
    {
      "epoch": 3.483500585874852,
      "grad_norm": 0.19986064732074738,
      "learning_rate": 3.0529839288891233e-06,
      "loss": 0.0124,
      "step": 2128600
    },
    {
      "epoch": 3.483533316313505,
      "grad_norm": 0.12984564900398254,
      "learning_rate": 3.052918036675606e-06,
      "loss": 0.0082,
      "step": 2128620
    },
    {
      "epoch": 3.4835660467521588,
      "grad_norm": 0.47119781374931335,
      "learning_rate": 3.052852144462089e-06,
      "loss": 0.0194,
      "step": 2128640
    },
    {
      "epoch": 3.483598777190812,
      "grad_norm": 0.21282199025154114,
      "learning_rate": 3.052786252248572e-06,
      "loss": 0.0076,
      "step": 2128660
    },
    {
      "epoch": 3.4836315076294655,
      "grad_norm": 0.3088679313659668,
      "learning_rate": 3.0527203600350546e-06,
      "loss": 0.006,
      "step": 2128680
    },
    {
      "epoch": 3.4836642380681186,
      "grad_norm": 0.16847141087055206,
      "learning_rate": 3.0526544678215374e-06,
      "loss": 0.0112,
      "step": 2128700
    },
    {
      "epoch": 3.483696968506772,
      "grad_norm": 0.2874113619327545,
      "learning_rate": 3.052588575608021e-06,
      "loss": 0.0143,
      "step": 2128720
    },
    {
      "epoch": 3.4837296989454254,
      "grad_norm": 0.22483104467391968,
      "learning_rate": 3.0525226833945037e-06,
      "loss": 0.0148,
      "step": 2128740
    },
    {
      "epoch": 3.4837624293840785,
      "grad_norm": 0.17665134370326996,
      "learning_rate": 3.0524567911809864e-06,
      "loss": 0.0109,
      "step": 2128760
    },
    {
      "epoch": 3.483795159822732,
      "grad_norm": 0.12206326425075531,
      "learning_rate": 3.0523908989674696e-06,
      "loss": 0.008,
      "step": 2128780
    },
    {
      "epoch": 3.4838278902613853,
      "grad_norm": 0.15009412169456482,
      "learning_rate": 3.0523250067539523e-06,
      "loss": 0.0079,
      "step": 2128800
    },
    {
      "epoch": 3.4838606207000384,
      "grad_norm": 0.2447611391544342,
      "learning_rate": 3.052259114540435e-06,
      "loss": 0.01,
      "step": 2128820
    },
    {
      "epoch": 3.483893351138692,
      "grad_norm": 0.2402113974094391,
      "learning_rate": 3.052193222326918e-06,
      "loss": 0.0124,
      "step": 2128840
    },
    {
      "epoch": 3.483926081577345,
      "grad_norm": 0.32316628098487854,
      "learning_rate": 3.052127330113401e-06,
      "loss": 0.0116,
      "step": 2128860
    },
    {
      "epoch": 3.4839588120159988,
      "grad_norm": 0.28925031423568726,
      "learning_rate": 3.0520614378998837e-06,
      "loss": 0.0138,
      "step": 2128880
    },
    {
      "epoch": 3.483991542454652,
      "grad_norm": 0.9681256413459778,
      "learning_rate": 3.0519955456863665e-06,
      "loss": 0.0131,
      "step": 2128900
    },
    {
      "epoch": 3.484024272893305,
      "grad_norm": 0.8309528231620789,
      "learning_rate": 3.051929653472849e-06,
      "loss": 0.0135,
      "step": 2128920
    },
    {
      "epoch": 3.4840570033319587,
      "grad_norm": 0.36606863141059875,
      "learning_rate": 3.051863761259332e-06,
      "loss": 0.0099,
      "step": 2128940
    },
    {
      "epoch": 3.484089733770612,
      "grad_norm": 0.2897019386291504,
      "learning_rate": 3.051797869045815e-06,
      "loss": 0.0083,
      "step": 2128960
    },
    {
      "epoch": 3.4841224642092654,
      "grad_norm": 0.30050209164619446,
      "learning_rate": 3.051731976832298e-06,
      "loss": 0.0116,
      "step": 2128980
    },
    {
      "epoch": 3.4841551946479186,
      "grad_norm": 0.2749859392642975,
      "learning_rate": 3.0516660846187806e-06,
      "loss": 0.0104,
      "step": 2129000
    },
    {
      "epoch": 3.484187925086572,
      "grad_norm": 0.4363311529159546,
      "learning_rate": 3.0516001924052633e-06,
      "loss": 0.0134,
      "step": 2129020
    },
    {
      "epoch": 3.4842206555252253,
      "grad_norm": 0.3875986635684967,
      "learning_rate": 3.0515343001917465e-06,
      "loss": 0.0159,
      "step": 2129040
    },
    {
      "epoch": 3.4842533859638785,
      "grad_norm": 0.43549978733062744,
      "learning_rate": 3.0514684079782292e-06,
      "loss": 0.0092,
      "step": 2129060
    },
    {
      "epoch": 3.484286116402532,
      "grad_norm": 0.25347068905830383,
      "learning_rate": 3.0514025157647124e-06,
      "loss": 0.0112,
      "step": 2129080
    },
    {
      "epoch": 3.484318846841185,
      "grad_norm": 0.6730709671974182,
      "learning_rate": 3.0513366235511956e-06,
      "loss": 0.0108,
      "step": 2129100
    },
    {
      "epoch": 3.484351577279839,
      "grad_norm": 0.19864439964294434,
      "learning_rate": 3.0512707313376783e-06,
      "loss": 0.0161,
      "step": 2129120
    },
    {
      "epoch": 3.484384307718492,
      "grad_norm": 0.07378305494785309,
      "learning_rate": 3.051204839124161e-06,
      "loss": 0.0114,
      "step": 2129140
    },
    {
      "epoch": 3.4844170381571455,
      "grad_norm": 0.12497606873512268,
      "learning_rate": 3.0511389469106438e-06,
      "loss": 0.0128,
      "step": 2129160
    },
    {
      "epoch": 3.4844497685957987,
      "grad_norm": 0.4272647202014923,
      "learning_rate": 3.051073054697127e-06,
      "loss": 0.0108,
      "step": 2129180
    },
    {
      "epoch": 3.484482499034452,
      "grad_norm": 0.7108742594718933,
      "learning_rate": 3.0510071624836097e-06,
      "loss": 0.01,
      "step": 2129200
    },
    {
      "epoch": 3.4845152294731054,
      "grad_norm": 0.17922605574131012,
      "learning_rate": 3.0509412702700924e-06,
      "loss": 0.0127,
      "step": 2129220
    },
    {
      "epoch": 3.4845479599117586,
      "grad_norm": 0.15872354805469513,
      "learning_rate": 3.050875378056575e-06,
      "loss": 0.0092,
      "step": 2129240
    },
    {
      "epoch": 3.484580690350412,
      "grad_norm": 0.1374238282442093,
      "learning_rate": 3.0508094858430583e-06,
      "loss": 0.0114,
      "step": 2129260
    },
    {
      "epoch": 3.4846134207890653,
      "grad_norm": 0.17523738741874695,
      "learning_rate": 3.050743593629541e-06,
      "loss": 0.0106,
      "step": 2129280
    },
    {
      "epoch": 3.484646151227719,
      "grad_norm": 0.19140511751174927,
      "learning_rate": 3.050677701416024e-06,
      "loss": 0.0155,
      "step": 2129300
    },
    {
      "epoch": 3.484678881666372,
      "grad_norm": 0.49536052346229553,
      "learning_rate": 3.0506118092025065e-06,
      "loss": 0.0199,
      "step": 2129320
    },
    {
      "epoch": 3.4847116121050252,
      "grad_norm": 0.2243032306432724,
      "learning_rate": 3.0505459169889897e-06,
      "loss": 0.0094,
      "step": 2129340
    },
    {
      "epoch": 3.484744342543679,
      "grad_norm": 0.5561308860778809,
      "learning_rate": 3.0504800247754724e-06,
      "loss": 0.0107,
      "step": 2129360
    },
    {
      "epoch": 3.484777072982332,
      "grad_norm": 0.1936236023902893,
      "learning_rate": 3.050414132561955e-06,
      "loss": 0.0097,
      "step": 2129380
    },
    {
      "epoch": 3.4848098034209856,
      "grad_norm": 0.5354714393615723,
      "learning_rate": 3.050348240348438e-06,
      "loss": 0.0116,
      "step": 2129400
    },
    {
      "epoch": 3.4848425338596387,
      "grad_norm": 0.1185910627245903,
      "learning_rate": 3.0502823481349215e-06,
      "loss": 0.0169,
      "step": 2129420
    },
    {
      "epoch": 3.4848752642982923,
      "grad_norm": 0.2395022064447403,
      "learning_rate": 3.0502164559214043e-06,
      "loss": 0.0094,
      "step": 2129440
    },
    {
      "epoch": 3.4849079947369455,
      "grad_norm": 0.32316580414772034,
      "learning_rate": 3.050150563707887e-06,
      "loss": 0.0178,
      "step": 2129460
    },
    {
      "epoch": 3.4849407251755986,
      "grad_norm": 0.2028270661830902,
      "learning_rate": 3.0500846714943697e-06,
      "loss": 0.0085,
      "step": 2129480
    },
    {
      "epoch": 3.484973455614252,
      "grad_norm": 0.11791307479143143,
      "learning_rate": 3.050018779280853e-06,
      "loss": 0.0136,
      "step": 2129500
    },
    {
      "epoch": 3.4850061860529054,
      "grad_norm": 0.3712792694568634,
      "learning_rate": 3.0499528870673356e-06,
      "loss": 0.0127,
      "step": 2129520
    },
    {
      "epoch": 3.485038916491559,
      "grad_norm": 0.25298869609832764,
      "learning_rate": 3.0498869948538184e-06,
      "loss": 0.0109,
      "step": 2129540
    },
    {
      "epoch": 3.485071646930212,
      "grad_norm": 0.13809087872505188,
      "learning_rate": 3.049821102640301e-06,
      "loss": 0.0112,
      "step": 2129560
    },
    {
      "epoch": 3.4851043773688657,
      "grad_norm": 0.1973116546869278,
      "learning_rate": 3.0497552104267843e-06,
      "loss": 0.0153,
      "step": 2129580
    },
    {
      "epoch": 3.485137107807519,
      "grad_norm": 0.3779562711715698,
      "learning_rate": 3.049689318213267e-06,
      "loss": 0.01,
      "step": 2129600
    },
    {
      "epoch": 3.485169838246172,
      "grad_norm": 0.18863502144813538,
      "learning_rate": 3.0496234259997498e-06,
      "loss": 0.0115,
      "step": 2129620
    },
    {
      "epoch": 3.4852025686848256,
      "grad_norm": 0.21708330512046814,
      "learning_rate": 3.0495575337862325e-06,
      "loss": 0.0068,
      "step": 2129640
    },
    {
      "epoch": 3.4852352991234787,
      "grad_norm": 0.1715201586484909,
      "learning_rate": 3.0494916415727157e-06,
      "loss": 0.0111,
      "step": 2129660
    },
    {
      "epoch": 3.4852680295621323,
      "grad_norm": 0.2004500776529312,
      "learning_rate": 3.0494257493591984e-06,
      "loss": 0.007,
      "step": 2129680
    },
    {
      "epoch": 3.4853007600007855,
      "grad_norm": 0.08400332927703857,
      "learning_rate": 3.049359857145681e-06,
      "loss": 0.0135,
      "step": 2129700
    },
    {
      "epoch": 3.485333490439439,
      "grad_norm": 0.19214171171188354,
      "learning_rate": 3.049293964932164e-06,
      "loss": 0.0139,
      "step": 2129720
    },
    {
      "epoch": 3.4853662208780922,
      "grad_norm": 0.19881950318813324,
      "learning_rate": 3.049228072718647e-06,
      "loss": 0.0175,
      "step": 2129740
    },
    {
      "epoch": 3.4853989513167454,
      "grad_norm": 0.13341696560382843,
      "learning_rate": 3.0491621805051298e-06,
      "loss": 0.0074,
      "step": 2129760
    },
    {
      "epoch": 3.485431681755399,
      "grad_norm": 0.30096444487571716,
      "learning_rate": 3.049096288291613e-06,
      "loss": 0.0132,
      "step": 2129780
    },
    {
      "epoch": 3.485464412194052,
      "grad_norm": 0.1451912671327591,
      "learning_rate": 3.049030396078096e-06,
      "loss": 0.0106,
      "step": 2129800
    },
    {
      "epoch": 3.4854971426327057,
      "grad_norm": 0.36277711391448975,
      "learning_rate": 3.048964503864579e-06,
      "loss": 0.0113,
      "step": 2129820
    },
    {
      "epoch": 3.485529873071359,
      "grad_norm": 0.08891545236110687,
      "learning_rate": 3.0488986116510616e-06,
      "loss": 0.0111,
      "step": 2129840
    },
    {
      "epoch": 3.4855626035100125,
      "grad_norm": 0.20042946934700012,
      "learning_rate": 3.0488327194375443e-06,
      "loss": 0.0089,
      "step": 2129860
    },
    {
      "epoch": 3.4855953339486656,
      "grad_norm": 0.3765285015106201,
      "learning_rate": 3.0487668272240275e-06,
      "loss": 0.0108,
      "step": 2129880
    },
    {
      "epoch": 3.4856280643873188,
      "grad_norm": 0.40987539291381836,
      "learning_rate": 3.0487009350105102e-06,
      "loss": 0.0098,
      "step": 2129900
    },
    {
      "epoch": 3.4856607948259724,
      "grad_norm": 0.09814944863319397,
      "learning_rate": 3.048635042796993e-06,
      "loss": 0.0151,
      "step": 2129920
    },
    {
      "epoch": 3.4856935252646255,
      "grad_norm": 0.3618564009666443,
      "learning_rate": 3.0485691505834757e-06,
      "loss": 0.0112,
      "step": 2129940
    },
    {
      "epoch": 3.485726255703279,
      "grad_norm": 0.17767322063446045,
      "learning_rate": 3.0485032583699584e-06,
      "loss": 0.0072,
      "step": 2129960
    },
    {
      "epoch": 3.4857589861419322,
      "grad_norm": 0.1738911271095276,
      "learning_rate": 3.0484373661564416e-06,
      "loss": 0.0091,
      "step": 2129980
    },
    {
      "epoch": 3.485791716580586,
      "grad_norm": 0.12499936670064926,
      "learning_rate": 3.0483714739429244e-06,
      "loss": 0.0084,
      "step": 2130000
    },
    {
      "epoch": 3.485824447019239,
      "grad_norm": 0.4033052325248718,
      "learning_rate": 3.048305581729407e-06,
      "loss": 0.0079,
      "step": 2130020
    },
    {
      "epoch": 3.485857177457892,
      "grad_norm": 0.1639333814382553,
      "learning_rate": 3.04823968951589e-06,
      "loss": 0.0098,
      "step": 2130040
    },
    {
      "epoch": 3.4858899078965457,
      "grad_norm": 0.6501723527908325,
      "learning_rate": 3.048173797302373e-06,
      "loss": 0.0096,
      "step": 2130060
    },
    {
      "epoch": 3.485922638335199,
      "grad_norm": 0.14641599357128143,
      "learning_rate": 3.0481079050888557e-06,
      "loss": 0.0076,
      "step": 2130080
    },
    {
      "epoch": 3.4859553687738525,
      "grad_norm": 0.14382654428482056,
      "learning_rate": 3.0480420128753385e-06,
      "loss": 0.0154,
      "step": 2130100
    },
    {
      "epoch": 3.4859880992125056,
      "grad_norm": 0.24932777881622314,
      "learning_rate": 3.047976120661822e-06,
      "loss": 0.0082,
      "step": 2130120
    },
    {
      "epoch": 3.4860208296511592,
      "grad_norm": 0.3662807047367096,
      "learning_rate": 3.047910228448305e-06,
      "loss": 0.0096,
      "step": 2130140
    },
    {
      "epoch": 3.4860535600898124,
      "grad_norm": 0.07824044674634933,
      "learning_rate": 3.0478443362347875e-06,
      "loss": 0.0099,
      "step": 2130160
    },
    {
      "epoch": 3.4860862905284655,
      "grad_norm": 0.6819062232971191,
      "learning_rate": 3.0477784440212703e-06,
      "loss": 0.0105,
      "step": 2130180
    },
    {
      "epoch": 3.486119020967119,
      "grad_norm": 0.33602380752563477,
      "learning_rate": 3.0477125518077534e-06,
      "loss": 0.0071,
      "step": 2130200
    },
    {
      "epoch": 3.4861517514057723,
      "grad_norm": 0.5134035348892212,
      "learning_rate": 3.047646659594236e-06,
      "loss": 0.0135,
      "step": 2130220
    },
    {
      "epoch": 3.486184481844426,
      "grad_norm": 0.09296830743551254,
      "learning_rate": 3.047580767380719e-06,
      "loss": 0.0122,
      "step": 2130240
    },
    {
      "epoch": 3.486217212283079,
      "grad_norm": 0.10726165771484375,
      "learning_rate": 3.0475148751672017e-06,
      "loss": 0.0105,
      "step": 2130260
    },
    {
      "epoch": 3.486249942721732,
      "grad_norm": 0.5774154663085938,
      "learning_rate": 3.047448982953685e-06,
      "loss": 0.009,
      "step": 2130280
    },
    {
      "epoch": 3.4862826731603858,
      "grad_norm": 0.11493940651416779,
      "learning_rate": 3.0473830907401676e-06,
      "loss": 0.01,
      "step": 2130300
    },
    {
      "epoch": 3.486315403599039,
      "grad_norm": 0.28773054480552673,
      "learning_rate": 3.0473171985266503e-06,
      "loss": 0.0114,
      "step": 2130320
    },
    {
      "epoch": 3.4863481340376925,
      "grad_norm": 0.11720875650644302,
      "learning_rate": 3.047251306313133e-06,
      "loss": 0.012,
      "step": 2130340
    },
    {
      "epoch": 3.4863808644763457,
      "grad_norm": 0.18059104681015015,
      "learning_rate": 3.0471854140996158e-06,
      "loss": 0.0176,
      "step": 2130360
    },
    {
      "epoch": 3.486413594914999,
      "grad_norm": 0.13434705138206482,
      "learning_rate": 3.047119521886099e-06,
      "loss": 0.0097,
      "step": 2130380
    },
    {
      "epoch": 3.4864463253536524,
      "grad_norm": 0.5785455703735352,
      "learning_rate": 3.0470536296725817e-06,
      "loss": 0.0094,
      "step": 2130400
    },
    {
      "epoch": 3.4864790557923055,
      "grad_norm": 0.6827729940414429,
      "learning_rate": 3.0469877374590644e-06,
      "loss": 0.0099,
      "step": 2130420
    },
    {
      "epoch": 3.486511786230959,
      "grad_norm": 0.4171418249607086,
      "learning_rate": 3.046921845245547e-06,
      "loss": 0.0158,
      "step": 2130440
    },
    {
      "epoch": 3.4865445166696123,
      "grad_norm": 0.1157318651676178,
      "learning_rate": 3.0468559530320303e-06,
      "loss": 0.0113,
      "step": 2130460
    },
    {
      "epoch": 3.486577247108266,
      "grad_norm": 0.6092516183853149,
      "learning_rate": 3.0467900608185135e-06,
      "loss": 0.0149,
      "step": 2130480
    },
    {
      "epoch": 3.486609977546919,
      "grad_norm": 0.04327670484781265,
      "learning_rate": 3.0467241686049962e-06,
      "loss": 0.0213,
      "step": 2130500
    },
    {
      "epoch": 3.486642707985572,
      "grad_norm": 0.5974815487861633,
      "learning_rate": 3.0466582763914794e-06,
      "loss": 0.0149,
      "step": 2130520
    },
    {
      "epoch": 3.486675438424226,
      "grad_norm": 0.18822288513183594,
      "learning_rate": 3.046592384177962e-06,
      "loss": 0.0098,
      "step": 2130540
    },
    {
      "epoch": 3.486708168862879,
      "grad_norm": 0.09988870471715927,
      "learning_rate": 3.046526491964445e-06,
      "loss": 0.0117,
      "step": 2130560
    },
    {
      "epoch": 3.4867408993015325,
      "grad_norm": 0.4451577663421631,
      "learning_rate": 3.0464605997509276e-06,
      "loss": 0.0096,
      "step": 2130580
    },
    {
      "epoch": 3.4867736297401857,
      "grad_norm": 0.28568848967552185,
      "learning_rate": 3.0463947075374108e-06,
      "loss": 0.011,
      "step": 2130600
    },
    {
      "epoch": 3.4868063601788393,
      "grad_norm": 0.26768624782562256,
      "learning_rate": 3.0463288153238935e-06,
      "loss": 0.014,
      "step": 2130620
    },
    {
      "epoch": 3.4868390906174924,
      "grad_norm": 0.03600630164146423,
      "learning_rate": 3.0462629231103763e-06,
      "loss": 0.0101,
      "step": 2130640
    },
    {
      "epoch": 3.4868718210561456,
      "grad_norm": 0.5719957947731018,
      "learning_rate": 3.046197030896859e-06,
      "loss": 0.0087,
      "step": 2130660
    },
    {
      "epoch": 3.486904551494799,
      "grad_norm": 0.3815840184688568,
      "learning_rate": 3.046131138683342e-06,
      "loss": 0.0078,
      "step": 2130680
    },
    {
      "epoch": 3.4869372819334523,
      "grad_norm": 0.4890308976173401,
      "learning_rate": 3.046065246469825e-06,
      "loss": 0.0099,
      "step": 2130700
    },
    {
      "epoch": 3.486970012372106,
      "grad_norm": 0.11328508704900742,
      "learning_rate": 3.0459993542563076e-06,
      "loss": 0.0104,
      "step": 2130720
    },
    {
      "epoch": 3.487002742810759,
      "grad_norm": 0.3812759220600128,
      "learning_rate": 3.0459334620427904e-06,
      "loss": 0.0084,
      "step": 2130740
    },
    {
      "epoch": 3.4870354732494127,
      "grad_norm": 0.14067351818084717,
      "learning_rate": 3.0458675698292735e-06,
      "loss": 0.0133,
      "step": 2130760
    },
    {
      "epoch": 3.487068203688066,
      "grad_norm": 0.411653071641922,
      "learning_rate": 3.0458016776157563e-06,
      "loss": 0.0165,
      "step": 2130780
    },
    {
      "epoch": 3.487100934126719,
      "grad_norm": 0.26246196031570435,
      "learning_rate": 3.045735785402239e-06,
      "loss": 0.0139,
      "step": 2130800
    },
    {
      "epoch": 3.4871336645653725,
      "grad_norm": 0.18873342871665955,
      "learning_rate": 3.0456698931887226e-06,
      "loss": 0.0131,
      "step": 2130820
    },
    {
      "epoch": 3.4871663950040257,
      "grad_norm": 0.19473136961460114,
      "learning_rate": 3.0456040009752053e-06,
      "loss": 0.0097,
      "step": 2130840
    },
    {
      "epoch": 3.4871991254426793,
      "grad_norm": 0.2080535590648651,
      "learning_rate": 3.045538108761688e-06,
      "loss": 0.0071,
      "step": 2130860
    },
    {
      "epoch": 3.4872318558813324,
      "grad_norm": 0.08931118249893188,
      "learning_rate": 3.045472216548171e-06,
      "loss": 0.0137,
      "step": 2130880
    },
    {
      "epoch": 3.487264586319986,
      "grad_norm": 0.18152011930942535,
      "learning_rate": 3.0454063243346536e-06,
      "loss": 0.0132,
      "step": 2130900
    },
    {
      "epoch": 3.487297316758639,
      "grad_norm": 0.29062336683273315,
      "learning_rate": 3.0453404321211367e-06,
      "loss": 0.0138,
      "step": 2130920
    },
    {
      "epoch": 3.4873300471972923,
      "grad_norm": 0.9573231339454651,
      "learning_rate": 3.0452745399076195e-06,
      "loss": 0.0143,
      "step": 2130940
    },
    {
      "epoch": 3.487362777635946,
      "grad_norm": 0.2996886074542999,
      "learning_rate": 3.0452086476941022e-06,
      "loss": 0.0155,
      "step": 2130960
    },
    {
      "epoch": 3.487395508074599,
      "grad_norm": 0.4367382228374481,
      "learning_rate": 3.045142755480585e-06,
      "loss": 0.0105,
      "step": 2130980
    },
    {
      "epoch": 3.4874282385132527,
      "grad_norm": 0.3097831606864929,
      "learning_rate": 3.045076863267068e-06,
      "loss": 0.0101,
      "step": 2131000
    },
    {
      "epoch": 3.487460968951906,
      "grad_norm": 0.2779821455478668,
      "learning_rate": 3.045010971053551e-06,
      "loss": 0.0089,
      "step": 2131020
    },
    {
      "epoch": 3.4874936993905594,
      "grad_norm": 0.3969862163066864,
      "learning_rate": 3.0449450788400336e-06,
      "loss": 0.0093,
      "step": 2131040
    },
    {
      "epoch": 3.4875264298292126,
      "grad_norm": 0.2213180512189865,
      "learning_rate": 3.0448791866265163e-06,
      "loss": 0.0112,
      "step": 2131060
    },
    {
      "epoch": 3.4875591602678657,
      "grad_norm": 0.2852611839771271,
      "learning_rate": 3.0448132944129995e-06,
      "loss": 0.0152,
      "step": 2131080
    },
    {
      "epoch": 3.4875918907065193,
      "grad_norm": 0.12344608455896378,
      "learning_rate": 3.0447474021994822e-06,
      "loss": 0.012,
      "step": 2131100
    },
    {
      "epoch": 3.4876246211451725,
      "grad_norm": 0.27978941798210144,
      "learning_rate": 3.044681509985965e-06,
      "loss": 0.0118,
      "step": 2131120
    },
    {
      "epoch": 3.487657351583826,
      "grad_norm": 0.13354797661304474,
      "learning_rate": 3.0446156177724477e-06,
      "loss": 0.0126,
      "step": 2131140
    },
    {
      "epoch": 3.487690082022479,
      "grad_norm": 0.5318434834480286,
      "learning_rate": 3.044549725558931e-06,
      "loss": 0.0125,
      "step": 2131160
    },
    {
      "epoch": 3.487722812461133,
      "grad_norm": 0.06841719150543213,
      "learning_rate": 3.044483833345414e-06,
      "loss": 0.0194,
      "step": 2131180
    },
    {
      "epoch": 3.487755542899786,
      "grad_norm": 0.28773918747901917,
      "learning_rate": 3.0444179411318968e-06,
      "loss": 0.0068,
      "step": 2131200
    },
    {
      "epoch": 3.487788273338439,
      "grad_norm": 0.407868355512619,
      "learning_rate": 3.04435204891838e-06,
      "loss": 0.0091,
      "step": 2131220
    },
    {
      "epoch": 3.4878210037770927,
      "grad_norm": 0.03271576017141342,
      "learning_rate": 3.0442861567048627e-06,
      "loss": 0.0116,
      "step": 2131240
    },
    {
      "epoch": 3.487853734215746,
      "grad_norm": 0.1893116682767868,
      "learning_rate": 3.0442202644913454e-06,
      "loss": 0.0155,
      "step": 2131260
    },
    {
      "epoch": 3.4878864646543994,
      "grad_norm": 0.10820449888706207,
      "learning_rate": 3.044154372277828e-06,
      "loss": 0.0093,
      "step": 2131280
    },
    {
      "epoch": 3.4879191950930526,
      "grad_norm": 0.37692126631736755,
      "learning_rate": 3.0440884800643113e-06,
      "loss": 0.014,
      "step": 2131300
    },
    {
      "epoch": 3.487951925531706,
      "grad_norm": 0.4632709324359894,
      "learning_rate": 3.044022587850794e-06,
      "loss": 0.0095,
      "step": 2131320
    },
    {
      "epoch": 3.4879846559703593,
      "grad_norm": 0.2441602498292923,
      "learning_rate": 3.043956695637277e-06,
      "loss": 0.01,
      "step": 2131340
    },
    {
      "epoch": 3.4880173864090125,
      "grad_norm": 0.29459866881370544,
      "learning_rate": 3.0438908034237595e-06,
      "loss": 0.0096,
      "step": 2131360
    },
    {
      "epoch": 3.488050116847666,
      "grad_norm": 0.47751307487487793,
      "learning_rate": 3.0438249112102423e-06,
      "loss": 0.0097,
      "step": 2131380
    },
    {
      "epoch": 3.4880828472863192,
      "grad_norm": 0.38915562629699707,
      "learning_rate": 3.0437590189967255e-06,
      "loss": 0.018,
      "step": 2131400
    },
    {
      "epoch": 3.488115577724973,
      "grad_norm": 0.4176870286464691,
      "learning_rate": 3.043693126783208e-06,
      "loss": 0.0084,
      "step": 2131420
    },
    {
      "epoch": 3.488148308163626,
      "grad_norm": 0.28801223635673523,
      "learning_rate": 3.043627234569691e-06,
      "loss": 0.0109,
      "step": 2131440
    },
    {
      "epoch": 3.4881810386022796,
      "grad_norm": 0.40138885378837585,
      "learning_rate": 3.0435613423561737e-06,
      "loss": 0.0156,
      "step": 2131460
    },
    {
      "epoch": 3.4882137690409327,
      "grad_norm": 0.2993836998939514,
      "learning_rate": 3.043495450142657e-06,
      "loss": 0.0092,
      "step": 2131480
    },
    {
      "epoch": 3.488246499479586,
      "grad_norm": 0.2612372040748596,
      "learning_rate": 3.0434295579291396e-06,
      "loss": 0.0097,
      "step": 2131500
    },
    {
      "epoch": 3.4882792299182395,
      "grad_norm": 0.4845793843269348,
      "learning_rate": 3.0433636657156223e-06,
      "loss": 0.0133,
      "step": 2131520
    },
    {
      "epoch": 3.4883119603568926,
      "grad_norm": 0.0703464224934578,
      "learning_rate": 3.043297773502106e-06,
      "loss": 0.0093,
      "step": 2131540
    },
    {
      "epoch": 3.488344690795546,
      "grad_norm": 0.1589215099811554,
      "learning_rate": 3.0432318812885886e-06,
      "loss": 0.0079,
      "step": 2131560
    },
    {
      "epoch": 3.4883774212341994,
      "grad_norm": 0.4108598530292511,
      "learning_rate": 3.0431659890750714e-06,
      "loss": 0.0077,
      "step": 2131580
    },
    {
      "epoch": 3.488410151672853,
      "grad_norm": 0.2782879173755646,
      "learning_rate": 3.043100096861554e-06,
      "loss": 0.0108,
      "step": 2131600
    },
    {
      "epoch": 3.488442882111506,
      "grad_norm": 0.1226600632071495,
      "learning_rate": 3.0430342046480373e-06,
      "loss": 0.0095,
      "step": 2131620
    },
    {
      "epoch": 3.4884756125501593,
      "grad_norm": 0.40106436610221863,
      "learning_rate": 3.04296831243452e-06,
      "loss": 0.012,
      "step": 2131640
    },
    {
      "epoch": 3.488508342988813,
      "grad_norm": 0.2751573920249939,
      "learning_rate": 3.0429024202210028e-06,
      "loss": 0.0111,
      "step": 2131660
    },
    {
      "epoch": 3.488541073427466,
      "grad_norm": 0.15231813490390778,
      "learning_rate": 3.0428365280074855e-06,
      "loss": 0.0123,
      "step": 2131680
    },
    {
      "epoch": 3.4885738038661196,
      "grad_norm": 0.10976307094097137,
      "learning_rate": 3.0427706357939687e-06,
      "loss": 0.0092,
      "step": 2131700
    },
    {
      "epoch": 3.4886065343047727,
      "grad_norm": 0.06736750900745392,
      "learning_rate": 3.0427047435804514e-06,
      "loss": 0.008,
      "step": 2131720
    },
    {
      "epoch": 3.4886392647434263,
      "grad_norm": 0.24251379072666168,
      "learning_rate": 3.042638851366934e-06,
      "loss": 0.0133,
      "step": 2131740
    },
    {
      "epoch": 3.4886719951820795,
      "grad_norm": 0.11744841933250427,
      "learning_rate": 3.042572959153417e-06,
      "loss": 0.0088,
      "step": 2131760
    },
    {
      "epoch": 3.4887047256207326,
      "grad_norm": 0.17601580917835236,
      "learning_rate": 3.0425070669398996e-06,
      "loss": 0.0175,
      "step": 2131780
    },
    {
      "epoch": 3.4887374560593862,
      "grad_norm": 0.07262174040079117,
      "learning_rate": 3.0424411747263828e-06,
      "loss": 0.0109,
      "step": 2131800
    },
    {
      "epoch": 3.4887701864980394,
      "grad_norm": 0.09543991088867188,
      "learning_rate": 3.0423752825128655e-06,
      "loss": 0.0138,
      "step": 2131820
    },
    {
      "epoch": 3.488802916936693,
      "grad_norm": 0.48113662004470825,
      "learning_rate": 3.0423093902993483e-06,
      "loss": 0.0134,
      "step": 2131840
    },
    {
      "epoch": 3.488835647375346,
      "grad_norm": 0.3027147948741913,
      "learning_rate": 3.042243498085831e-06,
      "loss": 0.0104,
      "step": 2131860
    },
    {
      "epoch": 3.4888683778139993,
      "grad_norm": 0.03215488791465759,
      "learning_rate": 3.0421776058723146e-06,
      "loss": 0.0102,
      "step": 2131880
    },
    {
      "epoch": 3.488901108252653,
      "grad_norm": 0.15037281811237335,
      "learning_rate": 3.0421117136587973e-06,
      "loss": 0.0119,
      "step": 2131900
    },
    {
      "epoch": 3.488933838691306,
      "grad_norm": 0.5478163361549377,
      "learning_rate": 3.04204582144528e-06,
      "loss": 0.0121,
      "step": 2131920
    },
    {
      "epoch": 3.4889665691299596,
      "grad_norm": 0.11258963495492935,
      "learning_rate": 3.0419799292317632e-06,
      "loss": 0.0137,
      "step": 2131940
    },
    {
      "epoch": 3.4889992995686128,
      "grad_norm": 0.2245599627494812,
      "learning_rate": 3.041914037018246e-06,
      "loss": 0.0069,
      "step": 2131960
    },
    {
      "epoch": 3.489032030007266,
      "grad_norm": 0.41153937578201294,
      "learning_rate": 3.0418481448047287e-06,
      "loss": 0.0165,
      "step": 2131980
    },
    {
      "epoch": 3.4890647604459195,
      "grad_norm": 0.31803467869758606,
      "learning_rate": 3.0417822525912115e-06,
      "loss": 0.0135,
      "step": 2132000
    },
    {
      "epoch": 3.4890974908845727,
      "grad_norm": 0.11784570664167404,
      "learning_rate": 3.0417163603776946e-06,
      "loss": 0.0098,
      "step": 2132020
    },
    {
      "epoch": 3.4891302213232263,
      "grad_norm": 0.2781219780445099,
      "learning_rate": 3.0416504681641774e-06,
      "loss": 0.0098,
      "step": 2132040
    },
    {
      "epoch": 3.4891629517618794,
      "grad_norm": 0.26785871386528015,
      "learning_rate": 3.04158457595066e-06,
      "loss": 0.0092,
      "step": 2132060
    },
    {
      "epoch": 3.489195682200533,
      "grad_norm": 1.0681698322296143,
      "learning_rate": 3.041518683737143e-06,
      "loss": 0.0111,
      "step": 2132080
    },
    {
      "epoch": 3.489228412639186,
      "grad_norm": 0.29035940766334534,
      "learning_rate": 3.041452791523626e-06,
      "loss": 0.0097,
      "step": 2132100
    },
    {
      "epoch": 3.4892611430778393,
      "grad_norm": 0.2923486828804016,
      "learning_rate": 3.0413868993101087e-06,
      "loss": 0.0183,
      "step": 2132120
    },
    {
      "epoch": 3.489293873516493,
      "grad_norm": 0.5269978046417236,
      "learning_rate": 3.0413210070965915e-06,
      "loss": 0.0099,
      "step": 2132140
    },
    {
      "epoch": 3.489326603955146,
      "grad_norm": 0.1034931018948555,
      "learning_rate": 3.0412551148830742e-06,
      "loss": 0.0137,
      "step": 2132160
    },
    {
      "epoch": 3.4893593343937996,
      "grad_norm": 0.1872805505990982,
      "learning_rate": 3.0411892226695574e-06,
      "loss": 0.0065,
      "step": 2132180
    },
    {
      "epoch": 3.489392064832453,
      "grad_norm": 0.1518995314836502,
      "learning_rate": 3.04112333045604e-06,
      "loss": 0.0069,
      "step": 2132200
    },
    {
      "epoch": 3.4894247952711064,
      "grad_norm": 0.2056235820055008,
      "learning_rate": 3.041057438242523e-06,
      "loss": 0.0097,
      "step": 2132220
    },
    {
      "epoch": 3.4894575257097595,
      "grad_norm": 0.29119351506233215,
      "learning_rate": 3.0409915460290064e-06,
      "loss": 0.0088,
      "step": 2132240
    },
    {
      "epoch": 3.4894902561484127,
      "grad_norm": 0.1311054825782776,
      "learning_rate": 3.040925653815489e-06,
      "loss": 0.0081,
      "step": 2132260
    },
    {
      "epoch": 3.4895229865870663,
      "grad_norm": 0.20358812808990479,
      "learning_rate": 3.040859761601972e-06,
      "loss": 0.01,
      "step": 2132280
    },
    {
      "epoch": 3.4895557170257194,
      "grad_norm": 0.1669054478406906,
      "learning_rate": 3.0407938693884547e-06,
      "loss": 0.016,
      "step": 2132300
    },
    {
      "epoch": 3.489588447464373,
      "grad_norm": 0.15396513044834137,
      "learning_rate": 3.0407279771749374e-06,
      "loss": 0.0132,
      "step": 2132320
    },
    {
      "epoch": 3.489621177903026,
      "grad_norm": 0.41655629873275757,
      "learning_rate": 3.0406620849614206e-06,
      "loss": 0.0144,
      "step": 2132340
    },
    {
      "epoch": 3.4896539083416798,
      "grad_norm": 0.2865332067012787,
      "learning_rate": 3.0405961927479033e-06,
      "loss": 0.0116,
      "step": 2132360
    },
    {
      "epoch": 3.489686638780333,
      "grad_norm": 0.28545743227005005,
      "learning_rate": 3.040530300534386e-06,
      "loss": 0.0082,
      "step": 2132380
    },
    {
      "epoch": 3.489719369218986,
      "grad_norm": 0.07858540862798691,
      "learning_rate": 3.040464408320869e-06,
      "loss": 0.0084,
      "step": 2132400
    },
    {
      "epoch": 3.4897520996576397,
      "grad_norm": 0.16615226864814758,
      "learning_rate": 3.040398516107352e-06,
      "loss": 0.0228,
      "step": 2132420
    },
    {
      "epoch": 3.489784830096293,
      "grad_norm": 0.4542057514190674,
      "learning_rate": 3.0403326238938347e-06,
      "loss": 0.0131,
      "step": 2132440
    },
    {
      "epoch": 3.4898175605349464,
      "grad_norm": 0.2542971968650818,
      "learning_rate": 3.0402667316803174e-06,
      "loss": 0.0166,
      "step": 2132460
    },
    {
      "epoch": 3.4898502909735996,
      "grad_norm": 0.3073936104774475,
      "learning_rate": 3.0402008394668e-06,
      "loss": 0.0081,
      "step": 2132480
    },
    {
      "epoch": 3.489883021412253,
      "grad_norm": 0.22813843190670013,
      "learning_rate": 3.0401349472532833e-06,
      "loss": 0.0099,
      "step": 2132500
    },
    {
      "epoch": 3.4899157518509063,
      "grad_norm": 0.5180338621139526,
      "learning_rate": 3.040069055039766e-06,
      "loss": 0.0098,
      "step": 2132520
    },
    {
      "epoch": 3.4899484822895595,
      "grad_norm": 0.39663222432136536,
      "learning_rate": 3.040003162826249e-06,
      "loss": 0.009,
      "step": 2132540
    },
    {
      "epoch": 3.489981212728213,
      "grad_norm": 0.3176426887512207,
      "learning_rate": 3.0399372706127316e-06,
      "loss": 0.0132,
      "step": 2132560
    },
    {
      "epoch": 3.490013943166866,
      "grad_norm": 0.23778460919857025,
      "learning_rate": 3.039871378399215e-06,
      "loss": 0.011,
      "step": 2132580
    },
    {
      "epoch": 3.49004667360552,
      "grad_norm": 0.29572346806526184,
      "learning_rate": 3.039805486185698e-06,
      "loss": 0.0081,
      "step": 2132600
    },
    {
      "epoch": 3.490079404044173,
      "grad_norm": 0.2141803354024887,
      "learning_rate": 3.0397395939721806e-06,
      "loss": 0.0119,
      "step": 2132620
    },
    {
      "epoch": 3.4901121344828265,
      "grad_norm": 0.34155842661857605,
      "learning_rate": 3.0396737017586638e-06,
      "loss": 0.015,
      "step": 2132640
    },
    {
      "epoch": 3.4901448649214797,
      "grad_norm": 0.15861311554908752,
      "learning_rate": 3.0396078095451465e-06,
      "loss": 0.0101,
      "step": 2132660
    },
    {
      "epoch": 3.490177595360133,
      "grad_norm": 0.7787304520606995,
      "learning_rate": 3.0395419173316293e-06,
      "loss": 0.0194,
      "step": 2132680
    },
    {
      "epoch": 3.4902103257987864,
      "grad_norm": 0.1143692135810852,
      "learning_rate": 3.039476025118112e-06,
      "loss": 0.0123,
      "step": 2132700
    },
    {
      "epoch": 3.4902430562374396,
      "grad_norm": 0.3616364598274231,
      "learning_rate": 3.039410132904595e-06,
      "loss": 0.0114,
      "step": 2132720
    },
    {
      "epoch": 3.490275786676093,
      "grad_norm": 0.22972990572452545,
      "learning_rate": 3.039344240691078e-06,
      "loss": 0.0117,
      "step": 2132740
    },
    {
      "epoch": 3.4903085171147463,
      "grad_norm": 0.27073386311531067,
      "learning_rate": 3.0392783484775606e-06,
      "loss": 0.0106,
      "step": 2132760
    },
    {
      "epoch": 3.4903412475534,
      "grad_norm": 0.22544944286346436,
      "learning_rate": 3.0392124562640434e-06,
      "loss": 0.013,
      "step": 2132780
    },
    {
      "epoch": 3.490373977992053,
      "grad_norm": 0.6414299607276917,
      "learning_rate": 3.039146564050526e-06,
      "loss": 0.0152,
      "step": 2132800
    },
    {
      "epoch": 3.490406708430706,
      "grad_norm": 0.11710195988416672,
      "learning_rate": 3.0390806718370093e-06,
      "loss": 0.0114,
      "step": 2132820
    },
    {
      "epoch": 3.49043943886936,
      "grad_norm": 0.24950607120990753,
      "learning_rate": 3.039014779623492e-06,
      "loss": 0.0125,
      "step": 2132840
    },
    {
      "epoch": 3.490472169308013,
      "grad_norm": 0.4580962657928467,
      "learning_rate": 3.0389488874099748e-06,
      "loss": 0.013,
      "step": 2132860
    },
    {
      "epoch": 3.4905048997466666,
      "grad_norm": 0.9730328321456909,
      "learning_rate": 3.0388829951964575e-06,
      "loss": 0.0103,
      "step": 2132880
    },
    {
      "epoch": 3.4905376301853197,
      "grad_norm": 0.2516125440597534,
      "learning_rate": 3.0388171029829407e-06,
      "loss": 0.0106,
      "step": 2132900
    },
    {
      "epoch": 3.4905703606239733,
      "grad_norm": 0.6040664911270142,
      "learning_rate": 3.0387512107694234e-06,
      "loss": 0.0099,
      "step": 2132920
    },
    {
      "epoch": 3.4906030910626265,
      "grad_norm": 0.34682294726371765,
      "learning_rate": 3.0386853185559066e-06,
      "loss": 0.0116,
      "step": 2132940
    },
    {
      "epoch": 3.4906358215012796,
      "grad_norm": 0.14724421501159668,
      "learning_rate": 3.0386194263423897e-06,
      "loss": 0.0096,
      "step": 2132960
    },
    {
      "epoch": 3.490668551939933,
      "grad_norm": 0.12480556219816208,
      "learning_rate": 3.0385535341288725e-06,
      "loss": 0.0109,
      "step": 2132980
    },
    {
      "epoch": 3.4907012823785863,
      "grad_norm": 0.2418002188205719,
      "learning_rate": 3.0384876419153552e-06,
      "loss": 0.0089,
      "step": 2133000
    },
    {
      "epoch": 3.49073401281724,
      "grad_norm": 0.34532037377357483,
      "learning_rate": 3.038421749701838e-06,
      "loss": 0.0145,
      "step": 2133020
    },
    {
      "epoch": 3.490766743255893,
      "grad_norm": 0.20740331709384918,
      "learning_rate": 3.038355857488321e-06,
      "loss": 0.0114,
      "step": 2133040
    },
    {
      "epoch": 3.4907994736945467,
      "grad_norm": 0.16562113165855408,
      "learning_rate": 3.038289965274804e-06,
      "loss": 0.0085,
      "step": 2133060
    },
    {
      "epoch": 3.4908322041332,
      "grad_norm": 0.19617637991905212,
      "learning_rate": 3.0382240730612866e-06,
      "loss": 0.021,
      "step": 2133080
    },
    {
      "epoch": 3.490864934571853,
      "grad_norm": 0.14835859835147858,
      "learning_rate": 3.0381581808477693e-06,
      "loss": 0.0159,
      "step": 2133100
    },
    {
      "epoch": 3.4908976650105066,
      "grad_norm": 0.19062134623527527,
      "learning_rate": 3.0380922886342525e-06,
      "loss": 0.0119,
      "step": 2133120
    },
    {
      "epoch": 3.4909303954491597,
      "grad_norm": 0.3104785680770874,
      "learning_rate": 3.0380263964207352e-06,
      "loss": 0.0073,
      "step": 2133140
    },
    {
      "epoch": 3.4909631258878133,
      "grad_norm": 0.6211564540863037,
      "learning_rate": 3.037960504207218e-06,
      "loss": 0.0141,
      "step": 2133160
    },
    {
      "epoch": 3.4909958563264665,
      "grad_norm": 0.1993398368358612,
      "learning_rate": 3.0378946119937007e-06,
      "loss": 0.008,
      "step": 2133180
    },
    {
      "epoch": 3.49102858676512,
      "grad_norm": 0.09802084416151047,
      "learning_rate": 3.037828719780184e-06,
      "loss": 0.0094,
      "step": 2133200
    },
    {
      "epoch": 3.491061317203773,
      "grad_norm": 0.7335225939750671,
      "learning_rate": 3.0377628275666666e-06,
      "loss": 0.0123,
      "step": 2133220
    },
    {
      "epoch": 3.4910940476424264,
      "grad_norm": 0.13031356036663055,
      "learning_rate": 3.0376969353531494e-06,
      "loss": 0.0072,
      "step": 2133240
    },
    {
      "epoch": 3.49112677808108,
      "grad_norm": 0.449473112821579,
      "learning_rate": 3.037631043139632e-06,
      "loss": 0.012,
      "step": 2133260
    },
    {
      "epoch": 3.491159508519733,
      "grad_norm": 0.29055696725845337,
      "learning_rate": 3.037565150926115e-06,
      "loss": 0.0108,
      "step": 2133280
    },
    {
      "epoch": 3.4911922389583867,
      "grad_norm": 0.7804949879646301,
      "learning_rate": 3.0374992587125984e-06,
      "loss": 0.0179,
      "step": 2133300
    },
    {
      "epoch": 3.49122496939704,
      "grad_norm": 0.16335196793079376,
      "learning_rate": 3.037433366499081e-06,
      "loss": 0.0127,
      "step": 2133320
    },
    {
      "epoch": 3.491257699835693,
      "grad_norm": 0.20997509360313416,
      "learning_rate": 3.037367474285564e-06,
      "loss": 0.0073,
      "step": 2133340
    },
    {
      "epoch": 3.4912904302743466,
      "grad_norm": 0.5727519989013672,
      "learning_rate": 3.037301582072047e-06,
      "loss": 0.0122,
      "step": 2133360
    },
    {
      "epoch": 3.4913231607129998,
      "grad_norm": 0.20593015849590302,
      "learning_rate": 3.03723568985853e-06,
      "loss": 0.0069,
      "step": 2133380
    },
    {
      "epoch": 3.4913558911516533,
      "grad_norm": 0.26962974667549133,
      "learning_rate": 3.0371697976450126e-06,
      "loss": 0.0102,
      "step": 2133400
    },
    {
      "epoch": 3.4913886215903065,
      "grad_norm": 0.42571622133255005,
      "learning_rate": 3.0371039054314953e-06,
      "loss": 0.0109,
      "step": 2133420
    },
    {
      "epoch": 3.4914213520289596,
      "grad_norm": 0.17723196744918823,
      "learning_rate": 3.0370380132179785e-06,
      "loss": 0.0105,
      "step": 2133440
    },
    {
      "epoch": 3.4914540824676132,
      "grad_norm": 0.28495389223098755,
      "learning_rate": 3.036972121004461e-06,
      "loss": 0.0123,
      "step": 2133460
    },
    {
      "epoch": 3.4914868129062664,
      "grad_norm": 0.10100097954273224,
      "learning_rate": 3.036906228790944e-06,
      "loss": 0.0235,
      "step": 2133480
    },
    {
      "epoch": 3.49151954334492,
      "grad_norm": 0.25723400712013245,
      "learning_rate": 3.0368403365774267e-06,
      "loss": 0.0098,
      "step": 2133500
    },
    {
      "epoch": 3.491552273783573,
      "grad_norm": 0.32981541752815247,
      "learning_rate": 3.03677444436391e-06,
      "loss": 0.0091,
      "step": 2133520
    },
    {
      "epoch": 3.4915850042222267,
      "grad_norm": 0.23042401671409607,
      "learning_rate": 3.0367085521503926e-06,
      "loss": 0.0129,
      "step": 2133540
    },
    {
      "epoch": 3.49161773466088,
      "grad_norm": 0.10609550774097443,
      "learning_rate": 3.0366426599368753e-06,
      "loss": 0.0127,
      "step": 2133560
    },
    {
      "epoch": 3.491650465099533,
      "grad_norm": 0.9605668783187866,
      "learning_rate": 3.036576767723358e-06,
      "loss": 0.0161,
      "step": 2133580
    },
    {
      "epoch": 3.4916831955381866,
      "grad_norm": 0.06277109682559967,
      "learning_rate": 3.0365108755098412e-06,
      "loss": 0.0078,
      "step": 2133600
    },
    {
      "epoch": 3.4917159259768398,
      "grad_norm": 0.28546762466430664,
      "learning_rate": 3.036444983296324e-06,
      "loss": 0.008,
      "step": 2133620
    },
    {
      "epoch": 3.4917486564154934,
      "grad_norm": 0.17107287049293518,
      "learning_rate": 3.036379091082807e-06,
      "loss": 0.0142,
      "step": 2133640
    },
    {
      "epoch": 3.4917813868541465,
      "grad_norm": 0.3456869423389435,
      "learning_rate": 3.0363131988692903e-06,
      "loss": 0.0076,
      "step": 2133660
    },
    {
      "epoch": 3.4918141172928,
      "grad_norm": 0.2621329128742218,
      "learning_rate": 3.036247306655773e-06,
      "loss": 0.0093,
      "step": 2133680
    },
    {
      "epoch": 3.4918468477314533,
      "grad_norm": 0.09081524610519409,
      "learning_rate": 3.0361814144422558e-06,
      "loss": 0.0078,
      "step": 2133700
    },
    {
      "epoch": 3.4918795781701064,
      "grad_norm": 0.22502751648426056,
      "learning_rate": 3.0361155222287385e-06,
      "loss": 0.0097,
      "step": 2133720
    },
    {
      "epoch": 3.49191230860876,
      "grad_norm": 0.1801474541425705,
      "learning_rate": 3.0360496300152217e-06,
      "loss": 0.0133,
      "step": 2133740
    },
    {
      "epoch": 3.491945039047413,
      "grad_norm": 0.20058511197566986,
      "learning_rate": 3.0359837378017044e-06,
      "loss": 0.0078,
      "step": 2133760
    },
    {
      "epoch": 3.4919777694860668,
      "grad_norm": 0.30284377932548523,
      "learning_rate": 3.035917845588187e-06,
      "loss": 0.0091,
      "step": 2133780
    },
    {
      "epoch": 3.49201049992472,
      "grad_norm": 0.5975671410560608,
      "learning_rate": 3.03585195337467e-06,
      "loss": 0.0118,
      "step": 2133800
    },
    {
      "epoch": 3.4920432303633735,
      "grad_norm": 0.13680607080459595,
      "learning_rate": 3.0357860611611526e-06,
      "loss": 0.0147,
      "step": 2133820
    },
    {
      "epoch": 3.4920759608020266,
      "grad_norm": 0.1948334276676178,
      "learning_rate": 3.035720168947636e-06,
      "loss": 0.0133,
      "step": 2133840
    },
    {
      "epoch": 3.49210869124068,
      "grad_norm": 0.23320819437503815,
      "learning_rate": 3.0356542767341185e-06,
      "loss": 0.0101,
      "step": 2133860
    },
    {
      "epoch": 3.4921414216793334,
      "grad_norm": 0.1381366103887558,
      "learning_rate": 3.0355883845206013e-06,
      "loss": 0.016,
      "step": 2133880
    },
    {
      "epoch": 3.4921741521179865,
      "grad_norm": 0.17412669956684113,
      "learning_rate": 3.035522492307084e-06,
      "loss": 0.0209,
      "step": 2133900
    },
    {
      "epoch": 3.49220688255664,
      "grad_norm": 0.42270946502685547,
      "learning_rate": 3.035456600093567e-06,
      "loss": 0.0085,
      "step": 2133920
    },
    {
      "epoch": 3.4922396129952933,
      "grad_norm": 0.37093597650527954,
      "learning_rate": 3.03539070788005e-06,
      "loss": 0.0136,
      "step": 2133940
    },
    {
      "epoch": 3.492272343433947,
      "grad_norm": 0.16678647696971893,
      "learning_rate": 3.0353248156665327e-06,
      "loss": 0.0119,
      "step": 2133960
    },
    {
      "epoch": 3.4923050738726,
      "grad_norm": 0.405994713306427,
      "learning_rate": 3.0352589234530154e-06,
      "loss": 0.0122,
      "step": 2133980
    },
    {
      "epoch": 3.492337804311253,
      "grad_norm": 0.50840824842453,
      "learning_rate": 3.035193031239499e-06,
      "loss": 0.0112,
      "step": 2134000
    },
    {
      "epoch": 3.4923705347499068,
      "grad_norm": 0.5377905964851379,
      "learning_rate": 3.0351271390259817e-06,
      "loss": 0.0104,
      "step": 2134020
    },
    {
      "epoch": 3.49240326518856,
      "grad_norm": 0.706713855266571,
      "learning_rate": 3.0350612468124645e-06,
      "loss": 0.0069,
      "step": 2134040
    },
    {
      "epoch": 3.4924359956272135,
      "grad_norm": 0.31680989265441895,
      "learning_rate": 3.0349953545989476e-06,
      "loss": 0.0164,
      "step": 2134060
    },
    {
      "epoch": 3.4924687260658667,
      "grad_norm": 0.44814521074295044,
      "learning_rate": 3.0349294623854304e-06,
      "loss": 0.0129,
      "step": 2134080
    },
    {
      "epoch": 3.4925014565045203,
      "grad_norm": 0.7760509848594666,
      "learning_rate": 3.034863570171913e-06,
      "loss": 0.0093,
      "step": 2134100
    },
    {
      "epoch": 3.4925341869431734,
      "grad_norm": 0.4120137393474579,
      "learning_rate": 3.034797677958396e-06,
      "loss": 0.0094,
      "step": 2134120
    },
    {
      "epoch": 3.4925669173818266,
      "grad_norm": 0.32064828276634216,
      "learning_rate": 3.034731785744879e-06,
      "loss": 0.0129,
      "step": 2134140
    },
    {
      "epoch": 3.49259964782048,
      "grad_norm": 0.2186092734336853,
      "learning_rate": 3.0346658935313617e-06,
      "loss": 0.0145,
      "step": 2134160
    },
    {
      "epoch": 3.4926323782591333,
      "grad_norm": 0.12334319949150085,
      "learning_rate": 3.0346000013178445e-06,
      "loss": 0.0055,
      "step": 2134180
    },
    {
      "epoch": 3.492665108697787,
      "grad_norm": 0.9312495589256287,
      "learning_rate": 3.0345341091043272e-06,
      "loss": 0.0116,
      "step": 2134200
    },
    {
      "epoch": 3.49269783913644,
      "grad_norm": 0.4413197934627533,
      "learning_rate": 3.03446821689081e-06,
      "loss": 0.0087,
      "step": 2134220
    },
    {
      "epoch": 3.4927305695750936,
      "grad_norm": 0.3486233651638031,
      "learning_rate": 3.034402324677293e-06,
      "loss": 0.0115,
      "step": 2134240
    },
    {
      "epoch": 3.492763300013747,
      "grad_norm": 0.2983146011829376,
      "learning_rate": 3.034336432463776e-06,
      "loss": 0.0155,
      "step": 2134260
    },
    {
      "epoch": 3.4927960304524,
      "grad_norm": 0.045720018446445465,
      "learning_rate": 3.0342705402502586e-06,
      "loss": 0.0093,
      "step": 2134280
    },
    {
      "epoch": 3.4928287608910535,
      "grad_norm": 0.5460162162780762,
      "learning_rate": 3.0342046480367413e-06,
      "loss": 0.0124,
      "step": 2134300
    },
    {
      "epoch": 3.4928614913297067,
      "grad_norm": 0.15508881211280823,
      "learning_rate": 3.0341387558232245e-06,
      "loss": 0.0091,
      "step": 2134320
    },
    {
      "epoch": 3.4928942217683603,
      "grad_norm": 0.5935543179512024,
      "learning_rate": 3.0340728636097077e-06,
      "loss": 0.0221,
      "step": 2134340
    },
    {
      "epoch": 3.4929269522070134,
      "grad_norm": 0.07088644057512283,
      "learning_rate": 3.0340069713961904e-06,
      "loss": 0.0119,
      "step": 2134360
    },
    {
      "epoch": 3.492959682645667,
      "grad_norm": 0.5988420248031616,
      "learning_rate": 3.0339410791826736e-06,
      "loss": 0.0158,
      "step": 2134380
    },
    {
      "epoch": 3.49299241308432,
      "grad_norm": 0.1273372918367386,
      "learning_rate": 3.0338751869691563e-06,
      "loss": 0.0107,
      "step": 2134400
    },
    {
      "epoch": 3.4930251435229733,
      "grad_norm": 0.16667228937149048,
      "learning_rate": 3.033809294755639e-06,
      "loss": 0.0074,
      "step": 2134420
    },
    {
      "epoch": 3.493057873961627,
      "grad_norm": 0.0672464594244957,
      "learning_rate": 3.033743402542122e-06,
      "loss": 0.01,
      "step": 2134440
    },
    {
      "epoch": 3.49309060440028,
      "grad_norm": 0.17034195363521576,
      "learning_rate": 3.033677510328605e-06,
      "loss": 0.0118,
      "step": 2134460
    },
    {
      "epoch": 3.4931233348389337,
      "grad_norm": 0.1671556681394577,
      "learning_rate": 3.0336116181150877e-06,
      "loss": 0.0082,
      "step": 2134480
    },
    {
      "epoch": 3.493156065277587,
      "grad_norm": 0.4100535809993744,
      "learning_rate": 3.0335457259015704e-06,
      "loss": 0.0098,
      "step": 2134500
    },
    {
      "epoch": 3.4931887957162404,
      "grad_norm": 0.42721614241600037,
      "learning_rate": 3.033479833688053e-06,
      "loss": 0.0159,
      "step": 2134520
    },
    {
      "epoch": 3.4932215261548936,
      "grad_norm": 0.6560688018798828,
      "learning_rate": 3.0334139414745363e-06,
      "loss": 0.011,
      "step": 2134540
    },
    {
      "epoch": 3.4932542565935467,
      "grad_norm": 0.17652243375778198,
      "learning_rate": 3.033348049261019e-06,
      "loss": 0.0096,
      "step": 2134560
    },
    {
      "epoch": 3.4932869870322003,
      "grad_norm": 1.1410373449325562,
      "learning_rate": 3.033282157047502e-06,
      "loss": 0.0097,
      "step": 2134580
    },
    {
      "epoch": 3.4933197174708535,
      "grad_norm": 0.6185264587402344,
      "learning_rate": 3.0332162648339846e-06,
      "loss": 0.0133,
      "step": 2134600
    },
    {
      "epoch": 3.493352447909507,
      "grad_norm": 0.3043718934059143,
      "learning_rate": 3.0331503726204677e-06,
      "loss": 0.0118,
      "step": 2134620
    },
    {
      "epoch": 3.49338517834816,
      "grad_norm": 0.23656943440437317,
      "learning_rate": 3.0330844804069505e-06,
      "loss": 0.0119,
      "step": 2134640
    },
    {
      "epoch": 3.493417908786814,
      "grad_norm": 0.1682744175195694,
      "learning_rate": 3.033018588193433e-06,
      "loss": 0.0093,
      "step": 2134660
    },
    {
      "epoch": 3.493450639225467,
      "grad_norm": 0.3381236791610718,
      "learning_rate": 3.032952695979916e-06,
      "loss": 0.0104,
      "step": 2134680
    },
    {
      "epoch": 3.49348336966412,
      "grad_norm": 0.7534602880477905,
      "learning_rate": 3.0328868037663995e-06,
      "loss": 0.0122,
      "step": 2134700
    },
    {
      "epoch": 3.4935161001027737,
      "grad_norm": 0.20108868181705475,
      "learning_rate": 3.0328209115528823e-06,
      "loss": 0.0134,
      "step": 2134720
    },
    {
      "epoch": 3.493548830541427,
      "grad_norm": 0.5549967288970947,
      "learning_rate": 3.032755019339365e-06,
      "loss": 0.0101,
      "step": 2134740
    },
    {
      "epoch": 3.4935815609800804,
      "grad_norm": 0.48107314109802246,
      "learning_rate": 3.0326891271258478e-06,
      "loss": 0.0185,
      "step": 2134760
    },
    {
      "epoch": 3.4936142914187336,
      "grad_norm": 0.47853267192840576,
      "learning_rate": 3.032623234912331e-06,
      "loss": 0.015,
      "step": 2134780
    },
    {
      "epoch": 3.493647021857387,
      "grad_norm": 0.19718743860721588,
      "learning_rate": 3.0325573426988137e-06,
      "loss": 0.0074,
      "step": 2134800
    },
    {
      "epoch": 3.4936797522960403,
      "grad_norm": 0.15187767148017883,
      "learning_rate": 3.0324914504852964e-06,
      "loss": 0.0135,
      "step": 2134820
    },
    {
      "epoch": 3.4937124827346935,
      "grad_norm": 0.0691143050789833,
      "learning_rate": 3.032425558271779e-06,
      "loss": 0.0103,
      "step": 2134840
    },
    {
      "epoch": 3.493745213173347,
      "grad_norm": 0.24265643954277039,
      "learning_rate": 3.0323596660582623e-06,
      "loss": 0.0121,
      "step": 2134860
    },
    {
      "epoch": 3.4937779436120002,
      "grad_norm": 0.1422034353017807,
      "learning_rate": 3.032293773844745e-06,
      "loss": 0.0111,
      "step": 2134880
    },
    {
      "epoch": 3.493810674050654,
      "grad_norm": 0.11585228145122528,
      "learning_rate": 3.0322278816312278e-06,
      "loss": 0.0083,
      "step": 2134900
    },
    {
      "epoch": 3.493843404489307,
      "grad_norm": 0.20820468664169312,
      "learning_rate": 3.0321619894177105e-06,
      "loss": 0.0103,
      "step": 2134920
    },
    {
      "epoch": 3.49387613492796,
      "grad_norm": 0.1662965714931488,
      "learning_rate": 3.0320960972041937e-06,
      "loss": 0.0112,
      "step": 2134940
    },
    {
      "epoch": 3.4939088653666137,
      "grad_norm": 0.05018968507647514,
      "learning_rate": 3.0320302049906764e-06,
      "loss": 0.0146,
      "step": 2134960
    },
    {
      "epoch": 3.493941595805267,
      "grad_norm": 0.2304443120956421,
      "learning_rate": 3.031964312777159e-06,
      "loss": 0.0115,
      "step": 2134980
    },
    {
      "epoch": 3.4939743262439205,
      "grad_norm": 0.241887167096138,
      "learning_rate": 3.031898420563642e-06,
      "loss": 0.0082,
      "step": 2135000
    },
    {
      "epoch": 3.4940070566825736,
      "grad_norm": 0.10551344603300095,
      "learning_rate": 3.031832528350125e-06,
      "loss": 0.01,
      "step": 2135020
    },
    {
      "epoch": 3.4940397871212268,
      "grad_norm": 0.4398554563522339,
      "learning_rate": 3.031766636136608e-06,
      "loss": 0.0093,
      "step": 2135040
    },
    {
      "epoch": 3.4940725175598804,
      "grad_norm": 0.48805689811706543,
      "learning_rate": 3.031700743923091e-06,
      "loss": 0.0127,
      "step": 2135060
    },
    {
      "epoch": 3.4941052479985335,
      "grad_norm": 0.23210865259170532,
      "learning_rate": 3.031634851709574e-06,
      "loss": 0.0094,
      "step": 2135080
    },
    {
      "epoch": 3.494137978437187,
      "grad_norm": 0.5606784820556641,
      "learning_rate": 3.031568959496057e-06,
      "loss": 0.0113,
      "step": 2135100
    },
    {
      "epoch": 3.4941707088758402,
      "grad_norm": 0.2493520975112915,
      "learning_rate": 3.0315030672825396e-06,
      "loss": 0.0104,
      "step": 2135120
    },
    {
      "epoch": 3.494203439314494,
      "grad_norm": 0.3302040994167328,
      "learning_rate": 3.0314371750690223e-06,
      "loss": 0.0132,
      "step": 2135140
    },
    {
      "epoch": 3.494236169753147,
      "grad_norm": 0.1966264247894287,
      "learning_rate": 3.0313712828555055e-06,
      "loss": 0.0116,
      "step": 2135160
    },
    {
      "epoch": 3.4942689001918,
      "grad_norm": 0.6912888288497925,
      "learning_rate": 3.0313053906419883e-06,
      "loss": 0.0103,
      "step": 2135180
    },
    {
      "epoch": 3.4943016306304537,
      "grad_norm": 0.5305337905883789,
      "learning_rate": 3.031239498428471e-06,
      "loss": 0.0134,
      "step": 2135200
    },
    {
      "epoch": 3.494334361069107,
      "grad_norm": 0.17308636009693146,
      "learning_rate": 3.0311736062149537e-06,
      "loss": 0.0131,
      "step": 2135220
    },
    {
      "epoch": 3.4943670915077605,
      "grad_norm": 0.5035282373428345,
      "learning_rate": 3.0311077140014365e-06,
      "loss": 0.0097,
      "step": 2135240
    },
    {
      "epoch": 3.4943998219464136,
      "grad_norm": 0.35247522592544556,
      "learning_rate": 3.0310418217879196e-06,
      "loss": 0.0135,
      "step": 2135260
    },
    {
      "epoch": 3.4944325523850672,
      "grad_norm": 0.18507356941699982,
      "learning_rate": 3.0309759295744024e-06,
      "loss": 0.007,
      "step": 2135280
    },
    {
      "epoch": 3.4944652828237204,
      "grad_norm": 0.46415993571281433,
      "learning_rate": 3.030910037360885e-06,
      "loss": 0.0168,
      "step": 2135300
    },
    {
      "epoch": 3.4944980132623735,
      "grad_norm": 0.05761122703552246,
      "learning_rate": 3.030844145147368e-06,
      "loss": 0.0092,
      "step": 2135320
    },
    {
      "epoch": 3.494530743701027,
      "grad_norm": 0.6008196473121643,
      "learning_rate": 3.030778252933851e-06,
      "loss": 0.0099,
      "step": 2135340
    },
    {
      "epoch": 3.4945634741396803,
      "grad_norm": 0.18209640681743622,
      "learning_rate": 3.0307123607203338e-06,
      "loss": 0.009,
      "step": 2135360
    },
    {
      "epoch": 3.494596204578334,
      "grad_norm": 0.20239634811878204,
      "learning_rate": 3.0306464685068165e-06,
      "loss": 0.0088,
      "step": 2135380
    },
    {
      "epoch": 3.494628935016987,
      "grad_norm": 0.3222646713256836,
      "learning_rate": 3.0305805762933e-06,
      "loss": 0.0131,
      "step": 2135400
    },
    {
      "epoch": 3.4946616654556406,
      "grad_norm": 0.08736659586429596,
      "learning_rate": 3.030514684079783e-06,
      "loss": 0.0084,
      "step": 2135420
    },
    {
      "epoch": 3.4946943958942938,
      "grad_norm": 0.33656665682792664,
      "learning_rate": 3.0304487918662656e-06,
      "loss": 0.0104,
      "step": 2135440
    },
    {
      "epoch": 3.494727126332947,
      "grad_norm": 0.6502034664154053,
      "learning_rate": 3.0303828996527483e-06,
      "loss": 0.0097,
      "step": 2135460
    },
    {
      "epoch": 3.4947598567716005,
      "grad_norm": 0.16168996691703796,
      "learning_rate": 3.0303170074392315e-06,
      "loss": 0.0166,
      "step": 2135480
    },
    {
      "epoch": 3.4947925872102537,
      "grad_norm": 0.36308911442756653,
      "learning_rate": 3.030251115225714e-06,
      "loss": 0.0132,
      "step": 2135500
    },
    {
      "epoch": 3.4948253176489072,
      "grad_norm": 0.5143820643424988,
      "learning_rate": 3.030185223012197e-06,
      "loss": 0.0128,
      "step": 2135520
    },
    {
      "epoch": 3.4948580480875604,
      "grad_norm": 0.15664158761501312,
      "learning_rate": 3.0301193307986797e-06,
      "loss": 0.0059,
      "step": 2135540
    },
    {
      "epoch": 3.494890778526214,
      "grad_norm": 0.27546781301498413,
      "learning_rate": 3.030053438585163e-06,
      "loss": 0.0165,
      "step": 2135560
    },
    {
      "epoch": 3.494923508964867,
      "grad_norm": 0.5786539912223816,
      "learning_rate": 3.0299875463716456e-06,
      "loss": 0.0123,
      "step": 2135580
    },
    {
      "epoch": 3.4949562394035203,
      "grad_norm": 0.4976792335510254,
      "learning_rate": 3.0299216541581283e-06,
      "loss": 0.0073,
      "step": 2135600
    },
    {
      "epoch": 3.494988969842174,
      "grad_norm": 0.1668218970298767,
      "learning_rate": 3.029855761944611e-06,
      "loss": 0.0089,
      "step": 2135620
    },
    {
      "epoch": 3.495021700280827,
      "grad_norm": 0.4448990821838379,
      "learning_rate": 3.029789869731094e-06,
      "loss": 0.0111,
      "step": 2135640
    },
    {
      "epoch": 3.4950544307194806,
      "grad_norm": 0.32674407958984375,
      "learning_rate": 3.029723977517577e-06,
      "loss": 0.0099,
      "step": 2135660
    },
    {
      "epoch": 3.495087161158134,
      "grad_norm": 0.23434986174106598,
      "learning_rate": 3.0296580853040597e-06,
      "loss": 0.008,
      "step": 2135680
    },
    {
      "epoch": 3.4951198915967874,
      "grad_norm": 0.07147963345050812,
      "learning_rate": 3.0295921930905424e-06,
      "loss": 0.0088,
      "step": 2135700
    },
    {
      "epoch": 3.4951526220354405,
      "grad_norm": 0.3299961984157562,
      "learning_rate": 3.029526300877025e-06,
      "loss": 0.0118,
      "step": 2135720
    },
    {
      "epoch": 3.4951853524740937,
      "grad_norm": 0.2077300101518631,
      "learning_rate": 3.0294604086635084e-06,
      "loss": 0.0135,
      "step": 2135740
    },
    {
      "epoch": 3.4952180829127473,
      "grad_norm": 0.2631501257419586,
      "learning_rate": 3.0293945164499915e-06,
      "loss": 0.0098,
      "step": 2135760
    },
    {
      "epoch": 3.4952508133514004,
      "grad_norm": 0.11988667398691177,
      "learning_rate": 3.0293286242364743e-06,
      "loss": 0.0117,
      "step": 2135780
    },
    {
      "epoch": 3.495283543790054,
      "grad_norm": 0.8812757134437561,
      "learning_rate": 3.0292627320229574e-06,
      "loss": 0.01,
      "step": 2135800
    },
    {
      "epoch": 3.495316274228707,
      "grad_norm": 0.4569269120693207,
      "learning_rate": 3.02919683980944e-06,
      "loss": 0.0151,
      "step": 2135820
    },
    {
      "epoch": 3.4953490046673608,
      "grad_norm": 0.4822596609592438,
      "learning_rate": 3.029130947595923e-06,
      "loss": 0.0103,
      "step": 2135840
    },
    {
      "epoch": 3.495381735106014,
      "grad_norm": 0.2460298240184784,
      "learning_rate": 3.0290650553824056e-06,
      "loss": 0.009,
      "step": 2135860
    },
    {
      "epoch": 3.495414465544667,
      "grad_norm": 0.3767944276332855,
      "learning_rate": 3.028999163168889e-06,
      "loss": 0.0171,
      "step": 2135880
    },
    {
      "epoch": 3.4954471959833207,
      "grad_norm": 0.18595340847969055,
      "learning_rate": 3.0289332709553715e-06,
      "loss": 0.015,
      "step": 2135900
    },
    {
      "epoch": 3.495479926421974,
      "grad_norm": 0.3049434721469879,
      "learning_rate": 3.0288673787418543e-06,
      "loss": 0.0147,
      "step": 2135920
    },
    {
      "epoch": 3.4955126568606274,
      "grad_norm": 0.13131187856197357,
      "learning_rate": 3.028801486528337e-06,
      "loss": 0.0102,
      "step": 2135940
    },
    {
      "epoch": 3.4955453872992805,
      "grad_norm": 0.6032127737998962,
      "learning_rate": 3.02873559431482e-06,
      "loss": 0.0126,
      "step": 2135960
    },
    {
      "epoch": 3.495578117737934,
      "grad_norm": 0.7968674302101135,
      "learning_rate": 3.028669702101303e-06,
      "loss": 0.0106,
      "step": 2135980
    },
    {
      "epoch": 3.4956108481765873,
      "grad_norm": 0.4874761700630188,
      "learning_rate": 3.0286038098877857e-06,
      "loss": 0.0101,
      "step": 2136000
    },
    {
      "epoch": 3.4956435786152404,
      "grad_norm": 0.05020986869931221,
      "learning_rate": 3.0285379176742684e-06,
      "loss": 0.0097,
      "step": 2136020
    },
    {
      "epoch": 3.495676309053894,
      "grad_norm": 0.24741610884666443,
      "learning_rate": 3.0284720254607516e-06,
      "loss": 0.0097,
      "step": 2136040
    },
    {
      "epoch": 3.495709039492547,
      "grad_norm": 0.264069527387619,
      "learning_rate": 3.0284061332472343e-06,
      "loss": 0.0081,
      "step": 2136060
    },
    {
      "epoch": 3.495741769931201,
      "grad_norm": 0.8203598260879517,
      "learning_rate": 3.028340241033717e-06,
      "loss": 0.0123,
      "step": 2136080
    },
    {
      "epoch": 3.495774500369854,
      "grad_norm": 0.10557060688734055,
      "learning_rate": 3.0282743488202006e-06,
      "loss": 0.0107,
      "step": 2136100
    },
    {
      "epoch": 3.4958072308085075,
      "grad_norm": 0.27525269985198975,
      "learning_rate": 3.0282084566066834e-06,
      "loss": 0.0089,
      "step": 2136120
    },
    {
      "epoch": 3.4958399612471607,
      "grad_norm": 0.2802351117134094,
      "learning_rate": 3.028142564393166e-06,
      "loss": 0.0119,
      "step": 2136140
    },
    {
      "epoch": 3.495872691685814,
      "grad_norm": 0.22351177036762238,
      "learning_rate": 3.028076672179649e-06,
      "loss": 0.011,
      "step": 2136160
    },
    {
      "epoch": 3.4959054221244674,
      "grad_norm": 0.5618219375610352,
      "learning_rate": 3.0280107799661316e-06,
      "loss": 0.0105,
      "step": 2136180
    },
    {
      "epoch": 3.4959381525631206,
      "grad_norm": 0.36999136209487915,
      "learning_rate": 3.0279448877526148e-06,
      "loss": 0.0098,
      "step": 2136200
    },
    {
      "epoch": 3.495970883001774,
      "grad_norm": 0.41923558712005615,
      "learning_rate": 3.0278789955390975e-06,
      "loss": 0.0134,
      "step": 2136220
    },
    {
      "epoch": 3.4960036134404273,
      "grad_norm": 0.16542509198188782,
      "learning_rate": 3.0278131033255802e-06,
      "loss": 0.0076,
      "step": 2136240
    },
    {
      "epoch": 3.496036343879081,
      "grad_norm": 1.4287351369857788,
      "learning_rate": 3.027747211112063e-06,
      "loss": 0.0081,
      "step": 2136260
    },
    {
      "epoch": 3.496069074317734,
      "grad_norm": 0.3372904658317566,
      "learning_rate": 3.027681318898546e-06,
      "loss": 0.0118,
      "step": 2136280
    },
    {
      "epoch": 3.496101804756387,
      "grad_norm": 0.3192983865737915,
      "learning_rate": 3.027615426685029e-06,
      "loss": 0.0109,
      "step": 2136300
    },
    {
      "epoch": 3.496134535195041,
      "grad_norm": 0.2643071711063385,
      "learning_rate": 3.0275495344715116e-06,
      "loss": 0.0168,
      "step": 2136320
    },
    {
      "epoch": 3.496167265633694,
      "grad_norm": 0.16706182062625885,
      "learning_rate": 3.0274836422579944e-06,
      "loss": 0.0098,
      "step": 2136340
    },
    {
      "epoch": 3.4961999960723475,
      "grad_norm": 0.024888262152671814,
      "learning_rate": 3.0274177500444775e-06,
      "loss": 0.0061,
      "step": 2136360
    },
    {
      "epoch": 3.4962327265110007,
      "grad_norm": 0.16867303848266602,
      "learning_rate": 3.0273518578309603e-06,
      "loss": 0.0088,
      "step": 2136380
    },
    {
      "epoch": 3.496265456949654,
      "grad_norm": 0.3318078815937042,
      "learning_rate": 3.027285965617443e-06,
      "loss": 0.0091,
      "step": 2136400
    },
    {
      "epoch": 3.4962981873883074,
      "grad_norm": 0.13965491950511932,
      "learning_rate": 3.0272200734039257e-06,
      "loss": 0.0108,
      "step": 2136420
    },
    {
      "epoch": 3.4963309178269606,
      "grad_norm": 0.1522454023361206,
      "learning_rate": 3.027154181190409e-06,
      "loss": 0.0124,
      "step": 2136440
    },
    {
      "epoch": 3.496363648265614,
      "grad_norm": 0.6152399182319641,
      "learning_rate": 3.027088288976892e-06,
      "loss": 0.0099,
      "step": 2136460
    },
    {
      "epoch": 3.4963963787042673,
      "grad_norm": 0.49834612011909485,
      "learning_rate": 3.027022396763375e-06,
      "loss": 0.0149,
      "step": 2136480
    },
    {
      "epoch": 3.4964291091429205,
      "grad_norm": 0.47361063957214355,
      "learning_rate": 3.026956504549858e-06,
      "loss": 0.0128,
      "step": 2136500
    },
    {
      "epoch": 3.496461839581574,
      "grad_norm": 0.2201879322528839,
      "learning_rate": 3.0268906123363407e-06,
      "loss": 0.0148,
      "step": 2136520
    },
    {
      "epoch": 3.4964945700202272,
      "grad_norm": 1.008909821510315,
      "learning_rate": 3.0268247201228234e-06,
      "loss": 0.0115,
      "step": 2136540
    },
    {
      "epoch": 3.496527300458881,
      "grad_norm": 0.15722890198230743,
      "learning_rate": 3.026758827909306e-06,
      "loss": 0.0071,
      "step": 2136560
    },
    {
      "epoch": 3.496560030897534,
      "grad_norm": 0.1207064762711525,
      "learning_rate": 3.0266929356957894e-06,
      "loss": 0.0103,
      "step": 2136580
    },
    {
      "epoch": 3.4965927613361876,
      "grad_norm": 0.06088479980826378,
      "learning_rate": 3.026627043482272e-06,
      "loss": 0.0116,
      "step": 2136600
    },
    {
      "epoch": 3.4966254917748407,
      "grad_norm": 0.16576851904392242,
      "learning_rate": 3.026561151268755e-06,
      "loss": 0.0106,
      "step": 2136620
    },
    {
      "epoch": 3.496658222213494,
      "grad_norm": 0.5030683279037476,
      "learning_rate": 3.0264952590552376e-06,
      "loss": 0.0109,
      "step": 2136640
    },
    {
      "epoch": 3.4966909526521475,
      "grad_norm": 0.20274733006954193,
      "learning_rate": 3.0264293668417203e-06,
      "loss": 0.0095,
      "step": 2136660
    },
    {
      "epoch": 3.4967236830908006,
      "grad_norm": 0.48518362641334534,
      "learning_rate": 3.0263634746282035e-06,
      "loss": 0.0142,
      "step": 2136680
    },
    {
      "epoch": 3.496756413529454,
      "grad_norm": 0.5825635194778442,
      "learning_rate": 3.0262975824146862e-06,
      "loss": 0.0105,
      "step": 2136700
    },
    {
      "epoch": 3.4967891439681074,
      "grad_norm": 0.06405071169137955,
      "learning_rate": 3.026231690201169e-06,
      "loss": 0.013,
      "step": 2136720
    },
    {
      "epoch": 3.496821874406761,
      "grad_norm": 0.3628016710281372,
      "learning_rate": 3.0261657979876517e-06,
      "loss": 0.0098,
      "step": 2136740
    },
    {
      "epoch": 3.496854604845414,
      "grad_norm": 0.16387304663658142,
      "learning_rate": 3.026099905774135e-06,
      "loss": 0.0102,
      "step": 2136760
    },
    {
      "epoch": 3.4968873352840673,
      "grad_norm": 0.2523825466632843,
      "learning_rate": 3.0260340135606176e-06,
      "loss": 0.0099,
      "step": 2136780
    },
    {
      "epoch": 3.496920065722721,
      "grad_norm": 0.2912065386772156,
      "learning_rate": 3.0259681213471003e-06,
      "loss": 0.0137,
      "step": 2136800
    },
    {
      "epoch": 3.496952796161374,
      "grad_norm": 0.22332972288131714,
      "learning_rate": 3.025902229133584e-06,
      "loss": 0.0101,
      "step": 2136820
    },
    {
      "epoch": 3.4969855266000276,
      "grad_norm": 0.3743204176425934,
      "learning_rate": 3.0258363369200667e-06,
      "loss": 0.0124,
      "step": 2136840
    },
    {
      "epoch": 3.4970182570386807,
      "grad_norm": 0.33548155426979065,
      "learning_rate": 3.0257704447065494e-06,
      "loss": 0.0143,
      "step": 2136860
    },
    {
      "epoch": 3.4970509874773343,
      "grad_norm": 0.0891449823975563,
      "learning_rate": 3.025704552493032e-06,
      "loss": 0.0119,
      "step": 2136880
    },
    {
      "epoch": 3.4970837179159875,
      "grad_norm": 0.057923078536987305,
      "learning_rate": 3.0256386602795153e-06,
      "loss": 0.0118,
      "step": 2136900
    },
    {
      "epoch": 3.4971164483546406,
      "grad_norm": 0.834861695766449,
      "learning_rate": 3.025572768065998e-06,
      "loss": 0.0119,
      "step": 2136920
    },
    {
      "epoch": 3.4971491787932942,
      "grad_norm": 0.44098299741744995,
      "learning_rate": 3.0255068758524808e-06,
      "loss": 0.0184,
      "step": 2136940
    },
    {
      "epoch": 3.4971819092319474,
      "grad_norm": 0.02758968062698841,
      "learning_rate": 3.0254409836389635e-06,
      "loss": 0.0129,
      "step": 2136960
    },
    {
      "epoch": 3.497214639670601,
      "grad_norm": 0.8086187243461609,
      "learning_rate": 3.0253750914254467e-06,
      "loss": 0.0156,
      "step": 2136980
    },
    {
      "epoch": 3.497247370109254,
      "grad_norm": 0.4344845116138458,
      "learning_rate": 3.0253091992119294e-06,
      "loss": 0.0106,
      "step": 2137000
    },
    {
      "epoch": 3.4972801005479077,
      "grad_norm": 0.13874231278896332,
      "learning_rate": 3.025243306998412e-06,
      "loss": 0.0088,
      "step": 2137020
    },
    {
      "epoch": 3.497312830986561,
      "grad_norm": 0.09009459614753723,
      "learning_rate": 3.025177414784895e-06,
      "loss": 0.0076,
      "step": 2137040
    },
    {
      "epoch": 3.497345561425214,
      "grad_norm": 0.6673018932342529,
      "learning_rate": 3.0251115225713776e-06,
      "loss": 0.0091,
      "step": 2137060
    },
    {
      "epoch": 3.4973782918638676,
      "grad_norm": 0.1275797188282013,
      "learning_rate": 3.025045630357861e-06,
      "loss": 0.0103,
      "step": 2137080
    },
    {
      "epoch": 3.4974110223025208,
      "grad_norm": 0.19769510626792908,
      "learning_rate": 3.0249797381443435e-06,
      "loss": 0.0114,
      "step": 2137100
    },
    {
      "epoch": 3.4974437527411744,
      "grad_norm": 0.3764406442642212,
      "learning_rate": 3.0249138459308263e-06,
      "loss": 0.0105,
      "step": 2137120
    },
    {
      "epoch": 3.4974764831798275,
      "grad_norm": 0.07473765313625336,
      "learning_rate": 3.024847953717309e-06,
      "loss": 0.0102,
      "step": 2137140
    },
    {
      "epoch": 3.497509213618481,
      "grad_norm": 0.29337260127067566,
      "learning_rate": 3.0247820615037926e-06,
      "loss": 0.0078,
      "step": 2137160
    },
    {
      "epoch": 3.4975419440571343,
      "grad_norm": 0.2910439670085907,
      "learning_rate": 3.0247161692902754e-06,
      "loss": 0.0095,
      "step": 2137180
    },
    {
      "epoch": 3.4975746744957874,
      "grad_norm": 0.3634818494319916,
      "learning_rate": 3.024650277076758e-06,
      "loss": 0.0122,
      "step": 2137200
    },
    {
      "epoch": 3.497607404934441,
      "grad_norm": 0.29007789492607117,
      "learning_rate": 3.0245843848632413e-06,
      "loss": 0.0107,
      "step": 2137220
    },
    {
      "epoch": 3.497640135373094,
      "grad_norm": 0.4235731065273285,
      "learning_rate": 3.024518492649724e-06,
      "loss": 0.0107,
      "step": 2137240
    },
    {
      "epoch": 3.4976728658117477,
      "grad_norm": 0.2866722047328949,
      "learning_rate": 3.0244526004362067e-06,
      "loss": 0.013,
      "step": 2137260
    },
    {
      "epoch": 3.497705596250401,
      "grad_norm": 0.28784698247909546,
      "learning_rate": 3.0243867082226895e-06,
      "loss": 0.013,
      "step": 2137280
    },
    {
      "epoch": 3.4977383266890545,
      "grad_norm": 0.339829683303833,
      "learning_rate": 3.0243208160091726e-06,
      "loss": 0.008,
      "step": 2137300
    },
    {
      "epoch": 3.4977710571277076,
      "grad_norm": 0.13250449299812317,
      "learning_rate": 3.0242549237956554e-06,
      "loss": 0.0117,
      "step": 2137320
    },
    {
      "epoch": 3.497803787566361,
      "grad_norm": 0.12581679224967957,
      "learning_rate": 3.024189031582138e-06,
      "loss": 0.0091,
      "step": 2137340
    },
    {
      "epoch": 3.4978365180050144,
      "grad_norm": 0.32997116446495056,
      "learning_rate": 3.024123139368621e-06,
      "loss": 0.0091,
      "step": 2137360
    },
    {
      "epoch": 3.4978692484436675,
      "grad_norm": 0.2697814106941223,
      "learning_rate": 3.024057247155104e-06,
      "loss": 0.0074,
      "step": 2137380
    },
    {
      "epoch": 3.497901978882321,
      "grad_norm": 0.31276029348373413,
      "learning_rate": 3.0239913549415868e-06,
      "loss": 0.0105,
      "step": 2137400
    },
    {
      "epoch": 3.4979347093209743,
      "grad_norm": 0.12072569876909256,
      "learning_rate": 3.0239254627280695e-06,
      "loss": 0.0122,
      "step": 2137420
    },
    {
      "epoch": 3.497967439759628,
      "grad_norm": 0.18962769210338593,
      "learning_rate": 3.0238595705145522e-06,
      "loss": 0.0168,
      "step": 2137440
    },
    {
      "epoch": 3.498000170198281,
      "grad_norm": 0.5068931579589844,
      "learning_rate": 3.0237936783010354e-06,
      "loss": 0.0095,
      "step": 2137460
    },
    {
      "epoch": 3.498032900636934,
      "grad_norm": 0.3282215893268585,
      "learning_rate": 3.023727786087518e-06,
      "loss": 0.0082,
      "step": 2137480
    },
    {
      "epoch": 3.4980656310755878,
      "grad_norm": 0.19615048170089722,
      "learning_rate": 3.023661893874001e-06,
      "loss": 0.0122,
      "step": 2137500
    },
    {
      "epoch": 3.498098361514241,
      "grad_norm": 0.5407586693763733,
      "learning_rate": 3.0235960016604845e-06,
      "loss": 0.012,
      "step": 2137520
    },
    {
      "epoch": 3.4981310919528945,
      "grad_norm": 0.14453428983688354,
      "learning_rate": 3.023530109446967e-06,
      "loss": 0.0114,
      "step": 2137540
    },
    {
      "epoch": 3.4981638223915477,
      "grad_norm": 0.09457559138536453,
      "learning_rate": 3.02346421723345e-06,
      "loss": 0.0122,
      "step": 2137560
    },
    {
      "epoch": 3.4981965528302013,
      "grad_norm": 0.22732073068618774,
      "learning_rate": 3.0233983250199327e-06,
      "loss": 0.0136,
      "step": 2137580
    },
    {
      "epoch": 3.4982292832688544,
      "grad_norm": 0.07219231128692627,
      "learning_rate": 3.0233324328064154e-06,
      "loss": 0.0152,
      "step": 2137600
    },
    {
      "epoch": 3.4982620137075076,
      "grad_norm": 0.9942260384559631,
      "learning_rate": 3.0232665405928986e-06,
      "loss": 0.0165,
      "step": 2137620
    },
    {
      "epoch": 3.498294744146161,
      "grad_norm": 0.2125643938779831,
      "learning_rate": 3.0232006483793813e-06,
      "loss": 0.0146,
      "step": 2137640
    },
    {
      "epoch": 3.4983274745848143,
      "grad_norm": 0.1818770468235016,
      "learning_rate": 3.023134756165864e-06,
      "loss": 0.0099,
      "step": 2137660
    },
    {
      "epoch": 3.498360205023468,
      "grad_norm": 0.11561863124370575,
      "learning_rate": 3.023068863952347e-06,
      "loss": 0.013,
      "step": 2137680
    },
    {
      "epoch": 3.498392935462121,
      "grad_norm": 0.1795714944601059,
      "learning_rate": 3.02300297173883e-06,
      "loss": 0.0135,
      "step": 2137700
    },
    {
      "epoch": 3.4984256659007746,
      "grad_norm": 0.3677837550640106,
      "learning_rate": 3.0229370795253127e-06,
      "loss": 0.0091,
      "step": 2137720
    },
    {
      "epoch": 3.498458396339428,
      "grad_norm": 0.15620090067386627,
      "learning_rate": 3.0228711873117955e-06,
      "loss": 0.0097,
      "step": 2137740
    },
    {
      "epoch": 3.498491126778081,
      "grad_norm": 0.24582995474338531,
      "learning_rate": 3.022805295098278e-06,
      "loss": 0.0111,
      "step": 2137760
    },
    {
      "epoch": 3.4985238572167345,
      "grad_norm": 0.6614081263542175,
      "learning_rate": 3.0227394028847614e-06,
      "loss": 0.0099,
      "step": 2137780
    },
    {
      "epoch": 3.4985565876553877,
      "grad_norm": 0.3142068386077881,
      "learning_rate": 3.022673510671244e-06,
      "loss": 0.0117,
      "step": 2137800
    },
    {
      "epoch": 3.4985893180940413,
      "grad_norm": 0.448278546333313,
      "learning_rate": 3.022607618457727e-06,
      "loss": 0.01,
      "step": 2137820
    },
    {
      "epoch": 3.4986220485326944,
      "grad_norm": 0.6457239389419556,
      "learning_rate": 3.0225417262442096e-06,
      "loss": 0.0089,
      "step": 2137840
    },
    {
      "epoch": 3.498654778971348,
      "grad_norm": 0.5502205491065979,
      "learning_rate": 3.022475834030693e-06,
      "loss": 0.0124,
      "step": 2137860
    },
    {
      "epoch": 3.498687509410001,
      "grad_norm": 0.2696108818054199,
      "learning_rate": 3.022409941817176e-06,
      "loss": 0.0137,
      "step": 2137880
    },
    {
      "epoch": 3.4987202398486543,
      "grad_norm": 0.255563884973526,
      "learning_rate": 3.0223440496036586e-06,
      "loss": 0.0125,
      "step": 2137900
    },
    {
      "epoch": 3.498752970287308,
      "grad_norm": 0.3587298095226288,
      "learning_rate": 3.022278157390142e-06,
      "loss": 0.0074,
      "step": 2137920
    },
    {
      "epoch": 3.498785700725961,
      "grad_norm": 0.24206973612308502,
      "learning_rate": 3.0222122651766245e-06,
      "loss": 0.0108,
      "step": 2137940
    },
    {
      "epoch": 3.4988184311646147,
      "grad_norm": 0.24045436084270477,
      "learning_rate": 3.0221463729631073e-06,
      "loss": 0.0193,
      "step": 2137960
    },
    {
      "epoch": 3.498851161603268,
      "grad_norm": 0.19109666347503662,
      "learning_rate": 3.02208048074959e-06,
      "loss": 0.013,
      "step": 2137980
    },
    {
      "epoch": 3.498883892041921,
      "grad_norm": 0.10480313003063202,
      "learning_rate": 3.022014588536073e-06,
      "loss": 0.0113,
      "step": 2138000
    },
    {
      "epoch": 3.4989166224805746,
      "grad_norm": 0.30849117040634155,
      "learning_rate": 3.021948696322556e-06,
      "loss": 0.01,
      "step": 2138020
    },
    {
      "epoch": 3.4989493529192277,
      "grad_norm": 0.2659640908241272,
      "learning_rate": 3.0218828041090387e-06,
      "loss": 0.0158,
      "step": 2138040
    },
    {
      "epoch": 3.4989820833578813,
      "grad_norm": 0.16685117781162262,
      "learning_rate": 3.0218169118955214e-06,
      "loss": 0.0101,
      "step": 2138060
    },
    {
      "epoch": 3.4990148137965345,
      "grad_norm": 0.2944958806037903,
      "learning_rate": 3.021751019682004e-06,
      "loss": 0.0139,
      "step": 2138080
    },
    {
      "epoch": 3.4990475442351876,
      "grad_norm": 0.24571852385997772,
      "learning_rate": 3.0216851274684873e-06,
      "loss": 0.012,
      "step": 2138100
    },
    {
      "epoch": 3.499080274673841,
      "grad_norm": 0.212505042552948,
      "learning_rate": 3.02161923525497e-06,
      "loss": 0.0134,
      "step": 2138120
    },
    {
      "epoch": 3.4991130051124943,
      "grad_norm": 0.2709794044494629,
      "learning_rate": 3.021553343041453e-06,
      "loss": 0.0091,
      "step": 2138140
    },
    {
      "epoch": 3.499145735551148,
      "grad_norm": 0.268994003534317,
      "learning_rate": 3.0214874508279355e-06,
      "loss": 0.0126,
      "step": 2138160
    },
    {
      "epoch": 3.499178465989801,
      "grad_norm": 0.23347169160842896,
      "learning_rate": 3.0214215586144187e-06,
      "loss": 0.0122,
      "step": 2138180
    },
    {
      "epoch": 3.4992111964284547,
      "grad_norm": 0.12359859049320221,
      "learning_rate": 3.0213556664009014e-06,
      "loss": 0.0115,
      "step": 2138200
    },
    {
      "epoch": 3.499243926867108,
      "grad_norm": 0.6770234107971191,
      "learning_rate": 3.0212897741873846e-06,
      "loss": 0.0104,
      "step": 2138220
    },
    {
      "epoch": 3.499276657305761,
      "grad_norm": 0.30521050095558167,
      "learning_rate": 3.0212238819738678e-06,
      "loss": 0.0093,
      "step": 2138240
    },
    {
      "epoch": 3.4993093877444146,
      "grad_norm": 0.26131802797317505,
      "learning_rate": 3.0211579897603505e-06,
      "loss": 0.0121,
      "step": 2138260
    },
    {
      "epoch": 3.4993421181830677,
      "grad_norm": 0.0495566725730896,
      "learning_rate": 3.0210920975468332e-06,
      "loss": 0.0105,
      "step": 2138280
    },
    {
      "epoch": 3.4993748486217213,
      "grad_norm": 0.2888672351837158,
      "learning_rate": 3.021026205333316e-06,
      "loss": 0.0095,
      "step": 2138300
    },
    {
      "epoch": 3.4994075790603745,
      "grad_norm": 0.3738659620285034,
      "learning_rate": 3.020960313119799e-06,
      "loss": 0.0129,
      "step": 2138320
    },
    {
      "epoch": 3.499440309499028,
      "grad_norm": 0.10724878311157227,
      "learning_rate": 3.020894420906282e-06,
      "loss": 0.0093,
      "step": 2138340
    },
    {
      "epoch": 3.499473039937681,
      "grad_norm": 0.23810838162899017,
      "learning_rate": 3.0208285286927646e-06,
      "loss": 0.0131,
      "step": 2138360
    },
    {
      "epoch": 3.4995057703763344,
      "grad_norm": 0.35776272416114807,
      "learning_rate": 3.0207626364792474e-06,
      "loss": 0.0107,
      "step": 2138380
    },
    {
      "epoch": 3.499538500814988,
      "grad_norm": 0.49125054478645325,
      "learning_rate": 3.0206967442657305e-06,
      "loss": 0.0113,
      "step": 2138400
    },
    {
      "epoch": 3.499571231253641,
      "grad_norm": 0.30428606271743774,
      "learning_rate": 3.0206308520522133e-06,
      "loss": 0.0094,
      "step": 2138420
    },
    {
      "epoch": 3.4996039616922947,
      "grad_norm": 0.2546577453613281,
      "learning_rate": 3.020564959838696e-06,
      "loss": 0.0092,
      "step": 2138440
    },
    {
      "epoch": 3.499636692130948,
      "grad_norm": 0.1410658061504364,
      "learning_rate": 3.0204990676251787e-06,
      "loss": 0.0162,
      "step": 2138460
    },
    {
      "epoch": 3.4996694225696015,
      "grad_norm": 2.0606276988983154,
      "learning_rate": 3.020433175411662e-06,
      "loss": 0.0163,
      "step": 2138480
    },
    {
      "epoch": 3.4997021530082546,
      "grad_norm": 0.1644924134016037,
      "learning_rate": 3.0203672831981446e-06,
      "loss": 0.0086,
      "step": 2138500
    },
    {
      "epoch": 3.4997348834469078,
      "grad_norm": 0.19744613766670227,
      "learning_rate": 3.0203013909846274e-06,
      "loss": 0.0144,
      "step": 2138520
    },
    {
      "epoch": 3.4997676138855613,
      "grad_norm": 0.2122313678264618,
      "learning_rate": 3.02023549877111e-06,
      "loss": 0.0107,
      "step": 2138540
    },
    {
      "epoch": 3.4998003443242145,
      "grad_norm": 0.22560186684131622,
      "learning_rate": 3.020169606557593e-06,
      "loss": 0.0097,
      "step": 2138560
    },
    {
      "epoch": 3.499833074762868,
      "grad_norm": 0.1239556223154068,
      "learning_rate": 3.0201037143440765e-06,
      "loss": 0.0152,
      "step": 2138580
    },
    {
      "epoch": 3.4998658052015212,
      "grad_norm": 1.0299140214920044,
      "learning_rate": 3.020037822130559e-06,
      "loss": 0.012,
      "step": 2138600
    },
    {
      "epoch": 3.499898535640175,
      "grad_norm": 0.5280967950820923,
      "learning_rate": 3.019971929917042e-06,
      "loss": 0.0152,
      "step": 2138620
    },
    {
      "epoch": 3.499931266078828,
      "grad_norm": 0.18598681688308716,
      "learning_rate": 3.019906037703525e-06,
      "loss": 0.0106,
      "step": 2138640
    },
    {
      "epoch": 3.499963996517481,
      "grad_norm": 0.2628379166126251,
      "learning_rate": 3.019840145490008e-06,
      "loss": 0.0147,
      "step": 2138660
    },
    {
      "epoch": 3.4999967269561347,
      "grad_norm": 0.2515757083892822,
      "learning_rate": 3.0197742532764906e-06,
      "loss": 0.0158,
      "step": 2138680
    },
    {
      "epoch": 3.500029457394788,
      "grad_norm": 0.12790796160697937,
      "learning_rate": 3.0197083610629733e-06,
      "loss": 0.0118,
      "step": 2138700
    },
    {
      "epoch": 3.5000621878334415,
      "grad_norm": 0.3967284560203552,
      "learning_rate": 3.0196424688494565e-06,
      "loss": 0.0116,
      "step": 2138720
    },
    {
      "epoch": 3.5000949182720946,
      "grad_norm": 0.5148470997810364,
      "learning_rate": 3.0195765766359392e-06,
      "loss": 0.0106,
      "step": 2138740
    },
    {
      "epoch": 3.500127648710748,
      "grad_norm": 0.6601284146308899,
      "learning_rate": 3.019510684422422e-06,
      "loss": 0.0148,
      "step": 2138760
    },
    {
      "epoch": 3.5001603791494014,
      "grad_norm": 0.50260990858078,
      "learning_rate": 3.0194447922089047e-06,
      "loss": 0.0146,
      "step": 2138780
    },
    {
      "epoch": 3.5001931095880545,
      "grad_norm": 0.5945024490356445,
      "learning_rate": 3.019378899995388e-06,
      "loss": 0.01,
      "step": 2138800
    },
    {
      "epoch": 3.500225840026708,
      "grad_norm": 0.515031635761261,
      "learning_rate": 3.0193130077818706e-06,
      "loss": 0.0101,
      "step": 2138820
    },
    {
      "epoch": 3.5002585704653613,
      "grad_norm": 0.25878819823265076,
      "learning_rate": 3.0192471155683533e-06,
      "loss": 0.0134,
      "step": 2138840
    },
    {
      "epoch": 3.500291300904015,
      "grad_norm": 0.5424796938896179,
      "learning_rate": 3.019181223354836e-06,
      "loss": 0.0121,
      "step": 2138860
    },
    {
      "epoch": 3.500324031342668,
      "grad_norm": 0.591526448726654,
      "learning_rate": 3.0191153311413192e-06,
      "loss": 0.0195,
      "step": 2138880
    },
    {
      "epoch": 3.5003567617813216,
      "grad_norm": 0.35976317524909973,
      "learning_rate": 3.019049438927802e-06,
      "loss": 0.012,
      "step": 2138900
    },
    {
      "epoch": 3.5003894922199748,
      "grad_norm": 0.11819885671138763,
      "learning_rate": 3.018983546714285e-06,
      "loss": 0.017,
      "step": 2138920
    },
    {
      "epoch": 3.500422222658628,
      "grad_norm": 0.18311536312103271,
      "learning_rate": 3.0189176545007683e-06,
      "loss": 0.0123,
      "step": 2138940
    },
    {
      "epoch": 3.5004549530972815,
      "grad_norm": 0.8833361268043518,
      "learning_rate": 3.018851762287251e-06,
      "loss": 0.0112,
      "step": 2138960
    },
    {
      "epoch": 3.5004876835359346,
      "grad_norm": 0.7847526669502258,
      "learning_rate": 3.018785870073734e-06,
      "loss": 0.0153,
      "step": 2138980
    },
    {
      "epoch": 3.5005204139745882,
      "grad_norm": 0.6843271851539612,
      "learning_rate": 3.0187199778602165e-06,
      "loss": 0.0145,
      "step": 2139000
    },
    {
      "epoch": 3.5005531444132414,
      "grad_norm": 0.13949744403362274,
      "learning_rate": 3.0186540856466997e-06,
      "loss": 0.012,
      "step": 2139020
    },
    {
      "epoch": 3.500585874851895,
      "grad_norm": 0.5356664061546326,
      "learning_rate": 3.0185881934331824e-06,
      "loss": 0.0107,
      "step": 2139040
    },
    {
      "epoch": 3.500618605290548,
      "grad_norm": 0.23178870975971222,
      "learning_rate": 3.018522301219665e-06,
      "loss": 0.0123,
      "step": 2139060
    },
    {
      "epoch": 3.5006513357292013,
      "grad_norm": 0.841381311416626,
      "learning_rate": 3.018456409006148e-06,
      "loss": 0.0138,
      "step": 2139080
    },
    {
      "epoch": 3.500684066167855,
      "grad_norm": 0.40933242440223694,
      "learning_rate": 3.0183905167926307e-06,
      "loss": 0.0089,
      "step": 2139100
    },
    {
      "epoch": 3.500716796606508,
      "grad_norm": 0.49482885003089905,
      "learning_rate": 3.018324624579114e-06,
      "loss": 0.0104,
      "step": 2139120
    },
    {
      "epoch": 3.5007495270451616,
      "grad_norm": 0.3025352954864502,
      "learning_rate": 3.0182587323655966e-06,
      "loss": 0.0138,
      "step": 2139140
    },
    {
      "epoch": 3.5007822574838148,
      "grad_norm": 0.2009589821100235,
      "learning_rate": 3.0181928401520793e-06,
      "loss": 0.0181,
      "step": 2139160
    },
    {
      "epoch": 3.5008149879224684,
      "grad_norm": 0.10979293286800385,
      "learning_rate": 3.018126947938562e-06,
      "loss": 0.0084,
      "step": 2139180
    },
    {
      "epoch": 3.5008477183611215,
      "grad_norm": 0.20212863385677338,
      "learning_rate": 3.018061055725045e-06,
      "loss": 0.0104,
      "step": 2139200
    },
    {
      "epoch": 3.5008804487997747,
      "grad_norm": 0.6133368015289307,
      "learning_rate": 3.017995163511528e-06,
      "loss": 0.0082,
      "step": 2139220
    },
    {
      "epoch": 3.5009131792384283,
      "grad_norm": 0.10819625109434128,
      "learning_rate": 3.0179292712980107e-06,
      "loss": 0.0104,
      "step": 2139240
    },
    {
      "epoch": 3.5009459096770814,
      "grad_norm": 0.13275247812271118,
      "learning_rate": 3.0178633790844934e-06,
      "loss": 0.0183,
      "step": 2139260
    },
    {
      "epoch": 3.5009786401157346,
      "grad_norm": 0.2675707936286926,
      "learning_rate": 3.017797486870977e-06,
      "loss": 0.013,
      "step": 2139280
    },
    {
      "epoch": 3.501011370554388,
      "grad_norm": 0.3742842674255371,
      "learning_rate": 3.0177315946574597e-06,
      "loss": 0.0106,
      "step": 2139300
    },
    {
      "epoch": 3.5010441009930418,
      "grad_norm": 0.4543919861316681,
      "learning_rate": 3.0176657024439425e-06,
      "loss": 0.0102,
      "step": 2139320
    },
    {
      "epoch": 3.501076831431695,
      "grad_norm": 0.2104676216840744,
      "learning_rate": 3.0175998102304256e-06,
      "loss": 0.0128,
      "step": 2139340
    },
    {
      "epoch": 3.501109561870348,
      "grad_norm": 0.3446386158466339,
      "learning_rate": 3.0175339180169084e-06,
      "loss": 0.0112,
      "step": 2139360
    },
    {
      "epoch": 3.5011422923090016,
      "grad_norm": 0.09441138803958893,
      "learning_rate": 3.017468025803391e-06,
      "loss": 0.011,
      "step": 2139380
    },
    {
      "epoch": 3.501175022747655,
      "grad_norm": 1.5550510883331299,
      "learning_rate": 3.017402133589874e-06,
      "loss": 0.0087,
      "step": 2139400
    },
    {
      "epoch": 3.501207753186308,
      "grad_norm": 0.40632373094558716,
      "learning_rate": 3.017336241376357e-06,
      "loss": 0.0082,
      "step": 2139420
    },
    {
      "epoch": 3.5012404836249615,
      "grad_norm": 0.4378814399242401,
      "learning_rate": 3.0172703491628398e-06,
      "loss": 0.0098,
      "step": 2139440
    },
    {
      "epoch": 3.501273214063615,
      "grad_norm": 0.2512446343898773,
      "learning_rate": 3.0172044569493225e-06,
      "loss": 0.0096,
      "step": 2139460
    },
    {
      "epoch": 3.5013059445022683,
      "grad_norm": 0.3609658181667328,
      "learning_rate": 3.0171385647358052e-06,
      "loss": 0.0125,
      "step": 2139480
    },
    {
      "epoch": 3.5013386749409214,
      "grad_norm": 0.3200831413269043,
      "learning_rate": 3.017072672522288e-06,
      "loss": 0.0149,
      "step": 2139500
    },
    {
      "epoch": 3.501371405379575,
      "grad_norm": 0.10819151997566223,
      "learning_rate": 3.017006780308771e-06,
      "loss": 0.0095,
      "step": 2139520
    },
    {
      "epoch": 3.501404135818228,
      "grad_norm": 0.1608227789402008,
      "learning_rate": 3.016940888095254e-06,
      "loss": 0.0077,
      "step": 2139540
    },
    {
      "epoch": 3.5014368662568813,
      "grad_norm": 0.1712205559015274,
      "learning_rate": 3.0168749958817366e-06,
      "loss": 0.0136,
      "step": 2139560
    },
    {
      "epoch": 3.501469596695535,
      "grad_norm": 0.07215601205825806,
      "learning_rate": 3.0168091036682194e-06,
      "loss": 0.0108,
      "step": 2139580
    },
    {
      "epoch": 3.5015023271341885,
      "grad_norm": 0.6702889204025269,
      "learning_rate": 3.0167432114547025e-06,
      "loss": 0.0105,
      "step": 2139600
    },
    {
      "epoch": 3.5015350575728417,
      "grad_norm": 0.11906096339225769,
      "learning_rate": 3.0166773192411857e-06,
      "loss": 0.01,
      "step": 2139620
    },
    {
      "epoch": 3.501567788011495,
      "grad_norm": 0.49750614166259766,
      "learning_rate": 3.0166114270276684e-06,
      "loss": 0.0161,
      "step": 2139640
    },
    {
      "epoch": 3.5016005184501484,
      "grad_norm": 0.4034459590911865,
      "learning_rate": 3.0165455348141516e-06,
      "loss": 0.0091,
      "step": 2139660
    },
    {
      "epoch": 3.5016332488888016,
      "grad_norm": 0.15330959856510162,
      "learning_rate": 3.0164796426006343e-06,
      "loss": 0.0105,
      "step": 2139680
    },
    {
      "epoch": 3.5016659793274547,
      "grad_norm": 0.3249214291572571,
      "learning_rate": 3.016413750387117e-06,
      "loss": 0.0083,
      "step": 2139700
    },
    {
      "epoch": 3.5016987097661083,
      "grad_norm": 0.4807669222354889,
      "learning_rate": 3.0163478581736e-06,
      "loss": 0.0135,
      "step": 2139720
    },
    {
      "epoch": 3.5017314402047615,
      "grad_norm": 0.34407228231430054,
      "learning_rate": 3.016281965960083e-06,
      "loss": 0.0173,
      "step": 2139740
    },
    {
      "epoch": 3.501764170643415,
      "grad_norm": 0.21940936148166656,
      "learning_rate": 3.0162160737465657e-06,
      "loss": 0.0079,
      "step": 2139760
    },
    {
      "epoch": 3.501796901082068,
      "grad_norm": 0.36079156398773193,
      "learning_rate": 3.0161501815330485e-06,
      "loss": 0.0147,
      "step": 2139780
    },
    {
      "epoch": 3.501829631520722,
      "grad_norm": null,
      "learning_rate": 3.016084289319531e-06,
      "loss": 0.0108,
      "step": 2139800
    },
    {
      "epoch": 3.501862361959375,
      "grad_norm": 0.28336301445961,
      "learning_rate": 3.0160183971060144e-06,
      "loss": 0.0131,
      "step": 2139820
    },
    {
      "epoch": 3.501895092398028,
      "grad_norm": 0.2663329243659973,
      "learning_rate": 3.015952504892497e-06,
      "loss": 0.0096,
      "step": 2139840
    },
    {
      "epoch": 3.5019278228366817,
      "grad_norm": 0.24243248999118805,
      "learning_rate": 3.01588661267898e-06,
      "loss": 0.0105,
      "step": 2139860
    },
    {
      "epoch": 3.501960553275335,
      "grad_norm": 0.2799496352672577,
      "learning_rate": 3.0158207204654626e-06,
      "loss": 0.012,
      "step": 2139880
    },
    {
      "epoch": 3.5019932837139884,
      "grad_norm": 0.40100201964378357,
      "learning_rate": 3.0157548282519457e-06,
      "loss": 0.0112,
      "step": 2139900
    },
    {
      "epoch": 3.5020260141526416,
      "grad_norm": 0.7020990252494812,
      "learning_rate": 3.0156889360384285e-06,
      "loss": 0.0084,
      "step": 2139920
    },
    {
      "epoch": 3.502058744591295,
      "grad_norm": 0.4685724675655365,
      "learning_rate": 3.0156230438249112e-06,
      "loss": 0.0113,
      "step": 2139940
    },
    {
      "epoch": 3.5020914750299483,
      "grad_norm": 0.056485019624233246,
      "learning_rate": 3.015557151611394e-06,
      "loss": 0.0108,
      "step": 2139960
    },
    {
      "epoch": 3.5021242054686015,
      "grad_norm": 0.043545883148908615,
      "learning_rate": 3.0154912593978776e-06,
      "loss": 0.0115,
      "step": 2139980
    },
    {
      "epoch": 3.502156935907255,
      "grad_norm": 0.18339623510837555,
      "learning_rate": 3.0154253671843603e-06,
      "loss": 0.011,
      "step": 2140000
    },
    {
      "epoch": 3.5021896663459082,
      "grad_norm": 0.5110265016555786,
      "learning_rate": 3.015359474970843e-06,
      "loss": 0.0105,
      "step": 2140020
    },
    {
      "epoch": 3.502222396784562,
      "grad_norm": 0.5958913564682007,
      "learning_rate": 3.0152935827573258e-06,
      "loss": 0.0133,
      "step": 2140040
    },
    {
      "epoch": 3.502255127223215,
      "grad_norm": 0.6285280585289001,
      "learning_rate": 3.015227690543809e-06,
      "loss": 0.0081,
      "step": 2140060
    },
    {
      "epoch": 3.5022878576618686,
      "grad_norm": 0.23430180549621582,
      "learning_rate": 3.0151617983302917e-06,
      "loss": 0.0116,
      "step": 2140080
    },
    {
      "epoch": 3.5023205881005217,
      "grad_norm": 0.5112387537956238,
      "learning_rate": 3.0150959061167744e-06,
      "loss": 0.0095,
      "step": 2140100
    },
    {
      "epoch": 3.502353318539175,
      "grad_norm": 0.1472516804933548,
      "learning_rate": 3.015030013903257e-06,
      "loss": 0.0128,
      "step": 2140120
    },
    {
      "epoch": 3.5023860489778285,
      "grad_norm": 0.25971338152885437,
      "learning_rate": 3.0149641216897403e-06,
      "loss": 0.0137,
      "step": 2140140
    },
    {
      "epoch": 3.5024187794164816,
      "grad_norm": 0.5205855369567871,
      "learning_rate": 3.014898229476223e-06,
      "loss": 0.0068,
      "step": 2140160
    },
    {
      "epoch": 3.502451509855135,
      "grad_norm": 0.2773088812828064,
      "learning_rate": 3.014832337262706e-06,
      "loss": 0.0129,
      "step": 2140180
    },
    {
      "epoch": 3.5024842402937884,
      "grad_norm": 0.12540984153747559,
      "learning_rate": 3.0147664450491885e-06,
      "loss": 0.0161,
      "step": 2140200
    },
    {
      "epoch": 3.502516970732442,
      "grad_norm": 0.3801746070384979,
      "learning_rate": 3.0147005528356717e-06,
      "loss": 0.0122,
      "step": 2140220
    },
    {
      "epoch": 3.502549701171095,
      "grad_norm": 0.4247051179409027,
      "learning_rate": 3.0146346606221544e-06,
      "loss": 0.0107,
      "step": 2140240
    },
    {
      "epoch": 3.5025824316097482,
      "grad_norm": 1.9059743881225586,
      "learning_rate": 3.014568768408637e-06,
      "loss": 0.0106,
      "step": 2140260
    },
    {
      "epoch": 3.502615162048402,
      "grad_norm": 0.7076841592788696,
      "learning_rate": 3.01450287619512e-06,
      "loss": 0.0109,
      "step": 2140280
    },
    {
      "epoch": 3.502647892487055,
      "grad_norm": 0.26906681060791016,
      "learning_rate": 3.014436983981603e-06,
      "loss": 0.0128,
      "step": 2140300
    },
    {
      "epoch": 3.5026806229257086,
      "grad_norm": 0.12383068352937698,
      "learning_rate": 3.0143710917680862e-06,
      "loss": 0.0088,
      "step": 2140320
    },
    {
      "epoch": 3.5027133533643617,
      "grad_norm": 0.2742801010608673,
      "learning_rate": 3.014305199554569e-06,
      "loss": 0.007,
      "step": 2140340
    },
    {
      "epoch": 3.5027460838030153,
      "grad_norm": 2.0178754329681396,
      "learning_rate": 3.014239307341052e-06,
      "loss": 0.0087,
      "step": 2140360
    },
    {
      "epoch": 3.5027788142416685,
      "grad_norm": 0.2966859042644501,
      "learning_rate": 3.014173415127535e-06,
      "loss": 0.0101,
      "step": 2140380
    },
    {
      "epoch": 3.5028115446803216,
      "grad_norm": 0.27225783467292786,
      "learning_rate": 3.0141075229140176e-06,
      "loss": 0.0194,
      "step": 2140400
    },
    {
      "epoch": 3.5028442751189752,
      "grad_norm": 0.19105681777000427,
      "learning_rate": 3.0140416307005004e-06,
      "loss": 0.0147,
      "step": 2140420
    },
    {
      "epoch": 3.5028770055576284,
      "grad_norm": 0.26696833968162537,
      "learning_rate": 3.0139757384869835e-06,
      "loss": 0.0075,
      "step": 2140440
    },
    {
      "epoch": 3.502909735996282,
      "grad_norm": 0.040409862995147705,
      "learning_rate": 3.0139098462734663e-06,
      "loss": 0.0152,
      "step": 2140460
    },
    {
      "epoch": 3.502942466434935,
      "grad_norm": 0.68137127161026,
      "learning_rate": 3.013843954059949e-06,
      "loss": 0.0152,
      "step": 2140480
    },
    {
      "epoch": 3.5029751968735887,
      "grad_norm": 0.44721150398254395,
      "learning_rate": 3.0137780618464318e-06,
      "loss": 0.0096,
      "step": 2140500
    },
    {
      "epoch": 3.503007927312242,
      "grad_norm": 0.12923742830753326,
      "learning_rate": 3.0137121696329145e-06,
      "loss": 0.0069,
      "step": 2140520
    },
    {
      "epoch": 3.503040657750895,
      "grad_norm": 0.21226316690444946,
      "learning_rate": 3.0136462774193977e-06,
      "loss": 0.0113,
      "step": 2140540
    },
    {
      "epoch": 3.5030733881895486,
      "grad_norm": 0.20101140439510345,
      "learning_rate": 3.0135803852058804e-06,
      "loss": 0.0081,
      "step": 2140560
    },
    {
      "epoch": 3.5031061186282018,
      "grad_norm": 0.5452648401260376,
      "learning_rate": 3.013514492992363e-06,
      "loss": 0.0203,
      "step": 2140580
    },
    {
      "epoch": 3.5031388490668554,
      "grad_norm": 0.2890763282775879,
      "learning_rate": 3.013448600778846e-06,
      "loss": 0.0135,
      "step": 2140600
    },
    {
      "epoch": 3.5031715795055085,
      "grad_norm": 0.3025798797607422,
      "learning_rate": 3.013382708565329e-06,
      "loss": 0.0122,
      "step": 2140620
    },
    {
      "epoch": 3.503204309944162,
      "grad_norm": 0.18251800537109375,
      "learning_rate": 3.0133168163518118e-06,
      "loss": 0.0076,
      "step": 2140640
    },
    {
      "epoch": 3.5032370403828152,
      "grad_norm": 0.756091833114624,
      "learning_rate": 3.0132509241382945e-06,
      "loss": 0.0139,
      "step": 2140660
    },
    {
      "epoch": 3.5032697708214684,
      "grad_norm": 0.14884163439273834,
      "learning_rate": 3.013185031924778e-06,
      "loss": 0.0135,
      "step": 2140680
    },
    {
      "epoch": 3.503302501260122,
      "grad_norm": 0.3654801845550537,
      "learning_rate": 3.013119139711261e-06,
      "loss": 0.0089,
      "step": 2140700
    },
    {
      "epoch": 3.503335231698775,
      "grad_norm": 0.12262775003910065,
      "learning_rate": 3.0130532474977436e-06,
      "loss": 0.0155,
      "step": 2140720
    },
    {
      "epoch": 3.5033679621374283,
      "grad_norm": 0.39420613646507263,
      "learning_rate": 3.0129873552842263e-06,
      "loss": 0.0074,
      "step": 2140740
    },
    {
      "epoch": 3.503400692576082,
      "grad_norm": 0.6068894863128662,
      "learning_rate": 3.0129214630707095e-06,
      "loss": 0.01,
      "step": 2140760
    },
    {
      "epoch": 3.5034334230147355,
      "grad_norm": 0.6559863090515137,
      "learning_rate": 3.0128555708571922e-06,
      "loss": 0.0114,
      "step": 2140780
    },
    {
      "epoch": 3.5034661534533886,
      "grad_norm": 0.14341993629932404,
      "learning_rate": 3.012789678643675e-06,
      "loss": 0.0112,
      "step": 2140800
    },
    {
      "epoch": 3.503498883892042,
      "grad_norm": 0.16327960789203644,
      "learning_rate": 3.0127237864301577e-06,
      "loss": 0.0124,
      "step": 2140820
    },
    {
      "epoch": 3.5035316143306954,
      "grad_norm": 0.13680680096149445,
      "learning_rate": 3.012657894216641e-06,
      "loss": 0.0157,
      "step": 2140840
    },
    {
      "epoch": 3.5035643447693485,
      "grad_norm": 0.7507553100585938,
      "learning_rate": 3.0125920020031236e-06,
      "loss": 0.0185,
      "step": 2140860
    },
    {
      "epoch": 3.5035970752080017,
      "grad_norm": 1.2917205095291138,
      "learning_rate": 3.0125261097896063e-06,
      "loss": 0.0137,
      "step": 2140880
    },
    {
      "epoch": 3.5036298056466553,
      "grad_norm": 0.2900267243385315,
      "learning_rate": 3.012460217576089e-06,
      "loss": 0.0122,
      "step": 2140900
    },
    {
      "epoch": 3.503662536085309,
      "grad_norm": 0.41132786870002747,
      "learning_rate": 3.012394325362572e-06,
      "loss": 0.0087,
      "step": 2140920
    },
    {
      "epoch": 3.503695266523962,
      "grad_norm": 0.22956322133541107,
      "learning_rate": 3.012328433149055e-06,
      "loss": 0.0073,
      "step": 2140940
    },
    {
      "epoch": 3.503727996962615,
      "grad_norm": 0.22169193625450134,
      "learning_rate": 3.0122625409355377e-06,
      "loss": 0.0122,
      "step": 2140960
    },
    {
      "epoch": 3.5037607274012688,
      "grad_norm": 0.31734180450439453,
      "learning_rate": 3.0121966487220205e-06,
      "loss": 0.0072,
      "step": 2140980
    },
    {
      "epoch": 3.503793457839922,
      "grad_norm": 0.13322708010673523,
      "learning_rate": 3.012130756508503e-06,
      "loss": 0.0112,
      "step": 2141000
    },
    {
      "epoch": 3.503826188278575,
      "grad_norm": 0.24676784873008728,
      "learning_rate": 3.0120648642949864e-06,
      "loss": 0.0108,
      "step": 2141020
    },
    {
      "epoch": 3.5038589187172287,
      "grad_norm": 0.2259935587644577,
      "learning_rate": 3.0119989720814695e-06,
      "loss": 0.0087,
      "step": 2141040
    },
    {
      "epoch": 3.5038916491558822,
      "grad_norm": 0.3119291067123413,
      "learning_rate": 3.0119330798679523e-06,
      "loss": 0.0135,
      "step": 2141060
    },
    {
      "epoch": 3.5039243795945354,
      "grad_norm": 0.09853959828615189,
      "learning_rate": 3.0118671876544354e-06,
      "loss": 0.0114,
      "step": 2141080
    },
    {
      "epoch": 3.5039571100331885,
      "grad_norm": 0.10730225592851639,
      "learning_rate": 3.011801295440918e-06,
      "loss": 0.0096,
      "step": 2141100
    },
    {
      "epoch": 3.503989840471842,
      "grad_norm": 0.36041679978370667,
      "learning_rate": 3.011735403227401e-06,
      "loss": 0.0134,
      "step": 2141120
    },
    {
      "epoch": 3.5040225709104953,
      "grad_norm": 0.6827632188796997,
      "learning_rate": 3.0116695110138837e-06,
      "loss": 0.0144,
      "step": 2141140
    },
    {
      "epoch": 3.5040553013491484,
      "grad_norm": 0.5084710121154785,
      "learning_rate": 3.011603618800367e-06,
      "loss": 0.0115,
      "step": 2141160
    },
    {
      "epoch": 3.504088031787802,
      "grad_norm": 0.25056400895118713,
      "learning_rate": 3.0115377265868496e-06,
      "loss": 0.0093,
      "step": 2141180
    },
    {
      "epoch": 3.5041207622264556,
      "grad_norm": 0.2419964224100113,
      "learning_rate": 3.0114718343733323e-06,
      "loss": 0.0105,
      "step": 2141200
    },
    {
      "epoch": 3.504153492665109,
      "grad_norm": 0.28979527950286865,
      "learning_rate": 3.011405942159815e-06,
      "loss": 0.0094,
      "step": 2141220
    },
    {
      "epoch": 3.504186223103762,
      "grad_norm": 0.045350391417741776,
      "learning_rate": 3.011340049946298e-06,
      "loss": 0.0134,
      "step": 2141240
    },
    {
      "epoch": 3.5042189535424155,
      "grad_norm": 0.16203036904335022,
      "learning_rate": 3.011274157732781e-06,
      "loss": 0.0067,
      "step": 2141260
    },
    {
      "epoch": 3.5042516839810687,
      "grad_norm": 0.27330246567726135,
      "learning_rate": 3.0112082655192637e-06,
      "loss": 0.0119,
      "step": 2141280
    },
    {
      "epoch": 3.504284414419722,
      "grad_norm": 0.16588689386844635,
      "learning_rate": 3.0111423733057464e-06,
      "loss": 0.0101,
      "step": 2141300
    },
    {
      "epoch": 3.5043171448583754,
      "grad_norm": 0.2023441344499588,
      "learning_rate": 3.0110764810922296e-06,
      "loss": 0.019,
      "step": 2141320
    },
    {
      "epoch": 3.5043498752970286,
      "grad_norm": 0.353717565536499,
      "learning_rate": 3.0110105888787123e-06,
      "loss": 0.0142,
      "step": 2141340
    },
    {
      "epoch": 3.504382605735682,
      "grad_norm": 0.8168973326683044,
      "learning_rate": 3.010944696665195e-06,
      "loss": 0.0097,
      "step": 2141360
    },
    {
      "epoch": 3.5044153361743353,
      "grad_norm": 0.2992546856403351,
      "learning_rate": 3.0108788044516787e-06,
      "loss": 0.0105,
      "step": 2141380
    },
    {
      "epoch": 3.504448066612989,
      "grad_norm": 0.9942989945411682,
      "learning_rate": 3.0108129122381614e-06,
      "loss": 0.0093,
      "step": 2141400
    },
    {
      "epoch": 3.504480797051642,
      "grad_norm": 0.10715501755475998,
      "learning_rate": 3.010747020024644e-06,
      "loss": 0.0136,
      "step": 2141420
    },
    {
      "epoch": 3.504513527490295,
      "grad_norm": 0.3368508517742157,
      "learning_rate": 3.010681127811127e-06,
      "loss": 0.0085,
      "step": 2141440
    },
    {
      "epoch": 3.504546257928949,
      "grad_norm": 0.23212435841560364,
      "learning_rate": 3.0106152355976096e-06,
      "loss": 0.0156,
      "step": 2141460
    },
    {
      "epoch": 3.504578988367602,
      "grad_norm": 0.33064863085746765,
      "learning_rate": 3.0105493433840928e-06,
      "loss": 0.0101,
      "step": 2141480
    },
    {
      "epoch": 3.5046117188062555,
      "grad_norm": 0.32102078199386597,
      "learning_rate": 3.0104834511705755e-06,
      "loss": 0.0147,
      "step": 2141500
    },
    {
      "epoch": 3.5046444492449087,
      "grad_norm": 0.13710901141166687,
      "learning_rate": 3.0104175589570583e-06,
      "loss": 0.0112,
      "step": 2141520
    },
    {
      "epoch": 3.5046771796835623,
      "grad_norm": 0.1192665621638298,
      "learning_rate": 3.010351666743541e-06,
      "loss": 0.0119,
      "step": 2141540
    },
    {
      "epoch": 3.5047099101222154,
      "grad_norm": 0.20910972356796265,
      "learning_rate": 3.010285774530024e-06,
      "loss": 0.0125,
      "step": 2141560
    },
    {
      "epoch": 3.5047426405608686,
      "grad_norm": 0.20612694323062897,
      "learning_rate": 3.010219882316507e-06,
      "loss": 0.019,
      "step": 2141580
    },
    {
      "epoch": 3.504775370999522,
      "grad_norm": 0.7348415851593018,
      "learning_rate": 3.0101539901029896e-06,
      "loss": 0.0132,
      "step": 2141600
    },
    {
      "epoch": 3.5048081014381753,
      "grad_norm": 0.19208228588104248,
      "learning_rate": 3.0100880978894724e-06,
      "loss": 0.0102,
      "step": 2141620
    },
    {
      "epoch": 3.504840831876829,
      "grad_norm": 0.11799293011426926,
      "learning_rate": 3.0100222056759555e-06,
      "loss": 0.0088,
      "step": 2141640
    },
    {
      "epoch": 3.504873562315482,
      "grad_norm": 0.1098141297698021,
      "learning_rate": 3.0099563134624383e-06,
      "loss": 0.0076,
      "step": 2141660
    },
    {
      "epoch": 3.5049062927541357,
      "grad_norm": 0.05154460668563843,
      "learning_rate": 3.009890421248921e-06,
      "loss": 0.0124,
      "step": 2141680
    },
    {
      "epoch": 3.504939023192789,
      "grad_norm": 0.18191631138324738,
      "learning_rate": 3.0098245290354038e-06,
      "loss": 0.0177,
      "step": 2141700
    },
    {
      "epoch": 3.504971753631442,
      "grad_norm": 0.3941461443901062,
      "learning_rate": 3.009758636821887e-06,
      "loss": 0.0097,
      "step": 2141720
    },
    {
      "epoch": 3.5050044840700956,
      "grad_norm": 0.08809220790863037,
      "learning_rate": 3.00969274460837e-06,
      "loss": 0.009,
      "step": 2141740
    },
    {
      "epoch": 3.5050372145087487,
      "grad_norm": 0.5680059790611267,
      "learning_rate": 3.009626852394853e-06,
      "loss": 0.0094,
      "step": 2141760
    },
    {
      "epoch": 3.5050699449474023,
      "grad_norm": 0.1693127155303955,
      "learning_rate": 3.009560960181336e-06,
      "loss": 0.0089,
      "step": 2141780
    },
    {
      "epoch": 3.5051026753860555,
      "grad_norm": 0.1131194457411766,
      "learning_rate": 3.0094950679678187e-06,
      "loss": 0.0082,
      "step": 2141800
    },
    {
      "epoch": 3.505135405824709,
      "grad_norm": 0.31046685576438904,
      "learning_rate": 3.0094291757543015e-06,
      "loss": 0.0111,
      "step": 2141820
    },
    {
      "epoch": 3.505168136263362,
      "grad_norm": 0.11493141204118729,
      "learning_rate": 3.009363283540784e-06,
      "loss": 0.0105,
      "step": 2141840
    },
    {
      "epoch": 3.5052008667020154,
      "grad_norm": 0.42118656635284424,
      "learning_rate": 3.0092973913272674e-06,
      "loss": 0.0126,
      "step": 2141860
    },
    {
      "epoch": 3.505233597140669,
      "grad_norm": 0.42487865686416626,
      "learning_rate": 3.00923149911375e-06,
      "loss": 0.0107,
      "step": 2141880
    },
    {
      "epoch": 3.505266327579322,
      "grad_norm": 0.35892197489738464,
      "learning_rate": 3.009165606900233e-06,
      "loss": 0.0144,
      "step": 2141900
    },
    {
      "epoch": 3.5052990580179757,
      "grad_norm": 0.15021398663520813,
      "learning_rate": 3.0090997146867156e-06,
      "loss": 0.0103,
      "step": 2141920
    },
    {
      "epoch": 3.505331788456629,
      "grad_norm": 0.21346931159496307,
      "learning_rate": 3.0090338224731983e-06,
      "loss": 0.0132,
      "step": 2141940
    },
    {
      "epoch": 3.5053645188952824,
      "grad_norm": 0.31799614429473877,
      "learning_rate": 3.0089679302596815e-06,
      "loss": 0.0109,
      "step": 2141960
    },
    {
      "epoch": 3.5053972493339356,
      "grad_norm": 0.1448647677898407,
      "learning_rate": 3.0089020380461642e-06,
      "loss": 0.0123,
      "step": 2141980
    },
    {
      "epoch": 3.5054299797725887,
      "grad_norm": 0.1967027336359024,
      "learning_rate": 3.008836145832647e-06,
      "loss": 0.0072,
      "step": 2142000
    },
    {
      "epoch": 3.5054627102112423,
      "grad_norm": 0.1351243108510971,
      "learning_rate": 3.0087702536191297e-06,
      "loss": 0.0095,
      "step": 2142020
    },
    {
      "epoch": 3.5054954406498955,
      "grad_norm": 0.561057448387146,
      "learning_rate": 3.008704361405613e-06,
      "loss": 0.0175,
      "step": 2142040
    },
    {
      "epoch": 3.505528171088549,
      "grad_norm": 0.2798824906349182,
      "learning_rate": 3.0086384691920956e-06,
      "loss": 0.0149,
      "step": 2142060
    },
    {
      "epoch": 3.5055609015272022,
      "grad_norm": 0.19703257083892822,
      "learning_rate": 3.0085725769785788e-06,
      "loss": 0.0088,
      "step": 2142080
    },
    {
      "epoch": 3.505593631965856,
      "grad_norm": 0.32619261741638184,
      "learning_rate": 3.008506684765062e-06,
      "loss": 0.0119,
      "step": 2142100
    },
    {
      "epoch": 3.505626362404509,
      "grad_norm": 0.1021556481719017,
      "learning_rate": 3.0084407925515447e-06,
      "loss": 0.0125,
      "step": 2142120
    },
    {
      "epoch": 3.505659092843162,
      "grad_norm": 0.1639534831047058,
      "learning_rate": 3.0083749003380274e-06,
      "loss": 0.0076,
      "step": 2142140
    },
    {
      "epoch": 3.5056918232818157,
      "grad_norm": 0.5841483473777771,
      "learning_rate": 3.00830900812451e-06,
      "loss": 0.015,
      "step": 2142160
    },
    {
      "epoch": 3.505724553720469,
      "grad_norm": 0.8148348927497864,
      "learning_rate": 3.0082431159109933e-06,
      "loss": 0.0102,
      "step": 2142180
    },
    {
      "epoch": 3.5057572841591225,
      "grad_norm": 0.17950737476348877,
      "learning_rate": 3.008177223697476e-06,
      "loss": 0.0114,
      "step": 2142200
    },
    {
      "epoch": 3.5057900145977756,
      "grad_norm": 0.6825955510139465,
      "learning_rate": 3.008111331483959e-06,
      "loss": 0.0102,
      "step": 2142220
    },
    {
      "epoch": 3.505822745036429,
      "grad_norm": 0.2842845320701599,
      "learning_rate": 3.0080454392704415e-06,
      "loss": 0.0149,
      "step": 2142240
    },
    {
      "epoch": 3.5058554754750824,
      "grad_norm": 0.2979264259338379,
      "learning_rate": 3.0079795470569247e-06,
      "loss": 0.0115,
      "step": 2142260
    },
    {
      "epoch": 3.5058882059137355,
      "grad_norm": 0.5969491004943848,
      "learning_rate": 3.0079136548434074e-06,
      "loss": 0.0134,
      "step": 2142280
    },
    {
      "epoch": 3.505920936352389,
      "grad_norm": 0.23294270038604736,
      "learning_rate": 3.00784776262989e-06,
      "loss": 0.0114,
      "step": 2142300
    },
    {
      "epoch": 3.5059536667910423,
      "grad_norm": 0.5042436718940735,
      "learning_rate": 3.007781870416373e-06,
      "loss": 0.01,
      "step": 2142320
    },
    {
      "epoch": 3.5059863972296954,
      "grad_norm": 0.10046494752168655,
      "learning_rate": 3.007715978202856e-06,
      "loss": 0.0105,
      "step": 2142340
    },
    {
      "epoch": 3.506019127668349,
      "grad_norm": 0.238233283162117,
      "learning_rate": 3.007650085989339e-06,
      "loss": 0.0148,
      "step": 2142360
    },
    {
      "epoch": 3.5060518581070026,
      "grad_norm": 0.5452703833580017,
      "learning_rate": 3.0075841937758216e-06,
      "loss": 0.0111,
      "step": 2142380
    },
    {
      "epoch": 3.5060845885456557,
      "grad_norm": 0.2825363874435425,
      "learning_rate": 3.0075183015623043e-06,
      "loss": 0.0156,
      "step": 2142400
    },
    {
      "epoch": 3.506117318984309,
      "grad_norm": 0.19421935081481934,
      "learning_rate": 3.007452409348787e-06,
      "loss": 0.0117,
      "step": 2142420
    },
    {
      "epoch": 3.5061500494229625,
      "grad_norm": 0.25429457426071167,
      "learning_rate": 3.0073865171352706e-06,
      "loss": 0.014,
      "step": 2142440
    },
    {
      "epoch": 3.5061827798616156,
      "grad_norm": 0.11887338757514954,
      "learning_rate": 3.0073206249217534e-06,
      "loss": 0.0102,
      "step": 2142460
    },
    {
      "epoch": 3.506215510300269,
      "grad_norm": 0.09101966768503189,
      "learning_rate": 3.007254732708236e-06,
      "loss": 0.0102,
      "step": 2142480
    },
    {
      "epoch": 3.5062482407389224,
      "grad_norm": 0.4418843686580658,
      "learning_rate": 3.0071888404947193e-06,
      "loss": 0.0104,
      "step": 2142500
    },
    {
      "epoch": 3.506280971177576,
      "grad_norm": 0.45280060172080994,
      "learning_rate": 3.007122948281202e-06,
      "loss": 0.0101,
      "step": 2142520
    },
    {
      "epoch": 3.506313701616229,
      "grad_norm": 0.5724447965621948,
      "learning_rate": 3.0070570560676848e-06,
      "loss": 0.0127,
      "step": 2142540
    },
    {
      "epoch": 3.5063464320548823,
      "grad_norm": 0.11055397987365723,
      "learning_rate": 3.0069911638541675e-06,
      "loss": 0.008,
      "step": 2142560
    },
    {
      "epoch": 3.506379162493536,
      "grad_norm": 0.07282891869544983,
      "learning_rate": 3.0069252716406507e-06,
      "loss": 0.0106,
      "step": 2142580
    },
    {
      "epoch": 3.506411892932189,
      "grad_norm": 0.17198827862739563,
      "learning_rate": 3.0068593794271334e-06,
      "loss": 0.0099,
      "step": 2142600
    },
    {
      "epoch": 3.506444623370842,
      "grad_norm": 0.17868155241012573,
      "learning_rate": 3.006793487213616e-06,
      "loss": 0.0085,
      "step": 2142620
    },
    {
      "epoch": 3.5064773538094958,
      "grad_norm": 0.3639136850833893,
      "learning_rate": 3.006727595000099e-06,
      "loss": 0.0127,
      "step": 2142640
    },
    {
      "epoch": 3.5065100842481494,
      "grad_norm": 0.24694202840328217,
      "learning_rate": 3.006661702786582e-06,
      "loss": 0.0126,
      "step": 2142660
    },
    {
      "epoch": 3.5065428146868025,
      "grad_norm": 0.4341401159763336,
      "learning_rate": 3.0065958105730648e-06,
      "loss": 0.0094,
      "step": 2142680
    },
    {
      "epoch": 3.5065755451254557,
      "grad_norm": 0.5207078456878662,
      "learning_rate": 3.0065299183595475e-06,
      "loss": 0.0104,
      "step": 2142700
    },
    {
      "epoch": 3.5066082755641093,
      "grad_norm": 0.19353963434696198,
      "learning_rate": 3.0064640261460303e-06,
      "loss": 0.0065,
      "step": 2142720
    },
    {
      "epoch": 3.5066410060027624,
      "grad_norm": 0.5323366522789001,
      "learning_rate": 3.0063981339325134e-06,
      "loss": 0.0124,
      "step": 2142740
    },
    {
      "epoch": 3.5066737364414156,
      "grad_norm": 0.5057610273361206,
      "learning_rate": 3.006332241718996e-06,
      "loss": 0.0106,
      "step": 2142760
    },
    {
      "epoch": 3.506706466880069,
      "grad_norm": 0.41519254446029663,
      "learning_rate": 3.006266349505479e-06,
      "loss": 0.0099,
      "step": 2142780
    },
    {
      "epoch": 3.5067391973187223,
      "grad_norm": 0.1358126848936081,
      "learning_rate": 3.0062004572919625e-06,
      "loss": 0.0158,
      "step": 2142800
    },
    {
      "epoch": 3.506771927757376,
      "grad_norm": 0.3665534257888794,
      "learning_rate": 3.0061345650784452e-06,
      "loss": 0.0113,
      "step": 2142820
    },
    {
      "epoch": 3.506804658196029,
      "grad_norm": 0.1504400223493576,
      "learning_rate": 3.006068672864928e-06,
      "loss": 0.0148,
      "step": 2142840
    },
    {
      "epoch": 3.5068373886346826,
      "grad_norm": 0.1953364908695221,
      "learning_rate": 3.0060027806514107e-06,
      "loss": 0.0089,
      "step": 2142860
    },
    {
      "epoch": 3.506870119073336,
      "grad_norm": 0.3185352385044098,
      "learning_rate": 3.005936888437894e-06,
      "loss": 0.0125,
      "step": 2142880
    },
    {
      "epoch": 3.506902849511989,
      "grad_norm": 0.05521487072110176,
      "learning_rate": 3.0058709962243766e-06,
      "loss": 0.0117,
      "step": 2142900
    },
    {
      "epoch": 3.5069355799506425,
      "grad_norm": 0.6044184565544128,
      "learning_rate": 3.0058051040108594e-06,
      "loss": 0.0105,
      "step": 2142920
    },
    {
      "epoch": 3.5069683103892957,
      "grad_norm": 0.22730538249015808,
      "learning_rate": 3.005739211797342e-06,
      "loss": 0.0098,
      "step": 2142940
    },
    {
      "epoch": 3.5070010408279493,
      "grad_norm": 0.2646141052246094,
      "learning_rate": 3.005673319583825e-06,
      "loss": 0.0143,
      "step": 2142960
    },
    {
      "epoch": 3.5070337712666024,
      "grad_norm": 0.33877280354499817,
      "learning_rate": 3.005607427370308e-06,
      "loss": 0.0084,
      "step": 2142980
    },
    {
      "epoch": 3.507066501705256,
      "grad_norm": 0.40053901076316833,
      "learning_rate": 3.0055415351567907e-06,
      "loss": 0.0107,
      "step": 2143000
    },
    {
      "epoch": 3.507099232143909,
      "grad_norm": 0.1355026662349701,
      "learning_rate": 3.0054756429432735e-06,
      "loss": 0.0088,
      "step": 2143020
    },
    {
      "epoch": 3.5071319625825623,
      "grad_norm": 0.19930316507816315,
      "learning_rate": 3.0054097507297562e-06,
      "loss": 0.0082,
      "step": 2143040
    },
    {
      "epoch": 3.507164693021216,
      "grad_norm": 0.21338148415088654,
      "learning_rate": 3.0053438585162394e-06,
      "loss": 0.0083,
      "step": 2143060
    },
    {
      "epoch": 3.507197423459869,
      "grad_norm": 0.292132705450058,
      "learning_rate": 3.005277966302722e-06,
      "loss": 0.0131,
      "step": 2143080
    },
    {
      "epoch": 3.5072301538985227,
      "grad_norm": 0.16967549920082092,
      "learning_rate": 3.005212074089205e-06,
      "loss": 0.0134,
      "step": 2143100
    },
    {
      "epoch": 3.507262884337176,
      "grad_norm": 0.21097919344902039,
      "learning_rate": 3.0051461818756876e-06,
      "loss": 0.0059,
      "step": 2143120
    },
    {
      "epoch": 3.5072956147758294,
      "grad_norm": 0.3284589946269989,
      "learning_rate": 3.005080289662171e-06,
      "loss": 0.0116,
      "step": 2143140
    },
    {
      "epoch": 3.5073283452144826,
      "grad_norm": 0.21060825884342194,
      "learning_rate": 3.005014397448654e-06,
      "loss": 0.0097,
      "step": 2143160
    },
    {
      "epoch": 3.5073610756531357,
      "grad_norm": 0.6948592066764832,
      "learning_rate": 3.0049485052351367e-06,
      "loss": 0.0083,
      "step": 2143180
    },
    {
      "epoch": 3.5073938060917893,
      "grad_norm": 0.23452997207641602,
      "learning_rate": 3.00488261302162e-06,
      "loss": 0.0168,
      "step": 2143200
    },
    {
      "epoch": 3.5074265365304425,
      "grad_norm": 0.5369812250137329,
      "learning_rate": 3.0048167208081026e-06,
      "loss": 0.0099,
      "step": 2143220
    },
    {
      "epoch": 3.507459266969096,
      "grad_norm": 0.22795085608959198,
      "learning_rate": 3.0047508285945853e-06,
      "loss": 0.0071,
      "step": 2143240
    },
    {
      "epoch": 3.507491997407749,
      "grad_norm": 0.5213096737861633,
      "learning_rate": 3.004684936381068e-06,
      "loss": 0.0117,
      "step": 2143260
    },
    {
      "epoch": 3.507524727846403,
      "grad_norm": 0.5209915041923523,
      "learning_rate": 3.004619044167551e-06,
      "loss": 0.0128,
      "step": 2143280
    },
    {
      "epoch": 3.507557458285056,
      "grad_norm": 0.09832505881786346,
      "learning_rate": 3.004553151954034e-06,
      "loss": 0.0148,
      "step": 2143300
    },
    {
      "epoch": 3.507590188723709,
      "grad_norm": 0.1270815134048462,
      "learning_rate": 3.0044872597405167e-06,
      "loss": 0.0137,
      "step": 2143320
    },
    {
      "epoch": 3.5076229191623627,
      "grad_norm": 0.18670068681240082,
      "learning_rate": 3.0044213675269994e-06,
      "loss": 0.0104,
      "step": 2143340
    },
    {
      "epoch": 3.507655649601016,
      "grad_norm": 0.06340283155441284,
      "learning_rate": 3.004355475313482e-06,
      "loss": 0.0114,
      "step": 2143360
    },
    {
      "epoch": 3.5076883800396694,
      "grad_norm": 0.8299717307090759,
      "learning_rate": 3.0042895830999653e-06,
      "loss": 0.0155,
      "step": 2143380
    },
    {
      "epoch": 3.5077211104783226,
      "grad_norm": 0.2954697012901306,
      "learning_rate": 3.004223690886448e-06,
      "loss": 0.0141,
      "step": 2143400
    },
    {
      "epoch": 3.507753840916976,
      "grad_norm": 0.22794553637504578,
      "learning_rate": 3.004157798672931e-06,
      "loss": 0.0074,
      "step": 2143420
    },
    {
      "epoch": 3.5077865713556293,
      "grad_norm": 0.16948719322681427,
      "learning_rate": 3.0040919064594136e-06,
      "loss": 0.0063,
      "step": 2143440
    },
    {
      "epoch": 3.5078193017942825,
      "grad_norm": 0.1633031815290451,
      "learning_rate": 3.0040260142458967e-06,
      "loss": 0.0142,
      "step": 2143460
    },
    {
      "epoch": 3.507852032232936,
      "grad_norm": 0.1600329726934433,
      "learning_rate": 3.0039601220323795e-06,
      "loss": 0.0122,
      "step": 2143480
    },
    {
      "epoch": 3.507884762671589,
      "grad_norm": 0.08548144996166229,
      "learning_rate": 3.0038942298188626e-06,
      "loss": 0.0079,
      "step": 2143500
    },
    {
      "epoch": 3.507917493110243,
      "grad_norm": 0.07940099388360977,
      "learning_rate": 3.0038283376053458e-06,
      "loss": 0.0122,
      "step": 2143520
    },
    {
      "epoch": 3.507950223548896,
      "grad_norm": 0.7098168134689331,
      "learning_rate": 3.0037624453918285e-06,
      "loss": 0.0198,
      "step": 2143540
    },
    {
      "epoch": 3.5079829539875496,
      "grad_norm": 0.21651002764701843,
      "learning_rate": 3.0036965531783113e-06,
      "loss": 0.0093,
      "step": 2143560
    },
    {
      "epoch": 3.5080156844262027,
      "grad_norm": 0.19188015162944794,
      "learning_rate": 3.003630660964794e-06,
      "loss": 0.0105,
      "step": 2143580
    },
    {
      "epoch": 3.508048414864856,
      "grad_norm": 0.19729188084602356,
      "learning_rate": 3.003564768751277e-06,
      "loss": 0.0105,
      "step": 2143600
    },
    {
      "epoch": 3.5080811453035095,
      "grad_norm": 0.577919602394104,
      "learning_rate": 3.00349887653776e-06,
      "loss": 0.0156,
      "step": 2143620
    },
    {
      "epoch": 3.5081138757421626,
      "grad_norm": 0.4750191271305084,
      "learning_rate": 3.0034329843242426e-06,
      "loss": 0.0157,
      "step": 2143640
    },
    {
      "epoch": 3.508146606180816,
      "grad_norm": 0.3592314124107361,
      "learning_rate": 3.0033670921107254e-06,
      "loss": 0.0093,
      "step": 2143660
    },
    {
      "epoch": 3.5081793366194693,
      "grad_norm": 0.090116947889328,
      "learning_rate": 3.0033011998972085e-06,
      "loss": 0.0141,
      "step": 2143680
    },
    {
      "epoch": 3.508212067058123,
      "grad_norm": 0.3002622723579407,
      "learning_rate": 3.0032353076836913e-06,
      "loss": 0.0092,
      "step": 2143700
    },
    {
      "epoch": 3.508244797496776,
      "grad_norm": 0.0942843109369278,
      "learning_rate": 3.003169415470174e-06,
      "loss": 0.0099,
      "step": 2143720
    },
    {
      "epoch": 3.5082775279354292,
      "grad_norm": 0.1256079077720642,
      "learning_rate": 3.0031035232566568e-06,
      "loss": 0.0106,
      "step": 2143740
    },
    {
      "epoch": 3.508310258374083,
      "grad_norm": 0.8542216420173645,
      "learning_rate": 3.00303763104314e-06,
      "loss": 0.0105,
      "step": 2143760
    },
    {
      "epoch": 3.508342988812736,
      "grad_norm": 0.17425121366977692,
      "learning_rate": 3.0029717388296227e-06,
      "loss": 0.0067,
      "step": 2143780
    },
    {
      "epoch": 3.508375719251389,
      "grad_norm": 0.2652286887168884,
      "learning_rate": 3.0029058466161054e-06,
      "loss": 0.008,
      "step": 2143800
    },
    {
      "epoch": 3.5084084496900427,
      "grad_norm": 0.14173780381679535,
      "learning_rate": 3.002839954402588e-06,
      "loss": 0.0067,
      "step": 2143820
    },
    {
      "epoch": 3.5084411801286963,
      "grad_norm": 0.07533351331949234,
      "learning_rate": 3.0027740621890717e-06,
      "loss": 0.0111,
      "step": 2143840
    },
    {
      "epoch": 3.5084739105673495,
      "grad_norm": 0.08235929161310196,
      "learning_rate": 3.0027081699755545e-06,
      "loss": 0.0154,
      "step": 2143860
    },
    {
      "epoch": 3.5085066410060026,
      "grad_norm": 0.13687807321548462,
      "learning_rate": 3.0026422777620372e-06,
      "loss": 0.0134,
      "step": 2143880
    },
    {
      "epoch": 3.508539371444656,
      "grad_norm": 0.586533784866333,
      "learning_rate": 3.00257638554852e-06,
      "loss": 0.0163,
      "step": 2143900
    },
    {
      "epoch": 3.5085721018833094,
      "grad_norm": 0.15624327957630157,
      "learning_rate": 3.002510493335003e-06,
      "loss": 0.0125,
      "step": 2143920
    },
    {
      "epoch": 3.5086048323219625,
      "grad_norm": 0.3337579071521759,
      "learning_rate": 3.002444601121486e-06,
      "loss": 0.0132,
      "step": 2143940
    },
    {
      "epoch": 3.508637562760616,
      "grad_norm": 0.30010008811950684,
      "learning_rate": 3.0023787089079686e-06,
      "loss": 0.0173,
      "step": 2143960
    },
    {
      "epoch": 3.5086702931992697,
      "grad_norm": 0.13021987676620483,
      "learning_rate": 3.0023128166944513e-06,
      "loss": 0.0105,
      "step": 2143980
    },
    {
      "epoch": 3.508703023637923,
      "grad_norm": 0.35534849762916565,
      "learning_rate": 3.0022469244809345e-06,
      "loss": 0.0098,
      "step": 2144000
    },
    {
      "epoch": 3.508735754076576,
      "grad_norm": 0.6081042885780334,
      "learning_rate": 3.0021810322674172e-06,
      "loss": 0.0111,
      "step": 2144020
    },
    {
      "epoch": 3.5087684845152296,
      "grad_norm": 0.06949734687805176,
      "learning_rate": 3.0021151400539e-06,
      "loss": 0.0092,
      "step": 2144040
    },
    {
      "epoch": 3.5088012149538828,
      "grad_norm": 0.2519233822822571,
      "learning_rate": 3.0020492478403827e-06,
      "loss": 0.0105,
      "step": 2144060
    },
    {
      "epoch": 3.508833945392536,
      "grad_norm": 0.218088760972023,
      "learning_rate": 3.001983355626866e-06,
      "loss": 0.012,
      "step": 2144080
    },
    {
      "epoch": 3.5088666758311895,
      "grad_norm": 0.14256200194358826,
      "learning_rate": 3.0019174634133486e-06,
      "loss": 0.0117,
      "step": 2144100
    },
    {
      "epoch": 3.508899406269843,
      "grad_norm": 0.07211944460868835,
      "learning_rate": 3.0018515711998314e-06,
      "loss": 0.0083,
      "step": 2144120
    },
    {
      "epoch": 3.5089321367084962,
      "grad_norm": 0.2339693307876587,
      "learning_rate": 3.001785678986314e-06,
      "loss": 0.0109,
      "step": 2144140
    },
    {
      "epoch": 3.5089648671471494,
      "grad_norm": 0.32238641381263733,
      "learning_rate": 3.0017197867727973e-06,
      "loss": 0.0118,
      "step": 2144160
    },
    {
      "epoch": 3.508997597585803,
      "grad_norm": 1.7758967876434326,
      "learning_rate": 3.00165389455928e-06,
      "loss": 0.0134,
      "step": 2144180
    },
    {
      "epoch": 3.509030328024456,
      "grad_norm": 0.20704318583011627,
      "learning_rate": 3.001588002345763e-06,
      "loss": 0.0085,
      "step": 2144200
    },
    {
      "epoch": 3.5090630584631093,
      "grad_norm": 0.20766475796699524,
      "learning_rate": 3.0015221101322463e-06,
      "loss": 0.0083,
      "step": 2144220
    },
    {
      "epoch": 3.509095788901763,
      "grad_norm": 0.032625067979097366,
      "learning_rate": 3.001456217918729e-06,
      "loss": 0.0152,
      "step": 2144240
    },
    {
      "epoch": 3.5091285193404165,
      "grad_norm": 0.3009054362773895,
      "learning_rate": 3.001390325705212e-06,
      "loss": 0.0092,
      "step": 2144260
    },
    {
      "epoch": 3.5091612497790696,
      "grad_norm": 0.32692256569862366,
      "learning_rate": 3.0013244334916946e-06,
      "loss": 0.0086,
      "step": 2144280
    },
    {
      "epoch": 3.5091939802177228,
      "grad_norm": 0.26256877183914185,
      "learning_rate": 3.0012585412781777e-06,
      "loss": 0.0142,
      "step": 2144300
    },
    {
      "epoch": 3.5092267106563764,
      "grad_norm": 0.05500906705856323,
      "learning_rate": 3.0011926490646605e-06,
      "loss": 0.0086,
      "step": 2144320
    },
    {
      "epoch": 3.5092594410950295,
      "grad_norm": 0.13239708542823792,
      "learning_rate": 3.001126756851143e-06,
      "loss": 0.0094,
      "step": 2144340
    },
    {
      "epoch": 3.5092921715336827,
      "grad_norm": 0.2053391933441162,
      "learning_rate": 3.001060864637626e-06,
      "loss": 0.0151,
      "step": 2144360
    },
    {
      "epoch": 3.5093249019723363,
      "grad_norm": 0.10665374249219894,
      "learning_rate": 3.0009949724241087e-06,
      "loss": 0.0107,
      "step": 2144380
    },
    {
      "epoch": 3.5093576324109894,
      "grad_norm": 0.28521955013275146,
      "learning_rate": 3.000929080210592e-06,
      "loss": 0.0176,
      "step": 2144400
    },
    {
      "epoch": 3.509390362849643,
      "grad_norm": 0.45059648156166077,
      "learning_rate": 3.0008631879970746e-06,
      "loss": 0.0077,
      "step": 2144420
    },
    {
      "epoch": 3.509423093288296,
      "grad_norm": 0.3270121216773987,
      "learning_rate": 3.0007972957835573e-06,
      "loss": 0.0105,
      "step": 2144440
    },
    {
      "epoch": 3.5094558237269498,
      "grad_norm": 0.2755841910839081,
      "learning_rate": 3.00073140357004e-06,
      "loss": 0.0076,
      "step": 2144460
    },
    {
      "epoch": 3.509488554165603,
      "grad_norm": 0.17674380540847778,
      "learning_rate": 3.0006655113565232e-06,
      "loss": 0.0137,
      "step": 2144480
    },
    {
      "epoch": 3.509521284604256,
      "grad_norm": 0.30822059512138367,
      "learning_rate": 3.000599619143006e-06,
      "loss": 0.0147,
      "step": 2144500
    },
    {
      "epoch": 3.5095540150429096,
      "grad_norm": 0.3163774609565735,
      "learning_rate": 3.0005337269294887e-06,
      "loss": 0.0112,
      "step": 2144520
    },
    {
      "epoch": 3.509586745481563,
      "grad_norm": 0.2575482726097107,
      "learning_rate": 3.0004678347159714e-06,
      "loss": 0.009,
      "step": 2144540
    },
    {
      "epoch": 3.5096194759202164,
      "grad_norm": 0.08177477866411209,
      "learning_rate": 3.000401942502455e-06,
      "loss": 0.0111,
      "step": 2144560
    },
    {
      "epoch": 3.5096522063588695,
      "grad_norm": 0.12979090213775635,
      "learning_rate": 3.0003360502889378e-06,
      "loss": 0.0084,
      "step": 2144580
    },
    {
      "epoch": 3.509684936797523,
      "grad_norm": 0.13701747357845306,
      "learning_rate": 3.0002701580754205e-06,
      "loss": 0.0079,
      "step": 2144600
    },
    {
      "epoch": 3.5097176672361763,
      "grad_norm": 0.2371169626712799,
      "learning_rate": 3.0002042658619037e-06,
      "loss": 0.0158,
      "step": 2144620
    },
    {
      "epoch": 3.5097503976748294,
      "grad_norm": 0.13537824153900146,
      "learning_rate": 3.0001383736483864e-06,
      "loss": 0.0088,
      "step": 2144640
    },
    {
      "epoch": 3.509783128113483,
      "grad_norm": 0.6063756942749023,
      "learning_rate": 3.000072481434869e-06,
      "loss": 0.0113,
      "step": 2144660
    },
    {
      "epoch": 3.509815858552136,
      "grad_norm": 0.08605809509754181,
      "learning_rate": 3.000006589221352e-06,
      "loss": 0.0196,
      "step": 2144680
    },
    {
      "epoch": 3.5098485889907898,
      "grad_norm": 0.7315569519996643,
      "learning_rate": 2.999940697007835e-06,
      "loss": 0.0089,
      "step": 2144700
    },
    {
      "epoch": 3.509881319429443,
      "grad_norm": 0.35859575867652893,
      "learning_rate": 2.999874804794318e-06,
      "loss": 0.0155,
      "step": 2144720
    },
    {
      "epoch": 3.5099140498680965,
      "grad_norm": 0.1940966248512268,
      "learning_rate": 2.9998089125808005e-06,
      "loss": 0.0111,
      "step": 2144740
    },
    {
      "epoch": 3.5099467803067497,
      "grad_norm": 0.059416841715574265,
      "learning_rate": 2.9997430203672833e-06,
      "loss": 0.0071,
      "step": 2144760
    },
    {
      "epoch": 3.509979510745403,
      "grad_norm": 0.5994389057159424,
      "learning_rate": 2.999677128153766e-06,
      "loss": 0.012,
      "step": 2144780
    },
    {
      "epoch": 3.5100122411840564,
      "grad_norm": 0.27798470854759216,
      "learning_rate": 2.999611235940249e-06,
      "loss": 0.0121,
      "step": 2144800
    },
    {
      "epoch": 3.5100449716227096,
      "grad_norm": 0.20597106218338013,
      "learning_rate": 2.999545343726732e-06,
      "loss": 0.0113,
      "step": 2144820
    },
    {
      "epoch": 3.510077702061363,
      "grad_norm": 0.06955642998218536,
      "learning_rate": 2.9994794515132147e-06,
      "loss": 0.0095,
      "step": 2144840
    },
    {
      "epoch": 3.5101104325000163,
      "grad_norm": 0.21900112926959991,
      "learning_rate": 2.9994135592996974e-06,
      "loss": 0.0075,
      "step": 2144860
    },
    {
      "epoch": 3.51014316293867,
      "grad_norm": 0.21505559980869293,
      "learning_rate": 2.9993476670861806e-06,
      "loss": 0.018,
      "step": 2144880
    },
    {
      "epoch": 3.510175893377323,
      "grad_norm": 0.3840539753437042,
      "learning_rate": 2.9992817748726637e-06,
      "loss": 0.0126,
      "step": 2144900
    },
    {
      "epoch": 3.510208623815976,
      "grad_norm": 0.25336262583732605,
      "learning_rate": 2.9992158826591465e-06,
      "loss": 0.011,
      "step": 2144920
    },
    {
      "epoch": 3.51024135425463,
      "grad_norm": 0.49308347702026367,
      "learning_rate": 2.9991499904456296e-06,
      "loss": 0.0124,
      "step": 2144940
    },
    {
      "epoch": 3.510274084693283,
      "grad_norm": 0.23402687907218933,
      "learning_rate": 2.9990840982321124e-06,
      "loss": 0.0125,
      "step": 2144960
    },
    {
      "epoch": 3.5103068151319365,
      "grad_norm": 0.1500089317560196,
      "learning_rate": 2.999018206018595e-06,
      "loss": 0.0153,
      "step": 2144980
    },
    {
      "epoch": 3.5103395455705897,
      "grad_norm": 0.15241986513137817,
      "learning_rate": 2.998952313805078e-06,
      "loss": 0.0088,
      "step": 2145000
    },
    {
      "epoch": 3.5103722760092433,
      "grad_norm": 0.3824431598186493,
      "learning_rate": 2.998886421591561e-06,
      "loss": 0.0158,
      "step": 2145020
    },
    {
      "epoch": 3.5104050064478964,
      "grad_norm": 0.15979525446891785,
      "learning_rate": 2.9988205293780437e-06,
      "loss": 0.0069,
      "step": 2145040
    },
    {
      "epoch": 3.5104377368865496,
      "grad_norm": 0.29599547386169434,
      "learning_rate": 2.9987546371645265e-06,
      "loss": 0.0066,
      "step": 2145060
    },
    {
      "epoch": 3.510470467325203,
      "grad_norm": 0.36567971110343933,
      "learning_rate": 2.9986887449510092e-06,
      "loss": 0.0078,
      "step": 2145080
    },
    {
      "epoch": 3.5105031977638563,
      "grad_norm": 0.24570335447788239,
      "learning_rate": 2.9986228527374924e-06,
      "loss": 0.0077,
      "step": 2145100
    },
    {
      "epoch": 3.51053592820251,
      "grad_norm": 0.14953704178333282,
      "learning_rate": 2.998556960523975e-06,
      "loss": 0.0072,
      "step": 2145120
    },
    {
      "epoch": 3.510568658641163,
      "grad_norm": 0.07172239571809769,
      "learning_rate": 2.998491068310458e-06,
      "loss": 0.0113,
      "step": 2145140
    },
    {
      "epoch": 3.5106013890798167,
      "grad_norm": 0.1585773527622223,
      "learning_rate": 2.9984251760969406e-06,
      "loss": 0.0073,
      "step": 2145160
    },
    {
      "epoch": 3.51063411951847,
      "grad_norm": 0.22503544390201569,
      "learning_rate": 2.9983592838834238e-06,
      "loss": 0.0115,
      "step": 2145180
    },
    {
      "epoch": 3.510666849957123,
      "grad_norm": 0.40152281522750854,
      "learning_rate": 2.9982933916699065e-06,
      "loss": 0.0077,
      "step": 2145200
    },
    {
      "epoch": 3.5106995803957766,
      "grad_norm": 0.11863184720277786,
      "learning_rate": 2.9982274994563892e-06,
      "loss": 0.013,
      "step": 2145220
    },
    {
      "epoch": 3.5107323108344297,
      "grad_norm": 0.13434462249279022,
      "learning_rate": 2.998161607242872e-06,
      "loss": 0.0079,
      "step": 2145240
    },
    {
      "epoch": 3.510765041273083,
      "grad_norm": 0.16609854996204376,
      "learning_rate": 2.9980957150293556e-06,
      "loss": 0.0112,
      "step": 2145260
    },
    {
      "epoch": 3.5107977717117365,
      "grad_norm": 0.14397099614143372,
      "learning_rate": 2.9980298228158383e-06,
      "loss": 0.0153,
      "step": 2145280
    },
    {
      "epoch": 3.51083050215039,
      "grad_norm": 0.40416833758354187,
      "learning_rate": 2.997963930602321e-06,
      "loss": 0.008,
      "step": 2145300
    },
    {
      "epoch": 3.510863232589043,
      "grad_norm": 0.2623189389705658,
      "learning_rate": 2.997898038388804e-06,
      "loss": 0.0107,
      "step": 2145320
    },
    {
      "epoch": 3.5108959630276964,
      "grad_norm": 0.2692086696624756,
      "learning_rate": 2.997832146175287e-06,
      "loss": 0.0071,
      "step": 2145340
    },
    {
      "epoch": 3.51092869346635,
      "grad_norm": 0.1749727427959442,
      "learning_rate": 2.9977662539617697e-06,
      "loss": 0.0088,
      "step": 2145360
    },
    {
      "epoch": 3.510961423905003,
      "grad_norm": 0.22894760966300964,
      "learning_rate": 2.9977003617482524e-06,
      "loss": 0.01,
      "step": 2145380
    },
    {
      "epoch": 3.5109941543436562,
      "grad_norm": 0.7217046022415161,
      "learning_rate": 2.997634469534735e-06,
      "loss": 0.0144,
      "step": 2145400
    },
    {
      "epoch": 3.51102688478231,
      "grad_norm": 0.6395464539527893,
      "learning_rate": 2.9975685773212183e-06,
      "loss": 0.0132,
      "step": 2145420
    },
    {
      "epoch": 3.5110596152209634,
      "grad_norm": 0.07755011320114136,
      "learning_rate": 2.997502685107701e-06,
      "loss": 0.0084,
      "step": 2145440
    },
    {
      "epoch": 3.5110923456596166,
      "grad_norm": 0.26489463448524475,
      "learning_rate": 2.997436792894184e-06,
      "loss": 0.0121,
      "step": 2145460
    },
    {
      "epoch": 3.5111250760982697,
      "grad_norm": 0.3508288562297821,
      "learning_rate": 2.9973709006806666e-06,
      "loss": 0.0101,
      "step": 2145480
    },
    {
      "epoch": 3.5111578065369233,
      "grad_norm": 0.1277526468038559,
      "learning_rate": 2.9973050084671497e-06,
      "loss": 0.0092,
      "step": 2145500
    },
    {
      "epoch": 3.5111905369755765,
      "grad_norm": 0.40019235014915466,
      "learning_rate": 2.9972391162536325e-06,
      "loss": 0.0117,
      "step": 2145520
    },
    {
      "epoch": 3.5112232674142296,
      "grad_norm": 0.6076914072036743,
      "learning_rate": 2.997173224040115e-06,
      "loss": 0.0114,
      "step": 2145540
    },
    {
      "epoch": 3.5112559978528832,
      "grad_norm": 0.6253378391265869,
      "learning_rate": 2.997107331826598e-06,
      "loss": 0.0147,
      "step": 2145560
    },
    {
      "epoch": 3.511288728291537,
      "grad_norm": 0.23641082644462585,
      "learning_rate": 2.997041439613081e-06,
      "loss": 0.0092,
      "step": 2145580
    },
    {
      "epoch": 3.51132145873019,
      "grad_norm": 0.11808818578720093,
      "learning_rate": 2.9969755473995643e-06,
      "loss": 0.0095,
      "step": 2145600
    },
    {
      "epoch": 3.511354189168843,
      "grad_norm": 0.35741063952445984,
      "learning_rate": 2.996909655186047e-06,
      "loss": 0.0145,
      "step": 2145620
    },
    {
      "epoch": 3.5113869196074967,
      "grad_norm": 0.4136793911457062,
      "learning_rate": 2.99684376297253e-06,
      "loss": 0.0103,
      "step": 2145640
    },
    {
      "epoch": 3.51141965004615,
      "grad_norm": 0.21628795564174652,
      "learning_rate": 2.996777870759013e-06,
      "loss": 0.0126,
      "step": 2145660
    },
    {
      "epoch": 3.511452380484803,
      "grad_norm": 0.4980476200580597,
      "learning_rate": 2.9967119785454957e-06,
      "loss": 0.0143,
      "step": 2145680
    },
    {
      "epoch": 3.5114851109234566,
      "grad_norm": 0.3043084740638733,
      "learning_rate": 2.9966460863319784e-06,
      "loss": 0.0111,
      "step": 2145700
    },
    {
      "epoch": 3.51151784136211,
      "grad_norm": 0.14887624979019165,
      "learning_rate": 2.9965801941184616e-06,
      "loss": 0.0151,
      "step": 2145720
    },
    {
      "epoch": 3.5115505718007634,
      "grad_norm": 0.21453668177127838,
      "learning_rate": 2.9965143019049443e-06,
      "loss": 0.0194,
      "step": 2145740
    },
    {
      "epoch": 3.5115833022394165,
      "grad_norm": 0.2999732494354248,
      "learning_rate": 2.996448409691427e-06,
      "loss": 0.0105,
      "step": 2145760
    },
    {
      "epoch": 3.51161603267807,
      "grad_norm": 0.26419511437416077,
      "learning_rate": 2.9963825174779098e-06,
      "loss": 0.009,
      "step": 2145780
    },
    {
      "epoch": 3.5116487631167232,
      "grad_norm": 0.10640056431293488,
      "learning_rate": 2.9963166252643925e-06,
      "loss": 0.0121,
      "step": 2145800
    },
    {
      "epoch": 3.5116814935553764,
      "grad_norm": 0.16832315921783447,
      "learning_rate": 2.9962507330508757e-06,
      "loss": 0.0107,
      "step": 2145820
    },
    {
      "epoch": 3.51171422399403,
      "grad_norm": 0.16185200214385986,
      "learning_rate": 2.9961848408373584e-06,
      "loss": 0.0113,
      "step": 2145840
    },
    {
      "epoch": 3.511746954432683,
      "grad_norm": 0.185931995511055,
      "learning_rate": 2.996118948623841e-06,
      "loss": 0.0125,
      "step": 2145860
    },
    {
      "epoch": 3.5117796848713367,
      "grad_norm": 0.2252645045518875,
      "learning_rate": 2.996053056410324e-06,
      "loss": 0.0102,
      "step": 2145880
    },
    {
      "epoch": 3.51181241530999,
      "grad_norm": 0.07553966343402863,
      "learning_rate": 2.995987164196807e-06,
      "loss": 0.0094,
      "step": 2145900
    },
    {
      "epoch": 3.5118451457486435,
      "grad_norm": 0.19825348258018494,
      "learning_rate": 2.99592127198329e-06,
      "loss": 0.0095,
      "step": 2145920
    },
    {
      "epoch": 3.5118778761872966,
      "grad_norm": 0.0872548371553421,
      "learning_rate": 2.9958553797697725e-06,
      "loss": 0.006,
      "step": 2145940
    },
    {
      "epoch": 3.51191060662595,
      "grad_norm": 0.06815700232982635,
      "learning_rate": 2.995789487556256e-06,
      "loss": 0.0063,
      "step": 2145960
    },
    {
      "epoch": 3.5119433370646034,
      "grad_norm": 0.14719121158123016,
      "learning_rate": 2.995723595342739e-06,
      "loss": 0.0125,
      "step": 2145980
    },
    {
      "epoch": 3.5119760675032565,
      "grad_norm": 0.12741878628730774,
      "learning_rate": 2.9956577031292216e-06,
      "loss": 0.0102,
      "step": 2146000
    },
    {
      "epoch": 3.51200879794191,
      "grad_norm": 0.25621679425239563,
      "learning_rate": 2.9955918109157043e-06,
      "loss": 0.0126,
      "step": 2146020
    },
    {
      "epoch": 3.5120415283805633,
      "grad_norm": 0.32042160630226135,
      "learning_rate": 2.9955259187021875e-06,
      "loss": 0.013,
      "step": 2146040
    },
    {
      "epoch": 3.512074258819217,
      "grad_norm": 0.10703710466623306,
      "learning_rate": 2.9954600264886702e-06,
      "loss": 0.0105,
      "step": 2146060
    },
    {
      "epoch": 3.51210698925787,
      "grad_norm": 0.35174864530563354,
      "learning_rate": 2.995394134275153e-06,
      "loss": 0.0124,
      "step": 2146080
    },
    {
      "epoch": 3.512139719696523,
      "grad_norm": 0.3166029751300812,
      "learning_rate": 2.9953282420616357e-06,
      "loss": 0.0053,
      "step": 2146100
    },
    {
      "epoch": 3.5121724501351768,
      "grad_norm": 0.43649882078170776,
      "learning_rate": 2.995262349848119e-06,
      "loss": 0.0131,
      "step": 2146120
    },
    {
      "epoch": 3.51220518057383,
      "grad_norm": 0.34551045298576355,
      "learning_rate": 2.9951964576346016e-06,
      "loss": 0.0149,
      "step": 2146140
    },
    {
      "epoch": 3.5122379110124835,
      "grad_norm": 0.4523394703865051,
      "learning_rate": 2.9951305654210844e-06,
      "loss": 0.0105,
      "step": 2146160
    },
    {
      "epoch": 3.5122706414511367,
      "grad_norm": 0.23900222778320312,
      "learning_rate": 2.995064673207567e-06,
      "loss": 0.0104,
      "step": 2146180
    },
    {
      "epoch": 3.5123033718897902,
      "grad_norm": 0.047285422682762146,
      "learning_rate": 2.99499878099405e-06,
      "loss": 0.0094,
      "step": 2146200
    },
    {
      "epoch": 3.5123361023284434,
      "grad_norm": 0.25110653042793274,
      "learning_rate": 2.994932888780533e-06,
      "loss": 0.0127,
      "step": 2146220
    },
    {
      "epoch": 3.5123688327670965,
      "grad_norm": 0.6165980100631714,
      "learning_rate": 2.9948669965670158e-06,
      "loss": 0.0084,
      "step": 2146240
    },
    {
      "epoch": 3.51240156320575,
      "grad_norm": 0.1854129433631897,
      "learning_rate": 2.9948011043534985e-06,
      "loss": 0.0122,
      "step": 2146260
    },
    {
      "epoch": 3.5124342936444033,
      "grad_norm": 0.9214886426925659,
      "learning_rate": 2.9947352121399812e-06,
      "loss": 0.0101,
      "step": 2146280
    },
    {
      "epoch": 3.512467024083057,
      "grad_norm": 0.5654028058052063,
      "learning_rate": 2.9946693199264644e-06,
      "loss": 0.0166,
      "step": 2146300
    },
    {
      "epoch": 3.51249975452171,
      "grad_norm": 0.5076404809951782,
      "learning_rate": 2.9946034277129476e-06,
      "loss": 0.0115,
      "step": 2146320
    },
    {
      "epoch": 3.5125324849603636,
      "grad_norm": 0.6675898432731628,
      "learning_rate": 2.9945375354994303e-06,
      "loss": 0.008,
      "step": 2146340
    },
    {
      "epoch": 3.512565215399017,
      "grad_norm": 0.08311095833778381,
      "learning_rate": 2.9944716432859135e-06,
      "loss": 0.0186,
      "step": 2146360
    },
    {
      "epoch": 3.51259794583767,
      "grad_norm": 0.46250128746032715,
      "learning_rate": 2.994405751072396e-06,
      "loss": 0.0103,
      "step": 2146380
    },
    {
      "epoch": 3.5126306762763235,
      "grad_norm": 0.28512969613075256,
      "learning_rate": 2.994339858858879e-06,
      "loss": 0.0123,
      "step": 2146400
    },
    {
      "epoch": 3.5126634067149767,
      "grad_norm": 0.0359499529004097,
      "learning_rate": 2.9942739666453617e-06,
      "loss": 0.0185,
      "step": 2146420
    },
    {
      "epoch": 3.5126961371536303,
      "grad_norm": 0.1365649551153183,
      "learning_rate": 2.994208074431845e-06,
      "loss": 0.0154,
      "step": 2146440
    },
    {
      "epoch": 3.5127288675922834,
      "grad_norm": 0.4494722783565521,
      "learning_rate": 2.9941421822183276e-06,
      "loss": 0.0094,
      "step": 2146460
    },
    {
      "epoch": 3.512761598030937,
      "grad_norm": 0.19456791877746582,
      "learning_rate": 2.9940762900048103e-06,
      "loss": 0.0081,
      "step": 2146480
    },
    {
      "epoch": 3.51279432846959,
      "grad_norm": 0.22923319041728973,
      "learning_rate": 2.994010397791293e-06,
      "loss": 0.0149,
      "step": 2146500
    },
    {
      "epoch": 3.5128270589082433,
      "grad_norm": 0.2362222671508789,
      "learning_rate": 2.9939445055777762e-06,
      "loss": 0.0102,
      "step": 2146520
    },
    {
      "epoch": 3.512859789346897,
      "grad_norm": 0.4813062846660614,
      "learning_rate": 2.993878613364259e-06,
      "loss": 0.0073,
      "step": 2146540
    },
    {
      "epoch": 3.51289251978555,
      "grad_norm": 0.10332700610160828,
      "learning_rate": 2.9938127211507417e-06,
      "loss": 0.014,
      "step": 2146560
    },
    {
      "epoch": 3.5129252502242037,
      "grad_norm": 0.7486128807067871,
      "learning_rate": 2.9937468289372244e-06,
      "loss": 0.0078,
      "step": 2146580
    },
    {
      "epoch": 3.512957980662857,
      "grad_norm": 0.2242402732372284,
      "learning_rate": 2.9936809367237076e-06,
      "loss": 0.0113,
      "step": 2146600
    },
    {
      "epoch": 3.5129907111015104,
      "grad_norm": 0.1343764364719391,
      "learning_rate": 2.9936150445101903e-06,
      "loss": 0.012,
      "step": 2146620
    },
    {
      "epoch": 3.5130234415401635,
      "grad_norm": 0.6759856939315796,
      "learning_rate": 2.993549152296673e-06,
      "loss": 0.0148,
      "step": 2146640
    },
    {
      "epoch": 3.5130561719788167,
      "grad_norm": 0.37135186791419983,
      "learning_rate": 2.9934832600831567e-06,
      "loss": 0.0096,
      "step": 2146660
    },
    {
      "epoch": 3.5130889024174703,
      "grad_norm": 0.2062949538230896,
      "learning_rate": 2.9934173678696394e-06,
      "loss": 0.01,
      "step": 2146680
    },
    {
      "epoch": 3.5131216328561234,
      "grad_norm": 0.5298963785171509,
      "learning_rate": 2.993351475656122e-06,
      "loss": 0.0096,
      "step": 2146700
    },
    {
      "epoch": 3.513154363294777,
      "grad_norm": 0.23214904963970184,
      "learning_rate": 2.993285583442605e-06,
      "loss": 0.0115,
      "step": 2146720
    },
    {
      "epoch": 3.51318709373343,
      "grad_norm": 0.2562048137187958,
      "learning_rate": 2.9932196912290876e-06,
      "loss": 0.0084,
      "step": 2146740
    },
    {
      "epoch": 3.513219824172084,
      "grad_norm": 0.21262255311012268,
      "learning_rate": 2.993153799015571e-06,
      "loss": 0.0097,
      "step": 2146760
    },
    {
      "epoch": 3.513252554610737,
      "grad_norm": 0.3159971535205841,
      "learning_rate": 2.9930879068020535e-06,
      "loss": 0.0126,
      "step": 2146780
    },
    {
      "epoch": 3.51328528504939,
      "grad_norm": 0.39430704712867737,
      "learning_rate": 2.9930220145885363e-06,
      "loss": 0.009,
      "step": 2146800
    },
    {
      "epoch": 3.5133180154880437,
      "grad_norm": 0.06844359636306763,
      "learning_rate": 2.992956122375019e-06,
      "loss": 0.0123,
      "step": 2146820
    },
    {
      "epoch": 3.513350745926697,
      "grad_norm": 0.36091482639312744,
      "learning_rate": 2.992890230161502e-06,
      "loss": 0.0158,
      "step": 2146840
    },
    {
      "epoch": 3.51338347636535,
      "grad_norm": 0.23092643916606903,
      "learning_rate": 2.992824337947985e-06,
      "loss": 0.0117,
      "step": 2146860
    },
    {
      "epoch": 3.5134162068040036,
      "grad_norm": 0.3617873191833496,
      "learning_rate": 2.9927584457344677e-06,
      "loss": 0.0088,
      "step": 2146880
    },
    {
      "epoch": 3.513448937242657,
      "grad_norm": 0.34349799156188965,
      "learning_rate": 2.9926925535209504e-06,
      "loss": 0.0127,
      "step": 2146900
    },
    {
      "epoch": 3.5134816676813103,
      "grad_norm": 0.13333983719348907,
      "learning_rate": 2.9926266613074336e-06,
      "loss": 0.0091,
      "step": 2146920
    },
    {
      "epoch": 3.5135143981199635,
      "grad_norm": 0.26700690388679504,
      "learning_rate": 2.9925607690939163e-06,
      "loss": 0.0128,
      "step": 2146940
    },
    {
      "epoch": 3.513547128558617,
      "grad_norm": 1.2955392599105835,
      "learning_rate": 2.992494876880399e-06,
      "loss": 0.0096,
      "step": 2146960
    },
    {
      "epoch": 3.51357985899727,
      "grad_norm": 0.4026353657245636,
      "learning_rate": 2.9924289846668818e-06,
      "loss": 0.0091,
      "step": 2146980
    },
    {
      "epoch": 3.5136125894359234,
      "grad_norm": 0.24882417917251587,
      "learning_rate": 2.992363092453365e-06,
      "loss": 0.0163,
      "step": 2147000
    },
    {
      "epoch": 3.513645319874577,
      "grad_norm": 0.09933866560459137,
      "learning_rate": 2.992297200239848e-06,
      "loss": 0.0149,
      "step": 2147020
    },
    {
      "epoch": 3.5136780503132305,
      "grad_norm": 0.1398705691099167,
      "learning_rate": 2.992231308026331e-06,
      "loss": 0.0141,
      "step": 2147040
    },
    {
      "epoch": 3.5137107807518837,
      "grad_norm": 0.23880209028720856,
      "learning_rate": 2.992165415812814e-06,
      "loss": 0.0107,
      "step": 2147060
    },
    {
      "epoch": 3.513743511190537,
      "grad_norm": 0.7680028676986694,
      "learning_rate": 2.9920995235992967e-06,
      "loss": 0.0164,
      "step": 2147080
    },
    {
      "epoch": 3.5137762416291904,
      "grad_norm": 0.23277908563613892,
      "learning_rate": 2.9920336313857795e-06,
      "loss": 0.0108,
      "step": 2147100
    },
    {
      "epoch": 3.5138089720678436,
      "grad_norm": 0.38182392716407776,
      "learning_rate": 2.9919677391722622e-06,
      "loss": 0.0094,
      "step": 2147120
    },
    {
      "epoch": 3.5138417025064967,
      "grad_norm": 1.384272813796997,
      "learning_rate": 2.9919018469587454e-06,
      "loss": 0.0079,
      "step": 2147140
    },
    {
      "epoch": 3.5138744329451503,
      "grad_norm": 0.39363202452659607,
      "learning_rate": 2.991835954745228e-06,
      "loss": 0.0158,
      "step": 2147160
    },
    {
      "epoch": 3.513907163383804,
      "grad_norm": 0.4621819853782654,
      "learning_rate": 2.991770062531711e-06,
      "loss": 0.0131,
      "step": 2147180
    },
    {
      "epoch": 3.513939893822457,
      "grad_norm": 0.1419588178396225,
      "learning_rate": 2.9917041703181936e-06,
      "loss": 0.0073,
      "step": 2147200
    },
    {
      "epoch": 3.5139726242611102,
      "grad_norm": 0.4394732117652893,
      "learning_rate": 2.9916382781046764e-06,
      "loss": 0.0091,
      "step": 2147220
    },
    {
      "epoch": 3.514005354699764,
      "grad_norm": 0.14098629355430603,
      "learning_rate": 2.9915723858911595e-06,
      "loss": 0.0138,
      "step": 2147240
    },
    {
      "epoch": 3.514038085138417,
      "grad_norm": 0.20193539559841156,
      "learning_rate": 2.9915064936776423e-06,
      "loss": 0.0102,
      "step": 2147260
    },
    {
      "epoch": 3.51407081557707,
      "grad_norm": 0.1405283659696579,
      "learning_rate": 2.991440601464125e-06,
      "loss": 0.0084,
      "step": 2147280
    },
    {
      "epoch": 3.5141035460157237,
      "grad_norm": 0.7185761332511902,
      "learning_rate": 2.9913747092506077e-06,
      "loss": 0.0156,
      "step": 2147300
    },
    {
      "epoch": 3.5141362764543773,
      "grad_norm": 0.198647141456604,
      "learning_rate": 2.991308817037091e-06,
      "loss": 0.0134,
      "step": 2147320
    },
    {
      "epoch": 3.5141690068930305,
      "grad_norm": 0.6377926468849182,
      "learning_rate": 2.9912429248235736e-06,
      "loss": 0.0088,
      "step": 2147340
    },
    {
      "epoch": 3.5142017373316836,
      "grad_norm": 0.16002610325813293,
      "learning_rate": 2.991177032610057e-06,
      "loss": 0.0082,
      "step": 2147360
    },
    {
      "epoch": 3.514234467770337,
      "grad_norm": 0.3624212443828583,
      "learning_rate": 2.99111114039654e-06,
      "loss": 0.0121,
      "step": 2147380
    },
    {
      "epoch": 3.5142671982089904,
      "grad_norm": 0.0689060240983963,
      "learning_rate": 2.9910452481830227e-06,
      "loss": 0.0096,
      "step": 2147400
    },
    {
      "epoch": 3.5142999286476435,
      "grad_norm": 0.14969974756240845,
      "learning_rate": 2.9909793559695054e-06,
      "loss": 0.0123,
      "step": 2147420
    },
    {
      "epoch": 3.514332659086297,
      "grad_norm": 0.3062381446361542,
      "learning_rate": 2.990913463755988e-06,
      "loss": 0.0115,
      "step": 2147440
    },
    {
      "epoch": 3.5143653895249503,
      "grad_norm": 0.25155672430992126,
      "learning_rate": 2.9908475715424713e-06,
      "loss": 0.0147,
      "step": 2147460
    },
    {
      "epoch": 3.514398119963604,
      "grad_norm": 0.37783753871917725,
      "learning_rate": 2.990781679328954e-06,
      "loss": 0.0072,
      "step": 2147480
    },
    {
      "epoch": 3.514430850402257,
      "grad_norm": 0.18870961666107178,
      "learning_rate": 2.990715787115437e-06,
      "loss": 0.0126,
      "step": 2147500
    },
    {
      "epoch": 3.5144635808409106,
      "grad_norm": 0.15608538687229156,
      "learning_rate": 2.9906498949019196e-06,
      "loss": 0.0102,
      "step": 2147520
    },
    {
      "epoch": 3.5144963112795637,
      "grad_norm": 0.405656635761261,
      "learning_rate": 2.9905840026884027e-06,
      "loss": 0.0121,
      "step": 2147540
    },
    {
      "epoch": 3.514529041718217,
      "grad_norm": 0.35640716552734375,
      "learning_rate": 2.9905181104748855e-06,
      "loss": 0.0135,
      "step": 2147560
    },
    {
      "epoch": 3.5145617721568705,
      "grad_norm": 0.15519140660762787,
      "learning_rate": 2.990452218261368e-06,
      "loss": 0.009,
      "step": 2147580
    },
    {
      "epoch": 3.5145945025955236,
      "grad_norm": 0.36700180172920227,
      "learning_rate": 2.990386326047851e-06,
      "loss": 0.0145,
      "step": 2147600
    },
    {
      "epoch": 3.5146272330341772,
      "grad_norm": 0.67342609167099,
      "learning_rate": 2.990320433834334e-06,
      "loss": 0.0093,
      "step": 2147620
    },
    {
      "epoch": 3.5146599634728304,
      "grad_norm": 0.2112123668193817,
      "learning_rate": 2.990254541620817e-06,
      "loss": 0.0084,
      "step": 2147640
    },
    {
      "epoch": 3.514692693911484,
      "grad_norm": 0.039991799741983414,
      "learning_rate": 2.9901886494072996e-06,
      "loss": 0.009,
      "step": 2147660
    },
    {
      "epoch": 3.514725424350137,
      "grad_norm": 0.4675794839859009,
      "learning_rate": 2.9901227571937823e-06,
      "loss": 0.0134,
      "step": 2147680
    },
    {
      "epoch": 3.5147581547887903,
      "grad_norm": 0.25916382670402527,
      "learning_rate": 2.990056864980265e-06,
      "loss": 0.0182,
      "step": 2147700
    },
    {
      "epoch": 3.514790885227444,
      "grad_norm": 0.3268361985683441,
      "learning_rate": 2.9899909727667487e-06,
      "loss": 0.0092,
      "step": 2147720
    },
    {
      "epoch": 3.514823615666097,
      "grad_norm": 0.16325359046459198,
      "learning_rate": 2.9899250805532314e-06,
      "loss": 0.0093,
      "step": 2147740
    },
    {
      "epoch": 3.5148563461047506,
      "grad_norm": 0.2745864689350128,
      "learning_rate": 2.989859188339714e-06,
      "loss": 0.0111,
      "step": 2147760
    },
    {
      "epoch": 3.5148890765434038,
      "grad_norm": 0.023422472178936005,
      "learning_rate": 2.9897932961261973e-06,
      "loss": 0.0085,
      "step": 2147780
    },
    {
      "epoch": 3.5149218069820574,
      "grad_norm": 0.09139583259820938,
      "learning_rate": 2.98972740391268e-06,
      "loss": 0.0149,
      "step": 2147800
    },
    {
      "epoch": 3.5149545374207105,
      "grad_norm": 0.2513064444065094,
      "learning_rate": 2.9896615116991628e-06,
      "loss": 0.0127,
      "step": 2147820
    },
    {
      "epoch": 3.5149872678593637,
      "grad_norm": 0.16547372937202454,
      "learning_rate": 2.9895956194856455e-06,
      "loss": 0.0113,
      "step": 2147840
    },
    {
      "epoch": 3.5150199982980173,
      "grad_norm": 0.22001776099205017,
      "learning_rate": 2.9895297272721287e-06,
      "loss": 0.0098,
      "step": 2147860
    },
    {
      "epoch": 3.5150527287366704,
      "grad_norm": 0.48809438943862915,
      "learning_rate": 2.9894638350586114e-06,
      "loss": 0.0176,
      "step": 2147880
    },
    {
      "epoch": 3.515085459175324,
      "grad_norm": 0.13392670452594757,
      "learning_rate": 2.989397942845094e-06,
      "loss": 0.011,
      "step": 2147900
    },
    {
      "epoch": 3.515118189613977,
      "grad_norm": 0.4133053719997406,
      "learning_rate": 2.989332050631577e-06,
      "loss": 0.0131,
      "step": 2147920
    },
    {
      "epoch": 3.5151509200526307,
      "grad_norm": 0.1035042330622673,
      "learning_rate": 2.98926615841806e-06,
      "loss": 0.0121,
      "step": 2147940
    },
    {
      "epoch": 3.515183650491284,
      "grad_norm": 0.3952581286430359,
      "learning_rate": 2.989200266204543e-06,
      "loss": 0.017,
      "step": 2147960
    },
    {
      "epoch": 3.515216380929937,
      "grad_norm": 0.1361149251461029,
      "learning_rate": 2.9891343739910255e-06,
      "loss": 0.0096,
      "step": 2147980
    },
    {
      "epoch": 3.5152491113685906,
      "grad_norm": 0.2393515557050705,
      "learning_rate": 2.9890684817775083e-06,
      "loss": 0.0136,
      "step": 2148000
    },
    {
      "epoch": 3.515281841807244,
      "grad_norm": 0.19998976588249207,
      "learning_rate": 2.9890025895639914e-06,
      "loss": 0.0144,
      "step": 2148020
    },
    {
      "epoch": 3.5153145722458974,
      "grad_norm": 0.2257283627986908,
      "learning_rate": 2.988936697350474e-06,
      "loss": 0.0104,
      "step": 2148040
    },
    {
      "epoch": 3.5153473026845505,
      "grad_norm": 0.19501374661922455,
      "learning_rate": 2.9888708051369574e-06,
      "loss": 0.009,
      "step": 2148060
    },
    {
      "epoch": 3.515380033123204,
      "grad_norm": 0.06957051903009415,
      "learning_rate": 2.9888049129234405e-06,
      "loss": 0.0083,
      "step": 2148080
    },
    {
      "epoch": 3.5154127635618573,
      "grad_norm": 0.2667700946331024,
      "learning_rate": 2.9887390207099233e-06,
      "loss": 0.0138,
      "step": 2148100
    },
    {
      "epoch": 3.5154454940005104,
      "grad_norm": 0.14190630614757538,
      "learning_rate": 2.988673128496406e-06,
      "loss": 0.0143,
      "step": 2148120
    },
    {
      "epoch": 3.515478224439164,
      "grad_norm": 0.12364775687456131,
      "learning_rate": 2.9886072362828887e-06,
      "loss": 0.0122,
      "step": 2148140
    },
    {
      "epoch": 3.515510954877817,
      "grad_norm": 0.3927587568759918,
      "learning_rate": 2.988541344069372e-06,
      "loss": 0.0084,
      "step": 2148160
    },
    {
      "epoch": 3.5155436853164708,
      "grad_norm": 0.18842044472694397,
      "learning_rate": 2.9884754518558546e-06,
      "loss": 0.0114,
      "step": 2148180
    },
    {
      "epoch": 3.515576415755124,
      "grad_norm": 0.23056839406490326,
      "learning_rate": 2.9884095596423374e-06,
      "loss": 0.0093,
      "step": 2148200
    },
    {
      "epoch": 3.5156091461937775,
      "grad_norm": 0.21138815581798553,
      "learning_rate": 2.98834366742882e-06,
      "loss": 0.0116,
      "step": 2148220
    },
    {
      "epoch": 3.5156418766324307,
      "grad_norm": 0.14221380650997162,
      "learning_rate": 2.988277775215303e-06,
      "loss": 0.0207,
      "step": 2148240
    },
    {
      "epoch": 3.515674607071084,
      "grad_norm": 0.30386850237846375,
      "learning_rate": 2.988211883001786e-06,
      "loss": 0.0148,
      "step": 2148260
    },
    {
      "epoch": 3.5157073375097374,
      "grad_norm": 0.22723697125911713,
      "learning_rate": 2.9881459907882688e-06,
      "loss": 0.0098,
      "step": 2148280
    },
    {
      "epoch": 3.5157400679483906,
      "grad_norm": 0.09558518975973129,
      "learning_rate": 2.9880800985747515e-06,
      "loss": 0.0129,
      "step": 2148300
    },
    {
      "epoch": 3.5157727983870437,
      "grad_norm": 0.19687947630882263,
      "learning_rate": 2.9880142063612342e-06,
      "loss": 0.0128,
      "step": 2148320
    },
    {
      "epoch": 3.5158055288256973,
      "grad_norm": 0.35275495052337646,
      "learning_rate": 2.9879483141477174e-06,
      "loss": 0.0105,
      "step": 2148340
    },
    {
      "epoch": 3.515838259264351,
      "grad_norm": 0.25818556547164917,
      "learning_rate": 2.9878824219342e-06,
      "loss": 0.0141,
      "step": 2148360
    },
    {
      "epoch": 3.515870989703004,
      "grad_norm": 0.23120646178722382,
      "learning_rate": 2.987816529720683e-06,
      "loss": 0.011,
      "step": 2148380
    },
    {
      "epoch": 3.515903720141657,
      "grad_norm": 0.1287931501865387,
      "learning_rate": 2.9877506375071656e-06,
      "loss": 0.0142,
      "step": 2148400
    },
    {
      "epoch": 3.515936450580311,
      "grad_norm": 0.07872042804956436,
      "learning_rate": 2.987684745293649e-06,
      "loss": 0.0082,
      "step": 2148420
    },
    {
      "epoch": 3.515969181018964,
      "grad_norm": 0.36511680483818054,
      "learning_rate": 2.987618853080132e-06,
      "loss": 0.0081,
      "step": 2148440
    },
    {
      "epoch": 3.516001911457617,
      "grad_norm": 0.6358358263969421,
      "learning_rate": 2.9875529608666147e-06,
      "loss": 0.0087,
      "step": 2148460
    },
    {
      "epoch": 3.5160346418962707,
      "grad_norm": 0.2794382870197296,
      "learning_rate": 2.987487068653098e-06,
      "loss": 0.0114,
      "step": 2148480
    },
    {
      "epoch": 3.5160673723349243,
      "grad_norm": 0.17349517345428467,
      "learning_rate": 2.9874211764395806e-06,
      "loss": 0.0114,
      "step": 2148500
    },
    {
      "epoch": 3.5161001027735774,
      "grad_norm": 0.11396420001983643,
      "learning_rate": 2.9873552842260633e-06,
      "loss": 0.0086,
      "step": 2148520
    },
    {
      "epoch": 3.5161328332122306,
      "grad_norm": 0.5455108880996704,
      "learning_rate": 2.987289392012546e-06,
      "loss": 0.0115,
      "step": 2148540
    },
    {
      "epoch": 3.516165563650884,
      "grad_norm": 0.13130809366703033,
      "learning_rate": 2.9872234997990292e-06,
      "loss": 0.0106,
      "step": 2148560
    },
    {
      "epoch": 3.5161982940895373,
      "grad_norm": 0.10639574378728867,
      "learning_rate": 2.987157607585512e-06,
      "loss": 0.0089,
      "step": 2148580
    },
    {
      "epoch": 3.5162310245281905,
      "grad_norm": 0.393731027841568,
      "learning_rate": 2.9870917153719947e-06,
      "loss": 0.0156,
      "step": 2148600
    },
    {
      "epoch": 3.516263754966844,
      "grad_norm": 0.3671859800815582,
      "learning_rate": 2.9870258231584775e-06,
      "loss": 0.0088,
      "step": 2148620
    },
    {
      "epoch": 3.5162964854054977,
      "grad_norm": 0.22355639934539795,
      "learning_rate": 2.98695993094496e-06,
      "loss": 0.0096,
      "step": 2148640
    },
    {
      "epoch": 3.516329215844151,
      "grad_norm": 0.5465489625930786,
      "learning_rate": 2.9868940387314434e-06,
      "loss": 0.0146,
      "step": 2148660
    },
    {
      "epoch": 3.516361946282804,
      "grad_norm": 0.5200046300888062,
      "learning_rate": 2.986828146517926e-06,
      "loss": 0.0094,
      "step": 2148680
    },
    {
      "epoch": 3.5163946767214576,
      "grad_norm": 0.3808465600013733,
      "learning_rate": 2.986762254304409e-06,
      "loss": 0.0181,
      "step": 2148700
    },
    {
      "epoch": 3.5164274071601107,
      "grad_norm": 0.6287839412689209,
      "learning_rate": 2.9866963620908916e-06,
      "loss": 0.0129,
      "step": 2148720
    },
    {
      "epoch": 3.516460137598764,
      "grad_norm": 0.1037597507238388,
      "learning_rate": 2.9866304698773747e-06,
      "loss": 0.0151,
      "step": 2148740
    },
    {
      "epoch": 3.5164928680374175,
      "grad_norm": 0.19159992039203644,
      "learning_rate": 2.9865645776638575e-06,
      "loss": 0.009,
      "step": 2148760
    },
    {
      "epoch": 3.516525598476071,
      "grad_norm": 0.48270320892333984,
      "learning_rate": 2.9864986854503406e-06,
      "loss": 0.0154,
      "step": 2148780
    },
    {
      "epoch": 3.516558328914724,
      "grad_norm": 0.14241260290145874,
      "learning_rate": 2.986432793236824e-06,
      "loss": 0.0092,
      "step": 2148800
    },
    {
      "epoch": 3.5165910593533773,
      "grad_norm": 0.14987005293369293,
      "learning_rate": 2.9863669010233065e-06,
      "loss": 0.0136,
      "step": 2148820
    },
    {
      "epoch": 3.516623789792031,
      "grad_norm": 0.2045188993215561,
      "learning_rate": 2.9863010088097893e-06,
      "loss": 0.0106,
      "step": 2148840
    },
    {
      "epoch": 3.516656520230684,
      "grad_norm": 0.4150264859199524,
      "learning_rate": 2.986235116596272e-06,
      "loss": 0.0092,
      "step": 2148860
    },
    {
      "epoch": 3.5166892506693372,
      "grad_norm": 0.3223068118095398,
      "learning_rate": 2.986169224382755e-06,
      "loss": 0.013,
      "step": 2148880
    },
    {
      "epoch": 3.516721981107991,
      "grad_norm": 0.46177801489830017,
      "learning_rate": 2.986103332169238e-06,
      "loss": 0.0077,
      "step": 2148900
    },
    {
      "epoch": 3.516754711546644,
      "grad_norm": 0.1535017043352127,
      "learning_rate": 2.9860374399557207e-06,
      "loss": 0.01,
      "step": 2148920
    },
    {
      "epoch": 3.5167874419852976,
      "grad_norm": 0.08959886431694031,
      "learning_rate": 2.9859715477422034e-06,
      "loss": 0.0079,
      "step": 2148940
    },
    {
      "epoch": 3.5168201724239507,
      "grad_norm": 0.3494165241718292,
      "learning_rate": 2.9859056555286866e-06,
      "loss": 0.008,
      "step": 2148960
    },
    {
      "epoch": 3.5168529028626043,
      "grad_norm": 0.24598701298236847,
      "learning_rate": 2.9858397633151693e-06,
      "loss": 0.0126,
      "step": 2148980
    },
    {
      "epoch": 3.5168856333012575,
      "grad_norm": 0.36202704906463623,
      "learning_rate": 2.985773871101652e-06,
      "loss": 0.0096,
      "step": 2149000
    },
    {
      "epoch": 3.5169183637399106,
      "grad_norm": 0.25349482893943787,
      "learning_rate": 2.9857079788881348e-06,
      "loss": 0.0125,
      "step": 2149020
    },
    {
      "epoch": 3.516951094178564,
      "grad_norm": 0.25145474076271057,
      "learning_rate": 2.985642086674618e-06,
      "loss": 0.0104,
      "step": 2149040
    },
    {
      "epoch": 3.5169838246172174,
      "grad_norm": 0.5198458433151245,
      "learning_rate": 2.9855761944611007e-06,
      "loss": 0.0156,
      "step": 2149060
    },
    {
      "epoch": 3.517016555055871,
      "grad_norm": 0.28711849451065063,
      "learning_rate": 2.9855103022475834e-06,
      "loss": 0.0083,
      "step": 2149080
    },
    {
      "epoch": 3.517049285494524,
      "grad_norm": 0.14423950016498566,
      "learning_rate": 2.985444410034066e-06,
      "loss": 0.0091,
      "step": 2149100
    },
    {
      "epoch": 3.5170820159331777,
      "grad_norm": 0.2613639533519745,
      "learning_rate": 2.9853785178205498e-06,
      "loss": 0.0154,
      "step": 2149120
    },
    {
      "epoch": 3.517114746371831,
      "grad_norm": 0.2502303719520569,
      "learning_rate": 2.9853126256070325e-06,
      "loss": 0.0145,
      "step": 2149140
    },
    {
      "epoch": 3.517147476810484,
      "grad_norm": 0.15862353146076202,
      "learning_rate": 2.9852467333935152e-06,
      "loss": 0.0101,
      "step": 2149160
    },
    {
      "epoch": 3.5171802072491376,
      "grad_norm": 0.14399197697639465,
      "learning_rate": 2.985180841179998e-06,
      "loss": 0.0102,
      "step": 2149180
    },
    {
      "epoch": 3.5172129376877908,
      "grad_norm": 0.13737094402313232,
      "learning_rate": 2.985114948966481e-06,
      "loss": 0.0112,
      "step": 2149200
    },
    {
      "epoch": 3.5172456681264443,
      "grad_norm": 0.3251379132270813,
      "learning_rate": 2.985049056752964e-06,
      "loss": 0.0123,
      "step": 2149220
    },
    {
      "epoch": 3.5172783985650975,
      "grad_norm": 0.7008883357048035,
      "learning_rate": 2.9849831645394466e-06,
      "loss": 0.0153,
      "step": 2149240
    },
    {
      "epoch": 3.517311129003751,
      "grad_norm": 0.05682101473212242,
      "learning_rate": 2.9849172723259294e-06,
      "loss": 0.0097,
      "step": 2149260
    },
    {
      "epoch": 3.5173438594424042,
      "grad_norm": 0.49165424704551697,
      "learning_rate": 2.9848513801124125e-06,
      "loss": 0.0157,
      "step": 2149280
    },
    {
      "epoch": 3.5173765898810574,
      "grad_norm": 0.24980275332927704,
      "learning_rate": 2.9847854878988953e-06,
      "loss": 0.0088,
      "step": 2149300
    },
    {
      "epoch": 3.517409320319711,
      "grad_norm": 0.27808305621147156,
      "learning_rate": 2.984719595685378e-06,
      "loss": 0.0068,
      "step": 2149320
    },
    {
      "epoch": 3.517442050758364,
      "grad_norm": 0.3508428931236267,
      "learning_rate": 2.9846537034718607e-06,
      "loss": 0.0163,
      "step": 2149340
    },
    {
      "epoch": 3.5174747811970177,
      "grad_norm": 0.19115856289863586,
      "learning_rate": 2.984587811258344e-06,
      "loss": 0.0105,
      "step": 2149360
    },
    {
      "epoch": 3.517507511635671,
      "grad_norm": 0.6801758408546448,
      "learning_rate": 2.9845219190448266e-06,
      "loss": 0.0126,
      "step": 2149380
    },
    {
      "epoch": 3.5175402420743245,
      "grad_norm": 0.3203895092010498,
      "learning_rate": 2.9844560268313094e-06,
      "loss": 0.0116,
      "step": 2149400
    },
    {
      "epoch": 3.5175729725129776,
      "grad_norm": 1.0553311109542847,
      "learning_rate": 2.984390134617792e-06,
      "loss": 0.0124,
      "step": 2149420
    },
    {
      "epoch": 3.5176057029516308,
      "grad_norm": 0.03794069215655327,
      "learning_rate": 2.9843242424042753e-06,
      "loss": 0.0128,
      "step": 2149440
    },
    {
      "epoch": 3.5176384333902844,
      "grad_norm": 0.30968695878982544,
      "learning_rate": 2.984258350190758e-06,
      "loss": 0.0139,
      "step": 2149460
    },
    {
      "epoch": 3.5176711638289375,
      "grad_norm": 0.1150813028216362,
      "learning_rate": 2.984192457977241e-06,
      "loss": 0.01,
      "step": 2149480
    },
    {
      "epoch": 3.517703894267591,
      "grad_norm": 0.276779443025589,
      "learning_rate": 2.9841265657637244e-06,
      "loss": 0.0095,
      "step": 2149500
    },
    {
      "epoch": 3.5177366247062443,
      "grad_norm": 0.6613513827323914,
      "learning_rate": 2.984060673550207e-06,
      "loss": 0.0125,
      "step": 2149520
    },
    {
      "epoch": 3.517769355144898,
      "grad_norm": 0.2897486388683319,
      "learning_rate": 2.98399478133669e-06,
      "loss": 0.0084,
      "step": 2149540
    },
    {
      "epoch": 3.517802085583551,
      "grad_norm": 0.3695732355117798,
      "learning_rate": 2.9839288891231726e-06,
      "loss": 0.0108,
      "step": 2149560
    },
    {
      "epoch": 3.517834816022204,
      "grad_norm": 0.16983194649219513,
      "learning_rate": 2.9838629969096557e-06,
      "loss": 0.0119,
      "step": 2149580
    },
    {
      "epoch": 3.5178675464608578,
      "grad_norm": 0.10915324091911316,
      "learning_rate": 2.9837971046961385e-06,
      "loss": 0.0099,
      "step": 2149600
    },
    {
      "epoch": 3.517900276899511,
      "grad_norm": 0.11040391027927399,
      "learning_rate": 2.9837312124826212e-06,
      "loss": 0.0086,
      "step": 2149620
    },
    {
      "epoch": 3.5179330073381645,
      "grad_norm": 0.2813991904258728,
      "learning_rate": 2.983665320269104e-06,
      "loss": 0.0108,
      "step": 2149640
    },
    {
      "epoch": 3.5179657377768176,
      "grad_norm": 0.173524871468544,
      "learning_rate": 2.9835994280555867e-06,
      "loss": 0.0097,
      "step": 2149660
    },
    {
      "epoch": 3.5179984682154712,
      "grad_norm": 0.16915525496006012,
      "learning_rate": 2.98353353584207e-06,
      "loss": 0.0099,
      "step": 2149680
    },
    {
      "epoch": 3.5180311986541244,
      "grad_norm": 0.12310920655727386,
      "learning_rate": 2.9834676436285526e-06,
      "loss": 0.0118,
      "step": 2149700
    },
    {
      "epoch": 3.5180639290927775,
      "grad_norm": 0.16056066751480103,
      "learning_rate": 2.9834017514150353e-06,
      "loss": 0.0203,
      "step": 2149720
    },
    {
      "epoch": 3.518096659531431,
      "grad_norm": 0.10588933527469635,
      "learning_rate": 2.983335859201518e-06,
      "loss": 0.0131,
      "step": 2149740
    },
    {
      "epoch": 3.5181293899700843,
      "grad_norm": 0.12374714016914368,
      "learning_rate": 2.9832699669880012e-06,
      "loss": 0.012,
      "step": 2149760
    },
    {
      "epoch": 3.518162120408738,
      "grad_norm": 0.5531166195869446,
      "learning_rate": 2.983204074774484e-06,
      "loss": 0.0116,
      "step": 2149780
    },
    {
      "epoch": 3.518194850847391,
      "grad_norm": 0.11381052434444427,
      "learning_rate": 2.9831381825609667e-06,
      "loss": 0.0076,
      "step": 2149800
    },
    {
      "epoch": 3.5182275812860446,
      "grad_norm": 0.18355295062065125,
      "learning_rate": 2.9830722903474503e-06,
      "loss": 0.0135,
      "step": 2149820
    },
    {
      "epoch": 3.5182603117246978,
      "grad_norm": 0.367823988199234,
      "learning_rate": 2.983006398133933e-06,
      "loss": 0.0101,
      "step": 2149840
    },
    {
      "epoch": 3.518293042163351,
      "grad_norm": 0.19224300980567932,
      "learning_rate": 2.9829405059204158e-06,
      "loss": 0.0112,
      "step": 2149860
    },
    {
      "epoch": 3.5183257726020045,
      "grad_norm": 0.17020763456821442,
      "learning_rate": 2.9828746137068985e-06,
      "loss": 0.0119,
      "step": 2149880
    },
    {
      "epoch": 3.5183585030406577,
      "grad_norm": 0.4645145535469055,
      "learning_rate": 2.9828087214933817e-06,
      "loss": 0.0114,
      "step": 2149900
    },
    {
      "epoch": 3.518391233479311,
      "grad_norm": 0.13985274732112885,
      "learning_rate": 2.9827428292798644e-06,
      "loss": 0.007,
      "step": 2149920
    },
    {
      "epoch": 3.5184239639179644,
      "grad_norm": 0.23716764152050018,
      "learning_rate": 2.982676937066347e-06,
      "loss": 0.0137,
      "step": 2149940
    },
    {
      "epoch": 3.518456694356618,
      "grad_norm": 0.1960792988538742,
      "learning_rate": 2.98261104485283e-06,
      "loss": 0.0101,
      "step": 2149960
    },
    {
      "epoch": 3.518489424795271,
      "grad_norm": 0.5017139911651611,
      "learning_rate": 2.982545152639313e-06,
      "loss": 0.0115,
      "step": 2149980
    },
    {
      "epoch": 3.5185221552339243,
      "grad_norm": 0.29533615708351135,
      "learning_rate": 2.982479260425796e-06,
      "loss": 0.0108,
      "step": 2150000
    },
    {
      "epoch": 3.5185221552339243,
      "eval_loss": 0.0066763306967914104,
      "eval_runtime": 6497.4835,
      "eval_samples_per_second": 158.193,
      "eval_steps_per_second": 15.819,
      "eval_sts-dev_pearson_cosine": 0.9847751713363588,
      "eval_sts-dev_spearman_cosine": 0.8954390154318588,
      "step": 2150000
    },
    {
      "epoch": 3.518554885672578,
      "grad_norm": 0.3946974277496338,
      "learning_rate": 2.9824133682122786e-06,
      "loss": 0.0115,
      "step": 2150020
    },
    {
      "epoch": 3.518587616111231,
      "grad_norm": 0.3094989061355591,
      "learning_rate": 2.9823474759987613e-06,
      "loss": 0.0083,
      "step": 2150040
    },
    {
      "epoch": 3.518620346549884,
      "grad_norm": 0.2087925374507904,
      "learning_rate": 2.982281583785244e-06,
      "loss": 0.0143,
      "step": 2150060
    },
    {
      "epoch": 3.518653076988538,
      "grad_norm": 0.522570013999939,
      "learning_rate": 2.982215691571727e-06,
      "loss": 0.0082,
      "step": 2150080
    },
    {
      "epoch": 3.5186858074271914,
      "grad_norm": 0.16190026700496674,
      "learning_rate": 2.98214979935821e-06,
      "loss": 0.0107,
      "step": 2150100
    },
    {
      "epoch": 3.5187185378658445,
      "grad_norm": 0.29530763626098633,
      "learning_rate": 2.9820839071446927e-06,
      "loss": 0.0111,
      "step": 2150120
    },
    {
      "epoch": 3.5187512683044977,
      "grad_norm": 0.5320578813552856,
      "learning_rate": 2.9820180149311754e-06,
      "loss": 0.0101,
      "step": 2150140
    },
    {
      "epoch": 3.5187839987431513,
      "grad_norm": 0.1607791930437088,
      "learning_rate": 2.9819521227176586e-06,
      "loss": 0.0055,
      "step": 2150160
    },
    {
      "epoch": 3.5188167291818044,
      "grad_norm": 0.26187726855278015,
      "learning_rate": 2.9818862305041417e-06,
      "loss": 0.0096,
      "step": 2150180
    },
    {
      "epoch": 3.5188494596204576,
      "grad_norm": 0.13675516843795776,
      "learning_rate": 2.9818203382906245e-06,
      "loss": 0.0085,
      "step": 2150200
    },
    {
      "epoch": 3.518882190059111,
      "grad_norm": 0.17364516854286194,
      "learning_rate": 2.9817544460771076e-06,
      "loss": 0.0088,
      "step": 2150220
    },
    {
      "epoch": 3.5189149204977648,
      "grad_norm": 0.23328225314617157,
      "learning_rate": 2.9816885538635904e-06,
      "loss": 0.0101,
      "step": 2150240
    },
    {
      "epoch": 3.518947650936418,
      "grad_norm": 0.1634049415588379,
      "learning_rate": 2.981622661650073e-06,
      "loss": 0.0106,
      "step": 2150260
    },
    {
      "epoch": 3.518980381375071,
      "grad_norm": 0.1011626347899437,
      "learning_rate": 2.981556769436556e-06,
      "loss": 0.0081,
      "step": 2150280
    },
    {
      "epoch": 3.5190131118137247,
      "grad_norm": 0.16636238992214203,
      "learning_rate": 2.981490877223039e-06,
      "loss": 0.0135,
      "step": 2150300
    },
    {
      "epoch": 3.519045842252378,
      "grad_norm": 0.3071117401123047,
      "learning_rate": 2.9814249850095218e-06,
      "loss": 0.0104,
      "step": 2150320
    },
    {
      "epoch": 3.519078572691031,
      "grad_norm": 0.6882989406585693,
      "learning_rate": 2.9813590927960045e-06,
      "loss": 0.0125,
      "step": 2150340
    },
    {
      "epoch": 3.5191113031296846,
      "grad_norm": 0.10545536875724792,
      "learning_rate": 2.9812932005824872e-06,
      "loss": 0.0115,
      "step": 2150360
    },
    {
      "epoch": 3.519144033568338,
      "grad_norm": 0.16076527535915375,
      "learning_rate": 2.9812273083689704e-06,
      "loss": 0.0193,
      "step": 2150380
    },
    {
      "epoch": 3.5191767640069913,
      "grad_norm": 0.2106780856847763,
      "learning_rate": 2.981161416155453e-06,
      "loss": 0.013,
      "step": 2150400
    },
    {
      "epoch": 3.5192094944456445,
      "grad_norm": 0.47141772508621216,
      "learning_rate": 2.981095523941936e-06,
      "loss": 0.0103,
      "step": 2150420
    },
    {
      "epoch": 3.519242224884298,
      "grad_norm": 0.25236013531684875,
      "learning_rate": 2.9810296317284186e-06,
      "loss": 0.012,
      "step": 2150440
    },
    {
      "epoch": 3.519274955322951,
      "grad_norm": 0.27564239501953125,
      "learning_rate": 2.980963739514902e-06,
      "loss": 0.0105,
      "step": 2150460
    },
    {
      "epoch": 3.5193076857616044,
      "grad_norm": 0.6913921236991882,
      "learning_rate": 2.9808978473013845e-06,
      "loss": 0.0151,
      "step": 2150480
    },
    {
      "epoch": 3.519340416200258,
      "grad_norm": 0.36301320791244507,
      "learning_rate": 2.9808319550878673e-06,
      "loss": 0.0125,
      "step": 2150500
    },
    {
      "epoch": 3.519373146638911,
      "grad_norm": 0.11217928677797318,
      "learning_rate": 2.98076606287435e-06,
      "loss": 0.0085,
      "step": 2150520
    },
    {
      "epoch": 3.5194058770775647,
      "grad_norm": 0.08029017597436905,
      "learning_rate": 2.9807001706608336e-06,
      "loss": 0.0078,
      "step": 2150540
    },
    {
      "epoch": 3.519438607516218,
      "grad_norm": 0.23735080659389496,
      "learning_rate": 2.9806342784473163e-06,
      "loss": 0.0123,
      "step": 2150560
    },
    {
      "epoch": 3.5194713379548714,
      "grad_norm": 0.191117063164711,
      "learning_rate": 2.980568386233799e-06,
      "loss": 0.0168,
      "step": 2150580
    },
    {
      "epoch": 3.5195040683935246,
      "grad_norm": 0.4081811010837555,
      "learning_rate": 2.980502494020282e-06,
      "loss": 0.0134,
      "step": 2150600
    },
    {
      "epoch": 3.5195367988321777,
      "grad_norm": 0.1078781858086586,
      "learning_rate": 2.980436601806765e-06,
      "loss": 0.0098,
      "step": 2150620
    },
    {
      "epoch": 3.5195695292708313,
      "grad_norm": 0.38817569613456726,
      "learning_rate": 2.9803707095932477e-06,
      "loss": 0.0131,
      "step": 2150640
    },
    {
      "epoch": 3.5196022597094845,
      "grad_norm": 0.27247944474220276,
      "learning_rate": 2.9803048173797305e-06,
      "loss": 0.0094,
      "step": 2150660
    },
    {
      "epoch": 3.519634990148138,
      "grad_norm": 0.09389759600162506,
      "learning_rate": 2.980238925166213e-06,
      "loss": 0.0071,
      "step": 2150680
    },
    {
      "epoch": 3.5196677205867912,
      "grad_norm": 0.04674467444419861,
      "learning_rate": 2.9801730329526964e-06,
      "loss": 0.0116,
      "step": 2150700
    },
    {
      "epoch": 3.519700451025445,
      "grad_norm": 0.5954174995422363,
      "learning_rate": 2.980107140739179e-06,
      "loss": 0.016,
      "step": 2150720
    },
    {
      "epoch": 3.519733181464098,
      "grad_norm": 0.2393178790807724,
      "learning_rate": 2.980041248525662e-06,
      "loss": 0.0102,
      "step": 2150740
    },
    {
      "epoch": 3.519765911902751,
      "grad_norm": 0.6024001836776733,
      "learning_rate": 2.9799753563121446e-06,
      "loss": 0.0102,
      "step": 2150760
    },
    {
      "epoch": 3.5197986423414047,
      "grad_norm": 0.30698487162590027,
      "learning_rate": 2.9799094640986277e-06,
      "loss": 0.0097,
      "step": 2150780
    },
    {
      "epoch": 3.519831372780058,
      "grad_norm": 0.20459674298763275,
      "learning_rate": 2.9798435718851105e-06,
      "loss": 0.0113,
      "step": 2150800
    },
    {
      "epoch": 3.5198641032187115,
      "grad_norm": 0.09729493409395218,
      "learning_rate": 2.9797776796715932e-06,
      "loss": 0.0143,
      "step": 2150820
    },
    {
      "epoch": 3.5198968336573646,
      "grad_norm": 0.07961270213127136,
      "learning_rate": 2.979711787458076e-06,
      "loss": 0.0151,
      "step": 2150840
    },
    {
      "epoch": 3.519929564096018,
      "grad_norm": 0.29714712500572205,
      "learning_rate": 2.979645895244559e-06,
      "loss": 0.0092,
      "step": 2150860
    },
    {
      "epoch": 3.5199622945346714,
      "grad_norm": 0.2797679305076599,
      "learning_rate": 2.9795800030310423e-06,
      "loss": 0.0078,
      "step": 2150880
    },
    {
      "epoch": 3.5199950249733245,
      "grad_norm": 0.1648186892271042,
      "learning_rate": 2.979514110817525e-06,
      "loss": 0.0073,
      "step": 2150900
    },
    {
      "epoch": 3.520027755411978,
      "grad_norm": 0.06719309836626053,
      "learning_rate": 2.979448218604008e-06,
      "loss": 0.0114,
      "step": 2150920
    },
    {
      "epoch": 3.5200604858506312,
      "grad_norm": 0.26306334137916565,
      "learning_rate": 2.979382326390491e-06,
      "loss": 0.0101,
      "step": 2150940
    },
    {
      "epoch": 3.520093216289285,
      "grad_norm": 0.11459653824567795,
      "learning_rate": 2.9793164341769737e-06,
      "loss": 0.0137,
      "step": 2150960
    },
    {
      "epoch": 3.520125946727938,
      "grad_norm": 0.06140150874853134,
      "learning_rate": 2.9792505419634564e-06,
      "loss": 0.0138,
      "step": 2150980
    },
    {
      "epoch": 3.5201586771665916,
      "grad_norm": 0.13347138464450836,
      "learning_rate": 2.9791846497499396e-06,
      "loss": 0.0145,
      "step": 2151000
    },
    {
      "epoch": 3.5201914076052447,
      "grad_norm": 0.15760427713394165,
      "learning_rate": 2.9791187575364223e-06,
      "loss": 0.0097,
      "step": 2151020
    },
    {
      "epoch": 3.520224138043898,
      "grad_norm": 0.17205309867858887,
      "learning_rate": 2.979052865322905e-06,
      "loss": 0.0105,
      "step": 2151040
    },
    {
      "epoch": 3.5202568684825515,
      "grad_norm": 0.16267706453800201,
      "learning_rate": 2.978986973109388e-06,
      "loss": 0.0147,
      "step": 2151060
    },
    {
      "epoch": 3.5202895989212046,
      "grad_norm": 0.6625791192054749,
      "learning_rate": 2.9789210808958705e-06,
      "loss": 0.006,
      "step": 2151080
    },
    {
      "epoch": 3.5203223293598582,
      "grad_norm": 0.8095223307609558,
      "learning_rate": 2.9788551886823537e-06,
      "loss": 0.0094,
      "step": 2151100
    },
    {
      "epoch": 3.5203550597985114,
      "grad_norm": 0.3034912347793579,
      "learning_rate": 2.9787892964688364e-06,
      "loss": 0.0126,
      "step": 2151120
    },
    {
      "epoch": 3.520387790237165,
      "grad_norm": 0.7335942983627319,
      "learning_rate": 2.978723404255319e-06,
      "loss": 0.0134,
      "step": 2151140
    },
    {
      "epoch": 3.520420520675818,
      "grad_norm": 0.18781061470508575,
      "learning_rate": 2.978657512041802e-06,
      "loss": 0.0126,
      "step": 2151160
    },
    {
      "epoch": 3.5204532511144713,
      "grad_norm": 0.37242183089256287,
      "learning_rate": 2.978591619828285e-06,
      "loss": 0.0082,
      "step": 2151180
    },
    {
      "epoch": 3.520485981553125,
      "grad_norm": 0.1430940330028534,
      "learning_rate": 2.978525727614768e-06,
      "loss": 0.0073,
      "step": 2151200
    },
    {
      "epoch": 3.520518711991778,
      "grad_norm": 0.6722710132598877,
      "learning_rate": 2.9784598354012506e-06,
      "loss": 0.0106,
      "step": 2151220
    },
    {
      "epoch": 3.5205514424304316,
      "grad_norm": 0.14444293081760406,
      "learning_rate": 2.978393943187734e-06,
      "loss": 0.009,
      "step": 2151240
    },
    {
      "epoch": 3.5205841728690848,
      "grad_norm": 0.34447944164276123,
      "learning_rate": 2.978328050974217e-06,
      "loss": 0.0128,
      "step": 2151260
    },
    {
      "epoch": 3.5206169033077384,
      "grad_norm": 0.10550597310066223,
      "learning_rate": 2.9782621587606996e-06,
      "loss": 0.0088,
      "step": 2151280
    },
    {
      "epoch": 3.5206496337463915,
      "grad_norm": 0.2243591696023941,
      "learning_rate": 2.9781962665471824e-06,
      "loss": 0.0128,
      "step": 2151300
    },
    {
      "epoch": 3.5206823641850447,
      "grad_norm": 0.214484304189682,
      "learning_rate": 2.9781303743336655e-06,
      "loss": 0.0093,
      "step": 2151320
    },
    {
      "epoch": 3.5207150946236982,
      "grad_norm": 0.322194367647171,
      "learning_rate": 2.9780644821201483e-06,
      "loss": 0.015,
      "step": 2151340
    },
    {
      "epoch": 3.5207478250623514,
      "grad_norm": 0.21029210090637207,
      "learning_rate": 2.977998589906631e-06,
      "loss": 0.0093,
      "step": 2151360
    },
    {
      "epoch": 3.5207805555010045,
      "grad_norm": 0.19567742943763733,
      "learning_rate": 2.9779326976931137e-06,
      "loss": 0.0079,
      "step": 2151380
    },
    {
      "epoch": 3.520813285939658,
      "grad_norm": 0.24797900021076202,
      "learning_rate": 2.977866805479597e-06,
      "loss": 0.011,
      "step": 2151400
    },
    {
      "epoch": 3.5208460163783117,
      "grad_norm": 0.3044632077217102,
      "learning_rate": 2.9778009132660797e-06,
      "loss": 0.0104,
      "step": 2151420
    },
    {
      "epoch": 3.520878746816965,
      "grad_norm": 0.3276779055595398,
      "learning_rate": 2.9777350210525624e-06,
      "loss": 0.0082,
      "step": 2151440
    },
    {
      "epoch": 3.520911477255618,
      "grad_norm": 0.28418973088264465,
      "learning_rate": 2.977669128839045e-06,
      "loss": 0.0102,
      "step": 2151460
    },
    {
      "epoch": 3.5209442076942716,
      "grad_norm": 0.3172442317008972,
      "learning_rate": 2.9776032366255283e-06,
      "loss": 0.0077,
      "step": 2151480
    },
    {
      "epoch": 3.520976938132925,
      "grad_norm": 0.347650408744812,
      "learning_rate": 2.977537344412011e-06,
      "loss": 0.0142,
      "step": 2151500
    },
    {
      "epoch": 3.521009668571578,
      "grad_norm": 0.5403216481208801,
      "learning_rate": 2.9774714521984938e-06,
      "loss": 0.0146,
      "step": 2151520
    },
    {
      "epoch": 3.5210423990102315,
      "grad_norm": 0.42217180132865906,
      "learning_rate": 2.9774055599849765e-06,
      "loss": 0.0062,
      "step": 2151540
    },
    {
      "epoch": 3.521075129448885,
      "grad_norm": 0.7288459539413452,
      "learning_rate": 2.9773396677714593e-06,
      "loss": 0.0119,
      "step": 2151560
    },
    {
      "epoch": 3.5211078598875383,
      "grad_norm": 0.2200470119714737,
      "learning_rate": 2.977273775557943e-06,
      "loss": 0.0066,
      "step": 2151580
    },
    {
      "epoch": 3.5211405903261914,
      "grad_norm": 0.33858397603034973,
      "learning_rate": 2.9772078833444256e-06,
      "loss": 0.0076,
      "step": 2151600
    },
    {
      "epoch": 3.521173320764845,
      "grad_norm": 0.08280392736196518,
      "learning_rate": 2.9771419911309083e-06,
      "loss": 0.0129,
      "step": 2151620
    },
    {
      "epoch": 3.521206051203498,
      "grad_norm": 0.1312355250120163,
      "learning_rate": 2.9770760989173915e-06,
      "loss": 0.0109,
      "step": 2151640
    },
    {
      "epoch": 3.5212387816421513,
      "grad_norm": 0.37924301624298096,
      "learning_rate": 2.9770102067038742e-06,
      "loss": 0.0088,
      "step": 2151660
    },
    {
      "epoch": 3.521271512080805,
      "grad_norm": 0.2250826358795166,
      "learning_rate": 2.976944314490357e-06,
      "loss": 0.0078,
      "step": 2151680
    },
    {
      "epoch": 3.5213042425194585,
      "grad_norm": 0.36064887046813965,
      "learning_rate": 2.9768784222768397e-06,
      "loss": 0.0082,
      "step": 2151700
    },
    {
      "epoch": 3.5213369729581117,
      "grad_norm": 0.15208406746387482,
      "learning_rate": 2.976812530063323e-06,
      "loss": 0.01,
      "step": 2151720
    },
    {
      "epoch": 3.521369703396765,
      "grad_norm": 0.3539489805698395,
      "learning_rate": 2.9767466378498056e-06,
      "loss": 0.0111,
      "step": 2151740
    },
    {
      "epoch": 3.5214024338354184,
      "grad_norm": 0.5626556277275085,
      "learning_rate": 2.9766807456362883e-06,
      "loss": 0.0104,
      "step": 2151760
    },
    {
      "epoch": 3.5214351642740715,
      "grad_norm": 0.20469185709953308,
      "learning_rate": 2.976614853422771e-06,
      "loss": 0.013,
      "step": 2151780
    },
    {
      "epoch": 3.5214678947127247,
      "grad_norm": 0.2355634868144989,
      "learning_rate": 2.9765489612092542e-06,
      "loss": 0.0101,
      "step": 2151800
    },
    {
      "epoch": 3.5215006251513783,
      "grad_norm": 0.3911377787590027,
      "learning_rate": 2.976483068995737e-06,
      "loss": 0.0078,
      "step": 2151820
    },
    {
      "epoch": 3.521533355590032,
      "grad_norm": 0.298755407333374,
      "learning_rate": 2.9764171767822197e-06,
      "loss": 0.0105,
      "step": 2151840
    },
    {
      "epoch": 3.521566086028685,
      "grad_norm": 0.36782655119895935,
      "learning_rate": 2.9763512845687025e-06,
      "loss": 0.0098,
      "step": 2151860
    },
    {
      "epoch": 3.521598816467338,
      "grad_norm": 0.9942547082901001,
      "learning_rate": 2.9762853923551856e-06,
      "loss": 0.0139,
      "step": 2151880
    },
    {
      "epoch": 3.521631546905992,
      "grad_norm": 0.11224498599767685,
      "learning_rate": 2.9762195001416684e-06,
      "loss": 0.0128,
      "step": 2151900
    },
    {
      "epoch": 3.521664277344645,
      "grad_norm": 0.08446471393108368,
      "learning_rate": 2.976153607928151e-06,
      "loss": 0.012,
      "step": 2151920
    },
    {
      "epoch": 3.521697007783298,
      "grad_norm": 0.38655754923820496,
      "learning_rate": 2.9760877157146347e-06,
      "loss": 0.0145,
      "step": 2151940
    },
    {
      "epoch": 3.5217297382219517,
      "grad_norm": 0.20323051512241364,
      "learning_rate": 2.9760218235011174e-06,
      "loss": 0.0079,
      "step": 2151960
    },
    {
      "epoch": 3.521762468660605,
      "grad_norm": 0.07210955023765564,
      "learning_rate": 2.9759559312876e-06,
      "loss": 0.0067,
      "step": 2151980
    },
    {
      "epoch": 3.5217951990992584,
      "grad_norm": 0.2132442742586136,
      "learning_rate": 2.975890039074083e-06,
      "loss": 0.009,
      "step": 2152000
    },
    {
      "epoch": 3.5218279295379116,
      "grad_norm": 0.16982264816761017,
      "learning_rate": 2.9758241468605657e-06,
      "loss": 0.0062,
      "step": 2152020
    },
    {
      "epoch": 3.521860659976565,
      "grad_norm": 0.12676531076431274,
      "learning_rate": 2.975758254647049e-06,
      "loss": 0.0103,
      "step": 2152040
    },
    {
      "epoch": 3.5218933904152183,
      "grad_norm": 0.4426848888397217,
      "learning_rate": 2.9756923624335316e-06,
      "loss": 0.0126,
      "step": 2152060
    },
    {
      "epoch": 3.5219261208538715,
      "grad_norm": 0.48715025186538696,
      "learning_rate": 2.9756264702200143e-06,
      "loss": 0.0141,
      "step": 2152080
    },
    {
      "epoch": 3.521958851292525,
      "grad_norm": 0.4483575224876404,
      "learning_rate": 2.975560578006497e-06,
      "loss": 0.0098,
      "step": 2152100
    },
    {
      "epoch": 3.521991581731178,
      "grad_norm": 0.19345414638519287,
      "learning_rate": 2.97549468579298e-06,
      "loss": 0.0138,
      "step": 2152120
    },
    {
      "epoch": 3.522024312169832,
      "grad_norm": 0.27548444271087646,
      "learning_rate": 2.975428793579463e-06,
      "loss": 0.0108,
      "step": 2152140
    },
    {
      "epoch": 3.522057042608485,
      "grad_norm": 0.19039182364940643,
      "learning_rate": 2.9753629013659457e-06,
      "loss": 0.0193,
      "step": 2152160
    },
    {
      "epoch": 3.5220897730471386,
      "grad_norm": 0.4646039307117462,
      "learning_rate": 2.9752970091524284e-06,
      "loss": 0.0174,
      "step": 2152180
    },
    {
      "epoch": 3.5221225034857917,
      "grad_norm": 0.18004195392131805,
      "learning_rate": 2.9752311169389116e-06,
      "loss": 0.0139,
      "step": 2152200
    },
    {
      "epoch": 3.522155233924445,
      "grad_norm": 0.2883720397949219,
      "learning_rate": 2.9751652247253943e-06,
      "loss": 0.0096,
      "step": 2152220
    },
    {
      "epoch": 3.5221879643630984,
      "grad_norm": 0.3130882680416107,
      "learning_rate": 2.975099332511877e-06,
      "loss": 0.0074,
      "step": 2152240
    },
    {
      "epoch": 3.5222206948017516,
      "grad_norm": 0.3547818064689636,
      "learning_rate": 2.97503344029836e-06,
      "loss": 0.009,
      "step": 2152260
    },
    {
      "epoch": 3.522253425240405,
      "grad_norm": 0.4228377044200897,
      "learning_rate": 2.974967548084843e-06,
      "loss": 0.0049,
      "step": 2152280
    },
    {
      "epoch": 3.5222861556790583,
      "grad_norm": 0.1840830147266388,
      "learning_rate": 2.974901655871326e-06,
      "loss": 0.0137,
      "step": 2152300
    },
    {
      "epoch": 3.522318886117712,
      "grad_norm": 0.7544933557510376,
      "learning_rate": 2.974835763657809e-06,
      "loss": 0.0144,
      "step": 2152320
    },
    {
      "epoch": 3.522351616556365,
      "grad_norm": 0.4175681471824646,
      "learning_rate": 2.974769871444292e-06,
      "loss": 0.0115,
      "step": 2152340
    },
    {
      "epoch": 3.5223843469950182,
      "grad_norm": 0.2789614498615265,
      "learning_rate": 2.9747039792307748e-06,
      "loss": 0.0131,
      "step": 2152360
    },
    {
      "epoch": 3.522417077433672,
      "grad_norm": 0.031034676358103752,
      "learning_rate": 2.9746380870172575e-06,
      "loss": 0.0108,
      "step": 2152380
    },
    {
      "epoch": 3.522449807872325,
      "grad_norm": 0.2778109014034271,
      "learning_rate": 2.9745721948037403e-06,
      "loss": 0.0095,
      "step": 2152400
    },
    {
      "epoch": 3.5224825383109786,
      "grad_norm": 0.27725499868392944,
      "learning_rate": 2.9745063025902234e-06,
      "loss": 0.0102,
      "step": 2152420
    },
    {
      "epoch": 3.5225152687496317,
      "grad_norm": 0.1164071187376976,
      "learning_rate": 2.974440410376706e-06,
      "loss": 0.0114,
      "step": 2152440
    },
    {
      "epoch": 3.5225479991882853,
      "grad_norm": 0.6638652682304382,
      "learning_rate": 2.974374518163189e-06,
      "loss": 0.0092,
      "step": 2152460
    },
    {
      "epoch": 3.5225807296269385,
      "grad_norm": 0.4707924723625183,
      "learning_rate": 2.9743086259496716e-06,
      "loss": 0.0112,
      "step": 2152480
    },
    {
      "epoch": 3.5226134600655916,
      "grad_norm": 0.25789695978164673,
      "learning_rate": 2.9742427337361544e-06,
      "loss": 0.0112,
      "step": 2152500
    },
    {
      "epoch": 3.522646190504245,
      "grad_norm": 0.29746997356414795,
      "learning_rate": 2.9741768415226375e-06,
      "loss": 0.0116,
      "step": 2152520
    },
    {
      "epoch": 3.5226789209428984,
      "grad_norm": 0.16014596819877625,
      "learning_rate": 2.9741109493091203e-06,
      "loss": 0.0115,
      "step": 2152540
    },
    {
      "epoch": 3.522711651381552,
      "grad_norm": 0.17946410179138184,
      "learning_rate": 2.974045057095603e-06,
      "loss": 0.0123,
      "step": 2152560
    },
    {
      "epoch": 3.522744381820205,
      "grad_norm": 0.07478231191635132,
      "learning_rate": 2.9739791648820858e-06,
      "loss": 0.0096,
      "step": 2152580
    },
    {
      "epoch": 3.5227771122588587,
      "grad_norm": 0.44460636377334595,
      "learning_rate": 2.973913272668569e-06,
      "loss": 0.0104,
      "step": 2152600
    },
    {
      "epoch": 3.522809842697512,
      "grad_norm": 0.22703737020492554,
      "learning_rate": 2.9738473804550517e-06,
      "loss": 0.0145,
      "step": 2152620
    },
    {
      "epoch": 3.522842573136165,
      "grad_norm": 3.38460111618042,
      "learning_rate": 2.973781488241535e-06,
      "loss": 0.0085,
      "step": 2152640
    },
    {
      "epoch": 3.5228753035748186,
      "grad_norm": 0.2224385291337967,
      "learning_rate": 2.973715596028018e-06,
      "loss": 0.008,
      "step": 2152660
    },
    {
      "epoch": 3.5229080340134717,
      "grad_norm": 0.14848455786705017,
      "learning_rate": 2.9736497038145007e-06,
      "loss": 0.0103,
      "step": 2152680
    },
    {
      "epoch": 3.5229407644521253,
      "grad_norm": 0.5566781759262085,
      "learning_rate": 2.9735838116009835e-06,
      "loss": 0.0076,
      "step": 2152700
    },
    {
      "epoch": 3.5229734948907785,
      "grad_norm": 0.378292977809906,
      "learning_rate": 2.973517919387466e-06,
      "loss": 0.0127,
      "step": 2152720
    },
    {
      "epoch": 3.523006225329432,
      "grad_norm": 0.42098304629325867,
      "learning_rate": 2.9734520271739494e-06,
      "loss": 0.0083,
      "step": 2152740
    },
    {
      "epoch": 3.5230389557680852,
      "grad_norm": 0.6701570153236389,
      "learning_rate": 2.973386134960432e-06,
      "loss": 0.0182,
      "step": 2152760
    },
    {
      "epoch": 3.5230716862067384,
      "grad_norm": 0.09908188134431839,
      "learning_rate": 2.973320242746915e-06,
      "loss": 0.0081,
      "step": 2152780
    },
    {
      "epoch": 3.523104416645392,
      "grad_norm": 0.20566719770431519,
      "learning_rate": 2.9732543505333976e-06,
      "loss": 0.0075,
      "step": 2152800
    },
    {
      "epoch": 3.523137147084045,
      "grad_norm": 1.358921766281128,
      "learning_rate": 2.9731884583198807e-06,
      "loss": 0.0107,
      "step": 2152820
    },
    {
      "epoch": 3.5231698775226987,
      "grad_norm": 0.43429070711135864,
      "learning_rate": 2.9731225661063635e-06,
      "loss": 0.0102,
      "step": 2152840
    },
    {
      "epoch": 3.523202607961352,
      "grad_norm": 1.2703516483306885,
      "learning_rate": 2.9730566738928462e-06,
      "loss": 0.0113,
      "step": 2152860
    },
    {
      "epoch": 3.5232353384000055,
      "grad_norm": 0.5905144214630127,
      "learning_rate": 2.972990781679329e-06,
      "loss": 0.0097,
      "step": 2152880
    },
    {
      "epoch": 3.5232680688386586,
      "grad_norm": 0.24536080658435822,
      "learning_rate": 2.972924889465812e-06,
      "loss": 0.0101,
      "step": 2152900
    },
    {
      "epoch": 3.5233007992773118,
      "grad_norm": 0.27082860469818115,
      "learning_rate": 2.972858997252295e-06,
      "loss": 0.0114,
      "step": 2152920
    },
    {
      "epoch": 3.5233335297159654,
      "grad_norm": 0.18114373087882996,
      "learning_rate": 2.9727931050387776e-06,
      "loss": 0.0099,
      "step": 2152940
    },
    {
      "epoch": 3.5233662601546185,
      "grad_norm": 0.19334207475185394,
      "learning_rate": 2.9727272128252604e-06,
      "loss": 0.0057,
      "step": 2152960
    },
    {
      "epoch": 3.5233989905932717,
      "grad_norm": 0.4635201096534729,
      "learning_rate": 2.972661320611743e-06,
      "loss": 0.0082,
      "step": 2152980
    },
    {
      "epoch": 3.5234317210319253,
      "grad_norm": 0.23540613055229187,
      "learning_rate": 2.9725954283982267e-06,
      "loss": 0.0068,
      "step": 2153000
    },
    {
      "epoch": 3.523464451470579,
      "grad_norm": 0.4093325138092041,
      "learning_rate": 2.9725295361847094e-06,
      "loss": 0.0097,
      "step": 2153020
    },
    {
      "epoch": 3.523497181909232,
      "grad_norm": 0.2366742342710495,
      "learning_rate": 2.972463643971192e-06,
      "loss": 0.0137,
      "step": 2153040
    },
    {
      "epoch": 3.523529912347885,
      "grad_norm": 0.4651452898979187,
      "learning_rate": 2.9723977517576753e-06,
      "loss": 0.0134,
      "step": 2153060
    },
    {
      "epoch": 3.5235626427865387,
      "grad_norm": 0.37309280037879944,
      "learning_rate": 2.972331859544158e-06,
      "loss": 0.0065,
      "step": 2153080
    },
    {
      "epoch": 3.523595373225192,
      "grad_norm": 0.3053973913192749,
      "learning_rate": 2.972265967330641e-06,
      "loss": 0.0082,
      "step": 2153100
    },
    {
      "epoch": 3.523628103663845,
      "grad_norm": 0.382621705532074,
      "learning_rate": 2.9722000751171235e-06,
      "loss": 0.0078,
      "step": 2153120
    },
    {
      "epoch": 3.5236608341024986,
      "grad_norm": 0.11148125678300858,
      "learning_rate": 2.9721341829036067e-06,
      "loss": 0.0175,
      "step": 2153140
    },
    {
      "epoch": 3.5236935645411522,
      "grad_norm": 0.5036365985870361,
      "learning_rate": 2.9720682906900894e-06,
      "loss": 0.0113,
      "step": 2153160
    },
    {
      "epoch": 3.5237262949798054,
      "grad_norm": 0.396907776594162,
      "learning_rate": 2.972002398476572e-06,
      "loss": 0.0117,
      "step": 2153180
    },
    {
      "epoch": 3.5237590254184585,
      "grad_norm": 0.7216289043426514,
      "learning_rate": 2.971936506263055e-06,
      "loss": 0.0124,
      "step": 2153200
    },
    {
      "epoch": 3.523791755857112,
      "grad_norm": 0.5153988003730774,
      "learning_rate": 2.971870614049538e-06,
      "loss": 0.011,
      "step": 2153220
    },
    {
      "epoch": 3.5238244862957653,
      "grad_norm": 0.15532994270324707,
      "learning_rate": 2.971804721836021e-06,
      "loss": 0.0096,
      "step": 2153240
    },
    {
      "epoch": 3.5238572167344184,
      "grad_norm": 0.1506938636302948,
      "learning_rate": 2.9717388296225036e-06,
      "loss": 0.0109,
      "step": 2153260
    },
    {
      "epoch": 3.523889947173072,
      "grad_norm": 0.03168464079499245,
      "learning_rate": 2.9716729374089863e-06,
      "loss": 0.009,
      "step": 2153280
    },
    {
      "epoch": 3.5239226776117256,
      "grad_norm": 0.22589902579784393,
      "learning_rate": 2.9716070451954695e-06,
      "loss": 0.0107,
      "step": 2153300
    },
    {
      "epoch": 3.5239554080503788,
      "grad_norm": 0.5417128801345825,
      "learning_rate": 2.971541152981952e-06,
      "loss": 0.0087,
      "step": 2153320
    },
    {
      "epoch": 3.523988138489032,
      "grad_norm": 0.1405266523361206,
      "learning_rate": 2.9714752607684354e-06,
      "loss": 0.0119,
      "step": 2153340
    },
    {
      "epoch": 3.5240208689276855,
      "grad_norm": 0.12424768507480621,
      "learning_rate": 2.9714093685549185e-06,
      "loss": 0.0081,
      "step": 2153360
    },
    {
      "epoch": 3.5240535993663387,
      "grad_norm": 0.16191469132900238,
      "learning_rate": 2.9713434763414013e-06,
      "loss": 0.0172,
      "step": 2153380
    },
    {
      "epoch": 3.524086329804992,
      "grad_norm": 0.2217177003622055,
      "learning_rate": 2.971277584127884e-06,
      "loss": 0.0255,
      "step": 2153400
    },
    {
      "epoch": 3.5241190602436454,
      "grad_norm": 0.21773582696914673,
      "learning_rate": 2.9712116919143668e-06,
      "loss": 0.0084,
      "step": 2153420
    },
    {
      "epoch": 3.524151790682299,
      "grad_norm": 0.2444482147693634,
      "learning_rate": 2.97114579970085e-06,
      "loss": 0.0124,
      "step": 2153440
    },
    {
      "epoch": 3.524184521120952,
      "grad_norm": 0.3015195429325104,
      "learning_rate": 2.9710799074873327e-06,
      "loss": 0.0135,
      "step": 2153460
    },
    {
      "epoch": 3.5242172515596053,
      "grad_norm": 0.11707239598035812,
      "learning_rate": 2.9710140152738154e-06,
      "loss": 0.0127,
      "step": 2153480
    },
    {
      "epoch": 3.524249981998259,
      "grad_norm": 0.2796218693256378,
      "learning_rate": 2.970948123060298e-06,
      "loss": 0.0049,
      "step": 2153500
    },
    {
      "epoch": 3.524282712436912,
      "grad_norm": 0.2537477910518646,
      "learning_rate": 2.970882230846781e-06,
      "loss": 0.0107,
      "step": 2153520
    },
    {
      "epoch": 3.524315442875565,
      "grad_norm": 0.09715314209461212,
      "learning_rate": 2.970816338633264e-06,
      "loss": 0.0133,
      "step": 2153540
    },
    {
      "epoch": 3.524348173314219,
      "grad_norm": 0.053252942860126495,
      "learning_rate": 2.9707504464197468e-06,
      "loss": 0.0095,
      "step": 2153560
    },
    {
      "epoch": 3.524380903752872,
      "grad_norm": 0.14100699126720428,
      "learning_rate": 2.9706845542062295e-06,
      "loss": 0.0141,
      "step": 2153580
    },
    {
      "epoch": 3.5244136341915255,
      "grad_norm": 0.4835285246372223,
      "learning_rate": 2.9706186619927123e-06,
      "loss": 0.0089,
      "step": 2153600
    },
    {
      "epoch": 3.5244463646301787,
      "grad_norm": 0.3275614380836487,
      "learning_rate": 2.9705527697791954e-06,
      "loss": 0.009,
      "step": 2153620
    },
    {
      "epoch": 3.5244790950688323,
      "grad_norm": 0.20409521460533142,
      "learning_rate": 2.970486877565678e-06,
      "loss": 0.0097,
      "step": 2153640
    },
    {
      "epoch": 3.5245118255074854,
      "grad_norm": 0.38569554686546326,
      "learning_rate": 2.970420985352161e-06,
      "loss": 0.0104,
      "step": 2153660
    },
    {
      "epoch": 3.5245445559461386,
      "grad_norm": 0.27574726939201355,
      "learning_rate": 2.9703550931386436e-06,
      "loss": 0.0129,
      "step": 2153680
    },
    {
      "epoch": 3.524577286384792,
      "grad_norm": 0.25355520844459534,
      "learning_rate": 2.9702892009251272e-06,
      "loss": 0.0119,
      "step": 2153700
    },
    {
      "epoch": 3.5246100168234453,
      "grad_norm": 0.3240658640861511,
      "learning_rate": 2.97022330871161e-06,
      "loss": 0.0115,
      "step": 2153720
    },
    {
      "epoch": 3.524642747262099,
      "grad_norm": 0.5947187542915344,
      "learning_rate": 2.9701574164980927e-06,
      "loss": 0.0116,
      "step": 2153740
    },
    {
      "epoch": 3.524675477700752,
      "grad_norm": 0.480814129114151,
      "learning_rate": 2.970091524284576e-06,
      "loss": 0.0157,
      "step": 2153760
    },
    {
      "epoch": 3.5247082081394057,
      "grad_norm": 0.5252787470817566,
      "learning_rate": 2.9700256320710586e-06,
      "loss": 0.0095,
      "step": 2153780
    },
    {
      "epoch": 3.524740938578059,
      "grad_norm": 0.15923073887825012,
      "learning_rate": 2.9699597398575414e-06,
      "loss": 0.0064,
      "step": 2153800
    },
    {
      "epoch": 3.524773669016712,
      "grad_norm": 0.2692868411540985,
      "learning_rate": 2.969893847644024e-06,
      "loss": 0.0107,
      "step": 2153820
    },
    {
      "epoch": 3.5248063994553656,
      "grad_norm": 0.09348027408123016,
      "learning_rate": 2.9698279554305073e-06,
      "loss": 0.0124,
      "step": 2153840
    },
    {
      "epoch": 3.5248391298940187,
      "grad_norm": 0.41269052028656006,
      "learning_rate": 2.96976206321699e-06,
      "loss": 0.0112,
      "step": 2153860
    },
    {
      "epoch": 3.5248718603326723,
      "grad_norm": 0.47948598861694336,
      "learning_rate": 2.9696961710034727e-06,
      "loss": 0.012,
      "step": 2153880
    },
    {
      "epoch": 3.5249045907713255,
      "grad_norm": 0.2160211056470871,
      "learning_rate": 2.9696302787899555e-06,
      "loss": 0.0161,
      "step": 2153900
    },
    {
      "epoch": 3.524937321209979,
      "grad_norm": 0.17827118933200836,
      "learning_rate": 2.9695643865764382e-06,
      "loss": 0.0132,
      "step": 2153920
    },
    {
      "epoch": 3.524970051648632,
      "grad_norm": 0.1956178992986679,
      "learning_rate": 2.9694984943629214e-06,
      "loss": 0.0136,
      "step": 2153940
    },
    {
      "epoch": 3.5250027820872853,
      "grad_norm": 0.2891954183578491,
      "learning_rate": 2.969432602149404e-06,
      "loss": 0.0118,
      "step": 2153960
    },
    {
      "epoch": 3.525035512525939,
      "grad_norm": 0.14534331858158112,
      "learning_rate": 2.969366709935887e-06,
      "loss": 0.0154,
      "step": 2153980
    },
    {
      "epoch": 3.525068242964592,
      "grad_norm": 0.3237200379371643,
      "learning_rate": 2.9693008177223696e-06,
      "loss": 0.0127,
      "step": 2154000
    },
    {
      "epoch": 3.5251009734032457,
      "grad_norm": 0.3279440999031067,
      "learning_rate": 2.9692349255088528e-06,
      "loss": 0.0094,
      "step": 2154020
    },
    {
      "epoch": 3.525133703841899,
      "grad_norm": 0.21276527643203735,
      "learning_rate": 2.9691690332953355e-06,
      "loss": 0.0095,
      "step": 2154040
    },
    {
      "epoch": 3.5251664342805524,
      "grad_norm": 0.7874869108200073,
      "learning_rate": 2.9691031410818187e-06,
      "loss": 0.0084,
      "step": 2154060
    },
    {
      "epoch": 3.5251991647192056,
      "grad_norm": 0.4356106221675873,
      "learning_rate": 2.969037248868302e-06,
      "loss": 0.0102,
      "step": 2154080
    },
    {
      "epoch": 3.5252318951578587,
      "grad_norm": 0.5607308149337769,
      "learning_rate": 2.9689713566547846e-06,
      "loss": 0.007,
      "step": 2154100
    },
    {
      "epoch": 3.5252646255965123,
      "grad_norm": 0.0945347249507904,
      "learning_rate": 2.9689054644412673e-06,
      "loss": 0.0123,
      "step": 2154120
    },
    {
      "epoch": 3.5252973560351655,
      "grad_norm": 0.36382603645324707,
      "learning_rate": 2.96883957222775e-06,
      "loss": 0.0114,
      "step": 2154140
    },
    {
      "epoch": 3.525330086473819,
      "grad_norm": 1.022947072982788,
      "learning_rate": 2.968773680014233e-06,
      "loss": 0.0146,
      "step": 2154160
    },
    {
      "epoch": 3.525362816912472,
      "grad_norm": 0.21428538858890533,
      "learning_rate": 2.968707787800716e-06,
      "loss": 0.0149,
      "step": 2154180
    },
    {
      "epoch": 3.525395547351126,
      "grad_norm": 0.088834747672081,
      "learning_rate": 2.9686418955871987e-06,
      "loss": 0.011,
      "step": 2154200
    },
    {
      "epoch": 3.525428277789779,
      "grad_norm": 0.15356166660785675,
      "learning_rate": 2.9685760033736814e-06,
      "loss": 0.0101,
      "step": 2154220
    },
    {
      "epoch": 3.525461008228432,
      "grad_norm": 0.23869600892066956,
      "learning_rate": 2.9685101111601646e-06,
      "loss": 0.0107,
      "step": 2154240
    },
    {
      "epoch": 3.5254937386670857,
      "grad_norm": 0.33075669407844543,
      "learning_rate": 2.9684442189466473e-06,
      "loss": 0.0133,
      "step": 2154260
    },
    {
      "epoch": 3.525526469105739,
      "grad_norm": 0.09669192135334015,
      "learning_rate": 2.96837832673313e-06,
      "loss": 0.0107,
      "step": 2154280
    },
    {
      "epoch": 3.5255591995443925,
      "grad_norm": 0.12467008084058762,
      "learning_rate": 2.968312434519613e-06,
      "loss": 0.0095,
      "step": 2154300
    },
    {
      "epoch": 3.5255919299830456,
      "grad_norm": 0.3849232792854309,
      "learning_rate": 2.968246542306096e-06,
      "loss": 0.0144,
      "step": 2154320
    },
    {
      "epoch": 3.525624660421699,
      "grad_norm": 0.22498202323913574,
      "learning_rate": 2.9681806500925787e-06,
      "loss": 0.0121,
      "step": 2154340
    },
    {
      "epoch": 3.5256573908603523,
      "grad_norm": 0.5972704887390137,
      "learning_rate": 2.9681147578790615e-06,
      "loss": 0.0104,
      "step": 2154360
    },
    {
      "epoch": 3.5256901212990055,
      "grad_norm": 0.4030172824859619,
      "learning_rate": 2.968048865665544e-06,
      "loss": 0.0073,
      "step": 2154380
    },
    {
      "epoch": 3.525722851737659,
      "grad_norm": 0.16908873617649078,
      "learning_rate": 2.9679829734520278e-06,
      "loss": 0.0119,
      "step": 2154400
    },
    {
      "epoch": 3.5257555821763122,
      "grad_norm": 0.5657287240028381,
      "learning_rate": 2.9679170812385105e-06,
      "loss": 0.016,
      "step": 2154420
    },
    {
      "epoch": 3.5257883126149654,
      "grad_norm": 0.34414273500442505,
      "learning_rate": 2.9678511890249933e-06,
      "loss": 0.0072,
      "step": 2154440
    },
    {
      "epoch": 3.525821043053619,
      "grad_norm": 0.17958638072013855,
      "learning_rate": 2.967785296811476e-06,
      "loss": 0.0089,
      "step": 2154460
    },
    {
      "epoch": 3.5258537734922726,
      "grad_norm": 0.7328304052352905,
      "learning_rate": 2.967719404597959e-06,
      "loss": 0.0104,
      "step": 2154480
    },
    {
      "epoch": 3.5258865039309257,
      "grad_norm": 0.12796662747859955,
      "learning_rate": 2.967653512384442e-06,
      "loss": 0.0101,
      "step": 2154500
    },
    {
      "epoch": 3.525919234369579,
      "grad_norm": 0.19402667880058289,
      "learning_rate": 2.9675876201709246e-06,
      "loss": 0.0095,
      "step": 2154520
    },
    {
      "epoch": 3.5259519648082325,
      "grad_norm": 0.3033478260040283,
      "learning_rate": 2.9675217279574074e-06,
      "loss": 0.0071,
      "step": 2154540
    },
    {
      "epoch": 3.5259846952468856,
      "grad_norm": 0.2098621279001236,
      "learning_rate": 2.9674558357438905e-06,
      "loss": 0.0106,
      "step": 2154560
    },
    {
      "epoch": 3.5260174256855388,
      "grad_norm": 0.3302750587463379,
      "learning_rate": 2.9673899435303733e-06,
      "loss": 0.0136,
      "step": 2154580
    },
    {
      "epoch": 3.5260501561241924,
      "grad_norm": 0.21401828527450562,
      "learning_rate": 2.967324051316856e-06,
      "loss": 0.0114,
      "step": 2154600
    },
    {
      "epoch": 3.526082886562846,
      "grad_norm": 0.2643318772315979,
      "learning_rate": 2.9672581591033388e-06,
      "loss": 0.0197,
      "step": 2154620
    },
    {
      "epoch": 3.526115617001499,
      "grad_norm": 0.2634263336658478,
      "learning_rate": 2.967192266889822e-06,
      "loss": 0.0083,
      "step": 2154640
    },
    {
      "epoch": 3.5261483474401523,
      "grad_norm": 0.15852339565753937,
      "learning_rate": 2.9671263746763047e-06,
      "loss": 0.0145,
      "step": 2154660
    },
    {
      "epoch": 3.526181077878806,
      "grad_norm": 0.4015437662601471,
      "learning_rate": 2.9670604824627874e-06,
      "loss": 0.0093,
      "step": 2154680
    },
    {
      "epoch": 3.526213808317459,
      "grad_norm": 2.7617533206939697,
      "learning_rate": 2.96699459024927e-06,
      "loss": 0.0105,
      "step": 2154700
    },
    {
      "epoch": 3.526246538756112,
      "grad_norm": 0.1649627536535263,
      "learning_rate": 2.9669286980357533e-06,
      "loss": 0.008,
      "step": 2154720
    },
    {
      "epoch": 3.5262792691947658,
      "grad_norm": 0.29093098640441895,
      "learning_rate": 2.966862805822236e-06,
      "loss": 0.0084,
      "step": 2154740
    },
    {
      "epoch": 3.5263119996334193,
      "grad_norm": 0.3862047791481018,
      "learning_rate": 2.966796913608719e-06,
      "loss": 0.0078,
      "step": 2154760
    },
    {
      "epoch": 3.5263447300720725,
      "grad_norm": 0.28783929347991943,
      "learning_rate": 2.9667310213952024e-06,
      "loss": 0.0159,
      "step": 2154780
    },
    {
      "epoch": 3.5263774605107256,
      "grad_norm": 0.4550752341747284,
      "learning_rate": 2.966665129181685e-06,
      "loss": 0.0095,
      "step": 2154800
    },
    {
      "epoch": 3.5264101909493792,
      "grad_norm": 0.10403917729854584,
      "learning_rate": 2.966599236968168e-06,
      "loss": 0.0126,
      "step": 2154820
    },
    {
      "epoch": 3.5264429213880324,
      "grad_norm": 0.16287320852279663,
      "learning_rate": 2.9665333447546506e-06,
      "loss": 0.0115,
      "step": 2154840
    },
    {
      "epoch": 3.5264756518266855,
      "grad_norm": 0.412627637386322,
      "learning_rate": 2.9664674525411338e-06,
      "loss": 0.0127,
      "step": 2154860
    },
    {
      "epoch": 3.526508382265339,
      "grad_norm": 0.5294751524925232,
      "learning_rate": 2.9664015603276165e-06,
      "loss": 0.0105,
      "step": 2154880
    },
    {
      "epoch": 3.5265411127039927,
      "grad_norm": 0.32602226734161377,
      "learning_rate": 2.9663356681140992e-06,
      "loss": 0.0084,
      "step": 2154900
    },
    {
      "epoch": 3.526573843142646,
      "grad_norm": 0.5364094376564026,
      "learning_rate": 2.966269775900582e-06,
      "loss": 0.0115,
      "step": 2154920
    },
    {
      "epoch": 3.526606573581299,
      "grad_norm": 0.07404765486717224,
      "learning_rate": 2.9662038836870647e-06,
      "loss": 0.0082,
      "step": 2154940
    },
    {
      "epoch": 3.5266393040199526,
      "grad_norm": 0.29351842403411865,
      "learning_rate": 2.966137991473548e-06,
      "loss": 0.0076,
      "step": 2154960
    },
    {
      "epoch": 3.5266720344586058,
      "grad_norm": 0.2749582529067993,
      "learning_rate": 2.9660720992600306e-06,
      "loss": 0.0151,
      "step": 2154980
    },
    {
      "epoch": 3.526704764897259,
      "grad_norm": 0.2100113332271576,
      "learning_rate": 2.9660062070465134e-06,
      "loss": 0.0101,
      "step": 2155000
    },
    {
      "epoch": 3.5267374953359125,
      "grad_norm": 0.5832861661911011,
      "learning_rate": 2.965940314832996e-06,
      "loss": 0.012,
      "step": 2155020
    },
    {
      "epoch": 3.5267702257745657,
      "grad_norm": 0.22853122651576996,
      "learning_rate": 2.9658744226194793e-06,
      "loss": 0.0087,
      "step": 2155040
    },
    {
      "epoch": 3.5268029562132193,
      "grad_norm": 0.16128094494342804,
      "learning_rate": 2.965808530405962e-06,
      "loss": 0.0095,
      "step": 2155060
    },
    {
      "epoch": 3.5268356866518724,
      "grad_norm": 0.1249055564403534,
      "learning_rate": 2.9657426381924447e-06,
      "loss": 0.0093,
      "step": 2155080
    },
    {
      "epoch": 3.526868417090526,
      "grad_norm": 0.5849021077156067,
      "learning_rate": 2.9656767459789283e-06,
      "loss": 0.0073,
      "step": 2155100
    },
    {
      "epoch": 3.526901147529179,
      "grad_norm": 0.20289815962314606,
      "learning_rate": 2.965610853765411e-06,
      "loss": 0.0085,
      "step": 2155120
    },
    {
      "epoch": 3.5269338779678323,
      "grad_norm": 0.053388915956020355,
      "learning_rate": 2.965544961551894e-06,
      "loss": 0.0108,
      "step": 2155140
    },
    {
      "epoch": 3.526966608406486,
      "grad_norm": 0.14606882631778717,
      "learning_rate": 2.9654790693383765e-06,
      "loss": 0.0087,
      "step": 2155160
    },
    {
      "epoch": 3.526999338845139,
      "grad_norm": 0.5869626998901367,
      "learning_rate": 2.9654131771248597e-06,
      "loss": 0.0116,
      "step": 2155180
    },
    {
      "epoch": 3.5270320692837926,
      "grad_norm": 0.9629647731781006,
      "learning_rate": 2.9653472849113424e-06,
      "loss": 0.0226,
      "step": 2155200
    },
    {
      "epoch": 3.527064799722446,
      "grad_norm": 0.3821388781070709,
      "learning_rate": 2.965281392697825e-06,
      "loss": 0.0086,
      "step": 2155220
    },
    {
      "epoch": 3.5270975301610994,
      "grad_norm": 0.09716817736625671,
      "learning_rate": 2.965215500484308e-06,
      "loss": 0.0158,
      "step": 2155240
    },
    {
      "epoch": 3.5271302605997525,
      "grad_norm": 0.06394997239112854,
      "learning_rate": 2.965149608270791e-06,
      "loss": 0.0109,
      "step": 2155260
    },
    {
      "epoch": 3.5271629910384057,
      "grad_norm": 0.18720649182796478,
      "learning_rate": 2.965083716057274e-06,
      "loss": 0.0097,
      "step": 2155280
    },
    {
      "epoch": 3.5271957214770593,
      "grad_norm": 0.15569379925727844,
      "learning_rate": 2.9650178238437566e-06,
      "loss": 0.0146,
      "step": 2155300
    },
    {
      "epoch": 3.5272284519157124,
      "grad_norm": 0.29554280638694763,
      "learning_rate": 2.9649519316302393e-06,
      "loss": 0.0138,
      "step": 2155320
    },
    {
      "epoch": 3.527261182354366,
      "grad_norm": 0.1689172238111496,
      "learning_rate": 2.964886039416722e-06,
      "loss": 0.0086,
      "step": 2155340
    },
    {
      "epoch": 3.527293912793019,
      "grad_norm": 0.1493505835533142,
      "learning_rate": 2.9648201472032052e-06,
      "loss": 0.0127,
      "step": 2155360
    },
    {
      "epoch": 3.5273266432316728,
      "grad_norm": 0.2470683455467224,
      "learning_rate": 2.964754254989688e-06,
      "loss": 0.0097,
      "step": 2155380
    },
    {
      "epoch": 3.527359373670326,
      "grad_norm": 0.12420617043972015,
      "learning_rate": 2.9646883627761707e-06,
      "loss": 0.0149,
      "step": 2155400
    },
    {
      "epoch": 3.527392104108979,
      "grad_norm": 0.2212727665901184,
      "learning_rate": 2.9646224705626534e-06,
      "loss": 0.0143,
      "step": 2155420
    },
    {
      "epoch": 3.5274248345476327,
      "grad_norm": 0.2722569406032562,
      "learning_rate": 2.9645565783491366e-06,
      "loss": 0.0106,
      "step": 2155440
    },
    {
      "epoch": 3.527457564986286,
      "grad_norm": 0.6455089449882507,
      "learning_rate": 2.9644906861356198e-06,
      "loss": 0.0105,
      "step": 2155460
    },
    {
      "epoch": 3.5274902954249394,
      "grad_norm": 0.10017559677362442,
      "learning_rate": 2.9644247939221025e-06,
      "loss": 0.0111,
      "step": 2155480
    },
    {
      "epoch": 3.5275230258635926,
      "grad_norm": 0.1366540640592575,
      "learning_rate": 2.9643589017085857e-06,
      "loss": 0.0084,
      "step": 2155500
    },
    {
      "epoch": 3.527555756302246,
      "grad_norm": 0.19827736914157867,
      "learning_rate": 2.9642930094950684e-06,
      "loss": 0.0136,
      "step": 2155520
    },
    {
      "epoch": 3.5275884867408993,
      "grad_norm": 0.051304951310157776,
      "learning_rate": 2.964227117281551e-06,
      "loss": 0.0077,
      "step": 2155540
    },
    {
      "epoch": 3.5276212171795525,
      "grad_norm": 0.47137150168418884,
      "learning_rate": 2.964161225068034e-06,
      "loss": 0.0113,
      "step": 2155560
    },
    {
      "epoch": 3.527653947618206,
      "grad_norm": 0.22947634756565094,
      "learning_rate": 2.964095332854517e-06,
      "loss": 0.0135,
      "step": 2155580
    },
    {
      "epoch": 3.527686678056859,
      "grad_norm": 0.75840163230896,
      "learning_rate": 2.9640294406409998e-06,
      "loss": 0.0156,
      "step": 2155600
    },
    {
      "epoch": 3.527719408495513,
      "grad_norm": 0.2747057378292084,
      "learning_rate": 2.9639635484274825e-06,
      "loss": 0.0114,
      "step": 2155620
    },
    {
      "epoch": 3.527752138934166,
      "grad_norm": 0.035467229783535004,
      "learning_rate": 2.9638976562139653e-06,
      "loss": 0.014,
      "step": 2155640
    },
    {
      "epoch": 3.5277848693728195,
      "grad_norm": 0.26289764046669006,
      "learning_rate": 2.9638317640004484e-06,
      "loss": 0.0126,
      "step": 2155660
    },
    {
      "epoch": 3.5278175998114727,
      "grad_norm": 0.49373576045036316,
      "learning_rate": 2.963765871786931e-06,
      "loss": 0.0145,
      "step": 2155680
    },
    {
      "epoch": 3.527850330250126,
      "grad_norm": 0.12223879992961884,
      "learning_rate": 2.963699979573414e-06,
      "loss": 0.0106,
      "step": 2155700
    },
    {
      "epoch": 3.5278830606887794,
      "grad_norm": 0.10613909363746643,
      "learning_rate": 2.9636340873598966e-06,
      "loss": 0.0124,
      "step": 2155720
    },
    {
      "epoch": 3.5279157911274326,
      "grad_norm": 0.21958473324775696,
      "learning_rate": 2.96356819514638e-06,
      "loss": 0.0175,
      "step": 2155740
    },
    {
      "epoch": 3.527948521566086,
      "grad_norm": 0.30663564801216125,
      "learning_rate": 2.9635023029328626e-06,
      "loss": 0.0088,
      "step": 2155760
    },
    {
      "epoch": 3.5279812520047393,
      "grad_norm": 0.18852423131465912,
      "learning_rate": 2.9634364107193453e-06,
      "loss": 0.0102,
      "step": 2155780
    },
    {
      "epoch": 3.528013982443393,
      "grad_norm": 0.2774801552295685,
      "learning_rate": 2.963370518505829e-06,
      "loss": 0.012,
      "step": 2155800
    },
    {
      "epoch": 3.528046712882046,
      "grad_norm": 0.11779630184173584,
      "learning_rate": 2.9633046262923116e-06,
      "loss": 0.0127,
      "step": 2155820
    },
    {
      "epoch": 3.5280794433206992,
      "grad_norm": 0.15339699387550354,
      "learning_rate": 2.9632387340787944e-06,
      "loss": 0.0127,
      "step": 2155840
    },
    {
      "epoch": 3.528112173759353,
      "grad_norm": 0.09070082753896713,
      "learning_rate": 2.963172841865277e-06,
      "loss": 0.0109,
      "step": 2155860
    },
    {
      "epoch": 3.528144904198006,
      "grad_norm": 0.542778730392456,
      "learning_rate": 2.96310694965176e-06,
      "loss": 0.007,
      "step": 2155880
    },
    {
      "epoch": 3.5281776346366596,
      "grad_norm": 0.25366631150245667,
      "learning_rate": 2.963041057438243e-06,
      "loss": 0.01,
      "step": 2155900
    },
    {
      "epoch": 3.5282103650753127,
      "grad_norm": 0.24278657138347626,
      "learning_rate": 2.9629751652247257e-06,
      "loss": 0.0126,
      "step": 2155920
    },
    {
      "epoch": 3.5282430955139663,
      "grad_norm": 0.7183590531349182,
      "learning_rate": 2.9629092730112085e-06,
      "loss": 0.0096,
      "step": 2155940
    },
    {
      "epoch": 3.5282758259526195,
      "grad_norm": 0.17550858855247498,
      "learning_rate": 2.9628433807976912e-06,
      "loss": 0.0082,
      "step": 2155960
    },
    {
      "epoch": 3.5283085563912726,
      "grad_norm": 0.4327453374862671,
      "learning_rate": 2.9627774885841744e-06,
      "loss": 0.0137,
      "step": 2155980
    },
    {
      "epoch": 3.528341286829926,
      "grad_norm": 0.4291399121284485,
      "learning_rate": 2.962711596370657e-06,
      "loss": 0.012,
      "step": 2156000
    },
    {
      "epoch": 3.5283740172685794,
      "grad_norm": 0.46325451135635376,
      "learning_rate": 2.96264570415714e-06,
      "loss": 0.0174,
      "step": 2156020
    },
    {
      "epoch": 3.5284067477072325,
      "grad_norm": 0.2624449133872986,
      "learning_rate": 2.9625798119436226e-06,
      "loss": 0.0132,
      "step": 2156040
    },
    {
      "epoch": 3.528439478145886,
      "grad_norm": 0.2102804034948349,
      "learning_rate": 2.9625139197301058e-06,
      "loss": 0.0109,
      "step": 2156060
    },
    {
      "epoch": 3.5284722085845397,
      "grad_norm": 0.15189653635025024,
      "learning_rate": 2.9624480275165885e-06,
      "loss": 0.0106,
      "step": 2156080
    },
    {
      "epoch": 3.528504939023193,
      "grad_norm": 0.3915312886238098,
      "learning_rate": 2.9623821353030712e-06,
      "loss": 0.0162,
      "step": 2156100
    },
    {
      "epoch": 3.528537669461846,
      "grad_norm": 0.25776833295822144,
      "learning_rate": 2.962316243089554e-06,
      "loss": 0.008,
      "step": 2156120
    },
    {
      "epoch": 3.5285703999004996,
      "grad_norm": 0.25176018476486206,
      "learning_rate": 2.962250350876037e-06,
      "loss": 0.0128,
      "step": 2156140
    },
    {
      "epoch": 3.5286031303391527,
      "grad_norm": 0.44466838240623474,
      "learning_rate": 2.9621844586625203e-06,
      "loss": 0.013,
      "step": 2156160
    },
    {
      "epoch": 3.528635860777806,
      "grad_norm": 0.16232162714004517,
      "learning_rate": 2.962118566449003e-06,
      "loss": 0.01,
      "step": 2156180
    },
    {
      "epoch": 3.5286685912164595,
      "grad_norm": 0.24533437192440033,
      "learning_rate": 2.9620526742354862e-06,
      "loss": 0.0103,
      "step": 2156200
    },
    {
      "epoch": 3.528701321655113,
      "grad_norm": 0.2467280924320221,
      "learning_rate": 2.961986782021969e-06,
      "loss": 0.0091,
      "step": 2156220
    },
    {
      "epoch": 3.5287340520937662,
      "grad_norm": 0.2568851113319397,
      "learning_rate": 2.9619208898084517e-06,
      "loss": 0.0089,
      "step": 2156240
    },
    {
      "epoch": 3.5287667825324194,
      "grad_norm": 0.1899036467075348,
      "learning_rate": 2.9618549975949344e-06,
      "loss": 0.012,
      "step": 2156260
    },
    {
      "epoch": 3.528799512971073,
      "grad_norm": 0.27915507555007935,
      "learning_rate": 2.9617891053814176e-06,
      "loss": 0.0065,
      "step": 2156280
    },
    {
      "epoch": 3.528832243409726,
      "grad_norm": 0.5599530339241028,
      "learning_rate": 2.9617232131679003e-06,
      "loss": 0.0147,
      "step": 2156300
    },
    {
      "epoch": 3.5288649738483793,
      "grad_norm": 0.6209218502044678,
      "learning_rate": 2.961657320954383e-06,
      "loss": 0.0092,
      "step": 2156320
    },
    {
      "epoch": 3.528897704287033,
      "grad_norm": 0.12798218429088593,
      "learning_rate": 2.961591428740866e-06,
      "loss": 0.013,
      "step": 2156340
    },
    {
      "epoch": 3.5289304347256865,
      "grad_norm": 0.21358008682727814,
      "learning_rate": 2.9615255365273486e-06,
      "loss": 0.0117,
      "step": 2156360
    },
    {
      "epoch": 3.5289631651643396,
      "grad_norm": 0.3663601875305176,
      "learning_rate": 2.9614596443138317e-06,
      "loss": 0.0121,
      "step": 2156380
    },
    {
      "epoch": 3.5289958956029928,
      "grad_norm": 0.07280754297971725,
      "learning_rate": 2.9613937521003145e-06,
      "loss": 0.0125,
      "step": 2156400
    },
    {
      "epoch": 3.5290286260416464,
      "grad_norm": 0.2094469517469406,
      "learning_rate": 2.961327859886797e-06,
      "loss": 0.0111,
      "step": 2156420
    },
    {
      "epoch": 3.5290613564802995,
      "grad_norm": 0.09641620516777039,
      "learning_rate": 2.96126196767328e-06,
      "loss": 0.0063,
      "step": 2156440
    },
    {
      "epoch": 3.5290940869189527,
      "grad_norm": 0.017890293151140213,
      "learning_rate": 2.961196075459763e-06,
      "loss": 0.0094,
      "step": 2156460
    },
    {
      "epoch": 3.5291268173576062,
      "grad_norm": 0.4610974192619324,
      "learning_rate": 2.961130183246246e-06,
      "loss": 0.01,
      "step": 2156480
    },
    {
      "epoch": 3.5291595477962594,
      "grad_norm": 0.19939114153385162,
      "learning_rate": 2.9610642910327286e-06,
      "loss": 0.0119,
      "step": 2156500
    },
    {
      "epoch": 3.529192278234913,
      "grad_norm": 0.22334463894367218,
      "learning_rate": 2.960998398819212e-06,
      "loss": 0.0099,
      "step": 2156520
    },
    {
      "epoch": 3.529225008673566,
      "grad_norm": 1.2421600818634033,
      "learning_rate": 2.960932506605695e-06,
      "loss": 0.0117,
      "step": 2156540
    },
    {
      "epoch": 3.5292577391122197,
      "grad_norm": 0.14174357056617737,
      "learning_rate": 2.9608666143921776e-06,
      "loss": 0.0087,
      "step": 2156560
    },
    {
      "epoch": 3.529290469550873,
      "grad_norm": 0.17186854779720306,
      "learning_rate": 2.9608007221786604e-06,
      "loss": 0.0113,
      "step": 2156580
    },
    {
      "epoch": 3.529323199989526,
      "grad_norm": 0.5103747248649597,
      "learning_rate": 2.9607348299651435e-06,
      "loss": 0.0105,
      "step": 2156600
    },
    {
      "epoch": 3.5293559304281796,
      "grad_norm": 0.5455628037452698,
      "learning_rate": 2.9606689377516263e-06,
      "loss": 0.0126,
      "step": 2156620
    },
    {
      "epoch": 3.529388660866833,
      "grad_norm": 0.12158055603504181,
      "learning_rate": 2.960603045538109e-06,
      "loss": 0.0145,
      "step": 2156640
    },
    {
      "epoch": 3.5294213913054864,
      "grad_norm": 0.597438633441925,
      "learning_rate": 2.9605371533245918e-06,
      "loss": 0.0118,
      "step": 2156660
    },
    {
      "epoch": 3.5294541217441395,
      "grad_norm": 0.09725803881883621,
      "learning_rate": 2.960471261111075e-06,
      "loss": 0.0085,
      "step": 2156680
    },
    {
      "epoch": 3.529486852182793,
      "grad_norm": 0.4905601143836975,
      "learning_rate": 2.9604053688975577e-06,
      "loss": 0.0081,
      "step": 2156700
    },
    {
      "epoch": 3.5295195826214463,
      "grad_norm": 0.15732501447200775,
      "learning_rate": 2.9603394766840404e-06,
      "loss": 0.0125,
      "step": 2156720
    },
    {
      "epoch": 3.5295523130600994,
      "grad_norm": 0.29910194873809814,
      "learning_rate": 2.960273584470523e-06,
      "loss": 0.0099,
      "step": 2156740
    },
    {
      "epoch": 3.529585043498753,
      "grad_norm": 0.47361287474632263,
      "learning_rate": 2.9602076922570063e-06,
      "loss": 0.0101,
      "step": 2156760
    },
    {
      "epoch": 3.529617773937406,
      "grad_norm": 0.3916926980018616,
      "learning_rate": 2.960141800043489e-06,
      "loss": 0.0102,
      "step": 2156780
    },
    {
      "epoch": 3.5296505043760598,
      "grad_norm": 0.2973780333995819,
      "learning_rate": 2.960075907829972e-06,
      "loss": 0.0102,
      "step": 2156800
    },
    {
      "epoch": 3.529683234814713,
      "grad_norm": 0.1916574090719223,
      "learning_rate": 2.9600100156164545e-06,
      "loss": 0.0103,
      "step": 2156820
    },
    {
      "epoch": 3.5297159652533665,
      "grad_norm": 0.0971873328089714,
      "learning_rate": 2.9599441234029373e-06,
      "loss": 0.0105,
      "step": 2156840
    },
    {
      "epoch": 3.5297486956920197,
      "grad_norm": 0.1428932398557663,
      "learning_rate": 2.959878231189421e-06,
      "loss": 0.0136,
      "step": 2156860
    },
    {
      "epoch": 3.529781426130673,
      "grad_norm": 0.2964600622653961,
      "learning_rate": 2.9598123389759036e-06,
      "loss": 0.007,
      "step": 2156880
    },
    {
      "epoch": 3.5298141565693264,
      "grad_norm": 0.13760405778884888,
      "learning_rate": 2.9597464467623863e-06,
      "loss": 0.0092,
      "step": 2156900
    },
    {
      "epoch": 3.5298468870079796,
      "grad_norm": 0.629137396812439,
      "learning_rate": 2.9596805545488695e-06,
      "loss": 0.0135,
      "step": 2156920
    },
    {
      "epoch": 3.529879617446633,
      "grad_norm": 0.14107052981853485,
      "learning_rate": 2.9596146623353522e-06,
      "loss": 0.0085,
      "step": 2156940
    },
    {
      "epoch": 3.5299123478852863,
      "grad_norm": 0.21710926294326782,
      "learning_rate": 2.959548770121835e-06,
      "loss": 0.0073,
      "step": 2156960
    },
    {
      "epoch": 3.52994507832394,
      "grad_norm": 0.08832337707281113,
      "learning_rate": 2.9594828779083177e-06,
      "loss": 0.011,
      "step": 2156980
    },
    {
      "epoch": 3.529977808762593,
      "grad_norm": 0.15108947455883026,
      "learning_rate": 2.959416985694801e-06,
      "loss": 0.0069,
      "step": 2157000
    },
    {
      "epoch": 3.530010539201246,
      "grad_norm": 0.5052706003189087,
      "learning_rate": 2.9593510934812836e-06,
      "loss": 0.0147,
      "step": 2157020
    },
    {
      "epoch": 3.5300432696399,
      "grad_norm": 0.18823616206645966,
      "learning_rate": 2.9592852012677664e-06,
      "loss": 0.0088,
      "step": 2157040
    },
    {
      "epoch": 3.530076000078553,
      "grad_norm": 0.13685250282287598,
      "learning_rate": 2.959219309054249e-06,
      "loss": 0.0101,
      "step": 2157060
    },
    {
      "epoch": 3.5301087305172065,
      "grad_norm": 0.4208211898803711,
      "learning_rate": 2.9591534168407323e-06,
      "loss": 0.0115,
      "step": 2157080
    },
    {
      "epoch": 3.5301414609558597,
      "grad_norm": 0.2703167200088501,
      "learning_rate": 2.959087524627215e-06,
      "loss": 0.0106,
      "step": 2157100
    },
    {
      "epoch": 3.5301741913945133,
      "grad_norm": 0.824393093585968,
      "learning_rate": 2.9590216324136977e-06,
      "loss": 0.0133,
      "step": 2157120
    },
    {
      "epoch": 3.5302069218331664,
      "grad_norm": 0.20357726514339447,
      "learning_rate": 2.9589557402001805e-06,
      "loss": 0.0097,
      "step": 2157140
    },
    {
      "epoch": 3.5302396522718196,
      "grad_norm": 0.46206408739089966,
      "learning_rate": 2.9588898479866637e-06,
      "loss": 0.0149,
      "step": 2157160
    },
    {
      "epoch": 3.530272382710473,
      "grad_norm": 0.37804725766181946,
      "learning_rate": 2.9588239557731464e-06,
      "loss": 0.0087,
      "step": 2157180
    },
    {
      "epoch": 3.5303051131491263,
      "grad_norm": 0.21156394481658936,
      "learning_rate": 2.958758063559629e-06,
      "loss": 0.0106,
      "step": 2157200
    },
    {
      "epoch": 3.53033784358778,
      "grad_norm": 0.08831440657377243,
      "learning_rate": 2.9586921713461127e-06,
      "loss": 0.0148,
      "step": 2157220
    },
    {
      "epoch": 3.530370574026433,
      "grad_norm": 0.09948268532752991,
      "learning_rate": 2.9586262791325955e-06,
      "loss": 0.0072,
      "step": 2157240
    },
    {
      "epoch": 3.5304033044650867,
      "grad_norm": 0.4836190640926361,
      "learning_rate": 2.958560386919078e-06,
      "loss": 0.0101,
      "step": 2157260
    },
    {
      "epoch": 3.53043603490374,
      "grad_norm": 0.34851253032684326,
      "learning_rate": 2.958494494705561e-06,
      "loss": 0.0109,
      "step": 2157280
    },
    {
      "epoch": 3.530468765342393,
      "grad_norm": 0.2593197524547577,
      "learning_rate": 2.958428602492044e-06,
      "loss": 0.0107,
      "step": 2157300
    },
    {
      "epoch": 3.5305014957810466,
      "grad_norm": 0.5485870242118835,
      "learning_rate": 2.958362710278527e-06,
      "loss": 0.012,
      "step": 2157320
    },
    {
      "epoch": 3.5305342262196997,
      "grad_norm": 0.09163429588079453,
      "learning_rate": 2.9582968180650096e-06,
      "loss": 0.0077,
      "step": 2157340
    },
    {
      "epoch": 3.5305669566583533,
      "grad_norm": 0.11956385523080826,
      "learning_rate": 2.9582309258514923e-06,
      "loss": 0.0122,
      "step": 2157360
    },
    {
      "epoch": 3.5305996870970064,
      "grad_norm": 0.405237078666687,
      "learning_rate": 2.958165033637975e-06,
      "loss": 0.0108,
      "step": 2157380
    },
    {
      "epoch": 3.53063241753566,
      "grad_norm": 0.31366464495658875,
      "learning_rate": 2.9580991414244582e-06,
      "loss": 0.007,
      "step": 2157400
    },
    {
      "epoch": 3.530665147974313,
      "grad_norm": 0.36928409337997437,
      "learning_rate": 2.958033249210941e-06,
      "loss": 0.0091,
      "step": 2157420
    },
    {
      "epoch": 3.5306978784129663,
      "grad_norm": 0.2744523286819458,
      "learning_rate": 2.9579673569974237e-06,
      "loss": 0.0087,
      "step": 2157440
    },
    {
      "epoch": 3.53073060885162,
      "grad_norm": 0.39789554476737976,
      "learning_rate": 2.9579014647839064e-06,
      "loss": 0.0118,
      "step": 2157460
    },
    {
      "epoch": 3.530763339290273,
      "grad_norm": 0.07325293868780136,
      "learning_rate": 2.9578355725703896e-06,
      "loss": 0.0147,
      "step": 2157480
    },
    {
      "epoch": 3.5307960697289262,
      "grad_norm": 0.37373170256614685,
      "learning_rate": 2.9577696803568723e-06,
      "loss": 0.0106,
      "step": 2157500
    },
    {
      "epoch": 3.53082880016758,
      "grad_norm": 0.5094059705734253,
      "learning_rate": 2.957703788143355e-06,
      "loss": 0.0108,
      "step": 2157520
    },
    {
      "epoch": 3.5308615306062334,
      "grad_norm": 0.4805213212966919,
      "learning_rate": 2.957637895929838e-06,
      "loss": 0.0126,
      "step": 2157540
    },
    {
      "epoch": 3.5308942610448866,
      "grad_norm": 0.3752288222312927,
      "learning_rate": 2.9575720037163214e-06,
      "loss": 0.0137,
      "step": 2157560
    },
    {
      "epoch": 3.5309269914835397,
      "grad_norm": 0.057597383856773376,
      "learning_rate": 2.957506111502804e-06,
      "loss": 0.0103,
      "step": 2157580
    },
    {
      "epoch": 3.5309597219221933,
      "grad_norm": 0.7727648019790649,
      "learning_rate": 2.957440219289287e-06,
      "loss": 0.0123,
      "step": 2157600
    },
    {
      "epoch": 3.5309924523608465,
      "grad_norm": 0.04527581110596657,
      "learning_rate": 2.95737432707577e-06,
      "loss": 0.0191,
      "step": 2157620
    },
    {
      "epoch": 3.5310251827994996,
      "grad_norm": 0.619579553604126,
      "learning_rate": 2.957308434862253e-06,
      "loss": 0.0145,
      "step": 2157640
    },
    {
      "epoch": 3.531057913238153,
      "grad_norm": 0.15283353626728058,
      "learning_rate": 2.9572425426487355e-06,
      "loss": 0.0096,
      "step": 2157660
    },
    {
      "epoch": 3.531090643676807,
      "grad_norm": 0.4157230257987976,
      "learning_rate": 2.9571766504352183e-06,
      "loss": 0.0104,
      "step": 2157680
    },
    {
      "epoch": 3.53112337411546,
      "grad_norm": 0.1316024512052536,
      "learning_rate": 2.9571107582217014e-06,
      "loss": 0.011,
      "step": 2157700
    },
    {
      "epoch": 3.531156104554113,
      "grad_norm": 0.1854255646467209,
      "learning_rate": 2.957044866008184e-06,
      "loss": 0.0091,
      "step": 2157720
    },
    {
      "epoch": 3.5311888349927667,
      "grad_norm": 1.0938420295715332,
      "learning_rate": 2.956978973794667e-06,
      "loss": 0.0151,
      "step": 2157740
    },
    {
      "epoch": 3.53122156543142,
      "grad_norm": 0.1872408539056778,
      "learning_rate": 2.9569130815811497e-06,
      "loss": 0.0099,
      "step": 2157760
    },
    {
      "epoch": 3.531254295870073,
      "grad_norm": 0.10394465178251266,
      "learning_rate": 2.9568471893676324e-06,
      "loss": 0.014,
      "step": 2157780
    },
    {
      "epoch": 3.5312870263087266,
      "grad_norm": 0.5636148452758789,
      "learning_rate": 2.9567812971541156e-06,
      "loss": 0.0149,
      "step": 2157800
    },
    {
      "epoch": 3.53131975674738,
      "grad_norm": 0.21246206760406494,
      "learning_rate": 2.9567154049405983e-06,
      "loss": 0.0119,
      "step": 2157820
    },
    {
      "epoch": 3.5313524871860333,
      "grad_norm": 0.20959658920764923,
      "learning_rate": 2.956649512727081e-06,
      "loss": 0.0101,
      "step": 2157840
    },
    {
      "epoch": 3.5313852176246865,
      "grad_norm": 0.11860805004835129,
      "learning_rate": 2.9565836205135638e-06,
      "loss": 0.0078,
      "step": 2157860
    },
    {
      "epoch": 3.53141794806334,
      "grad_norm": 0.21299482882022858,
      "learning_rate": 2.956517728300047e-06,
      "loss": 0.0058,
      "step": 2157880
    },
    {
      "epoch": 3.5314506785019932,
      "grad_norm": 0.5159655809402466,
      "learning_rate": 2.9564518360865297e-06,
      "loss": 0.0073,
      "step": 2157900
    },
    {
      "epoch": 3.5314834089406464,
      "grad_norm": 0.2226259410381317,
      "learning_rate": 2.956385943873013e-06,
      "loss": 0.0085,
      "step": 2157920
    },
    {
      "epoch": 3.5315161393793,
      "grad_norm": 0.5852950811386108,
      "learning_rate": 2.956320051659496e-06,
      "loss": 0.0107,
      "step": 2157940
    },
    {
      "epoch": 3.5315488698179536,
      "grad_norm": 0.5443528294563293,
      "learning_rate": 2.9562541594459787e-06,
      "loss": 0.0153,
      "step": 2157960
    },
    {
      "epoch": 3.5315816002566067,
      "grad_norm": 0.5014346837997437,
      "learning_rate": 2.9561882672324615e-06,
      "loss": 0.0109,
      "step": 2157980
    },
    {
      "epoch": 3.53161433069526,
      "grad_norm": 0.1667240709066391,
      "learning_rate": 2.9561223750189442e-06,
      "loss": 0.0126,
      "step": 2158000
    },
    {
      "epoch": 3.5316470611339135,
      "grad_norm": 0.2547721564769745,
      "learning_rate": 2.9560564828054274e-06,
      "loss": 0.0175,
      "step": 2158020
    },
    {
      "epoch": 3.5316797915725666,
      "grad_norm": 0.40812626481056213,
      "learning_rate": 2.95599059059191e-06,
      "loss": 0.0149,
      "step": 2158040
    },
    {
      "epoch": 3.5317125220112198,
      "grad_norm": 0.21310479938983917,
      "learning_rate": 2.955924698378393e-06,
      "loss": 0.0091,
      "step": 2158060
    },
    {
      "epoch": 3.5317452524498734,
      "grad_norm": 0.2634093761444092,
      "learning_rate": 2.9558588061648756e-06,
      "loss": 0.012,
      "step": 2158080
    },
    {
      "epoch": 3.5317779828885265,
      "grad_norm": 0.22274933755397797,
      "learning_rate": 2.9557929139513588e-06,
      "loss": 0.0099,
      "step": 2158100
    },
    {
      "epoch": 3.53181071332718,
      "grad_norm": 0.053404487669467926,
      "learning_rate": 2.9557270217378415e-06,
      "loss": 0.0081,
      "step": 2158120
    },
    {
      "epoch": 3.5318434437658333,
      "grad_norm": 0.13013984262943268,
      "learning_rate": 2.9556611295243243e-06,
      "loss": 0.0069,
      "step": 2158140
    },
    {
      "epoch": 3.531876174204487,
      "grad_norm": 0.31449297070503235,
      "learning_rate": 2.955595237310807e-06,
      "loss": 0.0121,
      "step": 2158160
    },
    {
      "epoch": 3.53190890464314,
      "grad_norm": 0.5623519420623779,
      "learning_rate": 2.95552934509729e-06,
      "loss": 0.0104,
      "step": 2158180
    },
    {
      "epoch": 3.531941635081793,
      "grad_norm": 0.15425564348697662,
      "learning_rate": 2.955463452883773e-06,
      "loss": 0.0116,
      "step": 2158200
    },
    {
      "epoch": 3.5319743655204467,
      "grad_norm": 0.44709914922714233,
      "learning_rate": 2.9553975606702556e-06,
      "loss": 0.0124,
      "step": 2158220
    },
    {
      "epoch": 3.5320070959591,
      "grad_norm": 0.15903332829475403,
      "learning_rate": 2.9553316684567384e-06,
      "loss": 0.009,
      "step": 2158240
    },
    {
      "epoch": 3.5320398263977535,
      "grad_norm": 0.391797810792923,
      "learning_rate": 2.955265776243221e-06,
      "loss": 0.0086,
      "step": 2158260
    },
    {
      "epoch": 3.5320725568364066,
      "grad_norm": 0.11348290741443634,
      "learning_rate": 2.9551998840297047e-06,
      "loss": 0.0088,
      "step": 2158280
    },
    {
      "epoch": 3.5321052872750602,
      "grad_norm": 0.5898665189743042,
      "learning_rate": 2.9551339918161874e-06,
      "loss": 0.0075,
      "step": 2158300
    },
    {
      "epoch": 3.5321380177137134,
      "grad_norm": 0.18982096016407013,
      "learning_rate": 2.95506809960267e-06,
      "loss": 0.0133,
      "step": 2158320
    },
    {
      "epoch": 3.5321707481523665,
      "grad_norm": 0.6533415913581848,
      "learning_rate": 2.9550022073891533e-06,
      "loss": 0.0147,
      "step": 2158340
    },
    {
      "epoch": 3.53220347859102,
      "grad_norm": 0.4177711308002472,
      "learning_rate": 2.954936315175636e-06,
      "loss": 0.0091,
      "step": 2158360
    },
    {
      "epoch": 3.5322362090296733,
      "grad_norm": 0.2592417299747467,
      "learning_rate": 2.954870422962119e-06,
      "loss": 0.0121,
      "step": 2158380
    },
    {
      "epoch": 3.532268939468327,
      "grad_norm": 0.12416788190603256,
      "learning_rate": 2.9548045307486016e-06,
      "loss": 0.0116,
      "step": 2158400
    },
    {
      "epoch": 3.53230166990698,
      "grad_norm": 0.1356154978275299,
      "learning_rate": 2.9547386385350847e-06,
      "loss": 0.0149,
      "step": 2158420
    },
    {
      "epoch": 3.5323344003456336,
      "grad_norm": 0.3244130611419678,
      "learning_rate": 2.9546727463215675e-06,
      "loss": 0.0102,
      "step": 2158440
    },
    {
      "epoch": 3.5323671307842868,
      "grad_norm": 0.07869646698236465,
      "learning_rate": 2.95460685410805e-06,
      "loss": 0.0091,
      "step": 2158460
    },
    {
      "epoch": 3.53239986122294,
      "grad_norm": 0.2659110128879547,
      "learning_rate": 2.954540961894533e-06,
      "loss": 0.0088,
      "step": 2158480
    },
    {
      "epoch": 3.5324325916615935,
      "grad_norm": 0.20575501024723053,
      "learning_rate": 2.954475069681016e-06,
      "loss": 0.014,
      "step": 2158500
    },
    {
      "epoch": 3.5324653221002467,
      "grad_norm": 0.6552762985229492,
      "learning_rate": 2.954409177467499e-06,
      "loss": 0.0089,
      "step": 2158520
    },
    {
      "epoch": 3.5324980525389003,
      "grad_norm": 0.23175832629203796,
      "learning_rate": 2.9543432852539816e-06,
      "loss": 0.0115,
      "step": 2158540
    },
    {
      "epoch": 3.5325307829775534,
      "grad_norm": 0.11250744014978409,
      "learning_rate": 2.9542773930404643e-06,
      "loss": 0.0119,
      "step": 2158560
    },
    {
      "epoch": 3.532563513416207,
      "grad_norm": 0.32743752002716064,
      "learning_rate": 2.9542115008269475e-06,
      "loss": 0.0079,
      "step": 2158580
    },
    {
      "epoch": 3.53259624385486,
      "grad_norm": 0.506473958492279,
      "learning_rate": 2.9541456086134302e-06,
      "loss": 0.0086,
      "step": 2158600
    },
    {
      "epoch": 3.5326289742935133,
      "grad_norm": 0.7296232581138611,
      "learning_rate": 2.9540797163999134e-06,
      "loss": 0.0098,
      "step": 2158620
    },
    {
      "epoch": 3.532661704732167,
      "grad_norm": 0.7313551902770996,
      "learning_rate": 2.9540138241863966e-06,
      "loss": 0.0111,
      "step": 2158640
    },
    {
      "epoch": 3.53269443517082,
      "grad_norm": 0.14973464608192444,
      "learning_rate": 2.9539479319728793e-06,
      "loss": 0.015,
      "step": 2158660
    },
    {
      "epoch": 3.5327271656094736,
      "grad_norm": 0.2427467256784439,
      "learning_rate": 2.953882039759362e-06,
      "loss": 0.0079,
      "step": 2158680
    },
    {
      "epoch": 3.532759896048127,
      "grad_norm": 0.16013754904270172,
      "learning_rate": 2.9538161475458448e-06,
      "loss": 0.0135,
      "step": 2158700
    },
    {
      "epoch": 3.5327926264867804,
      "grad_norm": 0.7304239273071289,
      "learning_rate": 2.953750255332328e-06,
      "loss": 0.0106,
      "step": 2158720
    },
    {
      "epoch": 3.5328253569254335,
      "grad_norm": 0.1717970371246338,
      "learning_rate": 2.9536843631188107e-06,
      "loss": 0.0098,
      "step": 2158740
    },
    {
      "epoch": 3.5328580873640867,
      "grad_norm": 0.14970363676548004,
      "learning_rate": 2.9536184709052934e-06,
      "loss": 0.0086,
      "step": 2158760
    },
    {
      "epoch": 3.5328908178027403,
      "grad_norm": 0.20075315237045288,
      "learning_rate": 2.953552578691776e-06,
      "loss": 0.0133,
      "step": 2158780
    },
    {
      "epoch": 3.5329235482413934,
      "grad_norm": 0.48228737711906433,
      "learning_rate": 2.953486686478259e-06,
      "loss": 0.0118,
      "step": 2158800
    },
    {
      "epoch": 3.532956278680047,
      "grad_norm": 0.2328859269618988,
      "learning_rate": 2.953420794264742e-06,
      "loss": 0.0121,
      "step": 2158820
    },
    {
      "epoch": 3.5329890091187,
      "grad_norm": 0.05936426669359207,
      "learning_rate": 2.953354902051225e-06,
      "loss": 0.0094,
      "step": 2158840
    },
    {
      "epoch": 3.5330217395573538,
      "grad_norm": 0.3084678649902344,
      "learning_rate": 2.9532890098377075e-06,
      "loss": 0.0128,
      "step": 2158860
    },
    {
      "epoch": 3.533054469996007,
      "grad_norm": 0.21254085004329681,
      "learning_rate": 2.9532231176241903e-06,
      "loss": 0.0109,
      "step": 2158880
    },
    {
      "epoch": 3.53308720043466,
      "grad_norm": 0.9603158235549927,
      "learning_rate": 2.9531572254106734e-06,
      "loss": 0.014,
      "step": 2158900
    },
    {
      "epoch": 3.5331199308733137,
      "grad_norm": 0.07509396970272064,
      "learning_rate": 2.953091333197156e-06,
      "loss": 0.013,
      "step": 2158920
    },
    {
      "epoch": 3.533152661311967,
      "grad_norm": 0.0898105651140213,
      "learning_rate": 2.953025440983639e-06,
      "loss": 0.0099,
      "step": 2158940
    },
    {
      "epoch": 3.5331853917506204,
      "grad_norm": 0.36899417638778687,
      "learning_rate": 2.9529595487701217e-06,
      "loss": 0.0091,
      "step": 2158960
    },
    {
      "epoch": 3.5332181221892736,
      "grad_norm": 0.32948440313339233,
      "learning_rate": 2.9528936565566052e-06,
      "loss": 0.0098,
      "step": 2158980
    },
    {
      "epoch": 3.533250852627927,
      "grad_norm": 0.12948806583881378,
      "learning_rate": 2.952827764343088e-06,
      "loss": 0.0093,
      "step": 2159000
    },
    {
      "epoch": 3.5332835830665803,
      "grad_norm": 0.19638769328594208,
      "learning_rate": 2.9527618721295707e-06,
      "loss": 0.0131,
      "step": 2159020
    },
    {
      "epoch": 3.5333163135052335,
      "grad_norm": 0.2954648733139038,
      "learning_rate": 2.952695979916054e-06,
      "loss": 0.01,
      "step": 2159040
    },
    {
      "epoch": 3.533349043943887,
      "grad_norm": 0.21616804599761963,
      "learning_rate": 2.9526300877025366e-06,
      "loss": 0.0139,
      "step": 2159060
    },
    {
      "epoch": 3.53338177438254,
      "grad_norm": 0.5349636077880859,
      "learning_rate": 2.9525641954890194e-06,
      "loss": 0.008,
      "step": 2159080
    },
    {
      "epoch": 3.5334145048211933,
      "grad_norm": 1.4727791547775269,
      "learning_rate": 2.952498303275502e-06,
      "loss": 0.0066,
      "step": 2159100
    },
    {
      "epoch": 3.533447235259847,
      "grad_norm": 0.20182283222675323,
      "learning_rate": 2.9524324110619853e-06,
      "loss": 0.0143,
      "step": 2159120
    },
    {
      "epoch": 3.5334799656985005,
      "grad_norm": 0.46233829855918884,
      "learning_rate": 2.952366518848468e-06,
      "loss": 0.0158,
      "step": 2159140
    },
    {
      "epoch": 3.5335126961371537,
      "grad_norm": 0.2932797372341156,
      "learning_rate": 2.9523006266349508e-06,
      "loss": 0.011,
      "step": 2159160
    },
    {
      "epoch": 3.533545426575807,
      "grad_norm": 0.19236096739768982,
      "learning_rate": 2.9522347344214335e-06,
      "loss": 0.0081,
      "step": 2159180
    },
    {
      "epoch": 3.5335781570144604,
      "grad_norm": 1.222283959388733,
      "learning_rate": 2.9521688422079162e-06,
      "loss": 0.0097,
      "step": 2159200
    },
    {
      "epoch": 3.5336108874531136,
      "grad_norm": 0.4526402950286865,
      "learning_rate": 2.9521029499943994e-06,
      "loss": 0.011,
      "step": 2159220
    },
    {
      "epoch": 3.5336436178917667,
      "grad_norm": 0.2460649162530899,
      "learning_rate": 2.952037057780882e-06,
      "loss": 0.0085,
      "step": 2159240
    },
    {
      "epoch": 3.5336763483304203,
      "grad_norm": 0.2228427231311798,
      "learning_rate": 2.951971165567365e-06,
      "loss": 0.0109,
      "step": 2159260
    },
    {
      "epoch": 3.533709078769074,
      "grad_norm": 0.07311329245567322,
      "learning_rate": 2.9519052733538476e-06,
      "loss": 0.0087,
      "step": 2159280
    },
    {
      "epoch": 3.533741809207727,
      "grad_norm": 0.09066776931285858,
      "learning_rate": 2.9518393811403308e-06,
      "loss": 0.0116,
      "step": 2159300
    },
    {
      "epoch": 3.53377453964638,
      "grad_norm": 0.1841774582862854,
      "learning_rate": 2.951773488926814e-06,
      "loss": 0.0139,
      "step": 2159320
    },
    {
      "epoch": 3.533807270085034,
      "grad_norm": 0.34780073165893555,
      "learning_rate": 2.9517075967132967e-06,
      "loss": 0.0138,
      "step": 2159340
    },
    {
      "epoch": 3.533840000523687,
      "grad_norm": 0.12542401254177094,
      "learning_rate": 2.95164170449978e-06,
      "loss": 0.0133,
      "step": 2159360
    },
    {
      "epoch": 3.53387273096234,
      "grad_norm": 0.14346861839294434,
      "learning_rate": 2.9515758122862626e-06,
      "loss": 0.0093,
      "step": 2159380
    },
    {
      "epoch": 3.5339054614009937,
      "grad_norm": 0.48535609245300293,
      "learning_rate": 2.9515099200727453e-06,
      "loss": 0.0137,
      "step": 2159400
    },
    {
      "epoch": 3.5339381918396473,
      "grad_norm": 0.23041442036628723,
      "learning_rate": 2.951444027859228e-06,
      "loss": 0.0132,
      "step": 2159420
    },
    {
      "epoch": 3.5339709222783005,
      "grad_norm": 0.1979944109916687,
      "learning_rate": 2.9513781356457112e-06,
      "loss": 0.0154,
      "step": 2159440
    },
    {
      "epoch": 3.5340036527169536,
      "grad_norm": 0.2684469521045685,
      "learning_rate": 2.951312243432194e-06,
      "loss": 0.0154,
      "step": 2159460
    },
    {
      "epoch": 3.534036383155607,
      "grad_norm": 0.20345526933670044,
      "learning_rate": 2.9512463512186767e-06,
      "loss": 0.0112,
      "step": 2159480
    },
    {
      "epoch": 3.5340691135942603,
      "grad_norm": 0.15615351498126984,
      "learning_rate": 2.9511804590051594e-06,
      "loss": 0.0111,
      "step": 2159500
    },
    {
      "epoch": 3.5341018440329135,
      "grad_norm": 0.2633734345436096,
      "learning_rate": 2.9511145667916426e-06,
      "loss": 0.0097,
      "step": 2159520
    },
    {
      "epoch": 3.534134574471567,
      "grad_norm": 0.24811679124832153,
      "learning_rate": 2.9510486745781254e-06,
      "loss": 0.0133,
      "step": 2159540
    },
    {
      "epoch": 3.5341673049102202,
      "grad_norm": 0.31230974197387695,
      "learning_rate": 2.950982782364608e-06,
      "loss": 0.0118,
      "step": 2159560
    },
    {
      "epoch": 3.534200035348874,
      "grad_norm": 0.8371397852897644,
      "learning_rate": 2.950916890151091e-06,
      "loss": 0.0082,
      "step": 2159580
    },
    {
      "epoch": 3.534232765787527,
      "grad_norm": 0.5966309309005737,
      "learning_rate": 2.950850997937574e-06,
      "loss": 0.0178,
      "step": 2159600
    },
    {
      "epoch": 3.5342654962261806,
      "grad_norm": 0.2713737189769745,
      "learning_rate": 2.9507851057240567e-06,
      "loss": 0.0122,
      "step": 2159620
    },
    {
      "epoch": 3.5342982266648337,
      "grad_norm": 0.11765482276678085,
      "learning_rate": 2.9507192135105395e-06,
      "loss": 0.0096,
      "step": 2159640
    },
    {
      "epoch": 3.534330957103487,
      "grad_norm": 0.7355289459228516,
      "learning_rate": 2.9506533212970222e-06,
      "loss": 0.01,
      "step": 2159660
    },
    {
      "epoch": 3.5343636875421405,
      "grad_norm": 0.5874385833740234,
      "learning_rate": 2.950587429083506e-06,
      "loss": 0.0121,
      "step": 2159680
    },
    {
      "epoch": 3.5343964179807936,
      "grad_norm": 0.6392146944999695,
      "learning_rate": 2.9505215368699885e-06,
      "loss": 0.0064,
      "step": 2159700
    },
    {
      "epoch": 3.534429148419447,
      "grad_norm": 0.6325556039810181,
      "learning_rate": 2.9504556446564713e-06,
      "loss": 0.0104,
      "step": 2159720
    },
    {
      "epoch": 3.5344618788581004,
      "grad_norm": 0.09813941270112991,
      "learning_rate": 2.950389752442954e-06,
      "loss": 0.0138,
      "step": 2159740
    },
    {
      "epoch": 3.534494609296754,
      "grad_norm": 0.10323914885520935,
      "learning_rate": 2.950323860229437e-06,
      "loss": 0.0163,
      "step": 2159760
    },
    {
      "epoch": 3.534527339735407,
      "grad_norm": 0.0860888808965683,
      "learning_rate": 2.95025796801592e-06,
      "loss": 0.01,
      "step": 2159780
    },
    {
      "epoch": 3.5345600701740603,
      "grad_norm": 0.23596279323101044,
      "learning_rate": 2.9501920758024027e-06,
      "loss": 0.01,
      "step": 2159800
    },
    {
      "epoch": 3.534592800612714,
      "grad_norm": 0.3433927595615387,
      "learning_rate": 2.9501261835888854e-06,
      "loss": 0.0124,
      "step": 2159820
    },
    {
      "epoch": 3.534625531051367,
      "grad_norm": 0.47218817472457886,
      "learning_rate": 2.9500602913753686e-06,
      "loss": 0.0117,
      "step": 2159840
    },
    {
      "epoch": 3.5346582614900206,
      "grad_norm": 0.9342382550239563,
      "learning_rate": 2.9499943991618513e-06,
      "loss": 0.0112,
      "step": 2159860
    },
    {
      "epoch": 3.5346909919286738,
      "grad_norm": 0.24464748799800873,
      "learning_rate": 2.949928506948334e-06,
      "loss": 0.0105,
      "step": 2159880
    },
    {
      "epoch": 3.5347237223673273,
      "grad_norm": 0.22013838589191437,
      "learning_rate": 2.9498626147348168e-06,
      "loss": 0.0095,
      "step": 2159900
    },
    {
      "epoch": 3.5347564528059805,
      "grad_norm": 0.26650646328926086,
      "learning_rate": 2.9497967225213e-06,
      "loss": 0.012,
      "step": 2159920
    },
    {
      "epoch": 3.5347891832446336,
      "grad_norm": 0.20808200538158417,
      "learning_rate": 2.9497308303077827e-06,
      "loss": 0.0136,
      "step": 2159940
    },
    {
      "epoch": 3.5348219136832872,
      "grad_norm": 0.09112589806318283,
      "learning_rate": 2.9496649380942654e-06,
      "loss": 0.0101,
      "step": 2159960
    },
    {
      "epoch": 3.5348546441219404,
      "grad_norm": 0.5914229154586792,
      "learning_rate": 2.949599045880748e-06,
      "loss": 0.0095,
      "step": 2159980
    },
    {
      "epoch": 3.534887374560594,
      "grad_norm": 0.4465485215187073,
      "learning_rate": 2.9495331536672313e-06,
      "loss": 0.0114,
      "step": 2160000
    },
    {
      "epoch": 3.534920104999247,
      "grad_norm": 0.17275790870189667,
      "learning_rate": 2.949467261453714e-06,
      "loss": 0.0134,
      "step": 2160020
    },
    {
      "epoch": 3.5349528354379007,
      "grad_norm": 0.39006975293159485,
      "learning_rate": 2.9494013692401972e-06,
      "loss": 0.0098,
      "step": 2160040
    },
    {
      "epoch": 3.534985565876554,
      "grad_norm": 0.20176459848880768,
      "learning_rate": 2.9493354770266804e-06,
      "loss": 0.0131,
      "step": 2160060
    },
    {
      "epoch": 3.535018296315207,
      "grad_norm": 0.22449958324432373,
      "learning_rate": 2.949269584813163e-06,
      "loss": 0.0115,
      "step": 2160080
    },
    {
      "epoch": 3.5350510267538606,
      "grad_norm": 0.21494029462337494,
      "learning_rate": 2.949203692599646e-06,
      "loss": 0.0097,
      "step": 2160100
    },
    {
      "epoch": 3.5350837571925138,
      "grad_norm": 0.20954591035842896,
      "learning_rate": 2.9491378003861286e-06,
      "loss": 0.012,
      "step": 2160120
    },
    {
      "epoch": 3.5351164876311674,
      "grad_norm": 0.23169712722301483,
      "learning_rate": 2.9490719081726118e-06,
      "loss": 0.01,
      "step": 2160140
    },
    {
      "epoch": 3.5351492180698205,
      "grad_norm": 0.15077266097068787,
      "learning_rate": 2.9490060159590945e-06,
      "loss": 0.0072,
      "step": 2160160
    },
    {
      "epoch": 3.535181948508474,
      "grad_norm": 0.13169322907924652,
      "learning_rate": 2.9489401237455773e-06,
      "loss": 0.0126,
      "step": 2160180
    },
    {
      "epoch": 3.5352146789471273,
      "grad_norm": 0.2276783436536789,
      "learning_rate": 2.94887423153206e-06,
      "loss": 0.0122,
      "step": 2160200
    },
    {
      "epoch": 3.5352474093857804,
      "grad_norm": 0.30301433801651,
      "learning_rate": 2.9488083393185427e-06,
      "loss": 0.0117,
      "step": 2160220
    },
    {
      "epoch": 3.535280139824434,
      "grad_norm": 0.9760857224464417,
      "learning_rate": 2.948742447105026e-06,
      "loss": 0.018,
      "step": 2160240
    },
    {
      "epoch": 3.535312870263087,
      "grad_norm": 0.4373168349266052,
      "learning_rate": 2.9486765548915086e-06,
      "loss": 0.0091,
      "step": 2160260
    },
    {
      "epoch": 3.5353456007017408,
      "grad_norm": 0.28466522693634033,
      "learning_rate": 2.9486106626779914e-06,
      "loss": 0.0149,
      "step": 2160280
    },
    {
      "epoch": 3.535378331140394,
      "grad_norm": 0.09175277501344681,
      "learning_rate": 2.948544770464474e-06,
      "loss": 0.0162,
      "step": 2160300
    },
    {
      "epoch": 3.5354110615790475,
      "grad_norm": 0.3158702552318573,
      "learning_rate": 2.9484788782509573e-06,
      "loss": 0.0097,
      "step": 2160320
    },
    {
      "epoch": 3.5354437920177006,
      "grad_norm": 0.2973730266094208,
      "learning_rate": 2.94841298603744e-06,
      "loss": 0.0076,
      "step": 2160340
    },
    {
      "epoch": 3.535476522456354,
      "grad_norm": 0.2527307868003845,
      "learning_rate": 2.9483470938239228e-06,
      "loss": 0.0094,
      "step": 2160360
    },
    {
      "epoch": 3.5355092528950074,
      "grad_norm": 0.4695644676685333,
      "learning_rate": 2.9482812016104063e-06,
      "loss": 0.0173,
      "step": 2160380
    },
    {
      "epoch": 3.5355419833336605,
      "grad_norm": 0.08314120769500732,
      "learning_rate": 2.948215309396889e-06,
      "loss": 0.0111,
      "step": 2160400
    },
    {
      "epoch": 3.535574713772314,
      "grad_norm": 0.10103760659694672,
      "learning_rate": 2.948149417183372e-06,
      "loss": 0.0112,
      "step": 2160420
    },
    {
      "epoch": 3.5356074442109673,
      "grad_norm": 0.16550995409488678,
      "learning_rate": 2.9480835249698546e-06,
      "loss": 0.0112,
      "step": 2160440
    },
    {
      "epoch": 3.535640174649621,
      "grad_norm": 0.3365439474582672,
      "learning_rate": 2.9480176327563377e-06,
      "loss": 0.0144,
      "step": 2160460
    },
    {
      "epoch": 3.535672905088274,
      "grad_norm": 0.27362996339797974,
      "learning_rate": 2.9479517405428205e-06,
      "loss": 0.0127,
      "step": 2160480
    },
    {
      "epoch": 3.535705635526927,
      "grad_norm": 0.2890423834323883,
      "learning_rate": 2.947885848329303e-06,
      "loss": 0.0093,
      "step": 2160500
    },
    {
      "epoch": 3.5357383659655808,
      "grad_norm": 0.32218050956726074,
      "learning_rate": 2.947819956115786e-06,
      "loss": 0.0124,
      "step": 2160520
    },
    {
      "epoch": 3.535771096404234,
      "grad_norm": 0.45218709111213684,
      "learning_rate": 2.947754063902269e-06,
      "loss": 0.0079,
      "step": 2160540
    },
    {
      "epoch": 3.535803826842887,
      "grad_norm": 0.3640141785144806,
      "learning_rate": 2.947688171688752e-06,
      "loss": 0.0115,
      "step": 2160560
    },
    {
      "epoch": 3.5358365572815407,
      "grad_norm": 0.06573048233985901,
      "learning_rate": 2.9476222794752346e-06,
      "loss": 0.0093,
      "step": 2160580
    },
    {
      "epoch": 3.5358692877201943,
      "grad_norm": 0.35387685894966125,
      "learning_rate": 2.9475563872617173e-06,
      "loss": 0.0081,
      "step": 2160600
    },
    {
      "epoch": 3.5359020181588474,
      "grad_norm": 0.27995267510414124,
      "learning_rate": 2.9474904950482e-06,
      "loss": 0.0186,
      "step": 2160620
    },
    {
      "epoch": 3.5359347485975006,
      "grad_norm": 0.25313952565193176,
      "learning_rate": 2.9474246028346832e-06,
      "loss": 0.0108,
      "step": 2160640
    },
    {
      "epoch": 3.535967479036154,
      "grad_norm": 0.14781102538108826,
      "learning_rate": 2.947358710621166e-06,
      "loss": 0.0135,
      "step": 2160660
    },
    {
      "epoch": 3.5360002094748073,
      "grad_norm": 0.15273135900497437,
      "learning_rate": 2.9472928184076487e-06,
      "loss": 0.0166,
      "step": 2160680
    },
    {
      "epoch": 3.5360329399134605,
      "grad_norm": 0.5276204347610474,
      "learning_rate": 2.9472269261941315e-06,
      "loss": 0.0154,
      "step": 2160700
    },
    {
      "epoch": 3.536065670352114,
      "grad_norm": 0.3006225526332855,
      "learning_rate": 2.9471610339806146e-06,
      "loss": 0.0126,
      "step": 2160720
    },
    {
      "epoch": 3.5360984007907676,
      "grad_norm": 0.28802114725112915,
      "learning_rate": 2.9470951417670978e-06,
      "loss": 0.0086,
      "step": 2160740
    },
    {
      "epoch": 3.536131131229421,
      "grad_norm": 0.38307395577430725,
      "learning_rate": 2.9470292495535805e-06,
      "loss": 0.0133,
      "step": 2160760
    },
    {
      "epoch": 3.536163861668074,
      "grad_norm": 0.4312678575515747,
      "learning_rate": 2.9469633573400637e-06,
      "loss": 0.0121,
      "step": 2160780
    },
    {
      "epoch": 3.5361965921067275,
      "grad_norm": 0.2592446208000183,
      "learning_rate": 2.9468974651265464e-06,
      "loss": 0.0124,
      "step": 2160800
    },
    {
      "epoch": 3.5362293225453807,
      "grad_norm": 0.3093714416027069,
      "learning_rate": 2.946831572913029e-06,
      "loss": 0.0135,
      "step": 2160820
    },
    {
      "epoch": 3.536262052984034,
      "grad_norm": 0.13981325924396515,
      "learning_rate": 2.946765680699512e-06,
      "loss": 0.0126,
      "step": 2160840
    },
    {
      "epoch": 3.5362947834226874,
      "grad_norm": 0.05622453987598419,
      "learning_rate": 2.946699788485995e-06,
      "loss": 0.0088,
      "step": 2160860
    },
    {
      "epoch": 3.536327513861341,
      "grad_norm": 0.25288906693458557,
      "learning_rate": 2.946633896272478e-06,
      "loss": 0.0117,
      "step": 2160880
    },
    {
      "epoch": 3.536360244299994,
      "grad_norm": 0.27047502994537354,
      "learning_rate": 2.9465680040589605e-06,
      "loss": 0.0115,
      "step": 2160900
    },
    {
      "epoch": 3.5363929747386473,
      "grad_norm": 0.129725381731987,
      "learning_rate": 2.9465021118454433e-06,
      "loss": 0.0099,
      "step": 2160920
    },
    {
      "epoch": 3.536425705177301,
      "grad_norm": 0.3650289475917816,
      "learning_rate": 2.9464362196319264e-06,
      "loss": 0.0118,
      "step": 2160940
    },
    {
      "epoch": 3.536458435615954,
      "grad_norm": 0.12043430656194687,
      "learning_rate": 2.946370327418409e-06,
      "loss": 0.0103,
      "step": 2160960
    },
    {
      "epoch": 3.5364911660546072,
      "grad_norm": 0.5195424556732178,
      "learning_rate": 2.946304435204892e-06,
      "loss": 0.0154,
      "step": 2160980
    },
    {
      "epoch": 3.536523896493261,
      "grad_norm": 0.22186189889907837,
      "learning_rate": 2.9462385429913747e-06,
      "loss": 0.0167,
      "step": 2161000
    },
    {
      "epoch": 3.5365566269319144,
      "grad_norm": 0.11508135497570038,
      "learning_rate": 2.946172650777858e-06,
      "loss": 0.0154,
      "step": 2161020
    },
    {
      "epoch": 3.5365893573705676,
      "grad_norm": 0.15187960863113403,
      "learning_rate": 2.9461067585643406e-06,
      "loss": 0.0126,
      "step": 2161040
    },
    {
      "epoch": 3.5366220878092207,
      "grad_norm": 0.27847546339035034,
      "learning_rate": 2.9460408663508233e-06,
      "loss": 0.0107,
      "step": 2161060
    },
    {
      "epoch": 3.5366548182478743,
      "grad_norm": 0.9325039386749268,
      "learning_rate": 2.945974974137307e-06,
      "loss": 0.0132,
      "step": 2161080
    },
    {
      "epoch": 3.5366875486865275,
      "grad_norm": 0.62267005443573,
      "learning_rate": 2.9459090819237896e-06,
      "loss": 0.012,
      "step": 2161100
    },
    {
      "epoch": 3.5367202791251806,
      "grad_norm": 0.4688984751701355,
      "learning_rate": 2.9458431897102724e-06,
      "loss": 0.0168,
      "step": 2161120
    },
    {
      "epoch": 3.536753009563834,
      "grad_norm": 0.25498896837234497,
      "learning_rate": 2.945777297496755e-06,
      "loss": 0.0087,
      "step": 2161140
    },
    {
      "epoch": 3.5367857400024874,
      "grad_norm": 0.27086618542671204,
      "learning_rate": 2.945711405283238e-06,
      "loss": 0.0154,
      "step": 2161160
    },
    {
      "epoch": 3.536818470441141,
      "grad_norm": 0.6358779668807983,
      "learning_rate": 2.945645513069721e-06,
      "loss": 0.0143,
      "step": 2161180
    },
    {
      "epoch": 3.536851200879794,
      "grad_norm": 0.5487383604049683,
      "learning_rate": 2.9455796208562038e-06,
      "loss": 0.0123,
      "step": 2161200
    },
    {
      "epoch": 3.5368839313184477,
      "grad_norm": 0.200990229845047,
      "learning_rate": 2.9455137286426865e-06,
      "loss": 0.0105,
      "step": 2161220
    },
    {
      "epoch": 3.536916661757101,
      "grad_norm": 0.08604512363672256,
      "learning_rate": 2.9454478364291692e-06,
      "loss": 0.0077,
      "step": 2161240
    },
    {
      "epoch": 3.536949392195754,
      "grad_norm": 0.212245374917984,
      "learning_rate": 2.9453819442156524e-06,
      "loss": 0.0102,
      "step": 2161260
    },
    {
      "epoch": 3.5369821226344076,
      "grad_norm": 0.5653665661811829,
      "learning_rate": 2.945316052002135e-06,
      "loss": 0.0113,
      "step": 2161280
    },
    {
      "epoch": 3.5370148530730607,
      "grad_norm": 0.20080143213272095,
      "learning_rate": 2.945250159788618e-06,
      "loss": 0.015,
      "step": 2161300
    },
    {
      "epoch": 3.5370475835117143,
      "grad_norm": 0.38031813502311707,
      "learning_rate": 2.9451842675751006e-06,
      "loss": 0.0156,
      "step": 2161320
    },
    {
      "epoch": 3.5370803139503675,
      "grad_norm": 0.3471691608428955,
      "learning_rate": 2.9451183753615838e-06,
      "loss": 0.0119,
      "step": 2161340
    },
    {
      "epoch": 3.537113044389021,
      "grad_norm": 0.9631826877593994,
      "learning_rate": 2.9450524831480665e-06,
      "loss": 0.0187,
      "step": 2161360
    },
    {
      "epoch": 3.5371457748276742,
      "grad_norm": 0.35774698853492737,
      "learning_rate": 2.9449865909345493e-06,
      "loss": 0.0203,
      "step": 2161380
    },
    {
      "epoch": 3.5371785052663274,
      "grad_norm": 0.19498206675052643,
      "learning_rate": 2.944920698721032e-06,
      "loss": 0.0103,
      "step": 2161400
    },
    {
      "epoch": 3.537211235704981,
      "grad_norm": 0.26602399349212646,
      "learning_rate": 2.944854806507515e-06,
      "loss": 0.0106,
      "step": 2161420
    },
    {
      "epoch": 3.537243966143634,
      "grad_norm": 0.5188521146774292,
      "learning_rate": 2.9447889142939983e-06,
      "loss": 0.0109,
      "step": 2161440
    },
    {
      "epoch": 3.5372766965822877,
      "grad_norm": 0.5442285537719727,
      "learning_rate": 2.944723022080481e-06,
      "loss": 0.0133,
      "step": 2161460
    },
    {
      "epoch": 3.537309427020941,
      "grad_norm": 0.2850542366504669,
      "learning_rate": 2.9446571298669642e-06,
      "loss": 0.0126,
      "step": 2161480
    },
    {
      "epoch": 3.5373421574595945,
      "grad_norm": 0.1883651167154312,
      "learning_rate": 2.944591237653447e-06,
      "loss": 0.011,
      "step": 2161500
    },
    {
      "epoch": 3.5373748878982476,
      "grad_norm": 0.30897554755210876,
      "learning_rate": 2.9445253454399297e-06,
      "loss": 0.0079,
      "step": 2161520
    },
    {
      "epoch": 3.5374076183369008,
      "grad_norm": 0.3514760434627533,
      "learning_rate": 2.9444594532264125e-06,
      "loss": 0.0105,
      "step": 2161540
    },
    {
      "epoch": 3.5374403487755544,
      "grad_norm": 0.1309846043586731,
      "learning_rate": 2.9443935610128956e-06,
      "loss": 0.0096,
      "step": 2161560
    },
    {
      "epoch": 3.5374730792142075,
      "grad_norm": 0.17070315778255463,
      "learning_rate": 2.9443276687993784e-06,
      "loss": 0.0118,
      "step": 2161580
    },
    {
      "epoch": 3.537505809652861,
      "grad_norm": 0.18231411278247833,
      "learning_rate": 2.944261776585861e-06,
      "loss": 0.0111,
      "step": 2161600
    },
    {
      "epoch": 3.5375385400915142,
      "grad_norm": 0.46898123621940613,
      "learning_rate": 2.944195884372344e-06,
      "loss": 0.0141,
      "step": 2161620
    },
    {
      "epoch": 3.537571270530168,
      "grad_norm": 0.30524587631225586,
      "learning_rate": 2.9441299921588266e-06,
      "loss": 0.0095,
      "step": 2161640
    },
    {
      "epoch": 3.537604000968821,
      "grad_norm": 0.19757027924060822,
      "learning_rate": 2.9440640999453097e-06,
      "loss": 0.0127,
      "step": 2161660
    },
    {
      "epoch": 3.537636731407474,
      "grad_norm": 0.12091448903083801,
      "learning_rate": 2.9439982077317925e-06,
      "loss": 0.0123,
      "step": 2161680
    },
    {
      "epoch": 3.5376694618461277,
      "grad_norm": 0.1606205552816391,
      "learning_rate": 2.9439323155182752e-06,
      "loss": 0.0125,
      "step": 2161700
    },
    {
      "epoch": 3.537702192284781,
      "grad_norm": 0.9537831544876099,
      "learning_rate": 2.943866423304758e-06,
      "loss": 0.0118,
      "step": 2161720
    },
    {
      "epoch": 3.5377349227234345,
      "grad_norm": 0.11758211255073547,
      "learning_rate": 2.943800531091241e-06,
      "loss": 0.0147,
      "step": 2161740
    },
    {
      "epoch": 3.5377676531620876,
      "grad_norm": 0.18166576325893402,
      "learning_rate": 2.943734638877724e-06,
      "loss": 0.0101,
      "step": 2161760
    },
    {
      "epoch": 3.5378003836007412,
      "grad_norm": 0.27078554034233093,
      "learning_rate": 2.9436687466642066e-06,
      "loss": 0.0107,
      "step": 2161780
    },
    {
      "epoch": 3.5378331140393944,
      "grad_norm": 0.32365599274635315,
      "learning_rate": 2.94360285445069e-06,
      "loss": 0.0112,
      "step": 2161800
    },
    {
      "epoch": 3.5378658444780475,
      "grad_norm": 0.14776034653186798,
      "learning_rate": 2.943536962237173e-06,
      "loss": 0.0131,
      "step": 2161820
    },
    {
      "epoch": 3.537898574916701,
      "grad_norm": 0.4108065366744995,
      "learning_rate": 2.9434710700236557e-06,
      "loss": 0.0105,
      "step": 2161840
    },
    {
      "epoch": 3.5379313053553543,
      "grad_norm": 0.15125735104084015,
      "learning_rate": 2.9434051778101384e-06,
      "loss": 0.01,
      "step": 2161860
    },
    {
      "epoch": 3.537964035794008,
      "grad_norm": 1.0037877559661865,
      "learning_rate": 2.9433392855966216e-06,
      "loss": 0.0118,
      "step": 2161880
    },
    {
      "epoch": 3.537996766232661,
      "grad_norm": 0.34957173466682434,
      "learning_rate": 2.9432733933831043e-06,
      "loss": 0.0116,
      "step": 2161900
    },
    {
      "epoch": 3.5380294966713146,
      "grad_norm": 0.3313768804073334,
      "learning_rate": 2.943207501169587e-06,
      "loss": 0.01,
      "step": 2161920
    },
    {
      "epoch": 3.5380622271099678,
      "grad_norm": 0.26695752143859863,
      "learning_rate": 2.94314160895607e-06,
      "loss": 0.01,
      "step": 2161940
    },
    {
      "epoch": 3.538094957548621,
      "grad_norm": 0.22155974805355072,
      "learning_rate": 2.943075716742553e-06,
      "loss": 0.0107,
      "step": 2161960
    },
    {
      "epoch": 3.5381276879872745,
      "grad_norm": 0.21946172416210175,
      "learning_rate": 2.9430098245290357e-06,
      "loss": 0.0113,
      "step": 2161980
    },
    {
      "epoch": 3.5381604184259277,
      "grad_norm": 0.15192346274852753,
      "learning_rate": 2.9429439323155184e-06,
      "loss": 0.0158,
      "step": 2162000
    },
    {
      "epoch": 3.538193148864581,
      "grad_norm": 0.09916692227125168,
      "learning_rate": 2.942878040102001e-06,
      "loss": 0.0101,
      "step": 2162020
    },
    {
      "epoch": 3.5382258793032344,
      "grad_norm": 0.4132457375526428,
      "learning_rate": 2.9428121478884843e-06,
      "loss": 0.013,
      "step": 2162040
    },
    {
      "epoch": 3.538258609741888,
      "grad_norm": 0.6106852293014526,
      "learning_rate": 2.942746255674967e-06,
      "loss": 0.0065,
      "step": 2162060
    },
    {
      "epoch": 3.538291340180541,
      "grad_norm": 0.2756260633468628,
      "learning_rate": 2.94268036346145e-06,
      "loss": 0.009,
      "step": 2162080
    },
    {
      "epoch": 3.5383240706191943,
      "grad_norm": 0.12145520001649857,
      "learning_rate": 2.9426144712479326e-06,
      "loss": 0.0134,
      "step": 2162100
    },
    {
      "epoch": 3.538356801057848,
      "grad_norm": 0.5694571733474731,
      "learning_rate": 2.9425485790344153e-06,
      "loss": 0.0083,
      "step": 2162120
    },
    {
      "epoch": 3.538389531496501,
      "grad_norm": 0.3188260495662689,
      "learning_rate": 2.942482686820899e-06,
      "loss": 0.008,
      "step": 2162140
    },
    {
      "epoch": 3.538422261935154,
      "grad_norm": 0.21311785280704498,
      "learning_rate": 2.9424167946073816e-06,
      "loss": 0.0109,
      "step": 2162160
    },
    {
      "epoch": 3.538454992373808,
      "grad_norm": 0.48764026165008545,
      "learning_rate": 2.9423509023938644e-06,
      "loss": 0.0148,
      "step": 2162180
    },
    {
      "epoch": 3.5384877228124614,
      "grad_norm": 0.7685686945915222,
      "learning_rate": 2.9422850101803475e-06,
      "loss": 0.0111,
      "step": 2162200
    },
    {
      "epoch": 3.5385204532511145,
      "grad_norm": 0.17260104417800903,
      "learning_rate": 2.9422191179668303e-06,
      "loss": 0.0113,
      "step": 2162220
    },
    {
      "epoch": 3.5385531836897677,
      "grad_norm": 0.1433713287115097,
      "learning_rate": 2.942153225753313e-06,
      "loss": 0.0146,
      "step": 2162240
    },
    {
      "epoch": 3.5385859141284213,
      "grad_norm": 0.22188346087932587,
      "learning_rate": 2.9420873335397957e-06,
      "loss": 0.0099,
      "step": 2162260
    },
    {
      "epoch": 3.5386186445670744,
      "grad_norm": 0.09482024610042572,
      "learning_rate": 2.942021441326279e-06,
      "loss": 0.0152,
      "step": 2162280
    },
    {
      "epoch": 3.5386513750057276,
      "grad_norm": 0.15909476578235626,
      "learning_rate": 2.9419555491127616e-06,
      "loss": 0.0083,
      "step": 2162300
    },
    {
      "epoch": 3.538684105444381,
      "grad_norm": 0.2826116681098938,
      "learning_rate": 2.9418896568992444e-06,
      "loss": 0.01,
      "step": 2162320
    },
    {
      "epoch": 3.5387168358830348,
      "grad_norm": 0.6061310172080994,
      "learning_rate": 2.941823764685727e-06,
      "loss": 0.0117,
      "step": 2162340
    },
    {
      "epoch": 3.538749566321688,
      "grad_norm": 0.37439993023872375,
      "learning_rate": 2.9417578724722103e-06,
      "loss": 0.0099,
      "step": 2162360
    },
    {
      "epoch": 3.538782296760341,
      "grad_norm": 0.21711483597755432,
      "learning_rate": 2.941691980258693e-06,
      "loss": 0.0108,
      "step": 2162380
    },
    {
      "epoch": 3.5388150271989947,
      "grad_norm": 0.1439305543899536,
      "learning_rate": 2.9416260880451758e-06,
      "loss": 0.0137,
      "step": 2162400
    },
    {
      "epoch": 3.538847757637648,
      "grad_norm": 0.3724822402000427,
      "learning_rate": 2.9415601958316585e-06,
      "loss": 0.0096,
      "step": 2162420
    },
    {
      "epoch": 3.538880488076301,
      "grad_norm": 0.32977384328842163,
      "learning_rate": 2.9414943036181417e-06,
      "loss": 0.0091,
      "step": 2162440
    },
    {
      "epoch": 3.5389132185149546,
      "grad_norm": 0.44184353947639465,
      "learning_rate": 2.9414284114046244e-06,
      "loss": 0.0115,
      "step": 2162460
    },
    {
      "epoch": 3.538945948953608,
      "grad_norm": 0.16211368143558502,
      "learning_rate": 2.941362519191107e-06,
      "loss": 0.0121,
      "step": 2162480
    },
    {
      "epoch": 3.5389786793922613,
      "grad_norm": 0.18108926713466644,
      "learning_rate": 2.9412966269775907e-06,
      "loss": 0.0145,
      "step": 2162500
    },
    {
      "epoch": 3.5390114098309144,
      "grad_norm": 0.30691179633140564,
      "learning_rate": 2.9412307347640735e-06,
      "loss": 0.0094,
      "step": 2162520
    },
    {
      "epoch": 3.539044140269568,
      "grad_norm": 0.273742139339447,
      "learning_rate": 2.9411648425505562e-06,
      "loss": 0.0107,
      "step": 2162540
    },
    {
      "epoch": 3.539076870708221,
      "grad_norm": 0.32785338163375854,
      "learning_rate": 2.941098950337039e-06,
      "loss": 0.0109,
      "step": 2162560
    },
    {
      "epoch": 3.5391096011468743,
      "grad_norm": 0.16917620599269867,
      "learning_rate": 2.941033058123522e-06,
      "loss": 0.0082,
      "step": 2162580
    },
    {
      "epoch": 3.539142331585528,
      "grad_norm": 0.2506064772605896,
      "learning_rate": 2.940967165910005e-06,
      "loss": 0.0115,
      "step": 2162600
    },
    {
      "epoch": 3.539175062024181,
      "grad_norm": 0.3813694715499878,
      "learning_rate": 2.9409012736964876e-06,
      "loss": 0.0125,
      "step": 2162620
    },
    {
      "epoch": 3.5392077924628347,
      "grad_norm": 0.09491295367479324,
      "learning_rate": 2.9408353814829703e-06,
      "loss": 0.0101,
      "step": 2162640
    },
    {
      "epoch": 3.539240522901488,
      "grad_norm": 0.6015945672988892,
      "learning_rate": 2.940769489269453e-06,
      "loss": 0.0126,
      "step": 2162660
    },
    {
      "epoch": 3.5392732533401414,
      "grad_norm": 0.23277397453784943,
      "learning_rate": 2.9407035970559362e-06,
      "loss": 0.0127,
      "step": 2162680
    },
    {
      "epoch": 3.5393059837787946,
      "grad_norm": 0.38711944222450256,
      "learning_rate": 2.940637704842419e-06,
      "loss": 0.0173,
      "step": 2162700
    },
    {
      "epoch": 3.5393387142174477,
      "grad_norm": 0.1685417741537094,
      "learning_rate": 2.9405718126289017e-06,
      "loss": 0.0091,
      "step": 2162720
    },
    {
      "epoch": 3.5393714446561013,
      "grad_norm": 0.6547702550888062,
      "learning_rate": 2.9405059204153845e-06,
      "loss": 0.012,
      "step": 2162740
    },
    {
      "epoch": 3.5394041750947545,
      "grad_norm": 0.19774861633777618,
      "learning_rate": 2.9404400282018676e-06,
      "loss": 0.0099,
      "step": 2162760
    },
    {
      "epoch": 3.539436905533408,
      "grad_norm": 0.8150924444198608,
      "learning_rate": 2.9403741359883504e-06,
      "loss": 0.0189,
      "step": 2162780
    },
    {
      "epoch": 3.539469635972061,
      "grad_norm": 0.3508249521255493,
      "learning_rate": 2.940308243774833e-06,
      "loss": 0.0137,
      "step": 2162800
    },
    {
      "epoch": 3.539502366410715,
      "grad_norm": 0.1405901163816452,
      "learning_rate": 2.940242351561316e-06,
      "loss": 0.0124,
      "step": 2162820
    },
    {
      "epoch": 3.539535096849368,
      "grad_norm": 0.21135735511779785,
      "learning_rate": 2.9401764593477994e-06,
      "loss": 0.0127,
      "step": 2162840
    },
    {
      "epoch": 3.539567827288021,
      "grad_norm": 0.3319450914859772,
      "learning_rate": 2.940110567134282e-06,
      "loss": 0.0093,
      "step": 2162860
    },
    {
      "epoch": 3.5396005577266747,
      "grad_norm": 0.34945759177207947,
      "learning_rate": 2.940044674920765e-06,
      "loss": 0.0129,
      "step": 2162880
    },
    {
      "epoch": 3.539633288165328,
      "grad_norm": 0.5319458842277527,
      "learning_rate": 2.939978782707248e-06,
      "loss": 0.0172,
      "step": 2162900
    },
    {
      "epoch": 3.5396660186039814,
      "grad_norm": 0.2694256007671356,
      "learning_rate": 2.939912890493731e-06,
      "loss": 0.0108,
      "step": 2162920
    },
    {
      "epoch": 3.5396987490426346,
      "grad_norm": 0.7527240514755249,
      "learning_rate": 2.9398469982802136e-06,
      "loss": 0.0097,
      "step": 2162940
    },
    {
      "epoch": 3.539731479481288,
      "grad_norm": 0.19694043695926666,
      "learning_rate": 2.9397811060666963e-06,
      "loss": 0.0115,
      "step": 2162960
    },
    {
      "epoch": 3.5397642099199413,
      "grad_norm": 0.4000485837459564,
      "learning_rate": 2.9397152138531795e-06,
      "loss": 0.0098,
      "step": 2162980
    },
    {
      "epoch": 3.5397969403585945,
      "grad_norm": 0.4050978720188141,
      "learning_rate": 2.939649321639662e-06,
      "loss": 0.0084,
      "step": 2163000
    },
    {
      "epoch": 3.539829670797248,
      "grad_norm": 0.0794633999466896,
      "learning_rate": 2.939583429426145e-06,
      "loss": 0.0081,
      "step": 2163020
    },
    {
      "epoch": 3.5398624012359012,
      "grad_norm": 0.13708685338497162,
      "learning_rate": 2.9395175372126277e-06,
      "loss": 0.0103,
      "step": 2163040
    },
    {
      "epoch": 3.539895131674555,
      "grad_norm": 0.3369658291339874,
      "learning_rate": 2.9394516449991104e-06,
      "loss": 0.0103,
      "step": 2163060
    },
    {
      "epoch": 3.539927862113208,
      "grad_norm": 0.049331724643707275,
      "learning_rate": 2.9393857527855936e-06,
      "loss": 0.0127,
      "step": 2163080
    },
    {
      "epoch": 3.5399605925518616,
      "grad_norm": 0.3752765357494354,
      "learning_rate": 2.9393198605720763e-06,
      "loss": 0.0129,
      "step": 2163100
    },
    {
      "epoch": 3.5399933229905147,
      "grad_norm": 0.18260546028614044,
      "learning_rate": 2.939253968358559e-06,
      "loss": 0.0086,
      "step": 2163120
    },
    {
      "epoch": 3.540026053429168,
      "grad_norm": 0.14173398911952972,
      "learning_rate": 2.939188076145042e-06,
      "loss": 0.0159,
      "step": 2163140
    },
    {
      "epoch": 3.5400587838678215,
      "grad_norm": 0.07816486805677414,
      "learning_rate": 2.939122183931525e-06,
      "loss": 0.0087,
      "step": 2163160
    },
    {
      "epoch": 3.5400915143064746,
      "grad_norm": 0.17032325267791748,
      "learning_rate": 2.9390562917180077e-06,
      "loss": 0.0173,
      "step": 2163180
    },
    {
      "epoch": 3.540124244745128,
      "grad_norm": 0.29851120710372925,
      "learning_rate": 2.938990399504491e-06,
      "loss": 0.0138,
      "step": 2163200
    },
    {
      "epoch": 3.5401569751837814,
      "grad_norm": 0.21291251480579376,
      "learning_rate": 2.938924507290974e-06,
      "loss": 0.0152,
      "step": 2163220
    },
    {
      "epoch": 3.540189705622435,
      "grad_norm": 0.43419313430786133,
      "learning_rate": 2.9388586150774568e-06,
      "loss": 0.0105,
      "step": 2163240
    },
    {
      "epoch": 3.540222436061088,
      "grad_norm": 0.2512594163417816,
      "learning_rate": 2.9387927228639395e-06,
      "loss": 0.0096,
      "step": 2163260
    },
    {
      "epoch": 3.5402551664997413,
      "grad_norm": 0.46857523918151855,
      "learning_rate": 2.9387268306504222e-06,
      "loss": 0.0104,
      "step": 2163280
    },
    {
      "epoch": 3.540287896938395,
      "grad_norm": 0.4815799295902252,
      "learning_rate": 2.9386609384369054e-06,
      "loss": 0.0095,
      "step": 2163300
    },
    {
      "epoch": 3.540320627377048,
      "grad_norm": 0.18037429451942444,
      "learning_rate": 2.938595046223388e-06,
      "loss": 0.0166,
      "step": 2163320
    },
    {
      "epoch": 3.5403533578157016,
      "grad_norm": 0.16622841358184814,
      "learning_rate": 2.938529154009871e-06,
      "loss": 0.0107,
      "step": 2163340
    },
    {
      "epoch": 3.5403860882543547,
      "grad_norm": 0.2974317967891693,
      "learning_rate": 2.9384632617963536e-06,
      "loss": 0.0091,
      "step": 2163360
    },
    {
      "epoch": 3.5404188186930083,
      "grad_norm": 0.36429429054260254,
      "learning_rate": 2.938397369582837e-06,
      "loss": 0.0074,
      "step": 2163380
    },
    {
      "epoch": 3.5404515491316615,
      "grad_norm": 0.17743758857250214,
      "learning_rate": 2.9383314773693195e-06,
      "loss": 0.0149,
      "step": 2163400
    },
    {
      "epoch": 3.5404842795703146,
      "grad_norm": 0.18567785620689392,
      "learning_rate": 2.9382655851558023e-06,
      "loss": 0.0124,
      "step": 2163420
    },
    {
      "epoch": 3.5405170100089682,
      "grad_norm": 0.22299590706825256,
      "learning_rate": 2.938199692942285e-06,
      "loss": 0.0137,
      "step": 2163440
    },
    {
      "epoch": 3.5405497404476214,
      "grad_norm": 0.4086759686470032,
      "learning_rate": 2.938133800728768e-06,
      "loss": 0.0109,
      "step": 2163460
    },
    {
      "epoch": 3.540582470886275,
      "grad_norm": 0.1546419858932495,
      "learning_rate": 2.938067908515251e-06,
      "loss": 0.0134,
      "step": 2163480
    },
    {
      "epoch": 3.540615201324928,
      "grad_norm": 0.5714835524559021,
      "learning_rate": 2.9380020163017337e-06,
      "loss": 0.0126,
      "step": 2163500
    },
    {
      "epoch": 3.5406479317635817,
      "grad_norm": 0.3085348606109619,
      "learning_rate": 2.9379361240882164e-06,
      "loss": 0.007,
      "step": 2163520
    },
    {
      "epoch": 3.540680662202235,
      "grad_norm": 0.1546012908220291,
      "learning_rate": 2.937870231874699e-06,
      "loss": 0.0106,
      "step": 2163540
    },
    {
      "epoch": 3.540713392640888,
      "grad_norm": 0.5241180658340454,
      "learning_rate": 2.9378043396611827e-06,
      "loss": 0.0138,
      "step": 2163560
    },
    {
      "epoch": 3.5407461230795416,
      "grad_norm": 0.07592234015464783,
      "learning_rate": 2.9377384474476655e-06,
      "loss": 0.0112,
      "step": 2163580
    },
    {
      "epoch": 3.5407788535181948,
      "grad_norm": 0.09029074758291245,
      "learning_rate": 2.937672555234148e-06,
      "loss": 0.018,
      "step": 2163600
    },
    {
      "epoch": 3.540811583956848,
      "grad_norm": 0.2542584538459778,
      "learning_rate": 2.9376066630206314e-06,
      "loss": 0.0138,
      "step": 2163620
    },
    {
      "epoch": 3.5408443143955015,
      "grad_norm": 0.4387199580669403,
      "learning_rate": 2.937540770807114e-06,
      "loss": 0.0174,
      "step": 2163640
    },
    {
      "epoch": 3.540877044834155,
      "grad_norm": 0.13772989809513092,
      "learning_rate": 2.937474878593597e-06,
      "loss": 0.0175,
      "step": 2163660
    },
    {
      "epoch": 3.5409097752728083,
      "grad_norm": 0.13040421903133392,
      "learning_rate": 2.9374089863800796e-06,
      "loss": 0.0115,
      "step": 2163680
    },
    {
      "epoch": 3.5409425057114614,
      "grad_norm": 0.0562872588634491,
      "learning_rate": 2.9373430941665627e-06,
      "loss": 0.0106,
      "step": 2163700
    },
    {
      "epoch": 3.540975236150115,
      "grad_norm": 0.08104388415813446,
      "learning_rate": 2.9372772019530455e-06,
      "loss": 0.0085,
      "step": 2163720
    },
    {
      "epoch": 3.541007966588768,
      "grad_norm": 0.17817677557468414,
      "learning_rate": 2.9372113097395282e-06,
      "loss": 0.0114,
      "step": 2163740
    },
    {
      "epoch": 3.5410406970274213,
      "grad_norm": 0.12239370495080948,
      "learning_rate": 2.937145417526011e-06,
      "loss": 0.0092,
      "step": 2163760
    },
    {
      "epoch": 3.541073427466075,
      "grad_norm": 0.5433343052864075,
      "learning_rate": 2.937079525312494e-06,
      "loss": 0.008,
      "step": 2163780
    },
    {
      "epoch": 3.5411061579047285,
      "grad_norm": 0.18510247766971588,
      "learning_rate": 2.937013633098977e-06,
      "loss": 0.0096,
      "step": 2163800
    },
    {
      "epoch": 3.5411388883433816,
      "grad_norm": 0.5821171402931213,
      "learning_rate": 2.9369477408854596e-06,
      "loss": 0.0095,
      "step": 2163820
    },
    {
      "epoch": 3.541171618782035,
      "grad_norm": 0.24610760807991028,
      "learning_rate": 2.9368818486719423e-06,
      "loss": 0.0153,
      "step": 2163840
    },
    {
      "epoch": 3.5412043492206884,
      "grad_norm": 1.01044499874115,
      "learning_rate": 2.9368159564584255e-06,
      "loss": 0.0141,
      "step": 2163860
    },
    {
      "epoch": 3.5412370796593415,
      "grad_norm": 0.3641507923603058,
      "learning_rate": 2.9367500642449083e-06,
      "loss": 0.0086,
      "step": 2163880
    },
    {
      "epoch": 3.5412698100979947,
      "grad_norm": 0.16691426932811737,
      "learning_rate": 2.9366841720313914e-06,
      "loss": 0.0103,
      "step": 2163900
    },
    {
      "epoch": 3.5413025405366483,
      "grad_norm": 0.07253096252679825,
      "learning_rate": 2.9366182798178746e-06,
      "loss": 0.007,
      "step": 2163920
    },
    {
      "epoch": 3.541335270975302,
      "grad_norm": 0.28315261006355286,
      "learning_rate": 2.9365523876043573e-06,
      "loss": 0.0095,
      "step": 2163940
    },
    {
      "epoch": 3.541368001413955,
      "grad_norm": 0.22387440502643585,
      "learning_rate": 2.93648649539084e-06,
      "loss": 0.0093,
      "step": 2163960
    },
    {
      "epoch": 3.541400731852608,
      "grad_norm": 0.26359713077545166,
      "learning_rate": 2.936420603177323e-06,
      "loss": 0.0106,
      "step": 2163980
    },
    {
      "epoch": 3.5414334622912618,
      "grad_norm": 0.20259562134742737,
      "learning_rate": 2.936354710963806e-06,
      "loss": 0.0074,
      "step": 2164000
    },
    {
      "epoch": 3.541466192729915,
      "grad_norm": 0.4624043107032776,
      "learning_rate": 2.9362888187502887e-06,
      "loss": 0.0105,
      "step": 2164020
    },
    {
      "epoch": 3.541498923168568,
      "grad_norm": 0.20222099125385284,
      "learning_rate": 2.9362229265367714e-06,
      "loss": 0.0137,
      "step": 2164040
    },
    {
      "epoch": 3.5415316536072217,
      "grad_norm": 0.1376979947090149,
      "learning_rate": 2.936157034323254e-06,
      "loss": 0.0091,
      "step": 2164060
    },
    {
      "epoch": 3.5415643840458753,
      "grad_norm": 0.13584893941879272,
      "learning_rate": 2.936091142109737e-06,
      "loss": 0.0066,
      "step": 2164080
    },
    {
      "epoch": 3.5415971144845284,
      "grad_norm": 0.3517664074897766,
      "learning_rate": 2.93602524989622e-06,
      "loss": 0.0106,
      "step": 2164100
    },
    {
      "epoch": 3.5416298449231816,
      "grad_norm": 0.48090440034866333,
      "learning_rate": 2.935959357682703e-06,
      "loss": 0.0142,
      "step": 2164120
    },
    {
      "epoch": 3.541662575361835,
      "grad_norm": 0.13821884989738464,
      "learning_rate": 2.9358934654691856e-06,
      "loss": 0.0133,
      "step": 2164140
    },
    {
      "epoch": 3.5416953058004883,
      "grad_norm": 0.1968066692352295,
      "learning_rate": 2.9358275732556683e-06,
      "loss": 0.0094,
      "step": 2164160
    },
    {
      "epoch": 3.5417280362391415,
      "grad_norm": 0.19194625318050385,
      "learning_rate": 2.9357616810421515e-06,
      "loss": 0.0109,
      "step": 2164180
    },
    {
      "epoch": 3.541760766677795,
      "grad_norm": 0.22950544953346252,
      "learning_rate": 2.935695788828634e-06,
      "loss": 0.0077,
      "step": 2164200
    },
    {
      "epoch": 3.541793497116448,
      "grad_norm": 0.28179749846458435,
      "learning_rate": 2.935629896615117e-06,
      "loss": 0.0112,
      "step": 2164220
    },
    {
      "epoch": 3.541826227555102,
      "grad_norm": 0.29122626781463623,
      "learning_rate": 2.9355640044015997e-06,
      "loss": 0.0139,
      "step": 2164240
    },
    {
      "epoch": 3.541858957993755,
      "grad_norm": 0.2538945972919464,
      "learning_rate": 2.9354981121880833e-06,
      "loss": 0.0165,
      "step": 2164260
    },
    {
      "epoch": 3.5418916884324085,
      "grad_norm": 0.13157720863819122,
      "learning_rate": 2.935432219974566e-06,
      "loss": 0.0101,
      "step": 2164280
    },
    {
      "epoch": 3.5419244188710617,
      "grad_norm": 0.35838696360588074,
      "learning_rate": 2.9353663277610488e-06,
      "loss": 0.0105,
      "step": 2164300
    },
    {
      "epoch": 3.541957149309715,
      "grad_norm": 0.20671109855175018,
      "learning_rate": 2.935300435547532e-06,
      "loss": 0.0086,
      "step": 2164320
    },
    {
      "epoch": 3.5419898797483684,
      "grad_norm": 0.18760232627391815,
      "learning_rate": 2.9352345433340147e-06,
      "loss": 0.0086,
      "step": 2164340
    },
    {
      "epoch": 3.5420226101870216,
      "grad_norm": 0.14171797037124634,
      "learning_rate": 2.9351686511204974e-06,
      "loss": 0.0105,
      "step": 2164360
    },
    {
      "epoch": 3.542055340625675,
      "grad_norm": 0.32849404215812683,
      "learning_rate": 2.93510275890698e-06,
      "loss": 0.0129,
      "step": 2164380
    },
    {
      "epoch": 3.5420880710643283,
      "grad_norm": 0.4445694088935852,
      "learning_rate": 2.9350368666934633e-06,
      "loss": 0.0085,
      "step": 2164400
    },
    {
      "epoch": 3.542120801502982,
      "grad_norm": 0.46262678503990173,
      "learning_rate": 2.934970974479946e-06,
      "loss": 0.009,
      "step": 2164420
    },
    {
      "epoch": 3.542153531941635,
      "grad_norm": 0.14715994894504547,
      "learning_rate": 2.9349050822664288e-06,
      "loss": 0.013,
      "step": 2164440
    },
    {
      "epoch": 3.542186262380288,
      "grad_norm": 0.15718771517276764,
      "learning_rate": 2.9348391900529115e-06,
      "loss": 0.0066,
      "step": 2164460
    },
    {
      "epoch": 3.542218992818942,
      "grad_norm": 0.30034339427948,
      "learning_rate": 2.9347732978393943e-06,
      "loss": 0.0164,
      "step": 2164480
    },
    {
      "epoch": 3.542251723257595,
      "grad_norm": 0.741577684879303,
      "learning_rate": 2.9347074056258774e-06,
      "loss": 0.0091,
      "step": 2164500
    },
    {
      "epoch": 3.5422844536962486,
      "grad_norm": 0.3898026645183563,
      "learning_rate": 2.93464151341236e-06,
      "loss": 0.0094,
      "step": 2164520
    },
    {
      "epoch": 3.5423171841349017,
      "grad_norm": 0.31316429376602173,
      "learning_rate": 2.934575621198843e-06,
      "loss": 0.0144,
      "step": 2164540
    },
    {
      "epoch": 3.5423499145735553,
      "grad_norm": 0.2293245643377304,
      "learning_rate": 2.9345097289853256e-06,
      "loss": 0.0121,
      "step": 2164560
    },
    {
      "epoch": 3.5423826450122085,
      "grad_norm": 0.10692760348320007,
      "learning_rate": 2.934443836771809e-06,
      "loss": 0.0097,
      "step": 2164580
    },
    {
      "epoch": 3.5424153754508616,
      "grad_norm": 0.5194290280342102,
      "learning_rate": 2.934377944558292e-06,
      "loss": 0.0119,
      "step": 2164600
    },
    {
      "epoch": 3.542448105889515,
      "grad_norm": 0.20237761735916138,
      "learning_rate": 2.9343120523447747e-06,
      "loss": 0.0096,
      "step": 2164620
    },
    {
      "epoch": 3.5424808363281683,
      "grad_norm": 0.17011047899723053,
      "learning_rate": 2.934246160131258e-06,
      "loss": 0.0209,
      "step": 2164640
    },
    {
      "epoch": 3.542513566766822,
      "grad_norm": 0.535885751247406,
      "learning_rate": 2.9341802679177406e-06,
      "loss": 0.0135,
      "step": 2164660
    },
    {
      "epoch": 3.542546297205475,
      "grad_norm": 0.08815707266330719,
      "learning_rate": 2.9341143757042233e-06,
      "loss": 0.0077,
      "step": 2164680
    },
    {
      "epoch": 3.5425790276441287,
      "grad_norm": 0.48464053869247437,
      "learning_rate": 2.934048483490706e-06,
      "loss": 0.0126,
      "step": 2164700
    },
    {
      "epoch": 3.542611758082782,
      "grad_norm": 0.444098562002182,
      "learning_rate": 2.9339825912771892e-06,
      "loss": 0.0091,
      "step": 2164720
    },
    {
      "epoch": 3.542644488521435,
      "grad_norm": 0.09361784905195236,
      "learning_rate": 2.933916699063672e-06,
      "loss": 0.0071,
      "step": 2164740
    },
    {
      "epoch": 3.5426772189600886,
      "grad_norm": 0.17571474611759186,
      "learning_rate": 2.9338508068501547e-06,
      "loss": 0.0166,
      "step": 2164760
    },
    {
      "epoch": 3.5427099493987417,
      "grad_norm": 0.32766997814178467,
      "learning_rate": 2.9337849146366375e-06,
      "loss": 0.0139,
      "step": 2164780
    },
    {
      "epoch": 3.5427426798373953,
      "grad_norm": 0.3286745548248291,
      "learning_rate": 2.9337190224231206e-06,
      "loss": 0.0111,
      "step": 2164800
    },
    {
      "epoch": 3.5427754102760485,
      "grad_norm": 0.3140104413032532,
      "learning_rate": 2.9336531302096034e-06,
      "loss": 0.0096,
      "step": 2164820
    },
    {
      "epoch": 3.542808140714702,
      "grad_norm": 0.39905408024787903,
      "learning_rate": 2.933587237996086e-06,
      "loss": 0.0138,
      "step": 2164840
    },
    {
      "epoch": 3.542840871153355,
      "grad_norm": 0.3245524764060974,
      "learning_rate": 2.933521345782569e-06,
      "loss": 0.0164,
      "step": 2164860
    },
    {
      "epoch": 3.5428736015920084,
      "grad_norm": 0.212363138794899,
      "learning_rate": 2.933455453569052e-06,
      "loss": 0.0106,
      "step": 2164880
    },
    {
      "epoch": 3.542906332030662,
      "grad_norm": 0.07719465345144272,
      "learning_rate": 2.9333895613555348e-06,
      "loss": 0.0107,
      "step": 2164900
    },
    {
      "epoch": 3.542939062469315,
      "grad_norm": 0.25202885270118713,
      "learning_rate": 2.9333236691420175e-06,
      "loss": 0.0119,
      "step": 2164920
    },
    {
      "epoch": 3.5429717929079687,
      "grad_norm": 0.18945437669754028,
      "learning_rate": 2.9332577769285002e-06,
      "loss": 0.0087,
      "step": 2164940
    },
    {
      "epoch": 3.543004523346622,
      "grad_norm": 0.16936852037906647,
      "learning_rate": 2.933191884714984e-06,
      "loss": 0.009,
      "step": 2164960
    },
    {
      "epoch": 3.5430372537852755,
      "grad_norm": 0.3318684697151184,
      "learning_rate": 2.9331259925014666e-06,
      "loss": 0.0099,
      "step": 2164980
    },
    {
      "epoch": 3.5430699842239286,
      "grad_norm": 0.34468477964401245,
      "learning_rate": 2.9330601002879493e-06,
      "loss": 0.0099,
      "step": 2165000
    },
    {
      "epoch": 3.5431027146625818,
      "grad_norm": 0.2988353967666626,
      "learning_rate": 2.932994208074432e-06,
      "loss": 0.0105,
      "step": 2165020
    },
    {
      "epoch": 3.5431354451012353,
      "grad_norm": 0.18089748919010162,
      "learning_rate": 2.932928315860915e-06,
      "loss": 0.0141,
      "step": 2165040
    },
    {
      "epoch": 3.5431681755398885,
      "grad_norm": 0.2660222053527832,
      "learning_rate": 2.932862423647398e-06,
      "loss": 0.0094,
      "step": 2165060
    },
    {
      "epoch": 3.5432009059785416,
      "grad_norm": 0.5499976277351379,
      "learning_rate": 2.9327965314338807e-06,
      "loss": 0.0101,
      "step": 2165080
    },
    {
      "epoch": 3.5432336364171952,
      "grad_norm": 0.37078481912612915,
      "learning_rate": 2.9327306392203634e-06,
      "loss": 0.0101,
      "step": 2165100
    },
    {
      "epoch": 3.543266366855849,
      "grad_norm": 0.14028623700141907,
      "learning_rate": 2.9326647470068466e-06,
      "loss": 0.0104,
      "step": 2165120
    },
    {
      "epoch": 3.543299097294502,
      "grad_norm": 0.28575006127357483,
      "learning_rate": 2.9325988547933293e-06,
      "loss": 0.012,
      "step": 2165140
    },
    {
      "epoch": 3.543331827733155,
      "grad_norm": 0.32787299156188965,
      "learning_rate": 2.932532962579812e-06,
      "loss": 0.0094,
      "step": 2165160
    },
    {
      "epoch": 3.5433645581718087,
      "grad_norm": 0.4314960539340973,
      "learning_rate": 2.932467070366295e-06,
      "loss": 0.0123,
      "step": 2165180
    },
    {
      "epoch": 3.543397288610462,
      "grad_norm": 0.18848784267902374,
      "learning_rate": 2.932401178152778e-06,
      "loss": 0.0084,
      "step": 2165200
    },
    {
      "epoch": 3.543430019049115,
      "grad_norm": 0.17742936313152313,
      "learning_rate": 2.9323352859392607e-06,
      "loss": 0.0092,
      "step": 2165220
    },
    {
      "epoch": 3.5434627494877686,
      "grad_norm": 0.20088858902454376,
      "learning_rate": 2.9322693937257434e-06,
      "loss": 0.009,
      "step": 2165240
    },
    {
      "epoch": 3.543495479926422,
      "grad_norm": 0.8365811705589294,
      "learning_rate": 2.932203501512226e-06,
      "loss": 0.009,
      "step": 2165260
    },
    {
      "epoch": 3.5435282103650754,
      "grad_norm": 0.26539528369903564,
      "learning_rate": 2.9321376092987094e-06,
      "loss": 0.0113,
      "step": 2165280
    },
    {
      "epoch": 3.5435609408037285,
      "grad_norm": 0.4029506742954254,
      "learning_rate": 2.9320717170851925e-06,
      "loss": 0.0118,
      "step": 2165300
    },
    {
      "epoch": 3.543593671242382,
      "grad_norm": 0.224935382604599,
      "learning_rate": 2.9320058248716753e-06,
      "loss": 0.0085,
      "step": 2165320
    },
    {
      "epoch": 3.5436264016810353,
      "grad_norm": 0.3360113501548767,
      "learning_rate": 2.9319399326581584e-06,
      "loss": 0.0146,
      "step": 2165340
    },
    {
      "epoch": 3.5436591321196884,
      "grad_norm": 0.7106328010559082,
      "learning_rate": 2.931874040444641e-06,
      "loss": 0.0125,
      "step": 2165360
    },
    {
      "epoch": 3.543691862558342,
      "grad_norm": 0.5509027242660522,
      "learning_rate": 2.931808148231124e-06,
      "loss": 0.0148,
      "step": 2165380
    },
    {
      "epoch": 3.5437245929969956,
      "grad_norm": 0.27473753690719604,
      "learning_rate": 2.9317422560176066e-06,
      "loss": 0.007,
      "step": 2165400
    },
    {
      "epoch": 3.5437573234356488,
      "grad_norm": 0.1272791028022766,
      "learning_rate": 2.93167636380409e-06,
      "loss": 0.0155,
      "step": 2165420
    },
    {
      "epoch": 3.543790053874302,
      "grad_norm": 0.32269349694252014,
      "learning_rate": 2.9316104715905725e-06,
      "loss": 0.0099,
      "step": 2165440
    },
    {
      "epoch": 3.5438227843129555,
      "grad_norm": 0.4834478497505188,
      "learning_rate": 2.9315445793770553e-06,
      "loss": 0.0072,
      "step": 2165460
    },
    {
      "epoch": 3.5438555147516086,
      "grad_norm": 0.4239669442176819,
      "learning_rate": 2.931478687163538e-06,
      "loss": 0.0119,
      "step": 2165480
    },
    {
      "epoch": 3.543888245190262,
      "grad_norm": 0.16545961797237396,
      "learning_rate": 2.9314127949500208e-06,
      "loss": 0.0067,
      "step": 2165500
    },
    {
      "epoch": 3.5439209756289154,
      "grad_norm": 0.12680596113204956,
      "learning_rate": 2.931346902736504e-06,
      "loss": 0.0138,
      "step": 2165520
    },
    {
      "epoch": 3.543953706067569,
      "grad_norm": 0.25326892733573914,
      "learning_rate": 2.9312810105229867e-06,
      "loss": 0.0124,
      "step": 2165540
    },
    {
      "epoch": 3.543986436506222,
      "grad_norm": 0.17986278235912323,
      "learning_rate": 2.9312151183094694e-06,
      "loss": 0.0096,
      "step": 2165560
    },
    {
      "epoch": 3.5440191669448753,
      "grad_norm": 0.4102286100387573,
      "learning_rate": 2.931149226095952e-06,
      "loss": 0.0095,
      "step": 2165580
    },
    {
      "epoch": 3.544051897383529,
      "grad_norm": 0.3082342743873596,
      "learning_rate": 2.9310833338824353e-06,
      "loss": 0.0093,
      "step": 2165600
    },
    {
      "epoch": 3.544084627822182,
      "grad_norm": 0.6867476105690002,
      "learning_rate": 2.931017441668918e-06,
      "loss": 0.0102,
      "step": 2165620
    },
    {
      "epoch": 3.544117358260835,
      "grad_norm": 0.19491292536258698,
      "learning_rate": 2.9309515494554008e-06,
      "loss": 0.0065,
      "step": 2165640
    },
    {
      "epoch": 3.5441500886994888,
      "grad_norm": 0.09343072772026062,
      "learning_rate": 2.9308856572418844e-06,
      "loss": 0.0124,
      "step": 2165660
    },
    {
      "epoch": 3.544182819138142,
      "grad_norm": 0.3474957048892975,
      "learning_rate": 2.930819765028367e-06,
      "loss": 0.011,
      "step": 2165680
    },
    {
      "epoch": 3.5442155495767955,
      "grad_norm": 0.08492034673690796,
      "learning_rate": 2.93075387281485e-06,
      "loss": 0.0125,
      "step": 2165700
    },
    {
      "epoch": 3.5442482800154487,
      "grad_norm": 0.15799376368522644,
      "learning_rate": 2.9306879806013326e-06,
      "loss": 0.0117,
      "step": 2165720
    },
    {
      "epoch": 3.5442810104541023,
      "grad_norm": 0.3715139627456665,
      "learning_rate": 2.9306220883878158e-06,
      "loss": 0.0117,
      "step": 2165740
    },
    {
      "epoch": 3.5443137408927554,
      "grad_norm": 0.3680330514907837,
      "learning_rate": 2.9305561961742985e-06,
      "loss": 0.0117,
      "step": 2165760
    },
    {
      "epoch": 3.5443464713314086,
      "grad_norm": 0.4494176506996155,
      "learning_rate": 2.9304903039607812e-06,
      "loss": 0.0095,
      "step": 2165780
    },
    {
      "epoch": 3.544379201770062,
      "grad_norm": 0.19773587584495544,
      "learning_rate": 2.930424411747264e-06,
      "loss": 0.0096,
      "step": 2165800
    },
    {
      "epoch": 3.5444119322087153,
      "grad_norm": 0.2244703620672226,
      "learning_rate": 2.930358519533747e-06,
      "loss": 0.0098,
      "step": 2165820
    },
    {
      "epoch": 3.544444662647369,
      "grad_norm": 0.28521493077278137,
      "learning_rate": 2.93029262732023e-06,
      "loss": 0.012,
      "step": 2165840
    },
    {
      "epoch": 3.544477393086022,
      "grad_norm": 0.900359034538269,
      "learning_rate": 2.9302267351067126e-06,
      "loss": 0.0114,
      "step": 2165860
    },
    {
      "epoch": 3.5445101235246756,
      "grad_norm": 0.2754562497138977,
      "learning_rate": 2.9301608428931954e-06,
      "loss": 0.0095,
      "step": 2165880
    },
    {
      "epoch": 3.544542853963329,
      "grad_norm": 0.06363940238952637,
      "learning_rate": 2.9300949506796785e-06,
      "loss": 0.0126,
      "step": 2165900
    },
    {
      "epoch": 3.544575584401982,
      "grad_norm": 0.42340561747550964,
      "learning_rate": 2.9300290584661613e-06,
      "loss": 0.0082,
      "step": 2165920
    },
    {
      "epoch": 3.5446083148406355,
      "grad_norm": 0.21906784176826477,
      "learning_rate": 2.929963166252644e-06,
      "loss": 0.0135,
      "step": 2165940
    },
    {
      "epoch": 3.5446410452792887,
      "grad_norm": 0.550826370716095,
      "learning_rate": 2.9298972740391267e-06,
      "loss": 0.0098,
      "step": 2165960
    },
    {
      "epoch": 3.5446737757179423,
      "grad_norm": 0.23767583072185516,
      "learning_rate": 2.9298313818256095e-06,
      "loss": 0.0176,
      "step": 2165980
    },
    {
      "epoch": 3.5447065061565954,
      "grad_norm": 0.29078248143196106,
      "learning_rate": 2.9297654896120926e-06,
      "loss": 0.0079,
      "step": 2166000
    },
    {
      "epoch": 3.544739236595249,
      "grad_norm": 0.03745584189891815,
      "learning_rate": 2.929699597398576e-06,
      "loss": 0.007,
      "step": 2166020
    },
    {
      "epoch": 3.544771967033902,
      "grad_norm": 0.20475761592388153,
      "learning_rate": 2.9296337051850585e-06,
      "loss": 0.0132,
      "step": 2166040
    },
    {
      "epoch": 3.5448046974725553,
      "grad_norm": 0.8137696981430054,
      "learning_rate": 2.9295678129715417e-06,
      "loss": 0.0163,
      "step": 2166060
    },
    {
      "epoch": 3.544837427911209,
      "grad_norm": 0.13980789482593536,
      "learning_rate": 2.9295019207580244e-06,
      "loss": 0.0118,
      "step": 2166080
    },
    {
      "epoch": 3.544870158349862,
      "grad_norm": 0.39753496646881104,
      "learning_rate": 2.929436028544507e-06,
      "loss": 0.0113,
      "step": 2166100
    },
    {
      "epoch": 3.5449028887885157,
      "grad_norm": 0.13884711265563965,
      "learning_rate": 2.92937013633099e-06,
      "loss": 0.0098,
      "step": 2166120
    },
    {
      "epoch": 3.544935619227169,
      "grad_norm": 0.1416628658771515,
      "learning_rate": 2.929304244117473e-06,
      "loss": 0.0169,
      "step": 2166140
    },
    {
      "epoch": 3.5449683496658224,
      "grad_norm": 0.12271644175052643,
      "learning_rate": 2.929238351903956e-06,
      "loss": 0.01,
      "step": 2166160
    },
    {
      "epoch": 3.5450010801044756,
      "grad_norm": 0.4903457760810852,
      "learning_rate": 2.9291724596904386e-06,
      "loss": 0.0112,
      "step": 2166180
    },
    {
      "epoch": 3.5450338105431287,
      "grad_norm": 0.06082042306661606,
      "learning_rate": 2.9291065674769213e-06,
      "loss": 0.0081,
      "step": 2166200
    },
    {
      "epoch": 3.5450665409817823,
      "grad_norm": 0.1170317679643631,
      "learning_rate": 2.9290406752634045e-06,
      "loss": 0.0147,
      "step": 2166220
    },
    {
      "epoch": 3.5450992714204355,
      "grad_norm": 0.35173389315605164,
      "learning_rate": 2.928974783049887e-06,
      "loss": 0.017,
      "step": 2166240
    },
    {
      "epoch": 3.545132001859089,
      "grad_norm": 0.24277500808238983,
      "learning_rate": 2.92890889083637e-06,
      "loss": 0.0114,
      "step": 2166260
    },
    {
      "epoch": 3.545164732297742,
      "grad_norm": 0.40269291400909424,
      "learning_rate": 2.9288429986228527e-06,
      "loss": 0.0106,
      "step": 2166280
    },
    {
      "epoch": 3.545197462736396,
      "grad_norm": 0.11588873714208603,
      "learning_rate": 2.928777106409336e-06,
      "loss": 0.0113,
      "step": 2166300
    },
    {
      "epoch": 3.545230193175049,
      "grad_norm": 0.556100606918335,
      "learning_rate": 2.9287112141958186e-06,
      "loss": 0.0162,
      "step": 2166320
    },
    {
      "epoch": 3.545262923613702,
      "grad_norm": 0.26068276166915894,
      "learning_rate": 2.9286453219823013e-06,
      "loss": 0.0166,
      "step": 2166340
    },
    {
      "epoch": 3.5452956540523557,
      "grad_norm": 0.028279658406972885,
      "learning_rate": 2.928579429768785e-06,
      "loss": 0.0067,
      "step": 2166360
    },
    {
      "epoch": 3.545328384491009,
      "grad_norm": 0.18437160551548004,
      "learning_rate": 2.9285135375552677e-06,
      "loss": 0.0089,
      "step": 2166380
    },
    {
      "epoch": 3.5453611149296624,
      "grad_norm": 0.9276158809661865,
      "learning_rate": 2.9284476453417504e-06,
      "loss": 0.011,
      "step": 2166400
    },
    {
      "epoch": 3.5453938453683156,
      "grad_norm": 0.12303610891103745,
      "learning_rate": 2.928381753128233e-06,
      "loss": 0.0113,
      "step": 2166420
    },
    {
      "epoch": 3.545426575806969,
      "grad_norm": 0.4387280344963074,
      "learning_rate": 2.9283158609147163e-06,
      "loss": 0.0111,
      "step": 2166440
    },
    {
      "epoch": 3.5454593062456223,
      "grad_norm": 0.15286900103092194,
      "learning_rate": 2.928249968701199e-06,
      "loss": 0.0108,
      "step": 2166460
    },
    {
      "epoch": 3.5454920366842755,
      "grad_norm": 0.21936626732349396,
      "learning_rate": 2.9281840764876818e-06,
      "loss": 0.0164,
      "step": 2166480
    },
    {
      "epoch": 3.545524767122929,
      "grad_norm": 0.7225528955459595,
      "learning_rate": 2.9281181842741645e-06,
      "loss": 0.0107,
      "step": 2166500
    },
    {
      "epoch": 3.5455574975615822,
      "grad_norm": 0.28175583481788635,
      "learning_rate": 2.9280522920606473e-06,
      "loss": 0.0122,
      "step": 2166520
    },
    {
      "epoch": 3.545590228000236,
      "grad_norm": 0.1789928376674652,
      "learning_rate": 2.9279863998471304e-06,
      "loss": 0.0108,
      "step": 2166540
    },
    {
      "epoch": 3.545622958438889,
      "grad_norm": 0.30612680315971375,
      "learning_rate": 2.927920507633613e-06,
      "loss": 0.0145,
      "step": 2166560
    },
    {
      "epoch": 3.5456556888775426,
      "grad_norm": 0.22765521705150604,
      "learning_rate": 2.927854615420096e-06,
      "loss": 0.0093,
      "step": 2166580
    },
    {
      "epoch": 3.5456884193161957,
      "grad_norm": 1.0294568538665771,
      "learning_rate": 2.9277887232065786e-06,
      "loss": 0.0198,
      "step": 2166600
    },
    {
      "epoch": 3.545721149754849,
      "grad_norm": 0.0704861730337143,
      "learning_rate": 2.927722830993062e-06,
      "loss": 0.0126,
      "step": 2166620
    },
    {
      "epoch": 3.5457538801935025,
      "grad_norm": 0.15716926753520966,
      "learning_rate": 2.9276569387795445e-06,
      "loss": 0.0138,
      "step": 2166640
    },
    {
      "epoch": 3.5457866106321556,
      "grad_norm": 0.4363120496273041,
      "learning_rate": 2.9275910465660273e-06,
      "loss": 0.0116,
      "step": 2166660
    },
    {
      "epoch": 3.5458193410708088,
      "grad_norm": 0.09638677537441254,
      "learning_rate": 2.92752515435251e-06,
      "loss": 0.0082,
      "step": 2166680
    },
    {
      "epoch": 3.5458520715094624,
      "grad_norm": 0.16304293274879456,
      "learning_rate": 2.927459262138993e-06,
      "loss": 0.0124,
      "step": 2166700
    },
    {
      "epoch": 3.545884801948116,
      "grad_norm": 0.07595852762460709,
      "learning_rate": 2.9273933699254764e-06,
      "loss": 0.0147,
      "step": 2166720
    },
    {
      "epoch": 3.545917532386769,
      "grad_norm": 0.15006625652313232,
      "learning_rate": 2.927327477711959e-06,
      "loss": 0.0118,
      "step": 2166740
    },
    {
      "epoch": 3.5459502628254223,
      "grad_norm": 0.6021105051040649,
      "learning_rate": 2.9272615854984423e-06,
      "loss": 0.0195,
      "step": 2166760
    },
    {
      "epoch": 3.545982993264076,
      "grad_norm": 0.4076705574989319,
      "learning_rate": 2.927195693284925e-06,
      "loss": 0.011,
      "step": 2166780
    },
    {
      "epoch": 3.546015723702729,
      "grad_norm": 0.2215626984834671,
      "learning_rate": 2.9271298010714077e-06,
      "loss": 0.0106,
      "step": 2166800
    },
    {
      "epoch": 3.546048454141382,
      "grad_norm": 0.25594189763069153,
      "learning_rate": 2.9270639088578905e-06,
      "loss": 0.012,
      "step": 2166820
    },
    {
      "epoch": 3.5460811845800357,
      "grad_norm": 0.4955216348171234,
      "learning_rate": 2.9269980166443736e-06,
      "loss": 0.0065,
      "step": 2166840
    },
    {
      "epoch": 3.5461139150186893,
      "grad_norm": 0.23439744114875793,
      "learning_rate": 2.9269321244308564e-06,
      "loss": 0.0102,
      "step": 2166860
    },
    {
      "epoch": 3.5461466454573425,
      "grad_norm": 0.08370331674814224,
      "learning_rate": 2.926866232217339e-06,
      "loss": 0.0171,
      "step": 2166880
    },
    {
      "epoch": 3.5461793758959956,
      "grad_norm": 0.705919623374939,
      "learning_rate": 2.926800340003822e-06,
      "loss": 0.0105,
      "step": 2166900
    },
    {
      "epoch": 3.5462121063346492,
      "grad_norm": 0.12483645230531693,
      "learning_rate": 2.9267344477903046e-06,
      "loss": 0.0086,
      "step": 2166920
    },
    {
      "epoch": 3.5462448367733024,
      "grad_norm": 0.4060676097869873,
      "learning_rate": 2.9266685555767878e-06,
      "loss": 0.0122,
      "step": 2166940
    },
    {
      "epoch": 3.5462775672119555,
      "grad_norm": 0.15234433114528656,
      "learning_rate": 2.9266026633632705e-06,
      "loss": 0.0077,
      "step": 2166960
    },
    {
      "epoch": 3.546310297650609,
      "grad_norm": 0.7425764799118042,
      "learning_rate": 2.9265367711497532e-06,
      "loss": 0.0206,
      "step": 2166980
    },
    {
      "epoch": 3.5463430280892627,
      "grad_norm": 0.45180487632751465,
      "learning_rate": 2.926470878936236e-06,
      "loss": 0.0138,
      "step": 2167000
    },
    {
      "epoch": 3.546375758527916,
      "grad_norm": 0.13411828875541687,
      "learning_rate": 2.926404986722719e-06,
      "loss": 0.0109,
      "step": 2167020
    },
    {
      "epoch": 3.546408488966569,
      "grad_norm": 0.5432806015014648,
      "learning_rate": 2.926339094509202e-06,
      "loss": 0.015,
      "step": 2167040
    },
    {
      "epoch": 3.5464412194052226,
      "grad_norm": 0.17637011408805847,
      "learning_rate": 2.926273202295685e-06,
      "loss": 0.0115,
      "step": 2167060
    },
    {
      "epoch": 3.5464739498438758,
      "grad_norm": 0.3724742829799652,
      "learning_rate": 2.926207310082168e-06,
      "loss": 0.0126,
      "step": 2167080
    },
    {
      "epoch": 3.546506680282529,
      "grad_norm": 0.21865850687026978,
      "learning_rate": 2.926141417868651e-06,
      "loss": 0.0101,
      "step": 2167100
    },
    {
      "epoch": 3.5465394107211825,
      "grad_norm": 0.1541752964258194,
      "learning_rate": 2.9260755256551337e-06,
      "loss": 0.0074,
      "step": 2167120
    },
    {
      "epoch": 3.546572141159836,
      "grad_norm": 0.15866997838020325,
      "learning_rate": 2.9260096334416164e-06,
      "loss": 0.013,
      "step": 2167140
    },
    {
      "epoch": 3.5466048715984893,
      "grad_norm": 0.3381289839744568,
      "learning_rate": 2.9259437412280996e-06,
      "loss": 0.008,
      "step": 2167160
    },
    {
      "epoch": 3.5466376020371424,
      "grad_norm": 0.5530375838279724,
      "learning_rate": 2.9258778490145823e-06,
      "loss": 0.0096,
      "step": 2167180
    },
    {
      "epoch": 3.546670332475796,
      "grad_norm": 0.13273002207279205,
      "learning_rate": 2.925811956801065e-06,
      "loss": 0.0135,
      "step": 2167200
    },
    {
      "epoch": 3.546703062914449,
      "grad_norm": 0.11738216131925583,
      "learning_rate": 2.925746064587548e-06,
      "loss": 0.0134,
      "step": 2167220
    },
    {
      "epoch": 3.5467357933531023,
      "grad_norm": 0.2368669956922531,
      "learning_rate": 2.925680172374031e-06,
      "loss": 0.0171,
      "step": 2167240
    },
    {
      "epoch": 3.546768523791756,
      "grad_norm": 0.44311946630477905,
      "learning_rate": 2.9256142801605137e-06,
      "loss": 0.0103,
      "step": 2167260
    },
    {
      "epoch": 3.546801254230409,
      "grad_norm": 0.5903907418251038,
      "learning_rate": 2.9255483879469965e-06,
      "loss": 0.0122,
      "step": 2167280
    },
    {
      "epoch": 3.5468339846690626,
      "grad_norm": 0.21876294910907745,
      "learning_rate": 2.925482495733479e-06,
      "loss": 0.0134,
      "step": 2167300
    },
    {
      "epoch": 3.546866715107716,
      "grad_norm": 0.17139144241809845,
      "learning_rate": 2.9254166035199624e-06,
      "loss": 0.0126,
      "step": 2167320
    },
    {
      "epoch": 3.5468994455463694,
      "grad_norm": 0.281440407037735,
      "learning_rate": 2.925350711306445e-06,
      "loss": 0.0088,
      "step": 2167340
    },
    {
      "epoch": 3.5469321759850225,
      "grad_norm": 0.5652874112129211,
      "learning_rate": 2.925284819092928e-06,
      "loss": 0.0128,
      "step": 2167360
    },
    {
      "epoch": 3.5469649064236757,
      "grad_norm": 0.2296319454908371,
      "learning_rate": 2.9252189268794106e-06,
      "loss": 0.0085,
      "step": 2167380
    },
    {
      "epoch": 3.5469976368623293,
      "grad_norm": 0.15505915880203247,
      "learning_rate": 2.9251530346658933e-06,
      "loss": 0.0147,
      "step": 2167400
    },
    {
      "epoch": 3.5470303673009824,
      "grad_norm": 0.17110134661197662,
      "learning_rate": 2.925087142452377e-06,
      "loss": 0.0132,
      "step": 2167420
    },
    {
      "epoch": 3.547063097739636,
      "grad_norm": 0.45646995306015015,
      "learning_rate": 2.9250212502388596e-06,
      "loss": 0.015,
      "step": 2167440
    },
    {
      "epoch": 3.547095828178289,
      "grad_norm": 0.15728504955768585,
      "learning_rate": 2.9249553580253424e-06,
      "loss": 0.0113,
      "step": 2167460
    },
    {
      "epoch": 3.5471285586169428,
      "grad_norm": 0.13027340173721313,
      "learning_rate": 2.9248894658118255e-06,
      "loss": 0.0144,
      "step": 2167480
    },
    {
      "epoch": 3.547161289055596,
      "grad_norm": 0.13137947022914886,
      "learning_rate": 2.9248235735983083e-06,
      "loss": 0.0112,
      "step": 2167500
    },
    {
      "epoch": 3.547194019494249,
      "grad_norm": 0.12081517279148102,
      "learning_rate": 2.924757681384791e-06,
      "loss": 0.0087,
      "step": 2167520
    },
    {
      "epoch": 3.5472267499329027,
      "grad_norm": 0.3842902183532715,
      "learning_rate": 2.9246917891712738e-06,
      "loss": 0.0161,
      "step": 2167540
    },
    {
      "epoch": 3.547259480371556,
      "grad_norm": 0.38367342948913574,
      "learning_rate": 2.924625896957757e-06,
      "loss": 0.0082,
      "step": 2167560
    },
    {
      "epoch": 3.5472922108102094,
      "grad_norm": 0.3065774738788605,
      "learning_rate": 2.9245600047442397e-06,
      "loss": 0.0084,
      "step": 2167580
    },
    {
      "epoch": 3.5473249412488626,
      "grad_norm": 0.2558252215385437,
      "learning_rate": 2.9244941125307224e-06,
      "loss": 0.0107,
      "step": 2167600
    },
    {
      "epoch": 3.547357671687516,
      "grad_norm": 0.30644673109054565,
      "learning_rate": 2.924428220317205e-06,
      "loss": 0.0096,
      "step": 2167620
    },
    {
      "epoch": 3.5473904021261693,
      "grad_norm": 0.45113348960876465,
      "learning_rate": 2.9243623281036883e-06,
      "loss": 0.0104,
      "step": 2167640
    },
    {
      "epoch": 3.5474231325648224,
      "grad_norm": 0.21653683483600616,
      "learning_rate": 2.924296435890171e-06,
      "loss": 0.012,
      "step": 2167660
    },
    {
      "epoch": 3.547455863003476,
      "grad_norm": 0.38065922260284424,
      "learning_rate": 2.924230543676654e-06,
      "loss": 0.0081,
      "step": 2167680
    },
    {
      "epoch": 3.547488593442129,
      "grad_norm": 0.26115041971206665,
      "learning_rate": 2.9241646514631365e-06,
      "loss": 0.0101,
      "step": 2167700
    },
    {
      "epoch": 3.547521323880783,
      "grad_norm": 0.10091710090637207,
      "learning_rate": 2.9240987592496197e-06,
      "loss": 0.0091,
      "step": 2167720
    },
    {
      "epoch": 3.547554054319436,
      "grad_norm": 0.37779372930526733,
      "learning_rate": 2.9240328670361024e-06,
      "loss": 0.0098,
      "step": 2167740
    },
    {
      "epoch": 3.5475867847580895,
      "grad_norm": 0.2397935688495636,
      "learning_rate": 2.923966974822585e-06,
      "loss": 0.0126,
      "step": 2167760
    },
    {
      "epoch": 3.5476195151967427,
      "grad_norm": 0.31270936131477356,
      "learning_rate": 2.9239010826090688e-06,
      "loss": 0.0148,
      "step": 2167780
    },
    {
      "epoch": 3.547652245635396,
      "grad_norm": 0.22051937878131866,
      "learning_rate": 2.9238351903955515e-06,
      "loss": 0.0147,
      "step": 2167800
    },
    {
      "epoch": 3.5476849760740494,
      "grad_norm": 0.4957401752471924,
      "learning_rate": 2.9237692981820342e-06,
      "loss": 0.0161,
      "step": 2167820
    },
    {
      "epoch": 3.5477177065127026,
      "grad_norm": 0.5742661356925964,
      "learning_rate": 2.923703405968517e-06,
      "loss": 0.0103,
      "step": 2167840
    },
    {
      "epoch": 3.547750436951356,
      "grad_norm": 0.49223339557647705,
      "learning_rate": 2.923637513755e-06,
      "loss": 0.0122,
      "step": 2167860
    },
    {
      "epoch": 3.5477831673900093,
      "grad_norm": 0.318584680557251,
      "learning_rate": 2.923571621541483e-06,
      "loss": 0.0105,
      "step": 2167880
    },
    {
      "epoch": 3.547815897828663,
      "grad_norm": 0.10141763091087341,
      "learning_rate": 2.9235057293279656e-06,
      "loss": 0.0083,
      "step": 2167900
    },
    {
      "epoch": 3.547848628267316,
      "grad_norm": 0.13620753586292267,
      "learning_rate": 2.9234398371144484e-06,
      "loss": 0.0116,
      "step": 2167920
    },
    {
      "epoch": 3.547881358705969,
      "grad_norm": 0.3977425992488861,
      "learning_rate": 2.923373944900931e-06,
      "loss": 0.0091,
      "step": 2167940
    },
    {
      "epoch": 3.547914089144623,
      "grad_norm": 0.5719023942947388,
      "learning_rate": 2.9233080526874143e-06,
      "loss": 0.0076,
      "step": 2167960
    },
    {
      "epoch": 3.547946819583276,
      "grad_norm": 0.10783449560403824,
      "learning_rate": 2.923242160473897e-06,
      "loss": 0.0113,
      "step": 2167980
    },
    {
      "epoch": 3.5479795500219296,
      "grad_norm": 0.37508589029312134,
      "learning_rate": 2.9231762682603797e-06,
      "loss": 0.0154,
      "step": 2168000
    },
    {
      "epoch": 3.5480122804605827,
      "grad_norm": 0.41315874457359314,
      "learning_rate": 2.9231103760468625e-06,
      "loss": 0.0121,
      "step": 2168020
    },
    {
      "epoch": 3.5480450108992363,
      "grad_norm": 0.33757373690605164,
      "learning_rate": 2.9230444838333456e-06,
      "loss": 0.0113,
      "step": 2168040
    },
    {
      "epoch": 3.5480777413378894,
      "grad_norm": 0.1693694293498993,
      "learning_rate": 2.9229785916198284e-06,
      "loss": 0.0085,
      "step": 2168060
    },
    {
      "epoch": 3.5481104717765426,
      "grad_norm": 0.702232301235199,
      "learning_rate": 2.922912699406311e-06,
      "loss": 0.0154,
      "step": 2168080
    },
    {
      "epoch": 3.548143202215196,
      "grad_norm": 0.23943231999874115,
      "learning_rate": 2.922846807192794e-06,
      "loss": 0.0054,
      "step": 2168100
    },
    {
      "epoch": 3.5481759326538493,
      "grad_norm": 0.18583416938781738,
      "learning_rate": 2.9227809149792775e-06,
      "loss": 0.0102,
      "step": 2168120
    },
    {
      "epoch": 3.5482086630925025,
      "grad_norm": 0.13972687721252441,
      "learning_rate": 2.92271502276576e-06,
      "loss": 0.0092,
      "step": 2168140
    },
    {
      "epoch": 3.548241393531156,
      "grad_norm": 0.19045358896255493,
      "learning_rate": 2.922649130552243e-06,
      "loss": 0.0111,
      "step": 2168160
    },
    {
      "epoch": 3.5482741239698097,
      "grad_norm": 0.4105393886566162,
      "learning_rate": 2.922583238338726e-06,
      "loss": 0.0108,
      "step": 2168180
    },
    {
      "epoch": 3.548306854408463,
      "grad_norm": 0.20832772552967072,
      "learning_rate": 2.922517346125209e-06,
      "loss": 0.0114,
      "step": 2168200
    },
    {
      "epoch": 3.548339584847116,
      "grad_norm": 0.16047437489032745,
      "learning_rate": 2.9224514539116916e-06,
      "loss": 0.0093,
      "step": 2168220
    },
    {
      "epoch": 3.5483723152857696,
      "grad_norm": 0.11365126073360443,
      "learning_rate": 2.9223855616981743e-06,
      "loss": 0.011,
      "step": 2168240
    },
    {
      "epoch": 3.5484050457244227,
      "grad_norm": 0.46150097250938416,
      "learning_rate": 2.9223196694846575e-06,
      "loss": 0.0132,
      "step": 2168260
    },
    {
      "epoch": 3.548437776163076,
      "grad_norm": 0.3840852379798889,
      "learning_rate": 2.9222537772711402e-06,
      "loss": 0.0199,
      "step": 2168280
    },
    {
      "epoch": 3.5484705066017295,
      "grad_norm": 0.3825278580188751,
      "learning_rate": 2.922187885057623e-06,
      "loss": 0.0086,
      "step": 2168300
    },
    {
      "epoch": 3.548503237040383,
      "grad_norm": 0.34968313574790955,
      "learning_rate": 2.9221219928441057e-06,
      "loss": 0.0101,
      "step": 2168320
    },
    {
      "epoch": 3.548535967479036,
      "grad_norm": 0.15194763243198395,
      "learning_rate": 2.9220561006305884e-06,
      "loss": 0.0101,
      "step": 2168340
    },
    {
      "epoch": 3.5485686979176894,
      "grad_norm": 0.40582528710365295,
      "learning_rate": 2.9219902084170716e-06,
      "loss": 0.0108,
      "step": 2168360
    },
    {
      "epoch": 3.548601428356343,
      "grad_norm": 0.1406036615371704,
      "learning_rate": 2.9219243162035543e-06,
      "loss": 0.0097,
      "step": 2168380
    },
    {
      "epoch": 3.548634158794996,
      "grad_norm": 0.3341981768608093,
      "learning_rate": 2.921858423990037e-06,
      "loss": 0.0098,
      "step": 2168400
    },
    {
      "epoch": 3.5486668892336493,
      "grad_norm": 0.21176905930042267,
      "learning_rate": 2.92179253177652e-06,
      "loss": 0.0093,
      "step": 2168420
    },
    {
      "epoch": 3.548699619672303,
      "grad_norm": 0.2110222578048706,
      "learning_rate": 2.921726639563003e-06,
      "loss": 0.009,
      "step": 2168440
    },
    {
      "epoch": 3.5487323501109564,
      "grad_norm": 0.2355349361896515,
      "learning_rate": 2.9216607473494857e-06,
      "loss": 0.0108,
      "step": 2168460
    },
    {
      "epoch": 3.5487650805496096,
      "grad_norm": 0.1552491933107376,
      "learning_rate": 2.921594855135969e-06,
      "loss": 0.0098,
      "step": 2168480
    },
    {
      "epoch": 3.5487978109882627,
      "grad_norm": 0.1283205896615982,
      "learning_rate": 2.921528962922452e-06,
      "loss": 0.0073,
      "step": 2168500
    },
    {
      "epoch": 3.5488305414269163,
      "grad_norm": 0.24972493946552277,
      "learning_rate": 2.921463070708935e-06,
      "loss": 0.0093,
      "step": 2168520
    },
    {
      "epoch": 3.5488632718655695,
      "grad_norm": 0.21788950264453888,
      "learning_rate": 2.9213971784954175e-06,
      "loss": 0.0124,
      "step": 2168540
    },
    {
      "epoch": 3.5488960023042226,
      "grad_norm": 0.25305089354515076,
      "learning_rate": 2.9213312862819003e-06,
      "loss": 0.012,
      "step": 2168560
    },
    {
      "epoch": 3.5489287327428762,
      "grad_norm": 0.6615517139434814,
      "learning_rate": 2.9212653940683834e-06,
      "loss": 0.0135,
      "step": 2168580
    },
    {
      "epoch": 3.54896146318153,
      "grad_norm": 0.16147376596927643,
      "learning_rate": 2.921199501854866e-06,
      "loss": 0.0094,
      "step": 2168600
    },
    {
      "epoch": 3.548994193620183,
      "grad_norm": 0.40498900413513184,
      "learning_rate": 2.921133609641349e-06,
      "loss": 0.0109,
      "step": 2168620
    },
    {
      "epoch": 3.549026924058836,
      "grad_norm": 0.31818243861198425,
      "learning_rate": 2.9210677174278317e-06,
      "loss": 0.0099,
      "step": 2168640
    },
    {
      "epoch": 3.5490596544974897,
      "grad_norm": 0.17933297157287598,
      "learning_rate": 2.921001825214315e-06,
      "loss": 0.0099,
      "step": 2168660
    },
    {
      "epoch": 3.549092384936143,
      "grad_norm": 0.4292011559009552,
      "learning_rate": 2.9209359330007976e-06,
      "loss": 0.0139,
      "step": 2168680
    },
    {
      "epoch": 3.549125115374796,
      "grad_norm": 0.24892586469650269,
      "learning_rate": 2.9208700407872803e-06,
      "loss": 0.0139,
      "step": 2168700
    },
    {
      "epoch": 3.5491578458134496,
      "grad_norm": 0.5620760917663574,
      "learning_rate": 2.920804148573763e-06,
      "loss": 0.0087,
      "step": 2168720
    },
    {
      "epoch": 3.5491905762521028,
      "grad_norm": 0.1939472258090973,
      "learning_rate": 2.920738256360246e-06,
      "loss": 0.0115,
      "step": 2168740
    },
    {
      "epoch": 3.5492233066907564,
      "grad_norm": 0.8012385368347168,
      "learning_rate": 2.920672364146729e-06,
      "loss": 0.0144,
      "step": 2168760
    },
    {
      "epoch": 3.5492560371294095,
      "grad_norm": 0.17784899473190308,
      "learning_rate": 2.9206064719332117e-06,
      "loss": 0.009,
      "step": 2168780
    },
    {
      "epoch": 3.549288767568063,
      "grad_norm": 0.18927089869976044,
      "learning_rate": 2.9205405797196944e-06,
      "loss": 0.011,
      "step": 2168800
    },
    {
      "epoch": 3.5493214980067163,
      "grad_norm": 0.17643314599990845,
      "learning_rate": 2.920474687506178e-06,
      "loss": 0.0112,
      "step": 2168820
    },
    {
      "epoch": 3.5493542284453694,
      "grad_norm": 0.11773251742124557,
      "learning_rate": 2.9204087952926607e-06,
      "loss": 0.0084,
      "step": 2168840
    },
    {
      "epoch": 3.549386958884023,
      "grad_norm": 0.10432333499193192,
      "learning_rate": 2.9203429030791435e-06,
      "loss": 0.0096,
      "step": 2168860
    },
    {
      "epoch": 3.549419689322676,
      "grad_norm": 0.24330013990402222,
      "learning_rate": 2.9202770108656262e-06,
      "loss": 0.0165,
      "step": 2168880
    },
    {
      "epoch": 3.5494524197613297,
      "grad_norm": 0.07022571563720703,
      "learning_rate": 2.9202111186521094e-06,
      "loss": 0.014,
      "step": 2168900
    },
    {
      "epoch": 3.549485150199983,
      "grad_norm": 0.09512623399496078,
      "learning_rate": 2.920145226438592e-06,
      "loss": 0.0096,
      "step": 2168920
    },
    {
      "epoch": 3.5495178806386365,
      "grad_norm": 0.3068837523460388,
      "learning_rate": 2.920079334225075e-06,
      "loss": 0.0143,
      "step": 2168940
    },
    {
      "epoch": 3.5495506110772896,
      "grad_norm": 0.15604937076568604,
      "learning_rate": 2.9200134420115576e-06,
      "loss": 0.0181,
      "step": 2168960
    },
    {
      "epoch": 3.549583341515943,
      "grad_norm": 0.14312581717967987,
      "learning_rate": 2.9199475497980408e-06,
      "loss": 0.0076,
      "step": 2168980
    },
    {
      "epoch": 3.5496160719545964,
      "grad_norm": 0.17052343487739563,
      "learning_rate": 2.9198816575845235e-06,
      "loss": 0.0109,
      "step": 2169000
    },
    {
      "epoch": 3.5496488023932495,
      "grad_norm": 0.0987313985824585,
      "learning_rate": 2.9198157653710062e-06,
      "loss": 0.012,
      "step": 2169020
    },
    {
      "epoch": 3.549681532831903,
      "grad_norm": 0.15205460786819458,
      "learning_rate": 2.919749873157489e-06,
      "loss": 0.0121,
      "step": 2169040
    },
    {
      "epoch": 3.5497142632705563,
      "grad_norm": 0.7579324841499329,
      "learning_rate": 2.919683980943972e-06,
      "loss": 0.0152,
      "step": 2169060
    },
    {
      "epoch": 3.54974699370921,
      "grad_norm": 0.30749204754829407,
      "learning_rate": 2.919618088730455e-06,
      "loss": 0.0067,
      "step": 2169080
    },
    {
      "epoch": 3.549779724147863,
      "grad_norm": 0.6594597697257996,
      "learning_rate": 2.9195521965169376e-06,
      "loss": 0.0095,
      "step": 2169100
    },
    {
      "epoch": 3.549812454586516,
      "grad_norm": 0.3740364909172058,
      "learning_rate": 2.9194863043034204e-06,
      "loss": 0.0097,
      "step": 2169120
    },
    {
      "epoch": 3.5498451850251698,
      "grad_norm": 0.036168698221445084,
      "learning_rate": 2.9194204120899035e-06,
      "loss": 0.0117,
      "step": 2169140
    },
    {
      "epoch": 3.549877915463823,
      "grad_norm": 0.24818472564220428,
      "learning_rate": 2.9193545198763863e-06,
      "loss": 0.0123,
      "step": 2169160
    },
    {
      "epoch": 3.5499106459024765,
      "grad_norm": 0.3077806532382965,
      "learning_rate": 2.9192886276628694e-06,
      "loss": 0.0081,
      "step": 2169180
    },
    {
      "epoch": 3.5499433763411297,
      "grad_norm": 0.2333652377128601,
      "learning_rate": 2.9192227354493526e-06,
      "loss": 0.009,
      "step": 2169200
    },
    {
      "epoch": 3.5499761067797833,
      "grad_norm": 0.35590535402297974,
      "learning_rate": 2.9191568432358353e-06,
      "loss": 0.0085,
      "step": 2169220
    },
    {
      "epoch": 3.5500088372184364,
      "grad_norm": 0.584287703037262,
      "learning_rate": 2.919090951022318e-06,
      "loss": 0.011,
      "step": 2169240
    },
    {
      "epoch": 3.5500415676570896,
      "grad_norm": 0.22471579909324646,
      "learning_rate": 2.919025058808801e-06,
      "loss": 0.0078,
      "step": 2169260
    },
    {
      "epoch": 3.550074298095743,
      "grad_norm": 0.2003447562456131,
      "learning_rate": 2.918959166595284e-06,
      "loss": 0.0086,
      "step": 2169280
    },
    {
      "epoch": 3.5501070285343963,
      "grad_norm": 0.7430513501167297,
      "learning_rate": 2.9188932743817667e-06,
      "loss": 0.012,
      "step": 2169300
    },
    {
      "epoch": 3.55013975897305,
      "grad_norm": 0.14638744294643402,
      "learning_rate": 2.9188273821682495e-06,
      "loss": 0.0115,
      "step": 2169320
    },
    {
      "epoch": 3.550172489411703,
      "grad_norm": 0.3702232241630554,
      "learning_rate": 2.918761489954732e-06,
      "loss": 0.0113,
      "step": 2169340
    },
    {
      "epoch": 3.5502052198503566,
      "grad_norm": 0.27717313170433044,
      "learning_rate": 2.918695597741215e-06,
      "loss": 0.0125,
      "step": 2169360
    },
    {
      "epoch": 3.55023795028901,
      "grad_norm": 0.12949033081531525,
      "learning_rate": 2.918629705527698e-06,
      "loss": 0.0118,
      "step": 2169380
    },
    {
      "epoch": 3.550270680727663,
      "grad_norm": 1.948595643043518,
      "learning_rate": 2.918563813314181e-06,
      "loss": 0.0122,
      "step": 2169400
    },
    {
      "epoch": 3.5503034111663165,
      "grad_norm": 0.05142010375857353,
      "learning_rate": 2.9184979211006636e-06,
      "loss": 0.0073,
      "step": 2169420
    },
    {
      "epoch": 3.5503361416049697,
      "grad_norm": 0.22325867414474487,
      "learning_rate": 2.9184320288871463e-06,
      "loss": 0.01,
      "step": 2169440
    },
    {
      "epoch": 3.5503688720436233,
      "grad_norm": 0.28756147623062134,
      "learning_rate": 2.9183661366736295e-06,
      "loss": 0.0104,
      "step": 2169460
    },
    {
      "epoch": 3.5504016024822764,
      "grad_norm": 1.0320112705230713,
      "learning_rate": 2.9183002444601122e-06,
      "loss": 0.0135,
      "step": 2169480
    },
    {
      "epoch": 3.55043433292093,
      "grad_norm": 0.3393658697605133,
      "learning_rate": 2.918234352246595e-06,
      "loss": 0.013,
      "step": 2169500
    },
    {
      "epoch": 3.550467063359583,
      "grad_norm": 0.5196089744567871,
      "learning_rate": 2.9181684600330777e-06,
      "loss": 0.0095,
      "step": 2169520
    },
    {
      "epoch": 3.5504997937982363,
      "grad_norm": 0.14503195881843567,
      "learning_rate": 2.9181025678195613e-06,
      "loss": 0.016,
      "step": 2169540
    },
    {
      "epoch": 3.55053252423689,
      "grad_norm": 0.4050235450267792,
      "learning_rate": 2.918036675606044e-06,
      "loss": 0.0161,
      "step": 2169560
    },
    {
      "epoch": 3.550565254675543,
      "grad_norm": 0.4231606125831604,
      "learning_rate": 2.9179707833925268e-06,
      "loss": 0.0187,
      "step": 2169580
    },
    {
      "epoch": 3.5505979851141967,
      "grad_norm": 0.2392316460609436,
      "learning_rate": 2.91790489117901e-06,
      "loss": 0.0112,
      "step": 2169600
    },
    {
      "epoch": 3.55063071555285,
      "grad_norm": 0.18120631575584412,
      "learning_rate": 2.9178389989654927e-06,
      "loss": 0.0084,
      "step": 2169620
    },
    {
      "epoch": 3.5506634459915034,
      "grad_norm": 0.5567929744720459,
      "learning_rate": 2.9177731067519754e-06,
      "loss": 0.0122,
      "step": 2169640
    },
    {
      "epoch": 3.5506961764301566,
      "grad_norm": 0.17228351533412933,
      "learning_rate": 2.917707214538458e-06,
      "loss": 0.0099,
      "step": 2169660
    },
    {
      "epoch": 3.5507289068688097,
      "grad_norm": 0.3767213523387909,
      "learning_rate": 2.9176413223249413e-06,
      "loss": 0.0106,
      "step": 2169680
    },
    {
      "epoch": 3.5507616373074633,
      "grad_norm": 0.5161726474761963,
      "learning_rate": 2.917575430111424e-06,
      "loss": 0.0126,
      "step": 2169700
    },
    {
      "epoch": 3.5507943677461165,
      "grad_norm": 0.12922246754169464,
      "learning_rate": 2.917509537897907e-06,
      "loss": 0.0087,
      "step": 2169720
    },
    {
      "epoch": 3.5508270981847696,
      "grad_norm": 0.08207976818084717,
      "learning_rate": 2.9174436456843895e-06,
      "loss": 0.013,
      "step": 2169740
    },
    {
      "epoch": 3.550859828623423,
      "grad_norm": 0.2177235633134842,
      "learning_rate": 2.9173777534708723e-06,
      "loss": 0.0117,
      "step": 2169760
    },
    {
      "epoch": 3.550892559062077,
      "grad_norm": 0.2271849513053894,
      "learning_rate": 2.9173118612573554e-06,
      "loss": 0.0132,
      "step": 2169780
    },
    {
      "epoch": 3.55092528950073,
      "grad_norm": 0.3016884922981262,
      "learning_rate": 2.917245969043838e-06,
      "loss": 0.0105,
      "step": 2169800
    },
    {
      "epoch": 3.550958019939383,
      "grad_norm": 0.2267901748418808,
      "learning_rate": 2.917180076830321e-06,
      "loss": 0.0116,
      "step": 2169820
    },
    {
      "epoch": 3.5509907503780367,
      "grad_norm": 0.7023197412490845,
      "learning_rate": 2.9171141846168037e-06,
      "loss": 0.0099,
      "step": 2169840
    },
    {
      "epoch": 3.55102348081669,
      "grad_norm": 0.28354647755622864,
      "learning_rate": 2.917048292403287e-06,
      "loss": 0.0087,
      "step": 2169860
    },
    {
      "epoch": 3.551056211255343,
      "grad_norm": 0.281257301568985,
      "learning_rate": 2.91698240018977e-06,
      "loss": 0.0105,
      "step": 2169880
    },
    {
      "epoch": 3.5510889416939966,
      "grad_norm": 0.39287954568862915,
      "learning_rate": 2.9169165079762527e-06,
      "loss": 0.0101,
      "step": 2169900
    },
    {
      "epoch": 3.55112167213265,
      "grad_norm": 0.5946477651596069,
      "learning_rate": 2.916850615762736e-06,
      "loss": 0.0154,
      "step": 2169920
    },
    {
      "epoch": 3.5511544025713033,
      "grad_norm": 0.1224508211016655,
      "learning_rate": 2.9167847235492186e-06,
      "loss": 0.015,
      "step": 2169940
    },
    {
      "epoch": 3.5511871330099565,
      "grad_norm": 0.35138869285583496,
      "learning_rate": 2.9167188313357014e-06,
      "loss": 0.0112,
      "step": 2169960
    },
    {
      "epoch": 3.55121986344861,
      "grad_norm": 0.4154318571090698,
      "learning_rate": 2.916652939122184e-06,
      "loss": 0.009,
      "step": 2169980
    },
    {
      "epoch": 3.551252593887263,
      "grad_norm": 0.3038560450077057,
      "learning_rate": 2.9165870469086673e-06,
      "loss": 0.0102,
      "step": 2170000
    },
    {
      "epoch": 3.5512853243259164,
      "grad_norm": 0.3150276839733124,
      "learning_rate": 2.91652115469515e-06,
      "loss": 0.0091,
      "step": 2170020
    },
    {
      "epoch": 3.55131805476457,
      "grad_norm": 0.34514909982681274,
      "learning_rate": 2.9164552624816328e-06,
      "loss": 0.0101,
      "step": 2170040
    },
    {
      "epoch": 3.5513507852032236,
      "grad_norm": 0.04756103828549385,
      "learning_rate": 2.9163893702681155e-06,
      "loss": 0.0098,
      "step": 2170060
    },
    {
      "epoch": 3.5513835156418767,
      "grad_norm": 0.12950775027275085,
      "learning_rate": 2.9163234780545987e-06,
      "loss": 0.0131,
      "step": 2170080
    },
    {
      "epoch": 3.55141624608053,
      "grad_norm": 0.11566203087568283,
      "learning_rate": 2.9162575858410814e-06,
      "loss": 0.0116,
      "step": 2170100
    },
    {
      "epoch": 3.5514489765191835,
      "grad_norm": 0.34204885363578796,
      "learning_rate": 2.916191693627564e-06,
      "loss": 0.0168,
      "step": 2170120
    },
    {
      "epoch": 3.5514817069578366,
      "grad_norm": 0.1511152684688568,
      "learning_rate": 2.916125801414047e-06,
      "loss": 0.0096,
      "step": 2170140
    },
    {
      "epoch": 3.5515144373964898,
      "grad_norm": 0.2518673241138458,
      "learning_rate": 2.91605990920053e-06,
      "loss": 0.0105,
      "step": 2170160
    },
    {
      "epoch": 3.5515471678351433,
      "grad_norm": 0.23720282316207886,
      "learning_rate": 2.9159940169870128e-06,
      "loss": 0.0107,
      "step": 2170180
    },
    {
      "epoch": 3.551579898273797,
      "grad_norm": 1.3609148263931274,
      "learning_rate": 2.9159281247734955e-06,
      "loss": 0.0108,
      "step": 2170200
    },
    {
      "epoch": 3.55161262871245,
      "grad_norm": 0.4287129342556,
      "learning_rate": 2.9158622325599783e-06,
      "loss": 0.0085,
      "step": 2170220
    },
    {
      "epoch": 3.5516453591511032,
      "grad_norm": 0.7804372906684875,
      "learning_rate": 2.915796340346462e-06,
      "loss": 0.0106,
      "step": 2170240
    },
    {
      "epoch": 3.551678089589757,
      "grad_norm": 0.40295735001564026,
      "learning_rate": 2.9157304481329446e-06,
      "loss": 0.0075,
      "step": 2170260
    },
    {
      "epoch": 3.55171082002841,
      "grad_norm": 0.5042359828948975,
      "learning_rate": 2.9156645559194273e-06,
      "loss": 0.0102,
      "step": 2170280
    },
    {
      "epoch": 3.551743550467063,
      "grad_norm": 0.42887264490127563,
      "learning_rate": 2.91559866370591e-06,
      "loss": 0.0133,
      "step": 2170300
    },
    {
      "epoch": 3.5517762809057167,
      "grad_norm": 0.26040157675743103,
      "learning_rate": 2.9155327714923932e-06,
      "loss": 0.0077,
      "step": 2170320
    },
    {
      "epoch": 3.55180901134437,
      "grad_norm": 0.2177131325006485,
      "learning_rate": 2.915466879278876e-06,
      "loss": 0.0132,
      "step": 2170340
    },
    {
      "epoch": 3.5518417417830235,
      "grad_norm": 0.36821144819259644,
      "learning_rate": 2.9154009870653587e-06,
      "loss": 0.0137,
      "step": 2170360
    },
    {
      "epoch": 3.5518744722216766,
      "grad_norm": 0.1864616572856903,
      "learning_rate": 2.9153350948518414e-06,
      "loss": 0.012,
      "step": 2170380
    },
    {
      "epoch": 3.55190720266033,
      "grad_norm": 0.1997241973876953,
      "learning_rate": 2.9152692026383246e-06,
      "loss": 0.0101,
      "step": 2170400
    },
    {
      "epoch": 3.5519399330989834,
      "grad_norm": 0.4565906524658203,
      "learning_rate": 2.9152033104248073e-06,
      "loss": 0.0119,
      "step": 2170420
    },
    {
      "epoch": 3.5519726635376365,
      "grad_norm": 0.13213880360126495,
      "learning_rate": 2.91513741821129e-06,
      "loss": 0.0091,
      "step": 2170440
    },
    {
      "epoch": 3.55200539397629,
      "grad_norm": 0.44775527715682983,
      "learning_rate": 2.915071525997773e-06,
      "loss": 0.0154,
      "step": 2170460
    },
    {
      "epoch": 3.5520381244149433,
      "grad_norm": 0.3765280842781067,
      "learning_rate": 2.915005633784256e-06,
      "loss": 0.0082,
      "step": 2170480
    },
    {
      "epoch": 3.552070854853597,
      "grad_norm": 0.5563423037528992,
      "learning_rate": 2.9149397415707387e-06,
      "loss": 0.0159,
      "step": 2170500
    },
    {
      "epoch": 3.55210358529225,
      "grad_norm": 0.15976551175117493,
      "learning_rate": 2.9148738493572215e-06,
      "loss": 0.015,
      "step": 2170520
    },
    {
      "epoch": 3.5521363157309036,
      "grad_norm": 0.32965967059135437,
      "learning_rate": 2.914807957143704e-06,
      "loss": 0.0119,
      "step": 2170540
    },
    {
      "epoch": 3.5521690461695568,
      "grad_norm": 0.12681396305561066,
      "learning_rate": 2.9147420649301874e-06,
      "loss": 0.0088,
      "step": 2170560
    },
    {
      "epoch": 3.55220177660821,
      "grad_norm": 0.4984588623046875,
      "learning_rate": 2.9146761727166705e-06,
      "loss": 0.0119,
      "step": 2170580
    },
    {
      "epoch": 3.5522345070468635,
      "grad_norm": 0.16022486984729767,
      "learning_rate": 2.9146102805031533e-06,
      "loss": 0.0126,
      "step": 2170600
    },
    {
      "epoch": 3.5522672374855166,
      "grad_norm": 0.2835659682750702,
      "learning_rate": 2.9145443882896364e-06,
      "loss": 0.0132,
      "step": 2170620
    },
    {
      "epoch": 3.5522999679241702,
      "grad_norm": 0.2711658179759979,
      "learning_rate": 2.914478496076119e-06,
      "loss": 0.0082,
      "step": 2170640
    },
    {
      "epoch": 3.5523326983628234,
      "grad_norm": 0.19967278838157654,
      "learning_rate": 2.914412603862602e-06,
      "loss": 0.0116,
      "step": 2170660
    },
    {
      "epoch": 3.552365428801477,
      "grad_norm": 0.35218629240989685,
      "learning_rate": 2.9143467116490847e-06,
      "loss": 0.0172,
      "step": 2170680
    },
    {
      "epoch": 3.55239815924013,
      "grad_norm": 0.3709941804409027,
      "learning_rate": 2.914280819435568e-06,
      "loss": 0.0164,
      "step": 2170700
    },
    {
      "epoch": 3.5524308896787833,
      "grad_norm": 0.3229362666606903,
      "learning_rate": 2.9142149272220506e-06,
      "loss": 0.0102,
      "step": 2170720
    },
    {
      "epoch": 3.552463620117437,
      "grad_norm": 0.11197617650032043,
      "learning_rate": 2.9141490350085333e-06,
      "loss": 0.0123,
      "step": 2170740
    },
    {
      "epoch": 3.55249635055609,
      "grad_norm": 0.4822172522544861,
      "learning_rate": 2.914083142795016e-06,
      "loss": 0.0178,
      "step": 2170760
    },
    {
      "epoch": 3.5525290809947436,
      "grad_norm": 0.3398274779319763,
      "learning_rate": 2.9140172505814988e-06,
      "loss": 0.0083,
      "step": 2170780
    },
    {
      "epoch": 3.5525618114333968,
      "grad_norm": 0.11096614599227905,
      "learning_rate": 2.913951358367982e-06,
      "loss": 0.0122,
      "step": 2170800
    },
    {
      "epoch": 3.5525945418720504,
      "grad_norm": 0.23486794531345367,
      "learning_rate": 2.9138854661544647e-06,
      "loss": 0.0076,
      "step": 2170820
    },
    {
      "epoch": 3.5526272723107035,
      "grad_norm": 0.19816260039806366,
      "learning_rate": 2.9138195739409474e-06,
      "loss": 0.0103,
      "step": 2170840
    },
    {
      "epoch": 3.5526600027493567,
      "grad_norm": 0.16770388185977936,
      "learning_rate": 2.91375368172743e-06,
      "loss": 0.0074,
      "step": 2170860
    },
    {
      "epoch": 3.5526927331880103,
      "grad_norm": 0.10573416948318481,
      "learning_rate": 2.9136877895139133e-06,
      "loss": 0.0086,
      "step": 2170880
    },
    {
      "epoch": 3.5527254636266634,
      "grad_norm": 0.41410356760025024,
      "learning_rate": 2.913621897300396e-06,
      "loss": 0.0125,
      "step": 2170900
    },
    {
      "epoch": 3.552758194065317,
      "grad_norm": 0.3263983130455017,
      "learning_rate": 2.913556005086879e-06,
      "loss": 0.0129,
      "step": 2170920
    },
    {
      "epoch": 3.55279092450397,
      "grad_norm": 0.39765551686286926,
      "learning_rate": 2.9134901128733624e-06,
      "loss": 0.0079,
      "step": 2170940
    },
    {
      "epoch": 3.5528236549426238,
      "grad_norm": 0.3399094045162201,
      "learning_rate": 2.913424220659845e-06,
      "loss": 0.0152,
      "step": 2170960
    },
    {
      "epoch": 3.552856385381277,
      "grad_norm": 0.12540064752101898,
      "learning_rate": 2.913358328446328e-06,
      "loss": 0.0119,
      "step": 2170980
    },
    {
      "epoch": 3.55288911581993,
      "grad_norm": 0.41623565554618835,
      "learning_rate": 2.9132924362328106e-06,
      "loss": 0.0075,
      "step": 2171000
    },
    {
      "epoch": 3.5529218462585836,
      "grad_norm": 0.4602425992488861,
      "learning_rate": 2.9132265440192938e-06,
      "loss": 0.0163,
      "step": 2171020
    },
    {
      "epoch": 3.552954576697237,
      "grad_norm": 0.5677846074104309,
      "learning_rate": 2.9131606518057765e-06,
      "loss": 0.0112,
      "step": 2171040
    },
    {
      "epoch": 3.5529873071358904,
      "grad_norm": 0.19177354872226715,
      "learning_rate": 2.9130947595922593e-06,
      "loss": 0.0104,
      "step": 2171060
    },
    {
      "epoch": 3.5530200375745435,
      "grad_norm": 0.37746384739875793,
      "learning_rate": 2.913028867378742e-06,
      "loss": 0.0134,
      "step": 2171080
    },
    {
      "epoch": 3.553052768013197,
      "grad_norm": 0.259602814912796,
      "learning_rate": 2.912962975165225e-06,
      "loss": 0.0092,
      "step": 2171100
    },
    {
      "epoch": 3.5530854984518503,
      "grad_norm": 0.25304874777793884,
      "learning_rate": 2.912897082951708e-06,
      "loss": 0.0098,
      "step": 2171120
    },
    {
      "epoch": 3.5531182288905034,
      "grad_norm": 0.6247414946556091,
      "learning_rate": 2.9128311907381906e-06,
      "loss": 0.0121,
      "step": 2171140
    },
    {
      "epoch": 3.553150959329157,
      "grad_norm": 0.6245851516723633,
      "learning_rate": 2.9127652985246734e-06,
      "loss": 0.0139,
      "step": 2171160
    },
    {
      "epoch": 3.55318368976781,
      "grad_norm": 0.1352284699678421,
      "learning_rate": 2.9126994063111565e-06,
      "loss": 0.0077,
      "step": 2171180
    },
    {
      "epoch": 3.5532164202064633,
      "grad_norm": 0.11262243986129761,
      "learning_rate": 2.9126335140976393e-06,
      "loss": 0.0113,
      "step": 2171200
    },
    {
      "epoch": 3.553249150645117,
      "grad_norm": 0.433397114276886,
      "learning_rate": 2.912567621884122e-06,
      "loss": 0.0169,
      "step": 2171220
    },
    {
      "epoch": 3.5532818810837705,
      "grad_norm": 0.21011009812355042,
      "learning_rate": 2.9125017296706048e-06,
      "loss": 0.0111,
      "step": 2171240
    },
    {
      "epoch": 3.5533146115224237,
      "grad_norm": 0.33863332867622375,
      "learning_rate": 2.9124358374570875e-06,
      "loss": 0.0162,
      "step": 2171260
    },
    {
      "epoch": 3.553347341961077,
      "grad_norm": 0.319692999124527,
      "learning_rate": 2.9123699452435707e-06,
      "loss": 0.0093,
      "step": 2171280
    },
    {
      "epoch": 3.5533800723997304,
      "grad_norm": 0.37819889187812805,
      "learning_rate": 2.912304053030054e-06,
      "loss": 0.0087,
      "step": 2171300
    },
    {
      "epoch": 3.5534128028383836,
      "grad_norm": 0.08585721254348755,
      "learning_rate": 2.9122381608165366e-06,
      "loss": 0.0146,
      "step": 2171320
    },
    {
      "epoch": 3.5534455332770367,
      "grad_norm": 0.21044401824474335,
      "learning_rate": 2.9121722686030197e-06,
      "loss": 0.0075,
      "step": 2171340
    },
    {
      "epoch": 3.5534782637156903,
      "grad_norm": 0.37265047430992126,
      "learning_rate": 2.9121063763895025e-06,
      "loss": 0.0139,
      "step": 2171360
    },
    {
      "epoch": 3.553510994154344,
      "grad_norm": 0.2646540105342865,
      "learning_rate": 2.912040484175985e-06,
      "loss": 0.0077,
      "step": 2171380
    },
    {
      "epoch": 3.553543724592997,
      "grad_norm": 0.13655628263950348,
      "learning_rate": 2.911974591962468e-06,
      "loss": 0.0137,
      "step": 2171400
    },
    {
      "epoch": 3.55357645503165,
      "grad_norm": 0.23781216144561768,
      "learning_rate": 2.911908699748951e-06,
      "loss": 0.0087,
      "step": 2171420
    },
    {
      "epoch": 3.553609185470304,
      "grad_norm": 0.18922914564609528,
      "learning_rate": 2.911842807535434e-06,
      "loss": 0.011,
      "step": 2171440
    },
    {
      "epoch": 3.553641915908957,
      "grad_norm": 0.6635959148406982,
      "learning_rate": 2.9117769153219166e-06,
      "loss": 0.0127,
      "step": 2171460
    },
    {
      "epoch": 3.55367464634761,
      "grad_norm": 0.275696337223053,
      "learning_rate": 2.9117110231083993e-06,
      "loss": 0.0112,
      "step": 2171480
    },
    {
      "epoch": 3.5537073767862637,
      "grad_norm": 0.35681918263435364,
      "learning_rate": 2.9116451308948825e-06,
      "loss": 0.0108,
      "step": 2171500
    },
    {
      "epoch": 3.5537401072249173,
      "grad_norm": 0.2567119002342224,
      "learning_rate": 2.9115792386813652e-06,
      "loss": 0.0085,
      "step": 2171520
    },
    {
      "epoch": 3.5537728376635704,
      "grad_norm": 0.14097976684570312,
      "learning_rate": 2.911513346467848e-06,
      "loss": 0.0117,
      "step": 2171540
    },
    {
      "epoch": 3.5538055681022236,
      "grad_norm": 0.633918821811676,
      "learning_rate": 2.9114474542543307e-06,
      "loss": 0.012,
      "step": 2171560
    },
    {
      "epoch": 3.553838298540877,
      "grad_norm": 0.22926181554794312,
      "learning_rate": 2.911381562040814e-06,
      "loss": 0.0092,
      "step": 2171580
    },
    {
      "epoch": 3.5538710289795303,
      "grad_norm": 0.34670332074165344,
      "learning_rate": 2.9113156698272966e-06,
      "loss": 0.0091,
      "step": 2171600
    },
    {
      "epoch": 3.5539037594181835,
      "grad_norm": 0.2820059359073639,
      "learning_rate": 2.9112497776137794e-06,
      "loss": 0.0113,
      "step": 2171620
    },
    {
      "epoch": 3.553936489856837,
      "grad_norm": 0.18141847848892212,
      "learning_rate": 2.911183885400263e-06,
      "loss": 0.0073,
      "step": 2171640
    },
    {
      "epoch": 3.5539692202954907,
      "grad_norm": 0.44296279549598694,
      "learning_rate": 2.9111179931867457e-06,
      "loss": 0.0078,
      "step": 2171660
    },
    {
      "epoch": 3.554001950734144,
      "grad_norm": 0.5514010190963745,
      "learning_rate": 2.9110521009732284e-06,
      "loss": 0.0093,
      "step": 2171680
    },
    {
      "epoch": 3.554034681172797,
      "grad_norm": 1.1216962337493896,
      "learning_rate": 2.910986208759711e-06,
      "loss": 0.0119,
      "step": 2171700
    },
    {
      "epoch": 3.5540674116114506,
      "grad_norm": 0.13164818286895752,
      "learning_rate": 2.9109203165461943e-06,
      "loss": 0.0092,
      "step": 2171720
    },
    {
      "epoch": 3.5541001420501037,
      "grad_norm": 0.42287373542785645,
      "learning_rate": 2.910854424332677e-06,
      "loss": 0.0125,
      "step": 2171740
    },
    {
      "epoch": 3.554132872488757,
      "grad_norm": 0.1729590892791748,
      "learning_rate": 2.91078853211916e-06,
      "loss": 0.0077,
      "step": 2171760
    },
    {
      "epoch": 3.5541656029274105,
      "grad_norm": 0.17344194650650024,
      "learning_rate": 2.9107226399056425e-06,
      "loss": 0.0107,
      "step": 2171780
    },
    {
      "epoch": 3.5541983333660636,
      "grad_norm": 0.13888539373874664,
      "learning_rate": 2.9106567476921253e-06,
      "loss": 0.0107,
      "step": 2171800
    },
    {
      "epoch": 3.554231063804717,
      "grad_norm": 0.18463319540023804,
      "learning_rate": 2.9105908554786084e-06,
      "loss": 0.0135,
      "step": 2171820
    },
    {
      "epoch": 3.5542637942433704,
      "grad_norm": 0.1608133763074875,
      "learning_rate": 2.910524963265091e-06,
      "loss": 0.0097,
      "step": 2171840
    },
    {
      "epoch": 3.554296524682024,
      "grad_norm": 0.300884872674942,
      "learning_rate": 2.910459071051574e-06,
      "loss": 0.0109,
      "step": 2171860
    },
    {
      "epoch": 3.554329255120677,
      "grad_norm": 0.27556324005126953,
      "learning_rate": 2.9103931788380567e-06,
      "loss": 0.0095,
      "step": 2171880
    },
    {
      "epoch": 3.5543619855593303,
      "grad_norm": 0.1704862266778946,
      "learning_rate": 2.91032728662454e-06,
      "loss": 0.0092,
      "step": 2171900
    },
    {
      "epoch": 3.554394715997984,
      "grad_norm": 0.14547504484653473,
      "learning_rate": 2.9102613944110226e-06,
      "loss": 0.0095,
      "step": 2171920
    },
    {
      "epoch": 3.554427446436637,
      "grad_norm": 0.2829640507698059,
      "learning_rate": 2.9101955021975053e-06,
      "loss": 0.0072,
      "step": 2171940
    },
    {
      "epoch": 3.5544601768752906,
      "grad_norm": 0.23005063831806183,
      "learning_rate": 2.910129609983988e-06,
      "loss": 0.0121,
      "step": 2171960
    },
    {
      "epoch": 3.5544929073139437,
      "grad_norm": 0.40315330028533936,
      "learning_rate": 2.910063717770471e-06,
      "loss": 0.0132,
      "step": 2171980
    },
    {
      "epoch": 3.5545256377525973,
      "grad_norm": 0.47688910365104675,
      "learning_rate": 2.9099978255569544e-06,
      "loss": 0.0086,
      "step": 2172000
    },
    {
      "epoch": 3.5545583681912505,
      "grad_norm": 0.18681003153324127,
      "learning_rate": 2.909931933343437e-06,
      "loss": 0.0108,
      "step": 2172020
    },
    {
      "epoch": 3.5545910986299036,
      "grad_norm": 0.4764024019241333,
      "learning_rate": 2.9098660411299203e-06,
      "loss": 0.0152,
      "step": 2172040
    },
    {
      "epoch": 3.5546238290685572,
      "grad_norm": 0.4763345718383789,
      "learning_rate": 2.909800148916403e-06,
      "loss": 0.0154,
      "step": 2172060
    },
    {
      "epoch": 3.5546565595072104,
      "grad_norm": 0.17224794626235962,
      "learning_rate": 2.9097342567028858e-06,
      "loss": 0.0144,
      "step": 2172080
    },
    {
      "epoch": 3.554689289945864,
      "grad_norm": 0.13537122309207916,
      "learning_rate": 2.9096683644893685e-06,
      "loss": 0.0172,
      "step": 2172100
    },
    {
      "epoch": 3.554722020384517,
      "grad_norm": 0.3505949079990387,
      "learning_rate": 2.9096024722758517e-06,
      "loss": 0.0172,
      "step": 2172120
    },
    {
      "epoch": 3.5547547508231707,
      "grad_norm": 0.2296234369277954,
      "learning_rate": 2.9095365800623344e-06,
      "loss": 0.0112,
      "step": 2172140
    },
    {
      "epoch": 3.554787481261824,
      "grad_norm": 0.2565985918045044,
      "learning_rate": 2.909470687848817e-06,
      "loss": 0.0149,
      "step": 2172160
    },
    {
      "epoch": 3.554820211700477,
      "grad_norm": 0.20548532903194427,
      "learning_rate": 2.9094047956353e-06,
      "loss": 0.0121,
      "step": 2172180
    },
    {
      "epoch": 3.5548529421391306,
      "grad_norm": 0.2963983714580536,
      "learning_rate": 2.9093389034217826e-06,
      "loss": 0.0144,
      "step": 2172200
    },
    {
      "epoch": 3.5548856725777838,
      "grad_norm": 0.17539666593074799,
      "learning_rate": 2.9092730112082658e-06,
      "loss": 0.0068,
      "step": 2172220
    },
    {
      "epoch": 3.5549184030164374,
      "grad_norm": 0.1632155179977417,
      "learning_rate": 2.9092071189947485e-06,
      "loss": 0.0134,
      "step": 2172240
    },
    {
      "epoch": 3.5549511334550905,
      "grad_norm": 0.25756654143333435,
      "learning_rate": 2.9091412267812313e-06,
      "loss": 0.009,
      "step": 2172260
    },
    {
      "epoch": 3.554983863893744,
      "grad_norm": 0.48504337668418884,
      "learning_rate": 2.909075334567714e-06,
      "loss": 0.0107,
      "step": 2172280
    },
    {
      "epoch": 3.5550165943323973,
      "grad_norm": 0.4003485441207886,
      "learning_rate": 2.909009442354197e-06,
      "loss": 0.0097,
      "step": 2172300
    },
    {
      "epoch": 3.5550493247710504,
      "grad_norm": 0.18065325915813446,
      "learning_rate": 2.90894355014068e-06,
      "loss": 0.0119,
      "step": 2172320
    },
    {
      "epoch": 3.555082055209704,
      "grad_norm": 0.19353961944580078,
      "learning_rate": 2.908877657927163e-06,
      "loss": 0.0124,
      "step": 2172340
    },
    {
      "epoch": 3.555114785648357,
      "grad_norm": 0.20133820176124573,
      "learning_rate": 2.9088117657136462e-06,
      "loss": 0.0117,
      "step": 2172360
    },
    {
      "epoch": 3.5551475160870107,
      "grad_norm": 0.3244647681713104,
      "learning_rate": 2.908745873500129e-06,
      "loss": 0.0107,
      "step": 2172380
    },
    {
      "epoch": 3.555180246525664,
      "grad_norm": 0.5999850034713745,
      "learning_rate": 2.9086799812866117e-06,
      "loss": 0.0119,
      "step": 2172400
    },
    {
      "epoch": 3.5552129769643175,
      "grad_norm": 0.1193917989730835,
      "learning_rate": 2.9086140890730945e-06,
      "loss": 0.0129,
      "step": 2172420
    },
    {
      "epoch": 3.5552457074029706,
      "grad_norm": 0.2682912051677704,
      "learning_rate": 2.9085481968595776e-06,
      "loss": 0.01,
      "step": 2172440
    },
    {
      "epoch": 3.555278437841624,
      "grad_norm": 0.13281656801700592,
      "learning_rate": 2.9084823046460604e-06,
      "loss": 0.0108,
      "step": 2172460
    },
    {
      "epoch": 3.5553111682802774,
      "grad_norm": 0.3034469485282898,
      "learning_rate": 2.908416412432543e-06,
      "loss": 0.0089,
      "step": 2172480
    },
    {
      "epoch": 3.5553438987189305,
      "grad_norm": 0.3201761841773987,
      "learning_rate": 2.908350520219026e-06,
      "loss": 0.0114,
      "step": 2172500
    },
    {
      "epoch": 3.555376629157584,
      "grad_norm": 0.619586706161499,
      "learning_rate": 2.908284628005509e-06,
      "loss": 0.0157,
      "step": 2172520
    },
    {
      "epoch": 3.5554093595962373,
      "grad_norm": 0.19472718238830566,
      "learning_rate": 2.9082187357919917e-06,
      "loss": 0.0115,
      "step": 2172540
    },
    {
      "epoch": 3.555442090034891,
      "grad_norm": 0.265127956867218,
      "learning_rate": 2.9081528435784745e-06,
      "loss": 0.0083,
      "step": 2172560
    },
    {
      "epoch": 3.555474820473544,
      "grad_norm": 0.592920184135437,
      "learning_rate": 2.9080869513649572e-06,
      "loss": 0.0155,
      "step": 2172580
    },
    {
      "epoch": 3.555507550912197,
      "grad_norm": 0.6743080019950867,
      "learning_rate": 2.9080210591514404e-06,
      "loss": 0.0108,
      "step": 2172600
    },
    {
      "epoch": 3.5555402813508508,
      "grad_norm": 0.12972058355808258,
      "learning_rate": 2.907955166937923e-06,
      "loss": 0.0113,
      "step": 2172620
    },
    {
      "epoch": 3.555573011789504,
      "grad_norm": 0.23761241137981415,
      "learning_rate": 2.907889274724406e-06,
      "loss": 0.0094,
      "step": 2172640
    },
    {
      "epoch": 3.5556057422281575,
      "grad_norm": 0.27312764525413513,
      "learning_rate": 2.9078233825108886e-06,
      "loss": 0.0118,
      "step": 2172660
    },
    {
      "epoch": 3.5556384726668107,
      "grad_norm": 0.6952011585235596,
      "learning_rate": 2.9077574902973713e-06,
      "loss": 0.0115,
      "step": 2172680
    },
    {
      "epoch": 3.5556712031054643,
      "grad_norm": 0.5749917030334473,
      "learning_rate": 2.907691598083855e-06,
      "loss": 0.0091,
      "step": 2172700
    },
    {
      "epoch": 3.5557039335441174,
      "grad_norm": 0.38705310225486755,
      "learning_rate": 2.9076257058703377e-06,
      "loss": 0.0102,
      "step": 2172720
    },
    {
      "epoch": 3.5557366639827706,
      "grad_norm": 0.5221866369247437,
      "learning_rate": 2.9075598136568204e-06,
      "loss": 0.0141,
      "step": 2172740
    },
    {
      "epoch": 3.555769394421424,
      "grad_norm": 0.14274010062217712,
      "learning_rate": 2.9074939214433036e-06,
      "loss": 0.0101,
      "step": 2172760
    },
    {
      "epoch": 3.5558021248600773,
      "grad_norm": 0.28690409660339355,
      "learning_rate": 2.9074280292297863e-06,
      "loss": 0.0103,
      "step": 2172780
    },
    {
      "epoch": 3.5558348552987304,
      "grad_norm": 0.2873504161834717,
      "learning_rate": 2.907362137016269e-06,
      "loss": 0.0099,
      "step": 2172800
    },
    {
      "epoch": 3.555867585737384,
      "grad_norm": 0.2266339510679245,
      "learning_rate": 2.9072962448027518e-06,
      "loss": 0.014,
      "step": 2172820
    },
    {
      "epoch": 3.5559003161760376,
      "grad_norm": 0.6730720400810242,
      "learning_rate": 2.907230352589235e-06,
      "loss": 0.0073,
      "step": 2172840
    },
    {
      "epoch": 3.555933046614691,
      "grad_norm": 0.7853482961654663,
      "learning_rate": 2.9071644603757177e-06,
      "loss": 0.0102,
      "step": 2172860
    },
    {
      "epoch": 3.555965777053344,
      "grad_norm": 0.2938927412033081,
      "learning_rate": 2.9070985681622004e-06,
      "loss": 0.0178,
      "step": 2172880
    },
    {
      "epoch": 3.5559985074919975,
      "grad_norm": 0.21217018365859985,
      "learning_rate": 2.907032675948683e-06,
      "loss": 0.0079,
      "step": 2172900
    },
    {
      "epoch": 3.5560312379306507,
      "grad_norm": 0.14355744421482086,
      "learning_rate": 2.9069667837351663e-06,
      "loss": 0.011,
      "step": 2172920
    },
    {
      "epoch": 3.556063968369304,
      "grad_norm": 0.58611661195755,
      "learning_rate": 2.906900891521649e-06,
      "loss": 0.0104,
      "step": 2172940
    },
    {
      "epoch": 3.5560966988079574,
      "grad_norm": 0.11748168617486954,
      "learning_rate": 2.906834999308132e-06,
      "loss": 0.0089,
      "step": 2172960
    },
    {
      "epoch": 3.556129429246611,
      "grad_norm": 0.3360981047153473,
      "learning_rate": 2.9067691070946146e-06,
      "loss": 0.0106,
      "step": 2172980
    },
    {
      "epoch": 3.556162159685264,
      "grad_norm": 0.4468594789505005,
      "learning_rate": 2.9067032148810977e-06,
      "loss": 0.0106,
      "step": 2173000
    },
    {
      "epoch": 3.5561948901239173,
      "grad_norm": 0.30850812792778015,
      "learning_rate": 2.9066373226675805e-06,
      "loss": 0.0112,
      "step": 2173020
    },
    {
      "epoch": 3.556227620562571,
      "grad_norm": 0.27302953600883484,
      "learning_rate": 2.9065714304540636e-06,
      "loss": 0.0158,
      "step": 2173040
    },
    {
      "epoch": 3.556260351001224,
      "grad_norm": 0.3405446708202362,
      "learning_rate": 2.9065055382405468e-06,
      "loss": 0.0081,
      "step": 2173060
    },
    {
      "epoch": 3.556293081439877,
      "grad_norm": 0.17811891436576843,
      "learning_rate": 2.9064396460270295e-06,
      "loss": 0.007,
      "step": 2173080
    },
    {
      "epoch": 3.556325811878531,
      "grad_norm": 0.22558623552322388,
      "learning_rate": 2.9063737538135123e-06,
      "loss": 0.0113,
      "step": 2173100
    },
    {
      "epoch": 3.5563585423171844,
      "grad_norm": 2.163853168487549,
      "learning_rate": 2.906307861599995e-06,
      "loss": 0.0121,
      "step": 2173120
    },
    {
      "epoch": 3.5563912727558376,
      "grad_norm": 0.6179258823394775,
      "learning_rate": 2.906241969386478e-06,
      "loss": 0.0119,
      "step": 2173140
    },
    {
      "epoch": 3.5564240031944907,
      "grad_norm": 0.36236342787742615,
      "learning_rate": 2.906176077172961e-06,
      "loss": 0.0104,
      "step": 2173160
    },
    {
      "epoch": 3.5564567336331443,
      "grad_norm": 0.0849241316318512,
      "learning_rate": 2.9061101849594436e-06,
      "loss": 0.0108,
      "step": 2173180
    },
    {
      "epoch": 3.5564894640717974,
      "grad_norm": 0.18569138646125793,
      "learning_rate": 2.9060442927459264e-06,
      "loss": 0.0099,
      "step": 2173200
    },
    {
      "epoch": 3.5565221945104506,
      "grad_norm": 0.3371223211288452,
      "learning_rate": 2.905978400532409e-06,
      "loss": 0.0094,
      "step": 2173220
    },
    {
      "epoch": 3.556554924949104,
      "grad_norm": 0.14897936582565308,
      "learning_rate": 2.9059125083188923e-06,
      "loss": 0.0144,
      "step": 2173240
    },
    {
      "epoch": 3.5565876553877573,
      "grad_norm": 0.3746618330478668,
      "learning_rate": 2.905846616105375e-06,
      "loss": 0.0068,
      "step": 2173260
    },
    {
      "epoch": 3.556620385826411,
      "grad_norm": 0.25444933772087097,
      "learning_rate": 2.9057807238918578e-06,
      "loss": 0.0112,
      "step": 2173280
    },
    {
      "epoch": 3.556653116265064,
      "grad_norm": 0.663097620010376,
      "learning_rate": 2.9057148316783405e-06,
      "loss": 0.0122,
      "step": 2173300
    },
    {
      "epoch": 3.5566858467037177,
      "grad_norm": 0.35817885398864746,
      "learning_rate": 2.9056489394648237e-06,
      "loss": 0.0124,
      "step": 2173320
    },
    {
      "epoch": 3.556718577142371,
      "grad_norm": 0.14917148649692535,
      "learning_rate": 2.9055830472513064e-06,
      "loss": 0.0179,
      "step": 2173340
    },
    {
      "epoch": 3.556751307581024,
      "grad_norm": 0.3868049383163452,
      "learning_rate": 2.905517155037789e-06,
      "loss": 0.0124,
      "step": 2173360
    },
    {
      "epoch": 3.5567840380196776,
      "grad_norm": 0.07740382105112076,
      "learning_rate": 2.905451262824272e-06,
      "loss": 0.0113,
      "step": 2173380
    },
    {
      "epoch": 3.5568167684583307,
      "grad_norm": 0.19639632105827332,
      "learning_rate": 2.9053853706107555e-06,
      "loss": 0.0096,
      "step": 2173400
    },
    {
      "epoch": 3.5568494988969843,
      "grad_norm": 0.172538623213768,
      "learning_rate": 2.9053194783972382e-06,
      "loss": 0.0089,
      "step": 2173420
    },
    {
      "epoch": 3.5568822293356375,
      "grad_norm": 0.33313536643981934,
      "learning_rate": 2.905253586183721e-06,
      "loss": 0.0116,
      "step": 2173440
    },
    {
      "epoch": 3.556914959774291,
      "grad_norm": 0.2467714250087738,
      "learning_rate": 2.905187693970204e-06,
      "loss": 0.0066,
      "step": 2173460
    },
    {
      "epoch": 3.556947690212944,
      "grad_norm": 0.16767892241477966,
      "learning_rate": 2.905121801756687e-06,
      "loss": 0.0128,
      "step": 2173480
    },
    {
      "epoch": 3.5569804206515974,
      "grad_norm": 0.2241595834493637,
      "learning_rate": 2.9050559095431696e-06,
      "loss": 0.011,
      "step": 2173500
    },
    {
      "epoch": 3.557013151090251,
      "grad_norm": 0.2797792851924896,
      "learning_rate": 2.9049900173296523e-06,
      "loss": 0.0112,
      "step": 2173520
    },
    {
      "epoch": 3.557045881528904,
      "grad_norm": 0.5598288774490356,
      "learning_rate": 2.9049241251161355e-06,
      "loss": 0.0125,
      "step": 2173540
    },
    {
      "epoch": 3.5570786119675577,
      "grad_norm": 0.26130104064941406,
      "learning_rate": 2.9048582329026182e-06,
      "loss": 0.0058,
      "step": 2173560
    },
    {
      "epoch": 3.557111342406211,
      "grad_norm": 0.2708606421947479,
      "learning_rate": 2.904792340689101e-06,
      "loss": 0.01,
      "step": 2173580
    },
    {
      "epoch": 3.5571440728448644,
      "grad_norm": 0.14943744242191315,
      "learning_rate": 2.9047264484755837e-06,
      "loss": 0.0139,
      "step": 2173600
    },
    {
      "epoch": 3.5571768032835176,
      "grad_norm": 0.19069638848304749,
      "learning_rate": 2.9046605562620665e-06,
      "loss": 0.0103,
      "step": 2173620
    },
    {
      "epoch": 3.5572095337221707,
      "grad_norm": 0.14266374707221985,
      "learning_rate": 2.9045946640485496e-06,
      "loss": 0.0082,
      "step": 2173640
    },
    {
      "epoch": 3.5572422641608243,
      "grad_norm": 0.3842771053314209,
      "learning_rate": 2.9045287718350324e-06,
      "loss": 0.0128,
      "step": 2173660
    },
    {
      "epoch": 3.5572749945994775,
      "grad_norm": 0.2881618142127991,
      "learning_rate": 2.904462879621515e-06,
      "loss": 0.0118,
      "step": 2173680
    },
    {
      "epoch": 3.557307725038131,
      "grad_norm": 0.33421507477760315,
      "learning_rate": 2.904396987407998e-06,
      "loss": 0.0169,
      "step": 2173700
    },
    {
      "epoch": 3.5573404554767842,
      "grad_norm": 0.38120102882385254,
      "learning_rate": 2.904331095194481e-06,
      "loss": 0.0141,
      "step": 2173720
    },
    {
      "epoch": 3.557373185915438,
      "grad_norm": 0.2676413953304291,
      "learning_rate": 2.9042652029809637e-06,
      "loss": 0.0092,
      "step": 2173740
    },
    {
      "epoch": 3.557405916354091,
      "grad_norm": 0.47709742188453674,
      "learning_rate": 2.904199310767447e-06,
      "loss": 0.0109,
      "step": 2173760
    },
    {
      "epoch": 3.557438646792744,
      "grad_norm": 0.3419383764266968,
      "learning_rate": 2.90413341855393e-06,
      "loss": 0.0085,
      "step": 2173780
    },
    {
      "epoch": 3.5574713772313977,
      "grad_norm": 0.1851559728384018,
      "learning_rate": 2.904067526340413e-06,
      "loss": 0.0169,
      "step": 2173800
    },
    {
      "epoch": 3.557504107670051,
      "grad_norm": 0.3623593747615814,
      "learning_rate": 2.9040016341268955e-06,
      "loss": 0.0118,
      "step": 2173820
    },
    {
      "epoch": 3.5575368381087045,
      "grad_norm": 0.06792332231998444,
      "learning_rate": 2.9039357419133783e-06,
      "loss": 0.0139,
      "step": 2173840
    },
    {
      "epoch": 3.5575695685473576,
      "grad_norm": 0.2902388870716095,
      "learning_rate": 2.9038698496998615e-06,
      "loss": 0.0054,
      "step": 2173860
    },
    {
      "epoch": 3.557602298986011,
      "grad_norm": 0.1182158887386322,
      "learning_rate": 2.903803957486344e-06,
      "loss": 0.0118,
      "step": 2173880
    },
    {
      "epoch": 3.5576350294246644,
      "grad_norm": 0.23628626763820648,
      "learning_rate": 2.903738065272827e-06,
      "loss": 0.0105,
      "step": 2173900
    },
    {
      "epoch": 3.5576677598633175,
      "grad_norm": 0.14206142723560333,
      "learning_rate": 2.9036721730593097e-06,
      "loss": 0.0094,
      "step": 2173920
    },
    {
      "epoch": 3.557700490301971,
      "grad_norm": 0.147239550948143,
      "learning_rate": 2.903606280845793e-06,
      "loss": 0.0081,
      "step": 2173940
    },
    {
      "epoch": 3.5577332207406243,
      "grad_norm": 0.24456468224525452,
      "learning_rate": 2.9035403886322756e-06,
      "loss": 0.0102,
      "step": 2173960
    },
    {
      "epoch": 3.557765951179278,
      "grad_norm": 0.17185598611831665,
      "learning_rate": 2.9034744964187583e-06,
      "loss": 0.0163,
      "step": 2173980
    },
    {
      "epoch": 3.557798681617931,
      "grad_norm": 0.49459052085876465,
      "learning_rate": 2.903408604205241e-06,
      "loss": 0.0149,
      "step": 2174000
    },
    {
      "epoch": 3.5578314120565846,
      "grad_norm": 0.29011213779449463,
      "learning_rate": 2.9033427119917242e-06,
      "loss": 0.0097,
      "step": 2174020
    },
    {
      "epoch": 3.5578641424952377,
      "grad_norm": 0.3150334656238556,
      "learning_rate": 2.903276819778207e-06,
      "loss": 0.0148,
      "step": 2174040
    },
    {
      "epoch": 3.557896872933891,
      "grad_norm": 0.3388579189777374,
      "learning_rate": 2.9032109275646897e-06,
      "loss": 0.0097,
      "step": 2174060
    },
    {
      "epoch": 3.5579296033725445,
      "grad_norm": 0.9972257018089294,
      "learning_rate": 2.9031450353511724e-06,
      "loss": 0.0117,
      "step": 2174080
    },
    {
      "epoch": 3.5579623338111976,
      "grad_norm": 0.16574250161647797,
      "learning_rate": 2.903079143137656e-06,
      "loss": 0.0121,
      "step": 2174100
    },
    {
      "epoch": 3.5579950642498512,
      "grad_norm": 0.30914977192878723,
      "learning_rate": 2.9030132509241388e-06,
      "loss": 0.0133,
      "step": 2174120
    },
    {
      "epoch": 3.5580277946885044,
      "grad_norm": 0.5676515102386475,
      "learning_rate": 2.9029473587106215e-06,
      "loss": 0.0075,
      "step": 2174140
    },
    {
      "epoch": 3.558060525127158,
      "grad_norm": 0.4330188035964966,
      "learning_rate": 2.9028814664971042e-06,
      "loss": 0.009,
      "step": 2174160
    },
    {
      "epoch": 3.558093255565811,
      "grad_norm": 0.1463073045015335,
      "learning_rate": 2.9028155742835874e-06,
      "loss": 0.0124,
      "step": 2174180
    },
    {
      "epoch": 3.5581259860044643,
      "grad_norm": 0.28064092993736267,
      "learning_rate": 2.90274968207007e-06,
      "loss": 0.0189,
      "step": 2174200
    },
    {
      "epoch": 3.558158716443118,
      "grad_norm": 0.11026321351528168,
      "learning_rate": 2.902683789856553e-06,
      "loss": 0.0113,
      "step": 2174220
    },
    {
      "epoch": 3.558191446881771,
      "grad_norm": 0.26956403255462646,
      "learning_rate": 2.9026178976430356e-06,
      "loss": 0.0146,
      "step": 2174240
    },
    {
      "epoch": 3.558224177320424,
      "grad_norm": 0.27801886200904846,
      "learning_rate": 2.902552005429519e-06,
      "loss": 0.0113,
      "step": 2174260
    },
    {
      "epoch": 3.5582569077590778,
      "grad_norm": 0.08660753816366196,
      "learning_rate": 2.9024861132160015e-06,
      "loss": 0.0111,
      "step": 2174280
    },
    {
      "epoch": 3.5582896381977314,
      "grad_norm": 0.22093792259693146,
      "learning_rate": 2.9024202210024843e-06,
      "loss": 0.011,
      "step": 2174300
    },
    {
      "epoch": 3.5583223686363845,
      "grad_norm": 0.1503543257713318,
      "learning_rate": 2.902354328788967e-06,
      "loss": 0.0111,
      "step": 2174320
    },
    {
      "epoch": 3.5583550990750377,
      "grad_norm": 0.7386344075202942,
      "learning_rate": 2.90228843657545e-06,
      "loss": 0.01,
      "step": 2174340
    },
    {
      "epoch": 3.5583878295136913,
      "grad_norm": 0.16047464311122894,
      "learning_rate": 2.902222544361933e-06,
      "loss": 0.0067,
      "step": 2174360
    },
    {
      "epoch": 3.5584205599523444,
      "grad_norm": 0.10689198225736618,
      "learning_rate": 2.9021566521484157e-06,
      "loss": 0.0107,
      "step": 2174380
    },
    {
      "epoch": 3.5584532903909976,
      "grad_norm": 0.8733091950416565,
      "learning_rate": 2.9020907599348984e-06,
      "loss": 0.0158,
      "step": 2174400
    },
    {
      "epoch": 3.558486020829651,
      "grad_norm": 1.5257346630096436,
      "learning_rate": 2.9020248677213816e-06,
      "loss": 0.0137,
      "step": 2174420
    },
    {
      "epoch": 3.5585187512683047,
      "grad_norm": 0.3889901041984558,
      "learning_rate": 2.9019589755078643e-06,
      "loss": 0.0141,
      "step": 2174440
    },
    {
      "epoch": 3.558551481706958,
      "grad_norm": 0.07663542777299881,
      "learning_rate": 2.9018930832943475e-06,
      "loss": 0.0076,
      "step": 2174460
    },
    {
      "epoch": 3.558584212145611,
      "grad_norm": 0.3683421313762665,
      "learning_rate": 2.9018271910808306e-06,
      "loss": 0.0127,
      "step": 2174480
    },
    {
      "epoch": 3.5586169425842646,
      "grad_norm": 0.16650351881980896,
      "learning_rate": 2.9017612988673134e-06,
      "loss": 0.0112,
      "step": 2174500
    },
    {
      "epoch": 3.558649673022918,
      "grad_norm": 0.23614273965358734,
      "learning_rate": 2.901695406653796e-06,
      "loss": 0.0151,
      "step": 2174520
    },
    {
      "epoch": 3.558682403461571,
      "grad_norm": 0.525958240032196,
      "learning_rate": 2.901629514440279e-06,
      "loss": 0.0117,
      "step": 2174540
    },
    {
      "epoch": 3.5587151339002245,
      "grad_norm": 0.4188856780529022,
      "learning_rate": 2.901563622226762e-06,
      "loss": 0.0162,
      "step": 2174560
    },
    {
      "epoch": 3.558747864338878,
      "grad_norm": 0.034426212310791016,
      "learning_rate": 2.9014977300132447e-06,
      "loss": 0.0159,
      "step": 2174580
    },
    {
      "epoch": 3.5587805947775313,
      "grad_norm": 0.28095200657844543,
      "learning_rate": 2.9014318377997275e-06,
      "loss": 0.0113,
      "step": 2174600
    },
    {
      "epoch": 3.5588133252161844,
      "grad_norm": 0.22306016087532043,
      "learning_rate": 2.9013659455862102e-06,
      "loss": 0.0131,
      "step": 2174620
    },
    {
      "epoch": 3.558846055654838,
      "grad_norm": 0.47574836015701294,
      "learning_rate": 2.901300053372693e-06,
      "loss": 0.0117,
      "step": 2174640
    },
    {
      "epoch": 3.558878786093491,
      "grad_norm": 0.21765059232711792,
      "learning_rate": 2.901234161159176e-06,
      "loss": 0.0154,
      "step": 2174660
    },
    {
      "epoch": 3.5589115165321443,
      "grad_norm": 0.12592937052249908,
      "learning_rate": 2.901168268945659e-06,
      "loss": 0.0082,
      "step": 2174680
    },
    {
      "epoch": 3.558944246970798,
      "grad_norm": 0.24146132171154022,
      "learning_rate": 2.9011023767321416e-06,
      "loss": 0.0122,
      "step": 2174700
    },
    {
      "epoch": 3.5589769774094515,
      "grad_norm": 0.39078694581985474,
      "learning_rate": 2.9010364845186243e-06,
      "loss": 0.0123,
      "step": 2174720
    },
    {
      "epoch": 3.5590097078481047,
      "grad_norm": 0.5335160493850708,
      "learning_rate": 2.9009705923051075e-06,
      "loss": 0.0092,
      "step": 2174740
    },
    {
      "epoch": 3.559042438286758,
      "grad_norm": 0.27566859126091003,
      "learning_rate": 2.9009047000915902e-06,
      "loss": 0.0107,
      "step": 2174760
    },
    {
      "epoch": 3.5590751687254114,
      "grad_norm": 0.25784701108932495,
      "learning_rate": 2.900838807878073e-06,
      "loss": 0.0137,
      "step": 2174780
    },
    {
      "epoch": 3.5591078991640646,
      "grad_norm": 0.13323649764060974,
      "learning_rate": 2.9007729156645566e-06,
      "loss": 0.012,
      "step": 2174800
    },
    {
      "epoch": 3.5591406296027177,
      "grad_norm": 0.1410062611103058,
      "learning_rate": 2.9007070234510393e-06,
      "loss": 0.0083,
      "step": 2174820
    },
    {
      "epoch": 3.5591733600413713,
      "grad_norm": 0.3137059211730957,
      "learning_rate": 2.900641131237522e-06,
      "loss": 0.0099,
      "step": 2174840
    },
    {
      "epoch": 3.5592060904800245,
      "grad_norm": 0.051549188792705536,
      "learning_rate": 2.900575239024005e-06,
      "loss": 0.0147,
      "step": 2174860
    },
    {
      "epoch": 3.559238820918678,
      "grad_norm": 0.3883083164691925,
      "learning_rate": 2.900509346810488e-06,
      "loss": 0.015,
      "step": 2174880
    },
    {
      "epoch": 3.559271551357331,
      "grad_norm": 0.7059804201126099,
      "learning_rate": 2.9004434545969707e-06,
      "loss": 0.0128,
      "step": 2174900
    },
    {
      "epoch": 3.559304281795985,
      "grad_norm": 0.30060771107673645,
      "learning_rate": 2.9003775623834534e-06,
      "loss": 0.0124,
      "step": 2174920
    },
    {
      "epoch": 3.559337012234638,
      "grad_norm": 1.8294973373413086,
      "learning_rate": 2.900311670169936e-06,
      "loss": 0.015,
      "step": 2174940
    },
    {
      "epoch": 3.559369742673291,
      "grad_norm": 0.9109821915626526,
      "learning_rate": 2.9002457779564193e-06,
      "loss": 0.0094,
      "step": 2174960
    },
    {
      "epoch": 3.5594024731119447,
      "grad_norm": 0.36116984486579895,
      "learning_rate": 2.900179885742902e-06,
      "loss": 0.0155,
      "step": 2174980
    },
    {
      "epoch": 3.559435203550598,
      "grad_norm": 0.2025046944618225,
      "learning_rate": 2.900113993529385e-06,
      "loss": 0.0129,
      "step": 2175000
    },
    {
      "epoch": 3.5594679339892514,
      "grad_norm": 0.13370473682880402,
      "learning_rate": 2.9000481013158676e-06,
      "loss": 0.0146,
      "step": 2175020
    },
    {
      "epoch": 3.5595006644279046,
      "grad_norm": 0.11863411962985992,
      "learning_rate": 2.8999822091023507e-06,
      "loss": 0.0182,
      "step": 2175040
    },
    {
      "epoch": 3.559533394866558,
      "grad_norm": 0.13280236721038818,
      "learning_rate": 2.8999163168888335e-06,
      "loss": 0.0125,
      "step": 2175060
    },
    {
      "epoch": 3.5595661253052113,
      "grad_norm": 0.34567761421203613,
      "learning_rate": 2.899850424675316e-06,
      "loss": 0.0189,
      "step": 2175080
    },
    {
      "epoch": 3.5595988557438645,
      "grad_norm": 0.26212605834007263,
      "learning_rate": 2.899784532461799e-06,
      "loss": 0.0118,
      "step": 2175100
    },
    {
      "epoch": 3.559631586182518,
      "grad_norm": 0.14837829768657684,
      "learning_rate": 2.8997186402482817e-06,
      "loss": 0.0098,
      "step": 2175120
    },
    {
      "epoch": 3.559664316621171,
      "grad_norm": 0.6367000937461853,
      "learning_rate": 2.899652748034765e-06,
      "loss": 0.0167,
      "step": 2175140
    },
    {
      "epoch": 3.559697047059825,
      "grad_norm": 0.33925560116767883,
      "learning_rate": 2.899586855821248e-06,
      "loss": 0.0149,
      "step": 2175160
    },
    {
      "epoch": 3.559729777498478,
      "grad_norm": 0.14883172512054443,
      "learning_rate": 2.8995209636077307e-06,
      "loss": 0.0134,
      "step": 2175180
    },
    {
      "epoch": 3.5597625079371316,
      "grad_norm": 0.4141921401023865,
      "learning_rate": 2.899455071394214e-06,
      "loss": 0.009,
      "step": 2175200
    },
    {
      "epoch": 3.5597952383757847,
      "grad_norm": 0.19399087131023407,
      "learning_rate": 2.8993891791806966e-06,
      "loss": 0.011,
      "step": 2175220
    },
    {
      "epoch": 3.559827968814438,
      "grad_norm": 0.32932916283607483,
      "learning_rate": 2.8993232869671794e-06,
      "loss": 0.0089,
      "step": 2175240
    },
    {
      "epoch": 3.5598606992530915,
      "grad_norm": 0.9646726250648499,
      "learning_rate": 2.899257394753662e-06,
      "loss": 0.0128,
      "step": 2175260
    },
    {
      "epoch": 3.5598934296917446,
      "grad_norm": 0.11224006861448288,
      "learning_rate": 2.8991915025401453e-06,
      "loss": 0.0078,
      "step": 2175280
    },
    {
      "epoch": 3.559926160130398,
      "grad_norm": 0.11015548557043076,
      "learning_rate": 2.899125610326628e-06,
      "loss": 0.0099,
      "step": 2175300
    },
    {
      "epoch": 3.5599588905690513,
      "grad_norm": 0.2380611002445221,
      "learning_rate": 2.8990597181131108e-06,
      "loss": 0.0095,
      "step": 2175320
    },
    {
      "epoch": 3.559991621007705,
      "grad_norm": 0.23063480854034424,
      "learning_rate": 2.8989938258995935e-06,
      "loss": 0.0114,
      "step": 2175340
    },
    {
      "epoch": 3.560024351446358,
      "grad_norm": 0.24432224035263062,
      "learning_rate": 2.8989279336860767e-06,
      "loss": 0.0081,
      "step": 2175360
    },
    {
      "epoch": 3.5600570818850112,
      "grad_norm": 0.20214149355888367,
      "learning_rate": 2.8988620414725594e-06,
      "loss": 0.0105,
      "step": 2175380
    },
    {
      "epoch": 3.560089812323665,
      "grad_norm": 0.4049931764602661,
      "learning_rate": 2.898796149259042e-06,
      "loss": 0.0124,
      "step": 2175400
    },
    {
      "epoch": 3.560122542762318,
      "grad_norm": 0.3918614387512207,
      "learning_rate": 2.898730257045525e-06,
      "loss": 0.0124,
      "step": 2175420
    },
    {
      "epoch": 3.5601552732009716,
      "grad_norm": 0.09784285724163055,
      "learning_rate": 2.898664364832008e-06,
      "loss": 0.0109,
      "step": 2175440
    },
    {
      "epoch": 3.5601880036396247,
      "grad_norm": 0.8781613111495972,
      "learning_rate": 2.898598472618491e-06,
      "loss": 0.0081,
      "step": 2175460
    },
    {
      "epoch": 3.5602207340782783,
      "grad_norm": 0.46359047293663025,
      "learning_rate": 2.8985325804049735e-06,
      "loss": 0.0142,
      "step": 2175480
    },
    {
      "epoch": 3.5602534645169315,
      "grad_norm": 0.43647295236587524,
      "learning_rate": 2.8984666881914563e-06,
      "loss": 0.0103,
      "step": 2175500
    },
    {
      "epoch": 3.5602861949555846,
      "grad_norm": 0.41078177094459534,
      "learning_rate": 2.89840079597794e-06,
      "loss": 0.013,
      "step": 2175520
    },
    {
      "epoch": 3.560318925394238,
      "grad_norm": 0.4479806423187256,
      "learning_rate": 2.8983349037644226e-06,
      "loss": 0.0119,
      "step": 2175540
    },
    {
      "epoch": 3.5603516558328914,
      "grad_norm": 0.36834511160850525,
      "learning_rate": 2.8982690115509053e-06,
      "loss": 0.0116,
      "step": 2175560
    },
    {
      "epoch": 3.560384386271545,
      "grad_norm": 0.780301034450531,
      "learning_rate": 2.898203119337388e-06,
      "loss": 0.0136,
      "step": 2175580
    },
    {
      "epoch": 3.560417116710198,
      "grad_norm": 0.288857102394104,
      "learning_rate": 2.8981372271238712e-06,
      "loss": 0.0127,
      "step": 2175600
    },
    {
      "epoch": 3.5604498471488517,
      "grad_norm": 0.23697733879089355,
      "learning_rate": 2.898071334910354e-06,
      "loss": 0.0093,
      "step": 2175620
    },
    {
      "epoch": 3.560482577587505,
      "grad_norm": 0.10845125466585159,
      "learning_rate": 2.8980054426968367e-06,
      "loss": 0.0121,
      "step": 2175640
    },
    {
      "epoch": 3.560515308026158,
      "grad_norm": 0.17435206472873688,
      "learning_rate": 2.8979395504833195e-06,
      "loss": 0.014,
      "step": 2175660
    },
    {
      "epoch": 3.5605480384648116,
      "grad_norm": 0.22915077209472656,
      "learning_rate": 2.8978736582698026e-06,
      "loss": 0.0089,
      "step": 2175680
    },
    {
      "epoch": 3.5605807689034648,
      "grad_norm": 0.2661508619785309,
      "learning_rate": 2.8978077660562854e-06,
      "loss": 0.0169,
      "step": 2175700
    },
    {
      "epoch": 3.5606134993421183,
      "grad_norm": 0.22924493253231049,
      "learning_rate": 2.897741873842768e-06,
      "loss": 0.007,
      "step": 2175720
    },
    {
      "epoch": 3.5606462297807715,
      "grad_norm": 0.3046632707118988,
      "learning_rate": 2.897675981629251e-06,
      "loss": 0.0128,
      "step": 2175740
    },
    {
      "epoch": 3.560678960219425,
      "grad_norm": 0.15903912484645844,
      "learning_rate": 2.897610089415734e-06,
      "loss": 0.0144,
      "step": 2175760
    },
    {
      "epoch": 3.5607116906580782,
      "grad_norm": 0.13673238456249237,
      "learning_rate": 2.8975441972022168e-06,
      "loss": 0.0151,
      "step": 2175780
    },
    {
      "epoch": 3.5607444210967314,
      "grad_norm": 0.20044438540935516,
      "learning_rate": 2.8974783049886995e-06,
      "loss": 0.0112,
      "step": 2175800
    },
    {
      "epoch": 3.560777151535385,
      "grad_norm": 0.6971573829650879,
      "learning_rate": 2.8974124127751822e-06,
      "loss": 0.0072,
      "step": 2175820
    },
    {
      "epoch": 3.560809881974038,
      "grad_norm": 0.20708990097045898,
      "learning_rate": 2.8973465205616654e-06,
      "loss": 0.0116,
      "step": 2175840
    },
    {
      "epoch": 3.5608426124126913,
      "grad_norm": 0.1350410282611847,
      "learning_rate": 2.8972806283481486e-06,
      "loss": 0.0102,
      "step": 2175860
    },
    {
      "epoch": 3.560875342851345,
      "grad_norm": 0.1249949038028717,
      "learning_rate": 2.8972147361346313e-06,
      "loss": 0.0118,
      "step": 2175880
    },
    {
      "epoch": 3.5609080732899985,
      "grad_norm": 0.2127056121826172,
      "learning_rate": 2.8971488439211145e-06,
      "loss": 0.0118,
      "step": 2175900
    },
    {
      "epoch": 3.5609408037286516,
      "grad_norm": 0.18063434958457947,
      "learning_rate": 2.897082951707597e-06,
      "loss": 0.0125,
      "step": 2175920
    },
    {
      "epoch": 3.5609735341673048,
      "grad_norm": 0.727786123752594,
      "learning_rate": 2.89701705949408e-06,
      "loss": 0.0164,
      "step": 2175940
    },
    {
      "epoch": 3.5610062646059584,
      "grad_norm": 0.24461132287979126,
      "learning_rate": 2.8969511672805627e-06,
      "loss": 0.0103,
      "step": 2175960
    },
    {
      "epoch": 3.5610389950446115,
      "grad_norm": 0.1207214891910553,
      "learning_rate": 2.896885275067046e-06,
      "loss": 0.0151,
      "step": 2175980
    },
    {
      "epoch": 3.5610717254832647,
      "grad_norm": 0.3747211992740631,
      "learning_rate": 2.8968193828535286e-06,
      "loss": 0.0108,
      "step": 2176000
    },
    {
      "epoch": 3.5611044559219183,
      "grad_norm": 0.09324876219034195,
      "learning_rate": 2.8967534906400113e-06,
      "loss": 0.011,
      "step": 2176020
    },
    {
      "epoch": 3.561137186360572,
      "grad_norm": 0.3075866401195526,
      "learning_rate": 2.896687598426494e-06,
      "loss": 0.0103,
      "step": 2176040
    },
    {
      "epoch": 3.561169916799225,
      "grad_norm": 0.22738510370254517,
      "learning_rate": 2.896621706212977e-06,
      "loss": 0.0056,
      "step": 2176060
    },
    {
      "epoch": 3.561202647237878,
      "grad_norm": 0.2131342589855194,
      "learning_rate": 2.89655581399946e-06,
      "loss": 0.0185,
      "step": 2176080
    },
    {
      "epoch": 3.5612353776765318,
      "grad_norm": 0.1264837384223938,
      "learning_rate": 2.8964899217859427e-06,
      "loss": 0.0075,
      "step": 2176100
    },
    {
      "epoch": 3.561268108115185,
      "grad_norm": 0.33795803785324097,
      "learning_rate": 2.8964240295724254e-06,
      "loss": 0.0118,
      "step": 2176120
    },
    {
      "epoch": 3.561300838553838,
      "grad_norm": 0.37432658672332764,
      "learning_rate": 2.896358137358908e-06,
      "loss": 0.0077,
      "step": 2176140
    },
    {
      "epoch": 3.5613335689924916,
      "grad_norm": 0.039735984057188034,
      "learning_rate": 2.8962922451453913e-06,
      "loss": 0.0127,
      "step": 2176160
    },
    {
      "epoch": 3.5613662994311452,
      "grad_norm": 0.7220698595046997,
      "learning_rate": 2.896226352931874e-06,
      "loss": 0.0118,
      "step": 2176180
    },
    {
      "epoch": 3.5613990298697984,
      "grad_norm": 0.22338972985744476,
      "learning_rate": 2.896160460718357e-06,
      "loss": 0.0117,
      "step": 2176200
    },
    {
      "epoch": 3.5614317603084515,
      "grad_norm": 0.40420791506767273,
      "learning_rate": 2.8960945685048404e-06,
      "loss": 0.0097,
      "step": 2176220
    },
    {
      "epoch": 3.561464490747105,
      "grad_norm": 0.12362299859523773,
      "learning_rate": 2.896028676291323e-06,
      "loss": 0.0095,
      "step": 2176240
    },
    {
      "epoch": 3.5614972211857583,
      "grad_norm": 0.34298744797706604,
      "learning_rate": 2.895962784077806e-06,
      "loss": 0.0087,
      "step": 2176260
    },
    {
      "epoch": 3.5615299516244114,
      "grad_norm": 0.11693591624498367,
      "learning_rate": 2.8958968918642886e-06,
      "loss": 0.0084,
      "step": 2176280
    },
    {
      "epoch": 3.561562682063065,
      "grad_norm": 0.45115435123443604,
      "learning_rate": 2.895830999650772e-06,
      "loss": 0.015,
      "step": 2176300
    },
    {
      "epoch": 3.561595412501718,
      "grad_norm": 1.2888048887252808,
      "learning_rate": 2.8957651074372545e-06,
      "loss": 0.011,
      "step": 2176320
    },
    {
      "epoch": 3.5616281429403718,
      "grad_norm": 0.35133326053619385,
      "learning_rate": 2.8956992152237373e-06,
      "loss": 0.0101,
      "step": 2176340
    },
    {
      "epoch": 3.561660873379025,
      "grad_norm": 0.23552867770195007,
      "learning_rate": 2.89563332301022e-06,
      "loss": 0.0126,
      "step": 2176360
    },
    {
      "epoch": 3.5616936038176785,
      "grad_norm": 0.0773889422416687,
      "learning_rate": 2.895567430796703e-06,
      "loss": 0.0099,
      "step": 2176380
    },
    {
      "epoch": 3.5617263342563317,
      "grad_norm": 0.6414061784744263,
      "learning_rate": 2.895501538583186e-06,
      "loss": 0.0117,
      "step": 2176400
    },
    {
      "epoch": 3.561759064694985,
      "grad_norm": 0.7377325296401978,
      "learning_rate": 2.8954356463696687e-06,
      "loss": 0.0089,
      "step": 2176420
    },
    {
      "epoch": 3.5617917951336384,
      "grad_norm": 0.14590200781822205,
      "learning_rate": 2.8953697541561514e-06,
      "loss": 0.0093,
      "step": 2176440
    },
    {
      "epoch": 3.5618245255722916,
      "grad_norm": 0.48487767577171326,
      "learning_rate": 2.8953038619426346e-06,
      "loss": 0.0133,
      "step": 2176460
    },
    {
      "epoch": 3.561857256010945,
      "grad_norm": 0.12354162335395813,
      "learning_rate": 2.8952379697291173e-06,
      "loss": 0.014,
      "step": 2176480
    },
    {
      "epoch": 3.5618899864495983,
      "grad_norm": 0.38009124994277954,
      "learning_rate": 2.8951720775156e-06,
      "loss": 0.0112,
      "step": 2176500
    },
    {
      "epoch": 3.561922716888252,
      "grad_norm": 0.12366606295108795,
      "learning_rate": 2.8951061853020828e-06,
      "loss": 0.0115,
      "step": 2176520
    },
    {
      "epoch": 3.561955447326905,
      "grad_norm": 0.15432731807231903,
      "learning_rate": 2.8950402930885655e-06,
      "loss": 0.0093,
      "step": 2176540
    },
    {
      "epoch": 3.561988177765558,
      "grad_norm": 0.6028369665145874,
      "learning_rate": 2.894974400875049e-06,
      "loss": 0.0115,
      "step": 2176560
    },
    {
      "epoch": 3.562020908204212,
      "grad_norm": 0.14249712228775024,
      "learning_rate": 2.894908508661532e-06,
      "loss": 0.0091,
      "step": 2176580
    },
    {
      "epoch": 3.562053638642865,
      "grad_norm": 0.3345014154911041,
      "learning_rate": 2.8948426164480146e-06,
      "loss": 0.0117,
      "step": 2176600
    },
    {
      "epoch": 3.5620863690815185,
      "grad_norm": 0.3715056777000427,
      "learning_rate": 2.8947767242344977e-06,
      "loss": 0.0111,
      "step": 2176620
    },
    {
      "epoch": 3.5621190995201717,
      "grad_norm": 0.12917210161685944,
      "learning_rate": 2.8947108320209805e-06,
      "loss": 0.0069,
      "step": 2176640
    },
    {
      "epoch": 3.5621518299588253,
      "grad_norm": 0.1508486270904541,
      "learning_rate": 2.8946449398074632e-06,
      "loss": 0.0113,
      "step": 2176660
    },
    {
      "epoch": 3.5621845603974784,
      "grad_norm": 0.6373414993286133,
      "learning_rate": 2.894579047593946e-06,
      "loss": 0.014,
      "step": 2176680
    },
    {
      "epoch": 3.5622172908361316,
      "grad_norm": 0.506718635559082,
      "learning_rate": 2.894513155380429e-06,
      "loss": 0.0089,
      "step": 2176700
    },
    {
      "epoch": 3.562250021274785,
      "grad_norm": 0.3967048227787018,
      "learning_rate": 2.894447263166912e-06,
      "loss": 0.0142,
      "step": 2176720
    },
    {
      "epoch": 3.5622827517134383,
      "grad_norm": 0.27333784103393555,
      "learning_rate": 2.8943813709533946e-06,
      "loss": 0.008,
      "step": 2176740
    },
    {
      "epoch": 3.562315482152092,
      "grad_norm": 0.42396992444992065,
      "learning_rate": 2.8943154787398774e-06,
      "loss": 0.0134,
      "step": 2176760
    },
    {
      "epoch": 3.562348212590745,
      "grad_norm": 0.20507636666297913,
      "learning_rate": 2.8942495865263605e-06,
      "loss": 0.0105,
      "step": 2176780
    },
    {
      "epoch": 3.5623809430293987,
      "grad_norm": 0.16332100331783295,
      "learning_rate": 2.8941836943128433e-06,
      "loss": 0.009,
      "step": 2176800
    },
    {
      "epoch": 3.562413673468052,
      "grad_norm": 0.21520450711250305,
      "learning_rate": 2.894117802099326e-06,
      "loss": 0.006,
      "step": 2176820
    },
    {
      "epoch": 3.562446403906705,
      "grad_norm": 0.11806637048721313,
      "learning_rate": 2.8940519098858087e-06,
      "loss": 0.0072,
      "step": 2176840
    },
    {
      "epoch": 3.5624791343453586,
      "grad_norm": 0.32204002141952515,
      "learning_rate": 2.893986017672292e-06,
      "loss": 0.0087,
      "step": 2176860
    },
    {
      "epoch": 3.5625118647840117,
      "grad_norm": 0.21362869441509247,
      "learning_rate": 2.8939201254587746e-06,
      "loss": 0.0145,
      "step": 2176880
    },
    {
      "epoch": 3.5625445952226653,
      "grad_norm": 0.20228302478790283,
      "learning_rate": 2.8938542332452574e-06,
      "loss": 0.0136,
      "step": 2176900
    },
    {
      "epoch": 3.5625773256613185,
      "grad_norm": 0.4264095425605774,
      "learning_rate": 2.893788341031741e-06,
      "loss": 0.0088,
      "step": 2176920
    },
    {
      "epoch": 3.562610056099972,
      "grad_norm": 0.16126497089862823,
      "learning_rate": 2.8937224488182237e-06,
      "loss": 0.0088,
      "step": 2176940
    },
    {
      "epoch": 3.562642786538625,
      "grad_norm": 0.39050954580307007,
      "learning_rate": 2.8936565566047064e-06,
      "loss": 0.0112,
      "step": 2176960
    },
    {
      "epoch": 3.5626755169772784,
      "grad_norm": 0.7362273931503296,
      "learning_rate": 2.893590664391189e-06,
      "loss": 0.0097,
      "step": 2176980
    },
    {
      "epoch": 3.562708247415932,
      "grad_norm": 0.2345682978630066,
      "learning_rate": 2.8935247721776723e-06,
      "loss": 0.0064,
      "step": 2177000
    },
    {
      "epoch": 3.562740977854585,
      "grad_norm": 0.442688912153244,
      "learning_rate": 2.893458879964155e-06,
      "loss": 0.0157,
      "step": 2177020
    },
    {
      "epoch": 3.5627737082932387,
      "grad_norm": 0.09823440760374069,
      "learning_rate": 2.893392987750638e-06,
      "loss": 0.0134,
      "step": 2177040
    },
    {
      "epoch": 3.562806438731892,
      "grad_norm": 0.31409960985183716,
      "learning_rate": 2.8933270955371206e-06,
      "loss": 0.0131,
      "step": 2177060
    },
    {
      "epoch": 3.5628391691705454,
      "grad_norm": 0.44185304641723633,
      "learning_rate": 2.8932612033236033e-06,
      "loss": 0.0098,
      "step": 2177080
    },
    {
      "epoch": 3.5628718996091986,
      "grad_norm": 0.6048108339309692,
      "learning_rate": 2.8931953111100865e-06,
      "loss": 0.0152,
      "step": 2177100
    },
    {
      "epoch": 3.5629046300478517,
      "grad_norm": 0.23996108770370483,
      "learning_rate": 2.893129418896569e-06,
      "loss": 0.0098,
      "step": 2177120
    },
    {
      "epoch": 3.5629373604865053,
      "grad_norm": 0.39888352155685425,
      "learning_rate": 2.893063526683052e-06,
      "loss": 0.0149,
      "step": 2177140
    },
    {
      "epoch": 3.5629700909251585,
      "grad_norm": 0.5604145526885986,
      "learning_rate": 2.8929976344695347e-06,
      "loss": 0.0103,
      "step": 2177160
    },
    {
      "epoch": 3.563002821363812,
      "grad_norm": 0.20527133345603943,
      "learning_rate": 2.892931742256018e-06,
      "loss": 0.0088,
      "step": 2177180
    },
    {
      "epoch": 3.5630355518024652,
      "grad_norm": 0.2063220888376236,
      "learning_rate": 2.8928658500425006e-06,
      "loss": 0.0115,
      "step": 2177200
    },
    {
      "epoch": 3.563068282241119,
      "grad_norm": 0.42310839891433716,
      "learning_rate": 2.8927999578289833e-06,
      "loss": 0.0154,
      "step": 2177220
    },
    {
      "epoch": 3.563101012679772,
      "grad_norm": 0.2974238395690918,
      "learning_rate": 2.892734065615466e-06,
      "loss": 0.0086,
      "step": 2177240
    },
    {
      "epoch": 3.563133743118425,
      "grad_norm": 0.11015640199184418,
      "learning_rate": 2.8926681734019492e-06,
      "loss": 0.0081,
      "step": 2177260
    },
    {
      "epoch": 3.5631664735570787,
      "grad_norm": 0.19649170339107513,
      "learning_rate": 2.8926022811884324e-06,
      "loss": 0.0149,
      "step": 2177280
    },
    {
      "epoch": 3.563199203995732,
      "grad_norm": 0.1647414118051529,
      "learning_rate": 2.892536388974915e-06,
      "loss": 0.0092,
      "step": 2177300
    },
    {
      "epoch": 3.563231934434385,
      "grad_norm": 0.15563428401947021,
      "learning_rate": 2.8924704967613983e-06,
      "loss": 0.0088,
      "step": 2177320
    },
    {
      "epoch": 3.5632646648730386,
      "grad_norm": 0.0931769460439682,
      "learning_rate": 2.892404604547881e-06,
      "loss": 0.0141,
      "step": 2177340
    },
    {
      "epoch": 3.563297395311692,
      "grad_norm": 0.06677994132041931,
      "learning_rate": 2.8923387123343638e-06,
      "loss": 0.0085,
      "step": 2177360
    },
    {
      "epoch": 3.5633301257503454,
      "grad_norm": 0.01946200244128704,
      "learning_rate": 2.8922728201208465e-06,
      "loss": 0.0128,
      "step": 2177380
    },
    {
      "epoch": 3.5633628561889985,
      "grad_norm": 0.3932510018348694,
      "learning_rate": 2.8922069279073297e-06,
      "loss": 0.0146,
      "step": 2177400
    },
    {
      "epoch": 3.563395586627652,
      "grad_norm": 0.36969810724258423,
      "learning_rate": 2.8921410356938124e-06,
      "loss": 0.0079,
      "step": 2177420
    },
    {
      "epoch": 3.5634283170663053,
      "grad_norm": 0.1974562555551529,
      "learning_rate": 2.892075143480295e-06,
      "loss": 0.0119,
      "step": 2177440
    },
    {
      "epoch": 3.5634610475049584,
      "grad_norm": 0.5084273219108582,
      "learning_rate": 2.892009251266778e-06,
      "loss": 0.0119,
      "step": 2177460
    },
    {
      "epoch": 3.563493777943612,
      "grad_norm": 0.1778896450996399,
      "learning_rate": 2.8919433590532606e-06,
      "loss": 0.0151,
      "step": 2177480
    },
    {
      "epoch": 3.5635265083822656,
      "grad_norm": 0.09511016309261322,
      "learning_rate": 2.891877466839744e-06,
      "loss": 0.006,
      "step": 2177500
    },
    {
      "epoch": 3.5635592388209187,
      "grad_norm": 0.2611829340457916,
      "learning_rate": 2.8918115746262265e-06,
      "loss": 0.0114,
      "step": 2177520
    },
    {
      "epoch": 3.563591969259572,
      "grad_norm": 0.12598656117916107,
      "learning_rate": 2.8917456824127093e-06,
      "loss": 0.0111,
      "step": 2177540
    },
    {
      "epoch": 3.5636246996982255,
      "grad_norm": 0.33643367886543274,
      "learning_rate": 2.891679790199192e-06,
      "loss": 0.0122,
      "step": 2177560
    },
    {
      "epoch": 3.5636574301368786,
      "grad_norm": 0.1712459772825241,
      "learning_rate": 2.891613897985675e-06,
      "loss": 0.0102,
      "step": 2177580
    },
    {
      "epoch": 3.563690160575532,
      "grad_norm": 0.08359953761100769,
      "learning_rate": 2.891548005772158e-06,
      "loss": 0.0058,
      "step": 2177600
    },
    {
      "epoch": 3.5637228910141854,
      "grad_norm": 0.33531704545021057,
      "learning_rate": 2.891482113558641e-06,
      "loss": 0.0111,
      "step": 2177620
    },
    {
      "epoch": 3.563755621452839,
      "grad_norm": 0.565268874168396,
      "learning_rate": 2.8914162213451243e-06,
      "loss": 0.0106,
      "step": 2177640
    },
    {
      "epoch": 3.563788351891492,
      "grad_norm": 0.23254533112049103,
      "learning_rate": 2.891350329131607e-06,
      "loss": 0.0107,
      "step": 2177660
    },
    {
      "epoch": 3.5638210823301453,
      "grad_norm": 0.4705734848976135,
      "learning_rate": 2.8912844369180897e-06,
      "loss": 0.0122,
      "step": 2177680
    },
    {
      "epoch": 3.563853812768799,
      "grad_norm": 0.0629231408238411,
      "learning_rate": 2.8912185447045725e-06,
      "loss": 0.0068,
      "step": 2177700
    },
    {
      "epoch": 3.563886543207452,
      "grad_norm": 0.07191148400306702,
      "learning_rate": 2.8911526524910556e-06,
      "loss": 0.0147,
      "step": 2177720
    },
    {
      "epoch": 3.563919273646105,
      "grad_norm": 0.35932743549346924,
      "learning_rate": 2.8910867602775384e-06,
      "loss": 0.0142,
      "step": 2177740
    },
    {
      "epoch": 3.5639520040847588,
      "grad_norm": 0.252727210521698,
      "learning_rate": 2.891020868064021e-06,
      "loss": 0.0149,
      "step": 2177760
    },
    {
      "epoch": 3.5639847345234124,
      "grad_norm": 1.2149438858032227,
      "learning_rate": 2.890954975850504e-06,
      "loss": 0.0112,
      "step": 2177780
    },
    {
      "epoch": 3.5640174649620655,
      "grad_norm": 0.2400503158569336,
      "learning_rate": 2.890889083636987e-06,
      "loss": 0.0121,
      "step": 2177800
    },
    {
      "epoch": 3.5640501954007187,
      "grad_norm": 0.16082844138145447,
      "learning_rate": 2.8908231914234698e-06,
      "loss": 0.012,
      "step": 2177820
    },
    {
      "epoch": 3.5640829258393723,
      "grad_norm": 0.20736047625541687,
      "learning_rate": 2.8907572992099525e-06,
      "loss": 0.0105,
      "step": 2177840
    },
    {
      "epoch": 3.5641156562780254,
      "grad_norm": 0.15673410892486572,
      "learning_rate": 2.8906914069964352e-06,
      "loss": 0.01,
      "step": 2177860
    },
    {
      "epoch": 3.5641483867166786,
      "grad_norm": 0.050133537501096725,
      "learning_rate": 2.8906255147829184e-06,
      "loss": 0.0104,
      "step": 2177880
    },
    {
      "epoch": 3.564181117155332,
      "grad_norm": 0.8818559050559998,
      "learning_rate": 2.890559622569401e-06,
      "loss": 0.011,
      "step": 2177900
    },
    {
      "epoch": 3.5642138475939853,
      "grad_norm": 0.1139376237988472,
      "learning_rate": 2.890493730355884e-06,
      "loss": 0.0067,
      "step": 2177920
    },
    {
      "epoch": 3.564246578032639,
      "grad_norm": 0.2429962456226349,
      "learning_rate": 2.8904278381423666e-06,
      "loss": 0.0056,
      "step": 2177940
    },
    {
      "epoch": 3.564279308471292,
      "grad_norm": 0.27567484974861145,
      "learning_rate": 2.8903619459288494e-06,
      "loss": 0.0106,
      "step": 2177960
    },
    {
      "epoch": 3.5643120389099456,
      "grad_norm": 0.34777477383613586,
      "learning_rate": 2.890296053715333e-06,
      "loss": 0.0156,
      "step": 2177980
    },
    {
      "epoch": 3.564344769348599,
      "grad_norm": 0.5475711226463318,
      "learning_rate": 2.8902301615018157e-06,
      "loss": 0.0119,
      "step": 2178000
    },
    {
      "epoch": 3.564377499787252,
      "grad_norm": 0.17231710255146027,
      "learning_rate": 2.8901642692882984e-06,
      "loss": 0.0083,
      "step": 2178020
    },
    {
      "epoch": 3.5644102302259055,
      "grad_norm": 0.3442073464393616,
      "learning_rate": 2.8900983770747816e-06,
      "loss": 0.013,
      "step": 2178040
    },
    {
      "epoch": 3.5644429606645587,
      "grad_norm": 0.7279717326164246,
      "learning_rate": 2.8900324848612643e-06,
      "loss": 0.0146,
      "step": 2178060
    },
    {
      "epoch": 3.5644756911032123,
      "grad_norm": 0.5455262064933777,
      "learning_rate": 2.889966592647747e-06,
      "loss": 0.0112,
      "step": 2178080
    },
    {
      "epoch": 3.5645084215418654,
      "grad_norm": 0.7150212526321411,
      "learning_rate": 2.88990070043423e-06,
      "loss": 0.022,
      "step": 2178100
    },
    {
      "epoch": 3.564541151980519,
      "grad_norm": 0.21690256893634796,
      "learning_rate": 2.889834808220713e-06,
      "loss": 0.0096,
      "step": 2178120
    },
    {
      "epoch": 3.564573882419172,
      "grad_norm": 0.17805235087871552,
      "learning_rate": 2.8897689160071957e-06,
      "loss": 0.0123,
      "step": 2178140
    },
    {
      "epoch": 3.5646066128578253,
      "grad_norm": 0.3126070201396942,
      "learning_rate": 2.8897030237936785e-06,
      "loss": 0.0081,
      "step": 2178160
    },
    {
      "epoch": 3.564639343296479,
      "grad_norm": 0.17172746360301971,
      "learning_rate": 2.889637131580161e-06,
      "loss": 0.0152,
      "step": 2178180
    },
    {
      "epoch": 3.564672073735132,
      "grad_norm": 0.1729212999343872,
      "learning_rate": 2.8895712393666444e-06,
      "loss": 0.0093,
      "step": 2178200
    },
    {
      "epoch": 3.5647048041737857,
      "grad_norm": 0.25537604093551636,
      "learning_rate": 2.889505347153127e-06,
      "loss": 0.0151,
      "step": 2178220
    },
    {
      "epoch": 3.564737534612439,
      "grad_norm": 0.24385324120521545,
      "learning_rate": 2.88943945493961e-06,
      "loss": 0.0105,
      "step": 2178240
    },
    {
      "epoch": 3.5647702650510924,
      "grad_norm": 0.23950067162513733,
      "learning_rate": 2.8893735627260926e-06,
      "loss": 0.0119,
      "step": 2178260
    },
    {
      "epoch": 3.5648029954897456,
      "grad_norm": 0.21426010131835938,
      "learning_rate": 2.8893076705125757e-06,
      "loss": 0.0157,
      "step": 2178280
    },
    {
      "epoch": 3.5648357259283987,
      "grad_norm": 0.16688618063926697,
      "learning_rate": 2.8892417782990585e-06,
      "loss": 0.0077,
      "step": 2178300
    },
    {
      "epoch": 3.5648684563670523,
      "grad_norm": 0.4337734878063202,
      "learning_rate": 2.8891758860855416e-06,
      "loss": 0.0109,
      "step": 2178320
    },
    {
      "epoch": 3.5649011868057054,
      "grad_norm": 0.3004555106163025,
      "learning_rate": 2.889109993872025e-06,
      "loss": 0.0075,
      "step": 2178340
    },
    {
      "epoch": 3.564933917244359,
      "grad_norm": 0.11961499601602554,
      "learning_rate": 2.8890441016585075e-06,
      "loss": 0.0087,
      "step": 2178360
    },
    {
      "epoch": 3.564966647683012,
      "grad_norm": 0.43028032779693604,
      "learning_rate": 2.8889782094449903e-06,
      "loss": 0.0102,
      "step": 2178380
    },
    {
      "epoch": 3.564999378121666,
      "grad_norm": 0.2040390521287918,
      "learning_rate": 2.888912317231473e-06,
      "loss": 0.0096,
      "step": 2178400
    },
    {
      "epoch": 3.565032108560319,
      "grad_norm": 0.44925764203071594,
      "learning_rate": 2.888846425017956e-06,
      "loss": 0.0125,
      "step": 2178420
    },
    {
      "epoch": 3.565064838998972,
      "grad_norm": 0.2725822925567627,
      "learning_rate": 2.888780532804439e-06,
      "loss": 0.0148,
      "step": 2178440
    },
    {
      "epoch": 3.5650975694376257,
      "grad_norm": 0.30538299679756165,
      "learning_rate": 2.8887146405909217e-06,
      "loss": 0.0108,
      "step": 2178460
    },
    {
      "epoch": 3.565130299876279,
      "grad_norm": 0.3816995918750763,
      "learning_rate": 2.8886487483774044e-06,
      "loss": 0.0118,
      "step": 2178480
    },
    {
      "epoch": 3.5651630303149324,
      "grad_norm": 0.10118374228477478,
      "learning_rate": 2.888582856163887e-06,
      "loss": 0.0093,
      "step": 2178500
    },
    {
      "epoch": 3.5651957607535856,
      "grad_norm": 0.3359667658805847,
      "learning_rate": 2.8885169639503703e-06,
      "loss": 0.009,
      "step": 2178520
    },
    {
      "epoch": 3.565228491192239,
      "grad_norm": 0.2822243869304657,
      "learning_rate": 2.888451071736853e-06,
      "loss": 0.018,
      "step": 2178540
    },
    {
      "epoch": 3.5652612216308923,
      "grad_norm": 0.07343140244483948,
      "learning_rate": 2.8883851795233358e-06,
      "loss": 0.0121,
      "step": 2178560
    },
    {
      "epoch": 3.5652939520695455,
      "grad_norm": 0.15397030115127563,
      "learning_rate": 2.8883192873098185e-06,
      "loss": 0.011,
      "step": 2178580
    },
    {
      "epoch": 3.565326682508199,
      "grad_norm": 0.3479246199131012,
      "learning_rate": 2.8882533950963017e-06,
      "loss": 0.0111,
      "step": 2178600
    },
    {
      "epoch": 3.565359412946852,
      "grad_norm": 0.39381080865859985,
      "learning_rate": 2.8881875028827844e-06,
      "loss": 0.012,
      "step": 2178620
    },
    {
      "epoch": 3.565392143385506,
      "grad_norm": 0.3014630377292633,
      "learning_rate": 2.888121610669267e-06,
      "loss": 0.0118,
      "step": 2178640
    },
    {
      "epoch": 3.565424873824159,
      "grad_norm": 0.2484971582889557,
      "learning_rate": 2.88805571845575e-06,
      "loss": 0.014,
      "step": 2178660
    },
    {
      "epoch": 3.5654576042628126,
      "grad_norm": 0.5382654666900635,
      "learning_rate": 2.8879898262422335e-06,
      "loss": 0.0179,
      "step": 2178680
    },
    {
      "epoch": 3.5654903347014657,
      "grad_norm": 0.48490527272224426,
      "learning_rate": 2.8879239340287162e-06,
      "loss": 0.0137,
      "step": 2178700
    },
    {
      "epoch": 3.565523065140119,
      "grad_norm": 0.6964935660362244,
      "learning_rate": 2.887858041815199e-06,
      "loss": 0.0133,
      "step": 2178720
    },
    {
      "epoch": 3.5655557955787724,
      "grad_norm": 0.19610115885734558,
      "learning_rate": 2.887792149601682e-06,
      "loss": 0.0133,
      "step": 2178740
    },
    {
      "epoch": 3.5655885260174256,
      "grad_norm": 0.30625832080841064,
      "learning_rate": 2.887726257388165e-06,
      "loss": 0.0073,
      "step": 2178760
    },
    {
      "epoch": 3.5656212564560787,
      "grad_norm": 0.21153144538402557,
      "learning_rate": 2.8876603651746476e-06,
      "loss": 0.0165,
      "step": 2178780
    },
    {
      "epoch": 3.5656539868947323,
      "grad_norm": 0.25772082805633545,
      "learning_rate": 2.8875944729611304e-06,
      "loss": 0.0119,
      "step": 2178800
    },
    {
      "epoch": 3.565686717333386,
      "grad_norm": 0.265377014875412,
      "learning_rate": 2.8875285807476135e-06,
      "loss": 0.0115,
      "step": 2178820
    },
    {
      "epoch": 3.565719447772039,
      "grad_norm": 0.5847350358963013,
      "learning_rate": 2.8874626885340963e-06,
      "loss": 0.0128,
      "step": 2178840
    },
    {
      "epoch": 3.5657521782106922,
      "grad_norm": 0.23113036155700684,
      "learning_rate": 2.887396796320579e-06,
      "loss": 0.01,
      "step": 2178860
    },
    {
      "epoch": 3.565784908649346,
      "grad_norm": 0.2138809859752655,
      "learning_rate": 2.8873309041070617e-06,
      "loss": 0.0106,
      "step": 2178880
    },
    {
      "epoch": 3.565817639087999,
      "grad_norm": 0.2770077586174011,
      "learning_rate": 2.8872650118935445e-06,
      "loss": 0.0103,
      "step": 2178900
    },
    {
      "epoch": 3.565850369526652,
      "grad_norm": 0.2068811058998108,
      "learning_rate": 2.8871991196800276e-06,
      "loss": 0.0088,
      "step": 2178920
    },
    {
      "epoch": 3.5658830999653057,
      "grad_norm": 0.17306718230247498,
      "learning_rate": 2.8871332274665104e-06,
      "loss": 0.0103,
      "step": 2178940
    },
    {
      "epoch": 3.5659158304039593,
      "grad_norm": 0.5554760694503784,
      "learning_rate": 2.887067335252993e-06,
      "loss": 0.0106,
      "step": 2178960
    },
    {
      "epoch": 3.5659485608426125,
      "grad_norm": 0.28912150859832764,
      "learning_rate": 2.887001443039476e-06,
      "loss": 0.0066,
      "step": 2178980
    },
    {
      "epoch": 3.5659812912812656,
      "grad_norm": 0.4009858965873718,
      "learning_rate": 2.886935550825959e-06,
      "loss": 0.0116,
      "step": 2179000
    },
    {
      "epoch": 3.566014021719919,
      "grad_norm": 0.5160861015319824,
      "learning_rate": 2.8868696586124418e-06,
      "loss": 0.0104,
      "step": 2179020
    },
    {
      "epoch": 3.5660467521585724,
      "grad_norm": 0.6789504289627075,
      "learning_rate": 2.886803766398925e-06,
      "loss": 0.0141,
      "step": 2179040
    },
    {
      "epoch": 3.5660794825972255,
      "grad_norm": 0.17005252838134766,
      "learning_rate": 2.886737874185408e-06,
      "loss": 0.0105,
      "step": 2179060
    },
    {
      "epoch": 3.566112213035879,
      "grad_norm": 0.22886371612548828,
      "learning_rate": 2.886671981971891e-06,
      "loss": 0.0111,
      "step": 2179080
    },
    {
      "epoch": 3.5661449434745327,
      "grad_norm": 0.47983771562576294,
      "learning_rate": 2.8866060897583736e-06,
      "loss": 0.0091,
      "step": 2179100
    },
    {
      "epoch": 3.566177673913186,
      "grad_norm": 0.16255256533622742,
      "learning_rate": 2.8865401975448563e-06,
      "loss": 0.009,
      "step": 2179120
    },
    {
      "epoch": 3.566210404351839,
      "grad_norm": 0.23222339153289795,
      "learning_rate": 2.8864743053313395e-06,
      "loss": 0.0103,
      "step": 2179140
    },
    {
      "epoch": 3.5662431347904926,
      "grad_norm": 0.09022631496191025,
      "learning_rate": 2.8864084131178222e-06,
      "loss": 0.0118,
      "step": 2179160
    },
    {
      "epoch": 3.5662758652291457,
      "grad_norm": 0.17091785371303558,
      "learning_rate": 2.886342520904305e-06,
      "loss": 0.0124,
      "step": 2179180
    },
    {
      "epoch": 3.566308595667799,
      "grad_norm": 0.566186249256134,
      "learning_rate": 2.8862766286907877e-06,
      "loss": 0.0108,
      "step": 2179200
    },
    {
      "epoch": 3.5663413261064525,
      "grad_norm": 0.14917655289173126,
      "learning_rate": 2.886210736477271e-06,
      "loss": 0.0088,
      "step": 2179220
    },
    {
      "epoch": 3.566374056545106,
      "grad_norm": 0.10030672699213028,
      "learning_rate": 2.8861448442637536e-06,
      "loss": 0.0087,
      "step": 2179240
    },
    {
      "epoch": 3.5664067869837592,
      "grad_norm": 0.2688860297203064,
      "learning_rate": 2.8860789520502363e-06,
      "loss": 0.0098,
      "step": 2179260
    },
    {
      "epoch": 3.5664395174224124,
      "grad_norm": 0.9011573791503906,
      "learning_rate": 2.886013059836719e-06,
      "loss": 0.0115,
      "step": 2179280
    },
    {
      "epoch": 3.566472247861066,
      "grad_norm": 0.43355801701545715,
      "learning_rate": 2.8859471676232022e-06,
      "loss": 0.0109,
      "step": 2179300
    },
    {
      "epoch": 3.566504978299719,
      "grad_norm": 0.35748091340065,
      "learning_rate": 2.885881275409685e-06,
      "loss": 0.0093,
      "step": 2179320
    },
    {
      "epoch": 3.5665377087383723,
      "grad_norm": 0.16344548761844635,
      "learning_rate": 2.8858153831961677e-06,
      "loss": 0.0106,
      "step": 2179340
    },
    {
      "epoch": 3.566570439177026,
      "grad_norm": 0.06694276630878448,
      "learning_rate": 2.8857494909826505e-06,
      "loss": 0.0113,
      "step": 2179360
    },
    {
      "epoch": 3.566603169615679,
      "grad_norm": 0.45540574193000793,
      "learning_rate": 2.885683598769134e-06,
      "loss": 0.0147,
      "step": 2179380
    },
    {
      "epoch": 3.5666359000543326,
      "grad_norm": 0.41041237115859985,
      "learning_rate": 2.8856177065556168e-06,
      "loss": 0.0095,
      "step": 2179400
    },
    {
      "epoch": 3.5666686304929858,
      "grad_norm": 0.07717038691043854,
      "learning_rate": 2.8855518143420995e-06,
      "loss": 0.0108,
      "step": 2179420
    },
    {
      "epoch": 3.5667013609316394,
      "grad_norm": 0.5426139235496521,
      "learning_rate": 2.8854859221285823e-06,
      "loss": 0.0073,
      "step": 2179440
    },
    {
      "epoch": 3.5667340913702925,
      "grad_norm": 0.49930620193481445,
      "learning_rate": 2.8854200299150654e-06,
      "loss": 0.0122,
      "step": 2179460
    },
    {
      "epoch": 3.5667668218089457,
      "grad_norm": 0.2707650661468506,
      "learning_rate": 2.885354137701548e-06,
      "loss": 0.009,
      "step": 2179480
    },
    {
      "epoch": 3.5667995522475993,
      "grad_norm": 0.20629949867725372,
      "learning_rate": 2.885288245488031e-06,
      "loss": 0.011,
      "step": 2179500
    },
    {
      "epoch": 3.5668322826862524,
      "grad_norm": 0.3542976677417755,
      "learning_rate": 2.8852223532745136e-06,
      "loss": 0.0096,
      "step": 2179520
    },
    {
      "epoch": 3.566865013124906,
      "grad_norm": 0.1839078962802887,
      "learning_rate": 2.885156461060997e-06,
      "loss": 0.0101,
      "step": 2179540
    },
    {
      "epoch": 3.566897743563559,
      "grad_norm": 0.20921018719673157,
      "learning_rate": 2.8850905688474795e-06,
      "loss": 0.0168,
      "step": 2179560
    },
    {
      "epoch": 3.5669304740022127,
      "grad_norm": 0.23886357247829437,
      "learning_rate": 2.8850246766339623e-06,
      "loss": 0.01,
      "step": 2179580
    },
    {
      "epoch": 3.566963204440866,
      "grad_norm": 0.48742493987083435,
      "learning_rate": 2.884958784420445e-06,
      "loss": 0.0136,
      "step": 2179600
    },
    {
      "epoch": 3.566995934879519,
      "grad_norm": 0.6720280051231384,
      "learning_rate": 2.884892892206928e-06,
      "loss": 0.0155,
      "step": 2179620
    },
    {
      "epoch": 3.5670286653181726,
      "grad_norm": 0.14771562814712524,
      "learning_rate": 2.884826999993411e-06,
      "loss": 0.013,
      "step": 2179640
    },
    {
      "epoch": 3.567061395756826,
      "grad_norm": 0.13493895530700684,
      "learning_rate": 2.8847611077798937e-06,
      "loss": 0.0105,
      "step": 2179660
    },
    {
      "epoch": 3.5670941261954794,
      "grad_norm": 0.30169162154197693,
      "learning_rate": 2.8846952155663764e-06,
      "loss": 0.013,
      "step": 2179680
    },
    {
      "epoch": 3.5671268566341325,
      "grad_norm": 0.5627248287200928,
      "learning_rate": 2.8846293233528596e-06,
      "loss": 0.0127,
      "step": 2179700
    },
    {
      "epoch": 3.567159587072786,
      "grad_norm": 0.26426973938941956,
      "learning_rate": 2.8845634311393423e-06,
      "loss": 0.0158,
      "step": 2179720
    },
    {
      "epoch": 3.5671923175114393,
      "grad_norm": 0.6283659338951111,
      "learning_rate": 2.8844975389258255e-06,
      "loss": 0.0103,
      "step": 2179740
    },
    {
      "epoch": 3.5672250479500924,
      "grad_norm": 0.08054137229919434,
      "learning_rate": 2.8844316467123086e-06,
      "loss": 0.0092,
      "step": 2179760
    },
    {
      "epoch": 3.567257778388746,
      "grad_norm": 0.12304505705833435,
      "learning_rate": 2.8843657544987914e-06,
      "loss": 0.0129,
      "step": 2179780
    },
    {
      "epoch": 3.567290508827399,
      "grad_norm": 0.24972683191299438,
      "learning_rate": 2.884299862285274e-06,
      "loss": 0.0137,
      "step": 2179800
    },
    {
      "epoch": 3.5673232392660528,
      "grad_norm": 0.17692478001117706,
      "learning_rate": 2.884233970071757e-06,
      "loss": 0.0145,
      "step": 2179820
    },
    {
      "epoch": 3.567355969704706,
      "grad_norm": 0.6881178021430969,
      "learning_rate": 2.88416807785824e-06,
      "loss": 0.0124,
      "step": 2179840
    },
    {
      "epoch": 3.5673887001433595,
      "grad_norm": 0.2661254405975342,
      "learning_rate": 2.8841021856447228e-06,
      "loss": 0.0161,
      "step": 2179860
    },
    {
      "epoch": 3.5674214305820127,
      "grad_norm": 0.11024026572704315,
      "learning_rate": 2.8840362934312055e-06,
      "loss": 0.0094,
      "step": 2179880
    },
    {
      "epoch": 3.567454161020666,
      "grad_norm": 0.07813350111246109,
      "learning_rate": 2.8839704012176882e-06,
      "loss": 0.0101,
      "step": 2179900
    },
    {
      "epoch": 3.5674868914593194,
      "grad_norm": 0.076059989631176,
      "learning_rate": 2.883904509004171e-06,
      "loss": 0.0165,
      "step": 2179920
    },
    {
      "epoch": 3.5675196218979726,
      "grad_norm": 0.5281522274017334,
      "learning_rate": 2.883838616790654e-06,
      "loss": 0.0097,
      "step": 2179940
    },
    {
      "epoch": 3.567552352336626,
      "grad_norm": 0.620330274105072,
      "learning_rate": 2.883772724577137e-06,
      "loss": 0.0111,
      "step": 2179960
    },
    {
      "epoch": 3.5675850827752793,
      "grad_norm": 0.4478241205215454,
      "learning_rate": 2.8837068323636196e-06,
      "loss": 0.0106,
      "step": 2179980
    },
    {
      "epoch": 3.567617813213933,
      "grad_norm": 0.1769101619720459,
      "learning_rate": 2.8836409401501024e-06,
      "loss": 0.0082,
      "step": 2180000
    },
    {
      "epoch": 3.567650543652586,
      "grad_norm": 0.2081155925989151,
      "learning_rate": 2.8835750479365855e-06,
      "loss": 0.0076,
      "step": 2180020
    },
    {
      "epoch": 3.567683274091239,
      "grad_norm": 0.22605830430984497,
      "learning_rate": 2.8835091557230683e-06,
      "loss": 0.0125,
      "step": 2180040
    },
    {
      "epoch": 3.567716004529893,
      "grad_norm": 0.3447335660457611,
      "learning_rate": 2.883443263509551e-06,
      "loss": 0.0136,
      "step": 2180060
    },
    {
      "epoch": 3.567748734968546,
      "grad_norm": 0.1755402386188507,
      "learning_rate": 2.8833773712960346e-06,
      "loss": 0.0116,
      "step": 2180080
    },
    {
      "epoch": 3.5677814654071995,
      "grad_norm": 0.45015719532966614,
      "learning_rate": 2.8833114790825173e-06,
      "loss": 0.0127,
      "step": 2180100
    },
    {
      "epoch": 3.5678141958458527,
      "grad_norm": 0.16355423629283905,
      "learning_rate": 2.883245586869e-06,
      "loss": 0.0125,
      "step": 2180120
    },
    {
      "epoch": 3.5678469262845063,
      "grad_norm": 0.2823512852191925,
      "learning_rate": 2.883179694655483e-06,
      "loss": 0.0135,
      "step": 2180140
    },
    {
      "epoch": 3.5678796567231594,
      "grad_norm": 0.16496632993221283,
      "learning_rate": 2.883113802441966e-06,
      "loss": 0.01,
      "step": 2180160
    },
    {
      "epoch": 3.5679123871618126,
      "grad_norm": 0.08025761693716049,
      "learning_rate": 2.8830479102284487e-06,
      "loss": 0.0098,
      "step": 2180180
    },
    {
      "epoch": 3.567945117600466,
      "grad_norm": 0.16509319841861725,
      "learning_rate": 2.8829820180149315e-06,
      "loss": 0.0104,
      "step": 2180200
    },
    {
      "epoch": 3.5679778480391193,
      "grad_norm": 0.19628293812274933,
      "learning_rate": 2.882916125801414e-06,
      "loss": 0.0118,
      "step": 2180220
    },
    {
      "epoch": 3.568010578477773,
      "grad_norm": 0.1896199882030487,
      "learning_rate": 2.8828502335878974e-06,
      "loss": 0.0144,
      "step": 2180240
    },
    {
      "epoch": 3.568043308916426,
      "grad_norm": 0.37460628151893616,
      "learning_rate": 2.88278434137438e-06,
      "loss": 0.0128,
      "step": 2180260
    },
    {
      "epoch": 3.5680760393550797,
      "grad_norm": 0.641966700553894,
      "learning_rate": 2.882718449160863e-06,
      "loss": 0.0137,
      "step": 2180280
    },
    {
      "epoch": 3.568108769793733,
      "grad_norm": 0.7479884624481201,
      "learning_rate": 2.8826525569473456e-06,
      "loss": 0.0143,
      "step": 2180300
    },
    {
      "epoch": 3.568141500232386,
      "grad_norm": 0.3827569782733917,
      "learning_rate": 2.8825866647338287e-06,
      "loss": 0.0126,
      "step": 2180320
    },
    {
      "epoch": 3.5681742306710396,
      "grad_norm": 0.10430688410997391,
      "learning_rate": 2.8825207725203115e-06,
      "loss": 0.0134,
      "step": 2180340
    },
    {
      "epoch": 3.5682069611096927,
      "grad_norm": 0.279362291097641,
      "learning_rate": 2.8824548803067942e-06,
      "loss": 0.0085,
      "step": 2180360
    },
    {
      "epoch": 3.568239691548346,
      "grad_norm": 0.16829772293567657,
      "learning_rate": 2.882388988093277e-06,
      "loss": 0.0108,
      "step": 2180380
    },
    {
      "epoch": 3.5682724219869995,
      "grad_norm": 0.13816103339195251,
      "learning_rate": 2.8823230958797597e-06,
      "loss": 0.0099,
      "step": 2180400
    },
    {
      "epoch": 3.568305152425653,
      "grad_norm": 0.28107166290283203,
      "learning_rate": 2.882257203666243e-06,
      "loss": 0.0128,
      "step": 2180420
    },
    {
      "epoch": 3.568337882864306,
      "grad_norm": 0.06530065834522247,
      "learning_rate": 2.882191311452726e-06,
      "loss": 0.0146,
      "step": 2180440
    },
    {
      "epoch": 3.5683706133029593,
      "grad_norm": 0.1496010273694992,
      "learning_rate": 2.8821254192392088e-06,
      "loss": 0.014,
      "step": 2180460
    },
    {
      "epoch": 3.568403343741613,
      "grad_norm": 0.13453367352485657,
      "learning_rate": 2.882059527025692e-06,
      "loss": 0.0149,
      "step": 2180480
    },
    {
      "epoch": 3.568436074180266,
      "grad_norm": 0.13791076838970184,
      "learning_rate": 2.8819936348121747e-06,
      "loss": 0.0082,
      "step": 2180500
    },
    {
      "epoch": 3.5684688046189192,
      "grad_norm": 0.42338064312934875,
      "learning_rate": 2.8819277425986574e-06,
      "loss": 0.0098,
      "step": 2180520
    },
    {
      "epoch": 3.568501535057573,
      "grad_norm": 0.5319083333015442,
      "learning_rate": 2.88186185038514e-06,
      "loss": 0.0141,
      "step": 2180540
    },
    {
      "epoch": 3.5685342654962264,
      "grad_norm": 0.08707728981971741,
      "learning_rate": 2.8817959581716233e-06,
      "loss": 0.0108,
      "step": 2180560
    },
    {
      "epoch": 3.5685669959348796,
      "grad_norm": 1.3717421293258667,
      "learning_rate": 2.881730065958106e-06,
      "loss": 0.0134,
      "step": 2180580
    },
    {
      "epoch": 3.5685997263735327,
      "grad_norm": 0.6895398497581482,
      "learning_rate": 2.881664173744589e-06,
      "loss": 0.0147,
      "step": 2180600
    },
    {
      "epoch": 3.5686324568121863,
      "grad_norm": 0.29116493463516235,
      "learning_rate": 2.8815982815310715e-06,
      "loss": 0.0131,
      "step": 2180620
    },
    {
      "epoch": 3.5686651872508395,
      "grad_norm": 0.2422873079776764,
      "learning_rate": 2.8815323893175547e-06,
      "loss": 0.0166,
      "step": 2180640
    },
    {
      "epoch": 3.5686979176894926,
      "grad_norm": 0.500258207321167,
      "learning_rate": 2.8814664971040374e-06,
      "loss": 0.008,
      "step": 2180660
    },
    {
      "epoch": 3.568730648128146,
      "grad_norm": 0.21767011284828186,
      "learning_rate": 2.88140060489052e-06,
      "loss": 0.0084,
      "step": 2180680
    },
    {
      "epoch": 3.5687633785668,
      "grad_norm": 0.10603302717208862,
      "learning_rate": 2.881334712677003e-06,
      "loss": 0.0072,
      "step": 2180700
    },
    {
      "epoch": 3.568796109005453,
      "grad_norm": 0.6439968347549438,
      "learning_rate": 2.881268820463486e-06,
      "loss": 0.0121,
      "step": 2180720
    },
    {
      "epoch": 3.568828839444106,
      "grad_norm": 0.8109223246574402,
      "learning_rate": 2.881202928249969e-06,
      "loss": 0.0174,
      "step": 2180740
    },
    {
      "epoch": 3.5688615698827597,
      "grad_norm": 0.5873939990997314,
      "learning_rate": 2.8811370360364516e-06,
      "loss": 0.0113,
      "step": 2180760
    },
    {
      "epoch": 3.568894300321413,
      "grad_norm": 0.46105828881263733,
      "learning_rate": 2.8810711438229343e-06,
      "loss": 0.01,
      "step": 2180780
    },
    {
      "epoch": 3.568927030760066,
      "grad_norm": 0.1351027637720108,
      "learning_rate": 2.881005251609418e-06,
      "loss": 0.0097,
      "step": 2180800
    },
    {
      "epoch": 3.5689597611987196,
      "grad_norm": 0.4784601032733917,
      "learning_rate": 2.8809393593959006e-06,
      "loss": 0.0117,
      "step": 2180820
    },
    {
      "epoch": 3.568992491637373,
      "grad_norm": 0.11644832789897919,
      "learning_rate": 2.8808734671823834e-06,
      "loss": 0.0091,
      "step": 2180840
    },
    {
      "epoch": 3.5690252220760263,
      "grad_norm": 0.09797293692827225,
      "learning_rate": 2.8808075749688665e-06,
      "loss": 0.0134,
      "step": 2180860
    },
    {
      "epoch": 3.5690579525146795,
      "grad_norm": 0.2172047346830368,
      "learning_rate": 2.8807416827553493e-06,
      "loss": 0.0099,
      "step": 2180880
    },
    {
      "epoch": 3.569090682953333,
      "grad_norm": 0.21870572865009308,
      "learning_rate": 2.880675790541832e-06,
      "loss": 0.01,
      "step": 2180900
    },
    {
      "epoch": 3.5691234133919862,
      "grad_norm": 0.35615622997283936,
      "learning_rate": 2.8806098983283147e-06,
      "loss": 0.0103,
      "step": 2180920
    },
    {
      "epoch": 3.5691561438306394,
      "grad_norm": 0.09484685212373734,
      "learning_rate": 2.8805440061147975e-06,
      "loss": 0.0148,
      "step": 2180940
    },
    {
      "epoch": 3.569188874269293,
      "grad_norm": 0.14789044857025146,
      "learning_rate": 2.8804781139012806e-06,
      "loss": 0.0089,
      "step": 2180960
    },
    {
      "epoch": 3.569221604707946,
      "grad_norm": 0.3832792043685913,
      "learning_rate": 2.8804122216877634e-06,
      "loss": 0.0134,
      "step": 2180980
    },
    {
      "epoch": 3.5692543351465997,
      "grad_norm": 4.567290782928467,
      "learning_rate": 2.880346329474246e-06,
      "loss": 0.0099,
      "step": 2181000
    },
    {
      "epoch": 3.569287065585253,
      "grad_norm": 0.23669959604740143,
      "learning_rate": 2.880280437260729e-06,
      "loss": 0.0107,
      "step": 2181020
    },
    {
      "epoch": 3.5693197960239065,
      "grad_norm": 0.23793083429336548,
      "learning_rate": 2.880214545047212e-06,
      "loss": 0.0134,
      "step": 2181040
    },
    {
      "epoch": 3.5693525264625596,
      "grad_norm": 0.2826436161994934,
      "learning_rate": 2.8801486528336948e-06,
      "loss": 0.0133,
      "step": 2181060
    },
    {
      "epoch": 3.5693852569012128,
      "grad_norm": 0.22203315794467926,
      "learning_rate": 2.8800827606201775e-06,
      "loss": 0.0115,
      "step": 2181080
    },
    {
      "epoch": 3.5694179873398664,
      "grad_norm": 1.0500502586364746,
      "learning_rate": 2.8800168684066603e-06,
      "loss": 0.0082,
      "step": 2181100
    },
    {
      "epoch": 3.5694507177785195,
      "grad_norm": 0.13739700615406036,
      "learning_rate": 2.8799509761931434e-06,
      "loss": 0.0107,
      "step": 2181120
    },
    {
      "epoch": 3.569483448217173,
      "grad_norm": 0.18933463096618652,
      "learning_rate": 2.8798850839796266e-06,
      "loss": 0.0087,
      "step": 2181140
    },
    {
      "epoch": 3.5695161786558263,
      "grad_norm": 0.2026916742324829,
      "learning_rate": 2.8798191917661093e-06,
      "loss": 0.0084,
      "step": 2181160
    },
    {
      "epoch": 3.56954890909448,
      "grad_norm": 0.3045538365840912,
      "learning_rate": 2.8797532995525925e-06,
      "loss": 0.0091,
      "step": 2181180
    },
    {
      "epoch": 3.569581639533133,
      "grad_norm": 0.19803617894649506,
      "learning_rate": 2.8796874073390752e-06,
      "loss": 0.012,
      "step": 2181200
    },
    {
      "epoch": 3.569614369971786,
      "grad_norm": 0.0892292931675911,
      "learning_rate": 2.879621515125558e-06,
      "loss": 0.0146,
      "step": 2181220
    },
    {
      "epoch": 3.5696471004104398,
      "grad_norm": 0.2779306471347809,
      "learning_rate": 2.8795556229120407e-06,
      "loss": 0.0074,
      "step": 2181240
    },
    {
      "epoch": 3.569679830849093,
      "grad_norm": 0.6040697693824768,
      "learning_rate": 2.879489730698524e-06,
      "loss": 0.0126,
      "step": 2181260
    },
    {
      "epoch": 3.5697125612877465,
      "grad_norm": 0.20771542191505432,
      "learning_rate": 2.8794238384850066e-06,
      "loss": 0.0102,
      "step": 2181280
    },
    {
      "epoch": 3.5697452917263996,
      "grad_norm": 0.12253851443529129,
      "learning_rate": 2.8793579462714893e-06,
      "loss": 0.0094,
      "step": 2181300
    },
    {
      "epoch": 3.5697780221650532,
      "grad_norm": 0.615027129650116,
      "learning_rate": 2.879292054057972e-06,
      "loss": 0.0109,
      "step": 2181320
    },
    {
      "epoch": 3.5698107526037064,
      "grad_norm": 0.44086751341819763,
      "learning_rate": 2.879226161844455e-06,
      "loss": 0.0104,
      "step": 2181340
    },
    {
      "epoch": 3.5698434830423595,
      "grad_norm": 0.20410051941871643,
      "learning_rate": 2.879160269630938e-06,
      "loss": 0.0146,
      "step": 2181360
    },
    {
      "epoch": 3.569876213481013,
      "grad_norm": 0.14674149453639984,
      "learning_rate": 2.8790943774174207e-06,
      "loss": 0.0104,
      "step": 2181380
    },
    {
      "epoch": 3.5699089439196663,
      "grad_norm": 0.26768454909324646,
      "learning_rate": 2.8790284852039035e-06,
      "loss": 0.0147,
      "step": 2181400
    },
    {
      "epoch": 3.56994167435832,
      "grad_norm": 0.5042354464530945,
      "learning_rate": 2.878962592990386e-06,
      "loss": 0.0081,
      "step": 2181420
    },
    {
      "epoch": 3.569974404796973,
      "grad_norm": 0.1421183943748474,
      "learning_rate": 2.8788967007768694e-06,
      "loss": 0.01,
      "step": 2181440
    },
    {
      "epoch": 3.5700071352356266,
      "grad_norm": 0.24359652400016785,
      "learning_rate": 2.878830808563352e-06,
      "loss": 0.0104,
      "step": 2181460
    },
    {
      "epoch": 3.5700398656742798,
      "grad_norm": 0.1765252947807312,
      "learning_rate": 2.878764916349835e-06,
      "loss": 0.0087,
      "step": 2181480
    },
    {
      "epoch": 3.570072596112933,
      "grad_norm": 0.1596497893333435,
      "learning_rate": 2.8786990241363184e-06,
      "loss": 0.0071,
      "step": 2181500
    },
    {
      "epoch": 3.5701053265515865,
      "grad_norm": 0.1873733103275299,
      "learning_rate": 2.878633131922801e-06,
      "loss": 0.0103,
      "step": 2181520
    },
    {
      "epoch": 3.5701380569902397,
      "grad_norm": 0.4039054811000824,
      "learning_rate": 2.878567239709284e-06,
      "loss": 0.0151,
      "step": 2181540
    },
    {
      "epoch": 3.5701707874288933,
      "grad_norm": 0.16460104286670685,
      "learning_rate": 2.8785013474957667e-06,
      "loss": 0.0094,
      "step": 2181560
    },
    {
      "epoch": 3.5702035178675464,
      "grad_norm": 0.14889641106128693,
      "learning_rate": 2.87843545528225e-06,
      "loss": 0.013,
      "step": 2181580
    },
    {
      "epoch": 3.5702362483062,
      "grad_norm": 0.2402506172657013,
      "learning_rate": 2.8783695630687326e-06,
      "loss": 0.0124,
      "step": 2181600
    },
    {
      "epoch": 3.570268978744853,
      "grad_norm": 0.295941561460495,
      "learning_rate": 2.8783036708552153e-06,
      "loss": 0.0119,
      "step": 2181620
    },
    {
      "epoch": 3.5703017091835063,
      "grad_norm": 0.4582088589668274,
      "learning_rate": 2.878237778641698e-06,
      "loss": 0.0132,
      "step": 2181640
    },
    {
      "epoch": 3.57033443962216,
      "grad_norm": 0.18214328587055206,
      "learning_rate": 2.878171886428181e-06,
      "loss": 0.0105,
      "step": 2181660
    },
    {
      "epoch": 3.570367170060813,
      "grad_norm": 0.1856573224067688,
      "learning_rate": 2.878105994214664e-06,
      "loss": 0.0133,
      "step": 2181680
    },
    {
      "epoch": 3.5703999004994666,
      "grad_norm": 0.43479835987091064,
      "learning_rate": 2.8780401020011467e-06,
      "loss": 0.0151,
      "step": 2181700
    },
    {
      "epoch": 3.57043263093812,
      "grad_norm": 0.402703195810318,
      "learning_rate": 2.8779742097876294e-06,
      "loss": 0.009,
      "step": 2181720
    },
    {
      "epoch": 3.5704653613767734,
      "grad_norm": 0.41887062788009644,
      "learning_rate": 2.8779083175741126e-06,
      "loss": 0.0178,
      "step": 2181740
    },
    {
      "epoch": 3.5704980918154265,
      "grad_norm": 0.16614079475402832,
      "learning_rate": 2.8778424253605953e-06,
      "loss": 0.0179,
      "step": 2181760
    },
    {
      "epoch": 3.5705308222540797,
      "grad_norm": 0.49042394757270813,
      "learning_rate": 2.877776533147078e-06,
      "loss": 0.0147,
      "step": 2181780
    },
    {
      "epoch": 3.5705635526927333,
      "grad_norm": 0.16348299384117126,
      "learning_rate": 2.877710640933561e-06,
      "loss": 0.0099,
      "step": 2181800
    },
    {
      "epoch": 3.5705962831313864,
      "grad_norm": 0.4484579861164093,
      "learning_rate": 2.8776447487200435e-06,
      "loss": 0.0166,
      "step": 2181820
    },
    {
      "epoch": 3.5706290135700396,
      "grad_norm": 0.2841123044490814,
      "learning_rate": 2.877578856506527e-06,
      "loss": 0.0119,
      "step": 2181840
    },
    {
      "epoch": 3.570661744008693,
      "grad_norm": 0.5459864735603333,
      "learning_rate": 2.87751296429301e-06,
      "loss": 0.0103,
      "step": 2181860
    },
    {
      "epoch": 3.5706944744473468,
      "grad_norm": 0.24023756384849548,
      "learning_rate": 2.8774470720794926e-06,
      "loss": 0.0076,
      "step": 2181880
    },
    {
      "epoch": 3.570727204886,
      "grad_norm": 0.11791937053203583,
      "learning_rate": 2.8773811798659758e-06,
      "loss": 0.0088,
      "step": 2181900
    },
    {
      "epoch": 3.570759935324653,
      "grad_norm": 0.1039852425456047,
      "learning_rate": 2.8773152876524585e-06,
      "loss": 0.0109,
      "step": 2181920
    },
    {
      "epoch": 3.5707926657633067,
      "grad_norm": 0.2717890739440918,
      "learning_rate": 2.8772493954389412e-06,
      "loss": 0.0141,
      "step": 2181940
    },
    {
      "epoch": 3.57082539620196,
      "grad_norm": 0.14752662181854248,
      "learning_rate": 2.877183503225424e-06,
      "loss": 0.0092,
      "step": 2181960
    },
    {
      "epoch": 3.570858126640613,
      "grad_norm": 0.27982234954833984,
      "learning_rate": 2.877117611011907e-06,
      "loss": 0.0144,
      "step": 2181980
    },
    {
      "epoch": 3.5708908570792666,
      "grad_norm": 0.1884884238243103,
      "learning_rate": 2.87705171879839e-06,
      "loss": 0.0093,
      "step": 2182000
    },
    {
      "epoch": 3.57092358751792,
      "grad_norm": 0.18173447251319885,
      "learning_rate": 2.8769858265848726e-06,
      "loss": 0.0082,
      "step": 2182020
    },
    {
      "epoch": 3.5709563179565733,
      "grad_norm": 0.08030152320861816,
      "learning_rate": 2.8769199343713554e-06,
      "loss": 0.0096,
      "step": 2182040
    },
    {
      "epoch": 3.5709890483952265,
      "grad_norm": 0.41399601101875305,
      "learning_rate": 2.8768540421578385e-06,
      "loss": 0.0119,
      "step": 2182060
    },
    {
      "epoch": 3.57102177883388,
      "grad_norm": 0.4476262032985687,
      "learning_rate": 2.8767881499443213e-06,
      "loss": 0.0136,
      "step": 2182080
    },
    {
      "epoch": 3.571054509272533,
      "grad_norm": 0.2210332155227661,
      "learning_rate": 2.876722257730804e-06,
      "loss": 0.0089,
      "step": 2182100
    },
    {
      "epoch": 3.5710872397111864,
      "grad_norm": 0.18002614378929138,
      "learning_rate": 2.8766563655172868e-06,
      "loss": 0.0126,
      "step": 2182120
    },
    {
      "epoch": 3.57111997014984,
      "grad_norm": 0.20346495509147644,
      "learning_rate": 2.87659047330377e-06,
      "loss": 0.0152,
      "step": 2182140
    },
    {
      "epoch": 3.5711527005884935,
      "grad_norm": 0.5135778784751892,
      "learning_rate": 2.8765245810902527e-06,
      "loss": 0.0096,
      "step": 2182160
    },
    {
      "epoch": 3.5711854310271467,
      "grad_norm": 0.5842347145080566,
      "learning_rate": 2.8764586888767354e-06,
      "loss": 0.0139,
      "step": 2182180
    },
    {
      "epoch": 3.5712181614658,
      "grad_norm": 0.18747806549072266,
      "learning_rate": 2.876392796663219e-06,
      "loss": 0.0099,
      "step": 2182200
    },
    {
      "epoch": 3.5712508919044534,
      "grad_norm": 0.14038702845573425,
      "learning_rate": 2.8763269044497017e-06,
      "loss": 0.0124,
      "step": 2182220
    },
    {
      "epoch": 3.5712836223431066,
      "grad_norm": 0.3332171142101288,
      "learning_rate": 2.8762610122361845e-06,
      "loss": 0.011,
      "step": 2182240
    },
    {
      "epoch": 3.5713163527817597,
      "grad_norm": 0.355925977230072,
      "learning_rate": 2.876195120022667e-06,
      "loss": 0.0165,
      "step": 2182260
    },
    {
      "epoch": 3.5713490832204133,
      "grad_norm": 0.3606763184070587,
      "learning_rate": 2.8761292278091504e-06,
      "loss": 0.0184,
      "step": 2182280
    },
    {
      "epoch": 3.571381813659067,
      "grad_norm": 0.3223499655723572,
      "learning_rate": 2.876063335595633e-06,
      "loss": 0.0139,
      "step": 2182300
    },
    {
      "epoch": 3.57141454409772,
      "grad_norm": 0.31850847601890564,
      "learning_rate": 2.875997443382116e-06,
      "loss": 0.0108,
      "step": 2182320
    },
    {
      "epoch": 3.5714472745363732,
      "grad_norm": 0.16068686544895172,
      "learning_rate": 2.8759315511685986e-06,
      "loss": 0.0075,
      "step": 2182340
    },
    {
      "epoch": 3.571480004975027,
      "grad_norm": 0.20234645903110504,
      "learning_rate": 2.8758656589550813e-06,
      "loss": 0.0117,
      "step": 2182360
    },
    {
      "epoch": 3.57151273541368,
      "grad_norm": 0.7104856371879578,
      "learning_rate": 2.8757997667415645e-06,
      "loss": 0.0094,
      "step": 2182380
    },
    {
      "epoch": 3.571545465852333,
      "grad_norm": 0.2648921310901642,
      "learning_rate": 2.8757338745280472e-06,
      "loss": 0.0113,
      "step": 2182400
    },
    {
      "epoch": 3.5715781962909867,
      "grad_norm": 0.5257437825202942,
      "learning_rate": 2.87566798231453e-06,
      "loss": 0.0085,
      "step": 2182420
    },
    {
      "epoch": 3.57161092672964,
      "grad_norm": 0.5429030656814575,
      "learning_rate": 2.8756020901010127e-06,
      "loss": 0.0109,
      "step": 2182440
    },
    {
      "epoch": 3.5716436571682935,
      "grad_norm": 1.6153048276901245,
      "learning_rate": 2.875536197887496e-06,
      "loss": 0.0095,
      "step": 2182460
    },
    {
      "epoch": 3.5716763876069466,
      "grad_norm": 0.3378390669822693,
      "learning_rate": 2.8754703056739786e-06,
      "loss": 0.0117,
      "step": 2182480
    },
    {
      "epoch": 3.5717091180456,
      "grad_norm": 0.2484108805656433,
      "learning_rate": 2.8754044134604614e-06,
      "loss": 0.016,
      "step": 2182500
    },
    {
      "epoch": 3.5717418484842534,
      "grad_norm": 0.1704491823911667,
      "learning_rate": 2.875338521246944e-06,
      "loss": 0.0092,
      "step": 2182520
    },
    {
      "epoch": 3.5717745789229065,
      "grad_norm": 0.17155177891254425,
      "learning_rate": 2.8752726290334277e-06,
      "loss": 0.0103,
      "step": 2182540
    },
    {
      "epoch": 3.57180730936156,
      "grad_norm": 0.2444095015525818,
      "learning_rate": 2.8752067368199104e-06,
      "loss": 0.0103,
      "step": 2182560
    },
    {
      "epoch": 3.5718400398002133,
      "grad_norm": 0.18233847618103027,
      "learning_rate": 2.875140844606393e-06,
      "loss": 0.0119,
      "step": 2182580
    },
    {
      "epoch": 3.571872770238867,
      "grad_norm": 0.19329173862934113,
      "learning_rate": 2.8750749523928763e-06,
      "loss": 0.0131,
      "step": 2182600
    },
    {
      "epoch": 3.57190550067752,
      "grad_norm": 0.09617526084184647,
      "learning_rate": 2.875009060179359e-06,
      "loss": 0.0101,
      "step": 2182620
    },
    {
      "epoch": 3.5719382311161736,
      "grad_norm": 0.23424746096134186,
      "learning_rate": 2.874943167965842e-06,
      "loss": 0.0098,
      "step": 2182640
    },
    {
      "epoch": 3.5719709615548267,
      "grad_norm": 0.1134551614522934,
      "learning_rate": 2.8748772757523245e-06,
      "loss": 0.0092,
      "step": 2182660
    },
    {
      "epoch": 3.57200369199348,
      "grad_norm": 0.7148014307022095,
      "learning_rate": 2.8748113835388077e-06,
      "loss": 0.0156,
      "step": 2182680
    },
    {
      "epoch": 3.5720364224321335,
      "grad_norm": 0.2567680776119232,
      "learning_rate": 2.8747454913252904e-06,
      "loss": 0.0089,
      "step": 2182700
    },
    {
      "epoch": 3.5720691528707866,
      "grad_norm": 0.31652525067329407,
      "learning_rate": 2.874679599111773e-06,
      "loss": 0.008,
      "step": 2182720
    },
    {
      "epoch": 3.5721018833094402,
      "grad_norm": 0.5364648699760437,
      "learning_rate": 2.874613706898256e-06,
      "loss": 0.017,
      "step": 2182740
    },
    {
      "epoch": 3.5721346137480934,
      "grad_norm": 0.5122339129447937,
      "learning_rate": 2.8745478146847387e-06,
      "loss": 0.0088,
      "step": 2182760
    },
    {
      "epoch": 3.572167344186747,
      "grad_norm": 0.2518135905265808,
      "learning_rate": 2.874481922471222e-06,
      "loss": 0.0078,
      "step": 2182780
    },
    {
      "epoch": 3.5722000746254,
      "grad_norm": 0.14596590399742126,
      "learning_rate": 2.8744160302577046e-06,
      "loss": 0.0144,
      "step": 2182800
    },
    {
      "epoch": 3.5722328050640533,
      "grad_norm": 0.21737222373485565,
      "learning_rate": 2.8743501380441873e-06,
      "loss": 0.0105,
      "step": 2182820
    },
    {
      "epoch": 3.572265535502707,
      "grad_norm": 0.3292449712753296,
      "learning_rate": 2.87428424583067e-06,
      "loss": 0.0104,
      "step": 2182840
    },
    {
      "epoch": 3.57229826594136,
      "grad_norm": 0.24500314891338348,
      "learning_rate": 2.874218353617153e-06,
      "loss": 0.0097,
      "step": 2182860
    },
    {
      "epoch": 3.5723309963800136,
      "grad_norm": 0.3652539849281311,
      "learning_rate": 2.874152461403636e-06,
      "loss": 0.0139,
      "step": 2182880
    },
    {
      "epoch": 3.5723637268186668,
      "grad_norm": 0.3538475036621094,
      "learning_rate": 2.874086569190119e-06,
      "loss": 0.0104,
      "step": 2182900
    },
    {
      "epoch": 3.5723964572573204,
      "grad_norm": 0.09614493697881699,
      "learning_rate": 2.8740206769766023e-06,
      "loss": 0.0082,
      "step": 2182920
    },
    {
      "epoch": 3.5724291876959735,
      "grad_norm": 0.057215381413698196,
      "learning_rate": 2.873954784763085e-06,
      "loss": 0.0118,
      "step": 2182940
    },
    {
      "epoch": 3.5724619181346267,
      "grad_norm": 0.12492939084768295,
      "learning_rate": 2.8738888925495678e-06,
      "loss": 0.0074,
      "step": 2182960
    },
    {
      "epoch": 3.5724946485732803,
      "grad_norm": 0.07145827263593674,
      "learning_rate": 2.8738230003360505e-06,
      "loss": 0.0076,
      "step": 2182980
    },
    {
      "epoch": 3.5725273790119334,
      "grad_norm": 0.13981950283050537,
      "learning_rate": 2.8737571081225337e-06,
      "loss": 0.0135,
      "step": 2183000
    },
    {
      "epoch": 3.572560109450587,
      "grad_norm": 0.16212134063243866,
      "learning_rate": 2.8736912159090164e-06,
      "loss": 0.0098,
      "step": 2183020
    },
    {
      "epoch": 3.57259283988924,
      "grad_norm": 0.7568182349205017,
      "learning_rate": 2.873625323695499e-06,
      "loss": 0.0132,
      "step": 2183040
    },
    {
      "epoch": 3.5726255703278937,
      "grad_norm": 0.19332623481750488,
      "learning_rate": 2.873559431481982e-06,
      "loss": 0.0109,
      "step": 2183060
    },
    {
      "epoch": 3.572658300766547,
      "grad_norm": 0.1110256016254425,
      "learning_rate": 2.873493539268465e-06,
      "loss": 0.0069,
      "step": 2183080
    },
    {
      "epoch": 3.5726910312052,
      "grad_norm": 0.15815071761608124,
      "learning_rate": 2.8734276470549478e-06,
      "loss": 0.0091,
      "step": 2183100
    },
    {
      "epoch": 3.5727237616438536,
      "grad_norm": 0.11753278225660324,
      "learning_rate": 2.8733617548414305e-06,
      "loss": 0.0087,
      "step": 2183120
    },
    {
      "epoch": 3.572756492082507,
      "grad_norm": 0.303987979888916,
      "learning_rate": 2.8732958626279133e-06,
      "loss": 0.0107,
      "step": 2183140
    },
    {
      "epoch": 3.5727892225211604,
      "grad_norm": 0.22456836700439453,
      "learning_rate": 2.8732299704143964e-06,
      "loss": 0.013,
      "step": 2183160
    },
    {
      "epoch": 3.5728219529598135,
      "grad_norm": 0.6291401982307434,
      "learning_rate": 2.873164078200879e-06,
      "loss": 0.0124,
      "step": 2183180
    },
    {
      "epoch": 3.572854683398467,
      "grad_norm": 0.3984518051147461,
      "learning_rate": 2.873098185987362e-06,
      "loss": 0.0083,
      "step": 2183200
    },
    {
      "epoch": 3.5728874138371203,
      "grad_norm": 0.28655239939689636,
      "learning_rate": 2.8730322937738446e-06,
      "loss": 0.0144,
      "step": 2183220
    },
    {
      "epoch": 3.5729201442757734,
      "grad_norm": 0.09677135944366455,
      "learning_rate": 2.8729664015603274e-06,
      "loss": 0.0129,
      "step": 2183240
    },
    {
      "epoch": 3.572952874714427,
      "grad_norm": 0.09531273692846298,
      "learning_rate": 2.872900509346811e-06,
      "loss": 0.0094,
      "step": 2183260
    },
    {
      "epoch": 3.57298560515308,
      "grad_norm": 0.23695853352546692,
      "learning_rate": 2.8728346171332937e-06,
      "loss": 0.0121,
      "step": 2183280
    },
    {
      "epoch": 3.5730183355917338,
      "grad_norm": 0.17643478512763977,
      "learning_rate": 2.8727687249197764e-06,
      "loss": 0.0099,
      "step": 2183300
    },
    {
      "epoch": 3.573051066030387,
      "grad_norm": 0.24164094030857086,
      "learning_rate": 2.8727028327062596e-06,
      "loss": 0.0116,
      "step": 2183320
    },
    {
      "epoch": 3.5730837964690405,
      "grad_norm": 0.3816775679588318,
      "learning_rate": 2.8726369404927423e-06,
      "loss": 0.0119,
      "step": 2183340
    },
    {
      "epoch": 3.5731165269076937,
      "grad_norm": 0.5144953727722168,
      "learning_rate": 2.872571048279225e-06,
      "loss": 0.0092,
      "step": 2183360
    },
    {
      "epoch": 3.573149257346347,
      "grad_norm": 0.13643857836723328,
      "learning_rate": 2.872505156065708e-06,
      "loss": 0.0137,
      "step": 2183380
    },
    {
      "epoch": 3.5731819877850004,
      "grad_norm": 0.3887571692466736,
      "learning_rate": 2.872439263852191e-06,
      "loss": 0.0131,
      "step": 2183400
    },
    {
      "epoch": 3.5732147182236536,
      "grad_norm": 0.44090405106544495,
      "learning_rate": 2.8723733716386737e-06,
      "loss": 0.0094,
      "step": 2183420
    },
    {
      "epoch": 3.5732474486623067,
      "grad_norm": 0.12981921434402466,
      "learning_rate": 2.8723074794251565e-06,
      "loss": 0.0103,
      "step": 2183440
    },
    {
      "epoch": 3.5732801791009603,
      "grad_norm": 0.097240149974823,
      "learning_rate": 2.872241587211639e-06,
      "loss": 0.0086,
      "step": 2183460
    },
    {
      "epoch": 3.573312909539614,
      "grad_norm": 0.304960161447525,
      "learning_rate": 2.8721756949981224e-06,
      "loss": 0.0111,
      "step": 2183480
    },
    {
      "epoch": 3.573345639978267,
      "grad_norm": 0.17690454423427582,
      "learning_rate": 2.872109802784605e-06,
      "loss": 0.0114,
      "step": 2183500
    },
    {
      "epoch": 3.57337837041692,
      "grad_norm": 0.21584542095661163,
      "learning_rate": 2.872043910571088e-06,
      "loss": 0.0121,
      "step": 2183520
    },
    {
      "epoch": 3.573411100855574,
      "grad_norm": 0.49598073959350586,
      "learning_rate": 2.8719780183575706e-06,
      "loss": 0.0121,
      "step": 2183540
    },
    {
      "epoch": 3.573443831294227,
      "grad_norm": 0.1869657337665558,
      "learning_rate": 2.8719121261440538e-06,
      "loss": 0.0104,
      "step": 2183560
    },
    {
      "epoch": 3.57347656173288,
      "grad_norm": 0.2957480549812317,
      "learning_rate": 2.8718462339305365e-06,
      "loss": 0.0081,
      "step": 2183580
    },
    {
      "epoch": 3.5735092921715337,
      "grad_norm": 0.4734334349632263,
      "learning_rate": 2.8717803417170197e-06,
      "loss": 0.0177,
      "step": 2183600
    },
    {
      "epoch": 3.5735420226101873,
      "grad_norm": 0.49846702814102173,
      "learning_rate": 2.871714449503503e-06,
      "loss": 0.0117,
      "step": 2183620
    },
    {
      "epoch": 3.5735747530488404,
      "grad_norm": 0.25882387161254883,
      "learning_rate": 2.8716485572899856e-06,
      "loss": 0.009,
      "step": 2183640
    },
    {
      "epoch": 3.5736074834874936,
      "grad_norm": 0.25758153200149536,
      "learning_rate": 2.8715826650764683e-06,
      "loss": 0.0103,
      "step": 2183660
    },
    {
      "epoch": 3.573640213926147,
      "grad_norm": 0.18386110663414001,
      "learning_rate": 2.871516772862951e-06,
      "loss": 0.0132,
      "step": 2183680
    },
    {
      "epoch": 3.5736729443648003,
      "grad_norm": 0.23936839401721954,
      "learning_rate": 2.871450880649434e-06,
      "loss": 0.011,
      "step": 2183700
    },
    {
      "epoch": 3.5737056748034535,
      "grad_norm": 0.5577212572097778,
      "learning_rate": 2.871384988435917e-06,
      "loss": 0.0139,
      "step": 2183720
    },
    {
      "epoch": 3.573738405242107,
      "grad_norm": 0.10509489476680756,
      "learning_rate": 2.8713190962223997e-06,
      "loss": 0.0077,
      "step": 2183740
    },
    {
      "epoch": 3.5737711356807607,
      "grad_norm": 0.30585017800331116,
      "learning_rate": 2.8712532040088824e-06,
      "loss": 0.0174,
      "step": 2183760
    },
    {
      "epoch": 3.573803866119414,
      "grad_norm": 0.1846051812171936,
      "learning_rate": 2.871187311795365e-06,
      "loss": 0.0158,
      "step": 2183780
    },
    {
      "epoch": 3.573836596558067,
      "grad_norm": 0.11047081649303436,
      "learning_rate": 2.8711214195818483e-06,
      "loss": 0.0131,
      "step": 2183800
    },
    {
      "epoch": 3.5738693269967206,
      "grad_norm": 0.6058334708213806,
      "learning_rate": 2.871055527368331e-06,
      "loss": 0.0164,
      "step": 2183820
    },
    {
      "epoch": 3.5739020574353737,
      "grad_norm": 0.7715015411376953,
      "learning_rate": 2.870989635154814e-06,
      "loss": 0.013,
      "step": 2183840
    },
    {
      "epoch": 3.573934787874027,
      "grad_norm": 0.24532131850719452,
      "learning_rate": 2.8709237429412965e-06,
      "loss": 0.0163,
      "step": 2183860
    },
    {
      "epoch": 3.5739675183126804,
      "grad_norm": 0.26363202929496765,
      "learning_rate": 2.8708578507277797e-06,
      "loss": 0.0113,
      "step": 2183880
    },
    {
      "epoch": 3.574000248751334,
      "grad_norm": 0.09451743960380554,
      "learning_rate": 2.8707919585142625e-06,
      "loss": 0.0164,
      "step": 2183900
    },
    {
      "epoch": 3.574032979189987,
      "grad_norm": 0.3199857473373413,
      "learning_rate": 2.870726066300745e-06,
      "loss": 0.0129,
      "step": 2183920
    },
    {
      "epoch": 3.5740657096286403,
      "grad_norm": 0.29780346155166626,
      "learning_rate": 2.870660174087228e-06,
      "loss": 0.0123,
      "step": 2183940
    },
    {
      "epoch": 3.574098440067294,
      "grad_norm": 0.12799058854579926,
      "learning_rate": 2.8705942818737115e-06,
      "loss": 0.0139,
      "step": 2183960
    },
    {
      "epoch": 3.574131170505947,
      "grad_norm": 0.26398563385009766,
      "learning_rate": 2.8705283896601943e-06,
      "loss": 0.0115,
      "step": 2183980
    },
    {
      "epoch": 3.5741639009446002,
      "grad_norm": 0.3434807062149048,
      "learning_rate": 2.870462497446677e-06,
      "loss": 0.0091,
      "step": 2184000
    },
    {
      "epoch": 3.574196631383254,
      "grad_norm": 0.04488493874669075,
      "learning_rate": 2.87039660523316e-06,
      "loss": 0.0084,
      "step": 2184020
    },
    {
      "epoch": 3.574229361821907,
      "grad_norm": 0.3925855755805969,
      "learning_rate": 2.870330713019643e-06,
      "loss": 0.0097,
      "step": 2184040
    },
    {
      "epoch": 3.5742620922605606,
      "grad_norm": 0.2961331009864807,
      "learning_rate": 2.8702648208061256e-06,
      "loss": 0.0104,
      "step": 2184060
    },
    {
      "epoch": 3.5742948226992137,
      "grad_norm": 0.015126161277294159,
      "learning_rate": 2.8701989285926084e-06,
      "loss": 0.0068,
      "step": 2184080
    },
    {
      "epoch": 3.5743275531378673,
      "grad_norm": 0.1268807351589203,
      "learning_rate": 2.8701330363790915e-06,
      "loss": 0.0136,
      "step": 2184100
    },
    {
      "epoch": 3.5743602835765205,
      "grad_norm": 0.2953876852989197,
      "learning_rate": 2.8700671441655743e-06,
      "loss": 0.0128,
      "step": 2184120
    },
    {
      "epoch": 3.5743930140151736,
      "grad_norm": 0.39158642292022705,
      "learning_rate": 2.870001251952057e-06,
      "loss": 0.008,
      "step": 2184140
    },
    {
      "epoch": 3.574425744453827,
      "grad_norm": 0.4267643094062805,
      "learning_rate": 2.8699353597385398e-06,
      "loss": 0.0132,
      "step": 2184160
    },
    {
      "epoch": 3.5744584748924804,
      "grad_norm": 0.3179636597633362,
      "learning_rate": 2.8698694675250225e-06,
      "loss": 0.0127,
      "step": 2184180
    },
    {
      "epoch": 3.574491205331134,
      "grad_norm": 0.6922056078910828,
      "learning_rate": 2.8698035753115057e-06,
      "loss": 0.0089,
      "step": 2184200
    },
    {
      "epoch": 3.574523935769787,
      "grad_norm": 0.08504617214202881,
      "learning_rate": 2.8697376830979884e-06,
      "loss": 0.0167,
      "step": 2184220
    },
    {
      "epoch": 3.5745566662084407,
      "grad_norm": 0.20045819878578186,
      "learning_rate": 2.869671790884471e-06,
      "loss": 0.0078,
      "step": 2184240
    },
    {
      "epoch": 3.574589396647094,
      "grad_norm": 0.15236076712608337,
      "learning_rate": 2.869605898670954e-06,
      "loss": 0.0096,
      "step": 2184260
    },
    {
      "epoch": 3.574622127085747,
      "grad_norm": 0.5169095993041992,
      "learning_rate": 2.869540006457437e-06,
      "loss": 0.0107,
      "step": 2184280
    },
    {
      "epoch": 3.5746548575244006,
      "grad_norm": 0.594680666923523,
      "learning_rate": 2.86947411424392e-06,
      "loss": 0.0097,
      "step": 2184300
    },
    {
      "epoch": 3.5746875879630537,
      "grad_norm": 0.29961878061294556,
      "learning_rate": 2.869408222030403e-06,
      "loss": 0.0151,
      "step": 2184320
    },
    {
      "epoch": 3.5747203184017073,
      "grad_norm": 0.1913279891014099,
      "learning_rate": 2.869342329816886e-06,
      "loss": 0.0094,
      "step": 2184340
    },
    {
      "epoch": 3.5747530488403605,
      "grad_norm": 0.14787378907203674,
      "learning_rate": 2.869276437603369e-06,
      "loss": 0.0126,
      "step": 2184360
    },
    {
      "epoch": 3.574785779279014,
      "grad_norm": 0.15734636783599854,
      "learning_rate": 2.8692105453898516e-06,
      "loss": 0.0111,
      "step": 2184380
    },
    {
      "epoch": 3.5748185097176672,
      "grad_norm": 0.38271984457969666,
      "learning_rate": 2.8691446531763343e-06,
      "loss": 0.0148,
      "step": 2184400
    },
    {
      "epoch": 3.5748512401563204,
      "grad_norm": 0.21741707623004913,
      "learning_rate": 2.8690787609628175e-06,
      "loss": 0.0107,
      "step": 2184420
    },
    {
      "epoch": 3.574883970594974,
      "grad_norm": 0.16366001963615417,
      "learning_rate": 2.8690128687493002e-06,
      "loss": 0.0125,
      "step": 2184440
    },
    {
      "epoch": 3.574916701033627,
      "grad_norm": 0.5392230749130249,
      "learning_rate": 2.868946976535783e-06,
      "loss": 0.0178,
      "step": 2184460
    },
    {
      "epoch": 3.5749494314722807,
      "grad_norm": 0.12365993112325668,
      "learning_rate": 2.8688810843222657e-06,
      "loss": 0.0115,
      "step": 2184480
    },
    {
      "epoch": 3.574982161910934,
      "grad_norm": 0.16126318275928497,
      "learning_rate": 2.868815192108749e-06,
      "loss": 0.0075,
      "step": 2184500
    },
    {
      "epoch": 3.5750148923495875,
      "grad_norm": 0.5771490335464478,
      "learning_rate": 2.8687492998952316e-06,
      "loss": 0.0134,
      "step": 2184520
    },
    {
      "epoch": 3.5750476227882406,
      "grad_norm": 0.3129507005214691,
      "learning_rate": 2.8686834076817144e-06,
      "loss": 0.0105,
      "step": 2184540
    },
    {
      "epoch": 3.5750803532268938,
      "grad_norm": 0.10739347338676453,
      "learning_rate": 2.868617515468197e-06,
      "loss": 0.011,
      "step": 2184560
    },
    {
      "epoch": 3.5751130836655474,
      "grad_norm": 0.7856224179267883,
      "learning_rate": 2.8685516232546803e-06,
      "loss": 0.0096,
      "step": 2184580
    },
    {
      "epoch": 3.5751458141042005,
      "grad_norm": 0.1341952383518219,
      "learning_rate": 2.868485731041163e-06,
      "loss": 0.008,
      "step": 2184600
    },
    {
      "epoch": 3.575178544542854,
      "grad_norm": 0.2213057279586792,
      "learning_rate": 2.8684198388276457e-06,
      "loss": 0.0092,
      "step": 2184620
    },
    {
      "epoch": 3.5752112749815073,
      "grad_norm": 0.35230356454849243,
      "learning_rate": 2.8683539466141285e-06,
      "loss": 0.0165,
      "step": 2184640
    },
    {
      "epoch": 3.575244005420161,
      "grad_norm": 0.34106019139289856,
      "learning_rate": 2.868288054400612e-06,
      "loss": 0.0137,
      "step": 2184660
    },
    {
      "epoch": 3.575276735858814,
      "grad_norm": 0.1344936341047287,
      "learning_rate": 2.868222162187095e-06,
      "loss": 0.0111,
      "step": 2184680
    },
    {
      "epoch": 3.575309466297467,
      "grad_norm": 0.2987055778503418,
      "learning_rate": 2.8681562699735775e-06,
      "loss": 0.0111,
      "step": 2184700
    },
    {
      "epoch": 3.5753421967361207,
      "grad_norm": 0.21634817123413086,
      "learning_rate": 2.8680903777600603e-06,
      "loss": 0.0092,
      "step": 2184720
    },
    {
      "epoch": 3.575374927174774,
      "grad_norm": 0.18017363548278809,
      "learning_rate": 2.8680244855465434e-06,
      "loss": 0.0111,
      "step": 2184740
    },
    {
      "epoch": 3.5754076576134275,
      "grad_norm": 0.32705140113830566,
      "learning_rate": 2.867958593333026e-06,
      "loss": 0.0141,
      "step": 2184760
    },
    {
      "epoch": 3.5754403880520806,
      "grad_norm": 0.27999162673950195,
      "learning_rate": 2.867892701119509e-06,
      "loss": 0.0069,
      "step": 2184780
    },
    {
      "epoch": 3.5754731184907342,
      "grad_norm": 0.2487078607082367,
      "learning_rate": 2.8678268089059917e-06,
      "loss": 0.0105,
      "step": 2184800
    },
    {
      "epoch": 3.5755058489293874,
      "grad_norm": 0.49307680130004883,
      "learning_rate": 2.867760916692475e-06,
      "loss": 0.0153,
      "step": 2184820
    },
    {
      "epoch": 3.5755385793680405,
      "grad_norm": 0.056885622441768646,
      "learning_rate": 2.8676950244789576e-06,
      "loss": 0.0138,
      "step": 2184840
    },
    {
      "epoch": 3.575571309806694,
      "grad_norm": 0.37773987650871277,
      "learning_rate": 2.8676291322654403e-06,
      "loss": 0.0155,
      "step": 2184860
    },
    {
      "epoch": 3.5756040402453473,
      "grad_norm": 0.13338008522987366,
      "learning_rate": 2.867563240051923e-06,
      "loss": 0.01,
      "step": 2184880
    },
    {
      "epoch": 3.5756367706840004,
      "grad_norm": 0.19772407412528992,
      "learning_rate": 2.8674973478384062e-06,
      "loss": 0.0114,
      "step": 2184900
    },
    {
      "epoch": 3.575669501122654,
      "grad_norm": 0.2568935453891754,
      "learning_rate": 2.867431455624889e-06,
      "loss": 0.0122,
      "step": 2184920
    },
    {
      "epoch": 3.5757022315613076,
      "grad_norm": 0.8845711946487427,
      "learning_rate": 2.8673655634113717e-06,
      "loss": 0.0137,
      "step": 2184940
    },
    {
      "epoch": 3.5757349619999608,
      "grad_norm": 0.1723703294992447,
      "learning_rate": 2.8672996711978544e-06,
      "loss": 0.008,
      "step": 2184960
    },
    {
      "epoch": 3.575767692438614,
      "grad_norm": 0.10116954147815704,
      "learning_rate": 2.8672337789843376e-06,
      "loss": 0.0064,
      "step": 2184980
    },
    {
      "epoch": 3.5758004228772675,
      "grad_norm": 0.09907080233097076,
      "learning_rate": 2.8671678867708203e-06,
      "loss": 0.0073,
      "step": 2185000
    },
    {
      "epoch": 3.5758331533159207,
      "grad_norm": 0.6258238554000854,
      "learning_rate": 2.8671019945573035e-06,
      "loss": 0.0092,
      "step": 2185020
    },
    {
      "epoch": 3.575865883754574,
      "grad_norm": 0.34574759006500244,
      "learning_rate": 2.8670361023437867e-06,
      "loss": 0.0104,
      "step": 2185040
    },
    {
      "epoch": 3.5758986141932274,
      "grad_norm": 0.4222639799118042,
      "learning_rate": 2.8669702101302694e-06,
      "loss": 0.0158,
      "step": 2185060
    },
    {
      "epoch": 3.575931344631881,
      "grad_norm": 0.3116174042224884,
      "learning_rate": 2.866904317916752e-06,
      "loss": 0.0105,
      "step": 2185080
    },
    {
      "epoch": 3.575964075070534,
      "grad_norm": 0.11838386952877045,
      "learning_rate": 2.866838425703235e-06,
      "loss": 0.0086,
      "step": 2185100
    },
    {
      "epoch": 3.5759968055091873,
      "grad_norm": 0.3657139837741852,
      "learning_rate": 2.866772533489718e-06,
      "loss": 0.0124,
      "step": 2185120
    },
    {
      "epoch": 3.576029535947841,
      "grad_norm": 0.21442732214927673,
      "learning_rate": 2.8667066412762008e-06,
      "loss": 0.0106,
      "step": 2185140
    },
    {
      "epoch": 3.576062266386494,
      "grad_norm": 0.19628745317459106,
      "learning_rate": 2.8666407490626835e-06,
      "loss": 0.011,
      "step": 2185160
    },
    {
      "epoch": 3.576094996825147,
      "grad_norm": 0.27458301186561584,
      "learning_rate": 2.8665748568491663e-06,
      "loss": 0.0102,
      "step": 2185180
    },
    {
      "epoch": 3.576127727263801,
      "grad_norm": 0.31042319536209106,
      "learning_rate": 2.866508964635649e-06,
      "loss": 0.0125,
      "step": 2185200
    },
    {
      "epoch": 3.5761604577024544,
      "grad_norm": 0.48062393069267273,
      "learning_rate": 2.866443072422132e-06,
      "loss": 0.0098,
      "step": 2185220
    },
    {
      "epoch": 3.5761931881411075,
      "grad_norm": 0.2925846576690674,
      "learning_rate": 2.866377180208615e-06,
      "loss": 0.012,
      "step": 2185240
    },
    {
      "epoch": 3.5762259185797607,
      "grad_norm": 0.1445583701133728,
      "learning_rate": 2.8663112879950976e-06,
      "loss": 0.0129,
      "step": 2185260
    },
    {
      "epoch": 3.5762586490184143,
      "grad_norm": 0.3857046067714691,
      "learning_rate": 2.8662453957815804e-06,
      "loss": 0.0114,
      "step": 2185280
    },
    {
      "epoch": 3.5762913794570674,
      "grad_norm": 0.3689926862716675,
      "learning_rate": 2.8661795035680635e-06,
      "loss": 0.0099,
      "step": 2185300
    },
    {
      "epoch": 3.5763241098957206,
      "grad_norm": 0.36813250184059143,
      "learning_rate": 2.8661136113545463e-06,
      "loss": 0.0186,
      "step": 2185320
    },
    {
      "epoch": 3.576356840334374,
      "grad_norm": 0.5222365260124207,
      "learning_rate": 2.866047719141029e-06,
      "loss": 0.0084,
      "step": 2185340
    },
    {
      "epoch": 3.5763895707730278,
      "grad_norm": 0.1330033242702484,
      "learning_rate": 2.8659818269275126e-06,
      "loss": 0.0136,
      "step": 2185360
    },
    {
      "epoch": 3.576422301211681,
      "grad_norm": 0.34964168071746826,
      "learning_rate": 2.8659159347139954e-06,
      "loss": 0.0139,
      "step": 2185380
    },
    {
      "epoch": 3.576455031650334,
      "grad_norm": 0.20597122609615326,
      "learning_rate": 2.865850042500478e-06,
      "loss": 0.0092,
      "step": 2185400
    },
    {
      "epoch": 3.5764877620889877,
      "grad_norm": 0.2151360809803009,
      "learning_rate": 2.865784150286961e-06,
      "loss": 0.0174,
      "step": 2185420
    },
    {
      "epoch": 3.576520492527641,
      "grad_norm": 0.11235291510820389,
      "learning_rate": 2.865718258073444e-06,
      "loss": 0.0128,
      "step": 2185440
    },
    {
      "epoch": 3.576553222966294,
      "grad_norm": 0.40253034234046936,
      "learning_rate": 2.8656523658599267e-06,
      "loss": 0.0118,
      "step": 2185460
    },
    {
      "epoch": 3.5765859534049476,
      "grad_norm": 0.6733847856521606,
      "learning_rate": 2.8655864736464095e-06,
      "loss": 0.0118,
      "step": 2185480
    },
    {
      "epoch": 3.5766186838436007,
      "grad_norm": 0.1755974441766739,
      "learning_rate": 2.8655205814328922e-06,
      "loss": 0.0084,
      "step": 2185500
    },
    {
      "epoch": 3.5766514142822543,
      "grad_norm": 0.5212280750274658,
      "learning_rate": 2.8654546892193754e-06,
      "loss": 0.0207,
      "step": 2185520
    },
    {
      "epoch": 3.5766841447209075,
      "grad_norm": 0.27839338779449463,
      "learning_rate": 2.865388797005858e-06,
      "loss": 0.0098,
      "step": 2185540
    },
    {
      "epoch": 3.576716875159561,
      "grad_norm": 0.23752814531326294,
      "learning_rate": 2.865322904792341e-06,
      "loss": 0.0123,
      "step": 2185560
    },
    {
      "epoch": 3.576749605598214,
      "grad_norm": 0.3127201497554779,
      "learning_rate": 2.8652570125788236e-06,
      "loss": 0.0095,
      "step": 2185580
    },
    {
      "epoch": 3.5767823360368673,
      "grad_norm": 0.5329177379608154,
      "learning_rate": 2.8651911203653068e-06,
      "loss": 0.0185,
      "step": 2185600
    },
    {
      "epoch": 3.576815066475521,
      "grad_norm": 0.27439555525779724,
      "learning_rate": 2.8651252281517895e-06,
      "loss": 0.0087,
      "step": 2185620
    },
    {
      "epoch": 3.576847796914174,
      "grad_norm": 0.2240964025259018,
      "learning_rate": 2.8650593359382722e-06,
      "loss": 0.0153,
      "step": 2185640
    },
    {
      "epoch": 3.5768805273528277,
      "grad_norm": 0.09784948825836182,
      "learning_rate": 2.864993443724755e-06,
      "loss": 0.0097,
      "step": 2185660
    },
    {
      "epoch": 3.576913257791481,
      "grad_norm": 0.228570356965065,
      "learning_rate": 2.8649275515112377e-06,
      "loss": 0.0103,
      "step": 2185680
    },
    {
      "epoch": 3.5769459882301344,
      "grad_norm": 0.21848155558109283,
      "learning_rate": 2.864861659297721e-06,
      "loss": 0.0121,
      "step": 2185700
    },
    {
      "epoch": 3.5769787186687876,
      "grad_norm": 0.18063940107822418,
      "learning_rate": 2.864795767084204e-06,
      "loss": 0.0095,
      "step": 2185720
    },
    {
      "epoch": 3.5770114491074407,
      "grad_norm": 0.48327356576919556,
      "learning_rate": 2.864729874870687e-06,
      "loss": 0.0098,
      "step": 2185740
    },
    {
      "epoch": 3.5770441795460943,
      "grad_norm": 0.15871475636959076,
      "learning_rate": 2.86466398265717e-06,
      "loss": 0.0086,
      "step": 2185760
    },
    {
      "epoch": 3.5770769099847475,
      "grad_norm": 0.32454171776771545,
      "learning_rate": 2.8645980904436527e-06,
      "loss": 0.0092,
      "step": 2185780
    },
    {
      "epoch": 3.577109640423401,
      "grad_norm": 0.3161340653896332,
      "learning_rate": 2.8645321982301354e-06,
      "loss": 0.0113,
      "step": 2185800
    },
    {
      "epoch": 3.577142370862054,
      "grad_norm": 0.1195300966501236,
      "learning_rate": 2.864466306016618e-06,
      "loss": 0.0084,
      "step": 2185820
    },
    {
      "epoch": 3.577175101300708,
      "grad_norm": 0.18425127863883972,
      "learning_rate": 2.8644004138031013e-06,
      "loss": 0.0111,
      "step": 2185840
    },
    {
      "epoch": 3.577207831739361,
      "grad_norm": 0.44571518898010254,
      "learning_rate": 2.864334521589584e-06,
      "loss": 0.0123,
      "step": 2185860
    },
    {
      "epoch": 3.577240562178014,
      "grad_norm": 0.24877136945724487,
      "learning_rate": 2.864268629376067e-06,
      "loss": 0.0104,
      "step": 2185880
    },
    {
      "epoch": 3.5772732926166677,
      "grad_norm": 0.11641351878643036,
      "learning_rate": 2.8642027371625496e-06,
      "loss": 0.0075,
      "step": 2185900
    },
    {
      "epoch": 3.577306023055321,
      "grad_norm": 0.5694916248321533,
      "learning_rate": 2.8641368449490327e-06,
      "loss": 0.0132,
      "step": 2185920
    },
    {
      "epoch": 3.5773387534939745,
      "grad_norm": 0.2398548573255539,
      "learning_rate": 2.8640709527355155e-06,
      "loss": 0.0156,
      "step": 2185940
    },
    {
      "epoch": 3.5773714839326276,
      "grad_norm": 0.1834152340888977,
      "learning_rate": 2.864005060521998e-06,
      "loss": 0.0102,
      "step": 2185960
    },
    {
      "epoch": 3.577404214371281,
      "grad_norm": 0.4929195046424866,
      "learning_rate": 2.863939168308481e-06,
      "loss": 0.017,
      "step": 2185980
    },
    {
      "epoch": 3.5774369448099343,
      "grad_norm": 0.08406775444746017,
      "learning_rate": 2.863873276094964e-06,
      "loss": 0.0127,
      "step": 2186000
    },
    {
      "epoch": 3.5774696752485875,
      "grad_norm": 0.1045701876282692,
      "learning_rate": 2.863807383881447e-06,
      "loss": 0.0099,
      "step": 2186020
    },
    {
      "epoch": 3.577502405687241,
      "grad_norm": 0.4664677381515503,
      "learning_rate": 2.8637414916679296e-06,
      "loss": 0.0098,
      "step": 2186040
    },
    {
      "epoch": 3.5775351361258942,
      "grad_norm": 0.6860570311546326,
      "learning_rate": 2.863675599454413e-06,
      "loss": 0.0124,
      "step": 2186060
    },
    {
      "epoch": 3.577567866564548,
      "grad_norm": 0.2689366936683655,
      "learning_rate": 2.863609707240896e-06,
      "loss": 0.0093,
      "step": 2186080
    },
    {
      "epoch": 3.577600597003201,
      "grad_norm": 0.4259893298149109,
      "learning_rate": 2.8635438150273786e-06,
      "loss": 0.0112,
      "step": 2186100
    },
    {
      "epoch": 3.5776333274418546,
      "grad_norm": 0.3263612687587738,
      "learning_rate": 2.8634779228138614e-06,
      "loss": 0.0124,
      "step": 2186120
    },
    {
      "epoch": 3.5776660578805077,
      "grad_norm": 0.272673636674881,
      "learning_rate": 2.8634120306003445e-06,
      "loss": 0.0126,
      "step": 2186140
    },
    {
      "epoch": 3.577698788319161,
      "grad_norm": 0.3978039026260376,
      "learning_rate": 2.8633461383868273e-06,
      "loss": 0.0083,
      "step": 2186160
    },
    {
      "epoch": 3.5777315187578145,
      "grad_norm": 0.3517148494720459,
      "learning_rate": 2.86328024617331e-06,
      "loss": 0.01,
      "step": 2186180
    },
    {
      "epoch": 3.5777642491964676,
      "grad_norm": 0.25642016530036926,
      "learning_rate": 2.8632143539597928e-06,
      "loss": 0.0131,
      "step": 2186200
    },
    {
      "epoch": 3.577796979635121,
      "grad_norm": 0.3172532618045807,
      "learning_rate": 2.8631484617462755e-06,
      "loss": 0.0129,
      "step": 2186220
    },
    {
      "epoch": 3.5778297100737744,
      "grad_norm": 1.1634323596954346,
      "learning_rate": 2.8630825695327587e-06,
      "loss": 0.008,
      "step": 2186240
    },
    {
      "epoch": 3.577862440512428,
      "grad_norm": 0.13829459249973297,
      "learning_rate": 2.8630166773192414e-06,
      "loss": 0.0143,
      "step": 2186260
    },
    {
      "epoch": 3.577895170951081,
      "grad_norm": 0.21101726591587067,
      "learning_rate": 2.862950785105724e-06,
      "loss": 0.0074,
      "step": 2186280
    },
    {
      "epoch": 3.5779279013897343,
      "grad_norm": 0.25619077682495117,
      "learning_rate": 2.862884892892207e-06,
      "loss": 0.0089,
      "step": 2186300
    },
    {
      "epoch": 3.577960631828388,
      "grad_norm": 0.2596436142921448,
      "learning_rate": 2.86281900067869e-06,
      "loss": 0.0107,
      "step": 2186320
    },
    {
      "epoch": 3.577993362267041,
      "grad_norm": 0.14450255036354065,
      "learning_rate": 2.862753108465173e-06,
      "loss": 0.0093,
      "step": 2186340
    },
    {
      "epoch": 3.5780260927056946,
      "grad_norm": 0.8511890769004822,
      "learning_rate": 2.8626872162516555e-06,
      "loss": 0.0178,
      "step": 2186360
    },
    {
      "epoch": 3.5780588231443478,
      "grad_norm": 0.7362927794456482,
      "learning_rate": 2.8626213240381383e-06,
      "loss": 0.0087,
      "step": 2186380
    },
    {
      "epoch": 3.5780915535830013,
      "grad_norm": 0.1636703759431839,
      "learning_rate": 2.8625554318246214e-06,
      "loss": 0.0152,
      "step": 2186400
    },
    {
      "epoch": 3.5781242840216545,
      "grad_norm": 0.23578976094722748,
      "learning_rate": 2.8624895396111046e-06,
      "loss": 0.0127,
      "step": 2186420
    },
    {
      "epoch": 3.5781570144603076,
      "grad_norm": 0.3375643193721771,
      "learning_rate": 2.8624236473975873e-06,
      "loss": 0.0149,
      "step": 2186440
    },
    {
      "epoch": 3.5781897448989612,
      "grad_norm": 0.7332807183265686,
      "learning_rate": 2.8623577551840705e-06,
      "loss": 0.0096,
      "step": 2186460
    },
    {
      "epoch": 3.5782224753376144,
      "grad_norm": 0.33158501982688904,
      "learning_rate": 2.8622918629705532e-06,
      "loss": 0.0095,
      "step": 2186480
    },
    {
      "epoch": 3.5782552057762675,
      "grad_norm": 0.4134223163127899,
      "learning_rate": 2.862225970757036e-06,
      "loss": 0.0149,
      "step": 2186500
    },
    {
      "epoch": 3.578287936214921,
      "grad_norm": 0.33835160732269287,
      "learning_rate": 2.8621600785435187e-06,
      "loss": 0.0113,
      "step": 2186520
    },
    {
      "epoch": 3.5783206666535747,
      "grad_norm": 0.24581243097782135,
      "learning_rate": 2.862094186330002e-06,
      "loss": 0.0146,
      "step": 2186540
    },
    {
      "epoch": 3.578353397092228,
      "grad_norm": 0.3098849356174469,
      "learning_rate": 2.8620282941164846e-06,
      "loss": 0.0113,
      "step": 2186560
    },
    {
      "epoch": 3.578386127530881,
      "grad_norm": 0.39511626958847046,
      "learning_rate": 2.8619624019029674e-06,
      "loss": 0.0127,
      "step": 2186580
    },
    {
      "epoch": 3.5784188579695346,
      "grad_norm": 0.20875859260559082,
      "learning_rate": 2.86189650968945e-06,
      "loss": 0.0096,
      "step": 2186600
    },
    {
      "epoch": 3.5784515884081878,
      "grad_norm": 0.13222232460975647,
      "learning_rate": 2.861830617475933e-06,
      "loss": 0.0124,
      "step": 2186620
    },
    {
      "epoch": 3.578484318846841,
      "grad_norm": 0.5729987025260925,
      "learning_rate": 2.861764725262416e-06,
      "loss": 0.0116,
      "step": 2186640
    },
    {
      "epoch": 3.5785170492854945,
      "grad_norm": 0.232835054397583,
      "learning_rate": 2.8616988330488987e-06,
      "loss": 0.0152,
      "step": 2186660
    },
    {
      "epoch": 3.578549779724148,
      "grad_norm": 0.20748023688793182,
      "learning_rate": 2.8616329408353815e-06,
      "loss": 0.0123,
      "step": 2186680
    },
    {
      "epoch": 3.5785825101628013,
      "grad_norm": 0.23106010258197784,
      "learning_rate": 2.8615670486218642e-06,
      "loss": 0.0088,
      "step": 2186700
    },
    {
      "epoch": 3.5786152406014544,
      "grad_norm": 0.0904235988855362,
      "learning_rate": 2.8615011564083474e-06,
      "loss": 0.0084,
      "step": 2186720
    },
    {
      "epoch": 3.578647971040108,
      "grad_norm": 0.4314987063407898,
      "learning_rate": 2.86143526419483e-06,
      "loss": 0.0073,
      "step": 2186740
    },
    {
      "epoch": 3.578680701478761,
      "grad_norm": 0.46177053451538086,
      "learning_rate": 2.861369371981313e-06,
      "loss": 0.0091,
      "step": 2186760
    },
    {
      "epoch": 3.5787134319174143,
      "grad_norm": 0.11711188405752182,
      "learning_rate": 2.8613034797677965e-06,
      "loss": 0.0123,
      "step": 2186780
    },
    {
      "epoch": 3.578746162356068,
      "grad_norm": 0.08503565937280655,
      "learning_rate": 2.861237587554279e-06,
      "loss": 0.0097,
      "step": 2186800
    },
    {
      "epoch": 3.5787788927947215,
      "grad_norm": 0.2312362641096115,
      "learning_rate": 2.861171695340762e-06,
      "loss": 0.0089,
      "step": 2186820
    },
    {
      "epoch": 3.5788116232333747,
      "grad_norm": 0.25346747040748596,
      "learning_rate": 2.8611058031272447e-06,
      "loss": 0.0098,
      "step": 2186840
    },
    {
      "epoch": 3.578844353672028,
      "grad_norm": 0.2956169843673706,
      "learning_rate": 2.861039910913728e-06,
      "loss": 0.0126,
      "step": 2186860
    },
    {
      "epoch": 3.5788770841106814,
      "grad_norm": 0.1603597104549408,
      "learning_rate": 2.8609740187002106e-06,
      "loss": 0.0075,
      "step": 2186880
    },
    {
      "epoch": 3.5789098145493345,
      "grad_norm": 0.14504101872444153,
      "learning_rate": 2.8609081264866933e-06,
      "loss": 0.0072,
      "step": 2186900
    },
    {
      "epoch": 3.5789425449879877,
      "grad_norm": 0.27783289551734924,
      "learning_rate": 2.860842234273176e-06,
      "loss": 0.0098,
      "step": 2186920
    },
    {
      "epoch": 3.5789752754266413,
      "grad_norm": 0.44179651141166687,
      "learning_rate": 2.8607763420596592e-06,
      "loss": 0.0113,
      "step": 2186940
    },
    {
      "epoch": 3.579008005865295,
      "grad_norm": 0.12135927379131317,
      "learning_rate": 2.860710449846142e-06,
      "loss": 0.0122,
      "step": 2186960
    },
    {
      "epoch": 3.579040736303948,
      "grad_norm": 0.3233547508716583,
      "learning_rate": 2.8606445576326247e-06,
      "loss": 0.0118,
      "step": 2186980
    },
    {
      "epoch": 3.579073466742601,
      "grad_norm": 0.2142440378665924,
      "learning_rate": 2.8605786654191074e-06,
      "loss": 0.0106,
      "step": 2187000
    },
    {
      "epoch": 3.579106197181255,
      "grad_norm": 0.15153488516807556,
      "learning_rate": 2.8605127732055906e-06,
      "loss": 0.0087,
      "step": 2187020
    },
    {
      "epoch": 3.579138927619908,
      "grad_norm": 0.14331132173538208,
      "learning_rate": 2.8604468809920733e-06,
      "loss": 0.0084,
      "step": 2187040
    },
    {
      "epoch": 3.579171658058561,
      "grad_norm": 0.6474015116691589,
      "learning_rate": 2.860380988778556e-06,
      "loss": 0.0098,
      "step": 2187060
    },
    {
      "epoch": 3.5792043884972147,
      "grad_norm": 0.3841725289821625,
      "learning_rate": 2.860315096565039e-06,
      "loss": 0.0105,
      "step": 2187080
    },
    {
      "epoch": 3.579237118935868,
      "grad_norm": 0.1589026004076004,
      "learning_rate": 2.8602492043515216e-06,
      "loss": 0.0089,
      "step": 2187100
    },
    {
      "epoch": 3.5792698493745214,
      "grad_norm": 0.42331933975219727,
      "learning_rate": 2.860183312138005e-06,
      "loss": 0.0212,
      "step": 2187120
    },
    {
      "epoch": 3.5793025798131746,
      "grad_norm": 0.3518012762069702,
      "learning_rate": 2.860117419924488e-06,
      "loss": 0.0129,
      "step": 2187140
    },
    {
      "epoch": 3.579335310251828,
      "grad_norm": 0.21602432429790497,
      "learning_rate": 2.8600515277109706e-06,
      "loss": 0.0113,
      "step": 2187160
    },
    {
      "epoch": 3.5793680406904813,
      "grad_norm": 0.26015129685401917,
      "learning_rate": 2.859985635497454e-06,
      "loss": 0.0117,
      "step": 2187180
    },
    {
      "epoch": 3.5794007711291345,
      "grad_norm": 0.2671927809715271,
      "learning_rate": 2.8599197432839365e-06,
      "loss": 0.0088,
      "step": 2187200
    },
    {
      "epoch": 3.579433501567788,
      "grad_norm": 0.15345816314220428,
      "learning_rate": 2.8598538510704193e-06,
      "loss": 0.0139,
      "step": 2187220
    },
    {
      "epoch": 3.579466232006441,
      "grad_norm": 0.11844803392887115,
      "learning_rate": 2.859787958856902e-06,
      "loss": 0.0107,
      "step": 2187240
    },
    {
      "epoch": 3.579498962445095,
      "grad_norm": 0.06112892925739288,
      "learning_rate": 2.859722066643385e-06,
      "loss": 0.0166,
      "step": 2187260
    },
    {
      "epoch": 3.579531692883748,
      "grad_norm": 0.2655327022075653,
      "learning_rate": 2.859656174429868e-06,
      "loss": 0.009,
      "step": 2187280
    },
    {
      "epoch": 3.5795644233224015,
      "grad_norm": 0.1648765504360199,
      "learning_rate": 2.8595902822163507e-06,
      "loss": 0.0099,
      "step": 2187300
    },
    {
      "epoch": 3.5795971537610547,
      "grad_norm": 0.6322121024131775,
      "learning_rate": 2.8595243900028334e-06,
      "loss": 0.0224,
      "step": 2187320
    },
    {
      "epoch": 3.579629884199708,
      "grad_norm": 0.23758667707443237,
      "learning_rate": 2.8594584977893166e-06,
      "loss": 0.0064,
      "step": 2187340
    },
    {
      "epoch": 3.5796626146383614,
      "grad_norm": 0.12927357852458954,
      "learning_rate": 2.8593926055757993e-06,
      "loss": 0.0092,
      "step": 2187360
    },
    {
      "epoch": 3.5796953450770146,
      "grad_norm": 0.6171332001686096,
      "learning_rate": 2.859326713362282e-06,
      "loss": 0.0135,
      "step": 2187380
    },
    {
      "epoch": 3.579728075515668,
      "grad_norm": 0.3425735831260681,
      "learning_rate": 2.8592608211487648e-06,
      "loss": 0.012,
      "step": 2187400
    },
    {
      "epoch": 3.5797608059543213,
      "grad_norm": 0.10366158187389374,
      "learning_rate": 2.859194928935248e-06,
      "loss": 0.011,
      "step": 2187420
    },
    {
      "epoch": 3.579793536392975,
      "grad_norm": 0.21462659537792206,
      "learning_rate": 2.8591290367217307e-06,
      "loss": 0.0083,
      "step": 2187440
    },
    {
      "epoch": 3.579826266831628,
      "grad_norm": 0.16629235446453094,
      "learning_rate": 2.8590631445082134e-06,
      "loss": 0.0098,
      "step": 2187460
    },
    {
      "epoch": 3.5798589972702812,
      "grad_norm": 0.6189597249031067,
      "learning_rate": 2.858997252294697e-06,
      "loss": 0.0153,
      "step": 2187480
    },
    {
      "epoch": 3.579891727708935,
      "grad_norm": 0.6164439916610718,
      "learning_rate": 2.8589313600811797e-06,
      "loss": 0.009,
      "step": 2187500
    },
    {
      "epoch": 3.579924458147588,
      "grad_norm": 0.20934602618217468,
      "learning_rate": 2.8588654678676625e-06,
      "loss": 0.0081,
      "step": 2187520
    },
    {
      "epoch": 3.5799571885862416,
      "grad_norm": 0.45496752858161926,
      "learning_rate": 2.8587995756541452e-06,
      "loss": 0.0171,
      "step": 2187540
    },
    {
      "epoch": 3.5799899190248947,
      "grad_norm": 0.7468712329864502,
      "learning_rate": 2.8587336834406284e-06,
      "loss": 0.0081,
      "step": 2187560
    },
    {
      "epoch": 3.5800226494635483,
      "grad_norm": 0.14045840501785278,
      "learning_rate": 2.858667791227111e-06,
      "loss": 0.0079,
      "step": 2187580
    },
    {
      "epoch": 3.5800553799022015,
      "grad_norm": 0.13738204538822174,
      "learning_rate": 2.858601899013594e-06,
      "loss": 0.0116,
      "step": 2187600
    },
    {
      "epoch": 3.5800881103408546,
      "grad_norm": 0.09175390750169754,
      "learning_rate": 2.8585360068000766e-06,
      "loss": 0.01,
      "step": 2187620
    },
    {
      "epoch": 3.580120840779508,
      "grad_norm": 0.08030010014772415,
      "learning_rate": 2.8584701145865593e-06,
      "loss": 0.0124,
      "step": 2187640
    },
    {
      "epoch": 3.5801535712181614,
      "grad_norm": 0.7466810345649719,
      "learning_rate": 2.8584042223730425e-06,
      "loss": 0.0108,
      "step": 2187660
    },
    {
      "epoch": 3.580186301656815,
      "grad_norm": 0.12948569655418396,
      "learning_rate": 2.8583383301595252e-06,
      "loss": 0.009,
      "step": 2187680
    },
    {
      "epoch": 3.580219032095468,
      "grad_norm": 0.21389245986938477,
      "learning_rate": 2.858272437946008e-06,
      "loss": 0.0121,
      "step": 2187700
    },
    {
      "epoch": 3.5802517625341217,
      "grad_norm": 0.194786936044693,
      "learning_rate": 2.8582065457324907e-06,
      "loss": 0.0116,
      "step": 2187720
    },
    {
      "epoch": 3.580284492972775,
      "grad_norm": 0.27730557322502136,
      "learning_rate": 2.858140653518974e-06,
      "loss": 0.0128,
      "step": 2187740
    },
    {
      "epoch": 3.580317223411428,
      "grad_norm": 0.22960124909877777,
      "learning_rate": 2.8580747613054566e-06,
      "loss": 0.0105,
      "step": 2187760
    },
    {
      "epoch": 3.5803499538500816,
      "grad_norm": 0.33388543128967285,
      "learning_rate": 2.8580088690919394e-06,
      "loss": 0.009,
      "step": 2187780
    },
    {
      "epoch": 3.5803826842887347,
      "grad_norm": 0.06662563979625702,
      "learning_rate": 2.857942976878422e-06,
      "loss": 0.0145,
      "step": 2187800
    },
    {
      "epoch": 3.5804154147273883,
      "grad_norm": 0.8275489807128906,
      "learning_rate": 2.8578770846649057e-06,
      "loss": 0.0128,
      "step": 2187820
    },
    {
      "epoch": 3.5804481451660415,
      "grad_norm": 0.1957845836877823,
      "learning_rate": 2.8578111924513884e-06,
      "loss": 0.0102,
      "step": 2187840
    },
    {
      "epoch": 3.580480875604695,
      "grad_norm": 0.26901429891586304,
      "learning_rate": 2.857745300237871e-06,
      "loss": 0.0079,
      "step": 2187860
    },
    {
      "epoch": 3.5805136060433482,
      "grad_norm": 0.28268420696258545,
      "learning_rate": 2.8576794080243543e-06,
      "loss": 0.0096,
      "step": 2187880
    },
    {
      "epoch": 3.5805463364820014,
      "grad_norm": 0.18493956327438354,
      "learning_rate": 2.857613515810837e-06,
      "loss": 0.013,
      "step": 2187900
    },
    {
      "epoch": 3.580579066920655,
      "grad_norm": 0.07869397103786469,
      "learning_rate": 2.85754762359732e-06,
      "loss": 0.0154,
      "step": 2187920
    },
    {
      "epoch": 3.580611797359308,
      "grad_norm": 0.2532520890235901,
      "learning_rate": 2.8574817313838026e-06,
      "loss": 0.0118,
      "step": 2187940
    },
    {
      "epoch": 3.5806445277979613,
      "grad_norm": 0.3724772036075592,
      "learning_rate": 2.8574158391702857e-06,
      "loss": 0.0109,
      "step": 2187960
    },
    {
      "epoch": 3.580677258236615,
      "grad_norm": 0.28125107288360596,
      "learning_rate": 2.8573499469567685e-06,
      "loss": 0.0101,
      "step": 2187980
    },
    {
      "epoch": 3.5807099886752685,
      "grad_norm": 0.2524924576282501,
      "learning_rate": 2.857284054743251e-06,
      "loss": 0.0117,
      "step": 2188000
    },
    {
      "epoch": 3.5807427191139216,
      "grad_norm": 0.2799736559391022,
      "learning_rate": 2.857218162529734e-06,
      "loss": 0.0107,
      "step": 2188020
    },
    {
      "epoch": 3.5807754495525748,
      "grad_norm": 0.15691526234149933,
      "learning_rate": 2.8571522703162167e-06,
      "loss": 0.01,
      "step": 2188040
    },
    {
      "epoch": 3.5808081799912284,
      "grad_norm": 0.18839485943317413,
      "learning_rate": 2.8570863781027e-06,
      "loss": 0.0128,
      "step": 2188060
    },
    {
      "epoch": 3.5808409104298815,
      "grad_norm": 0.16544708609580994,
      "learning_rate": 2.8570204858891826e-06,
      "loss": 0.0079,
      "step": 2188080
    },
    {
      "epoch": 3.5808736408685347,
      "grad_norm": 0.21836647391319275,
      "learning_rate": 2.8569545936756653e-06,
      "loss": 0.0098,
      "step": 2188100
    },
    {
      "epoch": 3.5809063713071883,
      "grad_norm": 0.25073304772377014,
      "learning_rate": 2.856888701462148e-06,
      "loss": 0.0103,
      "step": 2188120
    },
    {
      "epoch": 3.580939101745842,
      "grad_norm": 0.5243313312530518,
      "learning_rate": 2.8568228092486312e-06,
      "loss": 0.0167,
      "step": 2188140
    },
    {
      "epoch": 3.580971832184495,
      "grad_norm": 0.08244843780994415,
      "learning_rate": 2.856756917035114e-06,
      "loss": 0.0108,
      "step": 2188160
    },
    {
      "epoch": 3.581004562623148,
      "grad_norm": 0.13792045414447784,
      "learning_rate": 2.856691024821597e-06,
      "loss": 0.0097,
      "step": 2188180
    },
    {
      "epoch": 3.5810372930618017,
      "grad_norm": 0.22548151016235352,
      "learning_rate": 2.8566251326080803e-06,
      "loss": 0.0068,
      "step": 2188200
    },
    {
      "epoch": 3.581070023500455,
      "grad_norm": 0.3846926689147949,
      "learning_rate": 2.856559240394563e-06,
      "loss": 0.0105,
      "step": 2188220
    },
    {
      "epoch": 3.581102753939108,
      "grad_norm": 0.16610084474086761,
      "learning_rate": 2.8564933481810458e-06,
      "loss": 0.0131,
      "step": 2188240
    },
    {
      "epoch": 3.5811354843777616,
      "grad_norm": 0.26551350951194763,
      "learning_rate": 2.8564274559675285e-06,
      "loss": 0.0105,
      "step": 2188260
    },
    {
      "epoch": 3.5811682148164152,
      "grad_norm": 0.18294984102249146,
      "learning_rate": 2.8563615637540117e-06,
      "loss": 0.0091,
      "step": 2188280
    },
    {
      "epoch": 3.5812009452550684,
      "grad_norm": 0.7183660268783569,
      "learning_rate": 2.8562956715404944e-06,
      "loss": 0.0106,
      "step": 2188300
    },
    {
      "epoch": 3.5812336756937215,
      "grad_norm": 0.1467701494693756,
      "learning_rate": 2.856229779326977e-06,
      "loss": 0.0108,
      "step": 2188320
    },
    {
      "epoch": 3.581266406132375,
      "grad_norm": 0.29974228143692017,
      "learning_rate": 2.85616388711346e-06,
      "loss": 0.0177,
      "step": 2188340
    },
    {
      "epoch": 3.5812991365710283,
      "grad_norm": 0.08379984647035599,
      "learning_rate": 2.856097994899943e-06,
      "loss": 0.0147,
      "step": 2188360
    },
    {
      "epoch": 3.5813318670096814,
      "grad_norm": 0.10126958042383194,
      "learning_rate": 2.856032102686426e-06,
      "loss": 0.0105,
      "step": 2188380
    },
    {
      "epoch": 3.581364597448335,
      "grad_norm": 0.3992871642112732,
      "learning_rate": 2.8559662104729085e-06,
      "loss": 0.0106,
      "step": 2188400
    },
    {
      "epoch": 3.5813973278869886,
      "grad_norm": 0.14600934088230133,
      "learning_rate": 2.8559003182593913e-06,
      "loss": 0.0121,
      "step": 2188420
    },
    {
      "epoch": 3.5814300583256418,
      "grad_norm": 0.2359989434480667,
      "learning_rate": 2.8558344260458744e-06,
      "loss": 0.0123,
      "step": 2188440
    },
    {
      "epoch": 3.581462788764295,
      "grad_norm": 0.08526837080717087,
      "learning_rate": 2.855768533832357e-06,
      "loss": 0.0086,
      "step": 2188460
    },
    {
      "epoch": 3.5814955192029485,
      "grad_norm": 0.42086261510849,
      "learning_rate": 2.85570264161884e-06,
      "loss": 0.0147,
      "step": 2188480
    },
    {
      "epoch": 3.5815282496416017,
      "grad_norm": 0.27733680605888367,
      "learning_rate": 2.8556367494053227e-06,
      "loss": 0.0098,
      "step": 2188500
    },
    {
      "epoch": 3.581560980080255,
      "grad_norm": 0.45381683111190796,
      "learning_rate": 2.8555708571918054e-06,
      "loss": 0.0107,
      "step": 2188520
    },
    {
      "epoch": 3.5815937105189084,
      "grad_norm": 0.21960872411727905,
      "learning_rate": 2.855504964978289e-06,
      "loss": 0.0107,
      "step": 2188540
    },
    {
      "epoch": 3.5816264409575616,
      "grad_norm": 0.15205007791519165,
      "learning_rate": 2.8554390727647717e-06,
      "loss": 0.0087,
      "step": 2188560
    },
    {
      "epoch": 3.581659171396215,
      "grad_norm": 0.5172863602638245,
      "learning_rate": 2.8553731805512545e-06,
      "loss": 0.0094,
      "step": 2188580
    },
    {
      "epoch": 3.5816919018348683,
      "grad_norm": 0.3271046280860901,
      "learning_rate": 2.8553072883377376e-06,
      "loss": 0.0086,
      "step": 2188600
    },
    {
      "epoch": 3.581724632273522,
      "grad_norm": Infinity,
      "learning_rate": 2.8552413961242204e-06,
      "loss": 0.01,
      "step": 2188620
    },
    {
      "epoch": 3.581757362712175,
      "grad_norm": 0.38068532943725586,
      "learning_rate": 2.855175503910703e-06,
      "loss": 0.0158,
      "step": 2188640
    },
    {
      "epoch": 3.581790093150828,
      "grad_norm": 0.08790998160839081,
      "learning_rate": 2.855109611697186e-06,
      "loss": 0.0081,
      "step": 2188660
    },
    {
      "epoch": 3.581822823589482,
      "grad_norm": 0.16128265857696533,
      "learning_rate": 2.855043719483669e-06,
      "loss": 0.0078,
      "step": 2188680
    },
    {
      "epoch": 3.581855554028135,
      "grad_norm": 0.08241510391235352,
      "learning_rate": 2.8549778272701518e-06,
      "loss": 0.0095,
      "step": 2188700
    },
    {
      "epoch": 3.5818882844667885,
      "grad_norm": 0.5052676200866699,
      "learning_rate": 2.8549119350566345e-06,
      "loss": 0.0092,
      "step": 2188720
    },
    {
      "epoch": 3.5819210149054417,
      "grad_norm": 0.5757574439048767,
      "learning_rate": 2.8548460428431172e-06,
      "loss": 0.0092,
      "step": 2188740
    },
    {
      "epoch": 3.5819537453440953,
      "grad_norm": 0.09187028557062149,
      "learning_rate": 2.8547801506296004e-06,
      "loss": 0.0096,
      "step": 2188760
    },
    {
      "epoch": 3.5819864757827484,
      "grad_norm": 0.19481134414672852,
      "learning_rate": 2.854714258416083e-06,
      "loss": 0.0092,
      "step": 2188780
    },
    {
      "epoch": 3.5820192062214016,
      "grad_norm": 0.23744317889213562,
      "learning_rate": 2.854648366202566e-06,
      "loss": 0.0073,
      "step": 2188800
    },
    {
      "epoch": 3.582051936660055,
      "grad_norm": 0.2059420794248581,
      "learning_rate": 2.8545824739890486e-06,
      "loss": 0.0112,
      "step": 2188820
    },
    {
      "epoch": 3.5820846670987083,
      "grad_norm": 0.41110536456108093,
      "learning_rate": 2.8545165817755318e-06,
      "loss": 0.0121,
      "step": 2188840
    },
    {
      "epoch": 3.582117397537362,
      "grad_norm": 0.19439144432544708,
      "learning_rate": 2.8544506895620145e-06,
      "loss": 0.0095,
      "step": 2188860
    },
    {
      "epoch": 3.582150127976015,
      "grad_norm": 0.16354966163635254,
      "learning_rate": 2.8543847973484977e-06,
      "loss": 0.006,
      "step": 2188880
    },
    {
      "epoch": 3.5821828584146687,
      "grad_norm": 0.6322147250175476,
      "learning_rate": 2.854318905134981e-06,
      "loss": 0.0131,
      "step": 2188900
    },
    {
      "epoch": 3.582215588853322,
      "grad_norm": 0.14327186346054077,
      "learning_rate": 2.8542530129214636e-06,
      "loss": 0.0092,
      "step": 2188920
    },
    {
      "epoch": 3.582248319291975,
      "grad_norm": 0.09202259033918381,
      "learning_rate": 2.8541871207079463e-06,
      "loss": 0.0118,
      "step": 2188940
    },
    {
      "epoch": 3.5822810497306286,
      "grad_norm": 0.26350435614585876,
      "learning_rate": 2.854121228494429e-06,
      "loss": 0.0088,
      "step": 2188960
    },
    {
      "epoch": 3.5823137801692817,
      "grad_norm": 0.23556770384311676,
      "learning_rate": 2.8540553362809122e-06,
      "loss": 0.0151,
      "step": 2188980
    },
    {
      "epoch": 3.5823465106079353,
      "grad_norm": 0.24733014404773712,
      "learning_rate": 2.853989444067395e-06,
      "loss": 0.0092,
      "step": 2189000
    },
    {
      "epoch": 3.5823792410465884,
      "grad_norm": 0.6747884750366211,
      "learning_rate": 2.8539235518538777e-06,
      "loss": 0.0172,
      "step": 2189020
    },
    {
      "epoch": 3.582411971485242,
      "grad_norm": 0.06431553512811661,
      "learning_rate": 2.8538576596403604e-06,
      "loss": 0.0073,
      "step": 2189040
    },
    {
      "epoch": 3.582444701923895,
      "grad_norm": 1.1952859163284302,
      "learning_rate": 2.853791767426843e-06,
      "loss": 0.0111,
      "step": 2189060
    },
    {
      "epoch": 3.5824774323625483,
      "grad_norm": 0.5483337044715881,
      "learning_rate": 2.8537258752133263e-06,
      "loss": 0.016,
      "step": 2189080
    },
    {
      "epoch": 3.582510162801202,
      "grad_norm": 0.24918177723884583,
      "learning_rate": 2.853659982999809e-06,
      "loss": 0.013,
      "step": 2189100
    },
    {
      "epoch": 3.582542893239855,
      "grad_norm": 0.16772353649139404,
      "learning_rate": 2.853594090786292e-06,
      "loss": 0.01,
      "step": 2189120
    },
    {
      "epoch": 3.5825756236785087,
      "grad_norm": 0.08264367282390594,
      "learning_rate": 2.8535281985727746e-06,
      "loss": 0.014,
      "step": 2189140
    },
    {
      "epoch": 3.582608354117162,
      "grad_norm": 0.3372402787208557,
      "learning_rate": 2.8534623063592577e-06,
      "loss": 0.0141,
      "step": 2189160
    },
    {
      "epoch": 3.5826410845558154,
      "grad_norm": 0.5464381575584412,
      "learning_rate": 2.8533964141457405e-06,
      "loss": 0.0064,
      "step": 2189180
    },
    {
      "epoch": 3.5826738149944686,
      "grad_norm": 0.24925148487091064,
      "learning_rate": 2.853330521932223e-06,
      "loss": 0.0102,
      "step": 2189200
    },
    {
      "epoch": 3.5827065454331217,
      "grad_norm": 0.2624313235282898,
      "learning_rate": 2.853264629718706e-06,
      "loss": 0.007,
      "step": 2189220
    },
    {
      "epoch": 3.5827392758717753,
      "grad_norm": 0.32952794432640076,
      "learning_rate": 2.8531987375051895e-06,
      "loss": 0.0143,
      "step": 2189240
    },
    {
      "epoch": 3.5827720063104285,
      "grad_norm": 0.08144066482782364,
      "learning_rate": 2.8531328452916723e-06,
      "loss": 0.0081,
      "step": 2189260
    },
    {
      "epoch": 3.582804736749082,
      "grad_norm": 0.25230956077575684,
      "learning_rate": 2.853066953078155e-06,
      "loss": 0.0107,
      "step": 2189280
    },
    {
      "epoch": 3.582837467187735,
      "grad_norm": 0.31552642583847046,
      "learning_rate": 2.853001060864638e-06,
      "loss": 0.0134,
      "step": 2189300
    },
    {
      "epoch": 3.582870197626389,
      "grad_norm": 0.12154797464609146,
      "learning_rate": 2.852935168651121e-06,
      "loss": 0.0125,
      "step": 2189320
    },
    {
      "epoch": 3.582902928065042,
      "grad_norm": 0.43642422556877136,
      "learning_rate": 2.8528692764376037e-06,
      "loss": 0.0136,
      "step": 2189340
    },
    {
      "epoch": 3.582935658503695,
      "grad_norm": 0.07015503942966461,
      "learning_rate": 2.8528033842240864e-06,
      "loss": 0.0085,
      "step": 2189360
    },
    {
      "epoch": 3.5829683889423487,
      "grad_norm": 0.13481378555297852,
      "learning_rate": 2.8527374920105696e-06,
      "loss": 0.0106,
      "step": 2189380
    },
    {
      "epoch": 3.583001119381002,
      "grad_norm": 0.29595717787742615,
      "learning_rate": 2.8526715997970523e-06,
      "loss": 0.0119,
      "step": 2189400
    },
    {
      "epoch": 3.5830338498196554,
      "grad_norm": 0.2293502539396286,
      "learning_rate": 2.852605707583535e-06,
      "loss": 0.0153,
      "step": 2189420
    },
    {
      "epoch": 3.5830665802583086,
      "grad_norm": 0.13756735622882843,
      "learning_rate": 2.8525398153700178e-06,
      "loss": 0.0092,
      "step": 2189440
    },
    {
      "epoch": 3.583099310696962,
      "grad_norm": 0.36166444420814514,
      "learning_rate": 2.852473923156501e-06,
      "loss": 0.0059,
      "step": 2189460
    },
    {
      "epoch": 3.5831320411356153,
      "grad_norm": 0.1644771248102188,
      "learning_rate": 2.8524080309429837e-06,
      "loss": 0.0099,
      "step": 2189480
    },
    {
      "epoch": 3.5831647715742685,
      "grad_norm": 0.1744578331708908,
      "learning_rate": 2.8523421387294664e-06,
      "loss": 0.0096,
      "step": 2189500
    },
    {
      "epoch": 3.583197502012922,
      "grad_norm": 0.14194943010807037,
      "learning_rate": 2.852276246515949e-06,
      "loss": 0.0163,
      "step": 2189520
    },
    {
      "epoch": 3.5832302324515752,
      "grad_norm": 0.0874549001455307,
      "learning_rate": 2.852210354302432e-06,
      "loss": 0.009,
      "step": 2189540
    },
    {
      "epoch": 3.5832629628902284,
      "grad_norm": 0.11041618138551712,
      "learning_rate": 2.852144462088915e-06,
      "loss": 0.0132,
      "step": 2189560
    },
    {
      "epoch": 3.583295693328882,
      "grad_norm": 0.23944489657878876,
      "learning_rate": 2.8520785698753982e-06,
      "loss": 0.0121,
      "step": 2189580
    },
    {
      "epoch": 3.5833284237675356,
      "grad_norm": 0.17706458270549774,
      "learning_rate": 2.852012677661881e-06,
      "loss": 0.0085,
      "step": 2189600
    },
    {
      "epoch": 3.5833611542061887,
      "grad_norm": 0.24453884363174438,
      "learning_rate": 2.851946785448364e-06,
      "loss": 0.0097,
      "step": 2189620
    },
    {
      "epoch": 3.583393884644842,
      "grad_norm": 0.2540930211544037,
      "learning_rate": 2.851880893234847e-06,
      "loss": 0.0147,
      "step": 2189640
    },
    {
      "epoch": 3.5834266150834955,
      "grad_norm": 0.49902471899986267,
      "learning_rate": 2.8518150010213296e-06,
      "loss": 0.0099,
      "step": 2189660
    },
    {
      "epoch": 3.5834593455221486,
      "grad_norm": 0.31486088037490845,
      "learning_rate": 2.8517491088078124e-06,
      "loss": 0.0131,
      "step": 2189680
    },
    {
      "epoch": 3.5834920759608018,
      "grad_norm": 0.27529069781303406,
      "learning_rate": 2.8516832165942955e-06,
      "loss": 0.0106,
      "step": 2189700
    },
    {
      "epoch": 3.5835248063994554,
      "grad_norm": 0.4141981303691864,
      "learning_rate": 2.8516173243807783e-06,
      "loss": 0.0108,
      "step": 2189720
    },
    {
      "epoch": 3.583557536838109,
      "grad_norm": 0.16908179223537445,
      "learning_rate": 2.851551432167261e-06,
      "loss": 0.0141,
      "step": 2189740
    },
    {
      "epoch": 3.583590267276762,
      "grad_norm": 0.24640601873397827,
      "learning_rate": 2.8514855399537437e-06,
      "loss": 0.0134,
      "step": 2189760
    },
    {
      "epoch": 3.5836229977154153,
      "grad_norm": 0.10622403770685196,
      "learning_rate": 2.851419647740227e-06,
      "loss": 0.0185,
      "step": 2189780
    },
    {
      "epoch": 3.583655728154069,
      "grad_norm": 0.36237356066703796,
      "learning_rate": 2.8513537555267096e-06,
      "loss": 0.0089,
      "step": 2189800
    },
    {
      "epoch": 3.583688458592722,
      "grad_norm": 0.10682860016822815,
      "learning_rate": 2.8512878633131924e-06,
      "loss": 0.0076,
      "step": 2189820
    },
    {
      "epoch": 3.583721189031375,
      "grad_norm": 0.10238973796367645,
      "learning_rate": 2.851221971099675e-06,
      "loss": 0.0089,
      "step": 2189840
    },
    {
      "epoch": 3.5837539194700287,
      "grad_norm": 0.27201467752456665,
      "learning_rate": 2.8511560788861583e-06,
      "loss": 0.0102,
      "step": 2189860
    },
    {
      "epoch": 3.5837866499086823,
      "grad_norm": 0.4795443117618561,
      "learning_rate": 2.851090186672641e-06,
      "loss": 0.0112,
      "step": 2189880
    },
    {
      "epoch": 3.5838193803473355,
      "grad_norm": 0.17921772599220276,
      "learning_rate": 2.8510242944591238e-06,
      "loss": 0.0093,
      "step": 2189900
    },
    {
      "epoch": 3.5838521107859886,
      "grad_norm": 0.24742767214775085,
      "learning_rate": 2.8509584022456065e-06,
      "loss": 0.014,
      "step": 2189920
    },
    {
      "epoch": 3.5838848412246422,
      "grad_norm": 0.14091122150421143,
      "learning_rate": 2.85089251003209e-06,
      "loss": 0.0139,
      "step": 2189940
    },
    {
      "epoch": 3.5839175716632954,
      "grad_norm": 0.4526004195213318,
      "learning_rate": 2.850826617818573e-06,
      "loss": 0.0093,
      "step": 2189960
    },
    {
      "epoch": 3.5839503021019485,
      "grad_norm": 0.19437691569328308,
      "learning_rate": 2.8507607256050556e-06,
      "loss": 0.0086,
      "step": 2189980
    },
    {
      "epoch": 3.583983032540602,
      "grad_norm": 0.07946384698152542,
      "learning_rate": 2.8506948333915387e-06,
      "loss": 0.0142,
      "step": 2190000
    },
    {
      "epoch": 3.5840157629792553,
      "grad_norm": 0.1632092297077179,
      "learning_rate": 2.8506289411780215e-06,
      "loss": 0.0068,
      "step": 2190020
    },
    {
      "epoch": 3.584048493417909,
      "grad_norm": 0.1022273376584053,
      "learning_rate": 2.850563048964504e-06,
      "loss": 0.01,
      "step": 2190040
    },
    {
      "epoch": 3.584081223856562,
      "grad_norm": 0.20199230313301086,
      "learning_rate": 2.850497156750987e-06,
      "loss": 0.0104,
      "step": 2190060
    },
    {
      "epoch": 3.5841139542952156,
      "grad_norm": 0.2521927058696747,
      "learning_rate": 2.8504312645374697e-06,
      "loss": 0.0103,
      "step": 2190080
    },
    {
      "epoch": 3.5841466847338688,
      "grad_norm": 0.11160092800855637,
      "learning_rate": 2.850365372323953e-06,
      "loss": 0.0126,
      "step": 2190100
    },
    {
      "epoch": 3.584179415172522,
      "grad_norm": 0.2028399407863617,
      "learning_rate": 2.8502994801104356e-06,
      "loss": 0.0128,
      "step": 2190120
    },
    {
      "epoch": 3.5842121456111755,
      "grad_norm": 0.2820223569869995,
      "learning_rate": 2.8502335878969183e-06,
      "loss": 0.0098,
      "step": 2190140
    },
    {
      "epoch": 3.5842448760498287,
      "grad_norm": 0.1995294839143753,
      "learning_rate": 2.850167695683401e-06,
      "loss": 0.0087,
      "step": 2190160
    },
    {
      "epoch": 3.5842776064884823,
      "grad_norm": 0.26085135340690613,
      "learning_rate": 2.8501018034698842e-06,
      "loss": 0.0103,
      "step": 2190180
    },
    {
      "epoch": 3.5843103369271354,
      "grad_norm": 0.7675741910934448,
      "learning_rate": 2.850035911256367e-06,
      "loss": 0.0135,
      "step": 2190200
    },
    {
      "epoch": 3.584343067365789,
      "grad_norm": 0.5105352997779846,
      "learning_rate": 2.8499700190428497e-06,
      "loss": 0.0103,
      "step": 2190220
    },
    {
      "epoch": 3.584375797804442,
      "grad_norm": 0.1373094618320465,
      "learning_rate": 2.8499041268293325e-06,
      "loss": 0.008,
      "step": 2190240
    },
    {
      "epoch": 3.5844085282430953,
      "grad_norm": 0.7345205545425415,
      "learning_rate": 2.8498382346158156e-06,
      "loss": 0.014,
      "step": 2190260
    },
    {
      "epoch": 3.584441258681749,
      "grad_norm": 0.14993302524089813,
      "learning_rate": 2.8497723424022988e-06,
      "loss": 0.0158,
      "step": 2190280
    },
    {
      "epoch": 3.584473989120402,
      "grad_norm": 0.24934928119182587,
      "learning_rate": 2.8497064501887815e-06,
      "loss": 0.0132,
      "step": 2190300
    },
    {
      "epoch": 3.5845067195590556,
      "grad_norm": 0.14364342391490936,
      "learning_rate": 2.8496405579752647e-06,
      "loss": 0.0084,
      "step": 2190320
    },
    {
      "epoch": 3.584539449997709,
      "grad_norm": 0.31823647022247314,
      "learning_rate": 2.8495746657617474e-06,
      "loss": 0.0108,
      "step": 2190340
    },
    {
      "epoch": 3.5845721804363624,
      "grad_norm": 0.16751472651958466,
      "learning_rate": 2.84950877354823e-06,
      "loss": 0.0091,
      "step": 2190360
    },
    {
      "epoch": 3.5846049108750155,
      "grad_norm": 0.20704710483551025,
      "learning_rate": 2.849442881334713e-06,
      "loss": 0.0123,
      "step": 2190380
    },
    {
      "epoch": 3.5846376413136687,
      "grad_norm": 0.3991861641407013,
      "learning_rate": 2.849376989121196e-06,
      "loss": 0.0073,
      "step": 2190400
    },
    {
      "epoch": 3.5846703717523223,
      "grad_norm": 0.17004777491092682,
      "learning_rate": 2.849311096907679e-06,
      "loss": 0.0066,
      "step": 2190420
    },
    {
      "epoch": 3.5847031021909754,
      "grad_norm": 0.2032434493303299,
      "learning_rate": 2.8492452046941615e-06,
      "loss": 0.0126,
      "step": 2190440
    },
    {
      "epoch": 3.584735832629629,
      "grad_norm": 0.4850562512874603,
      "learning_rate": 2.8491793124806443e-06,
      "loss": 0.0077,
      "step": 2190460
    },
    {
      "epoch": 3.584768563068282,
      "grad_norm": 0.13310275971889496,
      "learning_rate": 2.849113420267127e-06,
      "loss": 0.0115,
      "step": 2190480
    },
    {
      "epoch": 3.5848012935069358,
      "grad_norm": 0.08030595630407333,
      "learning_rate": 2.84904752805361e-06,
      "loss": 0.0061,
      "step": 2190500
    },
    {
      "epoch": 3.584834023945589,
      "grad_norm": 0.2266964465379715,
      "learning_rate": 2.848981635840093e-06,
      "loss": 0.0113,
      "step": 2190520
    },
    {
      "epoch": 3.584866754384242,
      "grad_norm": 0.2085549235343933,
      "learning_rate": 2.8489157436265757e-06,
      "loss": 0.0145,
      "step": 2190540
    },
    {
      "epoch": 3.5848994848228957,
      "grad_norm": 0.1903061419725418,
      "learning_rate": 2.8488498514130584e-06,
      "loss": 0.0084,
      "step": 2190560
    },
    {
      "epoch": 3.584932215261549,
      "grad_norm": 0.12225977331399918,
      "learning_rate": 2.8487839591995416e-06,
      "loss": 0.014,
      "step": 2190580
    },
    {
      "epoch": 3.5849649457002024,
      "grad_norm": 0.7885974049568176,
      "learning_rate": 2.8487180669860243e-06,
      "loss": 0.0106,
      "step": 2190600
    },
    {
      "epoch": 3.5849976761388556,
      "grad_norm": 0.12851114571094513,
      "learning_rate": 2.848652174772507e-06,
      "loss": 0.0101,
      "step": 2190620
    },
    {
      "epoch": 3.585030406577509,
      "grad_norm": 0.304857462644577,
      "learning_rate": 2.8485862825589906e-06,
      "loss": 0.0208,
      "step": 2190640
    },
    {
      "epoch": 3.5850631370161623,
      "grad_norm": 0.29544827342033386,
      "learning_rate": 2.8485203903454734e-06,
      "loss": 0.0084,
      "step": 2190660
    },
    {
      "epoch": 3.5850958674548155,
      "grad_norm": 0.3501710295677185,
      "learning_rate": 2.848454498131956e-06,
      "loss": 0.0099,
      "step": 2190680
    },
    {
      "epoch": 3.585128597893469,
      "grad_norm": 1.4337489604949951,
      "learning_rate": 2.848388605918439e-06,
      "loss": 0.0147,
      "step": 2190700
    },
    {
      "epoch": 3.585161328332122,
      "grad_norm": 0.14276227355003357,
      "learning_rate": 2.848322713704922e-06,
      "loss": 0.0171,
      "step": 2190720
    },
    {
      "epoch": 3.585194058770776,
      "grad_norm": 0.3808576464653015,
      "learning_rate": 2.8482568214914048e-06,
      "loss": 0.0069,
      "step": 2190740
    },
    {
      "epoch": 3.585226789209429,
      "grad_norm": 0.2309008091688156,
      "learning_rate": 2.8481909292778875e-06,
      "loss": 0.0148,
      "step": 2190760
    },
    {
      "epoch": 3.5852595196480825,
      "grad_norm": 0.6562569737434387,
      "learning_rate": 2.8481250370643702e-06,
      "loss": 0.0172,
      "step": 2190780
    },
    {
      "epoch": 3.5852922500867357,
      "grad_norm": 0.1660117506980896,
      "learning_rate": 2.8480591448508534e-06,
      "loss": 0.0188,
      "step": 2190800
    },
    {
      "epoch": 3.585324980525389,
      "grad_norm": 0.6465709805488586,
      "learning_rate": 2.847993252637336e-06,
      "loss": 0.0138,
      "step": 2190820
    },
    {
      "epoch": 3.5853577109640424,
      "grad_norm": 0.26034015417099,
      "learning_rate": 2.847927360423819e-06,
      "loss": 0.0139,
      "step": 2190840
    },
    {
      "epoch": 3.5853904414026956,
      "grad_norm": 0.27706438302993774,
      "learning_rate": 2.8478614682103016e-06,
      "loss": 0.0138,
      "step": 2190860
    },
    {
      "epoch": 3.585423171841349,
      "grad_norm": 0.14335551857948303,
      "learning_rate": 2.8477955759967848e-06,
      "loss": 0.0111,
      "step": 2190880
    },
    {
      "epoch": 3.5854559022800023,
      "grad_norm": 0.3522584140300751,
      "learning_rate": 2.8477296837832675e-06,
      "loss": 0.0137,
      "step": 2190900
    },
    {
      "epoch": 3.585488632718656,
      "grad_norm": 0.32576310634613037,
      "learning_rate": 2.8476637915697503e-06,
      "loss": 0.014,
      "step": 2190920
    },
    {
      "epoch": 3.585521363157309,
      "grad_norm": 0.5459523797035217,
      "learning_rate": 2.847597899356233e-06,
      "loss": 0.0115,
      "step": 2190940
    },
    {
      "epoch": 3.585554093595962,
      "grad_norm": 0.3560042679309845,
      "learning_rate": 2.8475320071427157e-06,
      "loss": 0.0114,
      "step": 2190960
    },
    {
      "epoch": 3.585586824034616,
      "grad_norm": 0.21012242138385773,
      "learning_rate": 2.847466114929199e-06,
      "loss": 0.0131,
      "step": 2190980
    },
    {
      "epoch": 3.585619554473269,
      "grad_norm": 0.22690269351005554,
      "learning_rate": 2.847400222715682e-06,
      "loss": 0.0084,
      "step": 2191000
    },
    {
      "epoch": 3.585652284911922,
      "grad_norm": 0.4265398681163788,
      "learning_rate": 2.847334330502165e-06,
      "loss": 0.0118,
      "step": 2191020
    },
    {
      "epoch": 3.5856850153505757,
      "grad_norm": 0.44002842903137207,
      "learning_rate": 2.847268438288648e-06,
      "loss": 0.0088,
      "step": 2191040
    },
    {
      "epoch": 3.5857177457892293,
      "grad_norm": 0.19496123492717743,
      "learning_rate": 2.8472025460751307e-06,
      "loss": 0.0095,
      "step": 2191060
    },
    {
      "epoch": 3.5857504762278825,
      "grad_norm": 0.6061258316040039,
      "learning_rate": 2.8471366538616135e-06,
      "loss": 0.0165,
      "step": 2191080
    },
    {
      "epoch": 3.5857832066665356,
      "grad_norm": 0.16772669553756714,
      "learning_rate": 2.847070761648096e-06,
      "loss": 0.0079,
      "step": 2191100
    },
    {
      "epoch": 3.585815937105189,
      "grad_norm": 0.33714476227760315,
      "learning_rate": 2.8470048694345794e-06,
      "loss": 0.0109,
      "step": 2191120
    },
    {
      "epoch": 3.5858486675438423,
      "grad_norm": 0.11898590624332428,
      "learning_rate": 2.846938977221062e-06,
      "loss": 0.0088,
      "step": 2191140
    },
    {
      "epoch": 3.5858813979824955,
      "grad_norm": 0.4013132154941559,
      "learning_rate": 2.846873085007545e-06,
      "loss": 0.008,
      "step": 2191160
    },
    {
      "epoch": 3.585914128421149,
      "grad_norm": 0.0506751574575901,
      "learning_rate": 2.8468071927940276e-06,
      "loss": 0.0104,
      "step": 2191180
    },
    {
      "epoch": 3.5859468588598027,
      "grad_norm": 0.18418721854686737,
      "learning_rate": 2.8467413005805107e-06,
      "loss": 0.0076,
      "step": 2191200
    },
    {
      "epoch": 3.585979589298456,
      "grad_norm": 0.05279015749692917,
      "learning_rate": 2.8466754083669935e-06,
      "loss": 0.0131,
      "step": 2191220
    },
    {
      "epoch": 3.586012319737109,
      "grad_norm": 0.16857574880123138,
      "learning_rate": 2.8466095161534762e-06,
      "loss": 0.0164,
      "step": 2191240
    },
    {
      "epoch": 3.5860450501757626,
      "grad_norm": 0.3860618472099304,
      "learning_rate": 2.846543623939959e-06,
      "loss": 0.0105,
      "step": 2191260
    },
    {
      "epoch": 3.5860777806144157,
      "grad_norm": 0.36721962690353394,
      "learning_rate": 2.846477731726442e-06,
      "loss": 0.0093,
      "step": 2191280
    },
    {
      "epoch": 3.586110511053069,
      "grad_norm": 0.1863246113061905,
      "learning_rate": 2.846411839512925e-06,
      "loss": 0.0116,
      "step": 2191300
    },
    {
      "epoch": 3.5861432414917225,
      "grad_norm": 0.13308148086071014,
      "learning_rate": 2.8463459472994076e-06,
      "loss": 0.0123,
      "step": 2191320
    },
    {
      "epoch": 3.586175971930376,
      "grad_norm": 0.3565048277378082,
      "learning_rate": 2.846280055085891e-06,
      "loss": 0.0098,
      "step": 2191340
    },
    {
      "epoch": 3.586208702369029,
      "grad_norm": 0.12892164289951324,
      "learning_rate": 2.846214162872374e-06,
      "loss": 0.0104,
      "step": 2191360
    },
    {
      "epoch": 3.5862414328076824,
      "grad_norm": 0.12726351618766785,
      "learning_rate": 2.8461482706588567e-06,
      "loss": 0.0074,
      "step": 2191380
    },
    {
      "epoch": 3.586274163246336,
      "grad_norm": 0.08386790752410889,
      "learning_rate": 2.8460823784453394e-06,
      "loss": 0.0084,
      "step": 2191400
    },
    {
      "epoch": 3.586306893684989,
      "grad_norm": 0.1601913869380951,
      "learning_rate": 2.8460164862318226e-06,
      "loss": 0.0073,
      "step": 2191420
    },
    {
      "epoch": 3.5863396241236423,
      "grad_norm": 0.3240945041179657,
      "learning_rate": 2.8459505940183053e-06,
      "loss": 0.0114,
      "step": 2191440
    },
    {
      "epoch": 3.586372354562296,
      "grad_norm": 0.19086343050003052,
      "learning_rate": 2.845884701804788e-06,
      "loss": 0.0154,
      "step": 2191460
    },
    {
      "epoch": 3.5864050850009495,
      "grad_norm": 0.07442625612020493,
      "learning_rate": 2.845818809591271e-06,
      "loss": 0.0099,
      "step": 2191480
    },
    {
      "epoch": 3.5864378154396026,
      "grad_norm": 0.4620644450187683,
      "learning_rate": 2.8457529173777535e-06,
      "loss": 0.0173,
      "step": 2191500
    },
    {
      "epoch": 3.5864705458782558,
      "grad_norm": 0.2655409574508667,
      "learning_rate": 2.8456870251642367e-06,
      "loss": 0.0109,
      "step": 2191520
    },
    {
      "epoch": 3.5865032763169093,
      "grad_norm": 0.1505693644285202,
      "learning_rate": 2.8456211329507194e-06,
      "loss": 0.009,
      "step": 2191540
    },
    {
      "epoch": 3.5865360067555625,
      "grad_norm": 0.6451501846313477,
      "learning_rate": 2.845555240737202e-06,
      "loss": 0.0091,
      "step": 2191560
    },
    {
      "epoch": 3.5865687371942157,
      "grad_norm": 0.48485156893730164,
      "learning_rate": 2.845489348523685e-06,
      "loss": 0.0122,
      "step": 2191580
    },
    {
      "epoch": 3.5866014676328692,
      "grad_norm": 0.435304194688797,
      "learning_rate": 2.845423456310168e-06,
      "loss": 0.0107,
      "step": 2191600
    },
    {
      "epoch": 3.5866341980715224,
      "grad_norm": 0.374107301235199,
      "learning_rate": 2.845357564096651e-06,
      "loss": 0.0178,
      "step": 2191620
    },
    {
      "epoch": 3.586666928510176,
      "grad_norm": 0.08607800304889679,
      "learning_rate": 2.8452916718831336e-06,
      "loss": 0.0094,
      "step": 2191640
    },
    {
      "epoch": 3.586699658948829,
      "grad_norm": 0.2568160891532898,
      "learning_rate": 2.8452257796696163e-06,
      "loss": 0.0129,
      "step": 2191660
    },
    {
      "epoch": 3.5867323893874827,
      "grad_norm": 0.7352520823478699,
      "learning_rate": 2.8451598874560995e-06,
      "loss": 0.0089,
      "step": 2191680
    },
    {
      "epoch": 3.586765119826136,
      "grad_norm": 0.27190494537353516,
      "learning_rate": 2.8450939952425826e-06,
      "loss": 0.0079,
      "step": 2191700
    },
    {
      "epoch": 3.586797850264789,
      "grad_norm": 0.3497040569782257,
      "learning_rate": 2.8450281030290654e-06,
      "loss": 0.0109,
      "step": 2191720
    },
    {
      "epoch": 3.5868305807034426,
      "grad_norm": 0.13590672612190247,
      "learning_rate": 2.8449622108155485e-06,
      "loss": 0.011,
      "step": 2191740
    },
    {
      "epoch": 3.586863311142096,
      "grad_norm": 0.17577752470970154,
      "learning_rate": 2.8448963186020313e-06,
      "loss": 0.0092,
      "step": 2191760
    },
    {
      "epoch": 3.5868960415807494,
      "grad_norm": 0.37612488865852356,
      "learning_rate": 2.844830426388514e-06,
      "loss": 0.0113,
      "step": 2191780
    },
    {
      "epoch": 3.5869287720194025,
      "grad_norm": 0.37006863951683044,
      "learning_rate": 2.8447645341749967e-06,
      "loss": 0.0149,
      "step": 2191800
    },
    {
      "epoch": 3.586961502458056,
      "grad_norm": 0.18930421769618988,
      "learning_rate": 2.84469864196148e-06,
      "loss": 0.0114,
      "step": 2191820
    },
    {
      "epoch": 3.5869942328967093,
      "grad_norm": 0.2662045955657959,
      "learning_rate": 2.8446327497479626e-06,
      "loss": 0.008,
      "step": 2191840
    },
    {
      "epoch": 3.5870269633353624,
      "grad_norm": 0.32614409923553467,
      "learning_rate": 2.8445668575344454e-06,
      "loss": 0.0093,
      "step": 2191860
    },
    {
      "epoch": 3.587059693774016,
      "grad_norm": 0.14341437816619873,
      "learning_rate": 2.844500965320928e-06,
      "loss": 0.0103,
      "step": 2191880
    },
    {
      "epoch": 3.587092424212669,
      "grad_norm": 0.0622413270175457,
      "learning_rate": 2.844435073107411e-06,
      "loss": 0.0137,
      "step": 2191900
    },
    {
      "epoch": 3.5871251546513228,
      "grad_norm": 0.2478295862674713,
      "learning_rate": 2.844369180893894e-06,
      "loss": 0.0093,
      "step": 2191920
    },
    {
      "epoch": 3.587157885089976,
      "grad_norm": 0.20640349388122559,
      "learning_rate": 2.8443032886803768e-06,
      "loss": 0.0093,
      "step": 2191940
    },
    {
      "epoch": 3.5871906155286295,
      "grad_norm": 0.11042168736457825,
      "learning_rate": 2.8442373964668595e-06,
      "loss": 0.007,
      "step": 2191960
    },
    {
      "epoch": 3.5872233459672827,
      "grad_norm": 0.44652849435806274,
      "learning_rate": 2.8441715042533422e-06,
      "loss": 0.0148,
      "step": 2191980
    },
    {
      "epoch": 3.587256076405936,
      "grad_norm": 0.3341243267059326,
      "learning_rate": 2.8441056120398254e-06,
      "loss": 0.013,
      "step": 2192000
    },
    {
      "epoch": 3.5872888068445894,
      "grad_norm": 0.3114696443080902,
      "learning_rate": 2.844039719826308e-06,
      "loss": 0.0085,
      "step": 2192020
    },
    {
      "epoch": 3.5873215372832425,
      "grad_norm": 0.7799341082572937,
      "learning_rate": 2.8439738276127913e-06,
      "loss": 0.0131,
      "step": 2192040
    },
    {
      "epoch": 3.587354267721896,
      "grad_norm": 0.11018481850624084,
      "learning_rate": 2.8439079353992745e-06,
      "loss": 0.012,
      "step": 2192060
    },
    {
      "epoch": 3.5873869981605493,
      "grad_norm": 0.188008651137352,
      "learning_rate": 2.8438420431857572e-06,
      "loss": 0.0067,
      "step": 2192080
    },
    {
      "epoch": 3.587419728599203,
      "grad_norm": 0.17664285004138947,
      "learning_rate": 2.84377615097224e-06,
      "loss": 0.0066,
      "step": 2192100
    },
    {
      "epoch": 3.587452459037856,
      "grad_norm": 0.14084716141223907,
      "learning_rate": 2.8437102587587227e-06,
      "loss": 0.012,
      "step": 2192120
    },
    {
      "epoch": 3.587485189476509,
      "grad_norm": 0.0758850947022438,
      "learning_rate": 2.843644366545206e-06,
      "loss": 0.0084,
      "step": 2192140
    },
    {
      "epoch": 3.587517919915163,
      "grad_norm": 0.16779977083206177,
      "learning_rate": 2.8435784743316886e-06,
      "loss": 0.0118,
      "step": 2192160
    },
    {
      "epoch": 3.587550650353816,
      "grad_norm": 0.25328853726387024,
      "learning_rate": 2.8435125821181713e-06,
      "loss": 0.0136,
      "step": 2192180
    },
    {
      "epoch": 3.5875833807924695,
      "grad_norm": 4.290714740753174,
      "learning_rate": 2.843446689904654e-06,
      "loss": 0.0106,
      "step": 2192200
    },
    {
      "epoch": 3.5876161112311227,
      "grad_norm": 0.34271153807640076,
      "learning_rate": 2.8433807976911372e-06,
      "loss": 0.0157,
      "step": 2192220
    },
    {
      "epoch": 3.5876488416697763,
      "grad_norm": 0.4130469262599945,
      "learning_rate": 2.84331490547762e-06,
      "loss": 0.0138,
      "step": 2192240
    },
    {
      "epoch": 3.5876815721084294,
      "grad_norm": 0.14466987550258636,
      "learning_rate": 2.8432490132641027e-06,
      "loss": 0.0099,
      "step": 2192260
    },
    {
      "epoch": 3.5877143025470826,
      "grad_norm": 0.10421989113092422,
      "learning_rate": 2.8431831210505855e-06,
      "loss": 0.0135,
      "step": 2192280
    },
    {
      "epoch": 3.587747032985736,
      "grad_norm": 0.347223162651062,
      "learning_rate": 2.8431172288370686e-06,
      "loss": 0.0126,
      "step": 2192300
    },
    {
      "epoch": 3.5877797634243893,
      "grad_norm": 0.5236332416534424,
      "learning_rate": 2.8430513366235514e-06,
      "loss": 0.0097,
      "step": 2192320
    },
    {
      "epoch": 3.587812493863043,
      "grad_norm": 0.3182762563228607,
      "learning_rate": 2.842985444410034e-06,
      "loss": 0.0099,
      "step": 2192340
    },
    {
      "epoch": 3.587845224301696,
      "grad_norm": 0.12071920186281204,
      "learning_rate": 2.842919552196517e-06,
      "loss": 0.0094,
      "step": 2192360
    },
    {
      "epoch": 3.5878779547403497,
      "grad_norm": 0.08163461089134216,
      "learning_rate": 2.8428536599829996e-06,
      "loss": 0.0126,
      "step": 2192380
    },
    {
      "epoch": 3.587910685179003,
      "grad_norm": 0.3091399073600769,
      "learning_rate": 2.842787767769483e-06,
      "loss": 0.0117,
      "step": 2192400
    },
    {
      "epoch": 3.587943415617656,
      "grad_norm": 0.5201766490936279,
      "learning_rate": 2.842721875555966e-06,
      "loss": 0.0097,
      "step": 2192420
    },
    {
      "epoch": 3.5879761460563095,
      "grad_norm": 0.1078382134437561,
      "learning_rate": 2.8426559833424486e-06,
      "loss": 0.0103,
      "step": 2192440
    },
    {
      "epoch": 3.5880088764949627,
      "grad_norm": 0.5020800232887268,
      "learning_rate": 2.842590091128932e-06,
      "loss": 0.01,
      "step": 2192460
    },
    {
      "epoch": 3.5880416069336163,
      "grad_norm": 0.13420212268829346,
      "learning_rate": 2.8425241989154146e-06,
      "loss": 0.0108,
      "step": 2192480
    },
    {
      "epoch": 3.5880743373722694,
      "grad_norm": 0.2902573347091675,
      "learning_rate": 2.8424583067018973e-06,
      "loss": 0.0074,
      "step": 2192500
    },
    {
      "epoch": 3.588107067810923,
      "grad_norm": 0.22311876714229584,
      "learning_rate": 2.84239241448838e-06,
      "loss": 0.0154,
      "step": 2192520
    },
    {
      "epoch": 3.588139798249576,
      "grad_norm": 0.15462535619735718,
      "learning_rate": 2.842326522274863e-06,
      "loss": 0.0092,
      "step": 2192540
    },
    {
      "epoch": 3.5881725286882293,
      "grad_norm": 0.09929902851581573,
      "learning_rate": 2.842260630061346e-06,
      "loss": 0.0077,
      "step": 2192560
    },
    {
      "epoch": 3.588205259126883,
      "grad_norm": 0.4750099778175354,
      "learning_rate": 2.8421947378478287e-06,
      "loss": 0.0106,
      "step": 2192580
    },
    {
      "epoch": 3.588237989565536,
      "grad_norm": 0.23462499678134918,
      "learning_rate": 2.8421288456343114e-06,
      "loss": 0.0102,
      "step": 2192600
    },
    {
      "epoch": 3.5882707200041892,
      "grad_norm": 0.17459924519062042,
      "learning_rate": 2.8420629534207946e-06,
      "loss": 0.0164,
      "step": 2192620
    },
    {
      "epoch": 3.588303450442843,
      "grad_norm": 0.17528435587882996,
      "learning_rate": 2.8419970612072773e-06,
      "loss": 0.0089,
      "step": 2192640
    },
    {
      "epoch": 3.5883361808814964,
      "grad_norm": 0.09397150576114655,
      "learning_rate": 2.84193116899376e-06,
      "loss": 0.011,
      "step": 2192660
    },
    {
      "epoch": 3.5883689113201496,
      "grad_norm": 0.2737874388694763,
      "learning_rate": 2.841865276780243e-06,
      "loss": 0.0103,
      "step": 2192680
    },
    {
      "epoch": 3.5884016417588027,
      "grad_norm": 0.08335447311401367,
      "learning_rate": 2.841799384566726e-06,
      "loss": 0.0076,
      "step": 2192700
    },
    {
      "epoch": 3.5884343721974563,
      "grad_norm": 0.2313615381717682,
      "learning_rate": 2.8417334923532087e-06,
      "loss": 0.0168,
      "step": 2192720
    },
    {
      "epoch": 3.5884671026361095,
      "grad_norm": 0.7073781490325928,
      "learning_rate": 2.8416676001396914e-06,
      "loss": 0.0148,
      "step": 2192740
    },
    {
      "epoch": 3.5884998330747626,
      "grad_norm": 0.24564652144908905,
      "learning_rate": 2.841601707926175e-06,
      "loss": 0.0114,
      "step": 2192760
    },
    {
      "epoch": 3.588532563513416,
      "grad_norm": 0.15684877336025238,
      "learning_rate": 2.8415358157126578e-06,
      "loss": 0.0106,
      "step": 2192780
    },
    {
      "epoch": 3.58856529395207,
      "grad_norm": 0.14695195853710175,
      "learning_rate": 2.8414699234991405e-06,
      "loss": 0.0127,
      "step": 2192800
    },
    {
      "epoch": 3.588598024390723,
      "grad_norm": 0.48457425832748413,
      "learning_rate": 2.8414040312856232e-06,
      "loss": 0.0095,
      "step": 2192820
    },
    {
      "epoch": 3.588630754829376,
      "grad_norm": 0.5313522815704346,
      "learning_rate": 2.8413381390721064e-06,
      "loss": 0.0119,
      "step": 2192840
    },
    {
      "epoch": 3.5886634852680297,
      "grad_norm": 0.20868611335754395,
      "learning_rate": 2.841272246858589e-06,
      "loss": 0.0151,
      "step": 2192860
    },
    {
      "epoch": 3.588696215706683,
      "grad_norm": 0.2501206398010254,
      "learning_rate": 2.841206354645072e-06,
      "loss": 0.0128,
      "step": 2192880
    },
    {
      "epoch": 3.588728946145336,
      "grad_norm": 0.6032630205154419,
      "learning_rate": 2.8411404624315546e-06,
      "loss": 0.0152,
      "step": 2192900
    },
    {
      "epoch": 3.5887616765839896,
      "grad_norm": 0.2331993281841278,
      "learning_rate": 2.8410745702180374e-06,
      "loss": 0.0119,
      "step": 2192920
    },
    {
      "epoch": 3.588794407022643,
      "grad_norm": 0.046558234840631485,
      "learning_rate": 2.8410086780045205e-06,
      "loss": 0.0102,
      "step": 2192940
    },
    {
      "epoch": 3.5888271374612963,
      "grad_norm": 0.24214960634708405,
      "learning_rate": 2.8409427857910033e-06,
      "loss": 0.0128,
      "step": 2192960
    },
    {
      "epoch": 3.5888598678999495,
      "grad_norm": 0.11621982604265213,
      "learning_rate": 2.840876893577486e-06,
      "loss": 0.0084,
      "step": 2192980
    },
    {
      "epoch": 3.588892598338603,
      "grad_norm": 0.33584699034690857,
      "learning_rate": 2.8408110013639688e-06,
      "loss": 0.0163,
      "step": 2193000
    },
    {
      "epoch": 3.5889253287772562,
      "grad_norm": 0.17941106855869293,
      "learning_rate": 2.840745109150452e-06,
      "loss": 0.0155,
      "step": 2193020
    },
    {
      "epoch": 3.5889580592159094,
      "grad_norm": 0.23555485904216766,
      "learning_rate": 2.8406792169369347e-06,
      "loss": 0.014,
      "step": 2193040
    },
    {
      "epoch": 3.588990789654563,
      "grad_norm": 0.5355510115623474,
      "learning_rate": 2.8406133247234174e-06,
      "loss": 0.0096,
      "step": 2193060
    },
    {
      "epoch": 3.589023520093216,
      "grad_norm": 0.1799737960100174,
      "learning_rate": 2.8405474325099e-06,
      "loss": 0.0069,
      "step": 2193080
    },
    {
      "epoch": 3.5890562505318697,
      "grad_norm": 0.11698644608259201,
      "learning_rate": 2.8404815402963837e-06,
      "loss": 0.0145,
      "step": 2193100
    },
    {
      "epoch": 3.589088980970523,
      "grad_norm": 0.16946475207805634,
      "learning_rate": 2.8404156480828665e-06,
      "loss": 0.0082,
      "step": 2193120
    },
    {
      "epoch": 3.5891217114091765,
      "grad_norm": 0.07386563718318939,
      "learning_rate": 2.840349755869349e-06,
      "loss": 0.0146,
      "step": 2193140
    },
    {
      "epoch": 3.5891544418478296,
      "grad_norm": 0.12401829659938812,
      "learning_rate": 2.8402838636558324e-06,
      "loss": 0.0118,
      "step": 2193160
    },
    {
      "epoch": 3.5891871722864828,
      "grad_norm": 0.14956872165203094,
      "learning_rate": 2.840217971442315e-06,
      "loss": 0.0094,
      "step": 2193180
    },
    {
      "epoch": 3.5892199027251364,
      "grad_norm": 0.11694555729627609,
      "learning_rate": 2.840152079228798e-06,
      "loss": 0.018,
      "step": 2193200
    },
    {
      "epoch": 3.5892526331637895,
      "grad_norm": 0.7143875360488892,
      "learning_rate": 2.8400861870152806e-06,
      "loss": 0.0117,
      "step": 2193220
    },
    {
      "epoch": 3.589285363602443,
      "grad_norm": 0.33713987469673157,
      "learning_rate": 2.8400202948017637e-06,
      "loss": 0.0151,
      "step": 2193240
    },
    {
      "epoch": 3.5893180940410963,
      "grad_norm": 0.22013162076473236,
      "learning_rate": 2.8399544025882465e-06,
      "loss": 0.0073,
      "step": 2193260
    },
    {
      "epoch": 3.58935082447975,
      "grad_norm": 0.532736599445343,
      "learning_rate": 2.8398885103747292e-06,
      "loss": 0.0094,
      "step": 2193280
    },
    {
      "epoch": 3.589383554918403,
      "grad_norm": 0.11704932898283005,
      "learning_rate": 2.839822618161212e-06,
      "loss": 0.0151,
      "step": 2193300
    },
    {
      "epoch": 3.589416285357056,
      "grad_norm": 0.15411734580993652,
      "learning_rate": 2.8397567259476947e-06,
      "loss": 0.0107,
      "step": 2193320
    },
    {
      "epoch": 3.5894490157957097,
      "grad_norm": 0.07808404415845871,
      "learning_rate": 2.839690833734178e-06,
      "loss": 0.0074,
      "step": 2193340
    },
    {
      "epoch": 3.589481746234363,
      "grad_norm": 0.22207088768482208,
      "learning_rate": 2.8396249415206606e-06,
      "loss": 0.0063,
      "step": 2193360
    },
    {
      "epoch": 3.5895144766730165,
      "grad_norm": 0.2597476840019226,
      "learning_rate": 2.8395590493071433e-06,
      "loss": 0.0078,
      "step": 2193380
    },
    {
      "epoch": 3.5895472071116696,
      "grad_norm": 0.3079480230808258,
      "learning_rate": 2.839493157093626e-06,
      "loss": 0.0146,
      "step": 2193400
    },
    {
      "epoch": 3.5895799375503232,
      "grad_norm": 0.24059756100177765,
      "learning_rate": 2.8394272648801092e-06,
      "loss": 0.0121,
      "step": 2193420
    },
    {
      "epoch": 3.5896126679889764,
      "grad_norm": 0.18397726118564606,
      "learning_rate": 2.839361372666592e-06,
      "loss": 0.013,
      "step": 2193440
    },
    {
      "epoch": 3.5896453984276295,
      "grad_norm": 0.025522606447339058,
      "learning_rate": 2.839295480453075e-06,
      "loss": 0.0096,
      "step": 2193460
    },
    {
      "epoch": 3.589678128866283,
      "grad_norm": 0.2877533435821533,
      "learning_rate": 2.8392295882395583e-06,
      "loss": 0.0137,
      "step": 2193480
    },
    {
      "epoch": 3.5897108593049363,
      "grad_norm": 0.6067853569984436,
      "learning_rate": 2.839163696026041e-06,
      "loss": 0.0157,
      "step": 2193500
    },
    {
      "epoch": 3.58974358974359,
      "grad_norm": 0.4931522607803345,
      "learning_rate": 2.839097803812524e-06,
      "loss": 0.0162,
      "step": 2193520
    },
    {
      "epoch": 3.589776320182243,
      "grad_norm": 0.21739041805267334,
      "learning_rate": 2.8390319115990065e-06,
      "loss": 0.0129,
      "step": 2193540
    },
    {
      "epoch": 3.5898090506208966,
      "grad_norm": 0.13270285725593567,
      "learning_rate": 2.8389660193854897e-06,
      "loss": 0.0088,
      "step": 2193560
    },
    {
      "epoch": 3.5898417810595498,
      "grad_norm": 0.5080248117446899,
      "learning_rate": 2.8389001271719724e-06,
      "loss": 0.007,
      "step": 2193580
    },
    {
      "epoch": 3.589874511498203,
      "grad_norm": 0.2997453212738037,
      "learning_rate": 2.838834234958455e-06,
      "loss": 0.0141,
      "step": 2193600
    },
    {
      "epoch": 3.5899072419368565,
      "grad_norm": 0.5747318267822266,
      "learning_rate": 2.838768342744938e-06,
      "loss": 0.0085,
      "step": 2193620
    },
    {
      "epoch": 3.5899399723755097,
      "grad_norm": 0.7181159257888794,
      "learning_rate": 2.838702450531421e-06,
      "loss": 0.0126,
      "step": 2193640
    },
    {
      "epoch": 3.5899727028141633,
      "grad_norm": 0.07393408566713333,
      "learning_rate": 2.838636558317904e-06,
      "loss": 0.011,
      "step": 2193660
    },
    {
      "epoch": 3.5900054332528164,
      "grad_norm": 0.2298886477947235,
      "learning_rate": 2.8385706661043866e-06,
      "loss": 0.0104,
      "step": 2193680
    },
    {
      "epoch": 3.59003816369147,
      "grad_norm": 0.21991898119449615,
      "learning_rate": 2.8385047738908693e-06,
      "loss": 0.0096,
      "step": 2193700
    },
    {
      "epoch": 3.590070894130123,
      "grad_norm": 0.1478492021560669,
      "learning_rate": 2.8384388816773525e-06,
      "loss": 0.0068,
      "step": 2193720
    },
    {
      "epoch": 3.5901036245687763,
      "grad_norm": 0.2419402152299881,
      "learning_rate": 2.838372989463835e-06,
      "loss": 0.0091,
      "step": 2193740
    },
    {
      "epoch": 3.59013635500743,
      "grad_norm": 0.20404738187789917,
      "learning_rate": 2.838307097250318e-06,
      "loss": 0.0089,
      "step": 2193760
    },
    {
      "epoch": 3.590169085446083,
      "grad_norm": 0.1411750763654709,
      "learning_rate": 2.8382412050368007e-06,
      "loss": 0.0153,
      "step": 2193780
    },
    {
      "epoch": 3.5902018158847366,
      "grad_norm": 0.045634396374225616,
      "learning_rate": 2.8381753128232843e-06,
      "loss": 0.0073,
      "step": 2193800
    },
    {
      "epoch": 3.59023454632339,
      "grad_norm": 0.4060414135456085,
      "learning_rate": 2.838109420609767e-06,
      "loss": 0.0131,
      "step": 2193820
    },
    {
      "epoch": 3.5902672767620434,
      "grad_norm": 0.8871859908103943,
      "learning_rate": 2.8380435283962497e-06,
      "loss": 0.0079,
      "step": 2193840
    },
    {
      "epoch": 3.5903000072006965,
      "grad_norm": 0.3390592336654663,
      "learning_rate": 2.8379776361827325e-06,
      "loss": 0.0077,
      "step": 2193860
    },
    {
      "epoch": 3.5903327376393497,
      "grad_norm": 0.16030384600162506,
      "learning_rate": 2.8379117439692157e-06,
      "loss": 0.01,
      "step": 2193880
    },
    {
      "epoch": 3.5903654680780033,
      "grad_norm": 0.43598631024360657,
      "learning_rate": 2.8378458517556984e-06,
      "loss": 0.0099,
      "step": 2193900
    },
    {
      "epoch": 3.5903981985166564,
      "grad_norm": 0.2268446981906891,
      "learning_rate": 2.837779959542181e-06,
      "loss": 0.0104,
      "step": 2193920
    },
    {
      "epoch": 3.59043092895531,
      "grad_norm": 0.30896979570388794,
      "learning_rate": 2.837714067328664e-06,
      "loss": 0.016,
      "step": 2193940
    },
    {
      "epoch": 3.590463659393963,
      "grad_norm": 0.05183141306042671,
      "learning_rate": 2.837648175115147e-06,
      "loss": 0.0113,
      "step": 2193960
    },
    {
      "epoch": 3.5904963898326168,
      "grad_norm": 0.10940568149089813,
      "learning_rate": 2.8375822829016298e-06,
      "loss": 0.0124,
      "step": 2193980
    },
    {
      "epoch": 3.59052912027127,
      "grad_norm": 0.5430625081062317,
      "learning_rate": 2.8375163906881125e-06,
      "loss": 0.0097,
      "step": 2194000
    },
    {
      "epoch": 3.590561850709923,
      "grad_norm": 0.08705437928438187,
      "learning_rate": 2.8374504984745953e-06,
      "loss": 0.0115,
      "step": 2194020
    },
    {
      "epoch": 3.5905945811485767,
      "grad_norm": 0.10944658517837524,
      "learning_rate": 2.8373846062610784e-06,
      "loss": 0.0092,
      "step": 2194040
    },
    {
      "epoch": 3.59062731158723,
      "grad_norm": 0.19494429230690002,
      "learning_rate": 2.837318714047561e-06,
      "loss": 0.0098,
      "step": 2194060
    },
    {
      "epoch": 3.590660042025883,
      "grad_norm": 0.3838040232658386,
      "learning_rate": 2.837252821834044e-06,
      "loss": 0.0095,
      "step": 2194080
    },
    {
      "epoch": 3.5906927724645366,
      "grad_norm": 0.41762906312942505,
      "learning_rate": 2.8371869296205266e-06,
      "loss": 0.0155,
      "step": 2194100
    },
    {
      "epoch": 3.59072550290319,
      "grad_norm": 0.16848984360694885,
      "learning_rate": 2.83712103740701e-06,
      "loss": 0.0128,
      "step": 2194120
    },
    {
      "epoch": 3.5907582333418433,
      "grad_norm": 0.4388807415962219,
      "learning_rate": 2.8370551451934925e-06,
      "loss": 0.014,
      "step": 2194140
    },
    {
      "epoch": 3.5907909637804964,
      "grad_norm": 0.3563694357872009,
      "learning_rate": 2.8369892529799757e-06,
      "loss": 0.011,
      "step": 2194160
    },
    {
      "epoch": 3.59082369421915,
      "grad_norm": 0.08940786123275757,
      "learning_rate": 2.836923360766459e-06,
      "loss": 0.0146,
      "step": 2194180
    },
    {
      "epoch": 3.590856424657803,
      "grad_norm": 0.07751265168190002,
      "learning_rate": 2.8368574685529416e-06,
      "loss": 0.0093,
      "step": 2194200
    },
    {
      "epoch": 3.5908891550964563,
      "grad_norm": 0.13023731112480164,
      "learning_rate": 2.8367915763394243e-06,
      "loss": 0.0142,
      "step": 2194220
    },
    {
      "epoch": 3.59092188553511,
      "grad_norm": 0.6975439786911011,
      "learning_rate": 2.836725684125907e-06,
      "loss": 0.0107,
      "step": 2194240
    },
    {
      "epoch": 3.5909546159737635,
      "grad_norm": 0.2616105377674103,
      "learning_rate": 2.8366597919123902e-06,
      "loss": 0.0117,
      "step": 2194260
    },
    {
      "epoch": 3.5909873464124167,
      "grad_norm": 0.055280156433582306,
      "learning_rate": 2.836593899698873e-06,
      "loss": 0.0067,
      "step": 2194280
    },
    {
      "epoch": 3.59102007685107,
      "grad_norm": 0.19408690929412842,
      "learning_rate": 2.8365280074853557e-06,
      "loss": 0.0125,
      "step": 2194300
    },
    {
      "epoch": 3.5910528072897234,
      "grad_norm": 0.29912176728248596,
      "learning_rate": 2.8364621152718385e-06,
      "loss": 0.0055,
      "step": 2194320
    },
    {
      "epoch": 3.5910855377283766,
      "grad_norm": 0.15913093090057373,
      "learning_rate": 2.836396223058321e-06,
      "loss": 0.011,
      "step": 2194340
    },
    {
      "epoch": 3.5911182681670297,
      "grad_norm": 0.17845550179481506,
      "learning_rate": 2.8363303308448044e-06,
      "loss": 0.0153,
      "step": 2194360
    },
    {
      "epoch": 3.5911509986056833,
      "grad_norm": 0.13018600642681122,
      "learning_rate": 2.836264438631287e-06,
      "loss": 0.0107,
      "step": 2194380
    },
    {
      "epoch": 3.591183729044337,
      "grad_norm": 0.28230246901512146,
      "learning_rate": 2.83619854641777e-06,
      "loss": 0.0122,
      "step": 2194400
    },
    {
      "epoch": 3.59121645948299,
      "grad_norm": 0.3450932502746582,
      "learning_rate": 2.8361326542042526e-06,
      "loss": 0.0129,
      "step": 2194420
    },
    {
      "epoch": 3.591249189921643,
      "grad_norm": 0.1782463937997818,
      "learning_rate": 2.8360667619907358e-06,
      "loss": 0.0141,
      "step": 2194440
    },
    {
      "epoch": 3.591281920360297,
      "grad_norm": 0.08032825589179993,
      "learning_rate": 2.8360008697772185e-06,
      "loss": 0.0077,
      "step": 2194460
    },
    {
      "epoch": 3.59131465079895,
      "grad_norm": 0.10025274008512497,
      "learning_rate": 2.8359349775637012e-06,
      "loss": 0.0119,
      "step": 2194480
    },
    {
      "epoch": 3.591347381237603,
      "grad_norm": 0.2833689749240875,
      "learning_rate": 2.835869085350184e-06,
      "loss": 0.0107,
      "step": 2194500
    },
    {
      "epoch": 3.5913801116762567,
      "grad_norm": 0.3233335316181183,
      "learning_rate": 2.8358031931366676e-06,
      "loss": 0.0107,
      "step": 2194520
    },
    {
      "epoch": 3.5914128421149103,
      "grad_norm": 0.691788375377655,
      "learning_rate": 2.8357373009231503e-06,
      "loss": 0.0152,
      "step": 2194540
    },
    {
      "epoch": 3.5914455725535634,
      "grad_norm": 0.1424713134765625,
      "learning_rate": 2.835671408709633e-06,
      "loss": 0.0114,
      "step": 2194560
    },
    {
      "epoch": 3.5914783029922166,
      "grad_norm": 0.31342053413391113,
      "learning_rate": 2.835605516496116e-06,
      "loss": 0.0106,
      "step": 2194580
    },
    {
      "epoch": 3.59151103343087,
      "grad_norm": 0.32122474908828735,
      "learning_rate": 2.835539624282599e-06,
      "loss": 0.0114,
      "step": 2194600
    },
    {
      "epoch": 3.5915437638695233,
      "grad_norm": 0.13313236832618713,
      "learning_rate": 2.8354737320690817e-06,
      "loss": 0.009,
      "step": 2194620
    },
    {
      "epoch": 3.5915764943081765,
      "grad_norm": 0.29613223671913147,
      "learning_rate": 2.8354078398555644e-06,
      "loss": 0.0091,
      "step": 2194640
    },
    {
      "epoch": 3.59160922474683,
      "grad_norm": 1.0788686275482178,
      "learning_rate": 2.8353419476420476e-06,
      "loss": 0.01,
      "step": 2194660
    },
    {
      "epoch": 3.5916419551854832,
      "grad_norm": 0.3204226493835449,
      "learning_rate": 2.8352760554285303e-06,
      "loss": 0.0179,
      "step": 2194680
    },
    {
      "epoch": 3.591674685624137,
      "grad_norm": 0.2355729341506958,
      "learning_rate": 2.835210163215013e-06,
      "loss": 0.0071,
      "step": 2194700
    },
    {
      "epoch": 3.59170741606279,
      "grad_norm": 0.27688536047935486,
      "learning_rate": 2.835144271001496e-06,
      "loss": 0.0167,
      "step": 2194720
    },
    {
      "epoch": 3.5917401465014436,
      "grad_norm": 0.2534371018409729,
      "learning_rate": 2.835078378787979e-06,
      "loss": 0.0142,
      "step": 2194740
    },
    {
      "epoch": 3.5917728769400967,
      "grad_norm": 0.16529187560081482,
      "learning_rate": 2.8350124865744617e-06,
      "loss": 0.0069,
      "step": 2194760
    },
    {
      "epoch": 3.59180560737875,
      "grad_norm": 0.12765587866306305,
      "learning_rate": 2.8349465943609444e-06,
      "loss": 0.0115,
      "step": 2194780
    },
    {
      "epoch": 3.5918383378174035,
      "grad_norm": 0.03364478424191475,
      "learning_rate": 2.834880702147427e-06,
      "loss": 0.0089,
      "step": 2194800
    },
    {
      "epoch": 3.5918710682560566,
      "grad_norm": 0.22377321124076843,
      "learning_rate": 2.83481480993391e-06,
      "loss": 0.0081,
      "step": 2194820
    },
    {
      "epoch": 3.59190379869471,
      "grad_norm": 0.18106350302696228,
      "learning_rate": 2.834748917720393e-06,
      "loss": 0.0102,
      "step": 2194840
    },
    {
      "epoch": 3.5919365291333634,
      "grad_norm": 0.2723633944988251,
      "learning_rate": 2.8346830255068763e-06,
      "loss": 0.011,
      "step": 2194860
    },
    {
      "epoch": 3.591969259572017,
      "grad_norm": 0.13118478655815125,
      "learning_rate": 2.834617133293359e-06,
      "loss": 0.0085,
      "step": 2194880
    },
    {
      "epoch": 3.59200199001067,
      "grad_norm": 0.10070107132196426,
      "learning_rate": 2.834551241079842e-06,
      "loss": 0.0133,
      "step": 2194900
    },
    {
      "epoch": 3.5920347204493233,
      "grad_norm": 0.17677010595798492,
      "learning_rate": 2.834485348866325e-06,
      "loss": 0.0099,
      "step": 2194920
    },
    {
      "epoch": 3.592067450887977,
      "grad_norm": 0.2180345207452774,
      "learning_rate": 2.8344194566528076e-06,
      "loss": 0.0077,
      "step": 2194940
    },
    {
      "epoch": 3.59210018132663,
      "grad_norm": 0.14389009773731232,
      "learning_rate": 2.8343535644392904e-06,
      "loss": 0.0125,
      "step": 2194960
    },
    {
      "epoch": 3.5921329117652836,
      "grad_norm": 0.38989436626434326,
      "learning_rate": 2.8342876722257735e-06,
      "loss": 0.0117,
      "step": 2194980
    },
    {
      "epoch": 3.5921656422039367,
      "grad_norm": 0.554771900177002,
      "learning_rate": 2.8342217800122563e-06,
      "loss": 0.0094,
      "step": 2195000
    },
    {
      "epoch": 3.5921983726425903,
      "grad_norm": 0.44680166244506836,
      "learning_rate": 2.834155887798739e-06,
      "loss": 0.0152,
      "step": 2195020
    },
    {
      "epoch": 3.5922311030812435,
      "grad_norm": 0.1037718877196312,
      "learning_rate": 2.8340899955852218e-06,
      "loss": 0.0124,
      "step": 2195040
    },
    {
      "epoch": 3.5922638335198966,
      "grad_norm": 0.16041219234466553,
      "learning_rate": 2.834024103371705e-06,
      "loss": 0.0149,
      "step": 2195060
    },
    {
      "epoch": 3.5922965639585502,
      "grad_norm": 0.3149453103542328,
      "learning_rate": 2.8339582111581877e-06,
      "loss": 0.0101,
      "step": 2195080
    },
    {
      "epoch": 3.5923292943972034,
      "grad_norm": 0.33731433749198914,
      "learning_rate": 2.8338923189446704e-06,
      "loss": 0.017,
      "step": 2195100
    },
    {
      "epoch": 3.592362024835857,
      "grad_norm": 0.4399418234825134,
      "learning_rate": 2.833826426731153e-06,
      "loss": 0.0084,
      "step": 2195120
    },
    {
      "epoch": 3.59239475527451,
      "grad_norm": 0.1649937629699707,
      "learning_rate": 2.8337605345176363e-06,
      "loss": 0.0153,
      "step": 2195140
    },
    {
      "epoch": 3.5924274857131637,
      "grad_norm": 0.22744198143482208,
      "learning_rate": 2.833694642304119e-06,
      "loss": 0.0104,
      "step": 2195160
    },
    {
      "epoch": 3.592460216151817,
      "grad_norm": 0.40644359588623047,
      "learning_rate": 2.8336287500906018e-06,
      "loss": 0.0097,
      "step": 2195180
    },
    {
      "epoch": 3.59249294659047,
      "grad_norm": 0.2516387104988098,
      "learning_rate": 2.8335628578770845e-06,
      "loss": 0.0133,
      "step": 2195200
    },
    {
      "epoch": 3.5925256770291236,
      "grad_norm": 0.3744683563709259,
      "learning_rate": 2.833496965663568e-06,
      "loss": 0.0106,
      "step": 2195220
    },
    {
      "epoch": 3.5925584074677768,
      "grad_norm": 0.07295520603656769,
      "learning_rate": 2.833431073450051e-06,
      "loss": 0.0073,
      "step": 2195240
    },
    {
      "epoch": 3.5925911379064304,
      "grad_norm": 0.12793034315109253,
      "learning_rate": 2.8333651812365336e-06,
      "loss": 0.0066,
      "step": 2195260
    },
    {
      "epoch": 3.5926238683450835,
      "grad_norm": 0.11975975334644318,
      "learning_rate": 2.8332992890230168e-06,
      "loss": 0.0162,
      "step": 2195280
    },
    {
      "epoch": 3.592656598783737,
      "grad_norm": 0.12829917669296265,
      "learning_rate": 2.8332333968094995e-06,
      "loss": 0.0085,
      "step": 2195300
    },
    {
      "epoch": 3.5926893292223903,
      "grad_norm": 0.299096018075943,
      "learning_rate": 2.8331675045959822e-06,
      "loss": 0.0135,
      "step": 2195320
    },
    {
      "epoch": 3.5927220596610434,
      "grad_norm": 0.3024820387363434,
      "learning_rate": 2.833101612382465e-06,
      "loss": 0.0089,
      "step": 2195340
    },
    {
      "epoch": 3.592754790099697,
      "grad_norm": 0.21190573275089264,
      "learning_rate": 2.8330357201689477e-06,
      "loss": 0.013,
      "step": 2195360
    },
    {
      "epoch": 3.59278752053835,
      "grad_norm": 0.2780575752258301,
      "learning_rate": 2.832969827955431e-06,
      "loss": 0.0098,
      "step": 2195380
    },
    {
      "epoch": 3.5928202509770037,
      "grad_norm": 0.196194589138031,
      "learning_rate": 2.8329039357419136e-06,
      "loss": 0.0098,
      "step": 2195400
    },
    {
      "epoch": 3.592852981415657,
      "grad_norm": 0.29251953959465027,
      "learning_rate": 2.8328380435283964e-06,
      "loss": 0.0121,
      "step": 2195420
    },
    {
      "epoch": 3.5928857118543105,
      "grad_norm": 0.040375784039497375,
      "learning_rate": 2.832772151314879e-06,
      "loss": 0.0129,
      "step": 2195440
    },
    {
      "epoch": 3.5929184422929636,
      "grad_norm": 0.19585020840168,
      "learning_rate": 2.8327062591013623e-06,
      "loss": 0.0117,
      "step": 2195460
    },
    {
      "epoch": 3.592951172731617,
      "grad_norm": 0.1845606118440628,
      "learning_rate": 2.832640366887845e-06,
      "loss": 0.0111,
      "step": 2195480
    },
    {
      "epoch": 3.5929839031702704,
      "grad_norm": 0.1607048213481903,
      "learning_rate": 2.8325744746743277e-06,
      "loss": 0.0147,
      "step": 2195500
    },
    {
      "epoch": 3.5930166336089235,
      "grad_norm": 0.34111282229423523,
      "learning_rate": 2.8325085824608105e-06,
      "loss": 0.0094,
      "step": 2195520
    },
    {
      "epoch": 3.5930493640475767,
      "grad_norm": 0.2586503326892853,
      "learning_rate": 2.8324426902472936e-06,
      "loss": 0.01,
      "step": 2195540
    },
    {
      "epoch": 3.5930820944862303,
      "grad_norm": 0.08202412724494934,
      "learning_rate": 2.832376798033777e-06,
      "loss": 0.0116,
      "step": 2195560
    },
    {
      "epoch": 3.593114824924884,
      "grad_norm": 0.07999575883150101,
      "learning_rate": 2.8323109058202595e-06,
      "loss": 0.01,
      "step": 2195580
    },
    {
      "epoch": 3.593147555363537,
      "grad_norm": 0.4102114737033844,
      "learning_rate": 2.8322450136067427e-06,
      "loss": 0.0111,
      "step": 2195600
    },
    {
      "epoch": 3.59318028580219,
      "grad_norm": 0.5757094025611877,
      "learning_rate": 2.8321791213932254e-06,
      "loss": 0.0101,
      "step": 2195620
    },
    {
      "epoch": 3.5932130162408438,
      "grad_norm": 0.3499809503555298,
      "learning_rate": 2.832113229179708e-06,
      "loss": 0.0079,
      "step": 2195640
    },
    {
      "epoch": 3.593245746679497,
      "grad_norm": 0.2621783912181854,
      "learning_rate": 2.832047336966191e-06,
      "loss": 0.0095,
      "step": 2195660
    },
    {
      "epoch": 3.59327847711815,
      "grad_norm": 0.08170180022716522,
      "learning_rate": 2.831981444752674e-06,
      "loss": 0.0099,
      "step": 2195680
    },
    {
      "epoch": 3.5933112075568037,
      "grad_norm": 0.24998129904270172,
      "learning_rate": 2.831915552539157e-06,
      "loss": 0.0101,
      "step": 2195700
    },
    {
      "epoch": 3.5933439379954573,
      "grad_norm": 0.19744467735290527,
      "learning_rate": 2.8318496603256396e-06,
      "loss": 0.0125,
      "step": 2195720
    },
    {
      "epoch": 3.5933766684341104,
      "grad_norm": 0.3410215377807617,
      "learning_rate": 2.8317837681121223e-06,
      "loss": 0.0098,
      "step": 2195740
    },
    {
      "epoch": 3.5934093988727636,
      "grad_norm": 0.22102303802967072,
      "learning_rate": 2.831717875898605e-06,
      "loss": 0.0088,
      "step": 2195760
    },
    {
      "epoch": 3.593442129311417,
      "grad_norm": 0.09764900803565979,
      "learning_rate": 2.831651983685088e-06,
      "loss": 0.0128,
      "step": 2195780
    },
    {
      "epoch": 3.5934748597500703,
      "grad_norm": 1.1826132535934448,
      "learning_rate": 2.831586091471571e-06,
      "loss": 0.0157,
      "step": 2195800
    },
    {
      "epoch": 3.5935075901887235,
      "grad_norm": 0.15404731035232544,
      "learning_rate": 2.8315201992580537e-06,
      "loss": 0.0092,
      "step": 2195820
    },
    {
      "epoch": 3.593540320627377,
      "grad_norm": 0.2649288773536682,
      "learning_rate": 2.8314543070445364e-06,
      "loss": 0.0105,
      "step": 2195840
    },
    {
      "epoch": 3.5935730510660306,
      "grad_norm": 0.20819894969463348,
      "learning_rate": 2.8313884148310196e-06,
      "loss": 0.0172,
      "step": 2195860
    },
    {
      "epoch": 3.593605781504684,
      "grad_norm": 0.07351846992969513,
      "learning_rate": 2.8313225226175023e-06,
      "loss": 0.0102,
      "step": 2195880
    },
    {
      "epoch": 3.593638511943337,
      "grad_norm": 0.30545148253440857,
      "learning_rate": 2.831256630403985e-06,
      "loss": 0.0161,
      "step": 2195900
    },
    {
      "epoch": 3.5936712423819905,
      "grad_norm": 0.05096346139907837,
      "learning_rate": 2.8311907381904687e-06,
      "loss": 0.0098,
      "step": 2195920
    },
    {
      "epoch": 3.5937039728206437,
      "grad_norm": 0.8148896098136902,
      "learning_rate": 2.8311248459769514e-06,
      "loss": 0.013,
      "step": 2195940
    },
    {
      "epoch": 3.593736703259297,
      "grad_norm": 0.10247696191072464,
      "learning_rate": 2.831058953763434e-06,
      "loss": 0.0106,
      "step": 2195960
    },
    {
      "epoch": 3.5937694336979504,
      "grad_norm": 0.29372844099998474,
      "learning_rate": 2.830993061549917e-06,
      "loss": 0.0105,
      "step": 2195980
    },
    {
      "epoch": 3.593802164136604,
      "grad_norm": 1.331902027130127,
      "learning_rate": 2.8309271693364e-06,
      "loss": 0.009,
      "step": 2196000
    },
    {
      "epoch": 3.593834894575257,
      "grad_norm": 0.21099749207496643,
      "learning_rate": 2.8308612771228828e-06,
      "loss": 0.011,
      "step": 2196020
    },
    {
      "epoch": 3.5938676250139103,
      "grad_norm": 0.9063189029693604,
      "learning_rate": 2.8307953849093655e-06,
      "loss": 0.0098,
      "step": 2196040
    },
    {
      "epoch": 3.593900355452564,
      "grad_norm": 0.3815245032310486,
      "learning_rate": 2.8307294926958483e-06,
      "loss": 0.0133,
      "step": 2196060
    },
    {
      "epoch": 3.593933085891217,
      "grad_norm": 0.19660590589046478,
      "learning_rate": 2.8306636004823314e-06,
      "loss": 0.0099,
      "step": 2196080
    },
    {
      "epoch": 3.59396581632987,
      "grad_norm": 0.29287609457969666,
      "learning_rate": 2.830597708268814e-06,
      "loss": 0.012,
      "step": 2196100
    },
    {
      "epoch": 3.593998546768524,
      "grad_norm": 0.1828170269727707,
      "learning_rate": 2.830531816055297e-06,
      "loss": 0.0102,
      "step": 2196120
    },
    {
      "epoch": 3.594031277207177,
      "grad_norm": 0.42684119939804077,
      "learning_rate": 2.8304659238417796e-06,
      "loss": 0.0104,
      "step": 2196140
    },
    {
      "epoch": 3.5940640076458306,
      "grad_norm": 0.3534112870693207,
      "learning_rate": 2.830400031628263e-06,
      "loss": 0.0132,
      "step": 2196160
    },
    {
      "epoch": 3.5940967380844837,
      "grad_norm": 0.7041259407997131,
      "learning_rate": 2.8303341394147455e-06,
      "loss": 0.0132,
      "step": 2196180
    },
    {
      "epoch": 3.5941294685231373,
      "grad_norm": 0.2800990045070648,
      "learning_rate": 2.8302682472012283e-06,
      "loss": 0.0164,
      "step": 2196200
    },
    {
      "epoch": 3.5941621989617905,
      "grad_norm": 0.5963242650032043,
      "learning_rate": 2.830202354987711e-06,
      "loss": 0.0132,
      "step": 2196220
    },
    {
      "epoch": 3.5941949294004436,
      "grad_norm": 0.23160764575004578,
      "learning_rate": 2.8301364627741938e-06,
      "loss": 0.0094,
      "step": 2196240
    },
    {
      "epoch": 3.594227659839097,
      "grad_norm": 0.4475628137588501,
      "learning_rate": 2.830070570560677e-06,
      "loss": 0.0125,
      "step": 2196260
    },
    {
      "epoch": 3.5942603902777503,
      "grad_norm": 0.13809844851493835,
      "learning_rate": 2.83000467834716e-06,
      "loss": 0.0086,
      "step": 2196280
    },
    {
      "epoch": 3.594293120716404,
      "grad_norm": 0.6336200833320618,
      "learning_rate": 2.829938786133643e-06,
      "loss": 0.0097,
      "step": 2196300
    },
    {
      "epoch": 3.594325851155057,
      "grad_norm": 0.5788704752922058,
      "learning_rate": 2.829872893920126e-06,
      "loss": 0.0098,
      "step": 2196320
    },
    {
      "epoch": 3.5943585815937107,
      "grad_norm": 0.7427761554718018,
      "learning_rate": 2.8298070017066087e-06,
      "loss": 0.0137,
      "step": 2196340
    },
    {
      "epoch": 3.594391312032364,
      "grad_norm": 0.2764757573604584,
      "learning_rate": 2.8297411094930915e-06,
      "loss": 0.0171,
      "step": 2196360
    },
    {
      "epoch": 3.594424042471017,
      "grad_norm": 0.24693290889263153,
      "learning_rate": 2.8296752172795742e-06,
      "loss": 0.0107,
      "step": 2196380
    },
    {
      "epoch": 3.5944567729096706,
      "grad_norm": 0.4568334221839905,
      "learning_rate": 2.8296093250660574e-06,
      "loss": 0.0082,
      "step": 2196400
    },
    {
      "epoch": 3.5944895033483237,
      "grad_norm": 0.31360384821891785,
      "learning_rate": 2.82954343285254e-06,
      "loss": 0.0124,
      "step": 2196420
    },
    {
      "epoch": 3.5945222337869773,
      "grad_norm": 0.10437754541635513,
      "learning_rate": 2.829477540639023e-06,
      "loss": 0.01,
      "step": 2196440
    },
    {
      "epoch": 3.5945549642256305,
      "grad_norm": 0.6027533411979675,
      "learning_rate": 2.8294116484255056e-06,
      "loss": 0.01,
      "step": 2196460
    },
    {
      "epoch": 3.594587694664284,
      "grad_norm": 0.6880001425743103,
      "learning_rate": 2.8293457562119888e-06,
      "loss": 0.0106,
      "step": 2196480
    },
    {
      "epoch": 3.5946204251029372,
      "grad_norm": 0.464976966381073,
      "learning_rate": 2.8292798639984715e-06,
      "loss": 0.0078,
      "step": 2196500
    },
    {
      "epoch": 3.5946531555415904,
      "grad_norm": 0.2731454074382782,
      "learning_rate": 2.8292139717849542e-06,
      "loss": 0.0127,
      "step": 2196520
    },
    {
      "epoch": 3.594685885980244,
      "grad_norm": 0.6395130157470703,
      "learning_rate": 2.829148079571437e-06,
      "loss": 0.0131,
      "step": 2196540
    },
    {
      "epoch": 3.594718616418897,
      "grad_norm": 0.2337932139635086,
      "learning_rate": 2.82908218735792e-06,
      "loss": 0.01,
      "step": 2196560
    },
    {
      "epoch": 3.5947513468575507,
      "grad_norm": 0.1926243007183075,
      "learning_rate": 2.829016295144403e-06,
      "loss": 0.0108,
      "step": 2196580
    },
    {
      "epoch": 3.594784077296204,
      "grad_norm": 0.17203432321548462,
      "learning_rate": 2.8289504029308856e-06,
      "loss": 0.0131,
      "step": 2196600
    },
    {
      "epoch": 3.5948168077348575,
      "grad_norm": 0.22242030501365662,
      "learning_rate": 2.828884510717369e-06,
      "loss": 0.0101,
      "step": 2196620
    },
    {
      "epoch": 3.5948495381735106,
      "grad_norm": 0.7574629783630371,
      "learning_rate": 2.828818618503852e-06,
      "loss": 0.0092,
      "step": 2196640
    },
    {
      "epoch": 3.5948822686121638,
      "grad_norm": 0.33267101645469666,
      "learning_rate": 2.8287527262903347e-06,
      "loss": 0.0078,
      "step": 2196660
    },
    {
      "epoch": 3.5949149990508174,
      "grad_norm": 0.27502262592315674,
      "learning_rate": 2.8286868340768174e-06,
      "loss": 0.0171,
      "step": 2196680
    },
    {
      "epoch": 3.5949477294894705,
      "grad_norm": 0.42351698875427246,
      "learning_rate": 2.8286209418633006e-06,
      "loss": 0.0096,
      "step": 2196700
    },
    {
      "epoch": 3.594980459928124,
      "grad_norm": 0.35562795400619507,
      "learning_rate": 2.8285550496497833e-06,
      "loss": 0.008,
      "step": 2196720
    },
    {
      "epoch": 3.5950131903667772,
      "grad_norm": 0.21018856763839722,
      "learning_rate": 2.828489157436266e-06,
      "loss": 0.0131,
      "step": 2196740
    },
    {
      "epoch": 3.595045920805431,
      "grad_norm": 0.34850165247917175,
      "learning_rate": 2.828423265222749e-06,
      "loss": 0.0112,
      "step": 2196760
    },
    {
      "epoch": 3.595078651244084,
      "grad_norm": 0.35373827815055847,
      "learning_rate": 2.8283573730092315e-06,
      "loss": 0.0082,
      "step": 2196780
    },
    {
      "epoch": 3.595111381682737,
      "grad_norm": 0.2790676951408386,
      "learning_rate": 2.8282914807957147e-06,
      "loss": 0.0098,
      "step": 2196800
    },
    {
      "epoch": 3.5951441121213907,
      "grad_norm": 0.3361588418483734,
      "learning_rate": 2.8282255885821975e-06,
      "loss": 0.013,
      "step": 2196820
    },
    {
      "epoch": 3.595176842560044,
      "grad_norm": 0.18242423236370087,
      "learning_rate": 2.82815969636868e-06,
      "loss": 0.0127,
      "step": 2196840
    },
    {
      "epoch": 3.5952095729986975,
      "grad_norm": 0.11369232088327408,
      "learning_rate": 2.828093804155163e-06,
      "loss": 0.0154,
      "step": 2196860
    },
    {
      "epoch": 3.5952423034373506,
      "grad_norm": 0.2661776542663574,
      "learning_rate": 2.828027911941646e-06,
      "loss": 0.0121,
      "step": 2196880
    },
    {
      "epoch": 3.5952750338760042,
      "grad_norm": 0.13224931061267853,
      "learning_rate": 2.827962019728129e-06,
      "loss": 0.0154,
      "step": 2196900
    },
    {
      "epoch": 3.5953077643146574,
      "grad_norm": 0.07717122882604599,
      "learning_rate": 2.8278961275146116e-06,
      "loss": 0.0171,
      "step": 2196920
    },
    {
      "epoch": 3.5953404947533105,
      "grad_norm": 0.4462997615337372,
      "learning_rate": 2.8278302353010943e-06,
      "loss": 0.0121,
      "step": 2196940
    },
    {
      "epoch": 3.595373225191964,
      "grad_norm": 0.2504083514213562,
      "learning_rate": 2.8277643430875775e-06,
      "loss": 0.0137,
      "step": 2196960
    },
    {
      "epoch": 3.5954059556306173,
      "grad_norm": 0.2930670976638794,
      "learning_rate": 2.8276984508740606e-06,
      "loss": 0.0156,
      "step": 2196980
    },
    {
      "epoch": 3.595438686069271,
      "grad_norm": 0.2567538022994995,
      "learning_rate": 2.8276325586605434e-06,
      "loss": 0.0106,
      "step": 2197000
    },
    {
      "epoch": 3.595471416507924,
      "grad_norm": 0.22325025498867035,
      "learning_rate": 2.8275666664470265e-06,
      "loss": 0.0121,
      "step": 2197020
    },
    {
      "epoch": 3.5955041469465776,
      "grad_norm": 0.26402780413627625,
      "learning_rate": 2.8275007742335093e-06,
      "loss": 0.0125,
      "step": 2197040
    },
    {
      "epoch": 3.5955368773852308,
      "grad_norm": 0.29421788454055786,
      "learning_rate": 2.827434882019992e-06,
      "loss": 0.0097,
      "step": 2197060
    },
    {
      "epoch": 3.595569607823884,
      "grad_norm": 0.3294728994369507,
      "learning_rate": 2.8273689898064748e-06,
      "loss": 0.0128,
      "step": 2197080
    },
    {
      "epoch": 3.5956023382625375,
      "grad_norm": 0.23599842190742493,
      "learning_rate": 2.827303097592958e-06,
      "loss": 0.0101,
      "step": 2197100
    },
    {
      "epoch": 3.5956350687011907,
      "grad_norm": 0.36880943179130554,
      "learning_rate": 2.8272372053794407e-06,
      "loss": 0.0096,
      "step": 2197120
    },
    {
      "epoch": 3.595667799139844,
      "grad_norm": 0.3964158296585083,
      "learning_rate": 2.8271713131659234e-06,
      "loss": 0.0093,
      "step": 2197140
    },
    {
      "epoch": 3.5957005295784974,
      "grad_norm": 0.21850831806659698,
      "learning_rate": 2.827105420952406e-06,
      "loss": 0.0124,
      "step": 2197160
    },
    {
      "epoch": 3.595733260017151,
      "grad_norm": 0.4300575852394104,
      "learning_rate": 2.827039528738889e-06,
      "loss": 0.0121,
      "step": 2197180
    },
    {
      "epoch": 3.595765990455804,
      "grad_norm": 0.3715741038322449,
      "learning_rate": 2.826973636525372e-06,
      "loss": 0.0132,
      "step": 2197200
    },
    {
      "epoch": 3.5957987208944573,
      "grad_norm": 0.16653980314731598,
      "learning_rate": 2.826907744311855e-06,
      "loss": 0.0095,
      "step": 2197220
    },
    {
      "epoch": 3.595831451333111,
      "grad_norm": 0.2877510190010071,
      "learning_rate": 2.8268418520983375e-06,
      "loss": 0.0128,
      "step": 2197240
    },
    {
      "epoch": 3.595864181771764,
      "grad_norm": 0.12351125478744507,
      "learning_rate": 2.8267759598848203e-06,
      "loss": 0.013,
      "step": 2197260
    },
    {
      "epoch": 3.595896912210417,
      "grad_norm": 0.49063020944595337,
      "learning_rate": 2.8267100676713034e-06,
      "loss": 0.0178,
      "step": 2197280
    },
    {
      "epoch": 3.595929642649071,
      "grad_norm": 0.10856076329946518,
      "learning_rate": 2.826644175457786e-06,
      "loss": 0.0076,
      "step": 2197300
    },
    {
      "epoch": 3.5959623730877244,
      "grad_norm": 0.33431175351142883,
      "learning_rate": 2.8265782832442693e-06,
      "loss": 0.0137,
      "step": 2197320
    },
    {
      "epoch": 3.5959951035263775,
      "grad_norm": 0.33037394285202026,
      "learning_rate": 2.8265123910307525e-06,
      "loss": 0.0106,
      "step": 2197340
    },
    {
      "epoch": 3.5960278339650307,
      "grad_norm": 0.12233187258243561,
      "learning_rate": 2.8264464988172352e-06,
      "loss": 0.0151,
      "step": 2197360
    },
    {
      "epoch": 3.5960605644036843,
      "grad_norm": 0.06632516533136368,
      "learning_rate": 2.826380606603718e-06,
      "loss": 0.0099,
      "step": 2197380
    },
    {
      "epoch": 3.5960932948423374,
      "grad_norm": 0.19700667262077332,
      "learning_rate": 2.8263147143902007e-06,
      "loss": 0.0151,
      "step": 2197400
    },
    {
      "epoch": 3.5961260252809906,
      "grad_norm": 1.0177249908447266,
      "learning_rate": 2.826248822176684e-06,
      "loss": 0.0135,
      "step": 2197420
    },
    {
      "epoch": 3.596158755719644,
      "grad_norm": 0.44707587361335754,
      "learning_rate": 2.8261829299631666e-06,
      "loss": 0.0115,
      "step": 2197440
    },
    {
      "epoch": 3.5961914861582978,
      "grad_norm": 0.7169890999794006,
      "learning_rate": 2.8261170377496494e-06,
      "loss": 0.0163,
      "step": 2197460
    },
    {
      "epoch": 3.596224216596951,
      "grad_norm": 0.27500462532043457,
      "learning_rate": 2.826051145536132e-06,
      "loss": 0.0093,
      "step": 2197480
    },
    {
      "epoch": 3.596256947035604,
      "grad_norm": 0.25994226336479187,
      "learning_rate": 2.8259852533226153e-06,
      "loss": 0.0099,
      "step": 2197500
    },
    {
      "epoch": 3.5962896774742577,
      "grad_norm": 0.3711382746696472,
      "learning_rate": 2.825919361109098e-06,
      "loss": 0.0064,
      "step": 2197520
    },
    {
      "epoch": 3.596322407912911,
      "grad_norm": 0.2709798216819763,
      "learning_rate": 2.8258534688955807e-06,
      "loss": 0.0085,
      "step": 2197540
    },
    {
      "epoch": 3.596355138351564,
      "grad_norm": 0.3528410494327545,
      "learning_rate": 2.8257875766820635e-06,
      "loss": 0.0182,
      "step": 2197560
    },
    {
      "epoch": 3.5963878687902175,
      "grad_norm": 0.054610952734947205,
      "learning_rate": 2.8257216844685466e-06,
      "loss": 0.0112,
      "step": 2197580
    },
    {
      "epoch": 3.596420599228871,
      "grad_norm": 0.2869241237640381,
      "learning_rate": 2.8256557922550294e-06,
      "loss": 0.0091,
      "step": 2197600
    },
    {
      "epoch": 3.5964533296675243,
      "grad_norm": 0.4232754111289978,
      "learning_rate": 2.825589900041512e-06,
      "loss": 0.008,
      "step": 2197620
    },
    {
      "epoch": 3.5964860601061774,
      "grad_norm": 0.2378527820110321,
      "learning_rate": 2.825524007827995e-06,
      "loss": 0.0104,
      "step": 2197640
    },
    {
      "epoch": 3.596518790544831,
      "grad_norm": 0.11159606277942657,
      "learning_rate": 2.8254581156144776e-06,
      "loss": 0.0098,
      "step": 2197660
    },
    {
      "epoch": 3.596551520983484,
      "grad_norm": 0.2820369303226471,
      "learning_rate": 2.825392223400961e-06,
      "loss": 0.0113,
      "step": 2197680
    },
    {
      "epoch": 3.5965842514221373,
      "grad_norm": 0.1666848361492157,
      "learning_rate": 2.825326331187444e-06,
      "loss": 0.0119,
      "step": 2197700
    },
    {
      "epoch": 3.596616981860791,
      "grad_norm": 0.2196037322282791,
      "learning_rate": 2.8252604389739267e-06,
      "loss": 0.0126,
      "step": 2197720
    },
    {
      "epoch": 3.596649712299444,
      "grad_norm": 0.21586306393146515,
      "learning_rate": 2.82519454676041e-06,
      "loss": 0.0133,
      "step": 2197740
    },
    {
      "epoch": 3.5966824427380977,
      "grad_norm": 0.12186285108327866,
      "learning_rate": 2.8251286545468926e-06,
      "loss": 0.0129,
      "step": 2197760
    },
    {
      "epoch": 3.596715173176751,
      "grad_norm": 0.3082873821258545,
      "learning_rate": 2.8250627623333753e-06,
      "loss": 0.0119,
      "step": 2197780
    },
    {
      "epoch": 3.5967479036154044,
      "grad_norm": 0.10521536320447922,
      "learning_rate": 2.824996870119858e-06,
      "loss": 0.0149,
      "step": 2197800
    },
    {
      "epoch": 3.5967806340540576,
      "grad_norm": 0.23615887761116028,
      "learning_rate": 2.8249309779063412e-06,
      "loss": 0.0156,
      "step": 2197820
    },
    {
      "epoch": 3.5968133644927107,
      "grad_norm": 0.07878083735704422,
      "learning_rate": 2.824865085692824e-06,
      "loss": 0.0139,
      "step": 2197840
    },
    {
      "epoch": 3.5968460949313643,
      "grad_norm": 1.1676470041275024,
      "learning_rate": 2.8247991934793067e-06,
      "loss": 0.0094,
      "step": 2197860
    },
    {
      "epoch": 3.5968788253700175,
      "grad_norm": 0.2776505947113037,
      "learning_rate": 2.8247333012657894e-06,
      "loss": 0.0163,
      "step": 2197880
    },
    {
      "epoch": 3.596911555808671,
      "grad_norm": 0.26518121361732483,
      "learning_rate": 2.8246674090522726e-06,
      "loss": 0.0111,
      "step": 2197900
    },
    {
      "epoch": 3.596944286247324,
      "grad_norm": 0.15581075847148895,
      "learning_rate": 2.8246015168387553e-06,
      "loss": 0.0061,
      "step": 2197920
    },
    {
      "epoch": 3.596977016685978,
      "grad_norm": 0.11419892311096191,
      "learning_rate": 2.824535624625238e-06,
      "loss": 0.0124,
      "step": 2197940
    },
    {
      "epoch": 3.597009747124631,
      "grad_norm": 0.25820598006248474,
      "learning_rate": 2.824469732411721e-06,
      "loss": 0.0141,
      "step": 2197960
    },
    {
      "epoch": 3.597042477563284,
      "grad_norm": 0.3360033929347992,
      "learning_rate": 2.824403840198204e-06,
      "loss": 0.0072,
      "step": 2197980
    },
    {
      "epoch": 3.5970752080019377,
      "grad_norm": 0.482918381690979,
      "learning_rate": 2.8243379479846867e-06,
      "loss": 0.0121,
      "step": 2198000
    },
    {
      "epoch": 3.597107938440591,
      "grad_norm": 0.23655565083026886,
      "learning_rate": 2.82427205577117e-06,
      "loss": 0.0085,
      "step": 2198020
    },
    {
      "epoch": 3.5971406688792444,
      "grad_norm": 0.42102518677711487,
      "learning_rate": 2.824206163557653e-06,
      "loss": 0.0129,
      "step": 2198040
    },
    {
      "epoch": 3.5971733993178976,
      "grad_norm": 0.12976022064685822,
      "learning_rate": 2.8241402713441358e-06,
      "loss": 0.0119,
      "step": 2198060
    },
    {
      "epoch": 3.597206129756551,
      "grad_norm": 0.22324778139591217,
      "learning_rate": 2.8240743791306185e-06,
      "loss": 0.0079,
      "step": 2198080
    },
    {
      "epoch": 3.5972388601952043,
      "grad_norm": 0.1722177416086197,
      "learning_rate": 2.8240084869171013e-06,
      "loss": 0.0098,
      "step": 2198100
    },
    {
      "epoch": 3.5972715906338575,
      "grad_norm": 0.3213047981262207,
      "learning_rate": 2.8239425947035844e-06,
      "loss": 0.0133,
      "step": 2198120
    },
    {
      "epoch": 3.597304321072511,
      "grad_norm": 0.356749027967453,
      "learning_rate": 2.823876702490067e-06,
      "loss": 0.0116,
      "step": 2198140
    },
    {
      "epoch": 3.5973370515111642,
      "grad_norm": 0.12412289530038834,
      "learning_rate": 2.82381081027655e-06,
      "loss": 0.0061,
      "step": 2198160
    },
    {
      "epoch": 3.597369781949818,
      "grad_norm": 0.6094200611114502,
      "learning_rate": 2.8237449180630326e-06,
      "loss": 0.0144,
      "step": 2198180
    },
    {
      "epoch": 3.597402512388471,
      "grad_norm": 1.3723654747009277,
      "learning_rate": 2.8236790258495154e-06,
      "loss": 0.0207,
      "step": 2198200
    },
    {
      "epoch": 3.5974352428271246,
      "grad_norm": 0.17491503059864044,
      "learning_rate": 2.8236131336359986e-06,
      "loss": 0.0129,
      "step": 2198220
    },
    {
      "epoch": 3.5974679732657777,
      "grad_norm": 0.14295238256454468,
      "learning_rate": 2.8235472414224813e-06,
      "loss": 0.0124,
      "step": 2198240
    },
    {
      "epoch": 3.597500703704431,
      "grad_norm": 1.0029258728027344,
      "learning_rate": 2.823481349208964e-06,
      "loss": 0.017,
      "step": 2198260
    },
    {
      "epoch": 3.5975334341430845,
      "grad_norm": 0.2776698172092438,
      "learning_rate": 2.8234154569954468e-06,
      "loss": 0.0161,
      "step": 2198280
    },
    {
      "epoch": 3.5975661645817376,
      "grad_norm": 0.08775728940963745,
      "learning_rate": 2.82334956478193e-06,
      "loss": 0.008,
      "step": 2198300
    },
    {
      "epoch": 3.597598895020391,
      "grad_norm": 0.3883960247039795,
      "learning_rate": 2.8232836725684127e-06,
      "loss": 0.0144,
      "step": 2198320
    },
    {
      "epoch": 3.5976316254590444,
      "grad_norm": 0.47338274121284485,
      "learning_rate": 2.8232177803548954e-06,
      "loss": 0.0109,
      "step": 2198340
    },
    {
      "epoch": 3.597664355897698,
      "grad_norm": 0.4668958783149719,
      "learning_rate": 2.823151888141378e-06,
      "loss": 0.0146,
      "step": 2198360
    },
    {
      "epoch": 3.597697086336351,
      "grad_norm": 0.41564419865608215,
      "learning_rate": 2.8230859959278617e-06,
      "loss": 0.0123,
      "step": 2198380
    },
    {
      "epoch": 3.5977298167750043,
      "grad_norm": 0.3867226541042328,
      "learning_rate": 2.8230201037143445e-06,
      "loss": 0.0104,
      "step": 2198400
    },
    {
      "epoch": 3.597762547213658,
      "grad_norm": 0.17199715971946716,
      "learning_rate": 2.8229542115008272e-06,
      "loss": 0.0089,
      "step": 2198420
    },
    {
      "epoch": 3.597795277652311,
      "grad_norm": 0.20908865332603455,
      "learning_rate": 2.8228883192873104e-06,
      "loss": 0.0145,
      "step": 2198440
    },
    {
      "epoch": 3.5978280080909646,
      "grad_norm": 0.06246268004179001,
      "learning_rate": 2.822822427073793e-06,
      "loss": 0.01,
      "step": 2198460
    },
    {
      "epoch": 3.5978607385296177,
      "grad_norm": 0.825989842414856,
      "learning_rate": 2.822756534860276e-06,
      "loss": 0.013,
      "step": 2198480
    },
    {
      "epoch": 3.5978934689682713,
      "grad_norm": 0.056009627878665924,
      "learning_rate": 2.8226906426467586e-06,
      "loss": 0.0144,
      "step": 2198500
    },
    {
      "epoch": 3.5979261994069245,
      "grad_norm": 0.4526788592338562,
      "learning_rate": 2.8226247504332418e-06,
      "loss": 0.011,
      "step": 2198520
    },
    {
      "epoch": 3.5979589298455776,
      "grad_norm": 0.6752966046333313,
      "learning_rate": 2.8225588582197245e-06,
      "loss": 0.0135,
      "step": 2198540
    },
    {
      "epoch": 3.5979916602842312,
      "grad_norm": 1.1432288885116577,
      "learning_rate": 2.8224929660062072e-06,
      "loss": 0.0098,
      "step": 2198560
    },
    {
      "epoch": 3.5980243907228844,
      "grad_norm": 0.08946965634822845,
      "learning_rate": 2.82242707379269e-06,
      "loss": 0.0074,
      "step": 2198580
    },
    {
      "epoch": 3.5980571211615375,
      "grad_norm": 0.4265124201774597,
      "learning_rate": 2.822361181579173e-06,
      "loss": 0.011,
      "step": 2198600
    },
    {
      "epoch": 3.598089851600191,
      "grad_norm": 0.20002399384975433,
      "learning_rate": 2.822295289365656e-06,
      "loss": 0.0123,
      "step": 2198620
    },
    {
      "epoch": 3.5981225820388447,
      "grad_norm": 0.38032814860343933,
      "learning_rate": 2.8222293971521386e-06,
      "loss": 0.0149,
      "step": 2198640
    },
    {
      "epoch": 3.598155312477498,
      "grad_norm": 0.3801003098487854,
      "learning_rate": 2.8221635049386214e-06,
      "loss": 0.0119,
      "step": 2198660
    },
    {
      "epoch": 3.598188042916151,
      "grad_norm": 0.08975981175899506,
      "learning_rate": 2.822097612725104e-06,
      "loss": 0.0093,
      "step": 2198680
    },
    {
      "epoch": 3.5982207733548046,
      "grad_norm": 0.5204350352287292,
      "learning_rate": 2.8220317205115873e-06,
      "loss": 0.0104,
      "step": 2198700
    },
    {
      "epoch": 3.5982535037934578,
      "grad_norm": 0.5387980341911316,
      "learning_rate": 2.82196582829807e-06,
      "loss": 0.01,
      "step": 2198720
    },
    {
      "epoch": 3.598286234232111,
      "grad_norm": 0.5447158813476562,
      "learning_rate": 2.821899936084553e-06,
      "loss": 0.0073,
      "step": 2198740
    },
    {
      "epoch": 3.5983189646707645,
      "grad_norm": 0.42189520597457886,
      "learning_rate": 2.8218340438710363e-06,
      "loss": 0.0111,
      "step": 2198760
    },
    {
      "epoch": 3.598351695109418,
      "grad_norm": 0.48130783438682556,
      "learning_rate": 2.821768151657519e-06,
      "loss": 0.0088,
      "step": 2198780
    },
    {
      "epoch": 3.5983844255480713,
      "grad_norm": 0.1757824420928955,
      "learning_rate": 2.821702259444002e-06,
      "loss": 0.0128,
      "step": 2198800
    },
    {
      "epoch": 3.5984171559867244,
      "grad_norm": 0.1420145481824875,
      "learning_rate": 2.8216363672304846e-06,
      "loss": 0.0151,
      "step": 2198820
    },
    {
      "epoch": 3.598449886425378,
      "grad_norm": 0.3742213249206543,
      "learning_rate": 2.8215704750169677e-06,
      "loss": 0.0141,
      "step": 2198840
    },
    {
      "epoch": 3.598482616864031,
      "grad_norm": 0.5480236411094666,
      "learning_rate": 2.8215045828034505e-06,
      "loss": 0.008,
      "step": 2198860
    },
    {
      "epoch": 3.5985153473026843,
      "grad_norm": 0.5067946314811707,
      "learning_rate": 2.821438690589933e-06,
      "loss": 0.0107,
      "step": 2198880
    },
    {
      "epoch": 3.598548077741338,
      "grad_norm": 0.396626353263855,
      "learning_rate": 2.821372798376416e-06,
      "loss": 0.0182,
      "step": 2198900
    },
    {
      "epoch": 3.5985808081799915,
      "grad_norm": 0.8369963765144348,
      "learning_rate": 2.821306906162899e-06,
      "loss": 0.0106,
      "step": 2198920
    },
    {
      "epoch": 3.5986135386186446,
      "grad_norm": 0.3450947403907776,
      "learning_rate": 2.821241013949382e-06,
      "loss": 0.0106,
      "step": 2198940
    },
    {
      "epoch": 3.598646269057298,
      "grad_norm": 0.11001940816640854,
      "learning_rate": 2.8211751217358646e-06,
      "loss": 0.0099,
      "step": 2198960
    },
    {
      "epoch": 3.5986789994959514,
      "grad_norm": 0.4250369668006897,
      "learning_rate": 2.8211092295223473e-06,
      "loss": 0.0079,
      "step": 2198980
    },
    {
      "epoch": 3.5987117299346045,
      "grad_norm": 0.46547967195510864,
      "learning_rate": 2.8210433373088305e-06,
      "loss": 0.0085,
      "step": 2199000
    },
    {
      "epoch": 3.5987444603732577,
      "grad_norm": 0.4449187219142914,
      "learning_rate": 2.8209774450953132e-06,
      "loss": 0.0093,
      "step": 2199020
    },
    {
      "epoch": 3.5987771908119113,
      "grad_norm": 0.26273855566978455,
      "learning_rate": 2.820911552881796e-06,
      "loss": 0.0067,
      "step": 2199040
    },
    {
      "epoch": 3.598809921250565,
      "grad_norm": 0.19180504977703094,
      "learning_rate": 2.8208456606682787e-06,
      "loss": 0.0103,
      "step": 2199060
    },
    {
      "epoch": 3.598842651689218,
      "grad_norm": 0.1926385909318924,
      "learning_rate": 2.8207797684547623e-06,
      "loss": 0.0158,
      "step": 2199080
    },
    {
      "epoch": 3.598875382127871,
      "grad_norm": 0.09920038282871246,
      "learning_rate": 2.820713876241245e-06,
      "loss": 0.0158,
      "step": 2199100
    },
    {
      "epoch": 3.5989081125665248,
      "grad_norm": 0.11692208796739578,
      "learning_rate": 2.8206479840277278e-06,
      "loss": 0.0175,
      "step": 2199120
    },
    {
      "epoch": 3.598940843005178,
      "grad_norm": 0.39326736330986023,
      "learning_rate": 2.8205820918142105e-06,
      "loss": 0.0105,
      "step": 2199140
    },
    {
      "epoch": 3.598973573443831,
      "grad_norm": 0.15846554934978485,
      "learning_rate": 2.8205161996006937e-06,
      "loss": 0.0099,
      "step": 2199160
    },
    {
      "epoch": 3.5990063038824847,
      "grad_norm": 0.2394481748342514,
      "learning_rate": 2.8204503073871764e-06,
      "loss": 0.0116,
      "step": 2199180
    },
    {
      "epoch": 3.599039034321138,
      "grad_norm": 0.1530025750398636,
      "learning_rate": 2.820384415173659e-06,
      "loss": 0.0123,
      "step": 2199200
    },
    {
      "epoch": 3.5990717647597914,
      "grad_norm": 0.6447461247444153,
      "learning_rate": 2.820318522960142e-06,
      "loss": 0.0121,
      "step": 2199220
    },
    {
      "epoch": 3.5991044951984446,
      "grad_norm": 0.17854256927967072,
      "learning_rate": 2.820252630746625e-06,
      "loss": 0.011,
      "step": 2199240
    },
    {
      "epoch": 3.599137225637098,
      "grad_norm": 0.14493082463741302,
      "learning_rate": 2.820186738533108e-06,
      "loss": 0.0103,
      "step": 2199260
    },
    {
      "epoch": 3.5991699560757513,
      "grad_norm": 0.23735322058200836,
      "learning_rate": 2.8201208463195905e-06,
      "loss": 0.0158,
      "step": 2199280
    },
    {
      "epoch": 3.5992026865144044,
      "grad_norm": 0.24532465636730194,
      "learning_rate": 2.8200549541060733e-06,
      "loss": 0.01,
      "step": 2199300
    },
    {
      "epoch": 3.599235416953058,
      "grad_norm": 0.2295961230993271,
      "learning_rate": 2.8199890618925564e-06,
      "loss": 0.0098,
      "step": 2199320
    },
    {
      "epoch": 3.599268147391711,
      "grad_norm": 0.07513532042503357,
      "learning_rate": 2.819923169679039e-06,
      "loss": 0.0114,
      "step": 2199340
    },
    {
      "epoch": 3.599300877830365,
      "grad_norm": 0.1628558486700058,
      "learning_rate": 2.819857277465522e-06,
      "loss": 0.013,
      "step": 2199360
    },
    {
      "epoch": 3.599333608269018,
      "grad_norm": 0.32898813486099243,
      "learning_rate": 2.8197913852520047e-06,
      "loss": 0.0097,
      "step": 2199380
    },
    {
      "epoch": 3.5993663387076715,
      "grad_norm": 0.32302218675613403,
      "learning_rate": 2.819725493038488e-06,
      "loss": 0.014,
      "step": 2199400
    },
    {
      "epoch": 3.5993990691463247,
      "grad_norm": 0.13050253689289093,
      "learning_rate": 2.8196596008249706e-06,
      "loss": 0.0137,
      "step": 2199420
    },
    {
      "epoch": 3.599431799584978,
      "grad_norm": 0.9835082292556763,
      "learning_rate": 2.8195937086114537e-06,
      "loss": 0.0115,
      "step": 2199440
    },
    {
      "epoch": 3.5994645300236314,
      "grad_norm": 0.23979191482067108,
      "learning_rate": 2.819527816397937e-06,
      "loss": 0.0149,
      "step": 2199460
    },
    {
      "epoch": 3.5994972604622846,
      "grad_norm": 0.48820197582244873,
      "learning_rate": 2.8194619241844196e-06,
      "loss": 0.0099,
      "step": 2199480
    },
    {
      "epoch": 3.599529990900938,
      "grad_norm": 0.3870428204536438,
      "learning_rate": 2.8193960319709024e-06,
      "loss": 0.0118,
      "step": 2199500
    },
    {
      "epoch": 3.5995627213395913,
      "grad_norm": 0.571556031703949,
      "learning_rate": 2.819330139757385e-06,
      "loss": 0.0161,
      "step": 2199520
    },
    {
      "epoch": 3.599595451778245,
      "grad_norm": 0.5807921886444092,
      "learning_rate": 2.8192642475438683e-06,
      "loss": 0.0086,
      "step": 2199540
    },
    {
      "epoch": 3.599628182216898,
      "grad_norm": 0.29872673749923706,
      "learning_rate": 2.819198355330351e-06,
      "loss": 0.0128,
      "step": 2199560
    },
    {
      "epoch": 3.599660912655551,
      "grad_norm": 0.25897058844566345,
      "learning_rate": 2.8191324631168337e-06,
      "loss": 0.0183,
      "step": 2199580
    },
    {
      "epoch": 3.599693643094205,
      "grad_norm": 0.29240715503692627,
      "learning_rate": 2.8190665709033165e-06,
      "loss": 0.0124,
      "step": 2199600
    },
    {
      "epoch": 3.599726373532858,
      "grad_norm": 0.1749437153339386,
      "learning_rate": 2.8190006786897992e-06,
      "loss": 0.0112,
      "step": 2199620
    },
    {
      "epoch": 3.5997591039715116,
      "grad_norm": 0.1788141429424286,
      "learning_rate": 2.8189347864762824e-06,
      "loss": 0.0087,
      "step": 2199640
    },
    {
      "epoch": 3.5997918344101647,
      "grad_norm": 0.17999914288520813,
      "learning_rate": 2.818868894262765e-06,
      "loss": 0.0074,
      "step": 2199660
    },
    {
      "epoch": 3.5998245648488183,
      "grad_norm": 0.1832219511270523,
      "learning_rate": 2.818803002049248e-06,
      "loss": 0.0093,
      "step": 2199680
    },
    {
      "epoch": 3.5998572952874714,
      "grad_norm": 0.45529311895370483,
      "learning_rate": 2.8187371098357306e-06,
      "loss": 0.0182,
      "step": 2199700
    },
    {
      "epoch": 3.5998900257261246,
      "grad_norm": 0.3953038454055786,
      "learning_rate": 2.8186712176222138e-06,
      "loss": 0.0151,
      "step": 2199720
    },
    {
      "epoch": 3.599922756164778,
      "grad_norm": 0.12981034815311432,
      "learning_rate": 2.8186053254086965e-06,
      "loss": 0.0137,
      "step": 2199740
    },
    {
      "epoch": 3.5999554866034313,
      "grad_norm": 0.10213062167167664,
      "learning_rate": 2.8185394331951793e-06,
      "loss": 0.0099,
      "step": 2199760
    },
    {
      "epoch": 3.599988217042085,
      "grad_norm": 3.240100145339966,
      "learning_rate": 2.818473540981663e-06,
      "loss": 0.0098,
      "step": 2199780
    },
    {
      "epoch": 3.600020947480738,
      "grad_norm": 0.13480332493782043,
      "learning_rate": 2.8184076487681456e-06,
      "loss": 0.0093,
      "step": 2199800
    },
    {
      "epoch": 3.6000536779193917,
      "grad_norm": 0.8100374937057495,
      "learning_rate": 2.8183417565546283e-06,
      "loss": 0.0143,
      "step": 2199820
    },
    {
      "epoch": 3.600086408358045,
      "grad_norm": 0.2680140733718872,
      "learning_rate": 2.818275864341111e-06,
      "loss": 0.0105,
      "step": 2199840
    },
    {
      "epoch": 3.600119138796698,
      "grad_norm": 0.22669146955013275,
      "learning_rate": 2.8182099721275942e-06,
      "loss": 0.0092,
      "step": 2199860
    },
    {
      "epoch": 3.6001518692353516,
      "grad_norm": 0.16097143292427063,
      "learning_rate": 2.818144079914077e-06,
      "loss": 0.011,
      "step": 2199880
    },
    {
      "epoch": 3.6001845996740047,
      "grad_norm": 0.23390688002109528,
      "learning_rate": 2.8180781877005597e-06,
      "loss": 0.0083,
      "step": 2199900
    },
    {
      "epoch": 3.6002173301126583,
      "grad_norm": 0.13171254098415375,
      "learning_rate": 2.8180122954870424e-06,
      "loss": 0.0093,
      "step": 2199920
    },
    {
      "epoch": 3.6002500605513115,
      "grad_norm": 0.5105098485946655,
      "learning_rate": 2.8179464032735256e-06,
      "loss": 0.0101,
      "step": 2199940
    },
    {
      "epoch": 3.600282790989965,
      "grad_norm": 0.3220466673374176,
      "learning_rate": 2.8178805110600083e-06,
      "loss": 0.0101,
      "step": 2199960
    },
    {
      "epoch": 3.600315521428618,
      "grad_norm": 0.41007199883461,
      "learning_rate": 2.817814618846491e-06,
      "loss": 0.011,
      "step": 2199980
    },
    {
      "epoch": 3.6003482518672714,
      "grad_norm": 0.2017720490694046,
      "learning_rate": 2.817748726632974e-06,
      "loss": 0.0149,
      "step": 2200000
    },
    {
      "epoch": 3.6003482518672714,
      "eval_loss": 0.0065612876787781715,
      "eval_runtime": 6517.7323,
      "eval_samples_per_second": 157.702,
      "eval_steps_per_second": 15.77,
      "eval_sts-dev_pearson_cosine": 0.9850428473274532,
      "eval_sts-dev_spearman_cosine": 0.8955687796846943,
      "step": 2200000
    },
    {
      "epoch": 3.600380982305925,
      "grad_norm": 0.5733932256698608,
      "learning_rate": 2.817682834419457e-06,
      "loss": 0.0096,
      "step": 2200020
    },
    {
      "epoch": 3.600413712744578,
      "grad_norm": 0.19948329031467438,
      "learning_rate": 2.8176169422059397e-06,
      "loss": 0.0094,
      "step": 2200040
    },
    {
      "epoch": 3.6004464431832317,
      "grad_norm": 0.11056330054998398,
      "learning_rate": 2.8175510499924225e-06,
      "loss": 0.0087,
      "step": 2200060
    },
    {
      "epoch": 3.600479173621885,
      "grad_norm": 0.32821208238601685,
      "learning_rate": 2.817485157778905e-06,
      "loss": 0.0122,
      "step": 2200080
    },
    {
      "epoch": 3.6005119040605384,
      "grad_norm": 0.08070129156112671,
      "learning_rate": 2.817419265565388e-06,
      "loss": 0.0101,
      "step": 2200100
    },
    {
      "epoch": 3.6005446344991916,
      "grad_norm": 0.060804352164268494,
      "learning_rate": 2.817353373351871e-06,
      "loss": 0.0116,
      "step": 2200120
    },
    {
      "epoch": 3.6005773649378447,
      "grad_norm": 0.36420363187789917,
      "learning_rate": 2.8172874811383543e-06,
      "loss": 0.0101,
      "step": 2200140
    },
    {
      "epoch": 3.6006100953764983,
      "grad_norm": 0.32934635877609253,
      "learning_rate": 2.817221588924837e-06,
      "loss": 0.0153,
      "step": 2200160
    },
    {
      "epoch": 3.6006428258151515,
      "grad_norm": 0.2547529339790344,
      "learning_rate": 2.81715569671132e-06,
      "loss": 0.0125,
      "step": 2200180
    },
    {
      "epoch": 3.6006755562538046,
      "grad_norm": 0.054033856838941574,
      "learning_rate": 2.817089804497803e-06,
      "loss": 0.0087,
      "step": 2200200
    },
    {
      "epoch": 3.6007082866924582,
      "grad_norm": 0.41105711460113525,
      "learning_rate": 2.8170239122842857e-06,
      "loss": 0.0092,
      "step": 2200220
    },
    {
      "epoch": 3.600741017131112,
      "grad_norm": 0.1643853634595871,
      "learning_rate": 2.8169580200707684e-06,
      "loss": 0.0141,
      "step": 2200240
    },
    {
      "epoch": 3.600773747569765,
      "grad_norm": 0.4141019284725189,
      "learning_rate": 2.8168921278572516e-06,
      "loss": 0.0107,
      "step": 2200260
    },
    {
      "epoch": 3.600806478008418,
      "grad_norm": 0.4595390558242798,
      "learning_rate": 2.8168262356437343e-06,
      "loss": 0.0147,
      "step": 2200280
    },
    {
      "epoch": 3.6008392084470717,
      "grad_norm": 0.2757015824317932,
      "learning_rate": 2.816760343430217e-06,
      "loss": 0.0128,
      "step": 2200300
    },
    {
      "epoch": 3.600871938885725,
      "grad_norm": 0.12573441863059998,
      "learning_rate": 2.8166944512166998e-06,
      "loss": 0.0085,
      "step": 2200320
    },
    {
      "epoch": 3.600904669324378,
      "grad_norm": 0.07902710884809494,
      "learning_rate": 2.816628559003183e-06,
      "loss": 0.0076,
      "step": 2200340
    },
    {
      "epoch": 3.6009373997630316,
      "grad_norm": 0.2466706782579422,
      "learning_rate": 2.8165626667896657e-06,
      "loss": 0.0064,
      "step": 2200360
    },
    {
      "epoch": 3.600970130201685,
      "grad_norm": 0.22577810287475586,
      "learning_rate": 2.8164967745761484e-06,
      "loss": 0.0103,
      "step": 2200380
    },
    {
      "epoch": 3.6010028606403384,
      "grad_norm": 0.1768350601196289,
      "learning_rate": 2.816430882362631e-06,
      "loss": 0.0077,
      "step": 2200400
    },
    {
      "epoch": 3.6010355910789915,
      "grad_norm": 0.17711755633354187,
      "learning_rate": 2.8163649901491143e-06,
      "loss": 0.0099,
      "step": 2200420
    },
    {
      "epoch": 3.601068321517645,
      "grad_norm": 0.5587303042411804,
      "learning_rate": 2.816299097935597e-06,
      "loss": 0.0172,
      "step": 2200440
    },
    {
      "epoch": 3.6011010519562983,
      "grad_norm": 0.2617509663105011,
      "learning_rate": 2.81623320572208e-06,
      "loss": 0.011,
      "step": 2200460
    },
    {
      "epoch": 3.6011337823949514,
      "grad_norm": 0.10484953969717026,
      "learning_rate": 2.8161673135085625e-06,
      "loss": 0.0093,
      "step": 2200480
    },
    {
      "epoch": 3.601166512833605,
      "grad_norm": 0.5156615376472473,
      "learning_rate": 2.816101421295046e-06,
      "loss": 0.0085,
      "step": 2200500
    },
    {
      "epoch": 3.6011992432722586,
      "grad_norm": 0.41548073291778564,
      "learning_rate": 2.816035529081529e-06,
      "loss": 0.0151,
      "step": 2200520
    },
    {
      "epoch": 3.6012319737109117,
      "grad_norm": 0.35750237107276917,
      "learning_rate": 2.8159696368680116e-06,
      "loss": 0.0115,
      "step": 2200540
    },
    {
      "epoch": 3.601264704149565,
      "grad_norm": 0.7346233129501343,
      "learning_rate": 2.8159037446544948e-06,
      "loss": 0.0158,
      "step": 2200560
    },
    {
      "epoch": 3.6012974345882185,
      "grad_norm": 0.31381329894065857,
      "learning_rate": 2.8158378524409775e-06,
      "loss": 0.0102,
      "step": 2200580
    },
    {
      "epoch": 3.6013301650268716,
      "grad_norm": 0.23885954916477203,
      "learning_rate": 2.8157719602274603e-06,
      "loss": 0.0093,
      "step": 2200600
    },
    {
      "epoch": 3.601362895465525,
      "grad_norm": 0.2256804257631302,
      "learning_rate": 2.815706068013943e-06,
      "loss": 0.0101,
      "step": 2200620
    },
    {
      "epoch": 3.6013956259041784,
      "grad_norm": 0.29204726219177246,
      "learning_rate": 2.8156401758004257e-06,
      "loss": 0.0098,
      "step": 2200640
    },
    {
      "epoch": 3.601428356342832,
      "grad_norm": 0.14281317591667175,
      "learning_rate": 2.815574283586909e-06,
      "loss": 0.0089,
      "step": 2200660
    },
    {
      "epoch": 3.601461086781485,
      "grad_norm": 0.19780629873275757,
      "learning_rate": 2.8155083913733916e-06,
      "loss": 0.008,
      "step": 2200680
    },
    {
      "epoch": 3.6014938172201383,
      "grad_norm": 0.0886688083410263,
      "learning_rate": 2.8154424991598744e-06,
      "loss": 0.0114,
      "step": 2200700
    },
    {
      "epoch": 3.601526547658792,
      "grad_norm": 0.21300239861011505,
      "learning_rate": 2.815376606946357e-06,
      "loss": 0.0072,
      "step": 2200720
    },
    {
      "epoch": 3.601559278097445,
      "grad_norm": 0.20742830634117126,
      "learning_rate": 2.8153107147328403e-06,
      "loss": 0.01,
      "step": 2200740
    },
    {
      "epoch": 3.601592008536098,
      "grad_norm": 0.1946747750043869,
      "learning_rate": 2.815244822519323e-06,
      "loss": 0.0107,
      "step": 2200760
    },
    {
      "epoch": 3.6016247389747518,
      "grad_norm": 0.3016832768917084,
      "learning_rate": 2.8151789303058058e-06,
      "loss": 0.0111,
      "step": 2200780
    },
    {
      "epoch": 3.601657469413405,
      "grad_norm": 0.5360979437828064,
      "learning_rate": 2.8151130380922885e-06,
      "loss": 0.0087,
      "step": 2200800
    },
    {
      "epoch": 3.6016901998520585,
      "grad_norm": 0.2851722836494446,
      "learning_rate": 2.8150471458787717e-06,
      "loss": 0.0073,
      "step": 2200820
    },
    {
      "epoch": 3.6017229302907117,
      "grad_norm": 0.2019377499818802,
      "learning_rate": 2.814981253665255e-06,
      "loss": 0.0103,
      "step": 2200840
    },
    {
      "epoch": 3.6017556607293653,
      "grad_norm": 0.8890635967254639,
      "learning_rate": 2.8149153614517376e-06,
      "loss": 0.0122,
      "step": 2200860
    },
    {
      "epoch": 3.6017883911680184,
      "grad_norm": 0.26619449257850647,
      "learning_rate": 2.8148494692382207e-06,
      "loss": 0.0135,
      "step": 2200880
    },
    {
      "epoch": 3.6018211216066716,
      "grad_norm": 0.26183566451072693,
      "learning_rate": 2.8147835770247035e-06,
      "loss": 0.0123,
      "step": 2200900
    },
    {
      "epoch": 3.601853852045325,
      "grad_norm": 0.2896025478839874,
      "learning_rate": 2.814717684811186e-06,
      "loss": 0.0111,
      "step": 2200920
    },
    {
      "epoch": 3.6018865824839783,
      "grad_norm": 0.6468671560287476,
      "learning_rate": 2.814651792597669e-06,
      "loss": 0.0165,
      "step": 2200940
    },
    {
      "epoch": 3.601919312922632,
      "grad_norm": 0.17920710146427155,
      "learning_rate": 2.814585900384152e-06,
      "loss": 0.0086,
      "step": 2200960
    },
    {
      "epoch": 3.601952043361285,
      "grad_norm": 0.17344218492507935,
      "learning_rate": 2.814520008170635e-06,
      "loss": 0.0098,
      "step": 2200980
    },
    {
      "epoch": 3.6019847737999386,
      "grad_norm": 0.2808930575847626,
      "learning_rate": 2.8144541159571176e-06,
      "loss": 0.0112,
      "step": 2201000
    },
    {
      "epoch": 3.602017504238592,
      "grad_norm": 0.05899600312113762,
      "learning_rate": 2.8143882237436003e-06,
      "loss": 0.0079,
      "step": 2201020
    },
    {
      "epoch": 3.602050234677245,
      "grad_norm": 0.37165483832359314,
      "learning_rate": 2.814322331530083e-06,
      "loss": 0.0141,
      "step": 2201040
    },
    {
      "epoch": 3.6020829651158985,
      "grad_norm": 0.43204423785209656,
      "learning_rate": 2.8142564393165662e-06,
      "loss": 0.0124,
      "step": 2201060
    },
    {
      "epoch": 3.6021156955545517,
      "grad_norm": 0.08621620386838913,
      "learning_rate": 2.814190547103049e-06,
      "loss": 0.0147,
      "step": 2201080
    },
    {
      "epoch": 3.6021484259932053,
      "grad_norm": 0.198036789894104,
      "learning_rate": 2.8141246548895317e-06,
      "loss": 0.0102,
      "step": 2201100
    },
    {
      "epoch": 3.6021811564318584,
      "grad_norm": 0.25884780287742615,
      "learning_rate": 2.8140587626760145e-06,
      "loss": 0.0129,
      "step": 2201120
    },
    {
      "epoch": 3.602213886870512,
      "grad_norm": 0.2925807535648346,
      "learning_rate": 2.8139928704624976e-06,
      "loss": 0.0146,
      "step": 2201140
    },
    {
      "epoch": 3.602246617309165,
      "grad_norm": 0.05074639990925789,
      "learning_rate": 2.8139269782489804e-06,
      "loss": 0.0103,
      "step": 2201160
    },
    {
      "epoch": 3.6022793477478183,
      "grad_norm": 0.8057348132133484,
      "learning_rate": 2.813861086035463e-06,
      "loss": 0.0139,
      "step": 2201180
    },
    {
      "epoch": 3.602312078186472,
      "grad_norm": 0.2204020619392395,
      "learning_rate": 2.8137951938219467e-06,
      "loss": 0.0089,
      "step": 2201200
    },
    {
      "epoch": 3.602344808625125,
      "grad_norm": 0.2950791120529175,
      "learning_rate": 2.8137293016084294e-06,
      "loss": 0.0146,
      "step": 2201220
    },
    {
      "epoch": 3.6023775390637787,
      "grad_norm": 0.3736211657524109,
      "learning_rate": 2.813663409394912e-06,
      "loss": 0.0097,
      "step": 2201240
    },
    {
      "epoch": 3.602410269502432,
      "grad_norm": 0.31029751896858215,
      "learning_rate": 2.813597517181395e-06,
      "loss": 0.0126,
      "step": 2201260
    },
    {
      "epoch": 3.6024429999410854,
      "grad_norm": 0.3480643033981323,
      "learning_rate": 2.813531624967878e-06,
      "loss": 0.01,
      "step": 2201280
    },
    {
      "epoch": 3.6024757303797386,
      "grad_norm": 0.2101554572582245,
      "learning_rate": 2.813465732754361e-06,
      "loss": 0.016,
      "step": 2201300
    },
    {
      "epoch": 3.6025084608183917,
      "grad_norm": 0.20534251630306244,
      "learning_rate": 2.8133998405408435e-06,
      "loss": 0.011,
      "step": 2201320
    },
    {
      "epoch": 3.6025411912570453,
      "grad_norm": 0.10984023660421371,
      "learning_rate": 2.8133339483273263e-06,
      "loss": 0.0073,
      "step": 2201340
    },
    {
      "epoch": 3.6025739216956985,
      "grad_norm": 0.40632370114326477,
      "learning_rate": 2.8132680561138094e-06,
      "loss": 0.0114,
      "step": 2201360
    },
    {
      "epoch": 3.602606652134352,
      "grad_norm": 0.4773399829864502,
      "learning_rate": 2.813202163900292e-06,
      "loss": 0.0158,
      "step": 2201380
    },
    {
      "epoch": 3.602639382573005,
      "grad_norm": 0.17515486478805542,
      "learning_rate": 2.813136271686775e-06,
      "loss": 0.009,
      "step": 2201400
    },
    {
      "epoch": 3.602672113011659,
      "grad_norm": 0.4149723947048187,
      "learning_rate": 2.8130703794732577e-06,
      "loss": 0.0149,
      "step": 2201420
    },
    {
      "epoch": 3.602704843450312,
      "grad_norm": 0.23759323358535767,
      "learning_rate": 2.813004487259741e-06,
      "loss": 0.0159,
      "step": 2201440
    },
    {
      "epoch": 3.602737573888965,
      "grad_norm": 0.049699824303388596,
      "learning_rate": 2.8129385950462236e-06,
      "loss": 0.0096,
      "step": 2201460
    },
    {
      "epoch": 3.6027703043276187,
      "grad_norm": 0.373557984828949,
      "learning_rate": 2.8128727028327063e-06,
      "loss": 0.0112,
      "step": 2201480
    },
    {
      "epoch": 3.602803034766272,
      "grad_norm": 0.2816719710826874,
      "learning_rate": 2.812806810619189e-06,
      "loss": 0.0103,
      "step": 2201500
    },
    {
      "epoch": 3.6028357652049254,
      "grad_norm": 0.15579231083393097,
      "learning_rate": 2.8127409184056718e-06,
      "loss": 0.0181,
      "step": 2201520
    },
    {
      "epoch": 3.6028684956435786,
      "grad_norm": 0.4575745761394501,
      "learning_rate": 2.8126750261921554e-06,
      "loss": 0.0148,
      "step": 2201540
    },
    {
      "epoch": 3.602901226082232,
      "grad_norm": 0.3808116912841797,
      "learning_rate": 2.812609133978638e-06,
      "loss": 0.012,
      "step": 2201560
    },
    {
      "epoch": 3.6029339565208853,
      "grad_norm": 0.40616342425346375,
      "learning_rate": 2.812543241765121e-06,
      "loss": 0.011,
      "step": 2201580
    },
    {
      "epoch": 3.6029666869595385,
      "grad_norm": 0.5549998879432678,
      "learning_rate": 2.812477349551604e-06,
      "loss": 0.0131,
      "step": 2201600
    },
    {
      "epoch": 3.602999417398192,
      "grad_norm": 0.3434007465839386,
      "learning_rate": 2.8124114573380868e-06,
      "loss": 0.0107,
      "step": 2201620
    },
    {
      "epoch": 3.6030321478368452,
      "grad_norm": 0.5805906653404236,
      "learning_rate": 2.8123455651245695e-06,
      "loss": 0.014,
      "step": 2201640
    },
    {
      "epoch": 3.6030648782754984,
      "grad_norm": 0.3401053249835968,
      "learning_rate": 2.8122796729110522e-06,
      "loss": 0.0127,
      "step": 2201660
    },
    {
      "epoch": 3.603097608714152,
      "grad_norm": 0.24901913106441498,
      "learning_rate": 2.8122137806975354e-06,
      "loss": 0.011,
      "step": 2201680
    },
    {
      "epoch": 3.6031303391528056,
      "grad_norm": 0.19480720162391663,
      "learning_rate": 2.812147888484018e-06,
      "loss": 0.009,
      "step": 2201700
    },
    {
      "epoch": 3.6031630695914587,
      "grad_norm": 0.2117576003074646,
      "learning_rate": 2.812081996270501e-06,
      "loss": 0.0101,
      "step": 2201720
    },
    {
      "epoch": 3.603195800030112,
      "grad_norm": 0.20539042353630066,
      "learning_rate": 2.8120161040569836e-06,
      "loss": 0.0093,
      "step": 2201740
    },
    {
      "epoch": 3.6032285304687655,
      "grad_norm": 0.4391063153743744,
      "learning_rate": 2.8119502118434668e-06,
      "loss": 0.0142,
      "step": 2201760
    },
    {
      "epoch": 3.6032612609074186,
      "grad_norm": 0.2547706961631775,
      "learning_rate": 2.8118843196299495e-06,
      "loss": 0.0089,
      "step": 2201780
    },
    {
      "epoch": 3.6032939913460718,
      "grad_norm": 0.22599567472934723,
      "learning_rate": 2.8118184274164323e-06,
      "loss": 0.0126,
      "step": 2201800
    },
    {
      "epoch": 3.6033267217847254,
      "grad_norm": 0.48208388686180115,
      "learning_rate": 2.811752535202915e-06,
      "loss": 0.0102,
      "step": 2201820
    },
    {
      "epoch": 3.603359452223379,
      "grad_norm": 0.13896618783473969,
      "learning_rate": 2.811686642989398e-06,
      "loss": 0.0131,
      "step": 2201840
    },
    {
      "epoch": 3.603392182662032,
      "grad_norm": 0.2877826690673828,
      "learning_rate": 2.811620750775881e-06,
      "loss": 0.009,
      "step": 2201860
    },
    {
      "epoch": 3.6034249131006852,
      "grad_norm": 0.159465491771698,
      "learning_rate": 2.8115548585623636e-06,
      "loss": 0.0098,
      "step": 2201880
    },
    {
      "epoch": 3.603457643539339,
      "grad_norm": 0.3421291708946228,
      "learning_rate": 2.8114889663488472e-06,
      "loss": 0.0124,
      "step": 2201900
    },
    {
      "epoch": 3.603490373977992,
      "grad_norm": 0.8842710852622986,
      "learning_rate": 2.81142307413533e-06,
      "loss": 0.0125,
      "step": 2201920
    },
    {
      "epoch": 3.603523104416645,
      "grad_norm": 0.5407053828239441,
      "learning_rate": 2.8113571819218127e-06,
      "loss": 0.0088,
      "step": 2201940
    },
    {
      "epoch": 3.6035558348552987,
      "grad_norm": 0.1639837920665741,
      "learning_rate": 2.8112912897082954e-06,
      "loss": 0.009,
      "step": 2201960
    },
    {
      "epoch": 3.6035885652939523,
      "grad_norm": 0.29075977206230164,
      "learning_rate": 2.8112253974947786e-06,
      "loss": 0.0083,
      "step": 2201980
    },
    {
      "epoch": 3.6036212957326055,
      "grad_norm": 0.07829073071479797,
      "learning_rate": 2.8111595052812614e-06,
      "loss": 0.0099,
      "step": 2202000
    },
    {
      "epoch": 3.6036540261712586,
      "grad_norm": 0.17701603472232819,
      "learning_rate": 2.811093613067744e-06,
      "loss": 0.0082,
      "step": 2202020
    },
    {
      "epoch": 3.6036867566099122,
      "grad_norm": 0.1899062991142273,
      "learning_rate": 2.811027720854227e-06,
      "loss": 0.0117,
      "step": 2202040
    },
    {
      "epoch": 3.6037194870485654,
      "grad_norm": 0.14054079353809357,
      "learning_rate": 2.8109618286407096e-06,
      "loss": 0.0117,
      "step": 2202060
    },
    {
      "epoch": 3.6037522174872185,
      "grad_norm": 0.5081546902656555,
      "learning_rate": 2.8108959364271927e-06,
      "loss": 0.0135,
      "step": 2202080
    },
    {
      "epoch": 3.603784947925872,
      "grad_norm": 0.08209355175495148,
      "learning_rate": 2.8108300442136755e-06,
      "loss": 0.0104,
      "step": 2202100
    },
    {
      "epoch": 3.6038176783645257,
      "grad_norm": 0.33136487007141113,
      "learning_rate": 2.8107641520001582e-06,
      "loss": 0.0117,
      "step": 2202120
    },
    {
      "epoch": 3.603850408803179,
      "grad_norm": 0.12626083195209503,
      "learning_rate": 2.810698259786641e-06,
      "loss": 0.0071,
      "step": 2202140
    },
    {
      "epoch": 3.603883139241832,
      "grad_norm": 0.422046035528183,
      "learning_rate": 2.810632367573124e-06,
      "loss": 0.0109,
      "step": 2202160
    },
    {
      "epoch": 3.6039158696804856,
      "grad_norm": 0.13891926407814026,
      "learning_rate": 2.810566475359607e-06,
      "loss": 0.0103,
      "step": 2202180
    },
    {
      "epoch": 3.6039486001191388,
      "grad_norm": 0.13189518451690674,
      "learning_rate": 2.8105005831460896e-06,
      "loss": 0.0084,
      "step": 2202200
    },
    {
      "epoch": 3.603981330557792,
      "grad_norm": 0.23527415096759796,
      "learning_rate": 2.8104346909325723e-06,
      "loss": 0.0094,
      "step": 2202220
    },
    {
      "epoch": 3.6040140609964455,
      "grad_norm": 0.25975021719932556,
      "learning_rate": 2.8103687987190555e-06,
      "loss": 0.0124,
      "step": 2202240
    },
    {
      "epoch": 3.6040467914350987,
      "grad_norm": 0.4138205051422119,
      "learning_rate": 2.8103029065055387e-06,
      "loss": 0.0098,
      "step": 2202260
    },
    {
      "epoch": 3.6040795218737522,
      "grad_norm": 0.5790348649024963,
      "learning_rate": 2.8102370142920214e-06,
      "loss": 0.0132,
      "step": 2202280
    },
    {
      "epoch": 3.6041122523124054,
      "grad_norm": 0.35524386167526245,
      "learning_rate": 2.8101711220785046e-06,
      "loss": 0.0082,
      "step": 2202300
    },
    {
      "epoch": 3.604144982751059,
      "grad_norm": 0.14746026694774628,
      "learning_rate": 2.8101052298649873e-06,
      "loss": 0.0105,
      "step": 2202320
    },
    {
      "epoch": 3.604177713189712,
      "grad_norm": 0.3528684377670288,
      "learning_rate": 2.81003933765147e-06,
      "loss": 0.0087,
      "step": 2202340
    },
    {
      "epoch": 3.6042104436283653,
      "grad_norm": 0.21120241284370422,
      "learning_rate": 2.8099734454379528e-06,
      "loss": 0.0152,
      "step": 2202360
    },
    {
      "epoch": 3.604243174067019,
      "grad_norm": 0.12565015256404877,
      "learning_rate": 2.809907553224436e-06,
      "loss": 0.0161,
      "step": 2202380
    },
    {
      "epoch": 3.604275904505672,
      "grad_norm": 0.05199870094656944,
      "learning_rate": 2.8098416610109187e-06,
      "loss": 0.0074,
      "step": 2202400
    },
    {
      "epoch": 3.6043086349443256,
      "grad_norm": 0.49680545926094055,
      "learning_rate": 2.8097757687974014e-06,
      "loss": 0.0124,
      "step": 2202420
    },
    {
      "epoch": 3.604341365382979,
      "grad_norm": 0.1779089719057083,
      "learning_rate": 2.809709876583884e-06,
      "loss": 0.0112,
      "step": 2202440
    },
    {
      "epoch": 3.6043740958216324,
      "grad_norm": 0.18953905999660492,
      "learning_rate": 2.809643984370367e-06,
      "loss": 0.0099,
      "step": 2202460
    },
    {
      "epoch": 3.6044068262602855,
      "grad_norm": 0.33295878767967224,
      "learning_rate": 2.80957809215685e-06,
      "loss": 0.0107,
      "step": 2202480
    },
    {
      "epoch": 3.6044395566989387,
      "grad_norm": 0.16598407924175262,
      "learning_rate": 2.809512199943333e-06,
      "loss": 0.0119,
      "step": 2202500
    },
    {
      "epoch": 3.6044722871375923,
      "grad_norm": 0.5288680791854858,
      "learning_rate": 2.8094463077298155e-06,
      "loss": 0.0136,
      "step": 2202520
    },
    {
      "epoch": 3.6045050175762454,
      "grad_norm": 0.10527944564819336,
      "learning_rate": 2.8093804155162983e-06,
      "loss": 0.0096,
      "step": 2202540
    },
    {
      "epoch": 3.604537748014899,
      "grad_norm": 0.09253945201635361,
      "learning_rate": 2.8093145233027815e-06,
      "loss": 0.0117,
      "step": 2202560
    },
    {
      "epoch": 3.604570478453552,
      "grad_norm": 0.1573542207479477,
      "learning_rate": 2.809248631089264e-06,
      "loss": 0.0121,
      "step": 2202580
    },
    {
      "epoch": 3.6046032088922058,
      "grad_norm": 0.22629903256893158,
      "learning_rate": 2.8091827388757474e-06,
      "loss": 0.0075,
      "step": 2202600
    },
    {
      "epoch": 3.604635939330859,
      "grad_norm": 0.6911869049072266,
      "learning_rate": 2.8091168466622305e-06,
      "loss": 0.0095,
      "step": 2202620
    },
    {
      "epoch": 3.604668669769512,
      "grad_norm": 0.3620097041130066,
      "learning_rate": 2.8090509544487133e-06,
      "loss": 0.0106,
      "step": 2202640
    },
    {
      "epoch": 3.6047014002081657,
      "grad_norm": 0.6330119967460632,
      "learning_rate": 2.808985062235196e-06,
      "loss": 0.0173,
      "step": 2202660
    },
    {
      "epoch": 3.604734130646819,
      "grad_norm": 0.25369390845298767,
      "learning_rate": 2.8089191700216787e-06,
      "loss": 0.0137,
      "step": 2202680
    },
    {
      "epoch": 3.6047668610854724,
      "grad_norm": 0.31589457392692566,
      "learning_rate": 2.808853277808162e-06,
      "loss": 0.0111,
      "step": 2202700
    },
    {
      "epoch": 3.6047995915241255,
      "grad_norm": 0.13297457993030548,
      "learning_rate": 2.8087873855946446e-06,
      "loss": 0.0107,
      "step": 2202720
    },
    {
      "epoch": 3.604832321962779,
      "grad_norm": 0.3609541952610016,
      "learning_rate": 2.8087214933811274e-06,
      "loss": 0.0081,
      "step": 2202740
    },
    {
      "epoch": 3.6048650524014323,
      "grad_norm": 0.34065282344818115,
      "learning_rate": 2.80865560116761e-06,
      "loss": 0.0103,
      "step": 2202760
    },
    {
      "epoch": 3.6048977828400854,
      "grad_norm": 0.8174771070480347,
      "learning_rate": 2.8085897089540933e-06,
      "loss": 0.018,
      "step": 2202780
    },
    {
      "epoch": 3.604930513278739,
      "grad_norm": 0.7004386186599731,
      "learning_rate": 2.808523816740576e-06,
      "loss": 0.0137,
      "step": 2202800
    },
    {
      "epoch": 3.604963243717392,
      "grad_norm": 0.13185876607894897,
      "learning_rate": 2.8084579245270588e-06,
      "loss": 0.0102,
      "step": 2202820
    },
    {
      "epoch": 3.604995974156046,
      "grad_norm": 0.8880437016487122,
      "learning_rate": 2.8083920323135415e-06,
      "loss": 0.0228,
      "step": 2202840
    },
    {
      "epoch": 3.605028704594699,
      "grad_norm": 0.2860678732395172,
      "learning_rate": 2.8083261401000247e-06,
      "loss": 0.013,
      "step": 2202860
    },
    {
      "epoch": 3.6050614350333525,
      "grad_norm": 0.11456578224897385,
      "learning_rate": 2.8082602478865074e-06,
      "loss": 0.0086,
      "step": 2202880
    },
    {
      "epoch": 3.6050941654720057,
      "grad_norm": 0.07640110701322556,
      "learning_rate": 2.80819435567299e-06,
      "loss": 0.0162,
      "step": 2202900
    },
    {
      "epoch": 3.605126895910659,
      "grad_norm": 0.5664086937904358,
      "learning_rate": 2.808128463459473e-06,
      "loss": 0.0113,
      "step": 2202920
    },
    {
      "epoch": 3.6051596263493124,
      "grad_norm": 0.2691720426082611,
      "learning_rate": 2.8080625712459556e-06,
      "loss": 0.0091,
      "step": 2202940
    },
    {
      "epoch": 3.6051923567879656,
      "grad_norm": 0.41689226031303406,
      "learning_rate": 2.8079966790324392e-06,
      "loss": 0.0084,
      "step": 2202960
    },
    {
      "epoch": 3.605225087226619,
      "grad_norm": 0.7537254691123962,
      "learning_rate": 2.807930786818922e-06,
      "loss": 0.0176,
      "step": 2202980
    },
    {
      "epoch": 3.6052578176652723,
      "grad_norm": 0.3015172779560089,
      "learning_rate": 2.8078648946054047e-06,
      "loss": 0.0095,
      "step": 2203000
    },
    {
      "epoch": 3.605290548103926,
      "grad_norm": 0.17361785471439362,
      "learning_rate": 2.807799002391888e-06,
      "loss": 0.0145,
      "step": 2203020
    },
    {
      "epoch": 3.605323278542579,
      "grad_norm": 0.43190956115722656,
      "learning_rate": 2.8077331101783706e-06,
      "loss": 0.0099,
      "step": 2203040
    },
    {
      "epoch": 3.605356008981232,
      "grad_norm": 0.21863988041877747,
      "learning_rate": 2.8076672179648533e-06,
      "loss": 0.0141,
      "step": 2203060
    },
    {
      "epoch": 3.605388739419886,
      "grad_norm": 0.4037351608276367,
      "learning_rate": 2.807601325751336e-06,
      "loss": 0.0107,
      "step": 2203080
    },
    {
      "epoch": 3.605421469858539,
      "grad_norm": 0.6783923506736755,
      "learning_rate": 2.8075354335378192e-06,
      "loss": 0.0103,
      "step": 2203100
    },
    {
      "epoch": 3.6054542002971925,
      "grad_norm": 0.22258135676383972,
      "learning_rate": 2.807469541324302e-06,
      "loss": 0.0084,
      "step": 2203120
    },
    {
      "epoch": 3.6054869307358457,
      "grad_norm": 0.2556068003177643,
      "learning_rate": 2.8074036491107847e-06,
      "loss": 0.0094,
      "step": 2203140
    },
    {
      "epoch": 3.6055196611744993,
      "grad_norm": 0.25803399085998535,
      "learning_rate": 2.8073377568972675e-06,
      "loss": 0.0143,
      "step": 2203160
    },
    {
      "epoch": 3.6055523916131524,
      "grad_norm": 0.1422414481639862,
      "learning_rate": 2.8072718646837506e-06,
      "loss": 0.0132,
      "step": 2203180
    },
    {
      "epoch": 3.6055851220518056,
      "grad_norm": 0.0848599523305893,
      "learning_rate": 2.8072059724702334e-06,
      "loss": 0.0131,
      "step": 2203200
    },
    {
      "epoch": 3.605617852490459,
      "grad_norm": 0.2810969054698944,
      "learning_rate": 2.807140080256716e-06,
      "loss": 0.0156,
      "step": 2203220
    },
    {
      "epoch": 3.6056505829291123,
      "grad_norm": 0.06890615820884705,
      "learning_rate": 2.807074188043199e-06,
      "loss": 0.0111,
      "step": 2203240
    },
    {
      "epoch": 3.6056833133677655,
      "grad_norm": 0.6782239675521851,
      "learning_rate": 2.807008295829682e-06,
      "loss": 0.0111,
      "step": 2203260
    },
    {
      "epoch": 3.605716043806419,
      "grad_norm": 0.08453388512134552,
      "learning_rate": 2.8069424036161647e-06,
      "loss": 0.0096,
      "step": 2203280
    },
    {
      "epoch": 3.6057487742450727,
      "grad_norm": 0.17398469150066376,
      "learning_rate": 2.806876511402648e-06,
      "loss": 0.0132,
      "step": 2203300
    },
    {
      "epoch": 3.605781504683726,
      "grad_norm": 0.08239616453647614,
      "learning_rate": 2.806810619189131e-06,
      "loss": 0.0101,
      "step": 2203320
    },
    {
      "epoch": 3.605814235122379,
      "grad_norm": 0.3308187425136566,
      "learning_rate": 2.806744726975614e-06,
      "loss": 0.01,
      "step": 2203340
    },
    {
      "epoch": 3.6058469655610326,
      "grad_norm": 0.3011704981327057,
      "learning_rate": 2.8066788347620965e-06,
      "loss": 0.0107,
      "step": 2203360
    },
    {
      "epoch": 3.6058796959996857,
      "grad_norm": 0.2793222963809967,
      "learning_rate": 2.8066129425485793e-06,
      "loss": 0.0107,
      "step": 2203380
    },
    {
      "epoch": 3.605912426438339,
      "grad_norm": 0.40424561500549316,
      "learning_rate": 2.8065470503350625e-06,
      "loss": 0.0083,
      "step": 2203400
    },
    {
      "epoch": 3.6059451568769925,
      "grad_norm": 0.20994235575199127,
      "learning_rate": 2.806481158121545e-06,
      "loss": 0.0094,
      "step": 2203420
    },
    {
      "epoch": 3.605977887315646,
      "grad_norm": 0.29601240158081055,
      "learning_rate": 2.806415265908028e-06,
      "loss": 0.016,
      "step": 2203440
    },
    {
      "epoch": 3.606010617754299,
      "grad_norm": 0.23377780616283417,
      "learning_rate": 2.8063493736945107e-06,
      "loss": 0.0095,
      "step": 2203460
    },
    {
      "epoch": 3.6060433481929524,
      "grad_norm": 0.1932699978351593,
      "learning_rate": 2.8062834814809934e-06,
      "loss": 0.0115,
      "step": 2203480
    },
    {
      "epoch": 3.606076078631606,
      "grad_norm": 0.10784042626619339,
      "learning_rate": 2.8062175892674766e-06,
      "loss": 0.0092,
      "step": 2203500
    },
    {
      "epoch": 3.606108809070259,
      "grad_norm": 0.4677780270576477,
      "learning_rate": 2.8061516970539593e-06,
      "loss": 0.0111,
      "step": 2203520
    },
    {
      "epoch": 3.6061415395089123,
      "grad_norm": 0.21147803962230682,
      "learning_rate": 2.806085804840442e-06,
      "loss": 0.0151,
      "step": 2203540
    },
    {
      "epoch": 3.606174269947566,
      "grad_norm": 0.5003637075424194,
      "learning_rate": 2.806019912626925e-06,
      "loss": 0.0113,
      "step": 2203560
    },
    {
      "epoch": 3.6062070003862194,
      "grad_norm": 0.23579959571361542,
      "learning_rate": 2.805954020413408e-06,
      "loss": 0.0099,
      "step": 2203580
    },
    {
      "epoch": 3.6062397308248726,
      "grad_norm": 0.19130222499370575,
      "learning_rate": 2.8058881281998907e-06,
      "loss": 0.0079,
      "step": 2203600
    },
    {
      "epoch": 3.6062724612635257,
      "grad_norm": 0.42410844564437866,
      "learning_rate": 2.8058222359863734e-06,
      "loss": 0.0129,
      "step": 2203620
    },
    {
      "epoch": 3.6063051917021793,
      "grad_norm": 0.25053948163986206,
      "learning_rate": 2.805756343772856e-06,
      "loss": 0.0114,
      "step": 2203640
    },
    {
      "epoch": 3.6063379221408325,
      "grad_norm": 0.4876338541507721,
      "learning_rate": 2.8056904515593398e-06,
      "loss": 0.017,
      "step": 2203660
    },
    {
      "epoch": 3.6063706525794856,
      "grad_norm": 0.5551321506500244,
      "learning_rate": 2.8056245593458225e-06,
      "loss": 0.0154,
      "step": 2203680
    },
    {
      "epoch": 3.6064033830181392,
      "grad_norm": 0.26746025681495667,
      "learning_rate": 2.8055586671323052e-06,
      "loss": 0.0142,
      "step": 2203700
    },
    {
      "epoch": 3.606436113456793,
      "grad_norm": 0.1315467357635498,
      "learning_rate": 2.8054927749187884e-06,
      "loss": 0.0109,
      "step": 2203720
    },
    {
      "epoch": 3.606468843895446,
      "grad_norm": 0.3198954164981842,
      "learning_rate": 2.805426882705271e-06,
      "loss": 0.0086,
      "step": 2203740
    },
    {
      "epoch": 3.606501574334099,
      "grad_norm": 0.5550175905227661,
      "learning_rate": 2.805360990491754e-06,
      "loss": 0.0123,
      "step": 2203760
    },
    {
      "epoch": 3.6065343047727527,
      "grad_norm": 0.11140944808721542,
      "learning_rate": 2.8052950982782366e-06,
      "loss": 0.007,
      "step": 2203780
    },
    {
      "epoch": 3.606567035211406,
      "grad_norm": 0.8251681923866272,
      "learning_rate": 2.8052292060647198e-06,
      "loss": 0.0146,
      "step": 2203800
    },
    {
      "epoch": 3.606599765650059,
      "grad_norm": 0.964208722114563,
      "learning_rate": 2.8051633138512025e-06,
      "loss": 0.0137,
      "step": 2203820
    },
    {
      "epoch": 3.6066324960887126,
      "grad_norm": 0.20019596815109253,
      "learning_rate": 2.8050974216376853e-06,
      "loss": 0.0126,
      "step": 2203840
    },
    {
      "epoch": 3.6066652265273658,
      "grad_norm": 0.3193090260028839,
      "learning_rate": 2.805031529424168e-06,
      "loss": 0.0115,
      "step": 2203860
    },
    {
      "epoch": 3.6066979569660194,
      "grad_norm": 0.5777187347412109,
      "learning_rate": 2.804965637210651e-06,
      "loss": 0.01,
      "step": 2203880
    },
    {
      "epoch": 3.6067306874046725,
      "grad_norm": 0.3072018325328827,
      "learning_rate": 2.804899744997134e-06,
      "loss": 0.0117,
      "step": 2203900
    },
    {
      "epoch": 3.606763417843326,
      "grad_norm": 0.35264524817466736,
      "learning_rate": 2.8048338527836166e-06,
      "loss": 0.0112,
      "step": 2203920
    },
    {
      "epoch": 3.6067961482819793,
      "grad_norm": 0.19640401005744934,
      "learning_rate": 2.8047679605700994e-06,
      "loss": 0.0096,
      "step": 2203940
    },
    {
      "epoch": 3.6068288787206324,
      "grad_norm": 0.13420668244361877,
      "learning_rate": 2.804702068356582e-06,
      "loss": 0.008,
      "step": 2203960
    },
    {
      "epoch": 3.606861609159286,
      "grad_norm": 0.5024933815002441,
      "learning_rate": 2.8046361761430653e-06,
      "loss": 0.0123,
      "step": 2203980
    },
    {
      "epoch": 3.606894339597939,
      "grad_norm": 0.18235816061496735,
      "learning_rate": 2.804570283929548e-06,
      "loss": 0.0103,
      "step": 2204000
    },
    {
      "epoch": 3.6069270700365927,
      "grad_norm": 0.30757835507392883,
      "learning_rate": 2.804504391716031e-06,
      "loss": 0.0128,
      "step": 2204020
    },
    {
      "epoch": 3.606959800475246,
      "grad_norm": 0.13824297487735748,
      "learning_rate": 2.8044384995025144e-06,
      "loss": 0.0142,
      "step": 2204040
    },
    {
      "epoch": 3.6069925309138995,
      "grad_norm": 0.22209720313549042,
      "learning_rate": 2.804372607288997e-06,
      "loss": 0.0145,
      "step": 2204060
    },
    {
      "epoch": 3.6070252613525526,
      "grad_norm": 0.8429542183876038,
      "learning_rate": 2.80430671507548e-06,
      "loss": 0.0122,
      "step": 2204080
    },
    {
      "epoch": 3.607057991791206,
      "grad_norm": 0.16552047431468964,
      "learning_rate": 2.8042408228619626e-06,
      "loss": 0.008,
      "step": 2204100
    },
    {
      "epoch": 3.6070907222298594,
      "grad_norm": 0.1650400459766388,
      "learning_rate": 2.8041749306484457e-06,
      "loss": 0.0122,
      "step": 2204120
    },
    {
      "epoch": 3.6071234526685125,
      "grad_norm": 0.16496844589710236,
      "learning_rate": 2.8041090384349285e-06,
      "loss": 0.0101,
      "step": 2204140
    },
    {
      "epoch": 3.607156183107166,
      "grad_norm": 0.13783246278762817,
      "learning_rate": 2.8040431462214112e-06,
      "loss": 0.0084,
      "step": 2204160
    },
    {
      "epoch": 3.6071889135458193,
      "grad_norm": 0.1899859607219696,
      "learning_rate": 2.803977254007894e-06,
      "loss": 0.0093,
      "step": 2204180
    },
    {
      "epoch": 3.607221643984473,
      "grad_norm": 0.2161647528409958,
      "learning_rate": 2.803911361794377e-06,
      "loss": 0.0149,
      "step": 2204200
    },
    {
      "epoch": 3.607254374423126,
      "grad_norm": 0.2829092741012573,
      "learning_rate": 2.80384546958086e-06,
      "loss": 0.0106,
      "step": 2204220
    },
    {
      "epoch": 3.607287104861779,
      "grad_norm": 0.2952424883842468,
      "learning_rate": 2.8037795773673426e-06,
      "loss": 0.0122,
      "step": 2204240
    },
    {
      "epoch": 3.6073198353004328,
      "grad_norm": 0.3850443661212921,
      "learning_rate": 2.8037136851538253e-06,
      "loss": 0.0138,
      "step": 2204260
    },
    {
      "epoch": 3.607352565739086,
      "grad_norm": 0.8639363050460815,
      "learning_rate": 2.8036477929403085e-06,
      "loss": 0.0102,
      "step": 2204280
    },
    {
      "epoch": 3.6073852961777395,
      "grad_norm": 0.17413286864757538,
      "learning_rate": 2.8035819007267912e-06,
      "loss": 0.0106,
      "step": 2204300
    },
    {
      "epoch": 3.6074180266163927,
      "grad_norm": 0.19834545254707336,
      "learning_rate": 2.803516008513274e-06,
      "loss": 0.0095,
      "step": 2204320
    },
    {
      "epoch": 3.6074507570550463,
      "grad_norm": 0.670993447303772,
      "learning_rate": 2.8034501162997567e-06,
      "loss": 0.0163,
      "step": 2204340
    },
    {
      "epoch": 3.6074834874936994,
      "grad_norm": 0.23926672339439392,
      "learning_rate": 2.8033842240862403e-06,
      "loss": 0.0124,
      "step": 2204360
    },
    {
      "epoch": 3.6075162179323526,
      "grad_norm": 0.1187613308429718,
      "learning_rate": 2.803318331872723e-06,
      "loss": 0.0135,
      "step": 2204380
    },
    {
      "epoch": 3.607548948371006,
      "grad_norm": 0.2531068027019501,
      "learning_rate": 2.803252439659206e-06,
      "loss": 0.0086,
      "step": 2204400
    },
    {
      "epoch": 3.6075816788096593,
      "grad_norm": 0.2138054519891739,
      "learning_rate": 2.803186547445689e-06,
      "loss": 0.0106,
      "step": 2204420
    },
    {
      "epoch": 3.607614409248313,
      "grad_norm": 0.13783913850784302,
      "learning_rate": 2.8031206552321717e-06,
      "loss": 0.0133,
      "step": 2204440
    },
    {
      "epoch": 3.607647139686966,
      "grad_norm": 0.2055606096982956,
      "learning_rate": 2.8030547630186544e-06,
      "loss": 0.0084,
      "step": 2204460
    },
    {
      "epoch": 3.6076798701256196,
      "grad_norm": 0.42722073197364807,
      "learning_rate": 2.802988870805137e-06,
      "loss": 0.0103,
      "step": 2204480
    },
    {
      "epoch": 3.607712600564273,
      "grad_norm": 0.15506689250469208,
      "learning_rate": 2.80292297859162e-06,
      "loss": 0.0166,
      "step": 2204500
    },
    {
      "epoch": 3.607745331002926,
      "grad_norm": 0.11720669269561768,
      "learning_rate": 2.802857086378103e-06,
      "loss": 0.0094,
      "step": 2204520
    },
    {
      "epoch": 3.6077780614415795,
      "grad_norm": 0.26734593510627747,
      "learning_rate": 2.802791194164586e-06,
      "loss": 0.01,
      "step": 2204540
    },
    {
      "epoch": 3.6078107918802327,
      "grad_norm": 0.222816601395607,
      "learning_rate": 2.8027253019510686e-06,
      "loss": 0.0111,
      "step": 2204560
    },
    {
      "epoch": 3.6078435223188863,
      "grad_norm": 0.3486121892929077,
      "learning_rate": 2.8026594097375513e-06,
      "loss": 0.0124,
      "step": 2204580
    },
    {
      "epoch": 3.6078762527575394,
      "grad_norm": 0.5023618936538696,
      "learning_rate": 2.8025935175240345e-06,
      "loss": 0.01,
      "step": 2204600
    },
    {
      "epoch": 3.607908983196193,
      "grad_norm": 0.20198436081409454,
      "learning_rate": 2.802527625310517e-06,
      "loss": 0.0131,
      "step": 2204620
    },
    {
      "epoch": 3.607941713634846,
      "grad_norm": 0.3934405446052551,
      "learning_rate": 2.802461733097e-06,
      "loss": 0.009,
      "step": 2204640
    },
    {
      "epoch": 3.6079744440734993,
      "grad_norm": 0.4848816394805908,
      "learning_rate": 2.8023958408834827e-06,
      "loss": 0.0075,
      "step": 2204660
    },
    {
      "epoch": 3.608007174512153,
      "grad_norm": 0.8863440155982971,
      "learning_rate": 2.802329948669966e-06,
      "loss": 0.0127,
      "step": 2204680
    },
    {
      "epoch": 3.608039904950806,
      "grad_norm": 0.3534793555736542,
      "learning_rate": 2.8022640564564486e-06,
      "loss": 0.013,
      "step": 2204700
    },
    {
      "epoch": 3.608072635389459,
      "grad_norm": 0.37524011731147766,
      "learning_rate": 2.8021981642429317e-06,
      "loss": 0.0075,
      "step": 2204720
    },
    {
      "epoch": 3.608105365828113,
      "grad_norm": 0.4160020351409912,
      "learning_rate": 2.802132272029415e-06,
      "loss": 0.0147,
      "step": 2204740
    },
    {
      "epoch": 3.6081380962667664,
      "grad_norm": 0.3618796467781067,
      "learning_rate": 2.8020663798158976e-06,
      "loss": 0.0108,
      "step": 2204760
    },
    {
      "epoch": 3.6081708267054196,
      "grad_norm": 0.3142634928226471,
      "learning_rate": 2.8020004876023804e-06,
      "loss": 0.0112,
      "step": 2204780
    },
    {
      "epoch": 3.6082035571440727,
      "grad_norm": 0.5637558102607727,
      "learning_rate": 2.801934595388863e-06,
      "loss": 0.011,
      "step": 2204800
    },
    {
      "epoch": 3.6082362875827263,
      "grad_norm": 0.5449420809745789,
      "learning_rate": 2.8018687031753463e-06,
      "loss": 0.0091,
      "step": 2204820
    },
    {
      "epoch": 3.6082690180213794,
      "grad_norm": 0.1986197531223297,
      "learning_rate": 2.801802810961829e-06,
      "loss": 0.0086,
      "step": 2204840
    },
    {
      "epoch": 3.6083017484600326,
      "grad_norm": 0.18556755781173706,
      "learning_rate": 2.8017369187483118e-06,
      "loss": 0.0076,
      "step": 2204860
    },
    {
      "epoch": 3.608334478898686,
      "grad_norm": 0.5258834958076477,
      "learning_rate": 2.8016710265347945e-06,
      "loss": 0.008,
      "step": 2204880
    },
    {
      "epoch": 3.60836720933734,
      "grad_norm": 0.22695805132389069,
      "learning_rate": 2.8016051343212772e-06,
      "loss": 0.0114,
      "step": 2204900
    },
    {
      "epoch": 3.608399939775993,
      "grad_norm": 0.25262579321861267,
      "learning_rate": 2.8015392421077604e-06,
      "loss": 0.0146,
      "step": 2204920
    },
    {
      "epoch": 3.608432670214646,
      "grad_norm": 0.2539439797401428,
      "learning_rate": 2.801473349894243e-06,
      "loss": 0.0148,
      "step": 2204940
    },
    {
      "epoch": 3.6084654006532997,
      "grad_norm": 0.36284753680229187,
      "learning_rate": 2.801407457680726e-06,
      "loss": 0.0113,
      "step": 2204960
    },
    {
      "epoch": 3.608498131091953,
      "grad_norm": 0.3492213189601898,
      "learning_rate": 2.8013415654672086e-06,
      "loss": 0.0122,
      "step": 2204980
    },
    {
      "epoch": 3.608530861530606,
      "grad_norm": 0.23120661079883575,
      "learning_rate": 2.801275673253692e-06,
      "loss": 0.0123,
      "step": 2205000
    },
    {
      "epoch": 3.6085635919692596,
      "grad_norm": 0.07140323519706726,
      "learning_rate": 2.8012097810401745e-06,
      "loss": 0.0082,
      "step": 2205020
    },
    {
      "epoch": 3.608596322407913,
      "grad_norm": 0.20518940687179565,
      "learning_rate": 2.8011438888266573e-06,
      "loss": 0.0116,
      "step": 2205040
    },
    {
      "epoch": 3.6086290528465663,
      "grad_norm": 0.3081376552581787,
      "learning_rate": 2.801077996613141e-06,
      "loss": 0.0106,
      "step": 2205060
    },
    {
      "epoch": 3.6086617832852195,
      "grad_norm": 0.3102659583091736,
      "learning_rate": 2.8010121043996236e-06,
      "loss": 0.0148,
      "step": 2205080
    },
    {
      "epoch": 3.608694513723873,
      "grad_norm": 0.0862734466791153,
      "learning_rate": 2.8009462121861063e-06,
      "loss": 0.01,
      "step": 2205100
    },
    {
      "epoch": 3.608727244162526,
      "grad_norm": 0.2548067271709442,
      "learning_rate": 2.800880319972589e-06,
      "loss": 0.0106,
      "step": 2205120
    },
    {
      "epoch": 3.6087599746011794,
      "grad_norm": 0.23265884816646576,
      "learning_rate": 2.8008144277590722e-06,
      "loss": 0.0062,
      "step": 2205140
    },
    {
      "epoch": 3.608792705039833,
      "grad_norm": 0.3013826310634613,
      "learning_rate": 2.800748535545555e-06,
      "loss": 0.0131,
      "step": 2205160
    },
    {
      "epoch": 3.6088254354784866,
      "grad_norm": 0.09152229130268097,
      "learning_rate": 2.8006826433320377e-06,
      "loss": 0.0118,
      "step": 2205180
    },
    {
      "epoch": 3.6088581659171397,
      "grad_norm": 0.08811858296394348,
      "learning_rate": 2.8006167511185205e-06,
      "loss": 0.0097,
      "step": 2205200
    },
    {
      "epoch": 3.608890896355793,
      "grad_norm": 0.45730069279670715,
      "learning_rate": 2.8005508589050036e-06,
      "loss": 0.0117,
      "step": 2205220
    },
    {
      "epoch": 3.6089236267944464,
      "grad_norm": 0.18785686790943146,
      "learning_rate": 2.8004849666914864e-06,
      "loss": 0.0124,
      "step": 2205240
    },
    {
      "epoch": 3.6089563572330996,
      "grad_norm": 0.15673887729644775,
      "learning_rate": 2.800419074477969e-06,
      "loss": 0.0089,
      "step": 2205260
    },
    {
      "epoch": 3.6089890876717527,
      "grad_norm": 0.11650865525007248,
      "learning_rate": 2.800353182264452e-06,
      "loss": 0.013,
      "step": 2205280
    },
    {
      "epoch": 3.6090218181104063,
      "grad_norm": 0.28022152185440063,
      "learning_rate": 2.800287290050935e-06,
      "loss": 0.0115,
      "step": 2205300
    },
    {
      "epoch": 3.6090545485490595,
      "grad_norm": 0.17648272216320038,
      "learning_rate": 2.8002213978374177e-06,
      "loss": 0.0074,
      "step": 2205320
    },
    {
      "epoch": 3.609087278987713,
      "grad_norm": 0.25608766078948975,
      "learning_rate": 2.8001555056239005e-06,
      "loss": 0.0072,
      "step": 2205340
    },
    {
      "epoch": 3.6091200094263662,
      "grad_norm": 0.3710591495037079,
      "learning_rate": 2.8000896134103832e-06,
      "loss": 0.0093,
      "step": 2205360
    },
    {
      "epoch": 3.60915273986502,
      "grad_norm": 0.12073492258787155,
      "learning_rate": 2.800023721196866e-06,
      "loss": 0.0079,
      "step": 2205380
    },
    {
      "epoch": 3.609185470303673,
      "grad_norm": 0.22459770739078522,
      "learning_rate": 2.799957828983349e-06,
      "loss": 0.0077,
      "step": 2205400
    },
    {
      "epoch": 3.609218200742326,
      "grad_norm": 0.08319900929927826,
      "learning_rate": 2.7998919367698323e-06,
      "loss": 0.0113,
      "step": 2205420
    },
    {
      "epoch": 3.6092509311809797,
      "grad_norm": 0.4226226210594177,
      "learning_rate": 2.799826044556315e-06,
      "loss": 0.0089,
      "step": 2205440
    },
    {
      "epoch": 3.609283661619633,
      "grad_norm": 0.30574142932891846,
      "learning_rate": 2.799760152342798e-06,
      "loss": 0.0105,
      "step": 2205460
    },
    {
      "epoch": 3.6093163920582865,
      "grad_norm": 0.022587036713957787,
      "learning_rate": 2.799694260129281e-06,
      "loss": 0.009,
      "step": 2205480
    },
    {
      "epoch": 3.6093491224969396,
      "grad_norm": 0.3424597680568695,
      "learning_rate": 2.7996283679157637e-06,
      "loss": 0.009,
      "step": 2205500
    },
    {
      "epoch": 3.609381852935593,
      "grad_norm": 0.19671638309955597,
      "learning_rate": 2.7995624757022464e-06,
      "loss": 0.0123,
      "step": 2205520
    },
    {
      "epoch": 3.6094145833742464,
      "grad_norm": 0.37539008259773254,
      "learning_rate": 2.7994965834887296e-06,
      "loss": 0.0147,
      "step": 2205540
    },
    {
      "epoch": 3.6094473138128995,
      "grad_norm": 1.792995572090149,
      "learning_rate": 2.7994306912752123e-06,
      "loss": 0.0095,
      "step": 2205560
    },
    {
      "epoch": 3.609480044251553,
      "grad_norm": 0.09424380213022232,
      "learning_rate": 2.799364799061695e-06,
      "loss": 0.0085,
      "step": 2205580
    },
    {
      "epoch": 3.6095127746902063,
      "grad_norm": 0.1882023811340332,
      "learning_rate": 2.799298906848178e-06,
      "loss": 0.0107,
      "step": 2205600
    },
    {
      "epoch": 3.60954550512886,
      "grad_norm": 0.04873858764767647,
      "learning_rate": 2.799233014634661e-06,
      "loss": 0.0117,
      "step": 2205620
    },
    {
      "epoch": 3.609578235567513,
      "grad_norm": 0.105844646692276,
      "learning_rate": 2.7991671224211437e-06,
      "loss": 0.0152,
      "step": 2205640
    },
    {
      "epoch": 3.6096109660061666,
      "grad_norm": 0.08675798028707504,
      "learning_rate": 2.7991012302076264e-06,
      "loss": 0.0084,
      "step": 2205660
    },
    {
      "epoch": 3.6096436964448197,
      "grad_norm": 0.17386911809444427,
      "learning_rate": 2.799035337994109e-06,
      "loss": 0.0103,
      "step": 2205680
    },
    {
      "epoch": 3.609676426883473,
      "grad_norm": 0.425647109746933,
      "learning_rate": 2.7989694457805923e-06,
      "loss": 0.0157,
      "step": 2205700
    },
    {
      "epoch": 3.6097091573221265,
      "grad_norm": 0.1832340657711029,
      "learning_rate": 2.798903553567075e-06,
      "loss": 0.0112,
      "step": 2205720
    },
    {
      "epoch": 3.6097418877607796,
      "grad_norm": 0.07748729735612869,
      "learning_rate": 2.798837661353558e-06,
      "loss": 0.0084,
      "step": 2205740
    },
    {
      "epoch": 3.6097746181994332,
      "grad_norm": 0.508176326751709,
      "learning_rate": 2.7987717691400406e-06,
      "loss": 0.0165,
      "step": 2205760
    },
    {
      "epoch": 3.6098073486380864,
      "grad_norm": 0.2744464874267578,
      "learning_rate": 2.798705876926524e-06,
      "loss": 0.0178,
      "step": 2205780
    },
    {
      "epoch": 3.60984007907674,
      "grad_norm": 0.11849936097860336,
      "learning_rate": 2.798639984713007e-06,
      "loss": 0.0156,
      "step": 2205800
    },
    {
      "epoch": 3.609872809515393,
      "grad_norm": 0.19536080956459045,
      "learning_rate": 2.7985740924994896e-06,
      "loss": 0.0103,
      "step": 2205820
    },
    {
      "epoch": 3.6099055399540463,
      "grad_norm": 0.24742838740348816,
      "learning_rate": 2.798508200285973e-06,
      "loss": 0.0079,
      "step": 2205840
    },
    {
      "epoch": 3.6099382703927,
      "grad_norm": 0.12190605700016022,
      "learning_rate": 2.7984423080724555e-06,
      "loss": 0.0078,
      "step": 2205860
    },
    {
      "epoch": 3.609971000831353,
      "grad_norm": 0.6811469197273254,
      "learning_rate": 2.7983764158589383e-06,
      "loss": 0.0134,
      "step": 2205880
    },
    {
      "epoch": 3.6100037312700066,
      "grad_norm": 0.059576183557510376,
      "learning_rate": 2.798310523645421e-06,
      "loss": 0.0082,
      "step": 2205900
    },
    {
      "epoch": 3.6100364617086598,
      "grad_norm": 0.11949807405471802,
      "learning_rate": 2.7982446314319038e-06,
      "loss": 0.0104,
      "step": 2205920
    },
    {
      "epoch": 3.6100691921473134,
      "grad_norm": 0.26969113945961,
      "learning_rate": 2.798178739218387e-06,
      "loss": 0.0121,
      "step": 2205940
    },
    {
      "epoch": 3.6101019225859665,
      "grad_norm": 0.5258713960647583,
      "learning_rate": 2.7981128470048697e-06,
      "loss": 0.0115,
      "step": 2205960
    },
    {
      "epoch": 3.6101346530246197,
      "grad_norm": 0.5317054986953735,
      "learning_rate": 2.7980469547913524e-06,
      "loss": 0.0123,
      "step": 2205980
    },
    {
      "epoch": 3.6101673834632733,
      "grad_norm": 0.4905807077884674,
      "learning_rate": 2.797981062577835e-06,
      "loss": 0.0135,
      "step": 2206000
    },
    {
      "epoch": 3.6102001139019264,
      "grad_norm": 0.6005521416664124,
      "learning_rate": 2.7979151703643183e-06,
      "loss": 0.0136,
      "step": 2206020
    },
    {
      "epoch": 3.61023284434058,
      "grad_norm": 0.29358723759651184,
      "learning_rate": 2.797849278150801e-06,
      "loss": 0.01,
      "step": 2206040
    },
    {
      "epoch": 3.610265574779233,
      "grad_norm": 0.17496813833713531,
      "learning_rate": 2.7977833859372838e-06,
      "loss": 0.0134,
      "step": 2206060
    },
    {
      "epoch": 3.6102983052178867,
      "grad_norm": 0.3188207447528839,
      "learning_rate": 2.7977174937237665e-06,
      "loss": 0.0087,
      "step": 2206080
    },
    {
      "epoch": 3.61033103565654,
      "grad_norm": 0.07280072569847107,
      "learning_rate": 2.7976516015102497e-06,
      "loss": 0.0118,
      "step": 2206100
    },
    {
      "epoch": 3.610363766095193,
      "grad_norm": 0.5487276911735535,
      "learning_rate": 2.797585709296733e-06,
      "loss": 0.01,
      "step": 2206120
    },
    {
      "epoch": 3.6103964965338466,
      "grad_norm": 0.29941055178642273,
      "learning_rate": 2.7975198170832156e-06,
      "loss": 0.0103,
      "step": 2206140
    },
    {
      "epoch": 3.6104292269725,
      "grad_norm": 0.24877099692821503,
      "learning_rate": 2.7974539248696987e-06,
      "loss": 0.0063,
      "step": 2206160
    },
    {
      "epoch": 3.6104619574111534,
      "grad_norm": 0.3327077329158783,
      "learning_rate": 2.7973880326561815e-06,
      "loss": 0.0109,
      "step": 2206180
    },
    {
      "epoch": 3.6104946878498065,
      "grad_norm": 0.10924558341503143,
      "learning_rate": 2.7973221404426642e-06,
      "loss": 0.0115,
      "step": 2206200
    },
    {
      "epoch": 3.61052741828846,
      "grad_norm": 0.22558237612247467,
      "learning_rate": 2.797256248229147e-06,
      "loss": 0.0116,
      "step": 2206220
    },
    {
      "epoch": 3.6105601487271133,
      "grad_norm": 0.10224393755197525,
      "learning_rate": 2.79719035601563e-06,
      "loss": 0.0114,
      "step": 2206240
    },
    {
      "epoch": 3.6105928791657664,
      "grad_norm": 0.4636445641517639,
      "learning_rate": 2.797124463802113e-06,
      "loss": 0.0086,
      "step": 2206260
    },
    {
      "epoch": 3.61062560960442,
      "grad_norm": 0.11056564748287201,
      "learning_rate": 2.7970585715885956e-06,
      "loss": 0.011,
      "step": 2206280
    },
    {
      "epoch": 3.610658340043073,
      "grad_norm": 0.29275617003440857,
      "learning_rate": 2.7969926793750783e-06,
      "loss": 0.006,
      "step": 2206300
    },
    {
      "epoch": 3.6106910704817263,
      "grad_norm": 0.2717682421207428,
      "learning_rate": 2.796926787161561e-06,
      "loss": 0.0086,
      "step": 2206320
    },
    {
      "epoch": 3.61072380092038,
      "grad_norm": 0.23216189444065094,
      "learning_rate": 2.7968608949480443e-06,
      "loss": 0.012,
      "step": 2206340
    },
    {
      "epoch": 3.6107565313590335,
      "grad_norm": 0.1948973387479782,
      "learning_rate": 2.796795002734527e-06,
      "loss": 0.0099,
      "step": 2206360
    },
    {
      "epoch": 3.6107892617976867,
      "grad_norm": 0.12138942629098892,
      "learning_rate": 2.7967291105210097e-06,
      "loss": 0.0119,
      "step": 2206380
    },
    {
      "epoch": 3.61082199223634,
      "grad_norm": 0.10771115869283676,
      "learning_rate": 2.7966632183074925e-06,
      "loss": 0.0082,
      "step": 2206400
    },
    {
      "epoch": 3.6108547226749934,
      "grad_norm": 0.14365197718143463,
      "learning_rate": 2.7965973260939756e-06,
      "loss": 0.009,
      "step": 2206420
    },
    {
      "epoch": 3.6108874531136466,
      "grad_norm": 0.2545904219150543,
      "learning_rate": 2.7965314338804584e-06,
      "loss": 0.0138,
      "step": 2206440
    },
    {
      "epoch": 3.6109201835522997,
      "grad_norm": 0.2350357174873352,
      "learning_rate": 2.796465541666941e-06,
      "loss": 0.0142,
      "step": 2206460
    },
    {
      "epoch": 3.6109529139909533,
      "grad_norm": 0.26745039224624634,
      "learning_rate": 2.7963996494534247e-06,
      "loss": 0.0129,
      "step": 2206480
    },
    {
      "epoch": 3.610985644429607,
      "grad_norm": 0.48900356888771057,
      "learning_rate": 2.7963337572399074e-06,
      "loss": 0.0137,
      "step": 2206500
    },
    {
      "epoch": 3.61101837486826,
      "grad_norm": 0.28416508436203003,
      "learning_rate": 2.79626786502639e-06,
      "loss": 0.011,
      "step": 2206520
    },
    {
      "epoch": 3.611051105306913,
      "grad_norm": 0.838148295879364,
      "learning_rate": 2.796201972812873e-06,
      "loss": 0.0095,
      "step": 2206540
    },
    {
      "epoch": 3.611083835745567,
      "grad_norm": 0.246151864528656,
      "learning_rate": 2.796136080599356e-06,
      "loss": 0.0109,
      "step": 2206560
    },
    {
      "epoch": 3.61111656618422,
      "grad_norm": 0.35218724608421326,
      "learning_rate": 2.796070188385839e-06,
      "loss": 0.0135,
      "step": 2206580
    },
    {
      "epoch": 3.611149296622873,
      "grad_norm": 0.35581520199775696,
      "learning_rate": 2.7960042961723216e-06,
      "loss": 0.008,
      "step": 2206600
    },
    {
      "epoch": 3.6111820270615267,
      "grad_norm": 0.3011264204978943,
      "learning_rate": 2.7959384039588043e-06,
      "loss": 0.0106,
      "step": 2206620
    },
    {
      "epoch": 3.6112147575001803,
      "grad_norm": 0.39008310437202454,
      "learning_rate": 2.7958725117452875e-06,
      "loss": 0.0139,
      "step": 2206640
    },
    {
      "epoch": 3.6112474879388334,
      "grad_norm": 0.13840574026107788,
      "learning_rate": 2.79580661953177e-06,
      "loss": 0.0088,
      "step": 2206660
    },
    {
      "epoch": 3.6112802183774866,
      "grad_norm": 0.06258970499038696,
      "learning_rate": 2.795740727318253e-06,
      "loss": 0.009,
      "step": 2206680
    },
    {
      "epoch": 3.61131294881614,
      "grad_norm": 0.32173243165016174,
      "learning_rate": 2.7956748351047357e-06,
      "loss": 0.0098,
      "step": 2206700
    },
    {
      "epoch": 3.6113456792547933,
      "grad_norm": 0.20500855147838593,
      "learning_rate": 2.795608942891219e-06,
      "loss": 0.022,
      "step": 2206720
    },
    {
      "epoch": 3.6113784096934465,
      "grad_norm": 0.41198161244392395,
      "learning_rate": 2.7955430506777016e-06,
      "loss": 0.012,
      "step": 2206740
    },
    {
      "epoch": 3.6114111401321,
      "grad_norm": 0.11492336541414261,
      "learning_rate": 2.7954771584641843e-06,
      "loss": 0.0146,
      "step": 2206760
    },
    {
      "epoch": 3.6114438705707532,
      "grad_norm": 0.14733505249023438,
      "learning_rate": 2.795411266250667e-06,
      "loss": 0.0115,
      "step": 2206780
    },
    {
      "epoch": 3.611476601009407,
      "grad_norm": 0.45316633582115173,
      "learning_rate": 2.79534537403715e-06,
      "loss": 0.0124,
      "step": 2206800
    },
    {
      "epoch": 3.61150933144806,
      "grad_norm": 0.10555519163608551,
      "learning_rate": 2.7952794818236334e-06,
      "loss": 0.0111,
      "step": 2206820
    },
    {
      "epoch": 3.6115420618867136,
      "grad_norm": 0.3738802373409271,
      "learning_rate": 2.795213589610116e-06,
      "loss": 0.0121,
      "step": 2206840
    },
    {
      "epoch": 3.6115747923253667,
      "grad_norm": 0.23784539103507996,
      "learning_rate": 2.795147697396599e-06,
      "loss": 0.0073,
      "step": 2206860
    },
    {
      "epoch": 3.61160752276402,
      "grad_norm": 0.9307608008384705,
      "learning_rate": 2.795081805183082e-06,
      "loss": 0.0184,
      "step": 2206880
    },
    {
      "epoch": 3.6116402532026735,
      "grad_norm": 0.31459006667137146,
      "learning_rate": 2.7950159129695648e-06,
      "loss": 0.0124,
      "step": 2206900
    },
    {
      "epoch": 3.6116729836413266,
      "grad_norm": 0.4151608347892761,
      "learning_rate": 2.7949500207560475e-06,
      "loss": 0.01,
      "step": 2206920
    },
    {
      "epoch": 3.61170571407998,
      "grad_norm": 0.49833160638809204,
      "learning_rate": 2.7948841285425303e-06,
      "loss": 0.0105,
      "step": 2206940
    },
    {
      "epoch": 3.6117384445186334,
      "grad_norm": 0.1815599501132965,
      "learning_rate": 2.7948182363290134e-06,
      "loss": 0.01,
      "step": 2206960
    },
    {
      "epoch": 3.611771174957287,
      "grad_norm": 0.28077366948127747,
      "learning_rate": 2.794752344115496e-06,
      "loss": 0.0071,
      "step": 2206980
    },
    {
      "epoch": 3.61180390539594,
      "grad_norm": 0.17120347917079926,
      "learning_rate": 2.794686451901979e-06,
      "loss": 0.0111,
      "step": 2207000
    },
    {
      "epoch": 3.6118366358345932,
      "grad_norm": 0.3323408365249634,
      "learning_rate": 2.7946205596884616e-06,
      "loss": 0.0096,
      "step": 2207020
    },
    {
      "epoch": 3.611869366273247,
      "grad_norm": 0.29913219809532166,
      "learning_rate": 2.794554667474945e-06,
      "loss": 0.0141,
      "step": 2207040
    },
    {
      "epoch": 3.6119020967119,
      "grad_norm": 0.23158666491508484,
      "learning_rate": 2.7944887752614275e-06,
      "loss": 0.0105,
      "step": 2207060
    },
    {
      "epoch": 3.6119348271505536,
      "grad_norm": 0.13384029269218445,
      "learning_rate": 2.7944228830479103e-06,
      "loss": 0.0108,
      "step": 2207080
    },
    {
      "epoch": 3.6119675575892067,
      "grad_norm": 0.35427358746528625,
      "learning_rate": 2.794356990834393e-06,
      "loss": 0.0116,
      "step": 2207100
    },
    {
      "epoch": 3.6120002880278603,
      "grad_norm": 0.25798866152763367,
      "learning_rate": 2.794291098620876e-06,
      "loss": 0.0096,
      "step": 2207120
    },
    {
      "epoch": 3.6120330184665135,
      "grad_norm": 0.42794713377952576,
      "learning_rate": 2.794225206407359e-06,
      "loss": 0.0098,
      "step": 2207140
    },
    {
      "epoch": 3.6120657489051666,
      "grad_norm": 0.39976564049720764,
      "learning_rate": 2.7941593141938417e-06,
      "loss": 0.0101,
      "step": 2207160
    },
    {
      "epoch": 3.6120984793438202,
      "grad_norm": 0.2761843800544739,
      "learning_rate": 2.7940934219803253e-06,
      "loss": 0.0093,
      "step": 2207180
    },
    {
      "epoch": 3.6121312097824734,
      "grad_norm": 0.18082986772060394,
      "learning_rate": 2.794027529766808e-06,
      "loss": 0.0098,
      "step": 2207200
    },
    {
      "epoch": 3.612163940221127,
      "grad_norm": 0.36504942178726196,
      "learning_rate": 2.7939616375532907e-06,
      "loss": 0.0132,
      "step": 2207220
    },
    {
      "epoch": 3.61219667065978,
      "grad_norm": 0.24746698141098022,
      "learning_rate": 2.7938957453397735e-06,
      "loss": 0.0096,
      "step": 2207240
    },
    {
      "epoch": 3.6122294010984337,
      "grad_norm": 0.19401158392429352,
      "learning_rate": 2.7938298531262566e-06,
      "loss": 0.0152,
      "step": 2207260
    },
    {
      "epoch": 3.612262131537087,
      "grad_norm": 0.24604278802871704,
      "learning_rate": 2.7937639609127394e-06,
      "loss": 0.0158,
      "step": 2207280
    },
    {
      "epoch": 3.61229486197574,
      "grad_norm": 0.1317249983549118,
      "learning_rate": 2.793698068699222e-06,
      "loss": 0.0068,
      "step": 2207300
    },
    {
      "epoch": 3.6123275924143936,
      "grad_norm": 0.1525644212961197,
      "learning_rate": 2.793632176485705e-06,
      "loss": 0.0119,
      "step": 2207320
    },
    {
      "epoch": 3.6123603228530468,
      "grad_norm": 0.25276944041252136,
      "learning_rate": 2.7935662842721876e-06,
      "loss": 0.0078,
      "step": 2207340
    },
    {
      "epoch": 3.6123930532917004,
      "grad_norm": 0.04182933643460274,
      "learning_rate": 2.7935003920586708e-06,
      "loss": 0.0167,
      "step": 2207360
    },
    {
      "epoch": 3.6124257837303535,
      "grad_norm": 0.16258449852466583,
      "learning_rate": 2.7934344998451535e-06,
      "loss": 0.0142,
      "step": 2207380
    },
    {
      "epoch": 3.612458514169007,
      "grad_norm": 0.16046102344989777,
      "learning_rate": 2.7933686076316362e-06,
      "loss": 0.0105,
      "step": 2207400
    },
    {
      "epoch": 3.6124912446076602,
      "grad_norm": 0.06443406641483307,
      "learning_rate": 2.793302715418119e-06,
      "loss": 0.0077,
      "step": 2207420
    },
    {
      "epoch": 3.6125239750463134,
      "grad_norm": 0.09296275675296783,
      "learning_rate": 2.793236823204602e-06,
      "loss": 0.0178,
      "step": 2207440
    },
    {
      "epoch": 3.612556705484967,
      "grad_norm": 0.21873103082180023,
      "learning_rate": 2.793170930991085e-06,
      "loss": 0.0174,
      "step": 2207460
    },
    {
      "epoch": 3.61258943592362,
      "grad_norm": 0.5454192161560059,
      "learning_rate": 2.7931050387775676e-06,
      "loss": 0.0113,
      "step": 2207480
    },
    {
      "epoch": 3.6126221663622737,
      "grad_norm": 0.36474159359931946,
      "learning_rate": 2.7930391465640504e-06,
      "loss": 0.0118,
      "step": 2207500
    },
    {
      "epoch": 3.612654896800927,
      "grad_norm": 0.475828617811203,
      "learning_rate": 2.792973254350534e-06,
      "loss": 0.0151,
      "step": 2207520
    },
    {
      "epoch": 3.6126876272395805,
      "grad_norm": 0.7578442096710205,
      "learning_rate": 2.7929073621370167e-06,
      "loss": 0.0074,
      "step": 2207540
    },
    {
      "epoch": 3.6127203576782336,
      "grad_norm": 0.3502238392829895,
      "learning_rate": 2.7928414699234994e-06,
      "loss": 0.008,
      "step": 2207560
    },
    {
      "epoch": 3.612753088116887,
      "grad_norm": 0.178781196475029,
      "learning_rate": 2.7927755777099826e-06,
      "loss": 0.0084,
      "step": 2207580
    },
    {
      "epoch": 3.6127858185555404,
      "grad_norm": 0.37264931201934814,
      "learning_rate": 2.7927096854964653e-06,
      "loss": 0.0101,
      "step": 2207600
    },
    {
      "epoch": 3.6128185489941935,
      "grad_norm": 0.157737135887146,
      "learning_rate": 2.792643793282948e-06,
      "loss": 0.0112,
      "step": 2207620
    },
    {
      "epoch": 3.612851279432847,
      "grad_norm": 0.2977716326713562,
      "learning_rate": 2.792577901069431e-06,
      "loss": 0.0086,
      "step": 2207640
    },
    {
      "epoch": 3.6128840098715003,
      "grad_norm": 0.25833553075790405,
      "learning_rate": 2.792512008855914e-06,
      "loss": 0.0096,
      "step": 2207660
    },
    {
      "epoch": 3.612916740310154,
      "grad_norm": 0.4324483573436737,
      "learning_rate": 2.7924461166423967e-06,
      "loss": 0.0105,
      "step": 2207680
    },
    {
      "epoch": 3.612949470748807,
      "grad_norm": 0.16260147094726562,
      "learning_rate": 2.7923802244288794e-06,
      "loss": 0.013,
      "step": 2207700
    },
    {
      "epoch": 3.61298220118746,
      "grad_norm": 0.3666501045227051,
      "learning_rate": 2.792314332215362e-06,
      "loss": 0.0087,
      "step": 2207720
    },
    {
      "epoch": 3.6130149316261138,
      "grad_norm": 0.14641977846622467,
      "learning_rate": 2.792248440001845e-06,
      "loss": 0.0118,
      "step": 2207740
    },
    {
      "epoch": 3.613047662064767,
      "grad_norm": 0.14298012852668762,
      "learning_rate": 2.792182547788328e-06,
      "loss": 0.0112,
      "step": 2207760
    },
    {
      "epoch": 3.61308039250342,
      "grad_norm": 0.14858126640319824,
      "learning_rate": 2.792116655574811e-06,
      "loss": 0.0076,
      "step": 2207780
    },
    {
      "epoch": 3.6131131229420737,
      "grad_norm": 0.6422856450080872,
      "learning_rate": 2.7920507633612936e-06,
      "loss": 0.0087,
      "step": 2207800
    },
    {
      "epoch": 3.6131458533807272,
      "grad_norm": 0.5302537679672241,
      "learning_rate": 2.7919848711477763e-06,
      "loss": 0.011,
      "step": 2207820
    },
    {
      "epoch": 3.6131785838193804,
      "grad_norm": 0.2626435458660126,
      "learning_rate": 2.7919189789342595e-06,
      "loss": 0.0094,
      "step": 2207840
    },
    {
      "epoch": 3.6132113142580335,
      "grad_norm": 0.18958908319473267,
      "learning_rate": 2.7918530867207422e-06,
      "loss": 0.0137,
      "step": 2207860
    },
    {
      "epoch": 3.613244044696687,
      "grad_norm": 0.2099977433681488,
      "learning_rate": 2.7917871945072254e-06,
      "loss": 0.0082,
      "step": 2207880
    },
    {
      "epoch": 3.6132767751353403,
      "grad_norm": 0.19733113050460815,
      "learning_rate": 2.7917213022937085e-06,
      "loss": 0.0094,
      "step": 2207900
    },
    {
      "epoch": 3.6133095055739934,
      "grad_norm": 0.26186007261276245,
      "learning_rate": 2.7916554100801913e-06,
      "loss": 0.0084,
      "step": 2207920
    },
    {
      "epoch": 3.613342236012647,
      "grad_norm": 0.11375493556261063,
      "learning_rate": 2.791589517866674e-06,
      "loss": 0.0082,
      "step": 2207940
    },
    {
      "epoch": 3.6133749664513006,
      "grad_norm": 0.06362834572792053,
      "learning_rate": 2.7915236256531568e-06,
      "loss": 0.0157,
      "step": 2207960
    },
    {
      "epoch": 3.613407696889954,
      "grad_norm": 0.6165883541107178,
      "learning_rate": 2.79145773343964e-06,
      "loss": 0.0097,
      "step": 2207980
    },
    {
      "epoch": 3.613440427328607,
      "grad_norm": 0.17187783122062683,
      "learning_rate": 2.7913918412261227e-06,
      "loss": 0.0084,
      "step": 2208000
    },
    {
      "epoch": 3.6134731577672605,
      "grad_norm": 0.14069664478302002,
      "learning_rate": 2.7913259490126054e-06,
      "loss": 0.0073,
      "step": 2208020
    },
    {
      "epoch": 3.6135058882059137,
      "grad_norm": 0.38839009404182434,
      "learning_rate": 2.791260056799088e-06,
      "loss": 0.0085,
      "step": 2208040
    },
    {
      "epoch": 3.613538618644567,
      "grad_norm": 0.2583971619606018,
      "learning_rate": 2.7911941645855713e-06,
      "loss": 0.0119,
      "step": 2208060
    },
    {
      "epoch": 3.6135713490832204,
      "grad_norm": 0.21565410494804382,
      "learning_rate": 2.791128272372054e-06,
      "loss": 0.0138,
      "step": 2208080
    },
    {
      "epoch": 3.613604079521874,
      "grad_norm": 0.34206512570381165,
      "learning_rate": 2.7910623801585368e-06,
      "loss": 0.0087,
      "step": 2208100
    },
    {
      "epoch": 3.613636809960527,
      "grad_norm": 0.1544550508260727,
      "learning_rate": 2.7909964879450195e-06,
      "loss": 0.0085,
      "step": 2208120
    },
    {
      "epoch": 3.6136695403991803,
      "grad_norm": 0.1277046948671341,
      "learning_rate": 2.7909305957315027e-06,
      "loss": 0.0136,
      "step": 2208140
    },
    {
      "epoch": 3.613702270837834,
      "grad_norm": 0.18110497295856476,
      "learning_rate": 2.7908647035179854e-06,
      "loss": 0.0113,
      "step": 2208160
    },
    {
      "epoch": 3.613735001276487,
      "grad_norm": 0.30594873428344727,
      "learning_rate": 2.790798811304468e-06,
      "loss": 0.016,
      "step": 2208180
    },
    {
      "epoch": 3.61376773171514,
      "grad_norm": 0.5444282293319702,
      "learning_rate": 2.790732919090951e-06,
      "loss": 0.011,
      "step": 2208200
    },
    {
      "epoch": 3.613800462153794,
      "grad_norm": 0.40311941504478455,
      "learning_rate": 2.7906670268774336e-06,
      "loss": 0.009,
      "step": 2208220
    },
    {
      "epoch": 3.6138331925924474,
      "grad_norm": 0.33575037121772766,
      "learning_rate": 2.7906011346639172e-06,
      "loss": 0.0092,
      "step": 2208240
    },
    {
      "epoch": 3.6138659230311005,
      "grad_norm": 0.20357273519039154,
      "learning_rate": 2.7905352424504e-06,
      "loss": 0.011,
      "step": 2208260
    },
    {
      "epoch": 3.6138986534697537,
      "grad_norm": 0.4233796000480652,
      "learning_rate": 2.7904693502368827e-06,
      "loss": 0.0141,
      "step": 2208280
    },
    {
      "epoch": 3.6139313839084073,
      "grad_norm": 0.4897328317165375,
      "learning_rate": 2.790403458023366e-06,
      "loss": 0.0158,
      "step": 2208300
    },
    {
      "epoch": 3.6139641143470604,
      "grad_norm": 0.0986168161034584,
      "learning_rate": 2.7903375658098486e-06,
      "loss": 0.0116,
      "step": 2208320
    },
    {
      "epoch": 3.6139968447857136,
      "grad_norm": 0.1881650984287262,
      "learning_rate": 2.7902716735963314e-06,
      "loss": 0.0075,
      "step": 2208340
    },
    {
      "epoch": 3.614029575224367,
      "grad_norm": 0.2658936381340027,
      "learning_rate": 2.790205781382814e-06,
      "loss": 0.0097,
      "step": 2208360
    },
    {
      "epoch": 3.6140623056630203,
      "grad_norm": 0.9695838689804077,
      "learning_rate": 2.7901398891692973e-06,
      "loss": 0.015,
      "step": 2208380
    },
    {
      "epoch": 3.614095036101674,
      "grad_norm": 0.2476273626089096,
      "learning_rate": 2.79007399695578e-06,
      "loss": 0.0129,
      "step": 2208400
    },
    {
      "epoch": 3.614127766540327,
      "grad_norm": 0.6723425388336182,
      "learning_rate": 2.7900081047422627e-06,
      "loss": 0.0116,
      "step": 2208420
    },
    {
      "epoch": 3.6141604969789807,
      "grad_norm": 0.3543790578842163,
      "learning_rate": 2.7899422125287455e-06,
      "loss": 0.0079,
      "step": 2208440
    },
    {
      "epoch": 3.614193227417634,
      "grad_norm": 0.13415832817554474,
      "learning_rate": 2.7898763203152286e-06,
      "loss": 0.0115,
      "step": 2208460
    },
    {
      "epoch": 3.614225957856287,
      "grad_norm": 0.2613040804862976,
      "learning_rate": 2.7898104281017114e-06,
      "loss": 0.0104,
      "step": 2208480
    },
    {
      "epoch": 3.6142586882949406,
      "grad_norm": 0.36523959040641785,
      "learning_rate": 2.789744535888194e-06,
      "loss": 0.0113,
      "step": 2208500
    },
    {
      "epoch": 3.6142914187335937,
      "grad_norm": 0.1294512152671814,
      "learning_rate": 2.789678643674677e-06,
      "loss": 0.0126,
      "step": 2208520
    },
    {
      "epoch": 3.6143241491722473,
      "grad_norm": 0.14501367509365082,
      "learning_rate": 2.78961275146116e-06,
      "loss": 0.0061,
      "step": 2208540
    },
    {
      "epoch": 3.6143568796109005,
      "grad_norm": 0.45884519815444946,
      "learning_rate": 2.7895468592476428e-06,
      "loss": 0.0102,
      "step": 2208560
    },
    {
      "epoch": 3.614389610049554,
      "grad_norm": 0.6727337837219238,
      "learning_rate": 2.789480967034126e-06,
      "loss": 0.0074,
      "step": 2208580
    },
    {
      "epoch": 3.614422340488207,
      "grad_norm": 0.08425628393888474,
      "learning_rate": 2.789415074820609e-06,
      "loss": 0.0105,
      "step": 2208600
    },
    {
      "epoch": 3.6144550709268604,
      "grad_norm": 0.2071540504693985,
      "learning_rate": 2.789349182607092e-06,
      "loss": 0.0138,
      "step": 2208620
    },
    {
      "epoch": 3.614487801365514,
      "grad_norm": 0.15574319660663605,
      "learning_rate": 2.7892832903935746e-06,
      "loss": 0.0094,
      "step": 2208640
    },
    {
      "epoch": 3.614520531804167,
      "grad_norm": 0.6327533721923828,
      "learning_rate": 2.7892173981800573e-06,
      "loss": 0.0104,
      "step": 2208660
    },
    {
      "epoch": 3.6145532622428207,
      "grad_norm": 0.21316397190093994,
      "learning_rate": 2.7891515059665405e-06,
      "loss": 0.016,
      "step": 2208680
    },
    {
      "epoch": 3.614585992681474,
      "grad_norm": 0.4983055591583252,
      "learning_rate": 2.7890856137530232e-06,
      "loss": 0.0123,
      "step": 2208700
    },
    {
      "epoch": 3.6146187231201274,
      "grad_norm": 0.14374199509620667,
      "learning_rate": 2.789019721539506e-06,
      "loss": 0.012,
      "step": 2208720
    },
    {
      "epoch": 3.6146514535587806,
      "grad_norm": 0.30990177392959595,
      "learning_rate": 2.7889538293259887e-06,
      "loss": 0.0118,
      "step": 2208740
    },
    {
      "epoch": 3.6146841839974337,
      "grad_norm": 0.3688281774520874,
      "learning_rate": 2.7888879371124714e-06,
      "loss": 0.0114,
      "step": 2208760
    },
    {
      "epoch": 3.6147169144360873,
      "grad_norm": 0.46740877628326416,
      "learning_rate": 2.7888220448989546e-06,
      "loss": 0.0101,
      "step": 2208780
    },
    {
      "epoch": 3.6147496448747405,
      "grad_norm": 0.11171087622642517,
      "learning_rate": 2.7887561526854373e-06,
      "loss": 0.011,
      "step": 2208800
    },
    {
      "epoch": 3.614782375313394,
      "grad_norm": 0.26910680532455444,
      "learning_rate": 2.78869026047192e-06,
      "loss": 0.0069,
      "step": 2208820
    },
    {
      "epoch": 3.6148151057520472,
      "grad_norm": 0.2389928549528122,
      "learning_rate": 2.788624368258403e-06,
      "loss": 0.013,
      "step": 2208840
    },
    {
      "epoch": 3.614847836190701,
      "grad_norm": 0.2943382263183594,
      "learning_rate": 2.788558476044886e-06,
      "loss": 0.0074,
      "step": 2208860
    },
    {
      "epoch": 3.614880566629354,
      "grad_norm": 0.11627715826034546,
      "learning_rate": 2.7884925838313687e-06,
      "loss": 0.0146,
      "step": 2208880
    },
    {
      "epoch": 3.614913297068007,
      "grad_norm": 0.10103956609964371,
      "learning_rate": 2.7884266916178515e-06,
      "loss": 0.0082,
      "step": 2208900
    },
    {
      "epoch": 3.6149460275066607,
      "grad_norm": 0.2440791130065918,
      "learning_rate": 2.788360799404334e-06,
      "loss": 0.0107,
      "step": 2208920
    },
    {
      "epoch": 3.614978757945314,
      "grad_norm": 0.1603577584028244,
      "learning_rate": 2.7882949071908178e-06,
      "loss": 0.0099,
      "step": 2208940
    },
    {
      "epoch": 3.6150114883839675,
      "grad_norm": 0.5258644223213196,
      "learning_rate": 2.7882290149773005e-06,
      "loss": 0.0109,
      "step": 2208960
    },
    {
      "epoch": 3.6150442188226206,
      "grad_norm": 0.4549086391925812,
      "learning_rate": 2.7881631227637833e-06,
      "loss": 0.0111,
      "step": 2208980
    },
    {
      "epoch": 3.615076949261274,
      "grad_norm": 0.38901323080062866,
      "learning_rate": 2.7880972305502664e-06,
      "loss": 0.0073,
      "step": 2209000
    },
    {
      "epoch": 3.6151096796999274,
      "grad_norm": 0.27722498774528503,
      "learning_rate": 2.788031338336749e-06,
      "loss": 0.0091,
      "step": 2209020
    },
    {
      "epoch": 3.6151424101385805,
      "grad_norm": 0.1483403444290161,
      "learning_rate": 2.787965446123232e-06,
      "loss": 0.008,
      "step": 2209040
    },
    {
      "epoch": 3.615175140577234,
      "grad_norm": 0.19245074689388275,
      "learning_rate": 2.7878995539097146e-06,
      "loss": 0.0106,
      "step": 2209060
    },
    {
      "epoch": 3.6152078710158873,
      "grad_norm": 0.5185511708259583,
      "learning_rate": 2.787833661696198e-06,
      "loss": 0.0147,
      "step": 2209080
    },
    {
      "epoch": 3.615240601454541,
      "grad_norm": 0.16457301378250122,
      "learning_rate": 2.7877677694826805e-06,
      "loss": 0.0125,
      "step": 2209100
    },
    {
      "epoch": 3.615273331893194,
      "grad_norm": 0.09358904510736465,
      "learning_rate": 2.7877018772691633e-06,
      "loss": 0.0093,
      "step": 2209120
    },
    {
      "epoch": 3.6153060623318476,
      "grad_norm": 0.4409874677658081,
      "learning_rate": 2.787635985055646e-06,
      "loss": 0.0147,
      "step": 2209140
    },
    {
      "epoch": 3.6153387927705007,
      "grad_norm": 0.163570374250412,
      "learning_rate": 2.787570092842129e-06,
      "loss": 0.0057,
      "step": 2209160
    },
    {
      "epoch": 3.615371523209154,
      "grad_norm": 0.7942025661468506,
      "learning_rate": 2.787504200628612e-06,
      "loss": 0.0108,
      "step": 2209180
    },
    {
      "epoch": 3.6154042536478075,
      "grad_norm": 0.6742255091667175,
      "learning_rate": 2.7874383084150947e-06,
      "loss": 0.0121,
      "step": 2209200
    },
    {
      "epoch": 3.6154369840864606,
      "grad_norm": 0.2630120515823364,
      "learning_rate": 2.7873724162015774e-06,
      "loss": 0.0101,
      "step": 2209220
    },
    {
      "epoch": 3.6154697145251142,
      "grad_norm": 0.07176391035318375,
      "learning_rate": 2.78730652398806e-06,
      "loss": 0.0098,
      "step": 2209240
    },
    {
      "epoch": 3.6155024449637674,
      "grad_norm": 0.27430281043052673,
      "learning_rate": 2.7872406317745433e-06,
      "loss": 0.0105,
      "step": 2209260
    },
    {
      "epoch": 3.615535175402421,
      "grad_norm": 0.44826531410217285,
      "learning_rate": 2.7871747395610265e-06,
      "loss": 0.0126,
      "step": 2209280
    },
    {
      "epoch": 3.615567905841074,
      "grad_norm": 2.834977865219116,
      "learning_rate": 2.7871088473475092e-06,
      "loss": 0.0152,
      "step": 2209300
    },
    {
      "epoch": 3.6156006362797273,
      "grad_norm": 0.12895739078521729,
      "learning_rate": 2.7870429551339924e-06,
      "loss": 0.0112,
      "step": 2209320
    },
    {
      "epoch": 3.615633366718381,
      "grad_norm": 0.11253062635660172,
      "learning_rate": 2.786977062920475e-06,
      "loss": 0.0073,
      "step": 2209340
    },
    {
      "epoch": 3.615666097157034,
      "grad_norm": 0.5671966671943665,
      "learning_rate": 2.786911170706958e-06,
      "loss": 0.0164,
      "step": 2209360
    },
    {
      "epoch": 3.615698827595687,
      "grad_norm": 0.38927242159843445,
      "learning_rate": 2.7868452784934406e-06,
      "loss": 0.0084,
      "step": 2209380
    },
    {
      "epoch": 3.6157315580343408,
      "grad_norm": 0.3071122467517853,
      "learning_rate": 2.7867793862799238e-06,
      "loss": 0.0073,
      "step": 2209400
    },
    {
      "epoch": 3.6157642884729944,
      "grad_norm": 0.32506367564201355,
      "learning_rate": 2.7867134940664065e-06,
      "loss": 0.0147,
      "step": 2209420
    },
    {
      "epoch": 3.6157970189116475,
      "grad_norm": 0.13221468031406403,
      "learning_rate": 2.7866476018528892e-06,
      "loss": 0.0103,
      "step": 2209440
    },
    {
      "epoch": 3.6158297493503007,
      "grad_norm": 0.2427995651960373,
      "learning_rate": 2.786581709639372e-06,
      "loss": 0.0205,
      "step": 2209460
    },
    {
      "epoch": 3.6158624797889543,
      "grad_norm": 0.18375259637832642,
      "learning_rate": 2.786515817425855e-06,
      "loss": 0.0079,
      "step": 2209480
    },
    {
      "epoch": 3.6158952102276074,
      "grad_norm": 0.17755280435085297,
      "learning_rate": 2.786449925212338e-06,
      "loss": 0.0114,
      "step": 2209500
    },
    {
      "epoch": 3.6159279406662606,
      "grad_norm": 0.6317607164382935,
      "learning_rate": 2.7863840329988206e-06,
      "loss": 0.0078,
      "step": 2209520
    },
    {
      "epoch": 3.615960671104914,
      "grad_norm": 0.6950590014457703,
      "learning_rate": 2.7863181407853034e-06,
      "loss": 0.0133,
      "step": 2209540
    },
    {
      "epoch": 3.6159934015435677,
      "grad_norm": 0.18971896171569824,
      "learning_rate": 2.7862522485717865e-06,
      "loss": 0.0081,
      "step": 2209560
    },
    {
      "epoch": 3.616026131982221,
      "grad_norm": 0.46990853548049927,
      "learning_rate": 2.7861863563582693e-06,
      "loss": 0.014,
      "step": 2209580
    },
    {
      "epoch": 3.616058862420874,
      "grad_norm": 0.2964622974395752,
      "learning_rate": 2.786120464144752e-06,
      "loss": 0.0074,
      "step": 2209600
    },
    {
      "epoch": 3.6160915928595276,
      "grad_norm": 0.1711081564426422,
      "learning_rate": 2.7860545719312347e-06,
      "loss": 0.0088,
      "step": 2209620
    },
    {
      "epoch": 3.616124323298181,
      "grad_norm": 0.6541637182235718,
      "learning_rate": 2.7859886797177183e-06,
      "loss": 0.0119,
      "step": 2209640
    },
    {
      "epoch": 3.616157053736834,
      "grad_norm": 0.12490632385015488,
      "learning_rate": 2.785922787504201e-06,
      "loss": 0.0105,
      "step": 2209660
    },
    {
      "epoch": 3.6161897841754875,
      "grad_norm": 0.16217108070850372,
      "learning_rate": 2.785856895290684e-06,
      "loss": 0.0091,
      "step": 2209680
    },
    {
      "epoch": 3.616222514614141,
      "grad_norm": 0.10294093191623688,
      "learning_rate": 2.785791003077167e-06,
      "loss": 0.0136,
      "step": 2209700
    },
    {
      "epoch": 3.6162552450527943,
      "grad_norm": 0.12067220360040665,
      "learning_rate": 2.7857251108636497e-06,
      "loss": 0.0123,
      "step": 2209720
    },
    {
      "epoch": 3.6162879754914474,
      "grad_norm": 0.3385867178440094,
      "learning_rate": 2.7856592186501325e-06,
      "loss": 0.01,
      "step": 2209740
    },
    {
      "epoch": 3.616320705930101,
      "grad_norm": 0.3054381012916565,
      "learning_rate": 2.785593326436615e-06,
      "loss": 0.0113,
      "step": 2209760
    },
    {
      "epoch": 3.616353436368754,
      "grad_norm": 0.11741958558559418,
      "learning_rate": 2.785527434223098e-06,
      "loss": 0.0066,
      "step": 2209780
    },
    {
      "epoch": 3.6163861668074073,
      "grad_norm": 0.2125903069972992,
      "learning_rate": 2.785461542009581e-06,
      "loss": 0.0095,
      "step": 2209800
    },
    {
      "epoch": 3.616418897246061,
      "grad_norm": 0.495114266872406,
      "learning_rate": 2.785395649796064e-06,
      "loss": 0.0115,
      "step": 2209820
    },
    {
      "epoch": 3.616451627684714,
      "grad_norm": 0.3503512144088745,
      "learning_rate": 2.7853297575825466e-06,
      "loss": 0.0115,
      "step": 2209840
    },
    {
      "epoch": 3.6164843581233677,
      "grad_norm": 0.216632679104805,
      "learning_rate": 2.7852638653690293e-06,
      "loss": 0.0089,
      "step": 2209860
    },
    {
      "epoch": 3.616517088562021,
      "grad_norm": 0.22032760083675385,
      "learning_rate": 2.7851979731555125e-06,
      "loss": 0.01,
      "step": 2209880
    },
    {
      "epoch": 3.6165498190006744,
      "grad_norm": 0.49006083607673645,
      "learning_rate": 2.7851320809419952e-06,
      "loss": 0.0117,
      "step": 2209900
    },
    {
      "epoch": 3.6165825494393276,
      "grad_norm": 0.24420595169067383,
      "learning_rate": 2.785066188728478e-06,
      "loss": 0.0127,
      "step": 2209920
    },
    {
      "epoch": 3.6166152798779807,
      "grad_norm": 0.3406563699245453,
      "learning_rate": 2.7850002965149607e-06,
      "loss": 0.0071,
      "step": 2209940
    },
    {
      "epoch": 3.6166480103166343,
      "grad_norm": 0.07044752687215805,
      "learning_rate": 2.784934404301444e-06,
      "loss": 0.0078,
      "step": 2209960
    },
    {
      "epoch": 3.6166807407552874,
      "grad_norm": 0.2944484055042267,
      "learning_rate": 2.7848685120879266e-06,
      "loss": 0.0127,
      "step": 2209980
    },
    {
      "epoch": 3.616713471193941,
      "grad_norm": 0.17635981738567352,
      "learning_rate": 2.7848026198744098e-06,
      "loss": 0.009,
      "step": 2210000
    },
    {
      "epoch": 3.616746201632594,
      "grad_norm": 0.10488548129796982,
      "learning_rate": 2.784736727660893e-06,
      "loss": 0.0124,
      "step": 2210020
    },
    {
      "epoch": 3.616778932071248,
      "grad_norm": 0.10174676775932312,
      "learning_rate": 2.7846708354473757e-06,
      "loss": 0.0109,
      "step": 2210040
    },
    {
      "epoch": 3.616811662509901,
      "grad_norm": 0.41465476155281067,
      "learning_rate": 2.7846049432338584e-06,
      "loss": 0.0129,
      "step": 2210060
    },
    {
      "epoch": 3.616844392948554,
      "grad_norm": 0.21312177181243896,
      "learning_rate": 2.784539051020341e-06,
      "loss": 0.0152,
      "step": 2210080
    },
    {
      "epoch": 3.6168771233872077,
      "grad_norm": 0.12782609462738037,
      "learning_rate": 2.7844731588068243e-06,
      "loss": 0.0114,
      "step": 2210100
    },
    {
      "epoch": 3.616909853825861,
      "grad_norm": 0.23858237266540527,
      "learning_rate": 2.784407266593307e-06,
      "loss": 0.0111,
      "step": 2210120
    },
    {
      "epoch": 3.6169425842645144,
      "grad_norm": 0.1643587201833725,
      "learning_rate": 2.78434137437979e-06,
      "loss": 0.0065,
      "step": 2210140
    },
    {
      "epoch": 3.6169753147031676,
      "grad_norm": 0.13278275728225708,
      "learning_rate": 2.7842754821662725e-06,
      "loss": 0.0076,
      "step": 2210160
    },
    {
      "epoch": 3.617008045141821,
      "grad_norm": 0.31720444560050964,
      "learning_rate": 2.7842095899527553e-06,
      "loss": 0.0143,
      "step": 2210180
    },
    {
      "epoch": 3.6170407755804743,
      "grad_norm": 0.19203400611877441,
      "learning_rate": 2.7841436977392384e-06,
      "loss": 0.0109,
      "step": 2210200
    },
    {
      "epoch": 3.6170735060191275,
      "grad_norm": 0.03484238684177399,
      "learning_rate": 2.784077805525721e-06,
      "loss": 0.0073,
      "step": 2210220
    },
    {
      "epoch": 3.617106236457781,
      "grad_norm": 0.34224793314933777,
      "learning_rate": 2.784011913312204e-06,
      "loss": 0.01,
      "step": 2210240
    },
    {
      "epoch": 3.617138966896434,
      "grad_norm": 0.12594588100910187,
      "learning_rate": 2.7839460210986867e-06,
      "loss": 0.0109,
      "step": 2210260
    },
    {
      "epoch": 3.617171697335088,
      "grad_norm": 0.43821248412132263,
      "learning_rate": 2.78388012888517e-06,
      "loss": 0.0112,
      "step": 2210280
    },
    {
      "epoch": 3.617204427773741,
      "grad_norm": 0.8198643326759338,
      "learning_rate": 2.7838142366716526e-06,
      "loss": 0.0098,
      "step": 2210300
    },
    {
      "epoch": 3.6172371582123946,
      "grad_norm": 0.23263207077980042,
      "learning_rate": 2.7837483444581353e-06,
      "loss": 0.0137,
      "step": 2210320
    },
    {
      "epoch": 3.6172698886510477,
      "grad_norm": 0.39554017782211304,
      "learning_rate": 2.783682452244619e-06,
      "loss": 0.0076,
      "step": 2210340
    },
    {
      "epoch": 3.617302619089701,
      "grad_norm": 0.1323731243610382,
      "learning_rate": 2.7836165600311016e-06,
      "loss": 0.01,
      "step": 2210360
    },
    {
      "epoch": 3.6173353495283544,
      "grad_norm": 0.24724532663822174,
      "learning_rate": 2.7835506678175844e-06,
      "loss": 0.0192,
      "step": 2210380
    },
    {
      "epoch": 3.6173680799670076,
      "grad_norm": 0.40129992365837097,
      "learning_rate": 2.783484775604067e-06,
      "loss": 0.0121,
      "step": 2210400
    },
    {
      "epoch": 3.617400810405661,
      "grad_norm": 0.4738143980503082,
      "learning_rate": 2.7834188833905503e-06,
      "loss": 0.009,
      "step": 2210420
    },
    {
      "epoch": 3.6174335408443143,
      "grad_norm": 0.14169161021709442,
      "learning_rate": 2.783352991177033e-06,
      "loss": 0.0108,
      "step": 2210440
    },
    {
      "epoch": 3.617466271282968,
      "grad_norm": 0.2599877715110779,
      "learning_rate": 2.7832870989635157e-06,
      "loss": 0.0146,
      "step": 2210460
    },
    {
      "epoch": 3.617499001721621,
      "grad_norm": 0.06450988352298737,
      "learning_rate": 2.7832212067499985e-06,
      "loss": 0.0086,
      "step": 2210480
    },
    {
      "epoch": 3.6175317321602742,
      "grad_norm": 0.33355700969696045,
      "learning_rate": 2.7831553145364816e-06,
      "loss": 0.0095,
      "step": 2210500
    },
    {
      "epoch": 3.617564462598928,
      "grad_norm": 0.4202571213245392,
      "learning_rate": 2.7830894223229644e-06,
      "loss": 0.0132,
      "step": 2210520
    },
    {
      "epoch": 3.617597193037581,
      "grad_norm": 0.6262732744216919,
      "learning_rate": 2.783023530109447e-06,
      "loss": 0.016,
      "step": 2210540
    },
    {
      "epoch": 3.6176299234762346,
      "grad_norm": 0.24807767570018768,
      "learning_rate": 2.78295763789593e-06,
      "loss": 0.011,
      "step": 2210560
    },
    {
      "epoch": 3.6176626539148877,
      "grad_norm": 0.21356116235256195,
      "learning_rate": 2.782891745682413e-06,
      "loss": 0.0107,
      "step": 2210580
    },
    {
      "epoch": 3.6176953843535413,
      "grad_norm": 0.17009513080120087,
      "learning_rate": 2.7828258534688958e-06,
      "loss": 0.0132,
      "step": 2210600
    },
    {
      "epoch": 3.6177281147921945,
      "grad_norm": 0.6154309511184692,
      "learning_rate": 2.7827599612553785e-06,
      "loss": 0.0093,
      "step": 2210620
    },
    {
      "epoch": 3.6177608452308476,
      "grad_norm": 0.20311950147151947,
      "learning_rate": 2.7826940690418612e-06,
      "loss": 0.0181,
      "step": 2210640
    },
    {
      "epoch": 3.617793575669501,
      "grad_norm": 0.5909527540206909,
      "learning_rate": 2.782628176828344e-06,
      "loss": 0.0091,
      "step": 2210660
    },
    {
      "epoch": 3.6178263061081544,
      "grad_norm": 0.31557613611221313,
      "learning_rate": 2.782562284614827e-06,
      "loss": 0.0115,
      "step": 2210680
    },
    {
      "epoch": 3.617859036546808,
      "grad_norm": 0.07265572994947433,
      "learning_rate": 2.7824963924013103e-06,
      "loss": 0.0082,
      "step": 2210700
    },
    {
      "epoch": 3.617891766985461,
      "grad_norm": 0.29596638679504395,
      "learning_rate": 2.782430500187793e-06,
      "loss": 0.0066,
      "step": 2210720
    },
    {
      "epoch": 3.6179244974241147,
      "grad_norm": 0.09172648936510086,
      "learning_rate": 2.7823646079742762e-06,
      "loss": 0.0081,
      "step": 2210740
    },
    {
      "epoch": 3.617957227862768,
      "grad_norm": 0.09771484136581421,
      "learning_rate": 2.782298715760759e-06,
      "loss": 0.011,
      "step": 2210760
    },
    {
      "epoch": 3.617989958301421,
      "grad_norm": 0.23233507573604584,
      "learning_rate": 2.7822328235472417e-06,
      "loss": 0.0085,
      "step": 2210780
    },
    {
      "epoch": 3.6180226887400746,
      "grad_norm": 0.1113627552986145,
      "learning_rate": 2.7821669313337244e-06,
      "loss": 0.0134,
      "step": 2210800
    },
    {
      "epoch": 3.6180554191787277,
      "grad_norm": 0.680995762348175,
      "learning_rate": 2.7821010391202076e-06,
      "loss": 0.0161,
      "step": 2210820
    },
    {
      "epoch": 3.618088149617381,
      "grad_norm": 0.3234294056892395,
      "learning_rate": 2.7820351469066903e-06,
      "loss": 0.0118,
      "step": 2210840
    },
    {
      "epoch": 3.6181208800560345,
      "grad_norm": 0.5982264280319214,
      "learning_rate": 2.781969254693173e-06,
      "loss": 0.0151,
      "step": 2210860
    },
    {
      "epoch": 3.618153610494688,
      "grad_norm": 0.17455574870109558,
      "learning_rate": 2.781903362479656e-06,
      "loss": 0.0111,
      "step": 2210880
    },
    {
      "epoch": 3.6181863409333412,
      "grad_norm": 0.16455449163913727,
      "learning_rate": 2.781837470266139e-06,
      "loss": 0.0154,
      "step": 2210900
    },
    {
      "epoch": 3.6182190713719944,
      "grad_norm": 0.3825171887874603,
      "learning_rate": 2.7817715780526217e-06,
      "loss": 0.0121,
      "step": 2210920
    },
    {
      "epoch": 3.618251801810648,
      "grad_norm": 0.5607823729515076,
      "learning_rate": 2.7817056858391045e-06,
      "loss": 0.009,
      "step": 2210940
    },
    {
      "epoch": 3.618284532249301,
      "grad_norm": 0.45454245805740356,
      "learning_rate": 2.781639793625587e-06,
      "loss": 0.0097,
      "step": 2210960
    },
    {
      "epoch": 3.6183172626879543,
      "grad_norm": 0.6948135495185852,
      "learning_rate": 2.7815739014120704e-06,
      "loss": 0.0118,
      "step": 2210980
    },
    {
      "epoch": 3.618349993126608,
      "grad_norm": 0.21132579445838928,
      "learning_rate": 2.781508009198553e-06,
      "loss": 0.0094,
      "step": 2211000
    },
    {
      "epoch": 3.6183827235652615,
      "grad_norm": 0.1724851131439209,
      "learning_rate": 2.781442116985036e-06,
      "loss": 0.0088,
      "step": 2211020
    },
    {
      "epoch": 3.6184154540039146,
      "grad_norm": 0.5592932105064392,
      "learning_rate": 2.7813762247715194e-06,
      "loss": 0.0107,
      "step": 2211040
    },
    {
      "epoch": 3.6184481844425678,
      "grad_norm": 0.08475837111473083,
      "learning_rate": 2.781310332558002e-06,
      "loss": 0.0123,
      "step": 2211060
    },
    {
      "epoch": 3.6184809148812214,
      "grad_norm": 0.14430062472820282,
      "learning_rate": 2.781244440344485e-06,
      "loss": 0.0096,
      "step": 2211080
    },
    {
      "epoch": 3.6185136453198745,
      "grad_norm": 0.22770504653453827,
      "learning_rate": 2.7811785481309677e-06,
      "loss": 0.0098,
      "step": 2211100
    },
    {
      "epoch": 3.6185463757585277,
      "grad_norm": 0.6057270169258118,
      "learning_rate": 2.781112655917451e-06,
      "loss": 0.0113,
      "step": 2211120
    },
    {
      "epoch": 3.6185791061971813,
      "grad_norm": 0.14173927903175354,
      "learning_rate": 2.7810467637039336e-06,
      "loss": 0.0099,
      "step": 2211140
    },
    {
      "epoch": 3.618611836635835,
      "grad_norm": 0.2412402629852295,
      "learning_rate": 2.7809808714904163e-06,
      "loss": 0.0117,
      "step": 2211160
    },
    {
      "epoch": 3.618644567074488,
      "grad_norm": 0.16297249495983124,
      "learning_rate": 2.780914979276899e-06,
      "loss": 0.0145,
      "step": 2211180
    },
    {
      "epoch": 3.618677297513141,
      "grad_norm": 0.12942349910736084,
      "learning_rate": 2.7808490870633818e-06,
      "loss": 0.0075,
      "step": 2211200
    },
    {
      "epoch": 3.6187100279517947,
      "grad_norm": 0.620834231376648,
      "learning_rate": 2.780783194849865e-06,
      "loss": 0.0085,
      "step": 2211220
    },
    {
      "epoch": 3.618742758390448,
      "grad_norm": 0.30559033155441284,
      "learning_rate": 2.7807173026363477e-06,
      "loss": 0.0123,
      "step": 2211240
    },
    {
      "epoch": 3.618775488829101,
      "grad_norm": 0.20005911588668823,
      "learning_rate": 2.7806514104228304e-06,
      "loss": 0.0074,
      "step": 2211260
    },
    {
      "epoch": 3.6188082192677546,
      "grad_norm": 0.15333200991153717,
      "learning_rate": 2.780585518209313e-06,
      "loss": 0.0092,
      "step": 2211280
    },
    {
      "epoch": 3.6188409497064082,
      "grad_norm": 0.7350553870201111,
      "learning_rate": 2.7805196259957963e-06,
      "loss": 0.0138,
      "step": 2211300
    },
    {
      "epoch": 3.6188736801450614,
      "grad_norm": 0.1986590027809143,
      "learning_rate": 2.780453733782279e-06,
      "loss": 0.0128,
      "step": 2211320
    },
    {
      "epoch": 3.6189064105837145,
      "grad_norm": 0.11424358934164047,
      "learning_rate": 2.780387841568762e-06,
      "loss": 0.0071,
      "step": 2211340
    },
    {
      "epoch": 3.618939141022368,
      "grad_norm": 0.2063201367855072,
      "learning_rate": 2.7803219493552445e-06,
      "loss": 0.0114,
      "step": 2211360
    },
    {
      "epoch": 3.6189718714610213,
      "grad_norm": 0.1762056201696396,
      "learning_rate": 2.7802560571417277e-06,
      "loss": 0.0098,
      "step": 2211380
    },
    {
      "epoch": 3.6190046018996744,
      "grad_norm": 0.18527664244174957,
      "learning_rate": 2.780190164928211e-06,
      "loss": 0.011,
      "step": 2211400
    },
    {
      "epoch": 3.619037332338328,
      "grad_norm": 0.6055440902709961,
      "learning_rate": 2.7801242727146936e-06,
      "loss": 0.0132,
      "step": 2211420
    },
    {
      "epoch": 3.619070062776981,
      "grad_norm": 0.31150320172309875,
      "learning_rate": 2.7800583805011768e-06,
      "loss": 0.011,
      "step": 2211440
    },
    {
      "epoch": 3.6191027932156348,
      "grad_norm": 0.7406283617019653,
      "learning_rate": 2.7799924882876595e-06,
      "loss": 0.0159,
      "step": 2211460
    },
    {
      "epoch": 3.619135523654288,
      "grad_norm": 0.4560202658176422,
      "learning_rate": 2.7799265960741422e-06,
      "loss": 0.0138,
      "step": 2211480
    },
    {
      "epoch": 3.6191682540929415,
      "grad_norm": 0.226133331656456,
      "learning_rate": 2.779860703860625e-06,
      "loss": 0.0099,
      "step": 2211500
    },
    {
      "epoch": 3.6192009845315947,
      "grad_norm": 0.37521785497665405,
      "learning_rate": 2.779794811647108e-06,
      "loss": 0.0099,
      "step": 2211520
    },
    {
      "epoch": 3.619233714970248,
      "grad_norm": 0.2170720249414444,
      "learning_rate": 2.779728919433591e-06,
      "loss": 0.0085,
      "step": 2211540
    },
    {
      "epoch": 3.6192664454089014,
      "grad_norm": 0.24802912771701813,
      "learning_rate": 2.7796630272200736e-06,
      "loss": 0.0158,
      "step": 2211560
    },
    {
      "epoch": 3.6192991758475546,
      "grad_norm": 0.5432752370834351,
      "learning_rate": 2.7795971350065564e-06,
      "loss": 0.009,
      "step": 2211580
    },
    {
      "epoch": 3.619331906286208,
      "grad_norm": 0.15910595655441284,
      "learning_rate": 2.779531242793039e-06,
      "loss": 0.0111,
      "step": 2211600
    },
    {
      "epoch": 3.6193646367248613,
      "grad_norm": 0.1196875050663948,
      "learning_rate": 2.7794653505795223e-06,
      "loss": 0.01,
      "step": 2211620
    },
    {
      "epoch": 3.619397367163515,
      "grad_norm": 0.06557515263557434,
      "learning_rate": 2.779399458366005e-06,
      "loss": 0.0114,
      "step": 2211640
    },
    {
      "epoch": 3.619430097602168,
      "grad_norm": 0.421852707862854,
      "learning_rate": 2.7793335661524878e-06,
      "loss": 0.0116,
      "step": 2211660
    },
    {
      "epoch": 3.619462828040821,
      "grad_norm": 0.4090503752231598,
      "learning_rate": 2.7792676739389705e-06,
      "loss": 0.0088,
      "step": 2211680
    },
    {
      "epoch": 3.619495558479475,
      "grad_norm": 0.34821414947509766,
      "learning_rate": 2.7792017817254537e-06,
      "loss": 0.0146,
      "step": 2211700
    },
    {
      "epoch": 3.619528288918128,
      "grad_norm": 0.269070029258728,
      "learning_rate": 2.7791358895119364e-06,
      "loss": 0.0099,
      "step": 2211720
    },
    {
      "epoch": 3.6195610193567815,
      "grad_norm": 0.19768379628658295,
      "learning_rate": 2.779069997298419e-06,
      "loss": 0.0133,
      "step": 2211740
    },
    {
      "epoch": 3.6195937497954347,
      "grad_norm": 0.07833247631788254,
      "learning_rate": 2.7790041050849027e-06,
      "loss": 0.0099,
      "step": 2211760
    },
    {
      "epoch": 3.6196264802340883,
      "grad_norm": 0.2300664633512497,
      "learning_rate": 2.7789382128713855e-06,
      "loss": 0.0116,
      "step": 2211780
    },
    {
      "epoch": 3.6196592106727414,
      "grad_norm": 0.27614107728004456,
      "learning_rate": 2.778872320657868e-06,
      "loss": 0.0117,
      "step": 2211800
    },
    {
      "epoch": 3.6196919411113946,
      "grad_norm": 0.20131853222846985,
      "learning_rate": 2.778806428444351e-06,
      "loss": 0.009,
      "step": 2211820
    },
    {
      "epoch": 3.619724671550048,
      "grad_norm": 1.0253187417984009,
      "learning_rate": 2.778740536230834e-06,
      "loss": 0.0142,
      "step": 2211840
    },
    {
      "epoch": 3.6197574019887013,
      "grad_norm": 0.3386174142360687,
      "learning_rate": 2.778674644017317e-06,
      "loss": 0.0109,
      "step": 2211860
    },
    {
      "epoch": 3.619790132427355,
      "grad_norm": 0.5066040754318237,
      "learning_rate": 2.7786087518037996e-06,
      "loss": 0.0164,
      "step": 2211880
    },
    {
      "epoch": 3.619822862866008,
      "grad_norm": 0.5475256443023682,
      "learning_rate": 2.7785428595902823e-06,
      "loss": 0.0096,
      "step": 2211900
    },
    {
      "epoch": 3.6198555933046617,
      "grad_norm": 0.31639108061790466,
      "learning_rate": 2.7784769673767655e-06,
      "loss": 0.0066,
      "step": 2211920
    },
    {
      "epoch": 3.619888323743315,
      "grad_norm": 0.3232043981552124,
      "learning_rate": 2.7784110751632482e-06,
      "loss": 0.0099,
      "step": 2211940
    },
    {
      "epoch": 3.619921054181968,
      "grad_norm": 0.20468220114707947,
      "learning_rate": 2.778345182949731e-06,
      "loss": 0.0108,
      "step": 2211960
    },
    {
      "epoch": 3.6199537846206216,
      "grad_norm": 0.14241184294223785,
      "learning_rate": 2.7782792907362137e-06,
      "loss": 0.0108,
      "step": 2211980
    },
    {
      "epoch": 3.6199865150592747,
      "grad_norm": 0.220442533493042,
      "learning_rate": 2.778213398522697e-06,
      "loss": 0.0074,
      "step": 2212000
    },
    {
      "epoch": 3.6200192454979283,
      "grad_norm": 0.14664946496486664,
      "learning_rate": 2.7781475063091796e-06,
      "loss": 0.0104,
      "step": 2212020
    },
    {
      "epoch": 3.6200519759365815,
      "grad_norm": 0.28935977816581726,
      "learning_rate": 2.7780816140956623e-06,
      "loss": 0.0128,
      "step": 2212040
    },
    {
      "epoch": 3.620084706375235,
      "grad_norm": 0.13911345601081848,
      "learning_rate": 2.778015721882145e-06,
      "loss": 0.0144,
      "step": 2212060
    },
    {
      "epoch": 3.620117436813888,
      "grad_norm": 0.38202059268951416,
      "learning_rate": 2.777949829668628e-06,
      "loss": 0.0097,
      "step": 2212080
    },
    {
      "epoch": 3.6201501672525414,
      "grad_norm": 0.15511612594127655,
      "learning_rate": 2.7778839374551114e-06,
      "loss": 0.0088,
      "step": 2212100
    },
    {
      "epoch": 3.620182897691195,
      "grad_norm": 0.35911333560943604,
      "learning_rate": 2.777818045241594e-06,
      "loss": 0.0127,
      "step": 2212120
    },
    {
      "epoch": 3.620215628129848,
      "grad_norm": 0.07013072818517685,
      "learning_rate": 2.777752153028077e-06,
      "loss": 0.0114,
      "step": 2212140
    },
    {
      "epoch": 3.6202483585685017,
      "grad_norm": 0.3865389823913574,
      "learning_rate": 2.77768626081456e-06,
      "loss": 0.0122,
      "step": 2212160
    },
    {
      "epoch": 3.620281089007155,
      "grad_norm": 0.5411056280136108,
      "learning_rate": 2.777620368601043e-06,
      "loss": 0.0109,
      "step": 2212180
    },
    {
      "epoch": 3.6203138194458084,
      "grad_norm": 0.21134433150291443,
      "learning_rate": 2.7775544763875255e-06,
      "loss": 0.0145,
      "step": 2212200
    },
    {
      "epoch": 3.6203465498844616,
      "grad_norm": 0.10902555286884308,
      "learning_rate": 2.7774885841740083e-06,
      "loss": 0.0072,
      "step": 2212220
    },
    {
      "epoch": 3.6203792803231147,
      "grad_norm": 0.41907191276550293,
      "learning_rate": 2.7774226919604914e-06,
      "loss": 0.0089,
      "step": 2212240
    },
    {
      "epoch": 3.6204120107617683,
      "grad_norm": 0.11119778454303741,
      "learning_rate": 2.777356799746974e-06,
      "loss": 0.0086,
      "step": 2212260
    },
    {
      "epoch": 3.6204447412004215,
      "grad_norm": 0.43739402294158936,
      "learning_rate": 2.777290907533457e-06,
      "loss": 0.0106,
      "step": 2212280
    },
    {
      "epoch": 3.620477471639075,
      "grad_norm": 0.7127438187599182,
      "learning_rate": 2.7772250153199397e-06,
      "loss": 0.0106,
      "step": 2212300
    },
    {
      "epoch": 3.6205102020777282,
      "grad_norm": 0.5733959674835205,
      "learning_rate": 2.777159123106423e-06,
      "loss": 0.0147,
      "step": 2212320
    },
    {
      "epoch": 3.620542932516382,
      "grad_norm": 0.3053026795387268,
      "learning_rate": 2.7770932308929056e-06,
      "loss": 0.0115,
      "step": 2212340
    },
    {
      "epoch": 3.620575662955035,
      "grad_norm": 0.2211391031742096,
      "learning_rate": 2.7770273386793883e-06,
      "loss": 0.011,
      "step": 2212360
    },
    {
      "epoch": 3.620608393393688,
      "grad_norm": 0.503478467464447,
      "learning_rate": 2.776961446465871e-06,
      "loss": 0.0105,
      "step": 2212380
    },
    {
      "epoch": 3.6206411238323417,
      "grad_norm": 0.20236103236675262,
      "learning_rate": 2.776895554252354e-06,
      "loss": 0.0075,
      "step": 2212400
    },
    {
      "epoch": 3.620673854270995,
      "grad_norm": 0.1403890699148178,
      "learning_rate": 2.776829662038837e-06,
      "loss": 0.0098,
      "step": 2212420
    },
    {
      "epoch": 3.620706584709648,
      "grad_norm": 0.33325275778770447,
      "learning_rate": 2.7767637698253197e-06,
      "loss": 0.0077,
      "step": 2212440
    },
    {
      "epoch": 3.6207393151483016,
      "grad_norm": 0.8245693445205688,
      "learning_rate": 2.7766978776118033e-06,
      "loss": 0.0083,
      "step": 2212460
    },
    {
      "epoch": 3.620772045586955,
      "grad_norm": 0.24407072365283966,
      "learning_rate": 2.776631985398286e-06,
      "loss": 0.0098,
      "step": 2212480
    },
    {
      "epoch": 3.6208047760256084,
      "grad_norm": 1.03790283203125,
      "learning_rate": 2.7765660931847688e-06,
      "loss": 0.0149,
      "step": 2212500
    },
    {
      "epoch": 3.6208375064642615,
      "grad_norm": 0.2394825965166092,
      "learning_rate": 2.7765002009712515e-06,
      "loss": 0.0144,
      "step": 2212520
    },
    {
      "epoch": 3.620870236902915,
      "grad_norm": 0.23098887503147125,
      "learning_rate": 2.7764343087577347e-06,
      "loss": 0.0139,
      "step": 2212540
    },
    {
      "epoch": 3.6209029673415682,
      "grad_norm": 0.12752240896224976,
      "learning_rate": 2.7763684165442174e-06,
      "loss": 0.009,
      "step": 2212560
    },
    {
      "epoch": 3.6209356977802214,
      "grad_norm": 0.5791919231414795,
      "learning_rate": 2.7763025243307e-06,
      "loss": 0.0158,
      "step": 2212580
    },
    {
      "epoch": 3.620968428218875,
      "grad_norm": 0.16284695267677307,
      "learning_rate": 2.776236632117183e-06,
      "loss": 0.0081,
      "step": 2212600
    },
    {
      "epoch": 3.6210011586575286,
      "grad_norm": 0.16676752269268036,
      "learning_rate": 2.7761707399036656e-06,
      "loss": 0.0113,
      "step": 2212620
    },
    {
      "epoch": 3.6210338890961817,
      "grad_norm": 0.04553350806236267,
      "learning_rate": 2.7761048476901488e-06,
      "loss": 0.0061,
      "step": 2212640
    },
    {
      "epoch": 3.621066619534835,
      "grad_norm": 0.24473857879638672,
      "learning_rate": 2.7760389554766315e-06,
      "loss": 0.0094,
      "step": 2212660
    },
    {
      "epoch": 3.6210993499734885,
      "grad_norm": 0.11931740492582321,
      "learning_rate": 2.7759730632631143e-06,
      "loss": 0.0089,
      "step": 2212680
    },
    {
      "epoch": 3.6211320804121416,
      "grad_norm": 0.40974733233451843,
      "learning_rate": 2.775907171049597e-06,
      "loss": 0.014,
      "step": 2212700
    },
    {
      "epoch": 3.621164810850795,
      "grad_norm": 0.3318110406398773,
      "learning_rate": 2.77584127883608e-06,
      "loss": 0.0111,
      "step": 2212720
    },
    {
      "epoch": 3.6211975412894484,
      "grad_norm": 0.4247836172580719,
      "learning_rate": 2.775775386622563e-06,
      "loss": 0.0109,
      "step": 2212740
    },
    {
      "epoch": 3.621230271728102,
      "grad_norm": 0.48948773741722107,
      "learning_rate": 2.7757094944090456e-06,
      "loss": 0.0114,
      "step": 2212760
    },
    {
      "epoch": 3.621263002166755,
      "grad_norm": 0.23193150758743286,
      "learning_rate": 2.7756436021955284e-06,
      "loss": 0.0175,
      "step": 2212780
    },
    {
      "epoch": 3.6212957326054083,
      "grad_norm": 0.12251803278923035,
      "learning_rate": 2.775577709982012e-06,
      "loss": 0.0198,
      "step": 2212800
    },
    {
      "epoch": 3.621328463044062,
      "grad_norm": 0.06913616508245468,
      "learning_rate": 2.7755118177684947e-06,
      "loss": 0.0116,
      "step": 2212820
    },
    {
      "epoch": 3.621361193482715,
      "grad_norm": 0.019292814657092094,
      "learning_rate": 2.7754459255549774e-06,
      "loss": 0.003,
      "step": 2212840
    },
    {
      "epoch": 3.621393923921368,
      "grad_norm": 0.05984620004892349,
      "learning_rate": 2.7753800333414606e-06,
      "loss": 0.0156,
      "step": 2212860
    },
    {
      "epoch": 3.6214266543600218,
      "grad_norm": 0.09359553456306458,
      "learning_rate": 2.7753141411279433e-06,
      "loss": 0.0117,
      "step": 2212880
    },
    {
      "epoch": 3.621459384798675,
      "grad_norm": 0.17178869247436523,
      "learning_rate": 2.775248248914426e-06,
      "loss": 0.0102,
      "step": 2212900
    },
    {
      "epoch": 3.6214921152373285,
      "grad_norm": 0.2969917953014374,
      "learning_rate": 2.775182356700909e-06,
      "loss": 0.0172,
      "step": 2212920
    },
    {
      "epoch": 3.6215248456759817,
      "grad_norm": 0.21207748353481293,
      "learning_rate": 2.775116464487392e-06,
      "loss": 0.0074,
      "step": 2212940
    },
    {
      "epoch": 3.6215575761146352,
      "grad_norm": 0.2560874819755554,
      "learning_rate": 2.7750505722738747e-06,
      "loss": 0.0098,
      "step": 2212960
    },
    {
      "epoch": 3.6215903065532884,
      "grad_norm": 0.549193263053894,
      "learning_rate": 2.7749846800603575e-06,
      "loss": 0.0181,
      "step": 2212980
    },
    {
      "epoch": 3.6216230369919415,
      "grad_norm": 0.17439472675323486,
      "learning_rate": 2.77491878784684e-06,
      "loss": 0.0098,
      "step": 2213000
    },
    {
      "epoch": 3.621655767430595,
      "grad_norm": 0.5914508700370789,
      "learning_rate": 2.7748528956333234e-06,
      "loss": 0.012,
      "step": 2213020
    },
    {
      "epoch": 3.6216884978692483,
      "grad_norm": 0.35356205701828003,
      "learning_rate": 2.774787003419806e-06,
      "loss": 0.0074,
      "step": 2213040
    },
    {
      "epoch": 3.621721228307902,
      "grad_norm": 0.3133993148803711,
      "learning_rate": 2.774721111206289e-06,
      "loss": 0.0099,
      "step": 2213060
    },
    {
      "epoch": 3.621753958746555,
      "grad_norm": 0.10425218939781189,
      "learning_rate": 2.7746552189927716e-06,
      "loss": 0.0101,
      "step": 2213080
    },
    {
      "epoch": 3.6217866891852086,
      "grad_norm": 0.08116461336612701,
      "learning_rate": 2.7745893267792543e-06,
      "loss": 0.0139,
      "step": 2213100
    },
    {
      "epoch": 3.621819419623862,
      "grad_norm": 0.19962464272975922,
      "learning_rate": 2.7745234345657375e-06,
      "loss": 0.0123,
      "step": 2213120
    },
    {
      "epoch": 3.621852150062515,
      "grad_norm": 0.11019303649663925,
      "learning_rate": 2.7744575423522202e-06,
      "loss": 0.0126,
      "step": 2213140
    },
    {
      "epoch": 3.6218848805011685,
      "grad_norm": 0.10928945243358612,
      "learning_rate": 2.7743916501387034e-06,
      "loss": 0.0096,
      "step": 2213160
    },
    {
      "epoch": 3.6219176109398217,
      "grad_norm": 0.15639980137348175,
      "learning_rate": 2.7743257579251866e-06,
      "loss": 0.0114,
      "step": 2213180
    },
    {
      "epoch": 3.6219503413784753,
      "grad_norm": 0.15742768347263336,
      "learning_rate": 2.7742598657116693e-06,
      "loss": 0.0077,
      "step": 2213200
    },
    {
      "epoch": 3.6219830718171284,
      "grad_norm": 0.22168458998203278,
      "learning_rate": 2.774193973498152e-06,
      "loss": 0.0113,
      "step": 2213220
    },
    {
      "epoch": 3.622015802255782,
      "grad_norm": 0.957262396812439,
      "learning_rate": 2.7741280812846348e-06,
      "loss": 0.0129,
      "step": 2213240
    },
    {
      "epoch": 3.622048532694435,
      "grad_norm": 0.28653696179389954,
      "learning_rate": 2.774062189071118e-06,
      "loss": 0.0141,
      "step": 2213260
    },
    {
      "epoch": 3.6220812631330883,
      "grad_norm": 0.631905734539032,
      "learning_rate": 2.7739962968576007e-06,
      "loss": 0.0113,
      "step": 2213280
    },
    {
      "epoch": 3.622113993571742,
      "grad_norm": 0.35270681977272034,
      "learning_rate": 2.7739304046440834e-06,
      "loss": 0.0118,
      "step": 2213300
    },
    {
      "epoch": 3.622146724010395,
      "grad_norm": 0.13810913264751434,
      "learning_rate": 2.773864512430566e-06,
      "loss": 0.011,
      "step": 2213320
    },
    {
      "epoch": 3.6221794544490487,
      "grad_norm": 0.2606782615184784,
      "learning_rate": 2.7737986202170493e-06,
      "loss": 0.0126,
      "step": 2213340
    },
    {
      "epoch": 3.622212184887702,
      "grad_norm": 0.10585527122020721,
      "learning_rate": 2.773732728003532e-06,
      "loss": 0.0116,
      "step": 2213360
    },
    {
      "epoch": 3.6222449153263554,
      "grad_norm": 0.6811834573745728,
      "learning_rate": 2.773666835790015e-06,
      "loss": 0.016,
      "step": 2213380
    },
    {
      "epoch": 3.6222776457650085,
      "grad_norm": 0.637674868106842,
      "learning_rate": 2.7736009435764975e-06,
      "loss": 0.0162,
      "step": 2213400
    },
    {
      "epoch": 3.6223103762036617,
      "grad_norm": 0.9553174376487732,
      "learning_rate": 2.7735350513629807e-06,
      "loss": 0.0086,
      "step": 2213420
    },
    {
      "epoch": 3.6223431066423153,
      "grad_norm": 0.39797723293304443,
      "learning_rate": 2.7734691591494634e-06,
      "loss": 0.0133,
      "step": 2213440
    },
    {
      "epoch": 3.6223758370809684,
      "grad_norm": 0.24986721575260162,
      "learning_rate": 2.773403266935946e-06,
      "loss": 0.0075,
      "step": 2213460
    },
    {
      "epoch": 3.622408567519622,
      "grad_norm": 0.26592594385147095,
      "learning_rate": 2.773337374722429e-06,
      "loss": 0.0105,
      "step": 2213480
    },
    {
      "epoch": 3.622441297958275,
      "grad_norm": 0.3623335659503937,
      "learning_rate": 2.7732714825089117e-06,
      "loss": 0.0151,
      "step": 2213500
    },
    {
      "epoch": 3.622474028396929,
      "grad_norm": 0.1860111951828003,
      "learning_rate": 2.7732055902953953e-06,
      "loss": 0.0105,
      "step": 2213520
    },
    {
      "epoch": 3.622506758835582,
      "grad_norm": 0.7312430143356323,
      "learning_rate": 2.773139698081878e-06,
      "loss": 0.0099,
      "step": 2213540
    },
    {
      "epoch": 3.622539489274235,
      "grad_norm": 0.12924447655677795,
      "learning_rate": 2.7730738058683607e-06,
      "loss": 0.0152,
      "step": 2213560
    },
    {
      "epoch": 3.6225722197128887,
      "grad_norm": 0.35961392521858215,
      "learning_rate": 2.773007913654844e-06,
      "loss": 0.013,
      "step": 2213580
    },
    {
      "epoch": 3.622604950151542,
      "grad_norm": 0.11197658628225327,
      "learning_rate": 2.7729420214413266e-06,
      "loss": 0.0088,
      "step": 2213600
    },
    {
      "epoch": 3.6226376805901954,
      "grad_norm": 0.47471556067466736,
      "learning_rate": 2.7728761292278094e-06,
      "loss": 0.0196,
      "step": 2213620
    },
    {
      "epoch": 3.6226704110288486,
      "grad_norm": 0.15025880932807922,
      "learning_rate": 2.772810237014292e-06,
      "loss": 0.0098,
      "step": 2213640
    },
    {
      "epoch": 3.622703141467502,
      "grad_norm": 0.2215370088815689,
      "learning_rate": 2.7727443448007753e-06,
      "loss": 0.0099,
      "step": 2213660
    },
    {
      "epoch": 3.6227358719061553,
      "grad_norm": 0.8610138893127441,
      "learning_rate": 2.772678452587258e-06,
      "loss": 0.0136,
      "step": 2213680
    },
    {
      "epoch": 3.6227686023448085,
      "grad_norm": 0.2776333689689636,
      "learning_rate": 2.7726125603737408e-06,
      "loss": 0.0085,
      "step": 2213700
    },
    {
      "epoch": 3.622801332783462,
      "grad_norm": 0.18861918151378632,
      "learning_rate": 2.7725466681602235e-06,
      "loss": 0.0105,
      "step": 2213720
    },
    {
      "epoch": 3.622834063222115,
      "grad_norm": 0.25024816393852234,
      "learning_rate": 2.7724807759467067e-06,
      "loss": 0.0088,
      "step": 2213740
    },
    {
      "epoch": 3.622866793660769,
      "grad_norm": 0.1149810254573822,
      "learning_rate": 2.7724148837331894e-06,
      "loss": 0.0091,
      "step": 2213760
    },
    {
      "epoch": 3.622899524099422,
      "grad_norm": 0.20241467654705048,
      "learning_rate": 2.772348991519672e-06,
      "loss": 0.0093,
      "step": 2213780
    },
    {
      "epoch": 3.6229322545380755,
      "grad_norm": 0.13821837306022644,
      "learning_rate": 2.772283099306155e-06,
      "loss": 0.0141,
      "step": 2213800
    },
    {
      "epoch": 3.6229649849767287,
      "grad_norm": 0.09015874564647675,
      "learning_rate": 2.772217207092638e-06,
      "loss": 0.0105,
      "step": 2213820
    },
    {
      "epoch": 3.622997715415382,
      "grad_norm": 0.2294473499059677,
      "learning_rate": 2.7721513148791208e-06,
      "loss": 0.009,
      "step": 2213840
    },
    {
      "epoch": 3.6230304458540354,
      "grad_norm": 0.25569042563438416,
      "learning_rate": 2.772085422665604e-06,
      "loss": 0.0115,
      "step": 2213860
    },
    {
      "epoch": 3.6230631762926886,
      "grad_norm": 0.0559840127825737,
      "learning_rate": 2.772019530452087e-06,
      "loss": 0.0096,
      "step": 2213880
    },
    {
      "epoch": 3.6230959067313417,
      "grad_norm": 0.3973133862018585,
      "learning_rate": 2.77195363823857e-06,
      "loss": 0.0095,
      "step": 2213900
    },
    {
      "epoch": 3.6231286371699953,
      "grad_norm": 0.1261698454618454,
      "learning_rate": 2.7718877460250526e-06,
      "loss": 0.0124,
      "step": 2213920
    },
    {
      "epoch": 3.623161367608649,
      "grad_norm": 0.33339717984199524,
      "learning_rate": 2.7718218538115353e-06,
      "loss": 0.0075,
      "step": 2213940
    },
    {
      "epoch": 3.623194098047302,
      "grad_norm": 0.24910421669483185,
      "learning_rate": 2.7717559615980185e-06,
      "loss": 0.0105,
      "step": 2213960
    },
    {
      "epoch": 3.6232268284859552,
      "grad_norm": 0.3309970200061798,
      "learning_rate": 2.7716900693845012e-06,
      "loss": 0.0122,
      "step": 2213980
    },
    {
      "epoch": 3.623259558924609,
      "grad_norm": 0.27641239762306213,
      "learning_rate": 2.771624177170984e-06,
      "loss": 0.0123,
      "step": 2214000
    },
    {
      "epoch": 3.623292289363262,
      "grad_norm": 0.06717413663864136,
      "learning_rate": 2.7715582849574667e-06,
      "loss": 0.0076,
      "step": 2214020
    },
    {
      "epoch": 3.623325019801915,
      "grad_norm": 0.48178330063819885,
      "learning_rate": 2.7714923927439495e-06,
      "loss": 0.0111,
      "step": 2214040
    },
    {
      "epoch": 3.6233577502405687,
      "grad_norm": 1.0392357110977173,
      "learning_rate": 2.7714265005304326e-06,
      "loss": 0.0157,
      "step": 2214060
    },
    {
      "epoch": 3.6233904806792223,
      "grad_norm": 0.06458140909671783,
      "learning_rate": 2.7713606083169154e-06,
      "loss": 0.0075,
      "step": 2214080
    },
    {
      "epoch": 3.6234232111178755,
      "grad_norm": 0.14064079523086548,
      "learning_rate": 2.771294716103398e-06,
      "loss": 0.0124,
      "step": 2214100
    },
    {
      "epoch": 3.6234559415565286,
      "grad_norm": 0.21756352484226227,
      "learning_rate": 2.771228823889881e-06,
      "loss": 0.0117,
      "step": 2214120
    },
    {
      "epoch": 3.623488671995182,
      "grad_norm": 0.24137598276138306,
      "learning_rate": 2.771162931676364e-06,
      "loss": 0.012,
      "step": 2214140
    },
    {
      "epoch": 3.6235214024338354,
      "grad_norm": 0.9319083094596863,
      "learning_rate": 2.7710970394628467e-06,
      "loss": 0.0114,
      "step": 2214160
    },
    {
      "epoch": 3.6235541328724885,
      "grad_norm": 0.5198391079902649,
      "learning_rate": 2.7710311472493295e-06,
      "loss": 0.0137,
      "step": 2214180
    },
    {
      "epoch": 3.623586863311142,
      "grad_norm": 0.19914349913597107,
      "learning_rate": 2.7709652550358122e-06,
      "loss": 0.0114,
      "step": 2214200
    },
    {
      "epoch": 3.6236195937497957,
      "grad_norm": 0.27698373794555664,
      "learning_rate": 2.770899362822296e-06,
      "loss": 0.0128,
      "step": 2214220
    },
    {
      "epoch": 3.623652324188449,
      "grad_norm": 0.31076937913894653,
      "learning_rate": 2.7708334706087785e-06,
      "loss": 0.0144,
      "step": 2214240
    },
    {
      "epoch": 3.623685054627102,
      "grad_norm": 0.18704800307750702,
      "learning_rate": 2.7707675783952613e-06,
      "loss": 0.0169,
      "step": 2214260
    },
    {
      "epoch": 3.6237177850657556,
      "grad_norm": 0.03901595249772072,
      "learning_rate": 2.7707016861817444e-06,
      "loss": 0.0113,
      "step": 2214280
    },
    {
      "epoch": 3.6237505155044087,
      "grad_norm": 0.35996633768081665,
      "learning_rate": 2.770635793968227e-06,
      "loss": 0.0107,
      "step": 2214300
    },
    {
      "epoch": 3.623783245943062,
      "grad_norm": 0.634913444519043,
      "learning_rate": 2.77056990175471e-06,
      "loss": 0.01,
      "step": 2214320
    },
    {
      "epoch": 3.6238159763817155,
      "grad_norm": 0.15814338624477386,
      "learning_rate": 2.7705040095411927e-06,
      "loss": 0.0081,
      "step": 2214340
    },
    {
      "epoch": 3.623848706820369,
      "grad_norm": 0.18892993032932281,
      "learning_rate": 2.770438117327676e-06,
      "loss": 0.0075,
      "step": 2214360
    },
    {
      "epoch": 3.6238814372590222,
      "grad_norm": 0.3221094310283661,
      "learning_rate": 2.7703722251141586e-06,
      "loss": 0.007,
      "step": 2214380
    },
    {
      "epoch": 3.6239141676976754,
      "grad_norm": 0.09647726267576218,
      "learning_rate": 2.7703063329006413e-06,
      "loss": 0.0161,
      "step": 2214400
    },
    {
      "epoch": 3.623946898136329,
      "grad_norm": 0.32651373744010925,
      "learning_rate": 2.770240440687124e-06,
      "loss": 0.0079,
      "step": 2214420
    },
    {
      "epoch": 3.623979628574982,
      "grad_norm": 0.2023003250360489,
      "learning_rate": 2.7701745484736072e-06,
      "loss": 0.0079,
      "step": 2214440
    },
    {
      "epoch": 3.6240123590136353,
      "grad_norm": 0.14720304310321808,
      "learning_rate": 2.77010865626009e-06,
      "loss": 0.0084,
      "step": 2214460
    },
    {
      "epoch": 3.624045089452289,
      "grad_norm": 0.34399688243865967,
      "learning_rate": 2.7700427640465727e-06,
      "loss": 0.0172,
      "step": 2214480
    },
    {
      "epoch": 3.624077819890942,
      "grad_norm": 0.1635044813156128,
      "learning_rate": 2.7699768718330554e-06,
      "loss": 0.0133,
      "step": 2214500
    },
    {
      "epoch": 3.6241105503295956,
      "grad_norm": 0.1415894776582718,
      "learning_rate": 2.769910979619538e-06,
      "loss": 0.0091,
      "step": 2214520
    },
    {
      "epoch": 3.6241432807682488,
      "grad_norm": 0.2913234829902649,
      "learning_rate": 2.7698450874060213e-06,
      "loss": 0.019,
      "step": 2214540
    },
    {
      "epoch": 3.6241760112069024,
      "grad_norm": 0.2576419413089752,
      "learning_rate": 2.7697791951925045e-06,
      "loss": 0.0072,
      "step": 2214560
    },
    {
      "epoch": 3.6242087416455555,
      "grad_norm": 0.32852593064308167,
      "learning_rate": 2.7697133029789872e-06,
      "loss": 0.0099,
      "step": 2214580
    },
    {
      "epoch": 3.6242414720842087,
      "grad_norm": 0.20052607357501984,
      "learning_rate": 2.7696474107654704e-06,
      "loss": 0.0188,
      "step": 2214600
    },
    {
      "epoch": 3.6242742025228623,
      "grad_norm": 0.7391347885131836,
      "learning_rate": 2.769581518551953e-06,
      "loss": 0.0205,
      "step": 2214620
    },
    {
      "epoch": 3.6243069329615154,
      "grad_norm": 0.35154327750205994,
      "learning_rate": 2.769515626338436e-06,
      "loss": 0.0131,
      "step": 2214640
    },
    {
      "epoch": 3.624339663400169,
      "grad_norm": 0.227545827627182,
      "learning_rate": 2.7694497341249186e-06,
      "loss": 0.0132,
      "step": 2214660
    },
    {
      "epoch": 3.624372393838822,
      "grad_norm": 0.5276728272438049,
      "learning_rate": 2.7693838419114018e-06,
      "loss": 0.0104,
      "step": 2214680
    },
    {
      "epoch": 3.6244051242774757,
      "grad_norm": 0.19995075464248657,
      "learning_rate": 2.7693179496978845e-06,
      "loss": 0.0091,
      "step": 2214700
    },
    {
      "epoch": 3.624437854716129,
      "grad_norm": 0.12382130324840546,
      "learning_rate": 2.7692520574843673e-06,
      "loss": 0.0099,
      "step": 2214720
    },
    {
      "epoch": 3.624470585154782,
      "grad_norm": 0.36916911602020264,
      "learning_rate": 2.76918616527085e-06,
      "loss": 0.0073,
      "step": 2214740
    },
    {
      "epoch": 3.6245033155934356,
      "grad_norm": 0.34077391028404236,
      "learning_rate": 2.769120273057333e-06,
      "loss": 0.0148,
      "step": 2214760
    },
    {
      "epoch": 3.624536046032089,
      "grad_norm": 0.07538646459579468,
      "learning_rate": 2.769054380843816e-06,
      "loss": 0.0092,
      "step": 2214780
    },
    {
      "epoch": 3.6245687764707424,
      "grad_norm": 0.3664783239364624,
      "learning_rate": 2.7689884886302986e-06,
      "loss": 0.0136,
      "step": 2214800
    },
    {
      "epoch": 3.6246015069093955,
      "grad_norm": 0.526085376739502,
      "learning_rate": 2.7689225964167814e-06,
      "loss": 0.0087,
      "step": 2214820
    },
    {
      "epoch": 3.624634237348049,
      "grad_norm": 0.3731670379638672,
      "learning_rate": 2.7688567042032645e-06,
      "loss": 0.0111,
      "step": 2214840
    },
    {
      "epoch": 3.6246669677867023,
      "grad_norm": 0.13173936307430267,
      "learning_rate": 2.7687908119897473e-06,
      "loss": 0.0132,
      "step": 2214860
    },
    {
      "epoch": 3.6246996982253554,
      "grad_norm": 0.20432229340076447,
      "learning_rate": 2.76872491977623e-06,
      "loss": 0.0108,
      "step": 2214880
    },
    {
      "epoch": 3.624732428664009,
      "grad_norm": 0.2809513211250305,
      "learning_rate": 2.7686590275627128e-06,
      "loss": 0.0109,
      "step": 2214900
    },
    {
      "epoch": 3.624765159102662,
      "grad_norm": 0.10493317991495132,
      "learning_rate": 2.7685931353491964e-06,
      "loss": 0.0125,
      "step": 2214920
    },
    {
      "epoch": 3.6247978895413158,
      "grad_norm": 0.23515832424163818,
      "learning_rate": 2.768527243135679e-06,
      "loss": 0.0129,
      "step": 2214940
    },
    {
      "epoch": 3.624830619979969,
      "grad_norm": 0.23535694181919098,
      "learning_rate": 2.768461350922162e-06,
      "loss": 0.0089,
      "step": 2214960
    },
    {
      "epoch": 3.6248633504186225,
      "grad_norm": 0.46876800060272217,
      "learning_rate": 2.768395458708645e-06,
      "loss": 0.0101,
      "step": 2214980
    },
    {
      "epoch": 3.6248960808572757,
      "grad_norm": 0.336002916097641,
      "learning_rate": 2.7683295664951277e-06,
      "loss": 0.0114,
      "step": 2215000
    },
    {
      "epoch": 3.624928811295929,
      "grad_norm": 0.2816939353942871,
      "learning_rate": 2.7682636742816105e-06,
      "loss": 0.0087,
      "step": 2215020
    },
    {
      "epoch": 3.6249615417345824,
      "grad_norm": 0.3130966126918793,
      "learning_rate": 2.7681977820680932e-06,
      "loss": 0.011,
      "step": 2215040
    },
    {
      "epoch": 3.6249942721732356,
      "grad_norm": 0.30364593863487244,
      "learning_rate": 2.768131889854576e-06,
      "loss": 0.014,
      "step": 2215060
    },
    {
      "epoch": 3.625027002611889,
      "grad_norm": 0.6420596241950989,
      "learning_rate": 2.768065997641059e-06,
      "loss": 0.0114,
      "step": 2215080
    },
    {
      "epoch": 3.6250597330505423,
      "grad_norm": 0.125787153840065,
      "learning_rate": 2.768000105427542e-06,
      "loss": 0.0101,
      "step": 2215100
    },
    {
      "epoch": 3.625092463489196,
      "grad_norm": 0.9285579323768616,
      "learning_rate": 2.7679342132140246e-06,
      "loss": 0.0158,
      "step": 2215120
    },
    {
      "epoch": 3.625125193927849,
      "grad_norm": 0.07161534577608109,
      "learning_rate": 2.7678683210005073e-06,
      "loss": 0.0127,
      "step": 2215140
    },
    {
      "epoch": 3.625157924366502,
      "grad_norm": 0.236736461520195,
      "learning_rate": 2.7678024287869905e-06,
      "loss": 0.0091,
      "step": 2215160
    },
    {
      "epoch": 3.625190654805156,
      "grad_norm": 0.3838317394256592,
      "learning_rate": 2.7677365365734732e-06,
      "loss": 0.0132,
      "step": 2215180
    },
    {
      "epoch": 3.625223385243809,
      "grad_norm": 0.9272936582565308,
      "learning_rate": 2.767670644359956e-06,
      "loss": 0.0099,
      "step": 2215200
    },
    {
      "epoch": 3.6252561156824625,
      "grad_norm": 0.4346347451210022,
      "learning_rate": 2.7676047521464387e-06,
      "loss": 0.0107,
      "step": 2215220
    },
    {
      "epoch": 3.6252888461211157,
      "grad_norm": 0.5265690684318542,
      "learning_rate": 2.767538859932922e-06,
      "loss": 0.0102,
      "step": 2215240
    },
    {
      "epoch": 3.6253215765597693,
      "grad_norm": 0.1933405101299286,
      "learning_rate": 2.767472967719405e-06,
      "loss": 0.0125,
      "step": 2215260
    },
    {
      "epoch": 3.6253543069984224,
      "grad_norm": 0.20514482259750366,
      "learning_rate": 2.7674070755058878e-06,
      "loss": 0.0112,
      "step": 2215280
    },
    {
      "epoch": 3.6253870374370756,
      "grad_norm": 0.3744772672653198,
      "learning_rate": 2.767341183292371e-06,
      "loss": 0.0126,
      "step": 2215300
    },
    {
      "epoch": 3.625419767875729,
      "grad_norm": 0.31568238139152527,
      "learning_rate": 2.7672752910788537e-06,
      "loss": 0.0132,
      "step": 2215320
    },
    {
      "epoch": 3.6254524983143823,
      "grad_norm": 0.13294941186904907,
      "learning_rate": 2.7672093988653364e-06,
      "loss": 0.0152,
      "step": 2215340
    },
    {
      "epoch": 3.6254852287530355,
      "grad_norm": 0.028870929032564163,
      "learning_rate": 2.767143506651819e-06,
      "loss": 0.01,
      "step": 2215360
    },
    {
      "epoch": 3.625517959191689,
      "grad_norm": 0.10150884091854095,
      "learning_rate": 2.7670776144383023e-06,
      "loss": 0.0106,
      "step": 2215380
    },
    {
      "epoch": 3.6255506896303427,
      "grad_norm": 0.137864351272583,
      "learning_rate": 2.767011722224785e-06,
      "loss": 0.0066,
      "step": 2215400
    },
    {
      "epoch": 3.625583420068996,
      "grad_norm": 4.36797571182251,
      "learning_rate": 2.766945830011268e-06,
      "loss": 0.013,
      "step": 2215420
    },
    {
      "epoch": 3.625616150507649,
      "grad_norm": 0.3557070195674896,
      "learning_rate": 2.7668799377977506e-06,
      "loss": 0.0108,
      "step": 2215440
    },
    {
      "epoch": 3.6256488809463026,
      "grad_norm": 0.5404301881790161,
      "learning_rate": 2.7668140455842333e-06,
      "loss": 0.0085,
      "step": 2215460
    },
    {
      "epoch": 3.6256816113849557,
      "grad_norm": 0.16153103113174438,
      "learning_rate": 2.7667481533707165e-06,
      "loss": 0.0078,
      "step": 2215480
    },
    {
      "epoch": 3.625714341823609,
      "grad_norm": 0.24294933676719666,
      "learning_rate": 2.766682261157199e-06,
      "loss": 0.0088,
      "step": 2215500
    },
    {
      "epoch": 3.6257470722622624,
      "grad_norm": 0.1851785033941269,
      "learning_rate": 2.766616368943682e-06,
      "loss": 0.0122,
      "step": 2215520
    },
    {
      "epoch": 3.625779802700916,
      "grad_norm": 0.31923791766166687,
      "learning_rate": 2.7665504767301647e-06,
      "loss": 0.0091,
      "step": 2215540
    },
    {
      "epoch": 3.625812533139569,
      "grad_norm": 0.29803526401519775,
      "learning_rate": 2.766484584516648e-06,
      "loss": 0.0105,
      "step": 2215560
    },
    {
      "epoch": 3.6258452635782223,
      "grad_norm": 0.22198449075222015,
      "learning_rate": 2.7664186923031306e-06,
      "loss": 0.0125,
      "step": 2215580
    },
    {
      "epoch": 3.625877994016876,
      "grad_norm": 0.39778417348861694,
      "learning_rate": 2.7663528000896133e-06,
      "loss": 0.0124,
      "step": 2215600
    },
    {
      "epoch": 3.625910724455529,
      "grad_norm": 0.3734185993671417,
      "learning_rate": 2.766286907876097e-06,
      "loss": 0.0129,
      "step": 2215620
    },
    {
      "epoch": 3.6259434548941822,
      "grad_norm": 0.24152350425720215,
      "learning_rate": 2.7662210156625796e-06,
      "loss": 0.0107,
      "step": 2215640
    },
    {
      "epoch": 3.625976185332836,
      "grad_norm": 0.3654560446739197,
      "learning_rate": 2.7661551234490624e-06,
      "loss": 0.0142,
      "step": 2215660
    },
    {
      "epoch": 3.6260089157714894,
      "grad_norm": 0.17402981221675873,
      "learning_rate": 2.766089231235545e-06,
      "loss": 0.0085,
      "step": 2215680
    },
    {
      "epoch": 3.6260416462101426,
      "grad_norm": 0.18711869418621063,
      "learning_rate": 2.7660233390220283e-06,
      "loss": 0.0172,
      "step": 2215700
    },
    {
      "epoch": 3.6260743766487957,
      "grad_norm": 0.7990756034851074,
      "learning_rate": 2.765957446808511e-06,
      "loss": 0.0128,
      "step": 2215720
    },
    {
      "epoch": 3.6261071070874493,
      "grad_norm": 0.17282994091510773,
      "learning_rate": 2.7658915545949938e-06,
      "loss": 0.0111,
      "step": 2215740
    },
    {
      "epoch": 3.6261398375261025,
      "grad_norm": 0.17683906853199005,
      "learning_rate": 2.7658256623814765e-06,
      "loss": 0.0079,
      "step": 2215760
    },
    {
      "epoch": 3.6261725679647556,
      "grad_norm": 0.46164998412132263,
      "learning_rate": 2.7657597701679597e-06,
      "loss": 0.0135,
      "step": 2215780
    },
    {
      "epoch": 3.626205298403409,
      "grad_norm": 0.3811115622520447,
      "learning_rate": 2.7656938779544424e-06,
      "loss": 0.0104,
      "step": 2215800
    },
    {
      "epoch": 3.626238028842063,
      "grad_norm": 0.10592520982027054,
      "learning_rate": 2.765627985740925e-06,
      "loss": 0.0142,
      "step": 2215820
    },
    {
      "epoch": 3.626270759280716,
      "grad_norm": 0.16126450896263123,
      "learning_rate": 2.765562093527408e-06,
      "loss": 0.0075,
      "step": 2215840
    },
    {
      "epoch": 3.626303489719369,
      "grad_norm": 0.17174990475177765,
      "learning_rate": 2.765496201313891e-06,
      "loss": 0.0123,
      "step": 2215860
    },
    {
      "epoch": 3.6263362201580227,
      "grad_norm": 0.36450621485710144,
      "learning_rate": 2.765430309100374e-06,
      "loss": 0.0138,
      "step": 2215880
    },
    {
      "epoch": 3.626368950596676,
      "grad_norm": 0.2611550986766815,
      "learning_rate": 2.7653644168868565e-06,
      "loss": 0.0099,
      "step": 2215900
    },
    {
      "epoch": 3.626401681035329,
      "grad_norm": 1.3582168817520142,
      "learning_rate": 2.7652985246733393e-06,
      "loss": 0.0139,
      "step": 2215920
    },
    {
      "epoch": 3.6264344114739826,
      "grad_norm": 0.42621544003486633,
      "learning_rate": 2.765232632459822e-06,
      "loss": 0.0103,
      "step": 2215940
    },
    {
      "epoch": 3.6264671419126357,
      "grad_norm": 0.05021078884601593,
      "learning_rate": 2.765166740246305e-06,
      "loss": 0.0081,
      "step": 2215960
    },
    {
      "epoch": 3.6264998723512893,
      "grad_norm": 0.2773760259151459,
      "learning_rate": 2.7651008480327883e-06,
      "loss": 0.0085,
      "step": 2215980
    },
    {
      "epoch": 3.6265326027899425,
      "grad_norm": 0.13854584097862244,
      "learning_rate": 2.765034955819271e-06,
      "loss": 0.0145,
      "step": 2216000
    },
    {
      "epoch": 3.626565333228596,
      "grad_norm": 0.13907095789909363,
      "learning_rate": 2.7649690636057542e-06,
      "loss": 0.0123,
      "step": 2216020
    },
    {
      "epoch": 3.6265980636672492,
      "grad_norm": 0.15983831882476807,
      "learning_rate": 2.764903171392237e-06,
      "loss": 0.0108,
      "step": 2216040
    },
    {
      "epoch": 3.6266307941059024,
      "grad_norm": 1.0444285869598389,
      "learning_rate": 2.7648372791787197e-06,
      "loss": 0.0092,
      "step": 2216060
    },
    {
      "epoch": 3.626663524544556,
      "grad_norm": 0.18092934787273407,
      "learning_rate": 2.7647713869652025e-06,
      "loss": 0.0113,
      "step": 2216080
    },
    {
      "epoch": 3.626696254983209,
      "grad_norm": 0.15167886018753052,
      "learning_rate": 2.7647054947516856e-06,
      "loss": 0.0071,
      "step": 2216100
    },
    {
      "epoch": 3.6267289854218627,
      "grad_norm": 0.07770352065563202,
      "learning_rate": 2.7646396025381684e-06,
      "loss": 0.0118,
      "step": 2216120
    },
    {
      "epoch": 3.626761715860516,
      "grad_norm": 0.5203475952148438,
      "learning_rate": 2.764573710324651e-06,
      "loss": 0.0081,
      "step": 2216140
    },
    {
      "epoch": 3.6267944462991695,
      "grad_norm": 0.361028254032135,
      "learning_rate": 2.764507818111134e-06,
      "loss": 0.0078,
      "step": 2216160
    },
    {
      "epoch": 3.6268271767378226,
      "grad_norm": 0.4640166759490967,
      "learning_rate": 2.764441925897617e-06,
      "loss": 0.0107,
      "step": 2216180
    },
    {
      "epoch": 3.6268599071764758,
      "grad_norm": 0.4276949167251587,
      "learning_rate": 2.7643760336840997e-06,
      "loss": 0.01,
      "step": 2216200
    },
    {
      "epoch": 3.6268926376151294,
      "grad_norm": 0.20000682771205902,
      "learning_rate": 2.7643101414705825e-06,
      "loss": 0.0109,
      "step": 2216220
    },
    {
      "epoch": 3.6269253680537825,
      "grad_norm": 0.10610754787921906,
      "learning_rate": 2.7642442492570652e-06,
      "loss": 0.0121,
      "step": 2216240
    },
    {
      "epoch": 3.626958098492436,
      "grad_norm": 0.1231083869934082,
      "learning_rate": 2.7641783570435484e-06,
      "loss": 0.0093,
      "step": 2216260
    },
    {
      "epoch": 3.6269908289310893,
      "grad_norm": 0.3623817563056946,
      "learning_rate": 2.764112464830031e-06,
      "loss": 0.011,
      "step": 2216280
    },
    {
      "epoch": 3.627023559369743,
      "grad_norm": 0.11611808091402054,
      "learning_rate": 2.764046572616514e-06,
      "loss": 0.0136,
      "step": 2216300
    },
    {
      "epoch": 3.627056289808396,
      "grad_norm": 0.19575922191143036,
      "learning_rate": 2.7639806804029975e-06,
      "loss": 0.0083,
      "step": 2216320
    },
    {
      "epoch": 3.627089020247049,
      "grad_norm": 0.11712438613176346,
      "learning_rate": 2.76391478818948e-06,
      "loss": 0.0189,
      "step": 2216340
    },
    {
      "epoch": 3.6271217506857028,
      "grad_norm": 0.4679733216762543,
      "learning_rate": 2.763848895975963e-06,
      "loss": 0.0144,
      "step": 2216360
    },
    {
      "epoch": 3.627154481124356,
      "grad_norm": 0.15008467435836792,
      "learning_rate": 2.7637830037624457e-06,
      "loss": 0.0103,
      "step": 2216380
    },
    {
      "epoch": 3.6271872115630095,
      "grad_norm": 0.21261070668697357,
      "learning_rate": 2.763717111548929e-06,
      "loss": 0.0114,
      "step": 2216400
    },
    {
      "epoch": 3.6272199420016626,
      "grad_norm": 0.19003252685070038,
      "learning_rate": 2.7636512193354116e-06,
      "loss": 0.0095,
      "step": 2216420
    },
    {
      "epoch": 3.6272526724403162,
      "grad_norm": 0.08832082152366638,
      "learning_rate": 2.7635853271218943e-06,
      "loss": 0.009,
      "step": 2216440
    },
    {
      "epoch": 3.6272854028789694,
      "grad_norm": 0.3530847728252411,
      "learning_rate": 2.763519434908377e-06,
      "loss": 0.0101,
      "step": 2216460
    },
    {
      "epoch": 3.6273181333176225,
      "grad_norm": 0.46615082025527954,
      "learning_rate": 2.76345354269486e-06,
      "loss": 0.0126,
      "step": 2216480
    },
    {
      "epoch": 3.627350863756276,
      "grad_norm": 0.3592381179332733,
      "learning_rate": 2.763387650481343e-06,
      "loss": 0.0142,
      "step": 2216500
    },
    {
      "epoch": 3.6273835941949293,
      "grad_norm": 0.14561614394187927,
      "learning_rate": 2.7633217582678257e-06,
      "loss": 0.0147,
      "step": 2216520
    },
    {
      "epoch": 3.627416324633583,
      "grad_norm": 0.30553632974624634,
      "learning_rate": 2.7632558660543084e-06,
      "loss": 0.0128,
      "step": 2216540
    },
    {
      "epoch": 3.627449055072236,
      "grad_norm": 0.1826947182416916,
      "learning_rate": 2.763189973840791e-06,
      "loss": 0.0065,
      "step": 2216560
    },
    {
      "epoch": 3.6274817855108896,
      "grad_norm": 0.4930422008037567,
      "learning_rate": 2.7631240816272743e-06,
      "loss": 0.0105,
      "step": 2216580
    },
    {
      "epoch": 3.6275145159495428,
      "grad_norm": 0.6362987160682678,
      "learning_rate": 2.763058189413757e-06,
      "loss": 0.0155,
      "step": 2216600
    },
    {
      "epoch": 3.627547246388196,
      "grad_norm": 0.2626124620437622,
      "learning_rate": 2.76299229720024e-06,
      "loss": 0.0091,
      "step": 2216620
    },
    {
      "epoch": 3.6275799768268495,
      "grad_norm": 0.11737359315156937,
      "learning_rate": 2.7629264049867226e-06,
      "loss": 0.0107,
      "step": 2216640
    },
    {
      "epoch": 3.6276127072655027,
      "grad_norm": 0.23852568864822388,
      "learning_rate": 2.7628605127732057e-06,
      "loss": 0.0082,
      "step": 2216660
    },
    {
      "epoch": 3.6276454377041563,
      "grad_norm": 0.35000044107437134,
      "learning_rate": 2.762794620559689e-06,
      "loss": 0.0184,
      "step": 2216680
    },
    {
      "epoch": 3.6276781681428094,
      "grad_norm": 0.20799031853675842,
      "learning_rate": 2.7627287283461716e-06,
      "loss": 0.0146,
      "step": 2216700
    },
    {
      "epoch": 3.627710898581463,
      "grad_norm": 0.21888616681098938,
      "learning_rate": 2.762662836132655e-06,
      "loss": 0.0085,
      "step": 2216720
    },
    {
      "epoch": 3.627743629020116,
      "grad_norm": 0.3533952534198761,
      "learning_rate": 2.7625969439191375e-06,
      "loss": 0.0129,
      "step": 2216740
    },
    {
      "epoch": 3.6277763594587693,
      "grad_norm": 1.010680079460144,
      "learning_rate": 2.7625310517056203e-06,
      "loss": 0.0143,
      "step": 2216760
    },
    {
      "epoch": 3.627809089897423,
      "grad_norm": 0.20651380717754364,
      "learning_rate": 2.762465159492103e-06,
      "loss": 0.0098,
      "step": 2216780
    },
    {
      "epoch": 3.627841820336076,
      "grad_norm": 0.17948897182941437,
      "learning_rate": 2.762399267278586e-06,
      "loss": 0.0079,
      "step": 2216800
    },
    {
      "epoch": 3.6278745507747296,
      "grad_norm": 0.1300974041223526,
      "learning_rate": 2.762333375065069e-06,
      "loss": 0.0152,
      "step": 2216820
    },
    {
      "epoch": 3.627907281213383,
      "grad_norm": 0.5810167193412781,
      "learning_rate": 2.7622674828515517e-06,
      "loss": 0.0111,
      "step": 2216840
    },
    {
      "epoch": 3.6279400116520364,
      "grad_norm": 0.22832421958446503,
      "learning_rate": 2.7622015906380344e-06,
      "loss": 0.0159,
      "step": 2216860
    },
    {
      "epoch": 3.6279727420906895,
      "grad_norm": 0.1358005553483963,
      "learning_rate": 2.762135698424517e-06,
      "loss": 0.0171,
      "step": 2216880
    },
    {
      "epoch": 3.6280054725293427,
      "grad_norm": 0.37523430585861206,
      "learning_rate": 2.7620698062110003e-06,
      "loss": 0.0101,
      "step": 2216900
    },
    {
      "epoch": 3.6280382029679963,
      "grad_norm": 0.2264544516801834,
      "learning_rate": 2.762003913997483e-06,
      "loss": 0.0146,
      "step": 2216920
    },
    {
      "epoch": 3.6280709334066494,
      "grad_norm": 0.6523686051368713,
      "learning_rate": 2.7619380217839658e-06,
      "loss": 0.0105,
      "step": 2216940
    },
    {
      "epoch": 3.6281036638453026,
      "grad_norm": 0.10206408053636551,
      "learning_rate": 2.7618721295704485e-06,
      "loss": 0.0112,
      "step": 2216960
    },
    {
      "epoch": 3.628136394283956,
      "grad_norm": 0.635574221611023,
      "learning_rate": 2.7618062373569317e-06,
      "loss": 0.0114,
      "step": 2216980
    },
    {
      "epoch": 3.6281691247226098,
      "grad_norm": 0.09571846574544907,
      "learning_rate": 2.7617403451434144e-06,
      "loss": 0.0091,
      "step": 2217000
    },
    {
      "epoch": 3.628201855161263,
      "grad_norm": 0.2621437907218933,
      "learning_rate": 2.7616744529298976e-06,
      "loss": 0.0143,
      "step": 2217020
    },
    {
      "epoch": 3.628234585599916,
      "grad_norm": 0.7362802624702454,
      "learning_rate": 2.7616085607163807e-06,
      "loss": 0.0137,
      "step": 2217040
    },
    {
      "epoch": 3.6282673160385697,
      "grad_norm": 0.32428592443466187,
      "learning_rate": 2.7615426685028635e-06,
      "loss": 0.0075,
      "step": 2217060
    },
    {
      "epoch": 3.628300046477223,
      "grad_norm": 0.2892051935195923,
      "learning_rate": 2.7614767762893462e-06,
      "loss": 0.0095,
      "step": 2217080
    },
    {
      "epoch": 3.628332776915876,
      "grad_norm": 0.20606239140033722,
      "learning_rate": 2.761410884075829e-06,
      "loss": 0.0095,
      "step": 2217100
    },
    {
      "epoch": 3.6283655073545296,
      "grad_norm": 0.1563776433467865,
      "learning_rate": 2.761344991862312e-06,
      "loss": 0.0083,
      "step": 2217120
    },
    {
      "epoch": 3.628398237793183,
      "grad_norm": 0.29119154810905457,
      "learning_rate": 2.761279099648795e-06,
      "loss": 0.0125,
      "step": 2217140
    },
    {
      "epoch": 3.6284309682318363,
      "grad_norm": 0.43348392844200134,
      "learning_rate": 2.7612132074352776e-06,
      "loss": 0.0114,
      "step": 2217160
    },
    {
      "epoch": 3.6284636986704895,
      "grad_norm": 0.2517094910144806,
      "learning_rate": 2.7611473152217603e-06,
      "loss": 0.0113,
      "step": 2217180
    },
    {
      "epoch": 3.628496429109143,
      "grad_norm": 0.38963037729263306,
      "learning_rate": 2.7610814230082435e-06,
      "loss": 0.0136,
      "step": 2217200
    },
    {
      "epoch": 3.628529159547796,
      "grad_norm": 0.2367124855518341,
      "learning_rate": 2.7610155307947262e-06,
      "loss": 0.009,
      "step": 2217220
    },
    {
      "epoch": 3.6285618899864494,
      "grad_norm": 0.10731925070285797,
      "learning_rate": 2.760949638581209e-06,
      "loss": 0.0124,
      "step": 2217240
    },
    {
      "epoch": 3.628594620425103,
      "grad_norm": 0.2422475516796112,
      "learning_rate": 2.7608837463676917e-06,
      "loss": 0.008,
      "step": 2217260
    },
    {
      "epoch": 3.6286273508637565,
      "grad_norm": 0.2541167438030243,
      "learning_rate": 2.760817854154175e-06,
      "loss": 0.0081,
      "step": 2217280
    },
    {
      "epoch": 3.6286600813024097,
      "grad_norm": 0.13204562664031982,
      "learning_rate": 2.7607519619406576e-06,
      "loss": 0.0133,
      "step": 2217300
    },
    {
      "epoch": 3.628692811741063,
      "grad_norm": 0.2744646966457367,
      "learning_rate": 2.7606860697271404e-06,
      "loss": 0.0086,
      "step": 2217320
    },
    {
      "epoch": 3.6287255421797164,
      "grad_norm": 0.4560393989086151,
      "learning_rate": 2.760620177513623e-06,
      "loss": 0.0159,
      "step": 2217340
    },
    {
      "epoch": 3.6287582726183696,
      "grad_norm": 0.2786467671394348,
      "learning_rate": 2.760554285300106e-06,
      "loss": 0.0088,
      "step": 2217360
    },
    {
      "epoch": 3.6287910030570227,
      "grad_norm": 0.5185956954956055,
      "learning_rate": 2.7604883930865894e-06,
      "loss": 0.0103,
      "step": 2217380
    },
    {
      "epoch": 3.6288237334956763,
      "grad_norm": 0.22761313617229462,
      "learning_rate": 2.760422500873072e-06,
      "loss": 0.0107,
      "step": 2217400
    },
    {
      "epoch": 3.62885646393433,
      "grad_norm": 0.21254746615886688,
      "learning_rate": 2.760356608659555e-06,
      "loss": 0.0077,
      "step": 2217420
    },
    {
      "epoch": 3.628889194372983,
      "grad_norm": 0.1522320806980133,
      "learning_rate": 2.760290716446038e-06,
      "loss": 0.012,
      "step": 2217440
    },
    {
      "epoch": 3.6289219248116362,
      "grad_norm": 0.24155427515506744,
      "learning_rate": 2.760224824232521e-06,
      "loss": 0.0188,
      "step": 2217460
    },
    {
      "epoch": 3.62895465525029,
      "grad_norm": 0.2492961883544922,
      "learning_rate": 2.7601589320190036e-06,
      "loss": 0.0079,
      "step": 2217480
    },
    {
      "epoch": 3.628987385688943,
      "grad_norm": 0.4417768716812134,
      "learning_rate": 2.7600930398054863e-06,
      "loss": 0.0169,
      "step": 2217500
    },
    {
      "epoch": 3.629020116127596,
      "grad_norm": 0.3321841359138489,
      "learning_rate": 2.7600271475919695e-06,
      "loss": 0.0106,
      "step": 2217520
    },
    {
      "epoch": 3.6290528465662497,
      "grad_norm": 0.3725109398365021,
      "learning_rate": 2.759961255378452e-06,
      "loss": 0.0092,
      "step": 2217540
    },
    {
      "epoch": 3.629085577004903,
      "grad_norm": 0.25741443037986755,
      "learning_rate": 2.759895363164935e-06,
      "loss": 0.0131,
      "step": 2217560
    },
    {
      "epoch": 3.6291183074435565,
      "grad_norm": 0.16628780961036682,
      "learning_rate": 2.7598294709514177e-06,
      "loss": 0.0157,
      "step": 2217580
    },
    {
      "epoch": 3.6291510378822096,
      "grad_norm": 0.34290650486946106,
      "learning_rate": 2.759763578737901e-06,
      "loss": 0.0139,
      "step": 2217600
    },
    {
      "epoch": 3.629183768320863,
      "grad_norm": 0.09382477402687073,
      "learning_rate": 2.7596976865243836e-06,
      "loss": 0.0081,
      "step": 2217620
    },
    {
      "epoch": 3.6292164987595164,
      "grad_norm": 0.39149174094200134,
      "learning_rate": 2.7596317943108663e-06,
      "loss": 0.0072,
      "step": 2217640
    },
    {
      "epoch": 3.6292492291981695,
      "grad_norm": 0.14104951918125153,
      "learning_rate": 2.759565902097349e-06,
      "loss": 0.009,
      "step": 2217660
    },
    {
      "epoch": 3.629281959636823,
      "grad_norm": 0.2610846161842346,
      "learning_rate": 2.7595000098838322e-06,
      "loss": 0.0101,
      "step": 2217680
    },
    {
      "epoch": 3.6293146900754762,
      "grad_norm": 0.19728519022464752,
      "learning_rate": 2.759434117670315e-06,
      "loss": 0.0093,
      "step": 2217700
    },
    {
      "epoch": 3.62934742051413,
      "grad_norm": 0.24518978595733643,
      "learning_rate": 2.7593682254567977e-06,
      "loss": 0.0099,
      "step": 2217720
    },
    {
      "epoch": 3.629380150952783,
      "grad_norm": 0.19076180458068848,
      "learning_rate": 2.7593023332432813e-06,
      "loss": 0.0082,
      "step": 2217740
    },
    {
      "epoch": 3.6294128813914366,
      "grad_norm": 0.9402899146080017,
      "learning_rate": 2.759236441029764e-06,
      "loss": 0.011,
      "step": 2217760
    },
    {
      "epoch": 3.6294456118300897,
      "grad_norm": 0.1768227219581604,
      "learning_rate": 2.7591705488162468e-06,
      "loss": 0.0113,
      "step": 2217780
    },
    {
      "epoch": 3.629478342268743,
      "grad_norm": 0.12068387120962143,
      "learning_rate": 2.7591046566027295e-06,
      "loss": 0.0084,
      "step": 2217800
    },
    {
      "epoch": 3.6295110727073965,
      "grad_norm": 0.148413747549057,
      "learning_rate": 2.7590387643892127e-06,
      "loss": 0.0087,
      "step": 2217820
    },
    {
      "epoch": 3.6295438031460496,
      "grad_norm": 0.239431232213974,
      "learning_rate": 2.7589728721756954e-06,
      "loss": 0.0094,
      "step": 2217840
    },
    {
      "epoch": 3.6295765335847032,
      "grad_norm": 0.1964336335659027,
      "learning_rate": 2.758906979962178e-06,
      "loss": 0.0098,
      "step": 2217860
    },
    {
      "epoch": 3.6296092640233564,
      "grad_norm": 0.23570029437541962,
      "learning_rate": 2.758841087748661e-06,
      "loss": 0.01,
      "step": 2217880
    },
    {
      "epoch": 3.62964199446201,
      "grad_norm": 0.18036659061908722,
      "learning_rate": 2.7587751955351436e-06,
      "loss": 0.0138,
      "step": 2217900
    },
    {
      "epoch": 3.629674724900663,
      "grad_norm": 0.29710617661476135,
      "learning_rate": 2.758709303321627e-06,
      "loss": 0.0088,
      "step": 2217920
    },
    {
      "epoch": 3.6297074553393163,
      "grad_norm": 0.7543284893035889,
      "learning_rate": 2.7586434111081095e-06,
      "loss": 0.0115,
      "step": 2217940
    },
    {
      "epoch": 3.62974018577797,
      "grad_norm": 0.26223599910736084,
      "learning_rate": 2.7585775188945923e-06,
      "loss": 0.0073,
      "step": 2217960
    },
    {
      "epoch": 3.629772916216623,
      "grad_norm": 0.13160942494869232,
      "learning_rate": 2.758511626681075e-06,
      "loss": 0.0063,
      "step": 2217980
    },
    {
      "epoch": 3.6298056466552766,
      "grad_norm": 0.29138606786727905,
      "learning_rate": 2.758445734467558e-06,
      "loss": 0.0124,
      "step": 2218000
    },
    {
      "epoch": 3.6298383770939298,
      "grad_norm": 3.3017520904541016,
      "learning_rate": 2.758379842254041e-06,
      "loss": 0.0113,
      "step": 2218020
    },
    {
      "epoch": 3.6298711075325834,
      "grad_norm": 0.12872062623500824,
      "learning_rate": 2.7583139500405237e-06,
      "loss": 0.0174,
      "step": 2218040
    },
    {
      "epoch": 3.6299038379712365,
      "grad_norm": 0.2209463268518448,
      "learning_rate": 2.7582480578270064e-06,
      "loss": 0.0095,
      "step": 2218060
    },
    {
      "epoch": 3.6299365684098897,
      "grad_norm": 0.4593004286289215,
      "learning_rate": 2.75818216561349e-06,
      "loss": 0.0105,
      "step": 2218080
    },
    {
      "epoch": 3.6299692988485432,
      "grad_norm": 0.5017822980880737,
      "learning_rate": 2.7581162733999727e-06,
      "loss": 0.0108,
      "step": 2218100
    },
    {
      "epoch": 3.6300020292871964,
      "grad_norm": 0.27979689836502075,
      "learning_rate": 2.7580503811864555e-06,
      "loss": 0.0106,
      "step": 2218120
    },
    {
      "epoch": 3.63003475972585,
      "grad_norm": 0.2089991718530655,
      "learning_rate": 2.7579844889729386e-06,
      "loss": 0.0106,
      "step": 2218140
    },
    {
      "epoch": 3.630067490164503,
      "grad_norm": 0.1080474853515625,
      "learning_rate": 2.7579185967594214e-06,
      "loss": 0.0076,
      "step": 2218160
    },
    {
      "epoch": 3.6301002206031567,
      "grad_norm": 0.13416606187820435,
      "learning_rate": 2.757852704545904e-06,
      "loss": 0.0074,
      "step": 2218180
    },
    {
      "epoch": 3.63013295104181,
      "grad_norm": 0.2790040671825409,
      "learning_rate": 2.757786812332387e-06,
      "loss": 0.0126,
      "step": 2218200
    },
    {
      "epoch": 3.630165681480463,
      "grad_norm": 0.28303664922714233,
      "learning_rate": 2.75772092011887e-06,
      "loss": 0.014,
      "step": 2218220
    },
    {
      "epoch": 3.6301984119191166,
      "grad_norm": 0.1369858831167221,
      "learning_rate": 2.7576550279053528e-06,
      "loss": 0.0087,
      "step": 2218240
    },
    {
      "epoch": 3.63023114235777,
      "grad_norm": 0.09183545410633087,
      "learning_rate": 2.7575891356918355e-06,
      "loss": 0.0149,
      "step": 2218260
    },
    {
      "epoch": 3.6302638727964234,
      "grad_norm": 0.4637574255466461,
      "learning_rate": 2.7575232434783182e-06,
      "loss": 0.0074,
      "step": 2218280
    },
    {
      "epoch": 3.6302966032350765,
      "grad_norm": 0.3540005385875702,
      "learning_rate": 2.7574573512648014e-06,
      "loss": 0.0089,
      "step": 2218300
    },
    {
      "epoch": 3.63032933367373,
      "grad_norm": 0.11654269695281982,
      "learning_rate": 2.757391459051284e-06,
      "loss": 0.0075,
      "step": 2218320
    },
    {
      "epoch": 3.6303620641123833,
      "grad_norm": 0.2386939972639084,
      "learning_rate": 2.757325566837767e-06,
      "loss": 0.01,
      "step": 2218340
    },
    {
      "epoch": 3.6303947945510364,
      "grad_norm": 0.29644837975502014,
      "learning_rate": 2.7572596746242496e-06,
      "loss": 0.0078,
      "step": 2218360
    },
    {
      "epoch": 3.63042752498969,
      "grad_norm": 0.3856648802757263,
      "learning_rate": 2.7571937824107324e-06,
      "loss": 0.0149,
      "step": 2218380
    },
    {
      "epoch": 3.630460255428343,
      "grad_norm": 0.49736806750297546,
      "learning_rate": 2.7571278901972155e-06,
      "loss": 0.0148,
      "step": 2218400
    },
    {
      "epoch": 3.6304929858669963,
      "grad_norm": 0.3173362910747528,
      "learning_rate": 2.7570619979836983e-06,
      "loss": 0.0143,
      "step": 2218420
    },
    {
      "epoch": 3.63052571630565,
      "grad_norm": 0.10403581708669662,
      "learning_rate": 2.7569961057701814e-06,
      "loss": 0.0092,
      "step": 2218440
    },
    {
      "epoch": 3.6305584467443035,
      "grad_norm": 0.05974997207522392,
      "learning_rate": 2.7569302135566646e-06,
      "loss": 0.0103,
      "step": 2218460
    },
    {
      "epoch": 3.6305911771829567,
      "grad_norm": 0.04480385035276413,
      "learning_rate": 2.7568643213431473e-06,
      "loss": 0.0116,
      "step": 2218480
    },
    {
      "epoch": 3.63062390762161,
      "grad_norm": 0.05756084993481636,
      "learning_rate": 2.75679842912963e-06,
      "loss": 0.0127,
      "step": 2218500
    },
    {
      "epoch": 3.6306566380602634,
      "grad_norm": 0.08666430413722992,
      "learning_rate": 2.756732536916113e-06,
      "loss": 0.0062,
      "step": 2218520
    },
    {
      "epoch": 3.6306893684989165,
      "grad_norm": 0.811922550201416,
      "learning_rate": 2.756666644702596e-06,
      "loss": 0.0129,
      "step": 2218540
    },
    {
      "epoch": 3.6307220989375697,
      "grad_norm": 0.27633941173553467,
      "learning_rate": 2.7566007524890787e-06,
      "loss": 0.0094,
      "step": 2218560
    },
    {
      "epoch": 3.6307548293762233,
      "grad_norm": 0.41495004296302795,
      "learning_rate": 2.7565348602755614e-06,
      "loss": 0.016,
      "step": 2218580
    },
    {
      "epoch": 3.630787559814877,
      "grad_norm": 0.3955974876880646,
      "learning_rate": 2.756468968062044e-06,
      "loss": 0.0128,
      "step": 2218600
    },
    {
      "epoch": 3.63082029025353,
      "grad_norm": 0.028372900560498238,
      "learning_rate": 2.7564030758485273e-06,
      "loss": 0.0136,
      "step": 2218620
    },
    {
      "epoch": 3.630853020692183,
      "grad_norm": 0.22803835570812225,
      "learning_rate": 2.75633718363501e-06,
      "loss": 0.01,
      "step": 2218640
    },
    {
      "epoch": 3.630885751130837,
      "grad_norm": 0.13117823004722595,
      "learning_rate": 2.756271291421493e-06,
      "loss": 0.0087,
      "step": 2218660
    },
    {
      "epoch": 3.63091848156949,
      "grad_norm": 0.23094692826271057,
      "learning_rate": 2.7562053992079756e-06,
      "loss": 0.0086,
      "step": 2218680
    },
    {
      "epoch": 3.630951212008143,
      "grad_norm": 0.3059467673301697,
      "learning_rate": 2.7561395069944587e-06,
      "loss": 0.011,
      "step": 2218700
    },
    {
      "epoch": 3.6309839424467967,
      "grad_norm": 0.08090361952781677,
      "learning_rate": 2.7560736147809415e-06,
      "loss": 0.0124,
      "step": 2218720
    },
    {
      "epoch": 3.6310166728854503,
      "grad_norm": 0.10875502228736877,
      "learning_rate": 2.756007722567424e-06,
      "loss": 0.0116,
      "step": 2218740
    },
    {
      "epoch": 3.6310494033241034,
      "grad_norm": 0.20042601227760315,
      "learning_rate": 2.755941830353907e-06,
      "loss": 0.01,
      "step": 2218760
    },
    {
      "epoch": 3.6310821337627566,
      "grad_norm": 0.27237144112586975,
      "learning_rate": 2.7558759381403905e-06,
      "loss": 0.0126,
      "step": 2218780
    },
    {
      "epoch": 3.63111486420141,
      "grad_norm": 0.12835147976875305,
      "learning_rate": 2.7558100459268733e-06,
      "loss": 0.0079,
      "step": 2218800
    },
    {
      "epoch": 3.6311475946400633,
      "grad_norm": 0.23824219405651093,
      "learning_rate": 2.755744153713356e-06,
      "loss": 0.011,
      "step": 2218820
    },
    {
      "epoch": 3.6311803250787165,
      "grad_norm": 0.21195866167545319,
      "learning_rate": 2.755678261499839e-06,
      "loss": 0.0098,
      "step": 2218840
    },
    {
      "epoch": 3.63121305551737,
      "grad_norm": 0.2989182472229004,
      "learning_rate": 2.755612369286322e-06,
      "loss": 0.0089,
      "step": 2218860
    },
    {
      "epoch": 3.6312457859560237,
      "grad_norm": 0.23429064452648163,
      "learning_rate": 2.7555464770728047e-06,
      "loss": 0.0097,
      "step": 2218880
    },
    {
      "epoch": 3.631278516394677,
      "grad_norm": 0.1937103420495987,
      "learning_rate": 2.7554805848592874e-06,
      "loss": 0.0084,
      "step": 2218900
    },
    {
      "epoch": 3.63131124683333,
      "grad_norm": 0.24381637573242188,
      "learning_rate": 2.75541469264577e-06,
      "loss": 0.0104,
      "step": 2218920
    },
    {
      "epoch": 3.6313439772719835,
      "grad_norm": 0.20584258437156677,
      "learning_rate": 2.7553488004322533e-06,
      "loss": 0.0091,
      "step": 2218940
    },
    {
      "epoch": 3.6313767077106367,
      "grad_norm": 0.18966397643089294,
      "learning_rate": 2.755282908218736e-06,
      "loss": 0.0095,
      "step": 2218960
    },
    {
      "epoch": 3.63140943814929,
      "grad_norm": 0.144260972738266,
      "learning_rate": 2.7552170160052188e-06,
      "loss": 0.0123,
      "step": 2218980
    },
    {
      "epoch": 3.6314421685879434,
      "grad_norm": 0.3864055573940277,
      "learning_rate": 2.7551511237917015e-06,
      "loss": 0.011,
      "step": 2219000
    },
    {
      "epoch": 3.6314748990265966,
      "grad_norm": 0.5118230581283569,
      "learning_rate": 2.7550852315781847e-06,
      "loss": 0.0103,
      "step": 2219020
    },
    {
      "epoch": 3.63150762946525,
      "grad_norm": 0.11185251176357269,
      "learning_rate": 2.7550193393646674e-06,
      "loss": 0.0155,
      "step": 2219040
    },
    {
      "epoch": 3.6315403599039033,
      "grad_norm": 0.25532159209251404,
      "learning_rate": 2.75495344715115e-06,
      "loss": 0.0095,
      "step": 2219060
    },
    {
      "epoch": 3.631573090342557,
      "grad_norm": 0.12261202186346054,
      "learning_rate": 2.754887554937633e-06,
      "loss": 0.0119,
      "step": 2219080
    },
    {
      "epoch": 3.63160582078121,
      "grad_norm": 0.4740425944328308,
      "learning_rate": 2.754821662724116e-06,
      "loss": 0.0101,
      "step": 2219100
    },
    {
      "epoch": 3.6316385512198632,
      "grad_norm": 0.40438082814216614,
      "learning_rate": 2.754755770510599e-06,
      "loss": 0.012,
      "step": 2219120
    },
    {
      "epoch": 3.631671281658517,
      "grad_norm": 0.871614933013916,
      "learning_rate": 2.754689878297082e-06,
      "loss": 0.0093,
      "step": 2219140
    },
    {
      "epoch": 3.63170401209717,
      "grad_norm": 0.13406731188297272,
      "learning_rate": 2.754623986083565e-06,
      "loss": 0.0105,
      "step": 2219160
    },
    {
      "epoch": 3.6317367425358236,
      "grad_norm": 0.24228410422801971,
      "learning_rate": 2.754558093870048e-06,
      "loss": 0.0115,
      "step": 2219180
    },
    {
      "epoch": 3.6317694729744767,
      "grad_norm": 0.8444294929504395,
      "learning_rate": 2.7544922016565306e-06,
      "loss": 0.0147,
      "step": 2219200
    },
    {
      "epoch": 3.6318022034131303,
      "grad_norm": 0.3636029064655304,
      "learning_rate": 2.7544263094430134e-06,
      "loss": 0.0132,
      "step": 2219220
    },
    {
      "epoch": 3.6318349338517835,
      "grad_norm": 0.5804764628410339,
      "learning_rate": 2.7543604172294965e-06,
      "loss": 0.0155,
      "step": 2219240
    },
    {
      "epoch": 3.6318676642904366,
      "grad_norm": 0.24393314123153687,
      "learning_rate": 2.7542945250159793e-06,
      "loss": 0.0124,
      "step": 2219260
    },
    {
      "epoch": 3.63190039472909,
      "grad_norm": 0.17296601831912994,
      "learning_rate": 2.754228632802462e-06,
      "loss": 0.0106,
      "step": 2219280
    },
    {
      "epoch": 3.6319331251677434,
      "grad_norm": 0.23747135698795319,
      "learning_rate": 2.7541627405889447e-06,
      "loss": 0.0162,
      "step": 2219300
    },
    {
      "epoch": 3.631965855606397,
      "grad_norm": 0.704770565032959,
      "learning_rate": 2.7540968483754275e-06,
      "loss": 0.0103,
      "step": 2219320
    },
    {
      "epoch": 3.63199858604505,
      "grad_norm": 0.38328537344932556,
      "learning_rate": 2.7540309561619106e-06,
      "loss": 0.01,
      "step": 2219340
    },
    {
      "epoch": 3.6320313164837037,
      "grad_norm": 0.20388074219226837,
      "learning_rate": 2.7539650639483934e-06,
      "loss": 0.0086,
      "step": 2219360
    },
    {
      "epoch": 3.632064046922357,
      "grad_norm": 0.18675212562084198,
      "learning_rate": 2.753899171734876e-06,
      "loss": 0.0104,
      "step": 2219380
    },
    {
      "epoch": 3.63209677736101,
      "grad_norm": 0.2284335494041443,
      "learning_rate": 2.753833279521359e-06,
      "loss": 0.0127,
      "step": 2219400
    },
    {
      "epoch": 3.6321295077996636,
      "grad_norm": 0.29811155796051025,
      "learning_rate": 2.753767387307842e-06,
      "loss": 0.0129,
      "step": 2219420
    },
    {
      "epoch": 3.6321622382383167,
      "grad_norm": 0.1436874121427536,
      "learning_rate": 2.7537014950943248e-06,
      "loss": 0.0067,
      "step": 2219440
    },
    {
      "epoch": 3.6321949686769703,
      "grad_norm": 0.07827098667621613,
      "learning_rate": 2.7536356028808075e-06,
      "loss": 0.0126,
      "step": 2219460
    },
    {
      "epoch": 3.6322276991156235,
      "grad_norm": 0.14669980108737946,
      "learning_rate": 2.7535697106672902e-06,
      "loss": 0.0083,
      "step": 2219480
    },
    {
      "epoch": 3.632260429554277,
      "grad_norm": 0.5149639248847961,
      "learning_rate": 2.753503818453774e-06,
      "loss": 0.0132,
      "step": 2219500
    },
    {
      "epoch": 3.6322931599929302,
      "grad_norm": 0.16812539100646973,
      "learning_rate": 2.7534379262402566e-06,
      "loss": 0.0123,
      "step": 2219520
    },
    {
      "epoch": 3.6323258904315834,
      "grad_norm": 0.7461028695106506,
      "learning_rate": 2.7533720340267393e-06,
      "loss": 0.0109,
      "step": 2219540
    },
    {
      "epoch": 3.632358620870237,
      "grad_norm": 0.3570917546749115,
      "learning_rate": 2.7533061418132225e-06,
      "loss": 0.0091,
      "step": 2219560
    },
    {
      "epoch": 3.63239135130889,
      "grad_norm": 0.15766407549381256,
      "learning_rate": 2.753240249599705e-06,
      "loss": 0.0074,
      "step": 2219580
    },
    {
      "epoch": 3.6324240817475437,
      "grad_norm": 0.3955313265323639,
      "learning_rate": 2.753174357386188e-06,
      "loss": 0.013,
      "step": 2219600
    },
    {
      "epoch": 3.632456812186197,
      "grad_norm": 0.17645475268363953,
      "learning_rate": 2.7531084651726707e-06,
      "loss": 0.0087,
      "step": 2219620
    },
    {
      "epoch": 3.6324895426248505,
      "grad_norm": 0.5722184181213379,
      "learning_rate": 2.753042572959154e-06,
      "loss": 0.0135,
      "step": 2219640
    },
    {
      "epoch": 3.6325222730635036,
      "grad_norm": 0.18664145469665527,
      "learning_rate": 2.7529766807456366e-06,
      "loss": 0.0104,
      "step": 2219660
    },
    {
      "epoch": 3.6325550035021568,
      "grad_norm": 0.19503401219844818,
      "learning_rate": 2.7529107885321193e-06,
      "loss": 0.0118,
      "step": 2219680
    },
    {
      "epoch": 3.6325877339408104,
      "grad_norm": 0.10767407715320587,
      "learning_rate": 2.752844896318602e-06,
      "loss": 0.0145,
      "step": 2219700
    },
    {
      "epoch": 3.6326204643794635,
      "grad_norm": 0.3141511082649231,
      "learning_rate": 2.7527790041050852e-06,
      "loss": 0.0115,
      "step": 2219720
    },
    {
      "epoch": 3.632653194818117,
      "grad_norm": 0.020289888605475426,
      "learning_rate": 2.752713111891568e-06,
      "loss": 0.0103,
      "step": 2219740
    },
    {
      "epoch": 3.6326859252567703,
      "grad_norm": 0.1398894488811493,
      "learning_rate": 2.7526472196780507e-06,
      "loss": 0.0098,
      "step": 2219760
    },
    {
      "epoch": 3.632718655695424,
      "grad_norm": 0.26482146978378296,
      "learning_rate": 2.7525813274645335e-06,
      "loss": 0.0118,
      "step": 2219780
    },
    {
      "epoch": 3.632751386134077,
      "grad_norm": 0.2395656406879425,
      "learning_rate": 2.752515435251016e-06,
      "loss": 0.0087,
      "step": 2219800
    },
    {
      "epoch": 3.63278411657273,
      "grad_norm": 0.17680108547210693,
      "learning_rate": 2.7524495430374994e-06,
      "loss": 0.0135,
      "step": 2219820
    },
    {
      "epoch": 3.6328168470113837,
      "grad_norm": 0.27363526821136475,
      "learning_rate": 2.7523836508239825e-06,
      "loss": 0.0093,
      "step": 2219840
    },
    {
      "epoch": 3.632849577450037,
      "grad_norm": 0.7303900718688965,
      "learning_rate": 2.7523177586104653e-06,
      "loss": 0.0137,
      "step": 2219860
    },
    {
      "epoch": 3.6328823078886905,
      "grad_norm": 0.14757966995239258,
      "learning_rate": 2.7522518663969484e-06,
      "loss": 0.0082,
      "step": 2219880
    },
    {
      "epoch": 3.6329150383273436,
      "grad_norm": 0.1926785111427307,
      "learning_rate": 2.752185974183431e-06,
      "loss": 0.0161,
      "step": 2219900
    },
    {
      "epoch": 3.6329477687659972,
      "grad_norm": 0.20662234723567963,
      "learning_rate": 2.752120081969914e-06,
      "loss": 0.0157,
      "step": 2219920
    },
    {
      "epoch": 3.6329804992046504,
      "grad_norm": 0.19689872860908508,
      "learning_rate": 2.7520541897563966e-06,
      "loss": 0.0082,
      "step": 2219940
    },
    {
      "epoch": 3.6330132296433035,
      "grad_norm": 0.36652451753616333,
      "learning_rate": 2.75198829754288e-06,
      "loss": 0.0086,
      "step": 2219960
    },
    {
      "epoch": 3.633045960081957,
      "grad_norm": 0.0601385124027729,
      "learning_rate": 2.7519224053293625e-06,
      "loss": 0.0081,
      "step": 2219980
    },
    {
      "epoch": 3.6330786905206103,
      "grad_norm": 0.7858706116676331,
      "learning_rate": 2.7518565131158453e-06,
      "loss": 0.0126,
      "step": 2220000
    },
    {
      "epoch": 3.6331114209592634,
      "grad_norm": 0.0444159097969532,
      "learning_rate": 2.751790620902328e-06,
      "loss": 0.0089,
      "step": 2220020
    },
    {
      "epoch": 3.633144151397917,
      "grad_norm": 0.18253275752067566,
      "learning_rate": 2.751724728688811e-06,
      "loss": 0.0104,
      "step": 2220040
    },
    {
      "epoch": 3.6331768818365706,
      "grad_norm": 0.22739668190479279,
      "learning_rate": 2.751658836475294e-06,
      "loss": 0.012,
      "step": 2220060
    },
    {
      "epoch": 3.6332096122752238,
      "grad_norm": 0.08163033425807953,
      "learning_rate": 2.7515929442617767e-06,
      "loss": 0.0119,
      "step": 2220080
    },
    {
      "epoch": 3.633242342713877,
      "grad_norm": 0.19392980635166168,
      "learning_rate": 2.7515270520482594e-06,
      "loss": 0.0142,
      "step": 2220100
    },
    {
      "epoch": 3.6332750731525305,
      "grad_norm": 0.3007626235485077,
      "learning_rate": 2.7514611598347426e-06,
      "loss": 0.0087,
      "step": 2220120
    },
    {
      "epoch": 3.6333078035911837,
      "grad_norm": 0.708162784576416,
      "learning_rate": 2.7513952676212253e-06,
      "loss": 0.0146,
      "step": 2220140
    },
    {
      "epoch": 3.633340534029837,
      "grad_norm": 0.22611314058303833,
      "learning_rate": 2.751329375407708e-06,
      "loss": 0.0093,
      "step": 2220160
    },
    {
      "epoch": 3.6333732644684904,
      "grad_norm": 0.5617403388023376,
      "learning_rate": 2.751263483194191e-06,
      "loss": 0.0145,
      "step": 2220180
    },
    {
      "epoch": 3.633405994907144,
      "grad_norm": 0.1236431896686554,
      "learning_rate": 2.7511975909806744e-06,
      "loss": 0.0082,
      "step": 2220200
    },
    {
      "epoch": 3.633438725345797,
      "grad_norm": 0.27074161171913147,
      "learning_rate": 2.751131698767157e-06,
      "loss": 0.0097,
      "step": 2220220
    },
    {
      "epoch": 3.6334714557844503,
      "grad_norm": 0.2594783902168274,
      "learning_rate": 2.75106580655364e-06,
      "loss": 0.0102,
      "step": 2220240
    },
    {
      "epoch": 3.633504186223104,
      "grad_norm": 0.4755861163139343,
      "learning_rate": 2.750999914340123e-06,
      "loss": 0.0178,
      "step": 2220260
    },
    {
      "epoch": 3.633536916661757,
      "grad_norm": 0.9637808799743652,
      "learning_rate": 2.7509340221266058e-06,
      "loss": 0.0126,
      "step": 2220280
    },
    {
      "epoch": 3.63356964710041,
      "grad_norm": 0.3961101472377777,
      "learning_rate": 2.7508681299130885e-06,
      "loss": 0.012,
      "step": 2220300
    },
    {
      "epoch": 3.633602377539064,
      "grad_norm": 0.7310461401939392,
      "learning_rate": 2.7508022376995712e-06,
      "loss": 0.0138,
      "step": 2220320
    },
    {
      "epoch": 3.6336351079777174,
      "grad_norm": 0.26916882395744324,
      "learning_rate": 2.750736345486054e-06,
      "loss": 0.014,
      "step": 2220340
    },
    {
      "epoch": 3.6336678384163705,
      "grad_norm": 0.17648635804653168,
      "learning_rate": 2.750670453272537e-06,
      "loss": 0.0084,
      "step": 2220360
    },
    {
      "epoch": 3.6337005688550237,
      "grad_norm": 0.23959843814373016,
      "learning_rate": 2.75060456105902e-06,
      "loss": 0.0115,
      "step": 2220380
    },
    {
      "epoch": 3.6337332992936773,
      "grad_norm": 0.2617063522338867,
      "learning_rate": 2.7505386688455026e-06,
      "loss": 0.013,
      "step": 2220400
    },
    {
      "epoch": 3.6337660297323304,
      "grad_norm": 0.7416643500328064,
      "learning_rate": 2.7504727766319854e-06,
      "loss": 0.009,
      "step": 2220420
    },
    {
      "epoch": 3.6337987601709836,
      "grad_norm": 0.10042118281126022,
      "learning_rate": 2.7504068844184685e-06,
      "loss": 0.0068,
      "step": 2220440
    },
    {
      "epoch": 3.633831490609637,
      "grad_norm": 0.23233914375305176,
      "learning_rate": 2.7503409922049513e-06,
      "loss": 0.0116,
      "step": 2220460
    },
    {
      "epoch": 3.6338642210482908,
      "grad_norm": 0.7401642203330994,
      "learning_rate": 2.750275099991434e-06,
      "loss": 0.0122,
      "step": 2220480
    },
    {
      "epoch": 3.633896951486944,
      "grad_norm": 0.45725876092910767,
      "learning_rate": 2.7502092077779167e-06,
      "loss": 0.0076,
      "step": 2220500
    },
    {
      "epoch": 3.633929681925597,
      "grad_norm": 0.3620340824127197,
      "learning_rate": 2.7501433155644e-06,
      "loss": 0.0101,
      "step": 2220520
    },
    {
      "epoch": 3.6339624123642507,
      "grad_norm": 0.08313605934381485,
      "learning_rate": 2.750077423350883e-06,
      "loss": 0.0136,
      "step": 2220540
    },
    {
      "epoch": 3.633995142802904,
      "grad_norm": 0.32049742341041565,
      "learning_rate": 2.750011531137366e-06,
      "loss": 0.0159,
      "step": 2220560
    },
    {
      "epoch": 3.634027873241557,
      "grad_norm": 0.09697888791561127,
      "learning_rate": 2.749945638923849e-06,
      "loss": 0.0102,
      "step": 2220580
    },
    {
      "epoch": 3.6340606036802106,
      "grad_norm": 0.3465149402618408,
      "learning_rate": 2.7498797467103317e-06,
      "loss": 0.0133,
      "step": 2220600
    },
    {
      "epoch": 3.6340933341188637,
      "grad_norm": 0.12348908931016922,
      "learning_rate": 2.7498138544968145e-06,
      "loss": 0.0111,
      "step": 2220620
    },
    {
      "epoch": 3.6341260645575173,
      "grad_norm": 0.06973091512918472,
      "learning_rate": 2.749747962283297e-06,
      "loss": 0.0107,
      "step": 2220640
    },
    {
      "epoch": 3.6341587949961704,
      "grad_norm": 0.1844463348388672,
      "learning_rate": 2.7496820700697804e-06,
      "loss": 0.0084,
      "step": 2220660
    },
    {
      "epoch": 3.634191525434824,
      "grad_norm": 0.8083610534667969,
      "learning_rate": 2.749616177856263e-06,
      "loss": 0.015,
      "step": 2220680
    },
    {
      "epoch": 3.634224255873477,
      "grad_norm": 0.4925677180290222,
      "learning_rate": 2.749550285642746e-06,
      "loss": 0.0119,
      "step": 2220700
    },
    {
      "epoch": 3.6342569863121303,
      "grad_norm": 0.09697163105010986,
      "learning_rate": 2.7494843934292286e-06,
      "loss": 0.0127,
      "step": 2220720
    },
    {
      "epoch": 3.634289716750784,
      "grad_norm": 0.3066200315952301,
      "learning_rate": 2.7494185012157113e-06,
      "loss": 0.0111,
      "step": 2220740
    },
    {
      "epoch": 3.634322447189437,
      "grad_norm": 0.21309751272201538,
      "learning_rate": 2.7493526090021945e-06,
      "loss": 0.019,
      "step": 2220760
    },
    {
      "epoch": 3.6343551776280907,
      "grad_norm": 0.29911211133003235,
      "learning_rate": 2.7492867167886772e-06,
      "loss": 0.0109,
      "step": 2220780
    },
    {
      "epoch": 3.634387908066744,
      "grad_norm": 0.3729061186313629,
      "learning_rate": 2.74922082457516e-06,
      "loss": 0.0142,
      "step": 2220800
    },
    {
      "epoch": 3.6344206385053974,
      "grad_norm": 0.3322320878505707,
      "learning_rate": 2.7491549323616427e-06,
      "loss": 0.0146,
      "step": 2220820
    },
    {
      "epoch": 3.6344533689440506,
      "grad_norm": 0.8860964775085449,
      "learning_rate": 2.749089040148126e-06,
      "loss": 0.0118,
      "step": 2220840
    },
    {
      "epoch": 3.6344860993827037,
      "grad_norm": 0.3255937695503235,
      "learning_rate": 2.7490231479346086e-06,
      "loss": 0.0133,
      "step": 2220860
    },
    {
      "epoch": 3.6345188298213573,
      "grad_norm": 0.152574822306633,
      "learning_rate": 2.7489572557210913e-06,
      "loss": 0.0103,
      "step": 2220880
    },
    {
      "epoch": 3.6345515602600105,
      "grad_norm": 0.185395285487175,
      "learning_rate": 2.748891363507575e-06,
      "loss": 0.0117,
      "step": 2220900
    },
    {
      "epoch": 3.634584290698664,
      "grad_norm": 0.4115482270717621,
      "learning_rate": 2.7488254712940577e-06,
      "loss": 0.0083,
      "step": 2220920
    },
    {
      "epoch": 3.634617021137317,
      "grad_norm": 0.16877003014087677,
      "learning_rate": 2.7487595790805404e-06,
      "loss": 0.0094,
      "step": 2220940
    },
    {
      "epoch": 3.634649751575971,
      "grad_norm": 0.0450458787381649,
      "learning_rate": 2.748693686867023e-06,
      "loss": 0.0093,
      "step": 2220960
    },
    {
      "epoch": 3.634682482014624,
      "grad_norm": 0.08503850549459457,
      "learning_rate": 2.7486277946535063e-06,
      "loss": 0.0115,
      "step": 2220980
    },
    {
      "epoch": 3.634715212453277,
      "grad_norm": 0.682550847530365,
      "learning_rate": 2.748561902439989e-06,
      "loss": 0.0126,
      "step": 2221000
    },
    {
      "epoch": 3.6347479428919307,
      "grad_norm": 0.4140605330467224,
      "learning_rate": 2.7484960102264718e-06,
      "loss": 0.0099,
      "step": 2221020
    },
    {
      "epoch": 3.634780673330584,
      "grad_norm": 0.36318671703338623,
      "learning_rate": 2.7484301180129545e-06,
      "loss": 0.0118,
      "step": 2221040
    },
    {
      "epoch": 3.6348134037692374,
      "grad_norm": 0.19008813798427582,
      "learning_rate": 2.7483642257994377e-06,
      "loss": 0.0124,
      "step": 2221060
    },
    {
      "epoch": 3.6348461342078906,
      "grad_norm": 0.06378168612718582,
      "learning_rate": 2.7482983335859204e-06,
      "loss": 0.0084,
      "step": 2221080
    },
    {
      "epoch": 3.634878864646544,
      "grad_norm": 0.05868067592382431,
      "learning_rate": 2.748232441372403e-06,
      "loss": 0.0087,
      "step": 2221100
    },
    {
      "epoch": 3.6349115950851973,
      "grad_norm": 0.8283935189247131,
      "learning_rate": 2.748166549158886e-06,
      "loss": 0.0098,
      "step": 2221120
    },
    {
      "epoch": 3.6349443255238505,
      "grad_norm": 0.44934239983558655,
      "learning_rate": 2.748100656945369e-06,
      "loss": 0.0143,
      "step": 2221140
    },
    {
      "epoch": 3.634977055962504,
      "grad_norm": 0.2124178111553192,
      "learning_rate": 2.748034764731852e-06,
      "loss": 0.009,
      "step": 2221160
    },
    {
      "epoch": 3.6350097864011572,
      "grad_norm": 0.19818736612796783,
      "learning_rate": 2.7479688725183346e-06,
      "loss": 0.01,
      "step": 2221180
    },
    {
      "epoch": 3.635042516839811,
      "grad_norm": 0.25839167833328247,
      "learning_rate": 2.7479029803048173e-06,
      "loss": 0.0125,
      "step": 2221200
    },
    {
      "epoch": 3.635075247278464,
      "grad_norm": 0.20082572102546692,
      "learning_rate": 2.7478370880913e-06,
      "loss": 0.0094,
      "step": 2221220
    },
    {
      "epoch": 3.6351079777171176,
      "grad_norm": 0.26581040024757385,
      "learning_rate": 2.747771195877783e-06,
      "loss": 0.0089,
      "step": 2221240
    },
    {
      "epoch": 3.6351407081557707,
      "grad_norm": 0.4713590443134308,
      "learning_rate": 2.7477053036642664e-06,
      "loss": 0.011,
      "step": 2221260
    },
    {
      "epoch": 3.635173438594424,
      "grad_norm": 0.17148743569850922,
      "learning_rate": 2.747639411450749e-06,
      "loss": 0.0092,
      "step": 2221280
    },
    {
      "epoch": 3.6352061690330775,
      "grad_norm": 0.6515381336212158,
      "learning_rate": 2.7475735192372323e-06,
      "loss": 0.0113,
      "step": 2221300
    },
    {
      "epoch": 3.6352388994717306,
      "grad_norm": 0.1735265702009201,
      "learning_rate": 2.747507627023715e-06,
      "loss": 0.0086,
      "step": 2221320
    },
    {
      "epoch": 3.635271629910384,
      "grad_norm": 0.3807513415813446,
      "learning_rate": 2.7474417348101977e-06,
      "loss": 0.0118,
      "step": 2221340
    },
    {
      "epoch": 3.6353043603490374,
      "grad_norm": 0.29150131344795227,
      "learning_rate": 2.7473758425966805e-06,
      "loss": 0.0168,
      "step": 2221360
    },
    {
      "epoch": 3.635337090787691,
      "grad_norm": 0.4233214855194092,
      "learning_rate": 2.7473099503831636e-06,
      "loss": 0.0091,
      "step": 2221380
    },
    {
      "epoch": 3.635369821226344,
      "grad_norm": 0.23096312582492828,
      "learning_rate": 2.7472440581696464e-06,
      "loss": 0.0084,
      "step": 2221400
    },
    {
      "epoch": 3.6354025516649973,
      "grad_norm": 0.26424846053123474,
      "learning_rate": 2.747178165956129e-06,
      "loss": 0.011,
      "step": 2221420
    },
    {
      "epoch": 3.635435282103651,
      "grad_norm": 0.21620354056358337,
      "learning_rate": 2.747112273742612e-06,
      "loss": 0.0098,
      "step": 2221440
    },
    {
      "epoch": 3.635468012542304,
      "grad_norm": 0.1714731901884079,
      "learning_rate": 2.747046381529095e-06,
      "loss": 0.0115,
      "step": 2221460
    },
    {
      "epoch": 3.635500742980957,
      "grad_norm": 0.18060795962810516,
      "learning_rate": 2.7469804893155778e-06,
      "loss": 0.0134,
      "step": 2221480
    },
    {
      "epoch": 3.6355334734196108,
      "grad_norm": 0.16679175198078156,
      "learning_rate": 2.7469145971020605e-06,
      "loss": 0.0115,
      "step": 2221500
    },
    {
      "epoch": 3.6355662038582643,
      "grad_norm": 0.050209831446409225,
      "learning_rate": 2.7468487048885432e-06,
      "loss": 0.0083,
      "step": 2221520
    },
    {
      "epoch": 3.6355989342969175,
      "grad_norm": 0.05715293437242508,
      "learning_rate": 2.7467828126750264e-06,
      "loss": 0.0129,
      "step": 2221540
    },
    {
      "epoch": 3.6356316647355706,
      "grad_norm": 0.5499107837677002,
      "learning_rate": 2.746716920461509e-06,
      "loss": 0.0104,
      "step": 2221560
    },
    {
      "epoch": 3.6356643951742242,
      "grad_norm": 0.2003539800643921,
      "learning_rate": 2.746651028247992e-06,
      "loss": 0.011,
      "step": 2221580
    },
    {
      "epoch": 3.6356971256128774,
      "grad_norm": 0.5558725595474243,
      "learning_rate": 2.7465851360344755e-06,
      "loss": 0.0086,
      "step": 2221600
    },
    {
      "epoch": 3.6357298560515305,
      "grad_norm": 0.1670597642660141,
      "learning_rate": 2.7465192438209582e-06,
      "loss": 0.01,
      "step": 2221620
    },
    {
      "epoch": 3.635762586490184,
      "grad_norm": 0.08497942984104156,
      "learning_rate": 2.746453351607441e-06,
      "loss": 0.0097,
      "step": 2221640
    },
    {
      "epoch": 3.6357953169288377,
      "grad_norm": 0.10691414028406143,
      "learning_rate": 2.7463874593939237e-06,
      "loss": 0.009,
      "step": 2221660
    },
    {
      "epoch": 3.635828047367491,
      "grad_norm": 0.422646164894104,
      "learning_rate": 2.746321567180407e-06,
      "loss": 0.0111,
      "step": 2221680
    },
    {
      "epoch": 3.635860777806144,
      "grad_norm": 0.2545478940010071,
      "learning_rate": 2.7462556749668896e-06,
      "loss": 0.0084,
      "step": 2221700
    },
    {
      "epoch": 3.6358935082447976,
      "grad_norm": 0.20485128462314606,
      "learning_rate": 2.7461897827533723e-06,
      "loss": 0.0122,
      "step": 2221720
    },
    {
      "epoch": 3.6359262386834508,
      "grad_norm": 0.28006282448768616,
      "learning_rate": 2.746123890539855e-06,
      "loss": 0.0082,
      "step": 2221740
    },
    {
      "epoch": 3.635958969122104,
      "grad_norm": 0.323677659034729,
      "learning_rate": 2.746057998326338e-06,
      "loss": 0.0129,
      "step": 2221760
    },
    {
      "epoch": 3.6359916995607575,
      "grad_norm": 0.395364373922348,
      "learning_rate": 2.745992106112821e-06,
      "loss": 0.0126,
      "step": 2221780
    },
    {
      "epoch": 3.636024429999411,
      "grad_norm": 0.2666240632534027,
      "learning_rate": 2.7459262138993037e-06,
      "loss": 0.0146,
      "step": 2221800
    },
    {
      "epoch": 3.6360571604380643,
      "grad_norm": 0.2811509966850281,
      "learning_rate": 2.7458603216857865e-06,
      "loss": 0.0088,
      "step": 2221820
    },
    {
      "epoch": 3.6360898908767174,
      "grad_norm": 0.4786224961280823,
      "learning_rate": 2.745794429472269e-06,
      "loss": 0.0138,
      "step": 2221840
    },
    {
      "epoch": 3.636122621315371,
      "grad_norm": 0.30722275376319885,
      "learning_rate": 2.7457285372587524e-06,
      "loss": 0.0082,
      "step": 2221860
    },
    {
      "epoch": 3.636155351754024,
      "grad_norm": 0.16275951266288757,
      "learning_rate": 2.745662645045235e-06,
      "loss": 0.0121,
      "step": 2221880
    },
    {
      "epoch": 3.6361880821926773,
      "grad_norm": 0.49560096859931946,
      "learning_rate": 2.745596752831718e-06,
      "loss": 0.0168,
      "step": 2221900
    },
    {
      "epoch": 3.636220812631331,
      "grad_norm": 0.4449184536933899,
      "learning_rate": 2.7455308606182006e-06,
      "loss": 0.0088,
      "step": 2221920
    },
    {
      "epoch": 3.6362535430699845,
      "grad_norm": 0.3874541223049164,
      "learning_rate": 2.7454649684046837e-06,
      "loss": 0.0109,
      "step": 2221940
    },
    {
      "epoch": 3.6362862735086376,
      "grad_norm": 0.41984888911247253,
      "learning_rate": 2.745399076191167e-06,
      "loss": 0.0094,
      "step": 2221960
    },
    {
      "epoch": 3.636319003947291,
      "grad_norm": 0.6086972951889038,
      "learning_rate": 2.7453331839776496e-06,
      "loss": 0.0103,
      "step": 2221980
    },
    {
      "epoch": 3.6363517343859444,
      "grad_norm": 0.32465630769729614,
      "learning_rate": 2.745267291764133e-06,
      "loss": 0.0107,
      "step": 2222000
    },
    {
      "epoch": 3.6363844648245975,
      "grad_norm": 0.13963575661182404,
      "learning_rate": 2.7452013995506156e-06,
      "loss": 0.0088,
      "step": 2222020
    },
    {
      "epoch": 3.6364171952632507,
      "grad_norm": 0.29245370626449585,
      "learning_rate": 2.7451355073370983e-06,
      "loss": 0.0138,
      "step": 2222040
    },
    {
      "epoch": 3.6364499257019043,
      "grad_norm": 0.848658561706543,
      "learning_rate": 2.745069615123581e-06,
      "loss": 0.0125,
      "step": 2222060
    },
    {
      "epoch": 3.6364826561405574,
      "grad_norm": 0.1028989627957344,
      "learning_rate": 2.745003722910064e-06,
      "loss": 0.0112,
      "step": 2222080
    },
    {
      "epoch": 3.636515386579211,
      "grad_norm": 0.2112625390291214,
      "learning_rate": 2.744937830696547e-06,
      "loss": 0.0168,
      "step": 2222100
    },
    {
      "epoch": 3.636548117017864,
      "grad_norm": 0.6590520143508911,
      "learning_rate": 2.7448719384830297e-06,
      "loss": 0.0094,
      "step": 2222120
    },
    {
      "epoch": 3.6365808474565178,
      "grad_norm": 0.1734132319688797,
      "learning_rate": 2.7448060462695124e-06,
      "loss": 0.0108,
      "step": 2222140
    },
    {
      "epoch": 3.636613577895171,
      "grad_norm": 0.2415250986814499,
      "learning_rate": 2.7447401540559956e-06,
      "loss": 0.0125,
      "step": 2222160
    },
    {
      "epoch": 3.636646308333824,
      "grad_norm": 0.2804718315601349,
      "learning_rate": 2.7446742618424783e-06,
      "loss": 0.0088,
      "step": 2222180
    },
    {
      "epoch": 3.6366790387724777,
      "grad_norm": 0.11362113058567047,
      "learning_rate": 2.744608369628961e-06,
      "loss": 0.008,
      "step": 2222200
    },
    {
      "epoch": 3.636711769211131,
      "grad_norm": 0.30326372385025024,
      "learning_rate": 2.744542477415444e-06,
      "loss": 0.0181,
      "step": 2222220
    },
    {
      "epoch": 3.6367444996497844,
      "grad_norm": 0.6848187446594238,
      "learning_rate": 2.7444765852019265e-06,
      "loss": 0.0099,
      "step": 2222240
    },
    {
      "epoch": 3.6367772300884376,
      "grad_norm": 0.4001249372959137,
      "learning_rate": 2.7444106929884097e-06,
      "loss": 0.0117,
      "step": 2222260
    },
    {
      "epoch": 3.636809960527091,
      "grad_norm": 0.15728513896465302,
      "learning_rate": 2.7443448007748924e-06,
      "loss": 0.0092,
      "step": 2222280
    },
    {
      "epoch": 3.6368426909657443,
      "grad_norm": 0.33193275332450867,
      "learning_rate": 2.7442789085613756e-06,
      "loss": 0.0176,
      "step": 2222300
    },
    {
      "epoch": 3.6368754214043975,
      "grad_norm": 0.2899611294269562,
      "learning_rate": 2.7442130163478588e-06,
      "loss": 0.0096,
      "step": 2222320
    },
    {
      "epoch": 3.636908151843051,
      "grad_norm": 0.2024371474981308,
      "learning_rate": 2.7441471241343415e-06,
      "loss": 0.0109,
      "step": 2222340
    },
    {
      "epoch": 3.636940882281704,
      "grad_norm": 0.6527227163314819,
      "learning_rate": 2.7440812319208242e-06,
      "loss": 0.0091,
      "step": 2222360
    },
    {
      "epoch": 3.636973612720358,
      "grad_norm": 0.15654785931110382,
      "learning_rate": 2.744015339707307e-06,
      "loss": 0.0072,
      "step": 2222380
    },
    {
      "epoch": 3.637006343159011,
      "grad_norm": 0.19032976031303406,
      "learning_rate": 2.74394944749379e-06,
      "loss": 0.008,
      "step": 2222400
    },
    {
      "epoch": 3.6370390735976645,
      "grad_norm": 0.22782933712005615,
      "learning_rate": 2.743883555280273e-06,
      "loss": 0.0109,
      "step": 2222420
    },
    {
      "epoch": 3.6370718040363177,
      "grad_norm": 0.3311508297920227,
      "learning_rate": 2.7438176630667556e-06,
      "loss": 0.0141,
      "step": 2222440
    },
    {
      "epoch": 3.637104534474971,
      "grad_norm": 0.42581525444984436,
      "learning_rate": 2.7437517708532384e-06,
      "loss": 0.0106,
      "step": 2222460
    },
    {
      "epoch": 3.6371372649136244,
      "grad_norm": 0.8385244011878967,
      "learning_rate": 2.7436858786397215e-06,
      "loss": 0.015,
      "step": 2222480
    },
    {
      "epoch": 3.6371699953522776,
      "grad_norm": 0.40775567293167114,
      "learning_rate": 2.7436199864262043e-06,
      "loss": 0.0163,
      "step": 2222500
    },
    {
      "epoch": 3.637202725790931,
      "grad_norm": 0.8139646053314209,
      "learning_rate": 2.743554094212687e-06,
      "loss": 0.0126,
      "step": 2222520
    },
    {
      "epoch": 3.6372354562295843,
      "grad_norm": 0.44548869132995605,
      "learning_rate": 2.7434882019991697e-06,
      "loss": 0.0114,
      "step": 2222540
    },
    {
      "epoch": 3.637268186668238,
      "grad_norm": 0.3751486837863922,
      "learning_rate": 2.743422309785653e-06,
      "loss": 0.0126,
      "step": 2222560
    },
    {
      "epoch": 3.637300917106891,
      "grad_norm": 0.5541645288467407,
      "learning_rate": 2.7433564175721357e-06,
      "loss": 0.0144,
      "step": 2222580
    },
    {
      "epoch": 3.6373336475455442,
      "grad_norm": 0.16421562433242798,
      "learning_rate": 2.7432905253586184e-06,
      "loss": 0.0096,
      "step": 2222600
    },
    {
      "epoch": 3.637366377984198,
      "grad_norm": 0.27112242579460144,
      "learning_rate": 2.743224633145101e-06,
      "loss": 0.0094,
      "step": 2222620
    },
    {
      "epoch": 3.637399108422851,
      "grad_norm": 0.35024818778038025,
      "learning_rate": 2.743158740931584e-06,
      "loss": 0.0116,
      "step": 2222640
    },
    {
      "epoch": 3.6374318388615046,
      "grad_norm": 0.24707528948783875,
      "learning_rate": 2.7430928487180675e-06,
      "loss": 0.0133,
      "step": 2222660
    },
    {
      "epoch": 3.6374645693001577,
      "grad_norm": 0.27684468030929565,
      "learning_rate": 2.74302695650455e-06,
      "loss": 0.0101,
      "step": 2222680
    },
    {
      "epoch": 3.6374972997388113,
      "grad_norm": 0.22151435911655426,
      "learning_rate": 2.742961064291033e-06,
      "loss": 0.0148,
      "step": 2222700
    },
    {
      "epoch": 3.6375300301774645,
      "grad_norm": 0.4961223304271698,
      "learning_rate": 2.742895172077516e-06,
      "loss": 0.0125,
      "step": 2222720
    },
    {
      "epoch": 3.6375627606161176,
      "grad_norm": 0.32282763719558716,
      "learning_rate": 2.742829279863999e-06,
      "loss": 0.016,
      "step": 2222740
    },
    {
      "epoch": 3.637595491054771,
      "grad_norm": 0.3463258445262909,
      "learning_rate": 2.7427633876504816e-06,
      "loss": 0.0149,
      "step": 2222760
    },
    {
      "epoch": 3.6376282214934244,
      "grad_norm": 0.15860827267169952,
      "learning_rate": 2.7426974954369643e-06,
      "loss": 0.0112,
      "step": 2222780
    },
    {
      "epoch": 3.637660951932078,
      "grad_norm": 0.3464585542678833,
      "learning_rate": 2.7426316032234475e-06,
      "loss": 0.0156,
      "step": 2222800
    },
    {
      "epoch": 3.637693682370731,
      "grad_norm": 0.30099162459373474,
      "learning_rate": 2.7425657110099302e-06,
      "loss": 0.0161,
      "step": 2222820
    },
    {
      "epoch": 3.6377264128093847,
      "grad_norm": 0.4142327606678009,
      "learning_rate": 2.742499818796413e-06,
      "loss": 0.0094,
      "step": 2222840
    },
    {
      "epoch": 3.637759143248038,
      "grad_norm": 0.24024008214473724,
      "learning_rate": 2.7424339265828957e-06,
      "loss": 0.0156,
      "step": 2222860
    },
    {
      "epoch": 3.637791873686691,
      "grad_norm": 0.3161381781101227,
      "learning_rate": 2.742368034369379e-06,
      "loss": 0.0111,
      "step": 2222880
    },
    {
      "epoch": 3.6378246041253446,
      "grad_norm": 0.547529399394989,
      "learning_rate": 2.7423021421558616e-06,
      "loss": 0.0091,
      "step": 2222900
    },
    {
      "epoch": 3.6378573345639977,
      "grad_norm": 0.3185596168041229,
      "learning_rate": 2.7422362499423443e-06,
      "loss": 0.0153,
      "step": 2222920
    },
    {
      "epoch": 3.6378900650026513,
      "grad_norm": 2.6668026447296143,
      "learning_rate": 2.742170357728827e-06,
      "loss": 0.0137,
      "step": 2222940
    },
    {
      "epoch": 3.6379227954413045,
      "grad_norm": 0.16299185156822205,
      "learning_rate": 2.7421044655153102e-06,
      "loss": 0.011,
      "step": 2222960
    },
    {
      "epoch": 3.637955525879958,
      "grad_norm": 0.24527859687805176,
      "learning_rate": 2.742038573301793e-06,
      "loss": 0.0129,
      "step": 2222980
    },
    {
      "epoch": 3.6379882563186112,
      "grad_norm": 0.714657187461853,
      "learning_rate": 2.741972681088276e-06,
      "loss": 0.0125,
      "step": 2223000
    },
    {
      "epoch": 3.6380209867572644,
      "grad_norm": 0.3079553246498108,
      "learning_rate": 2.7419067888747593e-06,
      "loss": 0.008,
      "step": 2223020
    },
    {
      "epoch": 3.638053717195918,
      "grad_norm": 0.36437633633613586,
      "learning_rate": 2.741840896661242e-06,
      "loss": 0.0101,
      "step": 2223040
    },
    {
      "epoch": 3.638086447634571,
      "grad_norm": 0.5354411005973816,
      "learning_rate": 2.741775004447725e-06,
      "loss": 0.0094,
      "step": 2223060
    },
    {
      "epoch": 3.6381191780732243,
      "grad_norm": 0.8250482678413391,
      "learning_rate": 2.7417091122342075e-06,
      "loss": 0.0103,
      "step": 2223080
    },
    {
      "epoch": 3.638151908511878,
      "grad_norm": 0.30877774953842163,
      "learning_rate": 2.7416432200206907e-06,
      "loss": 0.0114,
      "step": 2223100
    },
    {
      "epoch": 3.6381846389505315,
      "grad_norm": 0.2287798821926117,
      "learning_rate": 2.7415773278071734e-06,
      "loss": 0.0107,
      "step": 2223120
    },
    {
      "epoch": 3.6382173693891846,
      "grad_norm": 0.34546980261802673,
      "learning_rate": 2.741511435593656e-06,
      "loss": 0.0138,
      "step": 2223140
    },
    {
      "epoch": 3.6382500998278378,
      "grad_norm": 0.04664585739374161,
      "learning_rate": 2.741445543380139e-06,
      "loss": 0.0088,
      "step": 2223160
    },
    {
      "epoch": 3.6382828302664914,
      "grad_norm": 0.11021832376718521,
      "learning_rate": 2.7413796511666217e-06,
      "loss": 0.0119,
      "step": 2223180
    },
    {
      "epoch": 3.6383155607051445,
      "grad_norm": 0.22960618138313293,
      "learning_rate": 2.741313758953105e-06,
      "loss": 0.0101,
      "step": 2223200
    },
    {
      "epoch": 3.6383482911437977,
      "grad_norm": 0.3053506016731262,
      "learning_rate": 2.7412478667395876e-06,
      "loss": 0.0085,
      "step": 2223220
    },
    {
      "epoch": 3.6383810215824512,
      "grad_norm": 0.2997972071170807,
      "learning_rate": 2.7411819745260703e-06,
      "loss": 0.0138,
      "step": 2223240
    },
    {
      "epoch": 3.638413752021105,
      "grad_norm": 0.17141781747341156,
      "learning_rate": 2.741116082312553e-06,
      "loss": 0.011,
      "step": 2223260
    },
    {
      "epoch": 3.638446482459758,
      "grad_norm": 0.24969631433486938,
      "learning_rate": 2.741050190099036e-06,
      "loss": 0.0159,
      "step": 2223280
    },
    {
      "epoch": 3.638479212898411,
      "grad_norm": 0.07688853144645691,
      "learning_rate": 2.740984297885519e-06,
      "loss": 0.0109,
      "step": 2223300
    },
    {
      "epoch": 3.6385119433370647,
      "grad_norm": 0.23012414574623108,
      "learning_rate": 2.7409184056720017e-06,
      "loss": 0.0155,
      "step": 2223320
    },
    {
      "epoch": 3.638544673775718,
      "grad_norm": 0.300495982170105,
      "learning_rate": 2.7408525134584844e-06,
      "loss": 0.0147,
      "step": 2223340
    },
    {
      "epoch": 3.638577404214371,
      "grad_norm": 0.154170960187912,
      "learning_rate": 2.740786621244968e-06,
      "loss": 0.0126,
      "step": 2223360
    },
    {
      "epoch": 3.6386101346530246,
      "grad_norm": 0.3362767994403839,
      "learning_rate": 2.7407207290314507e-06,
      "loss": 0.016,
      "step": 2223380
    },
    {
      "epoch": 3.6386428650916782,
      "grad_norm": 0.13420763611793518,
      "learning_rate": 2.7406548368179335e-06,
      "loss": 0.0104,
      "step": 2223400
    },
    {
      "epoch": 3.6386755955303314,
      "grad_norm": 0.37599119544029236,
      "learning_rate": 2.7405889446044167e-06,
      "loss": 0.0068,
      "step": 2223420
    },
    {
      "epoch": 3.6387083259689845,
      "grad_norm": 0.20739023387432098,
      "learning_rate": 2.7405230523908994e-06,
      "loss": 0.0133,
      "step": 2223440
    },
    {
      "epoch": 3.638741056407638,
      "grad_norm": 0.451750248670578,
      "learning_rate": 2.740457160177382e-06,
      "loss": 0.0084,
      "step": 2223460
    },
    {
      "epoch": 3.6387737868462913,
      "grad_norm": 0.3022725582122803,
      "learning_rate": 2.740391267963865e-06,
      "loss": 0.013,
      "step": 2223480
    },
    {
      "epoch": 3.6388065172849444,
      "grad_norm": 0.14075256884098053,
      "learning_rate": 2.740325375750348e-06,
      "loss": 0.0104,
      "step": 2223500
    },
    {
      "epoch": 3.638839247723598,
      "grad_norm": 0.26579776406288147,
      "learning_rate": 2.7402594835368308e-06,
      "loss": 0.0132,
      "step": 2223520
    },
    {
      "epoch": 3.638871978162251,
      "grad_norm": 0.026275642216205597,
      "learning_rate": 2.7401935913233135e-06,
      "loss": 0.0118,
      "step": 2223540
    },
    {
      "epoch": 3.6389047086009048,
      "grad_norm": 0.19905869662761688,
      "learning_rate": 2.7401276991097963e-06,
      "loss": 0.0126,
      "step": 2223560
    },
    {
      "epoch": 3.638937439039558,
      "grad_norm": 0.13239726424217224,
      "learning_rate": 2.7400618068962794e-06,
      "loss": 0.0094,
      "step": 2223580
    },
    {
      "epoch": 3.6389701694782115,
      "grad_norm": 0.13296890258789062,
      "learning_rate": 2.739995914682762e-06,
      "loss": 0.023,
      "step": 2223600
    },
    {
      "epoch": 3.6390028999168647,
      "grad_norm": 0.10048359632492065,
      "learning_rate": 2.739930022469245e-06,
      "loss": 0.009,
      "step": 2223620
    },
    {
      "epoch": 3.639035630355518,
      "grad_norm": 0.2653408944606781,
      "learning_rate": 2.7398641302557276e-06,
      "loss": 0.0142,
      "step": 2223640
    },
    {
      "epoch": 3.6390683607941714,
      "grad_norm": 0.3175467550754547,
      "learning_rate": 2.7397982380422104e-06,
      "loss": 0.0091,
      "step": 2223660
    },
    {
      "epoch": 3.6391010912328245,
      "grad_norm": 0.2233119159936905,
      "learning_rate": 2.7397323458286935e-06,
      "loss": 0.0072,
      "step": 2223680
    },
    {
      "epoch": 3.639133821671478,
      "grad_norm": 0.3884377181529999,
      "learning_rate": 2.7396664536151763e-06,
      "loss": 0.0074,
      "step": 2223700
    },
    {
      "epoch": 3.6391665521101313,
      "grad_norm": 0.5000163912773132,
      "learning_rate": 2.7396005614016594e-06,
      "loss": 0.0102,
      "step": 2223720
    },
    {
      "epoch": 3.639199282548785,
      "grad_norm": 0.4936692416667938,
      "learning_rate": 2.7395346691881426e-06,
      "loss": 0.0164,
      "step": 2223740
    },
    {
      "epoch": 3.639232012987438,
      "grad_norm": 0.17733481526374817,
      "learning_rate": 2.7394687769746253e-06,
      "loss": 0.0135,
      "step": 2223760
    },
    {
      "epoch": 3.639264743426091,
      "grad_norm": 0.07528071850538254,
      "learning_rate": 2.739402884761108e-06,
      "loss": 0.0115,
      "step": 2223780
    },
    {
      "epoch": 3.639297473864745,
      "grad_norm": 0.22986258566379547,
      "learning_rate": 2.739336992547591e-06,
      "loss": 0.0137,
      "step": 2223800
    },
    {
      "epoch": 3.639330204303398,
      "grad_norm": 0.33719730377197266,
      "learning_rate": 2.739271100334074e-06,
      "loss": 0.0123,
      "step": 2223820
    },
    {
      "epoch": 3.6393629347420515,
      "grad_norm": 0.16872841119766235,
      "learning_rate": 2.7392052081205567e-06,
      "loss": 0.0171,
      "step": 2223840
    },
    {
      "epoch": 3.6393956651807047,
      "grad_norm": 0.11027636379003525,
      "learning_rate": 2.7391393159070395e-06,
      "loss": 0.0078,
      "step": 2223860
    },
    {
      "epoch": 3.6394283956193583,
      "grad_norm": 0.10568224638700485,
      "learning_rate": 2.739073423693522e-06,
      "loss": 0.0104,
      "step": 2223880
    },
    {
      "epoch": 3.6394611260580114,
      "grad_norm": 0.4185272455215454,
      "learning_rate": 2.7390075314800054e-06,
      "loss": 0.008,
      "step": 2223900
    },
    {
      "epoch": 3.6394938564966646,
      "grad_norm": 0.5496379137039185,
      "learning_rate": 2.738941639266488e-06,
      "loss": 0.0134,
      "step": 2223920
    },
    {
      "epoch": 3.639526586935318,
      "grad_norm": 0.33273571729660034,
      "learning_rate": 2.738875747052971e-06,
      "loss": 0.0135,
      "step": 2223940
    },
    {
      "epoch": 3.6395593173739713,
      "grad_norm": 0.29931631684303284,
      "learning_rate": 2.7388098548394536e-06,
      "loss": 0.009,
      "step": 2223960
    },
    {
      "epoch": 3.639592047812625,
      "grad_norm": 0.11693545430898666,
      "learning_rate": 2.7387439626259368e-06,
      "loss": 0.0117,
      "step": 2223980
    },
    {
      "epoch": 3.639624778251278,
      "grad_norm": 0.2511438727378845,
      "learning_rate": 2.7386780704124195e-06,
      "loss": 0.0102,
      "step": 2224000
    },
    {
      "epoch": 3.6396575086899317,
      "grad_norm": 0.336974173784256,
      "learning_rate": 2.7386121781989022e-06,
      "loss": 0.0109,
      "step": 2224020
    },
    {
      "epoch": 3.639690239128585,
      "grad_norm": 0.18228761851787567,
      "learning_rate": 2.738546285985385e-06,
      "loss": 0.0094,
      "step": 2224040
    },
    {
      "epoch": 3.639722969567238,
      "grad_norm": 0.3286340534687042,
      "learning_rate": 2.7384803937718686e-06,
      "loss": 0.0126,
      "step": 2224060
    },
    {
      "epoch": 3.6397557000058915,
      "grad_norm": 0.2008032202720642,
      "learning_rate": 2.7384145015583513e-06,
      "loss": 0.0195,
      "step": 2224080
    },
    {
      "epoch": 3.6397884304445447,
      "grad_norm": 0.3469008505344391,
      "learning_rate": 2.738348609344834e-06,
      "loss": 0.009,
      "step": 2224100
    },
    {
      "epoch": 3.6398211608831983,
      "grad_norm": 0.42831048369407654,
      "learning_rate": 2.738282717131317e-06,
      "loss": 0.0095,
      "step": 2224120
    },
    {
      "epoch": 3.6398538913218514,
      "grad_norm": 0.07316809892654419,
      "learning_rate": 2.7382168249178e-06,
      "loss": 0.0118,
      "step": 2224140
    },
    {
      "epoch": 3.639886621760505,
      "grad_norm": 0.17429691553115845,
      "learning_rate": 2.7381509327042827e-06,
      "loss": 0.0136,
      "step": 2224160
    },
    {
      "epoch": 3.639919352199158,
      "grad_norm": 0.559334397315979,
      "learning_rate": 2.7380850404907654e-06,
      "loss": 0.0101,
      "step": 2224180
    },
    {
      "epoch": 3.6399520826378113,
      "grad_norm": 0.45105910301208496,
      "learning_rate": 2.738019148277248e-06,
      "loss": 0.0126,
      "step": 2224200
    },
    {
      "epoch": 3.639984813076465,
      "grad_norm": 0.25450459122657776,
      "learning_rate": 2.7379532560637313e-06,
      "loss": 0.016,
      "step": 2224220
    },
    {
      "epoch": 3.640017543515118,
      "grad_norm": 0.19244177639484406,
      "learning_rate": 2.737887363850214e-06,
      "loss": 0.0133,
      "step": 2224240
    },
    {
      "epoch": 3.6400502739537717,
      "grad_norm": 0.1925959438085556,
      "learning_rate": 2.737821471636697e-06,
      "loss": 0.0102,
      "step": 2224260
    },
    {
      "epoch": 3.640083004392425,
      "grad_norm": 0.22517777979373932,
      "learning_rate": 2.7377555794231795e-06,
      "loss": 0.0113,
      "step": 2224280
    },
    {
      "epoch": 3.6401157348310784,
      "grad_norm": 0.5818178057670593,
      "learning_rate": 2.7376896872096627e-06,
      "loss": 0.0119,
      "step": 2224300
    },
    {
      "epoch": 3.6401484652697316,
      "grad_norm": 0.24611122906208038,
      "learning_rate": 2.7376237949961454e-06,
      "loss": 0.0146,
      "step": 2224320
    },
    {
      "epoch": 3.6401811957083847,
      "grad_norm": 0.2080889642238617,
      "learning_rate": 2.737557902782628e-06,
      "loss": 0.0068,
      "step": 2224340
    },
    {
      "epoch": 3.6402139261470383,
      "grad_norm": 0.3752906918525696,
      "learning_rate": 2.737492010569111e-06,
      "loss": 0.0089,
      "step": 2224360
    },
    {
      "epoch": 3.6402466565856915,
      "grad_norm": 0.23860107362270355,
      "learning_rate": 2.737426118355594e-06,
      "loss": 0.0057,
      "step": 2224380
    },
    {
      "epoch": 3.640279387024345,
      "grad_norm": 0.10936509072780609,
      "learning_rate": 2.737360226142077e-06,
      "loss": 0.0116,
      "step": 2224400
    },
    {
      "epoch": 3.640312117462998,
      "grad_norm": 0.1104888841509819,
      "learning_rate": 2.73729433392856e-06,
      "loss": 0.0135,
      "step": 2224420
    },
    {
      "epoch": 3.640344847901652,
      "grad_norm": 0.06230805814266205,
      "learning_rate": 2.737228441715043e-06,
      "loss": 0.0117,
      "step": 2224440
    },
    {
      "epoch": 3.640377578340305,
      "grad_norm": 0.20805805921554565,
      "learning_rate": 2.737162549501526e-06,
      "loss": 0.0085,
      "step": 2224460
    },
    {
      "epoch": 3.640410308778958,
      "grad_norm": 0.26246264576911926,
      "learning_rate": 2.7370966572880086e-06,
      "loss": 0.0139,
      "step": 2224480
    },
    {
      "epoch": 3.6404430392176117,
      "grad_norm": 0.4825178384780884,
      "learning_rate": 2.7370307650744914e-06,
      "loss": 0.0098,
      "step": 2224500
    },
    {
      "epoch": 3.640475769656265,
      "grad_norm": 0.14070311188697815,
      "learning_rate": 2.7369648728609745e-06,
      "loss": 0.015,
      "step": 2224520
    },
    {
      "epoch": 3.640508500094918,
      "grad_norm": 0.06565897166728973,
      "learning_rate": 2.7368989806474573e-06,
      "loss": 0.009,
      "step": 2224540
    },
    {
      "epoch": 3.6405412305335716,
      "grad_norm": 0.36874109506607056,
      "learning_rate": 2.73683308843394e-06,
      "loss": 0.0124,
      "step": 2224560
    },
    {
      "epoch": 3.640573960972225,
      "grad_norm": 0.1688140332698822,
      "learning_rate": 2.7367671962204228e-06,
      "loss": 0.01,
      "step": 2224580
    },
    {
      "epoch": 3.6406066914108783,
      "grad_norm": 2.7426514625549316,
      "learning_rate": 2.7367013040069055e-06,
      "loss": 0.0144,
      "step": 2224600
    },
    {
      "epoch": 3.6406394218495315,
      "grad_norm": 0.06162455677986145,
      "learning_rate": 2.7366354117933887e-06,
      "loss": 0.0062,
      "step": 2224620
    },
    {
      "epoch": 3.640672152288185,
      "grad_norm": 0.17737078666687012,
      "learning_rate": 2.7365695195798714e-06,
      "loss": 0.0124,
      "step": 2224640
    },
    {
      "epoch": 3.6407048827268382,
      "grad_norm": 0.07238979637622833,
      "learning_rate": 2.736503627366354e-06,
      "loss": 0.0082,
      "step": 2224660
    },
    {
      "epoch": 3.6407376131654914,
      "grad_norm": 0.23155784606933594,
      "learning_rate": 2.736437735152837e-06,
      "loss": 0.0092,
      "step": 2224680
    },
    {
      "epoch": 3.640770343604145,
      "grad_norm": 0.11857368797063828,
      "learning_rate": 2.73637184293932e-06,
      "loss": 0.0115,
      "step": 2224700
    },
    {
      "epoch": 3.6408030740427986,
      "grad_norm": 0.5840580463409424,
      "learning_rate": 2.7363059507258028e-06,
      "loss": 0.0123,
      "step": 2224720
    },
    {
      "epoch": 3.6408358044814517,
      "grad_norm": 0.3626875877380371,
      "learning_rate": 2.7362400585122855e-06,
      "loss": 0.0117,
      "step": 2224740
    },
    {
      "epoch": 3.640868534920105,
      "grad_norm": 0.17497046291828156,
      "learning_rate": 2.736174166298769e-06,
      "loss": 0.0163,
      "step": 2224760
    },
    {
      "epoch": 3.6409012653587585,
      "grad_norm": 0.3406550884246826,
      "learning_rate": 2.736108274085252e-06,
      "loss": 0.0154,
      "step": 2224780
    },
    {
      "epoch": 3.6409339957974116,
      "grad_norm": 0.17668665945529938,
      "learning_rate": 2.7360423818717346e-06,
      "loss": 0.0122,
      "step": 2224800
    },
    {
      "epoch": 3.6409667262360648,
      "grad_norm": 0.8417873978614807,
      "learning_rate": 2.7359764896582173e-06,
      "loss": 0.0085,
      "step": 2224820
    },
    {
      "epoch": 3.6409994566747184,
      "grad_norm": 0.18289883434772491,
      "learning_rate": 2.7359105974447005e-06,
      "loss": 0.0139,
      "step": 2224840
    },
    {
      "epoch": 3.641032187113372,
      "grad_norm": 0.5321295857429504,
      "learning_rate": 2.7358447052311832e-06,
      "loss": 0.0086,
      "step": 2224860
    },
    {
      "epoch": 3.641064917552025,
      "grad_norm": 0.5908035635948181,
      "learning_rate": 2.735778813017666e-06,
      "loss": 0.0118,
      "step": 2224880
    },
    {
      "epoch": 3.6410976479906783,
      "grad_norm": 0.3793885111808777,
      "learning_rate": 2.7357129208041487e-06,
      "loss": 0.0082,
      "step": 2224900
    },
    {
      "epoch": 3.641130378429332,
      "grad_norm": 0.2763654291629791,
      "learning_rate": 2.735647028590632e-06,
      "loss": 0.0106,
      "step": 2224920
    },
    {
      "epoch": 3.641163108867985,
      "grad_norm": 0.25637510418891907,
      "learning_rate": 2.7355811363771146e-06,
      "loss": 0.0095,
      "step": 2224940
    },
    {
      "epoch": 3.641195839306638,
      "grad_norm": 0.2966775596141815,
      "learning_rate": 2.7355152441635974e-06,
      "loss": 0.0114,
      "step": 2224960
    },
    {
      "epoch": 3.6412285697452917,
      "grad_norm": 0.32531923055648804,
      "learning_rate": 2.73544935195008e-06,
      "loss": 0.0139,
      "step": 2224980
    },
    {
      "epoch": 3.6412613001839453,
      "grad_norm": 0.2939338982105255,
      "learning_rate": 2.7353834597365633e-06,
      "loss": 0.0086,
      "step": 2225000
    },
    {
      "epoch": 3.6412940306225985,
      "grad_norm": 0.06407779455184937,
      "learning_rate": 2.735317567523046e-06,
      "loss": 0.0101,
      "step": 2225020
    },
    {
      "epoch": 3.6413267610612516,
      "grad_norm": 0.3857232630252838,
      "learning_rate": 2.7352516753095287e-06,
      "loss": 0.0089,
      "step": 2225040
    },
    {
      "epoch": 3.6413594914999052,
      "grad_norm": 0.14185212552547455,
      "learning_rate": 2.7351857830960115e-06,
      "loss": 0.0097,
      "step": 2225060
    },
    {
      "epoch": 3.6413922219385584,
      "grad_norm": 0.2539948523044586,
      "learning_rate": 2.7351198908824942e-06,
      "loss": 0.0099,
      "step": 2225080
    },
    {
      "epoch": 3.6414249523772115,
      "grad_norm": 0.04554339498281479,
      "learning_rate": 2.7350539986689774e-06,
      "loss": 0.0127,
      "step": 2225100
    },
    {
      "epoch": 3.641457682815865,
      "grad_norm": 0.3738816976547241,
      "learning_rate": 2.7349881064554605e-06,
      "loss": 0.011,
      "step": 2225120
    },
    {
      "epoch": 3.6414904132545183,
      "grad_norm": 0.46058300137519836,
      "learning_rate": 2.7349222142419433e-06,
      "loss": 0.0118,
      "step": 2225140
    },
    {
      "epoch": 3.641523143693172,
      "grad_norm": 0.3094578683376312,
      "learning_rate": 2.7348563220284264e-06,
      "loss": 0.0144,
      "step": 2225160
    },
    {
      "epoch": 3.641555874131825,
      "grad_norm": 0.7641234397888184,
      "learning_rate": 2.734790429814909e-06,
      "loss": 0.0134,
      "step": 2225180
    },
    {
      "epoch": 3.6415886045704786,
      "grad_norm": 0.32071858644485474,
      "learning_rate": 2.734724537601392e-06,
      "loss": 0.0075,
      "step": 2225200
    },
    {
      "epoch": 3.6416213350091318,
      "grad_norm": 0.4879230260848999,
      "learning_rate": 2.7346586453878747e-06,
      "loss": 0.0136,
      "step": 2225220
    },
    {
      "epoch": 3.641654065447785,
      "grad_norm": 0.5240522623062134,
      "learning_rate": 2.734592753174358e-06,
      "loss": 0.0125,
      "step": 2225240
    },
    {
      "epoch": 3.6416867958864385,
      "grad_norm": 0.5176231861114502,
      "learning_rate": 2.7345268609608406e-06,
      "loss": 0.0094,
      "step": 2225260
    },
    {
      "epoch": 3.6417195263250917,
      "grad_norm": 0.38487961888313293,
      "learning_rate": 2.7344609687473233e-06,
      "loss": 0.0157,
      "step": 2225280
    },
    {
      "epoch": 3.6417522567637453,
      "grad_norm": 0.24990087747573853,
      "learning_rate": 2.734395076533806e-06,
      "loss": 0.0111,
      "step": 2225300
    },
    {
      "epoch": 3.6417849872023984,
      "grad_norm": 0.313568651676178,
      "learning_rate": 2.734329184320289e-06,
      "loss": 0.0104,
      "step": 2225320
    },
    {
      "epoch": 3.641817717641052,
      "grad_norm": 0.15034759044647217,
      "learning_rate": 2.734263292106772e-06,
      "loss": 0.0128,
      "step": 2225340
    },
    {
      "epoch": 3.641850448079705,
      "grad_norm": 0.8404107093811035,
      "learning_rate": 2.7341973998932547e-06,
      "loss": 0.0114,
      "step": 2225360
    },
    {
      "epoch": 3.6418831785183583,
      "grad_norm": 0.29863983392715454,
      "learning_rate": 2.7341315076797374e-06,
      "loss": 0.0115,
      "step": 2225380
    },
    {
      "epoch": 3.641915908957012,
      "grad_norm": 0.3687755763530731,
      "learning_rate": 2.7340656154662206e-06,
      "loss": 0.0196,
      "step": 2225400
    },
    {
      "epoch": 3.641948639395665,
      "grad_norm": 0.33229079842567444,
      "learning_rate": 2.7339997232527033e-06,
      "loss": 0.006,
      "step": 2225420
    },
    {
      "epoch": 3.6419813698343186,
      "grad_norm": 0.5162741541862488,
      "learning_rate": 2.733933831039186e-06,
      "loss": 0.0093,
      "step": 2225440
    },
    {
      "epoch": 3.642014100272972,
      "grad_norm": 0.26447799801826477,
      "learning_rate": 2.733867938825669e-06,
      "loss": 0.008,
      "step": 2225460
    },
    {
      "epoch": 3.6420468307116254,
      "grad_norm": 0.34537795186042786,
      "learning_rate": 2.7338020466121524e-06,
      "loss": 0.0121,
      "step": 2225480
    },
    {
      "epoch": 3.6420795611502785,
      "grad_norm": 0.13022366166114807,
      "learning_rate": 2.733736154398635e-06,
      "loss": 0.0126,
      "step": 2225500
    },
    {
      "epoch": 3.6421122915889317,
      "grad_norm": 0.6396426558494568,
      "learning_rate": 2.733670262185118e-06,
      "loss": 0.0152,
      "step": 2225520
    },
    {
      "epoch": 3.6421450220275853,
      "grad_norm": 0.15525925159454346,
      "learning_rate": 2.733604369971601e-06,
      "loss": 0.0089,
      "step": 2225540
    },
    {
      "epoch": 3.6421777524662384,
      "grad_norm": 0.4054456949234009,
      "learning_rate": 2.7335384777580838e-06,
      "loss": 0.0098,
      "step": 2225560
    },
    {
      "epoch": 3.642210482904892,
      "grad_norm": 0.11915575712919235,
      "learning_rate": 2.7334725855445665e-06,
      "loss": 0.0075,
      "step": 2225580
    },
    {
      "epoch": 3.642243213343545,
      "grad_norm": 0.4220055937767029,
      "learning_rate": 2.7334066933310493e-06,
      "loss": 0.0132,
      "step": 2225600
    },
    {
      "epoch": 3.6422759437821988,
      "grad_norm": 0.290263295173645,
      "learning_rate": 2.733340801117532e-06,
      "loss": 0.0076,
      "step": 2225620
    },
    {
      "epoch": 3.642308674220852,
      "grad_norm": 0.29873955249786377,
      "learning_rate": 2.733274908904015e-06,
      "loss": 0.012,
      "step": 2225640
    },
    {
      "epoch": 3.642341404659505,
      "grad_norm": 0.07569270581007004,
      "learning_rate": 2.733209016690498e-06,
      "loss": 0.0124,
      "step": 2225660
    },
    {
      "epoch": 3.6423741350981587,
      "grad_norm": 0.3277181386947632,
      "learning_rate": 2.7331431244769806e-06,
      "loss": 0.0101,
      "step": 2225680
    },
    {
      "epoch": 3.642406865536812,
      "grad_norm": 0.20687340199947357,
      "learning_rate": 2.7330772322634634e-06,
      "loss": 0.0073,
      "step": 2225700
    },
    {
      "epoch": 3.6424395959754654,
      "grad_norm": 0.09122020751237869,
      "learning_rate": 2.7330113400499465e-06,
      "loss": 0.0115,
      "step": 2225720
    },
    {
      "epoch": 3.6424723264141186,
      "grad_norm": 0.1807916760444641,
      "learning_rate": 2.7329454478364293e-06,
      "loss": 0.01,
      "step": 2225740
    },
    {
      "epoch": 3.642505056852772,
      "grad_norm": 0.37372052669525146,
      "learning_rate": 2.732879555622912e-06,
      "loss": 0.013,
      "step": 2225760
    },
    {
      "epoch": 3.6425377872914253,
      "grad_norm": 0.0896979495882988,
      "learning_rate": 2.7328136634093948e-06,
      "loss": 0.013,
      "step": 2225780
    },
    {
      "epoch": 3.6425705177300784,
      "grad_norm": 0.09407185763120651,
      "learning_rate": 2.732747771195878e-06,
      "loss": 0.0091,
      "step": 2225800
    },
    {
      "epoch": 3.642603248168732,
      "grad_norm": 0.594385027885437,
      "learning_rate": 2.732681878982361e-06,
      "loss": 0.0111,
      "step": 2225820
    },
    {
      "epoch": 3.642635978607385,
      "grad_norm": 0.304546982049942,
      "learning_rate": 2.732615986768844e-06,
      "loss": 0.0111,
      "step": 2225840
    },
    {
      "epoch": 3.642668709046039,
      "grad_norm": 0.38251402974128723,
      "learning_rate": 2.732550094555327e-06,
      "loss": 0.0113,
      "step": 2225860
    },
    {
      "epoch": 3.642701439484692,
      "grad_norm": 0.06763876974582672,
      "learning_rate": 2.7324842023418097e-06,
      "loss": 0.0063,
      "step": 2225880
    },
    {
      "epoch": 3.6427341699233455,
      "grad_norm": 0.13963650166988373,
      "learning_rate": 2.7324183101282925e-06,
      "loss": 0.0095,
      "step": 2225900
    },
    {
      "epoch": 3.6427669003619987,
      "grad_norm": 0.25407442450523376,
      "learning_rate": 2.7323524179147752e-06,
      "loss": 0.0117,
      "step": 2225920
    },
    {
      "epoch": 3.642799630800652,
      "grad_norm": 0.25440841913223267,
      "learning_rate": 2.7322865257012584e-06,
      "loss": 0.0139,
      "step": 2225940
    },
    {
      "epoch": 3.6428323612393054,
      "grad_norm": 0.15493078529834747,
      "learning_rate": 2.732220633487741e-06,
      "loss": 0.0096,
      "step": 2225960
    },
    {
      "epoch": 3.6428650916779586,
      "grad_norm": 0.16515791416168213,
      "learning_rate": 2.732154741274224e-06,
      "loss": 0.0104,
      "step": 2225980
    },
    {
      "epoch": 3.642897822116612,
      "grad_norm": 0.3061692714691162,
      "learning_rate": 2.7320888490607066e-06,
      "loss": 0.011,
      "step": 2226000
    },
    {
      "epoch": 3.6429305525552653,
      "grad_norm": 0.47960108518600464,
      "learning_rate": 2.7320229568471893e-06,
      "loss": 0.0089,
      "step": 2226020
    },
    {
      "epoch": 3.642963282993919,
      "grad_norm": 0.24107356369495392,
      "learning_rate": 2.7319570646336725e-06,
      "loss": 0.0081,
      "step": 2226040
    },
    {
      "epoch": 3.642996013432572,
      "grad_norm": 0.36736083030700684,
      "learning_rate": 2.7318911724201552e-06,
      "loss": 0.0115,
      "step": 2226060
    },
    {
      "epoch": 3.643028743871225,
      "grad_norm": 0.3025771379470825,
      "learning_rate": 2.731825280206638e-06,
      "loss": 0.0076,
      "step": 2226080
    },
    {
      "epoch": 3.643061474309879,
      "grad_norm": 0.36622104048728943,
      "learning_rate": 2.7317593879931207e-06,
      "loss": 0.0113,
      "step": 2226100
    },
    {
      "epoch": 3.643094204748532,
      "grad_norm": 0.2758456766605377,
      "learning_rate": 2.731693495779604e-06,
      "loss": 0.0065,
      "step": 2226120
    },
    {
      "epoch": 3.643126935187185,
      "grad_norm": 1.2880969047546387,
      "learning_rate": 2.7316276035660866e-06,
      "loss": 0.0179,
      "step": 2226140
    },
    {
      "epoch": 3.6431596656258387,
      "grad_norm": 0.8074509501457214,
      "learning_rate": 2.7315617113525694e-06,
      "loss": 0.0087,
      "step": 2226160
    },
    {
      "epoch": 3.6431923960644923,
      "grad_norm": 0.10792139172554016,
      "learning_rate": 2.731495819139053e-06,
      "loss": 0.0141,
      "step": 2226180
    },
    {
      "epoch": 3.6432251265031455,
      "grad_norm": 0.651075541973114,
      "learning_rate": 2.7314299269255357e-06,
      "loss": 0.0152,
      "step": 2226200
    },
    {
      "epoch": 3.6432578569417986,
      "grad_norm": 0.12785504758358002,
      "learning_rate": 2.7313640347120184e-06,
      "loss": 0.0129,
      "step": 2226220
    },
    {
      "epoch": 3.643290587380452,
      "grad_norm": 0.25963637232780457,
      "learning_rate": 2.731298142498501e-06,
      "loss": 0.0156,
      "step": 2226240
    },
    {
      "epoch": 3.6433233178191053,
      "grad_norm": 0.9117614030838013,
      "learning_rate": 2.7312322502849843e-06,
      "loss": 0.0117,
      "step": 2226260
    },
    {
      "epoch": 3.6433560482577585,
      "grad_norm": 0.14430390298366547,
      "learning_rate": 2.731166358071467e-06,
      "loss": 0.0106,
      "step": 2226280
    },
    {
      "epoch": 3.643388778696412,
      "grad_norm": 0.10203506797552109,
      "learning_rate": 2.73110046585795e-06,
      "loss": 0.0116,
      "step": 2226300
    },
    {
      "epoch": 3.6434215091350657,
      "grad_norm": 0.26722556352615356,
      "learning_rate": 2.7310345736444325e-06,
      "loss": 0.0132,
      "step": 2226320
    },
    {
      "epoch": 3.643454239573719,
      "grad_norm": 0.3792097568511963,
      "learning_rate": 2.7309686814309157e-06,
      "loss": 0.0066,
      "step": 2226340
    },
    {
      "epoch": 3.643486970012372,
      "grad_norm": 0.27975431084632874,
      "learning_rate": 2.7309027892173985e-06,
      "loss": 0.014,
      "step": 2226360
    },
    {
      "epoch": 3.6435197004510256,
      "grad_norm": 0.08183354139328003,
      "learning_rate": 2.730836897003881e-06,
      "loss": 0.0148,
      "step": 2226380
    },
    {
      "epoch": 3.6435524308896787,
      "grad_norm": 0.29393911361694336,
      "learning_rate": 2.730771004790364e-06,
      "loss": 0.0155,
      "step": 2226400
    },
    {
      "epoch": 3.643585161328332,
      "grad_norm": 0.4111998677253723,
      "learning_rate": 2.730705112576847e-06,
      "loss": 0.0052,
      "step": 2226420
    },
    {
      "epoch": 3.6436178917669855,
      "grad_norm": 0.2574847340583801,
      "learning_rate": 2.73063922036333e-06,
      "loss": 0.0088,
      "step": 2226440
    },
    {
      "epoch": 3.643650622205639,
      "grad_norm": 0.8064146041870117,
      "learning_rate": 2.7305733281498126e-06,
      "loss": 0.0108,
      "step": 2226460
    },
    {
      "epoch": 3.643683352644292,
      "grad_norm": 0.4252359867095947,
      "learning_rate": 2.7305074359362953e-06,
      "loss": 0.0151,
      "step": 2226480
    },
    {
      "epoch": 3.6437160830829454,
      "grad_norm": 0.12040704488754272,
      "learning_rate": 2.730441543722778e-06,
      "loss": 0.0104,
      "step": 2226500
    },
    {
      "epoch": 3.643748813521599,
      "grad_norm": 0.1815631091594696,
      "learning_rate": 2.7303756515092616e-06,
      "loss": 0.011,
      "step": 2226520
    },
    {
      "epoch": 3.643781543960252,
      "grad_norm": 0.16864649951457977,
      "learning_rate": 2.7303097592957444e-06,
      "loss": 0.0142,
      "step": 2226540
    },
    {
      "epoch": 3.6438142743989053,
      "grad_norm": 0.3004985749721527,
      "learning_rate": 2.730243867082227e-06,
      "loss": 0.0128,
      "step": 2226560
    },
    {
      "epoch": 3.643847004837559,
      "grad_norm": 0.203861266374588,
      "learning_rate": 2.7301779748687103e-06,
      "loss": 0.0106,
      "step": 2226580
    },
    {
      "epoch": 3.643879735276212,
      "grad_norm": 0.41485726833343506,
      "learning_rate": 2.730112082655193e-06,
      "loss": 0.0079,
      "step": 2226600
    },
    {
      "epoch": 3.6439124657148656,
      "grad_norm": 0.17884987592697144,
      "learning_rate": 2.7300461904416758e-06,
      "loss": 0.0152,
      "step": 2226620
    },
    {
      "epoch": 3.6439451961535188,
      "grad_norm": 0.1904020458459854,
      "learning_rate": 2.7299802982281585e-06,
      "loss": 0.0105,
      "step": 2226640
    },
    {
      "epoch": 3.6439779265921723,
      "grad_norm": 0.31776148080825806,
      "learning_rate": 2.7299144060146417e-06,
      "loss": 0.0109,
      "step": 2226660
    },
    {
      "epoch": 3.6440106570308255,
      "grad_norm": 0.08139599859714508,
      "learning_rate": 2.7298485138011244e-06,
      "loss": 0.0097,
      "step": 2226680
    },
    {
      "epoch": 3.6440433874694786,
      "grad_norm": 0.4727244973182678,
      "learning_rate": 2.729782621587607e-06,
      "loss": 0.0097,
      "step": 2226700
    },
    {
      "epoch": 3.6440761179081322,
      "grad_norm": 0.2141205370426178,
      "learning_rate": 2.72971672937409e-06,
      "loss": 0.0102,
      "step": 2226720
    },
    {
      "epoch": 3.6441088483467854,
      "grad_norm": 0.3810963034629822,
      "learning_rate": 2.729650837160573e-06,
      "loss": 0.0159,
      "step": 2226740
    },
    {
      "epoch": 3.644141578785439,
      "grad_norm": 0.26040610671043396,
      "learning_rate": 2.7295849449470558e-06,
      "loss": 0.009,
      "step": 2226760
    },
    {
      "epoch": 3.644174309224092,
      "grad_norm": 0.25075584650039673,
      "learning_rate": 2.7295190527335385e-06,
      "loss": 0.0152,
      "step": 2226780
    },
    {
      "epoch": 3.6442070396627457,
      "grad_norm": 0.2604829668998718,
      "learning_rate": 2.7294531605200213e-06,
      "loss": 0.0084,
      "step": 2226800
    },
    {
      "epoch": 3.644239770101399,
      "grad_norm": 0.3137955367565155,
      "learning_rate": 2.7293872683065044e-06,
      "loss": 0.008,
      "step": 2226820
    },
    {
      "epoch": 3.644272500540052,
      "grad_norm": 0.3657636046409607,
      "learning_rate": 2.729321376092987e-06,
      "loss": 0.0095,
      "step": 2226840
    },
    {
      "epoch": 3.6443052309787056,
      "grad_norm": 0.2836211025714874,
      "learning_rate": 2.72925548387947e-06,
      "loss": 0.0113,
      "step": 2226860
    },
    {
      "epoch": 3.6443379614173588,
      "grad_norm": 0.4639965891838074,
      "learning_rate": 2.7291895916659535e-06,
      "loss": 0.0148,
      "step": 2226880
    },
    {
      "epoch": 3.6443706918560124,
      "grad_norm": 0.5279977917671204,
      "learning_rate": 2.7291236994524362e-06,
      "loss": 0.0129,
      "step": 2226900
    },
    {
      "epoch": 3.6444034222946655,
      "grad_norm": 0.1028837189078331,
      "learning_rate": 2.729057807238919e-06,
      "loss": 0.0109,
      "step": 2226920
    },
    {
      "epoch": 3.644436152733319,
      "grad_norm": 0.2711750864982605,
      "learning_rate": 2.7289919150254017e-06,
      "loss": 0.0122,
      "step": 2226940
    },
    {
      "epoch": 3.6444688831719723,
      "grad_norm": 0.9147727489471436,
      "learning_rate": 2.728926022811885e-06,
      "loss": 0.0091,
      "step": 2226960
    },
    {
      "epoch": 3.6445016136106254,
      "grad_norm": 0.07927101850509644,
      "learning_rate": 2.7288601305983676e-06,
      "loss": 0.0186,
      "step": 2226980
    },
    {
      "epoch": 3.644534344049279,
      "grad_norm": 0.07949236035346985,
      "learning_rate": 2.7287942383848504e-06,
      "loss": 0.0079,
      "step": 2227000
    },
    {
      "epoch": 3.644567074487932,
      "grad_norm": 0.06303886324167252,
      "learning_rate": 2.728728346171333e-06,
      "loss": 0.0105,
      "step": 2227020
    },
    {
      "epoch": 3.6445998049265858,
      "grad_norm": 0.19383448362350464,
      "learning_rate": 2.728662453957816e-06,
      "loss": 0.0107,
      "step": 2227040
    },
    {
      "epoch": 3.644632535365239,
      "grad_norm": 0.24890229105949402,
      "learning_rate": 2.728596561744299e-06,
      "loss": 0.011,
      "step": 2227060
    },
    {
      "epoch": 3.6446652658038925,
      "grad_norm": 0.2592635154724121,
      "learning_rate": 2.7285306695307817e-06,
      "loss": 0.0119,
      "step": 2227080
    },
    {
      "epoch": 3.6446979962425456,
      "grad_norm": 0.28340211510658264,
      "learning_rate": 2.7284647773172645e-06,
      "loss": 0.011,
      "step": 2227100
    },
    {
      "epoch": 3.644730726681199,
      "grad_norm": 0.5548971891403198,
      "learning_rate": 2.7283988851037472e-06,
      "loss": 0.0111,
      "step": 2227120
    },
    {
      "epoch": 3.6447634571198524,
      "grad_norm": 0.5285749435424805,
      "learning_rate": 2.7283329928902304e-06,
      "loss": 0.0103,
      "step": 2227140
    },
    {
      "epoch": 3.6447961875585055,
      "grad_norm": 0.33588775992393494,
      "learning_rate": 2.728267100676713e-06,
      "loss": 0.0085,
      "step": 2227160
    },
    {
      "epoch": 3.644828917997159,
      "grad_norm": 0.1999877542257309,
      "learning_rate": 2.728201208463196e-06,
      "loss": 0.0106,
      "step": 2227180
    },
    {
      "epoch": 3.6448616484358123,
      "grad_norm": 0.06910425424575806,
      "learning_rate": 2.7281353162496786e-06,
      "loss": 0.0113,
      "step": 2227200
    },
    {
      "epoch": 3.644894378874466,
      "grad_norm": 0.22504334151744843,
      "learning_rate": 2.7280694240361618e-06,
      "loss": 0.0112,
      "step": 2227220
    },
    {
      "epoch": 3.644927109313119,
      "grad_norm": 0.13435989618301392,
      "learning_rate": 2.728003531822645e-06,
      "loss": 0.0093,
      "step": 2227240
    },
    {
      "epoch": 3.644959839751772,
      "grad_norm": 0.06362191587686539,
      "learning_rate": 2.7279376396091277e-06,
      "loss": 0.0075,
      "step": 2227260
    },
    {
      "epoch": 3.6449925701904258,
      "grad_norm": 1.6443625688552856,
      "learning_rate": 2.727871747395611e-06,
      "loss": 0.0136,
      "step": 2227280
    },
    {
      "epoch": 3.645025300629079,
      "grad_norm": 0.2713068425655365,
      "learning_rate": 2.7278058551820936e-06,
      "loss": 0.0084,
      "step": 2227300
    },
    {
      "epoch": 3.6450580310677325,
      "grad_norm": 0.8856253623962402,
      "learning_rate": 2.7277399629685763e-06,
      "loss": 0.0131,
      "step": 2227320
    },
    {
      "epoch": 3.6450907615063857,
      "grad_norm": 0.13109882175922394,
      "learning_rate": 2.727674070755059e-06,
      "loss": 0.0085,
      "step": 2227340
    },
    {
      "epoch": 3.6451234919450393,
      "grad_norm": 0.25716084241867065,
      "learning_rate": 2.7276081785415422e-06,
      "loss": 0.0119,
      "step": 2227360
    },
    {
      "epoch": 3.6451562223836924,
      "grad_norm": 0.5830909013748169,
      "learning_rate": 2.727542286328025e-06,
      "loss": 0.0135,
      "step": 2227380
    },
    {
      "epoch": 3.6451889528223456,
      "grad_norm": 0.8319342732429504,
      "learning_rate": 2.7274763941145077e-06,
      "loss": 0.009,
      "step": 2227400
    },
    {
      "epoch": 3.645221683260999,
      "grad_norm": 0.22792062163352966,
      "learning_rate": 2.7274105019009904e-06,
      "loss": 0.0073,
      "step": 2227420
    },
    {
      "epoch": 3.6452544136996523,
      "grad_norm": 1.499477505683899,
      "learning_rate": 2.7273446096874736e-06,
      "loss": 0.0125,
      "step": 2227440
    },
    {
      "epoch": 3.645287144138306,
      "grad_norm": 0.09944165498018265,
      "learning_rate": 2.7272787174739563e-06,
      "loss": 0.0084,
      "step": 2227460
    },
    {
      "epoch": 3.645319874576959,
      "grad_norm": 0.21725179255008698,
      "learning_rate": 2.727212825260439e-06,
      "loss": 0.0116,
      "step": 2227480
    },
    {
      "epoch": 3.6453526050156126,
      "grad_norm": 0.21513909101486206,
      "learning_rate": 2.727146933046922e-06,
      "loss": 0.0109,
      "step": 2227500
    },
    {
      "epoch": 3.645385335454266,
      "grad_norm": 0.3889806568622589,
      "learning_rate": 2.7270810408334046e-06,
      "loss": 0.0106,
      "step": 2227520
    },
    {
      "epoch": 3.645418065892919,
      "grad_norm": 0.28672075271606445,
      "learning_rate": 2.7270151486198877e-06,
      "loss": 0.0136,
      "step": 2227540
    },
    {
      "epoch": 3.6454507963315725,
      "grad_norm": 0.4628104865550995,
      "learning_rate": 2.7269492564063705e-06,
      "loss": 0.0142,
      "step": 2227560
    },
    {
      "epoch": 3.6454835267702257,
      "grad_norm": 0.40588289499282837,
      "learning_rate": 2.7268833641928536e-06,
      "loss": 0.0114,
      "step": 2227580
    },
    {
      "epoch": 3.645516257208879,
      "grad_norm": 0.1274252086877823,
      "learning_rate": 2.7268174719793368e-06,
      "loss": 0.0089,
      "step": 2227600
    },
    {
      "epoch": 3.6455489876475324,
      "grad_norm": 0.29270750284194946,
      "learning_rate": 2.7267515797658195e-06,
      "loss": 0.012,
      "step": 2227620
    },
    {
      "epoch": 3.645581718086186,
      "grad_norm": 0.349000483751297,
      "learning_rate": 2.7266856875523023e-06,
      "loss": 0.0107,
      "step": 2227640
    },
    {
      "epoch": 3.645614448524839,
      "grad_norm": 0.2206829935312271,
      "learning_rate": 2.726619795338785e-06,
      "loss": 0.0112,
      "step": 2227660
    },
    {
      "epoch": 3.6456471789634923,
      "grad_norm": 0.1529984027147293,
      "learning_rate": 2.726553903125268e-06,
      "loss": 0.0156,
      "step": 2227680
    },
    {
      "epoch": 3.645679909402146,
      "grad_norm": 0.6867262125015259,
      "learning_rate": 2.726488010911751e-06,
      "loss": 0.0095,
      "step": 2227700
    },
    {
      "epoch": 3.645712639840799,
      "grad_norm": 0.1053125336766243,
      "learning_rate": 2.7264221186982336e-06,
      "loss": 0.0103,
      "step": 2227720
    },
    {
      "epoch": 3.6457453702794522,
      "grad_norm": 0.31081727147102356,
      "learning_rate": 2.7263562264847164e-06,
      "loss": 0.0102,
      "step": 2227740
    },
    {
      "epoch": 3.645778100718106,
      "grad_norm": 0.35285285115242004,
      "learning_rate": 2.7262903342711996e-06,
      "loss": 0.0083,
      "step": 2227760
    },
    {
      "epoch": 3.6458108311567594,
      "grad_norm": 0.4360247552394867,
      "learning_rate": 2.7262244420576823e-06,
      "loss": 0.0089,
      "step": 2227780
    },
    {
      "epoch": 3.6458435615954126,
      "grad_norm": 0.16919846832752228,
      "learning_rate": 2.726158549844165e-06,
      "loss": 0.0097,
      "step": 2227800
    },
    {
      "epoch": 3.6458762920340657,
      "grad_norm": 0.20901556313037872,
      "learning_rate": 2.7260926576306478e-06,
      "loss": 0.0102,
      "step": 2227820
    },
    {
      "epoch": 3.6459090224727193,
      "grad_norm": 0.25810837745666504,
      "learning_rate": 2.726026765417131e-06,
      "loss": 0.0113,
      "step": 2227840
    },
    {
      "epoch": 3.6459417529113725,
      "grad_norm": 0.3633507192134857,
      "learning_rate": 2.7259608732036137e-06,
      "loss": 0.0148,
      "step": 2227860
    },
    {
      "epoch": 3.6459744833500256,
      "grad_norm": 0.33084747195243835,
      "learning_rate": 2.7258949809900964e-06,
      "loss": 0.0137,
      "step": 2227880
    },
    {
      "epoch": 3.646007213788679,
      "grad_norm": 0.6030169725418091,
      "learning_rate": 2.725829088776579e-06,
      "loss": 0.0101,
      "step": 2227900
    },
    {
      "epoch": 3.646039944227333,
      "grad_norm": 0.5737408995628357,
      "learning_rate": 2.725763196563062e-06,
      "loss": 0.0085,
      "step": 2227920
    },
    {
      "epoch": 3.646072674665986,
      "grad_norm": 0.15851590037345886,
      "learning_rate": 2.7256973043495455e-06,
      "loss": 0.0138,
      "step": 2227940
    },
    {
      "epoch": 3.646105405104639,
      "grad_norm": 0.1544167399406433,
      "learning_rate": 2.7256314121360282e-06,
      "loss": 0.0094,
      "step": 2227960
    },
    {
      "epoch": 3.6461381355432927,
      "grad_norm": 0.24656271934509277,
      "learning_rate": 2.7255655199225114e-06,
      "loss": 0.0123,
      "step": 2227980
    },
    {
      "epoch": 3.646170865981946,
      "grad_norm": 0.11791881918907166,
      "learning_rate": 2.725499627708994e-06,
      "loss": 0.0088,
      "step": 2228000
    },
    {
      "epoch": 3.646203596420599,
      "grad_norm": 0.1800110638141632,
      "learning_rate": 2.725433735495477e-06,
      "loss": 0.0125,
      "step": 2228020
    },
    {
      "epoch": 3.6462363268592526,
      "grad_norm": 0.4309098422527313,
      "learning_rate": 2.7253678432819596e-06,
      "loss": 0.0121,
      "step": 2228040
    },
    {
      "epoch": 3.646269057297906,
      "grad_norm": 0.2500018775463104,
      "learning_rate": 2.7253019510684423e-06,
      "loss": 0.0201,
      "step": 2228060
    },
    {
      "epoch": 3.6463017877365593,
      "grad_norm": 0.14080415666103363,
      "learning_rate": 2.7252360588549255e-06,
      "loss": 0.0083,
      "step": 2228080
    },
    {
      "epoch": 3.6463345181752125,
      "grad_norm": 0.6713696122169495,
      "learning_rate": 2.7251701666414082e-06,
      "loss": 0.012,
      "step": 2228100
    },
    {
      "epoch": 3.646367248613866,
      "grad_norm": 0.11793763190507889,
      "learning_rate": 2.725104274427891e-06,
      "loss": 0.0129,
      "step": 2228120
    },
    {
      "epoch": 3.6463999790525192,
      "grad_norm": 0.14595630764961243,
      "learning_rate": 2.7250383822143737e-06,
      "loss": 0.0123,
      "step": 2228140
    },
    {
      "epoch": 3.6464327094911724,
      "grad_norm": 0.3155466914176941,
      "learning_rate": 2.724972490000857e-06,
      "loss": 0.0134,
      "step": 2228160
    },
    {
      "epoch": 3.646465439929826,
      "grad_norm": 0.1608603447675705,
      "learning_rate": 2.7249065977873396e-06,
      "loss": 0.0115,
      "step": 2228180
    },
    {
      "epoch": 3.646498170368479,
      "grad_norm": 0.24808436632156372,
      "learning_rate": 2.7248407055738224e-06,
      "loss": 0.0133,
      "step": 2228200
    },
    {
      "epoch": 3.6465309008071327,
      "grad_norm": 0.1892167627811432,
      "learning_rate": 2.724774813360305e-06,
      "loss": 0.0065,
      "step": 2228220
    },
    {
      "epoch": 3.646563631245786,
      "grad_norm": 0.21988272666931152,
      "learning_rate": 2.7247089211467883e-06,
      "loss": 0.0088,
      "step": 2228240
    },
    {
      "epoch": 3.6465963616844395,
      "grad_norm": 0.24368569254875183,
      "learning_rate": 2.724643028933271e-06,
      "loss": 0.01,
      "step": 2228260
    },
    {
      "epoch": 3.6466290921230926,
      "grad_norm": 0.41247960925102234,
      "learning_rate": 2.724577136719754e-06,
      "loss": 0.0077,
      "step": 2228280
    },
    {
      "epoch": 3.6466618225617458,
      "grad_norm": 1.188832402229309,
      "learning_rate": 2.7245112445062373e-06,
      "loss": 0.0174,
      "step": 2228300
    },
    {
      "epoch": 3.6466945530003994,
      "grad_norm": 0.40306341648101807,
      "learning_rate": 2.72444535229272e-06,
      "loss": 0.0088,
      "step": 2228320
    },
    {
      "epoch": 3.6467272834390525,
      "grad_norm": 0.1146397739648819,
      "learning_rate": 2.724379460079203e-06,
      "loss": 0.0098,
      "step": 2228340
    },
    {
      "epoch": 3.646760013877706,
      "grad_norm": 0.13995176553726196,
      "learning_rate": 2.7243135678656856e-06,
      "loss": 0.0114,
      "step": 2228360
    },
    {
      "epoch": 3.6467927443163592,
      "grad_norm": 0.13299822807312012,
      "learning_rate": 2.7242476756521687e-06,
      "loss": 0.0091,
      "step": 2228380
    },
    {
      "epoch": 3.646825474755013,
      "grad_norm": 0.21558041870594025,
      "learning_rate": 2.7241817834386515e-06,
      "loss": 0.0125,
      "step": 2228400
    },
    {
      "epoch": 3.646858205193666,
      "grad_norm": 0.8225412964820862,
      "learning_rate": 2.724115891225134e-06,
      "loss": 0.0153,
      "step": 2228420
    },
    {
      "epoch": 3.646890935632319,
      "grad_norm": 0.15102529525756836,
      "learning_rate": 2.724049999011617e-06,
      "loss": 0.012,
      "step": 2228440
    },
    {
      "epoch": 3.6469236660709727,
      "grad_norm": 1.6349185705184937,
      "learning_rate": 2.7239841067980997e-06,
      "loss": 0.0141,
      "step": 2228460
    },
    {
      "epoch": 3.646956396509626,
      "grad_norm": 0.6672519445419312,
      "learning_rate": 2.723918214584583e-06,
      "loss": 0.0146,
      "step": 2228480
    },
    {
      "epoch": 3.6469891269482795,
      "grad_norm": 0.12646211683750153,
      "learning_rate": 2.7238523223710656e-06,
      "loss": 0.0123,
      "step": 2228500
    },
    {
      "epoch": 3.6470218573869326,
      "grad_norm": 0.22541840374469757,
      "learning_rate": 2.7237864301575483e-06,
      "loss": 0.0099,
      "step": 2228520
    },
    {
      "epoch": 3.6470545878255862,
      "grad_norm": 0.3797849118709564,
      "learning_rate": 2.723720537944031e-06,
      "loss": 0.0126,
      "step": 2228540
    },
    {
      "epoch": 3.6470873182642394,
      "grad_norm": 0.15686675906181335,
      "learning_rate": 2.7236546457305142e-06,
      "loss": 0.0143,
      "step": 2228560
    },
    {
      "epoch": 3.6471200487028925,
      "grad_norm": 0.12332820892333984,
      "learning_rate": 2.723588753516997e-06,
      "loss": 0.013,
      "step": 2228580
    },
    {
      "epoch": 3.647152779141546,
      "grad_norm": 0.3064231872558594,
      "learning_rate": 2.7235228613034797e-06,
      "loss": 0.0125,
      "step": 2228600
    },
    {
      "epoch": 3.6471855095801993,
      "grad_norm": 0.20980021357536316,
      "learning_rate": 2.7234569690899624e-06,
      "loss": 0.0104,
      "step": 2228620
    },
    {
      "epoch": 3.647218240018853,
      "grad_norm": 0.08100453019142151,
      "learning_rate": 2.723391076876446e-06,
      "loss": 0.0152,
      "step": 2228640
    },
    {
      "epoch": 3.647250970457506,
      "grad_norm": 0.4091624915599823,
      "learning_rate": 2.7233251846629288e-06,
      "loss": 0.0138,
      "step": 2228660
    },
    {
      "epoch": 3.6472837008961596,
      "grad_norm": 0.14268308877944946,
      "learning_rate": 2.7232592924494115e-06,
      "loss": 0.0106,
      "step": 2228680
    },
    {
      "epoch": 3.6473164313348128,
      "grad_norm": 0.506912887096405,
      "learning_rate": 2.7231934002358947e-06,
      "loss": 0.0084,
      "step": 2228700
    },
    {
      "epoch": 3.647349161773466,
      "grad_norm": 0.5858525037765503,
      "learning_rate": 2.7231275080223774e-06,
      "loss": 0.0163,
      "step": 2228720
    },
    {
      "epoch": 3.6473818922121195,
      "grad_norm": 0.23905591666698456,
      "learning_rate": 2.72306161580886e-06,
      "loss": 0.0112,
      "step": 2228740
    },
    {
      "epoch": 3.6474146226507727,
      "grad_norm": 0.09508009999990463,
      "learning_rate": 2.722995723595343e-06,
      "loss": 0.011,
      "step": 2228760
    },
    {
      "epoch": 3.6474473530894262,
      "grad_norm": 0.2934779226779938,
      "learning_rate": 2.722929831381826e-06,
      "loss": 0.0123,
      "step": 2228780
    },
    {
      "epoch": 3.6474800835280794,
      "grad_norm": 0.051026370376348495,
      "learning_rate": 2.722863939168309e-06,
      "loss": 0.0128,
      "step": 2228800
    },
    {
      "epoch": 3.647512813966733,
      "grad_norm": 0.35547980666160583,
      "learning_rate": 2.7227980469547915e-06,
      "loss": 0.0086,
      "step": 2228820
    },
    {
      "epoch": 3.647545544405386,
      "grad_norm": 0.6177670359611511,
      "learning_rate": 2.7227321547412743e-06,
      "loss": 0.0125,
      "step": 2228840
    },
    {
      "epoch": 3.6475782748440393,
      "grad_norm": 0.7708375453948975,
      "learning_rate": 2.7226662625277574e-06,
      "loss": 0.0141,
      "step": 2228860
    },
    {
      "epoch": 3.647611005282693,
      "grad_norm": 0.4369150400161743,
      "learning_rate": 2.72260037031424e-06,
      "loss": 0.0102,
      "step": 2228880
    },
    {
      "epoch": 3.647643735721346,
      "grad_norm": 0.7268105149269104,
      "learning_rate": 2.722534478100723e-06,
      "loss": 0.0102,
      "step": 2228900
    },
    {
      "epoch": 3.6476764661599996,
      "grad_norm": 0.22041557729244232,
      "learning_rate": 2.7224685858872057e-06,
      "loss": 0.016,
      "step": 2228920
    },
    {
      "epoch": 3.647709196598653,
      "grad_norm": 0.18601207435131073,
      "learning_rate": 2.7224026936736884e-06,
      "loss": 0.0088,
      "step": 2228940
    },
    {
      "epoch": 3.6477419270373064,
      "grad_norm": 0.08772536367177963,
      "learning_rate": 2.7223368014601716e-06,
      "loss": 0.0086,
      "step": 2228960
    },
    {
      "epoch": 3.6477746574759595,
      "grad_norm": 0.32272282242774963,
      "learning_rate": 2.7222709092466543e-06,
      "loss": 0.0131,
      "step": 2228980
    },
    {
      "epoch": 3.6478073879146127,
      "grad_norm": 0.11608006805181503,
      "learning_rate": 2.7222050170331375e-06,
      "loss": 0.008,
      "step": 2229000
    },
    {
      "epoch": 3.6478401183532663,
      "grad_norm": 0.19238993525505066,
      "learning_rate": 2.7221391248196206e-06,
      "loss": 0.0118,
      "step": 2229020
    },
    {
      "epoch": 3.6478728487919194,
      "grad_norm": 0.1924624890089035,
      "learning_rate": 2.7220732326061034e-06,
      "loss": 0.0185,
      "step": 2229040
    },
    {
      "epoch": 3.647905579230573,
      "grad_norm": 0.36825451254844666,
      "learning_rate": 2.722007340392586e-06,
      "loss": 0.0098,
      "step": 2229060
    },
    {
      "epoch": 3.647938309669226,
      "grad_norm": 0.1691502332687378,
      "learning_rate": 2.721941448179069e-06,
      "loss": 0.0122,
      "step": 2229080
    },
    {
      "epoch": 3.6479710401078798,
      "grad_norm": 0.2768721878528595,
      "learning_rate": 2.721875555965552e-06,
      "loss": 0.0134,
      "step": 2229100
    },
    {
      "epoch": 3.648003770546533,
      "grad_norm": 0.20192895829677582,
      "learning_rate": 2.7218096637520347e-06,
      "loss": 0.0096,
      "step": 2229120
    },
    {
      "epoch": 3.648036500985186,
      "grad_norm": 0.42491796612739563,
      "learning_rate": 2.7217437715385175e-06,
      "loss": 0.0138,
      "step": 2229140
    },
    {
      "epoch": 3.6480692314238397,
      "grad_norm": 0.3994576036930084,
      "learning_rate": 2.7216778793250002e-06,
      "loss": 0.0119,
      "step": 2229160
    },
    {
      "epoch": 3.648101961862493,
      "grad_norm": 0.3206811547279358,
      "learning_rate": 2.7216119871114834e-06,
      "loss": 0.0089,
      "step": 2229180
    },
    {
      "epoch": 3.648134692301146,
      "grad_norm": 0.2464005947113037,
      "learning_rate": 2.721546094897966e-06,
      "loss": 0.0114,
      "step": 2229200
    },
    {
      "epoch": 3.6481674227397995,
      "grad_norm": 0.04987575113773346,
      "learning_rate": 2.721480202684449e-06,
      "loss": 0.0129,
      "step": 2229220
    },
    {
      "epoch": 3.648200153178453,
      "grad_norm": 0.044438544660806656,
      "learning_rate": 2.7214143104709316e-06,
      "loss": 0.0155,
      "step": 2229240
    },
    {
      "epoch": 3.6482328836171063,
      "grad_norm": 0.11477874964475632,
      "learning_rate": 2.7213484182574148e-06,
      "loss": 0.014,
      "step": 2229260
    },
    {
      "epoch": 3.6482656140557594,
      "grad_norm": 0.29326897859573364,
      "learning_rate": 2.7212825260438975e-06,
      "loss": 0.0088,
      "step": 2229280
    },
    {
      "epoch": 3.648298344494413,
      "grad_norm": 0.22384563088417053,
      "learning_rate": 2.7212166338303803e-06,
      "loss": 0.009,
      "step": 2229300
    },
    {
      "epoch": 3.648331074933066,
      "grad_norm": 0.6116023063659668,
      "learning_rate": 2.721150741616863e-06,
      "loss": 0.012,
      "step": 2229320
    },
    {
      "epoch": 3.6483638053717193,
      "grad_norm": 0.35059189796447754,
      "learning_rate": 2.7210848494033466e-06,
      "loss": 0.0083,
      "step": 2229340
    },
    {
      "epoch": 3.648396535810373,
      "grad_norm": 0.09949289262294769,
      "learning_rate": 2.7210189571898293e-06,
      "loss": 0.015,
      "step": 2229360
    },
    {
      "epoch": 3.6484292662490265,
      "grad_norm": 0.3121131956577301,
      "learning_rate": 2.720953064976312e-06,
      "loss": 0.0093,
      "step": 2229380
    },
    {
      "epoch": 3.6484619966876797,
      "grad_norm": 0.968534529209137,
      "learning_rate": 2.7208871727627952e-06,
      "loss": 0.0164,
      "step": 2229400
    },
    {
      "epoch": 3.648494727126333,
      "grad_norm": 0.2241567224264145,
      "learning_rate": 2.720821280549278e-06,
      "loss": 0.0144,
      "step": 2229420
    },
    {
      "epoch": 3.6485274575649864,
      "grad_norm": 0.1058192327618599,
      "learning_rate": 2.7207553883357607e-06,
      "loss": 0.011,
      "step": 2229440
    },
    {
      "epoch": 3.6485601880036396,
      "grad_norm": 0.21359415352344513,
      "learning_rate": 2.7206894961222434e-06,
      "loss": 0.0126,
      "step": 2229460
    },
    {
      "epoch": 3.6485929184422927,
      "grad_norm": 0.31129735708236694,
      "learning_rate": 2.720623603908726e-06,
      "loss": 0.0111,
      "step": 2229480
    },
    {
      "epoch": 3.6486256488809463,
      "grad_norm": 0.13948467373847961,
      "learning_rate": 2.7205577116952093e-06,
      "loss": 0.0073,
      "step": 2229500
    },
    {
      "epoch": 3.6486583793196,
      "grad_norm": 0.7995581030845642,
      "learning_rate": 2.720491819481692e-06,
      "loss": 0.0077,
      "step": 2229520
    },
    {
      "epoch": 3.648691109758253,
      "grad_norm": 0.19220459461212158,
      "learning_rate": 2.720425927268175e-06,
      "loss": 0.0084,
      "step": 2229540
    },
    {
      "epoch": 3.648723840196906,
      "grad_norm": 0.21572595834732056,
      "learning_rate": 2.7203600350546576e-06,
      "loss": 0.012,
      "step": 2229560
    },
    {
      "epoch": 3.64875657063556,
      "grad_norm": 0.5420714616775513,
      "learning_rate": 2.7202941428411407e-06,
      "loss": 0.0119,
      "step": 2229580
    },
    {
      "epoch": 3.648789301074213,
      "grad_norm": 0.2049098163843155,
      "learning_rate": 2.7202282506276235e-06,
      "loss": 0.0084,
      "step": 2229600
    },
    {
      "epoch": 3.648822031512866,
      "grad_norm": 0.1898771971464157,
      "learning_rate": 2.720162358414106e-06,
      "loss": 0.0119,
      "step": 2229620
    },
    {
      "epoch": 3.6488547619515197,
      "grad_norm": 0.6412110328674316,
      "learning_rate": 2.720096466200589e-06,
      "loss": 0.0152,
      "step": 2229640
    },
    {
      "epoch": 3.648887492390173,
      "grad_norm": 0.07047615945339203,
      "learning_rate": 2.720030573987072e-06,
      "loss": 0.0095,
      "step": 2229660
    },
    {
      "epoch": 3.6489202228288264,
      "grad_norm": 0.11268749088048935,
      "learning_rate": 2.719964681773555e-06,
      "loss": 0.0077,
      "step": 2229680
    },
    {
      "epoch": 3.6489529532674796,
      "grad_norm": 0.2754870653152466,
      "learning_rate": 2.719898789560038e-06,
      "loss": 0.0094,
      "step": 2229700
    },
    {
      "epoch": 3.648985683706133,
      "grad_norm": 0.08916931599378586,
      "learning_rate": 2.719832897346521e-06,
      "loss": 0.0083,
      "step": 2229720
    },
    {
      "epoch": 3.6490184141447863,
      "grad_norm": 0.5369783043861389,
      "learning_rate": 2.719767005133004e-06,
      "loss": 0.0122,
      "step": 2229740
    },
    {
      "epoch": 3.6490511445834395,
      "grad_norm": 0.34578537940979004,
      "learning_rate": 2.7197011129194867e-06,
      "loss": 0.0158,
      "step": 2229760
    },
    {
      "epoch": 3.649083875022093,
      "grad_norm": 0.25507232546806335,
      "learning_rate": 2.7196352207059694e-06,
      "loss": 0.0098,
      "step": 2229780
    },
    {
      "epoch": 3.6491166054607462,
      "grad_norm": 0.5207188725471497,
      "learning_rate": 2.7195693284924526e-06,
      "loss": 0.0115,
      "step": 2229800
    },
    {
      "epoch": 3.6491493358994,
      "grad_norm": 0.25226786732673645,
      "learning_rate": 2.7195034362789353e-06,
      "loss": 0.011,
      "step": 2229820
    },
    {
      "epoch": 3.649182066338053,
      "grad_norm": 0.615484356880188,
      "learning_rate": 2.719437544065418e-06,
      "loss": 0.0093,
      "step": 2229840
    },
    {
      "epoch": 3.6492147967767066,
      "grad_norm": 0.1789376437664032,
      "learning_rate": 2.7193716518519008e-06,
      "loss": 0.0103,
      "step": 2229860
    },
    {
      "epoch": 3.6492475272153597,
      "grad_norm": 0.3978828191757202,
      "learning_rate": 2.7193057596383835e-06,
      "loss": 0.0093,
      "step": 2229880
    },
    {
      "epoch": 3.649280257654013,
      "grad_norm": 0.1376197338104248,
      "learning_rate": 2.7192398674248667e-06,
      "loss": 0.0101,
      "step": 2229900
    },
    {
      "epoch": 3.6493129880926665,
      "grad_norm": 0.23588153719902039,
      "learning_rate": 2.7191739752113494e-06,
      "loss": 0.0136,
      "step": 2229920
    },
    {
      "epoch": 3.6493457185313196,
      "grad_norm": 0.31312671303749084,
      "learning_rate": 2.719108082997832e-06,
      "loss": 0.0124,
      "step": 2229940
    },
    {
      "epoch": 3.649378448969973,
      "grad_norm": 0.19326664507389069,
      "learning_rate": 2.719042190784315e-06,
      "loss": 0.0104,
      "step": 2229960
    },
    {
      "epoch": 3.6494111794086264,
      "grad_norm": 0.19203226268291473,
      "learning_rate": 2.718976298570798e-06,
      "loss": 0.0171,
      "step": 2229980
    },
    {
      "epoch": 3.64944390984728,
      "grad_norm": 0.23713570833206177,
      "learning_rate": 2.718910406357281e-06,
      "loss": 0.0088,
      "step": 2230000
    },
    {
      "epoch": 3.649476640285933,
      "grad_norm": 0.2617802321910858,
      "learning_rate": 2.7188445141437635e-06,
      "loss": 0.0141,
      "step": 2230020
    },
    {
      "epoch": 3.6495093707245863,
      "grad_norm": 0.1937902569770813,
      "learning_rate": 2.718778621930247e-06,
      "loss": 0.0108,
      "step": 2230040
    },
    {
      "epoch": 3.64954210116324,
      "grad_norm": 0.1407538801431656,
      "learning_rate": 2.71871272971673e-06,
      "loss": 0.0089,
      "step": 2230060
    },
    {
      "epoch": 3.649574831601893,
      "grad_norm": 0.20208857953548431,
      "learning_rate": 2.7186468375032126e-06,
      "loss": 0.0113,
      "step": 2230080
    },
    {
      "epoch": 3.6496075620405466,
      "grad_norm": 0.12080869823694229,
      "learning_rate": 2.7185809452896953e-06,
      "loss": 0.0139,
      "step": 2230100
    },
    {
      "epoch": 3.6496402924791997,
      "grad_norm": 0.14560241997241974,
      "learning_rate": 2.7185150530761785e-06,
      "loss": 0.0121,
      "step": 2230120
    },
    {
      "epoch": 3.6496730229178533,
      "grad_norm": 0.6138115525245667,
      "learning_rate": 2.7184491608626613e-06,
      "loss": 0.0134,
      "step": 2230140
    },
    {
      "epoch": 3.6497057533565065,
      "grad_norm": 0.22409510612487793,
      "learning_rate": 2.718383268649144e-06,
      "loss": 0.0129,
      "step": 2230160
    },
    {
      "epoch": 3.6497384837951596,
      "grad_norm": 1.3019280433654785,
      "learning_rate": 2.7183173764356267e-06,
      "loss": 0.0106,
      "step": 2230180
    },
    {
      "epoch": 3.6497712142338132,
      "grad_norm": 0.3123781383037567,
      "learning_rate": 2.71825148422211e-06,
      "loss": 0.0121,
      "step": 2230200
    },
    {
      "epoch": 3.6498039446724664,
      "grad_norm": 0.317960649728775,
      "learning_rate": 2.7181855920085926e-06,
      "loss": 0.0134,
      "step": 2230220
    },
    {
      "epoch": 3.64983667511112,
      "grad_norm": 0.5100091099739075,
      "learning_rate": 2.7181196997950754e-06,
      "loss": 0.0106,
      "step": 2230240
    },
    {
      "epoch": 3.649869405549773,
      "grad_norm": 0.30321380496025085,
      "learning_rate": 2.718053807581558e-06,
      "loss": 0.0118,
      "step": 2230260
    },
    {
      "epoch": 3.6499021359884267,
      "grad_norm": 0.36228272318840027,
      "learning_rate": 2.7179879153680413e-06,
      "loss": 0.0093,
      "step": 2230280
    },
    {
      "epoch": 3.64993486642708,
      "grad_norm": 0.5088090300559998,
      "learning_rate": 2.717922023154524e-06,
      "loss": 0.0076,
      "step": 2230300
    },
    {
      "epoch": 3.649967596865733,
      "grad_norm": 0.23878607153892517,
      "learning_rate": 2.7178561309410068e-06,
      "loss": 0.0128,
      "step": 2230320
    },
    {
      "epoch": 3.6500003273043866,
      "grad_norm": 0.3999108076095581,
      "learning_rate": 2.7177902387274895e-06,
      "loss": 0.0102,
      "step": 2230340
    },
    {
      "epoch": 3.6500330577430398,
      "grad_norm": 0.14951619505882263,
      "learning_rate": 2.7177243465139722e-06,
      "loss": 0.0124,
      "step": 2230360
    },
    {
      "epoch": 3.6500657881816934,
      "grad_norm": 0.198643296957016,
      "learning_rate": 2.7176584543004554e-06,
      "loss": 0.0077,
      "step": 2230380
    },
    {
      "epoch": 3.6500985186203465,
      "grad_norm": 0.0763615071773529,
      "learning_rate": 2.7175925620869386e-06,
      "loss": 0.0089,
      "step": 2230400
    },
    {
      "epoch": 3.650131249059,
      "grad_norm": 0.19386734068393707,
      "learning_rate": 2.7175266698734213e-06,
      "loss": 0.0134,
      "step": 2230420
    },
    {
      "epoch": 3.6501639794976533,
      "grad_norm": 0.1519881784915924,
      "learning_rate": 2.7174607776599045e-06,
      "loss": 0.0148,
      "step": 2230440
    },
    {
      "epoch": 3.6501967099363064,
      "grad_norm": 0.23048244416713715,
      "learning_rate": 2.717394885446387e-06,
      "loss": 0.0113,
      "step": 2230460
    },
    {
      "epoch": 3.65022944037496,
      "grad_norm": 0.24178744852542877,
      "learning_rate": 2.71732899323287e-06,
      "loss": 0.0101,
      "step": 2230480
    },
    {
      "epoch": 3.650262170813613,
      "grad_norm": 0.5406452417373657,
      "learning_rate": 2.7172631010193527e-06,
      "loss": 0.0125,
      "step": 2230500
    },
    {
      "epoch": 3.6502949012522667,
      "grad_norm": 1.1436591148376465,
      "learning_rate": 2.717197208805836e-06,
      "loss": 0.0119,
      "step": 2230520
    },
    {
      "epoch": 3.65032763169092,
      "grad_norm": 0.34430646896362305,
      "learning_rate": 2.7171313165923186e-06,
      "loss": 0.0123,
      "step": 2230540
    },
    {
      "epoch": 3.6503603621295735,
      "grad_norm": 0.18040989339351654,
      "learning_rate": 2.7170654243788013e-06,
      "loss": 0.0133,
      "step": 2230560
    },
    {
      "epoch": 3.6503930925682266,
      "grad_norm": 1.7290723323822021,
      "learning_rate": 2.716999532165284e-06,
      "loss": 0.0166,
      "step": 2230580
    },
    {
      "epoch": 3.65042582300688,
      "grad_norm": 0.5039363503456116,
      "learning_rate": 2.7169336399517672e-06,
      "loss": 0.0107,
      "step": 2230600
    },
    {
      "epoch": 3.6504585534455334,
      "grad_norm": 0.487589031457901,
      "learning_rate": 2.71686774773825e-06,
      "loss": 0.0092,
      "step": 2230620
    },
    {
      "epoch": 3.6504912838841865,
      "grad_norm": 0.10704168677330017,
      "learning_rate": 2.7168018555247327e-06,
      "loss": 0.0106,
      "step": 2230640
    },
    {
      "epoch": 3.6505240143228397,
      "grad_norm": 0.49044308066368103,
      "learning_rate": 2.7167359633112154e-06,
      "loss": 0.011,
      "step": 2230660
    },
    {
      "epoch": 3.6505567447614933,
      "grad_norm": 0.16389420628547668,
      "learning_rate": 2.7166700710976986e-06,
      "loss": 0.0071,
      "step": 2230680
    },
    {
      "epoch": 3.650589475200147,
      "grad_norm": 0.2557843029499054,
      "learning_rate": 2.7166041788841814e-06,
      "loss": 0.0105,
      "step": 2230700
    },
    {
      "epoch": 3.6506222056388,
      "grad_norm": 0.3875090777873993,
      "learning_rate": 2.716538286670664e-06,
      "loss": 0.0093,
      "step": 2230720
    },
    {
      "epoch": 3.650654936077453,
      "grad_norm": 0.18024088442325592,
      "learning_rate": 2.716472394457147e-06,
      "loss": 0.0155,
      "step": 2230740
    },
    {
      "epoch": 3.6506876665161068,
      "grad_norm": 1.322062611579895,
      "learning_rate": 2.7164065022436304e-06,
      "loss": 0.0114,
      "step": 2230760
    },
    {
      "epoch": 3.65072039695476,
      "grad_norm": 0.5014638304710388,
      "learning_rate": 2.716340610030113e-06,
      "loss": 0.0111,
      "step": 2230780
    },
    {
      "epoch": 3.650753127393413,
      "grad_norm": 0.3009452521800995,
      "learning_rate": 2.716274717816596e-06,
      "loss": 0.0062,
      "step": 2230800
    },
    {
      "epoch": 3.6507858578320667,
      "grad_norm": 0.2726190686225891,
      "learning_rate": 2.716208825603079e-06,
      "loss": 0.015,
      "step": 2230820
    },
    {
      "epoch": 3.6508185882707203,
      "grad_norm": 0.4526791274547577,
      "learning_rate": 2.716142933389562e-06,
      "loss": 0.0076,
      "step": 2230840
    },
    {
      "epoch": 3.6508513187093734,
      "grad_norm": 0.16376759111881256,
      "learning_rate": 2.7160770411760445e-06,
      "loss": 0.0136,
      "step": 2230860
    },
    {
      "epoch": 3.6508840491480266,
      "grad_norm": 0.13793568313121796,
      "learning_rate": 2.7160111489625273e-06,
      "loss": 0.0131,
      "step": 2230880
    },
    {
      "epoch": 3.65091677958668,
      "grad_norm": 1.11661696434021,
      "learning_rate": 2.71594525674901e-06,
      "loss": 0.0115,
      "step": 2230900
    },
    {
      "epoch": 3.6509495100253333,
      "grad_norm": 0.2280558943748474,
      "learning_rate": 2.715879364535493e-06,
      "loss": 0.0106,
      "step": 2230920
    },
    {
      "epoch": 3.6509822404639865,
      "grad_norm": 0.2490548938512802,
      "learning_rate": 2.715813472321976e-06,
      "loss": 0.0116,
      "step": 2230940
    },
    {
      "epoch": 3.65101497090264,
      "grad_norm": 0.28640949726104736,
      "learning_rate": 2.7157475801084587e-06,
      "loss": 0.0101,
      "step": 2230960
    },
    {
      "epoch": 3.6510477013412936,
      "grad_norm": 0.4987507462501526,
      "learning_rate": 2.7156816878949414e-06,
      "loss": 0.0114,
      "step": 2230980
    },
    {
      "epoch": 3.651080431779947,
      "grad_norm": 0.231271430850029,
      "learning_rate": 2.7156157956814246e-06,
      "loss": 0.0107,
      "step": 2231000
    },
    {
      "epoch": 3.6511131622186,
      "grad_norm": 0.16389980912208557,
      "learning_rate": 2.7155499034679073e-06,
      "loss": 0.0091,
      "step": 2231020
    },
    {
      "epoch": 3.6511458926572535,
      "grad_norm": 0.39429032802581787,
      "learning_rate": 2.71548401125439e-06,
      "loss": 0.0097,
      "step": 2231040
    },
    {
      "epoch": 3.6511786230959067,
      "grad_norm": 0.1937284767627716,
      "learning_rate": 2.7154181190408728e-06,
      "loss": 0.0136,
      "step": 2231060
    },
    {
      "epoch": 3.65121135353456,
      "grad_norm": 0.09867417812347412,
      "learning_rate": 2.715352226827356e-06,
      "loss": 0.0166,
      "step": 2231080
    },
    {
      "epoch": 3.6512440839732134,
      "grad_norm": 0.14643646776676178,
      "learning_rate": 2.715286334613839e-06,
      "loss": 0.0076,
      "step": 2231100
    },
    {
      "epoch": 3.651276814411867,
      "grad_norm": 0.08845487982034683,
      "learning_rate": 2.715220442400322e-06,
      "loss": 0.0097,
      "step": 2231120
    },
    {
      "epoch": 3.65130954485052,
      "grad_norm": 0.4196142852306366,
      "learning_rate": 2.715154550186805e-06,
      "loss": 0.0188,
      "step": 2231140
    },
    {
      "epoch": 3.6513422752891733,
      "grad_norm": 0.5073742270469666,
      "learning_rate": 2.7150886579732878e-06,
      "loss": 0.0124,
      "step": 2231160
    },
    {
      "epoch": 3.651375005727827,
      "grad_norm": 0.07097555696964264,
      "learning_rate": 2.7150227657597705e-06,
      "loss": 0.0073,
      "step": 2231180
    },
    {
      "epoch": 3.65140773616648,
      "grad_norm": 0.2595181167125702,
      "learning_rate": 2.7149568735462532e-06,
      "loss": 0.0147,
      "step": 2231200
    },
    {
      "epoch": 3.651440466605133,
      "grad_norm": 0.107545405626297,
      "learning_rate": 2.7148909813327364e-06,
      "loss": 0.0112,
      "step": 2231220
    },
    {
      "epoch": 3.651473197043787,
      "grad_norm": 0.11283989250659943,
      "learning_rate": 2.714825089119219e-06,
      "loss": 0.0085,
      "step": 2231240
    },
    {
      "epoch": 3.65150592748244,
      "grad_norm": 0.9866945743560791,
      "learning_rate": 2.714759196905702e-06,
      "loss": 0.0175,
      "step": 2231260
    },
    {
      "epoch": 3.6515386579210936,
      "grad_norm": 0.40196505188941956,
      "learning_rate": 2.7146933046921846e-06,
      "loss": 0.0114,
      "step": 2231280
    },
    {
      "epoch": 3.6515713883597467,
      "grad_norm": 0.1133200079202652,
      "learning_rate": 2.7146274124786674e-06,
      "loss": 0.0095,
      "step": 2231300
    },
    {
      "epoch": 3.6516041187984003,
      "grad_norm": 0.20574288070201874,
      "learning_rate": 2.7145615202651505e-06,
      "loss": 0.0095,
      "step": 2231320
    },
    {
      "epoch": 3.6516368492370535,
      "grad_norm": 0.43460819125175476,
      "learning_rate": 2.7144956280516333e-06,
      "loss": 0.0111,
      "step": 2231340
    },
    {
      "epoch": 3.6516695796757066,
      "grad_norm": 0.6850241422653198,
      "learning_rate": 2.714429735838116e-06,
      "loss": 0.0131,
      "step": 2231360
    },
    {
      "epoch": 3.65170231011436,
      "grad_norm": 0.08888114243745804,
      "learning_rate": 2.7143638436245987e-06,
      "loss": 0.0104,
      "step": 2231380
    },
    {
      "epoch": 3.6517350405530133,
      "grad_norm": 0.2381337434053421,
      "learning_rate": 2.714297951411082e-06,
      "loss": 0.0097,
      "step": 2231400
    },
    {
      "epoch": 3.651767770991667,
      "grad_norm": 0.19214311242103577,
      "learning_rate": 2.7142320591975646e-06,
      "loss": 0.009,
      "step": 2231420
    },
    {
      "epoch": 3.65180050143032,
      "grad_norm": 0.16867764294147491,
      "learning_rate": 2.7141661669840474e-06,
      "loss": 0.0069,
      "step": 2231440
    },
    {
      "epoch": 3.6518332318689737,
      "grad_norm": 0.4723166823387146,
      "learning_rate": 2.714100274770531e-06,
      "loss": 0.0106,
      "step": 2231460
    },
    {
      "epoch": 3.651865962307627,
      "grad_norm": 0.536619246006012,
      "learning_rate": 2.7140343825570137e-06,
      "loss": 0.0105,
      "step": 2231480
    },
    {
      "epoch": 3.65189869274628,
      "grad_norm": 0.5748001337051392,
      "learning_rate": 2.7139684903434964e-06,
      "loss": 0.0101,
      "step": 2231500
    },
    {
      "epoch": 3.6519314231849336,
      "grad_norm": 0.6171906590461731,
      "learning_rate": 2.713902598129979e-06,
      "loss": 0.0138,
      "step": 2231520
    },
    {
      "epoch": 3.6519641536235867,
      "grad_norm": 0.1720847636461258,
      "learning_rate": 2.7138367059164624e-06,
      "loss": 0.0107,
      "step": 2231540
    },
    {
      "epoch": 3.6519968840622403,
      "grad_norm": 0.27073004841804504,
      "learning_rate": 2.713770813702945e-06,
      "loss": 0.013,
      "step": 2231560
    },
    {
      "epoch": 3.6520296145008935,
      "grad_norm": 0.17772312462329865,
      "learning_rate": 2.713704921489428e-06,
      "loss": 0.0106,
      "step": 2231580
    },
    {
      "epoch": 3.652062344939547,
      "grad_norm": 0.6620407104492188,
      "learning_rate": 2.7136390292759106e-06,
      "loss": 0.0123,
      "step": 2231600
    },
    {
      "epoch": 3.6520950753782,
      "grad_norm": 0.33189094066619873,
      "learning_rate": 2.7135731370623937e-06,
      "loss": 0.0075,
      "step": 2231620
    },
    {
      "epoch": 3.6521278058168534,
      "grad_norm": 0.6016626954078674,
      "learning_rate": 2.7135072448488765e-06,
      "loss": 0.0127,
      "step": 2231640
    },
    {
      "epoch": 3.652160536255507,
      "grad_norm": 0.22305595874786377,
      "learning_rate": 2.7134413526353592e-06,
      "loss": 0.0134,
      "step": 2231660
    },
    {
      "epoch": 3.65219326669416,
      "grad_norm": 0.10289584845304489,
      "learning_rate": 2.713375460421842e-06,
      "loss": 0.0119,
      "step": 2231680
    },
    {
      "epoch": 3.6522259971328137,
      "grad_norm": 0.3355473279953003,
      "learning_rate": 2.713309568208325e-06,
      "loss": 0.0096,
      "step": 2231700
    },
    {
      "epoch": 3.652258727571467,
      "grad_norm": 0.8362483382225037,
      "learning_rate": 2.713243675994808e-06,
      "loss": 0.0137,
      "step": 2231720
    },
    {
      "epoch": 3.6522914580101205,
      "grad_norm": 0.15794126689434052,
      "learning_rate": 2.7131777837812906e-06,
      "loss": 0.0119,
      "step": 2231740
    },
    {
      "epoch": 3.6523241884487736,
      "grad_norm": 0.1263623833656311,
      "learning_rate": 2.7131118915677733e-06,
      "loss": 0.0107,
      "step": 2231760
    },
    {
      "epoch": 3.6523569188874268,
      "grad_norm": 0.13152585923671722,
      "learning_rate": 2.713045999354256e-06,
      "loss": 0.0101,
      "step": 2231780
    },
    {
      "epoch": 3.6523896493260803,
      "grad_norm": 0.7770116329193115,
      "learning_rate": 2.7129801071407397e-06,
      "loss": 0.0137,
      "step": 2231800
    },
    {
      "epoch": 3.6524223797647335,
      "grad_norm": 0.29970523715019226,
      "learning_rate": 2.7129142149272224e-06,
      "loss": 0.0143,
      "step": 2231820
    },
    {
      "epoch": 3.652455110203387,
      "grad_norm": 0.37021833658218384,
      "learning_rate": 2.712848322713705e-06,
      "loss": 0.0049,
      "step": 2231840
    },
    {
      "epoch": 3.6524878406420402,
      "grad_norm": 0.32800546288490295,
      "learning_rate": 2.7127824305001883e-06,
      "loss": 0.0125,
      "step": 2231860
    },
    {
      "epoch": 3.652520571080694,
      "grad_norm": 0.05473700910806656,
      "learning_rate": 2.712716538286671e-06,
      "loss": 0.0124,
      "step": 2231880
    },
    {
      "epoch": 3.652553301519347,
      "grad_norm": 0.16549524664878845,
      "learning_rate": 2.7126506460731538e-06,
      "loss": 0.0079,
      "step": 2231900
    },
    {
      "epoch": 3.652586031958,
      "grad_norm": 0.10974343866109848,
      "learning_rate": 2.7125847538596365e-06,
      "loss": 0.0144,
      "step": 2231920
    },
    {
      "epoch": 3.6526187623966537,
      "grad_norm": 0.4214002192020416,
      "learning_rate": 2.7125188616461197e-06,
      "loss": 0.008,
      "step": 2231940
    },
    {
      "epoch": 3.652651492835307,
      "grad_norm": 0.3019602298736572,
      "learning_rate": 2.7124529694326024e-06,
      "loss": 0.0099,
      "step": 2231960
    },
    {
      "epoch": 3.6526842232739605,
      "grad_norm": 0.17237862944602966,
      "learning_rate": 2.712387077219085e-06,
      "loss": 0.0122,
      "step": 2231980
    },
    {
      "epoch": 3.6527169537126136,
      "grad_norm": 0.2664945721626282,
      "learning_rate": 2.712321185005568e-06,
      "loss": 0.0127,
      "step": 2232000
    },
    {
      "epoch": 3.652749684151267,
      "grad_norm": 0.330098032951355,
      "learning_rate": 2.712255292792051e-06,
      "loss": 0.0155,
      "step": 2232020
    },
    {
      "epoch": 3.6527824145899204,
      "grad_norm": 0.3918077349662781,
      "learning_rate": 2.712189400578534e-06,
      "loss": 0.0156,
      "step": 2232040
    },
    {
      "epoch": 3.6528151450285735,
      "grad_norm": 0.28896018862724304,
      "learning_rate": 2.7121235083650165e-06,
      "loss": 0.0096,
      "step": 2232060
    },
    {
      "epoch": 3.652847875467227,
      "grad_norm": 0.19323942065238953,
      "learning_rate": 2.7120576161514993e-06,
      "loss": 0.0171,
      "step": 2232080
    },
    {
      "epoch": 3.6528806059058803,
      "grad_norm": 0.29452309012413025,
      "learning_rate": 2.7119917239379825e-06,
      "loss": 0.0095,
      "step": 2232100
    },
    {
      "epoch": 3.6529133363445334,
      "grad_norm": 0.10999973863363266,
      "learning_rate": 2.711925831724465e-06,
      "loss": 0.0096,
      "step": 2232120
    },
    {
      "epoch": 3.652946066783187,
      "grad_norm": 0.14829589426517487,
      "learning_rate": 2.711859939510948e-06,
      "loss": 0.0124,
      "step": 2232140
    },
    {
      "epoch": 3.6529787972218406,
      "grad_norm": 0.33097243309020996,
      "learning_rate": 2.7117940472974315e-06,
      "loss": 0.0131,
      "step": 2232160
    },
    {
      "epoch": 3.6530115276604938,
      "grad_norm": 0.8162348866462708,
      "learning_rate": 2.7117281550839143e-06,
      "loss": 0.0127,
      "step": 2232180
    },
    {
      "epoch": 3.653044258099147,
      "grad_norm": 0.4131467044353485,
      "learning_rate": 2.711662262870397e-06,
      "loss": 0.0093,
      "step": 2232200
    },
    {
      "epoch": 3.6530769885378005,
      "grad_norm": 0.25596243143081665,
      "learning_rate": 2.7115963706568797e-06,
      "loss": 0.0129,
      "step": 2232220
    },
    {
      "epoch": 3.6531097189764536,
      "grad_norm": 0.3686676323413849,
      "learning_rate": 2.711530478443363e-06,
      "loss": 0.0102,
      "step": 2232240
    },
    {
      "epoch": 3.653142449415107,
      "grad_norm": 0.15867236256599426,
      "learning_rate": 2.7114645862298456e-06,
      "loss": 0.0154,
      "step": 2232260
    },
    {
      "epoch": 3.6531751798537604,
      "grad_norm": 0.07914610952138901,
      "learning_rate": 2.7113986940163284e-06,
      "loss": 0.0087,
      "step": 2232280
    },
    {
      "epoch": 3.653207910292414,
      "grad_norm": 0.2722782492637634,
      "learning_rate": 2.711332801802811e-06,
      "loss": 0.0088,
      "step": 2232300
    },
    {
      "epoch": 3.653240640731067,
      "grad_norm": 0.12387043237686157,
      "learning_rate": 2.711266909589294e-06,
      "loss": 0.0103,
      "step": 2232320
    },
    {
      "epoch": 3.6532733711697203,
      "grad_norm": 0.2331782877445221,
      "learning_rate": 2.711201017375777e-06,
      "loss": 0.0093,
      "step": 2232340
    },
    {
      "epoch": 3.653306101608374,
      "grad_norm": 0.9299818873405457,
      "learning_rate": 2.7111351251622598e-06,
      "loss": 0.0139,
      "step": 2232360
    },
    {
      "epoch": 3.653338832047027,
      "grad_norm": 0.27591824531555176,
      "learning_rate": 2.7110692329487425e-06,
      "loss": 0.0135,
      "step": 2232380
    },
    {
      "epoch": 3.65337156248568,
      "grad_norm": 0.07554257661104202,
      "learning_rate": 2.7110033407352252e-06,
      "loss": 0.0109,
      "step": 2232400
    },
    {
      "epoch": 3.6534042929243338,
      "grad_norm": 0.061204999685287476,
      "learning_rate": 2.7109374485217084e-06,
      "loss": 0.0112,
      "step": 2232420
    },
    {
      "epoch": 3.6534370233629874,
      "grad_norm": 0.11067117750644684,
      "learning_rate": 2.710871556308191e-06,
      "loss": 0.0101,
      "step": 2232440
    },
    {
      "epoch": 3.6534697538016405,
      "grad_norm": 0.1444423645734787,
      "learning_rate": 2.710805664094674e-06,
      "loss": 0.0091,
      "step": 2232460
    },
    {
      "epoch": 3.6535024842402937,
      "grad_norm": 0.5295807719230652,
      "learning_rate": 2.7107397718811566e-06,
      "loss": 0.0091,
      "step": 2232480
    },
    {
      "epoch": 3.6535352146789473,
      "grad_norm": 0.1894172728061676,
      "learning_rate": 2.71067387966764e-06,
      "loss": 0.0132,
      "step": 2232500
    },
    {
      "epoch": 3.6535679451176004,
      "grad_norm": 0.16940844058990479,
      "learning_rate": 2.710607987454123e-06,
      "loss": 0.0107,
      "step": 2232520
    },
    {
      "epoch": 3.6536006755562536,
      "grad_norm": 0.8038338422775269,
      "learning_rate": 2.7105420952406057e-06,
      "loss": 0.0097,
      "step": 2232540
    },
    {
      "epoch": 3.653633405994907,
      "grad_norm": 0.46836355328559875,
      "learning_rate": 2.710476203027089e-06,
      "loss": 0.0094,
      "step": 2232560
    },
    {
      "epoch": 3.6536661364335608,
      "grad_norm": 0.32824885845184326,
      "learning_rate": 2.7104103108135716e-06,
      "loss": 0.0129,
      "step": 2232580
    },
    {
      "epoch": 3.653698866872214,
      "grad_norm": 1.8423352241516113,
      "learning_rate": 2.7103444186000543e-06,
      "loss": 0.0116,
      "step": 2232600
    },
    {
      "epoch": 3.653731597310867,
      "grad_norm": 0.3019411265850067,
      "learning_rate": 2.710278526386537e-06,
      "loss": 0.0122,
      "step": 2232620
    },
    {
      "epoch": 3.6537643277495206,
      "grad_norm": 0.26624053716659546,
      "learning_rate": 2.7102126341730202e-06,
      "loss": 0.0109,
      "step": 2232640
    },
    {
      "epoch": 3.653797058188174,
      "grad_norm": 0.5908321142196655,
      "learning_rate": 2.710146741959503e-06,
      "loss": 0.0124,
      "step": 2232660
    },
    {
      "epoch": 3.653829788626827,
      "grad_norm": 0.11447926610708237,
      "learning_rate": 2.7100808497459857e-06,
      "loss": 0.0107,
      "step": 2232680
    },
    {
      "epoch": 3.6538625190654805,
      "grad_norm": 0.5670217275619507,
      "learning_rate": 2.7100149575324685e-06,
      "loss": 0.0144,
      "step": 2232700
    },
    {
      "epoch": 3.6538952495041337,
      "grad_norm": 0.4588881731033325,
      "learning_rate": 2.7099490653189516e-06,
      "loss": 0.012,
      "step": 2232720
    },
    {
      "epoch": 3.6539279799427873,
      "grad_norm": 0.2313949018716812,
      "learning_rate": 2.7098831731054344e-06,
      "loss": 0.011,
      "step": 2232740
    },
    {
      "epoch": 3.6539607103814404,
      "grad_norm": 0.3977947235107422,
      "learning_rate": 2.709817280891917e-06,
      "loss": 0.0088,
      "step": 2232760
    },
    {
      "epoch": 3.653993440820094,
      "grad_norm": 0.3994247317314148,
      "learning_rate": 2.7097513886784e-06,
      "loss": 0.0106,
      "step": 2232780
    },
    {
      "epoch": 3.654026171258747,
      "grad_norm": 0.559421718120575,
      "learning_rate": 2.7096854964648826e-06,
      "loss": 0.012,
      "step": 2232800
    },
    {
      "epoch": 3.6540589016974003,
      "grad_norm": 0.03790142759680748,
      "learning_rate": 2.7096196042513657e-06,
      "loss": 0.013,
      "step": 2232820
    },
    {
      "epoch": 3.654091632136054,
      "grad_norm": 0.13966725766658783,
      "learning_rate": 2.7095537120378485e-06,
      "loss": 0.0097,
      "step": 2232840
    },
    {
      "epoch": 3.654124362574707,
      "grad_norm": 0.17818234860897064,
      "learning_rate": 2.7094878198243316e-06,
      "loss": 0.0099,
      "step": 2232860
    },
    {
      "epoch": 3.6541570930133607,
      "grad_norm": 0.20823904871940613,
      "learning_rate": 2.709421927610815e-06,
      "loss": 0.0135,
      "step": 2232880
    },
    {
      "epoch": 3.654189823452014,
      "grad_norm": 0.2577720582485199,
      "learning_rate": 2.7093560353972975e-06,
      "loss": 0.0082,
      "step": 2232900
    },
    {
      "epoch": 3.6542225538906674,
      "grad_norm": 0.12629052996635437,
      "learning_rate": 2.7092901431837803e-06,
      "loss": 0.0099,
      "step": 2232920
    },
    {
      "epoch": 3.6542552843293206,
      "grad_norm": 0.6384558081626892,
      "learning_rate": 2.709224250970263e-06,
      "loss": 0.0131,
      "step": 2232940
    },
    {
      "epoch": 3.6542880147679737,
      "grad_norm": 1.4267431497573853,
      "learning_rate": 2.709158358756746e-06,
      "loss": 0.0143,
      "step": 2232960
    },
    {
      "epoch": 3.6543207452066273,
      "grad_norm": 0.11582878232002258,
      "learning_rate": 2.709092466543229e-06,
      "loss": 0.0107,
      "step": 2232980
    },
    {
      "epoch": 3.6543534756452805,
      "grad_norm": 0.14154723286628723,
      "learning_rate": 2.7090265743297117e-06,
      "loss": 0.0098,
      "step": 2233000
    },
    {
      "epoch": 3.654386206083934,
      "grad_norm": 0.5225620269775391,
      "learning_rate": 2.7089606821161944e-06,
      "loss": 0.0108,
      "step": 2233020
    },
    {
      "epoch": 3.654418936522587,
      "grad_norm": 0.18306508660316467,
      "learning_rate": 2.7088947899026776e-06,
      "loss": 0.0127,
      "step": 2233040
    },
    {
      "epoch": 3.654451666961241,
      "grad_norm": 0.18377497792243958,
      "learning_rate": 2.7088288976891603e-06,
      "loss": 0.0088,
      "step": 2233060
    },
    {
      "epoch": 3.654484397399894,
      "grad_norm": 0.20357586443424225,
      "learning_rate": 2.708763005475643e-06,
      "loss": 0.0073,
      "step": 2233080
    },
    {
      "epoch": 3.654517127838547,
      "grad_norm": 0.2756598889827728,
      "learning_rate": 2.708697113262126e-06,
      "loss": 0.0085,
      "step": 2233100
    },
    {
      "epoch": 3.6545498582772007,
      "grad_norm": 0.35474392771720886,
      "learning_rate": 2.708631221048609e-06,
      "loss": 0.0095,
      "step": 2233120
    },
    {
      "epoch": 3.654582588715854,
      "grad_norm": 0.4312019348144531,
      "learning_rate": 2.7085653288350917e-06,
      "loss": 0.0121,
      "step": 2233140
    },
    {
      "epoch": 3.6546153191545074,
      "grad_norm": 0.14682504534721375,
      "learning_rate": 2.7084994366215744e-06,
      "loss": 0.0132,
      "step": 2233160
    },
    {
      "epoch": 3.6546480495931606,
      "grad_norm": 0.23277832567691803,
      "learning_rate": 2.708433544408057e-06,
      "loss": 0.0178,
      "step": 2233180
    },
    {
      "epoch": 3.654680780031814,
      "grad_norm": 0.11302512139081955,
      "learning_rate": 2.70836765219454e-06,
      "loss": 0.0082,
      "step": 2233200
    },
    {
      "epoch": 3.6547135104704673,
      "grad_norm": 0.4475319981575012,
      "learning_rate": 2.7083017599810235e-06,
      "loss": 0.0078,
      "step": 2233220
    },
    {
      "epoch": 3.6547462409091205,
      "grad_norm": 0.18856783211231232,
      "learning_rate": 2.7082358677675062e-06,
      "loss": 0.0137,
      "step": 2233240
    },
    {
      "epoch": 3.654778971347774,
      "grad_norm": 0.473469614982605,
      "learning_rate": 2.7081699755539894e-06,
      "loss": 0.0117,
      "step": 2233260
    },
    {
      "epoch": 3.6548117017864272,
      "grad_norm": 0.1572270542383194,
      "learning_rate": 2.708104083340472e-06,
      "loss": 0.0106,
      "step": 2233280
    },
    {
      "epoch": 3.654844432225081,
      "grad_norm": 0.4066035747528076,
      "learning_rate": 2.708038191126955e-06,
      "loss": 0.0113,
      "step": 2233300
    },
    {
      "epoch": 3.654877162663734,
      "grad_norm": 0.4329627752304077,
      "learning_rate": 2.7079722989134376e-06,
      "loss": 0.0181,
      "step": 2233320
    },
    {
      "epoch": 3.6549098931023876,
      "grad_norm": 0.11989825963973999,
      "learning_rate": 2.7079064066999204e-06,
      "loss": 0.0078,
      "step": 2233340
    },
    {
      "epoch": 3.6549426235410407,
      "grad_norm": 0.10189393162727356,
      "learning_rate": 2.7078405144864035e-06,
      "loss": 0.0057,
      "step": 2233360
    },
    {
      "epoch": 3.654975353979694,
      "grad_norm": 0.22021764516830444,
      "learning_rate": 2.7077746222728863e-06,
      "loss": 0.0121,
      "step": 2233380
    },
    {
      "epoch": 3.6550080844183475,
      "grad_norm": 0.5535517334938049,
      "learning_rate": 2.707708730059369e-06,
      "loss": 0.0139,
      "step": 2233400
    },
    {
      "epoch": 3.6550408148570006,
      "grad_norm": 0.3353799283504486,
      "learning_rate": 2.7076428378458517e-06,
      "loss": 0.0093,
      "step": 2233420
    },
    {
      "epoch": 3.655073545295654,
      "grad_norm": 0.38363170623779297,
      "learning_rate": 2.707576945632335e-06,
      "loss": 0.0101,
      "step": 2233440
    },
    {
      "epoch": 3.6551062757343074,
      "grad_norm": 0.41723349690437317,
      "learning_rate": 2.7075110534188176e-06,
      "loss": 0.0122,
      "step": 2233460
    },
    {
      "epoch": 3.655139006172961,
      "grad_norm": 0.12707862257957458,
      "learning_rate": 2.7074451612053004e-06,
      "loss": 0.0095,
      "step": 2233480
    },
    {
      "epoch": 3.655171736611614,
      "grad_norm": 0.11313081532716751,
      "learning_rate": 2.707379268991783e-06,
      "loss": 0.0109,
      "step": 2233500
    },
    {
      "epoch": 3.6552044670502672,
      "grad_norm": 0.10407580435276031,
      "learning_rate": 2.7073133767782663e-06,
      "loss": 0.0121,
      "step": 2233520
    },
    {
      "epoch": 3.655237197488921,
      "grad_norm": 0.8184136748313904,
      "learning_rate": 2.707247484564749e-06,
      "loss": 0.0143,
      "step": 2233540
    },
    {
      "epoch": 3.655269927927574,
      "grad_norm": 0.3316667377948761,
      "learning_rate": 2.707181592351232e-06,
      "loss": 0.0117,
      "step": 2233560
    },
    {
      "epoch": 3.6553026583662276,
      "grad_norm": 0.33536723256111145,
      "learning_rate": 2.7071157001377154e-06,
      "loss": 0.0083,
      "step": 2233580
    },
    {
      "epoch": 3.6553353888048807,
      "grad_norm": 0.13096000254154205,
      "learning_rate": 2.707049807924198e-06,
      "loss": 0.0088,
      "step": 2233600
    },
    {
      "epoch": 3.6553681192435343,
      "grad_norm": 0.17326486110687256,
      "learning_rate": 2.706983915710681e-06,
      "loss": 0.0118,
      "step": 2233620
    },
    {
      "epoch": 3.6554008496821875,
      "grad_norm": 0.5765974521636963,
      "learning_rate": 2.7069180234971636e-06,
      "loss": 0.0098,
      "step": 2233640
    },
    {
      "epoch": 3.6554335801208406,
      "grad_norm": 0.12022379040718079,
      "learning_rate": 2.7068521312836467e-06,
      "loss": 0.0094,
      "step": 2233660
    },
    {
      "epoch": 3.6554663105594942,
      "grad_norm": 0.22545838356018066,
      "learning_rate": 2.7067862390701295e-06,
      "loss": 0.0084,
      "step": 2233680
    },
    {
      "epoch": 3.6554990409981474,
      "grad_norm": 0.2898704707622528,
      "learning_rate": 2.7067203468566122e-06,
      "loss": 0.0097,
      "step": 2233700
    },
    {
      "epoch": 3.6555317714368005,
      "grad_norm": 0.7567245364189148,
      "learning_rate": 2.706654454643095e-06,
      "loss": 0.0181,
      "step": 2233720
    },
    {
      "epoch": 3.655564501875454,
      "grad_norm": 0.15896722674369812,
      "learning_rate": 2.7065885624295777e-06,
      "loss": 0.0113,
      "step": 2233740
    },
    {
      "epoch": 3.6555972323141077,
      "grad_norm": 0.16292063891887665,
      "learning_rate": 2.706522670216061e-06,
      "loss": 0.0118,
      "step": 2233760
    },
    {
      "epoch": 3.655629962752761,
      "grad_norm": 0.2045326977968216,
      "learning_rate": 2.7064567780025436e-06,
      "loss": 0.0079,
      "step": 2233780
    },
    {
      "epoch": 3.655662693191414,
      "grad_norm": 0.0733925849199295,
      "learning_rate": 2.7063908857890263e-06,
      "loss": 0.0115,
      "step": 2233800
    },
    {
      "epoch": 3.6556954236300676,
      "grad_norm": 0.40676531195640564,
      "learning_rate": 2.706324993575509e-06,
      "loss": 0.0133,
      "step": 2233820
    },
    {
      "epoch": 3.6557281540687208,
      "grad_norm": 0.28282541036605835,
      "learning_rate": 2.7062591013619922e-06,
      "loss": 0.0117,
      "step": 2233840
    },
    {
      "epoch": 3.655760884507374,
      "grad_norm": 0.1399153620004654,
      "learning_rate": 2.706193209148475e-06,
      "loss": 0.0059,
      "step": 2233860
    },
    {
      "epoch": 3.6557936149460275,
      "grad_norm": 0.18010590970516205,
      "learning_rate": 2.7061273169349577e-06,
      "loss": 0.0067,
      "step": 2233880
    },
    {
      "epoch": 3.655826345384681,
      "grad_norm": 0.5324400067329407,
      "learning_rate": 2.7060614247214405e-06,
      "loss": 0.0153,
      "step": 2233900
    },
    {
      "epoch": 3.6558590758233342,
      "grad_norm": 0.15827740728855133,
      "learning_rate": 2.705995532507924e-06,
      "loss": 0.0133,
      "step": 2233920
    },
    {
      "epoch": 3.6558918062619874,
      "grad_norm": 0.2367379069328308,
      "learning_rate": 2.705929640294407e-06,
      "loss": 0.0087,
      "step": 2233940
    },
    {
      "epoch": 3.655924536700641,
      "grad_norm": 0.1795983761548996,
      "learning_rate": 2.7058637480808895e-06,
      "loss": 0.0071,
      "step": 2233960
    },
    {
      "epoch": 3.655957267139294,
      "grad_norm": 0.1584012806415558,
      "learning_rate": 2.7057978558673727e-06,
      "loss": 0.0072,
      "step": 2233980
    },
    {
      "epoch": 3.6559899975779473,
      "grad_norm": 0.22134479880332947,
      "learning_rate": 2.7057319636538554e-06,
      "loss": 0.007,
      "step": 2234000
    },
    {
      "epoch": 3.656022728016601,
      "grad_norm": 0.35689935088157654,
      "learning_rate": 2.705666071440338e-06,
      "loss": 0.0113,
      "step": 2234020
    },
    {
      "epoch": 3.6560554584552545,
      "grad_norm": 0.47277188301086426,
      "learning_rate": 2.705600179226821e-06,
      "loss": 0.0107,
      "step": 2234040
    },
    {
      "epoch": 3.6560881888939076,
      "grad_norm": 0.49582773447036743,
      "learning_rate": 2.705534287013304e-06,
      "loss": 0.0113,
      "step": 2234060
    },
    {
      "epoch": 3.656120919332561,
      "grad_norm": 0.27566903829574585,
      "learning_rate": 2.705468394799787e-06,
      "loss": 0.0073,
      "step": 2234080
    },
    {
      "epoch": 3.6561536497712144,
      "grad_norm": 0.5887039303779602,
      "learning_rate": 2.7054025025862696e-06,
      "loss": 0.0113,
      "step": 2234100
    },
    {
      "epoch": 3.6561863802098675,
      "grad_norm": 0.3139854371547699,
      "learning_rate": 2.7053366103727523e-06,
      "loss": 0.0125,
      "step": 2234120
    },
    {
      "epoch": 3.6562191106485207,
      "grad_norm": 0.12426335364580154,
      "learning_rate": 2.7052707181592355e-06,
      "loss": 0.0097,
      "step": 2234140
    },
    {
      "epoch": 3.6562518410871743,
      "grad_norm": 0.11017722636461258,
      "learning_rate": 2.705204825945718e-06,
      "loss": 0.0103,
      "step": 2234160
    },
    {
      "epoch": 3.656284571525828,
      "grad_norm": 0.3483181595802307,
      "learning_rate": 2.705138933732201e-06,
      "loss": 0.016,
      "step": 2234180
    },
    {
      "epoch": 3.656317301964481,
      "grad_norm": 0.19296982884407043,
      "learning_rate": 2.7050730415186837e-06,
      "loss": 0.0075,
      "step": 2234200
    },
    {
      "epoch": 3.656350032403134,
      "grad_norm": 0.15437960624694824,
      "learning_rate": 2.7050071493051664e-06,
      "loss": 0.0085,
      "step": 2234220
    },
    {
      "epoch": 3.6563827628417878,
      "grad_norm": 0.1655103862285614,
      "learning_rate": 2.7049412570916496e-06,
      "loss": 0.0129,
      "step": 2234240
    },
    {
      "epoch": 3.656415493280441,
      "grad_norm": 0.22557146847248077,
      "learning_rate": 2.7048753648781327e-06,
      "loss": 0.01,
      "step": 2234260
    },
    {
      "epoch": 3.656448223719094,
      "grad_norm": 0.1979682743549347,
      "learning_rate": 2.7048094726646155e-06,
      "loss": 0.0109,
      "step": 2234280
    },
    {
      "epoch": 3.6564809541577477,
      "grad_norm": 0.03894433379173279,
      "learning_rate": 2.7047435804510986e-06,
      "loss": 0.0099,
      "step": 2234300
    },
    {
      "epoch": 3.656513684596401,
      "grad_norm": 0.811229407787323,
      "learning_rate": 2.7046776882375814e-06,
      "loss": 0.0083,
      "step": 2234320
    },
    {
      "epoch": 3.6565464150350544,
      "grad_norm": 0.9082954525947571,
      "learning_rate": 2.704611796024064e-06,
      "loss": 0.0099,
      "step": 2234340
    },
    {
      "epoch": 3.6565791454737075,
      "grad_norm": 0.5111849904060364,
      "learning_rate": 2.704545903810547e-06,
      "loss": 0.01,
      "step": 2234360
    },
    {
      "epoch": 3.656611875912361,
      "grad_norm": 0.20832566916942596,
      "learning_rate": 2.70448001159703e-06,
      "loss": 0.0082,
      "step": 2234380
    },
    {
      "epoch": 3.6566446063510143,
      "grad_norm": 0.11946594715118408,
      "learning_rate": 2.7044141193835128e-06,
      "loss": 0.0079,
      "step": 2234400
    },
    {
      "epoch": 3.6566773367896674,
      "grad_norm": Infinity,
      "learning_rate": 2.7043482271699955e-06,
      "loss": 0.0101,
      "step": 2234420
    },
    {
      "epoch": 3.656710067228321,
      "grad_norm": 0.28037020564079285,
      "learning_rate": 2.7042823349564782e-06,
      "loss": 0.012,
      "step": 2234440
    },
    {
      "epoch": 3.656742797666974,
      "grad_norm": 0.1561766415834427,
      "learning_rate": 2.7042164427429614e-06,
      "loss": 0.0084,
      "step": 2234460
    },
    {
      "epoch": 3.656775528105628,
      "grad_norm": 0.2425888627767563,
      "learning_rate": 2.704150550529444e-06,
      "loss": 0.0085,
      "step": 2234480
    },
    {
      "epoch": 3.656808258544281,
      "grad_norm": 0.20717573165893555,
      "learning_rate": 2.704084658315927e-06,
      "loss": 0.012,
      "step": 2234500
    },
    {
      "epoch": 3.6568409889829345,
      "grad_norm": 0.2769518792629242,
      "learning_rate": 2.7040187661024096e-06,
      "loss": 0.0149,
      "step": 2234520
    },
    {
      "epoch": 3.6568737194215877,
      "grad_norm": 0.2601260840892792,
      "learning_rate": 2.703952873888893e-06,
      "loss": 0.0138,
      "step": 2234540
    },
    {
      "epoch": 3.656906449860241,
      "grad_norm": 0.2560502886772156,
      "learning_rate": 2.7038869816753755e-06,
      "loss": 0.0082,
      "step": 2234560
    },
    {
      "epoch": 3.6569391802988944,
      "grad_norm": 0.13861821591854095,
      "learning_rate": 2.7038210894618583e-06,
      "loss": 0.0165,
      "step": 2234580
    },
    {
      "epoch": 3.6569719107375476,
      "grad_norm": 0.1279802918434143,
      "learning_rate": 2.703755197248341e-06,
      "loss": 0.0133,
      "step": 2234600
    },
    {
      "epoch": 3.657004641176201,
      "grad_norm": 0.3848313093185425,
      "learning_rate": 2.7036893050348246e-06,
      "loss": 0.0089,
      "step": 2234620
    },
    {
      "epoch": 3.6570373716148543,
      "grad_norm": 0.09738074243068695,
      "learning_rate": 2.7036234128213073e-06,
      "loss": 0.0106,
      "step": 2234640
    },
    {
      "epoch": 3.657070102053508,
      "grad_norm": 0.2935850918292999,
      "learning_rate": 2.70355752060779e-06,
      "loss": 0.0126,
      "step": 2234660
    },
    {
      "epoch": 3.657102832492161,
      "grad_norm": 0.1557539701461792,
      "learning_rate": 2.7034916283942732e-06,
      "loss": 0.0125,
      "step": 2234680
    },
    {
      "epoch": 3.657135562930814,
      "grad_norm": 0.11111515015363693,
      "learning_rate": 2.703425736180756e-06,
      "loss": 0.0125,
      "step": 2234700
    },
    {
      "epoch": 3.657168293369468,
      "grad_norm": 0.6622995138168335,
      "learning_rate": 2.7033598439672387e-06,
      "loss": 0.0155,
      "step": 2234720
    },
    {
      "epoch": 3.657201023808121,
      "grad_norm": 0.17176157236099243,
      "learning_rate": 2.7032939517537215e-06,
      "loss": 0.0156,
      "step": 2234740
    },
    {
      "epoch": 3.6572337542467745,
      "grad_norm": 0.1675044596195221,
      "learning_rate": 2.703228059540204e-06,
      "loss": 0.0165,
      "step": 2234760
    },
    {
      "epoch": 3.6572664846854277,
      "grad_norm": 0.19529668986797333,
      "learning_rate": 2.7031621673266874e-06,
      "loss": 0.0096,
      "step": 2234780
    },
    {
      "epoch": 3.6572992151240813,
      "grad_norm": 0.17631733417510986,
      "learning_rate": 2.70309627511317e-06,
      "loss": 0.0127,
      "step": 2234800
    },
    {
      "epoch": 3.6573319455627344,
      "grad_norm": 0.38092029094696045,
      "learning_rate": 2.703030382899653e-06,
      "loss": 0.0103,
      "step": 2234820
    },
    {
      "epoch": 3.6573646760013876,
      "grad_norm": 0.42324984073638916,
      "learning_rate": 2.7029644906861356e-06,
      "loss": 0.0131,
      "step": 2234840
    },
    {
      "epoch": 3.657397406440041,
      "grad_norm": 0.21654139459133148,
      "learning_rate": 2.7028985984726187e-06,
      "loss": 0.0099,
      "step": 2234860
    },
    {
      "epoch": 3.6574301368786943,
      "grad_norm": 0.23957745730876923,
      "learning_rate": 2.7028327062591015e-06,
      "loss": 0.0078,
      "step": 2234880
    },
    {
      "epoch": 3.657462867317348,
      "grad_norm": 0.07357245683670044,
      "learning_rate": 2.7027668140455842e-06,
      "loss": 0.0092,
      "step": 2234900
    },
    {
      "epoch": 3.657495597756001,
      "grad_norm": 0.1233954206109047,
      "learning_rate": 2.702700921832067e-06,
      "loss": 0.0144,
      "step": 2234920
    },
    {
      "epoch": 3.6575283281946547,
      "grad_norm": 0.11593069136142731,
      "learning_rate": 2.70263502961855e-06,
      "loss": 0.0119,
      "step": 2234940
    },
    {
      "epoch": 3.657561058633308,
      "grad_norm": 0.2853325307369232,
      "learning_rate": 2.702569137405033e-06,
      "loss": 0.0094,
      "step": 2234960
    },
    {
      "epoch": 3.657593789071961,
      "grad_norm": 0.23379145562648773,
      "learning_rate": 2.702503245191516e-06,
      "loss": 0.0141,
      "step": 2234980
    },
    {
      "epoch": 3.6576265195106146,
      "grad_norm": 0.4939074218273163,
      "learning_rate": 2.702437352977999e-06,
      "loss": 0.016,
      "step": 2235000
    },
    {
      "epoch": 3.6576592499492677,
      "grad_norm": 0.23164184391498566,
      "learning_rate": 2.702371460764482e-06,
      "loss": 0.0154,
      "step": 2235020
    },
    {
      "epoch": 3.6576919803879213,
      "grad_norm": 0.2900407016277313,
      "learning_rate": 2.7023055685509647e-06,
      "loss": 0.0084,
      "step": 2235040
    },
    {
      "epoch": 3.6577247108265745,
      "grad_norm": 0.7242562174797058,
      "learning_rate": 2.7022396763374474e-06,
      "loss": 0.0125,
      "step": 2235060
    },
    {
      "epoch": 3.657757441265228,
      "grad_norm": 0.2980184853076935,
      "learning_rate": 2.7021737841239306e-06,
      "loss": 0.0112,
      "step": 2235080
    },
    {
      "epoch": 3.657790171703881,
      "grad_norm": 0.19385266304016113,
      "learning_rate": 2.7021078919104133e-06,
      "loss": 0.012,
      "step": 2235100
    },
    {
      "epoch": 3.6578229021425344,
      "grad_norm": 0.052233338356018066,
      "learning_rate": 2.702041999696896e-06,
      "loss": 0.0078,
      "step": 2235120
    },
    {
      "epoch": 3.657855632581188,
      "grad_norm": 0.14963169395923615,
      "learning_rate": 2.701976107483379e-06,
      "loss": 0.0064,
      "step": 2235140
    },
    {
      "epoch": 3.657888363019841,
      "grad_norm": 0.28319817781448364,
      "learning_rate": 2.7019102152698615e-06,
      "loss": 0.0132,
      "step": 2235160
    },
    {
      "epoch": 3.6579210934584943,
      "grad_norm": 0.38399451971054077,
      "learning_rate": 2.7018443230563447e-06,
      "loss": 0.0111,
      "step": 2235180
    },
    {
      "epoch": 3.657953823897148,
      "grad_norm": 0.1547086089849472,
      "learning_rate": 2.7017784308428274e-06,
      "loss": 0.0173,
      "step": 2235200
    },
    {
      "epoch": 3.6579865543358014,
      "grad_norm": 0.19078192114830017,
      "learning_rate": 2.70171253862931e-06,
      "loss": 0.0141,
      "step": 2235220
    },
    {
      "epoch": 3.6580192847744546,
      "grad_norm": 0.147863507270813,
      "learning_rate": 2.701646646415793e-06,
      "loss": 0.0083,
      "step": 2235240
    },
    {
      "epoch": 3.6580520152131077,
      "grad_norm": 0.2486175000667572,
      "learning_rate": 2.701580754202276e-06,
      "loss": 0.009,
      "step": 2235260
    },
    {
      "epoch": 3.6580847456517613,
      "grad_norm": 0.11255909502506256,
      "learning_rate": 2.701514861988759e-06,
      "loss": 0.0101,
      "step": 2235280
    },
    {
      "epoch": 3.6581174760904145,
      "grad_norm": 0.16913248598575592,
      "learning_rate": 2.7014489697752416e-06,
      "loss": 0.0115,
      "step": 2235300
    },
    {
      "epoch": 3.6581502065290676,
      "grad_norm": 0.16074977815151215,
      "learning_rate": 2.701383077561725e-06,
      "loss": 0.0188,
      "step": 2235320
    },
    {
      "epoch": 3.6581829369677212,
      "grad_norm": 0.11778481304645538,
      "learning_rate": 2.701317185348208e-06,
      "loss": 0.0094,
      "step": 2235340
    },
    {
      "epoch": 3.658215667406375,
      "grad_norm": 0.12529206275939941,
      "learning_rate": 2.7012512931346906e-06,
      "loss": 0.0167,
      "step": 2235360
    },
    {
      "epoch": 3.658248397845028,
      "grad_norm": 0.1868707686662674,
      "learning_rate": 2.7011854009211734e-06,
      "loss": 0.0067,
      "step": 2235380
    },
    {
      "epoch": 3.658281128283681,
      "grad_norm": 0.19271530210971832,
      "learning_rate": 2.7011195087076565e-06,
      "loss": 0.0074,
      "step": 2235400
    },
    {
      "epoch": 3.6583138587223347,
      "grad_norm": 0.26578566431999207,
      "learning_rate": 2.7010536164941393e-06,
      "loss": 0.0098,
      "step": 2235420
    },
    {
      "epoch": 3.658346589160988,
      "grad_norm": 0.5148072838783264,
      "learning_rate": 2.700987724280622e-06,
      "loss": 0.0098,
      "step": 2235440
    },
    {
      "epoch": 3.658379319599641,
      "grad_norm": 0.13059532642364502,
      "learning_rate": 2.7009218320671048e-06,
      "loss": 0.008,
      "step": 2235460
    },
    {
      "epoch": 3.6584120500382946,
      "grad_norm": 0.09226372838020325,
      "learning_rate": 2.700855939853588e-06,
      "loss": 0.0074,
      "step": 2235480
    },
    {
      "epoch": 3.658444780476948,
      "grad_norm": 0.20011648535728455,
      "learning_rate": 2.7007900476400707e-06,
      "loss": 0.0078,
      "step": 2235500
    },
    {
      "epoch": 3.6584775109156014,
      "grad_norm": 0.2740658223628998,
      "learning_rate": 2.7007241554265534e-06,
      "loss": 0.0091,
      "step": 2235520
    },
    {
      "epoch": 3.6585102413542545,
      "grad_norm": 0.42664286494255066,
      "learning_rate": 2.700658263213036e-06,
      "loss": 0.0126,
      "step": 2235540
    },
    {
      "epoch": 3.658542971792908,
      "grad_norm": 0.21064570546150208,
      "learning_rate": 2.7005923709995193e-06,
      "loss": 0.0094,
      "step": 2235560
    },
    {
      "epoch": 3.6585757022315613,
      "grad_norm": 0.20081491768360138,
      "learning_rate": 2.700526478786002e-06,
      "loss": 0.0097,
      "step": 2235580
    },
    {
      "epoch": 3.6586084326702144,
      "grad_norm": 0.1908988356590271,
      "learning_rate": 2.7004605865724848e-06,
      "loss": 0.0088,
      "step": 2235600
    },
    {
      "epoch": 3.658641163108868,
      "grad_norm": 0.14825868606567383,
      "learning_rate": 2.7003946943589675e-06,
      "loss": 0.0132,
      "step": 2235620
    },
    {
      "epoch": 3.6586738935475216,
      "grad_norm": 0.18093575537204742,
      "learning_rate": 2.7003288021454503e-06,
      "loss": 0.008,
      "step": 2235640
    },
    {
      "epoch": 3.6587066239861747,
      "grad_norm": 0.41546493768692017,
      "learning_rate": 2.7002629099319334e-06,
      "loss": 0.0146,
      "step": 2235660
    },
    {
      "epoch": 3.658739354424828,
      "grad_norm": 0.5368474125862122,
      "learning_rate": 2.7001970177184166e-06,
      "loss": 0.0155,
      "step": 2235680
    },
    {
      "epoch": 3.6587720848634815,
      "grad_norm": 0.55174720287323,
      "learning_rate": 2.7001311255048993e-06,
      "loss": 0.0106,
      "step": 2235700
    },
    {
      "epoch": 3.6588048153021346,
      "grad_norm": 0.22531932592391968,
      "learning_rate": 2.7000652332913825e-06,
      "loss": 0.016,
      "step": 2235720
    },
    {
      "epoch": 3.658837545740788,
      "grad_norm": 0.3743591904640198,
      "learning_rate": 2.6999993410778652e-06,
      "loss": 0.0083,
      "step": 2235740
    },
    {
      "epoch": 3.6588702761794414,
      "grad_norm": 0.1945810616016388,
      "learning_rate": 2.699933448864348e-06,
      "loss": 0.0134,
      "step": 2235760
    },
    {
      "epoch": 3.6589030066180945,
      "grad_norm": 0.21144744753837585,
      "learning_rate": 2.6998675566508307e-06,
      "loss": 0.0106,
      "step": 2235780
    },
    {
      "epoch": 3.658935737056748,
      "grad_norm": 0.16694968938827515,
      "learning_rate": 2.699801664437314e-06,
      "loss": 0.0067,
      "step": 2235800
    },
    {
      "epoch": 3.6589684674954013,
      "grad_norm": 0.2176491916179657,
      "learning_rate": 2.6997357722237966e-06,
      "loss": 0.0098,
      "step": 2235820
    },
    {
      "epoch": 3.659001197934055,
      "grad_norm": 0.2665676176548004,
      "learning_rate": 2.6996698800102793e-06,
      "loss": 0.0116,
      "step": 2235840
    },
    {
      "epoch": 3.659033928372708,
      "grad_norm": 0.0883990228176117,
      "learning_rate": 2.699603987796762e-06,
      "loss": 0.0085,
      "step": 2235860
    },
    {
      "epoch": 3.659066658811361,
      "grad_norm": 0.2186998873949051,
      "learning_rate": 2.6995380955832453e-06,
      "loss": 0.0154,
      "step": 2235880
    },
    {
      "epoch": 3.6590993892500148,
      "grad_norm": 2.0534615516662598,
      "learning_rate": 2.699472203369728e-06,
      "loss": 0.0152,
      "step": 2235900
    },
    {
      "epoch": 3.659132119688668,
      "grad_norm": 0.3306930959224701,
      "learning_rate": 2.6994063111562107e-06,
      "loss": 0.0131,
      "step": 2235920
    },
    {
      "epoch": 3.6591648501273215,
      "grad_norm": 0.14909879863262177,
      "learning_rate": 2.6993404189426935e-06,
      "loss": 0.0073,
      "step": 2235940
    },
    {
      "epoch": 3.6591975805659747,
      "grad_norm": 0.1927807331085205,
      "learning_rate": 2.6992745267291766e-06,
      "loss": 0.0098,
      "step": 2235960
    },
    {
      "epoch": 3.6592303110046283,
      "grad_norm": 0.1407926380634308,
      "learning_rate": 2.6992086345156594e-06,
      "loss": 0.0097,
      "step": 2235980
    },
    {
      "epoch": 3.6592630414432814,
      "grad_norm": 0.26793500781059265,
      "learning_rate": 2.699142742302142e-06,
      "loss": 0.0157,
      "step": 2236000
    },
    {
      "epoch": 3.6592957718819346,
      "grad_norm": 0.09748222678899765,
      "learning_rate": 2.6990768500886257e-06,
      "loss": 0.0089,
      "step": 2236020
    },
    {
      "epoch": 3.659328502320588,
      "grad_norm": 0.13983912765979767,
      "learning_rate": 2.6990109578751084e-06,
      "loss": 0.0132,
      "step": 2236040
    },
    {
      "epoch": 3.6593612327592413,
      "grad_norm": 0.32440149784088135,
      "learning_rate": 2.698945065661591e-06,
      "loss": 0.0067,
      "step": 2236060
    },
    {
      "epoch": 3.659393963197895,
      "grad_norm": 0.5399071574211121,
      "learning_rate": 2.698879173448074e-06,
      "loss": 0.0136,
      "step": 2236080
    },
    {
      "epoch": 3.659426693636548,
      "grad_norm": 1.2752604484558105,
      "learning_rate": 2.698813281234557e-06,
      "loss": 0.01,
      "step": 2236100
    },
    {
      "epoch": 3.6594594240752016,
      "grad_norm": 0.20859397947788239,
      "learning_rate": 2.69874738902104e-06,
      "loss": 0.0124,
      "step": 2236120
    },
    {
      "epoch": 3.659492154513855,
      "grad_norm": 0.26228538155555725,
      "learning_rate": 2.6986814968075226e-06,
      "loss": 0.0101,
      "step": 2236140
    },
    {
      "epoch": 3.659524884952508,
      "grad_norm": 0.42235028743743896,
      "learning_rate": 2.6986156045940053e-06,
      "loss": 0.0116,
      "step": 2236160
    },
    {
      "epoch": 3.6595576153911615,
      "grad_norm": 0.20434395968914032,
      "learning_rate": 2.698549712380488e-06,
      "loss": 0.0098,
      "step": 2236180
    },
    {
      "epoch": 3.6595903458298147,
      "grad_norm": 0.9216824769973755,
      "learning_rate": 2.698483820166971e-06,
      "loss": 0.0115,
      "step": 2236200
    },
    {
      "epoch": 3.6596230762684683,
      "grad_norm": 0.2040897011756897,
      "learning_rate": 2.698417927953454e-06,
      "loss": 0.0127,
      "step": 2236220
    },
    {
      "epoch": 3.6596558067071214,
      "grad_norm": 0.36907535791397095,
      "learning_rate": 2.6983520357399367e-06,
      "loss": 0.0128,
      "step": 2236240
    },
    {
      "epoch": 3.659688537145775,
      "grad_norm": 0.23419196903705597,
      "learning_rate": 2.6982861435264194e-06,
      "loss": 0.0144,
      "step": 2236260
    },
    {
      "epoch": 3.659721267584428,
      "grad_norm": 0.2915708124637604,
      "learning_rate": 2.6982202513129026e-06,
      "loss": 0.0089,
      "step": 2236280
    },
    {
      "epoch": 3.6597539980230813,
      "grad_norm": 0.19960497319698334,
      "learning_rate": 2.6981543590993853e-06,
      "loss": 0.0163,
      "step": 2236300
    },
    {
      "epoch": 3.659786728461735,
      "grad_norm": 0.35463225841522217,
      "learning_rate": 2.698088466885868e-06,
      "loss": 0.0136,
      "step": 2236320
    },
    {
      "epoch": 3.659819458900388,
      "grad_norm": 0.3061080873012543,
      "learning_rate": 2.698022574672351e-06,
      "loss": 0.011,
      "step": 2236340
    },
    {
      "epoch": 3.6598521893390417,
      "grad_norm": 0.09078553318977356,
      "learning_rate": 2.697956682458834e-06,
      "loss": 0.0105,
      "step": 2236360
    },
    {
      "epoch": 3.659884919777695,
      "grad_norm": 0.7294420599937439,
      "learning_rate": 2.697890790245317e-06,
      "loss": 0.0117,
      "step": 2236380
    },
    {
      "epoch": 3.6599176502163484,
      "grad_norm": 0.29217901825904846,
      "learning_rate": 2.6978248980318e-06,
      "loss": 0.0107,
      "step": 2236400
    },
    {
      "epoch": 3.6599503806550016,
      "grad_norm": 0.20221440494060516,
      "learning_rate": 2.697759005818283e-06,
      "loss": 0.0105,
      "step": 2236420
    },
    {
      "epoch": 3.6599831110936547,
      "grad_norm": 0.14124630391597748,
      "learning_rate": 2.6976931136047658e-06,
      "loss": 0.0085,
      "step": 2236440
    },
    {
      "epoch": 3.6600158415323083,
      "grad_norm": 0.35689133405685425,
      "learning_rate": 2.6976272213912485e-06,
      "loss": 0.0097,
      "step": 2236460
    },
    {
      "epoch": 3.6600485719709615,
      "grad_norm": 0.3292683959007263,
      "learning_rate": 2.6975613291777313e-06,
      "loss": 0.0147,
      "step": 2236480
    },
    {
      "epoch": 3.660081302409615,
      "grad_norm": 0.34993743896484375,
      "learning_rate": 2.6974954369642144e-06,
      "loss": 0.0135,
      "step": 2236500
    },
    {
      "epoch": 3.660114032848268,
      "grad_norm": 0.05108817294239998,
      "learning_rate": 2.697429544750697e-06,
      "loss": 0.0161,
      "step": 2236520
    },
    {
      "epoch": 3.660146763286922,
      "grad_norm": 0.2086319625377655,
      "learning_rate": 2.69736365253718e-06,
      "loss": 0.0104,
      "step": 2236540
    },
    {
      "epoch": 3.660179493725575,
      "grad_norm": 0.21643123030662537,
      "learning_rate": 2.6972977603236626e-06,
      "loss": 0.0085,
      "step": 2236560
    },
    {
      "epoch": 3.660212224164228,
      "grad_norm": 0.6426394581794739,
      "learning_rate": 2.697231868110146e-06,
      "loss": 0.0105,
      "step": 2236580
    },
    {
      "epoch": 3.6602449546028817,
      "grad_norm": 0.23944897949695587,
      "learning_rate": 2.6971659758966285e-06,
      "loss": 0.0109,
      "step": 2236600
    },
    {
      "epoch": 3.660277685041535,
      "grad_norm": 0.20850172638893127,
      "learning_rate": 2.6971000836831113e-06,
      "loss": 0.01,
      "step": 2236620
    },
    {
      "epoch": 3.6603104154801884,
      "grad_norm": 0.25925543904304504,
      "learning_rate": 2.697034191469594e-06,
      "loss": 0.0107,
      "step": 2236640
    },
    {
      "epoch": 3.6603431459188416,
      "grad_norm": 0.27886101603507996,
      "learning_rate": 2.6969682992560768e-06,
      "loss": 0.0112,
      "step": 2236660
    },
    {
      "epoch": 3.660375876357495,
      "grad_norm": 0.20544423162937164,
      "learning_rate": 2.69690240704256e-06,
      "loss": 0.0094,
      "step": 2236680
    },
    {
      "epoch": 3.6604086067961483,
      "grad_norm": 0.08692507445812225,
      "learning_rate": 2.6968365148290427e-06,
      "loss": 0.0101,
      "step": 2236700
    },
    {
      "epoch": 3.6604413372348015,
      "grad_norm": 0.08001048117876053,
      "learning_rate": 2.6967706226155254e-06,
      "loss": 0.0111,
      "step": 2236720
    },
    {
      "epoch": 3.660474067673455,
      "grad_norm": 0.3686845302581787,
      "learning_rate": 2.696704730402009e-06,
      "loss": 0.0142,
      "step": 2236740
    },
    {
      "epoch": 3.660506798112108,
      "grad_norm": 0.37079620361328125,
      "learning_rate": 2.6966388381884917e-06,
      "loss": 0.0111,
      "step": 2236760
    },
    {
      "epoch": 3.6605395285507614,
      "grad_norm": 0.202497661113739,
      "learning_rate": 2.6965729459749745e-06,
      "loss": 0.0138,
      "step": 2236780
    },
    {
      "epoch": 3.660572258989415,
      "grad_norm": 0.38429561257362366,
      "learning_rate": 2.696507053761457e-06,
      "loss": 0.0121,
      "step": 2236800
    },
    {
      "epoch": 3.6606049894280686,
      "grad_norm": 0.4444892704486847,
      "learning_rate": 2.6964411615479404e-06,
      "loss": 0.0083,
      "step": 2236820
    },
    {
      "epoch": 3.6606377198667217,
      "grad_norm": 0.394715279340744,
      "learning_rate": 2.696375269334423e-06,
      "loss": 0.0098,
      "step": 2236840
    },
    {
      "epoch": 3.660670450305375,
      "grad_norm": 0.2911718785762787,
      "learning_rate": 2.696309377120906e-06,
      "loss": 0.015,
      "step": 2236860
    },
    {
      "epoch": 3.6607031807440285,
      "grad_norm": 0.13351526856422424,
      "learning_rate": 2.6962434849073886e-06,
      "loss": 0.0097,
      "step": 2236880
    },
    {
      "epoch": 3.6607359111826816,
      "grad_norm": 0.29645243287086487,
      "learning_rate": 2.6961775926938718e-06,
      "loss": 0.0159,
      "step": 2236900
    },
    {
      "epoch": 3.6607686416213348,
      "grad_norm": 0.35079821944236755,
      "learning_rate": 2.6961117004803545e-06,
      "loss": 0.014,
      "step": 2236920
    },
    {
      "epoch": 3.6608013720599883,
      "grad_norm": 0.24501381814479828,
      "learning_rate": 2.6960458082668372e-06,
      "loss": 0.0113,
      "step": 2236940
    },
    {
      "epoch": 3.660834102498642,
      "grad_norm": 0.45381027460098267,
      "learning_rate": 2.69597991605332e-06,
      "loss": 0.0087,
      "step": 2236960
    },
    {
      "epoch": 3.660866832937295,
      "grad_norm": 0.5573692321777344,
      "learning_rate": 2.695914023839803e-06,
      "loss": 0.0083,
      "step": 2236980
    },
    {
      "epoch": 3.6608995633759482,
      "grad_norm": 0.10665789991617203,
      "learning_rate": 2.695848131626286e-06,
      "loss": 0.0083,
      "step": 2237000
    },
    {
      "epoch": 3.660932293814602,
      "grad_norm": 0.19118618965148926,
      "learning_rate": 2.6957822394127686e-06,
      "loss": 0.0124,
      "step": 2237020
    },
    {
      "epoch": 3.660965024253255,
      "grad_norm": 0.19228416681289673,
      "learning_rate": 2.6957163471992514e-06,
      "loss": 0.0059,
      "step": 2237040
    },
    {
      "epoch": 3.660997754691908,
      "grad_norm": 0.3692297041416168,
      "learning_rate": 2.695650454985734e-06,
      "loss": 0.0096,
      "step": 2237060
    },
    {
      "epoch": 3.6610304851305617,
      "grad_norm": 0.9860897660255432,
      "learning_rate": 2.6955845627722177e-06,
      "loss": 0.0094,
      "step": 2237080
    },
    {
      "epoch": 3.6610632155692153,
      "grad_norm": 0.7762541174888611,
      "learning_rate": 2.6955186705587004e-06,
      "loss": 0.016,
      "step": 2237100
    },
    {
      "epoch": 3.6610959460078685,
      "grad_norm": 0.37436643242836,
      "learning_rate": 2.695452778345183e-06,
      "loss": 0.0131,
      "step": 2237120
    },
    {
      "epoch": 3.6611286764465216,
      "grad_norm": 0.0881703719496727,
      "learning_rate": 2.6953868861316663e-06,
      "loss": 0.0087,
      "step": 2237140
    },
    {
      "epoch": 3.661161406885175,
      "grad_norm": 0.11337186396121979,
      "learning_rate": 2.695320993918149e-06,
      "loss": 0.009,
      "step": 2237160
    },
    {
      "epoch": 3.6611941373238284,
      "grad_norm": 0.19439640641212463,
      "learning_rate": 2.695255101704632e-06,
      "loss": 0.0106,
      "step": 2237180
    },
    {
      "epoch": 3.6612268677624815,
      "grad_norm": 0.5193849205970764,
      "learning_rate": 2.6951892094911145e-06,
      "loss": 0.0086,
      "step": 2237200
    },
    {
      "epoch": 3.661259598201135,
      "grad_norm": 0.19387301802635193,
      "learning_rate": 2.6951233172775977e-06,
      "loss": 0.0121,
      "step": 2237220
    },
    {
      "epoch": 3.6612923286397887,
      "grad_norm": 0.38158223032951355,
      "learning_rate": 2.6950574250640804e-06,
      "loss": 0.011,
      "step": 2237240
    },
    {
      "epoch": 3.661325059078442,
      "grad_norm": 0.29668277502059937,
      "learning_rate": 2.694991532850563e-06,
      "loss": 0.0105,
      "step": 2237260
    },
    {
      "epoch": 3.661357789517095,
      "grad_norm": 0.14614759385585785,
      "learning_rate": 2.694925640637046e-06,
      "loss": 0.0106,
      "step": 2237280
    },
    {
      "epoch": 3.6613905199557486,
      "grad_norm": 0.21131891012191772,
      "learning_rate": 2.694859748423529e-06,
      "loss": 0.0046,
      "step": 2237300
    },
    {
      "epoch": 3.6614232503944018,
      "grad_norm": 0.43863239884376526,
      "learning_rate": 2.694793856210012e-06,
      "loss": 0.0131,
      "step": 2237320
    },
    {
      "epoch": 3.661455980833055,
      "grad_norm": 0.165549635887146,
      "learning_rate": 2.6947279639964946e-06,
      "loss": 0.0094,
      "step": 2237340
    },
    {
      "epoch": 3.6614887112717085,
      "grad_norm": 0.04946276545524597,
      "learning_rate": 2.6946620717829773e-06,
      "loss": 0.0096,
      "step": 2237360
    },
    {
      "epoch": 3.6615214417103616,
      "grad_norm": 0.10490463674068451,
      "learning_rate": 2.6945961795694605e-06,
      "loss": 0.0146,
      "step": 2237380
    },
    {
      "epoch": 3.6615541721490152,
      "grad_norm": 0.7185834646224976,
      "learning_rate": 2.6945302873559432e-06,
      "loss": 0.0121,
      "step": 2237400
    },
    {
      "epoch": 3.6615869025876684,
      "grad_norm": 0.3912288248538971,
      "learning_rate": 2.694464395142426e-06,
      "loss": 0.007,
      "step": 2237420
    },
    {
      "epoch": 3.661619633026322,
      "grad_norm": 0.42307770252227783,
      "learning_rate": 2.6943985029289095e-06,
      "loss": 0.0102,
      "step": 2237440
    },
    {
      "epoch": 3.661652363464975,
      "grad_norm": 0.22575248777866364,
      "learning_rate": 2.6943326107153923e-06,
      "loss": 0.0122,
      "step": 2237460
    },
    {
      "epoch": 3.6616850939036283,
      "grad_norm": 0.30925509333610535,
      "learning_rate": 2.694266718501875e-06,
      "loss": 0.0123,
      "step": 2237480
    },
    {
      "epoch": 3.661717824342282,
      "grad_norm": 0.3737814426422119,
      "learning_rate": 2.6942008262883578e-06,
      "loss": 0.0108,
      "step": 2237500
    },
    {
      "epoch": 3.661750554780935,
      "grad_norm": 1.0725924968719482,
      "learning_rate": 2.694134934074841e-06,
      "loss": 0.0097,
      "step": 2237520
    },
    {
      "epoch": 3.6617832852195886,
      "grad_norm": 0.19774022698402405,
      "learning_rate": 2.6940690418613237e-06,
      "loss": 0.0097,
      "step": 2237540
    },
    {
      "epoch": 3.6618160156582418,
      "grad_norm": 0.29480475187301636,
      "learning_rate": 2.6940031496478064e-06,
      "loss": 0.013,
      "step": 2237560
    },
    {
      "epoch": 3.6618487460968954,
      "grad_norm": 0.40205612778663635,
      "learning_rate": 2.693937257434289e-06,
      "loss": 0.012,
      "step": 2237580
    },
    {
      "epoch": 3.6618814765355485,
      "grad_norm": 0.14454111456871033,
      "learning_rate": 2.693871365220772e-06,
      "loss": 0.0108,
      "step": 2237600
    },
    {
      "epoch": 3.6619142069742017,
      "grad_norm": 0.29260510206222534,
      "learning_rate": 2.693805473007255e-06,
      "loss": 0.0085,
      "step": 2237620
    },
    {
      "epoch": 3.6619469374128553,
      "grad_norm": 0.13374526798725128,
      "learning_rate": 2.6937395807937378e-06,
      "loss": 0.0117,
      "step": 2237640
    },
    {
      "epoch": 3.6619796678515084,
      "grad_norm": 0.15205438435077667,
      "learning_rate": 2.6936736885802205e-06,
      "loss": 0.0144,
      "step": 2237660
    },
    {
      "epoch": 3.662012398290162,
      "grad_norm": 0.06925280392169952,
      "learning_rate": 2.6936077963667033e-06,
      "loss": 0.0108,
      "step": 2237680
    },
    {
      "epoch": 3.662045128728815,
      "grad_norm": 0.47954022884368896,
      "learning_rate": 2.6935419041531864e-06,
      "loss": 0.0156,
      "step": 2237700
    },
    {
      "epoch": 3.6620778591674688,
      "grad_norm": 0.19376981258392334,
      "learning_rate": 2.693476011939669e-06,
      "loss": 0.0098,
      "step": 2237720
    },
    {
      "epoch": 3.662110589606122,
      "grad_norm": 0.15709635615348816,
      "learning_rate": 2.693410119726152e-06,
      "loss": 0.0099,
      "step": 2237740
    },
    {
      "epoch": 3.662143320044775,
      "grad_norm": 0.1971503496170044,
      "learning_rate": 2.6933442275126346e-06,
      "loss": 0.0114,
      "step": 2237760
    },
    {
      "epoch": 3.6621760504834286,
      "grad_norm": 0.2522241175174713,
      "learning_rate": 2.6932783352991182e-06,
      "loss": 0.0091,
      "step": 2237780
    },
    {
      "epoch": 3.662208780922082,
      "grad_norm": 0.22971677780151367,
      "learning_rate": 2.693212443085601e-06,
      "loss": 0.0116,
      "step": 2237800
    },
    {
      "epoch": 3.6622415113607354,
      "grad_norm": 0.12810391187667847,
      "learning_rate": 2.6931465508720837e-06,
      "loss": 0.0078,
      "step": 2237820
    },
    {
      "epoch": 3.6622742417993885,
      "grad_norm": 0.12272216379642487,
      "learning_rate": 2.693080658658567e-06,
      "loss": 0.0101,
      "step": 2237840
    },
    {
      "epoch": 3.662306972238042,
      "grad_norm": 0.17200300097465515,
      "learning_rate": 2.6930147664450496e-06,
      "loss": 0.01,
      "step": 2237860
    },
    {
      "epoch": 3.6623397026766953,
      "grad_norm": 0.342973530292511,
      "learning_rate": 2.6929488742315324e-06,
      "loss": 0.0054,
      "step": 2237880
    },
    {
      "epoch": 3.6623724331153484,
      "grad_norm": 0.23019163310527802,
      "learning_rate": 2.692882982018015e-06,
      "loss": 0.0104,
      "step": 2237900
    },
    {
      "epoch": 3.662405163554002,
      "grad_norm": 0.27112990617752075,
      "learning_rate": 2.6928170898044983e-06,
      "loss": 0.0157,
      "step": 2237920
    },
    {
      "epoch": 3.662437893992655,
      "grad_norm": 0.2677963376045227,
      "learning_rate": 2.692751197590981e-06,
      "loss": 0.0049,
      "step": 2237940
    },
    {
      "epoch": 3.6624706244313088,
      "grad_norm": 0.2741359770298004,
      "learning_rate": 2.6926853053774637e-06,
      "loss": 0.0058,
      "step": 2237960
    },
    {
      "epoch": 3.662503354869962,
      "grad_norm": 0.8916109204292297,
      "learning_rate": 2.6926194131639465e-06,
      "loss": 0.012,
      "step": 2237980
    },
    {
      "epoch": 3.6625360853086155,
      "grad_norm": 0.18629074096679688,
      "learning_rate": 2.6925535209504296e-06,
      "loss": 0.0072,
      "step": 2238000
    },
    {
      "epoch": 3.6625688157472687,
      "grad_norm": 0.18092352151870728,
      "learning_rate": 2.6924876287369124e-06,
      "loss": 0.0143,
      "step": 2238020
    },
    {
      "epoch": 3.662601546185922,
      "grad_norm": 0.18534433841705322,
      "learning_rate": 2.692421736523395e-06,
      "loss": 0.0086,
      "step": 2238040
    },
    {
      "epoch": 3.6626342766245754,
      "grad_norm": 0.37156057357788086,
      "learning_rate": 2.692355844309878e-06,
      "loss": 0.0129,
      "step": 2238060
    },
    {
      "epoch": 3.6626670070632286,
      "grad_norm": 0.8854758739471436,
      "learning_rate": 2.6922899520963606e-06,
      "loss": 0.0161,
      "step": 2238080
    },
    {
      "epoch": 3.662699737501882,
      "grad_norm": 0.05788620188832283,
      "learning_rate": 2.6922240598828438e-06,
      "loss": 0.0147,
      "step": 2238100
    },
    {
      "epoch": 3.6627324679405353,
      "grad_norm": 0.1443748027086258,
      "learning_rate": 2.6921581676693265e-06,
      "loss": 0.0138,
      "step": 2238120
    },
    {
      "epoch": 3.662765198379189,
      "grad_norm": 0.28673067688941956,
      "learning_rate": 2.6920922754558097e-06,
      "loss": 0.0124,
      "step": 2238140
    },
    {
      "epoch": 3.662797928817842,
      "grad_norm": 0.416230708360672,
      "learning_rate": 2.692026383242293e-06,
      "loss": 0.0092,
      "step": 2238160
    },
    {
      "epoch": 3.662830659256495,
      "grad_norm": 0.2701956331729889,
      "learning_rate": 2.6919604910287756e-06,
      "loss": 0.0118,
      "step": 2238180
    },
    {
      "epoch": 3.662863389695149,
      "grad_norm": 0.4782538414001465,
      "learning_rate": 2.6918945988152583e-06,
      "loss": 0.0105,
      "step": 2238200
    },
    {
      "epoch": 3.662896120133802,
      "grad_norm": 0.36756452918052673,
      "learning_rate": 2.691828706601741e-06,
      "loss": 0.0065,
      "step": 2238220
    },
    {
      "epoch": 3.662928850572455,
      "grad_norm": 0.05802549049258232,
      "learning_rate": 2.691762814388224e-06,
      "loss": 0.0109,
      "step": 2238240
    },
    {
      "epoch": 3.6629615810111087,
      "grad_norm": 0.3787298798561096,
      "learning_rate": 2.691696922174707e-06,
      "loss": 0.0125,
      "step": 2238260
    },
    {
      "epoch": 3.6629943114497623,
      "grad_norm": 0.1965114325284958,
      "learning_rate": 2.6916310299611897e-06,
      "loss": 0.008,
      "step": 2238280
    },
    {
      "epoch": 3.6630270418884154,
      "grad_norm": 0.20591768622398376,
      "learning_rate": 2.6915651377476724e-06,
      "loss": 0.0091,
      "step": 2238300
    },
    {
      "epoch": 3.6630597723270686,
      "grad_norm": 0.2893805503845215,
      "learning_rate": 2.6914992455341556e-06,
      "loss": 0.014,
      "step": 2238320
    },
    {
      "epoch": 3.663092502765722,
      "grad_norm": 0.175155371427536,
      "learning_rate": 2.6914333533206383e-06,
      "loss": 0.0108,
      "step": 2238340
    },
    {
      "epoch": 3.6631252332043753,
      "grad_norm": 0.17355896532535553,
      "learning_rate": 2.691367461107121e-06,
      "loss": 0.0123,
      "step": 2238360
    },
    {
      "epoch": 3.6631579636430285,
      "grad_norm": 0.25639426708221436,
      "learning_rate": 2.691301568893604e-06,
      "loss": 0.0101,
      "step": 2238380
    },
    {
      "epoch": 3.663190694081682,
      "grad_norm": 0.5279756784439087,
      "learning_rate": 2.691235676680087e-06,
      "loss": 0.008,
      "step": 2238400
    },
    {
      "epoch": 3.6632234245203357,
      "grad_norm": 0.2445247918367386,
      "learning_rate": 2.6911697844665697e-06,
      "loss": 0.0132,
      "step": 2238420
    },
    {
      "epoch": 3.663256154958989,
      "grad_norm": 0.31513699889183044,
      "learning_rate": 2.6911038922530525e-06,
      "loss": 0.0132,
      "step": 2238440
    },
    {
      "epoch": 3.663288885397642,
      "grad_norm": 0.19267474114894867,
      "learning_rate": 2.691038000039535e-06,
      "loss": 0.0119,
      "step": 2238460
    },
    {
      "epoch": 3.6633216158362956,
      "grad_norm": 0.05600380897521973,
      "learning_rate": 2.690972107826018e-06,
      "loss": 0.0121,
      "step": 2238480
    },
    {
      "epoch": 3.6633543462749487,
      "grad_norm": 0.34414899349212646,
      "learning_rate": 2.6909062156125015e-06,
      "loss": 0.0121,
      "step": 2238500
    },
    {
      "epoch": 3.663387076713602,
      "grad_norm": 0.21873363852500916,
      "learning_rate": 2.6908403233989843e-06,
      "loss": 0.0174,
      "step": 2238520
    },
    {
      "epoch": 3.6634198071522555,
      "grad_norm": 0.17357061803340912,
      "learning_rate": 2.6907744311854674e-06,
      "loss": 0.0132,
      "step": 2238540
    },
    {
      "epoch": 3.663452537590909,
      "grad_norm": 0.4353448450565338,
      "learning_rate": 2.69070853897195e-06,
      "loss": 0.011,
      "step": 2238560
    },
    {
      "epoch": 3.663485268029562,
      "grad_norm": 0.05159566551446915,
      "learning_rate": 2.690642646758433e-06,
      "loss": 0.0119,
      "step": 2238580
    },
    {
      "epoch": 3.6635179984682154,
      "grad_norm": 0.060011010617017746,
      "learning_rate": 2.6905767545449156e-06,
      "loss": 0.0091,
      "step": 2238600
    },
    {
      "epoch": 3.663550728906869,
      "grad_norm": 0.13648144900798798,
      "learning_rate": 2.6905108623313984e-06,
      "loss": 0.0086,
      "step": 2238620
    },
    {
      "epoch": 3.663583459345522,
      "grad_norm": 0.1873164027929306,
      "learning_rate": 2.6904449701178815e-06,
      "loss": 0.0129,
      "step": 2238640
    },
    {
      "epoch": 3.6636161897841752,
      "grad_norm": 0.3555585443973541,
      "learning_rate": 2.6903790779043643e-06,
      "loss": 0.0086,
      "step": 2238660
    },
    {
      "epoch": 3.663648920222829,
      "grad_norm": 0.1909603625535965,
      "learning_rate": 2.690313185690847e-06,
      "loss": 0.0093,
      "step": 2238680
    },
    {
      "epoch": 3.6636816506614824,
      "grad_norm": 0.1739097237586975,
      "learning_rate": 2.6902472934773298e-06,
      "loss": 0.0113,
      "step": 2238700
    },
    {
      "epoch": 3.6637143811001356,
      "grad_norm": 0.14797784388065338,
      "learning_rate": 2.690181401263813e-06,
      "loss": 0.0098,
      "step": 2238720
    },
    {
      "epoch": 3.6637471115387887,
      "grad_norm": 0.3748859465122223,
      "learning_rate": 2.6901155090502957e-06,
      "loss": 0.0085,
      "step": 2238740
    },
    {
      "epoch": 3.6637798419774423,
      "grad_norm": 0.04868948832154274,
      "learning_rate": 2.6900496168367784e-06,
      "loss": 0.0129,
      "step": 2238760
    },
    {
      "epoch": 3.6638125724160955,
      "grad_norm": 0.11460928618907928,
      "learning_rate": 2.689983724623261e-06,
      "loss": 0.011,
      "step": 2238780
    },
    {
      "epoch": 3.6638453028547486,
      "grad_norm": 0.2699979841709137,
      "learning_rate": 2.6899178324097443e-06,
      "loss": 0.0079,
      "step": 2238800
    },
    {
      "epoch": 3.6638780332934022,
      "grad_norm": 0.14213798940181732,
      "learning_rate": 2.689851940196227e-06,
      "loss": 0.0066,
      "step": 2238820
    },
    {
      "epoch": 3.6639107637320554,
      "grad_norm": 0.12689568102359772,
      "learning_rate": 2.6897860479827102e-06,
      "loss": 0.0099,
      "step": 2238840
    },
    {
      "epoch": 3.663943494170709,
      "grad_norm": 0.058876100927591324,
      "learning_rate": 2.6897201557691934e-06,
      "loss": 0.0105,
      "step": 2238860
    },
    {
      "epoch": 3.663976224609362,
      "grad_norm": 0.16961118578910828,
      "learning_rate": 2.689654263555676e-06,
      "loss": 0.0107,
      "step": 2238880
    },
    {
      "epoch": 3.6640089550480157,
      "grad_norm": 0.4142184555530548,
      "learning_rate": 2.689588371342159e-06,
      "loss": 0.0126,
      "step": 2238900
    },
    {
      "epoch": 3.664041685486669,
      "grad_norm": 0.32651108503341675,
      "learning_rate": 2.6895224791286416e-06,
      "loss": 0.0096,
      "step": 2238920
    },
    {
      "epoch": 3.664074415925322,
      "grad_norm": 0.05291328579187393,
      "learning_rate": 2.6894565869151248e-06,
      "loss": 0.0103,
      "step": 2238940
    },
    {
      "epoch": 3.6641071463639756,
      "grad_norm": 0.09364162385463715,
      "learning_rate": 2.6893906947016075e-06,
      "loss": 0.0093,
      "step": 2238960
    },
    {
      "epoch": 3.6641398768026288,
      "grad_norm": 0.3667154014110565,
      "learning_rate": 2.6893248024880902e-06,
      "loss": 0.01,
      "step": 2238980
    },
    {
      "epoch": 3.6641726072412824,
      "grad_norm": 0.2663857638835907,
      "learning_rate": 2.689258910274573e-06,
      "loss": 0.0132,
      "step": 2239000
    },
    {
      "epoch": 3.6642053376799355,
      "grad_norm": 0.37558355927467346,
      "learning_rate": 2.6891930180610557e-06,
      "loss": 0.008,
      "step": 2239020
    },
    {
      "epoch": 3.664238068118589,
      "grad_norm": 0.406305193901062,
      "learning_rate": 2.689127125847539e-06,
      "loss": 0.0166,
      "step": 2239040
    },
    {
      "epoch": 3.6642707985572422,
      "grad_norm": 0.09997572749853134,
      "learning_rate": 2.6890612336340216e-06,
      "loss": 0.0138,
      "step": 2239060
    },
    {
      "epoch": 3.6643035289958954,
      "grad_norm": 0.16837462782859802,
      "learning_rate": 2.6889953414205044e-06,
      "loss": 0.0098,
      "step": 2239080
    },
    {
      "epoch": 3.664336259434549,
      "grad_norm": 1.3939754962921143,
      "learning_rate": 2.688929449206987e-06,
      "loss": 0.016,
      "step": 2239100
    },
    {
      "epoch": 3.664368989873202,
      "grad_norm": 0.11463415622711182,
      "learning_rate": 2.6888635569934703e-06,
      "loss": 0.0092,
      "step": 2239120
    },
    {
      "epoch": 3.6644017203118557,
      "grad_norm": 0.280164510011673,
      "learning_rate": 2.688797664779953e-06,
      "loss": 0.0091,
      "step": 2239140
    },
    {
      "epoch": 3.664434450750509,
      "grad_norm": 0.2809179127216339,
      "learning_rate": 2.6887317725664357e-06,
      "loss": 0.0119,
      "step": 2239160
    },
    {
      "epoch": 3.6644671811891625,
      "grad_norm": 0.09952417761087418,
      "learning_rate": 2.6886658803529185e-06,
      "loss": 0.0084,
      "step": 2239180
    },
    {
      "epoch": 3.6644999116278156,
      "grad_norm": 0.23768222332000732,
      "learning_rate": 2.688599988139402e-06,
      "loss": 0.017,
      "step": 2239200
    },
    {
      "epoch": 3.664532642066469,
      "grad_norm": 0.28398585319519043,
      "learning_rate": 2.688534095925885e-06,
      "loss": 0.0085,
      "step": 2239220
    },
    {
      "epoch": 3.6645653725051224,
      "grad_norm": 0.22475242614746094,
      "learning_rate": 2.6884682037123676e-06,
      "loss": 0.0216,
      "step": 2239240
    },
    {
      "epoch": 3.6645981029437755,
      "grad_norm": 0.24628084897994995,
      "learning_rate": 2.6884023114988507e-06,
      "loss": 0.0105,
      "step": 2239260
    },
    {
      "epoch": 3.664630833382429,
      "grad_norm": 0.35454171895980835,
      "learning_rate": 2.6883364192853335e-06,
      "loss": 0.012,
      "step": 2239280
    },
    {
      "epoch": 3.6646635638210823,
      "grad_norm": 0.12365864217281342,
      "learning_rate": 2.688270527071816e-06,
      "loss": 0.0088,
      "step": 2239300
    },
    {
      "epoch": 3.664696294259736,
      "grad_norm": 0.3909347951412201,
      "learning_rate": 2.688204634858299e-06,
      "loss": 0.0125,
      "step": 2239320
    },
    {
      "epoch": 3.664729024698389,
      "grad_norm": 0.0721587985754013,
      "learning_rate": 2.688138742644782e-06,
      "loss": 0.0113,
      "step": 2239340
    },
    {
      "epoch": 3.664761755137042,
      "grad_norm": 0.16694730520248413,
      "learning_rate": 2.688072850431265e-06,
      "loss": 0.0125,
      "step": 2239360
    },
    {
      "epoch": 3.6647944855756958,
      "grad_norm": 0.19629208743572235,
      "learning_rate": 2.6880069582177476e-06,
      "loss": 0.011,
      "step": 2239380
    },
    {
      "epoch": 3.664827216014349,
      "grad_norm": 0.45191314816474915,
      "learning_rate": 2.6879410660042303e-06,
      "loss": 0.0138,
      "step": 2239400
    },
    {
      "epoch": 3.6648599464530025,
      "grad_norm": 0.5633642673492432,
      "learning_rate": 2.6878751737907135e-06,
      "loss": 0.0131,
      "step": 2239420
    },
    {
      "epoch": 3.6648926768916557,
      "grad_norm": 0.11049684137105942,
      "learning_rate": 2.6878092815771962e-06,
      "loss": 0.0106,
      "step": 2239440
    },
    {
      "epoch": 3.6649254073303092,
      "grad_norm": 0.39749982953071594,
      "learning_rate": 2.687743389363679e-06,
      "loss": 0.0137,
      "step": 2239460
    },
    {
      "epoch": 3.6649581377689624,
      "grad_norm": 0.13720978796482086,
      "learning_rate": 2.6876774971501617e-06,
      "loss": 0.0134,
      "step": 2239480
    },
    {
      "epoch": 3.6649908682076155,
      "grad_norm": 0.20908299088478088,
      "learning_rate": 2.6876116049366444e-06,
      "loss": 0.0088,
      "step": 2239500
    },
    {
      "epoch": 3.665023598646269,
      "grad_norm": 0.3295590877532959,
      "learning_rate": 2.6875457127231276e-06,
      "loss": 0.0088,
      "step": 2239520
    },
    {
      "epoch": 3.6650563290849223,
      "grad_norm": 1.01585054397583,
      "learning_rate": 2.6874798205096108e-06,
      "loss": 0.0116,
      "step": 2239540
    },
    {
      "epoch": 3.665089059523576,
      "grad_norm": 0.24136002361774445,
      "learning_rate": 2.6874139282960935e-06,
      "loss": 0.0193,
      "step": 2239560
    },
    {
      "epoch": 3.665121789962229,
      "grad_norm": 0.12711335718631744,
      "learning_rate": 2.6873480360825767e-06,
      "loss": 0.0074,
      "step": 2239580
    },
    {
      "epoch": 3.6651545204008826,
      "grad_norm": 0.11586876958608627,
      "learning_rate": 2.6872821438690594e-06,
      "loss": 0.014,
      "step": 2239600
    },
    {
      "epoch": 3.665187250839536,
      "grad_norm": 0.4245776832103729,
      "learning_rate": 2.687216251655542e-06,
      "loss": 0.0108,
      "step": 2239620
    },
    {
      "epoch": 3.665219981278189,
      "grad_norm": 0.20017385482788086,
      "learning_rate": 2.687150359442025e-06,
      "loss": 0.0081,
      "step": 2239640
    },
    {
      "epoch": 3.6652527117168425,
      "grad_norm": 0.22039133310317993,
      "learning_rate": 2.687084467228508e-06,
      "loss": 0.0108,
      "step": 2239660
    },
    {
      "epoch": 3.6652854421554957,
      "grad_norm": 0.4174404442310333,
      "learning_rate": 2.687018575014991e-06,
      "loss": 0.0085,
      "step": 2239680
    },
    {
      "epoch": 3.6653181725941493,
      "grad_norm": 0.10713519155979156,
      "learning_rate": 2.6869526828014735e-06,
      "loss": 0.0097,
      "step": 2239700
    },
    {
      "epoch": 3.6653509030328024,
      "grad_norm": 0.13531267642974854,
      "learning_rate": 2.6868867905879563e-06,
      "loss": 0.0107,
      "step": 2239720
    },
    {
      "epoch": 3.665383633471456,
      "grad_norm": 0.053966522216796875,
      "learning_rate": 2.6868208983744394e-06,
      "loss": 0.0084,
      "step": 2239740
    },
    {
      "epoch": 3.665416363910109,
      "grad_norm": 0.1262996643781662,
      "learning_rate": 2.686755006160922e-06,
      "loss": 0.0081,
      "step": 2239760
    },
    {
      "epoch": 3.6654490943487623,
      "grad_norm": 0.15559475123882294,
      "learning_rate": 2.686689113947405e-06,
      "loss": 0.0094,
      "step": 2239780
    },
    {
      "epoch": 3.665481824787416,
      "grad_norm": 0.17249491810798645,
      "learning_rate": 2.6866232217338877e-06,
      "loss": 0.0065,
      "step": 2239800
    },
    {
      "epoch": 3.665514555226069,
      "grad_norm": 0.11740892380475998,
      "learning_rate": 2.686557329520371e-06,
      "loss": 0.0071,
      "step": 2239820
    },
    {
      "epoch": 3.665547285664722,
      "grad_norm": 0.37473857402801514,
      "learning_rate": 2.6864914373068536e-06,
      "loss": 0.01,
      "step": 2239840
    },
    {
      "epoch": 3.665580016103376,
      "grad_norm": 0.25024572014808655,
      "learning_rate": 2.6864255450933363e-06,
      "loss": 0.0122,
      "step": 2239860
    },
    {
      "epoch": 3.6656127465420294,
      "grad_norm": 0.2670612037181854,
      "learning_rate": 2.686359652879819e-06,
      "loss": 0.0062,
      "step": 2239880
    },
    {
      "epoch": 3.6656454769806825,
      "grad_norm": 0.08448166400194168,
      "learning_rate": 2.6862937606663026e-06,
      "loss": 0.0074,
      "step": 2239900
    },
    {
      "epoch": 3.6656782074193357,
      "grad_norm": 0.5179440379142761,
      "learning_rate": 2.6862278684527854e-06,
      "loss": 0.0116,
      "step": 2239920
    },
    {
      "epoch": 3.6657109378579893,
      "grad_norm": 0.1877802163362503,
      "learning_rate": 2.686161976239268e-06,
      "loss": 0.016,
      "step": 2239940
    },
    {
      "epoch": 3.6657436682966424,
      "grad_norm": 0.12433478981256485,
      "learning_rate": 2.6860960840257513e-06,
      "loss": 0.0097,
      "step": 2239960
    },
    {
      "epoch": 3.6657763987352956,
      "grad_norm": 0.06945820152759552,
      "learning_rate": 2.686030191812234e-06,
      "loss": 0.0126,
      "step": 2239980
    },
    {
      "epoch": 3.665809129173949,
      "grad_norm": 0.24014608561992645,
      "learning_rate": 2.6859642995987167e-06,
      "loss": 0.0085,
      "step": 2240000
    },
    {
      "epoch": 3.665841859612603,
      "grad_norm": 0.1976328343153,
      "learning_rate": 2.6858984073851995e-06,
      "loss": 0.0118,
      "step": 2240020
    },
    {
      "epoch": 3.665874590051256,
      "grad_norm": 0.19374342262744904,
      "learning_rate": 2.6858325151716822e-06,
      "loss": 0.0087,
      "step": 2240040
    },
    {
      "epoch": 3.665907320489909,
      "grad_norm": 0.37612012028694153,
      "learning_rate": 2.6857666229581654e-06,
      "loss": 0.0131,
      "step": 2240060
    },
    {
      "epoch": 3.6659400509285627,
      "grad_norm": 0.46941813826560974,
      "learning_rate": 2.685700730744648e-06,
      "loss": 0.012,
      "step": 2240080
    },
    {
      "epoch": 3.665972781367216,
      "grad_norm": 0.19884264469146729,
      "learning_rate": 2.685634838531131e-06,
      "loss": 0.0096,
      "step": 2240100
    },
    {
      "epoch": 3.666005511805869,
      "grad_norm": 0.4949006736278534,
      "learning_rate": 2.6855689463176136e-06,
      "loss": 0.0179,
      "step": 2240120
    },
    {
      "epoch": 3.6660382422445226,
      "grad_norm": 0.10792228579521179,
      "learning_rate": 2.6855030541040968e-06,
      "loss": 0.0071,
      "step": 2240140
    },
    {
      "epoch": 3.666070972683176,
      "grad_norm": 0.2339882105588913,
      "learning_rate": 2.6854371618905795e-06,
      "loss": 0.0128,
      "step": 2240160
    },
    {
      "epoch": 3.6661037031218293,
      "grad_norm": 0.32796338200569153,
      "learning_rate": 2.6853712696770622e-06,
      "loss": 0.0121,
      "step": 2240180
    },
    {
      "epoch": 3.6661364335604825,
      "grad_norm": 0.07893475890159607,
      "learning_rate": 2.685305377463545e-06,
      "loss": 0.0147,
      "step": 2240200
    },
    {
      "epoch": 3.666169163999136,
      "grad_norm": 0.6416136026382446,
      "learning_rate": 2.685239485250028e-06,
      "loss": 0.0159,
      "step": 2240220
    },
    {
      "epoch": 3.666201894437789,
      "grad_norm": 0.41262927651405334,
      "learning_rate": 2.6851735930365113e-06,
      "loss": 0.0095,
      "step": 2240240
    },
    {
      "epoch": 3.6662346248764424,
      "grad_norm": 0.6609415411949158,
      "learning_rate": 2.685107700822994e-06,
      "loss": 0.0115,
      "step": 2240260
    },
    {
      "epoch": 3.666267355315096,
      "grad_norm": 0.23222428560256958,
      "learning_rate": 2.6850418086094772e-06,
      "loss": 0.0074,
      "step": 2240280
    },
    {
      "epoch": 3.6663000857537495,
      "grad_norm": 0.14900842308998108,
      "learning_rate": 2.68497591639596e-06,
      "loss": 0.0145,
      "step": 2240300
    },
    {
      "epoch": 3.6663328161924027,
      "grad_norm": 0.3645379841327667,
      "learning_rate": 2.6849100241824427e-06,
      "loss": 0.0069,
      "step": 2240320
    },
    {
      "epoch": 3.666365546631056,
      "grad_norm": 0.5518047213554382,
      "learning_rate": 2.6848441319689254e-06,
      "loss": 0.0108,
      "step": 2240340
    },
    {
      "epoch": 3.6663982770697094,
      "grad_norm": 0.2955242693424225,
      "learning_rate": 2.6847782397554086e-06,
      "loss": 0.0119,
      "step": 2240360
    },
    {
      "epoch": 3.6664310075083626,
      "grad_norm": 0.6391690969467163,
      "learning_rate": 2.6847123475418913e-06,
      "loss": 0.0113,
      "step": 2240380
    },
    {
      "epoch": 3.6664637379470157,
      "grad_norm": 0.2545182406902313,
      "learning_rate": 2.684646455328374e-06,
      "loss": 0.009,
      "step": 2240400
    },
    {
      "epoch": 3.6664964683856693,
      "grad_norm": 0.14310885965824127,
      "learning_rate": 2.684580563114857e-06,
      "loss": 0.013,
      "step": 2240420
    },
    {
      "epoch": 3.6665291988243225,
      "grad_norm": 0.26573091745376587,
      "learning_rate": 2.6845146709013396e-06,
      "loss": 0.0119,
      "step": 2240440
    },
    {
      "epoch": 3.666561929262976,
      "grad_norm": 0.15793967247009277,
      "learning_rate": 2.6844487786878227e-06,
      "loss": 0.0105,
      "step": 2240460
    },
    {
      "epoch": 3.6665946597016292,
      "grad_norm": 0.10216329991817474,
      "learning_rate": 2.6843828864743055e-06,
      "loss": 0.0093,
      "step": 2240480
    },
    {
      "epoch": 3.666627390140283,
      "grad_norm": 0.507674515247345,
      "learning_rate": 2.684316994260788e-06,
      "loss": 0.0087,
      "step": 2240500
    },
    {
      "epoch": 3.666660120578936,
      "grad_norm": 0.194822758436203,
      "learning_rate": 2.684251102047271e-06,
      "loss": 0.0133,
      "step": 2240520
    },
    {
      "epoch": 3.666692851017589,
      "grad_norm": 0.6137591004371643,
      "learning_rate": 2.684185209833754e-06,
      "loss": 0.0126,
      "step": 2240540
    },
    {
      "epoch": 3.6667255814562427,
      "grad_norm": 0.4767493009567261,
      "learning_rate": 2.684119317620237e-06,
      "loss": 0.0074,
      "step": 2240560
    },
    {
      "epoch": 3.666758311894896,
      "grad_norm": 0.056763626635074615,
      "learning_rate": 2.6840534254067196e-06,
      "loss": 0.0104,
      "step": 2240580
    },
    {
      "epoch": 3.6667910423335495,
      "grad_norm": 0.6356978416442871,
      "learning_rate": 2.683987533193203e-06,
      "loss": 0.0119,
      "step": 2240600
    },
    {
      "epoch": 3.6668237727722026,
      "grad_norm": 0.47558143734931946,
      "learning_rate": 2.683921640979686e-06,
      "loss": 0.0146,
      "step": 2240620
    },
    {
      "epoch": 3.666856503210856,
      "grad_norm": 0.13596183061599731,
      "learning_rate": 2.6838557487661687e-06,
      "loss": 0.0074,
      "step": 2240640
    },
    {
      "epoch": 3.6668892336495094,
      "grad_norm": 0.4422833323478699,
      "learning_rate": 2.6837898565526514e-06,
      "loss": 0.0137,
      "step": 2240660
    },
    {
      "epoch": 3.6669219640881625,
      "grad_norm": 0.22632326185703278,
      "learning_rate": 2.6837239643391346e-06,
      "loss": 0.0088,
      "step": 2240680
    },
    {
      "epoch": 3.666954694526816,
      "grad_norm": 0.893701434135437,
      "learning_rate": 2.6836580721256173e-06,
      "loss": 0.0131,
      "step": 2240700
    },
    {
      "epoch": 3.6669874249654693,
      "grad_norm": 0.08658561110496521,
      "learning_rate": 2.6835921799121e-06,
      "loss": 0.0083,
      "step": 2240720
    },
    {
      "epoch": 3.667020155404123,
      "grad_norm": 0.14865316450595856,
      "learning_rate": 2.6835262876985828e-06,
      "loss": 0.0107,
      "step": 2240740
    },
    {
      "epoch": 3.667052885842776,
      "grad_norm": 0.1958758682012558,
      "learning_rate": 2.683460395485066e-06,
      "loss": 0.0112,
      "step": 2240760
    },
    {
      "epoch": 3.6670856162814296,
      "grad_norm": 0.11104308068752289,
      "learning_rate": 2.6833945032715487e-06,
      "loss": 0.0156,
      "step": 2240780
    },
    {
      "epoch": 3.6671183467200827,
      "grad_norm": 0.4565103352069855,
      "learning_rate": 2.6833286110580314e-06,
      "loss": 0.01,
      "step": 2240800
    },
    {
      "epoch": 3.667151077158736,
      "grad_norm": 0.03221272677183151,
      "learning_rate": 2.683262718844514e-06,
      "loss": 0.0128,
      "step": 2240820
    },
    {
      "epoch": 3.6671838075973895,
      "grad_norm": 0.6293228268623352,
      "learning_rate": 2.6831968266309973e-06,
      "loss": 0.0123,
      "step": 2240840
    },
    {
      "epoch": 3.6672165380360426,
      "grad_norm": 0.1110437735915184,
      "learning_rate": 2.68313093441748e-06,
      "loss": 0.0095,
      "step": 2240860
    },
    {
      "epoch": 3.6672492684746962,
      "grad_norm": 0.24875319004058838,
      "learning_rate": 2.683065042203963e-06,
      "loss": 0.0131,
      "step": 2240880
    },
    {
      "epoch": 3.6672819989133494,
      "grad_norm": 0.1914437860250473,
      "learning_rate": 2.6829991499904455e-06,
      "loss": 0.009,
      "step": 2240900
    },
    {
      "epoch": 3.667314729352003,
      "grad_norm": 0.5002875328063965,
      "learning_rate": 2.6829332577769283e-06,
      "loss": 0.0102,
      "step": 2240920
    },
    {
      "epoch": 3.667347459790656,
      "grad_norm": 0.14854983985424042,
      "learning_rate": 2.6828673655634114e-06,
      "loss": 0.0111,
      "step": 2240940
    },
    {
      "epoch": 3.6673801902293093,
      "grad_norm": 0.6042291522026062,
      "learning_rate": 2.6828014733498946e-06,
      "loss": 0.0132,
      "step": 2240960
    },
    {
      "epoch": 3.667412920667963,
      "grad_norm": 0.48048317432403564,
      "learning_rate": 2.6827355811363773e-06,
      "loss": 0.0145,
      "step": 2240980
    },
    {
      "epoch": 3.667445651106616,
      "grad_norm": 0.30586695671081543,
      "learning_rate": 2.6826696889228605e-06,
      "loss": 0.0105,
      "step": 2241000
    },
    {
      "epoch": 3.6674783815452696,
      "grad_norm": 0.53139328956604,
      "learning_rate": 2.6826037967093432e-06,
      "loss": 0.0104,
      "step": 2241020
    },
    {
      "epoch": 3.6675111119839228,
      "grad_norm": 0.13659796118736267,
      "learning_rate": 2.682537904495826e-06,
      "loss": 0.0079,
      "step": 2241040
    },
    {
      "epoch": 3.6675438424225764,
      "grad_norm": 0.6148696541786194,
      "learning_rate": 2.6824720122823087e-06,
      "loss": 0.0096,
      "step": 2241060
    },
    {
      "epoch": 3.6675765728612295,
      "grad_norm": 0.23398102819919586,
      "learning_rate": 2.682406120068792e-06,
      "loss": 0.0114,
      "step": 2241080
    },
    {
      "epoch": 3.6676093032998827,
      "grad_norm": 0.16849498450756073,
      "learning_rate": 2.6823402278552746e-06,
      "loss": 0.0121,
      "step": 2241100
    },
    {
      "epoch": 3.6676420337385363,
      "grad_norm": 0.2809329032897949,
      "learning_rate": 2.6822743356417574e-06,
      "loss": 0.0092,
      "step": 2241120
    },
    {
      "epoch": 3.6676747641771894,
      "grad_norm": 0.41185498237609863,
      "learning_rate": 2.68220844342824e-06,
      "loss": 0.01,
      "step": 2241140
    },
    {
      "epoch": 3.667707494615843,
      "grad_norm": 0.6266224384307861,
      "learning_rate": 2.6821425512147233e-06,
      "loss": 0.0153,
      "step": 2241160
    },
    {
      "epoch": 3.667740225054496,
      "grad_norm": 0.3021058738231659,
      "learning_rate": 2.682076659001206e-06,
      "loss": 0.013,
      "step": 2241180
    },
    {
      "epoch": 3.6677729554931497,
      "grad_norm": 0.24622048437595367,
      "learning_rate": 2.6820107667876888e-06,
      "loss": 0.0158,
      "step": 2241200
    },
    {
      "epoch": 3.667805685931803,
      "grad_norm": 0.22079400718212128,
      "learning_rate": 2.6819448745741715e-06,
      "loss": 0.0135,
      "step": 2241220
    },
    {
      "epoch": 3.667838416370456,
      "grad_norm": 0.056109022349119186,
      "learning_rate": 2.6818789823606547e-06,
      "loss": 0.0112,
      "step": 2241240
    },
    {
      "epoch": 3.6678711468091096,
      "grad_norm": 0.18062593042850494,
      "learning_rate": 2.6818130901471374e-06,
      "loss": 0.0112,
      "step": 2241260
    },
    {
      "epoch": 3.667903877247763,
      "grad_norm": 0.1972602903842926,
      "learning_rate": 2.68174719793362e-06,
      "loss": 0.0081,
      "step": 2241280
    },
    {
      "epoch": 3.667936607686416,
      "grad_norm": 0.29316845536231995,
      "learning_rate": 2.6816813057201037e-06,
      "loss": 0.0099,
      "step": 2241300
    },
    {
      "epoch": 3.6679693381250695,
      "grad_norm": 0.630573034286499,
      "learning_rate": 2.6816154135065865e-06,
      "loss": 0.0093,
      "step": 2241320
    },
    {
      "epoch": 3.668002068563723,
      "grad_norm": 0.2934875190258026,
      "learning_rate": 2.681549521293069e-06,
      "loss": 0.0105,
      "step": 2241340
    },
    {
      "epoch": 3.6680347990023763,
      "grad_norm": 0.12953969836235046,
      "learning_rate": 2.681483629079552e-06,
      "loss": 0.0082,
      "step": 2241360
    },
    {
      "epoch": 3.6680675294410294,
      "grad_norm": 0.1676063984632492,
      "learning_rate": 2.681417736866035e-06,
      "loss": 0.0117,
      "step": 2241380
    },
    {
      "epoch": 3.668100259879683,
      "grad_norm": 0.382710337638855,
      "learning_rate": 2.681351844652518e-06,
      "loss": 0.0137,
      "step": 2241400
    },
    {
      "epoch": 3.668132990318336,
      "grad_norm": 0.22337153553962708,
      "learning_rate": 2.6812859524390006e-06,
      "loss": 0.0123,
      "step": 2241420
    },
    {
      "epoch": 3.6681657207569893,
      "grad_norm": 0.09576129913330078,
      "learning_rate": 2.6812200602254833e-06,
      "loss": 0.0069,
      "step": 2241440
    },
    {
      "epoch": 3.668198451195643,
      "grad_norm": 0.11810752749443054,
      "learning_rate": 2.681154168011966e-06,
      "loss": 0.0101,
      "step": 2241460
    },
    {
      "epoch": 3.6682311816342965,
      "grad_norm": 0.21539616584777832,
      "learning_rate": 2.6810882757984492e-06,
      "loss": 0.0137,
      "step": 2241480
    },
    {
      "epoch": 3.6682639120729497,
      "grad_norm": 0.3100888431072235,
      "learning_rate": 2.681022383584932e-06,
      "loss": 0.0118,
      "step": 2241500
    },
    {
      "epoch": 3.668296642511603,
      "grad_norm": 0.2947918176651001,
      "learning_rate": 2.6809564913714147e-06,
      "loss": 0.0109,
      "step": 2241520
    },
    {
      "epoch": 3.6683293729502564,
      "grad_norm": 0.2301645576953888,
      "learning_rate": 2.6808905991578974e-06,
      "loss": 0.0063,
      "step": 2241540
    },
    {
      "epoch": 3.6683621033889096,
      "grad_norm": 0.18704719841480255,
      "learning_rate": 2.6808247069443806e-06,
      "loss": 0.0109,
      "step": 2241560
    },
    {
      "epoch": 3.6683948338275627,
      "grad_norm": 0.5917717814445496,
      "learning_rate": 2.6807588147308633e-06,
      "loss": 0.0128,
      "step": 2241580
    },
    {
      "epoch": 3.6684275642662163,
      "grad_norm": 0.16409119963645935,
      "learning_rate": 2.680692922517346e-06,
      "loss": 0.0077,
      "step": 2241600
    },
    {
      "epoch": 3.66846029470487,
      "grad_norm": 0.4583916664123535,
      "learning_rate": 2.680627030303829e-06,
      "loss": 0.0106,
      "step": 2241620
    },
    {
      "epoch": 3.668493025143523,
      "grad_norm": 0.049127884209156036,
      "learning_rate": 2.680561138090312e-06,
      "loss": 0.0082,
      "step": 2241640
    },
    {
      "epoch": 3.668525755582176,
      "grad_norm": 0.13827523589134216,
      "learning_rate": 2.680495245876795e-06,
      "loss": 0.0165,
      "step": 2241660
    },
    {
      "epoch": 3.66855848602083,
      "grad_norm": 0.17335806787014008,
      "learning_rate": 2.680429353663278e-06,
      "loss": 0.0076,
      "step": 2241680
    },
    {
      "epoch": 3.668591216459483,
      "grad_norm": 0.26154810190200806,
      "learning_rate": 2.680363461449761e-06,
      "loss": 0.0129,
      "step": 2241700
    },
    {
      "epoch": 3.668623946898136,
      "grad_norm": 0.36931100487709045,
      "learning_rate": 2.680297569236244e-06,
      "loss": 0.0114,
      "step": 2241720
    },
    {
      "epoch": 3.6686566773367897,
      "grad_norm": 0.2550394833087921,
      "learning_rate": 2.6802316770227265e-06,
      "loss": 0.0144,
      "step": 2241740
    },
    {
      "epoch": 3.6686894077754433,
      "grad_norm": 0.666883647441864,
      "learning_rate": 2.6801657848092093e-06,
      "loss": 0.0187,
      "step": 2241760
    },
    {
      "epoch": 3.6687221382140964,
      "grad_norm": 0.24789300560951233,
      "learning_rate": 2.6800998925956924e-06,
      "loss": 0.0116,
      "step": 2241780
    },
    {
      "epoch": 3.6687548686527496,
      "grad_norm": 0.16661915183067322,
      "learning_rate": 2.680034000382175e-06,
      "loss": 0.0098,
      "step": 2241800
    },
    {
      "epoch": 3.668787599091403,
      "grad_norm": 0.30303677916526794,
      "learning_rate": 2.679968108168658e-06,
      "loss": 0.0094,
      "step": 2241820
    },
    {
      "epoch": 3.6688203295300563,
      "grad_norm": 0.1337846964597702,
      "learning_rate": 2.6799022159551407e-06,
      "loss": 0.0091,
      "step": 2241840
    },
    {
      "epoch": 3.6688530599687095,
      "grad_norm": 0.07773445546627045,
      "learning_rate": 2.679836323741624e-06,
      "loss": 0.0095,
      "step": 2241860
    },
    {
      "epoch": 3.668885790407363,
      "grad_norm": 0.4088793396949768,
      "learning_rate": 2.6797704315281066e-06,
      "loss": 0.0112,
      "step": 2241880
    },
    {
      "epoch": 3.668918520846016,
      "grad_norm": 0.16037027537822723,
      "learning_rate": 2.6797045393145893e-06,
      "loss": 0.0078,
      "step": 2241900
    },
    {
      "epoch": 3.66895125128467,
      "grad_norm": 0.13740205764770508,
      "learning_rate": 2.679638647101072e-06,
      "loss": 0.0106,
      "step": 2241920
    },
    {
      "epoch": 3.668983981723323,
      "grad_norm": 0.23541133105754852,
      "learning_rate": 2.6795727548875548e-06,
      "loss": 0.0129,
      "step": 2241940
    },
    {
      "epoch": 3.6690167121619766,
      "grad_norm": 0.1711287498474121,
      "learning_rate": 2.679506862674038e-06,
      "loss": 0.0135,
      "step": 2241960
    },
    {
      "epoch": 3.6690494426006297,
      "grad_norm": 0.22572076320648193,
      "learning_rate": 2.6794409704605207e-06,
      "loss": 0.0105,
      "step": 2241980
    },
    {
      "epoch": 3.669082173039283,
      "grad_norm": 0.4267139434814453,
      "learning_rate": 2.679375078247004e-06,
      "loss": 0.0153,
      "step": 2242000
    },
    {
      "epoch": 3.6691149034779365,
      "grad_norm": 0.18387746810913086,
      "learning_rate": 2.679309186033487e-06,
      "loss": 0.0148,
      "step": 2242020
    },
    {
      "epoch": 3.6691476339165896,
      "grad_norm": 0.20664234459400177,
      "learning_rate": 2.6792432938199698e-06,
      "loss": 0.0087,
      "step": 2242040
    },
    {
      "epoch": 3.669180364355243,
      "grad_norm": 0.29077643156051636,
      "learning_rate": 2.6791774016064525e-06,
      "loss": 0.0127,
      "step": 2242060
    },
    {
      "epoch": 3.6692130947938963,
      "grad_norm": 0.3625011146068573,
      "learning_rate": 2.6791115093929352e-06,
      "loss": 0.0152,
      "step": 2242080
    },
    {
      "epoch": 3.66924582523255,
      "grad_norm": 0.21570199728012085,
      "learning_rate": 2.6790456171794184e-06,
      "loss": 0.0115,
      "step": 2242100
    },
    {
      "epoch": 3.669278555671203,
      "grad_norm": 0.1843646764755249,
      "learning_rate": 2.678979724965901e-06,
      "loss": 0.0134,
      "step": 2242120
    },
    {
      "epoch": 3.6693112861098562,
      "grad_norm": 0.15666881203651428,
      "learning_rate": 2.678913832752384e-06,
      "loss": 0.0109,
      "step": 2242140
    },
    {
      "epoch": 3.66934401654851,
      "grad_norm": 0.214115709066391,
      "learning_rate": 2.6788479405388666e-06,
      "loss": 0.0082,
      "step": 2242160
    },
    {
      "epoch": 3.669376746987163,
      "grad_norm": 0.19401460886001587,
      "learning_rate": 2.6787820483253498e-06,
      "loss": 0.0116,
      "step": 2242180
    },
    {
      "epoch": 3.6694094774258166,
      "grad_norm": 0.11053063720464706,
      "learning_rate": 2.6787161561118325e-06,
      "loss": 0.0113,
      "step": 2242200
    },
    {
      "epoch": 3.6694422078644697,
      "grad_norm": 0.4623991549015045,
      "learning_rate": 2.6786502638983153e-06,
      "loss": 0.0129,
      "step": 2242220
    },
    {
      "epoch": 3.6694749383031233,
      "grad_norm": 0.25479331612586975,
      "learning_rate": 2.678584371684798e-06,
      "loss": 0.0113,
      "step": 2242240
    },
    {
      "epoch": 3.6695076687417765,
      "grad_norm": 0.23732997477054596,
      "learning_rate": 2.678518479471281e-06,
      "loss": 0.0097,
      "step": 2242260
    },
    {
      "epoch": 3.6695403991804296,
      "grad_norm": 0.24952654540538788,
      "learning_rate": 2.678452587257764e-06,
      "loss": 0.0075,
      "step": 2242280
    },
    {
      "epoch": 3.669573129619083,
      "grad_norm": 0.10908644646406174,
      "learning_rate": 2.6783866950442466e-06,
      "loss": 0.01,
      "step": 2242300
    },
    {
      "epoch": 3.6696058600577364,
      "grad_norm": 0.061862293630838394,
      "learning_rate": 2.6783208028307294e-06,
      "loss": 0.0103,
      "step": 2242320
    },
    {
      "epoch": 3.66963859049639,
      "grad_norm": 0.20387880504131317,
      "learning_rate": 2.678254910617212e-06,
      "loss": 0.0116,
      "step": 2242340
    },
    {
      "epoch": 3.669671320935043,
      "grad_norm": 0.2640540599822998,
      "learning_rate": 2.6781890184036957e-06,
      "loss": 0.0093,
      "step": 2242360
    },
    {
      "epoch": 3.6697040513736967,
      "grad_norm": 0.40618422627449036,
      "learning_rate": 2.6781231261901784e-06,
      "loss": 0.0108,
      "step": 2242380
    },
    {
      "epoch": 3.66973678181235,
      "grad_norm": 0.5311495065689087,
      "learning_rate": 2.6780572339766616e-06,
      "loss": 0.0092,
      "step": 2242400
    },
    {
      "epoch": 3.669769512251003,
      "grad_norm": 0.5123701691627502,
      "learning_rate": 2.6779913417631443e-06,
      "loss": 0.0086,
      "step": 2242420
    },
    {
      "epoch": 3.6698022426896566,
      "grad_norm": 0.33850935101509094,
      "learning_rate": 2.677925449549627e-06,
      "loss": 0.0109,
      "step": 2242440
    },
    {
      "epoch": 3.6698349731283098,
      "grad_norm": 0.1667724996805191,
      "learning_rate": 2.67785955733611e-06,
      "loss": 0.0126,
      "step": 2242460
    },
    {
      "epoch": 3.6698677035669633,
      "grad_norm": 0.6295790076255798,
      "learning_rate": 2.6777936651225926e-06,
      "loss": 0.0092,
      "step": 2242480
    },
    {
      "epoch": 3.6699004340056165,
      "grad_norm": 0.26908186078071594,
      "learning_rate": 2.6777277729090757e-06,
      "loss": 0.0118,
      "step": 2242500
    },
    {
      "epoch": 3.66993316444427,
      "grad_norm": 0.08534133434295654,
      "learning_rate": 2.6776618806955585e-06,
      "loss": 0.0097,
      "step": 2242520
    },
    {
      "epoch": 3.6699658948829232,
      "grad_norm": 0.2690490186214447,
      "learning_rate": 2.677595988482041e-06,
      "loss": 0.0153,
      "step": 2242540
    },
    {
      "epoch": 3.6699986253215764,
      "grad_norm": 0.17368006706237793,
      "learning_rate": 2.677530096268524e-06,
      "loss": 0.0115,
      "step": 2242560
    },
    {
      "epoch": 3.67003135576023,
      "grad_norm": 0.1754184365272522,
      "learning_rate": 2.677464204055007e-06,
      "loss": 0.0114,
      "step": 2242580
    },
    {
      "epoch": 3.670064086198883,
      "grad_norm": 0.47379744052886963,
      "learning_rate": 2.67739831184149e-06,
      "loss": 0.0108,
      "step": 2242600
    },
    {
      "epoch": 3.6700968166375367,
      "grad_norm": 0.2574459910392761,
      "learning_rate": 2.6773324196279726e-06,
      "loss": 0.0119,
      "step": 2242620
    },
    {
      "epoch": 3.67012954707619,
      "grad_norm": 0.20959216356277466,
      "learning_rate": 2.6772665274144553e-06,
      "loss": 0.0124,
      "step": 2242640
    },
    {
      "epoch": 3.6701622775148435,
      "grad_norm": 0.1423707753419876,
      "learning_rate": 2.6772006352009385e-06,
      "loss": 0.0103,
      "step": 2242660
    },
    {
      "epoch": 3.6701950079534966,
      "grad_norm": 0.2203105092048645,
      "learning_rate": 2.6771347429874212e-06,
      "loss": 0.0065,
      "step": 2242680
    },
    {
      "epoch": 3.6702277383921498,
      "grad_norm": 0.08920983225107193,
      "learning_rate": 2.677068850773904e-06,
      "loss": 0.0076,
      "step": 2242700
    },
    {
      "epoch": 3.6702604688308034,
      "grad_norm": 0.30949634313583374,
      "learning_rate": 2.6770029585603876e-06,
      "loss": 0.016,
      "step": 2242720
    },
    {
      "epoch": 3.6702931992694565,
      "grad_norm": 0.3746282458305359,
      "learning_rate": 2.6769370663468703e-06,
      "loss": 0.0088,
      "step": 2242740
    },
    {
      "epoch": 3.67032592970811,
      "grad_norm": 0.0946722999215126,
      "learning_rate": 2.676871174133353e-06,
      "loss": 0.0084,
      "step": 2242760
    },
    {
      "epoch": 3.6703586601467633,
      "grad_norm": 0.522819995880127,
      "learning_rate": 2.6768052819198358e-06,
      "loss": 0.0152,
      "step": 2242780
    },
    {
      "epoch": 3.670391390585417,
      "grad_norm": 0.30385246872901917,
      "learning_rate": 2.676739389706319e-06,
      "loss": 0.0113,
      "step": 2242800
    },
    {
      "epoch": 3.67042412102407,
      "grad_norm": 0.7525188326835632,
      "learning_rate": 2.6766734974928017e-06,
      "loss": 0.0159,
      "step": 2242820
    },
    {
      "epoch": 3.670456851462723,
      "grad_norm": 0.5871658325195312,
      "learning_rate": 2.6766076052792844e-06,
      "loss": 0.0186,
      "step": 2242840
    },
    {
      "epoch": 3.6704895819013768,
      "grad_norm": 0.13301795721054077,
      "learning_rate": 2.676541713065767e-06,
      "loss": 0.0114,
      "step": 2242860
    },
    {
      "epoch": 3.67052231234003,
      "grad_norm": 1.152224063873291,
      "learning_rate": 2.67647582085225e-06,
      "loss": 0.0107,
      "step": 2242880
    },
    {
      "epoch": 3.670555042778683,
      "grad_norm": 0.5616565346717834,
      "learning_rate": 2.676409928638733e-06,
      "loss": 0.0102,
      "step": 2242900
    },
    {
      "epoch": 3.6705877732173366,
      "grad_norm": 0.1473562866449356,
      "learning_rate": 2.676344036425216e-06,
      "loss": 0.0114,
      "step": 2242920
    },
    {
      "epoch": 3.6706205036559902,
      "grad_norm": 0.22501912713050842,
      "learning_rate": 2.6762781442116985e-06,
      "loss": 0.0095,
      "step": 2242940
    },
    {
      "epoch": 3.6706532340946434,
      "grad_norm": 0.1817188560962677,
      "learning_rate": 2.6762122519981813e-06,
      "loss": 0.0123,
      "step": 2242960
    },
    {
      "epoch": 3.6706859645332965,
      "grad_norm": 0.27186110615730286,
      "learning_rate": 2.6761463597846644e-06,
      "loss": 0.0121,
      "step": 2242980
    },
    {
      "epoch": 3.67071869497195,
      "grad_norm": 0.1752256453037262,
      "learning_rate": 2.676080467571147e-06,
      "loss": 0.0122,
      "step": 2243000
    },
    {
      "epoch": 3.6707514254106033,
      "grad_norm": 0.09956784546375275,
      "learning_rate": 2.67601457535763e-06,
      "loss": 0.0076,
      "step": 2243020
    },
    {
      "epoch": 3.6707841558492564,
      "grad_norm": 0.1800607442855835,
      "learning_rate": 2.6759486831441127e-06,
      "loss": 0.0164,
      "step": 2243040
    },
    {
      "epoch": 3.67081688628791,
      "grad_norm": 0.6548409461975098,
      "learning_rate": 2.6758827909305963e-06,
      "loss": 0.0131,
      "step": 2243060
    },
    {
      "epoch": 3.6708496167265636,
      "grad_norm": 0.5497658848762512,
      "learning_rate": 2.675816898717079e-06,
      "loss": 0.0144,
      "step": 2243080
    },
    {
      "epoch": 3.6708823471652168,
      "grad_norm": 0.203933447599411,
      "learning_rate": 2.6757510065035617e-06,
      "loss": 0.0128,
      "step": 2243100
    },
    {
      "epoch": 3.67091507760387,
      "grad_norm": 0.1598498672246933,
      "learning_rate": 2.675685114290045e-06,
      "loss": 0.0089,
      "step": 2243120
    },
    {
      "epoch": 3.6709478080425235,
      "grad_norm": 0.6970047950744629,
      "learning_rate": 2.6756192220765276e-06,
      "loss": 0.0154,
      "step": 2243140
    },
    {
      "epoch": 3.6709805384811767,
      "grad_norm": 0.12416408956050873,
      "learning_rate": 2.6755533298630104e-06,
      "loss": 0.0134,
      "step": 2243160
    },
    {
      "epoch": 3.67101326891983,
      "grad_norm": 0.2781127095222473,
      "learning_rate": 2.675487437649493e-06,
      "loss": 0.0097,
      "step": 2243180
    },
    {
      "epoch": 3.6710459993584834,
      "grad_norm": 0.5469699501991272,
      "learning_rate": 2.6754215454359763e-06,
      "loss": 0.0202,
      "step": 2243200
    },
    {
      "epoch": 3.671078729797137,
      "grad_norm": 0.0822184756398201,
      "learning_rate": 2.675355653222459e-06,
      "loss": 0.008,
      "step": 2243220
    },
    {
      "epoch": 3.67111146023579,
      "grad_norm": 0.05555294081568718,
      "learning_rate": 2.6752897610089418e-06,
      "loss": 0.01,
      "step": 2243240
    },
    {
      "epoch": 3.6711441906744433,
      "grad_norm": 0.10435072332620621,
      "learning_rate": 2.6752238687954245e-06,
      "loss": 0.008,
      "step": 2243260
    },
    {
      "epoch": 3.671176921113097,
      "grad_norm": 1.1648030281066895,
      "learning_rate": 2.6751579765819077e-06,
      "loss": 0.019,
      "step": 2243280
    },
    {
      "epoch": 3.67120965155175,
      "grad_norm": 0.4692387878894806,
      "learning_rate": 2.6750920843683904e-06,
      "loss": 0.0089,
      "step": 2243300
    },
    {
      "epoch": 3.671242381990403,
      "grad_norm": 0.1351611614227295,
      "learning_rate": 2.675026192154873e-06,
      "loss": 0.0121,
      "step": 2243320
    },
    {
      "epoch": 3.671275112429057,
      "grad_norm": 0.3647139072418213,
      "learning_rate": 2.674960299941356e-06,
      "loss": 0.0154,
      "step": 2243340
    },
    {
      "epoch": 3.67130784286771,
      "grad_norm": 0.19598445296287537,
      "learning_rate": 2.6748944077278386e-06,
      "loss": 0.0142,
      "step": 2243360
    },
    {
      "epoch": 3.6713405733063635,
      "grad_norm": 0.2764661908149719,
      "learning_rate": 2.6748285155143218e-06,
      "loss": 0.0129,
      "step": 2243380
    },
    {
      "epoch": 3.6713733037450167,
      "grad_norm": 0.18685317039489746,
      "learning_rate": 2.6747626233008045e-06,
      "loss": 0.009,
      "step": 2243400
    },
    {
      "epoch": 3.6714060341836703,
      "grad_norm": 0.1251211166381836,
      "learning_rate": 2.6746967310872877e-06,
      "loss": 0.0175,
      "step": 2243420
    },
    {
      "epoch": 3.6714387646223234,
      "grad_norm": 0.21042987704277039,
      "learning_rate": 2.674630838873771e-06,
      "loss": 0.0105,
      "step": 2243440
    },
    {
      "epoch": 3.6714714950609766,
      "grad_norm": 0.051644518971443176,
      "learning_rate": 2.6745649466602536e-06,
      "loss": 0.0079,
      "step": 2243460
    },
    {
      "epoch": 3.67150422549963,
      "grad_norm": 0.47112706303596497,
      "learning_rate": 2.6744990544467363e-06,
      "loss": 0.0134,
      "step": 2243480
    },
    {
      "epoch": 3.6715369559382833,
      "grad_norm": 0.5615781545639038,
      "learning_rate": 2.674433162233219e-06,
      "loss": 0.0095,
      "step": 2243500
    },
    {
      "epoch": 3.671569686376937,
      "grad_norm": 0.8164483308792114,
      "learning_rate": 2.6743672700197022e-06,
      "loss": 0.013,
      "step": 2243520
    },
    {
      "epoch": 3.67160241681559,
      "grad_norm": 0.33057981729507446,
      "learning_rate": 2.674301377806185e-06,
      "loss": 0.0105,
      "step": 2243540
    },
    {
      "epoch": 3.6716351472542437,
      "grad_norm": 0.18026886880397797,
      "learning_rate": 2.6742354855926677e-06,
      "loss": 0.0104,
      "step": 2243560
    },
    {
      "epoch": 3.671667877692897,
      "grad_norm": 0.3008505702018738,
      "learning_rate": 2.6741695933791505e-06,
      "loss": 0.0168,
      "step": 2243580
    },
    {
      "epoch": 3.67170060813155,
      "grad_norm": 0.4244263470172882,
      "learning_rate": 2.6741037011656336e-06,
      "loss": 0.0106,
      "step": 2243600
    },
    {
      "epoch": 3.6717333385702036,
      "grad_norm": 0.18315687775611877,
      "learning_rate": 2.6740378089521164e-06,
      "loss": 0.01,
      "step": 2243620
    },
    {
      "epoch": 3.6717660690088567,
      "grad_norm": 0.8756853938102722,
      "learning_rate": 2.673971916738599e-06,
      "loss": 0.0133,
      "step": 2243640
    },
    {
      "epoch": 3.6717987994475103,
      "grad_norm": 0.4065700173377991,
      "learning_rate": 2.673906024525082e-06,
      "loss": 0.0125,
      "step": 2243660
    },
    {
      "epoch": 3.6718315298861635,
      "grad_norm": 0.14180384576320648,
      "learning_rate": 2.673840132311565e-06,
      "loss": 0.0123,
      "step": 2243680
    },
    {
      "epoch": 3.671864260324817,
      "grad_norm": 0.2713117301464081,
      "learning_rate": 2.6737742400980477e-06,
      "loss": 0.0116,
      "step": 2243700
    },
    {
      "epoch": 3.67189699076347,
      "grad_norm": 0.10233204066753387,
      "learning_rate": 2.6737083478845305e-06,
      "loss": 0.0103,
      "step": 2243720
    },
    {
      "epoch": 3.6719297212021234,
      "grad_norm": 0.08524105697870255,
      "learning_rate": 2.6736424556710132e-06,
      "loss": 0.0095,
      "step": 2243740
    },
    {
      "epoch": 3.671962451640777,
      "grad_norm": 0.4384132921695709,
      "learning_rate": 2.673576563457497e-06,
      "loss": 0.0096,
      "step": 2243760
    },
    {
      "epoch": 3.67199518207943,
      "grad_norm": 0.1306292861700058,
      "learning_rate": 2.6735106712439795e-06,
      "loss": 0.0096,
      "step": 2243780
    },
    {
      "epoch": 3.6720279125180837,
      "grad_norm": 0.13226209580898285,
      "learning_rate": 2.6734447790304623e-06,
      "loss": 0.0144,
      "step": 2243800
    },
    {
      "epoch": 3.672060642956737,
      "grad_norm": 0.2144678682088852,
      "learning_rate": 2.6733788868169454e-06,
      "loss": 0.0086,
      "step": 2243820
    },
    {
      "epoch": 3.6720933733953904,
      "grad_norm": 0.07920260727405548,
      "learning_rate": 2.673312994603428e-06,
      "loss": 0.0066,
      "step": 2243840
    },
    {
      "epoch": 3.6721261038340436,
      "grad_norm": 0.20296993851661682,
      "learning_rate": 2.673247102389911e-06,
      "loss": 0.0156,
      "step": 2243860
    },
    {
      "epoch": 3.6721588342726967,
      "grad_norm": 0.08380518853664398,
      "learning_rate": 2.6731812101763937e-06,
      "loss": 0.0106,
      "step": 2243880
    },
    {
      "epoch": 3.6721915647113503,
      "grad_norm": 0.2200068086385727,
      "learning_rate": 2.6731153179628764e-06,
      "loss": 0.014,
      "step": 2243900
    },
    {
      "epoch": 3.6722242951500035,
      "grad_norm": 0.7951085567474365,
      "learning_rate": 2.6730494257493596e-06,
      "loss": 0.0088,
      "step": 2243920
    },
    {
      "epoch": 3.672257025588657,
      "grad_norm": 0.500948965549469,
      "learning_rate": 2.6729835335358423e-06,
      "loss": 0.0096,
      "step": 2243940
    },
    {
      "epoch": 3.6722897560273102,
      "grad_norm": 0.5689599514007568,
      "learning_rate": 2.672917641322325e-06,
      "loss": 0.0094,
      "step": 2243960
    },
    {
      "epoch": 3.672322486465964,
      "grad_norm": 0.48260965943336487,
      "learning_rate": 2.6728517491088078e-06,
      "loss": 0.0148,
      "step": 2243980
    },
    {
      "epoch": 3.672355216904617,
      "grad_norm": 0.27944058179855347,
      "learning_rate": 2.672785856895291e-06,
      "loss": 0.0104,
      "step": 2244000
    },
    {
      "epoch": 3.67238794734327,
      "grad_norm": 2.7428030967712402,
      "learning_rate": 2.6727199646817737e-06,
      "loss": 0.015,
      "step": 2244020
    },
    {
      "epoch": 3.6724206777819237,
      "grad_norm": 0.36958909034729004,
      "learning_rate": 2.6726540724682564e-06,
      "loss": 0.0114,
      "step": 2244040
    },
    {
      "epoch": 3.672453408220577,
      "grad_norm": 0.36360907554626465,
      "learning_rate": 2.672588180254739e-06,
      "loss": 0.0121,
      "step": 2244060
    },
    {
      "epoch": 3.6724861386592305,
      "grad_norm": 0.8394021391868591,
      "learning_rate": 2.6725222880412223e-06,
      "loss": 0.015,
      "step": 2244080
    },
    {
      "epoch": 3.6725188690978836,
      "grad_norm": 0.28300943970680237,
      "learning_rate": 2.672456395827705e-06,
      "loss": 0.0107,
      "step": 2244100
    },
    {
      "epoch": 3.672551599536537,
      "grad_norm": 0.7699063420295715,
      "learning_rate": 2.6723905036141882e-06,
      "loss": 0.0167,
      "step": 2244120
    },
    {
      "epoch": 3.6725843299751904,
      "grad_norm": 0.4211258292198181,
      "learning_rate": 2.6723246114006714e-06,
      "loss": 0.0097,
      "step": 2244140
    },
    {
      "epoch": 3.6726170604138435,
      "grad_norm": 0.0865422859787941,
      "learning_rate": 2.672258719187154e-06,
      "loss": 0.0137,
      "step": 2244160
    },
    {
      "epoch": 3.672649790852497,
      "grad_norm": 0.20202840864658356,
      "learning_rate": 2.672192826973637e-06,
      "loss": 0.0083,
      "step": 2244180
    },
    {
      "epoch": 3.6726825212911502,
      "grad_norm": 0.46237146854400635,
      "learning_rate": 2.6721269347601196e-06,
      "loss": 0.0113,
      "step": 2244200
    },
    {
      "epoch": 3.672715251729804,
      "grad_norm": 0.18015150725841522,
      "learning_rate": 2.6720610425466028e-06,
      "loss": 0.0105,
      "step": 2244220
    },
    {
      "epoch": 3.672747982168457,
      "grad_norm": 0.3821141719818115,
      "learning_rate": 2.6719951503330855e-06,
      "loss": 0.0098,
      "step": 2244240
    },
    {
      "epoch": 3.6727807126071106,
      "grad_norm": 0.1794956773519516,
      "learning_rate": 2.6719292581195683e-06,
      "loss": 0.0138,
      "step": 2244260
    },
    {
      "epoch": 3.6728134430457637,
      "grad_norm": 0.24509243667125702,
      "learning_rate": 2.671863365906051e-06,
      "loss": 0.0152,
      "step": 2244280
    },
    {
      "epoch": 3.672846173484417,
      "grad_norm": 0.15773531794548035,
      "learning_rate": 2.6717974736925337e-06,
      "loss": 0.0095,
      "step": 2244300
    },
    {
      "epoch": 3.6728789039230705,
      "grad_norm": 0.7741701006889343,
      "learning_rate": 2.671731581479017e-06,
      "loss": 0.0132,
      "step": 2244320
    },
    {
      "epoch": 3.6729116343617236,
      "grad_norm": 0.628659725189209,
      "learning_rate": 2.6716656892654996e-06,
      "loss": 0.0103,
      "step": 2244340
    },
    {
      "epoch": 3.672944364800377,
      "grad_norm": 0.5761454701423645,
      "learning_rate": 2.6715997970519824e-06,
      "loss": 0.0106,
      "step": 2244360
    },
    {
      "epoch": 3.6729770952390304,
      "grad_norm": 0.48195675015449524,
      "learning_rate": 2.671533904838465e-06,
      "loss": 0.0098,
      "step": 2244380
    },
    {
      "epoch": 3.673009825677684,
      "grad_norm": 0.04265186935663223,
      "learning_rate": 2.6714680126249483e-06,
      "loss": 0.0118,
      "step": 2244400
    },
    {
      "epoch": 3.673042556116337,
      "grad_norm": 0.159011572599411,
      "learning_rate": 2.671402120411431e-06,
      "loss": 0.011,
      "step": 2244420
    },
    {
      "epoch": 3.6730752865549903,
      "grad_norm": 0.061567191034555435,
      "learning_rate": 2.6713362281979138e-06,
      "loss": 0.0118,
      "step": 2244440
    },
    {
      "epoch": 3.673108016993644,
      "grad_norm": 0.0702178105711937,
      "learning_rate": 2.6712703359843965e-06,
      "loss": 0.0154,
      "step": 2244460
    },
    {
      "epoch": 3.673140747432297,
      "grad_norm": 0.619882345199585,
      "learning_rate": 2.67120444377088e-06,
      "loss": 0.0145,
      "step": 2244480
    },
    {
      "epoch": 3.67317347787095,
      "grad_norm": 0.8207880258560181,
      "learning_rate": 2.671138551557363e-06,
      "loss": 0.013,
      "step": 2244500
    },
    {
      "epoch": 3.6732062083096038,
      "grad_norm": 0.15053333342075348,
      "learning_rate": 2.6710726593438456e-06,
      "loss": 0.0082,
      "step": 2244520
    },
    {
      "epoch": 3.6732389387482574,
      "grad_norm": 0.2622227072715759,
      "learning_rate": 2.6710067671303287e-06,
      "loss": 0.0118,
      "step": 2244540
    },
    {
      "epoch": 3.6732716691869105,
      "grad_norm": 0.3741827607154846,
      "learning_rate": 2.6709408749168115e-06,
      "loss": 0.0101,
      "step": 2244560
    },
    {
      "epoch": 3.6733043996255637,
      "grad_norm": 0.6605943441390991,
      "learning_rate": 2.6708749827032942e-06,
      "loss": 0.0127,
      "step": 2244580
    },
    {
      "epoch": 3.6733371300642172,
      "grad_norm": 0.779982328414917,
      "learning_rate": 2.670809090489777e-06,
      "loss": 0.0092,
      "step": 2244600
    },
    {
      "epoch": 3.6733698605028704,
      "grad_norm": 0.13117244839668274,
      "learning_rate": 2.67074319827626e-06,
      "loss": 0.0129,
      "step": 2244620
    },
    {
      "epoch": 3.6734025909415235,
      "grad_norm": 0.3586468994617462,
      "learning_rate": 2.670677306062743e-06,
      "loss": 0.0104,
      "step": 2244640
    },
    {
      "epoch": 3.673435321380177,
      "grad_norm": 0.37502798438072205,
      "learning_rate": 2.6706114138492256e-06,
      "loss": 0.0125,
      "step": 2244660
    },
    {
      "epoch": 3.6734680518188307,
      "grad_norm": 0.19750598073005676,
      "learning_rate": 2.6705455216357083e-06,
      "loss": 0.0101,
      "step": 2244680
    },
    {
      "epoch": 3.673500782257484,
      "grad_norm": 0.5521093010902405,
      "learning_rate": 2.6704796294221915e-06,
      "loss": 0.011,
      "step": 2244700
    },
    {
      "epoch": 3.673533512696137,
      "grad_norm": 0.19369542598724365,
      "learning_rate": 2.6704137372086742e-06,
      "loss": 0.009,
      "step": 2244720
    },
    {
      "epoch": 3.6735662431347906,
      "grad_norm": 0.10905225574970245,
      "learning_rate": 2.670347844995157e-06,
      "loss": 0.0069,
      "step": 2244740
    },
    {
      "epoch": 3.673598973573444,
      "grad_norm": 0.631790280342102,
      "learning_rate": 2.6702819527816397e-06,
      "loss": 0.0123,
      "step": 2244760
    },
    {
      "epoch": 3.673631704012097,
      "grad_norm": 0.3129254877567291,
      "learning_rate": 2.6702160605681225e-06,
      "loss": 0.008,
      "step": 2244780
    },
    {
      "epoch": 3.6736644344507505,
      "grad_norm": 0.13825227320194244,
      "learning_rate": 2.6701501683546056e-06,
      "loss": 0.0058,
      "step": 2244800
    },
    {
      "epoch": 3.673697164889404,
      "grad_norm": 0.2500205636024475,
      "learning_rate": 2.6700842761410888e-06,
      "loss": 0.0091,
      "step": 2244820
    },
    {
      "epoch": 3.6737298953280573,
      "grad_norm": 0.08700871467590332,
      "learning_rate": 2.6700183839275715e-06,
      "loss": 0.0085,
      "step": 2244840
    },
    {
      "epoch": 3.6737626257667104,
      "grad_norm": 0.23482108116149902,
      "learning_rate": 2.6699524917140547e-06,
      "loss": 0.0094,
      "step": 2244860
    },
    {
      "epoch": 3.673795356205364,
      "grad_norm": 0.17109966278076172,
      "learning_rate": 2.6698865995005374e-06,
      "loss": 0.0113,
      "step": 2244880
    },
    {
      "epoch": 3.673828086644017,
      "grad_norm": 0.18895436823368073,
      "learning_rate": 2.66982070728702e-06,
      "loss": 0.0127,
      "step": 2244900
    },
    {
      "epoch": 3.6738608170826703,
      "grad_norm": 0.38631322979927063,
      "learning_rate": 2.669754815073503e-06,
      "loss": 0.0134,
      "step": 2244920
    },
    {
      "epoch": 3.673893547521324,
      "grad_norm": 0.16822069883346558,
      "learning_rate": 2.669688922859986e-06,
      "loss": 0.0086,
      "step": 2244940
    },
    {
      "epoch": 3.673926277959977,
      "grad_norm": 0.11496670544147491,
      "learning_rate": 2.669623030646469e-06,
      "loss": 0.0102,
      "step": 2244960
    },
    {
      "epoch": 3.6739590083986307,
      "grad_norm": 0.41107743978500366,
      "learning_rate": 2.6695571384329516e-06,
      "loss": 0.0124,
      "step": 2244980
    },
    {
      "epoch": 3.673991738837284,
      "grad_norm": 0.23449118435382843,
      "learning_rate": 2.6694912462194343e-06,
      "loss": 0.0113,
      "step": 2245000
    },
    {
      "epoch": 3.6740244692759374,
      "grad_norm": 0.4792085289955139,
      "learning_rate": 2.6694253540059175e-06,
      "loss": 0.0101,
      "step": 2245020
    },
    {
      "epoch": 3.6740571997145905,
      "grad_norm": 0.607490599155426,
      "learning_rate": 2.6693594617924e-06,
      "loss": 0.0095,
      "step": 2245040
    },
    {
      "epoch": 3.6740899301532437,
      "grad_norm": 0.2773596942424774,
      "learning_rate": 2.669293569578883e-06,
      "loss": 0.0072,
      "step": 2245060
    },
    {
      "epoch": 3.6741226605918973,
      "grad_norm": 0.26148512959480286,
      "learning_rate": 2.6692276773653657e-06,
      "loss": 0.0103,
      "step": 2245080
    },
    {
      "epoch": 3.6741553910305504,
      "grad_norm": 0.15635333955287933,
      "learning_rate": 2.669161785151849e-06,
      "loss": 0.01,
      "step": 2245100
    },
    {
      "epoch": 3.674188121469204,
      "grad_norm": 0.4581115245819092,
      "learning_rate": 2.6690958929383316e-06,
      "loss": 0.0146,
      "step": 2245120
    },
    {
      "epoch": 3.674220851907857,
      "grad_norm": 0.17666448652744293,
      "learning_rate": 2.6690300007248143e-06,
      "loss": 0.0086,
      "step": 2245140
    },
    {
      "epoch": 3.674253582346511,
      "grad_norm": 0.2887052595615387,
      "learning_rate": 2.668964108511297e-06,
      "loss": 0.0103,
      "step": 2245160
    },
    {
      "epoch": 3.674286312785164,
      "grad_norm": 0.23091113567352295,
      "learning_rate": 2.6688982162977806e-06,
      "loss": 0.0139,
      "step": 2245180
    },
    {
      "epoch": 3.674319043223817,
      "grad_norm": 0.26243171095848083,
      "learning_rate": 2.6688323240842634e-06,
      "loss": 0.0134,
      "step": 2245200
    },
    {
      "epoch": 3.6743517736624707,
      "grad_norm": 0.24820269644260406,
      "learning_rate": 2.668766431870746e-06,
      "loss": 0.0079,
      "step": 2245220
    },
    {
      "epoch": 3.674384504101124,
      "grad_norm": 0.16874803602695465,
      "learning_rate": 2.6687005396572293e-06,
      "loss": 0.0086,
      "step": 2245240
    },
    {
      "epoch": 3.6744172345397774,
      "grad_norm": 0.17627447843551636,
      "learning_rate": 2.668634647443712e-06,
      "loss": 0.0133,
      "step": 2245260
    },
    {
      "epoch": 3.6744499649784306,
      "grad_norm": 0.2901277244091034,
      "learning_rate": 2.6685687552301948e-06,
      "loss": 0.02,
      "step": 2245280
    },
    {
      "epoch": 3.674482695417084,
      "grad_norm": 0.20491857826709747,
      "learning_rate": 2.6685028630166775e-06,
      "loss": 0.0122,
      "step": 2245300
    },
    {
      "epoch": 3.6745154258557373,
      "grad_norm": 0.21867209672927856,
      "learning_rate": 2.6684369708031602e-06,
      "loss": 0.0136,
      "step": 2245320
    },
    {
      "epoch": 3.6745481562943905,
      "grad_norm": 0.6908467411994934,
      "learning_rate": 2.6683710785896434e-06,
      "loss": 0.0196,
      "step": 2245340
    },
    {
      "epoch": 3.674580886733044,
      "grad_norm": 0.4734615683555603,
      "learning_rate": 2.668305186376126e-06,
      "loss": 0.0124,
      "step": 2245360
    },
    {
      "epoch": 3.674613617171697,
      "grad_norm": 0.07722314447164536,
      "learning_rate": 2.668239294162609e-06,
      "loss": 0.0126,
      "step": 2245380
    },
    {
      "epoch": 3.674646347610351,
      "grad_norm": 0.39299073815345764,
      "learning_rate": 2.6681734019490916e-06,
      "loss": 0.0107,
      "step": 2245400
    },
    {
      "epoch": 3.674679078049004,
      "grad_norm": 0.4391861855983734,
      "learning_rate": 2.668107509735575e-06,
      "loss": 0.0079,
      "step": 2245420
    },
    {
      "epoch": 3.6747118084876575,
      "grad_norm": 0.5048412084579468,
      "learning_rate": 2.6680416175220575e-06,
      "loss": 0.01,
      "step": 2245440
    },
    {
      "epoch": 3.6747445389263107,
      "grad_norm": 0.16309762001037598,
      "learning_rate": 2.6679757253085403e-06,
      "loss": 0.0072,
      "step": 2245460
    },
    {
      "epoch": 3.674777269364964,
      "grad_norm": 0.06353722512722015,
      "learning_rate": 2.667909833095023e-06,
      "loss": 0.0127,
      "step": 2245480
    },
    {
      "epoch": 3.6748099998036174,
      "grad_norm": 0.1769406944513321,
      "learning_rate": 2.667843940881506e-06,
      "loss": 0.0103,
      "step": 2245500
    },
    {
      "epoch": 3.6748427302422706,
      "grad_norm": 0.17737910151481628,
      "learning_rate": 2.6677780486679893e-06,
      "loss": 0.0095,
      "step": 2245520
    },
    {
      "epoch": 3.674875460680924,
      "grad_norm": 0.07400034368038177,
      "learning_rate": 2.667712156454472e-06,
      "loss": 0.0099,
      "step": 2245540
    },
    {
      "epoch": 3.6749081911195773,
      "grad_norm": 1.3005481958389282,
      "learning_rate": 2.6676462642409552e-06,
      "loss": 0.0147,
      "step": 2245560
    },
    {
      "epoch": 3.674940921558231,
      "grad_norm": 0.16819682717323303,
      "learning_rate": 2.667580372027438e-06,
      "loss": 0.0101,
      "step": 2245580
    },
    {
      "epoch": 3.674973651996884,
      "grad_norm": 0.43850618600845337,
      "learning_rate": 2.6675144798139207e-06,
      "loss": 0.0142,
      "step": 2245600
    },
    {
      "epoch": 3.6750063824355372,
      "grad_norm": 0.4585055112838745,
      "learning_rate": 2.6674485876004035e-06,
      "loss": 0.01,
      "step": 2245620
    },
    {
      "epoch": 3.675039112874191,
      "grad_norm": 0.39912959933280945,
      "learning_rate": 2.6673826953868866e-06,
      "loss": 0.013,
      "step": 2245640
    },
    {
      "epoch": 3.675071843312844,
      "grad_norm": 0.29298222064971924,
      "learning_rate": 2.6673168031733694e-06,
      "loss": 0.0091,
      "step": 2245660
    },
    {
      "epoch": 3.6751045737514976,
      "grad_norm": 0.34978970885276794,
      "learning_rate": 2.667250910959852e-06,
      "loss": 0.0092,
      "step": 2245680
    },
    {
      "epoch": 3.6751373041901507,
      "grad_norm": 0.11330819129943848,
      "learning_rate": 2.667185018746335e-06,
      "loss": 0.0104,
      "step": 2245700
    },
    {
      "epoch": 3.6751700346288043,
      "grad_norm": 0.24841085076332092,
      "learning_rate": 2.667119126532818e-06,
      "loss": 0.0068,
      "step": 2245720
    },
    {
      "epoch": 3.6752027650674575,
      "grad_norm": 0.2456078678369522,
      "learning_rate": 2.6670532343193007e-06,
      "loss": 0.0087,
      "step": 2245740
    },
    {
      "epoch": 3.6752354955061106,
      "grad_norm": 0.4765080511569977,
      "learning_rate": 2.6669873421057835e-06,
      "loss": 0.0115,
      "step": 2245760
    },
    {
      "epoch": 3.675268225944764,
      "grad_norm": 0.30075836181640625,
      "learning_rate": 2.6669214498922662e-06,
      "loss": 0.0128,
      "step": 2245780
    },
    {
      "epoch": 3.6753009563834174,
      "grad_norm": 0.1300678551197052,
      "learning_rate": 2.666855557678749e-06,
      "loss": 0.0099,
      "step": 2245800
    },
    {
      "epoch": 3.675333686822071,
      "grad_norm": 0.14181196689605713,
      "learning_rate": 2.666789665465232e-06,
      "loss": 0.0104,
      "step": 2245820
    },
    {
      "epoch": 3.675366417260724,
      "grad_norm": 0.15034116804599762,
      "learning_rate": 2.666723773251715e-06,
      "loss": 0.0117,
      "step": 2245840
    },
    {
      "epoch": 3.6753991476993777,
      "grad_norm": 0.2904602885246277,
      "learning_rate": 2.6666578810381976e-06,
      "loss": 0.0137,
      "step": 2245860
    },
    {
      "epoch": 3.675431878138031,
      "grad_norm": 0.055783502757549286,
      "learning_rate": 2.666591988824681e-06,
      "loss": 0.0105,
      "step": 2245880
    },
    {
      "epoch": 3.675464608576684,
      "grad_norm": 0.17613959312438965,
      "learning_rate": 2.666526096611164e-06,
      "loss": 0.0087,
      "step": 2245900
    },
    {
      "epoch": 3.6754973390153376,
      "grad_norm": 0.38573846220970154,
      "learning_rate": 2.6664602043976467e-06,
      "loss": 0.0097,
      "step": 2245920
    },
    {
      "epoch": 3.6755300694539907,
      "grad_norm": 0.4143279194831848,
      "learning_rate": 2.6663943121841294e-06,
      "loss": 0.0173,
      "step": 2245940
    },
    {
      "epoch": 3.675562799892644,
      "grad_norm": 0.3129323720932007,
      "learning_rate": 2.6663284199706126e-06,
      "loss": 0.0126,
      "step": 2245960
    },
    {
      "epoch": 3.6755955303312975,
      "grad_norm": 0.2353704869747162,
      "learning_rate": 2.6662625277570953e-06,
      "loss": 0.0122,
      "step": 2245980
    },
    {
      "epoch": 3.675628260769951,
      "grad_norm": 0.5435222387313843,
      "learning_rate": 2.666196635543578e-06,
      "loss": 0.0114,
      "step": 2246000
    },
    {
      "epoch": 3.6756609912086042,
      "grad_norm": 0.16886202991008759,
      "learning_rate": 2.666130743330061e-06,
      "loss": 0.0101,
      "step": 2246020
    },
    {
      "epoch": 3.6756937216472574,
      "grad_norm": 0.2712043821811676,
      "learning_rate": 2.666064851116544e-06,
      "loss": 0.0089,
      "step": 2246040
    },
    {
      "epoch": 3.675726452085911,
      "grad_norm": 0.31565147638320923,
      "learning_rate": 2.6659989589030267e-06,
      "loss": 0.015,
      "step": 2246060
    },
    {
      "epoch": 3.675759182524564,
      "grad_norm": 0.3360856771469116,
      "learning_rate": 2.6659330666895094e-06,
      "loss": 0.0133,
      "step": 2246080
    },
    {
      "epoch": 3.6757919129632173,
      "grad_norm": 0.28849437832832336,
      "learning_rate": 2.665867174475992e-06,
      "loss": 0.016,
      "step": 2246100
    },
    {
      "epoch": 3.675824643401871,
      "grad_norm": 0.2888600826263428,
      "learning_rate": 2.6658012822624753e-06,
      "loss": 0.0159,
      "step": 2246120
    },
    {
      "epoch": 3.6758573738405245,
      "grad_norm": 0.9053279161453247,
      "learning_rate": 2.665735390048958e-06,
      "loss": 0.0172,
      "step": 2246140
    },
    {
      "epoch": 3.6758901042791776,
      "grad_norm": 0.17597496509552002,
      "learning_rate": 2.665669497835441e-06,
      "loss": 0.0064,
      "step": 2246160
    },
    {
      "epoch": 3.6759228347178308,
      "grad_norm": 0.38155409693717957,
      "learning_rate": 2.6656036056219236e-06,
      "loss": 0.0109,
      "step": 2246180
    },
    {
      "epoch": 3.6759555651564844,
      "grad_norm": 0.17113825678825378,
      "learning_rate": 2.6655377134084063e-06,
      "loss": 0.0123,
      "step": 2246200
    },
    {
      "epoch": 3.6759882955951375,
      "grad_norm": 0.14333032071590424,
      "learning_rate": 2.6654718211948895e-06,
      "loss": 0.0087,
      "step": 2246220
    },
    {
      "epoch": 3.6760210260337907,
      "grad_norm": 0.3018471896648407,
      "learning_rate": 2.6654059289813726e-06,
      "loss": 0.0074,
      "step": 2246240
    },
    {
      "epoch": 3.6760537564724443,
      "grad_norm": 0.26809266209602356,
      "learning_rate": 2.6653400367678554e-06,
      "loss": 0.0095,
      "step": 2246260
    },
    {
      "epoch": 3.676086486911098,
      "grad_norm": 0.2220078557729721,
      "learning_rate": 2.6652741445543385e-06,
      "loss": 0.0067,
      "step": 2246280
    },
    {
      "epoch": 3.676119217349751,
      "grad_norm": 0.5574595928192139,
      "learning_rate": 2.6652082523408213e-06,
      "loss": 0.008,
      "step": 2246300
    },
    {
      "epoch": 3.676151947788404,
      "grad_norm": 1.0919649600982666,
      "learning_rate": 2.665142360127304e-06,
      "loss": 0.0161,
      "step": 2246320
    },
    {
      "epoch": 3.6761846782270577,
      "grad_norm": 0.2922949194908142,
      "learning_rate": 2.6650764679137867e-06,
      "loss": 0.0156,
      "step": 2246340
    },
    {
      "epoch": 3.676217408665711,
      "grad_norm": 0.8866512179374695,
      "learning_rate": 2.66501057570027e-06,
      "loss": 0.0131,
      "step": 2246360
    },
    {
      "epoch": 3.676250139104364,
      "grad_norm": 0.9715309739112854,
      "learning_rate": 2.6649446834867527e-06,
      "loss": 0.0123,
      "step": 2246380
    },
    {
      "epoch": 3.6762828695430176,
      "grad_norm": 0.1472330093383789,
      "learning_rate": 2.6648787912732354e-06,
      "loss": 0.0079,
      "step": 2246400
    },
    {
      "epoch": 3.676315599981671,
      "grad_norm": 0.2528902590274811,
      "learning_rate": 2.664812899059718e-06,
      "loss": 0.0083,
      "step": 2246420
    },
    {
      "epoch": 3.6763483304203244,
      "grad_norm": 0.25710225105285645,
      "learning_rate": 2.6647470068462013e-06,
      "loss": 0.0115,
      "step": 2246440
    },
    {
      "epoch": 3.6763810608589775,
      "grad_norm": 0.11940973997116089,
      "learning_rate": 2.664681114632684e-06,
      "loss": 0.012,
      "step": 2246460
    },
    {
      "epoch": 3.676413791297631,
      "grad_norm": 0.46928760409355164,
      "learning_rate": 2.6646152224191668e-06,
      "loss": 0.0111,
      "step": 2246480
    },
    {
      "epoch": 3.6764465217362843,
      "grad_norm": 0.20749010145664215,
      "learning_rate": 2.6645493302056495e-06,
      "loss": 0.0114,
      "step": 2246500
    },
    {
      "epoch": 3.6764792521749374,
      "grad_norm": 0.16926564276218414,
      "learning_rate": 2.6644834379921327e-06,
      "loss": 0.0122,
      "step": 2246520
    },
    {
      "epoch": 3.676511982613591,
      "grad_norm": 0.08948109298944473,
      "learning_rate": 2.6644175457786154e-06,
      "loss": 0.0147,
      "step": 2246540
    },
    {
      "epoch": 3.676544713052244,
      "grad_norm": 0.5513737201690674,
      "learning_rate": 2.664351653565098e-06,
      "loss": 0.0091,
      "step": 2246560
    },
    {
      "epoch": 3.6765774434908978,
      "grad_norm": 0.22576677799224854,
      "learning_rate": 2.6642857613515817e-06,
      "loss": 0.0084,
      "step": 2246580
    },
    {
      "epoch": 3.676610173929551,
      "grad_norm": 0.12528517842292786,
      "learning_rate": 2.6642198691380645e-06,
      "loss": 0.0106,
      "step": 2246600
    },
    {
      "epoch": 3.6766429043682045,
      "grad_norm": 0.1255708932876587,
      "learning_rate": 2.6641539769245472e-06,
      "loss": 0.0074,
      "step": 2246620
    },
    {
      "epoch": 3.6766756348068577,
      "grad_norm": 0.15845027565956116,
      "learning_rate": 2.66408808471103e-06,
      "loss": 0.0093,
      "step": 2246640
    },
    {
      "epoch": 3.676708365245511,
      "grad_norm": 0.07180408388376236,
      "learning_rate": 2.664022192497513e-06,
      "loss": 0.0128,
      "step": 2246660
    },
    {
      "epoch": 3.6767410956841644,
      "grad_norm": 0.8542495369911194,
      "learning_rate": 2.663956300283996e-06,
      "loss": 0.0136,
      "step": 2246680
    },
    {
      "epoch": 3.6767738261228176,
      "grad_norm": 0.35569751262664795,
      "learning_rate": 2.6638904080704786e-06,
      "loss": 0.0164,
      "step": 2246700
    },
    {
      "epoch": 3.676806556561471,
      "grad_norm": 0.20303530991077423,
      "learning_rate": 2.6638245158569613e-06,
      "loss": 0.0102,
      "step": 2246720
    },
    {
      "epoch": 3.6768392870001243,
      "grad_norm": 0.08838016539812088,
      "learning_rate": 2.663758623643444e-06,
      "loss": 0.0129,
      "step": 2246740
    },
    {
      "epoch": 3.676872017438778,
      "grad_norm": 0.4150594174861908,
      "learning_rate": 2.6636927314299272e-06,
      "loss": 0.0073,
      "step": 2246760
    },
    {
      "epoch": 3.676904747877431,
      "grad_norm": 0.054759152233600616,
      "learning_rate": 2.66362683921641e-06,
      "loss": 0.0094,
      "step": 2246780
    },
    {
      "epoch": 3.676937478316084,
      "grad_norm": 0.5185983180999756,
      "learning_rate": 2.6635609470028927e-06,
      "loss": 0.0092,
      "step": 2246800
    },
    {
      "epoch": 3.676970208754738,
      "grad_norm": 0.3735249638557434,
      "learning_rate": 2.6634950547893755e-06,
      "loss": 0.0112,
      "step": 2246820
    },
    {
      "epoch": 3.677002939193391,
      "grad_norm": 0.1475152224302292,
      "learning_rate": 2.6634291625758586e-06,
      "loss": 0.0131,
      "step": 2246840
    },
    {
      "epoch": 3.6770356696320445,
      "grad_norm": 0.2640839219093323,
      "learning_rate": 2.6633632703623414e-06,
      "loss": 0.0134,
      "step": 2246860
    },
    {
      "epoch": 3.6770684000706977,
      "grad_norm": 1.8205451965332031,
      "learning_rate": 2.663297378148824e-06,
      "loss": 0.009,
      "step": 2246880
    },
    {
      "epoch": 3.6771011305093513,
      "grad_norm": 0.1683507114648819,
      "learning_rate": 2.663231485935307e-06,
      "loss": 0.019,
      "step": 2246900
    },
    {
      "epoch": 3.6771338609480044,
      "grad_norm": 1.9211355447769165,
      "learning_rate": 2.66316559372179e-06,
      "loss": 0.0108,
      "step": 2246920
    },
    {
      "epoch": 3.6771665913866576,
      "grad_norm": 0.22055427730083466,
      "learning_rate": 2.663099701508273e-06,
      "loss": 0.0092,
      "step": 2246940
    },
    {
      "epoch": 3.677199321825311,
      "grad_norm": 0.1266232579946518,
      "learning_rate": 2.663033809294756e-06,
      "loss": 0.007,
      "step": 2246960
    },
    {
      "epoch": 3.6772320522639643,
      "grad_norm": 0.10547251999378204,
      "learning_rate": 2.662967917081239e-06,
      "loss": 0.0135,
      "step": 2246980
    },
    {
      "epoch": 3.677264782702618,
      "grad_norm": 0.19745907187461853,
      "learning_rate": 2.662902024867722e-06,
      "loss": 0.0143,
      "step": 2247000
    },
    {
      "epoch": 3.677297513141271,
      "grad_norm": 0.2925061285495758,
      "learning_rate": 2.6628361326542046e-06,
      "loss": 0.0118,
      "step": 2247020
    },
    {
      "epoch": 3.6773302435799247,
      "grad_norm": 0.18046116828918457,
      "learning_rate": 2.6627702404406873e-06,
      "loss": 0.0143,
      "step": 2247040
    },
    {
      "epoch": 3.677362974018578,
      "grad_norm": 0.30795732140541077,
      "learning_rate": 2.6627043482271705e-06,
      "loss": 0.0086,
      "step": 2247060
    },
    {
      "epoch": 3.677395704457231,
      "grad_norm": 0.39819416403770447,
      "learning_rate": 2.662638456013653e-06,
      "loss": 0.017,
      "step": 2247080
    },
    {
      "epoch": 3.6774284348958846,
      "grad_norm": 0.13700361549854279,
      "learning_rate": 2.662572563800136e-06,
      "loss": 0.0123,
      "step": 2247100
    },
    {
      "epoch": 3.6774611653345377,
      "grad_norm": 0.1583099365234375,
      "learning_rate": 2.6625066715866187e-06,
      "loss": 0.0156,
      "step": 2247120
    },
    {
      "epoch": 3.6774938957731913,
      "grad_norm": 0.5331025123596191,
      "learning_rate": 2.662440779373102e-06,
      "loss": 0.0136,
      "step": 2247140
    },
    {
      "epoch": 3.6775266262118445,
      "grad_norm": 0.30698052048683167,
      "learning_rate": 2.6623748871595846e-06,
      "loss": 0.0133,
      "step": 2247160
    },
    {
      "epoch": 3.677559356650498,
      "grad_norm": 0.2203189581632614,
      "learning_rate": 2.6623089949460673e-06,
      "loss": 0.0117,
      "step": 2247180
    },
    {
      "epoch": 3.677592087089151,
      "grad_norm": 0.07867302000522614,
      "learning_rate": 2.66224310273255e-06,
      "loss": 0.015,
      "step": 2247200
    },
    {
      "epoch": 3.6776248175278043,
      "grad_norm": 0.3423975110054016,
      "learning_rate": 2.662177210519033e-06,
      "loss": 0.0189,
      "step": 2247220
    },
    {
      "epoch": 3.677657547966458,
      "grad_norm": 0.4076039493083954,
      "learning_rate": 2.662111318305516e-06,
      "loss": 0.0103,
      "step": 2247240
    },
    {
      "epoch": 3.677690278405111,
      "grad_norm": 0.1830984354019165,
      "learning_rate": 2.6620454260919987e-06,
      "loss": 0.011,
      "step": 2247260
    },
    {
      "epoch": 3.6777230088437647,
      "grad_norm": 0.1762811541557312,
      "learning_rate": 2.661979533878482e-06,
      "loss": 0.0127,
      "step": 2247280
    },
    {
      "epoch": 3.677755739282418,
      "grad_norm": 0.04909062013030052,
      "learning_rate": 2.661913641664965e-06,
      "loss": 0.0067,
      "step": 2247300
    },
    {
      "epoch": 3.6777884697210714,
      "grad_norm": 0.9768010973930359,
      "learning_rate": 2.6618477494514478e-06,
      "loss": 0.0126,
      "step": 2247320
    },
    {
      "epoch": 3.6778212001597246,
      "grad_norm": 0.14292608201503754,
      "learning_rate": 2.6617818572379305e-06,
      "loss": 0.0088,
      "step": 2247340
    },
    {
      "epoch": 3.6778539305983777,
      "grad_norm": 0.12191633135080338,
      "learning_rate": 2.6617159650244133e-06,
      "loss": 0.0138,
      "step": 2247360
    },
    {
      "epoch": 3.6778866610370313,
      "grad_norm": 0.420686274766922,
      "learning_rate": 2.6616500728108964e-06,
      "loss": 0.011,
      "step": 2247380
    },
    {
      "epoch": 3.6779193914756845,
      "grad_norm": 0.37541821599006653,
      "learning_rate": 2.661584180597379e-06,
      "loss": 0.0103,
      "step": 2247400
    },
    {
      "epoch": 3.6779521219143376,
      "grad_norm": 0.4652140140533447,
      "learning_rate": 2.661518288383862e-06,
      "loss": 0.0116,
      "step": 2247420
    },
    {
      "epoch": 3.677984852352991,
      "grad_norm": 0.3207916021347046,
      "learning_rate": 2.6614523961703446e-06,
      "loss": 0.0113,
      "step": 2247440
    },
    {
      "epoch": 3.678017582791645,
      "grad_norm": 0.24611948430538177,
      "learning_rate": 2.661386503956828e-06,
      "loss": 0.0108,
      "step": 2247460
    },
    {
      "epoch": 3.678050313230298,
      "grad_norm": 0.15708652138710022,
      "learning_rate": 2.6613206117433105e-06,
      "loss": 0.0136,
      "step": 2247480
    },
    {
      "epoch": 3.678083043668951,
      "grad_norm": 0.23511479794979095,
      "learning_rate": 2.6612547195297933e-06,
      "loss": 0.0173,
      "step": 2247500
    },
    {
      "epoch": 3.6781157741076047,
      "grad_norm": 0.18297427892684937,
      "learning_rate": 2.661188827316276e-06,
      "loss": 0.0102,
      "step": 2247520
    },
    {
      "epoch": 3.678148504546258,
      "grad_norm": 0.13153956830501556,
      "learning_rate": 2.661122935102759e-06,
      "loss": 0.0106,
      "step": 2247540
    },
    {
      "epoch": 3.678181234984911,
      "grad_norm": 0.522560715675354,
      "learning_rate": 2.661057042889242e-06,
      "loss": 0.0087,
      "step": 2247560
    },
    {
      "epoch": 3.6782139654235646,
      "grad_norm": 0.12421182543039322,
      "learning_rate": 2.6609911506757247e-06,
      "loss": 0.013,
      "step": 2247580
    },
    {
      "epoch": 3.678246695862218,
      "grad_norm": 0.9048059582710266,
      "learning_rate": 2.6609252584622074e-06,
      "loss": 0.011,
      "step": 2247600
    },
    {
      "epoch": 3.6782794263008713,
      "grad_norm": 0.26164716482162476,
      "learning_rate": 2.66085936624869e-06,
      "loss": 0.0186,
      "step": 2247620
    },
    {
      "epoch": 3.6783121567395245,
      "grad_norm": 0.22930316627025604,
      "learning_rate": 2.6607934740351737e-06,
      "loss": 0.0135,
      "step": 2247640
    },
    {
      "epoch": 3.678344887178178,
      "grad_norm": 0.5222398042678833,
      "learning_rate": 2.6607275818216565e-06,
      "loss": 0.0161,
      "step": 2247660
    },
    {
      "epoch": 3.6783776176168312,
      "grad_norm": 0.41139543056488037,
      "learning_rate": 2.6606616896081396e-06,
      "loss": 0.0079,
      "step": 2247680
    },
    {
      "epoch": 3.6784103480554844,
      "grad_norm": 0.023944873362779617,
      "learning_rate": 2.6605957973946224e-06,
      "loss": 0.0111,
      "step": 2247700
    },
    {
      "epoch": 3.678443078494138,
      "grad_norm": 0.24093031883239746,
      "learning_rate": 2.660529905181105e-06,
      "loss": 0.0078,
      "step": 2247720
    },
    {
      "epoch": 3.6784758089327916,
      "grad_norm": 0.15285588800907135,
      "learning_rate": 2.660464012967588e-06,
      "loss": 0.0059,
      "step": 2247740
    },
    {
      "epoch": 3.6785085393714447,
      "grad_norm": 0.3220638036727905,
      "learning_rate": 2.6603981207540706e-06,
      "loss": 0.0128,
      "step": 2247760
    },
    {
      "epoch": 3.678541269810098,
      "grad_norm": 0.17082209885120392,
      "learning_rate": 2.6603322285405538e-06,
      "loss": 0.0086,
      "step": 2247780
    },
    {
      "epoch": 3.6785740002487515,
      "grad_norm": 0.4313279986381531,
      "learning_rate": 2.6602663363270365e-06,
      "loss": 0.0066,
      "step": 2247800
    },
    {
      "epoch": 3.6786067306874046,
      "grad_norm": 0.3141493499279022,
      "learning_rate": 2.6602004441135192e-06,
      "loss": 0.0126,
      "step": 2247820
    },
    {
      "epoch": 3.6786394611260578,
      "grad_norm": 0.3119353950023651,
      "learning_rate": 2.660134551900002e-06,
      "loss": 0.0085,
      "step": 2247840
    },
    {
      "epoch": 3.6786721915647114,
      "grad_norm": 0.6228880286216736,
      "learning_rate": 2.660068659686485e-06,
      "loss": 0.0116,
      "step": 2247860
    },
    {
      "epoch": 3.678704922003365,
      "grad_norm": 0.14370538294315338,
      "learning_rate": 2.660002767472968e-06,
      "loss": 0.0102,
      "step": 2247880
    },
    {
      "epoch": 3.678737652442018,
      "grad_norm": 0.34907880425453186,
      "learning_rate": 2.6599368752594506e-06,
      "loss": 0.0103,
      "step": 2247900
    },
    {
      "epoch": 3.6787703828806713,
      "grad_norm": 0.2855131924152374,
      "learning_rate": 2.6598709830459334e-06,
      "loss": 0.0088,
      "step": 2247920
    },
    {
      "epoch": 3.678803113319325,
      "grad_norm": 0.198417529463768,
      "learning_rate": 2.6598050908324165e-06,
      "loss": 0.0111,
      "step": 2247940
    },
    {
      "epoch": 3.678835843757978,
      "grad_norm": 0.15418313443660736,
      "learning_rate": 2.6597391986188993e-06,
      "loss": 0.0095,
      "step": 2247960
    },
    {
      "epoch": 3.678868574196631,
      "grad_norm": 0.7092726230621338,
      "learning_rate": 2.659673306405382e-06,
      "loss": 0.0111,
      "step": 2247980
    },
    {
      "epoch": 3.6789013046352848,
      "grad_norm": 0.21784283220767975,
      "learning_rate": 2.6596074141918656e-06,
      "loss": 0.0096,
      "step": 2248000
    },
    {
      "epoch": 3.678934035073938,
      "grad_norm": 0.26427292823791504,
      "learning_rate": 2.6595415219783483e-06,
      "loss": 0.0108,
      "step": 2248020
    },
    {
      "epoch": 3.6789667655125915,
      "grad_norm": 0.6052875518798828,
      "learning_rate": 2.659475629764831e-06,
      "loss": 0.0123,
      "step": 2248040
    },
    {
      "epoch": 3.6789994959512446,
      "grad_norm": 0.09910647571086884,
      "learning_rate": 2.659409737551314e-06,
      "loss": 0.0069,
      "step": 2248060
    },
    {
      "epoch": 3.6790322263898982,
      "grad_norm": 0.23596464097499847,
      "learning_rate": 2.659343845337797e-06,
      "loss": 0.0124,
      "step": 2248080
    },
    {
      "epoch": 3.6790649568285514,
      "grad_norm": 0.18790940940380096,
      "learning_rate": 2.6592779531242797e-06,
      "loss": 0.0085,
      "step": 2248100
    },
    {
      "epoch": 3.6790976872672045,
      "grad_norm": 0.35623133182525635,
      "learning_rate": 2.6592120609107624e-06,
      "loss": 0.0094,
      "step": 2248120
    },
    {
      "epoch": 3.679130417705858,
      "grad_norm": 0.35344821214675903,
      "learning_rate": 2.659146168697245e-06,
      "loss": 0.0145,
      "step": 2248140
    },
    {
      "epoch": 3.6791631481445113,
      "grad_norm": 0.13608665764331818,
      "learning_rate": 2.659080276483728e-06,
      "loss": 0.0078,
      "step": 2248160
    },
    {
      "epoch": 3.679195878583165,
      "grad_norm": 0.1912011057138443,
      "learning_rate": 2.659014384270211e-06,
      "loss": 0.0145,
      "step": 2248180
    },
    {
      "epoch": 3.679228609021818,
      "grad_norm": 0.15072113275527954,
      "learning_rate": 2.658948492056694e-06,
      "loss": 0.0115,
      "step": 2248200
    },
    {
      "epoch": 3.6792613394604716,
      "grad_norm": 0.46426212787628174,
      "learning_rate": 2.6588825998431766e-06,
      "loss": 0.0142,
      "step": 2248220
    },
    {
      "epoch": 3.6792940698991248,
      "grad_norm": 0.08925911039113998,
      "learning_rate": 2.6588167076296593e-06,
      "loss": 0.0095,
      "step": 2248240
    },
    {
      "epoch": 3.679326800337778,
      "grad_norm": 0.1578105241060257,
      "learning_rate": 2.6587508154161425e-06,
      "loss": 0.0173,
      "step": 2248260
    },
    {
      "epoch": 3.6793595307764315,
      "grad_norm": 0.30441659688949585,
      "learning_rate": 2.658684923202625e-06,
      "loss": 0.013,
      "step": 2248280
    },
    {
      "epoch": 3.6793922612150847,
      "grad_norm": 0.21724744141101837,
      "learning_rate": 2.658619030989108e-06,
      "loss": 0.0065,
      "step": 2248300
    },
    {
      "epoch": 3.6794249916537383,
      "grad_norm": 0.3264628052711487,
      "learning_rate": 2.6585531387755907e-06,
      "loss": 0.0122,
      "step": 2248320
    },
    {
      "epoch": 3.6794577220923914,
      "grad_norm": 0.30762621760368347,
      "learning_rate": 2.6584872465620743e-06,
      "loss": 0.0109,
      "step": 2248340
    },
    {
      "epoch": 3.679490452531045,
      "grad_norm": 0.1482834666967392,
      "learning_rate": 2.658421354348557e-06,
      "loss": 0.0109,
      "step": 2248360
    },
    {
      "epoch": 3.679523182969698,
      "grad_norm": 0.18039953708648682,
      "learning_rate": 2.6583554621350398e-06,
      "loss": 0.0137,
      "step": 2248380
    },
    {
      "epoch": 3.6795559134083513,
      "grad_norm": 0.19063372910022736,
      "learning_rate": 2.658289569921523e-06,
      "loss": 0.0162,
      "step": 2248400
    },
    {
      "epoch": 3.679588643847005,
      "grad_norm": 0.3524577021598816,
      "learning_rate": 2.6582236777080057e-06,
      "loss": 0.0102,
      "step": 2248420
    },
    {
      "epoch": 3.679621374285658,
      "grad_norm": 0.1800701767206192,
      "learning_rate": 2.6581577854944884e-06,
      "loss": 0.0107,
      "step": 2248440
    },
    {
      "epoch": 3.6796541047243116,
      "grad_norm": 0.320730060338974,
      "learning_rate": 2.658091893280971e-06,
      "loss": 0.0069,
      "step": 2248460
    },
    {
      "epoch": 3.679686835162965,
      "grad_norm": 0.41353705525398254,
      "learning_rate": 2.6580260010674543e-06,
      "loss": 0.0088,
      "step": 2248480
    },
    {
      "epoch": 3.6797195656016184,
      "grad_norm": 0.21343742311000824,
      "learning_rate": 2.657960108853937e-06,
      "loss": 0.0079,
      "step": 2248500
    },
    {
      "epoch": 3.6797522960402715,
      "grad_norm": 0.2959873080253601,
      "learning_rate": 2.6578942166404198e-06,
      "loss": 0.0086,
      "step": 2248520
    },
    {
      "epoch": 3.6797850264789247,
      "grad_norm": 0.20341600477695465,
      "learning_rate": 2.6578283244269025e-06,
      "loss": 0.0111,
      "step": 2248540
    },
    {
      "epoch": 3.6798177569175783,
      "grad_norm": 0.27780550718307495,
      "learning_rate": 2.6577624322133857e-06,
      "loss": 0.0131,
      "step": 2248560
    },
    {
      "epoch": 3.6798504873562314,
      "grad_norm": 0.12424928694963455,
      "learning_rate": 2.6576965399998684e-06,
      "loss": 0.0111,
      "step": 2248580
    },
    {
      "epoch": 3.679883217794885,
      "grad_norm": 0.30466434359550476,
      "learning_rate": 2.657630647786351e-06,
      "loss": 0.0135,
      "step": 2248600
    },
    {
      "epoch": 3.679915948233538,
      "grad_norm": 0.12099532783031464,
      "learning_rate": 2.657564755572834e-06,
      "loss": 0.0119,
      "step": 2248620
    },
    {
      "epoch": 3.6799486786721918,
      "grad_norm": 0.2868994176387787,
      "learning_rate": 2.6574988633593166e-06,
      "loss": 0.0139,
      "step": 2248640
    },
    {
      "epoch": 3.679981409110845,
      "grad_norm": 0.42388373613357544,
      "learning_rate": 2.6574329711458e-06,
      "loss": 0.011,
      "step": 2248660
    },
    {
      "epoch": 3.680014139549498,
      "grad_norm": 0.20377789437770844,
      "learning_rate": 2.6573670789322825e-06,
      "loss": 0.0124,
      "step": 2248680
    },
    {
      "epoch": 3.6800468699881517,
      "grad_norm": 0.24510255455970764,
      "learning_rate": 2.6573011867187657e-06,
      "loss": 0.0099,
      "step": 2248700
    },
    {
      "epoch": 3.680079600426805,
      "grad_norm": 0.5940520167350769,
      "learning_rate": 2.657235294505249e-06,
      "loss": 0.0146,
      "step": 2248720
    },
    {
      "epoch": 3.6801123308654584,
      "grad_norm": 0.2156658172607422,
      "learning_rate": 2.6571694022917316e-06,
      "loss": 0.0111,
      "step": 2248740
    },
    {
      "epoch": 3.6801450613041116,
      "grad_norm": 0.2758248448371887,
      "learning_rate": 2.6571035100782144e-06,
      "loss": 0.0117,
      "step": 2248760
    },
    {
      "epoch": 3.680177791742765,
      "grad_norm": 0.24267837405204773,
      "learning_rate": 2.657037617864697e-06,
      "loss": 0.0059,
      "step": 2248780
    },
    {
      "epoch": 3.6802105221814183,
      "grad_norm": 0.11810632795095444,
      "learning_rate": 2.6569717256511803e-06,
      "loss": 0.0104,
      "step": 2248800
    },
    {
      "epoch": 3.6802432526200715,
      "grad_norm": 0.21651554107666016,
      "learning_rate": 2.656905833437663e-06,
      "loss": 0.0138,
      "step": 2248820
    },
    {
      "epoch": 3.680275983058725,
      "grad_norm": 0.22229748964309692,
      "learning_rate": 2.6568399412241457e-06,
      "loss": 0.0108,
      "step": 2248840
    },
    {
      "epoch": 3.680308713497378,
      "grad_norm": 0.338165283203125,
      "learning_rate": 2.6567740490106285e-06,
      "loss": 0.0106,
      "step": 2248860
    },
    {
      "epoch": 3.6803414439360314,
      "grad_norm": 0.07023785263299942,
      "learning_rate": 2.6567081567971116e-06,
      "loss": 0.0119,
      "step": 2248880
    },
    {
      "epoch": 3.680374174374685,
      "grad_norm": 0.6350435614585876,
      "learning_rate": 2.6566422645835944e-06,
      "loss": 0.0114,
      "step": 2248900
    },
    {
      "epoch": 3.6804069048133385,
      "grad_norm": 0.36672914028167725,
      "learning_rate": 2.656576372370077e-06,
      "loss": 0.0098,
      "step": 2248920
    },
    {
      "epoch": 3.6804396352519917,
      "grad_norm": 0.4810720682144165,
      "learning_rate": 2.65651048015656e-06,
      "loss": 0.0112,
      "step": 2248940
    },
    {
      "epoch": 3.680472365690645,
      "grad_norm": 0.3334149122238159,
      "learning_rate": 2.656444587943043e-06,
      "loss": 0.0109,
      "step": 2248960
    },
    {
      "epoch": 3.6805050961292984,
      "grad_norm": 0.8353931307792664,
      "learning_rate": 2.6563786957295258e-06,
      "loss": 0.016,
      "step": 2248980
    },
    {
      "epoch": 3.6805378265679516,
      "grad_norm": 0.30140337347984314,
      "learning_rate": 2.6563128035160085e-06,
      "loss": 0.0119,
      "step": 2249000
    },
    {
      "epoch": 3.6805705570066047,
      "grad_norm": 0.17740419507026672,
      "learning_rate": 2.6562469113024912e-06,
      "loss": 0.0085,
      "step": 2249020
    },
    {
      "epoch": 3.6806032874452583,
      "grad_norm": 0.6440789699554443,
      "learning_rate": 2.656181019088975e-06,
      "loss": 0.0143,
      "step": 2249040
    },
    {
      "epoch": 3.680636017883912,
      "grad_norm": 0.4961344003677368,
      "learning_rate": 2.6561151268754576e-06,
      "loss": 0.0156,
      "step": 2249060
    },
    {
      "epoch": 3.680668748322565,
      "grad_norm": 0.35903599858283997,
      "learning_rate": 2.6560492346619403e-06,
      "loss": 0.009,
      "step": 2249080
    },
    {
      "epoch": 3.6807014787612182,
      "grad_norm": 0.09447281807661057,
      "learning_rate": 2.6559833424484235e-06,
      "loss": 0.0102,
      "step": 2249100
    },
    {
      "epoch": 3.680734209199872,
      "grad_norm": 0.2096327394247055,
      "learning_rate": 2.655917450234906e-06,
      "loss": 0.0148,
      "step": 2249120
    },
    {
      "epoch": 3.680766939638525,
      "grad_norm": 0.18632803857326508,
      "learning_rate": 2.655851558021389e-06,
      "loss": 0.0083,
      "step": 2249140
    },
    {
      "epoch": 3.680799670077178,
      "grad_norm": 1.3051856756210327,
      "learning_rate": 2.6557856658078717e-06,
      "loss": 0.0113,
      "step": 2249160
    },
    {
      "epoch": 3.6808324005158317,
      "grad_norm": 0.48407068848609924,
      "learning_rate": 2.6557197735943544e-06,
      "loss": 0.0113,
      "step": 2249180
    },
    {
      "epoch": 3.6808651309544853,
      "grad_norm": 0.2775423228740692,
      "learning_rate": 2.6556538813808376e-06,
      "loss": 0.0111,
      "step": 2249200
    },
    {
      "epoch": 3.6808978613931385,
      "grad_norm": 0.20951931178569794,
      "learning_rate": 2.6555879891673203e-06,
      "loss": 0.0112,
      "step": 2249220
    },
    {
      "epoch": 3.6809305918317916,
      "grad_norm": 0.2879183292388916,
      "learning_rate": 2.655522096953803e-06,
      "loss": 0.0082,
      "step": 2249240
    },
    {
      "epoch": 3.680963322270445,
      "grad_norm": 0.3290310502052307,
      "learning_rate": 2.655456204740286e-06,
      "loss": 0.0098,
      "step": 2249260
    },
    {
      "epoch": 3.6809960527090984,
      "grad_norm": 0.6395948529243469,
      "learning_rate": 2.655390312526769e-06,
      "loss": 0.0167,
      "step": 2249280
    },
    {
      "epoch": 3.6810287831477515,
      "grad_norm": 0.10486984252929688,
      "learning_rate": 2.6553244203132517e-06,
      "loss": 0.0081,
      "step": 2249300
    },
    {
      "epoch": 3.681061513586405,
      "grad_norm": 0.7245898246765137,
      "learning_rate": 2.6552585280997345e-06,
      "loss": 0.0117,
      "step": 2249320
    },
    {
      "epoch": 3.6810942440250587,
      "grad_norm": 0.2008269727230072,
      "learning_rate": 2.655192635886217e-06,
      "loss": 0.0094,
      "step": 2249340
    },
    {
      "epoch": 3.681126974463712,
      "grad_norm": 0.10741250962018967,
      "learning_rate": 2.6551267436727004e-06,
      "loss": 0.0126,
      "step": 2249360
    },
    {
      "epoch": 3.681159704902365,
      "grad_norm": 0.15124838054180145,
      "learning_rate": 2.655060851459183e-06,
      "loss": 0.0152,
      "step": 2249380
    },
    {
      "epoch": 3.6811924353410186,
      "grad_norm": 0.10590258240699768,
      "learning_rate": 2.6549949592456663e-06,
      "loss": 0.0106,
      "step": 2249400
    },
    {
      "epoch": 3.6812251657796717,
      "grad_norm": 0.8125067949295044,
      "learning_rate": 2.6549290670321494e-06,
      "loss": 0.0143,
      "step": 2249420
    },
    {
      "epoch": 3.681257896218325,
      "grad_norm": 0.5365321040153503,
      "learning_rate": 2.654863174818632e-06,
      "loss": 0.012,
      "step": 2249440
    },
    {
      "epoch": 3.6812906266569785,
      "grad_norm": 0.4276626408100128,
      "learning_rate": 2.654797282605115e-06,
      "loss": 0.0115,
      "step": 2249460
    },
    {
      "epoch": 3.6813233570956316,
      "grad_norm": 0.13107910752296448,
      "learning_rate": 2.6547313903915976e-06,
      "loss": 0.0094,
      "step": 2249480
    },
    {
      "epoch": 3.6813560875342852,
      "grad_norm": 0.09508015960454941,
      "learning_rate": 2.654665498178081e-06,
      "loss": 0.0108,
      "step": 2249500
    },
    {
      "epoch": 3.6813888179729384,
      "grad_norm": 0.1240205317735672,
      "learning_rate": 2.6545996059645635e-06,
      "loss": 0.0128,
      "step": 2249520
    },
    {
      "epoch": 3.681421548411592,
      "grad_norm": 0.5076256990432739,
      "learning_rate": 2.6545337137510463e-06,
      "loss": 0.0103,
      "step": 2249540
    },
    {
      "epoch": 3.681454278850245,
      "grad_norm": 0.21509094536304474,
      "learning_rate": 2.654467821537529e-06,
      "loss": 0.0116,
      "step": 2249560
    },
    {
      "epoch": 3.6814870092888983,
      "grad_norm": 0.13496193289756775,
      "learning_rate": 2.6544019293240118e-06,
      "loss": 0.0117,
      "step": 2249580
    },
    {
      "epoch": 3.681519739727552,
      "grad_norm": 0.11260417103767395,
      "learning_rate": 2.654336037110495e-06,
      "loss": 0.0066,
      "step": 2249600
    },
    {
      "epoch": 3.681552470166205,
      "grad_norm": 0.1916663944721222,
      "learning_rate": 2.6542701448969777e-06,
      "loss": 0.0133,
      "step": 2249620
    },
    {
      "epoch": 3.6815852006048586,
      "grad_norm": 0.40139609575271606,
      "learning_rate": 2.6542042526834604e-06,
      "loss": 0.0115,
      "step": 2249640
    },
    {
      "epoch": 3.6816179310435118,
      "grad_norm": 0.1549043357372284,
      "learning_rate": 2.654138360469943e-06,
      "loss": 0.0118,
      "step": 2249660
    },
    {
      "epoch": 3.6816506614821654,
      "grad_norm": 0.20377613604068756,
      "learning_rate": 2.6540724682564263e-06,
      "loss": 0.0095,
      "step": 2249680
    },
    {
      "epoch": 3.6816833919208185,
      "grad_norm": 0.17572158575057983,
      "learning_rate": 2.654006576042909e-06,
      "loss": 0.011,
      "step": 2249700
    },
    {
      "epoch": 3.6817161223594717,
      "grad_norm": 0.5937030911445618,
      "learning_rate": 2.6539406838293918e-06,
      "loss": 0.0162,
      "step": 2249720
    },
    {
      "epoch": 3.6817488527981252,
      "grad_norm": 0.07923177629709244,
      "learning_rate": 2.6538747916158754e-06,
      "loss": 0.0086,
      "step": 2249740
    },
    {
      "epoch": 3.6817815832367784,
      "grad_norm": 0.18050076067447662,
      "learning_rate": 2.653808899402358e-06,
      "loss": 0.0132,
      "step": 2249760
    },
    {
      "epoch": 3.681814313675432,
      "grad_norm": 0.24712400138378143,
      "learning_rate": 2.653743007188841e-06,
      "loss": 0.0118,
      "step": 2249780
    },
    {
      "epoch": 3.681847044114085,
      "grad_norm": 0.20229105651378632,
      "learning_rate": 2.6536771149753236e-06,
      "loss": 0.0073,
      "step": 2249800
    },
    {
      "epoch": 3.6818797745527387,
      "grad_norm": 0.29644185304641724,
      "learning_rate": 2.6536112227618068e-06,
      "loss": 0.0117,
      "step": 2249820
    },
    {
      "epoch": 3.681912504991392,
      "grad_norm": 0.200151726603508,
      "learning_rate": 2.6535453305482895e-06,
      "loss": 0.0132,
      "step": 2249840
    },
    {
      "epoch": 3.681945235430045,
      "grad_norm": 0.16372303664684296,
      "learning_rate": 2.6534794383347722e-06,
      "loss": 0.0138,
      "step": 2249860
    },
    {
      "epoch": 3.6819779658686986,
      "grad_norm": 0.1416337788105011,
      "learning_rate": 2.653413546121255e-06,
      "loss": 0.0166,
      "step": 2249880
    },
    {
      "epoch": 3.682010696307352,
      "grad_norm": 0.4813194274902344,
      "learning_rate": 2.653347653907738e-06,
      "loss": 0.0128,
      "step": 2249900
    },
    {
      "epoch": 3.6820434267460054,
      "grad_norm": 0.4423680305480957,
      "learning_rate": 2.653281761694221e-06,
      "loss": 0.0124,
      "step": 2249920
    },
    {
      "epoch": 3.6820761571846585,
      "grad_norm": 0.9646541476249695,
      "learning_rate": 2.6532158694807036e-06,
      "loss": 0.013,
      "step": 2249940
    },
    {
      "epoch": 3.682108887623312,
      "grad_norm": 0.31045863032341003,
      "learning_rate": 2.6531499772671864e-06,
      "loss": 0.0143,
      "step": 2249960
    },
    {
      "epoch": 3.6821416180619653,
      "grad_norm": 0.1350637525320053,
      "learning_rate": 2.6530840850536695e-06,
      "loss": 0.0076,
      "step": 2249980
    },
    {
      "epoch": 3.6821743485006184,
      "grad_norm": 0.47876599431037903,
      "learning_rate": 2.6530181928401523e-06,
      "loss": 0.0078,
      "step": 2250000
    },
    {
      "epoch": 3.6821743485006184,
      "eval_loss": 0.006499048788100481,
      "eval_runtime": 6504.2156,
      "eval_samples_per_second": 158.03,
      "eval_steps_per_second": 15.803,
      "eval_sts-dev_pearson_cosine": 0.9852490842495237,
      "eval_sts-dev_spearman_cosine": 0.8956952021415281,
      "step": 2250000
    },
    {
      "epoch": 3.682207078939272,
      "grad_norm": 0.5381374359130859,
      "learning_rate": 2.652952300626635e-06,
      "loss": 0.0102,
      "step": 2250020
    },
    {
      "epoch": 3.682239809377925,
      "grad_norm": 0.038058556616306305,
      "learning_rate": 2.6528864084131177e-06,
      "loss": 0.0112,
      "step": 2250040
    },
    {
      "epoch": 3.6822725398165788,
      "grad_norm": 0.4688425064086914,
      "learning_rate": 2.6528205161996005e-06,
      "loss": 0.0171,
      "step": 2250060
    },
    {
      "epoch": 3.682305270255232,
      "grad_norm": 0.7725332379341125,
      "learning_rate": 2.6527546239860836e-06,
      "loss": 0.011,
      "step": 2250080
    },
    {
      "epoch": 3.6823380006938855,
      "grad_norm": 1.0147504806518555,
      "learning_rate": 2.652688731772567e-06,
      "loss": 0.0124,
      "step": 2250100
    },
    {
      "epoch": 3.6823707311325387,
      "grad_norm": 0.09882214665412903,
      "learning_rate": 2.6526228395590495e-06,
      "loss": 0.0097,
      "step": 2250120
    },
    {
      "epoch": 3.682403461571192,
      "grad_norm": 0.2057235687971115,
      "learning_rate": 2.6525569473455327e-06,
      "loss": 0.0167,
      "step": 2250140
    },
    {
      "epoch": 3.6824361920098454,
      "grad_norm": 0.09800053387880325,
      "learning_rate": 2.6524910551320155e-06,
      "loss": 0.0116,
      "step": 2250160
    },
    {
      "epoch": 3.6824689224484985,
      "grad_norm": 0.1877739578485489,
      "learning_rate": 2.652425162918498e-06,
      "loss": 0.0103,
      "step": 2250180
    },
    {
      "epoch": 3.682501652887152,
      "grad_norm": 0.23950397968292236,
      "learning_rate": 2.652359270704981e-06,
      "loss": 0.0134,
      "step": 2250200
    },
    {
      "epoch": 3.6825343833258053,
      "grad_norm": 0.07584282755851746,
      "learning_rate": 2.652293378491464e-06,
      "loss": 0.0088,
      "step": 2250220
    },
    {
      "epoch": 3.682567113764459,
      "grad_norm": 0.08374770730733871,
      "learning_rate": 2.652227486277947e-06,
      "loss": 0.0122,
      "step": 2250240
    },
    {
      "epoch": 3.682599844203112,
      "grad_norm": 0.1320665180683136,
      "learning_rate": 2.6521615940644296e-06,
      "loss": 0.0143,
      "step": 2250260
    },
    {
      "epoch": 3.682632574641765,
      "grad_norm": 0.13062281906604767,
      "learning_rate": 2.6520957018509123e-06,
      "loss": 0.0091,
      "step": 2250280
    },
    {
      "epoch": 3.682665305080419,
      "grad_norm": 0.2935613691806793,
      "learning_rate": 2.6520298096373955e-06,
      "loss": 0.0132,
      "step": 2250300
    },
    {
      "epoch": 3.682698035519072,
      "grad_norm": 0.4547991156578064,
      "learning_rate": 2.6519639174238782e-06,
      "loss": 0.0111,
      "step": 2250320
    },
    {
      "epoch": 3.6827307659577255,
      "grad_norm": 0.1138136237859726,
      "learning_rate": 2.651898025210361e-06,
      "loss": 0.0096,
      "step": 2250340
    },
    {
      "epoch": 3.6827634963963787,
      "grad_norm": 0.29819628596305847,
      "learning_rate": 2.6518321329968437e-06,
      "loss": 0.01,
      "step": 2250360
    },
    {
      "epoch": 3.6827962268350323,
      "grad_norm": 0.20762628316879272,
      "learning_rate": 2.651766240783327e-06,
      "loss": 0.0115,
      "step": 2250380
    },
    {
      "epoch": 3.6828289572736854,
      "grad_norm": 0.41493523120880127,
      "learning_rate": 2.6517003485698096e-06,
      "loss": 0.0105,
      "step": 2250400
    },
    {
      "epoch": 3.6828616877123386,
      "grad_norm": 0.24142055213451385,
      "learning_rate": 2.6516344563562923e-06,
      "loss": 0.0168,
      "step": 2250420
    },
    {
      "epoch": 3.682894418150992,
      "grad_norm": 0.24867668747901917,
      "learning_rate": 2.651568564142775e-06,
      "loss": 0.0092,
      "step": 2250440
    },
    {
      "epoch": 3.6829271485896453,
      "grad_norm": 0.31985124945640564,
      "learning_rate": 2.6515026719292587e-06,
      "loss": 0.0108,
      "step": 2250460
    },
    {
      "epoch": 3.6829598790282985,
      "grad_norm": 0.3432711362838745,
      "learning_rate": 2.6514367797157414e-06,
      "loss": 0.0083,
      "step": 2250480
    },
    {
      "epoch": 3.682992609466952,
      "grad_norm": 0.3379366397857666,
      "learning_rate": 2.651370887502224e-06,
      "loss": 0.0133,
      "step": 2250500
    },
    {
      "epoch": 3.6830253399056057,
      "grad_norm": 0.25706642866134644,
      "learning_rate": 2.6513049952887073e-06,
      "loss": 0.0134,
      "step": 2250520
    },
    {
      "epoch": 3.683058070344259,
      "grad_norm": 0.2796086370944977,
      "learning_rate": 2.65123910307519e-06,
      "loss": 0.0119,
      "step": 2250540
    },
    {
      "epoch": 3.683090800782912,
      "grad_norm": 0.22845694422721863,
      "learning_rate": 2.6511732108616728e-06,
      "loss": 0.0097,
      "step": 2250560
    },
    {
      "epoch": 3.6831235312215655,
      "grad_norm": 0.5295549631118774,
      "learning_rate": 2.6511073186481555e-06,
      "loss": 0.0129,
      "step": 2250580
    },
    {
      "epoch": 3.6831562616602187,
      "grad_norm": 0.11606913059949875,
      "learning_rate": 2.6510414264346383e-06,
      "loss": 0.01,
      "step": 2250600
    },
    {
      "epoch": 3.683188992098872,
      "grad_norm": 0.4227483570575714,
      "learning_rate": 2.6509755342211214e-06,
      "loss": 0.0171,
      "step": 2250620
    },
    {
      "epoch": 3.6832217225375254,
      "grad_norm": 0.3503817617893219,
      "learning_rate": 2.650909642007604e-06,
      "loss": 0.0123,
      "step": 2250640
    },
    {
      "epoch": 3.683254452976179,
      "grad_norm": 0.5628974437713623,
      "learning_rate": 2.650843749794087e-06,
      "loss": 0.0103,
      "step": 2250660
    },
    {
      "epoch": 3.683287183414832,
      "grad_norm": 0.5669282674789429,
      "learning_rate": 2.6507778575805696e-06,
      "loss": 0.0101,
      "step": 2250680
    },
    {
      "epoch": 3.6833199138534853,
      "grad_norm": 0.25322675704956055,
      "learning_rate": 2.650711965367053e-06,
      "loss": 0.0142,
      "step": 2250700
    },
    {
      "epoch": 3.683352644292139,
      "grad_norm": 0.3380630612373352,
      "learning_rate": 2.6506460731535356e-06,
      "loss": 0.0075,
      "step": 2250720
    },
    {
      "epoch": 3.683385374730792,
      "grad_norm": 3.647786855697632,
      "learning_rate": 2.6505801809400183e-06,
      "loss": 0.0104,
      "step": 2250740
    },
    {
      "epoch": 3.6834181051694452,
      "grad_norm": 0.6908805966377258,
      "learning_rate": 2.650514288726501e-06,
      "loss": 0.0137,
      "step": 2250760
    },
    {
      "epoch": 3.683450835608099,
      "grad_norm": 0.41215115785598755,
      "learning_rate": 2.650448396512984e-06,
      "loss": 0.0106,
      "step": 2250780
    },
    {
      "epoch": 3.6834835660467524,
      "grad_norm": 0.3003731071949005,
      "learning_rate": 2.6503825042994674e-06,
      "loss": 0.0073,
      "step": 2250800
    },
    {
      "epoch": 3.6835162964854056,
      "grad_norm": 0.4710962474346161,
      "learning_rate": 2.65031661208595e-06,
      "loss": 0.0092,
      "step": 2250820
    },
    {
      "epoch": 3.6835490269240587,
      "grad_norm": 0.10451658815145493,
      "learning_rate": 2.6502507198724333e-06,
      "loss": 0.011,
      "step": 2250840
    },
    {
      "epoch": 3.6835817573627123,
      "grad_norm": 0.17783492803573608,
      "learning_rate": 2.650184827658916e-06,
      "loss": 0.0095,
      "step": 2250860
    },
    {
      "epoch": 3.6836144878013655,
      "grad_norm": 0.3844922184944153,
      "learning_rate": 2.6501189354453987e-06,
      "loss": 0.008,
      "step": 2250880
    },
    {
      "epoch": 3.6836472182400186,
      "grad_norm": 0.2570139765739441,
      "learning_rate": 2.6500530432318815e-06,
      "loss": 0.0107,
      "step": 2250900
    },
    {
      "epoch": 3.683679948678672,
      "grad_norm": 0.18757356703281403,
      "learning_rate": 2.6499871510183646e-06,
      "loss": 0.0152,
      "step": 2250920
    },
    {
      "epoch": 3.683712679117326,
      "grad_norm": 0.10408590734004974,
      "learning_rate": 2.6499212588048474e-06,
      "loss": 0.0084,
      "step": 2250940
    },
    {
      "epoch": 3.683745409555979,
      "grad_norm": 0.5008354187011719,
      "learning_rate": 2.64985536659133e-06,
      "loss": 0.0179,
      "step": 2250960
    },
    {
      "epoch": 3.683778139994632,
      "grad_norm": 0.22345654666423798,
      "learning_rate": 2.649789474377813e-06,
      "loss": 0.0082,
      "step": 2250980
    },
    {
      "epoch": 3.6838108704332857,
      "grad_norm": 0.15325374901294708,
      "learning_rate": 2.649723582164296e-06,
      "loss": 0.0118,
      "step": 2251000
    },
    {
      "epoch": 3.683843600871939,
      "grad_norm": 0.8341471552848816,
      "learning_rate": 2.6496576899507788e-06,
      "loss": 0.0131,
      "step": 2251020
    },
    {
      "epoch": 3.683876331310592,
      "grad_norm": 0.17981913685798645,
      "learning_rate": 2.6495917977372615e-06,
      "loss": 0.0109,
      "step": 2251040
    },
    {
      "epoch": 3.6839090617492456,
      "grad_norm": 0.31965285539627075,
      "learning_rate": 2.6495259055237442e-06,
      "loss": 0.0088,
      "step": 2251060
    },
    {
      "epoch": 3.6839417921878987,
      "grad_norm": 0.38734155893325806,
      "learning_rate": 2.649460013310227e-06,
      "loss": 0.0151,
      "step": 2251080
    },
    {
      "epoch": 3.6839745226265523,
      "grad_norm": 0.5693636536598206,
      "learning_rate": 2.64939412109671e-06,
      "loss": 0.0115,
      "step": 2251100
    },
    {
      "epoch": 3.6840072530652055,
      "grad_norm": 0.11086935549974442,
      "learning_rate": 2.649328228883193e-06,
      "loss": 0.0082,
      "step": 2251120
    },
    {
      "epoch": 3.684039983503859,
      "grad_norm": 0.13368596136569977,
      "learning_rate": 2.6492623366696756e-06,
      "loss": 0.0059,
      "step": 2251140
    },
    {
      "epoch": 3.6840727139425122,
      "grad_norm": 0.12614217400550842,
      "learning_rate": 2.6491964444561592e-06,
      "loss": 0.0105,
      "step": 2251160
    },
    {
      "epoch": 3.6841054443811654,
      "grad_norm": 0.07338691502809525,
      "learning_rate": 2.649130552242642e-06,
      "loss": 0.0067,
      "step": 2251180
    },
    {
      "epoch": 3.684138174819819,
      "grad_norm": 0.34061717987060547,
      "learning_rate": 2.6490646600291247e-06,
      "loss": 0.0134,
      "step": 2251200
    },
    {
      "epoch": 3.684170905258472,
      "grad_norm": 0.1573074907064438,
      "learning_rate": 2.6489987678156074e-06,
      "loss": 0.0154,
      "step": 2251220
    },
    {
      "epoch": 3.6842036356971257,
      "grad_norm": 0.34628307819366455,
      "learning_rate": 2.6489328756020906e-06,
      "loss": 0.0108,
      "step": 2251240
    },
    {
      "epoch": 3.684236366135779,
      "grad_norm": 0.2642378509044647,
      "learning_rate": 2.6488669833885733e-06,
      "loss": 0.0081,
      "step": 2251260
    },
    {
      "epoch": 3.6842690965744325,
      "grad_norm": 0.3535594940185547,
      "learning_rate": 2.648801091175056e-06,
      "loss": 0.0109,
      "step": 2251280
    },
    {
      "epoch": 3.6843018270130856,
      "grad_norm": 0.1914684772491455,
      "learning_rate": 2.648735198961539e-06,
      "loss": 0.0084,
      "step": 2251300
    },
    {
      "epoch": 3.6843345574517388,
      "grad_norm": 0.19066756963729858,
      "learning_rate": 2.648669306748022e-06,
      "loss": 0.0185,
      "step": 2251320
    },
    {
      "epoch": 3.6843672878903924,
      "grad_norm": 0.18074484169483185,
      "learning_rate": 2.6486034145345047e-06,
      "loss": 0.011,
      "step": 2251340
    },
    {
      "epoch": 3.6844000183290455,
      "grad_norm": 0.22261114418506622,
      "learning_rate": 2.6485375223209875e-06,
      "loss": 0.012,
      "step": 2251360
    },
    {
      "epoch": 3.684432748767699,
      "grad_norm": 0.21677878499031067,
      "learning_rate": 2.64847163010747e-06,
      "loss": 0.011,
      "step": 2251380
    },
    {
      "epoch": 3.6844654792063523,
      "grad_norm": 0.1878846287727356,
      "learning_rate": 2.6484057378939534e-06,
      "loss": 0.0111,
      "step": 2251400
    },
    {
      "epoch": 3.684498209645006,
      "grad_norm": 0.19344249367713928,
      "learning_rate": 2.648339845680436e-06,
      "loss": 0.0135,
      "step": 2251420
    },
    {
      "epoch": 3.684530940083659,
      "grad_norm": 0.49356985092163086,
      "learning_rate": 2.648273953466919e-06,
      "loss": 0.0157,
      "step": 2251440
    },
    {
      "epoch": 3.684563670522312,
      "grad_norm": 0.17078284919261932,
      "learning_rate": 2.6482080612534016e-06,
      "loss": 0.0091,
      "step": 2251460
    },
    {
      "epoch": 3.6845964009609657,
      "grad_norm": 0.25052371621131897,
      "learning_rate": 2.6481421690398843e-06,
      "loss": 0.0099,
      "step": 2251480
    },
    {
      "epoch": 3.684629131399619,
      "grad_norm": 0.6939968466758728,
      "learning_rate": 2.648076276826368e-06,
      "loss": 0.0115,
      "step": 2251500
    },
    {
      "epoch": 3.6846618618382725,
      "grad_norm": 0.31076234579086304,
      "learning_rate": 2.6480103846128506e-06,
      "loss": 0.008,
      "step": 2251520
    },
    {
      "epoch": 3.6846945922769256,
      "grad_norm": 0.2763529121875763,
      "learning_rate": 2.647944492399334e-06,
      "loss": 0.008,
      "step": 2251540
    },
    {
      "epoch": 3.6847273227155792,
      "grad_norm": 0.7396954894065857,
      "learning_rate": 2.6478786001858166e-06,
      "loss": 0.0093,
      "step": 2251560
    },
    {
      "epoch": 3.6847600531542324,
      "grad_norm": 0.45013684034347534,
      "learning_rate": 2.6478127079722993e-06,
      "loss": 0.0153,
      "step": 2251580
    },
    {
      "epoch": 3.6847927835928855,
      "grad_norm": 0.20317715406417847,
      "learning_rate": 2.647746815758782e-06,
      "loss": 0.011,
      "step": 2251600
    },
    {
      "epoch": 3.684825514031539,
      "grad_norm": 0.1233471930027008,
      "learning_rate": 2.6476809235452648e-06,
      "loss": 0.0097,
      "step": 2251620
    },
    {
      "epoch": 3.6848582444701923,
      "grad_norm": 0.47434812784194946,
      "learning_rate": 2.647615031331748e-06,
      "loss": 0.0121,
      "step": 2251640
    },
    {
      "epoch": 3.684890974908846,
      "grad_norm": 0.8286579251289368,
      "learning_rate": 2.6475491391182307e-06,
      "loss": 0.0142,
      "step": 2251660
    },
    {
      "epoch": 3.684923705347499,
      "grad_norm": 0.11998401582241058,
      "learning_rate": 2.6474832469047134e-06,
      "loss": 0.0095,
      "step": 2251680
    },
    {
      "epoch": 3.6849564357861526,
      "grad_norm": 0.23387184739112854,
      "learning_rate": 2.647417354691196e-06,
      "loss": 0.0123,
      "step": 2251700
    },
    {
      "epoch": 3.6849891662248058,
      "grad_norm": 0.11578549444675446,
      "learning_rate": 2.6473514624776793e-06,
      "loss": 0.0121,
      "step": 2251720
    },
    {
      "epoch": 3.685021896663459,
      "grad_norm": 0.24489404261112213,
      "learning_rate": 2.647285570264162e-06,
      "loss": 0.0108,
      "step": 2251740
    },
    {
      "epoch": 3.6850546271021125,
      "grad_norm": 0.10101977735757828,
      "learning_rate": 2.647219678050645e-06,
      "loss": 0.0153,
      "step": 2251760
    },
    {
      "epoch": 3.6850873575407657,
      "grad_norm": 0.5695520639419556,
      "learning_rate": 2.6471537858371275e-06,
      "loss": 0.0123,
      "step": 2251780
    },
    {
      "epoch": 3.6851200879794193,
      "grad_norm": 0.09810624271631241,
      "learning_rate": 2.6470878936236107e-06,
      "loss": 0.0145,
      "step": 2251800
    },
    {
      "epoch": 3.6851528184180724,
      "grad_norm": 0.41380539536476135,
      "learning_rate": 2.6470220014100934e-06,
      "loss": 0.0131,
      "step": 2251820
    },
    {
      "epoch": 3.685185548856726,
      "grad_norm": 0.24987490475177765,
      "learning_rate": 2.646956109196576e-06,
      "loss": 0.013,
      "step": 2251840
    },
    {
      "epoch": 3.685218279295379,
      "grad_norm": 0.37054339051246643,
      "learning_rate": 2.6468902169830598e-06,
      "loss": 0.0082,
      "step": 2251860
    },
    {
      "epoch": 3.6852510097340323,
      "grad_norm": 0.3646143078804016,
      "learning_rate": 2.6468243247695425e-06,
      "loss": 0.0112,
      "step": 2251880
    },
    {
      "epoch": 3.685283740172686,
      "grad_norm": 0.1844683140516281,
      "learning_rate": 2.6467584325560252e-06,
      "loss": 0.0081,
      "step": 2251900
    },
    {
      "epoch": 3.685316470611339,
      "grad_norm": 0.6960710883140564,
      "learning_rate": 2.646692540342508e-06,
      "loss": 0.0114,
      "step": 2251920
    },
    {
      "epoch": 3.685349201049992,
      "grad_norm": 0.5122987031936646,
      "learning_rate": 2.646626648128991e-06,
      "loss": 0.012,
      "step": 2251940
    },
    {
      "epoch": 3.685381931488646,
      "grad_norm": 0.20857855677604675,
      "learning_rate": 2.646560755915474e-06,
      "loss": 0.01,
      "step": 2251960
    },
    {
      "epoch": 3.6854146619272994,
      "grad_norm": 0.15645276010036469,
      "learning_rate": 2.6464948637019566e-06,
      "loss": 0.0131,
      "step": 2251980
    },
    {
      "epoch": 3.6854473923659525,
      "grad_norm": 0.13434173166751862,
      "learning_rate": 2.6464289714884394e-06,
      "loss": 0.007,
      "step": 2252000
    },
    {
      "epoch": 3.6854801228046057,
      "grad_norm": 0.5994883179664612,
      "learning_rate": 2.646363079274922e-06,
      "loss": 0.0112,
      "step": 2252020
    },
    {
      "epoch": 3.6855128532432593,
      "grad_norm": 0.5158697962760925,
      "learning_rate": 2.6462971870614053e-06,
      "loss": 0.007,
      "step": 2252040
    },
    {
      "epoch": 3.6855455836819124,
      "grad_norm": 0.34676215052604675,
      "learning_rate": 2.646231294847888e-06,
      "loss": 0.0089,
      "step": 2252060
    },
    {
      "epoch": 3.6855783141205656,
      "grad_norm": 0.0742625817656517,
      "learning_rate": 2.6461654026343707e-06,
      "loss": 0.0104,
      "step": 2252080
    },
    {
      "epoch": 3.685611044559219,
      "grad_norm": 0.18870308995246887,
      "learning_rate": 2.6460995104208535e-06,
      "loss": 0.0126,
      "step": 2252100
    },
    {
      "epoch": 3.6856437749978728,
      "grad_norm": 0.35272499918937683,
      "learning_rate": 2.6460336182073367e-06,
      "loss": 0.0099,
      "step": 2252120
    },
    {
      "epoch": 3.685676505436526,
      "grad_norm": 0.338113009929657,
      "learning_rate": 2.6459677259938194e-06,
      "loss": 0.0143,
      "step": 2252140
    },
    {
      "epoch": 3.685709235875179,
      "grad_norm": 0.10435216128826141,
      "learning_rate": 2.645901833780302e-06,
      "loss": 0.008,
      "step": 2252160
    },
    {
      "epoch": 3.6857419663138327,
      "grad_norm": 0.14323952794075012,
      "learning_rate": 2.645835941566785e-06,
      "loss": 0.0144,
      "step": 2252180
    },
    {
      "epoch": 3.685774696752486,
      "grad_norm": 0.45315882563591003,
      "learning_rate": 2.645770049353268e-06,
      "loss": 0.0098,
      "step": 2252200
    },
    {
      "epoch": 3.685807427191139,
      "grad_norm": 0.48736807703971863,
      "learning_rate": 2.645704157139751e-06,
      "loss": 0.0125,
      "step": 2252220
    },
    {
      "epoch": 3.6858401576297926,
      "grad_norm": 0.4485923647880554,
      "learning_rate": 2.645638264926234e-06,
      "loss": 0.0102,
      "step": 2252240
    },
    {
      "epoch": 3.685872888068446,
      "grad_norm": 0.15562106668949127,
      "learning_rate": 2.645572372712717e-06,
      "loss": 0.0083,
      "step": 2252260
    },
    {
      "epoch": 3.6859056185070993,
      "grad_norm": 0.27000728249549866,
      "learning_rate": 2.6455064804992e-06,
      "loss": 0.0088,
      "step": 2252280
    },
    {
      "epoch": 3.6859383489457525,
      "grad_norm": 0.07143867015838623,
      "learning_rate": 2.6454405882856826e-06,
      "loss": 0.0107,
      "step": 2252300
    },
    {
      "epoch": 3.685971079384406,
      "grad_norm": 0.19480343163013458,
      "learning_rate": 2.6453746960721653e-06,
      "loss": 0.0099,
      "step": 2252320
    },
    {
      "epoch": 3.686003809823059,
      "grad_norm": 0.13539908826351166,
      "learning_rate": 2.6453088038586485e-06,
      "loss": 0.0071,
      "step": 2252340
    },
    {
      "epoch": 3.6860365402617123,
      "grad_norm": 0.21607765555381775,
      "learning_rate": 2.6452429116451312e-06,
      "loss": 0.0078,
      "step": 2252360
    },
    {
      "epoch": 3.686069270700366,
      "grad_norm": 0.2227172702550888,
      "learning_rate": 2.645177019431614e-06,
      "loss": 0.0123,
      "step": 2252380
    },
    {
      "epoch": 3.6861020011390195,
      "grad_norm": 0.17483097314834595,
      "learning_rate": 2.6451111272180967e-06,
      "loss": 0.0075,
      "step": 2252400
    },
    {
      "epoch": 3.6861347315776727,
      "grad_norm": 0.2644740343093872,
      "learning_rate": 2.64504523500458e-06,
      "loss": 0.0095,
      "step": 2252420
    },
    {
      "epoch": 3.686167462016326,
      "grad_norm": 0.4997532069683075,
      "learning_rate": 2.6449793427910626e-06,
      "loss": 0.0093,
      "step": 2252440
    },
    {
      "epoch": 3.6862001924549794,
      "grad_norm": 0.3691023290157318,
      "learning_rate": 2.6449134505775453e-06,
      "loss": 0.0114,
      "step": 2252460
    },
    {
      "epoch": 3.6862329228936326,
      "grad_norm": 0.3568102717399597,
      "learning_rate": 2.644847558364028e-06,
      "loss": 0.0103,
      "step": 2252480
    },
    {
      "epoch": 3.6862656533322857,
      "grad_norm": 0.29874786734580994,
      "learning_rate": 2.644781666150511e-06,
      "loss": 0.0087,
      "step": 2252500
    },
    {
      "epoch": 3.6862983837709393,
      "grad_norm": 0.18074819445610046,
      "learning_rate": 2.644715773936994e-06,
      "loss": 0.016,
      "step": 2252520
    },
    {
      "epoch": 3.6863311142095925,
      "grad_norm": 0.14054752886295319,
      "learning_rate": 2.6446498817234767e-06,
      "loss": 0.0083,
      "step": 2252540
    },
    {
      "epoch": 3.686363844648246,
      "grad_norm": 0.1371353268623352,
      "learning_rate": 2.64458398950996e-06,
      "loss": 0.0131,
      "step": 2252560
    },
    {
      "epoch": 3.686396575086899,
      "grad_norm": 0.05628855526447296,
      "learning_rate": 2.644518097296443e-06,
      "loss": 0.0092,
      "step": 2252580
    },
    {
      "epoch": 3.686429305525553,
      "grad_norm": 0.7690414190292358,
      "learning_rate": 2.644452205082926e-06,
      "loss": 0.0147,
      "step": 2252600
    },
    {
      "epoch": 3.686462035964206,
      "grad_norm": 0.5498402714729309,
      "learning_rate": 2.6443863128694085e-06,
      "loss": 0.0121,
      "step": 2252620
    },
    {
      "epoch": 3.686494766402859,
      "grad_norm": 0.31819263100624084,
      "learning_rate": 2.6443204206558913e-06,
      "loss": 0.0137,
      "step": 2252640
    },
    {
      "epoch": 3.6865274968415127,
      "grad_norm": 0.10628975927829742,
      "learning_rate": 2.6442545284423744e-06,
      "loss": 0.0118,
      "step": 2252660
    },
    {
      "epoch": 3.686560227280166,
      "grad_norm": 0.3396908640861511,
      "learning_rate": 2.644188636228857e-06,
      "loss": 0.0089,
      "step": 2252680
    },
    {
      "epoch": 3.6865929577188195,
      "grad_norm": 0.2482084184885025,
      "learning_rate": 2.64412274401534e-06,
      "loss": 0.0116,
      "step": 2252700
    },
    {
      "epoch": 3.6866256881574726,
      "grad_norm": 0.2700482904911041,
      "learning_rate": 2.6440568518018227e-06,
      "loss": 0.0119,
      "step": 2252720
    },
    {
      "epoch": 3.686658418596126,
      "grad_norm": 0.5893441438674927,
      "learning_rate": 2.643990959588306e-06,
      "loss": 0.0128,
      "step": 2252740
    },
    {
      "epoch": 3.6866911490347793,
      "grad_norm": 0.36937275528907776,
      "learning_rate": 2.6439250673747886e-06,
      "loss": 0.0095,
      "step": 2252760
    },
    {
      "epoch": 3.6867238794734325,
      "grad_norm": 0.31003502011299133,
      "learning_rate": 2.6438591751612713e-06,
      "loss": 0.008,
      "step": 2252780
    },
    {
      "epoch": 3.686756609912086,
      "grad_norm": 0.25133106112480164,
      "learning_rate": 2.643793282947754e-06,
      "loss": 0.0117,
      "step": 2252800
    },
    {
      "epoch": 3.6867893403507392,
      "grad_norm": 0.27982449531555176,
      "learning_rate": 2.643727390734237e-06,
      "loss": 0.0139,
      "step": 2252820
    },
    {
      "epoch": 3.686822070789393,
      "grad_norm": 0.3662676513195038,
      "learning_rate": 2.64366149852072e-06,
      "loss": 0.0166,
      "step": 2252840
    },
    {
      "epoch": 3.686854801228046,
      "grad_norm": 0.4066931903362274,
      "learning_rate": 2.6435956063072027e-06,
      "loss": 0.0154,
      "step": 2252860
    },
    {
      "epoch": 3.6868875316666996,
      "grad_norm": 0.2093864232301712,
      "learning_rate": 2.6435297140936854e-06,
      "loss": 0.009,
      "step": 2252880
    },
    {
      "epoch": 3.6869202621053527,
      "grad_norm": 0.2325369417667389,
      "learning_rate": 2.643463821880168e-06,
      "loss": 0.009,
      "step": 2252900
    },
    {
      "epoch": 3.686952992544006,
      "grad_norm": 0.0936611071228981,
      "learning_rate": 2.6433979296666517e-06,
      "loss": 0.0162,
      "step": 2252920
    },
    {
      "epoch": 3.6869857229826595,
      "grad_norm": 0.21130618453025818,
      "learning_rate": 2.6433320374531345e-06,
      "loss": 0.0099,
      "step": 2252940
    },
    {
      "epoch": 3.6870184534213126,
      "grad_norm": 0.16607262194156647,
      "learning_rate": 2.6432661452396176e-06,
      "loss": 0.0134,
      "step": 2252960
    },
    {
      "epoch": 3.687051183859966,
      "grad_norm": 0.3716622292995453,
      "learning_rate": 2.6432002530261004e-06,
      "loss": 0.0093,
      "step": 2252980
    },
    {
      "epoch": 3.6870839142986194,
      "grad_norm": 1.0191389322280884,
      "learning_rate": 2.643134360812583e-06,
      "loss": 0.0083,
      "step": 2253000
    },
    {
      "epoch": 3.687116644737273,
      "grad_norm": 0.17851074039936066,
      "learning_rate": 2.643068468599066e-06,
      "loss": 0.009,
      "step": 2253020
    },
    {
      "epoch": 3.687149375175926,
      "grad_norm": 0.1874387413263321,
      "learning_rate": 2.6430025763855486e-06,
      "loss": 0.0117,
      "step": 2253040
    },
    {
      "epoch": 3.6871821056145793,
      "grad_norm": 0.18852712213993073,
      "learning_rate": 2.6429366841720318e-06,
      "loss": 0.01,
      "step": 2253060
    },
    {
      "epoch": 3.687214836053233,
      "grad_norm": 0.3152753710746765,
      "learning_rate": 2.6428707919585145e-06,
      "loss": 0.0107,
      "step": 2253080
    },
    {
      "epoch": 3.687247566491886,
      "grad_norm": 0.12452401965856552,
      "learning_rate": 2.6428048997449973e-06,
      "loss": 0.0107,
      "step": 2253100
    },
    {
      "epoch": 3.6872802969305396,
      "grad_norm": 0.4640166461467743,
      "learning_rate": 2.64273900753148e-06,
      "loss": 0.0149,
      "step": 2253120
    },
    {
      "epoch": 3.6873130273691928,
      "grad_norm": 0.3754383325576782,
      "learning_rate": 2.642673115317963e-06,
      "loss": 0.0107,
      "step": 2253140
    },
    {
      "epoch": 3.6873457578078463,
      "grad_norm": 0.18442402780056,
      "learning_rate": 2.642607223104446e-06,
      "loss": 0.01,
      "step": 2253160
    },
    {
      "epoch": 3.6873784882464995,
      "grad_norm": 0.3117683529853821,
      "learning_rate": 2.6425413308909286e-06,
      "loss": 0.0107,
      "step": 2253180
    },
    {
      "epoch": 3.6874112186851526,
      "grad_norm": 0.2992222309112549,
      "learning_rate": 2.6424754386774114e-06,
      "loss": 0.0097,
      "step": 2253200
    },
    {
      "epoch": 3.6874439491238062,
      "grad_norm": 0.20127534866333008,
      "learning_rate": 2.6424095464638945e-06,
      "loss": 0.0146,
      "step": 2253220
    },
    {
      "epoch": 3.6874766795624594,
      "grad_norm": 0.23191846907138824,
      "learning_rate": 2.6423436542503773e-06,
      "loss": 0.0094,
      "step": 2253240
    },
    {
      "epoch": 3.687509410001113,
      "grad_norm": 0.5333139896392822,
      "learning_rate": 2.6422777620368604e-06,
      "loss": 0.0103,
      "step": 2253260
    },
    {
      "epoch": 3.687542140439766,
      "grad_norm": 0.5930336117744446,
      "learning_rate": 2.6422118698233436e-06,
      "loss": 0.0128,
      "step": 2253280
    },
    {
      "epoch": 3.6875748708784197,
      "grad_norm": 0.1955374926328659,
      "learning_rate": 2.6421459776098263e-06,
      "loss": 0.0086,
      "step": 2253300
    },
    {
      "epoch": 3.687607601317073,
      "grad_norm": 0.28614550828933716,
      "learning_rate": 2.642080085396309e-06,
      "loss": 0.012,
      "step": 2253320
    },
    {
      "epoch": 3.687640331755726,
      "grad_norm": 0.5892541408538818,
      "learning_rate": 2.642014193182792e-06,
      "loss": 0.0149,
      "step": 2253340
    },
    {
      "epoch": 3.6876730621943796,
      "grad_norm": 0.3423681855201721,
      "learning_rate": 2.641948300969275e-06,
      "loss": 0.0133,
      "step": 2253360
    },
    {
      "epoch": 3.6877057926330328,
      "grad_norm": 0.4746312201023102,
      "learning_rate": 2.6418824087557577e-06,
      "loss": 0.0125,
      "step": 2253380
    },
    {
      "epoch": 3.6877385230716864,
      "grad_norm": 0.27697160840034485,
      "learning_rate": 2.6418165165422405e-06,
      "loss": 0.0135,
      "step": 2253400
    },
    {
      "epoch": 3.6877712535103395,
      "grad_norm": 0.3915603458881378,
      "learning_rate": 2.641750624328723e-06,
      "loss": 0.0121,
      "step": 2253420
    },
    {
      "epoch": 3.687803983948993,
      "grad_norm": 0.08079500496387482,
      "learning_rate": 2.641684732115206e-06,
      "loss": 0.0095,
      "step": 2253440
    },
    {
      "epoch": 3.6878367143876463,
      "grad_norm": 0.2207556962966919,
      "learning_rate": 2.641618839901689e-06,
      "loss": 0.0074,
      "step": 2253460
    },
    {
      "epoch": 3.6878694448262994,
      "grad_norm": 0.29516708850860596,
      "learning_rate": 2.641552947688172e-06,
      "loss": 0.0095,
      "step": 2253480
    },
    {
      "epoch": 3.687902175264953,
      "grad_norm": 0.29649287462234497,
      "learning_rate": 2.6414870554746546e-06,
      "loss": 0.0118,
      "step": 2253500
    },
    {
      "epoch": 3.687934905703606,
      "grad_norm": 0.3516334593296051,
      "learning_rate": 2.6414211632611373e-06,
      "loss": 0.0115,
      "step": 2253520
    },
    {
      "epoch": 3.6879676361422593,
      "grad_norm": 0.48603159189224243,
      "learning_rate": 2.6413552710476205e-06,
      "loss": 0.0069,
      "step": 2253540
    },
    {
      "epoch": 3.688000366580913,
      "grad_norm": 0.3382061719894409,
      "learning_rate": 2.6412893788341032e-06,
      "loss": 0.0097,
      "step": 2253560
    },
    {
      "epoch": 3.6880330970195665,
      "grad_norm": 0.23801939189434052,
      "learning_rate": 2.641223486620586e-06,
      "loss": 0.0107,
      "step": 2253580
    },
    {
      "epoch": 3.6880658274582196,
      "grad_norm": 0.31395137310028076,
      "learning_rate": 2.6411575944070687e-06,
      "loss": 0.0088,
      "step": 2253600
    },
    {
      "epoch": 3.688098557896873,
      "grad_norm": 0.26922112703323364,
      "learning_rate": 2.6410917021935523e-06,
      "loss": 0.0095,
      "step": 2253620
    },
    {
      "epoch": 3.6881312883355264,
      "grad_norm": 0.5978260636329651,
      "learning_rate": 2.641025809980035e-06,
      "loss": 0.014,
      "step": 2253640
    },
    {
      "epoch": 3.6881640187741795,
      "grad_norm": 0.18263442814350128,
      "learning_rate": 2.6409599177665178e-06,
      "loss": 0.0173,
      "step": 2253660
    },
    {
      "epoch": 3.6881967492128327,
      "grad_norm": 0.2566024363040924,
      "learning_rate": 2.640894025553001e-06,
      "loss": 0.009,
      "step": 2253680
    },
    {
      "epoch": 3.6882294796514863,
      "grad_norm": 0.053514111787080765,
      "learning_rate": 2.6408281333394837e-06,
      "loss": 0.0085,
      "step": 2253700
    },
    {
      "epoch": 3.68826221009014,
      "grad_norm": 0.35384657979011536,
      "learning_rate": 2.6407622411259664e-06,
      "loss": 0.0168,
      "step": 2253720
    },
    {
      "epoch": 3.688294940528793,
      "grad_norm": 0.3845210075378418,
      "learning_rate": 2.640696348912449e-06,
      "loss": 0.0086,
      "step": 2253740
    },
    {
      "epoch": 3.688327670967446,
      "grad_norm": 0.18043769896030426,
      "learning_rate": 2.6406304566989323e-06,
      "loss": 0.0137,
      "step": 2253760
    },
    {
      "epoch": 3.6883604014060998,
      "grad_norm": 0.24004217982292175,
      "learning_rate": 2.640564564485415e-06,
      "loss": 0.0093,
      "step": 2253780
    },
    {
      "epoch": 3.688393131844753,
      "grad_norm": 0.15381760895252228,
      "learning_rate": 2.640498672271898e-06,
      "loss": 0.0091,
      "step": 2253800
    },
    {
      "epoch": 3.688425862283406,
      "grad_norm": 0.34002354741096497,
      "learning_rate": 2.6404327800583805e-06,
      "loss": 0.0114,
      "step": 2253820
    },
    {
      "epoch": 3.6884585927220597,
      "grad_norm": 0.09844283759593964,
      "learning_rate": 2.6403668878448637e-06,
      "loss": 0.0102,
      "step": 2253840
    },
    {
      "epoch": 3.6884913231607133,
      "grad_norm": 0.09076162427663803,
      "learning_rate": 2.6403009956313464e-06,
      "loss": 0.0084,
      "step": 2253860
    },
    {
      "epoch": 3.6885240535993664,
      "grad_norm": 0.7200327515602112,
      "learning_rate": 2.640235103417829e-06,
      "loss": 0.0107,
      "step": 2253880
    },
    {
      "epoch": 3.6885567840380196,
      "grad_norm": 0.1641649603843689,
      "learning_rate": 2.640169211204312e-06,
      "loss": 0.0118,
      "step": 2253900
    },
    {
      "epoch": 3.688589514476673,
      "grad_norm": 0.2965448796749115,
      "learning_rate": 2.6401033189907947e-06,
      "loss": 0.0086,
      "step": 2253920
    },
    {
      "epoch": 3.6886222449153263,
      "grad_norm": 0.2830432057380676,
      "learning_rate": 2.640037426777278e-06,
      "loss": 0.0095,
      "step": 2253940
    },
    {
      "epoch": 3.6886549753539795,
      "grad_norm": 0.13544787466526031,
      "learning_rate": 2.6399715345637606e-06,
      "loss": 0.0134,
      "step": 2253960
    },
    {
      "epoch": 3.688687705792633,
      "grad_norm": 0.6934430003166199,
      "learning_rate": 2.6399056423502437e-06,
      "loss": 0.0112,
      "step": 2253980
    },
    {
      "epoch": 3.6887204362312866,
      "grad_norm": 0.24661603569984436,
      "learning_rate": 2.639839750136727e-06,
      "loss": 0.0103,
      "step": 2254000
    },
    {
      "epoch": 3.68875316666994,
      "grad_norm": 0.39456698298454285,
      "learning_rate": 2.6397738579232096e-06,
      "loss": 0.0074,
      "step": 2254020
    },
    {
      "epoch": 3.688785897108593,
      "grad_norm": 0.38077589869499207,
      "learning_rate": 2.6397079657096924e-06,
      "loss": 0.0182,
      "step": 2254040
    },
    {
      "epoch": 3.6888186275472465,
      "grad_norm": 0.07867611944675446,
      "learning_rate": 2.639642073496175e-06,
      "loss": 0.0113,
      "step": 2254060
    },
    {
      "epoch": 3.6888513579858997,
      "grad_norm": 0.10598741471767426,
      "learning_rate": 2.6395761812826583e-06,
      "loss": 0.013,
      "step": 2254080
    },
    {
      "epoch": 3.688884088424553,
      "grad_norm": 0.3676091432571411,
      "learning_rate": 2.639510289069141e-06,
      "loss": 0.0116,
      "step": 2254100
    },
    {
      "epoch": 3.6889168188632064,
      "grad_norm": 0.21690352261066437,
      "learning_rate": 2.6394443968556238e-06,
      "loss": 0.0074,
      "step": 2254120
    },
    {
      "epoch": 3.6889495493018596,
      "grad_norm": 0.4075707197189331,
      "learning_rate": 2.6393785046421065e-06,
      "loss": 0.0066,
      "step": 2254140
    },
    {
      "epoch": 3.688982279740513,
      "grad_norm": 0.1809598058462143,
      "learning_rate": 2.6393126124285897e-06,
      "loss": 0.0137,
      "step": 2254160
    },
    {
      "epoch": 3.6890150101791663,
      "grad_norm": 0.28876689076423645,
      "learning_rate": 2.6392467202150724e-06,
      "loss": 0.0109,
      "step": 2254180
    },
    {
      "epoch": 3.68904774061782,
      "grad_norm": 0.3780647814273834,
      "learning_rate": 2.639180828001555e-06,
      "loss": 0.0086,
      "step": 2254200
    },
    {
      "epoch": 3.689080471056473,
      "grad_norm": 0.464501291513443,
      "learning_rate": 2.639114935788038e-06,
      "loss": 0.0114,
      "step": 2254220
    },
    {
      "epoch": 3.6891132014951262,
      "grad_norm": 0.2584501802921295,
      "learning_rate": 2.639049043574521e-06,
      "loss": 0.0134,
      "step": 2254240
    },
    {
      "epoch": 3.68914593193378,
      "grad_norm": 0.23515771329402924,
      "learning_rate": 2.6389831513610038e-06,
      "loss": 0.0123,
      "step": 2254260
    },
    {
      "epoch": 3.689178662372433,
      "grad_norm": 0.14981307089328766,
      "learning_rate": 2.6389172591474865e-06,
      "loss": 0.0153,
      "step": 2254280
    },
    {
      "epoch": 3.6892113928110866,
      "grad_norm": 0.10536472499370575,
      "learning_rate": 2.6388513669339693e-06,
      "loss": 0.0125,
      "step": 2254300
    },
    {
      "epoch": 3.6892441232497397,
      "grad_norm": 0.43461304903030396,
      "learning_rate": 2.638785474720453e-06,
      "loss": 0.0083,
      "step": 2254320
    },
    {
      "epoch": 3.6892768536883933,
      "grad_norm": 0.3126198351383209,
      "learning_rate": 2.6387195825069356e-06,
      "loss": 0.0161,
      "step": 2254340
    },
    {
      "epoch": 3.6893095841270465,
      "grad_norm": 0.24725653231143951,
      "learning_rate": 2.6386536902934183e-06,
      "loss": 0.0118,
      "step": 2254360
    },
    {
      "epoch": 3.6893423145656996,
      "grad_norm": 0.23591643571853638,
      "learning_rate": 2.6385877980799015e-06,
      "loss": 0.0092,
      "step": 2254380
    },
    {
      "epoch": 3.689375045004353,
      "grad_norm": 0.5951764583587646,
      "learning_rate": 2.6385219058663842e-06,
      "loss": 0.013,
      "step": 2254400
    },
    {
      "epoch": 3.6894077754430064,
      "grad_norm": 0.07630764693021774,
      "learning_rate": 2.638456013652867e-06,
      "loss": 0.0134,
      "step": 2254420
    },
    {
      "epoch": 3.68944050588166,
      "grad_norm": 0.06351731717586517,
      "learning_rate": 2.6383901214393497e-06,
      "loss": 0.0091,
      "step": 2254440
    },
    {
      "epoch": 3.689473236320313,
      "grad_norm": 0.6600154042243958,
      "learning_rate": 2.6383242292258324e-06,
      "loss": 0.0149,
      "step": 2254460
    },
    {
      "epoch": 3.6895059667589667,
      "grad_norm": 0.11164500564336777,
      "learning_rate": 2.6382583370123156e-06,
      "loss": 0.0118,
      "step": 2254480
    },
    {
      "epoch": 3.68953869719762,
      "grad_norm": 0.32158368825912476,
      "learning_rate": 2.6381924447987984e-06,
      "loss": 0.014,
      "step": 2254500
    },
    {
      "epoch": 3.689571427636273,
      "grad_norm": 0.20192749798297882,
      "learning_rate": 2.638126552585281e-06,
      "loss": 0.0099,
      "step": 2254520
    },
    {
      "epoch": 3.6896041580749266,
      "grad_norm": 0.27164334058761597,
      "learning_rate": 2.638060660371764e-06,
      "loss": 0.0102,
      "step": 2254540
    },
    {
      "epoch": 3.6896368885135797,
      "grad_norm": 0.19902393221855164,
      "learning_rate": 2.637994768158247e-06,
      "loss": 0.0084,
      "step": 2254560
    },
    {
      "epoch": 3.6896696189522333,
      "grad_norm": 0.1900629997253418,
      "learning_rate": 2.6379288759447297e-06,
      "loss": 0.0112,
      "step": 2254580
    },
    {
      "epoch": 3.6897023493908865,
      "grad_norm": 0.6090890765190125,
      "learning_rate": 2.6378629837312125e-06,
      "loss": 0.0111,
      "step": 2254600
    },
    {
      "epoch": 3.68973507982954,
      "grad_norm": 0.15747074782848358,
      "learning_rate": 2.6377970915176952e-06,
      "loss": 0.0072,
      "step": 2254620
    },
    {
      "epoch": 3.6897678102681932,
      "grad_norm": 0.13032959401607513,
      "learning_rate": 2.6377311993041784e-06,
      "loss": 0.0099,
      "step": 2254640
    },
    {
      "epoch": 3.6898005407068464,
      "grad_norm": 0.0737457200884819,
      "learning_rate": 2.637665307090661e-06,
      "loss": 0.0099,
      "step": 2254660
    },
    {
      "epoch": 3.6898332711455,
      "grad_norm": 0.24010999500751495,
      "learning_rate": 2.6375994148771443e-06,
      "loss": 0.0111,
      "step": 2254680
    },
    {
      "epoch": 3.689866001584153,
      "grad_norm": 0.11656658351421356,
      "learning_rate": 2.6375335226636274e-06,
      "loss": 0.0139,
      "step": 2254700
    },
    {
      "epoch": 3.6898987320228067,
      "grad_norm": 0.27571192383766174,
      "learning_rate": 2.63746763045011e-06,
      "loss": 0.0109,
      "step": 2254720
    },
    {
      "epoch": 3.68993146246146,
      "grad_norm": 0.16683684289455414,
      "learning_rate": 2.637401738236593e-06,
      "loss": 0.0071,
      "step": 2254740
    },
    {
      "epoch": 3.6899641929001135,
      "grad_norm": 0.09638915956020355,
      "learning_rate": 2.6373358460230757e-06,
      "loss": 0.0079,
      "step": 2254760
    },
    {
      "epoch": 3.6899969233387666,
      "grad_norm": 0.23044456541538239,
      "learning_rate": 2.637269953809559e-06,
      "loss": 0.0128,
      "step": 2254780
    },
    {
      "epoch": 3.6900296537774198,
      "grad_norm": 0.33235567808151245,
      "learning_rate": 2.6372040615960416e-06,
      "loss": 0.0243,
      "step": 2254800
    },
    {
      "epoch": 3.6900623842160734,
      "grad_norm": 0.14773769676685333,
      "learning_rate": 2.6371381693825243e-06,
      "loss": 0.0103,
      "step": 2254820
    },
    {
      "epoch": 3.6900951146547265,
      "grad_norm": 0.21862506866455078,
      "learning_rate": 2.637072277169007e-06,
      "loss": 0.0116,
      "step": 2254840
    },
    {
      "epoch": 3.69012784509338,
      "grad_norm": 0.2818710505962372,
      "learning_rate": 2.6370063849554898e-06,
      "loss": 0.0132,
      "step": 2254860
    },
    {
      "epoch": 3.6901605755320332,
      "grad_norm": 0.17988556623458862,
      "learning_rate": 2.636940492741973e-06,
      "loss": 0.0061,
      "step": 2254880
    },
    {
      "epoch": 3.690193305970687,
      "grad_norm": 0.23584477603435516,
      "learning_rate": 2.6368746005284557e-06,
      "loss": 0.0086,
      "step": 2254900
    },
    {
      "epoch": 3.69022603640934,
      "grad_norm": 0.5466842651367188,
      "learning_rate": 2.6368087083149384e-06,
      "loss": 0.0107,
      "step": 2254920
    },
    {
      "epoch": 3.690258766847993,
      "grad_norm": 0.6754474639892578,
      "learning_rate": 2.636742816101421e-06,
      "loss": 0.0131,
      "step": 2254940
    },
    {
      "epoch": 3.6902914972866467,
      "grad_norm": 0.8320347666740417,
      "learning_rate": 2.6366769238879043e-06,
      "loss": 0.0105,
      "step": 2254960
    },
    {
      "epoch": 3.6903242277253,
      "grad_norm": 0.0799272283911705,
      "learning_rate": 2.636611031674387e-06,
      "loss": 0.0066,
      "step": 2254980
    },
    {
      "epoch": 3.690356958163953,
      "grad_norm": 0.48843592405319214,
      "learning_rate": 2.63654513946087e-06,
      "loss": 0.0125,
      "step": 2255000
    },
    {
      "epoch": 3.6903896886026066,
      "grad_norm": 0.7089838981628418,
      "learning_rate": 2.6364792472473534e-06,
      "loss": 0.01,
      "step": 2255020
    },
    {
      "epoch": 3.6904224190412602,
      "grad_norm": 0.07059497386217117,
      "learning_rate": 2.636413355033836e-06,
      "loss": 0.0092,
      "step": 2255040
    },
    {
      "epoch": 3.6904551494799134,
      "grad_norm": 0.27523908019065857,
      "learning_rate": 2.636347462820319e-06,
      "loss": 0.013,
      "step": 2255060
    },
    {
      "epoch": 3.6904878799185665,
      "grad_norm": 0.15983819961547852,
      "learning_rate": 2.6362815706068016e-06,
      "loss": 0.0153,
      "step": 2255080
    },
    {
      "epoch": 3.69052061035722,
      "grad_norm": 0.12800610065460205,
      "learning_rate": 2.6362156783932848e-06,
      "loss": 0.0062,
      "step": 2255100
    },
    {
      "epoch": 3.6905533407958733,
      "grad_norm": 0.1665961593389511,
      "learning_rate": 2.6361497861797675e-06,
      "loss": 0.0114,
      "step": 2255120
    },
    {
      "epoch": 3.6905860712345264,
      "grad_norm": 0.47267094254493713,
      "learning_rate": 2.6360838939662503e-06,
      "loss": 0.016,
      "step": 2255140
    },
    {
      "epoch": 3.69061880167318,
      "grad_norm": 0.8733881115913391,
      "learning_rate": 2.636018001752733e-06,
      "loss": 0.0122,
      "step": 2255160
    },
    {
      "epoch": 3.6906515321118336,
      "grad_norm": 0.12040518969297409,
      "learning_rate": 2.635952109539216e-06,
      "loss": 0.0059,
      "step": 2255180
    },
    {
      "epoch": 3.6906842625504868,
      "grad_norm": 0.0568392239511013,
      "learning_rate": 2.635886217325699e-06,
      "loss": 0.0165,
      "step": 2255200
    },
    {
      "epoch": 3.69071699298914,
      "grad_norm": 0.6510241031646729,
      "learning_rate": 2.6358203251121816e-06,
      "loss": 0.0101,
      "step": 2255220
    },
    {
      "epoch": 3.6907497234277935,
      "grad_norm": 0.10987242311239243,
      "learning_rate": 2.6357544328986644e-06,
      "loss": 0.0062,
      "step": 2255240
    },
    {
      "epoch": 3.6907824538664467,
      "grad_norm": 0.5539098381996155,
      "learning_rate": 2.6356885406851475e-06,
      "loss": 0.01,
      "step": 2255260
    },
    {
      "epoch": 3.6908151843051,
      "grad_norm": 0.1667630672454834,
      "learning_rate": 2.6356226484716303e-06,
      "loss": 0.0095,
      "step": 2255280
    },
    {
      "epoch": 3.6908479147437534,
      "grad_norm": 0.21303683519363403,
      "learning_rate": 2.635556756258113e-06,
      "loss": 0.0083,
      "step": 2255300
    },
    {
      "epoch": 3.690880645182407,
      "grad_norm": 0.28142523765563965,
      "learning_rate": 2.6354908640445958e-06,
      "loss": 0.0126,
      "step": 2255320
    },
    {
      "epoch": 3.69091337562106,
      "grad_norm": 0.24387986958026886,
      "learning_rate": 2.6354249718310785e-06,
      "loss": 0.0116,
      "step": 2255340
    },
    {
      "epoch": 3.6909461060597133,
      "grad_norm": 0.3689635097980499,
      "learning_rate": 2.6353590796175617e-06,
      "loss": 0.0097,
      "step": 2255360
    },
    {
      "epoch": 3.690978836498367,
      "grad_norm": 0.4775983393192291,
      "learning_rate": 2.635293187404045e-06,
      "loss": 0.0163,
      "step": 2255380
    },
    {
      "epoch": 3.69101156693702,
      "grad_norm": 0.10435133427381516,
      "learning_rate": 2.6352272951905276e-06,
      "loss": 0.0197,
      "step": 2255400
    },
    {
      "epoch": 3.691044297375673,
      "grad_norm": 0.28236380219459534,
      "learning_rate": 2.6351614029770107e-06,
      "loss": 0.0102,
      "step": 2255420
    },
    {
      "epoch": 3.691077027814327,
      "grad_norm": 0.222329780459404,
      "learning_rate": 2.6350955107634935e-06,
      "loss": 0.0117,
      "step": 2255440
    },
    {
      "epoch": 3.6911097582529804,
      "grad_norm": 0.21374012529850006,
      "learning_rate": 2.635029618549976e-06,
      "loss": 0.0095,
      "step": 2255460
    },
    {
      "epoch": 3.6911424886916335,
      "grad_norm": 0.21767768263816833,
      "learning_rate": 2.634963726336459e-06,
      "loss": 0.011,
      "step": 2255480
    },
    {
      "epoch": 3.6911752191302867,
      "grad_norm": 0.6001651287078857,
      "learning_rate": 2.634897834122942e-06,
      "loss": 0.0137,
      "step": 2255500
    },
    {
      "epoch": 3.6912079495689403,
      "grad_norm": 0.19415228068828583,
      "learning_rate": 2.634831941909425e-06,
      "loss": 0.0149,
      "step": 2255520
    },
    {
      "epoch": 3.6912406800075934,
      "grad_norm": 0.40224316716194153,
      "learning_rate": 2.6347660496959076e-06,
      "loss": 0.015,
      "step": 2255540
    },
    {
      "epoch": 3.6912734104462466,
      "grad_norm": 0.2564517855644226,
      "learning_rate": 2.6347001574823903e-06,
      "loss": 0.0156,
      "step": 2255560
    },
    {
      "epoch": 3.6913061408849,
      "grad_norm": 0.11935904622077942,
      "learning_rate": 2.6346342652688735e-06,
      "loss": 0.0139,
      "step": 2255580
    },
    {
      "epoch": 3.6913388713235533,
      "grad_norm": 0.24819664657115936,
      "learning_rate": 2.6345683730553562e-06,
      "loss": 0.0095,
      "step": 2255600
    },
    {
      "epoch": 3.691371601762207,
      "grad_norm": 0.3501059412956238,
      "learning_rate": 2.634502480841839e-06,
      "loss": 0.0106,
      "step": 2255620
    },
    {
      "epoch": 3.69140433220086,
      "grad_norm": 0.1366599202156067,
      "learning_rate": 2.6344365886283217e-06,
      "loss": 0.009,
      "step": 2255640
    },
    {
      "epoch": 3.6914370626395137,
      "grad_norm": 0.28904420137405396,
      "learning_rate": 2.634370696414805e-06,
      "loss": 0.0091,
      "step": 2255660
    },
    {
      "epoch": 3.691469793078167,
      "grad_norm": 0.1190139502286911,
      "learning_rate": 2.6343048042012876e-06,
      "loss": 0.012,
      "step": 2255680
    },
    {
      "epoch": 3.69150252351682,
      "grad_norm": 0.12522540986537933,
      "learning_rate": 2.6342389119877704e-06,
      "loss": 0.0116,
      "step": 2255700
    },
    {
      "epoch": 3.6915352539554736,
      "grad_norm": 0.18277543783187866,
      "learning_rate": 2.634173019774253e-06,
      "loss": 0.0108,
      "step": 2255720
    },
    {
      "epoch": 3.6915679843941267,
      "grad_norm": 0.18838827311992645,
      "learning_rate": 2.6341071275607367e-06,
      "loss": 0.0109,
      "step": 2255740
    },
    {
      "epoch": 3.6916007148327803,
      "grad_norm": 0.2929095923900604,
      "learning_rate": 2.6340412353472194e-06,
      "loss": 0.0126,
      "step": 2255760
    },
    {
      "epoch": 3.6916334452714334,
      "grad_norm": 0.20328764617443085,
      "learning_rate": 2.633975343133702e-06,
      "loss": 0.0114,
      "step": 2255780
    },
    {
      "epoch": 3.691666175710087,
      "grad_norm": 0.20643554627895355,
      "learning_rate": 2.6339094509201853e-06,
      "loss": 0.0154,
      "step": 2255800
    },
    {
      "epoch": 3.69169890614874,
      "grad_norm": 0.3672272264957428,
      "learning_rate": 2.633843558706668e-06,
      "loss": 0.0193,
      "step": 2255820
    },
    {
      "epoch": 3.6917316365873933,
      "grad_norm": 0.21842728555202484,
      "learning_rate": 2.633777666493151e-06,
      "loss": 0.0066,
      "step": 2255840
    },
    {
      "epoch": 3.691764367026047,
      "grad_norm": 0.1851191222667694,
      "learning_rate": 2.6337117742796335e-06,
      "loss": 0.0133,
      "step": 2255860
    },
    {
      "epoch": 3.6917970974647,
      "grad_norm": 0.32307010889053345,
      "learning_rate": 2.6336458820661163e-06,
      "loss": 0.0111,
      "step": 2255880
    },
    {
      "epoch": 3.6918298279033537,
      "grad_norm": 0.7657080292701721,
      "learning_rate": 2.6335799898525995e-06,
      "loss": 0.0142,
      "step": 2255900
    },
    {
      "epoch": 3.691862558342007,
      "grad_norm": 0.3007397949695587,
      "learning_rate": 2.633514097639082e-06,
      "loss": 0.0083,
      "step": 2255920
    },
    {
      "epoch": 3.6918952887806604,
      "grad_norm": 0.45047348737716675,
      "learning_rate": 2.633448205425565e-06,
      "loss": 0.012,
      "step": 2255940
    },
    {
      "epoch": 3.6919280192193136,
      "grad_norm": 0.1564399003982544,
      "learning_rate": 2.6333823132120477e-06,
      "loss": 0.0152,
      "step": 2255960
    },
    {
      "epoch": 3.6919607496579667,
      "grad_norm": 0.5321513414382935,
      "learning_rate": 2.633316420998531e-06,
      "loss": 0.0099,
      "step": 2255980
    },
    {
      "epoch": 3.6919934800966203,
      "grad_norm": 0.2880376875400543,
      "learning_rate": 2.6332505287850136e-06,
      "loss": 0.0134,
      "step": 2256000
    },
    {
      "epoch": 3.6920262105352735,
      "grad_norm": 0.482375830411911,
      "learning_rate": 2.6331846365714963e-06,
      "loss": 0.0103,
      "step": 2256020
    },
    {
      "epoch": 3.692058940973927,
      "grad_norm": 0.32405856251716614,
      "learning_rate": 2.633118744357979e-06,
      "loss": 0.0152,
      "step": 2256040
    },
    {
      "epoch": 3.69209167141258,
      "grad_norm": 0.43834516406059265,
      "learning_rate": 2.6330528521444622e-06,
      "loss": 0.0097,
      "step": 2256060
    },
    {
      "epoch": 3.692124401851234,
      "grad_norm": 0.23955701291561127,
      "learning_rate": 2.6329869599309454e-06,
      "loss": 0.0073,
      "step": 2256080
    },
    {
      "epoch": 3.692157132289887,
      "grad_norm": 0.4489278495311737,
      "learning_rate": 2.632921067717428e-06,
      "loss": 0.0118,
      "step": 2256100
    },
    {
      "epoch": 3.69218986272854,
      "grad_norm": 0.24860797822475433,
      "learning_rate": 2.6328551755039113e-06,
      "loss": 0.0115,
      "step": 2256120
    },
    {
      "epoch": 3.6922225931671937,
      "grad_norm": 0.16423362493515015,
      "learning_rate": 2.632789283290394e-06,
      "loss": 0.0214,
      "step": 2256140
    },
    {
      "epoch": 3.692255323605847,
      "grad_norm": 0.30058956146240234,
      "learning_rate": 2.6327233910768768e-06,
      "loss": 0.0191,
      "step": 2256160
    },
    {
      "epoch": 3.6922880540445004,
      "grad_norm": 0.20151470601558685,
      "learning_rate": 2.6326574988633595e-06,
      "loss": 0.0088,
      "step": 2256180
    },
    {
      "epoch": 3.6923207844831536,
      "grad_norm": 0.3341156840324402,
      "learning_rate": 2.6325916066498427e-06,
      "loss": 0.0095,
      "step": 2256200
    },
    {
      "epoch": 3.692353514921807,
      "grad_norm": 0.034871816635131836,
      "learning_rate": 2.6325257144363254e-06,
      "loss": 0.0148,
      "step": 2256220
    },
    {
      "epoch": 3.6923862453604603,
      "grad_norm": 0.26353922486305237,
      "learning_rate": 2.632459822222808e-06,
      "loss": 0.0229,
      "step": 2256240
    },
    {
      "epoch": 3.6924189757991135,
      "grad_norm": 0.17764757573604584,
      "learning_rate": 2.632393930009291e-06,
      "loss": 0.0099,
      "step": 2256260
    },
    {
      "epoch": 3.692451706237767,
      "grad_norm": 0.08252152800559998,
      "learning_rate": 2.632328037795774e-06,
      "loss": 0.0098,
      "step": 2256280
    },
    {
      "epoch": 3.6924844366764202,
      "grad_norm": 0.22296006977558136,
      "learning_rate": 2.6322621455822568e-06,
      "loss": 0.011,
      "step": 2256300
    },
    {
      "epoch": 3.692517167115074,
      "grad_norm": 0.0594245083630085,
      "learning_rate": 2.6321962533687395e-06,
      "loss": 0.0084,
      "step": 2256320
    },
    {
      "epoch": 3.692549897553727,
      "grad_norm": 0.4115299582481384,
      "learning_rate": 2.6321303611552223e-06,
      "loss": 0.0079,
      "step": 2256340
    },
    {
      "epoch": 3.6925826279923806,
      "grad_norm": 0.108135886490345,
      "learning_rate": 2.632064468941705e-06,
      "loss": 0.0107,
      "step": 2256360
    },
    {
      "epoch": 3.6926153584310337,
      "grad_norm": 0.22869868576526642,
      "learning_rate": 2.631998576728188e-06,
      "loss": 0.0084,
      "step": 2256380
    },
    {
      "epoch": 3.692648088869687,
      "grad_norm": 0.1516600102186203,
      "learning_rate": 2.631932684514671e-06,
      "loss": 0.0076,
      "step": 2256400
    },
    {
      "epoch": 3.6926808193083405,
      "grad_norm": 0.18419787287712097,
      "learning_rate": 2.6318667923011536e-06,
      "loss": 0.017,
      "step": 2256420
    },
    {
      "epoch": 3.6927135497469936,
      "grad_norm": 0.14943745732307434,
      "learning_rate": 2.6318009000876372e-06,
      "loss": 0.0066,
      "step": 2256440
    },
    {
      "epoch": 3.692746280185647,
      "grad_norm": 0.15238814055919647,
      "learning_rate": 2.63173500787412e-06,
      "loss": 0.0102,
      "step": 2256460
    },
    {
      "epoch": 3.6927790106243004,
      "grad_norm": 0.4610326588153839,
      "learning_rate": 2.6316691156606027e-06,
      "loss": 0.0101,
      "step": 2256480
    },
    {
      "epoch": 3.692811741062954,
      "grad_norm": 0.34568336606025696,
      "learning_rate": 2.6316032234470855e-06,
      "loss": 0.013,
      "step": 2256500
    },
    {
      "epoch": 3.692844471501607,
      "grad_norm": 0.3121407628059387,
      "learning_rate": 2.6315373312335686e-06,
      "loss": 0.0129,
      "step": 2256520
    },
    {
      "epoch": 3.6928772019402603,
      "grad_norm": 0.5789530873298645,
      "learning_rate": 2.6314714390200514e-06,
      "loss": 0.0105,
      "step": 2256540
    },
    {
      "epoch": 3.692909932378914,
      "grad_norm": 0.777495265007019,
      "learning_rate": 2.631405546806534e-06,
      "loss": 0.0178,
      "step": 2256560
    },
    {
      "epoch": 3.692942662817567,
      "grad_norm": 0.3431224822998047,
      "learning_rate": 2.631339654593017e-06,
      "loss": 0.0159,
      "step": 2256580
    },
    {
      "epoch": 3.69297539325622,
      "grad_norm": 0.24686723947525024,
      "learning_rate": 2.6312737623795e-06,
      "loss": 0.0081,
      "step": 2256600
    },
    {
      "epoch": 3.6930081236948737,
      "grad_norm": 0.4063464105129242,
      "learning_rate": 2.6312078701659827e-06,
      "loss": 0.0145,
      "step": 2256620
    },
    {
      "epoch": 3.6930408541335273,
      "grad_norm": 0.34643879532814026,
      "learning_rate": 2.6311419779524655e-06,
      "loss": 0.0092,
      "step": 2256640
    },
    {
      "epoch": 3.6930735845721805,
      "grad_norm": 0.3931477665901184,
      "learning_rate": 2.6310760857389482e-06,
      "loss": 0.0142,
      "step": 2256660
    },
    {
      "epoch": 3.6931063150108336,
      "grad_norm": 0.6312056183815002,
      "learning_rate": 2.6310101935254314e-06,
      "loss": 0.0117,
      "step": 2256680
    },
    {
      "epoch": 3.6931390454494872,
      "grad_norm": 0.25153282284736633,
      "learning_rate": 2.630944301311914e-06,
      "loss": 0.0122,
      "step": 2256700
    },
    {
      "epoch": 3.6931717758881404,
      "grad_norm": 0.06466066837310791,
      "learning_rate": 2.630878409098397e-06,
      "loss": 0.0086,
      "step": 2256720
    },
    {
      "epoch": 3.6932045063267935,
      "grad_norm": 0.23572920262813568,
      "learning_rate": 2.6308125168848796e-06,
      "loss": 0.0122,
      "step": 2256740
    },
    {
      "epoch": 3.693237236765447,
      "grad_norm": 0.4624486267566681,
      "learning_rate": 2.6307466246713623e-06,
      "loss": 0.0114,
      "step": 2256760
    },
    {
      "epoch": 3.6932699672041007,
      "grad_norm": 0.6619243621826172,
      "learning_rate": 2.630680732457846e-06,
      "loss": 0.0091,
      "step": 2256780
    },
    {
      "epoch": 3.693302697642754,
      "grad_norm": 1.9169957637786865,
      "learning_rate": 2.6306148402443287e-06,
      "loss": 0.0089,
      "step": 2256800
    },
    {
      "epoch": 3.693335428081407,
      "grad_norm": 0.13314446806907654,
      "learning_rate": 2.630548948030812e-06,
      "loss": 0.0079,
      "step": 2256820
    },
    {
      "epoch": 3.6933681585200606,
      "grad_norm": 0.7009586691856384,
      "learning_rate": 2.6304830558172946e-06,
      "loss": 0.0188,
      "step": 2256840
    },
    {
      "epoch": 3.6934008889587138,
      "grad_norm": 0.3124375641345978,
      "learning_rate": 2.6304171636037773e-06,
      "loss": 0.0087,
      "step": 2256860
    },
    {
      "epoch": 3.693433619397367,
      "grad_norm": 0.19484558701515198,
      "learning_rate": 2.63035127139026e-06,
      "loss": 0.0141,
      "step": 2256880
    },
    {
      "epoch": 3.6934663498360205,
      "grad_norm": 0.29834821820259094,
      "learning_rate": 2.630285379176743e-06,
      "loss": 0.0085,
      "step": 2256900
    },
    {
      "epoch": 3.693499080274674,
      "grad_norm": 0.30195319652557373,
      "learning_rate": 2.630219486963226e-06,
      "loss": 0.0118,
      "step": 2256920
    },
    {
      "epoch": 3.6935318107133273,
      "grad_norm": 0.26323583722114563,
      "learning_rate": 2.6301535947497087e-06,
      "loss": 0.0135,
      "step": 2256940
    },
    {
      "epoch": 3.6935645411519804,
      "grad_norm": 0.22727316617965698,
      "learning_rate": 2.6300877025361914e-06,
      "loss": 0.0112,
      "step": 2256960
    },
    {
      "epoch": 3.693597271590634,
      "grad_norm": 0.049180228263139725,
      "learning_rate": 2.630021810322674e-06,
      "loss": 0.0085,
      "step": 2256980
    },
    {
      "epoch": 3.693630002029287,
      "grad_norm": 0.394481360912323,
      "learning_rate": 2.6299559181091573e-06,
      "loss": 0.0164,
      "step": 2257000
    },
    {
      "epoch": 3.6936627324679403,
      "grad_norm": 1.1163091659545898,
      "learning_rate": 2.62989002589564e-06,
      "loss": 0.0116,
      "step": 2257020
    },
    {
      "epoch": 3.693695462906594,
      "grad_norm": 0.1719457358121872,
      "learning_rate": 2.629824133682123e-06,
      "loss": 0.0148,
      "step": 2257040
    },
    {
      "epoch": 3.6937281933452475,
      "grad_norm": 0.3051861524581909,
      "learning_rate": 2.6297582414686056e-06,
      "loss": 0.0103,
      "step": 2257060
    },
    {
      "epoch": 3.6937609237839006,
      "grad_norm": 0.3032664954662323,
      "learning_rate": 2.6296923492550887e-06,
      "loss": 0.0094,
      "step": 2257080
    },
    {
      "epoch": 3.693793654222554,
      "grad_norm": 0.11055435240268707,
      "learning_rate": 2.6296264570415715e-06,
      "loss": 0.0129,
      "step": 2257100
    },
    {
      "epoch": 3.6938263846612074,
      "grad_norm": 0.08160235732793808,
      "learning_rate": 2.629560564828054e-06,
      "loss": 0.007,
      "step": 2257120
    },
    {
      "epoch": 3.6938591150998605,
      "grad_norm": 0.26822593808174133,
      "learning_rate": 2.6294946726145378e-06,
      "loss": 0.0172,
      "step": 2257140
    },
    {
      "epoch": 3.6938918455385137,
      "grad_norm": 0.622029721736908,
      "learning_rate": 2.6294287804010205e-06,
      "loss": 0.0132,
      "step": 2257160
    },
    {
      "epoch": 3.6939245759771673,
      "grad_norm": 0.330475777387619,
      "learning_rate": 2.6293628881875033e-06,
      "loss": 0.0091,
      "step": 2257180
    },
    {
      "epoch": 3.6939573064158204,
      "grad_norm": 0.4219745993614197,
      "learning_rate": 2.629296995973986e-06,
      "loss": 0.0139,
      "step": 2257200
    },
    {
      "epoch": 3.693990036854474,
      "grad_norm": 0.12363159656524658,
      "learning_rate": 2.629231103760469e-06,
      "loss": 0.0101,
      "step": 2257220
    },
    {
      "epoch": 3.694022767293127,
      "grad_norm": 0.20786647498607635,
      "learning_rate": 2.629165211546952e-06,
      "loss": 0.0119,
      "step": 2257240
    },
    {
      "epoch": 3.6940554977317808,
      "grad_norm": 0.11461223661899567,
      "learning_rate": 2.6290993193334346e-06,
      "loss": 0.0071,
      "step": 2257260
    },
    {
      "epoch": 3.694088228170434,
      "grad_norm": 0.2958245873451233,
      "learning_rate": 2.6290334271199174e-06,
      "loss": 0.009,
      "step": 2257280
    },
    {
      "epoch": 3.694120958609087,
      "grad_norm": 0.25387251377105713,
      "learning_rate": 2.6289675349064e-06,
      "loss": 0.01,
      "step": 2257300
    },
    {
      "epoch": 3.6941536890477407,
      "grad_norm": 0.21889257431030273,
      "learning_rate": 2.6289016426928833e-06,
      "loss": 0.01,
      "step": 2257320
    },
    {
      "epoch": 3.694186419486394,
      "grad_norm": 0.1636432707309723,
      "learning_rate": 2.628835750479366e-06,
      "loss": 0.0114,
      "step": 2257340
    },
    {
      "epoch": 3.6942191499250474,
      "grad_norm": 0.19117775559425354,
      "learning_rate": 2.6287698582658488e-06,
      "loss": 0.0087,
      "step": 2257360
    },
    {
      "epoch": 3.6942518803637006,
      "grad_norm": 0.4149741232395172,
      "learning_rate": 2.6287039660523315e-06,
      "loss": 0.0088,
      "step": 2257380
    },
    {
      "epoch": 3.694284610802354,
      "grad_norm": 0.3146187961101532,
      "learning_rate": 2.6286380738388147e-06,
      "loss": 0.0169,
      "step": 2257400
    },
    {
      "epoch": 3.6943173412410073,
      "grad_norm": 0.046103689819574356,
      "learning_rate": 2.6285721816252974e-06,
      "loss": 0.0064,
      "step": 2257420
    },
    {
      "epoch": 3.6943500716796605,
      "grad_norm": 0.3537747263908386,
      "learning_rate": 2.62850628941178e-06,
      "loss": 0.0127,
      "step": 2257440
    },
    {
      "epoch": 3.694382802118314,
      "grad_norm": 0.1947849839925766,
      "learning_rate": 2.628440397198263e-06,
      "loss": 0.0084,
      "step": 2257460
    },
    {
      "epoch": 3.694415532556967,
      "grad_norm": 0.5336946845054626,
      "learning_rate": 2.6283745049847465e-06,
      "loss": 0.0116,
      "step": 2257480
    },
    {
      "epoch": 3.694448262995621,
      "grad_norm": 0.05019819363951683,
      "learning_rate": 2.6283086127712292e-06,
      "loss": 0.0091,
      "step": 2257500
    },
    {
      "epoch": 3.694480993434274,
      "grad_norm": 0.02589508332312107,
      "learning_rate": 2.628242720557712e-06,
      "loss": 0.0123,
      "step": 2257520
    },
    {
      "epoch": 3.6945137238729275,
      "grad_norm": 0.3936349153518677,
      "learning_rate": 2.628176828344195e-06,
      "loss": 0.0128,
      "step": 2257540
    },
    {
      "epoch": 3.6945464543115807,
      "grad_norm": 0.317279189825058,
      "learning_rate": 2.628110936130678e-06,
      "loss": 0.0108,
      "step": 2257560
    },
    {
      "epoch": 3.694579184750234,
      "grad_norm": 0.18402600288391113,
      "learning_rate": 2.6280450439171606e-06,
      "loss": 0.0144,
      "step": 2257580
    },
    {
      "epoch": 3.6946119151888874,
      "grad_norm": 0.4397314190864563,
      "learning_rate": 2.6279791517036433e-06,
      "loss": 0.0123,
      "step": 2257600
    },
    {
      "epoch": 3.6946446456275406,
      "grad_norm": 0.8380683660507202,
      "learning_rate": 2.6279132594901265e-06,
      "loss": 0.0118,
      "step": 2257620
    },
    {
      "epoch": 3.694677376066194,
      "grad_norm": 0.17992812395095825,
      "learning_rate": 2.6278473672766092e-06,
      "loss": 0.0086,
      "step": 2257640
    },
    {
      "epoch": 3.6947101065048473,
      "grad_norm": 0.3959643840789795,
      "learning_rate": 2.627781475063092e-06,
      "loss": 0.0106,
      "step": 2257660
    },
    {
      "epoch": 3.694742836943501,
      "grad_norm": 0.2727406620979309,
      "learning_rate": 2.6277155828495747e-06,
      "loss": 0.012,
      "step": 2257680
    },
    {
      "epoch": 3.694775567382154,
      "grad_norm": 0.28594857454299927,
      "learning_rate": 2.627649690636058e-06,
      "loss": 0.0113,
      "step": 2257700
    },
    {
      "epoch": 3.694808297820807,
      "grad_norm": 0.35236164927482605,
      "learning_rate": 2.6275837984225406e-06,
      "loss": 0.0126,
      "step": 2257720
    },
    {
      "epoch": 3.694841028259461,
      "grad_norm": 0.20638327300548553,
      "learning_rate": 2.6275179062090234e-06,
      "loss": 0.0123,
      "step": 2257740
    },
    {
      "epoch": 3.694873758698114,
      "grad_norm": 0.38951992988586426,
      "learning_rate": 2.627452013995506e-06,
      "loss": 0.0147,
      "step": 2257760
    },
    {
      "epoch": 3.6949064891367676,
      "grad_norm": 0.0566541813313961,
      "learning_rate": 2.627386121781989e-06,
      "loss": 0.0135,
      "step": 2257780
    },
    {
      "epoch": 3.6949392195754207,
      "grad_norm": 0.17967021465301514,
      "learning_rate": 2.627320229568472e-06,
      "loss": 0.0113,
      "step": 2257800
    },
    {
      "epoch": 3.6949719500140743,
      "grad_norm": 0.11953712999820709,
      "learning_rate": 2.6272543373549547e-06,
      "loss": 0.0135,
      "step": 2257820
    },
    {
      "epoch": 3.6950046804527275,
      "grad_norm": 0.026297349482774734,
      "learning_rate": 2.627188445141438e-06,
      "loss": 0.0128,
      "step": 2257840
    },
    {
      "epoch": 3.6950374108913806,
      "grad_norm": 0.4950559735298157,
      "learning_rate": 2.627122552927921e-06,
      "loss": 0.0118,
      "step": 2257860
    },
    {
      "epoch": 3.695070141330034,
      "grad_norm": 0.14186283946037292,
      "learning_rate": 2.627056660714404e-06,
      "loss": 0.0113,
      "step": 2257880
    },
    {
      "epoch": 3.6951028717686873,
      "grad_norm": 0.5008938312530518,
      "learning_rate": 2.6269907685008866e-06,
      "loss": 0.0114,
      "step": 2257900
    },
    {
      "epoch": 3.695135602207341,
      "grad_norm": 0.23543740808963776,
      "learning_rate": 2.6269248762873693e-06,
      "loss": 0.0135,
      "step": 2257920
    },
    {
      "epoch": 3.695168332645994,
      "grad_norm": 0.09349046647548676,
      "learning_rate": 2.6268589840738525e-06,
      "loss": 0.0058,
      "step": 2257940
    },
    {
      "epoch": 3.6952010630846477,
      "grad_norm": 0.1327006071805954,
      "learning_rate": 2.626793091860335e-06,
      "loss": 0.0088,
      "step": 2257960
    },
    {
      "epoch": 3.695233793523301,
      "grad_norm": 0.15230083465576172,
      "learning_rate": 2.626727199646818e-06,
      "loss": 0.0139,
      "step": 2257980
    },
    {
      "epoch": 3.695266523961954,
      "grad_norm": 0.20137299597263336,
      "learning_rate": 2.6266613074333007e-06,
      "loss": 0.0066,
      "step": 2258000
    },
    {
      "epoch": 3.6952992544006076,
      "grad_norm": 0.09420356154441833,
      "learning_rate": 2.626595415219784e-06,
      "loss": 0.0125,
      "step": 2258020
    },
    {
      "epoch": 3.6953319848392607,
      "grad_norm": 0.41095444560050964,
      "learning_rate": 2.6265295230062666e-06,
      "loss": 0.0105,
      "step": 2258040
    },
    {
      "epoch": 3.695364715277914,
      "grad_norm": 0.21904920041561127,
      "learning_rate": 2.6264636307927493e-06,
      "loss": 0.0086,
      "step": 2258060
    },
    {
      "epoch": 3.6953974457165675,
      "grad_norm": 0.6481403708457947,
      "learning_rate": 2.626397738579232e-06,
      "loss": 0.0141,
      "step": 2258080
    },
    {
      "epoch": 3.695430176155221,
      "grad_norm": 0.28328874707221985,
      "learning_rate": 2.6263318463657152e-06,
      "loss": 0.015,
      "step": 2258100
    },
    {
      "epoch": 3.695462906593874,
      "grad_norm": 0.47651663422584534,
      "learning_rate": 2.626265954152198e-06,
      "loss": 0.013,
      "step": 2258120
    },
    {
      "epoch": 3.6954956370325274,
      "grad_norm": 0.15846003592014313,
      "learning_rate": 2.6262000619386807e-06,
      "loss": 0.0073,
      "step": 2258140
    },
    {
      "epoch": 3.695528367471181,
      "grad_norm": 0.2915429174900055,
      "learning_rate": 2.6261341697251634e-06,
      "loss": 0.0114,
      "step": 2258160
    },
    {
      "epoch": 3.695561097909834,
      "grad_norm": 0.14908170700073242,
      "learning_rate": 2.626068277511646e-06,
      "loss": 0.0142,
      "step": 2258180
    },
    {
      "epoch": 3.6955938283484873,
      "grad_norm": 0.26658570766448975,
      "learning_rate": 2.6260023852981298e-06,
      "loss": 0.0123,
      "step": 2258200
    },
    {
      "epoch": 3.695626558787141,
      "grad_norm": 0.37021252512931824,
      "learning_rate": 2.6259364930846125e-06,
      "loss": 0.0107,
      "step": 2258220
    },
    {
      "epoch": 3.6956592892257945,
      "grad_norm": 0.48499971628189087,
      "learning_rate": 2.6258706008710957e-06,
      "loss": 0.0094,
      "step": 2258240
    },
    {
      "epoch": 3.6956920196644476,
      "grad_norm": 0.15380947291851044,
      "learning_rate": 2.6258047086575784e-06,
      "loss": 0.0124,
      "step": 2258260
    },
    {
      "epoch": 3.6957247501031008,
      "grad_norm": 0.5159236192703247,
      "learning_rate": 2.625738816444061e-06,
      "loss": 0.0079,
      "step": 2258280
    },
    {
      "epoch": 3.6957574805417543,
      "grad_norm": 0.13917607069015503,
      "learning_rate": 2.625672924230544e-06,
      "loss": 0.0124,
      "step": 2258300
    },
    {
      "epoch": 3.6957902109804075,
      "grad_norm": 0.31849101185798645,
      "learning_rate": 2.6256070320170266e-06,
      "loss": 0.0094,
      "step": 2258320
    },
    {
      "epoch": 3.6958229414190606,
      "grad_norm": 0.06152809411287308,
      "learning_rate": 2.62554113980351e-06,
      "loss": 0.0076,
      "step": 2258340
    },
    {
      "epoch": 3.6958556718577142,
      "grad_norm": 0.09705112129449844,
      "learning_rate": 2.6254752475899925e-06,
      "loss": 0.0078,
      "step": 2258360
    },
    {
      "epoch": 3.695888402296368,
      "grad_norm": 0.3052334487438202,
      "learning_rate": 2.6254093553764753e-06,
      "loss": 0.0165,
      "step": 2258380
    },
    {
      "epoch": 3.695921132735021,
      "grad_norm": 0.4836418032646179,
      "learning_rate": 2.625343463162958e-06,
      "loss": 0.0107,
      "step": 2258400
    },
    {
      "epoch": 3.695953863173674,
      "grad_norm": 0.4128192961215973,
      "learning_rate": 2.625277570949441e-06,
      "loss": 0.0157,
      "step": 2258420
    },
    {
      "epoch": 3.6959865936123277,
      "grad_norm": 0.47550836205482483,
      "learning_rate": 2.625211678735924e-06,
      "loss": 0.0124,
      "step": 2258440
    },
    {
      "epoch": 3.696019324050981,
      "grad_norm": 0.0983518436551094,
      "learning_rate": 2.6251457865224067e-06,
      "loss": 0.0076,
      "step": 2258460
    },
    {
      "epoch": 3.696052054489634,
      "grad_norm": 0.4754866659641266,
      "learning_rate": 2.6250798943088894e-06,
      "loss": 0.0109,
      "step": 2258480
    },
    {
      "epoch": 3.6960847849282876,
      "grad_norm": 0.19467265903949738,
      "learning_rate": 2.6250140020953726e-06,
      "loss": 0.0083,
      "step": 2258500
    },
    {
      "epoch": 3.696117515366941,
      "grad_norm": 0.11983316391706467,
      "learning_rate": 2.6249481098818553e-06,
      "loss": 0.0083,
      "step": 2258520
    },
    {
      "epoch": 3.6961502458055944,
      "grad_norm": 0.11861921101808548,
      "learning_rate": 2.6248822176683385e-06,
      "loss": 0.0159,
      "step": 2258540
    },
    {
      "epoch": 3.6961829762442475,
      "grad_norm": 0.17335757613182068,
      "learning_rate": 2.6248163254548216e-06,
      "loss": 0.01,
      "step": 2258560
    },
    {
      "epoch": 3.696215706682901,
      "grad_norm": 0.050992026925086975,
      "learning_rate": 2.6247504332413044e-06,
      "loss": 0.0096,
      "step": 2258580
    },
    {
      "epoch": 3.6962484371215543,
      "grad_norm": 0.41001275181770325,
      "learning_rate": 2.624684541027787e-06,
      "loss": 0.0122,
      "step": 2258600
    },
    {
      "epoch": 3.6962811675602074,
      "grad_norm": 0.3019021451473236,
      "learning_rate": 2.62461864881427e-06,
      "loss": 0.01,
      "step": 2258620
    },
    {
      "epoch": 3.696313897998861,
      "grad_norm": 0.3564927279949188,
      "learning_rate": 2.624552756600753e-06,
      "loss": 0.0162,
      "step": 2258640
    },
    {
      "epoch": 3.696346628437514,
      "grad_norm": 0.6427326202392578,
      "learning_rate": 2.6244868643872357e-06,
      "loss": 0.0128,
      "step": 2258660
    },
    {
      "epoch": 3.6963793588761678,
      "grad_norm": 0.18844538927078247,
      "learning_rate": 2.6244209721737185e-06,
      "loss": 0.0082,
      "step": 2258680
    },
    {
      "epoch": 3.696412089314821,
      "grad_norm": 0.2691749930381775,
      "learning_rate": 2.6243550799602012e-06,
      "loss": 0.0109,
      "step": 2258700
    },
    {
      "epoch": 3.6964448197534745,
      "grad_norm": 0.45860689878463745,
      "learning_rate": 2.624289187746684e-06,
      "loss": 0.0126,
      "step": 2258720
    },
    {
      "epoch": 3.6964775501921276,
      "grad_norm": 0.3410465717315674,
      "learning_rate": 2.624223295533167e-06,
      "loss": 0.0155,
      "step": 2258740
    },
    {
      "epoch": 3.696510280630781,
      "grad_norm": 0.20928342640399933,
      "learning_rate": 2.62415740331965e-06,
      "loss": 0.0084,
      "step": 2258760
    },
    {
      "epoch": 3.6965430110694344,
      "grad_norm": 0.1734071671962738,
      "learning_rate": 2.6240915111061326e-06,
      "loss": 0.0101,
      "step": 2258780
    },
    {
      "epoch": 3.6965757415080875,
      "grad_norm": 0.10783592611551285,
      "learning_rate": 2.6240256188926153e-06,
      "loss": 0.008,
      "step": 2258800
    },
    {
      "epoch": 3.696608471946741,
      "grad_norm": 0.40407392382621765,
      "learning_rate": 2.6239597266790985e-06,
      "loss": 0.0113,
      "step": 2258820
    },
    {
      "epoch": 3.6966412023853943,
      "grad_norm": 0.2037205994129181,
      "learning_rate": 2.6238938344655813e-06,
      "loss": 0.0103,
      "step": 2258840
    },
    {
      "epoch": 3.696673932824048,
      "grad_norm": 0.12663643062114716,
      "learning_rate": 2.623827942252064e-06,
      "loss": 0.0115,
      "step": 2258860
    },
    {
      "epoch": 3.696706663262701,
      "grad_norm": 0.17931073904037476,
      "learning_rate": 2.6237620500385467e-06,
      "loss": 0.0093,
      "step": 2258880
    },
    {
      "epoch": 3.696739393701354,
      "grad_norm": 0.5340150594711304,
      "learning_rate": 2.6236961578250303e-06,
      "loss": 0.0102,
      "step": 2258900
    },
    {
      "epoch": 3.6967721241400078,
      "grad_norm": 0.4515508711338043,
      "learning_rate": 2.623630265611513e-06,
      "loss": 0.0153,
      "step": 2258920
    },
    {
      "epoch": 3.696804854578661,
      "grad_norm": 0.9009065628051758,
      "learning_rate": 2.623564373397996e-06,
      "loss": 0.012,
      "step": 2258940
    },
    {
      "epoch": 3.6968375850173145,
      "grad_norm": 0.967295229434967,
      "learning_rate": 2.623498481184479e-06,
      "loss": 0.0159,
      "step": 2258960
    },
    {
      "epoch": 3.6968703154559677,
      "grad_norm": 0.13763123750686646,
      "learning_rate": 2.6234325889709617e-06,
      "loss": 0.0101,
      "step": 2258980
    },
    {
      "epoch": 3.6969030458946213,
      "grad_norm": 0.10626375675201416,
      "learning_rate": 2.6233666967574444e-06,
      "loss": 0.0095,
      "step": 2259000
    },
    {
      "epoch": 3.6969357763332744,
      "grad_norm": 0.6444329023361206,
      "learning_rate": 2.623300804543927e-06,
      "loss": 0.0102,
      "step": 2259020
    },
    {
      "epoch": 3.6969685067719276,
      "grad_norm": 0.1575772911310196,
      "learning_rate": 2.6232349123304103e-06,
      "loss": 0.0102,
      "step": 2259040
    },
    {
      "epoch": 3.697001237210581,
      "grad_norm": 0.32893669605255127,
      "learning_rate": 2.623169020116893e-06,
      "loss": 0.0097,
      "step": 2259060
    },
    {
      "epoch": 3.6970339676492343,
      "grad_norm": 0.2529875934123993,
      "learning_rate": 2.623103127903376e-06,
      "loss": 0.0125,
      "step": 2259080
    },
    {
      "epoch": 3.697066698087888,
      "grad_norm": 0.6093990206718445,
      "learning_rate": 2.6230372356898586e-06,
      "loss": 0.0151,
      "step": 2259100
    },
    {
      "epoch": 3.697099428526541,
      "grad_norm": 0.4138883650302887,
      "learning_rate": 2.6229713434763417e-06,
      "loss": 0.0101,
      "step": 2259120
    },
    {
      "epoch": 3.6971321589651946,
      "grad_norm": 0.18478235602378845,
      "learning_rate": 2.6229054512628245e-06,
      "loss": 0.0184,
      "step": 2259140
    },
    {
      "epoch": 3.697164889403848,
      "grad_norm": 0.23782502114772797,
      "learning_rate": 2.622839559049307e-06,
      "loss": 0.0104,
      "step": 2259160
    },
    {
      "epoch": 3.697197619842501,
      "grad_norm": 0.4041808247566223,
      "learning_rate": 2.62277366683579e-06,
      "loss": 0.0101,
      "step": 2259180
    },
    {
      "epoch": 3.6972303502811545,
      "grad_norm": 0.33852940797805786,
      "learning_rate": 2.6227077746222727e-06,
      "loss": 0.0137,
      "step": 2259200
    },
    {
      "epoch": 3.6972630807198077,
      "grad_norm": 0.12731988728046417,
      "learning_rate": 2.622641882408756e-06,
      "loss": 0.0127,
      "step": 2259220
    },
    {
      "epoch": 3.6972958111584613,
      "grad_norm": 0.16134454309940338,
      "learning_rate": 2.622575990195239e-06,
      "loss": 0.0089,
      "step": 2259240
    },
    {
      "epoch": 3.6973285415971144,
      "grad_norm": 0.2922549247741699,
      "learning_rate": 2.6225100979817218e-06,
      "loss": 0.0092,
      "step": 2259260
    },
    {
      "epoch": 3.697361272035768,
      "grad_norm": 0.27449411153793335,
      "learning_rate": 2.622444205768205e-06,
      "loss": 0.0094,
      "step": 2259280
    },
    {
      "epoch": 3.697394002474421,
      "grad_norm": 0.7863168716430664,
      "learning_rate": 2.6223783135546877e-06,
      "loss": 0.011,
      "step": 2259300
    },
    {
      "epoch": 3.6974267329130743,
      "grad_norm": 0.14223851263523102,
      "learning_rate": 2.6223124213411704e-06,
      "loss": 0.0123,
      "step": 2259320
    },
    {
      "epoch": 3.697459463351728,
      "grad_norm": 0.18223650753498077,
      "learning_rate": 2.622246529127653e-06,
      "loss": 0.012,
      "step": 2259340
    },
    {
      "epoch": 3.697492193790381,
      "grad_norm": 0.057155121117830276,
      "learning_rate": 2.6221806369141363e-06,
      "loss": 0.0085,
      "step": 2259360
    },
    {
      "epoch": 3.6975249242290347,
      "grad_norm": 0.3317757248878479,
      "learning_rate": 2.622114744700619e-06,
      "loss": 0.0085,
      "step": 2259380
    },
    {
      "epoch": 3.697557654667688,
      "grad_norm": 0.08497472107410431,
      "learning_rate": 2.6220488524871018e-06,
      "loss": 0.0102,
      "step": 2259400
    },
    {
      "epoch": 3.6975903851063414,
      "grad_norm": 0.4127808213233948,
      "learning_rate": 2.6219829602735845e-06,
      "loss": 0.0112,
      "step": 2259420
    },
    {
      "epoch": 3.6976231155449946,
      "grad_norm": 0.41265302896499634,
      "learning_rate": 2.6219170680600677e-06,
      "loss": 0.0101,
      "step": 2259440
    },
    {
      "epoch": 3.6976558459836477,
      "grad_norm": 0.2208540290594101,
      "learning_rate": 2.6218511758465504e-06,
      "loss": 0.0107,
      "step": 2259460
    },
    {
      "epoch": 3.6976885764223013,
      "grad_norm": 0.3609853684902191,
      "learning_rate": 2.621785283633033e-06,
      "loss": 0.0085,
      "step": 2259480
    },
    {
      "epoch": 3.6977213068609545,
      "grad_norm": 0.21208631992340088,
      "learning_rate": 2.621719391419516e-06,
      "loss": 0.0084,
      "step": 2259500
    },
    {
      "epoch": 3.697754037299608,
      "grad_norm": 0.1838122010231018,
      "learning_rate": 2.621653499205999e-06,
      "loss": 0.0139,
      "step": 2259520
    },
    {
      "epoch": 3.697786767738261,
      "grad_norm": 0.2672531306743622,
      "learning_rate": 2.621587606992482e-06,
      "loss": 0.0093,
      "step": 2259540
    },
    {
      "epoch": 3.697819498176915,
      "grad_norm": 0.3166254758834839,
      "learning_rate": 2.6215217147789645e-06,
      "loss": 0.008,
      "step": 2259560
    },
    {
      "epoch": 3.697852228615568,
      "grad_norm": 0.17880818247795105,
      "learning_rate": 2.6214558225654473e-06,
      "loss": 0.0091,
      "step": 2259580
    },
    {
      "epoch": 3.697884959054221,
      "grad_norm": 0.10211795568466187,
      "learning_rate": 2.621389930351931e-06,
      "loss": 0.009,
      "step": 2259600
    },
    {
      "epoch": 3.6979176894928747,
      "grad_norm": 0.5514730215072632,
      "learning_rate": 2.6213240381384136e-06,
      "loss": 0.0116,
      "step": 2259620
    },
    {
      "epoch": 3.697950419931528,
      "grad_norm": 0.3997144401073456,
      "learning_rate": 2.6212581459248963e-06,
      "loss": 0.008,
      "step": 2259640
    },
    {
      "epoch": 3.697983150370181,
      "grad_norm": 0.5290052890777588,
      "learning_rate": 2.6211922537113795e-06,
      "loss": 0.0128,
      "step": 2259660
    },
    {
      "epoch": 3.6980158808088346,
      "grad_norm": 0.14270855486392975,
      "learning_rate": 2.6211263614978623e-06,
      "loss": 0.011,
      "step": 2259680
    },
    {
      "epoch": 3.698048611247488,
      "grad_norm": 0.23349903523921967,
      "learning_rate": 2.621060469284345e-06,
      "loss": 0.0086,
      "step": 2259700
    },
    {
      "epoch": 3.6980813416861413,
      "grad_norm": 0.18824833631515503,
      "learning_rate": 2.6209945770708277e-06,
      "loss": 0.0104,
      "step": 2259720
    },
    {
      "epoch": 3.6981140721247945,
      "grad_norm": 0.3993490934371948,
      "learning_rate": 2.6209286848573105e-06,
      "loss": 0.0063,
      "step": 2259740
    },
    {
      "epoch": 3.698146802563448,
      "grad_norm": 0.3153510093688965,
      "learning_rate": 2.6208627926437936e-06,
      "loss": 0.0138,
      "step": 2259760
    },
    {
      "epoch": 3.6981795330021012,
      "grad_norm": 0.15199647843837738,
      "learning_rate": 2.6207969004302764e-06,
      "loss": 0.0108,
      "step": 2259780
    },
    {
      "epoch": 3.6982122634407544,
      "grad_norm": 0.31613028049468994,
      "learning_rate": 2.620731008216759e-06,
      "loss": 0.0108,
      "step": 2259800
    },
    {
      "epoch": 3.698244993879408,
      "grad_norm": 0.13446104526519775,
      "learning_rate": 2.620665116003242e-06,
      "loss": 0.0107,
      "step": 2259820
    },
    {
      "epoch": 3.6982777243180616,
      "grad_norm": 0.1340683549642563,
      "learning_rate": 2.620599223789725e-06,
      "loss": 0.0184,
      "step": 2259840
    },
    {
      "epoch": 3.6983104547567147,
      "grad_norm": 0.12764334678649902,
      "learning_rate": 2.6205333315762078e-06,
      "loss": 0.0087,
      "step": 2259860
    },
    {
      "epoch": 3.698343185195368,
      "grad_norm": 0.33697107434272766,
      "learning_rate": 2.6204674393626905e-06,
      "loss": 0.0123,
      "step": 2259880
    },
    {
      "epoch": 3.6983759156340215,
      "grad_norm": 0.2552911639213562,
      "learning_rate": 2.6204015471491732e-06,
      "loss": 0.0161,
      "step": 2259900
    },
    {
      "epoch": 3.6984086460726746,
      "grad_norm": 0.15025408565998077,
      "learning_rate": 2.6203356549356564e-06,
      "loss": 0.0104,
      "step": 2259920
    },
    {
      "epoch": 3.6984413765113278,
      "grad_norm": 0.2513049840927124,
      "learning_rate": 2.620269762722139e-06,
      "loss": 0.0101,
      "step": 2259940
    },
    {
      "epoch": 3.6984741069499814,
      "grad_norm": 0.2855125069618225,
      "learning_rate": 2.6202038705086223e-06,
      "loss": 0.0078,
      "step": 2259960
    },
    {
      "epoch": 3.698506837388635,
      "grad_norm": 0.1405891329050064,
      "learning_rate": 2.6201379782951055e-06,
      "loss": 0.0135,
      "step": 2259980
    },
    {
      "epoch": 3.698539567827288,
      "grad_norm": 0.04213583469390869,
      "learning_rate": 2.620072086081588e-06,
      "loss": 0.0125,
      "step": 2260000
    },
    {
      "epoch": 3.6985722982659412,
      "grad_norm": 0.523090124130249,
      "learning_rate": 2.620006193868071e-06,
      "loss": 0.0149,
      "step": 2260020
    },
    {
      "epoch": 3.698605028704595,
      "grad_norm": 0.22534160315990448,
      "learning_rate": 2.6199403016545537e-06,
      "loss": 0.0091,
      "step": 2260040
    },
    {
      "epoch": 3.698637759143248,
      "grad_norm": 0.09400470554828644,
      "learning_rate": 2.619874409441037e-06,
      "loss": 0.0084,
      "step": 2260060
    },
    {
      "epoch": 3.698670489581901,
      "grad_norm": 0.225516676902771,
      "learning_rate": 2.6198085172275196e-06,
      "loss": 0.0125,
      "step": 2260080
    },
    {
      "epoch": 3.6987032200205547,
      "grad_norm": 0.28018897771835327,
      "learning_rate": 2.6197426250140023e-06,
      "loss": 0.0176,
      "step": 2260100
    },
    {
      "epoch": 3.698735950459208,
      "grad_norm": 0.2328464388847351,
      "learning_rate": 2.619676732800485e-06,
      "loss": 0.009,
      "step": 2260120
    },
    {
      "epoch": 3.6987686808978615,
      "grad_norm": 0.21952228248119354,
      "learning_rate": 2.6196108405869682e-06,
      "loss": 0.013,
      "step": 2260140
    },
    {
      "epoch": 3.6988014113365146,
      "grad_norm": 0.3099214732646942,
      "learning_rate": 2.619544948373451e-06,
      "loss": 0.0133,
      "step": 2260160
    },
    {
      "epoch": 3.6988341417751682,
      "grad_norm": 0.36854055523872375,
      "learning_rate": 2.6194790561599337e-06,
      "loss": 0.0153,
      "step": 2260180
    },
    {
      "epoch": 3.6988668722138214,
      "grad_norm": 0.12218141555786133,
      "learning_rate": 2.6194131639464164e-06,
      "loss": 0.0131,
      "step": 2260200
    },
    {
      "epoch": 3.6988996026524745,
      "grad_norm": 0.5115929841995239,
      "learning_rate": 2.619347271732899e-06,
      "loss": 0.0097,
      "step": 2260220
    },
    {
      "epoch": 3.698932333091128,
      "grad_norm": 0.3859643042087555,
      "learning_rate": 2.6192813795193824e-06,
      "loss": 0.0115,
      "step": 2260240
    },
    {
      "epoch": 3.6989650635297813,
      "grad_norm": 0.15543882548809052,
      "learning_rate": 2.619215487305865e-06,
      "loss": 0.0094,
      "step": 2260260
    },
    {
      "epoch": 3.698997793968435,
      "grad_norm": 0.29055285453796387,
      "learning_rate": 2.619149595092348e-06,
      "loss": 0.0105,
      "step": 2260280
    },
    {
      "epoch": 3.699030524407088,
      "grad_norm": 0.7583267092704773,
      "learning_rate": 2.6190837028788314e-06,
      "loss": 0.0156,
      "step": 2260300
    },
    {
      "epoch": 3.6990632548457416,
      "grad_norm": 0.413459837436676,
      "learning_rate": 2.619017810665314e-06,
      "loss": 0.01,
      "step": 2260320
    },
    {
      "epoch": 3.6990959852843948,
      "grad_norm": 0.1358187049627304,
      "learning_rate": 2.618951918451797e-06,
      "loss": 0.0098,
      "step": 2260340
    },
    {
      "epoch": 3.699128715723048,
      "grad_norm": 0.11332195997238159,
      "learning_rate": 2.6188860262382796e-06,
      "loss": 0.009,
      "step": 2260360
    },
    {
      "epoch": 3.6991614461617015,
      "grad_norm": 0.5563556551933289,
      "learning_rate": 2.618820134024763e-06,
      "loss": 0.0133,
      "step": 2260380
    },
    {
      "epoch": 3.6991941766003547,
      "grad_norm": 0.2757534980773926,
      "learning_rate": 2.6187542418112455e-06,
      "loss": 0.0082,
      "step": 2260400
    },
    {
      "epoch": 3.6992269070390082,
      "grad_norm": 0.14538711309432983,
      "learning_rate": 2.6186883495977283e-06,
      "loss": 0.0136,
      "step": 2260420
    },
    {
      "epoch": 3.6992596374776614,
      "grad_norm": 0.061428818851709366,
      "learning_rate": 2.618622457384211e-06,
      "loss": 0.0059,
      "step": 2260440
    },
    {
      "epoch": 3.699292367916315,
      "grad_norm": 0.6222015619277954,
      "learning_rate": 2.618556565170694e-06,
      "loss": 0.0153,
      "step": 2260460
    },
    {
      "epoch": 3.699325098354968,
      "grad_norm": 0.23085986077785492,
      "learning_rate": 2.618490672957177e-06,
      "loss": 0.01,
      "step": 2260480
    },
    {
      "epoch": 3.6993578287936213,
      "grad_norm": 0.33725181221961975,
      "learning_rate": 2.6184247807436597e-06,
      "loss": 0.0078,
      "step": 2260500
    },
    {
      "epoch": 3.699390559232275,
      "grad_norm": 0.2630983591079712,
      "learning_rate": 2.6183588885301424e-06,
      "loss": 0.0082,
      "step": 2260520
    },
    {
      "epoch": 3.699423289670928,
      "grad_norm": 0.5525891184806824,
      "learning_rate": 2.6182929963166256e-06,
      "loss": 0.0104,
      "step": 2260540
    },
    {
      "epoch": 3.6994560201095816,
      "grad_norm": 0.3921392560005188,
      "learning_rate": 2.6182271041031083e-06,
      "loss": 0.0148,
      "step": 2260560
    },
    {
      "epoch": 3.699488750548235,
      "grad_norm": 0.14167962968349457,
      "learning_rate": 2.618161211889591e-06,
      "loss": 0.0093,
      "step": 2260580
    },
    {
      "epoch": 3.6995214809868884,
      "grad_norm": 0.14171071350574493,
      "learning_rate": 2.6180953196760738e-06,
      "loss": 0.0118,
      "step": 2260600
    },
    {
      "epoch": 3.6995542114255415,
      "grad_norm": 0.4315442740917206,
      "learning_rate": 2.6180294274625565e-06,
      "loss": 0.0142,
      "step": 2260620
    },
    {
      "epoch": 3.6995869418641947,
      "grad_norm": 0.36163827776908875,
      "learning_rate": 2.6179635352490397e-06,
      "loss": 0.0101,
      "step": 2260640
    },
    {
      "epoch": 3.6996196723028483,
      "grad_norm": 0.044997040182352066,
      "learning_rate": 2.617897643035523e-06,
      "loss": 0.0104,
      "step": 2260660
    },
    {
      "epoch": 3.6996524027415014,
      "grad_norm": 0.06762462854385376,
      "learning_rate": 2.6178317508220056e-06,
      "loss": 0.0098,
      "step": 2260680
    },
    {
      "epoch": 3.699685133180155,
      "grad_norm": 0.5446690917015076,
      "learning_rate": 2.6177658586084888e-06,
      "loss": 0.0127,
      "step": 2260700
    },
    {
      "epoch": 3.699717863618808,
      "grad_norm": 0.27118661999702454,
      "learning_rate": 2.6176999663949715e-06,
      "loss": 0.0123,
      "step": 2260720
    },
    {
      "epoch": 3.6997505940574618,
      "grad_norm": 0.45147404074668884,
      "learning_rate": 2.6176340741814542e-06,
      "loss": 0.0084,
      "step": 2260740
    },
    {
      "epoch": 3.699783324496115,
      "grad_norm": 0.28343990445137024,
      "learning_rate": 2.617568181967937e-06,
      "loss": 0.0097,
      "step": 2260760
    },
    {
      "epoch": 3.699816054934768,
      "grad_norm": 0.4236334562301636,
      "learning_rate": 2.61750228975442e-06,
      "loss": 0.0085,
      "step": 2260780
    },
    {
      "epoch": 3.6998487853734217,
      "grad_norm": 0.08432190120220184,
      "learning_rate": 2.617436397540903e-06,
      "loss": 0.0096,
      "step": 2260800
    },
    {
      "epoch": 3.699881515812075,
      "grad_norm": 0.07364095747470856,
      "learning_rate": 2.6173705053273856e-06,
      "loss": 0.0074,
      "step": 2260820
    },
    {
      "epoch": 3.6999142462507284,
      "grad_norm": 0.15130998194217682,
      "learning_rate": 2.6173046131138684e-06,
      "loss": 0.0086,
      "step": 2260840
    },
    {
      "epoch": 3.6999469766893816,
      "grad_norm": 0.5024072527885437,
      "learning_rate": 2.6172387209003515e-06,
      "loss": 0.0133,
      "step": 2260860
    },
    {
      "epoch": 3.699979707128035,
      "grad_norm": 0.09500294178724289,
      "learning_rate": 2.6171728286868343e-06,
      "loss": 0.0102,
      "step": 2260880
    },
    {
      "epoch": 3.7000124375666883,
      "grad_norm": 0.267593652009964,
      "learning_rate": 2.617106936473317e-06,
      "loss": 0.0145,
      "step": 2260900
    },
    {
      "epoch": 3.7000451680053414,
      "grad_norm": 0.14731623232364655,
      "learning_rate": 2.6170410442597997e-06,
      "loss": 0.0148,
      "step": 2260920
    },
    {
      "epoch": 3.700077898443995,
      "grad_norm": 0.3947989344596863,
      "learning_rate": 2.616975152046283e-06,
      "loss": 0.0109,
      "step": 2260940
    },
    {
      "epoch": 3.700110628882648,
      "grad_norm": 0.16606085002422333,
      "learning_rate": 2.6169092598327656e-06,
      "loss": 0.0098,
      "step": 2260960
    },
    {
      "epoch": 3.700143359321302,
      "grad_norm": 0.07919426262378693,
      "learning_rate": 2.6168433676192484e-06,
      "loss": 0.0122,
      "step": 2260980
    },
    {
      "epoch": 3.700176089759955,
      "grad_norm": 0.16496731340885162,
      "learning_rate": 2.616777475405732e-06,
      "loss": 0.0151,
      "step": 2261000
    },
    {
      "epoch": 3.7002088201986085,
      "grad_norm": 0.5148796439170837,
      "learning_rate": 2.6167115831922147e-06,
      "loss": 0.0149,
      "step": 2261020
    },
    {
      "epoch": 3.7002415506372617,
      "grad_norm": 0.3760640323162079,
      "learning_rate": 2.6166456909786974e-06,
      "loss": 0.0082,
      "step": 2261040
    },
    {
      "epoch": 3.700274281075915,
      "grad_norm": 0.2698991000652313,
      "learning_rate": 2.61657979876518e-06,
      "loss": 0.0133,
      "step": 2261060
    },
    {
      "epoch": 3.7003070115145684,
      "grad_norm": 0.5209751725196838,
      "learning_rate": 2.6165139065516633e-06,
      "loss": 0.0117,
      "step": 2261080
    },
    {
      "epoch": 3.7003397419532216,
      "grad_norm": 0.23468470573425293,
      "learning_rate": 2.616448014338146e-06,
      "loss": 0.0103,
      "step": 2261100
    },
    {
      "epoch": 3.7003724723918747,
      "grad_norm": 0.28010380268096924,
      "learning_rate": 2.616382122124629e-06,
      "loss": 0.012,
      "step": 2261120
    },
    {
      "epoch": 3.7004052028305283,
      "grad_norm": 0.2114115059375763,
      "learning_rate": 2.6163162299111116e-06,
      "loss": 0.0106,
      "step": 2261140
    },
    {
      "epoch": 3.700437933269182,
      "grad_norm": 0.3268167972564697,
      "learning_rate": 2.6162503376975943e-06,
      "loss": 0.0084,
      "step": 2261160
    },
    {
      "epoch": 3.700470663707835,
      "grad_norm": 0.15592046082019806,
      "learning_rate": 2.6161844454840775e-06,
      "loss": 0.012,
      "step": 2261180
    },
    {
      "epoch": 3.700503394146488,
      "grad_norm": 0.05599395930767059,
      "learning_rate": 2.6161185532705602e-06,
      "loss": 0.0115,
      "step": 2261200
    },
    {
      "epoch": 3.700536124585142,
      "grad_norm": 0.07252602279186249,
      "learning_rate": 2.616052661057043e-06,
      "loss": 0.0103,
      "step": 2261220
    },
    {
      "epoch": 3.700568855023795,
      "grad_norm": 0.4801289737224579,
      "learning_rate": 2.6159867688435257e-06,
      "loss": 0.0087,
      "step": 2261240
    },
    {
      "epoch": 3.700601585462448,
      "grad_norm": 0.1405547708272934,
      "learning_rate": 2.615920876630009e-06,
      "loss": 0.0092,
      "step": 2261260
    },
    {
      "epoch": 3.7006343159011017,
      "grad_norm": 0.14002962410449982,
      "learning_rate": 2.6158549844164916e-06,
      "loss": 0.009,
      "step": 2261280
    },
    {
      "epoch": 3.7006670463397553,
      "grad_norm": 0.2963007986545563,
      "learning_rate": 2.6157890922029743e-06,
      "loss": 0.0145,
      "step": 2261300
    },
    {
      "epoch": 3.7006997767784084,
      "grad_norm": 0.5451273918151855,
      "learning_rate": 2.615723199989457e-06,
      "loss": 0.0161,
      "step": 2261320
    },
    {
      "epoch": 3.7007325072170616,
      "grad_norm": 0.16135954856872559,
      "learning_rate": 2.6156573077759402e-06,
      "loss": 0.0074,
      "step": 2261340
    },
    {
      "epoch": 3.700765237655715,
      "grad_norm": 0.0813678577542305,
      "learning_rate": 2.6155914155624234e-06,
      "loss": 0.0095,
      "step": 2261360
    },
    {
      "epoch": 3.7007979680943683,
      "grad_norm": 0.09223081171512604,
      "learning_rate": 2.615525523348906e-06,
      "loss": 0.0101,
      "step": 2261380
    },
    {
      "epoch": 3.7008306985330215,
      "grad_norm": 0.631928026676178,
      "learning_rate": 2.6154596311353893e-06,
      "loss": 0.0083,
      "step": 2261400
    },
    {
      "epoch": 3.700863428971675,
      "grad_norm": 0.23781783878803253,
      "learning_rate": 2.615393738921872e-06,
      "loss": 0.0111,
      "step": 2261420
    },
    {
      "epoch": 3.7008961594103287,
      "grad_norm": 0.514060914516449,
      "learning_rate": 2.6153278467083548e-06,
      "loss": 0.0133,
      "step": 2261440
    },
    {
      "epoch": 3.700928889848982,
      "grad_norm": 0.3776819109916687,
      "learning_rate": 2.6152619544948375e-06,
      "loss": 0.0091,
      "step": 2261460
    },
    {
      "epoch": 3.700961620287635,
      "grad_norm": 0.1989116221666336,
      "learning_rate": 2.6151960622813207e-06,
      "loss": 0.0127,
      "step": 2261480
    },
    {
      "epoch": 3.7009943507262886,
      "grad_norm": 0.24103805422782898,
      "learning_rate": 2.6151301700678034e-06,
      "loss": 0.0085,
      "step": 2261500
    },
    {
      "epoch": 3.7010270811649417,
      "grad_norm": 0.2430315613746643,
      "learning_rate": 2.615064277854286e-06,
      "loss": 0.008,
      "step": 2261520
    },
    {
      "epoch": 3.701059811603595,
      "grad_norm": 0.14468508958816528,
      "learning_rate": 2.614998385640769e-06,
      "loss": 0.0085,
      "step": 2261540
    },
    {
      "epoch": 3.7010925420422485,
      "grad_norm": 0.12460853904485703,
      "learning_rate": 2.614932493427252e-06,
      "loss": 0.0129,
      "step": 2261560
    },
    {
      "epoch": 3.701125272480902,
      "grad_norm": 0.2724999785423279,
      "learning_rate": 2.614866601213735e-06,
      "loss": 0.0102,
      "step": 2261580
    },
    {
      "epoch": 3.701158002919555,
      "grad_norm": 0.5128753185272217,
      "learning_rate": 2.6148007090002175e-06,
      "loss": 0.0094,
      "step": 2261600
    },
    {
      "epoch": 3.7011907333582084,
      "grad_norm": 0.08799155056476593,
      "learning_rate": 2.6147348167867003e-06,
      "loss": 0.0088,
      "step": 2261620
    },
    {
      "epoch": 3.701223463796862,
      "grad_norm": 0.15036576986312866,
      "learning_rate": 2.614668924573183e-06,
      "loss": 0.0087,
      "step": 2261640
    },
    {
      "epoch": 3.701256194235515,
      "grad_norm": 0.33753502368927,
      "learning_rate": 2.614603032359666e-06,
      "loss": 0.0125,
      "step": 2261660
    },
    {
      "epoch": 3.7012889246741683,
      "grad_norm": 0.19771309196949005,
      "learning_rate": 2.614537140146149e-06,
      "loss": 0.0121,
      "step": 2261680
    },
    {
      "epoch": 3.701321655112822,
      "grad_norm": 0.20419763028621674,
      "learning_rate": 2.6144712479326317e-06,
      "loss": 0.0144,
      "step": 2261700
    },
    {
      "epoch": 3.701354385551475,
      "grad_norm": 0.4125668704509735,
      "learning_rate": 2.6144053557191153e-06,
      "loss": 0.0079,
      "step": 2261720
    },
    {
      "epoch": 3.7013871159901286,
      "grad_norm": 0.2636745274066925,
      "learning_rate": 2.614339463505598e-06,
      "loss": 0.0116,
      "step": 2261740
    },
    {
      "epoch": 3.7014198464287817,
      "grad_norm": 0.3791026473045349,
      "learning_rate": 2.6142735712920807e-06,
      "loss": 0.0128,
      "step": 2261760
    },
    {
      "epoch": 3.7014525768674353,
      "grad_norm": 0.2284935712814331,
      "learning_rate": 2.6142076790785635e-06,
      "loss": 0.0107,
      "step": 2261780
    },
    {
      "epoch": 3.7014853073060885,
      "grad_norm": 0.2338753044605255,
      "learning_rate": 2.6141417868650466e-06,
      "loss": 0.0079,
      "step": 2261800
    },
    {
      "epoch": 3.7015180377447416,
      "grad_norm": 0.19556860625743866,
      "learning_rate": 2.6140758946515294e-06,
      "loss": 0.0107,
      "step": 2261820
    },
    {
      "epoch": 3.7015507681833952,
      "grad_norm": 0.24210669100284576,
      "learning_rate": 2.614010002438012e-06,
      "loss": 0.0107,
      "step": 2261840
    },
    {
      "epoch": 3.7015834986220484,
      "grad_norm": 0.08901934325695038,
      "learning_rate": 2.613944110224495e-06,
      "loss": 0.0083,
      "step": 2261860
    },
    {
      "epoch": 3.701616229060702,
      "grad_norm": 0.11588519811630249,
      "learning_rate": 2.613878218010978e-06,
      "loss": 0.0152,
      "step": 2261880
    },
    {
      "epoch": 3.701648959499355,
      "grad_norm": 0.2076481133699417,
      "learning_rate": 2.6138123257974608e-06,
      "loss": 0.0123,
      "step": 2261900
    },
    {
      "epoch": 3.7016816899380087,
      "grad_norm": 0.8140698075294495,
      "learning_rate": 2.6137464335839435e-06,
      "loss": 0.0111,
      "step": 2261920
    },
    {
      "epoch": 3.701714420376662,
      "grad_norm": 0.49445226788520813,
      "learning_rate": 2.6136805413704262e-06,
      "loss": 0.0146,
      "step": 2261940
    },
    {
      "epoch": 3.701747150815315,
      "grad_norm": 0.21116752922534943,
      "learning_rate": 2.6136146491569094e-06,
      "loss": 0.0117,
      "step": 2261960
    },
    {
      "epoch": 3.7017798812539686,
      "grad_norm": 0.18551620841026306,
      "learning_rate": 2.613548756943392e-06,
      "loss": 0.0093,
      "step": 2261980
    },
    {
      "epoch": 3.7018126116926218,
      "grad_norm": 0.8536592721939087,
      "learning_rate": 2.613482864729875e-06,
      "loss": 0.0102,
      "step": 2262000
    },
    {
      "epoch": 3.7018453421312754,
      "grad_norm": 0.29613879323005676,
      "learning_rate": 2.6134169725163576e-06,
      "loss": 0.0096,
      "step": 2262020
    },
    {
      "epoch": 3.7018780725699285,
      "grad_norm": 0.2839878797531128,
      "learning_rate": 2.6133510803028404e-06,
      "loss": 0.0115,
      "step": 2262040
    },
    {
      "epoch": 3.701910803008582,
      "grad_norm": 0.25234663486480713,
      "learning_rate": 2.613285188089324e-06,
      "loss": 0.0099,
      "step": 2262060
    },
    {
      "epoch": 3.7019435334472353,
      "grad_norm": 0.2694964110851288,
      "learning_rate": 2.6132192958758067e-06,
      "loss": 0.01,
      "step": 2262080
    },
    {
      "epoch": 3.7019762638858884,
      "grad_norm": 0.28268396854400635,
      "learning_rate": 2.61315340366229e-06,
      "loss": 0.0117,
      "step": 2262100
    },
    {
      "epoch": 3.702008994324542,
      "grad_norm": 0.22250157594680786,
      "learning_rate": 2.6130875114487726e-06,
      "loss": 0.0089,
      "step": 2262120
    },
    {
      "epoch": 3.702041724763195,
      "grad_norm": 0.12309542298316956,
      "learning_rate": 2.6130216192352553e-06,
      "loss": 0.0087,
      "step": 2262140
    },
    {
      "epoch": 3.7020744552018487,
      "grad_norm": 0.16170012950897217,
      "learning_rate": 2.612955727021738e-06,
      "loss": 0.0088,
      "step": 2262160
    },
    {
      "epoch": 3.702107185640502,
      "grad_norm": 0.10430913418531418,
      "learning_rate": 2.612889834808221e-06,
      "loss": 0.0075,
      "step": 2262180
    },
    {
      "epoch": 3.7021399160791555,
      "grad_norm": 0.2527978718280792,
      "learning_rate": 2.612823942594704e-06,
      "loss": 0.0069,
      "step": 2262200
    },
    {
      "epoch": 3.7021726465178086,
      "grad_norm": 0.3778219223022461,
      "learning_rate": 2.6127580503811867e-06,
      "loss": 0.0102,
      "step": 2262220
    },
    {
      "epoch": 3.702205376956462,
      "grad_norm": 0.23409965634346008,
      "learning_rate": 2.6126921581676695e-06,
      "loss": 0.0141,
      "step": 2262240
    },
    {
      "epoch": 3.7022381073951154,
      "grad_norm": 0.22118383646011353,
      "learning_rate": 2.612626265954152e-06,
      "loss": 0.0115,
      "step": 2262260
    },
    {
      "epoch": 3.7022708378337685,
      "grad_norm": 0.2613992393016815,
      "learning_rate": 2.6125603737406354e-06,
      "loss": 0.0075,
      "step": 2262280
    },
    {
      "epoch": 3.702303568272422,
      "grad_norm": 0.13410907983779907,
      "learning_rate": 2.612494481527118e-06,
      "loss": 0.0094,
      "step": 2262300
    },
    {
      "epoch": 3.7023362987110753,
      "grad_norm": 0.16590917110443115,
      "learning_rate": 2.612428589313601e-06,
      "loss": 0.011,
      "step": 2262320
    },
    {
      "epoch": 3.702369029149729,
      "grad_norm": 0.1691029965877533,
      "learning_rate": 2.6123626971000836e-06,
      "loss": 0.0104,
      "step": 2262340
    },
    {
      "epoch": 3.702401759588382,
      "grad_norm": 0.10654255747795105,
      "learning_rate": 2.6122968048865667e-06,
      "loss": 0.0101,
      "step": 2262360
    },
    {
      "epoch": 3.702434490027035,
      "grad_norm": 0.2549622654914856,
      "learning_rate": 2.6122309126730495e-06,
      "loss": 0.0111,
      "step": 2262380
    },
    {
      "epoch": 3.7024672204656888,
      "grad_norm": 0.12400063872337341,
      "learning_rate": 2.6121650204595322e-06,
      "loss": 0.012,
      "step": 2262400
    },
    {
      "epoch": 3.702499950904342,
      "grad_norm": 0.3860522210597992,
      "learning_rate": 2.612099128246016e-06,
      "loss": 0.0112,
      "step": 2262420
    },
    {
      "epoch": 3.7025326813429955,
      "grad_norm": 0.15576454997062683,
      "learning_rate": 2.6120332360324985e-06,
      "loss": 0.0121,
      "step": 2262440
    },
    {
      "epoch": 3.7025654117816487,
      "grad_norm": 0.4241863787174225,
      "learning_rate": 2.6119673438189813e-06,
      "loss": 0.0122,
      "step": 2262460
    },
    {
      "epoch": 3.7025981422203023,
      "grad_norm": 0.3255855143070221,
      "learning_rate": 2.611901451605464e-06,
      "loss": 0.0088,
      "step": 2262480
    },
    {
      "epoch": 3.7026308726589554,
      "grad_norm": 0.4524858295917511,
      "learning_rate": 2.611835559391947e-06,
      "loss": 0.0119,
      "step": 2262500
    },
    {
      "epoch": 3.7026636030976086,
      "grad_norm": 0.08703562617301941,
      "learning_rate": 2.61176966717843e-06,
      "loss": 0.0056,
      "step": 2262520
    },
    {
      "epoch": 3.702696333536262,
      "grad_norm": 0.20006482303142548,
      "learning_rate": 2.6117037749649127e-06,
      "loss": 0.0064,
      "step": 2262540
    },
    {
      "epoch": 3.7027290639749153,
      "grad_norm": 0.9681673049926758,
      "learning_rate": 2.6116378827513954e-06,
      "loss": 0.0116,
      "step": 2262560
    },
    {
      "epoch": 3.702761794413569,
      "grad_norm": 0.17319197952747345,
      "learning_rate": 2.611571990537878e-06,
      "loss": 0.013,
      "step": 2262580
    },
    {
      "epoch": 3.702794524852222,
      "grad_norm": 0.4109748601913452,
      "learning_rate": 2.6115060983243613e-06,
      "loss": 0.0133,
      "step": 2262600
    },
    {
      "epoch": 3.7028272552908756,
      "grad_norm": 0.8554919958114624,
      "learning_rate": 2.611440206110844e-06,
      "loss": 0.0126,
      "step": 2262620
    },
    {
      "epoch": 3.702859985729529,
      "grad_norm": 0.3258274793624878,
      "learning_rate": 2.611374313897327e-06,
      "loss": 0.0123,
      "step": 2262640
    },
    {
      "epoch": 3.702892716168182,
      "grad_norm": 0.8965397477149963,
      "learning_rate": 2.6113084216838095e-06,
      "loss": 0.0088,
      "step": 2262660
    },
    {
      "epoch": 3.7029254466068355,
      "grad_norm": 0.28175264596939087,
      "learning_rate": 2.6112425294702927e-06,
      "loss": 0.0116,
      "step": 2262680
    },
    {
      "epoch": 3.7029581770454887,
      "grad_norm": 0.3174549639225006,
      "learning_rate": 2.6111766372567754e-06,
      "loss": 0.0124,
      "step": 2262700
    },
    {
      "epoch": 3.702990907484142,
      "grad_norm": 0.18869024515151978,
      "learning_rate": 2.611110745043258e-06,
      "loss": 0.013,
      "step": 2262720
    },
    {
      "epoch": 3.7030236379227954,
      "grad_norm": 0.3446502387523651,
      "learning_rate": 2.611044852829741e-06,
      "loss": 0.0145,
      "step": 2262740
    },
    {
      "epoch": 3.703056368361449,
      "grad_norm": 0.20231389999389648,
      "learning_rate": 2.6109789606162245e-06,
      "loss": 0.0128,
      "step": 2262760
    },
    {
      "epoch": 3.703089098800102,
      "grad_norm": 0.5002245903015137,
      "learning_rate": 2.6109130684027072e-06,
      "loss": 0.0088,
      "step": 2262780
    },
    {
      "epoch": 3.7031218292387553,
      "grad_norm": 0.21938134729862213,
      "learning_rate": 2.61084717618919e-06,
      "loss": 0.0068,
      "step": 2262800
    },
    {
      "epoch": 3.703154559677409,
      "grad_norm": 0.7428589463233948,
      "learning_rate": 2.610781283975673e-06,
      "loss": 0.0132,
      "step": 2262820
    },
    {
      "epoch": 3.703187290116062,
      "grad_norm": 0.11933929473161697,
      "learning_rate": 2.610715391762156e-06,
      "loss": 0.0112,
      "step": 2262840
    },
    {
      "epoch": 3.703220020554715,
      "grad_norm": 0.36837759613990784,
      "learning_rate": 2.6106494995486386e-06,
      "loss": 0.0128,
      "step": 2262860
    },
    {
      "epoch": 3.703252750993369,
      "grad_norm": 0.33498620986938477,
      "learning_rate": 2.6105836073351214e-06,
      "loss": 0.0147,
      "step": 2262880
    },
    {
      "epoch": 3.7032854814320224,
      "grad_norm": 0.37915802001953125,
      "learning_rate": 2.6105177151216045e-06,
      "loss": 0.0172,
      "step": 2262900
    },
    {
      "epoch": 3.7033182118706756,
      "grad_norm": 0.24487069249153137,
      "learning_rate": 2.6104518229080873e-06,
      "loss": 0.0147,
      "step": 2262920
    },
    {
      "epoch": 3.7033509423093287,
      "grad_norm": 0.14023959636688232,
      "learning_rate": 2.61038593069457e-06,
      "loss": 0.0115,
      "step": 2262940
    },
    {
      "epoch": 3.7033836727479823,
      "grad_norm": 0.34641435742378235,
      "learning_rate": 2.6103200384810527e-06,
      "loss": 0.0105,
      "step": 2262960
    },
    {
      "epoch": 3.7034164031866355,
      "grad_norm": 0.3681812584400177,
      "learning_rate": 2.610254146267536e-06,
      "loss": 0.0116,
      "step": 2262980
    },
    {
      "epoch": 3.7034491336252886,
      "grad_norm": 0.17010384798049927,
      "learning_rate": 2.6101882540540186e-06,
      "loss": 0.0122,
      "step": 2263000
    },
    {
      "epoch": 3.703481864063942,
      "grad_norm": 0.16953857243061066,
      "learning_rate": 2.6101223618405014e-06,
      "loss": 0.011,
      "step": 2263020
    },
    {
      "epoch": 3.703514594502596,
      "grad_norm": 0.12684519588947296,
      "learning_rate": 2.610056469626984e-06,
      "loss": 0.0109,
      "step": 2263040
    },
    {
      "epoch": 3.703547324941249,
      "grad_norm": 0.17264510691165924,
      "learning_rate": 2.609990577413467e-06,
      "loss": 0.0134,
      "step": 2263060
    },
    {
      "epoch": 3.703580055379902,
      "grad_norm": 0.463388055562973,
      "learning_rate": 2.60992468519995e-06,
      "loss": 0.0111,
      "step": 2263080
    },
    {
      "epoch": 3.7036127858185557,
      "grad_norm": 0.11899074167013168,
      "learning_rate": 2.6098587929864328e-06,
      "loss": 0.0097,
      "step": 2263100
    },
    {
      "epoch": 3.703645516257209,
      "grad_norm": 0.32280734181404114,
      "learning_rate": 2.609792900772916e-06,
      "loss": 0.0085,
      "step": 2263120
    },
    {
      "epoch": 3.703678246695862,
      "grad_norm": 0.24087318778038025,
      "learning_rate": 2.609727008559399e-06,
      "loss": 0.0098,
      "step": 2263140
    },
    {
      "epoch": 3.7037109771345156,
      "grad_norm": 0.09929350018501282,
      "learning_rate": 2.609661116345882e-06,
      "loss": 0.0112,
      "step": 2263160
    },
    {
      "epoch": 3.7037437075731687,
      "grad_norm": 0.4145810008049011,
      "learning_rate": 2.6095952241323646e-06,
      "loss": 0.0127,
      "step": 2263180
    },
    {
      "epoch": 3.7037764380118223,
      "grad_norm": 0.320632666349411,
      "learning_rate": 2.6095293319188473e-06,
      "loss": 0.0099,
      "step": 2263200
    },
    {
      "epoch": 3.7038091684504755,
      "grad_norm": 0.08682799339294434,
      "learning_rate": 2.6094634397053305e-06,
      "loss": 0.0101,
      "step": 2263220
    },
    {
      "epoch": 3.703841898889129,
      "grad_norm": 0.3024834394454956,
      "learning_rate": 2.6093975474918132e-06,
      "loss": 0.0074,
      "step": 2263240
    },
    {
      "epoch": 3.703874629327782,
      "grad_norm": 0.30237826704978943,
      "learning_rate": 2.609331655278296e-06,
      "loss": 0.0107,
      "step": 2263260
    },
    {
      "epoch": 3.7039073597664354,
      "grad_norm": 0.269989013671875,
      "learning_rate": 2.6092657630647787e-06,
      "loss": 0.0104,
      "step": 2263280
    },
    {
      "epoch": 3.703940090205089,
      "grad_norm": 0.24559779465198517,
      "learning_rate": 2.609199870851262e-06,
      "loss": 0.0125,
      "step": 2263300
    },
    {
      "epoch": 3.703972820643742,
      "grad_norm": 0.25841960310935974,
      "learning_rate": 2.6091339786377446e-06,
      "loss": 0.0073,
      "step": 2263320
    },
    {
      "epoch": 3.7040055510823957,
      "grad_norm": 0.27110832929611206,
      "learning_rate": 2.6090680864242273e-06,
      "loss": 0.0107,
      "step": 2263340
    },
    {
      "epoch": 3.704038281521049,
      "grad_norm": 0.5595190525054932,
      "learning_rate": 2.60900219421071e-06,
      "loss": 0.0154,
      "step": 2263360
    },
    {
      "epoch": 3.7040710119597025,
      "grad_norm": 0.6452472805976868,
      "learning_rate": 2.6089363019971932e-06,
      "loss": 0.0093,
      "step": 2263380
    },
    {
      "epoch": 3.7041037423983556,
      "grad_norm": 0.23101535439491272,
      "learning_rate": 2.608870409783676e-06,
      "loss": 0.0089,
      "step": 2263400
    },
    {
      "epoch": 3.7041364728370088,
      "grad_norm": 0.5312212109565735,
      "learning_rate": 2.6088045175701587e-06,
      "loss": 0.0101,
      "step": 2263420
    },
    {
      "epoch": 3.7041692032756623,
      "grad_norm": 0.3023957312107086,
      "learning_rate": 2.6087386253566415e-06,
      "loss": 0.0114,
      "step": 2263440
    },
    {
      "epoch": 3.7042019337143155,
      "grad_norm": 0.47407597303390503,
      "learning_rate": 2.608672733143124e-06,
      "loss": 0.0077,
      "step": 2263460
    },
    {
      "epoch": 3.704234664152969,
      "grad_norm": 0.20648038387298584,
      "learning_rate": 2.608606840929608e-06,
      "loss": 0.0096,
      "step": 2263480
    },
    {
      "epoch": 3.7042673945916222,
      "grad_norm": 0.2691781520843506,
      "learning_rate": 2.6085409487160905e-06,
      "loss": 0.0122,
      "step": 2263500
    },
    {
      "epoch": 3.704300125030276,
      "grad_norm": 0.35419410467147827,
      "learning_rate": 2.6084750565025737e-06,
      "loss": 0.0064,
      "step": 2263520
    },
    {
      "epoch": 3.704332855468929,
      "grad_norm": 0.11622952669858932,
      "learning_rate": 2.6084091642890564e-06,
      "loss": 0.0097,
      "step": 2263540
    },
    {
      "epoch": 3.704365585907582,
      "grad_norm": 0.30931633710861206,
      "learning_rate": 2.608343272075539e-06,
      "loss": 0.0129,
      "step": 2263560
    },
    {
      "epoch": 3.7043983163462357,
      "grad_norm": 0.16074752807617188,
      "learning_rate": 2.608277379862022e-06,
      "loss": 0.01,
      "step": 2263580
    },
    {
      "epoch": 3.704431046784889,
      "grad_norm": 0.062292762100696564,
      "learning_rate": 2.6082114876485047e-06,
      "loss": 0.0094,
      "step": 2263600
    },
    {
      "epoch": 3.7044637772235425,
      "grad_norm": 0.35330596566200256,
      "learning_rate": 2.608145595434988e-06,
      "loss": 0.0106,
      "step": 2263620
    },
    {
      "epoch": 3.7044965076621956,
      "grad_norm": 0.255685031414032,
      "learning_rate": 2.6080797032214706e-06,
      "loss": 0.0141,
      "step": 2263640
    },
    {
      "epoch": 3.704529238100849,
      "grad_norm": 0.48952069878578186,
      "learning_rate": 2.6080138110079533e-06,
      "loss": 0.013,
      "step": 2263660
    },
    {
      "epoch": 3.7045619685395024,
      "grad_norm": 0.5935753583908081,
      "learning_rate": 2.607947918794436e-06,
      "loss": 0.0149,
      "step": 2263680
    },
    {
      "epoch": 3.7045946989781555,
      "grad_norm": 0.15207068622112274,
      "learning_rate": 2.607882026580919e-06,
      "loss": 0.01,
      "step": 2263700
    },
    {
      "epoch": 3.704627429416809,
      "grad_norm": 0.26366955041885376,
      "learning_rate": 2.607816134367402e-06,
      "loss": 0.0093,
      "step": 2263720
    },
    {
      "epoch": 3.7046601598554623,
      "grad_norm": 0.17443309724330902,
      "learning_rate": 2.6077502421538847e-06,
      "loss": 0.0138,
      "step": 2263740
    },
    {
      "epoch": 3.704692890294116,
      "grad_norm": 0.1382860541343689,
      "learning_rate": 2.6076843499403674e-06,
      "loss": 0.0129,
      "step": 2263760
    },
    {
      "epoch": 3.704725620732769,
      "grad_norm": 0.18779727816581726,
      "learning_rate": 2.6076184577268506e-06,
      "loss": 0.0105,
      "step": 2263780
    },
    {
      "epoch": 3.7047583511714226,
      "grad_norm": 0.4875630736351013,
      "learning_rate": 2.6075525655133333e-06,
      "loss": 0.0114,
      "step": 2263800
    },
    {
      "epoch": 3.7047910816100758,
      "grad_norm": 0.3429104685783386,
      "learning_rate": 2.6074866732998165e-06,
      "loss": 0.0099,
      "step": 2263820
    },
    {
      "epoch": 3.704823812048729,
      "grad_norm": 1.0294420719146729,
      "learning_rate": 2.6074207810862996e-06,
      "loss": 0.0174,
      "step": 2263840
    },
    {
      "epoch": 3.7048565424873825,
      "grad_norm": 0.4641674757003784,
      "learning_rate": 2.6073548888727824e-06,
      "loss": 0.0101,
      "step": 2263860
    },
    {
      "epoch": 3.7048892729260356,
      "grad_norm": 0.45654287934303284,
      "learning_rate": 2.607288996659265e-06,
      "loss": 0.0082,
      "step": 2263880
    },
    {
      "epoch": 3.7049220033646892,
      "grad_norm": 0.06020399183034897,
      "learning_rate": 2.607223104445748e-06,
      "loss": 0.0093,
      "step": 2263900
    },
    {
      "epoch": 3.7049547338033424,
      "grad_norm": 0.500584065914154,
      "learning_rate": 2.607157212232231e-06,
      "loss": 0.0089,
      "step": 2263920
    },
    {
      "epoch": 3.704987464241996,
      "grad_norm": 0.27767273783683777,
      "learning_rate": 2.6070913200187138e-06,
      "loss": 0.0102,
      "step": 2263940
    },
    {
      "epoch": 3.705020194680649,
      "grad_norm": 0.2457413375377655,
      "learning_rate": 2.6070254278051965e-06,
      "loss": 0.0145,
      "step": 2263960
    },
    {
      "epoch": 3.7050529251193023,
      "grad_norm": 0.1759948879480362,
      "learning_rate": 2.6069595355916792e-06,
      "loss": 0.01,
      "step": 2263980
    },
    {
      "epoch": 3.705085655557956,
      "grad_norm": 0.466553270816803,
      "learning_rate": 2.606893643378162e-06,
      "loss": 0.0122,
      "step": 2264000
    },
    {
      "epoch": 3.705118385996609,
      "grad_norm": 0.04804832115769386,
      "learning_rate": 2.606827751164645e-06,
      "loss": 0.0106,
      "step": 2264020
    },
    {
      "epoch": 3.7051511164352626,
      "grad_norm": 0.26165977120399475,
      "learning_rate": 2.606761858951128e-06,
      "loss": 0.0132,
      "step": 2264040
    },
    {
      "epoch": 3.7051838468739158,
      "grad_norm": 0.25759971141815186,
      "learning_rate": 2.6066959667376106e-06,
      "loss": 0.0095,
      "step": 2264060
    },
    {
      "epoch": 3.7052165773125694,
      "grad_norm": 0.26291966438293457,
      "learning_rate": 2.6066300745240934e-06,
      "loss": 0.0085,
      "step": 2264080
    },
    {
      "epoch": 3.7052493077512225,
      "grad_norm": 0.11826851218938828,
      "learning_rate": 2.6065641823105765e-06,
      "loss": 0.0128,
      "step": 2264100
    },
    {
      "epoch": 3.7052820381898757,
      "grad_norm": 0.42082205414772034,
      "learning_rate": 2.6064982900970593e-06,
      "loss": 0.0083,
      "step": 2264120
    },
    {
      "epoch": 3.7053147686285293,
      "grad_norm": 0.4288097023963928,
      "learning_rate": 2.606432397883542e-06,
      "loss": 0.0103,
      "step": 2264140
    },
    {
      "epoch": 3.7053474990671824,
      "grad_norm": 0.16371411085128784,
      "learning_rate": 2.6063665056700248e-06,
      "loss": 0.0141,
      "step": 2264160
    },
    {
      "epoch": 3.7053802295058356,
      "grad_norm": 0.09879808872938156,
      "learning_rate": 2.6063006134565083e-06,
      "loss": 0.0139,
      "step": 2264180
    },
    {
      "epoch": 3.705412959944489,
      "grad_norm": 0.10574305057525635,
      "learning_rate": 2.606234721242991e-06,
      "loss": 0.0092,
      "step": 2264200
    },
    {
      "epoch": 3.7054456903831428,
      "grad_norm": 0.18394885957241058,
      "learning_rate": 2.606168829029474e-06,
      "loss": 0.0104,
      "step": 2264220
    },
    {
      "epoch": 3.705478420821796,
      "grad_norm": 0.06618840247392654,
      "learning_rate": 2.606102936815957e-06,
      "loss": 0.0068,
      "step": 2264240
    },
    {
      "epoch": 3.705511151260449,
      "grad_norm": 0.20346228778362274,
      "learning_rate": 2.6060370446024397e-06,
      "loss": 0.0073,
      "step": 2264260
    },
    {
      "epoch": 3.7055438816991026,
      "grad_norm": 0.1955188363790512,
      "learning_rate": 2.6059711523889225e-06,
      "loss": 0.0103,
      "step": 2264280
    },
    {
      "epoch": 3.705576612137756,
      "grad_norm": 0.6290736794471741,
      "learning_rate": 2.605905260175405e-06,
      "loss": 0.0144,
      "step": 2264300
    },
    {
      "epoch": 3.705609342576409,
      "grad_norm": 0.22201475501060486,
      "learning_rate": 2.6058393679618884e-06,
      "loss": 0.0061,
      "step": 2264320
    },
    {
      "epoch": 3.7056420730150625,
      "grad_norm": 0.4000435173511505,
      "learning_rate": 2.605773475748371e-06,
      "loss": 0.0182,
      "step": 2264340
    },
    {
      "epoch": 3.705674803453716,
      "grad_norm": 0.278915673494339,
      "learning_rate": 2.605707583534854e-06,
      "loss": 0.0089,
      "step": 2264360
    },
    {
      "epoch": 3.7057075338923693,
      "grad_norm": 0.3395988643169403,
      "learning_rate": 2.6056416913213366e-06,
      "loss": 0.0108,
      "step": 2264380
    },
    {
      "epoch": 3.7057402643310224,
      "grad_norm": 0.15629370510578156,
      "learning_rate": 2.6055757991078197e-06,
      "loss": 0.0145,
      "step": 2264400
    },
    {
      "epoch": 3.705772994769676,
      "grad_norm": 0.13278351724147797,
      "learning_rate": 2.6055099068943025e-06,
      "loss": 0.012,
      "step": 2264420
    },
    {
      "epoch": 3.705805725208329,
      "grad_norm": 0.38012218475341797,
      "learning_rate": 2.6054440146807852e-06,
      "loss": 0.0092,
      "step": 2264440
    },
    {
      "epoch": 3.7058384556469823,
      "grad_norm": 0.27025938034057617,
      "learning_rate": 2.605378122467268e-06,
      "loss": 0.0137,
      "step": 2264460
    },
    {
      "epoch": 3.705871186085636,
      "grad_norm": 0.3281216621398926,
      "learning_rate": 2.6053122302537507e-06,
      "loss": 0.0112,
      "step": 2264480
    },
    {
      "epoch": 3.7059039165242895,
      "grad_norm": 0.02673647738993168,
      "learning_rate": 2.605246338040234e-06,
      "loss": 0.0128,
      "step": 2264500
    },
    {
      "epoch": 3.7059366469629427,
      "grad_norm": 0.3471212387084961,
      "learning_rate": 2.605180445826717e-06,
      "loss": 0.0117,
      "step": 2264520
    },
    {
      "epoch": 3.705969377401596,
      "grad_norm": 0.1976582556962967,
      "learning_rate": 2.6051145536131998e-06,
      "loss": 0.0087,
      "step": 2264540
    },
    {
      "epoch": 3.7060021078402494,
      "grad_norm": 0.2797206938266754,
      "learning_rate": 2.605048661399683e-06,
      "loss": 0.0094,
      "step": 2264560
    },
    {
      "epoch": 3.7060348382789026,
      "grad_norm": 0.27614837884902954,
      "learning_rate": 2.6049827691861657e-06,
      "loss": 0.0108,
      "step": 2264580
    },
    {
      "epoch": 3.7060675687175557,
      "grad_norm": 0.23894749581813812,
      "learning_rate": 2.6049168769726484e-06,
      "loss": 0.0135,
      "step": 2264600
    },
    {
      "epoch": 3.7061002991562093,
      "grad_norm": 0.3290884494781494,
      "learning_rate": 2.604850984759131e-06,
      "loss": 0.0113,
      "step": 2264620
    },
    {
      "epoch": 3.706133029594863,
      "grad_norm": 0.14275330305099487,
      "learning_rate": 2.6047850925456143e-06,
      "loss": 0.0065,
      "step": 2264640
    },
    {
      "epoch": 3.706165760033516,
      "grad_norm": 0.15995138883590698,
      "learning_rate": 2.604719200332097e-06,
      "loss": 0.0146,
      "step": 2264660
    },
    {
      "epoch": 3.706198490472169,
      "grad_norm": 0.08077037334442139,
      "learning_rate": 2.60465330811858e-06,
      "loss": 0.0059,
      "step": 2264680
    },
    {
      "epoch": 3.706231220910823,
      "grad_norm": 0.3406763970851898,
      "learning_rate": 2.6045874159050625e-06,
      "loss": 0.011,
      "step": 2264700
    },
    {
      "epoch": 3.706263951349476,
      "grad_norm": 0.2542492747306824,
      "learning_rate": 2.6045215236915457e-06,
      "loss": 0.0101,
      "step": 2264720
    },
    {
      "epoch": 3.706296681788129,
      "grad_norm": 0.11850088089704514,
      "learning_rate": 2.6044556314780284e-06,
      "loss": 0.0074,
      "step": 2264740
    },
    {
      "epoch": 3.7063294122267827,
      "grad_norm": 0.10695250332355499,
      "learning_rate": 2.604389739264511e-06,
      "loss": 0.0084,
      "step": 2264760
    },
    {
      "epoch": 3.706362142665436,
      "grad_norm": 0.4755772054195404,
      "learning_rate": 2.604323847050994e-06,
      "loss": 0.0096,
      "step": 2264780
    },
    {
      "epoch": 3.7063948731040894,
      "grad_norm": 0.29568880796432495,
      "learning_rate": 2.604257954837477e-06,
      "loss": 0.009,
      "step": 2264800
    },
    {
      "epoch": 3.7064276035427426,
      "grad_norm": 0.09723684191703796,
      "learning_rate": 2.60419206262396e-06,
      "loss": 0.0133,
      "step": 2264820
    },
    {
      "epoch": 3.706460333981396,
      "grad_norm": 0.5406030416488647,
      "learning_rate": 2.6041261704104426e-06,
      "loss": 0.0108,
      "step": 2264840
    },
    {
      "epoch": 3.7064930644200493,
      "grad_norm": 0.17447474598884583,
      "learning_rate": 2.6040602781969253e-06,
      "loss": 0.0076,
      "step": 2264860
    },
    {
      "epoch": 3.7065257948587025,
      "grad_norm": 0.19419001042842865,
      "learning_rate": 2.603994385983409e-06,
      "loss": 0.0081,
      "step": 2264880
    },
    {
      "epoch": 3.706558525297356,
      "grad_norm": 0.6754600405693054,
      "learning_rate": 2.6039284937698916e-06,
      "loss": 0.0129,
      "step": 2264900
    },
    {
      "epoch": 3.7065912557360092,
      "grad_norm": 0.23216430842876434,
      "learning_rate": 2.6038626015563744e-06,
      "loss": 0.013,
      "step": 2264920
    },
    {
      "epoch": 3.706623986174663,
      "grad_norm": 0.16289544105529785,
      "learning_rate": 2.6037967093428575e-06,
      "loss": 0.0113,
      "step": 2264940
    },
    {
      "epoch": 3.706656716613316,
      "grad_norm": 0.42404264211654663,
      "learning_rate": 2.6037308171293403e-06,
      "loss": 0.0134,
      "step": 2264960
    },
    {
      "epoch": 3.7066894470519696,
      "grad_norm": 0.20098675787448883,
      "learning_rate": 2.603664924915823e-06,
      "loss": 0.011,
      "step": 2264980
    },
    {
      "epoch": 3.7067221774906227,
      "grad_norm": 0.4872186779975891,
      "learning_rate": 2.6035990327023058e-06,
      "loss": 0.0169,
      "step": 2265000
    },
    {
      "epoch": 3.706754907929276,
      "grad_norm": 0.45241424441337585,
      "learning_rate": 2.6035331404887885e-06,
      "loss": 0.0077,
      "step": 2265020
    },
    {
      "epoch": 3.7067876383679295,
      "grad_norm": 0.33230674266815186,
      "learning_rate": 2.6034672482752717e-06,
      "loss": 0.0085,
      "step": 2265040
    },
    {
      "epoch": 3.7068203688065826,
      "grad_norm": 0.2078905999660492,
      "learning_rate": 2.6034013560617544e-06,
      "loss": 0.0119,
      "step": 2265060
    },
    {
      "epoch": 3.706853099245236,
      "grad_norm": 0.280691534280777,
      "learning_rate": 2.603335463848237e-06,
      "loss": 0.0093,
      "step": 2265080
    },
    {
      "epoch": 3.7068858296838894,
      "grad_norm": 0.7080933451652527,
      "learning_rate": 2.60326957163472e-06,
      "loss": 0.0135,
      "step": 2265100
    },
    {
      "epoch": 3.706918560122543,
      "grad_norm": 0.15989184379577637,
      "learning_rate": 2.603203679421203e-06,
      "loss": 0.008,
      "step": 2265120
    },
    {
      "epoch": 3.706951290561196,
      "grad_norm": 0.6661303639411926,
      "learning_rate": 2.6031377872076858e-06,
      "loss": 0.0082,
      "step": 2265140
    },
    {
      "epoch": 3.7069840209998492,
      "grad_norm": 0.2658931612968445,
      "learning_rate": 2.6030718949941685e-06,
      "loss": 0.0169,
      "step": 2265160
    },
    {
      "epoch": 3.707016751438503,
      "grad_norm": 0.1760386973619461,
      "learning_rate": 2.6030060027806513e-06,
      "loss": 0.0139,
      "step": 2265180
    },
    {
      "epoch": 3.707049481877156,
      "grad_norm": 0.26228663325309753,
      "learning_rate": 2.6029401105671344e-06,
      "loss": 0.0108,
      "step": 2265200
    },
    {
      "epoch": 3.7070822123158096,
      "grad_norm": 0.16463571786880493,
      "learning_rate": 2.6028742183536176e-06,
      "loss": 0.0083,
      "step": 2265220
    },
    {
      "epoch": 3.7071149427544627,
      "grad_norm": 0.22681927680969238,
      "learning_rate": 2.6028083261401003e-06,
      "loss": 0.0121,
      "step": 2265240
    },
    {
      "epoch": 3.7071476731931163,
      "grad_norm": 0.4701938033103943,
      "learning_rate": 2.6027424339265835e-06,
      "loss": 0.0085,
      "step": 2265260
    },
    {
      "epoch": 3.7071804036317695,
      "grad_norm": 0.3045731484889984,
      "learning_rate": 2.6026765417130662e-06,
      "loss": 0.0127,
      "step": 2265280
    },
    {
      "epoch": 3.7072131340704226,
      "grad_norm": 0.3115372955799103,
      "learning_rate": 2.602610649499549e-06,
      "loss": 0.0145,
      "step": 2265300
    },
    {
      "epoch": 3.7072458645090762,
      "grad_norm": 0.4939747154712677,
      "learning_rate": 2.6025447572860317e-06,
      "loss": 0.0103,
      "step": 2265320
    },
    {
      "epoch": 3.7072785949477294,
      "grad_norm": 0.22543860971927643,
      "learning_rate": 2.602478865072515e-06,
      "loss": 0.0101,
      "step": 2265340
    },
    {
      "epoch": 3.707311325386383,
      "grad_norm": 0.6627412438392639,
      "learning_rate": 2.6024129728589976e-06,
      "loss": 0.0117,
      "step": 2265360
    },
    {
      "epoch": 3.707344055825036,
      "grad_norm": 0.8460144996643066,
      "learning_rate": 2.6023470806454803e-06,
      "loss": 0.0094,
      "step": 2265380
    },
    {
      "epoch": 3.7073767862636897,
      "grad_norm": 0.4383257031440735,
      "learning_rate": 2.602281188431963e-06,
      "loss": 0.0109,
      "step": 2265400
    },
    {
      "epoch": 3.707409516702343,
      "grad_norm": 0.16368164122104645,
      "learning_rate": 2.6022152962184463e-06,
      "loss": 0.0142,
      "step": 2265420
    },
    {
      "epoch": 3.707442247140996,
      "grad_norm": 0.7166650891304016,
      "learning_rate": 2.602149404004929e-06,
      "loss": 0.01,
      "step": 2265440
    },
    {
      "epoch": 3.7074749775796496,
      "grad_norm": 0.33136117458343506,
      "learning_rate": 2.6020835117914117e-06,
      "loss": 0.0121,
      "step": 2265460
    },
    {
      "epoch": 3.7075077080183028,
      "grad_norm": 0.17983348667621613,
      "learning_rate": 2.6020176195778945e-06,
      "loss": 0.0117,
      "step": 2265480
    },
    {
      "epoch": 3.7075404384569564,
      "grad_norm": 0.18909339606761932,
      "learning_rate": 2.601951727364377e-06,
      "loss": 0.0121,
      "step": 2265500
    },
    {
      "epoch": 3.7075731688956095,
      "grad_norm": 0.23297260701656342,
      "learning_rate": 2.6018858351508604e-06,
      "loss": 0.0092,
      "step": 2265520
    },
    {
      "epoch": 3.707605899334263,
      "grad_norm": 0.11027681082487106,
      "learning_rate": 2.601819942937343e-06,
      "loss": 0.0092,
      "step": 2265540
    },
    {
      "epoch": 3.7076386297729163,
      "grad_norm": 0.24058587849140167,
      "learning_rate": 2.601754050723826e-06,
      "loss": 0.0144,
      "step": 2265560
    },
    {
      "epoch": 3.7076713602115694,
      "grad_norm": 0.824822723865509,
      "learning_rate": 2.6016881585103094e-06,
      "loss": 0.0161,
      "step": 2265580
    },
    {
      "epoch": 3.707704090650223,
      "grad_norm": 0.08528686314821243,
      "learning_rate": 2.601622266296792e-06,
      "loss": 0.0077,
      "step": 2265600
    },
    {
      "epoch": 3.707736821088876,
      "grad_norm": 1.008858561515808,
      "learning_rate": 2.601556374083275e-06,
      "loss": 0.0088,
      "step": 2265620
    },
    {
      "epoch": 3.7077695515275293,
      "grad_norm": 0.21302779018878937,
      "learning_rate": 2.6014904818697577e-06,
      "loss": 0.0121,
      "step": 2265640
    },
    {
      "epoch": 3.707802281966183,
      "grad_norm": 0.4851907193660736,
      "learning_rate": 2.601424589656241e-06,
      "loss": 0.0132,
      "step": 2265660
    },
    {
      "epoch": 3.7078350124048365,
      "grad_norm": 0.03709062933921814,
      "learning_rate": 2.6013586974427236e-06,
      "loss": 0.0079,
      "step": 2265680
    },
    {
      "epoch": 3.7078677428434896,
      "grad_norm": 0.2970755696296692,
      "learning_rate": 2.6012928052292063e-06,
      "loss": 0.0095,
      "step": 2265700
    },
    {
      "epoch": 3.707900473282143,
      "grad_norm": 0.4965899884700775,
      "learning_rate": 2.601226913015689e-06,
      "loss": 0.0123,
      "step": 2265720
    },
    {
      "epoch": 3.7079332037207964,
      "grad_norm": 0.42449793219566345,
      "learning_rate": 2.601161020802172e-06,
      "loss": 0.01,
      "step": 2265740
    },
    {
      "epoch": 3.7079659341594495,
      "grad_norm": 0.19597791135311127,
      "learning_rate": 2.601095128588655e-06,
      "loss": 0.0137,
      "step": 2265760
    },
    {
      "epoch": 3.7079986645981027,
      "grad_norm": 0.18770389258861542,
      "learning_rate": 2.6010292363751377e-06,
      "loss": 0.0104,
      "step": 2265780
    },
    {
      "epoch": 3.7080313950367563,
      "grad_norm": 0.20349745452404022,
      "learning_rate": 2.6009633441616204e-06,
      "loss": 0.0116,
      "step": 2265800
    },
    {
      "epoch": 3.70806412547541,
      "grad_norm": 0.1446826308965683,
      "learning_rate": 2.6008974519481036e-06,
      "loss": 0.0108,
      "step": 2265820
    },
    {
      "epoch": 3.708096855914063,
      "grad_norm": 0.14573757350444794,
      "learning_rate": 2.6008315597345863e-06,
      "loss": 0.0112,
      "step": 2265840
    },
    {
      "epoch": 3.708129586352716,
      "grad_norm": 0.14756609499454498,
      "learning_rate": 2.600765667521069e-06,
      "loss": 0.0091,
      "step": 2265860
    },
    {
      "epoch": 3.7081623167913698,
      "grad_norm": 0.20013155043125153,
      "learning_rate": 2.600699775307552e-06,
      "loss": 0.0106,
      "step": 2265880
    },
    {
      "epoch": 3.708195047230023,
      "grad_norm": 0.22700534760951996,
      "learning_rate": 2.6006338830940345e-06,
      "loss": 0.0108,
      "step": 2265900
    },
    {
      "epoch": 3.708227777668676,
      "grad_norm": 0.3379793167114258,
      "learning_rate": 2.6005679908805177e-06,
      "loss": 0.0147,
      "step": 2265920
    },
    {
      "epoch": 3.7082605081073297,
      "grad_norm": 0.1355723887681961,
      "learning_rate": 2.600502098667001e-06,
      "loss": 0.0095,
      "step": 2265940
    },
    {
      "epoch": 3.7082932385459833,
      "grad_norm": 0.23751109838485718,
      "learning_rate": 2.600436206453484e-06,
      "loss": 0.0086,
      "step": 2265960
    },
    {
      "epoch": 3.7083259689846364,
      "grad_norm": 0.2735294699668884,
      "learning_rate": 2.6003703142399668e-06,
      "loss": 0.0107,
      "step": 2265980
    },
    {
      "epoch": 3.7083586994232896,
      "grad_norm": 0.8218542337417603,
      "learning_rate": 2.6003044220264495e-06,
      "loss": 0.0132,
      "step": 2266000
    },
    {
      "epoch": 3.708391429861943,
      "grad_norm": 0.21412527561187744,
      "learning_rate": 2.6002385298129323e-06,
      "loss": 0.0098,
      "step": 2266020
    },
    {
      "epoch": 3.7084241603005963,
      "grad_norm": 0.5836832523345947,
      "learning_rate": 2.600172637599415e-06,
      "loss": 0.0133,
      "step": 2266040
    },
    {
      "epoch": 3.7084568907392494,
      "grad_norm": 0.14612716436386108,
      "learning_rate": 2.600106745385898e-06,
      "loss": 0.0089,
      "step": 2266060
    },
    {
      "epoch": 3.708489621177903,
      "grad_norm": 0.14951562881469727,
      "learning_rate": 2.600040853172381e-06,
      "loss": 0.0109,
      "step": 2266080
    },
    {
      "epoch": 3.7085223516165566,
      "grad_norm": 0.12713617086410522,
      "learning_rate": 2.5999749609588636e-06,
      "loss": 0.0088,
      "step": 2266100
    },
    {
      "epoch": 3.70855508205521,
      "grad_norm": 0.051531195640563965,
      "learning_rate": 2.5999090687453464e-06,
      "loss": 0.0141,
      "step": 2266120
    },
    {
      "epoch": 3.708587812493863,
      "grad_norm": 0.16029520332813263,
      "learning_rate": 2.5998431765318295e-06,
      "loss": 0.0079,
      "step": 2266140
    },
    {
      "epoch": 3.7086205429325165,
      "grad_norm": 0.07787249237298965,
      "learning_rate": 2.5997772843183123e-06,
      "loss": 0.0096,
      "step": 2266160
    },
    {
      "epoch": 3.7086532733711697,
      "grad_norm": 0.5096417665481567,
      "learning_rate": 2.599711392104795e-06,
      "loss": 0.0101,
      "step": 2266180
    },
    {
      "epoch": 3.708686003809823,
      "grad_norm": 0.1698705404996872,
      "learning_rate": 2.5996454998912778e-06,
      "loss": 0.0106,
      "step": 2266200
    },
    {
      "epoch": 3.7087187342484764,
      "grad_norm": 0.07544605433940887,
      "learning_rate": 2.599579607677761e-06,
      "loss": 0.0146,
      "step": 2266220
    },
    {
      "epoch": 3.7087514646871296,
      "grad_norm": 0.092005155980587,
      "learning_rate": 2.5995137154642437e-06,
      "loss": 0.0128,
      "step": 2266240
    },
    {
      "epoch": 3.708784195125783,
      "grad_norm": 0.08759161084890366,
      "learning_rate": 2.5994478232507264e-06,
      "loss": 0.0098,
      "step": 2266260
    },
    {
      "epoch": 3.7088169255644363,
      "grad_norm": 0.2539505362510681,
      "learning_rate": 2.59938193103721e-06,
      "loss": 0.0112,
      "step": 2266280
    },
    {
      "epoch": 3.70884965600309,
      "grad_norm": 0.17123398184776306,
      "learning_rate": 2.5993160388236927e-06,
      "loss": 0.013,
      "step": 2266300
    },
    {
      "epoch": 3.708882386441743,
      "grad_norm": 0.4697761535644531,
      "learning_rate": 2.5992501466101755e-06,
      "loss": 0.012,
      "step": 2266320
    },
    {
      "epoch": 3.708915116880396,
      "grad_norm": 0.27741187810897827,
      "learning_rate": 2.599184254396658e-06,
      "loss": 0.0097,
      "step": 2266340
    },
    {
      "epoch": 3.70894784731905,
      "grad_norm": 0.20381034910678864,
      "learning_rate": 2.5991183621831414e-06,
      "loss": 0.0098,
      "step": 2266360
    },
    {
      "epoch": 3.708980577757703,
      "grad_norm": 0.12829194962978363,
      "learning_rate": 2.599052469969624e-06,
      "loss": 0.0125,
      "step": 2266380
    },
    {
      "epoch": 3.7090133081963566,
      "grad_norm": 0.26496997475624084,
      "learning_rate": 2.598986577756107e-06,
      "loss": 0.0084,
      "step": 2266400
    },
    {
      "epoch": 3.7090460386350097,
      "grad_norm": 0.15977811813354492,
      "learning_rate": 2.5989206855425896e-06,
      "loss": 0.0075,
      "step": 2266420
    },
    {
      "epoch": 3.7090787690736633,
      "grad_norm": 0.2866324484348297,
      "learning_rate": 2.5988547933290723e-06,
      "loss": 0.0117,
      "step": 2266440
    },
    {
      "epoch": 3.7091114995123164,
      "grad_norm": 0.39458903670310974,
      "learning_rate": 2.5987889011155555e-06,
      "loss": 0.0102,
      "step": 2266460
    },
    {
      "epoch": 3.7091442299509696,
      "grad_norm": 0.3699686527252197,
      "learning_rate": 2.5987230089020382e-06,
      "loss": 0.0088,
      "step": 2266480
    },
    {
      "epoch": 3.709176960389623,
      "grad_norm": 0.2600029706954956,
      "learning_rate": 2.598657116688521e-06,
      "loss": 0.0123,
      "step": 2266500
    },
    {
      "epoch": 3.7092096908282763,
      "grad_norm": 0.13408564031124115,
      "learning_rate": 2.5985912244750037e-06,
      "loss": 0.0136,
      "step": 2266520
    },
    {
      "epoch": 3.70924242126693,
      "grad_norm": 0.5964114665985107,
      "learning_rate": 2.598525332261487e-06,
      "loss": 0.0112,
      "step": 2266540
    },
    {
      "epoch": 3.709275151705583,
      "grad_norm": 0.09190483391284943,
      "learning_rate": 2.5984594400479696e-06,
      "loss": 0.0111,
      "step": 2266560
    },
    {
      "epoch": 3.7093078821442367,
      "grad_norm": 0.22298024594783783,
      "learning_rate": 2.5983935478344524e-06,
      "loss": 0.0119,
      "step": 2266580
    },
    {
      "epoch": 3.70934061258289,
      "grad_norm": 0.5622876286506653,
      "learning_rate": 2.598327655620935e-06,
      "loss": 0.008,
      "step": 2266600
    },
    {
      "epoch": 3.709373343021543,
      "grad_norm": 0.2897750735282898,
      "learning_rate": 2.5982617634074183e-06,
      "loss": 0.0077,
      "step": 2266620
    },
    {
      "epoch": 3.7094060734601966,
      "grad_norm": 0.1331809014081955,
      "learning_rate": 2.5981958711939014e-06,
      "loss": 0.0127,
      "step": 2266640
    },
    {
      "epoch": 3.7094388038988497,
      "grad_norm": 0.442350834608078,
      "learning_rate": 2.598129978980384e-06,
      "loss": 0.0153,
      "step": 2266660
    },
    {
      "epoch": 3.7094715343375033,
      "grad_norm": 0.33856749534606934,
      "learning_rate": 2.5980640867668673e-06,
      "loss": 0.0169,
      "step": 2266680
    },
    {
      "epoch": 3.7095042647761565,
      "grad_norm": 0.3217105567455292,
      "learning_rate": 2.59799819455335e-06,
      "loss": 0.0111,
      "step": 2266700
    },
    {
      "epoch": 3.70953699521481,
      "grad_norm": 0.07149644941091537,
      "learning_rate": 2.597932302339833e-06,
      "loss": 0.0112,
      "step": 2266720
    },
    {
      "epoch": 3.709569725653463,
      "grad_norm": 0.3634187579154968,
      "learning_rate": 2.5978664101263155e-06,
      "loss": 0.015,
      "step": 2266740
    },
    {
      "epoch": 3.7096024560921164,
      "grad_norm": 0.09037746489048004,
      "learning_rate": 2.5978005179127987e-06,
      "loss": 0.0089,
      "step": 2266760
    },
    {
      "epoch": 3.70963518653077,
      "grad_norm": 0.2496684193611145,
      "learning_rate": 2.5977346256992814e-06,
      "loss": 0.0127,
      "step": 2266780
    },
    {
      "epoch": 3.709667916969423,
      "grad_norm": 0.7803921699523926,
      "learning_rate": 2.597668733485764e-06,
      "loss": 0.0074,
      "step": 2266800
    },
    {
      "epoch": 3.7097006474080767,
      "grad_norm": 0.17593371868133545,
      "learning_rate": 2.597602841272247e-06,
      "loss": 0.0084,
      "step": 2266820
    },
    {
      "epoch": 3.70973337784673,
      "grad_norm": 0.17233771085739136,
      "learning_rate": 2.59753694905873e-06,
      "loss": 0.0116,
      "step": 2266840
    },
    {
      "epoch": 3.7097661082853834,
      "grad_norm": 0.15413245558738708,
      "learning_rate": 2.597471056845213e-06,
      "loss": 0.0101,
      "step": 2266860
    },
    {
      "epoch": 3.7097988387240366,
      "grad_norm": 0.07747219502925873,
      "learning_rate": 2.5974051646316956e-06,
      "loss": 0.0095,
      "step": 2266880
    },
    {
      "epoch": 3.7098315691626897,
      "grad_norm": 0.41805848479270935,
      "learning_rate": 2.5973392724181783e-06,
      "loss": 0.0117,
      "step": 2266900
    },
    {
      "epoch": 3.7098642996013433,
      "grad_norm": 0.3401545286178589,
      "learning_rate": 2.597273380204661e-06,
      "loss": 0.0095,
      "step": 2266920
    },
    {
      "epoch": 3.7098970300399965,
      "grad_norm": 0.2565597593784332,
      "learning_rate": 2.5972074879911442e-06,
      "loss": 0.0121,
      "step": 2266940
    },
    {
      "epoch": 3.70992976047865,
      "grad_norm": 0.06721695512533188,
      "learning_rate": 2.597141595777627e-06,
      "loss": 0.0129,
      "step": 2266960
    },
    {
      "epoch": 3.7099624909173032,
      "grad_norm": 0.17147022485733032,
      "learning_rate": 2.59707570356411e-06,
      "loss": 0.0106,
      "step": 2266980
    },
    {
      "epoch": 3.709995221355957,
      "grad_norm": 0.6526160836219788,
      "learning_rate": 2.5970098113505933e-06,
      "loss": 0.0091,
      "step": 2267000
    },
    {
      "epoch": 3.71002795179461,
      "grad_norm": 0.3268638253211975,
      "learning_rate": 2.596943919137076e-06,
      "loss": 0.0132,
      "step": 2267020
    },
    {
      "epoch": 3.710060682233263,
      "grad_norm": 0.6074493527412415,
      "learning_rate": 2.5968780269235588e-06,
      "loss": 0.0178,
      "step": 2267040
    },
    {
      "epoch": 3.7100934126719167,
      "grad_norm": 0.3047865331172943,
      "learning_rate": 2.5968121347100415e-06,
      "loss": 0.0107,
      "step": 2267060
    },
    {
      "epoch": 3.71012614311057,
      "grad_norm": 0.07698430120944977,
      "learning_rate": 2.5967462424965247e-06,
      "loss": 0.0132,
      "step": 2267080
    },
    {
      "epoch": 3.7101588735492235,
      "grad_norm": 0.15759988129138947,
      "learning_rate": 2.5966803502830074e-06,
      "loss": 0.0133,
      "step": 2267100
    },
    {
      "epoch": 3.7101916039878766,
      "grad_norm": 0.08680351078510284,
      "learning_rate": 2.59661445806949e-06,
      "loss": 0.0111,
      "step": 2267120
    },
    {
      "epoch": 3.71022433442653,
      "grad_norm": 0.2233499139547348,
      "learning_rate": 2.596548565855973e-06,
      "loss": 0.0113,
      "step": 2267140
    },
    {
      "epoch": 3.7102570648651834,
      "grad_norm": 0.23056398332118988,
      "learning_rate": 2.596482673642456e-06,
      "loss": 0.011,
      "step": 2267160
    },
    {
      "epoch": 3.7102897953038365,
      "grad_norm": 0.13108813762664795,
      "learning_rate": 2.5964167814289388e-06,
      "loss": 0.0084,
      "step": 2267180
    },
    {
      "epoch": 3.71032252574249,
      "grad_norm": 0.24672675132751465,
      "learning_rate": 2.5963508892154215e-06,
      "loss": 0.0111,
      "step": 2267200
    },
    {
      "epoch": 3.7103552561811433,
      "grad_norm": 0.1489311009645462,
      "learning_rate": 2.5962849970019043e-06,
      "loss": 0.0113,
      "step": 2267220
    },
    {
      "epoch": 3.7103879866197964,
      "grad_norm": 0.09776589274406433,
      "learning_rate": 2.5962191047883874e-06,
      "loss": 0.0111,
      "step": 2267240
    },
    {
      "epoch": 3.71042071705845,
      "grad_norm": 0.2428756207227707,
      "learning_rate": 2.59615321257487e-06,
      "loss": 0.0107,
      "step": 2267260
    },
    {
      "epoch": 3.7104534474971036,
      "grad_norm": 0.24211075901985168,
      "learning_rate": 2.596087320361353e-06,
      "loss": 0.0153,
      "step": 2267280
    },
    {
      "epoch": 3.7104861779357567,
      "grad_norm": 0.22691485285758972,
      "learning_rate": 2.5960214281478356e-06,
      "loss": 0.0098,
      "step": 2267300
    },
    {
      "epoch": 3.71051890837441,
      "grad_norm": 0.29088401794433594,
      "learning_rate": 2.5959555359343184e-06,
      "loss": 0.015,
      "step": 2267320
    },
    {
      "epoch": 3.7105516388130635,
      "grad_norm": 0.11107313632965088,
      "learning_rate": 2.595889643720802e-06,
      "loss": 0.0082,
      "step": 2267340
    },
    {
      "epoch": 3.7105843692517166,
      "grad_norm": 0.3530469238758087,
      "learning_rate": 2.5958237515072847e-06,
      "loss": 0.0116,
      "step": 2267360
    },
    {
      "epoch": 3.71061709969037,
      "grad_norm": 0.18629153072834015,
      "learning_rate": 2.595757859293768e-06,
      "loss": 0.0085,
      "step": 2267380
    },
    {
      "epoch": 3.7106498301290234,
      "grad_norm": 0.4363214373588562,
      "learning_rate": 2.5956919670802506e-06,
      "loss": 0.01,
      "step": 2267400
    },
    {
      "epoch": 3.710682560567677,
      "grad_norm": 0.3234868347644806,
      "learning_rate": 2.5956260748667334e-06,
      "loss": 0.0098,
      "step": 2267420
    },
    {
      "epoch": 3.71071529100633,
      "grad_norm": 0.22290757298469543,
      "learning_rate": 2.595560182653216e-06,
      "loss": 0.0119,
      "step": 2267440
    },
    {
      "epoch": 3.7107480214449833,
      "grad_norm": 0.2572319805622101,
      "learning_rate": 2.595494290439699e-06,
      "loss": 0.0096,
      "step": 2267460
    },
    {
      "epoch": 3.710780751883637,
      "grad_norm": 0.4172881543636322,
      "learning_rate": 2.595428398226182e-06,
      "loss": 0.0108,
      "step": 2267480
    },
    {
      "epoch": 3.71081348232229,
      "grad_norm": 0.15978798270225525,
      "learning_rate": 2.5953625060126647e-06,
      "loss": 0.0089,
      "step": 2267500
    },
    {
      "epoch": 3.710846212760943,
      "grad_norm": 1.1569998264312744,
      "learning_rate": 2.5952966137991475e-06,
      "loss": 0.009,
      "step": 2267520
    },
    {
      "epoch": 3.7108789431995968,
      "grad_norm": 0.20544248819351196,
      "learning_rate": 2.5952307215856302e-06,
      "loss": 0.0144,
      "step": 2267540
    },
    {
      "epoch": 3.7109116736382504,
      "grad_norm": 0.3209584951400757,
      "learning_rate": 2.5951648293721134e-06,
      "loss": 0.014,
      "step": 2267560
    },
    {
      "epoch": 3.7109444040769035,
      "grad_norm": 0.21800263226032257,
      "learning_rate": 2.595098937158596e-06,
      "loss": 0.0105,
      "step": 2267580
    },
    {
      "epoch": 3.7109771345155567,
      "grad_norm": 0.17879131436347961,
      "learning_rate": 2.595033044945079e-06,
      "loss": 0.0098,
      "step": 2267600
    },
    {
      "epoch": 3.7110098649542103,
      "grad_norm": 0.39253896474838257,
      "learning_rate": 2.5949671527315616e-06,
      "loss": 0.0092,
      "step": 2267620
    },
    {
      "epoch": 3.7110425953928634,
      "grad_norm": 0.7026441693305969,
      "learning_rate": 2.5949012605180448e-06,
      "loss": 0.0141,
      "step": 2267640
    },
    {
      "epoch": 3.7110753258315166,
      "grad_norm": 0.17020666599273682,
      "learning_rate": 2.5948353683045275e-06,
      "loss": 0.0164,
      "step": 2267660
    },
    {
      "epoch": 3.71110805627017,
      "grad_norm": 0.14771796762943268,
      "learning_rate": 2.5947694760910102e-06,
      "loss": 0.0109,
      "step": 2267680
    },
    {
      "epoch": 3.7111407867088237,
      "grad_norm": 0.39178916811943054,
      "learning_rate": 2.594703583877494e-06,
      "loss": 0.0078,
      "step": 2267700
    },
    {
      "epoch": 3.711173517147477,
      "grad_norm": 0.14903299510478973,
      "learning_rate": 2.5946376916639766e-06,
      "loss": 0.0094,
      "step": 2267720
    },
    {
      "epoch": 3.71120624758613,
      "grad_norm": 0.5176839232444763,
      "learning_rate": 2.5945717994504593e-06,
      "loss": 0.0151,
      "step": 2267740
    },
    {
      "epoch": 3.7112389780247836,
      "grad_norm": 0.9179659485816956,
      "learning_rate": 2.594505907236942e-06,
      "loss": 0.012,
      "step": 2267760
    },
    {
      "epoch": 3.711271708463437,
      "grad_norm": 0.1089225709438324,
      "learning_rate": 2.594440015023425e-06,
      "loss": 0.0108,
      "step": 2267780
    },
    {
      "epoch": 3.71130443890209,
      "grad_norm": 0.20178170502185822,
      "learning_rate": 2.594374122809908e-06,
      "loss": 0.0091,
      "step": 2267800
    },
    {
      "epoch": 3.7113371693407435,
      "grad_norm": 0.16679975390434265,
      "learning_rate": 2.5943082305963907e-06,
      "loss": 0.0095,
      "step": 2267820
    },
    {
      "epoch": 3.7113698997793967,
      "grad_norm": 0.1734222173690796,
      "learning_rate": 2.5942423383828734e-06,
      "loss": 0.0079,
      "step": 2267840
    },
    {
      "epoch": 3.7114026302180503,
      "grad_norm": 0.24740298092365265,
      "learning_rate": 2.594176446169356e-06,
      "loss": 0.0107,
      "step": 2267860
    },
    {
      "epoch": 3.7114353606567034,
      "grad_norm": 0.20448383688926697,
      "learning_rate": 2.5941105539558393e-06,
      "loss": 0.012,
      "step": 2267880
    },
    {
      "epoch": 3.711468091095357,
      "grad_norm": 0.030236590653657913,
      "learning_rate": 2.594044661742322e-06,
      "loss": 0.0154,
      "step": 2267900
    },
    {
      "epoch": 3.71150082153401,
      "grad_norm": 0.21974222362041473,
      "learning_rate": 2.593978769528805e-06,
      "loss": 0.0074,
      "step": 2267920
    },
    {
      "epoch": 3.7115335519726633,
      "grad_norm": 0.4667561650276184,
      "learning_rate": 2.5939128773152876e-06,
      "loss": 0.0162,
      "step": 2267940
    },
    {
      "epoch": 3.711566282411317,
      "grad_norm": 0.26175081729888916,
      "learning_rate": 2.5938469851017707e-06,
      "loss": 0.0106,
      "step": 2267960
    },
    {
      "epoch": 3.71159901284997,
      "grad_norm": 0.48213765025138855,
      "learning_rate": 2.5937810928882535e-06,
      "loss": 0.0079,
      "step": 2267980
    },
    {
      "epoch": 3.7116317432886237,
      "grad_norm": 0.035466138273477554,
      "learning_rate": 2.593715200674736e-06,
      "loss": 0.0076,
      "step": 2268000
    },
    {
      "epoch": 3.711664473727277,
      "grad_norm": 0.4270799458026886,
      "learning_rate": 2.593649308461219e-06,
      "loss": 0.008,
      "step": 2268020
    },
    {
      "epoch": 3.7116972041659304,
      "grad_norm": 0.1886342316865921,
      "learning_rate": 2.5935834162477025e-06,
      "loss": 0.0073,
      "step": 2268040
    },
    {
      "epoch": 3.7117299346045836,
      "grad_norm": 0.21656262874603271,
      "learning_rate": 2.5935175240341853e-06,
      "loss": 0.0113,
      "step": 2268060
    },
    {
      "epoch": 3.7117626650432367,
      "grad_norm": 0.3317703902721405,
      "learning_rate": 2.593451631820668e-06,
      "loss": 0.0149,
      "step": 2268080
    },
    {
      "epoch": 3.7117953954818903,
      "grad_norm": 0.4311399757862091,
      "learning_rate": 2.593385739607151e-06,
      "loss": 0.0114,
      "step": 2268100
    },
    {
      "epoch": 3.7118281259205435,
      "grad_norm": 0.1570233553647995,
      "learning_rate": 2.593319847393634e-06,
      "loss": 0.01,
      "step": 2268120
    },
    {
      "epoch": 3.711860856359197,
      "grad_norm": 0.35866156220436096,
      "learning_rate": 2.5932539551801166e-06,
      "loss": 0.0102,
      "step": 2268140
    },
    {
      "epoch": 3.71189358679785,
      "grad_norm": 0.13652744889259338,
      "learning_rate": 2.5931880629665994e-06,
      "loss": 0.0071,
      "step": 2268160
    },
    {
      "epoch": 3.711926317236504,
      "grad_norm": 0.20636171102523804,
      "learning_rate": 2.5931221707530825e-06,
      "loss": 0.0172,
      "step": 2268180
    },
    {
      "epoch": 3.711959047675157,
      "grad_norm": 0.13408690690994263,
      "learning_rate": 2.5930562785395653e-06,
      "loss": 0.0091,
      "step": 2268200
    },
    {
      "epoch": 3.71199177811381,
      "grad_norm": 0.4234261214733124,
      "learning_rate": 2.592990386326048e-06,
      "loss": 0.0121,
      "step": 2268220
    },
    {
      "epoch": 3.7120245085524637,
      "grad_norm": 0.22430147230625153,
      "learning_rate": 2.5929244941125308e-06,
      "loss": 0.0093,
      "step": 2268240
    },
    {
      "epoch": 3.712057238991117,
      "grad_norm": 0.1404016762971878,
      "learning_rate": 2.592858601899014e-06,
      "loss": 0.0097,
      "step": 2268260
    },
    {
      "epoch": 3.7120899694297704,
      "grad_norm": 0.5583996176719666,
      "learning_rate": 2.5927927096854967e-06,
      "loss": 0.0101,
      "step": 2268280
    },
    {
      "epoch": 3.7121226998684236,
      "grad_norm": 0.15587393939495087,
      "learning_rate": 2.5927268174719794e-06,
      "loss": 0.0127,
      "step": 2268300
    },
    {
      "epoch": 3.712155430307077,
      "grad_norm": 0.2141171246767044,
      "learning_rate": 2.592660925258462e-06,
      "loss": 0.0101,
      "step": 2268320
    },
    {
      "epoch": 3.7121881607457303,
      "grad_norm": 0.6073476076126099,
      "learning_rate": 2.592595033044945e-06,
      "loss": 0.0117,
      "step": 2268340
    },
    {
      "epoch": 3.7122208911843835,
      "grad_norm": 0.3206230103969574,
      "learning_rate": 2.592529140831428e-06,
      "loss": 0.0122,
      "step": 2268360
    },
    {
      "epoch": 3.712253621623037,
      "grad_norm": 0.37910518050193787,
      "learning_rate": 2.592463248617911e-06,
      "loss": 0.0119,
      "step": 2268380
    },
    {
      "epoch": 3.71228635206169,
      "grad_norm": 1.5271258354187012,
      "learning_rate": 2.592397356404394e-06,
      "loss": 0.0125,
      "step": 2268400
    },
    {
      "epoch": 3.712319082500344,
      "grad_norm": 0.39679205417633057,
      "learning_rate": 2.592331464190877e-06,
      "loss": 0.0082,
      "step": 2268420
    },
    {
      "epoch": 3.712351812938997,
      "grad_norm": 0.14649555087089539,
      "learning_rate": 2.59226557197736e-06,
      "loss": 0.007,
      "step": 2268440
    },
    {
      "epoch": 3.7123845433776506,
      "grad_norm": 0.3938901126384735,
      "learning_rate": 2.5921996797638426e-06,
      "loss": 0.0123,
      "step": 2268460
    },
    {
      "epoch": 3.7124172738163037,
      "grad_norm": 0.38349565863609314,
      "learning_rate": 2.5921337875503253e-06,
      "loss": 0.0097,
      "step": 2268480
    },
    {
      "epoch": 3.712450004254957,
      "grad_norm": 0.49963614344596863,
      "learning_rate": 2.5920678953368085e-06,
      "loss": 0.0108,
      "step": 2268500
    },
    {
      "epoch": 3.7124827346936105,
      "grad_norm": 0.042214490473270416,
      "learning_rate": 2.5920020031232912e-06,
      "loss": 0.0157,
      "step": 2268520
    },
    {
      "epoch": 3.7125154651322636,
      "grad_norm": 0.1852409690618515,
      "learning_rate": 2.591936110909774e-06,
      "loss": 0.0084,
      "step": 2268540
    },
    {
      "epoch": 3.712548195570917,
      "grad_norm": 0.07434238493442535,
      "learning_rate": 2.5918702186962567e-06,
      "loss": 0.0122,
      "step": 2268560
    },
    {
      "epoch": 3.7125809260095703,
      "grad_norm": 0.48229917883872986,
      "learning_rate": 2.59180432648274e-06,
      "loss": 0.0123,
      "step": 2268580
    },
    {
      "epoch": 3.712613656448224,
      "grad_norm": 0.19965794682502747,
      "learning_rate": 2.5917384342692226e-06,
      "loss": 0.0177,
      "step": 2268600
    },
    {
      "epoch": 3.712646386886877,
      "grad_norm": 0.1046220138669014,
      "learning_rate": 2.5916725420557054e-06,
      "loss": 0.0103,
      "step": 2268620
    },
    {
      "epoch": 3.7126791173255302,
      "grad_norm": 0.13453301787376404,
      "learning_rate": 2.591606649842188e-06,
      "loss": 0.0109,
      "step": 2268640
    },
    {
      "epoch": 3.712711847764184,
      "grad_norm": 0.22614255547523499,
      "learning_rate": 2.5915407576286713e-06,
      "loss": 0.0088,
      "step": 2268660
    },
    {
      "epoch": 3.712744578202837,
      "grad_norm": 0.3740738034248352,
      "learning_rate": 2.591474865415154e-06,
      "loss": 0.0126,
      "step": 2268680
    },
    {
      "epoch": 3.71277730864149,
      "grad_norm": 0.35610514879226685,
      "learning_rate": 2.5914089732016367e-06,
      "loss": 0.0101,
      "step": 2268700
    },
    {
      "epoch": 3.7128100390801437,
      "grad_norm": 0.31159889698028564,
      "learning_rate": 2.5913430809881195e-06,
      "loss": 0.0085,
      "step": 2268720
    },
    {
      "epoch": 3.7128427695187973,
      "grad_norm": 0.3146885931491852,
      "learning_rate": 2.591277188774603e-06,
      "loss": 0.0075,
      "step": 2268740
    },
    {
      "epoch": 3.7128754999574505,
      "grad_norm": 0.5251352190971375,
      "learning_rate": 2.591211296561086e-06,
      "loss": 0.0118,
      "step": 2268760
    },
    {
      "epoch": 3.7129082303961036,
      "grad_norm": 0.32235294580459595,
      "learning_rate": 2.5911454043475686e-06,
      "loss": 0.0143,
      "step": 2268780
    },
    {
      "epoch": 3.712940960834757,
      "grad_norm": 0.3959839344024658,
      "learning_rate": 2.5910795121340517e-06,
      "loss": 0.0203,
      "step": 2268800
    },
    {
      "epoch": 3.7129736912734104,
      "grad_norm": 0.2634916603565216,
      "learning_rate": 2.5910136199205345e-06,
      "loss": 0.0116,
      "step": 2268820
    },
    {
      "epoch": 3.7130064217120635,
      "grad_norm": 0.2318887561559677,
      "learning_rate": 2.590947727707017e-06,
      "loss": 0.0088,
      "step": 2268840
    },
    {
      "epoch": 3.713039152150717,
      "grad_norm": 0.21952824294567108,
      "learning_rate": 2.5908818354935e-06,
      "loss": 0.0074,
      "step": 2268860
    },
    {
      "epoch": 3.7130718825893707,
      "grad_norm": 0.19840684533119202,
      "learning_rate": 2.5908159432799827e-06,
      "loss": 0.0099,
      "step": 2268880
    },
    {
      "epoch": 3.713104613028024,
      "grad_norm": 0.14587566256523132,
      "learning_rate": 2.590750051066466e-06,
      "loss": 0.008,
      "step": 2268900
    },
    {
      "epoch": 3.713137343466677,
      "grad_norm": 0.22363238036632538,
      "learning_rate": 2.5906841588529486e-06,
      "loss": 0.0126,
      "step": 2268920
    },
    {
      "epoch": 3.7131700739053306,
      "grad_norm": 0.479321151971817,
      "learning_rate": 2.5906182666394313e-06,
      "loss": 0.0101,
      "step": 2268940
    },
    {
      "epoch": 3.7132028043439838,
      "grad_norm": 0.09454765170812607,
      "learning_rate": 2.590552374425914e-06,
      "loss": 0.0087,
      "step": 2268960
    },
    {
      "epoch": 3.713235534782637,
      "grad_norm": 0.23617513477802277,
      "learning_rate": 2.5904864822123972e-06,
      "loss": 0.0118,
      "step": 2268980
    },
    {
      "epoch": 3.7132682652212905,
      "grad_norm": 0.18618988990783691,
      "learning_rate": 2.59042058999888e-06,
      "loss": 0.0121,
      "step": 2269000
    },
    {
      "epoch": 3.713300995659944,
      "grad_norm": 0.24638864398002625,
      "learning_rate": 2.5903546977853627e-06,
      "loss": 0.0116,
      "step": 2269020
    },
    {
      "epoch": 3.7133337260985972,
      "grad_norm": 0.14107045531272888,
      "learning_rate": 2.5902888055718454e-06,
      "loss": 0.0132,
      "step": 2269040
    },
    {
      "epoch": 3.7133664565372504,
      "grad_norm": 0.11693308502435684,
      "learning_rate": 2.5902229133583286e-06,
      "loss": 0.0098,
      "step": 2269060
    },
    {
      "epoch": 3.713399186975904,
      "grad_norm": 0.4836483299732208,
      "learning_rate": 2.5901570211448113e-06,
      "loss": 0.0133,
      "step": 2269080
    },
    {
      "epoch": 3.713431917414557,
      "grad_norm": 0.2824225127696991,
      "learning_rate": 2.5900911289312945e-06,
      "loss": 0.0115,
      "step": 2269100
    },
    {
      "epoch": 3.7134646478532103,
      "grad_norm": 0.14713910222053528,
      "learning_rate": 2.5900252367177777e-06,
      "loss": 0.0129,
      "step": 2269120
    },
    {
      "epoch": 3.713497378291864,
      "grad_norm": 0.284970760345459,
      "learning_rate": 2.5899593445042604e-06,
      "loss": 0.0088,
      "step": 2269140
    },
    {
      "epoch": 3.7135301087305175,
      "grad_norm": 0.174360990524292,
      "learning_rate": 2.589893452290743e-06,
      "loss": 0.0095,
      "step": 2269160
    },
    {
      "epoch": 3.7135628391691706,
      "grad_norm": 0.26070159673690796,
      "learning_rate": 2.589827560077226e-06,
      "loss": 0.0116,
      "step": 2269180
    },
    {
      "epoch": 3.7135955696078238,
      "grad_norm": 0.2742266356945038,
      "learning_rate": 2.589761667863709e-06,
      "loss": 0.0125,
      "step": 2269200
    },
    {
      "epoch": 3.7136283000464774,
      "grad_norm": 0.044223882257938385,
      "learning_rate": 2.589695775650192e-06,
      "loss": 0.0069,
      "step": 2269220
    },
    {
      "epoch": 3.7136610304851305,
      "grad_norm": 0.4579731225967407,
      "learning_rate": 2.5896298834366745e-06,
      "loss": 0.0086,
      "step": 2269240
    },
    {
      "epoch": 3.7136937609237837,
      "grad_norm": 0.5121051669120789,
      "learning_rate": 2.5895639912231573e-06,
      "loss": 0.0094,
      "step": 2269260
    },
    {
      "epoch": 3.7137264913624373,
      "grad_norm": 0.17607906460762024,
      "learning_rate": 2.58949809900964e-06,
      "loss": 0.0104,
      "step": 2269280
    },
    {
      "epoch": 3.7137592218010904,
      "grad_norm": 0.060587603598833084,
      "learning_rate": 2.589432206796123e-06,
      "loss": 0.0081,
      "step": 2269300
    },
    {
      "epoch": 3.713791952239744,
      "grad_norm": 0.11490508168935776,
      "learning_rate": 2.589366314582606e-06,
      "loss": 0.0089,
      "step": 2269320
    },
    {
      "epoch": 3.713824682678397,
      "grad_norm": 1.1464120149612427,
      "learning_rate": 2.5893004223690887e-06,
      "loss": 0.0074,
      "step": 2269340
    },
    {
      "epoch": 3.7138574131170508,
      "grad_norm": 0.2854444086551666,
      "learning_rate": 2.5892345301555714e-06,
      "loss": 0.0091,
      "step": 2269360
    },
    {
      "epoch": 3.713890143555704,
      "grad_norm": 0.4812743663787842,
      "learning_rate": 2.5891686379420546e-06,
      "loss": 0.0086,
      "step": 2269380
    },
    {
      "epoch": 3.713922873994357,
      "grad_norm": 0.5271369218826294,
      "learning_rate": 2.5891027457285373e-06,
      "loss": 0.0121,
      "step": 2269400
    },
    {
      "epoch": 3.7139556044330106,
      "grad_norm": 0.09213598817586899,
      "learning_rate": 2.58903685351502e-06,
      "loss": 0.0129,
      "step": 2269420
    },
    {
      "epoch": 3.713988334871664,
      "grad_norm": 0.18787313997745514,
      "learning_rate": 2.5889709613015028e-06,
      "loss": 0.0128,
      "step": 2269440
    },
    {
      "epoch": 3.7140210653103174,
      "grad_norm": 0.12261784821748734,
      "learning_rate": 2.5889050690879864e-06,
      "loss": 0.0129,
      "step": 2269460
    },
    {
      "epoch": 3.7140537957489705,
      "grad_norm": 0.04050976037979126,
      "learning_rate": 2.588839176874469e-06,
      "loss": 0.0116,
      "step": 2269480
    },
    {
      "epoch": 3.714086526187624,
      "grad_norm": 0.2614721953868866,
      "learning_rate": 2.588773284660952e-06,
      "loss": 0.013,
      "step": 2269500
    },
    {
      "epoch": 3.7141192566262773,
      "grad_norm": 0.11495570093393326,
      "learning_rate": 2.588707392447435e-06,
      "loss": 0.0061,
      "step": 2269520
    },
    {
      "epoch": 3.7141519870649304,
      "grad_norm": 0.1811654418706894,
      "learning_rate": 2.5886415002339177e-06,
      "loss": 0.0123,
      "step": 2269540
    },
    {
      "epoch": 3.714184717503584,
      "grad_norm": 0.15920649468898773,
      "learning_rate": 2.5885756080204005e-06,
      "loss": 0.0139,
      "step": 2269560
    },
    {
      "epoch": 3.714217447942237,
      "grad_norm": 0.2307879775762558,
      "learning_rate": 2.5885097158068832e-06,
      "loss": 0.0151,
      "step": 2269580
    },
    {
      "epoch": 3.7142501783808908,
      "grad_norm": 0.3050953447818756,
      "learning_rate": 2.5884438235933664e-06,
      "loss": 0.0107,
      "step": 2269600
    },
    {
      "epoch": 3.714282908819544,
      "grad_norm": 0.07002618908882141,
      "learning_rate": 2.588377931379849e-06,
      "loss": 0.0082,
      "step": 2269620
    },
    {
      "epoch": 3.7143156392581975,
      "grad_norm": 0.327322393655777,
      "learning_rate": 2.588312039166332e-06,
      "loss": 0.0088,
      "step": 2269640
    },
    {
      "epoch": 3.7143483696968507,
      "grad_norm": 0.20916473865509033,
      "learning_rate": 2.5882461469528146e-06,
      "loss": 0.008,
      "step": 2269660
    },
    {
      "epoch": 3.714381100135504,
      "grad_norm": 0.5327247977256775,
      "learning_rate": 2.5881802547392978e-06,
      "loss": 0.0139,
      "step": 2269680
    },
    {
      "epoch": 3.7144138305741574,
      "grad_norm": 0.2862924635410309,
      "learning_rate": 2.5881143625257805e-06,
      "loss": 0.0151,
      "step": 2269700
    },
    {
      "epoch": 3.7144465610128106,
      "grad_norm": 0.17627498507499695,
      "learning_rate": 2.5880484703122632e-06,
      "loss": 0.0142,
      "step": 2269720
    },
    {
      "epoch": 3.714479291451464,
      "grad_norm": 0.10830976068973541,
      "learning_rate": 2.587982578098746e-06,
      "loss": 0.0112,
      "step": 2269740
    },
    {
      "epoch": 3.7145120218901173,
      "grad_norm": 0.13033996522426605,
      "learning_rate": 2.5879166858852287e-06,
      "loss": 0.0081,
      "step": 2269760
    },
    {
      "epoch": 3.714544752328771,
      "grad_norm": 0.14076432585716248,
      "learning_rate": 2.587850793671712e-06,
      "loss": 0.0096,
      "step": 2269780
    },
    {
      "epoch": 3.714577482767424,
      "grad_norm": 0.07313873618841171,
      "learning_rate": 2.587784901458195e-06,
      "loss": 0.0096,
      "step": 2269800
    },
    {
      "epoch": 3.714610213206077,
      "grad_norm": 0.29482629895210266,
      "learning_rate": 2.587719009244678e-06,
      "loss": 0.0098,
      "step": 2269820
    },
    {
      "epoch": 3.714642943644731,
      "grad_norm": 0.24364836513996124,
      "learning_rate": 2.587653117031161e-06,
      "loss": 0.0119,
      "step": 2269840
    },
    {
      "epoch": 3.714675674083384,
      "grad_norm": 0.21307477355003357,
      "learning_rate": 2.5875872248176437e-06,
      "loss": 0.0107,
      "step": 2269860
    },
    {
      "epoch": 3.7147084045220375,
      "grad_norm": 0.26508042216300964,
      "learning_rate": 2.5875213326041264e-06,
      "loss": 0.0087,
      "step": 2269880
    },
    {
      "epoch": 3.7147411349606907,
      "grad_norm": 0.40499362349510193,
      "learning_rate": 2.587455440390609e-06,
      "loss": 0.0139,
      "step": 2269900
    },
    {
      "epoch": 3.7147738653993443,
      "grad_norm": 0.433967262506485,
      "learning_rate": 2.5873895481770923e-06,
      "loss": 0.0122,
      "step": 2269920
    },
    {
      "epoch": 3.7148065958379974,
      "grad_norm": 0.28808191418647766,
      "learning_rate": 2.587323655963575e-06,
      "loss": 0.0091,
      "step": 2269940
    },
    {
      "epoch": 3.7148393262766506,
      "grad_norm": 0.13994066417217255,
      "learning_rate": 2.587257763750058e-06,
      "loss": 0.0109,
      "step": 2269960
    },
    {
      "epoch": 3.714872056715304,
      "grad_norm": 0.16647516191005707,
      "learning_rate": 2.5871918715365406e-06,
      "loss": 0.0092,
      "step": 2269980
    },
    {
      "epoch": 3.7149047871539573,
      "grad_norm": 0.5872595906257629,
      "learning_rate": 2.5871259793230237e-06,
      "loss": 0.0116,
      "step": 2270000
    },
    {
      "epoch": 3.714937517592611,
      "grad_norm": 0.11594393104314804,
      "learning_rate": 2.5870600871095065e-06,
      "loss": 0.0073,
      "step": 2270020
    },
    {
      "epoch": 3.714970248031264,
      "grad_norm": 0.06909511238336563,
      "learning_rate": 2.586994194895989e-06,
      "loss": 0.0106,
      "step": 2270040
    },
    {
      "epoch": 3.7150029784699177,
      "grad_norm": 1.1167594194412231,
      "learning_rate": 2.586928302682472e-06,
      "loss": 0.0099,
      "step": 2270060
    },
    {
      "epoch": 3.715035708908571,
      "grad_norm": 0.37206193804740906,
      "learning_rate": 2.586862410468955e-06,
      "loss": 0.014,
      "step": 2270080
    },
    {
      "epoch": 3.715068439347224,
      "grad_norm": 0.3821907937526703,
      "learning_rate": 2.586796518255438e-06,
      "loss": 0.0113,
      "step": 2270100
    },
    {
      "epoch": 3.7151011697858776,
      "grad_norm": 0.4307532012462616,
      "learning_rate": 2.5867306260419206e-06,
      "loss": 0.0174,
      "step": 2270120
    },
    {
      "epoch": 3.7151339002245307,
      "grad_norm": 0.16740632057189941,
      "learning_rate": 2.5866647338284033e-06,
      "loss": 0.0071,
      "step": 2270140
    },
    {
      "epoch": 3.7151666306631843,
      "grad_norm": 0.1881154328584671,
      "learning_rate": 2.586598841614887e-06,
      "loss": 0.0146,
      "step": 2270160
    },
    {
      "epoch": 3.7151993611018375,
      "grad_norm": 0.10738164186477661,
      "learning_rate": 2.5865329494013697e-06,
      "loss": 0.0129,
      "step": 2270180
    },
    {
      "epoch": 3.715232091540491,
      "grad_norm": 0.2224794179201126,
      "learning_rate": 2.5864670571878524e-06,
      "loss": 0.0083,
      "step": 2270200
    },
    {
      "epoch": 3.715264821979144,
      "grad_norm": 0.5319948196411133,
      "learning_rate": 2.5864011649743356e-06,
      "loss": 0.0109,
      "step": 2270220
    },
    {
      "epoch": 3.7152975524177974,
      "grad_norm": 0.6039097905158997,
      "learning_rate": 2.5863352727608183e-06,
      "loss": 0.0092,
      "step": 2270240
    },
    {
      "epoch": 3.715330282856451,
      "grad_norm": 0.31326422095298767,
      "learning_rate": 2.586269380547301e-06,
      "loss": 0.0148,
      "step": 2270260
    },
    {
      "epoch": 3.715363013295104,
      "grad_norm": 0.2339818924665451,
      "learning_rate": 2.5862034883337838e-06,
      "loss": 0.0099,
      "step": 2270280
    },
    {
      "epoch": 3.7153957437337573,
      "grad_norm": 0.1819920837879181,
      "learning_rate": 2.5861375961202665e-06,
      "loss": 0.009,
      "step": 2270300
    },
    {
      "epoch": 3.715428474172411,
      "grad_norm": 0.8951243162155151,
      "learning_rate": 2.5860717039067497e-06,
      "loss": 0.0164,
      "step": 2270320
    },
    {
      "epoch": 3.7154612046110644,
      "grad_norm": 0.14638911187648773,
      "learning_rate": 2.5860058116932324e-06,
      "loss": 0.0112,
      "step": 2270340
    },
    {
      "epoch": 3.7154939350497176,
      "grad_norm": 0.08327533304691315,
      "learning_rate": 2.585939919479715e-06,
      "loss": 0.01,
      "step": 2270360
    },
    {
      "epoch": 3.7155266654883707,
      "grad_norm": 0.17459793388843536,
      "learning_rate": 2.585874027266198e-06,
      "loss": 0.012,
      "step": 2270380
    },
    {
      "epoch": 3.7155593959270243,
      "grad_norm": 0.42471015453338623,
      "learning_rate": 2.585808135052681e-06,
      "loss": 0.0097,
      "step": 2270400
    },
    {
      "epoch": 3.7155921263656775,
      "grad_norm": 0.23318324983119965,
      "learning_rate": 2.585742242839164e-06,
      "loss": 0.0109,
      "step": 2270420
    },
    {
      "epoch": 3.7156248568043306,
      "grad_norm": 0.16615161299705505,
      "learning_rate": 2.5856763506256465e-06,
      "loss": 0.012,
      "step": 2270440
    },
    {
      "epoch": 3.7156575872429842,
      "grad_norm": 0.3794088363647461,
      "learning_rate": 2.5856104584121293e-06,
      "loss": 0.0097,
      "step": 2270460
    },
    {
      "epoch": 3.715690317681638,
      "grad_norm": 0.26458483934402466,
      "learning_rate": 2.5855445661986124e-06,
      "loss": 0.0087,
      "step": 2270480
    },
    {
      "epoch": 3.715723048120291,
      "grad_norm": 0.24743179976940155,
      "learning_rate": 2.5854786739850956e-06,
      "loss": 0.0129,
      "step": 2270500
    },
    {
      "epoch": 3.715755778558944,
      "grad_norm": 1.7370219230651855,
      "learning_rate": 2.5854127817715783e-06,
      "loss": 0.0123,
      "step": 2270520
    },
    {
      "epoch": 3.7157885089975977,
      "grad_norm": 0.22158613801002502,
      "learning_rate": 2.5853468895580615e-06,
      "loss": 0.0104,
      "step": 2270540
    },
    {
      "epoch": 3.715821239436251,
      "grad_norm": 0.17674526572227478,
      "learning_rate": 2.5852809973445442e-06,
      "loss": 0.0077,
      "step": 2270560
    },
    {
      "epoch": 3.715853969874904,
      "grad_norm": 0.0704834833741188,
      "learning_rate": 2.585215105131027e-06,
      "loss": 0.0105,
      "step": 2270580
    },
    {
      "epoch": 3.7158867003135576,
      "grad_norm": 0.6103295683860779,
      "learning_rate": 2.5851492129175097e-06,
      "loss": 0.0086,
      "step": 2270600
    },
    {
      "epoch": 3.715919430752211,
      "grad_norm": 0.21708683669567108,
      "learning_rate": 2.585083320703993e-06,
      "loss": 0.0144,
      "step": 2270620
    },
    {
      "epoch": 3.7159521611908644,
      "grad_norm": 0.15759330987930298,
      "learning_rate": 2.5850174284904756e-06,
      "loss": 0.0163,
      "step": 2270640
    },
    {
      "epoch": 3.7159848916295175,
      "grad_norm": 0.09131603688001633,
      "learning_rate": 2.5849515362769584e-06,
      "loss": 0.0093,
      "step": 2270660
    },
    {
      "epoch": 3.716017622068171,
      "grad_norm": 0.22314949333667755,
      "learning_rate": 2.584885644063441e-06,
      "loss": 0.0095,
      "step": 2270680
    },
    {
      "epoch": 3.7160503525068243,
      "grad_norm": 0.05847572162747383,
      "learning_rate": 2.5848197518499243e-06,
      "loss": 0.0078,
      "step": 2270700
    },
    {
      "epoch": 3.7160830829454774,
      "grad_norm": 0.35027971863746643,
      "learning_rate": 2.584753859636407e-06,
      "loss": 0.0116,
      "step": 2270720
    },
    {
      "epoch": 3.716115813384131,
      "grad_norm": 0.36844804883003235,
      "learning_rate": 2.5846879674228898e-06,
      "loss": 0.0122,
      "step": 2270740
    },
    {
      "epoch": 3.7161485438227846,
      "grad_norm": 0.2705005407333374,
      "learning_rate": 2.5846220752093725e-06,
      "loss": 0.0156,
      "step": 2270760
    },
    {
      "epoch": 3.7161812742614377,
      "grad_norm": 0.19901002943515778,
      "learning_rate": 2.5845561829958552e-06,
      "loss": 0.0137,
      "step": 2270780
    },
    {
      "epoch": 3.716214004700091,
      "grad_norm": 0.243205264210701,
      "learning_rate": 2.5844902907823384e-06,
      "loss": 0.0154,
      "step": 2270800
    },
    {
      "epoch": 3.7162467351387445,
      "grad_norm": 0.4265563488006592,
      "learning_rate": 2.584424398568821e-06,
      "loss": 0.0091,
      "step": 2270820
    },
    {
      "epoch": 3.7162794655773976,
      "grad_norm": 0.14100657403469086,
      "learning_rate": 2.584358506355304e-06,
      "loss": 0.009,
      "step": 2270840
    },
    {
      "epoch": 3.716312196016051,
      "grad_norm": 0.44181355834007263,
      "learning_rate": 2.5842926141417875e-06,
      "loss": 0.0091,
      "step": 2270860
    },
    {
      "epoch": 3.7163449264547044,
      "grad_norm": 0.5901340842247009,
      "learning_rate": 2.58422672192827e-06,
      "loss": 0.0157,
      "step": 2270880
    },
    {
      "epoch": 3.7163776568933575,
      "grad_norm": 0.6449863314628601,
      "learning_rate": 2.584160829714753e-06,
      "loss": 0.0138,
      "step": 2270900
    },
    {
      "epoch": 3.716410387332011,
      "grad_norm": 0.4987339675426483,
      "learning_rate": 2.5840949375012357e-06,
      "loss": 0.0112,
      "step": 2270920
    },
    {
      "epoch": 3.7164431177706643,
      "grad_norm": 0.4340936541557312,
      "learning_rate": 2.584029045287719e-06,
      "loss": 0.0091,
      "step": 2270940
    },
    {
      "epoch": 3.716475848209318,
      "grad_norm": 0.1230442076921463,
      "learning_rate": 2.5839631530742016e-06,
      "loss": 0.01,
      "step": 2270960
    },
    {
      "epoch": 3.716508578647971,
      "grad_norm": 0.07869825512170792,
      "learning_rate": 2.5838972608606843e-06,
      "loss": 0.0077,
      "step": 2270980
    },
    {
      "epoch": 3.716541309086624,
      "grad_norm": 0.2950202226638794,
      "learning_rate": 2.583831368647167e-06,
      "loss": 0.0107,
      "step": 2271000
    },
    {
      "epoch": 3.7165740395252778,
      "grad_norm": 0.16497592628002167,
      "learning_rate": 2.5837654764336502e-06,
      "loss": 0.0167,
      "step": 2271020
    },
    {
      "epoch": 3.716606769963931,
      "grad_norm": 0.25966978073120117,
      "learning_rate": 2.583699584220133e-06,
      "loss": 0.0125,
      "step": 2271040
    },
    {
      "epoch": 3.7166395004025845,
      "grad_norm": 0.3854220509529114,
      "learning_rate": 2.5836336920066157e-06,
      "loss": 0.0117,
      "step": 2271060
    },
    {
      "epoch": 3.7166722308412377,
      "grad_norm": 0.2423272579908371,
      "learning_rate": 2.5835677997930984e-06,
      "loss": 0.011,
      "step": 2271080
    },
    {
      "epoch": 3.7167049612798913,
      "grad_norm": 0.07173462957143784,
      "learning_rate": 2.5835019075795816e-06,
      "loss": 0.0096,
      "step": 2271100
    },
    {
      "epoch": 3.7167376917185444,
      "grad_norm": 0.39594098925590515,
      "learning_rate": 2.5834360153660643e-06,
      "loss": 0.009,
      "step": 2271120
    },
    {
      "epoch": 3.7167704221571976,
      "grad_norm": 0.4091602861881256,
      "learning_rate": 2.583370123152547e-06,
      "loss": 0.0085,
      "step": 2271140
    },
    {
      "epoch": 3.716803152595851,
      "grad_norm": 0.5109675526618958,
      "learning_rate": 2.58330423093903e-06,
      "loss": 0.0072,
      "step": 2271160
    },
    {
      "epoch": 3.7168358830345043,
      "grad_norm": 0.7706988453865051,
      "learning_rate": 2.5832383387255126e-06,
      "loss": 0.0126,
      "step": 2271180
    },
    {
      "epoch": 3.716868613473158,
      "grad_norm": 0.4103783071041107,
      "learning_rate": 2.5831724465119957e-06,
      "loss": 0.0145,
      "step": 2271200
    },
    {
      "epoch": 3.716901343911811,
      "grad_norm": 0.15278251469135284,
      "learning_rate": 2.583106554298479e-06,
      "loss": 0.0073,
      "step": 2271220
    },
    {
      "epoch": 3.7169340743504646,
      "grad_norm": 0.20479992032051086,
      "learning_rate": 2.583040662084962e-06,
      "loss": 0.0166,
      "step": 2271240
    },
    {
      "epoch": 3.716966804789118,
      "grad_norm": 0.12565167248249054,
      "learning_rate": 2.582974769871445e-06,
      "loss": 0.0095,
      "step": 2271260
    },
    {
      "epoch": 3.716999535227771,
      "grad_norm": 0.161822110414505,
      "learning_rate": 2.5829088776579275e-06,
      "loss": 0.0078,
      "step": 2271280
    },
    {
      "epoch": 3.7170322656664245,
      "grad_norm": 0.16358445584774017,
      "learning_rate": 2.5828429854444103e-06,
      "loss": 0.0074,
      "step": 2271300
    },
    {
      "epoch": 3.7170649961050777,
      "grad_norm": 0.32857081294059753,
      "learning_rate": 2.582777093230893e-06,
      "loss": 0.0137,
      "step": 2271320
    },
    {
      "epoch": 3.7170977265437313,
      "grad_norm": 0.05280093476176262,
      "learning_rate": 2.582711201017376e-06,
      "loss": 0.0085,
      "step": 2271340
    },
    {
      "epoch": 3.7171304569823844,
      "grad_norm": 0.1372535526752472,
      "learning_rate": 2.582645308803859e-06,
      "loss": 0.0095,
      "step": 2271360
    },
    {
      "epoch": 3.717163187421038,
      "grad_norm": 0.50037682056427,
      "learning_rate": 2.5825794165903417e-06,
      "loss": 0.0131,
      "step": 2271380
    },
    {
      "epoch": 3.717195917859691,
      "grad_norm": 0.5158042907714844,
      "learning_rate": 2.5825135243768244e-06,
      "loss": 0.0111,
      "step": 2271400
    },
    {
      "epoch": 3.7172286482983443,
      "grad_norm": 0.02342664636671543,
      "learning_rate": 2.5824476321633076e-06,
      "loss": 0.0076,
      "step": 2271420
    },
    {
      "epoch": 3.717261378736998,
      "grad_norm": 0.15598911046981812,
      "learning_rate": 2.5823817399497903e-06,
      "loss": 0.0066,
      "step": 2271440
    },
    {
      "epoch": 3.717294109175651,
      "grad_norm": 0.1698731929063797,
      "learning_rate": 2.582315847736273e-06,
      "loss": 0.0102,
      "step": 2271460
    },
    {
      "epoch": 3.7173268396143047,
      "grad_norm": 0.3536478579044342,
      "learning_rate": 2.5822499555227558e-06,
      "loss": 0.0118,
      "step": 2271480
    },
    {
      "epoch": 3.717359570052958,
      "grad_norm": 0.17473426461219788,
      "learning_rate": 2.582184063309239e-06,
      "loss": 0.0084,
      "step": 2271500
    },
    {
      "epoch": 3.7173923004916114,
      "grad_norm": 0.04731563851237297,
      "learning_rate": 2.5821181710957217e-06,
      "loss": 0.0071,
      "step": 2271520
    },
    {
      "epoch": 3.7174250309302646,
      "grad_norm": 0.14542889595031738,
      "learning_rate": 2.5820522788822044e-06,
      "loss": 0.0074,
      "step": 2271540
    },
    {
      "epoch": 3.7174577613689177,
      "grad_norm": 0.2936258018016815,
      "learning_rate": 2.581986386668688e-06,
      "loss": 0.0089,
      "step": 2271560
    },
    {
      "epoch": 3.7174904918075713,
      "grad_norm": 0.5750541090965271,
      "learning_rate": 2.5819204944551707e-06,
      "loss": 0.0146,
      "step": 2271580
    },
    {
      "epoch": 3.7175232222462244,
      "grad_norm": 0.12593427300453186,
      "learning_rate": 2.5818546022416535e-06,
      "loss": 0.0069,
      "step": 2271600
    },
    {
      "epoch": 3.717555952684878,
      "grad_norm": 0.1878422051668167,
      "learning_rate": 2.5817887100281362e-06,
      "loss": 0.0091,
      "step": 2271620
    },
    {
      "epoch": 3.717588683123531,
      "grad_norm": 0.2901042103767395,
      "learning_rate": 2.5817228178146194e-06,
      "loss": 0.0101,
      "step": 2271640
    },
    {
      "epoch": 3.717621413562185,
      "grad_norm": 0.20665521919727325,
      "learning_rate": 2.581656925601102e-06,
      "loss": 0.0087,
      "step": 2271660
    },
    {
      "epoch": 3.717654144000838,
      "grad_norm": 0.053184907883405685,
      "learning_rate": 2.581591033387585e-06,
      "loss": 0.0117,
      "step": 2271680
    },
    {
      "epoch": 3.717686874439491,
      "grad_norm": 0.6030233502388,
      "learning_rate": 2.5815251411740676e-06,
      "loss": 0.0122,
      "step": 2271700
    },
    {
      "epoch": 3.7177196048781447,
      "grad_norm": 0.07671438157558441,
      "learning_rate": 2.5814592489605504e-06,
      "loss": 0.0113,
      "step": 2271720
    },
    {
      "epoch": 3.717752335316798,
      "grad_norm": 0.37454330921173096,
      "learning_rate": 2.5813933567470335e-06,
      "loss": 0.0097,
      "step": 2271740
    },
    {
      "epoch": 3.717785065755451,
      "grad_norm": 0.13050228357315063,
      "learning_rate": 2.5813274645335163e-06,
      "loss": 0.0159,
      "step": 2271760
    },
    {
      "epoch": 3.7178177961941046,
      "grad_norm": 0.21578644216060638,
      "learning_rate": 2.581261572319999e-06,
      "loss": 0.0151,
      "step": 2271780
    },
    {
      "epoch": 3.717850526632758,
      "grad_norm": 0.6727948188781738,
      "learning_rate": 2.5811956801064817e-06,
      "loss": 0.0157,
      "step": 2271800
    },
    {
      "epoch": 3.7178832570714113,
      "grad_norm": 0.18100422620773315,
      "learning_rate": 2.581129787892965e-06,
      "loss": 0.0115,
      "step": 2271820
    },
    {
      "epoch": 3.7179159875100645,
      "grad_norm": 0.16841119527816772,
      "learning_rate": 2.5810638956794476e-06,
      "loss": 0.0106,
      "step": 2271840
    },
    {
      "epoch": 3.717948717948718,
      "grad_norm": 0.21608711779117584,
      "learning_rate": 2.5809980034659304e-06,
      "loss": 0.0104,
      "step": 2271860
    },
    {
      "epoch": 3.717981448387371,
      "grad_norm": 0.19015178084373474,
      "learning_rate": 2.580932111252413e-06,
      "loss": 0.0164,
      "step": 2271880
    },
    {
      "epoch": 3.7180141788260244,
      "grad_norm": 0.19626954197883606,
      "learning_rate": 2.5808662190388963e-06,
      "loss": 0.008,
      "step": 2271900
    },
    {
      "epoch": 3.718046909264678,
      "grad_norm": 1.8113954067230225,
      "learning_rate": 2.5808003268253794e-06,
      "loss": 0.0115,
      "step": 2271920
    },
    {
      "epoch": 3.7180796397033316,
      "grad_norm": 0.14862599968910217,
      "learning_rate": 2.580734434611862e-06,
      "loss": 0.0157,
      "step": 2271940
    },
    {
      "epoch": 3.7181123701419847,
      "grad_norm": 0.36635175347328186,
      "learning_rate": 2.5806685423983453e-06,
      "loss": 0.0127,
      "step": 2271960
    },
    {
      "epoch": 3.718145100580638,
      "grad_norm": 0.2561700642108917,
      "learning_rate": 2.580602650184828e-06,
      "loss": 0.0133,
      "step": 2271980
    },
    {
      "epoch": 3.7181778310192914,
      "grad_norm": 0.6429907083511353,
      "learning_rate": 2.580536757971311e-06,
      "loss": 0.0152,
      "step": 2272000
    },
    {
      "epoch": 3.7182105614579446,
      "grad_norm": 0.26440683007240295,
      "learning_rate": 2.5804708657577936e-06,
      "loss": 0.0167,
      "step": 2272020
    },
    {
      "epoch": 3.7182432918965977,
      "grad_norm": 0.1223105862736702,
      "learning_rate": 2.5804049735442767e-06,
      "loss": 0.0097,
      "step": 2272040
    },
    {
      "epoch": 3.7182760223352513,
      "grad_norm": 0.7539368867874146,
      "learning_rate": 2.5803390813307595e-06,
      "loss": 0.0115,
      "step": 2272060
    },
    {
      "epoch": 3.718308752773905,
      "grad_norm": 0.15634796023368835,
      "learning_rate": 2.580273189117242e-06,
      "loss": 0.0079,
      "step": 2272080
    },
    {
      "epoch": 3.718341483212558,
      "grad_norm": 0.4097621738910675,
      "learning_rate": 2.580207296903725e-06,
      "loss": 0.0121,
      "step": 2272100
    },
    {
      "epoch": 3.7183742136512112,
      "grad_norm": 0.4294263422489166,
      "learning_rate": 2.580141404690208e-06,
      "loss": 0.0116,
      "step": 2272120
    },
    {
      "epoch": 3.718406944089865,
      "grad_norm": 0.29429349303245544,
      "learning_rate": 2.580075512476691e-06,
      "loss": 0.0151,
      "step": 2272140
    },
    {
      "epoch": 3.718439674528518,
      "grad_norm": 0.23094722628593445,
      "learning_rate": 2.5800096202631736e-06,
      "loss": 0.0123,
      "step": 2272160
    },
    {
      "epoch": 3.718472404967171,
      "grad_norm": 0.5016355514526367,
      "learning_rate": 2.5799437280496563e-06,
      "loss": 0.0075,
      "step": 2272180
    },
    {
      "epoch": 3.7185051354058247,
      "grad_norm": 0.22306184470653534,
      "learning_rate": 2.579877835836139e-06,
      "loss": 0.013,
      "step": 2272200
    },
    {
      "epoch": 3.7185378658444783,
      "grad_norm": 0.13166649639606476,
      "learning_rate": 2.5798119436226222e-06,
      "loss": 0.0082,
      "step": 2272220
    },
    {
      "epoch": 3.7185705962831315,
      "grad_norm": 0.08106240630149841,
      "learning_rate": 2.579746051409105e-06,
      "loss": 0.0142,
      "step": 2272240
    },
    {
      "epoch": 3.7186033267217846,
      "grad_norm": 0.451698899269104,
      "learning_rate": 2.579680159195588e-06,
      "loss": 0.0083,
      "step": 2272260
    },
    {
      "epoch": 3.718636057160438,
      "grad_norm": 0.23038053512573242,
      "learning_rate": 2.5796142669820713e-06,
      "loss": 0.0163,
      "step": 2272280
    },
    {
      "epoch": 3.7186687875990914,
      "grad_norm": 1.2185062170028687,
      "learning_rate": 2.579548374768554e-06,
      "loss": 0.0108,
      "step": 2272300
    },
    {
      "epoch": 3.7187015180377445,
      "grad_norm": 0.21474187076091766,
      "learning_rate": 2.5794824825550368e-06,
      "loss": 0.0091,
      "step": 2272320
    },
    {
      "epoch": 3.718734248476398,
      "grad_norm": 1.4911197423934937,
      "learning_rate": 2.5794165903415195e-06,
      "loss": 0.0146,
      "step": 2272340
    },
    {
      "epoch": 3.7187669789150513,
      "grad_norm": 0.2948935925960541,
      "learning_rate": 2.5793506981280027e-06,
      "loss": 0.0105,
      "step": 2272360
    },
    {
      "epoch": 3.718799709353705,
      "grad_norm": 0.22369053959846497,
      "learning_rate": 2.5792848059144854e-06,
      "loss": 0.01,
      "step": 2272380
    },
    {
      "epoch": 3.718832439792358,
      "grad_norm": 0.6239799857139587,
      "learning_rate": 2.579218913700968e-06,
      "loss": 0.0117,
      "step": 2272400
    },
    {
      "epoch": 3.7188651702310116,
      "grad_norm": 0.1254289448261261,
      "learning_rate": 2.579153021487451e-06,
      "loss": 0.0212,
      "step": 2272420
    },
    {
      "epoch": 3.7188979006696647,
      "grad_norm": 0.827908992767334,
      "learning_rate": 2.579087129273934e-06,
      "loss": 0.0137,
      "step": 2272440
    },
    {
      "epoch": 3.718930631108318,
      "grad_norm": 0.49887269735336304,
      "learning_rate": 2.579021237060417e-06,
      "loss": 0.0087,
      "step": 2272460
    },
    {
      "epoch": 3.7189633615469715,
      "grad_norm": 0.2854021489620209,
      "learning_rate": 2.5789553448468995e-06,
      "loss": 0.0111,
      "step": 2272480
    },
    {
      "epoch": 3.7189960919856246,
      "grad_norm": 0.09679154306650162,
      "learning_rate": 2.5788894526333823e-06,
      "loss": 0.0106,
      "step": 2272500
    },
    {
      "epoch": 3.7190288224242782,
      "grad_norm": 0.17573513090610504,
      "learning_rate": 2.5788235604198654e-06,
      "loss": 0.0091,
      "step": 2272520
    },
    {
      "epoch": 3.7190615528629314,
      "grad_norm": 0.16647709906101227,
      "learning_rate": 2.578757668206348e-06,
      "loss": 0.0109,
      "step": 2272540
    },
    {
      "epoch": 3.719094283301585,
      "grad_norm": 0.37749001383781433,
      "learning_rate": 2.578691775992831e-06,
      "loss": 0.0089,
      "step": 2272560
    },
    {
      "epoch": 3.719127013740238,
      "grad_norm": 0.790468692779541,
      "learning_rate": 2.5786258837793137e-06,
      "loss": 0.011,
      "step": 2272580
    },
    {
      "epoch": 3.7191597441788913,
      "grad_norm": 0.14787599444389343,
      "learning_rate": 2.5785599915657964e-06,
      "loss": 0.0067,
      "step": 2272600
    },
    {
      "epoch": 3.719192474617545,
      "grad_norm": 0.29737594723701477,
      "learning_rate": 2.57849409935228e-06,
      "loss": 0.0094,
      "step": 2272620
    },
    {
      "epoch": 3.719225205056198,
      "grad_norm": 0.2251475304365158,
      "learning_rate": 2.5784282071387627e-06,
      "loss": 0.0095,
      "step": 2272640
    },
    {
      "epoch": 3.7192579354948516,
      "grad_norm": 0.13373535871505737,
      "learning_rate": 2.578362314925246e-06,
      "loss": 0.0093,
      "step": 2272660
    },
    {
      "epoch": 3.7192906659335048,
      "grad_norm": 0.09083791822195053,
      "learning_rate": 2.5782964227117286e-06,
      "loss": 0.0127,
      "step": 2272680
    },
    {
      "epoch": 3.7193233963721584,
      "grad_norm": 0.2511249780654907,
      "learning_rate": 2.5782305304982114e-06,
      "loss": 0.0092,
      "step": 2272700
    },
    {
      "epoch": 3.7193561268108115,
      "grad_norm": 0.2190050482749939,
      "learning_rate": 2.578164638284694e-06,
      "loss": 0.0106,
      "step": 2272720
    },
    {
      "epoch": 3.7193888572494647,
      "grad_norm": 0.10691064596176147,
      "learning_rate": 2.578098746071177e-06,
      "loss": 0.0101,
      "step": 2272740
    },
    {
      "epoch": 3.7194215876881183,
      "grad_norm": 0.16095887124538422,
      "learning_rate": 2.57803285385766e-06,
      "loss": 0.0068,
      "step": 2272760
    },
    {
      "epoch": 3.7194543181267714,
      "grad_norm": 0.45299601554870605,
      "learning_rate": 2.5779669616441428e-06,
      "loss": 0.0143,
      "step": 2272780
    },
    {
      "epoch": 3.719487048565425,
      "grad_norm": 0.20031580328941345,
      "learning_rate": 2.5779010694306255e-06,
      "loss": 0.011,
      "step": 2272800
    },
    {
      "epoch": 3.719519779004078,
      "grad_norm": 0.11904484033584595,
      "learning_rate": 2.5778351772171082e-06,
      "loss": 0.0126,
      "step": 2272820
    },
    {
      "epoch": 3.7195525094427317,
      "grad_norm": 0.09603319317102432,
      "learning_rate": 2.5777692850035914e-06,
      "loss": 0.008,
      "step": 2272840
    },
    {
      "epoch": 3.719585239881385,
      "grad_norm": 0.060900699347257614,
      "learning_rate": 2.577703392790074e-06,
      "loss": 0.0111,
      "step": 2272860
    },
    {
      "epoch": 3.719617970320038,
      "grad_norm": 0.41124334931373596,
      "learning_rate": 2.577637500576557e-06,
      "loss": 0.0095,
      "step": 2272880
    },
    {
      "epoch": 3.7196507007586916,
      "grad_norm": 0.25208795070648193,
      "learning_rate": 2.5775716083630396e-06,
      "loss": 0.0081,
      "step": 2272900
    },
    {
      "epoch": 3.719683431197345,
      "grad_norm": 0.3566421568393707,
      "learning_rate": 2.5775057161495228e-06,
      "loss": 0.0174,
      "step": 2272920
    },
    {
      "epoch": 3.7197161616359984,
      "grad_norm": 0.24460560083389282,
      "learning_rate": 2.5774398239360055e-06,
      "loss": 0.0117,
      "step": 2272940
    },
    {
      "epoch": 3.7197488920746515,
      "grad_norm": 0.23614974319934845,
      "learning_rate": 2.5773739317224883e-06,
      "loss": 0.0108,
      "step": 2272960
    },
    {
      "epoch": 3.719781622513305,
      "grad_norm": 0.43398141860961914,
      "learning_rate": 2.577308039508972e-06,
      "loss": 0.0136,
      "step": 2272980
    },
    {
      "epoch": 3.7198143529519583,
      "grad_norm": 0.11680277436971664,
      "learning_rate": 2.5772421472954546e-06,
      "loss": 0.0089,
      "step": 2273000
    },
    {
      "epoch": 3.7198470833906114,
      "grad_norm": 0.10904312133789062,
      "learning_rate": 2.5771762550819373e-06,
      "loss": 0.0111,
      "step": 2273020
    },
    {
      "epoch": 3.719879813829265,
      "grad_norm": 0.2771359384059906,
      "learning_rate": 2.57711036286842e-06,
      "loss": 0.0121,
      "step": 2273040
    },
    {
      "epoch": 3.719912544267918,
      "grad_norm": 0.5851813554763794,
      "learning_rate": 2.5770444706549032e-06,
      "loss": 0.0174,
      "step": 2273060
    },
    {
      "epoch": 3.7199452747065718,
      "grad_norm": 0.27752557396888733,
      "learning_rate": 2.576978578441386e-06,
      "loss": 0.0104,
      "step": 2273080
    },
    {
      "epoch": 3.719978005145225,
      "grad_norm": 0.23329782485961914,
      "learning_rate": 2.5769126862278687e-06,
      "loss": 0.0096,
      "step": 2273100
    },
    {
      "epoch": 3.7200107355838785,
      "grad_norm": 0.2687353789806366,
      "learning_rate": 2.5768467940143515e-06,
      "loss": 0.0109,
      "step": 2273120
    },
    {
      "epoch": 3.7200434660225317,
      "grad_norm": 0.18419604003429413,
      "learning_rate": 2.576780901800834e-06,
      "loss": 0.013,
      "step": 2273140
    },
    {
      "epoch": 3.720076196461185,
      "grad_norm": 0.22227342426776886,
      "learning_rate": 2.5767150095873174e-06,
      "loss": 0.0091,
      "step": 2273160
    },
    {
      "epoch": 3.7201089268998384,
      "grad_norm": 0.14280347526073456,
      "learning_rate": 2.5766491173738e-06,
      "loss": 0.0092,
      "step": 2273180
    },
    {
      "epoch": 3.7201416573384916,
      "grad_norm": 0.14088638126850128,
      "learning_rate": 2.576583225160283e-06,
      "loss": 0.016,
      "step": 2273200
    },
    {
      "epoch": 3.720174387777145,
      "grad_norm": 0.9071589112281799,
      "learning_rate": 2.5765173329467656e-06,
      "loss": 0.0129,
      "step": 2273220
    },
    {
      "epoch": 3.7202071182157983,
      "grad_norm": 0.4711548388004303,
      "learning_rate": 2.5764514407332487e-06,
      "loss": 0.0108,
      "step": 2273240
    },
    {
      "epoch": 3.720239848654452,
      "grad_norm": 0.1941167265176773,
      "learning_rate": 2.5763855485197315e-06,
      "loss": 0.012,
      "step": 2273260
    },
    {
      "epoch": 3.720272579093105,
      "grad_norm": 0.2727908790111542,
      "learning_rate": 2.5763196563062142e-06,
      "loss": 0.0101,
      "step": 2273280
    },
    {
      "epoch": 3.720305309531758,
      "grad_norm": 0.27745726704597473,
      "learning_rate": 2.576253764092697e-06,
      "loss": 0.0117,
      "step": 2273300
    },
    {
      "epoch": 3.720338039970412,
      "grad_norm": 0.1865645945072174,
      "learning_rate": 2.5761878718791805e-06,
      "loss": 0.0133,
      "step": 2273320
    },
    {
      "epoch": 3.720370770409065,
      "grad_norm": 0.5177838206291199,
      "learning_rate": 2.5761219796656633e-06,
      "loss": 0.0139,
      "step": 2273340
    },
    {
      "epoch": 3.720403500847718,
      "grad_norm": 0.2947939336299896,
      "learning_rate": 2.576056087452146e-06,
      "loss": 0.0182,
      "step": 2273360
    },
    {
      "epoch": 3.7204362312863717,
      "grad_norm": 0.4345674514770508,
      "learning_rate": 2.575990195238629e-06,
      "loss": 0.0133,
      "step": 2273380
    },
    {
      "epoch": 3.7204689617250253,
      "grad_norm": 0.6275259256362915,
      "learning_rate": 2.575924303025112e-06,
      "loss": 0.0185,
      "step": 2273400
    },
    {
      "epoch": 3.7205016921636784,
      "grad_norm": 0.17872396111488342,
      "learning_rate": 2.5758584108115947e-06,
      "loss": 0.0078,
      "step": 2273420
    },
    {
      "epoch": 3.7205344226023316,
      "grad_norm": 0.18705442547798157,
      "learning_rate": 2.5757925185980774e-06,
      "loss": 0.0124,
      "step": 2273440
    },
    {
      "epoch": 3.720567153040985,
      "grad_norm": 0.11278115957975388,
      "learning_rate": 2.5757266263845606e-06,
      "loss": 0.011,
      "step": 2273460
    },
    {
      "epoch": 3.7205998834796383,
      "grad_norm": 0.7710111737251282,
      "learning_rate": 2.5756607341710433e-06,
      "loss": 0.0111,
      "step": 2273480
    },
    {
      "epoch": 3.7206326139182915,
      "grad_norm": 0.842279851436615,
      "learning_rate": 2.575594841957526e-06,
      "loss": 0.013,
      "step": 2273500
    },
    {
      "epoch": 3.720665344356945,
      "grad_norm": 0.06365961581468582,
      "learning_rate": 2.5755289497440088e-06,
      "loss": 0.008,
      "step": 2273520
    },
    {
      "epoch": 3.7206980747955987,
      "grad_norm": 0.3189060389995575,
      "learning_rate": 2.575463057530492e-06,
      "loss": 0.0142,
      "step": 2273540
    },
    {
      "epoch": 3.720730805234252,
      "grad_norm": 0.2886350452899933,
      "learning_rate": 2.5753971653169747e-06,
      "loss": 0.0097,
      "step": 2273560
    },
    {
      "epoch": 3.720763535672905,
      "grad_norm": 0.33310651779174805,
      "learning_rate": 2.5753312731034574e-06,
      "loss": 0.0102,
      "step": 2273580
    },
    {
      "epoch": 3.7207962661115586,
      "grad_norm": 0.3787929117679596,
      "learning_rate": 2.57526538088994e-06,
      "loss": 0.0138,
      "step": 2273600
    },
    {
      "epoch": 3.7208289965502117,
      "grad_norm": 0.06447311490774155,
      "learning_rate": 2.575199488676423e-06,
      "loss": 0.0089,
      "step": 2273620
    },
    {
      "epoch": 3.720861726988865,
      "grad_norm": 0.4442860782146454,
      "learning_rate": 2.575133596462906e-06,
      "loss": 0.0094,
      "step": 2273640
    },
    {
      "epoch": 3.7208944574275185,
      "grad_norm": 0.5083667635917664,
      "learning_rate": 2.575067704249389e-06,
      "loss": 0.0085,
      "step": 2273660
    },
    {
      "epoch": 3.720927187866172,
      "grad_norm": 0.49244290590286255,
      "learning_rate": 2.575001812035872e-06,
      "loss": 0.0089,
      "step": 2273680
    },
    {
      "epoch": 3.720959918304825,
      "grad_norm": 0.06646810472011566,
      "learning_rate": 2.574935919822355e-06,
      "loss": 0.0098,
      "step": 2273700
    },
    {
      "epoch": 3.7209926487434783,
      "grad_norm": 0.5542391538619995,
      "learning_rate": 2.574870027608838e-06,
      "loss": 0.0135,
      "step": 2273720
    },
    {
      "epoch": 3.721025379182132,
      "grad_norm": 0.617943286895752,
      "learning_rate": 2.5748041353953206e-06,
      "loss": 0.0131,
      "step": 2273740
    },
    {
      "epoch": 3.721058109620785,
      "grad_norm": 0.6065294742584229,
      "learning_rate": 2.5747382431818034e-06,
      "loss": 0.0111,
      "step": 2273760
    },
    {
      "epoch": 3.7210908400594382,
      "grad_norm": 0.2959253489971161,
      "learning_rate": 2.5746723509682865e-06,
      "loss": 0.0078,
      "step": 2273780
    },
    {
      "epoch": 3.721123570498092,
      "grad_norm": 0.14045891165733337,
      "learning_rate": 2.5746064587547693e-06,
      "loss": 0.0082,
      "step": 2273800
    },
    {
      "epoch": 3.7211563009367454,
      "grad_norm": 0.20374904572963715,
      "learning_rate": 2.574540566541252e-06,
      "loss": 0.0095,
      "step": 2273820
    },
    {
      "epoch": 3.7211890313753986,
      "grad_norm": 0.5279752016067505,
      "learning_rate": 2.5744746743277347e-06,
      "loss": 0.0134,
      "step": 2273840
    },
    {
      "epoch": 3.7212217618140517,
      "grad_norm": 0.5586682558059692,
      "learning_rate": 2.574408782114218e-06,
      "loss": 0.0109,
      "step": 2273860
    },
    {
      "epoch": 3.7212544922527053,
      "grad_norm": 0.24168190360069275,
      "learning_rate": 2.5743428899007006e-06,
      "loss": 0.0148,
      "step": 2273880
    },
    {
      "epoch": 3.7212872226913585,
      "grad_norm": 0.42614439129829407,
      "learning_rate": 2.5742769976871834e-06,
      "loss": 0.0132,
      "step": 2273900
    },
    {
      "epoch": 3.7213199531300116,
      "grad_norm": 0.41011375188827515,
      "learning_rate": 2.574211105473666e-06,
      "loss": 0.0109,
      "step": 2273920
    },
    {
      "epoch": 3.721352683568665,
      "grad_norm": 0.07643001526594162,
      "learning_rate": 2.5741452132601493e-06,
      "loss": 0.0137,
      "step": 2273940
    },
    {
      "epoch": 3.7213854140073184,
      "grad_norm": 0.2572247385978699,
      "learning_rate": 2.574079321046632e-06,
      "loss": 0.0111,
      "step": 2273960
    },
    {
      "epoch": 3.721418144445972,
      "grad_norm": 0.5138658285140991,
      "learning_rate": 2.5740134288331148e-06,
      "loss": 0.0103,
      "step": 2273980
    },
    {
      "epoch": 3.721450874884625,
      "grad_norm": 0.11613797396421432,
      "learning_rate": 2.5739475366195975e-06,
      "loss": 0.0051,
      "step": 2274000
    },
    {
      "epoch": 3.7214836053232787,
      "grad_norm": 0.13066312670707703,
      "learning_rate": 2.573881644406081e-06,
      "loss": 0.0095,
      "step": 2274020
    },
    {
      "epoch": 3.721516335761932,
      "grad_norm": 0.13760961592197418,
      "learning_rate": 2.573815752192564e-06,
      "loss": 0.009,
      "step": 2274040
    },
    {
      "epoch": 3.721549066200585,
      "grad_norm": 0.4136752486228943,
      "learning_rate": 2.5737498599790466e-06,
      "loss": 0.0192,
      "step": 2274060
    },
    {
      "epoch": 3.7215817966392386,
      "grad_norm": 0.18321849405765533,
      "learning_rate": 2.5736839677655297e-06,
      "loss": 0.0114,
      "step": 2274080
    },
    {
      "epoch": 3.7216145270778918,
      "grad_norm": 0.49440324306488037,
      "learning_rate": 2.5736180755520125e-06,
      "loss": 0.0074,
      "step": 2274100
    },
    {
      "epoch": 3.7216472575165453,
      "grad_norm": 0.11692581325769424,
      "learning_rate": 2.5735521833384952e-06,
      "loss": 0.0089,
      "step": 2274120
    },
    {
      "epoch": 3.7216799879551985,
      "grad_norm": 0.3904331624507904,
      "learning_rate": 2.573486291124978e-06,
      "loss": 0.0113,
      "step": 2274140
    },
    {
      "epoch": 3.721712718393852,
      "grad_norm": 0.14445164799690247,
      "learning_rate": 2.5734203989114607e-06,
      "loss": 0.0092,
      "step": 2274160
    },
    {
      "epoch": 3.7217454488325052,
      "grad_norm": 0.5265713930130005,
      "learning_rate": 2.573354506697944e-06,
      "loss": 0.016,
      "step": 2274180
    },
    {
      "epoch": 3.7217781792711584,
      "grad_norm": 0.19122417271137238,
      "learning_rate": 2.5732886144844266e-06,
      "loss": 0.0109,
      "step": 2274200
    },
    {
      "epoch": 3.721810909709812,
      "grad_norm": 0.11739393323659897,
      "learning_rate": 2.5732227222709093e-06,
      "loss": 0.0128,
      "step": 2274220
    },
    {
      "epoch": 3.721843640148465,
      "grad_norm": 0.12321842461824417,
      "learning_rate": 2.573156830057392e-06,
      "loss": 0.012,
      "step": 2274240
    },
    {
      "epoch": 3.7218763705871187,
      "grad_norm": 0.4509766697883606,
      "learning_rate": 2.5730909378438752e-06,
      "loss": 0.0123,
      "step": 2274260
    },
    {
      "epoch": 3.721909101025772,
      "grad_norm": 0.10832205414772034,
      "learning_rate": 2.573025045630358e-06,
      "loss": 0.0086,
      "step": 2274280
    },
    {
      "epoch": 3.7219418314644255,
      "grad_norm": 0.18136532604694366,
      "learning_rate": 2.5729591534168407e-06,
      "loss": 0.0116,
      "step": 2274300
    },
    {
      "epoch": 3.7219745619030786,
      "grad_norm": 0.1682877093553543,
      "learning_rate": 2.5728932612033235e-06,
      "loss": 0.0132,
      "step": 2274320
    },
    {
      "epoch": 3.7220072923417318,
      "grad_norm": 0.2103370726108551,
      "learning_rate": 2.5728273689898066e-06,
      "loss": 0.0098,
      "step": 2274340
    },
    {
      "epoch": 3.7220400227803854,
      "grad_norm": 0.6261650323867798,
      "learning_rate": 2.5727614767762894e-06,
      "loss": 0.0132,
      "step": 2274360
    },
    {
      "epoch": 3.7220727532190385,
      "grad_norm": 0.39422762393951416,
      "learning_rate": 2.5726955845627725e-06,
      "loss": 0.0113,
      "step": 2274380
    },
    {
      "epoch": 3.722105483657692,
      "grad_norm": 0.1713789850473404,
      "learning_rate": 2.5726296923492557e-06,
      "loss": 0.0108,
      "step": 2274400
    },
    {
      "epoch": 3.7221382140963453,
      "grad_norm": 0.08267350494861603,
      "learning_rate": 2.5725638001357384e-06,
      "loss": 0.0121,
      "step": 2274420
    },
    {
      "epoch": 3.722170944534999,
      "grad_norm": 0.428314745426178,
      "learning_rate": 2.572497907922221e-06,
      "loss": 0.0112,
      "step": 2274440
    },
    {
      "epoch": 3.722203674973652,
      "grad_norm": 0.18550145626068115,
      "learning_rate": 2.572432015708704e-06,
      "loss": 0.008,
      "step": 2274460
    },
    {
      "epoch": 3.722236405412305,
      "grad_norm": 0.08183880150318146,
      "learning_rate": 2.572366123495187e-06,
      "loss": 0.0106,
      "step": 2274480
    },
    {
      "epoch": 3.7222691358509588,
      "grad_norm": 0.4288295805454254,
      "learning_rate": 2.57230023128167e-06,
      "loss": 0.0118,
      "step": 2274500
    },
    {
      "epoch": 3.722301866289612,
      "grad_norm": 0.15248411893844604,
      "learning_rate": 2.5722343390681526e-06,
      "loss": 0.0128,
      "step": 2274520
    },
    {
      "epoch": 3.7223345967282655,
      "grad_norm": 0.674990177154541,
      "learning_rate": 2.5721684468546353e-06,
      "loss": 0.0109,
      "step": 2274540
    },
    {
      "epoch": 3.7223673271669186,
      "grad_norm": 0.1532524824142456,
      "learning_rate": 2.5721025546411185e-06,
      "loss": 0.0087,
      "step": 2274560
    },
    {
      "epoch": 3.7224000576055722,
      "grad_norm": 0.26252010464668274,
      "learning_rate": 2.572036662427601e-06,
      "loss": 0.0089,
      "step": 2274580
    },
    {
      "epoch": 3.7224327880442254,
      "grad_norm": 0.35079702734947205,
      "learning_rate": 2.571970770214084e-06,
      "loss": 0.0065,
      "step": 2274600
    },
    {
      "epoch": 3.7224655184828785,
      "grad_norm": 0.4954719543457031,
      "learning_rate": 2.5719048780005667e-06,
      "loss": 0.0123,
      "step": 2274620
    },
    {
      "epoch": 3.722498248921532,
      "grad_norm": 0.13039828836917877,
      "learning_rate": 2.5718389857870494e-06,
      "loss": 0.0134,
      "step": 2274640
    },
    {
      "epoch": 3.7225309793601853,
      "grad_norm": 0.19331057369709015,
      "learning_rate": 2.5717730935735326e-06,
      "loss": 0.008,
      "step": 2274660
    },
    {
      "epoch": 3.722563709798839,
      "grad_norm": 0.18595588207244873,
      "learning_rate": 2.5717072013600153e-06,
      "loss": 0.0075,
      "step": 2274680
    },
    {
      "epoch": 3.722596440237492,
      "grad_norm": 0.12789595127105713,
      "learning_rate": 2.571641309146498e-06,
      "loss": 0.0112,
      "step": 2274700
    },
    {
      "epoch": 3.7226291706761456,
      "grad_norm": 0.32896625995635986,
      "learning_rate": 2.5715754169329816e-06,
      "loss": 0.0098,
      "step": 2274720
    },
    {
      "epoch": 3.7226619011147988,
      "grad_norm": 0.2604944705963135,
      "learning_rate": 2.5715095247194644e-06,
      "loss": 0.0114,
      "step": 2274740
    },
    {
      "epoch": 3.722694631553452,
      "grad_norm": 0.22968855500221252,
      "learning_rate": 2.571443632505947e-06,
      "loss": 0.0115,
      "step": 2274760
    },
    {
      "epoch": 3.7227273619921055,
      "grad_norm": 0.37790772318840027,
      "learning_rate": 2.57137774029243e-06,
      "loss": 0.0089,
      "step": 2274780
    },
    {
      "epoch": 3.7227600924307587,
      "grad_norm": 0.26850980520248413,
      "learning_rate": 2.571311848078913e-06,
      "loss": 0.0168,
      "step": 2274800
    },
    {
      "epoch": 3.722792822869412,
      "grad_norm": 0.17316842079162598,
      "learning_rate": 2.5712459558653958e-06,
      "loss": 0.0112,
      "step": 2274820
    },
    {
      "epoch": 3.7228255533080654,
      "grad_norm": 0.0855802521109581,
      "learning_rate": 2.5711800636518785e-06,
      "loss": 0.0088,
      "step": 2274840
    },
    {
      "epoch": 3.722858283746719,
      "grad_norm": 0.5257823467254639,
      "learning_rate": 2.5711141714383612e-06,
      "loss": 0.0121,
      "step": 2274860
    },
    {
      "epoch": 3.722891014185372,
      "grad_norm": 0.49431610107421875,
      "learning_rate": 2.5710482792248444e-06,
      "loss": 0.0086,
      "step": 2274880
    },
    {
      "epoch": 3.7229237446240253,
      "grad_norm": 0.40057358145713806,
      "learning_rate": 2.570982387011327e-06,
      "loss": 0.0116,
      "step": 2274900
    },
    {
      "epoch": 3.722956475062679,
      "grad_norm": 0.7137553095817566,
      "learning_rate": 2.57091649479781e-06,
      "loss": 0.0134,
      "step": 2274920
    },
    {
      "epoch": 3.722989205501332,
      "grad_norm": 0.28088146448135376,
      "learning_rate": 2.5708506025842926e-06,
      "loss": 0.0118,
      "step": 2274940
    },
    {
      "epoch": 3.723021935939985,
      "grad_norm": 0.2146301567554474,
      "learning_rate": 2.570784710370776e-06,
      "loss": 0.0129,
      "step": 2274960
    },
    {
      "epoch": 3.723054666378639,
      "grad_norm": 0.4072932004928589,
      "learning_rate": 2.5707188181572585e-06,
      "loss": 0.0091,
      "step": 2274980
    },
    {
      "epoch": 3.7230873968172924,
      "grad_norm": 0.21667897701263428,
      "learning_rate": 2.5706529259437413e-06,
      "loss": 0.0124,
      "step": 2275000
    },
    {
      "epoch": 3.7231201272559455,
      "grad_norm": 0.5211838483810425,
      "learning_rate": 2.570587033730224e-06,
      "loss": 0.0128,
      "step": 2275020
    },
    {
      "epoch": 3.7231528576945987,
      "grad_norm": 0.32916006445884705,
      "learning_rate": 2.5705211415167067e-06,
      "loss": 0.011,
      "step": 2275040
    },
    {
      "epoch": 3.7231855881332523,
      "grad_norm": 0.08105473965406418,
      "learning_rate": 2.57045524930319e-06,
      "loss": 0.0097,
      "step": 2275060
    },
    {
      "epoch": 3.7232183185719054,
      "grad_norm": 0.5470079779624939,
      "learning_rate": 2.570389357089673e-06,
      "loss": 0.0095,
      "step": 2275080
    },
    {
      "epoch": 3.7232510490105586,
      "grad_norm": 0.6791425943374634,
      "learning_rate": 2.5703234648761562e-06,
      "loss": 0.0117,
      "step": 2275100
    },
    {
      "epoch": 3.723283779449212,
      "grad_norm": 1.1726667881011963,
      "learning_rate": 2.570257572662639e-06,
      "loss": 0.0169,
      "step": 2275120
    },
    {
      "epoch": 3.7233165098878658,
      "grad_norm": 0.12814131379127502,
      "learning_rate": 2.5701916804491217e-06,
      "loss": 0.0114,
      "step": 2275140
    },
    {
      "epoch": 3.723349240326519,
      "grad_norm": 0.1476716250181198,
      "learning_rate": 2.5701257882356045e-06,
      "loss": 0.0075,
      "step": 2275160
    },
    {
      "epoch": 3.723381970765172,
      "grad_norm": 0.6709638833999634,
      "learning_rate": 2.570059896022087e-06,
      "loss": 0.0181,
      "step": 2275180
    },
    {
      "epoch": 3.7234147012038257,
      "grad_norm": 0.1351807713508606,
      "learning_rate": 2.5699940038085704e-06,
      "loss": 0.0116,
      "step": 2275200
    },
    {
      "epoch": 3.723447431642479,
      "grad_norm": 0.2016110122203827,
      "learning_rate": 2.569928111595053e-06,
      "loss": 0.0104,
      "step": 2275220
    },
    {
      "epoch": 3.723480162081132,
      "grad_norm": 0.3476795554161072,
      "learning_rate": 2.569862219381536e-06,
      "loss": 0.0185,
      "step": 2275240
    },
    {
      "epoch": 3.7235128925197856,
      "grad_norm": 0.18983550369739532,
      "learning_rate": 2.5697963271680186e-06,
      "loss": 0.0203,
      "step": 2275260
    },
    {
      "epoch": 3.723545622958439,
      "grad_norm": 0.7287381291389465,
      "learning_rate": 2.5697304349545017e-06,
      "loss": 0.0119,
      "step": 2275280
    },
    {
      "epoch": 3.7235783533970923,
      "grad_norm": 0.18729574978351593,
      "learning_rate": 2.5696645427409845e-06,
      "loss": 0.0103,
      "step": 2275300
    },
    {
      "epoch": 3.7236110838357455,
      "grad_norm": 0.11758232861757278,
      "learning_rate": 2.5695986505274672e-06,
      "loss": 0.0088,
      "step": 2275320
    },
    {
      "epoch": 3.723643814274399,
      "grad_norm": 0.10474765300750732,
      "learning_rate": 2.56953275831395e-06,
      "loss": 0.0081,
      "step": 2275340
    },
    {
      "epoch": 3.723676544713052,
      "grad_norm": 0.11077858507633209,
      "learning_rate": 2.569466866100433e-06,
      "loss": 0.0131,
      "step": 2275360
    },
    {
      "epoch": 3.7237092751517054,
      "grad_norm": 0.2021111398935318,
      "learning_rate": 2.569400973886916e-06,
      "loss": 0.0103,
      "step": 2275380
    },
    {
      "epoch": 3.723742005590359,
      "grad_norm": 0.11837183684110641,
      "learning_rate": 2.5693350816733986e-06,
      "loss": 0.0121,
      "step": 2275400
    },
    {
      "epoch": 3.723774736029012,
      "grad_norm": 0.4655841290950775,
      "learning_rate": 2.5692691894598813e-06,
      "loss": 0.0092,
      "step": 2275420
    },
    {
      "epoch": 3.7238074664676657,
      "grad_norm": 0.3039257526397705,
      "learning_rate": 2.569203297246365e-06,
      "loss": 0.0153,
      "step": 2275440
    },
    {
      "epoch": 3.723840196906319,
      "grad_norm": 0.11707640439271927,
      "learning_rate": 2.5691374050328477e-06,
      "loss": 0.0109,
      "step": 2275460
    },
    {
      "epoch": 3.7238729273449724,
      "grad_norm": 0.3281824290752411,
      "learning_rate": 2.5690715128193304e-06,
      "loss": 0.0123,
      "step": 2275480
    },
    {
      "epoch": 3.7239056577836256,
      "grad_norm": 0.3224031925201416,
      "learning_rate": 2.5690056206058136e-06,
      "loss": 0.0136,
      "step": 2275500
    },
    {
      "epoch": 3.7239383882222787,
      "grad_norm": 0.08399823307991028,
      "learning_rate": 2.5689397283922963e-06,
      "loss": 0.0097,
      "step": 2275520
    },
    {
      "epoch": 3.7239711186609323,
      "grad_norm": 0.2843870520591736,
      "learning_rate": 2.568873836178779e-06,
      "loss": 0.0166,
      "step": 2275540
    },
    {
      "epoch": 3.7240038490995855,
      "grad_norm": 0.22483494877815247,
      "learning_rate": 2.568807943965262e-06,
      "loss": 0.0102,
      "step": 2275560
    },
    {
      "epoch": 3.724036579538239,
      "grad_norm": 0.07813418656587601,
      "learning_rate": 2.5687420517517445e-06,
      "loss": 0.0109,
      "step": 2275580
    },
    {
      "epoch": 3.7240693099768922,
      "grad_norm": 0.0474957637488842,
      "learning_rate": 2.5686761595382277e-06,
      "loss": 0.0183,
      "step": 2275600
    },
    {
      "epoch": 3.724102040415546,
      "grad_norm": 0.14925193786621094,
      "learning_rate": 2.5686102673247104e-06,
      "loss": 0.0144,
      "step": 2275620
    },
    {
      "epoch": 3.724134770854199,
      "grad_norm": 0.33169227838516235,
      "learning_rate": 2.568544375111193e-06,
      "loss": 0.0096,
      "step": 2275640
    },
    {
      "epoch": 3.724167501292852,
      "grad_norm": 0.6848742961883545,
      "learning_rate": 2.568478482897676e-06,
      "loss": 0.0142,
      "step": 2275660
    },
    {
      "epoch": 3.7242002317315057,
      "grad_norm": 0.16077347099781036,
      "learning_rate": 2.568412590684159e-06,
      "loss": 0.0168,
      "step": 2275680
    },
    {
      "epoch": 3.724232962170159,
      "grad_norm": 0.915574848651886,
      "learning_rate": 2.568346698470642e-06,
      "loss": 0.0143,
      "step": 2275700
    },
    {
      "epoch": 3.7242656926088125,
      "grad_norm": 0.09880810230970383,
      "learning_rate": 2.5682808062571246e-06,
      "loss": 0.009,
      "step": 2275720
    },
    {
      "epoch": 3.7242984230474656,
      "grad_norm": 0.15604476630687714,
      "learning_rate": 2.5682149140436073e-06,
      "loss": 0.0121,
      "step": 2275740
    },
    {
      "epoch": 3.724331153486119,
      "grad_norm": 0.03510590270161629,
      "learning_rate": 2.5681490218300905e-06,
      "loss": 0.0126,
      "step": 2275760
    },
    {
      "epoch": 3.7243638839247724,
      "grad_norm": 0.375430166721344,
      "learning_rate": 2.5680831296165736e-06,
      "loss": 0.0154,
      "step": 2275780
    },
    {
      "epoch": 3.7243966143634255,
      "grad_norm": 0.4880546033382416,
      "learning_rate": 2.5680172374030564e-06,
      "loss": 0.0159,
      "step": 2275800
    },
    {
      "epoch": 3.724429344802079,
      "grad_norm": 0.06574033200740814,
      "learning_rate": 2.5679513451895395e-06,
      "loss": 0.015,
      "step": 2275820
    },
    {
      "epoch": 3.7244620752407323,
      "grad_norm": 0.2991683781147003,
      "learning_rate": 2.5678854529760223e-06,
      "loss": 0.0131,
      "step": 2275840
    },
    {
      "epoch": 3.724494805679386,
      "grad_norm": 0.17064088582992554,
      "learning_rate": 2.567819560762505e-06,
      "loss": 0.0097,
      "step": 2275860
    },
    {
      "epoch": 3.724527536118039,
      "grad_norm": 0.17656216025352478,
      "learning_rate": 2.5677536685489877e-06,
      "loss": 0.0113,
      "step": 2275880
    },
    {
      "epoch": 3.7245602665566926,
      "grad_norm": 0.1887163668870926,
      "learning_rate": 2.567687776335471e-06,
      "loss": 0.009,
      "step": 2275900
    },
    {
      "epoch": 3.7245929969953457,
      "grad_norm": 0.10598426312208176,
      "learning_rate": 2.5676218841219537e-06,
      "loss": 0.0094,
      "step": 2275920
    },
    {
      "epoch": 3.724625727433999,
      "grad_norm": 0.050178080797195435,
      "learning_rate": 2.5675559919084364e-06,
      "loss": 0.0077,
      "step": 2275940
    },
    {
      "epoch": 3.7246584578726525,
      "grad_norm": 0.27161848545074463,
      "learning_rate": 2.567490099694919e-06,
      "loss": 0.0098,
      "step": 2275960
    },
    {
      "epoch": 3.7246911883113056,
      "grad_norm": 0.2605956494808197,
      "learning_rate": 2.5674242074814023e-06,
      "loss": 0.012,
      "step": 2275980
    },
    {
      "epoch": 3.7247239187499592,
      "grad_norm": 0.272487610578537,
      "learning_rate": 2.567358315267885e-06,
      "loss": 0.0094,
      "step": 2276000
    },
    {
      "epoch": 3.7247566491886124,
      "grad_norm": 0.6594424843788147,
      "learning_rate": 2.5672924230543678e-06,
      "loss": 0.0139,
      "step": 2276020
    },
    {
      "epoch": 3.724789379627266,
      "grad_norm": 0.16115306317806244,
      "learning_rate": 2.5672265308408505e-06,
      "loss": 0.0068,
      "step": 2276040
    },
    {
      "epoch": 3.724822110065919,
      "grad_norm": 0.310752272605896,
      "learning_rate": 2.5671606386273333e-06,
      "loss": 0.0102,
      "step": 2276060
    },
    {
      "epoch": 3.7248548405045723,
      "grad_norm": 0.7857879400253296,
      "learning_rate": 2.5670947464138164e-06,
      "loss": 0.0151,
      "step": 2276080
    },
    {
      "epoch": 3.724887570943226,
      "grad_norm": 0.7165228724479675,
      "learning_rate": 2.567028854200299e-06,
      "loss": 0.0185,
      "step": 2276100
    },
    {
      "epoch": 3.724920301381879,
      "grad_norm": 0.2796113193035126,
      "learning_rate": 2.566962961986782e-06,
      "loss": 0.0101,
      "step": 2276120
    },
    {
      "epoch": 3.7249530318205326,
      "grad_norm": 0.160678893327713,
      "learning_rate": 2.5668970697732655e-06,
      "loss": 0.014,
      "step": 2276140
    },
    {
      "epoch": 3.7249857622591858,
      "grad_norm": 0.5698539614677429,
      "learning_rate": 2.5668311775597482e-06,
      "loss": 0.0063,
      "step": 2276160
    },
    {
      "epoch": 3.7250184926978394,
      "grad_norm": 0.21126867830753326,
      "learning_rate": 2.566765285346231e-06,
      "loss": 0.0135,
      "step": 2276180
    },
    {
      "epoch": 3.7250512231364925,
      "grad_norm": 0.150005042552948,
      "learning_rate": 2.5666993931327137e-06,
      "loss": 0.0111,
      "step": 2276200
    },
    {
      "epoch": 3.7250839535751457,
      "grad_norm": 0.11535310000181198,
      "learning_rate": 2.566633500919197e-06,
      "loss": 0.0138,
      "step": 2276220
    },
    {
      "epoch": 3.7251166840137993,
      "grad_norm": 0.40347445011138916,
      "learning_rate": 2.5665676087056796e-06,
      "loss": 0.0103,
      "step": 2276240
    },
    {
      "epoch": 3.7251494144524524,
      "grad_norm": 0.41224151849746704,
      "learning_rate": 2.5665017164921623e-06,
      "loss": 0.009,
      "step": 2276260
    },
    {
      "epoch": 3.725182144891106,
      "grad_norm": 0.1755426973104477,
      "learning_rate": 2.566435824278645e-06,
      "loss": 0.0154,
      "step": 2276280
    },
    {
      "epoch": 3.725214875329759,
      "grad_norm": 0.35116827487945557,
      "learning_rate": 2.5663699320651282e-06,
      "loss": 0.0149,
      "step": 2276300
    },
    {
      "epoch": 3.7252476057684127,
      "grad_norm": 0.09615476429462433,
      "learning_rate": 2.566304039851611e-06,
      "loss": 0.0096,
      "step": 2276320
    },
    {
      "epoch": 3.725280336207066,
      "grad_norm": 0.2599233388900757,
      "learning_rate": 2.5662381476380937e-06,
      "loss": 0.0092,
      "step": 2276340
    },
    {
      "epoch": 3.725313066645719,
      "grad_norm": 0.250670850276947,
      "learning_rate": 2.5661722554245765e-06,
      "loss": 0.0132,
      "step": 2276360
    },
    {
      "epoch": 3.7253457970843726,
      "grad_norm": 0.45446112751960754,
      "learning_rate": 2.5661063632110596e-06,
      "loss": 0.009,
      "step": 2276380
    },
    {
      "epoch": 3.725378527523026,
      "grad_norm": 0.27406448125839233,
      "learning_rate": 2.5660404709975424e-06,
      "loss": 0.0112,
      "step": 2276400
    },
    {
      "epoch": 3.725411257961679,
      "grad_norm": 0.23193064332008362,
      "learning_rate": 2.565974578784025e-06,
      "loss": 0.0124,
      "step": 2276420
    },
    {
      "epoch": 3.7254439884003325,
      "grad_norm": 1.1013580560684204,
      "learning_rate": 2.565908686570508e-06,
      "loss": 0.0111,
      "step": 2276440
    },
    {
      "epoch": 3.725476718838986,
      "grad_norm": 0.3386397361755371,
      "learning_rate": 2.5658427943569906e-06,
      "loss": 0.0094,
      "step": 2276460
    },
    {
      "epoch": 3.7255094492776393,
      "grad_norm": 0.16814498603343964,
      "learning_rate": 2.565776902143474e-06,
      "loss": 0.0106,
      "step": 2276480
    },
    {
      "epoch": 3.7255421797162924,
      "grad_norm": 0.5606725215911865,
      "learning_rate": 2.565711009929957e-06,
      "loss": 0.0133,
      "step": 2276500
    },
    {
      "epoch": 3.725574910154946,
      "grad_norm": 0.19019904732704163,
      "learning_rate": 2.56564511771644e-06,
      "loss": 0.0104,
      "step": 2276520
    },
    {
      "epoch": 3.725607640593599,
      "grad_norm": 0.17153725028038025,
      "learning_rate": 2.565579225502923e-06,
      "loss": 0.0084,
      "step": 2276540
    },
    {
      "epoch": 3.7256403710322523,
      "grad_norm": 0.21267524361610413,
      "learning_rate": 2.5655133332894056e-06,
      "loss": 0.0113,
      "step": 2276560
    },
    {
      "epoch": 3.725673101470906,
      "grad_norm": 0.20639196038246155,
      "learning_rate": 2.5654474410758883e-06,
      "loss": 0.01,
      "step": 2276580
    },
    {
      "epoch": 3.7257058319095595,
      "grad_norm": 0.30300313234329224,
      "learning_rate": 2.565381548862371e-06,
      "loss": 0.0191,
      "step": 2276600
    },
    {
      "epoch": 3.7257385623482127,
      "grad_norm": 0.18348248302936554,
      "learning_rate": 2.565315656648854e-06,
      "loss": 0.0107,
      "step": 2276620
    },
    {
      "epoch": 3.725771292786866,
      "grad_norm": 0.10610353946685791,
      "learning_rate": 2.565249764435337e-06,
      "loss": 0.0109,
      "step": 2276640
    },
    {
      "epoch": 3.7258040232255194,
      "grad_norm": 0.3504309058189392,
      "learning_rate": 2.5651838722218197e-06,
      "loss": 0.012,
      "step": 2276660
    },
    {
      "epoch": 3.7258367536641726,
      "grad_norm": 0.4718967378139496,
      "learning_rate": 2.5651179800083024e-06,
      "loss": 0.0067,
      "step": 2276680
    },
    {
      "epoch": 3.7258694841028257,
      "grad_norm": 0.20347219705581665,
      "learning_rate": 2.5650520877947856e-06,
      "loss": 0.0088,
      "step": 2276700
    },
    {
      "epoch": 3.7259022145414793,
      "grad_norm": 0.16576850414276123,
      "learning_rate": 2.5649861955812683e-06,
      "loss": 0.0139,
      "step": 2276720
    },
    {
      "epoch": 3.725934944980133,
      "grad_norm": 0.2584053874015808,
      "learning_rate": 2.564920303367751e-06,
      "loss": 0.0127,
      "step": 2276740
    },
    {
      "epoch": 3.725967675418786,
      "grad_norm": 0.15087628364562988,
      "learning_rate": 2.564854411154234e-06,
      "loss": 0.0118,
      "step": 2276760
    },
    {
      "epoch": 3.726000405857439,
      "grad_norm": 0.16446459293365479,
      "learning_rate": 2.564788518940717e-06,
      "loss": 0.01,
      "step": 2276780
    },
    {
      "epoch": 3.726033136296093,
      "grad_norm": 0.5679804086685181,
      "learning_rate": 2.5647226267271997e-06,
      "loss": 0.0132,
      "step": 2276800
    },
    {
      "epoch": 3.726065866734746,
      "grad_norm": 0.4752262830734253,
      "learning_rate": 2.5646567345136824e-06,
      "loss": 0.0124,
      "step": 2276820
    },
    {
      "epoch": 3.726098597173399,
      "grad_norm": 0.6714679002761841,
      "learning_rate": 2.564590842300166e-06,
      "loss": 0.0093,
      "step": 2276840
    },
    {
      "epoch": 3.7261313276120527,
      "grad_norm": 0.4901946187019348,
      "learning_rate": 2.5645249500866488e-06,
      "loss": 0.0083,
      "step": 2276860
    },
    {
      "epoch": 3.726164058050706,
      "grad_norm": 0.21787549555301666,
      "learning_rate": 2.5644590578731315e-06,
      "loss": 0.0139,
      "step": 2276880
    },
    {
      "epoch": 3.7261967884893594,
      "grad_norm": 0.18958063423633575,
      "learning_rate": 2.5643931656596143e-06,
      "loss": 0.0095,
      "step": 2276900
    },
    {
      "epoch": 3.7262295189280126,
      "grad_norm": 0.5523514747619629,
      "learning_rate": 2.5643272734460974e-06,
      "loss": 0.0134,
      "step": 2276920
    },
    {
      "epoch": 3.726262249366666,
      "grad_norm": 0.1875329613685608,
      "learning_rate": 2.56426138123258e-06,
      "loss": 0.0132,
      "step": 2276940
    },
    {
      "epoch": 3.7262949798053193,
      "grad_norm": 0.890505313873291,
      "learning_rate": 2.564195489019063e-06,
      "loss": 0.0116,
      "step": 2276960
    },
    {
      "epoch": 3.7263277102439725,
      "grad_norm": 0.2954116463661194,
      "learning_rate": 2.5641295968055456e-06,
      "loss": 0.0089,
      "step": 2276980
    },
    {
      "epoch": 3.726360440682626,
      "grad_norm": 0.0865982174873352,
      "learning_rate": 2.5640637045920284e-06,
      "loss": 0.0119,
      "step": 2277000
    },
    {
      "epoch": 3.726393171121279,
      "grad_norm": 0.2004650980234146,
      "learning_rate": 2.5639978123785115e-06,
      "loss": 0.0137,
      "step": 2277020
    },
    {
      "epoch": 3.726425901559933,
      "grad_norm": 0.3716641068458557,
      "learning_rate": 2.5639319201649943e-06,
      "loss": 0.0135,
      "step": 2277040
    },
    {
      "epoch": 3.726458631998586,
      "grad_norm": 0.3815653324127197,
      "learning_rate": 2.563866027951477e-06,
      "loss": 0.0089,
      "step": 2277060
    },
    {
      "epoch": 3.7264913624372396,
      "grad_norm": 0.4733753800392151,
      "learning_rate": 2.5638001357379598e-06,
      "loss": 0.0121,
      "step": 2277080
    },
    {
      "epoch": 3.7265240928758927,
      "grad_norm": 0.3039968013763428,
      "learning_rate": 2.563734243524443e-06,
      "loss": 0.0139,
      "step": 2277100
    },
    {
      "epoch": 3.726556823314546,
      "grad_norm": 0.3748372495174408,
      "learning_rate": 2.5636683513109257e-06,
      "loss": 0.0128,
      "step": 2277120
    },
    {
      "epoch": 3.7265895537531994,
      "grad_norm": 0.44779515266418457,
      "learning_rate": 2.5636024590974084e-06,
      "loss": 0.0105,
      "step": 2277140
    },
    {
      "epoch": 3.7266222841918526,
      "grad_norm": 0.28626686334609985,
      "learning_rate": 2.563536566883891e-06,
      "loss": 0.0103,
      "step": 2277160
    },
    {
      "epoch": 3.726655014630506,
      "grad_norm": 0.6740992665290833,
      "learning_rate": 2.5634706746703743e-06,
      "loss": 0.0103,
      "step": 2277180
    },
    {
      "epoch": 3.7266877450691593,
      "grad_norm": 0.23116935789585114,
      "learning_rate": 2.5634047824568575e-06,
      "loss": 0.0109,
      "step": 2277200
    },
    {
      "epoch": 3.726720475507813,
      "grad_norm": 0.16022558510303497,
      "learning_rate": 2.56333889024334e-06,
      "loss": 0.0094,
      "step": 2277220
    },
    {
      "epoch": 3.726753205946466,
      "grad_norm": 0.11358655989170074,
      "learning_rate": 2.5632729980298234e-06,
      "loss": 0.0076,
      "step": 2277240
    },
    {
      "epoch": 3.7267859363851192,
      "grad_norm": 0.37675321102142334,
      "learning_rate": 2.563207105816306e-06,
      "loss": 0.0116,
      "step": 2277260
    },
    {
      "epoch": 3.726818666823773,
      "grad_norm": 0.1337914615869522,
      "learning_rate": 2.563141213602789e-06,
      "loss": 0.0091,
      "step": 2277280
    },
    {
      "epoch": 3.726851397262426,
      "grad_norm": 0.22763095796108246,
      "learning_rate": 2.5630753213892716e-06,
      "loss": 0.013,
      "step": 2277300
    },
    {
      "epoch": 3.7268841277010796,
      "grad_norm": 0.3851390480995178,
      "learning_rate": 2.5630094291757547e-06,
      "loss": 0.0087,
      "step": 2277320
    },
    {
      "epoch": 3.7269168581397327,
      "grad_norm": 3.266334295272827,
      "learning_rate": 2.5629435369622375e-06,
      "loss": 0.0137,
      "step": 2277340
    },
    {
      "epoch": 3.7269495885783863,
      "grad_norm": 0.27696219086647034,
      "learning_rate": 2.5628776447487202e-06,
      "loss": 0.012,
      "step": 2277360
    },
    {
      "epoch": 3.7269823190170395,
      "grad_norm": 0.11473899334669113,
      "learning_rate": 2.562811752535203e-06,
      "loss": 0.0127,
      "step": 2277380
    },
    {
      "epoch": 3.7270150494556926,
      "grad_norm": 0.2571275532245636,
      "learning_rate": 2.562745860321686e-06,
      "loss": 0.0099,
      "step": 2277400
    },
    {
      "epoch": 3.727047779894346,
      "grad_norm": 0.2511381506919861,
      "learning_rate": 2.562679968108169e-06,
      "loss": 0.0086,
      "step": 2277420
    },
    {
      "epoch": 3.7270805103329994,
      "grad_norm": 0.17538583278656006,
      "learning_rate": 2.5626140758946516e-06,
      "loss": 0.0101,
      "step": 2277440
    },
    {
      "epoch": 3.727113240771653,
      "grad_norm": 0.13739070296287537,
      "learning_rate": 2.5625481836811344e-06,
      "loss": 0.0056,
      "step": 2277460
    },
    {
      "epoch": 3.727145971210306,
      "grad_norm": 0.3958912193775177,
      "learning_rate": 2.562482291467617e-06,
      "loss": 0.0078,
      "step": 2277480
    },
    {
      "epoch": 3.7271787016489597,
      "grad_norm": 0.3197786808013916,
      "learning_rate": 2.5624163992541003e-06,
      "loss": 0.0118,
      "step": 2277500
    },
    {
      "epoch": 3.727211432087613,
      "grad_norm": 0.18753869831562042,
      "learning_rate": 2.562350507040583e-06,
      "loss": 0.0116,
      "step": 2277520
    },
    {
      "epoch": 3.727244162526266,
      "grad_norm": 0.34578293561935425,
      "learning_rate": 2.562284614827066e-06,
      "loss": 0.0112,
      "step": 2277540
    },
    {
      "epoch": 3.7272768929649196,
      "grad_norm": 0.1458093822002411,
      "learning_rate": 2.5622187226135493e-06,
      "loss": 0.0099,
      "step": 2277560
    },
    {
      "epoch": 3.7273096234035727,
      "grad_norm": 0.16837401688098907,
      "learning_rate": 2.562152830400032e-06,
      "loss": 0.0106,
      "step": 2277580
    },
    {
      "epoch": 3.7273423538422263,
      "grad_norm": 0.25982901453971863,
      "learning_rate": 2.562086938186515e-06,
      "loss": 0.0082,
      "step": 2277600
    },
    {
      "epoch": 3.7273750842808795,
      "grad_norm": 0.06064412742853165,
      "learning_rate": 2.5620210459729975e-06,
      "loss": 0.0211,
      "step": 2277620
    },
    {
      "epoch": 3.727407814719533,
      "grad_norm": 0.17710688710212708,
      "learning_rate": 2.5619551537594807e-06,
      "loss": 0.0092,
      "step": 2277640
    },
    {
      "epoch": 3.7274405451581862,
      "grad_norm": 0.21326588094234467,
      "learning_rate": 2.5618892615459634e-06,
      "loss": 0.0153,
      "step": 2277660
    },
    {
      "epoch": 3.7274732755968394,
      "grad_norm": 0.08602079749107361,
      "learning_rate": 2.561823369332446e-06,
      "loss": 0.0112,
      "step": 2277680
    },
    {
      "epoch": 3.727506006035493,
      "grad_norm": 0.7744941711425781,
      "learning_rate": 2.561757477118929e-06,
      "loss": 0.0104,
      "step": 2277700
    },
    {
      "epoch": 3.727538736474146,
      "grad_norm": 0.08808929473161697,
      "learning_rate": 2.561691584905412e-06,
      "loss": 0.0094,
      "step": 2277720
    },
    {
      "epoch": 3.7275714669127997,
      "grad_norm": 0.39109042286872864,
      "learning_rate": 2.561625692691895e-06,
      "loss": 0.0092,
      "step": 2277740
    },
    {
      "epoch": 3.727604197351453,
      "grad_norm": 0.5232645869255066,
      "learning_rate": 2.5615598004783776e-06,
      "loss": 0.0097,
      "step": 2277760
    },
    {
      "epoch": 3.7276369277901065,
      "grad_norm": 0.10254933685064316,
      "learning_rate": 2.5614939082648603e-06,
      "loss": 0.0127,
      "step": 2277780
    },
    {
      "epoch": 3.7276696582287596,
      "grad_norm": 0.41979527473449707,
      "learning_rate": 2.5614280160513435e-06,
      "loss": 0.0127,
      "step": 2277800
    },
    {
      "epoch": 3.7277023886674128,
      "grad_norm": 0.171248659491539,
      "learning_rate": 2.561362123837826e-06,
      "loss": 0.0083,
      "step": 2277820
    },
    {
      "epoch": 3.7277351191060664,
      "grad_norm": 0.12730805575847626,
      "learning_rate": 2.561296231624309e-06,
      "loss": 0.0136,
      "step": 2277840
    },
    {
      "epoch": 3.7277678495447195,
      "grad_norm": 0.26654693484306335,
      "learning_rate": 2.5612303394107917e-06,
      "loss": 0.0081,
      "step": 2277860
    },
    {
      "epoch": 3.7278005799833727,
      "grad_norm": 0.2892504632472992,
      "learning_rate": 2.561164447197275e-06,
      "loss": 0.0081,
      "step": 2277880
    },
    {
      "epoch": 3.7278333104220263,
      "grad_norm": 0.21152012050151825,
      "learning_rate": 2.561098554983758e-06,
      "loss": 0.0098,
      "step": 2277900
    },
    {
      "epoch": 3.72786604086068,
      "grad_norm": 0.5827230215072632,
      "learning_rate": 2.5610326627702408e-06,
      "loss": 0.0093,
      "step": 2277920
    },
    {
      "epoch": 3.727898771299333,
      "grad_norm": 0.22728979587554932,
      "learning_rate": 2.560966770556724e-06,
      "loss": 0.0091,
      "step": 2277940
    },
    {
      "epoch": 3.727931501737986,
      "grad_norm": 0.4905948340892792,
      "learning_rate": 2.5609008783432067e-06,
      "loss": 0.0117,
      "step": 2277960
    },
    {
      "epoch": 3.7279642321766397,
      "grad_norm": 0.3297352194786072,
      "learning_rate": 2.5608349861296894e-06,
      "loss": 0.0087,
      "step": 2277980
    },
    {
      "epoch": 3.727996962615293,
      "grad_norm": 0.17102205753326416,
      "learning_rate": 2.560769093916172e-06,
      "loss": 0.0091,
      "step": 2278000
    },
    {
      "epoch": 3.728029693053946,
      "grad_norm": 0.17563970386981964,
      "learning_rate": 2.560703201702655e-06,
      "loss": 0.0126,
      "step": 2278020
    },
    {
      "epoch": 3.7280624234925996,
      "grad_norm": 0.2912482023239136,
      "learning_rate": 2.560637309489138e-06,
      "loss": 0.0121,
      "step": 2278040
    },
    {
      "epoch": 3.7280951539312532,
      "grad_norm": 0.3283642530441284,
      "learning_rate": 2.5605714172756208e-06,
      "loss": 0.0119,
      "step": 2278060
    },
    {
      "epoch": 3.7281278843699064,
      "grad_norm": 0.210984006524086,
      "learning_rate": 2.5605055250621035e-06,
      "loss": 0.009,
      "step": 2278080
    },
    {
      "epoch": 3.7281606148085595,
      "grad_norm": 0.5853256583213806,
      "learning_rate": 2.5604396328485863e-06,
      "loss": 0.01,
      "step": 2278100
    },
    {
      "epoch": 3.728193345247213,
      "grad_norm": 0.23790878057479858,
      "learning_rate": 2.5603737406350694e-06,
      "loss": 0.0157,
      "step": 2278120
    },
    {
      "epoch": 3.7282260756858663,
      "grad_norm": 0.19541488587856293,
      "learning_rate": 2.560307848421552e-06,
      "loss": 0.0156,
      "step": 2278140
    },
    {
      "epoch": 3.7282588061245194,
      "grad_norm": 0.43799635767936707,
      "learning_rate": 2.560241956208035e-06,
      "loss": 0.0079,
      "step": 2278160
    },
    {
      "epoch": 3.728291536563173,
      "grad_norm": 0.36987578868865967,
      "learning_rate": 2.5601760639945176e-06,
      "loss": 0.0171,
      "step": 2278180
    },
    {
      "epoch": 3.7283242670018266,
      "grad_norm": 0.14031657576560974,
      "learning_rate": 2.560110171781001e-06,
      "loss": 0.0074,
      "step": 2278200
    },
    {
      "epoch": 3.7283569974404798,
      "grad_norm": 0.21686510741710663,
      "learning_rate": 2.5600442795674835e-06,
      "loss": 0.0124,
      "step": 2278220
    },
    {
      "epoch": 3.728389727879133,
      "grad_norm": 0.11618095636367798,
      "learning_rate": 2.5599783873539667e-06,
      "loss": 0.008,
      "step": 2278240
    },
    {
      "epoch": 3.7284224583177865,
      "grad_norm": 0.29190486669540405,
      "learning_rate": 2.55991249514045e-06,
      "loss": 0.0088,
      "step": 2278260
    },
    {
      "epoch": 3.7284551887564397,
      "grad_norm": 0.19904294610023499,
      "learning_rate": 2.5598466029269326e-06,
      "loss": 0.0085,
      "step": 2278280
    },
    {
      "epoch": 3.728487919195093,
      "grad_norm": 0.5695692300796509,
      "learning_rate": 2.5597807107134154e-06,
      "loss": 0.0098,
      "step": 2278300
    },
    {
      "epoch": 3.7285206496337464,
      "grad_norm": 0.10556867718696594,
      "learning_rate": 2.559714818499898e-06,
      "loss": 0.0144,
      "step": 2278320
    },
    {
      "epoch": 3.7285533800724,
      "grad_norm": 0.31845009326934814,
      "learning_rate": 2.5596489262863813e-06,
      "loss": 0.0147,
      "step": 2278340
    },
    {
      "epoch": 3.728586110511053,
      "grad_norm": 0.07323171198368073,
      "learning_rate": 2.559583034072864e-06,
      "loss": 0.0105,
      "step": 2278360
    },
    {
      "epoch": 3.7286188409497063,
      "grad_norm": 0.36936840415000916,
      "learning_rate": 2.5595171418593467e-06,
      "loss": 0.0162,
      "step": 2278380
    },
    {
      "epoch": 3.72865157138836,
      "grad_norm": 0.1893996000289917,
      "learning_rate": 2.5594512496458295e-06,
      "loss": 0.0134,
      "step": 2278400
    },
    {
      "epoch": 3.728684301827013,
      "grad_norm": 0.2384098321199417,
      "learning_rate": 2.5593853574323122e-06,
      "loss": 0.0135,
      "step": 2278420
    },
    {
      "epoch": 3.728717032265666,
      "grad_norm": 0.34176522493362427,
      "learning_rate": 2.5593194652187954e-06,
      "loss": 0.0089,
      "step": 2278440
    },
    {
      "epoch": 3.72874976270432,
      "grad_norm": 0.40674516558647156,
      "learning_rate": 2.559253573005278e-06,
      "loss": 0.011,
      "step": 2278460
    },
    {
      "epoch": 3.728782493142973,
      "grad_norm": 0.3923887014389038,
      "learning_rate": 2.559187680791761e-06,
      "loss": 0.0103,
      "step": 2278480
    },
    {
      "epoch": 3.7288152235816265,
      "grad_norm": 0.21471139788627625,
      "learning_rate": 2.5591217885782436e-06,
      "loss": 0.019,
      "step": 2278500
    },
    {
      "epoch": 3.7288479540202797,
      "grad_norm": 0.17781102657318115,
      "learning_rate": 2.5590558963647268e-06,
      "loss": 0.0091,
      "step": 2278520
    },
    {
      "epoch": 3.7288806844589333,
      "grad_norm": 0.3836681842803955,
      "learning_rate": 2.5589900041512095e-06,
      "loss": 0.0116,
      "step": 2278540
    },
    {
      "epoch": 3.7289134148975864,
      "grad_norm": 0.42287173867225647,
      "learning_rate": 2.5589241119376922e-06,
      "loss": 0.0119,
      "step": 2278560
    },
    {
      "epoch": 3.7289461453362396,
      "grad_norm": 0.12405064702033997,
      "learning_rate": 2.558858219724175e-06,
      "loss": 0.0121,
      "step": 2278580
    },
    {
      "epoch": 3.728978875774893,
      "grad_norm": 0.20792262256145477,
      "learning_rate": 2.5587923275106586e-06,
      "loss": 0.0107,
      "step": 2278600
    },
    {
      "epoch": 3.7290116062135463,
      "grad_norm": 0.1488075852394104,
      "learning_rate": 2.5587264352971413e-06,
      "loss": 0.0125,
      "step": 2278620
    },
    {
      "epoch": 3.7290443366522,
      "grad_norm": 0.3878125846385956,
      "learning_rate": 2.558660543083624e-06,
      "loss": 0.0078,
      "step": 2278640
    },
    {
      "epoch": 3.729077067090853,
      "grad_norm": 0.07892715185880661,
      "learning_rate": 2.558594650870107e-06,
      "loss": 0.0099,
      "step": 2278660
    },
    {
      "epoch": 3.7291097975295067,
      "grad_norm": 0.14568941295146942,
      "learning_rate": 2.55852875865659e-06,
      "loss": 0.0069,
      "step": 2278680
    },
    {
      "epoch": 3.72914252796816,
      "grad_norm": 0.07380242645740509,
      "learning_rate": 2.5584628664430727e-06,
      "loss": 0.0135,
      "step": 2278700
    },
    {
      "epoch": 3.729175258406813,
      "grad_norm": 0.05496755242347717,
      "learning_rate": 2.5583969742295554e-06,
      "loss": 0.0117,
      "step": 2278720
    },
    {
      "epoch": 3.7292079888454666,
      "grad_norm": 0.15185831487178802,
      "learning_rate": 2.5583310820160386e-06,
      "loss": 0.0107,
      "step": 2278740
    },
    {
      "epoch": 3.7292407192841197,
      "grad_norm": 0.21419046819210052,
      "learning_rate": 2.5582651898025213e-06,
      "loss": 0.0149,
      "step": 2278760
    },
    {
      "epoch": 3.7292734497227733,
      "grad_norm": 0.2044050395488739,
      "learning_rate": 2.558199297589004e-06,
      "loss": 0.0103,
      "step": 2278780
    },
    {
      "epoch": 3.7293061801614265,
      "grad_norm": 0.12474203109741211,
      "learning_rate": 2.558133405375487e-06,
      "loss": 0.0153,
      "step": 2278800
    },
    {
      "epoch": 3.72933891060008,
      "grad_norm": 0.1959972381591797,
      "learning_rate": 2.55806751316197e-06,
      "loss": 0.0078,
      "step": 2278820
    },
    {
      "epoch": 3.729371641038733,
      "grad_norm": 0.17406906187534332,
      "learning_rate": 2.5580016209484527e-06,
      "loss": 0.0155,
      "step": 2278840
    },
    {
      "epoch": 3.7294043714773863,
      "grad_norm": 0.12539611756801605,
      "learning_rate": 2.5579357287349355e-06,
      "loss": 0.0066,
      "step": 2278860
    },
    {
      "epoch": 3.72943710191604,
      "grad_norm": 0.11019698530435562,
      "learning_rate": 2.557869836521418e-06,
      "loss": 0.0081,
      "step": 2278880
    },
    {
      "epoch": 3.729469832354693,
      "grad_norm": 0.1566462367773056,
      "learning_rate": 2.557803944307901e-06,
      "loss": 0.0099,
      "step": 2278900
    },
    {
      "epoch": 3.7295025627933467,
      "grad_norm": 0.3384717106819153,
      "learning_rate": 2.557738052094384e-06,
      "loss": 0.0112,
      "step": 2278920
    },
    {
      "epoch": 3.729535293232,
      "grad_norm": 0.2595256567001343,
      "learning_rate": 2.557672159880867e-06,
      "loss": 0.0104,
      "step": 2278940
    },
    {
      "epoch": 3.7295680236706534,
      "grad_norm": 0.43468788266181946,
      "learning_rate": 2.55760626766735e-06,
      "loss": 0.0107,
      "step": 2278960
    },
    {
      "epoch": 3.7296007541093066,
      "grad_norm": 0.24474456906318665,
      "learning_rate": 2.557540375453833e-06,
      "loss": 0.008,
      "step": 2278980
    },
    {
      "epoch": 3.7296334845479597,
      "grad_norm": 0.5303102731704712,
      "learning_rate": 2.557474483240316e-06,
      "loss": 0.0119,
      "step": 2279000
    },
    {
      "epoch": 3.7296662149866133,
      "grad_norm": 0.20976336300373077,
      "learning_rate": 2.5574085910267986e-06,
      "loss": 0.0108,
      "step": 2279020
    },
    {
      "epoch": 3.7296989454252665,
      "grad_norm": 0.22317150235176086,
      "learning_rate": 2.5573426988132814e-06,
      "loss": 0.0124,
      "step": 2279040
    },
    {
      "epoch": 3.72973167586392,
      "grad_norm": 0.2066268026828766,
      "learning_rate": 2.5572768065997645e-06,
      "loss": 0.0068,
      "step": 2279060
    },
    {
      "epoch": 3.729764406302573,
      "grad_norm": 0.3115125596523285,
      "learning_rate": 2.5572109143862473e-06,
      "loss": 0.0063,
      "step": 2279080
    },
    {
      "epoch": 3.729797136741227,
      "grad_norm": 0.24334494769573212,
      "learning_rate": 2.55714502217273e-06,
      "loss": 0.007,
      "step": 2279100
    },
    {
      "epoch": 3.72982986717988,
      "grad_norm": 0.44526368379592896,
      "learning_rate": 2.5570791299592128e-06,
      "loss": 0.0118,
      "step": 2279120
    },
    {
      "epoch": 3.729862597618533,
      "grad_norm": 0.3110673129558563,
      "learning_rate": 2.557013237745696e-06,
      "loss": 0.0149,
      "step": 2279140
    },
    {
      "epoch": 3.7298953280571867,
      "grad_norm": 0.32224249839782715,
      "learning_rate": 2.5569473455321787e-06,
      "loss": 0.0155,
      "step": 2279160
    },
    {
      "epoch": 3.72992805849584,
      "grad_norm": 0.21490037441253662,
      "learning_rate": 2.5568814533186614e-06,
      "loss": 0.0097,
      "step": 2279180
    },
    {
      "epoch": 3.7299607889344935,
      "grad_norm": 0.16034749150276184,
      "learning_rate": 2.556815561105144e-06,
      "loss": 0.014,
      "step": 2279200
    },
    {
      "epoch": 3.7299935193731466,
      "grad_norm": 0.1627177596092224,
      "learning_rate": 2.5567496688916273e-06,
      "loss": 0.0108,
      "step": 2279220
    },
    {
      "epoch": 3.7300262498118,
      "grad_norm": 0.31029292941093445,
      "learning_rate": 2.55668377667811e-06,
      "loss": 0.0171,
      "step": 2279240
    },
    {
      "epoch": 3.7300589802504533,
      "grad_norm": 0.6914983987808228,
      "learning_rate": 2.5566178844645928e-06,
      "loss": 0.0124,
      "step": 2279260
    },
    {
      "epoch": 3.7300917106891065,
      "grad_norm": 0.45099934935569763,
      "learning_rate": 2.5565519922510755e-06,
      "loss": 0.011,
      "step": 2279280
    },
    {
      "epoch": 3.73012444112776,
      "grad_norm": 0.24073848128318787,
      "learning_rate": 2.556486100037559e-06,
      "loss": 0.0139,
      "step": 2279300
    },
    {
      "epoch": 3.7301571715664132,
      "grad_norm": 0.2826625108718872,
      "learning_rate": 2.556420207824042e-06,
      "loss": 0.0076,
      "step": 2279320
    },
    {
      "epoch": 3.730189902005067,
      "grad_norm": 0.1640256941318512,
      "learning_rate": 2.5563543156105246e-06,
      "loss": 0.0097,
      "step": 2279340
    },
    {
      "epoch": 3.73022263244372,
      "grad_norm": 0.4638881981372833,
      "learning_rate": 2.5562884233970078e-06,
      "loss": 0.01,
      "step": 2279360
    },
    {
      "epoch": 3.7302553628823736,
      "grad_norm": 0.7580717206001282,
      "learning_rate": 2.5562225311834905e-06,
      "loss": 0.009,
      "step": 2279380
    },
    {
      "epoch": 3.7302880933210267,
      "grad_norm": 0.04486517980694771,
      "learning_rate": 2.5561566389699732e-06,
      "loss": 0.0122,
      "step": 2279400
    },
    {
      "epoch": 3.73032082375968,
      "grad_norm": 0.11143166571855545,
      "learning_rate": 2.556090746756456e-06,
      "loss": 0.0131,
      "step": 2279420
    },
    {
      "epoch": 3.7303535541983335,
      "grad_norm": 0.3134457468986511,
      "learning_rate": 2.5560248545429387e-06,
      "loss": 0.0079,
      "step": 2279440
    },
    {
      "epoch": 3.7303862846369866,
      "grad_norm": 0.23726919293403625,
      "learning_rate": 2.555958962329422e-06,
      "loss": 0.0178,
      "step": 2279460
    },
    {
      "epoch": 3.7304190150756398,
      "grad_norm": 0.2293175607919693,
      "learning_rate": 2.5558930701159046e-06,
      "loss": 0.0106,
      "step": 2279480
    },
    {
      "epoch": 3.7304517455142934,
      "grad_norm": 0.39136984944343567,
      "learning_rate": 2.5558271779023874e-06,
      "loss": 0.0092,
      "step": 2279500
    },
    {
      "epoch": 3.730484475952947,
      "grad_norm": 0.28396376967430115,
      "learning_rate": 2.55576128568887e-06,
      "loss": 0.0101,
      "step": 2279520
    },
    {
      "epoch": 3.7305172063916,
      "grad_norm": 0.09954795241355896,
      "learning_rate": 2.5556953934753533e-06,
      "loss": 0.0067,
      "step": 2279540
    },
    {
      "epoch": 3.7305499368302533,
      "grad_norm": 0.5338327288627625,
      "learning_rate": 2.555629501261836e-06,
      "loss": 0.0146,
      "step": 2279560
    },
    {
      "epoch": 3.730582667268907,
      "grad_norm": 0.16317248344421387,
      "learning_rate": 2.5555636090483187e-06,
      "loss": 0.0102,
      "step": 2279580
    },
    {
      "epoch": 3.73061539770756,
      "grad_norm": 0.21769626438617706,
      "learning_rate": 2.5554977168348015e-06,
      "loss": 0.007,
      "step": 2279600
    },
    {
      "epoch": 3.730648128146213,
      "grad_norm": 0.6095866560935974,
      "learning_rate": 2.5554318246212846e-06,
      "loss": 0.0144,
      "step": 2279620
    },
    {
      "epoch": 3.7306808585848668,
      "grad_norm": 0.4652564823627472,
      "learning_rate": 2.5553659324077674e-06,
      "loss": 0.014,
      "step": 2279640
    },
    {
      "epoch": 3.7307135890235203,
      "grad_norm": 0.16686053574085236,
      "learning_rate": 2.5553000401942505e-06,
      "loss": 0.0139,
      "step": 2279660
    },
    {
      "epoch": 3.7307463194621735,
      "grad_norm": 0.07861553877592087,
      "learning_rate": 2.5552341479807337e-06,
      "loss": 0.0168,
      "step": 2279680
    },
    {
      "epoch": 3.7307790499008266,
      "grad_norm": 0.07430783659219742,
      "learning_rate": 2.5551682557672164e-06,
      "loss": 0.0085,
      "step": 2279700
    },
    {
      "epoch": 3.7308117803394802,
      "grad_norm": 0.5504649877548218,
      "learning_rate": 2.555102363553699e-06,
      "loss": 0.0108,
      "step": 2279720
    },
    {
      "epoch": 3.7308445107781334,
      "grad_norm": 0.32680103182792664,
      "learning_rate": 2.555036471340182e-06,
      "loss": 0.0121,
      "step": 2279740
    },
    {
      "epoch": 3.7308772412167865,
      "grad_norm": 0.3676842153072357,
      "learning_rate": 2.554970579126665e-06,
      "loss": 0.0126,
      "step": 2279760
    },
    {
      "epoch": 3.73090997165544,
      "grad_norm": 0.33260875940322876,
      "learning_rate": 2.554904686913148e-06,
      "loss": 0.0161,
      "step": 2279780
    },
    {
      "epoch": 3.7309427020940937,
      "grad_norm": 0.16146793961524963,
      "learning_rate": 2.5548387946996306e-06,
      "loss": 0.009,
      "step": 2279800
    },
    {
      "epoch": 3.730975432532747,
      "grad_norm": 0.1702975332736969,
      "learning_rate": 2.5547729024861133e-06,
      "loss": 0.0064,
      "step": 2279820
    },
    {
      "epoch": 3.7310081629714,
      "grad_norm": 0.4120480716228485,
      "learning_rate": 2.5547070102725965e-06,
      "loss": 0.0162,
      "step": 2279840
    },
    {
      "epoch": 3.7310408934100536,
      "grad_norm": 0.1675591617822647,
      "learning_rate": 2.5546411180590792e-06,
      "loss": 0.0131,
      "step": 2279860
    },
    {
      "epoch": 3.7310736238487068,
      "grad_norm": 0.577576756477356,
      "learning_rate": 2.554575225845562e-06,
      "loss": 0.0074,
      "step": 2279880
    },
    {
      "epoch": 3.73110635428736,
      "grad_norm": 0.5377054214477539,
      "learning_rate": 2.5545093336320447e-06,
      "loss": 0.0113,
      "step": 2279900
    },
    {
      "epoch": 3.7311390847260135,
      "grad_norm": 0.30121973156929016,
      "learning_rate": 2.5544434414185274e-06,
      "loss": 0.0116,
      "step": 2279920
    },
    {
      "epoch": 3.7311718151646667,
      "grad_norm": 0.2422085702419281,
      "learning_rate": 2.5543775492050106e-06,
      "loss": 0.0106,
      "step": 2279940
    },
    {
      "epoch": 3.7312045456033203,
      "grad_norm": 0.25258776545524597,
      "learning_rate": 2.5543116569914933e-06,
      "loss": 0.013,
      "step": 2279960
    },
    {
      "epoch": 3.7312372760419734,
      "grad_norm": 0.3064478933811188,
      "learning_rate": 2.554245764777976e-06,
      "loss": 0.011,
      "step": 2279980
    },
    {
      "epoch": 3.731270006480627,
      "grad_norm": 0.07051438093185425,
      "learning_rate": 2.5541798725644597e-06,
      "loss": 0.0094,
      "step": 2280000
    },
    {
      "epoch": 3.73130273691928,
      "grad_norm": 0.2828691303730011,
      "learning_rate": 2.5541139803509424e-06,
      "loss": 0.0091,
      "step": 2280020
    },
    {
      "epoch": 3.7313354673579333,
      "grad_norm": 0.25620636343955994,
      "learning_rate": 2.554048088137425e-06,
      "loss": 0.0103,
      "step": 2280040
    },
    {
      "epoch": 3.731368197796587,
      "grad_norm": 0.1392354965209961,
      "learning_rate": 2.553982195923908e-06,
      "loss": 0.0112,
      "step": 2280060
    },
    {
      "epoch": 3.73140092823524,
      "grad_norm": 0.33023738861083984,
      "learning_rate": 2.553916303710391e-06,
      "loss": 0.0092,
      "step": 2280080
    },
    {
      "epoch": 3.7314336586738936,
      "grad_norm": 0.19534435868263245,
      "learning_rate": 2.5538504114968738e-06,
      "loss": 0.0105,
      "step": 2280100
    },
    {
      "epoch": 3.731466389112547,
      "grad_norm": 0.30545949935913086,
      "learning_rate": 2.5537845192833565e-06,
      "loss": 0.012,
      "step": 2280120
    },
    {
      "epoch": 3.7314991195512004,
      "grad_norm": 0.21496577560901642,
      "learning_rate": 2.5537186270698393e-06,
      "loss": 0.0107,
      "step": 2280140
    },
    {
      "epoch": 3.7315318499898535,
      "grad_norm": 0.20650999248027802,
      "learning_rate": 2.5536527348563224e-06,
      "loss": 0.01,
      "step": 2280160
    },
    {
      "epoch": 3.7315645804285067,
      "grad_norm": 0.13493897020816803,
      "learning_rate": 2.553586842642805e-06,
      "loss": 0.0124,
      "step": 2280180
    },
    {
      "epoch": 3.7315973108671603,
      "grad_norm": 0.41117578744888306,
      "learning_rate": 2.553520950429288e-06,
      "loss": 0.0083,
      "step": 2280200
    },
    {
      "epoch": 3.7316300413058134,
      "grad_norm": 0.17202644050121307,
      "learning_rate": 2.5534550582157706e-06,
      "loss": 0.0071,
      "step": 2280220
    },
    {
      "epoch": 3.731662771744467,
      "grad_norm": 0.345364511013031,
      "learning_rate": 2.553389166002254e-06,
      "loss": 0.01,
      "step": 2280240
    },
    {
      "epoch": 3.73169550218312,
      "grad_norm": 0.3555116355419159,
      "learning_rate": 2.5533232737887366e-06,
      "loss": 0.0089,
      "step": 2280260
    },
    {
      "epoch": 3.7317282326217738,
      "grad_norm": 0.17867818474769592,
      "learning_rate": 2.5532573815752193e-06,
      "loss": 0.0103,
      "step": 2280280
    },
    {
      "epoch": 3.731760963060427,
      "grad_norm": 0.1782136708498001,
      "learning_rate": 2.553191489361702e-06,
      "loss": 0.0137,
      "step": 2280300
    },
    {
      "epoch": 3.73179369349908,
      "grad_norm": 0.42103132605552673,
      "learning_rate": 2.5531255971481848e-06,
      "loss": 0.0137,
      "step": 2280320
    },
    {
      "epoch": 3.7318264239377337,
      "grad_norm": 0.8801684975624084,
      "learning_rate": 2.553059704934668e-06,
      "loss": 0.0134,
      "step": 2280340
    },
    {
      "epoch": 3.731859154376387,
      "grad_norm": 0.6401221752166748,
      "learning_rate": 2.552993812721151e-06,
      "loss": 0.0077,
      "step": 2280360
    },
    {
      "epoch": 3.7318918848150404,
      "grad_norm": 0.12602850794792175,
      "learning_rate": 2.5529279205076343e-06,
      "loss": 0.0081,
      "step": 2280380
    },
    {
      "epoch": 3.7319246152536936,
      "grad_norm": 0.2673332393169403,
      "learning_rate": 2.552862028294117e-06,
      "loss": 0.0085,
      "step": 2280400
    },
    {
      "epoch": 3.731957345692347,
      "grad_norm": 0.2313159555196762,
      "learning_rate": 2.5527961360805997e-06,
      "loss": 0.0142,
      "step": 2280420
    },
    {
      "epoch": 3.7319900761310003,
      "grad_norm": 0.3656105399131775,
      "learning_rate": 2.5527302438670825e-06,
      "loss": 0.0115,
      "step": 2280440
    },
    {
      "epoch": 3.7320228065696535,
      "grad_norm": 0.16263197362422943,
      "learning_rate": 2.5526643516535652e-06,
      "loss": 0.0124,
      "step": 2280460
    },
    {
      "epoch": 3.732055537008307,
      "grad_norm": 0.18456365168094635,
      "learning_rate": 2.5525984594400484e-06,
      "loss": 0.009,
      "step": 2280480
    },
    {
      "epoch": 3.73208826744696,
      "grad_norm": 0.5408287644386292,
      "learning_rate": 2.552532567226531e-06,
      "loss": 0.0153,
      "step": 2280500
    },
    {
      "epoch": 3.732120997885614,
      "grad_norm": 0.15409040451049805,
      "learning_rate": 2.552466675013014e-06,
      "loss": 0.0097,
      "step": 2280520
    },
    {
      "epoch": 3.732153728324267,
      "grad_norm": 0.25987547636032104,
      "learning_rate": 2.5524007827994966e-06,
      "loss": 0.0088,
      "step": 2280540
    },
    {
      "epoch": 3.7321864587629205,
      "grad_norm": 0.5381078720092773,
      "learning_rate": 2.5523348905859798e-06,
      "loss": 0.0137,
      "step": 2280560
    },
    {
      "epoch": 3.7322191892015737,
      "grad_norm": 0.36223357915878296,
      "learning_rate": 2.5522689983724625e-06,
      "loss": 0.0113,
      "step": 2280580
    },
    {
      "epoch": 3.732251919640227,
      "grad_norm": 0.20677445828914642,
      "learning_rate": 2.5522031061589452e-06,
      "loss": 0.0127,
      "step": 2280600
    },
    {
      "epoch": 3.7322846500788804,
      "grad_norm": 0.18483510613441467,
      "learning_rate": 2.552137213945428e-06,
      "loss": 0.0104,
      "step": 2280620
    },
    {
      "epoch": 3.7323173805175336,
      "grad_norm": 0.23208749294281006,
      "learning_rate": 2.552071321731911e-06,
      "loss": 0.0101,
      "step": 2280640
    },
    {
      "epoch": 3.732350110956187,
      "grad_norm": 0.26720762252807617,
      "learning_rate": 2.552005429518394e-06,
      "loss": 0.0214,
      "step": 2280660
    },
    {
      "epoch": 3.7323828413948403,
      "grad_norm": 0.9199648499488831,
      "learning_rate": 2.5519395373048766e-06,
      "loss": 0.0099,
      "step": 2280680
    },
    {
      "epoch": 3.732415571833494,
      "grad_norm": 0.16787220537662506,
      "learning_rate": 2.5518736450913594e-06,
      "loss": 0.0098,
      "step": 2280700
    },
    {
      "epoch": 3.732448302272147,
      "grad_norm": 0.09450461715459824,
      "learning_rate": 2.551807752877843e-06,
      "loss": 0.0115,
      "step": 2280720
    },
    {
      "epoch": 3.7324810327108002,
      "grad_norm": 0.0882968083024025,
      "learning_rate": 2.5517418606643257e-06,
      "loss": 0.0136,
      "step": 2280740
    },
    {
      "epoch": 3.732513763149454,
      "grad_norm": 0.3653767704963684,
      "learning_rate": 2.5516759684508084e-06,
      "loss": 0.0127,
      "step": 2280760
    },
    {
      "epoch": 3.732546493588107,
      "grad_norm": 0.4573148488998413,
      "learning_rate": 2.5516100762372916e-06,
      "loss": 0.0169,
      "step": 2280780
    },
    {
      "epoch": 3.7325792240267606,
      "grad_norm": 0.5203593969345093,
      "learning_rate": 2.5515441840237743e-06,
      "loss": 0.0125,
      "step": 2280800
    },
    {
      "epoch": 3.7326119544654137,
      "grad_norm": 0.24947459995746613,
      "learning_rate": 2.551478291810257e-06,
      "loss": 0.0085,
      "step": 2280820
    },
    {
      "epoch": 3.7326446849040673,
      "grad_norm": 0.5892741084098816,
      "learning_rate": 2.55141239959674e-06,
      "loss": 0.0105,
      "step": 2280840
    },
    {
      "epoch": 3.7326774153427205,
      "grad_norm": 0.3761195242404938,
      "learning_rate": 2.5513465073832226e-06,
      "loss": 0.0117,
      "step": 2280860
    },
    {
      "epoch": 3.7327101457813736,
      "grad_norm": 0.4073543846607208,
      "learning_rate": 2.5512806151697057e-06,
      "loss": 0.0097,
      "step": 2280880
    },
    {
      "epoch": 3.732742876220027,
      "grad_norm": 0.07338027656078339,
      "learning_rate": 2.5512147229561885e-06,
      "loss": 0.0097,
      "step": 2280900
    },
    {
      "epoch": 3.7327756066586804,
      "grad_norm": 0.38331884145736694,
      "learning_rate": 2.551148830742671e-06,
      "loss": 0.0148,
      "step": 2280920
    },
    {
      "epoch": 3.7328083370973335,
      "grad_norm": 0.3226320743560791,
      "learning_rate": 2.551082938529154e-06,
      "loss": 0.0085,
      "step": 2280940
    },
    {
      "epoch": 3.732841067535987,
      "grad_norm": 0.3768411874771118,
      "learning_rate": 2.551017046315637e-06,
      "loss": 0.0096,
      "step": 2280960
    },
    {
      "epoch": 3.7328737979746407,
      "grad_norm": 0.4535074532032013,
      "learning_rate": 2.55095115410212e-06,
      "loss": 0.0134,
      "step": 2280980
    },
    {
      "epoch": 3.732906528413294,
      "grad_norm": 0.2808960974216461,
      "learning_rate": 2.5508852618886026e-06,
      "loss": 0.0083,
      "step": 2281000
    },
    {
      "epoch": 3.732939258851947,
      "grad_norm": 0.32834169268608093,
      "learning_rate": 2.5508193696750853e-06,
      "loss": 0.0123,
      "step": 2281020
    },
    {
      "epoch": 3.7329719892906006,
      "grad_norm": 0.08427953720092773,
      "learning_rate": 2.5507534774615685e-06,
      "loss": 0.0125,
      "step": 2281040
    },
    {
      "epoch": 3.7330047197292537,
      "grad_norm": 0.24338188767433167,
      "learning_rate": 2.5506875852480516e-06,
      "loss": 0.0105,
      "step": 2281060
    },
    {
      "epoch": 3.733037450167907,
      "grad_norm": 0.5928859710693359,
      "learning_rate": 2.5506216930345344e-06,
      "loss": 0.0177,
      "step": 2281080
    },
    {
      "epoch": 3.7330701806065605,
      "grad_norm": 0.12060356140136719,
      "learning_rate": 2.5505558008210175e-06,
      "loss": 0.0065,
      "step": 2281100
    },
    {
      "epoch": 3.733102911045214,
      "grad_norm": 0.39338165521621704,
      "learning_rate": 2.5504899086075003e-06,
      "loss": 0.013,
      "step": 2281120
    },
    {
      "epoch": 3.7331356414838672,
      "grad_norm": 0.6625385284423828,
      "learning_rate": 2.550424016393983e-06,
      "loss": 0.0116,
      "step": 2281140
    },
    {
      "epoch": 3.7331683719225204,
      "grad_norm": 0.11587974429130554,
      "learning_rate": 2.5503581241804658e-06,
      "loss": 0.0114,
      "step": 2281160
    },
    {
      "epoch": 3.733201102361174,
      "grad_norm": 0.1669296771287918,
      "learning_rate": 2.550292231966949e-06,
      "loss": 0.013,
      "step": 2281180
    },
    {
      "epoch": 3.733233832799827,
      "grad_norm": 0.26623979210853577,
      "learning_rate": 2.5502263397534317e-06,
      "loss": 0.0084,
      "step": 2281200
    },
    {
      "epoch": 3.7332665632384803,
      "grad_norm": 0.39923080801963806,
      "learning_rate": 2.5501604475399144e-06,
      "loss": 0.0128,
      "step": 2281220
    },
    {
      "epoch": 3.733299293677134,
      "grad_norm": 1.649359941482544,
      "learning_rate": 2.550094555326397e-06,
      "loss": 0.0125,
      "step": 2281240
    },
    {
      "epoch": 3.7333320241157875,
      "grad_norm": 0.11018072068691254,
      "learning_rate": 2.5500286631128803e-06,
      "loss": 0.0097,
      "step": 2281260
    },
    {
      "epoch": 3.7333647545544406,
      "grad_norm": 0.18312940001487732,
      "learning_rate": 2.549962770899363e-06,
      "loss": 0.0105,
      "step": 2281280
    },
    {
      "epoch": 3.7333974849930938,
      "grad_norm": 0.897990882396698,
      "learning_rate": 2.549896878685846e-06,
      "loss": 0.0113,
      "step": 2281300
    },
    {
      "epoch": 3.7334302154317474,
      "grad_norm": 0.06134247034788132,
      "learning_rate": 2.5498309864723285e-06,
      "loss": 0.0103,
      "step": 2281320
    },
    {
      "epoch": 3.7334629458704005,
      "grad_norm": 0.19409604370594025,
      "learning_rate": 2.5497650942588113e-06,
      "loss": 0.0109,
      "step": 2281340
    },
    {
      "epoch": 3.7334956763090537,
      "grad_norm": 0.13847708702087402,
      "learning_rate": 2.5496992020452944e-06,
      "loss": 0.0158,
      "step": 2281360
    },
    {
      "epoch": 3.7335284067477073,
      "grad_norm": 0.0991906151175499,
      "learning_rate": 2.549633309831777e-06,
      "loss": 0.0111,
      "step": 2281380
    },
    {
      "epoch": 3.733561137186361,
      "grad_norm": 0.13756820559501648,
      "learning_rate": 2.54956741761826e-06,
      "loss": 0.0092,
      "step": 2281400
    },
    {
      "epoch": 3.733593867625014,
      "grad_norm": 2.3291568756103516,
      "learning_rate": 2.5495015254047435e-06,
      "loss": 0.012,
      "step": 2281420
    },
    {
      "epoch": 3.733626598063667,
      "grad_norm": 0.0998539850115776,
      "learning_rate": 2.5494356331912262e-06,
      "loss": 0.0081,
      "step": 2281440
    },
    {
      "epoch": 3.7336593285023207,
      "grad_norm": 2.2146029472351074,
      "learning_rate": 2.549369740977709e-06,
      "loss": 0.0129,
      "step": 2281460
    },
    {
      "epoch": 3.733692058940974,
      "grad_norm": 0.2334032505750656,
      "learning_rate": 2.5493038487641917e-06,
      "loss": 0.0104,
      "step": 2281480
    },
    {
      "epoch": 3.733724789379627,
      "grad_norm": 0.5310318470001221,
      "learning_rate": 2.549237956550675e-06,
      "loss": 0.0117,
      "step": 2281500
    },
    {
      "epoch": 3.7337575198182806,
      "grad_norm": 0.08824922889471054,
      "learning_rate": 2.5491720643371576e-06,
      "loss": 0.0071,
      "step": 2281520
    },
    {
      "epoch": 3.733790250256934,
      "grad_norm": 0.3386088013648987,
      "learning_rate": 2.5491061721236404e-06,
      "loss": 0.0115,
      "step": 2281540
    },
    {
      "epoch": 3.7338229806955874,
      "grad_norm": 0.3977835476398468,
      "learning_rate": 2.549040279910123e-06,
      "loss": 0.0088,
      "step": 2281560
    },
    {
      "epoch": 3.7338557111342405,
      "grad_norm": 0.8024914860725403,
      "learning_rate": 2.5489743876966063e-06,
      "loss": 0.012,
      "step": 2281580
    },
    {
      "epoch": 3.733888441572894,
      "grad_norm": 0.12560200691223145,
      "learning_rate": 2.548908495483089e-06,
      "loss": 0.01,
      "step": 2281600
    },
    {
      "epoch": 3.7339211720115473,
      "grad_norm": 0.12225686758756638,
      "learning_rate": 2.5488426032695717e-06,
      "loss": 0.0094,
      "step": 2281620
    },
    {
      "epoch": 3.7339539024502004,
      "grad_norm": 0.3519367277622223,
      "learning_rate": 2.5487767110560545e-06,
      "loss": 0.0118,
      "step": 2281640
    },
    {
      "epoch": 3.733986632888854,
      "grad_norm": 0.219045951962471,
      "learning_rate": 2.5487108188425377e-06,
      "loss": 0.0068,
      "step": 2281660
    },
    {
      "epoch": 3.734019363327507,
      "grad_norm": 0.1907198280096054,
      "learning_rate": 2.5486449266290204e-06,
      "loss": 0.0101,
      "step": 2281680
    },
    {
      "epoch": 3.7340520937661608,
      "grad_norm": 0.18995609879493713,
      "learning_rate": 2.548579034415503e-06,
      "loss": 0.0127,
      "step": 2281700
    },
    {
      "epoch": 3.734084824204814,
      "grad_norm": 0.3708605468273163,
      "learning_rate": 2.548513142201986e-06,
      "loss": 0.0114,
      "step": 2281720
    },
    {
      "epoch": 3.7341175546434675,
      "grad_norm": 0.2081046849489212,
      "learning_rate": 2.5484472499884686e-06,
      "loss": 0.0067,
      "step": 2281740
    },
    {
      "epoch": 3.7341502850821207,
      "grad_norm": 0.14953559637069702,
      "learning_rate": 2.548381357774952e-06,
      "loss": 0.0116,
      "step": 2281760
    },
    {
      "epoch": 3.734183015520774,
      "grad_norm": 0.5790644288063049,
      "learning_rate": 2.548315465561435e-06,
      "loss": 0.0141,
      "step": 2281780
    },
    {
      "epoch": 3.7342157459594274,
      "grad_norm": 0.2157185971736908,
      "learning_rate": 2.548249573347918e-06,
      "loss": 0.01,
      "step": 2281800
    },
    {
      "epoch": 3.7342484763980806,
      "grad_norm": 0.17645953595638275,
      "learning_rate": 2.548183681134401e-06,
      "loss": 0.0107,
      "step": 2281820
    },
    {
      "epoch": 3.734281206836734,
      "grad_norm": 0.18399028480052948,
      "learning_rate": 2.5481177889208836e-06,
      "loss": 0.0108,
      "step": 2281840
    },
    {
      "epoch": 3.7343139372753873,
      "grad_norm": 0.06723620742559433,
      "learning_rate": 2.5480518967073663e-06,
      "loss": 0.0075,
      "step": 2281860
    },
    {
      "epoch": 3.734346667714041,
      "grad_norm": 0.03997715562582016,
      "learning_rate": 2.547986004493849e-06,
      "loss": 0.0148,
      "step": 2281880
    },
    {
      "epoch": 3.734379398152694,
      "grad_norm": 1.4911563396453857,
      "learning_rate": 2.5479201122803322e-06,
      "loss": 0.0146,
      "step": 2281900
    },
    {
      "epoch": 3.734412128591347,
      "grad_norm": 0.23127299547195435,
      "learning_rate": 2.547854220066815e-06,
      "loss": 0.0214,
      "step": 2281920
    },
    {
      "epoch": 3.734444859030001,
      "grad_norm": 0.38396361470222473,
      "learning_rate": 2.5477883278532977e-06,
      "loss": 0.0097,
      "step": 2281940
    },
    {
      "epoch": 3.734477589468654,
      "grad_norm": 1.153454065322876,
      "learning_rate": 2.5477224356397804e-06,
      "loss": 0.0165,
      "step": 2281960
    },
    {
      "epoch": 3.7345103199073075,
      "grad_norm": 0.3016878664493561,
      "learning_rate": 2.5476565434262636e-06,
      "loss": 0.0137,
      "step": 2281980
    },
    {
      "epoch": 3.7345430503459607,
      "grad_norm": 0.14294320344924927,
      "learning_rate": 2.5475906512127463e-06,
      "loss": 0.0073,
      "step": 2282000
    },
    {
      "epoch": 3.7345757807846143,
      "grad_norm": 0.3421110510826111,
      "learning_rate": 2.547524758999229e-06,
      "loss": 0.0128,
      "step": 2282020
    },
    {
      "epoch": 3.7346085112232674,
      "grad_norm": 0.10308773070573807,
      "learning_rate": 2.547458866785712e-06,
      "loss": 0.0137,
      "step": 2282040
    },
    {
      "epoch": 3.7346412416619206,
      "grad_norm": 0.20420099794864655,
      "learning_rate": 2.547392974572195e-06,
      "loss": 0.0098,
      "step": 2282060
    },
    {
      "epoch": 3.734673972100574,
      "grad_norm": 0.2151390165090561,
      "learning_rate": 2.5473270823586777e-06,
      "loss": 0.0081,
      "step": 2282080
    },
    {
      "epoch": 3.7347067025392273,
      "grad_norm": 0.2475392073392868,
      "learning_rate": 2.5472611901451605e-06,
      "loss": 0.0164,
      "step": 2282100
    },
    {
      "epoch": 3.734739432977881,
      "grad_norm": 0.2052743285894394,
      "learning_rate": 2.547195297931644e-06,
      "loss": 0.011,
      "step": 2282120
    },
    {
      "epoch": 3.734772163416534,
      "grad_norm": 0.20216920971870422,
      "learning_rate": 2.547129405718127e-06,
      "loss": 0.0074,
      "step": 2282140
    },
    {
      "epoch": 3.7348048938551877,
      "grad_norm": 0.2171047478914261,
      "learning_rate": 2.5470635135046095e-06,
      "loss": 0.0056,
      "step": 2282160
    },
    {
      "epoch": 3.734837624293841,
      "grad_norm": 0.21092185378074646,
      "learning_rate": 2.5469976212910923e-06,
      "loss": 0.0079,
      "step": 2282180
    },
    {
      "epoch": 3.734870354732494,
      "grad_norm": 0.41492247581481934,
      "learning_rate": 2.5469317290775754e-06,
      "loss": 0.0094,
      "step": 2282200
    },
    {
      "epoch": 3.7349030851711476,
      "grad_norm": 0.22946690022945404,
      "learning_rate": 2.546865836864058e-06,
      "loss": 0.0108,
      "step": 2282220
    },
    {
      "epoch": 3.7349358156098007,
      "grad_norm": 0.22553712129592896,
      "learning_rate": 2.546799944650541e-06,
      "loss": 0.009,
      "step": 2282240
    },
    {
      "epoch": 3.7349685460484543,
      "grad_norm": 0.08729995042085648,
      "learning_rate": 2.5467340524370237e-06,
      "loss": 0.0101,
      "step": 2282260
    },
    {
      "epoch": 3.7350012764871074,
      "grad_norm": 0.26651954650878906,
      "learning_rate": 2.5466681602235064e-06,
      "loss": 0.0073,
      "step": 2282280
    },
    {
      "epoch": 3.735034006925761,
      "grad_norm": 0.2223583310842514,
      "learning_rate": 2.5466022680099896e-06,
      "loss": 0.0083,
      "step": 2282300
    },
    {
      "epoch": 3.735066737364414,
      "grad_norm": 0.11440104246139526,
      "learning_rate": 2.5465363757964723e-06,
      "loss": 0.0098,
      "step": 2282320
    },
    {
      "epoch": 3.7350994678030673,
      "grad_norm": 0.3701237142086029,
      "learning_rate": 2.546470483582955e-06,
      "loss": 0.0122,
      "step": 2282340
    },
    {
      "epoch": 3.735132198241721,
      "grad_norm": 0.5982440710067749,
      "learning_rate": 2.5464045913694378e-06,
      "loss": 0.008,
      "step": 2282360
    },
    {
      "epoch": 3.735164928680374,
      "grad_norm": 0.48412221670150757,
      "learning_rate": 2.546338699155921e-06,
      "loss": 0.0142,
      "step": 2282380
    },
    {
      "epoch": 3.7351976591190272,
      "grad_norm": 0.2139304131269455,
      "learning_rate": 2.5462728069424037e-06,
      "loss": 0.0078,
      "step": 2282400
    },
    {
      "epoch": 3.735230389557681,
      "grad_norm": 0.29687750339508057,
      "learning_rate": 2.5462069147288864e-06,
      "loss": 0.011,
      "step": 2282420
    },
    {
      "epoch": 3.7352631199963344,
      "grad_norm": 0.1960722953081131,
      "learning_rate": 2.546141022515369e-06,
      "loss": 0.0093,
      "step": 2282440
    },
    {
      "epoch": 3.7352958504349876,
      "grad_norm": 0.541085958480835,
      "learning_rate": 2.5460751303018527e-06,
      "loss": 0.0122,
      "step": 2282460
    },
    {
      "epoch": 3.7353285808736407,
      "grad_norm": 0.13132163882255554,
      "learning_rate": 2.5460092380883355e-06,
      "loss": 0.0085,
      "step": 2282480
    },
    {
      "epoch": 3.7353613113122943,
      "grad_norm": 0.2873170077800751,
      "learning_rate": 2.5459433458748182e-06,
      "loss": 0.0109,
      "step": 2282500
    },
    {
      "epoch": 3.7353940417509475,
      "grad_norm": 0.19865381717681885,
      "learning_rate": 2.5458774536613014e-06,
      "loss": 0.0117,
      "step": 2282520
    },
    {
      "epoch": 3.7354267721896006,
      "grad_norm": 0.1476277858018875,
      "learning_rate": 2.545811561447784e-06,
      "loss": 0.0078,
      "step": 2282540
    },
    {
      "epoch": 3.735459502628254,
      "grad_norm": 0.28133922815322876,
      "learning_rate": 2.545745669234267e-06,
      "loss": 0.0095,
      "step": 2282560
    },
    {
      "epoch": 3.735492233066908,
      "grad_norm": 0.47414079308509827,
      "learning_rate": 2.5456797770207496e-06,
      "loss": 0.0108,
      "step": 2282580
    },
    {
      "epoch": 3.735524963505561,
      "grad_norm": 0.4216848313808441,
      "learning_rate": 2.5456138848072328e-06,
      "loss": 0.0104,
      "step": 2282600
    },
    {
      "epoch": 3.735557693944214,
      "grad_norm": 0.1778620034456253,
      "learning_rate": 2.5455479925937155e-06,
      "loss": 0.0122,
      "step": 2282620
    },
    {
      "epoch": 3.7355904243828677,
      "grad_norm": 0.3785017132759094,
      "learning_rate": 2.5454821003801983e-06,
      "loss": 0.0094,
      "step": 2282640
    },
    {
      "epoch": 3.735623154821521,
      "grad_norm": 0.11365456879138947,
      "learning_rate": 2.545416208166681e-06,
      "loss": 0.0122,
      "step": 2282660
    },
    {
      "epoch": 3.735655885260174,
      "grad_norm": 0.3715183734893799,
      "learning_rate": 2.545350315953164e-06,
      "loss": 0.0094,
      "step": 2282680
    },
    {
      "epoch": 3.7356886156988276,
      "grad_norm": 0.17098389565944672,
      "learning_rate": 2.545284423739647e-06,
      "loss": 0.0119,
      "step": 2282700
    },
    {
      "epoch": 3.735721346137481,
      "grad_norm": 0.2098456472158432,
      "learning_rate": 2.5452185315261296e-06,
      "loss": 0.0119,
      "step": 2282720
    },
    {
      "epoch": 3.7357540765761343,
      "grad_norm": 0.5443642139434814,
      "learning_rate": 2.5451526393126124e-06,
      "loss": 0.0077,
      "step": 2282740
    },
    {
      "epoch": 3.7357868070147875,
      "grad_norm": 0.06460139900445938,
      "learning_rate": 2.545086747099095e-06,
      "loss": 0.0083,
      "step": 2282760
    },
    {
      "epoch": 3.735819537453441,
      "grad_norm": 0.10096324235200882,
      "learning_rate": 2.5450208548855783e-06,
      "loss": 0.0136,
      "step": 2282780
    },
    {
      "epoch": 3.7358522678920942,
      "grad_norm": 0.22071173787117004,
      "learning_rate": 2.544954962672061e-06,
      "loss": 0.0091,
      "step": 2282800
    },
    {
      "epoch": 3.7358849983307474,
      "grad_norm": 0.1967039406299591,
      "learning_rate": 2.544889070458544e-06,
      "loss": 0.0101,
      "step": 2282820
    },
    {
      "epoch": 3.735917728769401,
      "grad_norm": 0.22577038407325745,
      "learning_rate": 2.5448231782450273e-06,
      "loss": 0.0094,
      "step": 2282840
    },
    {
      "epoch": 3.7359504592080546,
      "grad_norm": 0.10145916789770126,
      "learning_rate": 2.54475728603151e-06,
      "loss": 0.0109,
      "step": 2282860
    },
    {
      "epoch": 3.7359831896467077,
      "grad_norm": 0.12297622859477997,
      "learning_rate": 2.544691393817993e-06,
      "loss": 0.0089,
      "step": 2282880
    },
    {
      "epoch": 3.736015920085361,
      "grad_norm": 0.20965032279491425,
      "learning_rate": 2.5446255016044756e-06,
      "loss": 0.0088,
      "step": 2282900
    },
    {
      "epoch": 3.7360486505240145,
      "grad_norm": 0.3261328935623169,
      "learning_rate": 2.5445596093909587e-06,
      "loss": 0.0114,
      "step": 2282920
    },
    {
      "epoch": 3.7360813809626676,
      "grad_norm": 0.20509761571884155,
      "learning_rate": 2.5444937171774415e-06,
      "loss": 0.0132,
      "step": 2282940
    },
    {
      "epoch": 3.7361141114013208,
      "grad_norm": 0.29047277569770813,
      "learning_rate": 2.544427824963924e-06,
      "loss": 0.0106,
      "step": 2282960
    },
    {
      "epoch": 3.7361468418399744,
      "grad_norm": 0.8187554478645325,
      "learning_rate": 2.544361932750407e-06,
      "loss": 0.0138,
      "step": 2282980
    },
    {
      "epoch": 3.7361795722786275,
      "grad_norm": 0.11540165543556213,
      "learning_rate": 2.54429604053689e-06,
      "loss": 0.0089,
      "step": 2283000
    },
    {
      "epoch": 3.736212302717281,
      "grad_norm": 0.08505582064390182,
      "learning_rate": 2.544230148323373e-06,
      "loss": 0.0106,
      "step": 2283020
    },
    {
      "epoch": 3.7362450331559343,
      "grad_norm": 0.3668884038925171,
      "learning_rate": 2.5441642561098556e-06,
      "loss": 0.0101,
      "step": 2283040
    },
    {
      "epoch": 3.736277763594588,
      "grad_norm": 0.6082322597503662,
      "learning_rate": 2.5440983638963383e-06,
      "loss": 0.0133,
      "step": 2283060
    },
    {
      "epoch": 3.736310494033241,
      "grad_norm": 0.42112997174263,
      "learning_rate": 2.5440324716828215e-06,
      "loss": 0.0089,
      "step": 2283080
    },
    {
      "epoch": 3.736343224471894,
      "grad_norm": 0.2553267180919647,
      "learning_rate": 2.5439665794693042e-06,
      "loss": 0.0114,
      "step": 2283100
    },
    {
      "epoch": 3.7363759549105477,
      "grad_norm": 0.5605299472808838,
      "learning_rate": 2.543900687255787e-06,
      "loss": 0.0145,
      "step": 2283120
    },
    {
      "epoch": 3.736408685349201,
      "grad_norm": 0.1568630337715149,
      "learning_rate": 2.5438347950422697e-06,
      "loss": 0.0116,
      "step": 2283140
    },
    {
      "epoch": 3.7364414157878545,
      "grad_norm": 0.07824459671974182,
      "learning_rate": 2.543768902828753e-06,
      "loss": 0.0176,
      "step": 2283160
    },
    {
      "epoch": 3.7364741462265076,
      "grad_norm": 0.5107955932617188,
      "learning_rate": 2.543703010615236e-06,
      "loss": 0.0164,
      "step": 2283180
    },
    {
      "epoch": 3.7365068766651612,
      "grad_norm": 0.28418436646461487,
      "learning_rate": 2.5436371184017188e-06,
      "loss": 0.0099,
      "step": 2283200
    },
    {
      "epoch": 3.7365396071038144,
      "grad_norm": 0.2229011058807373,
      "learning_rate": 2.543571226188202e-06,
      "loss": 0.0101,
      "step": 2283220
    },
    {
      "epoch": 3.7365723375424675,
      "grad_norm": 0.6774258613586426,
      "learning_rate": 2.5435053339746847e-06,
      "loss": 0.0126,
      "step": 2283240
    },
    {
      "epoch": 3.736605067981121,
      "grad_norm": 0.1831996589899063,
      "learning_rate": 2.5434394417611674e-06,
      "loss": 0.0115,
      "step": 2283260
    },
    {
      "epoch": 3.7366377984197743,
      "grad_norm": 0.24279041588306427,
      "learning_rate": 2.54337354954765e-06,
      "loss": 0.0089,
      "step": 2283280
    },
    {
      "epoch": 3.736670528858428,
      "grad_norm": 0.24042750895023346,
      "learning_rate": 2.543307657334133e-06,
      "loss": 0.0107,
      "step": 2283300
    },
    {
      "epoch": 3.736703259297081,
      "grad_norm": 0.25553828477859497,
      "learning_rate": 2.543241765120616e-06,
      "loss": 0.0089,
      "step": 2283320
    },
    {
      "epoch": 3.7367359897357346,
      "grad_norm": 0.3841504454612732,
      "learning_rate": 2.543175872907099e-06,
      "loss": 0.0103,
      "step": 2283340
    },
    {
      "epoch": 3.7367687201743878,
      "grad_norm": 0.26487302780151367,
      "learning_rate": 2.5431099806935815e-06,
      "loss": 0.0141,
      "step": 2283360
    },
    {
      "epoch": 3.736801450613041,
      "grad_norm": 0.45671704411506653,
      "learning_rate": 2.5430440884800643e-06,
      "loss": 0.0117,
      "step": 2283380
    },
    {
      "epoch": 3.7368341810516945,
      "grad_norm": 0.16853681206703186,
      "learning_rate": 2.5429781962665474e-06,
      "loss": 0.0124,
      "step": 2283400
    },
    {
      "epoch": 3.7368669114903477,
      "grad_norm": 0.042102374136447906,
      "learning_rate": 2.54291230405303e-06,
      "loss": 0.0111,
      "step": 2283420
    },
    {
      "epoch": 3.7368996419290013,
      "grad_norm": 0.3797741234302521,
      "learning_rate": 2.542846411839513e-06,
      "loss": 0.0097,
      "step": 2283440
    },
    {
      "epoch": 3.7369323723676544,
      "grad_norm": 0.4145975708961487,
      "learning_rate": 2.5427805196259957e-06,
      "loss": 0.0122,
      "step": 2283460
    },
    {
      "epoch": 3.736965102806308,
      "grad_norm": 0.12917225062847137,
      "learning_rate": 2.542714627412479e-06,
      "loss": 0.009,
      "step": 2283480
    },
    {
      "epoch": 3.736997833244961,
      "grad_norm": 0.8324941396713257,
      "learning_rate": 2.5426487351989616e-06,
      "loss": 0.0138,
      "step": 2283500
    },
    {
      "epoch": 3.7370305636836143,
      "grad_norm": 0.6602490544319153,
      "learning_rate": 2.5425828429854447e-06,
      "loss": 0.0131,
      "step": 2283520
    },
    {
      "epoch": 3.737063294122268,
      "grad_norm": 0.06800059229135513,
      "learning_rate": 2.542516950771928e-06,
      "loss": 0.0099,
      "step": 2283540
    },
    {
      "epoch": 3.737096024560921,
      "grad_norm": 0.14672242105007172,
      "learning_rate": 2.5424510585584106e-06,
      "loss": 0.0097,
      "step": 2283560
    },
    {
      "epoch": 3.7371287549995746,
      "grad_norm": 0.5097994208335876,
      "learning_rate": 2.5423851663448934e-06,
      "loss": 0.0119,
      "step": 2283580
    },
    {
      "epoch": 3.737161485438228,
      "grad_norm": 0.5490310192108154,
      "learning_rate": 2.542319274131376e-06,
      "loss": 0.0059,
      "step": 2283600
    },
    {
      "epoch": 3.7371942158768814,
      "grad_norm": 0.4465612769126892,
      "learning_rate": 2.5422533819178593e-06,
      "loss": 0.0093,
      "step": 2283620
    },
    {
      "epoch": 3.7372269463155345,
      "grad_norm": 0.26208189129829407,
      "learning_rate": 2.542187489704342e-06,
      "loss": 0.0105,
      "step": 2283640
    },
    {
      "epoch": 3.7372596767541877,
      "grad_norm": 0.11305630952119827,
      "learning_rate": 2.5421215974908248e-06,
      "loss": 0.0114,
      "step": 2283660
    },
    {
      "epoch": 3.7372924071928413,
      "grad_norm": 0.0873633325099945,
      "learning_rate": 2.5420557052773075e-06,
      "loss": 0.0182,
      "step": 2283680
    },
    {
      "epoch": 3.7373251376314944,
      "grad_norm": 0.10433169454336166,
      "learning_rate": 2.5419898130637907e-06,
      "loss": 0.0136,
      "step": 2283700
    },
    {
      "epoch": 3.737357868070148,
      "grad_norm": 0.5033484101295471,
      "learning_rate": 2.5419239208502734e-06,
      "loss": 0.0094,
      "step": 2283720
    },
    {
      "epoch": 3.737390598508801,
      "grad_norm": 0.5542687177658081,
      "learning_rate": 2.541858028636756e-06,
      "loss": 0.0115,
      "step": 2283740
    },
    {
      "epoch": 3.7374233289474548,
      "grad_norm": 0.3887477219104767,
      "learning_rate": 2.541792136423239e-06,
      "loss": 0.0088,
      "step": 2283760
    },
    {
      "epoch": 3.737456059386108,
      "grad_norm": 0.4549505412578583,
      "learning_rate": 2.5417262442097216e-06,
      "loss": 0.0119,
      "step": 2283780
    },
    {
      "epoch": 3.737488789824761,
      "grad_norm": 0.1916419118642807,
      "learning_rate": 2.5416603519962048e-06,
      "loss": 0.0135,
      "step": 2283800
    },
    {
      "epoch": 3.7375215202634147,
      "grad_norm": 0.3195644021034241,
      "learning_rate": 2.5415944597826875e-06,
      "loss": 0.0119,
      "step": 2283820
    },
    {
      "epoch": 3.737554250702068,
      "grad_norm": 0.3996293544769287,
      "learning_rate": 2.5415285675691703e-06,
      "loss": 0.0122,
      "step": 2283840
    },
    {
      "epoch": 3.7375869811407214,
      "grad_norm": 0.2781708836555481,
      "learning_rate": 2.541462675355653e-06,
      "loss": 0.0083,
      "step": 2283860
    },
    {
      "epoch": 3.7376197115793746,
      "grad_norm": 0.13087661564350128,
      "learning_rate": 2.5413967831421366e-06,
      "loss": 0.0114,
      "step": 2283880
    },
    {
      "epoch": 3.737652442018028,
      "grad_norm": 0.2668106257915497,
      "learning_rate": 2.5413308909286193e-06,
      "loss": 0.0108,
      "step": 2283900
    },
    {
      "epoch": 3.7376851724566813,
      "grad_norm": 0.14943042397499084,
      "learning_rate": 2.541264998715102e-06,
      "loss": 0.0087,
      "step": 2283920
    },
    {
      "epoch": 3.7377179028953345,
      "grad_norm": 0.248058021068573,
      "learning_rate": 2.5411991065015852e-06,
      "loss": 0.0098,
      "step": 2283940
    },
    {
      "epoch": 3.737750633333988,
      "grad_norm": 1.9298993349075317,
      "learning_rate": 2.541133214288068e-06,
      "loss": 0.0135,
      "step": 2283960
    },
    {
      "epoch": 3.737783363772641,
      "grad_norm": 0.08133043348789215,
      "learning_rate": 2.5410673220745507e-06,
      "loss": 0.0121,
      "step": 2283980
    },
    {
      "epoch": 3.7378160942112943,
      "grad_norm": 0.32335662841796875,
      "learning_rate": 2.5410014298610334e-06,
      "loss": 0.0158,
      "step": 2284000
    },
    {
      "epoch": 3.737848824649948,
      "grad_norm": 0.4475342929363251,
      "learning_rate": 2.5409355376475166e-06,
      "loss": 0.0142,
      "step": 2284020
    },
    {
      "epoch": 3.7378815550886015,
      "grad_norm": 0.0750090554356575,
      "learning_rate": 2.5408696454339994e-06,
      "loss": 0.0117,
      "step": 2284040
    },
    {
      "epoch": 3.7379142855272547,
      "grad_norm": 0.04895509406924248,
      "learning_rate": 2.540803753220482e-06,
      "loss": 0.0115,
      "step": 2284060
    },
    {
      "epoch": 3.737947015965908,
      "grad_norm": 0.19430311024188995,
      "learning_rate": 2.540737861006965e-06,
      "loss": 0.0094,
      "step": 2284080
    },
    {
      "epoch": 3.7379797464045614,
      "grad_norm": 0.35655537247657776,
      "learning_rate": 2.540671968793448e-06,
      "loss": 0.0147,
      "step": 2284100
    },
    {
      "epoch": 3.7380124768432146,
      "grad_norm": 0.3600994646549225,
      "learning_rate": 2.5406060765799307e-06,
      "loss": 0.0104,
      "step": 2284120
    },
    {
      "epoch": 3.7380452072818677,
      "grad_norm": 0.4908948540687561,
      "learning_rate": 2.5405401843664135e-06,
      "loss": 0.0149,
      "step": 2284140
    },
    {
      "epoch": 3.7380779377205213,
      "grad_norm": 0.20516160130500793,
      "learning_rate": 2.5404742921528962e-06,
      "loss": 0.0094,
      "step": 2284160
    },
    {
      "epoch": 3.738110668159175,
      "grad_norm": 0.19383038580417633,
      "learning_rate": 2.540408399939379e-06,
      "loss": 0.0081,
      "step": 2284180
    },
    {
      "epoch": 3.738143398597828,
      "grad_norm": 0.09259901195764542,
      "learning_rate": 2.540342507725862e-06,
      "loss": 0.0114,
      "step": 2284200
    },
    {
      "epoch": 3.738176129036481,
      "grad_norm": 0.11627469956874847,
      "learning_rate": 2.5402766155123453e-06,
      "loss": 0.0162,
      "step": 2284220
    },
    {
      "epoch": 3.738208859475135,
      "grad_norm": 0.3645547032356262,
      "learning_rate": 2.540210723298828e-06,
      "loss": 0.0112,
      "step": 2284240
    },
    {
      "epoch": 3.738241589913788,
      "grad_norm": 0.34258776903152466,
      "learning_rate": 2.540144831085311e-06,
      "loss": 0.0106,
      "step": 2284260
    },
    {
      "epoch": 3.738274320352441,
      "grad_norm": 0.13925699889659882,
      "learning_rate": 2.540078938871794e-06,
      "loss": 0.0093,
      "step": 2284280
    },
    {
      "epoch": 3.7383070507910947,
      "grad_norm": 1.0491986274719238,
      "learning_rate": 2.5400130466582767e-06,
      "loss": 0.014,
      "step": 2284300
    },
    {
      "epoch": 3.7383397812297483,
      "grad_norm": 0.3566596210002899,
      "learning_rate": 2.5399471544447594e-06,
      "loss": 0.0097,
      "step": 2284320
    },
    {
      "epoch": 3.7383725116684015,
      "grad_norm": 0.3207089900970459,
      "learning_rate": 2.5398812622312426e-06,
      "loss": 0.0059,
      "step": 2284340
    },
    {
      "epoch": 3.7384052421070546,
      "grad_norm": 0.07455801218748093,
      "learning_rate": 2.5398153700177253e-06,
      "loss": 0.0104,
      "step": 2284360
    },
    {
      "epoch": 3.738437972545708,
      "grad_norm": 0.3192192316055298,
      "learning_rate": 2.539749477804208e-06,
      "loss": 0.0094,
      "step": 2284380
    },
    {
      "epoch": 3.7384707029843613,
      "grad_norm": 0.3333078622817993,
      "learning_rate": 2.5396835855906908e-06,
      "loss": 0.013,
      "step": 2284400
    },
    {
      "epoch": 3.7385034334230145,
      "grad_norm": 0.20239757001399994,
      "learning_rate": 2.539617693377174e-06,
      "loss": 0.0111,
      "step": 2284420
    },
    {
      "epoch": 3.738536163861668,
      "grad_norm": 0.12437169253826141,
      "learning_rate": 2.5395518011636567e-06,
      "loss": 0.0107,
      "step": 2284440
    },
    {
      "epoch": 3.7385688943003217,
      "grad_norm": 0.5947098135948181,
      "learning_rate": 2.5394859089501394e-06,
      "loss": 0.013,
      "step": 2284460
    },
    {
      "epoch": 3.738601624738975,
      "grad_norm": 0.05806167051196098,
      "learning_rate": 2.539420016736622e-06,
      "loss": 0.0113,
      "step": 2284480
    },
    {
      "epoch": 3.738634355177628,
      "grad_norm": 0.3001466989517212,
      "learning_rate": 2.5393541245231053e-06,
      "loss": 0.0135,
      "step": 2284500
    },
    {
      "epoch": 3.7386670856162816,
      "grad_norm": 0.11736339330673218,
      "learning_rate": 2.539288232309588e-06,
      "loss": 0.0099,
      "step": 2284520
    },
    {
      "epoch": 3.7386998160549347,
      "grad_norm": 0.18797099590301514,
      "learning_rate": 2.539222340096071e-06,
      "loss": 0.0142,
      "step": 2284540
    },
    {
      "epoch": 3.738732546493588,
      "grad_norm": 0.10048291087150574,
      "learning_rate": 2.5391564478825535e-06,
      "loss": 0.0092,
      "step": 2284560
    },
    {
      "epoch": 3.7387652769322415,
      "grad_norm": 0.8551233410835266,
      "learning_rate": 2.539090555669037e-06,
      "loss": 0.0154,
      "step": 2284580
    },
    {
      "epoch": 3.7387980073708946,
      "grad_norm": 0.6479488611221313,
      "learning_rate": 2.53902466345552e-06,
      "loss": 0.0099,
      "step": 2284600
    },
    {
      "epoch": 3.738830737809548,
      "grad_norm": 0.20761854946613312,
      "learning_rate": 2.5389587712420026e-06,
      "loss": 0.01,
      "step": 2284620
    },
    {
      "epoch": 3.7388634682482014,
      "grad_norm": 0.09815537184476852,
      "learning_rate": 2.5388928790284858e-06,
      "loss": 0.0107,
      "step": 2284640
    },
    {
      "epoch": 3.738896198686855,
      "grad_norm": 0.131356880068779,
      "learning_rate": 2.5388269868149685e-06,
      "loss": 0.007,
      "step": 2284660
    },
    {
      "epoch": 3.738928929125508,
      "grad_norm": 0.23973257839679718,
      "learning_rate": 2.5387610946014513e-06,
      "loss": 0.0121,
      "step": 2284680
    },
    {
      "epoch": 3.7389616595641613,
      "grad_norm": 0.1075737401843071,
      "learning_rate": 2.538695202387934e-06,
      "loss": 0.009,
      "step": 2284700
    },
    {
      "epoch": 3.738994390002815,
      "grad_norm": 0.1581292748451233,
      "learning_rate": 2.5386293101744167e-06,
      "loss": 0.0112,
      "step": 2284720
    },
    {
      "epoch": 3.739027120441468,
      "grad_norm": 0.25837913155555725,
      "learning_rate": 2.5385634179609e-06,
      "loss": 0.0091,
      "step": 2284740
    },
    {
      "epoch": 3.7390598508801216,
      "grad_norm": 0.5807052254676819,
      "learning_rate": 2.5384975257473826e-06,
      "loss": 0.0142,
      "step": 2284760
    },
    {
      "epoch": 3.7390925813187748,
      "grad_norm": 0.5394447445869446,
      "learning_rate": 2.5384316335338654e-06,
      "loss": 0.0089,
      "step": 2284780
    },
    {
      "epoch": 3.7391253117574283,
      "grad_norm": 0.13228119909763336,
      "learning_rate": 2.538365741320348e-06,
      "loss": 0.0068,
      "step": 2284800
    },
    {
      "epoch": 3.7391580421960815,
      "grad_norm": 0.0860903188586235,
      "learning_rate": 2.5382998491068313e-06,
      "loss": 0.0106,
      "step": 2284820
    },
    {
      "epoch": 3.7391907726347346,
      "grad_norm": 0.16623052954673767,
      "learning_rate": 2.538233956893314e-06,
      "loss": 0.0096,
      "step": 2284840
    },
    {
      "epoch": 3.7392235030733882,
      "grad_norm": 0.09100854396820068,
      "learning_rate": 2.5381680646797968e-06,
      "loss": 0.0117,
      "step": 2284860
    },
    {
      "epoch": 3.7392562335120414,
      "grad_norm": 0.3094521760940552,
      "learning_rate": 2.5381021724662795e-06,
      "loss": 0.0117,
      "step": 2284880
    },
    {
      "epoch": 3.739288963950695,
      "grad_norm": 0.4447149932384491,
      "learning_rate": 2.5380362802527627e-06,
      "loss": 0.0095,
      "step": 2284900
    },
    {
      "epoch": 3.739321694389348,
      "grad_norm": 0.2801174819469452,
      "learning_rate": 2.5379703880392454e-06,
      "loss": 0.0106,
      "step": 2284920
    },
    {
      "epoch": 3.7393544248280017,
      "grad_norm": 0.18284614384174347,
      "learning_rate": 2.5379044958257286e-06,
      "loss": 0.0124,
      "step": 2284940
    },
    {
      "epoch": 3.739387155266655,
      "grad_norm": 0.19852496683597565,
      "learning_rate": 2.5378386036122117e-06,
      "loss": 0.0076,
      "step": 2284960
    },
    {
      "epoch": 3.739419885705308,
      "grad_norm": 0.29350125789642334,
      "learning_rate": 2.5377727113986945e-06,
      "loss": 0.0172,
      "step": 2284980
    },
    {
      "epoch": 3.7394526161439616,
      "grad_norm": 0.38065460324287415,
      "learning_rate": 2.537706819185177e-06,
      "loss": 0.016,
      "step": 2285000
    },
    {
      "epoch": 3.7394853465826148,
      "grad_norm": 0.17187167704105377,
      "learning_rate": 2.53764092697166e-06,
      "loss": 0.0102,
      "step": 2285020
    },
    {
      "epoch": 3.7395180770212684,
      "grad_norm": 0.21574115753173828,
      "learning_rate": 2.537575034758143e-06,
      "loss": 0.0114,
      "step": 2285040
    },
    {
      "epoch": 3.7395508074599215,
      "grad_norm": 0.13564066588878632,
      "learning_rate": 2.537509142544626e-06,
      "loss": 0.0173,
      "step": 2285060
    },
    {
      "epoch": 3.739583537898575,
      "grad_norm": 0.40850332379341125,
      "learning_rate": 2.5374432503311086e-06,
      "loss": 0.0113,
      "step": 2285080
    },
    {
      "epoch": 3.7396162683372283,
      "grad_norm": 0.20721977949142456,
      "learning_rate": 2.5373773581175913e-06,
      "loss": 0.0131,
      "step": 2285100
    },
    {
      "epoch": 3.7396489987758814,
      "grad_norm": 0.48908016085624695,
      "learning_rate": 2.5373114659040745e-06,
      "loss": 0.0082,
      "step": 2285120
    },
    {
      "epoch": 3.739681729214535,
      "grad_norm": 0.3622359335422516,
      "learning_rate": 2.5372455736905572e-06,
      "loss": 0.0102,
      "step": 2285140
    },
    {
      "epoch": 3.739714459653188,
      "grad_norm": 0.2057519108057022,
      "learning_rate": 2.53717968147704e-06,
      "loss": 0.0177,
      "step": 2285160
    },
    {
      "epoch": 3.7397471900918418,
      "grad_norm": 0.12691162526607513,
      "learning_rate": 2.5371137892635227e-06,
      "loss": 0.0085,
      "step": 2285180
    },
    {
      "epoch": 3.739779920530495,
      "grad_norm": 0.3112489581108093,
      "learning_rate": 2.5370478970500055e-06,
      "loss": 0.0072,
      "step": 2285200
    },
    {
      "epoch": 3.7398126509691485,
      "grad_norm": 0.6334123015403748,
      "learning_rate": 2.5369820048364886e-06,
      "loss": 0.0168,
      "step": 2285220
    },
    {
      "epoch": 3.7398453814078016,
      "grad_norm": 0.23778985440731049,
      "learning_rate": 2.5369161126229714e-06,
      "loss": 0.009,
      "step": 2285240
    },
    {
      "epoch": 3.739878111846455,
      "grad_norm": 0.49594685435295105,
      "learning_rate": 2.536850220409454e-06,
      "loss": 0.0112,
      "step": 2285260
    },
    {
      "epoch": 3.7399108422851084,
      "grad_norm": 0.26341572403907776,
      "learning_rate": 2.5367843281959377e-06,
      "loss": 0.0087,
      "step": 2285280
    },
    {
      "epoch": 3.7399435727237615,
      "grad_norm": 0.16226494312286377,
      "learning_rate": 2.5367184359824204e-06,
      "loss": 0.0103,
      "step": 2285300
    },
    {
      "epoch": 3.739976303162415,
      "grad_norm": 0.11455991119146347,
      "learning_rate": 2.536652543768903e-06,
      "loss": 0.009,
      "step": 2285320
    },
    {
      "epoch": 3.7400090336010683,
      "grad_norm": 0.2398165762424469,
      "learning_rate": 2.536586651555386e-06,
      "loss": 0.0135,
      "step": 2285340
    },
    {
      "epoch": 3.740041764039722,
      "grad_norm": 0.29862213134765625,
      "learning_rate": 2.536520759341869e-06,
      "loss": 0.0143,
      "step": 2285360
    },
    {
      "epoch": 3.740074494478375,
      "grad_norm": 0.17264269292354584,
      "learning_rate": 2.536454867128352e-06,
      "loss": 0.0107,
      "step": 2285380
    },
    {
      "epoch": 3.740107224917028,
      "grad_norm": 0.37803182005882263,
      "learning_rate": 2.5363889749148345e-06,
      "loss": 0.0081,
      "step": 2285400
    },
    {
      "epoch": 3.7401399553556818,
      "grad_norm": 0.06105535849928856,
      "learning_rate": 2.5363230827013173e-06,
      "loss": 0.0068,
      "step": 2285420
    },
    {
      "epoch": 3.740172685794335,
      "grad_norm": 0.24160557985305786,
      "learning_rate": 2.5362571904878004e-06,
      "loss": 0.008,
      "step": 2285440
    },
    {
      "epoch": 3.740205416232988,
      "grad_norm": 0.2664431631565094,
      "learning_rate": 2.536191298274283e-06,
      "loss": 0.0073,
      "step": 2285460
    },
    {
      "epoch": 3.7402381466716417,
      "grad_norm": 0.6022338271141052,
      "learning_rate": 2.536125406060766e-06,
      "loss": 0.0146,
      "step": 2285480
    },
    {
      "epoch": 3.7402708771102953,
      "grad_norm": 0.2768556475639343,
      "learning_rate": 2.5360595138472487e-06,
      "loss": 0.0095,
      "step": 2285500
    },
    {
      "epoch": 3.7403036075489484,
      "grad_norm": 0.770150363445282,
      "learning_rate": 2.535993621633732e-06,
      "loss": 0.0105,
      "step": 2285520
    },
    {
      "epoch": 3.7403363379876016,
      "grad_norm": 0.28053033351898193,
      "learning_rate": 2.5359277294202146e-06,
      "loss": 0.0096,
      "step": 2285540
    },
    {
      "epoch": 3.740369068426255,
      "grad_norm": 0.24597658216953278,
      "learning_rate": 2.5358618372066973e-06,
      "loss": 0.0066,
      "step": 2285560
    },
    {
      "epoch": 3.7404017988649083,
      "grad_norm": 0.2639786899089813,
      "learning_rate": 2.53579594499318e-06,
      "loss": 0.0064,
      "step": 2285580
    },
    {
      "epoch": 3.7404345293035615,
      "grad_norm": 0.6288033723831177,
      "learning_rate": 2.535730052779663e-06,
      "loss": 0.0118,
      "step": 2285600
    },
    {
      "epoch": 3.740467259742215,
      "grad_norm": 0.29995375871658325,
      "learning_rate": 2.535664160566146e-06,
      "loss": 0.0077,
      "step": 2285620
    },
    {
      "epoch": 3.7404999901808687,
      "grad_norm": 0.21683073043823242,
      "learning_rate": 2.535598268352629e-06,
      "loss": 0.0079,
      "step": 2285640
    },
    {
      "epoch": 3.740532720619522,
      "grad_norm": 0.3795689344406128,
      "learning_rate": 2.5355323761391123e-06,
      "loss": 0.0093,
      "step": 2285660
    },
    {
      "epoch": 3.740565451058175,
      "grad_norm": 0.08906476944684982,
      "learning_rate": 2.535466483925595e-06,
      "loss": 0.0109,
      "step": 2285680
    },
    {
      "epoch": 3.7405981814968285,
      "grad_norm": 0.47591596841812134,
      "learning_rate": 2.5354005917120778e-06,
      "loss": 0.0144,
      "step": 2285700
    },
    {
      "epoch": 3.7406309119354817,
      "grad_norm": 0.09881957620382309,
      "learning_rate": 2.5353346994985605e-06,
      "loss": 0.0078,
      "step": 2285720
    },
    {
      "epoch": 3.740663642374135,
      "grad_norm": 0.1465948224067688,
      "learning_rate": 2.5352688072850432e-06,
      "loss": 0.0103,
      "step": 2285740
    },
    {
      "epoch": 3.7406963728127884,
      "grad_norm": 0.30449020862579346,
      "learning_rate": 2.5352029150715264e-06,
      "loss": 0.0103,
      "step": 2285760
    },
    {
      "epoch": 3.740729103251442,
      "grad_norm": 0.3641110360622406,
      "learning_rate": 2.535137022858009e-06,
      "loss": 0.0168,
      "step": 2285780
    },
    {
      "epoch": 3.740761833690095,
      "grad_norm": 0.31241047382354736,
      "learning_rate": 2.535071130644492e-06,
      "loss": 0.0126,
      "step": 2285800
    },
    {
      "epoch": 3.7407945641287483,
      "grad_norm": 0.07211475819349289,
      "learning_rate": 2.5350052384309746e-06,
      "loss": 0.0098,
      "step": 2285820
    },
    {
      "epoch": 3.740827294567402,
      "grad_norm": 0.08846872299909592,
      "learning_rate": 2.5349393462174578e-06,
      "loss": 0.0135,
      "step": 2285840
    },
    {
      "epoch": 3.740860025006055,
      "grad_norm": 0.45114269852638245,
      "learning_rate": 2.5348734540039405e-06,
      "loss": 0.0155,
      "step": 2285860
    },
    {
      "epoch": 3.7408927554447082,
      "grad_norm": 0.1405288279056549,
      "learning_rate": 2.5348075617904233e-06,
      "loss": 0.0121,
      "step": 2285880
    },
    {
      "epoch": 3.740925485883362,
      "grad_norm": 0.22526758909225464,
      "learning_rate": 2.534741669576906e-06,
      "loss": 0.0119,
      "step": 2285900
    },
    {
      "epoch": 3.7409582163220154,
      "grad_norm": 0.1305195540189743,
      "learning_rate": 2.534675777363389e-06,
      "loss": 0.0079,
      "step": 2285920
    },
    {
      "epoch": 3.7409909467606686,
      "grad_norm": 0.16927669942378998,
      "learning_rate": 2.534609885149872e-06,
      "loss": 0.0124,
      "step": 2285940
    },
    {
      "epoch": 3.7410236771993217,
      "grad_norm": 0.3736167848110199,
      "learning_rate": 2.5345439929363546e-06,
      "loss": 0.0091,
      "step": 2285960
    },
    {
      "epoch": 3.7410564076379753,
      "grad_norm": 1.103988528251648,
      "learning_rate": 2.5344781007228382e-06,
      "loss": 0.0129,
      "step": 2285980
    },
    {
      "epoch": 3.7410891380766285,
      "grad_norm": 0.07000245153903961,
      "learning_rate": 2.534412208509321e-06,
      "loss": 0.0132,
      "step": 2286000
    },
    {
      "epoch": 3.7411218685152816,
      "grad_norm": 0.16177771985530853,
      "learning_rate": 2.5343463162958037e-06,
      "loss": 0.0084,
      "step": 2286020
    },
    {
      "epoch": 3.741154598953935,
      "grad_norm": 0.11747010052204132,
      "learning_rate": 2.5342804240822865e-06,
      "loss": 0.0098,
      "step": 2286040
    },
    {
      "epoch": 3.7411873293925884,
      "grad_norm": 0.08469248563051224,
      "learning_rate": 2.5342145318687696e-06,
      "loss": 0.0099,
      "step": 2286060
    },
    {
      "epoch": 3.741220059831242,
      "grad_norm": 0.062375981360673904,
      "learning_rate": 2.5341486396552524e-06,
      "loss": 0.0097,
      "step": 2286080
    },
    {
      "epoch": 3.741252790269895,
      "grad_norm": 0.10814742743968964,
      "learning_rate": 2.534082747441735e-06,
      "loss": 0.0075,
      "step": 2286100
    },
    {
      "epoch": 3.7412855207085487,
      "grad_norm": 0.04414798691868782,
      "learning_rate": 2.534016855228218e-06,
      "loss": 0.0073,
      "step": 2286120
    },
    {
      "epoch": 3.741318251147202,
      "grad_norm": 0.17875435948371887,
      "learning_rate": 2.5339509630147006e-06,
      "loss": 0.0084,
      "step": 2286140
    },
    {
      "epoch": 3.741350981585855,
      "grad_norm": 0.15761135518550873,
      "learning_rate": 2.5338850708011837e-06,
      "loss": 0.0114,
      "step": 2286160
    },
    {
      "epoch": 3.7413837120245086,
      "grad_norm": 0.563511312007904,
      "learning_rate": 2.5338191785876665e-06,
      "loss": 0.0156,
      "step": 2286180
    },
    {
      "epoch": 3.7414164424631617,
      "grad_norm": 0.07213267683982849,
      "learning_rate": 2.5337532863741492e-06,
      "loss": 0.0083,
      "step": 2286200
    },
    {
      "epoch": 3.7414491729018153,
      "grad_norm": 0.12683585286140442,
      "learning_rate": 2.533687394160632e-06,
      "loss": 0.0105,
      "step": 2286220
    },
    {
      "epoch": 3.7414819033404685,
      "grad_norm": 0.6430310606956482,
      "learning_rate": 2.533621501947115e-06,
      "loss": 0.0147,
      "step": 2286240
    },
    {
      "epoch": 3.741514633779122,
      "grad_norm": 0.22457613050937653,
      "learning_rate": 2.533555609733598e-06,
      "loss": 0.0212,
      "step": 2286260
    },
    {
      "epoch": 3.7415473642177752,
      "grad_norm": 0.29587364196777344,
      "learning_rate": 2.5334897175200806e-06,
      "loss": 0.012,
      "step": 2286280
    },
    {
      "epoch": 3.7415800946564284,
      "grad_norm": 0.2780260741710663,
      "learning_rate": 2.5334238253065633e-06,
      "loss": 0.0135,
      "step": 2286300
    },
    {
      "epoch": 3.741612825095082,
      "grad_norm": 0.10167872160673141,
      "learning_rate": 2.5333579330930465e-06,
      "loss": 0.0119,
      "step": 2286320
    },
    {
      "epoch": 3.741645555533735,
      "grad_norm": 0.25555419921875,
      "learning_rate": 2.5332920408795297e-06,
      "loss": 0.0137,
      "step": 2286340
    },
    {
      "epoch": 3.7416782859723887,
      "grad_norm": 0.7032939791679382,
      "learning_rate": 2.5332261486660124e-06,
      "loss": 0.0147,
      "step": 2286360
    },
    {
      "epoch": 3.741711016411042,
      "grad_norm": 0.3542536497116089,
      "learning_rate": 2.5331602564524956e-06,
      "loss": 0.0075,
      "step": 2286380
    },
    {
      "epoch": 3.7417437468496955,
      "grad_norm": 0.4151762127876282,
      "learning_rate": 2.5330943642389783e-06,
      "loss": 0.0118,
      "step": 2286400
    },
    {
      "epoch": 3.7417764772883486,
      "grad_norm": 0.5163936614990234,
      "learning_rate": 2.533028472025461e-06,
      "loss": 0.0083,
      "step": 2286420
    },
    {
      "epoch": 3.7418092077270018,
      "grad_norm": 0.19756090641021729,
      "learning_rate": 2.532962579811944e-06,
      "loss": 0.0093,
      "step": 2286440
    },
    {
      "epoch": 3.7418419381656554,
      "grad_norm": 0.4108999967575073,
      "learning_rate": 2.532896687598427e-06,
      "loss": 0.0161,
      "step": 2286460
    },
    {
      "epoch": 3.7418746686043085,
      "grad_norm": 0.27035337686538696,
      "learning_rate": 2.5328307953849097e-06,
      "loss": 0.0124,
      "step": 2286480
    },
    {
      "epoch": 3.741907399042962,
      "grad_norm": 0.43507617712020874,
      "learning_rate": 2.5327649031713924e-06,
      "loss": 0.0128,
      "step": 2286500
    },
    {
      "epoch": 3.7419401294816153,
      "grad_norm": 0.3276928663253784,
      "learning_rate": 2.532699010957875e-06,
      "loss": 0.0127,
      "step": 2286520
    },
    {
      "epoch": 3.741972859920269,
      "grad_norm": 0.23996008932590485,
      "learning_rate": 2.5326331187443583e-06,
      "loss": 0.0111,
      "step": 2286540
    },
    {
      "epoch": 3.742005590358922,
      "grad_norm": 0.2520655393600464,
      "learning_rate": 2.532567226530841e-06,
      "loss": 0.0112,
      "step": 2286560
    },
    {
      "epoch": 3.742038320797575,
      "grad_norm": 0.4862630367279053,
      "learning_rate": 2.532501334317324e-06,
      "loss": 0.0107,
      "step": 2286580
    },
    {
      "epoch": 3.7420710512362287,
      "grad_norm": 0.3635377883911133,
      "learning_rate": 2.5324354421038066e-06,
      "loss": 0.0115,
      "step": 2286600
    },
    {
      "epoch": 3.742103781674882,
      "grad_norm": 0.29110607504844666,
      "learning_rate": 2.5323695498902893e-06,
      "loss": 0.0114,
      "step": 2286620
    },
    {
      "epoch": 3.7421365121135355,
      "grad_norm": 0.11374559998512268,
      "learning_rate": 2.5323036576767725e-06,
      "loss": 0.0077,
      "step": 2286640
    },
    {
      "epoch": 3.7421692425521886,
      "grad_norm": 0.1629137545824051,
      "learning_rate": 2.532237765463255e-06,
      "loss": 0.0123,
      "step": 2286660
    },
    {
      "epoch": 3.7422019729908422,
      "grad_norm": 0.320407897233963,
      "learning_rate": 2.532171873249738e-06,
      "loss": 0.0081,
      "step": 2286680
    },
    {
      "epoch": 3.7422347034294954,
      "grad_norm": 1.8499786853790283,
      "learning_rate": 2.5321059810362215e-06,
      "loss": 0.0118,
      "step": 2286700
    },
    {
      "epoch": 3.7422674338681485,
      "grad_norm": 0.2591593861579895,
      "learning_rate": 2.5320400888227043e-06,
      "loss": 0.0111,
      "step": 2286720
    },
    {
      "epoch": 3.742300164306802,
      "grad_norm": 0.17561684548854828,
      "learning_rate": 2.531974196609187e-06,
      "loss": 0.0117,
      "step": 2286740
    },
    {
      "epoch": 3.7423328947454553,
      "grad_norm": 0.12159577012062073,
      "learning_rate": 2.5319083043956697e-06,
      "loss": 0.0124,
      "step": 2286760
    },
    {
      "epoch": 3.742365625184109,
      "grad_norm": 0.2825179696083069,
      "learning_rate": 2.531842412182153e-06,
      "loss": 0.0102,
      "step": 2286780
    },
    {
      "epoch": 3.742398355622762,
      "grad_norm": 0.202617809176445,
      "learning_rate": 2.5317765199686356e-06,
      "loss": 0.0123,
      "step": 2286800
    },
    {
      "epoch": 3.7424310860614156,
      "grad_norm": 0.356579065322876,
      "learning_rate": 2.5317106277551184e-06,
      "loss": 0.0096,
      "step": 2286820
    },
    {
      "epoch": 3.7424638165000688,
      "grad_norm": 0.11699173599481583,
      "learning_rate": 2.531644735541601e-06,
      "loss": 0.0105,
      "step": 2286840
    },
    {
      "epoch": 3.742496546938722,
      "grad_norm": 0.2918843626976013,
      "learning_rate": 2.5315788433280843e-06,
      "loss": 0.0062,
      "step": 2286860
    },
    {
      "epoch": 3.7425292773773755,
      "grad_norm": 0.20692716538906097,
      "learning_rate": 2.531512951114567e-06,
      "loss": 0.0114,
      "step": 2286880
    },
    {
      "epoch": 3.7425620078160287,
      "grad_norm": 0.19123367965221405,
      "learning_rate": 2.5314470589010498e-06,
      "loss": 0.0162,
      "step": 2286900
    },
    {
      "epoch": 3.7425947382546823,
      "grad_norm": 0.5109369158744812,
      "learning_rate": 2.5313811666875325e-06,
      "loss": 0.0106,
      "step": 2286920
    },
    {
      "epoch": 3.7426274686933354,
      "grad_norm": 0.26465389132499695,
      "learning_rate": 2.5313152744740157e-06,
      "loss": 0.0157,
      "step": 2286940
    },
    {
      "epoch": 3.742660199131989,
      "grad_norm": 0.3884337246417999,
      "learning_rate": 2.5312493822604984e-06,
      "loss": 0.015,
      "step": 2286960
    },
    {
      "epoch": 3.742692929570642,
      "grad_norm": 0.3234604001045227,
      "learning_rate": 2.531183490046981e-06,
      "loss": 0.0132,
      "step": 2286980
    },
    {
      "epoch": 3.7427256600092953,
      "grad_norm": 0.41679462790489197,
      "learning_rate": 2.531117597833464e-06,
      "loss": 0.0113,
      "step": 2287000
    },
    {
      "epoch": 3.742758390447949,
      "grad_norm": 0.16093017160892487,
      "learning_rate": 2.5310517056199466e-06,
      "loss": 0.0059,
      "step": 2287020
    },
    {
      "epoch": 3.742791120886602,
      "grad_norm": 0.09541846066713333,
      "learning_rate": 2.5309858134064302e-06,
      "loss": 0.0153,
      "step": 2287040
    },
    {
      "epoch": 3.742823851325255,
      "grad_norm": 0.8642978072166443,
      "learning_rate": 2.530919921192913e-06,
      "loss": 0.0106,
      "step": 2287060
    },
    {
      "epoch": 3.742856581763909,
      "grad_norm": 0.32547280192375183,
      "learning_rate": 2.530854028979396e-06,
      "loss": 0.0125,
      "step": 2287080
    },
    {
      "epoch": 3.7428893122025624,
      "grad_norm": 0.20426078140735626,
      "learning_rate": 2.530788136765879e-06,
      "loss": 0.021,
      "step": 2287100
    },
    {
      "epoch": 3.7429220426412155,
      "grad_norm": 0.07290948182344437,
      "learning_rate": 2.5307222445523616e-06,
      "loss": 0.0104,
      "step": 2287120
    },
    {
      "epoch": 3.7429547730798687,
      "grad_norm": 0.1860424280166626,
      "learning_rate": 2.5306563523388443e-06,
      "loss": 0.0106,
      "step": 2287140
    },
    {
      "epoch": 3.7429875035185223,
      "grad_norm": 0.08873879164457321,
      "learning_rate": 2.530590460125327e-06,
      "loss": 0.0153,
      "step": 2287160
    },
    {
      "epoch": 3.7430202339571754,
      "grad_norm": 0.2347966879606247,
      "learning_rate": 2.5305245679118102e-06,
      "loss": 0.0094,
      "step": 2287180
    },
    {
      "epoch": 3.7430529643958286,
      "grad_norm": 0.15945802628993988,
      "learning_rate": 2.530458675698293e-06,
      "loss": 0.0129,
      "step": 2287200
    },
    {
      "epoch": 3.743085694834482,
      "grad_norm": 0.042953286319971085,
      "learning_rate": 2.5303927834847757e-06,
      "loss": 0.0095,
      "step": 2287220
    },
    {
      "epoch": 3.7431184252731358,
      "grad_norm": 0.2655588388442993,
      "learning_rate": 2.5303268912712585e-06,
      "loss": 0.0109,
      "step": 2287240
    },
    {
      "epoch": 3.743151155711789,
      "grad_norm": 0.23051342368125916,
      "learning_rate": 2.5302609990577416e-06,
      "loss": 0.0101,
      "step": 2287260
    },
    {
      "epoch": 3.743183886150442,
      "grad_norm": 0.32767364382743835,
      "learning_rate": 2.5301951068442244e-06,
      "loss": 0.0108,
      "step": 2287280
    },
    {
      "epoch": 3.7432166165890957,
      "grad_norm": 0.15967752039432526,
      "learning_rate": 2.530129214630707e-06,
      "loss": 0.0076,
      "step": 2287300
    },
    {
      "epoch": 3.743249347027749,
      "grad_norm": 0.2290169596672058,
      "learning_rate": 2.53006332241719e-06,
      "loss": 0.009,
      "step": 2287320
    },
    {
      "epoch": 3.743282077466402,
      "grad_norm": 0.24011632800102234,
      "learning_rate": 2.529997430203673e-06,
      "loss": 0.0114,
      "step": 2287340
    },
    {
      "epoch": 3.7433148079050556,
      "grad_norm": 0.8493170738220215,
      "learning_rate": 2.5299315379901557e-06,
      "loss": 0.014,
      "step": 2287360
    },
    {
      "epoch": 3.743347538343709,
      "grad_norm": 0.09827913343906403,
      "learning_rate": 2.5298656457766385e-06,
      "loss": 0.0073,
      "step": 2287380
    },
    {
      "epoch": 3.7433802687823623,
      "grad_norm": 0.3854089677333832,
      "learning_rate": 2.529799753563122e-06,
      "loss": 0.0146,
      "step": 2287400
    },
    {
      "epoch": 3.7434129992210154,
      "grad_norm": 0.1907944679260254,
      "learning_rate": 2.529733861349605e-06,
      "loss": 0.0074,
      "step": 2287420
    },
    {
      "epoch": 3.743445729659669,
      "grad_norm": 0.12428126484155655,
      "learning_rate": 2.5296679691360876e-06,
      "loss": 0.0075,
      "step": 2287440
    },
    {
      "epoch": 3.743478460098322,
      "grad_norm": 0.22633232176303864,
      "learning_rate": 2.5296020769225703e-06,
      "loss": 0.0129,
      "step": 2287460
    },
    {
      "epoch": 3.7435111905369753,
      "grad_norm": 0.13844971358776093,
      "learning_rate": 2.5295361847090535e-06,
      "loss": 0.0071,
      "step": 2287480
    },
    {
      "epoch": 3.743543920975629,
      "grad_norm": 0.46532177925109863,
      "learning_rate": 2.529470292495536e-06,
      "loss": 0.0091,
      "step": 2287500
    },
    {
      "epoch": 3.7435766514142825,
      "grad_norm": 0.3274756968021393,
      "learning_rate": 2.529404400282019e-06,
      "loss": 0.0115,
      "step": 2287520
    },
    {
      "epoch": 3.7436093818529357,
      "grad_norm": 0.48211997747421265,
      "learning_rate": 2.5293385080685017e-06,
      "loss": 0.0098,
      "step": 2287540
    },
    {
      "epoch": 3.743642112291589,
      "grad_norm": 0.2593604624271393,
      "learning_rate": 2.5292726158549844e-06,
      "loss": 0.0139,
      "step": 2287560
    },
    {
      "epoch": 3.7436748427302424,
      "grad_norm": 0.43130767345428467,
      "learning_rate": 2.5292067236414676e-06,
      "loss": 0.0152,
      "step": 2287580
    },
    {
      "epoch": 3.7437075731688956,
      "grad_norm": 0.19233955442905426,
      "learning_rate": 2.5291408314279503e-06,
      "loss": 0.0069,
      "step": 2287600
    },
    {
      "epoch": 3.7437403036075487,
      "grad_norm": 0.19875678420066833,
      "learning_rate": 2.529074939214433e-06,
      "loss": 0.011,
      "step": 2287620
    },
    {
      "epoch": 3.7437730340462023,
      "grad_norm": 0.44831201434135437,
      "learning_rate": 2.529009047000916e-06,
      "loss": 0.0167,
      "step": 2287640
    },
    {
      "epoch": 3.7438057644848555,
      "grad_norm": 0.2859509289264679,
      "learning_rate": 2.528943154787399e-06,
      "loss": 0.013,
      "step": 2287660
    },
    {
      "epoch": 3.743838494923509,
      "grad_norm": 0.22203129529953003,
      "learning_rate": 2.5288772625738817e-06,
      "loss": 0.0098,
      "step": 2287680
    },
    {
      "epoch": 3.743871225362162,
      "grad_norm": 0.43723106384277344,
      "learning_rate": 2.5288113703603644e-06,
      "loss": 0.0131,
      "step": 2287700
    },
    {
      "epoch": 3.743903955800816,
      "grad_norm": 0.30197638273239136,
      "learning_rate": 2.528745478146847e-06,
      "loss": 0.0118,
      "step": 2287720
    },
    {
      "epoch": 3.743936686239469,
      "grad_norm": 0.3052353262901306,
      "learning_rate": 2.5286795859333308e-06,
      "loss": 0.0102,
      "step": 2287740
    },
    {
      "epoch": 3.743969416678122,
      "grad_norm": 0.1393391638994217,
      "learning_rate": 2.5286136937198135e-06,
      "loss": 0.006,
      "step": 2287760
    },
    {
      "epoch": 3.7440021471167757,
      "grad_norm": 0.10092396289110184,
      "learning_rate": 2.5285478015062962e-06,
      "loss": 0.008,
      "step": 2287780
    },
    {
      "epoch": 3.744034877555429,
      "grad_norm": 0.43508148193359375,
      "learning_rate": 2.5284819092927794e-06,
      "loss": 0.0148,
      "step": 2287800
    },
    {
      "epoch": 3.7440676079940824,
      "grad_norm": 0.08478277921676636,
      "learning_rate": 2.528416017079262e-06,
      "loss": 0.0073,
      "step": 2287820
    },
    {
      "epoch": 3.7441003384327356,
      "grad_norm": 0.08707665652036667,
      "learning_rate": 2.528350124865745e-06,
      "loss": 0.0098,
      "step": 2287840
    },
    {
      "epoch": 3.744133068871389,
      "grad_norm": 0.3523610234260559,
      "learning_rate": 2.5282842326522276e-06,
      "loss": 0.0126,
      "step": 2287860
    },
    {
      "epoch": 3.7441657993100423,
      "grad_norm": 0.22054535150527954,
      "learning_rate": 2.528218340438711e-06,
      "loss": 0.0107,
      "step": 2287880
    },
    {
      "epoch": 3.7441985297486955,
      "grad_norm": 0.0998668372631073,
      "learning_rate": 2.5281524482251935e-06,
      "loss": 0.0087,
      "step": 2287900
    },
    {
      "epoch": 3.744231260187349,
      "grad_norm": 0.33826810121536255,
      "learning_rate": 2.5280865560116763e-06,
      "loss": 0.0112,
      "step": 2287920
    },
    {
      "epoch": 3.7442639906260022,
      "grad_norm": 0.10034074634313583,
      "learning_rate": 2.528020663798159e-06,
      "loss": 0.0087,
      "step": 2287940
    },
    {
      "epoch": 3.744296721064656,
      "grad_norm": 0.14571881294250488,
      "learning_rate": 2.527954771584642e-06,
      "loss": 0.0115,
      "step": 2287960
    },
    {
      "epoch": 3.744329451503309,
      "grad_norm": 0.7243786454200745,
      "learning_rate": 2.527888879371125e-06,
      "loss": 0.0113,
      "step": 2287980
    },
    {
      "epoch": 3.7443621819419626,
      "grad_norm": 0.378445565700531,
      "learning_rate": 2.5278229871576077e-06,
      "loss": 0.0099,
      "step": 2288000
    },
    {
      "epoch": 3.7443949123806157,
      "grad_norm": 0.18536554276943207,
      "learning_rate": 2.5277570949440904e-06,
      "loss": 0.0069,
      "step": 2288020
    },
    {
      "epoch": 3.744427642819269,
      "grad_norm": 0.08895471692085266,
      "learning_rate": 2.527691202730573e-06,
      "loss": 0.0093,
      "step": 2288040
    },
    {
      "epoch": 3.7444603732579225,
      "grad_norm": 0.08174512535333633,
      "learning_rate": 2.5276253105170563e-06,
      "loss": 0.0118,
      "step": 2288060
    },
    {
      "epoch": 3.7444931036965756,
      "grad_norm": 0.14290204644203186,
      "learning_rate": 2.527559418303539e-06,
      "loss": 0.0123,
      "step": 2288080
    },
    {
      "epoch": 3.744525834135229,
      "grad_norm": 0.25970160961151123,
      "learning_rate": 2.527493526090022e-06,
      "loss": 0.0084,
      "step": 2288100
    },
    {
      "epoch": 3.7445585645738824,
      "grad_norm": 0.09960801154375076,
      "learning_rate": 2.5274276338765054e-06,
      "loss": 0.0186,
      "step": 2288120
    },
    {
      "epoch": 3.744591295012536,
      "grad_norm": 0.2288227081298828,
      "learning_rate": 2.527361741662988e-06,
      "loss": 0.0093,
      "step": 2288140
    },
    {
      "epoch": 3.744624025451189,
      "grad_norm": 0.2500172555446625,
      "learning_rate": 2.527295849449471e-06,
      "loss": 0.0081,
      "step": 2288160
    },
    {
      "epoch": 3.7446567558898423,
      "grad_norm": 0.12273873388767242,
      "learning_rate": 2.5272299572359536e-06,
      "loss": 0.0146,
      "step": 2288180
    },
    {
      "epoch": 3.744689486328496,
      "grad_norm": 0.11995315551757812,
      "learning_rate": 2.5271640650224367e-06,
      "loss": 0.0147,
      "step": 2288200
    },
    {
      "epoch": 3.744722216767149,
      "grad_norm": 0.5054798126220703,
      "learning_rate": 2.5270981728089195e-06,
      "loss": 0.011,
      "step": 2288220
    },
    {
      "epoch": 3.7447549472058026,
      "grad_norm": 0.15706288814544678,
      "learning_rate": 2.5270322805954022e-06,
      "loss": 0.0092,
      "step": 2288240
    },
    {
      "epoch": 3.7447876776444557,
      "grad_norm": 0.28835204243659973,
      "learning_rate": 2.526966388381885e-06,
      "loss": 0.0137,
      "step": 2288260
    },
    {
      "epoch": 3.7448204080831093,
      "grad_norm": 0.06784939765930176,
      "learning_rate": 2.526900496168368e-06,
      "loss": 0.0084,
      "step": 2288280
    },
    {
      "epoch": 3.7448531385217625,
      "grad_norm": 0.5289937257766724,
      "learning_rate": 2.526834603954851e-06,
      "loss": 0.0129,
      "step": 2288300
    },
    {
      "epoch": 3.7448858689604156,
      "grad_norm": 0.6760396957397461,
      "learning_rate": 2.5267687117413336e-06,
      "loss": 0.018,
      "step": 2288320
    },
    {
      "epoch": 3.7449185993990692,
      "grad_norm": 0.31481117010116577,
      "learning_rate": 2.5267028195278163e-06,
      "loss": 0.0126,
      "step": 2288340
    },
    {
      "epoch": 3.7449513298377224,
      "grad_norm": 0.23354750871658325,
      "learning_rate": 2.5266369273142995e-06,
      "loss": 0.0058,
      "step": 2288360
    },
    {
      "epoch": 3.744984060276376,
      "grad_norm": 0.24915754795074463,
      "learning_rate": 2.5265710351007823e-06,
      "loss": 0.0107,
      "step": 2288380
    },
    {
      "epoch": 3.745016790715029,
      "grad_norm": 0.16140475869178772,
      "learning_rate": 2.526505142887265e-06,
      "loss": 0.0128,
      "step": 2288400
    },
    {
      "epoch": 3.7450495211536827,
      "grad_norm": 0.13611158728599548,
      "learning_rate": 2.5264392506737477e-06,
      "loss": 0.0117,
      "step": 2288420
    },
    {
      "epoch": 3.745082251592336,
      "grad_norm": 0.3811383545398712,
      "learning_rate": 2.526373358460231e-06,
      "loss": 0.0126,
      "step": 2288440
    },
    {
      "epoch": 3.745114982030989,
      "grad_norm": 0.14414376020431519,
      "learning_rate": 2.526307466246714e-06,
      "loss": 0.0116,
      "step": 2288460
    },
    {
      "epoch": 3.7451477124696426,
      "grad_norm": 0.6753638386726379,
      "learning_rate": 2.526241574033197e-06,
      "loss": 0.0091,
      "step": 2288480
    },
    {
      "epoch": 3.7451804429082958,
      "grad_norm": 0.2137262523174286,
      "learning_rate": 2.52617568181968e-06,
      "loss": 0.0131,
      "step": 2288500
    },
    {
      "epoch": 3.745213173346949,
      "grad_norm": 0.2418256402015686,
      "learning_rate": 2.5261097896061627e-06,
      "loss": 0.0086,
      "step": 2288520
    },
    {
      "epoch": 3.7452459037856025,
      "grad_norm": 0.17011074721813202,
      "learning_rate": 2.5260438973926454e-06,
      "loss": 0.0105,
      "step": 2288540
    },
    {
      "epoch": 3.745278634224256,
      "grad_norm": 0.3656446933746338,
      "learning_rate": 2.525978005179128e-06,
      "loss": 0.0157,
      "step": 2288560
    },
    {
      "epoch": 3.7453113646629093,
      "grad_norm": 0.39809298515319824,
      "learning_rate": 2.525912112965611e-06,
      "loss": 0.0124,
      "step": 2288580
    },
    {
      "epoch": 3.7453440951015624,
      "grad_norm": 0.3402279317378998,
      "learning_rate": 2.525846220752094e-06,
      "loss": 0.0067,
      "step": 2288600
    },
    {
      "epoch": 3.745376825540216,
      "grad_norm": 0.3742290139198303,
      "learning_rate": 2.525780328538577e-06,
      "loss": 0.0155,
      "step": 2288620
    },
    {
      "epoch": 3.745409555978869,
      "grad_norm": 0.1334681510925293,
      "learning_rate": 2.5257144363250596e-06,
      "loss": 0.0141,
      "step": 2288640
    },
    {
      "epoch": 3.7454422864175223,
      "grad_norm": 0.31880730390548706,
      "learning_rate": 2.5256485441115423e-06,
      "loss": 0.0116,
      "step": 2288660
    },
    {
      "epoch": 3.745475016856176,
      "grad_norm": 0.2079165130853653,
      "learning_rate": 2.5255826518980255e-06,
      "loss": 0.0159,
      "step": 2288680
    },
    {
      "epoch": 3.7455077472948295,
      "grad_norm": 0.16907960176467896,
      "learning_rate": 2.525516759684508e-06,
      "loss": 0.0133,
      "step": 2288700
    },
    {
      "epoch": 3.7455404777334826,
      "grad_norm": 0.28828710317611694,
      "learning_rate": 2.525450867470991e-06,
      "loss": 0.0082,
      "step": 2288720
    },
    {
      "epoch": 3.745573208172136,
      "grad_norm": 0.4602632522583008,
      "learning_rate": 2.5253849752574737e-06,
      "loss": 0.0098,
      "step": 2288740
    },
    {
      "epoch": 3.7456059386107894,
      "grad_norm": 0.0913957729935646,
      "learning_rate": 2.525319083043957e-06,
      "loss": 0.0072,
      "step": 2288760
    },
    {
      "epoch": 3.7456386690494425,
      "grad_norm": 0.4729558825492859,
      "learning_rate": 2.5252531908304396e-06,
      "loss": 0.0139,
      "step": 2288780
    },
    {
      "epoch": 3.7456713994880957,
      "grad_norm": 0.2630649209022522,
      "learning_rate": 2.5251872986169227e-06,
      "loss": 0.0085,
      "step": 2288800
    },
    {
      "epoch": 3.7457041299267493,
      "grad_norm": 0.1438564956188202,
      "learning_rate": 2.525121406403406e-06,
      "loss": 0.0122,
      "step": 2288820
    },
    {
      "epoch": 3.745736860365403,
      "grad_norm": 0.0827535018324852,
      "learning_rate": 2.5250555141898887e-06,
      "loss": 0.0109,
      "step": 2288840
    },
    {
      "epoch": 3.745769590804056,
      "grad_norm": 0.21740983426570892,
      "learning_rate": 2.5249896219763714e-06,
      "loss": 0.0077,
      "step": 2288860
    },
    {
      "epoch": 3.745802321242709,
      "grad_norm": 0.32023048400878906,
      "learning_rate": 2.524923729762854e-06,
      "loss": 0.0114,
      "step": 2288880
    },
    {
      "epoch": 3.7458350516813628,
      "grad_norm": 0.1300724893808365,
      "learning_rate": 2.5248578375493373e-06,
      "loss": 0.0125,
      "step": 2288900
    },
    {
      "epoch": 3.745867782120016,
      "grad_norm": 0.2074379026889801,
      "learning_rate": 2.52479194533582e-06,
      "loss": 0.0138,
      "step": 2288920
    },
    {
      "epoch": 3.745900512558669,
      "grad_norm": 0.18238016963005066,
      "learning_rate": 2.5247260531223028e-06,
      "loss": 0.0137,
      "step": 2288940
    },
    {
      "epoch": 3.7459332429973227,
      "grad_norm": 0.2717878222465515,
      "learning_rate": 2.5246601609087855e-06,
      "loss": 0.0162,
      "step": 2288960
    },
    {
      "epoch": 3.7459659734359763,
      "grad_norm": 0.2605646848678589,
      "learning_rate": 2.5245942686952687e-06,
      "loss": 0.0098,
      "step": 2288980
    },
    {
      "epoch": 3.7459987038746294,
      "grad_norm": 0.5371413230895996,
      "learning_rate": 2.5245283764817514e-06,
      "loss": 0.0186,
      "step": 2289000
    },
    {
      "epoch": 3.7460314343132826,
      "grad_norm": 0.3948095738887787,
      "learning_rate": 2.524462484268234e-06,
      "loss": 0.0092,
      "step": 2289020
    },
    {
      "epoch": 3.746064164751936,
      "grad_norm": 0.25956034660339355,
      "learning_rate": 2.524396592054717e-06,
      "loss": 0.0101,
      "step": 2289040
    },
    {
      "epoch": 3.7460968951905893,
      "grad_norm": 0.5557032823562622,
      "learning_rate": 2.5243306998411996e-06,
      "loss": 0.0098,
      "step": 2289060
    },
    {
      "epoch": 3.7461296256292425,
      "grad_norm": 0.06252282112836838,
      "learning_rate": 2.524264807627683e-06,
      "loss": 0.011,
      "step": 2289080
    },
    {
      "epoch": 3.746162356067896,
      "grad_norm": 0.15145066380500793,
      "learning_rate": 2.5241989154141655e-06,
      "loss": 0.0111,
      "step": 2289100
    },
    {
      "epoch": 3.746195086506549,
      "grad_norm": 0.5551413893699646,
      "learning_rate": 2.5241330232006483e-06,
      "loss": 0.0128,
      "step": 2289120
    },
    {
      "epoch": 3.746227816945203,
      "grad_norm": 0.16075527667999268,
      "learning_rate": 2.524067130987131e-06,
      "loss": 0.0079,
      "step": 2289140
    },
    {
      "epoch": 3.746260547383856,
      "grad_norm": 0.19296567142009735,
      "learning_rate": 2.5240012387736146e-06,
      "loss": 0.0088,
      "step": 2289160
    },
    {
      "epoch": 3.7462932778225095,
      "grad_norm": 0.2351500540971756,
      "learning_rate": 2.5239353465600973e-06,
      "loss": 0.0073,
      "step": 2289180
    },
    {
      "epoch": 3.7463260082611627,
      "grad_norm": 0.19780881702899933,
      "learning_rate": 2.52386945434658e-06,
      "loss": 0.0081,
      "step": 2289200
    },
    {
      "epoch": 3.746358738699816,
      "grad_norm": 0.3068724572658539,
      "learning_rate": 2.5238035621330632e-06,
      "loss": 0.0108,
      "step": 2289220
    },
    {
      "epoch": 3.7463914691384694,
      "grad_norm": 0.3087942600250244,
      "learning_rate": 2.523737669919546e-06,
      "loss": 0.0146,
      "step": 2289240
    },
    {
      "epoch": 3.7464241995771226,
      "grad_norm": 0.382520467042923,
      "learning_rate": 2.5236717777060287e-06,
      "loss": 0.0109,
      "step": 2289260
    },
    {
      "epoch": 3.746456930015776,
      "grad_norm": 0.0719473659992218,
      "learning_rate": 2.5236058854925115e-06,
      "loss": 0.0093,
      "step": 2289280
    },
    {
      "epoch": 3.7464896604544293,
      "grad_norm": 0.4303399920463562,
      "learning_rate": 2.5235399932789946e-06,
      "loss": 0.0129,
      "step": 2289300
    },
    {
      "epoch": 3.746522390893083,
      "grad_norm": 0.3032871186733246,
      "learning_rate": 2.5234741010654774e-06,
      "loss": 0.0078,
      "step": 2289320
    },
    {
      "epoch": 3.746555121331736,
      "grad_norm": 0.35811692476272583,
      "learning_rate": 2.52340820885196e-06,
      "loss": 0.0075,
      "step": 2289340
    },
    {
      "epoch": 3.746587851770389,
      "grad_norm": 0.2640453577041626,
      "learning_rate": 2.523342316638443e-06,
      "loss": 0.0074,
      "step": 2289360
    },
    {
      "epoch": 3.746620582209043,
      "grad_norm": 0.14213956892490387,
      "learning_rate": 2.523276424424926e-06,
      "loss": 0.0134,
      "step": 2289380
    },
    {
      "epoch": 3.746653312647696,
      "grad_norm": 0.2590258717536926,
      "learning_rate": 2.5232105322114088e-06,
      "loss": 0.009,
      "step": 2289400
    },
    {
      "epoch": 3.7466860430863496,
      "grad_norm": 0.13880936801433563,
      "learning_rate": 2.5231446399978915e-06,
      "loss": 0.0141,
      "step": 2289420
    },
    {
      "epoch": 3.7467187735250027,
      "grad_norm": 0.1296793520450592,
      "learning_rate": 2.5230787477843742e-06,
      "loss": 0.0112,
      "step": 2289440
    },
    {
      "epoch": 3.7467515039636563,
      "grad_norm": 0.466076135635376,
      "learning_rate": 2.523012855570857e-06,
      "loss": 0.0082,
      "step": 2289460
    },
    {
      "epoch": 3.7467842344023095,
      "grad_norm": 0.20775675773620605,
      "learning_rate": 2.52294696335734e-06,
      "loss": 0.0095,
      "step": 2289480
    },
    {
      "epoch": 3.7468169648409626,
      "grad_norm": 0.2762274444103241,
      "learning_rate": 2.5228810711438233e-06,
      "loss": 0.0133,
      "step": 2289500
    },
    {
      "epoch": 3.746849695279616,
      "grad_norm": 0.22453460097312927,
      "learning_rate": 2.5228151789303065e-06,
      "loss": 0.0118,
      "step": 2289520
    },
    {
      "epoch": 3.7468824257182693,
      "grad_norm": 0.6948035955429077,
      "learning_rate": 2.522749286716789e-06,
      "loss": 0.0188,
      "step": 2289540
    },
    {
      "epoch": 3.746915156156923,
      "grad_norm": 1.106145977973938,
      "learning_rate": 2.522683394503272e-06,
      "loss": 0.0075,
      "step": 2289560
    },
    {
      "epoch": 3.746947886595576,
      "grad_norm": 0.12772327661514282,
      "learning_rate": 2.5226175022897547e-06,
      "loss": 0.0067,
      "step": 2289580
    },
    {
      "epoch": 3.7469806170342297,
      "grad_norm": 0.1916794627904892,
      "learning_rate": 2.5225516100762374e-06,
      "loss": 0.0148,
      "step": 2289600
    },
    {
      "epoch": 3.747013347472883,
      "grad_norm": 0.6446729302406311,
      "learning_rate": 2.5224857178627206e-06,
      "loss": 0.0108,
      "step": 2289620
    },
    {
      "epoch": 3.747046077911536,
      "grad_norm": 0.1359013170003891,
      "learning_rate": 2.5224198256492033e-06,
      "loss": 0.0087,
      "step": 2289640
    },
    {
      "epoch": 3.7470788083501896,
      "grad_norm": 0.6624331474304199,
      "learning_rate": 2.522353933435686e-06,
      "loss": 0.01,
      "step": 2289660
    },
    {
      "epoch": 3.7471115387888427,
      "grad_norm": 0.32164061069488525,
      "learning_rate": 2.522288041222169e-06,
      "loss": 0.0078,
      "step": 2289680
    },
    {
      "epoch": 3.7471442692274963,
      "grad_norm": 0.15895803272724152,
      "learning_rate": 2.522222149008652e-06,
      "loss": 0.0083,
      "step": 2289700
    },
    {
      "epoch": 3.7471769996661495,
      "grad_norm": 0.39329132437705994,
      "learning_rate": 2.5221562567951347e-06,
      "loss": 0.0104,
      "step": 2289720
    },
    {
      "epoch": 3.747209730104803,
      "grad_norm": 0.23033443093299866,
      "learning_rate": 2.5220903645816174e-06,
      "loss": 0.0071,
      "step": 2289740
    },
    {
      "epoch": 3.747242460543456,
      "grad_norm": 0.2964145243167877,
      "learning_rate": 2.5220244723681e-06,
      "loss": 0.0135,
      "step": 2289760
    },
    {
      "epoch": 3.7472751909821094,
      "grad_norm": 0.2427171766757965,
      "learning_rate": 2.5219585801545834e-06,
      "loss": 0.0094,
      "step": 2289780
    },
    {
      "epoch": 3.747307921420763,
      "grad_norm": 0.2535307705402374,
      "learning_rate": 2.521892687941066e-06,
      "loss": 0.0126,
      "step": 2289800
    },
    {
      "epoch": 3.747340651859416,
      "grad_norm": 0.13691596686840057,
      "learning_rate": 2.521826795727549e-06,
      "loss": 0.0107,
      "step": 2289820
    },
    {
      "epoch": 3.7473733822980697,
      "grad_norm": 0.07076313346624374,
      "learning_rate": 2.5217609035140316e-06,
      "loss": 0.0077,
      "step": 2289840
    },
    {
      "epoch": 3.747406112736723,
      "grad_norm": 0.23766589164733887,
      "learning_rate": 2.521695011300515e-06,
      "loss": 0.0073,
      "step": 2289860
    },
    {
      "epoch": 3.7474388431753765,
      "grad_norm": 0.48043686151504517,
      "learning_rate": 2.521629119086998e-06,
      "loss": 0.0117,
      "step": 2289880
    },
    {
      "epoch": 3.7474715736140296,
      "grad_norm": 0.19657856225967407,
      "learning_rate": 2.5215632268734806e-06,
      "loss": 0.0066,
      "step": 2289900
    },
    {
      "epoch": 3.7475043040526828,
      "grad_norm": 0.8775281310081482,
      "learning_rate": 2.521497334659964e-06,
      "loss": 0.0094,
      "step": 2289920
    },
    {
      "epoch": 3.7475370344913363,
      "grad_norm": 0.1830131560564041,
      "learning_rate": 2.5214314424464465e-06,
      "loss": 0.0102,
      "step": 2289940
    },
    {
      "epoch": 3.7475697649299895,
      "grad_norm": 0.3138387203216553,
      "learning_rate": 2.5213655502329293e-06,
      "loss": 0.0111,
      "step": 2289960
    },
    {
      "epoch": 3.747602495368643,
      "grad_norm": 0.7648668885231018,
      "learning_rate": 2.521299658019412e-06,
      "loss": 0.0097,
      "step": 2289980
    },
    {
      "epoch": 3.7476352258072962,
      "grad_norm": 0.11291680485010147,
      "learning_rate": 2.5212337658058948e-06,
      "loss": 0.0052,
      "step": 2290000
    },
    {
      "epoch": 3.74766795624595,
      "grad_norm": 0.6221747398376465,
      "learning_rate": 2.521167873592378e-06,
      "loss": 0.0094,
      "step": 2290020
    },
    {
      "epoch": 3.747700686684603,
      "grad_norm": 0.1181526854634285,
      "learning_rate": 2.5211019813788607e-06,
      "loss": 0.0093,
      "step": 2290040
    },
    {
      "epoch": 3.747733417123256,
      "grad_norm": 0.5126340985298157,
      "learning_rate": 2.5210360891653434e-06,
      "loss": 0.0098,
      "step": 2290060
    },
    {
      "epoch": 3.7477661475619097,
      "grad_norm": 0.2050718069076538,
      "learning_rate": 2.520970196951826e-06,
      "loss": 0.0123,
      "step": 2290080
    },
    {
      "epoch": 3.747798878000563,
      "grad_norm": 0.11888635903596878,
      "learning_rate": 2.5209043047383093e-06,
      "loss": 0.0139,
      "step": 2290100
    },
    {
      "epoch": 3.747831608439216,
      "grad_norm": 0.16507287323474884,
      "learning_rate": 2.520838412524792e-06,
      "loss": 0.0107,
      "step": 2290120
    },
    {
      "epoch": 3.7478643388778696,
      "grad_norm": 0.14895576238632202,
      "learning_rate": 2.5207725203112748e-06,
      "loss": 0.0114,
      "step": 2290140
    },
    {
      "epoch": 3.747897069316523,
      "grad_norm": 0.43268102407455444,
      "learning_rate": 2.5207066280977575e-06,
      "loss": 0.0133,
      "step": 2290160
    },
    {
      "epoch": 3.7479297997551764,
      "grad_norm": 0.3532849848270416,
      "learning_rate": 2.5206407358842407e-06,
      "loss": 0.0102,
      "step": 2290180
    },
    {
      "epoch": 3.7479625301938295,
      "grad_norm": 0.19769524037837982,
      "learning_rate": 2.520574843670724e-06,
      "loss": 0.0123,
      "step": 2290200
    },
    {
      "epoch": 3.747995260632483,
      "grad_norm": 0.2872075140476227,
      "learning_rate": 2.5205089514572066e-06,
      "loss": 0.0083,
      "step": 2290220
    },
    {
      "epoch": 3.7480279910711363,
      "grad_norm": 0.16008366644382477,
      "learning_rate": 2.5204430592436898e-06,
      "loss": 0.0113,
      "step": 2290240
    },
    {
      "epoch": 3.7480607215097894,
      "grad_norm": 0.2191544771194458,
      "learning_rate": 2.5203771670301725e-06,
      "loss": 0.0083,
      "step": 2290260
    },
    {
      "epoch": 3.748093451948443,
      "grad_norm": 0.30150845646858215,
      "learning_rate": 2.5203112748166552e-06,
      "loss": 0.011,
      "step": 2290280
    },
    {
      "epoch": 3.7481261823870966,
      "grad_norm": 0.13518628478050232,
      "learning_rate": 2.520245382603138e-06,
      "loss": 0.0156,
      "step": 2290300
    },
    {
      "epoch": 3.7481589128257498,
      "grad_norm": 0.30503055453300476,
      "learning_rate": 2.520179490389621e-06,
      "loss": 0.0181,
      "step": 2290320
    },
    {
      "epoch": 3.748191643264403,
      "grad_norm": 0.7946648001670837,
      "learning_rate": 2.520113598176104e-06,
      "loss": 0.0107,
      "step": 2290340
    },
    {
      "epoch": 3.7482243737030565,
      "grad_norm": 0.5734938979148865,
      "learning_rate": 2.5200477059625866e-06,
      "loss": 0.0107,
      "step": 2290360
    },
    {
      "epoch": 3.7482571041417097,
      "grad_norm": 1.1531318426132202,
      "learning_rate": 2.5199818137490694e-06,
      "loss": 0.0084,
      "step": 2290380
    },
    {
      "epoch": 3.748289834580363,
      "grad_norm": 0.26247304677963257,
      "learning_rate": 2.5199159215355525e-06,
      "loss": 0.0068,
      "step": 2290400
    },
    {
      "epoch": 3.7483225650190164,
      "grad_norm": 0.2590216398239136,
      "learning_rate": 2.5198500293220353e-06,
      "loss": 0.0092,
      "step": 2290420
    },
    {
      "epoch": 3.74835529545767,
      "grad_norm": 0.6491782069206238,
      "learning_rate": 2.519784137108518e-06,
      "loss": 0.014,
      "step": 2290440
    },
    {
      "epoch": 3.748388025896323,
      "grad_norm": 0.47249987721443176,
      "learning_rate": 2.5197182448950007e-06,
      "loss": 0.0153,
      "step": 2290460
    },
    {
      "epoch": 3.7484207563349763,
      "grad_norm": 0.39424946904182434,
      "learning_rate": 2.5196523526814835e-06,
      "loss": 0.0123,
      "step": 2290480
    },
    {
      "epoch": 3.74845348677363,
      "grad_norm": 0.1445007026195526,
      "learning_rate": 2.5195864604679666e-06,
      "loss": 0.007,
      "step": 2290500
    },
    {
      "epoch": 3.748486217212283,
      "grad_norm": 0.3696630895137787,
      "learning_rate": 2.5195205682544494e-06,
      "loss": 0.0081,
      "step": 2290520
    },
    {
      "epoch": 3.748518947650936,
      "grad_norm": 0.19895142316818237,
      "learning_rate": 2.519454676040932e-06,
      "loss": 0.0107,
      "step": 2290540
    },
    {
      "epoch": 3.74855167808959,
      "grad_norm": 0.20714153349399567,
      "learning_rate": 2.5193887838274157e-06,
      "loss": 0.0109,
      "step": 2290560
    },
    {
      "epoch": 3.7485844085282434,
      "grad_norm": 0.17429213225841522,
      "learning_rate": 2.5193228916138984e-06,
      "loss": 0.0094,
      "step": 2290580
    },
    {
      "epoch": 3.7486171389668965,
      "grad_norm": 0.12787631154060364,
      "learning_rate": 2.519256999400381e-06,
      "loss": 0.0078,
      "step": 2290600
    },
    {
      "epoch": 3.7486498694055497,
      "grad_norm": 0.32144492864608765,
      "learning_rate": 2.519191107186864e-06,
      "loss": 0.0088,
      "step": 2290620
    },
    {
      "epoch": 3.7486825998442033,
      "grad_norm": 0.2960156500339508,
      "learning_rate": 2.519125214973347e-06,
      "loss": 0.0074,
      "step": 2290640
    },
    {
      "epoch": 3.7487153302828564,
      "grad_norm": 0.5717071294784546,
      "learning_rate": 2.51905932275983e-06,
      "loss": 0.0164,
      "step": 2290660
    },
    {
      "epoch": 3.7487480607215096,
      "grad_norm": 0.19370101392269135,
      "learning_rate": 2.5189934305463126e-06,
      "loss": 0.0102,
      "step": 2290680
    },
    {
      "epoch": 3.748780791160163,
      "grad_norm": 0.218079075217247,
      "learning_rate": 2.5189275383327953e-06,
      "loss": 0.0134,
      "step": 2290700
    },
    {
      "epoch": 3.7488135215988163,
      "grad_norm": 0.1690283566713333,
      "learning_rate": 2.5188616461192785e-06,
      "loss": 0.0111,
      "step": 2290720
    },
    {
      "epoch": 3.74884625203747,
      "grad_norm": 0.2650006413459778,
      "learning_rate": 2.518795753905761e-06,
      "loss": 0.0153,
      "step": 2290740
    },
    {
      "epoch": 3.748878982476123,
      "grad_norm": 0.26601260900497437,
      "learning_rate": 2.518729861692244e-06,
      "loss": 0.0113,
      "step": 2290760
    },
    {
      "epoch": 3.7489117129147767,
      "grad_norm": 0.44363829493522644,
      "learning_rate": 2.5186639694787267e-06,
      "loss": 0.0129,
      "step": 2290780
    },
    {
      "epoch": 3.74894444335343,
      "grad_norm": 0.16471229493618011,
      "learning_rate": 2.51859807726521e-06,
      "loss": 0.0119,
      "step": 2290800
    },
    {
      "epoch": 3.748977173792083,
      "grad_norm": 0.14114537835121155,
      "learning_rate": 2.5185321850516926e-06,
      "loss": 0.0104,
      "step": 2290820
    },
    {
      "epoch": 3.7490099042307365,
      "grad_norm": 0.3000403344631195,
      "learning_rate": 2.5184662928381753e-06,
      "loss": 0.0109,
      "step": 2290840
    },
    {
      "epoch": 3.7490426346693897,
      "grad_norm": 0.4369828999042511,
      "learning_rate": 2.518400400624658e-06,
      "loss": 0.0118,
      "step": 2290860
    },
    {
      "epoch": 3.7490753651080433,
      "grad_norm": 0.4737723171710968,
      "learning_rate": 2.518334508411141e-06,
      "loss": 0.0134,
      "step": 2290880
    },
    {
      "epoch": 3.7491080955466964,
      "grad_norm": 0.15619508922100067,
      "learning_rate": 2.518268616197624e-06,
      "loss": 0.007,
      "step": 2290900
    },
    {
      "epoch": 3.74914082598535,
      "grad_norm": 0.10429197549819946,
      "learning_rate": 2.518202723984107e-06,
      "loss": 0.0107,
      "step": 2290920
    },
    {
      "epoch": 3.749173556424003,
      "grad_norm": 0.22467687726020813,
      "learning_rate": 2.5181368317705903e-06,
      "loss": 0.0078,
      "step": 2290940
    },
    {
      "epoch": 3.7492062868626563,
      "grad_norm": 0.4066281020641327,
      "learning_rate": 2.518070939557073e-06,
      "loss": 0.0113,
      "step": 2290960
    },
    {
      "epoch": 3.74923901730131,
      "grad_norm": 0.0363042838871479,
      "learning_rate": 2.5180050473435558e-06,
      "loss": 0.0154,
      "step": 2290980
    },
    {
      "epoch": 3.749271747739963,
      "grad_norm": 0.49687203764915466,
      "learning_rate": 2.5179391551300385e-06,
      "loss": 0.015,
      "step": 2291000
    },
    {
      "epoch": 3.7493044781786167,
      "grad_norm": 0.36333510279655457,
      "learning_rate": 2.5178732629165213e-06,
      "loss": 0.0126,
      "step": 2291020
    },
    {
      "epoch": 3.74933720861727,
      "grad_norm": 0.3432893455028534,
      "learning_rate": 2.5178073707030044e-06,
      "loss": 0.0079,
      "step": 2291040
    },
    {
      "epoch": 3.7493699390559234,
      "grad_norm": 0.15227586030960083,
      "learning_rate": 2.517741478489487e-06,
      "loss": 0.0058,
      "step": 2291060
    },
    {
      "epoch": 3.7494026694945766,
      "grad_norm": 0.10800070315599442,
      "learning_rate": 2.51767558627597e-06,
      "loss": 0.0126,
      "step": 2291080
    },
    {
      "epoch": 3.7494353999332297,
      "grad_norm": 0.13790374994277954,
      "learning_rate": 2.5176096940624526e-06,
      "loss": 0.0091,
      "step": 2291100
    },
    {
      "epoch": 3.7494681303718833,
      "grad_norm": 0.26263368129730225,
      "learning_rate": 2.517543801848936e-06,
      "loss": 0.0076,
      "step": 2291120
    },
    {
      "epoch": 3.7495008608105365,
      "grad_norm": 0.16689978539943695,
      "learning_rate": 2.5174779096354185e-06,
      "loss": 0.0159,
      "step": 2291140
    },
    {
      "epoch": 3.74953359124919,
      "grad_norm": 0.35181769728660583,
      "learning_rate": 2.5174120174219013e-06,
      "loss": 0.0133,
      "step": 2291160
    },
    {
      "epoch": 3.749566321687843,
      "grad_norm": 0.2314351201057434,
      "learning_rate": 2.517346125208384e-06,
      "loss": 0.0086,
      "step": 2291180
    },
    {
      "epoch": 3.749599052126497,
      "grad_norm": 0.24888209998607635,
      "learning_rate": 2.517280232994867e-06,
      "loss": 0.0078,
      "step": 2291200
    },
    {
      "epoch": 3.74963178256515,
      "grad_norm": 0.08636967092752457,
      "learning_rate": 2.51721434078135e-06,
      "loss": 0.0091,
      "step": 2291220
    },
    {
      "epoch": 3.749664513003803,
      "grad_norm": 0.40519705414772034,
      "learning_rate": 2.5171484485678327e-06,
      "loss": 0.0082,
      "step": 2291240
    },
    {
      "epoch": 3.7496972434424567,
      "grad_norm": 0.3447100818157196,
      "learning_rate": 2.5170825563543163e-06,
      "loss": 0.0135,
      "step": 2291260
    },
    {
      "epoch": 3.74972997388111,
      "grad_norm": 0.23732712864875793,
      "learning_rate": 2.517016664140799e-06,
      "loss": 0.0123,
      "step": 2291280
    },
    {
      "epoch": 3.7497627043197634,
      "grad_norm": 0.11118224263191223,
      "learning_rate": 2.5169507719272817e-06,
      "loss": 0.0098,
      "step": 2291300
    },
    {
      "epoch": 3.7497954347584166,
      "grad_norm": 0.333377867937088,
      "learning_rate": 2.5168848797137645e-06,
      "loss": 0.0075,
      "step": 2291320
    },
    {
      "epoch": 3.74982816519707,
      "grad_norm": 0.17174896597862244,
      "learning_rate": 2.5168189875002476e-06,
      "loss": 0.01,
      "step": 2291340
    },
    {
      "epoch": 3.7498608956357233,
      "grad_norm": 0.35085058212280273,
      "learning_rate": 2.5167530952867304e-06,
      "loss": 0.0104,
      "step": 2291360
    },
    {
      "epoch": 3.7498936260743765,
      "grad_norm": 0.07778231799602509,
      "learning_rate": 2.516687203073213e-06,
      "loss": 0.0054,
      "step": 2291380
    },
    {
      "epoch": 3.74992635651303,
      "grad_norm": 0.35099682211875916,
      "learning_rate": 2.516621310859696e-06,
      "loss": 0.0089,
      "step": 2291400
    },
    {
      "epoch": 3.7499590869516832,
      "grad_norm": 0.17216618359088898,
      "learning_rate": 2.5165554186461786e-06,
      "loss": 0.0094,
      "step": 2291420
    },
    {
      "epoch": 3.749991817390337,
      "grad_norm": 0.09068445861339569,
      "learning_rate": 2.5164895264326618e-06,
      "loss": 0.0147,
      "step": 2291440
    },
    {
      "epoch": 3.75002454782899,
      "grad_norm": 0.7638381719589233,
      "learning_rate": 2.5164236342191445e-06,
      "loss": 0.0223,
      "step": 2291460
    },
    {
      "epoch": 3.7500572782676436,
      "grad_norm": 0.5544121265411377,
      "learning_rate": 2.5163577420056272e-06,
      "loss": 0.0095,
      "step": 2291480
    },
    {
      "epoch": 3.7500900087062967,
      "grad_norm": 0.19410771131515503,
      "learning_rate": 2.51629184979211e-06,
      "loss": 0.0111,
      "step": 2291500
    },
    {
      "epoch": 3.75012273914495,
      "grad_norm": 0.32385584712028503,
      "learning_rate": 2.516225957578593e-06,
      "loss": 0.0127,
      "step": 2291520
    },
    {
      "epoch": 3.7501554695836035,
      "grad_norm": 0.5579994320869446,
      "learning_rate": 2.516160065365076e-06,
      "loss": 0.0131,
      "step": 2291540
    },
    {
      "epoch": 3.7501882000222566,
      "grad_norm": 0.9352757334709167,
      "learning_rate": 2.5160941731515586e-06,
      "loss": 0.0155,
      "step": 2291560
    },
    {
      "epoch": 3.7502209304609098,
      "grad_norm": 0.16010621190071106,
      "learning_rate": 2.5160282809380414e-06,
      "loss": 0.0102,
      "step": 2291580
    },
    {
      "epoch": 3.7502536608995634,
      "grad_norm": 0.33384275436401367,
      "learning_rate": 2.5159623887245245e-06,
      "loss": 0.0101,
      "step": 2291600
    },
    {
      "epoch": 3.750286391338217,
      "grad_norm": 0.6389110088348389,
      "learning_rate": 2.5158964965110077e-06,
      "loss": 0.0106,
      "step": 2291620
    },
    {
      "epoch": 3.75031912177687,
      "grad_norm": 0.11599135398864746,
      "learning_rate": 2.5158306042974904e-06,
      "loss": 0.0108,
      "step": 2291640
    },
    {
      "epoch": 3.7503518522155233,
      "grad_norm": 0.36130180954933167,
      "learning_rate": 2.5157647120839736e-06,
      "loss": 0.0146,
      "step": 2291660
    },
    {
      "epoch": 3.750384582654177,
      "grad_norm": 0.24863702058792114,
      "learning_rate": 2.5156988198704563e-06,
      "loss": 0.0087,
      "step": 2291680
    },
    {
      "epoch": 3.75041731309283,
      "grad_norm": 0.2999441623687744,
      "learning_rate": 2.515632927656939e-06,
      "loss": 0.01,
      "step": 2291700
    },
    {
      "epoch": 3.750450043531483,
      "grad_norm": 0.10917699337005615,
      "learning_rate": 2.515567035443422e-06,
      "loss": 0.0106,
      "step": 2291720
    },
    {
      "epoch": 3.7504827739701367,
      "grad_norm": 0.2649570107460022,
      "learning_rate": 2.515501143229905e-06,
      "loss": 0.0085,
      "step": 2291740
    },
    {
      "epoch": 3.7505155044087903,
      "grad_norm": 0.17421036958694458,
      "learning_rate": 2.5154352510163877e-06,
      "loss": 0.0115,
      "step": 2291760
    },
    {
      "epoch": 3.7505482348474435,
      "grad_norm": 0.4455283582210541,
      "learning_rate": 2.5153693588028705e-06,
      "loss": 0.0174,
      "step": 2291780
    },
    {
      "epoch": 3.7505809652860966,
      "grad_norm": 0.36526620388031006,
      "learning_rate": 2.515303466589353e-06,
      "loss": 0.0087,
      "step": 2291800
    },
    {
      "epoch": 3.7506136957247502,
      "grad_norm": 0.3056749701499939,
      "learning_rate": 2.5152375743758364e-06,
      "loss": 0.0094,
      "step": 2291820
    },
    {
      "epoch": 3.7506464261634034,
      "grad_norm": 0.32597389817237854,
      "learning_rate": 2.515171682162319e-06,
      "loss": 0.0104,
      "step": 2291840
    },
    {
      "epoch": 3.7506791566020565,
      "grad_norm": 0.18848411738872528,
      "learning_rate": 2.515105789948802e-06,
      "loss": 0.0095,
      "step": 2291860
    },
    {
      "epoch": 3.75071188704071,
      "grad_norm": 0.16329772770404816,
      "learning_rate": 2.5150398977352846e-06,
      "loss": 0.0134,
      "step": 2291880
    },
    {
      "epoch": 3.7507446174793637,
      "grad_norm": 0.17925260961055756,
      "learning_rate": 2.5149740055217673e-06,
      "loss": 0.0111,
      "step": 2291900
    },
    {
      "epoch": 3.750777347918017,
      "grad_norm": 0.39333486557006836,
      "learning_rate": 2.5149081133082505e-06,
      "loss": 0.0086,
      "step": 2291920
    },
    {
      "epoch": 3.75081007835667,
      "grad_norm": 0.0643945187330246,
      "learning_rate": 2.5148422210947332e-06,
      "loss": 0.0085,
      "step": 2291940
    },
    {
      "epoch": 3.7508428087953236,
      "grad_norm": 0.07652776688337326,
      "learning_rate": 2.5147763288812164e-06,
      "loss": 0.0063,
      "step": 2291960
    },
    {
      "epoch": 3.7508755392339768,
      "grad_norm": 0.16761966049671173,
      "learning_rate": 2.5147104366676995e-06,
      "loss": 0.0097,
      "step": 2291980
    },
    {
      "epoch": 3.75090826967263,
      "grad_norm": 0.12185897678136826,
      "learning_rate": 2.5146445444541823e-06,
      "loss": 0.0105,
      "step": 2292000
    },
    {
      "epoch": 3.7509410001112835,
      "grad_norm": 0.45736533403396606,
      "learning_rate": 2.514578652240665e-06,
      "loss": 0.0129,
      "step": 2292020
    },
    {
      "epoch": 3.750973730549937,
      "grad_norm": 0.15099169313907623,
      "learning_rate": 2.5145127600271478e-06,
      "loss": 0.0139,
      "step": 2292040
    },
    {
      "epoch": 3.7510064609885903,
      "grad_norm": 0.3872620463371277,
      "learning_rate": 2.514446867813631e-06,
      "loss": 0.0105,
      "step": 2292060
    },
    {
      "epoch": 3.7510391914272434,
      "grad_norm": 0.08972957730293274,
      "learning_rate": 2.5143809756001137e-06,
      "loss": 0.0153,
      "step": 2292080
    },
    {
      "epoch": 3.751071921865897,
      "grad_norm": 0.133615180850029,
      "learning_rate": 2.5143150833865964e-06,
      "loss": 0.0098,
      "step": 2292100
    },
    {
      "epoch": 3.75110465230455,
      "grad_norm": 0.40215110778808594,
      "learning_rate": 2.514249191173079e-06,
      "loss": 0.0119,
      "step": 2292120
    },
    {
      "epoch": 3.7511373827432033,
      "grad_norm": 0.2727484405040741,
      "learning_rate": 2.5141832989595623e-06,
      "loss": 0.0112,
      "step": 2292140
    },
    {
      "epoch": 3.751170113181857,
      "grad_norm": 0.1683594286441803,
      "learning_rate": 2.514117406746045e-06,
      "loss": 0.0065,
      "step": 2292160
    },
    {
      "epoch": 3.75120284362051,
      "grad_norm": 0.12188659608364105,
      "learning_rate": 2.514051514532528e-06,
      "loss": 0.0125,
      "step": 2292180
    },
    {
      "epoch": 3.7512355740591636,
      "grad_norm": 0.19918455183506012,
      "learning_rate": 2.5139856223190105e-06,
      "loss": 0.0143,
      "step": 2292200
    },
    {
      "epoch": 3.751268304497817,
      "grad_norm": 0.13835673034191132,
      "learning_rate": 2.5139197301054937e-06,
      "loss": 0.0093,
      "step": 2292220
    },
    {
      "epoch": 3.7513010349364704,
      "grad_norm": 0.08651795238256454,
      "learning_rate": 2.5138538378919764e-06,
      "loss": 0.0173,
      "step": 2292240
    },
    {
      "epoch": 3.7513337653751235,
      "grad_norm": 0.5086897611618042,
      "learning_rate": 2.513787945678459e-06,
      "loss": 0.0135,
      "step": 2292260
    },
    {
      "epoch": 3.7513664958137767,
      "grad_norm": 0.16587796807289124,
      "learning_rate": 2.513722053464942e-06,
      "loss": 0.0103,
      "step": 2292280
    },
    {
      "epoch": 3.7513992262524303,
      "grad_norm": 0.2308041751384735,
      "learning_rate": 2.513656161251425e-06,
      "loss": 0.0142,
      "step": 2292300
    },
    {
      "epoch": 3.7514319566910834,
      "grad_norm": 0.18506592512130737,
      "learning_rate": 2.5135902690379082e-06,
      "loss": 0.0087,
      "step": 2292320
    },
    {
      "epoch": 3.751464687129737,
      "grad_norm": 0.15698224306106567,
      "learning_rate": 2.513524376824391e-06,
      "loss": 0.0124,
      "step": 2292340
    },
    {
      "epoch": 3.75149741756839,
      "grad_norm": 0.13529375195503235,
      "learning_rate": 2.513458484610874e-06,
      "loss": 0.0062,
      "step": 2292360
    },
    {
      "epoch": 3.7515301480070438,
      "grad_norm": 0.1125919446349144,
      "learning_rate": 2.513392592397357e-06,
      "loss": 0.0139,
      "step": 2292380
    },
    {
      "epoch": 3.751562878445697,
      "grad_norm": 0.1063293069601059,
      "learning_rate": 2.5133267001838396e-06,
      "loss": 0.0072,
      "step": 2292400
    },
    {
      "epoch": 3.75159560888435,
      "grad_norm": 0.30836355686187744,
      "learning_rate": 2.5132608079703224e-06,
      "loss": 0.0103,
      "step": 2292420
    },
    {
      "epoch": 3.7516283393230037,
      "grad_norm": 0.740984320640564,
      "learning_rate": 2.513194915756805e-06,
      "loss": 0.008,
      "step": 2292440
    },
    {
      "epoch": 3.751661069761657,
      "grad_norm": 0.1247936561703682,
      "learning_rate": 2.5131290235432883e-06,
      "loss": 0.0112,
      "step": 2292460
    },
    {
      "epoch": 3.7516938002003104,
      "grad_norm": 0.1638031005859375,
      "learning_rate": 2.513063131329771e-06,
      "loss": 0.0124,
      "step": 2292480
    },
    {
      "epoch": 3.7517265306389636,
      "grad_norm": 0.4539973735809326,
      "learning_rate": 2.5129972391162537e-06,
      "loss": 0.0101,
      "step": 2292500
    },
    {
      "epoch": 3.751759261077617,
      "grad_norm": 0.2084270864725113,
      "learning_rate": 2.5129313469027365e-06,
      "loss": 0.014,
      "step": 2292520
    },
    {
      "epoch": 3.7517919915162703,
      "grad_norm": 0.1560269445180893,
      "learning_rate": 2.5128654546892196e-06,
      "loss": 0.0131,
      "step": 2292540
    },
    {
      "epoch": 3.7518247219549234,
      "grad_norm": 0.6129572987556458,
      "learning_rate": 2.5127995624757024e-06,
      "loss": 0.017,
      "step": 2292560
    },
    {
      "epoch": 3.751857452393577,
      "grad_norm": 0.2831514775753021,
      "learning_rate": 2.512733670262185e-06,
      "loss": 0.0081,
      "step": 2292580
    },
    {
      "epoch": 3.75189018283223,
      "grad_norm": 0.20697088539600372,
      "learning_rate": 2.512667778048668e-06,
      "loss": 0.0077,
      "step": 2292600
    },
    {
      "epoch": 3.751922913270884,
      "grad_norm": 0.35408860445022583,
      "learning_rate": 2.512601885835151e-06,
      "loss": 0.0107,
      "step": 2292620
    },
    {
      "epoch": 3.751955643709537,
      "grad_norm": 0.1427362710237503,
      "learning_rate": 2.5125359936216338e-06,
      "loss": 0.0125,
      "step": 2292640
    },
    {
      "epoch": 3.7519883741481905,
      "grad_norm": 0.3402506113052368,
      "learning_rate": 2.5124701014081165e-06,
      "loss": 0.0094,
      "step": 2292660
    },
    {
      "epoch": 3.7520211045868437,
      "grad_norm": 0.6728392839431763,
      "learning_rate": 2.5124042091946e-06,
      "loss": 0.0151,
      "step": 2292680
    },
    {
      "epoch": 3.752053835025497,
      "grad_norm": 0.5088242292404175,
      "learning_rate": 2.512338316981083e-06,
      "loss": 0.0108,
      "step": 2292700
    },
    {
      "epoch": 3.7520865654641504,
      "grad_norm": 0.25864970684051514,
      "learning_rate": 2.5122724247675656e-06,
      "loss": 0.0087,
      "step": 2292720
    },
    {
      "epoch": 3.7521192959028036,
      "grad_norm": 0.6705514192581177,
      "learning_rate": 2.5122065325540483e-06,
      "loss": 0.0135,
      "step": 2292740
    },
    {
      "epoch": 3.752152026341457,
      "grad_norm": 0.3595169186592102,
      "learning_rate": 2.5121406403405315e-06,
      "loss": 0.0091,
      "step": 2292760
    },
    {
      "epoch": 3.7521847567801103,
      "grad_norm": 0.1821717768907547,
      "learning_rate": 2.5120747481270142e-06,
      "loss": 0.0081,
      "step": 2292780
    },
    {
      "epoch": 3.752217487218764,
      "grad_norm": 0.3935767114162445,
      "learning_rate": 2.512008855913497e-06,
      "loss": 0.01,
      "step": 2292800
    },
    {
      "epoch": 3.752250217657417,
      "grad_norm": 0.23513101041316986,
      "learning_rate": 2.5119429636999797e-06,
      "loss": 0.0143,
      "step": 2292820
    },
    {
      "epoch": 3.75228294809607,
      "grad_norm": 0.5734557509422302,
      "learning_rate": 2.5118770714864624e-06,
      "loss": 0.0103,
      "step": 2292840
    },
    {
      "epoch": 3.752315678534724,
      "grad_norm": 0.28201282024383545,
      "learning_rate": 2.5118111792729456e-06,
      "loss": 0.01,
      "step": 2292860
    },
    {
      "epoch": 3.752348408973377,
      "grad_norm": 0.49653878808021545,
      "learning_rate": 2.5117452870594283e-06,
      "loss": 0.011,
      "step": 2292880
    },
    {
      "epoch": 3.7523811394120306,
      "grad_norm": 0.3247373402118683,
      "learning_rate": 2.511679394845911e-06,
      "loss": 0.0088,
      "step": 2292900
    },
    {
      "epoch": 3.7524138698506837,
      "grad_norm": 0.0631432980298996,
      "learning_rate": 2.511613502632394e-06,
      "loss": 0.0121,
      "step": 2292920
    },
    {
      "epoch": 3.7524466002893373,
      "grad_norm": 0.21125242114067078,
      "learning_rate": 2.511547610418877e-06,
      "loss": 0.0144,
      "step": 2292940
    },
    {
      "epoch": 3.7524793307279904,
      "grad_norm": 0.13494211435317993,
      "learning_rate": 2.5114817182053597e-06,
      "loss": 0.0098,
      "step": 2292960
    },
    {
      "epoch": 3.7525120611666436,
      "grad_norm": 0.29769474267959595,
      "learning_rate": 2.5114158259918425e-06,
      "loss": 0.0095,
      "step": 2292980
    },
    {
      "epoch": 3.752544791605297,
      "grad_norm": 0.16853460669517517,
      "learning_rate": 2.511349933778325e-06,
      "loss": 0.0102,
      "step": 2293000
    },
    {
      "epoch": 3.7525775220439503,
      "grad_norm": 0.17584335803985596,
      "learning_rate": 2.511284041564809e-06,
      "loss": 0.0085,
      "step": 2293020
    },
    {
      "epoch": 3.752610252482604,
      "grad_norm": 0.28004923462867737,
      "learning_rate": 2.5112181493512915e-06,
      "loss": 0.0119,
      "step": 2293040
    },
    {
      "epoch": 3.752642982921257,
      "grad_norm": 0.1767304390668869,
      "learning_rate": 2.5111522571377743e-06,
      "loss": 0.0127,
      "step": 2293060
    },
    {
      "epoch": 3.7526757133599107,
      "grad_norm": 0.09273374825716019,
      "learning_rate": 2.5110863649242574e-06,
      "loss": 0.0112,
      "step": 2293080
    },
    {
      "epoch": 3.752708443798564,
      "grad_norm": 0.23644261062145233,
      "learning_rate": 2.51102047271074e-06,
      "loss": 0.0084,
      "step": 2293100
    },
    {
      "epoch": 3.752741174237217,
      "grad_norm": 0.503261923789978,
      "learning_rate": 2.510954580497223e-06,
      "loss": 0.013,
      "step": 2293120
    },
    {
      "epoch": 3.7527739046758706,
      "grad_norm": 0.5016639828681946,
      "learning_rate": 2.5108886882837057e-06,
      "loss": 0.0091,
      "step": 2293140
    },
    {
      "epoch": 3.7528066351145237,
      "grad_norm": 0.19821956753730774,
      "learning_rate": 2.510822796070189e-06,
      "loss": 0.0092,
      "step": 2293160
    },
    {
      "epoch": 3.752839365553177,
      "grad_norm": 0.20047275722026825,
      "learning_rate": 2.5107569038566716e-06,
      "loss": 0.0089,
      "step": 2293180
    },
    {
      "epoch": 3.7528720959918305,
      "grad_norm": 0.32353177666664124,
      "learning_rate": 2.5106910116431543e-06,
      "loss": 0.0134,
      "step": 2293200
    },
    {
      "epoch": 3.752904826430484,
      "grad_norm": 0.305228590965271,
      "learning_rate": 2.510625119429637e-06,
      "loss": 0.0105,
      "step": 2293220
    },
    {
      "epoch": 3.752937556869137,
      "grad_norm": 0.2074894905090332,
      "learning_rate": 2.51055922721612e-06,
      "loss": 0.0119,
      "step": 2293240
    },
    {
      "epoch": 3.7529702873077904,
      "grad_norm": 0.21946285665035248,
      "learning_rate": 2.510493335002603e-06,
      "loss": 0.0077,
      "step": 2293260
    },
    {
      "epoch": 3.753003017746444,
      "grad_norm": 0.32410886883735657,
      "learning_rate": 2.5104274427890857e-06,
      "loss": 0.0137,
      "step": 2293280
    },
    {
      "epoch": 3.753035748185097,
      "grad_norm": 0.2525862753391266,
      "learning_rate": 2.5103615505755684e-06,
      "loss": 0.0131,
      "step": 2293300
    },
    {
      "epoch": 3.7530684786237503,
      "grad_norm": 0.14844776690006256,
      "learning_rate": 2.510295658362051e-06,
      "loss": 0.0141,
      "step": 2293320
    },
    {
      "epoch": 3.753101209062404,
      "grad_norm": 0.21829946339130402,
      "learning_rate": 2.5102297661485343e-06,
      "loss": 0.012,
      "step": 2293340
    },
    {
      "epoch": 3.7531339395010574,
      "grad_norm": 0.21768730878829956,
      "learning_rate": 2.510163873935017e-06,
      "loss": 0.0077,
      "step": 2293360
    },
    {
      "epoch": 3.7531666699397106,
      "grad_norm": 0.3852125406265259,
      "learning_rate": 2.5100979817215002e-06,
      "loss": 0.0098,
      "step": 2293380
    },
    {
      "epoch": 3.7531994003783637,
      "grad_norm": 0.3700734078884125,
      "learning_rate": 2.5100320895079834e-06,
      "loss": 0.0083,
      "step": 2293400
    },
    {
      "epoch": 3.7532321308170173,
      "grad_norm": 0.13468314707279205,
      "learning_rate": 2.509966197294466e-06,
      "loss": 0.0102,
      "step": 2293420
    },
    {
      "epoch": 3.7532648612556705,
      "grad_norm": 0.031468991190195084,
      "learning_rate": 2.509900305080949e-06,
      "loss": 0.0152,
      "step": 2293440
    },
    {
      "epoch": 3.7532975916943236,
      "grad_norm": 0.2907178997993469,
      "learning_rate": 2.5098344128674316e-06,
      "loss": 0.0088,
      "step": 2293460
    },
    {
      "epoch": 3.7533303221329772,
      "grad_norm": 0.1603156328201294,
      "learning_rate": 2.5097685206539148e-06,
      "loss": 0.0105,
      "step": 2293480
    },
    {
      "epoch": 3.753363052571631,
      "grad_norm": 0.32956695556640625,
      "learning_rate": 2.5097026284403975e-06,
      "loss": 0.0103,
      "step": 2293500
    },
    {
      "epoch": 3.753395783010284,
      "grad_norm": 0.3808339238166809,
      "learning_rate": 2.5096367362268802e-06,
      "loss": 0.0129,
      "step": 2293520
    },
    {
      "epoch": 3.753428513448937,
      "grad_norm": 0.11728687584400177,
      "learning_rate": 2.509570844013363e-06,
      "loss": 0.0108,
      "step": 2293540
    },
    {
      "epoch": 3.7534612438875907,
      "grad_norm": 0.16373059153556824,
      "learning_rate": 2.509504951799846e-06,
      "loss": 0.009,
      "step": 2293560
    },
    {
      "epoch": 3.753493974326244,
      "grad_norm": 0.2507076859474182,
      "learning_rate": 2.509439059586329e-06,
      "loss": 0.0118,
      "step": 2293580
    },
    {
      "epoch": 3.753526704764897,
      "grad_norm": 0.4310761094093323,
      "learning_rate": 2.5093731673728116e-06,
      "loss": 0.0119,
      "step": 2293600
    },
    {
      "epoch": 3.7535594352035506,
      "grad_norm": 0.053244587033987045,
      "learning_rate": 2.5093072751592944e-06,
      "loss": 0.0106,
      "step": 2293620
    },
    {
      "epoch": 3.7535921656422038,
      "grad_norm": 0.24559909105300903,
      "learning_rate": 2.5092413829457775e-06,
      "loss": 0.0092,
      "step": 2293640
    },
    {
      "epoch": 3.7536248960808574,
      "grad_norm": 0.28071504831314087,
      "learning_rate": 2.5091754907322603e-06,
      "loss": 0.0097,
      "step": 2293660
    },
    {
      "epoch": 3.7536576265195105,
      "grad_norm": 0.2036459892988205,
      "learning_rate": 2.509109598518743e-06,
      "loss": 0.0135,
      "step": 2293680
    },
    {
      "epoch": 3.753690356958164,
      "grad_norm": 0.1433066874742508,
      "learning_rate": 2.5090437063052258e-06,
      "loss": 0.0104,
      "step": 2293700
    },
    {
      "epoch": 3.7537230873968173,
      "grad_norm": 0.1284579038619995,
      "learning_rate": 2.5089778140917093e-06,
      "loss": 0.0093,
      "step": 2293720
    },
    {
      "epoch": 3.7537558178354704,
      "grad_norm": 0.42405110597610474,
      "learning_rate": 2.508911921878192e-06,
      "loss": 0.0091,
      "step": 2293740
    },
    {
      "epoch": 3.753788548274124,
      "grad_norm": 0.23669739067554474,
      "learning_rate": 2.508846029664675e-06,
      "loss": 0.01,
      "step": 2293760
    },
    {
      "epoch": 3.753821278712777,
      "grad_norm": 0.327492892742157,
      "learning_rate": 2.508780137451158e-06,
      "loss": 0.0119,
      "step": 2293780
    },
    {
      "epoch": 3.7538540091514307,
      "grad_norm": 0.13669201731681824,
      "learning_rate": 2.5087142452376407e-06,
      "loss": 0.0111,
      "step": 2293800
    },
    {
      "epoch": 3.753886739590084,
      "grad_norm": 0.09818746149539948,
      "learning_rate": 2.5086483530241235e-06,
      "loss": 0.0111,
      "step": 2293820
    },
    {
      "epoch": 3.7539194700287375,
      "grad_norm": 0.2992088794708252,
      "learning_rate": 2.508582460810606e-06,
      "loss": 0.0116,
      "step": 2293840
    },
    {
      "epoch": 3.7539522004673906,
      "grad_norm": 0.34553220868110657,
      "learning_rate": 2.508516568597089e-06,
      "loss": 0.0097,
      "step": 2293860
    },
    {
      "epoch": 3.753984930906044,
      "grad_norm": 0.2481543868780136,
      "learning_rate": 2.508450676383572e-06,
      "loss": 0.0127,
      "step": 2293880
    },
    {
      "epoch": 3.7540176613446974,
      "grad_norm": 0.21266542375087738,
      "learning_rate": 2.508384784170055e-06,
      "loss": 0.0108,
      "step": 2293900
    },
    {
      "epoch": 3.7540503917833505,
      "grad_norm": 0.11417601257562637,
      "learning_rate": 2.5083188919565376e-06,
      "loss": 0.0066,
      "step": 2293920
    },
    {
      "epoch": 3.754083122222004,
      "grad_norm": 0.33063310384750366,
      "learning_rate": 2.5082529997430203e-06,
      "loss": 0.0103,
      "step": 2293940
    },
    {
      "epoch": 3.7541158526606573,
      "grad_norm": 0.24854816496372223,
      "learning_rate": 2.5081871075295035e-06,
      "loss": 0.0121,
      "step": 2293960
    },
    {
      "epoch": 3.754148583099311,
      "grad_norm": 0.1901463121175766,
      "learning_rate": 2.5081212153159862e-06,
      "loss": 0.0085,
      "step": 2293980
    },
    {
      "epoch": 3.754181313537964,
      "grad_norm": 0.1780761182308197,
      "learning_rate": 2.508055323102469e-06,
      "loss": 0.0094,
      "step": 2294000
    },
    {
      "epoch": 3.754214043976617,
      "grad_norm": 0.3019789457321167,
      "learning_rate": 2.5079894308889517e-06,
      "loss": 0.0102,
      "step": 2294020
    },
    {
      "epoch": 3.7542467744152708,
      "grad_norm": 0.36584481596946716,
      "learning_rate": 2.507923538675435e-06,
      "loss": 0.008,
      "step": 2294040
    },
    {
      "epoch": 3.754279504853924,
      "grad_norm": 0.5759909152984619,
      "learning_rate": 2.5078576464619176e-06,
      "loss": 0.0157,
      "step": 2294060
    },
    {
      "epoch": 3.7543122352925775,
      "grad_norm": 0.2617744207382202,
      "learning_rate": 2.5077917542484008e-06,
      "loss": 0.0112,
      "step": 2294080
    },
    {
      "epoch": 3.7543449657312307,
      "grad_norm": 0.1335289478302002,
      "learning_rate": 2.507725862034884e-06,
      "loss": 0.0097,
      "step": 2294100
    },
    {
      "epoch": 3.7543776961698843,
      "grad_norm": 0.2150876671075821,
      "learning_rate": 2.5076599698213667e-06,
      "loss": 0.0114,
      "step": 2294120
    },
    {
      "epoch": 3.7544104266085374,
      "grad_norm": 0.0727127194404602,
      "learning_rate": 2.5075940776078494e-06,
      "loss": 0.009,
      "step": 2294140
    },
    {
      "epoch": 3.7544431570471906,
      "grad_norm": 0.039006371051073074,
      "learning_rate": 2.507528185394332e-06,
      "loss": 0.0084,
      "step": 2294160
    },
    {
      "epoch": 3.754475887485844,
      "grad_norm": 0.1448708027601242,
      "learning_rate": 2.5074622931808153e-06,
      "loss": 0.0107,
      "step": 2294180
    },
    {
      "epoch": 3.7545086179244973,
      "grad_norm": 0.6910737752914429,
      "learning_rate": 2.507396400967298e-06,
      "loss": 0.0092,
      "step": 2294200
    },
    {
      "epoch": 3.754541348363151,
      "grad_norm": 0.4346202313899994,
      "learning_rate": 2.507330508753781e-06,
      "loss": 0.0082,
      "step": 2294220
    },
    {
      "epoch": 3.754574078801804,
      "grad_norm": 0.26880428194999695,
      "learning_rate": 2.5072646165402635e-06,
      "loss": 0.0097,
      "step": 2294240
    },
    {
      "epoch": 3.7546068092404576,
      "grad_norm": 0.055085379630327225,
      "learning_rate": 2.5071987243267467e-06,
      "loss": 0.0175,
      "step": 2294260
    },
    {
      "epoch": 3.754639539679111,
      "grad_norm": 0.07813969999551773,
      "learning_rate": 2.5071328321132294e-06,
      "loss": 0.0081,
      "step": 2294280
    },
    {
      "epoch": 3.754672270117764,
      "grad_norm": 0.6668938398361206,
      "learning_rate": 2.507066939899712e-06,
      "loss": 0.0151,
      "step": 2294300
    },
    {
      "epoch": 3.7547050005564175,
      "grad_norm": 0.17905744910240173,
      "learning_rate": 2.507001047686195e-06,
      "loss": 0.0149,
      "step": 2294320
    },
    {
      "epoch": 3.7547377309950707,
      "grad_norm": 0.5260429382324219,
      "learning_rate": 2.5069351554726777e-06,
      "loss": 0.0098,
      "step": 2294340
    },
    {
      "epoch": 3.7547704614337243,
      "grad_norm": 0.28487086296081543,
      "learning_rate": 2.506869263259161e-06,
      "loss": 0.0108,
      "step": 2294360
    },
    {
      "epoch": 3.7548031918723774,
      "grad_norm": 0.14132164418697357,
      "learning_rate": 2.5068033710456436e-06,
      "loss": 0.0095,
      "step": 2294380
    },
    {
      "epoch": 3.754835922311031,
      "grad_norm": 0.4368065595626831,
      "learning_rate": 2.5067374788321263e-06,
      "loss": 0.0139,
      "step": 2294400
    },
    {
      "epoch": 3.754868652749684,
      "grad_norm": 0.346904993057251,
      "learning_rate": 2.506671586618609e-06,
      "loss": 0.0094,
      "step": 2294420
    },
    {
      "epoch": 3.7549013831883373,
      "grad_norm": 0.0926302894949913,
      "learning_rate": 2.5066056944050926e-06,
      "loss": 0.0141,
      "step": 2294440
    },
    {
      "epoch": 3.754934113626991,
      "grad_norm": 0.10992476344108582,
      "learning_rate": 2.5065398021915754e-06,
      "loss": 0.0096,
      "step": 2294460
    },
    {
      "epoch": 3.754966844065644,
      "grad_norm": 0.1418808400630951,
      "learning_rate": 2.506473909978058e-06,
      "loss": 0.0123,
      "step": 2294480
    },
    {
      "epoch": 3.7549995745042977,
      "grad_norm": 0.1255815327167511,
      "learning_rate": 2.5064080177645413e-06,
      "loss": 0.012,
      "step": 2294500
    },
    {
      "epoch": 3.755032304942951,
      "grad_norm": 0.22496925294399261,
      "learning_rate": 2.506342125551024e-06,
      "loss": 0.0072,
      "step": 2294520
    },
    {
      "epoch": 3.7550650353816044,
      "grad_norm": 0.2978251278400421,
      "learning_rate": 2.5062762333375067e-06,
      "loss": 0.0076,
      "step": 2294540
    },
    {
      "epoch": 3.7550977658202576,
      "grad_norm": 0.07377243041992188,
      "learning_rate": 2.5062103411239895e-06,
      "loss": 0.0116,
      "step": 2294560
    },
    {
      "epoch": 3.7551304962589107,
      "grad_norm": 0.14184199273586273,
      "learning_rate": 2.5061444489104727e-06,
      "loss": 0.0157,
      "step": 2294580
    },
    {
      "epoch": 3.7551632266975643,
      "grad_norm": 0.1369616687297821,
      "learning_rate": 2.5060785566969554e-06,
      "loss": 0.0138,
      "step": 2294600
    },
    {
      "epoch": 3.7551959571362175,
      "grad_norm": 0.13013955950737,
      "learning_rate": 2.506012664483438e-06,
      "loss": 0.0142,
      "step": 2294620
    },
    {
      "epoch": 3.7552286875748706,
      "grad_norm": 0.29299038648605347,
      "learning_rate": 2.505946772269921e-06,
      "loss": 0.0096,
      "step": 2294640
    },
    {
      "epoch": 3.755261418013524,
      "grad_norm": 0.03125317022204399,
      "learning_rate": 2.505880880056404e-06,
      "loss": 0.0135,
      "step": 2294660
    },
    {
      "epoch": 3.755294148452178,
      "grad_norm": 0.4270651340484619,
      "learning_rate": 2.5058149878428868e-06,
      "loss": 0.0141,
      "step": 2294680
    },
    {
      "epoch": 3.755326878890831,
      "grad_norm": 0.28528010845184326,
      "learning_rate": 2.5057490956293695e-06,
      "loss": 0.0177,
      "step": 2294700
    },
    {
      "epoch": 3.755359609329484,
      "grad_norm": 0.67506343126297,
      "learning_rate": 2.5056832034158523e-06,
      "loss": 0.009,
      "step": 2294720
    },
    {
      "epoch": 3.7553923397681377,
      "grad_norm": 0.6643957495689392,
      "learning_rate": 2.505617311202335e-06,
      "loss": 0.0108,
      "step": 2294740
    },
    {
      "epoch": 3.755425070206791,
      "grad_norm": 0.2483798861503601,
      "learning_rate": 2.505551418988818e-06,
      "loss": 0.0149,
      "step": 2294760
    },
    {
      "epoch": 3.755457800645444,
      "grad_norm": 0.16576991975307465,
      "learning_rate": 2.5054855267753013e-06,
      "loss": 0.014,
      "step": 2294780
    },
    {
      "epoch": 3.7554905310840976,
      "grad_norm": 0.6025569438934326,
      "learning_rate": 2.5054196345617845e-06,
      "loss": 0.0126,
      "step": 2294800
    },
    {
      "epoch": 3.755523261522751,
      "grad_norm": 0.19079503417015076,
      "learning_rate": 2.5053537423482672e-06,
      "loss": 0.0109,
      "step": 2294820
    },
    {
      "epoch": 3.7555559919614043,
      "grad_norm": 0.1326894611120224,
      "learning_rate": 2.50528785013475e-06,
      "loss": 0.0118,
      "step": 2294840
    },
    {
      "epoch": 3.7555887224000575,
      "grad_norm": 0.19626258313655853,
      "learning_rate": 2.5052219579212327e-06,
      "loss": 0.0083,
      "step": 2294860
    },
    {
      "epoch": 3.755621452838711,
      "grad_norm": 0.08269089460372925,
      "learning_rate": 2.5051560657077154e-06,
      "loss": 0.0167,
      "step": 2294880
    },
    {
      "epoch": 3.755654183277364,
      "grad_norm": 0.08664838969707489,
      "learning_rate": 2.5050901734941986e-06,
      "loss": 0.0095,
      "step": 2294900
    },
    {
      "epoch": 3.7556869137160174,
      "grad_norm": 0.38251036405563354,
      "learning_rate": 2.5050242812806813e-06,
      "loss": 0.0163,
      "step": 2294920
    },
    {
      "epoch": 3.755719644154671,
      "grad_norm": 0.21659903228282928,
      "learning_rate": 2.504958389067164e-06,
      "loss": 0.015,
      "step": 2294940
    },
    {
      "epoch": 3.7557523745933246,
      "grad_norm": 0.11179929971694946,
      "learning_rate": 2.504892496853647e-06,
      "loss": 0.0084,
      "step": 2294960
    },
    {
      "epoch": 3.7557851050319777,
      "grad_norm": 0.2245452105998993,
      "learning_rate": 2.50482660464013e-06,
      "loss": 0.0081,
      "step": 2294980
    },
    {
      "epoch": 3.755817835470631,
      "grad_norm": 0.10354408621788025,
      "learning_rate": 2.5047607124266127e-06,
      "loss": 0.0134,
      "step": 2295000
    },
    {
      "epoch": 3.7558505659092845,
      "grad_norm": 0.6011390686035156,
      "learning_rate": 2.5046948202130955e-06,
      "loss": 0.012,
      "step": 2295020
    },
    {
      "epoch": 3.7558832963479376,
      "grad_norm": 0.19935697317123413,
      "learning_rate": 2.504628927999578e-06,
      "loss": 0.0112,
      "step": 2295040
    },
    {
      "epoch": 3.7559160267865908,
      "grad_norm": 0.9085395932197571,
      "learning_rate": 2.5045630357860614e-06,
      "loss": 0.0095,
      "step": 2295060
    },
    {
      "epoch": 3.7559487572252443,
      "grad_norm": 0.3204777240753174,
      "learning_rate": 2.504497143572544e-06,
      "loss": 0.0063,
      "step": 2295080
    },
    {
      "epoch": 3.755981487663898,
      "grad_norm": 0.20682109892368317,
      "learning_rate": 2.504431251359027e-06,
      "loss": 0.0115,
      "step": 2295100
    },
    {
      "epoch": 3.756014218102551,
      "grad_norm": 0.20102804899215698,
      "learning_rate": 2.5043653591455096e-06,
      "loss": 0.0085,
      "step": 2295120
    },
    {
      "epoch": 3.7560469485412042,
      "grad_norm": 0.31842315196990967,
      "learning_rate": 2.504299466931993e-06,
      "loss": 0.0098,
      "step": 2295140
    },
    {
      "epoch": 3.756079678979858,
      "grad_norm": 0.05576653033494949,
      "learning_rate": 2.504233574718476e-06,
      "loss": 0.0097,
      "step": 2295160
    },
    {
      "epoch": 3.756112409418511,
      "grad_norm": 0.07429187744855881,
      "learning_rate": 2.5041676825049587e-06,
      "loss": 0.0109,
      "step": 2295180
    },
    {
      "epoch": 3.756145139857164,
      "grad_norm": 0.2352912575006485,
      "learning_rate": 2.504101790291442e-06,
      "loss": 0.0108,
      "step": 2295200
    },
    {
      "epoch": 3.7561778702958177,
      "grad_norm": 0.20498976111412048,
      "learning_rate": 2.5040358980779246e-06,
      "loss": 0.0085,
      "step": 2295220
    },
    {
      "epoch": 3.756210600734471,
      "grad_norm": 0.18165448307991028,
      "learning_rate": 2.5039700058644073e-06,
      "loss": 0.0114,
      "step": 2295240
    },
    {
      "epoch": 3.7562433311731245,
      "grad_norm": 0.14687694609165192,
      "learning_rate": 2.50390411365089e-06,
      "loss": 0.0133,
      "step": 2295260
    },
    {
      "epoch": 3.7562760616117776,
      "grad_norm": 0.42619046568870544,
      "learning_rate": 2.5038382214373728e-06,
      "loss": 0.0088,
      "step": 2295280
    },
    {
      "epoch": 3.7563087920504312,
      "grad_norm": 0.26737692952156067,
      "learning_rate": 2.503772329223856e-06,
      "loss": 0.0102,
      "step": 2295300
    },
    {
      "epoch": 3.7563415224890844,
      "grad_norm": 0.6039056181907654,
      "learning_rate": 2.5037064370103387e-06,
      "loss": 0.0152,
      "step": 2295320
    },
    {
      "epoch": 3.7563742529277375,
      "grad_norm": 0.26349490880966187,
      "learning_rate": 2.5036405447968214e-06,
      "loss": 0.0098,
      "step": 2295340
    },
    {
      "epoch": 3.756406983366391,
      "grad_norm": 0.22797757387161255,
      "learning_rate": 2.503574652583304e-06,
      "loss": 0.0064,
      "step": 2295360
    },
    {
      "epoch": 3.7564397138050443,
      "grad_norm": 0.640567421913147,
      "learning_rate": 2.5035087603697873e-06,
      "loss": 0.0143,
      "step": 2295380
    },
    {
      "epoch": 3.756472444243698,
      "grad_norm": 0.6312927007675171,
      "learning_rate": 2.50344286815627e-06,
      "loss": 0.0107,
      "step": 2295400
    },
    {
      "epoch": 3.756505174682351,
      "grad_norm": 0.19203951954841614,
      "learning_rate": 2.503376975942753e-06,
      "loss": 0.0083,
      "step": 2295420
    },
    {
      "epoch": 3.7565379051210046,
      "grad_norm": 0.0989501029253006,
      "learning_rate": 2.5033110837292355e-06,
      "loss": 0.009,
      "step": 2295440
    },
    {
      "epoch": 3.7565706355596578,
      "grad_norm": 0.24356120824813843,
      "learning_rate": 2.5032451915157187e-06,
      "loss": 0.0085,
      "step": 2295460
    },
    {
      "epoch": 3.756603365998311,
      "grad_norm": 0.2044849842786789,
      "learning_rate": 2.503179299302202e-06,
      "loss": 0.0106,
      "step": 2295480
    },
    {
      "epoch": 3.7566360964369645,
      "grad_norm": 0.1435624063014984,
      "learning_rate": 2.5031134070886846e-06,
      "loss": 0.0091,
      "step": 2295500
    },
    {
      "epoch": 3.7566688268756177,
      "grad_norm": 0.08466840535402298,
      "learning_rate": 2.5030475148751678e-06,
      "loss": 0.011,
      "step": 2295520
    },
    {
      "epoch": 3.7567015573142712,
      "grad_norm": 0.23693856596946716,
      "learning_rate": 2.5029816226616505e-06,
      "loss": 0.01,
      "step": 2295540
    },
    {
      "epoch": 3.7567342877529244,
      "grad_norm": 0.18766207993030548,
      "learning_rate": 2.5029157304481333e-06,
      "loss": 0.0095,
      "step": 2295560
    },
    {
      "epoch": 3.756767018191578,
      "grad_norm": 0.09105049818754196,
      "learning_rate": 2.502849838234616e-06,
      "loss": 0.0114,
      "step": 2295580
    },
    {
      "epoch": 3.756799748630231,
      "grad_norm": 0.30115029215812683,
      "learning_rate": 2.502783946021099e-06,
      "loss": 0.0084,
      "step": 2295600
    },
    {
      "epoch": 3.7568324790688843,
      "grad_norm": 0.10608387738466263,
      "learning_rate": 2.502718053807582e-06,
      "loss": 0.0133,
      "step": 2295620
    },
    {
      "epoch": 3.756865209507538,
      "grad_norm": 0.1250738501548767,
      "learning_rate": 2.5026521615940646e-06,
      "loss": 0.0091,
      "step": 2295640
    },
    {
      "epoch": 3.756897939946191,
      "grad_norm": 0.24115332961082458,
      "learning_rate": 2.5025862693805474e-06,
      "loss": 0.0131,
      "step": 2295660
    },
    {
      "epoch": 3.7569306703848446,
      "grad_norm": 1.2239775657653809,
      "learning_rate": 2.5025203771670305e-06,
      "loss": 0.0079,
      "step": 2295680
    },
    {
      "epoch": 3.756963400823498,
      "grad_norm": 0.558355450630188,
      "learning_rate": 2.5024544849535133e-06,
      "loss": 0.0161,
      "step": 2295700
    },
    {
      "epoch": 3.7569961312621514,
      "grad_norm": 0.39286839962005615,
      "learning_rate": 2.502388592739996e-06,
      "loss": 0.0092,
      "step": 2295720
    },
    {
      "epoch": 3.7570288617008045,
      "grad_norm": 0.11708433926105499,
      "learning_rate": 2.5023227005264788e-06,
      "loss": 0.0105,
      "step": 2295740
    },
    {
      "epoch": 3.7570615921394577,
      "grad_norm": 0.11660908162593842,
      "learning_rate": 2.5022568083129615e-06,
      "loss": 0.0135,
      "step": 2295760
    },
    {
      "epoch": 3.7570943225781113,
      "grad_norm": 0.2291019707918167,
      "learning_rate": 2.5021909160994447e-06,
      "loss": 0.0089,
      "step": 2295780
    },
    {
      "epoch": 3.7571270530167644,
      "grad_norm": 0.32799041271209717,
      "learning_rate": 2.5021250238859274e-06,
      "loss": 0.008,
      "step": 2295800
    },
    {
      "epoch": 3.757159783455418,
      "grad_norm": 0.28149178624153137,
      "learning_rate": 2.50205913167241e-06,
      "loss": 0.0117,
      "step": 2295820
    },
    {
      "epoch": 3.757192513894071,
      "grad_norm": 0.3417384922504425,
      "learning_rate": 2.5019932394588937e-06,
      "loss": 0.0086,
      "step": 2295840
    },
    {
      "epoch": 3.7572252443327248,
      "grad_norm": 0.35651957988739014,
      "learning_rate": 2.5019273472453765e-06,
      "loss": 0.0105,
      "step": 2295860
    },
    {
      "epoch": 3.757257974771378,
      "grad_norm": 0.13648433983325958,
      "learning_rate": 2.501861455031859e-06,
      "loss": 0.0092,
      "step": 2295880
    },
    {
      "epoch": 3.757290705210031,
      "grad_norm": 0.2823866307735443,
      "learning_rate": 2.501795562818342e-06,
      "loss": 0.012,
      "step": 2295900
    },
    {
      "epoch": 3.7573234356486847,
      "grad_norm": 0.19302493333816528,
      "learning_rate": 2.501729670604825e-06,
      "loss": 0.0087,
      "step": 2295920
    },
    {
      "epoch": 3.757356166087338,
      "grad_norm": 0.21867360174655914,
      "learning_rate": 2.501663778391308e-06,
      "loss": 0.0094,
      "step": 2295940
    },
    {
      "epoch": 3.7573888965259914,
      "grad_norm": 0.209734708070755,
      "learning_rate": 2.5015978861777906e-06,
      "loss": 0.0111,
      "step": 2295960
    },
    {
      "epoch": 3.7574216269646445,
      "grad_norm": 0.2556416690349579,
      "learning_rate": 2.5015319939642733e-06,
      "loss": 0.01,
      "step": 2295980
    },
    {
      "epoch": 3.757454357403298,
      "grad_norm": 0.6112436056137085,
      "learning_rate": 2.5014661017507565e-06,
      "loss": 0.015,
      "step": 2296000
    },
    {
      "epoch": 3.7574870878419513,
      "grad_norm": 0.21756896376609802,
      "learning_rate": 2.5014002095372392e-06,
      "loss": 0.0085,
      "step": 2296020
    },
    {
      "epoch": 3.7575198182806044,
      "grad_norm": 0.0406816191971302,
      "learning_rate": 2.501334317323722e-06,
      "loss": 0.0095,
      "step": 2296040
    },
    {
      "epoch": 3.757552548719258,
      "grad_norm": 0.7503858804702759,
      "learning_rate": 2.5012684251102047e-06,
      "loss": 0.0125,
      "step": 2296060
    },
    {
      "epoch": 3.757585279157911,
      "grad_norm": 0.07551500201225281,
      "learning_rate": 2.501202532896688e-06,
      "loss": 0.0123,
      "step": 2296080
    },
    {
      "epoch": 3.757618009596565,
      "grad_norm": 0.1938101351261139,
      "learning_rate": 2.5011366406831706e-06,
      "loss": 0.0133,
      "step": 2296100
    },
    {
      "epoch": 3.757650740035218,
      "grad_norm": 0.14312238991260529,
      "learning_rate": 2.5010707484696534e-06,
      "loss": 0.0093,
      "step": 2296120
    },
    {
      "epoch": 3.7576834704738715,
      "grad_norm": 0.06364434212446213,
      "learning_rate": 2.501004856256136e-06,
      "loss": 0.0086,
      "step": 2296140
    },
    {
      "epoch": 3.7577162009125247,
      "grad_norm": 0.45352840423583984,
      "learning_rate": 2.500938964042619e-06,
      "loss": 0.0103,
      "step": 2296160
    },
    {
      "epoch": 3.757748931351178,
      "grad_norm": 0.6168630719184875,
      "learning_rate": 2.500873071829102e-06,
      "loss": 0.008,
      "step": 2296180
    },
    {
      "epoch": 3.7577816617898314,
      "grad_norm": 0.26975077390670776,
      "learning_rate": 2.500807179615585e-06,
      "loss": 0.0097,
      "step": 2296200
    },
    {
      "epoch": 3.7578143922284846,
      "grad_norm": 0.23100724816322327,
      "learning_rate": 2.5007412874020683e-06,
      "loss": 0.0105,
      "step": 2296220
    },
    {
      "epoch": 3.7578471226671377,
      "grad_norm": 0.47839245200157166,
      "learning_rate": 2.500675395188551e-06,
      "loss": 0.0141,
      "step": 2296240
    },
    {
      "epoch": 3.7578798531057913,
      "grad_norm": 0.34614965319633484,
      "learning_rate": 2.500609502975034e-06,
      "loss": 0.0096,
      "step": 2296260
    },
    {
      "epoch": 3.757912583544445,
      "grad_norm": 0.13561038672924042,
      "learning_rate": 2.5005436107615165e-06,
      "loss": 0.0107,
      "step": 2296280
    },
    {
      "epoch": 3.757945313983098,
      "grad_norm": 1.2192641496658325,
      "learning_rate": 2.5004777185479993e-06,
      "loss": 0.011,
      "step": 2296300
    },
    {
      "epoch": 3.757978044421751,
      "grad_norm": 0.16656900942325592,
      "learning_rate": 2.5004118263344824e-06,
      "loss": 0.0079,
      "step": 2296320
    },
    {
      "epoch": 3.758010774860405,
      "grad_norm": 0.34073787927627563,
      "learning_rate": 2.500345934120965e-06,
      "loss": 0.0083,
      "step": 2296340
    },
    {
      "epoch": 3.758043505299058,
      "grad_norm": 0.29394766688346863,
      "learning_rate": 2.500280041907448e-06,
      "loss": 0.0131,
      "step": 2296360
    },
    {
      "epoch": 3.758076235737711,
      "grad_norm": 0.4345794916152954,
      "learning_rate": 2.5002141496939307e-06,
      "loss": 0.0105,
      "step": 2296380
    },
    {
      "epoch": 3.7581089661763647,
      "grad_norm": 0.2743653357028961,
      "learning_rate": 2.500148257480414e-06,
      "loss": 0.0094,
      "step": 2296400
    },
    {
      "epoch": 3.7581416966150183,
      "grad_norm": 0.1616036742925644,
      "learning_rate": 2.5000823652668966e-06,
      "loss": 0.0111,
      "step": 2296420
    },
    {
      "epoch": 3.7581744270536714,
      "grad_norm": 0.18067030608654022,
      "learning_rate": 2.5000164730533793e-06,
      "loss": 0.0113,
      "step": 2296440
    },
    {
      "epoch": 3.7582071574923246,
      "grad_norm": 0.33801016211509705,
      "learning_rate": 2.4999505808398625e-06,
      "loss": 0.0111,
      "step": 2296460
    },
    {
      "epoch": 3.758239887930978,
      "grad_norm": 0.22878754138946533,
      "learning_rate": 2.499884688626345e-06,
      "loss": 0.0094,
      "step": 2296480
    },
    {
      "epoch": 3.7582726183696313,
      "grad_norm": 0.8542931079864502,
      "learning_rate": 2.499818796412828e-06,
      "loss": 0.0145,
      "step": 2296500
    },
    {
      "epoch": 3.7583053488082845,
      "grad_norm": 0.42945605516433716,
      "learning_rate": 2.499752904199311e-06,
      "loss": 0.0099,
      "step": 2296520
    },
    {
      "epoch": 3.758338079246938,
      "grad_norm": 0.23988765478134155,
      "learning_rate": 2.499687011985794e-06,
      "loss": 0.0125,
      "step": 2296540
    },
    {
      "epoch": 3.7583708096855917,
      "grad_norm": 0.0430448018014431,
      "learning_rate": 2.4996211197722766e-06,
      "loss": 0.0091,
      "step": 2296560
    },
    {
      "epoch": 3.758403540124245,
      "grad_norm": 0.17567452788352966,
      "learning_rate": 2.4995552275587593e-06,
      "loss": 0.0083,
      "step": 2296580
    },
    {
      "epoch": 3.758436270562898,
      "grad_norm": 0.21623460948467255,
      "learning_rate": 2.4994893353452425e-06,
      "loss": 0.0095,
      "step": 2296600
    },
    {
      "epoch": 3.7584690010015516,
      "grad_norm": 0.19250817596912384,
      "learning_rate": 2.4994234431317252e-06,
      "loss": 0.0131,
      "step": 2296620
    },
    {
      "epoch": 3.7585017314402047,
      "grad_norm": 0.42711499333381653,
      "learning_rate": 2.4993575509182084e-06,
      "loss": 0.012,
      "step": 2296640
    },
    {
      "epoch": 3.758534461878858,
      "grad_norm": 0.5587573647499084,
      "learning_rate": 2.499291658704691e-06,
      "loss": 0.0074,
      "step": 2296660
    },
    {
      "epoch": 3.7585671923175115,
      "grad_norm": 0.26911383867263794,
      "learning_rate": 2.499225766491174e-06,
      "loss": 0.0094,
      "step": 2296680
    },
    {
      "epoch": 3.7585999227561646,
      "grad_norm": 0.3696483075618744,
      "learning_rate": 2.4991598742776566e-06,
      "loss": 0.0088,
      "step": 2296700
    },
    {
      "epoch": 3.758632653194818,
      "grad_norm": 0.5491644740104675,
      "learning_rate": 2.4990939820641398e-06,
      "loss": 0.0111,
      "step": 2296720
    },
    {
      "epoch": 3.7586653836334714,
      "grad_norm": 0.5896581411361694,
      "learning_rate": 2.4990280898506225e-06,
      "loss": 0.0174,
      "step": 2296740
    },
    {
      "epoch": 3.758698114072125,
      "grad_norm": 0.10870229452848434,
      "learning_rate": 2.4989621976371053e-06,
      "loss": 0.0116,
      "step": 2296760
    },
    {
      "epoch": 3.758730844510778,
      "grad_norm": 0.10722559690475464,
      "learning_rate": 2.498896305423588e-06,
      "loss": 0.0105,
      "step": 2296780
    },
    {
      "epoch": 3.7587635749494313,
      "grad_norm": 0.3116324841976166,
      "learning_rate": 2.498830413210071e-06,
      "loss": 0.0106,
      "step": 2296800
    },
    {
      "epoch": 3.758796305388085,
      "grad_norm": 0.47580206394195557,
      "learning_rate": 2.4987645209965543e-06,
      "loss": 0.0146,
      "step": 2296820
    },
    {
      "epoch": 3.758829035826738,
      "grad_norm": 0.26639944314956665,
      "learning_rate": 2.498698628783037e-06,
      "loss": 0.0134,
      "step": 2296840
    },
    {
      "epoch": 3.7588617662653916,
      "grad_norm": 0.19541200995445251,
      "learning_rate": 2.49863273656952e-06,
      "loss": 0.0083,
      "step": 2296860
    },
    {
      "epoch": 3.7588944967040447,
      "grad_norm": 0.19909170269966125,
      "learning_rate": 2.4985668443560025e-06,
      "loss": 0.0109,
      "step": 2296880
    },
    {
      "epoch": 3.7589272271426983,
      "grad_norm": 0.17118200659751892,
      "learning_rate": 2.4985009521424857e-06,
      "loss": 0.0095,
      "step": 2296900
    },
    {
      "epoch": 3.7589599575813515,
      "grad_norm": 0.2042437642812729,
      "learning_rate": 2.4984350599289684e-06,
      "loss": 0.0094,
      "step": 2296920
    },
    {
      "epoch": 3.7589926880200046,
      "grad_norm": 0.3589494824409485,
      "learning_rate": 2.498369167715451e-06,
      "loss": 0.0084,
      "step": 2296940
    },
    {
      "epoch": 3.7590254184586582,
      "grad_norm": 0.31093665957450867,
      "learning_rate": 2.498303275501934e-06,
      "loss": 0.0167,
      "step": 2296960
    },
    {
      "epoch": 3.7590581488973114,
      "grad_norm": 0.23827674984931946,
      "learning_rate": 2.498237383288417e-06,
      "loss": 0.0128,
      "step": 2296980
    },
    {
      "epoch": 3.759090879335965,
      "grad_norm": 0.2033638209104538,
      "learning_rate": 2.4981714910749e-06,
      "loss": 0.017,
      "step": 2297000
    },
    {
      "epoch": 3.759123609774618,
      "grad_norm": 0.21829922497272491,
      "learning_rate": 2.498105598861383e-06,
      "loss": 0.0139,
      "step": 2297020
    },
    {
      "epoch": 3.7591563402132717,
      "grad_norm": 0.640137255191803,
      "learning_rate": 2.4980397066478657e-06,
      "loss": 0.0093,
      "step": 2297040
    },
    {
      "epoch": 3.759189070651925,
      "grad_norm": 0.46575814485549927,
      "learning_rate": 2.4979738144343485e-06,
      "loss": 0.0114,
      "step": 2297060
    },
    {
      "epoch": 3.759221801090578,
      "grad_norm": 0.05308741331100464,
      "learning_rate": 2.4979079222208312e-06,
      "loss": 0.0086,
      "step": 2297080
    },
    {
      "epoch": 3.7592545315292316,
      "grad_norm": 0.0884358286857605,
      "learning_rate": 2.4978420300073144e-06,
      "loss": 0.0168,
      "step": 2297100
    },
    {
      "epoch": 3.7592872619678848,
      "grad_norm": 0.130558580160141,
      "learning_rate": 2.497776137793797e-06,
      "loss": 0.0108,
      "step": 2297120
    },
    {
      "epoch": 3.7593199924065384,
      "grad_norm": 0.15704800188541412,
      "learning_rate": 2.49771024558028e-06,
      "loss": 0.0091,
      "step": 2297140
    },
    {
      "epoch": 3.7593527228451915,
      "grad_norm": 0.32642072439193726,
      "learning_rate": 2.497644353366763e-06,
      "loss": 0.0101,
      "step": 2297160
    },
    {
      "epoch": 3.759385453283845,
      "grad_norm": 0.2654038965702057,
      "learning_rate": 2.4975784611532458e-06,
      "loss": 0.0101,
      "step": 2297180
    },
    {
      "epoch": 3.7594181837224983,
      "grad_norm": 0.12694789469242096,
      "learning_rate": 2.4975125689397285e-06,
      "loss": 0.0101,
      "step": 2297200
    },
    {
      "epoch": 3.7594509141611514,
      "grad_norm": 0.2957538068294525,
      "learning_rate": 2.4974466767262117e-06,
      "loss": 0.0127,
      "step": 2297220
    },
    {
      "epoch": 3.759483644599805,
      "grad_norm": 0.1815064698457718,
      "learning_rate": 2.4973807845126944e-06,
      "loss": 0.0069,
      "step": 2297240
    },
    {
      "epoch": 3.759516375038458,
      "grad_norm": 0.6082707643508911,
      "learning_rate": 2.497314892299177e-06,
      "loss": 0.0086,
      "step": 2297260
    },
    {
      "epoch": 3.7595491054771117,
      "grad_norm": 0.056255847215652466,
      "learning_rate": 2.49724900008566e-06,
      "loss": 0.0113,
      "step": 2297280
    },
    {
      "epoch": 3.759581835915765,
      "grad_norm": 0.2585480809211731,
      "learning_rate": 2.497183107872143e-06,
      "loss": 0.0072,
      "step": 2297300
    },
    {
      "epoch": 3.7596145663544185,
      "grad_norm": 0.2408054620027542,
      "learning_rate": 2.4971172156586258e-06,
      "loss": 0.0085,
      "step": 2297320
    },
    {
      "epoch": 3.7596472967930716,
      "grad_norm": 0.35253337025642395,
      "learning_rate": 2.497051323445109e-06,
      "loss": 0.0085,
      "step": 2297340
    },
    {
      "epoch": 3.759680027231725,
      "grad_norm": 0.3006431758403778,
      "learning_rate": 2.4969854312315917e-06,
      "loss": 0.0073,
      "step": 2297360
    },
    {
      "epoch": 3.7597127576703784,
      "grad_norm": 0.2502046227455139,
      "learning_rate": 2.4969195390180744e-06,
      "loss": 0.0078,
      "step": 2297380
    },
    {
      "epoch": 3.7597454881090315,
      "grad_norm": 0.28753507137298584,
      "learning_rate": 2.496853646804557e-06,
      "loss": 0.0059,
      "step": 2297400
    },
    {
      "epoch": 3.759778218547685,
      "grad_norm": 0.08755806088447571,
      "learning_rate": 2.4967877545910403e-06,
      "loss": 0.0101,
      "step": 2297420
    },
    {
      "epoch": 3.7598109489863383,
      "grad_norm": 0.20870137214660645,
      "learning_rate": 2.496721862377523e-06,
      "loss": 0.011,
      "step": 2297440
    },
    {
      "epoch": 3.759843679424992,
      "grad_norm": 0.3507422208786011,
      "learning_rate": 2.496655970164006e-06,
      "loss": 0.013,
      "step": 2297460
    },
    {
      "epoch": 3.759876409863645,
      "grad_norm": 0.34605106711387634,
      "learning_rate": 2.4965900779504886e-06,
      "loss": 0.0112,
      "step": 2297480
    },
    {
      "epoch": 3.759909140302298,
      "grad_norm": 0.09649351239204407,
      "learning_rate": 2.4965241857369717e-06,
      "loss": 0.0077,
      "step": 2297500
    },
    {
      "epoch": 3.7599418707409518,
      "grad_norm": 0.12269283831119537,
      "learning_rate": 2.4964582935234545e-06,
      "loss": 0.0088,
      "step": 2297520
    },
    {
      "epoch": 3.759974601179605,
      "grad_norm": 0.11813533306121826,
      "learning_rate": 2.4963924013099376e-06,
      "loss": 0.0064,
      "step": 2297540
    },
    {
      "epoch": 3.7600073316182585,
      "grad_norm": 0.2157859206199646,
      "learning_rate": 2.4963265090964204e-06,
      "loss": 0.0112,
      "step": 2297560
    },
    {
      "epoch": 3.7600400620569117,
      "grad_norm": 0.2327130138874054,
      "learning_rate": 2.496260616882903e-06,
      "loss": 0.0119,
      "step": 2297580
    },
    {
      "epoch": 3.7600727924955653,
      "grad_norm": 0.2844978868961334,
      "learning_rate": 2.496194724669386e-06,
      "loss": 0.0069,
      "step": 2297600
    },
    {
      "epoch": 3.7601055229342184,
      "grad_norm": 0.33868443965911865,
      "learning_rate": 2.496128832455869e-06,
      "loss": 0.0123,
      "step": 2297620
    },
    {
      "epoch": 3.7601382533728716,
      "grad_norm": 0.25871118903160095,
      "learning_rate": 2.4960629402423517e-06,
      "loss": 0.01,
      "step": 2297640
    },
    {
      "epoch": 3.760170983811525,
      "grad_norm": 0.21408991515636444,
      "learning_rate": 2.4959970480288345e-06,
      "loss": 0.0089,
      "step": 2297660
    },
    {
      "epoch": 3.7602037142501783,
      "grad_norm": 0.6136823892593384,
      "learning_rate": 2.4959311558153176e-06,
      "loss": 0.0107,
      "step": 2297680
    },
    {
      "epoch": 3.7602364446888314,
      "grad_norm": 0.04229037091135979,
      "learning_rate": 2.4958652636018004e-06,
      "loss": 0.0106,
      "step": 2297700
    },
    {
      "epoch": 3.760269175127485,
      "grad_norm": 0.4077141284942627,
      "learning_rate": 2.495799371388283e-06,
      "loss": 0.0109,
      "step": 2297720
    },
    {
      "epoch": 3.7603019055661386,
      "grad_norm": 0.26263701915740967,
      "learning_rate": 2.4957334791747663e-06,
      "loss": 0.0092,
      "step": 2297740
    },
    {
      "epoch": 3.760334636004792,
      "grad_norm": 0.21762867271900177,
      "learning_rate": 2.495667586961249e-06,
      "loss": 0.008,
      "step": 2297760
    },
    {
      "epoch": 3.760367366443445,
      "grad_norm": 0.21885551512241364,
      "learning_rate": 2.4956016947477318e-06,
      "loss": 0.0181,
      "step": 2297780
    },
    {
      "epoch": 3.7604000968820985,
      "grad_norm": 0.15034513175487518,
      "learning_rate": 2.4955358025342145e-06,
      "loss": 0.0093,
      "step": 2297800
    },
    {
      "epoch": 3.7604328273207517,
      "grad_norm": 0.2367771565914154,
      "learning_rate": 2.4954699103206977e-06,
      "loss": 0.0061,
      "step": 2297820
    },
    {
      "epoch": 3.760465557759405,
      "grad_norm": 1.1138187646865845,
      "learning_rate": 2.4954040181071804e-06,
      "loss": 0.016,
      "step": 2297840
    },
    {
      "epoch": 3.7604982881980584,
      "grad_norm": 0.4982354938983917,
      "learning_rate": 2.4953381258936636e-06,
      "loss": 0.01,
      "step": 2297860
    },
    {
      "epoch": 3.760531018636712,
      "grad_norm": 0.14964523911476135,
      "learning_rate": 2.4952722336801463e-06,
      "loss": 0.0092,
      "step": 2297880
    },
    {
      "epoch": 3.760563749075365,
      "grad_norm": 0.4222976267337799,
      "learning_rate": 2.495206341466629e-06,
      "loss": 0.0088,
      "step": 2297900
    },
    {
      "epoch": 3.7605964795140183,
      "grad_norm": 0.19888396561145782,
      "learning_rate": 2.495140449253112e-06,
      "loss": 0.0116,
      "step": 2297920
    },
    {
      "epoch": 3.760629209952672,
      "grad_norm": 0.17421603202819824,
      "learning_rate": 2.495074557039595e-06,
      "loss": 0.0113,
      "step": 2297940
    },
    {
      "epoch": 3.760661940391325,
      "grad_norm": 0.1950993686914444,
      "learning_rate": 2.4950086648260777e-06,
      "loss": 0.0131,
      "step": 2297960
    },
    {
      "epoch": 3.760694670829978,
      "grad_norm": 0.7335221767425537,
      "learning_rate": 2.4949427726125604e-06,
      "loss": 0.025,
      "step": 2297980
    },
    {
      "epoch": 3.760727401268632,
      "grad_norm": 0.32260969281196594,
      "learning_rate": 2.494876880399043e-06,
      "loss": 0.0103,
      "step": 2298000
    },
    {
      "epoch": 3.7607601317072854,
      "grad_norm": 0.11652924865484238,
      "learning_rate": 2.4948109881855263e-06,
      "loss": 0.0115,
      "step": 2298020
    },
    {
      "epoch": 3.7607928621459386,
      "grad_norm": 0.21966557204723358,
      "learning_rate": 2.4947450959720095e-06,
      "loss": 0.0114,
      "step": 2298040
    },
    {
      "epoch": 3.7608255925845917,
      "grad_norm": 0.39382779598236084,
      "learning_rate": 2.4946792037584922e-06,
      "loss": 0.0133,
      "step": 2298060
    },
    {
      "epoch": 3.7608583230232453,
      "grad_norm": 0.16401225328445435,
      "learning_rate": 2.494613311544975e-06,
      "loss": 0.0085,
      "step": 2298080
    },
    {
      "epoch": 3.7608910534618984,
      "grad_norm": 0.07873868197202682,
      "learning_rate": 2.4945474193314577e-06,
      "loss": 0.0162,
      "step": 2298100
    },
    {
      "epoch": 3.7609237839005516,
      "grad_norm": 0.18539008498191833,
      "learning_rate": 2.494481527117941e-06,
      "loss": 0.0083,
      "step": 2298120
    },
    {
      "epoch": 3.760956514339205,
      "grad_norm": 0.4111158549785614,
      "learning_rate": 2.4944156349044236e-06,
      "loss": 0.0117,
      "step": 2298140
    },
    {
      "epoch": 3.760989244777859,
      "grad_norm": 0.4155454635620117,
      "learning_rate": 2.4943497426909064e-06,
      "loss": 0.0113,
      "step": 2298160
    },
    {
      "epoch": 3.761021975216512,
      "grad_norm": 0.495862752199173,
      "learning_rate": 2.494283850477389e-06,
      "loss": 0.0127,
      "step": 2298180
    },
    {
      "epoch": 3.761054705655165,
      "grad_norm": 0.056189995259046555,
      "learning_rate": 2.494217958263872e-06,
      "loss": 0.0087,
      "step": 2298200
    },
    {
      "epoch": 3.7610874360938187,
      "grad_norm": 0.099321648478508,
      "learning_rate": 2.494152066050355e-06,
      "loss": 0.0109,
      "step": 2298220
    },
    {
      "epoch": 3.761120166532472,
      "grad_norm": 0.28677186369895935,
      "learning_rate": 2.494086173836838e-06,
      "loss": 0.0125,
      "step": 2298240
    },
    {
      "epoch": 3.761152896971125,
      "grad_norm": 0.22269679605960846,
      "learning_rate": 2.494020281623321e-06,
      "loss": 0.0106,
      "step": 2298260
    },
    {
      "epoch": 3.7611856274097786,
      "grad_norm": 0.2190648317337036,
      "learning_rate": 2.4939543894098036e-06,
      "loss": 0.0115,
      "step": 2298280
    },
    {
      "epoch": 3.7612183578484317,
      "grad_norm": 0.42341703176498413,
      "learning_rate": 2.4938884971962864e-06,
      "loss": 0.0103,
      "step": 2298300
    },
    {
      "epoch": 3.7612510882870853,
      "grad_norm": 0.36934739351272583,
      "learning_rate": 2.4938226049827695e-06,
      "loss": 0.0176,
      "step": 2298320
    },
    {
      "epoch": 3.7612838187257385,
      "grad_norm": 0.1958201825618744,
      "learning_rate": 2.4937567127692523e-06,
      "loss": 0.0117,
      "step": 2298340
    },
    {
      "epoch": 3.761316549164392,
      "grad_norm": 0.35638517141342163,
      "learning_rate": 2.493690820555735e-06,
      "loss": 0.0146,
      "step": 2298360
    },
    {
      "epoch": 3.761349279603045,
      "grad_norm": 0.11513980478048325,
      "learning_rate": 2.4936249283422178e-06,
      "loss": 0.0105,
      "step": 2298380
    },
    {
      "epoch": 3.7613820100416984,
      "grad_norm": 0.9135622978210449,
      "learning_rate": 2.493559036128701e-06,
      "loss": 0.0097,
      "step": 2298400
    },
    {
      "epoch": 3.761414740480352,
      "grad_norm": 0.37171563506126404,
      "learning_rate": 2.4934931439151837e-06,
      "loss": 0.0117,
      "step": 2298420
    },
    {
      "epoch": 3.761447470919005,
      "grad_norm": 0.22961942851543427,
      "learning_rate": 2.493427251701667e-06,
      "loss": 0.0109,
      "step": 2298440
    },
    {
      "epoch": 3.7614802013576587,
      "grad_norm": 0.1229940876364708,
      "learning_rate": 2.4933613594881496e-06,
      "loss": 0.0108,
      "step": 2298460
    },
    {
      "epoch": 3.761512931796312,
      "grad_norm": 0.2152930498123169,
      "learning_rate": 2.4932954672746323e-06,
      "loss": 0.0084,
      "step": 2298480
    },
    {
      "epoch": 3.7615456622349654,
      "grad_norm": 0.08912841975688934,
      "learning_rate": 2.493229575061115e-06,
      "loss": 0.0128,
      "step": 2298500
    },
    {
      "epoch": 3.7615783926736186,
      "grad_norm": 0.1637231856584549,
      "learning_rate": 2.4931636828475982e-06,
      "loss": 0.0116,
      "step": 2298520
    },
    {
      "epoch": 3.7616111231122717,
      "grad_norm": 0.16172511875629425,
      "learning_rate": 2.493097790634081e-06,
      "loss": 0.0123,
      "step": 2298540
    },
    {
      "epoch": 3.7616438535509253,
      "grad_norm": 1.3722021579742432,
      "learning_rate": 2.493031898420564e-06,
      "loss": 0.0107,
      "step": 2298560
    },
    {
      "epoch": 3.7616765839895785,
      "grad_norm": 0.14217402040958405,
      "learning_rate": 2.492966006207047e-06,
      "loss": 0.0088,
      "step": 2298580
    },
    {
      "epoch": 3.761709314428232,
      "grad_norm": 0.3075718283653259,
      "learning_rate": 2.4929001139935296e-06,
      "loss": 0.0105,
      "step": 2298600
    },
    {
      "epoch": 3.7617420448668852,
      "grad_norm": 0.23841486871242523,
      "learning_rate": 2.4928342217800123e-06,
      "loss": 0.0114,
      "step": 2298620
    },
    {
      "epoch": 3.761774775305539,
      "grad_norm": 0.7757089138031006,
      "learning_rate": 2.4927683295664955e-06,
      "loss": 0.0166,
      "step": 2298640
    },
    {
      "epoch": 3.761807505744192,
      "grad_norm": 0.13249824941158295,
      "learning_rate": 2.4927024373529782e-06,
      "loss": 0.0104,
      "step": 2298660
    },
    {
      "epoch": 3.761840236182845,
      "grad_norm": 0.07171251624822617,
      "learning_rate": 2.492636545139461e-06,
      "loss": 0.0116,
      "step": 2298680
    },
    {
      "epoch": 3.7618729666214987,
      "grad_norm": 0.22655220329761505,
      "learning_rate": 2.4925706529259437e-06,
      "loss": 0.0116,
      "step": 2298700
    },
    {
      "epoch": 3.761905697060152,
      "grad_norm": 0.10772310942411423,
      "learning_rate": 2.492504760712427e-06,
      "loss": 0.0097,
      "step": 2298720
    },
    {
      "epoch": 3.7619384274988055,
      "grad_norm": 0.12769845128059387,
      "learning_rate": 2.4924388684989096e-06,
      "loss": 0.0103,
      "step": 2298740
    },
    {
      "epoch": 3.7619711579374586,
      "grad_norm": 0.09207139164209366,
      "learning_rate": 2.492372976285393e-06,
      "loss": 0.0062,
      "step": 2298760
    },
    {
      "epoch": 3.762003888376112,
      "grad_norm": 0.28266435861587524,
      "learning_rate": 2.4923070840718755e-06,
      "loss": 0.0119,
      "step": 2298780
    },
    {
      "epoch": 3.7620366188147654,
      "grad_norm": 0.3024457097053528,
      "learning_rate": 2.4922411918583583e-06,
      "loss": 0.0082,
      "step": 2298800
    },
    {
      "epoch": 3.7620693492534185,
      "grad_norm": 0.1825343519449234,
      "learning_rate": 2.492175299644841e-06,
      "loss": 0.0124,
      "step": 2298820
    },
    {
      "epoch": 3.762102079692072,
      "grad_norm": 0.30822741985321045,
      "learning_rate": 2.492109407431324e-06,
      "loss": 0.0098,
      "step": 2298840
    },
    {
      "epoch": 3.7621348101307253,
      "grad_norm": 0.19295090436935425,
      "learning_rate": 2.492043515217807e-06,
      "loss": 0.0149,
      "step": 2298860
    },
    {
      "epoch": 3.762167540569379,
      "grad_norm": 0.6790626645088196,
      "learning_rate": 2.4919776230042897e-06,
      "loss": 0.0133,
      "step": 2298880
    },
    {
      "epoch": 3.762200271008032,
      "grad_norm": 0.18580736219882965,
      "learning_rate": 2.4919117307907724e-06,
      "loss": 0.0136,
      "step": 2298900
    },
    {
      "epoch": 3.7622330014466856,
      "grad_norm": 0.35874438285827637,
      "learning_rate": 2.4918458385772556e-06,
      "loss": 0.0102,
      "step": 2298920
    },
    {
      "epoch": 3.7622657318853387,
      "grad_norm": 0.2139779031276703,
      "learning_rate": 2.4917799463637383e-06,
      "loss": 0.0119,
      "step": 2298940
    },
    {
      "epoch": 3.762298462323992,
      "grad_norm": 0.43782883882522583,
      "learning_rate": 2.4917140541502215e-06,
      "loss": 0.0123,
      "step": 2298960
    },
    {
      "epoch": 3.7623311927626455,
      "grad_norm": 0.26259106397628784,
      "learning_rate": 2.491648161936704e-06,
      "loss": 0.015,
      "step": 2298980
    },
    {
      "epoch": 3.7623639232012986,
      "grad_norm": 0.15487977862358093,
      "learning_rate": 2.491582269723187e-06,
      "loss": 0.0114,
      "step": 2299000
    },
    {
      "epoch": 3.7623966536399522,
      "grad_norm": 0.2143869549036026,
      "learning_rate": 2.4915163775096697e-06,
      "loss": 0.0175,
      "step": 2299020
    },
    {
      "epoch": 3.7624293840786054,
      "grad_norm": 0.8135484457015991,
      "learning_rate": 2.491450485296153e-06,
      "loss": 0.0088,
      "step": 2299040
    },
    {
      "epoch": 3.762462114517259,
      "grad_norm": 0.1509460061788559,
      "learning_rate": 2.4913845930826356e-06,
      "loss": 0.0153,
      "step": 2299060
    },
    {
      "epoch": 3.762494844955912,
      "grad_norm": 0.13421759009361267,
      "learning_rate": 2.4913187008691183e-06,
      "loss": 0.0108,
      "step": 2299080
    },
    {
      "epoch": 3.7625275753945653,
      "grad_norm": 0.14870573580265045,
      "learning_rate": 2.4912528086556015e-06,
      "loss": 0.0089,
      "step": 2299100
    },
    {
      "epoch": 3.762560305833219,
      "grad_norm": 0.20151188969612122,
      "learning_rate": 2.4911869164420842e-06,
      "loss": 0.0073,
      "step": 2299120
    },
    {
      "epoch": 3.762593036271872,
      "grad_norm": 0.08331277221441269,
      "learning_rate": 2.491121024228567e-06,
      "loss": 0.0126,
      "step": 2299140
    },
    {
      "epoch": 3.762625766710525,
      "grad_norm": 0.1537773758172989,
      "learning_rate": 2.49105513201505e-06,
      "loss": 0.0106,
      "step": 2299160
    },
    {
      "epoch": 3.7626584971491788,
      "grad_norm": 0.27422645688056946,
      "learning_rate": 2.490989239801533e-06,
      "loss": 0.013,
      "step": 2299180
    },
    {
      "epoch": 3.7626912275878324,
      "grad_norm": 0.5236251950263977,
      "learning_rate": 2.4909233475880156e-06,
      "loss": 0.0112,
      "step": 2299200
    },
    {
      "epoch": 3.7627239580264855,
      "grad_norm": 0.24526165425777435,
      "learning_rate": 2.4908574553744983e-06,
      "loss": 0.0118,
      "step": 2299220
    },
    {
      "epoch": 3.7627566884651387,
      "grad_norm": 0.13934840261936188,
      "learning_rate": 2.4907915631609815e-06,
      "loss": 0.0126,
      "step": 2299240
    },
    {
      "epoch": 3.7627894189037923,
      "grad_norm": 0.3018222749233246,
      "learning_rate": 2.4907256709474647e-06,
      "loss": 0.0113,
      "step": 2299260
    },
    {
      "epoch": 3.7628221493424454,
      "grad_norm": 0.34022337198257446,
      "learning_rate": 2.4906597787339474e-06,
      "loss": 0.0109,
      "step": 2299280
    },
    {
      "epoch": 3.7628548797810986,
      "grad_norm": 0.34978458285331726,
      "learning_rate": 2.49059388652043e-06,
      "loss": 0.008,
      "step": 2299300
    },
    {
      "epoch": 3.762887610219752,
      "grad_norm": 0.21234339475631714,
      "learning_rate": 2.490527994306913e-06,
      "loss": 0.0063,
      "step": 2299320
    },
    {
      "epoch": 3.7629203406584057,
      "grad_norm": 0.42550963163375854,
      "learning_rate": 2.4904621020933956e-06,
      "loss": 0.0117,
      "step": 2299340
    },
    {
      "epoch": 3.762953071097059,
      "grad_norm": 0.23295152187347412,
      "learning_rate": 2.490396209879879e-06,
      "loss": 0.0168,
      "step": 2299360
    },
    {
      "epoch": 3.762985801535712,
      "grad_norm": 0.14217746257781982,
      "learning_rate": 2.4903303176663615e-06,
      "loss": 0.0114,
      "step": 2299380
    },
    {
      "epoch": 3.7630185319743656,
      "grad_norm": 0.11388858407735825,
      "learning_rate": 2.4902644254528443e-06,
      "loss": 0.0067,
      "step": 2299400
    },
    {
      "epoch": 3.763051262413019,
      "grad_norm": 0.1723104864358902,
      "learning_rate": 2.490198533239327e-06,
      "loss": 0.0084,
      "step": 2299420
    },
    {
      "epoch": 3.763083992851672,
      "grad_norm": 0.15173032879829407,
      "learning_rate": 2.49013264102581e-06,
      "loss": 0.0138,
      "step": 2299440
    },
    {
      "epoch": 3.7631167232903255,
      "grad_norm": 0.1490967571735382,
      "learning_rate": 2.4900667488122933e-06,
      "loss": 0.008,
      "step": 2299460
    },
    {
      "epoch": 3.763149453728979,
      "grad_norm": 0.21216663718223572,
      "learning_rate": 2.490000856598776e-06,
      "loss": 0.0129,
      "step": 2299480
    },
    {
      "epoch": 3.7631821841676323,
      "grad_norm": 0.3817897439002991,
      "learning_rate": 2.489934964385259e-06,
      "loss": 0.0069,
      "step": 2299500
    },
    {
      "epoch": 3.7632149146062854,
      "grad_norm": 0.08832164853811264,
      "learning_rate": 2.4898690721717416e-06,
      "loss": 0.0152,
      "step": 2299520
    },
    {
      "epoch": 3.763247645044939,
      "grad_norm": 0.09769997745752335,
      "learning_rate": 2.4898031799582247e-06,
      "loss": 0.0119,
      "step": 2299540
    },
    {
      "epoch": 3.763280375483592,
      "grad_norm": 0.13503824174404144,
      "learning_rate": 2.4897372877447075e-06,
      "loss": 0.0172,
      "step": 2299560
    },
    {
      "epoch": 3.7633131059222453,
      "grad_norm": 0.4423811137676239,
      "learning_rate": 2.48967139553119e-06,
      "loss": 0.0117,
      "step": 2299580
    },
    {
      "epoch": 3.763345836360899,
      "grad_norm": 0.46581023931503296,
      "learning_rate": 2.489605503317673e-06,
      "loss": 0.0127,
      "step": 2299600
    },
    {
      "epoch": 3.7633785667995525,
      "grad_norm": 0.15789668262004852,
      "learning_rate": 2.489539611104156e-06,
      "loss": 0.0092,
      "step": 2299620
    },
    {
      "epoch": 3.7634112972382057,
      "grad_norm": 0.37250056862831116,
      "learning_rate": 2.489473718890639e-06,
      "loss": 0.0105,
      "step": 2299640
    },
    {
      "epoch": 3.763444027676859,
      "grad_norm": 0.022898081690073013,
      "learning_rate": 2.489407826677122e-06,
      "loss": 0.0081,
      "step": 2299660
    },
    {
      "epoch": 3.7634767581155124,
      "grad_norm": 0.14443765580654144,
      "learning_rate": 2.4893419344636047e-06,
      "loss": 0.0127,
      "step": 2299680
    },
    {
      "epoch": 3.7635094885541656,
      "grad_norm": 0.4435120224952698,
      "learning_rate": 2.4892760422500875e-06,
      "loss": 0.0179,
      "step": 2299700
    },
    {
      "epoch": 3.7635422189928187,
      "grad_norm": 0.5393391251564026,
      "learning_rate": 2.4892101500365702e-06,
      "loss": 0.0107,
      "step": 2299720
    },
    {
      "epoch": 3.7635749494314723,
      "grad_norm": 0.31982213258743286,
      "learning_rate": 2.4891442578230534e-06,
      "loss": 0.0132,
      "step": 2299740
    },
    {
      "epoch": 3.7636076798701255,
      "grad_norm": 0.13194793462753296,
      "learning_rate": 2.489078365609536e-06,
      "loss": 0.0206,
      "step": 2299760
    },
    {
      "epoch": 3.763640410308779,
      "grad_norm": 0.12654857337474823,
      "learning_rate": 2.489012473396019e-06,
      "loss": 0.0107,
      "step": 2299780
    },
    {
      "epoch": 3.763673140747432,
      "grad_norm": 0.31777140498161316,
      "learning_rate": 2.488946581182502e-06,
      "loss": 0.0087,
      "step": 2299800
    },
    {
      "epoch": 3.763705871186086,
      "grad_norm": 0.3706737756729126,
      "learning_rate": 2.4888806889689848e-06,
      "loss": 0.0122,
      "step": 2299820
    },
    {
      "epoch": 3.763738601624739,
      "grad_norm": 0.29256337881088257,
      "learning_rate": 2.4888147967554675e-06,
      "loss": 0.0151,
      "step": 2299840
    },
    {
      "epoch": 3.763771332063392,
      "grad_norm": 0.23991192877292633,
      "learning_rate": 2.4887489045419507e-06,
      "loss": 0.0097,
      "step": 2299860
    },
    {
      "epoch": 3.7638040625020457,
      "grad_norm": 0.05260646343231201,
      "learning_rate": 2.4886830123284334e-06,
      "loss": 0.0102,
      "step": 2299880
    },
    {
      "epoch": 3.763836792940699,
      "grad_norm": 0.18261052668094635,
      "learning_rate": 2.488617120114916e-06,
      "loss": 0.0159,
      "step": 2299900
    },
    {
      "epoch": 3.7638695233793524,
      "grad_norm": 0.22085456550121307,
      "learning_rate": 2.488551227901399e-06,
      "loss": 0.0074,
      "step": 2299920
    },
    {
      "epoch": 3.7639022538180056,
      "grad_norm": 0.678417444229126,
      "learning_rate": 2.488485335687882e-06,
      "loss": 0.01,
      "step": 2299940
    },
    {
      "epoch": 3.763934984256659,
      "grad_norm": 0.293178915977478,
      "learning_rate": 2.488419443474365e-06,
      "loss": 0.0085,
      "step": 2299960
    },
    {
      "epoch": 3.7639677146953123,
      "grad_norm": 0.077218197286129,
      "learning_rate": 2.488353551260848e-06,
      "loss": 0.0124,
      "step": 2299980
    },
    {
      "epoch": 3.7640004451339655,
      "grad_norm": 0.2850187122821808,
      "learning_rate": 2.4882876590473307e-06,
      "loss": 0.0134,
      "step": 2300000
    },
    {
      "epoch": 3.7640004451339655,
      "eval_loss": 0.00636646430939436,
      "eval_runtime": 6511.0959,
      "eval_samples_per_second": 157.863,
      "eval_steps_per_second": 15.786,
      "eval_sts-dev_pearson_cosine": 0.9854800351426399,
      "eval_sts-dev_spearman_cosine": 0.8958081866430069,
      "step": 2300000
    },
    {
      "epoch": 3.764033175572619,
      "grad_norm": 0.2244972586631775,
      "learning_rate": 2.4882217668338134e-06,
      "loss": 0.0122,
      "step": 2300020
    },
    {
      "epoch": 3.7640659060112722,
      "grad_norm": 0.26039573550224304,
      "learning_rate": 2.488155874620296e-06,
      "loss": 0.0093,
      "step": 2300040
    },
    {
      "epoch": 3.764098636449926,
      "grad_norm": 0.3956642150878906,
      "learning_rate": 2.4880899824067793e-06,
      "loss": 0.0127,
      "step": 2300060
    },
    {
      "epoch": 3.764131366888579,
      "grad_norm": 0.2333669364452362,
      "learning_rate": 2.488024090193262e-06,
      "loss": 0.0127,
      "step": 2300080
    },
    {
      "epoch": 3.7641640973272326,
      "grad_norm": 0.2582799792289734,
      "learning_rate": 2.487958197979745e-06,
      "loss": 0.0134,
      "step": 2300100
    },
    {
      "epoch": 3.7641968277658857,
      "grad_norm": 0.2900920510292053,
      "learning_rate": 2.4878923057662276e-06,
      "loss": 0.0097,
      "step": 2300120
    },
    {
      "epoch": 3.764229558204539,
      "grad_norm": 0.27136465907096863,
      "learning_rate": 2.4878264135527107e-06,
      "loss": 0.0085,
      "step": 2300140
    },
    {
      "epoch": 3.7642622886431925,
      "grad_norm": 0.08281420916318893,
      "learning_rate": 2.4877605213391935e-06,
      "loss": 0.0124,
      "step": 2300160
    },
    {
      "epoch": 3.7642950190818456,
      "grad_norm": 0.16124969720840454,
      "learning_rate": 2.4876946291256766e-06,
      "loss": 0.0084,
      "step": 2300180
    },
    {
      "epoch": 3.764327749520499,
      "grad_norm": 0.2230922430753708,
      "learning_rate": 2.4876287369121594e-06,
      "loss": 0.0081,
      "step": 2300200
    },
    {
      "epoch": 3.7643604799591524,
      "grad_norm": 0.08003954589366913,
      "learning_rate": 2.487562844698642e-06,
      "loss": 0.0109,
      "step": 2300220
    },
    {
      "epoch": 3.764393210397806,
      "grad_norm": 0.756353497505188,
      "learning_rate": 2.487496952485125e-06,
      "loss": 0.0115,
      "step": 2300240
    },
    {
      "epoch": 3.764425940836459,
      "grad_norm": 0.25668975710868835,
      "learning_rate": 2.487431060271608e-06,
      "loss": 0.0109,
      "step": 2300260
    },
    {
      "epoch": 3.7644586712751122,
      "grad_norm": 0.208165243268013,
      "learning_rate": 2.4873651680580908e-06,
      "loss": 0.0075,
      "step": 2300280
    },
    {
      "epoch": 3.764491401713766,
      "grad_norm": 0.7690454721450806,
      "learning_rate": 2.4872992758445735e-06,
      "loss": 0.0073,
      "step": 2300300
    },
    {
      "epoch": 3.764524132152419,
      "grad_norm": 0.2357809841632843,
      "learning_rate": 2.4872333836310567e-06,
      "loss": 0.0088,
      "step": 2300320
    },
    {
      "epoch": 3.7645568625910726,
      "grad_norm": 0.5339875221252441,
      "learning_rate": 2.4871674914175394e-06,
      "loss": 0.0097,
      "step": 2300340
    },
    {
      "epoch": 3.7645895930297257,
      "grad_norm": 0.5001785159111023,
      "learning_rate": 2.487101599204022e-06,
      "loss": 0.0086,
      "step": 2300360
    },
    {
      "epoch": 3.7646223234683793,
      "grad_norm": 0.2044697254896164,
      "learning_rate": 2.4870357069905053e-06,
      "loss": 0.0133,
      "step": 2300380
    },
    {
      "epoch": 3.7646550539070325,
      "grad_norm": 0.1986873894929886,
      "learning_rate": 2.486969814776988e-06,
      "loss": 0.0104,
      "step": 2300400
    },
    {
      "epoch": 3.7646877843456856,
      "grad_norm": 0.454563707113266,
      "learning_rate": 2.4869039225634708e-06,
      "loss": 0.0172,
      "step": 2300420
    },
    {
      "epoch": 3.7647205147843392,
      "grad_norm": 0.4450404942035675,
      "learning_rate": 2.4868380303499535e-06,
      "loss": 0.0139,
      "step": 2300440
    },
    {
      "epoch": 3.7647532452229924,
      "grad_norm": 0.38918063044548035,
      "learning_rate": 2.4867721381364367e-06,
      "loss": 0.0157,
      "step": 2300460
    },
    {
      "epoch": 3.764785975661646,
      "grad_norm": 0.3869544267654419,
      "learning_rate": 2.4867062459229194e-06,
      "loss": 0.0105,
      "step": 2300480
    },
    {
      "epoch": 3.764818706100299,
      "grad_norm": 0.16336394846439362,
      "learning_rate": 2.4866403537094026e-06,
      "loss": 0.0095,
      "step": 2300500
    },
    {
      "epoch": 3.7648514365389527,
      "grad_norm": 0.27994975447654724,
      "learning_rate": 2.4865744614958853e-06,
      "loss": 0.0132,
      "step": 2300520
    },
    {
      "epoch": 3.764884166977606,
      "grad_norm": 0.11256518959999084,
      "learning_rate": 2.486508569282368e-06,
      "loss": 0.0075,
      "step": 2300540
    },
    {
      "epoch": 3.764916897416259,
      "grad_norm": 0.20810526609420776,
      "learning_rate": 2.486442677068851e-06,
      "loss": 0.0113,
      "step": 2300560
    },
    {
      "epoch": 3.7649496278549126,
      "grad_norm": 0.13681648671627045,
      "learning_rate": 2.486376784855334e-06,
      "loss": 0.0074,
      "step": 2300580
    },
    {
      "epoch": 3.7649823582935658,
      "grad_norm": 0.24136805534362793,
      "learning_rate": 2.4863108926418167e-06,
      "loss": 0.0137,
      "step": 2300600
    },
    {
      "epoch": 3.7650150887322194,
      "grad_norm": 0.2567666172981262,
      "learning_rate": 2.4862450004282994e-06,
      "loss": 0.0095,
      "step": 2300620
    },
    {
      "epoch": 3.7650478191708725,
      "grad_norm": 0.19290363788604736,
      "learning_rate": 2.486179108214782e-06,
      "loss": 0.0114,
      "step": 2300640
    },
    {
      "epoch": 3.765080549609526,
      "grad_norm": 0.1825779676437378,
      "learning_rate": 2.4861132160012653e-06,
      "loss": 0.0142,
      "step": 2300660
    },
    {
      "epoch": 3.7651132800481792,
      "grad_norm": 0.5463716387748718,
      "learning_rate": 2.4860473237877485e-06,
      "loss": 0.0183,
      "step": 2300680
    },
    {
      "epoch": 3.7651460104868324,
      "grad_norm": 0.25213709473609924,
      "learning_rate": 2.4859814315742312e-06,
      "loss": 0.0084,
      "step": 2300700
    },
    {
      "epoch": 3.765178740925486,
      "grad_norm": 0.5029280185699463,
      "learning_rate": 2.485915539360714e-06,
      "loss": 0.0116,
      "step": 2300720
    },
    {
      "epoch": 3.765211471364139,
      "grad_norm": 0.22805659472942352,
      "learning_rate": 2.4858496471471967e-06,
      "loss": 0.0075,
      "step": 2300740
    },
    {
      "epoch": 3.7652442018027923,
      "grad_norm": 0.3592846691608429,
      "learning_rate": 2.48578375493368e-06,
      "loss": 0.0143,
      "step": 2300760
    },
    {
      "epoch": 3.765276932241446,
      "grad_norm": 0.21438710391521454,
      "learning_rate": 2.4857178627201626e-06,
      "loss": 0.0102,
      "step": 2300780
    },
    {
      "epoch": 3.7653096626800995,
      "grad_norm": 0.24583634734153748,
      "learning_rate": 2.4856519705066454e-06,
      "loss": 0.0132,
      "step": 2300800
    },
    {
      "epoch": 3.7653423931187526,
      "grad_norm": 0.39086973667144775,
      "learning_rate": 2.485586078293128e-06,
      "loss": 0.0142,
      "step": 2300820
    },
    {
      "epoch": 3.765375123557406,
      "grad_norm": 0.3032916486263275,
      "learning_rate": 2.485520186079611e-06,
      "loss": 0.0093,
      "step": 2300840
    },
    {
      "epoch": 3.7654078539960594,
      "grad_norm": 0.2373330146074295,
      "learning_rate": 2.485454293866094e-06,
      "loss": 0.0101,
      "step": 2300860
    },
    {
      "epoch": 3.7654405844347125,
      "grad_norm": 0.11429235339164734,
      "learning_rate": 2.485388401652577e-06,
      "loss": 0.0081,
      "step": 2300880
    },
    {
      "epoch": 3.7654733148733657,
      "grad_norm": 0.19954225420951843,
      "learning_rate": 2.48532250943906e-06,
      "loss": 0.0074,
      "step": 2300900
    },
    {
      "epoch": 3.7655060453120193,
      "grad_norm": 0.1631108969449997,
      "learning_rate": 2.4852566172255427e-06,
      "loss": 0.017,
      "step": 2300920
    },
    {
      "epoch": 3.765538775750673,
      "grad_norm": 0.8009822368621826,
      "learning_rate": 2.4851907250120254e-06,
      "loss": 0.0168,
      "step": 2300940
    },
    {
      "epoch": 3.765571506189326,
      "grad_norm": 0.12690973281860352,
      "learning_rate": 2.4851248327985086e-06,
      "loss": 0.0124,
      "step": 2300960
    },
    {
      "epoch": 3.765604236627979,
      "grad_norm": 0.5537133812904358,
      "learning_rate": 2.4850589405849913e-06,
      "loss": 0.0129,
      "step": 2300980
    },
    {
      "epoch": 3.7656369670666328,
      "grad_norm": 0.7474743723869324,
      "learning_rate": 2.484993048371474e-06,
      "loss": 0.0122,
      "step": 2301000
    },
    {
      "epoch": 3.765669697505286,
      "grad_norm": 0.35799357295036316,
      "learning_rate": 2.484927156157957e-06,
      "loss": 0.021,
      "step": 2301020
    },
    {
      "epoch": 3.765702427943939,
      "grad_norm": 0.17949530482292175,
      "learning_rate": 2.48486126394444e-06,
      "loss": 0.0072,
      "step": 2301040
    },
    {
      "epoch": 3.7657351583825927,
      "grad_norm": 0.45913684368133545,
      "learning_rate": 2.4847953717309227e-06,
      "loss": 0.0081,
      "step": 2301060
    },
    {
      "epoch": 3.7657678888212462,
      "grad_norm": 0.21311944723129272,
      "learning_rate": 2.484729479517406e-06,
      "loss": 0.0132,
      "step": 2301080
    },
    {
      "epoch": 3.7658006192598994,
      "grad_norm": 0.15352372825145721,
      "learning_rate": 2.4846635873038886e-06,
      "loss": 0.0079,
      "step": 2301100
    },
    {
      "epoch": 3.7658333496985525,
      "grad_norm": 0.09587496519088745,
      "learning_rate": 2.4845976950903713e-06,
      "loss": 0.0084,
      "step": 2301120
    },
    {
      "epoch": 3.765866080137206,
      "grad_norm": 0.41050150990486145,
      "learning_rate": 2.484531802876854e-06,
      "loss": 0.0138,
      "step": 2301140
    },
    {
      "epoch": 3.7658988105758593,
      "grad_norm": 0.17623654007911682,
      "learning_rate": 2.4844659106633372e-06,
      "loss": 0.0096,
      "step": 2301160
    },
    {
      "epoch": 3.7659315410145124,
      "grad_norm": 0.038779955357313156,
      "learning_rate": 2.48440001844982e-06,
      "loss": 0.0099,
      "step": 2301180
    },
    {
      "epoch": 3.765964271453166,
      "grad_norm": 0.2799510061740875,
      "learning_rate": 2.484334126236303e-06,
      "loss": 0.0129,
      "step": 2301200
    },
    {
      "epoch": 3.7659970018918196,
      "grad_norm": 0.4202582538127899,
      "learning_rate": 2.484268234022786e-06,
      "loss": 0.0096,
      "step": 2301220
    },
    {
      "epoch": 3.766029732330473,
      "grad_norm": 0.15475866198539734,
      "learning_rate": 2.4842023418092686e-06,
      "loss": 0.0076,
      "step": 2301240
    },
    {
      "epoch": 3.766062462769126,
      "grad_norm": 0.20640873908996582,
      "learning_rate": 2.4841364495957514e-06,
      "loss": 0.0093,
      "step": 2301260
    },
    {
      "epoch": 3.7660951932077795,
      "grad_norm": 0.5238672494888306,
      "learning_rate": 2.4840705573822345e-06,
      "loss": 0.0095,
      "step": 2301280
    },
    {
      "epoch": 3.7661279236464327,
      "grad_norm": 0.16756318509578705,
      "learning_rate": 2.4840046651687173e-06,
      "loss": 0.0074,
      "step": 2301300
    },
    {
      "epoch": 3.766160654085086,
      "grad_norm": 0.10618273913860321,
      "learning_rate": 2.4839387729552e-06,
      "loss": 0.0072,
      "step": 2301320
    },
    {
      "epoch": 3.7661933845237394,
      "grad_norm": 0.23906131088733673,
      "learning_rate": 2.4838728807416827e-06,
      "loss": 0.0088,
      "step": 2301340
    },
    {
      "epoch": 3.7662261149623926,
      "grad_norm": 0.4397740662097931,
      "learning_rate": 2.483806988528166e-06,
      "loss": 0.0131,
      "step": 2301360
    },
    {
      "epoch": 3.766258845401046,
      "grad_norm": 0.10856308788061142,
      "learning_rate": 2.4837410963146486e-06,
      "loss": 0.0099,
      "step": 2301380
    },
    {
      "epoch": 3.7662915758396993,
      "grad_norm": 0.21846893429756165,
      "learning_rate": 2.483675204101132e-06,
      "loss": 0.0101,
      "step": 2301400
    },
    {
      "epoch": 3.766324306278353,
      "grad_norm": 0.5033252835273743,
      "learning_rate": 2.4836093118876145e-06,
      "loss": 0.0108,
      "step": 2301420
    },
    {
      "epoch": 3.766357036717006,
      "grad_norm": 0.2458941638469696,
      "learning_rate": 2.4835434196740973e-06,
      "loss": 0.0166,
      "step": 2301440
    },
    {
      "epoch": 3.766389767155659,
      "grad_norm": 0.368251234292984,
      "learning_rate": 2.48347752746058e-06,
      "loss": 0.0131,
      "step": 2301460
    },
    {
      "epoch": 3.766422497594313,
      "grad_norm": 0.3023205101490021,
      "learning_rate": 2.483411635247063e-06,
      "loss": 0.0095,
      "step": 2301480
    },
    {
      "epoch": 3.766455228032966,
      "grad_norm": 0.13878585398197174,
      "learning_rate": 2.483345743033546e-06,
      "loss": 0.0162,
      "step": 2301500
    },
    {
      "epoch": 3.7664879584716195,
      "grad_norm": 0.05087482929229736,
      "learning_rate": 2.4832798508200287e-06,
      "loss": 0.0128,
      "step": 2301520
    },
    {
      "epoch": 3.7665206889102727,
      "grad_norm": 0.3382861316204071,
      "learning_rate": 2.4832139586065114e-06,
      "loss": 0.0076,
      "step": 2301540
    },
    {
      "epoch": 3.7665534193489263,
      "grad_norm": 0.2585658133029938,
      "learning_rate": 2.4831480663929946e-06,
      "loss": 0.0184,
      "step": 2301560
    },
    {
      "epoch": 3.7665861497875794,
      "grad_norm": 0.28104496002197266,
      "learning_rate": 2.4830821741794773e-06,
      "loss": 0.0111,
      "step": 2301580
    },
    {
      "epoch": 3.7666188802262326,
      "grad_norm": 0.36243706941604614,
      "learning_rate": 2.4830162819659605e-06,
      "loss": 0.01,
      "step": 2301600
    },
    {
      "epoch": 3.766651610664886,
      "grad_norm": 0.1134706437587738,
      "learning_rate": 2.482950389752443e-06,
      "loss": 0.0121,
      "step": 2301620
    },
    {
      "epoch": 3.7666843411035393,
      "grad_norm": 0.4798373878002167,
      "learning_rate": 2.482884497538926e-06,
      "loss": 0.0077,
      "step": 2301640
    },
    {
      "epoch": 3.766717071542193,
      "grad_norm": 0.1370573192834854,
      "learning_rate": 2.4828186053254087e-06,
      "loss": 0.0085,
      "step": 2301660
    },
    {
      "epoch": 3.766749801980846,
      "grad_norm": 0.5217686891555786,
      "learning_rate": 2.482752713111892e-06,
      "loss": 0.0104,
      "step": 2301680
    },
    {
      "epoch": 3.7667825324194997,
      "grad_norm": 0.3117229640483856,
      "learning_rate": 2.4826868208983746e-06,
      "loss": 0.0135,
      "step": 2301700
    },
    {
      "epoch": 3.766815262858153,
      "grad_norm": 0.47742217779159546,
      "learning_rate": 2.4826209286848573e-06,
      "loss": 0.012,
      "step": 2301720
    },
    {
      "epoch": 3.766847993296806,
      "grad_norm": 0.11197763681411743,
      "learning_rate": 2.4825550364713405e-06,
      "loss": 0.0071,
      "step": 2301740
    },
    {
      "epoch": 3.7668807237354596,
      "grad_norm": 0.18755094707012177,
      "learning_rate": 2.4824891442578232e-06,
      "loss": 0.0111,
      "step": 2301760
    },
    {
      "epoch": 3.7669134541741127,
      "grad_norm": 0.23300771415233612,
      "learning_rate": 2.482423252044306e-06,
      "loss": 0.0094,
      "step": 2301780
    },
    {
      "epoch": 3.7669461846127663,
      "grad_norm": 0.11044531315565109,
      "learning_rate": 2.482357359830789e-06,
      "loss": 0.0094,
      "step": 2301800
    },
    {
      "epoch": 3.7669789150514195,
      "grad_norm": 0.5964597463607788,
      "learning_rate": 2.482291467617272e-06,
      "loss": 0.0124,
      "step": 2301820
    },
    {
      "epoch": 3.767011645490073,
      "grad_norm": 0.225626602768898,
      "learning_rate": 2.4822255754037546e-06,
      "loss": 0.0094,
      "step": 2301840
    },
    {
      "epoch": 3.767044375928726,
      "grad_norm": 0.12459108233451843,
      "learning_rate": 2.4821596831902374e-06,
      "loss": 0.0158,
      "step": 2301860
    },
    {
      "epoch": 3.7670771063673794,
      "grad_norm": 0.1036502942442894,
      "learning_rate": 2.4820937909767205e-06,
      "loss": 0.01,
      "step": 2301880
    },
    {
      "epoch": 3.767109836806033,
      "grad_norm": 0.39065566658973694,
      "learning_rate": 2.4820278987632037e-06,
      "loss": 0.0058,
      "step": 2301900
    },
    {
      "epoch": 3.767142567244686,
      "grad_norm": 0.24355165660381317,
      "learning_rate": 2.4819620065496864e-06,
      "loss": 0.0116,
      "step": 2301920
    },
    {
      "epoch": 3.7671752976833397,
      "grad_norm": 0.19176137447357178,
      "learning_rate": 2.481896114336169e-06,
      "loss": 0.0105,
      "step": 2301940
    },
    {
      "epoch": 3.767208028121993,
      "grad_norm": 0.10208553075790405,
      "learning_rate": 2.481830222122652e-06,
      "loss": 0.0166,
      "step": 2301960
    },
    {
      "epoch": 3.7672407585606464,
      "grad_norm": 0.3661390542984009,
      "learning_rate": 2.4817643299091346e-06,
      "loss": 0.0091,
      "step": 2301980
    },
    {
      "epoch": 3.7672734889992996,
      "grad_norm": 0.11482158303260803,
      "learning_rate": 2.481698437695618e-06,
      "loss": 0.0094,
      "step": 2302000
    },
    {
      "epoch": 3.7673062194379527,
      "grad_norm": 0.32298487424850464,
      "learning_rate": 2.4816325454821005e-06,
      "loss": 0.0084,
      "step": 2302020
    },
    {
      "epoch": 3.7673389498766063,
      "grad_norm": 0.4900306165218353,
      "learning_rate": 2.4815666532685833e-06,
      "loss": 0.0123,
      "step": 2302040
    },
    {
      "epoch": 3.7673716803152595,
      "grad_norm": 0.6755208969116211,
      "learning_rate": 2.481500761055066e-06,
      "loss": 0.0101,
      "step": 2302060
    },
    {
      "epoch": 3.767404410753913,
      "grad_norm": 0.2905208170413971,
      "learning_rate": 2.481434868841549e-06,
      "loss": 0.0079,
      "step": 2302080
    },
    {
      "epoch": 3.7674371411925662,
      "grad_norm": 0.2965690493583679,
      "learning_rate": 2.4813689766280323e-06,
      "loss": 0.0112,
      "step": 2302100
    },
    {
      "epoch": 3.76746987163122,
      "grad_norm": 0.5589122176170349,
      "learning_rate": 2.481303084414515e-06,
      "loss": 0.0132,
      "step": 2302120
    },
    {
      "epoch": 3.767502602069873,
      "grad_norm": 0.14526791870594025,
      "learning_rate": 2.481237192200998e-06,
      "loss": 0.0132,
      "step": 2302140
    },
    {
      "epoch": 3.767535332508526,
      "grad_norm": 0.053610216826200485,
      "learning_rate": 2.4811712999874806e-06,
      "loss": 0.0118,
      "step": 2302160
    },
    {
      "epoch": 3.7675680629471797,
      "grad_norm": 0.20981420576572418,
      "learning_rate": 2.4811054077739637e-06,
      "loss": 0.0121,
      "step": 2302180
    },
    {
      "epoch": 3.767600793385833,
      "grad_norm": 0.3978550136089325,
      "learning_rate": 2.4810395155604465e-06,
      "loss": 0.014,
      "step": 2302200
    },
    {
      "epoch": 3.767633523824486,
      "grad_norm": 0.1446492224931717,
      "learning_rate": 2.480973623346929e-06,
      "loss": 0.014,
      "step": 2302220
    },
    {
      "epoch": 3.7676662542631396,
      "grad_norm": 0.2048608660697937,
      "learning_rate": 2.480907731133412e-06,
      "loss": 0.0099,
      "step": 2302240
    },
    {
      "epoch": 3.767698984701793,
      "grad_norm": 0.5928172469139099,
      "learning_rate": 2.480841838919895e-06,
      "loss": 0.0158,
      "step": 2302260
    },
    {
      "epoch": 3.7677317151404464,
      "grad_norm": 0.38022685050964355,
      "learning_rate": 2.480775946706378e-06,
      "loss": 0.0105,
      "step": 2302280
    },
    {
      "epoch": 3.7677644455790995,
      "grad_norm": 0.700398325920105,
      "learning_rate": 2.480710054492861e-06,
      "loss": 0.0163,
      "step": 2302300
    },
    {
      "epoch": 3.767797176017753,
      "grad_norm": 0.8353578448295593,
      "learning_rate": 2.4806441622793438e-06,
      "loss": 0.0087,
      "step": 2302320
    },
    {
      "epoch": 3.7678299064564063,
      "grad_norm": 0.3442448377609253,
      "learning_rate": 2.4805782700658265e-06,
      "loss": 0.0117,
      "step": 2302340
    },
    {
      "epoch": 3.7678626368950594,
      "grad_norm": 0.3589108884334564,
      "learning_rate": 2.4805123778523092e-06,
      "loss": 0.0111,
      "step": 2302360
    },
    {
      "epoch": 3.767895367333713,
      "grad_norm": 0.13986483216285706,
      "learning_rate": 2.4804464856387924e-06,
      "loss": 0.0111,
      "step": 2302380
    },
    {
      "epoch": 3.7679280977723666,
      "grad_norm": 0.44841086864471436,
      "learning_rate": 2.480380593425275e-06,
      "loss": 0.0125,
      "step": 2302400
    },
    {
      "epoch": 3.7679608282110197,
      "grad_norm": 0.09700986742973328,
      "learning_rate": 2.480314701211758e-06,
      "loss": 0.0103,
      "step": 2302420
    },
    {
      "epoch": 3.767993558649673,
      "grad_norm": 0.10774709284305573,
      "learning_rate": 2.480248808998241e-06,
      "loss": 0.0086,
      "step": 2302440
    },
    {
      "epoch": 3.7680262890883265,
      "grad_norm": 0.6571221947669983,
      "learning_rate": 2.4801829167847238e-06,
      "loss": 0.008,
      "step": 2302460
    },
    {
      "epoch": 3.7680590195269796,
      "grad_norm": 0.2007931023836136,
      "learning_rate": 2.4801170245712065e-06,
      "loss": 0.0062,
      "step": 2302480
    },
    {
      "epoch": 3.768091749965633,
      "grad_norm": 0.34735390543937683,
      "learning_rate": 2.4800511323576897e-06,
      "loss": 0.0121,
      "step": 2302500
    },
    {
      "epoch": 3.7681244804042864,
      "grad_norm": 0.3302910327911377,
      "learning_rate": 2.4799852401441724e-06,
      "loss": 0.0085,
      "step": 2302520
    },
    {
      "epoch": 3.76815721084294,
      "grad_norm": 0.21194778382778168,
      "learning_rate": 2.479919347930655e-06,
      "loss": 0.0077,
      "step": 2302540
    },
    {
      "epoch": 3.768189941281593,
      "grad_norm": 0.2345176637172699,
      "learning_rate": 2.479853455717138e-06,
      "loss": 0.009,
      "step": 2302560
    },
    {
      "epoch": 3.7682226717202463,
      "grad_norm": 0.6300014853477478,
      "learning_rate": 2.479787563503621e-06,
      "loss": 0.0136,
      "step": 2302580
    },
    {
      "epoch": 3.7682554021589,
      "grad_norm": 0.04195940122008324,
      "learning_rate": 2.479721671290104e-06,
      "loss": 0.0114,
      "step": 2302600
    },
    {
      "epoch": 3.768288132597553,
      "grad_norm": 0.2834394574165344,
      "learning_rate": 2.479655779076587e-06,
      "loss": 0.0122,
      "step": 2302620
    },
    {
      "epoch": 3.768320863036206,
      "grad_norm": 0.7036622166633606,
      "learning_rate": 2.4795898868630697e-06,
      "loss": 0.0124,
      "step": 2302640
    },
    {
      "epoch": 3.7683535934748598,
      "grad_norm": 0.09693463146686554,
      "learning_rate": 2.4795239946495524e-06,
      "loss": 0.0103,
      "step": 2302660
    },
    {
      "epoch": 3.7683863239135134,
      "grad_norm": 0.2516861855983734,
      "learning_rate": 2.479458102436035e-06,
      "loss": 0.0092,
      "step": 2302680
    },
    {
      "epoch": 3.7684190543521665,
      "grad_norm": 0.24909262359142303,
      "learning_rate": 2.4793922102225184e-06,
      "loss": 0.0084,
      "step": 2302700
    },
    {
      "epoch": 3.7684517847908197,
      "grad_norm": 0.28023117780685425,
      "learning_rate": 2.479326318009001e-06,
      "loss": 0.01,
      "step": 2302720
    },
    {
      "epoch": 3.7684845152294733,
      "grad_norm": 0.21891887485980988,
      "learning_rate": 2.479260425795484e-06,
      "loss": 0.0087,
      "step": 2302740
    },
    {
      "epoch": 3.7685172456681264,
      "grad_norm": 0.27955877780914307,
      "learning_rate": 2.4791945335819666e-06,
      "loss": 0.0142,
      "step": 2302760
    },
    {
      "epoch": 3.7685499761067796,
      "grad_norm": 0.38791635632514954,
      "learning_rate": 2.4791286413684497e-06,
      "loss": 0.01,
      "step": 2302780
    },
    {
      "epoch": 3.768582706545433,
      "grad_norm": 0.3170691728591919,
      "learning_rate": 2.4790627491549325e-06,
      "loss": 0.0128,
      "step": 2302800
    },
    {
      "epoch": 3.7686154369840863,
      "grad_norm": 0.3256106376647949,
      "learning_rate": 2.4789968569414156e-06,
      "loss": 0.0117,
      "step": 2302820
    },
    {
      "epoch": 3.76864816742274,
      "grad_norm": 0.19022761285305023,
      "learning_rate": 2.4789309647278984e-06,
      "loss": 0.0118,
      "step": 2302840
    },
    {
      "epoch": 3.768680897861393,
      "grad_norm": 0.25187021493911743,
      "learning_rate": 2.478865072514381e-06,
      "loss": 0.0068,
      "step": 2302860
    },
    {
      "epoch": 3.7687136283000466,
      "grad_norm": 0.22583089768886566,
      "learning_rate": 2.478799180300864e-06,
      "loss": 0.0151,
      "step": 2302880
    },
    {
      "epoch": 3.7687463587387,
      "grad_norm": 0.3077259361743927,
      "learning_rate": 2.478733288087347e-06,
      "loss": 0.0123,
      "step": 2302900
    },
    {
      "epoch": 3.768779089177353,
      "grad_norm": 0.4818030595779419,
      "learning_rate": 2.4786673958738298e-06,
      "loss": 0.0097,
      "step": 2302920
    },
    {
      "epoch": 3.7688118196160065,
      "grad_norm": 0.13974280655384064,
      "learning_rate": 2.4786015036603125e-06,
      "loss": 0.0124,
      "step": 2302940
    },
    {
      "epoch": 3.7688445500546597,
      "grad_norm": 0.13250534236431122,
      "learning_rate": 2.4785356114467957e-06,
      "loss": 0.0131,
      "step": 2302960
    },
    {
      "epoch": 3.7688772804933133,
      "grad_norm": 0.3955811858177185,
      "learning_rate": 2.4784697192332784e-06,
      "loss": 0.0118,
      "step": 2302980
    },
    {
      "epoch": 3.7689100109319664,
      "grad_norm": 0.5060364007949829,
      "learning_rate": 2.478403827019761e-06,
      "loss": 0.0122,
      "step": 2303000
    },
    {
      "epoch": 3.76894274137062,
      "grad_norm": 0.09284621477127075,
      "learning_rate": 2.4783379348062443e-06,
      "loss": 0.0111,
      "step": 2303020
    },
    {
      "epoch": 3.768975471809273,
      "grad_norm": 0.3859003782272339,
      "learning_rate": 2.478272042592727e-06,
      "loss": 0.0123,
      "step": 2303040
    },
    {
      "epoch": 3.7690082022479263,
      "grad_norm": 0.4391394257545471,
      "learning_rate": 2.4782061503792098e-06,
      "loss": 0.0081,
      "step": 2303060
    },
    {
      "epoch": 3.76904093268658,
      "grad_norm": 0.34411072731018066,
      "learning_rate": 2.4781402581656925e-06,
      "loss": 0.0085,
      "step": 2303080
    },
    {
      "epoch": 3.769073663125233,
      "grad_norm": 0.17975138127803802,
      "learning_rate": 2.4780743659521757e-06,
      "loss": 0.0112,
      "step": 2303100
    },
    {
      "epoch": 3.7691063935638867,
      "grad_norm": 0.10663266479969025,
      "learning_rate": 2.4780084737386584e-06,
      "loss": 0.0105,
      "step": 2303120
    },
    {
      "epoch": 3.76913912400254,
      "grad_norm": 0.2556231915950775,
      "learning_rate": 2.4779425815251416e-06,
      "loss": 0.0111,
      "step": 2303140
    },
    {
      "epoch": 3.7691718544411934,
      "grad_norm": 0.313221275806427,
      "learning_rate": 2.4778766893116243e-06,
      "loss": 0.011,
      "step": 2303160
    },
    {
      "epoch": 3.7692045848798466,
      "grad_norm": 0.15887513756752014,
      "learning_rate": 2.477810797098107e-06,
      "loss": 0.0087,
      "step": 2303180
    },
    {
      "epoch": 3.7692373153184997,
      "grad_norm": 0.21117915213108063,
      "learning_rate": 2.47774490488459e-06,
      "loss": 0.0192,
      "step": 2303200
    },
    {
      "epoch": 3.7692700457571533,
      "grad_norm": 0.17698946595191956,
      "learning_rate": 2.477679012671073e-06,
      "loss": 0.0131,
      "step": 2303220
    },
    {
      "epoch": 3.7693027761958064,
      "grad_norm": 0.1487443596124649,
      "learning_rate": 2.4776131204575557e-06,
      "loss": 0.0101,
      "step": 2303240
    },
    {
      "epoch": 3.76933550663446,
      "grad_norm": 0.6309245824813843,
      "learning_rate": 2.4775472282440385e-06,
      "loss": 0.0179,
      "step": 2303260
    },
    {
      "epoch": 3.769368237073113,
      "grad_norm": 0.24379809200763702,
      "learning_rate": 2.477481336030521e-06,
      "loss": 0.0119,
      "step": 2303280
    },
    {
      "epoch": 3.769400967511767,
      "grad_norm": 0.6483701467514038,
      "learning_rate": 2.4774154438170044e-06,
      "loss": 0.0131,
      "step": 2303300
    },
    {
      "epoch": 3.76943369795042,
      "grad_norm": 0.2596345841884613,
      "learning_rate": 2.4773495516034875e-06,
      "loss": 0.0085,
      "step": 2303320
    },
    {
      "epoch": 3.769466428389073,
      "grad_norm": 0.3032603859901428,
      "learning_rate": 2.4772836593899703e-06,
      "loss": 0.0145,
      "step": 2303340
    },
    {
      "epoch": 3.7694991588277267,
      "grad_norm": 0.37381237745285034,
      "learning_rate": 2.477217767176453e-06,
      "loss": 0.0146,
      "step": 2303360
    },
    {
      "epoch": 3.76953188926638,
      "grad_norm": 0.5702537894248962,
      "learning_rate": 2.4771518749629357e-06,
      "loss": 0.0105,
      "step": 2303380
    },
    {
      "epoch": 3.7695646197050334,
      "grad_norm": 0.19357256591320038,
      "learning_rate": 2.477085982749419e-06,
      "loss": 0.0111,
      "step": 2303400
    },
    {
      "epoch": 3.7695973501436866,
      "grad_norm": 0.3939211368560791,
      "learning_rate": 2.4770200905359016e-06,
      "loss": 0.0089,
      "step": 2303420
    },
    {
      "epoch": 3.76963008058234,
      "grad_norm": 0.17445005476474762,
      "learning_rate": 2.4769541983223844e-06,
      "loss": 0.0114,
      "step": 2303440
    },
    {
      "epoch": 3.7696628110209933,
      "grad_norm": 0.3082427382469177,
      "learning_rate": 2.476888306108867e-06,
      "loss": 0.0083,
      "step": 2303460
    },
    {
      "epoch": 3.7696955414596465,
      "grad_norm": 0.22759146988391876,
      "learning_rate": 2.47682241389535e-06,
      "loss": 0.0086,
      "step": 2303480
    },
    {
      "epoch": 3.7697282718983,
      "grad_norm": 0.3032609224319458,
      "learning_rate": 2.476756521681833e-06,
      "loss": 0.0096,
      "step": 2303500
    },
    {
      "epoch": 3.769761002336953,
      "grad_norm": 0.03644776716828346,
      "learning_rate": 2.476690629468316e-06,
      "loss": 0.0087,
      "step": 2303520
    },
    {
      "epoch": 3.769793732775607,
      "grad_norm": 0.15596403181552887,
      "learning_rate": 2.476624737254799e-06,
      "loss": 0.0116,
      "step": 2303540
    },
    {
      "epoch": 3.76982646321426,
      "grad_norm": 0.21399077773094177,
      "learning_rate": 2.4765588450412817e-06,
      "loss": 0.013,
      "step": 2303560
    },
    {
      "epoch": 3.7698591936529136,
      "grad_norm": 0.8842558264732361,
      "learning_rate": 2.4764929528277644e-06,
      "loss": 0.0136,
      "step": 2303580
    },
    {
      "epoch": 3.7698919240915667,
      "grad_norm": 0.0810527577996254,
      "learning_rate": 2.4764270606142476e-06,
      "loss": 0.0152,
      "step": 2303600
    },
    {
      "epoch": 3.76992465453022,
      "grad_norm": 1.191583275794983,
      "learning_rate": 2.4763611684007303e-06,
      "loss": 0.0141,
      "step": 2303620
    },
    {
      "epoch": 3.7699573849688734,
      "grad_norm": 0.41355884075164795,
      "learning_rate": 2.476295276187213e-06,
      "loss": 0.0142,
      "step": 2303640
    },
    {
      "epoch": 3.7699901154075266,
      "grad_norm": 0.43273353576660156,
      "learning_rate": 2.4762293839736962e-06,
      "loss": 0.0114,
      "step": 2303660
    },
    {
      "epoch": 3.77002284584618,
      "grad_norm": 0.25049638748168945,
      "learning_rate": 2.476163491760179e-06,
      "loss": 0.0142,
      "step": 2303680
    },
    {
      "epoch": 3.7700555762848333,
      "grad_norm": 0.2718939483165741,
      "learning_rate": 2.4760975995466617e-06,
      "loss": 0.0068,
      "step": 2303700
    },
    {
      "epoch": 3.770088306723487,
      "grad_norm": 0.21159885823726654,
      "learning_rate": 2.476031707333145e-06,
      "loss": 0.0173,
      "step": 2303720
    },
    {
      "epoch": 3.77012103716214,
      "grad_norm": 0.31354138255119324,
      "learning_rate": 2.4759658151196276e-06,
      "loss": 0.0111,
      "step": 2303740
    },
    {
      "epoch": 3.7701537676007932,
      "grad_norm": 0.5159723162651062,
      "learning_rate": 2.4758999229061103e-06,
      "loss": 0.0112,
      "step": 2303760
    },
    {
      "epoch": 3.770186498039447,
      "grad_norm": 0.24075569212436676,
      "learning_rate": 2.475834030692593e-06,
      "loss": 0.0113,
      "step": 2303780
    },
    {
      "epoch": 3.7702192284781,
      "grad_norm": 0.2530636489391327,
      "learning_rate": 2.4757681384790762e-06,
      "loss": 0.0086,
      "step": 2303800
    },
    {
      "epoch": 3.770251958916753,
      "grad_norm": 0.28497418761253357,
      "learning_rate": 2.475702246265559e-06,
      "loss": 0.0116,
      "step": 2303820
    },
    {
      "epoch": 3.7702846893554067,
      "grad_norm": 0.11767133325338364,
      "learning_rate": 2.475636354052042e-06,
      "loss": 0.0132,
      "step": 2303840
    },
    {
      "epoch": 3.7703174197940603,
      "grad_norm": 0.06948510557413101,
      "learning_rate": 2.475570461838525e-06,
      "loss": 0.0146,
      "step": 2303860
    },
    {
      "epoch": 3.7703501502327135,
      "grad_norm": 0.23129868507385254,
      "learning_rate": 2.4755045696250076e-06,
      "loss": 0.0071,
      "step": 2303880
    },
    {
      "epoch": 3.7703828806713666,
      "grad_norm": 0.6981489062309265,
      "learning_rate": 2.4754386774114904e-06,
      "loss": 0.0083,
      "step": 2303900
    },
    {
      "epoch": 3.77041561111002,
      "grad_norm": 0.6947921514511108,
      "learning_rate": 2.4753727851979735e-06,
      "loss": 0.0169,
      "step": 2303920
    },
    {
      "epoch": 3.7704483415486734,
      "grad_norm": 0.27693191170692444,
      "learning_rate": 2.4753068929844563e-06,
      "loss": 0.0082,
      "step": 2303940
    },
    {
      "epoch": 3.7704810719873265,
      "grad_norm": 0.3261897563934326,
      "learning_rate": 2.475241000770939e-06,
      "loss": 0.01,
      "step": 2303960
    },
    {
      "epoch": 3.77051380242598,
      "grad_norm": 0.09866515547037125,
      "learning_rate": 2.4751751085574217e-06,
      "loss": 0.0113,
      "step": 2303980
    },
    {
      "epoch": 3.7705465328646337,
      "grad_norm": 0.5886237621307373,
      "learning_rate": 2.475109216343905e-06,
      "loss": 0.009,
      "step": 2304000
    },
    {
      "epoch": 3.770579263303287,
      "grad_norm": 0.5730385184288025,
      "learning_rate": 2.4750433241303876e-06,
      "loss": 0.011,
      "step": 2304020
    },
    {
      "epoch": 3.77061199374194,
      "grad_norm": 0.18517671525478363,
      "learning_rate": 2.474977431916871e-06,
      "loss": 0.0124,
      "step": 2304040
    },
    {
      "epoch": 3.7706447241805936,
      "grad_norm": 0.1845266968011856,
      "learning_rate": 2.4749115397033535e-06,
      "loss": 0.0101,
      "step": 2304060
    },
    {
      "epoch": 3.7706774546192467,
      "grad_norm": 0.2723506689071655,
      "learning_rate": 2.4748456474898363e-06,
      "loss": 0.0113,
      "step": 2304080
    },
    {
      "epoch": 3.7707101850579,
      "grad_norm": 0.30063408613204956,
      "learning_rate": 2.474779755276319e-06,
      "loss": 0.0119,
      "step": 2304100
    },
    {
      "epoch": 3.7707429154965535,
      "grad_norm": 0.2147584706544876,
      "learning_rate": 2.474713863062802e-06,
      "loss": 0.0096,
      "step": 2304120
    },
    {
      "epoch": 3.770775645935207,
      "grad_norm": 0.3379777669906616,
      "learning_rate": 2.474647970849285e-06,
      "loss": 0.0083,
      "step": 2304140
    },
    {
      "epoch": 3.7708083763738602,
      "grad_norm": 0.20001228153705597,
      "learning_rate": 2.4745820786357677e-06,
      "loss": 0.0076,
      "step": 2304160
    },
    {
      "epoch": 3.7708411068125134,
      "grad_norm": 0.13960371911525726,
      "learning_rate": 2.4745161864222504e-06,
      "loss": 0.0099,
      "step": 2304180
    },
    {
      "epoch": 3.770873837251167,
      "grad_norm": 0.0932779535651207,
      "learning_rate": 2.4744502942087336e-06,
      "loss": 0.0125,
      "step": 2304200
    },
    {
      "epoch": 3.77090656768982,
      "grad_norm": 0.36453428864479065,
      "learning_rate": 2.4743844019952163e-06,
      "loss": 0.0144,
      "step": 2304220
    },
    {
      "epoch": 3.7709392981284733,
      "grad_norm": 0.13132309913635254,
      "learning_rate": 2.4743185097816995e-06,
      "loss": 0.009,
      "step": 2304240
    },
    {
      "epoch": 3.770972028567127,
      "grad_norm": 0.09661649167537689,
      "learning_rate": 2.4742526175681822e-06,
      "loss": 0.0077,
      "step": 2304260
    },
    {
      "epoch": 3.7710047590057805,
      "grad_norm": 0.18996284902095795,
      "learning_rate": 2.474186725354665e-06,
      "loss": 0.0099,
      "step": 2304280
    },
    {
      "epoch": 3.7710374894444336,
      "grad_norm": 0.3880441188812256,
      "learning_rate": 2.4741208331411477e-06,
      "loss": 0.0095,
      "step": 2304300
    },
    {
      "epoch": 3.7710702198830868,
      "grad_norm": 0.2675010561943054,
      "learning_rate": 2.474054940927631e-06,
      "loss": 0.0085,
      "step": 2304320
    },
    {
      "epoch": 3.7711029503217404,
      "grad_norm": 0.8296955823898315,
      "learning_rate": 2.4739890487141136e-06,
      "loss": 0.009,
      "step": 2304340
    },
    {
      "epoch": 3.7711356807603935,
      "grad_norm": 0.6677971482276917,
      "learning_rate": 2.4739231565005963e-06,
      "loss": 0.013,
      "step": 2304360
    },
    {
      "epoch": 3.7711684111990467,
      "grad_norm": 0.1223447322845459,
      "learning_rate": 2.4738572642870795e-06,
      "loss": 0.0128,
      "step": 2304380
    },
    {
      "epoch": 3.7712011416377003,
      "grad_norm": 0.04612414166331291,
      "learning_rate": 2.4737913720735622e-06,
      "loss": 0.0119,
      "step": 2304400
    },
    {
      "epoch": 3.7712338720763534,
      "grad_norm": 0.2993387281894684,
      "learning_rate": 2.473725479860045e-06,
      "loss": 0.0141,
      "step": 2304420
    },
    {
      "epoch": 3.771266602515007,
      "grad_norm": 0.10166004300117493,
      "learning_rate": 2.473659587646528e-06,
      "loss": 0.0088,
      "step": 2304440
    },
    {
      "epoch": 3.77129933295366,
      "grad_norm": 0.45726528763771057,
      "learning_rate": 2.473593695433011e-06,
      "loss": 0.0077,
      "step": 2304460
    },
    {
      "epoch": 3.7713320633923137,
      "grad_norm": 0.1858871877193451,
      "learning_rate": 2.4735278032194936e-06,
      "loss": 0.0091,
      "step": 2304480
    },
    {
      "epoch": 3.771364793830967,
      "grad_norm": 0.3385978639125824,
      "learning_rate": 2.4734619110059764e-06,
      "loss": 0.0083,
      "step": 2304500
    },
    {
      "epoch": 3.77139752426962,
      "grad_norm": 0.3730185627937317,
      "learning_rate": 2.4733960187924595e-06,
      "loss": 0.0098,
      "step": 2304520
    },
    {
      "epoch": 3.7714302547082736,
      "grad_norm": 0.48840197920799255,
      "learning_rate": 2.4733301265789427e-06,
      "loss": 0.0154,
      "step": 2304540
    },
    {
      "epoch": 3.771462985146927,
      "grad_norm": 0.10684294998645782,
      "learning_rate": 2.4732642343654254e-06,
      "loss": 0.0101,
      "step": 2304560
    },
    {
      "epoch": 3.7714957155855804,
      "grad_norm": 0.24469318985939026,
      "learning_rate": 2.473198342151908e-06,
      "loss": 0.0218,
      "step": 2304580
    },
    {
      "epoch": 3.7715284460242335,
      "grad_norm": 1.1828116178512573,
      "learning_rate": 2.473132449938391e-06,
      "loss": 0.013,
      "step": 2304600
    },
    {
      "epoch": 3.771561176462887,
      "grad_norm": 0.15360969305038452,
      "learning_rate": 2.4730665577248737e-06,
      "loss": 0.0125,
      "step": 2304620
    },
    {
      "epoch": 3.7715939069015403,
      "grad_norm": 0.3363262116909027,
      "learning_rate": 2.473000665511357e-06,
      "loss": 0.0076,
      "step": 2304640
    },
    {
      "epoch": 3.7716266373401934,
      "grad_norm": 0.20207129418849945,
      "learning_rate": 2.4729347732978396e-06,
      "loss": 0.0099,
      "step": 2304660
    },
    {
      "epoch": 3.771659367778847,
      "grad_norm": 0.16485463082790375,
      "learning_rate": 2.4728688810843223e-06,
      "loss": 0.0108,
      "step": 2304680
    },
    {
      "epoch": 3.7716920982175,
      "grad_norm": 0.49777477979660034,
      "learning_rate": 2.472802988870805e-06,
      "loss": 0.0097,
      "step": 2304700
    },
    {
      "epoch": 3.7717248286561538,
      "grad_norm": 0.23417910933494568,
      "learning_rate": 2.472737096657288e-06,
      "loss": 0.0099,
      "step": 2304720
    },
    {
      "epoch": 3.771757559094807,
      "grad_norm": 0.3619575500488281,
      "learning_rate": 2.4726712044437714e-06,
      "loss": 0.0102,
      "step": 2304740
    },
    {
      "epoch": 3.7717902895334605,
      "grad_norm": 0.22396649420261383,
      "learning_rate": 2.472605312230254e-06,
      "loss": 0.0096,
      "step": 2304760
    },
    {
      "epoch": 3.7718230199721137,
      "grad_norm": 0.32861343026161194,
      "learning_rate": 2.472539420016737e-06,
      "loss": 0.0125,
      "step": 2304780
    },
    {
      "epoch": 3.771855750410767,
      "grad_norm": 0.3368902802467346,
      "learning_rate": 2.4724735278032196e-06,
      "loss": 0.0129,
      "step": 2304800
    },
    {
      "epoch": 3.7718884808494204,
      "grad_norm": 0.18484589457511902,
      "learning_rate": 2.4724076355897027e-06,
      "loss": 0.0078,
      "step": 2304820
    },
    {
      "epoch": 3.7719212112880736,
      "grad_norm": 0.10855969786643982,
      "learning_rate": 2.4723417433761855e-06,
      "loss": 0.0149,
      "step": 2304840
    },
    {
      "epoch": 3.771953941726727,
      "grad_norm": 0.4895971715450287,
      "learning_rate": 2.4722758511626682e-06,
      "loss": 0.0108,
      "step": 2304860
    },
    {
      "epoch": 3.7719866721653803,
      "grad_norm": 0.11363021284341812,
      "learning_rate": 2.472209958949151e-06,
      "loss": 0.0128,
      "step": 2304880
    },
    {
      "epoch": 3.772019402604034,
      "grad_norm": 0.17879870533943176,
      "learning_rate": 2.472144066735634e-06,
      "loss": 0.0097,
      "step": 2304900
    },
    {
      "epoch": 3.772052133042687,
      "grad_norm": 0.19078576564788818,
      "learning_rate": 2.472078174522117e-06,
      "loss": 0.0111,
      "step": 2304920
    },
    {
      "epoch": 3.77208486348134,
      "grad_norm": 0.11750730127096176,
      "learning_rate": 2.4720122823086e-06,
      "loss": 0.0119,
      "step": 2304940
    },
    {
      "epoch": 3.772117593919994,
      "grad_norm": 0.22493019700050354,
      "learning_rate": 2.4719463900950828e-06,
      "loss": 0.0067,
      "step": 2304960
    },
    {
      "epoch": 3.772150324358647,
      "grad_norm": null,
      "learning_rate": 2.4718804978815655e-06,
      "loss": 0.0127,
      "step": 2304980
    },
    {
      "epoch": 3.7721830547973005,
      "grad_norm": 0.24423569440841675,
      "learning_rate": 2.4718146056680482e-06,
      "loss": 0.0087,
      "step": 2305000
    },
    {
      "epoch": 3.7722157852359537,
      "grad_norm": 0.14655591547489166,
      "learning_rate": 2.4717487134545314e-06,
      "loss": 0.0073,
      "step": 2305020
    },
    {
      "epoch": 3.7722485156746073,
      "grad_norm": 0.0682373195886612,
      "learning_rate": 2.471682821241014e-06,
      "loss": 0.0101,
      "step": 2305040
    },
    {
      "epoch": 3.7722812461132604,
      "grad_norm": 0.0833188071846962,
      "learning_rate": 2.471616929027497e-06,
      "loss": 0.0094,
      "step": 2305060
    },
    {
      "epoch": 3.7723139765519136,
      "grad_norm": 0.27680501341819763,
      "learning_rate": 2.47155103681398e-06,
      "loss": 0.0149,
      "step": 2305080
    },
    {
      "epoch": 3.772346706990567,
      "grad_norm": 0.16258876025676727,
      "learning_rate": 2.471485144600463e-06,
      "loss": 0.0102,
      "step": 2305100
    },
    {
      "epoch": 3.7723794374292203,
      "grad_norm": 0.02085944451391697,
      "learning_rate": 2.4714192523869455e-06,
      "loss": 0.0067,
      "step": 2305120
    },
    {
      "epoch": 3.772412167867874,
      "grad_norm": 0.3797348141670227,
      "learning_rate": 2.4713533601734287e-06,
      "loss": 0.0082,
      "step": 2305140
    },
    {
      "epoch": 3.772444898306527,
      "grad_norm": 0.12757174670696259,
      "learning_rate": 2.4712874679599114e-06,
      "loss": 0.0127,
      "step": 2305160
    },
    {
      "epoch": 3.7724776287451807,
      "grad_norm": 0.1771046668291092,
      "learning_rate": 2.471221575746394e-06,
      "loss": 0.0095,
      "step": 2305180
    },
    {
      "epoch": 3.772510359183834,
      "grad_norm": 0.14244505763053894,
      "learning_rate": 2.471155683532877e-06,
      "loss": 0.0109,
      "step": 2305200
    },
    {
      "epoch": 3.772543089622487,
      "grad_norm": 0.386099249124527,
      "learning_rate": 2.47108979131936e-06,
      "loss": 0.0113,
      "step": 2305220
    },
    {
      "epoch": 3.7725758200611406,
      "grad_norm": 0.1838649958372116,
      "learning_rate": 2.471023899105843e-06,
      "loss": 0.009,
      "step": 2305240
    },
    {
      "epoch": 3.7726085504997937,
      "grad_norm": 0.6220707297325134,
      "learning_rate": 2.470958006892326e-06,
      "loss": 0.0083,
      "step": 2305260
    },
    {
      "epoch": 3.772641280938447,
      "grad_norm": 0.39375776052474976,
      "learning_rate": 2.4708921146788087e-06,
      "loss": 0.0099,
      "step": 2305280
    },
    {
      "epoch": 3.7726740113771005,
      "grad_norm": 1.0966596603393555,
      "learning_rate": 2.4708262224652915e-06,
      "loss": 0.0101,
      "step": 2305300
    },
    {
      "epoch": 3.772706741815754,
      "grad_norm": 0.4751209616661072,
      "learning_rate": 2.470760330251774e-06,
      "loss": 0.0149,
      "step": 2305320
    },
    {
      "epoch": 3.772739472254407,
      "grad_norm": 0.3001628518104553,
      "learning_rate": 2.4706944380382574e-06,
      "loss": 0.012,
      "step": 2305340
    },
    {
      "epoch": 3.7727722026930604,
      "grad_norm": 0.20523251593112946,
      "learning_rate": 2.47062854582474e-06,
      "loss": 0.0103,
      "step": 2305360
    },
    {
      "epoch": 3.772804933131714,
      "grad_norm": 0.2875678837299347,
      "learning_rate": 2.470562653611223e-06,
      "loss": 0.0111,
      "step": 2305380
    },
    {
      "epoch": 3.772837663570367,
      "grad_norm": 0.2097417712211609,
      "learning_rate": 2.4704967613977056e-06,
      "loss": 0.0098,
      "step": 2305400
    },
    {
      "epoch": 3.7728703940090202,
      "grad_norm": 0.11824437975883484,
      "learning_rate": 2.4704308691841887e-06,
      "loss": 0.011,
      "step": 2305420
    },
    {
      "epoch": 3.772903124447674,
      "grad_norm": 0.38373294472694397,
      "learning_rate": 2.4703649769706715e-06,
      "loss": 0.0108,
      "step": 2305440
    },
    {
      "epoch": 3.7729358548863274,
      "grad_norm": 0.48197200894355774,
      "learning_rate": 2.4702990847571546e-06,
      "loss": 0.0133,
      "step": 2305460
    },
    {
      "epoch": 3.7729685853249806,
      "grad_norm": 0.07859010994434357,
      "learning_rate": 2.4702331925436374e-06,
      "loss": 0.0085,
      "step": 2305480
    },
    {
      "epoch": 3.7730013157636337,
      "grad_norm": 0.2130838930606842,
      "learning_rate": 2.47016730033012e-06,
      "loss": 0.0075,
      "step": 2305500
    },
    {
      "epoch": 3.7730340462022873,
      "grad_norm": 0.7935680747032166,
      "learning_rate": 2.470101408116603e-06,
      "loss": 0.0144,
      "step": 2305520
    },
    {
      "epoch": 3.7730667766409405,
      "grad_norm": 0.2598908841609955,
      "learning_rate": 2.470035515903086e-06,
      "loss": 0.0114,
      "step": 2305540
    },
    {
      "epoch": 3.7730995070795936,
      "grad_norm": 0.14124469459056854,
      "learning_rate": 2.4699696236895688e-06,
      "loss": 0.0092,
      "step": 2305560
    },
    {
      "epoch": 3.7731322375182472,
      "grad_norm": 0.31911593675613403,
      "learning_rate": 2.4699037314760515e-06,
      "loss": 0.0128,
      "step": 2305580
    },
    {
      "epoch": 3.773164967956901,
      "grad_norm": 0.17934074997901917,
      "learning_rate": 2.4698378392625347e-06,
      "loss": 0.0132,
      "step": 2305600
    },
    {
      "epoch": 3.773197698395554,
      "grad_norm": 0.19653534889221191,
      "learning_rate": 2.4697719470490174e-06,
      "loss": 0.008,
      "step": 2305620
    },
    {
      "epoch": 3.773230428834207,
      "grad_norm": 0.5777615308761597,
      "learning_rate": 2.4697060548355e-06,
      "loss": 0.0069,
      "step": 2305640
    },
    {
      "epoch": 3.7732631592728607,
      "grad_norm": 0.2024044245481491,
      "learning_rate": 2.4696401626219833e-06,
      "loss": 0.0113,
      "step": 2305660
    },
    {
      "epoch": 3.773295889711514,
      "grad_norm": 0.23193176090717316,
      "learning_rate": 2.469574270408466e-06,
      "loss": 0.0101,
      "step": 2305680
    },
    {
      "epoch": 3.773328620150167,
      "grad_norm": 0.21852357685565948,
      "learning_rate": 2.469508378194949e-06,
      "loss": 0.0092,
      "step": 2305700
    },
    {
      "epoch": 3.7733613505888206,
      "grad_norm": 0.49677518010139465,
      "learning_rate": 2.4694424859814315e-06,
      "loss": 0.0152,
      "step": 2305720
    },
    {
      "epoch": 3.773394081027474,
      "grad_norm": 0.13305701315402985,
      "learning_rate": 2.4693765937679147e-06,
      "loss": 0.0059,
      "step": 2305740
    },
    {
      "epoch": 3.7734268114661274,
      "grad_norm": 0.17645704746246338,
      "learning_rate": 2.4693107015543974e-06,
      "loss": 0.006,
      "step": 2305760
    },
    {
      "epoch": 3.7734595419047805,
      "grad_norm": 0.4098578691482544,
      "learning_rate": 2.4692448093408806e-06,
      "loss": 0.0189,
      "step": 2305780
    },
    {
      "epoch": 3.773492272343434,
      "grad_norm": 0.08035214245319366,
      "learning_rate": 2.4691789171273633e-06,
      "loss": 0.0146,
      "step": 2305800
    },
    {
      "epoch": 3.7735250027820872,
      "grad_norm": 0.41859549283981323,
      "learning_rate": 2.469113024913846e-06,
      "loss": 0.0084,
      "step": 2305820
    },
    {
      "epoch": 3.7735577332207404,
      "grad_norm": 0.7498294115066528,
      "learning_rate": 2.469047132700329e-06,
      "loss": 0.0093,
      "step": 2305840
    },
    {
      "epoch": 3.773590463659394,
      "grad_norm": 0.7442502975463867,
      "learning_rate": 2.468981240486812e-06,
      "loss": 0.011,
      "step": 2305860
    },
    {
      "epoch": 3.773623194098047,
      "grad_norm": 0.3709218502044678,
      "learning_rate": 2.4689153482732947e-06,
      "loss": 0.0102,
      "step": 2305880
    },
    {
      "epoch": 3.7736559245367007,
      "grad_norm": 0.20267091691493988,
      "learning_rate": 2.4688494560597775e-06,
      "loss": 0.0104,
      "step": 2305900
    },
    {
      "epoch": 3.773688654975354,
      "grad_norm": 0.09553999453783035,
      "learning_rate": 2.46878356384626e-06,
      "loss": 0.0168,
      "step": 2305920
    },
    {
      "epoch": 3.7737213854140075,
      "grad_norm": 0.29328998923301697,
      "learning_rate": 2.4687176716327434e-06,
      "loss": 0.0117,
      "step": 2305940
    },
    {
      "epoch": 3.7737541158526606,
      "grad_norm": 0.17161615192890167,
      "learning_rate": 2.4686517794192265e-06,
      "loss": 0.0125,
      "step": 2305960
    },
    {
      "epoch": 3.773786846291314,
      "grad_norm": 0.2074812948703766,
      "learning_rate": 2.4685858872057093e-06,
      "loss": 0.0142,
      "step": 2305980
    },
    {
      "epoch": 3.7738195767299674,
      "grad_norm": 0.208663672208786,
      "learning_rate": 2.468519994992192e-06,
      "loss": 0.0103,
      "step": 2306000
    },
    {
      "epoch": 3.7738523071686205,
      "grad_norm": 0.08566401898860931,
      "learning_rate": 2.4684541027786748e-06,
      "loss": 0.0132,
      "step": 2306020
    },
    {
      "epoch": 3.773885037607274,
      "grad_norm": 0.20800523459911346,
      "learning_rate": 2.468388210565158e-06,
      "loss": 0.0092,
      "step": 2306040
    },
    {
      "epoch": 3.7739177680459273,
      "grad_norm": 0.5845740437507629,
      "learning_rate": 2.4683223183516407e-06,
      "loss": 0.0166,
      "step": 2306060
    },
    {
      "epoch": 3.773950498484581,
      "grad_norm": 0.19644638895988464,
      "learning_rate": 2.4682564261381234e-06,
      "loss": 0.0133,
      "step": 2306080
    },
    {
      "epoch": 3.773983228923234,
      "grad_norm": 0.4360790252685547,
      "learning_rate": 2.468190533924606e-06,
      "loss": 0.0092,
      "step": 2306100
    },
    {
      "epoch": 3.774015959361887,
      "grad_norm": 0.2187112718820572,
      "learning_rate": 2.468124641711089e-06,
      "loss": 0.0099,
      "step": 2306120
    },
    {
      "epoch": 3.7740486898005408,
      "grad_norm": 0.35038238763809204,
      "learning_rate": 2.468058749497572e-06,
      "loss": 0.0101,
      "step": 2306140
    },
    {
      "epoch": 3.774081420239194,
      "grad_norm": 0.11959892511367798,
      "learning_rate": 2.467992857284055e-06,
      "loss": 0.009,
      "step": 2306160
    },
    {
      "epoch": 3.7741141506778475,
      "grad_norm": 0.13021782040596008,
      "learning_rate": 2.467926965070538e-06,
      "loss": 0.0077,
      "step": 2306180
    },
    {
      "epoch": 3.7741468811165007,
      "grad_norm": 0.6920473575592041,
      "learning_rate": 2.4678610728570207e-06,
      "loss": 0.0091,
      "step": 2306200
    },
    {
      "epoch": 3.7741796115551542,
      "grad_norm": 0.20008912682533264,
      "learning_rate": 2.4677951806435034e-06,
      "loss": 0.0112,
      "step": 2306220
    },
    {
      "epoch": 3.7742123419938074,
      "grad_norm": 0.2881956100463867,
      "learning_rate": 2.4677292884299866e-06,
      "loss": 0.0098,
      "step": 2306240
    },
    {
      "epoch": 3.7742450724324605,
      "grad_norm": 0.2645407021045685,
      "learning_rate": 2.4676633962164693e-06,
      "loss": 0.01,
      "step": 2306260
    },
    {
      "epoch": 3.774277802871114,
      "grad_norm": 0.23655958473682404,
      "learning_rate": 2.467597504002952e-06,
      "loss": 0.0115,
      "step": 2306280
    },
    {
      "epoch": 3.7743105333097673,
      "grad_norm": 0.31043267250061035,
      "learning_rate": 2.4675316117894352e-06,
      "loss": 0.0085,
      "step": 2306300
    },
    {
      "epoch": 3.774343263748421,
      "grad_norm": 0.35316532850265503,
      "learning_rate": 2.467465719575918e-06,
      "loss": 0.0102,
      "step": 2306320
    },
    {
      "epoch": 3.774375994187074,
      "grad_norm": 0.30799445509910583,
      "learning_rate": 2.4673998273624007e-06,
      "loss": 0.0116,
      "step": 2306340
    },
    {
      "epoch": 3.7744087246257276,
      "grad_norm": 0.17231084406375885,
      "learning_rate": 2.467333935148884e-06,
      "loss": 0.0122,
      "step": 2306360
    },
    {
      "epoch": 3.774441455064381,
      "grad_norm": 0.940187931060791,
      "learning_rate": 2.4672680429353666e-06,
      "loss": 0.0114,
      "step": 2306380
    },
    {
      "epoch": 3.774474185503034,
      "grad_norm": 0.31761473417282104,
      "learning_rate": 2.4672021507218493e-06,
      "loss": 0.0069,
      "step": 2306400
    },
    {
      "epoch": 3.7745069159416875,
      "grad_norm": 0.25922930240631104,
      "learning_rate": 2.467136258508332e-06,
      "loss": 0.009,
      "step": 2306420
    },
    {
      "epoch": 3.7745396463803407,
      "grad_norm": 0.10223492234945297,
      "learning_rate": 2.4670703662948152e-06,
      "loss": 0.0116,
      "step": 2306440
    },
    {
      "epoch": 3.7745723768189943,
      "grad_norm": 0.5150922536849976,
      "learning_rate": 2.467004474081298e-06,
      "loss": 0.0086,
      "step": 2306460
    },
    {
      "epoch": 3.7746051072576474,
      "grad_norm": 0.17604251205921173,
      "learning_rate": 2.466938581867781e-06,
      "loss": 0.0083,
      "step": 2306480
    },
    {
      "epoch": 3.774637837696301,
      "grad_norm": 0.42600318789482117,
      "learning_rate": 2.466872689654264e-06,
      "loss": 0.0154,
      "step": 2306500
    },
    {
      "epoch": 3.774670568134954,
      "grad_norm": 0.4425506889820099,
      "learning_rate": 2.4668067974407466e-06,
      "loss": 0.0139,
      "step": 2306520
    },
    {
      "epoch": 3.7747032985736073,
      "grad_norm": 0.48457708954811096,
      "learning_rate": 2.4667409052272294e-06,
      "loss": 0.0094,
      "step": 2306540
    },
    {
      "epoch": 3.774736029012261,
      "grad_norm": 0.09062652289867401,
      "learning_rate": 2.4666750130137125e-06,
      "loss": 0.0158,
      "step": 2306560
    },
    {
      "epoch": 3.774768759450914,
      "grad_norm": 0.042143870145082474,
      "learning_rate": 2.4666091208001953e-06,
      "loss": 0.0081,
      "step": 2306580
    },
    {
      "epoch": 3.7748014898895677,
      "grad_norm": 0.5469561815261841,
      "learning_rate": 2.466543228586678e-06,
      "loss": 0.0072,
      "step": 2306600
    },
    {
      "epoch": 3.774834220328221,
      "grad_norm": 0.14895206689834595,
      "learning_rate": 2.4664773363731608e-06,
      "loss": 0.015,
      "step": 2306620
    },
    {
      "epoch": 3.7748669507668744,
      "grad_norm": 0.4049862027168274,
      "learning_rate": 2.466411444159644e-06,
      "loss": 0.0153,
      "step": 2306640
    },
    {
      "epoch": 3.7748996812055275,
      "grad_norm": 0.14418479800224304,
      "learning_rate": 2.4663455519461267e-06,
      "loss": 0.0136,
      "step": 2306660
    },
    {
      "epoch": 3.7749324116441807,
      "grad_norm": 0.1966395378112793,
      "learning_rate": 2.46627965973261e-06,
      "loss": 0.0104,
      "step": 2306680
    },
    {
      "epoch": 3.7749651420828343,
      "grad_norm": 0.12631827592849731,
      "learning_rate": 2.4662137675190926e-06,
      "loss": 0.0105,
      "step": 2306700
    },
    {
      "epoch": 3.7749978725214874,
      "grad_norm": 0.46545955538749695,
      "learning_rate": 2.4661478753055753e-06,
      "loss": 0.0083,
      "step": 2306720
    },
    {
      "epoch": 3.775030602960141,
      "grad_norm": 0.33389225602149963,
      "learning_rate": 2.466081983092058e-06,
      "loss": 0.0083,
      "step": 2306740
    },
    {
      "epoch": 3.775063333398794,
      "grad_norm": 0.2857111692428589,
      "learning_rate": 2.466016090878541e-06,
      "loss": 0.0057,
      "step": 2306760
    },
    {
      "epoch": 3.775096063837448,
      "grad_norm": 0.27151283621788025,
      "learning_rate": 2.465950198665024e-06,
      "loss": 0.012,
      "step": 2306780
    },
    {
      "epoch": 3.775128794276101,
      "grad_norm": 0.4423823058605194,
      "learning_rate": 2.4658843064515067e-06,
      "loss": 0.0071,
      "step": 2306800
    },
    {
      "epoch": 3.775161524714754,
      "grad_norm": 0.14282934367656708,
      "learning_rate": 2.4658184142379894e-06,
      "loss": 0.0103,
      "step": 2306820
    },
    {
      "epoch": 3.7751942551534077,
      "grad_norm": 0.18447758257389069,
      "learning_rate": 2.4657525220244726e-06,
      "loss": 0.0125,
      "step": 2306840
    },
    {
      "epoch": 3.775226985592061,
      "grad_norm": 0.08047811686992645,
      "learning_rate": 2.4656866298109553e-06,
      "loss": 0.0124,
      "step": 2306860
    },
    {
      "epoch": 3.775259716030714,
      "grad_norm": 0.19366984069347382,
      "learning_rate": 2.4656207375974385e-06,
      "loss": 0.0105,
      "step": 2306880
    },
    {
      "epoch": 3.7752924464693676,
      "grad_norm": 0.10050006955862045,
      "learning_rate": 2.4655548453839212e-06,
      "loss": 0.0096,
      "step": 2306900
    },
    {
      "epoch": 3.775325176908021,
      "grad_norm": 0.44066375494003296,
      "learning_rate": 2.465488953170404e-06,
      "loss": 0.0104,
      "step": 2306920
    },
    {
      "epoch": 3.7753579073466743,
      "grad_norm": 0.17428918182849884,
      "learning_rate": 2.4654230609568867e-06,
      "loss": 0.0114,
      "step": 2306940
    },
    {
      "epoch": 3.7753906377853275,
      "grad_norm": 0.20422841608524323,
      "learning_rate": 2.46535716874337e-06,
      "loss": 0.0126,
      "step": 2306960
    },
    {
      "epoch": 3.775423368223981,
      "grad_norm": 0.1841999888420105,
      "learning_rate": 2.4652912765298526e-06,
      "loss": 0.008,
      "step": 2306980
    },
    {
      "epoch": 3.775456098662634,
      "grad_norm": 0.10538693517446518,
      "learning_rate": 2.4652253843163358e-06,
      "loss": 0.0089,
      "step": 2307000
    },
    {
      "epoch": 3.7754888291012874,
      "grad_norm": 0.4596186578273773,
      "learning_rate": 2.4651594921028185e-06,
      "loss": 0.0089,
      "step": 2307020
    },
    {
      "epoch": 3.775521559539941,
      "grad_norm": 0.17785072326660156,
      "learning_rate": 2.4650935998893013e-06,
      "loss": 0.0097,
      "step": 2307040
    },
    {
      "epoch": 3.7755542899785945,
      "grad_norm": 0.15160416066646576,
      "learning_rate": 2.465027707675784e-06,
      "loss": 0.0105,
      "step": 2307060
    },
    {
      "epoch": 3.7755870204172477,
      "grad_norm": 0.09314823895692825,
      "learning_rate": 2.464961815462267e-06,
      "loss": 0.0142,
      "step": 2307080
    },
    {
      "epoch": 3.775619750855901,
      "grad_norm": 0.5108033418655396,
      "learning_rate": 2.46489592324875e-06,
      "loss": 0.0118,
      "step": 2307100
    },
    {
      "epoch": 3.7756524812945544,
      "grad_norm": 0.20098833739757538,
      "learning_rate": 2.4648300310352326e-06,
      "loss": 0.0151,
      "step": 2307120
    },
    {
      "epoch": 3.7756852117332076,
      "grad_norm": 0.08536551892757416,
      "learning_rate": 2.4647641388217154e-06,
      "loss": 0.0083,
      "step": 2307140
    },
    {
      "epoch": 3.7757179421718607,
      "grad_norm": 0.18190133571624756,
      "learning_rate": 2.4646982466081985e-06,
      "loss": 0.0181,
      "step": 2307160
    },
    {
      "epoch": 3.7757506726105143,
      "grad_norm": 0.351701945066452,
      "learning_rate": 2.4646323543946817e-06,
      "loss": 0.0097,
      "step": 2307180
    },
    {
      "epoch": 3.775783403049168,
      "grad_norm": 0.12436188012361526,
      "learning_rate": 2.4645664621811644e-06,
      "loss": 0.0115,
      "step": 2307200
    },
    {
      "epoch": 3.775816133487821,
      "grad_norm": 0.045916538685560226,
      "learning_rate": 2.464500569967647e-06,
      "loss": 0.0089,
      "step": 2307220
    },
    {
      "epoch": 3.7758488639264742,
      "grad_norm": 0.11917057633399963,
      "learning_rate": 2.46443467775413e-06,
      "loss": 0.009,
      "step": 2307240
    },
    {
      "epoch": 3.775881594365128,
      "grad_norm": 0.6532852649688721,
      "learning_rate": 2.464368785540613e-06,
      "loss": 0.0103,
      "step": 2307260
    },
    {
      "epoch": 3.775914324803781,
      "grad_norm": 0.3742867708206177,
      "learning_rate": 2.464302893327096e-06,
      "loss": 0.0105,
      "step": 2307280
    },
    {
      "epoch": 3.775947055242434,
      "grad_norm": 0.86420738697052,
      "learning_rate": 2.4642370011135786e-06,
      "loss": 0.013,
      "step": 2307300
    },
    {
      "epoch": 3.7759797856810877,
      "grad_norm": 0.17195340991020203,
      "learning_rate": 2.4641711089000613e-06,
      "loss": 0.0135,
      "step": 2307320
    },
    {
      "epoch": 3.7760125161197413,
      "grad_norm": 0.1602494716644287,
      "learning_rate": 2.464105216686544e-06,
      "loss": 0.0074,
      "step": 2307340
    },
    {
      "epoch": 3.7760452465583945,
      "grad_norm": 0.15746130049228668,
      "learning_rate": 2.464039324473027e-06,
      "loss": 0.0124,
      "step": 2307360
    },
    {
      "epoch": 3.7760779769970476,
      "grad_norm": 0.2936290204524994,
      "learning_rate": 2.4639734322595104e-06,
      "loss": 0.0116,
      "step": 2307380
    },
    {
      "epoch": 3.776110707435701,
      "grad_norm": 0.3832608163356781,
      "learning_rate": 2.463907540045993e-06,
      "loss": 0.0134,
      "step": 2307400
    },
    {
      "epoch": 3.7761434378743544,
      "grad_norm": 0.5507716536521912,
      "learning_rate": 2.463841647832476e-06,
      "loss": 0.0137,
      "step": 2307420
    },
    {
      "epoch": 3.7761761683130075,
      "grad_norm": 0.0942833349108696,
      "learning_rate": 2.4637757556189586e-06,
      "loss": 0.0143,
      "step": 2307440
    },
    {
      "epoch": 3.776208898751661,
      "grad_norm": 0.25670772790908813,
      "learning_rate": 2.4637098634054418e-06,
      "loss": 0.0081,
      "step": 2307460
    },
    {
      "epoch": 3.7762416291903143,
      "grad_norm": 0.25875744223594666,
      "learning_rate": 2.4636439711919245e-06,
      "loss": 0.0106,
      "step": 2307480
    },
    {
      "epoch": 3.776274359628968,
      "grad_norm": 0.14200057089328766,
      "learning_rate": 2.4635780789784072e-06,
      "loss": 0.0058,
      "step": 2307500
    },
    {
      "epoch": 3.776307090067621,
      "grad_norm": 0.2669326961040497,
      "learning_rate": 2.46351218676489e-06,
      "loss": 0.0124,
      "step": 2307520
    },
    {
      "epoch": 3.7763398205062746,
      "grad_norm": 0.18578046560287476,
      "learning_rate": 2.463446294551373e-06,
      "loss": 0.0092,
      "step": 2307540
    },
    {
      "epoch": 3.7763725509449277,
      "grad_norm": 0.15161313116550446,
      "learning_rate": 2.463380402337856e-06,
      "loss": 0.0099,
      "step": 2307560
    },
    {
      "epoch": 3.776405281383581,
      "grad_norm": 0.2666257321834564,
      "learning_rate": 2.463314510124339e-06,
      "loss": 0.0082,
      "step": 2307580
    },
    {
      "epoch": 3.7764380118222345,
      "grad_norm": 0.5326164364814758,
      "learning_rate": 2.4632486179108218e-06,
      "loss": 0.0127,
      "step": 2307600
    },
    {
      "epoch": 3.7764707422608876,
      "grad_norm": 0.7664021253585815,
      "learning_rate": 2.4631827256973045e-06,
      "loss": 0.0113,
      "step": 2307620
    },
    {
      "epoch": 3.7765034726995412,
      "grad_norm": 0.14976856112480164,
      "learning_rate": 2.4631168334837873e-06,
      "loss": 0.0097,
      "step": 2307640
    },
    {
      "epoch": 3.7765362031381944,
      "grad_norm": 0.5176991820335388,
      "learning_rate": 2.4630509412702704e-06,
      "loss": 0.0114,
      "step": 2307660
    },
    {
      "epoch": 3.776568933576848,
      "grad_norm": 0.11434175074100494,
      "learning_rate": 2.462985049056753e-06,
      "loss": 0.0062,
      "step": 2307680
    },
    {
      "epoch": 3.776601664015501,
      "grad_norm": 0.14518241584300995,
      "learning_rate": 2.462919156843236e-06,
      "loss": 0.0081,
      "step": 2307700
    },
    {
      "epoch": 3.7766343944541543,
      "grad_norm": 0.05201762542128563,
      "learning_rate": 2.462853264629719e-06,
      "loss": 0.014,
      "step": 2307720
    },
    {
      "epoch": 3.776667124892808,
      "grad_norm": 0.20038136839866638,
      "learning_rate": 2.462787372416202e-06,
      "loss": 0.0119,
      "step": 2307740
    },
    {
      "epoch": 3.776699855331461,
      "grad_norm": 0.6122767329216003,
      "learning_rate": 2.4627214802026845e-06,
      "loss": 0.0122,
      "step": 2307760
    },
    {
      "epoch": 3.7767325857701146,
      "grad_norm": 0.20211990177631378,
      "learning_rate": 2.4626555879891677e-06,
      "loss": 0.01,
      "step": 2307780
    },
    {
      "epoch": 3.7767653162087678,
      "grad_norm": 0.13189725577831268,
      "learning_rate": 2.4625896957756504e-06,
      "loss": 0.0108,
      "step": 2307800
    },
    {
      "epoch": 3.7767980466474214,
      "grad_norm": 0.265845388174057,
      "learning_rate": 2.462523803562133e-06,
      "loss": 0.0182,
      "step": 2307820
    },
    {
      "epoch": 3.7768307770860745,
      "grad_norm": 0.20837920904159546,
      "learning_rate": 2.462457911348616e-06,
      "loss": 0.0085,
      "step": 2307840
    },
    {
      "epoch": 3.7768635075247277,
      "grad_norm": 0.14882464706897736,
      "learning_rate": 2.462392019135099e-06,
      "loss": 0.0082,
      "step": 2307860
    },
    {
      "epoch": 3.7768962379633813,
      "grad_norm": 0.23277348279953003,
      "learning_rate": 2.462326126921582e-06,
      "loss": 0.0113,
      "step": 2307880
    },
    {
      "epoch": 3.7769289684020344,
      "grad_norm": 0.1445496678352356,
      "learning_rate": 2.462260234708065e-06,
      "loss": 0.0075,
      "step": 2307900
    },
    {
      "epoch": 3.776961698840688,
      "grad_norm": 0.15142880380153656,
      "learning_rate": 2.4621943424945477e-06,
      "loss": 0.0091,
      "step": 2307920
    },
    {
      "epoch": 3.776994429279341,
      "grad_norm": 0.12690141797065735,
      "learning_rate": 2.4621284502810305e-06,
      "loss": 0.0125,
      "step": 2307940
    },
    {
      "epoch": 3.7770271597179947,
      "grad_norm": 0.2605362832546234,
      "learning_rate": 2.462062558067513e-06,
      "loss": 0.0109,
      "step": 2307960
    },
    {
      "epoch": 3.777059890156648,
      "grad_norm": 0.14677679538726807,
      "learning_rate": 2.4619966658539964e-06,
      "loss": 0.0121,
      "step": 2307980
    },
    {
      "epoch": 3.777092620595301,
      "grad_norm": 0.2928345203399658,
      "learning_rate": 2.461930773640479e-06,
      "loss": 0.0116,
      "step": 2308000
    },
    {
      "epoch": 3.7771253510339546,
      "grad_norm": 0.10330915451049805,
      "learning_rate": 2.461864881426962e-06,
      "loss": 0.0103,
      "step": 2308020
    },
    {
      "epoch": 3.777158081472608,
      "grad_norm": 0.24152249097824097,
      "learning_rate": 2.4617989892134446e-06,
      "loss": 0.0087,
      "step": 2308040
    },
    {
      "epoch": 3.7771908119112614,
      "grad_norm": 0.14229777455329895,
      "learning_rate": 2.4617330969999278e-06,
      "loss": 0.0194,
      "step": 2308060
    },
    {
      "epoch": 3.7772235423499145,
      "grad_norm": 0.5144169330596924,
      "learning_rate": 2.4616672047864105e-06,
      "loss": 0.0086,
      "step": 2308080
    },
    {
      "epoch": 3.777256272788568,
      "grad_norm": 0.2947317659854889,
      "learning_rate": 2.4616013125728937e-06,
      "loss": 0.013,
      "step": 2308100
    },
    {
      "epoch": 3.7772890032272213,
      "grad_norm": 0.5999981760978699,
      "learning_rate": 2.4615354203593764e-06,
      "loss": 0.0126,
      "step": 2308120
    },
    {
      "epoch": 3.7773217336658744,
      "grad_norm": 0.2532652020454407,
      "learning_rate": 2.461469528145859e-06,
      "loss": 0.015,
      "step": 2308140
    },
    {
      "epoch": 3.777354464104528,
      "grad_norm": 0.7427994608879089,
      "learning_rate": 2.461403635932342e-06,
      "loss": 0.0111,
      "step": 2308160
    },
    {
      "epoch": 3.777387194543181,
      "grad_norm": 0.6143581867218018,
      "learning_rate": 2.461337743718825e-06,
      "loss": 0.0123,
      "step": 2308180
    },
    {
      "epoch": 3.7774199249818348,
      "grad_norm": 0.4653359055519104,
      "learning_rate": 2.4612718515053078e-06,
      "loss": 0.0086,
      "step": 2308200
    },
    {
      "epoch": 3.777452655420488,
      "grad_norm": 0.12055046856403351,
      "learning_rate": 2.4612059592917905e-06,
      "loss": 0.0171,
      "step": 2308220
    },
    {
      "epoch": 3.7774853858591415,
      "grad_norm": 0.1608666032552719,
      "learning_rate": 2.4611400670782737e-06,
      "loss": 0.0094,
      "step": 2308240
    },
    {
      "epoch": 3.7775181162977947,
      "grad_norm": 0.2596520185470581,
      "learning_rate": 2.4610741748647564e-06,
      "loss": 0.0103,
      "step": 2308260
    },
    {
      "epoch": 3.777550846736448,
      "grad_norm": 0.5721874833106995,
      "learning_rate": 2.461008282651239e-06,
      "loss": 0.0124,
      "step": 2308280
    },
    {
      "epoch": 3.7775835771751014,
      "grad_norm": 0.21798843145370483,
      "learning_rate": 2.4609423904377223e-06,
      "loss": 0.0094,
      "step": 2308300
    },
    {
      "epoch": 3.7776163076137546,
      "grad_norm": 0.14400959014892578,
      "learning_rate": 2.460876498224205e-06,
      "loss": 0.0079,
      "step": 2308320
    },
    {
      "epoch": 3.7776490380524077,
      "grad_norm": 0.38665103912353516,
      "learning_rate": 2.460810606010688e-06,
      "loss": 0.0085,
      "step": 2308340
    },
    {
      "epoch": 3.7776817684910613,
      "grad_norm": 0.15497900545597076,
      "learning_rate": 2.4607447137971705e-06,
      "loss": 0.0093,
      "step": 2308360
    },
    {
      "epoch": 3.777714498929715,
      "grad_norm": 0.19264158606529236,
      "learning_rate": 2.4606788215836537e-06,
      "loss": 0.0119,
      "step": 2308380
    },
    {
      "epoch": 3.777747229368368,
      "grad_norm": 0.5648990273475647,
      "learning_rate": 2.4606129293701364e-06,
      "loss": 0.0186,
      "step": 2308400
    },
    {
      "epoch": 3.777779959807021,
      "grad_norm": 0.21477703750133514,
      "learning_rate": 2.4605470371566196e-06,
      "loss": 0.0086,
      "step": 2308420
    },
    {
      "epoch": 3.777812690245675,
      "grad_norm": 0.363989919424057,
      "learning_rate": 2.4604811449431024e-06,
      "loss": 0.0105,
      "step": 2308440
    },
    {
      "epoch": 3.777845420684328,
      "grad_norm": 0.3040127158164978,
      "learning_rate": 2.460415252729585e-06,
      "loss": 0.0153,
      "step": 2308460
    },
    {
      "epoch": 3.777878151122981,
      "grad_norm": 0.36369526386260986,
      "learning_rate": 2.460349360516068e-06,
      "loss": 0.0109,
      "step": 2308480
    },
    {
      "epoch": 3.7779108815616347,
      "grad_norm": 0.266863077878952,
      "learning_rate": 2.460283468302551e-06,
      "loss": 0.0134,
      "step": 2308500
    },
    {
      "epoch": 3.7779436120002883,
      "grad_norm": 0.13580761849880219,
      "learning_rate": 2.4602175760890337e-06,
      "loss": 0.0093,
      "step": 2308520
    },
    {
      "epoch": 3.7779763424389414,
      "grad_norm": 0.2201470136642456,
      "learning_rate": 2.4601516838755165e-06,
      "loss": 0.0091,
      "step": 2308540
    },
    {
      "epoch": 3.7780090728775946,
      "grad_norm": 0.2259889841079712,
      "learning_rate": 2.4600857916619992e-06,
      "loss": 0.0107,
      "step": 2308560
    },
    {
      "epoch": 3.778041803316248,
      "grad_norm": 0.43674638867378235,
      "learning_rate": 2.4600198994484824e-06,
      "loss": 0.0096,
      "step": 2308580
    },
    {
      "epoch": 3.7780745337549013,
      "grad_norm": 0.41418904066085815,
      "learning_rate": 2.4599540072349655e-06,
      "loss": 0.0104,
      "step": 2308600
    },
    {
      "epoch": 3.7781072641935545,
      "grad_norm": 0.6525124311447144,
      "learning_rate": 2.4598881150214483e-06,
      "loss": 0.0106,
      "step": 2308620
    },
    {
      "epoch": 3.778139994632208,
      "grad_norm": 0.3281300365924835,
      "learning_rate": 2.459822222807931e-06,
      "loss": 0.0105,
      "step": 2308640
    },
    {
      "epoch": 3.7781727250708617,
      "grad_norm": 0.09564030170440674,
      "learning_rate": 2.4597563305944138e-06,
      "loss": 0.0141,
      "step": 2308660
    },
    {
      "epoch": 3.778205455509515,
      "grad_norm": 0.0683685690164566,
      "learning_rate": 2.459690438380897e-06,
      "loss": 0.0119,
      "step": 2308680
    },
    {
      "epoch": 3.778238185948168,
      "grad_norm": 0.6449680924415588,
      "learning_rate": 2.4596245461673797e-06,
      "loss": 0.009,
      "step": 2308700
    },
    {
      "epoch": 3.7782709163868216,
      "grad_norm": 0.2288878858089447,
      "learning_rate": 2.4595586539538624e-06,
      "loss": 0.0089,
      "step": 2308720
    },
    {
      "epoch": 3.7783036468254747,
      "grad_norm": 0.38139939308166504,
      "learning_rate": 2.459492761740345e-06,
      "loss": 0.0126,
      "step": 2308740
    },
    {
      "epoch": 3.778336377264128,
      "grad_norm": 0.19263924658298492,
      "learning_rate": 2.4594268695268283e-06,
      "loss": 0.0063,
      "step": 2308760
    },
    {
      "epoch": 3.7783691077027814,
      "grad_norm": 0.16175295412540436,
      "learning_rate": 2.459360977313311e-06,
      "loss": 0.0106,
      "step": 2308780
    },
    {
      "epoch": 3.778401838141435,
      "grad_norm": 0.2589987814426422,
      "learning_rate": 2.459295085099794e-06,
      "loss": 0.0101,
      "step": 2308800
    },
    {
      "epoch": 3.778434568580088,
      "grad_norm": 0.23624742031097412,
      "learning_rate": 2.459229192886277e-06,
      "loss": 0.0137,
      "step": 2308820
    },
    {
      "epoch": 3.7784672990187413,
      "grad_norm": 0.21359507739543915,
      "learning_rate": 2.4591633006727597e-06,
      "loss": 0.0081,
      "step": 2308840
    },
    {
      "epoch": 3.778500029457395,
      "grad_norm": 0.22498996555805206,
      "learning_rate": 2.4590974084592424e-06,
      "loss": 0.0144,
      "step": 2308860
    },
    {
      "epoch": 3.778532759896048,
      "grad_norm": 0.06510566174983978,
      "learning_rate": 2.4590315162457256e-06,
      "loss": 0.0129,
      "step": 2308880
    },
    {
      "epoch": 3.7785654903347012,
      "grad_norm": 0.3103226125240326,
      "learning_rate": 2.4589656240322083e-06,
      "loss": 0.0109,
      "step": 2308900
    },
    {
      "epoch": 3.778598220773355,
      "grad_norm": 0.09901482611894608,
      "learning_rate": 2.458899731818691e-06,
      "loss": 0.0103,
      "step": 2308920
    },
    {
      "epoch": 3.778630951212008,
      "grad_norm": 0.32545363903045654,
      "learning_rate": 2.4588338396051742e-06,
      "loss": 0.0151,
      "step": 2308940
    },
    {
      "epoch": 3.7786636816506616,
      "grad_norm": 0.3333035707473755,
      "learning_rate": 2.458767947391657e-06,
      "loss": 0.0131,
      "step": 2308960
    },
    {
      "epoch": 3.7786964120893147,
      "grad_norm": 0.20240697264671326,
      "learning_rate": 2.4587020551781397e-06,
      "loss": 0.0083,
      "step": 2308980
    },
    {
      "epoch": 3.7787291425279683,
      "grad_norm": 0.09939891844987869,
      "learning_rate": 2.458636162964623e-06,
      "loss": 0.0133,
      "step": 2309000
    },
    {
      "epoch": 3.7787618729666215,
      "grad_norm": 0.7130666971206665,
      "learning_rate": 2.4585702707511056e-06,
      "loss": 0.0134,
      "step": 2309020
    },
    {
      "epoch": 3.7787946034052746,
      "grad_norm": 0.4536047875881195,
      "learning_rate": 2.4585043785375884e-06,
      "loss": 0.0092,
      "step": 2309040
    },
    {
      "epoch": 3.778827333843928,
      "grad_norm": 0.08076426386833191,
      "learning_rate": 2.458438486324071e-06,
      "loss": 0.0112,
      "step": 2309060
    },
    {
      "epoch": 3.7788600642825814,
      "grad_norm": 0.17436759173870087,
      "learning_rate": 2.4583725941105543e-06,
      "loss": 0.0114,
      "step": 2309080
    },
    {
      "epoch": 3.778892794721235,
      "grad_norm": 0.5544314384460449,
      "learning_rate": 2.458306701897037e-06,
      "loss": 0.0155,
      "step": 2309100
    },
    {
      "epoch": 3.778925525159888,
      "grad_norm": 0.1910567581653595,
      "learning_rate": 2.45824080968352e-06,
      "loss": 0.0079,
      "step": 2309120
    },
    {
      "epoch": 3.7789582555985417,
      "grad_norm": 0.574661910533905,
      "learning_rate": 2.458174917470003e-06,
      "loss": 0.0164,
      "step": 2309140
    },
    {
      "epoch": 3.778990986037195,
      "grad_norm": 0.14243336021900177,
      "learning_rate": 2.4581090252564856e-06,
      "loss": 0.0175,
      "step": 2309160
    },
    {
      "epoch": 3.779023716475848,
      "grad_norm": 0.11406740546226501,
      "learning_rate": 2.4580431330429684e-06,
      "loss": 0.0084,
      "step": 2309180
    },
    {
      "epoch": 3.7790564469145016,
      "grad_norm": 0.19313473999500275,
      "learning_rate": 2.4579772408294515e-06,
      "loss": 0.01,
      "step": 2309200
    },
    {
      "epoch": 3.7790891773531547,
      "grad_norm": 0.20447689294815063,
      "learning_rate": 2.4579113486159343e-06,
      "loss": 0.0123,
      "step": 2309220
    },
    {
      "epoch": 3.7791219077918083,
      "grad_norm": 0.17221908271312714,
      "learning_rate": 2.457845456402417e-06,
      "loss": 0.0137,
      "step": 2309240
    },
    {
      "epoch": 3.7791546382304615,
      "grad_norm": 0.15334509313106537,
      "learning_rate": 2.4577795641888998e-06,
      "loss": 0.0089,
      "step": 2309260
    },
    {
      "epoch": 3.779187368669115,
      "grad_norm": 0.08143807202577591,
      "learning_rate": 2.457713671975383e-06,
      "loss": 0.0147,
      "step": 2309280
    },
    {
      "epoch": 3.7792200991077682,
      "grad_norm": 0.07158776372671127,
      "learning_rate": 2.4576477797618657e-06,
      "loss": 0.0092,
      "step": 2309300
    },
    {
      "epoch": 3.7792528295464214,
      "grad_norm": 0.8024848699569702,
      "learning_rate": 2.457581887548349e-06,
      "loss": 0.0122,
      "step": 2309320
    },
    {
      "epoch": 3.779285559985075,
      "grad_norm": 1.579687476158142,
      "learning_rate": 2.4575159953348316e-06,
      "loss": 0.0095,
      "step": 2309340
    },
    {
      "epoch": 3.779318290423728,
      "grad_norm": 0.16772960126399994,
      "learning_rate": 2.4574501031213143e-06,
      "loss": 0.0122,
      "step": 2309360
    },
    {
      "epoch": 3.7793510208623817,
      "grad_norm": 1.1910163164138794,
      "learning_rate": 2.457384210907797e-06,
      "loss": 0.0116,
      "step": 2309380
    },
    {
      "epoch": 3.779383751301035,
      "grad_norm": 0.33172258734703064,
      "learning_rate": 2.4573183186942802e-06,
      "loss": 0.0121,
      "step": 2309400
    },
    {
      "epoch": 3.7794164817396885,
      "grad_norm": 0.2588348984718323,
      "learning_rate": 2.457252426480763e-06,
      "loss": 0.0151,
      "step": 2309420
    },
    {
      "epoch": 3.7794492121783416,
      "grad_norm": 0.23847931623458862,
      "learning_rate": 2.4571865342672457e-06,
      "loss": 0.0133,
      "step": 2309440
    },
    {
      "epoch": 3.7794819426169948,
      "grad_norm": 0.0892491489648819,
      "learning_rate": 2.4571206420537284e-06,
      "loss": 0.011,
      "step": 2309460
    },
    {
      "epoch": 3.7795146730556484,
      "grad_norm": 0.11883673816919327,
      "learning_rate": 2.4570547498402116e-06,
      "loss": 0.0079,
      "step": 2309480
    },
    {
      "epoch": 3.7795474034943015,
      "grad_norm": 0.08282113075256348,
      "learning_rate": 2.4569888576266943e-06,
      "loss": 0.0104,
      "step": 2309500
    },
    {
      "epoch": 3.779580133932955,
      "grad_norm": 0.10818419605493546,
      "learning_rate": 2.4569229654131775e-06,
      "loss": 0.0152,
      "step": 2309520
    },
    {
      "epoch": 3.7796128643716083,
      "grad_norm": 0.20586128532886505,
      "learning_rate": 2.4568570731996602e-06,
      "loss": 0.0099,
      "step": 2309540
    },
    {
      "epoch": 3.779645594810262,
      "grad_norm": 0.14387747645378113,
      "learning_rate": 2.456791180986143e-06,
      "loss": 0.0152,
      "step": 2309560
    },
    {
      "epoch": 3.779678325248915,
      "grad_norm": 0.12390822172164917,
      "learning_rate": 2.4567252887726257e-06,
      "loss": 0.0109,
      "step": 2309580
    },
    {
      "epoch": 3.779711055687568,
      "grad_norm": 0.19906599819660187,
      "learning_rate": 2.456659396559109e-06,
      "loss": 0.0139,
      "step": 2309600
    },
    {
      "epoch": 3.7797437861262217,
      "grad_norm": 0.11703673750162125,
      "learning_rate": 2.4565935043455916e-06,
      "loss": 0.0096,
      "step": 2309620
    },
    {
      "epoch": 3.779776516564875,
      "grad_norm": 0.1060718521475792,
      "learning_rate": 2.4565276121320748e-06,
      "loss": 0.0139,
      "step": 2309640
    },
    {
      "epoch": 3.7798092470035285,
      "grad_norm": 0.31221237778663635,
      "learning_rate": 2.4564617199185575e-06,
      "loss": 0.016,
      "step": 2309660
    },
    {
      "epoch": 3.7798419774421816,
      "grad_norm": 0.32752665877342224,
      "learning_rate": 2.4563958277050403e-06,
      "loss": 0.0055,
      "step": 2309680
    },
    {
      "epoch": 3.7798747078808352,
      "grad_norm": 0.12137790024280548,
      "learning_rate": 2.456329935491523e-06,
      "loss": 0.0116,
      "step": 2309700
    },
    {
      "epoch": 3.7799074383194884,
      "grad_norm": 0.37768667936325073,
      "learning_rate": 2.456264043278006e-06,
      "loss": 0.0103,
      "step": 2309720
    },
    {
      "epoch": 3.7799401687581415,
      "grad_norm": 0.46819084882736206,
      "learning_rate": 2.456198151064489e-06,
      "loss": 0.014,
      "step": 2309740
    },
    {
      "epoch": 3.779972899196795,
      "grad_norm": 0.1431969553232193,
      "learning_rate": 2.4561322588509716e-06,
      "loss": 0.0078,
      "step": 2309760
    },
    {
      "epoch": 3.7800056296354483,
      "grad_norm": 0.4671955704689026,
      "learning_rate": 2.4560663666374544e-06,
      "loss": 0.0089,
      "step": 2309780
    },
    {
      "epoch": 3.780038360074102,
      "grad_norm": 0.4316343367099762,
      "learning_rate": 2.4560004744239375e-06,
      "loss": 0.0169,
      "step": 2309800
    },
    {
      "epoch": 3.780071090512755,
      "grad_norm": 0.35007405281066895,
      "learning_rate": 2.4559345822104207e-06,
      "loss": 0.0077,
      "step": 2309820
    },
    {
      "epoch": 3.7801038209514086,
      "grad_norm": 0.4035980999469757,
      "learning_rate": 2.4558686899969035e-06,
      "loss": 0.0142,
      "step": 2309840
    },
    {
      "epoch": 3.7801365513900618,
      "grad_norm": 0.21025054156780243,
      "learning_rate": 2.455802797783386e-06,
      "loss": 0.0143,
      "step": 2309860
    },
    {
      "epoch": 3.780169281828715,
      "grad_norm": 0.035355862230062485,
      "learning_rate": 2.455736905569869e-06,
      "loss": 0.0065,
      "step": 2309880
    },
    {
      "epoch": 3.7802020122673685,
      "grad_norm": 0.47273334860801697,
      "learning_rate": 2.455671013356352e-06,
      "loss": 0.0132,
      "step": 2309900
    },
    {
      "epoch": 3.7802347427060217,
      "grad_norm": 1.0583603382110596,
      "learning_rate": 2.455605121142835e-06,
      "loss": 0.0117,
      "step": 2309920
    },
    {
      "epoch": 3.780267473144675,
      "grad_norm": 0.23236535489559174,
      "learning_rate": 2.4555392289293176e-06,
      "loss": 0.0099,
      "step": 2309940
    },
    {
      "epoch": 3.7803002035833284,
      "grad_norm": 0.3668138086795807,
      "learning_rate": 2.4554733367158003e-06,
      "loss": 0.0068,
      "step": 2309960
    },
    {
      "epoch": 3.780332934021982,
      "grad_norm": 0.3199279010295868,
      "learning_rate": 2.455407444502283e-06,
      "loss": 0.0101,
      "step": 2309980
    },
    {
      "epoch": 3.780365664460635,
      "grad_norm": 0.2747223973274231,
      "learning_rate": 2.4553415522887662e-06,
      "loss": 0.007,
      "step": 2310000
    },
    {
      "epoch": 3.7803983948992883,
      "grad_norm": 0.40020957589149475,
      "learning_rate": 2.4552756600752494e-06,
      "loss": 0.0155,
      "step": 2310020
    },
    {
      "epoch": 3.780431125337942,
      "grad_norm": 0.14901219308376312,
      "learning_rate": 2.455209767861732e-06,
      "loss": 0.0099,
      "step": 2310040
    },
    {
      "epoch": 3.780463855776595,
      "grad_norm": 0.14777624607086182,
      "learning_rate": 2.455143875648215e-06,
      "loss": 0.0135,
      "step": 2310060
    },
    {
      "epoch": 3.780496586215248,
      "grad_norm": 0.44606950879096985,
      "learning_rate": 2.4550779834346976e-06,
      "loss": 0.0132,
      "step": 2310080
    },
    {
      "epoch": 3.780529316653902,
      "grad_norm": 0.4283407926559448,
      "learning_rate": 2.4550120912211808e-06,
      "loss": 0.0096,
      "step": 2310100
    },
    {
      "epoch": 3.7805620470925554,
      "grad_norm": 0.10165642946958542,
      "learning_rate": 2.4549461990076635e-06,
      "loss": 0.0074,
      "step": 2310120
    },
    {
      "epoch": 3.7805947775312085,
      "grad_norm": 0.23604433238506317,
      "learning_rate": 2.4548803067941462e-06,
      "loss": 0.0107,
      "step": 2310140
    },
    {
      "epoch": 3.7806275079698617,
      "grad_norm": 0.44820326566696167,
      "learning_rate": 2.454814414580629e-06,
      "loss": 0.0085,
      "step": 2310160
    },
    {
      "epoch": 3.7806602384085153,
      "grad_norm": 0.1516914814710617,
      "learning_rate": 2.454748522367112e-06,
      "loss": 0.0128,
      "step": 2310180
    },
    {
      "epoch": 3.7806929688471684,
      "grad_norm": 0.22637194395065308,
      "learning_rate": 2.454682630153595e-06,
      "loss": 0.0133,
      "step": 2310200
    },
    {
      "epoch": 3.7807256992858216,
      "grad_norm": 0.18554870784282684,
      "learning_rate": 2.454616737940078e-06,
      "loss": 0.0094,
      "step": 2310220
    },
    {
      "epoch": 3.780758429724475,
      "grad_norm": 0.17076097428798676,
      "learning_rate": 2.454550845726561e-06,
      "loss": 0.0078,
      "step": 2310240
    },
    {
      "epoch": 3.7807911601631288,
      "grad_norm": 0.2372266948223114,
      "learning_rate": 2.4544849535130435e-06,
      "loss": 0.0082,
      "step": 2310260
    },
    {
      "epoch": 3.780823890601782,
      "grad_norm": 0.4945752024650574,
      "learning_rate": 2.4544190612995263e-06,
      "loss": 0.0117,
      "step": 2310280
    },
    {
      "epoch": 3.780856621040435,
      "grad_norm": 0.2723425328731537,
      "learning_rate": 2.4543531690860094e-06,
      "loss": 0.0092,
      "step": 2310300
    },
    {
      "epoch": 3.7808893514790887,
      "grad_norm": 0.4928736686706543,
      "learning_rate": 2.454287276872492e-06,
      "loss": 0.012,
      "step": 2310320
    },
    {
      "epoch": 3.780922081917742,
      "grad_norm": 0.35010647773742676,
      "learning_rate": 2.454221384658975e-06,
      "loss": 0.0096,
      "step": 2310340
    },
    {
      "epoch": 3.780954812356395,
      "grad_norm": 0.09465408325195312,
      "learning_rate": 2.454155492445458e-06,
      "loss": 0.009,
      "step": 2310360
    },
    {
      "epoch": 3.7809875427950486,
      "grad_norm": 0.33072948455810547,
      "learning_rate": 2.454089600231941e-06,
      "loss": 0.0099,
      "step": 2310380
    },
    {
      "epoch": 3.7810202732337017,
      "grad_norm": 0.7343683242797852,
      "learning_rate": 2.4540237080184236e-06,
      "loss": 0.017,
      "step": 2310400
    },
    {
      "epoch": 3.7810530036723553,
      "grad_norm": 0.06339563429355621,
      "learning_rate": 2.4539578158049067e-06,
      "loss": 0.0099,
      "step": 2310420
    },
    {
      "epoch": 3.7810857341110085,
      "grad_norm": 0.5267737507820129,
      "learning_rate": 2.4538919235913895e-06,
      "loss": 0.0087,
      "step": 2310440
    },
    {
      "epoch": 3.781118464549662,
      "grad_norm": 0.36569708585739136,
      "learning_rate": 2.453826031377872e-06,
      "loss": 0.0072,
      "step": 2310460
    },
    {
      "epoch": 3.781151194988315,
      "grad_norm": 0.3894908130168915,
      "learning_rate": 2.453760139164355e-06,
      "loss": 0.0101,
      "step": 2310480
    },
    {
      "epoch": 3.7811839254269684,
      "grad_norm": 0.27595409750938416,
      "learning_rate": 2.453694246950838e-06,
      "loss": 0.0096,
      "step": 2310500
    },
    {
      "epoch": 3.781216655865622,
      "grad_norm": 0.253200501203537,
      "learning_rate": 2.453628354737321e-06,
      "loss": 0.0095,
      "step": 2310520
    },
    {
      "epoch": 3.781249386304275,
      "grad_norm": 0.06735889613628387,
      "learning_rate": 2.453562462523804e-06,
      "loss": 0.0109,
      "step": 2310540
    },
    {
      "epoch": 3.7812821167429287,
      "grad_norm": 0.1518048197031021,
      "learning_rate": 2.4534965703102867e-06,
      "loss": 0.0129,
      "step": 2310560
    },
    {
      "epoch": 3.781314847181582,
      "grad_norm": 0.10783892124891281,
      "learning_rate": 2.4534306780967695e-06,
      "loss": 0.0107,
      "step": 2310580
    },
    {
      "epoch": 3.7813475776202354,
      "grad_norm": 0.10790544748306274,
      "learning_rate": 2.4533647858832522e-06,
      "loss": 0.0106,
      "step": 2310600
    },
    {
      "epoch": 3.7813803080588886,
      "grad_norm": 0.29527947306632996,
      "learning_rate": 2.4532988936697354e-06,
      "loss": 0.0091,
      "step": 2310620
    },
    {
      "epoch": 3.7814130384975417,
      "grad_norm": 0.28232669830322266,
      "learning_rate": 2.453233001456218e-06,
      "loss": 0.0111,
      "step": 2310640
    },
    {
      "epoch": 3.7814457689361953,
      "grad_norm": 0.2352658063173294,
      "learning_rate": 2.453167109242701e-06,
      "loss": 0.0143,
      "step": 2310660
    },
    {
      "epoch": 3.7814784993748485,
      "grad_norm": 0.547275722026825,
      "learning_rate": 2.4531012170291836e-06,
      "loss": 0.0174,
      "step": 2310680
    },
    {
      "epoch": 3.781511229813502,
      "grad_norm": 0.208887979388237,
      "learning_rate": 2.4530353248156668e-06,
      "loss": 0.0086,
      "step": 2310700
    },
    {
      "epoch": 3.7815439602521552,
      "grad_norm": 0.10917378216981888,
      "learning_rate": 2.4529694326021495e-06,
      "loss": 0.0121,
      "step": 2310720
    },
    {
      "epoch": 3.781576690690809,
      "grad_norm": 0.2282593548297882,
      "learning_rate": 2.4529035403886327e-06,
      "loss": 0.0066,
      "step": 2310740
    },
    {
      "epoch": 3.781609421129462,
      "grad_norm": 0.33089104294776917,
      "learning_rate": 2.4528376481751154e-06,
      "loss": 0.0091,
      "step": 2310760
    },
    {
      "epoch": 3.781642151568115,
      "grad_norm": 0.1226295530796051,
      "learning_rate": 2.452771755961598e-06,
      "loss": 0.0094,
      "step": 2310780
    },
    {
      "epoch": 3.7816748820067687,
      "grad_norm": 0.174698606133461,
      "learning_rate": 2.452705863748081e-06,
      "loss": 0.0078,
      "step": 2310800
    },
    {
      "epoch": 3.781707612445422,
      "grad_norm": 0.8550392985343933,
      "learning_rate": 2.452639971534564e-06,
      "loss": 0.01,
      "step": 2310820
    },
    {
      "epoch": 3.7817403428840755,
      "grad_norm": 0.42494308948516846,
      "learning_rate": 2.452574079321047e-06,
      "loss": 0.0116,
      "step": 2310840
    },
    {
      "epoch": 3.7817730733227286,
      "grad_norm": 0.20025955140590668,
      "learning_rate": 2.4525081871075295e-06,
      "loss": 0.0082,
      "step": 2310860
    },
    {
      "epoch": 3.781805803761382,
      "grad_norm": 0.18926183879375458,
      "learning_rate": 2.4524422948940127e-06,
      "loss": 0.0102,
      "step": 2310880
    },
    {
      "epoch": 3.7818385342000354,
      "grad_norm": 0.5875833034515381,
      "learning_rate": 2.4523764026804954e-06,
      "loss": 0.0141,
      "step": 2310900
    },
    {
      "epoch": 3.7818712646386885,
      "grad_norm": 0.06065623462200165,
      "learning_rate": 2.452310510466978e-06,
      "loss": 0.0125,
      "step": 2310920
    },
    {
      "epoch": 3.781903995077342,
      "grad_norm": 0.9620949625968933,
      "learning_rate": 2.4522446182534613e-06,
      "loss": 0.0111,
      "step": 2310940
    },
    {
      "epoch": 3.7819367255159952,
      "grad_norm": 0.18469199538230896,
      "learning_rate": 2.452178726039944e-06,
      "loss": 0.0106,
      "step": 2310960
    },
    {
      "epoch": 3.781969455954649,
      "grad_norm": 0.1563011109828949,
      "learning_rate": 2.452112833826427e-06,
      "loss": 0.0065,
      "step": 2310980
    },
    {
      "epoch": 3.782002186393302,
      "grad_norm": 0.25931599736213684,
      "learning_rate": 2.4520469416129096e-06,
      "loss": 0.0118,
      "step": 2311000
    },
    {
      "epoch": 3.7820349168319556,
      "grad_norm": 0.4343183934688568,
      "learning_rate": 2.4519810493993927e-06,
      "loss": 0.0122,
      "step": 2311020
    },
    {
      "epoch": 3.7820676472706087,
      "grad_norm": 0.268771231174469,
      "learning_rate": 2.4519151571858755e-06,
      "loss": 0.0083,
      "step": 2311040
    },
    {
      "epoch": 3.782100377709262,
      "grad_norm": 0.5639548301696777,
      "learning_rate": 2.4518492649723586e-06,
      "loss": 0.0126,
      "step": 2311060
    },
    {
      "epoch": 3.7821331081479155,
      "grad_norm": 0.11370983719825745,
      "learning_rate": 2.4517833727588414e-06,
      "loss": 0.013,
      "step": 2311080
    },
    {
      "epoch": 3.7821658385865686,
      "grad_norm": 1.0320253372192383,
      "learning_rate": 2.451717480545324e-06,
      "loss": 0.0114,
      "step": 2311100
    },
    {
      "epoch": 3.7821985690252222,
      "grad_norm": 0.15307027101516724,
      "learning_rate": 2.451651588331807e-06,
      "loss": 0.0083,
      "step": 2311120
    },
    {
      "epoch": 3.7822312994638754,
      "grad_norm": 0.3949885964393616,
      "learning_rate": 2.45158569611829e-06,
      "loss": 0.0184,
      "step": 2311140
    },
    {
      "epoch": 3.782264029902529,
      "grad_norm": 1.2458845376968384,
      "learning_rate": 2.4515198039047727e-06,
      "loss": 0.0083,
      "step": 2311160
    },
    {
      "epoch": 3.782296760341182,
      "grad_norm": 0.12367179244756699,
      "learning_rate": 2.4514539116912555e-06,
      "loss": 0.0074,
      "step": 2311180
    },
    {
      "epoch": 3.7823294907798353,
      "grad_norm": 0.28714773058891296,
      "learning_rate": 2.4513880194777382e-06,
      "loss": 0.0133,
      "step": 2311200
    },
    {
      "epoch": 3.782362221218489,
      "grad_norm": 0.5050169229507446,
      "learning_rate": 2.4513221272642214e-06,
      "loss": 0.0131,
      "step": 2311220
    },
    {
      "epoch": 3.782394951657142,
      "grad_norm": 0.15084806084632874,
      "learning_rate": 2.4512562350507046e-06,
      "loss": 0.0068,
      "step": 2311240
    },
    {
      "epoch": 3.7824276820957956,
      "grad_norm": 0.24943698942661285,
      "learning_rate": 2.4511903428371873e-06,
      "loss": 0.0139,
      "step": 2311260
    },
    {
      "epoch": 3.7824604125344488,
      "grad_norm": 0.24476765096187592,
      "learning_rate": 2.45112445062367e-06,
      "loss": 0.009,
      "step": 2311280
    },
    {
      "epoch": 3.7824931429731024,
      "grad_norm": 0.2042616456747055,
      "learning_rate": 2.4510585584101528e-06,
      "loss": 0.0157,
      "step": 2311300
    },
    {
      "epoch": 3.7825258734117555,
      "grad_norm": 0.05385712906718254,
      "learning_rate": 2.450992666196636e-06,
      "loss": 0.0138,
      "step": 2311320
    },
    {
      "epoch": 3.7825586038504087,
      "grad_norm": 0.0969390794634819,
      "learning_rate": 2.4509267739831187e-06,
      "loss": 0.0083,
      "step": 2311340
    },
    {
      "epoch": 3.7825913342890622,
      "grad_norm": 0.5663461685180664,
      "learning_rate": 2.4508608817696014e-06,
      "loss": 0.0089,
      "step": 2311360
    },
    {
      "epoch": 3.7826240647277154,
      "grad_norm": 0.33028537034988403,
      "learning_rate": 2.450794989556084e-06,
      "loss": 0.012,
      "step": 2311380
    },
    {
      "epoch": 3.7826567951663685,
      "grad_norm": 0.44462794065475464,
      "learning_rate": 2.4507290973425673e-06,
      "loss": 0.0107,
      "step": 2311400
    },
    {
      "epoch": 3.782689525605022,
      "grad_norm": 0.2666664123535156,
      "learning_rate": 2.45066320512905e-06,
      "loss": 0.0111,
      "step": 2311420
    },
    {
      "epoch": 3.7827222560436757,
      "grad_norm": 0.36126184463500977,
      "learning_rate": 2.4505973129155332e-06,
      "loss": 0.0096,
      "step": 2311440
    },
    {
      "epoch": 3.782754986482329,
      "grad_norm": 0.05158156901597977,
      "learning_rate": 2.450531420702016e-06,
      "loss": 0.009,
      "step": 2311460
    },
    {
      "epoch": 3.782787716920982,
      "grad_norm": 0.6599988341331482,
      "learning_rate": 2.4504655284884987e-06,
      "loss": 0.0103,
      "step": 2311480
    },
    {
      "epoch": 3.7828204473596356,
      "grad_norm": 0.5840439200401306,
      "learning_rate": 2.4503996362749814e-06,
      "loss": 0.0088,
      "step": 2311500
    },
    {
      "epoch": 3.782853177798289,
      "grad_norm": 0.25795331597328186,
      "learning_rate": 2.4503337440614646e-06,
      "loss": 0.0101,
      "step": 2311520
    },
    {
      "epoch": 3.782885908236942,
      "grad_norm": 0.17147019505500793,
      "learning_rate": 2.4502678518479473e-06,
      "loss": 0.0096,
      "step": 2311540
    },
    {
      "epoch": 3.7829186386755955,
      "grad_norm": 0.2550518810749054,
      "learning_rate": 2.45020195963443e-06,
      "loss": 0.0163,
      "step": 2311560
    },
    {
      "epoch": 3.782951369114249,
      "grad_norm": 0.20346210896968842,
      "learning_rate": 2.4501360674209132e-06,
      "loss": 0.0134,
      "step": 2311580
    },
    {
      "epoch": 3.7829840995529023,
      "grad_norm": 0.2093643844127655,
      "learning_rate": 2.450070175207396e-06,
      "loss": 0.0091,
      "step": 2311600
    },
    {
      "epoch": 3.7830168299915554,
      "grad_norm": 0.8050775527954102,
      "learning_rate": 2.4500042829938787e-06,
      "loss": 0.0132,
      "step": 2311620
    },
    {
      "epoch": 3.783049560430209,
      "grad_norm": 0.7414385080337524,
      "learning_rate": 2.449938390780362e-06,
      "loss": 0.0109,
      "step": 2311640
    },
    {
      "epoch": 3.783082290868862,
      "grad_norm": 0.6970232129096985,
      "learning_rate": 2.4498724985668446e-06,
      "loss": 0.011,
      "step": 2311660
    },
    {
      "epoch": 3.7831150213075153,
      "grad_norm": 0.17534887790679932,
      "learning_rate": 2.4498066063533274e-06,
      "loss": 0.0117,
      "step": 2311680
    },
    {
      "epoch": 3.783147751746169,
      "grad_norm": 0.3651632070541382,
      "learning_rate": 2.44974071413981e-06,
      "loss": 0.0143,
      "step": 2311700
    },
    {
      "epoch": 3.7831804821848225,
      "grad_norm": 0.21134544909000397,
      "learning_rate": 2.4496748219262933e-06,
      "loss": 0.0126,
      "step": 2311720
    },
    {
      "epoch": 3.7832132126234757,
      "grad_norm": 1.0160847902297974,
      "learning_rate": 2.449608929712776e-06,
      "loss": 0.01,
      "step": 2311740
    },
    {
      "epoch": 3.783245943062129,
      "grad_norm": 0.8665460348129272,
      "learning_rate": 2.449543037499259e-06,
      "loss": 0.014,
      "step": 2311760
    },
    {
      "epoch": 3.7832786735007824,
      "grad_norm": 0.5425889492034912,
      "learning_rate": 2.449477145285742e-06,
      "loss": 0.0132,
      "step": 2311780
    },
    {
      "epoch": 3.7833114039394355,
      "grad_norm": 0.2375030368566513,
      "learning_rate": 2.4494112530722247e-06,
      "loss": 0.0088,
      "step": 2311800
    },
    {
      "epoch": 3.7833441343780887,
      "grad_norm": 0.5053090453147888,
      "learning_rate": 2.4493453608587074e-06,
      "loss": 0.0094,
      "step": 2311820
    },
    {
      "epoch": 3.7833768648167423,
      "grad_norm": 0.26351994276046753,
      "learning_rate": 2.4492794686451906e-06,
      "loss": 0.0112,
      "step": 2311840
    },
    {
      "epoch": 3.783409595255396,
      "grad_norm": 0.3771398365497589,
      "learning_rate": 2.4492135764316733e-06,
      "loss": 0.0089,
      "step": 2311860
    },
    {
      "epoch": 3.783442325694049,
      "grad_norm": 0.4920268654823303,
      "learning_rate": 2.449147684218156e-06,
      "loss": 0.0071,
      "step": 2311880
    },
    {
      "epoch": 3.783475056132702,
      "grad_norm": 0.22989027202129364,
      "learning_rate": 2.4490817920046388e-06,
      "loss": 0.0099,
      "step": 2311900
    },
    {
      "epoch": 3.783507786571356,
      "grad_norm": 0.44451457262039185,
      "learning_rate": 2.449015899791122e-06,
      "loss": 0.0123,
      "step": 2311920
    },
    {
      "epoch": 3.783540517010009,
      "grad_norm": 0.1410188525915146,
      "learning_rate": 2.4489500075776047e-06,
      "loss": 0.0095,
      "step": 2311940
    },
    {
      "epoch": 3.783573247448662,
      "grad_norm": 0.16634394228458405,
      "learning_rate": 2.448884115364088e-06,
      "loss": 0.0097,
      "step": 2311960
    },
    {
      "epoch": 3.7836059778873157,
      "grad_norm": 0.07656458765268326,
      "learning_rate": 2.4488182231505706e-06,
      "loss": 0.0101,
      "step": 2311980
    },
    {
      "epoch": 3.783638708325969,
      "grad_norm": 0.2253713309764862,
      "learning_rate": 2.4487523309370533e-06,
      "loss": 0.0099,
      "step": 2312000
    },
    {
      "epoch": 3.7836714387646224,
      "grad_norm": 0.3979957401752472,
      "learning_rate": 2.448686438723536e-06,
      "loss": 0.0098,
      "step": 2312020
    },
    {
      "epoch": 3.7837041692032756,
      "grad_norm": 0.1796497106552124,
      "learning_rate": 2.4486205465100192e-06,
      "loss": 0.0063,
      "step": 2312040
    },
    {
      "epoch": 3.783736899641929,
      "grad_norm": 0.3939685821533203,
      "learning_rate": 2.448554654296502e-06,
      "loss": 0.0141,
      "step": 2312060
    },
    {
      "epoch": 3.7837696300805823,
      "grad_norm": 0.09241961687803268,
      "learning_rate": 2.4484887620829847e-06,
      "loss": 0.011,
      "step": 2312080
    },
    {
      "epoch": 3.7838023605192355,
      "grad_norm": 0.1337425261735916,
      "learning_rate": 2.4484228698694674e-06,
      "loss": 0.0103,
      "step": 2312100
    },
    {
      "epoch": 3.783835090957889,
      "grad_norm": 0.5626499056816101,
      "learning_rate": 2.4483569776559506e-06,
      "loss": 0.0082,
      "step": 2312120
    },
    {
      "epoch": 3.783867821396542,
      "grad_norm": 0.22503915429115295,
      "learning_rate": 2.4482910854424333e-06,
      "loss": 0.0096,
      "step": 2312140
    },
    {
      "epoch": 3.783900551835196,
      "grad_norm": 0.24940496683120728,
      "learning_rate": 2.4482251932289165e-06,
      "loss": 0.01,
      "step": 2312160
    },
    {
      "epoch": 3.783933282273849,
      "grad_norm": 0.1835469901561737,
      "learning_rate": 2.4481593010153992e-06,
      "loss": 0.0072,
      "step": 2312180
    },
    {
      "epoch": 3.7839660127125025,
      "grad_norm": 0.22847002744674683,
      "learning_rate": 2.448093408801882e-06,
      "loss": 0.0137,
      "step": 2312200
    },
    {
      "epoch": 3.7839987431511557,
      "grad_norm": 0.12062345445156097,
      "learning_rate": 2.4480275165883647e-06,
      "loss": 0.0078,
      "step": 2312220
    },
    {
      "epoch": 3.784031473589809,
      "grad_norm": 0.19498269259929657,
      "learning_rate": 2.447961624374848e-06,
      "loss": 0.0131,
      "step": 2312240
    },
    {
      "epoch": 3.7840642040284624,
      "grad_norm": 0.22928620874881744,
      "learning_rate": 2.4478957321613306e-06,
      "loss": 0.0148,
      "step": 2312260
    },
    {
      "epoch": 3.7840969344671156,
      "grad_norm": 0.15777696669101715,
      "learning_rate": 2.447829839947814e-06,
      "loss": 0.0068,
      "step": 2312280
    },
    {
      "epoch": 3.784129664905769,
      "grad_norm": 0.17107394337654114,
      "learning_rate": 2.4477639477342965e-06,
      "loss": 0.0081,
      "step": 2312300
    },
    {
      "epoch": 3.7841623953444223,
      "grad_norm": 0.18245339393615723,
      "learning_rate": 2.4476980555207793e-06,
      "loss": 0.0091,
      "step": 2312320
    },
    {
      "epoch": 3.784195125783076,
      "grad_norm": 0.15743716061115265,
      "learning_rate": 2.447632163307262e-06,
      "loss": 0.0143,
      "step": 2312340
    },
    {
      "epoch": 3.784227856221729,
      "grad_norm": 0.2710492014884949,
      "learning_rate": 2.447566271093745e-06,
      "loss": 0.0102,
      "step": 2312360
    },
    {
      "epoch": 3.7842605866603822,
      "grad_norm": 0.20527110993862152,
      "learning_rate": 2.447500378880228e-06,
      "loss": 0.0065,
      "step": 2312380
    },
    {
      "epoch": 3.784293317099036,
      "grad_norm": 0.7320367097854614,
      "learning_rate": 2.4474344866667107e-06,
      "loss": 0.0101,
      "step": 2312400
    },
    {
      "epoch": 3.784326047537689,
      "grad_norm": 0.39890751242637634,
      "learning_rate": 2.4473685944531934e-06,
      "loss": 0.0102,
      "step": 2312420
    },
    {
      "epoch": 3.7843587779763426,
      "grad_norm": 0.7898322939872742,
      "learning_rate": 2.4473027022396766e-06,
      "loss": 0.0124,
      "step": 2312440
    },
    {
      "epoch": 3.7843915084149957,
      "grad_norm": 0.10659696906805038,
      "learning_rate": 2.4472368100261597e-06,
      "loss": 0.0121,
      "step": 2312460
    },
    {
      "epoch": 3.7844242388536493,
      "grad_norm": 0.6516105532646179,
      "learning_rate": 2.4471709178126425e-06,
      "loss": 0.0089,
      "step": 2312480
    },
    {
      "epoch": 3.7844569692923025,
      "grad_norm": 0.43698498606681824,
      "learning_rate": 2.447105025599125e-06,
      "loss": 0.0168,
      "step": 2312500
    },
    {
      "epoch": 3.7844896997309556,
      "grad_norm": 0.13903328776359558,
      "learning_rate": 2.447039133385608e-06,
      "loss": 0.009,
      "step": 2312520
    },
    {
      "epoch": 3.784522430169609,
      "grad_norm": 0.6065985560417175,
      "learning_rate": 2.446973241172091e-06,
      "loss": 0.0096,
      "step": 2312540
    },
    {
      "epoch": 3.7845551606082624,
      "grad_norm": 0.39402705430984497,
      "learning_rate": 2.446907348958574e-06,
      "loss": 0.0172,
      "step": 2312560
    },
    {
      "epoch": 3.784587891046916,
      "grad_norm": 0.36140602827072144,
      "learning_rate": 2.4468414567450566e-06,
      "loss": 0.0119,
      "step": 2312580
    },
    {
      "epoch": 3.784620621485569,
      "grad_norm": 0.2140001505613327,
      "learning_rate": 2.4467755645315393e-06,
      "loss": 0.0099,
      "step": 2312600
    },
    {
      "epoch": 3.7846533519242227,
      "grad_norm": 0.40153658390045166,
      "learning_rate": 2.446709672318022e-06,
      "loss": 0.013,
      "step": 2312620
    },
    {
      "epoch": 3.784686082362876,
      "grad_norm": 0.34549278020858765,
      "learning_rate": 2.4466437801045052e-06,
      "loss": 0.0153,
      "step": 2312640
    },
    {
      "epoch": 3.784718812801529,
      "grad_norm": 0.2513732612133026,
      "learning_rate": 2.4465778878909884e-06,
      "loss": 0.02,
      "step": 2312660
    },
    {
      "epoch": 3.7847515432401826,
      "grad_norm": 0.3057817220687866,
      "learning_rate": 2.446511995677471e-06,
      "loss": 0.0085,
      "step": 2312680
    },
    {
      "epoch": 3.7847842736788357,
      "grad_norm": 0.37440142035484314,
      "learning_rate": 2.446446103463954e-06,
      "loss": 0.0117,
      "step": 2312700
    },
    {
      "epoch": 3.7848170041174893,
      "grad_norm": 0.6194979548454285,
      "learning_rate": 2.4463802112504366e-06,
      "loss": 0.014,
      "step": 2312720
    },
    {
      "epoch": 3.7848497345561425,
      "grad_norm": 0.5357590913772583,
      "learning_rate": 2.4463143190369198e-06,
      "loss": 0.012,
      "step": 2312740
    },
    {
      "epoch": 3.784882464994796,
      "grad_norm": 0.4060096740722656,
      "learning_rate": 2.4462484268234025e-06,
      "loss": 0.012,
      "step": 2312760
    },
    {
      "epoch": 3.7849151954334492,
      "grad_norm": 0.11900915950536728,
      "learning_rate": 2.4461825346098853e-06,
      "loss": 0.0114,
      "step": 2312780
    },
    {
      "epoch": 3.7849479258721024,
      "grad_norm": 0.35206085443496704,
      "learning_rate": 2.446116642396368e-06,
      "loss": 0.0135,
      "step": 2312800
    },
    {
      "epoch": 3.784980656310756,
      "grad_norm": 0.22279669344425201,
      "learning_rate": 2.446050750182851e-06,
      "loss": 0.0107,
      "step": 2312820
    },
    {
      "epoch": 3.785013386749409,
      "grad_norm": 0.12514492869377136,
      "learning_rate": 2.445984857969334e-06,
      "loss": 0.0117,
      "step": 2312840
    },
    {
      "epoch": 3.7850461171880627,
      "grad_norm": 0.2264503389596939,
      "learning_rate": 2.445918965755817e-06,
      "loss": 0.0064,
      "step": 2312860
    },
    {
      "epoch": 3.785078847626716,
      "grad_norm": 0.7685675024986267,
      "learning_rate": 2.4458530735423e-06,
      "loss": 0.0107,
      "step": 2312880
    },
    {
      "epoch": 3.7851115780653695,
      "grad_norm": 0.06488697230815887,
      "learning_rate": 2.4457871813287825e-06,
      "loss": 0.0136,
      "step": 2312900
    },
    {
      "epoch": 3.7851443085040226,
      "grad_norm": 0.5040580630302429,
      "learning_rate": 2.4457212891152653e-06,
      "loss": 0.0142,
      "step": 2312920
    },
    {
      "epoch": 3.7851770389426758,
      "grad_norm": 0.038263775408267975,
      "learning_rate": 2.4456553969017484e-06,
      "loss": 0.0143,
      "step": 2312940
    },
    {
      "epoch": 3.7852097693813294,
      "grad_norm": 0.08824174106121063,
      "learning_rate": 2.445589504688231e-06,
      "loss": 0.0057,
      "step": 2312960
    },
    {
      "epoch": 3.7852424998199825,
      "grad_norm": 0.39060908555984497,
      "learning_rate": 2.445523612474714e-06,
      "loss": 0.0135,
      "step": 2312980
    },
    {
      "epoch": 3.7852752302586357,
      "grad_norm": 0.3196570575237274,
      "learning_rate": 2.445457720261197e-06,
      "loss": 0.0114,
      "step": 2313000
    },
    {
      "epoch": 3.7853079606972893,
      "grad_norm": 0.3662482500076294,
      "learning_rate": 2.44539182804768e-06,
      "loss": 0.012,
      "step": 2313020
    },
    {
      "epoch": 3.785340691135943,
      "grad_norm": 0.27181336283683777,
      "learning_rate": 2.4453259358341626e-06,
      "loss": 0.0072,
      "step": 2313040
    },
    {
      "epoch": 3.785373421574596,
      "grad_norm": 0.1593528538942337,
      "learning_rate": 2.4452600436206457e-06,
      "loss": 0.0098,
      "step": 2313060
    },
    {
      "epoch": 3.785406152013249,
      "grad_norm": 0.24302974343299866,
      "learning_rate": 2.4451941514071285e-06,
      "loss": 0.0069,
      "step": 2313080
    },
    {
      "epoch": 3.7854388824519027,
      "grad_norm": 0.20562797784805298,
      "learning_rate": 2.445128259193611e-06,
      "loss": 0.0112,
      "step": 2313100
    },
    {
      "epoch": 3.785471612890556,
      "grad_norm": 0.1387012004852295,
      "learning_rate": 2.445062366980094e-06,
      "loss": 0.0136,
      "step": 2313120
    },
    {
      "epoch": 3.785504343329209,
      "grad_norm": 0.2948357164859772,
      "learning_rate": 2.444996474766577e-06,
      "loss": 0.0089,
      "step": 2313140
    },
    {
      "epoch": 3.7855370737678626,
      "grad_norm": 0.42818453907966614,
      "learning_rate": 2.44493058255306e-06,
      "loss": 0.0094,
      "step": 2313160
    },
    {
      "epoch": 3.7855698042065162,
      "grad_norm": 0.15109685063362122,
      "learning_rate": 2.444864690339543e-06,
      "loss": 0.0113,
      "step": 2313180
    },
    {
      "epoch": 3.7856025346451694,
      "grad_norm": 0.06465794891119003,
      "learning_rate": 2.4447987981260258e-06,
      "loss": 0.0115,
      "step": 2313200
    },
    {
      "epoch": 3.7856352650838225,
      "grad_norm": 0.1168505847454071,
      "learning_rate": 2.4447329059125085e-06,
      "loss": 0.0078,
      "step": 2313220
    },
    {
      "epoch": 3.785667995522476,
      "grad_norm": 0.35391953587532043,
      "learning_rate": 2.4446670136989912e-06,
      "loss": 0.0121,
      "step": 2313240
    },
    {
      "epoch": 3.7857007259611293,
      "grad_norm": 0.23438617587089539,
      "learning_rate": 2.4446011214854744e-06,
      "loss": 0.0098,
      "step": 2313260
    },
    {
      "epoch": 3.7857334563997824,
      "grad_norm": 0.42530786991119385,
      "learning_rate": 2.444535229271957e-06,
      "loss": 0.0111,
      "step": 2313280
    },
    {
      "epoch": 3.785766186838436,
      "grad_norm": 0.2117152363061905,
      "learning_rate": 2.44446933705844e-06,
      "loss": 0.0117,
      "step": 2313300
    },
    {
      "epoch": 3.7857989172770896,
      "grad_norm": 0.18320615589618683,
      "learning_rate": 2.4444034448449226e-06,
      "loss": 0.0123,
      "step": 2313320
    },
    {
      "epoch": 3.7858316477157428,
      "grad_norm": 0.287166953086853,
      "learning_rate": 2.4443375526314058e-06,
      "loss": 0.0114,
      "step": 2313340
    },
    {
      "epoch": 3.785864378154396,
      "grad_norm": 0.09241136163473129,
      "learning_rate": 2.4442716604178885e-06,
      "loss": 0.0097,
      "step": 2313360
    },
    {
      "epoch": 3.7858971085930495,
      "grad_norm": 0.37841248512268066,
      "learning_rate": 2.4442057682043717e-06,
      "loss": 0.0088,
      "step": 2313380
    },
    {
      "epoch": 3.7859298390317027,
      "grad_norm": 0.18134470283985138,
      "learning_rate": 2.4441398759908544e-06,
      "loss": 0.0112,
      "step": 2313400
    },
    {
      "epoch": 3.785962569470356,
      "grad_norm": 0.1254190355539322,
      "learning_rate": 2.444073983777337e-06,
      "loss": 0.0095,
      "step": 2313420
    },
    {
      "epoch": 3.7859952999090094,
      "grad_norm": 0.4905415177345276,
      "learning_rate": 2.44400809156382e-06,
      "loss": 0.0188,
      "step": 2313440
    },
    {
      "epoch": 3.7860280303476626,
      "grad_norm": 0.22306393086910248,
      "learning_rate": 2.443942199350303e-06,
      "loss": 0.0132,
      "step": 2313460
    },
    {
      "epoch": 3.786060760786316,
      "grad_norm": 0.22707055509090424,
      "learning_rate": 2.443876307136786e-06,
      "loss": 0.0137,
      "step": 2313480
    },
    {
      "epoch": 3.7860934912249693,
      "grad_norm": 0.30748307704925537,
      "learning_rate": 2.4438104149232685e-06,
      "loss": 0.0106,
      "step": 2313500
    },
    {
      "epoch": 3.786126221663623,
      "grad_norm": 0.17640362679958344,
      "learning_rate": 2.4437445227097517e-06,
      "loss": 0.0113,
      "step": 2313520
    },
    {
      "epoch": 3.786158952102276,
      "grad_norm": 0.4820750653743744,
      "learning_rate": 2.4436786304962344e-06,
      "loss": 0.0071,
      "step": 2313540
    },
    {
      "epoch": 3.786191682540929,
      "grad_norm": 0.2837844491004944,
      "learning_rate": 2.443612738282717e-06,
      "loss": 0.0098,
      "step": 2313560
    },
    {
      "epoch": 3.786224412979583,
      "grad_norm": 1.182172417640686,
      "learning_rate": 2.4435468460692003e-06,
      "loss": 0.0123,
      "step": 2313580
    },
    {
      "epoch": 3.786257143418236,
      "grad_norm": 0.5043084025382996,
      "learning_rate": 2.443480953855683e-06,
      "loss": 0.0106,
      "step": 2313600
    },
    {
      "epoch": 3.7862898738568895,
      "grad_norm": 0.14689841866493225,
      "learning_rate": 2.443415061642166e-06,
      "loss": 0.0098,
      "step": 2313620
    },
    {
      "epoch": 3.7863226042955427,
      "grad_norm": 0.08809099346399307,
      "learning_rate": 2.4433491694286486e-06,
      "loss": 0.0127,
      "step": 2313640
    },
    {
      "epoch": 3.7863553347341963,
      "grad_norm": 0.0568854995071888,
      "learning_rate": 2.4432832772151317e-06,
      "loss": 0.0097,
      "step": 2313660
    },
    {
      "epoch": 3.7863880651728494,
      "grad_norm": 0.39611726999282837,
      "learning_rate": 2.4432173850016145e-06,
      "loss": 0.0172,
      "step": 2313680
    },
    {
      "epoch": 3.7864207956115026,
      "grad_norm": 0.28050824999809265,
      "learning_rate": 2.4431514927880976e-06,
      "loss": 0.0117,
      "step": 2313700
    },
    {
      "epoch": 3.786453526050156,
      "grad_norm": 0.33829110860824585,
      "learning_rate": 2.4430856005745804e-06,
      "loss": 0.0162,
      "step": 2313720
    },
    {
      "epoch": 3.7864862564888093,
      "grad_norm": 0.4869016408920288,
      "learning_rate": 2.443019708361063e-06,
      "loss": 0.0125,
      "step": 2313740
    },
    {
      "epoch": 3.786518986927463,
      "grad_norm": 0.15335257351398468,
      "learning_rate": 2.442953816147546e-06,
      "loss": 0.0093,
      "step": 2313760
    },
    {
      "epoch": 3.786551717366116,
      "grad_norm": 0.39812323451042175,
      "learning_rate": 2.442887923934029e-06,
      "loss": 0.0085,
      "step": 2313780
    },
    {
      "epoch": 3.7865844478047697,
      "grad_norm": 0.32319924235343933,
      "learning_rate": 2.4428220317205118e-06,
      "loss": 0.0102,
      "step": 2313800
    },
    {
      "epoch": 3.786617178243423,
      "grad_norm": 0.40690168738365173,
      "learning_rate": 2.4427561395069945e-06,
      "loss": 0.0098,
      "step": 2313820
    },
    {
      "epoch": 3.786649908682076,
      "grad_norm": 0.2668001651763916,
      "learning_rate": 2.4426902472934772e-06,
      "loss": 0.0118,
      "step": 2313840
    },
    {
      "epoch": 3.7866826391207296,
      "grad_norm": 0.38602912425994873,
      "learning_rate": 2.4426243550799604e-06,
      "loss": 0.0099,
      "step": 2313860
    },
    {
      "epoch": 3.7867153695593827,
      "grad_norm": 0.1270553022623062,
      "learning_rate": 2.4425584628664436e-06,
      "loss": 0.0111,
      "step": 2313880
    },
    {
      "epoch": 3.7867480999980363,
      "grad_norm": 0.18961186707019806,
      "learning_rate": 2.4424925706529263e-06,
      "loss": 0.0125,
      "step": 2313900
    },
    {
      "epoch": 3.7867808304366894,
      "grad_norm": 0.07083318382501602,
      "learning_rate": 2.442426678439409e-06,
      "loss": 0.0096,
      "step": 2313920
    },
    {
      "epoch": 3.786813560875343,
      "grad_norm": 0.1203942820429802,
      "learning_rate": 2.4423607862258918e-06,
      "loss": 0.0078,
      "step": 2313940
    },
    {
      "epoch": 3.786846291313996,
      "grad_norm": 0.40350499749183655,
      "learning_rate": 2.442294894012375e-06,
      "loss": 0.0136,
      "step": 2313960
    },
    {
      "epoch": 3.7868790217526493,
      "grad_norm": 0.17518386244773865,
      "learning_rate": 2.4422290017988577e-06,
      "loss": 0.0104,
      "step": 2313980
    },
    {
      "epoch": 3.786911752191303,
      "grad_norm": 0.3543531000614166,
      "learning_rate": 2.4421631095853404e-06,
      "loss": 0.0108,
      "step": 2314000
    },
    {
      "epoch": 3.786944482629956,
      "grad_norm": 0.20938752591609955,
      "learning_rate": 2.442097217371823e-06,
      "loss": 0.014,
      "step": 2314020
    },
    {
      "epoch": 3.7869772130686097,
      "grad_norm": 0.1644185185432434,
      "learning_rate": 2.4420313251583063e-06,
      "loss": 0.0106,
      "step": 2314040
    },
    {
      "epoch": 3.787009943507263,
      "grad_norm": 0.16554324328899384,
      "learning_rate": 2.441965432944789e-06,
      "loss": 0.0129,
      "step": 2314060
    },
    {
      "epoch": 3.7870426739459164,
      "grad_norm": 0.45000556111335754,
      "learning_rate": 2.4418995407312722e-06,
      "loss": 0.0084,
      "step": 2314080
    },
    {
      "epoch": 3.7870754043845696,
      "grad_norm": 0.2947816252708435,
      "learning_rate": 2.441833648517755e-06,
      "loss": 0.012,
      "step": 2314100
    },
    {
      "epoch": 3.7871081348232227,
      "grad_norm": 0.8809424638748169,
      "learning_rate": 2.4417677563042377e-06,
      "loss": 0.008,
      "step": 2314120
    },
    {
      "epoch": 3.7871408652618763,
      "grad_norm": 0.262626051902771,
      "learning_rate": 2.4417018640907205e-06,
      "loss": 0.0131,
      "step": 2314140
    },
    {
      "epoch": 3.7871735957005295,
      "grad_norm": 0.16444803774356842,
      "learning_rate": 2.4416359718772036e-06,
      "loss": 0.0069,
      "step": 2314160
    },
    {
      "epoch": 3.787206326139183,
      "grad_norm": 0.4600597321987152,
      "learning_rate": 2.4415700796636864e-06,
      "loss": 0.0151,
      "step": 2314180
    },
    {
      "epoch": 3.787239056577836,
      "grad_norm": 0.5799981355667114,
      "learning_rate": 2.441504187450169e-06,
      "loss": 0.0153,
      "step": 2314200
    },
    {
      "epoch": 3.78727178701649,
      "grad_norm": 0.32636135816574097,
      "learning_rate": 2.4414382952366523e-06,
      "loss": 0.0064,
      "step": 2314220
    },
    {
      "epoch": 3.787304517455143,
      "grad_norm": 0.3311842381954193,
      "learning_rate": 2.441372403023135e-06,
      "loss": 0.0099,
      "step": 2314240
    },
    {
      "epoch": 3.787337247893796,
      "grad_norm": 0.19030295312404633,
      "learning_rate": 2.4413065108096177e-06,
      "loss": 0.0113,
      "step": 2314260
    },
    {
      "epoch": 3.7873699783324497,
      "grad_norm": 0.08068190515041351,
      "learning_rate": 2.441240618596101e-06,
      "loss": 0.0122,
      "step": 2314280
    },
    {
      "epoch": 3.787402708771103,
      "grad_norm": 0.14514900743961334,
      "learning_rate": 2.4411747263825836e-06,
      "loss": 0.0093,
      "step": 2314300
    },
    {
      "epoch": 3.7874354392097564,
      "grad_norm": 0.0875183716416359,
      "learning_rate": 2.4411088341690664e-06,
      "loss": 0.0108,
      "step": 2314320
    },
    {
      "epoch": 3.7874681696484096,
      "grad_norm": 0.254260778427124,
      "learning_rate": 2.441042941955549e-06,
      "loss": 0.0091,
      "step": 2314340
    },
    {
      "epoch": 3.787500900087063,
      "grad_norm": 0.345440536737442,
      "learning_rate": 2.4409770497420323e-06,
      "loss": 0.0101,
      "step": 2314360
    },
    {
      "epoch": 3.7875336305257163,
      "grad_norm": 0.20087911188602448,
      "learning_rate": 2.440911157528515e-06,
      "loss": 0.0157,
      "step": 2314380
    },
    {
      "epoch": 3.7875663609643695,
      "grad_norm": 0.23818407952785492,
      "learning_rate": 2.440845265314998e-06,
      "loss": 0.0066,
      "step": 2314400
    },
    {
      "epoch": 3.787599091403023,
      "grad_norm": 0.5744062066078186,
      "learning_rate": 2.440779373101481e-06,
      "loss": 0.0135,
      "step": 2314420
    },
    {
      "epoch": 3.7876318218416762,
      "grad_norm": 0.06695389002561569,
      "learning_rate": 2.4407134808879637e-06,
      "loss": 0.0085,
      "step": 2314440
    },
    {
      "epoch": 3.7876645522803294,
      "grad_norm": 0.21121928095817566,
      "learning_rate": 2.4406475886744464e-06,
      "loss": 0.0113,
      "step": 2314460
    },
    {
      "epoch": 3.787697282718983,
      "grad_norm": 0.24095283448696136,
      "learning_rate": 2.4405816964609296e-06,
      "loss": 0.0152,
      "step": 2314480
    },
    {
      "epoch": 3.7877300131576366,
      "grad_norm": 0.13245388865470886,
      "learning_rate": 2.4405158042474123e-06,
      "loss": 0.0107,
      "step": 2314500
    },
    {
      "epoch": 3.7877627435962897,
      "grad_norm": 0.3149229884147644,
      "learning_rate": 2.440449912033895e-06,
      "loss": 0.0097,
      "step": 2314520
    },
    {
      "epoch": 3.787795474034943,
      "grad_norm": 0.16263972222805023,
      "learning_rate": 2.4403840198203778e-06,
      "loss": 0.0131,
      "step": 2314540
    },
    {
      "epoch": 3.7878282044735965,
      "grad_norm": 0.169162780046463,
      "learning_rate": 2.440318127606861e-06,
      "loss": 0.0099,
      "step": 2314560
    },
    {
      "epoch": 3.7878609349122496,
      "grad_norm": 0.3064083755016327,
      "learning_rate": 2.4402522353933437e-06,
      "loss": 0.0181,
      "step": 2314580
    },
    {
      "epoch": 3.7878936653509028,
      "grad_norm": 0.2222791612148285,
      "learning_rate": 2.440186343179827e-06,
      "loss": 0.0114,
      "step": 2314600
    },
    {
      "epoch": 3.7879263957895564,
      "grad_norm": 0.6564168334007263,
      "learning_rate": 2.4401204509663096e-06,
      "loss": 0.0149,
      "step": 2314620
    },
    {
      "epoch": 3.78795912622821,
      "grad_norm": 1.7209522724151611,
      "learning_rate": 2.4400545587527923e-06,
      "loss": 0.0105,
      "step": 2314640
    },
    {
      "epoch": 3.787991856666863,
      "grad_norm": 0.15118180215358734,
      "learning_rate": 2.439988666539275e-06,
      "loss": 0.0082,
      "step": 2314660
    },
    {
      "epoch": 3.7880245871055163,
      "grad_norm": 0.4965406656265259,
      "learning_rate": 2.4399227743257582e-06,
      "loss": 0.0134,
      "step": 2314680
    },
    {
      "epoch": 3.78805731754417,
      "grad_norm": 0.214422345161438,
      "learning_rate": 2.439856882112241e-06,
      "loss": 0.0087,
      "step": 2314700
    },
    {
      "epoch": 3.788090047982823,
      "grad_norm": 0.14858299493789673,
      "learning_rate": 2.4397909898987237e-06,
      "loss": 0.0109,
      "step": 2314720
    },
    {
      "epoch": 3.788122778421476,
      "grad_norm": 0.6949650049209595,
      "learning_rate": 2.4397250976852065e-06,
      "loss": 0.0137,
      "step": 2314740
    },
    {
      "epoch": 3.7881555088601297,
      "grad_norm": 0.0741487443447113,
      "learning_rate": 2.4396592054716896e-06,
      "loss": 0.0113,
      "step": 2314760
    },
    {
      "epoch": 3.7881882392987833,
      "grad_norm": 0.9117729067802429,
      "learning_rate": 2.4395933132581724e-06,
      "loss": 0.0121,
      "step": 2314780
    },
    {
      "epoch": 3.7882209697374365,
      "grad_norm": 0.2951742112636566,
      "learning_rate": 2.4395274210446555e-06,
      "loss": 0.0086,
      "step": 2314800
    },
    {
      "epoch": 3.7882537001760896,
      "grad_norm": 0.15478871762752533,
      "learning_rate": 2.4394615288311383e-06,
      "loss": 0.0091,
      "step": 2314820
    },
    {
      "epoch": 3.7882864306147432,
      "grad_norm": 0.2155957818031311,
      "learning_rate": 2.439395636617621e-06,
      "loss": 0.0079,
      "step": 2314840
    },
    {
      "epoch": 3.7883191610533964,
      "grad_norm": 0.16719745099544525,
      "learning_rate": 2.4393297444041037e-06,
      "loss": 0.0088,
      "step": 2314860
    },
    {
      "epoch": 3.7883518914920495,
      "grad_norm": 0.43136656284332275,
      "learning_rate": 2.439263852190587e-06,
      "loss": 0.0123,
      "step": 2314880
    },
    {
      "epoch": 3.788384621930703,
      "grad_norm": 0.13138598203659058,
      "learning_rate": 2.4391979599770696e-06,
      "loss": 0.0063,
      "step": 2314900
    },
    {
      "epoch": 3.7884173523693567,
      "grad_norm": 0.18064922094345093,
      "learning_rate": 2.439132067763553e-06,
      "loss": 0.0073,
      "step": 2314920
    },
    {
      "epoch": 3.78845008280801,
      "grad_norm": 0.31039950251579285,
      "learning_rate": 2.4390661755500355e-06,
      "loss": 0.0147,
      "step": 2314940
    },
    {
      "epoch": 3.788482813246663,
      "grad_norm": 0.2133849412202835,
      "learning_rate": 2.4390002833365183e-06,
      "loss": 0.012,
      "step": 2314960
    },
    {
      "epoch": 3.7885155436853166,
      "grad_norm": 0.3146841824054718,
      "learning_rate": 2.438934391123001e-06,
      "loss": 0.0111,
      "step": 2314980
    },
    {
      "epoch": 3.7885482741239698,
      "grad_norm": 0.22723662853240967,
      "learning_rate": 2.438868498909484e-06,
      "loss": 0.0124,
      "step": 2315000
    },
    {
      "epoch": 3.788581004562623,
      "grad_norm": 0.293554425239563,
      "learning_rate": 2.438802606695967e-06,
      "loss": 0.012,
      "step": 2315020
    },
    {
      "epoch": 3.7886137350012765,
      "grad_norm": 0.7273106575012207,
      "learning_rate": 2.4387367144824497e-06,
      "loss": 0.0082,
      "step": 2315040
    },
    {
      "epoch": 3.7886464654399297,
      "grad_norm": 0.235815167427063,
      "learning_rate": 2.4386708222689324e-06,
      "loss": 0.0091,
      "step": 2315060
    },
    {
      "epoch": 3.7886791958785833,
      "grad_norm": 0.3386314809322357,
      "learning_rate": 2.4386049300554156e-06,
      "loss": 0.0124,
      "step": 2315080
    },
    {
      "epoch": 3.7887119263172364,
      "grad_norm": 0.25169262290000916,
      "learning_rate": 2.4385390378418987e-06,
      "loss": 0.0125,
      "step": 2315100
    },
    {
      "epoch": 3.78874465675589,
      "grad_norm": 0.17977389693260193,
      "learning_rate": 2.4384731456283815e-06,
      "loss": 0.0107,
      "step": 2315120
    },
    {
      "epoch": 3.788777387194543,
      "grad_norm": 0.14970341324806213,
      "learning_rate": 2.4384072534148642e-06,
      "loss": 0.0101,
      "step": 2315140
    },
    {
      "epoch": 3.7888101176331963,
      "grad_norm": 0.11143428832292557,
      "learning_rate": 2.438341361201347e-06,
      "loss": 0.0089,
      "step": 2315160
    },
    {
      "epoch": 3.78884284807185,
      "grad_norm": 0.13576292991638184,
      "learning_rate": 2.43827546898783e-06,
      "loss": 0.0154,
      "step": 2315180
    },
    {
      "epoch": 3.788875578510503,
      "grad_norm": 0.6284812688827515,
      "learning_rate": 2.438209576774313e-06,
      "loss": 0.0151,
      "step": 2315200
    },
    {
      "epoch": 3.7889083089491566,
      "grad_norm": 0.37694597244262695,
      "learning_rate": 2.4381436845607956e-06,
      "loss": 0.0151,
      "step": 2315220
    },
    {
      "epoch": 3.78894103938781,
      "grad_norm": 0.4208732545375824,
      "learning_rate": 2.4380777923472783e-06,
      "loss": 0.0084,
      "step": 2315240
    },
    {
      "epoch": 3.7889737698264634,
      "grad_norm": 0.03650302439928055,
      "learning_rate": 2.438011900133761e-06,
      "loss": 0.0086,
      "step": 2315260
    },
    {
      "epoch": 3.7890065002651165,
      "grad_norm": 0.13336911797523499,
      "learning_rate": 2.4379460079202442e-06,
      "loss": 0.008,
      "step": 2315280
    },
    {
      "epoch": 3.7890392307037697,
      "grad_norm": 0.08899250626564026,
      "learning_rate": 2.4378801157067274e-06,
      "loss": 0.0093,
      "step": 2315300
    },
    {
      "epoch": 3.7890719611424233,
      "grad_norm": 0.19961047172546387,
      "learning_rate": 2.43781422349321e-06,
      "loss": 0.0085,
      "step": 2315320
    },
    {
      "epoch": 3.7891046915810764,
      "grad_norm": 0.3114605247974396,
      "learning_rate": 2.437748331279693e-06,
      "loss": 0.0081,
      "step": 2315340
    },
    {
      "epoch": 3.78913742201973,
      "grad_norm": 0.22272253036499023,
      "learning_rate": 2.4376824390661756e-06,
      "loss": 0.0118,
      "step": 2315360
    },
    {
      "epoch": 3.789170152458383,
      "grad_norm": 0.1894923746585846,
      "learning_rate": 2.4376165468526588e-06,
      "loss": 0.017,
      "step": 2315380
    },
    {
      "epoch": 3.7892028828970368,
      "grad_norm": 0.24813492596149445,
      "learning_rate": 2.4375506546391415e-06,
      "loss": 0.0089,
      "step": 2315400
    },
    {
      "epoch": 3.78923561333569,
      "grad_norm": 0.27972936630249023,
      "learning_rate": 2.4374847624256243e-06,
      "loss": 0.009,
      "step": 2315420
    },
    {
      "epoch": 3.789268343774343,
      "grad_norm": 0.14804227650165558,
      "learning_rate": 2.437418870212107e-06,
      "loss": 0.015,
      "step": 2315440
    },
    {
      "epoch": 3.7893010742129967,
      "grad_norm": 0.1645287573337555,
      "learning_rate": 2.43735297799859e-06,
      "loss": 0.0109,
      "step": 2315460
    },
    {
      "epoch": 3.78933380465165,
      "grad_norm": 0.47727495431900024,
      "learning_rate": 2.437287085785073e-06,
      "loss": 0.0106,
      "step": 2315480
    },
    {
      "epoch": 3.7893665350903034,
      "grad_norm": 0.6661205291748047,
      "learning_rate": 2.437221193571556e-06,
      "loss": 0.0199,
      "step": 2315500
    },
    {
      "epoch": 3.7893992655289566,
      "grad_norm": 0.1196862980723381,
      "learning_rate": 2.437155301358039e-06,
      "loss": 0.0111,
      "step": 2315520
    },
    {
      "epoch": 3.78943199596761,
      "grad_norm": 0.14681246876716614,
      "learning_rate": 2.4370894091445215e-06,
      "loss": 0.0102,
      "step": 2315540
    },
    {
      "epoch": 3.7894647264062633,
      "grad_norm": 0.21980011463165283,
      "learning_rate": 2.4370235169310043e-06,
      "loss": 0.0129,
      "step": 2315560
    },
    {
      "epoch": 3.7894974568449165,
      "grad_norm": 0.36303621530532837,
      "learning_rate": 2.4369576247174875e-06,
      "loss": 0.016,
      "step": 2315580
    },
    {
      "epoch": 3.78953018728357,
      "grad_norm": 0.7712767720222473,
      "learning_rate": 2.43689173250397e-06,
      "loss": 0.0094,
      "step": 2315600
    },
    {
      "epoch": 3.789562917722223,
      "grad_norm": 0.07360828667879105,
      "learning_rate": 2.4368258402904534e-06,
      "loss": 0.01,
      "step": 2315620
    },
    {
      "epoch": 3.789595648160877,
      "grad_norm": 0.10689257085323334,
      "learning_rate": 2.436759948076936e-06,
      "loss": 0.0082,
      "step": 2315640
    },
    {
      "epoch": 3.78962837859953,
      "grad_norm": 0.09348177909851074,
      "learning_rate": 2.436694055863419e-06,
      "loss": 0.0115,
      "step": 2315660
    },
    {
      "epoch": 3.7896611090381835,
      "grad_norm": 0.3234717845916748,
      "learning_rate": 2.4366281636499016e-06,
      "loss": 0.0081,
      "step": 2315680
    },
    {
      "epoch": 3.7896938394768367,
      "grad_norm": 0.10551688075065613,
      "learning_rate": 2.4365622714363847e-06,
      "loss": 0.0131,
      "step": 2315700
    },
    {
      "epoch": 3.78972656991549,
      "grad_norm": 0.2958173155784607,
      "learning_rate": 2.4364963792228675e-06,
      "loss": 0.0045,
      "step": 2315720
    },
    {
      "epoch": 3.7897593003541434,
      "grad_norm": 0.18699774146080017,
      "learning_rate": 2.4364304870093502e-06,
      "loss": 0.0098,
      "step": 2315740
    },
    {
      "epoch": 3.7897920307927966,
      "grad_norm": 0.11361392587423325,
      "learning_rate": 2.436364594795833e-06,
      "loss": 0.0099,
      "step": 2315760
    },
    {
      "epoch": 3.78982476123145,
      "grad_norm": 0.08155598491430283,
      "learning_rate": 2.436298702582316e-06,
      "loss": 0.0102,
      "step": 2315780
    },
    {
      "epoch": 3.7898574916701033,
      "grad_norm": 0.07015342265367508,
      "learning_rate": 2.436232810368799e-06,
      "loss": 0.0084,
      "step": 2315800
    },
    {
      "epoch": 3.789890222108757,
      "grad_norm": 0.29858317971229553,
      "learning_rate": 2.436166918155282e-06,
      "loss": 0.0107,
      "step": 2315820
    },
    {
      "epoch": 3.78992295254741,
      "grad_norm": 0.1939954161643982,
      "learning_rate": 2.4361010259417648e-06,
      "loss": 0.0118,
      "step": 2315840
    },
    {
      "epoch": 3.7899556829860632,
      "grad_norm": 0.08694861084222794,
      "learning_rate": 2.4360351337282475e-06,
      "loss": 0.0129,
      "step": 2315860
    },
    {
      "epoch": 3.789988413424717,
      "grad_norm": 0.0977020189166069,
      "learning_rate": 2.4359692415147302e-06,
      "loss": 0.0095,
      "step": 2315880
    },
    {
      "epoch": 3.79002114386337,
      "grad_norm": 0.31601789593696594,
      "learning_rate": 2.4359033493012134e-06,
      "loss": 0.0061,
      "step": 2315900
    },
    {
      "epoch": 3.7900538743020236,
      "grad_norm": 0.487330824136734,
      "learning_rate": 2.435837457087696e-06,
      "loss": 0.01,
      "step": 2315920
    },
    {
      "epoch": 3.7900866047406767,
      "grad_norm": 0.11226335167884827,
      "learning_rate": 2.435771564874179e-06,
      "loss": 0.0117,
      "step": 2315940
    },
    {
      "epoch": 3.7901193351793303,
      "grad_norm": 0.5106651186943054,
      "learning_rate": 2.4357056726606616e-06,
      "loss": 0.0101,
      "step": 2315960
    },
    {
      "epoch": 3.7901520656179835,
      "grad_norm": 0.2609846293926239,
      "learning_rate": 2.435639780447145e-06,
      "loss": 0.0086,
      "step": 2315980
    },
    {
      "epoch": 3.7901847960566366,
      "grad_norm": 0.08047869801521301,
      "learning_rate": 2.4355738882336275e-06,
      "loss": 0.0111,
      "step": 2316000
    },
    {
      "epoch": 3.79021752649529,
      "grad_norm": 0.5073683857917786,
      "learning_rate": 2.4355079960201107e-06,
      "loss": 0.0109,
      "step": 2316020
    },
    {
      "epoch": 3.7902502569339434,
      "grad_norm": 0.1714235097169876,
      "learning_rate": 2.4354421038065934e-06,
      "loss": 0.0085,
      "step": 2316040
    },
    {
      "epoch": 3.7902829873725965,
      "grad_norm": 0.6558829545974731,
      "learning_rate": 2.435376211593076e-06,
      "loss": 0.0162,
      "step": 2316060
    },
    {
      "epoch": 3.79031571781125,
      "grad_norm": 0.2896538972854614,
      "learning_rate": 2.435310319379559e-06,
      "loss": 0.0131,
      "step": 2316080
    },
    {
      "epoch": 3.7903484482499037,
      "grad_norm": 0.18148237466812134,
      "learning_rate": 2.435244427166042e-06,
      "loss": 0.0138,
      "step": 2316100
    },
    {
      "epoch": 3.790381178688557,
      "grad_norm": 0.5591451525688171,
      "learning_rate": 2.435178534952525e-06,
      "loss": 0.0143,
      "step": 2316120
    },
    {
      "epoch": 3.79041390912721,
      "grad_norm": 0.19966623187065125,
      "learning_rate": 2.4351126427390076e-06,
      "loss": 0.012,
      "step": 2316140
    },
    {
      "epoch": 3.7904466395658636,
      "grad_norm": 0.557304322719574,
      "learning_rate": 2.4350467505254907e-06,
      "loss": 0.011,
      "step": 2316160
    },
    {
      "epoch": 3.7904793700045167,
      "grad_norm": 0.19619859755039215,
      "learning_rate": 2.4349808583119735e-06,
      "loss": 0.0101,
      "step": 2316180
    },
    {
      "epoch": 3.79051210044317,
      "grad_norm": 0.4251593053340912,
      "learning_rate": 2.434914966098456e-06,
      "loss": 0.0103,
      "step": 2316200
    },
    {
      "epoch": 3.7905448308818235,
      "grad_norm": 1.1307307481765747,
      "learning_rate": 2.4348490738849394e-06,
      "loss": 0.0172,
      "step": 2316220
    },
    {
      "epoch": 3.790577561320477,
      "grad_norm": 0.2151082456111908,
      "learning_rate": 2.434783181671422e-06,
      "loss": 0.009,
      "step": 2316240
    },
    {
      "epoch": 3.7906102917591302,
      "grad_norm": 0.168418288230896,
      "learning_rate": 2.434717289457905e-06,
      "loss": 0.0108,
      "step": 2316260
    },
    {
      "epoch": 3.7906430221977834,
      "grad_norm": 0.4166876971721649,
      "learning_rate": 2.4346513972443876e-06,
      "loss": 0.0105,
      "step": 2316280
    },
    {
      "epoch": 3.790675752636437,
      "grad_norm": 0.2024472951889038,
      "learning_rate": 2.4345855050308707e-06,
      "loss": 0.0082,
      "step": 2316300
    },
    {
      "epoch": 3.79070848307509,
      "grad_norm": 0.23001541197299957,
      "learning_rate": 2.4345196128173535e-06,
      "loss": 0.0075,
      "step": 2316320
    },
    {
      "epoch": 3.7907412135137433,
      "grad_norm": 0.26870623230934143,
      "learning_rate": 2.4344537206038366e-06,
      "loss": 0.0072,
      "step": 2316340
    },
    {
      "epoch": 3.790773943952397,
      "grad_norm": 0.09078752994537354,
      "learning_rate": 2.4343878283903194e-06,
      "loss": 0.0129,
      "step": 2316360
    },
    {
      "epoch": 3.7908066743910505,
      "grad_norm": 0.17195875942707062,
      "learning_rate": 2.434321936176802e-06,
      "loss": 0.0088,
      "step": 2316380
    },
    {
      "epoch": 3.7908394048297036,
      "grad_norm": 0.7227991223335266,
      "learning_rate": 2.434256043963285e-06,
      "loss": 0.006,
      "step": 2316400
    },
    {
      "epoch": 3.7908721352683568,
      "grad_norm": 0.24738389253616333,
      "learning_rate": 2.434190151749768e-06,
      "loss": 0.01,
      "step": 2316420
    },
    {
      "epoch": 3.7909048657070104,
      "grad_norm": 0.19539041817188263,
      "learning_rate": 2.4341242595362508e-06,
      "loss": 0.0144,
      "step": 2316440
    },
    {
      "epoch": 3.7909375961456635,
      "grad_norm": 0.2285865694284439,
      "learning_rate": 2.4340583673227335e-06,
      "loss": 0.011,
      "step": 2316460
    },
    {
      "epoch": 3.7909703265843167,
      "grad_norm": 0.5065213441848755,
      "learning_rate": 2.4339924751092162e-06,
      "loss": 0.0129,
      "step": 2316480
    },
    {
      "epoch": 3.7910030570229702,
      "grad_norm": 0.6790422797203064,
      "learning_rate": 2.4339265828956994e-06,
      "loss": 0.0152,
      "step": 2316500
    },
    {
      "epoch": 3.7910357874616234,
      "grad_norm": 0.4134131968021393,
      "learning_rate": 2.4338606906821826e-06,
      "loss": 0.0156,
      "step": 2316520
    },
    {
      "epoch": 3.791068517900277,
      "grad_norm": 0.07136930525302887,
      "learning_rate": 2.4337947984686653e-06,
      "loss": 0.0098,
      "step": 2316540
    },
    {
      "epoch": 3.79110124833893,
      "grad_norm": 0.2829034924507141,
      "learning_rate": 2.433728906255148e-06,
      "loss": 0.0176,
      "step": 2316560
    },
    {
      "epoch": 3.7911339787775837,
      "grad_norm": 0.7968271970748901,
      "learning_rate": 2.433663014041631e-06,
      "loss": 0.0104,
      "step": 2316580
    },
    {
      "epoch": 3.791166709216237,
      "grad_norm": 0.5468341708183289,
      "learning_rate": 2.433597121828114e-06,
      "loss": 0.01,
      "step": 2316600
    },
    {
      "epoch": 3.79119943965489,
      "grad_norm": 0.14469455182552338,
      "learning_rate": 2.4335312296145967e-06,
      "loss": 0.0072,
      "step": 2316620
    },
    {
      "epoch": 3.7912321700935436,
      "grad_norm": 0.4117211103439331,
      "learning_rate": 2.4334653374010794e-06,
      "loss": 0.0108,
      "step": 2316640
    },
    {
      "epoch": 3.791264900532197,
      "grad_norm": 0.30215612053871155,
      "learning_rate": 2.433399445187562e-06,
      "loss": 0.0127,
      "step": 2316660
    },
    {
      "epoch": 3.7912976309708504,
      "grad_norm": 0.16108258068561554,
      "learning_rate": 2.4333335529740453e-06,
      "loss": 0.0096,
      "step": 2316680
    },
    {
      "epoch": 3.7913303614095035,
      "grad_norm": 0.7581264972686768,
      "learning_rate": 2.433267660760528e-06,
      "loss": 0.0125,
      "step": 2316700
    },
    {
      "epoch": 3.791363091848157,
      "grad_norm": 0.05224334076046944,
      "learning_rate": 2.4332017685470112e-06,
      "loss": 0.0108,
      "step": 2316720
    },
    {
      "epoch": 3.7913958222868103,
      "grad_norm": 0.4287249445915222,
      "learning_rate": 2.433135876333494e-06,
      "loss": 0.0099,
      "step": 2316740
    },
    {
      "epoch": 3.7914285527254634,
      "grad_norm": 0.23203624784946442,
      "learning_rate": 2.4330699841199767e-06,
      "loss": 0.0152,
      "step": 2316760
    },
    {
      "epoch": 3.791461283164117,
      "grad_norm": 0.22125257551670074,
      "learning_rate": 2.4330040919064595e-06,
      "loss": 0.0082,
      "step": 2316780
    },
    {
      "epoch": 3.79149401360277,
      "grad_norm": 0.17257745563983917,
      "learning_rate": 2.4329381996929426e-06,
      "loss": 0.0125,
      "step": 2316800
    },
    {
      "epoch": 3.7915267440414238,
      "grad_norm": 0.5643719434738159,
      "learning_rate": 2.4328723074794254e-06,
      "loss": 0.0164,
      "step": 2316820
    },
    {
      "epoch": 3.791559474480077,
      "grad_norm": 0.5375087857246399,
      "learning_rate": 2.432806415265908e-06,
      "loss": 0.0107,
      "step": 2316840
    },
    {
      "epoch": 3.7915922049187305,
      "grad_norm": 0.22069814801216125,
      "learning_rate": 2.4327405230523913e-06,
      "loss": 0.0122,
      "step": 2316860
    },
    {
      "epoch": 3.7916249353573837,
      "grad_norm": 0.11317531764507294,
      "learning_rate": 2.432674630838874e-06,
      "loss": 0.0153,
      "step": 2316880
    },
    {
      "epoch": 3.791657665796037,
      "grad_norm": 0.1128881648182869,
      "learning_rate": 2.4326087386253567e-06,
      "loss": 0.0084,
      "step": 2316900
    },
    {
      "epoch": 3.7916903962346904,
      "grad_norm": 0.0813424363732338,
      "learning_rate": 2.43254284641184e-06,
      "loss": 0.0091,
      "step": 2316920
    },
    {
      "epoch": 3.7917231266733435,
      "grad_norm": 0.11238669604063034,
      "learning_rate": 2.4324769541983226e-06,
      "loss": 0.0069,
      "step": 2316940
    },
    {
      "epoch": 3.791755857111997,
      "grad_norm": 0.2283989042043686,
      "learning_rate": 2.4324110619848054e-06,
      "loss": 0.0155,
      "step": 2316960
    },
    {
      "epoch": 3.7917885875506503,
      "grad_norm": 0.08836235851049423,
      "learning_rate": 2.432345169771288e-06,
      "loss": 0.0133,
      "step": 2316980
    },
    {
      "epoch": 3.791821317989304,
      "grad_norm": 0.3012942373752594,
      "learning_rate": 2.4322792775577713e-06,
      "loss": 0.0093,
      "step": 2317000
    },
    {
      "epoch": 3.791854048427957,
      "grad_norm": 0.20513904094696045,
      "learning_rate": 2.432213385344254e-06,
      "loss": 0.0203,
      "step": 2317020
    },
    {
      "epoch": 3.79188677886661,
      "grad_norm": 0.15137754380702972,
      "learning_rate": 2.432147493130737e-06,
      "loss": 0.0102,
      "step": 2317040
    },
    {
      "epoch": 3.791919509305264,
      "grad_norm": 0.1365732103586197,
      "learning_rate": 2.43208160091722e-06,
      "loss": 0.0137,
      "step": 2317060
    },
    {
      "epoch": 3.791952239743917,
      "grad_norm": 0.09621360898017883,
      "learning_rate": 2.4320157087037027e-06,
      "loss": 0.0067,
      "step": 2317080
    },
    {
      "epoch": 3.7919849701825705,
      "grad_norm": 0.07216005772352219,
      "learning_rate": 2.4319498164901854e-06,
      "loss": 0.0102,
      "step": 2317100
    },
    {
      "epoch": 3.7920177006212237,
      "grad_norm": 0.5652552843093872,
      "learning_rate": 2.4318839242766686e-06,
      "loss": 0.0093,
      "step": 2317120
    },
    {
      "epoch": 3.7920504310598773,
      "grad_norm": 0.7971488833427429,
      "learning_rate": 2.4318180320631513e-06,
      "loss": 0.0086,
      "step": 2317140
    },
    {
      "epoch": 3.7920831614985304,
      "grad_norm": 0.264270544052124,
      "learning_rate": 2.431752139849634e-06,
      "loss": 0.0154,
      "step": 2317160
    },
    {
      "epoch": 3.7921158919371836,
      "grad_norm": 1.0187232494354248,
      "learning_rate": 2.431686247636117e-06,
      "loss": 0.0104,
      "step": 2317180
    },
    {
      "epoch": 3.792148622375837,
      "grad_norm": 0.41223037242889404,
      "learning_rate": 2.4316203554226e-06,
      "loss": 0.0093,
      "step": 2317200
    },
    {
      "epoch": 3.7921813528144903,
      "grad_norm": 0.18634052574634552,
      "learning_rate": 2.4315544632090827e-06,
      "loss": 0.0072,
      "step": 2317220
    },
    {
      "epoch": 3.792214083253144,
      "grad_norm": 0.27881142497062683,
      "learning_rate": 2.431488570995566e-06,
      "loss": 0.0112,
      "step": 2317240
    },
    {
      "epoch": 3.792246813691797,
      "grad_norm": 0.19507355988025665,
      "learning_rate": 2.4314226787820486e-06,
      "loss": 0.0125,
      "step": 2317260
    },
    {
      "epoch": 3.7922795441304507,
      "grad_norm": 0.03340528905391693,
      "learning_rate": 2.4313567865685313e-06,
      "loss": 0.0077,
      "step": 2317280
    },
    {
      "epoch": 3.792312274569104,
      "grad_norm": 0.17531482875347137,
      "learning_rate": 2.431290894355014e-06,
      "loss": 0.0092,
      "step": 2317300
    },
    {
      "epoch": 3.792345005007757,
      "grad_norm": 0.1906154453754425,
      "learning_rate": 2.4312250021414972e-06,
      "loss": 0.0078,
      "step": 2317320
    },
    {
      "epoch": 3.7923777354464105,
      "grad_norm": 0.22487619519233704,
      "learning_rate": 2.43115910992798e-06,
      "loss": 0.0116,
      "step": 2317340
    },
    {
      "epoch": 3.7924104658850637,
      "grad_norm": 0.10502377897500992,
      "learning_rate": 2.4310932177144627e-06,
      "loss": 0.0094,
      "step": 2317360
    },
    {
      "epoch": 3.7924431963237173,
      "grad_norm": 0.3601694107055664,
      "learning_rate": 2.431027325500946e-06,
      "loss": 0.0089,
      "step": 2317380
    },
    {
      "epoch": 3.7924759267623704,
      "grad_norm": 0.2220315933227539,
      "learning_rate": 2.4309614332874286e-06,
      "loss": 0.0128,
      "step": 2317400
    },
    {
      "epoch": 3.792508657201024,
      "grad_norm": 0.43152451515197754,
      "learning_rate": 2.4308955410739114e-06,
      "loss": 0.0118,
      "step": 2317420
    },
    {
      "epoch": 3.792541387639677,
      "grad_norm": 0.26114314794540405,
      "learning_rate": 2.4308296488603945e-06,
      "loss": 0.0149,
      "step": 2317440
    },
    {
      "epoch": 3.7925741180783303,
      "grad_norm": 0.13528703153133392,
      "learning_rate": 2.4307637566468773e-06,
      "loss": 0.0108,
      "step": 2317460
    },
    {
      "epoch": 3.792606848516984,
      "grad_norm": 0.1638442575931549,
      "learning_rate": 2.43069786443336e-06,
      "loss": 0.0088,
      "step": 2317480
    },
    {
      "epoch": 3.792639578955637,
      "grad_norm": 0.38971659541130066,
      "learning_rate": 2.4306319722198428e-06,
      "loss": 0.0093,
      "step": 2317500
    },
    {
      "epoch": 3.7926723093942902,
      "grad_norm": 0.1578352153301239,
      "learning_rate": 2.430566080006326e-06,
      "loss": 0.0056,
      "step": 2317520
    },
    {
      "epoch": 3.792705039832944,
      "grad_norm": 0.07636723667383194,
      "learning_rate": 2.4305001877928087e-06,
      "loss": 0.013,
      "step": 2317540
    },
    {
      "epoch": 3.7927377702715974,
      "grad_norm": 0.12470709532499313,
      "learning_rate": 2.430434295579292e-06,
      "loss": 0.0174,
      "step": 2317560
    },
    {
      "epoch": 3.7927705007102506,
      "grad_norm": 0.6355189681053162,
      "learning_rate": 2.4303684033657746e-06,
      "loss": 0.0126,
      "step": 2317580
    },
    {
      "epoch": 3.7928032311489037,
      "grad_norm": 0.2384704202413559,
      "learning_rate": 2.4303025111522573e-06,
      "loss": 0.0128,
      "step": 2317600
    },
    {
      "epoch": 3.7928359615875573,
      "grad_norm": 0.11764278262853622,
      "learning_rate": 2.43023661893874e-06,
      "loss": 0.0089,
      "step": 2317620
    },
    {
      "epoch": 3.7928686920262105,
      "grad_norm": 0.09721159189939499,
      "learning_rate": 2.430170726725223e-06,
      "loss": 0.0146,
      "step": 2317640
    },
    {
      "epoch": 3.7929014224648636,
      "grad_norm": 0.8860117793083191,
      "learning_rate": 2.430104834511706e-06,
      "loss": 0.014,
      "step": 2317660
    },
    {
      "epoch": 3.792934152903517,
      "grad_norm": 0.5145623087882996,
      "learning_rate": 2.4300389422981887e-06,
      "loss": 0.01,
      "step": 2317680
    },
    {
      "epoch": 3.792966883342171,
      "grad_norm": 0.3439122140407562,
      "learning_rate": 2.4299730500846714e-06,
      "loss": 0.0093,
      "step": 2317700
    },
    {
      "epoch": 3.792999613780824,
      "grad_norm": 0.08190323412418365,
      "learning_rate": 2.4299071578711546e-06,
      "loss": 0.0099,
      "step": 2317720
    },
    {
      "epoch": 3.793032344219477,
      "grad_norm": 0.33780327439308167,
      "learning_rate": 2.4298412656576377e-06,
      "loss": 0.0149,
      "step": 2317740
    },
    {
      "epoch": 3.7930650746581307,
      "grad_norm": 0.3234424889087677,
      "learning_rate": 2.4297753734441205e-06,
      "loss": 0.0083,
      "step": 2317760
    },
    {
      "epoch": 3.793097805096784,
      "grad_norm": 0.4084774851799011,
      "learning_rate": 2.4297094812306032e-06,
      "loss": 0.0093,
      "step": 2317780
    },
    {
      "epoch": 3.793130535535437,
      "grad_norm": 0.09573289752006531,
      "learning_rate": 2.429643589017086e-06,
      "loss": 0.0107,
      "step": 2317800
    },
    {
      "epoch": 3.7931632659740906,
      "grad_norm": 0.41497424244880676,
      "learning_rate": 2.429577696803569e-06,
      "loss": 0.0144,
      "step": 2317820
    },
    {
      "epoch": 3.793195996412744,
      "grad_norm": 0.25474658608436584,
      "learning_rate": 2.429511804590052e-06,
      "loss": 0.0084,
      "step": 2317840
    },
    {
      "epoch": 3.7932287268513973,
      "grad_norm": 0.37490954995155334,
      "learning_rate": 2.4294459123765346e-06,
      "loss": 0.0096,
      "step": 2317860
    },
    {
      "epoch": 3.7932614572900505,
      "grad_norm": 0.328797847032547,
      "learning_rate": 2.4293800201630173e-06,
      "loss": 0.0118,
      "step": 2317880
    },
    {
      "epoch": 3.793294187728704,
      "grad_norm": 0.1264488697052002,
      "learning_rate": 2.4293141279495e-06,
      "loss": 0.0087,
      "step": 2317900
    },
    {
      "epoch": 3.7933269181673572,
      "grad_norm": 0.3453097641468048,
      "learning_rate": 2.4292482357359832e-06,
      "loss": 0.0159,
      "step": 2317920
    },
    {
      "epoch": 3.7933596486060104,
      "grad_norm": 0.3066602349281311,
      "learning_rate": 2.4291823435224664e-06,
      "loss": 0.0144,
      "step": 2317940
    },
    {
      "epoch": 3.793392379044664,
      "grad_norm": 0.08988220244646072,
      "learning_rate": 2.429116451308949e-06,
      "loss": 0.0111,
      "step": 2317960
    },
    {
      "epoch": 3.7934251094833176,
      "grad_norm": 0.26292720437049866,
      "learning_rate": 2.429050559095432e-06,
      "loss": 0.0075,
      "step": 2317980
    },
    {
      "epoch": 3.7934578399219707,
      "grad_norm": 0.6157659888267517,
      "learning_rate": 2.4289846668819146e-06,
      "loss": 0.0166,
      "step": 2318000
    },
    {
      "epoch": 3.793490570360624,
      "grad_norm": 0.20694725215435028,
      "learning_rate": 2.428918774668398e-06,
      "loss": 0.0119,
      "step": 2318020
    },
    {
      "epoch": 3.7935233007992775,
      "grad_norm": 0.06303352117538452,
      "learning_rate": 2.4288528824548805e-06,
      "loss": 0.0104,
      "step": 2318040
    },
    {
      "epoch": 3.7935560312379306,
      "grad_norm": 0.3885863721370697,
      "learning_rate": 2.4287869902413633e-06,
      "loss": 0.0129,
      "step": 2318060
    },
    {
      "epoch": 3.7935887616765838,
      "grad_norm": 0.18632328510284424,
      "learning_rate": 2.428721098027846e-06,
      "loss": 0.0133,
      "step": 2318080
    },
    {
      "epoch": 3.7936214921152374,
      "grad_norm": 0.03386944904923439,
      "learning_rate": 2.428655205814329e-06,
      "loss": 0.0146,
      "step": 2318100
    },
    {
      "epoch": 3.7936542225538905,
      "grad_norm": 0.2339356690645218,
      "learning_rate": 2.428589313600812e-06,
      "loss": 0.0119,
      "step": 2318120
    },
    {
      "epoch": 3.793686952992544,
      "grad_norm": 0.16984866559505463,
      "learning_rate": 2.428523421387295e-06,
      "loss": 0.0052,
      "step": 2318140
    },
    {
      "epoch": 3.7937196834311973,
      "grad_norm": 0.10888005793094635,
      "learning_rate": 2.428457529173778e-06,
      "loss": 0.0103,
      "step": 2318160
    },
    {
      "epoch": 3.793752413869851,
      "grad_norm": 0.16819331049919128,
      "learning_rate": 2.4283916369602606e-06,
      "loss": 0.0115,
      "step": 2318180
    },
    {
      "epoch": 3.793785144308504,
      "grad_norm": 0.23004230856895447,
      "learning_rate": 2.4283257447467433e-06,
      "loss": 0.0122,
      "step": 2318200
    },
    {
      "epoch": 3.793817874747157,
      "grad_norm": 0.6060411334037781,
      "learning_rate": 2.4282598525332265e-06,
      "loss": 0.0131,
      "step": 2318220
    },
    {
      "epoch": 3.7938506051858107,
      "grad_norm": 0.20844724774360657,
      "learning_rate": 2.428193960319709e-06,
      "loss": 0.0081,
      "step": 2318240
    },
    {
      "epoch": 3.793883335624464,
      "grad_norm": 0.11817259341478348,
      "learning_rate": 2.4281280681061924e-06,
      "loss": 0.0103,
      "step": 2318260
    },
    {
      "epoch": 3.7939160660631175,
      "grad_norm": 0.1145097091794014,
      "learning_rate": 2.428062175892675e-06,
      "loss": 0.007,
      "step": 2318280
    },
    {
      "epoch": 3.7939487965017706,
      "grad_norm": 0.20942260324954987,
      "learning_rate": 2.427996283679158e-06,
      "loss": 0.0124,
      "step": 2318300
    },
    {
      "epoch": 3.7939815269404242,
      "grad_norm": 0.13408984243869781,
      "learning_rate": 2.4279303914656406e-06,
      "loss": 0.0145,
      "step": 2318320
    },
    {
      "epoch": 3.7940142573790774,
      "grad_norm": 0.06721151620149612,
      "learning_rate": 2.4278644992521237e-06,
      "loss": 0.0132,
      "step": 2318340
    },
    {
      "epoch": 3.7940469878177305,
      "grad_norm": 0.3101494014263153,
      "learning_rate": 2.4277986070386065e-06,
      "loss": 0.0111,
      "step": 2318360
    },
    {
      "epoch": 3.794079718256384,
      "grad_norm": 0.29599419236183167,
      "learning_rate": 2.4277327148250892e-06,
      "loss": 0.011,
      "step": 2318380
    },
    {
      "epoch": 3.7941124486950373,
      "grad_norm": 0.5140907168388367,
      "learning_rate": 2.427666822611572e-06,
      "loss": 0.0109,
      "step": 2318400
    },
    {
      "epoch": 3.794145179133691,
      "grad_norm": 0.11074145138263702,
      "learning_rate": 2.427600930398055e-06,
      "loss": 0.0095,
      "step": 2318420
    },
    {
      "epoch": 3.794177909572344,
      "grad_norm": 0.3993701934814453,
      "learning_rate": 2.427535038184538e-06,
      "loss": 0.0104,
      "step": 2318440
    },
    {
      "epoch": 3.7942106400109976,
      "grad_norm": 0.10957685112953186,
      "learning_rate": 2.427469145971021e-06,
      "loss": 0.0065,
      "step": 2318460
    },
    {
      "epoch": 3.7942433704496508,
      "grad_norm": 0.1889062076807022,
      "learning_rate": 2.4274032537575038e-06,
      "loss": 0.0164,
      "step": 2318480
    },
    {
      "epoch": 3.794276100888304,
      "grad_norm": 0.22502431273460388,
      "learning_rate": 2.4273373615439865e-06,
      "loss": 0.0157,
      "step": 2318500
    },
    {
      "epoch": 3.7943088313269575,
      "grad_norm": 0.6888188719749451,
      "learning_rate": 2.4272714693304693e-06,
      "loss": 0.0104,
      "step": 2318520
    },
    {
      "epoch": 3.7943415617656107,
      "grad_norm": 0.2908463478088379,
      "learning_rate": 2.4272055771169524e-06,
      "loss": 0.0092,
      "step": 2318540
    },
    {
      "epoch": 3.7943742922042643,
      "grad_norm": 0.242680624127388,
      "learning_rate": 2.427139684903435e-06,
      "loss": 0.0093,
      "step": 2318560
    },
    {
      "epoch": 3.7944070226429174,
      "grad_norm": 0.14053699374198914,
      "learning_rate": 2.427073792689918e-06,
      "loss": 0.0122,
      "step": 2318580
    },
    {
      "epoch": 3.794439753081571,
      "grad_norm": 0.8595337867736816,
      "learning_rate": 2.4270079004764006e-06,
      "loss": 0.0097,
      "step": 2318600
    },
    {
      "epoch": 3.794472483520224,
      "grad_norm": 0.185028538107872,
      "learning_rate": 2.426942008262884e-06,
      "loss": 0.0117,
      "step": 2318620
    },
    {
      "epoch": 3.7945052139588773,
      "grad_norm": 0.09363968670368195,
      "learning_rate": 2.4268761160493665e-06,
      "loss": 0.0095,
      "step": 2318640
    },
    {
      "epoch": 3.794537944397531,
      "grad_norm": 2.333630323410034,
      "learning_rate": 2.4268102238358497e-06,
      "loss": 0.0126,
      "step": 2318660
    },
    {
      "epoch": 3.794570674836184,
      "grad_norm": 0.28311923146247864,
      "learning_rate": 2.4267443316223324e-06,
      "loss": 0.0098,
      "step": 2318680
    },
    {
      "epoch": 3.7946034052748376,
      "grad_norm": 0.10635946691036224,
      "learning_rate": 2.426678439408815e-06,
      "loss": 0.0095,
      "step": 2318700
    },
    {
      "epoch": 3.794636135713491,
      "grad_norm": 0.22653643786907196,
      "learning_rate": 2.426612547195298e-06,
      "loss": 0.0098,
      "step": 2318720
    },
    {
      "epoch": 3.7946688661521444,
      "grad_norm": 0.3526405096054077,
      "learning_rate": 2.426546654981781e-06,
      "loss": 0.0087,
      "step": 2318740
    },
    {
      "epoch": 3.7947015965907975,
      "grad_norm": 0.3718612492084503,
      "learning_rate": 2.426480762768264e-06,
      "loss": 0.0086,
      "step": 2318760
    },
    {
      "epoch": 3.7947343270294507,
      "grad_norm": 0.5387949347496033,
      "learning_rate": 2.4264148705547466e-06,
      "loss": 0.0104,
      "step": 2318780
    },
    {
      "epoch": 3.7947670574681043,
      "grad_norm": 0.14930877089500427,
      "learning_rate": 2.4263489783412297e-06,
      "loss": 0.0069,
      "step": 2318800
    },
    {
      "epoch": 3.7947997879067574,
      "grad_norm": 0.30786556005477905,
      "learning_rate": 2.4262830861277125e-06,
      "loss": 0.0138,
      "step": 2318820
    },
    {
      "epoch": 3.794832518345411,
      "grad_norm": 0.4321359097957611,
      "learning_rate": 2.426217193914195e-06,
      "loss": 0.0175,
      "step": 2318840
    },
    {
      "epoch": 3.794865248784064,
      "grad_norm": 0.128765270113945,
      "learning_rate": 2.4261513017006784e-06,
      "loss": 0.0109,
      "step": 2318860
    },
    {
      "epoch": 3.7948979792227178,
      "grad_norm": 0.14903363585472107,
      "learning_rate": 2.426085409487161e-06,
      "loss": 0.0107,
      "step": 2318880
    },
    {
      "epoch": 3.794930709661371,
      "grad_norm": 0.247420072555542,
      "learning_rate": 2.426019517273644e-06,
      "loss": 0.0102,
      "step": 2318900
    },
    {
      "epoch": 3.794963440100024,
      "grad_norm": 0.3540038466453552,
      "learning_rate": 2.4259536250601266e-06,
      "loss": 0.0111,
      "step": 2318920
    },
    {
      "epoch": 3.7949961705386777,
      "grad_norm": 0.3238941431045532,
      "learning_rate": 2.4258877328466098e-06,
      "loss": 0.0153,
      "step": 2318940
    },
    {
      "epoch": 3.795028900977331,
      "grad_norm": 0.3903968930244446,
      "learning_rate": 2.4258218406330925e-06,
      "loss": 0.0127,
      "step": 2318960
    },
    {
      "epoch": 3.795061631415984,
      "grad_norm": 0.879148006439209,
      "learning_rate": 2.4257559484195757e-06,
      "loss": 0.0087,
      "step": 2318980
    },
    {
      "epoch": 3.7950943618546376,
      "grad_norm": 0.07726976275444031,
      "learning_rate": 2.4256900562060584e-06,
      "loss": 0.0094,
      "step": 2319000
    },
    {
      "epoch": 3.795127092293291,
      "grad_norm": 0.5132477879524231,
      "learning_rate": 2.425624163992541e-06,
      "loss": 0.011,
      "step": 2319020
    },
    {
      "epoch": 3.7951598227319443,
      "grad_norm": 0.06902770698070526,
      "learning_rate": 2.4255582717790243e-06,
      "loss": 0.0074,
      "step": 2319040
    },
    {
      "epoch": 3.7951925531705974,
      "grad_norm": 0.08316757529973984,
      "learning_rate": 2.425492379565507e-06,
      "loss": 0.007,
      "step": 2319060
    },
    {
      "epoch": 3.795225283609251,
      "grad_norm": 0.46458959579467773,
      "learning_rate": 2.4254264873519898e-06,
      "loss": 0.0067,
      "step": 2319080
    },
    {
      "epoch": 3.795258014047904,
      "grad_norm": 0.2246973216533661,
      "learning_rate": 2.4253605951384725e-06,
      "loss": 0.0126,
      "step": 2319100
    },
    {
      "epoch": 3.7952907444865573,
      "grad_norm": 0.11862873286008835,
      "learning_rate": 2.4252947029249553e-06,
      "loss": 0.0131,
      "step": 2319120
    },
    {
      "epoch": 3.795323474925211,
      "grad_norm": 0.324067622423172,
      "learning_rate": 2.4252288107114384e-06,
      "loss": 0.0128,
      "step": 2319140
    },
    {
      "epoch": 3.7953562053638645,
      "grad_norm": 0.13241402804851532,
      "learning_rate": 2.4251629184979216e-06,
      "loss": 0.0089,
      "step": 2319160
    },
    {
      "epoch": 3.7953889358025177,
      "grad_norm": 0.23729850351810455,
      "learning_rate": 2.4250970262844043e-06,
      "loss": 0.0101,
      "step": 2319180
    },
    {
      "epoch": 3.795421666241171,
      "grad_norm": 0.1774711310863495,
      "learning_rate": 2.425031134070887e-06,
      "loss": 0.0109,
      "step": 2319200
    },
    {
      "epoch": 3.7954543966798244,
      "grad_norm": 0.1891331523656845,
      "learning_rate": 2.42496524185737e-06,
      "loss": 0.0086,
      "step": 2319220
    },
    {
      "epoch": 3.7954871271184776,
      "grad_norm": 0.11622945219278336,
      "learning_rate": 2.424899349643853e-06,
      "loss": 0.008,
      "step": 2319240
    },
    {
      "epoch": 3.7955198575571307,
      "grad_norm": 0.6817792654037476,
      "learning_rate": 2.4248334574303357e-06,
      "loss": 0.0145,
      "step": 2319260
    },
    {
      "epoch": 3.7955525879957843,
      "grad_norm": 0.21615168452262878,
      "learning_rate": 2.4247675652168184e-06,
      "loss": 0.0127,
      "step": 2319280
    },
    {
      "epoch": 3.795585318434438,
      "grad_norm": 0.225789874792099,
      "learning_rate": 2.424701673003301e-06,
      "loss": 0.0129,
      "step": 2319300
    },
    {
      "epoch": 3.795618048873091,
      "grad_norm": 0.06264778971672058,
      "learning_rate": 2.4246357807897843e-06,
      "loss": 0.0091,
      "step": 2319320
    },
    {
      "epoch": 3.795650779311744,
      "grad_norm": 0.1901913583278656,
      "learning_rate": 2.424569888576267e-06,
      "loss": 0.0112,
      "step": 2319340
    },
    {
      "epoch": 3.795683509750398,
      "grad_norm": 0.18351347744464874,
      "learning_rate": 2.4245039963627503e-06,
      "loss": 0.0159,
      "step": 2319360
    },
    {
      "epoch": 3.795716240189051,
      "grad_norm": 0.16511526703834534,
      "learning_rate": 2.424438104149233e-06,
      "loss": 0.0132,
      "step": 2319380
    },
    {
      "epoch": 3.795748970627704,
      "grad_norm": 0.19362220168113708,
      "learning_rate": 2.4243722119357157e-06,
      "loss": 0.013,
      "step": 2319400
    },
    {
      "epoch": 3.7957817010663577,
      "grad_norm": 0.4616858959197998,
      "learning_rate": 2.4243063197221985e-06,
      "loss": 0.0141,
      "step": 2319420
    },
    {
      "epoch": 3.7958144315050113,
      "grad_norm": 0.3290124833583832,
      "learning_rate": 2.4242404275086816e-06,
      "loss": 0.0067,
      "step": 2319440
    },
    {
      "epoch": 3.7958471619436644,
      "grad_norm": 0.2085658758878708,
      "learning_rate": 2.4241745352951644e-06,
      "loss": 0.0133,
      "step": 2319460
    },
    {
      "epoch": 3.7958798923823176,
      "grad_norm": 0.3500310778617859,
      "learning_rate": 2.424108643081647e-06,
      "loss": 0.01,
      "step": 2319480
    },
    {
      "epoch": 3.795912622820971,
      "grad_norm": 0.22192730009555817,
      "learning_rate": 2.4240427508681303e-06,
      "loss": 0.008,
      "step": 2319500
    },
    {
      "epoch": 3.7959453532596243,
      "grad_norm": 0.48195263743400574,
      "learning_rate": 2.423976858654613e-06,
      "loss": 0.0125,
      "step": 2319520
    },
    {
      "epoch": 3.7959780836982775,
      "grad_norm": 0.2776821553707123,
      "learning_rate": 2.4239109664410958e-06,
      "loss": 0.0087,
      "step": 2319540
    },
    {
      "epoch": 3.796010814136931,
      "grad_norm": 0.33646807074546814,
      "learning_rate": 2.423845074227579e-06,
      "loss": 0.0145,
      "step": 2319560
    },
    {
      "epoch": 3.7960435445755842,
      "grad_norm": 0.3880893886089325,
      "learning_rate": 2.4237791820140617e-06,
      "loss": 0.0165,
      "step": 2319580
    },
    {
      "epoch": 3.796076275014238,
      "grad_norm": 0.3252480924129486,
      "learning_rate": 2.4237132898005444e-06,
      "loss": 0.011,
      "step": 2319600
    },
    {
      "epoch": 3.796109005452891,
      "grad_norm": 0.11261913925409317,
      "learning_rate": 2.423647397587027e-06,
      "loss": 0.0095,
      "step": 2319620
    },
    {
      "epoch": 3.7961417358915446,
      "grad_norm": 0.20347407460212708,
      "learning_rate": 2.4235815053735103e-06,
      "loss": 0.0193,
      "step": 2319640
    },
    {
      "epoch": 3.7961744663301977,
      "grad_norm": 0.18463042378425598,
      "learning_rate": 2.423515613159993e-06,
      "loss": 0.0116,
      "step": 2319660
    },
    {
      "epoch": 3.796207196768851,
      "grad_norm": 0.31610170006752014,
      "learning_rate": 2.423449720946476e-06,
      "loss": 0.0099,
      "step": 2319680
    },
    {
      "epoch": 3.7962399272075045,
      "grad_norm": 0.2249874472618103,
      "learning_rate": 2.423383828732959e-06,
      "loss": 0.012,
      "step": 2319700
    },
    {
      "epoch": 3.7962726576461576,
      "grad_norm": 0.6376059055328369,
      "learning_rate": 2.4233179365194417e-06,
      "loss": 0.0116,
      "step": 2319720
    },
    {
      "epoch": 3.796305388084811,
      "grad_norm": 0.039216358214616776,
      "learning_rate": 2.4232520443059244e-06,
      "loss": 0.0098,
      "step": 2319740
    },
    {
      "epoch": 3.7963381185234644,
      "grad_norm": 0.18059603869915009,
      "learning_rate": 2.4231861520924076e-06,
      "loss": 0.0103,
      "step": 2319760
    },
    {
      "epoch": 3.796370848962118,
      "grad_norm": 0.1429091840982437,
      "learning_rate": 2.4231202598788903e-06,
      "loss": 0.0109,
      "step": 2319780
    },
    {
      "epoch": 3.796403579400771,
      "grad_norm": 0.5098853707313538,
      "learning_rate": 2.423054367665373e-06,
      "loss": 0.011,
      "step": 2319800
    },
    {
      "epoch": 3.7964363098394243,
      "grad_norm": 0.18617035448551178,
      "learning_rate": 2.422988475451856e-06,
      "loss": 0.0133,
      "step": 2319820
    },
    {
      "epoch": 3.796469040278078,
      "grad_norm": 0.5047283172607422,
      "learning_rate": 2.422922583238339e-06,
      "loss": 0.0123,
      "step": 2319840
    },
    {
      "epoch": 3.796501770716731,
      "grad_norm": 0.16203777492046356,
      "learning_rate": 2.4228566910248217e-06,
      "loss": 0.0086,
      "step": 2319860
    },
    {
      "epoch": 3.7965345011553846,
      "grad_norm": 0.4446246922016144,
      "learning_rate": 2.422790798811305e-06,
      "loss": 0.0082,
      "step": 2319880
    },
    {
      "epoch": 3.7965672315940378,
      "grad_norm": 0.4207289218902588,
      "learning_rate": 2.4227249065977876e-06,
      "loss": 0.0112,
      "step": 2319900
    },
    {
      "epoch": 3.7965999620326913,
      "grad_norm": 0.20176391303539276,
      "learning_rate": 2.4226590143842704e-06,
      "loss": 0.0085,
      "step": 2319920
    },
    {
      "epoch": 3.7966326924713445,
      "grad_norm": 0.16908976435661316,
      "learning_rate": 2.422593122170753e-06,
      "loss": 0.009,
      "step": 2319940
    },
    {
      "epoch": 3.7966654229099976,
      "grad_norm": 0.4812087118625641,
      "learning_rate": 2.4225272299572363e-06,
      "loss": 0.0128,
      "step": 2319960
    },
    {
      "epoch": 3.7966981533486512,
      "grad_norm": 0.4400434195995331,
      "learning_rate": 2.422461337743719e-06,
      "loss": 0.0123,
      "step": 2319980
    },
    {
      "epoch": 3.7967308837873044,
      "grad_norm": 0.2380029857158661,
      "learning_rate": 2.4223954455302017e-06,
      "loss": 0.0085,
      "step": 2320000
    },
    {
      "epoch": 3.796763614225958,
      "grad_norm": 0.19120162725448608,
      "learning_rate": 2.422329553316685e-06,
      "loss": 0.0202,
      "step": 2320020
    },
    {
      "epoch": 3.796796344664611,
      "grad_norm": 0.22003355622291565,
      "learning_rate": 2.4222636611031676e-06,
      "loss": 0.0095,
      "step": 2320040
    },
    {
      "epoch": 3.7968290751032647,
      "grad_norm": 0.09853264689445496,
      "learning_rate": 2.4221977688896504e-06,
      "loss": 0.0077,
      "step": 2320060
    },
    {
      "epoch": 3.796861805541918,
      "grad_norm": 0.5251595377922058,
      "learning_rate": 2.4221318766761335e-06,
      "loss": 0.0078,
      "step": 2320080
    },
    {
      "epoch": 3.796894535980571,
      "grad_norm": 0.24475565552711487,
      "learning_rate": 2.4220659844626163e-06,
      "loss": 0.0079,
      "step": 2320100
    },
    {
      "epoch": 3.7969272664192246,
      "grad_norm": 0.4612441658973694,
      "learning_rate": 2.422000092249099e-06,
      "loss": 0.0156,
      "step": 2320120
    },
    {
      "epoch": 3.7969599968578778,
      "grad_norm": 0.14026889204978943,
      "learning_rate": 2.4219342000355818e-06,
      "loss": 0.0123,
      "step": 2320140
    },
    {
      "epoch": 3.7969927272965314,
      "grad_norm": 0.08556371927261353,
      "learning_rate": 2.421868307822065e-06,
      "loss": 0.007,
      "step": 2320160
    },
    {
      "epoch": 3.7970254577351845,
      "grad_norm": 0.11414425820112228,
      "learning_rate": 2.4218024156085477e-06,
      "loss": 0.0147,
      "step": 2320180
    },
    {
      "epoch": 3.797058188173838,
      "grad_norm": 0.36434298753738403,
      "learning_rate": 2.421736523395031e-06,
      "loss": 0.014,
      "step": 2320200
    },
    {
      "epoch": 3.7970909186124913,
      "grad_norm": 0.17376722395420074,
      "learning_rate": 2.4216706311815136e-06,
      "loss": 0.0099,
      "step": 2320220
    },
    {
      "epoch": 3.7971236490511444,
      "grad_norm": 0.10416438430547714,
      "learning_rate": 2.4216047389679963e-06,
      "loss": 0.0119,
      "step": 2320240
    },
    {
      "epoch": 3.797156379489798,
      "grad_norm": 0.5592865347862244,
      "learning_rate": 2.421538846754479e-06,
      "loss": 0.0111,
      "step": 2320260
    },
    {
      "epoch": 3.797189109928451,
      "grad_norm": 0.6591649651527405,
      "learning_rate": 2.421472954540962e-06,
      "loss": 0.012,
      "step": 2320280
    },
    {
      "epoch": 3.7972218403671048,
      "grad_norm": 0.316752165555954,
      "learning_rate": 2.421407062327445e-06,
      "loss": 0.0124,
      "step": 2320300
    },
    {
      "epoch": 3.797254570805758,
      "grad_norm": 0.3562352955341339,
      "learning_rate": 2.4213411701139277e-06,
      "loss": 0.0106,
      "step": 2320320
    },
    {
      "epoch": 3.7972873012444115,
      "grad_norm": 0.06875799596309662,
      "learning_rate": 2.4212752779004104e-06,
      "loss": 0.0129,
      "step": 2320340
    },
    {
      "epoch": 3.7973200316830646,
      "grad_norm": 0.16195622086524963,
      "learning_rate": 2.4212093856868936e-06,
      "loss": 0.0126,
      "step": 2320360
    },
    {
      "epoch": 3.797352762121718,
      "grad_norm": 0.17560835182666779,
      "learning_rate": 2.4211434934733768e-06,
      "loss": 0.0132,
      "step": 2320380
    },
    {
      "epoch": 3.7973854925603714,
      "grad_norm": 0.1261553317308426,
      "learning_rate": 2.4210776012598595e-06,
      "loss": 0.0086,
      "step": 2320400
    },
    {
      "epoch": 3.7974182229990245,
      "grad_norm": 0.1205981895327568,
      "learning_rate": 2.4210117090463422e-06,
      "loss": 0.0166,
      "step": 2320420
    },
    {
      "epoch": 3.797450953437678,
      "grad_norm": 0.16916756331920624,
      "learning_rate": 2.420945816832825e-06,
      "loss": 0.0102,
      "step": 2320440
    },
    {
      "epoch": 3.7974836838763313,
      "grad_norm": 0.22951219975948334,
      "learning_rate": 2.420879924619308e-06,
      "loss": 0.0069,
      "step": 2320460
    },
    {
      "epoch": 3.797516414314985,
      "grad_norm": 0.1263599395751953,
      "learning_rate": 2.420814032405791e-06,
      "loss": 0.0075,
      "step": 2320480
    },
    {
      "epoch": 3.797549144753638,
      "grad_norm": 0.6877126097679138,
      "learning_rate": 2.4207481401922736e-06,
      "loss": 0.0173,
      "step": 2320500
    },
    {
      "epoch": 3.797581875192291,
      "grad_norm": 0.4404054582118988,
      "learning_rate": 2.4206822479787564e-06,
      "loss": 0.0142,
      "step": 2320520
    },
    {
      "epoch": 3.7976146056309448,
      "grad_norm": 0.10998576134443283,
      "learning_rate": 2.420616355765239e-06,
      "loss": 0.0107,
      "step": 2320540
    },
    {
      "epoch": 3.797647336069598,
      "grad_norm": 0.19715791940689087,
      "learning_rate": 2.4205504635517223e-06,
      "loss": 0.0089,
      "step": 2320560
    },
    {
      "epoch": 3.797680066508251,
      "grad_norm": 0.9738654494285583,
      "learning_rate": 2.4204845713382054e-06,
      "loss": 0.0121,
      "step": 2320580
    },
    {
      "epoch": 3.7977127969469047,
      "grad_norm": 0.25089317560195923,
      "learning_rate": 2.420418679124688e-06,
      "loss": 0.0111,
      "step": 2320600
    },
    {
      "epoch": 3.7977455273855583,
      "grad_norm": 0.12232521176338196,
      "learning_rate": 2.420352786911171e-06,
      "loss": 0.0077,
      "step": 2320620
    },
    {
      "epoch": 3.7977782578242114,
      "grad_norm": 0.37736594676971436,
      "learning_rate": 2.4202868946976536e-06,
      "loss": 0.0091,
      "step": 2320640
    },
    {
      "epoch": 3.7978109882628646,
      "grad_norm": 0.4032462537288666,
      "learning_rate": 2.420221002484137e-06,
      "loss": 0.0095,
      "step": 2320660
    },
    {
      "epoch": 3.797843718701518,
      "grad_norm": 0.13721106946468353,
      "learning_rate": 2.4201551102706195e-06,
      "loss": 0.0116,
      "step": 2320680
    },
    {
      "epoch": 3.7978764491401713,
      "grad_norm": 0.17374585568904877,
      "learning_rate": 2.4200892180571023e-06,
      "loss": 0.0076,
      "step": 2320700
    },
    {
      "epoch": 3.7979091795788245,
      "grad_norm": 0.27794864773750305,
      "learning_rate": 2.420023325843585e-06,
      "loss": 0.0066,
      "step": 2320720
    },
    {
      "epoch": 3.797941910017478,
      "grad_norm": 0.21520625054836273,
      "learning_rate": 2.419957433630068e-06,
      "loss": 0.0151,
      "step": 2320740
    },
    {
      "epoch": 3.7979746404561316,
      "grad_norm": 1.1112815141677856,
      "learning_rate": 2.419891541416551e-06,
      "loss": 0.013,
      "step": 2320760
    },
    {
      "epoch": 3.798007370894785,
      "grad_norm": 0.24640721082687378,
      "learning_rate": 2.419825649203034e-06,
      "loss": 0.0134,
      "step": 2320780
    },
    {
      "epoch": 3.798040101333438,
      "grad_norm": 0.65116947889328,
      "learning_rate": 2.419759756989517e-06,
      "loss": 0.0184,
      "step": 2320800
    },
    {
      "epoch": 3.7980728317720915,
      "grad_norm": 0.24026663601398468,
      "learning_rate": 2.4196938647759996e-06,
      "loss": 0.0093,
      "step": 2320820
    },
    {
      "epoch": 3.7981055622107447,
      "grad_norm": 0.22799450159072876,
      "learning_rate": 2.4196279725624823e-06,
      "loss": 0.0156,
      "step": 2320840
    },
    {
      "epoch": 3.798138292649398,
      "grad_norm": 0.23572954535484314,
      "learning_rate": 2.4195620803489655e-06,
      "loss": 0.011,
      "step": 2320860
    },
    {
      "epoch": 3.7981710230880514,
      "grad_norm": 0.2170441895723343,
      "learning_rate": 2.4194961881354482e-06,
      "loss": 0.0159,
      "step": 2320880
    },
    {
      "epoch": 3.798203753526705,
      "grad_norm": 0.44636669754981995,
      "learning_rate": 2.4194302959219314e-06,
      "loss": 0.0099,
      "step": 2320900
    },
    {
      "epoch": 3.798236483965358,
      "grad_norm": 0.17128314077854156,
      "learning_rate": 2.419364403708414e-06,
      "loss": 0.0103,
      "step": 2320920
    },
    {
      "epoch": 3.7982692144040113,
      "grad_norm": 0.4171797037124634,
      "learning_rate": 2.419298511494897e-06,
      "loss": 0.008,
      "step": 2320940
    },
    {
      "epoch": 3.798301944842665,
      "grad_norm": 0.7707261443138123,
      "learning_rate": 2.4192326192813796e-06,
      "loss": 0.013,
      "step": 2320960
    },
    {
      "epoch": 3.798334675281318,
      "grad_norm": 0.32080700993537903,
      "learning_rate": 2.4191667270678628e-06,
      "loss": 0.0125,
      "step": 2320980
    },
    {
      "epoch": 3.7983674057199712,
      "grad_norm": 0.3601538836956024,
      "learning_rate": 2.4191008348543455e-06,
      "loss": 0.0077,
      "step": 2321000
    },
    {
      "epoch": 3.798400136158625,
      "grad_norm": 0.1267554759979248,
      "learning_rate": 2.4190349426408282e-06,
      "loss": 0.0102,
      "step": 2321020
    },
    {
      "epoch": 3.7984328665972784,
      "grad_norm": 0.13362814486026764,
      "learning_rate": 2.418969050427311e-06,
      "loss": 0.0123,
      "step": 2321040
    },
    {
      "epoch": 3.7984655970359316,
      "grad_norm": 0.2295420616865158,
      "learning_rate": 2.418903158213794e-06,
      "loss": 0.0119,
      "step": 2321060
    },
    {
      "epoch": 3.7984983274745847,
      "grad_norm": 0.5548064708709717,
      "learning_rate": 2.418837266000277e-06,
      "loss": 0.0134,
      "step": 2321080
    },
    {
      "epoch": 3.7985310579132383,
      "grad_norm": 0.7975553274154663,
      "learning_rate": 2.41877137378676e-06,
      "loss": 0.0139,
      "step": 2321100
    },
    {
      "epoch": 3.7985637883518915,
      "grad_norm": 0.48652273416519165,
      "learning_rate": 2.4187054815732428e-06,
      "loss": 0.01,
      "step": 2321120
    },
    {
      "epoch": 3.7985965187905446,
      "grad_norm": 0.2348824292421341,
      "learning_rate": 2.4186395893597255e-06,
      "loss": 0.0089,
      "step": 2321140
    },
    {
      "epoch": 3.798629249229198,
      "grad_norm": 0.28236597776412964,
      "learning_rate": 2.4185736971462083e-06,
      "loss": 0.0126,
      "step": 2321160
    },
    {
      "epoch": 3.7986619796678514,
      "grad_norm": 0.3843674063682556,
      "learning_rate": 2.4185078049326914e-06,
      "loss": 0.0119,
      "step": 2321180
    },
    {
      "epoch": 3.798694710106505,
      "grad_norm": 0.14194180071353912,
      "learning_rate": 2.418441912719174e-06,
      "loss": 0.0118,
      "step": 2321200
    },
    {
      "epoch": 3.798727440545158,
      "grad_norm": 0.42428264021873474,
      "learning_rate": 2.418376020505657e-06,
      "loss": 0.007,
      "step": 2321220
    },
    {
      "epoch": 3.7987601709838117,
      "grad_norm": 0.09237696975469589,
      "learning_rate": 2.4183101282921396e-06,
      "loss": 0.0064,
      "step": 2321240
    },
    {
      "epoch": 3.798792901422465,
      "grad_norm": 0.16416393220424652,
      "learning_rate": 2.418244236078623e-06,
      "loss": 0.0124,
      "step": 2321260
    },
    {
      "epoch": 3.798825631861118,
      "grad_norm": 0.25319284200668335,
      "learning_rate": 2.4181783438651055e-06,
      "loss": 0.0078,
      "step": 2321280
    },
    {
      "epoch": 3.7988583622997716,
      "grad_norm": 0.24542325735092163,
      "learning_rate": 2.4181124516515887e-06,
      "loss": 0.0129,
      "step": 2321300
    },
    {
      "epoch": 3.7988910927384247,
      "grad_norm": 0.5674623847007751,
      "learning_rate": 2.4180465594380715e-06,
      "loss": 0.0141,
      "step": 2321320
    },
    {
      "epoch": 3.7989238231770783,
      "grad_norm": 0.22513772547245026,
      "learning_rate": 2.417980667224554e-06,
      "loss": 0.0127,
      "step": 2321340
    },
    {
      "epoch": 3.7989565536157315,
      "grad_norm": 0.12522965669631958,
      "learning_rate": 2.417914775011037e-06,
      "loss": 0.0087,
      "step": 2321360
    },
    {
      "epoch": 3.798989284054385,
      "grad_norm": 0.297318696975708,
      "learning_rate": 2.41784888279752e-06,
      "loss": 0.0125,
      "step": 2321380
    },
    {
      "epoch": 3.7990220144930382,
      "grad_norm": 0.4234248399734497,
      "learning_rate": 2.417782990584003e-06,
      "loss": 0.0111,
      "step": 2321400
    },
    {
      "epoch": 3.7990547449316914,
      "grad_norm": 0.6550824642181396,
      "learning_rate": 2.4177170983704856e-06,
      "loss": 0.0129,
      "step": 2321420
    },
    {
      "epoch": 3.799087475370345,
      "grad_norm": 0.4157114028930664,
      "learning_rate": 2.4176512061569687e-06,
      "loss": 0.01,
      "step": 2321440
    },
    {
      "epoch": 3.799120205808998,
      "grad_norm": 0.6926338076591492,
      "learning_rate": 2.4175853139434515e-06,
      "loss": 0.0112,
      "step": 2321460
    },
    {
      "epoch": 3.7991529362476517,
      "grad_norm": 0.08816426992416382,
      "learning_rate": 2.4175194217299342e-06,
      "loss": 0.0113,
      "step": 2321480
    },
    {
      "epoch": 3.799185666686305,
      "grad_norm": 0.16433210670948029,
      "learning_rate": 2.4174535295164174e-06,
      "loss": 0.0118,
      "step": 2321500
    },
    {
      "epoch": 3.7992183971249585,
      "grad_norm": 0.21124106645584106,
      "learning_rate": 2.4173876373029e-06,
      "loss": 0.0085,
      "step": 2321520
    },
    {
      "epoch": 3.7992511275636116,
      "grad_norm": 0.45229384303092957,
      "learning_rate": 2.417321745089383e-06,
      "loss": 0.009,
      "step": 2321540
    },
    {
      "epoch": 3.7992838580022648,
      "grad_norm": 0.2765486240386963,
      "learning_rate": 2.4172558528758656e-06,
      "loss": 0.0117,
      "step": 2321560
    },
    {
      "epoch": 3.7993165884409184,
      "grad_norm": 0.0770440474152565,
      "learning_rate": 2.4171899606623488e-06,
      "loss": 0.0112,
      "step": 2321580
    },
    {
      "epoch": 3.7993493188795715,
      "grad_norm": 0.2599122226238251,
      "learning_rate": 2.4171240684488315e-06,
      "loss": 0.0095,
      "step": 2321600
    },
    {
      "epoch": 3.799382049318225,
      "grad_norm": 0.5365888476371765,
      "learning_rate": 2.4170581762353147e-06,
      "loss": 0.0117,
      "step": 2321620
    },
    {
      "epoch": 3.7994147797568782,
      "grad_norm": 0.07883036136627197,
      "learning_rate": 2.4169922840217974e-06,
      "loss": 0.0103,
      "step": 2321640
    },
    {
      "epoch": 3.799447510195532,
      "grad_norm": 0.13349603116512299,
      "learning_rate": 2.41692639180828e-06,
      "loss": 0.0084,
      "step": 2321660
    },
    {
      "epoch": 3.799480240634185,
      "grad_norm": 0.08828745037317276,
      "learning_rate": 2.4168604995947633e-06,
      "loss": 0.0113,
      "step": 2321680
    },
    {
      "epoch": 3.799512971072838,
      "grad_norm": 0.1570189893245697,
      "learning_rate": 2.416794607381246e-06,
      "loss": 0.01,
      "step": 2321700
    },
    {
      "epoch": 3.7995457015114917,
      "grad_norm": 0.320276141166687,
      "learning_rate": 2.416728715167729e-06,
      "loss": 0.0096,
      "step": 2321720
    },
    {
      "epoch": 3.799578431950145,
      "grad_norm": 0.12116233259439468,
      "learning_rate": 2.4166628229542115e-06,
      "loss": 0.0124,
      "step": 2321740
    },
    {
      "epoch": 3.7996111623887985,
      "grad_norm": 0.760024905204773,
      "learning_rate": 2.4165969307406943e-06,
      "loss": 0.0124,
      "step": 2321760
    },
    {
      "epoch": 3.7996438928274516,
      "grad_norm": 0.7285146713256836,
      "learning_rate": 2.4165310385271774e-06,
      "loss": 0.0129,
      "step": 2321780
    },
    {
      "epoch": 3.7996766232661052,
      "grad_norm": 0.27226176857948303,
      "learning_rate": 2.4164651463136606e-06,
      "loss": 0.0093,
      "step": 2321800
    },
    {
      "epoch": 3.7997093537047584,
      "grad_norm": 0.2778991460800171,
      "learning_rate": 2.4163992541001433e-06,
      "loss": 0.0151,
      "step": 2321820
    },
    {
      "epoch": 3.7997420841434115,
      "grad_norm": 0.14780743420124054,
      "learning_rate": 2.416333361886626e-06,
      "loss": 0.0122,
      "step": 2321840
    },
    {
      "epoch": 3.799774814582065,
      "grad_norm": 0.4248216152191162,
      "learning_rate": 2.416267469673109e-06,
      "loss": 0.0096,
      "step": 2321860
    },
    {
      "epoch": 3.7998075450207183,
      "grad_norm": 0.14166001975536346,
      "learning_rate": 2.416201577459592e-06,
      "loss": 0.0121,
      "step": 2321880
    },
    {
      "epoch": 3.799840275459372,
      "grad_norm": 1.1551270484924316,
      "learning_rate": 2.4161356852460747e-06,
      "loss": 0.012,
      "step": 2321900
    },
    {
      "epoch": 3.799873005898025,
      "grad_norm": 0.16404935717582703,
      "learning_rate": 2.4160697930325575e-06,
      "loss": 0.0123,
      "step": 2321920
    },
    {
      "epoch": 3.7999057363366786,
      "grad_norm": 0.20709937810897827,
      "learning_rate": 2.41600390081904e-06,
      "loss": 0.0093,
      "step": 2321940
    },
    {
      "epoch": 3.7999384667753318,
      "grad_norm": 0.28934767842292786,
      "learning_rate": 2.4159380086055234e-06,
      "loss": 0.0115,
      "step": 2321960
    },
    {
      "epoch": 3.799971197213985,
      "grad_norm": 0.15110035240650177,
      "learning_rate": 2.415872116392006e-06,
      "loss": 0.0089,
      "step": 2321980
    },
    {
      "epoch": 3.8000039276526385,
      "grad_norm": 0.252211332321167,
      "learning_rate": 2.4158062241784893e-06,
      "loss": 0.0081,
      "step": 2322000
    },
    {
      "epoch": 3.8000366580912917,
      "grad_norm": 0.22947318851947784,
      "learning_rate": 2.415740331964972e-06,
      "loss": 0.0078,
      "step": 2322020
    },
    {
      "epoch": 3.800069388529945,
      "grad_norm": 0.12279748171567917,
      "learning_rate": 2.4156744397514547e-06,
      "loss": 0.0128,
      "step": 2322040
    },
    {
      "epoch": 3.8001021189685984,
      "grad_norm": 0.15239489078521729,
      "learning_rate": 2.4156085475379375e-06,
      "loss": 0.0095,
      "step": 2322060
    },
    {
      "epoch": 3.800134849407252,
      "grad_norm": 0.04376070946455002,
      "learning_rate": 2.4155426553244206e-06,
      "loss": 0.01,
      "step": 2322080
    },
    {
      "epoch": 3.800167579845905,
      "grad_norm": 0.3607940077781677,
      "learning_rate": 2.4154767631109034e-06,
      "loss": 0.0115,
      "step": 2322100
    },
    {
      "epoch": 3.8002003102845583,
      "grad_norm": 0.42818066477775574,
      "learning_rate": 2.415410870897386e-06,
      "loss": 0.0088,
      "step": 2322120
    },
    {
      "epoch": 3.800233040723212,
      "grad_norm": 0.26984554529190063,
      "learning_rate": 2.4153449786838693e-06,
      "loss": 0.011,
      "step": 2322140
    },
    {
      "epoch": 3.800265771161865,
      "grad_norm": 0.10479390621185303,
      "learning_rate": 2.415279086470352e-06,
      "loss": 0.0109,
      "step": 2322160
    },
    {
      "epoch": 3.800298501600518,
      "grad_norm": 0.2494942992925644,
      "learning_rate": 2.4152131942568348e-06,
      "loss": 0.0086,
      "step": 2322180
    },
    {
      "epoch": 3.800331232039172,
      "grad_norm": 0.14195358753204346,
      "learning_rate": 2.415147302043318e-06,
      "loss": 0.0108,
      "step": 2322200
    },
    {
      "epoch": 3.8003639624778254,
      "grad_norm": 0.19930899143218994,
      "learning_rate": 2.4150814098298007e-06,
      "loss": 0.0138,
      "step": 2322220
    },
    {
      "epoch": 3.8003966929164785,
      "grad_norm": 0.12701143324375153,
      "learning_rate": 2.4150155176162834e-06,
      "loss": 0.0063,
      "step": 2322240
    },
    {
      "epoch": 3.8004294233551317,
      "grad_norm": 0.6606073975563049,
      "learning_rate": 2.414949625402766e-06,
      "loss": 0.0147,
      "step": 2322260
    },
    {
      "epoch": 3.8004621537937853,
      "grad_norm": 0.44874492287635803,
      "learning_rate": 2.4148837331892493e-06,
      "loss": 0.0086,
      "step": 2322280
    },
    {
      "epoch": 3.8004948842324384,
      "grad_norm": 0.11052294820547104,
      "learning_rate": 2.414817840975732e-06,
      "loss": 0.0102,
      "step": 2322300
    },
    {
      "epoch": 3.8005276146710916,
      "grad_norm": 0.20007246732711792,
      "learning_rate": 2.4147519487622152e-06,
      "loss": 0.0097,
      "step": 2322320
    },
    {
      "epoch": 3.800560345109745,
      "grad_norm": 0.3330749571323395,
      "learning_rate": 2.414686056548698e-06,
      "loss": 0.0076,
      "step": 2322340
    },
    {
      "epoch": 3.8005930755483988,
      "grad_norm": 0.2965734302997589,
      "learning_rate": 2.4146201643351807e-06,
      "loss": 0.0096,
      "step": 2322360
    },
    {
      "epoch": 3.800625805987052,
      "grad_norm": 0.4875999391078949,
      "learning_rate": 2.4145542721216634e-06,
      "loss": 0.0111,
      "step": 2322380
    },
    {
      "epoch": 3.800658536425705,
      "grad_norm": 0.29313188791275024,
      "learning_rate": 2.4144883799081466e-06,
      "loss": 0.0128,
      "step": 2322400
    },
    {
      "epoch": 3.8006912668643587,
      "grad_norm": 0.3095300793647766,
      "learning_rate": 2.4144224876946293e-06,
      "loss": 0.0099,
      "step": 2322420
    },
    {
      "epoch": 3.800723997303012,
      "grad_norm": 0.32133084535598755,
      "learning_rate": 2.414356595481112e-06,
      "loss": 0.0085,
      "step": 2322440
    },
    {
      "epoch": 3.800756727741665,
      "grad_norm": 0.34218794107437134,
      "learning_rate": 2.414290703267595e-06,
      "loss": 0.0122,
      "step": 2322460
    },
    {
      "epoch": 3.8007894581803185,
      "grad_norm": 0.4932064712047577,
      "learning_rate": 2.414224811054078e-06,
      "loss": 0.0113,
      "step": 2322480
    },
    {
      "epoch": 3.800822188618972,
      "grad_norm": 0.31875529885292053,
      "learning_rate": 2.4141589188405607e-06,
      "loss": 0.011,
      "step": 2322500
    },
    {
      "epoch": 3.8008549190576253,
      "grad_norm": 0.12256389856338501,
      "learning_rate": 2.414093026627044e-06,
      "loss": 0.0195,
      "step": 2322520
    },
    {
      "epoch": 3.8008876494962784,
      "grad_norm": 0.16064858436584473,
      "learning_rate": 2.4140271344135266e-06,
      "loss": 0.0105,
      "step": 2322540
    },
    {
      "epoch": 3.800920379934932,
      "grad_norm": 0.21605181694030762,
      "learning_rate": 2.4139612422000094e-06,
      "loss": 0.0081,
      "step": 2322560
    },
    {
      "epoch": 3.800953110373585,
      "grad_norm": 0.43145763874053955,
      "learning_rate": 2.413895349986492e-06,
      "loss": 0.0096,
      "step": 2322580
    },
    {
      "epoch": 3.8009858408122383,
      "grad_norm": 0.40074628591537476,
      "learning_rate": 2.4138294577729753e-06,
      "loss": 0.0093,
      "step": 2322600
    },
    {
      "epoch": 3.801018571250892,
      "grad_norm": 0.42980971932411194,
      "learning_rate": 2.413763565559458e-06,
      "loss": 0.009,
      "step": 2322620
    },
    {
      "epoch": 3.801051301689545,
      "grad_norm": 0.19774751365184784,
      "learning_rate": 2.4136976733459407e-06,
      "loss": 0.0145,
      "step": 2322640
    },
    {
      "epoch": 3.8010840321281987,
      "grad_norm": 0.11420122534036636,
      "learning_rate": 2.413631781132424e-06,
      "loss": 0.008,
      "step": 2322660
    },
    {
      "epoch": 3.801116762566852,
      "grad_norm": null,
      "learning_rate": 2.4135658889189066e-06,
      "loss": 0.0128,
      "step": 2322680
    },
    {
      "epoch": 3.8011494930055054,
      "grad_norm": 0.14652878046035767,
      "learning_rate": 2.4134999967053894e-06,
      "loss": 0.0089,
      "step": 2322700
    },
    {
      "epoch": 3.8011822234441586,
      "grad_norm": 0.538636326789856,
      "learning_rate": 2.4134341044918726e-06,
      "loss": 0.0145,
      "step": 2322720
    },
    {
      "epoch": 3.8012149538828117,
      "grad_norm": 0.09127305448055267,
      "learning_rate": 2.4133682122783553e-06,
      "loss": 0.0095,
      "step": 2322740
    },
    {
      "epoch": 3.8012476843214653,
      "grad_norm": 0.14524297416210175,
      "learning_rate": 2.413302320064838e-06,
      "loss": 0.0115,
      "step": 2322760
    },
    {
      "epoch": 3.8012804147601185,
      "grad_norm": 0.6536385416984558,
      "learning_rate": 2.4132364278513208e-06,
      "loss": 0.0115,
      "step": 2322780
    },
    {
      "epoch": 3.801313145198772,
      "grad_norm": 0.18480895459651947,
      "learning_rate": 2.413170535637804e-06,
      "loss": 0.0072,
      "step": 2322800
    },
    {
      "epoch": 3.801345875637425,
      "grad_norm": 0.7274032235145569,
      "learning_rate": 2.4131046434242867e-06,
      "loss": 0.0079,
      "step": 2322820
    },
    {
      "epoch": 3.801378606076079,
      "grad_norm": 0.11597512662410736,
      "learning_rate": 2.41303875121077e-06,
      "loss": 0.0093,
      "step": 2322840
    },
    {
      "epoch": 3.801411336514732,
      "grad_norm": 0.192331001162529,
      "learning_rate": 2.4129728589972526e-06,
      "loss": 0.0106,
      "step": 2322860
    },
    {
      "epoch": 3.801444066953385,
      "grad_norm": 0.28233540058135986,
      "learning_rate": 2.4129069667837353e-06,
      "loss": 0.0076,
      "step": 2322880
    },
    {
      "epoch": 3.8014767973920387,
      "grad_norm": 0.1503845900297165,
      "learning_rate": 2.412841074570218e-06,
      "loss": 0.0136,
      "step": 2322900
    },
    {
      "epoch": 3.801509527830692,
      "grad_norm": 0.22757713496685028,
      "learning_rate": 2.4127751823567012e-06,
      "loss": 0.0089,
      "step": 2322920
    },
    {
      "epoch": 3.8015422582693454,
      "grad_norm": 0.4406333863735199,
      "learning_rate": 2.412709290143184e-06,
      "loss": 0.0095,
      "step": 2322940
    },
    {
      "epoch": 3.8015749887079986,
      "grad_norm": 0.20402854681015015,
      "learning_rate": 2.4126433979296667e-06,
      "loss": 0.0099,
      "step": 2322960
    },
    {
      "epoch": 3.801607719146652,
      "grad_norm": 0.3297272026538849,
      "learning_rate": 2.4125775057161494e-06,
      "loss": 0.0097,
      "step": 2322980
    },
    {
      "epoch": 3.8016404495853053,
      "grad_norm": 0.16863331198692322,
      "learning_rate": 2.4125116135026326e-06,
      "loss": 0.0116,
      "step": 2323000
    },
    {
      "epoch": 3.8016731800239585,
      "grad_norm": 0.2282964438199997,
      "learning_rate": 2.4124457212891158e-06,
      "loss": 0.0121,
      "step": 2323020
    },
    {
      "epoch": 3.801705910462612,
      "grad_norm": 0.4429379105567932,
      "learning_rate": 2.4123798290755985e-06,
      "loss": 0.0082,
      "step": 2323040
    },
    {
      "epoch": 3.8017386409012652,
      "grad_norm": 0.2949759066104889,
      "learning_rate": 2.4123139368620812e-06,
      "loss": 0.0082,
      "step": 2323060
    },
    {
      "epoch": 3.801771371339919,
      "grad_norm": 1.067525863647461,
      "learning_rate": 2.412248044648564e-06,
      "loss": 0.0139,
      "step": 2323080
    },
    {
      "epoch": 3.801804101778572,
      "grad_norm": 0.3463304042816162,
      "learning_rate": 2.412182152435047e-06,
      "loss": 0.0106,
      "step": 2323100
    },
    {
      "epoch": 3.8018368322172256,
      "grad_norm": 0.20218580961227417,
      "learning_rate": 2.41211626022153e-06,
      "loss": 0.0085,
      "step": 2323120
    },
    {
      "epoch": 3.8018695626558787,
      "grad_norm": 0.11064974218606949,
      "learning_rate": 2.4120503680080126e-06,
      "loss": 0.0174,
      "step": 2323140
    },
    {
      "epoch": 3.801902293094532,
      "grad_norm": 0.32606273889541626,
      "learning_rate": 2.4119844757944954e-06,
      "loss": 0.0152,
      "step": 2323160
    },
    {
      "epoch": 3.8019350235331855,
      "grad_norm": 0.14088524878025055,
      "learning_rate": 2.411918583580978e-06,
      "loss": 0.0087,
      "step": 2323180
    },
    {
      "epoch": 3.8019677539718386,
      "grad_norm": 0.2227887362241745,
      "learning_rate": 2.4118526913674613e-06,
      "loss": 0.0081,
      "step": 2323200
    },
    {
      "epoch": 3.802000484410492,
      "grad_norm": 0.21685166656970978,
      "learning_rate": 2.4117867991539444e-06,
      "loss": 0.0074,
      "step": 2323220
    },
    {
      "epoch": 3.8020332148491454,
      "grad_norm": 0.9972677826881409,
      "learning_rate": 2.411720906940427e-06,
      "loss": 0.018,
      "step": 2323240
    },
    {
      "epoch": 3.802065945287799,
      "grad_norm": 0.109292171895504,
      "learning_rate": 2.41165501472691e-06,
      "loss": 0.0122,
      "step": 2323260
    },
    {
      "epoch": 3.802098675726452,
      "grad_norm": 0.18697194755077362,
      "learning_rate": 2.4115891225133927e-06,
      "loss": 0.0164,
      "step": 2323280
    },
    {
      "epoch": 3.8021314061651053,
      "grad_norm": 0.27252060174942017,
      "learning_rate": 2.411523230299876e-06,
      "loss": 0.0104,
      "step": 2323300
    },
    {
      "epoch": 3.802164136603759,
      "grad_norm": 0.6118445992469788,
      "learning_rate": 2.4114573380863586e-06,
      "loss": 0.0139,
      "step": 2323320
    },
    {
      "epoch": 3.802196867042412,
      "grad_norm": 0.10836640000343323,
      "learning_rate": 2.4113914458728413e-06,
      "loss": 0.0103,
      "step": 2323340
    },
    {
      "epoch": 3.8022295974810656,
      "grad_norm": 0.06620926409959793,
      "learning_rate": 2.411325553659324e-06,
      "loss": 0.0068,
      "step": 2323360
    },
    {
      "epoch": 3.8022623279197187,
      "grad_norm": 0.2848722040653229,
      "learning_rate": 2.411259661445807e-06,
      "loss": 0.0085,
      "step": 2323380
    },
    {
      "epoch": 3.8022950583583723,
      "grad_norm": 0.4567141532897949,
      "learning_rate": 2.41119376923229e-06,
      "loss": 0.0129,
      "step": 2323400
    },
    {
      "epoch": 3.8023277887970255,
      "grad_norm": 1.0592550039291382,
      "learning_rate": 2.411127877018773e-06,
      "loss": 0.0095,
      "step": 2323420
    },
    {
      "epoch": 3.8023605192356786,
      "grad_norm": 0.17921893298625946,
      "learning_rate": 2.411061984805256e-06,
      "loss": 0.0098,
      "step": 2323440
    },
    {
      "epoch": 3.8023932496743322,
      "grad_norm": 0.3972625732421875,
      "learning_rate": 2.4109960925917386e-06,
      "loss": 0.013,
      "step": 2323460
    },
    {
      "epoch": 3.8024259801129854,
      "grad_norm": 0.3151710331439972,
      "learning_rate": 2.4109302003782213e-06,
      "loss": 0.0131,
      "step": 2323480
    },
    {
      "epoch": 3.802458710551639,
      "grad_norm": 0.3052598536014557,
      "learning_rate": 2.4108643081647045e-06,
      "loss": 0.0096,
      "step": 2323500
    },
    {
      "epoch": 3.802491440990292,
      "grad_norm": 0.38730692863464355,
      "learning_rate": 2.4107984159511872e-06,
      "loss": 0.012,
      "step": 2323520
    },
    {
      "epoch": 3.8025241714289457,
      "grad_norm": 0.39882805943489075,
      "learning_rate": 2.4107325237376704e-06,
      "loss": 0.0107,
      "step": 2323540
    },
    {
      "epoch": 3.802556901867599,
      "grad_norm": 0.2514398992061615,
      "learning_rate": 2.410666631524153e-06,
      "loss": 0.0185,
      "step": 2323560
    },
    {
      "epoch": 3.802589632306252,
      "grad_norm": 0.2180197536945343,
      "learning_rate": 2.410600739310636e-06,
      "loss": 0.0115,
      "step": 2323580
    },
    {
      "epoch": 3.8026223627449056,
      "grad_norm": 0.19860422611236572,
      "learning_rate": 2.4105348470971186e-06,
      "loss": 0.0077,
      "step": 2323600
    },
    {
      "epoch": 3.8026550931835588,
      "grad_norm": 0.2136741429567337,
      "learning_rate": 2.4104689548836018e-06,
      "loss": 0.0079,
      "step": 2323620
    },
    {
      "epoch": 3.802687823622212,
      "grad_norm": 0.10931981354951859,
      "learning_rate": 2.4104030626700845e-06,
      "loss": 0.0099,
      "step": 2323640
    },
    {
      "epoch": 3.8027205540608655,
      "grad_norm": 0.2736540138721466,
      "learning_rate": 2.4103371704565672e-06,
      "loss": 0.0145,
      "step": 2323660
    },
    {
      "epoch": 3.802753284499519,
      "grad_norm": 0.39645713567733765,
      "learning_rate": 2.41027127824305e-06,
      "loss": 0.0126,
      "step": 2323680
    },
    {
      "epoch": 3.8027860149381723,
      "grad_norm": 0.17813549935817719,
      "learning_rate": 2.410205386029533e-06,
      "loss": 0.0106,
      "step": 2323700
    },
    {
      "epoch": 3.8028187453768254,
      "grad_norm": 0.3427185118198395,
      "learning_rate": 2.410139493816016e-06,
      "loss": 0.0114,
      "step": 2323720
    },
    {
      "epoch": 3.802851475815479,
      "grad_norm": 0.23005478084087372,
      "learning_rate": 2.410073601602499e-06,
      "loss": 0.0083,
      "step": 2323740
    },
    {
      "epoch": 3.802884206254132,
      "grad_norm": 0.27530258893966675,
      "learning_rate": 2.410007709388982e-06,
      "loss": 0.0134,
      "step": 2323760
    },
    {
      "epoch": 3.8029169366927853,
      "grad_norm": 1.1710315942764282,
      "learning_rate": 2.4099418171754645e-06,
      "loss": 0.0168,
      "step": 2323780
    },
    {
      "epoch": 3.802949667131439,
      "grad_norm": 0.17866916954517365,
      "learning_rate": 2.4098759249619473e-06,
      "loss": 0.011,
      "step": 2323800
    },
    {
      "epoch": 3.8029823975700925,
      "grad_norm": 0.13922718167304993,
      "learning_rate": 2.4098100327484304e-06,
      "loss": 0.008,
      "step": 2323820
    },
    {
      "epoch": 3.8030151280087456,
      "grad_norm": 0.22466397285461426,
      "learning_rate": 2.409744140534913e-06,
      "loss": 0.0123,
      "step": 2323840
    },
    {
      "epoch": 3.803047858447399,
      "grad_norm": 0.09076923131942749,
      "learning_rate": 2.409678248321396e-06,
      "loss": 0.011,
      "step": 2323860
    },
    {
      "epoch": 3.8030805888860524,
      "grad_norm": 0.058308109641075134,
      "learning_rate": 2.4096123561078787e-06,
      "loss": 0.0071,
      "step": 2323880
    },
    {
      "epoch": 3.8031133193247055,
      "grad_norm": 0.31804850697517395,
      "learning_rate": 2.409546463894362e-06,
      "loss": 0.0111,
      "step": 2323900
    },
    {
      "epoch": 3.8031460497633587,
      "grad_norm": 0.46060603857040405,
      "learning_rate": 2.4094805716808446e-06,
      "loss": 0.0079,
      "step": 2323920
    },
    {
      "epoch": 3.8031787802020123,
      "grad_norm": 0.4325012266635895,
      "learning_rate": 2.4094146794673277e-06,
      "loss": 0.0098,
      "step": 2323940
    },
    {
      "epoch": 3.803211510640666,
      "grad_norm": 0.31278887391090393,
      "learning_rate": 2.4093487872538105e-06,
      "loss": 0.0097,
      "step": 2323960
    },
    {
      "epoch": 3.803244241079319,
      "grad_norm": 0.37336015701293945,
      "learning_rate": 2.409282895040293e-06,
      "loss": 0.0089,
      "step": 2323980
    },
    {
      "epoch": 3.803276971517972,
      "grad_norm": 0.3250342607498169,
      "learning_rate": 2.409217002826776e-06,
      "loss": 0.0099,
      "step": 2324000
    },
    {
      "epoch": 3.8033097019566258,
      "grad_norm": 0.0908074826002121,
      "learning_rate": 2.409151110613259e-06,
      "loss": 0.0059,
      "step": 2324020
    },
    {
      "epoch": 3.803342432395279,
      "grad_norm": 0.36190760135650635,
      "learning_rate": 2.409085218399742e-06,
      "loss": 0.0074,
      "step": 2324040
    },
    {
      "epoch": 3.803375162833932,
      "grad_norm": 0.23266306519508362,
      "learning_rate": 2.4090193261862246e-06,
      "loss": 0.0106,
      "step": 2324060
    },
    {
      "epoch": 3.8034078932725857,
      "grad_norm": 0.14066001772880554,
      "learning_rate": 2.4089534339727077e-06,
      "loss": 0.0111,
      "step": 2324080
    },
    {
      "epoch": 3.8034406237112393,
      "grad_norm": 0.7007961273193359,
      "learning_rate": 2.4088875417591905e-06,
      "loss": 0.0128,
      "step": 2324100
    },
    {
      "epoch": 3.8034733541498924,
      "grad_norm": 0.15673618018627167,
      "learning_rate": 2.4088216495456732e-06,
      "loss": 0.009,
      "step": 2324120
    },
    {
      "epoch": 3.8035060845885456,
      "grad_norm": 0.2040226012468338,
      "learning_rate": 2.4087557573321564e-06,
      "loss": 0.0066,
      "step": 2324140
    },
    {
      "epoch": 3.803538815027199,
      "grad_norm": 0.07033791393041611,
      "learning_rate": 2.408689865118639e-06,
      "loss": 0.0113,
      "step": 2324160
    },
    {
      "epoch": 3.8035715454658523,
      "grad_norm": 0.13332398235797882,
      "learning_rate": 2.408623972905122e-06,
      "loss": 0.0093,
      "step": 2324180
    },
    {
      "epoch": 3.8036042759045054,
      "grad_norm": 0.2784142792224884,
      "learning_rate": 2.4085580806916046e-06,
      "loss": 0.0096,
      "step": 2324200
    },
    {
      "epoch": 3.803637006343159,
      "grad_norm": 0.7293435335159302,
      "learning_rate": 2.4084921884780878e-06,
      "loss": 0.013,
      "step": 2324220
    },
    {
      "epoch": 3.803669736781812,
      "grad_norm": 0.1634581834077835,
      "learning_rate": 2.408426296264571e-06,
      "loss": 0.0091,
      "step": 2324240
    },
    {
      "epoch": 3.803702467220466,
      "grad_norm": 0.2544553279876709,
      "learning_rate": 2.4083604040510537e-06,
      "loss": 0.0173,
      "step": 2324260
    },
    {
      "epoch": 3.803735197659119,
      "grad_norm": 0.09058673679828644,
      "learning_rate": 2.4082945118375364e-06,
      "loss": 0.0113,
      "step": 2324280
    },
    {
      "epoch": 3.8037679280977725,
      "grad_norm": 0.13262122869491577,
      "learning_rate": 2.408228619624019e-06,
      "loss": 0.0138,
      "step": 2324300
    },
    {
      "epoch": 3.8038006585364257,
      "grad_norm": 0.10423678159713745,
      "learning_rate": 2.4081627274105023e-06,
      "loss": 0.0076,
      "step": 2324320
    },
    {
      "epoch": 3.803833388975079,
      "grad_norm": 0.06591964513063431,
      "learning_rate": 2.408096835196985e-06,
      "loss": 0.0065,
      "step": 2324340
    },
    {
      "epoch": 3.8038661194137324,
      "grad_norm": 0.30652037262916565,
      "learning_rate": 2.408030942983468e-06,
      "loss": 0.0059,
      "step": 2324360
    },
    {
      "epoch": 3.8038988498523856,
      "grad_norm": 0.8712261319160461,
      "learning_rate": 2.4079650507699505e-06,
      "loss": 0.0106,
      "step": 2324380
    },
    {
      "epoch": 3.803931580291039,
      "grad_norm": 0.15247642993927002,
      "learning_rate": 2.4078991585564333e-06,
      "loss": 0.0103,
      "step": 2324400
    },
    {
      "epoch": 3.8039643107296923,
      "grad_norm": 0.360483318567276,
      "learning_rate": 2.4078332663429164e-06,
      "loss": 0.0104,
      "step": 2324420
    },
    {
      "epoch": 3.803997041168346,
      "grad_norm": 0.1523985117673874,
      "learning_rate": 2.4077673741293996e-06,
      "loss": 0.0083,
      "step": 2324440
    },
    {
      "epoch": 3.804029771606999,
      "grad_norm": 0.358585000038147,
      "learning_rate": 2.4077014819158823e-06,
      "loss": 0.0112,
      "step": 2324460
    },
    {
      "epoch": 3.804062502045652,
      "grad_norm": 0.3189743161201477,
      "learning_rate": 2.407635589702365e-06,
      "loss": 0.0104,
      "step": 2324480
    },
    {
      "epoch": 3.804095232484306,
      "grad_norm": 0.6250844597816467,
      "learning_rate": 2.407569697488848e-06,
      "loss": 0.0139,
      "step": 2324500
    },
    {
      "epoch": 3.804127962922959,
      "grad_norm": 0.28037312626838684,
      "learning_rate": 2.407503805275331e-06,
      "loss": 0.0111,
      "step": 2324520
    },
    {
      "epoch": 3.8041606933616126,
      "grad_norm": 0.07977104187011719,
      "learning_rate": 2.4074379130618137e-06,
      "loss": 0.0117,
      "step": 2324540
    },
    {
      "epoch": 3.8041934238002657,
      "grad_norm": 0.5521059632301331,
      "learning_rate": 2.4073720208482965e-06,
      "loss": 0.0086,
      "step": 2324560
    },
    {
      "epoch": 3.8042261542389193,
      "grad_norm": 0.06310203671455383,
      "learning_rate": 2.407306128634779e-06,
      "loss": 0.0082,
      "step": 2324580
    },
    {
      "epoch": 3.8042588846775724,
      "grad_norm": 0.1707829236984253,
      "learning_rate": 2.4072402364212624e-06,
      "loss": 0.0098,
      "step": 2324600
    },
    {
      "epoch": 3.8042916151162256,
      "grad_norm": 0.32700517773628235,
      "learning_rate": 2.407174344207745e-06,
      "loss": 0.0115,
      "step": 2324620
    },
    {
      "epoch": 3.804324345554879,
      "grad_norm": 0.36555710434913635,
      "learning_rate": 2.4071084519942283e-06,
      "loss": 0.0084,
      "step": 2324640
    },
    {
      "epoch": 3.8043570759935323,
      "grad_norm": 0.6425320506095886,
      "learning_rate": 2.407042559780711e-06,
      "loss": 0.0108,
      "step": 2324660
    },
    {
      "epoch": 3.804389806432186,
      "grad_norm": 0.4435916841030121,
      "learning_rate": 2.4069766675671938e-06,
      "loss": 0.0122,
      "step": 2324680
    },
    {
      "epoch": 3.804422536870839,
      "grad_norm": 0.17504028975963593,
      "learning_rate": 2.4069107753536765e-06,
      "loss": 0.008,
      "step": 2324700
    },
    {
      "epoch": 3.8044552673094927,
      "grad_norm": 0.10632355511188507,
      "learning_rate": 2.4068448831401597e-06,
      "loss": 0.0133,
      "step": 2324720
    },
    {
      "epoch": 3.804487997748146,
      "grad_norm": 0.22156137228012085,
      "learning_rate": 2.4067789909266424e-06,
      "loss": 0.0079,
      "step": 2324740
    },
    {
      "epoch": 3.804520728186799,
      "grad_norm": 0.5181034207344055,
      "learning_rate": 2.406713098713125e-06,
      "loss": 0.0116,
      "step": 2324760
    },
    {
      "epoch": 3.8045534586254526,
      "grad_norm": 0.4114243984222412,
      "learning_rate": 2.4066472064996083e-06,
      "loss": 0.0119,
      "step": 2324780
    },
    {
      "epoch": 3.8045861890641057,
      "grad_norm": 0.15347139537334442,
      "learning_rate": 2.406581314286091e-06,
      "loss": 0.0127,
      "step": 2324800
    },
    {
      "epoch": 3.8046189195027593,
      "grad_norm": 0.6788878440856934,
      "learning_rate": 2.4065154220725738e-06,
      "loss": 0.0088,
      "step": 2324820
    },
    {
      "epoch": 3.8046516499414125,
      "grad_norm": 0.18609380722045898,
      "learning_rate": 2.406449529859057e-06,
      "loss": 0.0088,
      "step": 2324840
    },
    {
      "epoch": 3.804684380380066,
      "grad_norm": 0.13229812681674957,
      "learning_rate": 2.4063836376455397e-06,
      "loss": 0.012,
      "step": 2324860
    },
    {
      "epoch": 3.804717110818719,
      "grad_norm": 0.42102259397506714,
      "learning_rate": 2.4063177454320224e-06,
      "loss": 0.0163,
      "step": 2324880
    },
    {
      "epoch": 3.8047498412573724,
      "grad_norm": 0.19554515182971954,
      "learning_rate": 2.406251853218505e-06,
      "loss": 0.0111,
      "step": 2324900
    },
    {
      "epoch": 3.804782571696026,
      "grad_norm": 0.18644671142101288,
      "learning_rate": 2.4061859610049883e-06,
      "loss": 0.0094,
      "step": 2324920
    },
    {
      "epoch": 3.804815302134679,
      "grad_norm": 0.13047416508197784,
      "learning_rate": 2.406120068791471e-06,
      "loss": 0.0092,
      "step": 2324940
    },
    {
      "epoch": 3.8048480325733327,
      "grad_norm": 0.07525795698165894,
      "learning_rate": 2.4060541765779542e-06,
      "loss": 0.0093,
      "step": 2324960
    },
    {
      "epoch": 3.804880763011986,
      "grad_norm": 0.19108623266220093,
      "learning_rate": 2.405988284364437e-06,
      "loss": 0.0148,
      "step": 2324980
    },
    {
      "epoch": 3.8049134934506395,
      "grad_norm": 0.34135904908180237,
      "learning_rate": 2.4059223921509197e-06,
      "loss": 0.0131,
      "step": 2325000
    },
    {
      "epoch": 3.8049462238892926,
      "grad_norm": 0.6449105739593506,
      "learning_rate": 2.4058564999374024e-06,
      "loss": 0.0066,
      "step": 2325020
    },
    {
      "epoch": 3.8049789543279458,
      "grad_norm": 0.2411920130252838,
      "learning_rate": 2.4057906077238856e-06,
      "loss": 0.015,
      "step": 2325040
    },
    {
      "epoch": 3.8050116847665993,
      "grad_norm": 0.06727391481399536,
      "learning_rate": 2.4057247155103683e-06,
      "loss": 0.0094,
      "step": 2325060
    },
    {
      "epoch": 3.8050444152052525,
      "grad_norm": 0.14732122421264648,
      "learning_rate": 2.405658823296851e-06,
      "loss": 0.0126,
      "step": 2325080
    },
    {
      "epoch": 3.8050771456439056,
      "grad_norm": 0.20964865386486053,
      "learning_rate": 2.405592931083334e-06,
      "loss": 0.0104,
      "step": 2325100
    },
    {
      "epoch": 3.8051098760825592,
      "grad_norm": 0.14835602045059204,
      "learning_rate": 2.405527038869817e-06,
      "loss": 0.0115,
      "step": 2325120
    },
    {
      "epoch": 3.805142606521213,
      "grad_norm": 0.42973265051841736,
      "learning_rate": 2.4054611466562997e-06,
      "loss": 0.0088,
      "step": 2325140
    },
    {
      "epoch": 3.805175336959866,
      "grad_norm": 0.14954321086406708,
      "learning_rate": 2.405395254442783e-06,
      "loss": 0.0074,
      "step": 2325160
    },
    {
      "epoch": 3.805208067398519,
      "grad_norm": 0.14288292825222015,
      "learning_rate": 2.4053293622292656e-06,
      "loss": 0.0112,
      "step": 2325180
    },
    {
      "epoch": 3.8052407978371727,
      "grad_norm": 0.16669391095638275,
      "learning_rate": 2.4052634700157484e-06,
      "loss": 0.0103,
      "step": 2325200
    },
    {
      "epoch": 3.805273528275826,
      "grad_norm": 0.2735497057437897,
      "learning_rate": 2.405197577802231e-06,
      "loss": 0.0115,
      "step": 2325220
    },
    {
      "epoch": 3.805306258714479,
      "grad_norm": 0.6559970378875732,
      "learning_rate": 2.4051316855887143e-06,
      "loss": 0.0101,
      "step": 2325240
    },
    {
      "epoch": 3.8053389891531326,
      "grad_norm": 0.4210783839225769,
      "learning_rate": 2.405065793375197e-06,
      "loss": 0.0101,
      "step": 2325260
    },
    {
      "epoch": 3.805371719591786,
      "grad_norm": 0.3791930377483368,
      "learning_rate": 2.4049999011616798e-06,
      "loss": 0.0067,
      "step": 2325280
    },
    {
      "epoch": 3.8054044500304394,
      "grad_norm": 0.21262486279010773,
      "learning_rate": 2.404934008948163e-06,
      "loss": 0.0117,
      "step": 2325300
    },
    {
      "epoch": 3.8054371804690925,
      "grad_norm": 0.13215704262256622,
      "learning_rate": 2.4048681167346457e-06,
      "loss": 0.015,
      "step": 2325320
    },
    {
      "epoch": 3.805469910907746,
      "grad_norm": 0.36360710859298706,
      "learning_rate": 2.4048022245211284e-06,
      "loss": 0.0077,
      "step": 2325340
    },
    {
      "epoch": 3.8055026413463993,
      "grad_norm": 0.13857600092887878,
      "learning_rate": 2.4047363323076116e-06,
      "loss": 0.0112,
      "step": 2325360
    },
    {
      "epoch": 3.8055353717850524,
      "grad_norm": 0.3611665666103363,
      "learning_rate": 2.4046704400940943e-06,
      "loss": 0.0086,
      "step": 2325380
    },
    {
      "epoch": 3.805568102223706,
      "grad_norm": 0.34007391333580017,
      "learning_rate": 2.404604547880577e-06,
      "loss": 0.0082,
      "step": 2325400
    },
    {
      "epoch": 3.8056008326623596,
      "grad_norm": 0.19695773720741272,
      "learning_rate": 2.4045386556670598e-06,
      "loss": 0.0099,
      "step": 2325420
    },
    {
      "epoch": 3.8056335631010128,
      "grad_norm": 0.20860828459262848,
      "learning_rate": 2.404472763453543e-06,
      "loss": 0.0109,
      "step": 2325440
    },
    {
      "epoch": 3.805666293539666,
      "grad_norm": 0.12359564006328583,
      "learning_rate": 2.4044068712400257e-06,
      "loss": 0.0121,
      "step": 2325460
    },
    {
      "epoch": 3.8056990239783195,
      "grad_norm": 0.322957843542099,
      "learning_rate": 2.404340979026509e-06,
      "loss": 0.0076,
      "step": 2325480
    },
    {
      "epoch": 3.8057317544169726,
      "grad_norm": 0.5111436247825623,
      "learning_rate": 2.4042750868129916e-06,
      "loss": 0.012,
      "step": 2325500
    },
    {
      "epoch": 3.805764484855626,
      "grad_norm": 0.28490185737609863,
      "learning_rate": 2.4042091945994743e-06,
      "loss": 0.0079,
      "step": 2325520
    },
    {
      "epoch": 3.8057972152942794,
      "grad_norm": 0.3205191493034363,
      "learning_rate": 2.404143302385957e-06,
      "loss": 0.009,
      "step": 2325540
    },
    {
      "epoch": 3.805829945732933,
      "grad_norm": 0.2865201532840729,
      "learning_rate": 2.4040774101724402e-06,
      "loss": 0.0077,
      "step": 2325560
    },
    {
      "epoch": 3.805862676171586,
      "grad_norm": 0.13018935918807983,
      "learning_rate": 2.404011517958923e-06,
      "loss": 0.0096,
      "step": 2325580
    },
    {
      "epoch": 3.8058954066102393,
      "grad_norm": 0.4243442118167877,
      "learning_rate": 2.4039456257454057e-06,
      "loss": 0.0119,
      "step": 2325600
    },
    {
      "epoch": 3.805928137048893,
      "grad_norm": 0.6520804762840271,
      "learning_rate": 2.4038797335318885e-06,
      "loss": 0.0101,
      "step": 2325620
    },
    {
      "epoch": 3.805960867487546,
      "grad_norm": 0.46478909254074097,
      "learning_rate": 2.4038138413183716e-06,
      "loss": 0.0103,
      "step": 2325640
    },
    {
      "epoch": 3.805993597926199,
      "grad_norm": 0.23016081750392914,
      "learning_rate": 2.4037479491048548e-06,
      "loss": 0.0121,
      "step": 2325660
    },
    {
      "epoch": 3.8060263283648528,
      "grad_norm": 0.3784906268119812,
      "learning_rate": 2.4036820568913375e-06,
      "loss": 0.0097,
      "step": 2325680
    },
    {
      "epoch": 3.806059058803506,
      "grad_norm": 0.1577877253293991,
      "learning_rate": 2.4036161646778203e-06,
      "loss": 0.0103,
      "step": 2325700
    },
    {
      "epoch": 3.8060917892421595,
      "grad_norm": 0.3642633557319641,
      "learning_rate": 2.403550272464303e-06,
      "loss": 0.0127,
      "step": 2325720
    },
    {
      "epoch": 3.8061245196808127,
      "grad_norm": 0.12669816613197327,
      "learning_rate": 2.403484380250786e-06,
      "loss": 0.0067,
      "step": 2325740
    },
    {
      "epoch": 3.8061572501194663,
      "grad_norm": 0.3408818542957306,
      "learning_rate": 2.403418488037269e-06,
      "loss": 0.0138,
      "step": 2325760
    },
    {
      "epoch": 3.8061899805581194,
      "grad_norm": 0.5944621562957764,
      "learning_rate": 2.4033525958237516e-06,
      "loss": 0.0117,
      "step": 2325780
    },
    {
      "epoch": 3.8062227109967726,
      "grad_norm": 0.2997450530529022,
      "learning_rate": 2.4032867036102344e-06,
      "loss": 0.0084,
      "step": 2325800
    },
    {
      "epoch": 3.806255441435426,
      "grad_norm": 0.16640529036521912,
      "learning_rate": 2.403220811396717e-06,
      "loss": 0.0101,
      "step": 2325820
    },
    {
      "epoch": 3.8062881718740793,
      "grad_norm": 0.27354666590690613,
      "learning_rate": 2.4031549191832003e-06,
      "loss": 0.0125,
      "step": 2325840
    },
    {
      "epoch": 3.806320902312733,
      "grad_norm": 1.2627497911453247,
      "learning_rate": 2.4030890269696834e-06,
      "loss": 0.0126,
      "step": 2325860
    },
    {
      "epoch": 3.806353632751386,
      "grad_norm": 0.13429439067840576,
      "learning_rate": 2.403023134756166e-06,
      "loss": 0.0131,
      "step": 2325880
    },
    {
      "epoch": 3.8063863631900396,
      "grad_norm": 0.3444347679615021,
      "learning_rate": 2.402957242542649e-06,
      "loss": 0.0174,
      "step": 2325900
    },
    {
      "epoch": 3.806419093628693,
      "grad_norm": 0.210514098405838,
      "learning_rate": 2.4028913503291317e-06,
      "loss": 0.0133,
      "step": 2325920
    },
    {
      "epoch": 3.806451824067346,
      "grad_norm": 0.15534666180610657,
      "learning_rate": 2.402825458115615e-06,
      "loss": 0.0092,
      "step": 2325940
    },
    {
      "epoch": 3.8064845545059995,
      "grad_norm": 0.5295615792274475,
      "learning_rate": 2.4027595659020976e-06,
      "loss": 0.0115,
      "step": 2325960
    },
    {
      "epoch": 3.8065172849446527,
      "grad_norm": 0.20836974680423737,
      "learning_rate": 2.4026936736885803e-06,
      "loss": 0.0063,
      "step": 2325980
    },
    {
      "epoch": 3.8065500153833063,
      "grad_norm": 0.15117764472961426,
      "learning_rate": 2.4026277814750635e-06,
      "loss": 0.0157,
      "step": 2326000
    },
    {
      "epoch": 3.8065827458219594,
      "grad_norm": 0.12444281578063965,
      "learning_rate": 2.402561889261546e-06,
      "loss": 0.0069,
      "step": 2326020
    },
    {
      "epoch": 3.806615476260613,
      "grad_norm": 0.17310957610607147,
      "learning_rate": 2.402495997048029e-06,
      "loss": 0.0144,
      "step": 2326040
    },
    {
      "epoch": 3.806648206699266,
      "grad_norm": 0.13464243710041046,
      "learning_rate": 2.402430104834512e-06,
      "loss": 0.0176,
      "step": 2326060
    },
    {
      "epoch": 3.8066809371379193,
      "grad_norm": 0.412977933883667,
      "learning_rate": 2.402364212620995e-06,
      "loss": 0.0119,
      "step": 2326080
    },
    {
      "epoch": 3.806713667576573,
      "grad_norm": 0.117108553647995,
      "learning_rate": 2.4022983204074776e-06,
      "loss": 0.0086,
      "step": 2326100
    },
    {
      "epoch": 3.806746398015226,
      "grad_norm": 0.20498183369636536,
      "learning_rate": 2.4022324281939603e-06,
      "loss": 0.0105,
      "step": 2326120
    },
    {
      "epoch": 3.8067791284538797,
      "grad_norm": 0.26642200350761414,
      "learning_rate": 2.4021665359804435e-06,
      "loss": 0.0104,
      "step": 2326140
    },
    {
      "epoch": 3.806811858892533,
      "grad_norm": 0.2445560246706009,
      "learning_rate": 2.4021006437669262e-06,
      "loss": 0.0122,
      "step": 2326160
    },
    {
      "epoch": 3.8068445893311864,
      "grad_norm": 0.12383993715047836,
      "learning_rate": 2.4020347515534094e-06,
      "loss": 0.0112,
      "step": 2326180
    },
    {
      "epoch": 3.8068773197698396,
      "grad_norm": 0.11290700733661652,
      "learning_rate": 2.401968859339892e-06,
      "loss": 0.0081,
      "step": 2326200
    },
    {
      "epoch": 3.8069100502084927,
      "grad_norm": 0.30727145075798035,
      "learning_rate": 2.401902967126375e-06,
      "loss": 0.0099,
      "step": 2326220
    },
    {
      "epoch": 3.8069427806471463,
      "grad_norm": 0.4778454005718231,
      "learning_rate": 2.4018370749128576e-06,
      "loss": 0.011,
      "step": 2326240
    },
    {
      "epoch": 3.8069755110857995,
      "grad_norm": 0.22771158814430237,
      "learning_rate": 2.4017711826993408e-06,
      "loss": 0.0128,
      "step": 2326260
    },
    {
      "epoch": 3.807008241524453,
      "grad_norm": 0.22208347916603088,
      "learning_rate": 2.4017052904858235e-06,
      "loss": 0.0143,
      "step": 2326280
    },
    {
      "epoch": 3.807040971963106,
      "grad_norm": 0.196149542927742,
      "learning_rate": 2.4016393982723063e-06,
      "loss": 0.006,
      "step": 2326300
    },
    {
      "epoch": 3.80707370240176,
      "grad_norm": 0.4296480715274811,
      "learning_rate": 2.401573506058789e-06,
      "loss": 0.0108,
      "step": 2326320
    },
    {
      "epoch": 3.807106432840413,
      "grad_norm": 0.18327879905700684,
      "learning_rate": 2.401507613845272e-06,
      "loss": 0.0078,
      "step": 2326340
    },
    {
      "epoch": 3.807139163279066,
      "grad_norm": 0.49475499987602234,
      "learning_rate": 2.401441721631755e-06,
      "loss": 0.0136,
      "step": 2326360
    },
    {
      "epoch": 3.8071718937177197,
      "grad_norm": 0.5979270339012146,
      "learning_rate": 2.401375829418238e-06,
      "loss": 0.0142,
      "step": 2326380
    },
    {
      "epoch": 3.807204624156373,
      "grad_norm": 0.5173987746238708,
      "learning_rate": 2.401309937204721e-06,
      "loss": 0.0099,
      "step": 2326400
    },
    {
      "epoch": 3.8072373545950264,
      "grad_norm": 0.08130297809839249,
      "learning_rate": 2.4012440449912035e-06,
      "loss": 0.0103,
      "step": 2326420
    },
    {
      "epoch": 3.8072700850336796,
      "grad_norm": 0.37894758582115173,
      "learning_rate": 2.4011781527776863e-06,
      "loss": 0.012,
      "step": 2326440
    },
    {
      "epoch": 3.807302815472333,
      "grad_norm": 0.125301331281662,
      "learning_rate": 2.4011122605641694e-06,
      "loss": 0.0092,
      "step": 2326460
    },
    {
      "epoch": 3.8073355459109863,
      "grad_norm": 0.1897556632757187,
      "learning_rate": 2.401046368350652e-06,
      "loss": 0.0107,
      "step": 2326480
    },
    {
      "epoch": 3.8073682763496395,
      "grad_norm": 0.4898206889629364,
      "learning_rate": 2.400980476137135e-06,
      "loss": 0.0124,
      "step": 2326500
    },
    {
      "epoch": 3.807401006788293,
      "grad_norm": 0.1272701472043991,
      "learning_rate": 2.4009145839236177e-06,
      "loss": 0.0115,
      "step": 2326520
    },
    {
      "epoch": 3.8074337372269462,
      "grad_norm": 0.2602772116661072,
      "learning_rate": 2.400848691710101e-06,
      "loss": 0.014,
      "step": 2326540
    },
    {
      "epoch": 3.8074664676656,
      "grad_norm": 0.1328025609254837,
      "learning_rate": 2.4007827994965836e-06,
      "loss": 0.0128,
      "step": 2326560
    },
    {
      "epoch": 3.807499198104253,
      "grad_norm": 0.2327296882867813,
      "learning_rate": 2.4007169072830667e-06,
      "loss": 0.0108,
      "step": 2326580
    },
    {
      "epoch": 3.8075319285429066,
      "grad_norm": 0.092508964240551,
      "learning_rate": 2.4006510150695495e-06,
      "loss": 0.0064,
      "step": 2326600
    },
    {
      "epoch": 3.8075646589815597,
      "grad_norm": 0.6004506349563599,
      "learning_rate": 2.4005851228560322e-06,
      "loss": 0.0112,
      "step": 2326620
    },
    {
      "epoch": 3.807597389420213,
      "grad_norm": 0.3268704414367676,
      "learning_rate": 2.400519230642515e-06,
      "loss": 0.0119,
      "step": 2326640
    },
    {
      "epoch": 3.8076301198588665,
      "grad_norm": 0.10424531996250153,
      "learning_rate": 2.400453338428998e-06,
      "loss": 0.0088,
      "step": 2326660
    },
    {
      "epoch": 3.8076628502975196,
      "grad_norm": 0.4045582115650177,
      "learning_rate": 2.400387446215481e-06,
      "loss": 0.0125,
      "step": 2326680
    },
    {
      "epoch": 3.8076955807361728,
      "grad_norm": 0.1655297726392746,
      "learning_rate": 2.4003215540019636e-06,
      "loss": 0.0137,
      "step": 2326700
    },
    {
      "epoch": 3.8077283111748264,
      "grad_norm": 0.07236776500940323,
      "learning_rate": 2.4002556617884468e-06,
      "loss": 0.0116,
      "step": 2326720
    },
    {
      "epoch": 3.80776104161348,
      "grad_norm": 0.04658091440796852,
      "learning_rate": 2.4001897695749295e-06,
      "loss": 0.013,
      "step": 2326740
    },
    {
      "epoch": 3.807793772052133,
      "grad_norm": 0.3359077274799347,
      "learning_rate": 2.4001238773614122e-06,
      "loss": 0.011,
      "step": 2326760
    },
    {
      "epoch": 3.8078265024907862,
      "grad_norm": 0.7347129583358765,
      "learning_rate": 2.4000579851478954e-06,
      "loss": 0.0115,
      "step": 2326780
    },
    {
      "epoch": 3.80785923292944,
      "grad_norm": 0.11824941635131836,
      "learning_rate": 2.399992092934378e-06,
      "loss": 0.0119,
      "step": 2326800
    },
    {
      "epoch": 3.807891963368093,
      "grad_norm": 0.30081629753112793,
      "learning_rate": 2.399926200720861e-06,
      "loss": 0.0099,
      "step": 2326820
    },
    {
      "epoch": 3.807924693806746,
      "grad_norm": 0.5188764333724976,
      "learning_rate": 2.3998603085073436e-06,
      "loss": 0.0063,
      "step": 2326840
    },
    {
      "epoch": 3.8079574242453997,
      "grad_norm": 0.26323166489601135,
      "learning_rate": 2.3997944162938268e-06,
      "loss": 0.012,
      "step": 2326860
    },
    {
      "epoch": 3.8079901546840533,
      "grad_norm": 0.22382326424121857,
      "learning_rate": 2.39972852408031e-06,
      "loss": 0.0069,
      "step": 2326880
    },
    {
      "epoch": 3.8080228851227065,
      "grad_norm": 0.47255972027778625,
      "learning_rate": 2.3996626318667927e-06,
      "loss": 0.0131,
      "step": 2326900
    },
    {
      "epoch": 3.8080556155613596,
      "grad_norm": 0.12577177584171295,
      "learning_rate": 2.3995967396532754e-06,
      "loss": 0.0087,
      "step": 2326920
    },
    {
      "epoch": 3.8080883460000132,
      "grad_norm": 0.4677582085132599,
      "learning_rate": 2.399530847439758e-06,
      "loss": 0.0099,
      "step": 2326940
    },
    {
      "epoch": 3.8081210764386664,
      "grad_norm": 0.11949045956134796,
      "learning_rate": 2.3994649552262413e-06,
      "loss": 0.0088,
      "step": 2326960
    },
    {
      "epoch": 3.8081538068773195,
      "grad_norm": 0.3524439334869385,
      "learning_rate": 2.399399063012724e-06,
      "loss": 0.01,
      "step": 2326980
    },
    {
      "epoch": 3.808186537315973,
      "grad_norm": 0.22558242082595825,
      "learning_rate": 2.399333170799207e-06,
      "loss": 0.0113,
      "step": 2327000
    },
    {
      "epoch": 3.8082192677546267,
      "grad_norm": 0.13460709154605865,
      "learning_rate": 2.3992672785856895e-06,
      "loss": 0.0077,
      "step": 2327020
    },
    {
      "epoch": 3.80825199819328,
      "grad_norm": 0.21465842425823212,
      "learning_rate": 2.3992013863721723e-06,
      "loss": 0.0145,
      "step": 2327040
    },
    {
      "epoch": 3.808284728631933,
      "grad_norm": 0.1393047422170639,
      "learning_rate": 2.3991354941586555e-06,
      "loss": 0.0173,
      "step": 2327060
    },
    {
      "epoch": 3.8083174590705866,
      "grad_norm": 0.17431867122650146,
      "learning_rate": 2.3990696019451386e-06,
      "loss": 0.0118,
      "step": 2327080
    },
    {
      "epoch": 3.8083501895092398,
      "grad_norm": 0.143876850605011,
      "learning_rate": 2.3990037097316214e-06,
      "loss": 0.0098,
      "step": 2327100
    },
    {
      "epoch": 3.808382919947893,
      "grad_norm": 0.6961622834205627,
      "learning_rate": 2.398937817518104e-06,
      "loss": 0.0114,
      "step": 2327120
    },
    {
      "epoch": 3.8084156503865465,
      "grad_norm": 0.1568099558353424,
      "learning_rate": 2.398871925304587e-06,
      "loss": 0.0096,
      "step": 2327140
    },
    {
      "epoch": 3.8084483808251997,
      "grad_norm": 0.10742118954658508,
      "learning_rate": 2.39880603309107e-06,
      "loss": 0.0159,
      "step": 2327160
    },
    {
      "epoch": 3.8084811112638532,
      "grad_norm": 0.2698463797569275,
      "learning_rate": 2.3987401408775527e-06,
      "loss": 0.0136,
      "step": 2327180
    },
    {
      "epoch": 3.8085138417025064,
      "grad_norm": 0.20382346212863922,
      "learning_rate": 2.3986742486640355e-06,
      "loss": 0.0137,
      "step": 2327200
    },
    {
      "epoch": 3.80854657214116,
      "grad_norm": 0.3492071330547333,
      "learning_rate": 2.3986083564505182e-06,
      "loss": 0.0106,
      "step": 2327220
    },
    {
      "epoch": 3.808579302579813,
      "grad_norm": 0.17072360217571259,
      "learning_rate": 2.3985424642370014e-06,
      "loss": 0.0103,
      "step": 2327240
    },
    {
      "epoch": 3.8086120330184663,
      "grad_norm": 0.17129074037075043,
      "learning_rate": 2.398476572023484e-06,
      "loss": 0.0125,
      "step": 2327260
    },
    {
      "epoch": 3.80864476345712,
      "grad_norm": 0.76336669921875,
      "learning_rate": 2.3984106798099673e-06,
      "loss": 0.0109,
      "step": 2327280
    },
    {
      "epoch": 3.808677493895773,
      "grad_norm": 0.40764299035072327,
      "learning_rate": 2.39834478759645e-06,
      "loss": 0.0137,
      "step": 2327300
    },
    {
      "epoch": 3.8087102243344266,
      "grad_norm": 0.16236846148967743,
      "learning_rate": 2.3982788953829328e-06,
      "loss": 0.0112,
      "step": 2327320
    },
    {
      "epoch": 3.80874295477308,
      "grad_norm": 0.039503347128629684,
      "learning_rate": 2.3982130031694155e-06,
      "loss": 0.008,
      "step": 2327340
    },
    {
      "epoch": 3.8087756852117334,
      "grad_norm": 0.292926162481308,
      "learning_rate": 2.3981471109558987e-06,
      "loss": 0.0069,
      "step": 2327360
    },
    {
      "epoch": 3.8088084156503865,
      "grad_norm": 0.2667239010334015,
      "learning_rate": 2.3980812187423814e-06,
      "loss": 0.0113,
      "step": 2327380
    },
    {
      "epoch": 3.8088411460890397,
      "grad_norm": 0.21026834845542908,
      "learning_rate": 2.398015326528864e-06,
      "loss": 0.0103,
      "step": 2327400
    },
    {
      "epoch": 3.8088738765276933,
      "grad_norm": 0.40401309728622437,
      "learning_rate": 2.3979494343153473e-06,
      "loss": 0.0101,
      "step": 2327420
    },
    {
      "epoch": 3.8089066069663464,
      "grad_norm": 0.16680137813091278,
      "learning_rate": 2.39788354210183e-06,
      "loss": 0.0094,
      "step": 2327440
    },
    {
      "epoch": 3.808939337405,
      "grad_norm": 0.2908364236354828,
      "learning_rate": 2.397817649888313e-06,
      "loss": 0.0097,
      "step": 2327460
    },
    {
      "epoch": 3.808972067843653,
      "grad_norm": 0.3264826834201813,
      "learning_rate": 2.397751757674796e-06,
      "loss": 0.0112,
      "step": 2327480
    },
    {
      "epoch": 3.8090047982823068,
      "grad_norm": 0.26582852005958557,
      "learning_rate": 2.3976858654612787e-06,
      "loss": 0.0132,
      "step": 2327500
    },
    {
      "epoch": 3.80903752872096,
      "grad_norm": 0.16376914083957672,
      "learning_rate": 2.3976199732477614e-06,
      "loss": 0.0081,
      "step": 2327520
    },
    {
      "epoch": 3.809070259159613,
      "grad_norm": 0.18936385214328766,
      "learning_rate": 2.397554081034244e-06,
      "loss": 0.0064,
      "step": 2327540
    },
    {
      "epoch": 3.8091029895982667,
      "grad_norm": 0.9379075765609741,
      "learning_rate": 2.3974881888207273e-06,
      "loss": 0.0136,
      "step": 2327560
    },
    {
      "epoch": 3.80913572003692,
      "grad_norm": 0.6712864637374878,
      "learning_rate": 2.39742229660721e-06,
      "loss": 0.0153,
      "step": 2327580
    },
    {
      "epoch": 3.8091684504755734,
      "grad_norm": 0.34196388721466064,
      "learning_rate": 2.3973564043936932e-06,
      "loss": 0.0123,
      "step": 2327600
    },
    {
      "epoch": 3.8092011809142265,
      "grad_norm": 0.07109608501195908,
      "learning_rate": 2.397290512180176e-06,
      "loss": 0.0117,
      "step": 2327620
    },
    {
      "epoch": 3.80923391135288,
      "grad_norm": 0.12933151423931122,
      "learning_rate": 2.3972246199666587e-06,
      "loss": 0.0167,
      "step": 2327640
    },
    {
      "epoch": 3.8092666417915333,
      "grad_norm": 0.3531496226787567,
      "learning_rate": 2.3971587277531415e-06,
      "loss": 0.0102,
      "step": 2327660
    },
    {
      "epoch": 3.8092993722301864,
      "grad_norm": 0.7685215473175049,
      "learning_rate": 2.3970928355396246e-06,
      "loss": 0.0094,
      "step": 2327680
    },
    {
      "epoch": 3.80933210266884,
      "grad_norm": 0.7731313109397888,
      "learning_rate": 2.3970269433261074e-06,
      "loss": 0.0118,
      "step": 2327700
    },
    {
      "epoch": 3.809364833107493,
      "grad_norm": 0.6329684853553772,
      "learning_rate": 2.39696105111259e-06,
      "loss": 0.0097,
      "step": 2327720
    },
    {
      "epoch": 3.809397563546147,
      "grad_norm": 0.028565960004925728,
      "learning_rate": 2.396895158899073e-06,
      "loss": 0.0052,
      "step": 2327740
    },
    {
      "epoch": 3.8094302939848,
      "grad_norm": 0.1579807698726654,
      "learning_rate": 2.396829266685556e-06,
      "loss": 0.0107,
      "step": 2327760
    },
    {
      "epoch": 3.8094630244234535,
      "grad_norm": 0.25592079758644104,
      "learning_rate": 2.3967633744720387e-06,
      "loss": 0.0112,
      "step": 2327780
    },
    {
      "epoch": 3.8094957548621067,
      "grad_norm": 0.1243775263428688,
      "learning_rate": 2.396697482258522e-06,
      "loss": 0.0102,
      "step": 2327800
    },
    {
      "epoch": 3.80952848530076,
      "grad_norm": 0.07564687728881836,
      "learning_rate": 2.3966315900450046e-06,
      "loss": 0.0085,
      "step": 2327820
    },
    {
      "epoch": 3.8095612157394134,
      "grad_norm": 0.3046620190143585,
      "learning_rate": 2.3965656978314874e-06,
      "loss": 0.0121,
      "step": 2327840
    },
    {
      "epoch": 3.8095939461780666,
      "grad_norm": 0.26929447054862976,
      "learning_rate": 2.39649980561797e-06,
      "loss": 0.0095,
      "step": 2327860
    },
    {
      "epoch": 3.80962667661672,
      "grad_norm": 0.15531055629253387,
      "learning_rate": 2.3964339134044533e-06,
      "loss": 0.0137,
      "step": 2327880
    },
    {
      "epoch": 3.8096594070553733,
      "grad_norm": 0.5145413875579834,
      "learning_rate": 2.396368021190936e-06,
      "loss": 0.0089,
      "step": 2327900
    },
    {
      "epoch": 3.809692137494027,
      "grad_norm": 0.13408000767230988,
      "learning_rate": 2.3963021289774188e-06,
      "loss": 0.0122,
      "step": 2327920
    },
    {
      "epoch": 3.80972486793268,
      "grad_norm": 0.1850317269563675,
      "learning_rate": 2.396236236763902e-06,
      "loss": 0.0135,
      "step": 2327940
    },
    {
      "epoch": 3.809757598371333,
      "grad_norm": 0.3490045964717865,
      "learning_rate": 2.3961703445503847e-06,
      "loss": 0.0073,
      "step": 2327960
    },
    {
      "epoch": 3.809790328809987,
      "grad_norm": 0.16207438707351685,
      "learning_rate": 2.3961044523368674e-06,
      "loss": 0.0107,
      "step": 2327980
    },
    {
      "epoch": 3.80982305924864,
      "grad_norm": 0.21341337263584137,
      "learning_rate": 2.3960385601233506e-06,
      "loss": 0.0124,
      "step": 2328000
    },
    {
      "epoch": 3.8098557896872935,
      "grad_norm": 0.10373970866203308,
      "learning_rate": 2.3959726679098333e-06,
      "loss": 0.0101,
      "step": 2328020
    },
    {
      "epoch": 3.8098885201259467,
      "grad_norm": 0.19548889994621277,
      "learning_rate": 2.395906775696316e-06,
      "loss": 0.0116,
      "step": 2328040
    },
    {
      "epoch": 3.8099212505646003,
      "grad_norm": 0.10449133813381195,
      "learning_rate": 2.395840883482799e-06,
      "loss": 0.0104,
      "step": 2328060
    },
    {
      "epoch": 3.8099539810032534,
      "grad_norm": 0.21423715353012085,
      "learning_rate": 2.395774991269282e-06,
      "loss": 0.0148,
      "step": 2328080
    },
    {
      "epoch": 3.8099867114419066,
      "grad_norm": 0.13599878549575806,
      "learning_rate": 2.3957090990557647e-06,
      "loss": 0.0121,
      "step": 2328100
    },
    {
      "epoch": 3.81001944188056,
      "grad_norm": 0.08151999115943909,
      "learning_rate": 2.395643206842248e-06,
      "loss": 0.0067,
      "step": 2328120
    },
    {
      "epoch": 3.8100521723192133,
      "grad_norm": 0.6122006773948669,
      "learning_rate": 2.3955773146287306e-06,
      "loss": 0.0166,
      "step": 2328140
    },
    {
      "epoch": 3.8100849027578665,
      "grad_norm": 0.38083741068840027,
      "learning_rate": 2.3955114224152133e-06,
      "loss": 0.0121,
      "step": 2328160
    },
    {
      "epoch": 3.81011763319652,
      "grad_norm": 0.35958191752433777,
      "learning_rate": 2.395445530201696e-06,
      "loss": 0.019,
      "step": 2328180
    },
    {
      "epoch": 3.8101503636351737,
      "grad_norm": 0.40668419003486633,
      "learning_rate": 2.3953796379881792e-06,
      "loss": 0.0096,
      "step": 2328200
    },
    {
      "epoch": 3.810183094073827,
      "grad_norm": 0.6548355221748352,
      "learning_rate": 2.395313745774662e-06,
      "loss": 0.0129,
      "step": 2328220
    },
    {
      "epoch": 3.81021582451248,
      "grad_norm": 0.19054964184761047,
      "learning_rate": 2.3952478535611447e-06,
      "loss": 0.01,
      "step": 2328240
    },
    {
      "epoch": 3.8102485549511336,
      "grad_norm": 0.32077932357788086,
      "learning_rate": 2.3951819613476275e-06,
      "loss": 0.0079,
      "step": 2328260
    },
    {
      "epoch": 3.8102812853897867,
      "grad_norm": 0.26997295022010803,
      "learning_rate": 2.3951160691341106e-06,
      "loss": 0.0175,
      "step": 2328280
    },
    {
      "epoch": 3.81031401582844,
      "grad_norm": 0.29041215777397156,
      "learning_rate": 2.3950501769205938e-06,
      "loss": 0.0119,
      "step": 2328300
    },
    {
      "epoch": 3.8103467462670935,
      "grad_norm": 0.7688356041908264,
      "learning_rate": 2.3949842847070765e-06,
      "loss": 0.0132,
      "step": 2328320
    },
    {
      "epoch": 3.810379476705747,
      "grad_norm": 0.2391490787267685,
      "learning_rate": 2.3949183924935593e-06,
      "loss": 0.0107,
      "step": 2328340
    },
    {
      "epoch": 3.8104122071444,
      "grad_norm": 0.3152989447116852,
      "learning_rate": 2.394852500280042e-06,
      "loss": 0.01,
      "step": 2328360
    },
    {
      "epoch": 3.8104449375830534,
      "grad_norm": 0.40558528900146484,
      "learning_rate": 2.394786608066525e-06,
      "loss": 0.0149,
      "step": 2328380
    },
    {
      "epoch": 3.810477668021707,
      "grad_norm": 0.8386488556861877,
      "learning_rate": 2.394720715853008e-06,
      "loss": 0.0149,
      "step": 2328400
    },
    {
      "epoch": 3.81051039846036,
      "grad_norm": 0.3673747777938843,
      "learning_rate": 2.3946548236394906e-06,
      "loss": 0.01,
      "step": 2328420
    },
    {
      "epoch": 3.8105431288990133,
      "grad_norm": 0.3609412610530853,
      "learning_rate": 2.3945889314259734e-06,
      "loss": 0.0142,
      "step": 2328440
    },
    {
      "epoch": 3.810575859337667,
      "grad_norm": 0.2273128628730774,
      "learning_rate": 2.394523039212456e-06,
      "loss": 0.0107,
      "step": 2328460
    },
    {
      "epoch": 3.8106085897763204,
      "grad_norm": 0.1458529531955719,
      "learning_rate": 2.3944571469989393e-06,
      "loss": 0.0086,
      "step": 2328480
    },
    {
      "epoch": 3.8106413202149736,
      "grad_norm": 0.15686088800430298,
      "learning_rate": 2.3943912547854225e-06,
      "loss": 0.0106,
      "step": 2328500
    },
    {
      "epoch": 3.8106740506536267,
      "grad_norm": 0.10191567242145538,
      "learning_rate": 2.394325362571905e-06,
      "loss": 0.0102,
      "step": 2328520
    },
    {
      "epoch": 3.8107067810922803,
      "grad_norm": 0.5322715640068054,
      "learning_rate": 2.394259470358388e-06,
      "loss": 0.0122,
      "step": 2328540
    },
    {
      "epoch": 3.8107395115309335,
      "grad_norm": 0.33354365825653076,
      "learning_rate": 2.3941935781448707e-06,
      "loss": 0.0103,
      "step": 2328560
    },
    {
      "epoch": 3.8107722419695866,
      "grad_norm": 0.23744742572307587,
      "learning_rate": 2.394127685931354e-06,
      "loss": 0.0125,
      "step": 2328580
    },
    {
      "epoch": 3.8108049724082402,
      "grad_norm": 0.41456177830696106,
      "learning_rate": 2.3940617937178366e-06,
      "loss": 0.009,
      "step": 2328600
    },
    {
      "epoch": 3.810837702846894,
      "grad_norm": 0.1372821182012558,
      "learning_rate": 2.3939959015043193e-06,
      "loss": 0.018,
      "step": 2328620
    },
    {
      "epoch": 3.810870433285547,
      "grad_norm": 0.18433144688606262,
      "learning_rate": 2.3939300092908025e-06,
      "loss": 0.0088,
      "step": 2328640
    },
    {
      "epoch": 3.8109031637242,
      "grad_norm": 0.30153340101242065,
      "learning_rate": 2.3938641170772852e-06,
      "loss": 0.0125,
      "step": 2328660
    },
    {
      "epoch": 3.8109358941628537,
      "grad_norm": 0.16749544441699982,
      "learning_rate": 2.393798224863768e-06,
      "loss": 0.0114,
      "step": 2328680
    },
    {
      "epoch": 3.810968624601507,
      "grad_norm": 0.23041585087776184,
      "learning_rate": 2.393732332650251e-06,
      "loss": 0.0115,
      "step": 2328700
    },
    {
      "epoch": 3.81100135504016,
      "grad_norm": 0.13540059328079224,
      "learning_rate": 2.393666440436734e-06,
      "loss": 0.0122,
      "step": 2328720
    },
    {
      "epoch": 3.8110340854788136,
      "grad_norm": 0.590387225151062,
      "learning_rate": 2.3936005482232166e-06,
      "loss": 0.0087,
      "step": 2328740
    },
    {
      "epoch": 3.8110668159174668,
      "grad_norm": 0.09552557021379471,
      "learning_rate": 2.3935346560096993e-06,
      "loss": 0.0077,
      "step": 2328760
    },
    {
      "epoch": 3.8110995463561204,
      "grad_norm": 0.2068081498146057,
      "learning_rate": 2.3934687637961825e-06,
      "loss": 0.0101,
      "step": 2328780
    },
    {
      "epoch": 3.8111322767947735,
      "grad_norm": 0.6192365884780884,
      "learning_rate": 2.3934028715826652e-06,
      "loss": 0.0144,
      "step": 2328800
    },
    {
      "epoch": 3.811165007233427,
      "grad_norm": 0.046623483300209045,
      "learning_rate": 2.3933369793691484e-06,
      "loss": 0.0073,
      "step": 2328820
    },
    {
      "epoch": 3.8111977376720803,
      "grad_norm": 0.11378554999828339,
      "learning_rate": 2.393271087155631e-06,
      "loss": 0.0103,
      "step": 2328840
    },
    {
      "epoch": 3.8112304681107334,
      "grad_norm": 0.15243929624557495,
      "learning_rate": 2.393205194942114e-06,
      "loss": 0.006,
      "step": 2328860
    },
    {
      "epoch": 3.811263198549387,
      "grad_norm": 0.09981781244277954,
      "learning_rate": 2.3931393027285966e-06,
      "loss": 0.0066,
      "step": 2328880
    },
    {
      "epoch": 3.81129592898804,
      "grad_norm": 0.22217844426631927,
      "learning_rate": 2.39307341051508e-06,
      "loss": 0.0122,
      "step": 2328900
    },
    {
      "epoch": 3.8113286594266937,
      "grad_norm": 0.12696048617362976,
      "learning_rate": 2.3930075183015625e-06,
      "loss": 0.0101,
      "step": 2328920
    },
    {
      "epoch": 3.811361389865347,
      "grad_norm": 0.28015923500061035,
      "learning_rate": 2.3929416260880453e-06,
      "loss": 0.0105,
      "step": 2328940
    },
    {
      "epoch": 3.8113941203040005,
      "grad_norm": 0.12220320850610733,
      "learning_rate": 2.392875733874528e-06,
      "loss": 0.0116,
      "step": 2328960
    },
    {
      "epoch": 3.8114268507426536,
      "grad_norm": 0.08990589529275894,
      "learning_rate": 2.392809841661011e-06,
      "loss": 0.0067,
      "step": 2328980
    },
    {
      "epoch": 3.811459581181307,
      "grad_norm": 0.35519900918006897,
      "learning_rate": 2.392743949447494e-06,
      "loss": 0.0095,
      "step": 2329000
    },
    {
      "epoch": 3.8114923116199604,
      "grad_norm": 0.1161021962761879,
      "learning_rate": 2.392678057233977e-06,
      "loss": 0.0083,
      "step": 2329020
    },
    {
      "epoch": 3.8115250420586135,
      "grad_norm": 0.4645858407020569,
      "learning_rate": 2.39261216502046e-06,
      "loss": 0.017,
      "step": 2329040
    },
    {
      "epoch": 3.811557772497267,
      "grad_norm": 0.07109073549509048,
      "learning_rate": 2.3925462728069426e-06,
      "loss": 0.0119,
      "step": 2329060
    },
    {
      "epoch": 3.8115905029359203,
      "grad_norm": 0.2220991998910904,
      "learning_rate": 2.3924803805934253e-06,
      "loss": 0.0098,
      "step": 2329080
    },
    {
      "epoch": 3.811623233374574,
      "grad_norm": 0.14764532446861267,
      "learning_rate": 2.3924144883799085e-06,
      "loss": 0.0091,
      "step": 2329100
    },
    {
      "epoch": 3.811655963813227,
      "grad_norm": 0.20253635942935944,
      "learning_rate": 2.392348596166391e-06,
      "loss": 0.0108,
      "step": 2329120
    },
    {
      "epoch": 3.81168869425188,
      "grad_norm": 0.12105692923069,
      "learning_rate": 2.392282703952874e-06,
      "loss": 0.0065,
      "step": 2329140
    },
    {
      "epoch": 3.8117214246905338,
      "grad_norm": 0.1203741729259491,
      "learning_rate": 2.3922168117393567e-06,
      "loss": 0.0087,
      "step": 2329160
    },
    {
      "epoch": 3.811754155129187,
      "grad_norm": 0.17511481046676636,
      "learning_rate": 2.39215091952584e-06,
      "loss": 0.0117,
      "step": 2329180
    },
    {
      "epoch": 3.8117868855678405,
      "grad_norm": 0.2813825309276581,
      "learning_rate": 2.3920850273123226e-06,
      "loss": 0.0087,
      "step": 2329200
    },
    {
      "epoch": 3.8118196160064937,
      "grad_norm": 0.22944001853466034,
      "learning_rate": 2.3920191350988057e-06,
      "loss": 0.0088,
      "step": 2329220
    },
    {
      "epoch": 3.8118523464451473,
      "grad_norm": 0.5080893039703369,
      "learning_rate": 2.3919532428852885e-06,
      "loss": 0.0148,
      "step": 2329240
    },
    {
      "epoch": 3.8118850768838004,
      "grad_norm": 0.08478738367557526,
      "learning_rate": 2.3918873506717712e-06,
      "loss": 0.0092,
      "step": 2329260
    },
    {
      "epoch": 3.8119178073224536,
      "grad_norm": 0.2088521420955658,
      "learning_rate": 2.391821458458254e-06,
      "loss": 0.0085,
      "step": 2329280
    },
    {
      "epoch": 3.811950537761107,
      "grad_norm": 0.1252940446138382,
      "learning_rate": 2.391755566244737e-06,
      "loss": 0.0076,
      "step": 2329300
    },
    {
      "epoch": 3.8119832681997603,
      "grad_norm": 0.2213551104068756,
      "learning_rate": 2.39168967403122e-06,
      "loss": 0.014,
      "step": 2329320
    },
    {
      "epoch": 3.812015998638414,
      "grad_norm": 0.31031009554862976,
      "learning_rate": 2.3916237818177026e-06,
      "loss": 0.0106,
      "step": 2329340
    },
    {
      "epoch": 3.812048729077067,
      "grad_norm": 0.17735064029693604,
      "learning_rate": 2.3915578896041858e-06,
      "loss": 0.0085,
      "step": 2329360
    },
    {
      "epoch": 3.8120814595157206,
      "grad_norm": 0.06900779157876968,
      "learning_rate": 2.3914919973906685e-06,
      "loss": 0.0086,
      "step": 2329380
    },
    {
      "epoch": 3.812114189954374,
      "grad_norm": 0.24401961266994476,
      "learning_rate": 2.3914261051771512e-06,
      "loss": 0.0127,
      "step": 2329400
    },
    {
      "epoch": 3.812146920393027,
      "grad_norm": 0.3373716175556183,
      "learning_rate": 2.3913602129636344e-06,
      "loss": 0.0128,
      "step": 2329420
    },
    {
      "epoch": 3.8121796508316805,
      "grad_norm": 0.20079956948757172,
      "learning_rate": 2.391294320750117e-06,
      "loss": 0.0085,
      "step": 2329440
    },
    {
      "epoch": 3.8122123812703337,
      "grad_norm": 0.2133222222328186,
      "learning_rate": 2.3912284285366e-06,
      "loss": 0.0078,
      "step": 2329460
    },
    {
      "epoch": 3.8122451117089873,
      "grad_norm": 0.18029877543449402,
      "learning_rate": 2.3911625363230826e-06,
      "loss": 0.0096,
      "step": 2329480
    },
    {
      "epoch": 3.8122778421476404,
      "grad_norm": 0.22061030566692352,
      "learning_rate": 2.391096644109566e-06,
      "loss": 0.0133,
      "step": 2329500
    },
    {
      "epoch": 3.812310572586294,
      "grad_norm": 0.2679128646850586,
      "learning_rate": 2.391030751896049e-06,
      "loss": 0.0147,
      "step": 2329520
    },
    {
      "epoch": 3.812343303024947,
      "grad_norm": 0.05278704687952995,
      "learning_rate": 2.3909648596825317e-06,
      "loss": 0.0081,
      "step": 2329540
    },
    {
      "epoch": 3.8123760334636003,
      "grad_norm": 0.26742076873779297,
      "learning_rate": 2.3908989674690144e-06,
      "loss": 0.0086,
      "step": 2329560
    },
    {
      "epoch": 3.812408763902254,
      "grad_norm": 0.28069013357162476,
      "learning_rate": 2.390833075255497e-06,
      "loss": 0.006,
      "step": 2329580
    },
    {
      "epoch": 3.812441494340907,
      "grad_norm": 0.3541977107524872,
      "learning_rate": 2.3907671830419803e-06,
      "loss": 0.0097,
      "step": 2329600
    },
    {
      "epoch": 3.8124742247795607,
      "grad_norm": 0.5509018898010254,
      "learning_rate": 2.390701290828463e-06,
      "loss": 0.01,
      "step": 2329620
    },
    {
      "epoch": 3.812506955218214,
      "grad_norm": 0.19583900272846222,
      "learning_rate": 2.390635398614946e-06,
      "loss": 0.0119,
      "step": 2329640
    },
    {
      "epoch": 3.8125396856568674,
      "grad_norm": 0.2205754667520523,
      "learning_rate": 2.3905695064014286e-06,
      "loss": 0.0114,
      "step": 2329660
    },
    {
      "epoch": 3.8125724160955206,
      "grad_norm": 0.17747928202152252,
      "learning_rate": 2.3905036141879113e-06,
      "loss": 0.006,
      "step": 2329680
    },
    {
      "epoch": 3.8126051465341737,
      "grad_norm": 0.14855855703353882,
      "learning_rate": 2.3904377219743945e-06,
      "loss": 0.0139,
      "step": 2329700
    },
    {
      "epoch": 3.8126378769728273,
      "grad_norm": 0.05314334109425545,
      "learning_rate": 2.3903718297608776e-06,
      "loss": 0.0074,
      "step": 2329720
    },
    {
      "epoch": 3.8126706074114805,
      "grad_norm": 0.22226165235042572,
      "learning_rate": 2.3903059375473604e-06,
      "loss": 0.0128,
      "step": 2329740
    },
    {
      "epoch": 3.8127033378501336,
      "grad_norm": 0.29117587208747864,
      "learning_rate": 2.390240045333843e-06,
      "loss": 0.012,
      "step": 2329760
    },
    {
      "epoch": 3.812736068288787,
      "grad_norm": 0.22523729503154755,
      "learning_rate": 2.390174153120326e-06,
      "loss": 0.01,
      "step": 2329780
    },
    {
      "epoch": 3.812768798727441,
      "grad_norm": 0.6518778204917908,
      "learning_rate": 2.390108260906809e-06,
      "loss": 0.0088,
      "step": 2329800
    },
    {
      "epoch": 3.812801529166094,
      "grad_norm": 0.24942366778850555,
      "learning_rate": 2.3900423686932917e-06,
      "loss": 0.0082,
      "step": 2329820
    },
    {
      "epoch": 3.812834259604747,
      "grad_norm": 0.33025431632995605,
      "learning_rate": 2.3899764764797745e-06,
      "loss": 0.0159,
      "step": 2329840
    },
    {
      "epoch": 3.8128669900434007,
      "grad_norm": 0.40203672647476196,
      "learning_rate": 2.3899105842662572e-06,
      "loss": 0.0128,
      "step": 2329860
    },
    {
      "epoch": 3.812899720482054,
      "grad_norm": 0.5052657127380371,
      "learning_rate": 2.3898446920527404e-06,
      "loss": 0.0082,
      "step": 2329880
    },
    {
      "epoch": 3.812932450920707,
      "grad_norm": 0.16964557766914368,
      "learning_rate": 2.389778799839223e-06,
      "loss": 0.0068,
      "step": 2329900
    },
    {
      "epoch": 3.8129651813593606,
      "grad_norm": 0.06362268328666687,
      "learning_rate": 2.3897129076257063e-06,
      "loss": 0.011,
      "step": 2329920
    },
    {
      "epoch": 3.812997911798014,
      "grad_norm": 0.26826223731040955,
      "learning_rate": 2.389647015412189e-06,
      "loss": 0.0125,
      "step": 2329940
    },
    {
      "epoch": 3.8130306422366673,
      "grad_norm": 0.07944254577159882,
      "learning_rate": 2.3895811231986718e-06,
      "loss": 0.0099,
      "step": 2329960
    },
    {
      "epoch": 3.8130633726753205,
      "grad_norm": 0.11287694424390793,
      "learning_rate": 2.3895152309851545e-06,
      "loss": 0.0124,
      "step": 2329980
    },
    {
      "epoch": 3.813096103113974,
      "grad_norm": 0.22425077855587006,
      "learning_rate": 2.3894493387716377e-06,
      "loss": 0.009,
      "step": 2330000
    },
    {
      "epoch": 3.813128833552627,
      "grad_norm": 0.18979957699775696,
      "learning_rate": 2.3893834465581204e-06,
      "loss": 0.0112,
      "step": 2330020
    },
    {
      "epoch": 3.8131615639912804,
      "grad_norm": 0.4335334897041321,
      "learning_rate": 2.389317554344603e-06,
      "loss": 0.0108,
      "step": 2330040
    },
    {
      "epoch": 3.813194294429934,
      "grad_norm": 0.2523394823074341,
      "learning_rate": 2.3892516621310863e-06,
      "loss": 0.0073,
      "step": 2330060
    },
    {
      "epoch": 3.8132270248685876,
      "grad_norm": 0.3496547341346741,
      "learning_rate": 2.389185769917569e-06,
      "loss": 0.013,
      "step": 2330080
    },
    {
      "epoch": 3.8132597553072407,
      "grad_norm": 0.5246179103851318,
      "learning_rate": 2.389119877704052e-06,
      "loss": 0.0143,
      "step": 2330100
    },
    {
      "epoch": 3.813292485745894,
      "grad_norm": 0.351122111082077,
      "learning_rate": 2.389053985490535e-06,
      "loss": 0.0112,
      "step": 2330120
    },
    {
      "epoch": 3.8133252161845475,
      "grad_norm": 0.05171939358115196,
      "learning_rate": 2.3889880932770177e-06,
      "loss": 0.0165,
      "step": 2330140
    },
    {
      "epoch": 3.8133579466232006,
      "grad_norm": 2.292652130126953,
      "learning_rate": 2.3889222010635004e-06,
      "loss": 0.0135,
      "step": 2330160
    },
    {
      "epoch": 3.8133906770618538,
      "grad_norm": 0.30202701687812805,
      "learning_rate": 2.388856308849983e-06,
      "loss": 0.0115,
      "step": 2330180
    },
    {
      "epoch": 3.8134234075005073,
      "grad_norm": 0.38926422595977783,
      "learning_rate": 2.3887904166364663e-06,
      "loss": 0.0161,
      "step": 2330200
    },
    {
      "epoch": 3.8134561379391605,
      "grad_norm": 0.2991965413093567,
      "learning_rate": 2.388724524422949e-06,
      "loss": 0.0122,
      "step": 2330220
    },
    {
      "epoch": 3.813488868377814,
      "grad_norm": 0.2823862135410309,
      "learning_rate": 2.3886586322094322e-06,
      "loss": 0.0109,
      "step": 2330240
    },
    {
      "epoch": 3.8135215988164672,
      "grad_norm": 0.08084171265363693,
      "learning_rate": 2.388592739995915e-06,
      "loss": 0.0104,
      "step": 2330260
    },
    {
      "epoch": 3.813554329255121,
      "grad_norm": 0.40585729479789734,
      "learning_rate": 2.3885268477823977e-06,
      "loss": 0.0076,
      "step": 2330280
    },
    {
      "epoch": 3.813587059693774,
      "grad_norm": 0.2919710576534271,
      "learning_rate": 2.3884609555688805e-06,
      "loss": 0.0136,
      "step": 2330300
    },
    {
      "epoch": 3.813619790132427,
      "grad_norm": 0.16897515952587128,
      "learning_rate": 2.3883950633553636e-06,
      "loss": 0.0121,
      "step": 2330320
    },
    {
      "epoch": 3.8136525205710807,
      "grad_norm": 0.2977674901485443,
      "learning_rate": 2.3883291711418464e-06,
      "loss": 0.0114,
      "step": 2330340
    },
    {
      "epoch": 3.813685251009734,
      "grad_norm": 0.19929662346839905,
      "learning_rate": 2.388263278928329e-06,
      "loss": 0.0114,
      "step": 2330360
    },
    {
      "epoch": 3.8137179814483875,
      "grad_norm": 0.7901824116706848,
      "learning_rate": 2.388197386714812e-06,
      "loss": 0.0132,
      "step": 2330380
    },
    {
      "epoch": 3.8137507118870406,
      "grad_norm": 0.16028587520122528,
      "learning_rate": 2.388131494501295e-06,
      "loss": 0.006,
      "step": 2330400
    },
    {
      "epoch": 3.813783442325694,
      "grad_norm": 0.08110076189041138,
      "learning_rate": 2.3880656022877778e-06,
      "loss": 0.0103,
      "step": 2330420
    },
    {
      "epoch": 3.8138161727643474,
      "grad_norm": 0.25542575120925903,
      "learning_rate": 2.387999710074261e-06,
      "loss": 0.0099,
      "step": 2330440
    },
    {
      "epoch": 3.8138489032030005,
      "grad_norm": 0.07320567965507507,
      "learning_rate": 2.3879338178607437e-06,
      "loss": 0.0132,
      "step": 2330460
    },
    {
      "epoch": 3.813881633641654,
      "grad_norm": 0.11004206538200378,
      "learning_rate": 2.3878679256472264e-06,
      "loss": 0.0065,
      "step": 2330480
    },
    {
      "epoch": 3.8139143640803073,
      "grad_norm": 0.08289382606744766,
      "learning_rate": 2.387802033433709e-06,
      "loss": 0.013,
      "step": 2330500
    },
    {
      "epoch": 3.813947094518961,
      "grad_norm": 0.16726262867450714,
      "learning_rate": 2.3877361412201923e-06,
      "loss": 0.0114,
      "step": 2330520
    },
    {
      "epoch": 3.813979824957614,
      "grad_norm": 0.5267691016197205,
      "learning_rate": 2.387670249006675e-06,
      "loss": 0.013,
      "step": 2330540
    },
    {
      "epoch": 3.8140125553962676,
      "grad_norm": 0.17076662182807922,
      "learning_rate": 2.3876043567931578e-06,
      "loss": 0.0118,
      "step": 2330560
    },
    {
      "epoch": 3.8140452858349208,
      "grad_norm": 0.33480310440063477,
      "learning_rate": 2.387538464579641e-06,
      "loss": 0.0135,
      "step": 2330580
    },
    {
      "epoch": 3.814078016273574,
      "grad_norm": 0.36410006880760193,
      "learning_rate": 2.3874725723661237e-06,
      "loss": 0.0068,
      "step": 2330600
    },
    {
      "epoch": 3.8141107467122275,
      "grad_norm": 0.1939658522605896,
      "learning_rate": 2.3874066801526064e-06,
      "loss": 0.0071,
      "step": 2330620
    },
    {
      "epoch": 3.8141434771508806,
      "grad_norm": 0.2903882563114166,
      "learning_rate": 2.3873407879390896e-06,
      "loss": 0.0073,
      "step": 2330640
    },
    {
      "epoch": 3.8141762075895342,
      "grad_norm": 0.25834909081459045,
      "learning_rate": 2.3872748957255723e-06,
      "loss": 0.0098,
      "step": 2330660
    },
    {
      "epoch": 3.8142089380281874,
      "grad_norm": 0.11871200799942017,
      "learning_rate": 2.387209003512055e-06,
      "loss": 0.0115,
      "step": 2330680
    },
    {
      "epoch": 3.814241668466841,
      "grad_norm": 0.044277552515268326,
      "learning_rate": 2.387143111298538e-06,
      "loss": 0.0121,
      "step": 2330700
    },
    {
      "epoch": 3.814274398905494,
      "grad_norm": 0.11255059391260147,
      "learning_rate": 2.387077219085021e-06,
      "loss": 0.0097,
      "step": 2330720
    },
    {
      "epoch": 3.8143071293441473,
      "grad_norm": 1.0234875679016113,
      "learning_rate": 2.3870113268715037e-06,
      "loss": 0.0104,
      "step": 2330740
    },
    {
      "epoch": 3.814339859782801,
      "grad_norm": 0.3040810525417328,
      "learning_rate": 2.386945434657987e-06,
      "loss": 0.013,
      "step": 2330760
    },
    {
      "epoch": 3.814372590221454,
      "grad_norm": 0.31841015815734863,
      "learning_rate": 2.3868795424444696e-06,
      "loss": 0.0124,
      "step": 2330780
    },
    {
      "epoch": 3.8144053206601076,
      "grad_norm": 0.15475308895111084,
      "learning_rate": 2.3868136502309523e-06,
      "loss": 0.0117,
      "step": 2330800
    },
    {
      "epoch": 3.8144380510987608,
      "grad_norm": 0.3976163864135742,
      "learning_rate": 2.3867477580174355e-06,
      "loss": 0.0082,
      "step": 2330820
    },
    {
      "epoch": 3.8144707815374144,
      "grad_norm": 0.9963712096214294,
      "learning_rate": 2.3866818658039183e-06,
      "loss": 0.0153,
      "step": 2330840
    },
    {
      "epoch": 3.8145035119760675,
      "grad_norm": 0.08777549117803574,
      "learning_rate": 2.386615973590401e-06,
      "loss": 0.0112,
      "step": 2330860
    },
    {
      "epoch": 3.8145362424147207,
      "grad_norm": 0.2451060563325882,
      "learning_rate": 2.3865500813768837e-06,
      "loss": 0.0077,
      "step": 2330880
    },
    {
      "epoch": 3.8145689728533743,
      "grad_norm": 0.6172143816947937,
      "learning_rate": 2.3864841891633665e-06,
      "loss": 0.0115,
      "step": 2330900
    },
    {
      "epoch": 3.8146017032920274,
      "grad_norm": 0.6850541830062866,
      "learning_rate": 2.3864182969498496e-06,
      "loss": 0.011,
      "step": 2330920
    },
    {
      "epoch": 3.814634433730681,
      "grad_norm": 0.11650039255619049,
      "learning_rate": 2.386352404736333e-06,
      "loss": 0.0093,
      "step": 2330940
    },
    {
      "epoch": 3.814667164169334,
      "grad_norm": 0.1281089186668396,
      "learning_rate": 2.3862865125228155e-06,
      "loss": 0.0116,
      "step": 2330960
    },
    {
      "epoch": 3.8146998946079878,
      "grad_norm": 0.2598087191581726,
      "learning_rate": 2.3862206203092983e-06,
      "loss": 0.0142,
      "step": 2330980
    },
    {
      "epoch": 3.814732625046641,
      "grad_norm": 0.0995405912399292,
      "learning_rate": 2.386154728095781e-06,
      "loss": 0.0098,
      "step": 2331000
    },
    {
      "epoch": 3.814765355485294,
      "grad_norm": 0.4560019373893738,
      "learning_rate": 2.386088835882264e-06,
      "loss": 0.0116,
      "step": 2331020
    },
    {
      "epoch": 3.8147980859239476,
      "grad_norm": 0.08053144067525864,
      "learning_rate": 2.386022943668747e-06,
      "loss": 0.0141,
      "step": 2331040
    },
    {
      "epoch": 3.814830816362601,
      "grad_norm": 0.4646424353122711,
      "learning_rate": 2.3859570514552297e-06,
      "loss": 0.0103,
      "step": 2331060
    },
    {
      "epoch": 3.8148635468012544,
      "grad_norm": 0.23940789699554443,
      "learning_rate": 2.3858911592417124e-06,
      "loss": 0.0101,
      "step": 2331080
    },
    {
      "epoch": 3.8148962772399075,
      "grad_norm": 0.4213249981403351,
      "learning_rate": 2.385825267028195e-06,
      "loss": 0.0142,
      "step": 2331100
    },
    {
      "epoch": 3.814929007678561,
      "grad_norm": 0.1703564077615738,
      "learning_rate": 2.3857593748146783e-06,
      "loss": 0.0084,
      "step": 2331120
    },
    {
      "epoch": 3.8149617381172143,
      "grad_norm": 0.4623723030090332,
      "learning_rate": 2.3856934826011615e-06,
      "loss": 0.0096,
      "step": 2331140
    },
    {
      "epoch": 3.8149944685558674,
      "grad_norm": 0.2386186569929123,
      "learning_rate": 2.385627590387644e-06,
      "loss": 0.0137,
      "step": 2331160
    },
    {
      "epoch": 3.815027198994521,
      "grad_norm": 0.0910244882106781,
      "learning_rate": 2.385561698174127e-06,
      "loss": 0.0089,
      "step": 2331180
    },
    {
      "epoch": 3.815059929433174,
      "grad_norm": 0.6901415586471558,
      "learning_rate": 2.3854958059606097e-06,
      "loss": 0.0171,
      "step": 2331200
    },
    {
      "epoch": 3.8150926598718273,
      "grad_norm": 0.24879136681556702,
      "learning_rate": 2.385429913747093e-06,
      "loss": 0.0144,
      "step": 2331220
    },
    {
      "epoch": 3.815125390310481,
      "grad_norm": 0.3947027325630188,
      "learning_rate": 2.3853640215335756e-06,
      "loss": 0.0113,
      "step": 2331240
    },
    {
      "epoch": 3.8151581207491345,
      "grad_norm": 0.18021777272224426,
      "learning_rate": 2.3852981293200583e-06,
      "loss": 0.0087,
      "step": 2331260
    },
    {
      "epoch": 3.8151908511877877,
      "grad_norm": 0.3137110769748688,
      "learning_rate": 2.3852322371065415e-06,
      "loss": 0.0105,
      "step": 2331280
    },
    {
      "epoch": 3.815223581626441,
      "grad_norm": 0.4940871596336365,
      "learning_rate": 2.3851663448930242e-06,
      "loss": 0.0149,
      "step": 2331300
    },
    {
      "epoch": 3.8152563120650944,
      "grad_norm": 0.050060857087373734,
      "learning_rate": 2.385100452679507e-06,
      "loss": 0.0062,
      "step": 2331320
    },
    {
      "epoch": 3.8152890425037476,
      "grad_norm": 0.2805255055427551,
      "learning_rate": 2.38503456046599e-06,
      "loss": 0.0115,
      "step": 2331340
    },
    {
      "epoch": 3.8153217729424007,
      "grad_norm": 0.3604187071323395,
      "learning_rate": 2.384968668252473e-06,
      "loss": 0.0094,
      "step": 2331360
    },
    {
      "epoch": 3.8153545033810543,
      "grad_norm": 0.15553069114685059,
      "learning_rate": 2.3849027760389556e-06,
      "loss": 0.0087,
      "step": 2331380
    },
    {
      "epoch": 3.815387233819708,
      "grad_norm": 0.17187611758708954,
      "learning_rate": 2.3848368838254384e-06,
      "loss": 0.0145,
      "step": 2331400
    },
    {
      "epoch": 3.815419964258361,
      "grad_norm": 0.5794063210487366,
      "learning_rate": 2.3847709916119215e-06,
      "loss": 0.0124,
      "step": 2331420
    },
    {
      "epoch": 3.815452694697014,
      "grad_norm": 0.4558557868003845,
      "learning_rate": 2.3847050993984043e-06,
      "loss": 0.0114,
      "step": 2331440
    },
    {
      "epoch": 3.815485425135668,
      "grad_norm": 0.45379605889320374,
      "learning_rate": 2.3846392071848874e-06,
      "loss": 0.0097,
      "step": 2331460
    },
    {
      "epoch": 3.815518155574321,
      "grad_norm": 0.24760717153549194,
      "learning_rate": 2.38457331497137e-06,
      "loss": 0.0144,
      "step": 2331480
    },
    {
      "epoch": 3.815550886012974,
      "grad_norm": 0.18413683772087097,
      "learning_rate": 2.384507422757853e-06,
      "loss": 0.0118,
      "step": 2331500
    },
    {
      "epoch": 3.8155836164516277,
      "grad_norm": 0.21746191382408142,
      "learning_rate": 2.3844415305443356e-06,
      "loss": 0.0126,
      "step": 2331520
    },
    {
      "epoch": 3.8156163468902813,
      "grad_norm": 0.2965732514858246,
      "learning_rate": 2.384375638330819e-06,
      "loss": 0.0121,
      "step": 2331540
    },
    {
      "epoch": 3.8156490773289344,
      "grad_norm": 0.31236398220062256,
      "learning_rate": 2.3843097461173015e-06,
      "loss": 0.0106,
      "step": 2331560
    },
    {
      "epoch": 3.8156818077675876,
      "grad_norm": 0.2011118084192276,
      "learning_rate": 2.3842438539037843e-06,
      "loss": 0.0114,
      "step": 2331580
    },
    {
      "epoch": 3.815714538206241,
      "grad_norm": 0.6492195725440979,
      "learning_rate": 2.384177961690267e-06,
      "loss": 0.0077,
      "step": 2331600
    },
    {
      "epoch": 3.8157472686448943,
      "grad_norm": 0.10691653937101364,
      "learning_rate": 2.38411206947675e-06,
      "loss": 0.0105,
      "step": 2331620
    },
    {
      "epoch": 3.8157799990835475,
      "grad_norm": 0.3633074462413788,
      "learning_rate": 2.384046177263233e-06,
      "loss": 0.0129,
      "step": 2331640
    },
    {
      "epoch": 3.815812729522201,
      "grad_norm": 0.1430816352367401,
      "learning_rate": 2.383980285049716e-06,
      "loss": 0.0074,
      "step": 2331660
    },
    {
      "epoch": 3.8158454599608547,
      "grad_norm": 0.16092263162136078,
      "learning_rate": 2.383914392836199e-06,
      "loss": 0.0082,
      "step": 2331680
    },
    {
      "epoch": 3.815878190399508,
      "grad_norm": 0.25752517580986023,
      "learning_rate": 2.3838485006226816e-06,
      "loss": 0.0123,
      "step": 2331700
    },
    {
      "epoch": 3.815910920838161,
      "grad_norm": 0.15509387850761414,
      "learning_rate": 2.3837826084091643e-06,
      "loss": 0.0122,
      "step": 2331720
    },
    {
      "epoch": 3.8159436512768146,
      "grad_norm": 0.16478361189365387,
      "learning_rate": 2.3837167161956475e-06,
      "loss": 0.01,
      "step": 2331740
    },
    {
      "epoch": 3.8159763817154677,
      "grad_norm": 0.09177204966545105,
      "learning_rate": 2.38365082398213e-06,
      "loss": 0.0077,
      "step": 2331760
    },
    {
      "epoch": 3.816009112154121,
      "grad_norm": 0.2479364573955536,
      "learning_rate": 2.383584931768613e-06,
      "loss": 0.0081,
      "step": 2331780
    },
    {
      "epoch": 3.8160418425927745,
      "grad_norm": 0.10983685404062271,
      "learning_rate": 2.3835190395550957e-06,
      "loss": 0.009,
      "step": 2331800
    },
    {
      "epoch": 3.8160745730314276,
      "grad_norm": 0.7325656414031982,
      "learning_rate": 2.383453147341579e-06,
      "loss": 0.0086,
      "step": 2331820
    },
    {
      "epoch": 3.816107303470081,
      "grad_norm": 0.12730154395103455,
      "learning_rate": 2.3833872551280616e-06,
      "loss": 0.0122,
      "step": 2331840
    },
    {
      "epoch": 3.8161400339087344,
      "grad_norm": 0.20640629529953003,
      "learning_rate": 2.3833213629145448e-06,
      "loss": 0.0132,
      "step": 2331860
    },
    {
      "epoch": 3.816172764347388,
      "grad_norm": 0.18918687105178833,
      "learning_rate": 2.3832554707010275e-06,
      "loss": 0.0109,
      "step": 2331880
    },
    {
      "epoch": 3.816205494786041,
      "grad_norm": 0.27441638708114624,
      "learning_rate": 2.3831895784875102e-06,
      "loss": 0.0082,
      "step": 2331900
    },
    {
      "epoch": 3.8162382252246942,
      "grad_norm": 0.20851323008537292,
      "learning_rate": 2.383123686273993e-06,
      "loss": 0.0106,
      "step": 2331920
    },
    {
      "epoch": 3.816270955663348,
      "grad_norm": 0.3053647577762604,
      "learning_rate": 2.383057794060476e-06,
      "loss": 0.0131,
      "step": 2331940
    },
    {
      "epoch": 3.816303686102001,
      "grad_norm": 0.19934476912021637,
      "learning_rate": 2.382991901846959e-06,
      "loss": 0.0125,
      "step": 2331960
    },
    {
      "epoch": 3.8163364165406546,
      "grad_norm": 0.753442645072937,
      "learning_rate": 2.3829260096334416e-06,
      "loss": 0.0113,
      "step": 2331980
    },
    {
      "epoch": 3.8163691469793077,
      "grad_norm": 0.2733229398727417,
      "learning_rate": 2.3828601174199248e-06,
      "loss": 0.0105,
      "step": 2332000
    },
    {
      "epoch": 3.8164018774179613,
      "grad_norm": 0.20351509749889374,
      "learning_rate": 2.3827942252064075e-06,
      "loss": 0.0116,
      "step": 2332020
    },
    {
      "epoch": 3.8164346078566145,
      "grad_norm": 0.4634411931037903,
      "learning_rate": 2.3827283329928903e-06,
      "loss": 0.0111,
      "step": 2332040
    },
    {
      "epoch": 3.8164673382952676,
      "grad_norm": 0.31262731552124023,
      "learning_rate": 2.3826624407793734e-06,
      "loss": 0.0088,
      "step": 2332060
    },
    {
      "epoch": 3.8165000687339212,
      "grad_norm": 0.1837911307811737,
      "learning_rate": 2.382596548565856e-06,
      "loss": 0.0131,
      "step": 2332080
    },
    {
      "epoch": 3.8165327991725744,
      "grad_norm": 0.14239832758903503,
      "learning_rate": 2.382530656352339e-06,
      "loss": 0.0083,
      "step": 2332100
    },
    {
      "epoch": 3.816565529611228,
      "grad_norm": 0.09005489945411682,
      "learning_rate": 2.3824647641388216e-06,
      "loss": 0.0104,
      "step": 2332120
    },
    {
      "epoch": 3.816598260049881,
      "grad_norm": 0.47333988547325134,
      "learning_rate": 2.382398871925305e-06,
      "loss": 0.0095,
      "step": 2332140
    },
    {
      "epoch": 3.8166309904885347,
      "grad_norm": 0.5632969737052917,
      "learning_rate": 2.382332979711788e-06,
      "loss": 0.0127,
      "step": 2332160
    },
    {
      "epoch": 3.816663720927188,
      "grad_norm": 0.14886727929115295,
      "learning_rate": 2.3822670874982707e-06,
      "loss": 0.01,
      "step": 2332180
    },
    {
      "epoch": 3.816696451365841,
      "grad_norm": 0.233207106590271,
      "learning_rate": 2.3822011952847534e-06,
      "loss": 0.0086,
      "step": 2332200
    },
    {
      "epoch": 3.8167291818044946,
      "grad_norm": 0.07601508498191833,
      "learning_rate": 2.382135303071236e-06,
      "loss": 0.0093,
      "step": 2332220
    },
    {
      "epoch": 3.8167619122431478,
      "grad_norm": 0.19222919642925262,
      "learning_rate": 2.3820694108577194e-06,
      "loss": 0.014,
      "step": 2332240
    },
    {
      "epoch": 3.8167946426818014,
      "grad_norm": 1.9950248003005981,
      "learning_rate": 2.382003518644202e-06,
      "loss": 0.0101,
      "step": 2332260
    },
    {
      "epoch": 3.8168273731204545,
      "grad_norm": 0.24964743852615356,
      "learning_rate": 2.381937626430685e-06,
      "loss": 0.0109,
      "step": 2332280
    },
    {
      "epoch": 3.816860103559108,
      "grad_norm": 0.22382943332195282,
      "learning_rate": 2.3818717342171676e-06,
      "loss": 0.0131,
      "step": 2332300
    },
    {
      "epoch": 3.8168928339977612,
      "grad_norm": 0.2978154420852661,
      "learning_rate": 2.3818058420036503e-06,
      "loss": 0.0091,
      "step": 2332320
    },
    {
      "epoch": 3.8169255644364144,
      "grad_norm": 0.09621551632881165,
      "learning_rate": 2.3817399497901335e-06,
      "loss": 0.0166,
      "step": 2332340
    },
    {
      "epoch": 3.816958294875068,
      "grad_norm": 0.26364588737487793,
      "learning_rate": 2.3816740575766166e-06,
      "loss": 0.0111,
      "step": 2332360
    },
    {
      "epoch": 3.816991025313721,
      "grad_norm": 0.47869786620140076,
      "learning_rate": 2.3816081653630994e-06,
      "loss": 0.0083,
      "step": 2332380
    },
    {
      "epoch": 3.8170237557523747,
      "grad_norm": 0.14187686145305634,
      "learning_rate": 2.381542273149582e-06,
      "loss": 0.0066,
      "step": 2332400
    },
    {
      "epoch": 3.817056486191028,
      "grad_norm": 0.16635678708553314,
      "learning_rate": 2.381476380936065e-06,
      "loss": 0.0053,
      "step": 2332420
    },
    {
      "epoch": 3.8170892166296815,
      "grad_norm": 0.17444294691085815,
      "learning_rate": 2.381410488722548e-06,
      "loss": 0.0081,
      "step": 2332440
    },
    {
      "epoch": 3.8171219470683346,
      "grad_norm": 0.09362830966711044,
      "learning_rate": 2.3813445965090308e-06,
      "loss": 0.0107,
      "step": 2332460
    },
    {
      "epoch": 3.817154677506988,
      "grad_norm": 0.1210755929350853,
      "learning_rate": 2.3812787042955135e-06,
      "loss": 0.0101,
      "step": 2332480
    },
    {
      "epoch": 3.8171874079456414,
      "grad_norm": 0.6476570963859558,
      "learning_rate": 2.3812128120819962e-06,
      "loss": 0.0128,
      "step": 2332500
    },
    {
      "epoch": 3.8172201383842945,
      "grad_norm": 0.31566354632377625,
      "learning_rate": 2.3811469198684794e-06,
      "loss": 0.01,
      "step": 2332520
    },
    {
      "epoch": 3.817252868822948,
      "grad_norm": 0.29647159576416016,
      "learning_rate": 2.381081027654962e-06,
      "loss": 0.0074,
      "step": 2332540
    },
    {
      "epoch": 3.8172855992616013,
      "grad_norm": 0.3829024136066437,
      "learning_rate": 2.3810151354414453e-06,
      "loss": 0.0082,
      "step": 2332560
    },
    {
      "epoch": 3.817318329700255,
      "grad_norm": 0.10856083035469055,
      "learning_rate": 2.380949243227928e-06,
      "loss": 0.0112,
      "step": 2332580
    },
    {
      "epoch": 3.817351060138908,
      "grad_norm": 0.12962999939918518,
      "learning_rate": 2.3808833510144108e-06,
      "loss": 0.0133,
      "step": 2332600
    },
    {
      "epoch": 3.817383790577561,
      "grad_norm": 0.3356306552886963,
      "learning_rate": 2.3808174588008935e-06,
      "loss": 0.011,
      "step": 2332620
    },
    {
      "epoch": 3.8174165210162148,
      "grad_norm": 0.09837673604488373,
      "learning_rate": 2.3807515665873767e-06,
      "loss": 0.0122,
      "step": 2332640
    },
    {
      "epoch": 3.817449251454868,
      "grad_norm": 0.22413848340511322,
      "learning_rate": 2.3806856743738594e-06,
      "loss": 0.0163,
      "step": 2332660
    },
    {
      "epoch": 3.8174819818935215,
      "grad_norm": 0.4736514091491699,
      "learning_rate": 2.380619782160342e-06,
      "loss": 0.0139,
      "step": 2332680
    },
    {
      "epoch": 3.8175147123321747,
      "grad_norm": 0.2779858410358429,
      "learning_rate": 2.3805538899468253e-06,
      "loss": 0.0091,
      "step": 2332700
    },
    {
      "epoch": 3.8175474427708282,
      "grad_norm": 0.2366645783185959,
      "learning_rate": 2.380487997733308e-06,
      "loss": 0.0184,
      "step": 2332720
    },
    {
      "epoch": 3.8175801732094814,
      "grad_norm": 0.20789913833141327,
      "learning_rate": 2.380422105519791e-06,
      "loss": 0.0118,
      "step": 2332740
    },
    {
      "epoch": 3.8176129036481345,
      "grad_norm": 0.25785183906555176,
      "learning_rate": 2.380356213306274e-06,
      "loss": 0.0097,
      "step": 2332760
    },
    {
      "epoch": 3.817645634086788,
      "grad_norm": 0.1610817164182663,
      "learning_rate": 2.3802903210927567e-06,
      "loss": 0.0153,
      "step": 2332780
    },
    {
      "epoch": 3.8176783645254413,
      "grad_norm": 0.12131573259830475,
      "learning_rate": 2.3802244288792395e-06,
      "loss": 0.0117,
      "step": 2332800
    },
    {
      "epoch": 3.8177110949640944,
      "grad_norm": 0.19511742889881134,
      "learning_rate": 2.380158536665722e-06,
      "loss": 0.0136,
      "step": 2332820
    },
    {
      "epoch": 3.817743825402748,
      "grad_norm": 0.24824413657188416,
      "learning_rate": 2.3800926444522054e-06,
      "loss": 0.0096,
      "step": 2332840
    },
    {
      "epoch": 3.8177765558414016,
      "grad_norm": 0.19683048129081726,
      "learning_rate": 2.380026752238688e-06,
      "loss": 0.011,
      "step": 2332860
    },
    {
      "epoch": 3.817809286280055,
      "grad_norm": 0.1704852133989334,
      "learning_rate": 2.3799608600251713e-06,
      "loss": 0.0126,
      "step": 2332880
    },
    {
      "epoch": 3.817842016718708,
      "grad_norm": 0.24569447338581085,
      "learning_rate": 2.379894967811654e-06,
      "loss": 0.0108,
      "step": 2332900
    },
    {
      "epoch": 3.8178747471573615,
      "grad_norm": 0.09348226338624954,
      "learning_rate": 2.3798290755981367e-06,
      "loss": 0.0082,
      "step": 2332920
    },
    {
      "epoch": 3.8179074775960147,
      "grad_norm": 0.48370835185050964,
      "learning_rate": 2.3797631833846195e-06,
      "loss": 0.0122,
      "step": 2332940
    },
    {
      "epoch": 3.817940208034668,
      "grad_norm": 0.14558358490467072,
      "learning_rate": 2.3796972911711026e-06,
      "loss": 0.0097,
      "step": 2332960
    },
    {
      "epoch": 3.8179729384733214,
      "grad_norm": 0.1972455084323883,
      "learning_rate": 2.3796313989575854e-06,
      "loss": 0.0098,
      "step": 2332980
    },
    {
      "epoch": 3.818005668911975,
      "grad_norm": 0.15260618925094604,
      "learning_rate": 2.379565506744068e-06,
      "loss": 0.0118,
      "step": 2333000
    },
    {
      "epoch": 3.818038399350628,
      "grad_norm": 0.18940849602222443,
      "learning_rate": 2.379499614530551e-06,
      "loss": 0.013,
      "step": 2333020
    },
    {
      "epoch": 3.8180711297892813,
      "grad_norm": 0.6544428467750549,
      "learning_rate": 2.379433722317034e-06,
      "loss": 0.0154,
      "step": 2333040
    },
    {
      "epoch": 3.818103860227935,
      "grad_norm": 2.022808074951172,
      "learning_rate": 2.3793678301035168e-06,
      "loss": 0.0135,
      "step": 2333060
    },
    {
      "epoch": 3.818136590666588,
      "grad_norm": 0.05912526324391365,
      "learning_rate": 2.37930193789e-06,
      "loss": 0.0119,
      "step": 2333080
    },
    {
      "epoch": 3.818169321105241,
      "grad_norm": 0.154865100979805,
      "learning_rate": 2.3792360456764827e-06,
      "loss": 0.011,
      "step": 2333100
    },
    {
      "epoch": 3.818202051543895,
      "grad_norm": 0.16402283310890198,
      "learning_rate": 2.3791701534629654e-06,
      "loss": 0.0088,
      "step": 2333120
    },
    {
      "epoch": 3.8182347819825484,
      "grad_norm": 0.4050658345222473,
      "learning_rate": 2.379104261249448e-06,
      "loss": 0.0072,
      "step": 2333140
    },
    {
      "epoch": 3.8182675124212015,
      "grad_norm": 0.09650785475969315,
      "learning_rate": 2.3790383690359313e-06,
      "loss": 0.0065,
      "step": 2333160
    },
    {
      "epoch": 3.8183002428598547,
      "grad_norm": 0.3471689224243164,
      "learning_rate": 2.378972476822414e-06,
      "loss": 0.0147,
      "step": 2333180
    },
    {
      "epoch": 3.8183329732985083,
      "grad_norm": 0.11187440156936646,
      "learning_rate": 2.378906584608897e-06,
      "loss": 0.014,
      "step": 2333200
    },
    {
      "epoch": 3.8183657037371614,
      "grad_norm": 0.1931564211845398,
      "learning_rate": 2.37884069239538e-06,
      "loss": 0.0128,
      "step": 2333220
    },
    {
      "epoch": 3.8183984341758146,
      "grad_norm": 0.2382090836763382,
      "learning_rate": 2.3787748001818627e-06,
      "loss": 0.0116,
      "step": 2333240
    },
    {
      "epoch": 3.818431164614468,
      "grad_norm": 0.32955479621887207,
      "learning_rate": 2.3787089079683454e-06,
      "loss": 0.0095,
      "step": 2333260
    },
    {
      "epoch": 3.8184638950531213,
      "grad_norm": 0.5050615072250366,
      "learning_rate": 2.3786430157548286e-06,
      "loss": 0.0161,
      "step": 2333280
    },
    {
      "epoch": 3.818496625491775,
      "grad_norm": 0.23558960855007172,
      "learning_rate": 2.3785771235413113e-06,
      "loss": 0.0106,
      "step": 2333300
    },
    {
      "epoch": 3.818529355930428,
      "grad_norm": 0.921299159526825,
      "learning_rate": 2.378511231327794e-06,
      "loss": 0.0094,
      "step": 2333320
    },
    {
      "epoch": 3.8185620863690817,
      "grad_norm": 0.517624020576477,
      "learning_rate": 2.378445339114277e-06,
      "loss": 0.0124,
      "step": 2333340
    },
    {
      "epoch": 3.818594816807735,
      "grad_norm": 0.32784679532051086,
      "learning_rate": 2.37837944690076e-06,
      "loss": 0.013,
      "step": 2333360
    },
    {
      "epoch": 3.818627547246388,
      "grad_norm": 0.24136103689670563,
      "learning_rate": 2.3783135546872427e-06,
      "loss": 0.0064,
      "step": 2333380
    },
    {
      "epoch": 3.8186602776850416,
      "grad_norm": 0.3208208978176117,
      "learning_rate": 2.378247662473726e-06,
      "loss": 0.0102,
      "step": 2333400
    },
    {
      "epoch": 3.8186930081236947,
      "grad_norm": 0.17146962881088257,
      "learning_rate": 2.3781817702602086e-06,
      "loss": 0.0262,
      "step": 2333420
    },
    {
      "epoch": 3.8187257385623483,
      "grad_norm": 0.507997453212738,
      "learning_rate": 2.3781158780466914e-06,
      "loss": 0.0161,
      "step": 2333440
    },
    {
      "epoch": 3.8187584690010015,
      "grad_norm": 0.23315641283988953,
      "learning_rate": 2.3780499858331745e-06,
      "loss": 0.0105,
      "step": 2333460
    },
    {
      "epoch": 3.818791199439655,
      "grad_norm": 0.12378707528114319,
      "learning_rate": 2.3779840936196573e-06,
      "loss": 0.0127,
      "step": 2333480
    },
    {
      "epoch": 3.818823929878308,
      "grad_norm": 0.21689122915267944,
      "learning_rate": 2.37791820140614e-06,
      "loss": 0.0114,
      "step": 2333500
    },
    {
      "epoch": 3.8188566603169614,
      "grad_norm": 0.2615429162979126,
      "learning_rate": 2.3778523091926227e-06,
      "loss": 0.0126,
      "step": 2333520
    },
    {
      "epoch": 3.818889390755615,
      "grad_norm": 0.13295407593250275,
      "learning_rate": 2.3777864169791055e-06,
      "loss": 0.0147,
      "step": 2333540
    },
    {
      "epoch": 3.818922121194268,
      "grad_norm": 0.1846918761730194,
      "learning_rate": 2.3777205247655886e-06,
      "loss": 0.0105,
      "step": 2333560
    },
    {
      "epoch": 3.8189548516329217,
      "grad_norm": 0.2448379546403885,
      "learning_rate": 2.377654632552072e-06,
      "loss": 0.0089,
      "step": 2333580
    },
    {
      "epoch": 3.818987582071575,
      "grad_norm": 0.1338798999786377,
      "learning_rate": 2.3775887403385545e-06,
      "loss": 0.016,
      "step": 2333600
    },
    {
      "epoch": 3.8190203125102284,
      "grad_norm": 0.2526049315929413,
      "learning_rate": 2.3775228481250373e-06,
      "loss": 0.0121,
      "step": 2333620
    },
    {
      "epoch": 3.8190530429488816,
      "grad_norm": 0.24987734854221344,
      "learning_rate": 2.37745695591152e-06,
      "loss": 0.0097,
      "step": 2333640
    },
    {
      "epoch": 3.8190857733875347,
      "grad_norm": 0.18771125376224518,
      "learning_rate": 2.377391063698003e-06,
      "loss": 0.0091,
      "step": 2333660
    },
    {
      "epoch": 3.8191185038261883,
      "grad_norm": 0.053014177829027176,
      "learning_rate": 2.377325171484486e-06,
      "loss": 0.0103,
      "step": 2333680
    },
    {
      "epoch": 3.8191512342648415,
      "grad_norm": 0.04028348997235298,
      "learning_rate": 2.3772592792709687e-06,
      "loss": 0.0107,
      "step": 2333700
    },
    {
      "epoch": 3.819183964703495,
      "grad_norm": 0.18390561640262604,
      "learning_rate": 2.3771933870574514e-06,
      "loss": 0.0102,
      "step": 2333720
    },
    {
      "epoch": 3.8192166951421482,
      "grad_norm": 0.25657981634140015,
      "learning_rate": 2.3771274948439346e-06,
      "loss": 0.0101,
      "step": 2333740
    },
    {
      "epoch": 3.819249425580802,
      "grad_norm": 0.27076831459999084,
      "learning_rate": 2.3770616026304173e-06,
      "loss": 0.011,
      "step": 2333760
    },
    {
      "epoch": 3.819282156019455,
      "grad_norm": 0.4405800402164459,
      "learning_rate": 2.3769957104169005e-06,
      "loss": 0.0122,
      "step": 2333780
    },
    {
      "epoch": 3.819314886458108,
      "grad_norm": 0.3680196702480316,
      "learning_rate": 2.3769298182033832e-06,
      "loss": 0.0127,
      "step": 2333800
    },
    {
      "epoch": 3.8193476168967617,
      "grad_norm": 0.11205270141363144,
      "learning_rate": 2.376863925989866e-06,
      "loss": 0.0094,
      "step": 2333820
    },
    {
      "epoch": 3.819380347335415,
      "grad_norm": 0.43365782499313354,
      "learning_rate": 2.3767980337763487e-06,
      "loss": 0.0154,
      "step": 2333840
    },
    {
      "epoch": 3.8194130777740685,
      "grad_norm": 0.21305078268051147,
      "learning_rate": 2.376732141562832e-06,
      "loss": 0.0109,
      "step": 2333860
    },
    {
      "epoch": 3.8194458082127216,
      "grad_norm": 0.11143038421869278,
      "learning_rate": 2.3766662493493146e-06,
      "loss": 0.011,
      "step": 2333880
    },
    {
      "epoch": 3.819478538651375,
      "grad_norm": 0.1618872582912445,
      "learning_rate": 2.3766003571357973e-06,
      "loss": 0.0075,
      "step": 2333900
    },
    {
      "epoch": 3.8195112690900284,
      "grad_norm": 0.35222184658050537,
      "learning_rate": 2.3765344649222805e-06,
      "loss": 0.0128,
      "step": 2333920
    },
    {
      "epoch": 3.8195439995286815,
      "grad_norm": 0.49193066358566284,
      "learning_rate": 2.3764685727087632e-06,
      "loss": 0.0128,
      "step": 2333940
    },
    {
      "epoch": 3.819576729967335,
      "grad_norm": 0.6377468705177307,
      "learning_rate": 2.376402680495246e-06,
      "loss": 0.0123,
      "step": 2333960
    },
    {
      "epoch": 3.8196094604059883,
      "grad_norm": 0.10256671905517578,
      "learning_rate": 2.376336788281729e-06,
      "loss": 0.0098,
      "step": 2333980
    },
    {
      "epoch": 3.819642190844642,
      "grad_norm": 0.2716236114501953,
      "learning_rate": 2.376270896068212e-06,
      "loss": 0.0149,
      "step": 2334000
    },
    {
      "epoch": 3.819674921283295,
      "grad_norm": 0.5180038809776306,
      "learning_rate": 2.3762050038546946e-06,
      "loss": 0.0081,
      "step": 2334020
    },
    {
      "epoch": 3.8197076517219486,
      "grad_norm": 0.3089786469936371,
      "learning_rate": 2.3761391116411774e-06,
      "loss": 0.012,
      "step": 2334040
    },
    {
      "epoch": 3.8197403821606017,
      "grad_norm": 0.07955635339021683,
      "learning_rate": 2.3760732194276605e-06,
      "loss": 0.0139,
      "step": 2334060
    },
    {
      "epoch": 3.819773112599255,
      "grad_norm": 0.3985401690006256,
      "learning_rate": 2.3760073272141433e-06,
      "loss": 0.0111,
      "step": 2334080
    },
    {
      "epoch": 3.8198058430379085,
      "grad_norm": 0.5873110890388489,
      "learning_rate": 2.3759414350006264e-06,
      "loss": 0.012,
      "step": 2334100
    },
    {
      "epoch": 3.8198385734765616,
      "grad_norm": 0.27701181173324585,
      "learning_rate": 2.375875542787109e-06,
      "loss": 0.0084,
      "step": 2334120
    },
    {
      "epoch": 3.8198713039152152,
      "grad_norm": 0.27996164560317993,
      "learning_rate": 2.375809650573592e-06,
      "loss": 0.0132,
      "step": 2334140
    },
    {
      "epoch": 3.8199040343538684,
      "grad_norm": 0.15473204851150513,
      "learning_rate": 2.3757437583600746e-06,
      "loss": 0.0131,
      "step": 2334160
    },
    {
      "epoch": 3.819936764792522,
      "grad_norm": 0.37290525436401367,
      "learning_rate": 2.375677866146558e-06,
      "loss": 0.0127,
      "step": 2334180
    },
    {
      "epoch": 3.819969495231175,
      "grad_norm": 0.35134157538414,
      "learning_rate": 2.3756119739330406e-06,
      "loss": 0.0104,
      "step": 2334200
    },
    {
      "epoch": 3.8200022256698283,
      "grad_norm": 0.5964921116828918,
      "learning_rate": 2.3755460817195233e-06,
      "loss": 0.0154,
      "step": 2334220
    },
    {
      "epoch": 3.820034956108482,
      "grad_norm": 0.25303661823272705,
      "learning_rate": 2.375480189506006e-06,
      "loss": 0.0145,
      "step": 2334240
    },
    {
      "epoch": 3.820067686547135,
      "grad_norm": 0.2661357820034027,
      "learning_rate": 2.375414297292489e-06,
      "loss": 0.0111,
      "step": 2334260
    },
    {
      "epoch": 3.820100416985788,
      "grad_norm": 0.07381453365087509,
      "learning_rate": 2.375348405078972e-06,
      "loss": 0.0071,
      "step": 2334280
    },
    {
      "epoch": 3.8201331474244418,
      "grad_norm": 0.3022828996181488,
      "learning_rate": 2.375282512865455e-06,
      "loss": 0.0135,
      "step": 2334300
    },
    {
      "epoch": 3.8201658778630954,
      "grad_norm": 0.1034650132060051,
      "learning_rate": 2.375216620651938e-06,
      "loss": 0.0089,
      "step": 2334320
    },
    {
      "epoch": 3.8201986083017485,
      "grad_norm": 0.613609790802002,
      "learning_rate": 2.3751507284384206e-06,
      "loss": 0.009,
      "step": 2334340
    },
    {
      "epoch": 3.8202313387404017,
      "grad_norm": 0.17994603514671326,
      "learning_rate": 2.3750848362249033e-06,
      "loss": 0.0093,
      "step": 2334360
    },
    {
      "epoch": 3.8202640691790553,
      "grad_norm": 0.09338918328285217,
      "learning_rate": 2.3750189440113865e-06,
      "loss": 0.0126,
      "step": 2334380
    },
    {
      "epoch": 3.8202967996177084,
      "grad_norm": 0.1520823985338211,
      "learning_rate": 2.3749530517978692e-06,
      "loss": 0.0096,
      "step": 2334400
    },
    {
      "epoch": 3.8203295300563616,
      "grad_norm": 0.24956728518009186,
      "learning_rate": 2.374887159584352e-06,
      "loss": 0.0139,
      "step": 2334420
    },
    {
      "epoch": 3.820362260495015,
      "grad_norm": 0.2654336392879486,
      "learning_rate": 2.3748212673708347e-06,
      "loss": 0.0106,
      "step": 2334440
    },
    {
      "epoch": 3.8203949909336687,
      "grad_norm": 0.11724964529275894,
      "learning_rate": 2.374755375157318e-06,
      "loss": 0.0089,
      "step": 2334460
    },
    {
      "epoch": 3.820427721372322,
      "grad_norm": 0.5684306025505066,
      "learning_rate": 2.3746894829438006e-06,
      "loss": 0.0134,
      "step": 2334480
    },
    {
      "epoch": 3.820460451810975,
      "grad_norm": 0.21779625117778778,
      "learning_rate": 2.3746235907302838e-06,
      "loss": 0.0063,
      "step": 2334500
    },
    {
      "epoch": 3.8204931822496286,
      "grad_norm": 0.30469003319740295,
      "learning_rate": 2.3745576985167665e-06,
      "loss": 0.014,
      "step": 2334520
    },
    {
      "epoch": 3.820525912688282,
      "grad_norm": 0.2248181700706482,
      "learning_rate": 2.3744918063032492e-06,
      "loss": 0.0125,
      "step": 2334540
    },
    {
      "epoch": 3.820558643126935,
      "grad_norm": 0.35038384795188904,
      "learning_rate": 2.374425914089732e-06,
      "loss": 0.0153,
      "step": 2334560
    },
    {
      "epoch": 3.8205913735655885,
      "grad_norm": 0.1960403174161911,
      "learning_rate": 2.374360021876215e-06,
      "loss": 0.013,
      "step": 2334580
    },
    {
      "epoch": 3.820624104004242,
      "grad_norm": 0.3741615414619446,
      "learning_rate": 2.374294129662698e-06,
      "loss": 0.0111,
      "step": 2334600
    },
    {
      "epoch": 3.8206568344428953,
      "grad_norm": 0.3641246259212494,
      "learning_rate": 2.374228237449181e-06,
      "loss": 0.0107,
      "step": 2334620
    },
    {
      "epoch": 3.8206895648815484,
      "grad_norm": 0.09414530545473099,
      "learning_rate": 2.374162345235664e-06,
      "loss": 0.0104,
      "step": 2334640
    },
    {
      "epoch": 3.820722295320202,
      "grad_norm": 0.05238866060972214,
      "learning_rate": 2.3740964530221465e-06,
      "loss": 0.0127,
      "step": 2334660
    },
    {
      "epoch": 3.820755025758855,
      "grad_norm": 0.19577035307884216,
      "learning_rate": 2.3740305608086293e-06,
      "loss": 0.0112,
      "step": 2334680
    },
    {
      "epoch": 3.8207877561975083,
      "grad_norm": 0.3521204888820648,
      "learning_rate": 2.3739646685951124e-06,
      "loss": 0.0091,
      "step": 2334700
    },
    {
      "epoch": 3.820820486636162,
      "grad_norm": 0.3141529858112335,
      "learning_rate": 2.373898776381595e-06,
      "loss": 0.0155,
      "step": 2334720
    },
    {
      "epoch": 3.8208532170748155,
      "grad_norm": 0.28676727414131165,
      "learning_rate": 2.373832884168078e-06,
      "loss": 0.0083,
      "step": 2334740
    },
    {
      "epoch": 3.8208859475134687,
      "grad_norm": 0.19207488000392914,
      "learning_rate": 2.3737669919545607e-06,
      "loss": 0.0077,
      "step": 2334760
    },
    {
      "epoch": 3.820918677952122,
      "grad_norm": 0.19726674258708954,
      "learning_rate": 2.373701099741044e-06,
      "loss": 0.0175,
      "step": 2334780
    },
    {
      "epoch": 3.8209514083907754,
      "grad_norm": 0.1548595130443573,
      "learning_rate": 2.373635207527527e-06,
      "loss": 0.0102,
      "step": 2334800
    },
    {
      "epoch": 3.8209841388294286,
      "grad_norm": 0.232826367020607,
      "learning_rate": 2.3735693153140097e-06,
      "loss": 0.0121,
      "step": 2334820
    },
    {
      "epoch": 3.8210168692680817,
      "grad_norm": 0.5536879897117615,
      "learning_rate": 2.3735034231004925e-06,
      "loss": 0.0104,
      "step": 2334840
    },
    {
      "epoch": 3.8210495997067353,
      "grad_norm": 0.4378911554813385,
      "learning_rate": 2.373437530886975e-06,
      "loss": 0.0118,
      "step": 2334860
    },
    {
      "epoch": 3.8210823301453885,
      "grad_norm": 0.15561802685260773,
      "learning_rate": 2.3733716386734584e-06,
      "loss": 0.0096,
      "step": 2334880
    },
    {
      "epoch": 3.821115060584042,
      "grad_norm": 0.16976013779640198,
      "learning_rate": 2.373305746459941e-06,
      "loss": 0.0102,
      "step": 2334900
    },
    {
      "epoch": 3.821147791022695,
      "grad_norm": 0.23604199290275574,
      "learning_rate": 2.373239854246424e-06,
      "loss": 0.0134,
      "step": 2334920
    },
    {
      "epoch": 3.821180521461349,
      "grad_norm": 0.3035859167575836,
      "learning_rate": 2.3731739620329066e-06,
      "loss": 0.0083,
      "step": 2334940
    },
    {
      "epoch": 3.821213251900002,
      "grad_norm": 0.16421595215797424,
      "learning_rate": 2.3731080698193893e-06,
      "loss": 0.0096,
      "step": 2334960
    },
    {
      "epoch": 3.821245982338655,
      "grad_norm": 0.43222537636756897,
      "learning_rate": 2.3730421776058725e-06,
      "loss": 0.0119,
      "step": 2334980
    },
    {
      "epoch": 3.8212787127773087,
      "grad_norm": 0.5555577874183655,
      "learning_rate": 2.3729762853923556e-06,
      "loss": 0.014,
      "step": 2335000
    },
    {
      "epoch": 3.821311443215962,
      "grad_norm": 0.3878438174724579,
      "learning_rate": 2.3729103931788384e-06,
      "loss": 0.0131,
      "step": 2335020
    },
    {
      "epoch": 3.8213441736546154,
      "grad_norm": 0.4402596354484558,
      "learning_rate": 2.372844500965321e-06,
      "loss": 0.0099,
      "step": 2335040
    },
    {
      "epoch": 3.8213769040932686,
      "grad_norm": 0.5073079466819763,
      "learning_rate": 2.372778608751804e-06,
      "loss": 0.0104,
      "step": 2335060
    },
    {
      "epoch": 3.821409634531922,
      "grad_norm": 0.3823885917663574,
      "learning_rate": 2.372712716538287e-06,
      "loss": 0.0085,
      "step": 2335080
    },
    {
      "epoch": 3.8214423649705753,
      "grad_norm": 0.30517762899398804,
      "learning_rate": 2.3726468243247698e-06,
      "loss": 0.0078,
      "step": 2335100
    },
    {
      "epoch": 3.8214750954092285,
      "grad_norm": 0.17485985159873962,
      "learning_rate": 2.3725809321112525e-06,
      "loss": 0.0068,
      "step": 2335120
    },
    {
      "epoch": 3.821507825847882,
      "grad_norm": 0.4170075058937073,
      "learning_rate": 2.3725150398977352e-06,
      "loss": 0.0155,
      "step": 2335140
    },
    {
      "epoch": 3.821540556286535,
      "grad_norm": 0.1915740817785263,
      "learning_rate": 2.3724491476842184e-06,
      "loss": 0.0113,
      "step": 2335160
    },
    {
      "epoch": 3.821573286725189,
      "grad_norm": 0.15651635825634003,
      "learning_rate": 2.372383255470701e-06,
      "loss": 0.0173,
      "step": 2335180
    },
    {
      "epoch": 3.821606017163842,
      "grad_norm": 0.15599679946899414,
      "learning_rate": 2.3723173632571843e-06,
      "loss": 0.0074,
      "step": 2335200
    },
    {
      "epoch": 3.8216387476024956,
      "grad_norm": 0.13245880603790283,
      "learning_rate": 2.372251471043667e-06,
      "loss": 0.0118,
      "step": 2335220
    },
    {
      "epoch": 3.8216714780411487,
      "grad_norm": 0.4709989130496979,
      "learning_rate": 2.37218557883015e-06,
      "loss": 0.0117,
      "step": 2335240
    },
    {
      "epoch": 3.821704208479802,
      "grad_norm": 0.2656804621219635,
      "learning_rate": 2.3721196866166325e-06,
      "loss": 0.009,
      "step": 2335260
    },
    {
      "epoch": 3.8217369389184555,
      "grad_norm": 0.4282718002796173,
      "learning_rate": 2.3720537944031157e-06,
      "loss": 0.0156,
      "step": 2335280
    },
    {
      "epoch": 3.8217696693571086,
      "grad_norm": 0.41023513674736023,
      "learning_rate": 2.3719879021895984e-06,
      "loss": 0.0147,
      "step": 2335300
    },
    {
      "epoch": 3.821802399795762,
      "grad_norm": 0.22806306183338165,
      "learning_rate": 2.371922009976081e-06,
      "loss": 0.0092,
      "step": 2335320
    },
    {
      "epoch": 3.8218351302344153,
      "grad_norm": 0.17470037937164307,
      "learning_rate": 2.3718561177625643e-06,
      "loss": 0.0132,
      "step": 2335340
    },
    {
      "epoch": 3.821867860673069,
      "grad_norm": 0.20015357434749603,
      "learning_rate": 2.371790225549047e-06,
      "loss": 0.0092,
      "step": 2335360
    },
    {
      "epoch": 3.821900591111722,
      "grad_norm": 0.6534326076507568,
      "learning_rate": 2.37172433333553e-06,
      "loss": 0.01,
      "step": 2335380
    },
    {
      "epoch": 3.8219333215503752,
      "grad_norm": 0.17798762023448944,
      "learning_rate": 2.371658441122013e-06,
      "loss": 0.0119,
      "step": 2335400
    },
    {
      "epoch": 3.821966051989029,
      "grad_norm": 0.5671550631523132,
      "learning_rate": 2.3715925489084957e-06,
      "loss": 0.0189,
      "step": 2335420
    },
    {
      "epoch": 3.821998782427682,
      "grad_norm": 0.19564996659755707,
      "learning_rate": 2.3715266566949785e-06,
      "loss": 0.0119,
      "step": 2335440
    },
    {
      "epoch": 3.8220315128663356,
      "grad_norm": 0.22601892054080963,
      "learning_rate": 2.371460764481461e-06,
      "loss": 0.0085,
      "step": 2335460
    },
    {
      "epoch": 3.8220642433049887,
      "grad_norm": 0.21032145619392395,
      "learning_rate": 2.3713948722679444e-06,
      "loss": 0.0126,
      "step": 2335480
    },
    {
      "epoch": 3.8220969737436423,
      "grad_norm": 0.1378333568572998,
      "learning_rate": 2.371328980054427e-06,
      "loss": 0.0075,
      "step": 2335500
    },
    {
      "epoch": 3.8221297041822955,
      "grad_norm": 0.26098331809043884,
      "learning_rate": 2.3712630878409103e-06,
      "loss": 0.0107,
      "step": 2335520
    },
    {
      "epoch": 3.8221624346209486,
      "grad_norm": 0.24991334974765778,
      "learning_rate": 2.371197195627393e-06,
      "loss": 0.0121,
      "step": 2335540
    },
    {
      "epoch": 3.822195165059602,
      "grad_norm": 0.05767746642231941,
      "learning_rate": 2.3711313034138757e-06,
      "loss": 0.011,
      "step": 2335560
    },
    {
      "epoch": 3.8222278954982554,
      "grad_norm": 0.5009844303131104,
      "learning_rate": 2.3710654112003585e-06,
      "loss": 0.0142,
      "step": 2335580
    },
    {
      "epoch": 3.822260625936909,
      "grad_norm": 0.8727834224700928,
      "learning_rate": 2.3709995189868417e-06,
      "loss": 0.0086,
      "step": 2335600
    },
    {
      "epoch": 3.822293356375562,
      "grad_norm": 0.16871865093708038,
      "learning_rate": 2.3709336267733244e-06,
      "loss": 0.0127,
      "step": 2335620
    },
    {
      "epoch": 3.8223260868142157,
      "grad_norm": 0.5509008765220642,
      "learning_rate": 2.370867734559807e-06,
      "loss": 0.0092,
      "step": 2335640
    },
    {
      "epoch": 3.822358817252869,
      "grad_norm": 0.8141089677810669,
      "learning_rate": 2.37080184234629e-06,
      "loss": 0.0129,
      "step": 2335660
    },
    {
      "epoch": 3.822391547691522,
      "grad_norm": 0.2318372279405594,
      "learning_rate": 2.370735950132773e-06,
      "loss": 0.009,
      "step": 2335680
    },
    {
      "epoch": 3.8224242781301756,
      "grad_norm": 0.19690753519535065,
      "learning_rate": 2.3706700579192558e-06,
      "loss": 0.0111,
      "step": 2335700
    },
    {
      "epoch": 3.8224570085688288,
      "grad_norm": 0.22278818488121033,
      "learning_rate": 2.370604165705739e-06,
      "loss": 0.0078,
      "step": 2335720
    },
    {
      "epoch": 3.822489739007482,
      "grad_norm": 0.12681953608989716,
      "learning_rate": 2.3705382734922217e-06,
      "loss": 0.0066,
      "step": 2335740
    },
    {
      "epoch": 3.8225224694461355,
      "grad_norm": 0.21689146757125854,
      "learning_rate": 2.3704723812787044e-06,
      "loss": 0.0111,
      "step": 2335760
    },
    {
      "epoch": 3.822555199884789,
      "grad_norm": 0.2597962021827698,
      "learning_rate": 2.370406489065187e-06,
      "loss": 0.008,
      "step": 2335780
    },
    {
      "epoch": 3.8225879303234422,
      "grad_norm": 0.38893312215805054,
      "learning_rate": 2.3703405968516703e-06,
      "loss": 0.0117,
      "step": 2335800
    },
    {
      "epoch": 3.8226206607620954,
      "grad_norm": 0.1739882528781891,
      "learning_rate": 2.370274704638153e-06,
      "loss": 0.0153,
      "step": 2335820
    },
    {
      "epoch": 3.822653391200749,
      "grad_norm": 0.3978537321090698,
      "learning_rate": 2.370208812424636e-06,
      "loss": 0.0121,
      "step": 2335840
    },
    {
      "epoch": 3.822686121639402,
      "grad_norm": 0.16021524369716644,
      "learning_rate": 2.370142920211119e-06,
      "loss": 0.0083,
      "step": 2335860
    },
    {
      "epoch": 3.8227188520780553,
      "grad_norm": 0.14848531782627106,
      "learning_rate": 2.3700770279976017e-06,
      "loss": 0.0082,
      "step": 2335880
    },
    {
      "epoch": 3.822751582516709,
      "grad_norm": 0.43618524074554443,
      "learning_rate": 2.3700111357840844e-06,
      "loss": 0.0139,
      "step": 2335900
    },
    {
      "epoch": 3.8227843129553625,
      "grad_norm": 0.2237568497657776,
      "learning_rate": 2.3699452435705676e-06,
      "loss": 0.0084,
      "step": 2335920
    },
    {
      "epoch": 3.8228170433940156,
      "grad_norm": 0.43941444158554077,
      "learning_rate": 2.3698793513570503e-06,
      "loss": 0.0171,
      "step": 2335940
    },
    {
      "epoch": 3.8228497738326688,
      "grad_norm": 0.38271471858024597,
      "learning_rate": 2.369813459143533e-06,
      "loss": 0.0135,
      "step": 2335960
    },
    {
      "epoch": 3.8228825042713224,
      "grad_norm": 0.2224145084619522,
      "learning_rate": 2.369747566930016e-06,
      "loss": 0.0073,
      "step": 2335980
    },
    {
      "epoch": 3.8229152347099755,
      "grad_norm": 0.3008153438568115,
      "learning_rate": 2.369681674716499e-06,
      "loss": 0.01,
      "step": 2336000
    },
    {
      "epoch": 3.8229479651486287,
      "grad_norm": 0.23362848162651062,
      "learning_rate": 2.3696157825029817e-06,
      "loss": 0.007,
      "step": 2336020
    },
    {
      "epoch": 3.8229806955872823,
      "grad_norm": 0.11202862858772278,
      "learning_rate": 2.369549890289465e-06,
      "loss": 0.0103,
      "step": 2336040
    },
    {
      "epoch": 3.823013426025936,
      "grad_norm": 0.4949382543563843,
      "learning_rate": 2.3694839980759476e-06,
      "loss": 0.0151,
      "step": 2336060
    },
    {
      "epoch": 3.823046156464589,
      "grad_norm": 0.2521006464958191,
      "learning_rate": 2.3694181058624304e-06,
      "loss": 0.0112,
      "step": 2336080
    },
    {
      "epoch": 3.823078886903242,
      "grad_norm": 0.15872924029827118,
      "learning_rate": 2.3693522136489135e-06,
      "loss": 0.0112,
      "step": 2336100
    },
    {
      "epoch": 3.8231116173418958,
      "grad_norm": 0.1562005579471588,
      "learning_rate": 2.3692863214353963e-06,
      "loss": 0.0142,
      "step": 2336120
    },
    {
      "epoch": 3.823144347780549,
      "grad_norm": 0.1407654732465744,
      "learning_rate": 2.369220429221879e-06,
      "loss": 0.0144,
      "step": 2336140
    },
    {
      "epoch": 3.823177078219202,
      "grad_norm": 0.3164982497692108,
      "learning_rate": 2.3691545370083618e-06,
      "loss": 0.0118,
      "step": 2336160
    },
    {
      "epoch": 3.8232098086578556,
      "grad_norm": 0.3169795274734497,
      "learning_rate": 2.3690886447948445e-06,
      "loss": 0.0098,
      "step": 2336180
    },
    {
      "epoch": 3.8232425390965092,
      "grad_norm": 0.10867369174957275,
      "learning_rate": 2.3690227525813277e-06,
      "loss": 0.0116,
      "step": 2336200
    },
    {
      "epoch": 3.8232752695351624,
      "grad_norm": 0.2134992927312851,
      "learning_rate": 2.368956860367811e-06,
      "loss": 0.0088,
      "step": 2336220
    },
    {
      "epoch": 3.8233079999738155,
      "grad_norm": 0.1537621170282364,
      "learning_rate": 2.3688909681542936e-06,
      "loss": 0.009,
      "step": 2336240
    },
    {
      "epoch": 3.823340730412469,
      "grad_norm": 0.32296139001846313,
      "learning_rate": 2.3688250759407763e-06,
      "loss": 0.0065,
      "step": 2336260
    },
    {
      "epoch": 3.8233734608511223,
      "grad_norm": 0.13211295008659363,
      "learning_rate": 2.368759183727259e-06,
      "loss": 0.009,
      "step": 2336280
    },
    {
      "epoch": 3.8234061912897754,
      "grad_norm": 0.19951024651527405,
      "learning_rate": 2.368693291513742e-06,
      "loss": 0.0197,
      "step": 2336300
    },
    {
      "epoch": 3.823438921728429,
      "grad_norm": 0.21564964950084686,
      "learning_rate": 2.368627399300225e-06,
      "loss": 0.0111,
      "step": 2336320
    },
    {
      "epoch": 3.823471652167082,
      "grad_norm": 0.424379825592041,
      "learning_rate": 2.3685615070867077e-06,
      "loss": 0.01,
      "step": 2336340
    },
    {
      "epoch": 3.8235043826057358,
      "grad_norm": 0.23659661412239075,
      "learning_rate": 2.3684956148731904e-06,
      "loss": 0.0062,
      "step": 2336360
    },
    {
      "epoch": 3.823537113044389,
      "grad_norm": 0.14019134640693665,
      "learning_rate": 2.3684297226596736e-06,
      "loss": 0.0172,
      "step": 2336380
    },
    {
      "epoch": 3.8235698434830425,
      "grad_norm": 0.2571575343608856,
      "learning_rate": 2.3683638304461563e-06,
      "loss": 0.0107,
      "step": 2336400
    },
    {
      "epoch": 3.8236025739216957,
      "grad_norm": 0.2803826332092285,
      "learning_rate": 2.3682979382326395e-06,
      "loss": 0.0063,
      "step": 2336420
    },
    {
      "epoch": 3.823635304360349,
      "grad_norm": 0.3646557927131653,
      "learning_rate": 2.3682320460191222e-06,
      "loss": 0.0135,
      "step": 2336440
    },
    {
      "epoch": 3.8236680347990024,
      "grad_norm": 0.3023863434791565,
      "learning_rate": 2.368166153805605e-06,
      "loss": 0.0093,
      "step": 2336460
    },
    {
      "epoch": 3.8237007652376556,
      "grad_norm": 0.01985728181898594,
      "learning_rate": 2.3681002615920877e-06,
      "loss": 0.0097,
      "step": 2336480
    },
    {
      "epoch": 3.823733495676309,
      "grad_norm": 0.18020258843898773,
      "learning_rate": 2.368034369378571e-06,
      "loss": 0.0114,
      "step": 2336500
    },
    {
      "epoch": 3.8237662261149623,
      "grad_norm": 0.3326222598552704,
      "learning_rate": 2.3679684771650536e-06,
      "loss": 0.0134,
      "step": 2336520
    },
    {
      "epoch": 3.823798956553616,
      "grad_norm": 0.2279708832502365,
      "learning_rate": 2.3679025849515363e-06,
      "loss": 0.011,
      "step": 2336540
    },
    {
      "epoch": 3.823831686992269,
      "grad_norm": 0.3317301571369171,
      "learning_rate": 2.3678366927380195e-06,
      "loss": 0.0061,
      "step": 2336560
    },
    {
      "epoch": 3.823864417430922,
      "grad_norm": 0.16661806404590607,
      "learning_rate": 2.3677708005245023e-06,
      "loss": 0.007,
      "step": 2336580
    },
    {
      "epoch": 3.823897147869576,
      "grad_norm": 0.36869892477989197,
      "learning_rate": 2.367704908310985e-06,
      "loss": 0.0115,
      "step": 2336600
    },
    {
      "epoch": 3.823929878308229,
      "grad_norm": 0.28777533769607544,
      "learning_rate": 2.367639016097468e-06,
      "loss": 0.0083,
      "step": 2336620
    },
    {
      "epoch": 3.8239626087468825,
      "grad_norm": 0.5142426490783691,
      "learning_rate": 2.367573123883951e-06,
      "loss": 0.0109,
      "step": 2336640
    },
    {
      "epoch": 3.8239953391855357,
      "grad_norm": 0.19330830872058868,
      "learning_rate": 2.3675072316704336e-06,
      "loss": 0.0122,
      "step": 2336660
    },
    {
      "epoch": 3.8240280696241893,
      "grad_norm": 0.36418047547340393,
      "learning_rate": 2.3674413394569164e-06,
      "loss": 0.0099,
      "step": 2336680
    },
    {
      "epoch": 3.8240608000628424,
      "grad_norm": 0.16446343064308167,
      "learning_rate": 2.3673754472433995e-06,
      "loss": 0.0112,
      "step": 2336700
    },
    {
      "epoch": 3.8240935305014956,
      "grad_norm": 0.1960603892803192,
      "learning_rate": 2.3673095550298823e-06,
      "loss": 0.0069,
      "step": 2336720
    },
    {
      "epoch": 3.824126260940149,
      "grad_norm": 0.2334197610616684,
      "learning_rate": 2.3672436628163654e-06,
      "loss": 0.0121,
      "step": 2336740
    },
    {
      "epoch": 3.8241589913788023,
      "grad_norm": 0.14641214907169342,
      "learning_rate": 2.367177770602848e-06,
      "loss": 0.0122,
      "step": 2336760
    },
    {
      "epoch": 3.824191721817456,
      "grad_norm": 0.08327564597129822,
      "learning_rate": 2.367111878389331e-06,
      "loss": 0.0082,
      "step": 2336780
    },
    {
      "epoch": 3.824224452256109,
      "grad_norm": 0.48886334896087646,
      "learning_rate": 2.3670459861758137e-06,
      "loss": 0.0105,
      "step": 2336800
    },
    {
      "epoch": 3.8242571826947627,
      "grad_norm": 0.6128763556480408,
      "learning_rate": 2.366980093962297e-06,
      "loss": 0.0105,
      "step": 2336820
    },
    {
      "epoch": 3.824289913133416,
      "grad_norm": 0.04583276808261871,
      "learning_rate": 2.3669142017487796e-06,
      "loss": 0.0088,
      "step": 2336840
    },
    {
      "epoch": 3.824322643572069,
      "grad_norm": 0.16453365981578827,
      "learning_rate": 2.3668483095352623e-06,
      "loss": 0.0063,
      "step": 2336860
    },
    {
      "epoch": 3.8243553740107226,
      "grad_norm": 0.18626584112644196,
      "learning_rate": 2.366782417321745e-06,
      "loss": 0.0141,
      "step": 2336880
    },
    {
      "epoch": 3.8243881044493757,
      "grad_norm": 0.5623782873153687,
      "learning_rate": 2.366716525108228e-06,
      "loss": 0.0095,
      "step": 2336900
    },
    {
      "epoch": 3.8244208348880293,
      "grad_norm": 0.1250210851430893,
      "learning_rate": 2.366650632894711e-06,
      "loss": 0.0085,
      "step": 2336920
    },
    {
      "epoch": 3.8244535653266825,
      "grad_norm": 0.28383204340934753,
      "learning_rate": 2.366584740681194e-06,
      "loss": 0.0093,
      "step": 2336940
    },
    {
      "epoch": 3.824486295765336,
      "grad_norm": 0.39320728182792664,
      "learning_rate": 2.366518848467677e-06,
      "loss": 0.0129,
      "step": 2336960
    },
    {
      "epoch": 3.824519026203989,
      "grad_norm": 0.27196282148361206,
      "learning_rate": 2.3664529562541596e-06,
      "loss": 0.0125,
      "step": 2336980
    },
    {
      "epoch": 3.8245517566426424,
      "grad_norm": 0.1728510558605194,
      "learning_rate": 2.3663870640406423e-06,
      "loss": 0.0092,
      "step": 2337000
    },
    {
      "epoch": 3.824584487081296,
      "grad_norm": 0.10759726166725159,
      "learning_rate": 2.3663211718271255e-06,
      "loss": 0.007,
      "step": 2337020
    },
    {
      "epoch": 3.824617217519949,
      "grad_norm": 0.3380134403705597,
      "learning_rate": 2.3662552796136082e-06,
      "loss": 0.0095,
      "step": 2337040
    },
    {
      "epoch": 3.8246499479586027,
      "grad_norm": 0.19127695262432098,
      "learning_rate": 2.366189387400091e-06,
      "loss": 0.0096,
      "step": 2337060
    },
    {
      "epoch": 3.824682678397256,
      "grad_norm": 0.6249805688858032,
      "learning_rate": 2.3661234951865737e-06,
      "loss": 0.0185,
      "step": 2337080
    },
    {
      "epoch": 3.8247154088359094,
      "grad_norm": 0.1331220120191574,
      "learning_rate": 2.366057602973057e-06,
      "loss": 0.0139,
      "step": 2337100
    },
    {
      "epoch": 3.8247481392745626,
      "grad_norm": 0.15521039068698883,
      "learning_rate": 2.3659917107595396e-06,
      "loss": 0.0098,
      "step": 2337120
    },
    {
      "epoch": 3.8247808697132157,
      "grad_norm": 0.3604710102081299,
      "learning_rate": 2.3659258185460228e-06,
      "loss": 0.0125,
      "step": 2337140
    },
    {
      "epoch": 3.8248136001518693,
      "grad_norm": 0.23755085468292236,
      "learning_rate": 2.3658599263325055e-06,
      "loss": 0.0095,
      "step": 2337160
    },
    {
      "epoch": 3.8248463305905225,
      "grad_norm": 0.2087870091199875,
      "learning_rate": 2.3657940341189883e-06,
      "loss": 0.011,
      "step": 2337180
    },
    {
      "epoch": 3.824879061029176,
      "grad_norm": 0.10239915549755096,
      "learning_rate": 2.365728141905471e-06,
      "loss": 0.0125,
      "step": 2337200
    },
    {
      "epoch": 3.8249117914678292,
      "grad_norm": 0.11815202236175537,
      "learning_rate": 2.365662249691954e-06,
      "loss": 0.007,
      "step": 2337220
    },
    {
      "epoch": 3.824944521906483,
      "grad_norm": 0.23637935519218445,
      "learning_rate": 2.365596357478437e-06,
      "loss": 0.0088,
      "step": 2337240
    },
    {
      "epoch": 3.824977252345136,
      "grad_norm": 0.34730932116508484,
      "learning_rate": 2.36553046526492e-06,
      "loss": 0.0066,
      "step": 2337260
    },
    {
      "epoch": 3.825009982783789,
      "grad_norm": 0.3614833652973175,
      "learning_rate": 2.365464573051403e-06,
      "loss": 0.0137,
      "step": 2337280
    },
    {
      "epoch": 3.8250427132224427,
      "grad_norm": 0.7905304431915283,
      "learning_rate": 2.3653986808378855e-06,
      "loss": 0.0154,
      "step": 2337300
    },
    {
      "epoch": 3.825075443661096,
      "grad_norm": 0.06340460479259491,
      "learning_rate": 2.3653327886243683e-06,
      "loss": 0.0118,
      "step": 2337320
    },
    {
      "epoch": 3.825108174099749,
      "grad_norm": 0.22458834946155548,
      "learning_rate": 2.3652668964108514e-06,
      "loss": 0.0101,
      "step": 2337340
    },
    {
      "epoch": 3.8251409045384026,
      "grad_norm": 0.1421041339635849,
      "learning_rate": 2.365201004197334e-06,
      "loss": 0.0139,
      "step": 2337360
    },
    {
      "epoch": 3.825173634977056,
      "grad_norm": 0.8343003392219543,
      "learning_rate": 2.365135111983817e-06,
      "loss": 0.02,
      "step": 2337380
    },
    {
      "epoch": 3.8252063654157094,
      "grad_norm": 0.0329006053507328,
      "learning_rate": 2.3650692197702997e-06,
      "loss": 0.0151,
      "step": 2337400
    },
    {
      "epoch": 3.8252390958543625,
      "grad_norm": 0.12863846123218536,
      "learning_rate": 2.365003327556783e-06,
      "loss": 0.0083,
      "step": 2337420
    },
    {
      "epoch": 3.825271826293016,
      "grad_norm": 0.1498631238937378,
      "learning_rate": 2.364937435343266e-06,
      "loss": 0.0137,
      "step": 2337440
    },
    {
      "epoch": 3.8253045567316692,
      "grad_norm": 0.5155318379402161,
      "learning_rate": 2.3648715431297487e-06,
      "loss": 0.0122,
      "step": 2337460
    },
    {
      "epoch": 3.8253372871703224,
      "grad_norm": 0.2093639373779297,
      "learning_rate": 2.3648056509162315e-06,
      "loss": 0.0131,
      "step": 2337480
    },
    {
      "epoch": 3.825370017608976,
      "grad_norm": 0.11080091446638107,
      "learning_rate": 2.364739758702714e-06,
      "loss": 0.0126,
      "step": 2337500
    },
    {
      "epoch": 3.8254027480476296,
      "grad_norm": 0.13857203722000122,
      "learning_rate": 2.3646738664891974e-06,
      "loss": 0.0119,
      "step": 2337520
    },
    {
      "epoch": 3.8254354784862827,
      "grad_norm": 0.18387357890605927,
      "learning_rate": 2.36460797427568e-06,
      "loss": 0.0097,
      "step": 2337540
    },
    {
      "epoch": 3.825468208924936,
      "grad_norm": 0.1570601463317871,
      "learning_rate": 2.364542082062163e-06,
      "loss": 0.0124,
      "step": 2337560
    },
    {
      "epoch": 3.8255009393635895,
      "grad_norm": 0.2691344618797302,
      "learning_rate": 2.3644761898486456e-06,
      "loss": 0.0131,
      "step": 2337580
    },
    {
      "epoch": 3.8255336698022426,
      "grad_norm": 0.1514221876859665,
      "learning_rate": 2.3644102976351283e-06,
      "loss": 0.0088,
      "step": 2337600
    },
    {
      "epoch": 3.825566400240896,
      "grad_norm": 0.854356586933136,
      "learning_rate": 2.3643444054216115e-06,
      "loss": 0.0153,
      "step": 2337620
    },
    {
      "epoch": 3.8255991306795494,
      "grad_norm": 0.24268710613250732,
      "learning_rate": 2.3642785132080947e-06,
      "loss": 0.0091,
      "step": 2337640
    },
    {
      "epoch": 3.825631861118203,
      "grad_norm": 0.11316586285829544,
      "learning_rate": 2.3642126209945774e-06,
      "loss": 0.0088,
      "step": 2337660
    },
    {
      "epoch": 3.825664591556856,
      "grad_norm": 0.16528652608394623,
      "learning_rate": 2.36414672878106e-06,
      "loss": 0.0096,
      "step": 2337680
    },
    {
      "epoch": 3.8256973219955093,
      "grad_norm": 0.1726727932691574,
      "learning_rate": 2.364080836567543e-06,
      "loss": 0.0115,
      "step": 2337700
    },
    {
      "epoch": 3.825730052434163,
      "grad_norm": 0.4083665907382965,
      "learning_rate": 2.364014944354026e-06,
      "loss": 0.0119,
      "step": 2337720
    },
    {
      "epoch": 3.825762782872816,
      "grad_norm": 0.06610289216041565,
      "learning_rate": 2.3639490521405088e-06,
      "loss": 0.0095,
      "step": 2337740
    },
    {
      "epoch": 3.825795513311469,
      "grad_norm": 0.18366451561450958,
      "learning_rate": 2.3638831599269915e-06,
      "loss": 0.0108,
      "step": 2337760
    },
    {
      "epoch": 3.8258282437501228,
      "grad_norm": 0.24860797822475433,
      "learning_rate": 2.3638172677134743e-06,
      "loss": 0.0127,
      "step": 2337780
    },
    {
      "epoch": 3.8258609741887764,
      "grad_norm": 0.45772936940193176,
      "learning_rate": 2.3637513754999574e-06,
      "loss": 0.0084,
      "step": 2337800
    },
    {
      "epoch": 3.8258937046274295,
      "grad_norm": 0.19056501984596252,
      "learning_rate": 2.36368548328644e-06,
      "loss": 0.0119,
      "step": 2337820
    },
    {
      "epoch": 3.8259264350660827,
      "grad_norm": 0.10124096274375916,
      "learning_rate": 2.3636195910729233e-06,
      "loss": 0.0083,
      "step": 2337840
    },
    {
      "epoch": 3.8259591655047362,
      "grad_norm": 0.27076587080955505,
      "learning_rate": 2.363553698859406e-06,
      "loss": 0.0132,
      "step": 2337860
    },
    {
      "epoch": 3.8259918959433894,
      "grad_norm": 0.16744489967823029,
      "learning_rate": 2.363487806645889e-06,
      "loss": 0.0132,
      "step": 2337880
    },
    {
      "epoch": 3.8260246263820425,
      "grad_norm": 0.04354269802570343,
      "learning_rate": 2.3634219144323715e-06,
      "loss": 0.007,
      "step": 2337900
    },
    {
      "epoch": 3.826057356820696,
      "grad_norm": 0.08159316331148148,
      "learning_rate": 2.3633560222188547e-06,
      "loss": 0.0088,
      "step": 2337920
    },
    {
      "epoch": 3.8260900872593493,
      "grad_norm": 0.06733817607164383,
      "learning_rate": 2.3632901300053374e-06,
      "loss": 0.0089,
      "step": 2337940
    },
    {
      "epoch": 3.826122817698003,
      "grad_norm": 0.15479101240634918,
      "learning_rate": 2.36322423779182e-06,
      "loss": 0.0119,
      "step": 2337960
    },
    {
      "epoch": 3.826155548136656,
      "grad_norm": 0.2804906964302063,
      "learning_rate": 2.3631583455783034e-06,
      "loss": 0.0069,
      "step": 2337980
    },
    {
      "epoch": 3.8261882785753096,
      "grad_norm": 0.24046166241168976,
      "learning_rate": 2.363092453364786e-06,
      "loss": 0.0094,
      "step": 2338000
    },
    {
      "epoch": 3.826221009013963,
      "grad_norm": 0.11678006500005722,
      "learning_rate": 2.363026561151269e-06,
      "loss": 0.0145,
      "step": 2338020
    },
    {
      "epoch": 3.826253739452616,
      "grad_norm": 0.0602230429649353,
      "learning_rate": 2.362960668937752e-06,
      "loss": 0.01,
      "step": 2338040
    },
    {
      "epoch": 3.8262864698912695,
      "grad_norm": 0.09438800811767578,
      "learning_rate": 2.3628947767242347e-06,
      "loss": 0.01,
      "step": 2338060
    },
    {
      "epoch": 3.8263192003299227,
      "grad_norm": 0.5010305643081665,
      "learning_rate": 2.3628288845107175e-06,
      "loss": 0.0152,
      "step": 2338080
    },
    {
      "epoch": 3.8263519307685763,
      "grad_norm": 0.041620418429374695,
      "learning_rate": 2.3627629922972002e-06,
      "loss": 0.0092,
      "step": 2338100
    },
    {
      "epoch": 3.8263846612072294,
      "grad_norm": 0.288850337266922,
      "learning_rate": 2.3626971000836834e-06,
      "loss": 0.0112,
      "step": 2338120
    },
    {
      "epoch": 3.826417391645883,
      "grad_norm": 0.40160053968429565,
      "learning_rate": 2.362631207870166e-06,
      "loss": 0.0116,
      "step": 2338140
    },
    {
      "epoch": 3.826450122084536,
      "grad_norm": 0.37788090109825134,
      "learning_rate": 2.3625653156566493e-06,
      "loss": 0.01,
      "step": 2338160
    },
    {
      "epoch": 3.8264828525231893,
      "grad_norm": 0.064176045358181,
      "learning_rate": 2.362499423443132e-06,
      "loss": 0.0129,
      "step": 2338180
    },
    {
      "epoch": 3.826515582961843,
      "grad_norm": 0.17003700137138367,
      "learning_rate": 2.3624335312296148e-06,
      "loss": 0.0065,
      "step": 2338200
    },
    {
      "epoch": 3.826548313400496,
      "grad_norm": 0.35590240359306335,
      "learning_rate": 2.3623676390160975e-06,
      "loss": 0.0133,
      "step": 2338220
    },
    {
      "epoch": 3.8265810438391497,
      "grad_norm": 0.5635402798652649,
      "learning_rate": 2.3623017468025807e-06,
      "loss": 0.0108,
      "step": 2338240
    },
    {
      "epoch": 3.826613774277803,
      "grad_norm": 0.09879803657531738,
      "learning_rate": 2.3622358545890634e-06,
      "loss": 0.0063,
      "step": 2338260
    },
    {
      "epoch": 3.8266465047164564,
      "grad_norm": 0.06723044067621231,
      "learning_rate": 2.362169962375546e-06,
      "loss": 0.0064,
      "step": 2338280
    },
    {
      "epoch": 3.8266792351551095,
      "grad_norm": 0.15976347029209137,
      "learning_rate": 2.362104070162029e-06,
      "loss": 0.0072,
      "step": 2338300
    },
    {
      "epoch": 3.8267119655937627,
      "grad_norm": 1.1880460977554321,
      "learning_rate": 2.362038177948512e-06,
      "loss": 0.0063,
      "step": 2338320
    },
    {
      "epoch": 3.8267446960324163,
      "grad_norm": 0.41441458463668823,
      "learning_rate": 2.3619722857349948e-06,
      "loss": 0.0112,
      "step": 2338340
    },
    {
      "epoch": 3.8267774264710694,
      "grad_norm": 0.3720650374889374,
      "learning_rate": 2.361906393521478e-06,
      "loss": 0.0143,
      "step": 2338360
    },
    {
      "epoch": 3.826810156909723,
      "grad_norm": 0.08029566705226898,
      "learning_rate": 2.3618405013079607e-06,
      "loss": 0.0107,
      "step": 2338380
    },
    {
      "epoch": 3.826842887348376,
      "grad_norm": 0.4194429814815521,
      "learning_rate": 2.3617746090944434e-06,
      "loss": 0.0097,
      "step": 2338400
    },
    {
      "epoch": 3.82687561778703,
      "grad_norm": 0.14834043383598328,
      "learning_rate": 2.361708716880926e-06,
      "loss": 0.012,
      "step": 2338420
    },
    {
      "epoch": 3.826908348225683,
      "grad_norm": 0.24060390889644623,
      "learning_rate": 2.3616428246674093e-06,
      "loss": 0.014,
      "step": 2338440
    },
    {
      "epoch": 3.826941078664336,
      "grad_norm": 0.035494301468133926,
      "learning_rate": 2.361576932453892e-06,
      "loss": 0.0116,
      "step": 2338460
    },
    {
      "epoch": 3.8269738091029897,
      "grad_norm": 0.3316871225833893,
      "learning_rate": 2.361511040240375e-06,
      "loss": 0.011,
      "step": 2338480
    },
    {
      "epoch": 3.827006539541643,
      "grad_norm": 0.19449876248836517,
      "learning_rate": 2.361445148026858e-06,
      "loss": 0.0129,
      "step": 2338500
    },
    {
      "epoch": 3.8270392699802964,
      "grad_norm": 0.11265918612480164,
      "learning_rate": 2.3613792558133407e-06,
      "loss": 0.0112,
      "step": 2338520
    },
    {
      "epoch": 3.8270720004189496,
      "grad_norm": 0.33339810371398926,
      "learning_rate": 2.3613133635998235e-06,
      "loss": 0.0087,
      "step": 2338540
    },
    {
      "epoch": 3.827104730857603,
      "grad_norm": 0.16278956830501556,
      "learning_rate": 2.3612474713863066e-06,
      "loss": 0.0142,
      "step": 2338560
    },
    {
      "epoch": 3.8271374612962563,
      "grad_norm": 0.27328982949256897,
      "learning_rate": 2.3611815791727894e-06,
      "loss": 0.0084,
      "step": 2338580
    },
    {
      "epoch": 3.8271701917349095,
      "grad_norm": 0.17750263214111328,
      "learning_rate": 2.361115686959272e-06,
      "loss": 0.0103,
      "step": 2338600
    },
    {
      "epoch": 3.827202922173563,
      "grad_norm": 0.24482329189777374,
      "learning_rate": 2.361049794745755e-06,
      "loss": 0.0108,
      "step": 2338620
    },
    {
      "epoch": 3.827235652612216,
      "grad_norm": 0.16967780888080597,
      "learning_rate": 2.360983902532238e-06,
      "loss": 0.0113,
      "step": 2338640
    },
    {
      "epoch": 3.82726838305087,
      "grad_norm": 0.12316927313804626,
      "learning_rate": 2.3609180103187207e-06,
      "loss": 0.009,
      "step": 2338660
    },
    {
      "epoch": 3.827301113489523,
      "grad_norm": 0.8463262319564819,
      "learning_rate": 2.360852118105204e-06,
      "loss": 0.0088,
      "step": 2338680
    },
    {
      "epoch": 3.8273338439281765,
      "grad_norm": 0.23806160688400269,
      "learning_rate": 2.3607862258916866e-06,
      "loss": 0.008,
      "step": 2338700
    },
    {
      "epoch": 3.8273665743668297,
      "grad_norm": 0.6129069328308105,
      "learning_rate": 2.3607203336781694e-06,
      "loss": 0.0095,
      "step": 2338720
    },
    {
      "epoch": 3.827399304805483,
      "grad_norm": 0.4213419258594513,
      "learning_rate": 2.3606544414646525e-06,
      "loss": 0.0145,
      "step": 2338740
    },
    {
      "epoch": 3.8274320352441364,
      "grad_norm": 0.24828365445137024,
      "learning_rate": 2.3605885492511353e-06,
      "loss": 0.0141,
      "step": 2338760
    },
    {
      "epoch": 3.8274647656827896,
      "grad_norm": 0.4416040778160095,
      "learning_rate": 2.360522657037618e-06,
      "loss": 0.0123,
      "step": 2338780
    },
    {
      "epoch": 3.8274974961214427,
      "grad_norm": 0.643139660358429,
      "learning_rate": 2.3604567648241008e-06,
      "loss": 0.0101,
      "step": 2338800
    },
    {
      "epoch": 3.8275302265600963,
      "grad_norm": 0.3445221781730652,
      "learning_rate": 2.3603908726105835e-06,
      "loss": 0.0131,
      "step": 2338820
    },
    {
      "epoch": 3.82756295699875,
      "grad_norm": 0.09370443969964981,
      "learning_rate": 2.3603249803970667e-06,
      "loss": 0.0104,
      "step": 2338840
    },
    {
      "epoch": 3.827595687437403,
      "grad_norm": 0.6934188008308411,
      "learning_rate": 2.36025908818355e-06,
      "loss": 0.0156,
      "step": 2338860
    },
    {
      "epoch": 3.8276284178760562,
      "grad_norm": 0.3520922064781189,
      "learning_rate": 2.3601931959700326e-06,
      "loss": 0.0111,
      "step": 2338880
    },
    {
      "epoch": 3.82766114831471,
      "grad_norm": 0.0949430763721466,
      "learning_rate": 2.3601273037565153e-06,
      "loss": 0.0102,
      "step": 2338900
    },
    {
      "epoch": 3.827693878753363,
      "grad_norm": 0.959463357925415,
      "learning_rate": 2.360061411542998e-06,
      "loss": 0.0127,
      "step": 2338920
    },
    {
      "epoch": 3.827726609192016,
      "grad_norm": 0.09151052683591843,
      "learning_rate": 2.3599955193294812e-06,
      "loss": 0.0074,
      "step": 2338940
    },
    {
      "epoch": 3.8277593396306697,
      "grad_norm": 0.11435206234455109,
      "learning_rate": 2.359929627115964e-06,
      "loss": 0.0119,
      "step": 2338960
    },
    {
      "epoch": 3.8277920700693233,
      "grad_norm": 0.15925532579421997,
      "learning_rate": 2.3598637349024467e-06,
      "loss": 0.0079,
      "step": 2338980
    },
    {
      "epoch": 3.8278248005079765,
      "grad_norm": 0.3424217700958252,
      "learning_rate": 2.3597978426889294e-06,
      "loss": 0.0105,
      "step": 2339000
    },
    {
      "epoch": 3.8278575309466296,
      "grad_norm": 0.2003181278705597,
      "learning_rate": 2.3597319504754126e-06,
      "loss": 0.0115,
      "step": 2339020
    },
    {
      "epoch": 3.827890261385283,
      "grad_norm": 0.255603164434433,
      "learning_rate": 2.3596660582618953e-06,
      "loss": 0.0094,
      "step": 2339040
    },
    {
      "epoch": 3.8279229918239364,
      "grad_norm": 0.1819654256105423,
      "learning_rate": 2.3596001660483785e-06,
      "loss": 0.0054,
      "step": 2339060
    },
    {
      "epoch": 3.8279557222625895,
      "grad_norm": 0.10107932239770889,
      "learning_rate": 2.3595342738348612e-06,
      "loss": 0.0086,
      "step": 2339080
    },
    {
      "epoch": 3.827988452701243,
      "grad_norm": 0.09107859432697296,
      "learning_rate": 2.359468381621344e-06,
      "loss": 0.012,
      "step": 2339100
    },
    {
      "epoch": 3.8280211831398967,
      "grad_norm": 0.2598884403705597,
      "learning_rate": 2.3594024894078267e-06,
      "loss": 0.0121,
      "step": 2339120
    },
    {
      "epoch": 3.82805391357855,
      "grad_norm": 0.1420487016439438,
      "learning_rate": 2.35933659719431e-06,
      "loss": 0.0088,
      "step": 2339140
    },
    {
      "epoch": 3.828086644017203,
      "grad_norm": 0.301049143075943,
      "learning_rate": 2.3592707049807926e-06,
      "loss": 0.0111,
      "step": 2339160
    },
    {
      "epoch": 3.8281193744558566,
      "grad_norm": 0.2077259123325348,
      "learning_rate": 2.3592048127672754e-06,
      "loss": 0.0096,
      "step": 2339180
    },
    {
      "epoch": 3.8281521048945097,
      "grad_norm": 0.48142528533935547,
      "learning_rate": 2.3591389205537585e-06,
      "loss": 0.0101,
      "step": 2339200
    },
    {
      "epoch": 3.828184835333163,
      "grad_norm": 0.33360520005226135,
      "learning_rate": 2.3590730283402413e-06,
      "loss": 0.0124,
      "step": 2339220
    },
    {
      "epoch": 3.8282175657718165,
      "grad_norm": 0.3524933457374573,
      "learning_rate": 2.359007136126724e-06,
      "loss": 0.01,
      "step": 2339240
    },
    {
      "epoch": 3.82825029621047,
      "grad_norm": 0.39029866456985474,
      "learning_rate": 2.358941243913207e-06,
      "loss": 0.0123,
      "step": 2339260
    },
    {
      "epoch": 3.8282830266491232,
      "grad_norm": 0.23994426429271698,
      "learning_rate": 2.35887535169969e-06,
      "loss": 0.0096,
      "step": 2339280
    },
    {
      "epoch": 3.8283157570877764,
      "grad_norm": 0.28271257877349854,
      "learning_rate": 2.3588094594861726e-06,
      "loss": 0.0131,
      "step": 2339300
    },
    {
      "epoch": 3.82834848752643,
      "grad_norm": 0.54361891746521,
      "learning_rate": 2.3587435672726554e-06,
      "loss": 0.0117,
      "step": 2339320
    },
    {
      "epoch": 3.828381217965083,
      "grad_norm": 0.08094846457242966,
      "learning_rate": 2.3586776750591385e-06,
      "loss": 0.0149,
      "step": 2339340
    },
    {
      "epoch": 3.8284139484037363,
      "grad_norm": 0.2314416766166687,
      "learning_rate": 2.3586117828456213e-06,
      "loss": 0.0108,
      "step": 2339360
    },
    {
      "epoch": 3.82844667884239,
      "grad_norm": 0.22311536967754364,
      "learning_rate": 2.3585458906321045e-06,
      "loss": 0.0069,
      "step": 2339380
    },
    {
      "epoch": 3.828479409281043,
      "grad_norm": 0.3228234052658081,
      "learning_rate": 2.358479998418587e-06,
      "loss": 0.0106,
      "step": 2339400
    },
    {
      "epoch": 3.8285121397196966,
      "grad_norm": 0.17693662643432617,
      "learning_rate": 2.35841410620507e-06,
      "loss": 0.0149,
      "step": 2339420
    },
    {
      "epoch": 3.8285448701583498,
      "grad_norm": 0.39966121315956116,
      "learning_rate": 2.3583482139915527e-06,
      "loss": 0.0143,
      "step": 2339440
    },
    {
      "epoch": 3.8285776005970034,
      "grad_norm": 0.33945563435554504,
      "learning_rate": 2.358282321778036e-06,
      "loss": 0.0086,
      "step": 2339460
    },
    {
      "epoch": 3.8286103310356565,
      "grad_norm": 0.37894943356513977,
      "learning_rate": 2.3582164295645186e-06,
      "loss": 0.0093,
      "step": 2339480
    },
    {
      "epoch": 3.8286430614743097,
      "grad_norm": 0.3938201367855072,
      "learning_rate": 2.3581505373510013e-06,
      "loss": 0.0074,
      "step": 2339500
    },
    {
      "epoch": 3.8286757919129633,
      "grad_norm": 0.23802223801612854,
      "learning_rate": 2.358084645137484e-06,
      "loss": 0.0133,
      "step": 2339520
    },
    {
      "epoch": 3.8287085223516164,
      "grad_norm": 0.7854681015014648,
      "learning_rate": 2.3580187529239672e-06,
      "loss": 0.0121,
      "step": 2339540
    },
    {
      "epoch": 3.82874125279027,
      "grad_norm": 0.4427739977836609,
      "learning_rate": 2.35795286071045e-06,
      "loss": 0.0095,
      "step": 2339560
    },
    {
      "epoch": 3.828773983228923,
      "grad_norm": 0.2816874384880066,
      "learning_rate": 2.357886968496933e-06,
      "loss": 0.0098,
      "step": 2339580
    },
    {
      "epoch": 3.8288067136675767,
      "grad_norm": 0.2733475863933563,
      "learning_rate": 2.357821076283416e-06,
      "loss": 0.0126,
      "step": 2339600
    },
    {
      "epoch": 3.82883944410623,
      "grad_norm": 0.28798484802246094,
      "learning_rate": 2.3577551840698986e-06,
      "loss": 0.0155,
      "step": 2339620
    },
    {
      "epoch": 3.828872174544883,
      "grad_norm": 0.16205313801765442,
      "learning_rate": 2.3576892918563813e-06,
      "loss": 0.0111,
      "step": 2339640
    },
    {
      "epoch": 3.8289049049835366,
      "grad_norm": 0.04398929700255394,
      "learning_rate": 2.3576233996428645e-06,
      "loss": 0.0079,
      "step": 2339660
    },
    {
      "epoch": 3.82893763542219,
      "grad_norm": 0.11389781534671783,
      "learning_rate": 2.3575575074293472e-06,
      "loss": 0.0078,
      "step": 2339680
    },
    {
      "epoch": 3.8289703658608434,
      "grad_norm": 0.24531593918800354,
      "learning_rate": 2.35749161521583e-06,
      "loss": 0.0124,
      "step": 2339700
    },
    {
      "epoch": 3.8290030962994965,
      "grad_norm": 0.22062157094478607,
      "learning_rate": 2.3574257230023127e-06,
      "loss": 0.0146,
      "step": 2339720
    },
    {
      "epoch": 3.82903582673815,
      "grad_norm": 0.8562837243080139,
      "learning_rate": 2.357359830788796e-06,
      "loss": 0.0097,
      "step": 2339740
    },
    {
      "epoch": 3.8290685571768033,
      "grad_norm": 0.6473693251609802,
      "learning_rate": 2.3572939385752786e-06,
      "loss": 0.01,
      "step": 2339760
    },
    {
      "epoch": 3.8291012876154564,
      "grad_norm": 0.2053125649690628,
      "learning_rate": 2.3572280463617618e-06,
      "loss": 0.011,
      "step": 2339780
    },
    {
      "epoch": 3.82913401805411,
      "grad_norm": 0.17382971942424774,
      "learning_rate": 2.3571621541482445e-06,
      "loss": 0.0083,
      "step": 2339800
    },
    {
      "epoch": 3.829166748492763,
      "grad_norm": 0.37459924817085266,
      "learning_rate": 2.3570962619347273e-06,
      "loss": 0.0202,
      "step": 2339820
    },
    {
      "epoch": 3.8291994789314168,
      "grad_norm": 0.1936623901128769,
      "learning_rate": 2.35703036972121e-06,
      "loss": 0.0094,
      "step": 2339840
    },
    {
      "epoch": 3.82923220937007,
      "grad_norm": 0.6197572350502014,
      "learning_rate": 2.356964477507693e-06,
      "loss": 0.0106,
      "step": 2339860
    },
    {
      "epoch": 3.8292649398087235,
      "grad_norm": 0.3174077868461609,
      "learning_rate": 2.356898585294176e-06,
      "loss": 0.0121,
      "step": 2339880
    },
    {
      "epoch": 3.8292976702473767,
      "grad_norm": 0.28074076771736145,
      "learning_rate": 2.356832693080659e-06,
      "loss": 0.0122,
      "step": 2339900
    },
    {
      "epoch": 3.82933040068603,
      "grad_norm": 0.26450982689857483,
      "learning_rate": 2.356766800867142e-06,
      "loss": 0.0138,
      "step": 2339920
    },
    {
      "epoch": 3.8293631311246834,
      "grad_norm": 0.14245958626270294,
      "learning_rate": 2.3567009086536246e-06,
      "loss": 0.0118,
      "step": 2339940
    },
    {
      "epoch": 3.8293958615633366,
      "grad_norm": 0.078019879758358,
      "learning_rate": 2.3566350164401073e-06,
      "loss": 0.0114,
      "step": 2339960
    },
    {
      "epoch": 3.82942859200199,
      "grad_norm": 0.4226072430610657,
      "learning_rate": 2.3565691242265905e-06,
      "loss": 0.0084,
      "step": 2339980
    },
    {
      "epoch": 3.8294613224406433,
      "grad_norm": 0.3471786677837372,
      "learning_rate": 2.356503232013073e-06,
      "loss": 0.0078,
      "step": 2340000
    },
    {
      "epoch": 3.829494052879297,
      "grad_norm": 0.4036760628223419,
      "learning_rate": 2.356437339799556e-06,
      "loss": 0.0111,
      "step": 2340020
    },
    {
      "epoch": 3.82952678331795,
      "grad_norm": 0.2576536238193512,
      "learning_rate": 2.3563714475860387e-06,
      "loss": 0.0104,
      "step": 2340040
    },
    {
      "epoch": 3.829559513756603,
      "grad_norm": 0.4309597909450531,
      "learning_rate": 2.356305555372522e-06,
      "loss": 0.0149,
      "step": 2340060
    },
    {
      "epoch": 3.829592244195257,
      "grad_norm": 0.07152964919805527,
      "learning_rate": 2.356239663159005e-06,
      "loss": 0.0059,
      "step": 2340080
    },
    {
      "epoch": 3.82962497463391,
      "grad_norm": 0.49615243077278137,
      "learning_rate": 2.3561737709454877e-06,
      "loss": 0.0138,
      "step": 2340100
    },
    {
      "epoch": 3.8296577050725635,
      "grad_norm": 0.09479287266731262,
      "learning_rate": 2.3561078787319705e-06,
      "loss": 0.016,
      "step": 2340120
    },
    {
      "epoch": 3.8296904355112167,
      "grad_norm": 0.08960936218500137,
      "learning_rate": 2.3560419865184532e-06,
      "loss": 0.0113,
      "step": 2340140
    },
    {
      "epoch": 3.8297231659498703,
      "grad_norm": 0.028279149904847145,
      "learning_rate": 2.3559760943049364e-06,
      "loss": 0.015,
      "step": 2340160
    },
    {
      "epoch": 3.8297558963885234,
      "grad_norm": 0.46588870882987976,
      "learning_rate": 2.355910202091419e-06,
      "loss": 0.014,
      "step": 2340180
    },
    {
      "epoch": 3.8297886268271766,
      "grad_norm": 0.5791556239128113,
      "learning_rate": 2.355844309877902e-06,
      "loss": 0.012,
      "step": 2340200
    },
    {
      "epoch": 3.82982135726583,
      "grad_norm": 0.16157186031341553,
      "learning_rate": 2.3557784176643846e-06,
      "loss": 0.0093,
      "step": 2340220
    },
    {
      "epoch": 3.8298540877044833,
      "grad_norm": 0.3236856460571289,
      "learning_rate": 2.3557125254508673e-06,
      "loss": 0.0094,
      "step": 2340240
    },
    {
      "epoch": 3.829886818143137,
      "grad_norm": 0.3157406449317932,
      "learning_rate": 2.3556466332373505e-06,
      "loss": 0.0125,
      "step": 2340260
    },
    {
      "epoch": 3.82991954858179,
      "grad_norm": 0.1558038592338562,
      "learning_rate": 2.3555807410238337e-06,
      "loss": 0.0085,
      "step": 2340280
    },
    {
      "epoch": 3.8299522790204437,
      "grad_norm": 0.4648709297180176,
      "learning_rate": 2.3555148488103164e-06,
      "loss": 0.013,
      "step": 2340300
    },
    {
      "epoch": 3.829985009459097,
      "grad_norm": 0.089666947722435,
      "learning_rate": 2.355448956596799e-06,
      "loss": 0.0084,
      "step": 2340320
    },
    {
      "epoch": 3.83001773989775,
      "grad_norm": 0.3506333529949188,
      "learning_rate": 2.355383064383282e-06,
      "loss": 0.0097,
      "step": 2340340
    },
    {
      "epoch": 3.8300504703364036,
      "grad_norm": 0.2519521117210388,
      "learning_rate": 2.355317172169765e-06,
      "loss": 0.0117,
      "step": 2340360
    },
    {
      "epoch": 3.8300832007750567,
      "grad_norm": 0.24519187211990356,
      "learning_rate": 2.355251279956248e-06,
      "loss": 0.0138,
      "step": 2340380
    },
    {
      "epoch": 3.83011593121371,
      "grad_norm": 0.20807956159114838,
      "learning_rate": 2.3551853877427305e-06,
      "loss": 0.0103,
      "step": 2340400
    },
    {
      "epoch": 3.8301486616523635,
      "grad_norm": 0.21104413270950317,
      "learning_rate": 2.3551194955292133e-06,
      "loss": 0.0161,
      "step": 2340420
    },
    {
      "epoch": 3.830181392091017,
      "grad_norm": 0.4161234200000763,
      "learning_rate": 2.3550536033156964e-06,
      "loss": 0.0145,
      "step": 2340440
    },
    {
      "epoch": 3.83021412252967,
      "grad_norm": 0.7341769933700562,
      "learning_rate": 2.354987711102179e-06,
      "loss": 0.0152,
      "step": 2340460
    },
    {
      "epoch": 3.8302468529683233,
      "grad_norm": 0.6877319812774658,
      "learning_rate": 2.3549218188886623e-06,
      "loss": 0.0124,
      "step": 2340480
    },
    {
      "epoch": 3.830279583406977,
      "grad_norm": 0.5498764514923096,
      "learning_rate": 2.354855926675145e-06,
      "loss": 0.0142,
      "step": 2340500
    },
    {
      "epoch": 3.83031231384563,
      "grad_norm": 0.17553359270095825,
      "learning_rate": 2.354790034461628e-06,
      "loss": 0.0164,
      "step": 2340520
    },
    {
      "epoch": 3.8303450442842832,
      "grad_norm": 0.4277152419090271,
      "learning_rate": 2.3547241422481106e-06,
      "loss": 0.0113,
      "step": 2340540
    },
    {
      "epoch": 3.830377774722937,
      "grad_norm": 0.26261043548583984,
      "learning_rate": 2.3546582500345937e-06,
      "loss": 0.0091,
      "step": 2340560
    },
    {
      "epoch": 3.8304105051615904,
      "grad_norm": 0.06726470589637756,
      "learning_rate": 2.3545923578210765e-06,
      "loss": 0.0093,
      "step": 2340580
    },
    {
      "epoch": 3.8304432356002436,
      "grad_norm": 0.12277933955192566,
      "learning_rate": 2.3545264656075596e-06,
      "loss": 0.0108,
      "step": 2340600
    },
    {
      "epoch": 3.8304759660388967,
      "grad_norm": 0.39369112253189087,
      "learning_rate": 2.3544605733940424e-06,
      "loss": 0.0084,
      "step": 2340620
    },
    {
      "epoch": 3.8305086964775503,
      "grad_norm": 0.24317362904548645,
      "learning_rate": 2.354394681180525e-06,
      "loss": 0.0137,
      "step": 2340640
    },
    {
      "epoch": 3.8305414269162035,
      "grad_norm": 0.5198979377746582,
      "learning_rate": 2.354328788967008e-06,
      "loss": 0.0136,
      "step": 2340660
    },
    {
      "epoch": 3.8305741573548566,
      "grad_norm": 0.07150421291589737,
      "learning_rate": 2.354262896753491e-06,
      "loss": 0.0067,
      "step": 2340680
    },
    {
      "epoch": 3.83060688779351,
      "grad_norm": 0.38826513290405273,
      "learning_rate": 2.3541970045399737e-06,
      "loss": 0.0107,
      "step": 2340700
    },
    {
      "epoch": 3.830639618232164,
      "grad_norm": 0.33596447110176086,
      "learning_rate": 2.3541311123264565e-06,
      "loss": 0.0074,
      "step": 2340720
    },
    {
      "epoch": 3.830672348670817,
      "grad_norm": 0.4019787311553955,
      "learning_rate": 2.3540652201129392e-06,
      "loss": 0.0095,
      "step": 2340740
    },
    {
      "epoch": 3.83070507910947,
      "grad_norm": 0.3204859495162964,
      "learning_rate": 2.3539993278994224e-06,
      "loss": 0.0119,
      "step": 2340760
    },
    {
      "epoch": 3.8307378095481237,
      "grad_norm": 0.30082443356513977,
      "learning_rate": 2.353933435685905e-06,
      "loss": 0.0104,
      "step": 2340780
    },
    {
      "epoch": 3.830770539986777,
      "grad_norm": 0.19111712276935577,
      "learning_rate": 2.3538675434723883e-06,
      "loss": 0.0098,
      "step": 2340800
    },
    {
      "epoch": 3.83080327042543,
      "grad_norm": 0.1370968520641327,
      "learning_rate": 2.353801651258871e-06,
      "loss": 0.009,
      "step": 2340820
    },
    {
      "epoch": 3.8308360008640836,
      "grad_norm": 0.17996886372566223,
      "learning_rate": 2.3537357590453538e-06,
      "loss": 0.0104,
      "step": 2340840
    },
    {
      "epoch": 3.830868731302737,
      "grad_norm": 0.40571674704551697,
      "learning_rate": 2.3536698668318365e-06,
      "loss": 0.0072,
      "step": 2340860
    },
    {
      "epoch": 3.8309014617413903,
      "grad_norm": 0.25402846932411194,
      "learning_rate": 2.3536039746183197e-06,
      "loss": 0.0085,
      "step": 2340880
    },
    {
      "epoch": 3.8309341921800435,
      "grad_norm": 0.12175102531909943,
      "learning_rate": 2.3535380824048024e-06,
      "loss": 0.0158,
      "step": 2340900
    },
    {
      "epoch": 3.830966922618697,
      "grad_norm": 0.22900456190109253,
      "learning_rate": 2.353472190191285e-06,
      "loss": 0.0117,
      "step": 2340920
    },
    {
      "epoch": 3.8309996530573502,
      "grad_norm": 0.07801211625337601,
      "learning_rate": 2.353406297977768e-06,
      "loss": 0.0174,
      "step": 2340940
    },
    {
      "epoch": 3.8310323834960034,
      "grad_norm": 0.2836669683456421,
      "learning_rate": 2.353340405764251e-06,
      "loss": 0.01,
      "step": 2340960
    },
    {
      "epoch": 3.831065113934657,
      "grad_norm": 0.2088463455438614,
      "learning_rate": 2.353274513550734e-06,
      "loss": 0.0145,
      "step": 2340980
    },
    {
      "epoch": 3.83109784437331,
      "grad_norm": 0.6773832440376282,
      "learning_rate": 2.353208621337217e-06,
      "loss": 0.0097,
      "step": 2341000
    },
    {
      "epoch": 3.8311305748119637,
      "grad_norm": 0.6800671815872192,
      "learning_rate": 2.3531427291236997e-06,
      "loss": 0.0164,
      "step": 2341020
    },
    {
      "epoch": 3.831163305250617,
      "grad_norm": 0.3711183965206146,
      "learning_rate": 2.3530768369101824e-06,
      "loss": 0.0139,
      "step": 2341040
    },
    {
      "epoch": 3.8311960356892705,
      "grad_norm": 1.084179401397705,
      "learning_rate": 2.353010944696665e-06,
      "loss": 0.0121,
      "step": 2341060
    },
    {
      "epoch": 3.8312287661279236,
      "grad_norm": 0.1522877812385559,
      "learning_rate": 2.3529450524831483e-06,
      "loss": 0.0098,
      "step": 2341080
    },
    {
      "epoch": 3.8312614965665768,
      "grad_norm": 0.19490927457809448,
      "learning_rate": 2.352879160269631e-06,
      "loss": 0.0092,
      "step": 2341100
    },
    {
      "epoch": 3.8312942270052304,
      "grad_norm": 0.2774938941001892,
      "learning_rate": 2.352813268056114e-06,
      "loss": 0.0142,
      "step": 2341120
    },
    {
      "epoch": 3.8313269574438835,
      "grad_norm": 0.4186594486236572,
      "learning_rate": 2.352747375842597e-06,
      "loss": 0.0132,
      "step": 2341140
    },
    {
      "epoch": 3.831359687882537,
      "grad_norm": 0.30415070056915283,
      "learning_rate": 2.3526814836290797e-06,
      "loss": 0.0085,
      "step": 2341160
    },
    {
      "epoch": 3.8313924183211903,
      "grad_norm": 0.4721403419971466,
      "learning_rate": 2.3526155914155625e-06,
      "loss": 0.014,
      "step": 2341180
    },
    {
      "epoch": 3.831425148759844,
      "grad_norm": 0.2629139721393585,
      "learning_rate": 2.3525496992020456e-06,
      "loss": 0.0128,
      "step": 2341200
    },
    {
      "epoch": 3.831457879198497,
      "grad_norm": 0.3576148450374603,
      "learning_rate": 2.3524838069885284e-06,
      "loss": 0.0122,
      "step": 2341220
    },
    {
      "epoch": 3.83149060963715,
      "grad_norm": 0.20917850732803345,
      "learning_rate": 2.352417914775011e-06,
      "loss": 0.0139,
      "step": 2341240
    },
    {
      "epoch": 3.8315233400758038,
      "grad_norm": 0.18859906494617462,
      "learning_rate": 2.352352022561494e-06,
      "loss": 0.0124,
      "step": 2341260
    },
    {
      "epoch": 3.831556070514457,
      "grad_norm": 0.16916513442993164,
      "learning_rate": 2.352286130347977e-06,
      "loss": 0.0146,
      "step": 2341280
    },
    {
      "epoch": 3.8315888009531105,
      "grad_norm": 0.22252647578716278,
      "learning_rate": 2.3522202381344597e-06,
      "loss": 0.0082,
      "step": 2341300
    },
    {
      "epoch": 3.8316215313917636,
      "grad_norm": 0.5399900078773499,
      "learning_rate": 2.352154345920943e-06,
      "loss": 0.0118,
      "step": 2341320
    },
    {
      "epoch": 3.8316542618304172,
      "grad_norm": 0.19582311809062958,
      "learning_rate": 2.3520884537074257e-06,
      "loss": 0.0091,
      "step": 2341340
    },
    {
      "epoch": 3.8316869922690704,
      "grad_norm": 0.36565887928009033,
      "learning_rate": 2.3520225614939084e-06,
      "loss": 0.0106,
      "step": 2341360
    },
    {
      "epoch": 3.8317197227077235,
      "grad_norm": 0.21465949714183807,
      "learning_rate": 2.3519566692803916e-06,
      "loss": 0.0097,
      "step": 2341380
    },
    {
      "epoch": 3.831752453146377,
      "grad_norm": 0.24543748795986176,
      "learning_rate": 2.3518907770668743e-06,
      "loss": 0.0187,
      "step": 2341400
    },
    {
      "epoch": 3.8317851835850303,
      "grad_norm": 0.14125755429267883,
      "learning_rate": 2.351824884853357e-06,
      "loss": 0.0113,
      "step": 2341420
    },
    {
      "epoch": 3.831817914023684,
      "grad_norm": 0.3088577389717102,
      "learning_rate": 2.3517589926398398e-06,
      "loss": 0.0188,
      "step": 2341440
    },
    {
      "epoch": 3.831850644462337,
      "grad_norm": 0.1516980677843094,
      "learning_rate": 2.3516931004263225e-06,
      "loss": 0.0064,
      "step": 2341460
    },
    {
      "epoch": 3.8318833749009906,
      "grad_norm": 0.3434257209300995,
      "learning_rate": 2.3516272082128057e-06,
      "loss": 0.0104,
      "step": 2341480
    },
    {
      "epoch": 3.8319161053396438,
      "grad_norm": 0.37935009598731995,
      "learning_rate": 2.351561315999289e-06,
      "loss": 0.0079,
      "step": 2341500
    },
    {
      "epoch": 3.831948835778297,
      "grad_norm": 0.19006142020225525,
      "learning_rate": 2.3514954237857716e-06,
      "loss": 0.0094,
      "step": 2341520
    },
    {
      "epoch": 3.8319815662169505,
      "grad_norm": 0.8994504809379578,
      "learning_rate": 2.3514295315722543e-06,
      "loss": 0.0089,
      "step": 2341540
    },
    {
      "epoch": 3.8320142966556037,
      "grad_norm": 0.162614643573761,
      "learning_rate": 2.351363639358737e-06,
      "loss": 0.0106,
      "step": 2341560
    },
    {
      "epoch": 3.8320470270942573,
      "grad_norm": 0.2335749864578247,
      "learning_rate": 2.3512977471452202e-06,
      "loss": 0.0177,
      "step": 2341580
    },
    {
      "epoch": 3.8320797575329104,
      "grad_norm": 0.2790067195892334,
      "learning_rate": 2.351231854931703e-06,
      "loss": 0.0105,
      "step": 2341600
    },
    {
      "epoch": 3.832112487971564,
      "grad_norm": 0.1507909893989563,
      "learning_rate": 2.3511659627181857e-06,
      "loss": 0.0099,
      "step": 2341620
    },
    {
      "epoch": 3.832145218410217,
      "grad_norm": 0.11378034949302673,
      "learning_rate": 2.3511000705046684e-06,
      "loss": 0.0161,
      "step": 2341640
    },
    {
      "epoch": 3.8321779488488703,
      "grad_norm": 0.26498404145240784,
      "learning_rate": 2.3510341782911516e-06,
      "loss": 0.0099,
      "step": 2341660
    },
    {
      "epoch": 3.832210679287524,
      "grad_norm": 0.1581607460975647,
      "learning_rate": 2.3509682860776343e-06,
      "loss": 0.0131,
      "step": 2341680
    },
    {
      "epoch": 3.832243409726177,
      "grad_norm": 0.2534720301628113,
      "learning_rate": 2.3509023938641175e-06,
      "loss": 0.0123,
      "step": 2341700
    },
    {
      "epoch": 3.8322761401648306,
      "grad_norm": 0.31385836005210876,
      "learning_rate": 2.3508365016506002e-06,
      "loss": 0.0094,
      "step": 2341720
    },
    {
      "epoch": 3.832308870603484,
      "grad_norm": 0.20620156824588776,
      "learning_rate": 2.350770609437083e-06,
      "loss": 0.0103,
      "step": 2341740
    },
    {
      "epoch": 3.8323416010421374,
      "grad_norm": 0.35503873229026794,
      "learning_rate": 2.3507047172235657e-06,
      "loss": 0.0097,
      "step": 2341760
    },
    {
      "epoch": 3.8323743314807905,
      "grad_norm": 0.36641833186149597,
      "learning_rate": 2.350638825010049e-06,
      "loss": 0.0132,
      "step": 2341780
    },
    {
      "epoch": 3.8324070619194437,
      "grad_norm": 0.34268686175346375,
      "learning_rate": 2.3505729327965316e-06,
      "loss": 0.0078,
      "step": 2341800
    },
    {
      "epoch": 3.8324397923580973,
      "grad_norm": 0.407284677028656,
      "learning_rate": 2.3505070405830144e-06,
      "loss": 0.0126,
      "step": 2341820
    },
    {
      "epoch": 3.8324725227967504,
      "grad_norm": 0.43661102652549744,
      "learning_rate": 2.3504411483694975e-06,
      "loss": 0.0112,
      "step": 2341840
    },
    {
      "epoch": 3.8325052532354036,
      "grad_norm": 0.21373136341571808,
      "learning_rate": 2.3503752561559803e-06,
      "loss": 0.0088,
      "step": 2341860
    },
    {
      "epoch": 3.832537983674057,
      "grad_norm": 0.27715203166007996,
      "learning_rate": 2.350309363942463e-06,
      "loss": 0.0102,
      "step": 2341880
    },
    {
      "epoch": 3.8325707141127108,
      "grad_norm": 0.2592814266681671,
      "learning_rate": 2.350243471728946e-06,
      "loss": 0.0071,
      "step": 2341900
    },
    {
      "epoch": 3.832603444551364,
      "grad_norm": 0.4322536885738373,
      "learning_rate": 2.350177579515429e-06,
      "loss": 0.01,
      "step": 2341920
    },
    {
      "epoch": 3.832636174990017,
      "grad_norm": 0.33936578035354614,
      "learning_rate": 2.3501116873019117e-06,
      "loss": 0.0105,
      "step": 2341940
    },
    {
      "epoch": 3.8326689054286707,
      "grad_norm": 0.22803303599357605,
      "learning_rate": 2.3500457950883944e-06,
      "loss": 0.0112,
      "step": 2341960
    },
    {
      "epoch": 3.832701635867324,
      "grad_norm": 0.29526469111442566,
      "learning_rate": 2.3499799028748776e-06,
      "loss": 0.0093,
      "step": 2341980
    },
    {
      "epoch": 3.832734366305977,
      "grad_norm": 0.49606049060821533,
      "learning_rate": 2.3499140106613603e-06,
      "loss": 0.0107,
      "step": 2342000
    },
    {
      "epoch": 3.8327670967446306,
      "grad_norm": 0.120809406042099,
      "learning_rate": 2.3498481184478435e-06,
      "loss": 0.0181,
      "step": 2342020
    },
    {
      "epoch": 3.832799827183284,
      "grad_norm": 0.4679502248764038,
      "learning_rate": 2.349782226234326e-06,
      "loss": 0.0079,
      "step": 2342040
    },
    {
      "epoch": 3.8328325576219373,
      "grad_norm": 0.9407098293304443,
      "learning_rate": 2.349716334020809e-06,
      "loss": 0.0152,
      "step": 2342060
    },
    {
      "epoch": 3.8328652880605905,
      "grad_norm": 0.4418625831604004,
      "learning_rate": 2.3496504418072917e-06,
      "loss": 0.0103,
      "step": 2342080
    },
    {
      "epoch": 3.832898018499244,
      "grad_norm": 0.17492803931236267,
      "learning_rate": 2.349584549593775e-06,
      "loss": 0.0098,
      "step": 2342100
    },
    {
      "epoch": 3.832930748937897,
      "grad_norm": 0.6352765560150146,
      "learning_rate": 2.3495186573802576e-06,
      "loss": 0.0151,
      "step": 2342120
    },
    {
      "epoch": 3.8329634793765504,
      "grad_norm": 0.7904167771339417,
      "learning_rate": 2.3494527651667403e-06,
      "loss": 0.0146,
      "step": 2342140
    },
    {
      "epoch": 3.832996209815204,
      "grad_norm": 0.08438301086425781,
      "learning_rate": 2.349386872953223e-06,
      "loss": 0.011,
      "step": 2342160
    },
    {
      "epoch": 3.8330289402538575,
      "grad_norm": 0.7114929556846619,
      "learning_rate": 2.3493209807397062e-06,
      "loss": 0.0147,
      "step": 2342180
    },
    {
      "epoch": 3.8330616706925107,
      "grad_norm": 0.37969914078712463,
      "learning_rate": 2.349255088526189e-06,
      "loss": 0.009,
      "step": 2342200
    },
    {
      "epoch": 3.833094401131164,
      "grad_norm": 0.16075187921524048,
      "learning_rate": 2.349189196312672e-06,
      "loss": 0.0086,
      "step": 2342220
    },
    {
      "epoch": 3.8331271315698174,
      "grad_norm": 0.21885550022125244,
      "learning_rate": 2.349123304099155e-06,
      "loss": 0.0111,
      "step": 2342240
    },
    {
      "epoch": 3.8331598620084706,
      "grad_norm": 0.2236407846212387,
      "learning_rate": 2.3490574118856376e-06,
      "loss": 0.0101,
      "step": 2342260
    },
    {
      "epoch": 3.8331925924471237,
      "grad_norm": 0.1425330489873886,
      "learning_rate": 2.3489915196721203e-06,
      "loss": 0.01,
      "step": 2342280
    },
    {
      "epoch": 3.8332253228857773,
      "grad_norm": 0.6158893704414368,
      "learning_rate": 2.3489256274586035e-06,
      "loss": 0.0107,
      "step": 2342300
    },
    {
      "epoch": 3.833258053324431,
      "grad_norm": 0.44242554903030396,
      "learning_rate": 2.3488597352450863e-06,
      "loss": 0.0134,
      "step": 2342320
    },
    {
      "epoch": 3.833290783763084,
      "grad_norm": 0.4606665074825287,
      "learning_rate": 2.348793843031569e-06,
      "loss": 0.0103,
      "step": 2342340
    },
    {
      "epoch": 3.8333235142017372,
      "grad_norm": 0.38672372698783875,
      "learning_rate": 2.348727950818052e-06,
      "loss": 0.0152,
      "step": 2342360
    },
    {
      "epoch": 3.833356244640391,
      "grad_norm": 0.41290774941444397,
      "learning_rate": 2.348662058604535e-06,
      "loss": 0.0132,
      "step": 2342380
    },
    {
      "epoch": 3.833388975079044,
      "grad_norm": 0.36710453033447266,
      "learning_rate": 2.3485961663910176e-06,
      "loss": 0.0111,
      "step": 2342400
    },
    {
      "epoch": 3.833421705517697,
      "grad_norm": 0.3377743363380432,
      "learning_rate": 2.348530274177501e-06,
      "loss": 0.0134,
      "step": 2342420
    },
    {
      "epoch": 3.8334544359563507,
      "grad_norm": 0.19536280632019043,
      "learning_rate": 2.3484643819639835e-06,
      "loss": 0.0079,
      "step": 2342440
    },
    {
      "epoch": 3.833487166395004,
      "grad_norm": 0.43816670775413513,
      "learning_rate": 2.3483984897504663e-06,
      "loss": 0.0154,
      "step": 2342460
    },
    {
      "epoch": 3.8335198968336575,
      "grad_norm": 0.14057587087154388,
      "learning_rate": 2.348332597536949e-06,
      "loss": 0.0115,
      "step": 2342480
    },
    {
      "epoch": 3.8335526272723106,
      "grad_norm": 0.11400356143712997,
      "learning_rate": 2.348266705323432e-06,
      "loss": 0.0069,
      "step": 2342500
    },
    {
      "epoch": 3.833585357710964,
      "grad_norm": 0.5482026934623718,
      "learning_rate": 2.348200813109915e-06,
      "loss": 0.0141,
      "step": 2342520
    },
    {
      "epoch": 3.8336180881496174,
      "grad_norm": 0.16266609728336334,
      "learning_rate": 2.348134920896398e-06,
      "loss": 0.0063,
      "step": 2342540
    },
    {
      "epoch": 3.8336508185882705,
      "grad_norm": 0.518061637878418,
      "learning_rate": 2.348069028682881e-06,
      "loss": 0.0126,
      "step": 2342560
    },
    {
      "epoch": 3.833683549026924,
      "grad_norm": 0.2628377676010132,
      "learning_rate": 2.3480031364693636e-06,
      "loss": 0.0142,
      "step": 2342580
    },
    {
      "epoch": 3.8337162794655772,
      "grad_norm": 0.5414116382598877,
      "learning_rate": 2.3479372442558467e-06,
      "loss": 0.0075,
      "step": 2342600
    },
    {
      "epoch": 3.833749009904231,
      "grad_norm": 0.16832280158996582,
      "learning_rate": 2.3478713520423295e-06,
      "loss": 0.0138,
      "step": 2342620
    },
    {
      "epoch": 3.833781740342884,
      "grad_norm": 0.13223060965538025,
      "learning_rate": 2.347805459828812e-06,
      "loss": 0.0126,
      "step": 2342640
    },
    {
      "epoch": 3.8338144707815376,
      "grad_norm": 0.25949645042419434,
      "learning_rate": 2.347739567615295e-06,
      "loss": 0.0099,
      "step": 2342660
    },
    {
      "epoch": 3.8338472012201907,
      "grad_norm": 0.20902670919895172,
      "learning_rate": 2.3476736754017777e-06,
      "loss": 0.0091,
      "step": 2342680
    },
    {
      "epoch": 3.833879931658844,
      "grad_norm": 0.2355365753173828,
      "learning_rate": 2.347607783188261e-06,
      "loss": 0.0093,
      "step": 2342700
    },
    {
      "epoch": 3.8339126620974975,
      "grad_norm": 0.050533000379800797,
      "learning_rate": 2.347541890974744e-06,
      "loss": 0.0073,
      "step": 2342720
    },
    {
      "epoch": 3.8339453925361506,
      "grad_norm": 0.27623480558395386,
      "learning_rate": 2.3474759987612268e-06,
      "loss": 0.0112,
      "step": 2342740
    },
    {
      "epoch": 3.8339781229748042,
      "grad_norm": 0.05520445480942726,
      "learning_rate": 2.3474101065477095e-06,
      "loss": 0.0095,
      "step": 2342760
    },
    {
      "epoch": 3.8340108534134574,
      "grad_norm": 0.327987402677536,
      "learning_rate": 2.3473442143341922e-06,
      "loss": 0.012,
      "step": 2342780
    },
    {
      "epoch": 3.834043583852111,
      "grad_norm": 0.2633536159992218,
      "learning_rate": 2.3472783221206754e-06,
      "loss": 0.012,
      "step": 2342800
    },
    {
      "epoch": 3.834076314290764,
      "grad_norm": 0.3154972791671753,
      "learning_rate": 2.347212429907158e-06,
      "loss": 0.0172,
      "step": 2342820
    },
    {
      "epoch": 3.8341090447294173,
      "grad_norm": 0.15052564442157745,
      "learning_rate": 2.347146537693641e-06,
      "loss": 0.0114,
      "step": 2342840
    },
    {
      "epoch": 3.834141775168071,
      "grad_norm": 0.14115172624588013,
      "learning_rate": 2.3470806454801236e-06,
      "loss": 0.0098,
      "step": 2342860
    },
    {
      "epoch": 3.834174505606724,
      "grad_norm": 0.3671582341194153,
      "learning_rate": 2.3470147532666064e-06,
      "loss": 0.0099,
      "step": 2342880
    },
    {
      "epoch": 3.8342072360453776,
      "grad_norm": 0.21978239715099335,
      "learning_rate": 2.3469488610530895e-06,
      "loss": 0.0109,
      "step": 2342900
    },
    {
      "epoch": 3.8342399664840308,
      "grad_norm": 0.10080379992723465,
      "learning_rate": 2.3468829688395727e-06,
      "loss": 0.0074,
      "step": 2342920
    },
    {
      "epoch": 3.8342726969226844,
      "grad_norm": 0.45642513036727905,
      "learning_rate": 2.3468170766260554e-06,
      "loss": 0.0112,
      "step": 2342940
    },
    {
      "epoch": 3.8343054273613375,
      "grad_norm": 0.24488316476345062,
      "learning_rate": 2.346751184412538e-06,
      "loss": 0.0102,
      "step": 2342960
    },
    {
      "epoch": 3.8343381577999907,
      "grad_norm": 0.6378955841064453,
      "learning_rate": 2.346685292199021e-06,
      "loss": 0.0167,
      "step": 2342980
    },
    {
      "epoch": 3.8343708882386442,
      "grad_norm": 0.26497069001197815,
      "learning_rate": 2.346619399985504e-06,
      "loss": 0.0119,
      "step": 2343000
    },
    {
      "epoch": 3.8344036186772974,
      "grad_norm": 0.47532689571380615,
      "learning_rate": 2.346553507771987e-06,
      "loss": 0.008,
      "step": 2343020
    },
    {
      "epoch": 3.834436349115951,
      "grad_norm": 0.09728314727544785,
      "learning_rate": 2.3464876155584695e-06,
      "loss": 0.0065,
      "step": 2343040
    },
    {
      "epoch": 3.834469079554604,
      "grad_norm": 0.17617245018482208,
      "learning_rate": 2.3464217233449523e-06,
      "loss": 0.0085,
      "step": 2343060
    },
    {
      "epoch": 3.8345018099932577,
      "grad_norm": 0.1095350980758667,
      "learning_rate": 2.3463558311314354e-06,
      "loss": 0.0123,
      "step": 2343080
    },
    {
      "epoch": 3.834534540431911,
      "grad_norm": 0.321378618478775,
      "learning_rate": 2.346289938917918e-06,
      "loss": 0.0078,
      "step": 2343100
    },
    {
      "epoch": 3.834567270870564,
      "grad_norm": 0.17980961501598358,
      "learning_rate": 2.3462240467044013e-06,
      "loss": 0.0092,
      "step": 2343120
    },
    {
      "epoch": 3.8346000013092176,
      "grad_norm": 0.09864170849323273,
      "learning_rate": 2.346158154490884e-06,
      "loss": 0.0117,
      "step": 2343140
    },
    {
      "epoch": 3.834632731747871,
      "grad_norm": 0.15081018209457397,
      "learning_rate": 2.346092262277367e-06,
      "loss": 0.008,
      "step": 2343160
    },
    {
      "epoch": 3.8346654621865244,
      "grad_norm": 0.1703924983739853,
      "learning_rate": 2.3460263700638496e-06,
      "loss": 0.0102,
      "step": 2343180
    },
    {
      "epoch": 3.8346981926251775,
      "grad_norm": 0.28538990020751953,
      "learning_rate": 2.3459604778503327e-06,
      "loss": 0.0098,
      "step": 2343200
    },
    {
      "epoch": 3.834730923063831,
      "grad_norm": 0.2692256271839142,
      "learning_rate": 2.3458945856368155e-06,
      "loss": 0.016,
      "step": 2343220
    },
    {
      "epoch": 3.8347636535024843,
      "grad_norm": 0.2848101556301117,
      "learning_rate": 2.3458286934232986e-06,
      "loss": 0.0108,
      "step": 2343240
    },
    {
      "epoch": 3.8347963839411374,
      "grad_norm": 0.13630183041095734,
      "learning_rate": 2.3457628012097814e-06,
      "loss": 0.0075,
      "step": 2343260
    },
    {
      "epoch": 3.834829114379791,
      "grad_norm": 0.18901944160461426,
      "learning_rate": 2.345696908996264e-06,
      "loss": 0.0124,
      "step": 2343280
    },
    {
      "epoch": 3.834861844818444,
      "grad_norm": 0.27963492274284363,
      "learning_rate": 2.345631016782747e-06,
      "loss": 0.0082,
      "step": 2343300
    },
    {
      "epoch": 3.8348945752570978,
      "grad_norm": 0.12088040262460709,
      "learning_rate": 2.34556512456923e-06,
      "loss": 0.0111,
      "step": 2343320
    },
    {
      "epoch": 3.834927305695751,
      "grad_norm": 0.7474560737609863,
      "learning_rate": 2.3454992323557128e-06,
      "loss": 0.0094,
      "step": 2343340
    },
    {
      "epoch": 3.8349600361344045,
      "grad_norm": 0.17696627974510193,
      "learning_rate": 2.3454333401421955e-06,
      "loss": 0.013,
      "step": 2343360
    },
    {
      "epoch": 3.8349927665730577,
      "grad_norm": 0.7542131543159485,
      "learning_rate": 2.3453674479286782e-06,
      "loss": 0.0081,
      "step": 2343380
    },
    {
      "epoch": 3.835025497011711,
      "grad_norm": 0.05258449912071228,
      "learning_rate": 2.3453015557151614e-06,
      "loss": 0.0122,
      "step": 2343400
    },
    {
      "epoch": 3.8350582274503644,
      "grad_norm": 0.23002450168132782,
      "learning_rate": 2.345235663501644e-06,
      "loss": 0.0094,
      "step": 2343420
    },
    {
      "epoch": 3.8350909578890175,
      "grad_norm": 0.12260955572128296,
      "learning_rate": 2.3451697712881273e-06,
      "loss": 0.0122,
      "step": 2343440
    },
    {
      "epoch": 3.8351236883276707,
      "grad_norm": 0.23625430464744568,
      "learning_rate": 2.34510387907461e-06,
      "loss": 0.0088,
      "step": 2343460
    },
    {
      "epoch": 3.8351564187663243,
      "grad_norm": 0.059696443378925323,
      "learning_rate": 2.3450379868610928e-06,
      "loss": 0.015,
      "step": 2343480
    },
    {
      "epoch": 3.835189149204978,
      "grad_norm": 0.9967877864837646,
      "learning_rate": 2.3449720946475755e-06,
      "loss": 0.0094,
      "step": 2343500
    },
    {
      "epoch": 3.835221879643631,
      "grad_norm": 0.6865822076797485,
      "learning_rate": 2.3449062024340587e-06,
      "loss": 0.0108,
      "step": 2343520
    },
    {
      "epoch": 3.835254610082284,
      "grad_norm": 0.11599498987197876,
      "learning_rate": 2.3448403102205414e-06,
      "loss": 0.0159,
      "step": 2343540
    },
    {
      "epoch": 3.835287340520938,
      "grad_norm": 0.3078138828277588,
      "learning_rate": 2.344774418007024e-06,
      "loss": 0.0147,
      "step": 2343560
    },
    {
      "epoch": 3.835320070959591,
      "grad_norm": 0.13966628909111023,
      "learning_rate": 2.344708525793507e-06,
      "loss": 0.0141,
      "step": 2343580
    },
    {
      "epoch": 3.835352801398244,
      "grad_norm": 0.35073122382164,
      "learning_rate": 2.34464263357999e-06,
      "loss": 0.0099,
      "step": 2343600
    },
    {
      "epoch": 3.8353855318368977,
      "grad_norm": 0.3369213938713074,
      "learning_rate": 2.344576741366473e-06,
      "loss": 0.0124,
      "step": 2343620
    },
    {
      "epoch": 3.8354182622755513,
      "grad_norm": 0.2145189493894577,
      "learning_rate": 2.344510849152956e-06,
      "loss": 0.0144,
      "step": 2343640
    },
    {
      "epoch": 3.8354509927142044,
      "grad_norm": 0.5026580691337585,
      "learning_rate": 2.3444449569394387e-06,
      "loss": 0.0124,
      "step": 2343660
    },
    {
      "epoch": 3.8354837231528576,
      "grad_norm": 0.2803143262863159,
      "learning_rate": 2.3443790647259214e-06,
      "loss": 0.0114,
      "step": 2343680
    },
    {
      "epoch": 3.835516453591511,
      "grad_norm": 0.15687653422355652,
      "learning_rate": 2.344313172512404e-06,
      "loss": 0.0107,
      "step": 2343700
    },
    {
      "epoch": 3.8355491840301643,
      "grad_norm": 0.17655909061431885,
      "learning_rate": 2.3442472802988874e-06,
      "loss": 0.0088,
      "step": 2343720
    },
    {
      "epoch": 3.8355819144688175,
      "grad_norm": 0.307750940322876,
      "learning_rate": 2.34418138808537e-06,
      "loss": 0.0104,
      "step": 2343740
    },
    {
      "epoch": 3.835614644907471,
      "grad_norm": 0.08635623008012772,
      "learning_rate": 2.344115495871853e-06,
      "loss": 0.0113,
      "step": 2343760
    },
    {
      "epoch": 3.8356473753461247,
      "grad_norm": 0.3849845826625824,
      "learning_rate": 2.344049603658336e-06,
      "loss": 0.0118,
      "step": 2343780
    },
    {
      "epoch": 3.835680105784778,
      "grad_norm": 0.14377188682556152,
      "learning_rate": 2.3439837114448187e-06,
      "loss": 0.0134,
      "step": 2343800
    },
    {
      "epoch": 3.835712836223431,
      "grad_norm": 0.4804007112979889,
      "learning_rate": 2.3439178192313015e-06,
      "loss": 0.0117,
      "step": 2343820
    },
    {
      "epoch": 3.8357455666620845,
      "grad_norm": 0.2003120630979538,
      "learning_rate": 2.3438519270177846e-06,
      "loss": 0.0119,
      "step": 2343840
    },
    {
      "epoch": 3.8357782971007377,
      "grad_norm": 0.7944617867469788,
      "learning_rate": 2.3437860348042674e-06,
      "loss": 0.0113,
      "step": 2343860
    },
    {
      "epoch": 3.835811027539391,
      "grad_norm": 0.2048977166414261,
      "learning_rate": 2.34372014259075e-06,
      "loss": 0.0092,
      "step": 2343880
    },
    {
      "epoch": 3.8358437579780444,
      "grad_norm": 0.228897824883461,
      "learning_rate": 2.343654250377233e-06,
      "loss": 0.0133,
      "step": 2343900
    },
    {
      "epoch": 3.835876488416698,
      "grad_norm": 0.33609580993652344,
      "learning_rate": 2.343588358163716e-06,
      "loss": 0.0079,
      "step": 2343920
    },
    {
      "epoch": 3.835909218855351,
      "grad_norm": 0.2692778408527374,
      "learning_rate": 2.3435224659501988e-06,
      "loss": 0.0093,
      "step": 2343940
    },
    {
      "epoch": 3.8359419492940043,
      "grad_norm": 0.5421761274337769,
      "learning_rate": 2.343456573736682e-06,
      "loss": 0.012,
      "step": 2343960
    },
    {
      "epoch": 3.835974679732658,
      "grad_norm": 0.4725610911846161,
      "learning_rate": 2.3433906815231647e-06,
      "loss": 0.0133,
      "step": 2343980
    },
    {
      "epoch": 3.836007410171311,
      "grad_norm": 0.19314630329608917,
      "learning_rate": 2.3433247893096474e-06,
      "loss": 0.0119,
      "step": 2344000
    },
    {
      "epoch": 3.8360401406099642,
      "grad_norm": 0.2313603162765503,
      "learning_rate": 2.3432588970961306e-06,
      "loss": 0.0165,
      "step": 2344020
    },
    {
      "epoch": 3.836072871048618,
      "grad_norm": 0.09745044261217117,
      "learning_rate": 2.3431930048826133e-06,
      "loss": 0.0093,
      "step": 2344040
    },
    {
      "epoch": 3.836105601487271,
      "grad_norm": 0.07294022291898727,
      "learning_rate": 2.343127112669096e-06,
      "loss": 0.0074,
      "step": 2344060
    },
    {
      "epoch": 3.8361383319259246,
      "grad_norm": 0.44611120223999023,
      "learning_rate": 2.3430612204555788e-06,
      "loss": 0.0156,
      "step": 2344080
    },
    {
      "epoch": 3.8361710623645777,
      "grad_norm": 0.22424188256263733,
      "learning_rate": 2.3429953282420615e-06,
      "loss": 0.0128,
      "step": 2344100
    },
    {
      "epoch": 3.8362037928032313,
      "grad_norm": 0.11122570931911469,
      "learning_rate": 2.3429294360285447e-06,
      "loss": 0.0087,
      "step": 2344120
    },
    {
      "epoch": 3.8362365232418845,
      "grad_norm": 0.16594438254833221,
      "learning_rate": 2.342863543815028e-06,
      "loss": 0.0081,
      "step": 2344140
    },
    {
      "epoch": 3.8362692536805376,
      "grad_norm": 0.11915302276611328,
      "learning_rate": 2.3427976516015106e-06,
      "loss": 0.0091,
      "step": 2344160
    },
    {
      "epoch": 3.836301984119191,
      "grad_norm": 0.07323133945465088,
      "learning_rate": 2.3427317593879933e-06,
      "loss": 0.0083,
      "step": 2344180
    },
    {
      "epoch": 3.8363347145578444,
      "grad_norm": 0.1625565141439438,
      "learning_rate": 2.342665867174476e-06,
      "loss": 0.0094,
      "step": 2344200
    },
    {
      "epoch": 3.836367444996498,
      "grad_norm": 0.29489758610725403,
      "learning_rate": 2.3425999749609592e-06,
      "loss": 0.0122,
      "step": 2344220
    },
    {
      "epoch": 3.836400175435151,
      "grad_norm": 0.167471244931221,
      "learning_rate": 2.342534082747442e-06,
      "loss": 0.0097,
      "step": 2344240
    },
    {
      "epoch": 3.8364329058738047,
      "grad_norm": 0.09121054410934448,
      "learning_rate": 2.3424681905339247e-06,
      "loss": 0.0099,
      "step": 2344260
    },
    {
      "epoch": 3.836465636312458,
      "grad_norm": 0.05697525665163994,
      "learning_rate": 2.3424022983204075e-06,
      "loss": 0.007,
      "step": 2344280
    },
    {
      "epoch": 3.836498366751111,
      "grad_norm": 0.06923035532236099,
      "learning_rate": 2.3423364061068906e-06,
      "loss": 0.0081,
      "step": 2344300
    },
    {
      "epoch": 3.8365310971897646,
      "grad_norm": 0.32914265990257263,
      "learning_rate": 2.3422705138933734e-06,
      "loss": 0.0109,
      "step": 2344320
    },
    {
      "epoch": 3.8365638276284177,
      "grad_norm": 0.5630251169204712,
      "learning_rate": 2.3422046216798565e-06,
      "loss": 0.0109,
      "step": 2344340
    },
    {
      "epoch": 3.8365965580670713,
      "grad_norm": 0.09115072339773178,
      "learning_rate": 2.3421387294663393e-06,
      "loss": 0.0095,
      "step": 2344360
    },
    {
      "epoch": 3.8366292885057245,
      "grad_norm": 0.15248633921146393,
      "learning_rate": 2.342072837252822e-06,
      "loss": 0.0113,
      "step": 2344380
    },
    {
      "epoch": 3.836662018944378,
      "grad_norm": 1.0007096529006958,
      "learning_rate": 2.3420069450393047e-06,
      "loss": 0.014,
      "step": 2344400
    },
    {
      "epoch": 3.8366947493830312,
      "grad_norm": 0.08545225113630295,
      "learning_rate": 2.341941052825788e-06,
      "loss": 0.0095,
      "step": 2344420
    },
    {
      "epoch": 3.8367274798216844,
      "grad_norm": 0.49929454922676086,
      "learning_rate": 2.3418751606122706e-06,
      "loss": 0.0142,
      "step": 2344440
    },
    {
      "epoch": 3.836760210260338,
      "grad_norm": 0.09882274270057678,
      "learning_rate": 2.3418092683987534e-06,
      "loss": 0.0101,
      "step": 2344460
    },
    {
      "epoch": 3.836792940698991,
      "grad_norm": 0.2813211679458618,
      "learning_rate": 2.3417433761852365e-06,
      "loss": 0.0068,
      "step": 2344480
    },
    {
      "epoch": 3.8368256711376447,
      "grad_norm": 0.3391604423522949,
      "learning_rate": 2.3416774839717193e-06,
      "loss": 0.0134,
      "step": 2344500
    },
    {
      "epoch": 3.836858401576298,
      "grad_norm": 0.19566455483436584,
      "learning_rate": 2.341611591758202e-06,
      "loss": 0.016,
      "step": 2344520
    },
    {
      "epoch": 3.8368911320149515,
      "grad_norm": 0.21073809266090393,
      "learning_rate": 2.341545699544685e-06,
      "loss": 0.0107,
      "step": 2344540
    },
    {
      "epoch": 3.8369238624536046,
      "grad_norm": 0.17769983410835266,
      "learning_rate": 2.341479807331168e-06,
      "loss": 0.0113,
      "step": 2344560
    },
    {
      "epoch": 3.8369565928922578,
      "grad_norm": 0.4882924556732178,
      "learning_rate": 2.3414139151176507e-06,
      "loss": 0.009,
      "step": 2344580
    },
    {
      "epoch": 3.8369893233309114,
      "grad_norm": 0.05048339068889618,
      "learning_rate": 2.3413480229041334e-06,
      "loss": 0.011,
      "step": 2344600
    },
    {
      "epoch": 3.8370220537695645,
      "grad_norm": 0.04312612861394882,
      "learning_rate": 2.3412821306906166e-06,
      "loss": 0.0106,
      "step": 2344620
    },
    {
      "epoch": 3.837054784208218,
      "grad_norm": 0.1693984866142273,
      "learning_rate": 2.3412162384770993e-06,
      "loss": 0.016,
      "step": 2344640
    },
    {
      "epoch": 3.8370875146468713,
      "grad_norm": 0.1372023969888687,
      "learning_rate": 2.3411503462635825e-06,
      "loss": 0.0103,
      "step": 2344660
    },
    {
      "epoch": 3.837120245085525,
      "grad_norm": 0.4227278530597687,
      "learning_rate": 2.3410844540500652e-06,
      "loss": 0.0112,
      "step": 2344680
    },
    {
      "epoch": 3.837152975524178,
      "grad_norm": 0.4759845733642578,
      "learning_rate": 2.341018561836548e-06,
      "loss": 0.0108,
      "step": 2344700
    },
    {
      "epoch": 3.837185705962831,
      "grad_norm": 0.5921774506568909,
      "learning_rate": 2.3409526696230307e-06,
      "loss": 0.0116,
      "step": 2344720
    },
    {
      "epoch": 3.8372184364014847,
      "grad_norm": 0.23922698199748993,
      "learning_rate": 2.340886777409514e-06,
      "loss": 0.0158,
      "step": 2344740
    },
    {
      "epoch": 3.837251166840138,
      "grad_norm": 0.452132910490036,
      "learning_rate": 2.3408208851959966e-06,
      "loss": 0.0138,
      "step": 2344760
    },
    {
      "epoch": 3.8372838972787915,
      "grad_norm": 0.48049572110176086,
      "learning_rate": 2.3407549929824793e-06,
      "loss": 0.0103,
      "step": 2344780
    },
    {
      "epoch": 3.8373166277174446,
      "grad_norm": 0.1978480964899063,
      "learning_rate": 2.340689100768962e-06,
      "loss": 0.008,
      "step": 2344800
    },
    {
      "epoch": 3.8373493581560982,
      "grad_norm": 0.4252942204475403,
      "learning_rate": 2.3406232085554452e-06,
      "loss": 0.0117,
      "step": 2344820
    },
    {
      "epoch": 3.8373820885947514,
      "grad_norm": 0.2600593864917755,
      "learning_rate": 2.340557316341928e-06,
      "loss": 0.019,
      "step": 2344840
    },
    {
      "epoch": 3.8374148190334045,
      "grad_norm": 0.14943213760852814,
      "learning_rate": 2.340491424128411e-06,
      "loss": 0.0088,
      "step": 2344860
    },
    {
      "epoch": 3.837447549472058,
      "grad_norm": 0.21801526844501495,
      "learning_rate": 2.340425531914894e-06,
      "loss": 0.0174,
      "step": 2344880
    },
    {
      "epoch": 3.8374802799107113,
      "grad_norm": 0.36449912190437317,
      "learning_rate": 2.3403596397013766e-06,
      "loss": 0.012,
      "step": 2344900
    },
    {
      "epoch": 3.8375130103493644,
      "grad_norm": 0.0909154862165451,
      "learning_rate": 2.3402937474878594e-06,
      "loss": 0.0109,
      "step": 2344920
    },
    {
      "epoch": 3.837545740788018,
      "grad_norm": 0.3464278280735016,
      "learning_rate": 2.3402278552743425e-06,
      "loss": 0.0121,
      "step": 2344940
    },
    {
      "epoch": 3.8375784712266716,
      "grad_norm": 0.2147974967956543,
      "learning_rate": 2.3401619630608253e-06,
      "loss": 0.0136,
      "step": 2344960
    },
    {
      "epoch": 3.8376112016653248,
      "grad_norm": 0.3099048137664795,
      "learning_rate": 2.340096070847308e-06,
      "loss": 0.0111,
      "step": 2344980
    },
    {
      "epoch": 3.837643932103978,
      "grad_norm": 0.2716480791568756,
      "learning_rate": 2.340030178633791e-06,
      "loss": 0.0112,
      "step": 2345000
    },
    {
      "epoch": 3.8376766625426315,
      "grad_norm": 0.20383486151695251,
      "learning_rate": 2.339964286420274e-06,
      "loss": 0.0132,
      "step": 2345020
    },
    {
      "epoch": 3.8377093929812847,
      "grad_norm": 0.2233290672302246,
      "learning_rate": 2.3398983942067566e-06,
      "loss": 0.0112,
      "step": 2345040
    },
    {
      "epoch": 3.837742123419938,
      "grad_norm": 0.2844841778278351,
      "learning_rate": 2.33983250199324e-06,
      "loss": 0.0115,
      "step": 2345060
    },
    {
      "epoch": 3.8377748538585914,
      "grad_norm": 0.449316143989563,
      "learning_rate": 2.3397666097797225e-06,
      "loss": 0.0149,
      "step": 2345080
    },
    {
      "epoch": 3.837807584297245,
      "grad_norm": 0.24812453985214233,
      "learning_rate": 2.3397007175662053e-06,
      "loss": 0.0127,
      "step": 2345100
    },
    {
      "epoch": 3.837840314735898,
      "grad_norm": 0.2168068140745163,
      "learning_rate": 2.339634825352688e-06,
      "loss": 0.009,
      "step": 2345120
    },
    {
      "epoch": 3.8378730451745513,
      "grad_norm": 0.13767282664775848,
      "learning_rate": 2.339568933139171e-06,
      "loss": 0.0097,
      "step": 2345140
    },
    {
      "epoch": 3.837905775613205,
      "grad_norm": 0.129092276096344,
      "learning_rate": 2.339503040925654e-06,
      "loss": 0.0076,
      "step": 2345160
    },
    {
      "epoch": 3.837938506051858,
      "grad_norm": 0.25535163283348083,
      "learning_rate": 2.339437148712137e-06,
      "loss": 0.0077,
      "step": 2345180
    },
    {
      "epoch": 3.837971236490511,
      "grad_norm": 0.3360174298286438,
      "learning_rate": 2.33937125649862e-06,
      "loss": 0.0132,
      "step": 2345200
    },
    {
      "epoch": 3.838003966929165,
      "grad_norm": 0.3324722945690155,
      "learning_rate": 2.3393053642851026e-06,
      "loss": 0.0107,
      "step": 2345220
    },
    {
      "epoch": 3.8380366973678184,
      "grad_norm": 0.4833487570285797,
      "learning_rate": 2.3392394720715857e-06,
      "loss": 0.0139,
      "step": 2345240
    },
    {
      "epoch": 3.8380694278064715,
      "grad_norm": 0.1603999137878418,
      "learning_rate": 2.3391735798580685e-06,
      "loss": 0.0095,
      "step": 2345260
    },
    {
      "epoch": 3.8381021582451247,
      "grad_norm": 0.20005643367767334,
      "learning_rate": 2.3391076876445512e-06,
      "loss": 0.017,
      "step": 2345280
    },
    {
      "epoch": 3.8381348886837783,
      "grad_norm": 0.29900071024894714,
      "learning_rate": 2.339041795431034e-06,
      "loss": 0.008,
      "step": 2345300
    },
    {
      "epoch": 3.8381676191224314,
      "grad_norm": 0.36190325021743774,
      "learning_rate": 2.3389759032175167e-06,
      "loss": 0.0101,
      "step": 2345320
    },
    {
      "epoch": 3.8382003495610846,
      "grad_norm": 0.09243007004261017,
      "learning_rate": 2.338910011004e-06,
      "loss": 0.0129,
      "step": 2345340
    },
    {
      "epoch": 3.838233079999738,
      "grad_norm": 0.12444815784692764,
      "learning_rate": 2.338844118790483e-06,
      "loss": 0.014,
      "step": 2345360
    },
    {
      "epoch": 3.8382658104383918,
      "grad_norm": 0.3749275207519531,
      "learning_rate": 2.3387782265769658e-06,
      "loss": 0.0108,
      "step": 2345380
    },
    {
      "epoch": 3.838298540877045,
      "grad_norm": 0.13762542605400085,
      "learning_rate": 2.3387123343634485e-06,
      "loss": 0.0108,
      "step": 2345400
    },
    {
      "epoch": 3.838331271315698,
      "grad_norm": 0.3957323729991913,
      "learning_rate": 2.3386464421499312e-06,
      "loss": 0.0101,
      "step": 2345420
    },
    {
      "epoch": 3.8383640017543517,
      "grad_norm": 0.30042916536331177,
      "learning_rate": 2.3385805499364144e-06,
      "loss": 0.0083,
      "step": 2345440
    },
    {
      "epoch": 3.838396732193005,
      "grad_norm": 0.7174088954925537,
      "learning_rate": 2.338514657722897e-06,
      "loss": 0.0141,
      "step": 2345460
    },
    {
      "epoch": 3.838429462631658,
      "grad_norm": 0.05337674543261528,
      "learning_rate": 2.33844876550938e-06,
      "loss": 0.0109,
      "step": 2345480
    },
    {
      "epoch": 3.8384621930703116,
      "grad_norm": 0.6621538996696472,
      "learning_rate": 2.3383828732958626e-06,
      "loss": 0.0098,
      "step": 2345500
    },
    {
      "epoch": 3.8384949235089647,
      "grad_norm": 0.07217010855674744,
      "learning_rate": 2.3383169810823454e-06,
      "loss": 0.0068,
      "step": 2345520
    },
    {
      "epoch": 3.8385276539476183,
      "grad_norm": 0.2149943709373474,
      "learning_rate": 2.3382510888688285e-06,
      "loss": 0.0098,
      "step": 2345540
    },
    {
      "epoch": 3.8385603843862715,
      "grad_norm": 0.22472906112670898,
      "learning_rate": 2.3381851966553117e-06,
      "loss": 0.0085,
      "step": 2345560
    },
    {
      "epoch": 3.838593114824925,
      "grad_norm": 0.364615797996521,
      "learning_rate": 2.3381193044417944e-06,
      "loss": 0.0102,
      "step": 2345580
    },
    {
      "epoch": 3.838625845263578,
      "grad_norm": 0.20275799930095673,
      "learning_rate": 2.338053412228277e-06,
      "loss": 0.0116,
      "step": 2345600
    },
    {
      "epoch": 3.8386585757022313,
      "grad_norm": 0.21721522510051727,
      "learning_rate": 2.33798752001476e-06,
      "loss": 0.0122,
      "step": 2345620
    },
    {
      "epoch": 3.838691306140885,
      "grad_norm": 0.2581877112388611,
      "learning_rate": 2.337921627801243e-06,
      "loss": 0.0088,
      "step": 2345640
    },
    {
      "epoch": 3.838724036579538,
      "grad_norm": 0.5828729271888733,
      "learning_rate": 2.337855735587726e-06,
      "loss": 0.0108,
      "step": 2345660
    },
    {
      "epoch": 3.8387567670181917,
      "grad_norm": 0.3026449978351593,
      "learning_rate": 2.3377898433742086e-06,
      "loss": 0.0102,
      "step": 2345680
    },
    {
      "epoch": 3.838789497456845,
      "grad_norm": 0.08183356374502182,
      "learning_rate": 2.3377239511606913e-06,
      "loss": 0.009,
      "step": 2345700
    },
    {
      "epoch": 3.8388222278954984,
      "grad_norm": 0.13359548151493073,
      "learning_rate": 2.3376580589471745e-06,
      "loss": 0.0098,
      "step": 2345720
    },
    {
      "epoch": 3.8388549583341516,
      "grad_norm": 0.4427507221698761,
      "learning_rate": 2.337592166733657e-06,
      "loss": 0.0126,
      "step": 2345740
    },
    {
      "epoch": 3.8388876887728047,
      "grad_norm": 0.24916958808898926,
      "learning_rate": 2.3375262745201404e-06,
      "loss": 0.0094,
      "step": 2345760
    },
    {
      "epoch": 3.8389204192114583,
      "grad_norm": 0.18313391506671906,
      "learning_rate": 2.337460382306623e-06,
      "loss": 0.014,
      "step": 2345780
    },
    {
      "epoch": 3.8389531496501115,
      "grad_norm": 0.3412454128265381,
      "learning_rate": 2.337394490093106e-06,
      "loss": 0.0105,
      "step": 2345800
    },
    {
      "epoch": 3.838985880088765,
      "grad_norm": 0.24537476897239685,
      "learning_rate": 2.3373285978795886e-06,
      "loss": 0.0109,
      "step": 2345820
    },
    {
      "epoch": 3.839018610527418,
      "grad_norm": 0.21222583949565887,
      "learning_rate": 2.3372627056660717e-06,
      "loss": 0.0104,
      "step": 2345840
    },
    {
      "epoch": 3.839051340966072,
      "grad_norm": 0.44828104972839355,
      "learning_rate": 2.3371968134525545e-06,
      "loss": 0.013,
      "step": 2345860
    },
    {
      "epoch": 3.839084071404725,
      "grad_norm": 0.3123267889022827,
      "learning_rate": 2.3371309212390376e-06,
      "loss": 0.0128,
      "step": 2345880
    },
    {
      "epoch": 3.839116801843378,
      "grad_norm": 0.27443981170654297,
      "learning_rate": 2.3370650290255204e-06,
      "loss": 0.0083,
      "step": 2345900
    },
    {
      "epoch": 3.8391495322820317,
      "grad_norm": 0.1662898063659668,
      "learning_rate": 2.336999136812003e-06,
      "loss": 0.0113,
      "step": 2345920
    },
    {
      "epoch": 3.839182262720685,
      "grad_norm": 0.49567148089408875,
      "learning_rate": 2.336933244598486e-06,
      "loss": 0.0173,
      "step": 2345940
    },
    {
      "epoch": 3.8392149931593385,
      "grad_norm": 0.07169947028160095,
      "learning_rate": 2.336867352384969e-06,
      "loss": 0.0151,
      "step": 2345960
    },
    {
      "epoch": 3.8392477235979916,
      "grad_norm": 0.3892870247364044,
      "learning_rate": 2.3368014601714518e-06,
      "loss": 0.0106,
      "step": 2345980
    },
    {
      "epoch": 3.839280454036645,
      "grad_norm": 0.09143586456775665,
      "learning_rate": 2.3367355679579345e-06,
      "loss": 0.0159,
      "step": 2346000
    },
    {
      "epoch": 3.8393131844752983,
      "grad_norm": 0.15176445245742798,
      "learning_rate": 2.3366696757444172e-06,
      "loss": 0.0101,
      "step": 2346020
    },
    {
      "epoch": 3.8393459149139515,
      "grad_norm": 0.35250890254974365,
      "learning_rate": 2.3366037835309004e-06,
      "loss": 0.0109,
      "step": 2346040
    },
    {
      "epoch": 3.839378645352605,
      "grad_norm": 0.15911126136779785,
      "learning_rate": 2.336537891317383e-06,
      "loss": 0.0082,
      "step": 2346060
    },
    {
      "epoch": 3.8394113757912582,
      "grad_norm": 0.21945849061012268,
      "learning_rate": 2.3364719991038663e-06,
      "loss": 0.0081,
      "step": 2346080
    },
    {
      "epoch": 3.839444106229912,
      "grad_norm": 0.2811615467071533,
      "learning_rate": 2.336406106890349e-06,
      "loss": 0.0102,
      "step": 2346100
    },
    {
      "epoch": 3.839476836668565,
      "grad_norm": 0.3897705376148224,
      "learning_rate": 2.336340214676832e-06,
      "loss": 0.0116,
      "step": 2346120
    },
    {
      "epoch": 3.8395095671072186,
      "grad_norm": 0.2640675902366638,
      "learning_rate": 2.3362743224633145e-06,
      "loss": 0.0052,
      "step": 2346140
    },
    {
      "epoch": 3.8395422975458717,
      "grad_norm": 0.04989226534962654,
      "learning_rate": 2.3362084302497977e-06,
      "loss": 0.0083,
      "step": 2346160
    },
    {
      "epoch": 3.839575027984525,
      "grad_norm": 0.21872079372406006,
      "learning_rate": 2.3361425380362804e-06,
      "loss": 0.0106,
      "step": 2346180
    },
    {
      "epoch": 3.8396077584231785,
      "grad_norm": 0.3444095253944397,
      "learning_rate": 2.336076645822763e-06,
      "loss": 0.0138,
      "step": 2346200
    },
    {
      "epoch": 3.8396404888618316,
      "grad_norm": 0.0870538055896759,
      "learning_rate": 2.336010753609246e-06,
      "loss": 0.0109,
      "step": 2346220
    },
    {
      "epoch": 3.839673219300485,
      "grad_norm": 0.2519396245479584,
      "learning_rate": 2.335944861395729e-06,
      "loss": 0.0132,
      "step": 2346240
    },
    {
      "epoch": 3.8397059497391384,
      "grad_norm": 0.17903728783130646,
      "learning_rate": 2.335878969182212e-06,
      "loss": 0.0117,
      "step": 2346260
    },
    {
      "epoch": 3.839738680177792,
      "grad_norm": 0.6563876867294312,
      "learning_rate": 2.335813076968695e-06,
      "loss": 0.0122,
      "step": 2346280
    },
    {
      "epoch": 3.839771410616445,
      "grad_norm": 0.17937806248664856,
      "learning_rate": 2.3357471847551777e-06,
      "loss": 0.0085,
      "step": 2346300
    },
    {
      "epoch": 3.8398041410550983,
      "grad_norm": 0.10829275101423264,
      "learning_rate": 2.3356812925416605e-06,
      "loss": 0.0093,
      "step": 2346320
    },
    {
      "epoch": 3.839836871493752,
      "grad_norm": 0.18966849148273468,
      "learning_rate": 2.335615400328143e-06,
      "loss": 0.0125,
      "step": 2346340
    },
    {
      "epoch": 3.839869601932405,
      "grad_norm": 0.2270561307668686,
      "learning_rate": 2.3355495081146264e-06,
      "loss": 0.0114,
      "step": 2346360
    },
    {
      "epoch": 3.8399023323710586,
      "grad_norm": 0.2182498276233673,
      "learning_rate": 2.335483615901109e-06,
      "loss": 0.0113,
      "step": 2346380
    },
    {
      "epoch": 3.8399350628097118,
      "grad_norm": 0.5273731350898743,
      "learning_rate": 2.335417723687592e-06,
      "loss": 0.0094,
      "step": 2346400
    },
    {
      "epoch": 3.8399677932483653,
      "grad_norm": 1.0944496393203735,
      "learning_rate": 2.335351831474075e-06,
      "loss": 0.0161,
      "step": 2346420
    },
    {
      "epoch": 3.8400005236870185,
      "grad_norm": 0.6255252957344055,
      "learning_rate": 2.3352859392605577e-06,
      "loss": 0.0113,
      "step": 2346440
    },
    {
      "epoch": 3.8400332541256716,
      "grad_norm": 0.18578951060771942,
      "learning_rate": 2.3352200470470405e-06,
      "loss": 0.015,
      "step": 2346460
    },
    {
      "epoch": 3.8400659845643252,
      "grad_norm": 0.22570165991783142,
      "learning_rate": 2.3351541548335236e-06,
      "loss": 0.0106,
      "step": 2346480
    },
    {
      "epoch": 3.8400987150029784,
      "grad_norm": 0.365617036819458,
      "learning_rate": 2.3350882626200064e-06,
      "loss": 0.0124,
      "step": 2346500
    },
    {
      "epoch": 3.8401314454416315,
      "grad_norm": 0.43209823966026306,
      "learning_rate": 2.335022370406489e-06,
      "loss": 0.0101,
      "step": 2346520
    },
    {
      "epoch": 3.840164175880285,
      "grad_norm": 0.5140618085861206,
      "learning_rate": 2.334956478192972e-06,
      "loss": 0.0177,
      "step": 2346540
    },
    {
      "epoch": 3.8401969063189387,
      "grad_norm": 0.07615546882152557,
      "learning_rate": 2.334890585979455e-06,
      "loss": 0.0099,
      "step": 2346560
    },
    {
      "epoch": 3.840229636757592,
      "grad_norm": 0.23380526900291443,
      "learning_rate": 2.3348246937659378e-06,
      "loss": 0.0107,
      "step": 2346580
    },
    {
      "epoch": 3.840262367196245,
      "grad_norm": 0.17088453471660614,
      "learning_rate": 2.334758801552421e-06,
      "loss": 0.01,
      "step": 2346600
    },
    {
      "epoch": 3.8402950976348986,
      "grad_norm": 0.08097454160451889,
      "learning_rate": 2.3346929093389037e-06,
      "loss": 0.01,
      "step": 2346620
    },
    {
      "epoch": 3.8403278280735518,
      "grad_norm": 0.1392771452665329,
      "learning_rate": 2.3346270171253864e-06,
      "loss": 0.0061,
      "step": 2346640
    },
    {
      "epoch": 3.840360558512205,
      "grad_norm": 0.34003329277038574,
      "learning_rate": 2.3345611249118696e-06,
      "loss": 0.0113,
      "step": 2346660
    },
    {
      "epoch": 3.8403932889508585,
      "grad_norm": 0.3204056918621063,
      "learning_rate": 2.3344952326983523e-06,
      "loss": 0.0118,
      "step": 2346680
    },
    {
      "epoch": 3.840426019389512,
      "grad_norm": 0.5469939112663269,
      "learning_rate": 2.334429340484835e-06,
      "loss": 0.0147,
      "step": 2346700
    },
    {
      "epoch": 3.8404587498281653,
      "grad_norm": 0.3870674967765808,
      "learning_rate": 2.334363448271318e-06,
      "loss": 0.013,
      "step": 2346720
    },
    {
      "epoch": 3.8404914802668184,
      "grad_norm": 0.2652936577796936,
      "learning_rate": 2.3342975560578005e-06,
      "loss": 0.0068,
      "step": 2346740
    },
    {
      "epoch": 3.840524210705472,
      "grad_norm": 0.49562323093414307,
      "learning_rate": 2.3342316638442837e-06,
      "loss": 0.0094,
      "step": 2346760
    },
    {
      "epoch": 3.840556941144125,
      "grad_norm": 0.22146254777908325,
      "learning_rate": 2.334165771630767e-06,
      "loss": 0.0129,
      "step": 2346780
    },
    {
      "epoch": 3.8405896715827783,
      "grad_norm": 0.16991287469863892,
      "learning_rate": 2.3340998794172496e-06,
      "loss": 0.0089,
      "step": 2346800
    },
    {
      "epoch": 3.840622402021432,
      "grad_norm": 0.2891785502433777,
      "learning_rate": 2.3340339872037323e-06,
      "loss": 0.0086,
      "step": 2346820
    },
    {
      "epoch": 3.8406551324600855,
      "grad_norm": 0.6172915101051331,
      "learning_rate": 2.333968094990215e-06,
      "loss": 0.0146,
      "step": 2346840
    },
    {
      "epoch": 3.8406878628987386,
      "grad_norm": 0.3914926052093506,
      "learning_rate": 2.3339022027766982e-06,
      "loss": 0.0082,
      "step": 2346860
    },
    {
      "epoch": 3.840720593337392,
      "grad_norm": 0.2545790374279022,
      "learning_rate": 2.333836310563181e-06,
      "loss": 0.0106,
      "step": 2346880
    },
    {
      "epoch": 3.8407533237760454,
      "grad_norm": 0.31337717175483704,
      "learning_rate": 2.3337704183496637e-06,
      "loss": 0.0094,
      "step": 2346900
    },
    {
      "epoch": 3.8407860542146985,
      "grad_norm": 0.261599600315094,
      "learning_rate": 2.3337045261361465e-06,
      "loss": 0.0102,
      "step": 2346920
    },
    {
      "epoch": 3.8408187846533517,
      "grad_norm": 0.4002510905265808,
      "learning_rate": 2.3336386339226296e-06,
      "loss": 0.0094,
      "step": 2346940
    },
    {
      "epoch": 3.8408515150920053,
      "grad_norm": 0.41764479875564575,
      "learning_rate": 2.3335727417091124e-06,
      "loss": 0.0104,
      "step": 2346960
    },
    {
      "epoch": 3.8408842455306584,
      "grad_norm": 0.14026975631713867,
      "learning_rate": 2.3335068494955955e-06,
      "loss": 0.0102,
      "step": 2346980
    },
    {
      "epoch": 3.840916975969312,
      "grad_norm": 0.515565812587738,
      "learning_rate": 2.3334409572820783e-06,
      "loss": 0.0142,
      "step": 2347000
    },
    {
      "epoch": 3.840949706407965,
      "grad_norm": 0.08325457572937012,
      "learning_rate": 2.333375065068561e-06,
      "loss": 0.0078,
      "step": 2347020
    },
    {
      "epoch": 3.8409824368466188,
      "grad_norm": 0.2576311528682709,
      "learning_rate": 2.3333091728550437e-06,
      "loss": 0.0081,
      "step": 2347040
    },
    {
      "epoch": 3.841015167285272,
      "grad_norm": 0.101401686668396,
      "learning_rate": 2.333243280641527e-06,
      "loss": 0.012,
      "step": 2347060
    },
    {
      "epoch": 3.841047897723925,
      "grad_norm": 0.6172102093696594,
      "learning_rate": 2.3331773884280097e-06,
      "loss": 0.0111,
      "step": 2347080
    },
    {
      "epoch": 3.8410806281625787,
      "grad_norm": 0.5476047992706299,
      "learning_rate": 2.3331114962144924e-06,
      "loss": 0.012,
      "step": 2347100
    },
    {
      "epoch": 3.841113358601232,
      "grad_norm": 0.2841273844242096,
      "learning_rate": 2.3330456040009756e-06,
      "loss": 0.0128,
      "step": 2347120
    },
    {
      "epoch": 3.8411460890398854,
      "grad_norm": 0.21068286895751953,
      "learning_rate": 2.3329797117874583e-06,
      "loss": 0.0105,
      "step": 2347140
    },
    {
      "epoch": 3.8411788194785386,
      "grad_norm": 0.4314272105693817,
      "learning_rate": 2.332913819573941e-06,
      "loss": 0.0111,
      "step": 2347160
    },
    {
      "epoch": 3.841211549917192,
      "grad_norm": 0.06970812380313873,
      "learning_rate": 2.332847927360424e-06,
      "loss": 0.0086,
      "step": 2347180
    },
    {
      "epoch": 3.8412442803558453,
      "grad_norm": 0.2670590877532959,
      "learning_rate": 2.332782035146907e-06,
      "loss": 0.0072,
      "step": 2347200
    },
    {
      "epoch": 3.8412770107944985,
      "grad_norm": 0.3796602785587311,
      "learning_rate": 2.3327161429333897e-06,
      "loss": 0.0111,
      "step": 2347220
    },
    {
      "epoch": 3.841309741233152,
      "grad_norm": 0.14513902366161346,
      "learning_rate": 2.3326502507198724e-06,
      "loss": 0.0063,
      "step": 2347240
    },
    {
      "epoch": 3.841342471671805,
      "grad_norm": 0.21106529235839844,
      "learning_rate": 2.3325843585063556e-06,
      "loss": 0.0103,
      "step": 2347260
    },
    {
      "epoch": 3.841375202110459,
      "grad_norm": 0.08566246181726456,
      "learning_rate": 2.3325184662928383e-06,
      "loss": 0.0121,
      "step": 2347280
    },
    {
      "epoch": 3.841407932549112,
      "grad_norm": 0.1331264227628708,
      "learning_rate": 2.3324525740793215e-06,
      "loss": 0.0074,
      "step": 2347300
    },
    {
      "epoch": 3.8414406629877655,
      "grad_norm": 0.13658730685710907,
      "learning_rate": 2.3323866818658042e-06,
      "loss": 0.0083,
      "step": 2347320
    },
    {
      "epoch": 3.8414733934264187,
      "grad_norm": 0.24403513967990875,
      "learning_rate": 2.332320789652287e-06,
      "loss": 0.0138,
      "step": 2347340
    },
    {
      "epoch": 3.841506123865072,
      "grad_norm": 0.208247572183609,
      "learning_rate": 2.3322548974387697e-06,
      "loss": 0.0136,
      "step": 2347360
    },
    {
      "epoch": 3.8415388543037254,
      "grad_norm": 0.6664013862609863,
      "learning_rate": 2.332189005225253e-06,
      "loss": 0.0145,
      "step": 2347380
    },
    {
      "epoch": 3.8415715847423786,
      "grad_norm": 0.42996132373809814,
      "learning_rate": 2.3321231130117356e-06,
      "loss": 0.0112,
      "step": 2347400
    },
    {
      "epoch": 3.841604315181032,
      "grad_norm": 0.29315847158432007,
      "learning_rate": 2.3320572207982183e-06,
      "loss": 0.0063,
      "step": 2347420
    },
    {
      "epoch": 3.8416370456196853,
      "grad_norm": 0.36779388785362244,
      "learning_rate": 2.331991328584701e-06,
      "loss": 0.0082,
      "step": 2347440
    },
    {
      "epoch": 3.841669776058339,
      "grad_norm": 0.38138702511787415,
      "learning_rate": 2.3319254363711842e-06,
      "loss": 0.0124,
      "step": 2347460
    },
    {
      "epoch": 3.841702506496992,
      "grad_norm": 0.4299682080745697,
      "learning_rate": 2.331859544157667e-06,
      "loss": 0.0081,
      "step": 2347480
    },
    {
      "epoch": 3.8417352369356452,
      "grad_norm": 0.23464438319206238,
      "learning_rate": 2.33179365194415e-06,
      "loss": 0.0151,
      "step": 2347500
    },
    {
      "epoch": 3.841767967374299,
      "grad_norm": 0.6354734301567078,
      "learning_rate": 2.331727759730633e-06,
      "loss": 0.0081,
      "step": 2347520
    },
    {
      "epoch": 3.841800697812952,
      "grad_norm": 0.27524593472480774,
      "learning_rate": 2.3316618675171156e-06,
      "loss": 0.0101,
      "step": 2347540
    },
    {
      "epoch": 3.8418334282516056,
      "grad_norm": 0.08356623351573944,
      "learning_rate": 2.3315959753035984e-06,
      "loss": 0.0071,
      "step": 2347560
    },
    {
      "epoch": 3.8418661586902587,
      "grad_norm": 0.1022053137421608,
      "learning_rate": 2.3315300830900815e-06,
      "loss": 0.0078,
      "step": 2347580
    },
    {
      "epoch": 3.8418988891289123,
      "grad_norm": 0.49453282356262207,
      "learning_rate": 2.3314641908765643e-06,
      "loss": 0.0119,
      "step": 2347600
    },
    {
      "epoch": 3.8419316195675655,
      "grad_norm": 0.3239281177520752,
      "learning_rate": 2.331398298663047e-06,
      "loss": 0.0099,
      "step": 2347620
    },
    {
      "epoch": 3.8419643500062186,
      "grad_norm": 0.638414740562439,
      "learning_rate": 2.33133240644953e-06,
      "loss": 0.0133,
      "step": 2347640
    },
    {
      "epoch": 3.841997080444872,
      "grad_norm": 0.15638281404972076,
      "learning_rate": 2.331266514236013e-06,
      "loss": 0.0104,
      "step": 2347660
    },
    {
      "epoch": 3.8420298108835254,
      "grad_norm": 0.3538954555988312,
      "learning_rate": 2.3312006220224957e-06,
      "loss": 0.008,
      "step": 2347680
    },
    {
      "epoch": 3.842062541322179,
      "grad_norm": 0.37903353571891785,
      "learning_rate": 2.331134729808979e-06,
      "loss": 0.0099,
      "step": 2347700
    },
    {
      "epoch": 3.842095271760832,
      "grad_norm": 0.29117241501808167,
      "learning_rate": 2.3310688375954616e-06,
      "loss": 0.0126,
      "step": 2347720
    },
    {
      "epoch": 3.8421280021994857,
      "grad_norm": 0.12453649193048477,
      "learning_rate": 2.3310029453819443e-06,
      "loss": 0.0051,
      "step": 2347740
    },
    {
      "epoch": 3.842160732638139,
      "grad_norm": 0.12217504531145096,
      "learning_rate": 2.330937053168427e-06,
      "loss": 0.0094,
      "step": 2347760
    },
    {
      "epoch": 3.842193463076792,
      "grad_norm": 0.5230604410171509,
      "learning_rate": 2.33087116095491e-06,
      "loss": 0.0076,
      "step": 2347780
    },
    {
      "epoch": 3.8422261935154456,
      "grad_norm": 0.5242425203323364,
      "learning_rate": 2.330805268741393e-06,
      "loss": 0.0092,
      "step": 2347800
    },
    {
      "epoch": 3.8422589239540987,
      "grad_norm": 0.11563467979431152,
      "learning_rate": 2.330739376527876e-06,
      "loss": 0.0095,
      "step": 2347820
    },
    {
      "epoch": 3.8422916543927523,
      "grad_norm": 0.6934964060783386,
      "learning_rate": 2.330673484314359e-06,
      "loss": 0.0128,
      "step": 2347840
    },
    {
      "epoch": 3.8423243848314055,
      "grad_norm": 0.2463327795267105,
      "learning_rate": 2.3306075921008416e-06,
      "loss": 0.0088,
      "step": 2347860
    },
    {
      "epoch": 3.842357115270059,
      "grad_norm": 0.3597470223903656,
      "learning_rate": 2.3305416998873247e-06,
      "loss": 0.0148,
      "step": 2347880
    },
    {
      "epoch": 3.8423898457087122,
      "grad_norm": 0.7717790007591248,
      "learning_rate": 2.3304758076738075e-06,
      "loss": 0.0109,
      "step": 2347900
    },
    {
      "epoch": 3.8424225761473654,
      "grad_norm": 0.13893571496009827,
      "learning_rate": 2.3304099154602902e-06,
      "loss": 0.0104,
      "step": 2347920
    },
    {
      "epoch": 3.842455306586019,
      "grad_norm": 0.3211019039154053,
      "learning_rate": 2.330344023246773e-06,
      "loss": 0.0075,
      "step": 2347940
    },
    {
      "epoch": 3.842488037024672,
      "grad_norm": 0.25141119956970215,
      "learning_rate": 2.3302781310332557e-06,
      "loss": 0.0081,
      "step": 2347960
    },
    {
      "epoch": 3.8425207674633253,
      "grad_norm": 0.05654250085353851,
      "learning_rate": 2.330212238819739e-06,
      "loss": 0.0113,
      "step": 2347980
    },
    {
      "epoch": 3.842553497901979,
      "grad_norm": 0.16217073798179626,
      "learning_rate": 2.330146346606222e-06,
      "loss": 0.0152,
      "step": 2348000
    },
    {
      "epoch": 3.8425862283406325,
      "grad_norm": 0.41442832350730896,
      "learning_rate": 2.3300804543927048e-06,
      "loss": 0.0085,
      "step": 2348020
    },
    {
      "epoch": 3.8426189587792856,
      "grad_norm": 0.4028471112251282,
      "learning_rate": 2.3300145621791875e-06,
      "loss": 0.0148,
      "step": 2348040
    },
    {
      "epoch": 3.8426516892179388,
      "grad_norm": 0.1295180469751358,
      "learning_rate": 2.3299486699656703e-06,
      "loss": 0.0099,
      "step": 2348060
    },
    {
      "epoch": 3.8426844196565924,
      "grad_norm": 0.484353244304657,
      "learning_rate": 2.3298827777521534e-06,
      "loss": 0.01,
      "step": 2348080
    },
    {
      "epoch": 3.8427171500952455,
      "grad_norm": 1.1053043603897095,
      "learning_rate": 2.329816885538636e-06,
      "loss": 0.0133,
      "step": 2348100
    },
    {
      "epoch": 3.8427498805338987,
      "grad_norm": 0.18998366594314575,
      "learning_rate": 2.329750993325119e-06,
      "loss": 0.0103,
      "step": 2348120
    },
    {
      "epoch": 3.8427826109725522,
      "grad_norm": 0.18022465705871582,
      "learning_rate": 2.3296851011116016e-06,
      "loss": 0.0112,
      "step": 2348140
    },
    {
      "epoch": 3.842815341411206,
      "grad_norm": 0.1340150088071823,
      "learning_rate": 2.3296192088980844e-06,
      "loss": 0.0094,
      "step": 2348160
    },
    {
      "epoch": 3.842848071849859,
      "grad_norm": 0.2955682575702667,
      "learning_rate": 2.3295533166845675e-06,
      "loss": 0.012,
      "step": 2348180
    },
    {
      "epoch": 3.842880802288512,
      "grad_norm": 0.9046555161476135,
      "learning_rate": 2.3294874244710507e-06,
      "loss": 0.0101,
      "step": 2348200
    },
    {
      "epoch": 3.8429135327271657,
      "grad_norm": 0.3062340021133423,
      "learning_rate": 2.3294215322575334e-06,
      "loss": 0.0123,
      "step": 2348220
    },
    {
      "epoch": 3.842946263165819,
      "grad_norm": 0.38764673471450806,
      "learning_rate": 2.329355640044016e-06,
      "loss": 0.0111,
      "step": 2348240
    },
    {
      "epoch": 3.842978993604472,
      "grad_norm": 0.28898388147354126,
      "learning_rate": 2.329289747830499e-06,
      "loss": 0.0124,
      "step": 2348260
    },
    {
      "epoch": 3.8430117240431256,
      "grad_norm": 0.25102269649505615,
      "learning_rate": 2.329223855616982e-06,
      "loss": 0.0105,
      "step": 2348280
    },
    {
      "epoch": 3.8430444544817792,
      "grad_norm": 0.46388447284698486,
      "learning_rate": 2.329157963403465e-06,
      "loss": 0.014,
      "step": 2348300
    },
    {
      "epoch": 3.8430771849204324,
      "grad_norm": 0.21476802229881287,
      "learning_rate": 2.3290920711899476e-06,
      "loss": 0.0109,
      "step": 2348320
    },
    {
      "epoch": 3.8431099153590855,
      "grad_norm": 0.2409866750240326,
      "learning_rate": 2.3290261789764303e-06,
      "loss": 0.0104,
      "step": 2348340
    },
    {
      "epoch": 3.843142645797739,
      "grad_norm": 0.07153856009244919,
      "learning_rate": 2.3289602867629135e-06,
      "loss": 0.0068,
      "step": 2348360
    },
    {
      "epoch": 3.8431753762363923,
      "grad_norm": 0.17186638712882996,
      "learning_rate": 2.328894394549396e-06,
      "loss": 0.0106,
      "step": 2348380
    },
    {
      "epoch": 3.8432081066750454,
      "grad_norm": 0.26588529348373413,
      "learning_rate": 2.3288285023358794e-06,
      "loss": 0.0097,
      "step": 2348400
    },
    {
      "epoch": 3.843240837113699,
      "grad_norm": 0.4168224036693573,
      "learning_rate": 2.328762610122362e-06,
      "loss": 0.0165,
      "step": 2348420
    },
    {
      "epoch": 3.8432735675523526,
      "grad_norm": 0.1559881716966629,
      "learning_rate": 2.328696717908845e-06,
      "loss": 0.0134,
      "step": 2348440
    },
    {
      "epoch": 3.8433062979910058,
      "grad_norm": 0.1860879510641098,
      "learning_rate": 2.3286308256953276e-06,
      "loss": 0.0135,
      "step": 2348460
    },
    {
      "epoch": 3.843339028429659,
      "grad_norm": 0.2922971844673157,
      "learning_rate": 2.3285649334818108e-06,
      "loss": 0.0131,
      "step": 2348480
    },
    {
      "epoch": 3.8433717588683125,
      "grad_norm": 0.724216639995575,
      "learning_rate": 2.3284990412682935e-06,
      "loss": 0.0085,
      "step": 2348500
    },
    {
      "epoch": 3.8434044893069657,
      "grad_norm": 0.3286774158477783,
      "learning_rate": 2.3284331490547767e-06,
      "loss": 0.0137,
      "step": 2348520
    },
    {
      "epoch": 3.843437219745619,
      "grad_norm": 0.25377118587493896,
      "learning_rate": 2.3283672568412594e-06,
      "loss": 0.0117,
      "step": 2348540
    },
    {
      "epoch": 3.8434699501842724,
      "grad_norm": 0.20522785186767578,
      "learning_rate": 2.328301364627742e-06,
      "loss": 0.0058,
      "step": 2348560
    },
    {
      "epoch": 3.8435026806229255,
      "grad_norm": 0.22723616659641266,
      "learning_rate": 2.328235472414225e-06,
      "loss": 0.008,
      "step": 2348580
    },
    {
      "epoch": 3.843535411061579,
      "grad_norm": 0.19950486719608307,
      "learning_rate": 2.328169580200708e-06,
      "loss": 0.0053,
      "step": 2348600
    },
    {
      "epoch": 3.8435681415002323,
      "grad_norm": 0.20830319821834564,
      "learning_rate": 2.3281036879871908e-06,
      "loss": 0.0131,
      "step": 2348620
    },
    {
      "epoch": 3.843600871938886,
      "grad_norm": 0.4210416078567505,
      "learning_rate": 2.3280377957736735e-06,
      "loss": 0.0116,
      "step": 2348640
    },
    {
      "epoch": 3.843633602377539,
      "grad_norm": 0.06929996609687805,
      "learning_rate": 2.3279719035601563e-06,
      "loss": 0.0117,
      "step": 2348660
    },
    {
      "epoch": 3.843666332816192,
      "grad_norm": 0.13307136297225952,
      "learning_rate": 2.3279060113466394e-06,
      "loss": 0.0093,
      "step": 2348680
    },
    {
      "epoch": 3.843699063254846,
      "grad_norm": 0.3515773117542267,
      "learning_rate": 2.327840119133122e-06,
      "loss": 0.0075,
      "step": 2348700
    },
    {
      "epoch": 3.843731793693499,
      "grad_norm": 0.09287676960229874,
      "learning_rate": 2.3277742269196053e-06,
      "loss": 0.0067,
      "step": 2348720
    },
    {
      "epoch": 3.8437645241321525,
      "grad_norm": 0.1706800013780594,
      "learning_rate": 2.327708334706088e-06,
      "loss": 0.0098,
      "step": 2348740
    },
    {
      "epoch": 3.8437972545708057,
      "grad_norm": 0.14966733753681183,
      "learning_rate": 2.327642442492571e-06,
      "loss": 0.0093,
      "step": 2348760
    },
    {
      "epoch": 3.8438299850094593,
      "grad_norm": 0.1545148342847824,
      "learning_rate": 2.3275765502790535e-06,
      "loss": 0.0127,
      "step": 2348780
    },
    {
      "epoch": 3.8438627154481124,
      "grad_norm": 0.3728388547897339,
      "learning_rate": 2.3275106580655367e-06,
      "loss": 0.014,
      "step": 2348800
    },
    {
      "epoch": 3.8438954458867656,
      "grad_norm": 0.37358054518699646,
      "learning_rate": 2.3274447658520194e-06,
      "loss": 0.0134,
      "step": 2348820
    },
    {
      "epoch": 3.843928176325419,
      "grad_norm": 0.5806841254234314,
      "learning_rate": 2.327378873638502e-06,
      "loss": 0.0099,
      "step": 2348840
    },
    {
      "epoch": 3.8439609067640723,
      "grad_norm": 0.18795889616012573,
      "learning_rate": 2.327312981424985e-06,
      "loss": 0.0093,
      "step": 2348860
    },
    {
      "epoch": 3.843993637202726,
      "grad_norm": 0.225628063082695,
      "learning_rate": 2.327247089211468e-06,
      "loss": 0.0111,
      "step": 2348880
    },
    {
      "epoch": 3.844026367641379,
      "grad_norm": 0.2649807631969452,
      "learning_rate": 2.327181196997951e-06,
      "loss": 0.0097,
      "step": 2348900
    },
    {
      "epoch": 3.8440590980800327,
      "grad_norm": 0.15020456910133362,
      "learning_rate": 2.327115304784434e-06,
      "loss": 0.0103,
      "step": 2348920
    },
    {
      "epoch": 3.844091828518686,
      "grad_norm": 0.2789425253868103,
      "learning_rate": 2.3270494125709167e-06,
      "loss": 0.0087,
      "step": 2348940
    },
    {
      "epoch": 3.844124558957339,
      "grad_norm": 0.2748667597770691,
      "learning_rate": 2.3269835203573995e-06,
      "loss": 0.0073,
      "step": 2348960
    },
    {
      "epoch": 3.8441572893959925,
      "grad_norm": 0.251277357339859,
      "learning_rate": 2.326917628143882e-06,
      "loss": 0.011,
      "step": 2348980
    },
    {
      "epoch": 3.8441900198346457,
      "grad_norm": 0.2863243818283081,
      "learning_rate": 2.3268517359303654e-06,
      "loss": 0.0099,
      "step": 2349000
    },
    {
      "epoch": 3.8442227502732993,
      "grad_norm": 0.11462623625993729,
      "learning_rate": 2.326785843716848e-06,
      "loss": 0.0056,
      "step": 2349020
    },
    {
      "epoch": 3.8442554807119524,
      "grad_norm": 0.20959638059139252,
      "learning_rate": 2.326719951503331e-06,
      "loss": 0.011,
      "step": 2349040
    },
    {
      "epoch": 3.844288211150606,
      "grad_norm": 0.32353806495666504,
      "learning_rate": 2.326654059289814e-06,
      "loss": 0.0202,
      "step": 2349060
    },
    {
      "epoch": 3.844320941589259,
      "grad_norm": 0.08945462107658386,
      "learning_rate": 2.3265881670762968e-06,
      "loss": 0.0095,
      "step": 2349080
    },
    {
      "epoch": 3.8443536720279123,
      "grad_norm": 0.11238033324480057,
      "learning_rate": 2.3265222748627795e-06,
      "loss": 0.0132,
      "step": 2349100
    },
    {
      "epoch": 3.844386402466566,
      "grad_norm": 0.16130098700523376,
      "learning_rate": 2.3264563826492627e-06,
      "loss": 0.0091,
      "step": 2349120
    },
    {
      "epoch": 3.844419132905219,
      "grad_norm": 0.29335886240005493,
      "learning_rate": 2.3263904904357454e-06,
      "loss": 0.0123,
      "step": 2349140
    },
    {
      "epoch": 3.8444518633438727,
      "grad_norm": 0.21477091312408447,
      "learning_rate": 2.326324598222228e-06,
      "loss": 0.0169,
      "step": 2349160
    },
    {
      "epoch": 3.844484593782526,
      "grad_norm": 0.31715157628059387,
      "learning_rate": 2.326258706008711e-06,
      "loss": 0.0115,
      "step": 2349180
    },
    {
      "epoch": 3.8445173242211794,
      "grad_norm": 0.145130455493927,
      "learning_rate": 2.326192813795194e-06,
      "loss": 0.0094,
      "step": 2349200
    },
    {
      "epoch": 3.8445500546598326,
      "grad_norm": 0.41190633177757263,
      "learning_rate": 2.326126921581677e-06,
      "loss": 0.0091,
      "step": 2349220
    },
    {
      "epoch": 3.8445827850984857,
      "grad_norm": 0.36031374335289,
      "learning_rate": 2.32606102936816e-06,
      "loss": 0.0091,
      "step": 2349240
    },
    {
      "epoch": 3.8446155155371393,
      "grad_norm": 0.09581495076417923,
      "learning_rate": 2.3259951371546427e-06,
      "loss": 0.0112,
      "step": 2349260
    },
    {
      "epoch": 3.8446482459757925,
      "grad_norm": 0.22471003234386444,
      "learning_rate": 2.3259292449411254e-06,
      "loss": 0.0154,
      "step": 2349280
    },
    {
      "epoch": 3.844680976414446,
      "grad_norm": 0.2424568384885788,
      "learning_rate": 2.3258633527276086e-06,
      "loss": 0.013,
      "step": 2349300
    },
    {
      "epoch": 3.844713706853099,
      "grad_norm": 0.376888632774353,
      "learning_rate": 2.3257974605140913e-06,
      "loss": 0.0073,
      "step": 2349320
    },
    {
      "epoch": 3.844746437291753,
      "grad_norm": 0.6824190616607666,
      "learning_rate": 2.325731568300574e-06,
      "loss": 0.0115,
      "step": 2349340
    },
    {
      "epoch": 3.844779167730406,
      "grad_norm": 0.4307961165904999,
      "learning_rate": 2.325665676087057e-06,
      "loss": 0.0095,
      "step": 2349360
    },
    {
      "epoch": 3.844811898169059,
      "grad_norm": 0.11062183976173401,
      "learning_rate": 2.3255997838735395e-06,
      "loss": 0.0156,
      "step": 2349380
    },
    {
      "epoch": 3.8448446286077127,
      "grad_norm": 0.6765015125274658,
      "learning_rate": 2.3255338916600227e-06,
      "loss": 0.0115,
      "step": 2349400
    },
    {
      "epoch": 3.844877359046366,
      "grad_norm": 0.4223584532737732,
      "learning_rate": 2.325467999446506e-06,
      "loss": 0.0115,
      "step": 2349420
    },
    {
      "epoch": 3.8449100894850194,
      "grad_norm": 0.11341621726751328,
      "learning_rate": 2.3254021072329886e-06,
      "loss": 0.0127,
      "step": 2349440
    },
    {
      "epoch": 3.8449428199236726,
      "grad_norm": 0.2914058566093445,
      "learning_rate": 2.3253362150194714e-06,
      "loss": 0.0071,
      "step": 2349460
    },
    {
      "epoch": 3.844975550362326,
      "grad_norm": 0.42948660254478455,
      "learning_rate": 2.325270322805954e-06,
      "loss": 0.0136,
      "step": 2349480
    },
    {
      "epoch": 3.8450082808009793,
      "grad_norm": 0.326587975025177,
      "learning_rate": 2.3252044305924373e-06,
      "loss": 0.0087,
      "step": 2349500
    },
    {
      "epoch": 3.8450410112396325,
      "grad_norm": 0.4056166708469391,
      "learning_rate": 2.32513853837892e-06,
      "loss": 0.0123,
      "step": 2349520
    },
    {
      "epoch": 3.845073741678286,
      "grad_norm": 0.09412620961666107,
      "learning_rate": 2.3250726461654027e-06,
      "loss": 0.0118,
      "step": 2349540
    },
    {
      "epoch": 3.8451064721169392,
      "grad_norm": 0.5116639137268066,
      "learning_rate": 2.3250067539518855e-06,
      "loss": 0.017,
      "step": 2349560
    },
    {
      "epoch": 3.8451392025555924,
      "grad_norm": 0.1039358451962471,
      "learning_rate": 2.3249408617383686e-06,
      "loss": 0.0116,
      "step": 2349580
    },
    {
      "epoch": 3.845171932994246,
      "grad_norm": 0.3471866548061371,
      "learning_rate": 2.3248749695248514e-06,
      "loss": 0.0109,
      "step": 2349600
    },
    {
      "epoch": 3.8452046634328996,
      "grad_norm": 0.06362839043140411,
      "learning_rate": 2.3248090773113345e-06,
      "loss": 0.0076,
      "step": 2349620
    },
    {
      "epoch": 3.8452373938715527,
      "grad_norm": 0.05874316766858101,
      "learning_rate": 2.3247431850978173e-06,
      "loss": 0.0103,
      "step": 2349640
    },
    {
      "epoch": 3.845270124310206,
      "grad_norm": 0.24866561591625214,
      "learning_rate": 2.3246772928843e-06,
      "loss": 0.008,
      "step": 2349660
    },
    {
      "epoch": 3.8453028547488595,
      "grad_norm": 0.21887581050395966,
      "learning_rate": 2.3246114006707828e-06,
      "loss": 0.0142,
      "step": 2349680
    },
    {
      "epoch": 3.8453355851875126,
      "grad_norm": 0.6486048698425293,
      "learning_rate": 2.324545508457266e-06,
      "loss": 0.0131,
      "step": 2349700
    },
    {
      "epoch": 3.8453683156261658,
      "grad_norm": 0.03031052090227604,
      "learning_rate": 2.3244796162437487e-06,
      "loss": 0.0119,
      "step": 2349720
    },
    {
      "epoch": 3.8454010460648194,
      "grad_norm": 0.46158549189567566,
      "learning_rate": 2.3244137240302314e-06,
      "loss": 0.0129,
      "step": 2349740
    },
    {
      "epoch": 3.845433776503473,
      "grad_norm": 0.17300830781459808,
      "learning_rate": 2.3243478318167146e-06,
      "loss": 0.0144,
      "step": 2349760
    },
    {
      "epoch": 3.845466506942126,
      "grad_norm": 0.20493434369564056,
      "learning_rate": 2.3242819396031973e-06,
      "loss": 0.0087,
      "step": 2349780
    },
    {
      "epoch": 3.8454992373807793,
      "grad_norm": 0.1536141335964203,
      "learning_rate": 2.32421604738968e-06,
      "loss": 0.0139,
      "step": 2349800
    },
    {
      "epoch": 3.845531967819433,
      "grad_norm": 0.34244728088378906,
      "learning_rate": 2.324150155176163e-06,
      "loss": 0.0121,
      "step": 2349820
    },
    {
      "epoch": 3.845564698258086,
      "grad_norm": 0.18648609519004822,
      "learning_rate": 2.324084262962646e-06,
      "loss": 0.0124,
      "step": 2349840
    },
    {
      "epoch": 3.845597428696739,
      "grad_norm": 0.09156069904565811,
      "learning_rate": 2.3240183707491287e-06,
      "loss": 0.0124,
      "step": 2349860
    },
    {
      "epoch": 3.8456301591353927,
      "grad_norm": 0.19616344571113586,
      "learning_rate": 2.3239524785356114e-06,
      "loss": 0.0116,
      "step": 2349880
    },
    {
      "epoch": 3.8456628895740463,
      "grad_norm": 0.045869119465351105,
      "learning_rate": 2.3238865863220946e-06,
      "loss": 0.0149,
      "step": 2349900
    },
    {
      "epoch": 3.8456956200126995,
      "grad_norm": 0.17155379056930542,
      "learning_rate": 2.3238206941085773e-06,
      "loss": 0.0095,
      "step": 2349920
    },
    {
      "epoch": 3.8457283504513526,
      "grad_norm": 0.34019961953163147,
      "learning_rate": 2.3237548018950605e-06,
      "loss": 0.0091,
      "step": 2349940
    },
    {
      "epoch": 3.8457610808900062,
      "grad_norm": 0.4112849533557892,
      "learning_rate": 2.3236889096815432e-06,
      "loss": 0.0097,
      "step": 2349960
    },
    {
      "epoch": 3.8457938113286594,
      "grad_norm": 0.33239197731018066,
      "learning_rate": 2.323623017468026e-06,
      "loss": 0.0118,
      "step": 2349980
    },
    {
      "epoch": 3.8458265417673125,
      "grad_norm": 0.19466248154640198,
      "learning_rate": 2.3235571252545087e-06,
      "loss": 0.0146,
      "step": 2350000
    },
    {
      "epoch": 3.8458265417673125,
      "eval_loss": 0.006305353250354528,
      "eval_runtime": 6520.3717,
      "eval_samples_per_second": 157.638,
      "eval_steps_per_second": 15.764,
      "eval_sts-dev_pearson_cosine": 0.9856547978104537,
      "eval_sts-dev_spearman_cosine": 0.89593667743651,
      "step": 2350000
    },
    {
      "epoch": 3.845859272205966,
      "grad_norm": 0.09187908470630646,
      "learning_rate": 2.323491233040992e-06,
      "loss": 0.0096,
      "step": 2350020
    },
    {
      "epoch": 3.8458920026446193,
      "grad_norm": 0.7400413155555725,
      "learning_rate": 2.3234253408274746e-06,
      "loss": 0.01,
      "step": 2350040
    },
    {
      "epoch": 3.845924733083273,
      "grad_norm": 0.14780500531196594,
      "learning_rate": 2.3233594486139574e-06,
      "loss": 0.0131,
      "step": 2350060
    },
    {
      "epoch": 3.845957463521926,
      "grad_norm": 0.31320902705192566,
      "learning_rate": 2.32329355640044e-06,
      "loss": 0.0138,
      "step": 2350080
    },
    {
      "epoch": 3.8459901939605796,
      "grad_norm": 0.12208658456802368,
      "learning_rate": 2.3232276641869233e-06,
      "loss": 0.0131,
      "step": 2350100
    },
    {
      "epoch": 3.8460229243992328,
      "grad_norm": 0.20895077288150787,
      "learning_rate": 2.323161771973406e-06,
      "loss": 0.0122,
      "step": 2350120
    },
    {
      "epoch": 3.846055654837886,
      "grad_norm": 0.06057995930314064,
      "learning_rate": 2.323095879759889e-06,
      "loss": 0.0083,
      "step": 2350140
    },
    {
      "epoch": 3.8460883852765395,
      "grad_norm": 0.2558281123638153,
      "learning_rate": 2.323029987546372e-06,
      "loss": 0.01,
      "step": 2350160
    },
    {
      "epoch": 3.8461211157151927,
      "grad_norm": 0.37492644786834717,
      "learning_rate": 2.3229640953328546e-06,
      "loss": 0.0123,
      "step": 2350180
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 0.4956769645214081,
      "learning_rate": 2.3228982031193374e-06,
      "loss": 0.0109,
      "step": 2350200
    },
    {
      "epoch": 3.8461865765924994,
      "grad_norm": 0.2503112852573395,
      "learning_rate": 2.3228323109058205e-06,
      "loss": 0.0122,
      "step": 2350220
    },
    {
      "epoch": 3.846219307031153,
      "grad_norm": 0.09556321054697037,
      "learning_rate": 2.3227664186923033e-06,
      "loss": 0.0122,
      "step": 2350240
    },
    {
      "epoch": 3.846252037469806,
      "grad_norm": 0.053401824086904526,
      "learning_rate": 2.322700526478786e-06,
      "loss": 0.017,
      "step": 2350260
    },
    {
      "epoch": 3.8462847679084593,
      "grad_norm": 0.11934301257133484,
      "learning_rate": 2.322634634265269e-06,
      "loss": 0.0066,
      "step": 2350280
    },
    {
      "epoch": 3.846317498347113,
      "grad_norm": 0.08713740110397339,
      "learning_rate": 2.322568742051752e-06,
      "loss": 0.0077,
      "step": 2350300
    },
    {
      "epoch": 3.846350228785766,
      "grad_norm": 0.44058656692504883,
      "learning_rate": 2.3225028498382347e-06,
      "loss": 0.0087,
      "step": 2350320
    },
    {
      "epoch": 3.8463829592244196,
      "grad_norm": 0.14615187048912048,
      "learning_rate": 2.322436957624718e-06,
      "loss": 0.0091,
      "step": 2350340
    },
    {
      "epoch": 3.846415689663073,
      "grad_norm": 0.4913715422153473,
      "learning_rate": 2.3223710654112006e-06,
      "loss": 0.0095,
      "step": 2350360
    },
    {
      "epoch": 3.8464484201017264,
      "grad_norm": 0.09419255703687668,
      "learning_rate": 2.3223051731976833e-06,
      "loss": 0.0067,
      "step": 2350380
    },
    {
      "epoch": 3.8464811505403795,
      "grad_norm": 0.8016321659088135,
      "learning_rate": 2.322239280984166e-06,
      "loss": 0.017,
      "step": 2350400
    },
    {
      "epoch": 3.8465138809790327,
      "grad_norm": 0.1521136611700058,
      "learning_rate": 2.3221733887706492e-06,
      "loss": 0.0077,
      "step": 2350420
    },
    {
      "epoch": 3.8465466114176863,
      "grad_norm": 0.8303942084312439,
      "learning_rate": 2.322107496557132e-06,
      "loss": 0.0135,
      "step": 2350440
    },
    {
      "epoch": 3.8465793418563394,
      "grad_norm": 0.2669854164123535,
      "learning_rate": 2.322041604343615e-06,
      "loss": 0.0091,
      "step": 2350460
    },
    {
      "epoch": 3.846612072294993,
      "grad_norm": 0.2848968505859375,
      "learning_rate": 2.321975712130098e-06,
      "loss": 0.01,
      "step": 2350480
    },
    {
      "epoch": 3.846644802733646,
      "grad_norm": 0.10883138328790665,
      "learning_rate": 2.3219098199165806e-06,
      "loss": 0.0172,
      "step": 2350500
    },
    {
      "epoch": 3.8466775331722998,
      "grad_norm": 0.3246437907218933,
      "learning_rate": 2.3218439277030638e-06,
      "loss": 0.0075,
      "step": 2350520
    },
    {
      "epoch": 3.846710263610953,
      "grad_norm": 1.156091570854187,
      "learning_rate": 2.3217780354895465e-06,
      "loss": 0.0079,
      "step": 2350540
    },
    {
      "epoch": 3.846742994049606,
      "grad_norm": 0.07337167859077454,
      "learning_rate": 2.3217121432760292e-06,
      "loss": 0.0078,
      "step": 2350560
    },
    {
      "epoch": 3.8467757244882597,
      "grad_norm": 0.10596846044063568,
      "learning_rate": 2.321646251062512e-06,
      "loss": 0.0107,
      "step": 2350580
    },
    {
      "epoch": 3.846808454926913,
      "grad_norm": 0.10527781397104263,
      "learning_rate": 2.3215803588489947e-06,
      "loss": 0.016,
      "step": 2350600
    },
    {
      "epoch": 3.8468411853655664,
      "grad_norm": 0.6130526661872864,
      "learning_rate": 2.321514466635478e-06,
      "loss": 0.0088,
      "step": 2350620
    },
    {
      "epoch": 3.8468739158042196,
      "grad_norm": 0.24551741778850555,
      "learning_rate": 2.321448574421961e-06,
      "loss": 0.0116,
      "step": 2350640
    },
    {
      "epoch": 3.846906646242873,
      "grad_norm": 0.0897766649723053,
      "learning_rate": 2.3213826822084438e-06,
      "loss": 0.0108,
      "step": 2350660
    },
    {
      "epoch": 3.8469393766815263,
      "grad_norm": 0.36668580770492554,
      "learning_rate": 2.3213167899949265e-06,
      "loss": 0.0106,
      "step": 2350680
    },
    {
      "epoch": 3.8469721071201795,
      "grad_norm": 0.7808116674423218,
      "learning_rate": 2.3212508977814093e-06,
      "loss": 0.0132,
      "step": 2350700
    },
    {
      "epoch": 3.847004837558833,
      "grad_norm": 0.2373596429824829,
      "learning_rate": 2.3211850055678924e-06,
      "loss": 0.0106,
      "step": 2350720
    },
    {
      "epoch": 3.847037567997486,
      "grad_norm": 0.3495900332927704,
      "learning_rate": 2.321119113354375e-06,
      "loss": 0.0122,
      "step": 2350740
    },
    {
      "epoch": 3.84707029843614,
      "grad_norm": 0.08520840853452682,
      "learning_rate": 2.321053221140858e-06,
      "loss": 0.0102,
      "step": 2350760
    },
    {
      "epoch": 3.847103028874793,
      "grad_norm": 0.2185843288898468,
      "learning_rate": 2.3209873289273406e-06,
      "loss": 0.0087,
      "step": 2350780
    },
    {
      "epoch": 3.8471357593134465,
      "grad_norm": 0.32109397649765015,
      "learning_rate": 2.3209214367138234e-06,
      "loss": 0.0131,
      "step": 2350800
    },
    {
      "epoch": 3.8471684897520997,
      "grad_norm": 0.34407585859298706,
      "learning_rate": 2.3208555445003065e-06,
      "loss": 0.0081,
      "step": 2350820
    },
    {
      "epoch": 3.847201220190753,
      "grad_norm": 0.1085318848490715,
      "learning_rate": 2.3207896522867897e-06,
      "loss": 0.0124,
      "step": 2350840
    },
    {
      "epoch": 3.8472339506294064,
      "grad_norm": 0.04407500848174095,
      "learning_rate": 2.3207237600732725e-06,
      "loss": 0.0104,
      "step": 2350860
    },
    {
      "epoch": 3.8472666810680596,
      "grad_norm": 0.32117217779159546,
      "learning_rate": 2.320657867859755e-06,
      "loss": 0.0082,
      "step": 2350880
    },
    {
      "epoch": 3.847299411506713,
      "grad_norm": 0.2513541281223297,
      "learning_rate": 2.320591975646238e-06,
      "loss": 0.0122,
      "step": 2350900
    },
    {
      "epoch": 3.8473321419453663,
      "grad_norm": 0.3104330003261566,
      "learning_rate": 2.320526083432721e-06,
      "loss": 0.0123,
      "step": 2350920
    },
    {
      "epoch": 3.84736487238402,
      "grad_norm": 0.2084580957889557,
      "learning_rate": 2.320460191219204e-06,
      "loss": 0.0176,
      "step": 2350940
    },
    {
      "epoch": 3.847397602822673,
      "grad_norm": 0.3966453969478607,
      "learning_rate": 2.3203942990056866e-06,
      "loss": 0.0089,
      "step": 2350960
    },
    {
      "epoch": 3.847430333261326,
      "grad_norm": 0.11572670936584473,
      "learning_rate": 2.3203284067921697e-06,
      "loss": 0.0119,
      "step": 2350980
    },
    {
      "epoch": 3.84746306369998,
      "grad_norm": 0.1960301697254181,
      "learning_rate": 2.3202625145786525e-06,
      "loss": 0.0082,
      "step": 2351000
    },
    {
      "epoch": 3.847495794138633,
      "grad_norm": 0.24646884202957153,
      "learning_rate": 2.3201966223651352e-06,
      "loss": 0.0082,
      "step": 2351020
    },
    {
      "epoch": 3.847528524577286,
      "grad_norm": 0.16671425104141235,
      "learning_rate": 2.3201307301516184e-06,
      "loss": 0.0104,
      "step": 2351040
    },
    {
      "epoch": 3.8475612550159397,
      "grad_norm": 0.32222995162010193,
      "learning_rate": 2.320064837938101e-06,
      "loss": 0.0143,
      "step": 2351060
    },
    {
      "epoch": 3.8475939854545933,
      "grad_norm": 0.5671210885047913,
      "learning_rate": 2.319998945724584e-06,
      "loss": 0.0128,
      "step": 2351080
    },
    {
      "epoch": 3.8476267158932465,
      "grad_norm": 0.07040120661258698,
      "learning_rate": 2.3199330535110666e-06,
      "loss": 0.0084,
      "step": 2351100
    },
    {
      "epoch": 3.8476594463318996,
      "grad_norm": 0.44965222477912903,
      "learning_rate": 2.3198671612975498e-06,
      "loss": 0.0085,
      "step": 2351120
    },
    {
      "epoch": 3.847692176770553,
      "grad_norm": 0.27737757563591003,
      "learning_rate": 2.3198012690840325e-06,
      "loss": 0.0159,
      "step": 2351140
    },
    {
      "epoch": 3.8477249072092063,
      "grad_norm": 0.3334006667137146,
      "learning_rate": 2.3197353768705157e-06,
      "loss": 0.0094,
      "step": 2351160
    },
    {
      "epoch": 3.8477576376478595,
      "grad_norm": 0.17032450437545776,
      "learning_rate": 2.3196694846569984e-06,
      "loss": 0.0092,
      "step": 2351180
    },
    {
      "epoch": 3.847790368086513,
      "grad_norm": 0.11015261709690094,
      "learning_rate": 2.319603592443481e-06,
      "loss": 0.0175,
      "step": 2351200
    },
    {
      "epoch": 3.8478230985251667,
      "grad_norm": 0.443913072347641,
      "learning_rate": 2.319537700229964e-06,
      "loss": 0.0113,
      "step": 2351220
    },
    {
      "epoch": 3.84785582896382,
      "grad_norm": 0.20962661504745483,
      "learning_rate": 2.319471808016447e-06,
      "loss": 0.0068,
      "step": 2351240
    },
    {
      "epoch": 3.847888559402473,
      "grad_norm": 0.32029205560684204,
      "learning_rate": 2.3194059158029298e-06,
      "loss": 0.0143,
      "step": 2351260
    },
    {
      "epoch": 3.8479212898411266,
      "grad_norm": 0.2800360321998596,
      "learning_rate": 2.3193400235894125e-06,
      "loss": 0.01,
      "step": 2351280
    },
    {
      "epoch": 3.8479540202797797,
      "grad_norm": 0.1609182208776474,
      "learning_rate": 2.3192741313758953e-06,
      "loss": 0.0148,
      "step": 2351300
    },
    {
      "epoch": 3.847986750718433,
      "grad_norm": 0.5290324687957764,
      "learning_rate": 2.3192082391623784e-06,
      "loss": 0.0127,
      "step": 2351320
    },
    {
      "epoch": 3.8480194811570865,
      "grad_norm": 0.5243545174598694,
      "learning_rate": 2.319142346948861e-06,
      "loss": 0.0119,
      "step": 2351340
    },
    {
      "epoch": 3.84805221159574,
      "grad_norm": 0.35051798820495605,
      "learning_rate": 2.3190764547353443e-06,
      "loss": 0.0066,
      "step": 2351360
    },
    {
      "epoch": 3.848084942034393,
      "grad_norm": 0.2673656642436981,
      "learning_rate": 2.319010562521827e-06,
      "loss": 0.0136,
      "step": 2351380
    },
    {
      "epoch": 3.8481176724730464,
      "grad_norm": 0.2547742426395416,
      "learning_rate": 2.31894467030831e-06,
      "loss": 0.0172,
      "step": 2351400
    },
    {
      "epoch": 3.8481504029117,
      "grad_norm": 0.521675705909729,
      "learning_rate": 2.3188787780947926e-06,
      "loss": 0.0105,
      "step": 2351420
    },
    {
      "epoch": 3.848183133350353,
      "grad_norm": 0.24479317665100098,
      "learning_rate": 2.3188128858812757e-06,
      "loss": 0.0103,
      "step": 2351440
    },
    {
      "epoch": 3.8482158637890063,
      "grad_norm": 1.1313793659210205,
      "learning_rate": 2.3187469936677585e-06,
      "loss": 0.0107,
      "step": 2351460
    },
    {
      "epoch": 3.84824859422766,
      "grad_norm": 0.10502114146947861,
      "learning_rate": 2.318681101454241e-06,
      "loss": 0.0123,
      "step": 2351480
    },
    {
      "epoch": 3.8482813246663135,
      "grad_norm": 0.5474587082862854,
      "learning_rate": 2.318615209240724e-06,
      "loss": 0.0094,
      "step": 2351500
    },
    {
      "epoch": 3.8483140551049666,
      "grad_norm": 0.31073272228240967,
      "learning_rate": 2.318549317027207e-06,
      "loss": 0.0168,
      "step": 2351520
    },
    {
      "epoch": 3.8483467855436198,
      "grad_norm": 0.08774000406265259,
      "learning_rate": 2.31848342481369e-06,
      "loss": 0.0106,
      "step": 2351540
    },
    {
      "epoch": 3.8483795159822733,
      "grad_norm": 0.19687719643115997,
      "learning_rate": 2.318417532600173e-06,
      "loss": 0.0113,
      "step": 2351560
    },
    {
      "epoch": 3.8484122464209265,
      "grad_norm": 0.2330896407365799,
      "learning_rate": 2.3183516403866557e-06,
      "loss": 0.0163,
      "step": 2351580
    },
    {
      "epoch": 3.8484449768595796,
      "grad_norm": 0.13871265947818756,
      "learning_rate": 2.3182857481731385e-06,
      "loss": 0.014,
      "step": 2351600
    },
    {
      "epoch": 3.8484777072982332,
      "grad_norm": 0.2879807949066162,
      "learning_rate": 2.3182198559596212e-06,
      "loss": 0.0122,
      "step": 2351620
    },
    {
      "epoch": 3.8485104377368864,
      "grad_norm": 0.6151255965232849,
      "learning_rate": 2.3181539637461044e-06,
      "loss": 0.0141,
      "step": 2351640
    },
    {
      "epoch": 3.84854316817554,
      "grad_norm": 0.2864861786365509,
      "learning_rate": 2.318088071532587e-06,
      "loss": 0.0082,
      "step": 2351660
    },
    {
      "epoch": 3.848575898614193,
      "grad_norm": 0.34724682569503784,
      "learning_rate": 2.31802217931907e-06,
      "loss": 0.0115,
      "step": 2351680
    },
    {
      "epoch": 3.8486086290528467,
      "grad_norm": 0.7141672372817993,
      "learning_rate": 2.317956287105553e-06,
      "loss": 0.0112,
      "step": 2351700
    },
    {
      "epoch": 3.8486413594915,
      "grad_norm": 0.3330421447753906,
      "learning_rate": 2.3178903948920358e-06,
      "loss": 0.0085,
      "step": 2351720
    },
    {
      "epoch": 3.848674089930153,
      "grad_norm": 0.12231121212244034,
      "learning_rate": 2.3178245026785185e-06,
      "loss": 0.0066,
      "step": 2351740
    },
    {
      "epoch": 3.8487068203688066,
      "grad_norm": 0.47860798239707947,
      "learning_rate": 2.3177586104650017e-06,
      "loss": 0.0115,
      "step": 2351760
    },
    {
      "epoch": 3.8487395508074598,
      "grad_norm": 0.17407751083374023,
      "learning_rate": 2.3176927182514844e-06,
      "loss": 0.0089,
      "step": 2351780
    },
    {
      "epoch": 3.8487722812461134,
      "grad_norm": 0.103851817548275,
      "learning_rate": 2.317626826037967e-06,
      "loss": 0.0081,
      "step": 2351800
    },
    {
      "epoch": 3.8488050116847665,
      "grad_norm": 0.9144881367683411,
      "learning_rate": 2.31756093382445e-06,
      "loss": 0.0146,
      "step": 2351820
    },
    {
      "epoch": 3.84883774212342,
      "grad_norm": 0.1398618072271347,
      "learning_rate": 2.317495041610933e-06,
      "loss": 0.011,
      "step": 2351840
    },
    {
      "epoch": 3.8488704725620733,
      "grad_norm": 0.5418756008148193,
      "learning_rate": 2.3174291493974162e-06,
      "loss": 0.0142,
      "step": 2351860
    },
    {
      "epoch": 3.8489032030007264,
      "grad_norm": 0.077517069876194,
      "learning_rate": 2.317363257183899e-06,
      "loss": 0.0095,
      "step": 2351880
    },
    {
      "epoch": 3.84893593343938,
      "grad_norm": 0.11089590936899185,
      "learning_rate": 2.3172973649703817e-06,
      "loss": 0.011,
      "step": 2351900
    },
    {
      "epoch": 3.848968663878033,
      "grad_norm": 0.6525685787200928,
      "learning_rate": 2.3172314727568644e-06,
      "loss": 0.0087,
      "step": 2351920
    },
    {
      "epoch": 3.8490013943166868,
      "grad_norm": 0.3067801296710968,
      "learning_rate": 2.3171655805433476e-06,
      "loss": 0.0114,
      "step": 2351940
    },
    {
      "epoch": 3.84903412475534,
      "grad_norm": 0.7724832892417908,
      "learning_rate": 2.3170996883298303e-06,
      "loss": 0.0156,
      "step": 2351960
    },
    {
      "epoch": 3.8490668551939935,
      "grad_norm": 0.2914864122867584,
      "learning_rate": 2.317033796116313e-06,
      "loss": 0.0081,
      "step": 2351980
    },
    {
      "epoch": 3.8490995856326466,
      "grad_norm": 0.22668327391147614,
      "learning_rate": 2.316967903902796e-06,
      "loss": 0.0121,
      "step": 2352000
    },
    {
      "epoch": 3.8491323160713,
      "grad_norm": 0.7784795761108398,
      "learning_rate": 2.3169020116892786e-06,
      "loss": 0.0154,
      "step": 2352020
    },
    {
      "epoch": 3.8491650465099534,
      "grad_norm": 0.15223471820354462,
      "learning_rate": 2.3168361194757617e-06,
      "loss": 0.0069,
      "step": 2352040
    },
    {
      "epoch": 3.8491977769486065,
      "grad_norm": 0.47316333651542664,
      "learning_rate": 2.316770227262245e-06,
      "loss": 0.0086,
      "step": 2352060
    },
    {
      "epoch": 3.84923050738726,
      "grad_norm": 0.15961992740631104,
      "learning_rate": 2.3167043350487276e-06,
      "loss": 0.0091,
      "step": 2352080
    },
    {
      "epoch": 3.8492632378259133,
      "grad_norm": 0.1465364545583725,
      "learning_rate": 2.3166384428352104e-06,
      "loss": 0.0113,
      "step": 2352100
    },
    {
      "epoch": 3.849295968264567,
      "grad_norm": 0.3919437527656555,
      "learning_rate": 2.316572550621693e-06,
      "loss": 0.014,
      "step": 2352120
    },
    {
      "epoch": 3.84932869870322,
      "grad_norm": 0.2742985486984253,
      "learning_rate": 2.3165066584081763e-06,
      "loss": 0.0112,
      "step": 2352140
    },
    {
      "epoch": 3.849361429141873,
      "grad_norm": 0.2554943561553955,
      "learning_rate": 2.316440766194659e-06,
      "loss": 0.0093,
      "step": 2352160
    },
    {
      "epoch": 3.8493941595805268,
      "grad_norm": 0.21999236941337585,
      "learning_rate": 2.3163748739811417e-06,
      "loss": 0.0123,
      "step": 2352180
    },
    {
      "epoch": 3.84942689001918,
      "grad_norm": 0.17087113857269287,
      "learning_rate": 2.3163089817676245e-06,
      "loss": 0.0089,
      "step": 2352200
    },
    {
      "epoch": 3.8494596204578335,
      "grad_norm": 0.26143234968185425,
      "learning_rate": 2.3162430895541076e-06,
      "loss": 0.0116,
      "step": 2352220
    },
    {
      "epoch": 3.8494923508964867,
      "grad_norm": 0.3433404266834259,
      "learning_rate": 2.3161771973405904e-06,
      "loss": 0.0104,
      "step": 2352240
    },
    {
      "epoch": 3.8495250813351403,
      "grad_norm": 0.325717031955719,
      "learning_rate": 2.3161113051270736e-06,
      "loss": 0.0112,
      "step": 2352260
    },
    {
      "epoch": 3.8495578117737934,
      "grad_norm": 0.26097214221954346,
      "learning_rate": 2.3160454129135563e-06,
      "loss": 0.012,
      "step": 2352280
    },
    {
      "epoch": 3.8495905422124466,
      "grad_norm": 0.25950032472610474,
      "learning_rate": 2.315979520700039e-06,
      "loss": 0.0108,
      "step": 2352300
    },
    {
      "epoch": 3.8496232726511,
      "grad_norm": 0.1327497959136963,
      "learning_rate": 2.3159136284865218e-06,
      "loss": 0.009,
      "step": 2352320
    },
    {
      "epoch": 3.8496560030897533,
      "grad_norm": 0.16096031665802002,
      "learning_rate": 2.315847736273005e-06,
      "loss": 0.0087,
      "step": 2352340
    },
    {
      "epoch": 3.849688733528407,
      "grad_norm": 0.5433915853500366,
      "learning_rate": 2.3157818440594877e-06,
      "loss": 0.009,
      "step": 2352360
    },
    {
      "epoch": 3.84972146396706,
      "grad_norm": 0.11365508288145065,
      "learning_rate": 2.3157159518459704e-06,
      "loss": 0.0112,
      "step": 2352380
    },
    {
      "epoch": 3.8497541944057136,
      "grad_norm": 0.4388844966888428,
      "learning_rate": 2.3156500596324536e-06,
      "loss": 0.0075,
      "step": 2352400
    },
    {
      "epoch": 3.849786924844367,
      "grad_norm": 0.10746332257986069,
      "learning_rate": 2.3155841674189363e-06,
      "loss": 0.0136,
      "step": 2352420
    },
    {
      "epoch": 3.84981965528302,
      "grad_norm": 0.44915658235549927,
      "learning_rate": 2.315518275205419e-06,
      "loss": 0.0135,
      "step": 2352440
    },
    {
      "epoch": 3.8498523857216735,
      "grad_norm": 0.1446189284324646,
      "learning_rate": 2.3154523829919022e-06,
      "loss": 0.0113,
      "step": 2352460
    },
    {
      "epoch": 3.8498851161603267,
      "grad_norm": 0.3472136855125427,
      "learning_rate": 2.315386490778385e-06,
      "loss": 0.0086,
      "step": 2352480
    },
    {
      "epoch": 3.84991784659898,
      "grad_norm": 0.13631659746170044,
      "learning_rate": 2.3153205985648677e-06,
      "loss": 0.0164,
      "step": 2352500
    },
    {
      "epoch": 3.8499505770376334,
      "grad_norm": 0.6277628540992737,
      "learning_rate": 2.3152547063513504e-06,
      "loss": 0.0188,
      "step": 2352520
    },
    {
      "epoch": 3.849983307476287,
      "grad_norm": 0.1347419172525406,
      "learning_rate": 2.3151888141378336e-06,
      "loss": 0.0078,
      "step": 2352540
    },
    {
      "epoch": 3.85001603791494,
      "grad_norm": 0.34020259976387024,
      "learning_rate": 2.3151229219243163e-06,
      "loss": 0.0113,
      "step": 2352560
    },
    {
      "epoch": 3.8500487683535933,
      "grad_norm": 0.1582251638174057,
      "learning_rate": 2.3150570297107995e-06,
      "loss": 0.0129,
      "step": 2352580
    },
    {
      "epoch": 3.850081498792247,
      "grad_norm": 0.9182626605033875,
      "learning_rate": 2.3149911374972822e-06,
      "loss": 0.0132,
      "step": 2352600
    },
    {
      "epoch": 3.8501142292309,
      "grad_norm": 0.22868932783603668,
      "learning_rate": 2.314925245283765e-06,
      "loss": 0.0128,
      "step": 2352620
    },
    {
      "epoch": 3.8501469596695532,
      "grad_norm": 0.1551842838525772,
      "learning_rate": 2.3148593530702477e-06,
      "loss": 0.0098,
      "step": 2352640
    },
    {
      "epoch": 3.850179690108207,
      "grad_norm": 0.2617793679237366,
      "learning_rate": 2.314793460856731e-06,
      "loss": 0.0114,
      "step": 2352660
    },
    {
      "epoch": 3.8502124205468604,
      "grad_norm": 0.185324028134346,
      "learning_rate": 2.3147275686432136e-06,
      "loss": 0.0115,
      "step": 2352680
    },
    {
      "epoch": 3.8502451509855136,
      "grad_norm": 0.5205739140510559,
      "learning_rate": 2.3146616764296964e-06,
      "loss": 0.0083,
      "step": 2352700
    },
    {
      "epoch": 3.8502778814241667,
      "grad_norm": 0.6110379695892334,
      "learning_rate": 2.314595784216179e-06,
      "loss": 0.0127,
      "step": 2352720
    },
    {
      "epoch": 3.8503106118628203,
      "grad_norm": 0.5085046887397766,
      "learning_rate": 2.3145298920026623e-06,
      "loss": 0.0096,
      "step": 2352740
    },
    {
      "epoch": 3.8503433423014735,
      "grad_norm": 0.36687424778938293,
      "learning_rate": 2.314463999789145e-06,
      "loss": 0.0109,
      "step": 2352760
    },
    {
      "epoch": 3.8503760727401266,
      "grad_norm": 0.3106669485569,
      "learning_rate": 2.314398107575628e-06,
      "loss": 0.0129,
      "step": 2352780
    },
    {
      "epoch": 3.85040880317878,
      "grad_norm": 0.14918990433216095,
      "learning_rate": 2.314332215362111e-06,
      "loss": 0.0134,
      "step": 2352800
    },
    {
      "epoch": 3.850441533617434,
      "grad_norm": 0.1902274638414383,
      "learning_rate": 2.3142663231485937e-06,
      "loss": 0.0106,
      "step": 2352820
    },
    {
      "epoch": 3.850474264056087,
      "grad_norm": 0.19962087273597717,
      "learning_rate": 2.3142004309350764e-06,
      "loss": 0.009,
      "step": 2352840
    },
    {
      "epoch": 3.85050699449474,
      "grad_norm": 0.30956950783729553,
      "learning_rate": 2.3141345387215596e-06,
      "loss": 0.0086,
      "step": 2352860
    },
    {
      "epoch": 3.8505397249333937,
      "grad_norm": 0.35541191697120667,
      "learning_rate": 2.3140686465080423e-06,
      "loss": 0.0084,
      "step": 2352880
    },
    {
      "epoch": 3.850572455372047,
      "grad_norm": 0.48412492871284485,
      "learning_rate": 2.314002754294525e-06,
      "loss": 0.0103,
      "step": 2352900
    },
    {
      "epoch": 3.8506051858107,
      "grad_norm": 0.11728992313146591,
      "learning_rate": 2.313936862081008e-06,
      "loss": 0.0098,
      "step": 2352920
    },
    {
      "epoch": 3.8506379162493536,
      "grad_norm": 0.14836175739765167,
      "learning_rate": 2.313870969867491e-06,
      "loss": 0.0127,
      "step": 2352940
    },
    {
      "epoch": 3.850670646688007,
      "grad_norm": 0.1655191034078598,
      "learning_rate": 2.3138050776539737e-06,
      "loss": 0.012,
      "step": 2352960
    },
    {
      "epoch": 3.8507033771266603,
      "grad_norm": 0.11586584150791168,
      "learning_rate": 2.313739185440457e-06,
      "loss": 0.0135,
      "step": 2352980
    },
    {
      "epoch": 3.8507361075653135,
      "grad_norm": 0.20654335618019104,
      "learning_rate": 2.3136732932269396e-06,
      "loss": 0.0149,
      "step": 2353000
    },
    {
      "epoch": 3.850768838003967,
      "grad_norm": 0.13800780475139618,
      "learning_rate": 2.3136074010134223e-06,
      "loss": 0.0059,
      "step": 2353020
    },
    {
      "epoch": 3.8508015684426202,
      "grad_norm": 0.17586304247379303,
      "learning_rate": 2.313541508799905e-06,
      "loss": 0.0151,
      "step": 2353040
    },
    {
      "epoch": 3.8508342988812734,
      "grad_norm": 0.25808537006378174,
      "learning_rate": 2.3134756165863882e-06,
      "loss": 0.0131,
      "step": 2353060
    },
    {
      "epoch": 3.850867029319927,
      "grad_norm": 0.264996737241745,
      "learning_rate": 2.313409724372871e-06,
      "loss": 0.0105,
      "step": 2353080
    },
    {
      "epoch": 3.85089975975858,
      "grad_norm": 0.4337722957134247,
      "learning_rate": 2.313343832159354e-06,
      "loss": 0.0134,
      "step": 2353100
    },
    {
      "epoch": 3.8509324901972337,
      "grad_norm": 0.547838032245636,
      "learning_rate": 2.313277939945837e-06,
      "loss": 0.0143,
      "step": 2353120
    },
    {
      "epoch": 3.850965220635887,
      "grad_norm": 0.3738601803779602,
      "learning_rate": 2.3132120477323196e-06,
      "loss": 0.0111,
      "step": 2353140
    },
    {
      "epoch": 3.8509979510745405,
      "grad_norm": 0.13561570644378662,
      "learning_rate": 2.3131461555188028e-06,
      "loss": 0.0072,
      "step": 2353160
    },
    {
      "epoch": 3.8510306815131936,
      "grad_norm": 0.6633261442184448,
      "learning_rate": 2.3130802633052855e-06,
      "loss": 0.0174,
      "step": 2353180
    },
    {
      "epoch": 3.8510634119518468,
      "grad_norm": 0.08107171207666397,
      "learning_rate": 2.3130143710917682e-06,
      "loss": 0.0061,
      "step": 2353200
    },
    {
      "epoch": 3.8510961423905004,
      "grad_norm": 0.45235511660575867,
      "learning_rate": 2.312948478878251e-06,
      "loss": 0.0098,
      "step": 2353220
    },
    {
      "epoch": 3.8511288728291535,
      "grad_norm": 0.17106756567955017,
      "learning_rate": 2.3128825866647337e-06,
      "loss": 0.0089,
      "step": 2353240
    },
    {
      "epoch": 3.851161603267807,
      "grad_norm": 0.4993264377117157,
      "learning_rate": 2.312816694451217e-06,
      "loss": 0.0123,
      "step": 2353260
    },
    {
      "epoch": 3.8511943337064602,
      "grad_norm": 0.3443540632724762,
      "learning_rate": 2.3127508022377e-06,
      "loss": 0.0128,
      "step": 2353280
    },
    {
      "epoch": 3.851227064145114,
      "grad_norm": 0.3882448971271515,
      "learning_rate": 2.312684910024183e-06,
      "loss": 0.0152,
      "step": 2353300
    },
    {
      "epoch": 3.851259794583767,
      "grad_norm": 0.49452194571495056,
      "learning_rate": 2.3126190178106655e-06,
      "loss": 0.0134,
      "step": 2353320
    },
    {
      "epoch": 3.85129252502242,
      "grad_norm": 0.30172333121299744,
      "learning_rate": 2.3125531255971483e-06,
      "loss": 0.0092,
      "step": 2353340
    },
    {
      "epoch": 3.8513252554610737,
      "grad_norm": 0.20497702062129974,
      "learning_rate": 2.3124872333836314e-06,
      "loss": 0.0071,
      "step": 2353360
    },
    {
      "epoch": 3.851357985899727,
      "grad_norm": 0.21906372904777527,
      "learning_rate": 2.312421341170114e-06,
      "loss": 0.0098,
      "step": 2353380
    },
    {
      "epoch": 3.8513907163383805,
      "grad_norm": 0.09505857527256012,
      "learning_rate": 2.312355448956597e-06,
      "loss": 0.0064,
      "step": 2353400
    },
    {
      "epoch": 3.8514234467770336,
      "grad_norm": 0.1325642168521881,
      "learning_rate": 2.3122895567430797e-06,
      "loss": 0.0066,
      "step": 2353420
    },
    {
      "epoch": 3.8514561772156872,
      "grad_norm": 0.0957077294588089,
      "learning_rate": 2.3122236645295624e-06,
      "loss": 0.0085,
      "step": 2353440
    },
    {
      "epoch": 3.8514889076543404,
      "grad_norm": 0.12760423123836517,
      "learning_rate": 2.3121577723160456e-06,
      "loss": 0.0082,
      "step": 2353460
    },
    {
      "epoch": 3.8515216380929935,
      "grad_norm": 0.09583011269569397,
      "learning_rate": 2.3120918801025287e-06,
      "loss": 0.0102,
      "step": 2353480
    },
    {
      "epoch": 3.851554368531647,
      "grad_norm": 0.11707720905542374,
      "learning_rate": 2.3120259878890115e-06,
      "loss": 0.0096,
      "step": 2353500
    },
    {
      "epoch": 3.8515870989703003,
      "grad_norm": 0.14436812698841095,
      "learning_rate": 2.311960095675494e-06,
      "loss": 0.0052,
      "step": 2353520
    },
    {
      "epoch": 3.851619829408954,
      "grad_norm": 0.22689227759838104,
      "learning_rate": 2.311894203461977e-06,
      "loss": 0.0099,
      "step": 2353540
    },
    {
      "epoch": 3.851652559847607,
      "grad_norm": 0.18174542486667633,
      "learning_rate": 2.31182831124846e-06,
      "loss": 0.0115,
      "step": 2353560
    },
    {
      "epoch": 3.8516852902862606,
      "grad_norm": 0.4529491662979126,
      "learning_rate": 2.311762419034943e-06,
      "loss": 0.0123,
      "step": 2353580
    },
    {
      "epoch": 3.8517180207249138,
      "grad_norm": 0.06860651075839996,
      "learning_rate": 2.3116965268214256e-06,
      "loss": 0.0076,
      "step": 2353600
    },
    {
      "epoch": 3.851750751163567,
      "grad_norm": 0.5312148928642273,
      "learning_rate": 2.3116306346079087e-06,
      "loss": 0.0101,
      "step": 2353620
    },
    {
      "epoch": 3.8517834816022205,
      "grad_norm": 0.3344833254814148,
      "learning_rate": 2.3115647423943915e-06,
      "loss": 0.0091,
      "step": 2353640
    },
    {
      "epoch": 3.8518162120408737,
      "grad_norm": 0.12003882229328156,
      "learning_rate": 2.3114988501808742e-06,
      "loss": 0.009,
      "step": 2353660
    },
    {
      "epoch": 3.8518489424795272,
      "grad_norm": 0.3136129677295685,
      "learning_rate": 2.3114329579673574e-06,
      "loss": 0.0128,
      "step": 2353680
    },
    {
      "epoch": 3.8518816729181804,
      "grad_norm": 0.2214430421590805,
      "learning_rate": 2.31136706575384e-06,
      "loss": 0.0094,
      "step": 2353700
    },
    {
      "epoch": 3.851914403356834,
      "grad_norm": 0.1994975358247757,
      "learning_rate": 2.311301173540323e-06,
      "loss": 0.0121,
      "step": 2353720
    },
    {
      "epoch": 3.851947133795487,
      "grad_norm": 0.4566243290901184,
      "learning_rate": 2.3112352813268056e-06,
      "loss": 0.0107,
      "step": 2353740
    },
    {
      "epoch": 3.8519798642341403,
      "grad_norm": 0.35220029950141907,
      "learning_rate": 2.3111693891132888e-06,
      "loss": 0.0123,
      "step": 2353760
    },
    {
      "epoch": 3.852012594672794,
      "grad_norm": 0.47707071900367737,
      "learning_rate": 2.3111034968997715e-06,
      "loss": 0.0128,
      "step": 2353780
    },
    {
      "epoch": 3.852045325111447,
      "grad_norm": 0.22144922614097595,
      "learning_rate": 2.3110376046862547e-06,
      "loss": 0.0107,
      "step": 2353800
    },
    {
      "epoch": 3.8520780555501006,
      "grad_norm": 0.3386795222759247,
      "learning_rate": 2.3109717124727374e-06,
      "loss": 0.0128,
      "step": 2353820
    },
    {
      "epoch": 3.852110785988754,
      "grad_norm": 1.2561014890670776,
      "learning_rate": 2.31090582025922e-06,
      "loss": 0.0166,
      "step": 2353840
    },
    {
      "epoch": 3.8521435164274074,
      "grad_norm": 0.4368143081665039,
      "learning_rate": 2.310839928045703e-06,
      "loss": 0.0084,
      "step": 2353860
    },
    {
      "epoch": 3.8521762468660605,
      "grad_norm": 0.14166444540023804,
      "learning_rate": 2.310774035832186e-06,
      "loss": 0.0111,
      "step": 2353880
    },
    {
      "epoch": 3.8522089773047137,
      "grad_norm": 0.14946791529655457,
      "learning_rate": 2.310708143618669e-06,
      "loss": 0.0119,
      "step": 2353900
    },
    {
      "epoch": 3.8522417077433673,
      "grad_norm": 0.09176935255527496,
      "learning_rate": 2.3106422514051515e-06,
      "loss": 0.0079,
      "step": 2353920
    },
    {
      "epoch": 3.8522744381820204,
      "grad_norm": 0.36075255274772644,
      "learning_rate": 2.3105763591916343e-06,
      "loss": 0.0086,
      "step": 2353940
    },
    {
      "epoch": 3.852307168620674,
      "grad_norm": 2.507753610610962,
      "learning_rate": 2.3105104669781174e-06,
      "loss": 0.012,
      "step": 2353960
    },
    {
      "epoch": 3.852339899059327,
      "grad_norm": 0.18475262820720673,
      "learning_rate": 2.3104445747646e-06,
      "loss": 0.0095,
      "step": 2353980
    },
    {
      "epoch": 3.8523726294979808,
      "grad_norm": 0.07728204131126404,
      "learning_rate": 2.3103786825510833e-06,
      "loss": 0.008,
      "step": 2354000
    },
    {
      "epoch": 3.852405359936634,
      "grad_norm": 0.658164918422699,
      "learning_rate": 2.310312790337566e-06,
      "loss": 0.0085,
      "step": 2354020
    },
    {
      "epoch": 3.852438090375287,
      "grad_norm": 0.28758230805397034,
      "learning_rate": 2.310246898124049e-06,
      "loss": 0.0103,
      "step": 2354040
    },
    {
      "epoch": 3.8524708208139407,
      "grad_norm": 0.47278568148612976,
      "learning_rate": 2.3101810059105316e-06,
      "loss": 0.0127,
      "step": 2354060
    },
    {
      "epoch": 3.852503551252594,
      "grad_norm": 0.18323415517807007,
      "learning_rate": 2.3101151136970147e-06,
      "loss": 0.0113,
      "step": 2354080
    },
    {
      "epoch": 3.852536281691247,
      "grad_norm": 0.09317050129175186,
      "learning_rate": 2.3100492214834975e-06,
      "loss": 0.0082,
      "step": 2354100
    },
    {
      "epoch": 3.8525690121299005,
      "grad_norm": 0.09140671044588089,
      "learning_rate": 2.30998332926998e-06,
      "loss": 0.0085,
      "step": 2354120
    },
    {
      "epoch": 3.852601742568554,
      "grad_norm": 0.17985861003398895,
      "learning_rate": 2.309917437056463e-06,
      "loss": 0.0104,
      "step": 2354140
    },
    {
      "epoch": 3.8526344730072073,
      "grad_norm": 0.21209003031253815,
      "learning_rate": 2.309851544842946e-06,
      "loss": 0.0084,
      "step": 2354160
    },
    {
      "epoch": 3.8526672034458604,
      "grad_norm": 0.16931773722171783,
      "learning_rate": 2.309785652629429e-06,
      "loss": 0.0078,
      "step": 2354180
    },
    {
      "epoch": 3.852699933884514,
      "grad_norm": 0.26862823963165283,
      "learning_rate": 2.309719760415912e-06,
      "loss": 0.0112,
      "step": 2354200
    },
    {
      "epoch": 3.852732664323167,
      "grad_norm": 0.268776535987854,
      "learning_rate": 2.3096538682023948e-06,
      "loss": 0.0059,
      "step": 2354220
    },
    {
      "epoch": 3.8527653947618203,
      "grad_norm": 0.15174934267997742,
      "learning_rate": 2.3095879759888775e-06,
      "loss": 0.0077,
      "step": 2354240
    },
    {
      "epoch": 3.852798125200474,
      "grad_norm": 0.23889008164405823,
      "learning_rate": 2.3095220837753602e-06,
      "loss": 0.0072,
      "step": 2354260
    },
    {
      "epoch": 3.8528308556391275,
      "grad_norm": 0.09546550363302231,
      "learning_rate": 2.3094561915618434e-06,
      "loss": 0.0128,
      "step": 2354280
    },
    {
      "epoch": 3.8528635860777807,
      "grad_norm": 0.06293882429599762,
      "learning_rate": 2.309390299348326e-06,
      "loss": 0.0105,
      "step": 2354300
    },
    {
      "epoch": 3.852896316516434,
      "grad_norm": 0.20986764132976532,
      "learning_rate": 2.309324407134809e-06,
      "loss": 0.0106,
      "step": 2354320
    },
    {
      "epoch": 3.8529290469550874,
      "grad_norm": 0.07537820935249329,
      "learning_rate": 2.309258514921292e-06,
      "loss": 0.0073,
      "step": 2354340
    },
    {
      "epoch": 3.8529617773937406,
      "grad_norm": 0.46420755982398987,
      "learning_rate": 2.3091926227077748e-06,
      "loss": 0.0126,
      "step": 2354360
    },
    {
      "epoch": 3.8529945078323937,
      "grad_norm": 0.22867853939533234,
      "learning_rate": 2.309126730494258e-06,
      "loss": 0.0079,
      "step": 2354380
    },
    {
      "epoch": 3.8530272382710473,
      "grad_norm": 0.38006332516670227,
      "learning_rate": 2.3090608382807407e-06,
      "loss": 0.0107,
      "step": 2354400
    },
    {
      "epoch": 3.853059968709701,
      "grad_norm": 0.12053186446428299,
      "learning_rate": 2.3089949460672234e-06,
      "loss": 0.0134,
      "step": 2354420
    },
    {
      "epoch": 3.853092699148354,
      "grad_norm": 0.22973522543907166,
      "learning_rate": 2.308929053853706e-06,
      "loss": 0.0073,
      "step": 2354440
    },
    {
      "epoch": 3.853125429587007,
      "grad_norm": 0.11668256670236588,
      "learning_rate": 2.308863161640189e-06,
      "loss": 0.0145,
      "step": 2354460
    },
    {
      "epoch": 3.853158160025661,
      "grad_norm": 0.35804349184036255,
      "learning_rate": 2.308797269426672e-06,
      "loss": 0.0113,
      "step": 2354480
    },
    {
      "epoch": 3.853190890464314,
      "grad_norm": 0.4473264515399933,
      "learning_rate": 2.3087313772131552e-06,
      "loss": 0.0171,
      "step": 2354500
    },
    {
      "epoch": 3.853223620902967,
      "grad_norm": 0.5799703598022461,
      "learning_rate": 2.308665484999638e-06,
      "loss": 0.0088,
      "step": 2354520
    },
    {
      "epoch": 3.8532563513416207,
      "grad_norm": 0.44921404123306274,
      "learning_rate": 2.3085995927861207e-06,
      "loss": 0.012,
      "step": 2354540
    },
    {
      "epoch": 3.8532890817802743,
      "grad_norm": 0.15277472138404846,
      "learning_rate": 2.3085337005726034e-06,
      "loss": 0.0139,
      "step": 2354560
    },
    {
      "epoch": 3.8533218122189274,
      "grad_norm": 0.266012042760849,
      "learning_rate": 2.3084678083590866e-06,
      "loss": 0.0099,
      "step": 2354580
    },
    {
      "epoch": 3.8533545426575806,
      "grad_norm": 0.8714088797569275,
      "learning_rate": 2.3084019161455693e-06,
      "loss": 0.0115,
      "step": 2354600
    },
    {
      "epoch": 3.853387273096234,
      "grad_norm": 0.14682544767856598,
      "learning_rate": 2.308336023932052e-06,
      "loss": 0.0093,
      "step": 2354620
    },
    {
      "epoch": 3.8534200035348873,
      "grad_norm": 0.1400078386068344,
      "learning_rate": 2.308270131718535e-06,
      "loss": 0.0099,
      "step": 2354640
    },
    {
      "epoch": 3.8534527339735405,
      "grad_norm": 0.17936541140079498,
      "learning_rate": 2.3082042395050176e-06,
      "loss": 0.0085,
      "step": 2354660
    },
    {
      "epoch": 3.853485464412194,
      "grad_norm": 0.18406303226947784,
      "learning_rate": 2.3081383472915007e-06,
      "loss": 0.0092,
      "step": 2354680
    },
    {
      "epoch": 3.8535181948508472,
      "grad_norm": 0.23320503532886505,
      "learning_rate": 2.308072455077984e-06,
      "loss": 0.0106,
      "step": 2354700
    },
    {
      "epoch": 3.853550925289501,
      "grad_norm": 0.17766760289669037,
      "learning_rate": 2.3080065628644666e-06,
      "loss": 0.0109,
      "step": 2354720
    },
    {
      "epoch": 3.853583655728154,
      "grad_norm": 0.390737920999527,
      "learning_rate": 2.3079406706509494e-06,
      "loss": 0.0117,
      "step": 2354740
    },
    {
      "epoch": 3.8536163861668076,
      "grad_norm": 0.49158114194869995,
      "learning_rate": 2.307874778437432e-06,
      "loss": 0.0116,
      "step": 2354760
    },
    {
      "epoch": 3.8536491166054607,
      "grad_norm": 1.0155531167984009,
      "learning_rate": 2.3078088862239153e-06,
      "loss": 0.0084,
      "step": 2354780
    },
    {
      "epoch": 3.853681847044114,
      "grad_norm": 2.215184450149536,
      "learning_rate": 2.307742994010398e-06,
      "loss": 0.0129,
      "step": 2354800
    },
    {
      "epoch": 3.8537145774827675,
      "grad_norm": 0.14721348881721497,
      "learning_rate": 2.3076771017968808e-06,
      "loss": 0.0106,
      "step": 2354820
    },
    {
      "epoch": 3.8537473079214206,
      "grad_norm": 0.24974451959133148,
      "learning_rate": 2.3076112095833635e-06,
      "loss": 0.0137,
      "step": 2354840
    },
    {
      "epoch": 3.853780038360074,
      "grad_norm": 0.3751016855239868,
      "learning_rate": 2.3075453173698467e-06,
      "loss": 0.0122,
      "step": 2354860
    },
    {
      "epoch": 3.8538127687987274,
      "grad_norm": 0.2584444582462311,
      "learning_rate": 2.3074794251563294e-06,
      "loss": 0.0077,
      "step": 2354880
    },
    {
      "epoch": 3.853845499237381,
      "grad_norm": 0.4680464267730713,
      "learning_rate": 2.3074135329428126e-06,
      "loss": 0.0115,
      "step": 2354900
    },
    {
      "epoch": 3.853878229676034,
      "grad_norm": 0.1471012383699417,
      "learning_rate": 2.3073476407292953e-06,
      "loss": 0.0096,
      "step": 2354920
    },
    {
      "epoch": 3.8539109601146873,
      "grad_norm": 0.306554913520813,
      "learning_rate": 2.307281748515778e-06,
      "loss": 0.0082,
      "step": 2354940
    },
    {
      "epoch": 3.853943690553341,
      "grad_norm": 0.2634322941303253,
      "learning_rate": 2.3072158563022608e-06,
      "loss": 0.0111,
      "step": 2354960
    },
    {
      "epoch": 3.853976420991994,
      "grad_norm": 0.18444691598415375,
      "learning_rate": 2.307149964088744e-06,
      "loss": 0.0088,
      "step": 2354980
    },
    {
      "epoch": 3.8540091514306476,
      "grad_norm": 0.5126745700836182,
      "learning_rate": 2.3070840718752267e-06,
      "loss": 0.0134,
      "step": 2355000
    },
    {
      "epoch": 3.8540418818693007,
      "grad_norm": 0.19069115817546844,
      "learning_rate": 2.3070181796617094e-06,
      "loss": 0.0114,
      "step": 2355020
    },
    {
      "epoch": 3.8540746123079543,
      "grad_norm": 0.12373524159193039,
      "learning_rate": 2.3069522874481926e-06,
      "loss": 0.0083,
      "step": 2355040
    },
    {
      "epoch": 3.8541073427466075,
      "grad_norm": 0.8146402835845947,
      "learning_rate": 2.3068863952346753e-06,
      "loss": 0.0145,
      "step": 2355060
    },
    {
      "epoch": 3.8541400731852606,
      "grad_norm": 0.25878599286079407,
      "learning_rate": 2.306820503021158e-06,
      "loss": 0.0119,
      "step": 2355080
    },
    {
      "epoch": 3.8541728036239142,
      "grad_norm": 0.7208564281463623,
      "learning_rate": 2.3067546108076412e-06,
      "loss": 0.016,
      "step": 2355100
    },
    {
      "epoch": 3.8542055340625674,
      "grad_norm": 0.08826135843992233,
      "learning_rate": 2.306688718594124e-06,
      "loss": 0.0073,
      "step": 2355120
    },
    {
      "epoch": 3.854238264501221,
      "grad_norm": 0.11981964111328125,
      "learning_rate": 2.3066228263806067e-06,
      "loss": 0.0089,
      "step": 2355140
    },
    {
      "epoch": 3.854270994939874,
      "grad_norm": 0.2387704998254776,
      "learning_rate": 2.3065569341670894e-06,
      "loss": 0.0086,
      "step": 2355160
    },
    {
      "epoch": 3.8543037253785277,
      "grad_norm": 0.1588667780160904,
      "learning_rate": 2.3064910419535726e-06,
      "loss": 0.0103,
      "step": 2355180
    },
    {
      "epoch": 3.854336455817181,
      "grad_norm": 0.3037683367729187,
      "learning_rate": 2.3064251497400554e-06,
      "loss": 0.0101,
      "step": 2355200
    },
    {
      "epoch": 3.854369186255834,
      "grad_norm": 0.6077982187271118,
      "learning_rate": 2.3063592575265385e-06,
      "loss": 0.0106,
      "step": 2355220
    },
    {
      "epoch": 3.8544019166944876,
      "grad_norm": 0.3753747045993805,
      "learning_rate": 2.3062933653130213e-06,
      "loss": 0.0115,
      "step": 2355240
    },
    {
      "epoch": 3.8544346471331408,
      "grad_norm": 0.28388598561286926,
      "learning_rate": 2.306227473099504e-06,
      "loss": 0.0124,
      "step": 2355260
    },
    {
      "epoch": 3.8544673775717944,
      "grad_norm": 0.10908201336860657,
      "learning_rate": 2.3061615808859867e-06,
      "loss": 0.0151,
      "step": 2355280
    },
    {
      "epoch": 3.8545001080104475,
      "grad_norm": 0.11389819532632828,
      "learning_rate": 2.30609568867247e-06,
      "loss": 0.0123,
      "step": 2355300
    },
    {
      "epoch": 3.854532838449101,
      "grad_norm": 0.32761165499687195,
      "learning_rate": 2.3060297964589526e-06,
      "loss": 0.012,
      "step": 2355320
    },
    {
      "epoch": 3.8545655688877543,
      "grad_norm": 0.1331414133310318,
      "learning_rate": 2.3059639042454354e-06,
      "loss": 0.0087,
      "step": 2355340
    },
    {
      "epoch": 3.8545982993264074,
      "grad_norm": 0.18311919271945953,
      "learning_rate": 2.305898012031918e-06,
      "loss": 0.0116,
      "step": 2355360
    },
    {
      "epoch": 3.854631029765061,
      "grad_norm": 0.22628989815711975,
      "learning_rate": 2.3058321198184013e-06,
      "loss": 0.0096,
      "step": 2355380
    },
    {
      "epoch": 3.854663760203714,
      "grad_norm": 0.30522289872169495,
      "learning_rate": 2.305766227604884e-06,
      "loss": 0.0107,
      "step": 2355400
    },
    {
      "epoch": 3.8546964906423677,
      "grad_norm": 0.17357812821865082,
      "learning_rate": 2.305700335391367e-06,
      "loss": 0.0102,
      "step": 2355420
    },
    {
      "epoch": 3.854729221081021,
      "grad_norm": 0.3103189766407013,
      "learning_rate": 2.30563444317785e-06,
      "loss": 0.0108,
      "step": 2355440
    },
    {
      "epoch": 3.8547619515196745,
      "grad_norm": 0.2931194305419922,
      "learning_rate": 2.3055685509643327e-06,
      "loss": 0.0124,
      "step": 2355460
    },
    {
      "epoch": 3.8547946819583276,
      "grad_norm": 0.6132599115371704,
      "learning_rate": 2.3055026587508154e-06,
      "loss": 0.0097,
      "step": 2355480
    },
    {
      "epoch": 3.854827412396981,
      "grad_norm": 0.18969453871250153,
      "learning_rate": 2.3054367665372986e-06,
      "loss": 0.0093,
      "step": 2355500
    },
    {
      "epoch": 3.8548601428356344,
      "grad_norm": 0.3203756809234619,
      "learning_rate": 2.3053708743237813e-06,
      "loss": 0.0114,
      "step": 2355520
    },
    {
      "epoch": 3.8548928732742875,
      "grad_norm": 0.37079933285713196,
      "learning_rate": 2.305304982110264e-06,
      "loss": 0.0084,
      "step": 2355540
    },
    {
      "epoch": 3.8549256037129407,
      "grad_norm": 0.5302944779396057,
      "learning_rate": 2.305239089896747e-06,
      "loss": 0.008,
      "step": 2355560
    },
    {
      "epoch": 3.8549583341515943,
      "grad_norm": 0.28207793831825256,
      "learning_rate": 2.30517319768323e-06,
      "loss": 0.0102,
      "step": 2355580
    },
    {
      "epoch": 3.854991064590248,
      "grad_norm": 0.3083360195159912,
      "learning_rate": 2.3051073054697127e-06,
      "loss": 0.0089,
      "step": 2355600
    },
    {
      "epoch": 3.855023795028901,
      "grad_norm": 0.5931931734085083,
      "learning_rate": 2.305041413256196e-06,
      "loss": 0.0131,
      "step": 2355620
    },
    {
      "epoch": 3.855056525467554,
      "grad_norm": 0.23371952772140503,
      "learning_rate": 2.3049755210426786e-06,
      "loss": 0.0076,
      "step": 2355640
    },
    {
      "epoch": 3.8550892559062078,
      "grad_norm": 0.3646777868270874,
      "learning_rate": 2.3049096288291613e-06,
      "loss": 0.0122,
      "step": 2355660
    },
    {
      "epoch": 3.855121986344861,
      "grad_norm": 0.35023635625839233,
      "learning_rate": 2.304843736615644e-06,
      "loss": 0.0062,
      "step": 2355680
    },
    {
      "epoch": 3.855154716783514,
      "grad_norm": 0.5802974104881287,
      "learning_rate": 2.3047778444021272e-06,
      "loss": 0.009,
      "step": 2355700
    },
    {
      "epoch": 3.8551874472221677,
      "grad_norm": 0.34530526399612427,
      "learning_rate": 2.30471195218861e-06,
      "loss": 0.0107,
      "step": 2355720
    },
    {
      "epoch": 3.8552201776608213,
      "grad_norm": 0.16380071640014648,
      "learning_rate": 2.304646059975093e-06,
      "loss": 0.0108,
      "step": 2355740
    },
    {
      "epoch": 3.8552529080994744,
      "grad_norm": 0.16905878484249115,
      "learning_rate": 2.304580167761576e-06,
      "loss": 0.0105,
      "step": 2355760
    },
    {
      "epoch": 3.8552856385381276,
      "grad_norm": 0.18078561127185822,
      "learning_rate": 2.3045142755480586e-06,
      "loss": 0.0147,
      "step": 2355780
    },
    {
      "epoch": 3.855318368976781,
      "grad_norm": 0.5991483926773071,
      "learning_rate": 2.3044483833345418e-06,
      "loss": 0.0129,
      "step": 2355800
    },
    {
      "epoch": 3.8553510994154343,
      "grad_norm": 0.5866791009902954,
      "learning_rate": 2.3043824911210245e-06,
      "loss": 0.0131,
      "step": 2355820
    },
    {
      "epoch": 3.8553838298540875,
      "grad_norm": 1.085829496383667,
      "learning_rate": 2.3043165989075073e-06,
      "loss": 0.0119,
      "step": 2355840
    },
    {
      "epoch": 3.855416560292741,
      "grad_norm": 0.8768782615661621,
      "learning_rate": 2.30425070669399e-06,
      "loss": 0.0085,
      "step": 2355860
    },
    {
      "epoch": 3.8554492907313946,
      "grad_norm": 0.10094017535448074,
      "learning_rate": 2.3041848144804727e-06,
      "loss": 0.0101,
      "step": 2355880
    },
    {
      "epoch": 3.855482021170048,
      "grad_norm": 0.3800349831581116,
      "learning_rate": 2.304118922266956e-06,
      "loss": 0.0174,
      "step": 2355900
    },
    {
      "epoch": 3.855514751608701,
      "grad_norm": 0.2509084641933441,
      "learning_rate": 2.304053030053439e-06,
      "loss": 0.0124,
      "step": 2355920
    },
    {
      "epoch": 3.8555474820473545,
      "grad_norm": 0.10851830989122391,
      "learning_rate": 2.303987137839922e-06,
      "loss": 0.0099,
      "step": 2355940
    },
    {
      "epoch": 3.8555802124860077,
      "grad_norm": 0.8135757446289062,
      "learning_rate": 2.3039212456264045e-06,
      "loss": 0.0096,
      "step": 2355960
    },
    {
      "epoch": 3.855612942924661,
      "grad_norm": 0.32877683639526367,
      "learning_rate": 2.3038553534128873e-06,
      "loss": 0.01,
      "step": 2355980
    },
    {
      "epoch": 3.8556456733633144,
      "grad_norm": 0.26800405979156494,
      "learning_rate": 2.3037894611993704e-06,
      "loss": 0.0103,
      "step": 2356000
    },
    {
      "epoch": 3.855678403801968,
      "grad_norm": 0.3633086681365967,
      "learning_rate": 2.303723568985853e-06,
      "loss": 0.0125,
      "step": 2356020
    },
    {
      "epoch": 3.855711134240621,
      "grad_norm": 0.2984689474105835,
      "learning_rate": 2.303657676772336e-06,
      "loss": 0.0154,
      "step": 2356040
    },
    {
      "epoch": 3.8557438646792743,
      "grad_norm": 0.17198045551776886,
      "learning_rate": 2.3035917845588187e-06,
      "loss": 0.0109,
      "step": 2356060
    },
    {
      "epoch": 3.855776595117928,
      "grad_norm": 0.5474488139152527,
      "learning_rate": 2.3035258923453014e-06,
      "loss": 0.0117,
      "step": 2356080
    },
    {
      "epoch": 3.855809325556581,
      "grad_norm": 0.177317276597023,
      "learning_rate": 2.3034600001317846e-06,
      "loss": 0.0136,
      "step": 2356100
    },
    {
      "epoch": 3.855842055995234,
      "grad_norm": 0.464041531085968,
      "learning_rate": 2.3033941079182677e-06,
      "loss": 0.0142,
      "step": 2356120
    },
    {
      "epoch": 3.855874786433888,
      "grad_norm": 0.4057799279689789,
      "learning_rate": 2.3033282157047505e-06,
      "loss": 0.0137,
      "step": 2356140
    },
    {
      "epoch": 3.855907516872541,
      "grad_norm": 0.20535318553447723,
      "learning_rate": 2.3032623234912332e-06,
      "loss": 0.0104,
      "step": 2356160
    },
    {
      "epoch": 3.8559402473111946,
      "grad_norm": 0.2794589698314667,
      "learning_rate": 2.303196431277716e-06,
      "loss": 0.007,
      "step": 2356180
    },
    {
      "epoch": 3.8559729777498477,
      "grad_norm": 0.40975260734558105,
      "learning_rate": 2.303130539064199e-06,
      "loss": 0.0132,
      "step": 2356200
    },
    {
      "epoch": 3.8560057081885013,
      "grad_norm": 0.49947473406791687,
      "learning_rate": 2.303064646850682e-06,
      "loss": 0.0134,
      "step": 2356220
    },
    {
      "epoch": 3.8560384386271545,
      "grad_norm": 0.26194947957992554,
      "learning_rate": 2.3029987546371646e-06,
      "loss": 0.014,
      "step": 2356240
    },
    {
      "epoch": 3.8560711690658076,
      "grad_norm": 0.4276587665081024,
      "learning_rate": 2.3029328624236478e-06,
      "loss": 0.0111,
      "step": 2356260
    },
    {
      "epoch": 3.856103899504461,
      "grad_norm": 0.09542853385210037,
      "learning_rate": 2.3028669702101305e-06,
      "loss": 0.0125,
      "step": 2356280
    },
    {
      "epoch": 3.8561366299431143,
      "grad_norm": 0.34229668974876404,
      "learning_rate": 2.3028010779966132e-06,
      "loss": 0.0131,
      "step": 2356300
    },
    {
      "epoch": 3.856169360381768,
      "grad_norm": 0.2423788160085678,
      "learning_rate": 2.3027351857830964e-06,
      "loss": 0.0066,
      "step": 2356320
    },
    {
      "epoch": 3.856202090820421,
      "grad_norm": 0.14242202043533325,
      "learning_rate": 2.302669293569579e-06,
      "loss": 0.0083,
      "step": 2356340
    },
    {
      "epoch": 3.8562348212590747,
      "grad_norm": 0.09906360507011414,
      "learning_rate": 2.302603401356062e-06,
      "loss": 0.0085,
      "step": 2356360
    },
    {
      "epoch": 3.856267551697728,
      "grad_norm": 0.33070507645606995,
      "learning_rate": 2.3025375091425446e-06,
      "loss": 0.0082,
      "step": 2356380
    },
    {
      "epoch": 3.856300282136381,
      "grad_norm": 0.17656850814819336,
      "learning_rate": 2.3024716169290278e-06,
      "loss": 0.012,
      "step": 2356400
    },
    {
      "epoch": 3.8563330125750346,
      "grad_norm": 0.21705234050750732,
      "learning_rate": 2.3024057247155105e-06,
      "loss": 0.0122,
      "step": 2356420
    },
    {
      "epoch": 3.8563657430136877,
      "grad_norm": 0.2659977674484253,
      "learning_rate": 2.3023398325019937e-06,
      "loss": 0.0127,
      "step": 2356440
    },
    {
      "epoch": 3.8563984734523413,
      "grad_norm": 0.09013159573078156,
      "learning_rate": 2.3022739402884764e-06,
      "loss": 0.0074,
      "step": 2356460
    },
    {
      "epoch": 3.8564312038909945,
      "grad_norm": 0.33164530992507935,
      "learning_rate": 2.302208048074959e-06,
      "loss": 0.014,
      "step": 2356480
    },
    {
      "epoch": 3.856463934329648,
      "grad_norm": 0.3875247836112976,
      "learning_rate": 2.302142155861442e-06,
      "loss": 0.0072,
      "step": 2356500
    },
    {
      "epoch": 3.856496664768301,
      "grad_norm": 0.11285804212093353,
      "learning_rate": 2.302076263647925e-06,
      "loss": 0.0107,
      "step": 2356520
    },
    {
      "epoch": 3.8565293952069544,
      "grad_norm": 0.14988401532173157,
      "learning_rate": 2.302010371434408e-06,
      "loss": 0.0163,
      "step": 2356540
    },
    {
      "epoch": 3.856562125645608,
      "grad_norm": 0.26496583223342896,
      "learning_rate": 2.3019444792208905e-06,
      "loss": 0.0109,
      "step": 2356560
    },
    {
      "epoch": 3.856594856084261,
      "grad_norm": 0.36763718724250793,
      "learning_rate": 2.3018785870073733e-06,
      "loss": 0.0096,
      "step": 2356580
    },
    {
      "epoch": 3.8566275865229147,
      "grad_norm": 0.2541849613189697,
      "learning_rate": 2.3018126947938565e-06,
      "loss": 0.0138,
      "step": 2356600
    },
    {
      "epoch": 3.856660316961568,
      "grad_norm": 0.21511726081371307,
      "learning_rate": 2.301746802580339e-06,
      "loss": 0.0078,
      "step": 2356620
    },
    {
      "epoch": 3.8566930474002215,
      "grad_norm": 0.15247273445129395,
      "learning_rate": 2.3016809103668224e-06,
      "loss": 0.0151,
      "step": 2356640
    },
    {
      "epoch": 3.8567257778388746,
      "grad_norm": 0.1352740228176117,
      "learning_rate": 2.301615018153305e-06,
      "loss": 0.0099,
      "step": 2356660
    },
    {
      "epoch": 3.8567585082775278,
      "grad_norm": 0.156251460313797,
      "learning_rate": 2.301549125939788e-06,
      "loss": 0.0108,
      "step": 2356680
    },
    {
      "epoch": 3.8567912387161813,
      "grad_norm": 0.29439517855644226,
      "learning_rate": 2.3014832337262706e-06,
      "loss": 0.0124,
      "step": 2356700
    },
    {
      "epoch": 3.8568239691548345,
      "grad_norm": 0.1259237676858902,
      "learning_rate": 2.3014173415127537e-06,
      "loss": 0.0082,
      "step": 2356720
    },
    {
      "epoch": 3.856856699593488,
      "grad_norm": 0.10121724009513855,
      "learning_rate": 2.3013514492992365e-06,
      "loss": 0.0131,
      "step": 2356740
    },
    {
      "epoch": 3.8568894300321412,
      "grad_norm": 0.20259903371334076,
      "learning_rate": 2.3012855570857192e-06,
      "loss": 0.0163,
      "step": 2356760
    },
    {
      "epoch": 3.856922160470795,
      "grad_norm": 0.30093884468078613,
      "learning_rate": 2.301219664872202e-06,
      "loss": 0.0095,
      "step": 2356780
    },
    {
      "epoch": 3.856954890909448,
      "grad_norm": 0.4468245208263397,
      "learning_rate": 2.301153772658685e-06,
      "loss": 0.0135,
      "step": 2356800
    },
    {
      "epoch": 3.856987621348101,
      "grad_norm": 0.14539441466331482,
      "learning_rate": 2.301087880445168e-06,
      "loss": 0.0141,
      "step": 2356820
    },
    {
      "epoch": 3.8570203517867547,
      "grad_norm": 0.29300278425216675,
      "learning_rate": 2.301021988231651e-06,
      "loss": 0.0093,
      "step": 2356840
    },
    {
      "epoch": 3.857053082225408,
      "grad_norm": 0.1403099149465561,
      "learning_rate": 2.3009560960181338e-06,
      "loss": 0.0106,
      "step": 2356860
    },
    {
      "epoch": 3.8570858126640615,
      "grad_norm": 0.35846400260925293,
      "learning_rate": 2.3008902038046165e-06,
      "loss": 0.008,
      "step": 2356880
    },
    {
      "epoch": 3.8571185431027146,
      "grad_norm": 0.1615796983242035,
      "learning_rate": 2.3008243115910992e-06,
      "loss": 0.0116,
      "step": 2356900
    },
    {
      "epoch": 3.857151273541368,
      "grad_norm": 0.28291234374046326,
      "learning_rate": 2.3007584193775824e-06,
      "loss": 0.0147,
      "step": 2356920
    },
    {
      "epoch": 3.8571840039800214,
      "grad_norm": 0.0946250930428505,
      "learning_rate": 2.300692527164065e-06,
      "loss": 0.0119,
      "step": 2356940
    },
    {
      "epoch": 3.8572167344186745,
      "grad_norm": 0.09342409670352936,
      "learning_rate": 2.300626634950548e-06,
      "loss": 0.0079,
      "step": 2356960
    },
    {
      "epoch": 3.857249464857328,
      "grad_norm": 0.10757779330015182,
      "learning_rate": 2.300560742737031e-06,
      "loss": 0.0132,
      "step": 2356980
    },
    {
      "epoch": 3.8572821952959813,
      "grad_norm": 1.4265711307525635,
      "learning_rate": 2.3004948505235138e-06,
      "loss": 0.0168,
      "step": 2357000
    },
    {
      "epoch": 3.857314925734635,
      "grad_norm": 0.22060973942279816,
      "learning_rate": 2.300428958309997e-06,
      "loss": 0.0111,
      "step": 2357020
    },
    {
      "epoch": 3.857347656173288,
      "grad_norm": 0.6782315969467163,
      "learning_rate": 2.3003630660964797e-06,
      "loss": 0.0128,
      "step": 2357040
    },
    {
      "epoch": 3.8573803866119416,
      "grad_norm": 0.07911311835050583,
      "learning_rate": 2.3002971738829624e-06,
      "loss": 0.0125,
      "step": 2357060
    },
    {
      "epoch": 3.8574131170505948,
      "grad_norm": 0.4041309356689453,
      "learning_rate": 2.300231281669445e-06,
      "loss": 0.0084,
      "step": 2357080
    },
    {
      "epoch": 3.857445847489248,
      "grad_norm": 0.4000560939311981,
      "learning_rate": 2.300165389455928e-06,
      "loss": 0.0143,
      "step": 2357100
    },
    {
      "epoch": 3.8574785779279015,
      "grad_norm": 0.40438327193260193,
      "learning_rate": 2.300099497242411e-06,
      "loss": 0.0133,
      "step": 2357120
    },
    {
      "epoch": 3.8575113083665546,
      "grad_norm": 0.23724807798862457,
      "learning_rate": 2.3000336050288942e-06,
      "loss": 0.0094,
      "step": 2357140
    },
    {
      "epoch": 3.857544038805208,
      "grad_norm": 0.12411175668239594,
      "learning_rate": 2.299967712815377e-06,
      "loss": 0.0111,
      "step": 2357160
    },
    {
      "epoch": 3.8575767692438614,
      "grad_norm": 0.1864699274301529,
      "learning_rate": 2.2999018206018597e-06,
      "loss": 0.0115,
      "step": 2357180
    },
    {
      "epoch": 3.857609499682515,
      "grad_norm": 0.2300223410129547,
      "learning_rate": 2.2998359283883425e-06,
      "loss": 0.0106,
      "step": 2357200
    },
    {
      "epoch": 3.857642230121168,
      "grad_norm": 0.28559908270835876,
      "learning_rate": 2.2997700361748256e-06,
      "loss": 0.0096,
      "step": 2357220
    },
    {
      "epoch": 3.8576749605598213,
      "grad_norm": 0.8494526743888855,
      "learning_rate": 2.2997041439613084e-06,
      "loss": 0.0137,
      "step": 2357240
    },
    {
      "epoch": 3.857707690998475,
      "grad_norm": 0.4794254004955292,
      "learning_rate": 2.299638251747791e-06,
      "loss": 0.0145,
      "step": 2357260
    },
    {
      "epoch": 3.857740421437128,
      "grad_norm": 0.3049890100955963,
      "learning_rate": 2.299572359534274e-06,
      "loss": 0.014,
      "step": 2357280
    },
    {
      "epoch": 3.857773151875781,
      "grad_norm": 0.10154476761817932,
      "learning_rate": 2.2995064673207566e-06,
      "loss": 0.0073,
      "step": 2357300
    },
    {
      "epoch": 3.8578058823144348,
      "grad_norm": 0.18060599267482758,
      "learning_rate": 2.2994405751072397e-06,
      "loss": 0.0051,
      "step": 2357320
    },
    {
      "epoch": 3.8578386127530884,
      "grad_norm": 0.14720454812049866,
      "learning_rate": 2.299374682893723e-06,
      "loss": 0.0172,
      "step": 2357340
    },
    {
      "epoch": 3.8578713431917415,
      "grad_norm": 0.11555584520101547,
      "learning_rate": 2.2993087906802056e-06,
      "loss": 0.0115,
      "step": 2357360
    },
    {
      "epoch": 3.8579040736303947,
      "grad_norm": 0.22705064713954926,
      "learning_rate": 2.2992428984666884e-06,
      "loss": 0.0088,
      "step": 2357380
    },
    {
      "epoch": 3.8579368040690483,
      "grad_norm": 0.24420768022537231,
      "learning_rate": 2.299177006253171e-06,
      "loss": 0.0124,
      "step": 2357400
    },
    {
      "epoch": 3.8579695345077014,
      "grad_norm": 0.21323826909065247,
      "learning_rate": 2.2991111140396543e-06,
      "loss": 0.0176,
      "step": 2357420
    },
    {
      "epoch": 3.8580022649463546,
      "grad_norm": 0.5502190589904785,
      "learning_rate": 2.299045221826137e-06,
      "loss": 0.0111,
      "step": 2357440
    },
    {
      "epoch": 3.858034995385008,
      "grad_norm": 0.2528759837150574,
      "learning_rate": 2.2989793296126198e-06,
      "loss": 0.0138,
      "step": 2357460
    },
    {
      "epoch": 3.8580677258236618,
      "grad_norm": 0.2526985704898834,
      "learning_rate": 2.2989134373991025e-06,
      "loss": 0.0161,
      "step": 2357480
    },
    {
      "epoch": 3.858100456262315,
      "grad_norm": 0.29902034997940063,
      "learning_rate": 2.2988475451855857e-06,
      "loss": 0.0055,
      "step": 2357500
    },
    {
      "epoch": 3.858133186700968,
      "grad_norm": 0.30708932876586914,
      "learning_rate": 2.2987816529720684e-06,
      "loss": 0.0178,
      "step": 2357520
    },
    {
      "epoch": 3.8581659171396216,
      "grad_norm": 0.128030925989151,
      "learning_rate": 2.2987157607585516e-06,
      "loss": 0.0109,
      "step": 2357540
    },
    {
      "epoch": 3.858198647578275,
      "grad_norm": 0.3190992772579193,
      "learning_rate": 2.2986498685450343e-06,
      "loss": 0.0095,
      "step": 2357560
    },
    {
      "epoch": 3.858231378016928,
      "grad_norm": 0.41048163175582886,
      "learning_rate": 2.298583976331517e-06,
      "loss": 0.012,
      "step": 2357580
    },
    {
      "epoch": 3.8582641084555815,
      "grad_norm": 0.06304413825273514,
      "learning_rate": 2.298518084118e-06,
      "loss": 0.0141,
      "step": 2357600
    },
    {
      "epoch": 3.858296838894235,
      "grad_norm": 0.8350439667701721,
      "learning_rate": 2.298452191904483e-06,
      "loss": 0.0125,
      "step": 2357620
    },
    {
      "epoch": 3.8583295693328883,
      "grad_norm": 0.15812106430530548,
      "learning_rate": 2.2983862996909657e-06,
      "loss": 0.0112,
      "step": 2357640
    },
    {
      "epoch": 3.8583622997715414,
      "grad_norm": 0.5535070896148682,
      "learning_rate": 2.2983204074774484e-06,
      "loss": 0.0105,
      "step": 2357660
    },
    {
      "epoch": 3.858395030210195,
      "grad_norm": 0.08876442909240723,
      "learning_rate": 2.2982545152639316e-06,
      "loss": 0.0113,
      "step": 2357680
    },
    {
      "epoch": 3.858427760648848,
      "grad_norm": 0.19360221922397614,
      "learning_rate": 2.2981886230504143e-06,
      "loss": 0.0137,
      "step": 2357700
    },
    {
      "epoch": 3.8584604910875013,
      "grad_norm": 0.3907775282859802,
      "learning_rate": 2.298122730836897e-06,
      "loss": 0.0104,
      "step": 2357720
    },
    {
      "epoch": 3.858493221526155,
      "grad_norm": 0.1360573172569275,
      "learning_rate": 2.2980568386233802e-06,
      "loss": 0.0099,
      "step": 2357740
    },
    {
      "epoch": 3.858525951964808,
      "grad_norm": 0.4512840807437897,
      "learning_rate": 2.297990946409863e-06,
      "loss": 0.0125,
      "step": 2357760
    },
    {
      "epoch": 3.8585586824034617,
      "grad_norm": 0.12299764901399612,
      "learning_rate": 2.2979250541963457e-06,
      "loss": 0.0065,
      "step": 2357780
    },
    {
      "epoch": 3.858591412842115,
      "grad_norm": 0.3003409504890442,
      "learning_rate": 2.2978591619828285e-06,
      "loss": 0.0116,
      "step": 2357800
    },
    {
      "epoch": 3.8586241432807684,
      "grad_norm": 0.5414413809776306,
      "learning_rate": 2.2977932697693116e-06,
      "loss": 0.0094,
      "step": 2357820
    },
    {
      "epoch": 3.8586568737194216,
      "grad_norm": 0.09885106980800629,
      "learning_rate": 2.2977273775557944e-06,
      "loss": 0.009,
      "step": 2357840
    },
    {
      "epoch": 3.8586896041580747,
      "grad_norm": 1.0164066553115845,
      "learning_rate": 2.2976614853422775e-06,
      "loss": 0.014,
      "step": 2357860
    },
    {
      "epoch": 3.8587223345967283,
      "grad_norm": 0.27774345874786377,
      "learning_rate": 2.2975955931287603e-06,
      "loss": 0.0078,
      "step": 2357880
    },
    {
      "epoch": 3.8587550650353815,
      "grad_norm": 0.3388840854167938,
      "learning_rate": 2.297529700915243e-06,
      "loss": 0.0084,
      "step": 2357900
    },
    {
      "epoch": 3.858787795474035,
      "grad_norm": 0.5438398718833923,
      "learning_rate": 2.2974638087017257e-06,
      "loss": 0.0138,
      "step": 2357920
    },
    {
      "epoch": 3.858820525912688,
      "grad_norm": 0.22983166575431824,
      "learning_rate": 2.297397916488209e-06,
      "loss": 0.0099,
      "step": 2357940
    },
    {
      "epoch": 3.858853256351342,
      "grad_norm": 0.49453186988830566,
      "learning_rate": 2.2973320242746916e-06,
      "loss": 0.0128,
      "step": 2357960
    },
    {
      "epoch": 3.858885986789995,
      "grad_norm": 0.08909522742033005,
      "learning_rate": 2.2972661320611744e-06,
      "loss": 0.0078,
      "step": 2357980
    },
    {
      "epoch": 3.858918717228648,
      "grad_norm": 0.611301839351654,
      "learning_rate": 2.297200239847657e-06,
      "loss": 0.0106,
      "step": 2358000
    },
    {
      "epoch": 3.8589514476673017,
      "grad_norm": 0.2912842333316803,
      "learning_rate": 2.2971343476341403e-06,
      "loss": 0.0102,
      "step": 2358020
    },
    {
      "epoch": 3.858984178105955,
      "grad_norm": 0.22355414927005768,
      "learning_rate": 2.297068455420623e-06,
      "loss": 0.0097,
      "step": 2358040
    },
    {
      "epoch": 3.8590169085446084,
      "grad_norm": 0.15028929710388184,
      "learning_rate": 2.297002563207106e-06,
      "loss": 0.0108,
      "step": 2358060
    },
    {
      "epoch": 3.8590496389832616,
      "grad_norm": 0.23966176807880402,
      "learning_rate": 2.296936670993589e-06,
      "loss": 0.0067,
      "step": 2358080
    },
    {
      "epoch": 3.859082369421915,
      "grad_norm": 0.5038373470306396,
      "learning_rate": 2.2968707787800717e-06,
      "loss": 0.0102,
      "step": 2358100
    },
    {
      "epoch": 3.8591150998605683,
      "grad_norm": 0.3662678599357605,
      "learning_rate": 2.2968048865665544e-06,
      "loss": 0.0154,
      "step": 2358120
    },
    {
      "epoch": 3.8591478302992215,
      "grad_norm": 0.42205944657325745,
      "learning_rate": 2.2967389943530376e-06,
      "loss": 0.0203,
      "step": 2358140
    },
    {
      "epoch": 3.859180560737875,
      "grad_norm": 0.1301881968975067,
      "learning_rate": 2.2966731021395203e-06,
      "loss": 0.0111,
      "step": 2358160
    },
    {
      "epoch": 3.8592132911765282,
      "grad_norm": 0.19021452963352203,
      "learning_rate": 2.296607209926003e-06,
      "loss": 0.0096,
      "step": 2358180
    },
    {
      "epoch": 3.859246021615182,
      "grad_norm": 0.6159616708755493,
      "learning_rate": 2.2965413177124862e-06,
      "loss": 0.0059,
      "step": 2358200
    },
    {
      "epoch": 3.859278752053835,
      "grad_norm": 0.4274331033229828,
      "learning_rate": 2.296475425498969e-06,
      "loss": 0.0084,
      "step": 2358220
    },
    {
      "epoch": 3.8593114824924886,
      "grad_norm": 0.08184656500816345,
      "learning_rate": 2.2964095332854517e-06,
      "loss": 0.008,
      "step": 2358240
    },
    {
      "epoch": 3.8593442129311417,
      "grad_norm": 0.26697060465812683,
      "learning_rate": 2.296343641071935e-06,
      "loss": 0.0106,
      "step": 2358260
    },
    {
      "epoch": 3.859376943369795,
      "grad_norm": 0.28475379943847656,
      "learning_rate": 2.2962777488584176e-06,
      "loss": 0.0103,
      "step": 2358280
    },
    {
      "epoch": 3.8594096738084485,
      "grad_norm": 0.5278586745262146,
      "learning_rate": 2.2962118566449003e-06,
      "loss": 0.0086,
      "step": 2358300
    },
    {
      "epoch": 3.8594424042471016,
      "grad_norm": 0.16635914146900177,
      "learning_rate": 2.296145964431383e-06,
      "loss": 0.0126,
      "step": 2358320
    },
    {
      "epoch": 3.859475134685755,
      "grad_norm": 0.16766850650310516,
      "learning_rate": 2.2960800722178662e-06,
      "loss": 0.0074,
      "step": 2358340
    },
    {
      "epoch": 3.8595078651244084,
      "grad_norm": 0.43988093733787537,
      "learning_rate": 2.296014180004349e-06,
      "loss": 0.0135,
      "step": 2358360
    },
    {
      "epoch": 3.859540595563062,
      "grad_norm": 0.3483040928840637,
      "learning_rate": 2.295948287790832e-06,
      "loss": 0.0088,
      "step": 2358380
    },
    {
      "epoch": 3.859573326001715,
      "grad_norm": 0.2741675078868866,
      "learning_rate": 2.295882395577315e-06,
      "loss": 0.0071,
      "step": 2358400
    },
    {
      "epoch": 3.8596060564403682,
      "grad_norm": 0.07818818837404251,
      "learning_rate": 2.2958165033637976e-06,
      "loss": 0.0121,
      "step": 2358420
    },
    {
      "epoch": 3.859638786879022,
      "grad_norm": 0.11703607439994812,
      "learning_rate": 2.295750611150281e-06,
      "loss": 0.0118,
      "step": 2358440
    },
    {
      "epoch": 3.859671517317675,
      "grad_norm": 0.1316848248243332,
      "learning_rate": 2.2956847189367635e-06,
      "loss": 0.0121,
      "step": 2358460
    },
    {
      "epoch": 3.8597042477563286,
      "grad_norm": 0.10544290393590927,
      "learning_rate": 2.2956188267232463e-06,
      "loss": 0.0093,
      "step": 2358480
    },
    {
      "epoch": 3.8597369781949817,
      "grad_norm": 0.12276194244623184,
      "learning_rate": 2.295552934509729e-06,
      "loss": 0.0103,
      "step": 2358500
    },
    {
      "epoch": 3.8597697086336353,
      "grad_norm": 0.13987688720226288,
      "learning_rate": 2.2954870422962117e-06,
      "loss": 0.0107,
      "step": 2358520
    },
    {
      "epoch": 3.8598024390722885,
      "grad_norm": 0.5221505761146545,
      "learning_rate": 2.295421150082695e-06,
      "loss": 0.0185,
      "step": 2358540
    },
    {
      "epoch": 3.8598351695109416,
      "grad_norm": 0.12291065603494644,
      "learning_rate": 2.295355257869178e-06,
      "loss": 0.0103,
      "step": 2358560
    },
    {
      "epoch": 3.8598678999495952,
      "grad_norm": 0.325874388217926,
      "learning_rate": 2.295289365655661e-06,
      "loss": 0.0063,
      "step": 2358580
    },
    {
      "epoch": 3.8599006303882484,
      "grad_norm": 0.127202108502388,
      "learning_rate": 2.2952234734421436e-06,
      "loss": 0.0064,
      "step": 2358600
    },
    {
      "epoch": 3.8599333608269015,
      "grad_norm": 0.3172670602798462,
      "learning_rate": 2.2951575812286263e-06,
      "loss": 0.0093,
      "step": 2358620
    },
    {
      "epoch": 3.859966091265555,
      "grad_norm": 0.3566863536834717,
      "learning_rate": 2.2950916890151095e-06,
      "loss": 0.0103,
      "step": 2358640
    },
    {
      "epoch": 3.8599988217042087,
      "grad_norm": 0.2802547216415405,
      "learning_rate": 2.295025796801592e-06,
      "loss": 0.0107,
      "step": 2358660
    },
    {
      "epoch": 3.860031552142862,
      "grad_norm": 0.32440248131752014,
      "learning_rate": 2.294959904588075e-06,
      "loss": 0.0127,
      "step": 2358680
    },
    {
      "epoch": 3.860064282581515,
      "grad_norm": 0.3178534209728241,
      "learning_rate": 2.2948940123745577e-06,
      "loss": 0.0086,
      "step": 2358700
    },
    {
      "epoch": 3.8600970130201686,
      "grad_norm": 0.15076950192451477,
      "learning_rate": 2.294828120161041e-06,
      "loss": 0.0119,
      "step": 2358720
    },
    {
      "epoch": 3.8601297434588218,
      "grad_norm": 0.10982907563447952,
      "learning_rate": 2.2947622279475236e-06,
      "loss": 0.0118,
      "step": 2358740
    },
    {
      "epoch": 3.860162473897475,
      "grad_norm": 0.2539372742176056,
      "learning_rate": 2.2946963357340067e-06,
      "loss": 0.0126,
      "step": 2358760
    },
    {
      "epoch": 3.8601952043361285,
      "grad_norm": 0.4397139251232147,
      "learning_rate": 2.2946304435204895e-06,
      "loss": 0.0116,
      "step": 2358780
    },
    {
      "epoch": 3.860227934774782,
      "grad_norm": 0.3710262179374695,
      "learning_rate": 2.2945645513069722e-06,
      "loss": 0.0081,
      "step": 2358800
    },
    {
      "epoch": 3.8602606652134352,
      "grad_norm": 0.26026126742362976,
      "learning_rate": 2.294498659093455e-06,
      "loss": 0.0103,
      "step": 2358820
    },
    {
      "epoch": 3.8602933956520884,
      "grad_norm": 0.9690695405006409,
      "learning_rate": 2.294432766879938e-06,
      "loss": 0.0137,
      "step": 2358840
    },
    {
      "epoch": 3.860326126090742,
      "grad_norm": 0.27961647510528564,
      "learning_rate": 2.294366874666421e-06,
      "loss": 0.0075,
      "step": 2358860
    },
    {
      "epoch": 3.860358856529395,
      "grad_norm": 0.20795011520385742,
      "learning_rate": 2.2943009824529036e-06,
      "loss": 0.0123,
      "step": 2358880
    },
    {
      "epoch": 3.8603915869680483,
      "grad_norm": 0.12927281856536865,
      "learning_rate": 2.2942350902393868e-06,
      "loss": 0.0093,
      "step": 2358900
    },
    {
      "epoch": 3.860424317406702,
      "grad_norm": 0.3853525221347809,
      "learning_rate": 2.2941691980258695e-06,
      "loss": 0.0133,
      "step": 2358920
    },
    {
      "epoch": 3.8604570478453555,
      "grad_norm": 0.09890089929103851,
      "learning_rate": 2.2941033058123522e-06,
      "loss": 0.0133,
      "step": 2358940
    },
    {
      "epoch": 3.8604897782840086,
      "grad_norm": 0.909767746925354,
      "learning_rate": 2.2940374135988354e-06,
      "loss": 0.0109,
      "step": 2358960
    },
    {
      "epoch": 3.860522508722662,
      "grad_norm": 0.09946882724761963,
      "learning_rate": 2.293971521385318e-06,
      "loss": 0.0073,
      "step": 2358980
    },
    {
      "epoch": 3.8605552391613154,
      "grad_norm": 0.060442596673965454,
      "learning_rate": 2.293905629171801e-06,
      "loss": 0.0083,
      "step": 2359000
    },
    {
      "epoch": 3.8605879695999685,
      "grad_norm": 0.5662330389022827,
      "learning_rate": 2.2938397369582836e-06,
      "loss": 0.014,
      "step": 2359020
    },
    {
      "epoch": 3.8606207000386217,
      "grad_norm": 0.09776995331048965,
      "learning_rate": 2.293773844744767e-06,
      "loss": 0.0126,
      "step": 2359040
    },
    {
      "epoch": 3.8606534304772753,
      "grad_norm": 0.3249475657939911,
      "learning_rate": 2.2937079525312495e-06,
      "loss": 0.0093,
      "step": 2359060
    },
    {
      "epoch": 3.860686160915929,
      "grad_norm": 0.19371534883975983,
      "learning_rate": 2.2936420603177327e-06,
      "loss": 0.0074,
      "step": 2359080
    },
    {
      "epoch": 3.860718891354582,
      "grad_norm": 0.2959372103214264,
      "learning_rate": 2.2935761681042154e-06,
      "loss": 0.0165,
      "step": 2359100
    },
    {
      "epoch": 3.860751621793235,
      "grad_norm": 0.3560088872909546,
      "learning_rate": 2.293510275890698e-06,
      "loss": 0.0114,
      "step": 2359120
    },
    {
      "epoch": 3.8607843522318888,
      "grad_norm": 0.29982268810272217,
      "learning_rate": 2.293444383677181e-06,
      "loss": 0.0137,
      "step": 2359140
    },
    {
      "epoch": 3.860817082670542,
      "grad_norm": 0.21962296962738037,
      "learning_rate": 2.293378491463664e-06,
      "loss": 0.0101,
      "step": 2359160
    },
    {
      "epoch": 3.860849813109195,
      "grad_norm": 0.5624606609344482,
      "learning_rate": 2.293312599250147e-06,
      "loss": 0.0067,
      "step": 2359180
    },
    {
      "epoch": 3.8608825435478487,
      "grad_norm": 0.49133726954460144,
      "learning_rate": 2.2932467070366296e-06,
      "loss": 0.0081,
      "step": 2359200
    },
    {
      "epoch": 3.860915273986502,
      "grad_norm": 0.2506600618362427,
      "learning_rate": 2.2931808148231123e-06,
      "loss": 0.0129,
      "step": 2359220
    },
    {
      "epoch": 3.8609480044251554,
      "grad_norm": 0.20732976496219635,
      "learning_rate": 2.2931149226095955e-06,
      "loss": 0.0104,
      "step": 2359240
    },
    {
      "epoch": 3.8609807348638086,
      "grad_norm": 0.3902941644191742,
      "learning_rate": 2.293049030396078e-06,
      "loss": 0.0143,
      "step": 2359260
    },
    {
      "epoch": 3.861013465302462,
      "grad_norm": 0.41941943764686584,
      "learning_rate": 2.2929831381825614e-06,
      "loss": 0.0082,
      "step": 2359280
    },
    {
      "epoch": 3.8610461957411153,
      "grad_norm": 0.19074031710624695,
      "learning_rate": 2.292917245969044e-06,
      "loss": 0.0076,
      "step": 2359300
    },
    {
      "epoch": 3.8610789261797684,
      "grad_norm": 0.4296815097332001,
      "learning_rate": 2.292851353755527e-06,
      "loss": 0.014,
      "step": 2359320
    },
    {
      "epoch": 3.861111656618422,
      "grad_norm": 0.4975398778915405,
      "learning_rate": 2.2927854615420096e-06,
      "loss": 0.008,
      "step": 2359340
    },
    {
      "epoch": 3.861144387057075,
      "grad_norm": 0.10512874275445938,
      "learning_rate": 2.2927195693284927e-06,
      "loss": 0.0054,
      "step": 2359360
    },
    {
      "epoch": 3.861177117495729,
      "grad_norm": 0.30865979194641113,
      "learning_rate": 2.2926536771149755e-06,
      "loss": 0.0115,
      "step": 2359380
    },
    {
      "epoch": 3.861209847934382,
      "grad_norm": 0.29082438349723816,
      "learning_rate": 2.2925877849014582e-06,
      "loss": 0.0063,
      "step": 2359400
    },
    {
      "epoch": 3.8612425783730355,
      "grad_norm": 0.18259909749031067,
      "learning_rate": 2.292521892687941e-06,
      "loss": 0.0171,
      "step": 2359420
    },
    {
      "epoch": 3.8612753088116887,
      "grad_norm": 0.4748464822769165,
      "learning_rate": 2.292456000474424e-06,
      "loss": 0.0101,
      "step": 2359440
    },
    {
      "epoch": 3.861308039250342,
      "grad_norm": 0.32450905442237854,
      "learning_rate": 2.292390108260907e-06,
      "loss": 0.0141,
      "step": 2359460
    },
    {
      "epoch": 3.8613407696889954,
      "grad_norm": 0.5448298454284668,
      "learning_rate": 2.29232421604739e-06,
      "loss": 0.0101,
      "step": 2359480
    },
    {
      "epoch": 3.8613735001276486,
      "grad_norm": 0.1341550648212433,
      "learning_rate": 2.2922583238338728e-06,
      "loss": 0.0065,
      "step": 2359500
    },
    {
      "epoch": 3.861406230566302,
      "grad_norm": 0.4027913212776184,
      "learning_rate": 2.2921924316203555e-06,
      "loss": 0.0081,
      "step": 2359520
    },
    {
      "epoch": 3.8614389610049553,
      "grad_norm": 0.5544828176498413,
      "learning_rate": 2.2921265394068383e-06,
      "loss": 0.0083,
      "step": 2359540
    },
    {
      "epoch": 3.861471691443609,
      "grad_norm": 0.13763369619846344,
      "learning_rate": 2.2920606471933214e-06,
      "loss": 0.007,
      "step": 2359560
    },
    {
      "epoch": 3.861504421882262,
      "grad_norm": 0.0894305557012558,
      "learning_rate": 2.291994754979804e-06,
      "loss": 0.0137,
      "step": 2359580
    },
    {
      "epoch": 3.861537152320915,
      "grad_norm": 0.31053033471107483,
      "learning_rate": 2.2919288627662873e-06,
      "loss": 0.0087,
      "step": 2359600
    },
    {
      "epoch": 3.861569882759569,
      "grad_norm": 0.22283092141151428,
      "learning_rate": 2.29186297055277e-06,
      "loss": 0.0059,
      "step": 2359620
    },
    {
      "epoch": 3.861602613198222,
      "grad_norm": 0.18735694885253906,
      "learning_rate": 2.291797078339253e-06,
      "loss": 0.0135,
      "step": 2359640
    },
    {
      "epoch": 3.8616353436368756,
      "grad_norm": 0.17399296164512634,
      "learning_rate": 2.291731186125736e-06,
      "loss": 0.0102,
      "step": 2359660
    },
    {
      "epoch": 3.8616680740755287,
      "grad_norm": 0.2320602834224701,
      "learning_rate": 2.2916652939122187e-06,
      "loss": 0.0143,
      "step": 2359680
    },
    {
      "epoch": 3.8617008045141823,
      "grad_norm": 0.1331421136856079,
      "learning_rate": 2.2915994016987014e-06,
      "loss": 0.0081,
      "step": 2359700
    },
    {
      "epoch": 3.8617335349528354,
      "grad_norm": 0.10949204862117767,
      "learning_rate": 2.291533509485184e-06,
      "loss": 0.008,
      "step": 2359720
    },
    {
      "epoch": 3.8617662653914886,
      "grad_norm": 0.12760576605796814,
      "learning_rate": 2.291467617271667e-06,
      "loss": 0.0077,
      "step": 2359740
    },
    {
      "epoch": 3.861798995830142,
      "grad_norm": 0.3519744873046875,
      "learning_rate": 2.29140172505815e-06,
      "loss": 0.0133,
      "step": 2359760
    },
    {
      "epoch": 3.8618317262687953,
      "grad_norm": 0.3087528944015503,
      "learning_rate": 2.2913358328446332e-06,
      "loss": 0.008,
      "step": 2359780
    },
    {
      "epoch": 3.861864456707449,
      "grad_norm": 0.20362764596939087,
      "learning_rate": 2.291269940631116e-06,
      "loss": 0.0118,
      "step": 2359800
    },
    {
      "epoch": 3.861897187146102,
      "grad_norm": 0.19899694621562958,
      "learning_rate": 2.2912040484175987e-06,
      "loss": 0.0059,
      "step": 2359820
    },
    {
      "epoch": 3.8619299175847557,
      "grad_norm": 0.3752427101135254,
      "learning_rate": 2.2911381562040815e-06,
      "loss": 0.009,
      "step": 2359840
    },
    {
      "epoch": 3.861962648023409,
      "grad_norm": 0.6123694777488708,
      "learning_rate": 2.2910722639905646e-06,
      "loss": 0.0168,
      "step": 2359860
    },
    {
      "epoch": 3.861995378462062,
      "grad_norm": 0.19393397867679596,
      "learning_rate": 2.2910063717770474e-06,
      "loss": 0.0113,
      "step": 2359880
    },
    {
      "epoch": 3.8620281089007156,
      "grad_norm": 0.23072436451911926,
      "learning_rate": 2.29094047956353e-06,
      "loss": 0.0111,
      "step": 2359900
    },
    {
      "epoch": 3.8620608393393687,
      "grad_norm": 0.2707935869693756,
      "learning_rate": 2.290874587350013e-06,
      "loss": 0.008,
      "step": 2359920
    },
    {
      "epoch": 3.8620935697780223,
      "grad_norm": 0.1367393583059311,
      "learning_rate": 2.2908086951364956e-06,
      "loss": 0.0107,
      "step": 2359940
    },
    {
      "epoch": 3.8621263002166755,
      "grad_norm": 0.15399198234081268,
      "learning_rate": 2.2907428029229788e-06,
      "loss": 0.0099,
      "step": 2359960
    },
    {
      "epoch": 3.862159030655329,
      "grad_norm": 0.5660099983215332,
      "learning_rate": 2.290676910709462e-06,
      "loss": 0.0088,
      "step": 2359980
    },
    {
      "epoch": 3.862191761093982,
      "grad_norm": 0.22031353414058685,
      "learning_rate": 2.2906110184959447e-06,
      "loss": 0.0149,
      "step": 2360000
    },
    {
      "epoch": 3.8622244915326354,
      "grad_norm": 0.39914941787719727,
      "learning_rate": 2.2905451262824274e-06,
      "loss": 0.013,
      "step": 2360020
    },
    {
      "epoch": 3.862257221971289,
      "grad_norm": 0.11883656680583954,
      "learning_rate": 2.29047923406891e-06,
      "loss": 0.0107,
      "step": 2360040
    },
    {
      "epoch": 3.862289952409942,
      "grad_norm": 0.08040402084589005,
      "learning_rate": 2.2904133418553933e-06,
      "loss": 0.0109,
      "step": 2360060
    },
    {
      "epoch": 3.8623226828485957,
      "grad_norm": 0.057852644473314285,
      "learning_rate": 2.290347449641876e-06,
      "loss": 0.0105,
      "step": 2360080
    },
    {
      "epoch": 3.862355413287249,
      "grad_norm": 0.3358754515647888,
      "learning_rate": 2.2902815574283588e-06,
      "loss": 0.0096,
      "step": 2360100
    },
    {
      "epoch": 3.8623881437259024,
      "grad_norm": 0.18380948901176453,
      "learning_rate": 2.2902156652148415e-06,
      "loss": 0.0149,
      "step": 2360120
    },
    {
      "epoch": 3.8624208741645556,
      "grad_norm": 0.5080764889717102,
      "learning_rate": 2.2901497730013247e-06,
      "loss": 0.0156,
      "step": 2360140
    },
    {
      "epoch": 3.8624536046032087,
      "grad_norm": 0.1458052396774292,
      "learning_rate": 2.2900838807878074e-06,
      "loss": 0.0116,
      "step": 2360160
    },
    {
      "epoch": 3.8624863350418623,
      "grad_norm": 0.3155438005924225,
      "learning_rate": 2.2900179885742906e-06,
      "loss": 0.0113,
      "step": 2360180
    },
    {
      "epoch": 3.8625190654805155,
      "grad_norm": 0.13961346447467804,
      "learning_rate": 2.2899520963607733e-06,
      "loss": 0.0065,
      "step": 2360200
    },
    {
      "epoch": 3.8625517959191686,
      "grad_norm": 0.4990071952342987,
      "learning_rate": 2.289886204147256e-06,
      "loss": 0.0111,
      "step": 2360220
    },
    {
      "epoch": 3.8625845263578222,
      "grad_norm": 0.49281418323516846,
      "learning_rate": 2.289820311933739e-06,
      "loss": 0.0118,
      "step": 2360240
    },
    {
      "epoch": 3.862617256796476,
      "grad_norm": 0.1703418344259262,
      "learning_rate": 2.289754419720222e-06,
      "loss": 0.006,
      "step": 2360260
    },
    {
      "epoch": 3.862649987235129,
      "grad_norm": 0.2575209140777588,
      "learning_rate": 2.2896885275067047e-06,
      "loss": 0.0116,
      "step": 2360280
    },
    {
      "epoch": 3.862682717673782,
      "grad_norm": 0.31576910614967346,
      "learning_rate": 2.2896226352931874e-06,
      "loss": 0.0112,
      "step": 2360300
    },
    {
      "epoch": 3.8627154481124357,
      "grad_norm": 0.3642122447490692,
      "learning_rate": 2.2895567430796706e-06,
      "loss": 0.008,
      "step": 2360320
    },
    {
      "epoch": 3.862748178551089,
      "grad_norm": 0.33198824524879456,
      "learning_rate": 2.2894908508661533e-06,
      "loss": 0.0107,
      "step": 2360340
    },
    {
      "epoch": 3.862780908989742,
      "grad_norm": 0.1897558569908142,
      "learning_rate": 2.289424958652636e-06,
      "loss": 0.0105,
      "step": 2360360
    },
    {
      "epoch": 3.8628136394283956,
      "grad_norm": 0.2289501428604126,
      "learning_rate": 2.2893590664391193e-06,
      "loss": 0.0083,
      "step": 2360380
    },
    {
      "epoch": 3.862846369867049,
      "grad_norm": 0.5480055809020996,
      "learning_rate": 2.289293174225602e-06,
      "loss": 0.0146,
      "step": 2360400
    },
    {
      "epoch": 3.8628791003057024,
      "grad_norm": 0.28597262501716614,
      "learning_rate": 2.2892272820120847e-06,
      "loss": 0.0145,
      "step": 2360420
    },
    {
      "epoch": 3.8629118307443555,
      "grad_norm": 0.27823296189308167,
      "learning_rate": 2.2891613897985675e-06,
      "loss": 0.009,
      "step": 2360440
    },
    {
      "epoch": 3.862944561183009,
      "grad_norm": 0.24190573394298553,
      "learning_rate": 2.2890954975850506e-06,
      "loss": 0.0202,
      "step": 2360460
    },
    {
      "epoch": 3.8629772916216623,
      "grad_norm": 0.38959842920303345,
      "learning_rate": 2.2890296053715334e-06,
      "loss": 0.0091,
      "step": 2360480
    },
    {
      "epoch": 3.8630100220603154,
      "grad_norm": 0.7780032157897949,
      "learning_rate": 2.2889637131580165e-06,
      "loss": 0.0115,
      "step": 2360500
    },
    {
      "epoch": 3.863042752498969,
      "grad_norm": 0.23598583042621613,
      "learning_rate": 2.2888978209444993e-06,
      "loss": 0.0099,
      "step": 2360520
    },
    {
      "epoch": 3.8630754829376226,
      "grad_norm": 0.22929418087005615,
      "learning_rate": 2.288831928730982e-06,
      "loss": 0.0116,
      "step": 2360540
    },
    {
      "epoch": 3.8631082133762757,
      "grad_norm": 0.412360280752182,
      "learning_rate": 2.2887660365174648e-06,
      "loss": 0.0081,
      "step": 2360560
    },
    {
      "epoch": 3.863140943814929,
      "grad_norm": 0.24883075058460236,
      "learning_rate": 2.288700144303948e-06,
      "loss": 0.0109,
      "step": 2360580
    },
    {
      "epoch": 3.8631736742535825,
      "grad_norm": 0.49568983912467957,
      "learning_rate": 2.2886342520904307e-06,
      "loss": 0.011,
      "step": 2360600
    },
    {
      "epoch": 3.8632064046922356,
      "grad_norm": 0.2854401767253876,
      "learning_rate": 2.2885683598769134e-06,
      "loss": 0.0107,
      "step": 2360620
    },
    {
      "epoch": 3.863239135130889,
      "grad_norm": 0.10609948635101318,
      "learning_rate": 2.288502467663396e-06,
      "loss": 0.0083,
      "step": 2360640
    },
    {
      "epoch": 3.8632718655695424,
      "grad_norm": 0.3247931897640228,
      "learning_rate": 2.2884365754498793e-06,
      "loss": 0.0123,
      "step": 2360660
    },
    {
      "epoch": 3.863304596008196,
      "grad_norm": 0.26999950408935547,
      "learning_rate": 2.288370683236362e-06,
      "loss": 0.009,
      "step": 2360680
    },
    {
      "epoch": 3.863337326446849,
      "grad_norm": 0.18565566837787628,
      "learning_rate": 2.288304791022845e-06,
      "loss": 0.0122,
      "step": 2360700
    },
    {
      "epoch": 3.8633700568855023,
      "grad_norm": 0.045977603644132614,
      "learning_rate": 2.288238898809328e-06,
      "loss": 0.0115,
      "step": 2360720
    },
    {
      "epoch": 3.863402787324156,
      "grad_norm": 0.12970532476902008,
      "learning_rate": 2.2881730065958107e-06,
      "loss": 0.0091,
      "step": 2360740
    },
    {
      "epoch": 3.863435517762809,
      "grad_norm": 0.39592328667640686,
      "learning_rate": 2.2881071143822934e-06,
      "loss": 0.0118,
      "step": 2360760
    },
    {
      "epoch": 3.863468248201462,
      "grad_norm": 0.16944792866706848,
      "learning_rate": 2.2880412221687766e-06,
      "loss": 0.0091,
      "step": 2360780
    },
    {
      "epoch": 3.8635009786401158,
      "grad_norm": 0.4179115891456604,
      "learning_rate": 2.2879753299552593e-06,
      "loss": 0.0129,
      "step": 2360800
    },
    {
      "epoch": 3.863533709078769,
      "grad_norm": 0.28922221064567566,
      "learning_rate": 2.287909437741742e-06,
      "loss": 0.0093,
      "step": 2360820
    },
    {
      "epoch": 3.8635664395174225,
      "grad_norm": 0.4002993106842041,
      "learning_rate": 2.2878435455282252e-06,
      "loss": 0.0116,
      "step": 2360840
    },
    {
      "epoch": 3.8635991699560757,
      "grad_norm": 0.08757475763559341,
      "learning_rate": 2.287777653314708e-06,
      "loss": 0.0132,
      "step": 2360860
    },
    {
      "epoch": 3.8636319003947293,
      "grad_norm": 0.09409838914871216,
      "learning_rate": 2.2877117611011907e-06,
      "loss": 0.0069,
      "step": 2360880
    },
    {
      "epoch": 3.8636646308333824,
      "grad_norm": 0.11628593504428864,
      "learning_rate": 2.287645868887674e-06,
      "loss": 0.0078,
      "step": 2360900
    },
    {
      "epoch": 3.8636973612720356,
      "grad_norm": 0.08325785398483276,
      "learning_rate": 2.2875799766741566e-06,
      "loss": 0.0125,
      "step": 2360920
    },
    {
      "epoch": 3.863730091710689,
      "grad_norm": 0.17634724080562592,
      "learning_rate": 2.2875140844606394e-06,
      "loss": 0.0093,
      "step": 2360940
    },
    {
      "epoch": 3.8637628221493423,
      "grad_norm": 0.2844868302345276,
      "learning_rate": 2.287448192247122e-06,
      "loss": 0.0105,
      "step": 2360960
    },
    {
      "epoch": 3.863795552587996,
      "grad_norm": 0.19967202842235565,
      "learning_rate": 2.2873823000336053e-06,
      "loss": 0.0088,
      "step": 2360980
    },
    {
      "epoch": 3.863828283026649,
      "grad_norm": 0.22067856788635254,
      "learning_rate": 2.287316407820088e-06,
      "loss": 0.0076,
      "step": 2361000
    },
    {
      "epoch": 3.8638610134653026,
      "grad_norm": 0.16019217669963837,
      "learning_rate": 2.287250515606571e-06,
      "loss": 0.0079,
      "step": 2361020
    },
    {
      "epoch": 3.863893743903956,
      "grad_norm": 0.11329619586467743,
      "learning_rate": 2.287184623393054e-06,
      "loss": 0.0134,
      "step": 2361040
    },
    {
      "epoch": 3.863926474342609,
      "grad_norm": 0.4543170928955078,
      "learning_rate": 2.2871187311795366e-06,
      "loss": 0.0123,
      "step": 2361060
    },
    {
      "epoch": 3.8639592047812625,
      "grad_norm": 0.3751790523529053,
      "learning_rate": 2.28705283896602e-06,
      "loss": 0.0108,
      "step": 2361080
    },
    {
      "epoch": 3.8639919352199157,
      "grad_norm": 0.1288461536169052,
      "learning_rate": 2.2869869467525025e-06,
      "loss": 0.014,
      "step": 2361100
    },
    {
      "epoch": 3.8640246656585693,
      "grad_norm": 0.17382940649986267,
      "learning_rate": 2.2869210545389853e-06,
      "loss": 0.0076,
      "step": 2361120
    },
    {
      "epoch": 3.8640573960972224,
      "grad_norm": 0.7029066681861877,
      "learning_rate": 2.286855162325468e-06,
      "loss": 0.0129,
      "step": 2361140
    },
    {
      "epoch": 3.864090126535876,
      "grad_norm": 0.6795240044593811,
      "learning_rate": 2.2867892701119508e-06,
      "loss": 0.0134,
      "step": 2361160
    },
    {
      "epoch": 3.864122856974529,
      "grad_norm": 0.09202093631029129,
      "learning_rate": 2.286723377898434e-06,
      "loss": 0.0187,
      "step": 2361180
    },
    {
      "epoch": 3.8641555874131823,
      "grad_norm": 0.29110953211784363,
      "learning_rate": 2.286657485684917e-06,
      "loss": 0.0108,
      "step": 2361200
    },
    {
      "epoch": 3.864188317851836,
      "grad_norm": 0.4796576201915741,
      "learning_rate": 2.2865915934714e-06,
      "loss": 0.0123,
      "step": 2361220
    },
    {
      "epoch": 3.864221048290489,
      "grad_norm": 0.17032547295093536,
      "learning_rate": 2.2865257012578826e-06,
      "loss": 0.0107,
      "step": 2361240
    },
    {
      "epoch": 3.8642537787291427,
      "grad_norm": 0.13613975048065186,
      "learning_rate": 2.2864598090443653e-06,
      "loss": 0.0139,
      "step": 2361260
    },
    {
      "epoch": 3.864286509167796,
      "grad_norm": 0.7285643219947815,
      "learning_rate": 2.2863939168308485e-06,
      "loss": 0.0136,
      "step": 2361280
    },
    {
      "epoch": 3.8643192396064494,
      "grad_norm": 1.5070374011993408,
      "learning_rate": 2.286328024617331e-06,
      "loss": 0.0163,
      "step": 2361300
    },
    {
      "epoch": 3.8643519700451026,
      "grad_norm": 0.26146063208580017,
      "learning_rate": 2.286262132403814e-06,
      "loss": 0.0147,
      "step": 2361320
    },
    {
      "epoch": 3.8643847004837557,
      "grad_norm": 0.20776484906673431,
      "learning_rate": 2.2861962401902967e-06,
      "loss": 0.0119,
      "step": 2361340
    },
    {
      "epoch": 3.8644174309224093,
      "grad_norm": 0.08531158417463303,
      "learning_rate": 2.28613034797678e-06,
      "loss": 0.011,
      "step": 2361360
    },
    {
      "epoch": 3.8644501613610625,
      "grad_norm": 0.14709965884685516,
      "learning_rate": 2.2860644557632626e-06,
      "loss": 0.0115,
      "step": 2361380
    },
    {
      "epoch": 3.864482891799716,
      "grad_norm": 0.08493131399154663,
      "learning_rate": 2.2859985635497458e-06,
      "loss": 0.0077,
      "step": 2361400
    },
    {
      "epoch": 3.864515622238369,
      "grad_norm": 0.225443497300148,
      "learning_rate": 2.2859326713362285e-06,
      "loss": 0.0084,
      "step": 2361420
    },
    {
      "epoch": 3.864548352677023,
      "grad_norm": 0.2817825675010681,
      "learning_rate": 2.2858667791227112e-06,
      "loss": 0.0129,
      "step": 2361440
    },
    {
      "epoch": 3.864581083115676,
      "grad_norm": 0.15220893919467926,
      "learning_rate": 2.285800886909194e-06,
      "loss": 0.011,
      "step": 2361460
    },
    {
      "epoch": 3.864613813554329,
      "grad_norm": 0.16942554712295532,
      "learning_rate": 2.285734994695677e-06,
      "loss": 0.0122,
      "step": 2361480
    },
    {
      "epoch": 3.8646465439929827,
      "grad_norm": 0.22685222327709198,
      "learning_rate": 2.28566910248216e-06,
      "loss": 0.0108,
      "step": 2361500
    },
    {
      "epoch": 3.864679274431636,
      "grad_norm": 0.23244886100292206,
      "learning_rate": 2.2856032102686426e-06,
      "loss": 0.0104,
      "step": 2361520
    },
    {
      "epoch": 3.8647120048702894,
      "grad_norm": 0.22189496457576752,
      "learning_rate": 2.2855373180551258e-06,
      "loss": 0.0075,
      "step": 2361540
    },
    {
      "epoch": 3.8647447353089426,
      "grad_norm": 0.5034303069114685,
      "learning_rate": 2.2854714258416085e-06,
      "loss": 0.011,
      "step": 2361560
    },
    {
      "epoch": 3.864777465747596,
      "grad_norm": 0.3317106366157532,
      "learning_rate": 2.2854055336280913e-06,
      "loss": 0.009,
      "step": 2361580
    },
    {
      "epoch": 3.8648101961862493,
      "grad_norm": 0.4522717297077179,
      "learning_rate": 2.2853396414145744e-06,
      "loss": 0.0155,
      "step": 2361600
    },
    {
      "epoch": 3.8648429266249025,
      "grad_norm": 0.12078224867582321,
      "learning_rate": 2.285273749201057e-06,
      "loss": 0.0115,
      "step": 2361620
    },
    {
      "epoch": 3.864875657063556,
      "grad_norm": 0.5198962092399597,
      "learning_rate": 2.28520785698754e-06,
      "loss": 0.0105,
      "step": 2361640
    },
    {
      "epoch": 3.864908387502209,
      "grad_norm": 0.3067035973072052,
      "learning_rate": 2.2851419647740226e-06,
      "loss": 0.014,
      "step": 2361660
    },
    {
      "epoch": 3.8649411179408624,
      "grad_norm": 0.2534070312976837,
      "learning_rate": 2.285076072560506e-06,
      "loss": 0.0102,
      "step": 2361680
    },
    {
      "epoch": 3.864973848379516,
      "grad_norm": 0.3012450039386749,
      "learning_rate": 2.2850101803469885e-06,
      "loss": 0.0096,
      "step": 2361700
    },
    {
      "epoch": 3.8650065788181696,
      "grad_norm": 0.2692411541938782,
      "learning_rate": 2.2849442881334717e-06,
      "loss": 0.0111,
      "step": 2361720
    },
    {
      "epoch": 3.8650393092568227,
      "grad_norm": 0.3784356713294983,
      "learning_rate": 2.2848783959199544e-06,
      "loss": 0.0129,
      "step": 2361740
    },
    {
      "epoch": 3.865072039695476,
      "grad_norm": 0.23847661912441254,
      "learning_rate": 2.284812503706437e-06,
      "loss": 0.0095,
      "step": 2361760
    },
    {
      "epoch": 3.8651047701341295,
      "grad_norm": 0.34261879324913025,
      "learning_rate": 2.28474661149292e-06,
      "loss": 0.0071,
      "step": 2361780
    },
    {
      "epoch": 3.8651375005727826,
      "grad_norm": 0.24141164124011993,
      "learning_rate": 2.284680719279403e-06,
      "loss": 0.0129,
      "step": 2361800
    },
    {
      "epoch": 3.8651702310114358,
      "grad_norm": 0.17764966189861298,
      "learning_rate": 2.284614827065886e-06,
      "loss": 0.0143,
      "step": 2361820
    },
    {
      "epoch": 3.8652029614500893,
      "grad_norm": 0.41352972388267517,
      "learning_rate": 2.2845489348523686e-06,
      "loss": 0.0137,
      "step": 2361840
    },
    {
      "epoch": 3.865235691888743,
      "grad_norm": 0.048663195222616196,
      "learning_rate": 2.2844830426388513e-06,
      "loss": 0.0091,
      "step": 2361860
    },
    {
      "epoch": 3.865268422327396,
      "grad_norm": 0.13474635779857635,
      "learning_rate": 2.2844171504253345e-06,
      "loss": 0.0104,
      "step": 2361880
    },
    {
      "epoch": 3.8653011527660492,
      "grad_norm": 0.08301453292369843,
      "learning_rate": 2.2843512582118172e-06,
      "loss": 0.0095,
      "step": 2361900
    },
    {
      "epoch": 3.865333883204703,
      "grad_norm": 0.13588985800743103,
      "learning_rate": 2.2842853659983004e-06,
      "loss": 0.009,
      "step": 2361920
    },
    {
      "epoch": 3.865366613643356,
      "grad_norm": 0.07363581657409668,
      "learning_rate": 2.284219473784783e-06,
      "loss": 0.0078,
      "step": 2361940
    },
    {
      "epoch": 3.865399344082009,
      "grad_norm": 0.13359560072422028,
      "learning_rate": 2.284153581571266e-06,
      "loss": 0.0096,
      "step": 2361960
    },
    {
      "epoch": 3.8654320745206627,
      "grad_norm": 0.0960448682308197,
      "learning_rate": 2.2840876893577486e-06,
      "loss": 0.0102,
      "step": 2361980
    },
    {
      "epoch": 3.8654648049593163,
      "grad_norm": 0.2524755895137787,
      "learning_rate": 2.2840217971442318e-06,
      "loss": 0.0087,
      "step": 2362000
    },
    {
      "epoch": 3.8654975353979695,
      "grad_norm": 0.29086750745773315,
      "learning_rate": 2.2839559049307145e-06,
      "loss": 0.0099,
      "step": 2362020
    },
    {
      "epoch": 3.8655302658366226,
      "grad_norm": 0.12933531403541565,
      "learning_rate": 2.2838900127171972e-06,
      "loss": 0.0097,
      "step": 2362040
    },
    {
      "epoch": 3.865562996275276,
      "grad_norm": 0.27591225504875183,
      "learning_rate": 2.28382412050368e-06,
      "loss": 0.0086,
      "step": 2362060
    },
    {
      "epoch": 3.8655957267139294,
      "grad_norm": 0.6982781291007996,
      "learning_rate": 2.283758228290163e-06,
      "loss": 0.0102,
      "step": 2362080
    },
    {
      "epoch": 3.8656284571525825,
      "grad_norm": 0.11625532805919647,
      "learning_rate": 2.283692336076646e-06,
      "loss": 0.0061,
      "step": 2362100
    },
    {
      "epoch": 3.865661187591236,
      "grad_norm": 0.6239997744560242,
      "learning_rate": 2.283626443863129e-06,
      "loss": 0.0144,
      "step": 2362120
    },
    {
      "epoch": 3.8656939180298897,
      "grad_norm": 0.2083446979522705,
      "learning_rate": 2.2835605516496118e-06,
      "loss": 0.0158,
      "step": 2362140
    },
    {
      "epoch": 3.865726648468543,
      "grad_norm": 0.4890488386154175,
      "learning_rate": 2.2834946594360945e-06,
      "loss": 0.0101,
      "step": 2362160
    },
    {
      "epoch": 3.865759378907196,
      "grad_norm": 0.15966670215129852,
      "learning_rate": 2.2834287672225773e-06,
      "loss": 0.0083,
      "step": 2362180
    },
    {
      "epoch": 3.8657921093458496,
      "grad_norm": 0.12177843600511551,
      "learning_rate": 2.2833628750090604e-06,
      "loss": 0.0084,
      "step": 2362200
    },
    {
      "epoch": 3.8658248397845028,
      "grad_norm": 0.31334230303764343,
      "learning_rate": 2.283296982795543e-06,
      "loss": 0.0148,
      "step": 2362220
    },
    {
      "epoch": 3.865857570223156,
      "grad_norm": 0.2523285150527954,
      "learning_rate": 2.2832310905820263e-06,
      "loss": 0.0128,
      "step": 2362240
    },
    {
      "epoch": 3.8658903006618095,
      "grad_norm": 0.2003190815448761,
      "learning_rate": 2.283165198368509e-06,
      "loss": 0.0147,
      "step": 2362260
    },
    {
      "epoch": 3.8659230311004626,
      "grad_norm": 0.19564928114414215,
      "learning_rate": 2.283099306154992e-06,
      "loss": 0.0096,
      "step": 2362280
    },
    {
      "epoch": 3.8659557615391162,
      "grad_norm": 0.6250738501548767,
      "learning_rate": 2.283033413941475e-06,
      "loss": 0.0129,
      "step": 2362300
    },
    {
      "epoch": 3.8659884919777694,
      "grad_norm": 0.186264768242836,
      "learning_rate": 2.2829675217279577e-06,
      "loss": 0.0175,
      "step": 2362320
    },
    {
      "epoch": 3.866021222416423,
      "grad_norm": 0.08762415498495102,
      "learning_rate": 2.2829016295144405e-06,
      "loss": 0.0092,
      "step": 2362340
    },
    {
      "epoch": 3.866053952855076,
      "grad_norm": 0.3195432424545288,
      "learning_rate": 2.282835737300923e-06,
      "loss": 0.0112,
      "step": 2362360
    },
    {
      "epoch": 3.8660866832937293,
      "grad_norm": 0.23394182324409485,
      "learning_rate": 2.282769845087406e-06,
      "loss": 0.0116,
      "step": 2362380
    },
    {
      "epoch": 3.866119413732383,
      "grad_norm": 0.36316579580307007,
      "learning_rate": 2.282703952873889e-06,
      "loss": 0.0152,
      "step": 2362400
    },
    {
      "epoch": 3.866152144171036,
      "grad_norm": 0.495170921087265,
      "learning_rate": 2.2826380606603723e-06,
      "loss": 0.0105,
      "step": 2362420
    },
    {
      "epoch": 3.8661848746096896,
      "grad_norm": 0.1770877242088318,
      "learning_rate": 2.282572168446855e-06,
      "loss": 0.0149,
      "step": 2362440
    },
    {
      "epoch": 3.8662176050483428,
      "grad_norm": 0.5939925909042358,
      "learning_rate": 2.2825062762333377e-06,
      "loss": 0.0124,
      "step": 2362460
    },
    {
      "epoch": 3.8662503354869964,
      "grad_norm": 0.15168879926204681,
      "learning_rate": 2.2824403840198205e-06,
      "loss": 0.0135,
      "step": 2362480
    },
    {
      "epoch": 3.8662830659256495,
      "grad_norm": 0.3185678720474243,
      "learning_rate": 2.2823744918063036e-06,
      "loss": 0.0119,
      "step": 2362500
    },
    {
      "epoch": 3.8663157963643027,
      "grad_norm": 0.11568539589643478,
      "learning_rate": 2.2823085995927864e-06,
      "loss": 0.0116,
      "step": 2362520
    },
    {
      "epoch": 3.8663485268029563,
      "grad_norm": 0.24556246399879456,
      "learning_rate": 2.282242707379269e-06,
      "loss": 0.0102,
      "step": 2362540
    },
    {
      "epoch": 3.8663812572416094,
      "grad_norm": 0.15193183720111847,
      "learning_rate": 2.282176815165752e-06,
      "loss": 0.0105,
      "step": 2362560
    },
    {
      "epoch": 3.866413987680263,
      "grad_norm": 0.2771875560283661,
      "learning_rate": 2.2821109229522346e-06,
      "loss": 0.0133,
      "step": 2362580
    },
    {
      "epoch": 3.866446718118916,
      "grad_norm": 0.18503254652023315,
      "learning_rate": 2.2820450307387178e-06,
      "loss": 0.0083,
      "step": 2362600
    },
    {
      "epoch": 3.8664794485575698,
      "grad_norm": 0.20924396812915802,
      "learning_rate": 2.281979138525201e-06,
      "loss": 0.009,
      "step": 2362620
    },
    {
      "epoch": 3.866512178996223,
      "grad_norm": 0.3224366307258606,
      "learning_rate": 2.2819132463116837e-06,
      "loss": 0.0094,
      "step": 2362640
    },
    {
      "epoch": 3.866544909434876,
      "grad_norm": 0.5605480670928955,
      "learning_rate": 2.2818473540981664e-06,
      "loss": 0.0101,
      "step": 2362660
    },
    {
      "epoch": 3.8665776398735296,
      "grad_norm": 0.319245845079422,
      "learning_rate": 2.281781461884649e-06,
      "loss": 0.008,
      "step": 2362680
    },
    {
      "epoch": 3.866610370312183,
      "grad_norm": 0.16842277348041534,
      "learning_rate": 2.2817155696711323e-06,
      "loss": 0.0067,
      "step": 2362700
    },
    {
      "epoch": 3.8666431007508364,
      "grad_norm": 0.16786786913871765,
      "learning_rate": 2.281649677457615e-06,
      "loss": 0.0078,
      "step": 2362720
    },
    {
      "epoch": 3.8666758311894895,
      "grad_norm": 0.10256192833185196,
      "learning_rate": 2.2815837852440978e-06,
      "loss": 0.0073,
      "step": 2362740
    },
    {
      "epoch": 3.866708561628143,
      "grad_norm": 0.4992418587207794,
      "learning_rate": 2.2815178930305805e-06,
      "loss": 0.0129,
      "step": 2362760
    },
    {
      "epoch": 3.8667412920667963,
      "grad_norm": 0.21739743649959564,
      "learning_rate": 2.2814520008170637e-06,
      "loss": 0.0124,
      "step": 2362780
    },
    {
      "epoch": 3.8667740225054494,
      "grad_norm": 0.5382721424102783,
      "learning_rate": 2.2813861086035464e-06,
      "loss": 0.0136,
      "step": 2362800
    },
    {
      "epoch": 3.866806752944103,
      "grad_norm": 0.16736063361167908,
      "learning_rate": 2.2813202163900296e-06,
      "loss": 0.0119,
      "step": 2362820
    },
    {
      "epoch": 3.866839483382756,
      "grad_norm": 0.14079074561595917,
      "learning_rate": 2.2812543241765123e-06,
      "loss": 0.0108,
      "step": 2362840
    },
    {
      "epoch": 3.8668722138214098,
      "grad_norm": 0.2948997914791107,
      "learning_rate": 2.281188431962995e-06,
      "loss": 0.0128,
      "step": 2362860
    },
    {
      "epoch": 3.866904944260063,
      "grad_norm": 0.2900003492832184,
      "learning_rate": 2.281122539749478e-06,
      "loss": 0.0107,
      "step": 2362880
    },
    {
      "epoch": 3.8669376746987165,
      "grad_norm": 0.7236825823783875,
      "learning_rate": 2.281056647535961e-06,
      "loss": 0.0146,
      "step": 2362900
    },
    {
      "epoch": 3.8669704051373697,
      "grad_norm": 0.11355633288621902,
      "learning_rate": 2.2809907553224437e-06,
      "loss": 0.0103,
      "step": 2362920
    },
    {
      "epoch": 3.867003135576023,
      "grad_norm": 0.4079922139644623,
      "learning_rate": 2.2809248631089265e-06,
      "loss": 0.0133,
      "step": 2362940
    },
    {
      "epoch": 3.8670358660146764,
      "grad_norm": 0.36928796768188477,
      "learning_rate": 2.2808589708954096e-06,
      "loss": 0.0118,
      "step": 2362960
    },
    {
      "epoch": 3.8670685964533296,
      "grad_norm": 0.04980463534593582,
      "learning_rate": 2.2807930786818924e-06,
      "loss": 0.0099,
      "step": 2362980
    },
    {
      "epoch": 3.867101326891983,
      "grad_norm": 0.26956042647361755,
      "learning_rate": 2.280727186468375e-06,
      "loss": 0.0109,
      "step": 2363000
    },
    {
      "epoch": 3.8671340573306363,
      "grad_norm": 0.680202841758728,
      "learning_rate": 2.2806612942548583e-06,
      "loss": 0.0075,
      "step": 2363020
    },
    {
      "epoch": 3.86716678776929,
      "grad_norm": 0.5074182748794556,
      "learning_rate": 2.280595402041341e-06,
      "loss": 0.0103,
      "step": 2363040
    },
    {
      "epoch": 3.867199518207943,
      "grad_norm": 0.48341676592826843,
      "learning_rate": 2.2805295098278237e-06,
      "loss": 0.0107,
      "step": 2363060
    },
    {
      "epoch": 3.867232248646596,
      "grad_norm": 0.3376811146736145,
      "learning_rate": 2.2804636176143065e-06,
      "loss": 0.0077,
      "step": 2363080
    },
    {
      "epoch": 3.86726497908525,
      "grad_norm": 0.21330130100250244,
      "learning_rate": 2.2803977254007896e-06,
      "loss": 0.0076,
      "step": 2363100
    },
    {
      "epoch": 3.867297709523903,
      "grad_norm": 0.07913998514413834,
      "learning_rate": 2.2803318331872724e-06,
      "loss": 0.0135,
      "step": 2363120
    },
    {
      "epoch": 3.8673304399625565,
      "grad_norm": 0.2562558352947235,
      "learning_rate": 2.2802659409737555e-06,
      "loss": 0.0105,
      "step": 2363140
    },
    {
      "epoch": 3.8673631704012097,
      "grad_norm": 0.22598573565483093,
      "learning_rate": 2.2802000487602383e-06,
      "loss": 0.0068,
      "step": 2363160
    },
    {
      "epoch": 3.8673959008398633,
      "grad_norm": 0.16260434687137604,
      "learning_rate": 2.280134156546721e-06,
      "loss": 0.009,
      "step": 2363180
    },
    {
      "epoch": 3.8674286312785164,
      "grad_norm": 0.5413272976875305,
      "learning_rate": 2.2800682643332038e-06,
      "loss": 0.0132,
      "step": 2363200
    },
    {
      "epoch": 3.8674613617171696,
      "grad_norm": 0.42444509267807007,
      "learning_rate": 2.280002372119687e-06,
      "loss": 0.0114,
      "step": 2363220
    },
    {
      "epoch": 3.867494092155823,
      "grad_norm": 0.1496933251619339,
      "learning_rate": 2.2799364799061697e-06,
      "loss": 0.0101,
      "step": 2363240
    },
    {
      "epoch": 3.8675268225944763,
      "grad_norm": 0.11880557984113693,
      "learning_rate": 2.2798705876926524e-06,
      "loss": 0.0134,
      "step": 2363260
    },
    {
      "epoch": 3.8675595530331295,
      "grad_norm": 0.40896326303482056,
      "learning_rate": 2.279804695479135e-06,
      "loss": 0.0113,
      "step": 2363280
    },
    {
      "epoch": 3.867592283471783,
      "grad_norm": 0.42807501554489136,
      "learning_rate": 2.2797388032656183e-06,
      "loss": 0.0092,
      "step": 2363300
    },
    {
      "epoch": 3.8676250139104367,
      "grad_norm": 0.2520773410797119,
      "learning_rate": 2.279672911052101e-06,
      "loss": 0.0069,
      "step": 2363320
    },
    {
      "epoch": 3.86765774434909,
      "grad_norm": 0.23809419572353363,
      "learning_rate": 2.2796070188385842e-06,
      "loss": 0.0134,
      "step": 2363340
    },
    {
      "epoch": 3.867690474787743,
      "grad_norm": 0.21679143607616425,
      "learning_rate": 2.279541126625067e-06,
      "loss": 0.0103,
      "step": 2363360
    },
    {
      "epoch": 3.8677232052263966,
      "grad_norm": 0.4537413418292999,
      "learning_rate": 2.2794752344115497e-06,
      "loss": 0.0123,
      "step": 2363380
    },
    {
      "epoch": 3.8677559356650497,
      "grad_norm": 0.38677018880844116,
      "learning_rate": 2.2794093421980324e-06,
      "loss": 0.01,
      "step": 2363400
    },
    {
      "epoch": 3.867788666103703,
      "grad_norm": 0.10739117115736008,
      "learning_rate": 2.2793434499845156e-06,
      "loss": 0.0108,
      "step": 2363420
    },
    {
      "epoch": 3.8678213965423565,
      "grad_norm": 0.2413085252046585,
      "learning_rate": 2.2792775577709983e-06,
      "loss": 0.0059,
      "step": 2363440
    },
    {
      "epoch": 3.86785412698101,
      "grad_norm": 0.1393299698829651,
      "learning_rate": 2.279211665557481e-06,
      "loss": 0.0123,
      "step": 2363460
    },
    {
      "epoch": 3.867886857419663,
      "grad_norm": 0.06035831943154335,
      "learning_rate": 2.2791457733439642e-06,
      "loss": 0.0123,
      "step": 2363480
    },
    {
      "epoch": 3.8679195878583164,
      "grad_norm": 0.12849946320056915,
      "learning_rate": 2.279079881130447e-06,
      "loss": 0.0092,
      "step": 2363500
    },
    {
      "epoch": 3.86795231829697,
      "grad_norm": 0.43517956137657166,
      "learning_rate": 2.2790139889169297e-06,
      "loss": 0.0113,
      "step": 2363520
    },
    {
      "epoch": 3.867985048735623,
      "grad_norm": 0.24746233224868774,
      "learning_rate": 2.278948096703413e-06,
      "loss": 0.0114,
      "step": 2363540
    },
    {
      "epoch": 3.8680177791742762,
      "grad_norm": 0.19462484121322632,
      "learning_rate": 2.2788822044898956e-06,
      "loss": 0.0094,
      "step": 2363560
    },
    {
      "epoch": 3.86805050961293,
      "grad_norm": 0.24001452326774597,
      "learning_rate": 2.2788163122763784e-06,
      "loss": 0.0145,
      "step": 2363580
    },
    {
      "epoch": 3.8680832400515834,
      "grad_norm": 0.6324217319488525,
      "learning_rate": 2.278750420062861e-06,
      "loss": 0.0106,
      "step": 2363600
    },
    {
      "epoch": 3.8681159704902366,
      "grad_norm": 1.0842353105545044,
      "learning_rate": 2.2786845278493443e-06,
      "loss": 0.0149,
      "step": 2363620
    },
    {
      "epoch": 3.8681487009288897,
      "grad_norm": 0.1696689873933792,
      "learning_rate": 2.278618635635827e-06,
      "loss": 0.0186,
      "step": 2363640
    },
    {
      "epoch": 3.8681814313675433,
      "grad_norm": 0.09087231755256653,
      "learning_rate": 2.27855274342231e-06,
      "loss": 0.0137,
      "step": 2363660
    },
    {
      "epoch": 3.8682141618061965,
      "grad_norm": 0.09392822533845901,
      "learning_rate": 2.278486851208793e-06,
      "loss": 0.0093,
      "step": 2363680
    },
    {
      "epoch": 3.8682468922448496,
      "grad_norm": 0.2966727018356323,
      "learning_rate": 2.2784209589952756e-06,
      "loss": 0.0113,
      "step": 2363700
    },
    {
      "epoch": 3.8682796226835032,
      "grad_norm": 0.09525887668132782,
      "learning_rate": 2.278355066781759e-06,
      "loss": 0.0114,
      "step": 2363720
    },
    {
      "epoch": 3.8683123531221564,
      "grad_norm": 0.48201116919517517,
      "learning_rate": 2.2782891745682416e-06,
      "loss": 0.02,
      "step": 2363740
    },
    {
      "epoch": 3.86834508356081,
      "grad_norm": 0.5215305685997009,
      "learning_rate": 2.2782232823547243e-06,
      "loss": 0.0097,
      "step": 2363760
    },
    {
      "epoch": 3.868377813999463,
      "grad_norm": 0.2748684883117676,
      "learning_rate": 2.278157390141207e-06,
      "loss": 0.007,
      "step": 2363780
    },
    {
      "epoch": 3.8684105444381167,
      "grad_norm": 0.07956859469413757,
      "learning_rate": 2.2780914979276898e-06,
      "loss": 0.0089,
      "step": 2363800
    },
    {
      "epoch": 3.86844327487677,
      "grad_norm": 0.35792502760887146,
      "learning_rate": 2.278025605714173e-06,
      "loss": 0.0148,
      "step": 2363820
    },
    {
      "epoch": 3.868476005315423,
      "grad_norm": 0.20547527074813843,
      "learning_rate": 2.277959713500656e-06,
      "loss": 0.0095,
      "step": 2363840
    },
    {
      "epoch": 3.8685087357540766,
      "grad_norm": 0.5602300763130188,
      "learning_rate": 2.277893821287139e-06,
      "loss": 0.0085,
      "step": 2363860
    },
    {
      "epoch": 3.8685414661927298,
      "grad_norm": 0.15721215307712555,
      "learning_rate": 2.2778279290736216e-06,
      "loss": 0.0127,
      "step": 2363880
    },
    {
      "epoch": 3.8685741966313834,
      "grad_norm": 0.1867251843214035,
      "learning_rate": 2.2777620368601043e-06,
      "loss": 0.0055,
      "step": 2363900
    },
    {
      "epoch": 3.8686069270700365,
      "grad_norm": 0.30515193939208984,
      "learning_rate": 2.2776961446465875e-06,
      "loss": 0.0145,
      "step": 2363920
    },
    {
      "epoch": 3.86863965750869,
      "grad_norm": 0.3333728313446045,
      "learning_rate": 2.2776302524330702e-06,
      "loss": 0.0138,
      "step": 2363940
    },
    {
      "epoch": 3.8686723879473432,
      "grad_norm": 0.23327943682670593,
      "learning_rate": 2.277564360219553e-06,
      "loss": 0.0103,
      "step": 2363960
    },
    {
      "epoch": 3.8687051183859964,
      "grad_norm": 0.1539757400751114,
      "learning_rate": 2.2774984680060357e-06,
      "loss": 0.0115,
      "step": 2363980
    },
    {
      "epoch": 3.86873784882465,
      "grad_norm": 0.20242686569690704,
      "learning_rate": 2.277432575792519e-06,
      "loss": 0.0096,
      "step": 2364000
    },
    {
      "epoch": 3.868770579263303,
      "grad_norm": 0.27438580989837646,
      "learning_rate": 2.2773666835790016e-06,
      "loss": 0.0073,
      "step": 2364020
    },
    {
      "epoch": 3.8688033097019567,
      "grad_norm": 0.21988050639629364,
      "learning_rate": 2.2773007913654848e-06,
      "loss": 0.0091,
      "step": 2364040
    },
    {
      "epoch": 3.86883604014061,
      "grad_norm": 0.4055030941963196,
      "learning_rate": 2.2772348991519675e-06,
      "loss": 0.0076,
      "step": 2364060
    },
    {
      "epoch": 3.8688687705792635,
      "grad_norm": 0.3063168525695801,
      "learning_rate": 2.2771690069384502e-06,
      "loss": 0.0134,
      "step": 2364080
    },
    {
      "epoch": 3.8689015010179166,
      "grad_norm": 0.2957008183002472,
      "learning_rate": 2.277103114724933e-06,
      "loss": 0.0121,
      "step": 2364100
    },
    {
      "epoch": 3.86893423145657,
      "grad_norm": 0.034594856202602386,
      "learning_rate": 2.277037222511416e-06,
      "loss": 0.0069,
      "step": 2364120
    },
    {
      "epoch": 3.8689669618952234,
      "grad_norm": 0.326528400182724,
      "learning_rate": 2.276971330297899e-06,
      "loss": 0.0094,
      "step": 2364140
    },
    {
      "epoch": 3.8689996923338765,
      "grad_norm": 0.07362142950296402,
      "learning_rate": 2.2769054380843816e-06,
      "loss": 0.0101,
      "step": 2364160
    },
    {
      "epoch": 3.86903242277253,
      "grad_norm": 0.2336214780807495,
      "learning_rate": 2.276839545870865e-06,
      "loss": 0.0142,
      "step": 2364180
    },
    {
      "epoch": 3.8690651532111833,
      "grad_norm": 0.26572108268737793,
      "learning_rate": 2.2767736536573475e-06,
      "loss": 0.0108,
      "step": 2364200
    },
    {
      "epoch": 3.869097883649837,
      "grad_norm": 0.6765769720077515,
      "learning_rate": 2.2767077614438303e-06,
      "loss": 0.0101,
      "step": 2364220
    },
    {
      "epoch": 3.86913061408849,
      "grad_norm": 0.3000147044658661,
      "learning_rate": 2.2766418692303134e-06,
      "loss": 0.0114,
      "step": 2364240
    },
    {
      "epoch": 3.869163344527143,
      "grad_norm": 0.6124870777130127,
      "learning_rate": 2.276575977016796e-06,
      "loss": 0.0085,
      "step": 2364260
    },
    {
      "epoch": 3.8691960749657968,
      "grad_norm": 0.5007818341255188,
      "learning_rate": 2.276510084803279e-06,
      "loss": 0.012,
      "step": 2364280
    },
    {
      "epoch": 3.86922880540445,
      "grad_norm": 0.483687162399292,
      "learning_rate": 2.2764441925897617e-06,
      "loss": 0.0191,
      "step": 2364300
    },
    {
      "epoch": 3.8692615358431035,
      "grad_norm": 0.6052073240280151,
      "learning_rate": 2.276378300376245e-06,
      "loss": 0.0109,
      "step": 2364320
    },
    {
      "epoch": 3.8692942662817567,
      "grad_norm": 0.6020929217338562,
      "learning_rate": 2.2763124081627276e-06,
      "loss": 0.0105,
      "step": 2364340
    },
    {
      "epoch": 3.8693269967204102,
      "grad_norm": 0.24044616520404816,
      "learning_rate": 2.2762465159492107e-06,
      "loss": 0.0116,
      "step": 2364360
    },
    {
      "epoch": 3.8693597271590634,
      "grad_norm": 0.40547245740890503,
      "learning_rate": 2.2761806237356935e-06,
      "loss": 0.0131,
      "step": 2364380
    },
    {
      "epoch": 3.8693924575977166,
      "grad_norm": 2.0949904918670654,
      "learning_rate": 2.276114731522176e-06,
      "loss": 0.0149,
      "step": 2364400
    },
    {
      "epoch": 3.86942518803637,
      "grad_norm": 0.28376439213752747,
      "learning_rate": 2.276048839308659e-06,
      "loss": 0.0062,
      "step": 2364420
    },
    {
      "epoch": 3.8694579184750233,
      "grad_norm": 0.3974476754665375,
      "learning_rate": 2.275982947095142e-06,
      "loss": 0.0132,
      "step": 2364440
    },
    {
      "epoch": 3.869490648913677,
      "grad_norm": 0.2832255959510803,
      "learning_rate": 2.275917054881625e-06,
      "loss": 0.0129,
      "step": 2364460
    },
    {
      "epoch": 3.86952337935233,
      "grad_norm": 0.3006170094013214,
      "learning_rate": 2.2758511626681076e-06,
      "loss": 0.0077,
      "step": 2364480
    },
    {
      "epoch": 3.8695561097909836,
      "grad_norm": 0.24752232432365417,
      "learning_rate": 2.2757852704545903e-06,
      "loss": 0.009,
      "step": 2364500
    },
    {
      "epoch": 3.869588840229637,
      "grad_norm": 0.07260393351316452,
      "learning_rate": 2.2757193782410735e-06,
      "loss": 0.0086,
      "step": 2364520
    },
    {
      "epoch": 3.86962157066829,
      "grad_norm": 0.13493959605693817,
      "learning_rate": 2.2756534860275562e-06,
      "loss": 0.0089,
      "step": 2364540
    },
    {
      "epoch": 3.8696543011069435,
      "grad_norm": 0.7396860122680664,
      "learning_rate": 2.2755875938140394e-06,
      "loss": 0.0126,
      "step": 2364560
    },
    {
      "epoch": 3.8696870315455967,
      "grad_norm": 0.3743552267551422,
      "learning_rate": 2.275521701600522e-06,
      "loss": 0.0093,
      "step": 2364580
    },
    {
      "epoch": 3.8697197619842503,
      "grad_norm": 0.44829702377319336,
      "learning_rate": 2.275455809387005e-06,
      "loss": 0.0142,
      "step": 2364600
    },
    {
      "epoch": 3.8697524924229034,
      "grad_norm": 0.10762781649827957,
      "learning_rate": 2.2753899171734876e-06,
      "loss": 0.012,
      "step": 2364620
    },
    {
      "epoch": 3.869785222861557,
      "grad_norm": 0.23238113522529602,
      "learning_rate": 2.2753240249599708e-06,
      "loss": 0.0131,
      "step": 2364640
    },
    {
      "epoch": 3.86981795330021,
      "grad_norm": 0.09090267866849899,
      "learning_rate": 2.2752581327464535e-06,
      "loss": 0.0097,
      "step": 2364660
    },
    {
      "epoch": 3.8698506837388633,
      "grad_norm": 0.20082710683345795,
      "learning_rate": 2.2751922405329362e-06,
      "loss": 0.0095,
      "step": 2364680
    },
    {
      "epoch": 3.869883414177517,
      "grad_norm": 1.264573335647583,
      "learning_rate": 2.275126348319419e-06,
      "loss": 0.013,
      "step": 2364700
    },
    {
      "epoch": 3.86991614461617,
      "grad_norm": 0.09701356291770935,
      "learning_rate": 2.275060456105902e-06,
      "loss": 0.0088,
      "step": 2364720
    },
    {
      "epoch": 3.869948875054823,
      "grad_norm": 0.40010178089141846,
      "learning_rate": 2.274994563892385e-06,
      "loss": 0.0113,
      "step": 2364740
    },
    {
      "epoch": 3.869981605493477,
      "grad_norm": 1.9911773204803467,
      "learning_rate": 2.274928671678868e-06,
      "loss": 0.0107,
      "step": 2364760
    },
    {
      "epoch": 3.8700143359321304,
      "grad_norm": 0.7595197558403015,
      "learning_rate": 2.274862779465351e-06,
      "loss": 0.0098,
      "step": 2364780
    },
    {
      "epoch": 3.8700470663707836,
      "grad_norm": 0.12442515045404434,
      "learning_rate": 2.2747968872518335e-06,
      "loss": 0.0079,
      "step": 2364800
    },
    {
      "epoch": 3.8700797968094367,
      "grad_norm": 0.11800517141819,
      "learning_rate": 2.2747309950383163e-06,
      "loss": 0.0081,
      "step": 2364820
    },
    {
      "epoch": 3.8701125272480903,
      "grad_norm": 0.28796613216400146,
      "learning_rate": 2.2746651028247994e-06,
      "loss": 0.0161,
      "step": 2364840
    },
    {
      "epoch": 3.8701452576867434,
      "grad_norm": 0.146060049533844,
      "learning_rate": 2.274599210611282e-06,
      "loss": 0.0083,
      "step": 2364860
    },
    {
      "epoch": 3.8701779881253966,
      "grad_norm": 0.0986754447221756,
      "learning_rate": 2.2745333183977653e-06,
      "loss": 0.0127,
      "step": 2364880
    },
    {
      "epoch": 3.87021071856405,
      "grad_norm": 0.8439369797706604,
      "learning_rate": 2.274467426184248e-06,
      "loss": 0.0113,
      "step": 2364900
    },
    {
      "epoch": 3.870243449002704,
      "grad_norm": 0.06422441452741623,
      "learning_rate": 2.274401533970731e-06,
      "loss": 0.0102,
      "step": 2364920
    },
    {
      "epoch": 3.870276179441357,
      "grad_norm": 0.3169707953929901,
      "learning_rate": 2.274335641757214e-06,
      "loss": 0.0075,
      "step": 2364940
    },
    {
      "epoch": 3.87030890988001,
      "grad_norm": 0.4316486418247223,
      "learning_rate": 2.2742697495436967e-06,
      "loss": 0.0156,
      "step": 2364960
    },
    {
      "epoch": 3.8703416403186637,
      "grad_norm": 0.1875259429216385,
      "learning_rate": 2.2742038573301795e-06,
      "loss": 0.0132,
      "step": 2364980
    },
    {
      "epoch": 3.870374370757317,
      "grad_norm": 0.19470424950122833,
      "learning_rate": 2.274137965116662e-06,
      "loss": 0.0109,
      "step": 2365000
    },
    {
      "epoch": 3.87040710119597,
      "grad_norm": 0.14919526875019073,
      "learning_rate": 2.274072072903145e-06,
      "loss": 0.0117,
      "step": 2365020
    },
    {
      "epoch": 3.8704398316346236,
      "grad_norm": 0.2717396020889282,
      "learning_rate": 2.274006180689628e-06,
      "loss": 0.0073,
      "step": 2365040
    },
    {
      "epoch": 3.870472562073277,
      "grad_norm": 0.04196053743362427,
      "learning_rate": 2.2739402884761113e-06,
      "loss": 0.0106,
      "step": 2365060
    },
    {
      "epoch": 3.8705052925119303,
      "grad_norm": 0.11985933780670166,
      "learning_rate": 2.273874396262594e-06,
      "loss": 0.008,
      "step": 2365080
    },
    {
      "epoch": 3.8705380229505835,
      "grad_norm": 0.14383193850517273,
      "learning_rate": 2.2738085040490767e-06,
      "loss": 0.0116,
      "step": 2365100
    },
    {
      "epoch": 3.870570753389237,
      "grad_norm": 0.27866825461387634,
      "learning_rate": 2.2737426118355595e-06,
      "loss": 0.0115,
      "step": 2365120
    },
    {
      "epoch": 3.87060348382789,
      "grad_norm": 0.1715995967388153,
      "learning_rate": 2.2736767196220427e-06,
      "loss": 0.0061,
      "step": 2365140
    },
    {
      "epoch": 3.8706362142665434,
      "grad_norm": 0.4718250036239624,
      "learning_rate": 2.2736108274085254e-06,
      "loss": 0.009,
      "step": 2365160
    },
    {
      "epoch": 3.870668944705197,
      "grad_norm": 0.18126247823238373,
      "learning_rate": 2.273544935195008e-06,
      "loss": 0.0076,
      "step": 2365180
    },
    {
      "epoch": 3.8707016751438506,
      "grad_norm": 0.24837806820869446,
      "learning_rate": 2.273479042981491e-06,
      "loss": 0.0094,
      "step": 2365200
    },
    {
      "epoch": 3.8707344055825037,
      "grad_norm": 0.1874985545873642,
      "learning_rate": 2.2734131507679736e-06,
      "loss": 0.0083,
      "step": 2365220
    },
    {
      "epoch": 3.870767136021157,
      "grad_norm": 0.08256624639034271,
      "learning_rate": 2.2733472585544568e-06,
      "loss": 0.0088,
      "step": 2365240
    },
    {
      "epoch": 3.8707998664598104,
      "grad_norm": 0.2029624730348587,
      "learning_rate": 2.27328136634094e-06,
      "loss": 0.0103,
      "step": 2365260
    },
    {
      "epoch": 3.8708325968984636,
      "grad_norm": 0.8058137893676758,
      "learning_rate": 2.2732154741274227e-06,
      "loss": 0.01,
      "step": 2365280
    },
    {
      "epoch": 3.8708653273371167,
      "grad_norm": 0.3847551643848419,
      "learning_rate": 2.2731495819139054e-06,
      "loss": 0.0101,
      "step": 2365300
    },
    {
      "epoch": 3.8708980577757703,
      "grad_norm": 0.4350874722003937,
      "learning_rate": 2.273083689700388e-06,
      "loss": 0.0128,
      "step": 2365320
    },
    {
      "epoch": 3.8709307882144235,
      "grad_norm": 0.2825809717178345,
      "learning_rate": 2.2730177974868713e-06,
      "loss": 0.0066,
      "step": 2365340
    },
    {
      "epoch": 3.870963518653077,
      "grad_norm": 0.3510264754295349,
      "learning_rate": 2.272951905273354e-06,
      "loss": 0.0112,
      "step": 2365360
    },
    {
      "epoch": 3.8709962490917302,
      "grad_norm": 0.20453231036663055,
      "learning_rate": 2.272886013059837e-06,
      "loss": 0.0105,
      "step": 2365380
    },
    {
      "epoch": 3.871028979530384,
      "grad_norm": 0.35729268193244934,
      "learning_rate": 2.2728201208463195e-06,
      "loss": 0.0138,
      "step": 2365400
    },
    {
      "epoch": 3.871061709969037,
      "grad_norm": 0.214079350233078,
      "learning_rate": 2.2727542286328027e-06,
      "loss": 0.0083,
      "step": 2365420
    },
    {
      "epoch": 3.87109444040769,
      "grad_norm": 0.10892407596111298,
      "learning_rate": 2.2726883364192854e-06,
      "loss": 0.0098,
      "step": 2365440
    },
    {
      "epoch": 3.8711271708463437,
      "grad_norm": 0.10488662868738174,
      "learning_rate": 2.2726224442057686e-06,
      "loss": 0.0092,
      "step": 2365460
    },
    {
      "epoch": 3.871159901284997,
      "grad_norm": 0.1779399812221527,
      "learning_rate": 2.2725565519922513e-06,
      "loss": 0.0112,
      "step": 2365480
    },
    {
      "epoch": 3.8711926317236505,
      "grad_norm": 0.14852337539196014,
      "learning_rate": 2.272490659778734e-06,
      "loss": 0.0102,
      "step": 2365500
    },
    {
      "epoch": 3.8712253621623036,
      "grad_norm": 0.2424481362104416,
      "learning_rate": 2.272424767565217e-06,
      "loss": 0.0093,
      "step": 2365520
    },
    {
      "epoch": 3.871258092600957,
      "grad_norm": 0.08571844547986984,
      "learning_rate": 2.2723588753517e-06,
      "loss": 0.0144,
      "step": 2365540
    },
    {
      "epoch": 3.8712908230396104,
      "grad_norm": 0.15111666917800903,
      "learning_rate": 2.2722929831381827e-06,
      "loss": 0.0126,
      "step": 2365560
    },
    {
      "epoch": 3.8713235534782635,
      "grad_norm": 0.3572767674922943,
      "learning_rate": 2.2722270909246655e-06,
      "loss": 0.0101,
      "step": 2365580
    },
    {
      "epoch": 3.871356283916917,
      "grad_norm": 1.476717472076416,
      "learning_rate": 2.2721611987111486e-06,
      "loss": 0.0102,
      "step": 2365600
    },
    {
      "epoch": 3.8713890143555703,
      "grad_norm": 0.3698793649673462,
      "learning_rate": 2.2720953064976314e-06,
      "loss": 0.0059,
      "step": 2365620
    },
    {
      "epoch": 3.871421744794224,
      "grad_norm": 0.11243100464344025,
      "learning_rate": 2.272029414284114e-06,
      "loss": 0.0096,
      "step": 2365640
    },
    {
      "epoch": 3.871454475232877,
      "grad_norm": 0.33566975593566895,
      "learning_rate": 2.2719635220705973e-06,
      "loss": 0.0082,
      "step": 2365660
    },
    {
      "epoch": 3.8714872056715306,
      "grad_norm": 0.16079579293727875,
      "learning_rate": 2.27189762985708e-06,
      "loss": 0.0089,
      "step": 2365680
    },
    {
      "epoch": 3.8715199361101837,
      "grad_norm": 0.0704197958111763,
      "learning_rate": 2.2718317376435628e-06,
      "loss": 0.0101,
      "step": 2365700
    },
    {
      "epoch": 3.871552666548837,
      "grad_norm": 0.2757987976074219,
      "learning_rate": 2.2717658454300455e-06,
      "loss": 0.0106,
      "step": 2365720
    },
    {
      "epoch": 3.8715853969874905,
      "grad_norm": 0.48818665742874146,
      "learning_rate": 2.2716999532165287e-06,
      "loss": 0.0101,
      "step": 2365740
    },
    {
      "epoch": 3.8716181274261436,
      "grad_norm": 0.20386186242103577,
      "learning_rate": 2.2716340610030114e-06,
      "loss": 0.0193,
      "step": 2365760
    },
    {
      "epoch": 3.8716508578647972,
      "grad_norm": 0.19104516506195068,
      "learning_rate": 2.2715681687894946e-06,
      "loss": 0.0096,
      "step": 2365780
    },
    {
      "epoch": 3.8716835883034504,
      "grad_norm": 0.070330411195755,
      "learning_rate": 2.2715022765759773e-06,
      "loss": 0.013,
      "step": 2365800
    },
    {
      "epoch": 3.871716318742104,
      "grad_norm": 0.11691482365131378,
      "learning_rate": 2.27143638436246e-06,
      "loss": 0.0102,
      "step": 2365820
    },
    {
      "epoch": 3.871749049180757,
      "grad_norm": 0.1038505956530571,
      "learning_rate": 2.2713704921489428e-06,
      "loss": 0.0058,
      "step": 2365840
    },
    {
      "epoch": 3.8717817796194103,
      "grad_norm": 0.32221469283103943,
      "learning_rate": 2.271304599935426e-06,
      "loss": 0.0112,
      "step": 2365860
    },
    {
      "epoch": 3.871814510058064,
      "grad_norm": 0.06546350568532944,
      "learning_rate": 2.2712387077219087e-06,
      "loss": 0.0078,
      "step": 2365880
    },
    {
      "epoch": 3.871847240496717,
      "grad_norm": 0.07519299536943436,
      "learning_rate": 2.2711728155083914e-06,
      "loss": 0.0111,
      "step": 2365900
    },
    {
      "epoch": 3.8718799709353706,
      "grad_norm": 0.08441442251205444,
      "learning_rate": 2.271106923294874e-06,
      "loss": 0.0125,
      "step": 2365920
    },
    {
      "epoch": 3.8719127013740238,
      "grad_norm": 0.22609367966651917,
      "learning_rate": 2.2710410310813573e-06,
      "loss": 0.0078,
      "step": 2365940
    },
    {
      "epoch": 3.8719454318126774,
      "grad_norm": 0.47567877173423767,
      "learning_rate": 2.27097513886784e-06,
      "loss": 0.0075,
      "step": 2365960
    },
    {
      "epoch": 3.8719781622513305,
      "grad_norm": 0.9414736032485962,
      "learning_rate": 2.2709092466543232e-06,
      "loss": 0.0087,
      "step": 2365980
    },
    {
      "epoch": 3.8720108926899837,
      "grad_norm": 0.2490077167749405,
      "learning_rate": 2.270843354440806e-06,
      "loss": 0.0162,
      "step": 2366000
    },
    {
      "epoch": 3.8720436231286373,
      "grad_norm": 0.6015007495880127,
      "learning_rate": 2.2707774622272887e-06,
      "loss": 0.0158,
      "step": 2366020
    },
    {
      "epoch": 3.8720763535672904,
      "grad_norm": 0.259085088968277,
      "learning_rate": 2.2707115700137714e-06,
      "loss": 0.0097,
      "step": 2366040
    },
    {
      "epoch": 3.872109084005944,
      "grad_norm": 0.16926948726177216,
      "learning_rate": 2.2706456778002546e-06,
      "loss": 0.0105,
      "step": 2366060
    },
    {
      "epoch": 3.872141814444597,
      "grad_norm": 0.3572752773761749,
      "learning_rate": 2.2705797855867373e-06,
      "loss": 0.0099,
      "step": 2366080
    },
    {
      "epoch": 3.8721745448832507,
      "grad_norm": 0.4430800974369049,
      "learning_rate": 2.27051389337322e-06,
      "loss": 0.0106,
      "step": 2366100
    },
    {
      "epoch": 3.872207275321904,
      "grad_norm": 0.3050707280635834,
      "learning_rate": 2.2704480011597033e-06,
      "loss": 0.0179,
      "step": 2366120
    },
    {
      "epoch": 3.872240005760557,
      "grad_norm": 0.3238723576068878,
      "learning_rate": 2.270382108946186e-06,
      "loss": 0.005,
      "step": 2366140
    },
    {
      "epoch": 3.8722727361992106,
      "grad_norm": 0.14439818263053894,
      "learning_rate": 2.270316216732669e-06,
      "loss": 0.0172,
      "step": 2366160
    },
    {
      "epoch": 3.872305466637864,
      "grad_norm": 0.20784124732017517,
      "learning_rate": 2.270250324519152e-06,
      "loss": 0.0119,
      "step": 2366180
    },
    {
      "epoch": 3.8723381970765174,
      "grad_norm": 0.1826670616865158,
      "learning_rate": 2.2701844323056346e-06,
      "loss": 0.0086,
      "step": 2366200
    },
    {
      "epoch": 3.8723709275151705,
      "grad_norm": 0.2470792829990387,
      "learning_rate": 2.2701185400921174e-06,
      "loss": 0.0122,
      "step": 2366220
    },
    {
      "epoch": 3.872403657953824,
      "grad_norm": 0.43095657229423523,
      "learning_rate": 2.2700526478786e-06,
      "loss": 0.0167,
      "step": 2366240
    },
    {
      "epoch": 3.8724363883924773,
      "grad_norm": 0.1568831503391266,
      "learning_rate": 2.2699867556650833e-06,
      "loss": 0.0137,
      "step": 2366260
    },
    {
      "epoch": 3.8724691188311304,
      "grad_norm": 0.31236860156059265,
      "learning_rate": 2.269920863451566e-06,
      "loss": 0.0125,
      "step": 2366280
    },
    {
      "epoch": 3.872501849269784,
      "grad_norm": 0.21070437133312225,
      "learning_rate": 2.269854971238049e-06,
      "loss": 0.0093,
      "step": 2366300
    },
    {
      "epoch": 3.872534579708437,
      "grad_norm": 0.37395545840263367,
      "learning_rate": 2.269789079024532e-06,
      "loss": 0.0089,
      "step": 2366320
    },
    {
      "epoch": 3.8725673101470903,
      "grad_norm": 0.12266751378774643,
      "learning_rate": 2.2697231868110147e-06,
      "loss": 0.006,
      "step": 2366340
    },
    {
      "epoch": 3.872600040585744,
      "grad_norm": 0.22672204673290253,
      "learning_rate": 2.269657294597498e-06,
      "loss": 0.0087,
      "step": 2366360
    },
    {
      "epoch": 3.8726327710243975,
      "grad_norm": 0.04386923834681511,
      "learning_rate": 2.2695914023839806e-06,
      "loss": 0.0118,
      "step": 2366380
    },
    {
      "epoch": 3.8726655014630507,
      "grad_norm": 0.5386331081390381,
      "learning_rate": 2.2695255101704633e-06,
      "loss": 0.0085,
      "step": 2366400
    },
    {
      "epoch": 3.872698231901704,
      "grad_norm": 0.37498077750205994,
      "learning_rate": 2.269459617956946e-06,
      "loss": 0.0117,
      "step": 2366420
    },
    {
      "epoch": 3.8727309623403574,
      "grad_norm": 0.14529792964458466,
      "learning_rate": 2.2693937257434288e-06,
      "loss": 0.0091,
      "step": 2366440
    },
    {
      "epoch": 3.8727636927790106,
      "grad_norm": 0.13924360275268555,
      "learning_rate": 2.269327833529912e-06,
      "loss": 0.011,
      "step": 2366460
    },
    {
      "epoch": 3.8727964232176637,
      "grad_norm": 0.16755127906799316,
      "learning_rate": 2.269261941316395e-06,
      "loss": 0.0116,
      "step": 2366480
    },
    {
      "epoch": 3.8728291536563173,
      "grad_norm": 0.13174867630004883,
      "learning_rate": 2.269196049102878e-06,
      "loss": 0.0136,
      "step": 2366500
    },
    {
      "epoch": 3.872861884094971,
      "grad_norm": 0.14584733545780182,
      "learning_rate": 2.2691301568893606e-06,
      "loss": 0.0083,
      "step": 2366520
    },
    {
      "epoch": 3.872894614533624,
      "grad_norm": 0.3043953478336334,
      "learning_rate": 2.2690642646758433e-06,
      "loss": 0.0111,
      "step": 2366540
    },
    {
      "epoch": 3.872927344972277,
      "grad_norm": 0.12183958292007446,
      "learning_rate": 2.2689983724623265e-06,
      "loss": 0.012,
      "step": 2366560
    },
    {
      "epoch": 3.872960075410931,
      "grad_norm": 0.2946297228336334,
      "learning_rate": 2.2689324802488092e-06,
      "loss": 0.0091,
      "step": 2366580
    },
    {
      "epoch": 3.872992805849584,
      "grad_norm": 0.15765754878520966,
      "learning_rate": 2.268866588035292e-06,
      "loss": 0.0136,
      "step": 2366600
    },
    {
      "epoch": 3.873025536288237,
      "grad_norm": 0.1896544247865677,
      "learning_rate": 2.2688006958217747e-06,
      "loss": 0.0135,
      "step": 2366620
    },
    {
      "epoch": 3.8730582667268907,
      "grad_norm": 0.23447144031524658,
      "learning_rate": 2.268734803608258e-06,
      "loss": 0.0095,
      "step": 2366640
    },
    {
      "epoch": 3.8730909971655443,
      "grad_norm": 0.16173918545246124,
      "learning_rate": 2.2686689113947406e-06,
      "loss": 0.013,
      "step": 2366660
    },
    {
      "epoch": 3.8731237276041974,
      "grad_norm": 0.08676686137914658,
      "learning_rate": 2.2686030191812238e-06,
      "loss": 0.006,
      "step": 2366680
    },
    {
      "epoch": 3.8731564580428506,
      "grad_norm": 0.2351006418466568,
      "learning_rate": 2.2685371269677065e-06,
      "loss": 0.009,
      "step": 2366700
    },
    {
      "epoch": 3.873189188481504,
      "grad_norm": 0.11477330327033997,
      "learning_rate": 2.2684712347541893e-06,
      "loss": 0.0113,
      "step": 2366720
    },
    {
      "epoch": 3.8732219189201573,
      "grad_norm": 0.10147509723901749,
      "learning_rate": 2.268405342540672e-06,
      "loss": 0.0099,
      "step": 2366740
    },
    {
      "epoch": 3.8732546493588105,
      "grad_norm": 0.0962471216917038,
      "learning_rate": 2.268339450327155e-06,
      "loss": 0.0115,
      "step": 2366760
    },
    {
      "epoch": 3.873287379797464,
      "grad_norm": 0.29098373651504517,
      "learning_rate": 2.268273558113638e-06,
      "loss": 0.0097,
      "step": 2366780
    },
    {
      "epoch": 3.873320110236117,
      "grad_norm": 0.147064208984375,
      "learning_rate": 2.2682076659001206e-06,
      "loss": 0.0127,
      "step": 2366800
    },
    {
      "epoch": 3.873352840674771,
      "grad_norm": 0.1091131642460823,
      "learning_rate": 2.268141773686604e-06,
      "loss": 0.0113,
      "step": 2366820
    },
    {
      "epoch": 3.873385571113424,
      "grad_norm": 0.3814544081687927,
      "learning_rate": 2.2680758814730865e-06,
      "loss": 0.0117,
      "step": 2366840
    },
    {
      "epoch": 3.8734183015520776,
      "grad_norm": 0.03906853497028351,
      "learning_rate": 2.2680099892595693e-06,
      "loss": 0.0106,
      "step": 2366860
    },
    {
      "epoch": 3.8734510319907307,
      "grad_norm": 0.25110679864883423,
      "learning_rate": 2.2679440970460524e-06,
      "loss": 0.0117,
      "step": 2366880
    },
    {
      "epoch": 3.873483762429384,
      "grad_norm": 1.716185212135315,
      "learning_rate": 2.267878204832535e-06,
      "loss": 0.0103,
      "step": 2366900
    },
    {
      "epoch": 3.8735164928680375,
      "grad_norm": 0.20510448515415192,
      "learning_rate": 2.267812312619018e-06,
      "loss": 0.0144,
      "step": 2366920
    },
    {
      "epoch": 3.8735492233066906,
      "grad_norm": 0.23835597932338715,
      "learning_rate": 2.2677464204055007e-06,
      "loss": 0.0093,
      "step": 2366940
    },
    {
      "epoch": 3.873581953745344,
      "grad_norm": 0.23754647374153137,
      "learning_rate": 2.267680528191984e-06,
      "loss": 0.0097,
      "step": 2366960
    },
    {
      "epoch": 3.8736146841839973,
      "grad_norm": 0.18387538194656372,
      "learning_rate": 2.2676146359784666e-06,
      "loss": 0.0123,
      "step": 2366980
    },
    {
      "epoch": 3.873647414622651,
      "grad_norm": 0.21843492984771729,
      "learning_rate": 2.2675487437649497e-06,
      "loss": 0.0078,
      "step": 2367000
    },
    {
      "epoch": 3.873680145061304,
      "grad_norm": 0.15029454231262207,
      "learning_rate": 2.2674828515514325e-06,
      "loss": 0.0092,
      "step": 2367020
    },
    {
      "epoch": 3.8737128754999572,
      "grad_norm": 0.13915003836154938,
      "learning_rate": 2.267416959337915e-06,
      "loss": 0.0114,
      "step": 2367040
    },
    {
      "epoch": 3.873745605938611,
      "grad_norm": 0.20900879800319672,
      "learning_rate": 2.267351067124398e-06,
      "loss": 0.0069,
      "step": 2367060
    },
    {
      "epoch": 3.873778336377264,
      "grad_norm": 0.6162710785865784,
      "learning_rate": 2.267285174910881e-06,
      "loss": 0.0109,
      "step": 2367080
    },
    {
      "epoch": 3.8738110668159176,
      "grad_norm": 0.21396887302398682,
      "learning_rate": 2.267219282697364e-06,
      "loss": 0.02,
      "step": 2367100
    },
    {
      "epoch": 3.8738437972545707,
      "grad_norm": 0.3801412284374237,
      "learning_rate": 2.2671533904838466e-06,
      "loss": 0.0101,
      "step": 2367120
    },
    {
      "epoch": 3.8738765276932243,
      "grad_norm": 0.11191323399543762,
      "learning_rate": 2.2670874982703293e-06,
      "loss": 0.0123,
      "step": 2367140
    },
    {
      "epoch": 3.8739092581318775,
      "grad_norm": 0.1343315988779068,
      "learning_rate": 2.2670216060568125e-06,
      "loss": 0.0064,
      "step": 2367160
    },
    {
      "epoch": 3.8739419885705306,
      "grad_norm": 0.10613169521093369,
      "learning_rate": 2.2669557138432952e-06,
      "loss": 0.0111,
      "step": 2367180
    },
    {
      "epoch": 3.873974719009184,
      "grad_norm": 0.12607550621032715,
      "learning_rate": 2.2668898216297784e-06,
      "loss": 0.0107,
      "step": 2367200
    },
    {
      "epoch": 3.8740074494478374,
      "grad_norm": 0.023130711168050766,
      "learning_rate": 2.266823929416261e-06,
      "loss": 0.0094,
      "step": 2367220
    },
    {
      "epoch": 3.874040179886491,
      "grad_norm": 0.19889701902866364,
      "learning_rate": 2.266758037202744e-06,
      "loss": 0.0135,
      "step": 2367240
    },
    {
      "epoch": 3.874072910325144,
      "grad_norm": 0.15609431266784668,
      "learning_rate": 2.2666921449892266e-06,
      "loss": 0.0097,
      "step": 2367260
    },
    {
      "epoch": 3.8741056407637977,
      "grad_norm": 0.23550079762935638,
      "learning_rate": 2.2666262527757098e-06,
      "loss": 0.0152,
      "step": 2367280
    },
    {
      "epoch": 3.874138371202451,
      "grad_norm": 0.37795206904411316,
      "learning_rate": 2.2665603605621925e-06,
      "loss": 0.0093,
      "step": 2367300
    },
    {
      "epoch": 3.874171101641104,
      "grad_norm": 0.20740213990211487,
      "learning_rate": 2.2664944683486753e-06,
      "loss": 0.0113,
      "step": 2367320
    },
    {
      "epoch": 3.8742038320797576,
      "grad_norm": 0.4470623731613159,
      "learning_rate": 2.2664285761351584e-06,
      "loss": 0.0094,
      "step": 2367340
    },
    {
      "epoch": 3.8742365625184108,
      "grad_norm": 0.3951262831687927,
      "learning_rate": 2.266362683921641e-06,
      "loss": 0.0153,
      "step": 2367360
    },
    {
      "epoch": 3.8742692929570643,
      "grad_norm": 0.3101314604282379,
      "learning_rate": 2.266296791708124e-06,
      "loss": 0.0091,
      "step": 2367380
    },
    {
      "epoch": 3.8743020233957175,
      "grad_norm": 0.29981938004493713,
      "learning_rate": 2.266230899494607e-06,
      "loss": 0.0124,
      "step": 2367400
    },
    {
      "epoch": 3.874334753834371,
      "grad_norm": 0.2768225073814392,
      "learning_rate": 2.26616500728109e-06,
      "loss": 0.0159,
      "step": 2367420
    },
    {
      "epoch": 3.8743674842730242,
      "grad_norm": 0.3063315451145172,
      "learning_rate": 2.2660991150675725e-06,
      "loss": 0.0098,
      "step": 2367440
    },
    {
      "epoch": 3.8744002147116774,
      "grad_norm": 0.9404512643814087,
      "learning_rate": 2.2660332228540553e-06,
      "loss": 0.0114,
      "step": 2367460
    },
    {
      "epoch": 3.874432945150331,
      "grad_norm": 0.35400059819221497,
      "learning_rate": 2.2659673306405384e-06,
      "loss": 0.0123,
      "step": 2367480
    },
    {
      "epoch": 3.874465675588984,
      "grad_norm": 0.7392861247062683,
      "learning_rate": 2.265901438427021e-06,
      "loss": 0.0133,
      "step": 2367500
    },
    {
      "epoch": 3.8744984060276377,
      "grad_norm": 0.15853483974933624,
      "learning_rate": 2.2658355462135044e-06,
      "loss": 0.0102,
      "step": 2367520
    },
    {
      "epoch": 3.874531136466291,
      "grad_norm": 0.2241610437631607,
      "learning_rate": 2.265769653999987e-06,
      "loss": 0.0105,
      "step": 2367540
    },
    {
      "epoch": 3.8745638669049445,
      "grad_norm": 0.251461386680603,
      "learning_rate": 2.26570376178647e-06,
      "loss": 0.0101,
      "step": 2367560
    },
    {
      "epoch": 3.8745965973435976,
      "grad_norm": 0.17907185852527618,
      "learning_rate": 2.265637869572953e-06,
      "loss": 0.0096,
      "step": 2367580
    },
    {
      "epoch": 3.8746293277822508,
      "grad_norm": 0.2501550018787384,
      "learning_rate": 2.2655719773594357e-06,
      "loss": 0.0119,
      "step": 2367600
    },
    {
      "epoch": 3.8746620582209044,
      "grad_norm": 0.055210839956998825,
      "learning_rate": 2.2655060851459185e-06,
      "loss": 0.0086,
      "step": 2367620
    },
    {
      "epoch": 3.8746947886595575,
      "grad_norm": 0.251847505569458,
      "learning_rate": 2.2654401929324012e-06,
      "loss": 0.0088,
      "step": 2367640
    },
    {
      "epoch": 3.874727519098211,
      "grad_norm": 0.22615426778793335,
      "learning_rate": 2.265374300718884e-06,
      "loss": 0.0133,
      "step": 2367660
    },
    {
      "epoch": 3.8747602495368643,
      "grad_norm": 0.22787366807460785,
      "learning_rate": 2.265308408505367e-06,
      "loss": 0.0112,
      "step": 2367680
    },
    {
      "epoch": 3.874792979975518,
      "grad_norm": 0.19067290425300598,
      "learning_rate": 2.2652425162918503e-06,
      "loss": 0.0069,
      "step": 2367700
    },
    {
      "epoch": 3.874825710414171,
      "grad_norm": 0.14049488306045532,
      "learning_rate": 2.265176624078333e-06,
      "loss": 0.0069,
      "step": 2367720
    },
    {
      "epoch": 3.874858440852824,
      "grad_norm": 0.23634018003940582,
      "learning_rate": 2.2651107318648158e-06,
      "loss": 0.0107,
      "step": 2367740
    },
    {
      "epoch": 3.8748911712914778,
      "grad_norm": 0.1171417310833931,
      "learning_rate": 2.2650448396512985e-06,
      "loss": 0.0125,
      "step": 2367760
    },
    {
      "epoch": 3.874923901730131,
      "grad_norm": 0.2987683415412903,
      "learning_rate": 2.2649789474377817e-06,
      "loss": 0.0107,
      "step": 2367780
    },
    {
      "epoch": 3.874956632168784,
      "grad_norm": 0.20756448805332184,
      "learning_rate": 2.2649130552242644e-06,
      "loss": 0.0131,
      "step": 2367800
    },
    {
      "epoch": 3.8749893626074376,
      "grad_norm": 0.2620333135128021,
      "learning_rate": 2.264847163010747e-06,
      "loss": 0.0074,
      "step": 2367820
    },
    {
      "epoch": 3.8750220930460912,
      "grad_norm": 0.14459024369716644,
      "learning_rate": 2.26478127079723e-06,
      "loss": 0.016,
      "step": 2367840
    },
    {
      "epoch": 3.8750548234847444,
      "grad_norm": 0.41517990827560425,
      "learning_rate": 2.2647153785837126e-06,
      "loss": 0.0089,
      "step": 2367860
    },
    {
      "epoch": 3.8750875539233975,
      "grad_norm": 0.570172905921936,
      "learning_rate": 2.2646494863701958e-06,
      "loss": 0.0097,
      "step": 2367880
    },
    {
      "epoch": 3.875120284362051,
      "grad_norm": 0.07410205900669098,
      "learning_rate": 2.264583594156679e-06,
      "loss": 0.0086,
      "step": 2367900
    },
    {
      "epoch": 3.8751530148007043,
      "grad_norm": 0.4225596785545349,
      "learning_rate": 2.2645177019431617e-06,
      "loss": 0.0109,
      "step": 2367920
    },
    {
      "epoch": 3.8751857452393574,
      "grad_norm": 0.30432164669036865,
      "learning_rate": 2.2644518097296444e-06,
      "loss": 0.0107,
      "step": 2367940
    },
    {
      "epoch": 3.875218475678011,
      "grad_norm": 0.5702047944068909,
      "learning_rate": 2.264385917516127e-06,
      "loss": 0.0097,
      "step": 2367960
    },
    {
      "epoch": 3.8752512061166646,
      "grad_norm": 0.8807928562164307,
      "learning_rate": 2.2643200253026103e-06,
      "loss": 0.0145,
      "step": 2367980
    },
    {
      "epoch": 3.8752839365553178,
      "grad_norm": 0.10122821480035782,
      "learning_rate": 2.264254133089093e-06,
      "loss": 0.0131,
      "step": 2368000
    },
    {
      "epoch": 3.875316666993971,
      "grad_norm": 0.33341914415359497,
      "learning_rate": 2.264188240875576e-06,
      "loss": 0.01,
      "step": 2368020
    },
    {
      "epoch": 3.8753493974326245,
      "grad_norm": 0.12964411079883575,
      "learning_rate": 2.2641223486620585e-06,
      "loss": 0.0096,
      "step": 2368040
    },
    {
      "epoch": 3.8753821278712777,
      "grad_norm": 0.18004226684570312,
      "learning_rate": 2.2640564564485417e-06,
      "loss": 0.0078,
      "step": 2368060
    },
    {
      "epoch": 3.875414858309931,
      "grad_norm": 0.17851653695106506,
      "learning_rate": 2.2639905642350245e-06,
      "loss": 0.0148,
      "step": 2368080
    },
    {
      "epoch": 3.8754475887485844,
      "grad_norm": 0.7648863196372986,
      "learning_rate": 2.2639246720215076e-06,
      "loss": 0.0144,
      "step": 2368100
    },
    {
      "epoch": 3.875480319187238,
      "grad_norm": 0.44548192620277405,
      "learning_rate": 2.2638587798079904e-06,
      "loss": 0.0164,
      "step": 2368120
    },
    {
      "epoch": 3.875513049625891,
      "grad_norm": 0.22289495170116425,
      "learning_rate": 2.263792887594473e-06,
      "loss": 0.0082,
      "step": 2368140
    },
    {
      "epoch": 3.8755457800645443,
      "grad_norm": 0.2831575870513916,
      "learning_rate": 2.263726995380956e-06,
      "loss": 0.0108,
      "step": 2368160
    },
    {
      "epoch": 3.875578510503198,
      "grad_norm": 0.203856959939003,
      "learning_rate": 2.263661103167439e-06,
      "loss": 0.0084,
      "step": 2368180
    },
    {
      "epoch": 3.875611240941851,
      "grad_norm": 0.582990288734436,
      "learning_rate": 2.2635952109539217e-06,
      "loss": 0.0097,
      "step": 2368200
    },
    {
      "epoch": 3.875643971380504,
      "grad_norm": 0.2941393554210663,
      "learning_rate": 2.263529318740405e-06,
      "loss": 0.0078,
      "step": 2368220
    },
    {
      "epoch": 3.875676701819158,
      "grad_norm": 0.33312898874282837,
      "learning_rate": 2.2634634265268876e-06,
      "loss": 0.0163,
      "step": 2368240
    },
    {
      "epoch": 3.8757094322578114,
      "grad_norm": 0.17655090987682343,
      "learning_rate": 2.2633975343133704e-06,
      "loss": 0.0117,
      "step": 2368260
    },
    {
      "epoch": 3.8757421626964645,
      "grad_norm": 0.2588925361633301,
      "learning_rate": 2.263331642099853e-06,
      "loss": 0.0121,
      "step": 2368280
    },
    {
      "epoch": 3.8757748931351177,
      "grad_norm": 0.05204908177256584,
      "learning_rate": 2.2632657498863363e-06,
      "loss": 0.0138,
      "step": 2368300
    },
    {
      "epoch": 3.8758076235737713,
      "grad_norm": 0.1349918097257614,
      "learning_rate": 2.263199857672819e-06,
      "loss": 0.0077,
      "step": 2368320
    },
    {
      "epoch": 3.8758403540124244,
      "grad_norm": 0.2074832171201706,
      "learning_rate": 2.2631339654593018e-06,
      "loss": 0.0072,
      "step": 2368340
    },
    {
      "epoch": 3.8758730844510776,
      "grad_norm": 0.14423492550849915,
      "learning_rate": 2.2630680732457845e-06,
      "loss": 0.0136,
      "step": 2368360
    },
    {
      "epoch": 3.875905814889731,
      "grad_norm": 0.1883828341960907,
      "learning_rate": 2.2630021810322677e-06,
      "loss": 0.0075,
      "step": 2368380
    },
    {
      "epoch": 3.8759385453283843,
      "grad_norm": 0.2661208212375641,
      "learning_rate": 2.2629362888187504e-06,
      "loss": 0.0108,
      "step": 2368400
    },
    {
      "epoch": 3.875971275767038,
      "grad_norm": 0.09147914499044418,
      "learning_rate": 2.2628703966052336e-06,
      "loss": 0.0077,
      "step": 2368420
    },
    {
      "epoch": 3.876004006205691,
      "grad_norm": 0.234429731965065,
      "learning_rate": 2.2628045043917163e-06,
      "loss": 0.0164,
      "step": 2368440
    },
    {
      "epoch": 3.8760367366443447,
      "grad_norm": 0.3456559479236603,
      "learning_rate": 2.262738612178199e-06,
      "loss": 0.0101,
      "step": 2368460
    },
    {
      "epoch": 3.876069467082998,
      "grad_norm": 0.1446157693862915,
      "learning_rate": 2.2626727199646818e-06,
      "loss": 0.0093,
      "step": 2368480
    },
    {
      "epoch": 3.876102197521651,
      "grad_norm": 0.26245078444480896,
      "learning_rate": 2.262606827751165e-06,
      "loss": 0.0102,
      "step": 2368500
    },
    {
      "epoch": 3.8761349279603046,
      "grad_norm": 0.0754651129245758,
      "learning_rate": 2.2625409355376477e-06,
      "loss": 0.0111,
      "step": 2368520
    },
    {
      "epoch": 3.8761676583989577,
      "grad_norm": 0.40418657660484314,
      "learning_rate": 2.2624750433241304e-06,
      "loss": 0.0144,
      "step": 2368540
    },
    {
      "epoch": 3.8762003888376113,
      "grad_norm": 0.22819237411022186,
      "learning_rate": 2.262409151110613e-06,
      "loss": 0.0087,
      "step": 2368560
    },
    {
      "epoch": 3.8762331192762645,
      "grad_norm": 0.1164492666721344,
      "learning_rate": 2.2623432588970963e-06,
      "loss": 0.0115,
      "step": 2368580
    },
    {
      "epoch": 3.876265849714918,
      "grad_norm": 0.1585581749677658,
      "learning_rate": 2.262277366683579e-06,
      "loss": 0.0105,
      "step": 2368600
    },
    {
      "epoch": 3.876298580153571,
      "grad_norm": 0.4035060405731201,
      "learning_rate": 2.2622114744700622e-06,
      "loss": 0.0121,
      "step": 2368620
    },
    {
      "epoch": 3.8763313105922244,
      "grad_norm": 0.07718431204557419,
      "learning_rate": 2.262145582256545e-06,
      "loss": 0.0089,
      "step": 2368640
    },
    {
      "epoch": 3.876364041030878,
      "grad_norm": 0.13200460374355316,
      "learning_rate": 2.2620796900430277e-06,
      "loss": 0.0121,
      "step": 2368660
    },
    {
      "epoch": 3.876396771469531,
      "grad_norm": 0.18369151651859283,
      "learning_rate": 2.2620137978295105e-06,
      "loss": 0.0074,
      "step": 2368680
    },
    {
      "epoch": 3.8764295019081847,
      "grad_norm": 0.2206815928220749,
      "learning_rate": 2.2619479056159936e-06,
      "loss": 0.0114,
      "step": 2368700
    },
    {
      "epoch": 3.876462232346838,
      "grad_norm": 0.19898702204227448,
      "learning_rate": 2.2618820134024764e-06,
      "loss": 0.0076,
      "step": 2368720
    },
    {
      "epoch": 3.8764949627854914,
      "grad_norm": 0.6316778063774109,
      "learning_rate": 2.261816121188959e-06,
      "loss": 0.0098,
      "step": 2368740
    },
    {
      "epoch": 3.8765276932241446,
      "grad_norm": 0.44034314155578613,
      "learning_rate": 2.2617502289754423e-06,
      "loss": 0.0098,
      "step": 2368760
    },
    {
      "epoch": 3.8765604236627977,
      "grad_norm": 0.13219480216503143,
      "learning_rate": 2.261684336761925e-06,
      "loss": 0.0137,
      "step": 2368780
    },
    {
      "epoch": 3.8765931541014513,
      "grad_norm": 0.3759649097919464,
      "learning_rate": 2.261618444548408e-06,
      "loss": 0.0156,
      "step": 2368800
    },
    {
      "epoch": 3.8766258845401045,
      "grad_norm": 0.21392421424388885,
      "learning_rate": 2.261552552334891e-06,
      "loss": 0.0054,
      "step": 2368820
    },
    {
      "epoch": 3.876658614978758,
      "grad_norm": 0.23133355379104614,
      "learning_rate": 2.2614866601213736e-06,
      "loss": 0.011,
      "step": 2368840
    },
    {
      "epoch": 3.8766913454174112,
      "grad_norm": 0.34909388422966003,
      "learning_rate": 2.2614207679078564e-06,
      "loss": 0.0081,
      "step": 2368860
    },
    {
      "epoch": 3.876724075856065,
      "grad_norm": 0.44462448358535767,
      "learning_rate": 2.261354875694339e-06,
      "loss": 0.0059,
      "step": 2368880
    },
    {
      "epoch": 3.876756806294718,
      "grad_norm": 0.4282202422618866,
      "learning_rate": 2.2612889834808223e-06,
      "loss": 0.0161,
      "step": 2368900
    },
    {
      "epoch": 3.876789536733371,
      "grad_norm": 0.23134402930736542,
      "learning_rate": 2.261223091267305e-06,
      "loss": 0.014,
      "step": 2368920
    },
    {
      "epoch": 3.8768222671720247,
      "grad_norm": 0.13115444779396057,
      "learning_rate": 2.261157199053788e-06,
      "loss": 0.0108,
      "step": 2368940
    },
    {
      "epoch": 3.876854997610678,
      "grad_norm": 0.1247996836900711,
      "learning_rate": 2.261091306840271e-06,
      "loss": 0.0073,
      "step": 2368960
    },
    {
      "epoch": 3.8768877280493315,
      "grad_norm": 0.2455284744501114,
      "learning_rate": 2.2610254146267537e-06,
      "loss": 0.0096,
      "step": 2368980
    },
    {
      "epoch": 3.8769204584879846,
      "grad_norm": 0.4489620327949524,
      "learning_rate": 2.260959522413237e-06,
      "loss": 0.0089,
      "step": 2369000
    },
    {
      "epoch": 3.876953188926638,
      "grad_norm": 0.11037948727607727,
      "learning_rate": 2.2608936301997196e-06,
      "loss": 0.0077,
      "step": 2369020
    },
    {
      "epoch": 3.8769859193652914,
      "grad_norm": 0.1159168928861618,
      "learning_rate": 2.2608277379862023e-06,
      "loss": 0.0073,
      "step": 2369040
    },
    {
      "epoch": 3.8770186498039445,
      "grad_norm": 0.3089137077331543,
      "learning_rate": 2.260761845772685e-06,
      "loss": 0.011,
      "step": 2369060
    },
    {
      "epoch": 3.877051380242598,
      "grad_norm": 0.21482981741428375,
      "learning_rate": 2.260695953559168e-06,
      "loss": 0.0103,
      "step": 2369080
    },
    {
      "epoch": 3.8770841106812512,
      "grad_norm": 0.19069726765155792,
      "learning_rate": 2.260630061345651e-06,
      "loss": 0.0103,
      "step": 2369100
    },
    {
      "epoch": 3.877116841119905,
      "grad_norm": 0.10737090557813644,
      "learning_rate": 2.260564169132134e-06,
      "loss": 0.0134,
      "step": 2369120
    },
    {
      "epoch": 3.877149571558558,
      "grad_norm": 0.18754521012306213,
      "learning_rate": 2.260498276918617e-06,
      "loss": 0.0102,
      "step": 2369140
    },
    {
      "epoch": 3.8771823019972116,
      "grad_norm": 0.27260974049568176,
      "learning_rate": 2.2604323847050996e-06,
      "loss": 0.0127,
      "step": 2369160
    },
    {
      "epoch": 3.8772150324358647,
      "grad_norm": 0.8505170345306396,
      "learning_rate": 2.2603664924915823e-06,
      "loss": 0.0145,
      "step": 2369180
    },
    {
      "epoch": 3.877247762874518,
      "grad_norm": 0.13148480653762817,
      "learning_rate": 2.2603006002780655e-06,
      "loss": 0.0143,
      "step": 2369200
    },
    {
      "epoch": 3.8772804933131715,
      "grad_norm": 0.46095114946365356,
      "learning_rate": 2.2602347080645482e-06,
      "loss": 0.012,
      "step": 2369220
    },
    {
      "epoch": 3.8773132237518246,
      "grad_norm": 0.0460323840379715,
      "learning_rate": 2.260168815851031e-06,
      "loss": 0.0079,
      "step": 2369240
    },
    {
      "epoch": 3.877345954190478,
      "grad_norm": 0.3300136923789978,
      "learning_rate": 2.2601029236375137e-06,
      "loss": 0.0118,
      "step": 2369260
    },
    {
      "epoch": 3.8773786846291314,
      "grad_norm": 0.09915848821401596,
      "learning_rate": 2.260037031423997e-06,
      "loss": 0.0095,
      "step": 2369280
    },
    {
      "epoch": 3.877411415067785,
      "grad_norm": 0.34220850467681885,
      "learning_rate": 2.2599711392104796e-06,
      "loss": 0.013,
      "step": 2369300
    },
    {
      "epoch": 3.877444145506438,
      "grad_norm": 0.2995581328868866,
      "learning_rate": 2.2599052469969628e-06,
      "loss": 0.0064,
      "step": 2369320
    },
    {
      "epoch": 3.8774768759450913,
      "grad_norm": 0.12704713642597198,
      "learning_rate": 2.2598393547834455e-06,
      "loss": 0.0069,
      "step": 2369340
    },
    {
      "epoch": 3.877509606383745,
      "grad_norm": 0.15590934455394745,
      "learning_rate": 2.2597734625699283e-06,
      "loss": 0.0122,
      "step": 2369360
    },
    {
      "epoch": 3.877542336822398,
      "grad_norm": 0.22197970747947693,
      "learning_rate": 2.259707570356411e-06,
      "loss": 0.0081,
      "step": 2369380
    },
    {
      "epoch": 3.877575067261051,
      "grad_norm": 0.2661387026309967,
      "learning_rate": 2.259641678142894e-06,
      "loss": 0.0098,
      "step": 2369400
    },
    {
      "epoch": 3.8776077976997048,
      "grad_norm": 0.31654781103134155,
      "learning_rate": 2.259575785929377e-06,
      "loss": 0.011,
      "step": 2369420
    },
    {
      "epoch": 3.8776405281383584,
      "grad_norm": 0.3942877948284149,
      "learning_rate": 2.2595098937158596e-06,
      "loss": 0.0152,
      "step": 2369440
    },
    {
      "epoch": 3.8776732585770115,
      "grad_norm": 0.19393649697303772,
      "learning_rate": 2.259444001502343e-06,
      "loss": 0.0143,
      "step": 2369460
    },
    {
      "epoch": 3.8777059890156647,
      "grad_norm": 0.23073233664035797,
      "learning_rate": 2.2593781092888256e-06,
      "loss": 0.0145,
      "step": 2369480
    },
    {
      "epoch": 3.8777387194543183,
      "grad_norm": 0.11870970577001572,
      "learning_rate": 2.2593122170753083e-06,
      "loss": 0.0082,
      "step": 2369500
    },
    {
      "epoch": 3.8777714498929714,
      "grad_norm": 0.3612184524536133,
      "learning_rate": 2.2592463248617915e-06,
      "loss": 0.0146,
      "step": 2369520
    },
    {
      "epoch": 3.8778041803316246,
      "grad_norm": 0.2173452079296112,
      "learning_rate": 2.259180432648274e-06,
      "loss": 0.0099,
      "step": 2369540
    },
    {
      "epoch": 3.877836910770278,
      "grad_norm": 0.11473395675420761,
      "learning_rate": 2.259114540434757e-06,
      "loss": 0.0106,
      "step": 2369560
    },
    {
      "epoch": 3.8778696412089317,
      "grad_norm": 0.2577148675918579,
      "learning_rate": 2.2590486482212397e-06,
      "loss": 0.0103,
      "step": 2369580
    },
    {
      "epoch": 3.877902371647585,
      "grad_norm": 0.35868096351623535,
      "learning_rate": 2.258982756007723e-06,
      "loss": 0.0113,
      "step": 2369600
    },
    {
      "epoch": 3.877935102086238,
      "grad_norm": 0.4454348087310791,
      "learning_rate": 2.2589168637942056e-06,
      "loss": 0.0099,
      "step": 2369620
    },
    {
      "epoch": 3.8779678325248916,
      "grad_norm": 0.3760265111923218,
      "learning_rate": 2.2588509715806887e-06,
      "loss": 0.0113,
      "step": 2369640
    },
    {
      "epoch": 3.878000562963545,
      "grad_norm": 0.6256715059280396,
      "learning_rate": 2.2587850793671715e-06,
      "loss": 0.0118,
      "step": 2369660
    },
    {
      "epoch": 3.878033293402198,
      "grad_norm": 0.23694801330566406,
      "learning_rate": 2.2587191871536542e-06,
      "loss": 0.0186,
      "step": 2369680
    },
    {
      "epoch": 3.8780660238408515,
      "grad_norm": 0.351059228181839,
      "learning_rate": 2.258653294940137e-06,
      "loss": 0.0126,
      "step": 2369700
    },
    {
      "epoch": 3.878098754279505,
      "grad_norm": 0.24395272135734558,
      "learning_rate": 2.25858740272662e-06,
      "loss": 0.0132,
      "step": 2369720
    },
    {
      "epoch": 3.8781314847181583,
      "grad_norm": 0.4646955728530884,
      "learning_rate": 2.258521510513103e-06,
      "loss": 0.0128,
      "step": 2369740
    },
    {
      "epoch": 3.8781642151568114,
      "grad_norm": 0.17811864614486694,
      "learning_rate": 2.2584556182995856e-06,
      "loss": 0.0153,
      "step": 2369760
    },
    {
      "epoch": 3.878196945595465,
      "grad_norm": 0.2979585528373718,
      "learning_rate": 2.2583897260860683e-06,
      "loss": 0.0118,
      "step": 2369780
    },
    {
      "epoch": 3.878229676034118,
      "grad_norm": 0.4052545130252838,
      "learning_rate": 2.2583238338725515e-06,
      "loss": 0.0128,
      "step": 2369800
    },
    {
      "epoch": 3.8782624064727713,
      "grad_norm": 0.1427733451128006,
      "learning_rate": 2.2582579416590342e-06,
      "loss": 0.0104,
      "step": 2369820
    },
    {
      "epoch": 3.878295136911425,
      "grad_norm": 0.2620590925216675,
      "learning_rate": 2.2581920494455174e-06,
      "loss": 0.0079,
      "step": 2369840
    },
    {
      "epoch": 3.878327867350078,
      "grad_norm": 0.0559210404753685,
      "learning_rate": 2.258126157232e-06,
      "loss": 0.0092,
      "step": 2369860
    },
    {
      "epoch": 3.8783605977887317,
      "grad_norm": 0.17585700750350952,
      "learning_rate": 2.258060265018483e-06,
      "loss": 0.0067,
      "step": 2369880
    },
    {
      "epoch": 3.878393328227385,
      "grad_norm": 0.037406887859106064,
      "learning_rate": 2.2579943728049656e-06,
      "loss": 0.0076,
      "step": 2369900
    },
    {
      "epoch": 3.8784260586660384,
      "grad_norm": 0.4783688485622406,
      "learning_rate": 2.257928480591449e-06,
      "loss": 0.0125,
      "step": 2369920
    },
    {
      "epoch": 3.8784587891046916,
      "grad_norm": 0.35398536920547485,
      "learning_rate": 2.2578625883779315e-06,
      "loss": 0.0082,
      "step": 2369940
    },
    {
      "epoch": 3.8784915195433447,
      "grad_norm": 0.3762570917606354,
      "learning_rate": 2.2577966961644143e-06,
      "loss": 0.0143,
      "step": 2369960
    },
    {
      "epoch": 3.8785242499819983,
      "grad_norm": 0.15834656357765198,
      "learning_rate": 2.2577308039508974e-06,
      "loss": 0.0091,
      "step": 2369980
    },
    {
      "epoch": 3.8785569804206514,
      "grad_norm": 0.1532316505908966,
      "learning_rate": 2.25766491173738e-06,
      "loss": 0.0107,
      "step": 2370000
    },
    {
      "epoch": 3.878589710859305,
      "grad_norm": 0.3598123788833618,
      "learning_rate": 2.257599019523863e-06,
      "loss": 0.0142,
      "step": 2370020
    },
    {
      "epoch": 3.878622441297958,
      "grad_norm": 0.18989306688308716,
      "learning_rate": 2.257533127310346e-06,
      "loss": 0.0094,
      "step": 2370040
    },
    {
      "epoch": 3.878655171736612,
      "grad_norm": 0.2687033712863922,
      "learning_rate": 2.257467235096829e-06,
      "loss": 0.0094,
      "step": 2370060
    },
    {
      "epoch": 3.878687902175265,
      "grad_norm": 0.2714621126651764,
      "learning_rate": 2.2574013428833116e-06,
      "loss": 0.0173,
      "step": 2370080
    },
    {
      "epoch": 3.878720632613918,
      "grad_norm": 0.1672535389661789,
      "learning_rate": 2.2573354506697943e-06,
      "loss": 0.006,
      "step": 2370100
    },
    {
      "epoch": 3.8787533630525717,
      "grad_norm": 0.186017706990242,
      "learning_rate": 2.2572695584562775e-06,
      "loss": 0.0129,
      "step": 2370120
    },
    {
      "epoch": 3.878786093491225,
      "grad_norm": 0.6789759993553162,
      "learning_rate": 2.25720366624276e-06,
      "loss": 0.0123,
      "step": 2370140
    },
    {
      "epoch": 3.8788188239298784,
      "grad_norm": 0.0895661786198616,
      "learning_rate": 2.2571377740292434e-06,
      "loss": 0.0113,
      "step": 2370160
    },
    {
      "epoch": 3.8788515543685316,
      "grad_norm": 0.11780627071857452,
      "learning_rate": 2.257071881815726e-06,
      "loss": 0.0072,
      "step": 2370180
    },
    {
      "epoch": 3.878884284807185,
      "grad_norm": 0.6735424399375916,
      "learning_rate": 2.257005989602209e-06,
      "loss": 0.0092,
      "step": 2370200
    },
    {
      "epoch": 3.8789170152458383,
      "grad_norm": 0.22706034779548645,
      "learning_rate": 2.256940097388692e-06,
      "loss": 0.0128,
      "step": 2370220
    },
    {
      "epoch": 3.8789497456844915,
      "grad_norm": 0.432708740234375,
      "learning_rate": 2.2568742051751747e-06,
      "loss": 0.0084,
      "step": 2370240
    },
    {
      "epoch": 3.878982476123145,
      "grad_norm": 0.10122446715831757,
      "learning_rate": 2.2568083129616575e-06,
      "loss": 0.0113,
      "step": 2370260
    },
    {
      "epoch": 3.879015206561798,
      "grad_norm": 0.1170823872089386,
      "learning_rate": 2.2567424207481402e-06,
      "loss": 0.0139,
      "step": 2370280
    },
    {
      "epoch": 3.879047937000452,
      "grad_norm": 0.19712388515472412,
      "learning_rate": 2.256676528534623e-06,
      "loss": 0.011,
      "step": 2370300
    },
    {
      "epoch": 3.879080667439105,
      "grad_norm": 0.4860222637653351,
      "learning_rate": 2.256610636321106e-06,
      "loss": 0.0094,
      "step": 2370320
    },
    {
      "epoch": 3.8791133978777586,
      "grad_norm": 0.21105967462062836,
      "learning_rate": 2.2565447441075893e-06,
      "loss": 0.0121,
      "step": 2370340
    },
    {
      "epoch": 3.8791461283164117,
      "grad_norm": 0.314628541469574,
      "learning_rate": 2.256478851894072e-06,
      "loss": 0.0109,
      "step": 2370360
    },
    {
      "epoch": 3.879178858755065,
      "grad_norm": 0.07420558482408524,
      "learning_rate": 2.2564129596805548e-06,
      "loss": 0.0099,
      "step": 2370380
    },
    {
      "epoch": 3.8792115891937184,
      "grad_norm": 0.1624428927898407,
      "learning_rate": 2.2563470674670375e-06,
      "loss": 0.0081,
      "step": 2370400
    },
    {
      "epoch": 3.8792443196323716,
      "grad_norm": 0.3244051933288574,
      "learning_rate": 2.2562811752535207e-06,
      "loss": 0.0096,
      "step": 2370420
    },
    {
      "epoch": 3.879277050071025,
      "grad_norm": 0.4052940905094147,
      "learning_rate": 2.2562152830400034e-06,
      "loss": 0.0104,
      "step": 2370440
    },
    {
      "epoch": 3.8793097805096783,
      "grad_norm": 0.30661740899086,
      "learning_rate": 2.256149390826486e-06,
      "loss": 0.0117,
      "step": 2370460
    },
    {
      "epoch": 3.879342510948332,
      "grad_norm": 0.15546366572380066,
      "learning_rate": 2.256083498612969e-06,
      "loss": 0.0104,
      "step": 2370480
    },
    {
      "epoch": 3.879375241386985,
      "grad_norm": 0.787230372428894,
      "learning_rate": 2.2560176063994516e-06,
      "loss": 0.0163,
      "step": 2370500
    },
    {
      "epoch": 3.8794079718256382,
      "grad_norm": 0.1130603477358818,
      "learning_rate": 2.255951714185935e-06,
      "loss": 0.008,
      "step": 2370520
    },
    {
      "epoch": 3.879440702264292,
      "grad_norm": 0.6227585077285767,
      "learning_rate": 2.255885821972418e-06,
      "loss": 0.0159,
      "step": 2370540
    },
    {
      "epoch": 3.879473432702945,
      "grad_norm": 0.8121296167373657,
      "learning_rate": 2.2558199297589007e-06,
      "loss": 0.019,
      "step": 2370560
    },
    {
      "epoch": 3.8795061631415986,
      "grad_norm": 0.251869797706604,
      "learning_rate": 2.2557540375453834e-06,
      "loss": 0.01,
      "step": 2370580
    },
    {
      "epoch": 3.8795388935802517,
      "grad_norm": 0.22476135194301605,
      "learning_rate": 2.255688145331866e-06,
      "loss": 0.01,
      "step": 2370600
    },
    {
      "epoch": 3.8795716240189053,
      "grad_norm": 0.11451569199562073,
      "learning_rate": 2.2556222531183493e-06,
      "loss": 0.0129,
      "step": 2370620
    },
    {
      "epoch": 3.8796043544575585,
      "grad_norm": 0.2704525589942932,
      "learning_rate": 2.255556360904832e-06,
      "loss": 0.0092,
      "step": 2370640
    },
    {
      "epoch": 3.8796370848962116,
      "grad_norm": 0.08903585374355316,
      "learning_rate": 2.255490468691315e-06,
      "loss": 0.0084,
      "step": 2370660
    },
    {
      "epoch": 3.879669815334865,
      "grad_norm": 0.19078373908996582,
      "learning_rate": 2.2554245764777976e-06,
      "loss": 0.0071,
      "step": 2370680
    },
    {
      "epoch": 3.8797025457735184,
      "grad_norm": 0.11124313622713089,
      "learning_rate": 2.2553586842642807e-06,
      "loss": 0.0085,
      "step": 2370700
    },
    {
      "epoch": 3.879735276212172,
      "grad_norm": 0.22311370074748993,
      "learning_rate": 2.2552927920507635e-06,
      "loss": 0.0109,
      "step": 2370720
    },
    {
      "epoch": 3.879768006650825,
      "grad_norm": 0.19418905675411224,
      "learning_rate": 2.2552268998372466e-06,
      "loss": 0.0116,
      "step": 2370740
    },
    {
      "epoch": 3.8798007370894787,
      "grad_norm": 0.4083288013935089,
      "learning_rate": 2.2551610076237294e-06,
      "loss": 0.0107,
      "step": 2370760
    },
    {
      "epoch": 3.879833467528132,
      "grad_norm": 0.5527884364128113,
      "learning_rate": 2.255095115410212e-06,
      "loss": 0.0071,
      "step": 2370780
    },
    {
      "epoch": 3.879866197966785,
      "grad_norm": 0.15584060549736023,
      "learning_rate": 2.255029223196695e-06,
      "loss": 0.0095,
      "step": 2370800
    },
    {
      "epoch": 3.8798989284054386,
      "grad_norm": 0.10674458742141724,
      "learning_rate": 2.254963330983178e-06,
      "loss": 0.0071,
      "step": 2370820
    },
    {
      "epoch": 3.8799316588440917,
      "grad_norm": 0.2968120276927948,
      "learning_rate": 2.2548974387696607e-06,
      "loss": 0.0077,
      "step": 2370840
    },
    {
      "epoch": 3.879964389282745,
      "grad_norm": 0.19353221356868744,
      "learning_rate": 2.254831546556144e-06,
      "loss": 0.0069,
      "step": 2370860
    },
    {
      "epoch": 3.8799971197213985,
      "grad_norm": 0.4096539616584778,
      "learning_rate": 2.2547656543426267e-06,
      "loss": 0.0109,
      "step": 2370880
    },
    {
      "epoch": 3.880029850160052,
      "grad_norm": 0.5043774247169495,
      "learning_rate": 2.2546997621291094e-06,
      "loss": 0.0092,
      "step": 2370900
    },
    {
      "epoch": 3.8800625805987052,
      "grad_norm": 0.42099350690841675,
      "learning_rate": 2.254633869915592e-06,
      "loss": 0.014,
      "step": 2370920
    },
    {
      "epoch": 3.8800953110373584,
      "grad_norm": 0.3144209384918213,
      "learning_rate": 2.2545679777020753e-06,
      "loss": 0.0105,
      "step": 2370940
    },
    {
      "epoch": 3.880128041476012,
      "grad_norm": 0.1322469860315323,
      "learning_rate": 2.254502085488558e-06,
      "loss": 0.0109,
      "step": 2370960
    },
    {
      "epoch": 3.880160771914665,
      "grad_norm": 0.2531459629535675,
      "learning_rate": 2.2544361932750408e-06,
      "loss": 0.0077,
      "step": 2370980
    },
    {
      "epoch": 3.8801935023533183,
      "grad_norm": 0.2698415517807007,
      "learning_rate": 2.2543703010615235e-06,
      "loss": 0.0104,
      "step": 2371000
    },
    {
      "epoch": 3.880226232791972,
      "grad_norm": 0.22572477161884308,
      "learning_rate": 2.2543044088480067e-06,
      "loss": 0.0075,
      "step": 2371020
    },
    {
      "epoch": 3.8802589632306255,
      "grad_norm": 0.26471686363220215,
      "learning_rate": 2.2542385166344894e-06,
      "loss": 0.0084,
      "step": 2371040
    },
    {
      "epoch": 3.8802916936692786,
      "grad_norm": 0.31030720472335815,
      "learning_rate": 2.2541726244209726e-06,
      "loss": 0.0135,
      "step": 2371060
    },
    {
      "epoch": 3.8803244241079318,
      "grad_norm": 0.16951411962509155,
      "learning_rate": 2.2541067322074553e-06,
      "loss": 0.0142,
      "step": 2371080
    },
    {
      "epoch": 3.8803571545465854,
      "grad_norm": 0.29260504245758057,
      "learning_rate": 2.254040839993938e-06,
      "loss": 0.0124,
      "step": 2371100
    },
    {
      "epoch": 3.8803898849852385,
      "grad_norm": 0.12814216315746307,
      "learning_rate": 2.253974947780421e-06,
      "loss": 0.01,
      "step": 2371120
    },
    {
      "epoch": 3.8804226154238917,
      "grad_norm": 1.038105845451355,
      "learning_rate": 2.253909055566904e-06,
      "loss": 0.0093,
      "step": 2371140
    },
    {
      "epoch": 3.8804553458625453,
      "grad_norm": 0.3551676869392395,
      "learning_rate": 2.2538431633533867e-06,
      "loss": 0.0101,
      "step": 2371160
    },
    {
      "epoch": 3.880488076301199,
      "grad_norm": 0.19578474760055542,
      "learning_rate": 2.2537772711398694e-06,
      "loss": 0.0128,
      "step": 2371180
    },
    {
      "epoch": 3.880520806739852,
      "grad_norm": 0.3359357714653015,
      "learning_rate": 2.253711378926352e-06,
      "loss": 0.0126,
      "step": 2371200
    },
    {
      "epoch": 3.880553537178505,
      "grad_norm": 0.4425346255302429,
      "learning_rate": 2.2536454867128353e-06,
      "loss": 0.0117,
      "step": 2371220
    },
    {
      "epoch": 3.8805862676171587,
      "grad_norm": 0.31338897347450256,
      "learning_rate": 2.253579594499318e-06,
      "loss": 0.0083,
      "step": 2371240
    },
    {
      "epoch": 3.880618998055812,
      "grad_norm": 0.33910664916038513,
      "learning_rate": 2.2535137022858012e-06,
      "loss": 0.0096,
      "step": 2371260
    },
    {
      "epoch": 3.880651728494465,
      "grad_norm": 0.24251775443553925,
      "learning_rate": 2.253447810072284e-06,
      "loss": 0.0092,
      "step": 2371280
    },
    {
      "epoch": 3.8806844589331186,
      "grad_norm": 0.06008704751729965,
      "learning_rate": 2.2533819178587667e-06,
      "loss": 0.0089,
      "step": 2371300
    },
    {
      "epoch": 3.8807171893717722,
      "grad_norm": 0.6210569739341736,
      "learning_rate": 2.2533160256452495e-06,
      "loss": 0.0136,
      "step": 2371320
    },
    {
      "epoch": 3.8807499198104254,
      "grad_norm": 0.14301832020282745,
      "learning_rate": 2.2532501334317326e-06,
      "loss": 0.0149,
      "step": 2371340
    },
    {
      "epoch": 3.8807826502490785,
      "grad_norm": 0.302104115486145,
      "learning_rate": 2.2531842412182154e-06,
      "loss": 0.0208,
      "step": 2371360
    },
    {
      "epoch": 3.880815380687732,
      "grad_norm": 0.3851124942302704,
      "learning_rate": 2.253118349004698e-06,
      "loss": 0.0189,
      "step": 2371380
    },
    {
      "epoch": 3.8808481111263853,
      "grad_norm": 0.19630706310272217,
      "learning_rate": 2.2530524567911813e-06,
      "loss": 0.0135,
      "step": 2371400
    },
    {
      "epoch": 3.8808808415650384,
      "grad_norm": 0.04951820895075798,
      "learning_rate": 2.252986564577664e-06,
      "loss": 0.01,
      "step": 2371420
    },
    {
      "epoch": 3.880913572003692,
      "grad_norm": 0.3570711314678192,
      "learning_rate": 2.252920672364147e-06,
      "loss": 0.0095,
      "step": 2371440
    },
    {
      "epoch": 3.880946302442345,
      "grad_norm": 0.16585475206375122,
      "learning_rate": 2.25285478015063e-06,
      "loss": 0.0105,
      "step": 2371460
    },
    {
      "epoch": 3.8809790328809988,
      "grad_norm": 0.42980530858039856,
      "learning_rate": 2.2527888879371127e-06,
      "loss": 0.0105,
      "step": 2371480
    },
    {
      "epoch": 3.881011763319652,
      "grad_norm": 0.43220287561416626,
      "learning_rate": 2.2527229957235954e-06,
      "loss": 0.0139,
      "step": 2371500
    },
    {
      "epoch": 3.8810444937583055,
      "grad_norm": 0.8905355334281921,
      "learning_rate": 2.252657103510078e-06,
      "loss": 0.0181,
      "step": 2371520
    },
    {
      "epoch": 3.8810772241969587,
      "grad_norm": 0.08903177082538605,
      "learning_rate": 2.2525912112965613e-06,
      "loss": 0.0127,
      "step": 2371540
    },
    {
      "epoch": 3.881109954635612,
      "grad_norm": 0.15699948370456696,
      "learning_rate": 2.252525319083044e-06,
      "loss": 0.0121,
      "step": 2371560
    },
    {
      "epoch": 3.8811426850742654,
      "grad_norm": 0.37491732835769653,
      "learning_rate": 2.252459426869527e-06,
      "loss": 0.0093,
      "step": 2371580
    },
    {
      "epoch": 3.8811754155129186,
      "grad_norm": 0.36044710874557495,
      "learning_rate": 2.25239353465601e-06,
      "loss": 0.0095,
      "step": 2371600
    },
    {
      "epoch": 3.881208145951572,
      "grad_norm": 0.25761350989341736,
      "learning_rate": 2.2523276424424927e-06,
      "loss": 0.0096,
      "step": 2371620
    },
    {
      "epoch": 3.8812408763902253,
      "grad_norm": 0.41344407200813293,
      "learning_rate": 2.252261750228976e-06,
      "loss": 0.0125,
      "step": 2371640
    },
    {
      "epoch": 3.881273606828879,
      "grad_norm": 1.0289185047149658,
      "learning_rate": 2.2521958580154586e-06,
      "loss": 0.0067,
      "step": 2371660
    },
    {
      "epoch": 3.881306337267532,
      "grad_norm": 0.08120963722467422,
      "learning_rate": 2.2521299658019413e-06,
      "loss": 0.0096,
      "step": 2371680
    },
    {
      "epoch": 3.881339067706185,
      "grad_norm": 0.3387785851955414,
      "learning_rate": 2.252064073588424e-06,
      "loss": 0.0094,
      "step": 2371700
    },
    {
      "epoch": 3.881371798144839,
      "grad_norm": 0.37982651591300964,
      "learning_rate": 2.251998181374907e-06,
      "loss": 0.012,
      "step": 2371720
    },
    {
      "epoch": 3.881404528583492,
      "grad_norm": 1.1457146406173706,
      "learning_rate": 2.25193228916139e-06,
      "loss": 0.0095,
      "step": 2371740
    },
    {
      "epoch": 3.8814372590221455,
      "grad_norm": 0.4423733949661255,
      "learning_rate": 2.251866396947873e-06,
      "loss": 0.0145,
      "step": 2371760
    },
    {
      "epoch": 3.8814699894607987,
      "grad_norm": 0.11817406862974167,
      "learning_rate": 2.251800504734356e-06,
      "loss": 0.0153,
      "step": 2371780
    },
    {
      "epoch": 3.8815027198994523,
      "grad_norm": 0.1447920948266983,
      "learning_rate": 2.2517346125208386e-06,
      "loss": 0.0133,
      "step": 2371800
    },
    {
      "epoch": 3.8815354503381054,
      "grad_norm": 0.29752519726753235,
      "learning_rate": 2.2516687203073213e-06,
      "loss": 0.01,
      "step": 2371820
    },
    {
      "epoch": 3.8815681807767586,
      "grad_norm": 0.7971218824386597,
      "learning_rate": 2.2516028280938045e-06,
      "loss": 0.0122,
      "step": 2371840
    },
    {
      "epoch": 3.881600911215412,
      "grad_norm": 0.6180862188339233,
      "learning_rate": 2.2515369358802873e-06,
      "loss": 0.0104,
      "step": 2371860
    },
    {
      "epoch": 3.8816336416540653,
      "grad_norm": 0.4414272606372833,
      "learning_rate": 2.25147104366677e-06,
      "loss": 0.0089,
      "step": 2371880
    },
    {
      "epoch": 3.881666372092719,
      "grad_norm": 0.18474239110946655,
      "learning_rate": 2.2514051514532527e-06,
      "loss": 0.0118,
      "step": 2371900
    },
    {
      "epoch": 3.881699102531372,
      "grad_norm": 0.24625003337860107,
      "learning_rate": 2.251339259239736e-06,
      "loss": 0.0086,
      "step": 2371920
    },
    {
      "epoch": 3.8817318329700257,
      "grad_norm": 0.11681806296110153,
      "learning_rate": 2.2512733670262186e-06,
      "loss": 0.0075,
      "step": 2371940
    },
    {
      "epoch": 3.881764563408679,
      "grad_norm": 0.17767880856990814,
      "learning_rate": 2.251207474812702e-06,
      "loss": 0.0139,
      "step": 2371960
    },
    {
      "epoch": 3.881797293847332,
      "grad_norm": 0.18915672600269318,
      "learning_rate": 2.2511415825991845e-06,
      "loss": 0.0129,
      "step": 2371980
    },
    {
      "epoch": 3.8818300242859856,
      "grad_norm": 0.20166611671447754,
      "learning_rate": 2.2510756903856673e-06,
      "loss": 0.0131,
      "step": 2372000
    },
    {
      "epoch": 3.8818627547246387,
      "grad_norm": 0.09989521652460098,
      "learning_rate": 2.25100979817215e-06,
      "loss": 0.0117,
      "step": 2372020
    },
    {
      "epoch": 3.8818954851632923,
      "grad_norm": 0.40488898754119873,
      "learning_rate": 2.250943905958633e-06,
      "loss": 0.0136,
      "step": 2372040
    },
    {
      "epoch": 3.8819282156019455,
      "grad_norm": 0.18769878149032593,
      "learning_rate": 2.250878013745116e-06,
      "loss": 0.0094,
      "step": 2372060
    },
    {
      "epoch": 3.881960946040599,
      "grad_norm": 0.07315909117460251,
      "learning_rate": 2.2508121215315987e-06,
      "loss": 0.0071,
      "step": 2372080
    },
    {
      "epoch": 3.881993676479252,
      "grad_norm": 0.17617499828338623,
      "learning_rate": 2.250746229318082e-06,
      "loss": 0.0108,
      "step": 2372100
    },
    {
      "epoch": 3.8820264069179053,
      "grad_norm": 0.3767072558403015,
      "learning_rate": 2.2506803371045646e-06,
      "loss": 0.0084,
      "step": 2372120
    },
    {
      "epoch": 3.882059137356559,
      "grad_norm": 0.2847834527492523,
      "learning_rate": 2.2506144448910473e-06,
      "loss": 0.0133,
      "step": 2372140
    },
    {
      "epoch": 3.882091867795212,
      "grad_norm": 0.10640634596347809,
      "learning_rate": 2.2505485526775305e-06,
      "loss": 0.0089,
      "step": 2372160
    },
    {
      "epoch": 3.8821245982338657,
      "grad_norm": 0.14381907880306244,
      "learning_rate": 2.250482660464013e-06,
      "loss": 0.0077,
      "step": 2372180
    },
    {
      "epoch": 3.882157328672519,
      "grad_norm": 0.34775546193122864,
      "learning_rate": 2.250416768250496e-06,
      "loss": 0.0125,
      "step": 2372200
    },
    {
      "epoch": 3.8821900591111724,
      "grad_norm": 0.6143619418144226,
      "learning_rate": 2.2503508760369787e-06,
      "loss": 0.0128,
      "step": 2372220
    },
    {
      "epoch": 3.8822227895498256,
      "grad_norm": 0.7558390498161316,
      "learning_rate": 2.250284983823462e-06,
      "loss": 0.0142,
      "step": 2372240
    },
    {
      "epoch": 3.8822555199884787,
      "grad_norm": 0.3836444318294525,
      "learning_rate": 2.2502190916099446e-06,
      "loss": 0.0142,
      "step": 2372260
    },
    {
      "epoch": 3.8822882504271323,
      "grad_norm": 0.5029927492141724,
      "learning_rate": 2.2501531993964278e-06,
      "loss": 0.0134,
      "step": 2372280
    },
    {
      "epoch": 3.8823209808657855,
      "grad_norm": 3.45918869972229,
      "learning_rate": 2.2500873071829105e-06,
      "loss": 0.0088,
      "step": 2372300
    },
    {
      "epoch": 3.8823537113044386,
      "grad_norm": 0.09879165887832642,
      "learning_rate": 2.2500214149693932e-06,
      "loss": 0.0123,
      "step": 2372320
    },
    {
      "epoch": 3.882386441743092,
      "grad_norm": 0.2896769642829895,
      "learning_rate": 2.249955522755876e-06,
      "loss": 0.009,
      "step": 2372340
    },
    {
      "epoch": 3.882419172181746,
      "grad_norm": 0.09413787722587585,
      "learning_rate": 2.249889630542359e-06,
      "loss": 0.0102,
      "step": 2372360
    },
    {
      "epoch": 3.882451902620399,
      "grad_norm": 0.3483161926269531,
      "learning_rate": 2.249823738328842e-06,
      "loss": 0.0126,
      "step": 2372380
    },
    {
      "epoch": 3.882484633059052,
      "grad_norm": 0.3101382553577423,
      "learning_rate": 2.2497578461153246e-06,
      "loss": 0.0109,
      "step": 2372400
    },
    {
      "epoch": 3.8825173634977057,
      "grad_norm": 0.21015521883964539,
      "learning_rate": 2.2496919539018074e-06,
      "loss": 0.0092,
      "step": 2372420
    },
    {
      "epoch": 3.882550093936359,
      "grad_norm": 0.43937188386917114,
      "learning_rate": 2.2496260616882905e-06,
      "loss": 0.0134,
      "step": 2372440
    },
    {
      "epoch": 3.882582824375012,
      "grad_norm": 0.33589285612106323,
      "learning_rate": 2.2495601694747733e-06,
      "loss": 0.0153,
      "step": 2372460
    },
    {
      "epoch": 3.8826155548136656,
      "grad_norm": 0.24345742166042328,
      "learning_rate": 2.2494942772612564e-06,
      "loss": 0.0132,
      "step": 2372480
    },
    {
      "epoch": 3.882648285252319,
      "grad_norm": 0.17952993512153625,
      "learning_rate": 2.249428385047739e-06,
      "loss": 0.0115,
      "step": 2372500
    },
    {
      "epoch": 3.8826810156909723,
      "grad_norm": 0.02821972593665123,
      "learning_rate": 2.249362492834222e-06,
      "loss": 0.0069,
      "step": 2372520
    },
    {
      "epoch": 3.8827137461296255,
      "grad_norm": 0.1276141107082367,
      "learning_rate": 2.2492966006207046e-06,
      "loss": 0.0163,
      "step": 2372540
    },
    {
      "epoch": 3.882746476568279,
      "grad_norm": 0.4351537227630615,
      "learning_rate": 2.249230708407188e-06,
      "loss": 0.0067,
      "step": 2372560
    },
    {
      "epoch": 3.8827792070069322,
      "grad_norm": 0.1133241355419159,
      "learning_rate": 2.2491648161936705e-06,
      "loss": 0.0102,
      "step": 2372580
    },
    {
      "epoch": 3.8828119374455854,
      "grad_norm": 0.19791296124458313,
      "learning_rate": 2.2490989239801533e-06,
      "loss": 0.0114,
      "step": 2372600
    },
    {
      "epoch": 3.882844667884239,
      "grad_norm": 0.39985936880111694,
      "learning_rate": 2.2490330317666364e-06,
      "loss": 0.0112,
      "step": 2372620
    },
    {
      "epoch": 3.8828773983228926,
      "grad_norm": 0.6489684581756592,
      "learning_rate": 2.248967139553119e-06,
      "loss": 0.0095,
      "step": 2372640
    },
    {
      "epoch": 3.8829101287615457,
      "grad_norm": 0.10594150424003601,
      "learning_rate": 2.248901247339602e-06,
      "loss": 0.0123,
      "step": 2372660
    },
    {
      "epoch": 3.882942859200199,
      "grad_norm": 0.06904557347297668,
      "learning_rate": 2.248835355126085e-06,
      "loss": 0.0096,
      "step": 2372680
    },
    {
      "epoch": 3.8829755896388525,
      "grad_norm": 0.11065788567066193,
      "learning_rate": 2.248769462912568e-06,
      "loss": 0.0087,
      "step": 2372700
    },
    {
      "epoch": 3.8830083200775056,
      "grad_norm": 0.6007095575332642,
      "learning_rate": 2.2487035706990506e-06,
      "loss": 0.0108,
      "step": 2372720
    },
    {
      "epoch": 3.8830410505161588,
      "grad_norm": 0.3307693600654602,
      "learning_rate": 2.2486376784855333e-06,
      "loss": 0.0113,
      "step": 2372740
    },
    {
      "epoch": 3.8830737809548124,
      "grad_norm": 0.48628509044647217,
      "learning_rate": 2.2485717862720165e-06,
      "loss": 0.0112,
      "step": 2372760
    },
    {
      "epoch": 3.883106511393466,
      "grad_norm": 0.6596059799194336,
      "learning_rate": 2.248505894058499e-06,
      "loss": 0.011,
      "step": 2372780
    },
    {
      "epoch": 3.883139241832119,
      "grad_norm": 0.34524333477020264,
      "learning_rate": 2.2484400018449824e-06,
      "loss": 0.0096,
      "step": 2372800
    },
    {
      "epoch": 3.8831719722707723,
      "grad_norm": 0.16399948298931122,
      "learning_rate": 2.248374109631465e-06,
      "loss": 0.011,
      "step": 2372820
    },
    {
      "epoch": 3.883204702709426,
      "grad_norm": 0.5358750224113464,
      "learning_rate": 2.248308217417948e-06,
      "loss": 0.0143,
      "step": 2372840
    },
    {
      "epoch": 3.883237433148079,
      "grad_norm": 0.6826691031455994,
      "learning_rate": 2.248242325204431e-06,
      "loss": 0.015,
      "step": 2372860
    },
    {
      "epoch": 3.883270163586732,
      "grad_norm": 0.8009220957756042,
      "learning_rate": 2.2481764329909138e-06,
      "loss": 0.0109,
      "step": 2372880
    },
    {
      "epoch": 3.8833028940253858,
      "grad_norm": 0.1289130449295044,
      "learning_rate": 2.2481105407773965e-06,
      "loss": 0.0128,
      "step": 2372900
    },
    {
      "epoch": 3.883335624464039,
      "grad_norm": 0.43051257729530334,
      "learning_rate": 2.2480446485638792e-06,
      "loss": 0.0138,
      "step": 2372920
    },
    {
      "epoch": 3.8833683549026925,
      "grad_norm": 0.5672578811645508,
      "learning_rate": 2.247978756350362e-06,
      "loss": 0.0114,
      "step": 2372940
    },
    {
      "epoch": 3.8834010853413456,
      "grad_norm": 0.1592743843793869,
      "learning_rate": 2.247912864136845e-06,
      "loss": 0.0096,
      "step": 2372960
    },
    {
      "epoch": 3.8834338157799992,
      "grad_norm": 0.08291090279817581,
      "learning_rate": 2.2478469719233283e-06,
      "loss": 0.0113,
      "step": 2372980
    },
    {
      "epoch": 3.8834665462186524,
      "grad_norm": 0.19067944586277008,
      "learning_rate": 2.247781079709811e-06,
      "loss": 0.0135,
      "step": 2373000
    },
    {
      "epoch": 3.8834992766573055,
      "grad_norm": 0.20765244960784912,
      "learning_rate": 2.2477151874962938e-06,
      "loss": 0.0077,
      "step": 2373020
    },
    {
      "epoch": 3.883532007095959,
      "grad_norm": 0.20907992124557495,
      "learning_rate": 2.2476492952827765e-06,
      "loss": 0.0095,
      "step": 2373040
    },
    {
      "epoch": 3.8835647375346123,
      "grad_norm": 0.20335076749324799,
      "learning_rate": 2.2475834030692597e-06,
      "loss": 0.009,
      "step": 2373060
    },
    {
      "epoch": 3.883597467973266,
      "grad_norm": 1.1594512462615967,
      "learning_rate": 2.2475175108557424e-06,
      "loss": 0.0126,
      "step": 2373080
    },
    {
      "epoch": 3.883630198411919,
      "grad_norm": 0.09953507781028748,
      "learning_rate": 2.247451618642225e-06,
      "loss": 0.0151,
      "step": 2373100
    },
    {
      "epoch": 3.8836629288505726,
      "grad_norm": 0.13039274513721466,
      "learning_rate": 2.247385726428708e-06,
      "loss": 0.0086,
      "step": 2373120
    },
    {
      "epoch": 3.8836956592892258,
      "grad_norm": 0.23277677595615387,
      "learning_rate": 2.2473198342151906e-06,
      "loss": 0.0091,
      "step": 2373140
    },
    {
      "epoch": 3.883728389727879,
      "grad_norm": 0.19027647376060486,
      "learning_rate": 2.247253942001674e-06,
      "loss": 0.008,
      "step": 2373160
    },
    {
      "epoch": 3.8837611201665325,
      "grad_norm": 0.10275653749704361,
      "learning_rate": 2.247188049788157e-06,
      "loss": 0.0117,
      "step": 2373180
    },
    {
      "epoch": 3.8837938506051857,
      "grad_norm": 0.15709464251995087,
      "learning_rate": 2.2471221575746397e-06,
      "loss": 0.0093,
      "step": 2373200
    },
    {
      "epoch": 3.8838265810438393,
      "grad_norm": 0.33544838428497314,
      "learning_rate": 2.2470562653611224e-06,
      "loss": 0.0097,
      "step": 2373220
    },
    {
      "epoch": 3.8838593114824924,
      "grad_norm": 0.09321878105401993,
      "learning_rate": 2.246990373147605e-06,
      "loss": 0.0155,
      "step": 2373240
    },
    {
      "epoch": 3.883892041921146,
      "grad_norm": 0.09376416355371475,
      "learning_rate": 2.2469244809340884e-06,
      "loss": 0.0092,
      "step": 2373260
    },
    {
      "epoch": 3.883924772359799,
      "grad_norm": 0.13255079090595245,
      "learning_rate": 2.246858588720571e-06,
      "loss": 0.0113,
      "step": 2373280
    },
    {
      "epoch": 3.8839575027984523,
      "grad_norm": 0.25325360894203186,
      "learning_rate": 2.246792696507054e-06,
      "loss": 0.0163,
      "step": 2373300
    },
    {
      "epoch": 3.883990233237106,
      "grad_norm": 0.28855177760124207,
      "learning_rate": 2.2467268042935366e-06,
      "loss": 0.0085,
      "step": 2373320
    },
    {
      "epoch": 3.884022963675759,
      "grad_norm": 0.08042250573635101,
      "learning_rate": 2.2466609120800197e-06,
      "loss": 0.0072,
      "step": 2373340
    },
    {
      "epoch": 3.8840556941144126,
      "grad_norm": 0.38000932335853577,
      "learning_rate": 2.2465950198665025e-06,
      "loss": 0.0083,
      "step": 2373360
    },
    {
      "epoch": 3.884088424553066,
      "grad_norm": 0.26343420147895813,
      "learning_rate": 2.2465291276529856e-06,
      "loss": 0.0065,
      "step": 2373380
    },
    {
      "epoch": 3.8841211549917194,
      "grad_norm": 0.5429386496543884,
      "learning_rate": 2.2464632354394684e-06,
      "loss": 0.0135,
      "step": 2373400
    },
    {
      "epoch": 3.8841538854303725,
      "grad_norm": 0.21205835044384003,
      "learning_rate": 2.246397343225951e-06,
      "loss": 0.0165,
      "step": 2373420
    },
    {
      "epoch": 3.8841866158690257,
      "grad_norm": 0.31205636262893677,
      "learning_rate": 2.246331451012434e-06,
      "loss": 0.011,
      "step": 2373440
    },
    {
      "epoch": 3.8842193463076793,
      "grad_norm": 0.2964217960834503,
      "learning_rate": 2.246265558798917e-06,
      "loss": 0.0106,
      "step": 2373460
    },
    {
      "epoch": 3.8842520767463324,
      "grad_norm": 0.07723674178123474,
      "learning_rate": 2.2461996665853998e-06,
      "loss": 0.0078,
      "step": 2373480
    },
    {
      "epoch": 3.884284807184986,
      "grad_norm": 0.12287092208862305,
      "learning_rate": 2.246133774371883e-06,
      "loss": 0.013,
      "step": 2373500
    },
    {
      "epoch": 3.884317537623639,
      "grad_norm": 0.16291716694831848,
      "learning_rate": 2.2460678821583657e-06,
      "loss": 0.0116,
      "step": 2373520
    },
    {
      "epoch": 3.8843502680622928,
      "grad_norm": 0.733252763748169,
      "learning_rate": 2.2460019899448484e-06,
      "loss": 0.0122,
      "step": 2373540
    },
    {
      "epoch": 3.884382998500946,
      "grad_norm": 0.21669569611549377,
      "learning_rate": 2.245936097731331e-06,
      "loss": 0.012,
      "step": 2373560
    },
    {
      "epoch": 3.884415728939599,
      "grad_norm": 0.36112526059150696,
      "learning_rate": 2.2458702055178143e-06,
      "loss": 0.015,
      "step": 2373580
    },
    {
      "epoch": 3.8844484593782527,
      "grad_norm": 0.23761041462421417,
      "learning_rate": 2.245804313304297e-06,
      "loss": 0.0092,
      "step": 2373600
    },
    {
      "epoch": 3.884481189816906,
      "grad_norm": 0.3523924946784973,
      "learning_rate": 2.2457384210907798e-06,
      "loss": 0.0074,
      "step": 2373620
    },
    {
      "epoch": 3.8845139202555594,
      "grad_norm": 0.2917346954345703,
      "learning_rate": 2.2456725288772625e-06,
      "loss": 0.0117,
      "step": 2373640
    },
    {
      "epoch": 3.8845466506942126,
      "grad_norm": 0.08135437220335007,
      "learning_rate": 2.2456066366637457e-06,
      "loss": 0.0149,
      "step": 2373660
    },
    {
      "epoch": 3.884579381132866,
      "grad_norm": 0.3875521421432495,
      "learning_rate": 2.2455407444502284e-06,
      "loss": 0.0142,
      "step": 2373680
    },
    {
      "epoch": 3.8846121115715193,
      "grad_norm": 0.29215192794799805,
      "learning_rate": 2.2454748522367116e-06,
      "loss": 0.0104,
      "step": 2373700
    },
    {
      "epoch": 3.8846448420101725,
      "grad_norm": 0.2038169652223587,
      "learning_rate": 2.2454089600231943e-06,
      "loss": 0.0098,
      "step": 2373720
    },
    {
      "epoch": 3.884677572448826,
      "grad_norm": 0.7552807927131653,
      "learning_rate": 2.245343067809677e-06,
      "loss": 0.0083,
      "step": 2373740
    },
    {
      "epoch": 3.884710302887479,
      "grad_norm": 0.159449964761734,
      "learning_rate": 2.24527717559616e-06,
      "loss": 0.0063,
      "step": 2373760
    },
    {
      "epoch": 3.884743033326133,
      "grad_norm": 0.3918236196041107,
      "learning_rate": 2.245211283382643e-06,
      "loss": 0.0092,
      "step": 2373780
    },
    {
      "epoch": 3.884775763764786,
      "grad_norm": 0.20356258749961853,
      "learning_rate": 2.2451453911691257e-06,
      "loss": 0.0108,
      "step": 2373800
    },
    {
      "epoch": 3.8848084942034395,
      "grad_norm": 0.3973194658756256,
      "learning_rate": 2.2450794989556085e-06,
      "loss": 0.0117,
      "step": 2373820
    },
    {
      "epoch": 3.8848412246420927,
      "grad_norm": 0.3726505935192108,
      "learning_rate": 2.245013606742091e-06,
      "loss": 0.0087,
      "step": 2373840
    },
    {
      "epoch": 3.884873955080746,
      "grad_norm": 0.414155513048172,
      "learning_rate": 2.2449477145285744e-06,
      "loss": 0.008,
      "step": 2373860
    },
    {
      "epoch": 3.8849066855193994,
      "grad_norm": 0.1788232922554016,
      "learning_rate": 2.244881822315057e-06,
      "loss": 0.0158,
      "step": 2373880
    },
    {
      "epoch": 3.8849394159580526,
      "grad_norm": 0.3608776926994324,
      "learning_rate": 2.2448159301015403e-06,
      "loss": 0.0121,
      "step": 2373900
    },
    {
      "epoch": 3.8849721463967057,
      "grad_norm": 0.16690368950366974,
      "learning_rate": 2.244750037888023e-06,
      "loss": 0.0126,
      "step": 2373920
    },
    {
      "epoch": 3.8850048768353593,
      "grad_norm": 0.6420905590057373,
      "learning_rate": 2.2446841456745057e-06,
      "loss": 0.018,
      "step": 2373940
    },
    {
      "epoch": 3.885037607274013,
      "grad_norm": 0.4293903410434723,
      "learning_rate": 2.2446182534609885e-06,
      "loss": 0.0073,
      "step": 2373960
    },
    {
      "epoch": 3.885070337712666,
      "grad_norm": 0.05812137573957443,
      "learning_rate": 2.2445523612474716e-06,
      "loss": 0.0144,
      "step": 2373980
    },
    {
      "epoch": 3.8851030681513192,
      "grad_norm": 0.4886610507965088,
      "learning_rate": 2.2444864690339544e-06,
      "loss": 0.0118,
      "step": 2374000
    },
    {
      "epoch": 3.885135798589973,
      "grad_norm": 0.29060623049736023,
      "learning_rate": 2.244420576820437e-06,
      "loss": 0.0092,
      "step": 2374020
    },
    {
      "epoch": 3.885168529028626,
      "grad_norm": 0.3525841534137726,
      "learning_rate": 2.2443546846069203e-06,
      "loss": 0.011,
      "step": 2374040
    },
    {
      "epoch": 3.885201259467279,
      "grad_norm": 0.19983670115470886,
      "learning_rate": 2.244288792393403e-06,
      "loss": 0.0075,
      "step": 2374060
    },
    {
      "epoch": 3.8852339899059327,
      "grad_norm": 0.34271207451820374,
      "learning_rate": 2.244222900179886e-06,
      "loss": 0.0113,
      "step": 2374080
    },
    {
      "epoch": 3.8852667203445863,
      "grad_norm": 0.17221882939338684,
      "learning_rate": 2.244157007966369e-06,
      "loss": 0.0071,
      "step": 2374100
    },
    {
      "epoch": 3.8852994507832395,
      "grad_norm": 0.30727750062942505,
      "learning_rate": 2.2440911157528517e-06,
      "loss": 0.0115,
      "step": 2374120
    },
    {
      "epoch": 3.8853321812218926,
      "grad_norm": 0.1996123045682907,
      "learning_rate": 2.2440252235393344e-06,
      "loss": 0.012,
      "step": 2374140
    },
    {
      "epoch": 3.885364911660546,
      "grad_norm": 0.31904998421669006,
      "learning_rate": 2.243959331325817e-06,
      "loss": 0.009,
      "step": 2374160
    },
    {
      "epoch": 3.8853976420991994,
      "grad_norm": 0.0953221246600151,
      "learning_rate": 2.2438934391123003e-06,
      "loss": 0.0136,
      "step": 2374180
    },
    {
      "epoch": 3.8854303725378525,
      "grad_norm": 0.3117649555206299,
      "learning_rate": 2.243827546898783e-06,
      "loss": 0.0092,
      "step": 2374200
    },
    {
      "epoch": 3.885463102976506,
      "grad_norm": 0.07185754925012589,
      "learning_rate": 2.243761654685266e-06,
      "loss": 0.0064,
      "step": 2374220
    },
    {
      "epoch": 3.8854958334151597,
      "grad_norm": 0.10953308641910553,
      "learning_rate": 2.243695762471749e-06,
      "loss": 0.0078,
      "step": 2374240
    },
    {
      "epoch": 3.885528563853813,
      "grad_norm": 0.3643815815448761,
      "learning_rate": 2.2436298702582317e-06,
      "loss": 0.0117,
      "step": 2374260
    },
    {
      "epoch": 3.885561294292466,
      "grad_norm": 0.4393511116504669,
      "learning_rate": 2.243563978044715e-06,
      "loss": 0.0119,
      "step": 2374280
    },
    {
      "epoch": 3.8855940247311196,
      "grad_norm": 0.2297956645488739,
      "learning_rate": 2.2434980858311976e-06,
      "loss": 0.014,
      "step": 2374300
    },
    {
      "epoch": 3.8856267551697727,
      "grad_norm": 0.11137333512306213,
      "learning_rate": 2.2434321936176803e-06,
      "loss": 0.0098,
      "step": 2374320
    },
    {
      "epoch": 3.885659485608426,
      "grad_norm": 0.5818334817886353,
      "learning_rate": 2.243366301404163e-06,
      "loss": 0.0126,
      "step": 2374340
    },
    {
      "epoch": 3.8856922160470795,
      "grad_norm": 0.3204461932182312,
      "learning_rate": 2.243300409190646e-06,
      "loss": 0.0115,
      "step": 2374360
    },
    {
      "epoch": 3.885724946485733,
      "grad_norm": 0.24200721085071564,
      "learning_rate": 2.243234516977129e-06,
      "loss": 0.0095,
      "step": 2374380
    },
    {
      "epoch": 3.8857576769243862,
      "grad_norm": 0.3911570608615875,
      "learning_rate": 2.243168624763612e-06,
      "loss": 0.0133,
      "step": 2374400
    },
    {
      "epoch": 3.8857904073630394,
      "grad_norm": 0.06513695418834686,
      "learning_rate": 2.243102732550095e-06,
      "loss": 0.0117,
      "step": 2374420
    },
    {
      "epoch": 3.885823137801693,
      "grad_norm": 0.12266983836889267,
      "learning_rate": 2.2430368403365776e-06,
      "loss": 0.0111,
      "step": 2374440
    },
    {
      "epoch": 3.885855868240346,
      "grad_norm": 0.06971895694732666,
      "learning_rate": 2.2429709481230604e-06,
      "loss": 0.0075,
      "step": 2374460
    },
    {
      "epoch": 3.8858885986789993,
      "grad_norm": 0.14508070051670074,
      "learning_rate": 2.2429050559095435e-06,
      "loss": 0.0109,
      "step": 2374480
    },
    {
      "epoch": 3.885921329117653,
      "grad_norm": 0.07035774737596512,
      "learning_rate": 2.2428391636960263e-06,
      "loss": 0.0117,
      "step": 2374500
    },
    {
      "epoch": 3.885954059556306,
      "grad_norm": 0.3849602937698364,
      "learning_rate": 2.242773271482509e-06,
      "loss": 0.0113,
      "step": 2374520
    },
    {
      "epoch": 3.8859867899949596,
      "grad_norm": 0.2860840857028961,
      "learning_rate": 2.2427073792689917e-06,
      "loss": 0.0147,
      "step": 2374540
    },
    {
      "epoch": 3.8860195204336128,
      "grad_norm": 0.28757601976394653,
      "learning_rate": 2.242641487055475e-06,
      "loss": 0.0189,
      "step": 2374560
    },
    {
      "epoch": 3.8860522508722664,
      "grad_norm": 0.4786982238292694,
      "learning_rate": 2.2425755948419576e-06,
      "loss": 0.0119,
      "step": 2374580
    },
    {
      "epoch": 3.8860849813109195,
      "grad_norm": 0.36345815658569336,
      "learning_rate": 2.242509702628441e-06,
      "loss": 0.0091,
      "step": 2374600
    },
    {
      "epoch": 3.8861177117495727,
      "grad_norm": 0.10120341926813126,
      "learning_rate": 2.2424438104149235e-06,
      "loss": 0.0108,
      "step": 2374620
    },
    {
      "epoch": 3.8861504421882263,
      "grad_norm": 0.2608415484428406,
      "learning_rate": 2.2423779182014063e-06,
      "loss": 0.0133,
      "step": 2374640
    },
    {
      "epoch": 3.8861831726268794,
      "grad_norm": 0.23472648859024048,
      "learning_rate": 2.242312025987889e-06,
      "loss": 0.0091,
      "step": 2374660
    },
    {
      "epoch": 3.886215903065533,
      "grad_norm": 0.15740090608596802,
      "learning_rate": 2.242246133774372e-06,
      "loss": 0.0106,
      "step": 2374680
    },
    {
      "epoch": 3.886248633504186,
      "grad_norm": 0.31504112482070923,
      "learning_rate": 2.242180241560855e-06,
      "loss": 0.0138,
      "step": 2374700
    },
    {
      "epoch": 3.8862813639428397,
      "grad_norm": 0.18845434486865997,
      "learning_rate": 2.2421143493473377e-06,
      "loss": 0.0111,
      "step": 2374720
    },
    {
      "epoch": 3.886314094381493,
      "grad_norm": 0.24346959590911865,
      "learning_rate": 2.242048457133821e-06,
      "loss": 0.0101,
      "step": 2374740
    },
    {
      "epoch": 3.886346824820146,
      "grad_norm": 0.1331186592578888,
      "learning_rate": 2.2419825649203036e-06,
      "loss": 0.0099,
      "step": 2374760
    },
    {
      "epoch": 3.8863795552587996,
      "grad_norm": 0.11385165899991989,
      "learning_rate": 2.2419166727067863e-06,
      "loss": 0.0077,
      "step": 2374780
    },
    {
      "epoch": 3.886412285697453,
      "grad_norm": 0.16262613236904144,
      "learning_rate": 2.2418507804932695e-06,
      "loss": 0.01,
      "step": 2374800
    },
    {
      "epoch": 3.8864450161361064,
      "grad_norm": 0.11141795665025711,
      "learning_rate": 2.2417848882797522e-06,
      "loss": 0.0101,
      "step": 2374820
    },
    {
      "epoch": 3.8864777465747595,
      "grad_norm": 0.10060204565525055,
      "learning_rate": 2.241718996066235e-06,
      "loss": 0.0088,
      "step": 2374840
    },
    {
      "epoch": 3.886510477013413,
      "grad_norm": 0.30310681462287903,
      "learning_rate": 2.2416531038527177e-06,
      "loss": 0.0155,
      "step": 2374860
    },
    {
      "epoch": 3.8865432074520663,
      "grad_norm": 0.10692238807678223,
      "learning_rate": 2.241587211639201e-06,
      "loss": 0.0149,
      "step": 2374880
    },
    {
      "epoch": 3.8865759378907194,
      "grad_norm": 0.08393308520317078,
      "learning_rate": 2.2415213194256836e-06,
      "loss": 0.0106,
      "step": 2374900
    },
    {
      "epoch": 3.886608668329373,
      "grad_norm": 0.16939067840576172,
      "learning_rate": 2.2414554272121668e-06,
      "loss": 0.0087,
      "step": 2374920
    },
    {
      "epoch": 3.886641398768026,
      "grad_norm": 0.32556670904159546,
      "learning_rate": 2.2413895349986495e-06,
      "loss": 0.0112,
      "step": 2374940
    },
    {
      "epoch": 3.8866741292066798,
      "grad_norm": 0.3224150836467743,
      "learning_rate": 2.2413236427851322e-06,
      "loss": 0.0066,
      "step": 2374960
    },
    {
      "epoch": 3.886706859645333,
      "grad_norm": 0.1315838247537613,
      "learning_rate": 2.241257750571615e-06,
      "loss": 0.0131,
      "step": 2374980
    },
    {
      "epoch": 3.8867395900839865,
      "grad_norm": 0.24123330414295197,
      "learning_rate": 2.241191858358098e-06,
      "loss": 0.0132,
      "step": 2375000
    },
    {
      "epoch": 3.8867723205226397,
      "grad_norm": 0.28024572134017944,
      "learning_rate": 2.241125966144581e-06,
      "loss": 0.0113,
      "step": 2375020
    },
    {
      "epoch": 3.886805050961293,
      "grad_norm": 0.19240008294582367,
      "learning_rate": 2.2410600739310636e-06,
      "loss": 0.0131,
      "step": 2375040
    },
    {
      "epoch": 3.8868377813999464,
      "grad_norm": 0.1631050854921341,
      "learning_rate": 2.2409941817175464e-06,
      "loss": 0.0093,
      "step": 2375060
    },
    {
      "epoch": 3.8868705118385996,
      "grad_norm": 0.14409956336021423,
      "learning_rate": 2.2409282895040295e-06,
      "loss": 0.011,
      "step": 2375080
    },
    {
      "epoch": 3.886903242277253,
      "grad_norm": 0.9820072650909424,
      "learning_rate": 2.2408623972905123e-06,
      "loss": 0.0116,
      "step": 2375100
    },
    {
      "epoch": 3.8869359727159063,
      "grad_norm": 0.7992329001426697,
      "learning_rate": 2.2407965050769954e-06,
      "loss": 0.0134,
      "step": 2375120
    },
    {
      "epoch": 3.88696870315456,
      "grad_norm": 0.11128270626068115,
      "learning_rate": 2.240730612863478e-06,
      "loss": 0.0122,
      "step": 2375140
    },
    {
      "epoch": 3.887001433593213,
      "grad_norm": 0.7910823225975037,
      "learning_rate": 2.240664720649961e-06,
      "loss": 0.0128,
      "step": 2375160
    },
    {
      "epoch": 3.887034164031866,
      "grad_norm": 0.2957490086555481,
      "learning_rate": 2.2405988284364436e-06,
      "loss": 0.0085,
      "step": 2375180
    },
    {
      "epoch": 3.88706689447052,
      "grad_norm": 0.06741899996995926,
      "learning_rate": 2.240532936222927e-06,
      "loss": 0.0123,
      "step": 2375200
    },
    {
      "epoch": 3.887099624909173,
      "grad_norm": 0.12565107643604279,
      "learning_rate": 2.2404670440094096e-06,
      "loss": 0.0112,
      "step": 2375220
    },
    {
      "epoch": 3.8871323553478265,
      "grad_norm": 0.3346124589443207,
      "learning_rate": 2.2404011517958923e-06,
      "loss": 0.0099,
      "step": 2375240
    },
    {
      "epoch": 3.8871650857864797,
      "grad_norm": 0.6204129457473755,
      "learning_rate": 2.2403352595823755e-06,
      "loss": 0.0115,
      "step": 2375260
    },
    {
      "epoch": 3.8871978162251333,
      "grad_norm": 0.7501589059829712,
      "learning_rate": 2.240269367368858e-06,
      "loss": 0.0159,
      "step": 2375280
    },
    {
      "epoch": 3.8872305466637864,
      "grad_norm": 0.9942716956138611,
      "learning_rate": 2.240203475155341e-06,
      "loss": 0.0131,
      "step": 2375300
    },
    {
      "epoch": 3.8872632771024396,
      "grad_norm": 0.31729552149772644,
      "learning_rate": 2.240137582941824e-06,
      "loss": 0.0117,
      "step": 2375320
    },
    {
      "epoch": 3.887296007541093,
      "grad_norm": 0.8364990949630737,
      "learning_rate": 2.240071690728307e-06,
      "loss": 0.0169,
      "step": 2375340
    },
    {
      "epoch": 3.8873287379797463,
      "grad_norm": 0.5443165302276611,
      "learning_rate": 2.2400057985147896e-06,
      "loss": 0.0108,
      "step": 2375360
    },
    {
      "epoch": 3.8873614684183995,
      "grad_norm": 0.6079615354537964,
      "learning_rate": 2.2399399063012723e-06,
      "loss": 0.0097,
      "step": 2375380
    },
    {
      "epoch": 3.887394198857053,
      "grad_norm": 0.45252734422683716,
      "learning_rate": 2.2398740140877555e-06,
      "loss": 0.012,
      "step": 2375400
    },
    {
      "epoch": 3.8874269292957067,
      "grad_norm": 0.1102379560470581,
      "learning_rate": 2.2398081218742382e-06,
      "loss": 0.011,
      "step": 2375420
    },
    {
      "epoch": 3.88745965973436,
      "grad_norm": 0.5177238583564758,
      "learning_rate": 2.2397422296607214e-06,
      "loss": 0.0075,
      "step": 2375440
    },
    {
      "epoch": 3.887492390173013,
      "grad_norm": 0.13300086557865143,
      "learning_rate": 2.239676337447204e-06,
      "loss": 0.0118,
      "step": 2375460
    },
    {
      "epoch": 3.8875251206116666,
      "grad_norm": 0.22668787837028503,
      "learning_rate": 2.239610445233687e-06,
      "loss": 0.0095,
      "step": 2375480
    },
    {
      "epoch": 3.8875578510503197,
      "grad_norm": 0.2610286772251129,
      "learning_rate": 2.23954455302017e-06,
      "loss": 0.0109,
      "step": 2375500
    },
    {
      "epoch": 3.887590581488973,
      "grad_norm": 0.17316333949565887,
      "learning_rate": 2.2394786608066528e-06,
      "loss": 0.0092,
      "step": 2375520
    },
    {
      "epoch": 3.8876233119276264,
      "grad_norm": 0.37980520725250244,
      "learning_rate": 2.2394127685931355e-06,
      "loss": 0.0167,
      "step": 2375540
    },
    {
      "epoch": 3.88765604236628,
      "grad_norm": 0.19175077974796295,
      "learning_rate": 2.2393468763796182e-06,
      "loss": 0.0097,
      "step": 2375560
    },
    {
      "epoch": 3.887688772804933,
      "grad_norm": 0.12399911135435104,
      "learning_rate": 2.239280984166101e-06,
      "loss": 0.0107,
      "step": 2375580
    },
    {
      "epoch": 3.8877215032435863,
      "grad_norm": 0.015343149192631245,
      "learning_rate": 2.239215091952584e-06,
      "loss": 0.01,
      "step": 2375600
    },
    {
      "epoch": 3.88775423368224,
      "grad_norm": 0.33297786116600037,
      "learning_rate": 2.2391491997390673e-06,
      "loss": 0.0157,
      "step": 2375620
    },
    {
      "epoch": 3.887786964120893,
      "grad_norm": 0.6816012859344482,
      "learning_rate": 2.23908330752555e-06,
      "loss": 0.0112,
      "step": 2375640
    },
    {
      "epoch": 3.8878196945595462,
      "grad_norm": 0.03521720692515373,
      "learning_rate": 2.239017415312033e-06,
      "loss": 0.0133,
      "step": 2375660
    },
    {
      "epoch": 3.8878524249982,
      "grad_norm": 0.601349949836731,
      "learning_rate": 2.2389515230985155e-06,
      "loss": 0.0095,
      "step": 2375680
    },
    {
      "epoch": 3.8878851554368534,
      "grad_norm": 0.17151561379432678,
      "learning_rate": 2.2388856308849987e-06,
      "loss": 0.0176,
      "step": 2375700
    },
    {
      "epoch": 3.8879178858755066,
      "grad_norm": 0.19339096546173096,
      "learning_rate": 2.2388197386714814e-06,
      "loss": 0.0084,
      "step": 2375720
    },
    {
      "epoch": 3.8879506163141597,
      "grad_norm": 0.21533913910388947,
      "learning_rate": 2.238753846457964e-06,
      "loss": 0.0144,
      "step": 2375740
    },
    {
      "epoch": 3.8879833467528133,
      "grad_norm": 0.2541762590408325,
      "learning_rate": 2.238687954244447e-06,
      "loss": 0.0129,
      "step": 2375760
    },
    {
      "epoch": 3.8880160771914665,
      "grad_norm": 0.261403888463974,
      "learning_rate": 2.2386220620309297e-06,
      "loss": 0.0199,
      "step": 2375780
    },
    {
      "epoch": 3.8880488076301196,
      "grad_norm": 0.45418548583984375,
      "learning_rate": 2.238556169817413e-06,
      "loss": 0.009,
      "step": 2375800
    },
    {
      "epoch": 3.888081538068773,
      "grad_norm": 0.27482369542121887,
      "learning_rate": 2.238490277603896e-06,
      "loss": 0.0119,
      "step": 2375820
    },
    {
      "epoch": 3.888114268507427,
      "grad_norm": 0.3684878945350647,
      "learning_rate": 2.2384243853903787e-06,
      "loss": 0.0117,
      "step": 2375840
    },
    {
      "epoch": 3.88814699894608,
      "grad_norm": 0.25316447019577026,
      "learning_rate": 2.2383584931768615e-06,
      "loss": 0.0096,
      "step": 2375860
    },
    {
      "epoch": 3.888179729384733,
      "grad_norm": 0.5945002436637878,
      "learning_rate": 2.238292600963344e-06,
      "loss": 0.0099,
      "step": 2375880
    },
    {
      "epoch": 3.8882124598233867,
      "grad_norm": 0.2434372752904892,
      "learning_rate": 2.2382267087498274e-06,
      "loss": 0.0103,
      "step": 2375900
    },
    {
      "epoch": 3.88824519026204,
      "grad_norm": 0.1549645960330963,
      "learning_rate": 2.23816081653631e-06,
      "loss": 0.0111,
      "step": 2375920
    },
    {
      "epoch": 3.888277920700693,
      "grad_norm": 0.08091481029987335,
      "learning_rate": 2.238094924322793e-06,
      "loss": 0.0131,
      "step": 2375940
    },
    {
      "epoch": 3.8883106511393466,
      "grad_norm": 0.2385251820087433,
      "learning_rate": 2.238029032109276e-06,
      "loss": 0.01,
      "step": 2375960
    },
    {
      "epoch": 3.8883433815779997,
      "grad_norm": 0.43619269132614136,
      "learning_rate": 2.2379631398957587e-06,
      "loss": 0.0112,
      "step": 2375980
    },
    {
      "epoch": 3.8883761120166533,
      "grad_norm": 0.25942298769950867,
      "learning_rate": 2.2378972476822415e-06,
      "loss": 0.0137,
      "step": 2376000
    },
    {
      "epoch": 3.8884088424553065,
      "grad_norm": 0.29540228843688965,
      "learning_rate": 2.2378313554687246e-06,
      "loss": 0.01,
      "step": 2376020
    },
    {
      "epoch": 3.88844157289396,
      "grad_norm": 0.14439789950847626,
      "learning_rate": 2.2377654632552074e-06,
      "loss": 0.0236,
      "step": 2376040
    },
    {
      "epoch": 3.8884743033326132,
      "grad_norm": 0.09134779125452042,
      "learning_rate": 2.23769957104169e-06,
      "loss": 0.018,
      "step": 2376060
    },
    {
      "epoch": 3.8885070337712664,
      "grad_norm": 0.5833159685134888,
      "learning_rate": 2.237633678828173e-06,
      "loss": 0.019,
      "step": 2376080
    },
    {
      "epoch": 3.88853976420992,
      "grad_norm": 0.23958656191825867,
      "learning_rate": 2.237567786614656e-06,
      "loss": 0.0121,
      "step": 2376100
    },
    {
      "epoch": 3.888572494648573,
      "grad_norm": 0.19287404417991638,
      "learning_rate": 2.2375018944011388e-06,
      "loss": 0.0149,
      "step": 2376120
    },
    {
      "epoch": 3.8886052250872267,
      "grad_norm": 0.21127380430698395,
      "learning_rate": 2.237436002187622e-06,
      "loss": 0.0115,
      "step": 2376140
    },
    {
      "epoch": 3.88863795552588,
      "grad_norm": 0.2845034897327423,
      "learning_rate": 2.2373701099741047e-06,
      "loss": 0.0128,
      "step": 2376160
    },
    {
      "epoch": 3.8886706859645335,
      "grad_norm": 0.20515088737010956,
      "learning_rate": 2.2373042177605874e-06,
      "loss": 0.0076,
      "step": 2376180
    },
    {
      "epoch": 3.8887034164031866,
      "grad_norm": 0.3761433959007263,
      "learning_rate": 2.23723832554707e-06,
      "loss": 0.0114,
      "step": 2376200
    },
    {
      "epoch": 3.8887361468418398,
      "grad_norm": 0.34476616978645325,
      "learning_rate": 2.2371724333335533e-06,
      "loss": 0.0125,
      "step": 2376220
    },
    {
      "epoch": 3.8887688772804934,
      "grad_norm": 0.37410059571266174,
      "learning_rate": 2.237106541120036e-06,
      "loss": 0.0136,
      "step": 2376240
    },
    {
      "epoch": 3.8888016077191465,
      "grad_norm": 0.21421052515506744,
      "learning_rate": 2.237040648906519e-06,
      "loss": 0.0104,
      "step": 2376260
    },
    {
      "epoch": 3.8888343381578,
      "grad_norm": 0.41422927379608154,
      "learning_rate": 2.2369747566930015e-06,
      "loss": 0.0115,
      "step": 2376280
    },
    {
      "epoch": 3.8888670685964533,
      "grad_norm": 0.3453342914581299,
      "learning_rate": 2.2369088644794847e-06,
      "loss": 0.0081,
      "step": 2376300
    },
    {
      "epoch": 3.888899799035107,
      "grad_norm": 0.08652683347463608,
      "learning_rate": 2.2368429722659674e-06,
      "loss": 0.0087,
      "step": 2376320
    },
    {
      "epoch": 3.88893252947376,
      "grad_norm": 0.43307358026504517,
      "learning_rate": 2.2367770800524506e-06,
      "loss": 0.0094,
      "step": 2376340
    },
    {
      "epoch": 3.888965259912413,
      "grad_norm": 0.5612319111824036,
      "learning_rate": 2.2367111878389333e-06,
      "loss": 0.0101,
      "step": 2376360
    },
    {
      "epoch": 3.8889979903510667,
      "grad_norm": 0.48025405406951904,
      "learning_rate": 2.236645295625416e-06,
      "loss": 0.0136,
      "step": 2376380
    },
    {
      "epoch": 3.88903072078972,
      "grad_norm": 0.996444821357727,
      "learning_rate": 2.236579403411899e-06,
      "loss": 0.0143,
      "step": 2376400
    },
    {
      "epoch": 3.8890634512283735,
      "grad_norm": 0.2236279547214508,
      "learning_rate": 2.236513511198382e-06,
      "loss": 0.011,
      "step": 2376420
    },
    {
      "epoch": 3.8890961816670266,
      "grad_norm": 0.09239251911640167,
      "learning_rate": 2.2364476189848647e-06,
      "loss": 0.0077,
      "step": 2376440
    },
    {
      "epoch": 3.8891289121056802,
      "grad_norm": 0.3194490373134613,
      "learning_rate": 2.2363817267713475e-06,
      "loss": 0.0101,
      "step": 2376460
    },
    {
      "epoch": 3.8891616425443334,
      "grad_norm": 0.17053836584091187,
      "learning_rate": 2.23631583455783e-06,
      "loss": 0.01,
      "step": 2376480
    },
    {
      "epoch": 3.8891943729829865,
      "grad_norm": 1.1281812191009521,
      "learning_rate": 2.2362499423443134e-06,
      "loss": 0.0121,
      "step": 2376500
    },
    {
      "epoch": 3.88922710342164,
      "grad_norm": 0.4160289168357849,
      "learning_rate": 2.236184050130796e-06,
      "loss": 0.0098,
      "step": 2376520
    },
    {
      "epoch": 3.8892598338602933,
      "grad_norm": 0.07934349030256271,
      "learning_rate": 2.2361181579172793e-06,
      "loss": 0.0078,
      "step": 2376540
    },
    {
      "epoch": 3.889292564298947,
      "grad_norm": 0.07976781576871872,
      "learning_rate": 2.236052265703762e-06,
      "loss": 0.014,
      "step": 2376560
    },
    {
      "epoch": 3.8893252947376,
      "grad_norm": 0.6731614470481873,
      "learning_rate": 2.2359863734902447e-06,
      "loss": 0.0102,
      "step": 2376580
    },
    {
      "epoch": 3.8893580251762536,
      "grad_norm": 0.3450630009174347,
      "learning_rate": 2.2359204812767275e-06,
      "loss": 0.0122,
      "step": 2376600
    },
    {
      "epoch": 3.8893907556149068,
      "grad_norm": 0.17256203293800354,
      "learning_rate": 2.2358545890632107e-06,
      "loss": 0.0101,
      "step": 2376620
    },
    {
      "epoch": 3.88942348605356,
      "grad_norm": 0.5108936429023743,
      "learning_rate": 2.2357886968496934e-06,
      "loss": 0.0158,
      "step": 2376640
    },
    {
      "epoch": 3.8894562164922135,
      "grad_norm": 0.4021535813808441,
      "learning_rate": 2.235722804636176e-06,
      "loss": 0.0141,
      "step": 2376660
    },
    {
      "epoch": 3.8894889469308667,
      "grad_norm": 0.18365928530693054,
      "learning_rate": 2.2356569124226593e-06,
      "loss": 0.0094,
      "step": 2376680
    },
    {
      "epoch": 3.8895216773695203,
      "grad_norm": 0.5424859523773193,
      "learning_rate": 2.235591020209142e-06,
      "loss": 0.0123,
      "step": 2376700
    },
    {
      "epoch": 3.8895544078081734,
      "grad_norm": 0.15446904301643372,
      "learning_rate": 2.235525127995625e-06,
      "loss": 0.0151,
      "step": 2376720
    },
    {
      "epoch": 3.889587138246827,
      "grad_norm": 0.4178447425365448,
      "learning_rate": 2.235459235782108e-06,
      "loss": 0.0197,
      "step": 2376740
    },
    {
      "epoch": 3.88961986868548,
      "grad_norm": 0.5787005424499512,
      "learning_rate": 2.2353933435685907e-06,
      "loss": 0.0115,
      "step": 2376760
    },
    {
      "epoch": 3.8896525991241333,
      "grad_norm": 0.420732319355011,
      "learning_rate": 2.2353274513550734e-06,
      "loss": 0.0091,
      "step": 2376780
    },
    {
      "epoch": 3.889685329562787,
      "grad_norm": 0.10917683690786362,
      "learning_rate": 2.235261559141556e-06,
      "loss": 0.0091,
      "step": 2376800
    },
    {
      "epoch": 3.88971806000144,
      "grad_norm": 0.04163668677210808,
      "learning_rate": 2.2351956669280393e-06,
      "loss": 0.0089,
      "step": 2376820
    },
    {
      "epoch": 3.8897507904400936,
      "grad_norm": 0.3007742464542389,
      "learning_rate": 2.2351297747145225e-06,
      "loss": 0.0088,
      "step": 2376840
    },
    {
      "epoch": 3.889783520878747,
      "grad_norm": 0.2857634425163269,
      "learning_rate": 2.2350638825010052e-06,
      "loss": 0.0103,
      "step": 2376860
    },
    {
      "epoch": 3.8898162513174004,
      "grad_norm": 0.27709439396858215,
      "learning_rate": 2.234997990287488e-06,
      "loss": 0.0109,
      "step": 2376880
    },
    {
      "epoch": 3.8898489817560535,
      "grad_norm": 0.17953555285930634,
      "learning_rate": 2.2349320980739707e-06,
      "loss": 0.0153,
      "step": 2376900
    },
    {
      "epoch": 3.8898817121947067,
      "grad_norm": 0.09698863327503204,
      "learning_rate": 2.234866205860454e-06,
      "loss": 0.0101,
      "step": 2376920
    },
    {
      "epoch": 3.8899144426333603,
      "grad_norm": 0.3783215582370758,
      "learning_rate": 2.2348003136469366e-06,
      "loss": 0.0111,
      "step": 2376940
    },
    {
      "epoch": 3.8899471730720134,
      "grad_norm": 0.2388957142829895,
      "learning_rate": 2.2347344214334193e-06,
      "loss": 0.0133,
      "step": 2376960
    },
    {
      "epoch": 3.8899799035106666,
      "grad_norm": 1.0047674179077148,
      "learning_rate": 2.234668529219902e-06,
      "loss": 0.0098,
      "step": 2376980
    },
    {
      "epoch": 3.89001263394932,
      "grad_norm": 0.7135886549949646,
      "learning_rate": 2.234602637006385e-06,
      "loss": 0.0125,
      "step": 2377000
    },
    {
      "epoch": 3.8900453643879738,
      "grad_norm": 0.21433594822883606,
      "learning_rate": 2.234536744792868e-06,
      "loss": 0.0143,
      "step": 2377020
    },
    {
      "epoch": 3.890078094826627,
      "grad_norm": 0.6703692078590393,
      "learning_rate": 2.234470852579351e-06,
      "loss": 0.0101,
      "step": 2377040
    },
    {
      "epoch": 3.89011082526528,
      "grad_norm": 0.43229252099990845,
      "learning_rate": 2.234404960365834e-06,
      "loss": 0.0083,
      "step": 2377060
    },
    {
      "epoch": 3.8901435557039337,
      "grad_norm": 0.6329357624053955,
      "learning_rate": 2.2343390681523166e-06,
      "loss": 0.0098,
      "step": 2377080
    },
    {
      "epoch": 3.890176286142587,
      "grad_norm": 0.2605389952659607,
      "learning_rate": 2.2342731759387994e-06,
      "loss": 0.0102,
      "step": 2377100
    },
    {
      "epoch": 3.89020901658124,
      "grad_norm": 0.3185320496559143,
      "learning_rate": 2.2342072837252825e-06,
      "loss": 0.0108,
      "step": 2377120
    },
    {
      "epoch": 3.8902417470198936,
      "grad_norm": 0.6561490297317505,
      "learning_rate": 2.2341413915117653e-06,
      "loss": 0.0097,
      "step": 2377140
    },
    {
      "epoch": 3.890274477458547,
      "grad_norm": 0.11211256682872772,
      "learning_rate": 2.234075499298248e-06,
      "loss": 0.0131,
      "step": 2377160
    },
    {
      "epoch": 3.8903072078972003,
      "grad_norm": 0.40677833557128906,
      "learning_rate": 2.2340096070847308e-06,
      "loss": 0.0089,
      "step": 2377180
    },
    {
      "epoch": 3.8903399383358535,
      "grad_norm": 0.714051365852356,
      "learning_rate": 2.233943714871214e-06,
      "loss": 0.0078,
      "step": 2377200
    },
    {
      "epoch": 3.890372668774507,
      "grad_norm": 0.22626487910747528,
      "learning_rate": 2.2338778226576967e-06,
      "loss": 0.0106,
      "step": 2377220
    },
    {
      "epoch": 3.89040539921316,
      "grad_norm": 0.13337740302085876,
      "learning_rate": 2.23381193044418e-06,
      "loss": 0.0103,
      "step": 2377240
    },
    {
      "epoch": 3.8904381296518133,
      "grad_norm": 0.10429910570383072,
      "learning_rate": 2.2337460382306626e-06,
      "loss": 0.0094,
      "step": 2377260
    },
    {
      "epoch": 3.890470860090467,
      "grad_norm": 0.0743451938033104,
      "learning_rate": 2.2336801460171453e-06,
      "loss": 0.0142,
      "step": 2377280
    },
    {
      "epoch": 3.8905035905291205,
      "grad_norm": 0.3816761076450348,
      "learning_rate": 2.233614253803628e-06,
      "loss": 0.0089,
      "step": 2377300
    },
    {
      "epoch": 3.8905363209677737,
      "grad_norm": 0.40558019280433655,
      "learning_rate": 2.233548361590111e-06,
      "loss": 0.0101,
      "step": 2377320
    },
    {
      "epoch": 3.890569051406427,
      "grad_norm": 0.2495274394750595,
      "learning_rate": 2.233482469376594e-06,
      "loss": 0.0088,
      "step": 2377340
    },
    {
      "epoch": 3.8906017818450804,
      "grad_norm": 0.2889198660850525,
      "learning_rate": 2.2334165771630767e-06,
      "loss": 0.0119,
      "step": 2377360
    },
    {
      "epoch": 3.8906345122837336,
      "grad_norm": 0.44969406723976135,
      "learning_rate": 2.23335068494956e-06,
      "loss": 0.0122,
      "step": 2377380
    },
    {
      "epoch": 3.8906672427223867,
      "grad_norm": 0.4311278760433197,
      "learning_rate": 2.2332847927360426e-06,
      "loss": 0.0079,
      "step": 2377400
    },
    {
      "epoch": 3.8906999731610403,
      "grad_norm": 0.463493674993515,
      "learning_rate": 2.2332189005225253e-06,
      "loss": 0.0095,
      "step": 2377420
    },
    {
      "epoch": 3.890732703599694,
      "grad_norm": 0.10342835634946823,
      "learning_rate": 2.2331530083090085e-06,
      "loss": 0.0119,
      "step": 2377440
    },
    {
      "epoch": 3.890765434038347,
      "grad_norm": 0.4374130964279175,
      "learning_rate": 2.2330871160954912e-06,
      "loss": 0.0128,
      "step": 2377460
    },
    {
      "epoch": 3.890798164477,
      "grad_norm": 1.8300505876541138,
      "learning_rate": 2.233021223881974e-06,
      "loss": 0.0107,
      "step": 2377480
    },
    {
      "epoch": 3.890830894915654,
      "grad_norm": 0.26684680581092834,
      "learning_rate": 2.2329553316684567e-06,
      "loss": 0.0105,
      "step": 2377500
    },
    {
      "epoch": 3.890863625354307,
      "grad_norm": 0.2650394141674042,
      "learning_rate": 2.23288943945494e-06,
      "loss": 0.0074,
      "step": 2377520
    },
    {
      "epoch": 3.89089635579296,
      "grad_norm": 0.2491765171289444,
      "learning_rate": 2.2328235472414226e-06,
      "loss": 0.0081,
      "step": 2377540
    },
    {
      "epoch": 3.8909290862316137,
      "grad_norm": 0.08165819942951202,
      "learning_rate": 2.2327576550279058e-06,
      "loss": 0.0081,
      "step": 2377560
    },
    {
      "epoch": 3.890961816670267,
      "grad_norm": 0.10199499875307083,
      "learning_rate": 2.2326917628143885e-06,
      "loss": 0.0123,
      "step": 2377580
    },
    {
      "epoch": 3.8909945471089205,
      "grad_norm": 0.0524255633354187,
      "learning_rate": 2.2326258706008713e-06,
      "loss": 0.015,
      "step": 2377600
    },
    {
      "epoch": 3.8910272775475736,
      "grad_norm": 0.07585295289754868,
      "learning_rate": 2.232559978387354e-06,
      "loss": 0.0083,
      "step": 2377620
    },
    {
      "epoch": 3.891060007986227,
      "grad_norm": 0.13572333753108978,
      "learning_rate": 2.232494086173837e-06,
      "loss": 0.0142,
      "step": 2377640
    },
    {
      "epoch": 3.8910927384248803,
      "grad_norm": 0.129461869597435,
      "learning_rate": 2.23242819396032e-06,
      "loss": 0.0183,
      "step": 2377660
    },
    {
      "epoch": 3.8911254688635335,
      "grad_norm": 0.2967672348022461,
      "learning_rate": 2.2323623017468026e-06,
      "loss": 0.0126,
      "step": 2377680
    },
    {
      "epoch": 3.891158199302187,
      "grad_norm": 0.38544008135795593,
      "learning_rate": 2.2322964095332854e-06,
      "loss": 0.0118,
      "step": 2377700
    },
    {
      "epoch": 3.8911909297408402,
      "grad_norm": 0.3680284917354584,
      "learning_rate": 2.2322305173197685e-06,
      "loss": 0.0091,
      "step": 2377720
    },
    {
      "epoch": 3.891223660179494,
      "grad_norm": 0.5159342288970947,
      "learning_rate": 2.2321646251062513e-06,
      "loss": 0.0106,
      "step": 2377740
    },
    {
      "epoch": 3.891256390618147,
      "grad_norm": 0.1581696718931198,
      "learning_rate": 2.2320987328927344e-06,
      "loss": 0.0098,
      "step": 2377760
    },
    {
      "epoch": 3.8912891210568006,
      "grad_norm": 0.2173130065202713,
      "learning_rate": 2.232032840679217e-06,
      "loss": 0.0138,
      "step": 2377780
    },
    {
      "epoch": 3.8913218514954537,
      "grad_norm": 0.17096740007400513,
      "learning_rate": 2.2319669484657e-06,
      "loss": 0.0071,
      "step": 2377800
    },
    {
      "epoch": 3.891354581934107,
      "grad_norm": 0.19987641274929047,
      "learning_rate": 2.2319010562521827e-06,
      "loss": 0.0089,
      "step": 2377820
    },
    {
      "epoch": 3.8913873123727605,
      "grad_norm": 0.1774131953716278,
      "learning_rate": 2.231835164038666e-06,
      "loss": 0.0142,
      "step": 2377840
    },
    {
      "epoch": 3.8914200428114136,
      "grad_norm": 0.20992963016033173,
      "learning_rate": 2.2317692718251486e-06,
      "loss": 0.0075,
      "step": 2377860
    },
    {
      "epoch": 3.891452773250067,
      "grad_norm": 0.3644181191921234,
      "learning_rate": 2.2317033796116313e-06,
      "loss": 0.0108,
      "step": 2377880
    },
    {
      "epoch": 3.8914855036887204,
      "grad_norm": 0.33934590220451355,
      "learning_rate": 2.2316374873981145e-06,
      "loss": 0.0114,
      "step": 2377900
    },
    {
      "epoch": 3.891518234127374,
      "grad_norm": 0.3255667984485626,
      "learning_rate": 2.231571595184597e-06,
      "loss": 0.008,
      "step": 2377920
    },
    {
      "epoch": 3.891550964566027,
      "grad_norm": 0.36043885350227356,
      "learning_rate": 2.2315057029710804e-06,
      "loss": 0.0123,
      "step": 2377940
    },
    {
      "epoch": 3.8915836950046803,
      "grad_norm": 0.3836881220340729,
      "learning_rate": 2.231439810757563e-06,
      "loss": 0.0094,
      "step": 2377960
    },
    {
      "epoch": 3.891616425443334,
      "grad_norm": 0.16479934751987457,
      "learning_rate": 2.231373918544046e-06,
      "loss": 0.0125,
      "step": 2377980
    },
    {
      "epoch": 3.891649155881987,
      "grad_norm": 0.18918387591838837,
      "learning_rate": 2.2313080263305286e-06,
      "loss": 0.0072,
      "step": 2378000
    },
    {
      "epoch": 3.8916818863206406,
      "grad_norm": 0.12927201390266418,
      "learning_rate": 2.2312421341170113e-06,
      "loss": 0.0116,
      "step": 2378020
    },
    {
      "epoch": 3.8917146167592938,
      "grad_norm": 0.12915807962417603,
      "learning_rate": 2.2311762419034945e-06,
      "loss": 0.0152,
      "step": 2378040
    },
    {
      "epoch": 3.8917473471979473,
      "grad_norm": 0.3134101331233978,
      "learning_rate": 2.2311103496899772e-06,
      "loss": 0.0111,
      "step": 2378060
    },
    {
      "epoch": 3.8917800776366005,
      "grad_norm": 0.8601952791213989,
      "learning_rate": 2.2310444574764604e-06,
      "loss": 0.0116,
      "step": 2378080
    },
    {
      "epoch": 3.8918128080752536,
      "grad_norm": 0.22631734609603882,
      "learning_rate": 2.230978565262943e-06,
      "loss": 0.0085,
      "step": 2378100
    },
    {
      "epoch": 3.8918455385139072,
      "grad_norm": 0.3451055884361267,
      "learning_rate": 2.230912673049426e-06,
      "loss": 0.0059,
      "step": 2378120
    },
    {
      "epoch": 3.8918782689525604,
      "grad_norm": 0.32695895433425903,
      "learning_rate": 2.230846780835909e-06,
      "loss": 0.0102,
      "step": 2378140
    },
    {
      "epoch": 3.891910999391214,
      "grad_norm": 0.6300952434539795,
      "learning_rate": 2.2307808886223918e-06,
      "loss": 0.0142,
      "step": 2378160
    },
    {
      "epoch": 3.891943729829867,
      "grad_norm": 0.7004247903823853,
      "learning_rate": 2.2307149964088745e-06,
      "loss": 0.008,
      "step": 2378180
    },
    {
      "epoch": 3.8919764602685207,
      "grad_norm": 0.1237536072731018,
      "learning_rate": 2.2306491041953573e-06,
      "loss": 0.0111,
      "step": 2378200
    },
    {
      "epoch": 3.892009190707174,
      "grad_norm": 0.08895303308963776,
      "learning_rate": 2.23058321198184e-06,
      "loss": 0.0108,
      "step": 2378220
    },
    {
      "epoch": 3.892041921145827,
      "grad_norm": 0.280879944562912,
      "learning_rate": 2.230517319768323e-06,
      "loss": 0.0076,
      "step": 2378240
    },
    {
      "epoch": 3.8920746515844806,
      "grad_norm": 0.15122829377651215,
      "learning_rate": 2.2304514275548063e-06,
      "loss": 0.0069,
      "step": 2378260
    },
    {
      "epoch": 3.8921073820231338,
      "grad_norm": 0.2406454086303711,
      "learning_rate": 2.230385535341289e-06,
      "loss": 0.0087,
      "step": 2378280
    },
    {
      "epoch": 3.8921401124617874,
      "grad_norm": 0.15939880907535553,
      "learning_rate": 2.230319643127772e-06,
      "loss": 0.0101,
      "step": 2378300
    },
    {
      "epoch": 3.8921728429004405,
      "grad_norm": 0.27073395252227783,
      "learning_rate": 2.2302537509142545e-06,
      "loss": 0.007,
      "step": 2378320
    },
    {
      "epoch": 3.892205573339094,
      "grad_norm": 0.732075572013855,
      "learning_rate": 2.2301878587007377e-06,
      "loss": 0.0137,
      "step": 2378340
    },
    {
      "epoch": 3.8922383037777473,
      "grad_norm": 0.18113940954208374,
      "learning_rate": 2.2301219664872204e-06,
      "loss": 0.008,
      "step": 2378360
    },
    {
      "epoch": 3.8922710342164004,
      "grad_norm": 0.06735938787460327,
      "learning_rate": 2.230056074273703e-06,
      "loss": 0.011,
      "step": 2378380
    },
    {
      "epoch": 3.892303764655054,
      "grad_norm": 0.20823483169078827,
      "learning_rate": 2.229990182060186e-06,
      "loss": 0.0099,
      "step": 2378400
    },
    {
      "epoch": 3.892336495093707,
      "grad_norm": 0.5527276396751404,
      "learning_rate": 2.2299242898466687e-06,
      "loss": 0.0118,
      "step": 2378420
    },
    {
      "epoch": 3.8923692255323603,
      "grad_norm": 0.11380264908075333,
      "learning_rate": 2.229858397633152e-06,
      "loss": 0.0075,
      "step": 2378440
    },
    {
      "epoch": 3.892401955971014,
      "grad_norm": 0.24005180597305298,
      "learning_rate": 2.229792505419635e-06,
      "loss": 0.0124,
      "step": 2378460
    },
    {
      "epoch": 3.8924346864096675,
      "grad_norm": 0.2246667593717575,
      "learning_rate": 2.2297266132061177e-06,
      "loss": 0.0142,
      "step": 2378480
    },
    {
      "epoch": 3.8924674168483206,
      "grad_norm": 0.5964361429214478,
      "learning_rate": 2.2296607209926005e-06,
      "loss": 0.0095,
      "step": 2378500
    },
    {
      "epoch": 3.892500147286974,
      "grad_norm": 1.0234194993972778,
      "learning_rate": 2.229594828779083e-06,
      "loss": 0.0104,
      "step": 2378520
    },
    {
      "epoch": 3.8925328777256274,
      "grad_norm": 0.6981378793716431,
      "learning_rate": 2.2295289365655664e-06,
      "loss": 0.0118,
      "step": 2378540
    },
    {
      "epoch": 3.8925656081642805,
      "grad_norm": 0.7352263331413269,
      "learning_rate": 2.229463044352049e-06,
      "loss": 0.0075,
      "step": 2378560
    },
    {
      "epoch": 3.8925983386029337,
      "grad_norm": 0.08302431553602219,
      "learning_rate": 2.229397152138532e-06,
      "loss": 0.0076,
      "step": 2378580
    },
    {
      "epoch": 3.8926310690415873,
      "grad_norm": 0.25832322239875793,
      "learning_rate": 2.229331259925015e-06,
      "loss": 0.0104,
      "step": 2378600
    },
    {
      "epoch": 3.892663799480241,
      "grad_norm": 0.2392788827419281,
      "learning_rate": 2.2292653677114978e-06,
      "loss": 0.0095,
      "step": 2378620
    },
    {
      "epoch": 3.892696529918894,
      "grad_norm": 0.09993156045675278,
      "learning_rate": 2.2291994754979805e-06,
      "loss": 0.0111,
      "step": 2378640
    },
    {
      "epoch": 3.892729260357547,
      "grad_norm": 0.6146432757377625,
      "learning_rate": 2.2291335832844637e-06,
      "loss": 0.0089,
      "step": 2378660
    },
    {
      "epoch": 3.8927619907962008,
      "grad_norm": 0.0938112661242485,
      "learning_rate": 2.2290676910709464e-06,
      "loss": 0.0104,
      "step": 2378680
    },
    {
      "epoch": 3.892794721234854,
      "grad_norm": 0.4031262695789337,
      "learning_rate": 2.229001798857429e-06,
      "loss": 0.0121,
      "step": 2378700
    },
    {
      "epoch": 3.892827451673507,
      "grad_norm": 0.346958190202713,
      "learning_rate": 2.228935906643912e-06,
      "loss": 0.0109,
      "step": 2378720
    },
    {
      "epoch": 3.8928601821121607,
      "grad_norm": 0.3519222140312195,
      "learning_rate": 2.228870014430395e-06,
      "loss": 0.0084,
      "step": 2378740
    },
    {
      "epoch": 3.8928929125508143,
      "grad_norm": 0.3266216218471527,
      "learning_rate": 2.2288041222168778e-06,
      "loss": 0.0127,
      "step": 2378760
    },
    {
      "epoch": 3.8929256429894674,
      "grad_norm": 0.4207233190536499,
      "learning_rate": 2.228738230003361e-06,
      "loss": 0.0097,
      "step": 2378780
    },
    {
      "epoch": 3.8929583734281206,
      "grad_norm": 0.6578459143638611,
      "learning_rate": 2.2286723377898437e-06,
      "loss": 0.0128,
      "step": 2378800
    },
    {
      "epoch": 3.892991103866774,
      "grad_norm": 0.2612927258014679,
      "learning_rate": 2.2286064455763264e-06,
      "loss": 0.0121,
      "step": 2378820
    },
    {
      "epoch": 3.8930238343054273,
      "grad_norm": 0.1759125143289566,
      "learning_rate": 2.228540553362809e-06,
      "loss": 0.0098,
      "step": 2378840
    },
    {
      "epoch": 3.8930565647440805,
      "grad_norm": 0.2850185036659241,
      "learning_rate": 2.2284746611492923e-06,
      "loss": 0.0091,
      "step": 2378860
    },
    {
      "epoch": 3.893089295182734,
      "grad_norm": 0.15045887231826782,
      "learning_rate": 2.228408768935775e-06,
      "loss": 0.0107,
      "step": 2378880
    },
    {
      "epoch": 3.8931220256213876,
      "grad_norm": 0.1793622374534607,
      "learning_rate": 2.228342876722258e-06,
      "loss": 0.0132,
      "step": 2378900
    },
    {
      "epoch": 3.893154756060041,
      "grad_norm": 1.2600548267364502,
      "learning_rate": 2.2282769845087405e-06,
      "loss": 0.0107,
      "step": 2378920
    },
    {
      "epoch": 3.893187486498694,
      "grad_norm": 0.32730644941329956,
      "learning_rate": 2.2282110922952237e-06,
      "loss": 0.0102,
      "step": 2378940
    },
    {
      "epoch": 3.8932202169373475,
      "grad_norm": 0.24562469124794006,
      "learning_rate": 2.2281452000817064e-06,
      "loss": 0.0082,
      "step": 2378960
    },
    {
      "epoch": 3.8932529473760007,
      "grad_norm": 0.7185380458831787,
      "learning_rate": 2.2280793078681896e-06,
      "loss": 0.0061,
      "step": 2378980
    },
    {
      "epoch": 3.893285677814654,
      "grad_norm": 0.2509707510471344,
      "learning_rate": 2.2280134156546724e-06,
      "loss": 0.0112,
      "step": 2379000
    },
    {
      "epoch": 3.8933184082533074,
      "grad_norm": 0.3586002290248871,
      "learning_rate": 2.227947523441155e-06,
      "loss": 0.0082,
      "step": 2379020
    },
    {
      "epoch": 3.8933511386919606,
      "grad_norm": 0.183623269200325,
      "learning_rate": 2.227881631227638e-06,
      "loss": 0.0148,
      "step": 2379040
    },
    {
      "epoch": 3.893383869130614,
      "grad_norm": 0.39891645312309265,
      "learning_rate": 2.227815739014121e-06,
      "loss": 0.0084,
      "step": 2379060
    },
    {
      "epoch": 3.8934165995692673,
      "grad_norm": 0.2611302435398102,
      "learning_rate": 2.2277498468006037e-06,
      "loss": 0.0081,
      "step": 2379080
    },
    {
      "epoch": 3.893449330007921,
      "grad_norm": 0.38261160254478455,
      "learning_rate": 2.2276839545870865e-06,
      "loss": 0.0093,
      "step": 2379100
    },
    {
      "epoch": 3.893482060446574,
      "grad_norm": 0.33817198872566223,
      "learning_rate": 2.2276180623735692e-06,
      "loss": 0.0115,
      "step": 2379120
    },
    {
      "epoch": 3.8935147908852272,
      "grad_norm": 0.16152594983577728,
      "learning_rate": 2.2275521701600524e-06,
      "loss": 0.0106,
      "step": 2379140
    },
    {
      "epoch": 3.893547521323881,
      "grad_norm": 0.30925944447517395,
      "learning_rate": 2.227486277946535e-06,
      "loss": 0.01,
      "step": 2379160
    },
    {
      "epoch": 3.893580251762534,
      "grad_norm": 0.21216338872909546,
      "learning_rate": 2.2274203857330183e-06,
      "loss": 0.0134,
      "step": 2379180
    },
    {
      "epoch": 3.8936129822011876,
      "grad_norm": 0.10783523321151733,
      "learning_rate": 2.227354493519501e-06,
      "loss": 0.0084,
      "step": 2379200
    },
    {
      "epoch": 3.8936457126398407,
      "grad_norm": 0.7073766589164734,
      "learning_rate": 2.2272886013059838e-06,
      "loss": 0.0072,
      "step": 2379220
    },
    {
      "epoch": 3.8936784430784943,
      "grad_norm": 0.17433175444602966,
      "learning_rate": 2.2272227090924665e-06,
      "loss": 0.0117,
      "step": 2379240
    },
    {
      "epoch": 3.8937111735171475,
      "grad_norm": 0.2609046995639801,
      "learning_rate": 2.2271568168789497e-06,
      "loss": 0.0094,
      "step": 2379260
    },
    {
      "epoch": 3.8937439039558006,
      "grad_norm": 0.32561779022216797,
      "learning_rate": 2.2270909246654324e-06,
      "loss": 0.0107,
      "step": 2379280
    },
    {
      "epoch": 3.893776634394454,
      "grad_norm": 0.2199823260307312,
      "learning_rate": 2.227025032451915e-06,
      "loss": 0.0077,
      "step": 2379300
    },
    {
      "epoch": 3.8938093648331074,
      "grad_norm": 0.23635464906692505,
      "learning_rate": 2.2269591402383983e-06,
      "loss": 0.0097,
      "step": 2379320
    },
    {
      "epoch": 3.893842095271761,
      "grad_norm": 0.49285566806793213,
      "learning_rate": 2.226893248024881e-06,
      "loss": 0.0117,
      "step": 2379340
    },
    {
      "epoch": 3.893874825710414,
      "grad_norm": 0.06771363317966461,
      "learning_rate": 2.226827355811364e-06,
      "loss": 0.0099,
      "step": 2379360
    },
    {
      "epoch": 3.8939075561490677,
      "grad_norm": 0.4315823018550873,
      "learning_rate": 2.226761463597847e-06,
      "loss": 0.0098,
      "step": 2379380
    },
    {
      "epoch": 3.893940286587721,
      "grad_norm": 0.19994127750396729,
      "learning_rate": 2.2266955713843297e-06,
      "loss": 0.0144,
      "step": 2379400
    },
    {
      "epoch": 3.893973017026374,
      "grad_norm": 0.4025917053222656,
      "learning_rate": 2.2266296791708124e-06,
      "loss": 0.0134,
      "step": 2379420
    },
    {
      "epoch": 3.8940057474650276,
      "grad_norm": 0.12363777309656143,
      "learning_rate": 2.226563786957295e-06,
      "loss": 0.0074,
      "step": 2379440
    },
    {
      "epoch": 3.8940384779036807,
      "grad_norm": 0.2657504379749298,
      "learning_rate": 2.2264978947437783e-06,
      "loss": 0.018,
      "step": 2379460
    },
    {
      "epoch": 3.8940712083423343,
      "grad_norm": 0.2289419174194336,
      "learning_rate": 2.2264320025302615e-06,
      "loss": 0.0091,
      "step": 2379480
    },
    {
      "epoch": 3.8941039387809875,
      "grad_norm": 0.13964208960533142,
      "learning_rate": 2.2263661103167442e-06,
      "loss": 0.0118,
      "step": 2379500
    },
    {
      "epoch": 3.894136669219641,
      "grad_norm": 0.2426096349954605,
      "learning_rate": 2.226300218103227e-06,
      "loss": 0.0159,
      "step": 2379520
    },
    {
      "epoch": 3.8941693996582942,
      "grad_norm": 0.17349617183208466,
      "learning_rate": 2.2262343258897097e-06,
      "loss": 0.0081,
      "step": 2379540
    },
    {
      "epoch": 3.8942021300969474,
      "grad_norm": 0.07094680517911911,
      "learning_rate": 2.226168433676193e-06,
      "loss": 0.0137,
      "step": 2379560
    },
    {
      "epoch": 3.894234860535601,
      "grad_norm": 0.33392050862312317,
      "learning_rate": 2.2261025414626756e-06,
      "loss": 0.0114,
      "step": 2379580
    },
    {
      "epoch": 3.894267590974254,
      "grad_norm": 0.4236653447151184,
      "learning_rate": 2.2260366492491584e-06,
      "loss": 0.0101,
      "step": 2379600
    },
    {
      "epoch": 3.8943003214129077,
      "grad_norm": 0.36611631512641907,
      "learning_rate": 2.225970757035641e-06,
      "loss": 0.0138,
      "step": 2379620
    },
    {
      "epoch": 3.894333051851561,
      "grad_norm": 0.31690531969070435,
      "learning_rate": 2.225904864822124e-06,
      "loss": 0.0086,
      "step": 2379640
    },
    {
      "epoch": 3.8943657822902145,
      "grad_norm": 0.24891656637191772,
      "learning_rate": 2.225838972608607e-06,
      "loss": 0.0156,
      "step": 2379660
    },
    {
      "epoch": 3.8943985127288676,
      "grad_norm": 0.42022207379341125,
      "learning_rate": 2.22577308039509e-06,
      "loss": 0.0103,
      "step": 2379680
    },
    {
      "epoch": 3.8944312431675208,
      "grad_norm": 0.2518467605113983,
      "learning_rate": 2.225707188181573e-06,
      "loss": 0.01,
      "step": 2379700
    },
    {
      "epoch": 3.8944639736061744,
      "grad_norm": 0.18190455436706543,
      "learning_rate": 2.2256412959680556e-06,
      "loss": 0.0076,
      "step": 2379720
    },
    {
      "epoch": 3.8944967040448275,
      "grad_norm": 0.8073619604110718,
      "learning_rate": 2.2255754037545384e-06,
      "loss": 0.0114,
      "step": 2379740
    },
    {
      "epoch": 3.894529434483481,
      "grad_norm": 0.22247661650180817,
      "learning_rate": 2.2255095115410215e-06,
      "loss": 0.0087,
      "step": 2379760
    },
    {
      "epoch": 3.8945621649221343,
      "grad_norm": 0.1703866720199585,
      "learning_rate": 2.2254436193275043e-06,
      "loss": 0.0095,
      "step": 2379780
    },
    {
      "epoch": 3.894594895360788,
      "grad_norm": 0.24309185147285461,
      "learning_rate": 2.225377727113987e-06,
      "loss": 0.0072,
      "step": 2379800
    },
    {
      "epoch": 3.894627625799441,
      "grad_norm": 0.11319581419229507,
      "learning_rate": 2.2253118349004698e-06,
      "loss": 0.0108,
      "step": 2379820
    },
    {
      "epoch": 3.894660356238094,
      "grad_norm": 0.14630430936813354,
      "learning_rate": 2.225245942686953e-06,
      "loss": 0.0072,
      "step": 2379840
    },
    {
      "epoch": 3.8946930866767477,
      "grad_norm": 0.16971156001091003,
      "learning_rate": 2.2251800504734357e-06,
      "loss": 0.0073,
      "step": 2379860
    },
    {
      "epoch": 3.894725817115401,
      "grad_norm": 0.05405941233038902,
      "learning_rate": 2.225114158259919e-06,
      "loss": 0.013,
      "step": 2379880
    },
    {
      "epoch": 3.8947585475540545,
      "grad_norm": 0.6352961659431458,
      "learning_rate": 2.2250482660464016e-06,
      "loss": 0.0119,
      "step": 2379900
    },
    {
      "epoch": 3.8947912779927076,
      "grad_norm": 0.5082038640975952,
      "learning_rate": 2.2249823738328843e-06,
      "loss": 0.0075,
      "step": 2379920
    },
    {
      "epoch": 3.8948240084313612,
      "grad_norm": 0.09342339634895325,
      "learning_rate": 2.224916481619367e-06,
      "loss": 0.0115,
      "step": 2379940
    },
    {
      "epoch": 3.8948567388700144,
      "grad_norm": 0.23985201120376587,
      "learning_rate": 2.22485058940585e-06,
      "loss": 0.0093,
      "step": 2379960
    },
    {
      "epoch": 3.8948894693086675,
      "grad_norm": 0.06727200746536255,
      "learning_rate": 2.224784697192333e-06,
      "loss": 0.0116,
      "step": 2379980
    },
    {
      "epoch": 3.894922199747321,
      "grad_norm": 0.14591623842716217,
      "learning_rate": 2.2247188049788157e-06,
      "loss": 0.0081,
      "step": 2380000
    },
    {
      "epoch": 3.8949549301859743,
      "grad_norm": 0.14836741983890533,
      "learning_rate": 2.224652912765299e-06,
      "loss": 0.0105,
      "step": 2380020
    },
    {
      "epoch": 3.8949876606246274,
      "grad_norm": 0.07822959870100021,
      "learning_rate": 2.2245870205517816e-06,
      "loss": 0.0161,
      "step": 2380040
    },
    {
      "epoch": 3.895020391063281,
      "grad_norm": 0.11047997325658798,
      "learning_rate": 2.2245211283382643e-06,
      "loss": 0.0085,
      "step": 2380060
    },
    {
      "epoch": 3.8950531215019346,
      "grad_norm": 0.2572782635688782,
      "learning_rate": 2.2244552361247475e-06,
      "loss": 0.0067,
      "step": 2380080
    },
    {
      "epoch": 3.8950858519405878,
      "grad_norm": 0.2911519408226013,
      "learning_rate": 2.2243893439112302e-06,
      "loss": 0.0065,
      "step": 2380100
    },
    {
      "epoch": 3.895118582379241,
      "grad_norm": 0.15769049525260925,
      "learning_rate": 2.224323451697713e-06,
      "loss": 0.0089,
      "step": 2380120
    },
    {
      "epoch": 3.8951513128178945,
      "grad_norm": 0.36496269702911377,
      "learning_rate": 2.2242575594841957e-06,
      "loss": 0.0122,
      "step": 2380140
    },
    {
      "epoch": 3.8951840432565477,
      "grad_norm": 0.22205471992492676,
      "learning_rate": 2.224191667270679e-06,
      "loss": 0.0114,
      "step": 2380160
    },
    {
      "epoch": 3.895216773695201,
      "grad_norm": 0.15935564041137695,
      "learning_rate": 2.2241257750571616e-06,
      "loss": 0.011,
      "step": 2380180
    },
    {
      "epoch": 3.8952495041338544,
      "grad_norm": 0.2198518067598343,
      "learning_rate": 2.2240598828436448e-06,
      "loss": 0.0127,
      "step": 2380200
    },
    {
      "epoch": 3.895282234572508,
      "grad_norm": 0.33113187551498413,
      "learning_rate": 2.2239939906301275e-06,
      "loss": 0.0127,
      "step": 2380220
    },
    {
      "epoch": 3.895314965011161,
      "grad_norm": 0.18441757559776306,
      "learning_rate": 2.2239280984166103e-06,
      "loss": 0.014,
      "step": 2380240
    },
    {
      "epoch": 3.8953476954498143,
      "grad_norm": 0.4679829478263855,
      "learning_rate": 2.223862206203093e-06,
      "loss": 0.0088,
      "step": 2380260
    },
    {
      "epoch": 3.895380425888468,
      "grad_norm": 0.13054810464382172,
      "learning_rate": 2.223796313989576e-06,
      "loss": 0.0134,
      "step": 2380280
    },
    {
      "epoch": 3.895413156327121,
      "grad_norm": 0.2554324269294739,
      "learning_rate": 2.223730421776059e-06,
      "loss": 0.0107,
      "step": 2380300
    },
    {
      "epoch": 3.895445886765774,
      "grad_norm": 0.3847962021827698,
      "learning_rate": 2.2236645295625416e-06,
      "loss": 0.0111,
      "step": 2380320
    },
    {
      "epoch": 3.895478617204428,
      "grad_norm": 0.3762606084346771,
      "learning_rate": 2.2235986373490244e-06,
      "loss": 0.0151,
      "step": 2380340
    },
    {
      "epoch": 3.8955113476430814,
      "grad_norm": 0.23950478434562683,
      "learning_rate": 2.2235327451355075e-06,
      "loss": 0.0115,
      "step": 2380360
    },
    {
      "epoch": 3.8955440780817345,
      "grad_norm": 0.6383917331695557,
      "learning_rate": 2.2234668529219903e-06,
      "loss": 0.0086,
      "step": 2380380
    },
    {
      "epoch": 3.8955768085203877,
      "grad_norm": 0.147680401802063,
      "learning_rate": 2.2234009607084735e-06,
      "loss": 0.013,
      "step": 2380400
    },
    {
      "epoch": 3.8956095389590413,
      "grad_norm": 0.12842291593551636,
      "learning_rate": 2.223335068494956e-06,
      "loss": 0.0091,
      "step": 2380420
    },
    {
      "epoch": 3.8956422693976944,
      "grad_norm": 0.23174580931663513,
      "learning_rate": 2.223269176281439e-06,
      "loss": 0.0091,
      "step": 2380440
    },
    {
      "epoch": 3.8956749998363476,
      "grad_norm": 0.5895920991897583,
      "learning_rate": 2.2232032840679217e-06,
      "loss": 0.013,
      "step": 2380460
    },
    {
      "epoch": 3.895707730275001,
      "grad_norm": 0.46367302536964417,
      "learning_rate": 2.223137391854405e-06,
      "loss": 0.0106,
      "step": 2380480
    },
    {
      "epoch": 3.8957404607136543,
      "grad_norm": 0.5492613315582275,
      "learning_rate": 2.2230714996408876e-06,
      "loss": 0.0131,
      "step": 2380500
    },
    {
      "epoch": 3.895773191152308,
      "grad_norm": 0.2387217879295349,
      "learning_rate": 2.2230056074273703e-06,
      "loss": 0.0093,
      "step": 2380520
    },
    {
      "epoch": 3.895805921590961,
      "grad_norm": 0.33158037066459656,
      "learning_rate": 2.2229397152138535e-06,
      "loss": 0.0068,
      "step": 2380540
    },
    {
      "epoch": 3.8958386520296147,
      "grad_norm": 0.57183837890625,
      "learning_rate": 2.2228738230003362e-06,
      "loss": 0.0126,
      "step": 2380560
    },
    {
      "epoch": 3.895871382468268,
      "grad_norm": 0.13007362186908722,
      "learning_rate": 2.2228079307868194e-06,
      "loss": 0.0172,
      "step": 2380580
    },
    {
      "epoch": 3.895904112906921,
      "grad_norm": 0.35106462240219116,
      "learning_rate": 2.222742038573302e-06,
      "loss": 0.0092,
      "step": 2380600
    },
    {
      "epoch": 3.8959368433455746,
      "grad_norm": 0.16668687760829926,
      "learning_rate": 2.222676146359785e-06,
      "loss": 0.0094,
      "step": 2380620
    },
    {
      "epoch": 3.8959695737842277,
      "grad_norm": 0.20579120516777039,
      "learning_rate": 2.2226102541462676e-06,
      "loss": 0.0056,
      "step": 2380640
    },
    {
      "epoch": 3.8960023042228813,
      "grad_norm": 0.20575034618377686,
      "learning_rate": 2.2225443619327503e-06,
      "loss": 0.0085,
      "step": 2380660
    },
    {
      "epoch": 3.8960350346615344,
      "grad_norm": 0.6401746869087219,
      "learning_rate": 2.2224784697192335e-06,
      "loss": 0.0097,
      "step": 2380680
    },
    {
      "epoch": 3.896067765100188,
      "grad_norm": 0.31520628929138184,
      "learning_rate": 2.2224125775057162e-06,
      "loss": 0.0072,
      "step": 2380700
    },
    {
      "epoch": 3.896100495538841,
      "grad_norm": 0.3240112066268921,
      "learning_rate": 2.2223466852921994e-06,
      "loss": 0.0139,
      "step": 2380720
    },
    {
      "epoch": 3.8961332259774943,
      "grad_norm": 0.11678636819124222,
      "learning_rate": 2.222280793078682e-06,
      "loss": 0.0125,
      "step": 2380740
    },
    {
      "epoch": 3.896165956416148,
      "grad_norm": 0.34467318654060364,
      "learning_rate": 2.222214900865165e-06,
      "loss": 0.0084,
      "step": 2380760
    },
    {
      "epoch": 3.896198686854801,
      "grad_norm": 0.22178402543067932,
      "learning_rate": 2.222149008651648e-06,
      "loss": 0.0082,
      "step": 2380780
    },
    {
      "epoch": 3.8962314172934547,
      "grad_norm": 0.20292022824287415,
      "learning_rate": 2.2220831164381308e-06,
      "loss": 0.0123,
      "step": 2380800
    },
    {
      "epoch": 3.896264147732108,
      "grad_norm": 0.2145567238330841,
      "learning_rate": 2.2220172242246135e-06,
      "loss": 0.0122,
      "step": 2380820
    },
    {
      "epoch": 3.8962968781707614,
      "grad_norm": 0.30605369806289673,
      "learning_rate": 2.2219513320110963e-06,
      "loss": 0.0085,
      "step": 2380840
    },
    {
      "epoch": 3.8963296086094146,
      "grad_norm": 0.3592970669269562,
      "learning_rate": 2.221885439797579e-06,
      "loss": 0.0077,
      "step": 2380860
    },
    {
      "epoch": 3.8963623390480677,
      "grad_norm": 0.32179388403892517,
      "learning_rate": 2.221819547584062e-06,
      "loss": 0.0136,
      "step": 2380880
    },
    {
      "epoch": 3.8963950694867213,
      "grad_norm": 0.14637143909931183,
      "learning_rate": 2.2217536553705453e-06,
      "loss": 0.0094,
      "step": 2380900
    },
    {
      "epoch": 3.8964277999253745,
      "grad_norm": 0.12913958728313446,
      "learning_rate": 2.221687763157028e-06,
      "loss": 0.0118,
      "step": 2380920
    },
    {
      "epoch": 3.896460530364028,
      "grad_norm": 0.27287501096725464,
      "learning_rate": 2.221621870943511e-06,
      "loss": 0.0106,
      "step": 2380940
    },
    {
      "epoch": 3.896493260802681,
      "grad_norm": 0.15715260803699493,
      "learning_rate": 2.2215559787299936e-06,
      "loss": 0.0106,
      "step": 2380960
    },
    {
      "epoch": 3.896525991241335,
      "grad_norm": 0.1819634735584259,
      "learning_rate": 2.2214900865164767e-06,
      "loss": 0.0134,
      "step": 2380980
    },
    {
      "epoch": 3.896558721679988,
      "grad_norm": 0.2189590036869049,
      "learning_rate": 2.2214241943029595e-06,
      "loss": 0.0092,
      "step": 2381000
    },
    {
      "epoch": 3.896591452118641,
      "grad_norm": 0.2718655467033386,
      "learning_rate": 2.221358302089442e-06,
      "loss": 0.0123,
      "step": 2381020
    },
    {
      "epoch": 3.8966241825572947,
      "grad_norm": 0.32428982853889465,
      "learning_rate": 2.221292409875925e-06,
      "loss": 0.009,
      "step": 2381040
    },
    {
      "epoch": 3.896656912995948,
      "grad_norm": 0.14546778798103333,
      "learning_rate": 2.2212265176624077e-06,
      "loss": 0.0105,
      "step": 2381060
    },
    {
      "epoch": 3.8966896434346014,
      "grad_norm": 0.34827056527137756,
      "learning_rate": 2.221160625448891e-06,
      "loss": 0.0069,
      "step": 2381080
    },
    {
      "epoch": 3.8967223738732546,
      "grad_norm": 0.4511249363422394,
      "learning_rate": 2.221094733235374e-06,
      "loss": 0.0101,
      "step": 2381100
    },
    {
      "epoch": 3.896755104311908,
      "grad_norm": 0.15876857936382294,
      "learning_rate": 2.2210288410218567e-06,
      "loss": 0.007,
      "step": 2381120
    },
    {
      "epoch": 3.8967878347505613,
      "grad_norm": 0.32528743147850037,
      "learning_rate": 2.2209629488083395e-06,
      "loss": 0.0111,
      "step": 2381140
    },
    {
      "epoch": 3.8968205651892145,
      "grad_norm": 0.19567875564098358,
      "learning_rate": 2.2208970565948222e-06,
      "loss": 0.0104,
      "step": 2381160
    },
    {
      "epoch": 3.896853295627868,
      "grad_norm": 0.2826131284236908,
      "learning_rate": 2.2208311643813054e-06,
      "loss": 0.0128,
      "step": 2381180
    },
    {
      "epoch": 3.8968860260665212,
      "grad_norm": 0.23083746433258057,
      "learning_rate": 2.220765272167788e-06,
      "loss": 0.008,
      "step": 2381200
    },
    {
      "epoch": 3.896918756505175,
      "grad_norm": 0.5256085991859436,
      "learning_rate": 2.220699379954271e-06,
      "loss": 0.0092,
      "step": 2381220
    },
    {
      "epoch": 3.896951486943828,
      "grad_norm": 0.17800241708755493,
      "learning_rate": 2.220633487740754e-06,
      "loss": 0.0105,
      "step": 2381240
    },
    {
      "epoch": 3.8969842173824816,
      "grad_norm": 0.3503071069717407,
      "learning_rate": 2.2205675955272368e-06,
      "loss": 0.018,
      "step": 2381260
    },
    {
      "epoch": 3.8970169478211347,
      "grad_norm": 0.22187539935112,
      "learning_rate": 2.2205017033137195e-06,
      "loss": 0.0102,
      "step": 2381280
    },
    {
      "epoch": 3.897049678259788,
      "grad_norm": 0.20416218042373657,
      "learning_rate": 2.2204358111002027e-06,
      "loss": 0.0104,
      "step": 2381300
    },
    {
      "epoch": 3.8970824086984415,
      "grad_norm": 0.08030374348163605,
      "learning_rate": 2.2203699188866854e-06,
      "loss": 0.0106,
      "step": 2381320
    },
    {
      "epoch": 3.8971151391370946,
      "grad_norm": 0.3304680287837982,
      "learning_rate": 2.220304026673168e-06,
      "loss": 0.0088,
      "step": 2381340
    },
    {
      "epoch": 3.897147869575748,
      "grad_norm": 0.7691118121147156,
      "learning_rate": 2.220238134459651e-06,
      "loss": 0.0114,
      "step": 2381360
    },
    {
      "epoch": 3.8971806000144014,
      "grad_norm": 0.3355821967124939,
      "learning_rate": 2.220172242246134e-06,
      "loss": 0.0106,
      "step": 2381380
    },
    {
      "epoch": 3.897213330453055,
      "grad_norm": 0.14324364066123962,
      "learning_rate": 2.220106350032617e-06,
      "loss": 0.0093,
      "step": 2381400
    },
    {
      "epoch": 3.897246060891708,
      "grad_norm": 0.2561488449573517,
      "learning_rate": 2.2200404578191e-06,
      "loss": 0.0145,
      "step": 2381420
    },
    {
      "epoch": 3.8972787913303613,
      "grad_norm": 0.1685025840997696,
      "learning_rate": 2.2199745656055827e-06,
      "loss": 0.0132,
      "step": 2381440
    },
    {
      "epoch": 3.897311521769015,
      "grad_norm": 0.16293011605739594,
      "learning_rate": 2.2199086733920654e-06,
      "loss": 0.0099,
      "step": 2381460
    },
    {
      "epoch": 3.897344252207668,
      "grad_norm": 0.40092843770980835,
      "learning_rate": 2.219842781178548e-06,
      "loss": 0.0099,
      "step": 2381480
    },
    {
      "epoch": 3.897376982646321,
      "grad_norm": 0.515289843082428,
      "learning_rate": 2.2197768889650313e-06,
      "loss": 0.0076,
      "step": 2381500
    },
    {
      "epoch": 3.8974097130849747,
      "grad_norm": 0.31654274463653564,
      "learning_rate": 2.219710996751514e-06,
      "loss": 0.0122,
      "step": 2381520
    },
    {
      "epoch": 3.8974424435236283,
      "grad_norm": 0.11325845122337341,
      "learning_rate": 2.219645104537997e-06,
      "loss": 0.0113,
      "step": 2381540
    },
    {
      "epoch": 3.8974751739622815,
      "grad_norm": 0.2161172777414322,
      "learning_rate": 2.2195792123244796e-06,
      "loss": 0.0088,
      "step": 2381560
    },
    {
      "epoch": 3.8975079044009346,
      "grad_norm": 0.2436530888080597,
      "learning_rate": 2.2195133201109627e-06,
      "loss": 0.0135,
      "step": 2381580
    },
    {
      "epoch": 3.8975406348395882,
      "grad_norm": 0.20679856836795807,
      "learning_rate": 2.2194474278974455e-06,
      "loss": 0.0087,
      "step": 2381600
    },
    {
      "epoch": 3.8975733652782414,
      "grad_norm": 0.3380541503429413,
      "learning_rate": 2.2193815356839286e-06,
      "loss": 0.0094,
      "step": 2381620
    },
    {
      "epoch": 3.8976060957168945,
      "grad_norm": 0.16490277647972107,
      "learning_rate": 2.2193156434704114e-06,
      "loss": 0.0113,
      "step": 2381640
    },
    {
      "epoch": 3.897638826155548,
      "grad_norm": 0.24160613119602203,
      "learning_rate": 2.219249751256894e-06,
      "loss": 0.0097,
      "step": 2381660
    },
    {
      "epoch": 3.8976715565942017,
      "grad_norm": 0.06320717185735703,
      "learning_rate": 2.219183859043377e-06,
      "loss": 0.0097,
      "step": 2381680
    },
    {
      "epoch": 3.897704287032855,
      "grad_norm": 0.4080093204975128,
      "learning_rate": 2.21911796682986e-06,
      "loss": 0.0116,
      "step": 2381700
    },
    {
      "epoch": 3.897737017471508,
      "grad_norm": 0.37931278347969055,
      "learning_rate": 2.2190520746163427e-06,
      "loss": 0.0101,
      "step": 2381720
    },
    {
      "epoch": 3.8977697479101616,
      "grad_norm": 0.12636859714984894,
      "learning_rate": 2.2189861824028255e-06,
      "loss": 0.0116,
      "step": 2381740
    },
    {
      "epoch": 3.8978024783488148,
      "grad_norm": 0.13076597452163696,
      "learning_rate": 2.2189202901893082e-06,
      "loss": 0.0081,
      "step": 2381760
    },
    {
      "epoch": 3.897835208787468,
      "grad_norm": 0.16228701174259186,
      "learning_rate": 2.2188543979757914e-06,
      "loss": 0.0102,
      "step": 2381780
    },
    {
      "epoch": 3.8978679392261215,
      "grad_norm": 0.20568804442882538,
      "learning_rate": 2.218788505762274e-06,
      "loss": 0.0106,
      "step": 2381800
    },
    {
      "epoch": 3.897900669664775,
      "grad_norm": 0.3755970001220703,
      "learning_rate": 2.2187226135487573e-06,
      "loss": 0.0102,
      "step": 2381820
    },
    {
      "epoch": 3.8979334001034283,
      "grad_norm": 0.21070459485054016,
      "learning_rate": 2.21865672133524e-06,
      "loss": 0.0082,
      "step": 2381840
    },
    {
      "epoch": 3.8979661305420814,
      "grad_norm": 0.2016696035861969,
      "learning_rate": 2.2185908291217228e-06,
      "loss": 0.0109,
      "step": 2381860
    },
    {
      "epoch": 3.897998860980735,
      "grad_norm": 0.11718399077653885,
      "learning_rate": 2.2185249369082055e-06,
      "loss": 0.0102,
      "step": 2381880
    },
    {
      "epoch": 3.898031591419388,
      "grad_norm": 0.19168585538864136,
      "learning_rate": 2.2184590446946887e-06,
      "loss": 0.0092,
      "step": 2381900
    },
    {
      "epoch": 3.8980643218580413,
      "grad_norm": 5.09884786605835,
      "learning_rate": 2.2183931524811714e-06,
      "loss": 0.0081,
      "step": 2381920
    },
    {
      "epoch": 3.898097052296695,
      "grad_norm": 0.054757095873355865,
      "learning_rate": 2.218327260267654e-06,
      "loss": 0.0108,
      "step": 2381940
    },
    {
      "epoch": 3.8981297827353485,
      "grad_norm": 0.19413192570209503,
      "learning_rate": 2.2182613680541373e-06,
      "loss": 0.0092,
      "step": 2381960
    },
    {
      "epoch": 3.8981625131740016,
      "grad_norm": 0.12485478818416595,
      "learning_rate": 2.21819547584062e-06,
      "loss": 0.01,
      "step": 2381980
    },
    {
      "epoch": 3.898195243612655,
      "grad_norm": 0.27243852615356445,
      "learning_rate": 2.2181295836271032e-06,
      "loss": 0.0088,
      "step": 2382000
    },
    {
      "epoch": 3.8982279740513084,
      "grad_norm": 0.2665271759033203,
      "learning_rate": 2.218063691413586e-06,
      "loss": 0.0076,
      "step": 2382020
    },
    {
      "epoch": 3.8982607044899615,
      "grad_norm": 0.25094175338745117,
      "learning_rate": 2.2179977992000687e-06,
      "loss": 0.0122,
      "step": 2382040
    },
    {
      "epoch": 3.8982934349286147,
      "grad_norm": 0.2703058123588562,
      "learning_rate": 2.2179319069865514e-06,
      "loss": 0.0063,
      "step": 2382060
    },
    {
      "epoch": 3.8983261653672683,
      "grad_norm": 0.1383519172668457,
      "learning_rate": 2.217866014773034e-06,
      "loss": 0.0082,
      "step": 2382080
    },
    {
      "epoch": 3.8983588958059214,
      "grad_norm": 0.13430865108966827,
      "learning_rate": 2.2178001225595173e-06,
      "loss": 0.0154,
      "step": 2382100
    },
    {
      "epoch": 3.898391626244575,
      "grad_norm": 0.21236690878868103,
      "learning_rate": 2.2177342303460005e-06,
      "loss": 0.0077,
      "step": 2382120
    },
    {
      "epoch": 3.898424356683228,
      "grad_norm": 0.5241343975067139,
      "learning_rate": 2.2176683381324832e-06,
      "loss": 0.0105,
      "step": 2382140
    },
    {
      "epoch": 3.8984570871218818,
      "grad_norm": 0.20904487371444702,
      "learning_rate": 2.217602445918966e-06,
      "loss": 0.0105,
      "step": 2382160
    },
    {
      "epoch": 3.898489817560535,
      "grad_norm": 0.4824725389480591,
      "learning_rate": 2.2175365537054487e-06,
      "loss": 0.0083,
      "step": 2382180
    },
    {
      "epoch": 3.898522547999188,
      "grad_norm": 0.23821935057640076,
      "learning_rate": 2.217470661491932e-06,
      "loss": 0.0111,
      "step": 2382200
    },
    {
      "epoch": 3.8985552784378417,
      "grad_norm": 0.2078583538532257,
      "learning_rate": 2.2174047692784146e-06,
      "loss": 0.007,
      "step": 2382220
    },
    {
      "epoch": 3.898588008876495,
      "grad_norm": 0.25747647881507874,
      "learning_rate": 2.2173388770648974e-06,
      "loss": 0.0114,
      "step": 2382240
    },
    {
      "epoch": 3.8986207393151484,
      "grad_norm": 0.25120729207992554,
      "learning_rate": 2.21727298485138e-06,
      "loss": 0.0155,
      "step": 2382260
    },
    {
      "epoch": 3.8986534697538016,
      "grad_norm": 0.2148629128932953,
      "learning_rate": 2.217207092637863e-06,
      "loss": 0.012,
      "step": 2382280
    },
    {
      "epoch": 3.898686200192455,
      "grad_norm": 0.06926233321428299,
      "learning_rate": 2.217141200424346e-06,
      "loss": 0.012,
      "step": 2382300
    },
    {
      "epoch": 3.8987189306311083,
      "grad_norm": 0.13740293681621552,
      "learning_rate": 2.217075308210829e-06,
      "loss": 0.0114,
      "step": 2382320
    },
    {
      "epoch": 3.8987516610697615,
      "grad_norm": 0.17821425199508667,
      "learning_rate": 2.217009415997312e-06,
      "loss": 0.0155,
      "step": 2382340
    },
    {
      "epoch": 3.898784391508415,
      "grad_norm": 0.32625821232795715,
      "learning_rate": 2.2169435237837947e-06,
      "loss": 0.0097,
      "step": 2382360
    },
    {
      "epoch": 3.898817121947068,
      "grad_norm": 0.2454424798488617,
      "learning_rate": 2.2168776315702774e-06,
      "loss": 0.0092,
      "step": 2382380
    },
    {
      "epoch": 3.898849852385722,
      "grad_norm": 0.06458401679992676,
      "learning_rate": 2.2168117393567606e-06,
      "loss": 0.0087,
      "step": 2382400
    },
    {
      "epoch": 3.898882582824375,
      "grad_norm": 0.053335659205913544,
      "learning_rate": 2.2167458471432433e-06,
      "loss": 0.0106,
      "step": 2382420
    },
    {
      "epoch": 3.8989153132630285,
      "grad_norm": 0.33171606063842773,
      "learning_rate": 2.216679954929726e-06,
      "loss": 0.0121,
      "step": 2382440
    },
    {
      "epoch": 3.8989480437016817,
      "grad_norm": 0.12634101510047913,
      "learning_rate": 2.2166140627162088e-06,
      "loss": 0.0087,
      "step": 2382460
    },
    {
      "epoch": 3.898980774140335,
      "grad_norm": 0.2630172073841095,
      "learning_rate": 2.216548170502692e-06,
      "loss": 0.0097,
      "step": 2382480
    },
    {
      "epoch": 3.8990135045789884,
      "grad_norm": 0.20939010381698608,
      "learning_rate": 2.2164822782891747e-06,
      "loss": 0.0149,
      "step": 2382500
    },
    {
      "epoch": 3.8990462350176416,
      "grad_norm": 0.33540070056915283,
      "learning_rate": 2.216416386075658e-06,
      "loss": 0.009,
      "step": 2382520
    },
    {
      "epoch": 3.899078965456295,
      "grad_norm": 0.10552625358104706,
      "learning_rate": 2.2163504938621406e-06,
      "loss": 0.0073,
      "step": 2382540
    },
    {
      "epoch": 3.8991116958949483,
      "grad_norm": 0.2291838824748993,
      "learning_rate": 2.2162846016486233e-06,
      "loss": 0.016,
      "step": 2382560
    },
    {
      "epoch": 3.899144426333602,
      "grad_norm": 0.4560910761356354,
      "learning_rate": 2.216218709435106e-06,
      "loss": 0.0119,
      "step": 2382580
    },
    {
      "epoch": 3.899177156772255,
      "grad_norm": 0.7021360397338867,
      "learning_rate": 2.2161528172215892e-06,
      "loss": 0.0153,
      "step": 2382600
    },
    {
      "epoch": 3.899209887210908,
      "grad_norm": 0.4534292221069336,
      "learning_rate": 2.216086925008072e-06,
      "loss": 0.0102,
      "step": 2382620
    },
    {
      "epoch": 3.899242617649562,
      "grad_norm": 0.17000983655452728,
      "learning_rate": 2.2160210327945547e-06,
      "loss": 0.011,
      "step": 2382640
    },
    {
      "epoch": 3.899275348088215,
      "grad_norm": 0.19046564400196075,
      "learning_rate": 2.215955140581038e-06,
      "loss": 0.0094,
      "step": 2382660
    },
    {
      "epoch": 3.8993080785268686,
      "grad_norm": 0.20835405588150024,
      "learning_rate": 2.2158892483675206e-06,
      "loss": 0.0119,
      "step": 2382680
    },
    {
      "epoch": 3.8993408089655217,
      "grad_norm": 0.5231007933616638,
      "learning_rate": 2.2158233561540033e-06,
      "loss": 0.0096,
      "step": 2382700
    },
    {
      "epoch": 3.8993735394041753,
      "grad_norm": 0.24280594289302826,
      "learning_rate": 2.2157574639404865e-06,
      "loss": 0.0133,
      "step": 2382720
    },
    {
      "epoch": 3.8994062698428285,
      "grad_norm": 0.7028642892837524,
      "learning_rate": 2.2156915717269692e-06,
      "loss": 0.0124,
      "step": 2382740
    },
    {
      "epoch": 3.8994390002814816,
      "grad_norm": 0.14397627115249634,
      "learning_rate": 2.215625679513452e-06,
      "loss": 0.009,
      "step": 2382760
    },
    {
      "epoch": 3.899471730720135,
      "grad_norm": 0.5144090056419373,
      "learning_rate": 2.2155597872999347e-06,
      "loss": 0.0124,
      "step": 2382780
    },
    {
      "epoch": 3.8995044611587883,
      "grad_norm": 0.4540652334690094,
      "learning_rate": 2.215493895086418e-06,
      "loss": 0.0095,
      "step": 2382800
    },
    {
      "epoch": 3.899537191597442,
      "grad_norm": 0.2983231842517853,
      "learning_rate": 2.2154280028729006e-06,
      "loss": 0.0076,
      "step": 2382820
    },
    {
      "epoch": 3.899569922036095,
      "grad_norm": 0.40348178148269653,
      "learning_rate": 2.215362110659384e-06,
      "loss": 0.0154,
      "step": 2382840
    },
    {
      "epoch": 3.8996026524747487,
      "grad_norm": 0.6498847603797913,
      "learning_rate": 2.2152962184458665e-06,
      "loss": 0.0145,
      "step": 2382860
    },
    {
      "epoch": 3.899635382913402,
      "grad_norm": 0.23528693616390228,
      "learning_rate": 2.2152303262323493e-06,
      "loss": 0.0085,
      "step": 2382880
    },
    {
      "epoch": 3.899668113352055,
      "grad_norm": 0.16535039246082306,
      "learning_rate": 2.215164434018832e-06,
      "loss": 0.0111,
      "step": 2382900
    },
    {
      "epoch": 3.8997008437907086,
      "grad_norm": 0.2100939154624939,
      "learning_rate": 2.215098541805315e-06,
      "loss": 0.0088,
      "step": 2382920
    },
    {
      "epoch": 3.8997335742293617,
      "grad_norm": 0.23473258316516876,
      "learning_rate": 2.215032649591798e-06,
      "loss": 0.0118,
      "step": 2382940
    },
    {
      "epoch": 3.8997663046680153,
      "grad_norm": 0.24457094073295593,
      "learning_rate": 2.2149667573782807e-06,
      "loss": 0.0078,
      "step": 2382960
    },
    {
      "epoch": 3.8997990351066685,
      "grad_norm": 0.2792308032512665,
      "learning_rate": 2.2149008651647634e-06,
      "loss": 0.0088,
      "step": 2382980
    },
    {
      "epoch": 3.899831765545322,
      "grad_norm": 0.18391269445419312,
      "learning_rate": 2.2148349729512466e-06,
      "loss": 0.0132,
      "step": 2383000
    },
    {
      "epoch": 3.899864495983975,
      "grad_norm": 0.28902339935302734,
      "learning_rate": 2.2147690807377293e-06,
      "loss": 0.0073,
      "step": 2383020
    },
    {
      "epoch": 3.8998972264226284,
      "grad_norm": 0.2731188237667084,
      "learning_rate": 2.2147031885242125e-06,
      "loss": 0.0087,
      "step": 2383040
    },
    {
      "epoch": 3.899929956861282,
      "grad_norm": 0.4052491784095764,
      "learning_rate": 2.214637296310695e-06,
      "loss": 0.0096,
      "step": 2383060
    },
    {
      "epoch": 3.899962687299935,
      "grad_norm": 0.22803440690040588,
      "learning_rate": 2.214571404097178e-06,
      "loss": 0.0098,
      "step": 2383080
    },
    {
      "epoch": 3.8999954177385883,
      "grad_norm": 0.033993303775787354,
      "learning_rate": 2.2145055118836607e-06,
      "loss": 0.0095,
      "step": 2383100
    },
    {
      "epoch": 3.900028148177242,
      "grad_norm": 0.31065991520881653,
      "learning_rate": 2.214439619670144e-06,
      "loss": 0.0076,
      "step": 2383120
    },
    {
      "epoch": 3.9000608786158955,
      "grad_norm": 0.2892102301120758,
      "learning_rate": 2.2143737274566266e-06,
      "loss": 0.0168,
      "step": 2383140
    },
    {
      "epoch": 3.9000936090545486,
      "grad_norm": 0.3405907452106476,
      "learning_rate": 2.2143078352431093e-06,
      "loss": 0.009,
      "step": 2383160
    },
    {
      "epoch": 3.9001263394932018,
      "grad_norm": 0.1316075474023819,
      "learning_rate": 2.2142419430295925e-06,
      "loss": 0.0115,
      "step": 2383180
    },
    {
      "epoch": 3.9001590699318553,
      "grad_norm": 0.6219108700752258,
      "learning_rate": 2.2141760508160752e-06,
      "loss": 0.0129,
      "step": 2383200
    },
    {
      "epoch": 3.9001918003705085,
      "grad_norm": 0.19630281627178192,
      "learning_rate": 2.2141101586025584e-06,
      "loss": 0.008,
      "step": 2383220
    },
    {
      "epoch": 3.9002245308091616,
      "grad_norm": 0.40254268050193787,
      "learning_rate": 2.214044266389041e-06,
      "loss": 0.007,
      "step": 2383240
    },
    {
      "epoch": 3.9002572612478152,
      "grad_norm": 0.14321330189704895,
      "learning_rate": 2.213978374175524e-06,
      "loss": 0.0135,
      "step": 2383260
    },
    {
      "epoch": 3.900289991686469,
      "grad_norm": 0.7209756374359131,
      "learning_rate": 2.2139124819620066e-06,
      "loss": 0.0153,
      "step": 2383280
    },
    {
      "epoch": 3.900322722125122,
      "grad_norm": 0.16649946570396423,
      "learning_rate": 2.2138465897484893e-06,
      "loss": 0.0117,
      "step": 2383300
    },
    {
      "epoch": 3.900355452563775,
      "grad_norm": 0.5606269836425781,
      "learning_rate": 2.2137806975349725e-06,
      "loss": 0.0085,
      "step": 2383320
    },
    {
      "epoch": 3.9003881830024287,
      "grad_norm": 0.3549547493457794,
      "learning_rate": 2.2137148053214553e-06,
      "loss": 0.0129,
      "step": 2383340
    },
    {
      "epoch": 3.900420913441082,
      "grad_norm": 0.4954944849014282,
      "learning_rate": 2.2136489131079384e-06,
      "loss": 0.0118,
      "step": 2383360
    },
    {
      "epoch": 3.900453643879735,
      "grad_norm": 0.39055702090263367,
      "learning_rate": 2.213583020894421e-06,
      "loss": 0.0104,
      "step": 2383380
    },
    {
      "epoch": 3.9004863743183886,
      "grad_norm": 0.08696463704109192,
      "learning_rate": 2.213517128680904e-06,
      "loss": 0.0124,
      "step": 2383400
    },
    {
      "epoch": 3.900519104757042,
      "grad_norm": 0.06222549080848694,
      "learning_rate": 2.213451236467387e-06,
      "loss": 0.0118,
      "step": 2383420
    },
    {
      "epoch": 3.9005518351956954,
      "grad_norm": 0.19610583782196045,
      "learning_rate": 2.21338534425387e-06,
      "loss": 0.0119,
      "step": 2383440
    },
    {
      "epoch": 3.9005845656343485,
      "grad_norm": 0.10619957745075226,
      "learning_rate": 2.2133194520403525e-06,
      "loss": 0.0099,
      "step": 2383460
    },
    {
      "epoch": 3.900617296073002,
      "grad_norm": 0.4154605269432068,
      "learning_rate": 2.2132535598268353e-06,
      "loss": 0.011,
      "step": 2383480
    },
    {
      "epoch": 3.9006500265116553,
      "grad_norm": 0.3896155059337616,
      "learning_rate": 2.213187667613318e-06,
      "loss": 0.0144,
      "step": 2383500
    },
    {
      "epoch": 3.9006827569503084,
      "grad_norm": 0.14897798001766205,
      "learning_rate": 2.213121775399801e-06,
      "loss": 0.0105,
      "step": 2383520
    },
    {
      "epoch": 3.900715487388962,
      "grad_norm": 0.4524662494659424,
      "learning_rate": 2.2130558831862843e-06,
      "loss": 0.0107,
      "step": 2383540
    },
    {
      "epoch": 3.900748217827615,
      "grad_norm": 0.23514093458652496,
      "learning_rate": 2.212989990972767e-06,
      "loss": 0.0105,
      "step": 2383560
    },
    {
      "epoch": 3.9007809482662688,
      "grad_norm": 0.09702098369598389,
      "learning_rate": 2.21292409875925e-06,
      "loss": 0.0105,
      "step": 2383580
    },
    {
      "epoch": 3.900813678704922,
      "grad_norm": 0.052256934344768524,
      "learning_rate": 2.2128582065457326e-06,
      "loss": 0.0129,
      "step": 2383600
    },
    {
      "epoch": 3.9008464091435755,
      "grad_norm": 0.3786791265010834,
      "learning_rate": 2.2127923143322157e-06,
      "loss": 0.0132,
      "step": 2383620
    },
    {
      "epoch": 3.9008791395822286,
      "grad_norm": 0.3865293562412262,
      "learning_rate": 2.2127264221186985e-06,
      "loss": 0.011,
      "step": 2383640
    },
    {
      "epoch": 3.900911870020882,
      "grad_norm": 0.13282231986522675,
      "learning_rate": 2.212660529905181e-06,
      "loss": 0.0077,
      "step": 2383660
    },
    {
      "epoch": 3.9009446004595354,
      "grad_norm": 0.11097449064254761,
      "learning_rate": 2.212594637691664e-06,
      "loss": 0.0151,
      "step": 2383680
    },
    {
      "epoch": 3.9009773308981885,
      "grad_norm": 0.3309057056903839,
      "learning_rate": 2.212528745478147e-06,
      "loss": 0.0125,
      "step": 2383700
    },
    {
      "epoch": 3.901010061336842,
      "grad_norm": 0.5057249069213867,
      "learning_rate": 2.21246285326463e-06,
      "loss": 0.0084,
      "step": 2383720
    },
    {
      "epoch": 3.9010427917754953,
      "grad_norm": 0.36580637097358704,
      "learning_rate": 2.212396961051113e-06,
      "loss": 0.0091,
      "step": 2383740
    },
    {
      "epoch": 3.901075522214149,
      "grad_norm": 0.28900575637817383,
      "learning_rate": 2.2123310688375958e-06,
      "loss": 0.0114,
      "step": 2383760
    },
    {
      "epoch": 3.901108252652802,
      "grad_norm": 0.4277319610118866,
      "learning_rate": 2.2122651766240785e-06,
      "loss": 0.0153,
      "step": 2383780
    },
    {
      "epoch": 3.901140983091455,
      "grad_norm": 0.14513017237186432,
      "learning_rate": 2.2121992844105612e-06,
      "loss": 0.008,
      "step": 2383800
    },
    {
      "epoch": 3.9011737135301088,
      "grad_norm": 0.15761470794677734,
      "learning_rate": 2.2121333921970444e-06,
      "loss": 0.0068,
      "step": 2383820
    },
    {
      "epoch": 3.901206443968762,
      "grad_norm": 0.5261196494102478,
      "learning_rate": 2.212067499983527e-06,
      "loss": 0.0097,
      "step": 2383840
    },
    {
      "epoch": 3.9012391744074155,
      "grad_norm": 0.34474265575408936,
      "learning_rate": 2.21200160777001e-06,
      "loss": 0.0107,
      "step": 2383860
    },
    {
      "epoch": 3.9012719048460687,
      "grad_norm": 0.19192081689834595,
      "learning_rate": 2.211935715556493e-06,
      "loss": 0.0106,
      "step": 2383880
    },
    {
      "epoch": 3.9013046352847223,
      "grad_norm": 0.4583113193511963,
      "learning_rate": 2.2118698233429758e-06,
      "loss": 0.0144,
      "step": 2383900
    },
    {
      "epoch": 3.9013373657233754,
      "grad_norm": 0.3166735768318176,
      "learning_rate": 2.2118039311294585e-06,
      "loss": 0.0118,
      "step": 2383920
    },
    {
      "epoch": 3.9013700961620286,
      "grad_norm": 0.4140736162662506,
      "learning_rate": 2.2117380389159417e-06,
      "loss": 0.0112,
      "step": 2383940
    },
    {
      "epoch": 3.901402826600682,
      "grad_norm": 0.08222828805446625,
      "learning_rate": 2.2116721467024244e-06,
      "loss": 0.0064,
      "step": 2383960
    },
    {
      "epoch": 3.9014355570393353,
      "grad_norm": 0.18912364542484283,
      "learning_rate": 2.211606254488907e-06,
      "loss": 0.0082,
      "step": 2383980
    },
    {
      "epoch": 3.901468287477989,
      "grad_norm": 0.11625464260578156,
      "learning_rate": 2.21154036227539e-06,
      "loss": 0.0129,
      "step": 2384000
    },
    {
      "epoch": 3.901501017916642,
      "grad_norm": 0.5623049139976501,
      "learning_rate": 2.211474470061873e-06,
      "loss": 0.0112,
      "step": 2384020
    },
    {
      "epoch": 3.9015337483552956,
      "grad_norm": 0.4409778416156769,
      "learning_rate": 2.211408577848356e-06,
      "loss": 0.0142,
      "step": 2384040
    },
    {
      "epoch": 3.901566478793949,
      "grad_norm": 0.14194083213806152,
      "learning_rate": 2.211342685634839e-06,
      "loss": 0.0088,
      "step": 2384060
    },
    {
      "epoch": 3.901599209232602,
      "grad_norm": 0.2187800258398056,
      "learning_rate": 2.2112767934213217e-06,
      "loss": 0.0092,
      "step": 2384080
    },
    {
      "epoch": 3.9016319396712555,
      "grad_norm": 0.22689224779605865,
      "learning_rate": 2.2112109012078044e-06,
      "loss": 0.0107,
      "step": 2384100
    },
    {
      "epoch": 3.9016646701099087,
      "grad_norm": 0.13874326646327972,
      "learning_rate": 2.211145008994287e-06,
      "loss": 0.0117,
      "step": 2384120
    },
    {
      "epoch": 3.9016974005485623,
      "grad_norm": 0.5240502953529358,
      "learning_rate": 2.2110791167807703e-06,
      "loss": 0.0136,
      "step": 2384140
    },
    {
      "epoch": 3.9017301309872154,
      "grad_norm": 0.7192515134811401,
      "learning_rate": 2.211013224567253e-06,
      "loss": 0.0103,
      "step": 2384160
    },
    {
      "epoch": 3.901762861425869,
      "grad_norm": 0.25416073203086853,
      "learning_rate": 2.210947332353736e-06,
      "loss": 0.012,
      "step": 2384180
    },
    {
      "epoch": 3.901795591864522,
      "grad_norm": 0.13797424733638763,
      "learning_rate": 2.2108814401402186e-06,
      "loss": 0.0139,
      "step": 2384200
    },
    {
      "epoch": 3.9018283223031753,
      "grad_norm": 0.0991506576538086,
      "learning_rate": 2.2108155479267017e-06,
      "loss": 0.0107,
      "step": 2384220
    },
    {
      "epoch": 3.901861052741829,
      "grad_norm": 0.6314441561698914,
      "learning_rate": 2.2107496557131845e-06,
      "loss": 0.0094,
      "step": 2384240
    },
    {
      "epoch": 3.901893783180482,
      "grad_norm": 0.23496279120445251,
      "learning_rate": 2.2106837634996676e-06,
      "loss": 0.0071,
      "step": 2384260
    },
    {
      "epoch": 3.9019265136191357,
      "grad_norm": 0.290200799703598,
      "learning_rate": 2.2106178712861504e-06,
      "loss": 0.0098,
      "step": 2384280
    },
    {
      "epoch": 3.901959244057789,
      "grad_norm": 0.2650286853313446,
      "learning_rate": 2.210551979072633e-06,
      "loss": 0.0072,
      "step": 2384300
    },
    {
      "epoch": 3.9019919744964424,
      "grad_norm": 0.27335256338119507,
      "learning_rate": 2.210486086859116e-06,
      "loss": 0.0106,
      "step": 2384320
    },
    {
      "epoch": 3.9020247049350956,
      "grad_norm": 0.25012752413749695,
      "learning_rate": 2.210420194645599e-06,
      "loss": 0.0172,
      "step": 2384340
    },
    {
      "epoch": 3.9020574353737487,
      "grad_norm": 0.20609137415885925,
      "learning_rate": 2.2103543024320818e-06,
      "loss": 0.0073,
      "step": 2384360
    },
    {
      "epoch": 3.9020901658124023,
      "grad_norm": 0.2537740170955658,
      "learning_rate": 2.2102884102185645e-06,
      "loss": 0.0079,
      "step": 2384380
    },
    {
      "epoch": 3.9021228962510555,
      "grad_norm": 0.15177449584007263,
      "learning_rate": 2.2102225180050472e-06,
      "loss": 0.0132,
      "step": 2384400
    },
    {
      "epoch": 3.902155626689709,
      "grad_norm": 0.757224440574646,
      "learning_rate": 2.2101566257915304e-06,
      "loss": 0.015,
      "step": 2384420
    },
    {
      "epoch": 3.902188357128362,
      "grad_norm": 0.20010718703269958,
      "learning_rate": 2.210090733578013e-06,
      "loss": 0.0093,
      "step": 2384440
    },
    {
      "epoch": 3.902221087567016,
      "grad_norm": 0.3824217915534973,
      "learning_rate": 2.2100248413644963e-06,
      "loss": 0.0128,
      "step": 2384460
    },
    {
      "epoch": 3.902253818005669,
      "grad_norm": 0.611741840839386,
      "learning_rate": 2.209958949150979e-06,
      "loss": 0.0119,
      "step": 2384480
    },
    {
      "epoch": 3.902286548444322,
      "grad_norm": 0.17607587575912476,
      "learning_rate": 2.2098930569374618e-06,
      "loss": 0.0096,
      "step": 2384500
    },
    {
      "epoch": 3.9023192788829757,
      "grad_norm": 0.2647514343261719,
      "learning_rate": 2.2098271647239445e-06,
      "loss": 0.0147,
      "step": 2384520
    },
    {
      "epoch": 3.902352009321629,
      "grad_norm": 0.26422372460365295,
      "learning_rate": 2.2097612725104277e-06,
      "loss": 0.0078,
      "step": 2384540
    },
    {
      "epoch": 3.902384739760282,
      "grad_norm": 0.2670706808567047,
      "learning_rate": 2.2096953802969104e-06,
      "loss": 0.01,
      "step": 2384560
    },
    {
      "epoch": 3.9024174701989356,
      "grad_norm": 0.6223155856132507,
      "learning_rate": 2.2096294880833936e-06,
      "loss": 0.0143,
      "step": 2384580
    },
    {
      "epoch": 3.902450200637589,
      "grad_norm": 0.1910814791917801,
      "learning_rate": 2.2095635958698763e-06,
      "loss": 0.0064,
      "step": 2384600
    },
    {
      "epoch": 3.9024829310762423,
      "grad_norm": 0.14021122455596924,
      "learning_rate": 2.209497703656359e-06,
      "loss": 0.0157,
      "step": 2384620
    },
    {
      "epoch": 3.9025156615148955,
      "grad_norm": 0.5431809425354004,
      "learning_rate": 2.2094318114428422e-06,
      "loss": 0.0165,
      "step": 2384640
    },
    {
      "epoch": 3.902548391953549,
      "grad_norm": 0.33001139760017395,
      "learning_rate": 2.209365919229325e-06,
      "loss": 0.0102,
      "step": 2384660
    },
    {
      "epoch": 3.9025811223922022,
      "grad_norm": 0.672911524772644,
      "learning_rate": 2.2093000270158077e-06,
      "loss": 0.0085,
      "step": 2384680
    },
    {
      "epoch": 3.9026138528308554,
      "grad_norm": 0.11674481630325317,
      "learning_rate": 2.2092341348022904e-06,
      "loss": 0.0064,
      "step": 2384700
    },
    {
      "epoch": 3.902646583269509,
      "grad_norm": 0.1464650183916092,
      "learning_rate": 2.209168242588773e-06,
      "loss": 0.0121,
      "step": 2384720
    },
    {
      "epoch": 3.9026793137081626,
      "grad_norm": 0.31494811177253723,
      "learning_rate": 2.2091023503752564e-06,
      "loss": 0.0161,
      "step": 2384740
    },
    {
      "epoch": 3.9027120441468157,
      "grad_norm": 0.7311075329780579,
      "learning_rate": 2.2090364581617395e-06,
      "loss": 0.0084,
      "step": 2384760
    },
    {
      "epoch": 3.902744774585469,
      "grad_norm": 0.3156825304031372,
      "learning_rate": 2.2089705659482223e-06,
      "loss": 0.0084,
      "step": 2384780
    },
    {
      "epoch": 3.9027775050241225,
      "grad_norm": 0.2582318186759949,
      "learning_rate": 2.208904673734705e-06,
      "loss": 0.0114,
      "step": 2384800
    },
    {
      "epoch": 3.9028102354627756,
      "grad_norm": 0.24625182151794434,
      "learning_rate": 2.2088387815211877e-06,
      "loss": 0.0118,
      "step": 2384820
    },
    {
      "epoch": 3.9028429659014288,
      "grad_norm": 0.26645931601524353,
      "learning_rate": 2.208772889307671e-06,
      "loss": 0.0103,
      "step": 2384840
    },
    {
      "epoch": 3.9028756963400824,
      "grad_norm": 0.20169351994991302,
      "learning_rate": 2.2087069970941536e-06,
      "loss": 0.0114,
      "step": 2384860
    },
    {
      "epoch": 3.902908426778736,
      "grad_norm": 0.4144107401371002,
      "learning_rate": 2.2086411048806364e-06,
      "loss": 0.0082,
      "step": 2384880
    },
    {
      "epoch": 3.902941157217389,
      "grad_norm": 0.30019471049308777,
      "learning_rate": 2.208575212667119e-06,
      "loss": 0.0168,
      "step": 2384900
    },
    {
      "epoch": 3.9029738876560423,
      "grad_norm": 0.45004037022590637,
      "learning_rate": 2.208509320453602e-06,
      "loss": 0.0116,
      "step": 2384920
    },
    {
      "epoch": 3.903006618094696,
      "grad_norm": 0.3182707726955414,
      "learning_rate": 2.208443428240085e-06,
      "loss": 0.0115,
      "step": 2384940
    },
    {
      "epoch": 3.903039348533349,
      "grad_norm": 0.28626877069473267,
      "learning_rate": 2.208377536026568e-06,
      "loss": 0.0069,
      "step": 2384960
    },
    {
      "epoch": 3.903072078972002,
      "grad_norm": 0.38169780373573303,
      "learning_rate": 2.208311643813051e-06,
      "loss": 0.0118,
      "step": 2384980
    },
    {
      "epoch": 3.9031048094106557,
      "grad_norm": 0.19337214529514313,
      "learning_rate": 2.2082457515995337e-06,
      "loss": 0.0071,
      "step": 2385000
    },
    {
      "epoch": 3.9031375398493093,
      "grad_norm": 0.19196383655071259,
      "learning_rate": 2.2081798593860164e-06,
      "loss": 0.0117,
      "step": 2385020
    },
    {
      "epoch": 3.9031702702879625,
      "grad_norm": 0.1900545060634613,
      "learning_rate": 2.2081139671724996e-06,
      "loss": 0.0087,
      "step": 2385040
    },
    {
      "epoch": 3.9032030007266156,
      "grad_norm": 0.39941084384918213,
      "learning_rate": 2.2080480749589823e-06,
      "loss": 0.0205,
      "step": 2385060
    },
    {
      "epoch": 3.9032357311652692,
      "grad_norm": 0.29148343205451965,
      "learning_rate": 2.207982182745465e-06,
      "loss": 0.011,
      "step": 2385080
    },
    {
      "epoch": 3.9032684616039224,
      "grad_norm": 0.10036580264568329,
      "learning_rate": 2.2079162905319478e-06,
      "loss": 0.0083,
      "step": 2385100
    },
    {
      "epoch": 3.9033011920425755,
      "grad_norm": 0.37115514278411865,
      "learning_rate": 2.207850398318431e-06,
      "loss": 0.0114,
      "step": 2385120
    },
    {
      "epoch": 3.903333922481229,
      "grad_norm": 0.09434502571821213,
      "learning_rate": 2.2077845061049137e-06,
      "loss": 0.0065,
      "step": 2385140
    },
    {
      "epoch": 3.9033666529198823,
      "grad_norm": 0.30332422256469727,
      "learning_rate": 2.207718613891397e-06,
      "loss": 0.0102,
      "step": 2385160
    },
    {
      "epoch": 3.903399383358536,
      "grad_norm": 0.2424655258655548,
      "learning_rate": 2.2076527216778796e-06,
      "loss": 0.0101,
      "step": 2385180
    },
    {
      "epoch": 3.903432113797189,
      "grad_norm": 1.4681034088134766,
      "learning_rate": 2.2075868294643623e-06,
      "loss": 0.0083,
      "step": 2385200
    },
    {
      "epoch": 3.9034648442358426,
      "grad_norm": 0.09290722757577896,
      "learning_rate": 2.207520937250845e-06,
      "loss": 0.012,
      "step": 2385220
    },
    {
      "epoch": 3.9034975746744958,
      "grad_norm": 0.11672266572713852,
      "learning_rate": 2.2074550450373282e-06,
      "loss": 0.0144,
      "step": 2385240
    },
    {
      "epoch": 3.903530305113149,
      "grad_norm": 0.12667496502399445,
      "learning_rate": 2.207389152823811e-06,
      "loss": 0.0117,
      "step": 2385260
    },
    {
      "epoch": 3.9035630355518025,
      "grad_norm": 0.18623705208301544,
      "learning_rate": 2.2073232606102937e-06,
      "loss": 0.0174,
      "step": 2385280
    },
    {
      "epoch": 3.9035957659904557,
      "grad_norm": 0.934526264667511,
      "learning_rate": 2.207257368396777e-06,
      "loss": 0.0088,
      "step": 2385300
    },
    {
      "epoch": 3.9036284964291093,
      "grad_norm": 0.07630058377981186,
      "learning_rate": 2.2071914761832596e-06,
      "loss": 0.0143,
      "step": 2385320
    },
    {
      "epoch": 3.9036612268677624,
      "grad_norm": 0.3803790211677551,
      "learning_rate": 2.2071255839697424e-06,
      "loss": 0.0095,
      "step": 2385340
    },
    {
      "epoch": 3.903693957306416,
      "grad_norm": 0.05036146938800812,
      "learning_rate": 2.2070596917562255e-06,
      "loss": 0.0089,
      "step": 2385360
    },
    {
      "epoch": 3.903726687745069,
      "grad_norm": 0.7388644814491272,
      "learning_rate": 2.2069937995427083e-06,
      "loss": 0.0156,
      "step": 2385380
    },
    {
      "epoch": 3.9037594181837223,
      "grad_norm": 0.4626286029815674,
      "learning_rate": 2.206927907329191e-06,
      "loss": 0.0095,
      "step": 2385400
    },
    {
      "epoch": 3.903792148622376,
      "grad_norm": 0.07169085741043091,
      "learning_rate": 2.2068620151156737e-06,
      "loss": 0.0121,
      "step": 2385420
    },
    {
      "epoch": 3.903824879061029,
      "grad_norm": 0.2514050602912903,
      "learning_rate": 2.206796122902157e-06,
      "loss": 0.0111,
      "step": 2385440
    },
    {
      "epoch": 3.9038576094996826,
      "grad_norm": 0.4828363060951233,
      "learning_rate": 2.2067302306886396e-06,
      "loss": 0.011,
      "step": 2385460
    },
    {
      "epoch": 3.903890339938336,
      "grad_norm": 0.17585276067256927,
      "learning_rate": 2.206664338475123e-06,
      "loss": 0.0138,
      "step": 2385480
    },
    {
      "epoch": 3.9039230703769894,
      "grad_norm": 0.2907741963863373,
      "learning_rate": 2.2065984462616055e-06,
      "loss": 0.0072,
      "step": 2385500
    },
    {
      "epoch": 3.9039558008156425,
      "grad_norm": 0.22293312847614288,
      "learning_rate": 2.2065325540480883e-06,
      "loss": 0.0129,
      "step": 2385520
    },
    {
      "epoch": 3.9039885312542957,
      "grad_norm": 0.12402784079313278,
      "learning_rate": 2.206466661834571e-06,
      "loss": 0.0119,
      "step": 2385540
    },
    {
      "epoch": 3.9040212616929493,
      "grad_norm": 0.4713967442512512,
      "learning_rate": 2.206400769621054e-06,
      "loss": 0.0097,
      "step": 2385560
    },
    {
      "epoch": 3.9040539921316024,
      "grad_norm": 0.5934202671051025,
      "learning_rate": 2.206334877407537e-06,
      "loss": 0.008,
      "step": 2385580
    },
    {
      "epoch": 3.904086722570256,
      "grad_norm": 0.3826488256454468,
      "learning_rate": 2.2062689851940197e-06,
      "loss": 0.0084,
      "step": 2385600
    },
    {
      "epoch": 3.904119453008909,
      "grad_norm": 0.5349652171134949,
      "learning_rate": 2.2062030929805024e-06,
      "loss": 0.0107,
      "step": 2385620
    },
    {
      "epoch": 3.9041521834475628,
      "grad_norm": 0.36894601583480835,
      "learning_rate": 2.2061372007669856e-06,
      "loss": 0.0125,
      "step": 2385640
    },
    {
      "epoch": 3.904184913886216,
      "grad_norm": 0.08192620426416397,
      "learning_rate": 2.2060713085534683e-06,
      "loss": 0.0056,
      "step": 2385660
    },
    {
      "epoch": 3.904217644324869,
      "grad_norm": 0.29953113198280334,
      "learning_rate": 2.2060054163399515e-06,
      "loss": 0.0069,
      "step": 2385680
    },
    {
      "epoch": 3.9042503747635227,
      "grad_norm": 0.20529302954673767,
      "learning_rate": 2.205939524126434e-06,
      "loss": 0.0091,
      "step": 2385700
    },
    {
      "epoch": 3.904283105202176,
      "grad_norm": 0.17628145217895508,
      "learning_rate": 2.205873631912917e-06,
      "loss": 0.0115,
      "step": 2385720
    },
    {
      "epoch": 3.9043158356408294,
      "grad_norm": 0.3509937524795532,
      "learning_rate": 2.2058077396993997e-06,
      "loss": 0.0082,
      "step": 2385740
    },
    {
      "epoch": 3.9043485660794826,
      "grad_norm": 0.12565630674362183,
      "learning_rate": 2.205741847485883e-06,
      "loss": 0.0103,
      "step": 2385760
    },
    {
      "epoch": 3.904381296518136,
      "grad_norm": 0.33621975779533386,
      "learning_rate": 2.2056759552723656e-06,
      "loss": 0.0102,
      "step": 2385780
    },
    {
      "epoch": 3.9044140269567893,
      "grad_norm": 0.4363843500614166,
      "learning_rate": 2.2056100630588483e-06,
      "loss": 0.0104,
      "step": 2385800
    },
    {
      "epoch": 3.9044467573954424,
      "grad_norm": 0.08036932349205017,
      "learning_rate": 2.2055441708453315e-06,
      "loss": 0.0081,
      "step": 2385820
    },
    {
      "epoch": 3.904479487834096,
      "grad_norm": 0.5235810279846191,
      "learning_rate": 2.2054782786318142e-06,
      "loss": 0.0104,
      "step": 2385840
    },
    {
      "epoch": 3.904512218272749,
      "grad_norm": 0.5086125731468201,
      "learning_rate": 2.2054123864182974e-06,
      "loss": 0.012,
      "step": 2385860
    },
    {
      "epoch": 3.904544948711403,
      "grad_norm": 0.09697292000055313,
      "learning_rate": 2.20534649420478e-06,
      "loss": 0.0076,
      "step": 2385880
    },
    {
      "epoch": 3.904577679150056,
      "grad_norm": 0.45889607071876526,
      "learning_rate": 2.205280601991263e-06,
      "loss": 0.0101,
      "step": 2385900
    },
    {
      "epoch": 3.9046104095887095,
      "grad_norm": 0.605620265007019,
      "learning_rate": 2.2052147097777456e-06,
      "loss": 0.0151,
      "step": 2385920
    },
    {
      "epoch": 3.9046431400273627,
      "grad_norm": 0.18340109288692474,
      "learning_rate": 2.2051488175642284e-06,
      "loss": 0.0098,
      "step": 2385940
    },
    {
      "epoch": 3.904675870466016,
      "grad_norm": 0.21562154591083527,
      "learning_rate": 2.2050829253507115e-06,
      "loss": 0.0071,
      "step": 2385960
    },
    {
      "epoch": 3.9047086009046694,
      "grad_norm": 0.10982920974493027,
      "learning_rate": 2.2050170331371943e-06,
      "loss": 0.0119,
      "step": 2385980
    },
    {
      "epoch": 3.9047413313433226,
      "grad_norm": 0.12220506370067596,
      "learning_rate": 2.2049511409236774e-06,
      "loss": 0.0059,
      "step": 2386000
    },
    {
      "epoch": 3.9047740617819757,
      "grad_norm": 0.14730603992938995,
      "learning_rate": 2.20488524871016e-06,
      "loss": 0.012,
      "step": 2386020
    },
    {
      "epoch": 3.9048067922206293,
      "grad_norm": 0.1410784125328064,
      "learning_rate": 2.204819356496643e-06,
      "loss": 0.0161,
      "step": 2386040
    },
    {
      "epoch": 3.904839522659283,
      "grad_norm": 0.22421985864639282,
      "learning_rate": 2.204753464283126e-06,
      "loss": 0.0132,
      "step": 2386060
    },
    {
      "epoch": 3.904872253097936,
      "grad_norm": 0.4190833568572998,
      "learning_rate": 2.204687572069609e-06,
      "loss": 0.0135,
      "step": 2386080
    },
    {
      "epoch": 3.904904983536589,
      "grad_norm": 0.2972166836261749,
      "learning_rate": 2.2046216798560915e-06,
      "loss": 0.013,
      "step": 2386100
    },
    {
      "epoch": 3.904937713975243,
      "grad_norm": 0.07529859244823456,
      "learning_rate": 2.2045557876425743e-06,
      "loss": 0.0074,
      "step": 2386120
    },
    {
      "epoch": 3.904970444413896,
      "grad_norm": 0.056797634810209274,
      "learning_rate": 2.204489895429057e-06,
      "loss": 0.0157,
      "step": 2386140
    },
    {
      "epoch": 3.905003174852549,
      "grad_norm": 0.48116299510002136,
      "learning_rate": 2.20442400321554e-06,
      "loss": 0.0143,
      "step": 2386160
    },
    {
      "epoch": 3.9050359052912027,
      "grad_norm": 0.08148697763681412,
      "learning_rate": 2.2043581110020234e-06,
      "loss": 0.0078,
      "step": 2386180
    },
    {
      "epoch": 3.9050686357298563,
      "grad_norm": 0.11665144562721252,
      "learning_rate": 2.204292218788506e-06,
      "loss": 0.0127,
      "step": 2386200
    },
    {
      "epoch": 3.9051013661685094,
      "grad_norm": 0.4132431149482727,
      "learning_rate": 2.204226326574989e-06,
      "loss": 0.0117,
      "step": 2386220
    },
    {
      "epoch": 3.9051340966071626,
      "grad_norm": 0.3158904016017914,
      "learning_rate": 2.2041604343614716e-06,
      "loss": 0.0082,
      "step": 2386240
    },
    {
      "epoch": 3.905166827045816,
      "grad_norm": 0.2039419561624527,
      "learning_rate": 2.2040945421479547e-06,
      "loss": 0.0118,
      "step": 2386260
    },
    {
      "epoch": 3.9051995574844693,
      "grad_norm": 0.21802422404289246,
      "learning_rate": 2.2040286499344375e-06,
      "loss": 0.009,
      "step": 2386280
    },
    {
      "epoch": 3.9052322879231225,
      "grad_norm": 0.6738815903663635,
      "learning_rate": 2.2039627577209202e-06,
      "loss": 0.0165,
      "step": 2386300
    },
    {
      "epoch": 3.905265018361776,
      "grad_norm": 0.17436178028583527,
      "learning_rate": 2.203896865507403e-06,
      "loss": 0.0112,
      "step": 2386320
    },
    {
      "epoch": 3.9052977488004297,
      "grad_norm": 0.4318682849407196,
      "learning_rate": 2.203830973293886e-06,
      "loss": 0.0073,
      "step": 2386340
    },
    {
      "epoch": 3.905330479239083,
      "grad_norm": 0.1406620293855667,
      "learning_rate": 2.203765081080369e-06,
      "loss": 0.0098,
      "step": 2386360
    },
    {
      "epoch": 3.905363209677736,
      "grad_norm": 0.2254076600074768,
      "learning_rate": 2.203699188866852e-06,
      "loss": 0.0165,
      "step": 2386380
    },
    {
      "epoch": 3.9053959401163896,
      "grad_norm": 0.12999343872070312,
      "learning_rate": 2.2036332966533348e-06,
      "loss": 0.0102,
      "step": 2386400
    },
    {
      "epoch": 3.9054286705550427,
      "grad_norm": 0.079737089574337,
      "learning_rate": 2.2035674044398175e-06,
      "loss": 0.0106,
      "step": 2386420
    },
    {
      "epoch": 3.905461400993696,
      "grad_norm": 0.3646548092365265,
      "learning_rate": 2.2035015122263002e-06,
      "loss": 0.014,
      "step": 2386440
    },
    {
      "epoch": 3.9054941314323495,
      "grad_norm": 0.2964690029621124,
      "learning_rate": 2.2034356200127834e-06,
      "loss": 0.0055,
      "step": 2386460
    },
    {
      "epoch": 3.905526861871003,
      "grad_norm": 0.31942570209503174,
      "learning_rate": 2.203369727799266e-06,
      "loss": 0.0103,
      "step": 2386480
    },
    {
      "epoch": 3.905559592309656,
      "grad_norm": 0.5306209325790405,
      "learning_rate": 2.203303835585749e-06,
      "loss": 0.0114,
      "step": 2386500
    },
    {
      "epoch": 3.9055923227483094,
      "grad_norm": 0.41688743233680725,
      "learning_rate": 2.203237943372232e-06,
      "loss": 0.009,
      "step": 2386520
    },
    {
      "epoch": 3.905625053186963,
      "grad_norm": 0.23159122467041016,
      "learning_rate": 2.2031720511587148e-06,
      "loss": 0.0145,
      "step": 2386540
    },
    {
      "epoch": 3.905657783625616,
      "grad_norm": 0.13798530399799347,
      "learning_rate": 2.2031061589451975e-06,
      "loss": 0.0084,
      "step": 2386560
    },
    {
      "epoch": 3.9056905140642693,
      "grad_norm": 0.255420058965683,
      "learning_rate": 2.2030402667316807e-06,
      "loss": 0.0109,
      "step": 2386580
    },
    {
      "epoch": 3.905723244502923,
      "grad_norm": 0.42667096853256226,
      "learning_rate": 2.2029743745181634e-06,
      "loss": 0.01,
      "step": 2386600
    },
    {
      "epoch": 3.905755974941576,
      "grad_norm": 0.10807090252637863,
      "learning_rate": 2.202908482304646e-06,
      "loss": 0.0079,
      "step": 2386620
    },
    {
      "epoch": 3.9057887053802296,
      "grad_norm": 0.41406941413879395,
      "learning_rate": 2.202842590091129e-06,
      "loss": 0.0109,
      "step": 2386640
    },
    {
      "epoch": 3.9058214358188827,
      "grad_norm": 0.13730622828006744,
      "learning_rate": 2.202776697877612e-06,
      "loss": 0.0179,
      "step": 2386660
    },
    {
      "epoch": 3.9058541662575363,
      "grad_norm": 0.6322585940361023,
      "learning_rate": 2.202710805664095e-06,
      "loss": 0.0118,
      "step": 2386680
    },
    {
      "epoch": 3.9058868966961895,
      "grad_norm": 0.4927504062652588,
      "learning_rate": 2.202644913450578e-06,
      "loss": 0.0092,
      "step": 2386700
    },
    {
      "epoch": 3.9059196271348426,
      "grad_norm": 0.2802183926105499,
      "learning_rate": 2.2025790212370607e-06,
      "loss": 0.0095,
      "step": 2386720
    },
    {
      "epoch": 3.9059523575734962,
      "grad_norm": 0.3368776738643646,
      "learning_rate": 2.2025131290235435e-06,
      "loss": 0.0131,
      "step": 2386740
    },
    {
      "epoch": 3.9059850880121494,
      "grad_norm": 0.1848696768283844,
      "learning_rate": 2.202447236810026e-06,
      "loss": 0.0099,
      "step": 2386760
    },
    {
      "epoch": 3.906017818450803,
      "grad_norm": 0.41422560811042786,
      "learning_rate": 2.2023813445965094e-06,
      "loss": 0.0094,
      "step": 2386780
    },
    {
      "epoch": 3.906050548889456,
      "grad_norm": 0.40709418058395386,
      "learning_rate": 2.202315452382992e-06,
      "loss": 0.008,
      "step": 2386800
    },
    {
      "epoch": 3.9060832793281097,
      "grad_norm": 0.5153847336769104,
      "learning_rate": 2.202249560169475e-06,
      "loss": 0.0145,
      "step": 2386820
    },
    {
      "epoch": 3.906116009766763,
      "grad_norm": 0.057298693805933,
      "learning_rate": 2.2021836679559576e-06,
      "loss": 0.0127,
      "step": 2386840
    },
    {
      "epoch": 3.906148740205416,
      "grad_norm": 0.2215760201215744,
      "learning_rate": 2.2021177757424407e-06,
      "loss": 0.0067,
      "step": 2386860
    },
    {
      "epoch": 3.9061814706440696,
      "grad_norm": 0.22450925409793854,
      "learning_rate": 2.2020518835289235e-06,
      "loss": 0.0083,
      "step": 2386880
    },
    {
      "epoch": 3.9062142010827228,
      "grad_norm": 0.07298804819583893,
      "learning_rate": 2.2019859913154066e-06,
      "loss": 0.0143,
      "step": 2386900
    },
    {
      "epoch": 3.9062469315213764,
      "grad_norm": 0.0756872370839119,
      "learning_rate": 2.2019200991018894e-06,
      "loss": 0.0095,
      "step": 2386920
    },
    {
      "epoch": 3.9062796619600295,
      "grad_norm": 0.1518462747335434,
      "learning_rate": 2.201854206888372e-06,
      "loss": 0.0103,
      "step": 2386940
    },
    {
      "epoch": 3.906312392398683,
      "grad_norm": 0.3020484447479248,
      "learning_rate": 2.201788314674855e-06,
      "loss": 0.0123,
      "step": 2386960
    },
    {
      "epoch": 3.9063451228373363,
      "grad_norm": 0.14274489879608154,
      "learning_rate": 2.201722422461338e-06,
      "loss": 0.0155,
      "step": 2386980
    },
    {
      "epoch": 3.9063778532759894,
      "grad_norm": 0.46797966957092285,
      "learning_rate": 2.2016565302478208e-06,
      "loss": 0.0089,
      "step": 2387000
    },
    {
      "epoch": 3.906410583714643,
      "grad_norm": 0.18922413885593414,
      "learning_rate": 2.2015906380343035e-06,
      "loss": 0.0138,
      "step": 2387020
    },
    {
      "epoch": 3.906443314153296,
      "grad_norm": 0.4531071186065674,
      "learning_rate": 2.2015247458207862e-06,
      "loss": 0.0094,
      "step": 2387040
    },
    {
      "epoch": 3.9064760445919497,
      "grad_norm": 0.4488718807697296,
      "learning_rate": 2.2014588536072694e-06,
      "loss": 0.015,
      "step": 2387060
    },
    {
      "epoch": 3.906508775030603,
      "grad_norm": 0.5167778730392456,
      "learning_rate": 2.201392961393752e-06,
      "loss": 0.0111,
      "step": 2387080
    },
    {
      "epoch": 3.9065415054692565,
      "grad_norm": 0.4607048034667969,
      "learning_rate": 2.2013270691802353e-06,
      "loss": 0.0178,
      "step": 2387100
    },
    {
      "epoch": 3.9065742359079096,
      "grad_norm": 0.32123976945877075,
      "learning_rate": 2.201261176966718e-06,
      "loss": 0.0137,
      "step": 2387120
    },
    {
      "epoch": 3.906606966346563,
      "grad_norm": 1.106154441833496,
      "learning_rate": 2.201195284753201e-06,
      "loss": 0.0127,
      "step": 2387140
    },
    {
      "epoch": 3.9066396967852164,
      "grad_norm": 0.6448153853416443,
      "learning_rate": 2.2011293925396835e-06,
      "loss": 0.0104,
      "step": 2387160
    },
    {
      "epoch": 3.9066724272238695,
      "grad_norm": 0.10588482022285461,
      "learning_rate": 2.2010635003261667e-06,
      "loss": 0.0105,
      "step": 2387180
    },
    {
      "epoch": 3.906705157662523,
      "grad_norm": 0.16430838406085968,
      "learning_rate": 2.2009976081126494e-06,
      "loss": 0.0077,
      "step": 2387200
    },
    {
      "epoch": 3.9067378881011763,
      "grad_norm": 0.17924383282661438,
      "learning_rate": 2.2009317158991326e-06,
      "loss": 0.0121,
      "step": 2387220
    },
    {
      "epoch": 3.90677061853983,
      "grad_norm": 0.15085244178771973,
      "learning_rate": 2.2008658236856153e-06,
      "loss": 0.0118,
      "step": 2387240
    },
    {
      "epoch": 3.906803348978483,
      "grad_norm": 2.8059937953948975,
      "learning_rate": 2.200799931472098e-06,
      "loss": 0.008,
      "step": 2387260
    },
    {
      "epoch": 3.906836079417136,
      "grad_norm": 0.12291216105222702,
      "learning_rate": 2.2007340392585812e-06,
      "loss": 0.0106,
      "step": 2387280
    },
    {
      "epoch": 3.9068688098557898,
      "grad_norm": 0.3028104901313782,
      "learning_rate": 2.200668147045064e-06,
      "loss": 0.0107,
      "step": 2387300
    },
    {
      "epoch": 3.906901540294443,
      "grad_norm": 0.2940519452095032,
      "learning_rate": 2.2006022548315467e-06,
      "loss": 0.0097,
      "step": 2387320
    },
    {
      "epoch": 3.9069342707330965,
      "grad_norm": 0.17456713318824768,
      "learning_rate": 2.2005363626180295e-06,
      "loss": 0.0082,
      "step": 2387340
    },
    {
      "epoch": 3.9069670011717497,
      "grad_norm": 0.5783917903900146,
      "learning_rate": 2.200470470404512e-06,
      "loss": 0.0125,
      "step": 2387360
    },
    {
      "epoch": 3.9069997316104033,
      "grad_norm": 0.35884857177734375,
      "learning_rate": 2.2004045781909954e-06,
      "loss": 0.0085,
      "step": 2387380
    },
    {
      "epoch": 3.9070324620490564,
      "grad_norm": 0.2797614634037018,
      "learning_rate": 2.2003386859774785e-06,
      "loss": 0.0069,
      "step": 2387400
    },
    {
      "epoch": 3.9070651924877096,
      "grad_norm": 0.12733325362205505,
      "learning_rate": 2.2002727937639613e-06,
      "loss": 0.0137,
      "step": 2387420
    },
    {
      "epoch": 3.907097922926363,
      "grad_norm": 0.4073331356048584,
      "learning_rate": 2.200206901550444e-06,
      "loss": 0.0094,
      "step": 2387440
    },
    {
      "epoch": 3.9071306533650163,
      "grad_norm": 0.3650779724121094,
      "learning_rate": 2.2001410093369267e-06,
      "loss": 0.0068,
      "step": 2387460
    },
    {
      "epoch": 3.90716338380367,
      "grad_norm": 0.2332739531993866,
      "learning_rate": 2.20007511712341e-06,
      "loss": 0.0097,
      "step": 2387480
    },
    {
      "epoch": 3.907196114242323,
      "grad_norm": 0.3651689291000366,
      "learning_rate": 2.2000092249098926e-06,
      "loss": 0.0092,
      "step": 2387500
    },
    {
      "epoch": 3.9072288446809766,
      "grad_norm": 0.45560142397880554,
      "learning_rate": 2.1999433326963754e-06,
      "loss": 0.0113,
      "step": 2387520
    },
    {
      "epoch": 3.90726157511963,
      "grad_norm": 0.2036447823047638,
      "learning_rate": 2.199877440482858e-06,
      "loss": 0.0069,
      "step": 2387540
    },
    {
      "epoch": 3.907294305558283,
      "grad_norm": 0.06783948838710785,
      "learning_rate": 2.199811548269341e-06,
      "loss": 0.0081,
      "step": 2387560
    },
    {
      "epoch": 3.9073270359969365,
      "grad_norm": 0.16846853494644165,
      "learning_rate": 2.199745656055824e-06,
      "loss": 0.0104,
      "step": 2387580
    },
    {
      "epoch": 3.9073597664355897,
      "grad_norm": 0.46542155742645264,
      "learning_rate": 2.199679763842307e-06,
      "loss": 0.0084,
      "step": 2387600
    },
    {
      "epoch": 3.907392496874243,
      "grad_norm": 0.17098799347877502,
      "learning_rate": 2.19961387162879e-06,
      "loss": 0.0071,
      "step": 2387620
    },
    {
      "epoch": 3.9074252273128964,
      "grad_norm": 0.183780699968338,
      "learning_rate": 2.1995479794152727e-06,
      "loss": 0.0116,
      "step": 2387640
    },
    {
      "epoch": 3.90745795775155,
      "grad_norm": 0.3591727316379547,
      "learning_rate": 2.1994820872017554e-06,
      "loss": 0.0081,
      "step": 2387660
    },
    {
      "epoch": 3.907490688190203,
      "grad_norm": 0.283733606338501,
      "learning_rate": 2.1994161949882386e-06,
      "loss": 0.0095,
      "step": 2387680
    },
    {
      "epoch": 3.9075234186288563,
      "grad_norm": 0.6258656978607178,
      "learning_rate": 2.1993503027747213e-06,
      "loss": 0.0122,
      "step": 2387700
    },
    {
      "epoch": 3.90755614906751,
      "grad_norm": 0.3712034821510315,
      "learning_rate": 2.199284410561204e-06,
      "loss": 0.0117,
      "step": 2387720
    },
    {
      "epoch": 3.907588879506163,
      "grad_norm": 0.18161100149154663,
      "learning_rate": 2.199218518347687e-06,
      "loss": 0.0084,
      "step": 2387740
    },
    {
      "epoch": 3.907621609944816,
      "grad_norm": 0.4526309370994568,
      "learning_rate": 2.19915262613417e-06,
      "loss": 0.0095,
      "step": 2387760
    },
    {
      "epoch": 3.90765434038347,
      "grad_norm": 0.3373643457889557,
      "learning_rate": 2.1990867339206527e-06,
      "loss": 0.0088,
      "step": 2387780
    },
    {
      "epoch": 3.9076870708221234,
      "grad_norm": 0.20276346802711487,
      "learning_rate": 2.199020841707136e-06,
      "loss": 0.0085,
      "step": 2387800
    },
    {
      "epoch": 3.9077198012607766,
      "grad_norm": 0.5749790668487549,
      "learning_rate": 2.1989549494936186e-06,
      "loss": 0.0112,
      "step": 2387820
    },
    {
      "epoch": 3.9077525316994297,
      "grad_norm": 0.23526547849178314,
      "learning_rate": 2.1988890572801013e-06,
      "loss": 0.0069,
      "step": 2387840
    },
    {
      "epoch": 3.9077852621380833,
      "grad_norm": 0.3772222697734833,
      "learning_rate": 2.198823165066584e-06,
      "loss": 0.0088,
      "step": 2387860
    },
    {
      "epoch": 3.9078179925767365,
      "grad_norm": 0.18359264731407166,
      "learning_rate": 2.1987572728530672e-06,
      "loss": 0.0092,
      "step": 2387880
    },
    {
      "epoch": 3.9078507230153896,
      "grad_norm": 0.10811742395162582,
      "learning_rate": 2.19869138063955e-06,
      "loss": 0.0078,
      "step": 2387900
    },
    {
      "epoch": 3.907883453454043,
      "grad_norm": 0.5824403166770935,
      "learning_rate": 2.1986254884260327e-06,
      "loss": 0.0121,
      "step": 2387920
    },
    {
      "epoch": 3.907916183892697,
      "grad_norm": 0.29609671235084534,
      "learning_rate": 2.198559596212516e-06,
      "loss": 0.0155,
      "step": 2387940
    },
    {
      "epoch": 3.90794891433135,
      "grad_norm": 0.4331536591053009,
      "learning_rate": 2.1984937039989986e-06,
      "loss": 0.0102,
      "step": 2387960
    },
    {
      "epoch": 3.907981644770003,
      "grad_norm": 0.7257826328277588,
      "learning_rate": 2.1984278117854814e-06,
      "loss": 0.0126,
      "step": 2387980
    },
    {
      "epoch": 3.9080143752086567,
      "grad_norm": 0.03388627991080284,
      "learning_rate": 2.1983619195719645e-06,
      "loss": 0.0058,
      "step": 2388000
    },
    {
      "epoch": 3.90804710564731,
      "grad_norm": 0.2810138165950775,
      "learning_rate": 2.1982960273584473e-06,
      "loss": 0.0151,
      "step": 2388020
    },
    {
      "epoch": 3.908079836085963,
      "grad_norm": 0.17985846102237701,
      "learning_rate": 2.19823013514493e-06,
      "loss": 0.0097,
      "step": 2388040
    },
    {
      "epoch": 3.9081125665246166,
      "grad_norm": 0.05406595394015312,
      "learning_rate": 2.1981642429314127e-06,
      "loss": 0.0085,
      "step": 2388060
    },
    {
      "epoch": 3.90814529696327,
      "grad_norm": 0.1630410999059677,
      "learning_rate": 2.198098350717896e-06,
      "loss": 0.0074,
      "step": 2388080
    },
    {
      "epoch": 3.9081780274019233,
      "grad_norm": 0.07825785875320435,
      "learning_rate": 2.1980324585043787e-06,
      "loss": 0.0106,
      "step": 2388100
    },
    {
      "epoch": 3.9082107578405765,
      "grad_norm": 0.3759196400642395,
      "learning_rate": 2.197966566290862e-06,
      "loss": 0.0135,
      "step": 2388120
    },
    {
      "epoch": 3.90824348827923,
      "grad_norm": 0.22304579615592957,
      "learning_rate": 2.1979006740773446e-06,
      "loss": 0.0169,
      "step": 2388140
    },
    {
      "epoch": 3.908276218717883,
      "grad_norm": 0.2510165572166443,
      "learning_rate": 2.1978347818638273e-06,
      "loss": 0.0079,
      "step": 2388160
    },
    {
      "epoch": 3.9083089491565364,
      "grad_norm": 0.08600662648677826,
      "learning_rate": 2.19776888965031e-06,
      "loss": 0.01,
      "step": 2388180
    },
    {
      "epoch": 3.90834167959519,
      "grad_norm": 0.6703043580055237,
      "learning_rate": 2.197702997436793e-06,
      "loss": 0.0104,
      "step": 2388200
    },
    {
      "epoch": 3.908374410033843,
      "grad_norm": 0.25582846999168396,
      "learning_rate": 2.197637105223276e-06,
      "loss": 0.008,
      "step": 2388220
    },
    {
      "epoch": 3.9084071404724967,
      "grad_norm": 0.08455439656972885,
      "learning_rate": 2.1975712130097587e-06,
      "loss": 0.0079,
      "step": 2388240
    },
    {
      "epoch": 3.90843987091115,
      "grad_norm": 0.2544097900390625,
      "learning_rate": 2.1975053207962414e-06,
      "loss": 0.0111,
      "step": 2388260
    },
    {
      "epoch": 3.9084726013498035,
      "grad_norm": 0.45254334807395935,
      "learning_rate": 2.1974394285827246e-06,
      "loss": 0.0101,
      "step": 2388280
    },
    {
      "epoch": 3.9085053317884566,
      "grad_norm": 0.16690677404403687,
      "learning_rate": 2.1973735363692073e-06,
      "loss": 0.0162,
      "step": 2388300
    },
    {
      "epoch": 3.9085380622271098,
      "grad_norm": 0.15678182244300842,
      "learning_rate": 2.1973076441556905e-06,
      "loss": 0.0089,
      "step": 2388320
    },
    {
      "epoch": 3.9085707926657633,
      "grad_norm": 0.3597032427787781,
      "learning_rate": 2.1972417519421732e-06,
      "loss": 0.0111,
      "step": 2388340
    },
    {
      "epoch": 3.9086035231044165,
      "grad_norm": 0.3462873101234436,
      "learning_rate": 2.197175859728656e-06,
      "loss": 0.0089,
      "step": 2388360
    },
    {
      "epoch": 3.90863625354307,
      "grad_norm": 0.09741507470607758,
      "learning_rate": 2.1971099675151387e-06,
      "loss": 0.0128,
      "step": 2388380
    },
    {
      "epoch": 3.9086689839817232,
      "grad_norm": 0.10770818591117859,
      "learning_rate": 2.197044075301622e-06,
      "loss": 0.0108,
      "step": 2388400
    },
    {
      "epoch": 3.908701714420377,
      "grad_norm": 0.2532489597797394,
      "learning_rate": 2.1969781830881046e-06,
      "loss": 0.0118,
      "step": 2388420
    },
    {
      "epoch": 3.90873444485903,
      "grad_norm": 0.15928776562213898,
      "learning_rate": 2.1969122908745873e-06,
      "loss": 0.0118,
      "step": 2388440
    },
    {
      "epoch": 3.908767175297683,
      "grad_norm": 0.24412652850151062,
      "learning_rate": 2.1968463986610705e-06,
      "loss": 0.0068,
      "step": 2388460
    },
    {
      "epoch": 3.9087999057363367,
      "grad_norm": 0.0681035965681076,
      "learning_rate": 2.1967805064475532e-06,
      "loss": 0.0107,
      "step": 2388480
    },
    {
      "epoch": 3.90883263617499,
      "grad_norm": 0.27998170256614685,
      "learning_rate": 2.1967146142340364e-06,
      "loss": 0.0084,
      "step": 2388500
    },
    {
      "epoch": 3.9088653666136435,
      "grad_norm": 0.4110199511051178,
      "learning_rate": 2.196648722020519e-06,
      "loss": 0.012,
      "step": 2388520
    },
    {
      "epoch": 3.9088980970522966,
      "grad_norm": 1.2889608144760132,
      "learning_rate": 2.196582829807002e-06,
      "loss": 0.0088,
      "step": 2388540
    },
    {
      "epoch": 3.90893082749095,
      "grad_norm": 0.17663262784481049,
      "learning_rate": 2.1965169375934846e-06,
      "loss": 0.0087,
      "step": 2388560
    },
    {
      "epoch": 3.9089635579296034,
      "grad_norm": 0.3085181713104248,
      "learning_rate": 2.1964510453799674e-06,
      "loss": 0.0128,
      "step": 2388580
    },
    {
      "epoch": 3.9089962883682565,
      "grad_norm": 0.0768132135272026,
      "learning_rate": 2.1963851531664505e-06,
      "loss": 0.0074,
      "step": 2388600
    },
    {
      "epoch": 3.90902901880691,
      "grad_norm": 0.21374066174030304,
      "learning_rate": 2.1963192609529333e-06,
      "loss": 0.0084,
      "step": 2388620
    },
    {
      "epoch": 3.9090617492455633,
      "grad_norm": 0.46090468764305115,
      "learning_rate": 2.1962533687394164e-06,
      "loss": 0.0097,
      "step": 2388640
    },
    {
      "epoch": 3.909094479684217,
      "grad_norm": 0.12448420375585556,
      "learning_rate": 2.196187476525899e-06,
      "loss": 0.0125,
      "step": 2388660
    },
    {
      "epoch": 3.90912721012287,
      "grad_norm": 0.5524542927742004,
      "learning_rate": 2.196121584312382e-06,
      "loss": 0.0096,
      "step": 2388680
    },
    {
      "epoch": 3.9091599405615236,
      "grad_norm": 0.11119501292705536,
      "learning_rate": 2.196055692098865e-06,
      "loss": 0.0109,
      "step": 2388700
    },
    {
      "epoch": 3.9091926710001768,
      "grad_norm": 0.2092094123363495,
      "learning_rate": 2.195989799885348e-06,
      "loss": 0.0075,
      "step": 2388720
    },
    {
      "epoch": 3.90922540143883,
      "grad_norm": 0.1108817532658577,
      "learning_rate": 2.1959239076718306e-06,
      "loss": 0.0099,
      "step": 2388740
    },
    {
      "epoch": 3.9092581318774835,
      "grad_norm": 0.35283663868904114,
      "learning_rate": 2.1958580154583133e-06,
      "loss": 0.0083,
      "step": 2388760
    },
    {
      "epoch": 3.9092908623161366,
      "grad_norm": 0.28242650628089905,
      "learning_rate": 2.195792123244796e-06,
      "loss": 0.0089,
      "step": 2388780
    },
    {
      "epoch": 3.9093235927547902,
      "grad_norm": 0.03406039997935295,
      "learning_rate": 2.195726231031279e-06,
      "loss": 0.0146,
      "step": 2388800
    },
    {
      "epoch": 3.9093563231934434,
      "grad_norm": 0.3017246127128601,
      "learning_rate": 2.1956603388177624e-06,
      "loss": 0.0126,
      "step": 2388820
    },
    {
      "epoch": 3.909389053632097,
      "grad_norm": 0.30434370040893555,
      "learning_rate": 2.195594446604245e-06,
      "loss": 0.0104,
      "step": 2388840
    },
    {
      "epoch": 3.90942178407075,
      "grad_norm": 0.42149749398231506,
      "learning_rate": 2.195528554390728e-06,
      "loss": 0.0093,
      "step": 2388860
    },
    {
      "epoch": 3.9094545145094033,
      "grad_norm": 0.15878453850746155,
      "learning_rate": 2.1954626621772106e-06,
      "loss": 0.0089,
      "step": 2388880
    },
    {
      "epoch": 3.909487244948057,
      "grad_norm": 0.17573845386505127,
      "learning_rate": 2.1953967699636937e-06,
      "loss": 0.0079,
      "step": 2388900
    },
    {
      "epoch": 3.90951997538671,
      "grad_norm": 0.15292340517044067,
      "learning_rate": 2.1953308777501765e-06,
      "loss": 0.0168,
      "step": 2388920
    },
    {
      "epoch": 3.9095527058253636,
      "grad_norm": 0.13565173745155334,
      "learning_rate": 2.1952649855366592e-06,
      "loss": 0.0074,
      "step": 2388940
    },
    {
      "epoch": 3.9095854362640168,
      "grad_norm": 0.20101533830165863,
      "learning_rate": 2.195199093323142e-06,
      "loss": 0.0094,
      "step": 2388960
    },
    {
      "epoch": 3.9096181667026704,
      "grad_norm": 0.21586400270462036,
      "learning_rate": 2.195133201109625e-06,
      "loss": 0.0093,
      "step": 2388980
    },
    {
      "epoch": 3.9096508971413235,
      "grad_norm": 0.2729988396167755,
      "learning_rate": 2.195067308896108e-06,
      "loss": 0.0147,
      "step": 2389000
    },
    {
      "epoch": 3.9096836275799767,
      "grad_norm": 0.6774573922157288,
      "learning_rate": 2.195001416682591e-06,
      "loss": 0.0203,
      "step": 2389020
    },
    {
      "epoch": 3.9097163580186303,
      "grad_norm": 0.21603544056415558,
      "learning_rate": 2.1949355244690738e-06,
      "loss": 0.0102,
      "step": 2389040
    },
    {
      "epoch": 3.9097490884572834,
      "grad_norm": 0.09029210358858109,
      "learning_rate": 2.1948696322555565e-06,
      "loss": 0.0121,
      "step": 2389060
    },
    {
      "epoch": 3.9097818188959366,
      "grad_norm": 0.1664336770772934,
      "learning_rate": 2.1948037400420393e-06,
      "loss": 0.0264,
      "step": 2389080
    },
    {
      "epoch": 3.90981454933459,
      "grad_norm": 0.19842377305030823,
      "learning_rate": 2.1947378478285224e-06,
      "loss": 0.0096,
      "step": 2389100
    },
    {
      "epoch": 3.9098472797732438,
      "grad_norm": 0.18270865082740784,
      "learning_rate": 2.194671955615005e-06,
      "loss": 0.009,
      "step": 2389120
    },
    {
      "epoch": 3.909880010211897,
      "grad_norm": 0.4062819182872772,
      "learning_rate": 2.194606063401488e-06,
      "loss": 0.0106,
      "step": 2389140
    },
    {
      "epoch": 3.90991274065055,
      "grad_norm": 0.0816694125533104,
      "learning_rate": 2.194540171187971e-06,
      "loss": 0.0119,
      "step": 2389160
    },
    {
      "epoch": 3.9099454710892037,
      "grad_norm": 0.2701151371002197,
      "learning_rate": 2.194474278974454e-06,
      "loss": 0.0125,
      "step": 2389180
    },
    {
      "epoch": 3.909978201527857,
      "grad_norm": 0.1482062041759491,
      "learning_rate": 2.1944083867609365e-06,
      "loss": 0.0102,
      "step": 2389200
    },
    {
      "epoch": 3.91001093196651,
      "grad_norm": 0.14550520479679108,
      "learning_rate": 2.1943424945474197e-06,
      "loss": 0.0117,
      "step": 2389220
    },
    {
      "epoch": 3.9100436624051635,
      "grad_norm": 0.320324569940567,
      "learning_rate": 2.1942766023339024e-06,
      "loss": 0.0139,
      "step": 2389240
    },
    {
      "epoch": 3.910076392843817,
      "grad_norm": 0.14908377826213837,
      "learning_rate": 2.194210710120385e-06,
      "loss": 0.009,
      "step": 2389260
    },
    {
      "epoch": 3.9101091232824703,
      "grad_norm": 0.18220575153827667,
      "learning_rate": 2.194144817906868e-06,
      "loss": 0.0088,
      "step": 2389280
    },
    {
      "epoch": 3.9101418537211234,
      "grad_norm": 0.23818524181842804,
      "learning_rate": 2.194078925693351e-06,
      "loss": 0.0156,
      "step": 2389300
    },
    {
      "epoch": 3.910174584159777,
      "grad_norm": 0.4896414577960968,
      "learning_rate": 2.194013033479834e-06,
      "loss": 0.0134,
      "step": 2389320
    },
    {
      "epoch": 3.91020731459843,
      "grad_norm": 0.11984999477863312,
      "learning_rate": 2.193947141266317e-06,
      "loss": 0.0104,
      "step": 2389340
    },
    {
      "epoch": 3.9102400450370833,
      "grad_norm": 0.4276570677757263,
      "learning_rate": 2.1938812490527997e-06,
      "loss": 0.0135,
      "step": 2389360
    },
    {
      "epoch": 3.910272775475737,
      "grad_norm": 0.173549622297287,
      "learning_rate": 2.1938153568392825e-06,
      "loss": 0.0131,
      "step": 2389380
    },
    {
      "epoch": 3.9103055059143905,
      "grad_norm": 0.1467071920633316,
      "learning_rate": 2.193749464625765e-06,
      "loss": 0.0103,
      "step": 2389400
    },
    {
      "epoch": 3.9103382363530437,
      "grad_norm": 0.22441349923610687,
      "learning_rate": 2.1936835724122484e-06,
      "loss": 0.0088,
      "step": 2389420
    },
    {
      "epoch": 3.910370966791697,
      "grad_norm": 0.5704275965690613,
      "learning_rate": 2.193617680198731e-06,
      "loss": 0.0123,
      "step": 2389440
    },
    {
      "epoch": 3.9104036972303504,
      "grad_norm": 0.3054540455341339,
      "learning_rate": 2.193551787985214e-06,
      "loss": 0.0105,
      "step": 2389460
    },
    {
      "epoch": 3.9104364276690036,
      "grad_norm": 0.20873206853866577,
      "learning_rate": 2.1934858957716966e-06,
      "loss": 0.0156,
      "step": 2389480
    },
    {
      "epoch": 3.9104691581076567,
      "grad_norm": 0.29573720693588257,
      "learning_rate": 2.1934200035581798e-06,
      "loss": 0.0084,
      "step": 2389500
    },
    {
      "epoch": 3.9105018885463103,
      "grad_norm": 0.7451414465904236,
      "learning_rate": 2.1933541113446625e-06,
      "loss": 0.0103,
      "step": 2389520
    },
    {
      "epoch": 3.910534618984964,
      "grad_norm": 0.4517137110233307,
      "learning_rate": 2.1932882191311457e-06,
      "loss": 0.0145,
      "step": 2389540
    },
    {
      "epoch": 3.910567349423617,
      "grad_norm": 0.12609076499938965,
      "learning_rate": 2.1932223269176284e-06,
      "loss": 0.013,
      "step": 2389560
    },
    {
      "epoch": 3.91060007986227,
      "grad_norm": 0.14337201416492462,
      "learning_rate": 2.193156434704111e-06,
      "loss": 0.0072,
      "step": 2389580
    },
    {
      "epoch": 3.910632810300924,
      "grad_norm": 0.18627005815505981,
      "learning_rate": 2.193090542490594e-06,
      "loss": 0.008,
      "step": 2389600
    },
    {
      "epoch": 3.910665540739577,
      "grad_norm": 0.47680819034576416,
      "learning_rate": 2.193024650277077e-06,
      "loss": 0.0127,
      "step": 2389620
    },
    {
      "epoch": 3.91069827117823,
      "grad_norm": 0.4744068682193756,
      "learning_rate": 2.1929587580635598e-06,
      "loss": 0.0132,
      "step": 2389640
    },
    {
      "epoch": 3.9107310016168837,
      "grad_norm": 0.16833814978599548,
      "learning_rate": 2.1928928658500425e-06,
      "loss": 0.0069,
      "step": 2389660
    },
    {
      "epoch": 3.910763732055537,
      "grad_norm": 0.44538894295692444,
      "learning_rate": 2.1928269736365253e-06,
      "loss": 0.0112,
      "step": 2389680
    },
    {
      "epoch": 3.9107964624941904,
      "grad_norm": 0.045615632086992264,
      "learning_rate": 2.1927610814230084e-06,
      "loss": 0.0123,
      "step": 2389700
    },
    {
      "epoch": 3.9108291929328436,
      "grad_norm": 0.16352689266204834,
      "learning_rate": 2.1926951892094916e-06,
      "loss": 0.0111,
      "step": 2389720
    },
    {
      "epoch": 3.910861923371497,
      "grad_norm": 0.1057872623205185,
      "learning_rate": 2.1926292969959743e-06,
      "loss": 0.0112,
      "step": 2389740
    },
    {
      "epoch": 3.9108946538101503,
      "grad_norm": 0.19761836528778076,
      "learning_rate": 2.192563404782457e-06,
      "loss": 0.0129,
      "step": 2389760
    },
    {
      "epoch": 3.9109273842488035,
      "grad_norm": 0.19640976190567017,
      "learning_rate": 2.19249751256894e-06,
      "loss": 0.01,
      "step": 2389780
    },
    {
      "epoch": 3.910960114687457,
      "grad_norm": 0.11869306862354279,
      "learning_rate": 2.1924316203554225e-06,
      "loss": 0.0108,
      "step": 2389800
    },
    {
      "epoch": 3.9109928451261102,
      "grad_norm": 0.1503053456544876,
      "learning_rate": 2.1923657281419057e-06,
      "loss": 0.0093,
      "step": 2389820
    },
    {
      "epoch": 3.911025575564764,
      "grad_norm": 0.0758933499455452,
      "learning_rate": 2.1922998359283884e-06,
      "loss": 0.0079,
      "step": 2389840
    },
    {
      "epoch": 3.911058306003417,
      "grad_norm": 0.08121313899755478,
      "learning_rate": 2.1922339437148716e-06,
      "loss": 0.0129,
      "step": 2389860
    },
    {
      "epoch": 3.9110910364420706,
      "grad_norm": 0.6337894201278687,
      "learning_rate": 2.1921680515013543e-06,
      "loss": 0.0077,
      "step": 2389880
    },
    {
      "epoch": 3.9111237668807237,
      "grad_norm": 1.1311503648757935,
      "learning_rate": 2.192102159287837e-06,
      "loss": 0.0173,
      "step": 2389900
    },
    {
      "epoch": 3.911156497319377,
      "grad_norm": 0.6225122809410095,
      "learning_rate": 2.1920362670743203e-06,
      "loss": 0.0109,
      "step": 2389920
    },
    {
      "epoch": 3.9111892277580305,
      "grad_norm": 0.330619215965271,
      "learning_rate": 2.191970374860803e-06,
      "loss": 0.0069,
      "step": 2389940
    },
    {
      "epoch": 3.9112219581966836,
      "grad_norm": 0.28347495198249817,
      "learning_rate": 2.1919044826472857e-06,
      "loss": 0.0118,
      "step": 2389960
    },
    {
      "epoch": 3.911254688635337,
      "grad_norm": 0.07747454196214676,
      "learning_rate": 2.1918385904337685e-06,
      "loss": 0.0057,
      "step": 2389980
    },
    {
      "epoch": 3.9112874190739904,
      "grad_norm": 0.44247177243232727,
      "learning_rate": 2.191772698220251e-06,
      "loss": 0.0148,
      "step": 2390000
    },
    {
      "epoch": 3.911320149512644,
      "grad_norm": 0.31537479162216187,
      "learning_rate": 2.1917068060067344e-06,
      "loss": 0.0166,
      "step": 2390020
    },
    {
      "epoch": 3.911352879951297,
      "grad_norm": 0.1714436262845993,
      "learning_rate": 2.1916409137932175e-06,
      "loss": 0.0088,
      "step": 2390040
    },
    {
      "epoch": 3.9113856103899503,
      "grad_norm": 0.9748483896255493,
      "learning_rate": 2.1915750215797003e-06,
      "loss": 0.0165,
      "step": 2390060
    },
    {
      "epoch": 3.911418340828604,
      "grad_norm": 0.15383389592170715,
      "learning_rate": 2.191509129366183e-06,
      "loss": 0.0084,
      "step": 2390080
    },
    {
      "epoch": 3.911451071267257,
      "grad_norm": 0.07667504996061325,
      "learning_rate": 2.1914432371526658e-06,
      "loss": 0.0125,
      "step": 2390100
    },
    {
      "epoch": 3.9114838017059106,
      "grad_norm": 0.15686802566051483,
      "learning_rate": 2.191377344939149e-06,
      "loss": 0.0093,
      "step": 2390120
    },
    {
      "epoch": 3.9115165321445637,
      "grad_norm": 0.14488573372364044,
      "learning_rate": 2.1913114527256317e-06,
      "loss": 0.0086,
      "step": 2390140
    },
    {
      "epoch": 3.9115492625832173,
      "grad_norm": 0.1681295931339264,
      "learning_rate": 2.1912455605121144e-06,
      "loss": 0.0124,
      "step": 2390160
    },
    {
      "epoch": 3.9115819930218705,
      "grad_norm": 0.1936805695295334,
      "learning_rate": 2.191179668298597e-06,
      "loss": 0.0126,
      "step": 2390180
    },
    {
      "epoch": 3.9116147234605236,
      "grad_norm": 0.1752692461013794,
      "learning_rate": 2.19111377608508e-06,
      "loss": 0.0095,
      "step": 2390200
    },
    {
      "epoch": 3.9116474538991772,
      "grad_norm": 0.665371835231781,
      "learning_rate": 2.191047883871563e-06,
      "loss": 0.0119,
      "step": 2390220
    },
    {
      "epoch": 3.9116801843378304,
      "grad_norm": 0.3524608612060547,
      "learning_rate": 2.190981991658046e-06,
      "loss": 0.0148,
      "step": 2390240
    },
    {
      "epoch": 3.911712914776484,
      "grad_norm": 0.23330745100975037,
      "learning_rate": 2.190916099444529e-06,
      "loss": 0.0151,
      "step": 2390260
    },
    {
      "epoch": 3.911745645215137,
      "grad_norm": 0.13292641937732697,
      "learning_rate": 2.1908502072310117e-06,
      "loss": 0.008,
      "step": 2390280
    },
    {
      "epoch": 3.9117783756537907,
      "grad_norm": 0.3977717161178589,
      "learning_rate": 2.1907843150174944e-06,
      "loss": 0.0095,
      "step": 2390300
    },
    {
      "epoch": 3.911811106092444,
      "grad_norm": 0.24791033565998077,
      "learning_rate": 2.1907184228039776e-06,
      "loss": 0.0084,
      "step": 2390320
    },
    {
      "epoch": 3.911843836531097,
      "grad_norm": 0.2279929220676422,
      "learning_rate": 2.1906525305904603e-06,
      "loss": 0.0079,
      "step": 2390340
    },
    {
      "epoch": 3.9118765669697506,
      "grad_norm": null,
      "learning_rate": 2.190586638376943e-06,
      "loss": 0.0173,
      "step": 2390360
    },
    {
      "epoch": 3.9119092974084038,
      "grad_norm": 0.1290976107120514,
      "learning_rate": 2.190520746163426e-06,
      "loss": 0.0102,
      "step": 2390380
    },
    {
      "epoch": 3.9119420278470574,
      "grad_norm": 0.14267438650131226,
      "learning_rate": 2.190454853949909e-06,
      "loss": 0.0079,
      "step": 2390400
    },
    {
      "epoch": 3.9119747582857105,
      "grad_norm": 0.15706564486026764,
      "learning_rate": 2.1903889617363917e-06,
      "loss": 0.0143,
      "step": 2390420
    },
    {
      "epoch": 3.912007488724364,
      "grad_norm": 0.0970069095492363,
      "learning_rate": 2.190323069522875e-06,
      "loss": 0.0146,
      "step": 2390440
    },
    {
      "epoch": 3.9120402191630173,
      "grad_norm": 1.0007405281066895,
      "learning_rate": 2.1902571773093576e-06,
      "loss": 0.0166,
      "step": 2390460
    },
    {
      "epoch": 3.9120729496016704,
      "grad_norm": 1.631895661354065,
      "learning_rate": 2.1901912850958404e-06,
      "loss": 0.0104,
      "step": 2390480
    },
    {
      "epoch": 3.912105680040324,
      "grad_norm": 0.08230196684598923,
      "learning_rate": 2.190125392882323e-06,
      "loss": 0.0083,
      "step": 2390500
    },
    {
      "epoch": 3.912138410478977,
      "grad_norm": 0.2517302632331848,
      "learning_rate": 2.1900595006688063e-06,
      "loss": 0.0095,
      "step": 2390520
    },
    {
      "epoch": 3.9121711409176307,
      "grad_norm": 0.3901570439338684,
      "learning_rate": 2.189993608455289e-06,
      "loss": 0.0088,
      "step": 2390540
    },
    {
      "epoch": 3.912203871356284,
      "grad_norm": 0.218584343791008,
      "learning_rate": 2.1899277162417717e-06,
      "loss": 0.0107,
      "step": 2390560
    },
    {
      "epoch": 3.9122366017949375,
      "grad_norm": 0.29262128472328186,
      "learning_rate": 2.189861824028255e-06,
      "loss": 0.0125,
      "step": 2390580
    },
    {
      "epoch": 3.9122693322335906,
      "grad_norm": 0.06845201551914215,
      "learning_rate": 2.1897959318147376e-06,
      "loss": 0.0108,
      "step": 2390600
    },
    {
      "epoch": 3.912302062672244,
      "grad_norm": 0.43392112851142883,
      "learning_rate": 2.1897300396012204e-06,
      "loss": 0.0132,
      "step": 2390620
    },
    {
      "epoch": 3.9123347931108974,
      "grad_norm": 0.7982003092765808,
      "learning_rate": 2.1896641473877035e-06,
      "loss": 0.0161,
      "step": 2390640
    },
    {
      "epoch": 3.9123675235495505,
      "grad_norm": 0.6658269166946411,
      "learning_rate": 2.1895982551741863e-06,
      "loss": 0.013,
      "step": 2390660
    },
    {
      "epoch": 3.9124002539882037,
      "grad_norm": 0.06776832789182663,
      "learning_rate": 2.189532362960669e-06,
      "loss": 0.0153,
      "step": 2390680
    },
    {
      "epoch": 3.9124329844268573,
      "grad_norm": 0.31283682584762573,
      "learning_rate": 2.1894664707471518e-06,
      "loss": 0.015,
      "step": 2390700
    },
    {
      "epoch": 3.912465714865511,
      "grad_norm": 0.21523158252239227,
      "learning_rate": 2.189400578533635e-06,
      "loss": 0.0117,
      "step": 2390720
    },
    {
      "epoch": 3.912498445304164,
      "grad_norm": 0.17865407466888428,
      "learning_rate": 2.1893346863201177e-06,
      "loss": 0.0131,
      "step": 2390740
    },
    {
      "epoch": 3.912531175742817,
      "grad_norm": 0.30061066150665283,
      "learning_rate": 2.189268794106601e-06,
      "loss": 0.0096,
      "step": 2390760
    },
    {
      "epoch": 3.9125639061814708,
      "grad_norm": 0.17132194340229034,
      "learning_rate": 2.1892029018930836e-06,
      "loss": 0.0124,
      "step": 2390780
    },
    {
      "epoch": 3.912596636620124,
      "grad_norm": 0.14769777655601501,
      "learning_rate": 2.1891370096795663e-06,
      "loss": 0.0121,
      "step": 2390800
    },
    {
      "epoch": 3.912629367058777,
      "grad_norm": 0.44971731305122375,
      "learning_rate": 2.189071117466049e-06,
      "loss": 0.0096,
      "step": 2390820
    },
    {
      "epoch": 3.9126620974974307,
      "grad_norm": 0.10599413514137268,
      "learning_rate": 2.189005225252532e-06,
      "loss": 0.0068,
      "step": 2390840
    },
    {
      "epoch": 3.9126948279360843,
      "grad_norm": 0.14855222404003143,
      "learning_rate": 2.188939333039015e-06,
      "loss": 0.008,
      "step": 2390860
    },
    {
      "epoch": 3.9127275583747374,
      "grad_norm": 0.1501619517803192,
      "learning_rate": 2.1888734408254977e-06,
      "loss": 0.009,
      "step": 2390880
    },
    {
      "epoch": 3.9127602888133906,
      "grad_norm": 0.32062581181526184,
      "learning_rate": 2.1888075486119804e-06,
      "loss": 0.0097,
      "step": 2390900
    },
    {
      "epoch": 3.912793019252044,
      "grad_norm": 0.17476364970207214,
      "learning_rate": 2.1887416563984636e-06,
      "loss": 0.0099,
      "step": 2390920
    },
    {
      "epoch": 3.9128257496906973,
      "grad_norm": 0.11263874918222427,
      "learning_rate": 2.1886757641849463e-06,
      "loss": 0.0125,
      "step": 2390940
    },
    {
      "epoch": 3.9128584801293504,
      "grad_norm": 0.13621553778648376,
      "learning_rate": 2.1886098719714295e-06,
      "loss": 0.0053,
      "step": 2390960
    },
    {
      "epoch": 3.912891210568004,
      "grad_norm": 0.49329978227615356,
      "learning_rate": 2.1885439797579122e-06,
      "loss": 0.0087,
      "step": 2390980
    },
    {
      "epoch": 3.9129239410066576,
      "grad_norm": 0.24902674555778503,
      "learning_rate": 2.188478087544395e-06,
      "loss": 0.0157,
      "step": 2391000
    },
    {
      "epoch": 3.912956671445311,
      "grad_norm": 0.4737427532672882,
      "learning_rate": 2.1884121953308777e-06,
      "loss": 0.0103,
      "step": 2391020
    },
    {
      "epoch": 3.912989401883964,
      "grad_norm": 0.266225665807724,
      "learning_rate": 2.188346303117361e-06,
      "loss": 0.0104,
      "step": 2391040
    },
    {
      "epoch": 3.9130221323226175,
      "grad_norm": 0.052782945334911346,
      "learning_rate": 2.1882804109038436e-06,
      "loss": 0.0176,
      "step": 2391060
    },
    {
      "epoch": 3.9130548627612707,
      "grad_norm": 0.4616330862045288,
      "learning_rate": 2.1882145186903264e-06,
      "loss": 0.0086,
      "step": 2391080
    },
    {
      "epoch": 3.913087593199924,
      "grad_norm": 0.23248615860939026,
      "learning_rate": 2.1881486264768095e-06,
      "loss": 0.0094,
      "step": 2391100
    },
    {
      "epoch": 3.9131203236385774,
      "grad_norm": 0.32403936982154846,
      "learning_rate": 2.1880827342632923e-06,
      "loss": 0.0091,
      "step": 2391120
    },
    {
      "epoch": 3.913153054077231,
      "grad_norm": 0.18317052721977234,
      "learning_rate": 2.1880168420497754e-06,
      "loss": 0.0088,
      "step": 2391140
    },
    {
      "epoch": 3.913185784515884,
      "grad_norm": 0.2144049108028412,
      "learning_rate": 2.187950949836258e-06,
      "loss": 0.0103,
      "step": 2391160
    },
    {
      "epoch": 3.9132185149545373,
      "grad_norm": 0.11911837756633759,
      "learning_rate": 2.187885057622741e-06,
      "loss": 0.0085,
      "step": 2391180
    },
    {
      "epoch": 3.913251245393191,
      "grad_norm": 0.3128373920917511,
      "learning_rate": 2.1878191654092236e-06,
      "loss": 0.0137,
      "step": 2391200
    },
    {
      "epoch": 3.913283975831844,
      "grad_norm": 0.4904046654701233,
      "learning_rate": 2.1877532731957064e-06,
      "loss": 0.0101,
      "step": 2391220
    },
    {
      "epoch": 3.913316706270497,
      "grad_norm": 0.1277843415737152,
      "learning_rate": 2.1876873809821895e-06,
      "loss": 0.0079,
      "step": 2391240
    },
    {
      "epoch": 3.913349436709151,
      "grad_norm": 0.41058939695358276,
      "learning_rate": 2.1876214887686723e-06,
      "loss": 0.0087,
      "step": 2391260
    },
    {
      "epoch": 3.913382167147804,
      "grad_norm": 0.23388303816318512,
      "learning_rate": 2.1875555965551554e-06,
      "loss": 0.0071,
      "step": 2391280
    },
    {
      "epoch": 3.9134148975864576,
      "grad_norm": 0.09533800184726715,
      "learning_rate": 2.187489704341638e-06,
      "loss": 0.0134,
      "step": 2391300
    },
    {
      "epoch": 3.9134476280251107,
      "grad_norm": 0.2241789996623993,
      "learning_rate": 2.187423812128121e-06,
      "loss": 0.0074,
      "step": 2391320
    },
    {
      "epoch": 3.9134803584637643,
      "grad_norm": 0.41042712330818176,
      "learning_rate": 2.187357919914604e-06,
      "loss": 0.0127,
      "step": 2391340
    },
    {
      "epoch": 3.9135130889024174,
      "grad_norm": 0.3823675811290741,
      "learning_rate": 2.187292027701087e-06,
      "loss": 0.0109,
      "step": 2391360
    },
    {
      "epoch": 3.9135458193410706,
      "grad_norm": 0.1577637940645218,
      "learning_rate": 2.1872261354875696e-06,
      "loss": 0.0119,
      "step": 2391380
    },
    {
      "epoch": 3.913578549779724,
      "grad_norm": 0.35488563776016235,
      "learning_rate": 2.1871602432740523e-06,
      "loss": 0.011,
      "step": 2391400
    },
    {
      "epoch": 3.9136112802183773,
      "grad_norm": 0.14714498817920685,
      "learning_rate": 2.187094351060535e-06,
      "loss": 0.0079,
      "step": 2391420
    },
    {
      "epoch": 3.913644010657031,
      "grad_norm": 0.5758938789367676,
      "learning_rate": 2.187028458847018e-06,
      "loss": 0.0087,
      "step": 2391440
    },
    {
      "epoch": 3.913676741095684,
      "grad_norm": 0.19251209497451782,
      "learning_rate": 2.1869625666335014e-06,
      "loss": 0.0141,
      "step": 2391460
    },
    {
      "epoch": 3.9137094715343377,
      "grad_norm": 0.5301044583320618,
      "learning_rate": 2.186896674419984e-06,
      "loss": 0.0092,
      "step": 2391480
    },
    {
      "epoch": 3.913742201972991,
      "grad_norm": 0.1505805402994156,
      "learning_rate": 2.186830782206467e-06,
      "loss": 0.0103,
      "step": 2391500
    },
    {
      "epoch": 3.913774932411644,
      "grad_norm": 0.15191411972045898,
      "learning_rate": 2.1867648899929496e-06,
      "loss": 0.0103,
      "step": 2391520
    },
    {
      "epoch": 3.9138076628502976,
      "grad_norm": 0.48280003666877747,
      "learning_rate": 2.1866989977794328e-06,
      "loss": 0.0085,
      "step": 2391540
    },
    {
      "epoch": 3.9138403932889507,
      "grad_norm": 0.11692441254854202,
      "learning_rate": 2.1866331055659155e-06,
      "loss": 0.0098,
      "step": 2391560
    },
    {
      "epoch": 3.9138731237276043,
      "grad_norm": 0.7569073438644409,
      "learning_rate": 2.1865672133523982e-06,
      "loss": 0.0154,
      "step": 2391580
    },
    {
      "epoch": 3.9139058541662575,
      "grad_norm": 0.23156720399856567,
      "learning_rate": 2.186501321138881e-06,
      "loss": 0.0064,
      "step": 2391600
    },
    {
      "epoch": 3.913938584604911,
      "grad_norm": 0.5353361368179321,
      "learning_rate": 2.186435428925364e-06,
      "loss": 0.0075,
      "step": 2391620
    },
    {
      "epoch": 3.913971315043564,
      "grad_norm": 0.2902565598487854,
      "learning_rate": 2.186369536711847e-06,
      "loss": 0.0121,
      "step": 2391640
    },
    {
      "epoch": 3.9140040454822174,
      "grad_norm": 0.4069528877735138,
      "learning_rate": 2.18630364449833e-06,
      "loss": 0.0096,
      "step": 2391660
    },
    {
      "epoch": 3.914036775920871,
      "grad_norm": 0.34110361337661743,
      "learning_rate": 2.1862377522848128e-06,
      "loss": 0.0058,
      "step": 2391680
    },
    {
      "epoch": 3.914069506359524,
      "grad_norm": 0.19000022113323212,
      "learning_rate": 2.1861718600712955e-06,
      "loss": 0.0151,
      "step": 2391700
    },
    {
      "epoch": 3.9141022367981777,
      "grad_norm": 0.24822643399238586,
      "learning_rate": 2.1861059678577783e-06,
      "loss": 0.0081,
      "step": 2391720
    },
    {
      "epoch": 3.914134967236831,
      "grad_norm": 0.1070345863699913,
      "learning_rate": 2.1860400756442614e-06,
      "loss": 0.0056,
      "step": 2391740
    },
    {
      "epoch": 3.9141676976754844,
      "grad_norm": 0.14163842797279358,
      "learning_rate": 2.185974183430744e-06,
      "loss": 0.0158,
      "step": 2391760
    },
    {
      "epoch": 3.9142004281141376,
      "grad_norm": 0.3332802355289459,
      "learning_rate": 2.185908291217227e-06,
      "loss": 0.0123,
      "step": 2391780
    },
    {
      "epoch": 3.9142331585527907,
      "grad_norm": 0.8746460676193237,
      "learning_rate": 2.18584239900371e-06,
      "loss": 0.0114,
      "step": 2391800
    },
    {
      "epoch": 3.9142658889914443,
      "grad_norm": 0.21960672736167908,
      "learning_rate": 2.185776506790193e-06,
      "loss": 0.0084,
      "step": 2391820
    },
    {
      "epoch": 3.9142986194300975,
      "grad_norm": 0.23225678503513336,
      "learning_rate": 2.1857106145766755e-06,
      "loss": 0.0126,
      "step": 2391840
    },
    {
      "epoch": 3.914331349868751,
      "grad_norm": 0.3612869679927826,
      "learning_rate": 2.1856447223631587e-06,
      "loss": 0.0132,
      "step": 2391860
    },
    {
      "epoch": 3.9143640803074042,
      "grad_norm": 0.1579728126525879,
      "learning_rate": 2.1855788301496415e-06,
      "loss": 0.01,
      "step": 2391880
    },
    {
      "epoch": 3.914396810746058,
      "grad_norm": 0.08843546360731125,
      "learning_rate": 2.185512937936124e-06,
      "loss": 0.0152,
      "step": 2391900
    },
    {
      "epoch": 3.914429541184711,
      "grad_norm": 0.4994197189807892,
      "learning_rate": 2.185447045722607e-06,
      "loss": 0.0109,
      "step": 2391920
    },
    {
      "epoch": 3.914462271623364,
      "grad_norm": 0.5986570715904236,
      "learning_rate": 2.18538115350909e-06,
      "loss": 0.0106,
      "step": 2391940
    },
    {
      "epoch": 3.9144950020620177,
      "grad_norm": 0.1368211954832077,
      "learning_rate": 2.185315261295573e-06,
      "loss": 0.014,
      "step": 2391960
    },
    {
      "epoch": 3.914527732500671,
      "grad_norm": 0.7681766152381897,
      "learning_rate": 2.185249369082056e-06,
      "loss": 0.0104,
      "step": 2391980
    },
    {
      "epoch": 3.9145604629393245,
      "grad_norm": 0.14808596670627594,
      "learning_rate": 2.1851834768685387e-06,
      "loss": 0.0095,
      "step": 2392000
    },
    {
      "epoch": 3.9145931933779776,
      "grad_norm": 0.22848203778266907,
      "learning_rate": 2.1851175846550215e-06,
      "loss": 0.0134,
      "step": 2392020
    },
    {
      "epoch": 3.914625923816631,
      "grad_norm": 0.10190655291080475,
      "learning_rate": 2.1850516924415042e-06,
      "loss": 0.0167,
      "step": 2392040
    },
    {
      "epoch": 3.9146586542552844,
      "grad_norm": 0.07259612530469894,
      "learning_rate": 2.1849858002279874e-06,
      "loss": 0.0091,
      "step": 2392060
    },
    {
      "epoch": 3.9146913846939375,
      "grad_norm": 0.31695160269737244,
      "learning_rate": 2.18491990801447e-06,
      "loss": 0.0117,
      "step": 2392080
    },
    {
      "epoch": 3.914724115132591,
      "grad_norm": 0.2478339523077011,
      "learning_rate": 2.184854015800953e-06,
      "loss": 0.0104,
      "step": 2392100
    },
    {
      "epoch": 3.9147568455712443,
      "grad_norm": 0.23909053206443787,
      "learning_rate": 2.1847881235874356e-06,
      "loss": 0.0084,
      "step": 2392120
    },
    {
      "epoch": 3.9147895760098974,
      "grad_norm": 0.2940759062767029,
      "learning_rate": 2.1847222313739188e-06,
      "loss": 0.0119,
      "step": 2392140
    },
    {
      "epoch": 3.914822306448551,
      "grad_norm": 0.1807042360305786,
      "learning_rate": 2.1846563391604015e-06,
      "loss": 0.012,
      "step": 2392160
    },
    {
      "epoch": 3.9148550368872046,
      "grad_norm": 0.656525194644928,
      "learning_rate": 2.1845904469468847e-06,
      "loss": 0.0102,
      "step": 2392180
    },
    {
      "epoch": 3.9148877673258577,
      "grad_norm": 0.6873666644096375,
      "learning_rate": 2.1845245547333674e-06,
      "loss": 0.0125,
      "step": 2392200
    },
    {
      "epoch": 3.914920497764511,
      "grad_norm": 0.14296025037765503,
      "learning_rate": 2.18445866251985e-06,
      "loss": 0.0093,
      "step": 2392220
    },
    {
      "epoch": 3.9149532282031645,
      "grad_norm": 0.29377880692481995,
      "learning_rate": 2.184392770306333e-06,
      "loss": 0.0068,
      "step": 2392240
    },
    {
      "epoch": 3.9149859586418176,
      "grad_norm": 0.04694751650094986,
      "learning_rate": 2.184326878092816e-06,
      "loss": 0.0104,
      "step": 2392260
    },
    {
      "epoch": 3.915018689080471,
      "grad_norm": 0.1886940598487854,
      "learning_rate": 2.1842609858792988e-06,
      "loss": 0.0137,
      "step": 2392280
    },
    {
      "epoch": 3.9150514195191244,
      "grad_norm": 0.14834251999855042,
      "learning_rate": 2.1841950936657815e-06,
      "loss": 0.0111,
      "step": 2392300
    },
    {
      "epoch": 3.915084149957778,
      "grad_norm": 0.25659066438674927,
      "learning_rate": 2.1841292014522647e-06,
      "loss": 0.0113,
      "step": 2392320
    },
    {
      "epoch": 3.915116880396431,
      "grad_norm": 0.31183457374572754,
      "learning_rate": 2.1840633092387474e-06,
      "loss": 0.0094,
      "step": 2392340
    },
    {
      "epoch": 3.9151496108350843,
      "grad_norm": 0.3275049328804016,
      "learning_rate": 2.1839974170252306e-06,
      "loss": 0.0083,
      "step": 2392360
    },
    {
      "epoch": 3.915182341273738,
      "grad_norm": 0.25829705595970154,
      "learning_rate": 2.1839315248117133e-06,
      "loss": 0.0076,
      "step": 2392380
    },
    {
      "epoch": 3.915215071712391,
      "grad_norm": 0.1361839324235916,
      "learning_rate": 2.183865632598196e-06,
      "loss": 0.0077,
      "step": 2392400
    },
    {
      "epoch": 3.915247802151044,
      "grad_norm": 0.27770912647247314,
      "learning_rate": 2.183799740384679e-06,
      "loss": 0.0138,
      "step": 2392420
    },
    {
      "epoch": 3.9152805325896978,
      "grad_norm": 0.33106598258018494,
      "learning_rate": 2.1837338481711616e-06,
      "loss": 0.0109,
      "step": 2392440
    },
    {
      "epoch": 3.9153132630283514,
      "grad_norm": 0.08668392896652222,
      "learning_rate": 2.1836679559576447e-06,
      "loss": 0.011,
      "step": 2392460
    },
    {
      "epoch": 3.9153459934670045,
      "grad_norm": 0.2917942702770233,
      "learning_rate": 2.1836020637441275e-06,
      "loss": 0.0107,
      "step": 2392480
    },
    {
      "epoch": 3.9153787239056577,
      "grad_norm": 0.28039997816085815,
      "learning_rate": 2.1835361715306106e-06,
      "loss": 0.0147,
      "step": 2392500
    },
    {
      "epoch": 3.9154114543443113,
      "grad_norm": 0.18017587065696716,
      "learning_rate": 2.1834702793170934e-06,
      "loss": 0.0095,
      "step": 2392520
    },
    {
      "epoch": 3.9154441847829644,
      "grad_norm": 0.18583200871944427,
      "learning_rate": 2.183404387103576e-06,
      "loss": 0.0177,
      "step": 2392540
    },
    {
      "epoch": 3.9154769152216176,
      "grad_norm": 0.1479450762271881,
      "learning_rate": 2.1833384948900593e-06,
      "loss": 0.0102,
      "step": 2392560
    },
    {
      "epoch": 3.915509645660271,
      "grad_norm": 0.18537455797195435,
      "learning_rate": 2.183272602676542e-06,
      "loss": 0.012,
      "step": 2392580
    },
    {
      "epoch": 3.9155423760989247,
      "grad_norm": 0.13504792749881744,
      "learning_rate": 2.1832067104630247e-06,
      "loss": 0.011,
      "step": 2392600
    },
    {
      "epoch": 3.915575106537578,
      "grad_norm": 0.4116010367870331,
      "learning_rate": 2.1831408182495075e-06,
      "loss": 0.0129,
      "step": 2392620
    },
    {
      "epoch": 3.915607836976231,
      "grad_norm": 0.6563143134117126,
      "learning_rate": 2.1830749260359902e-06,
      "loss": 0.0097,
      "step": 2392640
    },
    {
      "epoch": 3.9156405674148846,
      "grad_norm": 0.44696635007858276,
      "learning_rate": 2.1830090338224734e-06,
      "loss": 0.0134,
      "step": 2392660
    },
    {
      "epoch": 3.915673297853538,
      "grad_norm": 0.32474565505981445,
      "learning_rate": 2.1829431416089565e-06,
      "loss": 0.0107,
      "step": 2392680
    },
    {
      "epoch": 3.915706028292191,
      "grad_norm": 0.3203403055667877,
      "learning_rate": 2.1828772493954393e-06,
      "loss": 0.0091,
      "step": 2392700
    },
    {
      "epoch": 3.9157387587308445,
      "grad_norm": 0.7708861827850342,
      "learning_rate": 2.182811357181922e-06,
      "loss": 0.0074,
      "step": 2392720
    },
    {
      "epoch": 3.9157714891694977,
      "grad_norm": 0.2094789296388626,
      "learning_rate": 2.1827454649684048e-06,
      "loss": 0.0079,
      "step": 2392740
    },
    {
      "epoch": 3.9158042196081513,
      "grad_norm": 0.353661447763443,
      "learning_rate": 2.182679572754888e-06,
      "loss": 0.0086,
      "step": 2392760
    },
    {
      "epoch": 3.9158369500468044,
      "grad_norm": 0.0820375606417656,
      "learning_rate": 2.1826136805413707e-06,
      "loss": 0.0074,
      "step": 2392780
    },
    {
      "epoch": 3.915869680485458,
      "grad_norm": 0.06207457184791565,
      "learning_rate": 2.1825477883278534e-06,
      "loss": 0.0095,
      "step": 2392800
    },
    {
      "epoch": 3.915902410924111,
      "grad_norm": 0.2793513834476471,
      "learning_rate": 2.182481896114336e-06,
      "loss": 0.0081,
      "step": 2392820
    },
    {
      "epoch": 3.9159351413627643,
      "grad_norm": 0.2847011983394623,
      "learning_rate": 2.182416003900819e-06,
      "loss": 0.0107,
      "step": 2392840
    },
    {
      "epoch": 3.915967871801418,
      "grad_norm": 0.18262198567390442,
      "learning_rate": 2.182350111687302e-06,
      "loss": 0.0103,
      "step": 2392860
    },
    {
      "epoch": 3.916000602240071,
      "grad_norm": 0.36473414301872253,
      "learning_rate": 2.1822842194737852e-06,
      "loss": 0.009,
      "step": 2392880
    },
    {
      "epoch": 3.9160333326787247,
      "grad_norm": 0.2794612646102905,
      "learning_rate": 2.182218327260268e-06,
      "loss": 0.0064,
      "step": 2392900
    },
    {
      "epoch": 3.916066063117378,
      "grad_norm": 0.22123321890830994,
      "learning_rate": 2.1821524350467507e-06,
      "loss": 0.0107,
      "step": 2392920
    },
    {
      "epoch": 3.9160987935560314,
      "grad_norm": 0.04141898453235626,
      "learning_rate": 2.1820865428332334e-06,
      "loss": 0.0095,
      "step": 2392940
    },
    {
      "epoch": 3.9161315239946846,
      "grad_norm": 0.5940049290657043,
      "learning_rate": 2.1820206506197166e-06,
      "loss": 0.0069,
      "step": 2392960
    },
    {
      "epoch": 3.9161642544333377,
      "grad_norm": 0.32445210218429565,
      "learning_rate": 2.1819547584061993e-06,
      "loss": 0.0071,
      "step": 2392980
    },
    {
      "epoch": 3.9161969848719913,
      "grad_norm": 0.34378039836883545,
      "learning_rate": 2.181888866192682e-06,
      "loss": 0.0116,
      "step": 2393000
    },
    {
      "epoch": 3.9162297153106445,
      "grad_norm": 0.4161940813064575,
      "learning_rate": 2.181822973979165e-06,
      "loss": 0.0102,
      "step": 2393020
    },
    {
      "epoch": 3.916262445749298,
      "grad_norm": 0.424686461687088,
      "learning_rate": 2.181757081765648e-06,
      "loss": 0.0124,
      "step": 2393040
    },
    {
      "epoch": 3.916295176187951,
      "grad_norm": 0.09252534806728363,
      "learning_rate": 2.1816911895521307e-06,
      "loss": 0.0094,
      "step": 2393060
    },
    {
      "epoch": 3.916327906626605,
      "grad_norm": 0.16740110516548157,
      "learning_rate": 2.181625297338614e-06,
      "loss": 0.0106,
      "step": 2393080
    },
    {
      "epoch": 3.916360637065258,
      "grad_norm": 0.07633904367685318,
      "learning_rate": 2.1815594051250966e-06,
      "loss": 0.0108,
      "step": 2393100
    },
    {
      "epoch": 3.916393367503911,
      "grad_norm": 0.27736011147499084,
      "learning_rate": 2.1814935129115794e-06,
      "loss": 0.0094,
      "step": 2393120
    },
    {
      "epoch": 3.9164260979425647,
      "grad_norm": 0.28961196541786194,
      "learning_rate": 2.181427620698062e-06,
      "loss": 0.0095,
      "step": 2393140
    },
    {
      "epoch": 3.916458828381218,
      "grad_norm": 0.24980312585830688,
      "learning_rate": 2.1813617284845453e-06,
      "loss": 0.0106,
      "step": 2393160
    },
    {
      "epoch": 3.9164915588198714,
      "grad_norm": 0.32219886779785156,
      "learning_rate": 2.181295836271028e-06,
      "loss": 0.0089,
      "step": 2393180
    },
    {
      "epoch": 3.9165242892585246,
      "grad_norm": 0.32062625885009766,
      "learning_rate": 2.181229944057511e-06,
      "loss": 0.006,
      "step": 2393200
    },
    {
      "epoch": 3.916557019697178,
      "grad_norm": 0.20125150680541992,
      "learning_rate": 2.181164051843994e-06,
      "loss": 0.0128,
      "step": 2393220
    },
    {
      "epoch": 3.9165897501358313,
      "grad_norm": 0.39822375774383545,
      "learning_rate": 2.1810981596304766e-06,
      "loss": 0.0121,
      "step": 2393240
    },
    {
      "epoch": 3.9166224805744845,
      "grad_norm": 0.19198386371135712,
      "learning_rate": 2.1810322674169594e-06,
      "loss": 0.0086,
      "step": 2393260
    },
    {
      "epoch": 3.916655211013138,
      "grad_norm": 0.621712327003479,
      "learning_rate": 2.1809663752034426e-06,
      "loss": 0.0085,
      "step": 2393280
    },
    {
      "epoch": 3.916687941451791,
      "grad_norm": 0.33889061212539673,
      "learning_rate": 2.1809004829899253e-06,
      "loss": 0.01,
      "step": 2393300
    },
    {
      "epoch": 3.916720671890445,
      "grad_norm": 0.24263840913772583,
      "learning_rate": 2.180834590776408e-06,
      "loss": 0.0123,
      "step": 2393320
    },
    {
      "epoch": 3.916753402329098,
      "grad_norm": 0.7051368355751038,
      "learning_rate": 2.1807686985628908e-06,
      "loss": 0.0137,
      "step": 2393340
    },
    {
      "epoch": 3.9167861327677516,
      "grad_norm": 0.5592025518417358,
      "learning_rate": 2.180702806349374e-06,
      "loss": 0.0091,
      "step": 2393360
    },
    {
      "epoch": 3.9168188632064047,
      "grad_norm": 0.4392834007740021,
      "learning_rate": 2.1806369141358567e-06,
      "loss": 0.0098,
      "step": 2393380
    },
    {
      "epoch": 3.916851593645058,
      "grad_norm": 0.30163589119911194,
      "learning_rate": 2.18057102192234e-06,
      "loss": 0.0144,
      "step": 2393400
    },
    {
      "epoch": 3.9168843240837115,
      "grad_norm": 1.3364166021347046,
      "learning_rate": 2.1805051297088226e-06,
      "loss": 0.0128,
      "step": 2393420
    },
    {
      "epoch": 3.9169170545223646,
      "grad_norm": 0.3148977756500244,
      "learning_rate": 2.1804392374953053e-06,
      "loss": 0.0109,
      "step": 2393440
    },
    {
      "epoch": 3.916949784961018,
      "grad_norm": 0.6353715062141418,
      "learning_rate": 2.180373345281788e-06,
      "loss": 0.0173,
      "step": 2393460
    },
    {
      "epoch": 3.9169825153996713,
      "grad_norm": 0.12747198343276978,
      "learning_rate": 2.1803074530682712e-06,
      "loss": 0.0129,
      "step": 2393480
    },
    {
      "epoch": 3.917015245838325,
      "grad_norm": 0.22117683291435242,
      "learning_rate": 2.180241560854754e-06,
      "loss": 0.0123,
      "step": 2393500
    },
    {
      "epoch": 3.917047976276978,
      "grad_norm": 0.13141655921936035,
      "learning_rate": 2.1801756686412367e-06,
      "loss": 0.0083,
      "step": 2393520
    },
    {
      "epoch": 3.9170807067156312,
      "grad_norm": 0.13366974890232086,
      "learning_rate": 2.1801097764277194e-06,
      "loss": 0.0085,
      "step": 2393540
    },
    {
      "epoch": 3.917113437154285,
      "grad_norm": 0.10771846771240234,
      "learning_rate": 2.1800438842142026e-06,
      "loss": 0.0113,
      "step": 2393560
    },
    {
      "epoch": 3.917146167592938,
      "grad_norm": 0.24167025089263916,
      "learning_rate": 2.1799779920006853e-06,
      "loss": 0.0106,
      "step": 2393580
    },
    {
      "epoch": 3.9171788980315916,
      "grad_norm": 0.7444430589675903,
      "learning_rate": 2.1799120997871685e-06,
      "loss": 0.0149,
      "step": 2393600
    },
    {
      "epoch": 3.9172116284702447,
      "grad_norm": 0.39072883129119873,
      "learning_rate": 2.1798462075736512e-06,
      "loss": 0.0115,
      "step": 2393620
    },
    {
      "epoch": 3.9172443589088983,
      "grad_norm": 0.058380816131830215,
      "learning_rate": 2.179780315360134e-06,
      "loss": 0.0114,
      "step": 2393640
    },
    {
      "epoch": 3.9172770893475515,
      "grad_norm": 0.839819610118866,
      "learning_rate": 2.1797144231466167e-06,
      "loss": 0.0136,
      "step": 2393660
    },
    {
      "epoch": 3.9173098197862046,
      "grad_norm": 0.09368938207626343,
      "learning_rate": 2.1796485309331e-06,
      "loss": 0.0136,
      "step": 2393680
    },
    {
      "epoch": 3.917342550224858,
      "grad_norm": 0.2759885787963867,
      "learning_rate": 2.1795826387195826e-06,
      "loss": 0.0094,
      "step": 2393700
    },
    {
      "epoch": 3.9173752806635114,
      "grad_norm": 0.31227388978004456,
      "learning_rate": 2.1795167465060654e-06,
      "loss": 0.0133,
      "step": 2393720
    },
    {
      "epoch": 3.9174080111021645,
      "grad_norm": 0.34496748447418213,
      "learning_rate": 2.1794508542925485e-06,
      "loss": 0.0096,
      "step": 2393740
    },
    {
      "epoch": 3.917440741540818,
      "grad_norm": 0.24984389543533325,
      "learning_rate": 2.1793849620790313e-06,
      "loss": 0.0106,
      "step": 2393760
    },
    {
      "epoch": 3.9174734719794717,
      "grad_norm": 0.14308872818946838,
      "learning_rate": 2.1793190698655144e-06,
      "loss": 0.0092,
      "step": 2393780
    },
    {
      "epoch": 3.917506202418125,
      "grad_norm": 0.2230343222618103,
      "learning_rate": 2.179253177651997e-06,
      "loss": 0.0087,
      "step": 2393800
    },
    {
      "epoch": 3.917538932856778,
      "grad_norm": 0.19925454258918762,
      "learning_rate": 2.17918728543848e-06,
      "loss": 0.0098,
      "step": 2393820
    },
    {
      "epoch": 3.9175716632954316,
      "grad_norm": 0.22529518604278564,
      "learning_rate": 2.1791213932249627e-06,
      "loss": 0.0147,
      "step": 2393840
    },
    {
      "epoch": 3.9176043937340848,
      "grad_norm": 0.25551483035087585,
      "learning_rate": 2.1790555010114454e-06,
      "loss": 0.0096,
      "step": 2393860
    },
    {
      "epoch": 3.917637124172738,
      "grad_norm": 0.15361319482326508,
      "learning_rate": 2.1789896087979286e-06,
      "loss": 0.0089,
      "step": 2393880
    },
    {
      "epoch": 3.9176698546113915,
      "grad_norm": 0.26386314630508423,
      "learning_rate": 2.1789237165844113e-06,
      "loss": 0.01,
      "step": 2393900
    },
    {
      "epoch": 3.917702585050045,
      "grad_norm": 0.11573483794927597,
      "learning_rate": 2.1788578243708945e-06,
      "loss": 0.0099,
      "step": 2393920
    },
    {
      "epoch": 3.9177353154886982,
      "grad_norm": 0.18022876977920532,
      "learning_rate": 2.178791932157377e-06,
      "loss": 0.0078,
      "step": 2393940
    },
    {
      "epoch": 3.9177680459273514,
      "grad_norm": 0.14686362445354462,
      "learning_rate": 2.17872603994386e-06,
      "loss": 0.0122,
      "step": 2393960
    },
    {
      "epoch": 3.917800776366005,
      "grad_norm": 0.1497737318277359,
      "learning_rate": 2.178660147730343e-06,
      "loss": 0.0081,
      "step": 2393980
    },
    {
      "epoch": 3.917833506804658,
      "grad_norm": 0.13040299713611603,
      "learning_rate": 2.178594255516826e-06,
      "loss": 0.0142,
      "step": 2394000
    },
    {
      "epoch": 3.9178662372433113,
      "grad_norm": 0.7153228521347046,
      "learning_rate": 2.1785283633033086e-06,
      "loss": 0.0134,
      "step": 2394020
    },
    {
      "epoch": 3.917898967681965,
      "grad_norm": 0.17467904090881348,
      "learning_rate": 2.1784624710897913e-06,
      "loss": 0.0173,
      "step": 2394040
    },
    {
      "epoch": 3.9179316981206185,
      "grad_norm": 0.13146856427192688,
      "learning_rate": 2.178396578876274e-06,
      "loss": 0.0085,
      "step": 2394060
    },
    {
      "epoch": 3.9179644285592716,
      "grad_norm": 0.1896972507238388,
      "learning_rate": 2.1783306866627572e-06,
      "loss": 0.0114,
      "step": 2394080
    },
    {
      "epoch": 3.917997158997925,
      "grad_norm": 0.2256801426410675,
      "learning_rate": 2.1782647944492404e-06,
      "loss": 0.009,
      "step": 2394100
    },
    {
      "epoch": 3.9180298894365784,
      "grad_norm": 0.06236131116747856,
      "learning_rate": 2.178198902235723e-06,
      "loss": 0.009,
      "step": 2394120
    },
    {
      "epoch": 3.9180626198752315,
      "grad_norm": 0.06927367299795151,
      "learning_rate": 2.178133010022206e-06,
      "loss": 0.0093,
      "step": 2394140
    },
    {
      "epoch": 3.9180953503138847,
      "grad_norm": 0.3638014495372772,
      "learning_rate": 2.1780671178086886e-06,
      "loss": 0.0124,
      "step": 2394160
    },
    {
      "epoch": 3.9181280807525383,
      "grad_norm": 0.21319971978664398,
      "learning_rate": 2.1780012255951718e-06,
      "loss": 0.0131,
      "step": 2394180
    },
    {
      "epoch": 3.918160811191192,
      "grad_norm": 0.10064046084880829,
      "learning_rate": 2.1779353333816545e-06,
      "loss": 0.0115,
      "step": 2394200
    },
    {
      "epoch": 3.918193541629845,
      "grad_norm": 0.20572878420352936,
      "learning_rate": 2.1778694411681372e-06,
      "loss": 0.0084,
      "step": 2394220
    },
    {
      "epoch": 3.918226272068498,
      "grad_norm": 0.2039719969034195,
      "learning_rate": 2.17780354895462e-06,
      "loss": 0.0086,
      "step": 2394240
    },
    {
      "epoch": 3.9182590025071518,
      "grad_norm": 0.6081520318984985,
      "learning_rate": 2.177737656741103e-06,
      "loss": 0.0159,
      "step": 2394260
    },
    {
      "epoch": 3.918291732945805,
      "grad_norm": 0.043867748230695724,
      "learning_rate": 2.177671764527586e-06,
      "loss": 0.0082,
      "step": 2394280
    },
    {
      "epoch": 3.918324463384458,
      "grad_norm": 0.45438775420188904,
      "learning_rate": 2.177605872314069e-06,
      "loss": 0.0162,
      "step": 2394300
    },
    {
      "epoch": 3.9183571938231117,
      "grad_norm": 0.24732156097888947,
      "learning_rate": 2.177539980100552e-06,
      "loss": 0.0127,
      "step": 2394320
    },
    {
      "epoch": 3.918389924261765,
      "grad_norm": 0.11567056179046631,
      "learning_rate": 2.1774740878870345e-06,
      "loss": 0.0102,
      "step": 2394340
    },
    {
      "epoch": 3.9184226547004184,
      "grad_norm": 0.3135235607624054,
      "learning_rate": 2.1774081956735173e-06,
      "loss": 0.0095,
      "step": 2394360
    },
    {
      "epoch": 3.9184553851390715,
      "grad_norm": 0.21172182261943817,
      "learning_rate": 2.1773423034600004e-06,
      "loss": 0.0099,
      "step": 2394380
    },
    {
      "epoch": 3.918488115577725,
      "grad_norm": 0.17999699711799622,
      "learning_rate": 2.177276411246483e-06,
      "loss": 0.0084,
      "step": 2394400
    },
    {
      "epoch": 3.9185208460163783,
      "grad_norm": 0.2629643678665161,
      "learning_rate": 2.177210519032966e-06,
      "loss": 0.0107,
      "step": 2394420
    },
    {
      "epoch": 3.9185535764550314,
      "grad_norm": 0.08933516591787338,
      "learning_rate": 2.177144626819449e-06,
      "loss": 0.0065,
      "step": 2394440
    },
    {
      "epoch": 3.918586306893685,
      "grad_norm": 0.41073471307754517,
      "learning_rate": 2.177078734605932e-06,
      "loss": 0.0106,
      "step": 2394460
    },
    {
      "epoch": 3.918619037332338,
      "grad_norm": 0.08176901936531067,
      "learning_rate": 2.1770128423924146e-06,
      "loss": 0.0131,
      "step": 2394480
    },
    {
      "epoch": 3.918651767770992,
      "grad_norm": 0.6617550849914551,
      "learning_rate": 2.1769469501788977e-06,
      "loss": 0.0111,
      "step": 2394500
    },
    {
      "epoch": 3.918684498209645,
      "grad_norm": 0.28620800375938416,
      "learning_rate": 2.1768810579653805e-06,
      "loss": 0.0074,
      "step": 2394520
    },
    {
      "epoch": 3.9187172286482985,
      "grad_norm": 0.08749471604824066,
      "learning_rate": 2.176815165751863e-06,
      "loss": 0.0118,
      "step": 2394540
    },
    {
      "epoch": 3.9187499590869517,
      "grad_norm": 0.06089315935969353,
      "learning_rate": 2.176749273538346e-06,
      "loss": 0.0096,
      "step": 2394560
    },
    {
      "epoch": 3.918782689525605,
      "grad_norm": 0.10697468370199203,
      "learning_rate": 2.176683381324829e-06,
      "loss": 0.016,
      "step": 2394580
    },
    {
      "epoch": 3.9188154199642584,
      "grad_norm": 0.2766834497451782,
      "learning_rate": 2.176617489111312e-06,
      "loss": 0.0138,
      "step": 2394600
    },
    {
      "epoch": 3.9188481504029116,
      "grad_norm": 0.47166526317596436,
      "learning_rate": 2.176551596897795e-06,
      "loss": 0.011,
      "step": 2394620
    },
    {
      "epoch": 3.918880880841565,
      "grad_norm": 0.3477415144443512,
      "learning_rate": 2.1764857046842777e-06,
      "loss": 0.0116,
      "step": 2394640
    },
    {
      "epoch": 3.9189136112802183,
      "grad_norm": 0.36003318428993225,
      "learning_rate": 2.1764198124707605e-06,
      "loss": 0.0132,
      "step": 2394660
    },
    {
      "epoch": 3.918946341718872,
      "grad_norm": 0.2894909381866455,
      "learning_rate": 2.1763539202572432e-06,
      "loss": 0.0074,
      "step": 2394680
    },
    {
      "epoch": 3.918979072157525,
      "grad_norm": 0.19675543904304504,
      "learning_rate": 2.1762880280437264e-06,
      "loss": 0.0125,
      "step": 2394700
    },
    {
      "epoch": 3.919011802596178,
      "grad_norm": 0.32818272709846497,
      "learning_rate": 2.176222135830209e-06,
      "loss": 0.01,
      "step": 2394720
    },
    {
      "epoch": 3.919044533034832,
      "grad_norm": 0.29861995577812195,
      "learning_rate": 2.176156243616692e-06,
      "loss": 0.0116,
      "step": 2394740
    },
    {
      "epoch": 3.919077263473485,
      "grad_norm": 0.22541163861751556,
      "learning_rate": 2.1760903514031746e-06,
      "loss": 0.0108,
      "step": 2394760
    },
    {
      "epoch": 3.9191099939121385,
      "grad_norm": 0.5948250889778137,
      "learning_rate": 2.1760244591896578e-06,
      "loss": 0.0105,
      "step": 2394780
    },
    {
      "epoch": 3.9191427243507917,
      "grad_norm": 0.21059750020503998,
      "learning_rate": 2.1759585669761405e-06,
      "loss": 0.0142,
      "step": 2394800
    },
    {
      "epoch": 3.9191754547894453,
      "grad_norm": 0.26272422075271606,
      "learning_rate": 2.1758926747626237e-06,
      "loss": 0.0088,
      "step": 2394820
    },
    {
      "epoch": 3.9192081852280984,
      "grad_norm": 0.2834343910217285,
      "learning_rate": 2.1758267825491064e-06,
      "loss": 0.0094,
      "step": 2394840
    },
    {
      "epoch": 3.9192409156667516,
      "grad_norm": 0.1198703795671463,
      "learning_rate": 2.175760890335589e-06,
      "loss": 0.0141,
      "step": 2394860
    },
    {
      "epoch": 3.919273646105405,
      "grad_norm": 0.23689259588718414,
      "learning_rate": 2.175694998122072e-06,
      "loss": 0.0105,
      "step": 2394880
    },
    {
      "epoch": 3.9193063765440583,
      "grad_norm": 0.5547754168510437,
      "learning_rate": 2.175629105908555e-06,
      "loss": 0.0085,
      "step": 2394900
    },
    {
      "epoch": 3.919339106982712,
      "grad_norm": 0.44478124380111694,
      "learning_rate": 2.175563213695038e-06,
      "loss": 0.0078,
      "step": 2394920
    },
    {
      "epoch": 3.919371837421365,
      "grad_norm": 0.22860726714134216,
      "learning_rate": 2.1754973214815205e-06,
      "loss": 0.0117,
      "step": 2394940
    },
    {
      "epoch": 3.9194045678600187,
      "grad_norm": 0.21803097426891327,
      "learning_rate": 2.1754314292680037e-06,
      "loss": 0.0147,
      "step": 2394960
    },
    {
      "epoch": 3.919437298298672,
      "grad_norm": 0.13574503362178802,
      "learning_rate": 2.1753655370544864e-06,
      "loss": 0.0122,
      "step": 2394980
    },
    {
      "epoch": 3.919470028737325,
      "grad_norm": 0.25355395674705505,
      "learning_rate": 2.1752996448409696e-06,
      "loss": 0.0113,
      "step": 2395000
    },
    {
      "epoch": 3.9195027591759786,
      "grad_norm": 0.18351255357265472,
      "learning_rate": 2.1752337526274523e-06,
      "loss": 0.0055,
      "step": 2395020
    },
    {
      "epoch": 3.9195354896146317,
      "grad_norm": 0.22628571093082428,
      "learning_rate": 2.175167860413935e-06,
      "loss": 0.0108,
      "step": 2395040
    },
    {
      "epoch": 3.9195682200532853,
      "grad_norm": 0.7176032066345215,
      "learning_rate": 2.175101968200418e-06,
      "loss": 0.0123,
      "step": 2395060
    },
    {
      "epoch": 3.9196009504919385,
      "grad_norm": 0.31702494621276855,
      "learning_rate": 2.1750360759869006e-06,
      "loss": 0.011,
      "step": 2395080
    },
    {
      "epoch": 3.919633680930592,
      "grad_norm": 0.26503273844718933,
      "learning_rate": 2.1749701837733837e-06,
      "loss": 0.01,
      "step": 2395100
    },
    {
      "epoch": 3.919666411369245,
      "grad_norm": 0.24542225897312164,
      "learning_rate": 2.1749042915598665e-06,
      "loss": 0.009,
      "step": 2395120
    },
    {
      "epoch": 3.9196991418078984,
      "grad_norm": 0.7410097718238831,
      "learning_rate": 2.1748383993463496e-06,
      "loss": 0.0185,
      "step": 2395140
    },
    {
      "epoch": 3.919731872246552,
      "grad_norm": 0.2243773341178894,
      "learning_rate": 2.1747725071328324e-06,
      "loss": 0.0214,
      "step": 2395160
    },
    {
      "epoch": 3.919764602685205,
      "grad_norm": 0.2669880986213684,
      "learning_rate": 2.174706614919315e-06,
      "loss": 0.0089,
      "step": 2395180
    },
    {
      "epoch": 3.9197973331238583,
      "grad_norm": 0.22428089380264282,
      "learning_rate": 2.1746407227057983e-06,
      "loss": 0.0107,
      "step": 2395200
    },
    {
      "epoch": 3.919830063562512,
      "grad_norm": 0.07913626730442047,
      "learning_rate": 2.174574830492281e-06,
      "loss": 0.0106,
      "step": 2395220
    },
    {
      "epoch": 3.9198627940011654,
      "grad_norm": 0.24278146028518677,
      "learning_rate": 2.1745089382787638e-06,
      "loss": 0.0135,
      "step": 2395240
    },
    {
      "epoch": 3.9198955244398186,
      "grad_norm": 0.28740981221199036,
      "learning_rate": 2.1744430460652465e-06,
      "loss": 0.0093,
      "step": 2395260
    },
    {
      "epoch": 3.9199282548784717,
      "grad_norm": 0.7294576168060303,
      "learning_rate": 2.1743771538517292e-06,
      "loss": 0.0124,
      "step": 2395280
    },
    {
      "epoch": 3.9199609853171253,
      "grad_norm": 0.14931422472000122,
      "learning_rate": 2.1743112616382124e-06,
      "loss": 0.0082,
      "step": 2395300
    },
    {
      "epoch": 3.9199937157557785,
      "grad_norm": 0.40258681774139404,
      "learning_rate": 2.1742453694246956e-06,
      "loss": 0.0132,
      "step": 2395320
    },
    {
      "epoch": 3.9200264461944316,
      "grad_norm": 0.2985352873802185,
      "learning_rate": 2.1741794772111783e-06,
      "loss": 0.0082,
      "step": 2395340
    },
    {
      "epoch": 3.9200591766330852,
      "grad_norm": 0.07072415202856064,
      "learning_rate": 2.174113584997661e-06,
      "loss": 0.0151,
      "step": 2395360
    },
    {
      "epoch": 3.920091907071739,
      "grad_norm": 0.10880734771490097,
      "learning_rate": 2.1740476927841438e-06,
      "loss": 0.008,
      "step": 2395380
    },
    {
      "epoch": 3.920124637510392,
      "grad_norm": 0.29532909393310547,
      "learning_rate": 2.173981800570627e-06,
      "loss": 0.0081,
      "step": 2395400
    },
    {
      "epoch": 3.920157367949045,
      "grad_norm": 0.17438067495822906,
      "learning_rate": 2.1739159083571097e-06,
      "loss": 0.0104,
      "step": 2395420
    },
    {
      "epoch": 3.9201900983876987,
      "grad_norm": 0.36264336109161377,
      "learning_rate": 2.1738500161435924e-06,
      "loss": 0.0091,
      "step": 2395440
    },
    {
      "epoch": 3.920222828826352,
      "grad_norm": 0.2733296751976013,
      "learning_rate": 2.173784123930075e-06,
      "loss": 0.0083,
      "step": 2395460
    },
    {
      "epoch": 3.920255559265005,
      "grad_norm": 0.1762205958366394,
      "learning_rate": 2.173718231716558e-06,
      "loss": 0.0136,
      "step": 2395480
    },
    {
      "epoch": 3.9202882897036586,
      "grad_norm": 0.14338435232639313,
      "learning_rate": 2.173652339503041e-06,
      "loss": 0.0089,
      "step": 2395500
    },
    {
      "epoch": 3.920321020142312,
      "grad_norm": 0.521020233631134,
      "learning_rate": 2.1735864472895242e-06,
      "loss": 0.0105,
      "step": 2395520
    },
    {
      "epoch": 3.9203537505809654,
      "grad_norm": 0.237446591258049,
      "learning_rate": 2.173520555076007e-06,
      "loss": 0.0084,
      "step": 2395540
    },
    {
      "epoch": 3.9203864810196185,
      "grad_norm": 0.6972219944000244,
      "learning_rate": 2.1734546628624897e-06,
      "loss": 0.0141,
      "step": 2395560
    },
    {
      "epoch": 3.920419211458272,
      "grad_norm": 0.09559671580791473,
      "learning_rate": 2.1733887706489724e-06,
      "loss": 0.0078,
      "step": 2395580
    },
    {
      "epoch": 3.9204519418969253,
      "grad_norm": 0.089995838701725,
      "learning_rate": 2.1733228784354556e-06,
      "loss": 0.0077,
      "step": 2395600
    },
    {
      "epoch": 3.9204846723355784,
      "grad_norm": 0.28535425662994385,
      "learning_rate": 2.1732569862219383e-06,
      "loss": 0.0077,
      "step": 2395620
    },
    {
      "epoch": 3.920517402774232,
      "grad_norm": 0.719852089881897,
      "learning_rate": 2.173191094008421e-06,
      "loss": 0.0157,
      "step": 2395640
    },
    {
      "epoch": 3.9205501332128856,
      "grad_norm": 0.1524173468351364,
      "learning_rate": 2.173125201794904e-06,
      "loss": 0.0068,
      "step": 2395660
    },
    {
      "epoch": 3.9205828636515387,
      "grad_norm": 0.22753465175628662,
      "learning_rate": 2.173059309581387e-06,
      "loss": 0.0095,
      "step": 2395680
    },
    {
      "epoch": 3.920615594090192,
      "grad_norm": 0.46338987350463867,
      "learning_rate": 2.1729934173678697e-06,
      "loss": 0.0101,
      "step": 2395700
    },
    {
      "epoch": 3.9206483245288455,
      "grad_norm": 0.17875568568706512,
      "learning_rate": 2.172927525154353e-06,
      "loss": 0.0089,
      "step": 2395720
    },
    {
      "epoch": 3.9206810549674986,
      "grad_norm": 0.3437552750110626,
      "learning_rate": 2.1728616329408356e-06,
      "loss": 0.0134,
      "step": 2395740
    },
    {
      "epoch": 3.920713785406152,
      "grad_norm": 0.264954537153244,
      "learning_rate": 2.1727957407273184e-06,
      "loss": 0.0154,
      "step": 2395760
    },
    {
      "epoch": 3.9207465158448054,
      "grad_norm": 0.2099827080965042,
      "learning_rate": 2.172729848513801e-06,
      "loss": 0.0056,
      "step": 2395780
    },
    {
      "epoch": 3.9207792462834585,
      "grad_norm": 0.30179262161254883,
      "learning_rate": 2.1726639563002843e-06,
      "loss": 0.0138,
      "step": 2395800
    },
    {
      "epoch": 3.920811976722112,
      "grad_norm": 0.10526791214942932,
      "learning_rate": 2.172598064086767e-06,
      "loss": 0.0089,
      "step": 2395820
    },
    {
      "epoch": 3.9208447071607653,
      "grad_norm": 0.3000865578651428,
      "learning_rate": 2.17253217187325e-06,
      "loss": 0.0094,
      "step": 2395840
    },
    {
      "epoch": 3.920877437599419,
      "grad_norm": 0.12057386338710785,
      "learning_rate": 2.172466279659733e-06,
      "loss": 0.01,
      "step": 2395860
    },
    {
      "epoch": 3.920910168038072,
      "grad_norm": 0.23591294884681702,
      "learning_rate": 2.1724003874462157e-06,
      "loss": 0.011,
      "step": 2395880
    },
    {
      "epoch": 3.920942898476725,
      "grad_norm": 0.07801911979913712,
      "learning_rate": 2.1723344952326984e-06,
      "loss": 0.007,
      "step": 2395900
    },
    {
      "epoch": 3.9209756289153788,
      "grad_norm": 0.20373103022575378,
      "learning_rate": 2.1722686030191816e-06,
      "loss": 0.0108,
      "step": 2395920
    },
    {
      "epoch": 3.921008359354032,
      "grad_norm": 0.24711570143699646,
      "learning_rate": 2.1722027108056643e-06,
      "loss": 0.015,
      "step": 2395940
    },
    {
      "epoch": 3.9210410897926855,
      "grad_norm": 0.08020622283220291,
      "learning_rate": 2.172136818592147e-06,
      "loss": 0.009,
      "step": 2395960
    },
    {
      "epoch": 3.9210738202313387,
      "grad_norm": 0.4420895576477051,
      "learning_rate": 2.1720709263786298e-06,
      "loss": 0.0082,
      "step": 2395980
    },
    {
      "epoch": 3.9211065506699923,
      "grad_norm": 0.227762371301651,
      "learning_rate": 2.172005034165113e-06,
      "loss": 0.0125,
      "step": 2396000
    },
    {
      "epoch": 3.9211392811086454,
      "grad_norm": 0.4006327986717224,
      "learning_rate": 2.1719391419515957e-06,
      "loss": 0.0122,
      "step": 2396020
    },
    {
      "epoch": 3.9211720115472986,
      "grad_norm": 0.4878242313861847,
      "learning_rate": 2.171873249738079e-06,
      "loss": 0.0144,
      "step": 2396040
    },
    {
      "epoch": 3.921204741985952,
      "grad_norm": 0.7175260782241821,
      "learning_rate": 2.1718073575245616e-06,
      "loss": 0.0177,
      "step": 2396060
    },
    {
      "epoch": 3.9212374724246053,
      "grad_norm": 0.33598047494888306,
      "learning_rate": 2.1717414653110443e-06,
      "loss": 0.015,
      "step": 2396080
    },
    {
      "epoch": 3.921270202863259,
      "grad_norm": 0.7629652619361877,
      "learning_rate": 2.171675573097527e-06,
      "loss": 0.0115,
      "step": 2396100
    },
    {
      "epoch": 3.921302933301912,
      "grad_norm": 0.1571105271577835,
      "learning_rate": 2.1716096808840102e-06,
      "loss": 0.0133,
      "step": 2396120
    },
    {
      "epoch": 3.9213356637405656,
      "grad_norm": 0.11006996035575867,
      "learning_rate": 2.171543788670493e-06,
      "loss": 0.0087,
      "step": 2396140
    },
    {
      "epoch": 3.921368394179219,
      "grad_norm": 0.18940068781375885,
      "learning_rate": 2.1714778964569757e-06,
      "loss": 0.0083,
      "step": 2396160
    },
    {
      "epoch": 3.921401124617872,
      "grad_norm": 0.20041801035404205,
      "learning_rate": 2.1714120042434584e-06,
      "loss": 0.0119,
      "step": 2396180
    },
    {
      "epoch": 3.9214338550565255,
      "grad_norm": 0.580748438835144,
      "learning_rate": 2.1713461120299416e-06,
      "loss": 0.0123,
      "step": 2396200
    },
    {
      "epoch": 3.9214665854951787,
      "grad_norm": 0.12828919291496277,
      "learning_rate": 2.1712802198164244e-06,
      "loss": 0.0062,
      "step": 2396220
    },
    {
      "epoch": 3.9214993159338323,
      "grad_norm": 0.3707471489906311,
      "learning_rate": 2.1712143276029075e-06,
      "loss": 0.0096,
      "step": 2396240
    },
    {
      "epoch": 3.9215320463724854,
      "grad_norm": 0.2411283254623413,
      "learning_rate": 2.1711484353893903e-06,
      "loss": 0.0098,
      "step": 2396260
    },
    {
      "epoch": 3.921564776811139,
      "grad_norm": 0.1749683916568756,
      "learning_rate": 2.171082543175873e-06,
      "loss": 0.01,
      "step": 2396280
    },
    {
      "epoch": 3.921597507249792,
      "grad_norm": 0.46077990531921387,
      "learning_rate": 2.1710166509623557e-06,
      "loss": 0.022,
      "step": 2396300
    },
    {
      "epoch": 3.9216302376884453,
      "grad_norm": 0.1803942769765854,
      "learning_rate": 2.170950758748839e-06,
      "loss": 0.0149,
      "step": 2396320
    },
    {
      "epoch": 3.921662968127099,
      "grad_norm": 0.10864365100860596,
      "learning_rate": 2.1708848665353216e-06,
      "loss": 0.0081,
      "step": 2396340
    },
    {
      "epoch": 3.921695698565752,
      "grad_norm": 0.17699405550956726,
      "learning_rate": 2.1708189743218044e-06,
      "loss": 0.0092,
      "step": 2396360
    },
    {
      "epoch": 3.9217284290044057,
      "grad_norm": 0.06910896301269531,
      "learning_rate": 2.1707530821082875e-06,
      "loss": 0.0083,
      "step": 2396380
    },
    {
      "epoch": 3.921761159443059,
      "grad_norm": 0.3546658456325531,
      "learning_rate": 2.1706871898947703e-06,
      "loss": 0.0094,
      "step": 2396400
    },
    {
      "epoch": 3.9217938898817124,
      "grad_norm": 0.29160618782043457,
      "learning_rate": 2.1706212976812534e-06,
      "loss": 0.0085,
      "step": 2396420
    },
    {
      "epoch": 3.9218266203203656,
      "grad_norm": 0.2011483907699585,
      "learning_rate": 2.170555405467736e-06,
      "loss": 0.0087,
      "step": 2396440
    },
    {
      "epoch": 3.9218593507590187,
      "grad_norm": 0.26267319917678833,
      "learning_rate": 2.170489513254219e-06,
      "loss": 0.0098,
      "step": 2396460
    },
    {
      "epoch": 3.9218920811976723,
      "grad_norm": 0.32262465357780457,
      "learning_rate": 2.1704236210407017e-06,
      "loss": 0.0107,
      "step": 2396480
    },
    {
      "epoch": 3.9219248116363254,
      "grad_norm": 0.052719682455062866,
      "learning_rate": 2.1703577288271844e-06,
      "loss": 0.0148,
      "step": 2396500
    },
    {
      "epoch": 3.921957542074979,
      "grad_norm": 0.14782360196113586,
      "learning_rate": 2.1702918366136676e-06,
      "loss": 0.0138,
      "step": 2396520
    },
    {
      "epoch": 3.921990272513632,
      "grad_norm": 0.12148785591125488,
      "learning_rate": 2.1702259444001503e-06,
      "loss": 0.0118,
      "step": 2396540
    },
    {
      "epoch": 3.922023002952286,
      "grad_norm": 0.4265863299369812,
      "learning_rate": 2.1701600521866335e-06,
      "loss": 0.0123,
      "step": 2396560
    },
    {
      "epoch": 3.922055733390939,
      "grad_norm": 1.150581955909729,
      "learning_rate": 2.170094159973116e-06,
      "loss": 0.0108,
      "step": 2396580
    },
    {
      "epoch": 3.922088463829592,
      "grad_norm": 0.2139262855052948,
      "learning_rate": 2.170028267759599e-06,
      "loss": 0.0111,
      "step": 2396600
    },
    {
      "epoch": 3.9221211942682457,
      "grad_norm": 0.15723201632499695,
      "learning_rate": 2.169962375546082e-06,
      "loss": 0.0105,
      "step": 2396620
    },
    {
      "epoch": 3.922153924706899,
      "grad_norm": 0.3718652129173279,
      "learning_rate": 2.169896483332565e-06,
      "loss": 0.0112,
      "step": 2396640
    },
    {
      "epoch": 3.9221866551455524,
      "grad_norm": 0.09244776517152786,
      "learning_rate": 2.1698305911190476e-06,
      "loss": 0.008,
      "step": 2396660
    },
    {
      "epoch": 3.9222193855842056,
      "grad_norm": 0.2147853523492813,
      "learning_rate": 2.1697646989055303e-06,
      "loss": 0.0141,
      "step": 2396680
    },
    {
      "epoch": 3.922252116022859,
      "grad_norm": 0.30520525574684143,
      "learning_rate": 2.169698806692013e-06,
      "loss": 0.0093,
      "step": 2396700
    },
    {
      "epoch": 3.9222848464615123,
      "grad_norm": 0.3729672431945801,
      "learning_rate": 2.1696329144784962e-06,
      "loss": 0.0093,
      "step": 2396720
    },
    {
      "epoch": 3.9223175769001655,
      "grad_norm": 0.2315363585948944,
      "learning_rate": 2.1695670222649794e-06,
      "loss": 0.0079,
      "step": 2396740
    },
    {
      "epoch": 3.922350307338819,
      "grad_norm": 0.44397181272506714,
      "learning_rate": 2.169501130051462e-06,
      "loss": 0.0139,
      "step": 2396760
    },
    {
      "epoch": 3.922383037777472,
      "grad_norm": 0.2765136659145355,
      "learning_rate": 2.169435237837945e-06,
      "loss": 0.007,
      "step": 2396780
    },
    {
      "epoch": 3.9224157682161254,
      "grad_norm": 0.7056474089622498,
      "learning_rate": 2.1693693456244276e-06,
      "loss": 0.011,
      "step": 2396800
    },
    {
      "epoch": 3.922448498654779,
      "grad_norm": 0.2765047550201416,
      "learning_rate": 2.1693034534109108e-06,
      "loss": 0.0165,
      "step": 2396820
    },
    {
      "epoch": 3.9224812290934326,
      "grad_norm": 0.610609233379364,
      "learning_rate": 2.1692375611973935e-06,
      "loss": 0.0097,
      "step": 2396840
    },
    {
      "epoch": 3.9225139595320857,
      "grad_norm": 0.10740012675523758,
      "learning_rate": 2.1691716689838763e-06,
      "loss": 0.0114,
      "step": 2396860
    },
    {
      "epoch": 3.922546689970739,
      "grad_norm": 0.20175693929195404,
      "learning_rate": 2.169105776770359e-06,
      "loss": 0.0075,
      "step": 2396880
    },
    {
      "epoch": 3.9225794204093924,
      "grad_norm": 0.571114718914032,
      "learning_rate": 2.169039884556842e-06,
      "loss": 0.0081,
      "step": 2396900
    },
    {
      "epoch": 3.9226121508480456,
      "grad_norm": 0.2805044651031494,
      "learning_rate": 2.168973992343325e-06,
      "loss": 0.0076,
      "step": 2396920
    },
    {
      "epoch": 3.9226448812866987,
      "grad_norm": 0.1982349306344986,
      "learning_rate": 2.168908100129808e-06,
      "loss": 0.0096,
      "step": 2396940
    },
    {
      "epoch": 3.9226776117253523,
      "grad_norm": 0.09310346096754074,
      "learning_rate": 2.168842207916291e-06,
      "loss": 0.0131,
      "step": 2396960
    },
    {
      "epoch": 3.922710342164006,
      "grad_norm": 0.21710596978664398,
      "learning_rate": 2.1687763157027735e-06,
      "loss": 0.009,
      "step": 2396980
    },
    {
      "epoch": 3.922743072602659,
      "grad_norm": 0.3511079251766205,
      "learning_rate": 2.1687104234892563e-06,
      "loss": 0.0144,
      "step": 2397000
    },
    {
      "epoch": 3.9227758030413122,
      "grad_norm": 0.390369713306427,
      "learning_rate": 2.1686445312757394e-06,
      "loss": 0.0094,
      "step": 2397020
    },
    {
      "epoch": 3.922808533479966,
      "grad_norm": 0.14026693999767303,
      "learning_rate": 2.168578639062222e-06,
      "loss": 0.0079,
      "step": 2397040
    },
    {
      "epoch": 3.922841263918619,
      "grad_norm": 0.14259018003940582,
      "learning_rate": 2.168512746848705e-06,
      "loss": 0.0075,
      "step": 2397060
    },
    {
      "epoch": 3.922873994357272,
      "grad_norm": 0.5372412204742432,
      "learning_rate": 2.168446854635188e-06,
      "loss": 0.0194,
      "step": 2397080
    },
    {
      "epoch": 3.9229067247959257,
      "grad_norm": 0.08929107338190079,
      "learning_rate": 2.168380962421671e-06,
      "loss": 0.013,
      "step": 2397100
    },
    {
      "epoch": 3.9229394552345793,
      "grad_norm": 0.8154314756393433,
      "learning_rate": 2.1683150702081536e-06,
      "loss": 0.0158,
      "step": 2397120
    },
    {
      "epoch": 3.9229721856732325,
      "grad_norm": 0.07369760423898697,
      "learning_rate": 2.1682491779946367e-06,
      "loss": 0.0128,
      "step": 2397140
    },
    {
      "epoch": 3.9230049161118856,
      "grad_norm": 0.2619105875492096,
      "learning_rate": 2.1681832857811195e-06,
      "loss": 0.0158,
      "step": 2397160
    },
    {
      "epoch": 3.923037646550539,
      "grad_norm": 0.14449764788150787,
      "learning_rate": 2.1681173935676022e-06,
      "loss": 0.0084,
      "step": 2397180
    },
    {
      "epoch": 3.9230703769891924,
      "grad_norm": 0.20595939457416534,
      "learning_rate": 2.168051501354085e-06,
      "loss": 0.0082,
      "step": 2397200
    },
    {
      "epoch": 3.9231031074278455,
      "grad_norm": 0.21508553624153137,
      "learning_rate": 2.167985609140568e-06,
      "loss": 0.015,
      "step": 2397220
    },
    {
      "epoch": 3.923135837866499,
      "grad_norm": 0.3033592402935028,
      "learning_rate": 2.167919716927051e-06,
      "loss": 0.0099,
      "step": 2397240
    },
    {
      "epoch": 3.9231685683051523,
      "grad_norm": 0.10221768170595169,
      "learning_rate": 2.167853824713534e-06,
      "loss": 0.0074,
      "step": 2397260
    },
    {
      "epoch": 3.923201298743806,
      "grad_norm": 0.321698933839798,
      "learning_rate": 2.1677879325000168e-06,
      "loss": 0.0142,
      "step": 2397280
    },
    {
      "epoch": 3.923234029182459,
      "grad_norm": 0.4208238422870636,
      "learning_rate": 2.1677220402864995e-06,
      "loss": 0.009,
      "step": 2397300
    },
    {
      "epoch": 3.9232667596211126,
      "grad_norm": 0.2755928039550781,
      "learning_rate": 2.1676561480729822e-06,
      "loss": 0.011,
      "step": 2397320
    },
    {
      "epoch": 3.9232994900597657,
      "grad_norm": 0.7231970429420471,
      "learning_rate": 2.1675902558594654e-06,
      "loss": 0.0094,
      "step": 2397340
    },
    {
      "epoch": 3.923332220498419,
      "grad_norm": 0.13648778200149536,
      "learning_rate": 2.167524363645948e-06,
      "loss": 0.0111,
      "step": 2397360
    },
    {
      "epoch": 3.9233649509370725,
      "grad_norm": 0.6724127531051636,
      "learning_rate": 2.167458471432431e-06,
      "loss": 0.01,
      "step": 2397380
    },
    {
      "epoch": 3.9233976813757256,
      "grad_norm": 0.04670748487114906,
      "learning_rate": 2.1673925792189136e-06,
      "loss": 0.0107,
      "step": 2397400
    },
    {
      "epoch": 3.9234304118143792,
      "grad_norm": 0.23285186290740967,
      "learning_rate": 2.1673266870053968e-06,
      "loss": 0.0128,
      "step": 2397420
    },
    {
      "epoch": 3.9234631422530324,
      "grad_norm": 0.320297509431839,
      "learning_rate": 2.1672607947918795e-06,
      "loss": 0.0096,
      "step": 2397440
    },
    {
      "epoch": 3.923495872691686,
      "grad_norm": 0.3870580792427063,
      "learning_rate": 2.1671949025783627e-06,
      "loss": 0.0093,
      "step": 2397460
    },
    {
      "epoch": 3.923528603130339,
      "grad_norm": 0.14032109081745148,
      "learning_rate": 2.1671290103648454e-06,
      "loss": 0.0087,
      "step": 2397480
    },
    {
      "epoch": 3.9235613335689923,
      "grad_norm": 0.4636514186859131,
      "learning_rate": 2.167063118151328e-06,
      "loss": 0.0106,
      "step": 2397500
    },
    {
      "epoch": 3.923594064007646,
      "grad_norm": 0.6940030455589294,
      "learning_rate": 2.166997225937811e-06,
      "loss": 0.0091,
      "step": 2397520
    },
    {
      "epoch": 3.923626794446299,
      "grad_norm": 0.1300833523273468,
      "learning_rate": 2.166931333724294e-06,
      "loss": 0.0096,
      "step": 2397540
    },
    {
      "epoch": 3.9236595248849526,
      "grad_norm": 0.2051175832748413,
      "learning_rate": 2.166865441510777e-06,
      "loss": 0.0085,
      "step": 2397560
    },
    {
      "epoch": 3.9236922553236058,
      "grad_norm": 0.26767152547836304,
      "learning_rate": 2.1667995492972595e-06,
      "loss": 0.01,
      "step": 2397580
    },
    {
      "epoch": 3.9237249857622594,
      "grad_norm": 0.30214521288871765,
      "learning_rate": 2.1667336570837427e-06,
      "loss": 0.0106,
      "step": 2397600
    },
    {
      "epoch": 3.9237577162009125,
      "grad_norm": 0.4540548026561737,
      "learning_rate": 2.1666677648702255e-06,
      "loss": 0.0118,
      "step": 2397620
    },
    {
      "epoch": 3.9237904466395657,
      "grad_norm": 0.2210397571325302,
      "learning_rate": 2.1666018726567086e-06,
      "loss": 0.0089,
      "step": 2397640
    },
    {
      "epoch": 3.9238231770782193,
      "grad_norm": 0.1633203625679016,
      "learning_rate": 2.1665359804431914e-06,
      "loss": 0.0143,
      "step": 2397660
    },
    {
      "epoch": 3.9238559075168724,
      "grad_norm": 0.3133428394794464,
      "learning_rate": 2.166470088229674e-06,
      "loss": 0.0073,
      "step": 2397680
    },
    {
      "epoch": 3.923888637955526,
      "grad_norm": 0.24149833619594574,
      "learning_rate": 2.166404196016157e-06,
      "loss": 0.0119,
      "step": 2397700
    },
    {
      "epoch": 3.923921368394179,
      "grad_norm": 0.4401445984840393,
      "learning_rate": 2.1663383038026396e-06,
      "loss": 0.0198,
      "step": 2397720
    },
    {
      "epoch": 3.9239540988328327,
      "grad_norm": 0.2242027074098587,
      "learning_rate": 2.1662724115891227e-06,
      "loss": 0.0089,
      "step": 2397740
    },
    {
      "epoch": 3.923986829271486,
      "grad_norm": 0.27332279086112976,
      "learning_rate": 2.1662065193756055e-06,
      "loss": 0.0074,
      "step": 2397760
    },
    {
      "epoch": 3.924019559710139,
      "grad_norm": 0.24988286197185516,
      "learning_rate": 2.1661406271620886e-06,
      "loss": 0.0141,
      "step": 2397780
    },
    {
      "epoch": 3.9240522901487926,
      "grad_norm": 0.307513564825058,
      "learning_rate": 2.1660747349485714e-06,
      "loss": 0.016,
      "step": 2397800
    },
    {
      "epoch": 3.924085020587446,
      "grad_norm": 0.2573213577270508,
      "learning_rate": 2.166008842735054e-06,
      "loss": 0.0126,
      "step": 2397820
    },
    {
      "epoch": 3.9241177510260994,
      "grad_norm": 0.1624748855829239,
      "learning_rate": 2.1659429505215373e-06,
      "loss": 0.0098,
      "step": 2397840
    },
    {
      "epoch": 3.9241504814647525,
      "grad_norm": 0.35371094942092896,
      "learning_rate": 2.16587705830802e-06,
      "loss": 0.0127,
      "step": 2397860
    },
    {
      "epoch": 3.924183211903406,
      "grad_norm": 0.14984361827373505,
      "learning_rate": 2.1658111660945028e-06,
      "loss": 0.0094,
      "step": 2397880
    },
    {
      "epoch": 3.9242159423420593,
      "grad_norm": 0.10970812290906906,
      "learning_rate": 2.1657452738809855e-06,
      "loss": 0.0063,
      "step": 2397900
    },
    {
      "epoch": 3.9242486727807124,
      "grad_norm": 0.08707837015390396,
      "learning_rate": 2.1656793816674682e-06,
      "loss": 0.0084,
      "step": 2397920
    },
    {
      "epoch": 3.924281403219366,
      "grad_norm": 0.25252988934516907,
      "learning_rate": 2.1656134894539514e-06,
      "loss": 0.0105,
      "step": 2397940
    },
    {
      "epoch": 3.924314133658019,
      "grad_norm": 0.2467482089996338,
      "learning_rate": 2.1655475972404346e-06,
      "loss": 0.0067,
      "step": 2397960
    },
    {
      "epoch": 3.9243468640966728,
      "grad_norm": 0.6086057424545288,
      "learning_rate": 2.1654817050269173e-06,
      "loss": 0.012,
      "step": 2397980
    },
    {
      "epoch": 3.924379594535326,
      "grad_norm": 0.17524023354053497,
      "learning_rate": 2.1654158128134e-06,
      "loss": 0.0068,
      "step": 2398000
    },
    {
      "epoch": 3.9244123249739795,
      "grad_norm": 0.3352638781070709,
      "learning_rate": 2.1653499205998828e-06,
      "loss": 0.0122,
      "step": 2398020
    },
    {
      "epoch": 3.9244450554126327,
      "grad_norm": 0.47225821018218994,
      "learning_rate": 2.165284028386366e-06,
      "loss": 0.0065,
      "step": 2398040
    },
    {
      "epoch": 3.924477785851286,
      "grad_norm": 0.02986009791493416,
      "learning_rate": 2.1652181361728487e-06,
      "loss": 0.0128,
      "step": 2398060
    },
    {
      "epoch": 3.9245105162899394,
      "grad_norm": 0.17110444605350494,
      "learning_rate": 2.1651522439593314e-06,
      "loss": 0.0085,
      "step": 2398080
    },
    {
      "epoch": 3.9245432467285926,
      "grad_norm": 0.13878664374351501,
      "learning_rate": 2.165086351745814e-06,
      "loss": 0.0054,
      "step": 2398100
    },
    {
      "epoch": 3.924575977167246,
      "grad_norm": 1.0818767547607422,
      "learning_rate": 2.165020459532297e-06,
      "loss": 0.0111,
      "step": 2398120
    },
    {
      "epoch": 3.9246087076058993,
      "grad_norm": 0.2577059268951416,
      "learning_rate": 2.16495456731878e-06,
      "loss": 0.0133,
      "step": 2398140
    },
    {
      "epoch": 3.924641438044553,
      "grad_norm": 0.18690447509288788,
      "learning_rate": 2.1648886751052632e-06,
      "loss": 0.0119,
      "step": 2398160
    },
    {
      "epoch": 3.924674168483206,
      "grad_norm": 0.22074128687381744,
      "learning_rate": 2.164822782891746e-06,
      "loss": 0.0137,
      "step": 2398180
    },
    {
      "epoch": 3.924706898921859,
      "grad_norm": 0.5880985856056213,
      "learning_rate": 2.1647568906782287e-06,
      "loss": 0.0129,
      "step": 2398200
    },
    {
      "epoch": 3.924739629360513,
      "grad_norm": 0.7253508567810059,
      "learning_rate": 2.1646909984647115e-06,
      "loss": 0.0142,
      "step": 2398220
    },
    {
      "epoch": 3.924772359799166,
      "grad_norm": 0.20935004949569702,
      "learning_rate": 2.1646251062511946e-06,
      "loss": 0.0158,
      "step": 2398240
    },
    {
      "epoch": 3.924805090237819,
      "grad_norm": 0.25721415877342224,
      "learning_rate": 2.1645592140376774e-06,
      "loss": 0.0163,
      "step": 2398260
    },
    {
      "epoch": 3.9248378206764727,
      "grad_norm": 0.6105403304100037,
      "learning_rate": 2.16449332182416e-06,
      "loss": 0.0129,
      "step": 2398280
    },
    {
      "epoch": 3.9248705511151263,
      "grad_norm": 0.8948208689689636,
      "learning_rate": 2.164427429610643e-06,
      "loss": 0.0113,
      "step": 2398300
    },
    {
      "epoch": 3.9249032815537794,
      "grad_norm": 0.3616907596588135,
      "learning_rate": 2.164361537397126e-06,
      "loss": 0.0131,
      "step": 2398320
    },
    {
      "epoch": 3.9249360119924326,
      "grad_norm": 0.2010430097579956,
      "learning_rate": 2.1642956451836087e-06,
      "loss": 0.0098,
      "step": 2398340
    },
    {
      "epoch": 3.924968742431086,
      "grad_norm": 0.23081658780574799,
      "learning_rate": 2.164229752970092e-06,
      "loss": 0.0118,
      "step": 2398360
    },
    {
      "epoch": 3.9250014728697393,
      "grad_norm": 0.04406888782978058,
      "learning_rate": 2.1641638607565746e-06,
      "loss": 0.0091,
      "step": 2398380
    },
    {
      "epoch": 3.9250342033083925,
      "grad_norm": 0.18268850445747375,
      "learning_rate": 2.1640979685430574e-06,
      "loss": 0.0142,
      "step": 2398400
    },
    {
      "epoch": 3.925066933747046,
      "grad_norm": 0.08484899252653122,
      "learning_rate": 2.16403207632954e-06,
      "loss": 0.0117,
      "step": 2398420
    },
    {
      "epoch": 3.9250996641856997,
      "grad_norm": 0.14368116855621338,
      "learning_rate": 2.1639661841160233e-06,
      "loss": 0.0077,
      "step": 2398440
    },
    {
      "epoch": 3.925132394624353,
      "grad_norm": 0.26526084542274475,
      "learning_rate": 2.163900291902506e-06,
      "loss": 0.0108,
      "step": 2398460
    },
    {
      "epoch": 3.925165125063006,
      "grad_norm": 0.33367106318473816,
      "learning_rate": 2.163834399688989e-06,
      "loss": 0.013,
      "step": 2398480
    },
    {
      "epoch": 3.9251978555016596,
      "grad_norm": 0.8205496072769165,
      "learning_rate": 2.163768507475472e-06,
      "loss": 0.0114,
      "step": 2398500
    },
    {
      "epoch": 3.9252305859403127,
      "grad_norm": 0.2245984673500061,
      "learning_rate": 2.1637026152619547e-06,
      "loss": 0.0074,
      "step": 2398520
    },
    {
      "epoch": 3.925263316378966,
      "grad_norm": 0.09827394783496857,
      "learning_rate": 2.1636367230484374e-06,
      "loss": 0.0112,
      "step": 2398540
    },
    {
      "epoch": 3.9252960468176195,
      "grad_norm": 0.4683910012245178,
      "learning_rate": 2.1635708308349206e-06,
      "loss": 0.0078,
      "step": 2398560
    },
    {
      "epoch": 3.925328777256273,
      "grad_norm": 0.3518311679363251,
      "learning_rate": 2.1635049386214033e-06,
      "loss": 0.0128,
      "step": 2398580
    },
    {
      "epoch": 3.925361507694926,
      "grad_norm": 0.4209801256656647,
      "learning_rate": 2.163439046407886e-06,
      "loss": 0.0126,
      "step": 2398600
    },
    {
      "epoch": 3.9253942381335793,
      "grad_norm": 0.08927717804908752,
      "learning_rate": 2.163373154194369e-06,
      "loss": 0.0113,
      "step": 2398620
    },
    {
      "epoch": 3.925426968572233,
      "grad_norm": 0.2983272969722748,
      "learning_rate": 2.163307261980852e-06,
      "loss": 0.0075,
      "step": 2398640
    },
    {
      "epoch": 3.925459699010886,
      "grad_norm": 0.17605388164520264,
      "learning_rate": 2.1632413697673347e-06,
      "loss": 0.0088,
      "step": 2398660
    },
    {
      "epoch": 3.9254924294495392,
      "grad_norm": 0.2985464334487915,
      "learning_rate": 2.163175477553818e-06,
      "loss": 0.014,
      "step": 2398680
    },
    {
      "epoch": 3.925525159888193,
      "grad_norm": 0.17653627693653107,
      "learning_rate": 2.1631095853403006e-06,
      "loss": 0.009,
      "step": 2398700
    },
    {
      "epoch": 3.9255578903268464,
      "grad_norm": 0.07359233498573303,
      "learning_rate": 2.1630436931267833e-06,
      "loss": 0.0086,
      "step": 2398720
    },
    {
      "epoch": 3.9255906207654996,
      "grad_norm": 0.21890564262866974,
      "learning_rate": 2.162977800913266e-06,
      "loss": 0.0099,
      "step": 2398740
    },
    {
      "epoch": 3.9256233512041527,
      "grad_norm": 0.4219922721385956,
      "learning_rate": 2.1629119086997492e-06,
      "loss": 0.0132,
      "step": 2398760
    },
    {
      "epoch": 3.9256560816428063,
      "grad_norm": 1.0974749326705933,
      "learning_rate": 2.162846016486232e-06,
      "loss": 0.0119,
      "step": 2398780
    },
    {
      "epoch": 3.9256888120814595,
      "grad_norm": 0.0761183574795723,
      "learning_rate": 2.1627801242727147e-06,
      "loss": 0.0124,
      "step": 2398800
    },
    {
      "epoch": 3.9257215425201126,
      "grad_norm": 0.23644326627254486,
      "learning_rate": 2.1627142320591975e-06,
      "loss": 0.0084,
      "step": 2398820
    },
    {
      "epoch": 3.9257542729587662,
      "grad_norm": 0.3986397087574005,
      "learning_rate": 2.1626483398456806e-06,
      "loss": 0.0112,
      "step": 2398840
    },
    {
      "epoch": 3.9257870033974194,
      "grad_norm": 0.3509485125541687,
      "learning_rate": 2.1625824476321634e-06,
      "loss": 0.0092,
      "step": 2398860
    },
    {
      "epoch": 3.925819733836073,
      "grad_norm": 0.11627243459224701,
      "learning_rate": 2.1625165554186465e-06,
      "loss": 0.014,
      "step": 2398880
    },
    {
      "epoch": 3.925852464274726,
      "grad_norm": 0.347176730632782,
      "learning_rate": 2.1624506632051293e-06,
      "loss": 0.0133,
      "step": 2398900
    },
    {
      "epoch": 3.9258851947133797,
      "grad_norm": 0.22428007423877716,
      "learning_rate": 2.162384770991612e-06,
      "loss": 0.0057,
      "step": 2398920
    },
    {
      "epoch": 3.925917925152033,
      "grad_norm": 0.11345276981592178,
      "learning_rate": 2.1623188787780947e-06,
      "loss": 0.0139,
      "step": 2398940
    },
    {
      "epoch": 3.925950655590686,
      "grad_norm": 0.25780346989631653,
      "learning_rate": 2.162252986564578e-06,
      "loss": 0.0091,
      "step": 2398960
    },
    {
      "epoch": 3.9259833860293396,
      "grad_norm": 0.2483537793159485,
      "learning_rate": 2.1621870943510606e-06,
      "loss": 0.0137,
      "step": 2398980
    },
    {
      "epoch": 3.9260161164679928,
      "grad_norm": 0.447461873292923,
      "learning_rate": 2.1621212021375434e-06,
      "loss": 0.0089,
      "step": 2399000
    },
    {
      "epoch": 3.9260488469066464,
      "grad_norm": 0.5879902243614197,
      "learning_rate": 2.1620553099240266e-06,
      "loss": 0.0163,
      "step": 2399020
    },
    {
      "epoch": 3.9260815773452995,
      "grad_norm": 0.2647005021572113,
      "learning_rate": 2.1619894177105093e-06,
      "loss": 0.0137,
      "step": 2399040
    },
    {
      "epoch": 3.926114307783953,
      "grad_norm": 0.4257477819919586,
      "learning_rate": 2.1619235254969925e-06,
      "loss": 0.0098,
      "step": 2399060
    },
    {
      "epoch": 3.9261470382226062,
      "grad_norm": 0.15101154148578644,
      "learning_rate": 2.161857633283475e-06,
      "loss": 0.0108,
      "step": 2399080
    },
    {
      "epoch": 3.9261797686612594,
      "grad_norm": 0.20250439643859863,
      "learning_rate": 2.161791741069958e-06,
      "loss": 0.0082,
      "step": 2399100
    },
    {
      "epoch": 3.926212499099913,
      "grad_norm": 0.2282758504152298,
      "learning_rate": 2.1617258488564407e-06,
      "loss": 0.0076,
      "step": 2399120
    },
    {
      "epoch": 3.926245229538566,
      "grad_norm": 0.6720253229141235,
      "learning_rate": 2.1616599566429234e-06,
      "loss": 0.0109,
      "step": 2399140
    },
    {
      "epoch": 3.9262779599772197,
      "grad_norm": 0.12434522062540054,
      "learning_rate": 2.1615940644294066e-06,
      "loss": 0.0114,
      "step": 2399160
    },
    {
      "epoch": 3.926310690415873,
      "grad_norm": 0.09812547266483307,
      "learning_rate": 2.1615281722158893e-06,
      "loss": 0.0119,
      "step": 2399180
    },
    {
      "epoch": 3.9263434208545265,
      "grad_norm": 0.156840518116951,
      "learning_rate": 2.1614622800023725e-06,
      "loss": 0.0098,
      "step": 2399200
    },
    {
      "epoch": 3.9263761512931796,
      "grad_norm": 0.2720579206943512,
      "learning_rate": 2.1613963877888552e-06,
      "loss": 0.0111,
      "step": 2399220
    },
    {
      "epoch": 3.926408881731833,
      "grad_norm": 0.3952707350254059,
      "learning_rate": 2.161330495575338e-06,
      "loss": 0.0097,
      "step": 2399240
    },
    {
      "epoch": 3.9264416121704864,
      "grad_norm": 0.09794987738132477,
      "learning_rate": 2.161264603361821e-06,
      "loss": 0.0112,
      "step": 2399260
    },
    {
      "epoch": 3.9264743426091395,
      "grad_norm": 0.356248140335083,
      "learning_rate": 2.161198711148304e-06,
      "loss": 0.013,
      "step": 2399280
    },
    {
      "epoch": 3.926507073047793,
      "grad_norm": 0.6411557197570801,
      "learning_rate": 2.1611328189347866e-06,
      "loss": 0.0142,
      "step": 2399300
    },
    {
      "epoch": 3.9265398034864463,
      "grad_norm": 0.14772029221057892,
      "learning_rate": 2.1610669267212693e-06,
      "loss": 0.0114,
      "step": 2399320
    },
    {
      "epoch": 3.9265725339251,
      "grad_norm": 0.2914454936981201,
      "learning_rate": 2.161001034507752e-06,
      "loss": 0.0126,
      "step": 2399340
    },
    {
      "epoch": 3.926605264363753,
      "grad_norm": 1.1963402032852173,
      "learning_rate": 2.1609351422942352e-06,
      "loss": 0.0132,
      "step": 2399360
    },
    {
      "epoch": 3.926637994802406,
      "grad_norm": 0.18575669825077057,
      "learning_rate": 2.1608692500807184e-06,
      "loss": 0.0093,
      "step": 2399380
    },
    {
      "epoch": 3.9266707252410598,
      "grad_norm": 0.43189966678619385,
      "learning_rate": 2.160803357867201e-06,
      "loss": 0.0125,
      "step": 2399400
    },
    {
      "epoch": 3.926703455679713,
      "grad_norm": 0.3722893297672272,
      "learning_rate": 2.160737465653684e-06,
      "loss": 0.0117,
      "step": 2399420
    },
    {
      "epoch": 3.9267361861183665,
      "grad_norm": 0.1266680508852005,
      "learning_rate": 2.1606715734401666e-06,
      "loss": 0.0122,
      "step": 2399440
    },
    {
      "epoch": 3.9267689165570197,
      "grad_norm": 0.3460504114627838,
      "learning_rate": 2.16060568122665e-06,
      "loss": 0.0107,
      "step": 2399460
    },
    {
      "epoch": 3.9268016469956732,
      "grad_norm": 0.3222249746322632,
      "learning_rate": 2.1605397890131325e-06,
      "loss": 0.0139,
      "step": 2399480
    },
    {
      "epoch": 3.9268343774343264,
      "grad_norm": 0.44381076097488403,
      "learning_rate": 2.1604738967996153e-06,
      "loss": 0.0071,
      "step": 2399500
    },
    {
      "epoch": 3.9268671078729795,
      "grad_norm": 0.15820004045963287,
      "learning_rate": 2.160408004586098e-06,
      "loss": 0.0108,
      "step": 2399520
    },
    {
      "epoch": 3.926899838311633,
      "grad_norm": 0.40846025943756104,
      "learning_rate": 2.160342112372581e-06,
      "loss": 0.0115,
      "step": 2399540
    },
    {
      "epoch": 3.9269325687502863,
      "grad_norm": 0.2376822531223297,
      "learning_rate": 2.160276220159064e-06,
      "loss": 0.0135,
      "step": 2399560
    },
    {
      "epoch": 3.92696529918894,
      "grad_norm": 0.07297787070274353,
      "learning_rate": 2.160210327945547e-06,
      "loss": 0.009,
      "step": 2399580
    },
    {
      "epoch": 3.926998029627593,
      "grad_norm": 0.10485101491212845,
      "learning_rate": 2.16014443573203e-06,
      "loss": 0.0078,
      "step": 2399600
    },
    {
      "epoch": 3.9270307600662466,
      "grad_norm": 0.6441547274589539,
      "learning_rate": 2.1600785435185126e-06,
      "loss": 0.0109,
      "step": 2399620
    },
    {
      "epoch": 3.9270634905049,
      "grad_norm": 0.16854237020015717,
      "learning_rate": 2.1600126513049953e-06,
      "loss": 0.0066,
      "step": 2399640
    },
    {
      "epoch": 3.927096220943553,
      "grad_norm": 0.17500725388526917,
      "learning_rate": 2.1599467590914785e-06,
      "loss": 0.0134,
      "step": 2399660
    },
    {
      "epoch": 3.9271289513822065,
      "grad_norm": 0.5252330303192139,
      "learning_rate": 2.159880866877961e-06,
      "loss": 0.0088,
      "step": 2399680
    },
    {
      "epoch": 3.9271616818208597,
      "grad_norm": 0.3046848475933075,
      "learning_rate": 2.159814974664444e-06,
      "loss": 0.012,
      "step": 2399700
    },
    {
      "epoch": 3.9271944122595133,
      "grad_norm": 0.23925355076789856,
      "learning_rate": 2.159749082450927e-06,
      "loss": 0.0102,
      "step": 2399720
    },
    {
      "epoch": 3.9272271426981664,
      "grad_norm": 0.2500116527080536,
      "learning_rate": 2.15968319023741e-06,
      "loss": 0.0104,
      "step": 2399740
    },
    {
      "epoch": 3.92725987313682,
      "grad_norm": 0.17823344469070435,
      "learning_rate": 2.1596172980238926e-06,
      "loss": 0.0105,
      "step": 2399760
    },
    {
      "epoch": 3.927292603575473,
      "grad_norm": 0.40036046504974365,
      "learning_rate": 2.1595514058103757e-06,
      "loss": 0.0106,
      "step": 2399780
    },
    {
      "epoch": 3.9273253340141263,
      "grad_norm": 0.09154600650072098,
      "learning_rate": 2.1594855135968585e-06,
      "loss": 0.0101,
      "step": 2399800
    },
    {
      "epoch": 3.92735806445278,
      "grad_norm": 0.06328534334897995,
      "learning_rate": 2.1594196213833412e-06,
      "loss": 0.012,
      "step": 2399820
    },
    {
      "epoch": 3.927390794891433,
      "grad_norm": 0.4829942584037781,
      "learning_rate": 2.159353729169824e-06,
      "loss": 0.0094,
      "step": 2399840
    },
    {
      "epoch": 3.927423525330086,
      "grad_norm": 0.29420799016952515,
      "learning_rate": 2.159287836956307e-06,
      "loss": 0.0075,
      "step": 2399860
    },
    {
      "epoch": 3.92745625576874,
      "grad_norm": 0.4484683573246002,
      "learning_rate": 2.15922194474279e-06,
      "loss": 0.0105,
      "step": 2399880
    },
    {
      "epoch": 3.9274889862073934,
      "grad_norm": 0.40438714623451233,
      "learning_rate": 2.159156052529273e-06,
      "loss": 0.0139,
      "step": 2399900
    },
    {
      "epoch": 3.9275217166460465,
      "grad_norm": 0.17787118256092072,
      "learning_rate": 2.1590901603157558e-06,
      "loss": 0.0132,
      "step": 2399920
    },
    {
      "epoch": 3.9275544470846997,
      "grad_norm": 0.35246706008911133,
      "learning_rate": 2.1590242681022385e-06,
      "loss": 0.0099,
      "step": 2399940
    },
    {
      "epoch": 3.9275871775233533,
      "grad_norm": 0.37479549646377563,
      "learning_rate": 2.1589583758887212e-06,
      "loss": 0.0069,
      "step": 2399960
    },
    {
      "epoch": 3.9276199079620064,
      "grad_norm": 0.08141536265611649,
      "learning_rate": 2.1588924836752044e-06,
      "loss": 0.01,
      "step": 2399980
    },
    {
      "epoch": 3.9276526384006596,
      "grad_norm": 0.23249228298664093,
      "learning_rate": 2.158826591461687e-06,
      "loss": 0.0089,
      "step": 2400000
    },
    {
      "epoch": 3.9276526384006596,
      "eval_loss": 0.006214005872607231,
      "eval_runtime": 6506.1485,
      "eval_samples_per_second": 157.983,
      "eval_steps_per_second": 15.798,
      "eval_sts-dev_pearson_cosine": 0.9859095555456304,
      "eval_sts-dev_spearman_cosine": 0.8962042250516552,
      "step": 2400000
    },
    {
      "epoch": 3.927685368839313,
      "grad_norm": 0.3673672378063202,
      "learning_rate": 2.15876069924817e-06,
      "loss": 0.0109,
      "step": 2400020
    },
    {
      "epoch": 3.927718099277967,
      "grad_norm": 1.2044050693511963,
      "learning_rate": 2.1586948070346526e-06,
      "loss": 0.0156,
      "step": 2400040
    },
    {
      "epoch": 3.92775082971662,
      "grad_norm": 0.1434427797794342,
      "learning_rate": 2.158628914821136e-06,
      "loss": 0.0121,
      "step": 2400060
    },
    {
      "epoch": 3.927783560155273,
      "grad_norm": 0.29739174246788025,
      "learning_rate": 2.1585630226076185e-06,
      "loss": 0.0102,
      "step": 2400080
    },
    {
      "epoch": 3.9278162905939267,
      "grad_norm": 0.2964848279953003,
      "learning_rate": 2.1584971303941017e-06,
      "loss": 0.0118,
      "step": 2400100
    },
    {
      "epoch": 3.92784902103258,
      "grad_norm": 0.23452135920524597,
      "learning_rate": 2.1584312381805844e-06,
      "loss": 0.0092,
      "step": 2400120
    },
    {
      "epoch": 3.927881751471233,
      "grad_norm": 0.34419769048690796,
      "learning_rate": 2.158365345967067e-06,
      "loss": 0.0143,
      "step": 2400140
    },
    {
      "epoch": 3.9279144819098866,
      "grad_norm": 0.3633345067501068,
      "learning_rate": 2.15829945375355e-06,
      "loss": 0.0103,
      "step": 2400160
    },
    {
      "epoch": 3.92794721234854,
      "grad_norm": 0.1894105225801468,
      "learning_rate": 2.158233561540033e-06,
      "loss": 0.0084,
      "step": 2400180
    },
    {
      "epoch": 3.9279799427871933,
      "grad_norm": 0.22313012182712555,
      "learning_rate": 2.158167669326516e-06,
      "loss": 0.008,
      "step": 2400200
    },
    {
      "epoch": 3.9280126732258465,
      "grad_norm": 0.2319570630788803,
      "learning_rate": 2.1581017771129986e-06,
      "loss": 0.0151,
      "step": 2400220
    },
    {
      "epoch": 3.9280454036645,
      "grad_norm": 0.6107162833213806,
      "learning_rate": 2.1580358848994817e-06,
      "loss": 0.0124,
      "step": 2400240
    },
    {
      "epoch": 3.928078134103153,
      "grad_norm": 0.20740115642547607,
      "learning_rate": 2.1579699926859645e-06,
      "loss": 0.0144,
      "step": 2400260
    },
    {
      "epoch": 3.9281108645418064,
      "grad_norm": 0.4176521599292755,
      "learning_rate": 2.1579041004724476e-06,
      "loss": 0.0086,
      "step": 2400280
    },
    {
      "epoch": 3.92814359498046,
      "grad_norm": 0.3655374050140381,
      "learning_rate": 2.1578382082589304e-06,
      "loss": 0.0142,
      "step": 2400300
    },
    {
      "epoch": 3.928176325419113,
      "grad_norm": 0.28584566712379456,
      "learning_rate": 2.157772316045413e-06,
      "loss": 0.0088,
      "step": 2400320
    },
    {
      "epoch": 3.9282090558577667,
      "grad_norm": 0.4768868386745453,
      "learning_rate": 2.157706423831896e-06,
      "loss": 0.0066,
      "step": 2400340
    },
    {
      "epoch": 3.92824178629642,
      "grad_norm": 0.3765034079551697,
      "learning_rate": 2.1576405316183786e-06,
      "loss": 0.0078,
      "step": 2400360
    },
    {
      "epoch": 3.9282745167350734,
      "grad_norm": 0.886341392993927,
      "learning_rate": 2.1575746394048617e-06,
      "loss": 0.0099,
      "step": 2400380
    },
    {
      "epoch": 3.9283072471737266,
      "grad_norm": 0.2008339911699295,
      "learning_rate": 2.1575087471913445e-06,
      "loss": 0.0095,
      "step": 2400400
    },
    {
      "epoch": 3.9283399776123797,
      "grad_norm": 0.34679457545280457,
      "learning_rate": 2.1574428549778276e-06,
      "loss": 0.006,
      "step": 2400420
    },
    {
      "epoch": 3.9283727080510333,
      "grad_norm": 0.5556120872497559,
      "learning_rate": 2.1573769627643104e-06,
      "loss": 0.0086,
      "step": 2400440
    },
    {
      "epoch": 3.9284054384896865,
      "grad_norm": 0.22931747138500214,
      "learning_rate": 2.157311070550793e-06,
      "loss": 0.0136,
      "step": 2400460
    },
    {
      "epoch": 3.92843816892834,
      "grad_norm": 0.6125495433807373,
      "learning_rate": 2.1572451783372763e-06,
      "loss": 0.0137,
      "step": 2400480
    },
    {
      "epoch": 3.9284708993669932,
      "grad_norm": 0.21327008306980133,
      "learning_rate": 2.157179286123759e-06,
      "loss": 0.0089,
      "step": 2400500
    },
    {
      "epoch": 3.928503629805647,
      "grad_norm": 0.15794189274311066,
      "learning_rate": 2.1571133939102418e-06,
      "loss": 0.0101,
      "step": 2400520
    },
    {
      "epoch": 3.9285363602443,
      "grad_norm": 0.2092386931180954,
      "learning_rate": 2.1570475016967245e-06,
      "loss": 0.0058,
      "step": 2400540
    },
    {
      "epoch": 3.928569090682953,
      "grad_norm": 0.09038018435239792,
      "learning_rate": 2.1569816094832073e-06,
      "loss": 0.0062,
      "step": 2400560
    },
    {
      "epoch": 3.9286018211216067,
      "grad_norm": 0.07097095996141434,
      "learning_rate": 2.1569157172696904e-06,
      "loss": 0.008,
      "step": 2400580
    },
    {
      "epoch": 3.92863455156026,
      "grad_norm": 0.2269126921892166,
      "learning_rate": 2.1568498250561736e-06,
      "loss": 0.0093,
      "step": 2400600
    },
    {
      "epoch": 3.9286672819989135,
      "grad_norm": 0.21731482446193695,
      "learning_rate": 2.1567839328426563e-06,
      "loss": 0.0108,
      "step": 2400620
    },
    {
      "epoch": 3.9287000124375666,
      "grad_norm": 0.22974778711795807,
      "learning_rate": 2.156718040629139e-06,
      "loss": 0.0117,
      "step": 2400640
    },
    {
      "epoch": 3.92873274287622,
      "grad_norm": 0.14657163619995117,
      "learning_rate": 2.156652148415622e-06,
      "loss": 0.0128,
      "step": 2400660
    },
    {
      "epoch": 3.9287654733148734,
      "grad_norm": 0.4939683675765991,
      "learning_rate": 2.156586256202105e-06,
      "loss": 0.0146,
      "step": 2400680
    },
    {
      "epoch": 3.9287982037535265,
      "grad_norm": 0.25007304549217224,
      "learning_rate": 2.1565203639885877e-06,
      "loss": 0.0092,
      "step": 2400700
    },
    {
      "epoch": 3.92883093419218,
      "grad_norm": 0.1568857729434967,
      "learning_rate": 2.1564544717750704e-06,
      "loss": 0.0129,
      "step": 2400720
    },
    {
      "epoch": 3.9288636646308333,
      "grad_norm": 0.12714813649654388,
      "learning_rate": 2.156388579561553e-06,
      "loss": 0.0123,
      "step": 2400740
    },
    {
      "epoch": 3.928896395069487,
      "grad_norm": 0.2455187290906906,
      "learning_rate": 2.156322687348036e-06,
      "loss": 0.0091,
      "step": 2400760
    },
    {
      "epoch": 3.92892912550814,
      "grad_norm": 0.10310053080320358,
      "learning_rate": 2.156256795134519e-06,
      "loss": 0.0065,
      "step": 2400780
    },
    {
      "epoch": 3.9289618559467936,
      "grad_norm": 0.3096328675746918,
      "learning_rate": 2.1561909029210022e-06,
      "loss": 0.0077,
      "step": 2400800
    },
    {
      "epoch": 3.9289945863854467,
      "grad_norm": 0.21030572056770325,
      "learning_rate": 2.156125010707485e-06,
      "loss": 0.0103,
      "step": 2400820
    },
    {
      "epoch": 3.9290273168241,
      "grad_norm": 0.4312645196914673,
      "learning_rate": 2.1560591184939677e-06,
      "loss": 0.0075,
      "step": 2400840
    },
    {
      "epoch": 3.9290600472627535,
      "grad_norm": 0.10667071491479874,
      "learning_rate": 2.1559932262804505e-06,
      "loss": 0.0096,
      "step": 2400860
    },
    {
      "epoch": 3.9290927777014066,
      "grad_norm": 0.40076565742492676,
      "learning_rate": 2.1559273340669336e-06,
      "loss": 0.0177,
      "step": 2400880
    },
    {
      "epoch": 3.9291255081400602,
      "grad_norm": 0.2567357122898102,
      "learning_rate": 2.1558614418534164e-06,
      "loss": 0.0101,
      "step": 2400900
    },
    {
      "epoch": 3.9291582385787134,
      "grad_norm": 0.5646861791610718,
      "learning_rate": 2.155795549639899e-06,
      "loss": 0.0115,
      "step": 2400920
    },
    {
      "epoch": 3.929190969017367,
      "grad_norm": 0.1386621743440628,
      "learning_rate": 2.1557296574263823e-06,
      "loss": 0.0119,
      "step": 2400940
    },
    {
      "epoch": 3.92922369945602,
      "grad_norm": 0.4637189507484436,
      "learning_rate": 2.155663765212865e-06,
      "loss": 0.0067,
      "step": 2400960
    },
    {
      "epoch": 3.9292564298946733,
      "grad_norm": 0.25164100527763367,
      "learning_rate": 2.1555978729993478e-06,
      "loss": 0.0087,
      "step": 2400980
    },
    {
      "epoch": 3.929289160333327,
      "grad_norm": 0.29438742995262146,
      "learning_rate": 2.155531980785831e-06,
      "loss": 0.0117,
      "step": 2401000
    },
    {
      "epoch": 3.92932189077198,
      "grad_norm": 0.3274385333061218,
      "learning_rate": 2.1554660885723137e-06,
      "loss": 0.0112,
      "step": 2401020
    },
    {
      "epoch": 3.9293546212106336,
      "grad_norm": 0.2683106064796448,
      "learning_rate": 2.1554001963587964e-06,
      "loss": 0.0104,
      "step": 2401040
    },
    {
      "epoch": 3.9293873516492868,
      "grad_norm": 0.2675940990447998,
      "learning_rate": 2.155334304145279e-06,
      "loss": 0.0088,
      "step": 2401060
    },
    {
      "epoch": 3.9294200820879404,
      "grad_norm": 0.08615891635417938,
      "learning_rate": 2.1552684119317623e-06,
      "loss": 0.0074,
      "step": 2401080
    },
    {
      "epoch": 3.9294528125265935,
      "grad_norm": 0.15749458968639374,
      "learning_rate": 2.155202519718245e-06,
      "loss": 0.0094,
      "step": 2401100
    },
    {
      "epoch": 3.9294855429652467,
      "grad_norm": 0.20026032626628876,
      "learning_rate": 2.155136627504728e-06,
      "loss": 0.0065,
      "step": 2401120
    },
    {
      "epoch": 3.9295182734039003,
      "grad_norm": 0.08970776200294495,
      "learning_rate": 2.155070735291211e-06,
      "loss": 0.0125,
      "step": 2401140
    },
    {
      "epoch": 3.9295510038425534,
      "grad_norm": 0.046688374131917953,
      "learning_rate": 2.1550048430776937e-06,
      "loss": 0.0087,
      "step": 2401160
    },
    {
      "epoch": 3.929583734281207,
      "grad_norm": 0.1184249073266983,
      "learning_rate": 2.1549389508641764e-06,
      "loss": 0.0141,
      "step": 2401180
    },
    {
      "epoch": 3.92961646471986,
      "grad_norm": 0.16764602065086365,
      "learning_rate": 2.1548730586506596e-06,
      "loss": 0.0126,
      "step": 2401200
    },
    {
      "epoch": 3.9296491951585137,
      "grad_norm": 0.15559934079647064,
      "learning_rate": 2.1548071664371423e-06,
      "loss": 0.0083,
      "step": 2401220
    },
    {
      "epoch": 3.929681925597167,
      "grad_norm": 0.5139546990394592,
      "learning_rate": 2.154741274223625e-06,
      "loss": 0.0106,
      "step": 2401240
    },
    {
      "epoch": 3.92971465603582,
      "grad_norm": 0.11061131954193115,
      "learning_rate": 2.154675382010108e-06,
      "loss": 0.0115,
      "step": 2401260
    },
    {
      "epoch": 3.9297473864744736,
      "grad_norm": 0.1678999662399292,
      "learning_rate": 2.154609489796591e-06,
      "loss": 0.0107,
      "step": 2401280
    },
    {
      "epoch": 3.929780116913127,
      "grad_norm": 0.3752574324607849,
      "learning_rate": 2.1545435975830737e-06,
      "loss": 0.0123,
      "step": 2401300
    },
    {
      "epoch": 3.92981284735178,
      "grad_norm": 0.4169705808162689,
      "learning_rate": 2.154477705369557e-06,
      "loss": 0.0143,
      "step": 2401320
    },
    {
      "epoch": 3.9298455777904335,
      "grad_norm": 0.25706392526626587,
      "learning_rate": 2.1544118131560396e-06,
      "loss": 0.0162,
      "step": 2401340
    },
    {
      "epoch": 3.929878308229087,
      "grad_norm": 0.6081706285476685,
      "learning_rate": 2.1543459209425223e-06,
      "loss": 0.0111,
      "step": 2401360
    },
    {
      "epoch": 3.9299110386677403,
      "grad_norm": 0.2637535631656647,
      "learning_rate": 2.154280028729005e-06,
      "loss": 0.0131,
      "step": 2401380
    },
    {
      "epoch": 3.9299437691063934,
      "grad_norm": 0.137327179312706,
      "learning_rate": 2.1542141365154883e-06,
      "loss": 0.0108,
      "step": 2401400
    },
    {
      "epoch": 3.929976499545047,
      "grad_norm": 0.3536089360713959,
      "learning_rate": 2.154148244301971e-06,
      "loss": 0.0086,
      "step": 2401420
    },
    {
      "epoch": 3.9300092299837,
      "grad_norm": 0.16030611097812653,
      "learning_rate": 2.1540823520884537e-06,
      "loss": 0.0098,
      "step": 2401440
    },
    {
      "epoch": 3.9300419604223533,
      "grad_norm": 0.2982540428638458,
      "learning_rate": 2.1540164598749365e-06,
      "loss": 0.0159,
      "step": 2401460
    },
    {
      "epoch": 3.930074690861007,
      "grad_norm": 0.4151265323162079,
      "learning_rate": 2.1539505676614196e-06,
      "loss": 0.01,
      "step": 2401480
    },
    {
      "epoch": 3.9301074212996605,
      "grad_norm": 0.0841171070933342,
      "learning_rate": 2.153884675447903e-06,
      "loss": 0.0069,
      "step": 2401500
    },
    {
      "epoch": 3.9301401517383137,
      "grad_norm": 0.1553155779838562,
      "learning_rate": 2.1538187832343855e-06,
      "loss": 0.0125,
      "step": 2401520
    },
    {
      "epoch": 3.930172882176967,
      "grad_norm": 0.46346578001976013,
      "learning_rate": 2.1537528910208683e-06,
      "loss": 0.0105,
      "step": 2401540
    },
    {
      "epoch": 3.9302056126156204,
      "grad_norm": 0.23866814374923706,
      "learning_rate": 2.153686998807351e-06,
      "loss": 0.0097,
      "step": 2401560
    },
    {
      "epoch": 3.9302383430542736,
      "grad_norm": 0.21614080667495728,
      "learning_rate": 2.1536211065938338e-06,
      "loss": 0.0058,
      "step": 2401580
    },
    {
      "epoch": 3.9302710734929267,
      "grad_norm": 0.0792616754770279,
      "learning_rate": 2.153555214380317e-06,
      "loss": 0.0088,
      "step": 2401600
    },
    {
      "epoch": 3.9303038039315803,
      "grad_norm": 0.2481459379196167,
      "learning_rate": 2.1534893221667997e-06,
      "loss": 0.0163,
      "step": 2401620
    },
    {
      "epoch": 3.930336534370234,
      "grad_norm": 0.4456874132156372,
      "learning_rate": 2.1534234299532824e-06,
      "loss": 0.0086,
      "step": 2401640
    },
    {
      "epoch": 3.930369264808887,
      "grad_norm": 0.35765933990478516,
      "learning_rate": 2.1533575377397656e-06,
      "loss": 0.0142,
      "step": 2401660
    },
    {
      "epoch": 3.93040199524754,
      "grad_norm": 0.2759532034397125,
      "learning_rate": 2.1532916455262483e-06,
      "loss": 0.007,
      "step": 2401680
    },
    {
      "epoch": 3.930434725686194,
      "grad_norm": 0.38037240505218506,
      "learning_rate": 2.1532257533127315e-06,
      "loss": 0.0111,
      "step": 2401700
    },
    {
      "epoch": 3.930467456124847,
      "grad_norm": 0.2808615565299988,
      "learning_rate": 2.153159861099214e-06,
      "loss": 0.0106,
      "step": 2401720
    },
    {
      "epoch": 3.9305001865635,
      "grad_norm": 0.25827255845069885,
      "learning_rate": 2.153093968885697e-06,
      "loss": 0.0108,
      "step": 2401740
    },
    {
      "epoch": 3.9305329170021537,
      "grad_norm": 0.29428645968437195,
      "learning_rate": 2.1530280766721797e-06,
      "loss": 0.0101,
      "step": 2401760
    },
    {
      "epoch": 3.9305656474408073,
      "grad_norm": 0.2872689366340637,
      "learning_rate": 2.1529621844586624e-06,
      "loss": 0.0158,
      "step": 2401780
    },
    {
      "epoch": 3.9305983778794604,
      "grad_norm": 0.751348614692688,
      "learning_rate": 2.1528962922451456e-06,
      "loss": 0.0133,
      "step": 2401800
    },
    {
      "epoch": 3.9306311083181136,
      "grad_norm": 0.11495386809110641,
      "learning_rate": 2.1528304000316287e-06,
      "loss": 0.0057,
      "step": 2401820
    },
    {
      "epoch": 3.930663838756767,
      "grad_norm": 0.44434577226638794,
      "learning_rate": 2.1527645078181115e-06,
      "loss": 0.0071,
      "step": 2401840
    },
    {
      "epoch": 3.9306965691954203,
      "grad_norm": 0.1552150547504425,
      "learning_rate": 2.1526986156045942e-06,
      "loss": 0.0117,
      "step": 2401860
    },
    {
      "epoch": 3.9307292996340735,
      "grad_norm": 0.5071342587471008,
      "learning_rate": 2.152632723391077e-06,
      "loss": 0.0142,
      "step": 2401880
    },
    {
      "epoch": 3.930762030072727,
      "grad_norm": 0.421249657869339,
      "learning_rate": 2.15256683117756e-06,
      "loss": 0.0086,
      "step": 2401900
    },
    {
      "epoch": 3.93079476051138,
      "grad_norm": 0.2547127306461334,
      "learning_rate": 2.152500938964043e-06,
      "loss": 0.0132,
      "step": 2401920
    },
    {
      "epoch": 3.930827490950034,
      "grad_norm": 0.13567005097866058,
      "learning_rate": 2.1524350467505256e-06,
      "loss": 0.0149,
      "step": 2401940
    },
    {
      "epoch": 3.930860221388687,
      "grad_norm": 0.12430021911859512,
      "learning_rate": 2.1523691545370084e-06,
      "loss": 0.0117,
      "step": 2401960
    },
    {
      "epoch": 3.9308929518273406,
      "grad_norm": 0.173424631357193,
      "learning_rate": 2.152303262323491e-06,
      "loss": 0.0121,
      "step": 2401980
    },
    {
      "epoch": 3.9309256822659937,
      "grad_norm": 0.46892300248146057,
      "learning_rate": 2.1522373701099743e-06,
      "loss": 0.0109,
      "step": 2402000
    },
    {
      "epoch": 3.930958412704647,
      "grad_norm": 0.33621764183044434,
      "learning_rate": 2.1521714778964574e-06,
      "loss": 0.0107,
      "step": 2402020
    },
    {
      "epoch": 3.9309911431433004,
      "grad_norm": 0.3371416926383972,
      "learning_rate": 2.15210558568294e-06,
      "loss": 0.0104,
      "step": 2402040
    },
    {
      "epoch": 3.9310238735819536,
      "grad_norm": 0.3145335614681244,
      "learning_rate": 2.152039693469423e-06,
      "loss": 0.0073,
      "step": 2402060
    },
    {
      "epoch": 3.931056604020607,
      "grad_norm": 0.1273287534713745,
      "learning_rate": 2.1519738012559056e-06,
      "loss": 0.012,
      "step": 2402080
    },
    {
      "epoch": 3.9310893344592603,
      "grad_norm": 0.29849523305892944,
      "learning_rate": 2.151907909042389e-06,
      "loss": 0.0128,
      "step": 2402100
    },
    {
      "epoch": 3.931122064897914,
      "grad_norm": 0.1152132973074913,
      "learning_rate": 2.1518420168288715e-06,
      "loss": 0.0095,
      "step": 2402120
    },
    {
      "epoch": 3.931154795336567,
      "grad_norm": 0.36489376425743103,
      "learning_rate": 2.1517761246153543e-06,
      "loss": 0.0143,
      "step": 2402140
    },
    {
      "epoch": 3.9311875257752202,
      "grad_norm": 0.25876253843307495,
      "learning_rate": 2.151710232401837e-06,
      "loss": 0.0131,
      "step": 2402160
    },
    {
      "epoch": 3.931220256213874,
      "grad_norm": 0.36951106786727905,
      "learning_rate": 2.15164434018832e-06,
      "loss": 0.0126,
      "step": 2402180
    },
    {
      "epoch": 3.931252986652527,
      "grad_norm": 0.28240230679512024,
      "learning_rate": 2.151578447974803e-06,
      "loss": 0.0093,
      "step": 2402200
    },
    {
      "epoch": 3.9312857170911806,
      "grad_norm": 0.4008548855781555,
      "learning_rate": 2.151512555761286e-06,
      "loss": 0.0098,
      "step": 2402220
    },
    {
      "epoch": 3.9313184475298337,
      "grad_norm": 0.3044871389865875,
      "learning_rate": 2.151446663547769e-06,
      "loss": 0.0107,
      "step": 2402240
    },
    {
      "epoch": 3.9313511779684873,
      "grad_norm": 0.20197729766368866,
      "learning_rate": 2.1513807713342516e-06,
      "loss": 0.0083,
      "step": 2402260
    },
    {
      "epoch": 3.9313839084071405,
      "grad_norm": 0.19898828864097595,
      "learning_rate": 2.1513148791207343e-06,
      "loss": 0.0112,
      "step": 2402280
    },
    {
      "epoch": 3.9314166388457936,
      "grad_norm": 0.29267531633377075,
      "learning_rate": 2.1512489869072175e-06,
      "loss": 0.014,
      "step": 2402300
    },
    {
      "epoch": 3.931449369284447,
      "grad_norm": 0.18686334788799286,
      "learning_rate": 2.1511830946937e-06,
      "loss": 0.0074,
      "step": 2402320
    },
    {
      "epoch": 3.9314820997231004,
      "grad_norm": 0.07015549391508102,
      "learning_rate": 2.151117202480183e-06,
      "loss": 0.0116,
      "step": 2402340
    },
    {
      "epoch": 3.931514830161754,
      "grad_norm": 0.27423804998397827,
      "learning_rate": 2.151051310266666e-06,
      "loss": 0.0165,
      "step": 2402360
    },
    {
      "epoch": 3.931547560600407,
      "grad_norm": 0.11555024981498718,
      "learning_rate": 2.150985418053149e-06,
      "loss": 0.0063,
      "step": 2402380
    },
    {
      "epoch": 3.9315802910390607,
      "grad_norm": 0.1291266828775406,
      "learning_rate": 2.1509195258396316e-06,
      "loss": 0.0099,
      "step": 2402400
    },
    {
      "epoch": 3.931613021477714,
      "grad_norm": 0.3698015809059143,
      "learning_rate": 2.1508536336261148e-06,
      "loss": 0.0096,
      "step": 2402420
    },
    {
      "epoch": 3.931645751916367,
      "grad_norm": 0.3050170838832855,
      "learning_rate": 2.1507877414125975e-06,
      "loss": 0.0137,
      "step": 2402440
    },
    {
      "epoch": 3.9316784823550206,
      "grad_norm": 0.23510272800922394,
      "learning_rate": 2.1507218491990802e-06,
      "loss": 0.012,
      "step": 2402460
    },
    {
      "epoch": 3.9317112127936737,
      "grad_norm": 0.11872677505016327,
      "learning_rate": 2.150655956985563e-06,
      "loss": 0.0084,
      "step": 2402480
    },
    {
      "epoch": 3.9317439432323273,
      "grad_norm": 0.10082594305276871,
      "learning_rate": 2.150590064772046e-06,
      "loss": 0.0097,
      "step": 2402500
    },
    {
      "epoch": 3.9317766736709805,
      "grad_norm": 0.1319122165441513,
      "learning_rate": 2.150524172558529e-06,
      "loss": 0.0142,
      "step": 2402520
    },
    {
      "epoch": 3.931809404109634,
      "grad_norm": 0.146314337849617,
      "learning_rate": 2.150458280345012e-06,
      "loss": 0.0056,
      "step": 2402540
    },
    {
      "epoch": 3.9318421345482872,
      "grad_norm": 0.15655550360679626,
      "learning_rate": 2.1503923881314948e-06,
      "loss": 0.01,
      "step": 2402560
    },
    {
      "epoch": 3.9318748649869404,
      "grad_norm": 0.4930245280265808,
      "learning_rate": 2.1503264959179775e-06,
      "loss": 0.014,
      "step": 2402580
    },
    {
      "epoch": 3.931907595425594,
      "grad_norm": 0.09893520176410675,
      "learning_rate": 2.1502606037044603e-06,
      "loss": 0.008,
      "step": 2402600
    },
    {
      "epoch": 3.931940325864247,
      "grad_norm": 0.04190140590071678,
      "learning_rate": 2.1501947114909434e-06,
      "loss": 0.0066,
      "step": 2402620
    },
    {
      "epoch": 3.9319730563029007,
      "grad_norm": 0.451021671295166,
      "learning_rate": 2.150128819277426e-06,
      "loss": 0.009,
      "step": 2402640
    },
    {
      "epoch": 3.932005786741554,
      "grad_norm": 0.1441219300031662,
      "learning_rate": 2.150062927063909e-06,
      "loss": 0.0072,
      "step": 2402660
    },
    {
      "epoch": 3.9320385171802075,
      "grad_norm": 0.07336726039648056,
      "learning_rate": 2.1499970348503916e-06,
      "loss": 0.0099,
      "step": 2402680
    },
    {
      "epoch": 3.9320712476188606,
      "grad_norm": 0.7321093082427979,
      "learning_rate": 2.149931142636875e-06,
      "loss": 0.013,
      "step": 2402700
    },
    {
      "epoch": 3.9321039780575138,
      "grad_norm": 0.2172691375017166,
      "learning_rate": 2.1498652504233575e-06,
      "loss": 0.0115,
      "step": 2402720
    },
    {
      "epoch": 3.9321367084961674,
      "grad_norm": 0.20112264156341553,
      "learning_rate": 2.1497993582098407e-06,
      "loss": 0.0087,
      "step": 2402740
    },
    {
      "epoch": 3.9321694389348205,
      "grad_norm": 0.3707323968410492,
      "learning_rate": 2.1497334659963234e-06,
      "loss": 0.0095,
      "step": 2402760
    },
    {
      "epoch": 3.9322021693734737,
      "grad_norm": 0.3948899805545807,
      "learning_rate": 2.149667573782806e-06,
      "loss": 0.0102,
      "step": 2402780
    },
    {
      "epoch": 3.9322348998121273,
      "grad_norm": 0.23543845117092133,
      "learning_rate": 2.149601681569289e-06,
      "loss": 0.0096,
      "step": 2402800
    },
    {
      "epoch": 3.932267630250781,
      "grad_norm": 0.6519582271575928,
      "learning_rate": 2.149535789355772e-06,
      "loss": 0.0145,
      "step": 2402820
    },
    {
      "epoch": 3.932300360689434,
      "grad_norm": 0.2577255368232727,
      "learning_rate": 2.149469897142255e-06,
      "loss": 0.0098,
      "step": 2402840
    },
    {
      "epoch": 3.932333091128087,
      "grad_norm": 0.8287241458892822,
      "learning_rate": 2.1494040049287376e-06,
      "loss": 0.0112,
      "step": 2402860
    },
    {
      "epoch": 3.9323658215667407,
      "grad_norm": 0.2610194981098175,
      "learning_rate": 2.1493381127152207e-06,
      "loss": 0.0133,
      "step": 2402880
    },
    {
      "epoch": 3.932398552005394,
      "grad_norm": 0.3663122057914734,
      "learning_rate": 2.1492722205017035e-06,
      "loss": 0.0148,
      "step": 2402900
    },
    {
      "epoch": 3.932431282444047,
      "grad_norm": 0.45987024903297424,
      "learning_rate": 2.1492063282881866e-06,
      "loss": 0.0078,
      "step": 2402920
    },
    {
      "epoch": 3.9324640128827006,
      "grad_norm": 0.21548448503017426,
      "learning_rate": 2.1491404360746694e-06,
      "loss": 0.0116,
      "step": 2402940
    },
    {
      "epoch": 3.9324967433213542,
      "grad_norm": 0.09601952880620956,
      "learning_rate": 2.149074543861152e-06,
      "loss": 0.0133,
      "step": 2402960
    },
    {
      "epoch": 3.9325294737600074,
      "grad_norm": 0.09266534447669983,
      "learning_rate": 2.149008651647635e-06,
      "loss": 0.0126,
      "step": 2402980
    },
    {
      "epoch": 3.9325622041986605,
      "grad_norm": 0.09673662483692169,
      "learning_rate": 2.1489427594341176e-06,
      "loss": 0.0098,
      "step": 2403000
    },
    {
      "epoch": 3.932594934637314,
      "grad_norm": 0.2853009104728699,
      "learning_rate": 2.1488768672206008e-06,
      "loss": 0.0097,
      "step": 2403020
    },
    {
      "epoch": 3.9326276650759673,
      "grad_norm": 0.3016960024833679,
      "learning_rate": 2.1488109750070835e-06,
      "loss": 0.0099,
      "step": 2403040
    },
    {
      "epoch": 3.9326603955146204,
      "grad_norm": 0.12532682716846466,
      "learning_rate": 2.1487450827935667e-06,
      "loss": 0.0115,
      "step": 2403060
    },
    {
      "epoch": 3.932693125953274,
      "grad_norm": 0.2789938747882843,
      "learning_rate": 2.1486791905800494e-06,
      "loss": 0.0094,
      "step": 2403080
    },
    {
      "epoch": 3.9327258563919276,
      "grad_norm": 0.2705085873603821,
      "learning_rate": 2.148613298366532e-06,
      "loss": 0.0126,
      "step": 2403100
    },
    {
      "epoch": 3.9327585868305808,
      "grad_norm": 0.15155120193958282,
      "learning_rate": 2.1485474061530153e-06,
      "loss": 0.0065,
      "step": 2403120
    },
    {
      "epoch": 3.932791317269234,
      "grad_norm": 0.15262170135974884,
      "learning_rate": 2.148481513939498e-06,
      "loss": 0.0117,
      "step": 2403140
    },
    {
      "epoch": 3.9328240477078875,
      "grad_norm": 0.11749134957790375,
      "learning_rate": 2.1484156217259808e-06,
      "loss": 0.0068,
      "step": 2403160
    },
    {
      "epoch": 3.9328567781465407,
      "grad_norm": 0.5073546171188354,
      "learning_rate": 2.1483497295124635e-06,
      "loss": 0.0148,
      "step": 2403180
    },
    {
      "epoch": 3.932889508585194,
      "grad_norm": 0.0786994993686676,
      "learning_rate": 2.1482838372989463e-06,
      "loss": 0.013,
      "step": 2403200
    },
    {
      "epoch": 3.9329222390238474,
      "grad_norm": 0.14297036826610565,
      "learning_rate": 2.1482179450854294e-06,
      "loss": 0.0115,
      "step": 2403220
    },
    {
      "epoch": 3.932954969462501,
      "grad_norm": 0.13809619843959808,
      "learning_rate": 2.1481520528719126e-06,
      "loss": 0.0141,
      "step": 2403240
    },
    {
      "epoch": 3.932987699901154,
      "grad_norm": 0.3017900586128235,
      "learning_rate": 2.1480861606583953e-06,
      "loss": 0.0101,
      "step": 2403260
    },
    {
      "epoch": 3.9330204303398073,
      "grad_norm": 0.1964576095342636,
      "learning_rate": 2.148020268444878e-06,
      "loss": 0.0076,
      "step": 2403280
    },
    {
      "epoch": 3.933053160778461,
      "grad_norm": 0.48885607719421387,
      "learning_rate": 2.147954376231361e-06,
      "loss": 0.011,
      "step": 2403300
    },
    {
      "epoch": 3.933085891217114,
      "grad_norm": 0.27347031235694885,
      "learning_rate": 2.147888484017844e-06,
      "loss": 0.0063,
      "step": 2403320
    },
    {
      "epoch": 3.933118621655767,
      "grad_norm": 0.4338309168815613,
      "learning_rate": 2.1478225918043267e-06,
      "loss": 0.0151,
      "step": 2403340
    },
    {
      "epoch": 3.933151352094421,
      "grad_norm": 0.8346026539802551,
      "learning_rate": 2.1477566995908095e-06,
      "loss": 0.0097,
      "step": 2403360
    },
    {
      "epoch": 3.933184082533074,
      "grad_norm": 0.0481320321559906,
      "learning_rate": 2.147690807377292e-06,
      "loss": 0.012,
      "step": 2403380
    },
    {
      "epoch": 3.9332168129717275,
      "grad_norm": 0.1633448451757431,
      "learning_rate": 2.147624915163775e-06,
      "loss": 0.0103,
      "step": 2403400
    },
    {
      "epoch": 3.9332495434103807,
      "grad_norm": 0.2642061412334442,
      "learning_rate": 2.147559022950258e-06,
      "loss": 0.0122,
      "step": 2403420
    },
    {
      "epoch": 3.9332822738490343,
      "grad_norm": 0.1492520123720169,
      "learning_rate": 2.1474931307367413e-06,
      "loss": 0.0142,
      "step": 2403440
    },
    {
      "epoch": 3.9333150042876874,
      "grad_norm": 0.3929269015789032,
      "learning_rate": 2.147427238523224e-06,
      "loss": 0.0087,
      "step": 2403460
    },
    {
      "epoch": 3.9333477347263406,
      "grad_norm": 0.17755550146102905,
      "learning_rate": 2.1473613463097067e-06,
      "loss": 0.0112,
      "step": 2403480
    },
    {
      "epoch": 3.933380465164994,
      "grad_norm": 0.2702886760234833,
      "learning_rate": 2.1472954540961895e-06,
      "loss": 0.0099,
      "step": 2403500
    },
    {
      "epoch": 3.9334131956036473,
      "grad_norm": 0.29183855652809143,
      "learning_rate": 2.1472295618826726e-06,
      "loss": 0.0099,
      "step": 2403520
    },
    {
      "epoch": 3.933445926042301,
      "grad_norm": 0.4050540328025818,
      "learning_rate": 2.1471636696691554e-06,
      "loss": 0.0122,
      "step": 2403540
    },
    {
      "epoch": 3.933478656480954,
      "grad_norm": 0.29274827241897583,
      "learning_rate": 2.147097777455638e-06,
      "loss": 0.0115,
      "step": 2403560
    },
    {
      "epoch": 3.9335113869196077,
      "grad_norm": 0.4974001944065094,
      "learning_rate": 2.1470318852421213e-06,
      "loss": 0.0123,
      "step": 2403580
    },
    {
      "epoch": 3.933544117358261,
      "grad_norm": 0.1884549856185913,
      "learning_rate": 2.146965993028604e-06,
      "loss": 0.0075,
      "step": 2403600
    },
    {
      "epoch": 3.933576847796914,
      "grad_norm": 0.1625892072916031,
      "learning_rate": 2.1469001008150868e-06,
      "loss": 0.0097,
      "step": 2403620
    },
    {
      "epoch": 3.9336095782355676,
      "grad_norm": 0.2170143723487854,
      "learning_rate": 2.14683420860157e-06,
      "loss": 0.006,
      "step": 2403640
    },
    {
      "epoch": 3.9336423086742207,
      "grad_norm": 0.3234648108482361,
      "learning_rate": 2.1467683163880527e-06,
      "loss": 0.0137,
      "step": 2403660
    },
    {
      "epoch": 3.9336750391128743,
      "grad_norm": 0.3113346993923187,
      "learning_rate": 2.1467024241745354e-06,
      "loss": 0.0137,
      "step": 2403680
    },
    {
      "epoch": 3.9337077695515275,
      "grad_norm": 0.6561026573181152,
      "learning_rate": 2.146636531961018e-06,
      "loss": 0.0139,
      "step": 2403700
    },
    {
      "epoch": 3.933740499990181,
      "grad_norm": 0.2667958736419678,
      "learning_rate": 2.1465706397475013e-06,
      "loss": 0.0145,
      "step": 2403720
    },
    {
      "epoch": 3.933773230428834,
      "grad_norm": 0.18964385986328125,
      "learning_rate": 2.146504747533984e-06,
      "loss": 0.0129,
      "step": 2403740
    },
    {
      "epoch": 3.9338059608674874,
      "grad_norm": 0.5255964398384094,
      "learning_rate": 2.146438855320467e-06,
      "loss": 0.0096,
      "step": 2403760
    },
    {
      "epoch": 3.933838691306141,
      "grad_norm": 0.16697466373443604,
      "learning_rate": 2.14637296310695e-06,
      "loss": 0.0095,
      "step": 2403780
    },
    {
      "epoch": 3.933871421744794,
      "grad_norm": 0.12402841448783875,
      "learning_rate": 2.1463070708934327e-06,
      "loss": 0.0129,
      "step": 2403800
    },
    {
      "epoch": 3.9339041521834477,
      "grad_norm": 0.2448195517063141,
      "learning_rate": 2.1462411786799154e-06,
      "loss": 0.009,
      "step": 2403820
    },
    {
      "epoch": 3.933936882622101,
      "grad_norm": 0.3781227171421051,
      "learning_rate": 2.1461752864663986e-06,
      "loss": 0.0125,
      "step": 2403840
    },
    {
      "epoch": 3.9339696130607544,
      "grad_norm": 0.25077834725379944,
      "learning_rate": 2.1461093942528813e-06,
      "loss": 0.0098,
      "step": 2403860
    },
    {
      "epoch": 3.9340023434994076,
      "grad_norm": 0.2844306230545044,
      "learning_rate": 2.146043502039364e-06,
      "loss": 0.0131,
      "step": 2403880
    },
    {
      "epoch": 3.9340350739380607,
      "grad_norm": 0.11591582745313644,
      "learning_rate": 2.145977609825847e-06,
      "loss": 0.0093,
      "step": 2403900
    },
    {
      "epoch": 3.9340678043767143,
      "grad_norm": 0.02291632816195488,
      "learning_rate": 2.14591171761233e-06,
      "loss": 0.0094,
      "step": 2403920
    },
    {
      "epoch": 3.9341005348153675,
      "grad_norm": 0.1476343870162964,
      "learning_rate": 2.1458458253988127e-06,
      "loss": 0.0137,
      "step": 2403940
    },
    {
      "epoch": 3.934133265254021,
      "grad_norm": 0.21986904740333557,
      "learning_rate": 2.145779933185296e-06,
      "loss": 0.0077,
      "step": 2403960
    },
    {
      "epoch": 3.9341659956926742,
      "grad_norm": 0.38636431097984314,
      "learning_rate": 2.1457140409717786e-06,
      "loss": 0.0103,
      "step": 2403980
    },
    {
      "epoch": 3.934198726131328,
      "grad_norm": 0.34409087896347046,
      "learning_rate": 2.1456481487582614e-06,
      "loss": 0.0112,
      "step": 2404000
    },
    {
      "epoch": 3.934231456569981,
      "grad_norm": 0.18810142576694489,
      "learning_rate": 2.145582256544744e-06,
      "loss": 0.0104,
      "step": 2404020
    },
    {
      "epoch": 3.934264187008634,
      "grad_norm": 0.1871877759695053,
      "learning_rate": 2.1455163643312273e-06,
      "loss": 0.0095,
      "step": 2404040
    },
    {
      "epoch": 3.9342969174472877,
      "grad_norm": 0.26484978199005127,
      "learning_rate": 2.14545047211771e-06,
      "loss": 0.012,
      "step": 2404060
    },
    {
      "epoch": 3.934329647885941,
      "grad_norm": 0.24385260045528412,
      "learning_rate": 2.1453845799041927e-06,
      "loss": 0.0094,
      "step": 2404080
    },
    {
      "epoch": 3.9343623783245945,
      "grad_norm": 0.13446077704429626,
      "learning_rate": 2.1453186876906755e-06,
      "loss": 0.008,
      "step": 2404100
    },
    {
      "epoch": 3.9343951087632476,
      "grad_norm": 0.29541054368019104,
      "learning_rate": 2.1452527954771586e-06,
      "loss": 0.0107,
      "step": 2404120
    },
    {
      "epoch": 3.934427839201901,
      "grad_norm": 0.5054616928100586,
      "learning_rate": 2.145186903263642e-06,
      "loss": 0.0059,
      "step": 2404140
    },
    {
      "epoch": 3.9344605696405544,
      "grad_norm": 0.10675877332687378,
      "learning_rate": 2.1451210110501245e-06,
      "loss": 0.0097,
      "step": 2404160
    },
    {
      "epoch": 3.9344933000792075,
      "grad_norm": 0.2156314104795456,
      "learning_rate": 2.1450551188366073e-06,
      "loss": 0.011,
      "step": 2404180
    },
    {
      "epoch": 3.934526030517861,
      "grad_norm": 0.5926553606987,
      "learning_rate": 2.14498922662309e-06,
      "loss": 0.013,
      "step": 2404200
    },
    {
      "epoch": 3.9345587609565142,
      "grad_norm": 0.1674615442752838,
      "learning_rate": 2.1449233344095728e-06,
      "loss": 0.0071,
      "step": 2404220
    },
    {
      "epoch": 3.934591491395168,
      "grad_norm": 0.16699771583080292,
      "learning_rate": 2.144857442196056e-06,
      "loss": 0.0115,
      "step": 2404240
    },
    {
      "epoch": 3.934624221833821,
      "grad_norm": 0.5301253199577332,
      "learning_rate": 2.1447915499825387e-06,
      "loss": 0.0081,
      "step": 2404260
    },
    {
      "epoch": 3.9346569522724746,
      "grad_norm": 0.8395534157752991,
      "learning_rate": 2.1447256577690214e-06,
      "loss": 0.0141,
      "step": 2404280
    },
    {
      "epoch": 3.9346896827111277,
      "grad_norm": 0.1554621458053589,
      "learning_rate": 2.1446597655555046e-06,
      "loss": 0.0063,
      "step": 2404300
    },
    {
      "epoch": 3.934722413149781,
      "grad_norm": 0.2554304599761963,
      "learning_rate": 2.1445938733419873e-06,
      "loss": 0.0116,
      "step": 2404320
    },
    {
      "epoch": 3.9347551435884345,
      "grad_norm": 0.13484010100364685,
      "learning_rate": 2.1445279811284705e-06,
      "loss": 0.0072,
      "step": 2404340
    },
    {
      "epoch": 3.9347878740270876,
      "grad_norm": 0.1932581216096878,
      "learning_rate": 2.1444620889149532e-06,
      "loss": 0.0133,
      "step": 2404360
    },
    {
      "epoch": 3.934820604465741,
      "grad_norm": 0.20643380284309387,
      "learning_rate": 2.144396196701436e-06,
      "loss": 0.0092,
      "step": 2404380
    },
    {
      "epoch": 3.9348533349043944,
      "grad_norm": 0.4106637239456177,
      "learning_rate": 2.1443303044879187e-06,
      "loss": 0.0117,
      "step": 2404400
    },
    {
      "epoch": 3.934886065343048,
      "grad_norm": 0.06560082733631134,
      "learning_rate": 2.1442644122744014e-06,
      "loss": 0.0102,
      "step": 2404420
    },
    {
      "epoch": 3.934918795781701,
      "grad_norm": 0.4003850519657135,
      "learning_rate": 2.1441985200608846e-06,
      "loss": 0.0082,
      "step": 2404440
    },
    {
      "epoch": 3.9349515262203543,
      "grad_norm": 0.18770831823349,
      "learning_rate": 2.1441326278473678e-06,
      "loss": 0.0086,
      "step": 2404460
    },
    {
      "epoch": 3.934984256659008,
      "grad_norm": 0.07413779199123383,
      "learning_rate": 2.1440667356338505e-06,
      "loss": 0.0119,
      "step": 2404480
    },
    {
      "epoch": 3.935016987097661,
      "grad_norm": 0.4111812114715576,
      "learning_rate": 2.1440008434203332e-06,
      "loss": 0.0143,
      "step": 2404500
    },
    {
      "epoch": 3.935049717536314,
      "grad_norm": 0.18860620260238647,
      "learning_rate": 2.143934951206816e-06,
      "loss": 0.0101,
      "step": 2404520
    },
    {
      "epoch": 3.9350824479749678,
      "grad_norm": 0.08763615787029266,
      "learning_rate": 2.143869058993299e-06,
      "loss": 0.0134,
      "step": 2404540
    },
    {
      "epoch": 3.9351151784136214,
      "grad_norm": 0.137932687997818,
      "learning_rate": 2.143803166779782e-06,
      "loss": 0.0105,
      "step": 2404560
    },
    {
      "epoch": 3.9351479088522745,
      "grad_norm": 0.272938072681427,
      "learning_rate": 2.1437372745662646e-06,
      "loss": 0.0072,
      "step": 2404580
    },
    {
      "epoch": 3.9351806392909277,
      "grad_norm": 0.32527920603752136,
      "learning_rate": 2.1436713823527474e-06,
      "loss": 0.007,
      "step": 2404600
    },
    {
      "epoch": 3.9352133697295812,
      "grad_norm": 0.11273913085460663,
      "learning_rate": 2.14360549013923e-06,
      "loss": 0.0065,
      "step": 2404620
    },
    {
      "epoch": 3.9352461001682344,
      "grad_norm": 0.19512638449668884,
      "learning_rate": 2.1435395979257133e-06,
      "loss": 0.011,
      "step": 2404640
    },
    {
      "epoch": 3.9352788306068875,
      "grad_norm": 0.0796879380941391,
      "learning_rate": 2.1434737057121964e-06,
      "loss": 0.0131,
      "step": 2404660
    },
    {
      "epoch": 3.935311561045541,
      "grad_norm": 0.09051303565502167,
      "learning_rate": 2.143407813498679e-06,
      "loss": 0.0075,
      "step": 2404680
    },
    {
      "epoch": 3.9353442914841947,
      "grad_norm": 0.2443644255399704,
      "learning_rate": 2.143341921285162e-06,
      "loss": 0.0132,
      "step": 2404700
    },
    {
      "epoch": 3.935377021922848,
      "grad_norm": 0.42105868458747864,
      "learning_rate": 2.1432760290716446e-06,
      "loss": 0.0119,
      "step": 2404720
    },
    {
      "epoch": 3.935409752361501,
      "grad_norm": 0.4455738663673401,
      "learning_rate": 2.143210136858128e-06,
      "loss": 0.009,
      "step": 2404740
    },
    {
      "epoch": 3.9354424828001546,
      "grad_norm": 0.08308175951242447,
      "learning_rate": 2.1431442446446106e-06,
      "loss": 0.0087,
      "step": 2404760
    },
    {
      "epoch": 3.935475213238808,
      "grad_norm": 0.09779021888971329,
      "learning_rate": 2.1430783524310933e-06,
      "loss": 0.0073,
      "step": 2404780
    },
    {
      "epoch": 3.935507943677461,
      "grad_norm": 0.17534089088439941,
      "learning_rate": 2.143012460217576e-06,
      "loss": 0.0066,
      "step": 2404800
    },
    {
      "epoch": 3.9355406741161145,
      "grad_norm": 0.3665803372859955,
      "learning_rate": 2.142946568004059e-06,
      "loss": 0.0127,
      "step": 2404820
    },
    {
      "epoch": 3.935573404554768,
      "grad_norm": 0.29251620173454285,
      "learning_rate": 2.142880675790542e-06,
      "loss": 0.0139,
      "step": 2404840
    },
    {
      "epoch": 3.9356061349934213,
      "grad_norm": 0.26209771633148193,
      "learning_rate": 2.142814783577025e-06,
      "loss": 0.0111,
      "step": 2404860
    },
    {
      "epoch": 3.9356388654320744,
      "grad_norm": 0.18214869499206543,
      "learning_rate": 2.142748891363508e-06,
      "loss": 0.007,
      "step": 2404880
    },
    {
      "epoch": 3.935671595870728,
      "grad_norm": 0.3512759506702423,
      "learning_rate": 2.1426829991499906e-06,
      "loss": 0.0101,
      "step": 2404900
    },
    {
      "epoch": 3.935704326309381,
      "grad_norm": 0.243804469704628,
      "learning_rate": 2.1426171069364733e-06,
      "loss": 0.0085,
      "step": 2404920
    },
    {
      "epoch": 3.9357370567480343,
      "grad_norm": 0.21392549574375153,
      "learning_rate": 2.1425512147229565e-06,
      "loss": 0.0101,
      "step": 2404940
    },
    {
      "epoch": 3.935769787186688,
      "grad_norm": 0.2608880400657654,
      "learning_rate": 2.1424853225094392e-06,
      "loss": 0.0095,
      "step": 2404960
    },
    {
      "epoch": 3.935802517625341,
      "grad_norm": 0.07340666651725769,
      "learning_rate": 2.142419430295922e-06,
      "loss": 0.0096,
      "step": 2404980
    },
    {
      "epoch": 3.9358352480639947,
      "grad_norm": 0.6785368323326111,
      "learning_rate": 2.142353538082405e-06,
      "loss": 0.0144,
      "step": 2405000
    },
    {
      "epoch": 3.935867978502648,
      "grad_norm": 0.12645602226257324,
      "learning_rate": 2.142287645868888e-06,
      "loss": 0.0129,
      "step": 2405020
    },
    {
      "epoch": 3.9359007089413014,
      "grad_norm": 0.3038420081138611,
      "learning_rate": 2.1422217536553706e-06,
      "loss": 0.0073,
      "step": 2405040
    },
    {
      "epoch": 3.9359334393799545,
      "grad_norm": 0.1348196417093277,
      "learning_rate": 2.1421558614418538e-06,
      "loss": 0.0101,
      "step": 2405060
    },
    {
      "epoch": 3.9359661698186077,
      "grad_norm": 0.2227315455675125,
      "learning_rate": 2.1420899692283365e-06,
      "loss": 0.0104,
      "step": 2405080
    },
    {
      "epoch": 3.9359989002572613,
      "grad_norm": 0.3829863369464874,
      "learning_rate": 2.1420240770148192e-06,
      "loss": 0.0112,
      "step": 2405100
    },
    {
      "epoch": 3.9360316306959144,
      "grad_norm": 0.3203001618385315,
      "learning_rate": 2.141958184801302e-06,
      "loss": 0.0101,
      "step": 2405120
    },
    {
      "epoch": 3.936064361134568,
      "grad_norm": 0.2783602774143219,
      "learning_rate": 2.141892292587785e-06,
      "loss": 0.0085,
      "step": 2405140
    },
    {
      "epoch": 3.936097091573221,
      "grad_norm": 0.31448352336883545,
      "learning_rate": 2.141826400374268e-06,
      "loss": 0.0157,
      "step": 2405160
    },
    {
      "epoch": 3.936129822011875,
      "grad_norm": 0.3422245383262634,
      "learning_rate": 2.141760508160751e-06,
      "loss": 0.0075,
      "step": 2405180
    },
    {
      "epoch": 3.936162552450528,
      "grad_norm": 0.18790489435195923,
      "learning_rate": 2.141694615947234e-06,
      "loss": 0.0135,
      "step": 2405200
    },
    {
      "epoch": 3.936195282889181,
      "grad_norm": 0.13544423878192902,
      "learning_rate": 2.1416287237337165e-06,
      "loss": 0.0134,
      "step": 2405220
    },
    {
      "epoch": 3.9362280133278347,
      "grad_norm": 0.18631894886493683,
      "learning_rate": 2.1415628315201993e-06,
      "loss": 0.0107,
      "step": 2405240
    },
    {
      "epoch": 3.936260743766488,
      "grad_norm": 0.39560115337371826,
      "learning_rate": 2.1414969393066824e-06,
      "loss": 0.0131,
      "step": 2405260
    },
    {
      "epoch": 3.9362934742051414,
      "grad_norm": 0.4604092836380005,
      "learning_rate": 2.141431047093165e-06,
      "loss": 0.0092,
      "step": 2405280
    },
    {
      "epoch": 3.9363262046437946,
      "grad_norm": 0.22687853872776031,
      "learning_rate": 2.141365154879648e-06,
      "loss": 0.0111,
      "step": 2405300
    },
    {
      "epoch": 3.936358935082448,
      "grad_norm": 0.39652353525161743,
      "learning_rate": 2.1412992626661307e-06,
      "loss": 0.0112,
      "step": 2405320
    },
    {
      "epoch": 3.9363916655211013,
      "grad_norm": 0.2132977843284607,
      "learning_rate": 2.141233370452614e-06,
      "loss": 0.0103,
      "step": 2405340
    },
    {
      "epoch": 3.9364243959597545,
      "grad_norm": 0.19021867215633392,
      "learning_rate": 2.1411674782390966e-06,
      "loss": 0.0076,
      "step": 2405360
    },
    {
      "epoch": 3.936457126398408,
      "grad_norm": 0.16080859303474426,
      "learning_rate": 2.1411015860255797e-06,
      "loss": 0.011,
      "step": 2405380
    },
    {
      "epoch": 3.936489856837061,
      "grad_norm": 0.06365984678268433,
      "learning_rate": 2.1410356938120625e-06,
      "loss": 0.0079,
      "step": 2405400
    },
    {
      "epoch": 3.936522587275715,
      "grad_norm": 0.1963227242231369,
      "learning_rate": 2.140969801598545e-06,
      "loss": 0.0105,
      "step": 2405420
    },
    {
      "epoch": 3.936555317714368,
      "grad_norm": 0.0838521420955658,
      "learning_rate": 2.140903909385028e-06,
      "loss": 0.0188,
      "step": 2405440
    },
    {
      "epoch": 3.9365880481530215,
      "grad_norm": 0.30932584404945374,
      "learning_rate": 2.140838017171511e-06,
      "loss": 0.0069,
      "step": 2405460
    },
    {
      "epoch": 3.9366207785916747,
      "grad_norm": 0.4220360517501831,
      "learning_rate": 2.140772124957994e-06,
      "loss": 0.0086,
      "step": 2405480
    },
    {
      "epoch": 3.936653509030328,
      "grad_norm": 0.21351297199726105,
      "learning_rate": 2.1407062327444766e-06,
      "loss": 0.0112,
      "step": 2405500
    },
    {
      "epoch": 3.9366862394689814,
      "grad_norm": 0.9266870021820068,
      "learning_rate": 2.1406403405309597e-06,
      "loss": 0.0097,
      "step": 2405520
    },
    {
      "epoch": 3.9367189699076346,
      "grad_norm": 0.259406715631485,
      "learning_rate": 2.1405744483174425e-06,
      "loss": 0.0096,
      "step": 2405540
    },
    {
      "epoch": 3.936751700346288,
      "grad_norm": 0.15451186895370483,
      "learning_rate": 2.1405085561039256e-06,
      "loss": 0.0093,
      "step": 2405560
    },
    {
      "epoch": 3.9367844307849413,
      "grad_norm": 0.24798426032066345,
      "learning_rate": 2.1404426638904084e-06,
      "loss": 0.0121,
      "step": 2405580
    },
    {
      "epoch": 3.936817161223595,
      "grad_norm": 0.16180585324764252,
      "learning_rate": 2.140376771676891e-06,
      "loss": 0.009,
      "step": 2405600
    },
    {
      "epoch": 3.936849891662248,
      "grad_norm": 0.11926087737083435,
      "learning_rate": 2.140310879463374e-06,
      "loss": 0.01,
      "step": 2405620
    },
    {
      "epoch": 3.9368826221009012,
      "grad_norm": 0.2805013954639435,
      "learning_rate": 2.1402449872498566e-06,
      "loss": 0.0101,
      "step": 2405640
    },
    {
      "epoch": 3.936915352539555,
      "grad_norm": 0.1635744720697403,
      "learning_rate": 2.1401790950363398e-06,
      "loss": 0.0117,
      "step": 2405660
    },
    {
      "epoch": 3.936948082978208,
      "grad_norm": 0.25585833191871643,
      "learning_rate": 2.1401132028228225e-06,
      "loss": 0.0102,
      "step": 2405680
    },
    {
      "epoch": 3.9369808134168616,
      "grad_norm": 0.09099126607179642,
      "learning_rate": 2.1400473106093057e-06,
      "loss": 0.0097,
      "step": 2405700
    },
    {
      "epoch": 3.9370135438555147,
      "grad_norm": 0.07373581826686859,
      "learning_rate": 2.1399814183957884e-06,
      "loss": 0.0094,
      "step": 2405720
    },
    {
      "epoch": 3.9370462742941683,
      "grad_norm": 0.519127368927002,
      "learning_rate": 2.139915526182271e-06,
      "loss": 0.0104,
      "step": 2405740
    },
    {
      "epoch": 3.9370790047328215,
      "grad_norm": 0.23019184172153473,
      "learning_rate": 2.1398496339687543e-06,
      "loss": 0.0133,
      "step": 2405760
    },
    {
      "epoch": 3.9371117351714746,
      "grad_norm": 0.43881818652153015,
      "learning_rate": 2.139783741755237e-06,
      "loss": 0.0072,
      "step": 2405780
    },
    {
      "epoch": 3.937144465610128,
      "grad_norm": 0.3361002504825592,
      "learning_rate": 2.13971784954172e-06,
      "loss": 0.0118,
      "step": 2405800
    },
    {
      "epoch": 3.9371771960487814,
      "grad_norm": 0.07773357629776001,
      "learning_rate": 2.1396519573282025e-06,
      "loss": 0.0087,
      "step": 2405820
    },
    {
      "epoch": 3.9372099264874345,
      "grad_norm": 0.1493019461631775,
      "learning_rate": 2.1395860651146853e-06,
      "loss": 0.0127,
      "step": 2405840
    },
    {
      "epoch": 3.937242656926088,
      "grad_norm": 0.31097716093063354,
      "learning_rate": 2.1395201729011684e-06,
      "loss": 0.0095,
      "step": 2405860
    },
    {
      "epoch": 3.9372753873647417,
      "grad_norm": 0.36940184235572815,
      "learning_rate": 2.1394542806876516e-06,
      "loss": 0.0161,
      "step": 2405880
    },
    {
      "epoch": 3.937308117803395,
      "grad_norm": 0.5093036890029907,
      "learning_rate": 2.1393883884741343e-06,
      "loss": 0.015,
      "step": 2405900
    },
    {
      "epoch": 3.937340848242048,
      "grad_norm": 0.13147494196891785,
      "learning_rate": 2.139322496260617e-06,
      "loss": 0.0101,
      "step": 2405920
    },
    {
      "epoch": 3.9373735786807016,
      "grad_norm": 0.20783266425132751,
      "learning_rate": 2.1392566040471e-06,
      "loss": 0.0078,
      "step": 2405940
    },
    {
      "epoch": 3.9374063091193547,
      "grad_norm": 0.33544912934303284,
      "learning_rate": 2.139190711833583e-06,
      "loss": 0.0092,
      "step": 2405960
    },
    {
      "epoch": 3.937439039558008,
      "grad_norm": 0.3202739655971527,
      "learning_rate": 2.1391248196200657e-06,
      "loss": 0.0125,
      "step": 2405980
    },
    {
      "epoch": 3.9374717699966615,
      "grad_norm": 0.17418645322322845,
      "learning_rate": 2.1390589274065485e-06,
      "loss": 0.0089,
      "step": 2406000
    },
    {
      "epoch": 3.937504500435315,
      "grad_norm": 0.10041981190443039,
      "learning_rate": 2.138993035193031e-06,
      "loss": 0.0124,
      "step": 2406020
    },
    {
      "epoch": 3.9375372308739682,
      "grad_norm": 0.9349315166473389,
      "learning_rate": 2.138927142979514e-06,
      "loss": 0.0107,
      "step": 2406040
    },
    {
      "epoch": 3.9375699613126214,
      "grad_norm": 0.8004878163337708,
      "learning_rate": 2.138861250765997e-06,
      "loss": 0.017,
      "step": 2406060
    },
    {
      "epoch": 3.937602691751275,
      "grad_norm": 0.2867076098918915,
      "learning_rate": 2.1387953585524803e-06,
      "loss": 0.0111,
      "step": 2406080
    },
    {
      "epoch": 3.937635422189928,
      "grad_norm": 0.03046276606619358,
      "learning_rate": 2.138729466338963e-06,
      "loss": 0.0096,
      "step": 2406100
    },
    {
      "epoch": 3.9376681526285813,
      "grad_norm": 0.34510764479637146,
      "learning_rate": 2.1386635741254457e-06,
      "loss": 0.0088,
      "step": 2406120
    },
    {
      "epoch": 3.937700883067235,
      "grad_norm": 0.5834279656410217,
      "learning_rate": 2.1385976819119285e-06,
      "loss": 0.0127,
      "step": 2406140
    },
    {
      "epoch": 3.9377336135058885,
      "grad_norm": 0.22183570265769958,
      "learning_rate": 2.1385317896984117e-06,
      "loss": 0.0092,
      "step": 2406160
    },
    {
      "epoch": 3.9377663439445416,
      "grad_norm": 0.19629345834255219,
      "learning_rate": 2.1384658974848944e-06,
      "loss": 0.015,
      "step": 2406180
    },
    {
      "epoch": 3.9377990743831948,
      "grad_norm": 0.33536845445632935,
      "learning_rate": 2.138400005271377e-06,
      "loss": 0.0126,
      "step": 2406200
    },
    {
      "epoch": 3.9378318048218484,
      "grad_norm": 0.16527703404426575,
      "learning_rate": 2.1383341130578603e-06,
      "loss": 0.0106,
      "step": 2406220
    },
    {
      "epoch": 3.9378645352605015,
      "grad_norm": 0.0996830016374588,
      "learning_rate": 2.138268220844343e-06,
      "loss": 0.0051,
      "step": 2406240
    },
    {
      "epoch": 3.9378972656991547,
      "grad_norm": 0.5907093286514282,
      "learning_rate": 2.1382023286308258e-06,
      "loss": 0.0103,
      "step": 2406260
    },
    {
      "epoch": 3.9379299961378083,
      "grad_norm": 0.9467793107032776,
      "learning_rate": 2.138136436417309e-06,
      "loss": 0.0118,
      "step": 2406280
    },
    {
      "epoch": 3.937962726576462,
      "grad_norm": 0.11996981501579285,
      "learning_rate": 2.1380705442037917e-06,
      "loss": 0.0084,
      "step": 2406300
    },
    {
      "epoch": 3.937995457015115,
      "grad_norm": 0.24546049535274506,
      "learning_rate": 2.1380046519902744e-06,
      "loss": 0.0062,
      "step": 2406320
    },
    {
      "epoch": 3.938028187453768,
      "grad_norm": 0.09826409816741943,
      "learning_rate": 2.137938759776757e-06,
      "loss": 0.0105,
      "step": 2406340
    },
    {
      "epoch": 3.9380609178924217,
      "grad_norm": 0.3922070264816284,
      "learning_rate": 2.1378728675632403e-06,
      "loss": 0.0158,
      "step": 2406360
    },
    {
      "epoch": 3.938093648331075,
      "grad_norm": 0.3020980954170227,
      "learning_rate": 2.137806975349723e-06,
      "loss": 0.0096,
      "step": 2406380
    },
    {
      "epoch": 3.938126378769728,
      "grad_norm": 0.11748924106359482,
      "learning_rate": 2.1377410831362062e-06,
      "loss": 0.0198,
      "step": 2406400
    },
    {
      "epoch": 3.9381591092083816,
      "grad_norm": 0.1541634202003479,
      "learning_rate": 2.137675190922689e-06,
      "loss": 0.008,
      "step": 2406420
    },
    {
      "epoch": 3.938191839647035,
      "grad_norm": 0.148887038230896,
      "learning_rate": 2.1376092987091717e-06,
      "loss": 0.0128,
      "step": 2406440
    },
    {
      "epoch": 3.9382245700856884,
      "grad_norm": 0.5210239887237549,
      "learning_rate": 2.1375434064956544e-06,
      "loss": 0.0063,
      "step": 2406460
    },
    {
      "epoch": 3.9382573005243415,
      "grad_norm": 0.22743472456932068,
      "learning_rate": 2.1374775142821376e-06,
      "loss": 0.0116,
      "step": 2406480
    },
    {
      "epoch": 3.938290030962995,
      "grad_norm": 0.10585487633943558,
      "learning_rate": 2.1374116220686203e-06,
      "loss": 0.0074,
      "step": 2406500
    },
    {
      "epoch": 3.9383227614016483,
      "grad_norm": 0.33884483575820923,
      "learning_rate": 2.137345729855103e-06,
      "loss": 0.0107,
      "step": 2406520
    },
    {
      "epoch": 3.9383554918403014,
      "grad_norm": 0.21393094956874847,
      "learning_rate": 2.137279837641586e-06,
      "loss": 0.0077,
      "step": 2406540
    },
    {
      "epoch": 3.938388222278955,
      "grad_norm": 0.09599043428897858,
      "learning_rate": 2.137213945428069e-06,
      "loss": 0.0091,
      "step": 2406560
    },
    {
      "epoch": 3.938420952717608,
      "grad_norm": 0.26152148842811584,
      "learning_rate": 2.1371480532145517e-06,
      "loss": 0.0126,
      "step": 2406580
    },
    {
      "epoch": 3.9384536831562618,
      "grad_norm": 0.1822098195552826,
      "learning_rate": 2.137082161001035e-06,
      "loss": 0.013,
      "step": 2406600
    },
    {
      "epoch": 3.938486413594915,
      "grad_norm": 0.39377620816230774,
      "learning_rate": 2.1370162687875176e-06,
      "loss": 0.0065,
      "step": 2406620
    },
    {
      "epoch": 3.9385191440335685,
      "grad_norm": 0.17131178081035614,
      "learning_rate": 2.1369503765740004e-06,
      "loss": 0.0093,
      "step": 2406640
    },
    {
      "epoch": 3.9385518744722217,
      "grad_norm": 0.16915519535541534,
      "learning_rate": 2.136884484360483e-06,
      "loss": 0.0128,
      "step": 2406660
    },
    {
      "epoch": 3.938584604910875,
      "grad_norm": 0.2572288513183594,
      "learning_rate": 2.1368185921469663e-06,
      "loss": 0.0129,
      "step": 2406680
    },
    {
      "epoch": 3.9386173353495284,
      "grad_norm": 0.2875558137893677,
      "learning_rate": 2.136752699933449e-06,
      "loss": 0.0079,
      "step": 2406700
    },
    {
      "epoch": 3.9386500657881816,
      "grad_norm": 0.14060631394386292,
      "learning_rate": 2.1366868077199318e-06,
      "loss": 0.0096,
      "step": 2406720
    },
    {
      "epoch": 3.938682796226835,
      "grad_norm": 0.4793262183666229,
      "learning_rate": 2.1366209155064145e-06,
      "loss": 0.0132,
      "step": 2406740
    },
    {
      "epoch": 3.9387155266654883,
      "grad_norm": 0.26706111431121826,
      "learning_rate": 2.1365550232928977e-06,
      "loss": 0.0085,
      "step": 2406760
    },
    {
      "epoch": 3.938748257104142,
      "grad_norm": 0.07324227690696716,
      "learning_rate": 2.136489131079381e-06,
      "loss": 0.0091,
      "step": 2406780
    },
    {
      "epoch": 3.938780987542795,
      "grad_norm": 0.11777431517839432,
      "learning_rate": 2.1364232388658636e-06,
      "loss": 0.0072,
      "step": 2406800
    },
    {
      "epoch": 3.938813717981448,
      "grad_norm": 0.07717052847146988,
      "learning_rate": 2.1363573466523463e-06,
      "loss": 0.007,
      "step": 2406820
    },
    {
      "epoch": 3.938846448420102,
      "grad_norm": 0.2080511748790741,
      "learning_rate": 2.136291454438829e-06,
      "loss": 0.0119,
      "step": 2406840
    },
    {
      "epoch": 3.938879178858755,
      "grad_norm": 0.5651553273200989,
      "learning_rate": 2.1362255622253118e-06,
      "loss": 0.0089,
      "step": 2406860
    },
    {
      "epoch": 3.9389119092974085,
      "grad_norm": 0.07151967287063599,
      "learning_rate": 2.136159670011795e-06,
      "loss": 0.0109,
      "step": 2406880
    },
    {
      "epoch": 3.9389446397360617,
      "grad_norm": 0.21192654967308044,
      "learning_rate": 2.1360937777982777e-06,
      "loss": 0.0146,
      "step": 2406900
    },
    {
      "epoch": 3.9389773701747153,
      "grad_norm": 0.07842162996530533,
      "learning_rate": 2.1360278855847604e-06,
      "loss": 0.0112,
      "step": 2406920
    },
    {
      "epoch": 3.9390101006133684,
      "grad_norm": 0.1655455380678177,
      "learning_rate": 2.1359619933712436e-06,
      "loss": 0.0098,
      "step": 2406940
    },
    {
      "epoch": 3.9390428310520216,
      "grad_norm": 0.10055264830589294,
      "learning_rate": 2.1358961011577263e-06,
      "loss": 0.0105,
      "step": 2406960
    },
    {
      "epoch": 3.939075561490675,
      "grad_norm": 0.2510421574115753,
      "learning_rate": 2.1358302089442095e-06,
      "loss": 0.0133,
      "step": 2406980
    },
    {
      "epoch": 3.9391082919293283,
      "grad_norm": 0.26236245036125183,
      "learning_rate": 2.1357643167306922e-06,
      "loss": 0.0074,
      "step": 2407000
    },
    {
      "epoch": 3.939141022367982,
      "grad_norm": 0.45118334889411926,
      "learning_rate": 2.135698424517175e-06,
      "loss": 0.0111,
      "step": 2407020
    },
    {
      "epoch": 3.939173752806635,
      "grad_norm": 0.4022664427757263,
      "learning_rate": 2.1356325323036577e-06,
      "loss": 0.0082,
      "step": 2407040
    },
    {
      "epoch": 3.9392064832452887,
      "grad_norm": 0.28643107414245605,
      "learning_rate": 2.1355666400901404e-06,
      "loss": 0.0074,
      "step": 2407060
    },
    {
      "epoch": 3.939239213683942,
      "grad_norm": 0.20829379558563232,
      "learning_rate": 2.1355007478766236e-06,
      "loss": 0.0104,
      "step": 2407080
    },
    {
      "epoch": 3.939271944122595,
      "grad_norm": 0.1496381163597107,
      "learning_rate": 2.1354348556631068e-06,
      "loss": 0.0085,
      "step": 2407100
    },
    {
      "epoch": 3.9393046745612486,
      "grad_norm": 0.17761576175689697,
      "learning_rate": 2.1353689634495895e-06,
      "loss": 0.0097,
      "step": 2407120
    },
    {
      "epoch": 3.9393374049999017,
      "grad_norm": 0.5668264627456665,
      "learning_rate": 2.1353030712360723e-06,
      "loss": 0.015,
      "step": 2407140
    },
    {
      "epoch": 3.9393701354385553,
      "grad_norm": 0.1929258108139038,
      "learning_rate": 2.135237179022555e-06,
      "loss": 0.0137,
      "step": 2407160
    },
    {
      "epoch": 3.9394028658772084,
      "grad_norm": 0.4489424228668213,
      "learning_rate": 2.135171286809038e-06,
      "loss": 0.0149,
      "step": 2407180
    },
    {
      "epoch": 3.939435596315862,
      "grad_norm": 0.15883313119411469,
      "learning_rate": 2.135105394595521e-06,
      "loss": 0.0105,
      "step": 2407200
    },
    {
      "epoch": 3.939468326754515,
      "grad_norm": 0.15110307931900024,
      "learning_rate": 2.1350395023820036e-06,
      "loss": 0.0074,
      "step": 2407220
    },
    {
      "epoch": 3.9395010571931683,
      "grad_norm": 0.464135080575943,
      "learning_rate": 2.1349736101684864e-06,
      "loss": 0.0088,
      "step": 2407240
    },
    {
      "epoch": 3.939533787631822,
      "grad_norm": 0.46725478768348694,
      "learning_rate": 2.134907717954969e-06,
      "loss": 0.011,
      "step": 2407260
    },
    {
      "epoch": 3.939566518070475,
      "grad_norm": 0.26445242762565613,
      "learning_rate": 2.1348418257414523e-06,
      "loss": 0.0138,
      "step": 2407280
    },
    {
      "epoch": 3.9395992485091287,
      "grad_norm": 0.6195948123931885,
      "learning_rate": 2.1347759335279354e-06,
      "loss": 0.0132,
      "step": 2407300
    },
    {
      "epoch": 3.939631978947782,
      "grad_norm": 0.5724092125892639,
      "learning_rate": 2.134710041314418e-06,
      "loss": 0.0108,
      "step": 2407320
    },
    {
      "epoch": 3.9396647093864354,
      "grad_norm": 0.7403793334960938,
      "learning_rate": 2.134644149100901e-06,
      "loss": 0.0149,
      "step": 2407340
    },
    {
      "epoch": 3.9396974398250886,
      "grad_norm": 0.631667971611023,
      "learning_rate": 2.1345782568873837e-06,
      "loss": 0.0087,
      "step": 2407360
    },
    {
      "epoch": 3.9397301702637417,
      "grad_norm": 0.08407144993543625,
      "learning_rate": 2.134512364673867e-06,
      "loss": 0.0098,
      "step": 2407380
    },
    {
      "epoch": 3.9397629007023953,
      "grad_norm": 0.4609787166118622,
      "learning_rate": 2.1344464724603496e-06,
      "loss": 0.0126,
      "step": 2407400
    },
    {
      "epoch": 3.9397956311410485,
      "grad_norm": 0.17568697035312653,
      "learning_rate": 2.1343805802468323e-06,
      "loss": 0.0096,
      "step": 2407420
    },
    {
      "epoch": 3.9398283615797016,
      "grad_norm": null,
      "learning_rate": 2.134314688033315e-06,
      "loss": 0.0082,
      "step": 2407440
    },
    {
      "epoch": 3.939861092018355,
      "grad_norm": 0.21620087325572968,
      "learning_rate": 2.134248795819798e-06,
      "loss": 0.0115,
      "step": 2407460
    },
    {
      "epoch": 3.939893822457009,
      "grad_norm": 0.21002940833568573,
      "learning_rate": 2.134182903606281e-06,
      "loss": 0.0112,
      "step": 2407480
    },
    {
      "epoch": 3.939926552895662,
      "grad_norm": 0.05194038525223732,
      "learning_rate": 2.134117011392764e-06,
      "loss": 0.0124,
      "step": 2407500
    },
    {
      "epoch": 3.939959283334315,
      "grad_norm": 0.07438254356384277,
      "learning_rate": 2.134051119179247e-06,
      "loss": 0.0101,
      "step": 2407520
    },
    {
      "epoch": 3.9399920137729687,
      "grad_norm": 0.3557170629501343,
      "learning_rate": 2.1339852269657296e-06,
      "loss": 0.0089,
      "step": 2407540
    },
    {
      "epoch": 3.940024744211622,
      "grad_norm": 0.2419935017824173,
      "learning_rate": 2.1339193347522123e-06,
      "loss": 0.0137,
      "step": 2407560
    },
    {
      "epoch": 3.940057474650275,
      "grad_norm": 0.10348562151193619,
      "learning_rate": 2.1338534425386955e-06,
      "loss": 0.0095,
      "step": 2407580
    },
    {
      "epoch": 3.9400902050889286,
      "grad_norm": 0.47111639380455017,
      "learning_rate": 2.1337875503251782e-06,
      "loss": 0.0147,
      "step": 2407600
    },
    {
      "epoch": 3.940122935527582,
      "grad_norm": 0.17547790706157684,
      "learning_rate": 2.133721658111661e-06,
      "loss": 0.016,
      "step": 2407620
    },
    {
      "epoch": 3.9401556659662353,
      "grad_norm": 0.06712890416383743,
      "learning_rate": 2.133655765898144e-06,
      "loss": 0.0097,
      "step": 2407640
    },
    {
      "epoch": 3.9401883964048885,
      "grad_norm": 0.32373470067977905,
      "learning_rate": 2.133589873684627e-06,
      "loss": 0.0112,
      "step": 2407660
    },
    {
      "epoch": 3.940221126843542,
      "grad_norm": 0.21046586334705353,
      "learning_rate": 2.1335239814711096e-06,
      "loss": 0.0113,
      "step": 2407680
    },
    {
      "epoch": 3.9402538572821952,
      "grad_norm": 0.38280734419822693,
      "learning_rate": 2.1334580892575928e-06,
      "loss": 0.0104,
      "step": 2407700
    },
    {
      "epoch": 3.9402865877208484,
      "grad_norm": 0.11024603247642517,
      "learning_rate": 2.1333921970440755e-06,
      "loss": 0.0132,
      "step": 2407720
    },
    {
      "epoch": 3.940319318159502,
      "grad_norm": 1.051310420036316,
      "learning_rate": 2.1333263048305583e-06,
      "loss": 0.0157,
      "step": 2407740
    },
    {
      "epoch": 3.9403520485981556,
      "grad_norm": 0.4997955858707428,
      "learning_rate": 2.133260412617041e-06,
      "loss": 0.0132,
      "step": 2407760
    },
    {
      "epoch": 3.9403847790368087,
      "grad_norm": 0.15492019057273865,
      "learning_rate": 2.133194520403524e-06,
      "loss": 0.0115,
      "step": 2407780
    },
    {
      "epoch": 3.940417509475462,
      "grad_norm": 0.5493330359458923,
      "learning_rate": 2.133128628190007e-06,
      "loss": 0.0084,
      "step": 2407800
    },
    {
      "epoch": 3.9404502399141155,
      "grad_norm": 0.23629377782344818,
      "learning_rate": 2.13306273597649e-06,
      "loss": 0.0136,
      "step": 2407820
    },
    {
      "epoch": 3.9404829703527686,
      "grad_norm": 0.2374749630689621,
      "learning_rate": 2.132996843762973e-06,
      "loss": 0.0084,
      "step": 2407840
    },
    {
      "epoch": 3.9405157007914218,
      "grad_norm": 0.0956132709980011,
      "learning_rate": 2.1329309515494555e-06,
      "loss": 0.0115,
      "step": 2407860
    },
    {
      "epoch": 3.9405484312300754,
      "grad_norm": 0.1323738545179367,
      "learning_rate": 2.1328650593359383e-06,
      "loss": 0.0128,
      "step": 2407880
    },
    {
      "epoch": 3.940581161668729,
      "grad_norm": 0.2899695932865143,
      "learning_rate": 2.1327991671224214e-06,
      "loss": 0.0073,
      "step": 2407900
    },
    {
      "epoch": 3.940613892107382,
      "grad_norm": 0.7045188546180725,
      "learning_rate": 2.132733274908904e-06,
      "loss": 0.0088,
      "step": 2407920
    },
    {
      "epoch": 3.9406466225460353,
      "grad_norm": 0.5272842645645142,
      "learning_rate": 2.132667382695387e-06,
      "loss": 0.0129,
      "step": 2407940
    },
    {
      "epoch": 3.940679352984689,
      "grad_norm": 0.38337427377700806,
      "learning_rate": 2.1326014904818697e-06,
      "loss": 0.0111,
      "step": 2407960
    },
    {
      "epoch": 3.940712083423342,
      "grad_norm": 0.8932709693908691,
      "learning_rate": 2.132535598268353e-06,
      "loss": 0.0139,
      "step": 2407980
    },
    {
      "epoch": 3.940744813861995,
      "grad_norm": 0.3739500641822815,
      "learning_rate": 2.1324697060548356e-06,
      "loss": 0.0137,
      "step": 2408000
    },
    {
      "epoch": 3.9407775443006487,
      "grad_norm": 0.054317545145750046,
      "learning_rate": 2.1324038138413187e-06,
      "loss": 0.0072,
      "step": 2408020
    },
    {
      "epoch": 3.940810274739302,
      "grad_norm": 0.18659625947475433,
      "learning_rate": 2.1323379216278015e-06,
      "loss": 0.0097,
      "step": 2408040
    },
    {
      "epoch": 3.9408430051779555,
      "grad_norm": 0.4290953278541565,
      "learning_rate": 2.132272029414284e-06,
      "loss": 0.011,
      "step": 2408060
    },
    {
      "epoch": 3.9408757356166086,
      "grad_norm": 0.5512735843658447,
      "learning_rate": 2.132206137200767e-06,
      "loss": 0.0102,
      "step": 2408080
    },
    {
      "epoch": 3.9409084660552622,
      "grad_norm": 0.06723731756210327,
      "learning_rate": 2.13214024498725e-06,
      "loss": 0.0087,
      "step": 2408100
    },
    {
      "epoch": 3.9409411964939154,
      "grad_norm": 0.041923437267541885,
      "learning_rate": 2.132074352773733e-06,
      "loss": 0.0068,
      "step": 2408120
    },
    {
      "epoch": 3.9409739269325685,
      "grad_norm": 0.3626430928707123,
      "learning_rate": 2.1320084605602156e-06,
      "loss": 0.0165,
      "step": 2408140
    },
    {
      "epoch": 3.941006657371222,
      "grad_norm": 0.3722633719444275,
      "learning_rate": 2.1319425683466988e-06,
      "loss": 0.0096,
      "step": 2408160
    },
    {
      "epoch": 3.9410393878098753,
      "grad_norm": 0.08672120422124863,
      "learning_rate": 2.1318766761331815e-06,
      "loss": 0.0109,
      "step": 2408180
    },
    {
      "epoch": 3.941072118248529,
      "grad_norm": 0.24510127305984497,
      "learning_rate": 2.1318107839196647e-06,
      "loss": 0.011,
      "step": 2408200
    },
    {
      "epoch": 3.941104848687182,
      "grad_norm": 0.43110671639442444,
      "learning_rate": 2.1317448917061474e-06,
      "loss": 0.0126,
      "step": 2408220
    },
    {
      "epoch": 3.9411375791258356,
      "grad_norm": 1.1155017614364624,
      "learning_rate": 2.13167899949263e-06,
      "loss": 0.0113,
      "step": 2408240
    },
    {
      "epoch": 3.9411703095644888,
      "grad_norm": 0.26572123169898987,
      "learning_rate": 2.131613107279113e-06,
      "loss": 0.0144,
      "step": 2408260
    },
    {
      "epoch": 3.941203040003142,
      "grad_norm": 0.31269359588623047,
      "learning_rate": 2.1315472150655956e-06,
      "loss": 0.0093,
      "step": 2408280
    },
    {
      "epoch": 3.9412357704417955,
      "grad_norm": 0.5307633876800537,
      "learning_rate": 2.1314813228520788e-06,
      "loss": 0.0109,
      "step": 2408300
    },
    {
      "epoch": 3.9412685008804487,
      "grad_norm": 0.3772070109844208,
      "learning_rate": 2.1314154306385615e-06,
      "loss": 0.009,
      "step": 2408320
    },
    {
      "epoch": 3.9413012313191023,
      "grad_norm": 0.271431565284729,
      "learning_rate": 2.1313495384250447e-06,
      "loss": 0.0107,
      "step": 2408340
    },
    {
      "epoch": 3.9413339617577554,
      "grad_norm": 0.18592923879623413,
      "learning_rate": 2.1312836462115274e-06,
      "loss": 0.0087,
      "step": 2408360
    },
    {
      "epoch": 3.941366692196409,
      "grad_norm": 0.061353765428066254,
      "learning_rate": 2.13121775399801e-06,
      "loss": 0.0124,
      "step": 2408380
    },
    {
      "epoch": 3.941399422635062,
      "grad_norm": 0.17408542335033417,
      "learning_rate": 2.1311518617844933e-06,
      "loss": 0.0106,
      "step": 2408400
    },
    {
      "epoch": 3.9414321530737153,
      "grad_norm": 0.3603779971599579,
      "learning_rate": 2.131085969570976e-06,
      "loss": 0.0114,
      "step": 2408420
    },
    {
      "epoch": 3.941464883512369,
      "grad_norm": 0.1881638616323471,
      "learning_rate": 2.131020077357459e-06,
      "loss": 0.0097,
      "step": 2408440
    },
    {
      "epoch": 3.941497613951022,
      "grad_norm": 0.2974518835544586,
      "learning_rate": 2.1309541851439415e-06,
      "loss": 0.0127,
      "step": 2408460
    },
    {
      "epoch": 3.9415303443896756,
      "grad_norm": 0.7362555265426636,
      "learning_rate": 2.1308882929304243e-06,
      "loss": 0.0126,
      "step": 2408480
    },
    {
      "epoch": 3.941563074828329,
      "grad_norm": 0.37740835547447205,
      "learning_rate": 2.1308224007169074e-06,
      "loss": 0.0084,
      "step": 2408500
    },
    {
      "epoch": 3.9415958052669824,
      "grad_norm": 0.14126795530319214,
      "learning_rate": 2.1307565085033906e-06,
      "loss": 0.0106,
      "step": 2408520
    },
    {
      "epoch": 3.9416285357056355,
      "grad_norm": 1.690731167793274,
      "learning_rate": 2.1306906162898733e-06,
      "loss": 0.0139,
      "step": 2408540
    },
    {
      "epoch": 3.9416612661442887,
      "grad_norm": 0.6132060885429382,
      "learning_rate": 2.130624724076356e-06,
      "loss": 0.0092,
      "step": 2408560
    },
    {
      "epoch": 3.9416939965829423,
      "grad_norm": 0.21354688704013824,
      "learning_rate": 2.130558831862839e-06,
      "loss": 0.0074,
      "step": 2408580
    },
    {
      "epoch": 3.9417267270215954,
      "grad_norm": 0.10471440106630325,
      "learning_rate": 2.130492939649322e-06,
      "loss": 0.0096,
      "step": 2408600
    },
    {
      "epoch": 3.941759457460249,
      "grad_norm": 0.15250429511070251,
      "learning_rate": 2.1304270474358047e-06,
      "loss": 0.0125,
      "step": 2408620
    },
    {
      "epoch": 3.941792187898902,
      "grad_norm": 0.1567230075597763,
      "learning_rate": 2.1303611552222875e-06,
      "loss": 0.0114,
      "step": 2408640
    },
    {
      "epoch": 3.9418249183375558,
      "grad_norm": 0.15378978848457336,
      "learning_rate": 2.1302952630087702e-06,
      "loss": 0.0105,
      "step": 2408660
    },
    {
      "epoch": 3.941857648776209,
      "grad_norm": 0.35588371753692627,
      "learning_rate": 2.1302293707952534e-06,
      "loss": 0.009,
      "step": 2408680
    },
    {
      "epoch": 3.941890379214862,
      "grad_norm": 0.2541998028755188,
      "learning_rate": 2.130163478581736e-06,
      "loss": 0.0168,
      "step": 2408700
    },
    {
      "epoch": 3.9419231096535157,
      "grad_norm": 0.1271667629480362,
      "learning_rate": 2.1300975863682193e-06,
      "loss": 0.014,
      "step": 2408720
    },
    {
      "epoch": 3.941955840092169,
      "grad_norm": 0.32753580808639526,
      "learning_rate": 2.130031694154702e-06,
      "loss": 0.0087,
      "step": 2408740
    },
    {
      "epoch": 3.9419885705308224,
      "grad_norm": 0.15164531767368317,
      "learning_rate": 2.1299658019411848e-06,
      "loss": 0.011,
      "step": 2408760
    },
    {
      "epoch": 3.9420213009694756,
      "grad_norm": 0.13485269248485565,
      "learning_rate": 2.1298999097276675e-06,
      "loss": 0.0081,
      "step": 2408780
    },
    {
      "epoch": 3.942054031408129,
      "grad_norm": 0.19299013912677765,
      "learning_rate": 2.1298340175141507e-06,
      "loss": 0.0101,
      "step": 2408800
    },
    {
      "epoch": 3.9420867618467823,
      "grad_norm": 0.25288209319114685,
      "learning_rate": 2.1297681253006334e-06,
      "loss": 0.0095,
      "step": 2408820
    },
    {
      "epoch": 3.9421194922854355,
      "grad_norm": 0.20469874143600464,
      "learning_rate": 2.129702233087116e-06,
      "loss": 0.01,
      "step": 2408840
    },
    {
      "epoch": 3.942152222724089,
      "grad_norm": 0.17606274783611298,
      "learning_rate": 2.1296363408735993e-06,
      "loss": 0.0081,
      "step": 2408860
    },
    {
      "epoch": 3.942184953162742,
      "grad_norm": 0.19525834918022156,
      "learning_rate": 2.129570448660082e-06,
      "loss": 0.0109,
      "step": 2408880
    },
    {
      "epoch": 3.9422176836013954,
      "grad_norm": 0.24652442336082458,
      "learning_rate": 2.1295045564465648e-06,
      "loss": 0.0084,
      "step": 2408900
    },
    {
      "epoch": 3.942250414040049,
      "grad_norm": 0.1378096044063568,
      "learning_rate": 2.129438664233048e-06,
      "loss": 0.0084,
      "step": 2408920
    },
    {
      "epoch": 3.9422831444787025,
      "grad_norm": 0.2555832862854004,
      "learning_rate": 2.1293727720195307e-06,
      "loss": 0.0091,
      "step": 2408940
    },
    {
      "epoch": 3.9423158749173557,
      "grad_norm": 0.18266062438488007,
      "learning_rate": 2.1293068798060134e-06,
      "loss": 0.0072,
      "step": 2408960
    },
    {
      "epoch": 3.942348605356009,
      "grad_norm": 0.1441957801580429,
      "learning_rate": 2.129240987592496e-06,
      "loss": 0.0134,
      "step": 2408980
    },
    {
      "epoch": 3.9423813357946624,
      "grad_norm": 2.2083921432495117,
      "learning_rate": 2.1291750953789793e-06,
      "loss": 0.0135,
      "step": 2409000
    },
    {
      "epoch": 3.9424140662333156,
      "grad_norm": 0.12459667026996613,
      "learning_rate": 2.129109203165462e-06,
      "loss": 0.0112,
      "step": 2409020
    },
    {
      "epoch": 3.9424467966719687,
      "grad_norm": 0.24177071452140808,
      "learning_rate": 2.1290433109519452e-06,
      "loss": 0.0131,
      "step": 2409040
    },
    {
      "epoch": 3.9424795271106223,
      "grad_norm": 0.21525390446186066,
      "learning_rate": 2.128977418738428e-06,
      "loss": 0.0103,
      "step": 2409060
    },
    {
      "epoch": 3.942512257549276,
      "grad_norm": 0.22737795114517212,
      "learning_rate": 2.1289115265249107e-06,
      "loss": 0.0151,
      "step": 2409080
    },
    {
      "epoch": 3.942544987987929,
      "grad_norm": 0.8688539266586304,
      "learning_rate": 2.1288456343113935e-06,
      "loss": 0.0158,
      "step": 2409100
    },
    {
      "epoch": 3.9425777184265822,
      "grad_norm": 0.2876633107662201,
      "learning_rate": 2.1287797420978766e-06,
      "loss": 0.0105,
      "step": 2409120
    },
    {
      "epoch": 3.942610448865236,
      "grad_norm": 0.16210505366325378,
      "learning_rate": 2.1287138498843594e-06,
      "loss": 0.0101,
      "step": 2409140
    },
    {
      "epoch": 3.942643179303889,
      "grad_norm": 0.28193169832229614,
      "learning_rate": 2.128647957670842e-06,
      "loss": 0.0131,
      "step": 2409160
    },
    {
      "epoch": 3.942675909742542,
      "grad_norm": 0.17299526929855347,
      "learning_rate": 2.128582065457325e-06,
      "loss": 0.0102,
      "step": 2409180
    },
    {
      "epoch": 3.9427086401811957,
      "grad_norm": 0.5327468514442444,
      "learning_rate": 2.128516173243808e-06,
      "loss": 0.0149,
      "step": 2409200
    },
    {
      "epoch": 3.9427413706198493,
      "grad_norm": 0.23281043767929077,
      "learning_rate": 2.1284502810302907e-06,
      "loss": 0.0109,
      "step": 2409220
    },
    {
      "epoch": 3.9427741010585025,
      "grad_norm": 0.20444490015506744,
      "learning_rate": 2.128384388816774e-06,
      "loss": 0.0098,
      "step": 2409240
    },
    {
      "epoch": 3.9428068314971556,
      "grad_norm": 0.250374972820282,
      "learning_rate": 2.1283184966032566e-06,
      "loss": 0.0102,
      "step": 2409260
    },
    {
      "epoch": 3.942839561935809,
      "grad_norm": 0.22274748980998993,
      "learning_rate": 2.1282526043897394e-06,
      "loss": 0.0095,
      "step": 2409280
    },
    {
      "epoch": 3.9428722923744624,
      "grad_norm": 0.05640847235918045,
      "learning_rate": 2.128186712176222e-06,
      "loss": 0.0083,
      "step": 2409300
    },
    {
      "epoch": 3.9429050228131155,
      "grad_norm": 0.19119572639465332,
      "learning_rate": 2.1281208199627053e-06,
      "loss": 0.0128,
      "step": 2409320
    },
    {
      "epoch": 3.942937753251769,
      "grad_norm": 0.12981796264648438,
      "learning_rate": 2.128054927749188e-06,
      "loss": 0.0092,
      "step": 2409340
    },
    {
      "epoch": 3.9429704836904227,
      "grad_norm": 0.15906035900115967,
      "learning_rate": 2.1279890355356708e-06,
      "loss": 0.0065,
      "step": 2409360
    },
    {
      "epoch": 3.943003214129076,
      "grad_norm": 0.20593784749507904,
      "learning_rate": 2.1279231433221535e-06,
      "loss": 0.0109,
      "step": 2409380
    },
    {
      "epoch": 3.943035944567729,
      "grad_norm": 0.23017117381095886,
      "learning_rate": 2.1278572511086367e-06,
      "loss": 0.0091,
      "step": 2409400
    },
    {
      "epoch": 3.9430686750063826,
      "grad_norm": 0.2943960726261139,
      "learning_rate": 2.12779135889512e-06,
      "loss": 0.0094,
      "step": 2409420
    },
    {
      "epoch": 3.9431014054450357,
      "grad_norm": 0.0478418730199337,
      "learning_rate": 2.1277254666816026e-06,
      "loss": 0.0112,
      "step": 2409440
    },
    {
      "epoch": 3.943134135883689,
      "grad_norm": 0.6581411361694336,
      "learning_rate": 2.1276595744680853e-06,
      "loss": 0.0113,
      "step": 2409460
    },
    {
      "epoch": 3.9431668663223425,
      "grad_norm": 0.16468031704425812,
      "learning_rate": 2.127593682254568e-06,
      "loss": 0.0071,
      "step": 2409480
    },
    {
      "epoch": 3.9431995967609956,
      "grad_norm": 0.4766317903995514,
      "learning_rate": 2.1275277900410508e-06,
      "loss": 0.0128,
      "step": 2409500
    },
    {
      "epoch": 3.9432323271996492,
      "grad_norm": 0.29664507508277893,
      "learning_rate": 2.127461897827534e-06,
      "loss": 0.0076,
      "step": 2409520
    },
    {
      "epoch": 3.9432650576383024,
      "grad_norm": 0.5387241840362549,
      "learning_rate": 2.1273960056140167e-06,
      "loss": 0.009,
      "step": 2409540
    },
    {
      "epoch": 3.943297788076956,
      "grad_norm": 0.4684253931045532,
      "learning_rate": 2.1273301134005e-06,
      "loss": 0.0085,
      "step": 2409560
    },
    {
      "epoch": 3.943330518515609,
      "grad_norm": 0.32246458530426025,
      "learning_rate": 2.1272642211869826e-06,
      "loss": 0.0078,
      "step": 2409580
    },
    {
      "epoch": 3.9433632489542623,
      "grad_norm": 0.4000692665576935,
      "learning_rate": 2.1271983289734653e-06,
      "loss": 0.0133,
      "step": 2409600
    },
    {
      "epoch": 3.943395979392916,
      "grad_norm": 0.17304636538028717,
      "learning_rate": 2.1271324367599485e-06,
      "loss": 0.0099,
      "step": 2409620
    },
    {
      "epoch": 3.943428709831569,
      "grad_norm": 0.061160411685705185,
      "learning_rate": 2.1270665445464312e-06,
      "loss": 0.0111,
      "step": 2409640
    },
    {
      "epoch": 3.9434614402702226,
      "grad_norm": 0.11497436463832855,
      "learning_rate": 2.127000652332914e-06,
      "loss": 0.0084,
      "step": 2409660
    },
    {
      "epoch": 3.9434941707088758,
      "grad_norm": 0.4098432958126068,
      "learning_rate": 2.1269347601193967e-06,
      "loss": 0.0119,
      "step": 2409680
    },
    {
      "epoch": 3.9435269011475294,
      "grad_norm": 0.512890100479126,
      "learning_rate": 2.1268688679058795e-06,
      "loss": 0.0136,
      "step": 2409700
    },
    {
      "epoch": 3.9435596315861825,
      "grad_norm": 0.3310408294200897,
      "learning_rate": 2.1268029756923626e-06,
      "loss": 0.0146,
      "step": 2409720
    },
    {
      "epoch": 3.9435923620248357,
      "grad_norm": 0.20556311309337616,
      "learning_rate": 2.1267370834788458e-06,
      "loss": 0.0099,
      "step": 2409740
    },
    {
      "epoch": 3.9436250924634892,
      "grad_norm": 0.19014041125774384,
      "learning_rate": 2.1266711912653285e-06,
      "loss": 0.0119,
      "step": 2409760
    },
    {
      "epoch": 3.9436578229021424,
      "grad_norm": 0.7209887504577637,
      "learning_rate": 2.1266052990518113e-06,
      "loss": 0.0138,
      "step": 2409780
    },
    {
      "epoch": 3.943690553340796,
      "grad_norm": 0.07058832049369812,
      "learning_rate": 2.126539406838294e-06,
      "loss": 0.0106,
      "step": 2409800
    },
    {
      "epoch": 3.943723283779449,
      "grad_norm": 0.26922938227653503,
      "learning_rate": 2.126473514624777e-06,
      "loss": 0.0105,
      "step": 2409820
    },
    {
      "epoch": 3.9437560142181027,
      "grad_norm": 0.20107901096343994,
      "learning_rate": 2.12640762241126e-06,
      "loss": 0.0125,
      "step": 2409840
    },
    {
      "epoch": 3.943788744656756,
      "grad_norm": 0.17770196497440338,
      "learning_rate": 2.1263417301977426e-06,
      "loss": 0.0072,
      "step": 2409860
    },
    {
      "epoch": 3.943821475095409,
      "grad_norm": 0.06082121655344963,
      "learning_rate": 2.1262758379842254e-06,
      "loss": 0.0082,
      "step": 2409880
    },
    {
      "epoch": 3.9438542055340626,
      "grad_norm": 0.11944764852523804,
      "learning_rate": 2.126209945770708e-06,
      "loss": 0.0098,
      "step": 2409900
    },
    {
      "epoch": 3.943886935972716,
      "grad_norm": 0.5763804316520691,
      "learning_rate": 2.1261440535571913e-06,
      "loss": 0.0095,
      "step": 2409920
    },
    {
      "epoch": 3.9439196664113694,
      "grad_norm": 0.20583705604076385,
      "learning_rate": 2.1260781613436744e-06,
      "loss": 0.0121,
      "step": 2409940
    },
    {
      "epoch": 3.9439523968500225,
      "grad_norm": 0.19873468577861786,
      "learning_rate": 2.126012269130157e-06,
      "loss": 0.0105,
      "step": 2409960
    },
    {
      "epoch": 3.943985127288676,
      "grad_norm": 0.3069004714488983,
      "learning_rate": 2.12594637691664e-06,
      "loss": 0.0094,
      "step": 2409980
    },
    {
      "epoch": 3.9440178577273293,
      "grad_norm": 0.05327359959483147,
      "learning_rate": 2.1258804847031227e-06,
      "loss": 0.0124,
      "step": 2410000
    },
    {
      "epoch": 3.9440505881659824,
      "grad_norm": 0.12563659250736237,
      "learning_rate": 2.125814592489606e-06,
      "loss": 0.0136,
      "step": 2410020
    },
    {
      "epoch": 3.944083318604636,
      "grad_norm": 0.22040557861328125,
      "learning_rate": 2.1257487002760886e-06,
      "loss": 0.0118,
      "step": 2410040
    },
    {
      "epoch": 3.944116049043289,
      "grad_norm": 0.28774893283843994,
      "learning_rate": 2.1256828080625713e-06,
      "loss": 0.013,
      "step": 2410060
    },
    {
      "epoch": 3.9441487794819428,
      "grad_norm": 0.06604109704494476,
      "learning_rate": 2.125616915849054e-06,
      "loss": 0.0133,
      "step": 2410080
    },
    {
      "epoch": 3.944181509920596,
      "grad_norm": 0.25035762786865234,
      "learning_rate": 2.1255510236355372e-06,
      "loss": 0.0102,
      "step": 2410100
    },
    {
      "epoch": 3.9442142403592495,
      "grad_norm": 0.3976464867591858,
      "learning_rate": 2.12548513142202e-06,
      "loss": 0.0121,
      "step": 2410120
    },
    {
      "epoch": 3.9442469707979027,
      "grad_norm": 0.22509624063968658,
      "learning_rate": 2.125419239208503e-06,
      "loss": 0.0095,
      "step": 2410140
    },
    {
      "epoch": 3.944279701236556,
      "grad_norm": 0.28532347083091736,
      "learning_rate": 2.125353346994986e-06,
      "loss": 0.0085,
      "step": 2410160
    },
    {
      "epoch": 3.9443124316752094,
      "grad_norm": 0.7158549427986145,
      "learning_rate": 2.1252874547814686e-06,
      "loss": 0.0142,
      "step": 2410180
    },
    {
      "epoch": 3.9443451621138625,
      "grad_norm": 0.4160730838775635,
      "learning_rate": 2.1252215625679513e-06,
      "loss": 0.0111,
      "step": 2410200
    },
    {
      "epoch": 3.944377892552516,
      "grad_norm": 0.3180283010005951,
      "learning_rate": 2.1251556703544345e-06,
      "loss": 0.0095,
      "step": 2410220
    },
    {
      "epoch": 3.9444106229911693,
      "grad_norm": 0.14337731897830963,
      "learning_rate": 2.1250897781409172e-06,
      "loss": 0.0123,
      "step": 2410240
    },
    {
      "epoch": 3.944443353429823,
      "grad_norm": 0.5589697957038879,
      "learning_rate": 2.1250238859274e-06,
      "loss": 0.0094,
      "step": 2410260
    },
    {
      "epoch": 3.944476083868476,
      "grad_norm": 0.17019374668598175,
      "learning_rate": 2.124957993713883e-06,
      "loss": 0.0202,
      "step": 2410280
    },
    {
      "epoch": 3.944508814307129,
      "grad_norm": 0.7116612792015076,
      "learning_rate": 2.124892101500366e-06,
      "loss": 0.0159,
      "step": 2410300
    },
    {
      "epoch": 3.944541544745783,
      "grad_norm": 0.31056490540504456,
      "learning_rate": 2.1248262092868486e-06,
      "loss": 0.0079,
      "step": 2410320
    },
    {
      "epoch": 3.944574275184436,
      "grad_norm": 0.3635486364364624,
      "learning_rate": 2.1247603170733318e-06,
      "loss": 0.0113,
      "step": 2410340
    },
    {
      "epoch": 3.9446070056230895,
      "grad_norm": 0.21340064704418182,
      "learning_rate": 2.1246944248598145e-06,
      "loss": 0.0127,
      "step": 2410360
    },
    {
      "epoch": 3.9446397360617427,
      "grad_norm": 0.19013413786888123,
      "learning_rate": 2.1246285326462973e-06,
      "loss": 0.0081,
      "step": 2410380
    },
    {
      "epoch": 3.9446724665003963,
      "grad_norm": 0.4893626868724823,
      "learning_rate": 2.12456264043278e-06,
      "loss": 0.0182,
      "step": 2410400
    },
    {
      "epoch": 3.9447051969390494,
      "grad_norm": 0.6197115182876587,
      "learning_rate": 2.124496748219263e-06,
      "loss": 0.015,
      "step": 2410420
    },
    {
      "epoch": 3.9447379273777026,
      "grad_norm": 0.33462944626808167,
      "learning_rate": 2.124430856005746e-06,
      "loss": 0.009,
      "step": 2410440
    },
    {
      "epoch": 3.944770657816356,
      "grad_norm": 0.17886221408843994,
      "learning_rate": 2.124364963792229e-06,
      "loss": 0.011,
      "step": 2410460
    },
    {
      "epoch": 3.9448033882550093,
      "grad_norm": 0.03664784133434296,
      "learning_rate": 2.124299071578712e-06,
      "loss": 0.012,
      "step": 2410480
    },
    {
      "epoch": 3.9448361186936625,
      "grad_norm": 0.0815492570400238,
      "learning_rate": 2.1242331793651946e-06,
      "loss": 0.0137,
      "step": 2410500
    },
    {
      "epoch": 3.944868849132316,
      "grad_norm": 0.13500641286373138,
      "learning_rate": 2.1241672871516773e-06,
      "loss": 0.0096,
      "step": 2410520
    },
    {
      "epoch": 3.9449015795709697,
      "grad_norm": 0.07773566991090775,
      "learning_rate": 2.1241013949381605e-06,
      "loss": 0.0111,
      "step": 2410540
    },
    {
      "epoch": 3.944934310009623,
      "grad_norm": 0.2948663532733917,
      "learning_rate": 2.124035502724643e-06,
      "loss": 0.0093,
      "step": 2410560
    },
    {
      "epoch": 3.944967040448276,
      "grad_norm": 0.5949100852012634,
      "learning_rate": 2.123969610511126e-06,
      "loss": 0.0116,
      "step": 2410580
    },
    {
      "epoch": 3.9449997708869295,
      "grad_norm": 0.29203006625175476,
      "learning_rate": 2.1239037182976087e-06,
      "loss": 0.014,
      "step": 2410600
    },
    {
      "epoch": 3.9450325013255827,
      "grad_norm": 0.1778033822774887,
      "learning_rate": 2.123837826084092e-06,
      "loss": 0.01,
      "step": 2410620
    },
    {
      "epoch": 3.945065231764236,
      "grad_norm": 0.2490519881248474,
      "learning_rate": 2.1237719338705746e-06,
      "loss": 0.0093,
      "step": 2410640
    },
    {
      "epoch": 3.9450979622028894,
      "grad_norm": 0.15842579305171967,
      "learning_rate": 2.1237060416570577e-06,
      "loss": 0.0175,
      "step": 2410660
    },
    {
      "epoch": 3.945130692641543,
      "grad_norm": 0.19653688371181488,
      "learning_rate": 2.1236401494435405e-06,
      "loss": 0.0084,
      "step": 2410680
    },
    {
      "epoch": 3.945163423080196,
      "grad_norm": 0.10390599071979523,
      "learning_rate": 2.1235742572300232e-06,
      "loss": 0.0114,
      "step": 2410700
    },
    {
      "epoch": 3.9451961535188493,
      "grad_norm": 0.31767550110816956,
      "learning_rate": 2.123508365016506e-06,
      "loss": 0.0062,
      "step": 2410720
    },
    {
      "epoch": 3.945228883957503,
      "grad_norm": 0.20690490305423737,
      "learning_rate": 2.123442472802989e-06,
      "loss": 0.0139,
      "step": 2410740
    },
    {
      "epoch": 3.945261614396156,
      "grad_norm": 0.11849771440029144,
      "learning_rate": 2.123376580589472e-06,
      "loss": 0.0118,
      "step": 2410760
    },
    {
      "epoch": 3.9452943448348092,
      "grad_norm": 0.2502462565898895,
      "learning_rate": 2.1233106883759546e-06,
      "loss": 0.0118,
      "step": 2410780
    },
    {
      "epoch": 3.945327075273463,
      "grad_norm": 0.11212275922298431,
      "learning_rate": 2.1232447961624378e-06,
      "loss": 0.008,
      "step": 2410800
    },
    {
      "epoch": 3.9453598057121164,
      "grad_norm": 0.1127217635512352,
      "learning_rate": 2.1231789039489205e-06,
      "loss": 0.0104,
      "step": 2410820
    },
    {
      "epoch": 3.9453925361507696,
      "grad_norm": 0.11363761126995087,
      "learning_rate": 2.1231130117354037e-06,
      "loss": 0.0148,
      "step": 2410840
    },
    {
      "epoch": 3.9454252665894227,
      "grad_norm": 0.1829097867012024,
      "learning_rate": 2.1230471195218864e-06,
      "loss": 0.0104,
      "step": 2410860
    },
    {
      "epoch": 3.9454579970280763,
      "grad_norm": 0.05537800490856171,
      "learning_rate": 2.122981227308369e-06,
      "loss": 0.0077,
      "step": 2410880
    },
    {
      "epoch": 3.9454907274667295,
      "grad_norm": 0.24590887129306793,
      "learning_rate": 2.122915335094852e-06,
      "loss": 0.0107,
      "step": 2410900
    },
    {
      "epoch": 3.9455234579053826,
      "grad_norm": 0.1685045212507248,
      "learning_rate": 2.1228494428813346e-06,
      "loss": 0.0142,
      "step": 2410920
    },
    {
      "epoch": 3.945556188344036,
      "grad_norm": 0.20534050464630127,
      "learning_rate": 2.122783550667818e-06,
      "loss": 0.0139,
      "step": 2410940
    },
    {
      "epoch": 3.94558891878269,
      "grad_norm": 0.24641487002372742,
      "learning_rate": 2.1227176584543005e-06,
      "loss": 0.0154,
      "step": 2410960
    },
    {
      "epoch": 3.945621649221343,
      "grad_norm": 0.18970321118831635,
      "learning_rate": 2.1226517662407837e-06,
      "loss": 0.0109,
      "step": 2410980
    },
    {
      "epoch": 3.945654379659996,
      "grad_norm": 0.25972458720207214,
      "learning_rate": 2.1225858740272664e-06,
      "loss": 0.0098,
      "step": 2411000
    },
    {
      "epoch": 3.9456871100986497,
      "grad_norm": 0.19007182121276855,
      "learning_rate": 2.122519981813749e-06,
      "loss": 0.0098,
      "step": 2411020
    },
    {
      "epoch": 3.945719840537303,
      "grad_norm": 0.3553658127784729,
      "learning_rate": 2.1224540896002323e-06,
      "loss": 0.0081,
      "step": 2411040
    },
    {
      "epoch": 3.945752570975956,
      "grad_norm": 0.9250363111495972,
      "learning_rate": 2.122388197386715e-06,
      "loss": 0.0191,
      "step": 2411060
    },
    {
      "epoch": 3.9457853014146096,
      "grad_norm": 0.17937703430652618,
      "learning_rate": 2.122322305173198e-06,
      "loss": 0.0071,
      "step": 2411080
    },
    {
      "epoch": 3.9458180318532627,
      "grad_norm": 0.19229258596897125,
      "learning_rate": 2.1222564129596806e-06,
      "loss": 0.0105,
      "step": 2411100
    },
    {
      "epoch": 3.9458507622919163,
      "grad_norm": 0.08660942316055298,
      "learning_rate": 2.1221905207461633e-06,
      "loss": 0.0089,
      "step": 2411120
    },
    {
      "epoch": 3.9458834927305695,
      "grad_norm": 0.09902138262987137,
      "learning_rate": 2.1221246285326465e-06,
      "loss": 0.0105,
      "step": 2411140
    },
    {
      "epoch": 3.945916223169223,
      "grad_norm": 0.10153625905513763,
      "learning_rate": 2.1220587363191296e-06,
      "loss": 0.0101,
      "step": 2411160
    },
    {
      "epoch": 3.9459489536078762,
      "grad_norm": 0.34118175506591797,
      "learning_rate": 2.1219928441056124e-06,
      "loss": 0.0089,
      "step": 2411180
    },
    {
      "epoch": 3.9459816840465294,
      "grad_norm": 0.1598692685365677,
      "learning_rate": 2.121926951892095e-06,
      "loss": 0.0148,
      "step": 2411200
    },
    {
      "epoch": 3.946014414485183,
      "grad_norm": 0.19609399139881134,
      "learning_rate": 2.121861059678578e-06,
      "loss": 0.0108,
      "step": 2411220
    },
    {
      "epoch": 3.946047144923836,
      "grad_norm": 0.16664569079875946,
      "learning_rate": 2.121795167465061e-06,
      "loss": 0.0078,
      "step": 2411240
    },
    {
      "epoch": 3.9460798753624897,
      "grad_norm": 0.19541475176811218,
      "learning_rate": 2.1217292752515437e-06,
      "loss": 0.0083,
      "step": 2411260
    },
    {
      "epoch": 3.946112605801143,
      "grad_norm": 0.18728190660476685,
      "learning_rate": 2.1216633830380265e-06,
      "loss": 0.0105,
      "step": 2411280
    },
    {
      "epoch": 3.9461453362397965,
      "grad_norm": 0.09053023159503937,
      "learning_rate": 2.1215974908245092e-06,
      "loss": 0.0142,
      "step": 2411300
    },
    {
      "epoch": 3.9461780666784496,
      "grad_norm": 0.18880435824394226,
      "learning_rate": 2.1215315986109924e-06,
      "loss": 0.014,
      "step": 2411320
    },
    {
      "epoch": 3.9462107971171028,
      "grad_norm": 0.11133384704589844,
      "learning_rate": 2.121465706397475e-06,
      "loss": 0.0131,
      "step": 2411340
    },
    {
      "epoch": 3.9462435275557564,
      "grad_norm": 0.3758355677127838,
      "learning_rate": 2.1213998141839583e-06,
      "loss": 0.0081,
      "step": 2411360
    },
    {
      "epoch": 3.9462762579944095,
      "grad_norm": 0.06251559406518936,
      "learning_rate": 2.121333921970441e-06,
      "loss": 0.0088,
      "step": 2411380
    },
    {
      "epoch": 3.946308988433063,
      "grad_norm": 0.20079632103443146,
      "learning_rate": 2.1212680297569238e-06,
      "loss": 0.0124,
      "step": 2411400
    },
    {
      "epoch": 3.9463417188717163,
      "grad_norm": 0.050697650760412216,
      "learning_rate": 2.1212021375434065e-06,
      "loss": 0.0081,
      "step": 2411420
    },
    {
      "epoch": 3.94637444931037,
      "grad_norm": 0.28780922293663025,
      "learning_rate": 2.1211362453298897e-06,
      "loss": 0.014,
      "step": 2411440
    },
    {
      "epoch": 3.946407179749023,
      "grad_norm": 0.6245903968811035,
      "learning_rate": 2.1210703531163724e-06,
      "loss": 0.0124,
      "step": 2411460
    },
    {
      "epoch": 3.946439910187676,
      "grad_norm": 0.2522260844707489,
      "learning_rate": 2.121004460902855e-06,
      "loss": 0.01,
      "step": 2411480
    },
    {
      "epoch": 3.9464726406263297,
      "grad_norm": 0.7273321151733398,
      "learning_rate": 2.1209385686893383e-06,
      "loss": 0.0152,
      "step": 2411500
    },
    {
      "epoch": 3.946505371064983,
      "grad_norm": 0.16280421614646912,
      "learning_rate": 2.120872676475821e-06,
      "loss": 0.0085,
      "step": 2411520
    },
    {
      "epoch": 3.9465381015036365,
      "grad_norm": 0.18166859447956085,
      "learning_rate": 2.120806784262304e-06,
      "loss": 0.0098,
      "step": 2411540
    },
    {
      "epoch": 3.9465708319422896,
      "grad_norm": 0.6323814988136292,
      "learning_rate": 2.120740892048787e-06,
      "loss": 0.0113,
      "step": 2411560
    },
    {
      "epoch": 3.9466035623809432,
      "grad_norm": 0.40585529804229736,
      "learning_rate": 2.1206749998352697e-06,
      "loss": 0.0114,
      "step": 2411580
    },
    {
      "epoch": 3.9466362928195964,
      "grad_norm": 0.2313212752342224,
      "learning_rate": 2.1206091076217524e-06,
      "loss": 0.011,
      "step": 2411600
    },
    {
      "epoch": 3.9466690232582495,
      "grad_norm": 0.10532104969024658,
      "learning_rate": 2.120543215408235e-06,
      "loss": 0.0121,
      "step": 2411620
    },
    {
      "epoch": 3.946701753696903,
      "grad_norm": 0.25400620698928833,
      "learning_rate": 2.1204773231947183e-06,
      "loss": 0.0087,
      "step": 2411640
    },
    {
      "epoch": 3.9467344841355563,
      "grad_norm": 0.12684546411037445,
      "learning_rate": 2.120411430981201e-06,
      "loss": 0.0104,
      "step": 2411660
    },
    {
      "epoch": 3.94676721457421,
      "grad_norm": 1.0174086093902588,
      "learning_rate": 2.1203455387676842e-06,
      "loss": 0.0135,
      "step": 2411680
    },
    {
      "epoch": 3.946799945012863,
      "grad_norm": 0.2623622417449951,
      "learning_rate": 2.120279646554167e-06,
      "loss": 0.0101,
      "step": 2411700
    },
    {
      "epoch": 3.9468326754515166,
      "grad_norm": 0.8702585101127625,
      "learning_rate": 2.1202137543406497e-06,
      "loss": 0.0115,
      "step": 2411720
    },
    {
      "epoch": 3.9468654058901698,
      "grad_norm": 0.68497633934021,
      "learning_rate": 2.1201478621271325e-06,
      "loss": 0.0138,
      "step": 2411740
    },
    {
      "epoch": 3.946898136328823,
      "grad_norm": 0.07498665899038315,
      "learning_rate": 2.1200819699136156e-06,
      "loss": 0.0127,
      "step": 2411760
    },
    {
      "epoch": 3.9469308667674765,
      "grad_norm": 0.11267536133527756,
      "learning_rate": 2.1200160777000984e-06,
      "loss": 0.0146,
      "step": 2411780
    },
    {
      "epoch": 3.9469635972061297,
      "grad_norm": 0.8060317635536194,
      "learning_rate": 2.119950185486581e-06,
      "loss": 0.0101,
      "step": 2411800
    },
    {
      "epoch": 3.9469963276447833,
      "grad_norm": 0.22892417013645172,
      "learning_rate": 2.119884293273064e-06,
      "loss": 0.0115,
      "step": 2411820
    },
    {
      "epoch": 3.9470290580834364,
      "grad_norm": 0.3424838185310364,
      "learning_rate": 2.119818401059547e-06,
      "loss": 0.0111,
      "step": 2411840
    },
    {
      "epoch": 3.94706178852209,
      "grad_norm": 0.4149870276451111,
      "learning_rate": 2.1197525088460297e-06,
      "loss": 0.0098,
      "step": 2411860
    },
    {
      "epoch": 3.947094518960743,
      "grad_norm": 0.18754497170448303,
      "learning_rate": 2.119686616632513e-06,
      "loss": 0.011,
      "step": 2411880
    },
    {
      "epoch": 3.9471272493993963,
      "grad_norm": 0.08947604894638062,
      "learning_rate": 2.1196207244189957e-06,
      "loss": 0.0131,
      "step": 2411900
    },
    {
      "epoch": 3.94715997983805,
      "grad_norm": 0.26997825503349304,
      "learning_rate": 2.1195548322054784e-06,
      "loss": 0.015,
      "step": 2411920
    },
    {
      "epoch": 3.947192710276703,
      "grad_norm": 0.6650199890136719,
      "learning_rate": 2.119488939991961e-06,
      "loss": 0.0167,
      "step": 2411940
    },
    {
      "epoch": 3.947225440715356,
      "grad_norm": 0.6904833316802979,
      "learning_rate": 2.1194230477784443e-06,
      "loss": 0.009,
      "step": 2411960
    },
    {
      "epoch": 3.94725817115401,
      "grad_norm": 0.13812316954135895,
      "learning_rate": 2.119357155564927e-06,
      "loss": 0.0117,
      "step": 2411980
    },
    {
      "epoch": 3.9472909015926634,
      "grad_norm": 0.6072360873222351,
      "learning_rate": 2.1192912633514098e-06,
      "loss": 0.0081,
      "step": 2412000
    },
    {
      "epoch": 3.9473236320313165,
      "grad_norm": 0.13235947489738464,
      "learning_rate": 2.1192253711378925e-06,
      "loss": 0.0144,
      "step": 2412020
    },
    {
      "epoch": 3.9473563624699697,
      "grad_norm": 0.12833745777606964,
      "learning_rate": 2.1191594789243757e-06,
      "loss": 0.0177,
      "step": 2412040
    },
    {
      "epoch": 3.9473890929086233,
      "grad_norm": 0.47587084770202637,
      "learning_rate": 2.119093586710859e-06,
      "loss": 0.0109,
      "step": 2412060
    },
    {
      "epoch": 3.9474218233472764,
      "grad_norm": 0.12675409018993378,
      "learning_rate": 2.1190276944973416e-06,
      "loss": 0.0088,
      "step": 2412080
    },
    {
      "epoch": 3.9474545537859296,
      "grad_norm": 0.8236662149429321,
      "learning_rate": 2.1189618022838243e-06,
      "loss": 0.0109,
      "step": 2412100
    },
    {
      "epoch": 3.947487284224583,
      "grad_norm": 0.14201940596103668,
      "learning_rate": 2.118895910070307e-06,
      "loss": 0.0118,
      "step": 2412120
    },
    {
      "epoch": 3.9475200146632368,
      "grad_norm": 0.32004064321517944,
      "learning_rate": 2.11883001785679e-06,
      "loss": 0.0099,
      "step": 2412140
    },
    {
      "epoch": 3.94755274510189,
      "grad_norm": 0.37089216709136963,
      "learning_rate": 2.118764125643273e-06,
      "loss": 0.0152,
      "step": 2412160
    },
    {
      "epoch": 3.947585475540543,
      "grad_norm": 0.4456265866756439,
      "learning_rate": 2.1186982334297557e-06,
      "loss": 0.0103,
      "step": 2412180
    },
    {
      "epoch": 3.9476182059791967,
      "grad_norm": 0.39038723707199097,
      "learning_rate": 2.118632341216239e-06,
      "loss": 0.0114,
      "step": 2412200
    },
    {
      "epoch": 3.94765093641785,
      "grad_norm": 0.16252736747264862,
      "learning_rate": 2.1185664490027216e-06,
      "loss": 0.0096,
      "step": 2412220
    },
    {
      "epoch": 3.947683666856503,
      "grad_norm": 0.1752723753452301,
      "learning_rate": 2.1185005567892043e-06,
      "loss": 0.0114,
      "step": 2412240
    },
    {
      "epoch": 3.9477163972951566,
      "grad_norm": 1.0186800956726074,
      "learning_rate": 2.1184346645756875e-06,
      "loss": 0.0118,
      "step": 2412260
    },
    {
      "epoch": 3.94774912773381,
      "grad_norm": 0.2179558426141739,
      "learning_rate": 2.1183687723621702e-06,
      "loss": 0.0105,
      "step": 2412280
    },
    {
      "epoch": 3.9477818581724633,
      "grad_norm": 0.1308739334344864,
      "learning_rate": 2.118302880148653e-06,
      "loss": 0.0141,
      "step": 2412300
    },
    {
      "epoch": 3.9478145886111164,
      "grad_norm": 0.6258695125579834,
      "learning_rate": 2.1182369879351357e-06,
      "loss": 0.0123,
      "step": 2412320
    },
    {
      "epoch": 3.94784731904977,
      "grad_norm": 0.08544090390205383,
      "learning_rate": 2.1181710957216185e-06,
      "loss": 0.0082,
      "step": 2412340
    },
    {
      "epoch": 3.947880049488423,
      "grad_norm": 0.23586487770080566,
      "learning_rate": 2.1181052035081016e-06,
      "loss": 0.0151,
      "step": 2412360
    },
    {
      "epoch": 3.9479127799270763,
      "grad_norm": 0.3739285469055176,
      "learning_rate": 2.118039311294585e-06,
      "loss": 0.0078,
      "step": 2412380
    },
    {
      "epoch": 3.94794551036573,
      "grad_norm": 0.2373981773853302,
      "learning_rate": 2.1179734190810675e-06,
      "loss": 0.0118,
      "step": 2412400
    },
    {
      "epoch": 3.9479782408043835,
      "grad_norm": 0.2646988034248352,
      "learning_rate": 2.1179075268675503e-06,
      "loss": 0.0101,
      "step": 2412420
    },
    {
      "epoch": 3.9480109712430367,
      "grad_norm": 0.841159999370575,
      "learning_rate": 2.117841634654033e-06,
      "loss": 0.011,
      "step": 2412440
    },
    {
      "epoch": 3.94804370168169,
      "grad_norm": 0.5137645602226257,
      "learning_rate": 2.117775742440516e-06,
      "loss": 0.0127,
      "step": 2412460
    },
    {
      "epoch": 3.9480764321203434,
      "grad_norm": 0.2631629407405853,
      "learning_rate": 2.117709850226999e-06,
      "loss": 0.0097,
      "step": 2412480
    },
    {
      "epoch": 3.9481091625589966,
      "grad_norm": 0.30610427260398865,
      "learning_rate": 2.1176439580134817e-06,
      "loss": 0.0103,
      "step": 2412500
    },
    {
      "epoch": 3.9481418929976497,
      "grad_norm": 0.08837847411632538,
      "learning_rate": 2.1175780657999644e-06,
      "loss": 0.0132,
      "step": 2412520
    },
    {
      "epoch": 3.9481746234363033,
      "grad_norm": 0.2087675929069519,
      "learning_rate": 2.117512173586447e-06,
      "loss": 0.0066,
      "step": 2412540
    },
    {
      "epoch": 3.9482073538749565,
      "grad_norm": 0.1356119066476822,
      "learning_rate": 2.1174462813729303e-06,
      "loss": 0.013,
      "step": 2412560
    },
    {
      "epoch": 3.94824008431361,
      "grad_norm": 0.21039026975631714,
      "learning_rate": 2.1173803891594135e-06,
      "loss": 0.0124,
      "step": 2412580
    },
    {
      "epoch": 3.948272814752263,
      "grad_norm": 0.60896897315979,
      "learning_rate": 2.117314496945896e-06,
      "loss": 0.0149,
      "step": 2412600
    },
    {
      "epoch": 3.948305545190917,
      "grad_norm": 0.2011169195175171,
      "learning_rate": 2.117248604732379e-06,
      "loss": 0.0132,
      "step": 2412620
    },
    {
      "epoch": 3.94833827562957,
      "grad_norm": 0.19637411832809448,
      "learning_rate": 2.1171827125188617e-06,
      "loss": 0.0068,
      "step": 2412640
    },
    {
      "epoch": 3.948371006068223,
      "grad_norm": 0.6872471570968628,
      "learning_rate": 2.117116820305345e-06,
      "loss": 0.0088,
      "step": 2412660
    },
    {
      "epoch": 3.9484037365068767,
      "grad_norm": 0.06554727256298065,
      "learning_rate": 2.1170509280918276e-06,
      "loss": 0.0142,
      "step": 2412680
    },
    {
      "epoch": 3.94843646694553,
      "grad_norm": 0.37498238682746887,
      "learning_rate": 2.1169850358783103e-06,
      "loss": 0.0068,
      "step": 2412700
    },
    {
      "epoch": 3.9484691973841834,
      "grad_norm": 0.16329318284988403,
      "learning_rate": 2.116919143664793e-06,
      "loss": 0.0105,
      "step": 2412720
    },
    {
      "epoch": 3.9485019278228366,
      "grad_norm": 0.21053194999694824,
      "learning_rate": 2.1168532514512762e-06,
      "loss": 0.0106,
      "step": 2412740
    },
    {
      "epoch": 3.94853465826149,
      "grad_norm": 0.27386584877967834,
      "learning_rate": 2.116787359237759e-06,
      "loss": 0.0103,
      "step": 2412760
    },
    {
      "epoch": 3.9485673887001433,
      "grad_norm": 0.8544203639030457,
      "learning_rate": 2.116721467024242e-06,
      "loss": 0.0118,
      "step": 2412780
    },
    {
      "epoch": 3.9486001191387965,
      "grad_norm": 0.5510292649269104,
      "learning_rate": 2.116655574810725e-06,
      "loss": 0.011,
      "step": 2412800
    },
    {
      "epoch": 3.94863284957745,
      "grad_norm": 0.510478138923645,
      "learning_rate": 2.1165896825972076e-06,
      "loss": 0.0087,
      "step": 2412820
    },
    {
      "epoch": 3.9486655800161032,
      "grad_norm": 0.2740685045719147,
      "learning_rate": 2.1165237903836903e-06,
      "loss": 0.0118,
      "step": 2412840
    },
    {
      "epoch": 3.948698310454757,
      "grad_norm": 0.2941359877586365,
      "learning_rate": 2.1164578981701735e-06,
      "loss": 0.0112,
      "step": 2412860
    },
    {
      "epoch": 3.94873104089341,
      "grad_norm": 0.041481081396341324,
      "learning_rate": 2.1163920059566563e-06,
      "loss": 0.0105,
      "step": 2412880
    },
    {
      "epoch": 3.9487637713320636,
      "grad_norm": 0.275079607963562,
      "learning_rate": 2.116326113743139e-06,
      "loss": 0.009,
      "step": 2412900
    },
    {
      "epoch": 3.9487965017707167,
      "grad_norm": 0.4731181263923645,
      "learning_rate": 2.116260221529622e-06,
      "loss": 0.0081,
      "step": 2412920
    },
    {
      "epoch": 3.94882923220937,
      "grad_norm": 0.33046624064445496,
      "learning_rate": 2.116194329316105e-06,
      "loss": 0.0117,
      "step": 2412940
    },
    {
      "epoch": 3.9488619626480235,
      "grad_norm": 0.18900278210639954,
      "learning_rate": 2.1161284371025876e-06,
      "loss": 0.0145,
      "step": 2412960
    },
    {
      "epoch": 3.9488946930866766,
      "grad_norm": 0.08985208719968796,
      "learning_rate": 2.116062544889071e-06,
      "loss": 0.0044,
      "step": 2412980
    },
    {
      "epoch": 3.94892742352533,
      "grad_norm": 0.29760172963142395,
      "learning_rate": 2.1159966526755535e-06,
      "loss": 0.0165,
      "step": 2413000
    },
    {
      "epoch": 3.9489601539639834,
      "grad_norm": 0.1791238784790039,
      "learning_rate": 2.1159307604620363e-06,
      "loss": 0.0089,
      "step": 2413020
    },
    {
      "epoch": 3.948992884402637,
      "grad_norm": 0.06860877573490143,
      "learning_rate": 2.115864868248519e-06,
      "loss": 0.0099,
      "step": 2413040
    },
    {
      "epoch": 3.94902561484129,
      "grad_norm": 0.2167363166809082,
      "learning_rate": 2.115798976035002e-06,
      "loss": 0.0083,
      "step": 2413060
    },
    {
      "epoch": 3.9490583452799433,
      "grad_norm": 0.10260346531867981,
      "learning_rate": 2.115733083821485e-06,
      "loss": 0.0107,
      "step": 2413080
    },
    {
      "epoch": 3.949091075718597,
      "grad_norm": 0.31412991881370544,
      "learning_rate": 2.115667191607968e-06,
      "loss": 0.0122,
      "step": 2413100
    },
    {
      "epoch": 3.94912380615725,
      "grad_norm": 0.7668580412864685,
      "learning_rate": 2.115601299394451e-06,
      "loss": 0.0117,
      "step": 2413120
    },
    {
      "epoch": 3.9491565365959036,
      "grad_norm": 0.16542014479637146,
      "learning_rate": 2.1155354071809336e-06,
      "loss": 0.0058,
      "step": 2413140
    },
    {
      "epoch": 3.9491892670345567,
      "grad_norm": 0.8106721639633179,
      "learning_rate": 2.1154695149674163e-06,
      "loss": 0.0106,
      "step": 2413160
    },
    {
      "epoch": 3.9492219974732103,
      "grad_norm": 0.24033410847187042,
      "learning_rate": 2.1154036227538995e-06,
      "loss": 0.0092,
      "step": 2413180
    },
    {
      "epoch": 3.9492547279118635,
      "grad_norm": 0.2962746024131775,
      "learning_rate": 2.115337730540382e-06,
      "loss": 0.0087,
      "step": 2413200
    },
    {
      "epoch": 3.9492874583505166,
      "grad_norm": 0.3243885040283203,
      "learning_rate": 2.115271838326865e-06,
      "loss": 0.009,
      "step": 2413220
    },
    {
      "epoch": 3.9493201887891702,
      "grad_norm": 0.40412262082099915,
      "learning_rate": 2.1152059461133477e-06,
      "loss": 0.0083,
      "step": 2413240
    },
    {
      "epoch": 3.9493529192278234,
      "grad_norm": 0.14608007669448853,
      "learning_rate": 2.115140053899831e-06,
      "loss": 0.015,
      "step": 2413260
    },
    {
      "epoch": 3.949385649666477,
      "grad_norm": 0.1714930385351181,
      "learning_rate": 2.115074161686314e-06,
      "loss": 0.012,
      "step": 2413280
    },
    {
      "epoch": 3.94941838010513,
      "grad_norm": 0.274291455745697,
      "learning_rate": 2.1150082694727967e-06,
      "loss": 0.0122,
      "step": 2413300
    },
    {
      "epoch": 3.9494511105437837,
      "grad_norm": 1.0854692459106445,
      "learning_rate": 2.1149423772592795e-06,
      "loss": 0.012,
      "step": 2413320
    },
    {
      "epoch": 3.949483840982437,
      "grad_norm": 0.6052502989768982,
      "learning_rate": 2.1148764850457622e-06,
      "loss": 0.0122,
      "step": 2413340
    },
    {
      "epoch": 3.94951657142109,
      "grad_norm": 0.25731030106544495,
      "learning_rate": 2.114810592832245e-06,
      "loss": 0.0133,
      "step": 2413360
    },
    {
      "epoch": 3.9495493018597436,
      "grad_norm": 0.3379456698894501,
      "learning_rate": 2.114744700618728e-06,
      "loss": 0.0101,
      "step": 2413380
    },
    {
      "epoch": 3.9495820322983968,
      "grad_norm": 0.05178334563970566,
      "learning_rate": 2.114678808405211e-06,
      "loss": 0.0108,
      "step": 2413400
    },
    {
      "epoch": 3.9496147627370504,
      "grad_norm": 0.6806238293647766,
      "learning_rate": 2.1146129161916936e-06,
      "loss": 0.0136,
      "step": 2413420
    },
    {
      "epoch": 3.9496474931757035,
      "grad_norm": 0.08627938479185104,
      "learning_rate": 2.1145470239781768e-06,
      "loss": 0.0047,
      "step": 2413440
    },
    {
      "epoch": 3.949680223614357,
      "grad_norm": 0.6854605078697205,
      "learning_rate": 2.1144811317646595e-06,
      "loss": 0.0094,
      "step": 2413460
    },
    {
      "epoch": 3.9497129540530103,
      "grad_norm": 0.25513359904289246,
      "learning_rate": 2.1144152395511427e-06,
      "loss": 0.0114,
      "step": 2413480
    },
    {
      "epoch": 3.9497456844916634,
      "grad_norm": 0.08173155039548874,
      "learning_rate": 2.1143493473376254e-06,
      "loss": 0.0099,
      "step": 2413500
    },
    {
      "epoch": 3.949778414930317,
      "grad_norm": 0.6364463567733765,
      "learning_rate": 2.114283455124108e-06,
      "loss": 0.0129,
      "step": 2413520
    },
    {
      "epoch": 3.94981114536897,
      "grad_norm": 0.544963538646698,
      "learning_rate": 2.114217562910591e-06,
      "loss": 0.0089,
      "step": 2413540
    },
    {
      "epoch": 3.9498438758076233,
      "grad_norm": 0.2888096272945404,
      "learning_rate": 2.1141516706970736e-06,
      "loss": 0.0089,
      "step": 2413560
    },
    {
      "epoch": 3.949876606246277,
      "grad_norm": 0.706839382648468,
      "learning_rate": 2.114085778483557e-06,
      "loss": 0.0093,
      "step": 2413580
    },
    {
      "epoch": 3.9499093366849305,
      "grad_norm": 0.04491394758224487,
      "learning_rate": 2.1140198862700395e-06,
      "loss": 0.0084,
      "step": 2413600
    },
    {
      "epoch": 3.9499420671235836,
      "grad_norm": 0.06552359461784363,
      "learning_rate": 2.1139539940565227e-06,
      "loss": 0.0076,
      "step": 2413620
    },
    {
      "epoch": 3.949974797562237,
      "grad_norm": 0.2808948755264282,
      "learning_rate": 2.1138881018430054e-06,
      "loss": 0.0124,
      "step": 2413640
    },
    {
      "epoch": 3.9500075280008904,
      "grad_norm": 0.12916617095470428,
      "learning_rate": 2.113822209629488e-06,
      "loss": 0.0092,
      "step": 2413660
    },
    {
      "epoch": 3.9500402584395435,
      "grad_norm": 0.12425076216459274,
      "learning_rate": 2.1137563174159713e-06,
      "loss": 0.0127,
      "step": 2413680
    },
    {
      "epoch": 3.9500729888781967,
      "grad_norm": 0.14719383418560028,
      "learning_rate": 2.113690425202454e-06,
      "loss": 0.0096,
      "step": 2413700
    },
    {
      "epoch": 3.9501057193168503,
      "grad_norm": 0.09592746198177338,
      "learning_rate": 2.113624532988937e-06,
      "loss": 0.0138,
      "step": 2413720
    },
    {
      "epoch": 3.950138449755504,
      "grad_norm": 0.42490240931510925,
      "learning_rate": 2.1135586407754196e-06,
      "loss": 0.0117,
      "step": 2413740
    },
    {
      "epoch": 3.950171180194157,
      "grad_norm": 0.09537656605243683,
      "learning_rate": 2.1134927485619023e-06,
      "loss": 0.0083,
      "step": 2413760
    },
    {
      "epoch": 3.95020391063281,
      "grad_norm": 0.24210412800312042,
      "learning_rate": 2.1134268563483855e-06,
      "loss": 0.0129,
      "step": 2413780
    },
    {
      "epoch": 3.9502366410714638,
      "grad_norm": 0.35178059339523315,
      "learning_rate": 2.1133609641348686e-06,
      "loss": 0.007,
      "step": 2413800
    },
    {
      "epoch": 3.950269371510117,
      "grad_norm": 0.21961908042430878,
      "learning_rate": 2.1132950719213514e-06,
      "loss": 0.0071,
      "step": 2413820
    },
    {
      "epoch": 3.95030210194877,
      "grad_norm": 0.1466200202703476,
      "learning_rate": 2.113229179707834e-06,
      "loss": 0.007,
      "step": 2413840
    },
    {
      "epoch": 3.9503348323874237,
      "grad_norm": 0.12333827465772629,
      "learning_rate": 2.113163287494317e-06,
      "loss": 0.0071,
      "step": 2413860
    },
    {
      "epoch": 3.9503675628260773,
      "grad_norm": 0.31190094351768494,
      "learning_rate": 2.1130973952808e-06,
      "loss": 0.0094,
      "step": 2413880
    },
    {
      "epoch": 3.9504002932647304,
      "grad_norm": 0.09457579255104065,
      "learning_rate": 2.1130315030672828e-06,
      "loss": 0.0099,
      "step": 2413900
    },
    {
      "epoch": 3.9504330237033836,
      "grad_norm": 0.447954922914505,
      "learning_rate": 2.1129656108537655e-06,
      "loss": 0.0139,
      "step": 2413920
    },
    {
      "epoch": 3.950465754142037,
      "grad_norm": 0.041793111711740494,
      "learning_rate": 2.1128997186402482e-06,
      "loss": 0.0095,
      "step": 2413940
    },
    {
      "epoch": 3.9504984845806903,
      "grad_norm": 0.17022188007831573,
      "learning_rate": 2.1128338264267314e-06,
      "loss": 0.0077,
      "step": 2413960
    },
    {
      "epoch": 3.9505312150193435,
      "grad_norm": 0.3897542953491211,
      "learning_rate": 2.112767934213214e-06,
      "loss": 0.0113,
      "step": 2413980
    },
    {
      "epoch": 3.950563945457997,
      "grad_norm": 0.15311779081821442,
      "learning_rate": 2.1127020419996973e-06,
      "loss": 0.0159,
      "step": 2414000
    },
    {
      "epoch": 3.95059667589665,
      "grad_norm": 0.6206973195075989,
      "learning_rate": 2.11263614978618e-06,
      "loss": 0.0092,
      "step": 2414020
    },
    {
      "epoch": 3.950629406335304,
      "grad_norm": 0.1571057289838791,
      "learning_rate": 2.1125702575726628e-06,
      "loss": 0.0093,
      "step": 2414040
    },
    {
      "epoch": 3.950662136773957,
      "grad_norm": 0.11469342559576035,
      "learning_rate": 2.1125043653591455e-06,
      "loss": 0.0118,
      "step": 2414060
    },
    {
      "epoch": 3.9506948672126105,
      "grad_norm": 0.20740629732608795,
      "learning_rate": 2.1124384731456287e-06,
      "loss": 0.0131,
      "step": 2414080
    },
    {
      "epoch": 3.9507275976512637,
      "grad_norm": 0.13635295629501343,
      "learning_rate": 2.1123725809321114e-06,
      "loss": 0.0152,
      "step": 2414100
    },
    {
      "epoch": 3.950760328089917,
      "grad_norm": 0.22681215405464172,
      "learning_rate": 2.112306688718594e-06,
      "loss": 0.0116,
      "step": 2414120
    },
    {
      "epoch": 3.9507930585285704,
      "grad_norm": 0.4735834300518036,
      "learning_rate": 2.1122407965050773e-06,
      "loss": 0.0081,
      "step": 2414140
    },
    {
      "epoch": 3.9508257889672236,
      "grad_norm": 0.26995617151260376,
      "learning_rate": 2.11217490429156e-06,
      "loss": 0.0118,
      "step": 2414160
    },
    {
      "epoch": 3.950858519405877,
      "grad_norm": 0.2805562913417816,
      "learning_rate": 2.112109012078043e-06,
      "loss": 0.0107,
      "step": 2414180
    },
    {
      "epoch": 3.9508912498445303,
      "grad_norm": 0.3139770030975342,
      "learning_rate": 2.112043119864526e-06,
      "loss": 0.0123,
      "step": 2414200
    },
    {
      "epoch": 3.950923980283184,
      "grad_norm": 0.10284090787172318,
      "learning_rate": 2.1119772276510087e-06,
      "loss": 0.0079,
      "step": 2414220
    },
    {
      "epoch": 3.950956710721837,
      "grad_norm": 0.08856716752052307,
      "learning_rate": 2.1119113354374914e-06,
      "loss": 0.0069,
      "step": 2414240
    },
    {
      "epoch": 3.9509894411604902,
      "grad_norm": 0.33226993680000305,
      "learning_rate": 2.111845443223974e-06,
      "loss": 0.0137,
      "step": 2414260
    },
    {
      "epoch": 3.951022171599144,
      "grad_norm": 0.069393590092659,
      "learning_rate": 2.1117795510104573e-06,
      "loss": 0.0085,
      "step": 2414280
    },
    {
      "epoch": 3.951054902037797,
      "grad_norm": 0.06426739692687988,
      "learning_rate": 2.11171365879694e-06,
      "loss": 0.0111,
      "step": 2414300
    },
    {
      "epoch": 3.9510876324764506,
      "grad_norm": 0.35588574409484863,
      "learning_rate": 2.1116477665834233e-06,
      "loss": 0.0058,
      "step": 2414320
    },
    {
      "epoch": 3.9511203629151037,
      "grad_norm": 0.5384901165962219,
      "learning_rate": 2.111581874369906e-06,
      "loss": 0.0126,
      "step": 2414340
    },
    {
      "epoch": 3.9511530933537573,
      "grad_norm": 0.3551982641220093,
      "learning_rate": 2.1115159821563887e-06,
      "loss": 0.0109,
      "step": 2414360
    },
    {
      "epoch": 3.9511858237924105,
      "grad_norm": 0.2223716825246811,
      "learning_rate": 2.1114500899428715e-06,
      "loss": 0.0127,
      "step": 2414380
    },
    {
      "epoch": 3.9512185542310636,
      "grad_norm": 0.6293401718139648,
      "learning_rate": 2.1113841977293546e-06,
      "loss": 0.0083,
      "step": 2414400
    },
    {
      "epoch": 3.951251284669717,
      "grad_norm": 0.20072130858898163,
      "learning_rate": 2.1113183055158374e-06,
      "loss": 0.0141,
      "step": 2414420
    },
    {
      "epoch": 3.9512840151083704,
      "grad_norm": 0.15154701471328735,
      "learning_rate": 2.11125241330232e-06,
      "loss": 0.0071,
      "step": 2414440
    },
    {
      "epoch": 3.951316745547024,
      "grad_norm": 0.28278055787086487,
      "learning_rate": 2.111186521088803e-06,
      "loss": 0.0085,
      "step": 2414460
    },
    {
      "epoch": 3.951349475985677,
      "grad_norm": 0.10355737805366516,
      "learning_rate": 2.111120628875286e-06,
      "loss": 0.0072,
      "step": 2414480
    },
    {
      "epoch": 3.9513822064243307,
      "grad_norm": 0.2687308192253113,
      "learning_rate": 2.1110547366617688e-06,
      "loss": 0.0137,
      "step": 2414500
    },
    {
      "epoch": 3.951414936862984,
      "grad_norm": 0.32202357053756714,
      "learning_rate": 2.110988844448252e-06,
      "loss": 0.0133,
      "step": 2414520
    },
    {
      "epoch": 3.951447667301637,
      "grad_norm": 0.1662120223045349,
      "learning_rate": 2.1109229522347347e-06,
      "loss": 0.0122,
      "step": 2414540
    },
    {
      "epoch": 3.9514803977402906,
      "grad_norm": 0.4122387170791626,
      "learning_rate": 2.1108570600212174e-06,
      "loss": 0.0089,
      "step": 2414560
    },
    {
      "epoch": 3.9515131281789437,
      "grad_norm": 0.4815744459629059,
      "learning_rate": 2.1107911678077e-06,
      "loss": 0.0128,
      "step": 2414580
    },
    {
      "epoch": 3.9515458586175973,
      "grad_norm": 0.21203726530075073,
      "learning_rate": 2.1107252755941833e-06,
      "loss": 0.0084,
      "step": 2414600
    },
    {
      "epoch": 3.9515785890562505,
      "grad_norm": 0.2875818908214569,
      "learning_rate": 2.110659383380666e-06,
      "loss": 0.0086,
      "step": 2414620
    },
    {
      "epoch": 3.951611319494904,
      "grad_norm": 0.13006539642810822,
      "learning_rate": 2.1105934911671488e-06,
      "loss": 0.0131,
      "step": 2414640
    },
    {
      "epoch": 3.9516440499335572,
      "grad_norm": 0.1790309101343155,
      "learning_rate": 2.1105275989536315e-06,
      "loss": 0.01,
      "step": 2414660
    },
    {
      "epoch": 3.9516767803722104,
      "grad_norm": 0.2207014262676239,
      "learning_rate": 2.1104617067401147e-06,
      "loss": 0.0093,
      "step": 2414680
    },
    {
      "epoch": 3.951709510810864,
      "grad_norm": 0.19533470273017883,
      "learning_rate": 2.110395814526598e-06,
      "loss": 0.0121,
      "step": 2414700
    },
    {
      "epoch": 3.951742241249517,
      "grad_norm": 0.09665729105472565,
      "learning_rate": 2.1103299223130806e-06,
      "loss": 0.0097,
      "step": 2414720
    },
    {
      "epoch": 3.9517749716881707,
      "grad_norm": 0.23778294026851654,
      "learning_rate": 2.1102640300995633e-06,
      "loss": 0.0095,
      "step": 2414740
    },
    {
      "epoch": 3.951807702126824,
      "grad_norm": 0.23691384494304657,
      "learning_rate": 2.110198137886046e-06,
      "loss": 0.0146,
      "step": 2414760
    },
    {
      "epoch": 3.9518404325654775,
      "grad_norm": 0.13052833080291748,
      "learning_rate": 2.110132245672529e-06,
      "loss": 0.0079,
      "step": 2414780
    },
    {
      "epoch": 3.9518731630041306,
      "grad_norm": 0.32262521982192993,
      "learning_rate": 2.110066353459012e-06,
      "loss": 0.0132,
      "step": 2414800
    },
    {
      "epoch": 3.9519058934427838,
      "grad_norm": 0.7923567891120911,
      "learning_rate": 2.1100004612454947e-06,
      "loss": 0.0096,
      "step": 2414820
    },
    {
      "epoch": 3.9519386238814374,
      "grad_norm": 0.19264332950115204,
      "learning_rate": 2.109934569031978e-06,
      "loss": 0.0121,
      "step": 2414840
    },
    {
      "epoch": 3.9519713543200905,
      "grad_norm": 0.20469221472740173,
      "learning_rate": 2.1098686768184606e-06,
      "loss": 0.0101,
      "step": 2414860
    },
    {
      "epoch": 3.952004084758744,
      "grad_norm": 0.04140916466712952,
      "learning_rate": 2.1098027846049434e-06,
      "loss": 0.0088,
      "step": 2414880
    },
    {
      "epoch": 3.9520368151973972,
      "grad_norm": 0.2687673270702362,
      "learning_rate": 2.1097368923914265e-06,
      "loss": 0.0152,
      "step": 2414900
    },
    {
      "epoch": 3.952069545636051,
      "grad_norm": 0.046053942292928696,
      "learning_rate": 2.1096710001779093e-06,
      "loss": 0.0082,
      "step": 2414920
    },
    {
      "epoch": 3.952102276074704,
      "grad_norm": 0.07402351498603821,
      "learning_rate": 2.109605107964392e-06,
      "loss": 0.0134,
      "step": 2414940
    },
    {
      "epoch": 3.952135006513357,
      "grad_norm": 0.31735408306121826,
      "learning_rate": 2.1095392157508747e-06,
      "loss": 0.011,
      "step": 2414960
    },
    {
      "epoch": 3.9521677369520107,
      "grad_norm": 0.26042255759239197,
      "learning_rate": 2.1094733235373575e-06,
      "loss": 0.0122,
      "step": 2414980
    },
    {
      "epoch": 3.952200467390664,
      "grad_norm": 0.44418081641197205,
      "learning_rate": 2.1094074313238406e-06,
      "loss": 0.0111,
      "step": 2415000
    },
    {
      "epoch": 3.952233197829317,
      "grad_norm": 0.16523511707782745,
      "learning_rate": 2.109341539110324e-06,
      "loss": 0.0105,
      "step": 2415020
    },
    {
      "epoch": 3.9522659282679706,
      "grad_norm": 0.3311506509780884,
      "learning_rate": 2.1092756468968065e-06,
      "loss": 0.0078,
      "step": 2415040
    },
    {
      "epoch": 3.9522986587066242,
      "grad_norm": 0.2069551646709442,
      "learning_rate": 2.1092097546832893e-06,
      "loss": 0.0116,
      "step": 2415060
    },
    {
      "epoch": 3.9523313891452774,
      "grad_norm": 0.054915010929107666,
      "learning_rate": 2.109143862469772e-06,
      "loss": 0.0062,
      "step": 2415080
    },
    {
      "epoch": 3.9523641195839305,
      "grad_norm": 0.8530710935592651,
      "learning_rate": 2.109077970256255e-06,
      "loss": 0.0125,
      "step": 2415100
    },
    {
      "epoch": 3.952396850022584,
      "grad_norm": 0.25082308053970337,
      "learning_rate": 2.109012078042738e-06,
      "loss": 0.0106,
      "step": 2415120
    },
    {
      "epoch": 3.9524295804612373,
      "grad_norm": 0.0783236175775528,
      "learning_rate": 2.1089461858292207e-06,
      "loss": 0.0091,
      "step": 2415140
    },
    {
      "epoch": 3.9524623108998904,
      "grad_norm": 0.139508455991745,
      "learning_rate": 2.1088802936157034e-06,
      "loss": 0.0079,
      "step": 2415160
    },
    {
      "epoch": 3.952495041338544,
      "grad_norm": 0.09897632896900177,
      "learning_rate": 2.108814401402186e-06,
      "loss": 0.0128,
      "step": 2415180
    },
    {
      "epoch": 3.9525277717771976,
      "grad_norm": 0.4180945158004761,
      "learning_rate": 2.1087485091886693e-06,
      "loss": 0.0097,
      "step": 2415200
    },
    {
      "epoch": 3.9525605022158508,
      "grad_norm": 0.4390569031238556,
      "learning_rate": 2.1086826169751525e-06,
      "loss": 0.0087,
      "step": 2415220
    },
    {
      "epoch": 3.952593232654504,
      "grad_norm": 0.36367273330688477,
      "learning_rate": 2.108616724761635e-06,
      "loss": 0.0099,
      "step": 2415240
    },
    {
      "epoch": 3.9526259630931575,
      "grad_norm": 0.07560362666845322,
      "learning_rate": 2.108550832548118e-06,
      "loss": 0.0117,
      "step": 2415260
    },
    {
      "epoch": 3.9526586935318107,
      "grad_norm": 0.31323421001434326,
      "learning_rate": 2.1084849403346007e-06,
      "loss": 0.011,
      "step": 2415280
    },
    {
      "epoch": 3.952691423970464,
      "grad_norm": 0.21993255615234375,
      "learning_rate": 2.108419048121084e-06,
      "loss": 0.0109,
      "step": 2415300
    },
    {
      "epoch": 3.9527241544091174,
      "grad_norm": 0.14307479560375214,
      "learning_rate": 2.1083531559075666e-06,
      "loss": 0.0063,
      "step": 2415320
    },
    {
      "epoch": 3.952756884847771,
      "grad_norm": 0.06702116876840591,
      "learning_rate": 2.1082872636940493e-06,
      "loss": 0.0105,
      "step": 2415340
    },
    {
      "epoch": 3.952789615286424,
      "grad_norm": 0.1602228283882141,
      "learning_rate": 2.108221371480532e-06,
      "loss": 0.0096,
      "step": 2415360
    },
    {
      "epoch": 3.9528223457250773,
      "grad_norm": 0.0777229517698288,
      "learning_rate": 2.1081554792670152e-06,
      "loss": 0.0133,
      "step": 2415380
    },
    {
      "epoch": 3.952855076163731,
      "grad_norm": 0.06969091296195984,
      "learning_rate": 2.108089587053498e-06,
      "loss": 0.0076,
      "step": 2415400
    },
    {
      "epoch": 3.952887806602384,
      "grad_norm": 0.15936467051506042,
      "learning_rate": 2.108023694839981e-06,
      "loss": 0.0123,
      "step": 2415420
    },
    {
      "epoch": 3.952920537041037,
      "grad_norm": 0.2649083733558655,
      "learning_rate": 2.107957802626464e-06,
      "loss": 0.006,
      "step": 2415440
    },
    {
      "epoch": 3.952953267479691,
      "grad_norm": 0.13678744435310364,
      "learning_rate": 2.1078919104129466e-06,
      "loss": 0.0135,
      "step": 2415460
    },
    {
      "epoch": 3.9529859979183444,
      "grad_norm": 0.17680929601192474,
      "learning_rate": 2.1078260181994294e-06,
      "loss": 0.0112,
      "step": 2415480
    },
    {
      "epoch": 3.9530187283569975,
      "grad_norm": 0.12333414703607559,
      "learning_rate": 2.1077601259859125e-06,
      "loss": 0.0091,
      "step": 2415500
    },
    {
      "epoch": 3.9530514587956507,
      "grad_norm": 0.17962269484996796,
      "learning_rate": 2.1076942337723953e-06,
      "loss": 0.0124,
      "step": 2415520
    },
    {
      "epoch": 3.9530841892343043,
      "grad_norm": 0.19517219066619873,
      "learning_rate": 2.107628341558878e-06,
      "loss": 0.0095,
      "step": 2415540
    },
    {
      "epoch": 3.9531169196729574,
      "grad_norm": 0.131615549325943,
      "learning_rate": 2.107562449345361e-06,
      "loss": 0.0125,
      "step": 2415560
    },
    {
      "epoch": 3.9531496501116106,
      "grad_norm": 0.033290814608335495,
      "learning_rate": 2.107496557131844e-06,
      "loss": 0.0104,
      "step": 2415580
    },
    {
      "epoch": 3.953182380550264,
      "grad_norm": 0.14381060004234314,
      "learning_rate": 2.1074306649183266e-06,
      "loss": 0.0082,
      "step": 2415600
    },
    {
      "epoch": 3.9532151109889173,
      "grad_norm": 0.3349582552909851,
      "learning_rate": 2.10736477270481e-06,
      "loss": 0.0164,
      "step": 2415620
    },
    {
      "epoch": 3.953247841427571,
      "grad_norm": 0.3885263204574585,
      "learning_rate": 2.1072988804912925e-06,
      "loss": 0.0072,
      "step": 2415640
    },
    {
      "epoch": 3.953280571866224,
      "grad_norm": 0.5877062678337097,
      "learning_rate": 2.1072329882777753e-06,
      "loss": 0.0142,
      "step": 2415660
    },
    {
      "epoch": 3.9533133023048777,
      "grad_norm": 0.2671287953853607,
      "learning_rate": 2.107167096064258e-06,
      "loss": 0.0115,
      "step": 2415680
    },
    {
      "epoch": 3.953346032743531,
      "grad_norm": 0.10505138337612152,
      "learning_rate": 2.107101203850741e-06,
      "loss": 0.0084,
      "step": 2415700
    },
    {
      "epoch": 3.953378763182184,
      "grad_norm": 0.23162776231765747,
      "learning_rate": 2.107035311637224e-06,
      "loss": 0.0122,
      "step": 2415720
    },
    {
      "epoch": 3.9534114936208375,
      "grad_norm": 0.6193253993988037,
      "learning_rate": 2.106969419423707e-06,
      "loss": 0.01,
      "step": 2415740
    },
    {
      "epoch": 3.9534442240594907,
      "grad_norm": 0.20984163880348206,
      "learning_rate": 2.10690352721019e-06,
      "loss": 0.0102,
      "step": 2415760
    },
    {
      "epoch": 3.9534769544981443,
      "grad_norm": 0.3255999684333801,
      "learning_rate": 2.1068376349966726e-06,
      "loss": 0.0157,
      "step": 2415780
    },
    {
      "epoch": 3.9535096849367974,
      "grad_norm": 0.17170874774456024,
      "learning_rate": 2.1067717427831553e-06,
      "loss": 0.0095,
      "step": 2415800
    },
    {
      "epoch": 3.953542415375451,
      "grad_norm": 0.19939649105072021,
      "learning_rate": 2.1067058505696385e-06,
      "loss": 0.0093,
      "step": 2415820
    },
    {
      "epoch": 3.953575145814104,
      "grad_norm": 0.49165111780166626,
      "learning_rate": 2.1066399583561212e-06,
      "loss": 0.0121,
      "step": 2415840
    },
    {
      "epoch": 3.9536078762527573,
      "grad_norm": 0.22030678391456604,
      "learning_rate": 2.106574066142604e-06,
      "loss": 0.0134,
      "step": 2415860
    },
    {
      "epoch": 3.953640606691411,
      "grad_norm": 0.41776272654533386,
      "learning_rate": 2.1065081739290867e-06,
      "loss": 0.0086,
      "step": 2415880
    },
    {
      "epoch": 3.953673337130064,
      "grad_norm": 0.21238087117671967,
      "learning_rate": 2.10644228171557e-06,
      "loss": 0.0105,
      "step": 2415900
    },
    {
      "epoch": 3.9537060675687177,
      "grad_norm": 0.40380096435546875,
      "learning_rate": 2.106376389502053e-06,
      "loss": 0.0124,
      "step": 2415920
    },
    {
      "epoch": 3.953738798007371,
      "grad_norm": 0.09242215007543564,
      "learning_rate": 2.1063104972885358e-06,
      "loss": 0.0119,
      "step": 2415940
    },
    {
      "epoch": 3.9537715284460244,
      "grad_norm": 0.16515326499938965,
      "learning_rate": 2.1062446050750185e-06,
      "loss": 0.006,
      "step": 2415960
    },
    {
      "epoch": 3.9538042588846776,
      "grad_norm": 0.4282270669937134,
      "learning_rate": 2.1061787128615012e-06,
      "loss": 0.0154,
      "step": 2415980
    },
    {
      "epoch": 3.9538369893233307,
      "grad_norm": 0.42524781823158264,
      "learning_rate": 2.106112820647984e-06,
      "loss": 0.0158,
      "step": 2416000
    },
    {
      "epoch": 3.9538697197619843,
      "grad_norm": 0.2760026454925537,
      "learning_rate": 2.106046928434467e-06,
      "loss": 0.0101,
      "step": 2416020
    },
    {
      "epoch": 3.9539024502006375,
      "grad_norm": 0.26079869270324707,
      "learning_rate": 2.10598103622095e-06,
      "loss": 0.0074,
      "step": 2416040
    },
    {
      "epoch": 3.953935180639291,
      "grad_norm": 0.10954587161540985,
      "learning_rate": 2.1059151440074326e-06,
      "loss": 0.012,
      "step": 2416060
    },
    {
      "epoch": 3.953967911077944,
      "grad_norm": 0.34011876583099365,
      "learning_rate": 2.1058492517939158e-06,
      "loss": 0.0081,
      "step": 2416080
    },
    {
      "epoch": 3.954000641516598,
      "grad_norm": 0.1902346909046173,
      "learning_rate": 2.1057833595803985e-06,
      "loss": 0.0063,
      "step": 2416100
    },
    {
      "epoch": 3.954033371955251,
      "grad_norm": 0.20424853265285492,
      "learning_rate": 2.1057174673668817e-06,
      "loss": 0.0101,
      "step": 2416120
    },
    {
      "epoch": 3.954066102393904,
      "grad_norm": 0.17613135278224945,
      "learning_rate": 2.1056515751533644e-06,
      "loss": 0.0096,
      "step": 2416140
    },
    {
      "epoch": 3.9540988328325577,
      "grad_norm": 0.10840636491775513,
      "learning_rate": 2.105585682939847e-06,
      "loss": 0.0078,
      "step": 2416160
    },
    {
      "epoch": 3.954131563271211,
      "grad_norm": 0.42033636569976807,
      "learning_rate": 2.10551979072633e-06,
      "loss": 0.0117,
      "step": 2416180
    },
    {
      "epoch": 3.9541642937098644,
      "grad_norm": 0.2069026529788971,
      "learning_rate": 2.1054538985128126e-06,
      "loss": 0.0221,
      "step": 2416200
    },
    {
      "epoch": 3.9541970241485176,
      "grad_norm": 0.2223796844482422,
      "learning_rate": 2.105388006299296e-06,
      "loss": 0.0111,
      "step": 2416220
    },
    {
      "epoch": 3.954229754587171,
      "grad_norm": 0.15152302384376526,
      "learning_rate": 2.1053221140857786e-06,
      "loss": 0.012,
      "step": 2416240
    },
    {
      "epoch": 3.9542624850258243,
      "grad_norm": 0.15790505707263947,
      "learning_rate": 2.1052562218722617e-06,
      "loss": 0.0117,
      "step": 2416260
    },
    {
      "epoch": 3.9542952154644775,
      "grad_norm": 0.3363669216632843,
      "learning_rate": 2.1051903296587445e-06,
      "loss": 0.0083,
      "step": 2416280
    },
    {
      "epoch": 3.954327945903131,
      "grad_norm": 0.11557427793741226,
      "learning_rate": 2.105124437445227e-06,
      "loss": 0.0102,
      "step": 2416300
    },
    {
      "epoch": 3.9543606763417842,
      "grad_norm": 0.5322729349136353,
      "learning_rate": 2.1050585452317104e-06,
      "loss": 0.0146,
      "step": 2416320
    },
    {
      "epoch": 3.954393406780438,
      "grad_norm": 0.4701443612575531,
      "learning_rate": 2.104992653018193e-06,
      "loss": 0.0079,
      "step": 2416340
    },
    {
      "epoch": 3.954426137219091,
      "grad_norm": 0.2722082734107971,
      "learning_rate": 2.104926760804676e-06,
      "loss": 0.0068,
      "step": 2416360
    },
    {
      "epoch": 3.9544588676577446,
      "grad_norm": 0.31701093912124634,
      "learning_rate": 2.1048608685911586e-06,
      "loss": 0.0111,
      "step": 2416380
    },
    {
      "epoch": 3.9544915980963977,
      "grad_norm": 0.18609023094177246,
      "learning_rate": 2.1047949763776413e-06,
      "loss": 0.0158,
      "step": 2416400
    },
    {
      "epoch": 3.954524328535051,
      "grad_norm": 0.25151559710502625,
      "learning_rate": 2.1047290841641245e-06,
      "loss": 0.0101,
      "step": 2416420
    },
    {
      "epoch": 3.9545570589737045,
      "grad_norm": 0.21830762922763824,
      "learning_rate": 2.1046631919506076e-06,
      "loss": 0.0084,
      "step": 2416440
    },
    {
      "epoch": 3.9545897894123576,
      "grad_norm": 0.16703428328037262,
      "learning_rate": 2.1045972997370904e-06,
      "loss": 0.0102,
      "step": 2416460
    },
    {
      "epoch": 3.954622519851011,
      "grad_norm": 0.6723656058311462,
      "learning_rate": 2.104531407523573e-06,
      "loss": 0.0106,
      "step": 2416480
    },
    {
      "epoch": 3.9546552502896644,
      "grad_norm": 0.18870426714420319,
      "learning_rate": 2.104465515310056e-06,
      "loss": 0.0076,
      "step": 2416500
    },
    {
      "epoch": 3.954687980728318,
      "grad_norm": 0.31587427854537964,
      "learning_rate": 2.104399623096539e-06,
      "loss": 0.0122,
      "step": 2416520
    },
    {
      "epoch": 3.954720711166971,
      "grad_norm": 0.33482545614242554,
      "learning_rate": 2.1043337308830218e-06,
      "loss": 0.0105,
      "step": 2416540
    },
    {
      "epoch": 3.9547534416056243,
      "grad_norm": 0.06899144500494003,
      "learning_rate": 2.1042678386695045e-06,
      "loss": 0.0089,
      "step": 2416560
    },
    {
      "epoch": 3.954786172044278,
      "grad_norm": 0.1628929078578949,
      "learning_rate": 2.1042019464559872e-06,
      "loss": 0.0091,
      "step": 2416580
    },
    {
      "epoch": 3.954818902482931,
      "grad_norm": 0.6816266179084778,
      "learning_rate": 2.1041360542424704e-06,
      "loss": 0.0126,
      "step": 2416600
    },
    {
      "epoch": 3.954851632921584,
      "grad_norm": 0.11652688682079315,
      "learning_rate": 2.104070162028953e-06,
      "loss": 0.0056,
      "step": 2416620
    },
    {
      "epoch": 3.9548843633602377,
      "grad_norm": 0.350088894367218,
      "learning_rate": 2.1040042698154363e-06,
      "loss": 0.0098,
      "step": 2416640
    },
    {
      "epoch": 3.9549170937988913,
      "grad_norm": 0.5695211291313171,
      "learning_rate": 2.103938377601919e-06,
      "loss": 0.0101,
      "step": 2416660
    },
    {
      "epoch": 3.9549498242375445,
      "grad_norm": 0.15466585755348206,
      "learning_rate": 2.103872485388402e-06,
      "loss": 0.0044,
      "step": 2416680
    },
    {
      "epoch": 3.9549825546761976,
      "grad_norm": 0.29870039224624634,
      "learning_rate": 2.1038065931748845e-06,
      "loss": 0.0088,
      "step": 2416700
    },
    {
      "epoch": 3.9550152851148512,
      "grad_norm": 0.3565014600753784,
      "learning_rate": 2.1037407009613677e-06,
      "loss": 0.0082,
      "step": 2416720
    },
    {
      "epoch": 3.9550480155535044,
      "grad_norm": 0.6744745373725891,
      "learning_rate": 2.1036748087478504e-06,
      "loss": 0.0152,
      "step": 2416740
    },
    {
      "epoch": 3.9550807459921575,
      "grad_norm": 0.23285940289497375,
      "learning_rate": 2.103608916534333e-06,
      "loss": 0.0076,
      "step": 2416760
    },
    {
      "epoch": 3.955113476430811,
      "grad_norm": 0.13074645400047302,
      "learning_rate": 2.1035430243208163e-06,
      "loss": 0.0189,
      "step": 2416780
    },
    {
      "epoch": 3.9551462068694647,
      "grad_norm": 0.5630912780761719,
      "learning_rate": 2.103477132107299e-06,
      "loss": 0.0108,
      "step": 2416800
    },
    {
      "epoch": 3.955178937308118,
      "grad_norm": 0.302408367395401,
      "learning_rate": 2.103411239893782e-06,
      "loss": 0.009,
      "step": 2416820
    },
    {
      "epoch": 3.955211667746771,
      "grad_norm": 0.31958064436912537,
      "learning_rate": 2.103345347680265e-06,
      "loss": 0.0118,
      "step": 2416840
    },
    {
      "epoch": 3.9552443981854246,
      "grad_norm": 0.19177399575710297,
      "learning_rate": 2.1032794554667477e-06,
      "loss": 0.0077,
      "step": 2416860
    },
    {
      "epoch": 3.9552771286240778,
      "grad_norm": 0.4876551330089569,
      "learning_rate": 2.1032135632532305e-06,
      "loss": 0.01,
      "step": 2416880
    },
    {
      "epoch": 3.955309859062731,
      "grad_norm": 0.313876748085022,
      "learning_rate": 2.103147671039713e-06,
      "loss": 0.0076,
      "step": 2416900
    },
    {
      "epoch": 3.9553425895013845,
      "grad_norm": 0.4086149334907532,
      "learning_rate": 2.1030817788261964e-06,
      "loss": 0.0095,
      "step": 2416920
    },
    {
      "epoch": 3.955375319940038,
      "grad_norm": 0.1341368407011032,
      "learning_rate": 2.103015886612679e-06,
      "loss": 0.0121,
      "step": 2416940
    },
    {
      "epoch": 3.9554080503786913,
      "grad_norm": 0.12796680629253387,
      "learning_rate": 2.1029499943991623e-06,
      "loss": 0.0113,
      "step": 2416960
    },
    {
      "epoch": 3.9554407808173444,
      "grad_norm": 0.15573827922344208,
      "learning_rate": 2.102884102185645e-06,
      "loss": 0.0121,
      "step": 2416980
    },
    {
      "epoch": 3.955473511255998,
      "grad_norm": 0.1423567533493042,
      "learning_rate": 2.1028182099721277e-06,
      "loss": 0.0157,
      "step": 2417000
    },
    {
      "epoch": 3.955506241694651,
      "grad_norm": 0.08860696107149124,
      "learning_rate": 2.1027523177586105e-06,
      "loss": 0.0097,
      "step": 2417020
    },
    {
      "epoch": 3.9555389721333043,
      "grad_norm": 0.13130107522010803,
      "learning_rate": 2.1026864255450936e-06,
      "loss": 0.0084,
      "step": 2417040
    },
    {
      "epoch": 3.955571702571958,
      "grad_norm": 0.22884339094161987,
      "learning_rate": 2.1026205333315764e-06,
      "loss": 0.0075,
      "step": 2417060
    },
    {
      "epoch": 3.955604433010611,
      "grad_norm": 0.34133028984069824,
      "learning_rate": 2.102554641118059e-06,
      "loss": 0.0135,
      "step": 2417080
    },
    {
      "epoch": 3.9556371634492646,
      "grad_norm": 0.17546460032463074,
      "learning_rate": 2.102488748904542e-06,
      "loss": 0.0101,
      "step": 2417100
    },
    {
      "epoch": 3.955669893887918,
      "grad_norm": 0.28063780069351196,
      "learning_rate": 2.102422856691025e-06,
      "loss": 0.0113,
      "step": 2417120
    },
    {
      "epoch": 3.9557026243265714,
      "grad_norm": 0.23945559561252594,
      "learning_rate": 2.1023569644775078e-06,
      "loss": 0.0141,
      "step": 2417140
    },
    {
      "epoch": 3.9557353547652245,
      "grad_norm": 0.0956440195441246,
      "learning_rate": 2.102291072263991e-06,
      "loss": 0.0073,
      "step": 2417160
    },
    {
      "epoch": 3.9557680852038777,
      "grad_norm": 0.18263469636440277,
      "learning_rate": 2.1022251800504737e-06,
      "loss": 0.0078,
      "step": 2417180
    },
    {
      "epoch": 3.9558008156425313,
      "grad_norm": 0.03706654533743858,
      "learning_rate": 2.1021592878369564e-06,
      "loss": 0.0099,
      "step": 2417200
    },
    {
      "epoch": 3.9558335460811844,
      "grad_norm": 0.17635764181613922,
      "learning_rate": 2.102093395623439e-06,
      "loss": 0.0104,
      "step": 2417220
    },
    {
      "epoch": 3.955866276519838,
      "grad_norm": 0.40693503618240356,
      "learning_rate": 2.1020275034099223e-06,
      "loss": 0.0148,
      "step": 2417240
    },
    {
      "epoch": 3.955899006958491,
      "grad_norm": 0.1443479061126709,
      "learning_rate": 2.101961611196405e-06,
      "loss": 0.009,
      "step": 2417260
    },
    {
      "epoch": 3.9559317373971448,
      "grad_norm": 0.15715770423412323,
      "learning_rate": 2.101895718982888e-06,
      "loss": 0.0091,
      "step": 2417280
    },
    {
      "epoch": 3.955964467835798,
      "grad_norm": 0.4610510766506195,
      "learning_rate": 2.101829826769371e-06,
      "loss": 0.0076,
      "step": 2417300
    },
    {
      "epoch": 3.955997198274451,
      "grad_norm": 0.22846801578998566,
      "learning_rate": 2.1017639345558537e-06,
      "loss": 0.0125,
      "step": 2417320
    },
    {
      "epoch": 3.9560299287131047,
      "grad_norm": 0.09189148992300034,
      "learning_rate": 2.101698042342337e-06,
      "loss": 0.013,
      "step": 2417340
    },
    {
      "epoch": 3.956062659151758,
      "grad_norm": 0.3536752462387085,
      "learning_rate": 2.1016321501288196e-06,
      "loss": 0.0114,
      "step": 2417360
    },
    {
      "epoch": 3.9560953895904114,
      "grad_norm": 0.33840298652648926,
      "learning_rate": 2.1015662579153023e-06,
      "loss": 0.0076,
      "step": 2417380
    },
    {
      "epoch": 3.9561281200290646,
      "grad_norm": 0.08059248328208923,
      "learning_rate": 2.101500365701785e-06,
      "loss": 0.0138,
      "step": 2417400
    },
    {
      "epoch": 3.956160850467718,
      "grad_norm": 0.16921566426753998,
      "learning_rate": 2.101434473488268e-06,
      "loss": 0.0133,
      "step": 2417420
    },
    {
      "epoch": 3.9561935809063713,
      "grad_norm": 0.18133556842803955,
      "learning_rate": 2.101368581274751e-06,
      "loss": 0.0144,
      "step": 2417440
    },
    {
      "epoch": 3.9562263113450244,
      "grad_norm": 0.08273978531360626,
      "learning_rate": 2.1013026890612337e-06,
      "loss": 0.0075,
      "step": 2417460
    },
    {
      "epoch": 3.956259041783678,
      "grad_norm": 0.14725792407989502,
      "learning_rate": 2.101236796847717e-06,
      "loss": 0.0138,
      "step": 2417480
    },
    {
      "epoch": 3.956291772222331,
      "grad_norm": 0.21871361136436462,
      "learning_rate": 2.1011709046341996e-06,
      "loss": 0.009,
      "step": 2417500
    },
    {
      "epoch": 3.956324502660985,
      "grad_norm": 0.5735102891921997,
      "learning_rate": 2.1011050124206824e-06,
      "loss": 0.013,
      "step": 2417520
    },
    {
      "epoch": 3.956357233099638,
      "grad_norm": 0.14503245055675507,
      "learning_rate": 2.1010391202071655e-06,
      "loss": 0.0103,
      "step": 2417540
    },
    {
      "epoch": 3.9563899635382915,
      "grad_norm": 0.3444572985172272,
      "learning_rate": 2.1009732279936483e-06,
      "loss": 0.0101,
      "step": 2417560
    },
    {
      "epoch": 3.9564226939769447,
      "grad_norm": 0.1997789591550827,
      "learning_rate": 2.100907335780131e-06,
      "loss": 0.0079,
      "step": 2417580
    },
    {
      "epoch": 3.956455424415598,
      "grad_norm": 0.4491136074066162,
      "learning_rate": 2.1008414435666137e-06,
      "loss": 0.0125,
      "step": 2417600
    },
    {
      "epoch": 3.9564881548542514,
      "grad_norm": 0.08412262797355652,
      "learning_rate": 2.1007755513530965e-06,
      "loss": 0.0044,
      "step": 2417620
    },
    {
      "epoch": 3.9565208852929046,
      "grad_norm": 0.36206990480422974,
      "learning_rate": 2.1007096591395797e-06,
      "loss": 0.007,
      "step": 2417640
    },
    {
      "epoch": 3.956553615731558,
      "grad_norm": 0.3132633864879608,
      "learning_rate": 2.100643766926063e-06,
      "loss": 0.0079,
      "step": 2417660
    },
    {
      "epoch": 3.9565863461702113,
      "grad_norm": 0.1890491545200348,
      "learning_rate": 2.1005778747125456e-06,
      "loss": 0.0142,
      "step": 2417680
    },
    {
      "epoch": 3.956619076608865,
      "grad_norm": 0.10199222713708878,
      "learning_rate": 2.1005119824990283e-06,
      "loss": 0.0121,
      "step": 2417700
    },
    {
      "epoch": 3.956651807047518,
      "grad_norm": 0.2374129742383957,
      "learning_rate": 2.100446090285511e-06,
      "loss": 0.0116,
      "step": 2417720
    },
    {
      "epoch": 3.956684537486171,
      "grad_norm": 0.29241472482681274,
      "learning_rate": 2.100380198071994e-06,
      "loss": 0.0111,
      "step": 2417740
    },
    {
      "epoch": 3.956717267924825,
      "grad_norm": 0.2211025506258011,
      "learning_rate": 2.100314305858477e-06,
      "loss": 0.0154,
      "step": 2417760
    },
    {
      "epoch": 3.956749998363478,
      "grad_norm": 0.16957375407218933,
      "learning_rate": 2.1002484136449597e-06,
      "loss": 0.01,
      "step": 2417780
    },
    {
      "epoch": 3.9567827288021316,
      "grad_norm": 0.46654579043388367,
      "learning_rate": 2.1001825214314424e-06,
      "loss": 0.008,
      "step": 2417800
    },
    {
      "epoch": 3.9568154592407847,
      "grad_norm": 0.5777663588523865,
      "learning_rate": 2.100116629217925e-06,
      "loss": 0.0164,
      "step": 2417820
    },
    {
      "epoch": 3.9568481896794383,
      "grad_norm": 0.2499004602432251,
      "learning_rate": 2.1000507370044083e-06,
      "loss": 0.0106,
      "step": 2417840
    },
    {
      "epoch": 3.9568809201180914,
      "grad_norm": 0.10005927085876465,
      "learning_rate": 2.0999848447908915e-06,
      "loss": 0.0067,
      "step": 2417860
    },
    {
      "epoch": 3.9569136505567446,
      "grad_norm": 0.2467949539422989,
      "learning_rate": 2.0999189525773742e-06,
      "loss": 0.0102,
      "step": 2417880
    },
    {
      "epoch": 3.956946380995398,
      "grad_norm": 0.8702334761619568,
      "learning_rate": 2.099853060363857e-06,
      "loss": 0.0083,
      "step": 2417900
    },
    {
      "epoch": 3.9569791114340513,
      "grad_norm": 0.12336702644824982,
      "learning_rate": 2.0997871681503397e-06,
      "loss": 0.0072,
      "step": 2417920
    },
    {
      "epoch": 3.957011841872705,
      "grad_norm": 0.12356609851121902,
      "learning_rate": 2.099721275936823e-06,
      "loss": 0.0119,
      "step": 2417940
    },
    {
      "epoch": 3.957044572311358,
      "grad_norm": 0.03641289472579956,
      "learning_rate": 2.0996553837233056e-06,
      "loss": 0.0069,
      "step": 2417960
    },
    {
      "epoch": 3.9570773027500117,
      "grad_norm": 0.16176936030387878,
      "learning_rate": 2.0995894915097883e-06,
      "loss": 0.0099,
      "step": 2417980
    },
    {
      "epoch": 3.957110033188665,
      "grad_norm": 0.1570737063884735,
      "learning_rate": 2.099523599296271e-06,
      "loss": 0.0082,
      "step": 2418000
    },
    {
      "epoch": 3.957142763627318,
      "grad_norm": 0.3019956946372986,
      "learning_rate": 2.0994577070827542e-06,
      "loss": 0.0139,
      "step": 2418020
    },
    {
      "epoch": 3.9571754940659716,
      "grad_norm": 1.2209402322769165,
      "learning_rate": 2.099391814869237e-06,
      "loss": 0.0094,
      "step": 2418040
    },
    {
      "epoch": 3.9572082245046247,
      "grad_norm": 0.06271860748529434,
      "learning_rate": 2.09932592265572e-06,
      "loss": 0.0109,
      "step": 2418060
    },
    {
      "epoch": 3.957240954943278,
      "grad_norm": 0.6114007830619812,
      "learning_rate": 2.099260030442203e-06,
      "loss": 0.0097,
      "step": 2418080
    },
    {
      "epoch": 3.9572736853819315,
      "grad_norm": 0.1526484191417694,
      "learning_rate": 2.0991941382286856e-06,
      "loss": 0.0144,
      "step": 2418100
    },
    {
      "epoch": 3.957306415820585,
      "grad_norm": 0.16685621440410614,
      "learning_rate": 2.0991282460151684e-06,
      "loss": 0.0145,
      "step": 2418120
    },
    {
      "epoch": 3.957339146259238,
      "grad_norm": 0.226842999458313,
      "learning_rate": 2.0990623538016515e-06,
      "loss": 0.0094,
      "step": 2418140
    },
    {
      "epoch": 3.9573718766978914,
      "grad_norm": 0.3211677074432373,
      "learning_rate": 2.0989964615881343e-06,
      "loss": 0.0105,
      "step": 2418160
    },
    {
      "epoch": 3.957404607136545,
      "grad_norm": 0.15288840234279633,
      "learning_rate": 2.0989305693746174e-06,
      "loss": 0.0106,
      "step": 2418180
    },
    {
      "epoch": 3.957437337575198,
      "grad_norm": 1.1037957668304443,
      "learning_rate": 2.0988646771611e-06,
      "loss": 0.01,
      "step": 2418200
    },
    {
      "epoch": 3.9574700680138513,
      "grad_norm": 0.21401788294315338,
      "learning_rate": 2.098798784947583e-06,
      "loss": 0.0073,
      "step": 2418220
    },
    {
      "epoch": 3.957502798452505,
      "grad_norm": 0.4890753924846649,
      "learning_rate": 2.0987328927340657e-06,
      "loss": 0.0088,
      "step": 2418240
    },
    {
      "epoch": 3.9575355288911584,
      "grad_norm": 0.47446200251579285,
      "learning_rate": 2.098667000520549e-06,
      "loss": 0.0126,
      "step": 2418260
    },
    {
      "epoch": 3.9575682593298116,
      "grad_norm": 0.129556342959404,
      "learning_rate": 2.0986011083070316e-06,
      "loss": 0.0123,
      "step": 2418280
    },
    {
      "epoch": 3.9576009897684647,
      "grad_norm": 0.3310023546218872,
      "learning_rate": 2.0985352160935143e-06,
      "loss": 0.0125,
      "step": 2418300
    },
    {
      "epoch": 3.9576337202071183,
      "grad_norm": 0.35137152671813965,
      "learning_rate": 2.098469323879997e-06,
      "loss": 0.01,
      "step": 2418320
    },
    {
      "epoch": 3.9576664506457715,
      "grad_norm": 0.16326045989990234,
      "learning_rate": 2.09840343166648e-06,
      "loss": 0.0122,
      "step": 2418340
    },
    {
      "epoch": 3.9576991810844246,
      "grad_norm": 0.3383260667324066,
      "learning_rate": 2.098337539452963e-06,
      "loss": 0.0102,
      "step": 2418360
    },
    {
      "epoch": 3.9577319115230782,
      "grad_norm": 0.13530346751213074,
      "learning_rate": 2.098271647239446e-06,
      "loss": 0.0116,
      "step": 2418380
    },
    {
      "epoch": 3.957764641961732,
      "grad_norm": 0.10943709313869476,
      "learning_rate": 2.098205755025929e-06,
      "loss": 0.0115,
      "step": 2418400
    },
    {
      "epoch": 3.957797372400385,
      "grad_norm": 0.2602819502353668,
      "learning_rate": 2.0981398628124116e-06,
      "loss": 0.0093,
      "step": 2418420
    },
    {
      "epoch": 3.957830102839038,
      "grad_norm": 0.3040057122707367,
      "learning_rate": 2.0980739705988943e-06,
      "loss": 0.009,
      "step": 2418440
    },
    {
      "epoch": 3.9578628332776917,
      "grad_norm": 0.13189685344696045,
      "learning_rate": 2.0980080783853775e-06,
      "loss": 0.0092,
      "step": 2418460
    },
    {
      "epoch": 3.957895563716345,
      "grad_norm": 0.0774664580821991,
      "learning_rate": 2.0979421861718602e-06,
      "loss": 0.0082,
      "step": 2418480
    },
    {
      "epoch": 3.957928294154998,
      "grad_norm": 0.35960790514945984,
      "learning_rate": 2.097876293958343e-06,
      "loss": 0.0086,
      "step": 2418500
    },
    {
      "epoch": 3.9579610245936516,
      "grad_norm": 0.19190004467964172,
      "learning_rate": 2.0978104017448257e-06,
      "loss": 0.009,
      "step": 2418520
    },
    {
      "epoch": 3.957993755032305,
      "grad_norm": 0.1781788021326065,
      "learning_rate": 2.097744509531309e-06,
      "loss": 0.0132,
      "step": 2418540
    },
    {
      "epoch": 3.9580264854709584,
      "grad_norm": 0.5305209755897522,
      "learning_rate": 2.097678617317792e-06,
      "loss": 0.011,
      "step": 2418560
    },
    {
      "epoch": 3.9580592159096115,
      "grad_norm": 0.1362977921962738,
      "learning_rate": 2.0976127251042748e-06,
      "loss": 0.0128,
      "step": 2418580
    },
    {
      "epoch": 3.958091946348265,
      "grad_norm": 0.2503391206264496,
      "learning_rate": 2.0975468328907575e-06,
      "loss": 0.0093,
      "step": 2418600
    },
    {
      "epoch": 3.9581246767869183,
      "grad_norm": 0.36350882053375244,
      "learning_rate": 2.0974809406772403e-06,
      "loss": 0.0099,
      "step": 2418620
    },
    {
      "epoch": 3.9581574072255714,
      "grad_norm": 0.44665244221687317,
      "learning_rate": 2.097415048463723e-06,
      "loss": 0.0094,
      "step": 2418640
    },
    {
      "epoch": 3.958190137664225,
      "grad_norm": 0.18498682975769043,
      "learning_rate": 2.097349156250206e-06,
      "loss": 0.0116,
      "step": 2418660
    },
    {
      "epoch": 3.958222868102878,
      "grad_norm": 0.4835411310195923,
      "learning_rate": 2.097283264036689e-06,
      "loss": 0.0085,
      "step": 2418680
    },
    {
      "epoch": 3.9582555985415318,
      "grad_norm": 0.20964093506336212,
      "learning_rate": 2.0972173718231716e-06,
      "loss": 0.0113,
      "step": 2418700
    },
    {
      "epoch": 3.958288328980185,
      "grad_norm": 0.06445413827896118,
      "learning_rate": 2.097151479609655e-06,
      "loss": 0.0075,
      "step": 2418720
    },
    {
      "epoch": 3.9583210594188385,
      "grad_norm": 0.5753540396690369,
      "learning_rate": 2.0970855873961375e-06,
      "loss": 0.0099,
      "step": 2418740
    },
    {
      "epoch": 3.9583537898574916,
      "grad_norm": 0.15389148890972137,
      "learning_rate": 2.0970196951826207e-06,
      "loss": 0.0117,
      "step": 2418760
    },
    {
      "epoch": 3.958386520296145,
      "grad_norm": 0.3095090389251709,
      "learning_rate": 2.0969538029691034e-06,
      "loss": 0.0106,
      "step": 2418780
    },
    {
      "epoch": 3.9584192507347984,
      "grad_norm": 0.16333866119384766,
      "learning_rate": 2.096887910755586e-06,
      "loss": 0.0091,
      "step": 2418800
    },
    {
      "epoch": 3.9584519811734515,
      "grad_norm": 0.20404422283172607,
      "learning_rate": 2.096822018542069e-06,
      "loss": 0.0086,
      "step": 2418820
    },
    {
      "epoch": 3.958484711612105,
      "grad_norm": 0.08308364450931549,
      "learning_rate": 2.0967561263285517e-06,
      "loss": 0.0086,
      "step": 2418840
    },
    {
      "epoch": 3.9585174420507583,
      "grad_norm": 0.04656050726771355,
      "learning_rate": 2.096690234115035e-06,
      "loss": 0.0115,
      "step": 2418860
    },
    {
      "epoch": 3.958550172489412,
      "grad_norm": 0.16242258250713348,
      "learning_rate": 2.0966243419015176e-06,
      "loss": 0.0094,
      "step": 2418880
    },
    {
      "epoch": 3.958582902928065,
      "grad_norm": 0.1275840401649475,
      "learning_rate": 2.0965584496880007e-06,
      "loss": 0.0132,
      "step": 2418900
    },
    {
      "epoch": 3.958615633366718,
      "grad_norm": 0.2383965700864792,
      "learning_rate": 2.0964925574744835e-06,
      "loss": 0.0109,
      "step": 2418920
    },
    {
      "epoch": 3.9586483638053718,
      "grad_norm": 0.3373810648918152,
      "learning_rate": 2.096426665260966e-06,
      "loss": 0.0116,
      "step": 2418940
    },
    {
      "epoch": 3.958681094244025,
      "grad_norm": 0.16400811076164246,
      "learning_rate": 2.0963607730474494e-06,
      "loss": 0.0104,
      "step": 2418960
    },
    {
      "epoch": 3.9587138246826785,
      "grad_norm": 0.22234389185905457,
      "learning_rate": 2.096294880833932e-06,
      "loss": 0.0127,
      "step": 2418980
    },
    {
      "epoch": 3.9587465551213317,
      "grad_norm": 0.28125083446502686,
      "learning_rate": 2.096228988620415e-06,
      "loss": 0.0115,
      "step": 2419000
    },
    {
      "epoch": 3.9587792855599853,
      "grad_norm": 0.2668324410915375,
      "learning_rate": 2.0961630964068976e-06,
      "loss": 0.0118,
      "step": 2419020
    },
    {
      "epoch": 3.9588120159986384,
      "grad_norm": 0.08850044012069702,
      "learning_rate": 2.0960972041933803e-06,
      "loss": 0.0165,
      "step": 2419040
    },
    {
      "epoch": 3.9588447464372916,
      "grad_norm": 0.15549659729003906,
      "learning_rate": 2.0960313119798635e-06,
      "loss": 0.0092,
      "step": 2419060
    },
    {
      "epoch": 3.958877476875945,
      "grad_norm": 0.4770033061504364,
      "learning_rate": 2.0959654197663467e-06,
      "loss": 0.0095,
      "step": 2419080
    },
    {
      "epoch": 3.9589102073145983,
      "grad_norm": 0.16632847487926483,
      "learning_rate": 2.0958995275528294e-06,
      "loss": 0.0064,
      "step": 2419100
    },
    {
      "epoch": 3.958942937753252,
      "grad_norm": 0.105165034532547,
      "learning_rate": 2.095833635339312e-06,
      "loss": 0.0067,
      "step": 2419120
    },
    {
      "epoch": 3.958975668191905,
      "grad_norm": 0.3575746715068817,
      "learning_rate": 2.095767743125795e-06,
      "loss": 0.0099,
      "step": 2419140
    },
    {
      "epoch": 3.9590083986305586,
      "grad_norm": 0.35679927468299866,
      "learning_rate": 2.095701850912278e-06,
      "loss": 0.0115,
      "step": 2419160
    },
    {
      "epoch": 3.959041129069212,
      "grad_norm": 0.1630127876996994,
      "learning_rate": 2.0956359586987608e-06,
      "loss": 0.0119,
      "step": 2419180
    },
    {
      "epoch": 3.959073859507865,
      "grad_norm": 0.2537357807159424,
      "learning_rate": 2.0955700664852435e-06,
      "loss": 0.0115,
      "step": 2419200
    },
    {
      "epoch": 3.9591065899465185,
      "grad_norm": 0.20235349237918854,
      "learning_rate": 2.0955041742717263e-06,
      "loss": 0.0117,
      "step": 2419220
    },
    {
      "epoch": 3.9591393203851717,
      "grad_norm": 0.8496261835098267,
      "learning_rate": 2.0954382820582094e-06,
      "loss": 0.01,
      "step": 2419240
    },
    {
      "epoch": 3.9591720508238253,
      "grad_norm": 0.48658210039138794,
      "learning_rate": 2.095372389844692e-06,
      "loss": 0.01,
      "step": 2419260
    },
    {
      "epoch": 3.9592047812624784,
      "grad_norm": 0.11963258683681488,
      "learning_rate": 2.0953064976311753e-06,
      "loss": 0.0091,
      "step": 2419280
    },
    {
      "epoch": 3.959237511701132,
      "grad_norm": 0.23741813004016876,
      "learning_rate": 2.095240605417658e-06,
      "loss": 0.013,
      "step": 2419300
    },
    {
      "epoch": 3.959270242139785,
      "grad_norm": 0.12768001854419708,
      "learning_rate": 2.095174713204141e-06,
      "loss": 0.0112,
      "step": 2419320
    },
    {
      "epoch": 3.9593029725784383,
      "grad_norm": 0.25053051114082336,
      "learning_rate": 2.0951088209906235e-06,
      "loss": 0.0109,
      "step": 2419340
    },
    {
      "epoch": 3.959335703017092,
      "grad_norm": 0.144786536693573,
      "learning_rate": 2.0950429287771067e-06,
      "loss": 0.0106,
      "step": 2419360
    },
    {
      "epoch": 3.959368433455745,
      "grad_norm": 0.1340622901916504,
      "learning_rate": 2.0949770365635894e-06,
      "loss": 0.0102,
      "step": 2419380
    },
    {
      "epoch": 3.9594011638943987,
      "grad_norm": 0.32527419924736023,
      "learning_rate": 2.094911144350072e-06,
      "loss": 0.0154,
      "step": 2419400
    },
    {
      "epoch": 3.959433894333052,
      "grad_norm": 0.4497453570365906,
      "learning_rate": 2.0948452521365553e-06,
      "loss": 0.0129,
      "step": 2419420
    },
    {
      "epoch": 3.9594666247717054,
      "grad_norm": 0.1589611917734146,
      "learning_rate": 2.094779359923038e-06,
      "loss": 0.01,
      "step": 2419440
    },
    {
      "epoch": 3.9594993552103586,
      "grad_norm": 0.08535479754209518,
      "learning_rate": 2.094713467709521e-06,
      "loss": 0.0096,
      "step": 2419460
    },
    {
      "epoch": 3.9595320856490117,
      "grad_norm": 0.30716192722320557,
      "learning_rate": 2.094647575496004e-06,
      "loss": 0.009,
      "step": 2419480
    },
    {
      "epoch": 3.9595648160876653,
      "grad_norm": 0.36575886607170105,
      "learning_rate": 2.0945816832824867e-06,
      "loss": 0.0118,
      "step": 2419500
    },
    {
      "epoch": 3.9595975465263185,
      "grad_norm": 0.133451908826828,
      "learning_rate": 2.0945157910689695e-06,
      "loss": 0.0075,
      "step": 2419520
    },
    {
      "epoch": 3.959630276964972,
      "grad_norm": 0.16007453203201294,
      "learning_rate": 2.094449898855452e-06,
      "loss": 0.0099,
      "step": 2419540
    },
    {
      "epoch": 3.959663007403625,
      "grad_norm": 0.2839280664920807,
      "learning_rate": 2.0943840066419354e-06,
      "loss": 0.0117,
      "step": 2419560
    },
    {
      "epoch": 3.959695737842279,
      "grad_norm": 0.040067724883556366,
      "learning_rate": 2.094318114428418e-06,
      "loss": 0.0091,
      "step": 2419580
    },
    {
      "epoch": 3.959728468280932,
      "grad_norm": 0.5444403886795044,
      "learning_rate": 2.0942522222149013e-06,
      "loss": 0.0095,
      "step": 2419600
    },
    {
      "epoch": 3.959761198719585,
      "grad_norm": 0.5510744452476501,
      "learning_rate": 2.094186330001384e-06,
      "loss": 0.0076,
      "step": 2419620
    },
    {
      "epoch": 3.9597939291582387,
      "grad_norm": 0.3134309649467468,
      "learning_rate": 2.0941204377878668e-06,
      "loss": 0.0115,
      "step": 2419640
    },
    {
      "epoch": 3.959826659596892,
      "grad_norm": 0.14138369262218475,
      "learning_rate": 2.0940545455743495e-06,
      "loss": 0.0074,
      "step": 2419660
    },
    {
      "epoch": 3.959859390035545,
      "grad_norm": 0.07607962191104889,
      "learning_rate": 2.0939886533608327e-06,
      "loss": 0.0107,
      "step": 2419680
    },
    {
      "epoch": 3.9598921204741986,
      "grad_norm": 0.26437291502952576,
      "learning_rate": 2.0939227611473154e-06,
      "loss": 0.0161,
      "step": 2419700
    },
    {
      "epoch": 3.959924850912852,
      "grad_norm": 0.3419925570487976,
      "learning_rate": 2.093856868933798e-06,
      "loss": 0.0165,
      "step": 2419720
    },
    {
      "epoch": 3.9599575813515053,
      "grad_norm": 0.2487993836402893,
      "learning_rate": 2.093790976720281e-06,
      "loss": 0.0133,
      "step": 2419740
    },
    {
      "epoch": 3.9599903117901585,
      "grad_norm": 0.12160880118608475,
      "learning_rate": 2.093725084506764e-06,
      "loss": 0.0046,
      "step": 2419760
    },
    {
      "epoch": 3.960023042228812,
      "grad_norm": 0.30675384402275085,
      "learning_rate": 2.0936591922932468e-06,
      "loss": 0.0087,
      "step": 2419780
    },
    {
      "epoch": 3.9600557726674652,
      "grad_norm": 0.4948725998401642,
      "learning_rate": 2.09359330007973e-06,
      "loss": 0.0117,
      "step": 2419800
    },
    {
      "epoch": 3.9600885031061184,
      "grad_norm": 0.2403595745563507,
      "learning_rate": 2.0935274078662127e-06,
      "loss": 0.0135,
      "step": 2419820
    },
    {
      "epoch": 3.960121233544772,
      "grad_norm": 0.14260944724082947,
      "learning_rate": 2.0934615156526954e-06,
      "loss": 0.0173,
      "step": 2419840
    },
    {
      "epoch": 3.9601539639834256,
      "grad_norm": 0.6756857633590698,
      "learning_rate": 2.093395623439178e-06,
      "loss": 0.0122,
      "step": 2419860
    },
    {
      "epoch": 3.9601866944220787,
      "grad_norm": 0.15858381986618042,
      "learning_rate": 2.0933297312256613e-06,
      "loss": 0.0136,
      "step": 2419880
    },
    {
      "epoch": 3.960219424860732,
      "grad_norm": 0.21143735945224762,
      "learning_rate": 2.093263839012144e-06,
      "loss": 0.0117,
      "step": 2419900
    },
    {
      "epoch": 3.9602521552993855,
      "grad_norm": 0.08952268213033676,
      "learning_rate": 2.093197946798627e-06,
      "loss": 0.0093,
      "step": 2419920
    },
    {
      "epoch": 3.9602848857380386,
      "grad_norm": 0.29501503705978394,
      "learning_rate": 2.09313205458511e-06,
      "loss": 0.0107,
      "step": 2419940
    },
    {
      "epoch": 3.9603176161766918,
      "grad_norm": 0.41997024416923523,
      "learning_rate": 2.0930661623715927e-06,
      "loss": 0.0069,
      "step": 2419960
    },
    {
      "epoch": 3.9603503466153454,
      "grad_norm": 0.21271444857120514,
      "learning_rate": 2.093000270158076e-06,
      "loss": 0.0099,
      "step": 2419980
    },
    {
      "epoch": 3.960383077053999,
      "grad_norm": 0.7699361443519592,
      "learning_rate": 2.0929343779445586e-06,
      "loss": 0.0142,
      "step": 2420000
    },
    {
      "epoch": 3.960415807492652,
      "grad_norm": 0.15179356932640076,
      "learning_rate": 2.0928684857310414e-06,
      "loss": 0.0113,
      "step": 2420020
    },
    {
      "epoch": 3.9604485379313052,
      "grad_norm": 0.2864859402179718,
      "learning_rate": 2.092802593517524e-06,
      "loss": 0.0122,
      "step": 2420040
    },
    {
      "epoch": 3.960481268369959,
      "grad_norm": 0.28560835123062134,
      "learning_rate": 2.092736701304007e-06,
      "loss": 0.0169,
      "step": 2420060
    },
    {
      "epoch": 3.960513998808612,
      "grad_norm": 0.32960766553878784,
      "learning_rate": 2.09267080909049e-06,
      "loss": 0.0093,
      "step": 2420080
    },
    {
      "epoch": 3.960546729247265,
      "grad_norm": 0.1288764327764511,
      "learning_rate": 2.0926049168769727e-06,
      "loss": 0.0079,
      "step": 2420100
    },
    {
      "epoch": 3.9605794596859187,
      "grad_norm": 0.16068124771118164,
      "learning_rate": 2.092539024663456e-06,
      "loss": 0.0092,
      "step": 2420120
    },
    {
      "epoch": 3.960612190124572,
      "grad_norm": 0.543565571308136,
      "learning_rate": 2.0924731324499386e-06,
      "loss": 0.0107,
      "step": 2420140
    },
    {
      "epoch": 3.9606449205632255,
      "grad_norm": 0.40096303820610046,
      "learning_rate": 2.0924072402364214e-06,
      "loss": 0.0141,
      "step": 2420160
    },
    {
      "epoch": 3.9606776510018786,
      "grad_norm": 0.16253596544265747,
      "learning_rate": 2.0923413480229045e-06,
      "loss": 0.0113,
      "step": 2420180
    },
    {
      "epoch": 3.9607103814405322,
      "grad_norm": 0.18178366124629974,
      "learning_rate": 2.0922754558093873e-06,
      "loss": 0.0126,
      "step": 2420200
    },
    {
      "epoch": 3.9607431118791854,
      "grad_norm": 0.2328716665506363,
      "learning_rate": 2.09220956359587e-06,
      "loss": 0.0087,
      "step": 2420220
    },
    {
      "epoch": 3.9607758423178385,
      "grad_norm": 0.15012399852275848,
      "learning_rate": 2.0921436713823528e-06,
      "loss": 0.0137,
      "step": 2420240
    },
    {
      "epoch": 3.960808572756492,
      "grad_norm": 0.0786518082022667,
      "learning_rate": 2.0920777791688355e-06,
      "loss": 0.0075,
      "step": 2420260
    },
    {
      "epoch": 3.9608413031951453,
      "grad_norm": 0.45876944065093994,
      "learning_rate": 2.0920118869553187e-06,
      "loss": 0.011,
      "step": 2420280
    },
    {
      "epoch": 3.960874033633799,
      "grad_norm": 0.3069070279598236,
      "learning_rate": 2.091945994741802e-06,
      "loss": 0.0147,
      "step": 2420300
    },
    {
      "epoch": 3.960906764072452,
      "grad_norm": 0.38629066944122314,
      "learning_rate": 2.0918801025282846e-06,
      "loss": 0.0096,
      "step": 2420320
    },
    {
      "epoch": 3.9609394945111056,
      "grad_norm": 0.1547955423593521,
      "learning_rate": 2.0918142103147673e-06,
      "loss": 0.0079,
      "step": 2420340
    },
    {
      "epoch": 3.9609722249497588,
      "grad_norm": 0.08702664822340012,
      "learning_rate": 2.09174831810125e-06,
      "loss": 0.0058,
      "step": 2420360
    },
    {
      "epoch": 3.961004955388412,
      "grad_norm": 0.23869821429252625,
      "learning_rate": 2.091682425887733e-06,
      "loss": 0.0085,
      "step": 2420380
    },
    {
      "epoch": 3.9610376858270655,
      "grad_norm": 0.21757233142852783,
      "learning_rate": 2.091616533674216e-06,
      "loss": 0.0122,
      "step": 2420400
    },
    {
      "epoch": 3.9610704162657187,
      "grad_norm": 0.23667550086975098,
      "learning_rate": 2.0915506414606987e-06,
      "loss": 0.0113,
      "step": 2420420
    },
    {
      "epoch": 3.9611031467043722,
      "grad_norm": 0.4523268938064575,
      "learning_rate": 2.0914847492471814e-06,
      "loss": 0.0105,
      "step": 2420440
    },
    {
      "epoch": 3.9611358771430254,
      "grad_norm": 0.15525226294994354,
      "learning_rate": 2.091418857033664e-06,
      "loss": 0.0097,
      "step": 2420460
    },
    {
      "epoch": 3.961168607581679,
      "grad_norm": 0.3801705539226532,
      "learning_rate": 2.0913529648201473e-06,
      "loss": 0.0061,
      "step": 2420480
    },
    {
      "epoch": 3.961201338020332,
      "grad_norm": 0.35813888907432556,
      "learning_rate": 2.0912870726066305e-06,
      "loss": 0.0153,
      "step": 2420500
    },
    {
      "epoch": 3.9612340684589853,
      "grad_norm": 0.08763415366411209,
      "learning_rate": 2.0912211803931132e-06,
      "loss": 0.0087,
      "step": 2420520
    },
    {
      "epoch": 3.961266798897639,
      "grad_norm": 0.07598347961902618,
      "learning_rate": 2.091155288179596e-06,
      "loss": 0.0075,
      "step": 2420540
    },
    {
      "epoch": 3.961299529336292,
      "grad_norm": 0.42446252703666687,
      "learning_rate": 2.0910893959660787e-06,
      "loss": 0.0102,
      "step": 2420560
    },
    {
      "epoch": 3.9613322597749456,
      "grad_norm": 0.10418690741062164,
      "learning_rate": 2.091023503752562e-06,
      "loss": 0.0055,
      "step": 2420580
    },
    {
      "epoch": 3.961364990213599,
      "grad_norm": 1.0642205476760864,
      "learning_rate": 2.0909576115390446e-06,
      "loss": 0.0118,
      "step": 2420600
    },
    {
      "epoch": 3.9613977206522524,
      "grad_norm": 0.32761627435684204,
      "learning_rate": 2.0908917193255274e-06,
      "loss": 0.0099,
      "step": 2420620
    },
    {
      "epoch": 3.9614304510909055,
      "grad_norm": 0.3452301025390625,
      "learning_rate": 2.09082582711201e-06,
      "loss": 0.0082,
      "step": 2420640
    },
    {
      "epoch": 3.9614631815295587,
      "grad_norm": 0.4638337194919586,
      "learning_rate": 2.0907599348984933e-06,
      "loss": 0.0139,
      "step": 2420660
    },
    {
      "epoch": 3.9614959119682123,
      "grad_norm": 0.22112587094306946,
      "learning_rate": 2.090694042684976e-06,
      "loss": 0.011,
      "step": 2420680
    },
    {
      "epoch": 3.9615286424068654,
      "grad_norm": 0.08383035659790039,
      "learning_rate": 2.090628150471459e-06,
      "loss": 0.0087,
      "step": 2420700
    },
    {
      "epoch": 3.961561372845519,
      "grad_norm": 0.13637498021125793,
      "learning_rate": 2.090562258257942e-06,
      "loss": 0.0093,
      "step": 2420720
    },
    {
      "epoch": 3.961594103284172,
      "grad_norm": 0.18194317817687988,
      "learning_rate": 2.0904963660444246e-06,
      "loss": 0.0097,
      "step": 2420740
    },
    {
      "epoch": 3.9616268337228258,
      "grad_norm": 0.15262307226657867,
      "learning_rate": 2.0904304738309074e-06,
      "loss": 0.0111,
      "step": 2420760
    },
    {
      "epoch": 3.961659564161479,
      "grad_norm": 0.3196316957473755,
      "learning_rate": 2.0903645816173905e-06,
      "loss": 0.0126,
      "step": 2420780
    },
    {
      "epoch": 3.961692294600132,
      "grad_norm": 0.09340143203735352,
      "learning_rate": 2.0902986894038733e-06,
      "loss": 0.0117,
      "step": 2420800
    },
    {
      "epoch": 3.9617250250387857,
      "grad_norm": 0.06676369160413742,
      "learning_rate": 2.0902327971903564e-06,
      "loss": 0.0077,
      "step": 2420820
    },
    {
      "epoch": 3.961757755477439,
      "grad_norm": 0.1311165988445282,
      "learning_rate": 2.090166904976839e-06,
      "loss": 0.0111,
      "step": 2420840
    },
    {
      "epoch": 3.9617904859160924,
      "grad_norm": 0.25125011801719666,
      "learning_rate": 2.090101012763322e-06,
      "loss": 0.0108,
      "step": 2420860
    },
    {
      "epoch": 3.9618232163547455,
      "grad_norm": 0.40086716413497925,
      "learning_rate": 2.0900351205498047e-06,
      "loss": 0.0108,
      "step": 2420880
    },
    {
      "epoch": 3.961855946793399,
      "grad_norm": 0.8939930200576782,
      "learning_rate": 2.089969228336288e-06,
      "loss": 0.0095,
      "step": 2420900
    },
    {
      "epoch": 3.9618886772320523,
      "grad_norm": 0.1281287968158722,
      "learning_rate": 2.0899033361227706e-06,
      "loss": 0.0123,
      "step": 2420920
    },
    {
      "epoch": 3.9619214076707054,
      "grad_norm": 0.15279987454414368,
      "learning_rate": 2.0898374439092533e-06,
      "loss": 0.0094,
      "step": 2420940
    },
    {
      "epoch": 3.961954138109359,
      "grad_norm": 0.2423584908246994,
      "learning_rate": 2.089771551695736e-06,
      "loss": 0.012,
      "step": 2420960
    },
    {
      "epoch": 3.961986868548012,
      "grad_norm": 0.20421330630779266,
      "learning_rate": 2.089705659482219e-06,
      "loss": 0.0084,
      "step": 2420980
    },
    {
      "epoch": 3.962019598986666,
      "grad_norm": 0.2520725131034851,
      "learning_rate": 2.089639767268702e-06,
      "loss": 0.0096,
      "step": 2421000
    },
    {
      "epoch": 3.962052329425319,
      "grad_norm": 0.2067975550889969,
      "learning_rate": 2.089573875055185e-06,
      "loss": 0.0096,
      "step": 2421020
    },
    {
      "epoch": 3.9620850598639725,
      "grad_norm": 0.25364989042282104,
      "learning_rate": 2.089507982841668e-06,
      "loss": 0.009,
      "step": 2421040
    },
    {
      "epoch": 3.9621177903026257,
      "grad_norm": 0.28792256116867065,
      "learning_rate": 2.0894420906281506e-06,
      "loss": 0.0096,
      "step": 2421060
    },
    {
      "epoch": 3.962150520741279,
      "grad_norm": 0.19769519567489624,
      "learning_rate": 2.0893761984146333e-06,
      "loss": 0.0143,
      "step": 2421080
    },
    {
      "epoch": 3.9621832511799324,
      "grad_norm": 0.1613377034664154,
      "learning_rate": 2.0893103062011165e-06,
      "loss": 0.0106,
      "step": 2421100
    },
    {
      "epoch": 3.9622159816185856,
      "grad_norm": 0.10784351825714111,
      "learning_rate": 2.0892444139875992e-06,
      "loss": 0.012,
      "step": 2421120
    },
    {
      "epoch": 3.9622487120572387,
      "grad_norm": 0.29826977849006653,
      "learning_rate": 2.089178521774082e-06,
      "loss": 0.0078,
      "step": 2421140
    },
    {
      "epoch": 3.9622814424958923,
      "grad_norm": 0.46690839529037476,
      "learning_rate": 2.0891126295605647e-06,
      "loss": 0.0105,
      "step": 2421160
    },
    {
      "epoch": 3.962314172934546,
      "grad_norm": 0.5452932119369507,
      "learning_rate": 2.089046737347048e-06,
      "loss": 0.0103,
      "step": 2421180
    },
    {
      "epoch": 3.962346903373199,
      "grad_norm": 0.09521558880805969,
      "learning_rate": 2.088980845133531e-06,
      "loss": 0.0103,
      "step": 2421200
    },
    {
      "epoch": 3.962379633811852,
      "grad_norm": 0.4113803803920746,
      "learning_rate": 2.0889149529200138e-06,
      "loss": 0.0092,
      "step": 2421220
    },
    {
      "epoch": 3.962412364250506,
      "grad_norm": 0.2585313022136688,
      "learning_rate": 2.0888490607064965e-06,
      "loss": 0.0069,
      "step": 2421240
    },
    {
      "epoch": 3.962445094689159,
      "grad_norm": 0.29015031456947327,
      "learning_rate": 2.0887831684929793e-06,
      "loss": 0.0101,
      "step": 2421260
    },
    {
      "epoch": 3.962477825127812,
      "grad_norm": 0.46047934889793396,
      "learning_rate": 2.088717276279462e-06,
      "loss": 0.0143,
      "step": 2421280
    },
    {
      "epoch": 3.9625105555664657,
      "grad_norm": 0.19588391482830048,
      "learning_rate": 2.088651384065945e-06,
      "loss": 0.0104,
      "step": 2421300
    },
    {
      "epoch": 3.9625432860051193,
      "grad_norm": 0.09859898686408997,
      "learning_rate": 2.088585491852428e-06,
      "loss": 0.0086,
      "step": 2421320
    },
    {
      "epoch": 3.9625760164437724,
      "grad_norm": 0.09763380885124207,
      "learning_rate": 2.0885195996389106e-06,
      "loss": 0.0102,
      "step": 2421340
    },
    {
      "epoch": 3.9626087468824256,
      "grad_norm": 0.318150132894516,
      "learning_rate": 2.088453707425394e-06,
      "loss": 0.0106,
      "step": 2421360
    },
    {
      "epoch": 3.962641477321079,
      "grad_norm": 0.18578311800956726,
      "learning_rate": 2.0883878152118765e-06,
      "loss": 0.009,
      "step": 2421380
    },
    {
      "epoch": 3.9626742077597323,
      "grad_norm": 0.4227793216705322,
      "learning_rate": 2.0883219229983597e-06,
      "loss": 0.0086,
      "step": 2421400
    },
    {
      "epoch": 3.9627069381983855,
      "grad_norm": 0.21072520315647125,
      "learning_rate": 2.0882560307848424e-06,
      "loss": 0.0154,
      "step": 2421420
    },
    {
      "epoch": 3.962739668637039,
      "grad_norm": 0.22699393332004547,
      "learning_rate": 2.088190138571325e-06,
      "loss": 0.0138,
      "step": 2421440
    },
    {
      "epoch": 3.9627723990756927,
      "grad_norm": 0.08722995221614838,
      "learning_rate": 2.088124246357808e-06,
      "loss": 0.0103,
      "step": 2421460
    },
    {
      "epoch": 3.962805129514346,
      "grad_norm": 0.07979235053062439,
      "learning_rate": 2.0880583541442907e-06,
      "loss": 0.0128,
      "step": 2421480
    },
    {
      "epoch": 3.962837859952999,
      "grad_norm": 0.12445854395627975,
      "learning_rate": 2.087992461930774e-06,
      "loss": 0.0133,
      "step": 2421500
    },
    {
      "epoch": 3.9628705903916526,
      "grad_norm": 0.2353602647781372,
      "learning_rate": 2.0879265697172566e-06,
      "loss": 0.0102,
      "step": 2421520
    },
    {
      "epoch": 3.9629033208303057,
      "grad_norm": 0.42234307527542114,
      "learning_rate": 2.0878606775037397e-06,
      "loss": 0.0106,
      "step": 2421540
    },
    {
      "epoch": 3.962936051268959,
      "grad_norm": 0.4333852231502533,
      "learning_rate": 2.0877947852902225e-06,
      "loss": 0.0175,
      "step": 2421560
    },
    {
      "epoch": 3.9629687817076125,
      "grad_norm": 0.779415488243103,
      "learning_rate": 2.0877288930767052e-06,
      "loss": 0.0119,
      "step": 2421580
    },
    {
      "epoch": 3.963001512146266,
      "grad_norm": 0.1284501850605011,
      "learning_rate": 2.0876630008631884e-06,
      "loss": 0.0078,
      "step": 2421600
    },
    {
      "epoch": 3.963034242584919,
      "grad_norm": 0.09521006792783737,
      "learning_rate": 2.087597108649671e-06,
      "loss": 0.0066,
      "step": 2421620
    },
    {
      "epoch": 3.9630669730235724,
      "grad_norm": 0.12211627513170242,
      "learning_rate": 2.087531216436154e-06,
      "loss": 0.0082,
      "step": 2421640
    },
    {
      "epoch": 3.963099703462226,
      "grad_norm": 0.07752392441034317,
      "learning_rate": 2.0874653242226366e-06,
      "loss": 0.0066,
      "step": 2421660
    },
    {
      "epoch": 3.963132433900879,
      "grad_norm": 1.2123836278915405,
      "learning_rate": 2.0873994320091193e-06,
      "loss": 0.008,
      "step": 2421680
    },
    {
      "epoch": 3.9631651643395323,
      "grad_norm": 0.146372452378273,
      "learning_rate": 2.0873335397956025e-06,
      "loss": 0.0104,
      "step": 2421700
    },
    {
      "epoch": 3.963197894778186,
      "grad_norm": 0.9571613073348999,
      "learning_rate": 2.0872676475820857e-06,
      "loss": 0.0121,
      "step": 2421720
    },
    {
      "epoch": 3.963230625216839,
      "grad_norm": 0.09882477670907974,
      "learning_rate": 2.0872017553685684e-06,
      "loss": 0.0127,
      "step": 2421740
    },
    {
      "epoch": 3.9632633556554926,
      "grad_norm": 0.5913278460502625,
      "learning_rate": 2.087135863155051e-06,
      "loss": 0.0092,
      "step": 2421760
    },
    {
      "epoch": 3.9632960860941457,
      "grad_norm": 0.19937461614608765,
      "learning_rate": 2.087069970941534e-06,
      "loss": 0.0108,
      "step": 2421780
    },
    {
      "epoch": 3.9633288165327993,
      "grad_norm": 0.19838924705982208,
      "learning_rate": 2.087004078728017e-06,
      "loss": 0.0099,
      "step": 2421800
    },
    {
      "epoch": 3.9633615469714525,
      "grad_norm": 0.7609323859214783,
      "learning_rate": 2.0869381865144998e-06,
      "loss": 0.0107,
      "step": 2421820
    },
    {
      "epoch": 3.9633942774101056,
      "grad_norm": 0.493101567029953,
      "learning_rate": 2.0868722943009825e-06,
      "loss": 0.0118,
      "step": 2421840
    },
    {
      "epoch": 3.9634270078487592,
      "grad_norm": 0.18403196334838867,
      "learning_rate": 2.0868064020874653e-06,
      "loss": 0.0108,
      "step": 2421860
    },
    {
      "epoch": 3.9634597382874124,
      "grad_norm": 0.18715223670005798,
      "learning_rate": 2.0867405098739484e-06,
      "loss": 0.0092,
      "step": 2421880
    },
    {
      "epoch": 3.963492468726066,
      "grad_norm": 0.16426783800125122,
      "learning_rate": 2.086674617660431e-06,
      "loss": 0.0121,
      "step": 2421900
    },
    {
      "epoch": 3.963525199164719,
      "grad_norm": 0.05989309772849083,
      "learning_rate": 2.0866087254469143e-06,
      "loss": 0.0094,
      "step": 2421920
    },
    {
      "epoch": 3.9635579296033727,
      "grad_norm": 0.47417524456977844,
      "learning_rate": 2.086542833233397e-06,
      "loss": 0.0104,
      "step": 2421940
    },
    {
      "epoch": 3.963590660042026,
      "grad_norm": 0.1445295810699463,
      "learning_rate": 2.08647694101988e-06,
      "loss": 0.0093,
      "step": 2421960
    },
    {
      "epoch": 3.963623390480679,
      "grad_norm": 0.1730634719133377,
      "learning_rate": 2.0864110488063626e-06,
      "loss": 0.0114,
      "step": 2421980
    },
    {
      "epoch": 3.9636561209193326,
      "grad_norm": 0.14664991199970245,
      "learning_rate": 2.0863451565928457e-06,
      "loss": 0.0101,
      "step": 2422000
    },
    {
      "epoch": 3.9636888513579858,
      "grad_norm": 1.2955632209777832,
      "learning_rate": 2.0862792643793285e-06,
      "loss": 0.0102,
      "step": 2422020
    },
    {
      "epoch": 3.9637215817966394,
      "grad_norm": 0.4056362211704254,
      "learning_rate": 2.086213372165811e-06,
      "loss": 0.0093,
      "step": 2422040
    },
    {
      "epoch": 3.9637543122352925,
      "grad_norm": 0.189496710896492,
      "learning_rate": 2.0861474799522944e-06,
      "loss": 0.0084,
      "step": 2422060
    },
    {
      "epoch": 3.963787042673946,
      "grad_norm": 0.454936146736145,
      "learning_rate": 2.086081587738777e-06,
      "loss": 0.0156,
      "step": 2422080
    },
    {
      "epoch": 3.9638197731125993,
      "grad_norm": 0.18415901064872742,
      "learning_rate": 2.08601569552526e-06,
      "loss": 0.0081,
      "step": 2422100
    },
    {
      "epoch": 3.9638525035512524,
      "grad_norm": 0.25315484404563904,
      "learning_rate": 2.085949803311743e-06,
      "loss": 0.0068,
      "step": 2422120
    },
    {
      "epoch": 3.963885233989906,
      "grad_norm": 0.31019553542137146,
      "learning_rate": 2.0858839110982257e-06,
      "loss": 0.0106,
      "step": 2422140
    },
    {
      "epoch": 3.963917964428559,
      "grad_norm": 0.41463300585746765,
      "learning_rate": 2.0858180188847085e-06,
      "loss": 0.0107,
      "step": 2422160
    },
    {
      "epoch": 3.9639506948672127,
      "grad_norm": 0.12520071864128113,
      "learning_rate": 2.0857521266711912e-06,
      "loss": 0.0107,
      "step": 2422180
    },
    {
      "epoch": 3.963983425305866,
      "grad_norm": 0.30036410689353943,
      "learning_rate": 2.0856862344576744e-06,
      "loss": 0.009,
      "step": 2422200
    },
    {
      "epoch": 3.9640161557445195,
      "grad_norm": 0.037011317908763885,
      "learning_rate": 2.085620342244157e-06,
      "loss": 0.0139,
      "step": 2422220
    },
    {
      "epoch": 3.9640488861831726,
      "grad_norm": 0.3221077024936676,
      "learning_rate": 2.0855544500306403e-06,
      "loss": 0.0123,
      "step": 2422240
    },
    {
      "epoch": 3.964081616621826,
      "grad_norm": 0.15411719679832458,
      "learning_rate": 2.085488557817123e-06,
      "loss": 0.0142,
      "step": 2422260
    },
    {
      "epoch": 3.9641143470604794,
      "grad_norm": 0.16783392429351807,
      "learning_rate": 2.0854226656036058e-06,
      "loss": 0.0074,
      "step": 2422280
    },
    {
      "epoch": 3.9641470774991325,
      "grad_norm": 0.11454201489686966,
      "learning_rate": 2.0853567733900885e-06,
      "loss": 0.0149,
      "step": 2422300
    },
    {
      "epoch": 3.964179807937786,
      "grad_norm": 0.374743789434433,
      "learning_rate": 2.0852908811765717e-06,
      "loss": 0.0116,
      "step": 2422320
    },
    {
      "epoch": 3.9642125383764393,
      "grad_norm": 0.5165891647338867,
      "learning_rate": 2.0852249889630544e-06,
      "loss": 0.01,
      "step": 2422340
    },
    {
      "epoch": 3.964245268815093,
      "grad_norm": 0.0977480560541153,
      "learning_rate": 2.085159096749537e-06,
      "loss": 0.0109,
      "step": 2422360
    },
    {
      "epoch": 3.964277999253746,
      "grad_norm": 0.2969525456428528,
      "learning_rate": 2.08509320453602e-06,
      "loss": 0.0113,
      "step": 2422380
    },
    {
      "epoch": 3.964310729692399,
      "grad_norm": 0.5118829011917114,
      "learning_rate": 2.085027312322503e-06,
      "loss": 0.012,
      "step": 2422400
    },
    {
      "epoch": 3.9643434601310528,
      "grad_norm": 0.2499476671218872,
      "learning_rate": 2.084961420108986e-06,
      "loss": 0.0114,
      "step": 2422420
    },
    {
      "epoch": 3.964376190569706,
      "grad_norm": 0.4018625020980835,
      "learning_rate": 2.084895527895469e-06,
      "loss": 0.0098,
      "step": 2422440
    },
    {
      "epoch": 3.9644089210083595,
      "grad_norm": 0.1721092313528061,
      "learning_rate": 2.0848296356819517e-06,
      "loss": 0.0105,
      "step": 2422460
    },
    {
      "epoch": 3.9644416514470127,
      "grad_norm": 0.15656623244285583,
      "learning_rate": 2.0847637434684344e-06,
      "loss": 0.01,
      "step": 2422480
    },
    {
      "epoch": 3.9644743818856663,
      "grad_norm": 0.1778872162103653,
      "learning_rate": 2.084697851254917e-06,
      "loss": 0.0096,
      "step": 2422500
    },
    {
      "epoch": 3.9645071123243194,
      "grad_norm": 0.03267529606819153,
      "learning_rate": 2.0846319590414003e-06,
      "loss": 0.0107,
      "step": 2422520
    },
    {
      "epoch": 3.9645398427629726,
      "grad_norm": 0.3112792670726776,
      "learning_rate": 2.084566066827883e-06,
      "loss": 0.0097,
      "step": 2422540
    },
    {
      "epoch": 3.964572573201626,
      "grad_norm": 0.193547323346138,
      "learning_rate": 2.084500174614366e-06,
      "loss": 0.0102,
      "step": 2422560
    },
    {
      "epoch": 3.9646053036402793,
      "grad_norm": 0.3697473406791687,
      "learning_rate": 2.084434282400849e-06,
      "loss": 0.0132,
      "step": 2422580
    },
    {
      "epoch": 3.9646380340789324,
      "grad_norm": 0.5602667331695557,
      "learning_rate": 2.0843683901873317e-06,
      "loss": 0.0119,
      "step": 2422600
    },
    {
      "epoch": 3.964670764517586,
      "grad_norm": 0.2799239754676819,
      "learning_rate": 2.084302497973815e-06,
      "loss": 0.0078,
      "step": 2422620
    },
    {
      "epoch": 3.9647034949562396,
      "grad_norm": 0.20625077188014984,
      "learning_rate": 2.0842366057602976e-06,
      "loss": 0.0101,
      "step": 2422640
    },
    {
      "epoch": 3.964736225394893,
      "grad_norm": 0.20133355259895325,
      "learning_rate": 2.0841707135467804e-06,
      "loss": 0.0074,
      "step": 2422660
    },
    {
      "epoch": 3.964768955833546,
      "grad_norm": 0.22674325108528137,
      "learning_rate": 2.084104821333263e-06,
      "loss": 0.0117,
      "step": 2422680
    },
    {
      "epoch": 3.9648016862721995,
      "grad_norm": 0.18146060407161713,
      "learning_rate": 2.084038929119746e-06,
      "loss": 0.0091,
      "step": 2422700
    },
    {
      "epoch": 3.9648344167108527,
      "grad_norm": 0.1166653037071228,
      "learning_rate": 2.083973036906229e-06,
      "loss": 0.0174,
      "step": 2422720
    },
    {
      "epoch": 3.964867147149506,
      "grad_norm": 0.29811471700668335,
      "learning_rate": 2.0839071446927117e-06,
      "loss": 0.0126,
      "step": 2422740
    },
    {
      "epoch": 3.9648998775881594,
      "grad_norm": 0.2986027002334595,
      "learning_rate": 2.083841252479195e-06,
      "loss": 0.01,
      "step": 2422760
    },
    {
      "epoch": 3.964932608026813,
      "grad_norm": 2.6674373149871826,
      "learning_rate": 2.0837753602656776e-06,
      "loss": 0.0093,
      "step": 2422780
    },
    {
      "epoch": 3.964965338465466,
      "grad_norm": 0.24089327454566956,
      "learning_rate": 2.0837094680521604e-06,
      "loss": 0.0083,
      "step": 2422800
    },
    {
      "epoch": 3.9649980689041193,
      "grad_norm": 0.15588058531284332,
      "learning_rate": 2.0836435758386435e-06,
      "loss": 0.0102,
      "step": 2422820
    },
    {
      "epoch": 3.965030799342773,
      "grad_norm": 0.5781093835830688,
      "learning_rate": 2.0835776836251263e-06,
      "loss": 0.0079,
      "step": 2422840
    },
    {
      "epoch": 3.965063529781426,
      "grad_norm": 0.5360636115074158,
      "learning_rate": 2.083511791411609e-06,
      "loss": 0.0142,
      "step": 2422860
    },
    {
      "epoch": 3.965096260220079,
      "grad_norm": 0.678503692150116,
      "learning_rate": 2.0834458991980918e-06,
      "loss": 0.0075,
      "step": 2422880
    },
    {
      "epoch": 3.965128990658733,
      "grad_norm": 0.43271583318710327,
      "learning_rate": 2.0833800069845745e-06,
      "loss": 0.0098,
      "step": 2422900
    },
    {
      "epoch": 3.9651617210973864,
      "grad_norm": 0.44642412662506104,
      "learning_rate": 2.0833141147710577e-06,
      "loss": 0.0076,
      "step": 2422920
    },
    {
      "epoch": 3.9651944515360396,
      "grad_norm": 0.6616262793540955,
      "learning_rate": 2.083248222557541e-06,
      "loss": 0.0089,
      "step": 2422940
    },
    {
      "epoch": 3.9652271819746927,
      "grad_norm": 0.436936616897583,
      "learning_rate": 2.0831823303440236e-06,
      "loss": 0.0151,
      "step": 2422960
    },
    {
      "epoch": 3.9652599124133463,
      "grad_norm": 0.15851575136184692,
      "learning_rate": 2.0831164381305063e-06,
      "loss": 0.01,
      "step": 2422980
    },
    {
      "epoch": 3.9652926428519994,
      "grad_norm": 0.23513513803482056,
      "learning_rate": 2.083050545916989e-06,
      "loss": 0.0063,
      "step": 2423000
    },
    {
      "epoch": 3.9653253732906526,
      "grad_norm": 0.15791283547878265,
      "learning_rate": 2.0829846537034722e-06,
      "loss": 0.0109,
      "step": 2423020
    },
    {
      "epoch": 3.965358103729306,
      "grad_norm": 0.37806037068367004,
      "learning_rate": 2.082918761489955e-06,
      "loss": 0.0165,
      "step": 2423040
    },
    {
      "epoch": 3.96539083416796,
      "grad_norm": 0.2970787286758423,
      "learning_rate": 2.0828528692764377e-06,
      "loss": 0.0112,
      "step": 2423060
    },
    {
      "epoch": 3.965423564606613,
      "grad_norm": 0.127900168299675,
      "learning_rate": 2.0827869770629204e-06,
      "loss": 0.0084,
      "step": 2423080
    },
    {
      "epoch": 3.965456295045266,
      "grad_norm": 0.1799776554107666,
      "learning_rate": 2.082721084849403e-06,
      "loss": 0.0082,
      "step": 2423100
    },
    {
      "epoch": 3.9654890254839197,
      "grad_norm": 0.849929690361023,
      "learning_rate": 2.0826551926358863e-06,
      "loss": 0.0111,
      "step": 2423120
    },
    {
      "epoch": 3.965521755922573,
      "grad_norm": 0.3121735453605652,
      "learning_rate": 2.0825893004223695e-06,
      "loss": 0.0118,
      "step": 2423140
    },
    {
      "epoch": 3.965554486361226,
      "grad_norm": 0.19200444221496582,
      "learning_rate": 2.0825234082088522e-06,
      "loss": 0.0094,
      "step": 2423160
    },
    {
      "epoch": 3.9655872167998796,
      "grad_norm": 0.026987675577402115,
      "learning_rate": 2.082457515995335e-06,
      "loss": 0.0072,
      "step": 2423180
    },
    {
      "epoch": 3.9656199472385327,
      "grad_norm": 0.2915322184562683,
      "learning_rate": 2.0823916237818177e-06,
      "loss": 0.0103,
      "step": 2423200
    },
    {
      "epoch": 3.9656526776771863,
      "grad_norm": 0.08747733384370804,
      "learning_rate": 2.082325731568301e-06,
      "loss": 0.0074,
      "step": 2423220
    },
    {
      "epoch": 3.9656854081158395,
      "grad_norm": 0.08868025243282318,
      "learning_rate": 2.0822598393547836e-06,
      "loss": 0.0127,
      "step": 2423240
    },
    {
      "epoch": 3.965718138554493,
      "grad_norm": 0.25073572993278503,
      "learning_rate": 2.0821939471412664e-06,
      "loss": 0.006,
      "step": 2423260
    },
    {
      "epoch": 3.965750868993146,
      "grad_norm": 0.18159209191799164,
      "learning_rate": 2.082128054927749e-06,
      "loss": 0.0092,
      "step": 2423280
    },
    {
      "epoch": 3.9657835994317994,
      "grad_norm": 0.25589707493782043,
      "learning_rate": 2.0820621627142323e-06,
      "loss": 0.0137,
      "step": 2423300
    },
    {
      "epoch": 3.965816329870453,
      "grad_norm": 0.23585906624794006,
      "learning_rate": 2.081996270500715e-06,
      "loss": 0.0172,
      "step": 2423320
    },
    {
      "epoch": 3.965849060309106,
      "grad_norm": 0.29944562911987305,
      "learning_rate": 2.081930378287198e-06,
      "loss": 0.0075,
      "step": 2423340
    },
    {
      "epoch": 3.9658817907477597,
      "grad_norm": 0.11263394355773926,
      "learning_rate": 2.081864486073681e-06,
      "loss": 0.014,
      "step": 2423360
    },
    {
      "epoch": 3.965914521186413,
      "grad_norm": 0.09554560482501984,
      "learning_rate": 2.0817985938601637e-06,
      "loss": 0.0156,
      "step": 2423380
    },
    {
      "epoch": 3.9659472516250664,
      "grad_norm": 0.2661136984825134,
      "learning_rate": 2.0817327016466464e-06,
      "loss": 0.0066,
      "step": 2423400
    },
    {
      "epoch": 3.9659799820637196,
      "grad_norm": 0.2150149643421173,
      "learning_rate": 2.0816668094331296e-06,
      "loss": 0.0109,
      "step": 2423420
    },
    {
      "epoch": 3.9660127125023728,
      "grad_norm": 0.027809539809823036,
      "learning_rate": 2.0816009172196123e-06,
      "loss": 0.0141,
      "step": 2423440
    },
    {
      "epoch": 3.9660454429410263,
      "grad_norm": 0.17773421108722687,
      "learning_rate": 2.0815350250060955e-06,
      "loss": 0.0068,
      "step": 2423460
    },
    {
      "epoch": 3.9660781733796795,
      "grad_norm": 0.3157941997051239,
      "learning_rate": 2.081469132792578e-06,
      "loss": 0.0162,
      "step": 2423480
    },
    {
      "epoch": 3.966110903818333,
      "grad_norm": 0.28959742188453674,
      "learning_rate": 2.081403240579061e-06,
      "loss": 0.0098,
      "step": 2423500
    },
    {
      "epoch": 3.9661436342569862,
      "grad_norm": 0.17698492109775543,
      "learning_rate": 2.0813373483655437e-06,
      "loss": 0.0088,
      "step": 2423520
    },
    {
      "epoch": 3.96617636469564,
      "grad_norm": 0.21951359510421753,
      "learning_rate": 2.081271456152027e-06,
      "loss": 0.0089,
      "step": 2423540
    },
    {
      "epoch": 3.966209095134293,
      "grad_norm": 0.6186726689338684,
      "learning_rate": 2.0812055639385096e-06,
      "loss": 0.012,
      "step": 2423560
    },
    {
      "epoch": 3.966241825572946,
      "grad_norm": 0.46582674980163574,
      "learning_rate": 2.0811396717249923e-06,
      "loss": 0.0077,
      "step": 2423580
    },
    {
      "epoch": 3.9662745560115997,
      "grad_norm": 0.6985095143318176,
      "learning_rate": 2.081073779511475e-06,
      "loss": 0.0153,
      "step": 2423600
    },
    {
      "epoch": 3.966307286450253,
      "grad_norm": 0.198034405708313,
      "learning_rate": 2.0810078872979582e-06,
      "loss": 0.0128,
      "step": 2423620
    },
    {
      "epoch": 3.9663400168889065,
      "grad_norm": 0.6490117907524109,
      "learning_rate": 2.080941995084441e-06,
      "loss": 0.0063,
      "step": 2423640
    },
    {
      "epoch": 3.9663727473275596,
      "grad_norm": 0.4346444010734558,
      "learning_rate": 2.080876102870924e-06,
      "loss": 0.0082,
      "step": 2423660
    },
    {
      "epoch": 3.966405477766213,
      "grad_norm": 0.29084497690200806,
      "learning_rate": 2.080810210657407e-06,
      "loss": 0.0125,
      "step": 2423680
    },
    {
      "epoch": 3.9664382082048664,
      "grad_norm": 0.27403566241264343,
      "learning_rate": 2.0807443184438896e-06,
      "loss": 0.0127,
      "step": 2423700
    },
    {
      "epoch": 3.9664709386435195,
      "grad_norm": 0.3075283169746399,
      "learning_rate": 2.0806784262303723e-06,
      "loss": 0.0128,
      "step": 2423720
    },
    {
      "epoch": 3.966503669082173,
      "grad_norm": 0.3012188673019409,
      "learning_rate": 2.0806125340168555e-06,
      "loss": 0.0061,
      "step": 2423740
    },
    {
      "epoch": 3.9665363995208263,
      "grad_norm": 0.1523510217666626,
      "learning_rate": 2.0805466418033382e-06,
      "loss": 0.0143,
      "step": 2423760
    },
    {
      "epoch": 3.96656912995948,
      "grad_norm": 0.1449064165353775,
      "learning_rate": 2.080480749589821e-06,
      "loss": 0.0097,
      "step": 2423780
    },
    {
      "epoch": 3.966601860398133,
      "grad_norm": 0.49095284938812256,
      "learning_rate": 2.0804148573763037e-06,
      "loss": 0.0094,
      "step": 2423800
    },
    {
      "epoch": 3.9666345908367866,
      "grad_norm": 0.2977936863899231,
      "learning_rate": 2.080348965162787e-06,
      "loss": 0.0108,
      "step": 2423820
    },
    {
      "epoch": 3.9666673212754398,
      "grad_norm": 0.5324633717536926,
      "learning_rate": 2.08028307294927e-06,
      "loss": 0.007,
      "step": 2423840
    },
    {
      "epoch": 3.966700051714093,
      "grad_norm": 0.4800027906894684,
      "learning_rate": 2.080217180735753e-06,
      "loss": 0.0117,
      "step": 2423860
    },
    {
      "epoch": 3.9667327821527465,
      "grad_norm": 0.3434552550315857,
      "learning_rate": 2.0801512885222355e-06,
      "loss": 0.0109,
      "step": 2423880
    },
    {
      "epoch": 3.9667655125913996,
      "grad_norm": 0.07765628397464752,
      "learning_rate": 2.0800853963087183e-06,
      "loss": 0.0084,
      "step": 2423900
    },
    {
      "epoch": 3.9667982430300532,
      "grad_norm": 0.3714732527732849,
      "learning_rate": 2.080019504095201e-06,
      "loss": 0.0096,
      "step": 2423920
    },
    {
      "epoch": 3.9668309734687064,
      "grad_norm": 0.41092753410339355,
      "learning_rate": 2.079953611881684e-06,
      "loss": 0.0084,
      "step": 2423940
    },
    {
      "epoch": 3.96686370390736,
      "grad_norm": 0.056880220770835876,
      "learning_rate": 2.079887719668167e-06,
      "loss": 0.0093,
      "step": 2423960
    },
    {
      "epoch": 3.966896434346013,
      "grad_norm": 0.4083041250705719,
      "learning_rate": 2.0798218274546497e-06,
      "loss": 0.0085,
      "step": 2423980
    },
    {
      "epoch": 3.9669291647846663,
      "grad_norm": 0.08013003319501877,
      "learning_rate": 2.079755935241133e-06,
      "loss": 0.0112,
      "step": 2424000
    },
    {
      "epoch": 3.96696189522332,
      "grad_norm": 0.2323111593723297,
      "learning_rate": 2.0796900430276156e-06,
      "loss": 0.0059,
      "step": 2424020
    },
    {
      "epoch": 3.966994625661973,
      "grad_norm": 0.20196843147277832,
      "learning_rate": 2.0796241508140987e-06,
      "loss": 0.0099,
      "step": 2424040
    },
    {
      "epoch": 3.9670273561006266,
      "grad_norm": 0.2863888144493103,
      "learning_rate": 2.0795582586005815e-06,
      "loss": 0.0076,
      "step": 2424060
    },
    {
      "epoch": 3.9670600865392798,
      "grad_norm": 0.18895524740219116,
      "learning_rate": 2.079492366387064e-06,
      "loss": 0.0107,
      "step": 2424080
    },
    {
      "epoch": 3.9670928169779334,
      "grad_norm": 0.30530673265457153,
      "learning_rate": 2.079426474173547e-06,
      "loss": 0.0126,
      "step": 2424100
    },
    {
      "epoch": 3.9671255474165865,
      "grad_norm": 0.16215460002422333,
      "learning_rate": 2.0793605819600297e-06,
      "loss": 0.0122,
      "step": 2424120
    },
    {
      "epoch": 3.9671582778552397,
      "grad_norm": 0.4221445620059967,
      "learning_rate": 2.079294689746513e-06,
      "loss": 0.0066,
      "step": 2424140
    },
    {
      "epoch": 3.9671910082938933,
      "grad_norm": 0.40995797514915466,
      "learning_rate": 2.0792287975329956e-06,
      "loss": 0.0109,
      "step": 2424160
    },
    {
      "epoch": 3.9672237387325464,
      "grad_norm": 0.12350569665431976,
      "learning_rate": 2.0791629053194787e-06,
      "loss": 0.0098,
      "step": 2424180
    },
    {
      "epoch": 3.9672564691711996,
      "grad_norm": 0.10342498868703842,
      "learning_rate": 2.0790970131059615e-06,
      "loss": 0.0085,
      "step": 2424200
    },
    {
      "epoch": 3.967289199609853,
      "grad_norm": 0.2847781777381897,
      "learning_rate": 2.0790311208924442e-06,
      "loss": 0.0093,
      "step": 2424220
    },
    {
      "epoch": 3.9673219300485068,
      "grad_norm": 0.06040923669934273,
      "learning_rate": 2.0789652286789274e-06,
      "loss": 0.0077,
      "step": 2424240
    },
    {
      "epoch": 3.96735466048716,
      "grad_norm": 0.4162571132183075,
      "learning_rate": 2.07889933646541e-06,
      "loss": 0.0131,
      "step": 2424260
    },
    {
      "epoch": 3.967387390925813,
      "grad_norm": 0.08411389589309692,
      "learning_rate": 2.078833444251893e-06,
      "loss": 0.008,
      "step": 2424280
    },
    {
      "epoch": 3.9674201213644666,
      "grad_norm": 0.4777160882949829,
      "learning_rate": 2.0787675520383756e-06,
      "loss": 0.0118,
      "step": 2424300
    },
    {
      "epoch": 3.96745285180312,
      "grad_norm": 0.44410625100135803,
      "learning_rate": 2.0787016598248583e-06,
      "loss": 0.0105,
      "step": 2424320
    },
    {
      "epoch": 3.967485582241773,
      "grad_norm": 0.21242405474185944,
      "learning_rate": 2.0786357676113415e-06,
      "loss": 0.0101,
      "step": 2424340
    },
    {
      "epoch": 3.9675183126804265,
      "grad_norm": 0.09464046359062195,
      "learning_rate": 2.0785698753978247e-06,
      "loss": 0.0137,
      "step": 2424360
    },
    {
      "epoch": 3.96755104311908,
      "grad_norm": 0.4028652310371399,
      "learning_rate": 2.0785039831843074e-06,
      "loss": 0.0121,
      "step": 2424380
    },
    {
      "epoch": 3.9675837735577333,
      "grad_norm": 0.03202527388930321,
      "learning_rate": 2.07843809097079e-06,
      "loss": 0.011,
      "step": 2424400
    },
    {
      "epoch": 3.9676165039963864,
      "grad_norm": 0.22607362270355225,
      "learning_rate": 2.078372198757273e-06,
      "loss": 0.0125,
      "step": 2424420
    },
    {
      "epoch": 3.96764923443504,
      "grad_norm": 0.30849137902259827,
      "learning_rate": 2.078306306543756e-06,
      "loss": 0.0077,
      "step": 2424440
    },
    {
      "epoch": 3.967681964873693,
      "grad_norm": 0.24929559230804443,
      "learning_rate": 2.078240414330239e-06,
      "loss": 0.0136,
      "step": 2424460
    },
    {
      "epoch": 3.9677146953123463,
      "grad_norm": 0.23883771896362305,
      "learning_rate": 2.0781745221167215e-06,
      "loss": 0.0114,
      "step": 2424480
    },
    {
      "epoch": 3.967747425751,
      "grad_norm": 0.11246688663959503,
      "learning_rate": 2.0781086299032043e-06,
      "loss": 0.0098,
      "step": 2424500
    },
    {
      "epoch": 3.9677801561896535,
      "grad_norm": 0.08193057775497437,
      "learning_rate": 2.0780427376896874e-06,
      "loss": 0.0116,
      "step": 2424520
    },
    {
      "epoch": 3.9678128866283067,
      "grad_norm": 0.1102348268032074,
      "learning_rate": 2.07797684547617e-06,
      "loss": 0.011,
      "step": 2424540
    },
    {
      "epoch": 3.96784561706696,
      "grad_norm": 0.35725724697113037,
      "learning_rate": 2.0779109532626533e-06,
      "loss": 0.0096,
      "step": 2424560
    },
    {
      "epoch": 3.9678783475056134,
      "grad_norm": 0.2157229334115982,
      "learning_rate": 2.077845061049136e-06,
      "loss": 0.0081,
      "step": 2424580
    },
    {
      "epoch": 3.9679110779442666,
      "grad_norm": 0.16007214784622192,
      "learning_rate": 2.077779168835619e-06,
      "loss": 0.0075,
      "step": 2424600
    },
    {
      "epoch": 3.9679438083829197,
      "grad_norm": 0.24246792495250702,
      "learning_rate": 2.0777132766221016e-06,
      "loss": 0.0122,
      "step": 2424620
    },
    {
      "epoch": 3.9679765388215733,
      "grad_norm": 0.21062405407428741,
      "learning_rate": 2.0776473844085847e-06,
      "loss": 0.0106,
      "step": 2424640
    },
    {
      "epoch": 3.968009269260227,
      "grad_norm": 0.10239163786172867,
      "learning_rate": 2.0775814921950675e-06,
      "loss": 0.0117,
      "step": 2424660
    },
    {
      "epoch": 3.96804199969888,
      "grad_norm": 0.2772095203399658,
      "learning_rate": 2.07751559998155e-06,
      "loss": 0.0149,
      "step": 2424680
    },
    {
      "epoch": 3.968074730137533,
      "grad_norm": 0.28819695115089417,
      "learning_rate": 2.0774497077680334e-06,
      "loss": 0.0128,
      "step": 2424700
    },
    {
      "epoch": 3.968107460576187,
      "grad_norm": 0.9470335245132446,
      "learning_rate": 2.077383815554516e-06,
      "loss": 0.0129,
      "step": 2424720
    },
    {
      "epoch": 3.96814019101484,
      "grad_norm": 0.22683878242969513,
      "learning_rate": 2.077317923340999e-06,
      "loss": 0.0116,
      "step": 2424740
    },
    {
      "epoch": 3.968172921453493,
      "grad_norm": 0.40861502289772034,
      "learning_rate": 2.077252031127482e-06,
      "loss": 0.0122,
      "step": 2424760
    },
    {
      "epoch": 3.9682056518921467,
      "grad_norm": 0.13725847005844116,
      "learning_rate": 2.0771861389139647e-06,
      "loss": 0.0083,
      "step": 2424780
    },
    {
      "epoch": 3.9682383823308,
      "grad_norm": 0.7833839058876038,
      "learning_rate": 2.0771202467004475e-06,
      "loss": 0.0177,
      "step": 2424800
    },
    {
      "epoch": 3.9682711127694534,
      "grad_norm": 0.10333184152841568,
      "learning_rate": 2.0770543544869302e-06,
      "loss": 0.0108,
      "step": 2424820
    },
    {
      "epoch": 3.9683038432081066,
      "grad_norm": 0.4011983573436737,
      "learning_rate": 2.0769884622734134e-06,
      "loss": 0.0098,
      "step": 2424840
    },
    {
      "epoch": 3.96833657364676,
      "grad_norm": 0.6403418183326721,
      "learning_rate": 2.076922570059896e-06,
      "loss": 0.0116,
      "step": 2424860
    },
    {
      "epoch": 3.9683693040854133,
      "grad_norm": 0.08395221829414368,
      "learning_rate": 2.0768566778463793e-06,
      "loss": 0.0116,
      "step": 2424880
    },
    {
      "epoch": 3.9684020345240665,
      "grad_norm": 0.19092929363250732,
      "learning_rate": 2.076790785632862e-06,
      "loss": 0.0104,
      "step": 2424900
    },
    {
      "epoch": 3.96843476496272,
      "grad_norm": 0.1475200206041336,
      "learning_rate": 2.0767248934193448e-06,
      "loss": 0.009,
      "step": 2424920
    },
    {
      "epoch": 3.9684674954013732,
      "grad_norm": 0.13128915429115295,
      "learning_rate": 2.0766590012058275e-06,
      "loss": 0.0107,
      "step": 2424940
    },
    {
      "epoch": 3.968500225840027,
      "grad_norm": 0.3128434121608734,
      "learning_rate": 2.0765931089923107e-06,
      "loss": 0.0106,
      "step": 2424960
    },
    {
      "epoch": 3.96853295627868,
      "grad_norm": 0.6355857849121094,
      "learning_rate": 2.0765272167787934e-06,
      "loss": 0.0104,
      "step": 2424980
    },
    {
      "epoch": 3.9685656867173336,
      "grad_norm": 0.20450690388679504,
      "learning_rate": 2.076461324565276e-06,
      "loss": 0.0147,
      "step": 2425000
    },
    {
      "epoch": 3.9685984171559867,
      "grad_norm": 0.1225360706448555,
      "learning_rate": 2.076395432351759e-06,
      "loss": 0.0099,
      "step": 2425020
    },
    {
      "epoch": 3.96863114759464,
      "grad_norm": 0.18385015428066254,
      "learning_rate": 2.076329540138242e-06,
      "loss": 0.0138,
      "step": 2425040
    },
    {
      "epoch": 3.9686638780332935,
      "grad_norm": 0.3888838589191437,
      "learning_rate": 2.076263647924725e-06,
      "loss": 0.0131,
      "step": 2425060
    },
    {
      "epoch": 3.9686966084719466,
      "grad_norm": 0.23634889721870422,
      "learning_rate": 2.076197755711208e-06,
      "loss": 0.0112,
      "step": 2425080
    },
    {
      "epoch": 3.9687293389106,
      "grad_norm": 0.28427934646606445,
      "learning_rate": 2.0761318634976907e-06,
      "loss": 0.0167,
      "step": 2425100
    },
    {
      "epoch": 3.9687620693492534,
      "grad_norm": 0.387001633644104,
      "learning_rate": 2.0760659712841734e-06,
      "loss": 0.0091,
      "step": 2425120
    },
    {
      "epoch": 3.968794799787907,
      "grad_norm": 0.42166614532470703,
      "learning_rate": 2.076000079070656e-06,
      "loss": 0.0142,
      "step": 2425140
    },
    {
      "epoch": 3.96882753022656,
      "grad_norm": 0.26324981451034546,
      "learning_rate": 2.0759341868571393e-06,
      "loss": 0.0095,
      "step": 2425160
    },
    {
      "epoch": 3.9688602606652132,
      "grad_norm": 0.7548539638519287,
      "learning_rate": 2.075868294643622e-06,
      "loss": 0.0106,
      "step": 2425180
    },
    {
      "epoch": 3.968892991103867,
      "grad_norm": 0.09433139115571976,
      "learning_rate": 2.075802402430105e-06,
      "loss": 0.0094,
      "step": 2425200
    },
    {
      "epoch": 3.96892572154252,
      "grad_norm": 0.2700631320476532,
      "learning_rate": 2.075736510216588e-06,
      "loss": 0.0073,
      "step": 2425220
    },
    {
      "epoch": 3.9689584519811736,
      "grad_norm": 0.10481252521276474,
      "learning_rate": 2.0756706180030707e-06,
      "loss": 0.0112,
      "step": 2425240
    },
    {
      "epoch": 3.9689911824198267,
      "grad_norm": 0.175113707780838,
      "learning_rate": 2.075604725789554e-06,
      "loss": 0.0084,
      "step": 2425260
    },
    {
      "epoch": 3.9690239128584803,
      "grad_norm": 0.06189679354429245,
      "learning_rate": 2.0755388335760366e-06,
      "loss": 0.0105,
      "step": 2425280
    },
    {
      "epoch": 3.9690566432971335,
      "grad_norm": 0.25308412313461304,
      "learning_rate": 2.0754729413625194e-06,
      "loss": 0.0153,
      "step": 2425300
    },
    {
      "epoch": 3.9690893737357866,
      "grad_norm": 0.45409858226776123,
      "learning_rate": 2.075407049149002e-06,
      "loss": 0.012,
      "step": 2425320
    },
    {
      "epoch": 3.9691221041744402,
      "grad_norm": 0.2187252640724182,
      "learning_rate": 2.075341156935485e-06,
      "loss": 0.011,
      "step": 2425340
    },
    {
      "epoch": 3.9691548346130934,
      "grad_norm": 0.30060532689094543,
      "learning_rate": 2.075275264721968e-06,
      "loss": 0.0071,
      "step": 2425360
    },
    {
      "epoch": 3.969187565051747,
      "grad_norm": 0.15297670662403107,
      "learning_rate": 2.0752093725084508e-06,
      "loss": 0.0105,
      "step": 2425380
    },
    {
      "epoch": 3.9692202954904,
      "grad_norm": 0.25641608238220215,
      "learning_rate": 2.075143480294934e-06,
      "loss": 0.0108,
      "step": 2425400
    },
    {
      "epoch": 3.9692530259290537,
      "grad_norm": 0.1742876023054123,
      "learning_rate": 2.0750775880814167e-06,
      "loss": 0.0105,
      "step": 2425420
    },
    {
      "epoch": 3.969285756367707,
      "grad_norm": 0.1364726722240448,
      "learning_rate": 2.0750116958678994e-06,
      "loss": 0.0108,
      "step": 2425440
    },
    {
      "epoch": 3.96931848680636,
      "grad_norm": 0.16633789241313934,
      "learning_rate": 2.0749458036543826e-06,
      "loss": 0.0061,
      "step": 2425460
    },
    {
      "epoch": 3.9693512172450136,
      "grad_norm": 0.4930972754955292,
      "learning_rate": 2.0748799114408653e-06,
      "loss": 0.0096,
      "step": 2425480
    },
    {
      "epoch": 3.9693839476836668,
      "grad_norm": 0.086475670337677,
      "learning_rate": 2.074814019227348e-06,
      "loss": 0.0084,
      "step": 2425500
    },
    {
      "epoch": 3.9694166781223204,
      "grad_norm": 0.16045306622982025,
      "learning_rate": 2.0747481270138308e-06,
      "loss": 0.0136,
      "step": 2425520
    },
    {
      "epoch": 3.9694494085609735,
      "grad_norm": 0.05974279344081879,
      "learning_rate": 2.0746822348003135e-06,
      "loss": 0.0085,
      "step": 2425540
    },
    {
      "epoch": 3.969482138999627,
      "grad_norm": 0.37560606002807617,
      "learning_rate": 2.0746163425867967e-06,
      "loss": 0.0109,
      "step": 2425560
    },
    {
      "epoch": 3.9695148694382802,
      "grad_norm": 0.1764136552810669,
      "learning_rate": 2.07455045037328e-06,
      "loss": 0.0101,
      "step": 2425580
    },
    {
      "epoch": 3.9695475998769334,
      "grad_norm": 0.039522893726825714,
      "learning_rate": 2.0744845581597626e-06,
      "loss": 0.0091,
      "step": 2425600
    },
    {
      "epoch": 3.969580330315587,
      "grad_norm": 0.0535164475440979,
      "learning_rate": 2.0744186659462453e-06,
      "loss": 0.0119,
      "step": 2425620
    },
    {
      "epoch": 3.96961306075424,
      "grad_norm": 0.10109588503837585,
      "learning_rate": 2.074352773732728e-06,
      "loss": 0.0179,
      "step": 2425640
    },
    {
      "epoch": 3.9696457911928933,
      "grad_norm": 0.07117266207933426,
      "learning_rate": 2.0742868815192112e-06,
      "loss": 0.0079,
      "step": 2425660
    },
    {
      "epoch": 3.969678521631547,
      "grad_norm": 0.6566253900527954,
      "learning_rate": 2.074220989305694e-06,
      "loss": 0.0138,
      "step": 2425680
    },
    {
      "epoch": 3.9697112520702005,
      "grad_norm": 0.6243242025375366,
      "learning_rate": 2.0741550970921767e-06,
      "loss": 0.0102,
      "step": 2425700
    },
    {
      "epoch": 3.9697439825088536,
      "grad_norm": 0.35936248302459717,
      "learning_rate": 2.0740892048786594e-06,
      "loss": 0.0148,
      "step": 2425720
    },
    {
      "epoch": 3.969776712947507,
      "grad_norm": 0.25755223631858826,
      "learning_rate": 2.074023312665142e-06,
      "loss": 0.0138,
      "step": 2425740
    },
    {
      "epoch": 3.9698094433861604,
      "grad_norm": 0.4254409968852997,
      "learning_rate": 2.0739574204516254e-06,
      "loss": 0.007,
      "step": 2425760
    },
    {
      "epoch": 3.9698421738248135,
      "grad_norm": 0.27923834323883057,
      "learning_rate": 2.0738915282381085e-06,
      "loss": 0.013,
      "step": 2425780
    },
    {
      "epoch": 3.9698749042634667,
      "grad_norm": 0.2482607066631317,
      "learning_rate": 2.0738256360245913e-06,
      "loss": 0.0149,
      "step": 2425800
    },
    {
      "epoch": 3.9699076347021203,
      "grad_norm": 0.07857496291399002,
      "learning_rate": 2.073759743811074e-06,
      "loss": 0.0115,
      "step": 2425820
    },
    {
      "epoch": 3.969940365140774,
      "grad_norm": 0.13886544108390808,
      "learning_rate": 2.0736938515975567e-06,
      "loss": 0.01,
      "step": 2425840
    },
    {
      "epoch": 3.969973095579427,
      "grad_norm": 0.4565264880657196,
      "learning_rate": 2.07362795938404e-06,
      "loss": 0.0138,
      "step": 2425860
    },
    {
      "epoch": 3.97000582601808,
      "grad_norm": 0.40107080340385437,
      "learning_rate": 2.0735620671705226e-06,
      "loss": 0.0126,
      "step": 2425880
    },
    {
      "epoch": 3.9700385564567338,
      "grad_norm": 0.2321334332227707,
      "learning_rate": 2.0734961749570054e-06,
      "loss": 0.0062,
      "step": 2425900
    },
    {
      "epoch": 3.970071286895387,
      "grad_norm": 0.09983383119106293,
      "learning_rate": 2.0734302827434885e-06,
      "loss": 0.0089,
      "step": 2425920
    },
    {
      "epoch": 3.97010401733404,
      "grad_norm": 0.17330966889858246,
      "learning_rate": 2.0733643905299713e-06,
      "loss": 0.014,
      "step": 2425940
    },
    {
      "epoch": 3.9701367477726937,
      "grad_norm": 0.1771302968263626,
      "learning_rate": 2.073298498316454e-06,
      "loss": 0.011,
      "step": 2425960
    },
    {
      "epoch": 3.9701694782113472,
      "grad_norm": 0.23484699428081512,
      "learning_rate": 2.073232606102937e-06,
      "loss": 0.0106,
      "step": 2425980
    },
    {
      "epoch": 3.9702022086500004,
      "grad_norm": 0.473835825920105,
      "learning_rate": 2.07316671388942e-06,
      "loss": 0.008,
      "step": 2426000
    },
    {
      "epoch": 3.9702349390886535,
      "grad_norm": 0.22067490220069885,
      "learning_rate": 2.0731008216759027e-06,
      "loss": 0.0109,
      "step": 2426020
    },
    {
      "epoch": 3.970267669527307,
      "grad_norm": 0.21412517130374908,
      "learning_rate": 2.0730349294623854e-06,
      "loss": 0.01,
      "step": 2426040
    },
    {
      "epoch": 3.9703003999659603,
      "grad_norm": 0.7345486879348755,
      "learning_rate": 2.0729690372488686e-06,
      "loss": 0.0161,
      "step": 2426060
    },
    {
      "epoch": 3.9703331304046134,
      "grad_norm": 0.573904812335968,
      "learning_rate": 2.0729031450353513e-06,
      "loss": 0.0146,
      "step": 2426080
    },
    {
      "epoch": 3.970365860843267,
      "grad_norm": 0.3900356888771057,
      "learning_rate": 2.0728372528218345e-06,
      "loss": 0.0132,
      "step": 2426100
    },
    {
      "epoch": 3.9703985912819206,
      "grad_norm": 0.42761164903640747,
      "learning_rate": 2.072771360608317e-06,
      "loss": 0.0106,
      "step": 2426120
    },
    {
      "epoch": 3.970431321720574,
      "grad_norm": 0.27440425753593445,
      "learning_rate": 2.0727054683948e-06,
      "loss": 0.0138,
      "step": 2426140
    },
    {
      "epoch": 3.970464052159227,
      "grad_norm": 0.25735485553741455,
      "learning_rate": 2.0726395761812827e-06,
      "loss": 0.0076,
      "step": 2426160
    },
    {
      "epoch": 3.9704967825978805,
      "grad_norm": 1.1071017980575562,
      "learning_rate": 2.072573683967766e-06,
      "loss": 0.012,
      "step": 2426180
    },
    {
      "epoch": 3.9705295130365337,
      "grad_norm": 0.2501002848148346,
      "learning_rate": 2.0725077917542486e-06,
      "loss": 0.012,
      "step": 2426200
    },
    {
      "epoch": 3.970562243475187,
      "grad_norm": 0.36044272780418396,
      "learning_rate": 2.0724418995407313e-06,
      "loss": 0.0102,
      "step": 2426220
    },
    {
      "epoch": 3.9705949739138404,
      "grad_norm": 0.14181914925575256,
      "learning_rate": 2.072376007327214e-06,
      "loss": 0.0081,
      "step": 2426240
    },
    {
      "epoch": 3.9706277043524936,
      "grad_norm": 0.18650998175144196,
      "learning_rate": 2.0723101151136972e-06,
      "loss": 0.0076,
      "step": 2426260
    },
    {
      "epoch": 3.970660434791147,
      "grad_norm": 0.2594248354434967,
      "learning_rate": 2.07224422290018e-06,
      "loss": 0.0124,
      "step": 2426280
    },
    {
      "epoch": 3.9706931652298003,
      "grad_norm": 0.3185671269893646,
      "learning_rate": 2.072178330686663e-06,
      "loss": 0.0117,
      "step": 2426300
    },
    {
      "epoch": 3.970725895668454,
      "grad_norm": 0.13691207766532898,
      "learning_rate": 2.072112438473146e-06,
      "loss": 0.0172,
      "step": 2426320
    },
    {
      "epoch": 3.970758626107107,
      "grad_norm": 0.14827761054039001,
      "learning_rate": 2.0720465462596286e-06,
      "loss": 0.0071,
      "step": 2426340
    },
    {
      "epoch": 3.97079135654576,
      "grad_norm": 0.46400997042655945,
      "learning_rate": 2.0719806540461114e-06,
      "loss": 0.0078,
      "step": 2426360
    },
    {
      "epoch": 3.970824086984414,
      "grad_norm": 0.22086015343666077,
      "learning_rate": 2.0719147618325945e-06,
      "loss": 0.0105,
      "step": 2426380
    },
    {
      "epoch": 3.970856817423067,
      "grad_norm": 0.5570323467254639,
      "learning_rate": 2.0718488696190773e-06,
      "loss": 0.0081,
      "step": 2426400
    },
    {
      "epoch": 3.9708895478617205,
      "grad_norm": 0.11179200559854507,
      "learning_rate": 2.07178297740556e-06,
      "loss": 0.0079,
      "step": 2426420
    },
    {
      "epoch": 3.9709222783003737,
      "grad_norm": 0.3094905614852905,
      "learning_rate": 2.0717170851920427e-06,
      "loss": 0.0108,
      "step": 2426440
    },
    {
      "epoch": 3.9709550087390273,
      "grad_norm": 0.6838714480400085,
      "learning_rate": 2.071651192978526e-06,
      "loss": 0.0102,
      "step": 2426460
    },
    {
      "epoch": 3.9709877391776804,
      "grad_norm": 0.19751198589801788,
      "learning_rate": 2.071585300765009e-06,
      "loss": 0.0086,
      "step": 2426480
    },
    {
      "epoch": 3.9710204696163336,
      "grad_norm": 0.12349370867013931,
      "learning_rate": 2.071519408551492e-06,
      "loss": 0.0107,
      "step": 2426500
    },
    {
      "epoch": 3.971053200054987,
      "grad_norm": 0.18029262125492096,
      "learning_rate": 2.0714535163379745e-06,
      "loss": 0.0086,
      "step": 2426520
    },
    {
      "epoch": 3.9710859304936403,
      "grad_norm": 0.44634392857551575,
      "learning_rate": 2.0713876241244573e-06,
      "loss": 0.0118,
      "step": 2426540
    },
    {
      "epoch": 3.971118660932294,
      "grad_norm": 0.6006777286529541,
      "learning_rate": 2.07132173191094e-06,
      "loss": 0.0122,
      "step": 2426560
    },
    {
      "epoch": 3.971151391370947,
      "grad_norm": 0.11827384680509567,
      "learning_rate": 2.071255839697423e-06,
      "loss": 0.0159,
      "step": 2426580
    },
    {
      "epoch": 3.9711841218096007,
      "grad_norm": 0.5243664383888245,
      "learning_rate": 2.071189947483906e-06,
      "loss": 0.014,
      "step": 2426600
    },
    {
      "epoch": 3.971216852248254,
      "grad_norm": 0.18275471031665802,
      "learning_rate": 2.0711240552703887e-06,
      "loss": 0.0113,
      "step": 2426620
    },
    {
      "epoch": 3.971249582686907,
      "grad_norm": 0.36069226264953613,
      "learning_rate": 2.071058163056872e-06,
      "loss": 0.0093,
      "step": 2426640
    },
    {
      "epoch": 3.9712823131255606,
      "grad_norm": 0.1930391490459442,
      "learning_rate": 2.0709922708433546e-06,
      "loss": 0.0094,
      "step": 2426660
    },
    {
      "epoch": 3.9713150435642137,
      "grad_norm": 0.20379699766635895,
      "learning_rate": 2.0709263786298377e-06,
      "loss": 0.0143,
      "step": 2426680
    },
    {
      "epoch": 3.9713477740028673,
      "grad_norm": 0.19105982780456543,
      "learning_rate": 2.0708604864163205e-06,
      "loss": 0.0128,
      "step": 2426700
    },
    {
      "epoch": 3.9713805044415205,
      "grad_norm": 0.055709436535835266,
      "learning_rate": 2.070794594202803e-06,
      "loss": 0.0134,
      "step": 2426720
    },
    {
      "epoch": 3.971413234880174,
      "grad_norm": 0.5129170417785645,
      "learning_rate": 2.070728701989286e-06,
      "loss": 0.0106,
      "step": 2426740
    },
    {
      "epoch": 3.971445965318827,
      "grad_norm": 0.1328677386045456,
      "learning_rate": 2.0706628097757687e-06,
      "loss": 0.0095,
      "step": 2426760
    },
    {
      "epoch": 3.9714786957574804,
      "grad_norm": 0.3208463191986084,
      "learning_rate": 2.070596917562252e-06,
      "loss": 0.0117,
      "step": 2426780
    },
    {
      "epoch": 3.971511426196134,
      "grad_norm": 0.22565963864326477,
      "learning_rate": 2.070531025348735e-06,
      "loss": 0.0117,
      "step": 2426800
    },
    {
      "epoch": 3.971544156634787,
      "grad_norm": 0.07457195222377777,
      "learning_rate": 2.0704651331352178e-06,
      "loss": 0.0085,
      "step": 2426820
    },
    {
      "epoch": 3.9715768870734407,
      "grad_norm": 0.5097449421882629,
      "learning_rate": 2.0703992409217005e-06,
      "loss": 0.011,
      "step": 2426840
    },
    {
      "epoch": 3.971609617512094,
      "grad_norm": 0.030057646334171295,
      "learning_rate": 2.0703333487081832e-06,
      "loss": 0.0108,
      "step": 2426860
    },
    {
      "epoch": 3.9716423479507474,
      "grad_norm": 0.24080650508403778,
      "learning_rate": 2.0702674564946664e-06,
      "loss": 0.0062,
      "step": 2426880
    },
    {
      "epoch": 3.9716750783894006,
      "grad_norm": 0.32962095737457275,
      "learning_rate": 2.070201564281149e-06,
      "loss": 0.0106,
      "step": 2426900
    },
    {
      "epoch": 3.9717078088280537,
      "grad_norm": 0.1383168250322342,
      "learning_rate": 2.070135672067632e-06,
      "loss": 0.0078,
      "step": 2426920
    },
    {
      "epoch": 3.9717405392667073,
      "grad_norm": 0.4839046597480774,
      "learning_rate": 2.0700697798541146e-06,
      "loss": 0.0121,
      "step": 2426940
    },
    {
      "epoch": 3.9717732697053605,
      "grad_norm": 1.1507006883621216,
      "learning_rate": 2.0700038876405974e-06,
      "loss": 0.0114,
      "step": 2426960
    },
    {
      "epoch": 3.971806000144014,
      "grad_norm": 0.34203827381134033,
      "learning_rate": 2.0699379954270805e-06,
      "loss": 0.008,
      "step": 2426980
    },
    {
      "epoch": 3.9718387305826672,
      "grad_norm": 0.03996088728308678,
      "learning_rate": 2.0698721032135637e-06,
      "loss": 0.0092,
      "step": 2427000
    },
    {
      "epoch": 3.971871461021321,
      "grad_norm": 0.10802695155143738,
      "learning_rate": 2.0698062110000464e-06,
      "loss": 0.0117,
      "step": 2427020
    },
    {
      "epoch": 3.971904191459974,
      "grad_norm": 0.2975311279296875,
      "learning_rate": 2.069740318786529e-06,
      "loss": 0.0091,
      "step": 2427040
    },
    {
      "epoch": 3.971936921898627,
      "grad_norm": 0.1658807247877121,
      "learning_rate": 2.069674426573012e-06,
      "loss": 0.0147,
      "step": 2427060
    },
    {
      "epoch": 3.9719696523372807,
      "grad_norm": 0.23261584341526031,
      "learning_rate": 2.069608534359495e-06,
      "loss": 0.0085,
      "step": 2427080
    },
    {
      "epoch": 3.972002382775934,
      "grad_norm": 0.2125929296016693,
      "learning_rate": 2.069542642145978e-06,
      "loss": 0.0077,
      "step": 2427100
    },
    {
      "epoch": 3.9720351132145875,
      "grad_norm": 0.11180408298969269,
      "learning_rate": 2.0694767499324605e-06,
      "loss": 0.005,
      "step": 2427120
    },
    {
      "epoch": 3.9720678436532406,
      "grad_norm": 0.10066051781177521,
      "learning_rate": 2.0694108577189433e-06,
      "loss": 0.0101,
      "step": 2427140
    },
    {
      "epoch": 3.972100574091894,
      "grad_norm": 0.5863403081893921,
      "learning_rate": 2.0693449655054264e-06,
      "loss": 0.0068,
      "step": 2427160
    },
    {
      "epoch": 3.9721333045305474,
      "grad_norm": 0.28328830003738403,
      "learning_rate": 2.069279073291909e-06,
      "loss": 0.0135,
      "step": 2427180
    },
    {
      "epoch": 3.9721660349692005,
      "grad_norm": 0.18413476645946503,
      "learning_rate": 2.0692131810783924e-06,
      "loss": 0.0084,
      "step": 2427200
    },
    {
      "epoch": 3.972198765407854,
      "grad_norm": 0.23362237215042114,
      "learning_rate": 2.069147288864875e-06,
      "loss": 0.014,
      "step": 2427220
    },
    {
      "epoch": 3.9722314958465073,
      "grad_norm": 0.478257954120636,
      "learning_rate": 2.069081396651358e-06,
      "loss": 0.0102,
      "step": 2427240
    },
    {
      "epoch": 3.9722642262851604,
      "grad_norm": 0.06502596288919449,
      "learning_rate": 2.0690155044378406e-06,
      "loss": 0.0123,
      "step": 2427260
    },
    {
      "epoch": 3.972296956723814,
      "grad_norm": 0.35523542761802673,
      "learning_rate": 2.0689496122243237e-06,
      "loss": 0.0133,
      "step": 2427280
    },
    {
      "epoch": 3.9723296871624676,
      "grad_norm": 0.26116007566452026,
      "learning_rate": 2.0688837200108065e-06,
      "loss": 0.0081,
      "step": 2427300
    },
    {
      "epoch": 3.9723624176011207,
      "grad_norm": 0.2719184160232544,
      "learning_rate": 2.0688178277972892e-06,
      "loss": 0.0077,
      "step": 2427320
    },
    {
      "epoch": 3.972395148039774,
      "grad_norm": 0.20050644874572754,
      "learning_rate": 2.0687519355837724e-06,
      "loss": 0.0086,
      "step": 2427340
    },
    {
      "epoch": 3.9724278784784275,
      "grad_norm": 0.2095312625169754,
      "learning_rate": 2.068686043370255e-06,
      "loss": 0.0111,
      "step": 2427360
    },
    {
      "epoch": 3.9724606089170806,
      "grad_norm": 0.1728294938802719,
      "learning_rate": 2.068620151156738e-06,
      "loss": 0.0163,
      "step": 2427380
    },
    {
      "epoch": 3.972493339355734,
      "grad_norm": 0.3259124457836151,
      "learning_rate": 2.068554258943221e-06,
      "loss": 0.0094,
      "step": 2427400
    },
    {
      "epoch": 3.9725260697943874,
      "grad_norm": 0.08257075399160385,
      "learning_rate": 2.0684883667297038e-06,
      "loss": 0.0109,
      "step": 2427420
    },
    {
      "epoch": 3.972558800233041,
      "grad_norm": 0.16992484033107758,
      "learning_rate": 2.0684224745161865e-06,
      "loss": 0.0107,
      "step": 2427440
    },
    {
      "epoch": 3.972591530671694,
      "grad_norm": 0.16301462054252625,
      "learning_rate": 2.0683565823026692e-06,
      "loss": 0.0144,
      "step": 2427460
    },
    {
      "epoch": 3.9726242611103473,
      "grad_norm": 0.25323686003685,
      "learning_rate": 2.0682906900891524e-06,
      "loss": 0.0095,
      "step": 2427480
    },
    {
      "epoch": 3.972656991549001,
      "grad_norm": 0.15720412135124207,
      "learning_rate": 2.068224797875635e-06,
      "loss": 0.0157,
      "step": 2427500
    },
    {
      "epoch": 3.972689721987654,
      "grad_norm": 0.16868765652179718,
      "learning_rate": 2.0681589056621183e-06,
      "loss": 0.0086,
      "step": 2427520
    },
    {
      "epoch": 3.972722452426307,
      "grad_norm": 0.24953050911426544,
      "learning_rate": 2.068093013448601e-06,
      "loss": 0.0085,
      "step": 2427540
    },
    {
      "epoch": 3.9727551828649608,
      "grad_norm": 0.4979262351989746,
      "learning_rate": 2.0680271212350838e-06,
      "loss": 0.0121,
      "step": 2427560
    },
    {
      "epoch": 3.9727879133036144,
      "grad_norm": 0.2596924304962158,
      "learning_rate": 2.0679612290215665e-06,
      "loss": 0.0133,
      "step": 2427580
    },
    {
      "epoch": 3.9728206437422675,
      "grad_norm": 0.26829299330711365,
      "learning_rate": 2.0678953368080497e-06,
      "loss": 0.0095,
      "step": 2427600
    },
    {
      "epoch": 3.9728533741809207,
      "grad_norm": 0.5025382041931152,
      "learning_rate": 2.0678294445945324e-06,
      "loss": 0.0136,
      "step": 2427620
    },
    {
      "epoch": 3.9728861046195743,
      "grad_norm": 0.5169979929924011,
      "learning_rate": 2.067763552381015e-06,
      "loss": 0.0114,
      "step": 2427640
    },
    {
      "epoch": 3.9729188350582274,
      "grad_norm": 0.16509561240673065,
      "learning_rate": 2.067697660167498e-06,
      "loss": 0.0149,
      "step": 2427660
    },
    {
      "epoch": 3.9729515654968806,
      "grad_norm": 0.11215740442276001,
      "learning_rate": 2.067631767953981e-06,
      "loss": 0.0101,
      "step": 2427680
    },
    {
      "epoch": 3.972984295935534,
      "grad_norm": 0.19297882914543152,
      "learning_rate": 2.0675658757404642e-06,
      "loss": 0.0097,
      "step": 2427700
    },
    {
      "epoch": 3.9730170263741877,
      "grad_norm": 1.0767537355422974,
      "learning_rate": 2.067499983526947e-06,
      "loss": 0.0117,
      "step": 2427720
    },
    {
      "epoch": 3.973049756812841,
      "grad_norm": 0.13258765637874603,
      "learning_rate": 2.0674340913134297e-06,
      "loss": 0.0136,
      "step": 2427740
    },
    {
      "epoch": 3.973082487251494,
      "grad_norm": 0.1582062840461731,
      "learning_rate": 2.0673681990999125e-06,
      "loss": 0.0129,
      "step": 2427760
    },
    {
      "epoch": 3.9731152176901476,
      "grad_norm": 0.03799924999475479,
      "learning_rate": 2.067302306886395e-06,
      "loss": 0.0083,
      "step": 2427780
    },
    {
      "epoch": 3.973147948128801,
      "grad_norm": 0.11209329217672348,
      "learning_rate": 2.0672364146728784e-06,
      "loss": 0.0106,
      "step": 2427800
    },
    {
      "epoch": 3.973180678567454,
      "grad_norm": 0.4387264549732208,
      "learning_rate": 2.067170522459361e-06,
      "loss": 0.0107,
      "step": 2427820
    },
    {
      "epoch": 3.9732134090061075,
      "grad_norm": 0.14712435007095337,
      "learning_rate": 2.067104630245844e-06,
      "loss": 0.0109,
      "step": 2427840
    },
    {
      "epoch": 3.9732461394447607,
      "grad_norm": 0.2583693265914917,
      "learning_rate": 2.067038738032327e-06,
      "loss": 0.0151,
      "step": 2427860
    },
    {
      "epoch": 3.9732788698834143,
      "grad_norm": 0.22912725806236267,
      "learning_rate": 2.0669728458188097e-06,
      "loss": 0.009,
      "step": 2427880
    },
    {
      "epoch": 3.9733116003220674,
      "grad_norm": 0.3223758935928345,
      "learning_rate": 2.066906953605293e-06,
      "loss": 0.0108,
      "step": 2427900
    },
    {
      "epoch": 3.973344330760721,
      "grad_norm": 0.3709554374217987,
      "learning_rate": 2.0668410613917756e-06,
      "loss": 0.0148,
      "step": 2427920
    },
    {
      "epoch": 3.973377061199374,
      "grad_norm": 0.2562800347805023,
      "learning_rate": 2.0667751691782584e-06,
      "loss": 0.0074,
      "step": 2427940
    },
    {
      "epoch": 3.9734097916380273,
      "grad_norm": 0.2536250352859497,
      "learning_rate": 2.066709276964741e-06,
      "loss": 0.0118,
      "step": 2427960
    },
    {
      "epoch": 3.973442522076681,
      "grad_norm": 0.18026548624038696,
      "learning_rate": 2.066643384751224e-06,
      "loss": 0.0124,
      "step": 2427980
    },
    {
      "epoch": 3.973475252515334,
      "grad_norm": 0.34257933497428894,
      "learning_rate": 2.066577492537707e-06,
      "loss": 0.0083,
      "step": 2428000
    },
    {
      "epoch": 3.9735079829539877,
      "grad_norm": 0.2986220717430115,
      "learning_rate": 2.0665116003241898e-06,
      "loss": 0.0071,
      "step": 2428020
    },
    {
      "epoch": 3.973540713392641,
      "grad_norm": 0.2759976089000702,
      "learning_rate": 2.066445708110673e-06,
      "loss": 0.0111,
      "step": 2428040
    },
    {
      "epoch": 3.9735734438312944,
      "grad_norm": 0.45081156492233276,
      "learning_rate": 2.0663798158971557e-06,
      "loss": 0.0072,
      "step": 2428060
    },
    {
      "epoch": 3.9736061742699476,
      "grad_norm": 0.019092196598649025,
      "learning_rate": 2.0663139236836384e-06,
      "loss": 0.01,
      "step": 2428080
    },
    {
      "epoch": 3.9736389047086007,
      "grad_norm": 0.16945794224739075,
      "learning_rate": 2.0662480314701216e-06,
      "loss": 0.012,
      "step": 2428100
    },
    {
      "epoch": 3.9736716351472543,
      "grad_norm": 0.06963279843330383,
      "learning_rate": 2.0661821392566043e-06,
      "loss": 0.0137,
      "step": 2428120
    },
    {
      "epoch": 3.9737043655859074,
      "grad_norm": 0.21822333335876465,
      "learning_rate": 2.066116247043087e-06,
      "loss": 0.0085,
      "step": 2428140
    },
    {
      "epoch": 3.973737096024561,
      "grad_norm": 0.6139687299728394,
      "learning_rate": 2.06605035482957e-06,
      "loss": 0.0122,
      "step": 2428160
    },
    {
      "epoch": 3.973769826463214,
      "grad_norm": 0.4364910423755646,
      "learning_rate": 2.0659844626160525e-06,
      "loss": 0.0108,
      "step": 2428180
    },
    {
      "epoch": 3.973802556901868,
      "grad_norm": 0.3528801202774048,
      "learning_rate": 2.0659185704025357e-06,
      "loss": 0.0123,
      "step": 2428200
    },
    {
      "epoch": 3.973835287340521,
      "grad_norm": 0.24786359071731567,
      "learning_rate": 2.065852678189019e-06,
      "loss": 0.0105,
      "step": 2428220
    },
    {
      "epoch": 3.973868017779174,
      "grad_norm": 0.2900550365447998,
      "learning_rate": 2.0657867859755016e-06,
      "loss": 0.0103,
      "step": 2428240
    },
    {
      "epoch": 3.9739007482178277,
      "grad_norm": 0.5049446225166321,
      "learning_rate": 2.0657208937619843e-06,
      "loss": 0.009,
      "step": 2428260
    },
    {
      "epoch": 3.973933478656481,
      "grad_norm": 0.8046616315841675,
      "learning_rate": 2.065655001548467e-06,
      "loss": 0.0153,
      "step": 2428280
    },
    {
      "epoch": 3.9739662090951344,
      "grad_norm": 0.17309123277664185,
      "learning_rate": 2.0655891093349502e-06,
      "loss": 0.0106,
      "step": 2428300
    },
    {
      "epoch": 3.9739989395337876,
      "grad_norm": 0.13106156885623932,
      "learning_rate": 2.065523217121433e-06,
      "loss": 0.0068,
      "step": 2428320
    },
    {
      "epoch": 3.974031669972441,
      "grad_norm": 0.23127664625644684,
      "learning_rate": 2.0654573249079157e-06,
      "loss": 0.0131,
      "step": 2428340
    },
    {
      "epoch": 3.9740644004110943,
      "grad_norm": 0.21851035952568054,
      "learning_rate": 2.0653914326943985e-06,
      "loss": 0.0098,
      "step": 2428360
    },
    {
      "epoch": 3.9740971308497475,
      "grad_norm": 0.12887820601463318,
      "learning_rate": 2.065325540480881e-06,
      "loss": 0.0112,
      "step": 2428380
    },
    {
      "epoch": 3.974129861288401,
      "grad_norm": 0.12472721934318542,
      "learning_rate": 2.0652596482673644e-06,
      "loss": 0.0091,
      "step": 2428400
    },
    {
      "epoch": 3.974162591727054,
      "grad_norm": 0.47281190752983093,
      "learning_rate": 2.0651937560538475e-06,
      "loss": 0.0103,
      "step": 2428420
    },
    {
      "epoch": 3.974195322165708,
      "grad_norm": 0.2888883650302887,
      "learning_rate": 2.0651278638403303e-06,
      "loss": 0.0132,
      "step": 2428440
    },
    {
      "epoch": 3.974228052604361,
      "grad_norm": 0.2616775631904602,
      "learning_rate": 2.065061971626813e-06,
      "loss": 0.009,
      "step": 2428460
    },
    {
      "epoch": 3.9742607830430146,
      "grad_norm": 0.13827602565288544,
      "learning_rate": 2.0649960794132957e-06,
      "loss": 0.0108,
      "step": 2428480
    },
    {
      "epoch": 3.9742935134816677,
      "grad_norm": 0.16997963190078735,
      "learning_rate": 2.064930187199779e-06,
      "loss": 0.0108,
      "step": 2428500
    },
    {
      "epoch": 3.974326243920321,
      "grad_norm": 0.0680767148733139,
      "learning_rate": 2.0648642949862616e-06,
      "loss": 0.0093,
      "step": 2428520
    },
    {
      "epoch": 3.9743589743589745,
      "grad_norm": 0.83732670545578,
      "learning_rate": 2.0647984027727444e-06,
      "loss": 0.0083,
      "step": 2428540
    },
    {
      "epoch": 3.9743917047976276,
      "grad_norm": 0.30729660391807556,
      "learning_rate": 2.0647325105592275e-06,
      "loss": 0.01,
      "step": 2428560
    },
    {
      "epoch": 3.974424435236281,
      "grad_norm": 0.37595880031585693,
      "learning_rate": 2.0646666183457103e-06,
      "loss": 0.0095,
      "step": 2428580
    },
    {
      "epoch": 3.9744571656749343,
      "grad_norm": 0.7220096588134766,
      "learning_rate": 2.064600726132193e-06,
      "loss": 0.0129,
      "step": 2428600
    },
    {
      "epoch": 3.974489896113588,
      "grad_norm": 0.14721503853797913,
      "learning_rate": 2.064534833918676e-06,
      "loss": 0.0084,
      "step": 2428620
    },
    {
      "epoch": 3.974522626552241,
      "grad_norm": 0.30205103754997253,
      "learning_rate": 2.064468941705159e-06,
      "loss": 0.017,
      "step": 2428640
    },
    {
      "epoch": 3.9745553569908942,
      "grad_norm": 0.5138034224510193,
      "learning_rate": 2.0644030494916417e-06,
      "loss": 0.0106,
      "step": 2428660
    },
    {
      "epoch": 3.974588087429548,
      "grad_norm": 0.3638007640838623,
      "learning_rate": 2.0643371572781244e-06,
      "loss": 0.0138,
      "step": 2428680
    },
    {
      "epoch": 3.974620817868201,
      "grad_norm": 0.6556950211524963,
      "learning_rate": 2.0642712650646076e-06,
      "loss": 0.0147,
      "step": 2428700
    },
    {
      "epoch": 3.974653548306854,
      "grad_norm": 0.07659856975078583,
      "learning_rate": 2.0642053728510903e-06,
      "loss": 0.0144,
      "step": 2428720
    },
    {
      "epoch": 3.9746862787455077,
      "grad_norm": 0.3120166063308716,
      "learning_rate": 2.0641394806375735e-06,
      "loss": 0.0086,
      "step": 2428740
    },
    {
      "epoch": 3.9747190091841613,
      "grad_norm": 0.1304347664117813,
      "learning_rate": 2.0640735884240562e-06,
      "loss": 0.009,
      "step": 2428760
    },
    {
      "epoch": 3.9747517396228145,
      "grad_norm": 0.10080960392951965,
      "learning_rate": 2.064007696210539e-06,
      "loss": 0.0142,
      "step": 2428780
    },
    {
      "epoch": 3.9747844700614676,
      "grad_norm": 0.20679150521755219,
      "learning_rate": 2.0639418039970217e-06,
      "loss": 0.0085,
      "step": 2428800
    },
    {
      "epoch": 3.974817200500121,
      "grad_norm": 0.336137980222702,
      "learning_rate": 2.063875911783505e-06,
      "loss": 0.0134,
      "step": 2428820
    },
    {
      "epoch": 3.9748499309387744,
      "grad_norm": 0.36964601278305054,
      "learning_rate": 2.0638100195699876e-06,
      "loss": 0.0105,
      "step": 2428840
    },
    {
      "epoch": 3.9748826613774275,
      "grad_norm": 0.5832308530807495,
      "learning_rate": 2.0637441273564703e-06,
      "loss": 0.0132,
      "step": 2428860
    },
    {
      "epoch": 3.974915391816081,
      "grad_norm": 0.34440767765045166,
      "learning_rate": 2.063678235142953e-06,
      "loss": 0.007,
      "step": 2428880
    },
    {
      "epoch": 3.9749481222547347,
      "grad_norm": 0.04641127213835716,
      "learning_rate": 2.0636123429294362e-06,
      "loss": 0.0077,
      "step": 2428900
    },
    {
      "epoch": 3.974980852693388,
      "grad_norm": 0.5318935513496399,
      "learning_rate": 2.063546450715919e-06,
      "loss": 0.0091,
      "step": 2428920
    },
    {
      "epoch": 3.975013583132041,
      "grad_norm": 0.24724018573760986,
      "learning_rate": 2.063480558502402e-06,
      "loss": 0.0081,
      "step": 2428940
    },
    {
      "epoch": 3.9750463135706946,
      "grad_norm": 0.34310272336006165,
      "learning_rate": 2.063414666288885e-06,
      "loss": 0.0131,
      "step": 2428960
    },
    {
      "epoch": 3.9750790440093478,
      "grad_norm": 0.2608852982521057,
      "learning_rate": 2.0633487740753676e-06,
      "loss": 0.0105,
      "step": 2428980
    },
    {
      "epoch": 3.975111774448001,
      "grad_norm": 0.2654914855957031,
      "learning_rate": 2.0632828818618504e-06,
      "loss": 0.0123,
      "step": 2429000
    },
    {
      "epoch": 3.9751445048866545,
      "grad_norm": 0.2160854935646057,
      "learning_rate": 2.0632169896483335e-06,
      "loss": 0.011,
      "step": 2429020
    },
    {
      "epoch": 3.975177235325308,
      "grad_norm": 0.3981582522392273,
      "learning_rate": 2.0631510974348163e-06,
      "loss": 0.0103,
      "step": 2429040
    },
    {
      "epoch": 3.9752099657639612,
      "grad_norm": 0.14646178483963013,
      "learning_rate": 2.063085205221299e-06,
      "loss": 0.0098,
      "step": 2429060
    },
    {
      "epoch": 3.9752426962026144,
      "grad_norm": 0.1687486469745636,
      "learning_rate": 2.0630193130077817e-06,
      "loss": 0.0075,
      "step": 2429080
    },
    {
      "epoch": 3.975275426641268,
      "grad_norm": 0.26164236664772034,
      "learning_rate": 2.062953420794265e-06,
      "loss": 0.0159,
      "step": 2429100
    },
    {
      "epoch": 3.975308157079921,
      "grad_norm": 0.31692031025886536,
      "learning_rate": 2.062887528580748e-06,
      "loss": 0.0102,
      "step": 2429120
    },
    {
      "epoch": 3.9753408875185743,
      "grad_norm": 0.30082079768180847,
      "learning_rate": 2.062821636367231e-06,
      "loss": 0.007,
      "step": 2429140
    },
    {
      "epoch": 3.975373617957228,
      "grad_norm": 0.40367060899734497,
      "learning_rate": 2.0627557441537136e-06,
      "loss": 0.0089,
      "step": 2429160
    },
    {
      "epoch": 3.9754063483958815,
      "grad_norm": 0.17423029243946075,
      "learning_rate": 2.0626898519401963e-06,
      "loss": 0.0121,
      "step": 2429180
    },
    {
      "epoch": 3.9754390788345346,
      "grad_norm": 0.23528508841991425,
      "learning_rate": 2.062623959726679e-06,
      "loss": 0.0074,
      "step": 2429200
    },
    {
      "epoch": 3.9754718092731878,
      "grad_norm": 0.22074638307094574,
      "learning_rate": 2.062558067513162e-06,
      "loss": 0.0107,
      "step": 2429220
    },
    {
      "epoch": 3.9755045397118414,
      "grad_norm": 0.237038716673851,
      "learning_rate": 2.062492175299645e-06,
      "loss": 0.0091,
      "step": 2429240
    },
    {
      "epoch": 3.9755372701504945,
      "grad_norm": 0.20403560996055603,
      "learning_rate": 2.0624262830861277e-06,
      "loss": 0.0148,
      "step": 2429260
    },
    {
      "epoch": 3.9755700005891477,
      "grad_norm": 0.4240969717502594,
      "learning_rate": 2.062360390872611e-06,
      "loss": 0.0097,
      "step": 2429280
    },
    {
      "epoch": 3.9756027310278013,
      "grad_norm": 0.3197806477546692,
      "learning_rate": 2.0622944986590936e-06,
      "loss": 0.0093,
      "step": 2429300
    },
    {
      "epoch": 3.9756354614664544,
      "grad_norm": 0.41938331723213196,
      "learning_rate": 2.0622286064455767e-06,
      "loss": 0.0149,
      "step": 2429320
    },
    {
      "epoch": 3.975668191905108,
      "grad_norm": 0.5278419852256775,
      "learning_rate": 2.0621627142320595e-06,
      "loss": 0.0126,
      "step": 2429340
    },
    {
      "epoch": 3.975700922343761,
      "grad_norm": 0.20345711708068848,
      "learning_rate": 2.0620968220185422e-06,
      "loss": 0.0091,
      "step": 2429360
    },
    {
      "epoch": 3.9757336527824148,
      "grad_norm": 0.8029236793518066,
      "learning_rate": 2.062030929805025e-06,
      "loss": 0.0172,
      "step": 2429380
    },
    {
      "epoch": 3.975766383221068,
      "grad_norm": 0.14754323661327362,
      "learning_rate": 2.0619650375915077e-06,
      "loss": 0.0117,
      "step": 2429400
    },
    {
      "epoch": 3.975799113659721,
      "grad_norm": 0.35470449924468994,
      "learning_rate": 2.061899145377991e-06,
      "loss": 0.0078,
      "step": 2429420
    },
    {
      "epoch": 3.9758318440983746,
      "grad_norm": 0.16940367221832275,
      "learning_rate": 2.061833253164474e-06,
      "loss": 0.0069,
      "step": 2429440
    },
    {
      "epoch": 3.975864574537028,
      "grad_norm": 1.1238619089126587,
      "learning_rate": 2.0617673609509568e-06,
      "loss": 0.0123,
      "step": 2429460
    },
    {
      "epoch": 3.9758973049756814,
      "grad_norm": 0.6540200114250183,
      "learning_rate": 2.0617014687374395e-06,
      "loss": 0.0123,
      "step": 2429480
    },
    {
      "epoch": 3.9759300354143345,
      "grad_norm": 0.18860206007957458,
      "learning_rate": 2.0616355765239222e-06,
      "loss": 0.0079,
      "step": 2429500
    },
    {
      "epoch": 3.975962765852988,
      "grad_norm": 0.08107516914606094,
      "learning_rate": 2.0615696843104054e-06,
      "loss": 0.0143,
      "step": 2429520
    },
    {
      "epoch": 3.9759954962916413,
      "grad_norm": 0.26982906460762024,
      "learning_rate": 2.061503792096888e-06,
      "loss": 0.0134,
      "step": 2429540
    },
    {
      "epoch": 3.9760282267302944,
      "grad_norm": 0.0810059905052185,
      "learning_rate": 2.061437899883371e-06,
      "loss": 0.01,
      "step": 2429560
    },
    {
      "epoch": 3.976060957168948,
      "grad_norm": 0.41983723640441895,
      "learning_rate": 2.0613720076698536e-06,
      "loss": 0.008,
      "step": 2429580
    },
    {
      "epoch": 3.976093687607601,
      "grad_norm": 0.17308075726032257,
      "learning_rate": 2.0613061154563364e-06,
      "loss": 0.0102,
      "step": 2429600
    },
    {
      "epoch": 3.9761264180462548,
      "grad_norm": 0.3267022669315338,
      "learning_rate": 2.0612402232428195e-06,
      "loss": 0.0053,
      "step": 2429620
    },
    {
      "epoch": 3.976159148484908,
      "grad_norm": 0.16746419668197632,
      "learning_rate": 2.0611743310293027e-06,
      "loss": 0.0132,
      "step": 2429640
    },
    {
      "epoch": 3.9761918789235615,
      "grad_norm": 0.5325840711593628,
      "learning_rate": 2.0611084388157854e-06,
      "loss": 0.0085,
      "step": 2429660
    },
    {
      "epoch": 3.9762246093622147,
      "grad_norm": 0.22861337661743164,
      "learning_rate": 2.061042546602268e-06,
      "loss": 0.0104,
      "step": 2429680
    },
    {
      "epoch": 3.976257339800868,
      "grad_norm": 0.08187178522348404,
      "learning_rate": 2.060976654388751e-06,
      "loss": 0.0075,
      "step": 2429700
    },
    {
      "epoch": 3.9762900702395214,
      "grad_norm": 0.3630543053150177,
      "learning_rate": 2.060910762175234e-06,
      "loss": 0.0081,
      "step": 2429720
    },
    {
      "epoch": 3.9763228006781746,
      "grad_norm": 0.21597358584403992,
      "learning_rate": 2.060844869961717e-06,
      "loss": 0.008,
      "step": 2429740
    },
    {
      "epoch": 3.976355531116828,
      "grad_norm": 0.20528985559940338,
      "learning_rate": 2.0607789777481996e-06,
      "loss": 0.0084,
      "step": 2429760
    },
    {
      "epoch": 3.9763882615554813,
      "grad_norm": 0.7444139122962952,
      "learning_rate": 2.0607130855346823e-06,
      "loss": 0.0154,
      "step": 2429780
    },
    {
      "epoch": 3.976420991994135,
      "grad_norm": 0.1447916328907013,
      "learning_rate": 2.0606471933211655e-06,
      "loss": 0.0094,
      "step": 2429800
    },
    {
      "epoch": 3.976453722432788,
      "grad_norm": 0.16784624755382538,
      "learning_rate": 2.060581301107648e-06,
      "loss": 0.0076,
      "step": 2429820
    },
    {
      "epoch": 3.976486452871441,
      "grad_norm": 0.3008272051811218,
      "learning_rate": 2.0605154088941314e-06,
      "loss": 0.0058,
      "step": 2429840
    },
    {
      "epoch": 3.976519183310095,
      "grad_norm": 0.16283632814884186,
      "learning_rate": 2.060449516680614e-06,
      "loss": 0.0141,
      "step": 2429860
    },
    {
      "epoch": 3.976551913748748,
      "grad_norm": 0.11237800121307373,
      "learning_rate": 2.060383624467097e-06,
      "loss": 0.0095,
      "step": 2429880
    },
    {
      "epoch": 3.9765846441874015,
      "grad_norm": 0.5873095989227295,
      "learning_rate": 2.0603177322535796e-06,
      "loss": 0.0112,
      "step": 2429900
    },
    {
      "epoch": 3.9766173746260547,
      "grad_norm": 0.7064017057418823,
      "learning_rate": 2.0602518400400627e-06,
      "loss": 0.0103,
      "step": 2429920
    },
    {
      "epoch": 3.9766501050647083,
      "grad_norm": 0.306902140378952,
      "learning_rate": 2.0601859478265455e-06,
      "loss": 0.0106,
      "step": 2429940
    },
    {
      "epoch": 3.9766828355033614,
      "grad_norm": 0.14906926453113556,
      "learning_rate": 2.0601200556130282e-06,
      "loss": 0.0121,
      "step": 2429960
    },
    {
      "epoch": 3.9767155659420146,
      "grad_norm": 0.1344306468963623,
      "learning_rate": 2.0600541633995114e-06,
      "loss": 0.0079,
      "step": 2429980
    },
    {
      "epoch": 3.976748296380668,
      "grad_norm": 0.13440072536468506,
      "learning_rate": 2.059988271185994e-06,
      "loss": 0.0096,
      "step": 2430000
    },
    {
      "epoch": 3.9767810268193213,
      "grad_norm": 0.19865258038043976,
      "learning_rate": 2.059922378972477e-06,
      "loss": 0.0111,
      "step": 2430020
    },
    {
      "epoch": 3.976813757257975,
      "grad_norm": 0.29881879687309265,
      "learning_rate": 2.05985648675896e-06,
      "loss": 0.012,
      "step": 2430040
    },
    {
      "epoch": 3.976846487696628,
      "grad_norm": 0.16970576345920563,
      "learning_rate": 2.0597905945454428e-06,
      "loss": 0.0124,
      "step": 2430060
    },
    {
      "epoch": 3.9768792181352817,
      "grad_norm": 0.09901712834835052,
      "learning_rate": 2.0597247023319255e-06,
      "loss": 0.0126,
      "step": 2430080
    },
    {
      "epoch": 3.976911948573935,
      "grad_norm": 0.9364538192749023,
      "learning_rate": 2.0596588101184083e-06,
      "loss": 0.0116,
      "step": 2430100
    },
    {
      "epoch": 3.976944679012588,
      "grad_norm": 0.18214383721351624,
      "learning_rate": 2.0595929179048914e-06,
      "loss": 0.0106,
      "step": 2430120
    },
    {
      "epoch": 3.9769774094512416,
      "grad_norm": 0.18838702142238617,
      "learning_rate": 2.059527025691374e-06,
      "loss": 0.0054,
      "step": 2430140
    },
    {
      "epoch": 3.9770101398898947,
      "grad_norm": 0.2661188542842865,
      "learning_rate": 2.0594611334778573e-06,
      "loss": 0.0063,
      "step": 2430160
    },
    {
      "epoch": 3.9770428703285483,
      "grad_norm": 0.3120507001876831,
      "learning_rate": 2.05939524126434e-06,
      "loss": 0.007,
      "step": 2430180
    },
    {
      "epoch": 3.9770756007672015,
      "grad_norm": 0.3345133364200592,
      "learning_rate": 2.059329349050823e-06,
      "loss": 0.0107,
      "step": 2430200
    },
    {
      "epoch": 3.977108331205855,
      "grad_norm": 0.10647895187139511,
      "learning_rate": 2.0592634568373055e-06,
      "loss": 0.0079,
      "step": 2430220
    },
    {
      "epoch": 3.977141061644508,
      "grad_norm": 0.6494737267494202,
      "learning_rate": 2.0591975646237887e-06,
      "loss": 0.0139,
      "step": 2430240
    },
    {
      "epoch": 3.9771737920831614,
      "grad_norm": 0.34916993975639343,
      "learning_rate": 2.0591316724102714e-06,
      "loss": 0.0164,
      "step": 2430260
    },
    {
      "epoch": 3.977206522521815,
      "grad_norm": 0.09522084146738052,
      "learning_rate": 2.059065780196754e-06,
      "loss": 0.0112,
      "step": 2430280
    },
    {
      "epoch": 3.977239252960468,
      "grad_norm": 0.21662190556526184,
      "learning_rate": 2.058999887983237e-06,
      "loss": 0.0103,
      "step": 2430300
    },
    {
      "epoch": 3.9772719833991212,
      "grad_norm": 0.23879612982273102,
      "learning_rate": 2.05893399576972e-06,
      "loss": 0.0085,
      "step": 2430320
    },
    {
      "epoch": 3.977304713837775,
      "grad_norm": 0.6547340154647827,
      "learning_rate": 2.0588681035562032e-06,
      "loss": 0.0123,
      "step": 2430340
    },
    {
      "epoch": 3.9773374442764284,
      "grad_norm": 0.5280249714851379,
      "learning_rate": 2.058802211342686e-06,
      "loss": 0.0131,
      "step": 2430360
    },
    {
      "epoch": 3.9773701747150816,
      "grad_norm": 0.8663879632949829,
      "learning_rate": 2.0587363191291687e-06,
      "loss": 0.0079,
      "step": 2430380
    },
    {
      "epoch": 3.9774029051537347,
      "grad_norm": 0.33718737959861755,
      "learning_rate": 2.0586704269156515e-06,
      "loss": 0.0121,
      "step": 2430400
    },
    {
      "epoch": 3.9774356355923883,
      "grad_norm": 0.4884014129638672,
      "learning_rate": 2.058604534702134e-06,
      "loss": 0.0106,
      "step": 2430420
    },
    {
      "epoch": 3.9774683660310415,
      "grad_norm": 0.4406799077987671,
      "learning_rate": 2.0585386424886174e-06,
      "loss": 0.0113,
      "step": 2430440
    },
    {
      "epoch": 3.9775010964696946,
      "grad_norm": 0.08535782992839813,
      "learning_rate": 2.0584727502751e-06,
      "loss": 0.0136,
      "step": 2430460
    },
    {
      "epoch": 3.9775338269083482,
      "grad_norm": 0.3345986008644104,
      "learning_rate": 2.058406858061583e-06,
      "loss": 0.0154,
      "step": 2430480
    },
    {
      "epoch": 3.977566557347002,
      "grad_norm": 0.181726336479187,
      "learning_rate": 2.058340965848066e-06,
      "loss": 0.0073,
      "step": 2430500
    },
    {
      "epoch": 3.977599287785655,
      "grad_norm": 0.10454744100570679,
      "learning_rate": 2.0582750736345487e-06,
      "loss": 0.0077,
      "step": 2430520
    },
    {
      "epoch": 3.977632018224308,
      "grad_norm": 0.09087160229682922,
      "learning_rate": 2.058209181421032e-06,
      "loss": 0.0089,
      "step": 2430540
    },
    {
      "epoch": 3.9776647486629617,
      "grad_norm": 1.6733750104904175,
      "learning_rate": 2.0581432892075147e-06,
      "loss": 0.0124,
      "step": 2430560
    },
    {
      "epoch": 3.977697479101615,
      "grad_norm": 0.05475981533527374,
      "learning_rate": 2.0580773969939974e-06,
      "loss": 0.0125,
      "step": 2430580
    },
    {
      "epoch": 3.977730209540268,
      "grad_norm": 0.23672190308570862,
      "learning_rate": 2.05801150478048e-06,
      "loss": 0.0102,
      "step": 2430600
    },
    {
      "epoch": 3.9777629399789216,
      "grad_norm": 0.12112780660390854,
      "learning_rate": 2.057945612566963e-06,
      "loss": 0.0099,
      "step": 2430620
    },
    {
      "epoch": 3.977795670417575,
      "grad_norm": 0.40813398361206055,
      "learning_rate": 2.057879720353446e-06,
      "loss": 0.0108,
      "step": 2430640
    },
    {
      "epoch": 3.9778284008562284,
      "grad_norm": 0.3614561855792999,
      "learning_rate": 2.0578138281399288e-06,
      "loss": 0.0078,
      "step": 2430660
    },
    {
      "epoch": 3.9778611312948815,
      "grad_norm": 0.14460915327072144,
      "learning_rate": 2.057747935926412e-06,
      "loss": 0.0069,
      "step": 2430680
    },
    {
      "epoch": 3.977893861733535,
      "grad_norm": 0.5016691088676453,
      "learning_rate": 2.0576820437128947e-06,
      "loss": 0.0135,
      "step": 2430700
    },
    {
      "epoch": 3.9779265921721882,
      "grad_norm": 0.10888630151748657,
      "learning_rate": 2.0576161514993774e-06,
      "loss": 0.0089,
      "step": 2430720
    },
    {
      "epoch": 3.9779593226108414,
      "grad_norm": 0.44231951236724854,
      "learning_rate": 2.0575502592858606e-06,
      "loss": 0.0179,
      "step": 2430740
    },
    {
      "epoch": 3.977992053049495,
      "grad_norm": 0.13266274333000183,
      "learning_rate": 2.0574843670723433e-06,
      "loss": 0.0125,
      "step": 2430760
    },
    {
      "epoch": 3.978024783488148,
      "grad_norm": 0.0838419571518898,
      "learning_rate": 2.057418474858826e-06,
      "loss": 0.0105,
      "step": 2430780
    },
    {
      "epoch": 3.9780575139268017,
      "grad_norm": 0.1830928921699524,
      "learning_rate": 2.057352582645309e-06,
      "loss": 0.0105,
      "step": 2430800
    },
    {
      "epoch": 3.978090244365455,
      "grad_norm": 0.20896928012371063,
      "learning_rate": 2.0572866904317915e-06,
      "loss": 0.0088,
      "step": 2430820
    },
    {
      "epoch": 3.9781229748041085,
      "grad_norm": 0.4933985769748688,
      "learning_rate": 2.0572207982182747e-06,
      "loss": 0.0086,
      "step": 2430840
    },
    {
      "epoch": 3.9781557052427616,
      "grad_norm": 0.42816293239593506,
      "learning_rate": 2.057154906004758e-06,
      "loss": 0.0101,
      "step": 2430860
    },
    {
      "epoch": 3.978188435681415,
      "grad_norm": 0.17350220680236816,
      "learning_rate": 2.0570890137912406e-06,
      "loss": 0.0066,
      "step": 2430880
    },
    {
      "epoch": 3.9782211661200684,
      "grad_norm": 0.11985132098197937,
      "learning_rate": 2.0570231215777233e-06,
      "loss": 0.0076,
      "step": 2430900
    },
    {
      "epoch": 3.9782538965587215,
      "grad_norm": 0.1324966847896576,
      "learning_rate": 2.056957229364206e-06,
      "loss": 0.0073,
      "step": 2430920
    },
    {
      "epoch": 3.978286626997375,
      "grad_norm": 0.5931168794631958,
      "learning_rate": 2.0568913371506892e-06,
      "loss": 0.0118,
      "step": 2430940
    },
    {
      "epoch": 3.9783193574360283,
      "grad_norm": 0.17217111587524414,
      "learning_rate": 2.056825444937172e-06,
      "loss": 0.0118,
      "step": 2430960
    },
    {
      "epoch": 3.978352087874682,
      "grad_norm": 0.4830901026725769,
      "learning_rate": 2.0567595527236547e-06,
      "loss": 0.0119,
      "step": 2430980
    },
    {
      "epoch": 3.978384818313335,
      "grad_norm": 0.38460031151771545,
      "learning_rate": 2.0566936605101375e-06,
      "loss": 0.0106,
      "step": 2431000
    },
    {
      "epoch": 3.978417548751988,
      "grad_norm": 0.05170071870088577,
      "learning_rate": 2.05662776829662e-06,
      "loss": 0.015,
      "step": 2431020
    },
    {
      "epoch": 3.9784502791906418,
      "grad_norm": 0.20414963364601135,
      "learning_rate": 2.0565618760831034e-06,
      "loss": 0.0084,
      "step": 2431040
    },
    {
      "epoch": 3.978483009629295,
      "grad_norm": 0.14831940829753876,
      "learning_rate": 2.0564959838695865e-06,
      "loss": 0.0152,
      "step": 2431060
    },
    {
      "epoch": 3.9785157400679485,
      "grad_norm": 0.17471417784690857,
      "learning_rate": 2.0564300916560693e-06,
      "loss": 0.013,
      "step": 2431080
    },
    {
      "epoch": 3.9785484705066017,
      "grad_norm": 0.7783030271530151,
      "learning_rate": 2.056364199442552e-06,
      "loss": 0.015,
      "step": 2431100
    },
    {
      "epoch": 3.9785812009452552,
      "grad_norm": 0.24151331186294556,
      "learning_rate": 2.0562983072290348e-06,
      "loss": 0.0127,
      "step": 2431120
    },
    {
      "epoch": 3.9786139313839084,
      "grad_norm": 0.32368576526641846,
      "learning_rate": 2.056232415015518e-06,
      "loss": 0.0095,
      "step": 2431140
    },
    {
      "epoch": 3.9786466618225615,
      "grad_norm": 0.25611943006515503,
      "learning_rate": 2.0561665228020007e-06,
      "loss": 0.0087,
      "step": 2431160
    },
    {
      "epoch": 3.978679392261215,
      "grad_norm": 0.13370171189308167,
      "learning_rate": 2.0561006305884834e-06,
      "loss": 0.0138,
      "step": 2431180
    },
    {
      "epoch": 3.9787121226998683,
      "grad_norm": 0.11771468818187714,
      "learning_rate": 2.0560347383749666e-06,
      "loss": 0.0068,
      "step": 2431200
    },
    {
      "epoch": 3.978744853138522,
      "grad_norm": 0.2782062888145447,
      "learning_rate": 2.0559688461614493e-06,
      "loss": 0.0081,
      "step": 2431220
    },
    {
      "epoch": 3.978777583577175,
      "grad_norm": 0.3867562413215637,
      "learning_rate": 2.055902953947932e-06,
      "loss": 0.0098,
      "step": 2431240
    },
    {
      "epoch": 3.9788103140158286,
      "grad_norm": 0.22678370773792267,
      "learning_rate": 2.055837061734415e-06,
      "loss": 0.0104,
      "step": 2431260
    },
    {
      "epoch": 3.978843044454482,
      "grad_norm": 0.4993140995502472,
      "learning_rate": 2.055771169520898e-06,
      "loss": 0.0145,
      "step": 2431280
    },
    {
      "epoch": 3.978875774893135,
      "grad_norm": 0.1439439207315445,
      "learning_rate": 2.0557052773073807e-06,
      "loss": 0.0112,
      "step": 2431300
    },
    {
      "epoch": 3.9789085053317885,
      "grad_norm": 0.24650326371192932,
      "learning_rate": 2.0556393850938634e-06,
      "loss": 0.0125,
      "step": 2431320
    },
    {
      "epoch": 3.9789412357704417,
      "grad_norm": 0.1957228034734726,
      "learning_rate": 2.0555734928803466e-06,
      "loss": 0.0084,
      "step": 2431340
    },
    {
      "epoch": 3.9789739662090953,
      "grad_norm": 0.15898412466049194,
      "learning_rate": 2.0555076006668293e-06,
      "loss": 0.0149,
      "step": 2431360
    },
    {
      "epoch": 3.9790066966477484,
      "grad_norm": 0.4289267063140869,
      "learning_rate": 2.0554417084533125e-06,
      "loss": 0.0146,
      "step": 2431380
    },
    {
      "epoch": 3.979039427086402,
      "grad_norm": 0.1511244922876358,
      "learning_rate": 2.0553758162397952e-06,
      "loss": 0.0086,
      "step": 2431400
    },
    {
      "epoch": 3.979072157525055,
      "grad_norm": 1.072800874710083,
      "learning_rate": 2.055309924026278e-06,
      "loss": 0.0136,
      "step": 2431420
    },
    {
      "epoch": 3.9791048879637083,
      "grad_norm": 0.0715070515871048,
      "learning_rate": 2.0552440318127607e-06,
      "loss": 0.0095,
      "step": 2431440
    },
    {
      "epoch": 3.979137618402362,
      "grad_norm": 0.16759558022022247,
      "learning_rate": 2.055178139599244e-06,
      "loss": 0.0076,
      "step": 2431460
    },
    {
      "epoch": 3.979170348841015,
      "grad_norm": 0.3039776384830475,
      "learning_rate": 2.0551122473857266e-06,
      "loss": 0.0091,
      "step": 2431480
    },
    {
      "epoch": 3.9792030792796687,
      "grad_norm": 0.37055519223213196,
      "learning_rate": 2.0550463551722094e-06,
      "loss": 0.0076,
      "step": 2431500
    },
    {
      "epoch": 3.979235809718322,
      "grad_norm": 0.04928974434733391,
      "learning_rate": 2.054980462958692e-06,
      "loss": 0.0091,
      "step": 2431520
    },
    {
      "epoch": 3.9792685401569754,
      "grad_norm": 0.09509245306253433,
      "learning_rate": 2.0549145707451753e-06,
      "loss": 0.0143,
      "step": 2431540
    },
    {
      "epoch": 3.9793012705956285,
      "grad_norm": 0.4693496823310852,
      "learning_rate": 2.054848678531658e-06,
      "loss": 0.0088,
      "step": 2431560
    },
    {
      "epoch": 3.9793340010342817,
      "grad_norm": 0.3177213966846466,
      "learning_rate": 2.054782786318141e-06,
      "loss": 0.0105,
      "step": 2431580
    },
    {
      "epoch": 3.9793667314729353,
      "grad_norm": 0.2183837741613388,
      "learning_rate": 2.054716894104624e-06,
      "loss": 0.0072,
      "step": 2431600
    },
    {
      "epoch": 3.9793994619115884,
      "grad_norm": 0.20254673063755035,
      "learning_rate": 2.0546510018911066e-06,
      "loss": 0.0073,
      "step": 2431620
    },
    {
      "epoch": 3.979432192350242,
      "grad_norm": 0.2029559463262558,
      "learning_rate": 2.0545851096775894e-06,
      "loss": 0.0102,
      "step": 2431640
    },
    {
      "epoch": 3.979464922788895,
      "grad_norm": 0.0276421457529068,
      "learning_rate": 2.0545192174640725e-06,
      "loss": 0.0154,
      "step": 2431660
    },
    {
      "epoch": 3.979497653227549,
      "grad_norm": 0.17198844254016876,
      "learning_rate": 2.0544533252505553e-06,
      "loss": 0.0119,
      "step": 2431680
    },
    {
      "epoch": 3.979530383666202,
      "grad_norm": 0.4054291844367981,
      "learning_rate": 2.054387433037038e-06,
      "loss": 0.0129,
      "step": 2431700
    },
    {
      "epoch": 3.979563114104855,
      "grad_norm": 0.10072006285190582,
      "learning_rate": 2.0543215408235208e-06,
      "loss": 0.0083,
      "step": 2431720
    },
    {
      "epoch": 3.9795958445435087,
      "grad_norm": 0.22362203896045685,
      "learning_rate": 2.054255648610004e-06,
      "loss": 0.0109,
      "step": 2431740
    },
    {
      "epoch": 3.979628574982162,
      "grad_norm": 0.3075625002384186,
      "learning_rate": 2.054189756396487e-06,
      "loss": 0.0114,
      "step": 2431760
    },
    {
      "epoch": 3.979661305420815,
      "grad_norm": 0.10047667473554611,
      "learning_rate": 2.05412386418297e-06,
      "loss": 0.0091,
      "step": 2431780
    },
    {
      "epoch": 3.9796940358594686,
      "grad_norm": 0.48229530453681946,
      "learning_rate": 2.0540579719694526e-06,
      "loss": 0.0117,
      "step": 2431800
    },
    {
      "epoch": 3.979726766298122,
      "grad_norm": 0.20065706968307495,
      "learning_rate": 2.0539920797559353e-06,
      "loss": 0.0087,
      "step": 2431820
    },
    {
      "epoch": 3.9797594967367753,
      "grad_norm": 0.32813772559165955,
      "learning_rate": 2.053926187542418e-06,
      "loss": 0.0121,
      "step": 2431840
    },
    {
      "epoch": 3.9797922271754285,
      "grad_norm": 0.06175355613231659,
      "learning_rate": 2.053860295328901e-06,
      "loss": 0.01,
      "step": 2431860
    },
    {
      "epoch": 3.979824957614082,
      "grad_norm": 0.19707708060741425,
      "learning_rate": 2.053794403115384e-06,
      "loss": 0.0151,
      "step": 2431880
    },
    {
      "epoch": 3.979857688052735,
      "grad_norm": 0.20953813195228577,
      "learning_rate": 2.0537285109018667e-06,
      "loss": 0.0114,
      "step": 2431900
    },
    {
      "epoch": 3.9798904184913884,
      "grad_norm": 0.5904144644737244,
      "learning_rate": 2.05366261868835e-06,
      "loss": 0.0088,
      "step": 2431920
    },
    {
      "epoch": 3.979923148930042,
      "grad_norm": 0.17180496454238892,
      "learning_rate": 2.0535967264748326e-06,
      "loss": 0.007,
      "step": 2431940
    },
    {
      "epoch": 3.9799558793686955,
      "grad_norm": 0.19030743837356567,
      "learning_rate": 2.0535308342613158e-06,
      "loss": 0.0104,
      "step": 2431960
    },
    {
      "epoch": 3.9799886098073487,
      "grad_norm": 0.22896233201026917,
      "learning_rate": 2.0534649420477985e-06,
      "loss": 0.0071,
      "step": 2431980
    },
    {
      "epoch": 3.980021340246002,
      "grad_norm": 0.4740087389945984,
      "learning_rate": 2.0533990498342812e-06,
      "loss": 0.0101,
      "step": 2432000
    },
    {
      "epoch": 3.9800540706846554,
      "grad_norm": 0.08330587297677994,
      "learning_rate": 2.053333157620764e-06,
      "loss": 0.0136,
      "step": 2432020
    },
    {
      "epoch": 3.9800868011233086,
      "grad_norm": 0.23527179658412933,
      "learning_rate": 2.0532672654072467e-06,
      "loss": 0.0146,
      "step": 2432040
    },
    {
      "epoch": 3.9801195315619617,
      "grad_norm": 0.3633939027786255,
      "learning_rate": 2.05320137319373e-06,
      "loss": 0.0115,
      "step": 2432060
    },
    {
      "epoch": 3.9801522620006153,
      "grad_norm": 0.20511457324028015,
      "learning_rate": 2.053135480980213e-06,
      "loss": 0.0105,
      "step": 2432080
    },
    {
      "epoch": 3.980184992439269,
      "grad_norm": 0.29341456294059753,
      "learning_rate": 2.0530695887666958e-06,
      "loss": 0.0079,
      "step": 2432100
    },
    {
      "epoch": 3.980217722877922,
      "grad_norm": 0.23697425425052643,
      "learning_rate": 2.0530036965531785e-06,
      "loss": 0.0117,
      "step": 2432120
    },
    {
      "epoch": 3.9802504533165752,
      "grad_norm": 0.22378815710544586,
      "learning_rate": 2.0529378043396613e-06,
      "loss": 0.0103,
      "step": 2432140
    },
    {
      "epoch": 3.980283183755229,
      "grad_norm": 0.263665109872818,
      "learning_rate": 2.0528719121261444e-06,
      "loss": 0.012,
      "step": 2432160
    },
    {
      "epoch": 3.980315914193882,
      "grad_norm": 0.06521858274936676,
      "learning_rate": 2.052806019912627e-06,
      "loss": 0.0111,
      "step": 2432180
    },
    {
      "epoch": 3.980348644632535,
      "grad_norm": 0.11049029976129532,
      "learning_rate": 2.05274012769911e-06,
      "loss": 0.0109,
      "step": 2432200
    },
    {
      "epoch": 3.9803813750711887,
      "grad_norm": 0.06978930532932281,
      "learning_rate": 2.0526742354855926e-06,
      "loss": 0.0129,
      "step": 2432220
    },
    {
      "epoch": 3.9804141055098423,
      "grad_norm": 0.1852327287197113,
      "learning_rate": 2.0526083432720754e-06,
      "loss": 0.0098,
      "step": 2432240
    },
    {
      "epoch": 3.9804468359484955,
      "grad_norm": 0.4034041464328766,
      "learning_rate": 2.0525424510585585e-06,
      "loss": 0.0085,
      "step": 2432260
    },
    {
      "epoch": 3.9804795663871486,
      "grad_norm": 0.4439968764781952,
      "learning_rate": 2.0524765588450417e-06,
      "loss": 0.0107,
      "step": 2432280
    },
    {
      "epoch": 3.980512296825802,
      "grad_norm": 0.9352974891662598,
      "learning_rate": 2.0524106666315244e-06,
      "loss": 0.0114,
      "step": 2432300
    },
    {
      "epoch": 3.9805450272644554,
      "grad_norm": 1.222272276878357,
      "learning_rate": 2.052344774418007e-06,
      "loss": 0.0127,
      "step": 2432320
    },
    {
      "epoch": 3.9805777577031085,
      "grad_norm": 0.06678418070077896,
      "learning_rate": 2.05227888220449e-06,
      "loss": 0.0091,
      "step": 2432340
    },
    {
      "epoch": 3.980610488141762,
      "grad_norm": 0.26991310715675354,
      "learning_rate": 2.052212989990973e-06,
      "loss": 0.0175,
      "step": 2432360
    },
    {
      "epoch": 3.9806432185804153,
      "grad_norm": 0.24072541296482086,
      "learning_rate": 2.052147097777456e-06,
      "loss": 0.0085,
      "step": 2432380
    },
    {
      "epoch": 3.980675949019069,
      "grad_norm": 0.18630941212177277,
      "learning_rate": 2.0520812055639386e-06,
      "loss": 0.0089,
      "step": 2432400
    },
    {
      "epoch": 3.980708679457722,
      "grad_norm": 0.18449561297893524,
      "learning_rate": 2.0520153133504213e-06,
      "loss": 0.0152,
      "step": 2432420
    },
    {
      "epoch": 3.9807414098963756,
      "grad_norm": 0.3160947263240814,
      "learning_rate": 2.0519494211369045e-06,
      "loss": 0.0138,
      "step": 2432440
    },
    {
      "epoch": 3.9807741403350287,
      "grad_norm": 0.28736063838005066,
      "learning_rate": 2.051883528923387e-06,
      "loss": 0.0116,
      "step": 2432460
    },
    {
      "epoch": 3.980806870773682,
      "grad_norm": 0.6306604146957397,
      "learning_rate": 2.0518176367098704e-06,
      "loss": 0.0091,
      "step": 2432480
    },
    {
      "epoch": 3.9808396012123355,
      "grad_norm": 0.6169842481613159,
      "learning_rate": 2.051751744496353e-06,
      "loss": 0.0106,
      "step": 2432500
    },
    {
      "epoch": 3.9808723316509886,
      "grad_norm": 0.16902077198028564,
      "learning_rate": 2.051685852282836e-06,
      "loss": 0.0065,
      "step": 2432520
    },
    {
      "epoch": 3.9809050620896422,
      "grad_norm": 0.28807303309440613,
      "learning_rate": 2.0516199600693186e-06,
      "loss": 0.0129,
      "step": 2432540
    },
    {
      "epoch": 3.9809377925282954,
      "grad_norm": 0.06423037499189377,
      "learning_rate": 2.0515540678558018e-06,
      "loss": 0.0072,
      "step": 2432560
    },
    {
      "epoch": 3.980970522966949,
      "grad_norm": 0.1269087791442871,
      "learning_rate": 2.0514881756422845e-06,
      "loss": 0.0128,
      "step": 2432580
    },
    {
      "epoch": 3.981003253405602,
      "grad_norm": 0.4433484673500061,
      "learning_rate": 2.0514222834287672e-06,
      "loss": 0.0151,
      "step": 2432600
    },
    {
      "epoch": 3.9810359838442553,
      "grad_norm": 0.07689123600721359,
      "learning_rate": 2.0513563912152504e-06,
      "loss": 0.0106,
      "step": 2432620
    },
    {
      "epoch": 3.981068714282909,
      "grad_norm": 0.3066064119338989,
      "learning_rate": 2.051290499001733e-06,
      "loss": 0.0147,
      "step": 2432640
    },
    {
      "epoch": 3.981101444721562,
      "grad_norm": 0.14973336458206177,
      "learning_rate": 2.051224606788216e-06,
      "loss": 0.01,
      "step": 2432660
    },
    {
      "epoch": 3.9811341751602156,
      "grad_norm": 0.1791946440935135,
      "learning_rate": 2.051158714574699e-06,
      "loss": 0.0088,
      "step": 2432680
    },
    {
      "epoch": 3.9811669055988688,
      "grad_norm": 0.7966590523719788,
      "learning_rate": 2.0510928223611818e-06,
      "loss": 0.0082,
      "step": 2432700
    },
    {
      "epoch": 3.9811996360375224,
      "grad_norm": 0.42355915904045105,
      "learning_rate": 2.0510269301476645e-06,
      "loss": 0.0152,
      "step": 2432720
    },
    {
      "epoch": 3.9812323664761755,
      "grad_norm": 0.28312787413597107,
      "learning_rate": 2.0509610379341473e-06,
      "loss": 0.0081,
      "step": 2432740
    },
    {
      "epoch": 3.9812650969148287,
      "grad_norm": 0.08759630471467972,
      "learning_rate": 2.0508951457206304e-06,
      "loss": 0.0069,
      "step": 2432760
    },
    {
      "epoch": 3.9812978273534823,
      "grad_norm": 0.11203866451978683,
      "learning_rate": 2.050829253507113e-06,
      "loss": 0.0112,
      "step": 2432780
    },
    {
      "epoch": 3.9813305577921354,
      "grad_norm": 0.13681310415267944,
      "learning_rate": 2.0507633612935963e-06,
      "loss": 0.0077,
      "step": 2432800
    },
    {
      "epoch": 3.981363288230789,
      "grad_norm": 0.18905024230480194,
      "learning_rate": 2.050697469080079e-06,
      "loss": 0.0166,
      "step": 2432820
    },
    {
      "epoch": 3.981396018669442,
      "grad_norm": 0.14719747006893158,
      "learning_rate": 2.050631576866562e-06,
      "loss": 0.0092,
      "step": 2432840
    },
    {
      "epoch": 3.9814287491080957,
      "grad_norm": 0.34513014554977417,
      "learning_rate": 2.0505656846530445e-06,
      "loss": 0.0116,
      "step": 2432860
    },
    {
      "epoch": 3.981461479546749,
      "grad_norm": 0.3013949394226074,
      "learning_rate": 2.0504997924395277e-06,
      "loss": 0.009,
      "step": 2432880
    },
    {
      "epoch": 3.981494209985402,
      "grad_norm": 0.39838171005249023,
      "learning_rate": 2.0504339002260104e-06,
      "loss": 0.0167,
      "step": 2432900
    },
    {
      "epoch": 3.9815269404240556,
      "grad_norm": 0.14728668332099915,
      "learning_rate": 2.050368008012493e-06,
      "loss": 0.0148,
      "step": 2432920
    },
    {
      "epoch": 3.981559670862709,
      "grad_norm": 0.042430974543094635,
      "learning_rate": 2.050302115798976e-06,
      "loss": 0.0081,
      "step": 2432940
    },
    {
      "epoch": 3.9815924013013624,
      "grad_norm": 0.5431132912635803,
      "learning_rate": 2.050236223585459e-06,
      "loss": 0.0155,
      "step": 2432960
    },
    {
      "epoch": 3.9816251317400155,
      "grad_norm": 0.36585864424705505,
      "learning_rate": 2.0501703313719423e-06,
      "loss": 0.0061,
      "step": 2432980
    },
    {
      "epoch": 3.981657862178669,
      "grad_norm": 0.17479921877384186,
      "learning_rate": 2.050104439158425e-06,
      "loss": 0.0088,
      "step": 2433000
    },
    {
      "epoch": 3.9816905926173223,
      "grad_norm": 0.12201257795095444,
      "learning_rate": 2.0500385469449077e-06,
      "loss": 0.0086,
      "step": 2433020
    },
    {
      "epoch": 3.9817233230559754,
      "grad_norm": 0.10469385236501694,
      "learning_rate": 2.0499726547313905e-06,
      "loss": 0.0107,
      "step": 2433040
    },
    {
      "epoch": 3.981756053494629,
      "grad_norm": 0.07091682404279709,
      "learning_rate": 2.0499067625178732e-06,
      "loss": 0.0122,
      "step": 2433060
    },
    {
      "epoch": 3.981788783933282,
      "grad_norm": 0.22423473000526428,
      "learning_rate": 2.0498408703043564e-06,
      "loss": 0.0121,
      "step": 2433080
    },
    {
      "epoch": 3.9818215143719358,
      "grad_norm": 0.1666356325149536,
      "learning_rate": 2.049774978090839e-06,
      "loss": 0.0094,
      "step": 2433100
    },
    {
      "epoch": 3.981854244810589,
      "grad_norm": 0.40299859642982483,
      "learning_rate": 2.049709085877322e-06,
      "loss": 0.0094,
      "step": 2433120
    },
    {
      "epoch": 3.9818869752492425,
      "grad_norm": 0.10891255736351013,
      "learning_rate": 2.049643193663805e-06,
      "loss": 0.0078,
      "step": 2433140
    },
    {
      "epoch": 3.9819197056878957,
      "grad_norm": 0.21654066443443298,
      "learning_rate": 2.0495773014502878e-06,
      "loss": 0.0134,
      "step": 2433160
    },
    {
      "epoch": 3.981952436126549,
      "grad_norm": 0.23506686091423035,
      "learning_rate": 2.049511409236771e-06,
      "loss": 0.0123,
      "step": 2433180
    },
    {
      "epoch": 3.9819851665652024,
      "grad_norm": 1.2076284885406494,
      "learning_rate": 2.0494455170232537e-06,
      "loss": 0.013,
      "step": 2433200
    },
    {
      "epoch": 3.9820178970038556,
      "grad_norm": 0.6475723385810852,
      "learning_rate": 2.0493796248097364e-06,
      "loss": 0.0138,
      "step": 2433220
    },
    {
      "epoch": 3.982050627442509,
      "grad_norm": 0.06678399443626404,
      "learning_rate": 2.049313732596219e-06,
      "loss": 0.0066,
      "step": 2433240
    },
    {
      "epoch": 3.9820833578811623,
      "grad_norm": 0.08255589008331299,
      "learning_rate": 2.049247840382702e-06,
      "loss": 0.0127,
      "step": 2433260
    },
    {
      "epoch": 3.982116088319816,
      "grad_norm": 0.4526604413986206,
      "learning_rate": 2.049181948169185e-06,
      "loss": 0.0102,
      "step": 2433280
    },
    {
      "epoch": 3.982148818758469,
      "grad_norm": 0.4206041991710663,
      "learning_rate": 2.0491160559556678e-06,
      "loss": 0.0103,
      "step": 2433300
    },
    {
      "epoch": 3.982181549197122,
      "grad_norm": 0.2861787676811218,
      "learning_rate": 2.049050163742151e-06,
      "loss": 0.0134,
      "step": 2433320
    },
    {
      "epoch": 3.982214279635776,
      "grad_norm": 0.2534940540790558,
      "learning_rate": 2.0489842715286337e-06,
      "loss": 0.0123,
      "step": 2433340
    },
    {
      "epoch": 3.982247010074429,
      "grad_norm": 0.6969974637031555,
      "learning_rate": 2.0489183793151164e-06,
      "loss": 0.0159,
      "step": 2433360
    },
    {
      "epoch": 3.982279740513082,
      "grad_norm": 0.22701281309127808,
      "learning_rate": 2.0488524871015996e-06,
      "loss": 0.0079,
      "step": 2433380
    },
    {
      "epoch": 3.9823124709517357,
      "grad_norm": 0.10461319237947464,
      "learning_rate": 2.0487865948880823e-06,
      "loss": 0.0083,
      "step": 2433400
    },
    {
      "epoch": 3.9823452013903893,
      "grad_norm": 0.07436250895261765,
      "learning_rate": 2.048720702674565e-06,
      "loss": 0.0064,
      "step": 2433420
    },
    {
      "epoch": 3.9823779318290424,
      "grad_norm": 0.24136289954185486,
      "learning_rate": 2.048654810461048e-06,
      "loss": 0.0093,
      "step": 2433440
    },
    {
      "epoch": 3.9824106622676956,
      "grad_norm": 0.17472697794437408,
      "learning_rate": 2.0485889182475306e-06,
      "loss": 0.0108,
      "step": 2433460
    },
    {
      "epoch": 3.982443392706349,
      "grad_norm": 0.17959751188755035,
      "learning_rate": 2.0485230260340137e-06,
      "loss": 0.0118,
      "step": 2433480
    },
    {
      "epoch": 3.9824761231450023,
      "grad_norm": 0.28736189007759094,
      "learning_rate": 2.048457133820497e-06,
      "loss": 0.0141,
      "step": 2433500
    },
    {
      "epoch": 3.9825088535836555,
      "grad_norm": 0.1294524073600769,
      "learning_rate": 2.0483912416069796e-06,
      "loss": 0.0067,
      "step": 2433520
    },
    {
      "epoch": 3.982541584022309,
      "grad_norm": 0.3218419551849365,
      "learning_rate": 2.0483253493934624e-06,
      "loss": 0.011,
      "step": 2433540
    },
    {
      "epoch": 3.9825743144609627,
      "grad_norm": 0.17676447331905365,
      "learning_rate": 2.048259457179945e-06,
      "loss": 0.0089,
      "step": 2433560
    },
    {
      "epoch": 3.982607044899616,
      "grad_norm": 0.291677325963974,
      "learning_rate": 2.0481935649664283e-06,
      "loss": 0.0129,
      "step": 2433580
    },
    {
      "epoch": 3.982639775338269,
      "grad_norm": 0.23033294081687927,
      "learning_rate": 2.048127672752911e-06,
      "loss": 0.0118,
      "step": 2433600
    },
    {
      "epoch": 3.9826725057769226,
      "grad_norm": 0.33115464448928833,
      "learning_rate": 2.0480617805393937e-06,
      "loss": 0.0066,
      "step": 2433620
    },
    {
      "epoch": 3.9827052362155757,
      "grad_norm": 0.3076503574848175,
      "learning_rate": 2.0479958883258765e-06,
      "loss": 0.0138,
      "step": 2433640
    },
    {
      "epoch": 3.982737966654229,
      "grad_norm": 0.6676207184791565,
      "learning_rate": 2.0479299961123596e-06,
      "loss": 0.0147,
      "step": 2433660
    },
    {
      "epoch": 3.9827706970928825,
      "grad_norm": 0.1522110253572464,
      "learning_rate": 2.0478641038988424e-06,
      "loss": 0.0071,
      "step": 2433680
    },
    {
      "epoch": 3.982803427531536,
      "grad_norm": 0.2894035577774048,
      "learning_rate": 2.0477982116853255e-06,
      "loss": 0.014,
      "step": 2433700
    },
    {
      "epoch": 3.982836157970189,
      "grad_norm": 0.42482393980026245,
      "learning_rate": 2.0477323194718083e-06,
      "loss": 0.0113,
      "step": 2433720
    },
    {
      "epoch": 3.9828688884088423,
      "grad_norm": 0.31057730317115784,
      "learning_rate": 2.047666427258291e-06,
      "loss": 0.0111,
      "step": 2433740
    },
    {
      "epoch": 3.982901618847496,
      "grad_norm": 0.25585415959358215,
      "learning_rate": 2.0476005350447738e-06,
      "loss": 0.0093,
      "step": 2433760
    },
    {
      "epoch": 3.982934349286149,
      "grad_norm": 0.505469560623169,
      "learning_rate": 2.047534642831257e-06,
      "loss": 0.0145,
      "step": 2433780
    },
    {
      "epoch": 3.9829670797248022,
      "grad_norm": 0.37207430601119995,
      "learning_rate": 2.0474687506177397e-06,
      "loss": 0.01,
      "step": 2433800
    },
    {
      "epoch": 3.982999810163456,
      "grad_norm": 0.25217199325561523,
      "learning_rate": 2.0474028584042224e-06,
      "loss": 0.0113,
      "step": 2433820
    },
    {
      "epoch": 3.983032540602109,
      "grad_norm": 0.1328001469373703,
      "learning_rate": 2.0473369661907056e-06,
      "loss": 0.0113,
      "step": 2433840
    },
    {
      "epoch": 3.9830652710407626,
      "grad_norm": 0.5108091831207275,
      "learning_rate": 2.0472710739771883e-06,
      "loss": 0.013,
      "step": 2433860
    },
    {
      "epoch": 3.9830980014794157,
      "grad_norm": 0.19649003446102142,
      "learning_rate": 2.047205181763671e-06,
      "loss": 0.0083,
      "step": 2433880
    },
    {
      "epoch": 3.9831307319180693,
      "grad_norm": 0.2891276478767395,
      "learning_rate": 2.0471392895501542e-06,
      "loss": 0.0083,
      "step": 2433900
    },
    {
      "epoch": 3.9831634623567225,
      "grad_norm": 0.20363636314868927,
      "learning_rate": 2.047073397336637e-06,
      "loss": 0.0082,
      "step": 2433920
    },
    {
      "epoch": 3.9831961927953756,
      "grad_norm": 0.08121141046285629,
      "learning_rate": 2.0470075051231197e-06,
      "loss": 0.0164,
      "step": 2433940
    },
    {
      "epoch": 3.983228923234029,
      "grad_norm": 0.7626439332962036,
      "learning_rate": 2.0469416129096024e-06,
      "loss": 0.01,
      "step": 2433960
    },
    {
      "epoch": 3.9832616536726824,
      "grad_norm": 0.571670651435852,
      "learning_rate": 2.0468757206960856e-06,
      "loss": 0.0173,
      "step": 2433980
    },
    {
      "epoch": 3.983294384111336,
      "grad_norm": 0.26324233412742615,
      "learning_rate": 2.0468098284825683e-06,
      "loss": 0.0085,
      "step": 2434000
    },
    {
      "epoch": 3.983327114549989,
      "grad_norm": 0.47066938877105713,
      "learning_rate": 2.0467439362690515e-06,
      "loss": 0.0126,
      "step": 2434020
    },
    {
      "epoch": 3.9833598449886427,
      "grad_norm": 0.2822154760360718,
      "learning_rate": 2.0466780440555342e-06,
      "loss": 0.0117,
      "step": 2434040
    },
    {
      "epoch": 3.983392575427296,
      "grad_norm": 0.1772957593202591,
      "learning_rate": 2.046612151842017e-06,
      "loss": 0.0076,
      "step": 2434060
    },
    {
      "epoch": 3.983425305865949,
      "grad_norm": 0.3719649016857147,
      "learning_rate": 2.0465462596284997e-06,
      "loss": 0.0095,
      "step": 2434080
    },
    {
      "epoch": 3.9834580363046026,
      "grad_norm": 0.4818778932094574,
      "learning_rate": 2.046480367414983e-06,
      "loss": 0.0098,
      "step": 2434100
    },
    {
      "epoch": 3.9834907667432558,
      "grad_norm": 0.1071912869811058,
      "learning_rate": 2.0464144752014656e-06,
      "loss": 0.0101,
      "step": 2434120
    },
    {
      "epoch": 3.9835234971819093,
      "grad_norm": 0.3244456946849823,
      "learning_rate": 2.0463485829879484e-06,
      "loss": 0.0083,
      "step": 2434140
    },
    {
      "epoch": 3.9835562276205625,
      "grad_norm": 0.2043236643075943,
      "learning_rate": 2.046282690774431e-06,
      "loss": 0.0064,
      "step": 2434160
    },
    {
      "epoch": 3.983588958059216,
      "grad_norm": 0.43523821234703064,
      "learning_rate": 2.0462167985609143e-06,
      "loss": 0.0088,
      "step": 2434180
    },
    {
      "epoch": 3.9836216884978692,
      "grad_norm": 0.35877516865730286,
      "learning_rate": 2.046150906347397e-06,
      "loss": 0.0065,
      "step": 2434200
    },
    {
      "epoch": 3.9836544189365224,
      "grad_norm": 0.10836495459079742,
      "learning_rate": 2.04608501413388e-06,
      "loss": 0.0118,
      "step": 2434220
    },
    {
      "epoch": 3.983687149375176,
      "grad_norm": 0.09923729300498962,
      "learning_rate": 2.046019121920363e-06,
      "loss": 0.0092,
      "step": 2434240
    },
    {
      "epoch": 3.983719879813829,
      "grad_norm": 0.2650564908981323,
      "learning_rate": 2.0459532297068456e-06,
      "loss": 0.0135,
      "step": 2434260
    },
    {
      "epoch": 3.9837526102524827,
      "grad_norm": 0.4002077877521515,
      "learning_rate": 2.0458873374933284e-06,
      "loss": 0.0095,
      "step": 2434280
    },
    {
      "epoch": 3.983785340691136,
      "grad_norm": 0.06648373603820801,
      "learning_rate": 2.0458214452798115e-06,
      "loss": 0.0091,
      "step": 2434300
    },
    {
      "epoch": 3.9838180711297895,
      "grad_norm": 0.37315353751182556,
      "learning_rate": 2.0457555530662943e-06,
      "loss": 0.0094,
      "step": 2434320
    },
    {
      "epoch": 3.9838508015684426,
      "grad_norm": 0.3752118945121765,
      "learning_rate": 2.045689660852777e-06,
      "loss": 0.012,
      "step": 2434340
    },
    {
      "epoch": 3.9838835320070958,
      "grad_norm": 0.2905615568161011,
      "learning_rate": 2.0456237686392598e-06,
      "loss": 0.015,
      "step": 2434360
    },
    {
      "epoch": 3.9839162624457494,
      "grad_norm": 0.5140987634658813,
      "learning_rate": 2.045557876425743e-06,
      "loss": 0.0135,
      "step": 2434380
    },
    {
      "epoch": 3.9839489928844025,
      "grad_norm": 0.23314349353313446,
      "learning_rate": 2.045491984212226e-06,
      "loss": 0.0057,
      "step": 2434400
    },
    {
      "epoch": 3.983981723323056,
      "grad_norm": 0.564221978187561,
      "learning_rate": 2.045426091998709e-06,
      "loss": 0.0098,
      "step": 2434420
    },
    {
      "epoch": 3.9840144537617093,
      "grad_norm": 0.4250233471393585,
      "learning_rate": 2.0453601997851916e-06,
      "loss": 0.0112,
      "step": 2434440
    },
    {
      "epoch": 3.984047184200363,
      "grad_norm": 1.172278881072998,
      "learning_rate": 2.0452943075716743e-06,
      "loss": 0.0109,
      "step": 2434460
    },
    {
      "epoch": 3.984079914639016,
      "grad_norm": 0.1030895859003067,
      "learning_rate": 2.045228415358157e-06,
      "loss": 0.0085,
      "step": 2434480
    },
    {
      "epoch": 3.984112645077669,
      "grad_norm": 0.4892269968986511,
      "learning_rate": 2.0451625231446402e-06,
      "loss": 0.013,
      "step": 2434500
    },
    {
      "epoch": 3.9841453755163228,
      "grad_norm": 0.2625972628593445,
      "learning_rate": 2.045096630931123e-06,
      "loss": 0.0129,
      "step": 2434520
    },
    {
      "epoch": 3.984178105954976,
      "grad_norm": 0.18260397017002106,
      "learning_rate": 2.045030738717606e-06,
      "loss": 0.0086,
      "step": 2434540
    },
    {
      "epoch": 3.9842108363936295,
      "grad_norm": 0.3304016888141632,
      "learning_rate": 2.044964846504089e-06,
      "loss": 0.0192,
      "step": 2434560
    },
    {
      "epoch": 3.9842435668322826,
      "grad_norm": 0.1343737542629242,
      "learning_rate": 2.0448989542905716e-06,
      "loss": 0.0088,
      "step": 2434580
    },
    {
      "epoch": 3.9842762972709362,
      "grad_norm": 0.11386212706565857,
      "learning_rate": 2.0448330620770548e-06,
      "loss": 0.0084,
      "step": 2434600
    },
    {
      "epoch": 3.9843090277095894,
      "grad_norm": 0.24345678091049194,
      "learning_rate": 2.0447671698635375e-06,
      "loss": 0.0055,
      "step": 2434620
    },
    {
      "epoch": 3.9843417581482425,
      "grad_norm": 0.4263547956943512,
      "learning_rate": 2.0447012776500202e-06,
      "loss": 0.0112,
      "step": 2434640
    },
    {
      "epoch": 3.984374488586896,
      "grad_norm": 0.5444225072860718,
      "learning_rate": 2.044635385436503e-06,
      "loss": 0.0155,
      "step": 2434660
    },
    {
      "epoch": 3.9844072190255493,
      "grad_norm": 0.5700013637542725,
      "learning_rate": 2.0445694932229857e-06,
      "loss": 0.0096,
      "step": 2434680
    },
    {
      "epoch": 3.984439949464203,
      "grad_norm": 0.6649409532546997,
      "learning_rate": 2.044503601009469e-06,
      "loss": 0.0167,
      "step": 2434700
    },
    {
      "epoch": 3.984472679902856,
      "grad_norm": 0.1827896237373352,
      "learning_rate": 2.044437708795952e-06,
      "loss": 0.0122,
      "step": 2434720
    },
    {
      "epoch": 3.9845054103415096,
      "grad_norm": 0.3421325385570526,
      "learning_rate": 2.044371816582435e-06,
      "loss": 0.0106,
      "step": 2434740
    },
    {
      "epoch": 3.9845381407801628,
      "grad_norm": 0.10873071104288101,
      "learning_rate": 2.0443059243689175e-06,
      "loss": 0.0132,
      "step": 2434760
    },
    {
      "epoch": 3.984570871218816,
      "grad_norm": 0.10957735031843185,
      "learning_rate": 2.0442400321554003e-06,
      "loss": 0.0093,
      "step": 2434780
    },
    {
      "epoch": 3.9846036016574695,
      "grad_norm": 0.6050418615341187,
      "learning_rate": 2.0441741399418834e-06,
      "loss": 0.0123,
      "step": 2434800
    },
    {
      "epoch": 3.9846363320961227,
      "grad_norm": 0.5440212488174438,
      "learning_rate": 2.044108247728366e-06,
      "loss": 0.0105,
      "step": 2434820
    },
    {
      "epoch": 3.984669062534776,
      "grad_norm": 1.0803948640823364,
      "learning_rate": 2.044042355514849e-06,
      "loss": 0.013,
      "step": 2434840
    },
    {
      "epoch": 3.9847017929734294,
      "grad_norm": 0.3446025550365448,
      "learning_rate": 2.0439764633013317e-06,
      "loss": 0.0101,
      "step": 2434860
    },
    {
      "epoch": 3.984734523412083,
      "grad_norm": 0.18224069476127625,
      "learning_rate": 2.0439105710878144e-06,
      "loss": 0.0102,
      "step": 2434880
    },
    {
      "epoch": 3.984767253850736,
      "grad_norm": 0.07519392669200897,
      "learning_rate": 2.0438446788742976e-06,
      "loss": 0.0075,
      "step": 2434900
    },
    {
      "epoch": 3.9847999842893893,
      "grad_norm": 0.12344291061162949,
      "learning_rate": 2.0437787866607807e-06,
      "loss": 0.0135,
      "step": 2434920
    },
    {
      "epoch": 3.984832714728043,
      "grad_norm": 0.12446514517068863,
      "learning_rate": 2.0437128944472635e-06,
      "loss": 0.0082,
      "step": 2434940
    },
    {
      "epoch": 3.984865445166696,
      "grad_norm": 0.3598411977291107,
      "learning_rate": 2.043647002233746e-06,
      "loss": 0.0122,
      "step": 2434960
    },
    {
      "epoch": 3.984898175605349,
      "grad_norm": 0.3928397595882416,
      "learning_rate": 2.043581110020229e-06,
      "loss": 0.0133,
      "step": 2434980
    },
    {
      "epoch": 3.984930906044003,
      "grad_norm": 0.1398603320121765,
      "learning_rate": 2.043515217806712e-06,
      "loss": 0.0131,
      "step": 2435000
    },
    {
      "epoch": 3.9849636364826564,
      "grad_norm": 0.4408959448337555,
      "learning_rate": 2.043449325593195e-06,
      "loss": 0.0096,
      "step": 2435020
    },
    {
      "epoch": 3.9849963669213095,
      "grad_norm": 0.9363487362861633,
      "learning_rate": 2.0433834333796776e-06,
      "loss": 0.0099,
      "step": 2435040
    },
    {
      "epoch": 3.9850290973599627,
      "grad_norm": 0.23782682418823242,
      "learning_rate": 2.0433175411661603e-06,
      "loss": 0.0097,
      "step": 2435060
    },
    {
      "epoch": 3.9850618277986163,
      "grad_norm": 0.14094547927379608,
      "learning_rate": 2.0432516489526435e-06,
      "loss": 0.0093,
      "step": 2435080
    },
    {
      "epoch": 3.9850945582372694,
      "grad_norm": 0.08132915198802948,
      "learning_rate": 2.0431857567391262e-06,
      "loss": 0.0126,
      "step": 2435100
    },
    {
      "epoch": 3.9851272886759226,
      "grad_norm": 0.41420456767082214,
      "learning_rate": 2.0431198645256094e-06,
      "loss": 0.0137,
      "step": 2435120
    },
    {
      "epoch": 3.985160019114576,
      "grad_norm": 0.41221967339515686,
      "learning_rate": 2.043053972312092e-06,
      "loss": 0.0143,
      "step": 2435140
    },
    {
      "epoch": 3.9851927495532298,
      "grad_norm": 0.1605955809354782,
      "learning_rate": 2.042988080098575e-06,
      "loss": 0.0056,
      "step": 2435160
    },
    {
      "epoch": 3.985225479991883,
      "grad_norm": 0.30106133222579956,
      "learning_rate": 2.0429221878850576e-06,
      "loss": 0.0117,
      "step": 2435180
    },
    {
      "epoch": 3.985258210430536,
      "grad_norm": 0.6236743927001953,
      "learning_rate": 2.0428562956715408e-06,
      "loss": 0.0067,
      "step": 2435200
    },
    {
      "epoch": 3.9852909408691897,
      "grad_norm": 0.8198187947273254,
      "learning_rate": 2.0427904034580235e-06,
      "loss": 0.0105,
      "step": 2435220
    },
    {
      "epoch": 3.985323671307843,
      "grad_norm": 0.3825375735759735,
      "learning_rate": 2.0427245112445062e-06,
      "loss": 0.0141,
      "step": 2435240
    },
    {
      "epoch": 3.985356401746496,
      "grad_norm": 0.32981687784194946,
      "learning_rate": 2.0426586190309894e-06,
      "loss": 0.0094,
      "step": 2435260
    },
    {
      "epoch": 3.9853891321851496,
      "grad_norm": 0.12299850583076477,
      "learning_rate": 2.042592726817472e-06,
      "loss": 0.0165,
      "step": 2435280
    },
    {
      "epoch": 3.985421862623803,
      "grad_norm": 0.36648446321487427,
      "learning_rate": 2.042526834603955e-06,
      "loss": 0.0105,
      "step": 2435300
    },
    {
      "epoch": 3.9854545930624563,
      "grad_norm": 0.1521306335926056,
      "learning_rate": 2.042460942390438e-06,
      "loss": 0.0098,
      "step": 2435320
    },
    {
      "epoch": 3.9854873235011095,
      "grad_norm": 0.11244388669729233,
      "learning_rate": 2.042395050176921e-06,
      "loss": 0.0111,
      "step": 2435340
    },
    {
      "epoch": 3.985520053939763,
      "grad_norm": 0.18510498106479645,
      "learning_rate": 2.0423291579634035e-06,
      "loss": 0.0069,
      "step": 2435360
    },
    {
      "epoch": 3.985552784378416,
      "grad_norm": 0.6001695394515991,
      "learning_rate": 2.0422632657498863e-06,
      "loss": 0.0132,
      "step": 2435380
    },
    {
      "epoch": 3.9855855148170694,
      "grad_norm": 0.18860748410224915,
      "learning_rate": 2.0421973735363694e-06,
      "loss": 0.0091,
      "step": 2435400
    },
    {
      "epoch": 3.985618245255723,
      "grad_norm": 0.14154782891273499,
      "learning_rate": 2.042131481322852e-06,
      "loss": 0.0113,
      "step": 2435420
    },
    {
      "epoch": 3.985650975694376,
      "grad_norm": 0.21495188772678375,
      "learning_rate": 2.0420655891093353e-06,
      "loss": 0.01,
      "step": 2435440
    },
    {
      "epoch": 3.9856837061330297,
      "grad_norm": 1.0150318145751953,
      "learning_rate": 2.041999696895818e-06,
      "loss": 0.0174,
      "step": 2435460
    },
    {
      "epoch": 3.985716436571683,
      "grad_norm": 0.2670067846775055,
      "learning_rate": 2.041933804682301e-06,
      "loss": 0.0111,
      "step": 2435480
    },
    {
      "epoch": 3.9857491670103364,
      "grad_norm": 0.24205146729946136,
      "learning_rate": 2.0418679124687836e-06,
      "loss": 0.0099,
      "step": 2435500
    },
    {
      "epoch": 3.9857818974489896,
      "grad_norm": 0.14096148312091827,
      "learning_rate": 2.0418020202552667e-06,
      "loss": 0.011,
      "step": 2435520
    },
    {
      "epoch": 3.9858146278876427,
      "grad_norm": 0.7097052931785583,
      "learning_rate": 2.0417361280417495e-06,
      "loss": 0.0125,
      "step": 2435540
    },
    {
      "epoch": 3.9858473583262963,
      "grad_norm": 0.463993102312088,
      "learning_rate": 2.041670235828232e-06,
      "loss": 0.0089,
      "step": 2435560
    },
    {
      "epoch": 3.9858800887649495,
      "grad_norm": 0.39382442831993103,
      "learning_rate": 2.041604343614715e-06,
      "loss": 0.0124,
      "step": 2435580
    },
    {
      "epoch": 3.985912819203603,
      "grad_norm": 0.2971354126930237,
      "learning_rate": 2.041538451401198e-06,
      "loss": 0.0146,
      "step": 2435600
    },
    {
      "epoch": 3.9859455496422562,
      "grad_norm": 0.14893347024917603,
      "learning_rate": 2.0414725591876813e-06,
      "loss": 0.0083,
      "step": 2435620
    },
    {
      "epoch": 3.98597828008091,
      "grad_norm": 0.1683737337589264,
      "learning_rate": 2.041406666974164e-06,
      "loss": 0.0121,
      "step": 2435640
    },
    {
      "epoch": 3.986011010519563,
      "grad_norm": 0.17726066708564758,
      "learning_rate": 2.0413407747606467e-06,
      "loss": 0.0095,
      "step": 2435660
    },
    {
      "epoch": 3.986043740958216,
      "grad_norm": 0.09132266789674759,
      "learning_rate": 2.0412748825471295e-06,
      "loss": 0.0086,
      "step": 2435680
    },
    {
      "epoch": 3.9860764713968697,
      "grad_norm": 0.2196246236562729,
      "learning_rate": 2.0412089903336122e-06,
      "loss": 0.0099,
      "step": 2435700
    },
    {
      "epoch": 3.986109201835523,
      "grad_norm": 0.09333875775337219,
      "learning_rate": 2.0411430981200954e-06,
      "loss": 0.0098,
      "step": 2435720
    },
    {
      "epoch": 3.9861419322741765,
      "grad_norm": 0.34879571199417114,
      "learning_rate": 2.041077205906578e-06,
      "loss": 0.0092,
      "step": 2435740
    },
    {
      "epoch": 3.9861746627128296,
      "grad_norm": 0.9041534662246704,
      "learning_rate": 2.041011313693061e-06,
      "loss": 0.0185,
      "step": 2435760
    },
    {
      "epoch": 3.986207393151483,
      "grad_norm": 0.3421861231327057,
      "learning_rate": 2.040945421479544e-06,
      "loss": 0.0111,
      "step": 2435780
    },
    {
      "epoch": 3.9862401235901364,
      "grad_norm": 0.39716851711273193,
      "learning_rate": 2.0408795292660268e-06,
      "loss": 0.0086,
      "step": 2435800
    },
    {
      "epoch": 3.9862728540287895,
      "grad_norm": 0.07215560972690582,
      "learning_rate": 2.04081363705251e-06,
      "loss": 0.0115,
      "step": 2435820
    },
    {
      "epoch": 3.986305584467443,
      "grad_norm": 0.23689961433410645,
      "learning_rate": 2.0407477448389927e-06,
      "loss": 0.011,
      "step": 2435840
    },
    {
      "epoch": 3.9863383149060962,
      "grad_norm": 0.34646081924438477,
      "learning_rate": 2.0406818526254754e-06,
      "loss": 0.0096,
      "step": 2435860
    },
    {
      "epoch": 3.98637104534475,
      "grad_norm": 0.15177005529403687,
      "learning_rate": 2.040615960411958e-06,
      "loss": 0.0088,
      "step": 2435880
    },
    {
      "epoch": 3.986403775783403,
      "grad_norm": 0.43450936675071716,
      "learning_rate": 2.040550068198441e-06,
      "loss": 0.0138,
      "step": 2435900
    },
    {
      "epoch": 3.9864365062220566,
      "grad_norm": 0.36200159788131714,
      "learning_rate": 2.040484175984924e-06,
      "loss": 0.0122,
      "step": 2435920
    },
    {
      "epoch": 3.9864692366607097,
      "grad_norm": 0.4762115776538849,
      "learning_rate": 2.040418283771407e-06,
      "loss": 0.0102,
      "step": 2435940
    },
    {
      "epoch": 3.986501967099363,
      "grad_norm": 0.6914675235748291,
      "learning_rate": 2.04035239155789e-06,
      "loss": 0.011,
      "step": 2435960
    },
    {
      "epoch": 3.9865346975380165,
      "grad_norm": 0.4345954656600952,
      "learning_rate": 2.0402864993443727e-06,
      "loss": 0.0125,
      "step": 2435980
    },
    {
      "epoch": 3.9865674279766696,
      "grad_norm": 0.13107448816299438,
      "learning_rate": 2.0402206071308554e-06,
      "loss": 0.0128,
      "step": 2436000
    },
    {
      "epoch": 3.9866001584153232,
      "grad_norm": 0.35353586077690125,
      "learning_rate": 2.0401547149173386e-06,
      "loss": 0.0083,
      "step": 2436020
    },
    {
      "epoch": 3.9866328888539764,
      "grad_norm": 0.2145978957414627,
      "learning_rate": 2.0400888227038213e-06,
      "loss": 0.0093,
      "step": 2436040
    },
    {
      "epoch": 3.98666561929263,
      "grad_norm": 0.15067830681800842,
      "learning_rate": 2.040022930490304e-06,
      "loss": 0.0106,
      "step": 2436060
    },
    {
      "epoch": 3.986698349731283,
      "grad_norm": 0.06467798352241516,
      "learning_rate": 2.039957038276787e-06,
      "loss": 0.0079,
      "step": 2436080
    },
    {
      "epoch": 3.9867310801699363,
      "grad_norm": 0.25379541516304016,
      "learning_rate": 2.0398911460632696e-06,
      "loss": 0.011,
      "step": 2436100
    },
    {
      "epoch": 3.98676381060859,
      "grad_norm": 0.18661248683929443,
      "learning_rate": 2.0398252538497527e-06,
      "loss": 0.0111,
      "step": 2436120
    },
    {
      "epoch": 3.986796541047243,
      "grad_norm": 0.33467409014701843,
      "learning_rate": 2.039759361636236e-06,
      "loss": 0.0116,
      "step": 2436140
    },
    {
      "epoch": 3.9868292714858966,
      "grad_norm": 0.18212102353572845,
      "learning_rate": 2.0396934694227186e-06,
      "loss": 0.0055,
      "step": 2436160
    },
    {
      "epoch": 3.9868620019245498,
      "grad_norm": 0.7656100392341614,
      "learning_rate": 2.0396275772092014e-06,
      "loss": 0.0126,
      "step": 2436180
    },
    {
      "epoch": 3.9868947323632034,
      "grad_norm": 0.2622532248497009,
      "learning_rate": 2.039561684995684e-06,
      "loss": 0.0122,
      "step": 2436200
    },
    {
      "epoch": 3.9869274628018565,
      "grad_norm": 0.07850285619497299,
      "learning_rate": 2.0394957927821673e-06,
      "loss": 0.0093,
      "step": 2436220
    },
    {
      "epoch": 3.9869601932405097,
      "grad_norm": 0.11885318905115128,
      "learning_rate": 2.03942990056865e-06,
      "loss": 0.0105,
      "step": 2436240
    },
    {
      "epoch": 3.9869929236791632,
      "grad_norm": 0.4713025391101837,
      "learning_rate": 2.0393640083551327e-06,
      "loss": 0.0097,
      "step": 2436260
    },
    {
      "epoch": 3.9870256541178164,
      "grad_norm": 0.4989776611328125,
      "learning_rate": 2.0392981161416155e-06,
      "loss": 0.0107,
      "step": 2436280
    },
    {
      "epoch": 3.98705838455647,
      "grad_norm": 0.1614576280117035,
      "learning_rate": 2.0392322239280987e-06,
      "loss": 0.0066,
      "step": 2436300
    },
    {
      "epoch": 3.987091114995123,
      "grad_norm": 0.30725690722465515,
      "learning_rate": 2.0391663317145814e-06,
      "loss": 0.0141,
      "step": 2436320
    },
    {
      "epoch": 3.9871238454337767,
      "grad_norm": 0.1581905037164688,
      "learning_rate": 2.0391004395010646e-06,
      "loss": 0.0078,
      "step": 2436340
    },
    {
      "epoch": 3.98715657587243,
      "grad_norm": 0.08109467476606369,
      "learning_rate": 2.0390345472875473e-06,
      "loss": 0.007,
      "step": 2436360
    },
    {
      "epoch": 3.987189306311083,
      "grad_norm": 0.3661118745803833,
      "learning_rate": 2.03896865507403e-06,
      "loss": 0.0112,
      "step": 2436380
    },
    {
      "epoch": 3.9872220367497366,
      "grad_norm": 0.20168368518352509,
      "learning_rate": 2.0389027628605128e-06,
      "loss": 0.0078,
      "step": 2436400
    },
    {
      "epoch": 3.98725476718839,
      "grad_norm": 0.17146749794483185,
      "learning_rate": 2.038836870646996e-06,
      "loss": 0.0122,
      "step": 2436420
    },
    {
      "epoch": 3.987287497627043,
      "grad_norm": 0.16740591824054718,
      "learning_rate": 2.0387709784334787e-06,
      "loss": 0.0092,
      "step": 2436440
    },
    {
      "epoch": 3.9873202280656965,
      "grad_norm": 0.4481806755065918,
      "learning_rate": 2.0387050862199614e-06,
      "loss": 0.0127,
      "step": 2436460
    },
    {
      "epoch": 3.98735295850435,
      "grad_norm": 0.2895519733428955,
      "learning_rate": 2.0386391940064446e-06,
      "loss": 0.0086,
      "step": 2436480
    },
    {
      "epoch": 3.9873856889430033,
      "grad_norm": 0.2694528102874756,
      "learning_rate": 2.0385733017929273e-06,
      "loss": 0.0069,
      "step": 2436500
    },
    {
      "epoch": 3.9874184193816564,
      "grad_norm": 0.44138625264167786,
      "learning_rate": 2.03850740957941e-06,
      "loss": 0.0129,
      "step": 2436520
    },
    {
      "epoch": 3.98745114982031,
      "grad_norm": 0.9293754696846008,
      "learning_rate": 2.0384415173658932e-06,
      "loss": 0.0097,
      "step": 2436540
    },
    {
      "epoch": 3.987483880258963,
      "grad_norm": 0.23582686483860016,
      "learning_rate": 2.038375625152376e-06,
      "loss": 0.0092,
      "step": 2436560
    },
    {
      "epoch": 3.9875166106976163,
      "grad_norm": 0.2042117714881897,
      "learning_rate": 2.0383097329388587e-06,
      "loss": 0.0141,
      "step": 2436580
    },
    {
      "epoch": 3.98754934113627,
      "grad_norm": 0.33222851157188416,
      "learning_rate": 2.0382438407253414e-06,
      "loss": 0.0164,
      "step": 2436600
    },
    {
      "epoch": 3.9875820715749235,
      "grad_norm": 0.31790199875831604,
      "learning_rate": 2.0381779485118246e-06,
      "loss": 0.0115,
      "step": 2436620
    },
    {
      "epoch": 3.9876148020135767,
      "grad_norm": 0.2365991622209549,
      "learning_rate": 2.0381120562983073e-06,
      "loss": 0.0072,
      "step": 2436640
    },
    {
      "epoch": 3.98764753245223,
      "grad_norm": 0.12638571858406067,
      "learning_rate": 2.0380461640847905e-06,
      "loss": 0.0082,
      "step": 2436660
    },
    {
      "epoch": 3.9876802628908834,
      "grad_norm": 0.16047407686710358,
      "learning_rate": 2.0379802718712732e-06,
      "loss": 0.0075,
      "step": 2436680
    },
    {
      "epoch": 3.9877129933295365,
      "grad_norm": 0.11114031821489334,
      "learning_rate": 2.037914379657756e-06,
      "loss": 0.0092,
      "step": 2436700
    },
    {
      "epoch": 3.9877457237681897,
      "grad_norm": 0.09110979735851288,
      "learning_rate": 2.0378484874442387e-06,
      "loss": 0.0115,
      "step": 2436720
    },
    {
      "epoch": 3.9877784542068433,
      "grad_norm": 0.0722070038318634,
      "learning_rate": 2.037782595230722e-06,
      "loss": 0.0129,
      "step": 2436740
    },
    {
      "epoch": 3.987811184645497,
      "grad_norm": 0.2437194585800171,
      "learning_rate": 2.0377167030172046e-06,
      "loss": 0.0061,
      "step": 2436760
    },
    {
      "epoch": 3.98784391508415,
      "grad_norm": 0.5940342545509338,
      "learning_rate": 2.0376508108036874e-06,
      "loss": 0.01,
      "step": 2436780
    },
    {
      "epoch": 3.987876645522803,
      "grad_norm": 0.12380902469158173,
      "learning_rate": 2.03758491859017e-06,
      "loss": 0.0138,
      "step": 2436800
    },
    {
      "epoch": 3.987909375961457,
      "grad_norm": 0.07025918364524841,
      "learning_rate": 2.0375190263766533e-06,
      "loss": 0.0082,
      "step": 2436820
    },
    {
      "epoch": 3.98794210640011,
      "grad_norm": 0.21996310353279114,
      "learning_rate": 2.037453134163136e-06,
      "loss": 0.0115,
      "step": 2436840
    },
    {
      "epoch": 3.987974836838763,
      "grad_norm": 0.19660623371601105,
      "learning_rate": 2.037387241949619e-06,
      "loss": 0.0128,
      "step": 2436860
    },
    {
      "epoch": 3.9880075672774167,
      "grad_norm": 0.1158115416765213,
      "learning_rate": 2.037321349736102e-06,
      "loss": 0.0137,
      "step": 2436880
    },
    {
      "epoch": 3.98804029771607,
      "grad_norm": 0.2724062204360962,
      "learning_rate": 2.0372554575225847e-06,
      "loss": 0.0155,
      "step": 2436900
    },
    {
      "epoch": 3.9880730281547234,
      "grad_norm": 0.22064566612243652,
      "learning_rate": 2.0371895653090674e-06,
      "loss": 0.0138,
      "step": 2436920
    },
    {
      "epoch": 3.9881057585933766,
      "grad_norm": 0.3514382243156433,
      "learning_rate": 2.0371236730955506e-06,
      "loss": 0.0153,
      "step": 2436940
    },
    {
      "epoch": 3.98813848903203,
      "grad_norm": 0.1975966840982437,
      "learning_rate": 2.0370577808820333e-06,
      "loss": 0.0124,
      "step": 2436960
    },
    {
      "epoch": 3.9881712194706833,
      "grad_norm": 0.11505355685949326,
      "learning_rate": 2.036991888668516e-06,
      "loss": 0.0166,
      "step": 2436980
    },
    {
      "epoch": 3.9882039499093365,
      "grad_norm": 0.43274614214897156,
      "learning_rate": 2.0369259964549988e-06,
      "loss": 0.007,
      "step": 2437000
    },
    {
      "epoch": 3.98823668034799,
      "grad_norm": 0.49543142318725586,
      "learning_rate": 2.036860104241482e-06,
      "loss": 0.0118,
      "step": 2437020
    },
    {
      "epoch": 3.988269410786643,
      "grad_norm": 0.18138927221298218,
      "learning_rate": 2.036794212027965e-06,
      "loss": 0.0086,
      "step": 2437040
    },
    {
      "epoch": 3.988302141225297,
      "grad_norm": 0.222867950797081,
      "learning_rate": 2.036728319814448e-06,
      "loss": 0.0088,
      "step": 2437060
    },
    {
      "epoch": 3.98833487166395,
      "grad_norm": 0.11467912793159485,
      "learning_rate": 2.0366624276009306e-06,
      "loss": 0.0069,
      "step": 2437080
    },
    {
      "epoch": 3.9883676021026035,
      "grad_norm": 0.27544623613357544,
      "learning_rate": 2.0365965353874133e-06,
      "loss": 0.0131,
      "step": 2437100
    },
    {
      "epoch": 3.9884003325412567,
      "grad_norm": 0.25430336594581604,
      "learning_rate": 2.036530643173896e-06,
      "loss": 0.0116,
      "step": 2437120
    },
    {
      "epoch": 3.98843306297991,
      "grad_norm": 0.17908066511154175,
      "learning_rate": 2.0364647509603792e-06,
      "loss": 0.01,
      "step": 2437140
    },
    {
      "epoch": 3.9884657934185634,
      "grad_norm": 0.08189104497432709,
      "learning_rate": 2.036398858746862e-06,
      "loss": 0.0083,
      "step": 2437160
    },
    {
      "epoch": 3.9884985238572166,
      "grad_norm": 0.26105964183807373,
      "learning_rate": 2.036332966533345e-06,
      "loss": 0.0135,
      "step": 2437180
    },
    {
      "epoch": 3.98853125429587,
      "grad_norm": 0.20294420421123505,
      "learning_rate": 2.036267074319828e-06,
      "loss": 0.0093,
      "step": 2437200
    },
    {
      "epoch": 3.9885639847345233,
      "grad_norm": 0.29916900396347046,
      "learning_rate": 2.0362011821063106e-06,
      "loss": 0.0134,
      "step": 2437220
    },
    {
      "epoch": 3.988596715173177,
      "grad_norm": 0.2070860117673874,
      "learning_rate": 2.0361352898927938e-06,
      "loss": 0.0088,
      "step": 2437240
    },
    {
      "epoch": 3.98862944561183,
      "grad_norm": 0.2842364013195038,
      "learning_rate": 2.0360693976792765e-06,
      "loss": 0.0102,
      "step": 2437260
    },
    {
      "epoch": 3.9886621760504832,
      "grad_norm": 0.1234053447842598,
      "learning_rate": 2.0360035054657593e-06,
      "loss": 0.007,
      "step": 2437280
    },
    {
      "epoch": 3.988694906489137,
      "grad_norm": 0.5121088027954102,
      "learning_rate": 2.035937613252242e-06,
      "loss": 0.0119,
      "step": 2437300
    },
    {
      "epoch": 3.98872763692779,
      "grad_norm": 0.2093723863363266,
      "learning_rate": 2.0358717210387247e-06,
      "loss": 0.0146,
      "step": 2437320
    },
    {
      "epoch": 3.9887603673664436,
      "grad_norm": 0.3624022901058197,
      "learning_rate": 2.035805828825208e-06,
      "loss": 0.0153,
      "step": 2437340
    },
    {
      "epoch": 3.9887930978050967,
      "grad_norm": 0.37834450602531433,
      "learning_rate": 2.035739936611691e-06,
      "loss": 0.011,
      "step": 2437360
    },
    {
      "epoch": 3.9888258282437503,
      "grad_norm": 0.19114412367343903,
      "learning_rate": 2.035674044398174e-06,
      "loss": 0.0116,
      "step": 2437380
    },
    {
      "epoch": 3.9888585586824035,
      "grad_norm": 0.23285682499408722,
      "learning_rate": 2.0356081521846565e-06,
      "loss": 0.0094,
      "step": 2437400
    },
    {
      "epoch": 3.9888912891210566,
      "grad_norm": 0.1566976010799408,
      "learning_rate": 2.0355422599711393e-06,
      "loss": 0.0118,
      "step": 2437420
    },
    {
      "epoch": 3.98892401955971,
      "grad_norm": 0.08403105288743973,
      "learning_rate": 2.0354763677576224e-06,
      "loss": 0.0189,
      "step": 2437440
    },
    {
      "epoch": 3.9889567499983634,
      "grad_norm": 0.6877047419548035,
      "learning_rate": 2.035410475544105e-06,
      "loss": 0.0151,
      "step": 2437460
    },
    {
      "epoch": 3.988989480437017,
      "grad_norm": 0.10068828612565994,
      "learning_rate": 2.035344583330588e-06,
      "loss": 0.0082,
      "step": 2437480
    },
    {
      "epoch": 3.98902221087567,
      "grad_norm": 0.4588870406150818,
      "learning_rate": 2.0352786911170707e-06,
      "loss": 0.0085,
      "step": 2437500
    },
    {
      "epoch": 3.9890549413143237,
      "grad_norm": 0.3298433721065521,
      "learning_rate": 2.0352127989035534e-06,
      "loss": 0.0096,
      "step": 2437520
    },
    {
      "epoch": 3.989087671752977,
      "grad_norm": 0.32346460223197937,
      "learning_rate": 2.0351469066900366e-06,
      "loss": 0.0095,
      "step": 2437540
    },
    {
      "epoch": 3.98912040219163,
      "grad_norm": 0.30626997351646423,
      "learning_rate": 2.0350810144765197e-06,
      "loss": 0.0124,
      "step": 2437560
    },
    {
      "epoch": 3.9891531326302836,
      "grad_norm": 0.31514886021614075,
      "learning_rate": 2.0350151222630025e-06,
      "loss": 0.0061,
      "step": 2437580
    },
    {
      "epoch": 3.9891858630689367,
      "grad_norm": 0.28018370270729065,
      "learning_rate": 2.034949230049485e-06,
      "loss": 0.0071,
      "step": 2437600
    },
    {
      "epoch": 3.9892185935075903,
      "grad_norm": 0.16581210494041443,
      "learning_rate": 2.034883337835968e-06,
      "loss": 0.0138,
      "step": 2437620
    },
    {
      "epoch": 3.9892513239462435,
      "grad_norm": 0.12796904146671295,
      "learning_rate": 2.034817445622451e-06,
      "loss": 0.013,
      "step": 2437640
    },
    {
      "epoch": 3.989284054384897,
      "grad_norm": 0.24949170649051666,
      "learning_rate": 2.034751553408934e-06,
      "loss": 0.0128,
      "step": 2437660
    },
    {
      "epoch": 3.9893167848235502,
      "grad_norm": 0.34828048944473267,
      "learning_rate": 2.0346856611954166e-06,
      "loss": 0.0069,
      "step": 2437680
    },
    {
      "epoch": 3.9893495152622034,
      "grad_norm": 0.3152014911174774,
      "learning_rate": 2.0346197689818993e-06,
      "loss": 0.0087,
      "step": 2437700
    },
    {
      "epoch": 3.989382245700857,
      "grad_norm": 0.41086143255233765,
      "learning_rate": 2.0345538767683825e-06,
      "loss": 0.0113,
      "step": 2437720
    },
    {
      "epoch": 3.98941497613951,
      "grad_norm": 0.24499580264091492,
      "learning_rate": 2.0344879845548652e-06,
      "loss": 0.0116,
      "step": 2437740
    },
    {
      "epoch": 3.9894477065781637,
      "grad_norm": 0.24890972673892975,
      "learning_rate": 2.0344220923413484e-06,
      "loss": 0.0124,
      "step": 2437760
    },
    {
      "epoch": 3.989480437016817,
      "grad_norm": 1.3030914068222046,
      "learning_rate": 2.034356200127831e-06,
      "loss": 0.0086,
      "step": 2437780
    },
    {
      "epoch": 3.9895131674554705,
      "grad_norm": 0.16278599202632904,
      "learning_rate": 2.034290307914314e-06,
      "loss": 0.0082,
      "step": 2437800
    },
    {
      "epoch": 3.9895458978941236,
      "grad_norm": 0.12126391381025314,
      "learning_rate": 2.0342244157007966e-06,
      "loss": 0.0092,
      "step": 2437820
    },
    {
      "epoch": 3.9895786283327768,
      "grad_norm": 0.25354522466659546,
      "learning_rate": 2.0341585234872798e-06,
      "loss": 0.0122,
      "step": 2437840
    },
    {
      "epoch": 3.9896113587714304,
      "grad_norm": 0.21629026532173157,
      "learning_rate": 2.0340926312737625e-06,
      "loss": 0.0076,
      "step": 2437860
    },
    {
      "epoch": 3.9896440892100835,
      "grad_norm": 0.11102083325386047,
      "learning_rate": 2.0340267390602453e-06,
      "loss": 0.01,
      "step": 2437880
    },
    {
      "epoch": 3.9896768196487367,
      "grad_norm": 0.11115575581789017,
      "learning_rate": 2.0339608468467284e-06,
      "loss": 0.0117,
      "step": 2437900
    },
    {
      "epoch": 3.9897095500873903,
      "grad_norm": 0.10628458112478256,
      "learning_rate": 2.033894954633211e-06,
      "loss": 0.0121,
      "step": 2437920
    },
    {
      "epoch": 3.989742280526044,
      "grad_norm": 0.2563031315803528,
      "learning_rate": 2.033829062419694e-06,
      "loss": 0.0115,
      "step": 2437940
    },
    {
      "epoch": 3.989775010964697,
      "grad_norm": 0.06619397550821304,
      "learning_rate": 2.033763170206177e-06,
      "loss": 0.01,
      "step": 2437960
    },
    {
      "epoch": 3.98980774140335,
      "grad_norm": 0.36625245213508606,
      "learning_rate": 2.03369727799266e-06,
      "loss": 0.0083,
      "step": 2437980
    },
    {
      "epoch": 3.9898404718420037,
      "grad_norm": 1.0021477937698364,
      "learning_rate": 2.0336313857791425e-06,
      "loss": 0.0114,
      "step": 2438000
    },
    {
      "epoch": 3.989873202280657,
      "grad_norm": 0.2541784644126892,
      "learning_rate": 2.0335654935656253e-06,
      "loss": 0.0086,
      "step": 2438020
    },
    {
      "epoch": 3.98990593271931,
      "grad_norm": 0.11221306771039963,
      "learning_rate": 2.0334996013521084e-06,
      "loss": 0.0086,
      "step": 2438040
    },
    {
      "epoch": 3.9899386631579636,
      "grad_norm": 0.17503516376018524,
      "learning_rate": 2.033433709138591e-06,
      "loss": 0.0078,
      "step": 2438060
    },
    {
      "epoch": 3.9899713935966172,
      "grad_norm": 0.25099584460258484,
      "learning_rate": 2.0333678169250743e-06,
      "loss": 0.0133,
      "step": 2438080
    },
    {
      "epoch": 3.9900041240352704,
      "grad_norm": 0.05535368248820305,
      "learning_rate": 2.033301924711557e-06,
      "loss": 0.0083,
      "step": 2438100
    },
    {
      "epoch": 3.9900368544739235,
      "grad_norm": 0.08646636456251144,
      "learning_rate": 2.03323603249804e-06,
      "loss": 0.0112,
      "step": 2438120
    },
    {
      "epoch": 3.990069584912577,
      "grad_norm": 0.34657320380210876,
      "learning_rate": 2.0331701402845226e-06,
      "loss": 0.0123,
      "step": 2438140
    },
    {
      "epoch": 3.9901023153512303,
      "grad_norm": 0.2490902841091156,
      "learning_rate": 2.0331042480710057e-06,
      "loss": 0.0121,
      "step": 2438160
    },
    {
      "epoch": 3.9901350457898834,
      "grad_norm": 0.32385799288749695,
      "learning_rate": 2.0330383558574885e-06,
      "loss": 0.0101,
      "step": 2438180
    },
    {
      "epoch": 3.990167776228537,
      "grad_norm": 0.44598588347435,
      "learning_rate": 2.032972463643971e-06,
      "loss": 0.0141,
      "step": 2438200
    },
    {
      "epoch": 3.9902005066671906,
      "grad_norm": 0.0874539315700531,
      "learning_rate": 2.032906571430454e-06,
      "loss": 0.011,
      "step": 2438220
    },
    {
      "epoch": 3.9902332371058438,
      "grad_norm": 0.4455387592315674,
      "learning_rate": 2.032840679216937e-06,
      "loss": 0.0109,
      "step": 2438240
    },
    {
      "epoch": 3.990265967544497,
      "grad_norm": 0.08653873950242996,
      "learning_rate": 2.0327747870034203e-06,
      "loss": 0.0121,
      "step": 2438260
    },
    {
      "epoch": 3.9902986979831505,
      "grad_norm": 0.31690406799316406,
      "learning_rate": 2.032708894789903e-06,
      "loss": 0.0086,
      "step": 2438280
    },
    {
      "epoch": 3.9903314284218037,
      "grad_norm": 0.4063427448272705,
      "learning_rate": 2.0326430025763858e-06,
      "loss": 0.0131,
      "step": 2438300
    },
    {
      "epoch": 3.990364158860457,
      "grad_norm": 0.21615274250507355,
      "learning_rate": 2.0325771103628685e-06,
      "loss": 0.0139,
      "step": 2438320
    },
    {
      "epoch": 3.9903968892991104,
      "grad_norm": 0.18772827088832855,
      "learning_rate": 2.0325112181493512e-06,
      "loss": 0.0104,
      "step": 2438340
    },
    {
      "epoch": 3.990429619737764,
      "grad_norm": 0.535797119140625,
      "learning_rate": 2.0324453259358344e-06,
      "loss": 0.0136,
      "step": 2438360
    },
    {
      "epoch": 3.990462350176417,
      "grad_norm": 0.06809397041797638,
      "learning_rate": 2.032379433722317e-06,
      "loss": 0.0101,
      "step": 2438380
    },
    {
      "epoch": 3.9904950806150703,
      "grad_norm": 0.15114186704158783,
      "learning_rate": 2.0323135415088e-06,
      "loss": 0.0098,
      "step": 2438400
    },
    {
      "epoch": 3.990527811053724,
      "grad_norm": 0.7896395325660706,
      "learning_rate": 2.032247649295283e-06,
      "loss": 0.0114,
      "step": 2438420
    },
    {
      "epoch": 3.990560541492377,
      "grad_norm": 0.1555217057466507,
      "learning_rate": 2.0321817570817658e-06,
      "loss": 0.0109,
      "step": 2438440
    },
    {
      "epoch": 3.99059327193103,
      "grad_norm": 0.2752892076969147,
      "learning_rate": 2.032115864868249e-06,
      "loss": 0.0084,
      "step": 2438460
    },
    {
      "epoch": 3.990626002369684,
      "grad_norm": 0.41410619020462036,
      "learning_rate": 2.0320499726547317e-06,
      "loss": 0.0143,
      "step": 2438480
    },
    {
      "epoch": 3.990658732808337,
      "grad_norm": 0.39497166872024536,
      "learning_rate": 2.0319840804412144e-06,
      "loss": 0.0085,
      "step": 2438500
    },
    {
      "epoch": 3.9906914632469905,
      "grad_norm": 0.48247987031936646,
      "learning_rate": 2.031918188227697e-06,
      "loss": 0.0099,
      "step": 2438520
    },
    {
      "epoch": 3.9907241936856437,
      "grad_norm": 0.46577703952789307,
      "learning_rate": 2.03185229601418e-06,
      "loss": 0.0087,
      "step": 2438540
    },
    {
      "epoch": 3.9907569241242973,
      "grad_norm": 0.06776297092437744,
      "learning_rate": 2.031786403800663e-06,
      "loss": 0.009,
      "step": 2438560
    },
    {
      "epoch": 3.9907896545629504,
      "grad_norm": 0.20783580839633942,
      "learning_rate": 2.031720511587146e-06,
      "loss": 0.0101,
      "step": 2438580
    },
    {
      "epoch": 3.9908223850016036,
      "grad_norm": 0.12622280418872833,
      "learning_rate": 2.031654619373629e-06,
      "loss": 0.0065,
      "step": 2438600
    },
    {
      "epoch": 3.990855115440257,
      "grad_norm": 0.09415731579065323,
      "learning_rate": 2.0315887271601117e-06,
      "loss": 0.0116,
      "step": 2438620
    },
    {
      "epoch": 3.9908878458789103,
      "grad_norm": 0.1173032894730568,
      "learning_rate": 2.0315228349465944e-06,
      "loss": 0.0132,
      "step": 2438640
    },
    {
      "epoch": 3.990920576317564,
      "grad_norm": 0.11061809957027435,
      "learning_rate": 2.0314569427330776e-06,
      "loss": 0.0124,
      "step": 2438660
    },
    {
      "epoch": 3.990953306756217,
      "grad_norm": 0.12281189113855362,
      "learning_rate": 2.0313910505195604e-06,
      "loss": 0.0068,
      "step": 2438680
    },
    {
      "epoch": 3.9909860371948707,
      "grad_norm": 0.25379452109336853,
      "learning_rate": 2.031325158306043e-06,
      "loss": 0.0096,
      "step": 2438700
    },
    {
      "epoch": 3.991018767633524,
      "grad_norm": 0.31948959827423096,
      "learning_rate": 2.031259266092526e-06,
      "loss": 0.0104,
      "step": 2438720
    },
    {
      "epoch": 3.991051498072177,
      "grad_norm": 0.17537425458431244,
      "learning_rate": 2.0311933738790086e-06,
      "loss": 0.01,
      "step": 2438740
    },
    {
      "epoch": 3.9910842285108306,
      "grad_norm": 0.25158894062042236,
      "learning_rate": 2.0311274816654917e-06,
      "loss": 0.0081,
      "step": 2438760
    },
    {
      "epoch": 3.9911169589494837,
      "grad_norm": 0.11319831013679504,
      "learning_rate": 2.031061589451975e-06,
      "loss": 0.015,
      "step": 2438780
    },
    {
      "epoch": 3.9911496893881373,
      "grad_norm": 0.12792114913463593,
      "learning_rate": 2.0309956972384576e-06,
      "loss": 0.0086,
      "step": 2438800
    },
    {
      "epoch": 3.9911824198267905,
      "grad_norm": 0.22045955061912537,
      "learning_rate": 2.0309298050249404e-06,
      "loss": 0.0111,
      "step": 2438820
    },
    {
      "epoch": 3.991215150265444,
      "grad_norm": 0.16764505207538605,
      "learning_rate": 2.030863912811423e-06,
      "loss": 0.0151,
      "step": 2438840
    },
    {
      "epoch": 3.991247880704097,
      "grad_norm": 1.0519695281982422,
      "learning_rate": 2.0307980205979063e-06,
      "loss": 0.007,
      "step": 2438860
    },
    {
      "epoch": 3.9912806111427503,
      "grad_norm": 0.20666275918483734,
      "learning_rate": 2.030732128384389e-06,
      "loss": 0.012,
      "step": 2438880
    },
    {
      "epoch": 3.991313341581404,
      "grad_norm": 0.21947655081748962,
      "learning_rate": 2.0306662361708718e-06,
      "loss": 0.0097,
      "step": 2438900
    },
    {
      "epoch": 3.991346072020057,
      "grad_norm": 0.15870343148708344,
      "learning_rate": 2.0306003439573545e-06,
      "loss": 0.008,
      "step": 2438920
    },
    {
      "epoch": 3.9913788024587107,
      "grad_norm": 0.3029544949531555,
      "learning_rate": 2.0305344517438377e-06,
      "loss": 0.0116,
      "step": 2438940
    },
    {
      "epoch": 3.991411532897364,
      "grad_norm": 0.48174041509628296,
      "learning_rate": 2.0304685595303204e-06,
      "loss": 0.0132,
      "step": 2438960
    },
    {
      "epoch": 3.9914442633360174,
      "grad_norm": 0.25378596782684326,
      "learning_rate": 2.0304026673168036e-06,
      "loss": 0.0059,
      "step": 2438980
    },
    {
      "epoch": 3.9914769937746706,
      "grad_norm": 0.662421703338623,
      "learning_rate": 2.0303367751032863e-06,
      "loss": 0.011,
      "step": 2439000
    },
    {
      "epoch": 3.9915097242133237,
      "grad_norm": 0.5332221984863281,
      "learning_rate": 2.030270882889769e-06,
      "loss": 0.0131,
      "step": 2439020
    },
    {
      "epoch": 3.9915424546519773,
      "grad_norm": 0.8464275598526001,
      "learning_rate": 2.0302049906762518e-06,
      "loss": 0.0124,
      "step": 2439040
    },
    {
      "epoch": 3.9915751850906305,
      "grad_norm": 0.13009199500083923,
      "learning_rate": 2.030139098462735e-06,
      "loss": 0.0077,
      "step": 2439060
    },
    {
      "epoch": 3.991607915529284,
      "grad_norm": 0.16740748286247253,
      "learning_rate": 2.0300732062492177e-06,
      "loss": 0.0068,
      "step": 2439080
    },
    {
      "epoch": 3.991640645967937,
      "grad_norm": 0.10291068255901337,
      "learning_rate": 2.0300073140357004e-06,
      "loss": 0.0107,
      "step": 2439100
    },
    {
      "epoch": 3.991673376406591,
      "grad_norm": 0.25323379039764404,
      "learning_rate": 2.0299414218221836e-06,
      "loss": 0.007,
      "step": 2439120
    },
    {
      "epoch": 3.991706106845244,
      "grad_norm": 0.2054937183856964,
      "learning_rate": 2.0298755296086663e-06,
      "loss": 0.0087,
      "step": 2439140
    },
    {
      "epoch": 3.991738837283897,
      "grad_norm": 0.2921266555786133,
      "learning_rate": 2.029809637395149e-06,
      "loss": 0.0085,
      "step": 2439160
    },
    {
      "epoch": 3.9917715677225507,
      "grad_norm": 0.30620232224464417,
      "learning_rate": 2.0297437451816322e-06,
      "loss": 0.0133,
      "step": 2439180
    },
    {
      "epoch": 3.991804298161204,
      "grad_norm": 0.19923323392868042,
      "learning_rate": 2.029677852968115e-06,
      "loss": 0.0084,
      "step": 2439200
    },
    {
      "epoch": 3.9918370285998575,
      "grad_norm": 0.20911681652069092,
      "learning_rate": 2.0296119607545977e-06,
      "loss": 0.0119,
      "step": 2439220
    },
    {
      "epoch": 3.9918697590385106,
      "grad_norm": 0.23637478053569794,
      "learning_rate": 2.0295460685410805e-06,
      "loss": 0.0144,
      "step": 2439240
    },
    {
      "epoch": 3.991902489477164,
      "grad_norm": 0.24096345901489258,
      "learning_rate": 2.0294801763275636e-06,
      "loss": 0.0103,
      "step": 2439260
    },
    {
      "epoch": 3.9919352199158173,
      "grad_norm": 0.23933306336402893,
      "learning_rate": 2.0294142841140464e-06,
      "loss": 0.0086,
      "step": 2439280
    },
    {
      "epoch": 3.9919679503544705,
      "grad_norm": 0.34167730808258057,
      "learning_rate": 2.0293483919005295e-06,
      "loss": 0.0117,
      "step": 2439300
    },
    {
      "epoch": 3.992000680793124,
      "grad_norm": 0.21145373582839966,
      "learning_rate": 2.0292824996870123e-06,
      "loss": 0.0087,
      "step": 2439320
    },
    {
      "epoch": 3.9920334112317772,
      "grad_norm": 0.1403009444475174,
      "learning_rate": 2.029216607473495e-06,
      "loss": 0.0117,
      "step": 2439340
    },
    {
      "epoch": 3.9920661416704304,
      "grad_norm": 0.2578454613685608,
      "learning_rate": 2.0291507152599777e-06,
      "loss": 0.0111,
      "step": 2439360
    },
    {
      "epoch": 3.992098872109084,
      "grad_norm": 0.10788608342409134,
      "learning_rate": 2.029084823046461e-06,
      "loss": 0.011,
      "step": 2439380
    },
    {
      "epoch": 3.9921316025477376,
      "grad_norm": 0.18969234824180603,
      "learning_rate": 2.0290189308329436e-06,
      "loss": 0.0094,
      "step": 2439400
    },
    {
      "epoch": 3.9921643329863907,
      "grad_norm": 0.09467963129281998,
      "learning_rate": 2.0289530386194264e-06,
      "loss": 0.0089,
      "step": 2439420
    },
    {
      "epoch": 3.992197063425044,
      "grad_norm": 0.15491120517253876,
      "learning_rate": 2.028887146405909e-06,
      "loss": 0.0173,
      "step": 2439440
    },
    {
      "epoch": 3.9922297938636975,
      "grad_norm": 0.10142336040735245,
      "learning_rate": 2.0288212541923923e-06,
      "loss": 0.0109,
      "step": 2439460
    },
    {
      "epoch": 3.9922625243023506,
      "grad_norm": 0.3607586920261383,
      "learning_rate": 2.0287553619788754e-06,
      "loss": 0.0137,
      "step": 2439480
    },
    {
      "epoch": 3.9922952547410038,
      "grad_norm": 0.422893226146698,
      "learning_rate": 2.028689469765358e-06,
      "loss": 0.0111,
      "step": 2439500
    },
    {
      "epoch": 3.9923279851796574,
      "grad_norm": 0.2553673982620239,
      "learning_rate": 2.028623577551841e-06,
      "loss": 0.0068,
      "step": 2439520
    },
    {
      "epoch": 3.992360715618311,
      "grad_norm": 0.46525558829307556,
      "learning_rate": 2.0285576853383237e-06,
      "loss": 0.0091,
      "step": 2439540
    },
    {
      "epoch": 3.992393446056964,
      "grad_norm": 0.4205866754055023,
      "learning_rate": 2.0284917931248064e-06,
      "loss": 0.0111,
      "step": 2439560
    },
    {
      "epoch": 3.9924261764956173,
      "grad_norm": 0.2583548426628113,
      "learning_rate": 2.0284259009112896e-06,
      "loss": 0.0094,
      "step": 2439580
    },
    {
      "epoch": 3.992458906934271,
      "grad_norm": 0.06483812630176544,
      "learning_rate": 2.0283600086977723e-06,
      "loss": 0.009,
      "step": 2439600
    },
    {
      "epoch": 3.992491637372924,
      "grad_norm": 0.08569316565990448,
      "learning_rate": 2.028294116484255e-06,
      "loss": 0.0127,
      "step": 2439620
    },
    {
      "epoch": 3.992524367811577,
      "grad_norm": 0.0963042676448822,
      "learning_rate": 2.028228224270738e-06,
      "loss": 0.0091,
      "step": 2439640
    },
    {
      "epoch": 3.9925570982502308,
      "grad_norm": 0.2521837055683136,
      "learning_rate": 2.028162332057221e-06,
      "loss": 0.0146,
      "step": 2439660
    },
    {
      "epoch": 3.9925898286888843,
      "grad_norm": 0.07900980859994888,
      "learning_rate": 2.028096439843704e-06,
      "loss": 0.0135,
      "step": 2439680
    },
    {
      "epoch": 3.9926225591275375,
      "grad_norm": 0.2847840189933777,
      "learning_rate": 2.028030547630187e-06,
      "loss": 0.0128,
      "step": 2439700
    },
    {
      "epoch": 3.9926552895661906,
      "grad_norm": 0.5103359222412109,
      "learning_rate": 2.0279646554166696e-06,
      "loss": 0.0091,
      "step": 2439720
    },
    {
      "epoch": 3.9926880200048442,
      "grad_norm": 0.4891694486141205,
      "learning_rate": 2.0278987632031523e-06,
      "loss": 0.0126,
      "step": 2439740
    },
    {
      "epoch": 3.9927207504434974,
      "grad_norm": 0.15742962062358856,
      "learning_rate": 2.027832870989635e-06,
      "loss": 0.0113,
      "step": 2439760
    },
    {
      "epoch": 3.9927534808821505,
      "grad_norm": 0.1620253026485443,
      "learning_rate": 2.0277669787761182e-06,
      "loss": 0.0083,
      "step": 2439780
    },
    {
      "epoch": 3.992786211320804,
      "grad_norm": 0.38832131028175354,
      "learning_rate": 2.027701086562601e-06,
      "loss": 0.0094,
      "step": 2439800
    },
    {
      "epoch": 3.9928189417594577,
      "grad_norm": 0.24635319411754608,
      "learning_rate": 2.027635194349084e-06,
      "loss": 0.0107,
      "step": 2439820
    },
    {
      "epoch": 3.992851672198111,
      "grad_norm": 0.17502930760383606,
      "learning_rate": 2.027569302135567e-06,
      "loss": 0.0108,
      "step": 2439840
    },
    {
      "epoch": 3.992884402636764,
      "grad_norm": 0.8410230278968811,
      "learning_rate": 2.0275034099220496e-06,
      "loss": 0.0088,
      "step": 2439860
    },
    {
      "epoch": 3.9929171330754176,
      "grad_norm": 0.0663428008556366,
      "learning_rate": 2.0274375177085328e-06,
      "loss": 0.0096,
      "step": 2439880
    },
    {
      "epoch": 3.9929498635140708,
      "grad_norm": 0.23646263778209686,
      "learning_rate": 2.0273716254950155e-06,
      "loss": 0.0104,
      "step": 2439900
    },
    {
      "epoch": 3.992982593952724,
      "grad_norm": 0.26877525448799133,
      "learning_rate": 2.0273057332814983e-06,
      "loss": 0.0085,
      "step": 2439920
    },
    {
      "epoch": 3.9930153243913775,
      "grad_norm": 0.1861714869737625,
      "learning_rate": 2.027239841067981e-06,
      "loss": 0.0078,
      "step": 2439940
    },
    {
      "epoch": 3.9930480548300307,
      "grad_norm": 0.09087586402893066,
      "learning_rate": 2.0271739488544637e-06,
      "loss": 0.0098,
      "step": 2439960
    },
    {
      "epoch": 3.9930807852686843,
      "grad_norm": 0.645720362663269,
      "learning_rate": 2.027108056640947e-06,
      "loss": 0.0137,
      "step": 2439980
    },
    {
      "epoch": 3.9931135157073374,
      "grad_norm": 0.166725292801857,
      "learning_rate": 2.02704216442743e-06,
      "loss": 0.0087,
      "step": 2440000
    },
    {
      "epoch": 3.993146246145991,
      "grad_norm": 0.1070922315120697,
      "learning_rate": 2.026976272213913e-06,
      "loss": 0.0076,
      "step": 2440020
    },
    {
      "epoch": 3.993178976584644,
      "grad_norm": 0.15304550528526306,
      "learning_rate": 2.0269103800003955e-06,
      "loss": 0.0137,
      "step": 2440040
    },
    {
      "epoch": 3.9932117070232973,
      "grad_norm": 0.2437462955713272,
      "learning_rate": 2.0268444877868783e-06,
      "loss": 0.0109,
      "step": 2440060
    },
    {
      "epoch": 3.993244437461951,
      "grad_norm": 0.12711279094219208,
      "learning_rate": 2.0267785955733615e-06,
      "loss": 0.0091,
      "step": 2440080
    },
    {
      "epoch": 3.993277167900604,
      "grad_norm": 0.24437330663204193,
      "learning_rate": 2.026712703359844e-06,
      "loss": 0.0084,
      "step": 2440100
    },
    {
      "epoch": 3.9933098983392576,
      "grad_norm": 1.7315770387649536,
      "learning_rate": 2.026646811146327e-06,
      "loss": 0.0086,
      "step": 2440120
    },
    {
      "epoch": 3.993342628777911,
      "grad_norm": 0.29100367426872253,
      "learning_rate": 2.0265809189328097e-06,
      "loss": 0.0063,
      "step": 2440140
    },
    {
      "epoch": 3.9933753592165644,
      "grad_norm": 0.16802240908145905,
      "learning_rate": 2.0265150267192924e-06,
      "loss": 0.0132,
      "step": 2440160
    },
    {
      "epoch": 3.9934080896552175,
      "grad_norm": 0.030736226588487625,
      "learning_rate": 2.0264491345057756e-06,
      "loss": 0.0102,
      "step": 2440180
    },
    {
      "epoch": 3.9934408200938707,
      "grad_norm": 0.20688556134700775,
      "learning_rate": 2.0263832422922587e-06,
      "loss": 0.0085,
      "step": 2440200
    },
    {
      "epoch": 3.9934735505325243,
      "grad_norm": 0.16936366260051727,
      "learning_rate": 2.0263173500787415e-06,
      "loss": 0.0096,
      "step": 2440220
    },
    {
      "epoch": 3.9935062809711774,
      "grad_norm": 0.19593437016010284,
      "learning_rate": 2.0262514578652242e-06,
      "loss": 0.0081,
      "step": 2440240
    },
    {
      "epoch": 3.993539011409831,
      "grad_norm": 0.13914653658866882,
      "learning_rate": 2.026185565651707e-06,
      "loss": 0.0089,
      "step": 2440260
    },
    {
      "epoch": 3.993571741848484,
      "grad_norm": 0.34722691774368286,
      "learning_rate": 2.02611967343819e-06,
      "loss": 0.0087,
      "step": 2440280
    },
    {
      "epoch": 3.9936044722871378,
      "grad_norm": 0.31892532110214233,
      "learning_rate": 2.026053781224673e-06,
      "loss": 0.0145,
      "step": 2440300
    },
    {
      "epoch": 3.993637202725791,
      "grad_norm": 0.08883222192525864,
      "learning_rate": 2.0259878890111556e-06,
      "loss": 0.0097,
      "step": 2440320
    },
    {
      "epoch": 3.993669933164444,
      "grad_norm": 0.04128836840391159,
      "learning_rate": 2.0259219967976383e-06,
      "loss": 0.0083,
      "step": 2440340
    },
    {
      "epoch": 3.9937026636030977,
      "grad_norm": 0.1378447413444519,
      "learning_rate": 2.0258561045841215e-06,
      "loss": 0.0083,
      "step": 2440360
    },
    {
      "epoch": 3.993735394041751,
      "grad_norm": 0.30455100536346436,
      "learning_rate": 2.0257902123706042e-06,
      "loss": 0.0136,
      "step": 2440380
    },
    {
      "epoch": 3.9937681244804044,
      "grad_norm": 0.42794686555862427,
      "learning_rate": 2.0257243201570874e-06,
      "loss": 0.009,
      "step": 2440400
    },
    {
      "epoch": 3.9938008549190576,
      "grad_norm": 0.5779326558113098,
      "learning_rate": 2.02565842794357e-06,
      "loss": 0.0114,
      "step": 2440420
    },
    {
      "epoch": 3.993833585357711,
      "grad_norm": 0.6859046816825867,
      "learning_rate": 2.025592535730053e-06,
      "loss": 0.0136,
      "step": 2440440
    },
    {
      "epoch": 3.9938663157963643,
      "grad_norm": 0.04278275743126869,
      "learning_rate": 2.0255266435165356e-06,
      "loss": 0.0103,
      "step": 2440460
    },
    {
      "epoch": 3.9938990462350175,
      "grad_norm": 0.1732369363307953,
      "learning_rate": 2.025460751303019e-06,
      "loss": 0.0091,
      "step": 2440480
    },
    {
      "epoch": 3.993931776673671,
      "grad_norm": 0.4408474862575531,
      "learning_rate": 2.0253948590895015e-06,
      "loss": 0.0073,
      "step": 2440500
    },
    {
      "epoch": 3.993964507112324,
      "grad_norm": 0.14042600989341736,
      "learning_rate": 2.0253289668759843e-06,
      "loss": 0.0118,
      "step": 2440520
    },
    {
      "epoch": 3.993997237550978,
      "grad_norm": 0.08407728374004364,
      "learning_rate": 2.0252630746624674e-06,
      "loss": 0.0079,
      "step": 2440540
    },
    {
      "epoch": 3.994029967989631,
      "grad_norm": 0.1327839493751526,
      "learning_rate": 2.02519718244895e-06,
      "loss": 0.0099,
      "step": 2440560
    },
    {
      "epoch": 3.9940626984282845,
      "grad_norm": 0.21795853972434998,
      "learning_rate": 2.025131290235433e-06,
      "loss": 0.0122,
      "step": 2440580
    },
    {
      "epoch": 3.9940954288669377,
      "grad_norm": 0.14876970648765564,
      "learning_rate": 2.025065398021916e-06,
      "loss": 0.0103,
      "step": 2440600
    },
    {
      "epoch": 3.994128159305591,
      "grad_norm": 0.19841545820236206,
      "learning_rate": 2.024999505808399e-06,
      "loss": 0.0084,
      "step": 2440620
    },
    {
      "epoch": 3.9941608897442444,
      "grad_norm": 0.17285266518592834,
      "learning_rate": 2.0249336135948816e-06,
      "loss": 0.0133,
      "step": 2440640
    },
    {
      "epoch": 3.9941936201828976,
      "grad_norm": 0.43353143334388733,
      "learning_rate": 2.0248677213813643e-06,
      "loss": 0.0114,
      "step": 2440660
    },
    {
      "epoch": 3.994226350621551,
      "grad_norm": 0.08322270214557648,
      "learning_rate": 2.0248018291678475e-06,
      "loss": 0.0086,
      "step": 2440680
    },
    {
      "epoch": 3.9942590810602043,
      "grad_norm": 0.19777022302150726,
      "learning_rate": 2.02473593695433e-06,
      "loss": 0.0104,
      "step": 2440700
    },
    {
      "epoch": 3.994291811498858,
      "grad_norm": 0.25420936942100525,
      "learning_rate": 2.0246700447408134e-06,
      "loss": 0.0162,
      "step": 2440720
    },
    {
      "epoch": 3.994324541937511,
      "grad_norm": 0.38656774163246155,
      "learning_rate": 2.024604152527296e-06,
      "loss": 0.0164,
      "step": 2440740
    },
    {
      "epoch": 3.9943572723761642,
      "grad_norm": 0.41174718737602234,
      "learning_rate": 2.024538260313779e-06,
      "loss": 0.0156,
      "step": 2440760
    },
    {
      "epoch": 3.994390002814818,
      "grad_norm": 0.18556594848632812,
      "learning_rate": 2.0244723681002616e-06,
      "loss": 0.0075,
      "step": 2440780
    },
    {
      "epoch": 3.994422733253471,
      "grad_norm": 0.233199343085289,
      "learning_rate": 2.0244064758867447e-06,
      "loss": 0.0093,
      "step": 2440800
    },
    {
      "epoch": 3.9944554636921246,
      "grad_norm": 0.34342247247695923,
      "learning_rate": 2.0243405836732275e-06,
      "loss": 0.0075,
      "step": 2440820
    },
    {
      "epoch": 3.9944881941307777,
      "grad_norm": 0.1890273541212082,
      "learning_rate": 2.0242746914597102e-06,
      "loss": 0.0132,
      "step": 2440840
    },
    {
      "epoch": 3.9945209245694313,
      "grad_norm": 0.13917987048625946,
      "learning_rate": 2.024208799246193e-06,
      "loss": 0.0124,
      "step": 2440860
    },
    {
      "epoch": 3.9945536550080845,
      "grad_norm": 0.12564809620380402,
      "learning_rate": 2.024142907032676e-06,
      "loss": 0.0072,
      "step": 2440880
    },
    {
      "epoch": 3.9945863854467376,
      "grad_norm": 0.09098893404006958,
      "learning_rate": 2.0240770148191593e-06,
      "loss": 0.0091,
      "step": 2440900
    },
    {
      "epoch": 3.994619115885391,
      "grad_norm": 0.8310502171516418,
      "learning_rate": 2.024011122605642e-06,
      "loss": 0.0064,
      "step": 2440920
    },
    {
      "epoch": 3.9946518463240444,
      "grad_norm": 0.27718573808670044,
      "learning_rate": 2.0239452303921248e-06,
      "loss": 0.0094,
      "step": 2440940
    },
    {
      "epoch": 3.9946845767626975,
      "grad_norm": 0.21771731972694397,
      "learning_rate": 2.0238793381786075e-06,
      "loss": 0.015,
      "step": 2440960
    },
    {
      "epoch": 3.994717307201351,
      "grad_norm": 0.6472374200820923,
      "learning_rate": 2.0238134459650902e-06,
      "loss": 0.008,
      "step": 2440980
    },
    {
      "epoch": 3.9947500376400047,
      "grad_norm": 0.1675766408443451,
      "learning_rate": 2.0237475537515734e-06,
      "loss": 0.0087,
      "step": 2441000
    },
    {
      "epoch": 3.994782768078658,
      "grad_norm": 0.13001538813114166,
      "learning_rate": 2.023681661538056e-06,
      "loss": 0.0084,
      "step": 2441020
    },
    {
      "epoch": 3.994815498517311,
      "grad_norm": 0.1821983903646469,
      "learning_rate": 2.023615769324539e-06,
      "loss": 0.0093,
      "step": 2441040
    },
    {
      "epoch": 3.9948482289559646,
      "grad_norm": 0.6205982565879822,
      "learning_rate": 2.023549877111022e-06,
      "loss": 0.0098,
      "step": 2441060
    },
    {
      "epoch": 3.9948809593946177,
      "grad_norm": 0.10131893306970596,
      "learning_rate": 2.023483984897505e-06,
      "loss": 0.0104,
      "step": 2441080
    },
    {
      "epoch": 3.994913689833271,
      "grad_norm": 0.13975659012794495,
      "learning_rate": 2.023418092683988e-06,
      "loss": 0.0119,
      "step": 2441100
    },
    {
      "epoch": 3.9949464202719245,
      "grad_norm": 0.41462740302085876,
      "learning_rate": 2.0233522004704707e-06,
      "loss": 0.0147,
      "step": 2441120
    },
    {
      "epoch": 3.994979150710578,
      "grad_norm": 0.6064678430557251,
      "learning_rate": 2.0232863082569534e-06,
      "loss": 0.0174,
      "step": 2441140
    },
    {
      "epoch": 3.9950118811492312,
      "grad_norm": 0.10191735625267029,
      "learning_rate": 2.023220416043436e-06,
      "loss": 0.0094,
      "step": 2441160
    },
    {
      "epoch": 3.9950446115878844,
      "grad_norm": 0.24462981522083282,
      "learning_rate": 2.023154523829919e-06,
      "loss": 0.0115,
      "step": 2441180
    },
    {
      "epoch": 3.995077342026538,
      "grad_norm": 0.1530115306377411,
      "learning_rate": 2.023088631616402e-06,
      "loss": 0.01,
      "step": 2441200
    },
    {
      "epoch": 3.995110072465191,
      "grad_norm": 0.2599707245826721,
      "learning_rate": 2.023022739402885e-06,
      "loss": 0.0092,
      "step": 2441220
    },
    {
      "epoch": 3.9951428029038443,
      "grad_norm": 0.08130547404289246,
      "learning_rate": 2.022956847189368e-06,
      "loss": 0.0074,
      "step": 2441240
    },
    {
      "epoch": 3.995175533342498,
      "grad_norm": 0.39441466331481934,
      "learning_rate": 2.0228909549758507e-06,
      "loss": 0.0146,
      "step": 2441260
    },
    {
      "epoch": 3.9952082637811515,
      "grad_norm": 0.17725832760334015,
      "learning_rate": 2.0228250627623335e-06,
      "loss": 0.0196,
      "step": 2441280
    },
    {
      "epoch": 3.9952409942198046,
      "grad_norm": 0.1296449452638626,
      "learning_rate": 2.0227591705488166e-06,
      "loss": 0.012,
      "step": 2441300
    },
    {
      "epoch": 3.9952737246584578,
      "grad_norm": 0.36271530389785767,
      "learning_rate": 2.0226932783352994e-06,
      "loss": 0.0119,
      "step": 2441320
    },
    {
      "epoch": 3.9953064550971114,
      "grad_norm": 0.3441864848136902,
      "learning_rate": 2.022627386121782e-06,
      "loss": 0.0101,
      "step": 2441340
    },
    {
      "epoch": 3.9953391855357645,
      "grad_norm": 0.150899738073349,
      "learning_rate": 2.022561493908265e-06,
      "loss": 0.0098,
      "step": 2441360
    },
    {
      "epoch": 3.9953719159744177,
      "grad_norm": 0.1366633027791977,
      "learning_rate": 2.0224956016947476e-06,
      "loss": 0.0134,
      "step": 2441380
    },
    {
      "epoch": 3.9954046464130712,
      "grad_norm": 0.25390008091926575,
      "learning_rate": 2.0224297094812307e-06,
      "loss": 0.0133,
      "step": 2441400
    },
    {
      "epoch": 3.995437376851725,
      "grad_norm": 0.08370647579431534,
      "learning_rate": 2.022363817267714e-06,
      "loss": 0.0084,
      "step": 2441420
    },
    {
      "epoch": 3.995470107290378,
      "grad_norm": 0.1937716007232666,
      "learning_rate": 2.0222979250541966e-06,
      "loss": 0.0071,
      "step": 2441440
    },
    {
      "epoch": 3.995502837729031,
      "grad_norm": 0.1413583606481552,
      "learning_rate": 2.0222320328406794e-06,
      "loss": 0.0151,
      "step": 2441460
    },
    {
      "epoch": 3.9955355681676847,
      "grad_norm": 0.28251388669013977,
      "learning_rate": 2.022166140627162e-06,
      "loss": 0.0123,
      "step": 2441480
    },
    {
      "epoch": 3.995568298606338,
      "grad_norm": 0.32619237899780273,
      "learning_rate": 2.0221002484136453e-06,
      "loss": 0.0091,
      "step": 2441500
    },
    {
      "epoch": 3.995601029044991,
      "grad_norm": 0.0460171177983284,
      "learning_rate": 2.022034356200128e-06,
      "loss": 0.0091,
      "step": 2441520
    },
    {
      "epoch": 3.9956337594836446,
      "grad_norm": 0.3351763188838959,
      "learning_rate": 2.0219684639866108e-06,
      "loss": 0.0163,
      "step": 2441540
    },
    {
      "epoch": 3.995666489922298,
      "grad_norm": 0.1854024976491928,
      "learning_rate": 2.0219025717730935e-06,
      "loss": 0.0136,
      "step": 2441560
    },
    {
      "epoch": 3.9956992203609514,
      "grad_norm": 0.21863044798374176,
      "learning_rate": 2.0218366795595767e-06,
      "loss": 0.0099,
      "step": 2441580
    },
    {
      "epoch": 3.9957319507996045,
      "grad_norm": 0.11661090701818466,
      "learning_rate": 2.0217707873460594e-06,
      "loss": 0.0118,
      "step": 2441600
    },
    {
      "epoch": 3.995764681238258,
      "grad_norm": 0.21447154879570007,
      "learning_rate": 2.0217048951325426e-06,
      "loss": 0.0079,
      "step": 2441620
    },
    {
      "epoch": 3.9957974116769113,
      "grad_norm": 0.21764999628067017,
      "learning_rate": 2.0216390029190253e-06,
      "loss": 0.0158,
      "step": 2441640
    },
    {
      "epoch": 3.9958301421155644,
      "grad_norm": 0.6058312058448792,
      "learning_rate": 2.021573110705508e-06,
      "loss": 0.0089,
      "step": 2441660
    },
    {
      "epoch": 3.995862872554218,
      "grad_norm": 0.1569364070892334,
      "learning_rate": 2.021507218491991e-06,
      "loss": 0.008,
      "step": 2441680
    },
    {
      "epoch": 3.995895602992871,
      "grad_norm": 0.12225475162267685,
      "learning_rate": 2.021441326278474e-06,
      "loss": 0.0108,
      "step": 2441700
    },
    {
      "epoch": 3.9959283334315248,
      "grad_norm": 0.7444326877593994,
      "learning_rate": 2.0213754340649567e-06,
      "loss": 0.0143,
      "step": 2441720
    },
    {
      "epoch": 3.995961063870178,
      "grad_norm": 0.5517690181732178,
      "learning_rate": 2.0213095418514394e-06,
      "loss": 0.0069,
      "step": 2441740
    },
    {
      "epoch": 3.9959937943088315,
      "grad_norm": 0.15288253128528595,
      "learning_rate": 2.0212436496379226e-06,
      "loss": 0.007,
      "step": 2441760
    },
    {
      "epoch": 3.9960265247474847,
      "grad_norm": 0.5669128894805908,
      "learning_rate": 2.0211777574244053e-06,
      "loss": 0.0151,
      "step": 2441780
    },
    {
      "epoch": 3.996059255186138,
      "grad_norm": 0.17619472742080688,
      "learning_rate": 2.021111865210888e-06,
      "loss": 0.0076,
      "step": 2441800
    },
    {
      "epoch": 3.9960919856247914,
      "grad_norm": 0.18570677936077118,
      "learning_rate": 2.0210459729973712e-06,
      "loss": 0.0143,
      "step": 2441820
    },
    {
      "epoch": 3.9961247160634445,
      "grad_norm": 0.7478382587432861,
      "learning_rate": 2.020980080783854e-06,
      "loss": 0.0101,
      "step": 2441840
    },
    {
      "epoch": 3.996157446502098,
      "grad_norm": 0.2124798446893692,
      "learning_rate": 2.0209141885703367e-06,
      "loss": 0.0088,
      "step": 2441860
    },
    {
      "epoch": 3.9961901769407513,
      "grad_norm": 0.3305036425590515,
      "learning_rate": 2.0208482963568195e-06,
      "loss": 0.0077,
      "step": 2441880
    },
    {
      "epoch": 3.996222907379405,
      "grad_norm": 0.27584308385849,
      "learning_rate": 2.0207824041433026e-06,
      "loss": 0.008,
      "step": 2441900
    },
    {
      "epoch": 3.996255637818058,
      "grad_norm": 0.22541090846061707,
      "learning_rate": 2.0207165119297854e-06,
      "loss": 0.0079,
      "step": 2441920
    },
    {
      "epoch": 3.996288368256711,
      "grad_norm": 0.16079401969909668,
      "learning_rate": 2.0206506197162685e-06,
      "loss": 0.0113,
      "step": 2441940
    },
    {
      "epoch": 3.996321098695365,
      "grad_norm": 0.3312261998653412,
      "learning_rate": 2.0205847275027513e-06,
      "loss": 0.0073,
      "step": 2441960
    },
    {
      "epoch": 3.996353829134018,
      "grad_norm": 0.590711236000061,
      "learning_rate": 2.020518835289234e-06,
      "loss": 0.0082,
      "step": 2441980
    },
    {
      "epoch": 3.9963865595726715,
      "grad_norm": 0.33194372057914734,
      "learning_rate": 2.0204529430757168e-06,
      "loss": 0.0111,
      "step": 2442000
    },
    {
      "epoch": 3.9964192900113247,
      "grad_norm": 1.3761178255081177,
      "learning_rate": 2.0203870508622e-06,
      "loss": 0.0104,
      "step": 2442020
    },
    {
      "epoch": 3.9964520204499783,
      "grad_norm": 0.4527873694896698,
      "learning_rate": 2.0203211586486827e-06,
      "loss": 0.0069,
      "step": 2442040
    },
    {
      "epoch": 3.9964847508886314,
      "grad_norm": 0.19222193956375122,
      "learning_rate": 2.0202552664351654e-06,
      "loss": 0.0133,
      "step": 2442060
    },
    {
      "epoch": 3.9965174813272846,
      "grad_norm": 0.31844082474708557,
      "learning_rate": 2.020189374221648e-06,
      "loss": 0.0085,
      "step": 2442080
    },
    {
      "epoch": 3.996550211765938,
      "grad_norm": 0.10834746062755585,
      "learning_rate": 2.0201234820081313e-06,
      "loss": 0.0143,
      "step": 2442100
    },
    {
      "epoch": 3.9965829422045913,
      "grad_norm": 0.2123177945613861,
      "learning_rate": 2.0200575897946145e-06,
      "loss": 0.0062,
      "step": 2442120
    },
    {
      "epoch": 3.996615672643245,
      "grad_norm": 0.15756650269031525,
      "learning_rate": 2.019991697581097e-06,
      "loss": 0.01,
      "step": 2442140
    },
    {
      "epoch": 3.996648403081898,
      "grad_norm": 0.46009838581085205,
      "learning_rate": 2.01992580536758e-06,
      "loss": 0.0134,
      "step": 2442160
    },
    {
      "epoch": 3.9966811335205517,
      "grad_norm": 0.3462590277194977,
      "learning_rate": 2.0198599131540627e-06,
      "loss": 0.0135,
      "step": 2442180
    },
    {
      "epoch": 3.996713863959205,
      "grad_norm": 0.17746365070343018,
      "learning_rate": 2.0197940209405454e-06,
      "loss": 0.0144,
      "step": 2442200
    },
    {
      "epoch": 3.996746594397858,
      "grad_norm": 0.5599973201751709,
      "learning_rate": 2.0197281287270286e-06,
      "loss": 0.0085,
      "step": 2442220
    },
    {
      "epoch": 3.9967793248365115,
      "grad_norm": 0.4030069410800934,
      "learning_rate": 2.0196622365135113e-06,
      "loss": 0.0132,
      "step": 2442240
    },
    {
      "epoch": 3.9968120552751647,
      "grad_norm": 0.25973060727119446,
      "learning_rate": 2.019596344299994e-06,
      "loss": 0.0108,
      "step": 2442260
    },
    {
      "epoch": 3.9968447857138183,
      "grad_norm": 0.20190343260765076,
      "learning_rate": 2.0195304520864772e-06,
      "loss": 0.0102,
      "step": 2442280
    },
    {
      "epoch": 3.9968775161524714,
      "grad_norm": 0.15800678730010986,
      "learning_rate": 2.01946455987296e-06,
      "loss": 0.0095,
      "step": 2442300
    },
    {
      "epoch": 3.996910246591125,
      "grad_norm": 0.035435501486063004,
      "learning_rate": 2.019398667659443e-06,
      "loss": 0.0083,
      "step": 2442320
    },
    {
      "epoch": 3.996942977029778,
      "grad_norm": 0.2630592882633209,
      "learning_rate": 2.019332775445926e-06,
      "loss": 0.0138,
      "step": 2442340
    },
    {
      "epoch": 3.9969757074684313,
      "grad_norm": 0.2937852442264557,
      "learning_rate": 2.0192668832324086e-06,
      "loss": 0.0104,
      "step": 2442360
    },
    {
      "epoch": 3.997008437907085,
      "grad_norm": 0.23196718096733093,
      "learning_rate": 2.0192009910188913e-06,
      "loss": 0.0085,
      "step": 2442380
    },
    {
      "epoch": 3.997041168345738,
      "grad_norm": 0.458905428647995,
      "learning_rate": 2.019135098805374e-06,
      "loss": 0.0098,
      "step": 2442400
    },
    {
      "epoch": 3.9970738987843912,
      "grad_norm": 0.21901653707027435,
      "learning_rate": 2.0190692065918572e-06,
      "loss": 0.0132,
      "step": 2442420
    },
    {
      "epoch": 3.997106629223045,
      "grad_norm": 0.1810069978237152,
      "learning_rate": 2.01900331437834e-06,
      "loss": 0.0103,
      "step": 2442440
    },
    {
      "epoch": 3.9971393596616984,
      "grad_norm": 0.44351473450660706,
      "learning_rate": 2.018937422164823e-06,
      "loss": 0.0086,
      "step": 2442460
    },
    {
      "epoch": 3.9971720901003516,
      "grad_norm": 0.18090982735157013,
      "learning_rate": 2.018871529951306e-06,
      "loss": 0.0101,
      "step": 2442480
    },
    {
      "epoch": 3.9972048205390047,
      "grad_norm": 0.15324489772319794,
      "learning_rate": 2.0188056377377886e-06,
      "loss": 0.0071,
      "step": 2442500
    },
    {
      "epoch": 3.9972375509776583,
      "grad_norm": 0.2856408655643463,
      "learning_rate": 2.018739745524272e-06,
      "loss": 0.0096,
      "step": 2442520
    },
    {
      "epoch": 3.9972702814163115,
      "grad_norm": 0.28820478916168213,
      "learning_rate": 2.0186738533107545e-06,
      "loss": 0.0103,
      "step": 2442540
    },
    {
      "epoch": 3.9973030118549646,
      "grad_norm": 0.2726248502731323,
      "learning_rate": 2.0186079610972373e-06,
      "loss": 0.0076,
      "step": 2442560
    },
    {
      "epoch": 3.997335742293618,
      "grad_norm": 0.2526392936706543,
      "learning_rate": 2.01854206888372e-06,
      "loss": 0.0126,
      "step": 2442580
    },
    {
      "epoch": 3.997368472732272,
      "grad_norm": 0.6328631043434143,
      "learning_rate": 2.0184761766702028e-06,
      "loss": 0.0084,
      "step": 2442600
    },
    {
      "epoch": 3.997401203170925,
      "grad_norm": 0.12214435636997223,
      "learning_rate": 2.018410284456686e-06,
      "loss": 0.011,
      "step": 2442620
    },
    {
      "epoch": 3.997433933609578,
      "grad_norm": 0.2846583425998688,
      "learning_rate": 2.018344392243169e-06,
      "loss": 0.012,
      "step": 2442640
    },
    {
      "epoch": 3.9974666640482317,
      "grad_norm": 0.18465369939804077,
      "learning_rate": 2.018278500029652e-06,
      "loss": 0.0118,
      "step": 2442660
    },
    {
      "epoch": 3.997499394486885,
      "grad_norm": 0.2175404131412506,
      "learning_rate": 2.0182126078161346e-06,
      "loss": 0.0089,
      "step": 2442680
    },
    {
      "epoch": 3.997532124925538,
      "grad_norm": 0.35400938987731934,
      "learning_rate": 2.0181467156026173e-06,
      "loss": 0.0091,
      "step": 2442700
    },
    {
      "epoch": 3.9975648553641916,
      "grad_norm": 0.706687867641449,
      "learning_rate": 2.0180808233891005e-06,
      "loss": 0.0108,
      "step": 2442720
    },
    {
      "epoch": 3.997597585802845,
      "grad_norm": 0.12761995196342468,
      "learning_rate": 2.018014931175583e-06,
      "loss": 0.008,
      "step": 2442740
    },
    {
      "epoch": 3.9976303162414983,
      "grad_norm": 0.16038933396339417,
      "learning_rate": 2.017949038962066e-06,
      "loss": 0.0117,
      "step": 2442760
    },
    {
      "epoch": 3.9976630466801515,
      "grad_norm": 0.4143489599227905,
      "learning_rate": 2.0178831467485487e-06,
      "loss": 0.0068,
      "step": 2442780
    },
    {
      "epoch": 3.997695777118805,
      "grad_norm": 0.4194601774215698,
      "learning_rate": 2.0178172545350314e-06,
      "loss": 0.0086,
      "step": 2442800
    },
    {
      "epoch": 3.9977285075574582,
      "grad_norm": 0.12897101044654846,
      "learning_rate": 2.0177513623215146e-06,
      "loss": 0.0119,
      "step": 2442820
    },
    {
      "epoch": 3.9977612379961114,
      "grad_norm": 0.4301679730415344,
      "learning_rate": 2.0176854701079977e-06,
      "loss": 0.0087,
      "step": 2442840
    },
    {
      "epoch": 3.997793968434765,
      "grad_norm": 0.45962440967559814,
      "learning_rate": 2.0176195778944805e-06,
      "loss": 0.0142,
      "step": 2442860
    },
    {
      "epoch": 3.9978266988734186,
      "grad_norm": 0.22063951194286346,
      "learning_rate": 2.0175536856809632e-06,
      "loss": 0.0105,
      "step": 2442880
    },
    {
      "epoch": 3.9978594293120717,
      "grad_norm": 0.05935217812657356,
      "learning_rate": 2.017487793467446e-06,
      "loss": 0.0118,
      "step": 2442900
    },
    {
      "epoch": 3.997892159750725,
      "grad_norm": 0.15831147134304047,
      "learning_rate": 2.017421901253929e-06,
      "loss": 0.0109,
      "step": 2442920
    },
    {
      "epoch": 3.9979248901893785,
      "grad_norm": 0.11055798083543777,
      "learning_rate": 2.017356009040412e-06,
      "loss": 0.0114,
      "step": 2442940
    },
    {
      "epoch": 3.9979576206280316,
      "grad_norm": 0.27752140164375305,
      "learning_rate": 2.0172901168268946e-06,
      "loss": 0.0069,
      "step": 2442960
    },
    {
      "epoch": 3.9979903510666848,
      "grad_norm": 0.10227847844362259,
      "learning_rate": 2.0172242246133774e-06,
      "loss": 0.0058,
      "step": 2442980
    },
    {
      "epoch": 3.9980230815053384,
      "grad_norm": 0.19785383343696594,
      "learning_rate": 2.0171583323998605e-06,
      "loss": 0.0146,
      "step": 2443000
    },
    {
      "epoch": 3.9980558119439915,
      "grad_norm": 0.6793071627616882,
      "learning_rate": 2.0170924401863433e-06,
      "loss": 0.0108,
      "step": 2443020
    },
    {
      "epoch": 3.998088542382645,
      "grad_norm": 0.10576828569173813,
      "learning_rate": 2.0170265479728264e-06,
      "loss": 0.0144,
      "step": 2443040
    },
    {
      "epoch": 3.9981212728212983,
      "grad_norm": 0.5773659944534302,
      "learning_rate": 2.016960655759309e-06,
      "loss": 0.0086,
      "step": 2443060
    },
    {
      "epoch": 3.998154003259952,
      "grad_norm": 0.5465980768203735,
      "learning_rate": 2.016894763545792e-06,
      "loss": 0.0081,
      "step": 2443080
    },
    {
      "epoch": 3.998186733698605,
      "grad_norm": 0.3445148468017578,
      "learning_rate": 2.0168288713322746e-06,
      "loss": 0.0138,
      "step": 2443100
    },
    {
      "epoch": 3.998219464137258,
      "grad_norm": 0.1525084376335144,
      "learning_rate": 2.016762979118758e-06,
      "loss": 0.0061,
      "step": 2443120
    },
    {
      "epoch": 3.9982521945759117,
      "grad_norm": 0.11717471480369568,
      "learning_rate": 2.0166970869052405e-06,
      "loss": 0.0151,
      "step": 2443140
    },
    {
      "epoch": 3.998284925014565,
      "grad_norm": 0.1697714626789093,
      "learning_rate": 2.0166311946917237e-06,
      "loss": 0.011,
      "step": 2443160
    },
    {
      "epoch": 3.9983176554532185,
      "grad_norm": 0.2571583688259125,
      "learning_rate": 2.0165653024782064e-06,
      "loss": 0.0074,
      "step": 2443180
    },
    {
      "epoch": 3.9983503858918716,
      "grad_norm": 0.1539464294910431,
      "learning_rate": 2.016499410264689e-06,
      "loss": 0.0111,
      "step": 2443200
    },
    {
      "epoch": 3.9983831163305252,
      "grad_norm": 0.30719372630119324,
      "learning_rate": 2.016433518051172e-06,
      "loss": 0.0114,
      "step": 2443220
    },
    {
      "epoch": 3.9984158467691784,
      "grad_norm": 0.21524862945079803,
      "learning_rate": 2.016367625837655e-06,
      "loss": 0.0123,
      "step": 2443240
    },
    {
      "epoch": 3.9984485772078315,
      "grad_norm": 0.2704087495803833,
      "learning_rate": 2.016301733624138e-06,
      "loss": 0.0109,
      "step": 2443260
    },
    {
      "epoch": 3.998481307646485,
      "grad_norm": 0.3157568871974945,
      "learning_rate": 2.0162358414106206e-06,
      "loss": 0.015,
      "step": 2443280
    },
    {
      "epoch": 3.9985140380851383,
      "grad_norm": 0.5935197472572327,
      "learning_rate": 2.0161699491971033e-06,
      "loss": 0.011,
      "step": 2443300
    },
    {
      "epoch": 3.998546768523792,
      "grad_norm": 0.0666496530175209,
      "learning_rate": 2.0161040569835865e-06,
      "loss": 0.0062,
      "step": 2443320
    },
    {
      "epoch": 3.998579498962445,
      "grad_norm": 0.608234703540802,
      "learning_rate": 2.016038164770069e-06,
      "loss": 0.0089,
      "step": 2443340
    },
    {
      "epoch": 3.9986122294010986,
      "grad_norm": 0.5080336928367615,
      "learning_rate": 2.0159722725565524e-06,
      "loss": 0.0075,
      "step": 2443360
    },
    {
      "epoch": 3.9986449598397518,
      "grad_norm": 0.1221216544508934,
      "learning_rate": 2.015906380343035e-06,
      "loss": 0.0077,
      "step": 2443380
    },
    {
      "epoch": 3.998677690278405,
      "grad_norm": 0.11744924634695053,
      "learning_rate": 2.015840488129518e-06,
      "loss": 0.0129,
      "step": 2443400
    },
    {
      "epoch": 3.9987104207170585,
      "grad_norm": 0.17390727996826172,
      "learning_rate": 2.0157745959160006e-06,
      "loss": 0.0118,
      "step": 2443420
    },
    {
      "epoch": 3.9987431511557117,
      "grad_norm": 0.4248145520687103,
      "learning_rate": 2.0157087037024838e-06,
      "loss": 0.0086,
      "step": 2443440
    },
    {
      "epoch": 3.9987758815943653,
      "grad_norm": 0.5222241282463074,
      "learning_rate": 2.0156428114889665e-06,
      "loss": 0.0104,
      "step": 2443460
    },
    {
      "epoch": 3.9988086120330184,
      "grad_norm": 0.16902035474777222,
      "learning_rate": 2.0155769192754492e-06,
      "loss": 0.0084,
      "step": 2443480
    },
    {
      "epoch": 3.998841342471672,
      "grad_norm": 0.1525401473045349,
      "learning_rate": 2.015511027061932e-06,
      "loss": 0.007,
      "step": 2443500
    },
    {
      "epoch": 3.998874072910325,
      "grad_norm": 0.1802034229040146,
      "learning_rate": 2.015445134848415e-06,
      "loss": 0.0095,
      "step": 2443520
    },
    {
      "epoch": 3.9989068033489783,
      "grad_norm": 0.12708017230033875,
      "learning_rate": 2.0153792426348983e-06,
      "loss": 0.0093,
      "step": 2443540
    },
    {
      "epoch": 3.998939533787632,
      "grad_norm": 0.22689904272556305,
      "learning_rate": 2.015313350421381e-06,
      "loss": 0.0086,
      "step": 2443560
    },
    {
      "epoch": 3.998972264226285,
      "grad_norm": 0.22037683427333832,
      "learning_rate": 2.0152474582078638e-06,
      "loss": 0.0099,
      "step": 2443580
    },
    {
      "epoch": 3.9990049946649386,
      "grad_norm": 0.6066218614578247,
      "learning_rate": 2.0151815659943465e-06,
      "loss": 0.0172,
      "step": 2443600
    },
    {
      "epoch": 3.999037725103592,
      "grad_norm": 0.41674062609672546,
      "learning_rate": 2.0151156737808293e-06,
      "loss": 0.0109,
      "step": 2443620
    },
    {
      "epoch": 3.9990704555422454,
      "grad_norm": 0.26360082626342773,
      "learning_rate": 2.0150497815673124e-06,
      "loss": 0.0072,
      "step": 2443640
    },
    {
      "epoch": 3.9991031859808985,
      "grad_norm": 0.33138716220855713,
      "learning_rate": 2.014983889353795e-06,
      "loss": 0.0134,
      "step": 2443660
    },
    {
      "epoch": 3.9991359164195517,
      "grad_norm": 0.5856678485870361,
      "learning_rate": 2.014917997140278e-06,
      "loss": 0.0111,
      "step": 2443680
    },
    {
      "epoch": 3.9991686468582053,
      "grad_norm": 0.4317050874233246,
      "learning_rate": 2.014852104926761e-06,
      "loss": 0.0129,
      "step": 2443700
    },
    {
      "epoch": 3.9992013772968584,
      "grad_norm": 0.15814830362796783,
      "learning_rate": 2.014786212713244e-06,
      "loss": 0.011,
      "step": 2443720
    },
    {
      "epoch": 3.999234107735512,
      "grad_norm": 0.2037338763475418,
      "learning_rate": 2.014720320499727e-06,
      "loss": 0.0134,
      "step": 2443740
    },
    {
      "epoch": 3.999266838174165,
      "grad_norm": 0.24019865691661835,
      "learning_rate": 2.0146544282862097e-06,
      "loss": 0.0115,
      "step": 2443760
    },
    {
      "epoch": 3.9992995686128188,
      "grad_norm": 0.19201941788196564,
      "learning_rate": 2.0145885360726924e-06,
      "loss": 0.0068,
      "step": 2443780
    },
    {
      "epoch": 3.999332299051472,
      "grad_norm": 0.214557945728302,
      "learning_rate": 2.014522643859175e-06,
      "loss": 0.0083,
      "step": 2443800
    },
    {
      "epoch": 3.999365029490125,
      "grad_norm": 0.19408199191093445,
      "learning_rate": 2.014456751645658e-06,
      "loss": 0.0107,
      "step": 2443820
    },
    {
      "epoch": 3.9993977599287787,
      "grad_norm": 0.6927603483200073,
      "learning_rate": 2.014390859432141e-06,
      "loss": 0.0133,
      "step": 2443840
    },
    {
      "epoch": 3.999430490367432,
      "grad_norm": 0.23339056968688965,
      "learning_rate": 2.014324967218624e-06,
      "loss": 0.0124,
      "step": 2443860
    },
    {
      "epoch": 3.9994632208060854,
      "grad_norm": 0.14560920000076294,
      "learning_rate": 2.014259075005107e-06,
      "loss": 0.0091,
      "step": 2443880
    },
    {
      "epoch": 3.9994959512447386,
      "grad_norm": 0.16158413887023926,
      "learning_rate": 2.0141931827915897e-06,
      "loss": 0.0133,
      "step": 2443900
    },
    {
      "epoch": 3.999528681683392,
      "grad_norm": 0.3122238218784332,
      "learning_rate": 2.0141272905780725e-06,
      "loss": 0.0071,
      "step": 2443920
    },
    {
      "epoch": 3.9995614121220453,
      "grad_norm": 0.2677235007286072,
      "learning_rate": 2.0140613983645556e-06,
      "loss": 0.0077,
      "step": 2443940
    },
    {
      "epoch": 3.9995941425606985,
      "grad_norm": 0.10144486278295517,
      "learning_rate": 2.0139955061510384e-06,
      "loss": 0.006,
      "step": 2443960
    },
    {
      "epoch": 3.999626872999352,
      "grad_norm": 0.1642548143863678,
      "learning_rate": 2.013929613937521e-06,
      "loss": 0.0084,
      "step": 2443980
    },
    {
      "epoch": 3.999659603438005,
      "grad_norm": 0.37883055210113525,
      "learning_rate": 2.013863721724004e-06,
      "loss": 0.0099,
      "step": 2444000
    },
    {
      "epoch": 3.9996923338766583,
      "grad_norm": 0.0801982432603836,
      "learning_rate": 2.0137978295104866e-06,
      "loss": 0.0084,
      "step": 2444020
    },
    {
      "epoch": 3.999725064315312,
      "grad_norm": 0.3051705062389374,
      "learning_rate": 2.0137319372969698e-06,
      "loss": 0.0081,
      "step": 2444040
    },
    {
      "epoch": 3.9997577947539655,
      "grad_norm": 0.22526800632476807,
      "learning_rate": 2.013666045083453e-06,
      "loss": 0.0118,
      "step": 2444060
    },
    {
      "epoch": 3.9997905251926187,
      "grad_norm": 0.13876460492610931,
      "learning_rate": 2.0136001528699357e-06,
      "loss": 0.0081,
      "step": 2444080
    },
    {
      "epoch": 3.999823255631272,
      "grad_norm": 0.2136104553937912,
      "learning_rate": 2.0135342606564184e-06,
      "loss": 0.0116,
      "step": 2444100
    },
    {
      "epoch": 3.9998559860699254,
      "grad_norm": 0.3757779002189636,
      "learning_rate": 2.013468368442901e-06,
      "loss": 0.0116,
      "step": 2444120
    },
    {
      "epoch": 3.9998887165085786,
      "grad_norm": 0.7757243514060974,
      "learning_rate": 2.0134024762293843e-06,
      "loss": 0.0099,
      "step": 2444140
    },
    {
      "epoch": 3.9999214469472317,
      "grad_norm": 0.16918914020061493,
      "learning_rate": 2.013336584015867e-06,
      "loss": 0.0141,
      "step": 2444160
    },
    {
      "epoch": 3.9999541773858853,
      "grad_norm": 0.14853259921073914,
      "learning_rate": 2.0132706918023498e-06,
      "loss": 0.01,
      "step": 2444180
    },
    {
      "epoch": 3.999986907824539,
      "grad_norm": 0.2080973982810974,
      "learning_rate": 2.0132047995888325e-06,
      "loss": 0.0086,
      "step": 2444200
    },
    {
      "epoch": 4.000019638263192,
      "grad_norm": 0.12041240185499191,
      "learning_rate": 2.0131389073753157e-06,
      "loss": 0.0081,
      "step": 2444220
    },
    {
      "epoch": 4.000052368701845,
      "grad_norm": 0.05788257718086243,
      "learning_rate": 2.0130730151617984e-06,
      "loss": 0.0117,
      "step": 2444240
    },
    {
      "epoch": 4.000085099140499,
      "grad_norm": 0.371587872505188,
      "learning_rate": 2.0130071229482816e-06,
      "loss": 0.007,
      "step": 2444260
    },
    {
      "epoch": 4.000117829579152,
      "grad_norm": 0.2915360629558563,
      "learning_rate": 2.0129412307347643e-06,
      "loss": 0.0071,
      "step": 2444280
    },
    {
      "epoch": 4.000150560017805,
      "grad_norm": 0.41072168946266174,
      "learning_rate": 2.012875338521247e-06,
      "loss": 0.0075,
      "step": 2444300
    },
    {
      "epoch": 4.000183290456459,
      "grad_norm": 0.3052045404911041,
      "learning_rate": 2.01280944630773e-06,
      "loss": 0.0106,
      "step": 2444320
    },
    {
      "epoch": 4.000216020895112,
      "grad_norm": 0.1106540784239769,
      "learning_rate": 2.012743554094213e-06,
      "loss": 0.0062,
      "step": 2444340
    },
    {
      "epoch": 4.000248751333765,
      "grad_norm": 0.4950283467769623,
      "learning_rate": 2.0126776618806957e-06,
      "loss": 0.0095,
      "step": 2444360
    },
    {
      "epoch": 4.000281481772419,
      "grad_norm": 0.2529151737689972,
      "learning_rate": 2.0126117696671784e-06,
      "loss": 0.0061,
      "step": 2444380
    },
    {
      "epoch": 4.000314212211072,
      "grad_norm": 0.2575804591178894,
      "learning_rate": 2.0125458774536616e-06,
      "loss": 0.0076,
      "step": 2444400
    },
    {
      "epoch": 4.000346942649726,
      "grad_norm": 0.12272139638662338,
      "learning_rate": 2.0124799852401444e-06,
      "loss": 0.0076,
      "step": 2444420
    },
    {
      "epoch": 4.0003796730883785,
      "grad_norm": 0.11057127267122269,
      "learning_rate": 2.012414093026627e-06,
      "loss": 0.009,
      "step": 2444440
    },
    {
      "epoch": 4.000412403527032,
      "grad_norm": 0.25754883885383606,
      "learning_rate": 2.0123482008131103e-06,
      "loss": 0.0068,
      "step": 2444460
    },
    {
      "epoch": 4.000445133965686,
      "grad_norm": 0.11129175126552582,
      "learning_rate": 2.012282308599593e-06,
      "loss": 0.008,
      "step": 2444480
    },
    {
      "epoch": 4.000477864404338,
      "grad_norm": 0.44545072317123413,
      "learning_rate": 2.0122164163860757e-06,
      "loss": 0.0083,
      "step": 2444500
    },
    {
      "epoch": 4.000510594842992,
      "grad_norm": 0.6007369756698608,
      "learning_rate": 2.0121505241725585e-06,
      "loss": 0.011,
      "step": 2444520
    },
    {
      "epoch": 4.000543325281646,
      "grad_norm": 0.18477320671081543,
      "learning_rate": 2.0120846319590416e-06,
      "loss": 0.0072,
      "step": 2444540
    },
    {
      "epoch": 4.000576055720299,
      "grad_norm": 0.4542735517024994,
      "learning_rate": 2.0120187397455244e-06,
      "loss": 0.0084,
      "step": 2444560
    },
    {
      "epoch": 4.000608786158952,
      "grad_norm": 0.23800428211688995,
      "learning_rate": 2.0119528475320075e-06,
      "loss": 0.01,
      "step": 2444580
    },
    {
      "epoch": 4.0006415165976055,
      "grad_norm": 0.22563447058200836,
      "learning_rate": 2.0118869553184903e-06,
      "loss": 0.0078,
      "step": 2444600
    },
    {
      "epoch": 4.000674247036259,
      "grad_norm": 0.3424263000488281,
      "learning_rate": 2.011821063104973e-06,
      "loss": 0.0074,
      "step": 2444620
    },
    {
      "epoch": 4.000706977474912,
      "grad_norm": 0.03939291462302208,
      "learning_rate": 2.0117551708914558e-06,
      "loss": 0.0087,
      "step": 2444640
    },
    {
      "epoch": 4.000739707913565,
      "grad_norm": 0.041638877242803574,
      "learning_rate": 2.011689278677939e-06,
      "loss": 0.0087,
      "step": 2444660
    },
    {
      "epoch": 4.000772438352219,
      "grad_norm": 0.17367498576641083,
      "learning_rate": 2.0116233864644217e-06,
      "loss": 0.0102,
      "step": 2444680
    },
    {
      "epoch": 4.000805168790873,
      "grad_norm": 0.3539736270904541,
      "learning_rate": 2.0115574942509044e-06,
      "loss": 0.0113,
      "step": 2444700
    },
    {
      "epoch": 4.000837899229525,
      "grad_norm": 0.6034227013587952,
      "learning_rate": 2.011491602037387e-06,
      "loss": 0.0137,
      "step": 2444720
    },
    {
      "epoch": 4.000870629668179,
      "grad_norm": 0.11897887289524078,
      "learning_rate": 2.0114257098238703e-06,
      "loss": 0.0082,
      "step": 2444740
    },
    {
      "epoch": 4.0009033601068325,
      "grad_norm": 0.22955915331840515,
      "learning_rate": 2.0113598176103535e-06,
      "loss": 0.0083,
      "step": 2444760
    },
    {
      "epoch": 4.000936090545485,
      "grad_norm": 0.2632094919681549,
      "learning_rate": 2.011293925396836e-06,
      "loss": 0.0075,
      "step": 2444780
    },
    {
      "epoch": 4.000968820984139,
      "grad_norm": 0.524836540222168,
      "learning_rate": 2.011228033183319e-06,
      "loss": 0.0119,
      "step": 2444800
    },
    {
      "epoch": 4.001001551422792,
      "grad_norm": 0.36804547905921936,
      "learning_rate": 2.0111621409698017e-06,
      "loss": 0.0108,
      "step": 2444820
    },
    {
      "epoch": 4.001034281861446,
      "grad_norm": 0.07059861719608307,
      "learning_rate": 2.0110962487562844e-06,
      "loss": 0.0092,
      "step": 2444840
    },
    {
      "epoch": 4.001067012300099,
      "grad_norm": 0.3240664005279541,
      "learning_rate": 2.0110303565427676e-06,
      "loss": 0.0073,
      "step": 2444860
    },
    {
      "epoch": 4.001099742738752,
      "grad_norm": 0.1856621950864792,
      "learning_rate": 2.0109644643292503e-06,
      "loss": 0.0099,
      "step": 2444880
    },
    {
      "epoch": 4.001132473177406,
      "grad_norm": 0.40515369176864624,
      "learning_rate": 2.010898572115733e-06,
      "loss": 0.0085,
      "step": 2444900
    },
    {
      "epoch": 4.0011652036160585,
      "grad_norm": 0.05780733376741409,
      "learning_rate": 2.0108326799022162e-06,
      "loss": 0.0086,
      "step": 2444920
    },
    {
      "epoch": 4.001197934054712,
      "grad_norm": 0.15780889987945557,
      "learning_rate": 2.010766787688699e-06,
      "loss": 0.0108,
      "step": 2444940
    },
    {
      "epoch": 4.001230664493366,
      "grad_norm": 0.08883680403232574,
      "learning_rate": 2.010700895475182e-06,
      "loss": 0.0051,
      "step": 2444960
    },
    {
      "epoch": 4.001263394932018,
      "grad_norm": 0.2783157527446747,
      "learning_rate": 2.010635003261665e-06,
      "loss": 0.0075,
      "step": 2444980
    },
    {
      "epoch": 4.001296125370672,
      "grad_norm": 0.25027787685394287,
      "learning_rate": 2.0105691110481476e-06,
      "loss": 0.0061,
      "step": 2445000
    },
    {
      "epoch": 4.001328855809326,
      "grad_norm": 0.24135443568229675,
      "learning_rate": 2.0105032188346304e-06,
      "loss": 0.007,
      "step": 2445020
    },
    {
      "epoch": 4.001361586247979,
      "grad_norm": 0.17112179100513458,
      "learning_rate": 2.010437326621113e-06,
      "loss": 0.0119,
      "step": 2445040
    },
    {
      "epoch": 4.001394316686632,
      "grad_norm": 0.15842022001743317,
      "learning_rate": 2.0103714344075963e-06,
      "loss": 0.0088,
      "step": 2445060
    },
    {
      "epoch": 4.0014270471252855,
      "grad_norm": 0.153486967086792,
      "learning_rate": 2.010305542194079e-06,
      "loss": 0.0119,
      "step": 2445080
    },
    {
      "epoch": 4.001459777563939,
      "grad_norm": 0.2776987850666046,
      "learning_rate": 2.010239649980562e-06,
      "loss": 0.0069,
      "step": 2445100
    },
    {
      "epoch": 4.001492508002592,
      "grad_norm": 0.20218294858932495,
      "learning_rate": 2.010173757767045e-06,
      "loss": 0.0118,
      "step": 2445120
    },
    {
      "epoch": 4.001525238441245,
      "grad_norm": 0.1616136133670807,
      "learning_rate": 2.0101078655535276e-06,
      "loss": 0.0065,
      "step": 2445140
    },
    {
      "epoch": 4.001557968879899,
      "grad_norm": 0.04859156906604767,
      "learning_rate": 2.010041973340011e-06,
      "loss": 0.0088,
      "step": 2445160
    },
    {
      "epoch": 4.001590699318553,
      "grad_norm": 0.5346560478210449,
      "learning_rate": 2.0099760811264935e-06,
      "loss": 0.0097,
      "step": 2445180
    },
    {
      "epoch": 4.001623429757205,
      "grad_norm": 0.14656919240951538,
      "learning_rate": 2.0099101889129763e-06,
      "loss": 0.0079,
      "step": 2445200
    },
    {
      "epoch": 4.001656160195859,
      "grad_norm": 0.09189753234386444,
      "learning_rate": 2.009844296699459e-06,
      "loss": 0.0078,
      "step": 2445220
    },
    {
      "epoch": 4.0016888906345125,
      "grad_norm": 0.2971211075782776,
      "learning_rate": 2.0097784044859418e-06,
      "loss": 0.0083,
      "step": 2445240
    },
    {
      "epoch": 4.001721621073165,
      "grad_norm": 0.2657288908958435,
      "learning_rate": 2.009712512272425e-06,
      "loss": 0.0079,
      "step": 2445260
    },
    {
      "epoch": 4.001754351511819,
      "grad_norm": 0.575692892074585,
      "learning_rate": 2.009646620058908e-06,
      "loss": 0.0084,
      "step": 2445280
    },
    {
      "epoch": 4.001787081950472,
      "grad_norm": 0.4678056240081787,
      "learning_rate": 2.009580727845391e-06,
      "loss": 0.0096,
      "step": 2445300
    },
    {
      "epoch": 4.001819812389126,
      "grad_norm": 0.16884192824363708,
      "learning_rate": 2.0095148356318736e-06,
      "loss": 0.008,
      "step": 2445320
    },
    {
      "epoch": 4.001852542827779,
      "grad_norm": 0.1156795546412468,
      "learning_rate": 2.0094489434183563e-06,
      "loss": 0.0108,
      "step": 2445340
    },
    {
      "epoch": 4.001885273266432,
      "grad_norm": 0.17160549759864807,
      "learning_rate": 2.0093830512048395e-06,
      "loss": 0.0091,
      "step": 2445360
    },
    {
      "epoch": 4.001918003705086,
      "grad_norm": 0.45698827505111694,
      "learning_rate": 2.0093171589913222e-06,
      "loss": 0.0156,
      "step": 2445380
    },
    {
      "epoch": 4.001950734143739,
      "grad_norm": 0.19264261424541473,
      "learning_rate": 2.009251266777805e-06,
      "loss": 0.0058,
      "step": 2445400
    },
    {
      "epoch": 4.001983464582392,
      "grad_norm": 0.40283867716789246,
      "learning_rate": 2.0091853745642877e-06,
      "loss": 0.0058,
      "step": 2445420
    },
    {
      "epoch": 4.002016195021046,
      "grad_norm": 0.6446503400802612,
      "learning_rate": 2.009119482350771e-06,
      "loss": 0.0073,
      "step": 2445440
    },
    {
      "epoch": 4.002048925459699,
      "grad_norm": 0.2931218147277832,
      "learning_rate": 2.0090535901372536e-06,
      "loss": 0.0099,
      "step": 2445460
    },
    {
      "epoch": 4.002081655898352,
      "grad_norm": 0.4969382584095001,
      "learning_rate": 2.0089876979237368e-06,
      "loss": 0.0078,
      "step": 2445480
    },
    {
      "epoch": 4.002114386337006,
      "grad_norm": 0.4171883761882782,
      "learning_rate": 2.0089218057102195e-06,
      "loss": 0.0153,
      "step": 2445500
    },
    {
      "epoch": 4.002147116775659,
      "grad_norm": 0.8365857005119324,
      "learning_rate": 2.0088559134967022e-06,
      "loss": 0.0074,
      "step": 2445520
    },
    {
      "epoch": 4.002179847214312,
      "grad_norm": 0.17306175827980042,
      "learning_rate": 2.008790021283185e-06,
      "loss": 0.0068,
      "step": 2445540
    },
    {
      "epoch": 4.002212577652966,
      "grad_norm": 0.33212894201278687,
      "learning_rate": 2.008724129069668e-06,
      "loss": 0.0136,
      "step": 2445560
    },
    {
      "epoch": 4.002245308091619,
      "grad_norm": 0.17638032138347626,
      "learning_rate": 2.008658236856151e-06,
      "loss": 0.0062,
      "step": 2445580
    },
    {
      "epoch": 4.002278038530273,
      "grad_norm": 0.5666139125823975,
      "learning_rate": 2.0085923446426336e-06,
      "loss": 0.0141,
      "step": 2445600
    },
    {
      "epoch": 4.0023107689689255,
      "grad_norm": 0.14077985286712646,
      "learning_rate": 2.0085264524291164e-06,
      "loss": 0.0077,
      "step": 2445620
    },
    {
      "epoch": 4.002343499407579,
      "grad_norm": 0.1393594890832901,
      "learning_rate": 2.0084605602155995e-06,
      "loss": 0.0068,
      "step": 2445640
    },
    {
      "epoch": 4.002376229846233,
      "grad_norm": 0.2567697763442993,
      "learning_rate": 2.0083946680020823e-06,
      "loss": 0.013,
      "step": 2445660
    },
    {
      "epoch": 4.002408960284885,
      "grad_norm": 0.12583701312541962,
      "learning_rate": 2.0083287757885654e-06,
      "loss": 0.006,
      "step": 2445680
    },
    {
      "epoch": 4.002441690723539,
      "grad_norm": 0.1292080134153366,
      "learning_rate": 2.008262883575048e-06,
      "loss": 0.0093,
      "step": 2445700
    },
    {
      "epoch": 4.0024744211621925,
      "grad_norm": 0.09665367007255554,
      "learning_rate": 2.008196991361531e-06,
      "loss": 0.0068,
      "step": 2445720
    },
    {
      "epoch": 4.002507151600846,
      "grad_norm": 0.07750868052244186,
      "learning_rate": 2.0081310991480136e-06,
      "loss": 0.0073,
      "step": 2445740
    },
    {
      "epoch": 4.002539882039499,
      "grad_norm": 0.18787501752376556,
      "learning_rate": 2.008065206934497e-06,
      "loss": 0.0066,
      "step": 2445760
    },
    {
      "epoch": 4.002572612478152,
      "grad_norm": 0.4722253680229187,
      "learning_rate": 2.0079993147209795e-06,
      "loss": 0.012,
      "step": 2445780
    },
    {
      "epoch": 4.002605342916806,
      "grad_norm": 0.34852510690689087,
      "learning_rate": 2.0079334225074627e-06,
      "loss": 0.0104,
      "step": 2445800
    },
    {
      "epoch": 4.002638073355459,
      "grad_norm": 0.2779209315776825,
      "learning_rate": 2.0078675302939455e-06,
      "loss": 0.0089,
      "step": 2445820
    },
    {
      "epoch": 4.002670803794112,
      "grad_norm": 0.2580406367778778,
      "learning_rate": 2.007801638080428e-06,
      "loss": 0.0085,
      "step": 2445840
    },
    {
      "epoch": 4.002703534232766,
      "grad_norm": 0.19283540546894073,
      "learning_rate": 2.007735745866911e-06,
      "loss": 0.0088,
      "step": 2445860
    },
    {
      "epoch": 4.0027362646714195,
      "grad_norm": 0.17773887515068054,
      "learning_rate": 2.007669853653394e-06,
      "loss": 0.0084,
      "step": 2445880
    },
    {
      "epoch": 4.002768995110072,
      "grad_norm": 0.09493967890739441,
      "learning_rate": 2.007603961439877e-06,
      "loss": 0.0129,
      "step": 2445900
    },
    {
      "epoch": 4.002801725548726,
      "grad_norm": 0.09760699421167374,
      "learning_rate": 2.0075380692263596e-06,
      "loss": 0.0086,
      "step": 2445920
    },
    {
      "epoch": 4.002834455987379,
      "grad_norm": 0.5278247594833374,
      "learning_rate": 2.0074721770128423e-06,
      "loss": 0.0055,
      "step": 2445940
    },
    {
      "epoch": 4.002867186426032,
      "grad_norm": 0.6816791296005249,
      "learning_rate": 2.0074062847993255e-06,
      "loss": 0.0118,
      "step": 2445960
    },
    {
      "epoch": 4.002899916864686,
      "grad_norm": 0.2726955711841583,
      "learning_rate": 2.0073403925858082e-06,
      "loss": 0.0108,
      "step": 2445980
    },
    {
      "epoch": 4.002932647303339,
      "grad_norm": 0.9745604991912842,
      "learning_rate": 2.0072745003722914e-06,
      "loss": 0.0118,
      "step": 2446000
    },
    {
      "epoch": 4.002965377741993,
      "grad_norm": 0.4465898871421814,
      "learning_rate": 2.007208608158774e-06,
      "loss": 0.0112,
      "step": 2446020
    },
    {
      "epoch": 4.002998108180646,
      "grad_norm": 0.2736065089702606,
      "learning_rate": 2.007142715945257e-06,
      "loss": 0.0113,
      "step": 2446040
    },
    {
      "epoch": 4.003030838619299,
      "grad_norm": 0.3673577308654785,
      "learning_rate": 2.0070768237317396e-06,
      "loss": 0.0091,
      "step": 2446060
    },
    {
      "epoch": 4.003063569057953,
      "grad_norm": 0.24471648037433624,
      "learning_rate": 2.0070109315182228e-06,
      "loss": 0.0075,
      "step": 2446080
    },
    {
      "epoch": 4.0030962994966055,
      "grad_norm": 0.15480583906173706,
      "learning_rate": 2.0069450393047055e-06,
      "loss": 0.0171,
      "step": 2446100
    },
    {
      "epoch": 4.003129029935259,
      "grad_norm": 0.6499472260475159,
      "learning_rate": 2.0068791470911882e-06,
      "loss": 0.0052,
      "step": 2446120
    },
    {
      "epoch": 4.003161760373913,
      "grad_norm": 0.15654316544532776,
      "learning_rate": 2.006813254877671e-06,
      "loss": 0.0069,
      "step": 2446140
    },
    {
      "epoch": 4.003194490812566,
      "grad_norm": 0.18758560717105865,
      "learning_rate": 2.006747362664154e-06,
      "loss": 0.0116,
      "step": 2446160
    },
    {
      "epoch": 4.003227221251219,
      "grad_norm": 0.1705266535282135,
      "learning_rate": 2.0066814704506373e-06,
      "loss": 0.0126,
      "step": 2446180
    },
    {
      "epoch": 4.003259951689873,
      "grad_norm": 0.6926968693733215,
      "learning_rate": 2.00661557823712e-06,
      "loss": 0.0118,
      "step": 2446200
    },
    {
      "epoch": 4.003292682128526,
      "grad_norm": 0.12445911020040512,
      "learning_rate": 2.006549686023603e-06,
      "loss": 0.0117,
      "step": 2446220
    },
    {
      "epoch": 4.003325412567179,
      "grad_norm": 0.2931655943393707,
      "learning_rate": 2.0064837938100855e-06,
      "loss": 0.0082,
      "step": 2446240
    },
    {
      "epoch": 4.0033581430058325,
      "grad_norm": 0.09027477353811264,
      "learning_rate": 2.0064179015965683e-06,
      "loss": 0.0087,
      "step": 2446260
    },
    {
      "epoch": 4.003390873444486,
      "grad_norm": 0.24040353298187256,
      "learning_rate": 2.0063520093830514e-06,
      "loss": 0.0061,
      "step": 2446280
    },
    {
      "epoch": 4.00342360388314,
      "grad_norm": 0.5016928911209106,
      "learning_rate": 2.006286117169534e-06,
      "loss": 0.01,
      "step": 2446300
    },
    {
      "epoch": 4.003456334321792,
      "grad_norm": 0.298350065946579,
      "learning_rate": 2.006220224956017e-06,
      "loss": 0.0096,
      "step": 2446320
    },
    {
      "epoch": 4.003489064760446,
      "grad_norm": 0.07845793664455414,
      "learning_rate": 2.0061543327425e-06,
      "loss": 0.007,
      "step": 2446340
    },
    {
      "epoch": 4.0035217951991,
      "grad_norm": 0.11596845835447311,
      "learning_rate": 2.006088440528983e-06,
      "loss": 0.01,
      "step": 2446360
    },
    {
      "epoch": 4.003554525637752,
      "grad_norm": 0.12308167666196823,
      "learning_rate": 2.006022548315466e-06,
      "loss": 0.0077,
      "step": 2446380
    },
    {
      "epoch": 4.003587256076406,
      "grad_norm": 0.08330333232879639,
      "learning_rate": 2.0059566561019487e-06,
      "loss": 0.0076,
      "step": 2446400
    },
    {
      "epoch": 4.0036199865150595,
      "grad_norm": 0.3064728379249573,
      "learning_rate": 2.0058907638884315e-06,
      "loss": 0.0113,
      "step": 2446420
    },
    {
      "epoch": 4.003652716953713,
      "grad_norm": 0.1732526272535324,
      "learning_rate": 2.005824871674914e-06,
      "loss": 0.0072,
      "step": 2446440
    },
    {
      "epoch": 4.003685447392366,
      "grad_norm": 0.11147557199001312,
      "learning_rate": 2.005758979461397e-06,
      "loss": 0.0095,
      "step": 2446460
    },
    {
      "epoch": 4.003718177831019,
      "grad_norm": 0.16771021485328674,
      "learning_rate": 2.00569308724788e-06,
      "loss": 0.0073,
      "step": 2446480
    },
    {
      "epoch": 4.003750908269673,
      "grad_norm": 0.15555360913276672,
      "learning_rate": 2.005627195034363e-06,
      "loss": 0.0074,
      "step": 2446500
    },
    {
      "epoch": 4.003783638708326,
      "grad_norm": 0.09360010176897049,
      "learning_rate": 2.005561302820846e-06,
      "loss": 0.0073,
      "step": 2446520
    },
    {
      "epoch": 4.003816369146979,
      "grad_norm": 0.346923828125,
      "learning_rate": 2.0054954106073287e-06,
      "loss": 0.0088,
      "step": 2446540
    },
    {
      "epoch": 4.003849099585633,
      "grad_norm": 0.1567944586277008,
      "learning_rate": 2.0054295183938115e-06,
      "loss": 0.0087,
      "step": 2446560
    },
    {
      "epoch": 4.0038818300242855,
      "grad_norm": 0.16789309680461884,
      "learning_rate": 2.0053636261802946e-06,
      "loss": 0.0087,
      "step": 2446580
    },
    {
      "epoch": 4.003914560462939,
      "grad_norm": 0.24564263224601746,
      "learning_rate": 2.0052977339667774e-06,
      "loss": 0.0145,
      "step": 2446600
    },
    {
      "epoch": 4.003947290901593,
      "grad_norm": 0.36835095286369324,
      "learning_rate": 2.00523184175326e-06,
      "loss": 0.0087,
      "step": 2446620
    },
    {
      "epoch": 4.003980021340246,
      "grad_norm": 0.19130843877792358,
      "learning_rate": 2.005165949539743e-06,
      "loss": 0.0065,
      "step": 2446640
    },
    {
      "epoch": 4.004012751778899,
      "grad_norm": 0.8177940845489502,
      "learning_rate": 2.0051000573262256e-06,
      "loss": 0.0078,
      "step": 2446660
    },
    {
      "epoch": 4.004045482217553,
      "grad_norm": 0.14493028819561005,
      "learning_rate": 2.0050341651127088e-06,
      "loss": 0.0086,
      "step": 2446680
    },
    {
      "epoch": 4.004078212656206,
      "grad_norm": 0.16865421831607819,
      "learning_rate": 2.004968272899192e-06,
      "loss": 0.0108,
      "step": 2446700
    },
    {
      "epoch": 4.004110943094859,
      "grad_norm": 0.2612287104129791,
      "learning_rate": 2.0049023806856747e-06,
      "loss": 0.0089,
      "step": 2446720
    },
    {
      "epoch": 4.0041436735335125,
      "grad_norm": 0.2135453224182129,
      "learning_rate": 2.0048364884721574e-06,
      "loss": 0.0069,
      "step": 2446740
    },
    {
      "epoch": 4.004176403972166,
      "grad_norm": 0.24134275317192078,
      "learning_rate": 2.00477059625864e-06,
      "loss": 0.0088,
      "step": 2446760
    },
    {
      "epoch": 4.00420913441082,
      "grad_norm": 0.17023785412311554,
      "learning_rate": 2.0047047040451233e-06,
      "loss": 0.0081,
      "step": 2446780
    },
    {
      "epoch": 4.004241864849472,
      "grad_norm": 0.37081676721572876,
      "learning_rate": 2.004638811831606e-06,
      "loss": 0.0101,
      "step": 2446800
    },
    {
      "epoch": 4.004274595288126,
      "grad_norm": 0.16061826050281525,
      "learning_rate": 2.004572919618089e-06,
      "loss": 0.0093,
      "step": 2446820
    },
    {
      "epoch": 4.00430732572678,
      "grad_norm": 0.06774170696735382,
      "learning_rate": 2.0045070274045715e-06,
      "loss": 0.0095,
      "step": 2446840
    },
    {
      "epoch": 4.004340056165432,
      "grad_norm": 0.3753727674484253,
      "learning_rate": 2.0044411351910547e-06,
      "loss": 0.0083,
      "step": 2446860
    },
    {
      "epoch": 4.004372786604086,
      "grad_norm": 0.26370546221733093,
      "learning_rate": 2.0043752429775374e-06,
      "loss": 0.0058,
      "step": 2446880
    },
    {
      "epoch": 4.0044055170427395,
      "grad_norm": 0.2987414598464966,
      "learning_rate": 2.0043093507640206e-06,
      "loss": 0.0062,
      "step": 2446900
    },
    {
      "epoch": 4.004438247481393,
      "grad_norm": 0.23103106021881104,
      "learning_rate": 2.0042434585505033e-06,
      "loss": 0.0122,
      "step": 2446920
    },
    {
      "epoch": 4.004470977920046,
      "grad_norm": 0.17595139145851135,
      "learning_rate": 2.004177566336986e-06,
      "loss": 0.009,
      "step": 2446940
    },
    {
      "epoch": 4.004503708358699,
      "grad_norm": 0.30136042833328247,
      "learning_rate": 2.004111674123469e-06,
      "loss": 0.0079,
      "step": 2446960
    },
    {
      "epoch": 4.004536438797353,
      "grad_norm": 0.4122432470321655,
      "learning_rate": 2.004045781909952e-06,
      "loss": 0.0133,
      "step": 2446980
    },
    {
      "epoch": 4.004569169236006,
      "grad_norm": 0.22200191020965576,
      "learning_rate": 2.0039798896964347e-06,
      "loss": 0.0064,
      "step": 2447000
    },
    {
      "epoch": 4.004601899674659,
      "grad_norm": 0.23350325226783752,
      "learning_rate": 2.0039139974829175e-06,
      "loss": 0.0068,
      "step": 2447020
    },
    {
      "epoch": 4.004634630113313,
      "grad_norm": 0.13114503026008606,
      "learning_rate": 2.0038481052694006e-06,
      "loss": 0.0076,
      "step": 2447040
    },
    {
      "epoch": 4.0046673605519665,
      "grad_norm": 0.1752239465713501,
      "learning_rate": 2.0037822130558834e-06,
      "loss": 0.0073,
      "step": 2447060
    },
    {
      "epoch": 4.004700090990619,
      "grad_norm": 0.29802191257476807,
      "learning_rate": 2.003716320842366e-06,
      "loss": 0.0085,
      "step": 2447080
    },
    {
      "epoch": 4.004732821429273,
      "grad_norm": 0.23719432950019836,
      "learning_rate": 2.0036504286288493e-06,
      "loss": 0.0069,
      "step": 2447100
    },
    {
      "epoch": 4.004765551867926,
      "grad_norm": 0.07880543172359467,
      "learning_rate": 2.003584536415332e-06,
      "loss": 0.0052,
      "step": 2447120
    },
    {
      "epoch": 4.004798282306579,
      "grad_norm": 0.45355653762817383,
      "learning_rate": 2.0035186442018147e-06,
      "loss": 0.0088,
      "step": 2447140
    },
    {
      "epoch": 4.004831012745233,
      "grad_norm": 0.165873184800148,
      "learning_rate": 2.0034527519882975e-06,
      "loss": 0.0111,
      "step": 2447160
    },
    {
      "epoch": 4.004863743183886,
      "grad_norm": 0.11741778999567032,
      "learning_rate": 2.0033868597747806e-06,
      "loss": 0.009,
      "step": 2447180
    },
    {
      "epoch": 4.00489647362254,
      "grad_norm": 0.5967668294906616,
      "learning_rate": 2.0033209675612634e-06,
      "loss": 0.0104,
      "step": 2447200
    },
    {
      "epoch": 4.004929204061193,
      "grad_norm": 0.24309368431568146,
      "learning_rate": 2.0032550753477466e-06,
      "loss": 0.0091,
      "step": 2447220
    },
    {
      "epoch": 4.004961934499846,
      "grad_norm": 0.11625652760267258,
      "learning_rate": 2.0031891831342293e-06,
      "loss": 0.0069,
      "step": 2447240
    },
    {
      "epoch": 4.0049946649385,
      "grad_norm": 0.25580891966819763,
      "learning_rate": 2.003123290920712e-06,
      "loss": 0.0108,
      "step": 2447260
    },
    {
      "epoch": 4.0050273953771525,
      "grad_norm": 0.2265956997871399,
      "learning_rate": 2.0030573987071948e-06,
      "loss": 0.0103,
      "step": 2447280
    },
    {
      "epoch": 4.005060125815806,
      "grad_norm": 0.22385720908641815,
      "learning_rate": 2.002991506493678e-06,
      "loss": 0.0085,
      "step": 2447300
    },
    {
      "epoch": 4.00509285625446,
      "grad_norm": 0.2944807708263397,
      "learning_rate": 2.0029256142801607e-06,
      "loss": 0.0108,
      "step": 2447320
    },
    {
      "epoch": 4.005125586693113,
      "grad_norm": 0.09136463701725006,
      "learning_rate": 2.0028597220666434e-06,
      "loss": 0.0104,
      "step": 2447340
    },
    {
      "epoch": 4.005158317131766,
      "grad_norm": 0.20509420335292816,
      "learning_rate": 2.002793829853126e-06,
      "loss": 0.0083,
      "step": 2447360
    },
    {
      "epoch": 4.0051910475704195,
      "grad_norm": 0.43595418334007263,
      "learning_rate": 2.0027279376396093e-06,
      "loss": 0.0097,
      "step": 2447380
    },
    {
      "epoch": 4.005223778009073,
      "grad_norm": 0.6824538707733154,
      "learning_rate": 2.0026620454260925e-06,
      "loss": 0.0079,
      "step": 2447400
    },
    {
      "epoch": 4.005256508447726,
      "grad_norm": 0.17630046606063843,
      "learning_rate": 2.0025961532125752e-06,
      "loss": 0.0082,
      "step": 2447420
    },
    {
      "epoch": 4.005289238886379,
      "grad_norm": 0.13207195699214935,
      "learning_rate": 2.002530260999058e-06,
      "loss": 0.0067,
      "step": 2447440
    },
    {
      "epoch": 4.005321969325033,
      "grad_norm": 0.1685616374015808,
      "learning_rate": 2.0024643687855407e-06,
      "loss": 0.0105,
      "step": 2447460
    },
    {
      "epoch": 4.005354699763687,
      "grad_norm": 0.134919211268425,
      "learning_rate": 2.0023984765720234e-06,
      "loss": 0.0094,
      "step": 2447480
    },
    {
      "epoch": 4.005387430202339,
      "grad_norm": 0.2662484049797058,
      "learning_rate": 2.0023325843585066e-06,
      "loss": 0.0121,
      "step": 2447500
    },
    {
      "epoch": 4.005420160640993,
      "grad_norm": 0.26490405201911926,
      "learning_rate": 2.0022666921449893e-06,
      "loss": 0.0141,
      "step": 2447520
    },
    {
      "epoch": 4.0054528910796465,
      "grad_norm": 0.26435205340385437,
      "learning_rate": 2.002200799931472e-06,
      "loss": 0.0111,
      "step": 2447540
    },
    {
      "epoch": 4.005485621518299,
      "grad_norm": 0.3098689615726471,
      "learning_rate": 2.0021349077179552e-06,
      "loss": 0.0096,
      "step": 2447560
    },
    {
      "epoch": 4.005518351956953,
      "grad_norm": 0.18241699039936066,
      "learning_rate": 2.002069015504438e-06,
      "loss": 0.0111,
      "step": 2447580
    },
    {
      "epoch": 4.005551082395606,
      "grad_norm": 0.08082026243209839,
      "learning_rate": 2.002003123290921e-06,
      "loss": 0.0057,
      "step": 2447600
    },
    {
      "epoch": 4.00558381283426,
      "grad_norm": 0.06687343865633011,
      "learning_rate": 2.001937231077404e-06,
      "loss": 0.0093,
      "step": 2447620
    },
    {
      "epoch": 4.005616543272913,
      "grad_norm": 0.16572512686252594,
      "learning_rate": 2.0018713388638866e-06,
      "loss": 0.0064,
      "step": 2447640
    },
    {
      "epoch": 4.005649273711566,
      "grad_norm": 0.8204965591430664,
      "learning_rate": 2.0018054466503694e-06,
      "loss": 0.0129,
      "step": 2447660
    },
    {
      "epoch": 4.00568200415022,
      "grad_norm": 0.16805683076381683,
      "learning_rate": 2.001739554436852e-06,
      "loss": 0.0073,
      "step": 2447680
    },
    {
      "epoch": 4.005714734588873,
      "grad_norm": 0.2847181260585785,
      "learning_rate": 2.0016736622233353e-06,
      "loss": 0.0102,
      "step": 2447700
    },
    {
      "epoch": 4.005747465027526,
      "grad_norm": 0.13573309779167175,
      "learning_rate": 2.001607770009818e-06,
      "loss": 0.0065,
      "step": 2447720
    },
    {
      "epoch": 4.00578019546618,
      "grad_norm": 0.0619560144841671,
      "learning_rate": 2.001541877796301e-06,
      "loss": 0.0072,
      "step": 2447740
    },
    {
      "epoch": 4.005812925904833,
      "grad_norm": 0.41131895780563354,
      "learning_rate": 2.001475985582784e-06,
      "loss": 0.0092,
      "step": 2447760
    },
    {
      "epoch": 4.005845656343486,
      "grad_norm": 1.0415503978729248,
      "learning_rate": 2.0014100933692667e-06,
      "loss": 0.0096,
      "step": 2447780
    },
    {
      "epoch": 4.00587838678214,
      "grad_norm": 0.1541421115398407,
      "learning_rate": 2.00134420115575e-06,
      "loss": 0.0074,
      "step": 2447800
    },
    {
      "epoch": 4.005911117220793,
      "grad_norm": 0.17853978276252747,
      "learning_rate": 2.0012783089422326e-06,
      "loss": 0.0081,
      "step": 2447820
    },
    {
      "epoch": 4.005943847659446,
      "grad_norm": 0.14165639877319336,
      "learning_rate": 2.0012124167287153e-06,
      "loss": 0.0073,
      "step": 2447840
    },
    {
      "epoch": 4.0059765780981,
      "grad_norm": 0.2781871259212494,
      "learning_rate": 2.001146524515198e-06,
      "loss": 0.0074,
      "step": 2447860
    },
    {
      "epoch": 4.006009308536753,
      "grad_norm": 0.161823570728302,
      "learning_rate": 2.0010806323016808e-06,
      "loss": 0.0095,
      "step": 2447880
    },
    {
      "epoch": 4.006042038975407,
      "grad_norm": 0.23528793454170227,
      "learning_rate": 2.001014740088164e-06,
      "loss": 0.0107,
      "step": 2447900
    },
    {
      "epoch": 4.0060747694140595,
      "grad_norm": 0.11052504926919937,
      "learning_rate": 2.000948847874647e-06,
      "loss": 0.0095,
      "step": 2447920
    },
    {
      "epoch": 4.006107499852713,
      "grad_norm": 0.0920429676771164,
      "learning_rate": 2.00088295566113e-06,
      "loss": 0.0101,
      "step": 2447940
    },
    {
      "epoch": 4.006140230291367,
      "grad_norm": 0.11533405631780624,
      "learning_rate": 2.0008170634476126e-06,
      "loss": 0.0063,
      "step": 2447960
    },
    {
      "epoch": 4.006172960730019,
      "grad_norm": 0.12213695794343948,
      "learning_rate": 2.0007511712340953e-06,
      "loss": 0.0082,
      "step": 2447980
    },
    {
      "epoch": 4.006205691168673,
      "grad_norm": 0.2751288115978241,
      "learning_rate": 2.0006852790205785e-06,
      "loss": 0.0075,
      "step": 2448000
    },
    {
      "epoch": 4.006238421607327,
      "grad_norm": 0.1116570383310318,
      "learning_rate": 2.0006193868070612e-06,
      "loss": 0.0104,
      "step": 2448020
    },
    {
      "epoch": 4.006271152045979,
      "grad_norm": 0.23032836616039276,
      "learning_rate": 2.000553494593544e-06,
      "loss": 0.0095,
      "step": 2448040
    },
    {
      "epoch": 4.006303882484633,
      "grad_norm": 0.12969323992729187,
      "learning_rate": 2.0004876023800267e-06,
      "loss": 0.0079,
      "step": 2448060
    },
    {
      "epoch": 4.0063366129232865,
      "grad_norm": 0.22310835123062134,
      "learning_rate": 2.00042171016651e-06,
      "loss": 0.0115,
      "step": 2448080
    },
    {
      "epoch": 4.00636934336194,
      "grad_norm": 0.38743528723716736,
      "learning_rate": 2.0003558179529926e-06,
      "loss": 0.0091,
      "step": 2448100
    },
    {
      "epoch": 4.006402073800593,
      "grad_norm": 0.48472338914871216,
      "learning_rate": 2.0002899257394758e-06,
      "loss": 0.0085,
      "step": 2448120
    },
    {
      "epoch": 4.006434804239246,
      "grad_norm": 0.15483982861042023,
      "learning_rate": 2.0002240335259585e-06,
      "loss": 0.0102,
      "step": 2448140
    },
    {
      "epoch": 4.0064675346779,
      "grad_norm": 0.1552741378545761,
      "learning_rate": 2.0001581413124412e-06,
      "loss": 0.009,
      "step": 2448160
    },
    {
      "epoch": 4.006500265116553,
      "grad_norm": 0.16639339923858643,
      "learning_rate": 2.000092249098924e-06,
      "loss": 0.008,
      "step": 2448180
    },
    {
      "epoch": 4.006532995555206,
      "grad_norm": 0.15555891394615173,
      "learning_rate": 2.000026356885407e-06,
      "loss": 0.0097,
      "step": 2448200
    },
    {
      "epoch": 4.00656572599386,
      "grad_norm": 0.16771508753299713,
      "learning_rate": 1.99996046467189e-06,
      "loss": 0.0087,
      "step": 2448220
    },
    {
      "epoch": 4.0065984564325134,
      "grad_norm": 0.4959583580493927,
      "learning_rate": 1.9998945724583726e-06,
      "loss": 0.0088,
      "step": 2448240
    },
    {
      "epoch": 4.006631186871166,
      "grad_norm": 0.19789575040340424,
      "learning_rate": 1.9998286802448554e-06,
      "loss": 0.0101,
      "step": 2448260
    },
    {
      "epoch": 4.00666391730982,
      "grad_norm": 0.39900583028793335,
      "learning_rate": 1.9997627880313385e-06,
      "loss": 0.0059,
      "step": 2448280
    },
    {
      "epoch": 4.006696647748473,
      "grad_norm": 0.8127413392066956,
      "learning_rate": 1.9996968958178213e-06,
      "loss": 0.0094,
      "step": 2448300
    },
    {
      "epoch": 4.006729378187126,
      "grad_norm": 0.29401183128356934,
      "learning_rate": 1.9996310036043044e-06,
      "loss": 0.0109,
      "step": 2448320
    },
    {
      "epoch": 4.00676210862578,
      "grad_norm": 0.32069510221481323,
      "learning_rate": 1.999565111390787e-06,
      "loss": 0.011,
      "step": 2448340
    },
    {
      "epoch": 4.006794839064433,
      "grad_norm": 0.26677703857421875,
      "learning_rate": 1.99949921917727e-06,
      "loss": 0.008,
      "step": 2448360
    },
    {
      "epoch": 4.006827569503087,
      "grad_norm": 0.05064079910516739,
      "learning_rate": 1.9994333269637527e-06,
      "loss": 0.0066,
      "step": 2448380
    },
    {
      "epoch": 4.0068602999417395,
      "grad_norm": 0.4079957604408264,
      "learning_rate": 1.999367434750236e-06,
      "loss": 0.0095,
      "step": 2448400
    },
    {
      "epoch": 4.006893030380393,
      "grad_norm": 0.18836888670921326,
      "learning_rate": 1.9993015425367186e-06,
      "loss": 0.0099,
      "step": 2448420
    },
    {
      "epoch": 4.006925760819047,
      "grad_norm": 0.19078609347343445,
      "learning_rate": 1.9992356503232017e-06,
      "loss": 0.0101,
      "step": 2448440
    },
    {
      "epoch": 4.006958491257699,
      "grad_norm": 0.17001540958881378,
      "learning_rate": 1.9991697581096845e-06,
      "loss": 0.0074,
      "step": 2448460
    },
    {
      "epoch": 4.006991221696353,
      "grad_norm": 0.16997823119163513,
      "learning_rate": 1.999103865896167e-06,
      "loss": 0.008,
      "step": 2448480
    },
    {
      "epoch": 4.007023952135007,
      "grad_norm": 0.2458990514278412,
      "learning_rate": 1.99903797368265e-06,
      "loss": 0.0114,
      "step": 2448500
    },
    {
      "epoch": 4.00705668257366,
      "grad_norm": 0.3421851694583893,
      "learning_rate": 1.998972081469133e-06,
      "loss": 0.0084,
      "step": 2448520
    },
    {
      "epoch": 4.007089413012313,
      "grad_norm": 0.2611069679260254,
      "learning_rate": 1.998906189255616e-06,
      "loss": 0.0061,
      "step": 2448540
    },
    {
      "epoch": 4.0071221434509665,
      "grad_norm": 0.14572295546531677,
      "learning_rate": 1.9988402970420986e-06,
      "loss": 0.0072,
      "step": 2448560
    },
    {
      "epoch": 4.00715487388962,
      "grad_norm": 0.4297020137310028,
      "learning_rate": 1.9987744048285813e-06,
      "loss": 0.0118,
      "step": 2448580
    },
    {
      "epoch": 4.007187604328273,
      "grad_norm": 0.36012569069862366,
      "learning_rate": 1.9987085126150645e-06,
      "loss": 0.0062,
      "step": 2448600
    },
    {
      "epoch": 4.007220334766926,
      "grad_norm": 0.27454236149787903,
      "learning_rate": 1.9986426204015472e-06,
      "loss": 0.007,
      "step": 2448620
    },
    {
      "epoch": 4.00725306520558,
      "grad_norm": 0.6059873104095459,
      "learning_rate": 1.9985767281880304e-06,
      "loss": 0.0123,
      "step": 2448640
    },
    {
      "epoch": 4.007285795644234,
      "grad_norm": 0.12322792410850525,
      "learning_rate": 1.998510835974513e-06,
      "loss": 0.0104,
      "step": 2448660
    },
    {
      "epoch": 4.007318526082886,
      "grad_norm": 0.2908639907836914,
      "learning_rate": 1.998444943760996e-06,
      "loss": 0.0071,
      "step": 2448680
    },
    {
      "epoch": 4.00735125652154,
      "grad_norm": 0.3261604905128479,
      "learning_rate": 1.9983790515474786e-06,
      "loss": 0.0099,
      "step": 2448700
    },
    {
      "epoch": 4.0073839869601935,
      "grad_norm": 0.07320434600114822,
      "learning_rate": 1.9983131593339618e-06,
      "loss": 0.0091,
      "step": 2448720
    },
    {
      "epoch": 4.007416717398846,
      "grad_norm": 0.16882488131523132,
      "learning_rate": 1.9982472671204445e-06,
      "loss": 0.0068,
      "step": 2448740
    },
    {
      "epoch": 4.0074494478375,
      "grad_norm": 0.06361480057239532,
      "learning_rate": 1.9981813749069273e-06,
      "loss": 0.0066,
      "step": 2448760
    },
    {
      "epoch": 4.007482178276153,
      "grad_norm": 0.45916104316711426,
      "learning_rate": 1.99811548269341e-06,
      "loss": 0.014,
      "step": 2448780
    },
    {
      "epoch": 4.007514908714807,
      "grad_norm": 0.2937960624694824,
      "learning_rate": 1.998049590479893e-06,
      "loss": 0.0064,
      "step": 2448800
    },
    {
      "epoch": 4.00754763915346,
      "grad_norm": 0.1131867989897728,
      "learning_rate": 1.9979836982663763e-06,
      "loss": 0.0073,
      "step": 2448820
    },
    {
      "epoch": 4.007580369592113,
      "grad_norm": 0.22128081321716309,
      "learning_rate": 1.997917806052859e-06,
      "loss": 0.0109,
      "step": 2448840
    },
    {
      "epoch": 4.007613100030767,
      "grad_norm": 0.15836769342422485,
      "learning_rate": 1.997851913839342e-06,
      "loss": 0.0111,
      "step": 2448860
    },
    {
      "epoch": 4.00764583046942,
      "grad_norm": 0.04065878316760063,
      "learning_rate": 1.9977860216258245e-06,
      "loss": 0.0064,
      "step": 2448880
    },
    {
      "epoch": 4.007678560908073,
      "grad_norm": 0.12768931686878204,
      "learning_rate": 1.9977201294123073e-06,
      "loss": 0.0056,
      "step": 2448900
    },
    {
      "epoch": 4.007711291346727,
      "grad_norm": 0.07027902454137802,
      "learning_rate": 1.9976542371987904e-06,
      "loss": 0.0073,
      "step": 2448920
    },
    {
      "epoch": 4.00774402178538,
      "grad_norm": 0.31902310252189636,
      "learning_rate": 1.997588344985273e-06,
      "loss": 0.0125,
      "step": 2448940
    },
    {
      "epoch": 4.007776752224033,
      "grad_norm": 0.6787755489349365,
      "learning_rate": 1.997522452771756e-06,
      "loss": 0.0104,
      "step": 2448960
    },
    {
      "epoch": 4.007809482662687,
      "grad_norm": 0.31711554527282715,
      "learning_rate": 1.997456560558239e-06,
      "loss": 0.009,
      "step": 2448980
    },
    {
      "epoch": 4.00784221310134,
      "grad_norm": 0.20743808150291443,
      "learning_rate": 1.997390668344722e-06,
      "loss": 0.016,
      "step": 2449000
    },
    {
      "epoch": 4.007874943539993,
      "grad_norm": 0.035890378057956696,
      "learning_rate": 1.997324776131205e-06,
      "loss": 0.0051,
      "step": 2449020
    },
    {
      "epoch": 4.007907673978647,
      "grad_norm": 0.25467953085899353,
      "learning_rate": 1.9972588839176877e-06,
      "loss": 0.0081,
      "step": 2449040
    },
    {
      "epoch": 4.0079404044173,
      "grad_norm": 0.09856350719928741,
      "learning_rate": 1.9971929917041705e-06,
      "loss": 0.0126,
      "step": 2449060
    },
    {
      "epoch": 4.007973134855954,
      "grad_norm": 0.11479409784078598,
      "learning_rate": 1.997127099490653e-06,
      "loss": 0.0153,
      "step": 2449080
    },
    {
      "epoch": 4.0080058652946065,
      "grad_norm": 0.22768625617027283,
      "learning_rate": 1.997061207277136e-06,
      "loss": 0.0085,
      "step": 2449100
    },
    {
      "epoch": 4.00803859573326,
      "grad_norm": 0.153400257229805,
      "learning_rate": 1.996995315063619e-06,
      "loss": 0.0126,
      "step": 2449120
    },
    {
      "epoch": 4.008071326171914,
      "grad_norm": 0.20519201457500458,
      "learning_rate": 1.996929422850102e-06,
      "loss": 0.0082,
      "step": 2449140
    },
    {
      "epoch": 4.008104056610566,
      "grad_norm": 0.24580320715904236,
      "learning_rate": 1.996863530636585e-06,
      "loss": 0.0056,
      "step": 2449160
    },
    {
      "epoch": 4.00813678704922,
      "grad_norm": 0.1746658831834793,
      "learning_rate": 1.9967976384230678e-06,
      "loss": 0.0093,
      "step": 2449180
    },
    {
      "epoch": 4.0081695174878735,
      "grad_norm": 0.08119946718215942,
      "learning_rate": 1.9967317462095505e-06,
      "loss": 0.0089,
      "step": 2449200
    },
    {
      "epoch": 4.008202247926527,
      "grad_norm": 0.120836041867733,
      "learning_rate": 1.9966658539960337e-06,
      "loss": 0.0071,
      "step": 2449220
    },
    {
      "epoch": 4.00823497836518,
      "grad_norm": 0.514630138874054,
      "learning_rate": 1.9965999617825164e-06,
      "loss": 0.0093,
      "step": 2449240
    },
    {
      "epoch": 4.008267708803833,
      "grad_norm": 0.24366888403892517,
      "learning_rate": 1.996534069568999e-06,
      "loss": 0.0088,
      "step": 2449260
    },
    {
      "epoch": 4.008300439242487,
      "grad_norm": 0.08974490314722061,
      "learning_rate": 1.996468177355482e-06,
      "loss": 0.0095,
      "step": 2449280
    },
    {
      "epoch": 4.00833316968114,
      "grad_norm": 0.12893158197402954,
      "learning_rate": 1.9964022851419646e-06,
      "loss": 0.0136,
      "step": 2449300
    },
    {
      "epoch": 4.008365900119793,
      "grad_norm": 0.31238606572151184,
      "learning_rate": 1.9963363929284478e-06,
      "loss": 0.0103,
      "step": 2449320
    },
    {
      "epoch": 4.008398630558447,
      "grad_norm": 0.129412442445755,
      "learning_rate": 1.996270500714931e-06,
      "loss": 0.0079,
      "step": 2449340
    },
    {
      "epoch": 4.0084313609971005,
      "grad_norm": 0.3588886559009552,
      "learning_rate": 1.9962046085014137e-06,
      "loss": 0.0137,
      "step": 2449360
    },
    {
      "epoch": 4.008464091435753,
      "grad_norm": 0.20365212857723236,
      "learning_rate": 1.9961387162878964e-06,
      "loss": 0.0072,
      "step": 2449380
    },
    {
      "epoch": 4.008496821874407,
      "grad_norm": 0.45087730884552,
      "learning_rate": 1.996072824074379e-06,
      "loss": 0.0093,
      "step": 2449400
    },
    {
      "epoch": 4.00852955231306,
      "grad_norm": 0.06091839820146561,
      "learning_rate": 1.9960069318608623e-06,
      "loss": 0.0127,
      "step": 2449420
    },
    {
      "epoch": 4.008562282751713,
      "grad_norm": 0.24392594397068024,
      "learning_rate": 1.995941039647345e-06,
      "loss": 0.0104,
      "step": 2449440
    },
    {
      "epoch": 4.008595013190367,
      "grad_norm": 0.69314044713974,
      "learning_rate": 1.995875147433828e-06,
      "loss": 0.0077,
      "step": 2449460
    },
    {
      "epoch": 4.00862774362902,
      "grad_norm": 0.25186076760292053,
      "learning_rate": 1.9958092552203105e-06,
      "loss": 0.0108,
      "step": 2449480
    },
    {
      "epoch": 4.008660474067674,
      "grad_norm": 0.12747707962989807,
      "learning_rate": 1.9957433630067937e-06,
      "loss": 0.01,
      "step": 2449500
    },
    {
      "epoch": 4.008693204506327,
      "grad_norm": 0.1289069801568985,
      "learning_rate": 1.9956774707932764e-06,
      "loss": 0.0083,
      "step": 2449520
    },
    {
      "epoch": 4.00872593494498,
      "grad_norm": 1.6897774934768677,
      "learning_rate": 1.9956115785797596e-06,
      "loss": 0.0087,
      "step": 2449540
    },
    {
      "epoch": 4.008758665383634,
      "grad_norm": 0.3934706449508667,
      "learning_rate": 1.9955456863662423e-06,
      "loss": 0.0095,
      "step": 2449560
    },
    {
      "epoch": 4.0087913958222865,
      "grad_norm": 0.043337151408195496,
      "learning_rate": 1.995479794152725e-06,
      "loss": 0.0103,
      "step": 2449580
    },
    {
      "epoch": 4.00882412626094,
      "grad_norm": 0.13879409432411194,
      "learning_rate": 1.995413901939208e-06,
      "loss": 0.0089,
      "step": 2449600
    },
    {
      "epoch": 4.008856856699594,
      "grad_norm": 0.056618016213178635,
      "learning_rate": 1.995348009725691e-06,
      "loss": 0.0139,
      "step": 2449620
    },
    {
      "epoch": 4.008889587138246,
      "grad_norm": 0.19105958938598633,
      "learning_rate": 1.9952821175121737e-06,
      "loss": 0.0108,
      "step": 2449640
    },
    {
      "epoch": 4.0089223175769,
      "grad_norm": 0.23448546230793,
      "learning_rate": 1.9952162252986565e-06,
      "loss": 0.0094,
      "step": 2449660
    },
    {
      "epoch": 4.008955048015554,
      "grad_norm": 0.10172967612743378,
      "learning_rate": 1.9951503330851396e-06,
      "loss": 0.0124,
      "step": 2449680
    },
    {
      "epoch": 4.008987778454207,
      "grad_norm": 0.34491345286369324,
      "learning_rate": 1.9950844408716224e-06,
      "loss": 0.0085,
      "step": 2449700
    },
    {
      "epoch": 4.00902050889286,
      "grad_norm": 0.2637825608253479,
      "learning_rate": 1.995018548658105e-06,
      "loss": 0.0085,
      "step": 2449720
    },
    {
      "epoch": 4.0090532393315135,
      "grad_norm": 0.32082775235176086,
      "learning_rate": 1.9949526564445883e-06,
      "loss": 0.0097,
      "step": 2449740
    },
    {
      "epoch": 4.009085969770167,
      "grad_norm": 0.1957537978887558,
      "learning_rate": 1.994886764231071e-06,
      "loss": 0.0064,
      "step": 2449760
    },
    {
      "epoch": 4.00911870020882,
      "grad_norm": 0.20213134586811066,
      "learning_rate": 1.9948208720175538e-06,
      "loss": 0.0087,
      "step": 2449780
    },
    {
      "epoch": 4.009151430647473,
      "grad_norm": 0.3908798098564148,
      "learning_rate": 1.9947549798040365e-06,
      "loss": 0.0057,
      "step": 2449800
    },
    {
      "epoch": 4.009184161086127,
      "grad_norm": 0.19477775692939758,
      "learning_rate": 1.9946890875905197e-06,
      "loss": 0.0095,
      "step": 2449820
    },
    {
      "epoch": 4.009216891524781,
      "grad_norm": 0.08751069009304047,
      "learning_rate": 1.9946231953770024e-06,
      "loss": 0.0097,
      "step": 2449840
    },
    {
      "epoch": 4.009249621963433,
      "grad_norm": 0.15830273926258087,
      "learning_rate": 1.9945573031634856e-06,
      "loss": 0.0065,
      "step": 2449860
    },
    {
      "epoch": 4.009282352402087,
      "grad_norm": 0.7055877447128296,
      "learning_rate": 1.9944914109499683e-06,
      "loss": 0.0093,
      "step": 2449880
    },
    {
      "epoch": 4.0093150828407405,
      "grad_norm": 0.3495638966560364,
      "learning_rate": 1.994425518736451e-06,
      "loss": 0.0059,
      "step": 2449900
    },
    {
      "epoch": 4.009347813279393,
      "grad_norm": 0.30001160502433777,
      "learning_rate": 1.9943596265229338e-06,
      "loss": 0.0124,
      "step": 2449920
    },
    {
      "epoch": 4.009380543718047,
      "grad_norm": 0.17224493622779846,
      "learning_rate": 1.994293734309417e-06,
      "loss": 0.0094,
      "step": 2449940
    },
    {
      "epoch": 4.0094132741567,
      "grad_norm": 0.15266692638397217,
      "learning_rate": 1.9942278420958997e-06,
      "loss": 0.0093,
      "step": 2449960
    },
    {
      "epoch": 4.009446004595354,
      "grad_norm": 0.19201970100402832,
      "learning_rate": 1.9941619498823824e-06,
      "loss": 0.0069,
      "step": 2449980
    },
    {
      "epoch": 4.009478735034007,
      "grad_norm": 0.2586151957511902,
      "learning_rate": 1.994096057668865e-06,
      "loss": 0.0108,
      "step": 2450000
    },
    {
      "epoch": 4.009478735034007,
      "eval_loss": 0.006164144724607468,
      "eval_runtime": 6504.3922,
      "eval_samples_per_second": 158.025,
      "eval_steps_per_second": 15.803,
      "eval_sts-dev_pearson_cosine": 0.9860561644929997,
      "eval_sts-dev_spearman_cosine": 0.8962554877925237,
      "step": 2450000
    },
    {
      "epoch": 4.00951146547266,
      "grad_norm": 0.21288077533245087,
      "learning_rate": 1.9940301654553483e-06,
      "loss": 0.0077,
      "step": 2450020
    },
    {
      "epoch": 4.009544195911314,
      "grad_norm": 0.12474262714385986,
      "learning_rate": 1.9939642732418315e-06,
      "loss": 0.0082,
      "step": 2450040
    },
    {
      "epoch": 4.0095769263499665,
      "grad_norm": 0.08262043446302414,
      "learning_rate": 1.9938983810283142e-06,
      "loss": 0.009,
      "step": 2450060
    },
    {
      "epoch": 4.00960965678862,
      "grad_norm": 0.19466102123260498,
      "learning_rate": 1.993832488814797e-06,
      "loss": 0.0057,
      "step": 2450080
    },
    {
      "epoch": 4.009642387227274,
      "grad_norm": 0.12201815843582153,
      "learning_rate": 1.9937665966012797e-06,
      "loss": 0.0096,
      "step": 2450100
    },
    {
      "epoch": 4.009675117665927,
      "grad_norm": 0.2811064124107361,
      "learning_rate": 1.9937007043877625e-06,
      "loss": 0.0066,
      "step": 2450120
    },
    {
      "epoch": 4.00970784810458,
      "grad_norm": 0.30966803431510925,
      "learning_rate": 1.9936348121742456e-06,
      "loss": 0.0116,
      "step": 2450140
    },
    {
      "epoch": 4.009740578543234,
      "grad_norm": 0.3917589485645294,
      "learning_rate": 1.9935689199607284e-06,
      "loss": 0.0124,
      "step": 2450160
    },
    {
      "epoch": 4.009773308981887,
      "grad_norm": 0.21231241524219513,
      "learning_rate": 1.993503027747211e-06,
      "loss": 0.0062,
      "step": 2450180
    },
    {
      "epoch": 4.00980603942054,
      "grad_norm": 0.4060792028903961,
      "learning_rate": 1.9934371355336943e-06,
      "loss": 0.0105,
      "step": 2450200
    },
    {
      "epoch": 4.0098387698591935,
      "grad_norm": 0.2016114741563797,
      "learning_rate": 1.993371243320177e-06,
      "loss": 0.0082,
      "step": 2450220
    },
    {
      "epoch": 4.009871500297847,
      "grad_norm": 0.2005046308040619,
      "learning_rate": 1.99330535110666e-06,
      "loss": 0.0097,
      "step": 2450240
    },
    {
      "epoch": 4.009904230736501,
      "grad_norm": 0.11644759774208069,
      "learning_rate": 1.993239458893143e-06,
      "loss": 0.0136,
      "step": 2450260
    },
    {
      "epoch": 4.009936961175153,
      "grad_norm": 0.1489734798669815,
      "learning_rate": 1.9931735666796256e-06,
      "loss": 0.009,
      "step": 2450280
    },
    {
      "epoch": 4.009969691613807,
      "grad_norm": 0.6506524682044983,
      "learning_rate": 1.9931076744661084e-06,
      "loss": 0.0062,
      "step": 2450300
    },
    {
      "epoch": 4.010002422052461,
      "grad_norm": 0.27160272002220154,
      "learning_rate": 1.993041782252591e-06,
      "loss": 0.0091,
      "step": 2450320
    },
    {
      "epoch": 4.010035152491113,
      "grad_norm": 0.4736017882823944,
      "learning_rate": 1.9929758900390743e-06,
      "loss": 0.0106,
      "step": 2450340
    },
    {
      "epoch": 4.010067882929767,
      "grad_norm": 0.19337187707424164,
      "learning_rate": 1.992909997825557e-06,
      "loss": 0.01,
      "step": 2450360
    },
    {
      "epoch": 4.0101006133684205,
      "grad_norm": 0.23211468756198883,
      "learning_rate": 1.99284410561204e-06,
      "loss": 0.0096,
      "step": 2450380
    },
    {
      "epoch": 4.010133343807074,
      "grad_norm": 0.3860054612159729,
      "learning_rate": 1.992778213398523e-06,
      "loss": 0.011,
      "step": 2450400
    },
    {
      "epoch": 4.010166074245727,
      "grad_norm": 0.16484299302101135,
      "learning_rate": 1.9927123211850057e-06,
      "loss": 0.0074,
      "step": 2450420
    },
    {
      "epoch": 4.01019880468438,
      "grad_norm": 0.2077799290418625,
      "learning_rate": 1.992646428971489e-06,
      "loss": 0.0106,
      "step": 2450440
    },
    {
      "epoch": 4.010231535123034,
      "grad_norm": 0.2552834749221802,
      "learning_rate": 1.9925805367579716e-06,
      "loss": 0.0087,
      "step": 2450460
    },
    {
      "epoch": 4.010264265561687,
      "grad_norm": 0.06965570896863937,
      "learning_rate": 1.9925146445444543e-06,
      "loss": 0.0069,
      "step": 2450480
    },
    {
      "epoch": 4.01029699600034,
      "grad_norm": 0.38161176443099976,
      "learning_rate": 1.992448752330937e-06,
      "loss": 0.0071,
      "step": 2450500
    },
    {
      "epoch": 4.010329726438994,
      "grad_norm": 0.11335492879152298,
      "learning_rate": 1.9923828601174198e-06,
      "loss": 0.0133,
      "step": 2450520
    },
    {
      "epoch": 4.0103624568776475,
      "grad_norm": 0.13044404983520508,
      "learning_rate": 1.992316967903903e-06,
      "loss": 0.0097,
      "step": 2450540
    },
    {
      "epoch": 4.0103951873163,
      "grad_norm": 0.1208038330078125,
      "learning_rate": 1.992251075690386e-06,
      "loss": 0.0089,
      "step": 2450560
    },
    {
      "epoch": 4.010427917754954,
      "grad_norm": 0.12331084907054901,
      "learning_rate": 1.992185183476869e-06,
      "loss": 0.0084,
      "step": 2450580
    },
    {
      "epoch": 4.010460648193607,
      "grad_norm": 0.2306557595729828,
      "learning_rate": 1.9921192912633516e-06,
      "loss": 0.0095,
      "step": 2450600
    },
    {
      "epoch": 4.01049337863226,
      "grad_norm": 0.42395639419555664,
      "learning_rate": 1.9920533990498343e-06,
      "loss": 0.0102,
      "step": 2450620
    },
    {
      "epoch": 4.010526109070914,
      "grad_norm": 0.19850949943065643,
      "learning_rate": 1.9919875068363175e-06,
      "loss": 0.0104,
      "step": 2450640
    },
    {
      "epoch": 4.010558839509567,
      "grad_norm": 0.10899937152862549,
      "learning_rate": 1.9919216146228002e-06,
      "loss": 0.0081,
      "step": 2450660
    },
    {
      "epoch": 4.010591569948221,
      "grad_norm": 0.1393529623746872,
      "learning_rate": 1.991855722409283e-06,
      "loss": 0.0119,
      "step": 2450680
    },
    {
      "epoch": 4.010624300386874,
      "grad_norm": 0.33335188031196594,
      "learning_rate": 1.9917898301957657e-06,
      "loss": 0.0088,
      "step": 2450700
    },
    {
      "epoch": 4.010657030825527,
      "grad_norm": 0.33654260635375977,
      "learning_rate": 1.991723937982249e-06,
      "loss": 0.0109,
      "step": 2450720
    },
    {
      "epoch": 4.010689761264181,
      "grad_norm": 0.1576181948184967,
      "learning_rate": 1.9916580457687316e-06,
      "loss": 0.0065,
      "step": 2450740
    },
    {
      "epoch": 4.0107224917028335,
      "grad_norm": 0.23017510771751404,
      "learning_rate": 1.9915921535552148e-06,
      "loss": 0.0095,
      "step": 2450760
    },
    {
      "epoch": 4.010755222141487,
      "grad_norm": 0.21566131711006165,
      "learning_rate": 1.9915262613416975e-06,
      "loss": 0.0084,
      "step": 2450780
    },
    {
      "epoch": 4.010787952580141,
      "grad_norm": 0.14823757112026215,
      "learning_rate": 1.9914603691281803e-06,
      "loss": 0.0089,
      "step": 2450800
    },
    {
      "epoch": 4.010820683018794,
      "grad_norm": 0.39697960019111633,
      "learning_rate": 1.991394476914663e-06,
      "loss": 0.013,
      "step": 2450820
    },
    {
      "epoch": 4.010853413457447,
      "grad_norm": 0.6263295412063599,
      "learning_rate": 1.991328584701146e-06,
      "loss": 0.0091,
      "step": 2450840
    },
    {
      "epoch": 4.0108861438961005,
      "grad_norm": 0.11206728219985962,
      "learning_rate": 1.991262692487629e-06,
      "loss": 0.0078,
      "step": 2450860
    },
    {
      "epoch": 4.010918874334754,
      "grad_norm": 0.4415474534034729,
      "learning_rate": 1.9911968002741116e-06,
      "loss": 0.0095,
      "step": 2450880
    },
    {
      "epoch": 4.010951604773407,
      "grad_norm": 0.28077778220176697,
      "learning_rate": 1.991130908060595e-06,
      "loss": 0.0091,
      "step": 2450900
    },
    {
      "epoch": 4.01098433521206,
      "grad_norm": 0.6389549374580383,
      "learning_rate": 1.9910650158470775e-06,
      "loss": 0.0085,
      "step": 2450920
    },
    {
      "epoch": 4.011017065650714,
      "grad_norm": 0.2993335425853729,
      "learning_rate": 1.9909991236335603e-06,
      "loss": 0.0082,
      "step": 2450940
    },
    {
      "epoch": 4.011049796089368,
      "grad_norm": 0.269786536693573,
      "learning_rate": 1.9909332314200434e-06,
      "loss": 0.0094,
      "step": 2450960
    },
    {
      "epoch": 4.01108252652802,
      "grad_norm": 0.11371960490942001,
      "learning_rate": 1.990867339206526e-06,
      "loss": 0.0119,
      "step": 2450980
    },
    {
      "epoch": 4.011115256966674,
      "grad_norm": 0.4091716706752777,
      "learning_rate": 1.990801446993009e-06,
      "loss": 0.0135,
      "step": 2451000
    },
    {
      "epoch": 4.0111479874053275,
      "grad_norm": 0.15815016627311707,
      "learning_rate": 1.9907355547794917e-06,
      "loss": 0.0123,
      "step": 2451020
    },
    {
      "epoch": 4.01118071784398,
      "grad_norm": 0.6166518330574036,
      "learning_rate": 1.990669662565975e-06,
      "loss": 0.0109,
      "step": 2451040
    },
    {
      "epoch": 4.011213448282634,
      "grad_norm": 0.8737619519233704,
      "learning_rate": 1.9906037703524576e-06,
      "loss": 0.0111,
      "step": 2451060
    },
    {
      "epoch": 4.011246178721287,
      "grad_norm": 0.2520962357521057,
      "learning_rate": 1.9905378781389407e-06,
      "loss": 0.0111,
      "step": 2451080
    },
    {
      "epoch": 4.01127890915994,
      "grad_norm": 0.15823869407176971,
      "learning_rate": 1.9904719859254235e-06,
      "loss": 0.0059,
      "step": 2451100
    },
    {
      "epoch": 4.011311639598594,
      "grad_norm": 0.26250264048576355,
      "learning_rate": 1.9904060937119062e-06,
      "loss": 0.011,
      "step": 2451120
    },
    {
      "epoch": 4.011344370037247,
      "grad_norm": 0.3874812722206116,
      "learning_rate": 1.990340201498389e-06,
      "loss": 0.0092,
      "step": 2451140
    },
    {
      "epoch": 4.011377100475901,
      "grad_norm": 0.417121022939682,
      "learning_rate": 1.990274309284872e-06,
      "loss": 0.0102,
      "step": 2451160
    },
    {
      "epoch": 4.011409830914554,
      "grad_norm": 0.35372480750083923,
      "learning_rate": 1.990208417071355e-06,
      "loss": 0.0078,
      "step": 2451180
    },
    {
      "epoch": 4.011442561353207,
      "grad_norm": 0.24466831982135773,
      "learning_rate": 1.9901425248578376e-06,
      "loss": 0.0111,
      "step": 2451200
    },
    {
      "epoch": 4.011475291791861,
      "grad_norm": 0.18971875309944153,
      "learning_rate": 1.9900766326443203e-06,
      "loss": 0.0129,
      "step": 2451220
    },
    {
      "epoch": 4.0115080222305135,
      "grad_norm": 0.034340061247348785,
      "learning_rate": 1.9900107404308035e-06,
      "loss": 0.0077,
      "step": 2451240
    },
    {
      "epoch": 4.011540752669167,
      "grad_norm": 0.2792574465274811,
      "learning_rate": 1.9899448482172867e-06,
      "loss": 0.012,
      "step": 2451260
    },
    {
      "epoch": 4.011573483107821,
      "grad_norm": 0.2180120050907135,
      "learning_rate": 1.9898789560037694e-06,
      "loss": 0.0083,
      "step": 2451280
    },
    {
      "epoch": 4.011606213546474,
      "grad_norm": 0.3565017580986023,
      "learning_rate": 1.989813063790252e-06,
      "loss": 0.0064,
      "step": 2451300
    },
    {
      "epoch": 4.011638943985127,
      "grad_norm": 0.15345098078250885,
      "learning_rate": 1.989747171576735e-06,
      "loss": 0.0077,
      "step": 2451320
    },
    {
      "epoch": 4.011671674423781,
      "grad_norm": 0.16192308068275452,
      "learning_rate": 1.9896812793632176e-06,
      "loss": 0.0069,
      "step": 2451340
    },
    {
      "epoch": 4.011704404862434,
      "grad_norm": 0.24217581748962402,
      "learning_rate": 1.9896153871497008e-06,
      "loss": 0.0074,
      "step": 2451360
    },
    {
      "epoch": 4.011737135301087,
      "grad_norm": 0.30215126276016235,
      "learning_rate": 1.9895494949361835e-06,
      "loss": 0.0148,
      "step": 2451380
    },
    {
      "epoch": 4.0117698657397405,
      "grad_norm": 0.2335321009159088,
      "learning_rate": 1.9894836027226663e-06,
      "loss": 0.0072,
      "step": 2451400
    },
    {
      "epoch": 4.011802596178394,
      "grad_norm": 0.2567679286003113,
      "learning_rate": 1.989417710509149e-06,
      "loss": 0.0104,
      "step": 2451420
    },
    {
      "epoch": 4.011835326617048,
      "grad_norm": 0.1842532455921173,
      "learning_rate": 1.989351818295632e-06,
      "loss": 0.0092,
      "step": 2451440
    },
    {
      "epoch": 4.0118680570557,
      "grad_norm": 0.43321487307548523,
      "learning_rate": 1.9892859260821153e-06,
      "loss": 0.0087,
      "step": 2451460
    },
    {
      "epoch": 4.011900787494354,
      "grad_norm": 0.17574447393417358,
      "learning_rate": 1.989220033868598e-06,
      "loss": 0.0068,
      "step": 2451480
    },
    {
      "epoch": 4.011933517933008,
      "grad_norm": 0.3117317259311676,
      "learning_rate": 1.989154141655081e-06,
      "loss": 0.0131,
      "step": 2451500
    },
    {
      "epoch": 4.01196624837166,
      "grad_norm": 0.8132809996604919,
      "learning_rate": 1.9890882494415635e-06,
      "loss": 0.0099,
      "step": 2451520
    },
    {
      "epoch": 4.011998978810314,
      "grad_norm": 0.16217684745788574,
      "learning_rate": 1.9890223572280463e-06,
      "loss": 0.0093,
      "step": 2451540
    },
    {
      "epoch": 4.0120317092489675,
      "grad_norm": 0.16376057267189026,
      "learning_rate": 1.9889564650145295e-06,
      "loss": 0.0087,
      "step": 2451560
    },
    {
      "epoch": 4.012064439687621,
      "grad_norm": 0.38745200634002686,
      "learning_rate": 1.988890572801012e-06,
      "loss": 0.0073,
      "step": 2451580
    },
    {
      "epoch": 4.012097170126274,
      "grad_norm": 0.1658165454864502,
      "learning_rate": 1.988824680587495e-06,
      "loss": 0.0078,
      "step": 2451600
    },
    {
      "epoch": 4.012129900564927,
      "grad_norm": 0.06012779846787453,
      "learning_rate": 1.988758788373978e-06,
      "loss": 0.0082,
      "step": 2451620
    },
    {
      "epoch": 4.012162631003581,
      "grad_norm": 0.1499224752187729,
      "learning_rate": 1.988692896160461e-06,
      "loss": 0.01,
      "step": 2451640
    },
    {
      "epoch": 4.012195361442234,
      "grad_norm": 0.5868371725082397,
      "learning_rate": 1.988627003946944e-06,
      "loss": 0.0093,
      "step": 2451660
    },
    {
      "epoch": 4.012228091880887,
      "grad_norm": 0.30973193049430847,
      "learning_rate": 1.9885611117334267e-06,
      "loss": 0.0112,
      "step": 2451680
    },
    {
      "epoch": 4.012260822319541,
      "grad_norm": 0.08339831233024597,
      "learning_rate": 1.9884952195199095e-06,
      "loss": 0.0131,
      "step": 2451700
    },
    {
      "epoch": 4.012293552758194,
      "grad_norm": 0.5216249823570251,
      "learning_rate": 1.9884293273063922e-06,
      "loss": 0.0093,
      "step": 2451720
    },
    {
      "epoch": 4.012326283196847,
      "grad_norm": 0.08459430187940598,
      "learning_rate": 1.988363435092875e-06,
      "loss": 0.0076,
      "step": 2451740
    },
    {
      "epoch": 4.012359013635501,
      "grad_norm": 0.5510620474815369,
      "learning_rate": 1.988297542879358e-06,
      "loss": 0.0119,
      "step": 2451760
    },
    {
      "epoch": 4.012391744074154,
      "grad_norm": 0.1216774508357048,
      "learning_rate": 1.9882316506658413e-06,
      "loss": 0.0067,
      "step": 2451780
    },
    {
      "epoch": 4.012424474512807,
      "grad_norm": 0.13990908861160278,
      "learning_rate": 1.988165758452324e-06,
      "loss": 0.0093,
      "step": 2451800
    },
    {
      "epoch": 4.012457204951461,
      "grad_norm": 0.12057159096002579,
      "learning_rate": 1.9880998662388068e-06,
      "loss": 0.0076,
      "step": 2451820
    },
    {
      "epoch": 4.012489935390114,
      "grad_norm": 0.20587058365345,
      "learning_rate": 1.9880339740252895e-06,
      "loss": 0.0079,
      "step": 2451840
    },
    {
      "epoch": 4.012522665828768,
      "grad_norm": 0.3348354399204254,
      "learning_rate": 1.9879680818117727e-06,
      "loss": 0.0086,
      "step": 2451860
    },
    {
      "epoch": 4.0125553962674205,
      "grad_norm": 0.2235981822013855,
      "learning_rate": 1.9879021895982554e-06,
      "loss": 0.007,
      "step": 2451880
    },
    {
      "epoch": 4.012588126706074,
      "grad_norm": 0.33004313707351685,
      "learning_rate": 1.987836297384738e-06,
      "loss": 0.0079,
      "step": 2451900
    },
    {
      "epoch": 4.012620857144728,
      "grad_norm": 0.46028241515159607,
      "learning_rate": 1.987770405171221e-06,
      "loss": 0.0053,
      "step": 2451920
    },
    {
      "epoch": 4.01265358758338,
      "grad_norm": 0.5364697575569153,
      "learning_rate": 1.9877045129577036e-06,
      "loss": 0.0149,
      "step": 2451940
    },
    {
      "epoch": 4.012686318022034,
      "grad_norm": 0.12531863152980804,
      "learning_rate": 1.987638620744187e-06,
      "loss": 0.0048,
      "step": 2451960
    },
    {
      "epoch": 4.012719048460688,
      "grad_norm": 0.28225812315940857,
      "learning_rate": 1.98757272853067e-06,
      "loss": 0.01,
      "step": 2451980
    },
    {
      "epoch": 4.012751778899341,
      "grad_norm": 0.18895377218723297,
      "learning_rate": 1.9875068363171527e-06,
      "loss": 0.0095,
      "step": 2452000
    },
    {
      "epoch": 4.012784509337994,
      "grad_norm": 0.2990952134132385,
      "learning_rate": 1.9874409441036354e-06,
      "loss": 0.0059,
      "step": 2452020
    },
    {
      "epoch": 4.0128172397766475,
      "grad_norm": 0.20151589810848236,
      "learning_rate": 1.987375051890118e-06,
      "loss": 0.0106,
      "step": 2452040
    },
    {
      "epoch": 4.012849970215301,
      "grad_norm": 0.43765750527381897,
      "learning_rate": 1.9873091596766013e-06,
      "loss": 0.0097,
      "step": 2452060
    },
    {
      "epoch": 4.012882700653954,
      "grad_norm": 0.22632566094398499,
      "learning_rate": 1.987243267463084e-06,
      "loss": 0.0117,
      "step": 2452080
    },
    {
      "epoch": 4.012915431092607,
      "grad_norm": 0.5108802318572998,
      "learning_rate": 1.987177375249567e-06,
      "loss": 0.0073,
      "step": 2452100
    },
    {
      "epoch": 4.012948161531261,
      "grad_norm": 0.19734971225261688,
      "learning_rate": 1.9871114830360496e-06,
      "loss": 0.016,
      "step": 2452120
    },
    {
      "epoch": 4.012980891969915,
      "grad_norm": 0.21427038311958313,
      "learning_rate": 1.9870455908225327e-06,
      "loss": 0.0062,
      "step": 2452140
    },
    {
      "epoch": 4.013013622408567,
      "grad_norm": 0.12153905630111694,
      "learning_rate": 1.9869796986090155e-06,
      "loss": 0.0125,
      "step": 2452160
    },
    {
      "epoch": 4.013046352847221,
      "grad_norm": 0.23229849338531494,
      "learning_rate": 1.9869138063954986e-06,
      "loss": 0.0084,
      "step": 2452180
    },
    {
      "epoch": 4.0130790832858745,
      "grad_norm": 0.07234112918376923,
      "learning_rate": 1.9868479141819814e-06,
      "loss": 0.0078,
      "step": 2452200
    },
    {
      "epoch": 4.013111813724527,
      "grad_norm": 0.1876266747713089,
      "learning_rate": 1.986782021968464e-06,
      "loss": 0.0092,
      "step": 2452220
    },
    {
      "epoch": 4.013144544163181,
      "grad_norm": 0.16729801893234253,
      "learning_rate": 1.986716129754947e-06,
      "loss": 0.0062,
      "step": 2452240
    },
    {
      "epoch": 4.013177274601834,
      "grad_norm": 0.07399491965770721,
      "learning_rate": 1.98665023754143e-06,
      "loss": 0.0082,
      "step": 2452260
    },
    {
      "epoch": 4.013210005040488,
      "grad_norm": 0.05569644272327423,
      "learning_rate": 1.9865843453279127e-06,
      "loss": 0.0079,
      "step": 2452280
    },
    {
      "epoch": 4.013242735479141,
      "grad_norm": 0.255644828081131,
      "learning_rate": 1.9865184531143955e-06,
      "loss": 0.0087,
      "step": 2452300
    },
    {
      "epoch": 4.013275465917794,
      "grad_norm": 0.11121693253517151,
      "learning_rate": 1.9864525609008786e-06,
      "loss": 0.0076,
      "step": 2452320
    },
    {
      "epoch": 4.013308196356448,
      "grad_norm": 0.2703632414340973,
      "learning_rate": 1.9863866686873614e-06,
      "loss": 0.0068,
      "step": 2452340
    },
    {
      "epoch": 4.013340926795101,
      "grad_norm": 0.11476828902959824,
      "learning_rate": 1.986320776473844e-06,
      "loss": 0.0089,
      "step": 2452360
    },
    {
      "epoch": 4.013373657233754,
      "grad_norm": 0.5111899971961975,
      "learning_rate": 1.9862548842603273e-06,
      "loss": 0.0114,
      "step": 2452380
    },
    {
      "epoch": 4.013406387672408,
      "grad_norm": 0.13561449944972992,
      "learning_rate": 1.98618899204681e-06,
      "loss": 0.0104,
      "step": 2452400
    },
    {
      "epoch": 4.013439118111061,
      "grad_norm": 0.11239027231931686,
      "learning_rate": 1.9861230998332928e-06,
      "loss": 0.0114,
      "step": 2452420
    },
    {
      "epoch": 4.013471848549714,
      "grad_norm": 0.11958302557468414,
      "learning_rate": 1.9860572076197755e-06,
      "loss": 0.0136,
      "step": 2452440
    },
    {
      "epoch": 4.013504578988368,
      "grad_norm": 0.5364336967468262,
      "learning_rate": 1.9859913154062587e-06,
      "loss": 0.0083,
      "step": 2452460
    },
    {
      "epoch": 4.013537309427021,
      "grad_norm": 0.21765431761741638,
      "learning_rate": 1.9859254231927414e-06,
      "loss": 0.0112,
      "step": 2452480
    },
    {
      "epoch": 4.013570039865674,
      "grad_norm": 0.31214046478271484,
      "learning_rate": 1.9858595309792246e-06,
      "loss": 0.0133,
      "step": 2452500
    },
    {
      "epoch": 4.0136027703043275,
      "grad_norm": 0.3763395845890045,
      "learning_rate": 1.9857936387657073e-06,
      "loss": 0.0076,
      "step": 2452520
    },
    {
      "epoch": 4.013635500742981,
      "grad_norm": 0.28502002358436584,
      "learning_rate": 1.98572774655219e-06,
      "loss": 0.013,
      "step": 2452540
    },
    {
      "epoch": 4.013668231181635,
      "grad_norm": 0.2643261253833771,
      "learning_rate": 1.985661854338673e-06,
      "loss": 0.0091,
      "step": 2452560
    },
    {
      "epoch": 4.013700961620287,
      "grad_norm": 0.5916217565536499,
      "learning_rate": 1.985595962125156e-06,
      "loss": 0.0072,
      "step": 2452580
    },
    {
      "epoch": 4.013733692058941,
      "grad_norm": 0.10067286342382431,
      "learning_rate": 1.9855300699116387e-06,
      "loss": 0.0088,
      "step": 2452600
    },
    {
      "epoch": 4.013766422497595,
      "grad_norm": 0.30872464179992676,
      "learning_rate": 1.9854641776981214e-06,
      "loss": 0.0121,
      "step": 2452620
    },
    {
      "epoch": 4.013799152936247,
      "grad_norm": 0.24002870917320251,
      "learning_rate": 1.985398285484604e-06,
      "loss": 0.0061,
      "step": 2452640
    },
    {
      "epoch": 4.013831883374901,
      "grad_norm": 0.25097987055778503,
      "learning_rate": 1.9853323932710873e-06,
      "loss": 0.0107,
      "step": 2452660
    },
    {
      "epoch": 4.0138646138135545,
      "grad_norm": 0.14598779380321503,
      "learning_rate": 1.9852665010575705e-06,
      "loss": 0.0073,
      "step": 2452680
    },
    {
      "epoch": 4.013897344252207,
      "grad_norm": 0.3941625654697418,
      "learning_rate": 1.9852006088440532e-06,
      "loss": 0.0089,
      "step": 2452700
    },
    {
      "epoch": 4.013930074690861,
      "grad_norm": 0.09115058183670044,
      "learning_rate": 1.985134716630536e-06,
      "loss": 0.0085,
      "step": 2452720
    },
    {
      "epoch": 4.013962805129514,
      "grad_norm": 0.23679254949092865,
      "learning_rate": 1.9850688244170187e-06,
      "loss": 0.0062,
      "step": 2452740
    },
    {
      "epoch": 4.013995535568168,
      "grad_norm": 0.2697768807411194,
      "learning_rate": 1.9850029322035015e-06,
      "loss": 0.0072,
      "step": 2452760
    },
    {
      "epoch": 4.014028266006821,
      "grad_norm": 0.3393287658691406,
      "learning_rate": 1.9849370399899846e-06,
      "loss": 0.0097,
      "step": 2452780
    },
    {
      "epoch": 4.014060996445474,
      "grad_norm": 0.1380181610584259,
      "learning_rate": 1.9848711477764674e-06,
      "loss": 0.0117,
      "step": 2452800
    },
    {
      "epoch": 4.014093726884128,
      "grad_norm": 0.10120023787021637,
      "learning_rate": 1.98480525556295e-06,
      "loss": 0.0079,
      "step": 2452820
    },
    {
      "epoch": 4.014126457322781,
      "grad_norm": 0.0838446319103241,
      "learning_rate": 1.9847393633494333e-06,
      "loss": 0.0099,
      "step": 2452840
    },
    {
      "epoch": 4.014159187761434,
      "grad_norm": 0.11002141982316971,
      "learning_rate": 1.984673471135916e-06,
      "loss": 0.0118,
      "step": 2452860
    },
    {
      "epoch": 4.014191918200088,
      "grad_norm": 0.2131020873785019,
      "learning_rate": 1.984607578922399e-06,
      "loss": 0.0096,
      "step": 2452880
    },
    {
      "epoch": 4.014224648638741,
      "grad_norm": 0.08342532813549042,
      "learning_rate": 1.984541686708882e-06,
      "loss": 0.0052,
      "step": 2452900
    },
    {
      "epoch": 4.014257379077394,
      "grad_norm": 0.1030159518122673,
      "learning_rate": 1.9844757944953646e-06,
      "loss": 0.0118,
      "step": 2452920
    },
    {
      "epoch": 4.014290109516048,
      "grad_norm": 0.2150011509656906,
      "learning_rate": 1.9844099022818474e-06,
      "loss": 0.0076,
      "step": 2452940
    },
    {
      "epoch": 4.014322839954701,
      "grad_norm": 0.34566381573677063,
      "learning_rate": 1.98434401006833e-06,
      "loss": 0.0109,
      "step": 2452960
    },
    {
      "epoch": 4.014355570393354,
      "grad_norm": 0.22906753420829773,
      "learning_rate": 1.9842781178548133e-06,
      "loss": 0.0109,
      "step": 2452980
    },
    {
      "epoch": 4.014388300832008,
      "grad_norm": 0.2597680985927582,
      "learning_rate": 1.984212225641296e-06,
      "loss": 0.0099,
      "step": 2453000
    },
    {
      "epoch": 4.014421031270661,
      "grad_norm": 0.42860203981399536,
      "learning_rate": 1.984146333427779e-06,
      "loss": 0.0067,
      "step": 2453020
    },
    {
      "epoch": 4.014453761709315,
      "grad_norm": 0.6808133721351624,
      "learning_rate": 1.984080441214262e-06,
      "loss": 0.0095,
      "step": 2453040
    },
    {
      "epoch": 4.0144864921479675,
      "grad_norm": 0.1297769546508789,
      "learning_rate": 1.9840145490007447e-06,
      "loss": 0.0098,
      "step": 2453060
    },
    {
      "epoch": 4.014519222586621,
      "grad_norm": 0.24056048691272736,
      "learning_rate": 1.983948656787228e-06,
      "loss": 0.007,
      "step": 2453080
    },
    {
      "epoch": 4.014551953025275,
      "grad_norm": 0.43627920746803284,
      "learning_rate": 1.9838827645737106e-06,
      "loss": 0.0078,
      "step": 2453100
    },
    {
      "epoch": 4.014584683463927,
      "grad_norm": 0.21492764353752136,
      "learning_rate": 1.9838168723601933e-06,
      "loss": 0.0067,
      "step": 2453120
    },
    {
      "epoch": 4.014617413902581,
      "grad_norm": 1.1877459287643433,
      "learning_rate": 1.983750980146676e-06,
      "loss": 0.0109,
      "step": 2453140
    },
    {
      "epoch": 4.014650144341235,
      "grad_norm": 0.1488259881734848,
      "learning_rate": 1.983685087933159e-06,
      "loss": 0.0088,
      "step": 2453160
    },
    {
      "epoch": 4.014682874779888,
      "grad_norm": 0.2867007851600647,
      "learning_rate": 1.983619195719642e-06,
      "loss": 0.0071,
      "step": 2453180
    },
    {
      "epoch": 4.014715605218541,
      "grad_norm": 0.04771077260375023,
      "learning_rate": 1.983553303506125e-06,
      "loss": 0.0047,
      "step": 2453200
    },
    {
      "epoch": 4.0147483356571945,
      "grad_norm": 0.19665618240833282,
      "learning_rate": 1.983487411292608e-06,
      "loss": 0.0086,
      "step": 2453220
    },
    {
      "epoch": 4.014781066095848,
      "grad_norm": 0.3832237124443054,
      "learning_rate": 1.9834215190790906e-06,
      "loss": 0.0119,
      "step": 2453240
    },
    {
      "epoch": 4.014813796534501,
      "grad_norm": 0.2563903331756592,
      "learning_rate": 1.9833556268655733e-06,
      "loss": 0.0098,
      "step": 2453260
    },
    {
      "epoch": 4.014846526973154,
      "grad_norm": 0.4119521975517273,
      "learning_rate": 1.9832897346520565e-06,
      "loss": 0.0118,
      "step": 2453280
    },
    {
      "epoch": 4.014879257411808,
      "grad_norm": 0.2232334166765213,
      "learning_rate": 1.9832238424385392e-06,
      "loss": 0.0117,
      "step": 2453300
    },
    {
      "epoch": 4.0149119878504615,
      "grad_norm": 0.08785048872232437,
      "learning_rate": 1.983157950225022e-06,
      "loss": 0.0088,
      "step": 2453320
    },
    {
      "epoch": 4.014944718289114,
      "grad_norm": 0.1734892874956131,
      "learning_rate": 1.9830920580115047e-06,
      "loss": 0.008,
      "step": 2453340
    },
    {
      "epoch": 4.014977448727768,
      "grad_norm": 0.16803596913814545,
      "learning_rate": 1.983026165797988e-06,
      "loss": 0.0069,
      "step": 2453360
    },
    {
      "epoch": 4.0150101791664214,
      "grad_norm": 0.1791483461856842,
      "learning_rate": 1.9829602735844706e-06,
      "loss": 0.0103,
      "step": 2453380
    },
    {
      "epoch": 4.015042909605074,
      "grad_norm": 0.45368725061416626,
      "learning_rate": 1.982894381370954e-06,
      "loss": 0.0101,
      "step": 2453400
    },
    {
      "epoch": 4.015075640043728,
      "grad_norm": 0.3015972971916199,
      "learning_rate": 1.9828284891574365e-06,
      "loss": 0.0088,
      "step": 2453420
    },
    {
      "epoch": 4.015108370482381,
      "grad_norm": 0.5842041373252869,
      "learning_rate": 1.9827625969439193e-06,
      "loss": 0.0072,
      "step": 2453440
    },
    {
      "epoch": 4.015141100921035,
      "grad_norm": 0.42740851640701294,
      "learning_rate": 1.982696704730402e-06,
      "loss": 0.0078,
      "step": 2453460
    },
    {
      "epoch": 4.015173831359688,
      "grad_norm": 0.5528463125228882,
      "learning_rate": 1.982630812516885e-06,
      "loss": 0.0113,
      "step": 2453480
    },
    {
      "epoch": 4.015206561798341,
      "grad_norm": 0.15071046352386475,
      "learning_rate": 1.982564920303368e-06,
      "loss": 0.0048,
      "step": 2453500
    },
    {
      "epoch": 4.015239292236995,
      "grad_norm": 0.14806917309761047,
      "learning_rate": 1.9824990280898507e-06,
      "loss": 0.0068,
      "step": 2453520
    },
    {
      "epoch": 4.0152720226756475,
      "grad_norm": 0.23541881144046783,
      "learning_rate": 1.982433135876334e-06,
      "loss": 0.0101,
      "step": 2453540
    },
    {
      "epoch": 4.015304753114301,
      "grad_norm": 0.09323496371507645,
      "learning_rate": 1.9823672436628166e-06,
      "loss": 0.0089,
      "step": 2453560
    },
    {
      "epoch": 4.015337483552955,
      "grad_norm": 0.12773512303829193,
      "learning_rate": 1.9823013514492993e-06,
      "loss": 0.0068,
      "step": 2453580
    },
    {
      "epoch": 4.015370213991608,
      "grad_norm": 0.5410537719726562,
      "learning_rate": 1.9822354592357825e-06,
      "loss": 0.0084,
      "step": 2453600
    },
    {
      "epoch": 4.015402944430261,
      "grad_norm": 0.24880337715148926,
      "learning_rate": 1.982169567022265e-06,
      "loss": 0.0136,
      "step": 2453620
    },
    {
      "epoch": 4.015435674868915,
      "grad_norm": 0.4354083836078644,
      "learning_rate": 1.982103674808748e-06,
      "loss": 0.0125,
      "step": 2453640
    },
    {
      "epoch": 4.015468405307568,
      "grad_norm": 0.14835011959075928,
      "learning_rate": 1.9820377825952307e-06,
      "loss": 0.0164,
      "step": 2453660
    },
    {
      "epoch": 4.015501135746221,
      "grad_norm": 0.3718385696411133,
      "learning_rate": 1.981971890381714e-06,
      "loss": 0.0074,
      "step": 2453680
    },
    {
      "epoch": 4.0155338661848745,
      "grad_norm": 0.33999380469322205,
      "learning_rate": 1.9819059981681966e-06,
      "loss": 0.0103,
      "step": 2453700
    },
    {
      "epoch": 4.015566596623528,
      "grad_norm": 0.08208733052015305,
      "learning_rate": 1.9818401059546797e-06,
      "loss": 0.0069,
      "step": 2453720
    },
    {
      "epoch": 4.015599327062182,
      "grad_norm": 0.27474987506866455,
      "learning_rate": 1.9817742137411625e-06,
      "loss": 0.0071,
      "step": 2453740
    },
    {
      "epoch": 4.015632057500834,
      "grad_norm": 0.23262324929237366,
      "learning_rate": 1.9817083215276452e-06,
      "loss": 0.0067,
      "step": 2453760
    },
    {
      "epoch": 4.015664787939488,
      "grad_norm": 0.05622962862253189,
      "learning_rate": 1.981642429314128e-06,
      "loss": 0.0062,
      "step": 2453780
    },
    {
      "epoch": 4.015697518378142,
      "grad_norm": 0.4615531265735626,
      "learning_rate": 1.981576537100611e-06,
      "loss": 0.0091,
      "step": 2453800
    },
    {
      "epoch": 4.015730248816794,
      "grad_norm": 0.2795833945274353,
      "learning_rate": 1.981510644887094e-06,
      "loss": 0.0121,
      "step": 2453820
    },
    {
      "epoch": 4.015762979255448,
      "grad_norm": 0.14659108221530914,
      "learning_rate": 1.9814447526735766e-06,
      "loss": 0.0073,
      "step": 2453840
    },
    {
      "epoch": 4.0157957096941015,
      "grad_norm": 0.4122163653373718,
      "learning_rate": 1.9813788604600593e-06,
      "loss": 0.0109,
      "step": 2453860
    },
    {
      "epoch": 4.015828440132755,
      "grad_norm": 0.15480516850948334,
      "learning_rate": 1.9813129682465425e-06,
      "loss": 0.0097,
      "step": 2453880
    },
    {
      "epoch": 4.015861170571408,
      "grad_norm": 0.08266346901655197,
      "learning_rate": 1.9812470760330257e-06,
      "loss": 0.0112,
      "step": 2453900
    },
    {
      "epoch": 4.015893901010061,
      "grad_norm": 0.16700170934200287,
      "learning_rate": 1.9811811838195084e-06,
      "loss": 0.0066,
      "step": 2453920
    },
    {
      "epoch": 4.015926631448715,
      "grad_norm": 0.2367163449525833,
      "learning_rate": 1.981115291605991e-06,
      "loss": 0.0074,
      "step": 2453940
    },
    {
      "epoch": 4.015959361887368,
      "grad_norm": 0.3332424759864807,
      "learning_rate": 1.981049399392474e-06,
      "loss": 0.0095,
      "step": 2453960
    },
    {
      "epoch": 4.015992092326021,
      "grad_norm": 0.09324804693460464,
      "learning_rate": 1.9809835071789566e-06,
      "loss": 0.0099,
      "step": 2453980
    },
    {
      "epoch": 4.016024822764675,
      "grad_norm": 0.24389217793941498,
      "learning_rate": 1.98091761496544e-06,
      "loss": 0.009,
      "step": 2454000
    },
    {
      "epoch": 4.0160575532033285,
      "grad_norm": 0.10903652012348175,
      "learning_rate": 1.9808517227519225e-06,
      "loss": 0.0071,
      "step": 2454020
    },
    {
      "epoch": 4.016090283641981,
      "grad_norm": 0.6354357004165649,
      "learning_rate": 1.9807858305384053e-06,
      "loss": 0.0138,
      "step": 2454040
    },
    {
      "epoch": 4.016123014080635,
      "grad_norm": 0.09134242683649063,
      "learning_rate": 1.980719938324888e-06,
      "loss": 0.0079,
      "step": 2454060
    },
    {
      "epoch": 4.016155744519288,
      "grad_norm": 0.20059889554977417,
      "learning_rate": 1.980654046111371e-06,
      "loss": 0.0095,
      "step": 2454080
    },
    {
      "epoch": 4.016188474957941,
      "grad_norm": 0.07758799940347672,
      "learning_rate": 1.9805881538978543e-06,
      "loss": 0.014,
      "step": 2454100
    },
    {
      "epoch": 4.016221205396595,
      "grad_norm": 0.35987135767936707,
      "learning_rate": 1.980522261684337e-06,
      "loss": 0.0086,
      "step": 2454120
    },
    {
      "epoch": 4.016253935835248,
      "grad_norm": 0.1682361662387848,
      "learning_rate": 1.98045636947082e-06,
      "loss": 0.0067,
      "step": 2454140
    },
    {
      "epoch": 4.016286666273901,
      "grad_norm": 0.16270048916339874,
      "learning_rate": 1.9803904772573026e-06,
      "loss": 0.0078,
      "step": 2454160
    },
    {
      "epoch": 4.016319396712555,
      "grad_norm": 1.1434754133224487,
      "learning_rate": 1.9803245850437853e-06,
      "loss": 0.0092,
      "step": 2454180
    },
    {
      "epoch": 4.016352127151208,
      "grad_norm": 0.20147639513015747,
      "learning_rate": 1.9802586928302685e-06,
      "loss": 0.0074,
      "step": 2454200
    },
    {
      "epoch": 4.016384857589862,
      "grad_norm": 0.29976722598075867,
      "learning_rate": 1.980192800616751e-06,
      "loss": 0.0077,
      "step": 2454220
    },
    {
      "epoch": 4.0164175880285145,
      "grad_norm": 0.17473985254764557,
      "learning_rate": 1.980126908403234e-06,
      "loss": 0.0072,
      "step": 2454240
    },
    {
      "epoch": 4.016450318467168,
      "grad_norm": 0.23557879030704498,
      "learning_rate": 1.980061016189717e-06,
      "loss": 0.0101,
      "step": 2454260
    },
    {
      "epoch": 4.016483048905822,
      "grad_norm": 0.5534399747848511,
      "learning_rate": 1.9799951239762e-06,
      "loss": 0.0104,
      "step": 2454280
    },
    {
      "epoch": 4.016515779344474,
      "grad_norm": 0.3071949779987335,
      "learning_rate": 1.979929231762683e-06,
      "loss": 0.0079,
      "step": 2454300
    },
    {
      "epoch": 4.016548509783128,
      "grad_norm": 0.1723150908946991,
      "learning_rate": 1.9798633395491657e-06,
      "loss": 0.0072,
      "step": 2454320
    },
    {
      "epoch": 4.0165812402217815,
      "grad_norm": 0.1557648479938507,
      "learning_rate": 1.9797974473356485e-06,
      "loss": 0.0106,
      "step": 2454340
    },
    {
      "epoch": 4.016613970660435,
      "grad_norm": 0.053241878747940063,
      "learning_rate": 1.9797315551221312e-06,
      "loss": 0.0085,
      "step": 2454360
    },
    {
      "epoch": 4.016646701099088,
      "grad_norm": 0.32499590516090393,
      "learning_rate": 1.979665662908614e-06,
      "loss": 0.0072,
      "step": 2454380
    },
    {
      "epoch": 4.016679431537741,
      "grad_norm": 0.09822342544794083,
      "learning_rate": 1.979599770695097e-06,
      "loss": 0.0068,
      "step": 2454400
    },
    {
      "epoch": 4.016712161976395,
      "grad_norm": 0.44662582874298096,
      "learning_rate": 1.9795338784815803e-06,
      "loss": 0.0079,
      "step": 2454420
    },
    {
      "epoch": 4.016744892415048,
      "grad_norm": 0.33030664920806885,
      "learning_rate": 1.979467986268063e-06,
      "loss": 0.0097,
      "step": 2454440
    },
    {
      "epoch": 4.016777622853701,
      "grad_norm": 0.14750240743160248,
      "learning_rate": 1.9794020940545458e-06,
      "loss": 0.0092,
      "step": 2454460
    },
    {
      "epoch": 4.016810353292355,
      "grad_norm": 0.10686878114938736,
      "learning_rate": 1.9793362018410285e-06,
      "loss": 0.0089,
      "step": 2454480
    },
    {
      "epoch": 4.0168430837310085,
      "grad_norm": 0.42202720046043396,
      "learning_rate": 1.9792703096275117e-06,
      "loss": 0.0091,
      "step": 2454500
    },
    {
      "epoch": 4.016875814169661,
      "grad_norm": 0.42282044887542725,
      "learning_rate": 1.9792044174139944e-06,
      "loss": 0.0083,
      "step": 2454520
    },
    {
      "epoch": 4.016908544608315,
      "grad_norm": 0.15408426523208618,
      "learning_rate": 1.979138525200477e-06,
      "loss": 0.0096,
      "step": 2454540
    },
    {
      "epoch": 4.016941275046968,
      "grad_norm": 0.47291409969329834,
      "learning_rate": 1.97907263298696e-06,
      "loss": 0.014,
      "step": 2454560
    },
    {
      "epoch": 4.016974005485621,
      "grad_norm": 0.39227738976478577,
      "learning_rate": 1.9790067407734426e-06,
      "loss": 0.0095,
      "step": 2454580
    },
    {
      "epoch": 4.017006735924275,
      "grad_norm": 0.40975117683410645,
      "learning_rate": 1.978940848559926e-06,
      "loss": 0.0092,
      "step": 2454600
    },
    {
      "epoch": 4.017039466362928,
      "grad_norm": 0.2695058286190033,
      "learning_rate": 1.978874956346409e-06,
      "loss": 0.0105,
      "step": 2454620
    },
    {
      "epoch": 4.017072196801582,
      "grad_norm": 0.25509968400001526,
      "learning_rate": 1.9788090641328917e-06,
      "loss": 0.0093,
      "step": 2454640
    },
    {
      "epoch": 4.017104927240235,
      "grad_norm": 0.3059737980365753,
      "learning_rate": 1.9787431719193744e-06,
      "loss": 0.0117,
      "step": 2454660
    },
    {
      "epoch": 4.017137657678888,
      "grad_norm": 0.1737862229347229,
      "learning_rate": 1.978677279705857e-06,
      "loss": 0.0098,
      "step": 2454680
    },
    {
      "epoch": 4.017170388117542,
      "grad_norm": 1.2977283000946045,
      "learning_rate": 1.9786113874923403e-06,
      "loss": 0.0117,
      "step": 2454700
    },
    {
      "epoch": 4.0172031185561945,
      "grad_norm": 0.4534766972064972,
      "learning_rate": 1.978545495278823e-06,
      "loss": 0.0093,
      "step": 2454720
    },
    {
      "epoch": 4.017235848994848,
      "grad_norm": 0.11960161477327347,
      "learning_rate": 1.978479603065306e-06,
      "loss": 0.0086,
      "step": 2454740
    },
    {
      "epoch": 4.017268579433502,
      "grad_norm": 0.11330565810203552,
      "learning_rate": 1.9784137108517886e-06,
      "loss": 0.0082,
      "step": 2454760
    },
    {
      "epoch": 4.017301309872155,
      "grad_norm": 0.10981199890375137,
      "learning_rate": 1.9783478186382717e-06,
      "loss": 0.0082,
      "step": 2454780
    },
    {
      "epoch": 4.017334040310808,
      "grad_norm": 0.07946164160966873,
      "learning_rate": 1.9782819264247545e-06,
      "loss": 0.0152,
      "step": 2454800
    },
    {
      "epoch": 4.017366770749462,
      "grad_norm": 0.07237984985113144,
      "learning_rate": 1.9782160342112376e-06,
      "loss": 0.0064,
      "step": 2454820
    },
    {
      "epoch": 4.017399501188115,
      "grad_norm": 0.10521283000707626,
      "learning_rate": 1.9781501419977204e-06,
      "loss": 0.0093,
      "step": 2454840
    },
    {
      "epoch": 4.017432231626768,
      "grad_norm": 0.1966942399740219,
      "learning_rate": 1.978084249784203e-06,
      "loss": 0.0104,
      "step": 2454860
    },
    {
      "epoch": 4.0174649620654215,
      "grad_norm": 0.12946052849292755,
      "learning_rate": 1.978018357570686e-06,
      "loss": 0.0087,
      "step": 2454880
    },
    {
      "epoch": 4.017497692504075,
      "grad_norm": 0.31110092997550964,
      "learning_rate": 1.977952465357169e-06,
      "loss": 0.0118,
      "step": 2454900
    },
    {
      "epoch": 4.017530422942729,
      "grad_norm": 0.04231394827365875,
      "learning_rate": 1.9778865731436518e-06,
      "loss": 0.0073,
      "step": 2454920
    },
    {
      "epoch": 4.017563153381381,
      "grad_norm": 0.10588450729846954,
      "learning_rate": 1.9778206809301345e-06,
      "loss": 0.0085,
      "step": 2454940
    },
    {
      "epoch": 4.017595883820035,
      "grad_norm": 0.3556689918041229,
      "learning_rate": 1.9777547887166177e-06,
      "loss": 0.0081,
      "step": 2454960
    },
    {
      "epoch": 4.017628614258689,
      "grad_norm": 0.5512582659721375,
      "learning_rate": 1.9776888965031004e-06,
      "loss": 0.0084,
      "step": 2454980
    },
    {
      "epoch": 4.017661344697341,
      "grad_norm": 0.3220093548297882,
      "learning_rate": 1.977623004289583e-06,
      "loss": 0.0061,
      "step": 2455000
    },
    {
      "epoch": 4.017694075135995,
      "grad_norm": 0.1668078750371933,
      "learning_rate": 1.9775571120760663e-06,
      "loss": 0.014,
      "step": 2455020
    },
    {
      "epoch": 4.0177268055746485,
      "grad_norm": 0.1342494934797287,
      "learning_rate": 1.977491219862549e-06,
      "loss": 0.0082,
      "step": 2455040
    },
    {
      "epoch": 4.017759536013302,
      "grad_norm": 0.2923148274421692,
      "learning_rate": 1.9774253276490318e-06,
      "loss": 0.0092,
      "step": 2455060
    },
    {
      "epoch": 4.017792266451955,
      "grad_norm": 0.34900203347206116,
      "learning_rate": 1.9773594354355145e-06,
      "loss": 0.0081,
      "step": 2455080
    },
    {
      "epoch": 4.017824996890608,
      "grad_norm": 0.3846285939216614,
      "learning_rate": 1.9772935432219977e-06,
      "loss": 0.0082,
      "step": 2455100
    },
    {
      "epoch": 4.017857727329262,
      "grad_norm": 0.4426897466182709,
      "learning_rate": 1.9772276510084804e-06,
      "loss": 0.008,
      "step": 2455120
    },
    {
      "epoch": 4.017890457767915,
      "grad_norm": 0.5268521904945374,
      "learning_rate": 1.9771617587949636e-06,
      "loss": 0.0119,
      "step": 2455140
    },
    {
      "epoch": 4.017923188206568,
      "grad_norm": 0.19057704508304596,
      "learning_rate": 1.9770958665814463e-06,
      "loss": 0.0083,
      "step": 2455160
    },
    {
      "epoch": 4.017955918645222,
      "grad_norm": 0.22999811172485352,
      "learning_rate": 1.977029974367929e-06,
      "loss": 0.0092,
      "step": 2455180
    },
    {
      "epoch": 4.017988649083875,
      "grad_norm": 0.5363501906394958,
      "learning_rate": 1.976964082154412e-06,
      "loss": 0.0115,
      "step": 2455200
    },
    {
      "epoch": 4.018021379522528,
      "grad_norm": 0.0924382135272026,
      "learning_rate": 1.976898189940895e-06,
      "loss": 0.0088,
      "step": 2455220
    },
    {
      "epoch": 4.018054109961182,
      "grad_norm": 0.6212369203567505,
      "learning_rate": 1.9768322977273777e-06,
      "loss": 0.0073,
      "step": 2455240
    },
    {
      "epoch": 4.018086840399835,
      "grad_norm": 0.15362578630447388,
      "learning_rate": 1.9767664055138604e-06,
      "loss": 0.0081,
      "step": 2455260
    },
    {
      "epoch": 4.018119570838488,
      "grad_norm": 0.20280824601650238,
      "learning_rate": 1.976700513300343e-06,
      "loss": 0.0096,
      "step": 2455280
    },
    {
      "epoch": 4.018152301277142,
      "grad_norm": 0.3674589991569519,
      "learning_rate": 1.9766346210868263e-06,
      "loss": 0.0086,
      "step": 2455300
    },
    {
      "epoch": 4.018185031715795,
      "grad_norm": 0.07455689460039139,
      "learning_rate": 1.9765687288733095e-06,
      "loss": 0.0086,
      "step": 2455320
    },
    {
      "epoch": 4.018217762154449,
      "grad_norm": 0.15145179629325867,
      "learning_rate": 1.9765028366597923e-06,
      "loss": 0.0096,
      "step": 2455340
    },
    {
      "epoch": 4.0182504925931015,
      "grad_norm": 0.08257117867469788,
      "learning_rate": 1.976436944446275e-06,
      "loss": 0.008,
      "step": 2455360
    },
    {
      "epoch": 4.018283223031755,
      "grad_norm": 0.056866057217121124,
      "learning_rate": 1.9763710522327577e-06,
      "loss": 0.0115,
      "step": 2455380
    },
    {
      "epoch": 4.018315953470409,
      "grad_norm": 0.578125,
      "learning_rate": 1.9763051600192405e-06,
      "loss": 0.0097,
      "step": 2455400
    },
    {
      "epoch": 4.018348683909061,
      "grad_norm": 0.29533275961875916,
      "learning_rate": 1.9762392678057236e-06,
      "loss": 0.0101,
      "step": 2455420
    },
    {
      "epoch": 4.018381414347715,
      "grad_norm": 0.07766525447368622,
      "learning_rate": 1.9761733755922064e-06,
      "loss": 0.0082,
      "step": 2455440
    },
    {
      "epoch": 4.018414144786369,
      "grad_norm": 0.2952856719493866,
      "learning_rate": 1.976107483378689e-06,
      "loss": 0.0115,
      "step": 2455460
    },
    {
      "epoch": 4.018446875225022,
      "grad_norm": 0.6114879250526428,
      "learning_rate": 1.9760415911651723e-06,
      "loss": 0.0104,
      "step": 2455480
    },
    {
      "epoch": 4.018479605663675,
      "grad_norm": 0.19544516503810883,
      "learning_rate": 1.975975698951655e-06,
      "loss": 0.0106,
      "step": 2455500
    },
    {
      "epoch": 4.0185123361023285,
      "grad_norm": 0.10541170835494995,
      "learning_rate": 1.975909806738138e-06,
      "loss": 0.0074,
      "step": 2455520
    },
    {
      "epoch": 4.018545066540982,
      "grad_norm": 0.10616720467805862,
      "learning_rate": 1.975843914524621e-06,
      "loss": 0.0074,
      "step": 2455540
    },
    {
      "epoch": 4.018577796979635,
      "grad_norm": 0.40510258078575134,
      "learning_rate": 1.9757780223111037e-06,
      "loss": 0.0091,
      "step": 2455560
    },
    {
      "epoch": 4.018610527418288,
      "grad_norm": 0.2655109167098999,
      "learning_rate": 1.9757121300975864e-06,
      "loss": 0.008,
      "step": 2455580
    },
    {
      "epoch": 4.018643257856942,
      "grad_norm": 0.5361175537109375,
      "learning_rate": 1.975646237884069e-06,
      "loss": 0.0072,
      "step": 2455600
    },
    {
      "epoch": 4.018675988295595,
      "grad_norm": 3.6316094398498535,
      "learning_rate": 1.9755803456705523e-06,
      "loss": 0.0081,
      "step": 2455620
    },
    {
      "epoch": 4.018708718734248,
      "grad_norm": 0.20850738883018494,
      "learning_rate": 1.975514453457035e-06,
      "loss": 0.0092,
      "step": 2455640
    },
    {
      "epoch": 4.018741449172902,
      "grad_norm": 0.2193533480167389,
      "learning_rate": 1.975448561243518e-06,
      "loss": 0.0099,
      "step": 2455660
    },
    {
      "epoch": 4.0187741796115555,
      "grad_norm": 0.19684605300426483,
      "learning_rate": 1.975382669030001e-06,
      "loss": 0.0119,
      "step": 2455680
    },
    {
      "epoch": 4.018806910050208,
      "grad_norm": 0.01680772379040718,
      "learning_rate": 1.9753167768164837e-06,
      "loss": 0.0098,
      "step": 2455700
    },
    {
      "epoch": 4.018839640488862,
      "grad_norm": 1.0955225229263306,
      "learning_rate": 1.975250884602967e-06,
      "loss": 0.0074,
      "step": 2455720
    },
    {
      "epoch": 4.018872370927515,
      "grad_norm": 0.05129380151629448,
      "learning_rate": 1.9751849923894496e-06,
      "loss": 0.0106,
      "step": 2455740
    },
    {
      "epoch": 4.018905101366168,
      "grad_norm": 0.3810478746891022,
      "learning_rate": 1.9751191001759323e-06,
      "loss": 0.0098,
      "step": 2455760
    },
    {
      "epoch": 4.018937831804822,
      "grad_norm": 0.266157865524292,
      "learning_rate": 1.975053207962415e-06,
      "loss": 0.0068,
      "step": 2455780
    },
    {
      "epoch": 4.018970562243475,
      "grad_norm": 0.1237201914191246,
      "learning_rate": 1.974987315748898e-06,
      "loss": 0.009,
      "step": 2455800
    },
    {
      "epoch": 4.019003292682129,
      "grad_norm": 0.5975465774536133,
      "learning_rate": 1.974921423535381e-06,
      "loss": 0.0072,
      "step": 2455820
    },
    {
      "epoch": 4.019036023120782,
      "grad_norm": 0.36250752210617065,
      "learning_rate": 1.974855531321864e-06,
      "loss": 0.0074,
      "step": 2455840
    },
    {
      "epoch": 4.019068753559435,
      "grad_norm": 0.2506866157054901,
      "learning_rate": 1.974789639108347e-06,
      "loss": 0.0112,
      "step": 2455860
    },
    {
      "epoch": 4.019101483998089,
      "grad_norm": 0.25840333104133606,
      "learning_rate": 1.9747237468948296e-06,
      "loss": 0.0112,
      "step": 2455880
    },
    {
      "epoch": 4.0191342144367415,
      "grad_norm": 0.3484954833984375,
      "learning_rate": 1.9746578546813124e-06,
      "loss": 0.0062,
      "step": 2455900
    },
    {
      "epoch": 4.019166944875395,
      "grad_norm": 0.1193343847990036,
      "learning_rate": 1.9745919624677955e-06,
      "loss": 0.009,
      "step": 2455920
    },
    {
      "epoch": 4.019199675314049,
      "grad_norm": 0.44765177369117737,
      "learning_rate": 1.9745260702542783e-06,
      "loss": 0.0111,
      "step": 2455940
    },
    {
      "epoch": 4.019232405752702,
      "grad_norm": 0.4774378836154938,
      "learning_rate": 1.974460178040761e-06,
      "loss": 0.01,
      "step": 2455960
    },
    {
      "epoch": 4.019265136191355,
      "grad_norm": 0.2058579921722412,
      "learning_rate": 1.9743942858272437e-06,
      "loss": 0.0064,
      "step": 2455980
    },
    {
      "epoch": 4.0192978666300085,
      "grad_norm": 0.27783966064453125,
      "learning_rate": 1.974328393613727e-06,
      "loss": 0.0082,
      "step": 2456000
    },
    {
      "epoch": 4.019330597068662,
      "grad_norm": 0.23976929485797882,
      "learning_rate": 1.9742625014002096e-06,
      "loss": 0.0088,
      "step": 2456020
    },
    {
      "epoch": 4.019363327507315,
      "grad_norm": 0.14843659102916718,
      "learning_rate": 1.974196609186693e-06,
      "loss": 0.0105,
      "step": 2456040
    },
    {
      "epoch": 4.019396057945968,
      "grad_norm": 0.24982210993766785,
      "learning_rate": 1.9741307169731755e-06,
      "loss": 0.0068,
      "step": 2456060
    },
    {
      "epoch": 4.019428788384622,
      "grad_norm": 0.23191489279270172,
      "learning_rate": 1.9740648247596583e-06,
      "loss": 0.0115,
      "step": 2456080
    },
    {
      "epoch": 4.019461518823276,
      "grad_norm": 0.37216877937316895,
      "learning_rate": 1.973998932546141e-06,
      "loss": 0.0138,
      "step": 2456100
    },
    {
      "epoch": 4.019494249261928,
      "grad_norm": 0.41445285081863403,
      "learning_rate": 1.973933040332624e-06,
      "loss": 0.0088,
      "step": 2456120
    },
    {
      "epoch": 4.019526979700582,
      "grad_norm": 0.17801785469055176,
      "learning_rate": 1.973867148119107e-06,
      "loss": 0.0074,
      "step": 2456140
    },
    {
      "epoch": 4.0195597101392355,
      "grad_norm": 0.17525255680084229,
      "learning_rate": 1.9738012559055897e-06,
      "loss": 0.011,
      "step": 2456160
    },
    {
      "epoch": 4.019592440577888,
      "grad_norm": 0.3296813368797302,
      "learning_rate": 1.973735363692073e-06,
      "loss": 0.0055,
      "step": 2456180
    },
    {
      "epoch": 4.019625171016542,
      "grad_norm": 0.25876709818840027,
      "learning_rate": 1.9736694714785556e-06,
      "loss": 0.0076,
      "step": 2456200
    },
    {
      "epoch": 4.019657901455195,
      "grad_norm": 0.2031106948852539,
      "learning_rate": 1.9736035792650383e-06,
      "loss": 0.0098,
      "step": 2456220
    },
    {
      "epoch": 4.019690631893849,
      "grad_norm": 0.2541038691997528,
      "learning_rate": 1.9735376870515215e-06,
      "loss": 0.0102,
      "step": 2456240
    },
    {
      "epoch": 4.019723362332502,
      "grad_norm": 0.24651095271110535,
      "learning_rate": 1.973471794838004e-06,
      "loss": 0.0068,
      "step": 2456260
    },
    {
      "epoch": 4.019756092771155,
      "grad_norm": 0.06630342453718185,
      "learning_rate": 1.973405902624487e-06,
      "loss": 0.0068,
      "step": 2456280
    },
    {
      "epoch": 4.019788823209809,
      "grad_norm": 0.20880383253097534,
      "learning_rate": 1.9733400104109697e-06,
      "loss": 0.0085,
      "step": 2456300
    },
    {
      "epoch": 4.019821553648462,
      "grad_norm": 0.20126253366470337,
      "learning_rate": 1.973274118197453e-06,
      "loss": 0.0072,
      "step": 2456320
    },
    {
      "epoch": 4.019854284087115,
      "grad_norm": 0.11835081875324249,
      "learning_rate": 1.9732082259839356e-06,
      "loss": 0.0072,
      "step": 2456340
    },
    {
      "epoch": 4.019887014525769,
      "grad_norm": 0.2901519238948822,
      "learning_rate": 1.9731423337704188e-06,
      "loss": 0.0112,
      "step": 2456360
    },
    {
      "epoch": 4.019919744964422,
      "grad_norm": 0.3315839469432831,
      "learning_rate": 1.9730764415569015e-06,
      "loss": 0.0093,
      "step": 2456380
    },
    {
      "epoch": 4.019952475403075,
      "grad_norm": 0.3567568361759186,
      "learning_rate": 1.9730105493433842e-06,
      "loss": 0.0114,
      "step": 2456400
    },
    {
      "epoch": 4.019985205841729,
      "grad_norm": 0.13927499949932098,
      "learning_rate": 1.972944657129867e-06,
      "loss": 0.0088,
      "step": 2456420
    },
    {
      "epoch": 4.020017936280382,
      "grad_norm": 0.36852699518203735,
      "learning_rate": 1.97287876491635e-06,
      "loss": 0.0088,
      "step": 2456440
    },
    {
      "epoch": 4.020050666719035,
      "grad_norm": 0.26531460881233215,
      "learning_rate": 1.972812872702833e-06,
      "loss": 0.0096,
      "step": 2456460
    },
    {
      "epoch": 4.020083397157689,
      "grad_norm": 0.2213265746831894,
      "learning_rate": 1.9727469804893156e-06,
      "loss": 0.0106,
      "step": 2456480
    },
    {
      "epoch": 4.020116127596342,
      "grad_norm": 0.17808733880519867,
      "learning_rate": 1.9726810882757984e-06,
      "loss": 0.0137,
      "step": 2456500
    },
    {
      "epoch": 4.020148858034996,
      "grad_norm": 0.2610103487968445,
      "learning_rate": 1.9726151960622815e-06,
      "loss": 0.0071,
      "step": 2456520
    },
    {
      "epoch": 4.0201815884736485,
      "grad_norm": 0.2142634242773056,
      "learning_rate": 1.9725493038487647e-06,
      "loss": 0.0117,
      "step": 2456540
    },
    {
      "epoch": 4.020214318912302,
      "grad_norm": 0.14731508493423462,
      "learning_rate": 1.9724834116352474e-06,
      "loss": 0.0072,
      "step": 2456560
    },
    {
      "epoch": 4.020247049350956,
      "grad_norm": 0.40706586837768555,
      "learning_rate": 1.97241751942173e-06,
      "loss": 0.0093,
      "step": 2456580
    },
    {
      "epoch": 4.020279779789608,
      "grad_norm": 0.1346122771501541,
      "learning_rate": 1.972351627208213e-06,
      "loss": 0.0099,
      "step": 2456600
    },
    {
      "epoch": 4.020312510228262,
      "grad_norm": 0.3749527335166931,
      "learning_rate": 1.9722857349946956e-06,
      "loss": 0.0117,
      "step": 2456620
    },
    {
      "epoch": 4.020345240666916,
      "grad_norm": 0.34260448813438416,
      "learning_rate": 1.972219842781179e-06,
      "loss": 0.01,
      "step": 2456640
    },
    {
      "epoch": 4.020377971105569,
      "grad_norm": 0.12749765813350677,
      "learning_rate": 1.9721539505676615e-06,
      "loss": 0.009,
      "step": 2456660
    },
    {
      "epoch": 4.020410701544222,
      "grad_norm": 0.23899851739406586,
      "learning_rate": 1.9720880583541443e-06,
      "loss": 0.0066,
      "step": 2456680
    },
    {
      "epoch": 4.0204434319828755,
      "grad_norm": 0.12797662615776062,
      "learning_rate": 1.972022166140627e-06,
      "loss": 0.0076,
      "step": 2456700
    },
    {
      "epoch": 4.020476162421529,
      "grad_norm": 0.30191701650619507,
      "learning_rate": 1.97195627392711e-06,
      "loss": 0.0074,
      "step": 2456720
    },
    {
      "epoch": 4.020508892860182,
      "grad_norm": 0.22355087101459503,
      "learning_rate": 1.9718903817135934e-06,
      "loss": 0.0118,
      "step": 2456740
    },
    {
      "epoch": 4.020541623298835,
      "grad_norm": 0.22722561657428741,
      "learning_rate": 1.971824489500076e-06,
      "loss": 0.0104,
      "step": 2456760
    },
    {
      "epoch": 4.020574353737489,
      "grad_norm": 0.2203834354877472,
      "learning_rate": 1.971758597286559e-06,
      "loss": 0.0072,
      "step": 2456780
    },
    {
      "epoch": 4.0206070841761425,
      "grad_norm": 0.24665051698684692,
      "learning_rate": 1.9716927050730416e-06,
      "loss": 0.0099,
      "step": 2456800
    },
    {
      "epoch": 4.020639814614795,
      "grad_norm": 0.03839166462421417,
      "learning_rate": 1.9716268128595243e-06,
      "loss": 0.0104,
      "step": 2456820
    },
    {
      "epoch": 4.020672545053449,
      "grad_norm": 0.1046125516295433,
      "learning_rate": 1.9715609206460075e-06,
      "loss": 0.0095,
      "step": 2456840
    },
    {
      "epoch": 4.020705275492102,
      "grad_norm": 0.17930038273334503,
      "learning_rate": 1.9714950284324902e-06,
      "loss": 0.0084,
      "step": 2456860
    },
    {
      "epoch": 4.020738005930755,
      "grad_norm": 0.26412540674209595,
      "learning_rate": 1.971429136218973e-06,
      "loss": 0.0113,
      "step": 2456880
    },
    {
      "epoch": 4.020770736369409,
      "grad_norm": 0.5740055441856384,
      "learning_rate": 1.971363244005456e-06,
      "loss": 0.0098,
      "step": 2456900
    },
    {
      "epoch": 4.020803466808062,
      "grad_norm": 0.1631590873003006,
      "learning_rate": 1.971297351791939e-06,
      "loss": 0.0092,
      "step": 2456920
    },
    {
      "epoch": 4.020836197246716,
      "grad_norm": 0.15414094924926758,
      "learning_rate": 1.971231459578422e-06,
      "loss": 0.0103,
      "step": 2456940
    },
    {
      "epoch": 4.020868927685369,
      "grad_norm": 0.12161612510681152,
      "learning_rate": 1.9711655673649048e-06,
      "loss": 0.0069,
      "step": 2456960
    },
    {
      "epoch": 4.020901658124022,
      "grad_norm": 0.2532859146595001,
      "learning_rate": 1.9710996751513875e-06,
      "loss": 0.0117,
      "step": 2456980
    },
    {
      "epoch": 4.020934388562676,
      "grad_norm": 0.09812027961015701,
      "learning_rate": 1.9710337829378702e-06,
      "loss": 0.0057,
      "step": 2457000
    },
    {
      "epoch": 4.0209671190013285,
      "grad_norm": 0.18337957561016083,
      "learning_rate": 1.970967890724353e-06,
      "loss": 0.0075,
      "step": 2457020
    },
    {
      "epoch": 4.020999849439982,
      "grad_norm": 0.24045133590698242,
      "learning_rate": 1.970901998510836e-06,
      "loss": 0.0167,
      "step": 2457040
    },
    {
      "epoch": 4.021032579878636,
      "grad_norm": 0.13764940202236176,
      "learning_rate": 1.9708361062973193e-06,
      "loss": 0.0093,
      "step": 2457060
    },
    {
      "epoch": 4.021065310317289,
      "grad_norm": 0.3760949373245239,
      "learning_rate": 1.970770214083802e-06,
      "loss": 0.0081,
      "step": 2457080
    },
    {
      "epoch": 4.021098040755942,
      "grad_norm": 0.10219451785087585,
      "learning_rate": 1.9707043218702848e-06,
      "loss": 0.0108,
      "step": 2457100
    },
    {
      "epoch": 4.021130771194596,
      "grad_norm": 0.232084259390831,
      "learning_rate": 1.9706384296567675e-06,
      "loss": 0.0089,
      "step": 2457120
    },
    {
      "epoch": 4.021163501633249,
      "grad_norm": 0.0799192562699318,
      "learning_rate": 1.9705725374432507e-06,
      "loss": 0.0125,
      "step": 2457140
    },
    {
      "epoch": 4.021196232071902,
      "grad_norm": 0.14186736941337585,
      "learning_rate": 1.9705066452297334e-06,
      "loss": 0.0108,
      "step": 2457160
    },
    {
      "epoch": 4.0212289625105555,
      "grad_norm": 0.22640958428382874,
      "learning_rate": 1.970440753016216e-06,
      "loss": 0.0068,
      "step": 2457180
    },
    {
      "epoch": 4.021261692949209,
      "grad_norm": 0.0915311872959137,
      "learning_rate": 1.970374860802699e-06,
      "loss": 0.0069,
      "step": 2457200
    },
    {
      "epoch": 4.021294423387862,
      "grad_norm": 0.36153027415275574,
      "learning_rate": 1.970308968589182e-06,
      "loss": 0.0069,
      "step": 2457220
    },
    {
      "epoch": 4.021327153826515,
      "grad_norm": 0.3859338164329529,
      "learning_rate": 1.970243076375665e-06,
      "loss": 0.0108,
      "step": 2457240
    },
    {
      "epoch": 4.021359884265169,
      "grad_norm": 0.8264204859733582,
      "learning_rate": 1.970177184162148e-06,
      "loss": 0.0097,
      "step": 2457260
    },
    {
      "epoch": 4.021392614703823,
      "grad_norm": 0.19426216185092926,
      "learning_rate": 1.9701112919486307e-06,
      "loss": 0.0101,
      "step": 2457280
    },
    {
      "epoch": 4.021425345142475,
      "grad_norm": 0.3938596248626709,
      "learning_rate": 1.9700453997351135e-06,
      "loss": 0.0097,
      "step": 2457300
    },
    {
      "epoch": 4.021458075581129,
      "grad_norm": 0.48145803809165955,
      "learning_rate": 1.969979507521596e-06,
      "loss": 0.0133,
      "step": 2457320
    },
    {
      "epoch": 4.0214908060197825,
      "grad_norm": 0.32913827896118164,
      "learning_rate": 1.9699136153080794e-06,
      "loss": 0.0096,
      "step": 2457340
    },
    {
      "epoch": 4.021523536458435,
      "grad_norm": 0.2642628848552704,
      "learning_rate": 1.969847723094562e-06,
      "loss": 0.0079,
      "step": 2457360
    },
    {
      "epoch": 4.021556266897089,
      "grad_norm": 0.18208743631839752,
      "learning_rate": 1.969781830881045e-06,
      "loss": 0.0093,
      "step": 2457380
    },
    {
      "epoch": 4.021588997335742,
      "grad_norm": 0.18172231316566467,
      "learning_rate": 1.9697159386675276e-06,
      "loss": 0.0112,
      "step": 2457400
    },
    {
      "epoch": 4.021621727774396,
      "grad_norm": 0.16249921917915344,
      "learning_rate": 1.9696500464540107e-06,
      "loss": 0.0159,
      "step": 2457420
    },
    {
      "epoch": 4.021654458213049,
      "grad_norm": 0.045552074909210205,
      "learning_rate": 1.9695841542404935e-06,
      "loss": 0.0078,
      "step": 2457440
    },
    {
      "epoch": 4.021687188651702,
      "grad_norm": 0.12990525364875793,
      "learning_rate": 1.9695182620269766e-06,
      "loss": 0.0077,
      "step": 2457460
    },
    {
      "epoch": 4.021719919090356,
      "grad_norm": 0.41278260946273804,
      "learning_rate": 1.9694523698134594e-06,
      "loss": 0.0102,
      "step": 2457480
    },
    {
      "epoch": 4.021752649529009,
      "grad_norm": 0.3330758213996887,
      "learning_rate": 1.969386477599942e-06,
      "loss": 0.0105,
      "step": 2457500
    },
    {
      "epoch": 4.021785379967662,
      "grad_norm": 0.24762113392353058,
      "learning_rate": 1.969320585386425e-06,
      "loss": 0.0062,
      "step": 2457520
    },
    {
      "epoch": 4.021818110406316,
      "grad_norm": 0.2697201073169708,
      "learning_rate": 1.969254693172908e-06,
      "loss": 0.0056,
      "step": 2457540
    },
    {
      "epoch": 4.021850840844969,
      "grad_norm": 0.34486258029937744,
      "learning_rate": 1.9691888009593908e-06,
      "loss": 0.0072,
      "step": 2457560
    },
    {
      "epoch": 4.021883571283622,
      "grad_norm": 0.32899677753448486,
      "learning_rate": 1.9691229087458735e-06,
      "loss": 0.0095,
      "step": 2457580
    },
    {
      "epoch": 4.021916301722276,
      "grad_norm": 0.1791454553604126,
      "learning_rate": 1.9690570165323567e-06,
      "loss": 0.0075,
      "step": 2457600
    },
    {
      "epoch": 4.021949032160929,
      "grad_norm": 0.3749013841152191,
      "learning_rate": 1.9689911243188394e-06,
      "loss": 0.0085,
      "step": 2457620
    },
    {
      "epoch": 4.021981762599582,
      "grad_norm": 0.09407392144203186,
      "learning_rate": 1.968925232105322e-06,
      "loss": 0.0127,
      "step": 2457640
    },
    {
      "epoch": 4.0220144930382355,
      "grad_norm": 0.19487214088439941,
      "learning_rate": 1.9688593398918053e-06,
      "loss": 0.0077,
      "step": 2457660
    },
    {
      "epoch": 4.022047223476889,
      "grad_norm": 0.31135430932044983,
      "learning_rate": 1.968793447678288e-06,
      "loss": 0.0126,
      "step": 2457680
    },
    {
      "epoch": 4.022079953915543,
      "grad_norm": 0.7113467454910278,
      "learning_rate": 1.968727555464771e-06,
      "loss": 0.0091,
      "step": 2457700
    },
    {
      "epoch": 4.0221126843541954,
      "grad_norm": 0.21298837661743164,
      "learning_rate": 1.9686616632512535e-06,
      "loss": 0.0094,
      "step": 2457720
    },
    {
      "epoch": 4.022145414792849,
      "grad_norm": 0.1773902326822281,
      "learning_rate": 1.9685957710377367e-06,
      "loss": 0.0108,
      "step": 2457740
    },
    {
      "epoch": 4.022178145231503,
      "grad_norm": 0.15488436818122864,
      "learning_rate": 1.9685298788242194e-06,
      "loss": 0.0098,
      "step": 2457760
    },
    {
      "epoch": 4.022210875670155,
      "grad_norm": 0.34322604537010193,
      "learning_rate": 1.9684639866107026e-06,
      "loss": 0.0068,
      "step": 2457780
    },
    {
      "epoch": 4.022243606108809,
      "grad_norm": 0.41635504364967346,
      "learning_rate": 1.9683980943971853e-06,
      "loss": 0.0116,
      "step": 2457800
    },
    {
      "epoch": 4.0222763365474625,
      "grad_norm": 0.3631152808666229,
      "learning_rate": 1.968332202183668e-06,
      "loss": 0.0121,
      "step": 2457820
    },
    {
      "epoch": 4.022309066986116,
      "grad_norm": 0.15064425766468048,
      "learning_rate": 1.968266309970151e-06,
      "loss": 0.0057,
      "step": 2457840
    },
    {
      "epoch": 4.022341797424769,
      "grad_norm": 0.2576327621936798,
      "learning_rate": 1.968200417756634e-06,
      "loss": 0.0115,
      "step": 2457860
    },
    {
      "epoch": 4.022374527863422,
      "grad_norm": 0.11410198360681534,
      "learning_rate": 1.9681345255431167e-06,
      "loss": 0.01,
      "step": 2457880
    },
    {
      "epoch": 4.022407258302076,
      "grad_norm": 0.35177546739578247,
      "learning_rate": 1.9680686333295995e-06,
      "loss": 0.0093,
      "step": 2457900
    },
    {
      "epoch": 4.022439988740729,
      "grad_norm": 0.14731527864933014,
      "learning_rate": 1.968002741116082e-06,
      "loss": 0.0105,
      "step": 2457920
    },
    {
      "epoch": 4.022472719179382,
      "grad_norm": 0.21867869794368744,
      "learning_rate": 1.9679368489025654e-06,
      "loss": 0.0062,
      "step": 2457940
    },
    {
      "epoch": 4.022505449618036,
      "grad_norm": 0.10994322597980499,
      "learning_rate": 1.9678709566890485e-06,
      "loss": 0.0161,
      "step": 2457960
    },
    {
      "epoch": 4.0225381800566895,
      "grad_norm": 0.054633233696222305,
      "learning_rate": 1.9678050644755313e-06,
      "loss": 0.0066,
      "step": 2457980
    },
    {
      "epoch": 4.022570910495342,
      "grad_norm": 0.10294241458177567,
      "learning_rate": 1.967739172262014e-06,
      "loss": 0.0075,
      "step": 2458000
    },
    {
      "epoch": 4.022603640933996,
      "grad_norm": 0.2862323820590973,
      "learning_rate": 1.9676732800484967e-06,
      "loss": 0.0137,
      "step": 2458020
    },
    {
      "epoch": 4.022636371372649,
      "grad_norm": 0.21994662284851074,
      "learning_rate": 1.9676073878349795e-06,
      "loss": 0.0074,
      "step": 2458040
    },
    {
      "epoch": 4.022669101811302,
      "grad_norm": 0.10522636026144028,
      "learning_rate": 1.9675414956214626e-06,
      "loss": 0.0077,
      "step": 2458060
    },
    {
      "epoch": 4.022701832249956,
      "grad_norm": 0.22580371797084808,
      "learning_rate": 1.9674756034079454e-06,
      "loss": 0.0097,
      "step": 2458080
    },
    {
      "epoch": 4.022734562688609,
      "grad_norm": 0.1246328130364418,
      "learning_rate": 1.967409711194428e-06,
      "loss": 0.0132,
      "step": 2458100
    },
    {
      "epoch": 4.022767293127263,
      "grad_norm": 0.12142500281333923,
      "learning_rate": 1.9673438189809113e-06,
      "loss": 0.0097,
      "step": 2458120
    },
    {
      "epoch": 4.022800023565916,
      "grad_norm": 0.2995971441268921,
      "learning_rate": 1.967277926767394e-06,
      "loss": 0.0148,
      "step": 2458140
    },
    {
      "epoch": 4.022832754004569,
      "grad_norm": 0.2900071442127228,
      "learning_rate": 1.967212034553877e-06,
      "loss": 0.0062,
      "step": 2458160
    },
    {
      "epoch": 4.022865484443223,
      "grad_norm": 0.42986002564430237,
      "learning_rate": 1.96714614234036e-06,
      "loss": 0.012,
      "step": 2458180
    },
    {
      "epoch": 4.0228982148818755,
      "grad_norm": 0.12468887865543365,
      "learning_rate": 1.9670802501268427e-06,
      "loss": 0.0051,
      "step": 2458200
    },
    {
      "epoch": 4.022930945320529,
      "grad_norm": 0.3872656524181366,
      "learning_rate": 1.9670143579133254e-06,
      "loss": 0.0062,
      "step": 2458220
    },
    {
      "epoch": 4.022963675759183,
      "grad_norm": 0.24896593391895294,
      "learning_rate": 1.966948465699808e-06,
      "loss": 0.0055,
      "step": 2458240
    },
    {
      "epoch": 4.022996406197836,
      "grad_norm": 0.20370431244373322,
      "learning_rate": 1.9668825734862913e-06,
      "loss": 0.0115,
      "step": 2458260
    },
    {
      "epoch": 4.023029136636489,
      "grad_norm": 0.27187931537628174,
      "learning_rate": 1.966816681272774e-06,
      "loss": 0.0078,
      "step": 2458280
    },
    {
      "epoch": 4.023061867075143,
      "grad_norm": 0.11913152784109116,
      "learning_rate": 1.9667507890592572e-06,
      "loss": 0.0125,
      "step": 2458300
    },
    {
      "epoch": 4.023094597513796,
      "grad_norm": 0.11749600619077682,
      "learning_rate": 1.96668489684574e-06,
      "loss": 0.0096,
      "step": 2458320
    },
    {
      "epoch": 4.023127327952449,
      "grad_norm": 0.07377569377422333,
      "learning_rate": 1.9666190046322227e-06,
      "loss": 0.0115,
      "step": 2458340
    },
    {
      "epoch": 4.0231600583911025,
      "grad_norm": 0.27880656719207764,
      "learning_rate": 1.966553112418706e-06,
      "loss": 0.0108,
      "step": 2458360
    },
    {
      "epoch": 4.023192788829756,
      "grad_norm": 0.3024500608444214,
      "learning_rate": 1.9664872202051886e-06,
      "loss": 0.0127,
      "step": 2458380
    },
    {
      "epoch": 4.02322551926841,
      "grad_norm": 0.2868939936161041,
      "learning_rate": 1.9664213279916713e-06,
      "loss": 0.012,
      "step": 2458400
    },
    {
      "epoch": 4.023258249707062,
      "grad_norm": 0.2788093686103821,
      "learning_rate": 1.966355435778154e-06,
      "loss": 0.0099,
      "step": 2458420
    },
    {
      "epoch": 4.023290980145716,
      "grad_norm": 0.10097987949848175,
      "learning_rate": 1.966289543564637e-06,
      "loss": 0.01,
      "step": 2458440
    },
    {
      "epoch": 4.0233237105843696,
      "grad_norm": 0.10307294130325317,
      "learning_rate": 1.96622365135112e-06,
      "loss": 0.0067,
      "step": 2458460
    },
    {
      "epoch": 4.023356441023022,
      "grad_norm": 0.041455578058958054,
      "learning_rate": 1.966157759137603e-06,
      "loss": 0.009,
      "step": 2458480
    },
    {
      "epoch": 4.023389171461676,
      "grad_norm": 0.23797132074832916,
      "learning_rate": 1.966091866924086e-06,
      "loss": 0.0089,
      "step": 2458500
    },
    {
      "epoch": 4.0234219019003294,
      "grad_norm": 0.24638505280017853,
      "learning_rate": 1.9660259747105686e-06,
      "loss": 0.0124,
      "step": 2458520
    },
    {
      "epoch": 4.023454632338983,
      "grad_norm": 0.17115002870559692,
      "learning_rate": 1.9659600824970514e-06,
      "loss": 0.0098,
      "step": 2458540
    },
    {
      "epoch": 4.023487362777636,
      "grad_norm": 0.10709239542484283,
      "learning_rate": 1.9658941902835345e-06,
      "loss": 0.0051,
      "step": 2458560
    },
    {
      "epoch": 4.023520093216289,
      "grad_norm": 0.12658940255641937,
      "learning_rate": 1.9658282980700173e-06,
      "loss": 0.0076,
      "step": 2458580
    },
    {
      "epoch": 4.023552823654943,
      "grad_norm": 0.21341627836227417,
      "learning_rate": 1.9657624058565e-06,
      "loss": 0.0074,
      "step": 2458600
    },
    {
      "epoch": 4.023585554093596,
      "grad_norm": 1.0190585851669312,
      "learning_rate": 1.9656965136429827e-06,
      "loss": 0.0127,
      "step": 2458620
    },
    {
      "epoch": 4.023618284532249,
      "grad_norm": 0.058096226304769516,
      "learning_rate": 1.965630621429466e-06,
      "loss": 0.0086,
      "step": 2458640
    },
    {
      "epoch": 4.023651014970903,
      "grad_norm": 0.21232901513576508,
      "learning_rate": 1.9655647292159486e-06,
      "loss": 0.0086,
      "step": 2458660
    },
    {
      "epoch": 4.023683745409556,
      "grad_norm": 0.5450069904327393,
      "learning_rate": 1.965498837002432e-06,
      "loss": 0.0077,
      "step": 2458680
    },
    {
      "epoch": 4.023716475848209,
      "grad_norm": 0.16770747303962708,
      "learning_rate": 1.9654329447889146e-06,
      "loss": 0.0083,
      "step": 2458700
    },
    {
      "epoch": 4.023749206286863,
      "grad_norm": 0.12853899598121643,
      "learning_rate": 1.9653670525753973e-06,
      "loss": 0.0075,
      "step": 2458720
    },
    {
      "epoch": 4.023781936725516,
      "grad_norm": 0.17906554043293,
      "learning_rate": 1.96530116036188e-06,
      "loss": 0.0126,
      "step": 2458740
    },
    {
      "epoch": 4.023814667164169,
      "grad_norm": 0.25687819719314575,
      "learning_rate": 1.965235268148363e-06,
      "loss": 0.0113,
      "step": 2458760
    },
    {
      "epoch": 4.023847397602823,
      "grad_norm": 0.23391957581043243,
      "learning_rate": 1.965169375934846e-06,
      "loss": 0.0088,
      "step": 2458780
    },
    {
      "epoch": 4.023880128041476,
      "grad_norm": 0.21013124287128448,
      "learning_rate": 1.9651034837213287e-06,
      "loss": 0.0142,
      "step": 2458800
    },
    {
      "epoch": 4.023912858480129,
      "grad_norm": 0.1115550547838211,
      "learning_rate": 1.965037591507812e-06,
      "loss": 0.0092,
      "step": 2458820
    },
    {
      "epoch": 4.0239455889187825,
      "grad_norm": 0.14674736559391022,
      "learning_rate": 1.9649716992942946e-06,
      "loss": 0.0088,
      "step": 2458840
    },
    {
      "epoch": 4.023978319357436,
      "grad_norm": 0.151894673705101,
      "learning_rate": 1.9649058070807773e-06,
      "loss": 0.0125,
      "step": 2458860
    },
    {
      "epoch": 4.02401104979609,
      "grad_norm": 0.2150830626487732,
      "learning_rate": 1.9648399148672605e-06,
      "loss": 0.0067,
      "step": 2458880
    },
    {
      "epoch": 4.024043780234742,
      "grad_norm": 0.19208364188671112,
      "learning_rate": 1.9647740226537432e-06,
      "loss": 0.0084,
      "step": 2458900
    },
    {
      "epoch": 4.024076510673396,
      "grad_norm": 0.09927106648683548,
      "learning_rate": 1.964708130440226e-06,
      "loss": 0.0144,
      "step": 2458920
    },
    {
      "epoch": 4.02410924111205,
      "grad_norm": 0.22849120199680328,
      "learning_rate": 1.9646422382267087e-06,
      "loss": 0.0086,
      "step": 2458940
    },
    {
      "epoch": 4.024141971550702,
      "grad_norm": 0.15182015299797058,
      "learning_rate": 1.964576346013192e-06,
      "loss": 0.0077,
      "step": 2458960
    },
    {
      "epoch": 4.024174701989356,
      "grad_norm": 0.25098538398742676,
      "learning_rate": 1.9645104537996746e-06,
      "loss": 0.0089,
      "step": 2458980
    },
    {
      "epoch": 4.0242074324280095,
      "grad_norm": 0.25912702083587646,
      "learning_rate": 1.9644445615861578e-06,
      "loss": 0.008,
      "step": 2459000
    },
    {
      "epoch": 4.024240162866663,
      "grad_norm": 0.346877783536911,
      "learning_rate": 1.9643786693726405e-06,
      "loss": 0.01,
      "step": 2459020
    },
    {
      "epoch": 4.024272893305316,
      "grad_norm": 0.10542262345552444,
      "learning_rate": 1.9643127771591232e-06,
      "loss": 0.0092,
      "step": 2459040
    },
    {
      "epoch": 4.024305623743969,
      "grad_norm": 0.3624114394187927,
      "learning_rate": 1.964246884945606e-06,
      "loss": 0.0103,
      "step": 2459060
    },
    {
      "epoch": 4.024338354182623,
      "grad_norm": 0.39745116233825684,
      "learning_rate": 1.964180992732089e-06,
      "loss": 0.009,
      "step": 2459080
    },
    {
      "epoch": 4.024371084621276,
      "grad_norm": 0.6104511022567749,
      "learning_rate": 1.964115100518572e-06,
      "loss": 0.0108,
      "step": 2459100
    },
    {
      "epoch": 4.024403815059929,
      "grad_norm": 0.19904685020446777,
      "learning_rate": 1.9640492083050546e-06,
      "loss": 0.0069,
      "step": 2459120
    },
    {
      "epoch": 4.024436545498583,
      "grad_norm": 0.1515677124261856,
      "learning_rate": 1.9639833160915374e-06,
      "loss": 0.008,
      "step": 2459140
    },
    {
      "epoch": 4.0244692759372365,
      "grad_norm": 0.08415798842906952,
      "learning_rate": 1.9639174238780205e-06,
      "loss": 0.0078,
      "step": 2459160
    },
    {
      "epoch": 4.024502006375889,
      "grad_norm": 0.199160635471344,
      "learning_rate": 1.9638515316645037e-06,
      "loss": 0.006,
      "step": 2459180
    },
    {
      "epoch": 4.024534736814543,
      "grad_norm": 0.13961757719516754,
      "learning_rate": 1.9637856394509864e-06,
      "loss": 0.0093,
      "step": 2459200
    },
    {
      "epoch": 4.024567467253196,
      "grad_norm": 0.2422124743461609,
      "learning_rate": 1.963719747237469e-06,
      "loss": 0.0123,
      "step": 2459220
    },
    {
      "epoch": 4.024600197691849,
      "grad_norm": 0.16897854208946228,
      "learning_rate": 1.963653855023952e-06,
      "loss": 0.0081,
      "step": 2459240
    },
    {
      "epoch": 4.024632928130503,
      "grad_norm": 0.11037269234657288,
      "learning_rate": 1.9635879628104347e-06,
      "loss": 0.0063,
      "step": 2459260
    },
    {
      "epoch": 4.024665658569156,
      "grad_norm": 0.08318913727998734,
      "learning_rate": 1.963522070596918e-06,
      "loss": 0.0084,
      "step": 2459280
    },
    {
      "epoch": 4.02469838900781,
      "grad_norm": 0.13775983452796936,
      "learning_rate": 1.9634561783834006e-06,
      "loss": 0.0116,
      "step": 2459300
    },
    {
      "epoch": 4.024731119446463,
      "grad_norm": 0.336294025182724,
      "learning_rate": 1.9633902861698833e-06,
      "loss": 0.0118,
      "step": 2459320
    },
    {
      "epoch": 4.024763849885116,
      "grad_norm": 0.2570245862007141,
      "learning_rate": 1.963324393956366e-06,
      "loss": 0.0073,
      "step": 2459340
    },
    {
      "epoch": 4.02479658032377,
      "grad_norm": 0.1946510374546051,
      "learning_rate": 1.963258501742849e-06,
      "loss": 0.0067,
      "step": 2459360
    },
    {
      "epoch": 4.0248293107624225,
      "grad_norm": 0.4865476191043854,
      "learning_rate": 1.9631926095293324e-06,
      "loss": 0.0125,
      "step": 2459380
    },
    {
      "epoch": 4.024862041201076,
      "grad_norm": 0.13226325809955597,
      "learning_rate": 1.963126717315815e-06,
      "loss": 0.0081,
      "step": 2459400
    },
    {
      "epoch": 4.02489477163973,
      "grad_norm": 0.1814102679491043,
      "learning_rate": 1.963060825102298e-06,
      "loss": 0.0078,
      "step": 2459420
    },
    {
      "epoch": 4.024927502078383,
      "grad_norm": 0.29630741477012634,
      "learning_rate": 1.9629949328887806e-06,
      "loss": 0.0107,
      "step": 2459440
    },
    {
      "epoch": 4.024960232517036,
      "grad_norm": 0.16491304337978363,
      "learning_rate": 1.9629290406752633e-06,
      "loss": 0.0091,
      "step": 2459460
    },
    {
      "epoch": 4.0249929629556895,
      "grad_norm": 0.1967639923095703,
      "learning_rate": 1.9628631484617465e-06,
      "loss": 0.0089,
      "step": 2459480
    },
    {
      "epoch": 4.025025693394343,
      "grad_norm": 0.11140865087509155,
      "learning_rate": 1.9627972562482292e-06,
      "loss": 0.0094,
      "step": 2459500
    },
    {
      "epoch": 4.025058423832996,
      "grad_norm": 0.2563015818595886,
      "learning_rate": 1.9627313640347124e-06,
      "loss": 0.0077,
      "step": 2459520
    },
    {
      "epoch": 4.025091154271649,
      "grad_norm": 0.04815705865621567,
      "learning_rate": 1.962665471821195e-06,
      "loss": 0.011,
      "step": 2459540
    },
    {
      "epoch": 4.025123884710303,
      "grad_norm": 0.6743685603141785,
      "learning_rate": 1.962599579607678e-06,
      "loss": 0.0104,
      "step": 2459560
    },
    {
      "epoch": 4.025156615148957,
      "grad_norm": 0.19195719063282013,
      "learning_rate": 1.962533687394161e-06,
      "loss": 0.0137,
      "step": 2459580
    },
    {
      "epoch": 4.025189345587609,
      "grad_norm": 0.5633828043937683,
      "learning_rate": 1.9624677951806438e-06,
      "loss": 0.0089,
      "step": 2459600
    },
    {
      "epoch": 4.025222076026263,
      "grad_norm": 0.13272012770175934,
      "learning_rate": 1.9624019029671265e-06,
      "loss": 0.0058,
      "step": 2459620
    },
    {
      "epoch": 4.0252548064649165,
      "grad_norm": 0.4036106765270233,
      "learning_rate": 1.9623360107536092e-06,
      "loss": 0.0081,
      "step": 2459640
    },
    {
      "epoch": 4.025287536903569,
      "grad_norm": 0.25143253803253174,
      "learning_rate": 1.962270118540092e-06,
      "loss": 0.0072,
      "step": 2459660
    },
    {
      "epoch": 4.025320267342223,
      "grad_norm": 0.39168956875801086,
      "learning_rate": 1.962204226326575e-06,
      "loss": 0.0073,
      "step": 2459680
    },
    {
      "epoch": 4.025352997780876,
      "grad_norm": 0.3619518578052521,
      "learning_rate": 1.9621383341130583e-06,
      "loss": 0.0063,
      "step": 2459700
    },
    {
      "epoch": 4.02538572821953,
      "grad_norm": 0.09530499577522278,
      "learning_rate": 1.962072441899541e-06,
      "loss": 0.0095,
      "step": 2459720
    },
    {
      "epoch": 4.025418458658183,
      "grad_norm": 0.5350338220596313,
      "learning_rate": 1.962006549686024e-06,
      "loss": 0.0062,
      "step": 2459740
    },
    {
      "epoch": 4.025451189096836,
      "grad_norm": 0.13667170703411102,
      "learning_rate": 1.9619406574725065e-06,
      "loss": 0.0079,
      "step": 2459760
    },
    {
      "epoch": 4.02548391953549,
      "grad_norm": 0.4147340655326843,
      "learning_rate": 1.9618747652589897e-06,
      "loss": 0.0106,
      "step": 2459780
    },
    {
      "epoch": 4.025516649974143,
      "grad_norm": 0.21125447750091553,
      "learning_rate": 1.9618088730454724e-06,
      "loss": 0.0132,
      "step": 2459800
    },
    {
      "epoch": 4.025549380412796,
      "grad_norm": 0.4310982823371887,
      "learning_rate": 1.961742980831955e-06,
      "loss": 0.0067,
      "step": 2459820
    },
    {
      "epoch": 4.02558211085145,
      "grad_norm": 0.25729185342788696,
      "learning_rate": 1.961677088618438e-06,
      "loss": 0.0069,
      "step": 2459840
    },
    {
      "epoch": 4.025614841290103,
      "grad_norm": 0.5411153435707092,
      "learning_rate": 1.961611196404921e-06,
      "loss": 0.008,
      "step": 2459860
    },
    {
      "epoch": 4.025647571728756,
      "grad_norm": 0.3814813792705536,
      "learning_rate": 1.961545304191404e-06,
      "loss": 0.0082,
      "step": 2459880
    },
    {
      "epoch": 4.02568030216741,
      "grad_norm": 0.5004116892814636,
      "learning_rate": 1.961479411977887e-06,
      "loss": 0.0094,
      "step": 2459900
    },
    {
      "epoch": 4.025713032606063,
      "grad_norm": 0.6377372145652771,
      "learning_rate": 1.9614135197643697e-06,
      "loss": 0.0104,
      "step": 2459920
    },
    {
      "epoch": 4.025745763044716,
      "grad_norm": 0.06718064844608307,
      "learning_rate": 1.9613476275508525e-06,
      "loss": 0.0115,
      "step": 2459940
    },
    {
      "epoch": 4.02577849348337,
      "grad_norm": 0.712755024433136,
      "learning_rate": 1.961281735337335e-06,
      "loss": 0.0104,
      "step": 2459960
    },
    {
      "epoch": 4.025811223922023,
      "grad_norm": 0.0811559185385704,
      "learning_rate": 1.9612158431238184e-06,
      "loss": 0.008,
      "step": 2459980
    },
    {
      "epoch": 4.025843954360677,
      "grad_norm": 0.13727673888206482,
      "learning_rate": 1.961149950910301e-06,
      "loss": 0.0086,
      "step": 2460000
    },
    {
      "epoch": 4.0258766847993295,
      "grad_norm": 0.3813233971595764,
      "learning_rate": 1.961084058696784e-06,
      "loss": 0.0092,
      "step": 2460020
    },
    {
      "epoch": 4.025909415237983,
      "grad_norm": 0.1684296578168869,
      "learning_rate": 1.9610181664832666e-06,
      "loss": 0.0128,
      "step": 2460040
    },
    {
      "epoch": 4.025942145676637,
      "grad_norm": 0.2672744393348694,
      "learning_rate": 1.9609522742697497e-06,
      "loss": 0.0087,
      "step": 2460060
    },
    {
      "epoch": 4.025974876115289,
      "grad_norm": 0.3920278251171112,
      "learning_rate": 1.9608863820562325e-06,
      "loss": 0.008,
      "step": 2460080
    },
    {
      "epoch": 4.026007606553943,
      "grad_norm": 0.23811978101730347,
      "learning_rate": 1.9608204898427157e-06,
      "loss": 0.0074,
      "step": 2460100
    },
    {
      "epoch": 4.026040336992597,
      "grad_norm": 0.1158982515335083,
      "learning_rate": 1.9607545976291984e-06,
      "loss": 0.0055,
      "step": 2460120
    },
    {
      "epoch": 4.02607306743125,
      "grad_norm": 0.3185729384422302,
      "learning_rate": 1.960688705415681e-06,
      "loss": 0.0088,
      "step": 2460140
    },
    {
      "epoch": 4.026105797869903,
      "grad_norm": 0.12334030121564865,
      "learning_rate": 1.960622813202164e-06,
      "loss": 0.0104,
      "step": 2460160
    },
    {
      "epoch": 4.0261385283085565,
      "grad_norm": 0.5282499194145203,
      "learning_rate": 1.960556920988647e-06,
      "loss": 0.0088,
      "step": 2460180
    },
    {
      "epoch": 4.02617125874721,
      "grad_norm": 0.18235749006271362,
      "learning_rate": 1.9604910287751298e-06,
      "loss": 0.0083,
      "step": 2460200
    },
    {
      "epoch": 4.026203989185863,
      "grad_norm": 0.431607186794281,
      "learning_rate": 1.9604251365616125e-06,
      "loss": 0.0125,
      "step": 2460220
    },
    {
      "epoch": 4.026236719624516,
      "grad_norm": 0.5203619599342346,
      "learning_rate": 1.9603592443480957e-06,
      "loss": 0.0078,
      "step": 2460240
    },
    {
      "epoch": 4.02626945006317,
      "grad_norm": 0.1784922182559967,
      "learning_rate": 1.9602933521345784e-06,
      "loss": 0.0116,
      "step": 2460260
    },
    {
      "epoch": 4.026302180501823,
      "grad_norm": 0.1398245096206665,
      "learning_rate": 1.960227459921061e-06,
      "loss": 0.0084,
      "step": 2460280
    },
    {
      "epoch": 4.026334910940476,
      "grad_norm": 0.16072887182235718,
      "learning_rate": 1.9601615677075443e-06,
      "loss": 0.0123,
      "step": 2460300
    },
    {
      "epoch": 4.02636764137913,
      "grad_norm": 0.03844393789768219,
      "learning_rate": 1.960095675494027e-06,
      "loss": 0.007,
      "step": 2460320
    },
    {
      "epoch": 4.026400371817783,
      "grad_norm": 0.2643568813800812,
      "learning_rate": 1.96002978328051e-06,
      "loss": 0.0092,
      "step": 2460340
    },
    {
      "epoch": 4.026433102256436,
      "grad_norm": 0.15131962299346924,
      "learning_rate": 1.9599638910669925e-06,
      "loss": 0.007,
      "step": 2460360
    },
    {
      "epoch": 4.02646583269509,
      "grad_norm": 0.12300613522529602,
      "learning_rate": 1.9598979988534757e-06,
      "loss": 0.0088,
      "step": 2460380
    },
    {
      "epoch": 4.026498563133743,
      "grad_norm": 0.5612669587135315,
      "learning_rate": 1.9598321066399584e-06,
      "loss": 0.0146,
      "step": 2460400
    },
    {
      "epoch": 4.026531293572396,
      "grad_norm": 0.239237442612648,
      "learning_rate": 1.9597662144264416e-06,
      "loss": 0.0073,
      "step": 2460420
    },
    {
      "epoch": 4.02656402401105,
      "grad_norm": 0.06394102424383163,
      "learning_rate": 1.9597003222129243e-06,
      "loss": 0.0096,
      "step": 2460440
    },
    {
      "epoch": 4.026596754449703,
      "grad_norm": 0.17520974576473236,
      "learning_rate": 1.959634429999407e-06,
      "loss": 0.0089,
      "step": 2460460
    },
    {
      "epoch": 4.026629484888357,
      "grad_norm": 0.3318140208721161,
      "learning_rate": 1.95956853778589e-06,
      "loss": 0.009,
      "step": 2460480
    },
    {
      "epoch": 4.0266622153270095,
      "grad_norm": 0.08218913525342941,
      "learning_rate": 1.959502645572373e-06,
      "loss": 0.0053,
      "step": 2460500
    },
    {
      "epoch": 4.026694945765663,
      "grad_norm": 0.1630568504333496,
      "learning_rate": 1.9594367533588557e-06,
      "loss": 0.0061,
      "step": 2460520
    },
    {
      "epoch": 4.026727676204317,
      "grad_norm": 0.3516599237918854,
      "learning_rate": 1.9593708611453385e-06,
      "loss": 0.0075,
      "step": 2460540
    },
    {
      "epoch": 4.026760406642969,
      "grad_norm": 0.33591428399086,
      "learning_rate": 1.959304968931821e-06,
      "loss": 0.005,
      "step": 2460560
    },
    {
      "epoch": 4.026793137081623,
      "grad_norm": 0.26554062962532043,
      "learning_rate": 1.9592390767183044e-06,
      "loss": 0.0086,
      "step": 2460580
    },
    {
      "epoch": 4.026825867520277,
      "grad_norm": 0.44535014033317566,
      "learning_rate": 1.9591731845047875e-06,
      "loss": 0.0086,
      "step": 2460600
    },
    {
      "epoch": 4.02685859795893,
      "grad_norm": 0.3542102575302124,
      "learning_rate": 1.9591072922912703e-06,
      "loss": 0.0065,
      "step": 2460620
    },
    {
      "epoch": 4.026891328397583,
      "grad_norm": 0.16342133283615112,
      "learning_rate": 1.959041400077753e-06,
      "loss": 0.0052,
      "step": 2460640
    },
    {
      "epoch": 4.0269240588362365,
      "grad_norm": 0.28639093041419983,
      "learning_rate": 1.9589755078642358e-06,
      "loss": 0.0053,
      "step": 2460660
    },
    {
      "epoch": 4.02695678927489,
      "grad_norm": 0.5365907549858093,
      "learning_rate": 1.9589096156507185e-06,
      "loss": 0.0067,
      "step": 2460680
    },
    {
      "epoch": 4.026989519713543,
      "grad_norm": 0.27861863374710083,
      "learning_rate": 1.9588437234372017e-06,
      "loss": 0.0102,
      "step": 2460700
    },
    {
      "epoch": 4.027022250152196,
      "grad_norm": 0.2097417116165161,
      "learning_rate": 1.9587778312236844e-06,
      "loss": 0.0115,
      "step": 2460720
    },
    {
      "epoch": 4.02705498059085,
      "grad_norm": 0.06064033508300781,
      "learning_rate": 1.958711939010167e-06,
      "loss": 0.0078,
      "step": 2460740
    },
    {
      "epoch": 4.027087711029504,
      "grad_norm": 0.12821070849895477,
      "learning_rate": 1.9586460467966503e-06,
      "loss": 0.0094,
      "step": 2460760
    },
    {
      "epoch": 4.027120441468156,
      "grad_norm": 0.12459776550531387,
      "learning_rate": 1.958580154583133e-06,
      "loss": 0.0087,
      "step": 2460780
    },
    {
      "epoch": 4.02715317190681,
      "grad_norm": 0.051085684448480606,
      "learning_rate": 1.958514262369616e-06,
      "loss": 0.006,
      "step": 2460800
    },
    {
      "epoch": 4.0271859023454635,
      "grad_norm": 0.1575252115726471,
      "learning_rate": 1.958448370156099e-06,
      "loss": 0.0069,
      "step": 2460820
    },
    {
      "epoch": 4.027218632784116,
      "grad_norm": 0.1582622528076172,
      "learning_rate": 1.9583824779425817e-06,
      "loss": 0.0097,
      "step": 2460840
    },
    {
      "epoch": 4.02725136322277,
      "grad_norm": 0.09041822701692581,
      "learning_rate": 1.9583165857290644e-06,
      "loss": 0.0117,
      "step": 2460860
    },
    {
      "epoch": 4.027284093661423,
      "grad_norm": 0.37021300196647644,
      "learning_rate": 1.958250693515547e-06,
      "loss": 0.0124,
      "step": 2460880
    },
    {
      "epoch": 4.027316824100077,
      "grad_norm": 0.2293064147233963,
      "learning_rate": 1.9581848013020303e-06,
      "loss": 0.0091,
      "step": 2460900
    },
    {
      "epoch": 4.02734955453873,
      "grad_norm": 0.2338695228099823,
      "learning_rate": 1.958118909088513e-06,
      "loss": 0.0083,
      "step": 2460920
    },
    {
      "epoch": 4.027382284977383,
      "grad_norm": 0.10786450654268265,
      "learning_rate": 1.9580530168749962e-06,
      "loss": 0.0057,
      "step": 2460940
    },
    {
      "epoch": 4.027415015416037,
      "grad_norm": 0.06461118906736374,
      "learning_rate": 1.957987124661479e-06,
      "loss": 0.0102,
      "step": 2460960
    },
    {
      "epoch": 4.02744774585469,
      "grad_norm": 0.301619291305542,
      "learning_rate": 1.9579212324479617e-06,
      "loss": 0.0084,
      "step": 2460980
    },
    {
      "epoch": 4.027480476293343,
      "grad_norm": 0.1628316342830658,
      "learning_rate": 1.957855340234445e-06,
      "loss": 0.0083,
      "step": 2461000
    },
    {
      "epoch": 4.027513206731997,
      "grad_norm": 0.5095358490943909,
      "learning_rate": 1.9577894480209276e-06,
      "loss": 0.0084,
      "step": 2461020
    },
    {
      "epoch": 4.02754593717065,
      "grad_norm": 0.5048893690109253,
      "learning_rate": 1.9577235558074103e-06,
      "loss": 0.0111,
      "step": 2461040
    },
    {
      "epoch": 4.027578667609303,
      "grad_norm": 0.08732186257839203,
      "learning_rate": 1.957657663593893e-06,
      "loss": 0.0117,
      "step": 2461060
    },
    {
      "epoch": 4.027611398047957,
      "grad_norm": 0.22178077697753906,
      "learning_rate": 1.957591771380376e-06,
      "loss": 0.0075,
      "step": 2461080
    },
    {
      "epoch": 4.02764412848661,
      "grad_norm": 0.17273491621017456,
      "learning_rate": 1.957525879166859e-06,
      "loss": 0.005,
      "step": 2461100
    },
    {
      "epoch": 4.027676858925263,
      "grad_norm": 0.2042054980993271,
      "learning_rate": 1.957459986953342e-06,
      "loss": 0.0071,
      "step": 2461120
    },
    {
      "epoch": 4.0277095893639165,
      "grad_norm": 0.2117854803800583,
      "learning_rate": 1.957394094739825e-06,
      "loss": 0.0138,
      "step": 2461140
    },
    {
      "epoch": 4.02774231980257,
      "grad_norm": 0.10673896968364716,
      "learning_rate": 1.9573282025263076e-06,
      "loss": 0.0089,
      "step": 2461160
    },
    {
      "epoch": 4.027775050241224,
      "grad_norm": 0.21082629263401031,
      "learning_rate": 1.9572623103127904e-06,
      "loss": 0.0116,
      "step": 2461180
    },
    {
      "epoch": 4.027807780679876,
      "grad_norm": 0.12221921980381012,
      "learning_rate": 1.9571964180992735e-06,
      "loss": 0.008,
      "step": 2461200
    },
    {
      "epoch": 4.02784051111853,
      "grad_norm": 0.09332055598497391,
      "learning_rate": 1.9571305258857563e-06,
      "loss": 0.0077,
      "step": 2461220
    },
    {
      "epoch": 4.027873241557184,
      "grad_norm": 0.21707803010940552,
      "learning_rate": 1.957064633672239e-06,
      "loss": 0.0138,
      "step": 2461240
    },
    {
      "epoch": 4.027905971995836,
      "grad_norm": 0.7018348574638367,
      "learning_rate": 1.9569987414587218e-06,
      "loss": 0.0087,
      "step": 2461260
    },
    {
      "epoch": 4.02793870243449,
      "grad_norm": 0.23061759769916534,
      "learning_rate": 1.956932849245205e-06,
      "loss": 0.0109,
      "step": 2461280
    },
    {
      "epoch": 4.0279714328731435,
      "grad_norm": 0.2366516888141632,
      "learning_rate": 1.9568669570316877e-06,
      "loss": 0.0143,
      "step": 2461300
    },
    {
      "epoch": 4.028004163311797,
      "grad_norm": 0.3101608455181122,
      "learning_rate": 1.956801064818171e-06,
      "loss": 0.0078,
      "step": 2461320
    },
    {
      "epoch": 4.02803689375045,
      "grad_norm": 0.2655923366546631,
      "learning_rate": 1.9567351726046536e-06,
      "loss": 0.0088,
      "step": 2461340
    },
    {
      "epoch": 4.028069624189103,
      "grad_norm": 0.8880305290222168,
      "learning_rate": 1.9566692803911363e-06,
      "loss": 0.0125,
      "step": 2461360
    },
    {
      "epoch": 4.028102354627757,
      "grad_norm": 0.3173297047615051,
      "learning_rate": 1.956603388177619e-06,
      "loss": 0.0098,
      "step": 2461380
    },
    {
      "epoch": 4.02813508506641,
      "grad_norm": 0.4109678268432617,
      "learning_rate": 1.956537495964102e-06,
      "loss": 0.006,
      "step": 2461400
    },
    {
      "epoch": 4.028167815505063,
      "grad_norm": 0.36441314220428467,
      "learning_rate": 1.956471603750585e-06,
      "loss": 0.0114,
      "step": 2461420
    },
    {
      "epoch": 4.028200545943717,
      "grad_norm": 0.17536336183547974,
      "learning_rate": 1.9564057115370677e-06,
      "loss": 0.0079,
      "step": 2461440
    },
    {
      "epoch": 4.0282332763823705,
      "grad_norm": 0.46086037158966064,
      "learning_rate": 1.956339819323551e-06,
      "loss": 0.0082,
      "step": 2461460
    },
    {
      "epoch": 4.028266006821023,
      "grad_norm": 0.23133714497089386,
      "learning_rate": 1.9562739271100336e-06,
      "loss": 0.012,
      "step": 2461480
    },
    {
      "epoch": 4.028298737259677,
      "grad_norm": 0.3769785463809967,
      "learning_rate": 1.9562080348965163e-06,
      "loss": 0.0156,
      "step": 2461500
    },
    {
      "epoch": 4.02833146769833,
      "grad_norm": 0.2743220627307892,
      "learning_rate": 1.9561421426829995e-06,
      "loss": 0.0094,
      "step": 2461520
    },
    {
      "epoch": 4.028364198136983,
      "grad_norm": 0.09096802771091461,
      "learning_rate": 1.9560762504694822e-06,
      "loss": 0.0137,
      "step": 2461540
    },
    {
      "epoch": 4.028396928575637,
      "grad_norm": 0.262140691280365,
      "learning_rate": 1.956010358255965e-06,
      "loss": 0.0069,
      "step": 2461560
    },
    {
      "epoch": 4.02842965901429,
      "grad_norm": 0.20100535452365875,
      "learning_rate": 1.9559444660424477e-06,
      "loss": 0.0141,
      "step": 2461580
    },
    {
      "epoch": 4.028462389452944,
      "grad_norm": 0.4032479226589203,
      "learning_rate": 1.955878573828931e-06,
      "loss": 0.0118,
      "step": 2461600
    },
    {
      "epoch": 4.028495119891597,
      "grad_norm": 0.2687181234359741,
      "learning_rate": 1.9558126816154136e-06,
      "loss": 0.0065,
      "step": 2461620
    },
    {
      "epoch": 4.02852785033025,
      "grad_norm": 0.32190826535224915,
      "learning_rate": 1.9557467894018968e-06,
      "loss": 0.0093,
      "step": 2461640
    },
    {
      "epoch": 4.028560580768904,
      "grad_norm": 0.09686078131198883,
      "learning_rate": 1.9556808971883795e-06,
      "loss": 0.0144,
      "step": 2461660
    },
    {
      "epoch": 4.0285933112075565,
      "grad_norm": 0.14734649658203125,
      "learning_rate": 1.9556150049748623e-06,
      "loss": 0.0076,
      "step": 2461680
    },
    {
      "epoch": 4.02862604164621,
      "grad_norm": 0.061238765716552734,
      "learning_rate": 1.955549112761345e-06,
      "loss": 0.0144,
      "step": 2461700
    },
    {
      "epoch": 4.028658772084864,
      "grad_norm": 0.21768227219581604,
      "learning_rate": 1.955483220547828e-06,
      "loss": 0.0096,
      "step": 2461720
    },
    {
      "epoch": 4.028691502523516,
      "grad_norm": 0.15014202892780304,
      "learning_rate": 1.955417328334311e-06,
      "loss": 0.0107,
      "step": 2461740
    },
    {
      "epoch": 4.02872423296217,
      "grad_norm": 0.045071814209222794,
      "learning_rate": 1.9553514361207936e-06,
      "loss": 0.0094,
      "step": 2461760
    },
    {
      "epoch": 4.028756963400824,
      "grad_norm": 0.32020238041877747,
      "learning_rate": 1.9552855439072764e-06,
      "loss": 0.0095,
      "step": 2461780
    },
    {
      "epoch": 4.028789693839477,
      "grad_norm": 0.28920722007751465,
      "learning_rate": 1.9552196516937595e-06,
      "loss": 0.0122,
      "step": 2461800
    },
    {
      "epoch": 4.02882242427813,
      "grad_norm": 0.1042509526014328,
      "learning_rate": 1.9551537594802427e-06,
      "loss": 0.0068,
      "step": 2461820
    },
    {
      "epoch": 4.0288551547167835,
      "grad_norm": 0.12668342888355255,
      "learning_rate": 1.9550878672667254e-06,
      "loss": 0.0127,
      "step": 2461840
    },
    {
      "epoch": 4.028887885155437,
      "grad_norm": 0.09533432871103287,
      "learning_rate": 1.955021975053208e-06,
      "loss": 0.0088,
      "step": 2461860
    },
    {
      "epoch": 4.02892061559409,
      "grad_norm": 0.12510228157043457,
      "learning_rate": 1.954956082839691e-06,
      "loss": 0.0085,
      "step": 2461880
    },
    {
      "epoch": 4.028953346032743,
      "grad_norm": 0.10342497378587723,
      "learning_rate": 1.9548901906261737e-06,
      "loss": 0.0072,
      "step": 2461900
    },
    {
      "epoch": 4.028986076471397,
      "grad_norm": 0.38631027936935425,
      "learning_rate": 1.954824298412657e-06,
      "loss": 0.0082,
      "step": 2461920
    },
    {
      "epoch": 4.0290188069100505,
      "grad_norm": 0.1746017038822174,
      "learning_rate": 1.9547584061991396e-06,
      "loss": 0.0086,
      "step": 2461940
    },
    {
      "epoch": 4.029051537348703,
      "grad_norm": 0.3293020725250244,
      "learning_rate": 1.9546925139856223e-06,
      "loss": 0.0068,
      "step": 2461960
    },
    {
      "epoch": 4.029084267787357,
      "grad_norm": 0.18630126118659973,
      "learning_rate": 1.954626621772105e-06,
      "loss": 0.0059,
      "step": 2461980
    },
    {
      "epoch": 4.02911699822601,
      "grad_norm": 0.1562243551015854,
      "learning_rate": 1.954560729558588e-06,
      "loss": 0.0073,
      "step": 2462000
    },
    {
      "epoch": 4.029149728664663,
      "grad_norm": 0.2037496268749237,
      "learning_rate": 1.9544948373450714e-06,
      "loss": 0.0068,
      "step": 2462020
    },
    {
      "epoch": 4.029182459103317,
      "grad_norm": 0.1562003344297409,
      "learning_rate": 1.954428945131554e-06,
      "loss": 0.0074,
      "step": 2462040
    },
    {
      "epoch": 4.02921518954197,
      "grad_norm": 0.24731820821762085,
      "learning_rate": 1.954363052918037e-06,
      "loss": 0.0092,
      "step": 2462060
    },
    {
      "epoch": 4.029247919980624,
      "grad_norm": 0.4441448152065277,
      "learning_rate": 1.9542971607045196e-06,
      "loss": 0.0112,
      "step": 2462080
    },
    {
      "epoch": 4.029280650419277,
      "grad_norm": 0.23874607682228088,
      "learning_rate": 1.9542312684910023e-06,
      "loss": 0.0119,
      "step": 2462100
    },
    {
      "epoch": 4.02931338085793,
      "grad_norm": 0.11214320361614227,
      "learning_rate": 1.9541653762774855e-06,
      "loss": 0.0086,
      "step": 2462120
    },
    {
      "epoch": 4.029346111296584,
      "grad_norm": 0.35477811098098755,
      "learning_rate": 1.9540994840639682e-06,
      "loss": 0.0125,
      "step": 2462140
    },
    {
      "epoch": 4.0293788417352365,
      "grad_norm": 0.29855605959892273,
      "learning_rate": 1.9540335918504514e-06,
      "loss": 0.011,
      "step": 2462160
    },
    {
      "epoch": 4.02941157217389,
      "grad_norm": 0.0957728922367096,
      "learning_rate": 1.953967699636934e-06,
      "loss": 0.0072,
      "step": 2462180
    },
    {
      "epoch": 4.029444302612544,
      "grad_norm": 0.18679319322109222,
      "learning_rate": 1.953901807423417e-06,
      "loss": 0.0099,
      "step": 2462200
    },
    {
      "epoch": 4.029477033051197,
      "grad_norm": 0.09318701177835464,
      "learning_rate": 1.9538359152099e-06,
      "loss": 0.0072,
      "step": 2462220
    },
    {
      "epoch": 4.02950976348985,
      "grad_norm": 0.13578443229198456,
      "learning_rate": 1.9537700229963828e-06,
      "loss": 0.0074,
      "step": 2462240
    },
    {
      "epoch": 4.029542493928504,
      "grad_norm": 0.5443562269210815,
      "learning_rate": 1.9537041307828655e-06,
      "loss": 0.007,
      "step": 2462260
    },
    {
      "epoch": 4.029575224367157,
      "grad_norm": 0.3107396960258484,
      "learning_rate": 1.9536382385693483e-06,
      "loss": 0.007,
      "step": 2462280
    },
    {
      "epoch": 4.02960795480581,
      "grad_norm": 0.0775565505027771,
      "learning_rate": 1.953572346355831e-06,
      "loss": 0.011,
      "step": 2462300
    },
    {
      "epoch": 4.0296406852444635,
      "grad_norm": 0.30377647280693054,
      "learning_rate": 1.953506454142314e-06,
      "loss": 0.0125,
      "step": 2462320
    },
    {
      "epoch": 4.029673415683117,
      "grad_norm": 0.09693601727485657,
      "learning_rate": 1.9534405619287973e-06,
      "loss": 0.009,
      "step": 2462340
    },
    {
      "epoch": 4.029706146121771,
      "grad_norm": 0.31618621945381165,
      "learning_rate": 1.95337466971528e-06,
      "loss": 0.009,
      "step": 2462360
    },
    {
      "epoch": 4.029738876560423,
      "grad_norm": 0.7047346830368042,
      "learning_rate": 1.953308777501763e-06,
      "loss": 0.0134,
      "step": 2462380
    },
    {
      "epoch": 4.029771606999077,
      "grad_norm": 0.06409790366888046,
      "learning_rate": 1.9532428852882455e-06,
      "loss": 0.0092,
      "step": 2462400
    },
    {
      "epoch": 4.029804337437731,
      "grad_norm": 0.19079998135566711,
      "learning_rate": 1.9531769930747287e-06,
      "loss": 0.0093,
      "step": 2462420
    },
    {
      "epoch": 4.029837067876383,
      "grad_norm": 0.3824528455734253,
      "learning_rate": 1.9531111008612114e-06,
      "loss": 0.009,
      "step": 2462440
    },
    {
      "epoch": 4.029869798315037,
      "grad_norm": 0.15241847932338715,
      "learning_rate": 1.953045208647694e-06,
      "loss": 0.013,
      "step": 2462460
    },
    {
      "epoch": 4.0299025287536905,
      "grad_norm": 0.27405232191085815,
      "learning_rate": 1.952979316434177e-06,
      "loss": 0.0101,
      "step": 2462480
    },
    {
      "epoch": 4.029935259192344,
      "grad_norm": 0.09451175481081009,
      "learning_rate": 1.95291342422066e-06,
      "loss": 0.0166,
      "step": 2462500
    },
    {
      "epoch": 4.029967989630997,
      "grad_norm": 0.6123459935188293,
      "learning_rate": 1.952847532007143e-06,
      "loss": 0.0112,
      "step": 2462520
    },
    {
      "epoch": 4.03000072006965,
      "grad_norm": 0.12219134718179703,
      "learning_rate": 1.952781639793626e-06,
      "loss": 0.0065,
      "step": 2462540
    },
    {
      "epoch": 4.030033450508304,
      "grad_norm": 0.32821229100227356,
      "learning_rate": 1.9527157475801087e-06,
      "loss": 0.0054,
      "step": 2462560
    },
    {
      "epoch": 4.030066180946957,
      "grad_norm": 0.28555065393447876,
      "learning_rate": 1.9526498553665915e-06,
      "loss": 0.0091,
      "step": 2462580
    },
    {
      "epoch": 4.03009891138561,
      "grad_norm": 0.22987666726112366,
      "learning_rate": 1.9525839631530742e-06,
      "loss": 0.0104,
      "step": 2462600
    },
    {
      "epoch": 4.030131641824264,
      "grad_norm": 0.099583201110363,
      "learning_rate": 1.9525180709395574e-06,
      "loss": 0.0084,
      "step": 2462620
    },
    {
      "epoch": 4.0301643722629175,
      "grad_norm": 0.10978962481021881,
      "learning_rate": 1.95245217872604e-06,
      "loss": 0.0059,
      "step": 2462640
    },
    {
      "epoch": 4.03019710270157,
      "grad_norm": 0.5635766386985779,
      "learning_rate": 1.952386286512523e-06,
      "loss": 0.0163,
      "step": 2462660
    },
    {
      "epoch": 4.030229833140224,
      "grad_norm": 0.15044578909873962,
      "learning_rate": 1.9523203942990056e-06,
      "loss": 0.006,
      "step": 2462680
    },
    {
      "epoch": 4.030262563578877,
      "grad_norm": 0.6343495845794678,
      "learning_rate": 1.9522545020854888e-06,
      "loss": 0.0096,
      "step": 2462700
    },
    {
      "epoch": 4.03029529401753,
      "grad_norm": 0.0458759069442749,
      "learning_rate": 1.9521886098719715e-06,
      "loss": 0.011,
      "step": 2462720
    },
    {
      "epoch": 4.030328024456184,
      "grad_norm": 0.3508075177669525,
      "learning_rate": 1.9521227176584547e-06,
      "loss": 0.0111,
      "step": 2462740
    },
    {
      "epoch": 4.030360754894837,
      "grad_norm": 0.23258927464485168,
      "learning_rate": 1.9520568254449374e-06,
      "loss": 0.0078,
      "step": 2462760
    },
    {
      "epoch": 4.030393485333491,
      "grad_norm": 0.5772624611854553,
      "learning_rate": 1.95199093323142e-06,
      "loss": 0.011,
      "step": 2462780
    },
    {
      "epoch": 4.0304262157721435,
      "grad_norm": 0.2595968246459961,
      "learning_rate": 1.951925041017903e-06,
      "loss": 0.0108,
      "step": 2462800
    },
    {
      "epoch": 4.030458946210797,
      "grad_norm": 0.09462809562683105,
      "learning_rate": 1.951859148804386e-06,
      "loss": 0.0148,
      "step": 2462820
    },
    {
      "epoch": 4.030491676649451,
      "grad_norm": 0.07810511440038681,
      "learning_rate": 1.9517932565908688e-06,
      "loss": 0.0087,
      "step": 2462840
    },
    {
      "epoch": 4.0305244070881034,
      "grad_norm": 0.11530053615570068,
      "learning_rate": 1.9517273643773515e-06,
      "loss": 0.007,
      "step": 2462860
    },
    {
      "epoch": 4.030557137526757,
      "grad_norm": 0.8297198414802551,
      "learning_rate": 1.9516614721638347e-06,
      "loss": 0.0112,
      "step": 2462880
    },
    {
      "epoch": 4.030589867965411,
      "grad_norm": 0.1377813220024109,
      "learning_rate": 1.9515955799503174e-06,
      "loss": 0.0082,
      "step": 2462900
    },
    {
      "epoch": 4.030622598404064,
      "grad_norm": 0.2020787000656128,
      "learning_rate": 1.9515296877368e-06,
      "loss": 0.0074,
      "step": 2462920
    },
    {
      "epoch": 4.030655328842717,
      "grad_norm": 0.40957996249198914,
      "learning_rate": 1.9514637955232833e-06,
      "loss": 0.0085,
      "step": 2462940
    },
    {
      "epoch": 4.0306880592813705,
      "grad_norm": 0.15955808758735657,
      "learning_rate": 1.951397903309766e-06,
      "loss": 0.0142,
      "step": 2462960
    },
    {
      "epoch": 4.030720789720024,
      "grad_norm": 0.16290266811847687,
      "learning_rate": 1.951332011096249e-06,
      "loss": 0.0069,
      "step": 2462980
    },
    {
      "epoch": 4.030753520158677,
      "grad_norm": 0.5045920014381409,
      "learning_rate": 1.9512661188827315e-06,
      "loss": 0.0061,
      "step": 2463000
    },
    {
      "epoch": 4.03078625059733,
      "grad_norm": 0.6620188355445862,
      "learning_rate": 1.9512002266692147e-06,
      "loss": 0.0165,
      "step": 2463020
    },
    {
      "epoch": 4.030818981035984,
      "grad_norm": 0.3143284320831299,
      "learning_rate": 1.951134334455698e-06,
      "loss": 0.0123,
      "step": 2463040
    },
    {
      "epoch": 4.030851711474638,
      "grad_norm": 0.21602092683315277,
      "learning_rate": 1.9510684422421806e-06,
      "loss": 0.0083,
      "step": 2463060
    },
    {
      "epoch": 4.03088444191329,
      "grad_norm": 0.2717735469341278,
      "learning_rate": 1.9510025500286634e-06,
      "loss": 0.0074,
      "step": 2463080
    },
    {
      "epoch": 4.030917172351944,
      "grad_norm": 0.34929850697517395,
      "learning_rate": 1.950936657815146e-06,
      "loss": 0.0074,
      "step": 2463100
    },
    {
      "epoch": 4.0309499027905975,
      "grad_norm": 0.061560019850730896,
      "learning_rate": 1.950870765601629e-06,
      "loss": 0.0068,
      "step": 2463120
    },
    {
      "epoch": 4.03098263322925,
      "grad_norm": 0.29233095049858093,
      "learning_rate": 1.950804873388112e-06,
      "loss": 0.0111,
      "step": 2463140
    },
    {
      "epoch": 4.031015363667904,
      "grad_norm": 0.34959155321121216,
      "learning_rate": 1.9507389811745947e-06,
      "loss": 0.0066,
      "step": 2463160
    },
    {
      "epoch": 4.031048094106557,
      "grad_norm": 0.15843816101551056,
      "learning_rate": 1.9506730889610775e-06,
      "loss": 0.0105,
      "step": 2463180
    },
    {
      "epoch": 4.031080824545211,
      "grad_norm": 0.6262444853782654,
      "learning_rate": 1.9506071967475602e-06,
      "loss": 0.0052,
      "step": 2463200
    },
    {
      "epoch": 4.031113554983864,
      "grad_norm": 0.17553608119487762,
      "learning_rate": 1.9505413045340434e-06,
      "loss": 0.0067,
      "step": 2463220
    },
    {
      "epoch": 4.031146285422517,
      "grad_norm": 0.34017494320869446,
      "learning_rate": 1.9504754123205265e-06,
      "loss": 0.0097,
      "step": 2463240
    },
    {
      "epoch": 4.031179015861171,
      "grad_norm": 0.1590772122144699,
      "learning_rate": 1.9504095201070093e-06,
      "loss": 0.0101,
      "step": 2463260
    },
    {
      "epoch": 4.031211746299824,
      "grad_norm": 0.18693193793296814,
      "learning_rate": 1.950343627893492e-06,
      "loss": 0.0095,
      "step": 2463280
    },
    {
      "epoch": 4.031244476738477,
      "grad_norm": 0.3437880277633667,
      "learning_rate": 1.9502777356799748e-06,
      "loss": 0.0084,
      "step": 2463300
    },
    {
      "epoch": 4.031277207177131,
      "grad_norm": 0.2419937252998352,
      "learning_rate": 1.9502118434664575e-06,
      "loss": 0.0116,
      "step": 2463320
    },
    {
      "epoch": 4.0313099376157835,
      "grad_norm": 0.3367001712322235,
      "learning_rate": 1.9501459512529407e-06,
      "loss": 0.0081,
      "step": 2463340
    },
    {
      "epoch": 4.031342668054437,
      "grad_norm": 0.18383774161338806,
      "learning_rate": 1.9500800590394234e-06,
      "loss": 0.0212,
      "step": 2463360
    },
    {
      "epoch": 4.031375398493091,
      "grad_norm": 0.23410432040691376,
      "learning_rate": 1.950014166825906e-06,
      "loss": 0.0076,
      "step": 2463380
    },
    {
      "epoch": 4.031408128931744,
      "grad_norm": 0.1916472613811493,
      "learning_rate": 1.9499482746123893e-06,
      "loss": 0.0055,
      "step": 2463400
    },
    {
      "epoch": 4.031440859370397,
      "grad_norm": 0.3695494830608368,
      "learning_rate": 1.949882382398872e-06,
      "loss": 0.01,
      "step": 2463420
    },
    {
      "epoch": 4.031473589809051,
      "grad_norm": 0.2261231690645218,
      "learning_rate": 1.9498164901853552e-06,
      "loss": 0.0089,
      "step": 2463440
    },
    {
      "epoch": 4.031506320247704,
      "grad_norm": 0.2910727262496948,
      "learning_rate": 1.949750597971838e-06,
      "loss": 0.0116,
      "step": 2463460
    },
    {
      "epoch": 4.031539050686357,
      "grad_norm": 0.3268176019191742,
      "learning_rate": 1.9496847057583207e-06,
      "loss": 0.0104,
      "step": 2463480
    },
    {
      "epoch": 4.0315717811250105,
      "grad_norm": 0.06399096548557281,
      "learning_rate": 1.9496188135448034e-06,
      "loss": 0.0086,
      "step": 2463500
    },
    {
      "epoch": 4.031604511563664,
      "grad_norm": 0.1292296200990677,
      "learning_rate": 1.949552921331286e-06,
      "loss": 0.0087,
      "step": 2463520
    },
    {
      "epoch": 4.031637242002318,
      "grad_norm": 0.2647121548652649,
      "learning_rate": 1.9494870291177693e-06,
      "loss": 0.0101,
      "step": 2463540
    },
    {
      "epoch": 4.03166997244097,
      "grad_norm": 0.32528427243232727,
      "learning_rate": 1.949421136904252e-06,
      "loss": 0.0094,
      "step": 2463560
    },
    {
      "epoch": 4.031702702879624,
      "grad_norm": 0.2599562108516693,
      "learning_rate": 1.9493552446907352e-06,
      "loss": 0.0137,
      "step": 2463580
    },
    {
      "epoch": 4.0317354333182776,
      "grad_norm": 0.2501438558101654,
      "learning_rate": 1.949289352477218e-06,
      "loss": 0.0089,
      "step": 2463600
    },
    {
      "epoch": 4.03176816375693,
      "grad_norm": 0.4206993281841278,
      "learning_rate": 1.9492234602637007e-06,
      "loss": 0.0108,
      "step": 2463620
    },
    {
      "epoch": 4.031800894195584,
      "grad_norm": 0.08200445771217346,
      "learning_rate": 1.949157568050184e-06,
      "loss": 0.0083,
      "step": 2463640
    },
    {
      "epoch": 4.0318336246342374,
      "grad_norm": 0.500296413898468,
      "learning_rate": 1.9490916758366666e-06,
      "loss": 0.007,
      "step": 2463660
    },
    {
      "epoch": 4.031866355072891,
      "grad_norm": 0.37819841504096985,
      "learning_rate": 1.9490257836231494e-06,
      "loss": 0.0065,
      "step": 2463680
    },
    {
      "epoch": 4.031899085511544,
      "grad_norm": 0.2897235155105591,
      "learning_rate": 1.948959891409632e-06,
      "loss": 0.011,
      "step": 2463700
    },
    {
      "epoch": 4.031931815950197,
      "grad_norm": 0.08583647012710571,
      "learning_rate": 1.948893999196115e-06,
      "loss": 0.0098,
      "step": 2463720
    },
    {
      "epoch": 4.031964546388851,
      "grad_norm": 0.32946547865867615,
      "learning_rate": 1.948828106982598e-06,
      "loss": 0.0095,
      "step": 2463740
    },
    {
      "epoch": 4.031997276827504,
      "grad_norm": 0.19348636269569397,
      "learning_rate": 1.948762214769081e-06,
      "loss": 0.0069,
      "step": 2463760
    },
    {
      "epoch": 4.032030007266157,
      "grad_norm": 0.17734816670417786,
      "learning_rate": 1.948696322555564e-06,
      "loss": 0.0119,
      "step": 2463780
    },
    {
      "epoch": 4.032062737704811,
      "grad_norm": 0.28981921076774597,
      "learning_rate": 1.9486304303420466e-06,
      "loss": 0.0103,
      "step": 2463800
    },
    {
      "epoch": 4.032095468143464,
      "grad_norm": 0.7837846279144287,
      "learning_rate": 1.9485645381285294e-06,
      "loss": 0.0099,
      "step": 2463820
    },
    {
      "epoch": 4.032128198582117,
      "grad_norm": 0.16838166117668152,
      "learning_rate": 1.9484986459150125e-06,
      "loss": 0.009,
      "step": 2463840
    },
    {
      "epoch": 4.032160929020771,
      "grad_norm": 0.18275225162506104,
      "learning_rate": 1.9484327537014953e-06,
      "loss": 0.0067,
      "step": 2463860
    },
    {
      "epoch": 4.032193659459424,
      "grad_norm": 0.2971245348453522,
      "learning_rate": 1.948366861487978e-06,
      "loss": 0.0101,
      "step": 2463880
    },
    {
      "epoch": 4.032226389898077,
      "grad_norm": 0.20374587178230286,
      "learning_rate": 1.9483009692744608e-06,
      "loss": 0.009,
      "step": 2463900
    },
    {
      "epoch": 4.032259120336731,
      "grad_norm": 0.256832093000412,
      "learning_rate": 1.948235077060944e-06,
      "loss": 0.0074,
      "step": 2463920
    },
    {
      "epoch": 4.032291850775384,
      "grad_norm": 0.1318911761045456,
      "learning_rate": 1.9481691848474267e-06,
      "loss": 0.0068,
      "step": 2463940
    },
    {
      "epoch": 4.032324581214038,
      "grad_norm": 0.47128549218177795,
      "learning_rate": 1.94810329263391e-06,
      "loss": 0.0096,
      "step": 2463960
    },
    {
      "epoch": 4.0323573116526905,
      "grad_norm": 0.09914743900299072,
      "learning_rate": 1.9480374004203926e-06,
      "loss": 0.0079,
      "step": 2463980
    },
    {
      "epoch": 4.032390042091344,
      "grad_norm": 0.07068346440792084,
      "learning_rate": 1.9479715082068753e-06,
      "loss": 0.0081,
      "step": 2464000
    },
    {
      "epoch": 4.032422772529998,
      "grad_norm": 0.14777810871601105,
      "learning_rate": 1.947905615993358e-06,
      "loss": 0.0113,
      "step": 2464020
    },
    {
      "epoch": 4.03245550296865,
      "grad_norm": 0.07154612988233566,
      "learning_rate": 1.9478397237798412e-06,
      "loss": 0.0095,
      "step": 2464040
    },
    {
      "epoch": 4.032488233407304,
      "grad_norm": 0.458715558052063,
      "learning_rate": 1.947773831566324e-06,
      "loss": 0.0113,
      "step": 2464060
    },
    {
      "epoch": 4.032520963845958,
      "grad_norm": 0.4816160798072815,
      "learning_rate": 1.9477079393528067e-06,
      "loss": 0.0101,
      "step": 2464080
    },
    {
      "epoch": 4.032553694284611,
      "grad_norm": 0.26621973514556885,
      "learning_rate": 1.94764204713929e-06,
      "loss": 0.0073,
      "step": 2464100
    },
    {
      "epoch": 4.032586424723264,
      "grad_norm": 0.04743395373225212,
      "learning_rate": 1.9475761549257726e-06,
      "loss": 0.0077,
      "step": 2464120
    },
    {
      "epoch": 4.0326191551619175,
      "grad_norm": 0.15139234066009521,
      "learning_rate": 1.9475102627122553e-06,
      "loss": 0.0081,
      "step": 2464140
    },
    {
      "epoch": 4.032651885600571,
      "grad_norm": 0.5830156207084656,
      "learning_rate": 1.9474443704987385e-06,
      "loss": 0.0095,
      "step": 2464160
    },
    {
      "epoch": 4.032684616039224,
      "grad_norm": 0.23057691752910614,
      "learning_rate": 1.9473784782852212e-06,
      "loss": 0.0092,
      "step": 2464180
    },
    {
      "epoch": 4.032717346477877,
      "grad_norm": 0.24075844883918762,
      "learning_rate": 1.947312586071704e-06,
      "loss": 0.0127,
      "step": 2464200
    },
    {
      "epoch": 4.032750076916531,
      "grad_norm": 0.22092242538928986,
      "learning_rate": 1.9472466938581867e-06,
      "loss": 0.006,
      "step": 2464220
    },
    {
      "epoch": 4.032782807355185,
      "grad_norm": 0.24388156831264496,
      "learning_rate": 1.94718080164467e-06,
      "loss": 0.0107,
      "step": 2464240
    },
    {
      "epoch": 4.032815537793837,
      "grad_norm": 0.3266463577747345,
      "learning_rate": 1.9471149094311526e-06,
      "loss": 0.0078,
      "step": 2464260
    },
    {
      "epoch": 4.032848268232491,
      "grad_norm": 0.5035091638565063,
      "learning_rate": 1.9470490172176358e-06,
      "loss": 0.01,
      "step": 2464280
    },
    {
      "epoch": 4.0328809986711445,
      "grad_norm": 0.20195510983467102,
      "learning_rate": 1.9469831250041185e-06,
      "loss": 0.0077,
      "step": 2464300
    },
    {
      "epoch": 4.032913729109797,
      "grad_norm": 0.1999293863773346,
      "learning_rate": 1.9469172327906013e-06,
      "loss": 0.0065,
      "step": 2464320
    },
    {
      "epoch": 4.032946459548451,
      "grad_norm": 0.16700617969036102,
      "learning_rate": 1.946851340577084e-06,
      "loss": 0.0088,
      "step": 2464340
    },
    {
      "epoch": 4.032979189987104,
      "grad_norm": 0.18439917266368866,
      "learning_rate": 1.946785448363567e-06,
      "loss": 0.0112,
      "step": 2464360
    },
    {
      "epoch": 4.033011920425758,
      "grad_norm": 0.25560733675956726,
      "learning_rate": 1.94671955615005e-06,
      "loss": 0.0079,
      "step": 2464380
    },
    {
      "epoch": 4.033044650864411,
      "grad_norm": 0.02426869049668312,
      "learning_rate": 1.9466536639365326e-06,
      "loss": 0.0092,
      "step": 2464400
    },
    {
      "epoch": 4.033077381303064,
      "grad_norm": 0.40223920345306396,
      "learning_rate": 1.9465877717230154e-06,
      "loss": 0.0065,
      "step": 2464420
    },
    {
      "epoch": 4.033110111741718,
      "grad_norm": 0.6426642537117004,
      "learning_rate": 1.9465218795094986e-06,
      "loss": 0.0096,
      "step": 2464440
    },
    {
      "epoch": 4.033142842180371,
      "grad_norm": 0.22624997794628143,
      "learning_rate": 1.9464559872959817e-06,
      "loss": 0.0113,
      "step": 2464460
    },
    {
      "epoch": 4.033175572619024,
      "grad_norm": 0.27399131655693054,
      "learning_rate": 1.9463900950824645e-06,
      "loss": 0.0095,
      "step": 2464480
    },
    {
      "epoch": 4.033208303057678,
      "grad_norm": 0.17513994872570038,
      "learning_rate": 1.946324202868947e-06,
      "loss": 0.0111,
      "step": 2464500
    },
    {
      "epoch": 4.033241033496331,
      "grad_norm": 0.16221216320991516,
      "learning_rate": 1.94625831065543e-06,
      "loss": 0.0093,
      "step": 2464520
    },
    {
      "epoch": 4.033273763934984,
      "grad_norm": 0.2833549380302429,
      "learning_rate": 1.9461924184419127e-06,
      "loss": 0.0111,
      "step": 2464540
    },
    {
      "epoch": 4.033306494373638,
      "grad_norm": 0.1601777821779251,
      "learning_rate": 1.946126526228396e-06,
      "loss": 0.0113,
      "step": 2464560
    },
    {
      "epoch": 4.033339224812291,
      "grad_norm": 0.10833525657653809,
      "learning_rate": 1.9460606340148786e-06,
      "loss": 0.0052,
      "step": 2464580
    },
    {
      "epoch": 4.033371955250944,
      "grad_norm": 0.16394232213497162,
      "learning_rate": 1.9459947418013613e-06,
      "loss": 0.0093,
      "step": 2464600
    },
    {
      "epoch": 4.0334046856895975,
      "grad_norm": 0.986208975315094,
      "learning_rate": 1.945928849587844e-06,
      "loss": 0.0104,
      "step": 2464620
    },
    {
      "epoch": 4.033437416128251,
      "grad_norm": 0.30709078907966614,
      "learning_rate": 1.9458629573743272e-06,
      "loss": 0.0095,
      "step": 2464640
    },
    {
      "epoch": 4.033470146566905,
      "grad_norm": 0.2309694141149521,
      "learning_rate": 1.9457970651608104e-06,
      "loss": 0.0132,
      "step": 2464660
    },
    {
      "epoch": 4.033502877005557,
      "grad_norm": 0.0646657794713974,
      "learning_rate": 1.945731172947293e-06,
      "loss": 0.009,
      "step": 2464680
    },
    {
      "epoch": 4.033535607444211,
      "grad_norm": 0.5163036584854126,
      "learning_rate": 1.945665280733776e-06,
      "loss": 0.0086,
      "step": 2464700
    },
    {
      "epoch": 4.033568337882865,
      "grad_norm": 0.1979682743549347,
      "learning_rate": 1.9455993885202586e-06,
      "loss": 0.0108,
      "step": 2464720
    },
    {
      "epoch": 4.033601068321517,
      "grad_norm": 0.24745596945285797,
      "learning_rate": 1.9455334963067413e-06,
      "loss": 0.0097,
      "step": 2464740
    },
    {
      "epoch": 4.033633798760171,
      "grad_norm": 0.11593610793352127,
      "learning_rate": 1.9454676040932245e-06,
      "loss": 0.0097,
      "step": 2464760
    },
    {
      "epoch": 4.0336665291988245,
      "grad_norm": 0.4674791991710663,
      "learning_rate": 1.9454017118797072e-06,
      "loss": 0.0166,
      "step": 2464780
    },
    {
      "epoch": 4.033699259637478,
      "grad_norm": 0.3413006663322449,
      "learning_rate": 1.9453358196661904e-06,
      "loss": 0.0069,
      "step": 2464800
    },
    {
      "epoch": 4.033731990076131,
      "grad_norm": 0.11957535147666931,
      "learning_rate": 1.945269927452673e-06,
      "loss": 0.01,
      "step": 2464820
    },
    {
      "epoch": 4.033764720514784,
      "grad_norm": 0.15555156767368317,
      "learning_rate": 1.945204035239156e-06,
      "loss": 0.0107,
      "step": 2464840
    },
    {
      "epoch": 4.033797450953438,
      "grad_norm": 0.21876834332942963,
      "learning_rate": 1.945138143025639e-06,
      "loss": 0.0074,
      "step": 2464860
    },
    {
      "epoch": 4.033830181392091,
      "grad_norm": 0.12688802182674408,
      "learning_rate": 1.945072250812122e-06,
      "loss": 0.0079,
      "step": 2464880
    },
    {
      "epoch": 4.033862911830744,
      "grad_norm": 0.2892538011074066,
      "learning_rate": 1.9450063585986045e-06,
      "loss": 0.0104,
      "step": 2464900
    },
    {
      "epoch": 4.033895642269398,
      "grad_norm": 0.20284797251224518,
      "learning_rate": 1.9449404663850873e-06,
      "loss": 0.0095,
      "step": 2464920
    },
    {
      "epoch": 4.033928372708051,
      "grad_norm": 0.43220263719558716,
      "learning_rate": 1.94487457417157e-06,
      "loss": 0.0109,
      "step": 2464940
    },
    {
      "epoch": 4.033961103146704,
      "grad_norm": 0.1351560801267624,
      "learning_rate": 1.944808681958053e-06,
      "loss": 0.0106,
      "step": 2464960
    },
    {
      "epoch": 4.033993833585358,
      "grad_norm": 0.25352516770362854,
      "learning_rate": 1.9447427897445363e-06,
      "loss": 0.0093,
      "step": 2464980
    },
    {
      "epoch": 4.034026564024011,
      "grad_norm": 0.17453895509243011,
      "learning_rate": 1.944676897531019e-06,
      "loss": 0.0109,
      "step": 2465000
    },
    {
      "epoch": 4.034059294462664,
      "grad_norm": 0.1920982450246811,
      "learning_rate": 1.944611005317502e-06,
      "loss": 0.0136,
      "step": 2465020
    },
    {
      "epoch": 4.034092024901318,
      "grad_norm": 0.2783021330833435,
      "learning_rate": 1.9445451131039846e-06,
      "loss": 0.0079,
      "step": 2465040
    },
    {
      "epoch": 4.034124755339971,
      "grad_norm": 0.4091772437095642,
      "learning_rate": 1.9444792208904677e-06,
      "loss": 0.0107,
      "step": 2465060
    },
    {
      "epoch": 4.034157485778624,
      "grad_norm": 0.3064349889755249,
      "learning_rate": 1.9444133286769505e-06,
      "loss": 0.0095,
      "step": 2465080
    },
    {
      "epoch": 4.034190216217278,
      "grad_norm": 0.3003176152706146,
      "learning_rate": 1.944347436463433e-06,
      "loss": 0.0107,
      "step": 2465100
    },
    {
      "epoch": 4.034222946655931,
      "grad_norm": 0.036472272127866745,
      "learning_rate": 1.944281544249916e-06,
      "loss": 0.0092,
      "step": 2465120
    },
    {
      "epoch": 4.034255677094585,
      "grad_norm": 0.19374318420886993,
      "learning_rate": 1.944215652036399e-06,
      "loss": 0.0092,
      "step": 2465140
    },
    {
      "epoch": 4.0342884075332375,
      "grad_norm": 0.11277733743190765,
      "learning_rate": 1.944149759822882e-06,
      "loss": 0.0116,
      "step": 2465160
    },
    {
      "epoch": 4.034321137971891,
      "grad_norm": 1.2651516199111938,
      "learning_rate": 1.944083867609365e-06,
      "loss": 0.0102,
      "step": 2465180
    },
    {
      "epoch": 4.034353868410545,
      "grad_norm": 0.2005985826253891,
      "learning_rate": 1.9440179753958477e-06,
      "loss": 0.0116,
      "step": 2465200
    },
    {
      "epoch": 4.034386598849197,
      "grad_norm": 0.3075673282146454,
      "learning_rate": 1.9439520831823305e-06,
      "loss": 0.0077,
      "step": 2465220
    },
    {
      "epoch": 4.034419329287851,
      "grad_norm": 0.37004438042640686,
      "learning_rate": 1.9438861909688132e-06,
      "loss": 0.0124,
      "step": 2465240
    },
    {
      "epoch": 4.034452059726505,
      "grad_norm": 0.3425590991973877,
      "learning_rate": 1.9438202987552964e-06,
      "loss": 0.0104,
      "step": 2465260
    },
    {
      "epoch": 4.034484790165158,
      "grad_norm": 0.08659746497869492,
      "learning_rate": 1.943754406541779e-06,
      "loss": 0.0098,
      "step": 2465280
    },
    {
      "epoch": 4.034517520603811,
      "grad_norm": 0.49868738651275635,
      "learning_rate": 1.943688514328262e-06,
      "loss": 0.0074,
      "step": 2465300
    },
    {
      "epoch": 4.0345502510424645,
      "grad_norm": 0.166338711977005,
      "learning_rate": 1.9436226221147446e-06,
      "loss": 0.0094,
      "step": 2465320
    },
    {
      "epoch": 4.034582981481118,
      "grad_norm": 0.2908964157104492,
      "learning_rate": 1.9435567299012278e-06,
      "loss": 0.0106,
      "step": 2465340
    },
    {
      "epoch": 4.034615711919771,
      "grad_norm": 0.18364663422107697,
      "learning_rate": 1.9434908376877105e-06,
      "loss": 0.0096,
      "step": 2465360
    },
    {
      "epoch": 4.034648442358424,
      "grad_norm": 0.17073869705200195,
      "learning_rate": 1.9434249454741937e-06,
      "loss": 0.0103,
      "step": 2465380
    },
    {
      "epoch": 4.034681172797078,
      "grad_norm": 0.2440916746854782,
      "learning_rate": 1.9433590532606764e-06,
      "loss": 0.0111,
      "step": 2465400
    },
    {
      "epoch": 4.0347139032357315,
      "grad_norm": 0.057238828390836716,
      "learning_rate": 1.943293161047159e-06,
      "loss": 0.011,
      "step": 2465420
    },
    {
      "epoch": 4.034746633674384,
      "grad_norm": 0.2769477367401123,
      "learning_rate": 1.943227268833642e-06,
      "loss": 0.0087,
      "step": 2465440
    },
    {
      "epoch": 4.034779364113038,
      "grad_norm": 0.5751652717590332,
      "learning_rate": 1.943161376620125e-06,
      "loss": 0.0112,
      "step": 2465460
    },
    {
      "epoch": 4.034812094551691,
      "grad_norm": 0.06608820706605911,
      "learning_rate": 1.943095484406608e-06,
      "loss": 0.0093,
      "step": 2465480
    },
    {
      "epoch": 4.034844824990344,
      "grad_norm": 0.08569040149450302,
      "learning_rate": 1.9430295921930905e-06,
      "loss": 0.0104,
      "step": 2465500
    },
    {
      "epoch": 4.034877555428998,
      "grad_norm": 0.10102371871471405,
      "learning_rate": 1.9429636999795737e-06,
      "loss": 0.0081,
      "step": 2465520
    },
    {
      "epoch": 4.034910285867651,
      "grad_norm": 0.1543181985616684,
      "learning_rate": 1.9428978077660564e-06,
      "loss": 0.0097,
      "step": 2465540
    },
    {
      "epoch": 4.034943016306305,
      "grad_norm": 0.19491487741470337,
      "learning_rate": 1.942831915552539e-06,
      "loss": 0.0078,
      "step": 2465560
    },
    {
      "epoch": 4.034975746744958,
      "grad_norm": 0.19787104427814484,
      "learning_rate": 1.9427660233390223e-06,
      "loss": 0.0087,
      "step": 2465580
    },
    {
      "epoch": 4.035008477183611,
      "grad_norm": 0.32818710803985596,
      "learning_rate": 1.942700131125505e-06,
      "loss": 0.0084,
      "step": 2465600
    },
    {
      "epoch": 4.035041207622265,
      "grad_norm": 0.4721108376979828,
      "learning_rate": 1.942634238911988e-06,
      "loss": 0.0082,
      "step": 2465620
    },
    {
      "epoch": 4.0350739380609175,
      "grad_norm": 0.33587613701820374,
      "learning_rate": 1.9425683466984706e-06,
      "loss": 0.009,
      "step": 2465640
    },
    {
      "epoch": 4.035106668499571,
      "grad_norm": 0.12259060889482498,
      "learning_rate": 1.9425024544849537e-06,
      "loss": 0.0096,
      "step": 2465660
    },
    {
      "epoch": 4.035139398938225,
      "grad_norm": 0.2348627746105194,
      "learning_rate": 1.942436562271437e-06,
      "loss": 0.0093,
      "step": 2465680
    },
    {
      "epoch": 4.035172129376878,
      "grad_norm": 0.29927393794059753,
      "learning_rate": 1.9423706700579196e-06,
      "loss": 0.0104,
      "step": 2465700
    },
    {
      "epoch": 4.035204859815531,
      "grad_norm": 0.18155129253864288,
      "learning_rate": 1.9423047778444024e-06,
      "loss": 0.0081,
      "step": 2465720
    },
    {
      "epoch": 4.035237590254185,
      "grad_norm": 0.2473323941230774,
      "learning_rate": 1.942238885630885e-06,
      "loss": 0.0125,
      "step": 2465740
    },
    {
      "epoch": 4.035270320692838,
      "grad_norm": 0.4560278654098511,
      "learning_rate": 1.942172993417368e-06,
      "loss": 0.0091,
      "step": 2465760
    },
    {
      "epoch": 4.035303051131491,
      "grad_norm": 0.6833194494247437,
      "learning_rate": 1.942107101203851e-06,
      "loss": 0.0087,
      "step": 2465780
    },
    {
      "epoch": 4.0353357815701445,
      "grad_norm": 0.10571543127298355,
      "learning_rate": 1.9420412089903337e-06,
      "loss": 0.0074,
      "step": 2465800
    },
    {
      "epoch": 4.035368512008798,
      "grad_norm": 0.3648535907268524,
      "learning_rate": 1.9419753167768165e-06,
      "loss": 0.0086,
      "step": 2465820
    },
    {
      "epoch": 4.035401242447452,
      "grad_norm": 0.4101877808570862,
      "learning_rate": 1.9419094245632992e-06,
      "loss": 0.011,
      "step": 2465840
    },
    {
      "epoch": 4.035433972886104,
      "grad_norm": 0.3706638514995575,
      "learning_rate": 1.9418435323497824e-06,
      "loss": 0.0109,
      "step": 2465860
    },
    {
      "epoch": 4.035466703324758,
      "grad_norm": 0.18291671574115753,
      "learning_rate": 1.9417776401362656e-06,
      "loss": 0.0104,
      "step": 2465880
    },
    {
      "epoch": 4.035499433763412,
      "grad_norm": 0.3281306028366089,
      "learning_rate": 1.9417117479227483e-06,
      "loss": 0.0058,
      "step": 2465900
    },
    {
      "epoch": 4.035532164202064,
      "grad_norm": 0.13490264117717743,
      "learning_rate": 1.941645855709231e-06,
      "loss": 0.0078,
      "step": 2465920
    },
    {
      "epoch": 4.035564894640718,
      "grad_norm": 0.0572698637843132,
      "learning_rate": 1.9415799634957138e-06,
      "loss": 0.0091,
      "step": 2465940
    },
    {
      "epoch": 4.0355976250793715,
      "grad_norm": 0.21107293665409088,
      "learning_rate": 1.9415140712821965e-06,
      "loss": 0.0106,
      "step": 2465960
    },
    {
      "epoch": 4.035630355518025,
      "grad_norm": 0.14197532832622528,
      "learning_rate": 1.9414481790686797e-06,
      "loss": 0.0084,
      "step": 2465980
    },
    {
      "epoch": 4.035663085956678,
      "grad_norm": 0.1259383112192154,
      "learning_rate": 1.9413822868551624e-06,
      "loss": 0.0103,
      "step": 2466000
    },
    {
      "epoch": 4.035695816395331,
      "grad_norm": 0.19525223970413208,
      "learning_rate": 1.941316394641645e-06,
      "loss": 0.0117,
      "step": 2466020
    },
    {
      "epoch": 4.035728546833985,
      "grad_norm": 0.0574052631855011,
      "learning_rate": 1.9412505024281283e-06,
      "loss": 0.0095,
      "step": 2466040
    },
    {
      "epoch": 4.035761277272638,
      "grad_norm": 0.09093382954597473,
      "learning_rate": 1.941184610214611e-06,
      "loss": 0.0109,
      "step": 2466060
    },
    {
      "epoch": 4.035794007711291,
      "grad_norm": 0.15468688309192657,
      "learning_rate": 1.9411187180010942e-06,
      "loss": 0.0066,
      "step": 2466080
    },
    {
      "epoch": 4.035826738149945,
      "grad_norm": 0.2653087377548218,
      "learning_rate": 1.941052825787577e-06,
      "loss": 0.0077,
      "step": 2466100
    },
    {
      "epoch": 4.0358594685885985,
      "grad_norm": 0.29225412011146545,
      "learning_rate": 1.9409869335740597e-06,
      "loss": 0.0084,
      "step": 2466120
    },
    {
      "epoch": 4.035892199027251,
      "grad_norm": 0.22826185822486877,
      "learning_rate": 1.9409210413605424e-06,
      "loss": 0.0092,
      "step": 2466140
    },
    {
      "epoch": 4.035924929465905,
      "grad_norm": 0.4789360761642456,
      "learning_rate": 1.940855149147025e-06,
      "loss": 0.008,
      "step": 2466160
    },
    {
      "epoch": 4.035957659904558,
      "grad_norm": 0.25738757848739624,
      "learning_rate": 1.9407892569335083e-06,
      "loss": 0.0145,
      "step": 2466180
    },
    {
      "epoch": 4.035990390343211,
      "grad_norm": 0.17164580523967743,
      "learning_rate": 1.940723364719991e-06,
      "loss": 0.0102,
      "step": 2466200
    },
    {
      "epoch": 4.036023120781865,
      "grad_norm": 0.1809808611869812,
      "learning_rate": 1.9406574725064742e-06,
      "loss": 0.0079,
      "step": 2466220
    },
    {
      "epoch": 4.036055851220518,
      "grad_norm": 0.144443079829216,
      "learning_rate": 1.940591580292957e-06,
      "loss": 0.0093,
      "step": 2466240
    },
    {
      "epoch": 4.036088581659172,
      "grad_norm": 0.18197014927864075,
      "learning_rate": 1.9405256880794397e-06,
      "loss": 0.0052,
      "step": 2466260
    },
    {
      "epoch": 4.0361213120978245,
      "grad_norm": 0.13139165937900543,
      "learning_rate": 1.940459795865923e-06,
      "loss": 0.006,
      "step": 2466280
    },
    {
      "epoch": 4.036154042536478,
      "grad_norm": 0.24482600390911102,
      "learning_rate": 1.9403939036524056e-06,
      "loss": 0.0081,
      "step": 2466300
    },
    {
      "epoch": 4.036186772975132,
      "grad_norm": 0.024633122608065605,
      "learning_rate": 1.9403280114388884e-06,
      "loss": 0.0095,
      "step": 2466320
    },
    {
      "epoch": 4.036219503413784,
      "grad_norm": 0.3770963251590729,
      "learning_rate": 1.940262119225371e-06,
      "loss": 0.0106,
      "step": 2466340
    },
    {
      "epoch": 4.036252233852438,
      "grad_norm": 0.08626295626163483,
      "learning_rate": 1.940196227011854e-06,
      "loss": 0.0101,
      "step": 2466360
    },
    {
      "epoch": 4.036284964291092,
      "grad_norm": 0.20858249068260193,
      "learning_rate": 1.940130334798337e-06,
      "loss": 0.0078,
      "step": 2466380
    },
    {
      "epoch": 4.036317694729744,
      "grad_norm": 0.19223114848136902,
      "learning_rate": 1.94006444258482e-06,
      "loss": 0.0126,
      "step": 2466400
    },
    {
      "epoch": 4.036350425168398,
      "grad_norm": 0.13011261820793152,
      "learning_rate": 1.939998550371303e-06,
      "loss": 0.0082,
      "step": 2466420
    },
    {
      "epoch": 4.0363831556070515,
      "grad_norm": 0.04992340877652168,
      "learning_rate": 1.9399326581577857e-06,
      "loss": 0.0082,
      "step": 2466440
    },
    {
      "epoch": 4.036415886045705,
      "grad_norm": 0.09623292833566666,
      "learning_rate": 1.9398667659442684e-06,
      "loss": 0.0074,
      "step": 2466460
    },
    {
      "epoch": 4.036448616484358,
      "grad_norm": 0.09432204812765121,
      "learning_rate": 1.9398008737307516e-06,
      "loss": 0.0075,
      "step": 2466480
    },
    {
      "epoch": 4.036481346923011,
      "grad_norm": 0.43540772795677185,
      "learning_rate": 1.9397349815172343e-06,
      "loss": 0.0114,
      "step": 2466500
    },
    {
      "epoch": 4.036514077361665,
      "grad_norm": 0.2387826144695282,
      "learning_rate": 1.939669089303717e-06,
      "loss": 0.0067,
      "step": 2466520
    },
    {
      "epoch": 4.036546807800318,
      "grad_norm": 0.2566980719566345,
      "learning_rate": 1.9396031970901998e-06,
      "loss": 0.0131,
      "step": 2466540
    },
    {
      "epoch": 4.036579538238971,
      "grad_norm": 0.24862933158874512,
      "learning_rate": 1.939537304876683e-06,
      "loss": 0.0078,
      "step": 2466560
    },
    {
      "epoch": 4.036612268677625,
      "grad_norm": 0.28170254826545715,
      "learning_rate": 1.9394714126631657e-06,
      "loss": 0.0113,
      "step": 2466580
    },
    {
      "epoch": 4.0366449991162785,
      "grad_norm": 0.29164910316467285,
      "learning_rate": 1.939405520449649e-06,
      "loss": 0.0117,
      "step": 2466600
    },
    {
      "epoch": 4.036677729554931,
      "grad_norm": 0.08412628620862961,
      "learning_rate": 1.9393396282361316e-06,
      "loss": 0.0086,
      "step": 2466620
    },
    {
      "epoch": 4.036710459993585,
      "grad_norm": 0.6788550615310669,
      "learning_rate": 1.9392737360226143e-06,
      "loss": 0.0089,
      "step": 2466640
    },
    {
      "epoch": 4.036743190432238,
      "grad_norm": 0.38947200775146484,
      "learning_rate": 1.939207843809097e-06,
      "loss": 0.0108,
      "step": 2466660
    },
    {
      "epoch": 4.036775920870891,
      "grad_norm": 0.20349839329719543,
      "learning_rate": 1.9391419515955802e-06,
      "loss": 0.0068,
      "step": 2466680
    },
    {
      "epoch": 4.036808651309545,
      "grad_norm": 0.27395495772361755,
      "learning_rate": 1.939076059382063e-06,
      "loss": 0.008,
      "step": 2466700
    },
    {
      "epoch": 4.036841381748198,
      "grad_norm": 0.15743020176887512,
      "learning_rate": 1.9390101671685457e-06,
      "loss": 0.0124,
      "step": 2466720
    },
    {
      "epoch": 4.036874112186852,
      "grad_norm": 0.3768674433231354,
      "learning_rate": 1.938944274955029e-06,
      "loss": 0.0058,
      "step": 2466740
    },
    {
      "epoch": 4.036906842625505,
      "grad_norm": 0.2191239297389984,
      "learning_rate": 1.9388783827415116e-06,
      "loss": 0.0095,
      "step": 2466760
    },
    {
      "epoch": 4.036939573064158,
      "grad_norm": 0.37882089614868164,
      "learning_rate": 1.9388124905279943e-06,
      "loss": 0.0127,
      "step": 2466780
    },
    {
      "epoch": 4.036972303502812,
      "grad_norm": 0.19173893332481384,
      "learning_rate": 1.9387465983144775e-06,
      "loss": 0.008,
      "step": 2466800
    },
    {
      "epoch": 4.0370050339414645,
      "grad_norm": 0.17469939589500427,
      "learning_rate": 1.9386807061009603e-06,
      "loss": 0.0108,
      "step": 2466820
    },
    {
      "epoch": 4.037037764380118,
      "grad_norm": 0.22816580533981323,
      "learning_rate": 1.938614813887443e-06,
      "loss": 0.0117,
      "step": 2466840
    },
    {
      "epoch": 4.037070494818772,
      "grad_norm": 0.5344343781471252,
      "learning_rate": 1.9385489216739257e-06,
      "loss": 0.0111,
      "step": 2466860
    },
    {
      "epoch": 4.037103225257425,
      "grad_norm": 0.07110651582479477,
      "learning_rate": 1.938483029460409e-06,
      "loss": 0.0086,
      "step": 2466880
    },
    {
      "epoch": 4.037135955696078,
      "grad_norm": 0.3627127707004547,
      "learning_rate": 1.9384171372468916e-06,
      "loss": 0.006,
      "step": 2466900
    },
    {
      "epoch": 4.037168686134732,
      "grad_norm": 0.2642582058906555,
      "learning_rate": 1.938351245033375e-06,
      "loss": 0.0107,
      "step": 2466920
    },
    {
      "epoch": 4.037201416573385,
      "grad_norm": 0.24565796554088593,
      "learning_rate": 1.9382853528198575e-06,
      "loss": 0.0141,
      "step": 2466940
    },
    {
      "epoch": 4.037234147012038,
      "grad_norm": 0.295188844203949,
      "learning_rate": 1.9382194606063403e-06,
      "loss": 0.0114,
      "step": 2466960
    },
    {
      "epoch": 4.0372668774506915,
      "grad_norm": 0.12810829281806946,
      "learning_rate": 1.938153568392823e-06,
      "loss": 0.0059,
      "step": 2466980
    },
    {
      "epoch": 4.037299607889345,
      "grad_norm": 0.5949307084083557,
      "learning_rate": 1.938087676179306e-06,
      "loss": 0.0063,
      "step": 2467000
    },
    {
      "epoch": 4.037332338327999,
      "grad_norm": 0.10984794050455093,
      "learning_rate": 1.938021783965789e-06,
      "loss": 0.0092,
      "step": 2467020
    },
    {
      "epoch": 4.037365068766651,
      "grad_norm": 0.10206624120473862,
      "learning_rate": 1.9379558917522717e-06,
      "loss": 0.01,
      "step": 2467040
    },
    {
      "epoch": 4.037397799205305,
      "grad_norm": 0.12301836907863617,
      "learning_rate": 1.9378899995387544e-06,
      "loss": 0.0068,
      "step": 2467060
    },
    {
      "epoch": 4.0374305296439585,
      "grad_norm": 0.4025832712650299,
      "learning_rate": 1.9378241073252376e-06,
      "loss": 0.0074,
      "step": 2467080
    },
    {
      "epoch": 4.037463260082611,
      "grad_norm": 0.31657466292381287,
      "learning_rate": 1.9377582151117207e-06,
      "loss": 0.0069,
      "step": 2467100
    },
    {
      "epoch": 4.037495990521265,
      "grad_norm": 0.06469438225030899,
      "learning_rate": 1.9376923228982035e-06,
      "loss": 0.008,
      "step": 2467120
    },
    {
      "epoch": 4.037528720959918,
      "grad_norm": 0.20907847583293915,
      "learning_rate": 1.937626430684686e-06,
      "loss": 0.0135,
      "step": 2467140
    },
    {
      "epoch": 4.037561451398572,
      "grad_norm": 0.23990708589553833,
      "learning_rate": 1.937560538471169e-06,
      "loss": 0.0093,
      "step": 2467160
    },
    {
      "epoch": 4.037594181837225,
      "grad_norm": 0.14448542892932892,
      "learning_rate": 1.9374946462576517e-06,
      "loss": 0.006,
      "step": 2467180
    },
    {
      "epoch": 4.037626912275878,
      "grad_norm": 0.263925701379776,
      "learning_rate": 1.937428754044135e-06,
      "loss": 0.011,
      "step": 2467200
    },
    {
      "epoch": 4.037659642714532,
      "grad_norm": 0.10719146579504013,
      "learning_rate": 1.9373628618306176e-06,
      "loss": 0.0085,
      "step": 2467220
    },
    {
      "epoch": 4.037692373153185,
      "grad_norm": 0.18827663362026215,
      "learning_rate": 1.9372969696171003e-06,
      "loss": 0.0106,
      "step": 2467240
    },
    {
      "epoch": 4.037725103591838,
      "grad_norm": 0.3061203062534332,
      "learning_rate": 1.9372310774035835e-06,
      "loss": 0.0081,
      "step": 2467260
    },
    {
      "epoch": 4.037757834030492,
      "grad_norm": 0.14396850764751434,
      "learning_rate": 1.9371651851900662e-06,
      "loss": 0.0104,
      "step": 2467280
    },
    {
      "epoch": 4.037790564469145,
      "grad_norm": 0.6482890248298645,
      "learning_rate": 1.9370992929765494e-06,
      "loss": 0.01,
      "step": 2467300
    },
    {
      "epoch": 4.037823294907798,
      "grad_norm": 0.10812584310770035,
      "learning_rate": 1.937033400763032e-06,
      "loss": 0.0087,
      "step": 2467320
    },
    {
      "epoch": 4.037856025346452,
      "grad_norm": 0.3489552438259125,
      "learning_rate": 1.936967508549515e-06,
      "loss": 0.0103,
      "step": 2467340
    },
    {
      "epoch": 4.037888755785105,
      "grad_norm": 0.13308963179588318,
      "learning_rate": 1.9369016163359976e-06,
      "loss": 0.0092,
      "step": 2467360
    },
    {
      "epoch": 4.037921486223758,
      "grad_norm": 0.18815067410469055,
      "learning_rate": 1.9368357241224804e-06,
      "loss": 0.0096,
      "step": 2467380
    },
    {
      "epoch": 4.037954216662412,
      "grad_norm": 0.146505206823349,
      "learning_rate": 1.9367698319089635e-06,
      "loss": 0.0105,
      "step": 2467400
    },
    {
      "epoch": 4.037986947101065,
      "grad_norm": 0.2220386266708374,
      "learning_rate": 1.9367039396954463e-06,
      "loss": 0.0091,
      "step": 2467420
    },
    {
      "epoch": 4.038019677539719,
      "grad_norm": 0.0970010980963707,
      "learning_rate": 1.9366380474819294e-06,
      "loss": 0.0085,
      "step": 2467440
    },
    {
      "epoch": 4.0380524079783715,
      "grad_norm": 0.04778361693024635,
      "learning_rate": 1.936572155268412e-06,
      "loss": 0.0052,
      "step": 2467460
    },
    {
      "epoch": 4.038085138417025,
      "grad_norm": 0.21050800383090973,
      "learning_rate": 1.936506263054895e-06,
      "loss": 0.0082,
      "step": 2467480
    },
    {
      "epoch": 4.038117868855679,
      "grad_norm": 0.36643654108047485,
      "learning_rate": 1.936440370841378e-06,
      "loss": 0.0082,
      "step": 2467500
    },
    {
      "epoch": 4.038150599294331,
      "grad_norm": 0.26327991485595703,
      "learning_rate": 1.936374478627861e-06,
      "loss": 0.0116,
      "step": 2467520
    },
    {
      "epoch": 4.038183329732985,
      "grad_norm": 0.16856242716312408,
      "learning_rate": 1.9363085864143435e-06,
      "loss": 0.0074,
      "step": 2467540
    },
    {
      "epoch": 4.038216060171639,
      "grad_norm": 0.26348599791526794,
      "learning_rate": 1.9362426942008263e-06,
      "loss": 0.0146,
      "step": 2467560
    },
    {
      "epoch": 4.038248790610292,
      "grad_norm": 0.33133721351623535,
      "learning_rate": 1.936176801987309e-06,
      "loss": 0.0062,
      "step": 2467580
    },
    {
      "epoch": 4.038281521048945,
      "grad_norm": 0.24944697320461273,
      "learning_rate": 1.936110909773792e-06,
      "loss": 0.0089,
      "step": 2467600
    },
    {
      "epoch": 4.0383142514875985,
      "grad_norm": 0.24805936217308044,
      "learning_rate": 1.9360450175602753e-06,
      "loss": 0.0067,
      "step": 2467620
    },
    {
      "epoch": 4.038346981926252,
      "grad_norm": 0.18752636015415192,
      "learning_rate": 1.935979125346758e-06,
      "loss": 0.0113,
      "step": 2467640
    },
    {
      "epoch": 4.038379712364905,
      "grad_norm": 0.12927302718162537,
      "learning_rate": 1.935913233133241e-06,
      "loss": 0.0113,
      "step": 2467660
    },
    {
      "epoch": 4.038412442803558,
      "grad_norm": 0.19388523697853088,
      "learning_rate": 1.9358473409197236e-06,
      "loss": 0.0087,
      "step": 2467680
    },
    {
      "epoch": 4.038445173242212,
      "grad_norm": 0.05429527163505554,
      "learning_rate": 1.9357814487062067e-06,
      "loss": 0.0081,
      "step": 2467700
    },
    {
      "epoch": 4.038477903680866,
      "grad_norm": 1.2812819480895996,
      "learning_rate": 1.9357155564926895e-06,
      "loss": 0.0081,
      "step": 2467720
    },
    {
      "epoch": 4.038510634119518,
      "grad_norm": 0.37275487184524536,
      "learning_rate": 1.935649664279172e-06,
      "loss": 0.0074,
      "step": 2467740
    },
    {
      "epoch": 4.038543364558172,
      "grad_norm": 0.14636541903018951,
      "learning_rate": 1.935583772065655e-06,
      "loss": 0.0058,
      "step": 2467760
    },
    {
      "epoch": 4.0385760949968255,
      "grad_norm": 0.19199471175670624,
      "learning_rate": 1.935517879852138e-06,
      "loss": 0.0098,
      "step": 2467780
    },
    {
      "epoch": 4.038608825435478,
      "grad_norm": 0.15278862416744232,
      "learning_rate": 1.935451987638621e-06,
      "loss": 0.0113,
      "step": 2467800
    },
    {
      "epoch": 4.038641555874132,
      "grad_norm": 0.18858154118061066,
      "learning_rate": 1.935386095425104e-06,
      "loss": 0.0072,
      "step": 2467820
    },
    {
      "epoch": 4.038674286312785,
      "grad_norm": 0.23846535384655,
      "learning_rate": 1.9353202032115868e-06,
      "loss": 0.0063,
      "step": 2467840
    },
    {
      "epoch": 4.038707016751438,
      "grad_norm": 0.1850149929523468,
      "learning_rate": 1.9352543109980695e-06,
      "loss": 0.0088,
      "step": 2467860
    },
    {
      "epoch": 4.038739747190092,
      "grad_norm": 0.1081787571310997,
      "learning_rate": 1.9351884187845522e-06,
      "loss": 0.0124,
      "step": 2467880
    },
    {
      "epoch": 4.038772477628745,
      "grad_norm": 0.06315451115369797,
      "learning_rate": 1.9351225265710354e-06,
      "loss": 0.0064,
      "step": 2467900
    },
    {
      "epoch": 4.038805208067399,
      "grad_norm": 0.2325190007686615,
      "learning_rate": 1.935056634357518e-06,
      "loss": 0.0084,
      "step": 2467920
    },
    {
      "epoch": 4.0388379385060516,
      "grad_norm": 0.20151573419570923,
      "learning_rate": 1.934990742144001e-06,
      "loss": 0.0063,
      "step": 2467940
    },
    {
      "epoch": 4.038870668944705,
      "grad_norm": 0.15776515007019043,
      "learning_rate": 1.9349248499304836e-06,
      "loss": 0.0092,
      "step": 2467960
    },
    {
      "epoch": 4.038903399383359,
      "grad_norm": 0.24472016096115112,
      "learning_rate": 1.9348589577169668e-06,
      "loss": 0.0154,
      "step": 2467980
    },
    {
      "epoch": 4.0389361298220114,
      "grad_norm": 0.2960777282714844,
      "learning_rate": 1.9347930655034495e-06,
      "loss": 0.011,
      "step": 2468000
    },
    {
      "epoch": 4.038968860260665,
      "grad_norm": 0.24858269095420837,
      "learning_rate": 1.9347271732899327e-06,
      "loss": 0.007,
      "step": 2468020
    },
    {
      "epoch": 4.039001590699319,
      "grad_norm": 0.19767630100250244,
      "learning_rate": 1.9346612810764154e-06,
      "loss": 0.0117,
      "step": 2468040
    },
    {
      "epoch": 4.039034321137972,
      "grad_norm": 0.18578994274139404,
      "learning_rate": 1.934595388862898e-06,
      "loss": 0.0096,
      "step": 2468060
    },
    {
      "epoch": 4.039067051576625,
      "grad_norm": 0.3644813299179077,
      "learning_rate": 1.934529496649381e-06,
      "loss": 0.0132,
      "step": 2468080
    },
    {
      "epoch": 4.0390997820152785,
      "grad_norm": 0.20106998085975647,
      "learning_rate": 1.934463604435864e-06,
      "loss": 0.0078,
      "step": 2468100
    },
    {
      "epoch": 4.039132512453932,
      "grad_norm": 0.12361668795347214,
      "learning_rate": 1.934397712222347e-06,
      "loss": 0.0114,
      "step": 2468120
    },
    {
      "epoch": 4.039165242892585,
      "grad_norm": 0.5282320976257324,
      "learning_rate": 1.93433182000883e-06,
      "loss": 0.0107,
      "step": 2468140
    },
    {
      "epoch": 4.039197973331238,
      "grad_norm": 0.30432403087615967,
      "learning_rate": 1.9342659277953127e-06,
      "loss": 0.0149,
      "step": 2468160
    },
    {
      "epoch": 4.039230703769892,
      "grad_norm": 0.09437035024166107,
      "learning_rate": 1.9342000355817954e-06,
      "loss": 0.007,
      "step": 2468180
    },
    {
      "epoch": 4.039263434208546,
      "grad_norm": 0.1413813680410385,
      "learning_rate": 1.934134143368278e-06,
      "loss": 0.0101,
      "step": 2468200
    },
    {
      "epoch": 4.039296164647198,
      "grad_norm": 0.2234833538532257,
      "learning_rate": 1.9340682511547614e-06,
      "loss": 0.0105,
      "step": 2468220
    },
    {
      "epoch": 4.039328895085852,
      "grad_norm": 0.11441447585821152,
      "learning_rate": 1.934002358941244e-06,
      "loss": 0.0107,
      "step": 2468240
    },
    {
      "epoch": 4.0393616255245055,
      "grad_norm": 0.2071395367383957,
      "learning_rate": 1.933936466727727e-06,
      "loss": 0.0091,
      "step": 2468260
    },
    {
      "epoch": 4.039394355963158,
      "grad_norm": 0.507941722869873,
      "learning_rate": 1.9338705745142096e-06,
      "loss": 0.0067,
      "step": 2468280
    },
    {
      "epoch": 4.039427086401812,
      "grad_norm": 0.9158573150634766,
      "learning_rate": 1.9338046823006927e-06,
      "loss": 0.0084,
      "step": 2468300
    },
    {
      "epoch": 4.039459816840465,
      "grad_norm": 0.057048946619033813,
      "learning_rate": 1.933738790087176e-06,
      "loss": 0.0079,
      "step": 2468320
    },
    {
      "epoch": 4.039492547279119,
      "grad_norm": 0.17452657222747803,
      "learning_rate": 1.9336728978736586e-06,
      "loss": 0.0091,
      "step": 2468340
    },
    {
      "epoch": 4.039525277717772,
      "grad_norm": 0.18357659876346588,
      "learning_rate": 1.9336070056601414e-06,
      "loss": 0.0075,
      "step": 2468360
    },
    {
      "epoch": 4.039558008156425,
      "grad_norm": 0.15928079187870026,
      "learning_rate": 1.933541113446624e-06,
      "loss": 0.0087,
      "step": 2468380
    },
    {
      "epoch": 4.039590738595079,
      "grad_norm": 0.11403398215770721,
      "learning_rate": 1.933475221233107e-06,
      "loss": 0.0087,
      "step": 2468400
    },
    {
      "epoch": 4.039623469033732,
      "grad_norm": 0.14342930912971497,
      "learning_rate": 1.93340932901959e-06,
      "loss": 0.0132,
      "step": 2468420
    },
    {
      "epoch": 4.039656199472385,
      "grad_norm": 0.17322756350040436,
      "learning_rate": 1.9333434368060728e-06,
      "loss": 0.008,
      "step": 2468440
    },
    {
      "epoch": 4.039688929911039,
      "grad_norm": 0.4146667718887329,
      "learning_rate": 1.9332775445925555e-06,
      "loss": 0.0073,
      "step": 2468460
    },
    {
      "epoch": 4.039721660349692,
      "grad_norm": 0.25017979741096497,
      "learning_rate": 1.9332116523790382e-06,
      "loss": 0.0132,
      "step": 2468480
    },
    {
      "epoch": 4.039754390788345,
      "grad_norm": 2.6848883628845215,
      "learning_rate": 1.9331457601655214e-06,
      "loss": 0.009,
      "step": 2468500
    },
    {
      "epoch": 4.039787121226999,
      "grad_norm": 0.36642900109291077,
      "learning_rate": 1.9330798679520046e-06,
      "loss": 0.0082,
      "step": 2468520
    },
    {
      "epoch": 4.039819851665652,
      "grad_norm": 0.1590634435415268,
      "learning_rate": 1.9330139757384873e-06,
      "loss": 0.0106,
      "step": 2468540
    },
    {
      "epoch": 4.039852582104305,
      "grad_norm": 0.29235249757766724,
      "learning_rate": 1.93294808352497e-06,
      "loss": 0.0111,
      "step": 2468560
    },
    {
      "epoch": 4.039885312542959,
      "grad_norm": 0.4239997863769531,
      "learning_rate": 1.9328821913114528e-06,
      "loss": 0.007,
      "step": 2468580
    },
    {
      "epoch": 4.039918042981612,
      "grad_norm": 0.5602862238883972,
      "learning_rate": 1.9328162990979355e-06,
      "loss": 0.0102,
      "step": 2468600
    },
    {
      "epoch": 4.039950773420266,
      "grad_norm": 0.06966477632522583,
      "learning_rate": 1.9327504068844187e-06,
      "loss": 0.012,
      "step": 2468620
    },
    {
      "epoch": 4.0399835038589185,
      "grad_norm": 0.08651944994926453,
      "learning_rate": 1.9326845146709014e-06,
      "loss": 0.0133,
      "step": 2468640
    },
    {
      "epoch": 4.040016234297572,
      "grad_norm": 0.06416546553373337,
      "learning_rate": 1.932618622457384e-06,
      "loss": 0.0075,
      "step": 2468660
    },
    {
      "epoch": 4.040048964736226,
      "grad_norm": 0.11418947577476501,
      "learning_rate": 1.9325527302438673e-06,
      "loss": 0.0132,
      "step": 2468680
    },
    {
      "epoch": 4.040081695174878,
      "grad_norm": 0.17753642797470093,
      "learning_rate": 1.93248683803035e-06,
      "loss": 0.0074,
      "step": 2468700
    },
    {
      "epoch": 4.040114425613532,
      "grad_norm": 0.10768816620111465,
      "learning_rate": 1.9324209458168332e-06,
      "loss": 0.0115,
      "step": 2468720
    },
    {
      "epoch": 4.0401471560521856,
      "grad_norm": 0.17665034532546997,
      "learning_rate": 1.932355053603316e-06,
      "loss": 0.0094,
      "step": 2468740
    },
    {
      "epoch": 4.040179886490839,
      "grad_norm": 0.18694369494915009,
      "learning_rate": 1.9322891613897987e-06,
      "loss": 0.0129,
      "step": 2468760
    },
    {
      "epoch": 4.040212616929492,
      "grad_norm": 0.15079766511917114,
      "learning_rate": 1.9322232691762815e-06,
      "loss": 0.0095,
      "step": 2468780
    },
    {
      "epoch": 4.0402453473681454,
      "grad_norm": 0.12907017767429352,
      "learning_rate": 1.932157376962764e-06,
      "loss": 0.0125,
      "step": 2468800
    },
    {
      "epoch": 4.040278077806799,
      "grad_norm": 0.16336271166801453,
      "learning_rate": 1.9320914847492474e-06,
      "loss": 0.0079,
      "step": 2468820
    },
    {
      "epoch": 4.040310808245452,
      "grad_norm": 0.12583056092262268,
      "learning_rate": 1.93202559253573e-06,
      "loss": 0.0094,
      "step": 2468840
    },
    {
      "epoch": 4.040343538684105,
      "grad_norm": 0.17983989417552948,
      "learning_rate": 1.9319597003222133e-06,
      "loss": 0.0112,
      "step": 2468860
    },
    {
      "epoch": 4.040376269122759,
      "grad_norm": 0.3440326154232025,
      "learning_rate": 1.931893808108696e-06,
      "loss": 0.0101,
      "step": 2468880
    },
    {
      "epoch": 4.0404089995614125,
      "grad_norm": 0.09859973192214966,
      "learning_rate": 1.9318279158951787e-06,
      "loss": 0.012,
      "step": 2468900
    },
    {
      "epoch": 4.040441730000065,
      "grad_norm": 0.6514437198638916,
      "learning_rate": 1.931762023681662e-06,
      "loss": 0.016,
      "step": 2468920
    },
    {
      "epoch": 4.040474460438719,
      "grad_norm": 0.20089520514011383,
      "learning_rate": 1.9316961314681446e-06,
      "loss": 0.0105,
      "step": 2468940
    },
    {
      "epoch": 4.040507190877372,
      "grad_norm": 0.4482753276824951,
      "learning_rate": 1.9316302392546274e-06,
      "loss": 0.0114,
      "step": 2468960
    },
    {
      "epoch": 4.040539921316025,
      "grad_norm": 0.09938953071832657,
      "learning_rate": 1.93156434704111e-06,
      "loss": 0.0127,
      "step": 2468980
    },
    {
      "epoch": 4.040572651754679,
      "grad_norm": 0.19837984442710876,
      "learning_rate": 1.9314984548275933e-06,
      "loss": 0.0075,
      "step": 2469000
    },
    {
      "epoch": 4.040605382193332,
      "grad_norm": 0.22480283677577972,
      "learning_rate": 1.931432562614076e-06,
      "loss": 0.0093,
      "step": 2469020
    },
    {
      "epoch": 4.040638112631986,
      "grad_norm": 0.3608088195323944,
      "learning_rate": 1.931366670400559e-06,
      "loss": 0.0075,
      "step": 2469040
    },
    {
      "epoch": 4.040670843070639,
      "grad_norm": 0.48651477694511414,
      "learning_rate": 1.931300778187042e-06,
      "loss": 0.0103,
      "step": 2469060
    },
    {
      "epoch": 4.040703573509292,
      "grad_norm": 0.2918727993965149,
      "learning_rate": 1.9312348859735247e-06,
      "loss": 0.006,
      "step": 2469080
    },
    {
      "epoch": 4.040736303947946,
      "grad_norm": 0.15291394293308258,
      "learning_rate": 1.9311689937600074e-06,
      "loss": 0.0072,
      "step": 2469100
    },
    {
      "epoch": 4.0407690343865985,
      "grad_norm": 0.4986618757247925,
      "learning_rate": 1.9311031015464906e-06,
      "loss": 0.012,
      "step": 2469120
    },
    {
      "epoch": 4.040801764825252,
      "grad_norm": 0.11026311665773392,
      "learning_rate": 1.9310372093329733e-06,
      "loss": 0.0089,
      "step": 2469140
    },
    {
      "epoch": 4.040834495263906,
      "grad_norm": 0.09758705645799637,
      "learning_rate": 1.930971317119456e-06,
      "loss": 0.0068,
      "step": 2469160
    },
    {
      "epoch": 4.040867225702559,
      "grad_norm": 0.4084555208683014,
      "learning_rate": 1.930905424905939e-06,
      "loss": 0.0085,
      "step": 2469180
    },
    {
      "epoch": 4.040899956141212,
      "grad_norm": 0.1072973981499672,
      "learning_rate": 1.930839532692422e-06,
      "loss": 0.0109,
      "step": 2469200
    },
    {
      "epoch": 4.040932686579866,
      "grad_norm": 0.18723276257514954,
      "learning_rate": 1.9307736404789047e-06,
      "loss": 0.01,
      "step": 2469220
    },
    {
      "epoch": 4.040965417018519,
      "grad_norm": 0.29867875576019287,
      "learning_rate": 1.930707748265388e-06,
      "loss": 0.0088,
      "step": 2469240
    },
    {
      "epoch": 4.040998147457172,
      "grad_norm": 0.4024618864059448,
      "learning_rate": 1.9306418560518706e-06,
      "loss": 0.0107,
      "step": 2469260
    },
    {
      "epoch": 4.0410308778958255,
      "grad_norm": 0.11541985720396042,
      "learning_rate": 1.9305759638383533e-06,
      "loss": 0.0114,
      "step": 2469280
    },
    {
      "epoch": 4.041063608334479,
      "grad_norm": 0.3133303225040436,
      "learning_rate": 1.930510071624836e-06,
      "loss": 0.0136,
      "step": 2469300
    },
    {
      "epoch": 4.041096338773132,
      "grad_norm": 0.11326151341199875,
      "learning_rate": 1.9304441794113192e-06,
      "loss": 0.0105,
      "step": 2469320
    },
    {
      "epoch": 4.041129069211785,
      "grad_norm": 0.3228048086166382,
      "learning_rate": 1.930378287197802e-06,
      "loss": 0.0094,
      "step": 2469340
    },
    {
      "epoch": 4.041161799650439,
      "grad_norm": 0.24648311734199524,
      "learning_rate": 1.9303123949842847e-06,
      "loss": 0.0104,
      "step": 2469360
    },
    {
      "epoch": 4.041194530089093,
      "grad_norm": 0.6467482447624207,
      "learning_rate": 1.930246502770768e-06,
      "loss": 0.0122,
      "step": 2469380
    },
    {
      "epoch": 4.041227260527745,
      "grad_norm": 0.7964125275611877,
      "learning_rate": 1.9301806105572506e-06,
      "loss": 0.0081,
      "step": 2469400
    },
    {
      "epoch": 4.041259990966399,
      "grad_norm": 0.1579514890909195,
      "learning_rate": 1.9301147183437334e-06,
      "loss": 0.01,
      "step": 2469420
    },
    {
      "epoch": 4.0412927214050525,
      "grad_norm": 0.3929920196533203,
      "learning_rate": 1.9300488261302165e-06,
      "loss": 0.0097,
      "step": 2469440
    },
    {
      "epoch": 4.041325451843705,
      "grad_norm": 0.08879304677248001,
      "learning_rate": 1.9299829339166993e-06,
      "loss": 0.005,
      "step": 2469460
    },
    {
      "epoch": 4.041358182282359,
      "grad_norm": 0.43175575137138367,
      "learning_rate": 1.929917041703182e-06,
      "loss": 0.0077,
      "step": 2469480
    },
    {
      "epoch": 4.041390912721012,
      "grad_norm": 0.1630249321460724,
      "learning_rate": 1.9298511494896647e-06,
      "loss": 0.0079,
      "step": 2469500
    },
    {
      "epoch": 4.041423643159666,
      "grad_norm": 0.2905883193016052,
      "learning_rate": 1.929785257276148e-06,
      "loss": 0.0137,
      "step": 2469520
    },
    {
      "epoch": 4.041456373598319,
      "grad_norm": 0.4728321433067322,
      "learning_rate": 1.9297193650626306e-06,
      "loss": 0.0103,
      "step": 2469540
    },
    {
      "epoch": 4.041489104036972,
      "grad_norm": 0.29992184042930603,
      "learning_rate": 1.929653472849114e-06,
      "loss": 0.0142,
      "step": 2469560
    },
    {
      "epoch": 4.041521834475626,
      "grad_norm": 0.3880153298377991,
      "learning_rate": 1.9295875806355965e-06,
      "loss": 0.0102,
      "step": 2469580
    },
    {
      "epoch": 4.041554564914279,
      "grad_norm": 0.4024014472961426,
      "learning_rate": 1.9295216884220793e-06,
      "loss": 0.0098,
      "step": 2469600
    },
    {
      "epoch": 4.041587295352932,
      "grad_norm": 0.19225144386291504,
      "learning_rate": 1.929455796208562e-06,
      "loss": 0.0092,
      "step": 2469620
    },
    {
      "epoch": 4.041620025791586,
      "grad_norm": 0.19869330525398254,
      "learning_rate": 1.929389903995045e-06,
      "loss": 0.0062,
      "step": 2469640
    },
    {
      "epoch": 4.041652756230239,
      "grad_norm": 0.2366001456975937,
      "learning_rate": 1.929324011781528e-06,
      "loss": 0.0063,
      "step": 2469660
    },
    {
      "epoch": 4.041685486668892,
      "grad_norm": 0.1953430324792862,
      "learning_rate": 1.9292581195680107e-06,
      "loss": 0.0088,
      "step": 2469680
    },
    {
      "epoch": 4.041718217107546,
      "grad_norm": 0.4570253789424896,
      "learning_rate": 1.9291922273544934e-06,
      "loss": 0.0103,
      "step": 2469700
    },
    {
      "epoch": 4.041750947546199,
      "grad_norm": 0.16860531270503998,
      "learning_rate": 1.9291263351409766e-06,
      "loss": 0.0087,
      "step": 2469720
    },
    {
      "epoch": 4.041783677984852,
      "grad_norm": 0.18211543560028076,
      "learning_rate": 1.9290604429274597e-06,
      "loss": 0.0077,
      "step": 2469740
    },
    {
      "epoch": 4.0418164084235055,
      "grad_norm": 0.22716854512691498,
      "learning_rate": 1.9289945507139425e-06,
      "loss": 0.0104,
      "step": 2469760
    },
    {
      "epoch": 4.041849138862159,
      "grad_norm": 0.10330457240343094,
      "learning_rate": 1.9289286585004252e-06,
      "loss": 0.0132,
      "step": 2469780
    },
    {
      "epoch": 4.041881869300813,
      "grad_norm": 0.19861812889575958,
      "learning_rate": 1.928862766286908e-06,
      "loss": 0.0061,
      "step": 2469800
    },
    {
      "epoch": 4.041914599739465,
      "grad_norm": 0.13950929045677185,
      "learning_rate": 1.9287968740733907e-06,
      "loss": 0.008,
      "step": 2469820
    },
    {
      "epoch": 4.041947330178119,
      "grad_norm": 0.16556158661842346,
      "learning_rate": 1.928730981859874e-06,
      "loss": 0.008,
      "step": 2469840
    },
    {
      "epoch": 4.041980060616773,
      "grad_norm": 0.15072470903396606,
      "learning_rate": 1.9286650896463566e-06,
      "loss": 0.0094,
      "step": 2469860
    },
    {
      "epoch": 4.042012791055425,
      "grad_norm": 0.11345744878053665,
      "learning_rate": 1.9285991974328393e-06,
      "loss": 0.0101,
      "step": 2469880
    },
    {
      "epoch": 4.042045521494079,
      "grad_norm": 0.21407882869243622,
      "learning_rate": 1.9285333052193225e-06,
      "loss": 0.0114,
      "step": 2469900
    },
    {
      "epoch": 4.0420782519327325,
      "grad_norm": 0.30742210149765015,
      "learning_rate": 1.9284674130058052e-06,
      "loss": 0.0078,
      "step": 2469920
    },
    {
      "epoch": 4.042110982371386,
      "grad_norm": 0.057689521461725235,
      "learning_rate": 1.9284015207922884e-06,
      "loss": 0.0092,
      "step": 2469940
    },
    {
      "epoch": 4.042143712810039,
      "grad_norm": 0.21728631854057312,
      "learning_rate": 1.928335628578771e-06,
      "loss": 0.0108,
      "step": 2469960
    },
    {
      "epoch": 4.042176443248692,
      "grad_norm": 0.2062036693096161,
      "learning_rate": 1.928269736365254e-06,
      "loss": 0.0114,
      "step": 2469980
    },
    {
      "epoch": 4.042209173687346,
      "grad_norm": 0.31352952122688293,
      "learning_rate": 1.9282038441517366e-06,
      "loss": 0.0073,
      "step": 2470000
    },
    {
      "epoch": 4.042241904125999,
      "grad_norm": 0.10983042418956757,
      "learning_rate": 1.9281379519382194e-06,
      "loss": 0.007,
      "step": 2470020
    },
    {
      "epoch": 4.042274634564652,
      "grad_norm": 0.33989569544792175,
      "learning_rate": 1.9280720597247025e-06,
      "loss": 0.0092,
      "step": 2470040
    },
    {
      "epoch": 4.042307365003306,
      "grad_norm": 0.2076987475156784,
      "learning_rate": 1.9280061675111853e-06,
      "loss": 0.0061,
      "step": 2470060
    },
    {
      "epoch": 4.0423400954419595,
      "grad_norm": 0.2646152377128601,
      "learning_rate": 1.9279402752976684e-06,
      "loss": 0.0061,
      "step": 2470080
    },
    {
      "epoch": 4.042372825880612,
      "grad_norm": 0.16599202156066895,
      "learning_rate": 1.927874383084151e-06,
      "loss": 0.0056,
      "step": 2470100
    },
    {
      "epoch": 4.042405556319266,
      "grad_norm": 0.2856036424636841,
      "learning_rate": 1.927808490870634e-06,
      "loss": 0.0092,
      "step": 2470120
    },
    {
      "epoch": 4.042438286757919,
      "grad_norm": 0.234644815325737,
      "learning_rate": 1.927742598657117e-06,
      "loss": 0.0093,
      "step": 2470140
    },
    {
      "epoch": 4.042471017196572,
      "grad_norm": 0.34691303968429565,
      "learning_rate": 1.9276767064436e-06,
      "loss": 0.0101,
      "step": 2470160
    },
    {
      "epoch": 4.042503747635226,
      "grad_norm": 0.1317458301782608,
      "learning_rate": 1.9276108142300826e-06,
      "loss": 0.0079,
      "step": 2470180
    },
    {
      "epoch": 4.042536478073879,
      "grad_norm": 0.1468559056520462,
      "learning_rate": 1.9275449220165653e-06,
      "loss": 0.0065,
      "step": 2470200
    },
    {
      "epoch": 4.042569208512533,
      "grad_norm": 0.1519959270954132,
      "learning_rate": 1.927479029803048e-06,
      "loss": 0.0078,
      "step": 2470220
    },
    {
      "epoch": 4.042601938951186,
      "grad_norm": 0.3600241541862488,
      "learning_rate": 1.927413137589531e-06,
      "loss": 0.0066,
      "step": 2470240
    },
    {
      "epoch": 4.042634669389839,
      "grad_norm": 0.14128117263317108,
      "learning_rate": 1.9273472453760144e-06,
      "loss": 0.0092,
      "step": 2470260
    },
    {
      "epoch": 4.042667399828493,
      "grad_norm": 1.6393580436706543,
      "learning_rate": 1.927281353162497e-06,
      "loss": 0.0118,
      "step": 2470280
    },
    {
      "epoch": 4.0427001302671455,
      "grad_norm": 0.1421177089214325,
      "learning_rate": 1.92721546094898e-06,
      "loss": 0.0142,
      "step": 2470300
    },
    {
      "epoch": 4.042732860705799,
      "grad_norm": 0.36635327339172363,
      "learning_rate": 1.9271495687354626e-06,
      "loss": 0.0087,
      "step": 2470320
    },
    {
      "epoch": 4.042765591144453,
      "grad_norm": 0.1438872069120407,
      "learning_rate": 1.9270836765219457e-06,
      "loss": 0.0086,
      "step": 2470340
    },
    {
      "epoch": 4.042798321583106,
      "grad_norm": 0.40644320845603943,
      "learning_rate": 1.9270177843084285e-06,
      "loss": 0.0098,
      "step": 2470360
    },
    {
      "epoch": 4.042831052021759,
      "grad_norm": 0.45732182264328003,
      "learning_rate": 1.9269518920949112e-06,
      "loss": 0.0084,
      "step": 2470380
    },
    {
      "epoch": 4.042863782460413,
      "grad_norm": 0.0960029661655426,
      "learning_rate": 1.926885999881394e-06,
      "loss": 0.0092,
      "step": 2470400
    },
    {
      "epoch": 4.042896512899066,
      "grad_norm": 0.31470736861228943,
      "learning_rate": 1.926820107667877e-06,
      "loss": 0.0093,
      "step": 2470420
    },
    {
      "epoch": 4.042929243337719,
      "grad_norm": 1.0720715522766113,
      "learning_rate": 1.92675421545436e-06,
      "loss": 0.0057,
      "step": 2470440
    },
    {
      "epoch": 4.0429619737763725,
      "grad_norm": 0.09462010115385056,
      "learning_rate": 1.926688323240843e-06,
      "loss": 0.0069,
      "step": 2470460
    },
    {
      "epoch": 4.042994704215026,
      "grad_norm": 0.1992831826210022,
      "learning_rate": 1.9266224310273258e-06,
      "loss": 0.0074,
      "step": 2470480
    },
    {
      "epoch": 4.04302743465368,
      "grad_norm": 0.34698596596717834,
      "learning_rate": 1.9265565388138085e-06,
      "loss": 0.0078,
      "step": 2470500
    },
    {
      "epoch": 4.043060165092332,
      "grad_norm": 0.21825607120990753,
      "learning_rate": 1.9264906466002912e-06,
      "loss": 0.0068,
      "step": 2470520
    },
    {
      "epoch": 4.043092895530986,
      "grad_norm": 0.23296469449996948,
      "learning_rate": 1.9264247543867744e-06,
      "loss": 0.009,
      "step": 2470540
    },
    {
      "epoch": 4.0431256259696395,
      "grad_norm": 0.6408627033233643,
      "learning_rate": 1.926358862173257e-06,
      "loss": 0.0109,
      "step": 2470560
    },
    {
      "epoch": 4.043158356408292,
      "grad_norm": 0.21466687321662903,
      "learning_rate": 1.92629296995974e-06,
      "loss": 0.0089,
      "step": 2470580
    },
    {
      "epoch": 4.043191086846946,
      "grad_norm": 0.23589284718036652,
      "learning_rate": 1.9262270777462226e-06,
      "loss": 0.0068,
      "step": 2470600
    },
    {
      "epoch": 4.043223817285599,
      "grad_norm": 0.09642594307661057,
      "learning_rate": 1.926161185532706e-06,
      "loss": 0.0074,
      "step": 2470620
    },
    {
      "epoch": 4.043256547724253,
      "grad_norm": 0.14310523867607117,
      "learning_rate": 1.9260952933191885e-06,
      "loss": 0.0077,
      "step": 2470640
    },
    {
      "epoch": 4.043289278162906,
      "grad_norm": 0.1632288694381714,
      "learning_rate": 1.9260294011056717e-06,
      "loss": 0.0143,
      "step": 2470660
    },
    {
      "epoch": 4.043322008601559,
      "grad_norm": 0.19441798329353333,
      "learning_rate": 1.9259635088921544e-06,
      "loss": 0.007,
      "step": 2470680
    },
    {
      "epoch": 4.043354739040213,
      "grad_norm": 0.3257935643196106,
      "learning_rate": 1.925897616678637e-06,
      "loss": 0.009,
      "step": 2470700
    },
    {
      "epoch": 4.043387469478866,
      "grad_norm": 0.057064127177000046,
      "learning_rate": 1.92583172446512e-06,
      "loss": 0.0073,
      "step": 2470720
    },
    {
      "epoch": 4.043420199917519,
      "grad_norm": 0.2176124006509781,
      "learning_rate": 1.925765832251603e-06,
      "loss": 0.0071,
      "step": 2470740
    },
    {
      "epoch": 4.043452930356173,
      "grad_norm": 0.33462661504745483,
      "learning_rate": 1.925699940038086e-06,
      "loss": 0.0098,
      "step": 2470760
    },
    {
      "epoch": 4.043485660794826,
      "grad_norm": 0.6616427302360535,
      "learning_rate": 1.925634047824569e-06,
      "loss": 0.015,
      "step": 2470780
    },
    {
      "epoch": 4.043518391233479,
      "grad_norm": 0.24504104256629944,
      "learning_rate": 1.9255681556110517e-06,
      "loss": 0.0128,
      "step": 2470800
    },
    {
      "epoch": 4.043551121672133,
      "grad_norm": 0.3683338463306427,
      "learning_rate": 1.9255022633975345e-06,
      "loss": 0.01,
      "step": 2470820
    },
    {
      "epoch": 4.043583852110786,
      "grad_norm": 0.13892383873462677,
      "learning_rate": 1.925436371184017e-06,
      "loss": 0.0068,
      "step": 2470840
    },
    {
      "epoch": 4.043616582549439,
      "grad_norm": 0.21375438570976257,
      "learning_rate": 1.9253704789705004e-06,
      "loss": 0.0078,
      "step": 2470860
    },
    {
      "epoch": 4.043649312988093,
      "grad_norm": 0.09107078611850739,
      "learning_rate": 1.925304586756983e-06,
      "loss": 0.0121,
      "step": 2470880
    },
    {
      "epoch": 4.043682043426746,
      "grad_norm": 0.18427391350269318,
      "learning_rate": 1.925238694543466e-06,
      "loss": 0.0089,
      "step": 2470900
    },
    {
      "epoch": 4.043714773865399,
      "grad_norm": 0.07778220623731613,
      "learning_rate": 1.9251728023299486e-06,
      "loss": 0.0106,
      "step": 2470920
    },
    {
      "epoch": 4.0437475043040525,
      "grad_norm": 0.2747160792350769,
      "learning_rate": 1.9251069101164317e-06,
      "loss": 0.0062,
      "step": 2470940
    },
    {
      "epoch": 4.043780234742706,
      "grad_norm": 0.3891908824443817,
      "learning_rate": 1.925041017902915e-06,
      "loss": 0.0117,
      "step": 2470960
    },
    {
      "epoch": 4.04381296518136,
      "grad_norm": 0.32617437839508057,
      "learning_rate": 1.9249751256893976e-06,
      "loss": 0.0132,
      "step": 2470980
    },
    {
      "epoch": 4.043845695620012,
      "grad_norm": 0.3279179334640503,
      "learning_rate": 1.9249092334758804e-06,
      "loss": 0.0095,
      "step": 2471000
    },
    {
      "epoch": 4.043878426058666,
      "grad_norm": 0.5751053690910339,
      "learning_rate": 1.924843341262363e-06,
      "loss": 0.0123,
      "step": 2471020
    },
    {
      "epoch": 4.04391115649732,
      "grad_norm": 0.08225686848163605,
      "learning_rate": 1.924777449048846e-06,
      "loss": 0.0092,
      "step": 2471040
    },
    {
      "epoch": 4.043943886935972,
      "grad_norm": 0.1252388209104538,
      "learning_rate": 1.924711556835329e-06,
      "loss": 0.011,
      "step": 2471060
    },
    {
      "epoch": 4.043976617374626,
      "grad_norm": 0.2962913513183594,
      "learning_rate": 1.9246456646218118e-06,
      "loss": 0.0089,
      "step": 2471080
    },
    {
      "epoch": 4.0440093478132795,
      "grad_norm": 0.16461586952209473,
      "learning_rate": 1.9245797724082945e-06,
      "loss": 0.0099,
      "step": 2471100
    },
    {
      "epoch": 4.044042078251933,
      "grad_norm": 0.16009590029716492,
      "learning_rate": 1.9245138801947772e-06,
      "loss": 0.0102,
      "step": 2471120
    },
    {
      "epoch": 4.044074808690586,
      "grad_norm": 1.0516128540039062,
      "learning_rate": 1.9244479879812604e-06,
      "loss": 0.0104,
      "step": 2471140
    },
    {
      "epoch": 4.044107539129239,
      "grad_norm": 0.24794727563858032,
      "learning_rate": 1.9243820957677436e-06,
      "loss": 0.0073,
      "step": 2471160
    },
    {
      "epoch": 4.044140269567893,
      "grad_norm": 0.2777285575866699,
      "learning_rate": 1.9243162035542263e-06,
      "loss": 0.0084,
      "step": 2471180
    },
    {
      "epoch": 4.044173000006546,
      "grad_norm": 0.5523113012313843,
      "learning_rate": 1.924250311340709e-06,
      "loss": 0.0113,
      "step": 2471200
    },
    {
      "epoch": 4.044205730445199,
      "grad_norm": 0.2134203165769577,
      "learning_rate": 1.924184419127192e-06,
      "loss": 0.0052,
      "step": 2471220
    },
    {
      "epoch": 4.044238460883853,
      "grad_norm": 0.15028421580791473,
      "learning_rate": 1.9241185269136745e-06,
      "loss": 0.0081,
      "step": 2471240
    },
    {
      "epoch": 4.0442711913225065,
      "grad_norm": 0.4613131284713745,
      "learning_rate": 1.9240526347001577e-06,
      "loss": 0.0058,
      "step": 2471260
    },
    {
      "epoch": 4.044303921761159,
      "grad_norm": 0.06878886371850967,
      "learning_rate": 1.9239867424866404e-06,
      "loss": 0.0083,
      "step": 2471280
    },
    {
      "epoch": 4.044336652199813,
      "grad_norm": 0.3723664879798889,
      "learning_rate": 1.923920850273123e-06,
      "loss": 0.0085,
      "step": 2471300
    },
    {
      "epoch": 4.044369382638466,
      "grad_norm": 0.26335617899894714,
      "learning_rate": 1.9238549580596063e-06,
      "loss": 0.0083,
      "step": 2471320
    },
    {
      "epoch": 4.044402113077119,
      "grad_norm": 1.1051310300827026,
      "learning_rate": 1.923789065846089e-06,
      "loss": 0.0076,
      "step": 2471340
    },
    {
      "epoch": 4.044434843515773,
      "grad_norm": 0.18228574097156525,
      "learning_rate": 1.9237231736325722e-06,
      "loss": 0.0118,
      "step": 2471360
    },
    {
      "epoch": 4.044467573954426,
      "grad_norm": 0.15324705839157104,
      "learning_rate": 1.923657281419055e-06,
      "loss": 0.007,
      "step": 2471380
    },
    {
      "epoch": 4.04450030439308,
      "grad_norm": 0.2955102026462555,
      "learning_rate": 1.9235913892055377e-06,
      "loss": 0.0086,
      "step": 2471400
    },
    {
      "epoch": 4.0445330348317325,
      "grad_norm": 0.23899900913238525,
      "learning_rate": 1.9235254969920205e-06,
      "loss": 0.0105,
      "step": 2471420
    },
    {
      "epoch": 4.044565765270386,
      "grad_norm": 0.37999051809310913,
      "learning_rate": 1.923459604778503e-06,
      "loss": 0.0084,
      "step": 2471440
    },
    {
      "epoch": 4.04459849570904,
      "grad_norm": 0.23567408323287964,
      "learning_rate": 1.9233937125649864e-06,
      "loss": 0.0059,
      "step": 2471460
    },
    {
      "epoch": 4.044631226147692,
      "grad_norm": 0.13948777318000793,
      "learning_rate": 1.923327820351469e-06,
      "loss": 0.0065,
      "step": 2471480
    },
    {
      "epoch": 4.044663956586346,
      "grad_norm": 0.12932312488555908,
      "learning_rate": 1.9232619281379523e-06,
      "loss": 0.0111,
      "step": 2471500
    },
    {
      "epoch": 4.044696687025,
      "grad_norm": 0.2694675922393799,
      "learning_rate": 1.923196035924435e-06,
      "loss": 0.0091,
      "step": 2471520
    },
    {
      "epoch": 4.044729417463653,
      "grad_norm": 0.3329046964645386,
      "learning_rate": 1.9231301437109177e-06,
      "loss": 0.011,
      "step": 2471540
    },
    {
      "epoch": 4.044762147902306,
      "grad_norm": 0.33552810549736023,
      "learning_rate": 1.923064251497401e-06,
      "loss": 0.0072,
      "step": 2471560
    },
    {
      "epoch": 4.0447948783409595,
      "grad_norm": 0.22620506584644318,
      "learning_rate": 1.9229983592838837e-06,
      "loss": 0.015,
      "step": 2471580
    },
    {
      "epoch": 4.044827608779613,
      "grad_norm": 0.21055205166339874,
      "learning_rate": 1.9229324670703664e-06,
      "loss": 0.0116,
      "step": 2471600
    },
    {
      "epoch": 4.044860339218266,
      "grad_norm": 0.1389947235584259,
      "learning_rate": 1.922866574856849e-06,
      "loss": 0.0086,
      "step": 2471620
    },
    {
      "epoch": 4.044893069656919,
      "grad_norm": 0.5209145545959473,
      "learning_rate": 1.9228006826433323e-06,
      "loss": 0.0062,
      "step": 2471640
    },
    {
      "epoch": 4.044925800095573,
      "grad_norm": 0.19067774713039398,
      "learning_rate": 1.922734790429815e-06,
      "loss": 0.0074,
      "step": 2471660
    },
    {
      "epoch": 4.044958530534227,
      "grad_norm": 0.13994121551513672,
      "learning_rate": 1.922668898216298e-06,
      "loss": 0.0061,
      "step": 2471680
    },
    {
      "epoch": 4.044991260972879,
      "grad_norm": 0.13959218561649323,
      "learning_rate": 1.922603006002781e-06,
      "loss": 0.0075,
      "step": 2471700
    },
    {
      "epoch": 4.045023991411533,
      "grad_norm": 0.0831686332821846,
      "learning_rate": 1.9225371137892637e-06,
      "loss": 0.0083,
      "step": 2471720
    },
    {
      "epoch": 4.0450567218501865,
      "grad_norm": 0.22192756831645966,
      "learning_rate": 1.9224712215757464e-06,
      "loss": 0.0082,
      "step": 2471740
    },
    {
      "epoch": 4.045089452288839,
      "grad_norm": 0.6683735847473145,
      "learning_rate": 1.9224053293622296e-06,
      "loss": 0.0066,
      "step": 2471760
    },
    {
      "epoch": 4.045122182727493,
      "grad_norm": 0.23816166818141937,
      "learning_rate": 1.9223394371487123e-06,
      "loss": 0.0101,
      "step": 2471780
    },
    {
      "epoch": 4.045154913166146,
      "grad_norm": 0.10561276972293854,
      "learning_rate": 1.922273544935195e-06,
      "loss": 0.0144,
      "step": 2471800
    },
    {
      "epoch": 4.0451876436048,
      "grad_norm": 0.09237015247344971,
      "learning_rate": 1.922207652721678e-06,
      "loss": 0.011,
      "step": 2471820
    },
    {
      "epoch": 4.045220374043453,
      "grad_norm": 0.19019077718257904,
      "learning_rate": 1.922141760508161e-06,
      "loss": 0.0095,
      "step": 2471840
    },
    {
      "epoch": 4.045253104482106,
      "grad_norm": 0.05576525628566742,
      "learning_rate": 1.9220758682946437e-06,
      "loss": 0.013,
      "step": 2471860
    },
    {
      "epoch": 4.04528583492076,
      "grad_norm": 0.05652511119842529,
      "learning_rate": 1.922009976081127e-06,
      "loss": 0.0098,
      "step": 2471880
    },
    {
      "epoch": 4.045318565359413,
      "grad_norm": 0.48160940408706665,
      "learning_rate": 1.9219440838676096e-06,
      "loss": 0.0099,
      "step": 2471900
    },
    {
      "epoch": 4.045351295798066,
      "grad_norm": 0.04427463933825493,
      "learning_rate": 1.9218781916540923e-06,
      "loss": 0.0071,
      "step": 2471920
    },
    {
      "epoch": 4.04538402623672,
      "grad_norm": 0.40493500232696533,
      "learning_rate": 1.921812299440575e-06,
      "loss": 0.0065,
      "step": 2471940
    },
    {
      "epoch": 4.045416756675373,
      "grad_norm": 0.2973361313343048,
      "learning_rate": 1.9217464072270582e-06,
      "loss": 0.0066,
      "step": 2471960
    },
    {
      "epoch": 4.045449487114026,
      "grad_norm": 0.03465086221694946,
      "learning_rate": 1.921680515013541e-06,
      "loss": 0.0166,
      "step": 2471980
    },
    {
      "epoch": 4.04548221755268,
      "grad_norm": 0.30986183881759644,
      "learning_rate": 1.9216146228000237e-06,
      "loss": 0.0069,
      "step": 2472000
    },
    {
      "epoch": 4.045514947991333,
      "grad_norm": 0.3045060336589813,
      "learning_rate": 1.921548730586507e-06,
      "loss": 0.0121,
      "step": 2472020
    },
    {
      "epoch": 4.045547678429986,
      "grad_norm": 0.3486820161342621,
      "learning_rate": 1.9214828383729896e-06,
      "loss": 0.0104,
      "step": 2472040
    },
    {
      "epoch": 4.04558040886864,
      "grad_norm": 0.4955958425998688,
      "learning_rate": 1.9214169461594724e-06,
      "loss": 0.0079,
      "step": 2472060
    },
    {
      "epoch": 4.045613139307293,
      "grad_norm": 0.1691804975271225,
      "learning_rate": 1.9213510539459555e-06,
      "loss": 0.008,
      "step": 2472080
    },
    {
      "epoch": 4.045645869745947,
      "grad_norm": 0.3405502140522003,
      "learning_rate": 1.9212851617324383e-06,
      "loss": 0.0069,
      "step": 2472100
    },
    {
      "epoch": 4.0456786001845995,
      "grad_norm": 0.27937251329421997,
      "learning_rate": 1.921219269518921e-06,
      "loss": 0.0089,
      "step": 2472120
    },
    {
      "epoch": 4.045711330623253,
      "grad_norm": 0.32678747177124023,
      "learning_rate": 1.9211533773054038e-06,
      "loss": 0.0137,
      "step": 2472140
    },
    {
      "epoch": 4.045744061061907,
      "grad_norm": 0.2671923041343689,
      "learning_rate": 1.921087485091887e-06,
      "loss": 0.0102,
      "step": 2472160
    },
    {
      "epoch": 4.045776791500559,
      "grad_norm": 0.4086519181728363,
      "learning_rate": 1.9210215928783697e-06,
      "loss": 0.0101,
      "step": 2472180
    },
    {
      "epoch": 4.045809521939213,
      "grad_norm": 0.22635139524936676,
      "learning_rate": 1.920955700664853e-06,
      "loss": 0.0082,
      "step": 2472200
    },
    {
      "epoch": 4.0458422523778665,
      "grad_norm": 0.338724821805954,
      "learning_rate": 1.9208898084513356e-06,
      "loss": 0.0083,
      "step": 2472220
    },
    {
      "epoch": 4.04587498281652,
      "grad_norm": 0.20215721428394318,
      "learning_rate": 1.9208239162378183e-06,
      "loss": 0.0072,
      "step": 2472240
    },
    {
      "epoch": 4.045907713255173,
      "grad_norm": 0.36998477578163147,
      "learning_rate": 1.920758024024301e-06,
      "loss": 0.0093,
      "step": 2472260
    },
    {
      "epoch": 4.045940443693826,
      "grad_norm": 0.27011334896087646,
      "learning_rate": 1.920692131810784e-06,
      "loss": 0.0131,
      "step": 2472280
    },
    {
      "epoch": 4.04597317413248,
      "grad_norm": 0.20942775905132294,
      "learning_rate": 1.920626239597267e-06,
      "loss": 0.0081,
      "step": 2472300
    },
    {
      "epoch": 4.046005904571133,
      "grad_norm": 0.16558513045310974,
      "learning_rate": 1.9205603473837497e-06,
      "loss": 0.0057,
      "step": 2472320
    },
    {
      "epoch": 4.046038635009786,
      "grad_norm": 0.13278868794441223,
      "learning_rate": 1.9204944551702324e-06,
      "loss": 0.0091,
      "step": 2472340
    },
    {
      "epoch": 4.04607136544844,
      "grad_norm": 0.2910383641719818,
      "learning_rate": 1.9204285629567156e-06,
      "loss": 0.01,
      "step": 2472360
    },
    {
      "epoch": 4.0461040958870935,
      "grad_norm": 0.2331441342830658,
      "learning_rate": 1.9203626707431987e-06,
      "loss": 0.0093,
      "step": 2472380
    },
    {
      "epoch": 4.046136826325746,
      "grad_norm": 0.22560752928256989,
      "learning_rate": 1.9202967785296815e-06,
      "loss": 0.0089,
      "step": 2472400
    },
    {
      "epoch": 4.0461695567644,
      "grad_norm": 0.28607234358787537,
      "learning_rate": 1.9202308863161642e-06,
      "loss": 0.0075,
      "step": 2472420
    },
    {
      "epoch": 4.046202287203053,
      "grad_norm": 0.24629861116409302,
      "learning_rate": 1.920164994102647e-06,
      "loss": 0.0074,
      "step": 2472440
    },
    {
      "epoch": 4.046235017641706,
      "grad_norm": 0.29392877221107483,
      "learning_rate": 1.9200991018891297e-06,
      "loss": 0.0099,
      "step": 2472460
    },
    {
      "epoch": 4.04626774808036,
      "grad_norm": 0.2375309020280838,
      "learning_rate": 1.920033209675613e-06,
      "loss": 0.013,
      "step": 2472480
    },
    {
      "epoch": 4.046300478519013,
      "grad_norm": 0.1501566618680954,
      "learning_rate": 1.9199673174620956e-06,
      "loss": 0.0096,
      "step": 2472500
    },
    {
      "epoch": 4.046333208957666,
      "grad_norm": 0.13522584736347198,
      "learning_rate": 1.9199014252485783e-06,
      "loss": 0.0111,
      "step": 2472520
    },
    {
      "epoch": 4.04636593939632,
      "grad_norm": 0.5033820271492004,
      "learning_rate": 1.9198355330350615e-06,
      "loss": 0.0116,
      "step": 2472540
    },
    {
      "epoch": 4.046398669834973,
      "grad_norm": 0.29475393891334534,
      "learning_rate": 1.9197696408215443e-06,
      "loss": 0.0097,
      "step": 2472560
    },
    {
      "epoch": 4.046431400273627,
      "grad_norm": 0.1053297147154808,
      "learning_rate": 1.9197037486080274e-06,
      "loss": 0.0104,
      "step": 2472580
    },
    {
      "epoch": 4.0464641307122795,
      "grad_norm": 0.08271247148513794,
      "learning_rate": 1.91963785639451e-06,
      "loss": 0.0126,
      "step": 2472600
    },
    {
      "epoch": 4.046496861150933,
      "grad_norm": 0.16288530826568604,
      "learning_rate": 1.919571964180993e-06,
      "loss": 0.012,
      "step": 2472620
    },
    {
      "epoch": 4.046529591589587,
      "grad_norm": 0.1698189526796341,
      "learning_rate": 1.9195060719674756e-06,
      "loss": 0.0051,
      "step": 2472640
    },
    {
      "epoch": 4.046562322028239,
      "grad_norm": 0.13929717242717743,
      "learning_rate": 1.9194401797539584e-06,
      "loss": 0.011,
      "step": 2472660
    },
    {
      "epoch": 4.046595052466893,
      "grad_norm": 0.40043163299560547,
      "learning_rate": 1.9193742875404415e-06,
      "loss": 0.005,
      "step": 2472680
    },
    {
      "epoch": 4.046627782905547,
      "grad_norm": 0.01871827058494091,
      "learning_rate": 1.9193083953269243e-06,
      "loss": 0.0059,
      "step": 2472700
    },
    {
      "epoch": 4.0466605133442,
      "grad_norm": 0.0814773440361023,
      "learning_rate": 1.9192425031134074e-06,
      "loss": 0.0071,
      "step": 2472720
    },
    {
      "epoch": 4.046693243782853,
      "grad_norm": 0.49728789925575256,
      "learning_rate": 1.91917661089989e-06,
      "loss": 0.01,
      "step": 2472740
    },
    {
      "epoch": 4.0467259742215065,
      "grad_norm": 0.49220022559165955,
      "learning_rate": 1.919110718686373e-06,
      "loss": 0.0095,
      "step": 2472760
    },
    {
      "epoch": 4.04675870466016,
      "grad_norm": 0.09022296965122223,
      "learning_rate": 1.919044826472856e-06,
      "loss": 0.0071,
      "step": 2472780
    },
    {
      "epoch": 4.046791435098813,
      "grad_norm": 0.06530383229255676,
      "learning_rate": 1.918978934259339e-06,
      "loss": 0.009,
      "step": 2472800
    },
    {
      "epoch": 4.046824165537466,
      "grad_norm": 0.22622983157634735,
      "learning_rate": 1.9189130420458216e-06,
      "loss": 0.0074,
      "step": 2472820
    },
    {
      "epoch": 4.04685689597612,
      "grad_norm": 0.10771956294775009,
      "learning_rate": 1.9188471498323043e-06,
      "loss": 0.0067,
      "step": 2472840
    },
    {
      "epoch": 4.046889626414774,
      "grad_norm": 0.4600001871585846,
      "learning_rate": 1.918781257618787e-06,
      "loss": 0.0075,
      "step": 2472860
    },
    {
      "epoch": 4.046922356853426,
      "grad_norm": 0.645642876625061,
      "learning_rate": 1.91871536540527e-06,
      "loss": 0.0072,
      "step": 2472880
    },
    {
      "epoch": 4.04695508729208,
      "grad_norm": 0.0817720964550972,
      "learning_rate": 1.9186494731917534e-06,
      "loss": 0.0084,
      "step": 2472900
    },
    {
      "epoch": 4.0469878177307335,
      "grad_norm": 0.1979495882987976,
      "learning_rate": 1.918583580978236e-06,
      "loss": 0.013,
      "step": 2472920
    },
    {
      "epoch": 4.047020548169386,
      "grad_norm": 0.4664424955844879,
      "learning_rate": 1.918517688764719e-06,
      "loss": 0.0106,
      "step": 2472940
    },
    {
      "epoch": 4.04705327860804,
      "grad_norm": 0.22219593822956085,
      "learning_rate": 1.9184517965512016e-06,
      "loss": 0.01,
      "step": 2472960
    },
    {
      "epoch": 4.047086009046693,
      "grad_norm": 0.1320827156305313,
      "learning_rate": 1.9183859043376848e-06,
      "loss": 0.007,
      "step": 2472980
    },
    {
      "epoch": 4.047118739485347,
      "grad_norm": 0.10296419262886047,
      "learning_rate": 1.9183200121241675e-06,
      "loss": 0.0069,
      "step": 2473000
    },
    {
      "epoch": 4.047151469924,
      "grad_norm": 0.23658360540866852,
      "learning_rate": 1.9182541199106502e-06,
      "loss": 0.009,
      "step": 2473020
    },
    {
      "epoch": 4.047184200362653,
      "grad_norm": 0.32554733753204346,
      "learning_rate": 1.918188227697133e-06,
      "loss": 0.0086,
      "step": 2473040
    },
    {
      "epoch": 4.047216930801307,
      "grad_norm": 0.19581426680088043,
      "learning_rate": 1.918122335483616e-06,
      "loss": 0.0105,
      "step": 2473060
    },
    {
      "epoch": 4.0472496612399596,
      "grad_norm": 0.14402861893177032,
      "learning_rate": 1.918056443270099e-06,
      "loss": 0.0098,
      "step": 2473080
    },
    {
      "epoch": 4.047282391678613,
      "grad_norm": 0.1281682252883911,
      "learning_rate": 1.917990551056582e-06,
      "loss": 0.0076,
      "step": 2473100
    },
    {
      "epoch": 4.047315122117267,
      "grad_norm": 0.3743465542793274,
      "learning_rate": 1.9179246588430648e-06,
      "loss": 0.0066,
      "step": 2473120
    },
    {
      "epoch": 4.04734785255592,
      "grad_norm": 0.14131782948970795,
      "learning_rate": 1.9178587666295475e-06,
      "loss": 0.0092,
      "step": 2473140
    },
    {
      "epoch": 4.047380582994573,
      "grad_norm": 0.3633726239204407,
      "learning_rate": 1.9177928744160303e-06,
      "loss": 0.0114,
      "step": 2473160
    },
    {
      "epoch": 4.047413313433227,
      "grad_norm": 0.235775887966156,
      "learning_rate": 1.9177269822025134e-06,
      "loss": 0.0102,
      "step": 2473180
    },
    {
      "epoch": 4.04744604387188,
      "grad_norm": 0.2391158640384674,
      "learning_rate": 1.917661089988996e-06,
      "loss": 0.0068,
      "step": 2473200
    },
    {
      "epoch": 4.047478774310533,
      "grad_norm": 0.028637783601880074,
      "learning_rate": 1.917595197775479e-06,
      "loss": 0.0062,
      "step": 2473220
    },
    {
      "epoch": 4.0475115047491865,
      "grad_norm": 0.5192508101463318,
      "learning_rate": 1.9175293055619616e-06,
      "loss": 0.0136,
      "step": 2473240
    },
    {
      "epoch": 4.04754423518784,
      "grad_norm": 0.07257362455129623,
      "learning_rate": 1.917463413348445e-06,
      "loss": 0.01,
      "step": 2473260
    },
    {
      "epoch": 4.047576965626494,
      "grad_norm": 0.35978156328201294,
      "learning_rate": 1.9173975211349275e-06,
      "loss": 0.0081,
      "step": 2473280
    },
    {
      "epoch": 4.047609696065146,
      "grad_norm": 0.1070762351155281,
      "learning_rate": 1.9173316289214107e-06,
      "loss": 0.0067,
      "step": 2473300
    },
    {
      "epoch": 4.0476424265038,
      "grad_norm": 0.11029544472694397,
      "learning_rate": 1.9172657367078934e-06,
      "loss": 0.0073,
      "step": 2473320
    },
    {
      "epoch": 4.047675156942454,
      "grad_norm": 0.18051645159721375,
      "learning_rate": 1.917199844494376e-06,
      "loss": 0.0092,
      "step": 2473340
    },
    {
      "epoch": 4.047707887381106,
      "grad_norm": 0.528895378112793,
      "learning_rate": 1.917133952280859e-06,
      "loss": 0.0102,
      "step": 2473360
    },
    {
      "epoch": 4.04774061781976,
      "grad_norm": 0.32694971561431885,
      "learning_rate": 1.917068060067342e-06,
      "loss": 0.0161,
      "step": 2473380
    },
    {
      "epoch": 4.0477733482584135,
      "grad_norm": 0.2100011706352234,
      "learning_rate": 1.917002167853825e-06,
      "loss": 0.0085,
      "step": 2473400
    },
    {
      "epoch": 4.047806078697067,
      "grad_norm": 0.09785044193267822,
      "learning_rate": 1.916936275640308e-06,
      "loss": 0.0094,
      "step": 2473420
    },
    {
      "epoch": 4.04783880913572,
      "grad_norm": 0.7361001372337341,
      "learning_rate": 1.9168703834267907e-06,
      "loss": 0.0094,
      "step": 2473440
    },
    {
      "epoch": 4.047871539574373,
      "grad_norm": 0.37787681818008423,
      "learning_rate": 1.9168044912132735e-06,
      "loss": 0.0077,
      "step": 2473460
    },
    {
      "epoch": 4.047904270013027,
      "grad_norm": 0.1772078275680542,
      "learning_rate": 1.916738598999756e-06,
      "loss": 0.0076,
      "step": 2473480
    },
    {
      "epoch": 4.04793700045168,
      "grad_norm": 0.3220560848712921,
      "learning_rate": 1.9166727067862394e-06,
      "loss": 0.0055,
      "step": 2473500
    },
    {
      "epoch": 4.047969730890333,
      "grad_norm": 0.0508851483464241,
      "learning_rate": 1.916606814572722e-06,
      "loss": 0.0099,
      "step": 2473520
    },
    {
      "epoch": 4.048002461328987,
      "grad_norm": 0.10229140520095825,
      "learning_rate": 1.916540922359205e-06,
      "loss": 0.0057,
      "step": 2473540
    },
    {
      "epoch": 4.0480351917676405,
      "grad_norm": 0.23334357142448425,
      "learning_rate": 1.9164750301456876e-06,
      "loss": 0.0076,
      "step": 2473560
    },
    {
      "epoch": 4.048067922206293,
      "grad_norm": 0.18861375749111176,
      "learning_rate": 1.9164091379321708e-06,
      "loss": 0.0088,
      "step": 2473580
    },
    {
      "epoch": 4.048100652644947,
      "grad_norm": 0.34377843141555786,
      "learning_rate": 1.916343245718654e-06,
      "loss": 0.0085,
      "step": 2473600
    },
    {
      "epoch": 4.0481333830836,
      "grad_norm": 0.14947892725467682,
      "learning_rate": 1.9162773535051367e-06,
      "loss": 0.0085,
      "step": 2473620
    },
    {
      "epoch": 4.048166113522253,
      "grad_norm": 0.3682997524738312,
      "learning_rate": 1.9162114612916194e-06,
      "loss": 0.0102,
      "step": 2473640
    },
    {
      "epoch": 4.048198843960907,
      "grad_norm": 0.21364137530326843,
      "learning_rate": 1.916145569078102e-06,
      "loss": 0.0086,
      "step": 2473660
    },
    {
      "epoch": 4.04823157439956,
      "grad_norm": 0.2195139080286026,
      "learning_rate": 1.916079676864585e-06,
      "loss": 0.0082,
      "step": 2473680
    },
    {
      "epoch": 4.048264304838214,
      "grad_norm": 0.1174388900399208,
      "learning_rate": 1.916013784651068e-06,
      "loss": 0.0064,
      "step": 2473700
    },
    {
      "epoch": 4.048297035276867,
      "grad_norm": 0.17952559888362885,
      "learning_rate": 1.9159478924375508e-06,
      "loss": 0.0077,
      "step": 2473720
    },
    {
      "epoch": 4.04832976571552,
      "grad_norm": 0.42246490716934204,
      "learning_rate": 1.9158820002240335e-06,
      "loss": 0.0104,
      "step": 2473740
    },
    {
      "epoch": 4.048362496154174,
      "grad_norm": 0.5314141511917114,
      "learning_rate": 1.9158161080105163e-06,
      "loss": 0.0065,
      "step": 2473760
    },
    {
      "epoch": 4.0483952265928265,
      "grad_norm": 0.05647552013397217,
      "learning_rate": 1.9157502157969994e-06,
      "loss": 0.0079,
      "step": 2473780
    },
    {
      "epoch": 4.04842795703148,
      "grad_norm": 0.23336829245090485,
      "learning_rate": 1.9156843235834826e-06,
      "loss": 0.0117,
      "step": 2473800
    },
    {
      "epoch": 4.048460687470134,
      "grad_norm": 0.24645331501960754,
      "learning_rate": 1.9156184313699653e-06,
      "loss": 0.0128,
      "step": 2473820
    },
    {
      "epoch": 4.048493417908787,
      "grad_norm": 0.2283165007829666,
      "learning_rate": 1.915552539156448e-06,
      "loss": 0.0109,
      "step": 2473840
    },
    {
      "epoch": 4.04852614834744,
      "grad_norm": 0.23731404542922974,
      "learning_rate": 1.915486646942931e-06,
      "loss": 0.0064,
      "step": 2473860
    },
    {
      "epoch": 4.0485588787860936,
      "grad_norm": 0.45432043075561523,
      "learning_rate": 1.9154207547294135e-06,
      "loss": 0.0086,
      "step": 2473880
    },
    {
      "epoch": 4.048591609224747,
      "grad_norm": 0.14000077545642853,
      "learning_rate": 1.9153548625158967e-06,
      "loss": 0.0092,
      "step": 2473900
    },
    {
      "epoch": 4.0486243396634,
      "grad_norm": 0.17073237895965576,
      "learning_rate": 1.9152889703023794e-06,
      "loss": 0.0149,
      "step": 2473920
    },
    {
      "epoch": 4.0486570701020534,
      "grad_norm": 0.057778023183345795,
      "learning_rate": 1.915223078088862e-06,
      "loss": 0.0138,
      "step": 2473940
    },
    {
      "epoch": 4.048689800540707,
      "grad_norm": 0.24428197741508484,
      "learning_rate": 1.9151571858753454e-06,
      "loss": 0.0109,
      "step": 2473960
    },
    {
      "epoch": 4.04872253097936,
      "grad_norm": 0.1446625143289566,
      "learning_rate": 1.915091293661828e-06,
      "loss": 0.0111,
      "step": 2473980
    },
    {
      "epoch": 4.048755261418013,
      "grad_norm": 0.32444924116134644,
      "learning_rate": 1.9150254014483113e-06,
      "loss": 0.0112,
      "step": 2474000
    },
    {
      "epoch": 4.048787991856667,
      "grad_norm": 0.5648304224014282,
      "learning_rate": 1.914959509234794e-06,
      "loss": 0.0071,
      "step": 2474020
    },
    {
      "epoch": 4.0488207222953205,
      "grad_norm": 0.3202015459537506,
      "learning_rate": 1.9148936170212767e-06,
      "loss": 0.0141,
      "step": 2474040
    },
    {
      "epoch": 4.048853452733973,
      "grad_norm": 0.14573225378990173,
      "learning_rate": 1.9148277248077595e-06,
      "loss": 0.006,
      "step": 2474060
    },
    {
      "epoch": 4.048886183172627,
      "grad_norm": 0.12985242903232574,
      "learning_rate": 1.9147618325942422e-06,
      "loss": 0.0109,
      "step": 2474080
    },
    {
      "epoch": 4.04891891361128,
      "grad_norm": 0.44368603825569153,
      "learning_rate": 1.9146959403807254e-06,
      "loss": 0.0102,
      "step": 2474100
    },
    {
      "epoch": 4.048951644049933,
      "grad_norm": 0.3211233615875244,
      "learning_rate": 1.914630048167208e-06,
      "loss": 0.0091,
      "step": 2474120
    },
    {
      "epoch": 4.048984374488587,
      "grad_norm": 0.42387446761131287,
      "learning_rate": 1.9145641559536913e-06,
      "loss": 0.0079,
      "step": 2474140
    },
    {
      "epoch": 4.04901710492724,
      "grad_norm": 0.3817855417728424,
      "learning_rate": 1.914498263740174e-06,
      "loss": 0.0119,
      "step": 2474160
    },
    {
      "epoch": 4.049049835365894,
      "grad_norm": 0.14594314992427826,
      "learning_rate": 1.9144323715266568e-06,
      "loss": 0.0084,
      "step": 2474180
    },
    {
      "epoch": 4.049082565804547,
      "grad_norm": 0.14918309450149536,
      "learning_rate": 1.91436647931314e-06,
      "loss": 0.0071,
      "step": 2474200
    },
    {
      "epoch": 4.0491152962432,
      "grad_norm": 0.2699323892593384,
      "learning_rate": 1.9143005870996227e-06,
      "loss": 0.0056,
      "step": 2474220
    },
    {
      "epoch": 4.049148026681854,
      "grad_norm": 0.3993828296661377,
      "learning_rate": 1.9142346948861054e-06,
      "loss": 0.0085,
      "step": 2474240
    },
    {
      "epoch": 4.0491807571205065,
      "grad_norm": 0.19084712862968445,
      "learning_rate": 1.914168802672588e-06,
      "loss": 0.012,
      "step": 2474260
    },
    {
      "epoch": 4.04921348755916,
      "grad_norm": 0.5238045454025269,
      "learning_rate": 1.9141029104590713e-06,
      "loss": 0.0077,
      "step": 2474280
    },
    {
      "epoch": 4.049246217997814,
      "grad_norm": 0.11667972058057785,
      "learning_rate": 1.914037018245554e-06,
      "loss": 0.0051,
      "step": 2474300
    },
    {
      "epoch": 4.049278948436467,
      "grad_norm": 0.2619624435901642,
      "learning_rate": 1.913971126032037e-06,
      "loss": 0.012,
      "step": 2474320
    },
    {
      "epoch": 4.04931167887512,
      "grad_norm": 0.098340705037117,
      "learning_rate": 1.91390523381852e-06,
      "loss": 0.006,
      "step": 2474340
    },
    {
      "epoch": 4.049344409313774,
      "grad_norm": 0.26043182611465454,
      "learning_rate": 1.9138393416050027e-06,
      "loss": 0.0097,
      "step": 2474360
    },
    {
      "epoch": 4.049377139752427,
      "grad_norm": 0.03503746539354324,
      "learning_rate": 1.9137734493914854e-06,
      "loss": 0.01,
      "step": 2474380
    },
    {
      "epoch": 4.04940987019108,
      "grad_norm": 0.07977171987295151,
      "learning_rate": 1.9137075571779686e-06,
      "loss": 0.0081,
      "step": 2474400
    },
    {
      "epoch": 4.0494426006297335,
      "grad_norm": 0.13753247261047363,
      "learning_rate": 1.9136416649644513e-06,
      "loss": 0.0089,
      "step": 2474420
    },
    {
      "epoch": 4.049475331068387,
      "grad_norm": 0.2384122759103775,
      "learning_rate": 1.913575772750934e-06,
      "loss": 0.0088,
      "step": 2474440
    },
    {
      "epoch": 4.049508061507041,
      "grad_norm": 0.08493375033140182,
      "learning_rate": 1.913509880537417e-06,
      "loss": 0.0103,
      "step": 2474460
    },
    {
      "epoch": 4.049540791945693,
      "grad_norm": 0.27513977885246277,
      "learning_rate": 1.9134439883239e-06,
      "loss": 0.0082,
      "step": 2474480
    },
    {
      "epoch": 4.049573522384347,
      "grad_norm": 0.2969302237033844,
      "learning_rate": 1.9133780961103827e-06,
      "loss": 0.0123,
      "step": 2474500
    },
    {
      "epoch": 4.049606252823001,
      "grad_norm": 0.08418844640254974,
      "learning_rate": 1.913312203896866e-06,
      "loss": 0.0084,
      "step": 2474520
    },
    {
      "epoch": 4.049638983261653,
      "grad_norm": 0.18862849473953247,
      "learning_rate": 1.9132463116833486e-06,
      "loss": 0.012,
      "step": 2474540
    },
    {
      "epoch": 4.049671713700307,
      "grad_norm": 0.11213614791631699,
      "learning_rate": 1.9131804194698314e-06,
      "loss": 0.0065,
      "step": 2474560
    },
    {
      "epoch": 4.0497044441389605,
      "grad_norm": 0.1318703144788742,
      "learning_rate": 1.913114527256314e-06,
      "loss": 0.012,
      "step": 2474580
    },
    {
      "epoch": 4.049737174577614,
      "grad_norm": 0.1784142553806305,
      "learning_rate": 1.9130486350427973e-06,
      "loss": 0.0116,
      "step": 2474600
    },
    {
      "epoch": 4.049769905016267,
      "grad_norm": 0.2035202831029892,
      "learning_rate": 1.91298274282928e-06,
      "loss": 0.0069,
      "step": 2474620
    },
    {
      "epoch": 4.04980263545492,
      "grad_norm": 0.2713516056537628,
      "learning_rate": 1.9129168506157627e-06,
      "loss": 0.0145,
      "step": 2474640
    },
    {
      "epoch": 4.049835365893574,
      "grad_norm": 0.21958771347999573,
      "learning_rate": 1.912850958402246e-06,
      "loss": 0.0108,
      "step": 2474660
    },
    {
      "epoch": 4.049868096332227,
      "grad_norm": 0.13151006400585175,
      "learning_rate": 1.9127850661887286e-06,
      "loss": 0.0115,
      "step": 2474680
    },
    {
      "epoch": 4.04990082677088,
      "grad_norm": 0.12734989821910858,
      "learning_rate": 1.9127191739752114e-06,
      "loss": 0.0114,
      "step": 2474700
    },
    {
      "epoch": 4.049933557209534,
      "grad_norm": 0.12861120700836182,
      "learning_rate": 1.9126532817616945e-06,
      "loss": 0.0092,
      "step": 2474720
    },
    {
      "epoch": 4.0499662876481874,
      "grad_norm": 0.24790674448013306,
      "learning_rate": 1.9125873895481773e-06,
      "loss": 0.007,
      "step": 2474740
    },
    {
      "epoch": 4.04999901808684,
      "grad_norm": 0.25965267419815063,
      "learning_rate": 1.91252149733466e-06,
      "loss": 0.0071,
      "step": 2474760
    },
    {
      "epoch": 4.050031748525494,
      "grad_norm": 0.25382715463638306,
      "learning_rate": 1.9124556051211428e-06,
      "loss": 0.0106,
      "step": 2474780
    },
    {
      "epoch": 4.050064478964147,
      "grad_norm": 0.18301504850387573,
      "learning_rate": 1.912389712907626e-06,
      "loss": 0.0097,
      "step": 2474800
    },
    {
      "epoch": 4.0500972094028,
      "grad_norm": 0.09746292233467102,
      "learning_rate": 1.9123238206941087e-06,
      "loss": 0.0064,
      "step": 2474820
    },
    {
      "epoch": 4.050129939841454,
      "grad_norm": 0.24186910688877106,
      "learning_rate": 1.912257928480592e-06,
      "loss": 0.0099,
      "step": 2474840
    },
    {
      "epoch": 4.050162670280107,
      "grad_norm": 0.15923023223876953,
      "learning_rate": 1.9121920362670746e-06,
      "loss": 0.0092,
      "step": 2474860
    },
    {
      "epoch": 4.050195400718761,
      "grad_norm": 0.06451866030693054,
      "learning_rate": 1.9121261440535573e-06,
      "loss": 0.0081,
      "step": 2474880
    },
    {
      "epoch": 4.0502281311574135,
      "grad_norm": 0.08229131251573563,
      "learning_rate": 1.91206025184004e-06,
      "loss": 0.0079,
      "step": 2474900
    },
    {
      "epoch": 4.050260861596067,
      "grad_norm": 0.25053805112838745,
      "learning_rate": 1.9119943596265232e-06,
      "loss": 0.0093,
      "step": 2474920
    },
    {
      "epoch": 4.050293592034721,
      "grad_norm": 0.22472843527793884,
      "learning_rate": 1.911928467413006e-06,
      "loss": 0.0051,
      "step": 2474940
    },
    {
      "epoch": 4.050326322473373,
      "grad_norm": 0.2623312771320343,
      "learning_rate": 1.9118625751994887e-06,
      "loss": 0.0078,
      "step": 2474960
    },
    {
      "epoch": 4.050359052912027,
      "grad_norm": 0.1530712991952896,
      "learning_rate": 1.9117966829859714e-06,
      "loss": 0.0068,
      "step": 2474980
    },
    {
      "epoch": 4.050391783350681,
      "grad_norm": 0.04124685376882553,
      "learning_rate": 1.9117307907724546e-06,
      "loss": 0.0093,
      "step": 2475000
    },
    {
      "epoch": 4.050424513789334,
      "grad_norm": 0.07118142396211624,
      "learning_rate": 1.9116648985589378e-06,
      "loss": 0.006,
      "step": 2475020
    },
    {
      "epoch": 4.050457244227987,
      "grad_norm": 0.0926918163895607,
      "learning_rate": 1.9115990063454205e-06,
      "loss": 0.0087,
      "step": 2475040
    },
    {
      "epoch": 4.0504899746666405,
      "grad_norm": 0.5581172704696655,
      "learning_rate": 1.9115331141319032e-06,
      "loss": 0.0099,
      "step": 2475060
    },
    {
      "epoch": 4.050522705105294,
      "grad_norm": 0.13053107261657715,
      "learning_rate": 1.911467221918386e-06,
      "loss": 0.0094,
      "step": 2475080
    },
    {
      "epoch": 4.050555435543947,
      "grad_norm": 0.08995005488395691,
      "learning_rate": 1.9114013297048687e-06,
      "loss": 0.0104,
      "step": 2475100
    },
    {
      "epoch": 4.0505881659826,
      "grad_norm": 0.08314045518636703,
      "learning_rate": 1.911335437491352e-06,
      "loss": 0.0082,
      "step": 2475120
    },
    {
      "epoch": 4.050620896421254,
      "grad_norm": 0.39607205986976624,
      "learning_rate": 1.9112695452778346e-06,
      "loss": 0.0095,
      "step": 2475140
    },
    {
      "epoch": 4.050653626859908,
      "grad_norm": 0.45718643069267273,
      "learning_rate": 1.9112036530643174e-06,
      "loss": 0.0113,
      "step": 2475160
    },
    {
      "epoch": 4.05068635729856,
      "grad_norm": 0.24081911146640778,
      "learning_rate": 1.9111377608508005e-06,
      "loss": 0.0072,
      "step": 2475180
    },
    {
      "epoch": 4.050719087737214,
      "grad_norm": 0.214015930891037,
      "learning_rate": 1.9110718686372833e-06,
      "loss": 0.0103,
      "step": 2475200
    },
    {
      "epoch": 4.0507518181758675,
      "grad_norm": 0.22370260953903198,
      "learning_rate": 1.9110059764237664e-06,
      "loss": 0.0089,
      "step": 2475220
    },
    {
      "epoch": 4.05078454861452,
      "grad_norm": 0.1277017444372177,
      "learning_rate": 1.910940084210249e-06,
      "loss": 0.0077,
      "step": 2475240
    },
    {
      "epoch": 4.050817279053174,
      "grad_norm": 0.17079748213291168,
      "learning_rate": 1.910874191996732e-06,
      "loss": 0.0104,
      "step": 2475260
    },
    {
      "epoch": 4.050850009491827,
      "grad_norm": 0.09111574292182922,
      "learning_rate": 1.9108082997832146e-06,
      "loss": 0.0131,
      "step": 2475280
    },
    {
      "epoch": 4.050882739930481,
      "grad_norm": 0.2883293032646179,
      "learning_rate": 1.9107424075696974e-06,
      "loss": 0.0103,
      "step": 2475300
    },
    {
      "epoch": 4.050915470369134,
      "grad_norm": 0.32480794191360474,
      "learning_rate": 1.9106765153561805e-06,
      "loss": 0.0086,
      "step": 2475320
    },
    {
      "epoch": 4.050948200807787,
      "grad_norm": 0.32889270782470703,
      "learning_rate": 1.9106106231426633e-06,
      "loss": 0.0061,
      "step": 2475340
    },
    {
      "epoch": 4.050980931246441,
      "grad_norm": 0.08699113875627518,
      "learning_rate": 1.9105447309291465e-06,
      "loss": 0.0091,
      "step": 2475360
    },
    {
      "epoch": 4.051013661685094,
      "grad_norm": 0.33684486150741577,
      "learning_rate": 1.910478838715629e-06,
      "loss": 0.0063,
      "step": 2475380
    },
    {
      "epoch": 4.051046392123747,
      "grad_norm": 0.2488565295934677,
      "learning_rate": 1.910412946502112e-06,
      "loss": 0.009,
      "step": 2475400
    },
    {
      "epoch": 4.051079122562401,
      "grad_norm": 0.7238748669624329,
      "learning_rate": 1.910347054288595e-06,
      "loss": 0.0087,
      "step": 2475420
    },
    {
      "epoch": 4.0511118530010535,
      "grad_norm": 0.43611234426498413,
      "learning_rate": 1.910281162075078e-06,
      "loss": 0.0116,
      "step": 2475440
    },
    {
      "epoch": 4.051144583439707,
      "grad_norm": 0.17797645926475525,
      "learning_rate": 1.9102152698615606e-06,
      "loss": 0.0093,
      "step": 2475460
    },
    {
      "epoch": 4.051177313878361,
      "grad_norm": 0.2738445997238159,
      "learning_rate": 1.9101493776480433e-06,
      "loss": 0.0086,
      "step": 2475480
    },
    {
      "epoch": 4.051210044317014,
      "grad_norm": 0.22015689313411713,
      "learning_rate": 1.910083485434526e-06,
      "loss": 0.0119,
      "step": 2475500
    },
    {
      "epoch": 4.051242774755667,
      "grad_norm": 0.40637925267219543,
      "learning_rate": 1.9100175932210092e-06,
      "loss": 0.0109,
      "step": 2475520
    },
    {
      "epoch": 4.051275505194321,
      "grad_norm": 0.35163041949272156,
      "learning_rate": 1.9099517010074924e-06,
      "loss": 0.0092,
      "step": 2475540
    },
    {
      "epoch": 4.051308235632974,
      "grad_norm": 0.2725850045681,
      "learning_rate": 1.909885808793975e-06,
      "loss": 0.0086,
      "step": 2475560
    },
    {
      "epoch": 4.051340966071627,
      "grad_norm": 0.2483089566230774,
      "learning_rate": 1.909819916580458e-06,
      "loss": 0.0086,
      "step": 2475580
    },
    {
      "epoch": 4.0513736965102805,
      "grad_norm": 0.09215406328439713,
      "learning_rate": 1.9097540243669406e-06,
      "loss": 0.0121,
      "step": 2475600
    },
    {
      "epoch": 4.051406426948934,
      "grad_norm": 0.2959200441837311,
      "learning_rate": 1.9096881321534238e-06,
      "loss": 0.008,
      "step": 2475620
    },
    {
      "epoch": 4.051439157387588,
      "grad_norm": 0.09160731732845306,
      "learning_rate": 1.9096222399399065e-06,
      "loss": 0.0082,
      "step": 2475640
    },
    {
      "epoch": 4.05147188782624,
      "grad_norm": 0.43533721566200256,
      "learning_rate": 1.9095563477263892e-06,
      "loss": 0.0073,
      "step": 2475660
    },
    {
      "epoch": 4.051504618264894,
      "grad_norm": 0.12861432135105133,
      "learning_rate": 1.909490455512872e-06,
      "loss": 0.0126,
      "step": 2475680
    },
    {
      "epoch": 4.0515373487035475,
      "grad_norm": 0.2141294926404953,
      "learning_rate": 1.909424563299355e-06,
      "loss": 0.0098,
      "step": 2475700
    },
    {
      "epoch": 4.0515700791422,
      "grad_norm": 0.37403157353401184,
      "learning_rate": 1.909358671085838e-06,
      "loss": 0.0101,
      "step": 2475720
    },
    {
      "epoch": 4.051602809580854,
      "grad_norm": 0.5163472294807434,
      "learning_rate": 1.909292778872321e-06,
      "loss": 0.0092,
      "step": 2475740
    },
    {
      "epoch": 4.051635540019507,
      "grad_norm": 0.19131994247436523,
      "learning_rate": 1.9092268866588038e-06,
      "loss": 0.0081,
      "step": 2475760
    },
    {
      "epoch": 4.051668270458161,
      "grad_norm": 0.2894609272480011,
      "learning_rate": 1.9091609944452865e-06,
      "loss": 0.0127,
      "step": 2475780
    },
    {
      "epoch": 4.051701000896814,
      "grad_norm": 0.19486388564109802,
      "learning_rate": 1.9090951022317693e-06,
      "loss": 0.0064,
      "step": 2475800
    },
    {
      "epoch": 4.051733731335467,
      "grad_norm": 0.4013439416885376,
      "learning_rate": 1.9090292100182524e-06,
      "loss": 0.0095,
      "step": 2475820
    },
    {
      "epoch": 4.051766461774121,
      "grad_norm": 0.2007705122232437,
      "learning_rate": 1.908963317804735e-06,
      "loss": 0.012,
      "step": 2475840
    },
    {
      "epoch": 4.051799192212774,
      "grad_norm": 0.19764773547649384,
      "learning_rate": 1.908897425591218e-06,
      "loss": 0.007,
      "step": 2475860
    },
    {
      "epoch": 4.051831922651427,
      "grad_norm": 0.15419326722621918,
      "learning_rate": 1.908831533377701e-06,
      "loss": 0.0068,
      "step": 2475880
    },
    {
      "epoch": 4.051864653090081,
      "grad_norm": 0.335257351398468,
      "learning_rate": 1.908765641164184e-06,
      "loss": 0.009,
      "step": 2475900
    },
    {
      "epoch": 4.051897383528734,
      "grad_norm": 0.18760153651237488,
      "learning_rate": 1.9086997489506666e-06,
      "loss": 0.0096,
      "step": 2475920
    },
    {
      "epoch": 4.051930113967387,
      "grad_norm": 0.8926862478256226,
      "learning_rate": 1.9086338567371497e-06,
      "loss": 0.0093,
      "step": 2475940
    },
    {
      "epoch": 4.051962844406041,
      "grad_norm": 0.4760468006134033,
      "learning_rate": 1.9085679645236325e-06,
      "loss": 0.0134,
      "step": 2475960
    },
    {
      "epoch": 4.051995574844694,
      "grad_norm": 1.3564471006393433,
      "learning_rate": 1.908502072310115e-06,
      "loss": 0.0055,
      "step": 2475980
    },
    {
      "epoch": 4.052028305283347,
      "grad_norm": 0.22601987421512604,
      "learning_rate": 1.908436180096598e-06,
      "loss": 0.0093,
      "step": 2476000
    },
    {
      "epoch": 4.052061035722001,
      "grad_norm": 0.14406903088092804,
      "learning_rate": 1.908370287883081e-06,
      "loss": 0.0104,
      "step": 2476020
    },
    {
      "epoch": 4.052093766160654,
      "grad_norm": 0.14570510387420654,
      "learning_rate": 1.908304395669564e-06,
      "loss": 0.0095,
      "step": 2476040
    },
    {
      "epoch": 4.052126496599308,
      "grad_norm": 0.27417969703674316,
      "learning_rate": 1.908238503456047e-06,
      "loss": 0.0097,
      "step": 2476060
    },
    {
      "epoch": 4.0521592270379605,
      "grad_norm": 0.3950106203556061,
      "learning_rate": 1.9081726112425297e-06,
      "loss": 0.0123,
      "step": 2476080
    },
    {
      "epoch": 4.052191957476614,
      "grad_norm": 0.2555662989616394,
      "learning_rate": 1.9081067190290125e-06,
      "loss": 0.011,
      "step": 2476100
    },
    {
      "epoch": 4.052224687915268,
      "grad_norm": 0.3686172068119049,
      "learning_rate": 1.9080408268154952e-06,
      "loss": 0.0076,
      "step": 2476120
    },
    {
      "epoch": 4.05225741835392,
      "grad_norm": 0.2556755840778351,
      "learning_rate": 1.9079749346019784e-06,
      "loss": 0.0071,
      "step": 2476140
    },
    {
      "epoch": 4.052290148792574,
      "grad_norm": 0.1950826197862625,
      "learning_rate": 1.907909042388461e-06,
      "loss": 0.0114,
      "step": 2476160
    },
    {
      "epoch": 4.052322879231228,
      "grad_norm": 0.13324441015720367,
      "learning_rate": 1.907843150174944e-06,
      "loss": 0.0096,
      "step": 2476180
    },
    {
      "epoch": 4.052355609669881,
      "grad_norm": 0.15706579387187958,
      "learning_rate": 1.9077772579614266e-06,
      "loss": 0.0081,
      "step": 2476200
    },
    {
      "epoch": 4.052388340108534,
      "grad_norm": 0.06040612980723381,
      "learning_rate": 1.9077113657479098e-06,
      "loss": 0.006,
      "step": 2476220
    },
    {
      "epoch": 4.0524210705471875,
      "grad_norm": 0.18919149041175842,
      "learning_rate": 1.907645473534393e-06,
      "loss": 0.0099,
      "step": 2476240
    },
    {
      "epoch": 4.052453800985841,
      "grad_norm": 0.18016454577445984,
      "learning_rate": 1.9075795813208757e-06,
      "loss": 0.0072,
      "step": 2476260
    },
    {
      "epoch": 4.052486531424494,
      "grad_norm": 0.37074199318885803,
      "learning_rate": 1.9075136891073584e-06,
      "loss": 0.0077,
      "step": 2476280
    },
    {
      "epoch": 4.052519261863147,
      "grad_norm": 0.08504895120859146,
      "learning_rate": 1.907447796893841e-06,
      "loss": 0.0086,
      "step": 2476300
    },
    {
      "epoch": 4.052551992301801,
      "grad_norm": 0.17557984590530396,
      "learning_rate": 1.907381904680324e-06,
      "loss": 0.0094,
      "step": 2476320
    },
    {
      "epoch": 4.052584722740455,
      "grad_norm": 0.16656863689422607,
      "learning_rate": 1.9073160124668068e-06,
      "loss": 0.0076,
      "step": 2476340
    },
    {
      "epoch": 4.052617453179107,
      "grad_norm": 0.248809814453125,
      "learning_rate": 1.9072501202532898e-06,
      "loss": 0.0077,
      "step": 2476360
    },
    {
      "epoch": 4.052650183617761,
      "grad_norm": 0.10552015900611877,
      "learning_rate": 1.9071842280397725e-06,
      "loss": 0.0116,
      "step": 2476380
    },
    {
      "epoch": 4.0526829140564145,
      "grad_norm": 0.30780163407325745,
      "learning_rate": 1.9071183358262555e-06,
      "loss": 0.0143,
      "step": 2476400
    },
    {
      "epoch": 4.052715644495067,
      "grad_norm": 0.19455845654010773,
      "learning_rate": 1.9070524436127386e-06,
      "loss": 0.01,
      "step": 2476420
    },
    {
      "epoch": 4.052748374933721,
      "grad_norm": 0.2120075523853302,
      "learning_rate": 1.9069865513992214e-06,
      "loss": 0.0102,
      "step": 2476440
    },
    {
      "epoch": 4.052781105372374,
      "grad_norm": 0.16353291273117065,
      "learning_rate": 1.9069206591857043e-06,
      "loss": 0.0063,
      "step": 2476460
    },
    {
      "epoch": 4.052813835811028,
      "grad_norm": 0.2305123209953308,
      "learning_rate": 1.906854766972187e-06,
      "loss": 0.0081,
      "step": 2476480
    },
    {
      "epoch": 4.052846566249681,
      "grad_norm": 0.19039054214954376,
      "learning_rate": 1.9067888747586698e-06,
      "loss": 0.0069,
      "step": 2476500
    },
    {
      "epoch": 4.052879296688334,
      "grad_norm": 0.17025162279605865,
      "learning_rate": 1.9067229825451528e-06,
      "loss": 0.01,
      "step": 2476520
    },
    {
      "epoch": 4.052912027126988,
      "grad_norm": 0.13681872189044952,
      "learning_rate": 1.9066570903316355e-06,
      "loss": 0.0079,
      "step": 2476540
    },
    {
      "epoch": 4.0529447575656405,
      "grad_norm": 0.35293158888816833,
      "learning_rate": 1.9065911981181185e-06,
      "loss": 0.008,
      "step": 2476560
    },
    {
      "epoch": 4.052977488004294,
      "grad_norm": 0.1281193196773529,
      "learning_rate": 1.9065253059046012e-06,
      "loss": 0.0114,
      "step": 2476580
    },
    {
      "epoch": 4.053010218442948,
      "grad_norm": 0.07206409424543381,
      "learning_rate": 1.9064594136910844e-06,
      "loss": 0.0089,
      "step": 2476600
    },
    {
      "epoch": 4.053042948881601,
      "grad_norm": 0.3894323706626892,
      "learning_rate": 1.9063935214775673e-06,
      "loss": 0.0079,
      "step": 2476620
    },
    {
      "epoch": 4.053075679320254,
      "grad_norm": 0.054321758449077606,
      "learning_rate": 1.90632762926405e-06,
      "loss": 0.0088,
      "step": 2476640
    },
    {
      "epoch": 4.053108409758908,
      "grad_norm": 0.21122756600379944,
      "learning_rate": 1.906261737050533e-06,
      "loss": 0.0086,
      "step": 2476660
    },
    {
      "epoch": 4.053141140197561,
      "grad_norm": 0.433357834815979,
      "learning_rate": 1.9061958448370157e-06,
      "loss": 0.0053,
      "step": 2476680
    },
    {
      "epoch": 4.053173870636214,
      "grad_norm": 0.2795366048812866,
      "learning_rate": 1.9061299526234987e-06,
      "loss": 0.008,
      "step": 2476700
    },
    {
      "epoch": 4.0532066010748675,
      "grad_norm": 0.17245115339756012,
      "learning_rate": 1.9060640604099814e-06,
      "loss": 0.0073,
      "step": 2476720
    },
    {
      "epoch": 4.053239331513521,
      "grad_norm": 0.4733794629573822,
      "learning_rate": 1.9059981681964642e-06,
      "loss": 0.0103,
      "step": 2476740
    },
    {
      "epoch": 4.053272061952175,
      "grad_norm": 0.5306231379508972,
      "learning_rate": 1.9059322759829473e-06,
      "loss": 0.0111,
      "step": 2476760
    },
    {
      "epoch": 4.053304792390827,
      "grad_norm": 0.5217226147651672,
      "learning_rate": 1.9058663837694303e-06,
      "loss": 0.0076,
      "step": 2476780
    },
    {
      "epoch": 4.053337522829481,
      "grad_norm": 0.1835319697856903,
      "learning_rate": 1.905800491555913e-06,
      "loss": 0.0135,
      "step": 2476800
    },
    {
      "epoch": 4.053370253268135,
      "grad_norm": 0.6272450089454651,
      "learning_rate": 1.905734599342396e-06,
      "loss": 0.0112,
      "step": 2476820
    },
    {
      "epoch": 4.053402983706787,
      "grad_norm": 0.384952574968338,
      "learning_rate": 1.9056687071288787e-06,
      "loss": 0.0101,
      "step": 2476840
    },
    {
      "epoch": 4.053435714145441,
      "grad_norm": 0.6703420281410217,
      "learning_rate": 1.9056028149153617e-06,
      "loss": 0.0073,
      "step": 2476860
    },
    {
      "epoch": 4.0534684445840945,
      "grad_norm": 0.09439749270677567,
      "learning_rate": 1.9055369227018444e-06,
      "loss": 0.0085,
      "step": 2476880
    },
    {
      "epoch": 4.053501175022748,
      "grad_norm": 0.3174128532409668,
      "learning_rate": 1.9054710304883274e-06,
      "loss": 0.0136,
      "step": 2476900
    },
    {
      "epoch": 4.053533905461401,
      "grad_norm": 0.13202162086963654,
      "learning_rate": 1.90540513827481e-06,
      "loss": 0.0059,
      "step": 2476920
    },
    {
      "epoch": 4.053566635900054,
      "grad_norm": 0.4609423875808716,
      "learning_rate": 1.9053392460612933e-06,
      "loss": 0.0099,
      "step": 2476940
    },
    {
      "epoch": 4.053599366338708,
      "grad_norm": 0.22819606959819794,
      "learning_rate": 1.905273353847776e-06,
      "loss": 0.0098,
      "step": 2476960
    },
    {
      "epoch": 4.053632096777361,
      "grad_norm": 0.30759042501449585,
      "learning_rate": 1.905207461634259e-06,
      "loss": 0.0093,
      "step": 2476980
    },
    {
      "epoch": 4.053664827216014,
      "grad_norm": 0.3340904414653778,
      "learning_rate": 1.9051415694207417e-06,
      "loss": 0.0084,
      "step": 2477000
    },
    {
      "epoch": 4.053697557654668,
      "grad_norm": 0.15474942326545715,
      "learning_rate": 1.9050756772072247e-06,
      "loss": 0.0135,
      "step": 2477020
    },
    {
      "epoch": 4.053730288093321,
      "grad_norm": 0.8103299140930176,
      "learning_rate": 1.9050097849937074e-06,
      "loss": 0.0127,
      "step": 2477040
    },
    {
      "epoch": 4.053763018531974,
      "grad_norm": 0.1578003615140915,
      "learning_rate": 1.9049438927801903e-06,
      "loss": 0.0073,
      "step": 2477060
    },
    {
      "epoch": 4.053795748970628,
      "grad_norm": 0.1938258558511734,
      "learning_rate": 1.904878000566673e-06,
      "loss": 0.0086,
      "step": 2477080
    },
    {
      "epoch": 4.053828479409281,
      "grad_norm": 0.18595336377620697,
      "learning_rate": 1.904812108353156e-06,
      "loss": 0.0108,
      "step": 2477100
    },
    {
      "epoch": 4.053861209847934,
      "grad_norm": 0.1773243099451065,
      "learning_rate": 1.904746216139639e-06,
      "loss": 0.0065,
      "step": 2477120
    },
    {
      "epoch": 4.053893940286588,
      "grad_norm": 0.201960951089859,
      "learning_rate": 1.904680323926122e-06,
      "loss": 0.0094,
      "step": 2477140
    },
    {
      "epoch": 4.053926670725241,
      "grad_norm": 0.3547213077545166,
      "learning_rate": 1.9046144317126047e-06,
      "loss": 0.0086,
      "step": 2477160
    },
    {
      "epoch": 4.053959401163894,
      "grad_norm": 0.19056148827075958,
      "learning_rate": 1.9045485394990876e-06,
      "loss": 0.0081,
      "step": 2477180
    },
    {
      "epoch": 4.053992131602548,
      "grad_norm": 0.2723631262779236,
      "learning_rate": 1.9044826472855704e-06,
      "loss": 0.0106,
      "step": 2477200
    },
    {
      "epoch": 4.054024862041201,
      "grad_norm": 0.3268004357814789,
      "learning_rate": 1.9044167550720533e-06,
      "loss": 0.0066,
      "step": 2477220
    },
    {
      "epoch": 4.054057592479855,
      "grad_norm": 0.3334922194480896,
      "learning_rate": 1.904350862858536e-06,
      "loss": 0.0099,
      "step": 2477240
    },
    {
      "epoch": 4.0540903229185075,
      "grad_norm": 0.073305644094944,
      "learning_rate": 1.904284970645019e-06,
      "loss": 0.0061,
      "step": 2477260
    },
    {
      "epoch": 4.054123053357161,
      "grad_norm": 0.09978409111499786,
      "learning_rate": 1.9042190784315017e-06,
      "loss": 0.0116,
      "step": 2477280
    },
    {
      "epoch": 4.054155783795815,
      "grad_norm": 0.24002549052238464,
      "learning_rate": 1.904153186217985e-06,
      "loss": 0.0111,
      "step": 2477300
    },
    {
      "epoch": 4.054188514234467,
      "grad_norm": 0.3401836156845093,
      "learning_rate": 1.9040872940044677e-06,
      "loss": 0.0124,
      "step": 2477320
    },
    {
      "epoch": 4.054221244673121,
      "grad_norm": 0.13397638499736786,
      "learning_rate": 1.9040214017909506e-06,
      "loss": 0.0109,
      "step": 2477340
    },
    {
      "epoch": 4.0542539751117745,
      "grad_norm": 0.3686973750591278,
      "learning_rate": 1.9039555095774333e-06,
      "loss": 0.0128,
      "step": 2477360
    },
    {
      "epoch": 4.054286705550428,
      "grad_norm": 0.32989898324012756,
      "learning_rate": 1.9038896173639163e-06,
      "loss": 0.0094,
      "step": 2477380
    },
    {
      "epoch": 4.054319435989081,
      "grad_norm": 0.2885746657848358,
      "learning_rate": 1.903823725150399e-06,
      "loss": 0.0043,
      "step": 2477400
    },
    {
      "epoch": 4.054352166427734,
      "grad_norm": 0.18708638846874237,
      "learning_rate": 1.903757832936882e-06,
      "loss": 0.0084,
      "step": 2477420
    },
    {
      "epoch": 4.054384896866388,
      "grad_norm": 0.42701974511146545,
      "learning_rate": 1.9036919407233647e-06,
      "loss": 0.0096,
      "step": 2477440
    },
    {
      "epoch": 4.054417627305041,
      "grad_norm": 0.4896373152732849,
      "learning_rate": 1.9036260485098477e-06,
      "loss": 0.0109,
      "step": 2477460
    },
    {
      "epoch": 4.054450357743694,
      "grad_norm": 0.28647249937057495,
      "learning_rate": 1.9035601562963306e-06,
      "loss": 0.0088,
      "step": 2477480
    },
    {
      "epoch": 4.054483088182348,
      "grad_norm": 0.26116326451301575,
      "learning_rate": 1.9034942640828136e-06,
      "loss": 0.0062,
      "step": 2477500
    },
    {
      "epoch": 4.0545158186210015,
      "grad_norm": 0.26512014865875244,
      "learning_rate": 1.9034283718692963e-06,
      "loss": 0.0109,
      "step": 2477520
    },
    {
      "epoch": 4.054548549059654,
      "grad_norm": 0.09617554396390915,
      "learning_rate": 1.9033624796557793e-06,
      "loss": 0.0096,
      "step": 2477540
    },
    {
      "epoch": 4.054581279498308,
      "grad_norm": 0.7274169921875,
      "learning_rate": 1.903296587442262e-06,
      "loss": 0.0112,
      "step": 2477560
    },
    {
      "epoch": 4.054614009936961,
      "grad_norm": 0.21056362986564636,
      "learning_rate": 1.903230695228745e-06,
      "loss": 0.0115,
      "step": 2477580
    },
    {
      "epoch": 4.054646740375614,
      "grad_norm": 0.655654788017273,
      "learning_rate": 1.9031648030152277e-06,
      "loss": 0.0064,
      "step": 2477600
    },
    {
      "epoch": 4.054679470814268,
      "grad_norm": 0.13989004492759705,
      "learning_rate": 1.9030989108017107e-06,
      "loss": 0.0107,
      "step": 2477620
    },
    {
      "epoch": 4.054712201252921,
      "grad_norm": 1.5124328136444092,
      "learning_rate": 1.9030330185881938e-06,
      "loss": 0.0099,
      "step": 2477640
    },
    {
      "epoch": 4.054744931691575,
      "grad_norm": 0.47974297404289246,
      "learning_rate": 1.9029671263746766e-06,
      "loss": 0.009,
      "step": 2477660
    },
    {
      "epoch": 4.054777662130228,
      "grad_norm": 0.2629404664039612,
      "learning_rate": 1.9029012341611595e-06,
      "loss": 0.014,
      "step": 2477680
    },
    {
      "epoch": 4.054810392568881,
      "grad_norm": 0.2478255331516266,
      "learning_rate": 1.9028353419476422e-06,
      "loss": 0.0085,
      "step": 2477700
    },
    {
      "epoch": 4.054843123007535,
      "grad_norm": 0.3029397428035736,
      "learning_rate": 1.902769449734125e-06,
      "loss": 0.0072,
      "step": 2477720
    },
    {
      "epoch": 4.0548758534461875,
      "grad_norm": 0.2304825484752655,
      "learning_rate": 1.902703557520608e-06,
      "loss": 0.007,
      "step": 2477740
    },
    {
      "epoch": 4.054908583884841,
      "grad_norm": 0.6345230340957642,
      "learning_rate": 1.9026376653070907e-06,
      "loss": 0.0124,
      "step": 2477760
    },
    {
      "epoch": 4.054941314323495,
      "grad_norm": 0.22318165004253387,
      "learning_rate": 1.9025717730935736e-06,
      "loss": 0.012,
      "step": 2477780
    },
    {
      "epoch": 4.054974044762148,
      "grad_norm": 0.21141833066940308,
      "learning_rate": 1.9025058808800564e-06,
      "loss": 0.0112,
      "step": 2477800
    },
    {
      "epoch": 4.055006775200801,
      "grad_norm": 0.18897080421447754,
      "learning_rate": 1.9024399886665395e-06,
      "loss": 0.0058,
      "step": 2477820
    },
    {
      "epoch": 4.055039505639455,
      "grad_norm": 0.06977913528680801,
      "learning_rate": 1.9023740964530225e-06,
      "loss": 0.008,
      "step": 2477840
    },
    {
      "epoch": 4.055072236078108,
      "grad_norm": 0.29586586356163025,
      "learning_rate": 1.9023082042395052e-06,
      "loss": 0.0117,
      "step": 2477860
    },
    {
      "epoch": 4.055104966516761,
      "grad_norm": 0.5742699503898621,
      "learning_rate": 1.9022423120259882e-06,
      "loss": 0.0083,
      "step": 2477880
    },
    {
      "epoch": 4.0551376969554145,
      "grad_norm": 0.31384286284446716,
      "learning_rate": 1.902176419812471e-06,
      "loss": 0.0072,
      "step": 2477900
    },
    {
      "epoch": 4.055170427394068,
      "grad_norm": 0.17594431340694427,
      "learning_rate": 1.9021105275989537e-06,
      "loss": 0.0072,
      "step": 2477920
    },
    {
      "epoch": 4.055203157832722,
      "grad_norm": 0.19375824928283691,
      "learning_rate": 1.9020446353854366e-06,
      "loss": 0.0094,
      "step": 2477940
    },
    {
      "epoch": 4.055235888271374,
      "grad_norm": 0.5420067310333252,
      "learning_rate": 1.9019787431719193e-06,
      "loss": 0.0137,
      "step": 2477960
    },
    {
      "epoch": 4.055268618710028,
      "grad_norm": 0.38696548342704773,
      "learning_rate": 1.9019128509584023e-06,
      "loss": 0.0074,
      "step": 2477980
    },
    {
      "epoch": 4.055301349148682,
      "grad_norm": 0.30044081807136536,
      "learning_rate": 1.9018469587448855e-06,
      "loss": 0.0064,
      "step": 2478000
    },
    {
      "epoch": 4.055334079587334,
      "grad_norm": 0.1543189138174057,
      "learning_rate": 1.9017810665313682e-06,
      "loss": 0.009,
      "step": 2478020
    },
    {
      "epoch": 4.055366810025988,
      "grad_norm": 0.04664258658885956,
      "learning_rate": 1.9017151743178512e-06,
      "loss": 0.0102,
      "step": 2478040
    },
    {
      "epoch": 4.0553995404646415,
      "grad_norm": 0.3267250061035156,
      "learning_rate": 1.9016492821043339e-06,
      "loss": 0.0087,
      "step": 2478060
    },
    {
      "epoch": 4.055432270903295,
      "grad_norm": 0.5040903091430664,
      "learning_rate": 1.9015833898908168e-06,
      "loss": 0.0084,
      "step": 2478080
    },
    {
      "epoch": 4.055465001341948,
      "grad_norm": 0.1463291198015213,
      "learning_rate": 1.9015174976772996e-06,
      "loss": 0.0073,
      "step": 2478100
    },
    {
      "epoch": 4.055497731780601,
      "grad_norm": 0.10330011695623398,
      "learning_rate": 1.9014516054637825e-06,
      "loss": 0.0178,
      "step": 2478120
    },
    {
      "epoch": 4.055530462219255,
      "grad_norm": 0.13381598889827728,
      "learning_rate": 1.9013857132502653e-06,
      "loss": 0.0068,
      "step": 2478140
    },
    {
      "epoch": 4.055563192657908,
      "grad_norm": 0.3975409269332886,
      "learning_rate": 1.901319821036748e-06,
      "loss": 0.0081,
      "step": 2478160
    },
    {
      "epoch": 4.055595923096561,
      "grad_norm": 0.19580470025539398,
      "learning_rate": 1.9012539288232312e-06,
      "loss": 0.0088,
      "step": 2478180
    },
    {
      "epoch": 4.055628653535215,
      "grad_norm": 0.10047069191932678,
      "learning_rate": 1.9011880366097141e-06,
      "loss": 0.009,
      "step": 2478200
    },
    {
      "epoch": 4.055661383973868,
      "grad_norm": 0.12091230601072311,
      "learning_rate": 1.9011221443961969e-06,
      "loss": 0.007,
      "step": 2478220
    },
    {
      "epoch": 4.055694114412521,
      "grad_norm": 0.13951614499092102,
      "learning_rate": 1.9010562521826798e-06,
      "loss": 0.0095,
      "step": 2478240
    },
    {
      "epoch": 4.055726844851175,
      "grad_norm": 0.16801941394805908,
      "learning_rate": 1.9009903599691626e-06,
      "loss": 0.0128,
      "step": 2478260
    },
    {
      "epoch": 4.055759575289828,
      "grad_norm": 0.27189600467681885,
      "learning_rate": 1.9009244677556455e-06,
      "loss": 0.0126,
      "step": 2478280
    },
    {
      "epoch": 4.055792305728481,
      "grad_norm": 1.1513243913650513,
      "learning_rate": 1.9008585755421283e-06,
      "loss": 0.0079,
      "step": 2478300
    },
    {
      "epoch": 4.055825036167135,
      "grad_norm": 0.2511240243911743,
      "learning_rate": 1.9007926833286112e-06,
      "loss": 0.0087,
      "step": 2478320
    },
    {
      "epoch": 4.055857766605788,
      "grad_norm": 0.1387101113796234,
      "learning_rate": 1.900726791115094e-06,
      "loss": 0.0117,
      "step": 2478340
    },
    {
      "epoch": 4.055890497044442,
      "grad_norm": 0.3036450743675232,
      "learning_rate": 1.9006608989015771e-06,
      "loss": 0.0069,
      "step": 2478360
    },
    {
      "epoch": 4.0559232274830945,
      "grad_norm": 0.17735174298286438,
      "learning_rate": 1.9005950066880598e-06,
      "loss": 0.011,
      "step": 2478380
    },
    {
      "epoch": 4.055955957921748,
      "grad_norm": 0.17409634590148926,
      "learning_rate": 1.9005291144745428e-06,
      "loss": 0.0082,
      "step": 2478400
    },
    {
      "epoch": 4.055988688360402,
      "grad_norm": 0.16137318313121796,
      "learning_rate": 1.9004632222610255e-06,
      "loss": 0.0138,
      "step": 2478420
    },
    {
      "epoch": 4.056021418799054,
      "grad_norm": 0.06867310404777527,
      "learning_rate": 1.9003973300475085e-06,
      "loss": 0.0089,
      "step": 2478440
    },
    {
      "epoch": 4.056054149237708,
      "grad_norm": 0.1393313705921173,
      "learning_rate": 1.9003314378339912e-06,
      "loss": 0.0068,
      "step": 2478460
    },
    {
      "epoch": 4.056086879676362,
      "grad_norm": 0.207672581076622,
      "learning_rate": 1.9002655456204742e-06,
      "loss": 0.0134,
      "step": 2478480
    },
    {
      "epoch": 4.056119610115015,
      "grad_norm": 0.5706000328063965,
      "learning_rate": 1.900199653406957e-06,
      "loss": 0.0109,
      "step": 2478500
    },
    {
      "epoch": 4.056152340553668,
      "grad_norm": 0.3418414890766144,
      "learning_rate": 1.90013376119344e-06,
      "loss": 0.0091,
      "step": 2478520
    },
    {
      "epoch": 4.0561850709923215,
      "grad_norm": 0.10368300974369049,
      "learning_rate": 1.9000678689799228e-06,
      "loss": 0.0091,
      "step": 2478540
    },
    {
      "epoch": 4.056217801430975,
      "grad_norm": 0.09813348203897476,
      "learning_rate": 1.9000019767664058e-06,
      "loss": 0.0079,
      "step": 2478560
    },
    {
      "epoch": 4.056250531869628,
      "grad_norm": 0.8220952153205872,
      "learning_rate": 1.8999360845528885e-06,
      "loss": 0.0096,
      "step": 2478580
    },
    {
      "epoch": 4.056283262308281,
      "grad_norm": 0.17644834518432617,
      "learning_rate": 1.8998701923393715e-06,
      "loss": 0.0067,
      "step": 2478600
    },
    {
      "epoch": 4.056315992746935,
      "grad_norm": 0.1312175691127777,
      "learning_rate": 1.8998043001258542e-06,
      "loss": 0.0081,
      "step": 2478620
    },
    {
      "epoch": 4.056348723185588,
      "grad_norm": 0.19227665662765503,
      "learning_rate": 1.8997384079123372e-06,
      "loss": 0.0106,
      "step": 2478640
    },
    {
      "epoch": 4.056381453624241,
      "grad_norm": 0.06543270498514175,
      "learning_rate": 1.89967251569882e-06,
      "loss": 0.0109,
      "step": 2478660
    },
    {
      "epoch": 4.056414184062895,
      "grad_norm": 0.06113792583346367,
      "learning_rate": 1.8996066234853028e-06,
      "loss": 0.0117,
      "step": 2478680
    },
    {
      "epoch": 4.0564469145015485,
      "grad_norm": 0.1412423700094223,
      "learning_rate": 1.8995407312717858e-06,
      "loss": 0.0103,
      "step": 2478700
    },
    {
      "epoch": 4.056479644940201,
      "grad_norm": 0.2167762815952301,
      "learning_rate": 1.8994748390582688e-06,
      "loss": 0.009,
      "step": 2478720
    },
    {
      "epoch": 4.056512375378855,
      "grad_norm": 0.38046470284461975,
      "learning_rate": 1.8994089468447515e-06,
      "loss": 0.011,
      "step": 2478740
    },
    {
      "epoch": 4.056545105817508,
      "grad_norm": 0.24453561007976532,
      "learning_rate": 1.8993430546312344e-06,
      "loss": 0.0085,
      "step": 2478760
    },
    {
      "epoch": 4.056577836256161,
      "grad_norm": 0.30978909134864807,
      "learning_rate": 1.8992771624177172e-06,
      "loss": 0.0093,
      "step": 2478780
    },
    {
      "epoch": 4.056610566694815,
      "grad_norm": 0.11075877398252487,
      "learning_rate": 1.8992112702042001e-06,
      "loss": 0.0096,
      "step": 2478800
    },
    {
      "epoch": 4.056643297133468,
      "grad_norm": 0.08475746214389801,
      "learning_rate": 1.8991453779906829e-06,
      "loss": 0.01,
      "step": 2478820
    },
    {
      "epoch": 4.056676027572122,
      "grad_norm": 0.14277911186218262,
      "learning_rate": 1.8990794857771658e-06,
      "loss": 0.0088,
      "step": 2478840
    },
    {
      "epoch": 4.056708758010775,
      "grad_norm": 0.22552792727947235,
      "learning_rate": 1.8990135935636486e-06,
      "loss": 0.0135,
      "step": 2478860
    },
    {
      "epoch": 4.056741488449428,
      "grad_norm": 1.0840543508529663,
      "learning_rate": 1.8989477013501317e-06,
      "loss": 0.0098,
      "step": 2478880
    },
    {
      "epoch": 4.056774218888082,
      "grad_norm": 0.19224877655506134,
      "learning_rate": 1.8988818091366145e-06,
      "loss": 0.0066,
      "step": 2478900
    },
    {
      "epoch": 4.0568069493267345,
      "grad_norm": 0.20088323950767517,
      "learning_rate": 1.8988159169230974e-06,
      "loss": 0.0065,
      "step": 2478920
    },
    {
      "epoch": 4.056839679765388,
      "grad_norm": 0.1324542909860611,
      "learning_rate": 1.8987500247095802e-06,
      "loss": 0.0068,
      "step": 2478940
    },
    {
      "epoch": 4.056872410204042,
      "grad_norm": 0.12326978892087936,
      "learning_rate": 1.8986841324960631e-06,
      "loss": 0.0084,
      "step": 2478960
    },
    {
      "epoch": 4.056905140642695,
      "grad_norm": 0.12123669683933258,
      "learning_rate": 1.8986182402825459e-06,
      "loss": 0.0102,
      "step": 2478980
    },
    {
      "epoch": 4.056937871081348,
      "grad_norm": 0.0830889642238617,
      "learning_rate": 1.8985523480690288e-06,
      "loss": 0.0068,
      "step": 2479000
    },
    {
      "epoch": 4.0569706015200016,
      "grad_norm": 0.05464145913720131,
      "learning_rate": 1.8984864558555115e-06,
      "loss": 0.0053,
      "step": 2479020
    },
    {
      "epoch": 4.057003331958655,
      "grad_norm": 0.2188483029603958,
      "learning_rate": 1.8984205636419945e-06,
      "loss": 0.0121,
      "step": 2479040
    },
    {
      "epoch": 4.057036062397308,
      "grad_norm": 0.165805384516716,
      "learning_rate": 1.8983546714284777e-06,
      "loss": 0.0058,
      "step": 2479060
    },
    {
      "epoch": 4.0570687928359614,
      "grad_norm": 0.2490171641111374,
      "learning_rate": 1.8982887792149604e-06,
      "loss": 0.0098,
      "step": 2479080
    },
    {
      "epoch": 4.057101523274615,
      "grad_norm": 0.14442169666290283,
      "learning_rate": 1.8982228870014433e-06,
      "loss": 0.0075,
      "step": 2479100
    },
    {
      "epoch": 4.057134253713269,
      "grad_norm": 0.27372056245803833,
      "learning_rate": 1.898156994787926e-06,
      "loss": 0.0091,
      "step": 2479120
    },
    {
      "epoch": 4.057166984151921,
      "grad_norm": 0.23724061250686646,
      "learning_rate": 1.8980911025744088e-06,
      "loss": 0.0086,
      "step": 2479140
    },
    {
      "epoch": 4.057199714590575,
      "grad_norm": 0.3329407274723053,
      "learning_rate": 1.8980252103608918e-06,
      "loss": 0.0081,
      "step": 2479160
    },
    {
      "epoch": 4.0572324450292285,
      "grad_norm": 0.1896287202835083,
      "learning_rate": 1.8979593181473745e-06,
      "loss": 0.014,
      "step": 2479180
    },
    {
      "epoch": 4.057265175467881,
      "grad_norm": 0.1331050992012024,
      "learning_rate": 1.8978934259338575e-06,
      "loss": 0.0094,
      "step": 2479200
    },
    {
      "epoch": 4.057297905906535,
      "grad_norm": 0.21192695200443268,
      "learning_rate": 1.8978275337203402e-06,
      "loss": 0.0092,
      "step": 2479220
    },
    {
      "epoch": 4.057330636345188,
      "grad_norm": 0.2754910886287689,
      "learning_rate": 1.8977616415068234e-06,
      "loss": 0.0069,
      "step": 2479240
    },
    {
      "epoch": 4.057363366783842,
      "grad_norm": 0.3102701008319855,
      "learning_rate": 1.8976957492933063e-06,
      "loss": 0.0108,
      "step": 2479260
    },
    {
      "epoch": 4.057396097222495,
      "grad_norm": 0.10278692841529846,
      "learning_rate": 1.897629857079789e-06,
      "loss": 0.016,
      "step": 2479280
    },
    {
      "epoch": 4.057428827661148,
      "grad_norm": 0.38385194540023804,
      "learning_rate": 1.897563964866272e-06,
      "loss": 0.0173,
      "step": 2479300
    },
    {
      "epoch": 4.057461558099802,
      "grad_norm": 0.07343081384897232,
      "learning_rate": 1.8974980726527548e-06,
      "loss": 0.0052,
      "step": 2479320
    },
    {
      "epoch": 4.057494288538455,
      "grad_norm": 0.39919421076774597,
      "learning_rate": 1.8974321804392377e-06,
      "loss": 0.0077,
      "step": 2479340
    },
    {
      "epoch": 4.057527018977108,
      "grad_norm": 0.23467320203781128,
      "learning_rate": 1.8973662882257204e-06,
      "loss": 0.0073,
      "step": 2479360
    },
    {
      "epoch": 4.057559749415762,
      "grad_norm": 1.0159848928451538,
      "learning_rate": 1.8973003960122032e-06,
      "loss": 0.0084,
      "step": 2479380
    },
    {
      "epoch": 4.057592479854415,
      "grad_norm": 0.4819920063018799,
      "learning_rate": 1.8972345037986863e-06,
      "loss": 0.0064,
      "step": 2479400
    },
    {
      "epoch": 4.057625210293068,
      "grad_norm": 0.2143940031528473,
      "learning_rate": 1.8971686115851693e-06,
      "loss": 0.0069,
      "step": 2479420
    },
    {
      "epoch": 4.057657940731722,
      "grad_norm": 0.1715487837791443,
      "learning_rate": 1.897102719371652e-06,
      "loss": 0.0072,
      "step": 2479440
    },
    {
      "epoch": 4.057690671170375,
      "grad_norm": 0.23706065118312836,
      "learning_rate": 1.897036827158135e-06,
      "loss": 0.0081,
      "step": 2479460
    },
    {
      "epoch": 4.057723401609028,
      "grad_norm": 0.27433210611343384,
      "learning_rate": 1.8969709349446177e-06,
      "loss": 0.0091,
      "step": 2479480
    },
    {
      "epoch": 4.057756132047682,
      "grad_norm": 0.30803337693214417,
      "learning_rate": 1.8969050427311007e-06,
      "loss": 0.0129,
      "step": 2479500
    },
    {
      "epoch": 4.057788862486335,
      "grad_norm": 0.13253284990787506,
      "learning_rate": 1.8968391505175834e-06,
      "loss": 0.0119,
      "step": 2479520
    },
    {
      "epoch": 4.057821592924989,
      "grad_norm": 0.1420975774526596,
      "learning_rate": 1.8967732583040664e-06,
      "loss": 0.0099,
      "step": 2479540
    },
    {
      "epoch": 4.0578543233636415,
      "grad_norm": 0.238535538315773,
      "learning_rate": 1.8967073660905491e-06,
      "loss": 0.0077,
      "step": 2479560
    },
    {
      "epoch": 4.057887053802295,
      "grad_norm": 0.23852121829986572,
      "learning_rate": 1.8966414738770323e-06,
      "loss": 0.0084,
      "step": 2479580
    },
    {
      "epoch": 4.057919784240949,
      "grad_norm": 0.2910034656524658,
      "learning_rate": 1.896575581663515e-06,
      "loss": 0.0079,
      "step": 2479600
    },
    {
      "epoch": 4.057952514679601,
      "grad_norm": 0.1595635563135147,
      "learning_rate": 1.896509689449998e-06,
      "loss": 0.0095,
      "step": 2479620
    },
    {
      "epoch": 4.057985245118255,
      "grad_norm": 1.730075716972351,
      "learning_rate": 1.8964437972364807e-06,
      "loss": 0.0099,
      "step": 2479640
    },
    {
      "epoch": 4.058017975556909,
      "grad_norm": 0.6149814128875732,
      "learning_rate": 1.8963779050229637e-06,
      "loss": 0.0071,
      "step": 2479660
    },
    {
      "epoch": 4.058050705995562,
      "grad_norm": 0.44534656405448914,
      "learning_rate": 1.8963120128094464e-06,
      "loss": 0.0086,
      "step": 2479680
    },
    {
      "epoch": 4.058083436434215,
      "grad_norm": 0.20074597001075745,
      "learning_rate": 1.8962461205959294e-06,
      "loss": 0.0115,
      "step": 2479700
    },
    {
      "epoch": 4.0581161668728685,
      "grad_norm": 0.201655775308609,
      "learning_rate": 1.896180228382412e-06,
      "loss": 0.0089,
      "step": 2479720
    },
    {
      "epoch": 4.058148897311522,
      "grad_norm": 0.19579103589057922,
      "learning_rate": 1.896114336168895e-06,
      "loss": 0.0108,
      "step": 2479740
    },
    {
      "epoch": 4.058181627750175,
      "grad_norm": 0.18816794455051422,
      "learning_rate": 1.896048443955378e-06,
      "loss": 0.0067,
      "step": 2479760
    },
    {
      "epoch": 4.058214358188828,
      "grad_norm": 0.4160611629486084,
      "learning_rate": 1.895982551741861e-06,
      "loss": 0.0122,
      "step": 2479780
    },
    {
      "epoch": 4.058247088627482,
      "grad_norm": 0.46392232179641724,
      "learning_rate": 1.8959166595283437e-06,
      "loss": 0.0115,
      "step": 2479800
    },
    {
      "epoch": 4.0582798190661356,
      "grad_norm": 0.10763175785541534,
      "learning_rate": 1.8958507673148266e-06,
      "loss": 0.0129,
      "step": 2479820
    },
    {
      "epoch": 4.058312549504788,
      "grad_norm": 0.6123563051223755,
      "learning_rate": 1.8957848751013094e-06,
      "loss": 0.0186,
      "step": 2479840
    },
    {
      "epoch": 4.058345279943442,
      "grad_norm": 0.06913859397172928,
      "learning_rate": 1.8957189828877923e-06,
      "loss": 0.0093,
      "step": 2479860
    },
    {
      "epoch": 4.0583780103820954,
      "grad_norm": 0.23046091198921204,
      "learning_rate": 1.895653090674275e-06,
      "loss": 0.0073,
      "step": 2479880
    },
    {
      "epoch": 4.058410740820748,
      "grad_norm": 0.17035898566246033,
      "learning_rate": 1.895587198460758e-06,
      "loss": 0.0069,
      "step": 2479900
    },
    {
      "epoch": 4.058443471259402,
      "grad_norm": 0.19491463899612427,
      "learning_rate": 1.8955213062472408e-06,
      "loss": 0.0122,
      "step": 2479920
    },
    {
      "epoch": 4.058476201698055,
      "grad_norm": 0.12384332716464996,
      "learning_rate": 1.895455414033724e-06,
      "loss": 0.0056,
      "step": 2479940
    },
    {
      "epoch": 4.058508932136709,
      "grad_norm": 0.07482261210680008,
      "learning_rate": 1.8953895218202067e-06,
      "loss": 0.0059,
      "step": 2479960
    },
    {
      "epoch": 4.058541662575362,
      "grad_norm": 0.11043001711368561,
      "learning_rate": 1.8953236296066896e-06,
      "loss": 0.0088,
      "step": 2479980
    },
    {
      "epoch": 4.058574393014015,
      "grad_norm": 0.3824775218963623,
      "learning_rate": 1.8952577373931724e-06,
      "loss": 0.0092,
      "step": 2480000
    },
    {
      "epoch": 4.058607123452669,
      "grad_norm": 0.2900117039680481,
      "learning_rate": 1.8951918451796553e-06,
      "loss": 0.0097,
      "step": 2480020
    },
    {
      "epoch": 4.0586398538913215,
      "grad_norm": 0.43497589230537415,
      "learning_rate": 1.895125952966138e-06,
      "loss": 0.0099,
      "step": 2480040
    },
    {
      "epoch": 4.058672584329975,
      "grad_norm": 0.4674544930458069,
      "learning_rate": 1.895060060752621e-06,
      "loss": 0.0074,
      "step": 2480060
    },
    {
      "epoch": 4.058705314768629,
      "grad_norm": 0.2616345286369324,
      "learning_rate": 1.8949941685391037e-06,
      "loss": 0.0083,
      "step": 2480080
    },
    {
      "epoch": 4.058738045207281,
      "grad_norm": 0.21536855399608612,
      "learning_rate": 1.8949282763255867e-06,
      "loss": 0.0127,
      "step": 2480100
    },
    {
      "epoch": 4.058770775645935,
      "grad_norm": 0.6412104964256287,
      "learning_rate": 1.8948623841120696e-06,
      "loss": 0.0119,
      "step": 2480120
    },
    {
      "epoch": 4.058803506084589,
      "grad_norm": 0.4266324043273926,
      "learning_rate": 1.8947964918985526e-06,
      "loss": 0.012,
      "step": 2480140
    },
    {
      "epoch": 4.058836236523242,
      "grad_norm": 0.36920666694641113,
      "learning_rate": 1.8947305996850353e-06,
      "loss": 0.0121,
      "step": 2480160
    },
    {
      "epoch": 4.058868966961895,
      "grad_norm": 0.11767901480197906,
      "learning_rate": 1.8946647074715183e-06,
      "loss": 0.0068,
      "step": 2480180
    },
    {
      "epoch": 4.0589016974005485,
      "grad_norm": 0.5709248781204224,
      "learning_rate": 1.894598815258001e-06,
      "loss": 0.0095,
      "step": 2480200
    },
    {
      "epoch": 4.058934427839202,
      "grad_norm": 0.13089652359485626,
      "learning_rate": 1.894532923044484e-06,
      "loss": 0.0084,
      "step": 2480220
    },
    {
      "epoch": 4.058967158277855,
      "grad_norm": 0.20363402366638184,
      "learning_rate": 1.8944670308309667e-06,
      "loss": 0.0095,
      "step": 2480240
    },
    {
      "epoch": 4.058999888716508,
      "grad_norm": 0.10952401906251907,
      "learning_rate": 1.8944011386174497e-06,
      "loss": 0.0075,
      "step": 2480260
    },
    {
      "epoch": 4.059032619155162,
      "grad_norm": 0.3926589787006378,
      "learning_rate": 1.8943352464039328e-06,
      "loss": 0.0109,
      "step": 2480280
    },
    {
      "epoch": 4.059065349593816,
      "grad_norm": 0.2696487307548523,
      "learning_rate": 1.8942693541904156e-06,
      "loss": 0.0132,
      "step": 2480300
    },
    {
      "epoch": 4.059098080032468,
      "grad_norm": 0.34402191638946533,
      "learning_rate": 1.8942034619768985e-06,
      "loss": 0.0058,
      "step": 2480320
    },
    {
      "epoch": 4.059130810471122,
      "grad_norm": 0.555817186832428,
      "learning_rate": 1.8941375697633813e-06,
      "loss": 0.0106,
      "step": 2480340
    },
    {
      "epoch": 4.0591635409097755,
      "grad_norm": 0.07444237172603607,
      "learning_rate": 1.894071677549864e-06,
      "loss": 0.006,
      "step": 2480360
    },
    {
      "epoch": 4.059196271348428,
      "grad_norm": 0.39688581228256226,
      "learning_rate": 1.894005785336347e-06,
      "loss": 0.0089,
      "step": 2480380
    },
    {
      "epoch": 4.059229001787082,
      "grad_norm": 0.14274051785469055,
      "learning_rate": 1.8939398931228297e-06,
      "loss": 0.0085,
      "step": 2480400
    },
    {
      "epoch": 4.059261732225735,
      "grad_norm": 0.13809286057949066,
      "learning_rate": 1.8938740009093126e-06,
      "loss": 0.007,
      "step": 2480420
    },
    {
      "epoch": 4.059294462664389,
      "grad_norm": 0.06922811269760132,
      "learning_rate": 1.8938081086957954e-06,
      "loss": 0.0102,
      "step": 2480440
    },
    {
      "epoch": 4.059327193103042,
      "grad_norm": 0.1274486482143402,
      "learning_rate": 1.8937422164822785e-06,
      "loss": 0.01,
      "step": 2480460
    },
    {
      "epoch": 4.059359923541695,
      "grad_norm": 0.1781964898109436,
      "learning_rate": 1.8936763242687615e-06,
      "loss": 0.0094,
      "step": 2480480
    },
    {
      "epoch": 4.059392653980349,
      "grad_norm": 0.31234613060951233,
      "learning_rate": 1.8936104320552442e-06,
      "loss": 0.0085,
      "step": 2480500
    },
    {
      "epoch": 4.059425384419002,
      "grad_norm": 0.11722753196954727,
      "learning_rate": 1.8935445398417272e-06,
      "loss": 0.0091,
      "step": 2480520
    },
    {
      "epoch": 4.059458114857655,
      "grad_norm": 0.1322236806154251,
      "learning_rate": 1.89347864762821e-06,
      "loss": 0.0107,
      "step": 2480540
    },
    {
      "epoch": 4.059490845296309,
      "grad_norm": 0.10317883640527725,
      "learning_rate": 1.8934127554146927e-06,
      "loss": 0.0085,
      "step": 2480560
    },
    {
      "epoch": 4.059523575734962,
      "grad_norm": 0.14297696948051453,
      "learning_rate": 1.8933468632011756e-06,
      "loss": 0.0107,
      "step": 2480580
    },
    {
      "epoch": 4.059556306173615,
      "grad_norm": 0.21621020138263702,
      "learning_rate": 1.8932809709876584e-06,
      "loss": 0.0088,
      "step": 2480600
    },
    {
      "epoch": 4.059589036612269,
      "grad_norm": 0.21685735881328583,
      "learning_rate": 1.8932150787741413e-06,
      "loss": 0.0085,
      "step": 2480620
    },
    {
      "epoch": 4.059621767050922,
      "grad_norm": 0.1826734095811844,
      "learning_rate": 1.8931491865606245e-06,
      "loss": 0.0055,
      "step": 2480640
    },
    {
      "epoch": 4.059654497489575,
      "grad_norm": 0.3133062422275543,
      "learning_rate": 1.8930832943471072e-06,
      "loss": 0.0081,
      "step": 2480660
    },
    {
      "epoch": 4.059687227928229,
      "grad_norm": 0.21262921392917633,
      "learning_rate": 1.8930174021335902e-06,
      "loss": 0.0096,
      "step": 2480680
    },
    {
      "epoch": 4.059719958366882,
      "grad_norm": 0.20334483683109283,
      "learning_rate": 1.892951509920073e-06,
      "loss": 0.01,
      "step": 2480700
    },
    {
      "epoch": 4.059752688805536,
      "grad_norm": 0.10999710112810135,
      "learning_rate": 1.8928856177065559e-06,
      "loss": 0.007,
      "step": 2480720
    },
    {
      "epoch": 4.0597854192441885,
      "grad_norm": 0.2379007339477539,
      "learning_rate": 1.8928197254930386e-06,
      "loss": 0.01,
      "step": 2480740
    },
    {
      "epoch": 4.059818149682842,
      "grad_norm": 0.27318742871284485,
      "learning_rate": 1.8927538332795215e-06,
      "loss": 0.0087,
      "step": 2480760
    },
    {
      "epoch": 4.059850880121496,
      "grad_norm": 0.2650395929813385,
      "learning_rate": 1.8926879410660043e-06,
      "loss": 0.0095,
      "step": 2480780
    },
    {
      "epoch": 4.059883610560148,
      "grad_norm": 0.3524336516857147,
      "learning_rate": 1.892622048852487e-06,
      "loss": 0.008,
      "step": 2480800
    },
    {
      "epoch": 4.059916340998802,
      "grad_norm": 0.1782875508069992,
      "learning_rate": 1.8925561566389702e-06,
      "loss": 0.0087,
      "step": 2480820
    },
    {
      "epoch": 4.0599490714374555,
      "grad_norm": 0.21974530816078186,
      "learning_rate": 1.8924902644254531e-06,
      "loss": 0.0094,
      "step": 2480840
    },
    {
      "epoch": 4.059981801876109,
      "grad_norm": 0.2767765522003174,
      "learning_rate": 1.8924243722119359e-06,
      "loss": 0.0063,
      "step": 2480860
    },
    {
      "epoch": 4.060014532314762,
      "grad_norm": 0.14240799844264984,
      "learning_rate": 1.8923584799984188e-06,
      "loss": 0.0081,
      "step": 2480880
    },
    {
      "epoch": 4.060047262753415,
      "grad_norm": 0.12649770081043243,
      "learning_rate": 1.8922925877849016e-06,
      "loss": 0.0081,
      "step": 2480900
    },
    {
      "epoch": 4.060079993192069,
      "grad_norm": 0.33147531747817993,
      "learning_rate": 1.8922266955713845e-06,
      "loss": 0.0115,
      "step": 2480920
    },
    {
      "epoch": 4.060112723630722,
      "grad_norm": 0.49119892716407776,
      "learning_rate": 1.8921608033578673e-06,
      "loss": 0.014,
      "step": 2480940
    },
    {
      "epoch": 4.060145454069375,
      "grad_norm": 0.19508689641952515,
      "learning_rate": 1.8920949111443502e-06,
      "loss": 0.0075,
      "step": 2480960
    },
    {
      "epoch": 4.060178184508029,
      "grad_norm": 0.276848703622818,
      "learning_rate": 1.892029018930833e-06,
      "loss": 0.0086,
      "step": 2480980
    },
    {
      "epoch": 4.0602109149466825,
      "grad_norm": 0.2531469166278839,
      "learning_rate": 1.8919631267173161e-06,
      "loss": 0.0098,
      "step": 2481000
    },
    {
      "epoch": 4.060243645385335,
      "grad_norm": 0.19353847205638885,
      "learning_rate": 1.8918972345037989e-06,
      "loss": 0.0087,
      "step": 2481020
    },
    {
      "epoch": 4.060276375823989,
      "grad_norm": 0.32909491658210754,
      "learning_rate": 1.8918313422902818e-06,
      "loss": 0.0092,
      "step": 2481040
    },
    {
      "epoch": 4.060309106262642,
      "grad_norm": 0.4328368306159973,
      "learning_rate": 1.8917654500767645e-06,
      "loss": 0.0087,
      "step": 2481060
    },
    {
      "epoch": 4.060341836701295,
      "grad_norm": 0.33197924494743347,
      "learning_rate": 1.8916995578632475e-06,
      "loss": 0.0097,
      "step": 2481080
    },
    {
      "epoch": 4.060374567139949,
      "grad_norm": 0.7967948317527771,
      "learning_rate": 1.8916336656497302e-06,
      "loss": 0.0091,
      "step": 2481100
    },
    {
      "epoch": 4.060407297578602,
      "grad_norm": 0.09641886502504349,
      "learning_rate": 1.8915677734362132e-06,
      "loss": 0.0128,
      "step": 2481120
    },
    {
      "epoch": 4.060440028017256,
      "grad_norm": 0.062287136912345886,
      "learning_rate": 1.891501881222696e-06,
      "loss": 0.0102,
      "step": 2481140
    },
    {
      "epoch": 4.060472758455909,
      "grad_norm": 1.3731040954589844,
      "learning_rate": 1.891435989009179e-06,
      "loss": 0.0107,
      "step": 2481160
    },
    {
      "epoch": 4.060505488894562,
      "grad_norm": 0.27436187863349915,
      "learning_rate": 1.8913700967956618e-06,
      "loss": 0.0086,
      "step": 2481180
    },
    {
      "epoch": 4.060538219333216,
      "grad_norm": 0.22377736866474152,
      "learning_rate": 1.8913042045821448e-06,
      "loss": 0.0117,
      "step": 2481200
    },
    {
      "epoch": 4.0605709497718685,
      "grad_norm": 0.25921767950057983,
      "learning_rate": 1.8912383123686275e-06,
      "loss": 0.0098,
      "step": 2481220
    },
    {
      "epoch": 4.060603680210522,
      "grad_norm": 0.1236182153224945,
      "learning_rate": 1.8911724201551105e-06,
      "loss": 0.0085,
      "step": 2481240
    },
    {
      "epoch": 4.060636410649176,
      "grad_norm": 0.22624951601028442,
      "learning_rate": 1.8911065279415932e-06,
      "loss": 0.0062,
      "step": 2481260
    },
    {
      "epoch": 4.060669141087829,
      "grad_norm": 0.21989360451698303,
      "learning_rate": 1.8910406357280762e-06,
      "loss": 0.0077,
      "step": 2481280
    },
    {
      "epoch": 4.060701871526482,
      "grad_norm": 0.3798332214355469,
      "learning_rate": 1.890974743514559e-06,
      "loss": 0.0105,
      "step": 2481300
    },
    {
      "epoch": 4.060734601965136,
      "grad_norm": 0.17978620529174805,
      "learning_rate": 1.8909088513010419e-06,
      "loss": 0.0071,
      "step": 2481320
    },
    {
      "epoch": 4.060767332403789,
      "grad_norm": 0.1953287124633789,
      "learning_rate": 1.8908429590875248e-06,
      "loss": 0.0068,
      "step": 2481340
    },
    {
      "epoch": 4.060800062842442,
      "grad_norm": 0.2051543891429901,
      "learning_rate": 1.8907770668740078e-06,
      "loss": 0.0071,
      "step": 2481360
    },
    {
      "epoch": 4.0608327932810955,
      "grad_norm": 0.13561902940273285,
      "learning_rate": 1.8907111746604905e-06,
      "loss": 0.012,
      "step": 2481380
    },
    {
      "epoch": 4.060865523719749,
      "grad_norm": 0.13546885550022125,
      "learning_rate": 1.8906452824469735e-06,
      "loss": 0.0071,
      "step": 2481400
    },
    {
      "epoch": 4.060898254158403,
      "grad_norm": 0.1549072116613388,
      "learning_rate": 1.8905793902334562e-06,
      "loss": 0.0057,
      "step": 2481420
    },
    {
      "epoch": 4.060930984597055,
      "grad_norm": 0.14874476194381714,
      "learning_rate": 1.8905134980199391e-06,
      "loss": 0.0072,
      "step": 2481440
    },
    {
      "epoch": 4.060963715035709,
      "grad_norm": 0.13460703194141388,
      "learning_rate": 1.8904476058064219e-06,
      "loss": 0.0128,
      "step": 2481460
    },
    {
      "epoch": 4.060996445474363,
      "grad_norm": 0.4485491216182709,
      "learning_rate": 1.8903817135929048e-06,
      "loss": 0.0106,
      "step": 2481480
    },
    {
      "epoch": 4.061029175913015,
      "grad_norm": 0.40360894799232483,
      "learning_rate": 1.8903158213793876e-06,
      "loss": 0.0174,
      "step": 2481500
    },
    {
      "epoch": 4.061061906351669,
      "grad_norm": 0.12206284701824188,
      "learning_rate": 1.8902499291658707e-06,
      "loss": 0.0101,
      "step": 2481520
    },
    {
      "epoch": 4.0610946367903225,
      "grad_norm": 0.12630124390125275,
      "learning_rate": 1.8901840369523537e-06,
      "loss": 0.0159,
      "step": 2481540
    },
    {
      "epoch": 4.061127367228975,
      "grad_norm": 0.5583906769752502,
      "learning_rate": 1.8901181447388364e-06,
      "loss": 0.0083,
      "step": 2481560
    },
    {
      "epoch": 4.061160097667629,
      "grad_norm": 0.20005010068416595,
      "learning_rate": 1.8900522525253192e-06,
      "loss": 0.0094,
      "step": 2481580
    },
    {
      "epoch": 4.061192828106282,
      "grad_norm": 0.08318650722503662,
      "learning_rate": 1.8899863603118021e-06,
      "loss": 0.0107,
      "step": 2481600
    },
    {
      "epoch": 4.061225558544936,
      "grad_norm": 0.3064122498035431,
      "learning_rate": 1.8899204680982849e-06,
      "loss": 0.0093,
      "step": 2481620
    },
    {
      "epoch": 4.061258288983589,
      "grad_norm": 0.48115259408950806,
      "learning_rate": 1.8898545758847678e-06,
      "loss": 0.0138,
      "step": 2481640
    },
    {
      "epoch": 4.061291019422242,
      "grad_norm": 0.12809203565120697,
      "learning_rate": 1.8897886836712506e-06,
      "loss": 0.0106,
      "step": 2481660
    },
    {
      "epoch": 4.061323749860896,
      "grad_norm": 0.15193690359592438,
      "learning_rate": 1.8897227914577335e-06,
      "loss": 0.0091,
      "step": 2481680
    },
    {
      "epoch": 4.0613564802995485,
      "grad_norm": 0.24381330609321594,
      "learning_rate": 1.8896568992442167e-06,
      "loss": 0.0095,
      "step": 2481700
    },
    {
      "epoch": 4.061389210738202,
      "grad_norm": 0.1835765689611435,
      "learning_rate": 1.8895910070306994e-06,
      "loss": 0.0094,
      "step": 2481720
    },
    {
      "epoch": 4.061421941176856,
      "grad_norm": 0.2162655144929886,
      "learning_rate": 1.8895251148171824e-06,
      "loss": 0.0066,
      "step": 2481740
    },
    {
      "epoch": 4.061454671615509,
      "grad_norm": 0.13356322050094604,
      "learning_rate": 1.889459222603665e-06,
      "loss": 0.0098,
      "step": 2481760
    },
    {
      "epoch": 4.061487402054162,
      "grad_norm": 0.13207219541072845,
      "learning_rate": 1.8893933303901478e-06,
      "loss": 0.0069,
      "step": 2481780
    },
    {
      "epoch": 4.061520132492816,
      "grad_norm": 0.20917166769504547,
      "learning_rate": 1.8893274381766308e-06,
      "loss": 0.0089,
      "step": 2481800
    },
    {
      "epoch": 4.061552862931469,
      "grad_norm": 0.12058956921100616,
      "learning_rate": 1.8892615459631135e-06,
      "loss": 0.0054,
      "step": 2481820
    },
    {
      "epoch": 4.061585593370122,
      "grad_norm": 0.09118808805942535,
      "learning_rate": 1.8891956537495965e-06,
      "loss": 0.0068,
      "step": 2481840
    },
    {
      "epoch": 4.0616183238087755,
      "grad_norm": 0.24010196328163147,
      "learning_rate": 1.8891297615360792e-06,
      "loss": 0.0057,
      "step": 2481860
    },
    {
      "epoch": 4.061651054247429,
      "grad_norm": 0.06527244299650192,
      "learning_rate": 1.8890638693225624e-06,
      "loss": 0.0079,
      "step": 2481880
    },
    {
      "epoch": 4.061683784686083,
      "grad_norm": 0.3582214117050171,
      "learning_rate": 1.8889979771090453e-06,
      "loss": 0.0095,
      "step": 2481900
    },
    {
      "epoch": 4.061716515124735,
      "grad_norm": 0.24597124755382538,
      "learning_rate": 1.888932084895528e-06,
      "loss": 0.0099,
      "step": 2481920
    },
    {
      "epoch": 4.061749245563389,
      "grad_norm": 0.10780610889196396,
      "learning_rate": 1.888866192682011e-06,
      "loss": 0.0067,
      "step": 2481940
    },
    {
      "epoch": 4.061781976002043,
      "grad_norm": 0.10843458771705627,
      "learning_rate": 1.8888003004684938e-06,
      "loss": 0.0084,
      "step": 2481960
    },
    {
      "epoch": 4.061814706440695,
      "grad_norm": 0.16188320517539978,
      "learning_rate": 1.8887344082549767e-06,
      "loss": 0.0098,
      "step": 2481980
    },
    {
      "epoch": 4.061847436879349,
      "grad_norm": 0.2253323793411255,
      "learning_rate": 1.8886685160414595e-06,
      "loss": 0.0085,
      "step": 2482000
    },
    {
      "epoch": 4.0618801673180025,
      "grad_norm": 0.14735211431980133,
      "learning_rate": 1.8886026238279422e-06,
      "loss": 0.0073,
      "step": 2482020
    },
    {
      "epoch": 4.061912897756656,
      "grad_norm": 0.14664390683174133,
      "learning_rate": 1.8885367316144254e-06,
      "loss": 0.0121,
      "step": 2482040
    },
    {
      "epoch": 4.061945628195309,
      "grad_norm": 0.10654628276824951,
      "learning_rate": 1.8884708394009083e-06,
      "loss": 0.0083,
      "step": 2482060
    },
    {
      "epoch": 4.061978358633962,
      "grad_norm": 0.07426600903272629,
      "learning_rate": 1.888404947187391e-06,
      "loss": 0.0085,
      "step": 2482080
    },
    {
      "epoch": 4.062011089072616,
      "grad_norm": 0.15539556741714478,
      "learning_rate": 1.888339054973874e-06,
      "loss": 0.0072,
      "step": 2482100
    },
    {
      "epoch": 4.062043819511269,
      "grad_norm": 0.35527074337005615,
      "learning_rate": 1.8882731627603567e-06,
      "loss": 0.0075,
      "step": 2482120
    },
    {
      "epoch": 4.062076549949922,
      "grad_norm": 0.09456954896450043,
      "learning_rate": 1.8882072705468397e-06,
      "loss": 0.0128,
      "step": 2482140
    },
    {
      "epoch": 4.062109280388576,
      "grad_norm": 0.17103220522403717,
      "learning_rate": 1.8881413783333224e-06,
      "loss": 0.0115,
      "step": 2482160
    },
    {
      "epoch": 4.0621420108272295,
      "grad_norm": 0.16295918822288513,
      "learning_rate": 1.8880754861198054e-06,
      "loss": 0.0098,
      "step": 2482180
    },
    {
      "epoch": 4.062174741265882,
      "grad_norm": 0.4495104253292084,
      "learning_rate": 1.8880095939062881e-06,
      "loss": 0.01,
      "step": 2482200
    },
    {
      "epoch": 4.062207471704536,
      "grad_norm": 0.7221174836158752,
      "learning_rate": 1.8879437016927713e-06,
      "loss": 0.0101,
      "step": 2482220
    },
    {
      "epoch": 4.062240202143189,
      "grad_norm": 0.26847103238105774,
      "learning_rate": 1.887877809479254e-06,
      "loss": 0.0075,
      "step": 2482240
    },
    {
      "epoch": 4.062272932581842,
      "grad_norm": 0.2896336615085602,
      "learning_rate": 1.887811917265737e-06,
      "loss": 0.0064,
      "step": 2482260
    },
    {
      "epoch": 4.062305663020496,
      "grad_norm": 0.23593774437904358,
      "learning_rate": 1.8877460250522197e-06,
      "loss": 0.0065,
      "step": 2482280
    },
    {
      "epoch": 4.062338393459149,
      "grad_norm": 0.7722190022468567,
      "learning_rate": 1.8876801328387027e-06,
      "loss": 0.0093,
      "step": 2482300
    },
    {
      "epoch": 4.062371123897803,
      "grad_norm": 0.3133322596549988,
      "learning_rate": 1.8876142406251854e-06,
      "loss": 0.0085,
      "step": 2482320
    },
    {
      "epoch": 4.062403854336456,
      "grad_norm": 0.2786673605442047,
      "learning_rate": 1.8875483484116684e-06,
      "loss": 0.0076,
      "step": 2482340
    },
    {
      "epoch": 4.062436584775109,
      "grad_norm": 0.15395720303058624,
      "learning_rate": 1.887482456198151e-06,
      "loss": 0.0127,
      "step": 2482360
    },
    {
      "epoch": 4.062469315213763,
      "grad_norm": 0.09790774434804916,
      "learning_rate": 1.887416563984634e-06,
      "loss": 0.008,
      "step": 2482380
    },
    {
      "epoch": 4.0625020456524155,
      "grad_norm": 0.07487567514181137,
      "learning_rate": 1.887350671771117e-06,
      "loss": 0.0088,
      "step": 2482400
    },
    {
      "epoch": 4.062534776091069,
      "grad_norm": 0.14839352667331696,
      "learning_rate": 1.8872847795576e-06,
      "loss": 0.0078,
      "step": 2482420
    },
    {
      "epoch": 4.062567506529723,
      "grad_norm": 0.14621369540691376,
      "learning_rate": 1.8872188873440827e-06,
      "loss": 0.0078,
      "step": 2482440
    },
    {
      "epoch": 4.062600236968376,
      "grad_norm": 0.32256314158439636,
      "learning_rate": 1.8871529951305656e-06,
      "loss": 0.0131,
      "step": 2482460
    },
    {
      "epoch": 4.062632967407029,
      "grad_norm": 0.6025042533874512,
      "learning_rate": 1.8870871029170484e-06,
      "loss": 0.009,
      "step": 2482480
    },
    {
      "epoch": 4.0626656978456825,
      "grad_norm": 0.512938380241394,
      "learning_rate": 1.8870212107035313e-06,
      "loss": 0.0103,
      "step": 2482500
    },
    {
      "epoch": 4.062698428284336,
      "grad_norm": 0.192206472158432,
      "learning_rate": 1.886955318490014e-06,
      "loss": 0.0071,
      "step": 2482520
    },
    {
      "epoch": 4.062731158722989,
      "grad_norm": 0.5944742560386658,
      "learning_rate": 1.886889426276497e-06,
      "loss": 0.0084,
      "step": 2482540
    },
    {
      "epoch": 4.062763889161642,
      "grad_norm": 0.5601383447647095,
      "learning_rate": 1.8868235340629798e-06,
      "loss": 0.0147,
      "step": 2482560
    },
    {
      "epoch": 4.062796619600296,
      "grad_norm": 0.286968469619751,
      "learning_rate": 1.886757641849463e-06,
      "loss": 0.0096,
      "step": 2482580
    },
    {
      "epoch": 4.06282935003895,
      "grad_norm": 0.3026239573955536,
      "learning_rate": 1.8866917496359457e-06,
      "loss": 0.0075,
      "step": 2482600
    },
    {
      "epoch": 4.062862080477602,
      "grad_norm": 0.3849417567253113,
      "learning_rate": 1.8866258574224286e-06,
      "loss": 0.0103,
      "step": 2482620
    },
    {
      "epoch": 4.062894810916256,
      "grad_norm": 0.3276582956314087,
      "learning_rate": 1.8865599652089114e-06,
      "loss": 0.0136,
      "step": 2482640
    },
    {
      "epoch": 4.0629275413549095,
      "grad_norm": 0.2725491523742676,
      "learning_rate": 1.8864940729953943e-06,
      "loss": 0.0124,
      "step": 2482660
    },
    {
      "epoch": 4.062960271793562,
      "grad_norm": 0.3211348056793213,
      "learning_rate": 1.886428180781877e-06,
      "loss": 0.0091,
      "step": 2482680
    },
    {
      "epoch": 4.062993002232216,
      "grad_norm": 0.1220489889383316,
      "learning_rate": 1.88636228856836e-06,
      "loss": 0.0076,
      "step": 2482700
    },
    {
      "epoch": 4.063025732670869,
      "grad_norm": 0.36093443632125854,
      "learning_rate": 1.8862963963548427e-06,
      "loss": 0.007,
      "step": 2482720
    },
    {
      "epoch": 4.063058463109523,
      "grad_norm": 0.08762840926647186,
      "learning_rate": 1.8862305041413257e-06,
      "loss": 0.012,
      "step": 2482740
    },
    {
      "epoch": 4.063091193548176,
      "grad_norm": 0.7512195706367493,
      "learning_rate": 1.8861646119278087e-06,
      "loss": 0.0169,
      "step": 2482760
    },
    {
      "epoch": 4.063123923986829,
      "grad_norm": 0.19548051059246063,
      "learning_rate": 1.8860987197142916e-06,
      "loss": 0.01,
      "step": 2482780
    },
    {
      "epoch": 4.063156654425483,
      "grad_norm": 0.24632541835308075,
      "learning_rate": 1.8860328275007743e-06,
      "loss": 0.0064,
      "step": 2482800
    },
    {
      "epoch": 4.063189384864136,
      "grad_norm": 0.3110790252685547,
      "learning_rate": 1.8859669352872573e-06,
      "loss": 0.0101,
      "step": 2482820
    },
    {
      "epoch": 4.063222115302789,
      "grad_norm": 0.22200815379619598,
      "learning_rate": 1.88590104307374e-06,
      "loss": 0.0091,
      "step": 2482840
    },
    {
      "epoch": 4.063254845741443,
      "grad_norm": 0.16021813452243805,
      "learning_rate": 1.885835150860223e-06,
      "loss": 0.0055,
      "step": 2482860
    },
    {
      "epoch": 4.063287576180096,
      "grad_norm": 0.37989482283592224,
      "learning_rate": 1.8857692586467057e-06,
      "loss": 0.0105,
      "step": 2482880
    },
    {
      "epoch": 4.063320306618749,
      "grad_norm": 0.23040087521076202,
      "learning_rate": 1.8857033664331887e-06,
      "loss": 0.0126,
      "step": 2482900
    },
    {
      "epoch": 4.063353037057403,
      "grad_norm": 0.23396873474121094,
      "learning_rate": 1.8856374742196718e-06,
      "loss": 0.0122,
      "step": 2482920
    },
    {
      "epoch": 4.063385767496056,
      "grad_norm": 0.44168394804000854,
      "learning_rate": 1.8855715820061546e-06,
      "loss": 0.0092,
      "step": 2482940
    },
    {
      "epoch": 4.063418497934709,
      "grad_norm": 0.1565287709236145,
      "learning_rate": 1.8855056897926375e-06,
      "loss": 0.0099,
      "step": 2482960
    },
    {
      "epoch": 4.063451228373363,
      "grad_norm": 0.13976506888866425,
      "learning_rate": 1.8854397975791203e-06,
      "loss": 0.0083,
      "step": 2482980
    },
    {
      "epoch": 4.063483958812016,
      "grad_norm": 0.47017285227775574,
      "learning_rate": 1.885373905365603e-06,
      "loss": 0.0069,
      "step": 2483000
    },
    {
      "epoch": 4.063516689250669,
      "grad_norm": 0.44331490993499756,
      "learning_rate": 1.885308013152086e-06,
      "loss": 0.0103,
      "step": 2483020
    },
    {
      "epoch": 4.0635494196893225,
      "grad_norm": 0.28519827127456665,
      "learning_rate": 1.8852421209385687e-06,
      "loss": 0.0133,
      "step": 2483040
    },
    {
      "epoch": 4.063582150127976,
      "grad_norm": 0.15663950145244598,
      "learning_rate": 1.8851762287250517e-06,
      "loss": 0.006,
      "step": 2483060
    },
    {
      "epoch": 4.06361488056663,
      "grad_norm": 0.12057230621576309,
      "learning_rate": 1.8851103365115344e-06,
      "loss": 0.0067,
      "step": 2483080
    },
    {
      "epoch": 4.063647611005282,
      "grad_norm": 0.12867562472820282,
      "learning_rate": 1.8850444442980176e-06,
      "loss": 0.0133,
      "step": 2483100
    },
    {
      "epoch": 4.063680341443936,
      "grad_norm": 0.09959204494953156,
      "learning_rate": 1.8849785520845005e-06,
      "loss": 0.0078,
      "step": 2483120
    },
    {
      "epoch": 4.06371307188259,
      "grad_norm": 0.22881877422332764,
      "learning_rate": 1.8849126598709832e-06,
      "loss": 0.0074,
      "step": 2483140
    },
    {
      "epoch": 4.063745802321242,
      "grad_norm": 0.23171250522136688,
      "learning_rate": 1.8848467676574662e-06,
      "loss": 0.0115,
      "step": 2483160
    },
    {
      "epoch": 4.063778532759896,
      "grad_norm": 0.139394149184227,
      "learning_rate": 1.884780875443949e-06,
      "loss": 0.0105,
      "step": 2483180
    },
    {
      "epoch": 4.0638112631985495,
      "grad_norm": 0.6909946799278259,
      "learning_rate": 1.8847149832304317e-06,
      "loss": 0.0127,
      "step": 2483200
    },
    {
      "epoch": 4.063843993637203,
      "grad_norm": 0.39310169219970703,
      "learning_rate": 1.8846490910169146e-06,
      "loss": 0.0106,
      "step": 2483220
    },
    {
      "epoch": 4.063876724075856,
      "grad_norm": 0.06770948320627213,
      "learning_rate": 1.8845831988033974e-06,
      "loss": 0.0145,
      "step": 2483240
    },
    {
      "epoch": 4.063909454514509,
      "grad_norm": 0.10723162442445755,
      "learning_rate": 1.8845173065898803e-06,
      "loss": 0.0101,
      "step": 2483260
    },
    {
      "epoch": 4.063942184953163,
      "grad_norm": 0.40965011715888977,
      "learning_rate": 1.8844514143763635e-06,
      "loss": 0.0135,
      "step": 2483280
    },
    {
      "epoch": 4.063974915391816,
      "grad_norm": 0.2411818653345108,
      "learning_rate": 1.8843855221628462e-06,
      "loss": 0.0092,
      "step": 2483300
    },
    {
      "epoch": 4.064007645830469,
      "grad_norm": 0.09914645552635193,
      "learning_rate": 1.8843196299493292e-06,
      "loss": 0.0053,
      "step": 2483320
    },
    {
      "epoch": 4.064040376269123,
      "grad_norm": 0.3059573471546173,
      "learning_rate": 1.884253737735812e-06,
      "loss": 0.009,
      "step": 2483340
    },
    {
      "epoch": 4.064073106707776,
      "grad_norm": 0.15535297989845276,
      "learning_rate": 1.8841878455222949e-06,
      "loss": 0.0086,
      "step": 2483360
    },
    {
      "epoch": 4.064105837146429,
      "grad_norm": 0.2459544837474823,
      "learning_rate": 1.8841219533087776e-06,
      "loss": 0.0083,
      "step": 2483380
    },
    {
      "epoch": 4.064138567585083,
      "grad_norm": 0.0813923254609108,
      "learning_rate": 1.8840560610952606e-06,
      "loss": 0.0075,
      "step": 2483400
    },
    {
      "epoch": 4.064171298023736,
      "grad_norm": 0.2500807046890259,
      "learning_rate": 1.8839901688817433e-06,
      "loss": 0.0081,
      "step": 2483420
    },
    {
      "epoch": 4.064204028462389,
      "grad_norm": 0.5701866149902344,
      "learning_rate": 1.883924276668226e-06,
      "loss": 0.0084,
      "step": 2483440
    },
    {
      "epoch": 4.064236758901043,
      "grad_norm": 0.22967074811458588,
      "learning_rate": 1.8838583844547092e-06,
      "loss": 0.0064,
      "step": 2483460
    },
    {
      "epoch": 4.064269489339696,
      "grad_norm": 0.09287866204977036,
      "learning_rate": 1.8837924922411922e-06,
      "loss": 0.0063,
      "step": 2483480
    },
    {
      "epoch": 4.06430221977835,
      "grad_norm": 0.22735080122947693,
      "learning_rate": 1.8837266000276749e-06,
      "loss": 0.01,
      "step": 2483500
    },
    {
      "epoch": 4.0643349502170025,
      "grad_norm": 0.3532602787017822,
      "learning_rate": 1.8836607078141578e-06,
      "loss": 0.0094,
      "step": 2483520
    },
    {
      "epoch": 4.064367680655656,
      "grad_norm": 0.32661017775535583,
      "learning_rate": 1.8835948156006406e-06,
      "loss": 0.0072,
      "step": 2483540
    },
    {
      "epoch": 4.06440041109431,
      "grad_norm": 0.5297463536262512,
      "learning_rate": 1.8835289233871235e-06,
      "loss": 0.0082,
      "step": 2483560
    },
    {
      "epoch": 4.064433141532962,
      "grad_norm": 1.0545973777770996,
      "learning_rate": 1.8834630311736063e-06,
      "loss": 0.0085,
      "step": 2483580
    },
    {
      "epoch": 4.064465871971616,
      "grad_norm": 0.2043713927268982,
      "learning_rate": 1.8833971389600892e-06,
      "loss": 0.009,
      "step": 2483600
    },
    {
      "epoch": 4.06449860241027,
      "grad_norm": 0.15505972504615784,
      "learning_rate": 1.8833312467465722e-06,
      "loss": 0.006,
      "step": 2483620
    },
    {
      "epoch": 4.064531332848923,
      "grad_norm": 0.16012579202651978,
      "learning_rate": 1.8832653545330551e-06,
      "loss": 0.0111,
      "step": 2483640
    },
    {
      "epoch": 4.064564063287576,
      "grad_norm": 2.143409252166748,
      "learning_rate": 1.8831994623195379e-06,
      "loss": 0.0073,
      "step": 2483660
    },
    {
      "epoch": 4.0645967937262295,
      "grad_norm": 0.12446778267621994,
      "learning_rate": 1.8831335701060208e-06,
      "loss": 0.01,
      "step": 2483680
    },
    {
      "epoch": 4.064629524164883,
      "grad_norm": 0.2194977104663849,
      "learning_rate": 1.8830676778925036e-06,
      "loss": 0.0113,
      "step": 2483700
    },
    {
      "epoch": 4.064662254603536,
      "grad_norm": 0.07625995576381683,
      "learning_rate": 1.8830017856789865e-06,
      "loss": 0.0129,
      "step": 2483720
    },
    {
      "epoch": 4.064694985042189,
      "grad_norm": 0.08108022809028625,
      "learning_rate": 1.8829358934654693e-06,
      "loss": 0.0118,
      "step": 2483740
    },
    {
      "epoch": 4.064727715480843,
      "grad_norm": 0.18805095553398132,
      "learning_rate": 1.8828700012519522e-06,
      "loss": 0.007,
      "step": 2483760
    },
    {
      "epoch": 4.064760445919497,
      "grad_norm": 0.22921447455883026,
      "learning_rate": 1.882804109038435e-06,
      "loss": 0.0091,
      "step": 2483780
    },
    {
      "epoch": 4.064793176358149,
      "grad_norm": 0.45011383295059204,
      "learning_rate": 1.882738216824918e-06,
      "loss": 0.0095,
      "step": 2483800
    },
    {
      "epoch": 4.064825906796803,
      "grad_norm": 0.13600879907608032,
      "learning_rate": 1.8826723246114008e-06,
      "loss": 0.0077,
      "step": 2483820
    },
    {
      "epoch": 4.0648586372354565,
      "grad_norm": 0.095128633081913,
      "learning_rate": 1.8826064323978838e-06,
      "loss": 0.0107,
      "step": 2483840
    },
    {
      "epoch": 4.064891367674109,
      "grad_norm": 0.0831311047077179,
      "learning_rate": 1.8825405401843665e-06,
      "loss": 0.0099,
      "step": 2483860
    },
    {
      "epoch": 4.064924098112763,
      "grad_norm": 0.1517130434513092,
      "learning_rate": 1.8824746479708495e-06,
      "loss": 0.0089,
      "step": 2483880
    },
    {
      "epoch": 4.064956828551416,
      "grad_norm": 0.23046743869781494,
      "learning_rate": 1.8824087557573322e-06,
      "loss": 0.0065,
      "step": 2483900
    },
    {
      "epoch": 4.06498955899007,
      "grad_norm": 0.10133926570415497,
      "learning_rate": 1.8823428635438152e-06,
      "loss": 0.0115,
      "step": 2483920
    },
    {
      "epoch": 4.065022289428723,
      "grad_norm": 0.5662698149681091,
      "learning_rate": 1.882276971330298e-06,
      "loss": 0.0078,
      "step": 2483940
    },
    {
      "epoch": 4.065055019867376,
      "grad_norm": 0.23302307724952698,
      "learning_rate": 1.8822110791167809e-06,
      "loss": 0.0091,
      "step": 2483960
    },
    {
      "epoch": 4.06508775030603,
      "grad_norm": 0.3502025902271271,
      "learning_rate": 1.8821451869032638e-06,
      "loss": 0.0132,
      "step": 2483980
    },
    {
      "epoch": 4.065120480744683,
      "grad_norm": 0.07706773281097412,
      "learning_rate": 1.8820792946897468e-06,
      "loss": 0.0082,
      "step": 2484000
    },
    {
      "epoch": 4.065153211183336,
      "grad_norm": 0.2881457805633545,
      "learning_rate": 1.8820134024762295e-06,
      "loss": 0.0087,
      "step": 2484020
    },
    {
      "epoch": 4.06518594162199,
      "grad_norm": 0.17646624147891998,
      "learning_rate": 1.8819475102627125e-06,
      "loss": 0.0071,
      "step": 2484040
    },
    {
      "epoch": 4.065218672060643,
      "grad_norm": 0.30076706409454346,
      "learning_rate": 1.8818816180491952e-06,
      "loss": 0.007,
      "step": 2484060
    },
    {
      "epoch": 4.065251402499296,
      "grad_norm": 0.16047175228595734,
      "learning_rate": 1.8818157258356782e-06,
      "loss": 0.0122,
      "step": 2484080
    },
    {
      "epoch": 4.06528413293795,
      "grad_norm": 0.033498454838991165,
      "learning_rate": 1.881749833622161e-06,
      "loss": 0.0088,
      "step": 2484100
    },
    {
      "epoch": 4.065316863376603,
      "grad_norm": 0.3646528720855713,
      "learning_rate": 1.8816839414086438e-06,
      "loss": 0.0098,
      "step": 2484120
    },
    {
      "epoch": 4.065349593815256,
      "grad_norm": 0.16920721530914307,
      "learning_rate": 1.8816180491951266e-06,
      "loss": 0.0066,
      "step": 2484140
    },
    {
      "epoch": 4.0653823242539096,
      "grad_norm": 0.228785440325737,
      "learning_rate": 1.8815521569816097e-06,
      "loss": 0.0067,
      "step": 2484160
    },
    {
      "epoch": 4.065415054692563,
      "grad_norm": 0.40345561504364014,
      "learning_rate": 1.8814862647680927e-06,
      "loss": 0.0123,
      "step": 2484180
    },
    {
      "epoch": 4.065447785131217,
      "grad_norm": 0.11576808989048004,
      "learning_rate": 1.8814203725545754e-06,
      "loss": 0.0121,
      "step": 2484200
    },
    {
      "epoch": 4.0654805155698694,
      "grad_norm": 0.4101001024246216,
      "learning_rate": 1.8813544803410582e-06,
      "loss": 0.0148,
      "step": 2484220
    },
    {
      "epoch": 4.065513246008523,
      "grad_norm": 0.1835348755121231,
      "learning_rate": 1.8812885881275411e-06,
      "loss": 0.01,
      "step": 2484240
    },
    {
      "epoch": 4.065545976447177,
      "grad_norm": 0.3589887022972107,
      "learning_rate": 1.8812226959140239e-06,
      "loss": 0.0066,
      "step": 2484260
    },
    {
      "epoch": 4.065578706885829,
      "grad_norm": 0.34591206908226013,
      "learning_rate": 1.8811568037005068e-06,
      "loss": 0.0169,
      "step": 2484280
    },
    {
      "epoch": 4.065611437324483,
      "grad_norm": 0.14530371129512787,
      "learning_rate": 1.8810909114869896e-06,
      "loss": 0.0108,
      "step": 2484300
    },
    {
      "epoch": 4.0656441677631365,
      "grad_norm": 0.5212814807891846,
      "learning_rate": 1.8810250192734725e-06,
      "loss": 0.0108,
      "step": 2484320
    },
    {
      "epoch": 4.06567689820179,
      "grad_norm": 0.04847118631005287,
      "learning_rate": 1.8809591270599557e-06,
      "loss": 0.009,
      "step": 2484340
    },
    {
      "epoch": 4.065709628640443,
      "grad_norm": 0.11922218650579453,
      "learning_rate": 1.8808932348464384e-06,
      "loss": 0.0062,
      "step": 2484360
    },
    {
      "epoch": 4.065742359079096,
      "grad_norm": 0.1699179857969284,
      "learning_rate": 1.8808273426329214e-06,
      "loss": 0.0108,
      "step": 2484380
    },
    {
      "epoch": 4.06577508951775,
      "grad_norm": 0.1811368465423584,
      "learning_rate": 1.8807614504194041e-06,
      "loss": 0.0059,
      "step": 2484400
    },
    {
      "epoch": 4.065807819956403,
      "grad_norm": 0.22442804276943207,
      "learning_rate": 1.8806955582058868e-06,
      "loss": 0.0065,
      "step": 2484420
    },
    {
      "epoch": 4.065840550395056,
      "grad_norm": 0.6791605949401855,
      "learning_rate": 1.8806296659923698e-06,
      "loss": 0.0103,
      "step": 2484440
    },
    {
      "epoch": 4.06587328083371,
      "grad_norm": 0.113788902759552,
      "learning_rate": 1.8805637737788525e-06,
      "loss": 0.0084,
      "step": 2484460
    },
    {
      "epoch": 4.0659060112723635,
      "grad_norm": 0.11945552378892899,
      "learning_rate": 1.8804978815653355e-06,
      "loss": 0.0106,
      "step": 2484480
    },
    {
      "epoch": 4.065938741711016,
      "grad_norm": 0.254250168800354,
      "learning_rate": 1.8804319893518187e-06,
      "loss": 0.0128,
      "step": 2484500
    },
    {
      "epoch": 4.06597147214967,
      "grad_norm": 0.30704453587532043,
      "learning_rate": 1.8803660971383014e-06,
      "loss": 0.0071,
      "step": 2484520
    },
    {
      "epoch": 4.066004202588323,
      "grad_norm": 0.2448931187391281,
      "learning_rate": 1.8803002049247843e-06,
      "loss": 0.0068,
      "step": 2484540
    },
    {
      "epoch": 4.066036933026976,
      "grad_norm": 0.319863885641098,
      "learning_rate": 1.880234312711267e-06,
      "loss": 0.0098,
      "step": 2484560
    },
    {
      "epoch": 4.06606966346563,
      "grad_norm": 0.1194775402545929,
      "learning_rate": 1.88016842049775e-06,
      "loss": 0.0082,
      "step": 2484580
    },
    {
      "epoch": 4.066102393904283,
      "grad_norm": 0.661536455154419,
      "learning_rate": 1.8801025282842328e-06,
      "loss": 0.0103,
      "step": 2484600
    },
    {
      "epoch": 4.066135124342937,
      "grad_norm": 0.20764347910881042,
      "learning_rate": 1.8800366360707157e-06,
      "loss": 0.0061,
      "step": 2484620
    },
    {
      "epoch": 4.06616785478159,
      "grad_norm": 0.6002263426780701,
      "learning_rate": 1.8799707438571985e-06,
      "loss": 0.0073,
      "step": 2484640
    },
    {
      "epoch": 4.066200585220243,
      "grad_norm": 0.14419694244861603,
      "learning_rate": 1.8799048516436812e-06,
      "loss": 0.0058,
      "step": 2484660
    },
    {
      "epoch": 4.066233315658897,
      "grad_norm": 0.22720551490783691,
      "learning_rate": 1.8798389594301644e-06,
      "loss": 0.007,
      "step": 2484680
    },
    {
      "epoch": 4.0662660460975495,
      "grad_norm": 0.20409135520458221,
      "learning_rate": 1.8797730672166473e-06,
      "loss": 0.0105,
      "step": 2484700
    },
    {
      "epoch": 4.066298776536203,
      "grad_norm": 0.3815332055091858,
      "learning_rate": 1.87970717500313e-06,
      "loss": 0.0094,
      "step": 2484720
    },
    {
      "epoch": 4.066331506974857,
      "grad_norm": 0.15957199037075043,
      "learning_rate": 1.879641282789613e-06,
      "loss": 0.0087,
      "step": 2484740
    },
    {
      "epoch": 4.066364237413509,
      "grad_norm": 0.34403520822525024,
      "learning_rate": 1.8795753905760958e-06,
      "loss": 0.0088,
      "step": 2484760
    },
    {
      "epoch": 4.066396967852163,
      "grad_norm": 0.8110169768333435,
      "learning_rate": 1.8795094983625787e-06,
      "loss": 0.0089,
      "step": 2484780
    },
    {
      "epoch": 4.066429698290817,
      "grad_norm": 0.22083939611911774,
      "learning_rate": 1.8794436061490614e-06,
      "loss": 0.0066,
      "step": 2484800
    },
    {
      "epoch": 4.06646242872947,
      "grad_norm": 0.43710222840309143,
      "learning_rate": 1.8793777139355444e-06,
      "loss": 0.0085,
      "step": 2484820
    },
    {
      "epoch": 4.066495159168123,
      "grad_norm": 0.25286394357681274,
      "learning_rate": 1.8793118217220271e-06,
      "loss": 0.0105,
      "step": 2484840
    },
    {
      "epoch": 4.0665278896067765,
      "grad_norm": 0.2917766273021698,
      "learning_rate": 1.8792459295085103e-06,
      "loss": 0.0084,
      "step": 2484860
    },
    {
      "epoch": 4.06656062004543,
      "grad_norm": 0.31268182396888733,
      "learning_rate": 1.879180037294993e-06,
      "loss": 0.0101,
      "step": 2484880
    },
    {
      "epoch": 4.066593350484083,
      "grad_norm": 0.7066778540611267,
      "learning_rate": 1.879114145081476e-06,
      "loss": 0.0105,
      "step": 2484900
    },
    {
      "epoch": 4.066626080922736,
      "grad_norm": 0.7753735780715942,
      "learning_rate": 1.8790482528679587e-06,
      "loss": 0.0117,
      "step": 2484920
    },
    {
      "epoch": 4.06665881136139,
      "grad_norm": 0.1310318410396576,
      "learning_rate": 1.8789823606544417e-06,
      "loss": 0.0097,
      "step": 2484940
    },
    {
      "epoch": 4.0666915418000436,
      "grad_norm": 0.20303259789943695,
      "learning_rate": 1.8789164684409244e-06,
      "loss": 0.01,
      "step": 2484960
    },
    {
      "epoch": 4.066724272238696,
      "grad_norm": 0.15117241442203522,
      "learning_rate": 1.8788505762274074e-06,
      "loss": 0.0061,
      "step": 2484980
    },
    {
      "epoch": 4.06675700267735,
      "grad_norm": 0.04984771087765694,
      "learning_rate": 1.8787846840138901e-06,
      "loss": 0.0098,
      "step": 2485000
    },
    {
      "epoch": 4.0667897331160034,
      "grad_norm": 0.39687010645866394,
      "learning_rate": 1.878718791800373e-06,
      "loss": 0.0108,
      "step": 2485020
    },
    {
      "epoch": 4.066822463554656,
      "grad_norm": 0.2739059031009674,
      "learning_rate": 1.878652899586856e-06,
      "loss": 0.009,
      "step": 2485040
    },
    {
      "epoch": 4.06685519399331,
      "grad_norm": 0.8325156569480896,
      "learning_rate": 1.878587007373339e-06,
      "loss": 0.0124,
      "step": 2485060
    },
    {
      "epoch": 4.066887924431963,
      "grad_norm": 0.6224021315574646,
      "learning_rate": 1.8785211151598217e-06,
      "loss": 0.0078,
      "step": 2485080
    },
    {
      "epoch": 4.066920654870617,
      "grad_norm": 0.5727613568305969,
      "learning_rate": 1.8784552229463047e-06,
      "loss": 0.0071,
      "step": 2485100
    },
    {
      "epoch": 4.06695338530927,
      "grad_norm": 0.1762482076883316,
      "learning_rate": 1.8783893307327874e-06,
      "loss": 0.0096,
      "step": 2485120
    },
    {
      "epoch": 4.066986115747923,
      "grad_norm": 0.31723564863204956,
      "learning_rate": 1.8783234385192703e-06,
      "loss": 0.0066,
      "step": 2485140
    },
    {
      "epoch": 4.067018846186577,
      "grad_norm": 0.23543182015419006,
      "learning_rate": 1.878257546305753e-06,
      "loss": 0.009,
      "step": 2485160
    },
    {
      "epoch": 4.0670515766252295,
      "grad_norm": 0.08892004936933517,
      "learning_rate": 1.878191654092236e-06,
      "loss": 0.0105,
      "step": 2485180
    },
    {
      "epoch": 4.067084307063883,
      "grad_norm": 0.1789240539073944,
      "learning_rate": 1.8781257618787188e-06,
      "loss": 0.0055,
      "step": 2485200
    },
    {
      "epoch": 4.067117037502537,
      "grad_norm": 0.26450496912002563,
      "learning_rate": 1.878059869665202e-06,
      "loss": 0.0067,
      "step": 2485220
    },
    {
      "epoch": 4.06714976794119,
      "grad_norm": 0.7685301899909973,
      "learning_rate": 1.8779939774516847e-06,
      "loss": 0.0131,
      "step": 2485240
    },
    {
      "epoch": 4.067182498379843,
      "grad_norm": 0.8025800585746765,
      "learning_rate": 1.8779280852381676e-06,
      "loss": 0.0099,
      "step": 2485260
    },
    {
      "epoch": 4.067215228818497,
      "grad_norm": 0.7351618409156799,
      "learning_rate": 1.8778621930246504e-06,
      "loss": 0.0138,
      "step": 2485280
    },
    {
      "epoch": 4.06724795925715,
      "grad_norm": 0.23259302973747253,
      "learning_rate": 1.8777963008111333e-06,
      "loss": 0.0096,
      "step": 2485300
    },
    {
      "epoch": 4.067280689695803,
      "grad_norm": 0.16643083095550537,
      "learning_rate": 1.877730408597616e-06,
      "loss": 0.0197,
      "step": 2485320
    },
    {
      "epoch": 4.0673134201344565,
      "grad_norm": 0.15909595787525177,
      "learning_rate": 1.877664516384099e-06,
      "loss": 0.0078,
      "step": 2485340
    },
    {
      "epoch": 4.06734615057311,
      "grad_norm": 0.23445011675357819,
      "learning_rate": 1.8775986241705818e-06,
      "loss": 0.0081,
      "step": 2485360
    },
    {
      "epoch": 4.067378881011764,
      "grad_norm": 0.2292523831129074,
      "learning_rate": 1.877532731957065e-06,
      "loss": 0.011,
      "step": 2485380
    },
    {
      "epoch": 4.067411611450416,
      "grad_norm": 0.2814333438873291,
      "learning_rate": 1.8774668397435477e-06,
      "loss": 0.0126,
      "step": 2485400
    },
    {
      "epoch": 4.06744434188907,
      "grad_norm": 0.18361298739910126,
      "learning_rate": 1.8774009475300306e-06,
      "loss": 0.0094,
      "step": 2485420
    },
    {
      "epoch": 4.067477072327724,
      "grad_norm": 0.2659570276737213,
      "learning_rate": 1.8773350553165134e-06,
      "loss": 0.0114,
      "step": 2485440
    },
    {
      "epoch": 4.067509802766376,
      "grad_norm": 0.3119525611400604,
      "learning_rate": 1.8772691631029963e-06,
      "loss": 0.0099,
      "step": 2485460
    },
    {
      "epoch": 4.06754253320503,
      "grad_norm": 0.2024828940629959,
      "learning_rate": 1.877203270889479e-06,
      "loss": 0.0083,
      "step": 2485480
    },
    {
      "epoch": 4.0675752636436835,
      "grad_norm": 0.5224347710609436,
      "learning_rate": 1.877137378675962e-06,
      "loss": 0.01,
      "step": 2485500
    },
    {
      "epoch": 4.067607994082337,
      "grad_norm": 0.05310516059398651,
      "learning_rate": 1.8770714864624447e-06,
      "loss": 0.0096,
      "step": 2485520
    },
    {
      "epoch": 4.06764072452099,
      "grad_norm": 0.40134531259536743,
      "learning_rate": 1.8770055942489277e-06,
      "loss": 0.0147,
      "step": 2485540
    },
    {
      "epoch": 4.067673454959643,
      "grad_norm": 0.5286167860031128,
      "learning_rate": 1.8769397020354108e-06,
      "loss": 0.0126,
      "step": 2485560
    },
    {
      "epoch": 4.067706185398297,
      "grad_norm": 0.11874961853027344,
      "learning_rate": 1.8768738098218936e-06,
      "loss": 0.0101,
      "step": 2485580
    },
    {
      "epoch": 4.06773891583695,
      "grad_norm": 0.1637433022260666,
      "learning_rate": 1.8768079176083765e-06,
      "loss": 0.0062,
      "step": 2485600
    },
    {
      "epoch": 4.067771646275603,
      "grad_norm": 0.08880064636468887,
      "learning_rate": 1.8767420253948593e-06,
      "loss": 0.0106,
      "step": 2485620
    },
    {
      "epoch": 4.067804376714257,
      "grad_norm": 0.48592284321784973,
      "learning_rate": 1.876676133181342e-06,
      "loss": 0.0059,
      "step": 2485640
    },
    {
      "epoch": 4.0678371071529105,
      "grad_norm": 0.44471630454063416,
      "learning_rate": 1.876610240967825e-06,
      "loss": 0.0148,
      "step": 2485660
    },
    {
      "epoch": 4.067869837591563,
      "grad_norm": 0.15616312623023987,
      "learning_rate": 1.8765443487543077e-06,
      "loss": 0.0127,
      "step": 2485680
    },
    {
      "epoch": 4.067902568030217,
      "grad_norm": 0.22153116762638092,
      "learning_rate": 1.8764784565407907e-06,
      "loss": 0.0087,
      "step": 2485700
    },
    {
      "epoch": 4.06793529846887,
      "grad_norm": 0.16931071877479553,
      "learning_rate": 1.8764125643272734e-06,
      "loss": 0.0079,
      "step": 2485720
    },
    {
      "epoch": 4.067968028907523,
      "grad_norm": 0.11991124600172043,
      "learning_rate": 1.8763466721137566e-06,
      "loss": 0.0097,
      "step": 2485740
    },
    {
      "epoch": 4.068000759346177,
      "grad_norm": 0.24430899322032928,
      "learning_rate": 1.8762807799002395e-06,
      "loss": 0.0101,
      "step": 2485760
    },
    {
      "epoch": 4.06803348978483,
      "grad_norm": 0.3090682327747345,
      "learning_rate": 1.8762148876867223e-06,
      "loss": 0.0163,
      "step": 2485780
    },
    {
      "epoch": 4.068066220223484,
      "grad_norm": 0.16218611598014832,
      "learning_rate": 1.8761489954732052e-06,
      "loss": 0.0086,
      "step": 2485800
    },
    {
      "epoch": 4.068098950662137,
      "grad_norm": 0.09494607150554657,
      "learning_rate": 1.876083103259688e-06,
      "loss": 0.0102,
      "step": 2485820
    },
    {
      "epoch": 4.06813168110079,
      "grad_norm": 0.3651203513145447,
      "learning_rate": 1.876017211046171e-06,
      "loss": 0.0079,
      "step": 2485840
    },
    {
      "epoch": 4.068164411539444,
      "grad_norm": 0.06010860577225685,
      "learning_rate": 1.8759513188326536e-06,
      "loss": 0.0082,
      "step": 2485860
    },
    {
      "epoch": 4.0681971419780965,
      "grad_norm": 0.2662031650543213,
      "learning_rate": 1.8758854266191364e-06,
      "loss": 0.0078,
      "step": 2485880
    },
    {
      "epoch": 4.06822987241675,
      "grad_norm": 0.3635019361972809,
      "learning_rate": 1.8758195344056193e-06,
      "loss": 0.0065,
      "step": 2485900
    },
    {
      "epoch": 4.068262602855404,
      "grad_norm": 0.11186354607343674,
      "learning_rate": 1.8757536421921025e-06,
      "loss": 0.0093,
      "step": 2485920
    },
    {
      "epoch": 4.068295333294057,
      "grad_norm": 0.33603355288505554,
      "learning_rate": 1.8756877499785852e-06,
      "loss": 0.0062,
      "step": 2485940
    },
    {
      "epoch": 4.06832806373271,
      "grad_norm": 0.36750394105911255,
      "learning_rate": 1.8756218577650682e-06,
      "loss": 0.0112,
      "step": 2485960
    },
    {
      "epoch": 4.0683607941713635,
      "grad_norm": 0.1372527927160263,
      "learning_rate": 1.875555965551551e-06,
      "loss": 0.0082,
      "step": 2485980
    },
    {
      "epoch": 4.068393524610017,
      "grad_norm": 0.30002033710479736,
      "learning_rate": 1.8754900733380339e-06,
      "loss": 0.0057,
      "step": 2486000
    },
    {
      "epoch": 4.06842625504867,
      "grad_norm": 0.1449204832315445,
      "learning_rate": 1.8754241811245166e-06,
      "loss": 0.0059,
      "step": 2486020
    },
    {
      "epoch": 4.068458985487323,
      "grad_norm": 0.3198868930339813,
      "learning_rate": 1.8753582889109996e-06,
      "loss": 0.0077,
      "step": 2486040
    },
    {
      "epoch": 4.068491715925977,
      "grad_norm": 0.29653793573379517,
      "learning_rate": 1.8752923966974823e-06,
      "loss": 0.0114,
      "step": 2486060
    },
    {
      "epoch": 4.068524446364631,
      "grad_norm": 0.30121487379074097,
      "learning_rate": 1.875226504483965e-06,
      "loss": 0.0084,
      "step": 2486080
    },
    {
      "epoch": 4.068557176803283,
      "grad_norm": 0.1267615705728531,
      "learning_rate": 1.8751606122704482e-06,
      "loss": 0.0179,
      "step": 2486100
    },
    {
      "epoch": 4.068589907241937,
      "grad_norm": 0.351381778717041,
      "learning_rate": 1.8750947200569312e-06,
      "loss": 0.0091,
      "step": 2486120
    },
    {
      "epoch": 4.0686226376805905,
      "grad_norm": 0.14539794623851776,
      "learning_rate": 1.875028827843414e-06,
      "loss": 0.0084,
      "step": 2486140
    },
    {
      "epoch": 4.068655368119243,
      "grad_norm": 0.08647473156452179,
      "learning_rate": 1.8749629356298969e-06,
      "loss": 0.007,
      "step": 2486160
    },
    {
      "epoch": 4.068688098557897,
      "grad_norm": 0.26309990882873535,
      "learning_rate": 1.8748970434163796e-06,
      "loss": 0.0096,
      "step": 2486180
    },
    {
      "epoch": 4.06872082899655,
      "grad_norm": 0.09646566212177277,
      "learning_rate": 1.8748311512028625e-06,
      "loss": 0.0087,
      "step": 2486200
    },
    {
      "epoch": 4.068753559435203,
      "grad_norm": 0.2277485430240631,
      "learning_rate": 1.8747652589893453e-06,
      "loss": 0.0098,
      "step": 2486220
    },
    {
      "epoch": 4.068786289873857,
      "grad_norm": 0.491549551486969,
      "learning_rate": 1.8746993667758282e-06,
      "loss": 0.0097,
      "step": 2486240
    },
    {
      "epoch": 4.06881902031251,
      "grad_norm": 0.11082915961742401,
      "learning_rate": 1.8746334745623112e-06,
      "loss": 0.0121,
      "step": 2486260
    },
    {
      "epoch": 4.068851750751164,
      "grad_norm": 0.4691583812236786,
      "learning_rate": 1.8745675823487941e-06,
      "loss": 0.0096,
      "step": 2486280
    },
    {
      "epoch": 4.068884481189817,
      "grad_norm": 0.17211924493312836,
      "learning_rate": 1.8745016901352769e-06,
      "loss": 0.0059,
      "step": 2486300
    },
    {
      "epoch": 4.06891721162847,
      "grad_norm": 0.36172816157341003,
      "learning_rate": 1.8744357979217598e-06,
      "loss": 0.009,
      "step": 2486320
    },
    {
      "epoch": 4.068949942067124,
      "grad_norm": 0.44650083780288696,
      "learning_rate": 1.8743699057082426e-06,
      "loss": 0.0096,
      "step": 2486340
    },
    {
      "epoch": 4.0689826725057765,
      "grad_norm": 0.31631380319595337,
      "learning_rate": 1.8743040134947255e-06,
      "loss": 0.0079,
      "step": 2486360
    },
    {
      "epoch": 4.06901540294443,
      "grad_norm": 0.18422657251358032,
      "learning_rate": 1.8742381212812083e-06,
      "loss": 0.0097,
      "step": 2486380
    },
    {
      "epoch": 4.069048133383084,
      "grad_norm": 0.4194602072238922,
      "learning_rate": 1.8741722290676912e-06,
      "loss": 0.0115,
      "step": 2486400
    },
    {
      "epoch": 4.069080863821737,
      "grad_norm": 0.5734637975692749,
      "learning_rate": 1.874106336854174e-06,
      "loss": 0.0149,
      "step": 2486420
    },
    {
      "epoch": 4.06911359426039,
      "grad_norm": 0.22236379981040955,
      "learning_rate": 1.8740404446406571e-06,
      "loss": 0.0103,
      "step": 2486440
    },
    {
      "epoch": 4.069146324699044,
      "grad_norm": 0.27738019824028015,
      "learning_rate": 1.8739745524271399e-06,
      "loss": 0.0119,
      "step": 2486460
    },
    {
      "epoch": 4.069179055137697,
      "grad_norm": 0.18112628161907196,
      "learning_rate": 1.8739086602136228e-06,
      "loss": 0.0162,
      "step": 2486480
    },
    {
      "epoch": 4.06921178557635,
      "grad_norm": 0.09544035792350769,
      "learning_rate": 1.8738427680001055e-06,
      "loss": 0.0079,
      "step": 2486500
    },
    {
      "epoch": 4.0692445160150035,
      "grad_norm": 0.0874907523393631,
      "learning_rate": 1.8737768757865885e-06,
      "loss": 0.0116,
      "step": 2486520
    },
    {
      "epoch": 4.069277246453657,
      "grad_norm": 0.8852921724319458,
      "learning_rate": 1.8737109835730712e-06,
      "loss": 0.0111,
      "step": 2486540
    },
    {
      "epoch": 4.069309976892311,
      "grad_norm": 0.11140402406454086,
      "learning_rate": 1.8736450913595542e-06,
      "loss": 0.0113,
      "step": 2486560
    },
    {
      "epoch": 4.069342707330963,
      "grad_norm": 0.05065188184380531,
      "learning_rate": 1.873579199146037e-06,
      "loss": 0.009,
      "step": 2486580
    },
    {
      "epoch": 4.069375437769617,
      "grad_norm": 0.21871605515480042,
      "learning_rate": 1.8735133069325199e-06,
      "loss": 0.01,
      "step": 2486600
    },
    {
      "epoch": 4.069408168208271,
      "grad_norm": 0.3280174732208252,
      "learning_rate": 1.8734474147190028e-06,
      "loss": 0.0065,
      "step": 2486620
    },
    {
      "epoch": 4.069440898646923,
      "grad_norm": 0.4975249469280243,
      "learning_rate": 1.8733815225054858e-06,
      "loss": 0.0075,
      "step": 2486640
    },
    {
      "epoch": 4.069473629085577,
      "grad_norm": 0.4058217704296112,
      "learning_rate": 1.8733156302919685e-06,
      "loss": 0.0072,
      "step": 2486660
    },
    {
      "epoch": 4.0695063595242305,
      "grad_norm": 0.3573983311653137,
      "learning_rate": 1.8732497380784515e-06,
      "loss": 0.0072,
      "step": 2486680
    },
    {
      "epoch": 4.069539089962884,
      "grad_norm": 0.49620556831359863,
      "learning_rate": 1.8731838458649342e-06,
      "loss": 0.0071,
      "step": 2486700
    },
    {
      "epoch": 4.069571820401537,
      "grad_norm": 0.4328329265117645,
      "learning_rate": 1.8731179536514172e-06,
      "loss": 0.0132,
      "step": 2486720
    },
    {
      "epoch": 4.06960455084019,
      "grad_norm": 0.15756826102733612,
      "learning_rate": 1.8730520614379e-06,
      "loss": 0.0071,
      "step": 2486740
    },
    {
      "epoch": 4.069637281278844,
      "grad_norm": 0.3136383593082428,
      "learning_rate": 1.8729861692243829e-06,
      "loss": 0.0105,
      "step": 2486760
    },
    {
      "epoch": 4.069670011717497,
      "grad_norm": 0.22171568870544434,
      "learning_rate": 1.8729202770108656e-06,
      "loss": 0.0099,
      "step": 2486780
    },
    {
      "epoch": 4.06970274215615,
      "grad_norm": 0.1878325343132019,
      "learning_rate": 1.8728543847973488e-06,
      "loss": 0.0099,
      "step": 2486800
    },
    {
      "epoch": 4.069735472594804,
      "grad_norm": 0.3689281642436981,
      "learning_rate": 1.8727884925838317e-06,
      "loss": 0.0104,
      "step": 2486820
    },
    {
      "epoch": 4.069768203033457,
      "grad_norm": 0.12529775500297546,
      "learning_rate": 1.8727226003703145e-06,
      "loss": 0.0102,
      "step": 2486840
    },
    {
      "epoch": 4.06980093347211,
      "grad_norm": 0.1884351521730423,
      "learning_rate": 1.8726567081567972e-06,
      "loss": 0.0084,
      "step": 2486860
    },
    {
      "epoch": 4.069833663910764,
      "grad_norm": 0.09065721929073334,
      "learning_rate": 1.8725908159432801e-06,
      "loss": 0.0104,
      "step": 2486880
    },
    {
      "epoch": 4.069866394349417,
      "grad_norm": 0.7002540230751038,
      "learning_rate": 1.8725249237297629e-06,
      "loss": 0.0101,
      "step": 2486900
    },
    {
      "epoch": 4.06989912478807,
      "grad_norm": 0.08907495439052582,
      "learning_rate": 1.8724590315162458e-06,
      "loss": 0.009,
      "step": 2486920
    },
    {
      "epoch": 4.069931855226724,
      "grad_norm": 0.6319735050201416,
      "learning_rate": 1.8723931393027286e-06,
      "loss": 0.0113,
      "step": 2486940
    },
    {
      "epoch": 4.069964585665377,
      "grad_norm": 0.2523215115070343,
      "learning_rate": 1.8723272470892115e-06,
      "loss": 0.0094,
      "step": 2486960
    },
    {
      "epoch": 4.069997316104031,
      "grad_norm": 0.39381733536720276,
      "learning_rate": 1.8722613548756947e-06,
      "loss": 0.0082,
      "step": 2486980
    },
    {
      "epoch": 4.0700300465426835,
      "grad_norm": 0.04954196885228157,
      "learning_rate": 1.8721954626621774e-06,
      "loss": 0.0075,
      "step": 2487000
    },
    {
      "epoch": 4.070062776981337,
      "grad_norm": 0.5566972494125366,
      "learning_rate": 1.8721295704486604e-06,
      "loss": 0.0087,
      "step": 2487020
    },
    {
      "epoch": 4.070095507419991,
      "grad_norm": 0.4017723798751831,
      "learning_rate": 1.8720636782351431e-06,
      "loss": 0.0123,
      "step": 2487040
    },
    {
      "epoch": 4.070128237858643,
      "grad_norm": 0.11678958684206009,
      "learning_rate": 1.8719977860216259e-06,
      "loss": 0.0078,
      "step": 2487060
    },
    {
      "epoch": 4.070160968297297,
      "grad_norm": 0.1544867753982544,
      "learning_rate": 1.8719318938081088e-06,
      "loss": 0.0071,
      "step": 2487080
    },
    {
      "epoch": 4.070193698735951,
      "grad_norm": 0.35396668314933777,
      "learning_rate": 1.8718660015945916e-06,
      "loss": 0.0139,
      "step": 2487100
    },
    {
      "epoch": 4.070226429174604,
      "grad_norm": 0.1488555669784546,
      "learning_rate": 1.8718001093810745e-06,
      "loss": 0.0088,
      "step": 2487120
    },
    {
      "epoch": 4.070259159613257,
      "grad_norm": 0.4317264258861542,
      "learning_rate": 1.8717342171675577e-06,
      "loss": 0.0099,
      "step": 2487140
    },
    {
      "epoch": 4.0702918900519105,
      "grad_norm": 0.2665625810623169,
      "learning_rate": 1.8716683249540404e-06,
      "loss": 0.0059,
      "step": 2487160
    },
    {
      "epoch": 4.070324620490564,
      "grad_norm": 0.1593126505613327,
      "learning_rate": 1.8716024327405234e-06,
      "loss": 0.0129,
      "step": 2487180
    },
    {
      "epoch": 4.070357350929217,
      "grad_norm": 0.08221203088760376,
      "learning_rate": 1.871536540527006e-06,
      "loss": 0.0068,
      "step": 2487200
    },
    {
      "epoch": 4.07039008136787,
      "grad_norm": 0.32076963782310486,
      "learning_rate": 1.871470648313489e-06,
      "loss": 0.0042,
      "step": 2487220
    },
    {
      "epoch": 4.070422811806524,
      "grad_norm": 0.44923537969589233,
      "learning_rate": 1.8714047560999718e-06,
      "loss": 0.0088,
      "step": 2487240
    },
    {
      "epoch": 4.070455542245178,
      "grad_norm": 0.11949548870325089,
      "learning_rate": 1.8713388638864547e-06,
      "loss": 0.008,
      "step": 2487260
    },
    {
      "epoch": 4.07048827268383,
      "grad_norm": 0.44347628951072693,
      "learning_rate": 1.8712729716729375e-06,
      "loss": 0.008,
      "step": 2487280
    },
    {
      "epoch": 4.070521003122484,
      "grad_norm": 0.14083503186702728,
      "learning_rate": 1.8712070794594202e-06,
      "loss": 0.0093,
      "step": 2487300
    },
    {
      "epoch": 4.0705537335611375,
      "grad_norm": 0.23630166053771973,
      "learning_rate": 1.8711411872459034e-06,
      "loss": 0.0101,
      "step": 2487320
    },
    {
      "epoch": 4.07058646399979,
      "grad_norm": 0.28947341442108154,
      "learning_rate": 1.8710752950323863e-06,
      "loss": 0.0132,
      "step": 2487340
    },
    {
      "epoch": 4.070619194438444,
      "grad_norm": 0.21964378654956818,
      "learning_rate": 1.871009402818869e-06,
      "loss": 0.0087,
      "step": 2487360
    },
    {
      "epoch": 4.070651924877097,
      "grad_norm": 0.518335223197937,
      "learning_rate": 1.870943510605352e-06,
      "loss": 0.0145,
      "step": 2487380
    },
    {
      "epoch": 4.070684655315751,
      "grad_norm": 0.12007253617048264,
      "learning_rate": 1.8708776183918348e-06,
      "loss": 0.0071,
      "step": 2487400
    },
    {
      "epoch": 4.070717385754404,
      "grad_norm": 0.28883811831474304,
      "learning_rate": 1.8708117261783177e-06,
      "loss": 0.0063,
      "step": 2487420
    },
    {
      "epoch": 4.070750116193057,
      "grad_norm": 0.3459213972091675,
      "learning_rate": 1.8707458339648005e-06,
      "loss": 0.0055,
      "step": 2487440
    },
    {
      "epoch": 4.070782846631711,
      "grad_norm": 0.24886305630207062,
      "learning_rate": 1.8706799417512834e-06,
      "loss": 0.0099,
      "step": 2487460
    },
    {
      "epoch": 4.070815577070364,
      "grad_norm": 0.24430416524410248,
      "learning_rate": 1.8706140495377661e-06,
      "loss": 0.0129,
      "step": 2487480
    },
    {
      "epoch": 4.070848307509017,
      "grad_norm": 0.21689534187316895,
      "learning_rate": 1.8705481573242493e-06,
      "loss": 0.009,
      "step": 2487500
    },
    {
      "epoch": 4.070881037947671,
      "grad_norm": 0.17279136180877686,
      "learning_rate": 1.870482265110732e-06,
      "loss": 0.0096,
      "step": 2487520
    },
    {
      "epoch": 4.070913768386324,
      "grad_norm": 0.13411493599414825,
      "learning_rate": 1.870416372897215e-06,
      "loss": 0.0085,
      "step": 2487540
    },
    {
      "epoch": 4.070946498824977,
      "grad_norm": 0.2739475965499878,
      "learning_rate": 1.8703504806836977e-06,
      "loss": 0.0081,
      "step": 2487560
    },
    {
      "epoch": 4.070979229263631,
      "grad_norm": 0.36138543486595154,
      "learning_rate": 1.8702845884701807e-06,
      "loss": 0.0086,
      "step": 2487580
    },
    {
      "epoch": 4.071011959702284,
      "grad_norm": 0.2161940336227417,
      "learning_rate": 1.8702186962566634e-06,
      "loss": 0.0106,
      "step": 2487600
    },
    {
      "epoch": 4.071044690140937,
      "grad_norm": 0.1327565759420395,
      "learning_rate": 1.8701528040431464e-06,
      "loss": 0.0076,
      "step": 2487620
    },
    {
      "epoch": 4.0710774205795905,
      "grad_norm": 0.15547162294387817,
      "learning_rate": 1.8700869118296291e-06,
      "loss": 0.0094,
      "step": 2487640
    },
    {
      "epoch": 4.071110151018244,
      "grad_norm": 0.28002864122390747,
      "learning_rate": 1.870021019616112e-06,
      "loss": 0.0075,
      "step": 2487660
    },
    {
      "epoch": 4.071142881456897,
      "grad_norm": 0.6940739154815674,
      "learning_rate": 1.869955127402595e-06,
      "loss": 0.01,
      "step": 2487680
    },
    {
      "epoch": 4.07117561189555,
      "grad_norm": 0.1675979495048523,
      "learning_rate": 1.869889235189078e-06,
      "loss": 0.0091,
      "step": 2487700
    },
    {
      "epoch": 4.071208342334204,
      "grad_norm": 0.12275329232215881,
      "learning_rate": 1.8698233429755607e-06,
      "loss": 0.0113,
      "step": 2487720
    },
    {
      "epoch": 4.071241072772858,
      "grad_norm": 0.2478485256433487,
      "learning_rate": 1.8697574507620437e-06,
      "loss": 0.012,
      "step": 2487740
    },
    {
      "epoch": 4.07127380321151,
      "grad_norm": 0.4149697721004486,
      "learning_rate": 1.8696915585485264e-06,
      "loss": 0.0073,
      "step": 2487760
    },
    {
      "epoch": 4.071306533650164,
      "grad_norm": 0.24061070382595062,
      "learning_rate": 1.8696256663350094e-06,
      "loss": 0.0127,
      "step": 2487780
    },
    {
      "epoch": 4.0713392640888175,
      "grad_norm": 0.21114696562290192,
      "learning_rate": 1.869559774121492e-06,
      "loss": 0.0079,
      "step": 2487800
    },
    {
      "epoch": 4.07137199452747,
      "grad_norm": 0.09981738030910492,
      "learning_rate": 1.869493881907975e-06,
      "loss": 0.0075,
      "step": 2487820
    },
    {
      "epoch": 4.071404724966124,
      "grad_norm": 0.6824477314949036,
      "learning_rate": 1.8694279896944578e-06,
      "loss": 0.0112,
      "step": 2487840
    },
    {
      "epoch": 4.071437455404777,
      "grad_norm": 0.0835619792342186,
      "learning_rate": 1.869362097480941e-06,
      "loss": 0.0096,
      "step": 2487860
    },
    {
      "epoch": 4.071470185843431,
      "grad_norm": 0.22761009633541107,
      "learning_rate": 1.8692962052674237e-06,
      "loss": 0.0071,
      "step": 2487880
    },
    {
      "epoch": 4.071502916282084,
      "grad_norm": 0.28724315762519836,
      "learning_rate": 1.8692303130539066e-06,
      "loss": 0.0062,
      "step": 2487900
    },
    {
      "epoch": 4.071535646720737,
      "grad_norm": 0.5981116890907288,
      "learning_rate": 1.8691644208403894e-06,
      "loss": 0.0086,
      "step": 2487920
    },
    {
      "epoch": 4.071568377159391,
      "grad_norm": 0.08748644590377808,
      "learning_rate": 1.8690985286268723e-06,
      "loss": 0.0125,
      "step": 2487940
    },
    {
      "epoch": 4.071601107598044,
      "grad_norm": 0.29811573028564453,
      "learning_rate": 1.869032636413355e-06,
      "loss": 0.0112,
      "step": 2487960
    },
    {
      "epoch": 4.071633838036697,
      "grad_norm": 0.2709614634513855,
      "learning_rate": 1.868966744199838e-06,
      "loss": 0.0071,
      "step": 2487980
    },
    {
      "epoch": 4.071666568475351,
      "grad_norm": 2.1057374477386475,
      "learning_rate": 1.8689008519863208e-06,
      "loss": 0.0103,
      "step": 2488000
    },
    {
      "epoch": 4.071699298914004,
      "grad_norm": 0.27948397397994995,
      "learning_rate": 1.868834959772804e-06,
      "loss": 0.0052,
      "step": 2488020
    },
    {
      "epoch": 4.071732029352657,
      "grad_norm": 0.37747249007225037,
      "learning_rate": 1.8687690675592867e-06,
      "loss": 0.0137,
      "step": 2488040
    },
    {
      "epoch": 4.071764759791311,
      "grad_norm": 0.23091930150985718,
      "learning_rate": 1.8687031753457696e-06,
      "loss": 0.0085,
      "step": 2488060
    },
    {
      "epoch": 4.071797490229964,
      "grad_norm": 0.17732839286327362,
      "learning_rate": 1.8686372831322524e-06,
      "loss": 0.0084,
      "step": 2488080
    },
    {
      "epoch": 4.071830220668617,
      "grad_norm": 0.13855360448360443,
      "learning_rate": 1.8685713909187353e-06,
      "loss": 0.0136,
      "step": 2488100
    },
    {
      "epoch": 4.071862951107271,
      "grad_norm": 0.06783885508775711,
      "learning_rate": 1.868505498705218e-06,
      "loss": 0.007,
      "step": 2488120
    },
    {
      "epoch": 4.071895681545924,
      "grad_norm": 0.04478265345096588,
      "learning_rate": 1.868439606491701e-06,
      "loss": 0.0089,
      "step": 2488140
    },
    {
      "epoch": 4.071928411984578,
      "grad_norm": 0.28420063853263855,
      "learning_rate": 1.8683737142781837e-06,
      "loss": 0.007,
      "step": 2488160
    },
    {
      "epoch": 4.0719611424232305,
      "grad_norm": 0.13352054357528687,
      "learning_rate": 1.8683078220646667e-06,
      "loss": 0.0073,
      "step": 2488180
    },
    {
      "epoch": 4.071993872861884,
      "grad_norm": 0.21015134453773499,
      "learning_rate": 1.8682419298511499e-06,
      "loss": 0.0055,
      "step": 2488200
    },
    {
      "epoch": 4.072026603300538,
      "grad_norm": 0.20836080610752106,
      "learning_rate": 1.8681760376376326e-06,
      "loss": 0.0127,
      "step": 2488220
    },
    {
      "epoch": 4.07205933373919,
      "grad_norm": 0.2277565449476242,
      "learning_rate": 1.8681101454241156e-06,
      "loss": 0.0081,
      "step": 2488240
    },
    {
      "epoch": 4.072092064177844,
      "grad_norm": 0.2063201516866684,
      "learning_rate": 1.8680442532105983e-06,
      "loss": 0.0151,
      "step": 2488260
    },
    {
      "epoch": 4.072124794616498,
      "grad_norm": 0.18785639107227325,
      "learning_rate": 1.867978360997081e-06,
      "loss": 0.0119,
      "step": 2488280
    },
    {
      "epoch": 4.072157525055151,
      "grad_norm": 0.08867954462766647,
      "learning_rate": 1.867912468783564e-06,
      "loss": 0.0063,
      "step": 2488300
    },
    {
      "epoch": 4.072190255493804,
      "grad_norm": 0.14694806933403015,
      "learning_rate": 1.8678465765700467e-06,
      "loss": 0.0077,
      "step": 2488320
    },
    {
      "epoch": 4.0722229859324575,
      "grad_norm": 0.5818942189216614,
      "learning_rate": 1.8677806843565297e-06,
      "loss": 0.0106,
      "step": 2488340
    },
    {
      "epoch": 4.072255716371111,
      "grad_norm": 0.6339529156684875,
      "learning_rate": 1.8677147921430124e-06,
      "loss": 0.0083,
      "step": 2488360
    },
    {
      "epoch": 4.072288446809764,
      "grad_norm": 0.07174161076545715,
      "learning_rate": 1.8676488999294956e-06,
      "loss": 0.0059,
      "step": 2488380
    },
    {
      "epoch": 4.072321177248417,
      "grad_norm": 0.06807800382375717,
      "learning_rate": 1.8675830077159785e-06,
      "loss": 0.0123,
      "step": 2488400
    },
    {
      "epoch": 4.072353907687071,
      "grad_norm": 0.35950759053230286,
      "learning_rate": 1.8675171155024613e-06,
      "loss": 0.0123,
      "step": 2488420
    },
    {
      "epoch": 4.0723866381257245,
      "grad_norm": 0.14269374310970306,
      "learning_rate": 1.8674512232889442e-06,
      "loss": 0.0095,
      "step": 2488440
    },
    {
      "epoch": 4.072419368564377,
      "grad_norm": 0.36992716789245605,
      "learning_rate": 1.867385331075427e-06,
      "loss": 0.0109,
      "step": 2488460
    },
    {
      "epoch": 4.072452099003031,
      "grad_norm": 0.2813253700733185,
      "learning_rate": 1.86731943886191e-06,
      "loss": 0.0102,
      "step": 2488480
    },
    {
      "epoch": 4.072484829441684,
      "grad_norm": 0.3200969696044922,
      "learning_rate": 1.8672535466483927e-06,
      "loss": 0.0051,
      "step": 2488500
    },
    {
      "epoch": 4.072517559880337,
      "grad_norm": 0.21906153857707977,
      "learning_rate": 1.8671876544348754e-06,
      "loss": 0.0127,
      "step": 2488520
    },
    {
      "epoch": 4.072550290318991,
      "grad_norm": 0.06388506293296814,
      "learning_rate": 1.8671217622213583e-06,
      "loss": 0.009,
      "step": 2488540
    },
    {
      "epoch": 4.072583020757644,
      "grad_norm": 0.4675019681453705,
      "learning_rate": 1.8670558700078415e-06,
      "loss": 0.0095,
      "step": 2488560
    },
    {
      "epoch": 4.072615751196298,
      "grad_norm": 0.34723326563835144,
      "learning_rate": 1.8669899777943242e-06,
      "loss": 0.0084,
      "step": 2488580
    },
    {
      "epoch": 4.072648481634951,
      "grad_norm": 0.11906018853187561,
      "learning_rate": 1.8669240855808072e-06,
      "loss": 0.0084,
      "step": 2488600
    },
    {
      "epoch": 4.072681212073604,
      "grad_norm": 0.13927078247070312,
      "learning_rate": 1.86685819336729e-06,
      "loss": 0.0132,
      "step": 2488620
    },
    {
      "epoch": 4.072713942512258,
      "grad_norm": 0.13332216441631317,
      "learning_rate": 1.8667923011537729e-06,
      "loss": 0.0065,
      "step": 2488640
    },
    {
      "epoch": 4.0727466729509105,
      "grad_norm": 0.3338732421398163,
      "learning_rate": 1.8667264089402556e-06,
      "loss": 0.0085,
      "step": 2488660
    },
    {
      "epoch": 4.072779403389564,
      "grad_norm": 4.048300266265869,
      "learning_rate": 1.8666605167267386e-06,
      "loss": 0.0094,
      "step": 2488680
    },
    {
      "epoch": 4.072812133828218,
      "grad_norm": 0.29513105750083923,
      "learning_rate": 1.8665946245132213e-06,
      "loss": 0.0128,
      "step": 2488700
    },
    {
      "epoch": 4.072844864266871,
      "grad_norm": 0.18750664591789246,
      "learning_rate": 1.866528732299704e-06,
      "loss": 0.0089,
      "step": 2488720
    },
    {
      "epoch": 4.072877594705524,
      "grad_norm": 0.06095787510275841,
      "learning_rate": 1.8664628400861872e-06,
      "loss": 0.0108,
      "step": 2488740
    },
    {
      "epoch": 4.072910325144178,
      "grad_norm": 0.5699955224990845,
      "learning_rate": 1.8663969478726702e-06,
      "loss": 0.0107,
      "step": 2488760
    },
    {
      "epoch": 4.072943055582831,
      "grad_norm": 0.42722952365875244,
      "learning_rate": 1.866331055659153e-06,
      "loss": 0.0085,
      "step": 2488780
    },
    {
      "epoch": 4.072975786021484,
      "grad_norm": 0.40353503823280334,
      "learning_rate": 1.8662651634456359e-06,
      "loss": 0.0093,
      "step": 2488800
    },
    {
      "epoch": 4.0730085164601375,
      "grad_norm": 0.035794783383607864,
      "learning_rate": 1.8661992712321186e-06,
      "loss": 0.0051,
      "step": 2488820
    },
    {
      "epoch": 4.073041246898791,
      "grad_norm": 0.07197731733322144,
      "learning_rate": 1.8661333790186016e-06,
      "loss": 0.009,
      "step": 2488840
    },
    {
      "epoch": 4.073073977337445,
      "grad_norm": 0.423306941986084,
      "learning_rate": 1.8660674868050843e-06,
      "loss": 0.0123,
      "step": 2488860
    },
    {
      "epoch": 4.073106707776097,
      "grad_norm": 0.29866495728492737,
      "learning_rate": 1.8660015945915672e-06,
      "loss": 0.0099,
      "step": 2488880
    },
    {
      "epoch": 4.073139438214751,
      "grad_norm": 0.7353354096412659,
      "learning_rate": 1.8659357023780502e-06,
      "loss": 0.0096,
      "step": 2488900
    },
    {
      "epoch": 4.073172168653405,
      "grad_norm": 0.19389981031417847,
      "learning_rate": 1.8658698101645331e-06,
      "loss": 0.011,
      "step": 2488920
    },
    {
      "epoch": 4.073204899092057,
      "grad_norm": 0.18950194120407104,
      "learning_rate": 1.8658039179510159e-06,
      "loss": 0.0063,
      "step": 2488940
    },
    {
      "epoch": 4.073237629530711,
      "grad_norm": 0.32303208112716675,
      "learning_rate": 1.8657380257374988e-06,
      "loss": 0.0081,
      "step": 2488960
    },
    {
      "epoch": 4.0732703599693645,
      "grad_norm": 0.10505449026823044,
      "learning_rate": 1.8656721335239816e-06,
      "loss": 0.01,
      "step": 2488980
    },
    {
      "epoch": 4.073303090408018,
      "grad_norm": 0.2827666103839874,
      "learning_rate": 1.8656062413104645e-06,
      "loss": 0.0088,
      "step": 2489000
    },
    {
      "epoch": 4.073335820846671,
      "grad_norm": 0.20462986826896667,
      "learning_rate": 1.8655403490969473e-06,
      "loss": 0.0067,
      "step": 2489020
    },
    {
      "epoch": 4.073368551285324,
      "grad_norm": 0.29399120807647705,
      "learning_rate": 1.8654744568834302e-06,
      "loss": 0.008,
      "step": 2489040
    },
    {
      "epoch": 4.073401281723978,
      "grad_norm": 0.4835036098957062,
      "learning_rate": 1.865408564669913e-06,
      "loss": 0.0136,
      "step": 2489060
    },
    {
      "epoch": 4.073434012162631,
      "grad_norm": 0.35379651188850403,
      "learning_rate": 1.8653426724563961e-06,
      "loss": 0.0099,
      "step": 2489080
    },
    {
      "epoch": 4.073466742601284,
      "grad_norm": 0.6084886193275452,
      "learning_rate": 1.8652767802428789e-06,
      "loss": 0.0085,
      "step": 2489100
    },
    {
      "epoch": 4.073499473039938,
      "grad_norm": 0.2630217671394348,
      "learning_rate": 1.8652108880293618e-06,
      "loss": 0.0088,
      "step": 2489120
    },
    {
      "epoch": 4.073532203478591,
      "grad_norm": 0.05652043595910072,
      "learning_rate": 1.8651449958158446e-06,
      "loss": 0.0065,
      "step": 2489140
    },
    {
      "epoch": 4.073564933917244,
      "grad_norm": 0.35154837369918823,
      "learning_rate": 1.8650791036023275e-06,
      "loss": 0.0119,
      "step": 2489160
    },
    {
      "epoch": 4.073597664355898,
      "grad_norm": 0.28160011768341064,
      "learning_rate": 1.8650132113888102e-06,
      "loss": 0.0115,
      "step": 2489180
    },
    {
      "epoch": 4.073630394794551,
      "grad_norm": 0.21621569991111755,
      "learning_rate": 1.8649473191752932e-06,
      "loss": 0.0117,
      "step": 2489200
    },
    {
      "epoch": 4.073663125233204,
      "grad_norm": 0.599147379398346,
      "learning_rate": 1.864881426961776e-06,
      "loss": 0.0081,
      "step": 2489220
    },
    {
      "epoch": 4.073695855671858,
      "grad_norm": 0.45513635873794556,
      "learning_rate": 1.8648155347482589e-06,
      "loss": 0.0089,
      "step": 2489240
    },
    {
      "epoch": 4.073728586110511,
      "grad_norm": 0.1745550036430359,
      "learning_rate": 1.8647496425347418e-06,
      "loss": 0.0165,
      "step": 2489260
    },
    {
      "epoch": 4.073761316549164,
      "grad_norm": 0.22425539791584015,
      "learning_rate": 1.8646837503212248e-06,
      "loss": 0.0087,
      "step": 2489280
    },
    {
      "epoch": 4.0737940469878176,
      "grad_norm": 0.20116789638996124,
      "learning_rate": 1.8646178581077075e-06,
      "loss": 0.0082,
      "step": 2489300
    },
    {
      "epoch": 4.073826777426471,
      "grad_norm": 0.42490530014038086,
      "learning_rate": 1.8645519658941905e-06,
      "loss": 0.0135,
      "step": 2489320
    },
    {
      "epoch": 4.073859507865125,
      "grad_norm": 0.2100813388824463,
      "learning_rate": 1.8644860736806732e-06,
      "loss": 0.0072,
      "step": 2489340
    },
    {
      "epoch": 4.0738922383037774,
      "grad_norm": 0.17666840553283691,
      "learning_rate": 1.8644201814671562e-06,
      "loss": 0.0113,
      "step": 2489360
    },
    {
      "epoch": 4.073924968742431,
      "grad_norm": 1.01048743724823,
      "learning_rate": 1.864354289253639e-06,
      "loss": 0.0069,
      "step": 2489380
    },
    {
      "epoch": 4.073957699181085,
      "grad_norm": 0.15492381155490875,
      "learning_rate": 1.8642883970401219e-06,
      "loss": 0.0089,
      "step": 2489400
    },
    {
      "epoch": 4.073990429619737,
      "grad_norm": 0.055688291788101196,
      "learning_rate": 1.8642225048266046e-06,
      "loss": 0.0069,
      "step": 2489420
    },
    {
      "epoch": 4.074023160058391,
      "grad_norm": 0.256277471780777,
      "learning_rate": 1.8641566126130878e-06,
      "loss": 0.0091,
      "step": 2489440
    },
    {
      "epoch": 4.0740558904970445,
      "grad_norm": 0.32274526357650757,
      "learning_rate": 1.8640907203995707e-06,
      "loss": 0.0098,
      "step": 2489460
    },
    {
      "epoch": 4.074088620935698,
      "grad_norm": 0.16810856759548187,
      "learning_rate": 1.8640248281860535e-06,
      "loss": 0.011,
      "step": 2489480
    },
    {
      "epoch": 4.074121351374351,
      "grad_norm": 0.1086733490228653,
      "learning_rate": 1.8639589359725362e-06,
      "loss": 0.0108,
      "step": 2489500
    },
    {
      "epoch": 4.074154081813004,
      "grad_norm": 0.3503910005092621,
      "learning_rate": 1.8638930437590192e-06,
      "loss": 0.0098,
      "step": 2489520
    },
    {
      "epoch": 4.074186812251658,
      "grad_norm": 0.9280670881271362,
      "learning_rate": 1.8638271515455019e-06,
      "loss": 0.0128,
      "step": 2489540
    },
    {
      "epoch": 4.074219542690311,
      "grad_norm": 0.4001612067222595,
      "learning_rate": 1.8637612593319848e-06,
      "loss": 0.007,
      "step": 2489560
    },
    {
      "epoch": 4.074252273128964,
      "grad_norm": 0.42597532272338867,
      "learning_rate": 1.8636953671184676e-06,
      "loss": 0.0132,
      "step": 2489580
    },
    {
      "epoch": 4.074285003567618,
      "grad_norm": 0.11877270042896271,
      "learning_rate": 1.8636294749049505e-06,
      "loss": 0.0087,
      "step": 2489600
    },
    {
      "epoch": 4.0743177340062715,
      "grad_norm": 0.4312584102153778,
      "learning_rate": 1.8635635826914337e-06,
      "loss": 0.0106,
      "step": 2489620
    },
    {
      "epoch": 4.074350464444924,
      "grad_norm": 0.29398810863494873,
      "learning_rate": 1.8634976904779164e-06,
      "loss": 0.01,
      "step": 2489640
    },
    {
      "epoch": 4.074383194883578,
      "grad_norm": 0.13865196704864502,
      "learning_rate": 1.8634317982643994e-06,
      "loss": 0.0082,
      "step": 2489660
    },
    {
      "epoch": 4.074415925322231,
      "grad_norm": 0.6664394736289978,
      "learning_rate": 1.8633659060508821e-06,
      "loss": 0.0087,
      "step": 2489680
    },
    {
      "epoch": 4.074448655760884,
      "grad_norm": 0.4096795320510864,
      "learning_rate": 1.8633000138373649e-06,
      "loss": 0.009,
      "step": 2489700
    },
    {
      "epoch": 4.074481386199538,
      "grad_norm": 0.40895968675613403,
      "learning_rate": 1.8632341216238478e-06,
      "loss": 0.0184,
      "step": 2489720
    },
    {
      "epoch": 4.074514116638191,
      "grad_norm": 0.2672140598297119,
      "learning_rate": 1.8631682294103306e-06,
      "loss": 0.0074,
      "step": 2489740
    },
    {
      "epoch": 4.074546847076845,
      "grad_norm": 0.2791602313518524,
      "learning_rate": 1.8631023371968135e-06,
      "loss": 0.0107,
      "step": 2489760
    },
    {
      "epoch": 4.074579577515498,
      "grad_norm": 0.32722756266593933,
      "learning_rate": 1.8630364449832967e-06,
      "loss": 0.0069,
      "step": 2489780
    },
    {
      "epoch": 4.074612307954151,
      "grad_norm": 0.2835632264614105,
      "learning_rate": 1.8629705527697794e-06,
      "loss": 0.0074,
      "step": 2489800
    },
    {
      "epoch": 4.074645038392805,
      "grad_norm": 0.393694669008255,
      "learning_rate": 1.8629046605562624e-06,
      "loss": 0.0055,
      "step": 2489820
    },
    {
      "epoch": 4.0746777688314575,
      "grad_norm": 0.22268831729888916,
      "learning_rate": 1.8628387683427451e-06,
      "loss": 0.0075,
      "step": 2489840
    },
    {
      "epoch": 4.074710499270111,
      "grad_norm": 0.06314297020435333,
      "learning_rate": 1.862772876129228e-06,
      "loss": 0.0087,
      "step": 2489860
    },
    {
      "epoch": 4.074743229708765,
      "grad_norm": 0.3300212025642395,
      "learning_rate": 1.8627069839157108e-06,
      "loss": 0.0077,
      "step": 2489880
    },
    {
      "epoch": 4.074775960147418,
      "grad_norm": 0.12395971268415451,
      "learning_rate": 1.8626410917021937e-06,
      "loss": 0.0091,
      "step": 2489900
    },
    {
      "epoch": 4.074808690586071,
      "grad_norm": 0.09731192141771317,
      "learning_rate": 1.8625751994886765e-06,
      "loss": 0.0072,
      "step": 2489920
    },
    {
      "epoch": 4.074841421024725,
      "grad_norm": 0.14807681739330292,
      "learning_rate": 1.8625093072751592e-06,
      "loss": 0.0119,
      "step": 2489940
    },
    {
      "epoch": 4.074874151463378,
      "grad_norm": 0.4717048704624176,
      "learning_rate": 1.8624434150616424e-06,
      "loss": 0.0071,
      "step": 2489960
    },
    {
      "epoch": 4.074906881902031,
      "grad_norm": 0.29503539204597473,
      "learning_rate": 1.8623775228481253e-06,
      "loss": 0.0066,
      "step": 2489980
    },
    {
      "epoch": 4.0749396123406845,
      "grad_norm": 0.061836302280426025,
      "learning_rate": 1.862311630634608e-06,
      "loss": 0.0113,
      "step": 2490000
    },
    {
      "epoch": 4.074972342779338,
      "grad_norm": 1.4094746112823486,
      "learning_rate": 1.862245738421091e-06,
      "loss": 0.0092,
      "step": 2490020
    },
    {
      "epoch": 4.075005073217992,
      "grad_norm": 0.06934959441423416,
      "learning_rate": 1.8621798462075738e-06,
      "loss": 0.0065,
      "step": 2490040
    },
    {
      "epoch": 4.075037803656644,
      "grad_norm": 0.1877303123474121,
      "learning_rate": 1.8621139539940567e-06,
      "loss": 0.0076,
      "step": 2490060
    },
    {
      "epoch": 4.075070534095298,
      "grad_norm": 0.2830697000026703,
      "learning_rate": 1.8620480617805395e-06,
      "loss": 0.0084,
      "step": 2490080
    },
    {
      "epoch": 4.0751032645339516,
      "grad_norm": 0.09847734123468399,
      "learning_rate": 1.8619821695670224e-06,
      "loss": 0.0078,
      "step": 2490100
    },
    {
      "epoch": 4.075135994972604,
      "grad_norm": 0.170128732919693,
      "learning_rate": 1.8619162773535052e-06,
      "loss": 0.0079,
      "step": 2490120
    },
    {
      "epoch": 4.075168725411258,
      "grad_norm": 0.44441837072372437,
      "learning_rate": 1.8618503851399883e-06,
      "loss": 0.0077,
      "step": 2490140
    },
    {
      "epoch": 4.0752014558499114,
      "grad_norm": 0.06509093195199966,
      "learning_rate": 1.861784492926471e-06,
      "loss": 0.008,
      "step": 2490160
    },
    {
      "epoch": 4.075234186288565,
      "grad_norm": 0.25045958161354065,
      "learning_rate": 1.861718600712954e-06,
      "loss": 0.009,
      "step": 2490180
    },
    {
      "epoch": 4.075266916727218,
      "grad_norm": 0.47743406891822815,
      "learning_rate": 1.8616527084994368e-06,
      "loss": 0.0106,
      "step": 2490200
    },
    {
      "epoch": 4.075299647165871,
      "grad_norm": 0.40500912070274353,
      "learning_rate": 1.8615868162859197e-06,
      "loss": 0.0085,
      "step": 2490220
    },
    {
      "epoch": 4.075332377604525,
      "grad_norm": 0.24425601959228516,
      "learning_rate": 1.8615209240724024e-06,
      "loss": 0.0079,
      "step": 2490240
    },
    {
      "epoch": 4.075365108043178,
      "grad_norm": 0.20847244560718536,
      "learning_rate": 1.8614550318588854e-06,
      "loss": 0.0079,
      "step": 2490260
    },
    {
      "epoch": 4.075397838481831,
      "grad_norm": 0.2742958962917328,
      "learning_rate": 1.8613891396453681e-06,
      "loss": 0.0056,
      "step": 2490280
    },
    {
      "epoch": 4.075430568920485,
      "grad_norm": 0.0982040986418724,
      "learning_rate": 1.861323247431851e-06,
      "loss": 0.0082,
      "step": 2490300
    },
    {
      "epoch": 4.075463299359138,
      "grad_norm": 0.5358339548110962,
      "learning_rate": 1.861257355218334e-06,
      "loss": 0.008,
      "step": 2490320
    },
    {
      "epoch": 4.075496029797791,
      "grad_norm": 0.31722843647003174,
      "learning_rate": 1.861191463004817e-06,
      "loss": 0.0124,
      "step": 2490340
    },
    {
      "epoch": 4.075528760236445,
      "grad_norm": 0.10696299374103546,
      "learning_rate": 1.8611255707912997e-06,
      "loss": 0.0073,
      "step": 2490360
    },
    {
      "epoch": 4.075561490675098,
      "grad_norm": 0.19564832746982574,
      "learning_rate": 1.8610596785777827e-06,
      "loss": 0.0085,
      "step": 2490380
    },
    {
      "epoch": 4.075594221113751,
      "grad_norm": 0.2605978548526764,
      "learning_rate": 1.8609937863642654e-06,
      "loss": 0.0118,
      "step": 2490400
    },
    {
      "epoch": 4.075626951552405,
      "grad_norm": 0.381163090467453,
      "learning_rate": 1.8609278941507484e-06,
      "loss": 0.0116,
      "step": 2490420
    },
    {
      "epoch": 4.075659681991058,
      "grad_norm": 0.3236897587776184,
      "learning_rate": 1.8608620019372311e-06,
      "loss": 0.0061,
      "step": 2490440
    },
    {
      "epoch": 4.075692412429712,
      "grad_norm": 0.1160556823015213,
      "learning_rate": 1.860796109723714e-06,
      "loss": 0.0083,
      "step": 2490460
    },
    {
      "epoch": 4.0757251428683645,
      "grad_norm": 0.24185898900032043,
      "learning_rate": 1.8607302175101968e-06,
      "loss": 0.0082,
      "step": 2490480
    },
    {
      "epoch": 4.075757873307018,
      "grad_norm": 0.37463921308517456,
      "learning_rate": 1.86066432529668e-06,
      "loss": 0.01,
      "step": 2490500
    },
    {
      "epoch": 4.075790603745672,
      "grad_norm": 0.39729759097099304,
      "learning_rate": 1.8605984330831627e-06,
      "loss": 0.0069,
      "step": 2490520
    },
    {
      "epoch": 4.075823334184324,
      "grad_norm": 0.30623817443847656,
      "learning_rate": 1.8605325408696457e-06,
      "loss": 0.0085,
      "step": 2490540
    },
    {
      "epoch": 4.075856064622978,
      "grad_norm": 0.1079140305519104,
      "learning_rate": 1.8604666486561284e-06,
      "loss": 0.0117,
      "step": 2490560
    },
    {
      "epoch": 4.075888795061632,
      "grad_norm": 0.34724661707878113,
      "learning_rate": 1.8604007564426113e-06,
      "loss": 0.0083,
      "step": 2490580
    },
    {
      "epoch": 4.075921525500285,
      "grad_norm": 0.057359758764505386,
      "learning_rate": 1.860334864229094e-06,
      "loss": 0.0086,
      "step": 2490600
    },
    {
      "epoch": 4.075954255938938,
      "grad_norm": 0.20519302785396576,
      "learning_rate": 1.860268972015577e-06,
      "loss": 0.0087,
      "step": 2490620
    },
    {
      "epoch": 4.0759869863775915,
      "grad_norm": 0.11813700944185257,
      "learning_rate": 1.8602030798020598e-06,
      "loss": 0.0072,
      "step": 2490640
    },
    {
      "epoch": 4.076019716816245,
      "grad_norm": 0.12570977210998535,
      "learning_rate": 1.860137187588543e-06,
      "loss": 0.0089,
      "step": 2490660
    },
    {
      "epoch": 4.076052447254898,
      "grad_norm": 0.22435319423675537,
      "learning_rate": 1.8600712953750257e-06,
      "loss": 0.0087,
      "step": 2490680
    },
    {
      "epoch": 4.076085177693551,
      "grad_norm": 0.17189007997512817,
      "learning_rate": 1.8600054031615086e-06,
      "loss": 0.0096,
      "step": 2490700
    },
    {
      "epoch": 4.076117908132205,
      "grad_norm": 0.12642721831798553,
      "learning_rate": 1.8599395109479914e-06,
      "loss": 0.0085,
      "step": 2490720
    },
    {
      "epoch": 4.076150638570859,
      "grad_norm": 0.0896899402141571,
      "learning_rate": 1.8598736187344743e-06,
      "loss": 0.0087,
      "step": 2490740
    },
    {
      "epoch": 4.076183369009511,
      "grad_norm": 0.23465996980667114,
      "learning_rate": 1.859807726520957e-06,
      "loss": 0.01,
      "step": 2490760
    },
    {
      "epoch": 4.076216099448165,
      "grad_norm": 0.08316062390804291,
      "learning_rate": 1.85974183430744e-06,
      "loss": 0.0053,
      "step": 2490780
    },
    {
      "epoch": 4.0762488298868185,
      "grad_norm": 0.2425185590982437,
      "learning_rate": 1.8596759420939228e-06,
      "loss": 0.0136,
      "step": 2490800
    },
    {
      "epoch": 4.076281560325471,
      "grad_norm": 0.22632601857185364,
      "learning_rate": 1.8596100498804057e-06,
      "loss": 0.01,
      "step": 2490820
    },
    {
      "epoch": 4.076314290764125,
      "grad_norm": 0.343322217464447,
      "learning_rate": 1.8595441576668889e-06,
      "loss": 0.0119,
      "step": 2490840
    },
    {
      "epoch": 4.076347021202778,
      "grad_norm": 0.430392861366272,
      "learning_rate": 1.8594782654533716e-06,
      "loss": 0.0128,
      "step": 2490860
    },
    {
      "epoch": 4.076379751641431,
      "grad_norm": 0.16892464458942413,
      "learning_rate": 1.8594123732398546e-06,
      "loss": 0.0079,
      "step": 2490880
    },
    {
      "epoch": 4.076412482080085,
      "grad_norm": 0.22812910377979279,
      "learning_rate": 1.8593464810263373e-06,
      "loss": 0.0083,
      "step": 2490900
    },
    {
      "epoch": 4.076445212518738,
      "grad_norm": 0.2772979438304901,
      "learning_rate": 1.85928058881282e-06,
      "loss": 0.0093,
      "step": 2490920
    },
    {
      "epoch": 4.076477942957392,
      "grad_norm": 0.09938423335552216,
      "learning_rate": 1.859214696599303e-06,
      "loss": 0.0103,
      "step": 2490940
    },
    {
      "epoch": 4.076510673396045,
      "grad_norm": 0.5202136039733887,
      "learning_rate": 1.8591488043857857e-06,
      "loss": 0.0119,
      "step": 2490960
    },
    {
      "epoch": 4.076543403834698,
      "grad_norm": 0.9137936234474182,
      "learning_rate": 1.8590829121722687e-06,
      "loss": 0.0094,
      "step": 2490980
    },
    {
      "epoch": 4.076576134273352,
      "grad_norm": 0.13750173151493073,
      "learning_rate": 1.8590170199587514e-06,
      "loss": 0.0068,
      "step": 2491000
    },
    {
      "epoch": 4.0766088647120045,
      "grad_norm": 0.17819392681121826,
      "learning_rate": 1.8589511277452346e-06,
      "loss": 0.0072,
      "step": 2491020
    },
    {
      "epoch": 4.076641595150658,
      "grad_norm": 0.1357787698507309,
      "learning_rate": 1.8588852355317175e-06,
      "loss": 0.0089,
      "step": 2491040
    },
    {
      "epoch": 4.076674325589312,
      "grad_norm": 0.3900963366031647,
      "learning_rate": 1.8588193433182003e-06,
      "loss": 0.0094,
      "step": 2491060
    },
    {
      "epoch": 4.076707056027965,
      "grad_norm": 0.10464224219322205,
      "learning_rate": 1.8587534511046832e-06,
      "loss": 0.011,
      "step": 2491080
    },
    {
      "epoch": 4.076739786466618,
      "grad_norm": 0.23406930267810822,
      "learning_rate": 1.858687558891166e-06,
      "loss": 0.0117,
      "step": 2491100
    },
    {
      "epoch": 4.0767725169052715,
      "grad_norm": 0.2026393860578537,
      "learning_rate": 1.858621666677649e-06,
      "loss": 0.0098,
      "step": 2491120
    },
    {
      "epoch": 4.076805247343925,
      "grad_norm": 0.7154890894889832,
      "learning_rate": 1.8585557744641317e-06,
      "loss": 0.0078,
      "step": 2491140
    },
    {
      "epoch": 4.076837977782578,
      "grad_norm": 0.2983578145503998,
      "learning_rate": 1.8584898822506144e-06,
      "loss": 0.0084,
      "step": 2491160
    },
    {
      "epoch": 4.076870708221231,
      "grad_norm": 0.12692835927009583,
      "learning_rate": 1.8584239900370974e-06,
      "loss": 0.0074,
      "step": 2491180
    },
    {
      "epoch": 4.076903438659885,
      "grad_norm": 0.1788514107465744,
      "learning_rate": 1.8583580978235805e-06,
      "loss": 0.0125,
      "step": 2491200
    },
    {
      "epoch": 4.076936169098539,
      "grad_norm": 0.06659792363643646,
      "learning_rate": 1.8582922056100633e-06,
      "loss": 0.008,
      "step": 2491220
    },
    {
      "epoch": 4.076968899537191,
      "grad_norm": 0.4237472414970398,
      "learning_rate": 1.8582263133965462e-06,
      "loss": 0.0113,
      "step": 2491240
    },
    {
      "epoch": 4.077001629975845,
      "grad_norm": 0.19156640768051147,
      "learning_rate": 1.858160421183029e-06,
      "loss": 0.0113,
      "step": 2491260
    },
    {
      "epoch": 4.0770343604144985,
      "grad_norm": 0.1447153091430664,
      "learning_rate": 1.858094528969512e-06,
      "loss": 0.0125,
      "step": 2491280
    },
    {
      "epoch": 4.077067090853151,
      "grad_norm": 0.5788946747779846,
      "learning_rate": 1.8580286367559946e-06,
      "loss": 0.0113,
      "step": 2491300
    },
    {
      "epoch": 4.077099821291805,
      "grad_norm": 0.07638709247112274,
      "learning_rate": 1.8579627445424776e-06,
      "loss": 0.0077,
      "step": 2491320
    },
    {
      "epoch": 4.077132551730458,
      "grad_norm": 0.29329442977905273,
      "learning_rate": 1.8578968523289603e-06,
      "loss": 0.0124,
      "step": 2491340
    },
    {
      "epoch": 4.077165282169112,
      "grad_norm": 0.29010888934135437,
      "learning_rate": 1.857830960115443e-06,
      "loss": 0.007,
      "step": 2491360
    },
    {
      "epoch": 4.077198012607765,
      "grad_norm": 0.11421481519937515,
      "learning_rate": 1.8577650679019262e-06,
      "loss": 0.0055,
      "step": 2491380
    },
    {
      "epoch": 4.077230743046418,
      "grad_norm": 0.45196327567100525,
      "learning_rate": 1.8576991756884092e-06,
      "loss": 0.0078,
      "step": 2491400
    },
    {
      "epoch": 4.077263473485072,
      "grad_norm": 0.1836141049861908,
      "learning_rate": 1.857633283474892e-06,
      "loss": 0.0141,
      "step": 2491420
    },
    {
      "epoch": 4.077296203923725,
      "grad_norm": 0.148562952876091,
      "learning_rate": 1.8575673912613749e-06,
      "loss": 0.0083,
      "step": 2491440
    },
    {
      "epoch": 4.077328934362378,
      "grad_norm": 0.1898794174194336,
      "learning_rate": 1.8575014990478576e-06,
      "loss": 0.0103,
      "step": 2491460
    },
    {
      "epoch": 4.077361664801032,
      "grad_norm": 0.6092569828033447,
      "learning_rate": 1.8574356068343406e-06,
      "loss": 0.0086,
      "step": 2491480
    },
    {
      "epoch": 4.077394395239685,
      "grad_norm": 0.1649574637413025,
      "learning_rate": 1.8573697146208233e-06,
      "loss": 0.0077,
      "step": 2491500
    },
    {
      "epoch": 4.077427125678338,
      "grad_norm": 0.11961246281862259,
      "learning_rate": 1.8573038224073063e-06,
      "loss": 0.0071,
      "step": 2491520
    },
    {
      "epoch": 4.077459856116992,
      "grad_norm": 0.1248989999294281,
      "learning_rate": 1.8572379301937892e-06,
      "loss": 0.0068,
      "step": 2491540
    },
    {
      "epoch": 4.077492586555645,
      "grad_norm": 0.14754045009613037,
      "learning_rate": 1.8571720379802722e-06,
      "loss": 0.0065,
      "step": 2491560
    },
    {
      "epoch": 4.077525316994298,
      "grad_norm": 0.443856805562973,
      "learning_rate": 1.857106145766755e-06,
      "loss": 0.0076,
      "step": 2491580
    },
    {
      "epoch": 4.077558047432952,
      "grad_norm": 0.07040904462337494,
      "learning_rate": 1.8570402535532379e-06,
      "loss": 0.0082,
      "step": 2491600
    },
    {
      "epoch": 4.077590777871605,
      "grad_norm": 0.28005966544151306,
      "learning_rate": 1.8569743613397206e-06,
      "loss": 0.0114,
      "step": 2491620
    },
    {
      "epoch": 4.077623508310259,
      "grad_norm": 0.1443072408437729,
      "learning_rate": 1.8569084691262035e-06,
      "loss": 0.0096,
      "step": 2491640
    },
    {
      "epoch": 4.0776562387489115,
      "grad_norm": 0.16838662326335907,
      "learning_rate": 1.8568425769126863e-06,
      "loss": 0.011,
      "step": 2491660
    },
    {
      "epoch": 4.077688969187565,
      "grad_norm": 0.07948702573776245,
      "learning_rate": 1.8567766846991692e-06,
      "loss": 0.0111,
      "step": 2491680
    },
    {
      "epoch": 4.077721699626219,
      "grad_norm": 0.0707160010933876,
      "learning_rate": 1.856710792485652e-06,
      "loss": 0.0088,
      "step": 2491700
    },
    {
      "epoch": 4.077754430064871,
      "grad_norm": 0.5713914036750793,
      "learning_rate": 1.8566449002721351e-06,
      "loss": 0.0083,
      "step": 2491720
    },
    {
      "epoch": 4.077787160503525,
      "grad_norm": 0.24062828719615936,
      "learning_rate": 1.8565790080586179e-06,
      "loss": 0.0087,
      "step": 2491740
    },
    {
      "epoch": 4.077819890942179,
      "grad_norm": 0.22770795226097107,
      "learning_rate": 1.8565131158451008e-06,
      "loss": 0.0123,
      "step": 2491760
    },
    {
      "epoch": 4.077852621380832,
      "grad_norm": 0.23100152611732483,
      "learning_rate": 1.8564472236315836e-06,
      "loss": 0.0072,
      "step": 2491780
    },
    {
      "epoch": 4.077885351819485,
      "grad_norm": 0.21483592689037323,
      "learning_rate": 1.8563813314180665e-06,
      "loss": 0.008,
      "step": 2491800
    },
    {
      "epoch": 4.0779180822581385,
      "grad_norm": 0.23484060168266296,
      "learning_rate": 1.8563154392045493e-06,
      "loss": 0.0104,
      "step": 2491820
    },
    {
      "epoch": 4.077950812696792,
      "grad_norm": 0.19842611253261566,
      "learning_rate": 1.8562495469910322e-06,
      "loss": 0.0096,
      "step": 2491840
    },
    {
      "epoch": 4.077983543135445,
      "grad_norm": 0.3771637976169586,
      "learning_rate": 1.856183654777515e-06,
      "loss": 0.0072,
      "step": 2491860
    },
    {
      "epoch": 4.078016273574098,
      "grad_norm": 0.8456020951271057,
      "learning_rate": 1.856117762563998e-06,
      "loss": 0.0058,
      "step": 2491880
    },
    {
      "epoch": 4.078049004012752,
      "grad_norm": 0.24922005832195282,
      "learning_rate": 1.8560518703504809e-06,
      "loss": 0.0111,
      "step": 2491900
    },
    {
      "epoch": 4.0780817344514055,
      "grad_norm": 0.21737685799598694,
      "learning_rate": 1.8559859781369638e-06,
      "loss": 0.0122,
      "step": 2491920
    },
    {
      "epoch": 4.078114464890058,
      "grad_norm": 0.10153954476118088,
      "learning_rate": 1.8559200859234465e-06,
      "loss": 0.016,
      "step": 2491940
    },
    {
      "epoch": 4.078147195328712,
      "grad_norm": 0.33288300037384033,
      "learning_rate": 1.8558541937099295e-06,
      "loss": 0.0136,
      "step": 2491960
    },
    {
      "epoch": 4.078179925767365,
      "grad_norm": 0.20876194536685944,
      "learning_rate": 1.8557883014964122e-06,
      "loss": 0.0086,
      "step": 2491980
    },
    {
      "epoch": 4.078212656206018,
      "grad_norm": 0.3183116614818573,
      "learning_rate": 1.8557224092828952e-06,
      "loss": 0.0137,
      "step": 2492000
    },
    {
      "epoch": 4.078245386644672,
      "grad_norm": 0.1308230608701706,
      "learning_rate": 1.855656517069378e-06,
      "loss": 0.0107,
      "step": 2492020
    },
    {
      "epoch": 4.078278117083325,
      "grad_norm": 0.11637655645608902,
      "learning_rate": 1.8555906248558609e-06,
      "loss": 0.0094,
      "step": 2492040
    },
    {
      "epoch": 4.078310847521979,
      "grad_norm": 0.3735467493534088,
      "learning_rate": 1.8555247326423436e-06,
      "loss": 0.0082,
      "step": 2492060
    },
    {
      "epoch": 4.078343577960632,
      "grad_norm": 0.07489790767431259,
      "learning_rate": 1.8554588404288268e-06,
      "loss": 0.0081,
      "step": 2492080
    },
    {
      "epoch": 4.078376308399285,
      "grad_norm": 0.31317535042762756,
      "learning_rate": 1.8553929482153097e-06,
      "loss": 0.011,
      "step": 2492100
    },
    {
      "epoch": 4.078409038837939,
      "grad_norm": 0.17380079627037048,
      "learning_rate": 1.8553270560017925e-06,
      "loss": 0.0106,
      "step": 2492120
    },
    {
      "epoch": 4.0784417692765915,
      "grad_norm": 0.12289588153362274,
      "learning_rate": 1.8552611637882752e-06,
      "loss": 0.0071,
      "step": 2492140
    },
    {
      "epoch": 4.078474499715245,
      "grad_norm": 0.18115365505218506,
      "learning_rate": 1.8551952715747582e-06,
      "loss": 0.0099,
      "step": 2492160
    },
    {
      "epoch": 4.078507230153899,
      "grad_norm": 0.16995753347873688,
      "learning_rate": 1.855129379361241e-06,
      "loss": 0.0069,
      "step": 2492180
    },
    {
      "epoch": 4.078539960592552,
      "grad_norm": 0.23497061431407928,
      "learning_rate": 1.8550634871477239e-06,
      "loss": 0.0117,
      "step": 2492200
    },
    {
      "epoch": 4.078572691031205,
      "grad_norm": 0.32262396812438965,
      "learning_rate": 1.8549975949342066e-06,
      "loss": 0.0103,
      "step": 2492220
    },
    {
      "epoch": 4.078605421469859,
      "grad_norm": 0.34152865409851074,
      "learning_rate": 1.8549317027206898e-06,
      "loss": 0.0106,
      "step": 2492240
    },
    {
      "epoch": 4.078638151908512,
      "grad_norm": 0.36900267004966736,
      "learning_rate": 1.8548658105071727e-06,
      "loss": 0.0074,
      "step": 2492260
    },
    {
      "epoch": 4.078670882347165,
      "grad_norm": 0.15333712100982666,
      "learning_rate": 1.8547999182936554e-06,
      "loss": 0.0072,
      "step": 2492280
    },
    {
      "epoch": 4.0787036127858185,
      "grad_norm": 0.17048586905002594,
      "learning_rate": 1.8547340260801384e-06,
      "loss": 0.0092,
      "step": 2492300
    },
    {
      "epoch": 4.078736343224472,
      "grad_norm": 0.22850605845451355,
      "learning_rate": 1.8546681338666211e-06,
      "loss": 0.0092,
      "step": 2492320
    },
    {
      "epoch": 4.078769073663125,
      "grad_norm": 0.378906786441803,
      "learning_rate": 1.8546022416531039e-06,
      "loss": 0.0121,
      "step": 2492340
    },
    {
      "epoch": 4.078801804101778,
      "grad_norm": 0.06245722621679306,
      "learning_rate": 1.8545363494395868e-06,
      "loss": 0.0118,
      "step": 2492360
    },
    {
      "epoch": 4.078834534540432,
      "grad_norm": 0.1598185896873474,
      "learning_rate": 1.8544704572260696e-06,
      "loss": 0.0082,
      "step": 2492380
    },
    {
      "epoch": 4.078867264979086,
      "grad_norm": 0.1695467084646225,
      "learning_rate": 1.8544045650125525e-06,
      "loss": 0.0105,
      "step": 2492400
    },
    {
      "epoch": 4.078899995417738,
      "grad_norm": 0.10893407464027405,
      "learning_rate": 1.8543386727990357e-06,
      "loss": 0.0073,
      "step": 2492420
    },
    {
      "epoch": 4.078932725856392,
      "grad_norm": 0.29705625772476196,
      "learning_rate": 1.8542727805855184e-06,
      "loss": 0.0084,
      "step": 2492440
    },
    {
      "epoch": 4.0789654562950455,
      "grad_norm": 0.1393154263496399,
      "learning_rate": 1.8542068883720014e-06,
      "loss": 0.0062,
      "step": 2492460
    },
    {
      "epoch": 4.078998186733698,
      "grad_norm": 0.1310977190732956,
      "learning_rate": 1.8541409961584841e-06,
      "loss": 0.0074,
      "step": 2492480
    },
    {
      "epoch": 4.079030917172352,
      "grad_norm": 0.18226009607315063,
      "learning_rate": 1.854075103944967e-06,
      "loss": 0.0109,
      "step": 2492500
    },
    {
      "epoch": 4.079063647611005,
      "grad_norm": 0.4951072037220001,
      "learning_rate": 1.8540092117314498e-06,
      "loss": 0.0112,
      "step": 2492520
    },
    {
      "epoch": 4.079096378049659,
      "grad_norm": 0.4690830707550049,
      "learning_rate": 1.8539433195179328e-06,
      "loss": 0.0118,
      "step": 2492540
    },
    {
      "epoch": 4.079129108488312,
      "grad_norm": 0.38092783093452454,
      "learning_rate": 1.8538774273044155e-06,
      "loss": 0.0117,
      "step": 2492560
    },
    {
      "epoch": 4.079161838926965,
      "grad_norm": 0.16589665412902832,
      "learning_rate": 1.8538115350908982e-06,
      "loss": 0.0065,
      "step": 2492580
    },
    {
      "epoch": 4.079194569365619,
      "grad_norm": 0.2802887558937073,
      "learning_rate": 1.8537456428773814e-06,
      "loss": 0.0096,
      "step": 2492600
    },
    {
      "epoch": 4.079227299804272,
      "grad_norm": 1.474045753479004,
      "learning_rate": 1.8536797506638644e-06,
      "loss": 0.0161,
      "step": 2492620
    },
    {
      "epoch": 4.079260030242925,
      "grad_norm": 0.25905197858810425,
      "learning_rate": 1.853613858450347e-06,
      "loss": 0.0091,
      "step": 2492640
    },
    {
      "epoch": 4.079292760681579,
      "grad_norm": 0.5196036696434021,
      "learning_rate": 1.85354796623683e-06,
      "loss": 0.007,
      "step": 2492660
    },
    {
      "epoch": 4.079325491120232,
      "grad_norm": 0.11719600856304169,
      "learning_rate": 1.8534820740233128e-06,
      "loss": 0.0123,
      "step": 2492680
    },
    {
      "epoch": 4.079358221558885,
      "grad_norm": 0.12201792001724243,
      "learning_rate": 1.8534161818097957e-06,
      "loss": 0.0096,
      "step": 2492700
    },
    {
      "epoch": 4.079390951997539,
      "grad_norm": 0.7706858515739441,
      "learning_rate": 1.8533502895962785e-06,
      "loss": 0.0125,
      "step": 2492720
    },
    {
      "epoch": 4.079423682436192,
      "grad_norm": 0.15089042484760284,
      "learning_rate": 1.8532843973827614e-06,
      "loss": 0.0076,
      "step": 2492740
    },
    {
      "epoch": 4.079456412874845,
      "grad_norm": 0.23587776720523834,
      "learning_rate": 1.8532185051692442e-06,
      "loss": 0.0076,
      "step": 2492760
    },
    {
      "epoch": 4.0794891433134985,
      "grad_norm": 0.3709139823913574,
      "learning_rate": 1.8531526129557273e-06,
      "loss": 0.0129,
      "step": 2492780
    },
    {
      "epoch": 4.079521873752152,
      "grad_norm": 0.3687956631183624,
      "learning_rate": 1.85308672074221e-06,
      "loss": 0.0066,
      "step": 2492800
    },
    {
      "epoch": 4.079554604190806,
      "grad_norm": 0.16813291609287262,
      "learning_rate": 1.853020828528693e-06,
      "loss": 0.0122,
      "step": 2492820
    },
    {
      "epoch": 4.079587334629458,
      "grad_norm": 0.15381528437137604,
      "learning_rate": 1.8529549363151758e-06,
      "loss": 0.0079,
      "step": 2492840
    },
    {
      "epoch": 4.079620065068112,
      "grad_norm": 0.09350059181451797,
      "learning_rate": 1.8528890441016587e-06,
      "loss": 0.0104,
      "step": 2492860
    },
    {
      "epoch": 4.079652795506766,
      "grad_norm": 0.19410699605941772,
      "learning_rate": 1.8528231518881415e-06,
      "loss": 0.0082,
      "step": 2492880
    },
    {
      "epoch": 4.079685525945418,
      "grad_norm": 0.2407512664794922,
      "learning_rate": 1.8527572596746244e-06,
      "loss": 0.0164,
      "step": 2492900
    },
    {
      "epoch": 4.079718256384072,
      "grad_norm": 0.14599864184856415,
      "learning_rate": 1.8526913674611071e-06,
      "loss": 0.013,
      "step": 2492920
    },
    {
      "epoch": 4.0797509868227255,
      "grad_norm": 0.16168203949928284,
      "learning_rate": 1.85262547524759e-06,
      "loss": 0.0092,
      "step": 2492940
    },
    {
      "epoch": 4.079783717261379,
      "grad_norm": 0.15974383056163788,
      "learning_rate": 1.852559583034073e-06,
      "loss": 0.0067,
      "step": 2492960
    },
    {
      "epoch": 4.079816447700032,
      "grad_norm": 0.24686923623085022,
      "learning_rate": 1.852493690820556e-06,
      "loss": 0.0087,
      "step": 2492980
    },
    {
      "epoch": 4.079849178138685,
      "grad_norm": 0.025266282260417938,
      "learning_rate": 1.8524277986070387e-06,
      "loss": 0.0125,
      "step": 2493000
    },
    {
      "epoch": 4.079881908577339,
      "grad_norm": 0.23157382011413574,
      "learning_rate": 1.8523619063935217e-06,
      "loss": 0.0087,
      "step": 2493020
    },
    {
      "epoch": 4.079914639015992,
      "grad_norm": 0.6441547870635986,
      "learning_rate": 1.8522960141800044e-06,
      "loss": 0.0145,
      "step": 2493040
    },
    {
      "epoch": 4.079947369454645,
      "grad_norm": 0.21295671164989471,
      "learning_rate": 1.8522301219664874e-06,
      "loss": 0.0099,
      "step": 2493060
    },
    {
      "epoch": 4.079980099893299,
      "grad_norm": 0.1309754103422165,
      "learning_rate": 1.8521642297529701e-06,
      "loss": 0.0107,
      "step": 2493080
    },
    {
      "epoch": 4.0800128303319525,
      "grad_norm": 0.1903403103351593,
      "learning_rate": 1.852098337539453e-06,
      "loss": 0.0096,
      "step": 2493100
    },
    {
      "epoch": 4.080045560770605,
      "grad_norm": 0.4014466404914856,
      "learning_rate": 1.852032445325936e-06,
      "loss": 0.0091,
      "step": 2493120
    },
    {
      "epoch": 4.080078291209259,
      "grad_norm": 0.06285931169986725,
      "learning_rate": 1.851966553112419e-06,
      "loss": 0.0064,
      "step": 2493140
    },
    {
      "epoch": 4.080111021647912,
      "grad_norm": 0.16441302001476288,
      "learning_rate": 1.8519006608989017e-06,
      "loss": 0.0119,
      "step": 2493160
    },
    {
      "epoch": 4.080143752086565,
      "grad_norm": 0.15312284231185913,
      "learning_rate": 1.8518347686853847e-06,
      "loss": 0.009,
      "step": 2493180
    },
    {
      "epoch": 4.080176482525219,
      "grad_norm": 0.08420879393815994,
      "learning_rate": 1.8517688764718674e-06,
      "loss": 0.0088,
      "step": 2493200
    },
    {
      "epoch": 4.080209212963872,
      "grad_norm": 0.4518014192581177,
      "learning_rate": 1.8517029842583504e-06,
      "loss": 0.0067,
      "step": 2493220
    },
    {
      "epoch": 4.080241943402526,
      "grad_norm": 0.0760684534907341,
      "learning_rate": 1.851637092044833e-06,
      "loss": 0.0069,
      "step": 2493240
    },
    {
      "epoch": 4.080274673841179,
      "grad_norm": 0.06203579902648926,
      "learning_rate": 1.851571199831316e-06,
      "loss": 0.006,
      "step": 2493260
    },
    {
      "epoch": 4.080307404279832,
      "grad_norm": 0.49166297912597656,
      "learning_rate": 1.8515053076177988e-06,
      "loss": 0.0103,
      "step": 2493280
    },
    {
      "epoch": 4.080340134718486,
      "grad_norm": 0.3861933648586273,
      "learning_rate": 1.851439415404282e-06,
      "loss": 0.0072,
      "step": 2493300
    },
    {
      "epoch": 4.0803728651571385,
      "grad_norm": 0.07029673457145691,
      "learning_rate": 1.851373523190765e-06,
      "loss": 0.0071,
      "step": 2493320
    },
    {
      "epoch": 4.080405595595792,
      "grad_norm": 0.6414455771446228,
      "learning_rate": 1.8513076309772476e-06,
      "loss": 0.0116,
      "step": 2493340
    },
    {
      "epoch": 4.080438326034446,
      "grad_norm": 0.39640915393829346,
      "learning_rate": 1.8512417387637304e-06,
      "loss": 0.0087,
      "step": 2493360
    },
    {
      "epoch": 4.080471056473099,
      "grad_norm": 0.2509637773036957,
      "learning_rate": 1.8511758465502133e-06,
      "loss": 0.0075,
      "step": 2493380
    },
    {
      "epoch": 4.080503786911752,
      "grad_norm": 0.16269013285636902,
      "learning_rate": 1.851109954336696e-06,
      "loss": 0.006,
      "step": 2493400
    },
    {
      "epoch": 4.080536517350406,
      "grad_norm": 0.0838596299290657,
      "learning_rate": 1.851044062123179e-06,
      "loss": 0.0088,
      "step": 2493420
    },
    {
      "epoch": 4.080569247789059,
      "grad_norm": 0.38817834854125977,
      "learning_rate": 1.8509781699096618e-06,
      "loss": 0.0058,
      "step": 2493440
    },
    {
      "epoch": 4.080601978227712,
      "grad_norm": 0.09515578299760818,
      "learning_rate": 1.8509122776961447e-06,
      "loss": 0.0053,
      "step": 2493460
    },
    {
      "epoch": 4.0806347086663655,
      "grad_norm": 0.2828221917152405,
      "learning_rate": 1.8508463854826279e-06,
      "loss": 0.0087,
      "step": 2493480
    },
    {
      "epoch": 4.080667439105019,
      "grad_norm": 0.19135171175003052,
      "learning_rate": 1.8507804932691106e-06,
      "loss": 0.01,
      "step": 2493500
    },
    {
      "epoch": 4.080700169543673,
      "grad_norm": 0.23541854321956635,
      "learning_rate": 1.8507146010555936e-06,
      "loss": 0.0084,
      "step": 2493520
    },
    {
      "epoch": 4.080732899982325,
      "grad_norm": 0.2657884657382965,
      "learning_rate": 1.8506487088420763e-06,
      "loss": 0.0117,
      "step": 2493540
    },
    {
      "epoch": 4.080765630420979,
      "grad_norm": 0.16285671293735504,
      "learning_rate": 1.850582816628559e-06,
      "loss": 0.0072,
      "step": 2493560
    },
    {
      "epoch": 4.0807983608596325,
      "grad_norm": 0.4360610544681549,
      "learning_rate": 1.850516924415042e-06,
      "loss": 0.0106,
      "step": 2493580
    },
    {
      "epoch": 4.080831091298285,
      "grad_norm": 0.3281109631061554,
      "learning_rate": 1.8504510322015247e-06,
      "loss": 0.0058,
      "step": 2493600
    },
    {
      "epoch": 4.080863821736939,
      "grad_norm": 0.06602799892425537,
      "learning_rate": 1.8503851399880077e-06,
      "loss": 0.007,
      "step": 2493620
    },
    {
      "epoch": 4.080896552175592,
      "grad_norm": 0.05909908190369606,
      "learning_rate": 1.8503192477744904e-06,
      "loss": 0.007,
      "step": 2493640
    },
    {
      "epoch": 4.080929282614246,
      "grad_norm": 0.33057957887649536,
      "learning_rate": 1.8502533555609736e-06,
      "loss": 0.008,
      "step": 2493660
    },
    {
      "epoch": 4.080962013052899,
      "grad_norm": 0.15940561890602112,
      "learning_rate": 1.8501874633474565e-06,
      "loss": 0.0129,
      "step": 2493680
    },
    {
      "epoch": 4.080994743491552,
      "grad_norm": 0.12440091371536255,
      "learning_rate": 1.8501215711339393e-06,
      "loss": 0.0068,
      "step": 2493700
    },
    {
      "epoch": 4.081027473930206,
      "grad_norm": 0.19808775186538696,
      "learning_rate": 1.8500556789204222e-06,
      "loss": 0.0093,
      "step": 2493720
    },
    {
      "epoch": 4.081060204368859,
      "grad_norm": 0.10607779026031494,
      "learning_rate": 1.849989786706905e-06,
      "loss": 0.0058,
      "step": 2493740
    },
    {
      "epoch": 4.081092934807512,
      "grad_norm": 0.22754034399986267,
      "learning_rate": 1.849923894493388e-06,
      "loss": 0.0079,
      "step": 2493760
    },
    {
      "epoch": 4.081125665246166,
      "grad_norm": 0.1508316695690155,
      "learning_rate": 1.8498580022798707e-06,
      "loss": 0.0062,
      "step": 2493780
    },
    {
      "epoch": 4.0811583956848185,
      "grad_norm": 0.12889550626277924,
      "learning_rate": 1.8497921100663534e-06,
      "loss": 0.0151,
      "step": 2493800
    },
    {
      "epoch": 4.081191126123472,
      "grad_norm": 0.12077049911022186,
      "learning_rate": 1.8497262178528364e-06,
      "loss": 0.011,
      "step": 2493820
    },
    {
      "epoch": 4.081223856562126,
      "grad_norm": 0.11465790867805481,
      "learning_rate": 1.8496603256393195e-06,
      "loss": 0.0099,
      "step": 2493840
    },
    {
      "epoch": 4.081256587000779,
      "grad_norm": 0.2243850976228714,
      "learning_rate": 1.8495944334258023e-06,
      "loss": 0.0095,
      "step": 2493860
    },
    {
      "epoch": 4.081289317439432,
      "grad_norm": 0.12388405203819275,
      "learning_rate": 1.8495285412122852e-06,
      "loss": 0.0096,
      "step": 2493880
    },
    {
      "epoch": 4.081322047878086,
      "grad_norm": 0.22325141727924347,
      "learning_rate": 1.849462648998768e-06,
      "loss": 0.0087,
      "step": 2493900
    },
    {
      "epoch": 4.081354778316739,
      "grad_norm": 0.2101222723722458,
      "learning_rate": 1.849396756785251e-06,
      "loss": 0.0092,
      "step": 2493920
    },
    {
      "epoch": 4.081387508755392,
      "grad_norm": 0.21295888721942902,
      "learning_rate": 1.8493308645717336e-06,
      "loss": 0.0092,
      "step": 2493940
    },
    {
      "epoch": 4.0814202391940455,
      "grad_norm": 0.27562567591667175,
      "learning_rate": 1.8492649723582166e-06,
      "loss": 0.0109,
      "step": 2493960
    },
    {
      "epoch": 4.081452969632699,
      "grad_norm": 0.15724444389343262,
      "learning_rate": 1.8491990801446993e-06,
      "loss": 0.0053,
      "step": 2493980
    },
    {
      "epoch": 4.081485700071353,
      "grad_norm": 0.1338244080543518,
      "learning_rate": 1.8491331879311825e-06,
      "loss": 0.0123,
      "step": 2494000
    },
    {
      "epoch": 4.081518430510005,
      "grad_norm": 0.2683809995651245,
      "learning_rate": 1.8490672957176652e-06,
      "loss": 0.0111,
      "step": 2494020
    },
    {
      "epoch": 4.081551160948659,
      "grad_norm": 0.2001372128725052,
      "learning_rate": 1.8490014035041482e-06,
      "loss": 0.0124,
      "step": 2494040
    },
    {
      "epoch": 4.081583891387313,
      "grad_norm": 0.631260335445404,
      "learning_rate": 1.848935511290631e-06,
      "loss": 0.0109,
      "step": 2494060
    },
    {
      "epoch": 4.081616621825965,
      "grad_norm": 0.2056966871023178,
      "learning_rate": 1.8488696190771139e-06,
      "loss": 0.0115,
      "step": 2494080
    },
    {
      "epoch": 4.081649352264619,
      "grad_norm": 0.19929209351539612,
      "learning_rate": 1.8488037268635966e-06,
      "loss": 0.0114,
      "step": 2494100
    },
    {
      "epoch": 4.0816820827032725,
      "grad_norm": 0.4207361936569214,
      "learning_rate": 1.8487378346500796e-06,
      "loss": 0.014,
      "step": 2494120
    },
    {
      "epoch": 4.081714813141926,
      "grad_norm": 0.1676119565963745,
      "learning_rate": 1.8486719424365623e-06,
      "loss": 0.0096,
      "step": 2494140
    },
    {
      "epoch": 4.081747543580579,
      "grad_norm": 0.27701953053474426,
      "learning_rate": 1.8486060502230453e-06,
      "loss": 0.0083,
      "step": 2494160
    },
    {
      "epoch": 4.081780274019232,
      "grad_norm": 0.3341085612773895,
      "learning_rate": 1.8485401580095282e-06,
      "loss": 0.0109,
      "step": 2494180
    },
    {
      "epoch": 4.081813004457886,
      "grad_norm": 0.1045583188533783,
      "learning_rate": 1.8484742657960112e-06,
      "loss": 0.0059,
      "step": 2494200
    },
    {
      "epoch": 4.081845734896539,
      "grad_norm": 0.1401987373828888,
      "learning_rate": 1.848408373582494e-06,
      "loss": 0.0086,
      "step": 2494220
    },
    {
      "epoch": 4.081878465335192,
      "grad_norm": 0.34291139245033264,
      "learning_rate": 1.8483424813689769e-06,
      "loss": 0.0124,
      "step": 2494240
    },
    {
      "epoch": 4.081911195773846,
      "grad_norm": 0.039496660232543945,
      "learning_rate": 1.8482765891554596e-06,
      "loss": 0.0076,
      "step": 2494260
    },
    {
      "epoch": 4.0819439262124995,
      "grad_norm": 0.2485351264476776,
      "learning_rate": 1.8482106969419426e-06,
      "loss": 0.0074,
      "step": 2494280
    },
    {
      "epoch": 4.081976656651152,
      "grad_norm": 0.09233039617538452,
      "learning_rate": 1.8481448047284253e-06,
      "loss": 0.0087,
      "step": 2494300
    },
    {
      "epoch": 4.082009387089806,
      "grad_norm": 0.3950823247432709,
      "learning_rate": 1.8480789125149082e-06,
      "loss": 0.0075,
      "step": 2494320
    },
    {
      "epoch": 4.082042117528459,
      "grad_norm": 0.39063823223114014,
      "learning_rate": 1.848013020301391e-06,
      "loss": 0.0073,
      "step": 2494340
    },
    {
      "epoch": 4.082074847967112,
      "grad_norm": 0.1791609674692154,
      "learning_rate": 1.8479471280878741e-06,
      "loss": 0.0122,
      "step": 2494360
    },
    {
      "epoch": 4.082107578405766,
      "grad_norm": 0.12853741645812988,
      "learning_rate": 1.8478812358743569e-06,
      "loss": 0.0073,
      "step": 2494380
    },
    {
      "epoch": 4.082140308844419,
      "grad_norm": 0.27103519439697266,
      "learning_rate": 1.8478153436608398e-06,
      "loss": 0.0106,
      "step": 2494400
    },
    {
      "epoch": 4.082173039283073,
      "grad_norm": 0.11616657674312592,
      "learning_rate": 1.8477494514473226e-06,
      "loss": 0.0075,
      "step": 2494420
    },
    {
      "epoch": 4.0822057697217256,
      "grad_norm": 0.06085439398884773,
      "learning_rate": 1.8476835592338055e-06,
      "loss": 0.0078,
      "step": 2494440
    },
    {
      "epoch": 4.082238500160379,
      "grad_norm": 0.18281644582748413,
      "learning_rate": 1.8476176670202883e-06,
      "loss": 0.0109,
      "step": 2494460
    },
    {
      "epoch": 4.082271230599033,
      "grad_norm": 0.0816541537642479,
      "learning_rate": 1.8475517748067712e-06,
      "loss": 0.0085,
      "step": 2494480
    },
    {
      "epoch": 4.0823039610376854,
      "grad_norm": 0.3030765950679779,
      "learning_rate": 1.847485882593254e-06,
      "loss": 0.0098,
      "step": 2494500
    },
    {
      "epoch": 4.082336691476339,
      "grad_norm": 0.32071399688720703,
      "learning_rate": 1.847419990379737e-06,
      "loss": 0.0082,
      "step": 2494520
    },
    {
      "epoch": 4.082369421914993,
      "grad_norm": 0.34662437438964844,
      "learning_rate": 1.8473540981662199e-06,
      "loss": 0.0109,
      "step": 2494540
    },
    {
      "epoch": 4.082402152353646,
      "grad_norm": 0.17983724176883698,
      "learning_rate": 1.8472882059527028e-06,
      "loss": 0.0155,
      "step": 2494560
    },
    {
      "epoch": 4.082434882792299,
      "grad_norm": 0.13143444061279297,
      "learning_rate": 1.8472223137391856e-06,
      "loss": 0.0106,
      "step": 2494580
    },
    {
      "epoch": 4.0824676132309525,
      "grad_norm": 0.46121877431869507,
      "learning_rate": 1.8471564215256685e-06,
      "loss": 0.0158,
      "step": 2494600
    },
    {
      "epoch": 4.082500343669606,
      "grad_norm": 0.3238814175128937,
      "learning_rate": 1.8470905293121512e-06,
      "loss": 0.0126,
      "step": 2494620
    },
    {
      "epoch": 4.082533074108259,
      "grad_norm": 0.24989522993564606,
      "learning_rate": 1.8470246370986342e-06,
      "loss": 0.0085,
      "step": 2494640
    },
    {
      "epoch": 4.082565804546912,
      "grad_norm": 0.24238748848438263,
      "learning_rate": 1.846958744885117e-06,
      "loss": 0.01,
      "step": 2494660
    },
    {
      "epoch": 4.082598534985566,
      "grad_norm": 0.0759459063410759,
      "learning_rate": 1.8468928526715999e-06,
      "loss": 0.0103,
      "step": 2494680
    },
    {
      "epoch": 4.08263126542422,
      "grad_norm": 0.34490737318992615,
      "learning_rate": 1.8468269604580826e-06,
      "loss": 0.0073,
      "step": 2494700
    },
    {
      "epoch": 4.082663995862872,
      "grad_norm": 0.3858252465724945,
      "learning_rate": 1.8467610682445658e-06,
      "loss": 0.0062,
      "step": 2494720
    },
    {
      "epoch": 4.082696726301526,
      "grad_norm": 0.47837215662002563,
      "learning_rate": 1.8466951760310487e-06,
      "loss": 0.0117,
      "step": 2494740
    },
    {
      "epoch": 4.0827294567401795,
      "grad_norm": 0.08358445763587952,
      "learning_rate": 1.8466292838175315e-06,
      "loss": 0.0112,
      "step": 2494760
    },
    {
      "epoch": 4.082762187178832,
      "grad_norm": 0.1900259405374527,
      "learning_rate": 1.8465633916040142e-06,
      "loss": 0.011,
      "step": 2494780
    },
    {
      "epoch": 4.082794917617486,
      "grad_norm": 0.3195764124393463,
      "learning_rate": 1.8464974993904972e-06,
      "loss": 0.0111,
      "step": 2494800
    },
    {
      "epoch": 4.082827648056139,
      "grad_norm": 0.2806587517261505,
      "learning_rate": 1.84643160717698e-06,
      "loss": 0.0089,
      "step": 2494820
    },
    {
      "epoch": 4.082860378494793,
      "grad_norm": 0.2094869315624237,
      "learning_rate": 1.8463657149634629e-06,
      "loss": 0.0078,
      "step": 2494840
    },
    {
      "epoch": 4.082893108933446,
      "grad_norm": 0.166129931807518,
      "learning_rate": 1.8462998227499456e-06,
      "loss": 0.0071,
      "step": 2494860
    },
    {
      "epoch": 4.082925839372099,
      "grad_norm": 0.14249873161315918,
      "learning_rate": 1.8462339305364288e-06,
      "loss": 0.0091,
      "step": 2494880
    },
    {
      "epoch": 4.082958569810753,
      "grad_norm": 0.3190220594406128,
      "learning_rate": 1.8461680383229117e-06,
      "loss": 0.0082,
      "step": 2494900
    },
    {
      "epoch": 4.082991300249406,
      "grad_norm": 0.4919571578502655,
      "learning_rate": 1.8461021461093945e-06,
      "loss": 0.0102,
      "step": 2494920
    },
    {
      "epoch": 4.083024030688059,
      "grad_norm": 0.18917183578014374,
      "learning_rate": 1.8460362538958774e-06,
      "loss": 0.0091,
      "step": 2494940
    },
    {
      "epoch": 4.083056761126713,
      "grad_norm": 0.13551905751228333,
      "learning_rate": 1.8459703616823602e-06,
      "loss": 0.0076,
      "step": 2494960
    },
    {
      "epoch": 4.083089491565366,
      "grad_norm": 0.4255533516407013,
      "learning_rate": 1.8459044694688429e-06,
      "loss": 0.0084,
      "step": 2494980
    },
    {
      "epoch": 4.083122222004019,
      "grad_norm": 0.19315814971923828,
      "learning_rate": 1.8458385772553258e-06,
      "loss": 0.0115,
      "step": 2495000
    },
    {
      "epoch": 4.083154952442673,
      "grad_norm": 0.4756406545639038,
      "learning_rate": 1.8457726850418086e-06,
      "loss": 0.0151,
      "step": 2495020
    },
    {
      "epoch": 4.083187682881326,
      "grad_norm": 0.5372658371925354,
      "learning_rate": 1.8457067928282915e-06,
      "loss": 0.0089,
      "step": 2495040
    },
    {
      "epoch": 4.083220413319979,
      "grad_norm": 0.30903124809265137,
      "learning_rate": 1.8456409006147747e-06,
      "loss": 0.0125,
      "step": 2495060
    },
    {
      "epoch": 4.083253143758633,
      "grad_norm": 0.04385483264923096,
      "learning_rate": 1.8455750084012574e-06,
      "loss": 0.0079,
      "step": 2495080
    },
    {
      "epoch": 4.083285874197286,
      "grad_norm": 0.12306272983551025,
      "learning_rate": 1.8455091161877404e-06,
      "loss": 0.0083,
      "step": 2495100
    },
    {
      "epoch": 4.08331860463594,
      "grad_norm": 0.21940261125564575,
      "learning_rate": 1.8454432239742231e-06,
      "loss": 0.0102,
      "step": 2495120
    },
    {
      "epoch": 4.0833513350745925,
      "grad_norm": 0.13864131271839142,
      "learning_rate": 1.845377331760706e-06,
      "loss": 0.0066,
      "step": 2495140
    },
    {
      "epoch": 4.083384065513246,
      "grad_norm": 0.14948029816150665,
      "learning_rate": 1.8453114395471888e-06,
      "loss": 0.01,
      "step": 2495160
    },
    {
      "epoch": 4.0834167959519,
      "grad_norm": 0.47654643654823303,
      "learning_rate": 1.8452455473336718e-06,
      "loss": 0.0071,
      "step": 2495180
    },
    {
      "epoch": 4.083449526390552,
      "grad_norm": 0.20579765737056732,
      "learning_rate": 1.8451796551201545e-06,
      "loss": 0.0109,
      "step": 2495200
    },
    {
      "epoch": 4.083482256829206,
      "grad_norm": 0.13497348129749298,
      "learning_rate": 1.8451137629066373e-06,
      "loss": 0.0159,
      "step": 2495220
    },
    {
      "epoch": 4.0835149872678596,
      "grad_norm": 0.0973416343331337,
      "learning_rate": 1.8450478706931204e-06,
      "loss": 0.0084,
      "step": 2495240
    },
    {
      "epoch": 4.083547717706512,
      "grad_norm": 0.13990165293216705,
      "learning_rate": 1.8449819784796034e-06,
      "loss": 0.0147,
      "step": 2495260
    },
    {
      "epoch": 4.083580448145166,
      "grad_norm": 0.4502643644809723,
      "learning_rate": 1.844916086266086e-06,
      "loss": 0.0109,
      "step": 2495280
    },
    {
      "epoch": 4.0836131785838194,
      "grad_norm": 0.6444922685623169,
      "learning_rate": 1.844850194052569e-06,
      "loss": 0.0109,
      "step": 2495300
    },
    {
      "epoch": 4.083645909022473,
      "grad_norm": 0.13111987709999084,
      "learning_rate": 1.8447843018390518e-06,
      "loss": 0.0102,
      "step": 2495320
    },
    {
      "epoch": 4.083678639461126,
      "grad_norm": 0.17524607479572296,
      "learning_rate": 1.8447184096255347e-06,
      "loss": 0.0092,
      "step": 2495340
    },
    {
      "epoch": 4.083711369899779,
      "grad_norm": 0.08083526045084,
      "learning_rate": 1.8446525174120175e-06,
      "loss": 0.0081,
      "step": 2495360
    },
    {
      "epoch": 4.083744100338433,
      "grad_norm": 0.11939606070518494,
      "learning_rate": 1.8445866251985004e-06,
      "loss": 0.0091,
      "step": 2495380
    },
    {
      "epoch": 4.083776830777086,
      "grad_norm": 0.21112290024757385,
      "learning_rate": 1.8445207329849832e-06,
      "loss": 0.0068,
      "step": 2495400
    },
    {
      "epoch": 4.083809561215739,
      "grad_norm": 0.22385655343532562,
      "learning_rate": 1.8444548407714663e-06,
      "loss": 0.0098,
      "step": 2495420
    },
    {
      "epoch": 4.083842291654393,
      "grad_norm": 0.3217537999153137,
      "learning_rate": 1.844388948557949e-06,
      "loss": 0.0099,
      "step": 2495440
    },
    {
      "epoch": 4.083875022093046,
      "grad_norm": 0.17956514656543732,
      "learning_rate": 1.844323056344432e-06,
      "loss": 0.0068,
      "step": 2495460
    },
    {
      "epoch": 4.083907752531699,
      "grad_norm": 0.17402082681655884,
      "learning_rate": 1.8442571641309148e-06,
      "loss": 0.0084,
      "step": 2495480
    },
    {
      "epoch": 4.083940482970353,
      "grad_norm": 0.19698315858840942,
      "learning_rate": 1.8441912719173977e-06,
      "loss": 0.0066,
      "step": 2495500
    },
    {
      "epoch": 4.083973213409006,
      "grad_norm": 0.3965725898742676,
      "learning_rate": 1.8441253797038805e-06,
      "loss": 0.0097,
      "step": 2495520
    },
    {
      "epoch": 4.084005943847659,
      "grad_norm": 0.17783023416996002,
      "learning_rate": 1.8440594874903634e-06,
      "loss": 0.0082,
      "step": 2495540
    },
    {
      "epoch": 4.084038674286313,
      "grad_norm": 0.10854313522577286,
      "learning_rate": 1.8439935952768462e-06,
      "loss": 0.0059,
      "step": 2495560
    },
    {
      "epoch": 4.084071404724966,
      "grad_norm": 0.3874731659889221,
      "learning_rate": 1.8439277030633291e-06,
      "loss": 0.0087,
      "step": 2495580
    },
    {
      "epoch": 4.08410413516362,
      "grad_norm": 0.09688615053892136,
      "learning_rate": 1.843861810849812e-06,
      "loss": 0.0089,
      "step": 2495600
    },
    {
      "epoch": 4.0841368656022725,
      "grad_norm": 0.3730261027812958,
      "learning_rate": 1.843795918636295e-06,
      "loss": 0.0076,
      "step": 2495620
    },
    {
      "epoch": 4.084169596040926,
      "grad_norm": 0.1643485277891159,
      "learning_rate": 1.8437300264227777e-06,
      "loss": 0.0065,
      "step": 2495640
    },
    {
      "epoch": 4.08420232647958,
      "grad_norm": 0.48575830459594727,
      "learning_rate": 1.8436641342092607e-06,
      "loss": 0.0095,
      "step": 2495660
    },
    {
      "epoch": 4.084235056918232,
      "grad_norm": 0.1972721964120865,
      "learning_rate": 1.8435982419957434e-06,
      "loss": 0.0106,
      "step": 2495680
    },
    {
      "epoch": 4.084267787356886,
      "grad_norm": 0.1231764480471611,
      "learning_rate": 1.8435323497822264e-06,
      "loss": 0.0103,
      "step": 2495700
    },
    {
      "epoch": 4.08430051779554,
      "grad_norm": 0.48461484909057617,
      "learning_rate": 1.8434664575687091e-06,
      "loss": 0.0091,
      "step": 2495720
    },
    {
      "epoch": 4.084333248234193,
      "grad_norm": 0.4168930947780609,
      "learning_rate": 1.843400565355192e-06,
      "loss": 0.0115,
      "step": 2495740
    },
    {
      "epoch": 4.084365978672846,
      "grad_norm": 0.5220869779586792,
      "learning_rate": 1.843334673141675e-06,
      "loss": 0.0123,
      "step": 2495760
    },
    {
      "epoch": 4.0843987091114995,
      "grad_norm": 0.07775428891181946,
      "learning_rate": 1.843268780928158e-06,
      "loss": 0.0077,
      "step": 2495780
    },
    {
      "epoch": 4.084431439550153,
      "grad_norm": 0.07267403602600098,
      "learning_rate": 1.8432028887146407e-06,
      "loss": 0.0062,
      "step": 2495800
    },
    {
      "epoch": 4.084464169988806,
      "grad_norm": 0.38465553522109985,
      "learning_rate": 1.8431369965011237e-06,
      "loss": 0.0067,
      "step": 2495820
    },
    {
      "epoch": 4.084496900427459,
      "grad_norm": 0.15973900258541107,
      "learning_rate": 1.8430711042876064e-06,
      "loss": 0.0096,
      "step": 2495840
    },
    {
      "epoch": 4.084529630866113,
      "grad_norm": 0.2746429741382599,
      "learning_rate": 1.8430052120740894e-06,
      "loss": 0.0127,
      "step": 2495860
    },
    {
      "epoch": 4.084562361304767,
      "grad_norm": 0.267494261264801,
      "learning_rate": 1.8429393198605721e-06,
      "loss": 0.0103,
      "step": 2495880
    },
    {
      "epoch": 4.084595091743419,
      "grad_norm": 0.3314119875431061,
      "learning_rate": 1.842873427647055e-06,
      "loss": 0.0125,
      "step": 2495900
    },
    {
      "epoch": 4.084627822182073,
      "grad_norm": 0.2813606858253479,
      "learning_rate": 1.8428075354335378e-06,
      "loss": 0.008,
      "step": 2495920
    },
    {
      "epoch": 4.0846605526207265,
      "grad_norm": 0.16465328633785248,
      "learning_rate": 1.842741643220021e-06,
      "loss": 0.0078,
      "step": 2495940
    },
    {
      "epoch": 4.084693283059379,
      "grad_norm": 0.1804123818874359,
      "learning_rate": 1.842675751006504e-06,
      "loss": 0.0105,
      "step": 2495960
    },
    {
      "epoch": 4.084726013498033,
      "grad_norm": 0.11743806302547455,
      "learning_rate": 1.8426098587929867e-06,
      "loss": 0.0067,
      "step": 2495980
    },
    {
      "epoch": 4.084758743936686,
      "grad_norm": 0.3002709448337555,
      "learning_rate": 1.8425439665794694e-06,
      "loss": 0.0091,
      "step": 2496000
    },
    {
      "epoch": 4.08479147437534,
      "grad_norm": 0.3513071835041046,
      "learning_rate": 1.8424780743659523e-06,
      "loss": 0.0104,
      "step": 2496020
    },
    {
      "epoch": 4.084824204813993,
      "grad_norm": 0.17234352231025696,
      "learning_rate": 1.842412182152435e-06,
      "loss": 0.0164,
      "step": 2496040
    },
    {
      "epoch": 4.084856935252646,
      "grad_norm": 0.2359764724969864,
      "learning_rate": 1.842346289938918e-06,
      "loss": 0.0096,
      "step": 2496060
    },
    {
      "epoch": 4.0848896656913,
      "grad_norm": 0.18544341623783112,
      "learning_rate": 1.8422803977254008e-06,
      "loss": 0.0096,
      "step": 2496080
    },
    {
      "epoch": 4.084922396129953,
      "grad_norm": 0.18108804523944855,
      "learning_rate": 1.8422145055118837e-06,
      "loss": 0.0079,
      "step": 2496100
    },
    {
      "epoch": 4.084955126568606,
      "grad_norm": 0.2616637945175171,
      "learning_rate": 1.8421486132983669e-06,
      "loss": 0.0095,
      "step": 2496120
    },
    {
      "epoch": 4.08498785700726,
      "grad_norm": 0.1190904825925827,
      "learning_rate": 1.8420827210848496e-06,
      "loss": 0.0113,
      "step": 2496140
    },
    {
      "epoch": 4.085020587445913,
      "grad_norm": 0.16719889640808105,
      "learning_rate": 1.8420168288713326e-06,
      "loss": 0.0063,
      "step": 2496160
    },
    {
      "epoch": 4.085053317884566,
      "grad_norm": 0.47306960821151733,
      "learning_rate": 1.8419509366578153e-06,
      "loss": 0.0094,
      "step": 2496180
    },
    {
      "epoch": 4.08508604832322,
      "grad_norm": 0.2744508385658264,
      "learning_rate": 1.841885044444298e-06,
      "loss": 0.0063,
      "step": 2496200
    },
    {
      "epoch": 4.085118778761873,
      "grad_norm": 0.4131731390953064,
      "learning_rate": 1.841819152230781e-06,
      "loss": 0.0132,
      "step": 2496220
    },
    {
      "epoch": 4.085151509200526,
      "grad_norm": 0.12757807970046997,
      "learning_rate": 1.8417532600172638e-06,
      "loss": 0.0056,
      "step": 2496240
    },
    {
      "epoch": 4.0851842396391795,
      "grad_norm": 0.4254103899002075,
      "learning_rate": 1.8416873678037467e-06,
      "loss": 0.0098,
      "step": 2496260
    },
    {
      "epoch": 4.085216970077833,
      "grad_norm": 0.21238835155963898,
      "learning_rate": 1.8416214755902294e-06,
      "loss": 0.0097,
      "step": 2496280
    },
    {
      "epoch": 4.085249700516487,
      "grad_norm": 0.6319851875305176,
      "learning_rate": 1.8415555833767126e-06,
      "loss": 0.0096,
      "step": 2496300
    },
    {
      "epoch": 4.085282430955139,
      "grad_norm": 0.4373190104961395,
      "learning_rate": 1.8414896911631956e-06,
      "loss": 0.0131,
      "step": 2496320
    },
    {
      "epoch": 4.085315161393793,
      "grad_norm": 0.16001011431217194,
      "learning_rate": 1.8414237989496783e-06,
      "loss": 0.0064,
      "step": 2496340
    },
    {
      "epoch": 4.085347891832447,
      "grad_norm": 0.13525548577308655,
      "learning_rate": 1.8413579067361613e-06,
      "loss": 0.0112,
      "step": 2496360
    },
    {
      "epoch": 4.085380622271099,
      "grad_norm": 0.05575569346547127,
      "learning_rate": 1.841292014522644e-06,
      "loss": 0.0087,
      "step": 2496380
    },
    {
      "epoch": 4.085413352709753,
      "grad_norm": 0.1261434406042099,
      "learning_rate": 1.841226122309127e-06,
      "loss": 0.0101,
      "step": 2496400
    },
    {
      "epoch": 4.0854460831484065,
      "grad_norm": 0.4332773685455322,
      "learning_rate": 1.8411602300956097e-06,
      "loss": 0.0093,
      "step": 2496420
    },
    {
      "epoch": 4.08547881358706,
      "grad_norm": 0.23209570348262787,
      "learning_rate": 1.8410943378820924e-06,
      "loss": 0.0059,
      "step": 2496440
    },
    {
      "epoch": 4.085511544025713,
      "grad_norm": 0.19071662425994873,
      "learning_rate": 1.8410284456685754e-06,
      "loss": 0.0082,
      "step": 2496460
    },
    {
      "epoch": 4.085544274464366,
      "grad_norm": 0.24346671998500824,
      "learning_rate": 1.8409625534550585e-06,
      "loss": 0.0091,
      "step": 2496480
    },
    {
      "epoch": 4.08557700490302,
      "grad_norm": 0.12645237147808075,
      "learning_rate": 1.8408966612415413e-06,
      "loss": 0.0072,
      "step": 2496500
    },
    {
      "epoch": 4.085609735341673,
      "grad_norm": 0.21147416532039642,
      "learning_rate": 1.8408307690280242e-06,
      "loss": 0.012,
      "step": 2496520
    },
    {
      "epoch": 4.085642465780326,
      "grad_norm": 0.29055899381637573,
      "learning_rate": 1.840764876814507e-06,
      "loss": 0.0103,
      "step": 2496540
    },
    {
      "epoch": 4.08567519621898,
      "grad_norm": 0.17934606969356537,
      "learning_rate": 1.84069898460099e-06,
      "loss": 0.0103,
      "step": 2496560
    },
    {
      "epoch": 4.0857079266576335,
      "grad_norm": 0.12405422329902649,
      "learning_rate": 1.8406330923874727e-06,
      "loss": 0.0111,
      "step": 2496580
    },
    {
      "epoch": 4.085740657096286,
      "grad_norm": 0.3523593544960022,
      "learning_rate": 1.8405672001739556e-06,
      "loss": 0.0128,
      "step": 2496600
    },
    {
      "epoch": 4.08577338753494,
      "grad_norm": 0.19885578751564026,
      "learning_rate": 1.8405013079604384e-06,
      "loss": 0.0149,
      "step": 2496620
    },
    {
      "epoch": 4.085806117973593,
      "grad_norm": 0.47154882550239563,
      "learning_rate": 1.8404354157469215e-06,
      "loss": 0.0061,
      "step": 2496640
    },
    {
      "epoch": 4.085838848412246,
      "grad_norm": 0.1877845823764801,
      "learning_rate": 1.8403695235334043e-06,
      "loss": 0.0108,
      "step": 2496660
    },
    {
      "epoch": 4.0858715788509,
      "grad_norm": 0.48495250940322876,
      "learning_rate": 1.8403036313198872e-06,
      "loss": 0.0064,
      "step": 2496680
    },
    {
      "epoch": 4.085904309289553,
      "grad_norm": 0.271994948387146,
      "learning_rate": 1.84023773910637e-06,
      "loss": 0.0078,
      "step": 2496700
    },
    {
      "epoch": 4.085937039728206,
      "grad_norm": 0.5358341932296753,
      "learning_rate": 1.840171846892853e-06,
      "loss": 0.0073,
      "step": 2496720
    },
    {
      "epoch": 4.08596977016686,
      "grad_norm": 0.15113717317581177,
      "learning_rate": 1.8401059546793356e-06,
      "loss": 0.01,
      "step": 2496740
    },
    {
      "epoch": 4.086002500605513,
      "grad_norm": 0.07712294161319733,
      "learning_rate": 1.8400400624658186e-06,
      "loss": 0.0074,
      "step": 2496760
    },
    {
      "epoch": 4.086035231044167,
      "grad_norm": 0.7907889485359192,
      "learning_rate": 1.8399741702523013e-06,
      "loss": 0.0118,
      "step": 2496780
    },
    {
      "epoch": 4.0860679614828195,
      "grad_norm": 0.12087923288345337,
      "learning_rate": 1.8399082780387843e-06,
      "loss": 0.0087,
      "step": 2496800
    },
    {
      "epoch": 4.086100691921473,
      "grad_norm": 0.34722334146499634,
      "learning_rate": 1.8398423858252672e-06,
      "loss": 0.0142,
      "step": 2496820
    },
    {
      "epoch": 4.086133422360127,
      "grad_norm": 0.22519543766975403,
      "learning_rate": 1.8397764936117502e-06,
      "loss": 0.0062,
      "step": 2496840
    },
    {
      "epoch": 4.08616615279878,
      "grad_norm": 0.18560917675495148,
      "learning_rate": 1.839710601398233e-06,
      "loss": 0.0084,
      "step": 2496860
    },
    {
      "epoch": 4.086198883237433,
      "grad_norm": 0.25118085741996765,
      "learning_rate": 1.8396447091847159e-06,
      "loss": 0.0087,
      "step": 2496880
    },
    {
      "epoch": 4.086231613676087,
      "grad_norm": 0.3450399339199066,
      "learning_rate": 1.8395788169711986e-06,
      "loss": 0.0071,
      "step": 2496900
    },
    {
      "epoch": 4.08626434411474,
      "grad_norm": 0.18896669149398804,
      "learning_rate": 1.8395129247576816e-06,
      "loss": 0.0121,
      "step": 2496920
    },
    {
      "epoch": 4.086297074553393,
      "grad_norm": 0.1508384644985199,
      "learning_rate": 1.8394470325441643e-06,
      "loss": 0.0103,
      "step": 2496940
    },
    {
      "epoch": 4.0863298049920465,
      "grad_norm": 0.20703589916229248,
      "learning_rate": 1.8393811403306473e-06,
      "loss": 0.0102,
      "step": 2496960
    },
    {
      "epoch": 4.0863625354307,
      "grad_norm": 0.39664632081985474,
      "learning_rate": 1.83931524811713e-06,
      "loss": 0.0068,
      "step": 2496980
    },
    {
      "epoch": 4.086395265869353,
      "grad_norm": 1.0454808473587036,
      "learning_rate": 1.8392493559036132e-06,
      "loss": 0.01,
      "step": 2497000
    },
    {
      "epoch": 4.086427996308006,
      "grad_norm": 0.7168017625808716,
      "learning_rate": 1.839183463690096e-06,
      "loss": 0.0107,
      "step": 2497020
    },
    {
      "epoch": 4.08646072674666,
      "grad_norm": 0.5043568015098572,
      "learning_rate": 1.8391175714765788e-06,
      "loss": 0.0114,
      "step": 2497040
    },
    {
      "epoch": 4.0864934571853135,
      "grad_norm": 0.18357110023498535,
      "learning_rate": 1.8390516792630616e-06,
      "loss": 0.0146,
      "step": 2497060
    },
    {
      "epoch": 4.086526187623966,
      "grad_norm": 0.2138383388519287,
      "learning_rate": 1.8389857870495445e-06,
      "loss": 0.0068,
      "step": 2497080
    },
    {
      "epoch": 4.08655891806262,
      "grad_norm": 0.30199092626571655,
      "learning_rate": 1.8389198948360273e-06,
      "loss": 0.0119,
      "step": 2497100
    },
    {
      "epoch": 4.086591648501273,
      "grad_norm": 0.22315850853919983,
      "learning_rate": 1.8388540026225102e-06,
      "loss": 0.0078,
      "step": 2497120
    },
    {
      "epoch": 4.086624378939926,
      "grad_norm": 0.24045583605766296,
      "learning_rate": 1.838788110408993e-06,
      "loss": 0.0096,
      "step": 2497140
    },
    {
      "epoch": 4.08665710937858,
      "grad_norm": 0.19564948976039886,
      "learning_rate": 1.838722218195476e-06,
      "loss": 0.0106,
      "step": 2497160
    },
    {
      "epoch": 4.086689839817233,
      "grad_norm": 0.17413051426410675,
      "learning_rate": 1.8386563259819589e-06,
      "loss": 0.0074,
      "step": 2497180
    },
    {
      "epoch": 4.086722570255887,
      "grad_norm": 0.3526421785354614,
      "learning_rate": 1.8385904337684418e-06,
      "loss": 0.009,
      "step": 2497200
    },
    {
      "epoch": 4.08675530069454,
      "grad_norm": 0.17141352593898773,
      "learning_rate": 1.8385245415549246e-06,
      "loss": 0.0092,
      "step": 2497220
    },
    {
      "epoch": 4.086788031133193,
      "grad_norm": 0.13634072244167328,
      "learning_rate": 1.8384586493414075e-06,
      "loss": 0.012,
      "step": 2497240
    },
    {
      "epoch": 4.086820761571847,
      "grad_norm": 0.31298306584358215,
      "learning_rate": 1.8383927571278903e-06,
      "loss": 0.0095,
      "step": 2497260
    },
    {
      "epoch": 4.0868534920104995,
      "grad_norm": 0.3444262444972992,
      "learning_rate": 1.8383268649143732e-06,
      "loss": 0.0132,
      "step": 2497280
    },
    {
      "epoch": 4.086886222449153,
      "grad_norm": 0.23102517426013947,
      "learning_rate": 1.838260972700856e-06,
      "loss": 0.0093,
      "step": 2497300
    },
    {
      "epoch": 4.086918952887807,
      "grad_norm": 0.19983093440532684,
      "learning_rate": 1.838195080487339e-06,
      "loss": 0.0073,
      "step": 2497320
    },
    {
      "epoch": 4.08695168332646,
      "grad_norm": 0.1294063776731491,
      "learning_rate": 1.8381291882738216e-06,
      "loss": 0.0128,
      "step": 2497340
    },
    {
      "epoch": 4.086984413765113,
      "grad_norm": 0.06278753280639648,
      "learning_rate": 1.8380632960603048e-06,
      "loss": 0.006,
      "step": 2497360
    },
    {
      "epoch": 4.087017144203767,
      "grad_norm": 0.11913667619228363,
      "learning_rate": 1.8379974038467878e-06,
      "loss": 0.0115,
      "step": 2497380
    },
    {
      "epoch": 4.08704987464242,
      "grad_norm": 0.4346316456794739,
      "learning_rate": 1.8379315116332705e-06,
      "loss": 0.0107,
      "step": 2497400
    },
    {
      "epoch": 4.087082605081073,
      "grad_norm": 0.35237735509872437,
      "learning_rate": 1.8378656194197532e-06,
      "loss": 0.0064,
      "step": 2497420
    },
    {
      "epoch": 4.0871153355197265,
      "grad_norm": 0.42305538058280945,
      "learning_rate": 1.8377997272062362e-06,
      "loss": 0.0085,
      "step": 2497440
    },
    {
      "epoch": 4.08714806595838,
      "grad_norm": 0.2362576574087143,
      "learning_rate": 1.837733834992719e-06,
      "loss": 0.0084,
      "step": 2497460
    },
    {
      "epoch": 4.087180796397034,
      "grad_norm": 0.13890422880649567,
      "learning_rate": 1.8376679427792019e-06,
      "loss": 0.0094,
      "step": 2497480
    },
    {
      "epoch": 4.087213526835686,
      "grad_norm": 0.28211259841918945,
      "learning_rate": 1.8376020505656846e-06,
      "loss": 0.0087,
      "step": 2497500
    },
    {
      "epoch": 4.08724625727434,
      "grad_norm": 0.6949740052223206,
      "learning_rate": 1.8375361583521678e-06,
      "loss": 0.0081,
      "step": 2497520
    },
    {
      "epoch": 4.087278987712994,
      "grad_norm": 0.07847361266613007,
      "learning_rate": 1.8374702661386507e-06,
      "loss": 0.01,
      "step": 2497540
    },
    {
      "epoch": 4.087311718151646,
      "grad_norm": 0.19099169969558716,
      "learning_rate": 1.8374043739251335e-06,
      "loss": 0.0063,
      "step": 2497560
    },
    {
      "epoch": 4.0873444485903,
      "grad_norm": 0.20098289847373962,
      "learning_rate": 1.8373384817116164e-06,
      "loss": 0.0126,
      "step": 2497580
    },
    {
      "epoch": 4.0873771790289535,
      "grad_norm": 0.2693791389465332,
      "learning_rate": 1.8372725894980992e-06,
      "loss": 0.0069,
      "step": 2497600
    },
    {
      "epoch": 4.087409909467607,
      "grad_norm": 1.02407968044281,
      "learning_rate": 1.8372066972845821e-06,
      "loss": 0.0106,
      "step": 2497620
    },
    {
      "epoch": 4.08744263990626,
      "grad_norm": 0.4759902358055115,
      "learning_rate": 1.8371408050710649e-06,
      "loss": 0.0069,
      "step": 2497640
    },
    {
      "epoch": 4.087475370344913,
      "grad_norm": 0.06017996370792389,
      "learning_rate": 1.8370749128575476e-06,
      "loss": 0.008,
      "step": 2497660
    },
    {
      "epoch": 4.087508100783567,
      "grad_norm": 0.09902921319007874,
      "learning_rate": 1.8370090206440305e-06,
      "loss": 0.0077,
      "step": 2497680
    },
    {
      "epoch": 4.08754083122222,
      "grad_norm": 0.07476217299699783,
      "learning_rate": 1.8369431284305137e-06,
      "loss": 0.0063,
      "step": 2497700
    },
    {
      "epoch": 4.087573561660873,
      "grad_norm": 0.0714002251625061,
      "learning_rate": 1.8368772362169964e-06,
      "loss": 0.0123,
      "step": 2497720
    },
    {
      "epoch": 4.087606292099527,
      "grad_norm": 0.08741310983896255,
      "learning_rate": 1.8368113440034794e-06,
      "loss": 0.0111,
      "step": 2497740
    },
    {
      "epoch": 4.0876390225381805,
      "grad_norm": 0.3677402436733246,
      "learning_rate": 1.8367454517899621e-06,
      "loss": 0.0087,
      "step": 2497760
    },
    {
      "epoch": 4.087671752976833,
      "grad_norm": 0.10928446799516678,
      "learning_rate": 1.836679559576445e-06,
      "loss": 0.0146,
      "step": 2497780
    },
    {
      "epoch": 4.087704483415487,
      "grad_norm": 0.3005862534046173,
      "learning_rate": 1.8366136673629278e-06,
      "loss": 0.0114,
      "step": 2497800
    },
    {
      "epoch": 4.08773721385414,
      "grad_norm": 0.13617639243602753,
      "learning_rate": 1.8365477751494108e-06,
      "loss": 0.0078,
      "step": 2497820
    },
    {
      "epoch": 4.087769944292793,
      "grad_norm": 0.18732792139053345,
      "learning_rate": 1.8364818829358935e-06,
      "loss": 0.0079,
      "step": 2497840
    },
    {
      "epoch": 4.087802674731447,
      "grad_norm": 0.20667162537574768,
      "learning_rate": 1.8364159907223763e-06,
      "loss": 0.0057,
      "step": 2497860
    },
    {
      "epoch": 4.0878354051701,
      "grad_norm": 0.0741024985909462,
      "learning_rate": 1.8363500985088594e-06,
      "loss": 0.0077,
      "step": 2497880
    },
    {
      "epoch": 4.087868135608754,
      "grad_norm": 0.21558719873428345,
      "learning_rate": 1.8362842062953424e-06,
      "loss": 0.0127,
      "step": 2497900
    },
    {
      "epoch": 4.0879008660474065,
      "grad_norm": 0.21982605755329132,
      "learning_rate": 1.8362183140818251e-06,
      "loss": 0.0078,
      "step": 2497920
    },
    {
      "epoch": 4.08793359648606,
      "grad_norm": 0.20392464101314545,
      "learning_rate": 1.836152421868308e-06,
      "loss": 0.0101,
      "step": 2497940
    },
    {
      "epoch": 4.087966326924714,
      "grad_norm": 0.7139570116996765,
      "learning_rate": 1.8360865296547908e-06,
      "loss": 0.009,
      "step": 2497960
    },
    {
      "epoch": 4.087999057363366,
      "grad_norm": 0.19296860694885254,
      "learning_rate": 1.8360206374412738e-06,
      "loss": 0.0079,
      "step": 2497980
    },
    {
      "epoch": 4.08803178780202,
      "grad_norm": 0.17948967218399048,
      "learning_rate": 1.8359547452277565e-06,
      "loss": 0.0106,
      "step": 2498000
    },
    {
      "epoch": 4.088064518240674,
      "grad_norm": 0.30525436997413635,
      "learning_rate": 1.8358888530142394e-06,
      "loss": 0.0087,
      "step": 2498020
    },
    {
      "epoch": 4.088097248679327,
      "grad_norm": 0.22023563086986542,
      "learning_rate": 1.8358229608007222e-06,
      "loss": 0.0078,
      "step": 2498040
    },
    {
      "epoch": 4.08812997911798,
      "grad_norm": 0.10429145395755768,
      "learning_rate": 1.8357570685872054e-06,
      "loss": 0.0093,
      "step": 2498060
    },
    {
      "epoch": 4.0881627095566335,
      "grad_norm": 0.2760659456253052,
      "learning_rate": 1.835691176373688e-06,
      "loss": 0.013,
      "step": 2498080
    },
    {
      "epoch": 4.088195439995287,
      "grad_norm": 0.6582292914390564,
      "learning_rate": 1.835625284160171e-06,
      "loss": 0.0092,
      "step": 2498100
    },
    {
      "epoch": 4.08822817043394,
      "grad_norm": 0.19918939471244812,
      "learning_rate": 1.8355593919466538e-06,
      "loss": 0.0065,
      "step": 2498120
    },
    {
      "epoch": 4.088260900872593,
      "grad_norm": 0.12728463113307953,
      "learning_rate": 1.8354934997331367e-06,
      "loss": 0.0137,
      "step": 2498140
    },
    {
      "epoch": 4.088293631311247,
      "grad_norm": 0.32491403818130493,
      "learning_rate": 1.8354276075196195e-06,
      "loss": 0.0107,
      "step": 2498160
    },
    {
      "epoch": 4.088326361749901,
      "grad_norm": 0.1924176961183548,
      "learning_rate": 1.8353617153061024e-06,
      "loss": 0.0071,
      "step": 2498180
    },
    {
      "epoch": 4.088359092188553,
      "grad_norm": 0.32158732414245605,
      "learning_rate": 1.8352958230925852e-06,
      "loss": 0.0116,
      "step": 2498200
    },
    {
      "epoch": 4.088391822627207,
      "grad_norm": 0.3191646337509155,
      "learning_rate": 1.8352299308790681e-06,
      "loss": 0.0097,
      "step": 2498220
    },
    {
      "epoch": 4.0884245530658605,
      "grad_norm": 0.24607747793197632,
      "learning_rate": 1.835164038665551e-06,
      "loss": 0.0075,
      "step": 2498240
    },
    {
      "epoch": 4.088457283504513,
      "grad_norm": 0.3569401800632477,
      "learning_rate": 1.835098146452034e-06,
      "loss": 0.0082,
      "step": 2498260
    },
    {
      "epoch": 4.088490013943167,
      "grad_norm": 0.4321438670158386,
      "learning_rate": 1.8350322542385168e-06,
      "loss": 0.0101,
      "step": 2498280
    },
    {
      "epoch": 4.08852274438182,
      "grad_norm": 0.11638657003641129,
      "learning_rate": 1.8349663620249997e-06,
      "loss": 0.0122,
      "step": 2498300
    },
    {
      "epoch": 4.088555474820474,
      "grad_norm": 0.2076341062784195,
      "learning_rate": 1.8349004698114825e-06,
      "loss": 0.0088,
      "step": 2498320
    },
    {
      "epoch": 4.088588205259127,
      "grad_norm": 0.19828465580940247,
      "learning_rate": 1.8348345775979654e-06,
      "loss": 0.0099,
      "step": 2498340
    },
    {
      "epoch": 4.08862093569778,
      "grad_norm": 0.08249907195568085,
      "learning_rate": 1.8347686853844481e-06,
      "loss": 0.0078,
      "step": 2498360
    },
    {
      "epoch": 4.088653666136434,
      "grad_norm": 0.2624775767326355,
      "learning_rate": 1.834702793170931e-06,
      "loss": 0.0074,
      "step": 2498380
    },
    {
      "epoch": 4.088686396575087,
      "grad_norm": 0.7529426217079163,
      "learning_rate": 1.834636900957414e-06,
      "loss": 0.0094,
      "step": 2498400
    },
    {
      "epoch": 4.08871912701374,
      "grad_norm": 0.10650109499692917,
      "learning_rate": 1.834571008743897e-06,
      "loss": 0.0085,
      "step": 2498420
    },
    {
      "epoch": 4.088751857452394,
      "grad_norm": 0.1872694492340088,
      "learning_rate": 1.8345051165303797e-06,
      "loss": 0.0059,
      "step": 2498440
    },
    {
      "epoch": 4.0887845878910465,
      "grad_norm": 0.3036805987358093,
      "learning_rate": 1.8344392243168627e-06,
      "loss": 0.0096,
      "step": 2498460
    },
    {
      "epoch": 4.0888173183297,
      "grad_norm": 0.17411518096923828,
      "learning_rate": 1.8343733321033454e-06,
      "loss": 0.0095,
      "step": 2498480
    },
    {
      "epoch": 4.088850048768354,
      "grad_norm": 0.4636315405368805,
      "learning_rate": 1.8343074398898284e-06,
      "loss": 0.008,
      "step": 2498500
    },
    {
      "epoch": 4.088882779207007,
      "grad_norm": 0.20802278816699982,
      "learning_rate": 1.8342415476763111e-06,
      "loss": 0.0106,
      "step": 2498520
    },
    {
      "epoch": 4.08891550964566,
      "grad_norm": 0.20468240976333618,
      "learning_rate": 1.834175655462794e-06,
      "loss": 0.0073,
      "step": 2498540
    },
    {
      "epoch": 4.088948240084314,
      "grad_norm": 0.1729060560464859,
      "learning_rate": 1.8341097632492768e-06,
      "loss": 0.0075,
      "step": 2498560
    },
    {
      "epoch": 4.088980970522967,
      "grad_norm": 0.19720284640789032,
      "learning_rate": 1.83404387103576e-06,
      "loss": 0.0091,
      "step": 2498580
    },
    {
      "epoch": 4.08901370096162,
      "grad_norm": 0.18406276404857635,
      "learning_rate": 1.833977978822243e-06,
      "loss": 0.0095,
      "step": 2498600
    },
    {
      "epoch": 4.0890464314002735,
      "grad_norm": 0.43203434348106384,
      "learning_rate": 1.8339120866087257e-06,
      "loss": 0.0087,
      "step": 2498620
    },
    {
      "epoch": 4.089079161838927,
      "grad_norm": 0.10820186883211136,
      "learning_rate": 1.8338461943952084e-06,
      "loss": 0.0114,
      "step": 2498640
    },
    {
      "epoch": 4.089111892277581,
      "grad_norm": 0.20597942173480988,
      "learning_rate": 1.8337803021816914e-06,
      "loss": 0.0073,
      "step": 2498660
    },
    {
      "epoch": 4.089144622716233,
      "grad_norm": 0.2989674508571625,
      "learning_rate": 1.833714409968174e-06,
      "loss": 0.0088,
      "step": 2498680
    },
    {
      "epoch": 4.089177353154887,
      "grad_norm": 0.22293627262115479,
      "learning_rate": 1.833648517754657e-06,
      "loss": 0.009,
      "step": 2498700
    },
    {
      "epoch": 4.0892100835935405,
      "grad_norm": 0.168105810880661,
      "learning_rate": 1.8335826255411398e-06,
      "loss": 0.0076,
      "step": 2498720
    },
    {
      "epoch": 4.089242814032193,
      "grad_norm": 0.09904132783412933,
      "learning_rate": 1.8335167333276227e-06,
      "loss": 0.0126,
      "step": 2498740
    },
    {
      "epoch": 4.089275544470847,
      "grad_norm": 0.4137655794620514,
      "learning_rate": 1.833450841114106e-06,
      "loss": 0.0078,
      "step": 2498760
    },
    {
      "epoch": 4.0893082749095,
      "grad_norm": 0.09524575620889664,
      "learning_rate": 1.8333849489005886e-06,
      "loss": 0.0084,
      "step": 2498780
    },
    {
      "epoch": 4.089341005348154,
      "grad_norm": 0.44839781522750854,
      "learning_rate": 1.8333190566870716e-06,
      "loss": 0.0092,
      "step": 2498800
    },
    {
      "epoch": 4.089373735786807,
      "grad_norm": 0.29878273606300354,
      "learning_rate": 1.8332531644735543e-06,
      "loss": 0.01,
      "step": 2498820
    },
    {
      "epoch": 4.08940646622546,
      "grad_norm": 0.44874638319015503,
      "learning_rate": 1.833187272260037e-06,
      "loss": 0.0096,
      "step": 2498840
    },
    {
      "epoch": 4.089439196664114,
      "grad_norm": 0.18605442345142365,
      "learning_rate": 1.83312138004652e-06,
      "loss": 0.0083,
      "step": 2498860
    },
    {
      "epoch": 4.089471927102767,
      "grad_norm": 0.2706218957901001,
      "learning_rate": 1.8330554878330028e-06,
      "loss": 0.013,
      "step": 2498880
    },
    {
      "epoch": 4.08950465754142,
      "grad_norm": 0.107023224234581,
      "learning_rate": 1.8329895956194857e-06,
      "loss": 0.0101,
      "step": 2498900
    },
    {
      "epoch": 4.089537387980074,
      "grad_norm": 0.12190599739551544,
      "learning_rate": 1.8329237034059685e-06,
      "loss": 0.0135,
      "step": 2498920
    },
    {
      "epoch": 4.089570118418727,
      "grad_norm": 0.35199975967407227,
      "learning_rate": 1.8328578111924516e-06,
      "loss": 0.0111,
      "step": 2498940
    },
    {
      "epoch": 4.08960284885738,
      "grad_norm": 0.37303292751312256,
      "learning_rate": 1.8327919189789346e-06,
      "loss": 0.0075,
      "step": 2498960
    },
    {
      "epoch": 4.089635579296034,
      "grad_norm": 0.07085027545690536,
      "learning_rate": 1.8327260267654173e-06,
      "loss": 0.0058,
      "step": 2498980
    },
    {
      "epoch": 4.089668309734687,
      "grad_norm": 0.5816282629966736,
      "learning_rate": 1.8326601345519003e-06,
      "loss": 0.0105,
      "step": 2499000
    },
    {
      "epoch": 4.08970104017334,
      "grad_norm": 0.23226061463356018,
      "learning_rate": 1.832594242338383e-06,
      "loss": 0.0077,
      "step": 2499020
    },
    {
      "epoch": 4.089733770611994,
      "grad_norm": 0.5171412825584412,
      "learning_rate": 1.832528350124866e-06,
      "loss": 0.0107,
      "step": 2499040
    },
    {
      "epoch": 4.089766501050647,
      "grad_norm": 0.32784411311149597,
      "learning_rate": 1.8324624579113487e-06,
      "loss": 0.009,
      "step": 2499060
    },
    {
      "epoch": 4.089799231489301,
      "grad_norm": 0.13405799865722656,
      "learning_rate": 1.8323965656978314e-06,
      "loss": 0.0097,
      "step": 2499080
    },
    {
      "epoch": 4.0898319619279535,
      "grad_norm": 0.3957807421684265,
      "learning_rate": 1.8323306734843144e-06,
      "loss": 0.0093,
      "step": 2499100
    },
    {
      "epoch": 4.089864692366607,
      "grad_norm": 0.20149387419223785,
      "learning_rate": 1.8322647812707975e-06,
      "loss": 0.0074,
      "step": 2499120
    },
    {
      "epoch": 4.089897422805261,
      "grad_norm": 0.06751707196235657,
      "learning_rate": 1.8321988890572803e-06,
      "loss": 0.0072,
      "step": 2499140
    },
    {
      "epoch": 4.089930153243913,
      "grad_norm": 0.20368966460227966,
      "learning_rate": 1.8321329968437632e-06,
      "loss": 0.0103,
      "step": 2499160
    },
    {
      "epoch": 4.089962883682567,
      "grad_norm": 0.5088498592376709,
      "learning_rate": 1.832067104630246e-06,
      "loss": 0.0092,
      "step": 2499180
    },
    {
      "epoch": 4.089995614121221,
      "grad_norm": 0.16988462209701538,
      "learning_rate": 1.832001212416729e-06,
      "loss": 0.0064,
      "step": 2499200
    },
    {
      "epoch": 4.090028344559874,
      "grad_norm": 0.11239775270223618,
      "learning_rate": 1.8319353202032117e-06,
      "loss": 0.0112,
      "step": 2499220
    },
    {
      "epoch": 4.090061074998527,
      "grad_norm": 0.5069089531898499,
      "learning_rate": 1.8318694279896946e-06,
      "loss": 0.0088,
      "step": 2499240
    },
    {
      "epoch": 4.0900938054371805,
      "grad_norm": 0.10450921952724457,
      "learning_rate": 1.8318035357761774e-06,
      "loss": 0.0065,
      "step": 2499260
    },
    {
      "epoch": 4.090126535875834,
      "grad_norm": 0.059756774455308914,
      "learning_rate": 1.8317376435626605e-06,
      "loss": 0.0088,
      "step": 2499280
    },
    {
      "epoch": 4.090159266314487,
      "grad_norm": 0.422024667263031,
      "learning_rate": 1.8316717513491433e-06,
      "loss": 0.0119,
      "step": 2499300
    },
    {
      "epoch": 4.09019199675314,
      "grad_norm": 0.2086825668811798,
      "learning_rate": 1.8316058591356262e-06,
      "loss": 0.0108,
      "step": 2499320
    },
    {
      "epoch": 4.090224727191794,
      "grad_norm": 0.2966010272502899,
      "learning_rate": 1.831539966922109e-06,
      "loss": 0.0067,
      "step": 2499340
    },
    {
      "epoch": 4.090257457630448,
      "grad_norm": 0.1328263282775879,
      "learning_rate": 1.831474074708592e-06,
      "loss": 0.0105,
      "step": 2499360
    },
    {
      "epoch": 4.0902901880691,
      "grad_norm": 0.3261360228061676,
      "learning_rate": 1.8314081824950746e-06,
      "loss": 0.0098,
      "step": 2499380
    },
    {
      "epoch": 4.090322918507754,
      "grad_norm": 0.33185961842536926,
      "learning_rate": 1.8313422902815576e-06,
      "loss": 0.0082,
      "step": 2499400
    },
    {
      "epoch": 4.0903556489464075,
      "grad_norm": 0.3987271785736084,
      "learning_rate": 1.8312763980680403e-06,
      "loss": 0.0098,
      "step": 2499420
    },
    {
      "epoch": 4.09038837938506,
      "grad_norm": 0.3634132742881775,
      "learning_rate": 1.8312105058545233e-06,
      "loss": 0.0114,
      "step": 2499440
    },
    {
      "epoch": 4.090421109823714,
      "grad_norm": 0.11012590676546097,
      "learning_rate": 1.8311446136410062e-06,
      "loss": 0.0108,
      "step": 2499460
    },
    {
      "epoch": 4.090453840262367,
      "grad_norm": 0.1287451982498169,
      "learning_rate": 1.8310787214274892e-06,
      "loss": 0.0086,
      "step": 2499480
    },
    {
      "epoch": 4.090486570701021,
      "grad_norm": 0.22854802012443542,
      "learning_rate": 1.831012829213972e-06,
      "loss": 0.0057,
      "step": 2499500
    },
    {
      "epoch": 4.090519301139674,
      "grad_norm": 0.22192242741584778,
      "learning_rate": 1.8309469370004549e-06,
      "loss": 0.0075,
      "step": 2499520
    },
    {
      "epoch": 4.090552031578327,
      "grad_norm": 0.5625189542770386,
      "learning_rate": 1.8308810447869376e-06,
      "loss": 0.0112,
      "step": 2499540
    },
    {
      "epoch": 4.090584762016981,
      "grad_norm": 0.4673953056335449,
      "learning_rate": 1.8308151525734206e-06,
      "loss": 0.0125,
      "step": 2499560
    },
    {
      "epoch": 4.0906174924556336,
      "grad_norm": 0.4526125490665436,
      "learning_rate": 1.8307492603599033e-06,
      "loss": 0.0081,
      "step": 2499580
    },
    {
      "epoch": 4.090650222894287,
      "grad_norm": 0.19625668227672577,
      "learning_rate": 1.8306833681463863e-06,
      "loss": 0.0105,
      "step": 2499600
    },
    {
      "epoch": 4.090682953332941,
      "grad_norm": 0.14646723866462708,
      "learning_rate": 1.830617475932869e-06,
      "loss": 0.0077,
      "step": 2499620
    },
    {
      "epoch": 4.090715683771594,
      "grad_norm": 0.3078155517578125,
      "learning_rate": 1.8305515837193522e-06,
      "loss": 0.0087,
      "step": 2499640
    },
    {
      "epoch": 4.090748414210247,
      "grad_norm": 0.07485762238502502,
      "learning_rate": 1.830485691505835e-06,
      "loss": 0.0057,
      "step": 2499660
    },
    {
      "epoch": 4.090781144648901,
      "grad_norm": 0.49572286009788513,
      "learning_rate": 1.8304197992923179e-06,
      "loss": 0.0117,
      "step": 2499680
    },
    {
      "epoch": 4.090813875087554,
      "grad_norm": 0.17316730320453644,
      "learning_rate": 1.8303539070788006e-06,
      "loss": 0.0093,
      "step": 2499700
    },
    {
      "epoch": 4.090846605526207,
      "grad_norm": 0.15023677051067352,
      "learning_rate": 1.8302880148652836e-06,
      "loss": 0.0068,
      "step": 2499720
    },
    {
      "epoch": 4.0908793359648605,
      "grad_norm": 0.3034098148345947,
      "learning_rate": 1.8302221226517663e-06,
      "loss": 0.0067,
      "step": 2499740
    },
    {
      "epoch": 4.090912066403514,
      "grad_norm": 0.5134701132774353,
      "learning_rate": 1.8301562304382492e-06,
      "loss": 0.008,
      "step": 2499760
    },
    {
      "epoch": 4.090944796842168,
      "grad_norm": 0.2069547325372696,
      "learning_rate": 1.830090338224732e-06,
      "loss": 0.012,
      "step": 2499780
    },
    {
      "epoch": 4.09097752728082,
      "grad_norm": 0.2904443144798279,
      "learning_rate": 1.830024446011215e-06,
      "loss": 0.0089,
      "step": 2499800
    },
    {
      "epoch": 4.091010257719474,
      "grad_norm": 0.08967322111129761,
      "learning_rate": 1.8299585537976979e-06,
      "loss": 0.0051,
      "step": 2499820
    },
    {
      "epoch": 4.091042988158128,
      "grad_norm": 0.111689992249012,
      "learning_rate": 1.8298926615841808e-06,
      "loss": 0.0084,
      "step": 2499840
    },
    {
      "epoch": 4.09107571859678,
      "grad_norm": 0.10717877000570297,
      "learning_rate": 1.8298267693706636e-06,
      "loss": 0.0081,
      "step": 2499860
    },
    {
      "epoch": 4.091108449035434,
      "grad_norm": 0.0981130599975586,
      "learning_rate": 1.8297608771571465e-06,
      "loss": 0.0094,
      "step": 2499880
    },
    {
      "epoch": 4.0911411794740875,
      "grad_norm": 0.12489370256662369,
      "learning_rate": 1.8296949849436293e-06,
      "loss": 0.0126,
      "step": 2499900
    },
    {
      "epoch": 4.09117390991274,
      "grad_norm": 0.23083384335041046,
      "learning_rate": 1.8296290927301122e-06,
      "loss": 0.0105,
      "step": 2499920
    },
    {
      "epoch": 4.091206640351394,
      "grad_norm": 0.10563904792070389,
      "learning_rate": 1.829563200516595e-06,
      "loss": 0.0131,
      "step": 2499940
    },
    {
      "epoch": 4.091239370790047,
      "grad_norm": 0.07923988252878189,
      "learning_rate": 1.829497308303078e-06,
      "loss": 0.01,
      "step": 2499960
    },
    {
      "epoch": 4.091272101228701,
      "grad_norm": 0.5458239912986755,
      "learning_rate": 1.8294314160895607e-06,
      "loss": 0.0068,
      "step": 2499980
    },
    {
      "epoch": 4.091304831667354,
      "grad_norm": 0.17962393164634705,
      "learning_rate": 1.8293655238760438e-06,
      "loss": 0.013,
      "step": 2500000
    },
    {
      "epoch": 4.091304831667354,
      "eval_loss": 0.006118923891335726,
      "eval_runtime": 6456.7305,
      "eval_samples_per_second": 159.192,
      "eval_steps_per_second": 15.919,
      "eval_sts-dev_pearson_cosine": 0.9861136412914893,
      "eval_sts-dev_spearman_cosine": 0.8962676984682117,
      "step": 2500000
    },
    {
      "epoch": 4.091337562106007,
      "grad_norm": 0.16580364108085632,
      "learning_rate": 1.8292996316625268e-06,
      "loss": 0.0083,
      "step": 2500020
    },
    {
      "epoch": 4.091370292544661,
      "grad_norm": 0.4574635624885559,
      "learning_rate": 1.8292337394490095e-06,
      "loss": 0.0083,
      "step": 2500040
    },
    {
      "epoch": 4.091403022983314,
      "grad_norm": 0.20091845095157623,
      "learning_rate": 1.8291678472354922e-06,
      "loss": 0.0065,
      "step": 2500060
    },
    {
      "epoch": 4.091435753421967,
      "grad_norm": 0.4154842793941498,
      "learning_rate": 1.8291019550219752e-06,
      "loss": 0.0065,
      "step": 2500080
    },
    {
      "epoch": 4.091468483860621,
      "grad_norm": 0.12065146863460541,
      "learning_rate": 1.829036062808458e-06,
      "loss": 0.0096,
      "step": 2500100
    },
    {
      "epoch": 4.091501214299274,
      "grad_norm": 0.477481484413147,
      "learning_rate": 1.8289701705949409e-06,
      "loss": 0.0117,
      "step": 2500120
    },
    {
      "epoch": 4.091533944737927,
      "grad_norm": 0.20301423966884613,
      "learning_rate": 1.8289042783814236e-06,
      "loss": 0.0107,
      "step": 2500140
    },
    {
      "epoch": 4.091566675176581,
      "grad_norm": 0.17553940415382385,
      "learning_rate": 1.8288383861679068e-06,
      "loss": 0.008,
      "step": 2500160
    },
    {
      "epoch": 4.091599405615234,
      "grad_norm": 0.2836867570877075,
      "learning_rate": 1.8287724939543897e-06,
      "loss": 0.0073,
      "step": 2500180
    },
    {
      "epoch": 4.091632136053887,
      "grad_norm": 0.5176183581352234,
      "learning_rate": 1.8287066017408725e-06,
      "loss": 0.0094,
      "step": 2500200
    },
    {
      "epoch": 4.091664866492541,
      "grad_norm": 0.2755998373031616,
      "learning_rate": 1.8286407095273554e-06,
      "loss": 0.0094,
      "step": 2500220
    },
    {
      "epoch": 4.091697596931194,
      "grad_norm": 0.5131638646125793,
      "learning_rate": 1.8285748173138382e-06,
      "loss": 0.0072,
      "step": 2500240
    },
    {
      "epoch": 4.091730327369848,
      "grad_norm": 0.12510167062282562,
      "learning_rate": 1.8285089251003211e-06,
      "loss": 0.0091,
      "step": 2500260
    },
    {
      "epoch": 4.0917630578085005,
      "grad_norm": 0.4634367823600769,
      "learning_rate": 1.8284430328868039e-06,
      "loss": 0.0095,
      "step": 2500280
    },
    {
      "epoch": 4.091795788247154,
      "grad_norm": 0.2939738929271698,
      "learning_rate": 1.8283771406732866e-06,
      "loss": 0.0092,
      "step": 2500300
    },
    {
      "epoch": 4.091828518685808,
      "grad_norm": 0.07398127764463425,
      "learning_rate": 1.8283112484597696e-06,
      "loss": 0.0063,
      "step": 2500320
    },
    {
      "epoch": 4.09186124912446,
      "grad_norm": 0.3493979573249817,
      "learning_rate": 1.8282453562462527e-06,
      "loss": 0.0078,
      "step": 2500340
    },
    {
      "epoch": 4.091893979563114,
      "grad_norm": 0.20342226326465607,
      "learning_rate": 1.8281794640327355e-06,
      "loss": 0.0092,
      "step": 2500360
    },
    {
      "epoch": 4.0919267100017676,
      "grad_norm": 0.10175461322069168,
      "learning_rate": 1.8281135718192184e-06,
      "loss": 0.0068,
      "step": 2500380
    },
    {
      "epoch": 4.091959440440421,
      "grad_norm": 0.27189338207244873,
      "learning_rate": 1.8280476796057011e-06,
      "loss": 0.0065,
      "step": 2500400
    },
    {
      "epoch": 4.091992170879074,
      "grad_norm": 0.13164132833480835,
      "learning_rate": 1.827981787392184e-06,
      "loss": 0.0084,
      "step": 2500420
    },
    {
      "epoch": 4.0920249013177274,
      "grad_norm": 0.38791462779045105,
      "learning_rate": 1.8279158951786668e-06,
      "loss": 0.0094,
      "step": 2500440
    },
    {
      "epoch": 4.092057631756381,
      "grad_norm": 0.14443790912628174,
      "learning_rate": 1.8278500029651498e-06,
      "loss": 0.0103,
      "step": 2500460
    },
    {
      "epoch": 4.092090362195034,
      "grad_norm": 0.5532789826393127,
      "learning_rate": 1.8277841107516325e-06,
      "loss": 0.0108,
      "step": 2500480
    },
    {
      "epoch": 4.092123092633687,
      "grad_norm": 0.1702631413936615,
      "learning_rate": 1.8277182185381153e-06,
      "loss": 0.0092,
      "step": 2500500
    },
    {
      "epoch": 4.092155823072341,
      "grad_norm": 0.22748667001724243,
      "learning_rate": 1.8276523263245984e-06,
      "loss": 0.0093,
      "step": 2500520
    },
    {
      "epoch": 4.0921885535109945,
      "grad_norm": 0.24193772673606873,
      "learning_rate": 1.8275864341110814e-06,
      "loss": 0.0088,
      "step": 2500540
    },
    {
      "epoch": 4.092221283949647,
      "grad_norm": 0.0996449738740921,
      "learning_rate": 1.8275205418975641e-06,
      "loss": 0.0081,
      "step": 2500560
    },
    {
      "epoch": 4.092254014388301,
      "grad_norm": 0.3691173195838928,
      "learning_rate": 1.827454649684047e-06,
      "loss": 0.0131,
      "step": 2500580
    },
    {
      "epoch": 4.092286744826954,
      "grad_norm": 0.15145866572856903,
      "learning_rate": 1.8273887574705298e-06,
      "loss": 0.0081,
      "step": 2500600
    },
    {
      "epoch": 4.092319475265607,
      "grad_norm": 0.24352805316448212,
      "learning_rate": 1.8273228652570128e-06,
      "loss": 0.0105,
      "step": 2500620
    },
    {
      "epoch": 4.092352205704261,
      "grad_norm": 0.32993465662002563,
      "learning_rate": 1.8272569730434955e-06,
      "loss": 0.0088,
      "step": 2500640
    },
    {
      "epoch": 4.092384936142914,
      "grad_norm": 0.10215449333190918,
      "learning_rate": 1.8271910808299785e-06,
      "loss": 0.0078,
      "step": 2500660
    },
    {
      "epoch": 4.092417666581568,
      "grad_norm": 0.489544540643692,
      "learning_rate": 1.8271251886164612e-06,
      "loss": 0.01,
      "step": 2500680
    },
    {
      "epoch": 4.092450397020221,
      "grad_norm": 0.3192632794380188,
      "learning_rate": 1.8270592964029444e-06,
      "loss": 0.0082,
      "step": 2500700
    },
    {
      "epoch": 4.092483127458874,
      "grad_norm": 0.23323194682598114,
      "learning_rate": 1.826993404189427e-06,
      "loss": 0.0068,
      "step": 2500720
    },
    {
      "epoch": 4.092515857897528,
      "grad_norm": 0.12491415441036224,
      "learning_rate": 1.82692751197591e-06,
      "loss": 0.0118,
      "step": 2500740
    },
    {
      "epoch": 4.0925485883361805,
      "grad_norm": 0.18789063394069672,
      "learning_rate": 1.8268616197623928e-06,
      "loss": 0.0055,
      "step": 2500760
    },
    {
      "epoch": 4.092581318774834,
      "grad_norm": 0.12446064502000809,
      "learning_rate": 1.8267957275488757e-06,
      "loss": 0.0122,
      "step": 2500780
    },
    {
      "epoch": 4.092614049213488,
      "grad_norm": 0.4622133672237396,
      "learning_rate": 1.8267298353353585e-06,
      "loss": 0.0085,
      "step": 2500800
    },
    {
      "epoch": 4.092646779652141,
      "grad_norm": 0.20167897641658783,
      "learning_rate": 1.8266639431218414e-06,
      "loss": 0.0102,
      "step": 2500820
    },
    {
      "epoch": 4.092679510090794,
      "grad_norm": 0.20843243598937988,
      "learning_rate": 1.8265980509083242e-06,
      "loss": 0.0112,
      "step": 2500840
    },
    {
      "epoch": 4.092712240529448,
      "grad_norm": 0.3403942584991455,
      "learning_rate": 1.8265321586948073e-06,
      "loss": 0.0099,
      "step": 2500860
    },
    {
      "epoch": 4.092744970968101,
      "grad_norm": 0.3963351845741272,
      "learning_rate": 1.82646626648129e-06,
      "loss": 0.0158,
      "step": 2500880
    },
    {
      "epoch": 4.092777701406754,
      "grad_norm": 0.2718196511268616,
      "learning_rate": 1.826400374267773e-06,
      "loss": 0.011,
      "step": 2500900
    },
    {
      "epoch": 4.0928104318454075,
      "grad_norm": 0.1607125699520111,
      "learning_rate": 1.8263344820542558e-06,
      "loss": 0.0058,
      "step": 2500920
    },
    {
      "epoch": 4.092843162284061,
      "grad_norm": 0.19279274344444275,
      "learning_rate": 1.8262685898407387e-06,
      "loss": 0.0065,
      "step": 2500940
    },
    {
      "epoch": 4.092875892722715,
      "grad_norm": 0.12496443837881088,
      "learning_rate": 1.8262026976272215e-06,
      "loss": 0.015,
      "step": 2500960
    },
    {
      "epoch": 4.092908623161367,
      "grad_norm": 0.19179576635360718,
      "learning_rate": 1.8261368054137044e-06,
      "loss": 0.007,
      "step": 2500980
    },
    {
      "epoch": 4.092941353600021,
      "grad_norm": 0.0991741493344307,
      "learning_rate": 1.8260709132001872e-06,
      "loss": 0.017,
      "step": 2501000
    },
    {
      "epoch": 4.092974084038675,
      "grad_norm": 0.4047788381576538,
      "learning_rate": 1.82600502098667e-06,
      "loss": 0.01,
      "step": 2501020
    },
    {
      "epoch": 4.093006814477327,
      "grad_norm": 0.22575843334197998,
      "learning_rate": 1.825939128773153e-06,
      "loss": 0.0085,
      "step": 2501040
    },
    {
      "epoch": 4.093039544915981,
      "grad_norm": 0.3489914834499359,
      "learning_rate": 1.825873236559636e-06,
      "loss": 0.0058,
      "step": 2501060
    },
    {
      "epoch": 4.0930722753546345,
      "grad_norm": 0.0766158178448677,
      "learning_rate": 1.8258073443461187e-06,
      "loss": 0.0081,
      "step": 2501080
    },
    {
      "epoch": 4.093105005793288,
      "grad_norm": 0.3336566686630249,
      "learning_rate": 1.8257414521326017e-06,
      "loss": 0.0053,
      "step": 2501100
    },
    {
      "epoch": 4.093137736231941,
      "grad_norm": 0.1314043253660202,
      "learning_rate": 1.8256755599190844e-06,
      "loss": 0.0134,
      "step": 2501120
    },
    {
      "epoch": 4.093170466670594,
      "grad_norm": 0.2631435990333557,
      "learning_rate": 1.8256096677055674e-06,
      "loss": 0.0074,
      "step": 2501140
    },
    {
      "epoch": 4.093203197109248,
      "grad_norm": 0.2844320237636566,
      "learning_rate": 1.8255437754920501e-06,
      "loss": 0.0076,
      "step": 2501160
    },
    {
      "epoch": 4.093235927547901,
      "grad_norm": 0.11130928248167038,
      "learning_rate": 1.825477883278533e-06,
      "loss": 0.0097,
      "step": 2501180
    },
    {
      "epoch": 4.093268657986554,
      "grad_norm": 0.06338737905025482,
      "learning_rate": 1.8254119910650158e-06,
      "loss": 0.0084,
      "step": 2501200
    },
    {
      "epoch": 4.093301388425208,
      "grad_norm": 0.11529045552015305,
      "learning_rate": 1.825346098851499e-06,
      "loss": 0.0146,
      "step": 2501220
    },
    {
      "epoch": 4.0933341188638614,
      "grad_norm": 0.24899017810821533,
      "learning_rate": 1.825280206637982e-06,
      "loss": 0.0054,
      "step": 2501240
    },
    {
      "epoch": 4.093366849302514,
      "grad_norm": 0.15865446627140045,
      "learning_rate": 1.8252143144244647e-06,
      "loss": 0.0059,
      "step": 2501260
    },
    {
      "epoch": 4.093399579741168,
      "grad_norm": 0.14164897799491882,
      "learning_rate": 1.8251484222109474e-06,
      "loss": 0.0081,
      "step": 2501280
    },
    {
      "epoch": 4.093432310179821,
      "grad_norm": 0.34105509519577026,
      "learning_rate": 1.8250825299974304e-06,
      "loss": 0.0102,
      "step": 2501300
    },
    {
      "epoch": 4.093465040618474,
      "grad_norm": 0.06584823876619339,
      "learning_rate": 1.8250166377839131e-06,
      "loss": 0.0075,
      "step": 2501320
    },
    {
      "epoch": 4.093497771057128,
      "grad_norm": 0.14049501717090607,
      "learning_rate": 1.824950745570396e-06,
      "loss": 0.0069,
      "step": 2501340
    },
    {
      "epoch": 4.093530501495781,
      "grad_norm": 0.5174347162246704,
      "learning_rate": 1.8248848533568788e-06,
      "loss": 0.0076,
      "step": 2501360
    },
    {
      "epoch": 4.093563231934434,
      "grad_norm": 0.2534168064594269,
      "learning_rate": 1.8248189611433617e-06,
      "loss": 0.0148,
      "step": 2501380
    },
    {
      "epoch": 4.0935959623730875,
      "grad_norm": 0.1731727570295334,
      "learning_rate": 1.824753068929845e-06,
      "loss": 0.0106,
      "step": 2501400
    },
    {
      "epoch": 4.093628692811741,
      "grad_norm": 0.25146710872650146,
      "learning_rate": 1.8246871767163277e-06,
      "loss": 0.0136,
      "step": 2501420
    },
    {
      "epoch": 4.093661423250395,
      "grad_norm": 0.1449568122625351,
      "learning_rate": 1.8246212845028106e-06,
      "loss": 0.0115,
      "step": 2501440
    },
    {
      "epoch": 4.093694153689047,
      "grad_norm": 0.5941711068153381,
      "learning_rate": 1.8245553922892933e-06,
      "loss": 0.0122,
      "step": 2501460
    },
    {
      "epoch": 4.093726884127701,
      "grad_norm": 0.5584302544593811,
      "learning_rate": 1.824489500075776e-06,
      "loss": 0.0093,
      "step": 2501480
    },
    {
      "epoch": 4.093759614566355,
      "grad_norm": 0.15344533324241638,
      "learning_rate": 1.824423607862259e-06,
      "loss": 0.0067,
      "step": 2501500
    },
    {
      "epoch": 4.093792345005007,
      "grad_norm": 0.18453611433506012,
      "learning_rate": 1.8243577156487418e-06,
      "loss": 0.0072,
      "step": 2501520
    },
    {
      "epoch": 4.093825075443661,
      "grad_norm": 0.21436826884746552,
      "learning_rate": 1.8242918234352247e-06,
      "loss": 0.0118,
      "step": 2501540
    },
    {
      "epoch": 4.0938578058823145,
      "grad_norm": 0.23726339638233185,
      "learning_rate": 1.8242259312217075e-06,
      "loss": 0.0113,
      "step": 2501560
    },
    {
      "epoch": 4.093890536320968,
      "grad_norm": 0.3588615655899048,
      "learning_rate": 1.8241600390081906e-06,
      "loss": 0.0089,
      "step": 2501580
    },
    {
      "epoch": 4.093923266759621,
      "grad_norm": 0.5405933260917664,
      "learning_rate": 1.8240941467946736e-06,
      "loss": 0.0075,
      "step": 2501600
    },
    {
      "epoch": 4.093955997198274,
      "grad_norm": 0.623788595199585,
      "learning_rate": 1.8240282545811563e-06,
      "loss": 0.0116,
      "step": 2501620
    },
    {
      "epoch": 4.093988727636928,
      "grad_norm": 0.39774638414382935,
      "learning_rate": 1.8239623623676393e-06,
      "loss": 0.0102,
      "step": 2501640
    },
    {
      "epoch": 4.094021458075581,
      "grad_norm": 0.12286130338907242,
      "learning_rate": 1.823896470154122e-06,
      "loss": 0.0052,
      "step": 2501660
    },
    {
      "epoch": 4.094054188514234,
      "grad_norm": 0.14017555117607117,
      "learning_rate": 1.823830577940605e-06,
      "loss": 0.009,
      "step": 2501680
    },
    {
      "epoch": 4.094086918952888,
      "grad_norm": 0.2669007480144501,
      "learning_rate": 1.8237646857270877e-06,
      "loss": 0.0096,
      "step": 2501700
    },
    {
      "epoch": 4.0941196493915415,
      "grad_norm": 0.3792878985404968,
      "learning_rate": 1.8236987935135704e-06,
      "loss": 0.0072,
      "step": 2501720
    },
    {
      "epoch": 4.094152379830194,
      "grad_norm": 0.3305966556072235,
      "learning_rate": 1.8236329013000536e-06,
      "loss": 0.0055,
      "step": 2501740
    },
    {
      "epoch": 4.094185110268848,
      "grad_norm": 0.09506744146347046,
      "learning_rate": 1.8235670090865366e-06,
      "loss": 0.0093,
      "step": 2501760
    },
    {
      "epoch": 4.094217840707501,
      "grad_norm": 0.16482780873775482,
      "learning_rate": 1.8235011168730193e-06,
      "loss": 0.0054,
      "step": 2501780
    },
    {
      "epoch": 4.094250571146154,
      "grad_norm": 0.1655770242214203,
      "learning_rate": 1.8234352246595022e-06,
      "loss": 0.0083,
      "step": 2501800
    },
    {
      "epoch": 4.094283301584808,
      "grad_norm": 0.255224347114563,
      "learning_rate": 1.823369332445985e-06,
      "loss": 0.011,
      "step": 2501820
    },
    {
      "epoch": 4.094316032023461,
      "grad_norm": 0.022069798782467842,
      "learning_rate": 1.823303440232468e-06,
      "loss": 0.0108,
      "step": 2501840
    },
    {
      "epoch": 4.094348762462115,
      "grad_norm": 0.3402666449546814,
      "learning_rate": 1.8232375480189507e-06,
      "loss": 0.0094,
      "step": 2501860
    },
    {
      "epoch": 4.094381492900768,
      "grad_norm": 0.17832884192466736,
      "learning_rate": 1.8231716558054336e-06,
      "loss": 0.0059,
      "step": 2501880
    },
    {
      "epoch": 4.094414223339421,
      "grad_norm": 0.13056232035160065,
      "learning_rate": 1.8231057635919164e-06,
      "loss": 0.0109,
      "step": 2501900
    },
    {
      "epoch": 4.094446953778075,
      "grad_norm": 0.1982521265745163,
      "learning_rate": 1.8230398713783995e-06,
      "loss": 0.0077,
      "step": 2501920
    },
    {
      "epoch": 4.0944796842167275,
      "grad_norm": 0.32270336151123047,
      "learning_rate": 1.8229739791648823e-06,
      "loss": 0.0109,
      "step": 2501940
    },
    {
      "epoch": 4.094512414655381,
      "grad_norm": 0.40655988454818726,
      "learning_rate": 1.8229080869513652e-06,
      "loss": 0.0089,
      "step": 2501960
    },
    {
      "epoch": 4.094545145094035,
      "grad_norm": 0.6450439095497131,
      "learning_rate": 1.822842194737848e-06,
      "loss": 0.0082,
      "step": 2501980
    },
    {
      "epoch": 4.094577875532688,
      "grad_norm": 0.06451873481273651,
      "learning_rate": 1.822776302524331e-06,
      "loss": 0.0071,
      "step": 2502000
    },
    {
      "epoch": 4.094610605971341,
      "grad_norm": 0.33659523725509644,
      "learning_rate": 1.8227104103108137e-06,
      "loss": 0.0107,
      "step": 2502020
    },
    {
      "epoch": 4.094643336409995,
      "grad_norm": 0.18478462100028992,
      "learning_rate": 1.8226445180972966e-06,
      "loss": 0.0071,
      "step": 2502040
    },
    {
      "epoch": 4.094676066848648,
      "grad_norm": 0.28173038363456726,
      "learning_rate": 1.8225786258837793e-06,
      "loss": 0.0079,
      "step": 2502060
    },
    {
      "epoch": 4.094708797287301,
      "grad_norm": 0.25785696506500244,
      "learning_rate": 1.8225127336702623e-06,
      "loss": 0.0075,
      "step": 2502080
    },
    {
      "epoch": 4.0947415277259545,
      "grad_norm": 0.4231455326080322,
      "learning_rate": 1.8224468414567453e-06,
      "loss": 0.0142,
      "step": 2502100
    },
    {
      "epoch": 4.094774258164608,
      "grad_norm": 0.11011995375156403,
      "learning_rate": 1.8223809492432282e-06,
      "loss": 0.0091,
      "step": 2502120
    },
    {
      "epoch": 4.094806988603262,
      "grad_norm": 0.17424213886260986,
      "learning_rate": 1.822315057029711e-06,
      "loss": 0.0056,
      "step": 2502140
    },
    {
      "epoch": 4.094839719041914,
      "grad_norm": 0.12048675119876862,
      "learning_rate": 1.8222491648161939e-06,
      "loss": 0.0068,
      "step": 2502160
    },
    {
      "epoch": 4.094872449480568,
      "grad_norm": 0.07085616141557693,
      "learning_rate": 1.8221832726026766e-06,
      "loss": 0.0081,
      "step": 2502180
    },
    {
      "epoch": 4.0949051799192215,
      "grad_norm": 0.25072693824768066,
      "learning_rate": 1.8221173803891596e-06,
      "loss": 0.0107,
      "step": 2502200
    },
    {
      "epoch": 4.094937910357874,
      "grad_norm": 0.08406529575586319,
      "learning_rate": 1.8220514881756423e-06,
      "loss": 0.0091,
      "step": 2502220
    },
    {
      "epoch": 4.094970640796528,
      "grad_norm": 0.34627389907836914,
      "learning_rate": 1.8219855959621253e-06,
      "loss": 0.0079,
      "step": 2502240
    },
    {
      "epoch": 4.095003371235181,
      "grad_norm": 0.1501571089029312,
      "learning_rate": 1.821919703748608e-06,
      "loss": 0.0095,
      "step": 2502260
    },
    {
      "epoch": 4.095036101673835,
      "grad_norm": 0.1834753453731537,
      "learning_rate": 1.8218538115350912e-06,
      "loss": 0.0104,
      "step": 2502280
    },
    {
      "epoch": 4.095068832112488,
      "grad_norm": 0.40652400255203247,
      "learning_rate": 1.821787919321574e-06,
      "loss": 0.0065,
      "step": 2502300
    },
    {
      "epoch": 4.095101562551141,
      "grad_norm": 0.7636032700538635,
      "learning_rate": 1.8217220271080569e-06,
      "loss": 0.0116,
      "step": 2502320
    },
    {
      "epoch": 4.095134292989795,
      "grad_norm": 0.1751127690076828,
      "learning_rate": 1.8216561348945396e-06,
      "loss": 0.0066,
      "step": 2502340
    },
    {
      "epoch": 4.095167023428448,
      "grad_norm": 0.24879713356494904,
      "learning_rate": 1.8215902426810226e-06,
      "loss": 0.0065,
      "step": 2502360
    },
    {
      "epoch": 4.095199753867101,
      "grad_norm": 0.14655020833015442,
      "learning_rate": 1.8215243504675053e-06,
      "loss": 0.0096,
      "step": 2502380
    },
    {
      "epoch": 4.095232484305755,
      "grad_norm": 0.13554878532886505,
      "learning_rate": 1.8214584582539883e-06,
      "loss": 0.0059,
      "step": 2502400
    },
    {
      "epoch": 4.095265214744408,
      "grad_norm": 0.5282433032989502,
      "learning_rate": 1.821392566040471e-06,
      "loss": 0.0101,
      "step": 2502420
    },
    {
      "epoch": 4.095297945183061,
      "grad_norm": 0.21191221475601196,
      "learning_rate": 1.821326673826954e-06,
      "loss": 0.0092,
      "step": 2502440
    },
    {
      "epoch": 4.095330675621715,
      "grad_norm": 0.30812498927116394,
      "learning_rate": 1.821260781613437e-06,
      "loss": 0.0102,
      "step": 2502460
    },
    {
      "epoch": 4.095363406060368,
      "grad_norm": 0.23699454963207245,
      "learning_rate": 1.8211948893999198e-06,
      "loss": 0.0159,
      "step": 2502480
    },
    {
      "epoch": 4.095396136499021,
      "grad_norm": 0.5309798717498779,
      "learning_rate": 1.8211289971864026e-06,
      "loss": 0.0106,
      "step": 2502500
    },
    {
      "epoch": 4.095428866937675,
      "grad_norm": 0.4816530644893646,
      "learning_rate": 1.8210631049728855e-06,
      "loss": 0.0142,
      "step": 2502520
    },
    {
      "epoch": 4.095461597376328,
      "grad_norm": 0.7228235602378845,
      "learning_rate": 1.8209972127593683e-06,
      "loss": 0.0104,
      "step": 2502540
    },
    {
      "epoch": 4.095494327814982,
      "grad_norm": 0.07952036708593369,
      "learning_rate": 1.8209313205458512e-06,
      "loss": 0.0059,
      "step": 2502560
    },
    {
      "epoch": 4.0955270582536345,
      "grad_norm": 0.09461917728185654,
      "learning_rate": 1.820865428332334e-06,
      "loss": 0.007,
      "step": 2502580
    },
    {
      "epoch": 4.095559788692288,
      "grad_norm": 0.29346808791160583,
      "learning_rate": 1.820799536118817e-06,
      "loss": 0.0088,
      "step": 2502600
    },
    {
      "epoch": 4.095592519130942,
      "grad_norm": 0.13469339907169342,
      "learning_rate": 1.8207336439053e-06,
      "loss": 0.0131,
      "step": 2502620
    },
    {
      "epoch": 4.095625249569594,
      "grad_norm": 0.1064433678984642,
      "learning_rate": 1.8206677516917828e-06,
      "loss": 0.0107,
      "step": 2502640
    },
    {
      "epoch": 4.095657980008248,
      "grad_norm": 0.6793294548988342,
      "learning_rate": 1.8206018594782658e-06,
      "loss": 0.013,
      "step": 2502660
    },
    {
      "epoch": 4.095690710446902,
      "grad_norm": 0.21681877970695496,
      "learning_rate": 1.8205359672647485e-06,
      "loss": 0.009,
      "step": 2502680
    },
    {
      "epoch": 4.095723440885555,
      "grad_norm": 0.1868111491203308,
      "learning_rate": 1.8204700750512313e-06,
      "loss": 0.0061,
      "step": 2502700
    },
    {
      "epoch": 4.095756171324208,
      "grad_norm": 0.21504269540309906,
      "learning_rate": 1.8204041828377142e-06,
      "loss": 0.0093,
      "step": 2502720
    },
    {
      "epoch": 4.0957889017628615,
      "grad_norm": 0.625191330909729,
      "learning_rate": 1.820338290624197e-06,
      "loss": 0.0097,
      "step": 2502740
    },
    {
      "epoch": 4.095821632201515,
      "grad_norm": 0.3384256660938263,
      "learning_rate": 1.82027239841068e-06,
      "loss": 0.0104,
      "step": 2502760
    },
    {
      "epoch": 4.095854362640168,
      "grad_norm": 0.36900755763053894,
      "learning_rate": 1.8202065061971626e-06,
      "loss": 0.008,
      "step": 2502780
    },
    {
      "epoch": 4.095887093078821,
      "grad_norm": 0.5201182961463928,
      "learning_rate": 1.8201406139836458e-06,
      "loss": 0.0118,
      "step": 2502800
    },
    {
      "epoch": 4.095919823517475,
      "grad_norm": 0.22280484437942505,
      "learning_rate": 1.8200747217701288e-06,
      "loss": 0.0137,
      "step": 2502820
    },
    {
      "epoch": 4.095952553956128,
      "grad_norm": 0.3695373237133026,
      "learning_rate": 1.8200088295566115e-06,
      "loss": 0.01,
      "step": 2502840
    },
    {
      "epoch": 4.095985284394781,
      "grad_norm": 0.44983887672424316,
      "learning_rate": 1.8199429373430944e-06,
      "loss": 0.0099,
      "step": 2502860
    },
    {
      "epoch": 4.096018014833435,
      "grad_norm": 0.14814572036266327,
      "learning_rate": 1.8198770451295772e-06,
      "loss": 0.0081,
      "step": 2502880
    },
    {
      "epoch": 4.0960507452720885,
      "grad_norm": 0.1829707771539688,
      "learning_rate": 1.8198111529160601e-06,
      "loss": 0.0123,
      "step": 2502900
    },
    {
      "epoch": 4.096083475710741,
      "grad_norm": 0.45750659704208374,
      "learning_rate": 1.8197452607025429e-06,
      "loss": 0.0086,
      "step": 2502920
    },
    {
      "epoch": 4.096116206149395,
      "grad_norm": 0.2034003585577011,
      "learning_rate": 1.8196793684890256e-06,
      "loss": 0.0114,
      "step": 2502940
    },
    {
      "epoch": 4.096148936588048,
      "grad_norm": 0.16173073649406433,
      "learning_rate": 1.8196134762755086e-06,
      "loss": 0.0101,
      "step": 2502960
    },
    {
      "epoch": 4.096181667026701,
      "grad_norm": 0.07437152415513992,
      "learning_rate": 1.8195475840619917e-06,
      "loss": 0.0101,
      "step": 2502980
    },
    {
      "epoch": 4.096214397465355,
      "grad_norm": 0.16805250942707062,
      "learning_rate": 1.8194816918484745e-06,
      "loss": 0.0055,
      "step": 2503000
    },
    {
      "epoch": 4.096247127904008,
      "grad_norm": 0.3316422402858734,
      "learning_rate": 1.8194157996349574e-06,
      "loss": 0.0079,
      "step": 2503020
    },
    {
      "epoch": 4.096279858342662,
      "grad_norm": 0.5238664150238037,
      "learning_rate": 1.8193499074214402e-06,
      "loss": 0.0088,
      "step": 2503040
    },
    {
      "epoch": 4.0963125887813145,
      "grad_norm": 0.5012524127960205,
      "learning_rate": 1.8192840152079231e-06,
      "loss": 0.0109,
      "step": 2503060
    },
    {
      "epoch": 4.096345319219968,
      "grad_norm": 0.14989332854747772,
      "learning_rate": 1.8192181229944059e-06,
      "loss": 0.0163,
      "step": 2503080
    },
    {
      "epoch": 4.096378049658622,
      "grad_norm": 0.2532312273979187,
      "learning_rate": 1.8191522307808888e-06,
      "loss": 0.0101,
      "step": 2503100
    },
    {
      "epoch": 4.096410780097274,
      "grad_norm": 0.26904618740081787,
      "learning_rate": 1.8190863385673715e-06,
      "loss": 0.0095,
      "step": 2503120
    },
    {
      "epoch": 4.096443510535928,
      "grad_norm": 0.3267708122730255,
      "learning_rate": 1.8190204463538543e-06,
      "loss": 0.0122,
      "step": 2503140
    },
    {
      "epoch": 4.096476240974582,
      "grad_norm": 0.20282036066055298,
      "learning_rate": 1.8189545541403374e-06,
      "loss": 0.0081,
      "step": 2503160
    },
    {
      "epoch": 4.096508971413235,
      "grad_norm": 0.5060815811157227,
      "learning_rate": 1.8188886619268204e-06,
      "loss": 0.0109,
      "step": 2503180
    },
    {
      "epoch": 4.096541701851888,
      "grad_norm": 0.27904418110847473,
      "learning_rate": 1.8188227697133031e-06,
      "loss": 0.011,
      "step": 2503200
    },
    {
      "epoch": 4.0965744322905415,
      "grad_norm": 0.29386499524116516,
      "learning_rate": 1.818756877499786e-06,
      "loss": 0.0077,
      "step": 2503220
    },
    {
      "epoch": 4.096607162729195,
      "grad_norm": 0.27180472016334534,
      "learning_rate": 1.8186909852862688e-06,
      "loss": 0.0118,
      "step": 2503240
    },
    {
      "epoch": 4.096639893167848,
      "grad_norm": 0.44209596514701843,
      "learning_rate": 1.8186250930727518e-06,
      "loss": 0.0099,
      "step": 2503260
    },
    {
      "epoch": 4.096672623606501,
      "grad_norm": 0.192853644490242,
      "learning_rate": 1.8185592008592345e-06,
      "loss": 0.0099,
      "step": 2503280
    },
    {
      "epoch": 4.096705354045155,
      "grad_norm": 0.06104092672467232,
      "learning_rate": 1.8184933086457175e-06,
      "loss": 0.0135,
      "step": 2503300
    },
    {
      "epoch": 4.096738084483809,
      "grad_norm": 0.592322826385498,
      "learning_rate": 1.8184274164322002e-06,
      "loss": 0.0086,
      "step": 2503320
    },
    {
      "epoch": 4.096770814922461,
      "grad_norm": 0.1702473759651184,
      "learning_rate": 1.8183615242186834e-06,
      "loss": 0.0111,
      "step": 2503340
    },
    {
      "epoch": 4.096803545361115,
      "grad_norm": 1.4799177646636963,
      "learning_rate": 1.8182956320051661e-06,
      "loss": 0.0111,
      "step": 2503360
    },
    {
      "epoch": 4.0968362757997685,
      "grad_norm": 0.19518639147281647,
      "learning_rate": 1.818229739791649e-06,
      "loss": 0.0074,
      "step": 2503380
    },
    {
      "epoch": 4.096869006238421,
      "grad_norm": 0.24082572758197784,
      "learning_rate": 1.8181638475781318e-06,
      "loss": 0.0096,
      "step": 2503400
    },
    {
      "epoch": 4.096901736677075,
      "grad_norm": 0.21967928111553192,
      "learning_rate": 1.8180979553646148e-06,
      "loss": 0.0098,
      "step": 2503420
    },
    {
      "epoch": 4.096934467115728,
      "grad_norm": 0.19058473408222198,
      "learning_rate": 1.8180320631510975e-06,
      "loss": 0.0067,
      "step": 2503440
    },
    {
      "epoch": 4.096967197554382,
      "grad_norm": 0.2128833383321762,
      "learning_rate": 1.8179661709375804e-06,
      "loss": 0.0057,
      "step": 2503460
    },
    {
      "epoch": 4.096999927993035,
      "grad_norm": 0.10128101706504822,
      "learning_rate": 1.8179002787240632e-06,
      "loss": 0.0087,
      "step": 2503480
    },
    {
      "epoch": 4.097032658431688,
      "grad_norm": 0.2239527553319931,
      "learning_rate": 1.8178343865105464e-06,
      "loss": 0.0104,
      "step": 2503500
    },
    {
      "epoch": 4.097065388870342,
      "grad_norm": 0.1396002322435379,
      "learning_rate": 1.817768494297029e-06,
      "loss": 0.0115,
      "step": 2503520
    },
    {
      "epoch": 4.097098119308995,
      "grad_norm": 0.285689115524292,
      "learning_rate": 1.817702602083512e-06,
      "loss": 0.0111,
      "step": 2503540
    },
    {
      "epoch": 4.097130849747648,
      "grad_norm": 0.11557622998952866,
      "learning_rate": 1.8176367098699948e-06,
      "loss": 0.0092,
      "step": 2503560
    },
    {
      "epoch": 4.097163580186302,
      "grad_norm": 0.29681679606437683,
      "learning_rate": 1.8175708176564777e-06,
      "loss": 0.0085,
      "step": 2503580
    },
    {
      "epoch": 4.097196310624955,
      "grad_norm": 0.2039189338684082,
      "learning_rate": 1.8175049254429605e-06,
      "loss": 0.0122,
      "step": 2503600
    },
    {
      "epoch": 4.097229041063608,
      "grad_norm": 0.23276486992835999,
      "learning_rate": 1.8174390332294434e-06,
      "loss": 0.0138,
      "step": 2503620
    },
    {
      "epoch": 4.097261771502262,
      "grad_norm": 0.4782922565937042,
      "learning_rate": 1.8173731410159262e-06,
      "loss": 0.0142,
      "step": 2503640
    },
    {
      "epoch": 4.097294501940915,
      "grad_norm": 0.2994470000267029,
      "learning_rate": 1.8173072488024091e-06,
      "loss": 0.0086,
      "step": 2503660
    },
    {
      "epoch": 4.097327232379568,
      "grad_norm": 0.13157258927822113,
      "learning_rate": 1.817241356588892e-06,
      "loss": 0.0094,
      "step": 2503680
    },
    {
      "epoch": 4.097359962818222,
      "grad_norm": 0.27256065607070923,
      "learning_rate": 1.817175464375375e-06,
      "loss": 0.0117,
      "step": 2503700
    },
    {
      "epoch": 4.097392693256875,
      "grad_norm": 0.27919071912765503,
      "learning_rate": 1.8171095721618578e-06,
      "loss": 0.0085,
      "step": 2503720
    },
    {
      "epoch": 4.097425423695529,
      "grad_norm": 0.16554564237594604,
      "learning_rate": 1.8170436799483407e-06,
      "loss": 0.0109,
      "step": 2503740
    },
    {
      "epoch": 4.0974581541341815,
      "grad_norm": 0.34782207012176514,
      "learning_rate": 1.8169777877348234e-06,
      "loss": 0.0097,
      "step": 2503760
    },
    {
      "epoch": 4.097490884572835,
      "grad_norm": 0.16953875124454498,
      "learning_rate": 1.8169118955213064e-06,
      "loss": 0.0078,
      "step": 2503780
    },
    {
      "epoch": 4.097523615011489,
      "grad_norm": 0.3817456364631653,
      "learning_rate": 1.8168460033077891e-06,
      "loss": 0.0084,
      "step": 2503800
    },
    {
      "epoch": 4.097556345450141,
      "grad_norm": 0.07288726419210434,
      "learning_rate": 1.816780111094272e-06,
      "loss": 0.0065,
      "step": 2503820
    },
    {
      "epoch": 4.097589075888795,
      "grad_norm": 0.3200860917568207,
      "learning_rate": 1.8167142188807548e-06,
      "loss": 0.0075,
      "step": 2503840
    },
    {
      "epoch": 4.0976218063274485,
      "grad_norm": 0.2479056864976883,
      "learning_rate": 1.816648326667238e-06,
      "loss": 0.0112,
      "step": 2503860
    },
    {
      "epoch": 4.097654536766102,
      "grad_norm": 0.20544643700122833,
      "learning_rate": 1.816582434453721e-06,
      "loss": 0.0059,
      "step": 2503880
    },
    {
      "epoch": 4.097687267204755,
      "grad_norm": 0.2930449843406677,
      "learning_rate": 1.8165165422402037e-06,
      "loss": 0.0095,
      "step": 2503900
    },
    {
      "epoch": 4.097719997643408,
      "grad_norm": 0.14078330993652344,
      "learning_rate": 1.8164506500266864e-06,
      "loss": 0.0084,
      "step": 2503920
    },
    {
      "epoch": 4.097752728082062,
      "grad_norm": 0.14143231511116028,
      "learning_rate": 1.8163847578131694e-06,
      "loss": 0.0075,
      "step": 2503940
    },
    {
      "epoch": 4.097785458520715,
      "grad_norm": 0.666706383228302,
      "learning_rate": 1.8163188655996521e-06,
      "loss": 0.0113,
      "step": 2503960
    },
    {
      "epoch": 4.097818188959368,
      "grad_norm": 0.07263961434364319,
      "learning_rate": 1.816252973386135e-06,
      "loss": 0.0092,
      "step": 2503980
    },
    {
      "epoch": 4.097850919398022,
      "grad_norm": 0.05547713488340378,
      "learning_rate": 1.8161870811726178e-06,
      "loss": 0.0119,
      "step": 2504000
    },
    {
      "epoch": 4.0978836498366755,
      "grad_norm": 0.13022840023040771,
      "learning_rate": 1.8161211889591008e-06,
      "loss": 0.0099,
      "step": 2504020
    },
    {
      "epoch": 4.097916380275328,
      "grad_norm": 0.48030611872673035,
      "learning_rate": 1.816055296745584e-06,
      "loss": 0.0096,
      "step": 2504040
    },
    {
      "epoch": 4.097949110713982,
      "grad_norm": 0.4936445355415344,
      "learning_rate": 1.8159894045320667e-06,
      "loss": 0.0085,
      "step": 2504060
    },
    {
      "epoch": 4.097981841152635,
      "grad_norm": 0.1309189349412918,
      "learning_rate": 1.8159235123185496e-06,
      "loss": 0.0129,
      "step": 2504080
    },
    {
      "epoch": 4.098014571591288,
      "grad_norm": 0.35034847259521484,
      "learning_rate": 1.8158576201050324e-06,
      "loss": 0.0135,
      "step": 2504100
    },
    {
      "epoch": 4.098047302029942,
      "grad_norm": 0.19794128835201263,
      "learning_rate": 1.815791727891515e-06,
      "loss": 0.0065,
      "step": 2504120
    },
    {
      "epoch": 4.098080032468595,
      "grad_norm": 0.1034216657280922,
      "learning_rate": 1.815725835677998e-06,
      "loss": 0.0073,
      "step": 2504140
    },
    {
      "epoch": 4.098112762907249,
      "grad_norm": 0.06370184570550919,
      "learning_rate": 1.8156599434644808e-06,
      "loss": 0.0075,
      "step": 2504160
    },
    {
      "epoch": 4.098145493345902,
      "grad_norm": 0.0905001312494278,
      "learning_rate": 1.8155940512509637e-06,
      "loss": 0.0094,
      "step": 2504180
    },
    {
      "epoch": 4.098178223784555,
      "grad_norm": 0.2606932818889618,
      "learning_rate": 1.8155281590374465e-06,
      "loss": 0.0121,
      "step": 2504200
    },
    {
      "epoch": 4.098210954223209,
      "grad_norm": 0.10215549916028976,
      "learning_rate": 1.8154622668239296e-06,
      "loss": 0.0101,
      "step": 2504220
    },
    {
      "epoch": 4.0982436846618615,
      "grad_norm": 0.1335803121328354,
      "learning_rate": 1.8153963746104126e-06,
      "loss": 0.0072,
      "step": 2504240
    },
    {
      "epoch": 4.098276415100515,
      "grad_norm": 0.187159925699234,
      "learning_rate": 1.8153304823968953e-06,
      "loss": 0.0104,
      "step": 2504260
    },
    {
      "epoch": 4.098309145539169,
      "grad_norm": 0.09085270017385483,
      "learning_rate": 1.8152645901833783e-06,
      "loss": 0.0068,
      "step": 2504280
    },
    {
      "epoch": 4.098341875977822,
      "grad_norm": 0.20338335633277893,
      "learning_rate": 1.815198697969861e-06,
      "loss": 0.01,
      "step": 2504300
    },
    {
      "epoch": 4.098374606416475,
      "grad_norm": 0.3115518093109131,
      "learning_rate": 1.815132805756344e-06,
      "loss": 0.007,
      "step": 2504320
    },
    {
      "epoch": 4.098407336855129,
      "grad_norm": 0.15642909705638885,
      "learning_rate": 1.8150669135428267e-06,
      "loss": 0.0087,
      "step": 2504340
    },
    {
      "epoch": 4.098440067293782,
      "grad_norm": 0.31072163581848145,
      "learning_rate": 1.8150010213293095e-06,
      "loss": 0.0111,
      "step": 2504360
    },
    {
      "epoch": 4.098472797732435,
      "grad_norm": 0.15968799591064453,
      "learning_rate": 1.8149351291157926e-06,
      "loss": 0.0086,
      "step": 2504380
    },
    {
      "epoch": 4.0985055281710885,
      "grad_norm": 0.21712012588977814,
      "learning_rate": 1.8148692369022756e-06,
      "loss": 0.0071,
      "step": 2504400
    },
    {
      "epoch": 4.098538258609742,
      "grad_norm": 0.5044819712638855,
      "learning_rate": 1.8148033446887583e-06,
      "loss": 0.0077,
      "step": 2504420
    },
    {
      "epoch": 4.098570989048396,
      "grad_norm": 0.08549150824546814,
      "learning_rate": 1.8147374524752413e-06,
      "loss": 0.0105,
      "step": 2504440
    },
    {
      "epoch": 4.098603719487048,
      "grad_norm": 0.14334695041179657,
      "learning_rate": 1.814671560261724e-06,
      "loss": 0.0085,
      "step": 2504460
    },
    {
      "epoch": 4.098636449925702,
      "grad_norm": 0.043180353939533234,
      "learning_rate": 1.814605668048207e-06,
      "loss": 0.0079,
      "step": 2504480
    },
    {
      "epoch": 4.098669180364356,
      "grad_norm": 0.23253417015075684,
      "learning_rate": 1.8145397758346897e-06,
      "loss": 0.008,
      "step": 2504500
    },
    {
      "epoch": 4.098701910803008,
      "grad_norm": 0.0952383428812027,
      "learning_rate": 1.8144738836211726e-06,
      "loss": 0.0111,
      "step": 2504520
    },
    {
      "epoch": 4.098734641241662,
      "grad_norm": 0.9381251931190491,
      "learning_rate": 1.8144079914076554e-06,
      "loss": 0.0093,
      "step": 2504540
    },
    {
      "epoch": 4.0987673716803155,
      "grad_norm": 0.15718688070774078,
      "learning_rate": 1.8143420991941385e-06,
      "loss": 0.014,
      "step": 2504560
    },
    {
      "epoch": 4.098800102118968,
      "grad_norm": 0.12813083827495575,
      "learning_rate": 1.8142762069806213e-06,
      "loss": 0.0066,
      "step": 2504580
    },
    {
      "epoch": 4.098832832557622,
      "grad_norm": 0.07610809057950974,
      "learning_rate": 1.8142103147671042e-06,
      "loss": 0.0068,
      "step": 2504600
    },
    {
      "epoch": 4.098865562996275,
      "grad_norm": 0.43338412046432495,
      "learning_rate": 1.814144422553587e-06,
      "loss": 0.0098,
      "step": 2504620
    },
    {
      "epoch": 4.098898293434929,
      "grad_norm": 0.214401513338089,
      "learning_rate": 1.81407853034007e-06,
      "loss": 0.0058,
      "step": 2504640
    },
    {
      "epoch": 4.098931023873582,
      "grad_norm": 0.11018558591604233,
      "learning_rate": 1.8140126381265527e-06,
      "loss": 0.0087,
      "step": 2504660
    },
    {
      "epoch": 4.098963754312235,
      "grad_norm": 0.10433615744113922,
      "learning_rate": 1.8139467459130356e-06,
      "loss": 0.0121,
      "step": 2504680
    },
    {
      "epoch": 4.098996484750889,
      "grad_norm": 0.18669584393501282,
      "learning_rate": 1.8138808536995184e-06,
      "loss": 0.0087,
      "step": 2504700
    },
    {
      "epoch": 4.0990292151895416,
      "grad_norm": 0.12208712846040726,
      "learning_rate": 1.8138149614860013e-06,
      "loss": 0.0077,
      "step": 2504720
    },
    {
      "epoch": 4.099061945628195,
      "grad_norm": 0.0755484476685524,
      "learning_rate": 1.8137490692724843e-06,
      "loss": 0.0083,
      "step": 2504740
    },
    {
      "epoch": 4.099094676066849,
      "grad_norm": 0.8007254004478455,
      "learning_rate": 1.8136831770589672e-06,
      "loss": 0.0114,
      "step": 2504760
    },
    {
      "epoch": 4.099127406505502,
      "grad_norm": 0.15026117861270905,
      "learning_rate": 1.81361728484545e-06,
      "loss": 0.0071,
      "step": 2504780
    },
    {
      "epoch": 4.099160136944155,
      "grad_norm": 0.2414296418428421,
      "learning_rate": 1.813551392631933e-06,
      "loss": 0.0092,
      "step": 2504800
    },
    {
      "epoch": 4.099192867382809,
      "grad_norm": 0.2410280555486679,
      "learning_rate": 1.8134855004184156e-06,
      "loss": 0.0062,
      "step": 2504820
    },
    {
      "epoch": 4.099225597821462,
      "grad_norm": 0.08113531768321991,
      "learning_rate": 1.8134196082048986e-06,
      "loss": 0.0094,
      "step": 2504840
    },
    {
      "epoch": 4.099258328260115,
      "grad_norm": 0.1581263691186905,
      "learning_rate": 1.8133537159913813e-06,
      "loss": 0.0077,
      "step": 2504860
    },
    {
      "epoch": 4.0992910586987685,
      "grad_norm": 0.10824643075466156,
      "learning_rate": 1.8132878237778643e-06,
      "loss": 0.0097,
      "step": 2504880
    },
    {
      "epoch": 4.099323789137422,
      "grad_norm": 0.2332180142402649,
      "learning_rate": 1.813221931564347e-06,
      "loss": 0.0088,
      "step": 2504900
    },
    {
      "epoch": 4.099356519576076,
      "grad_norm": 0.46488168835639954,
      "learning_rate": 1.8131560393508302e-06,
      "loss": 0.0097,
      "step": 2504920
    },
    {
      "epoch": 4.099389250014728,
      "grad_norm": 0.31168562173843384,
      "learning_rate": 1.813090147137313e-06,
      "loss": 0.0083,
      "step": 2504940
    },
    {
      "epoch": 4.099421980453382,
      "grad_norm": 0.1622452288866043,
      "learning_rate": 1.8130242549237959e-06,
      "loss": 0.0059,
      "step": 2504960
    },
    {
      "epoch": 4.099454710892036,
      "grad_norm": 0.2757854163646698,
      "learning_rate": 1.8129583627102786e-06,
      "loss": 0.0097,
      "step": 2504980
    },
    {
      "epoch": 4.099487441330688,
      "grad_norm": 0.1526491343975067,
      "learning_rate": 1.8128924704967616e-06,
      "loss": 0.0116,
      "step": 2505000
    },
    {
      "epoch": 4.099520171769342,
      "grad_norm": 0.14503271877765656,
      "learning_rate": 1.8128265782832443e-06,
      "loss": 0.0068,
      "step": 2505020
    },
    {
      "epoch": 4.0995529022079955,
      "grad_norm": 0.93625807762146,
      "learning_rate": 1.8127606860697273e-06,
      "loss": 0.008,
      "step": 2505040
    },
    {
      "epoch": 4.099585632646649,
      "grad_norm": 0.06633337587118149,
      "learning_rate": 1.81269479385621e-06,
      "loss": 0.0085,
      "step": 2505060
    },
    {
      "epoch": 4.099618363085302,
      "grad_norm": 0.22868598997592926,
      "learning_rate": 1.812628901642693e-06,
      "loss": 0.0083,
      "step": 2505080
    },
    {
      "epoch": 4.099651093523955,
      "grad_norm": 0.6727590560913086,
      "learning_rate": 1.8125630094291761e-06,
      "loss": 0.0081,
      "step": 2505100
    },
    {
      "epoch": 4.099683823962609,
      "grad_norm": 0.21765977144241333,
      "learning_rate": 1.8124971172156589e-06,
      "loss": 0.0071,
      "step": 2505120
    },
    {
      "epoch": 4.099716554401262,
      "grad_norm": 0.38066351413726807,
      "learning_rate": 1.8124312250021416e-06,
      "loss": 0.0122,
      "step": 2505140
    },
    {
      "epoch": 4.099749284839915,
      "grad_norm": 0.4585017263889313,
      "learning_rate": 1.8123653327886245e-06,
      "loss": 0.0112,
      "step": 2505160
    },
    {
      "epoch": 4.099782015278569,
      "grad_norm": 0.2995741069316864,
      "learning_rate": 1.8122994405751073e-06,
      "loss": 0.0093,
      "step": 2505180
    },
    {
      "epoch": 4.0998147457172225,
      "grad_norm": 0.18011252582073212,
      "learning_rate": 1.8122335483615902e-06,
      "loss": 0.0106,
      "step": 2505200
    },
    {
      "epoch": 4.099847476155875,
      "grad_norm": 0.09187548607587814,
      "learning_rate": 1.812167656148073e-06,
      "loss": 0.0042,
      "step": 2505220
    },
    {
      "epoch": 4.099880206594529,
      "grad_norm": 0.4039210081100464,
      "learning_rate": 1.812101763934556e-06,
      "loss": 0.007,
      "step": 2505240
    },
    {
      "epoch": 4.099912937033182,
      "grad_norm": 0.23831942677497864,
      "learning_rate": 1.812035871721039e-06,
      "loss": 0.0081,
      "step": 2505260
    },
    {
      "epoch": 4.099945667471835,
      "grad_norm": 0.1483348160982132,
      "learning_rate": 1.8119699795075218e-06,
      "loss": 0.0066,
      "step": 2505280
    },
    {
      "epoch": 4.099978397910489,
      "grad_norm": 0.28400978446006775,
      "learning_rate": 1.8119040872940048e-06,
      "loss": 0.009,
      "step": 2505300
    },
    {
      "epoch": 4.100011128349142,
      "grad_norm": 0.1393352597951889,
      "learning_rate": 1.8118381950804875e-06,
      "loss": 0.0083,
      "step": 2505320
    },
    {
      "epoch": 4.100043858787796,
      "grad_norm": 0.3400075137615204,
      "learning_rate": 1.8117723028669703e-06,
      "loss": 0.009,
      "step": 2505340
    },
    {
      "epoch": 4.100076589226449,
      "grad_norm": 0.4514988660812378,
      "learning_rate": 1.8117064106534532e-06,
      "loss": 0.011,
      "step": 2505360
    },
    {
      "epoch": 4.100109319665102,
      "grad_norm": 0.21141602098941803,
      "learning_rate": 1.811640518439936e-06,
      "loss": 0.0103,
      "step": 2505380
    },
    {
      "epoch": 4.100142050103756,
      "grad_norm": 0.15424826741218567,
      "learning_rate": 1.811574626226419e-06,
      "loss": 0.0058,
      "step": 2505400
    },
    {
      "epoch": 4.1001747805424085,
      "grad_norm": 0.1909416913986206,
      "learning_rate": 1.8115087340129016e-06,
      "loss": 0.0104,
      "step": 2505420
    },
    {
      "epoch": 4.100207510981062,
      "grad_norm": 0.07611442357301712,
      "learning_rate": 1.8114428417993848e-06,
      "loss": 0.0088,
      "step": 2505440
    },
    {
      "epoch": 4.100240241419716,
      "grad_norm": 0.14179235696792603,
      "learning_rate": 1.8113769495858678e-06,
      "loss": 0.0076,
      "step": 2505460
    },
    {
      "epoch": 4.100272971858369,
      "grad_norm": 0.26951536536216736,
      "learning_rate": 1.8113110573723505e-06,
      "loss": 0.0062,
      "step": 2505480
    },
    {
      "epoch": 4.100305702297022,
      "grad_norm": 0.523835301399231,
      "learning_rate": 1.8112451651588335e-06,
      "loss": 0.0112,
      "step": 2505500
    },
    {
      "epoch": 4.1003384327356756,
      "grad_norm": 0.31474730372428894,
      "learning_rate": 1.8111792729453162e-06,
      "loss": 0.0086,
      "step": 2505520
    },
    {
      "epoch": 4.100371163174329,
      "grad_norm": 0.965907633304596,
      "learning_rate": 1.8111133807317991e-06,
      "loss": 0.0124,
      "step": 2505540
    },
    {
      "epoch": 4.100403893612982,
      "grad_norm": 0.16775676608085632,
      "learning_rate": 1.8110474885182819e-06,
      "loss": 0.0132,
      "step": 2505560
    },
    {
      "epoch": 4.1004366240516354,
      "grad_norm": 0.2517101764678955,
      "learning_rate": 1.8109815963047646e-06,
      "loss": 0.0094,
      "step": 2505580
    },
    {
      "epoch": 4.100469354490289,
      "grad_norm": 0.28880587220191956,
      "learning_rate": 1.8109157040912476e-06,
      "loss": 0.0075,
      "step": 2505600
    },
    {
      "epoch": 4.100502084928943,
      "grad_norm": 1.12143874168396,
      "learning_rate": 1.8108498118777307e-06,
      "loss": 0.0096,
      "step": 2505620
    },
    {
      "epoch": 4.100534815367595,
      "grad_norm": 0.11232717335224152,
      "learning_rate": 1.8107839196642135e-06,
      "loss": 0.0078,
      "step": 2505640
    },
    {
      "epoch": 4.100567545806249,
      "grad_norm": 0.507780134677887,
      "learning_rate": 1.8107180274506964e-06,
      "loss": 0.0137,
      "step": 2505660
    },
    {
      "epoch": 4.1006002762449025,
      "grad_norm": 0.18178007006645203,
      "learning_rate": 1.8106521352371792e-06,
      "loss": 0.0105,
      "step": 2505680
    },
    {
      "epoch": 4.100633006683555,
      "grad_norm": 0.4515218436717987,
      "learning_rate": 1.8105862430236621e-06,
      "loss": 0.0096,
      "step": 2505700
    },
    {
      "epoch": 4.100665737122209,
      "grad_norm": 0.2596423923969269,
      "learning_rate": 1.8105203508101449e-06,
      "loss": 0.0075,
      "step": 2505720
    },
    {
      "epoch": 4.100698467560862,
      "grad_norm": 0.28507256507873535,
      "learning_rate": 1.8104544585966278e-06,
      "loss": 0.0114,
      "step": 2505740
    },
    {
      "epoch": 4.100731197999516,
      "grad_norm": 0.08562621474266052,
      "learning_rate": 1.8103885663831106e-06,
      "loss": 0.009,
      "step": 2505760
    },
    {
      "epoch": 4.100763928438169,
      "grad_norm": 0.5896912217140198,
      "learning_rate": 1.8103226741695933e-06,
      "loss": 0.0113,
      "step": 2505780
    },
    {
      "epoch": 4.100796658876822,
      "grad_norm": 0.2516976594924927,
      "learning_rate": 1.8102567819560765e-06,
      "loss": 0.0088,
      "step": 2505800
    },
    {
      "epoch": 4.100829389315476,
      "grad_norm": 0.3332596719264984,
      "learning_rate": 1.8101908897425594e-06,
      "loss": 0.0086,
      "step": 2505820
    },
    {
      "epoch": 4.100862119754129,
      "grad_norm": 0.07380975037813187,
      "learning_rate": 1.8101249975290421e-06,
      "loss": 0.0108,
      "step": 2505840
    },
    {
      "epoch": 4.100894850192782,
      "grad_norm": 0.23955176770687103,
      "learning_rate": 1.810059105315525e-06,
      "loss": 0.0095,
      "step": 2505860
    },
    {
      "epoch": 4.100927580631436,
      "grad_norm": 0.14788232743740082,
      "learning_rate": 1.8099932131020078e-06,
      "loss": 0.0079,
      "step": 2505880
    },
    {
      "epoch": 4.100960311070089,
      "grad_norm": 0.18203508853912354,
      "learning_rate": 1.8099273208884908e-06,
      "loss": 0.0091,
      "step": 2505900
    },
    {
      "epoch": 4.100993041508742,
      "grad_norm": 0.10827353596687317,
      "learning_rate": 1.8098614286749735e-06,
      "loss": 0.0104,
      "step": 2505920
    },
    {
      "epoch": 4.101025771947396,
      "grad_norm": 0.11190822720527649,
      "learning_rate": 1.8097955364614565e-06,
      "loss": 0.0099,
      "step": 2505940
    },
    {
      "epoch": 4.101058502386049,
      "grad_norm": 0.14661520719528198,
      "learning_rate": 1.8097296442479392e-06,
      "loss": 0.0085,
      "step": 2505960
    },
    {
      "epoch": 4.101091232824702,
      "grad_norm": 0.2974153459072113,
      "learning_rate": 1.8096637520344224e-06,
      "loss": 0.0106,
      "step": 2505980
    },
    {
      "epoch": 4.101123963263356,
      "grad_norm": 0.2831878662109375,
      "learning_rate": 1.8095978598209051e-06,
      "loss": 0.0075,
      "step": 2506000
    },
    {
      "epoch": 4.101156693702009,
      "grad_norm": 0.32971644401550293,
      "learning_rate": 1.809531967607388e-06,
      "loss": 0.0087,
      "step": 2506020
    },
    {
      "epoch": 4.101189424140662,
      "grad_norm": 0.16301240026950836,
      "learning_rate": 1.8094660753938708e-06,
      "loss": 0.0085,
      "step": 2506040
    },
    {
      "epoch": 4.1012221545793155,
      "grad_norm": 0.08720961213111877,
      "learning_rate": 1.8094001831803538e-06,
      "loss": 0.0081,
      "step": 2506060
    },
    {
      "epoch": 4.101254885017969,
      "grad_norm": 0.07534494251012802,
      "learning_rate": 1.8093342909668365e-06,
      "loss": 0.0074,
      "step": 2506080
    },
    {
      "epoch": 4.101287615456623,
      "grad_norm": 0.20759107172489166,
      "learning_rate": 1.8092683987533195e-06,
      "loss": 0.0072,
      "step": 2506100
    },
    {
      "epoch": 4.101320345895275,
      "grad_norm": 0.2851339876651764,
      "learning_rate": 1.8092025065398022e-06,
      "loss": 0.0075,
      "step": 2506120
    },
    {
      "epoch": 4.101353076333929,
      "grad_norm": 0.785369873046875,
      "learning_rate": 1.8091366143262854e-06,
      "loss": 0.0093,
      "step": 2506140
    },
    {
      "epoch": 4.101385806772583,
      "grad_norm": 0.1878933608531952,
      "learning_rate": 1.809070722112768e-06,
      "loss": 0.0096,
      "step": 2506160
    },
    {
      "epoch": 4.101418537211235,
      "grad_norm": 0.41874274611473083,
      "learning_rate": 1.809004829899251e-06,
      "loss": 0.009,
      "step": 2506180
    },
    {
      "epoch": 4.101451267649889,
      "grad_norm": 0.1486722230911255,
      "learning_rate": 1.8089389376857338e-06,
      "loss": 0.0155,
      "step": 2506200
    },
    {
      "epoch": 4.1014839980885425,
      "grad_norm": 0.400072306394577,
      "learning_rate": 1.8088730454722167e-06,
      "loss": 0.0109,
      "step": 2506220
    },
    {
      "epoch": 4.101516728527196,
      "grad_norm": 0.5350610613822937,
      "learning_rate": 1.8088071532586995e-06,
      "loss": 0.0087,
      "step": 2506240
    },
    {
      "epoch": 4.101549458965849,
      "grad_norm": 0.07315494120121002,
      "learning_rate": 1.8087412610451824e-06,
      "loss": 0.0085,
      "step": 2506260
    },
    {
      "epoch": 4.101582189404502,
      "grad_norm": 0.4115666449069977,
      "learning_rate": 1.8086753688316652e-06,
      "loss": 0.0081,
      "step": 2506280
    },
    {
      "epoch": 4.101614919843156,
      "grad_norm": 0.2717879116535187,
      "learning_rate": 1.8086094766181481e-06,
      "loss": 0.0078,
      "step": 2506300
    },
    {
      "epoch": 4.101647650281809,
      "grad_norm": 0.3289944529533386,
      "learning_rate": 1.808543584404631e-06,
      "loss": 0.0073,
      "step": 2506320
    },
    {
      "epoch": 4.101680380720462,
      "grad_norm": 0.3999786674976349,
      "learning_rate": 1.808477692191114e-06,
      "loss": 0.0105,
      "step": 2506340
    },
    {
      "epoch": 4.101713111159116,
      "grad_norm": 0.21382251381874084,
      "learning_rate": 1.8084117999775968e-06,
      "loss": 0.0083,
      "step": 2506360
    },
    {
      "epoch": 4.1017458415977694,
      "grad_norm": 0.44832098484039307,
      "learning_rate": 1.8083459077640797e-06,
      "loss": 0.0099,
      "step": 2506380
    },
    {
      "epoch": 4.101778572036422,
      "grad_norm": 0.41886356472969055,
      "learning_rate": 1.8082800155505625e-06,
      "loss": 0.0112,
      "step": 2506400
    },
    {
      "epoch": 4.101811302475076,
      "grad_norm": 0.05667392536997795,
      "learning_rate": 1.8082141233370454e-06,
      "loss": 0.0079,
      "step": 2506420
    },
    {
      "epoch": 4.101844032913729,
      "grad_norm": 0.26628145575523376,
      "learning_rate": 1.8081482311235282e-06,
      "loss": 0.0093,
      "step": 2506440
    },
    {
      "epoch": 4.101876763352382,
      "grad_norm": 0.1015147790312767,
      "learning_rate": 1.808082338910011e-06,
      "loss": 0.0076,
      "step": 2506460
    },
    {
      "epoch": 4.101909493791036,
      "grad_norm": 0.15603198111057281,
      "learning_rate": 1.8080164466964938e-06,
      "loss": 0.0074,
      "step": 2506480
    },
    {
      "epoch": 4.101942224229689,
      "grad_norm": 0.373288094997406,
      "learning_rate": 1.807950554482977e-06,
      "loss": 0.0104,
      "step": 2506500
    },
    {
      "epoch": 4.101974954668343,
      "grad_norm": 0.3073269724845886,
      "learning_rate": 1.80788466226946e-06,
      "loss": 0.008,
      "step": 2506520
    },
    {
      "epoch": 4.1020076851069955,
      "grad_norm": 0.15887324512004852,
      "learning_rate": 1.8078187700559427e-06,
      "loss": 0.0114,
      "step": 2506540
    },
    {
      "epoch": 4.102040415545649,
      "grad_norm": 0.4659368395805359,
      "learning_rate": 1.8077528778424254e-06,
      "loss": 0.0091,
      "step": 2506560
    },
    {
      "epoch": 4.102073145984303,
      "grad_norm": 0.10191722214221954,
      "learning_rate": 1.8076869856289084e-06,
      "loss": 0.0073,
      "step": 2506580
    },
    {
      "epoch": 4.102105876422955,
      "grad_norm": 0.13263723254203796,
      "learning_rate": 1.8076210934153911e-06,
      "loss": 0.0089,
      "step": 2506600
    },
    {
      "epoch": 4.102138606861609,
      "grad_norm": 0.2815028727054596,
      "learning_rate": 1.807555201201874e-06,
      "loss": 0.0091,
      "step": 2506620
    },
    {
      "epoch": 4.102171337300263,
      "grad_norm": 0.40097740292549133,
      "learning_rate": 1.8074893089883568e-06,
      "loss": 0.013,
      "step": 2506640
    },
    {
      "epoch": 4.102204067738916,
      "grad_norm": 0.30568140745162964,
      "learning_rate": 1.8074234167748398e-06,
      "loss": 0.0073,
      "step": 2506660
    },
    {
      "epoch": 4.102236798177569,
      "grad_norm": 0.456485778093338,
      "learning_rate": 1.807357524561323e-06,
      "loss": 0.0085,
      "step": 2506680
    },
    {
      "epoch": 4.1022695286162225,
      "grad_norm": 0.11380480974912643,
      "learning_rate": 1.8072916323478057e-06,
      "loss": 0.0081,
      "step": 2506700
    },
    {
      "epoch": 4.102302259054876,
      "grad_norm": 0.22370651364326477,
      "learning_rate": 1.8072257401342886e-06,
      "loss": 0.0094,
      "step": 2506720
    },
    {
      "epoch": 4.102334989493529,
      "grad_norm": 0.07042728364467621,
      "learning_rate": 1.8071598479207714e-06,
      "loss": 0.0106,
      "step": 2506740
    },
    {
      "epoch": 4.102367719932182,
      "grad_norm": 0.3205365240573883,
      "learning_rate": 1.807093955707254e-06,
      "loss": 0.01,
      "step": 2506760
    },
    {
      "epoch": 4.102400450370836,
      "grad_norm": 0.4022229015827179,
      "learning_rate": 1.807028063493737e-06,
      "loss": 0.0099,
      "step": 2506780
    },
    {
      "epoch": 4.10243318080949,
      "grad_norm": 0.1454184204339981,
      "learning_rate": 1.8069621712802198e-06,
      "loss": 0.0124,
      "step": 2506800
    },
    {
      "epoch": 4.102465911248142,
      "grad_norm": 0.07766667753458023,
      "learning_rate": 1.8068962790667027e-06,
      "loss": 0.0068,
      "step": 2506820
    },
    {
      "epoch": 4.102498641686796,
      "grad_norm": 0.09663448482751846,
      "learning_rate": 1.8068303868531855e-06,
      "loss": 0.0078,
      "step": 2506840
    },
    {
      "epoch": 4.1025313721254495,
      "grad_norm": 0.14814241230487823,
      "learning_rate": 1.8067644946396687e-06,
      "loss": 0.0093,
      "step": 2506860
    },
    {
      "epoch": 4.102564102564102,
      "grad_norm": 0.2085757553577423,
      "learning_rate": 1.8066986024261516e-06,
      "loss": 0.0129,
      "step": 2506880
    },
    {
      "epoch": 4.102596833002756,
      "grad_norm": 0.2542511522769928,
      "learning_rate": 1.8066327102126343e-06,
      "loss": 0.0079,
      "step": 2506900
    },
    {
      "epoch": 4.102629563441409,
      "grad_norm": 0.14026965200901031,
      "learning_rate": 1.8065668179991173e-06,
      "loss": 0.0065,
      "step": 2506920
    },
    {
      "epoch": 4.102662293880063,
      "grad_norm": 0.11455415189266205,
      "learning_rate": 1.8065009257856e-06,
      "loss": 0.0078,
      "step": 2506940
    },
    {
      "epoch": 4.102695024318716,
      "grad_norm": 0.04743519052863121,
      "learning_rate": 1.806435033572083e-06,
      "loss": 0.007,
      "step": 2506960
    },
    {
      "epoch": 4.102727754757369,
      "grad_norm": 0.02033010497689247,
      "learning_rate": 1.8063691413585657e-06,
      "loss": 0.0059,
      "step": 2506980
    },
    {
      "epoch": 4.102760485196023,
      "grad_norm": 0.07294730097055435,
      "learning_rate": 1.8063032491450485e-06,
      "loss": 0.0104,
      "step": 2507000
    },
    {
      "epoch": 4.102793215634676,
      "grad_norm": 0.22305680811405182,
      "learning_rate": 1.8062373569315316e-06,
      "loss": 0.0074,
      "step": 2507020
    },
    {
      "epoch": 4.102825946073329,
      "grad_norm": 0.05891482159495354,
      "learning_rate": 1.8061714647180146e-06,
      "loss": 0.0111,
      "step": 2507040
    },
    {
      "epoch": 4.102858676511983,
      "grad_norm": 0.3019038736820221,
      "learning_rate": 1.8061055725044973e-06,
      "loss": 0.0064,
      "step": 2507060
    },
    {
      "epoch": 4.102891406950636,
      "grad_norm": 0.2961047887802124,
      "learning_rate": 1.8060396802909803e-06,
      "loss": 0.0108,
      "step": 2507080
    },
    {
      "epoch": 4.102924137389289,
      "grad_norm": 0.11633244901895523,
      "learning_rate": 1.805973788077463e-06,
      "loss": 0.0131,
      "step": 2507100
    },
    {
      "epoch": 4.102956867827943,
      "grad_norm": 0.3112597167491913,
      "learning_rate": 1.805907895863946e-06,
      "loss": 0.0073,
      "step": 2507120
    },
    {
      "epoch": 4.102989598266596,
      "grad_norm": 0.05017450079321861,
      "learning_rate": 1.8058420036504287e-06,
      "loss": 0.0079,
      "step": 2507140
    },
    {
      "epoch": 4.103022328705249,
      "grad_norm": 0.04801088571548462,
      "learning_rate": 1.8057761114369117e-06,
      "loss": 0.0103,
      "step": 2507160
    },
    {
      "epoch": 4.103055059143903,
      "grad_norm": 0.15566511452198029,
      "learning_rate": 1.8057102192233944e-06,
      "loss": 0.0116,
      "step": 2507180
    },
    {
      "epoch": 4.103087789582556,
      "grad_norm": 0.061206042766571045,
      "learning_rate": 1.8056443270098776e-06,
      "loss": 0.0103,
      "step": 2507200
    },
    {
      "epoch": 4.10312052002121,
      "grad_norm": 0.16020567715168,
      "learning_rate": 1.8055784347963603e-06,
      "loss": 0.0083,
      "step": 2507220
    },
    {
      "epoch": 4.1031532504598625,
      "grad_norm": 0.32584530115127563,
      "learning_rate": 1.8055125425828432e-06,
      "loss": 0.008,
      "step": 2507240
    },
    {
      "epoch": 4.103185980898516,
      "grad_norm": 0.25848308205604553,
      "learning_rate": 1.805446650369326e-06,
      "loss": 0.0117,
      "step": 2507260
    },
    {
      "epoch": 4.10321871133717,
      "grad_norm": 0.09992052614688873,
      "learning_rate": 1.805380758155809e-06,
      "loss": 0.0096,
      "step": 2507280
    },
    {
      "epoch": 4.103251441775822,
      "grad_norm": 0.16893944144248962,
      "learning_rate": 1.8053148659422917e-06,
      "loss": 0.0107,
      "step": 2507300
    },
    {
      "epoch": 4.103284172214476,
      "grad_norm": 0.3814389705657959,
      "learning_rate": 1.8052489737287746e-06,
      "loss": 0.007,
      "step": 2507320
    },
    {
      "epoch": 4.1033169026531295,
      "grad_norm": 0.26766008138656616,
      "learning_rate": 1.8051830815152574e-06,
      "loss": 0.0098,
      "step": 2507340
    },
    {
      "epoch": 4.103349633091783,
      "grad_norm": 0.21623818576335907,
      "learning_rate": 1.8051171893017403e-06,
      "loss": 0.0113,
      "step": 2507360
    },
    {
      "epoch": 4.103382363530436,
      "grad_norm": 0.3985329866409302,
      "learning_rate": 1.8050512970882233e-06,
      "loss": 0.0078,
      "step": 2507380
    },
    {
      "epoch": 4.103415093969089,
      "grad_norm": 0.0974043756723404,
      "learning_rate": 1.8049854048747062e-06,
      "loss": 0.0091,
      "step": 2507400
    },
    {
      "epoch": 4.103447824407743,
      "grad_norm": 0.29609668254852295,
      "learning_rate": 1.804919512661189e-06,
      "loss": 0.0105,
      "step": 2507420
    },
    {
      "epoch": 4.103480554846396,
      "grad_norm": 0.4286160469055176,
      "learning_rate": 1.804853620447672e-06,
      "loss": 0.0139,
      "step": 2507440
    },
    {
      "epoch": 4.103513285285049,
      "grad_norm": 0.26889562606811523,
      "learning_rate": 1.8047877282341547e-06,
      "loss": 0.0097,
      "step": 2507460
    },
    {
      "epoch": 4.103546015723703,
      "grad_norm": 0.1067335456609726,
      "learning_rate": 1.8047218360206376e-06,
      "loss": 0.0086,
      "step": 2507480
    },
    {
      "epoch": 4.103578746162356,
      "grad_norm": 0.10955870151519775,
      "learning_rate": 1.8046559438071203e-06,
      "loss": 0.0089,
      "step": 2507500
    },
    {
      "epoch": 4.103611476601009,
      "grad_norm": 0.09734507650136948,
      "learning_rate": 1.8045900515936033e-06,
      "loss": 0.0088,
      "step": 2507520
    },
    {
      "epoch": 4.103644207039663,
      "grad_norm": 0.41442587971687317,
      "learning_rate": 1.804524159380086e-06,
      "loss": 0.0141,
      "step": 2507540
    },
    {
      "epoch": 4.103676937478316,
      "grad_norm": 0.2684953212738037,
      "learning_rate": 1.8044582671665692e-06,
      "loss": 0.0132,
      "step": 2507560
    },
    {
      "epoch": 4.103709667916969,
      "grad_norm": 0.1715892106294632,
      "learning_rate": 1.804392374953052e-06,
      "loss": 0.0076,
      "step": 2507580
    },
    {
      "epoch": 4.103742398355623,
      "grad_norm": 0.041298359632492065,
      "learning_rate": 1.8043264827395349e-06,
      "loss": 0.01,
      "step": 2507600
    },
    {
      "epoch": 4.103775128794276,
      "grad_norm": 0.21977706253528595,
      "learning_rate": 1.8042605905260176e-06,
      "loss": 0.0127,
      "step": 2507620
    },
    {
      "epoch": 4.103807859232929,
      "grad_norm": 0.43812164664268494,
      "learning_rate": 1.8041946983125006e-06,
      "loss": 0.0105,
      "step": 2507640
    },
    {
      "epoch": 4.103840589671583,
      "grad_norm": 0.9098349213600159,
      "learning_rate": 1.8041288060989833e-06,
      "loss": 0.0103,
      "step": 2507660
    },
    {
      "epoch": 4.103873320110236,
      "grad_norm": 0.2786908745765686,
      "learning_rate": 1.8040629138854663e-06,
      "loss": 0.0082,
      "step": 2507680
    },
    {
      "epoch": 4.10390605054889,
      "grad_norm": 0.18379466235637665,
      "learning_rate": 1.803997021671949e-06,
      "loss": 0.0093,
      "step": 2507700
    },
    {
      "epoch": 4.1039387809875425,
      "grad_norm": 0.2436012625694275,
      "learning_rate": 1.803931129458432e-06,
      "loss": 0.0074,
      "step": 2507720
    },
    {
      "epoch": 4.103971511426196,
      "grad_norm": 0.2509857714176178,
      "learning_rate": 1.8038652372449151e-06,
      "loss": 0.008,
      "step": 2507740
    },
    {
      "epoch": 4.10400424186485,
      "grad_norm": 0.5042016506195068,
      "learning_rate": 1.8037993450313979e-06,
      "loss": 0.0084,
      "step": 2507760
    },
    {
      "epoch": 4.104036972303502,
      "grad_norm": 0.1783599853515625,
      "learning_rate": 1.8037334528178806e-06,
      "loss": 0.0124,
      "step": 2507780
    },
    {
      "epoch": 4.104069702742156,
      "grad_norm": 0.23425057530403137,
      "learning_rate": 1.8036675606043636e-06,
      "loss": 0.0125,
      "step": 2507800
    },
    {
      "epoch": 4.10410243318081,
      "grad_norm": 0.5125846862792969,
      "learning_rate": 1.8036016683908463e-06,
      "loss": 0.0119,
      "step": 2507820
    },
    {
      "epoch": 4.104135163619463,
      "grad_norm": 0.5516136884689331,
      "learning_rate": 1.8035357761773293e-06,
      "loss": 0.0112,
      "step": 2507840
    },
    {
      "epoch": 4.104167894058116,
      "grad_norm": 0.12545785307884216,
      "learning_rate": 1.803469883963812e-06,
      "loss": 0.0089,
      "step": 2507860
    },
    {
      "epoch": 4.1042006244967695,
      "grad_norm": 0.38345223665237427,
      "learning_rate": 1.803403991750295e-06,
      "loss": 0.0086,
      "step": 2507880
    },
    {
      "epoch": 4.104233354935423,
      "grad_norm": 0.20878712832927704,
      "learning_rate": 1.803338099536778e-06,
      "loss": 0.0069,
      "step": 2507900
    },
    {
      "epoch": 4.104266085374076,
      "grad_norm": 0.12660977244377136,
      "learning_rate": 1.8032722073232608e-06,
      "loss": 0.0081,
      "step": 2507920
    },
    {
      "epoch": 4.104298815812729,
      "grad_norm": 0.5764968991279602,
      "learning_rate": 1.8032063151097438e-06,
      "loss": 0.0129,
      "step": 2507940
    },
    {
      "epoch": 4.104331546251383,
      "grad_norm": 0.1519317477941513,
      "learning_rate": 1.8031404228962265e-06,
      "loss": 0.0059,
      "step": 2507960
    },
    {
      "epoch": 4.104364276690037,
      "grad_norm": 0.2697322368621826,
      "learning_rate": 1.8030745306827093e-06,
      "loss": 0.008,
      "step": 2507980
    },
    {
      "epoch": 4.104397007128689,
      "grad_norm": 0.2659744620323181,
      "learning_rate": 1.8030086384691922e-06,
      "loss": 0.0112,
      "step": 2508000
    },
    {
      "epoch": 4.104429737567343,
      "grad_norm": 0.2151973992586136,
      "learning_rate": 1.802942746255675e-06,
      "loss": 0.0087,
      "step": 2508020
    },
    {
      "epoch": 4.1044624680059965,
      "grad_norm": 0.3827754557132721,
      "learning_rate": 1.802876854042158e-06,
      "loss": 0.0085,
      "step": 2508040
    },
    {
      "epoch": 4.104495198444649,
      "grad_norm": 0.25313839316368103,
      "learning_rate": 1.8028109618286407e-06,
      "loss": 0.0072,
      "step": 2508060
    },
    {
      "epoch": 4.104527928883303,
      "grad_norm": 0.05592167004942894,
      "learning_rate": 1.8027450696151238e-06,
      "loss": 0.012,
      "step": 2508080
    },
    {
      "epoch": 4.104560659321956,
      "grad_norm": 0.19533774256706238,
      "learning_rate": 1.8026791774016068e-06,
      "loss": 0.0119,
      "step": 2508100
    },
    {
      "epoch": 4.10459338976061,
      "grad_norm": 0.26428303122520447,
      "learning_rate": 1.8026132851880895e-06,
      "loss": 0.0096,
      "step": 2508120
    },
    {
      "epoch": 4.104626120199263,
      "grad_norm": 0.20770908892154694,
      "learning_rate": 1.8025473929745725e-06,
      "loss": 0.0104,
      "step": 2508140
    },
    {
      "epoch": 4.104658850637916,
      "grad_norm": 0.2119038999080658,
      "learning_rate": 1.8024815007610552e-06,
      "loss": 0.0099,
      "step": 2508160
    },
    {
      "epoch": 4.10469158107657,
      "grad_norm": 0.25746867060661316,
      "learning_rate": 1.8024156085475382e-06,
      "loss": 0.0123,
      "step": 2508180
    },
    {
      "epoch": 4.1047243115152225,
      "grad_norm": 0.048674579709768295,
      "learning_rate": 1.802349716334021e-06,
      "loss": 0.0102,
      "step": 2508200
    },
    {
      "epoch": 4.104757041953876,
      "grad_norm": 0.37311193346977234,
      "learning_rate": 1.8022838241205036e-06,
      "loss": 0.012,
      "step": 2508220
    },
    {
      "epoch": 4.10478977239253,
      "grad_norm": 0.24531327188014984,
      "learning_rate": 1.8022179319069866e-06,
      "loss": 0.0073,
      "step": 2508240
    },
    {
      "epoch": 4.104822502831183,
      "grad_norm": 0.18156877160072327,
      "learning_rate": 1.8021520396934698e-06,
      "loss": 0.0087,
      "step": 2508260
    },
    {
      "epoch": 4.104855233269836,
      "grad_norm": 0.2791510820388794,
      "learning_rate": 1.8020861474799525e-06,
      "loss": 0.0068,
      "step": 2508280
    },
    {
      "epoch": 4.10488796370849,
      "grad_norm": 0.15375809371471405,
      "learning_rate": 1.8020202552664354e-06,
      "loss": 0.0116,
      "step": 2508300
    },
    {
      "epoch": 4.104920694147143,
      "grad_norm": 0.22534853219985962,
      "learning_rate": 1.8019543630529182e-06,
      "loss": 0.0098,
      "step": 2508320
    },
    {
      "epoch": 4.104953424585796,
      "grad_norm": 0.12376191467046738,
      "learning_rate": 1.8018884708394011e-06,
      "loss": 0.0067,
      "step": 2508340
    },
    {
      "epoch": 4.1049861550244495,
      "grad_norm": 0.15541648864746094,
      "learning_rate": 1.8018225786258839e-06,
      "loss": 0.0127,
      "step": 2508360
    },
    {
      "epoch": 4.105018885463103,
      "grad_norm": 0.2015283703804016,
      "learning_rate": 1.8017566864123668e-06,
      "loss": 0.0087,
      "step": 2508380
    },
    {
      "epoch": 4.105051615901757,
      "grad_norm": 0.0403052419424057,
      "learning_rate": 1.8016907941988496e-06,
      "loss": 0.0107,
      "step": 2508400
    },
    {
      "epoch": 4.105084346340409,
      "grad_norm": 0.12397091835737228,
      "learning_rate": 1.8016249019853323e-06,
      "loss": 0.0064,
      "step": 2508420
    },
    {
      "epoch": 4.105117076779063,
      "grad_norm": 0.2940948009490967,
      "learning_rate": 1.8015590097718155e-06,
      "loss": 0.012,
      "step": 2508440
    },
    {
      "epoch": 4.105149807217717,
      "grad_norm": 1.6102977991104126,
      "learning_rate": 1.8014931175582984e-06,
      "loss": 0.0115,
      "step": 2508460
    },
    {
      "epoch": 4.105182537656369,
      "grad_norm": 0.07427007704973221,
      "learning_rate": 1.8014272253447812e-06,
      "loss": 0.0112,
      "step": 2508480
    },
    {
      "epoch": 4.105215268095023,
      "grad_norm": 0.13516053557395935,
      "learning_rate": 1.8013613331312641e-06,
      "loss": 0.0081,
      "step": 2508500
    },
    {
      "epoch": 4.1052479985336765,
      "grad_norm": 0.05279405042529106,
      "learning_rate": 1.8012954409177468e-06,
      "loss": 0.0149,
      "step": 2508520
    },
    {
      "epoch": 4.10528072897233,
      "grad_norm": 0.09992988407611847,
      "learning_rate": 1.8012295487042298e-06,
      "loss": 0.0155,
      "step": 2508540
    },
    {
      "epoch": 4.105313459410983,
      "grad_norm": 0.37953081727027893,
      "learning_rate": 1.8011636564907125e-06,
      "loss": 0.0079,
      "step": 2508560
    },
    {
      "epoch": 4.105346189849636,
      "grad_norm": 0.22066693007946014,
      "learning_rate": 1.8010977642771955e-06,
      "loss": 0.0068,
      "step": 2508580
    },
    {
      "epoch": 4.10537892028829,
      "grad_norm": 0.7322492003440857,
      "learning_rate": 1.8010318720636782e-06,
      "loss": 0.0111,
      "step": 2508600
    },
    {
      "epoch": 4.105411650726943,
      "grad_norm": 0.5241908431053162,
      "learning_rate": 1.8009659798501614e-06,
      "loss": 0.0094,
      "step": 2508620
    },
    {
      "epoch": 4.105444381165596,
      "grad_norm": 0.3840876817703247,
      "learning_rate": 1.8009000876366441e-06,
      "loss": 0.0079,
      "step": 2508640
    },
    {
      "epoch": 4.10547711160425,
      "grad_norm": 0.09835449606180191,
      "learning_rate": 1.800834195423127e-06,
      "loss": 0.01,
      "step": 2508660
    },
    {
      "epoch": 4.1055098420429035,
      "grad_norm": 0.3873753845691681,
      "learning_rate": 1.8007683032096098e-06,
      "loss": 0.0107,
      "step": 2508680
    },
    {
      "epoch": 4.105542572481556,
      "grad_norm": 0.17014259099960327,
      "learning_rate": 1.8007024109960928e-06,
      "loss": 0.0075,
      "step": 2508700
    },
    {
      "epoch": 4.10557530292021,
      "grad_norm": 0.4132021367549896,
      "learning_rate": 1.8006365187825755e-06,
      "loss": 0.0142,
      "step": 2508720
    },
    {
      "epoch": 4.105608033358863,
      "grad_norm": 0.2150305211544037,
      "learning_rate": 1.8005706265690585e-06,
      "loss": 0.005,
      "step": 2508740
    },
    {
      "epoch": 4.105640763797516,
      "grad_norm": 0.5865663290023804,
      "learning_rate": 1.8005047343555412e-06,
      "loss": 0.0094,
      "step": 2508760
    },
    {
      "epoch": 4.10567349423617,
      "grad_norm": 0.24777212738990784,
      "learning_rate": 1.8004388421420244e-06,
      "loss": 0.0071,
      "step": 2508780
    },
    {
      "epoch": 4.105706224674823,
      "grad_norm": 0.45096370577812195,
      "learning_rate": 1.8003729499285071e-06,
      "loss": 0.0093,
      "step": 2508800
    },
    {
      "epoch": 4.105738955113477,
      "grad_norm": 0.5052004456520081,
      "learning_rate": 1.80030705771499e-06,
      "loss": 0.0109,
      "step": 2508820
    },
    {
      "epoch": 4.10577168555213,
      "grad_norm": 0.19147293269634247,
      "learning_rate": 1.8002411655014728e-06,
      "loss": 0.0097,
      "step": 2508840
    },
    {
      "epoch": 4.105804415990783,
      "grad_norm": 0.10573475062847137,
      "learning_rate": 1.8001752732879558e-06,
      "loss": 0.0083,
      "step": 2508860
    },
    {
      "epoch": 4.105837146429437,
      "grad_norm": 0.21891336143016815,
      "learning_rate": 1.8001093810744385e-06,
      "loss": 0.0143,
      "step": 2508880
    },
    {
      "epoch": 4.1058698768680895,
      "grad_norm": 0.4530329406261444,
      "learning_rate": 1.8000434888609214e-06,
      "loss": 0.0102,
      "step": 2508900
    },
    {
      "epoch": 4.105902607306743,
      "grad_norm": 0.096692755818367,
      "learning_rate": 1.7999775966474042e-06,
      "loss": 0.0086,
      "step": 2508920
    },
    {
      "epoch": 4.105935337745397,
      "grad_norm": 0.36057618260383606,
      "learning_rate": 1.7999117044338871e-06,
      "loss": 0.0083,
      "step": 2508940
    },
    {
      "epoch": 4.105968068184049,
      "grad_norm": 0.1300947070121765,
      "learning_rate": 1.79984581222037e-06,
      "loss": 0.0091,
      "step": 2508960
    },
    {
      "epoch": 4.106000798622703,
      "grad_norm": 0.3569791615009308,
      "learning_rate": 1.799779920006853e-06,
      "loss": 0.0104,
      "step": 2508980
    },
    {
      "epoch": 4.1060335290613565,
      "grad_norm": 0.28756943345069885,
      "learning_rate": 1.7997140277933358e-06,
      "loss": 0.0102,
      "step": 2509000
    },
    {
      "epoch": 4.10606625950001,
      "grad_norm": 0.20828700065612793,
      "learning_rate": 1.7996481355798187e-06,
      "loss": 0.01,
      "step": 2509020
    },
    {
      "epoch": 4.106098989938663,
      "grad_norm": 0.0729881301522255,
      "learning_rate": 1.7995822433663015e-06,
      "loss": 0.0098,
      "step": 2509040
    },
    {
      "epoch": 4.106131720377316,
      "grad_norm": 0.11682648956775665,
      "learning_rate": 1.7995163511527844e-06,
      "loss": 0.01,
      "step": 2509060
    },
    {
      "epoch": 4.10616445081597,
      "grad_norm": 0.39969494938850403,
      "learning_rate": 1.7994504589392672e-06,
      "loss": 0.0097,
      "step": 2509080
    },
    {
      "epoch": 4.106197181254623,
      "grad_norm": 0.3746732771396637,
      "learning_rate": 1.7993845667257501e-06,
      "loss": 0.0088,
      "step": 2509100
    },
    {
      "epoch": 4.106229911693276,
      "grad_norm": 0.056086745113134384,
      "learning_rate": 1.7993186745122329e-06,
      "loss": 0.0091,
      "step": 2509120
    },
    {
      "epoch": 4.10626264213193,
      "grad_norm": 0.13207337260246277,
      "learning_rate": 1.799252782298716e-06,
      "loss": 0.0113,
      "step": 2509140
    },
    {
      "epoch": 4.1062953725705835,
      "grad_norm": 0.3349781334400177,
      "learning_rate": 1.799186890085199e-06,
      "loss": 0.005,
      "step": 2509160
    },
    {
      "epoch": 4.106328103009236,
      "grad_norm": 0.1763836145401001,
      "learning_rate": 1.7991209978716817e-06,
      "loss": 0.0103,
      "step": 2509180
    },
    {
      "epoch": 4.10636083344789,
      "grad_norm": 0.10280741751194,
      "learning_rate": 1.7990551056581644e-06,
      "loss": 0.0115,
      "step": 2509200
    },
    {
      "epoch": 4.106393563886543,
      "grad_norm": 0.19672490656375885,
      "learning_rate": 1.7989892134446474e-06,
      "loss": 0.0101,
      "step": 2509220
    },
    {
      "epoch": 4.106426294325196,
      "grad_norm": 0.13832369446754456,
      "learning_rate": 1.7989233212311301e-06,
      "loss": 0.0099,
      "step": 2509240
    },
    {
      "epoch": 4.10645902476385,
      "grad_norm": 0.1634463667869568,
      "learning_rate": 1.798857429017613e-06,
      "loss": 0.0087,
      "step": 2509260
    },
    {
      "epoch": 4.106491755202503,
      "grad_norm": 0.28546229004859924,
      "learning_rate": 1.7987915368040958e-06,
      "loss": 0.0137,
      "step": 2509280
    },
    {
      "epoch": 4.106524485641157,
      "grad_norm": 0.57220059633255,
      "learning_rate": 1.7987256445905788e-06,
      "loss": 0.0124,
      "step": 2509300
    },
    {
      "epoch": 4.10655721607981,
      "grad_norm": 0.1459515541791916,
      "learning_rate": 1.798659752377062e-06,
      "loss": 0.0107,
      "step": 2509320
    },
    {
      "epoch": 4.106589946518463,
      "grad_norm": 0.6004477143287659,
      "learning_rate": 1.7985938601635447e-06,
      "loss": 0.0085,
      "step": 2509340
    },
    {
      "epoch": 4.106622676957117,
      "grad_norm": 0.7136882543563843,
      "learning_rate": 1.7985279679500276e-06,
      "loss": 0.0086,
      "step": 2509360
    },
    {
      "epoch": 4.1066554073957695,
      "grad_norm": 0.3269358277320862,
      "learning_rate": 1.7984620757365104e-06,
      "loss": 0.0116,
      "step": 2509380
    },
    {
      "epoch": 4.106688137834423,
      "grad_norm": 0.48185211420059204,
      "learning_rate": 1.7983961835229933e-06,
      "loss": 0.0086,
      "step": 2509400
    },
    {
      "epoch": 4.106720868273077,
      "grad_norm": 0.17405962944030762,
      "learning_rate": 1.798330291309476e-06,
      "loss": 0.0075,
      "step": 2509420
    },
    {
      "epoch": 4.10675359871173,
      "grad_norm": 0.11100947856903076,
      "learning_rate": 1.7982643990959588e-06,
      "loss": 0.0067,
      "step": 2509440
    },
    {
      "epoch": 4.106786329150383,
      "grad_norm": 0.21502624452114105,
      "learning_rate": 1.7981985068824418e-06,
      "loss": 0.0146,
      "step": 2509460
    },
    {
      "epoch": 4.106819059589037,
      "grad_norm": 0.2594389021396637,
      "learning_rate": 1.798132614668925e-06,
      "loss": 0.0093,
      "step": 2509480
    },
    {
      "epoch": 4.10685179002769,
      "grad_norm": 0.11030631512403488,
      "learning_rate": 1.7980667224554077e-06,
      "loss": 0.0099,
      "step": 2509500
    },
    {
      "epoch": 4.106884520466343,
      "grad_norm": 0.24728304147720337,
      "learning_rate": 1.7980008302418906e-06,
      "loss": 0.008,
      "step": 2509520
    },
    {
      "epoch": 4.1069172509049965,
      "grad_norm": 0.19803069531917572,
      "learning_rate": 1.7979349380283734e-06,
      "loss": 0.0113,
      "step": 2509540
    },
    {
      "epoch": 4.10694998134365,
      "grad_norm": 0.4026953876018524,
      "learning_rate": 1.7978690458148563e-06,
      "loss": 0.0138,
      "step": 2509560
    },
    {
      "epoch": 4.106982711782304,
      "grad_norm": 0.22575496137142181,
      "learning_rate": 1.797803153601339e-06,
      "loss": 0.0097,
      "step": 2509580
    },
    {
      "epoch": 4.107015442220956,
      "grad_norm": 0.22774048149585724,
      "learning_rate": 1.797737261387822e-06,
      "loss": 0.0061,
      "step": 2509600
    },
    {
      "epoch": 4.10704817265961,
      "grad_norm": 0.1092594787478447,
      "learning_rate": 1.7976713691743047e-06,
      "loss": 0.0112,
      "step": 2509620
    },
    {
      "epoch": 4.107080903098264,
      "grad_norm": 0.5912957787513733,
      "learning_rate": 1.7976054769607875e-06,
      "loss": 0.0107,
      "step": 2509640
    },
    {
      "epoch": 4.107113633536916,
      "grad_norm": 0.15430369973182678,
      "learning_rate": 1.7975395847472706e-06,
      "loss": 0.0077,
      "step": 2509660
    },
    {
      "epoch": 4.10714636397557,
      "grad_norm": 0.24303452670574188,
      "learning_rate": 1.7974736925337536e-06,
      "loss": 0.009,
      "step": 2509680
    },
    {
      "epoch": 4.1071790944142235,
      "grad_norm": 0.14604416489601135,
      "learning_rate": 1.7974078003202363e-06,
      "loss": 0.008,
      "step": 2509700
    },
    {
      "epoch": 4.107211824852877,
      "grad_norm": 0.34640756249427795,
      "learning_rate": 1.7973419081067193e-06,
      "loss": 0.0084,
      "step": 2509720
    },
    {
      "epoch": 4.10724455529153,
      "grad_norm": 0.5200274586677551,
      "learning_rate": 1.797276015893202e-06,
      "loss": 0.0076,
      "step": 2509740
    },
    {
      "epoch": 4.107277285730183,
      "grad_norm": 0.24545016884803772,
      "learning_rate": 1.797210123679685e-06,
      "loss": 0.0094,
      "step": 2509760
    },
    {
      "epoch": 4.107310016168837,
      "grad_norm": 0.362163245677948,
      "learning_rate": 1.7971442314661677e-06,
      "loss": 0.0092,
      "step": 2509780
    },
    {
      "epoch": 4.10734274660749,
      "grad_norm": 0.12481234222650528,
      "learning_rate": 1.7970783392526507e-06,
      "loss": 0.0077,
      "step": 2509800
    },
    {
      "epoch": 4.107375477046143,
      "grad_norm": 0.0723162293434143,
      "learning_rate": 1.7970124470391334e-06,
      "loss": 0.006,
      "step": 2509820
    },
    {
      "epoch": 4.107408207484797,
      "grad_norm": 0.16045735776424408,
      "learning_rate": 1.7969465548256166e-06,
      "loss": 0.0136,
      "step": 2509840
    },
    {
      "epoch": 4.10744093792345,
      "grad_norm": 0.1024930402636528,
      "learning_rate": 1.7968806626120993e-06,
      "loss": 0.0113,
      "step": 2509860
    },
    {
      "epoch": 4.107473668362103,
      "grad_norm": 0.25013279914855957,
      "learning_rate": 1.7968147703985823e-06,
      "loss": 0.0068,
      "step": 2509880
    },
    {
      "epoch": 4.107506398800757,
      "grad_norm": 1.9628076553344727,
      "learning_rate": 1.796748878185065e-06,
      "loss": 0.0133,
      "step": 2509900
    },
    {
      "epoch": 4.10753912923941,
      "grad_norm": 0.3477628231048584,
      "learning_rate": 1.796682985971548e-06,
      "loss": 0.0072,
      "step": 2509920
    },
    {
      "epoch": 4.107571859678063,
      "grad_norm": 0.11732925474643707,
      "learning_rate": 1.7966170937580307e-06,
      "loss": 0.008,
      "step": 2509940
    },
    {
      "epoch": 4.107604590116717,
      "grad_norm": 0.10284657776355743,
      "learning_rate": 1.7965512015445136e-06,
      "loss": 0.007,
      "step": 2509960
    },
    {
      "epoch": 4.10763732055537,
      "grad_norm": 0.11211704462766647,
      "learning_rate": 1.7964853093309964e-06,
      "loss": 0.0126,
      "step": 2509980
    },
    {
      "epoch": 4.107670050994024,
      "grad_norm": 0.16453875601291656,
      "learning_rate": 1.7964194171174793e-06,
      "loss": 0.0081,
      "step": 2510000
    },
    {
      "epoch": 4.1077027814326765,
      "grad_norm": 0.5592421889305115,
      "learning_rate": 1.7963535249039623e-06,
      "loss": 0.0092,
      "step": 2510020
    },
    {
      "epoch": 4.10773551187133,
      "grad_norm": 0.19222328066825867,
      "learning_rate": 1.7962876326904452e-06,
      "loss": 0.0071,
      "step": 2510040
    },
    {
      "epoch": 4.107768242309984,
      "grad_norm": 0.24825285375118256,
      "learning_rate": 1.796221740476928e-06,
      "loss": 0.0087,
      "step": 2510060
    },
    {
      "epoch": 4.107800972748636,
      "grad_norm": 0.1954459846019745,
      "learning_rate": 1.796155848263411e-06,
      "loss": 0.0097,
      "step": 2510080
    },
    {
      "epoch": 4.10783370318729,
      "grad_norm": 0.11724846065044403,
      "learning_rate": 1.7960899560498937e-06,
      "loss": 0.0139,
      "step": 2510100
    },
    {
      "epoch": 4.107866433625944,
      "grad_norm": 0.17125053703784943,
      "learning_rate": 1.7960240638363766e-06,
      "loss": 0.0098,
      "step": 2510120
    },
    {
      "epoch": 4.107899164064597,
      "grad_norm": 0.23391716182231903,
      "learning_rate": 1.7959581716228594e-06,
      "loss": 0.0153,
      "step": 2510140
    },
    {
      "epoch": 4.10793189450325,
      "grad_norm": 0.1908261626958847,
      "learning_rate": 1.7958922794093423e-06,
      "loss": 0.0112,
      "step": 2510160
    },
    {
      "epoch": 4.1079646249419035,
      "grad_norm": 0.383343905210495,
      "learning_rate": 1.795826387195825e-06,
      "loss": 0.0053,
      "step": 2510180
    },
    {
      "epoch": 4.107997355380557,
      "grad_norm": 0.08123596757650375,
      "learning_rate": 1.7957604949823082e-06,
      "loss": 0.0111,
      "step": 2510200
    },
    {
      "epoch": 4.10803008581921,
      "grad_norm": 0.3727135956287384,
      "learning_rate": 1.795694602768791e-06,
      "loss": 0.0111,
      "step": 2510220
    },
    {
      "epoch": 4.108062816257863,
      "grad_norm": 0.06749852746725082,
      "learning_rate": 1.795628710555274e-06,
      "loss": 0.0102,
      "step": 2510240
    },
    {
      "epoch": 4.108095546696517,
      "grad_norm": 0.31484630703926086,
      "learning_rate": 1.7955628183417566e-06,
      "loss": 0.009,
      "step": 2510260
    },
    {
      "epoch": 4.108128277135171,
      "grad_norm": 0.5158416628837585,
      "learning_rate": 1.7954969261282396e-06,
      "loss": 0.0106,
      "step": 2510280
    },
    {
      "epoch": 4.108161007573823,
      "grad_norm": 0.5558421015739441,
      "learning_rate": 1.7954310339147223e-06,
      "loss": 0.0091,
      "step": 2510300
    },
    {
      "epoch": 4.108193738012477,
      "grad_norm": 0.09665872156620026,
      "learning_rate": 1.7953651417012053e-06,
      "loss": 0.0079,
      "step": 2510320
    },
    {
      "epoch": 4.1082264684511305,
      "grad_norm": 0.1605774611234665,
      "learning_rate": 1.795299249487688e-06,
      "loss": 0.0115,
      "step": 2510340
    },
    {
      "epoch": 4.108259198889783,
      "grad_norm": 0.1375034898519516,
      "learning_rate": 1.7952333572741712e-06,
      "loss": 0.0103,
      "step": 2510360
    },
    {
      "epoch": 4.108291929328437,
      "grad_norm": 0.18012243509292603,
      "learning_rate": 1.7951674650606541e-06,
      "loss": 0.0136,
      "step": 2510380
    },
    {
      "epoch": 4.10832465976709,
      "grad_norm": 0.15295179188251495,
      "learning_rate": 1.7951015728471369e-06,
      "loss": 0.0111,
      "step": 2510400
    },
    {
      "epoch": 4.108357390205743,
      "grad_norm": 0.16638080775737762,
      "learning_rate": 1.7950356806336196e-06,
      "loss": 0.0081,
      "step": 2510420
    },
    {
      "epoch": 4.108390120644397,
      "grad_norm": 0.3668672442436218,
      "learning_rate": 1.7949697884201026e-06,
      "loss": 0.0108,
      "step": 2510440
    },
    {
      "epoch": 4.10842285108305,
      "grad_norm": 0.07163896411657333,
      "learning_rate": 1.7949038962065853e-06,
      "loss": 0.0095,
      "step": 2510460
    },
    {
      "epoch": 4.108455581521704,
      "grad_norm": 0.1716431975364685,
      "learning_rate": 1.7948380039930683e-06,
      "loss": 0.0093,
      "step": 2510480
    },
    {
      "epoch": 4.108488311960357,
      "grad_norm": 0.1737508475780487,
      "learning_rate": 1.794772111779551e-06,
      "loss": 0.0086,
      "step": 2510500
    },
    {
      "epoch": 4.10852104239901,
      "grad_norm": 0.08703478425741196,
      "learning_rate": 1.794706219566034e-06,
      "loss": 0.0081,
      "step": 2510520
    },
    {
      "epoch": 4.108553772837664,
      "grad_norm": 0.40024533867836,
      "learning_rate": 1.7946403273525171e-06,
      "loss": 0.0098,
      "step": 2510540
    },
    {
      "epoch": 4.108586503276317,
      "grad_norm": 0.2652447521686554,
      "learning_rate": 1.7945744351389999e-06,
      "loss": 0.0086,
      "step": 2510560
    },
    {
      "epoch": 4.10861923371497,
      "grad_norm": 0.2604975700378418,
      "learning_rate": 1.7945085429254828e-06,
      "loss": 0.0069,
      "step": 2510580
    },
    {
      "epoch": 4.108651964153624,
      "grad_norm": 0.24971787631511688,
      "learning_rate": 1.7944426507119655e-06,
      "loss": 0.0086,
      "step": 2510600
    },
    {
      "epoch": 4.108684694592277,
      "grad_norm": 0.04660353064537048,
      "learning_rate": 1.7943767584984483e-06,
      "loss": 0.013,
      "step": 2510620
    },
    {
      "epoch": 4.10871742503093,
      "grad_norm": 0.12966641783714294,
      "learning_rate": 1.7943108662849312e-06,
      "loss": 0.0099,
      "step": 2510640
    },
    {
      "epoch": 4.1087501554695836,
      "grad_norm": 0.27202510833740234,
      "learning_rate": 1.794244974071414e-06,
      "loss": 0.0092,
      "step": 2510660
    },
    {
      "epoch": 4.108782885908237,
      "grad_norm": 0.12275394797325134,
      "learning_rate": 1.794179081857897e-06,
      "loss": 0.0107,
      "step": 2510680
    },
    {
      "epoch": 4.10881561634689,
      "grad_norm": 0.14060832560062408,
      "learning_rate": 1.7941131896443797e-06,
      "loss": 0.0102,
      "step": 2510700
    },
    {
      "epoch": 4.1088483467855434,
      "grad_norm": 0.2318282425403595,
      "learning_rate": 1.7940472974308628e-06,
      "loss": 0.0074,
      "step": 2510720
    },
    {
      "epoch": 4.108881077224197,
      "grad_norm": 0.12263211607933044,
      "learning_rate": 1.7939814052173458e-06,
      "loss": 0.0125,
      "step": 2510740
    },
    {
      "epoch": 4.108913807662851,
      "grad_norm": 0.13542424142360687,
      "learning_rate": 1.7939155130038285e-06,
      "loss": 0.0087,
      "step": 2510760
    },
    {
      "epoch": 4.108946538101503,
      "grad_norm": 0.30636635422706604,
      "learning_rate": 1.7938496207903115e-06,
      "loss": 0.0083,
      "step": 2510780
    },
    {
      "epoch": 4.108979268540157,
      "grad_norm": 0.3518441319465637,
      "learning_rate": 1.7937837285767942e-06,
      "loss": 0.0059,
      "step": 2510800
    },
    {
      "epoch": 4.1090119989788105,
      "grad_norm": 0.123589426279068,
      "learning_rate": 1.7937178363632772e-06,
      "loss": 0.0063,
      "step": 2510820
    },
    {
      "epoch": 4.109044729417463,
      "grad_norm": 0.4838915765285492,
      "learning_rate": 1.79365194414976e-06,
      "loss": 0.0094,
      "step": 2510840
    },
    {
      "epoch": 4.109077459856117,
      "grad_norm": 0.08758093416690826,
      "learning_rate": 1.7935860519362426e-06,
      "loss": 0.0113,
      "step": 2510860
    },
    {
      "epoch": 4.10911019029477,
      "grad_norm": 0.27976182103157043,
      "learning_rate": 1.7935201597227256e-06,
      "loss": 0.0127,
      "step": 2510880
    },
    {
      "epoch": 4.109142920733424,
      "grad_norm": 0.4412095844745636,
      "learning_rate": 1.7934542675092088e-06,
      "loss": 0.0083,
      "step": 2510900
    },
    {
      "epoch": 4.109175651172077,
      "grad_norm": 0.2169947773218155,
      "learning_rate": 1.7933883752956915e-06,
      "loss": 0.0081,
      "step": 2510920
    },
    {
      "epoch": 4.10920838161073,
      "grad_norm": 0.2302704155445099,
      "learning_rate": 1.7933224830821745e-06,
      "loss": 0.0099,
      "step": 2510940
    },
    {
      "epoch": 4.109241112049384,
      "grad_norm": 0.23396840691566467,
      "learning_rate": 1.7932565908686572e-06,
      "loss": 0.0086,
      "step": 2510960
    },
    {
      "epoch": 4.109273842488037,
      "grad_norm": 0.1267388015985489,
      "learning_rate": 1.7931906986551401e-06,
      "loss": 0.0085,
      "step": 2510980
    },
    {
      "epoch": 4.10930657292669,
      "grad_norm": 0.5762522220611572,
      "learning_rate": 1.7931248064416229e-06,
      "loss": 0.0095,
      "step": 2511000
    },
    {
      "epoch": 4.109339303365344,
      "grad_norm": 0.0738813504576683,
      "learning_rate": 1.7930589142281058e-06,
      "loss": 0.0095,
      "step": 2511020
    },
    {
      "epoch": 4.109372033803997,
      "grad_norm": 0.14147202670574188,
      "learning_rate": 1.7929930220145886e-06,
      "loss": 0.0113,
      "step": 2511040
    },
    {
      "epoch": 4.10940476424265,
      "grad_norm": 0.39266952872276306,
      "learning_rate": 1.7929271298010713e-06,
      "loss": 0.0071,
      "step": 2511060
    },
    {
      "epoch": 4.109437494681304,
      "grad_norm": 0.6783484816551208,
      "learning_rate": 1.7928612375875545e-06,
      "loss": 0.0162,
      "step": 2511080
    },
    {
      "epoch": 4.109470225119957,
      "grad_norm": 0.17989037930965424,
      "learning_rate": 1.7927953453740374e-06,
      "loss": 0.0087,
      "step": 2511100
    },
    {
      "epoch": 4.10950295555861,
      "grad_norm": 0.30118390917778015,
      "learning_rate": 1.7927294531605202e-06,
      "loss": 0.0071,
      "step": 2511120
    },
    {
      "epoch": 4.109535685997264,
      "grad_norm": 0.1647266298532486,
      "learning_rate": 1.7926635609470031e-06,
      "loss": 0.0114,
      "step": 2511140
    },
    {
      "epoch": 4.109568416435917,
      "grad_norm": 0.10860169678926468,
      "learning_rate": 1.7925976687334859e-06,
      "loss": 0.0073,
      "step": 2511160
    },
    {
      "epoch": 4.109601146874571,
      "grad_norm": 0.08933956921100616,
      "learning_rate": 1.7925317765199688e-06,
      "loss": 0.0103,
      "step": 2511180
    },
    {
      "epoch": 4.1096338773132235,
      "grad_norm": 0.2110390067100525,
      "learning_rate": 1.7924658843064516e-06,
      "loss": 0.01,
      "step": 2511200
    },
    {
      "epoch": 4.109666607751877,
      "grad_norm": 0.2022973746061325,
      "learning_rate": 1.7923999920929345e-06,
      "loss": 0.0084,
      "step": 2511220
    },
    {
      "epoch": 4.109699338190531,
      "grad_norm": 0.26956629753112793,
      "learning_rate": 1.7923340998794175e-06,
      "loss": 0.0075,
      "step": 2511240
    },
    {
      "epoch": 4.109732068629183,
      "grad_norm": 0.37245792150497437,
      "learning_rate": 1.7922682076659004e-06,
      "loss": 0.009,
      "step": 2511260
    },
    {
      "epoch": 4.109764799067837,
      "grad_norm": 0.09628139436244965,
      "learning_rate": 1.7922023154523831e-06,
      "loss": 0.0053,
      "step": 2511280
    },
    {
      "epoch": 4.109797529506491,
      "grad_norm": 0.27333691716194153,
      "learning_rate": 1.792136423238866e-06,
      "loss": 0.0079,
      "step": 2511300
    },
    {
      "epoch": 4.109830259945144,
      "grad_norm": 0.19053326547145844,
      "learning_rate": 1.7920705310253488e-06,
      "loss": 0.0109,
      "step": 2511320
    },
    {
      "epoch": 4.109862990383797,
      "grad_norm": 0.17134813964366913,
      "learning_rate": 1.7920046388118318e-06,
      "loss": 0.0073,
      "step": 2511340
    },
    {
      "epoch": 4.1098957208224505,
      "grad_norm": 0.2263685166835785,
      "learning_rate": 1.7919387465983145e-06,
      "loss": 0.0061,
      "step": 2511360
    },
    {
      "epoch": 4.109928451261104,
      "grad_norm": 0.1428775042295456,
      "learning_rate": 1.7918728543847975e-06,
      "loss": 0.0142,
      "step": 2511380
    },
    {
      "epoch": 4.109961181699757,
      "grad_norm": 0.10971161723136902,
      "learning_rate": 1.7918069621712802e-06,
      "loss": 0.0064,
      "step": 2511400
    },
    {
      "epoch": 4.10999391213841,
      "grad_norm": 0.24678455293178558,
      "learning_rate": 1.7917410699577634e-06,
      "loss": 0.0101,
      "step": 2511420
    },
    {
      "epoch": 4.110026642577064,
      "grad_norm": 0.12087111920118332,
      "learning_rate": 1.7916751777442461e-06,
      "loss": 0.0091,
      "step": 2511440
    },
    {
      "epoch": 4.1100593730157176,
      "grad_norm": 0.27870893478393555,
      "learning_rate": 1.791609285530729e-06,
      "loss": 0.0079,
      "step": 2511460
    },
    {
      "epoch": 4.11009210345437,
      "grad_norm": 0.22265146672725677,
      "learning_rate": 1.7915433933172118e-06,
      "loss": 0.0071,
      "step": 2511480
    },
    {
      "epoch": 4.110124833893024,
      "grad_norm": 0.37047678232192993,
      "learning_rate": 1.7914775011036948e-06,
      "loss": 0.0075,
      "step": 2511500
    },
    {
      "epoch": 4.1101575643316774,
      "grad_norm": 0.10842972248792648,
      "learning_rate": 1.7914116088901775e-06,
      "loss": 0.013,
      "step": 2511520
    },
    {
      "epoch": 4.11019029477033,
      "grad_norm": 0.4209376871585846,
      "learning_rate": 1.7913457166766605e-06,
      "loss": 0.009,
      "step": 2511540
    },
    {
      "epoch": 4.110223025208984,
      "grad_norm": 0.14143304526805878,
      "learning_rate": 1.7912798244631432e-06,
      "loss": 0.0053,
      "step": 2511560
    },
    {
      "epoch": 4.110255755647637,
      "grad_norm": 0.20717217028141022,
      "learning_rate": 1.7912139322496261e-06,
      "loss": 0.0068,
      "step": 2511580
    },
    {
      "epoch": 4.110288486086291,
      "grad_norm": 0.48552483320236206,
      "learning_rate": 1.791148040036109e-06,
      "loss": 0.0119,
      "step": 2511600
    },
    {
      "epoch": 4.110321216524944,
      "grad_norm": 0.27878591418266296,
      "learning_rate": 1.791082147822592e-06,
      "loss": 0.0071,
      "step": 2511620
    },
    {
      "epoch": 4.110353946963597,
      "grad_norm": 0.29021307826042175,
      "learning_rate": 1.7910162556090748e-06,
      "loss": 0.0087,
      "step": 2511640
    },
    {
      "epoch": 4.110386677402251,
      "grad_norm": 0.17265857756137848,
      "learning_rate": 1.7909503633955577e-06,
      "loss": 0.0102,
      "step": 2511660
    },
    {
      "epoch": 4.1104194078409035,
      "grad_norm": 0.09798257052898407,
      "learning_rate": 1.7908844711820405e-06,
      "loss": 0.0089,
      "step": 2511680
    },
    {
      "epoch": 4.110452138279557,
      "grad_norm": 0.14659616351127625,
      "learning_rate": 1.7908185789685234e-06,
      "loss": 0.0103,
      "step": 2511700
    },
    {
      "epoch": 4.110484868718211,
      "grad_norm": 0.2599177062511444,
      "learning_rate": 1.7907526867550062e-06,
      "loss": 0.009,
      "step": 2511720
    },
    {
      "epoch": 4.110517599156864,
      "grad_norm": 0.09207101166248322,
      "learning_rate": 1.7906867945414891e-06,
      "loss": 0.0067,
      "step": 2511740
    },
    {
      "epoch": 4.110550329595517,
      "grad_norm": 0.2881009578704834,
      "learning_rate": 1.7906209023279719e-06,
      "loss": 0.0088,
      "step": 2511760
    },
    {
      "epoch": 4.110583060034171,
      "grad_norm": 0.2752365469932556,
      "learning_rate": 1.790555010114455e-06,
      "loss": 0.0092,
      "step": 2511780
    },
    {
      "epoch": 4.110615790472824,
      "grad_norm": 0.4521416425704956,
      "learning_rate": 1.790489117900938e-06,
      "loss": 0.0115,
      "step": 2511800
    },
    {
      "epoch": 4.110648520911477,
      "grad_norm": 0.1878470480442047,
      "learning_rate": 1.7904232256874207e-06,
      "loss": 0.0112,
      "step": 2511820
    },
    {
      "epoch": 4.1106812513501305,
      "grad_norm": 0.1731671392917633,
      "learning_rate": 1.7903573334739035e-06,
      "loss": 0.0082,
      "step": 2511840
    },
    {
      "epoch": 4.110713981788784,
      "grad_norm": 0.20958437025547028,
      "learning_rate": 1.7902914412603864e-06,
      "loss": 0.0104,
      "step": 2511860
    },
    {
      "epoch": 4.110746712227438,
      "grad_norm": 0.13096733391284943,
      "learning_rate": 1.7902255490468691e-06,
      "loss": 0.0076,
      "step": 2511880
    },
    {
      "epoch": 4.11077944266609,
      "grad_norm": 1.0102770328521729,
      "learning_rate": 1.790159656833352e-06,
      "loss": 0.0081,
      "step": 2511900
    },
    {
      "epoch": 4.110812173104744,
      "grad_norm": 0.23510727286338806,
      "learning_rate": 1.7900937646198348e-06,
      "loss": 0.0122,
      "step": 2511920
    },
    {
      "epoch": 4.110844903543398,
      "grad_norm": 0.13196758925914764,
      "learning_rate": 1.7900278724063178e-06,
      "loss": 0.0062,
      "step": 2511940
    },
    {
      "epoch": 4.11087763398205,
      "grad_norm": 0.12873627245426178,
      "learning_rate": 1.789961980192801e-06,
      "loss": 0.0081,
      "step": 2511960
    },
    {
      "epoch": 4.110910364420704,
      "grad_norm": 0.15466292202472687,
      "learning_rate": 1.7898960879792837e-06,
      "loss": 0.0199,
      "step": 2511980
    },
    {
      "epoch": 4.1109430948593575,
      "grad_norm": 0.3838813304901123,
      "learning_rate": 1.7898301957657666e-06,
      "loss": 0.0081,
      "step": 2512000
    },
    {
      "epoch": 4.110975825298011,
      "grad_norm": 0.11231148988008499,
      "learning_rate": 1.7897643035522494e-06,
      "loss": 0.0114,
      "step": 2512020
    },
    {
      "epoch": 4.111008555736664,
      "grad_norm": 0.2017967402935028,
      "learning_rate": 1.7896984113387323e-06,
      "loss": 0.0087,
      "step": 2512040
    },
    {
      "epoch": 4.111041286175317,
      "grad_norm": 0.21064941585063934,
      "learning_rate": 1.789632519125215e-06,
      "loss": 0.0132,
      "step": 2512060
    },
    {
      "epoch": 4.111074016613971,
      "grad_norm": 0.10776418447494507,
      "learning_rate": 1.7895666269116978e-06,
      "loss": 0.0056,
      "step": 2512080
    },
    {
      "epoch": 4.111106747052624,
      "grad_norm": 0.27454185485839844,
      "learning_rate": 1.7895007346981808e-06,
      "loss": 0.0111,
      "step": 2512100
    },
    {
      "epoch": 4.111139477491277,
      "grad_norm": 0.16685615479946136,
      "learning_rate": 1.789434842484664e-06,
      "loss": 0.0109,
      "step": 2512120
    },
    {
      "epoch": 4.111172207929931,
      "grad_norm": 0.2738211154937744,
      "learning_rate": 1.7893689502711467e-06,
      "loss": 0.011,
      "step": 2512140
    },
    {
      "epoch": 4.111204938368584,
      "grad_norm": 0.2247401475906372,
      "learning_rate": 1.7893030580576296e-06,
      "loss": 0.0111,
      "step": 2512160
    },
    {
      "epoch": 4.111237668807237,
      "grad_norm": 0.14810100197792053,
      "learning_rate": 1.7892371658441124e-06,
      "loss": 0.0093,
      "step": 2512180
    },
    {
      "epoch": 4.111270399245891,
      "grad_norm": 0.19516700506210327,
      "learning_rate": 1.7891712736305953e-06,
      "loss": 0.0073,
      "step": 2512200
    },
    {
      "epoch": 4.111303129684544,
      "grad_norm": 0.06641359627246857,
      "learning_rate": 1.789105381417078e-06,
      "loss": 0.0082,
      "step": 2512220
    },
    {
      "epoch": 4.111335860123197,
      "grad_norm": 0.22371387481689453,
      "learning_rate": 1.789039489203561e-06,
      "loss": 0.0107,
      "step": 2512240
    },
    {
      "epoch": 4.111368590561851,
      "grad_norm": 0.1055692508816719,
      "learning_rate": 1.7889735969900437e-06,
      "loss": 0.009,
      "step": 2512260
    },
    {
      "epoch": 4.111401321000504,
      "grad_norm": 0.3175196945667267,
      "learning_rate": 1.7889077047765265e-06,
      "loss": 0.0067,
      "step": 2512280
    },
    {
      "epoch": 4.111434051439157,
      "grad_norm": 0.11947676539421082,
      "learning_rate": 1.7888418125630096e-06,
      "loss": 0.0081,
      "step": 2512300
    },
    {
      "epoch": 4.111466781877811,
      "grad_norm": 0.844459593296051,
      "learning_rate": 1.7887759203494926e-06,
      "loss": 0.0095,
      "step": 2512320
    },
    {
      "epoch": 4.111499512316464,
      "grad_norm": 0.276101291179657,
      "learning_rate": 1.7887100281359753e-06,
      "loss": 0.0111,
      "step": 2512340
    },
    {
      "epoch": 4.111532242755118,
      "grad_norm": 0.18882930278778076,
      "learning_rate": 1.7886441359224583e-06,
      "loss": 0.0097,
      "step": 2512360
    },
    {
      "epoch": 4.1115649731937705,
      "grad_norm": 0.1673252284526825,
      "learning_rate": 1.788578243708941e-06,
      "loss": 0.0063,
      "step": 2512380
    },
    {
      "epoch": 4.111597703632424,
      "grad_norm": 0.19514694809913635,
      "learning_rate": 1.788512351495424e-06,
      "loss": 0.0064,
      "step": 2512400
    },
    {
      "epoch": 4.111630434071078,
      "grad_norm": 0.3434920310974121,
      "learning_rate": 1.7884464592819067e-06,
      "loss": 0.0086,
      "step": 2512420
    },
    {
      "epoch": 4.11166316450973,
      "grad_norm": 0.2912479639053345,
      "learning_rate": 1.7883805670683897e-06,
      "loss": 0.0067,
      "step": 2512440
    },
    {
      "epoch": 4.111695894948384,
      "grad_norm": 0.1754811406135559,
      "learning_rate": 1.7883146748548724e-06,
      "loss": 0.0091,
      "step": 2512460
    },
    {
      "epoch": 4.1117286253870375,
      "grad_norm": 0.08687590062618256,
      "learning_rate": 1.7882487826413556e-06,
      "loss": 0.008,
      "step": 2512480
    },
    {
      "epoch": 4.111761355825691,
      "grad_norm": 0.6878403425216675,
      "learning_rate": 1.7881828904278383e-06,
      "loss": 0.0097,
      "step": 2512500
    },
    {
      "epoch": 4.111794086264344,
      "grad_norm": 0.2198522835969925,
      "learning_rate": 1.7881169982143213e-06,
      "loss": 0.0057,
      "step": 2512520
    },
    {
      "epoch": 4.111826816702997,
      "grad_norm": 0.10240685194730759,
      "learning_rate": 1.788051106000804e-06,
      "loss": 0.0123,
      "step": 2512540
    },
    {
      "epoch": 4.111859547141651,
      "grad_norm": 0.7248988151550293,
      "learning_rate": 1.787985213787287e-06,
      "loss": 0.0088,
      "step": 2512560
    },
    {
      "epoch": 4.111892277580304,
      "grad_norm": 0.232192724943161,
      "learning_rate": 1.7879193215737697e-06,
      "loss": 0.0089,
      "step": 2512580
    },
    {
      "epoch": 4.111925008018957,
      "grad_norm": 0.4047695994377136,
      "learning_rate": 1.7878534293602527e-06,
      "loss": 0.0119,
      "step": 2512600
    },
    {
      "epoch": 4.111957738457611,
      "grad_norm": 0.1383488029241562,
      "learning_rate": 1.7877875371467354e-06,
      "loss": 0.0079,
      "step": 2512620
    },
    {
      "epoch": 4.1119904688962645,
      "grad_norm": 0.19208984076976776,
      "learning_rate": 1.7877216449332183e-06,
      "loss": 0.0069,
      "step": 2512640
    },
    {
      "epoch": 4.112023199334917,
      "grad_norm": 0.11934389919042587,
      "learning_rate": 1.7876557527197013e-06,
      "loss": 0.0094,
      "step": 2512660
    },
    {
      "epoch": 4.112055929773571,
      "grad_norm": 0.26836472749710083,
      "learning_rate": 1.7875898605061842e-06,
      "loss": 0.0069,
      "step": 2512680
    },
    {
      "epoch": 4.112088660212224,
      "grad_norm": 0.26043182611465454,
      "learning_rate": 1.787523968292667e-06,
      "loss": 0.0104,
      "step": 2512700
    },
    {
      "epoch": 4.112121390650877,
      "grad_norm": 0.04971108213067055,
      "learning_rate": 1.78745807607915e-06,
      "loss": 0.0099,
      "step": 2512720
    },
    {
      "epoch": 4.112154121089531,
      "grad_norm": 0.0925680473446846,
      "learning_rate": 1.7873921838656327e-06,
      "loss": 0.0097,
      "step": 2512740
    },
    {
      "epoch": 4.112186851528184,
      "grad_norm": 0.7326258420944214,
      "learning_rate": 1.7873262916521156e-06,
      "loss": 0.0097,
      "step": 2512760
    },
    {
      "epoch": 4.112219581966838,
      "grad_norm": 0.18027280271053314,
      "learning_rate": 1.7872603994385984e-06,
      "loss": 0.0069,
      "step": 2512780
    },
    {
      "epoch": 4.112252312405491,
      "grad_norm": 0.2051127552986145,
      "learning_rate": 1.7871945072250813e-06,
      "loss": 0.0089,
      "step": 2512800
    },
    {
      "epoch": 4.112285042844144,
      "grad_norm": 0.2309454083442688,
      "learning_rate": 1.787128615011564e-06,
      "loss": 0.0095,
      "step": 2512820
    },
    {
      "epoch": 4.112317773282798,
      "grad_norm": 0.3344309628009796,
      "learning_rate": 1.7870627227980472e-06,
      "loss": 0.0075,
      "step": 2512840
    },
    {
      "epoch": 4.1123505037214505,
      "grad_norm": 0.446371853351593,
      "learning_rate": 1.78699683058453e-06,
      "loss": 0.0097,
      "step": 2512860
    },
    {
      "epoch": 4.112383234160104,
      "grad_norm": 0.049313172698020935,
      "learning_rate": 1.786930938371013e-06,
      "loss": 0.013,
      "step": 2512880
    },
    {
      "epoch": 4.112415964598758,
      "grad_norm": 0.10615862905979156,
      "learning_rate": 1.7868650461574957e-06,
      "loss": 0.0079,
      "step": 2512900
    },
    {
      "epoch": 4.112448695037411,
      "grad_norm": 0.4249117076396942,
      "learning_rate": 1.7867991539439786e-06,
      "loss": 0.0072,
      "step": 2512920
    },
    {
      "epoch": 4.112481425476064,
      "grad_norm": 0.07239614427089691,
      "learning_rate": 1.7867332617304613e-06,
      "loss": 0.0107,
      "step": 2512940
    },
    {
      "epoch": 4.112514155914718,
      "grad_norm": 0.15929889678955078,
      "learning_rate": 1.7866673695169443e-06,
      "loss": 0.007,
      "step": 2512960
    },
    {
      "epoch": 4.112546886353371,
      "grad_norm": 0.04973367974162102,
      "learning_rate": 1.786601477303427e-06,
      "loss": 0.0086,
      "step": 2512980
    },
    {
      "epoch": 4.112579616792024,
      "grad_norm": 0.1944834142923355,
      "learning_rate": 1.7865355850899102e-06,
      "loss": 0.01,
      "step": 2513000
    },
    {
      "epoch": 4.1126123472306775,
      "grad_norm": 0.13730935752391815,
      "learning_rate": 1.7864696928763932e-06,
      "loss": 0.0067,
      "step": 2513020
    },
    {
      "epoch": 4.112645077669331,
      "grad_norm": 0.6336929202079773,
      "learning_rate": 1.7864038006628759e-06,
      "loss": 0.0093,
      "step": 2513040
    },
    {
      "epoch": 4.112677808107985,
      "grad_norm": 0.10514029115438461,
      "learning_rate": 1.7863379084493586e-06,
      "loss": 0.0087,
      "step": 2513060
    },
    {
      "epoch": 4.112710538546637,
      "grad_norm": 0.20772850513458252,
      "learning_rate": 1.7862720162358416e-06,
      "loss": 0.0117,
      "step": 2513080
    },
    {
      "epoch": 4.112743268985291,
      "grad_norm": 0.26872366666793823,
      "learning_rate": 1.7862061240223243e-06,
      "loss": 0.0071,
      "step": 2513100
    },
    {
      "epoch": 4.112775999423945,
      "grad_norm": 0.169568732380867,
      "learning_rate": 1.7861402318088073e-06,
      "loss": 0.0119,
      "step": 2513120
    },
    {
      "epoch": 4.112808729862597,
      "grad_norm": 0.12861280143260956,
      "learning_rate": 1.78607433959529e-06,
      "loss": 0.0106,
      "step": 2513140
    },
    {
      "epoch": 4.112841460301251,
      "grad_norm": 0.1602691113948822,
      "learning_rate": 1.786008447381773e-06,
      "loss": 0.0093,
      "step": 2513160
    },
    {
      "epoch": 4.1128741907399045,
      "grad_norm": 0.1921968162059784,
      "learning_rate": 1.7859425551682561e-06,
      "loss": 0.0077,
      "step": 2513180
    },
    {
      "epoch": 4.112906921178558,
      "grad_norm": 0.08232594281435013,
      "learning_rate": 1.7858766629547389e-06,
      "loss": 0.0064,
      "step": 2513200
    },
    {
      "epoch": 4.112939651617211,
      "grad_norm": 0.26812854409217834,
      "learning_rate": 1.7858107707412218e-06,
      "loss": 0.0082,
      "step": 2513220
    },
    {
      "epoch": 4.112972382055864,
      "grad_norm": 0.7477742433547974,
      "learning_rate": 1.7857448785277046e-06,
      "loss": 0.0095,
      "step": 2513240
    },
    {
      "epoch": 4.113005112494518,
      "grad_norm": 0.07734141498804092,
      "learning_rate": 1.7856789863141873e-06,
      "loss": 0.0093,
      "step": 2513260
    },
    {
      "epoch": 4.113037842933171,
      "grad_norm": 0.2254873365163803,
      "learning_rate": 1.7856130941006702e-06,
      "loss": 0.0069,
      "step": 2513280
    },
    {
      "epoch": 4.113070573371824,
      "grad_norm": 0.6240102052688599,
      "learning_rate": 1.785547201887153e-06,
      "loss": 0.0121,
      "step": 2513300
    },
    {
      "epoch": 4.113103303810478,
      "grad_norm": 0.7321357131004333,
      "learning_rate": 1.785481309673636e-06,
      "loss": 0.0065,
      "step": 2513320
    },
    {
      "epoch": 4.113136034249131,
      "grad_norm": 0.22223791480064392,
      "learning_rate": 1.7854154174601187e-06,
      "loss": 0.0089,
      "step": 2513340
    },
    {
      "epoch": 4.113168764687784,
      "grad_norm": 0.6171671152114868,
      "learning_rate": 1.7853495252466018e-06,
      "loss": 0.0084,
      "step": 2513360
    },
    {
      "epoch": 4.113201495126438,
      "grad_norm": 2.0370230674743652,
      "learning_rate": 1.7852836330330848e-06,
      "loss": 0.0073,
      "step": 2513380
    },
    {
      "epoch": 4.113234225565091,
      "grad_norm": 0.25152912735939026,
      "learning_rate": 1.7852177408195675e-06,
      "loss": 0.0087,
      "step": 2513400
    },
    {
      "epoch": 4.113266956003744,
      "grad_norm": 0.23389458656311035,
      "learning_rate": 1.7851518486060505e-06,
      "loss": 0.0083,
      "step": 2513420
    },
    {
      "epoch": 4.113299686442398,
      "grad_norm": 0.4179733097553253,
      "learning_rate": 1.7850859563925332e-06,
      "loss": 0.0138,
      "step": 2513440
    },
    {
      "epoch": 4.113332416881051,
      "grad_norm": 0.23355543613433838,
      "learning_rate": 1.7850200641790162e-06,
      "loss": 0.0091,
      "step": 2513460
    },
    {
      "epoch": 4.113365147319705,
      "grad_norm": 0.25957009196281433,
      "learning_rate": 1.784954171965499e-06,
      "loss": 0.0044,
      "step": 2513480
    },
    {
      "epoch": 4.1133978777583575,
      "grad_norm": 0.3741177022457123,
      "learning_rate": 1.7848882797519817e-06,
      "loss": 0.0077,
      "step": 2513500
    },
    {
      "epoch": 4.113430608197011,
      "grad_norm": 0.2310192584991455,
      "learning_rate": 1.7848223875384646e-06,
      "loss": 0.0111,
      "step": 2513520
    },
    {
      "epoch": 4.113463338635665,
      "grad_norm": 0.2421884685754776,
      "learning_rate": 1.7847564953249478e-06,
      "loss": 0.0078,
      "step": 2513540
    },
    {
      "epoch": 4.113496069074317,
      "grad_norm": 0.09087784588336945,
      "learning_rate": 1.7846906031114305e-06,
      "loss": 0.01,
      "step": 2513560
    },
    {
      "epoch": 4.113528799512971,
      "grad_norm": 0.35686200857162476,
      "learning_rate": 1.7846247108979135e-06,
      "loss": 0.0076,
      "step": 2513580
    },
    {
      "epoch": 4.113561529951625,
      "grad_norm": 0.1451563686132431,
      "learning_rate": 1.7845588186843962e-06,
      "loss": 0.008,
      "step": 2513600
    },
    {
      "epoch": 4.113594260390277,
      "grad_norm": 0.2670195996761322,
      "learning_rate": 1.7844929264708792e-06,
      "loss": 0.0075,
      "step": 2513620
    },
    {
      "epoch": 4.113626990828931,
      "grad_norm": 0.17585758864879608,
      "learning_rate": 1.7844270342573619e-06,
      "loss": 0.0081,
      "step": 2513640
    },
    {
      "epoch": 4.1136597212675845,
      "grad_norm": 0.3411364257335663,
      "learning_rate": 1.7843611420438448e-06,
      "loss": 0.0103,
      "step": 2513660
    },
    {
      "epoch": 4.113692451706238,
      "grad_norm": 0.12246473133563995,
      "learning_rate": 1.7842952498303276e-06,
      "loss": 0.0128,
      "step": 2513680
    },
    {
      "epoch": 4.113725182144891,
      "grad_norm": 0.16311068832874298,
      "learning_rate": 1.7842293576168105e-06,
      "loss": 0.008,
      "step": 2513700
    },
    {
      "epoch": 4.113757912583544,
      "grad_norm": 0.05461672693490982,
      "learning_rate": 1.7841634654032935e-06,
      "loss": 0.0082,
      "step": 2513720
    },
    {
      "epoch": 4.113790643022198,
      "grad_norm": 0.22013850510120392,
      "learning_rate": 1.7840975731897764e-06,
      "loss": 0.0072,
      "step": 2513740
    },
    {
      "epoch": 4.113823373460851,
      "grad_norm": 0.4240999221801758,
      "learning_rate": 1.7840316809762592e-06,
      "loss": 0.0147,
      "step": 2513760
    },
    {
      "epoch": 4.113856103899504,
      "grad_norm": 0.13536253571510315,
      "learning_rate": 1.7839657887627421e-06,
      "loss": 0.0085,
      "step": 2513780
    },
    {
      "epoch": 4.113888834338158,
      "grad_norm": 0.286239892244339,
      "learning_rate": 1.7838998965492249e-06,
      "loss": 0.0073,
      "step": 2513800
    },
    {
      "epoch": 4.1139215647768115,
      "grad_norm": 0.34440523386001587,
      "learning_rate": 1.7838340043357078e-06,
      "loss": 0.0089,
      "step": 2513820
    },
    {
      "epoch": 4.113954295215464,
      "grad_norm": 0.3264099359512329,
      "learning_rate": 1.7837681121221906e-06,
      "loss": 0.0134,
      "step": 2513840
    },
    {
      "epoch": 4.113987025654118,
      "grad_norm": 0.12466465681791306,
      "learning_rate": 1.7837022199086735e-06,
      "loss": 0.0082,
      "step": 2513860
    },
    {
      "epoch": 4.114019756092771,
      "grad_norm": 0.30504217743873596,
      "learning_rate": 1.7836363276951565e-06,
      "loss": 0.0076,
      "step": 2513880
    },
    {
      "epoch": 4.114052486531424,
      "grad_norm": 0.2709514796733856,
      "learning_rate": 1.7835704354816394e-06,
      "loss": 0.0109,
      "step": 2513900
    },
    {
      "epoch": 4.114085216970078,
      "grad_norm": 0.05871531739830971,
      "learning_rate": 1.7835045432681222e-06,
      "loss": 0.0069,
      "step": 2513920
    },
    {
      "epoch": 4.114117947408731,
      "grad_norm": 0.11168713122606277,
      "learning_rate": 1.7834386510546051e-06,
      "loss": 0.0078,
      "step": 2513940
    },
    {
      "epoch": 4.114150677847385,
      "grad_norm": 0.8280779123306274,
      "learning_rate": 1.7833727588410878e-06,
      "loss": 0.008,
      "step": 2513960
    },
    {
      "epoch": 4.114183408286038,
      "grad_norm": 0.1825437694787979,
      "learning_rate": 1.7833068666275708e-06,
      "loss": 0.007,
      "step": 2513980
    },
    {
      "epoch": 4.114216138724691,
      "grad_norm": 0.07872647792100906,
      "learning_rate": 1.7832409744140535e-06,
      "loss": 0.0065,
      "step": 2514000
    },
    {
      "epoch": 4.114248869163345,
      "grad_norm": 0.06454050540924072,
      "learning_rate": 1.7831750822005365e-06,
      "loss": 0.0097,
      "step": 2514020
    },
    {
      "epoch": 4.1142815996019975,
      "grad_norm": 0.12076391279697418,
      "learning_rate": 1.7831091899870192e-06,
      "loss": 0.0105,
      "step": 2514040
    },
    {
      "epoch": 4.114314330040651,
      "grad_norm": 0.24788892269134521,
      "learning_rate": 1.7830432977735024e-06,
      "loss": 0.0083,
      "step": 2514060
    },
    {
      "epoch": 4.114347060479305,
      "grad_norm": 0.25011423230171204,
      "learning_rate": 1.7829774055599851e-06,
      "loss": 0.0078,
      "step": 2514080
    },
    {
      "epoch": 4.114379790917958,
      "grad_norm": 0.3664475083351135,
      "learning_rate": 1.782911513346468e-06,
      "loss": 0.0089,
      "step": 2514100
    },
    {
      "epoch": 4.114412521356611,
      "grad_norm": 0.10955142974853516,
      "learning_rate": 1.7828456211329508e-06,
      "loss": 0.0113,
      "step": 2514120
    },
    {
      "epoch": 4.1144452517952645,
      "grad_norm": 0.3972066640853882,
      "learning_rate": 1.7827797289194338e-06,
      "loss": 0.0098,
      "step": 2514140
    },
    {
      "epoch": 4.114477982233918,
      "grad_norm": 0.5426285862922668,
      "learning_rate": 1.7827138367059165e-06,
      "loss": 0.0125,
      "step": 2514160
    },
    {
      "epoch": 4.114510712672571,
      "grad_norm": 0.2474392205476761,
      "learning_rate": 1.7826479444923995e-06,
      "loss": 0.011,
      "step": 2514180
    },
    {
      "epoch": 4.114543443111224,
      "grad_norm": 0.20876473188400269,
      "learning_rate": 1.7825820522788822e-06,
      "loss": 0.0163,
      "step": 2514200
    },
    {
      "epoch": 4.114576173549878,
      "grad_norm": 0.44728735089302063,
      "learning_rate": 1.7825161600653652e-06,
      "loss": 0.0106,
      "step": 2514220
    },
    {
      "epoch": 4.114608903988532,
      "grad_norm": 0.15915118157863617,
      "learning_rate": 1.7824502678518481e-06,
      "loss": 0.0087,
      "step": 2514240
    },
    {
      "epoch": 4.114641634427184,
      "grad_norm": 0.3187949061393738,
      "learning_rate": 1.782384375638331e-06,
      "loss": 0.0117,
      "step": 2514260
    },
    {
      "epoch": 4.114674364865838,
      "grad_norm": 0.30031219124794006,
      "learning_rate": 1.7823184834248138e-06,
      "loss": 0.0109,
      "step": 2514280
    },
    {
      "epoch": 4.1147070953044915,
      "grad_norm": 0.12377031147480011,
      "learning_rate": 1.7822525912112968e-06,
      "loss": 0.0068,
      "step": 2514300
    },
    {
      "epoch": 4.114739825743144,
      "grad_norm": 0.28499990701675415,
      "learning_rate": 1.7821866989977795e-06,
      "loss": 0.0101,
      "step": 2514320
    },
    {
      "epoch": 4.114772556181798,
      "grad_norm": 0.13250979781150818,
      "learning_rate": 1.7821208067842624e-06,
      "loss": 0.0113,
      "step": 2514340
    },
    {
      "epoch": 4.114805286620451,
      "grad_norm": 0.13131290674209595,
      "learning_rate": 1.7820549145707452e-06,
      "loss": 0.0064,
      "step": 2514360
    },
    {
      "epoch": 4.114838017059105,
      "grad_norm": 0.3994971811771393,
      "learning_rate": 1.7819890223572281e-06,
      "loss": 0.0097,
      "step": 2514380
    },
    {
      "epoch": 4.114870747497758,
      "grad_norm": 0.07803458720445633,
      "learning_rate": 1.7819231301437109e-06,
      "loss": 0.0108,
      "step": 2514400
    },
    {
      "epoch": 4.114903477936411,
      "grad_norm": 0.2696166932582855,
      "learning_rate": 1.781857237930194e-06,
      "loss": 0.0107,
      "step": 2514420
    },
    {
      "epoch": 4.114936208375065,
      "grad_norm": 0.18986579775810242,
      "learning_rate": 1.781791345716677e-06,
      "loss": 0.0072,
      "step": 2514440
    },
    {
      "epoch": 4.114968938813718,
      "grad_norm": 0.23173032701015472,
      "learning_rate": 1.7817254535031597e-06,
      "loss": 0.008,
      "step": 2514460
    },
    {
      "epoch": 4.115001669252371,
      "grad_norm": 0.3667590022087097,
      "learning_rate": 1.7816595612896425e-06,
      "loss": 0.0089,
      "step": 2514480
    },
    {
      "epoch": 4.115034399691025,
      "grad_norm": 0.09593626111745834,
      "learning_rate": 1.7815936690761254e-06,
      "loss": 0.0092,
      "step": 2514500
    },
    {
      "epoch": 4.115067130129678,
      "grad_norm": 0.13101693987846375,
      "learning_rate": 1.7815277768626082e-06,
      "loss": 0.0073,
      "step": 2514520
    },
    {
      "epoch": 4.115099860568331,
      "grad_norm": 0.1876891553401947,
      "learning_rate": 1.7814618846490911e-06,
      "loss": 0.0109,
      "step": 2514540
    },
    {
      "epoch": 4.115132591006985,
      "grad_norm": 0.053578976541757584,
      "learning_rate": 1.7813959924355739e-06,
      "loss": 0.0079,
      "step": 2514560
    },
    {
      "epoch": 4.115165321445638,
      "grad_norm": 0.058297961950302124,
      "learning_rate": 1.7813301002220568e-06,
      "loss": 0.0106,
      "step": 2514580
    },
    {
      "epoch": 4.115198051884291,
      "grad_norm": 0.10797702521085739,
      "learning_rate": 1.78126420800854e-06,
      "loss": 0.0082,
      "step": 2514600
    },
    {
      "epoch": 4.115230782322945,
      "grad_norm": 0.22051851451396942,
      "learning_rate": 1.7811983157950227e-06,
      "loss": 0.0092,
      "step": 2514620
    },
    {
      "epoch": 4.115263512761598,
      "grad_norm": 0.04244885966181755,
      "learning_rate": 1.7811324235815057e-06,
      "loss": 0.0109,
      "step": 2514640
    },
    {
      "epoch": 4.115296243200252,
      "grad_norm": 0.36156848073005676,
      "learning_rate": 1.7810665313679884e-06,
      "loss": 0.0072,
      "step": 2514660
    },
    {
      "epoch": 4.1153289736389045,
      "grad_norm": 0.3780183792114258,
      "learning_rate": 1.7810006391544713e-06,
      "loss": 0.0134,
      "step": 2514680
    },
    {
      "epoch": 4.115361704077558,
      "grad_norm": 0.13508276641368866,
      "learning_rate": 1.780934746940954e-06,
      "loss": 0.0072,
      "step": 2514700
    },
    {
      "epoch": 4.115394434516212,
      "grad_norm": 0.2338152378797531,
      "learning_rate": 1.7808688547274368e-06,
      "loss": 0.0081,
      "step": 2514720
    },
    {
      "epoch": 4.115427164954864,
      "grad_norm": 0.17025969922542572,
      "learning_rate": 1.7808029625139198e-06,
      "loss": 0.0156,
      "step": 2514740
    },
    {
      "epoch": 4.115459895393518,
      "grad_norm": 0.06110784411430359,
      "learning_rate": 1.780737070300403e-06,
      "loss": 0.0166,
      "step": 2514760
    },
    {
      "epoch": 4.115492625832172,
      "grad_norm": 0.5962832570075989,
      "learning_rate": 1.7806711780868857e-06,
      "loss": 0.0121,
      "step": 2514780
    },
    {
      "epoch": 4.115525356270825,
      "grad_norm": 0.14558075368404388,
      "learning_rate": 1.7806052858733686e-06,
      "loss": 0.0098,
      "step": 2514800
    },
    {
      "epoch": 4.115558086709478,
      "grad_norm": 0.11628711968660355,
      "learning_rate": 1.7805393936598514e-06,
      "loss": 0.0102,
      "step": 2514820
    },
    {
      "epoch": 4.1155908171481315,
      "grad_norm": 0.27604034543037415,
      "learning_rate": 1.7804735014463343e-06,
      "loss": 0.0121,
      "step": 2514840
    },
    {
      "epoch": 4.115623547586785,
      "grad_norm": 0.3300211429595947,
      "learning_rate": 1.780407609232817e-06,
      "loss": 0.0167,
      "step": 2514860
    },
    {
      "epoch": 4.115656278025438,
      "grad_norm": 0.2784669101238251,
      "learning_rate": 1.7803417170193e-06,
      "loss": 0.0103,
      "step": 2514880
    },
    {
      "epoch": 4.115689008464091,
      "grad_norm": 0.36235737800598145,
      "learning_rate": 1.7802758248057828e-06,
      "loss": 0.0061,
      "step": 2514900
    },
    {
      "epoch": 4.115721738902745,
      "grad_norm": 0.3745734691619873,
      "learning_rate": 1.7802099325922655e-06,
      "loss": 0.0059,
      "step": 2514920
    },
    {
      "epoch": 4.1157544693413985,
      "grad_norm": 0.24139563739299774,
      "learning_rate": 1.7801440403787487e-06,
      "loss": 0.0059,
      "step": 2514940
    },
    {
      "epoch": 4.115787199780051,
      "grad_norm": 0.14354431629180908,
      "learning_rate": 1.7800781481652316e-06,
      "loss": 0.0067,
      "step": 2514960
    },
    {
      "epoch": 4.115819930218705,
      "grad_norm": 0.25761598348617554,
      "learning_rate": 1.7800122559517144e-06,
      "loss": 0.0081,
      "step": 2514980
    },
    {
      "epoch": 4.115852660657358,
      "grad_norm": 0.2588960826396942,
      "learning_rate": 1.7799463637381973e-06,
      "loss": 0.0077,
      "step": 2515000
    },
    {
      "epoch": 4.115885391096011,
      "grad_norm": 0.1844974011182785,
      "learning_rate": 1.77988047152468e-06,
      "loss": 0.007,
      "step": 2515020
    },
    {
      "epoch": 4.115918121534665,
      "grad_norm": 0.20194649696350098,
      "learning_rate": 1.779814579311163e-06,
      "loss": 0.0089,
      "step": 2515040
    },
    {
      "epoch": 4.115950851973318,
      "grad_norm": 0.3484744131565094,
      "learning_rate": 1.7797486870976457e-06,
      "loss": 0.0094,
      "step": 2515060
    },
    {
      "epoch": 4.115983582411971,
      "grad_norm": 0.3838065266609192,
      "learning_rate": 1.7796827948841287e-06,
      "loss": 0.0099,
      "step": 2515080
    },
    {
      "epoch": 4.116016312850625,
      "grad_norm": 0.09638664126396179,
      "learning_rate": 1.7796169026706114e-06,
      "loss": 0.0085,
      "step": 2515100
    },
    {
      "epoch": 4.116049043289278,
      "grad_norm": 0.08210498094558716,
      "learning_rate": 1.7795510104570946e-06,
      "loss": 0.0089,
      "step": 2515120
    },
    {
      "epoch": 4.116081773727932,
      "grad_norm": 0.15601859986782074,
      "learning_rate": 1.7794851182435773e-06,
      "loss": 0.0066,
      "step": 2515140
    },
    {
      "epoch": 4.1161145041665845,
      "grad_norm": 0.28126466274261475,
      "learning_rate": 1.7794192260300603e-06,
      "loss": 0.0143,
      "step": 2515160
    },
    {
      "epoch": 4.116147234605238,
      "grad_norm": 0.34784483909606934,
      "learning_rate": 1.779353333816543e-06,
      "loss": 0.006,
      "step": 2515180
    },
    {
      "epoch": 4.116179965043892,
      "grad_norm": 0.24117037653923035,
      "learning_rate": 1.779287441603026e-06,
      "loss": 0.0078,
      "step": 2515200
    },
    {
      "epoch": 4.116212695482544,
      "grad_norm": 0.10980406403541565,
      "learning_rate": 1.7792215493895087e-06,
      "loss": 0.0108,
      "step": 2515220
    },
    {
      "epoch": 4.116245425921198,
      "grad_norm": 0.18105198442935944,
      "learning_rate": 1.7791556571759917e-06,
      "loss": 0.0082,
      "step": 2515240
    },
    {
      "epoch": 4.116278156359852,
      "grad_norm": 0.21923479437828064,
      "learning_rate": 1.7790897649624744e-06,
      "loss": 0.0062,
      "step": 2515260
    },
    {
      "epoch": 4.116310886798505,
      "grad_norm": 0.3088719844818115,
      "learning_rate": 1.7790238727489574e-06,
      "loss": 0.0052,
      "step": 2515280
    },
    {
      "epoch": 4.116343617237158,
      "grad_norm": 0.6593748331069946,
      "learning_rate": 1.7789579805354403e-06,
      "loss": 0.0135,
      "step": 2515300
    },
    {
      "epoch": 4.1163763476758115,
      "grad_norm": 0.1261124312877655,
      "learning_rate": 1.7788920883219233e-06,
      "loss": 0.015,
      "step": 2515320
    },
    {
      "epoch": 4.116409078114465,
      "grad_norm": 0.1066969633102417,
      "learning_rate": 1.778826196108406e-06,
      "loss": 0.0097,
      "step": 2515340
    },
    {
      "epoch": 4.116441808553118,
      "grad_norm": 0.3401421308517456,
      "learning_rate": 1.778760303894889e-06,
      "loss": 0.0069,
      "step": 2515360
    },
    {
      "epoch": 4.116474538991771,
      "grad_norm": 0.09929516166448593,
      "learning_rate": 1.7786944116813717e-06,
      "loss": 0.0077,
      "step": 2515380
    },
    {
      "epoch": 4.116507269430425,
      "grad_norm": 0.3354766070842743,
      "learning_rate": 1.7786285194678546e-06,
      "loss": 0.0096,
      "step": 2515400
    },
    {
      "epoch": 4.116539999869079,
      "grad_norm": 0.22494205832481384,
      "learning_rate": 1.7785626272543374e-06,
      "loss": 0.0071,
      "step": 2515420
    },
    {
      "epoch": 4.116572730307731,
      "grad_norm": 0.5112863183021545,
      "learning_rate": 1.7784967350408203e-06,
      "loss": 0.0113,
      "step": 2515440
    },
    {
      "epoch": 4.116605460746385,
      "grad_norm": 0.05142691731452942,
      "learning_rate": 1.778430842827303e-06,
      "loss": 0.0107,
      "step": 2515460
    },
    {
      "epoch": 4.1166381911850385,
      "grad_norm": 0.9027359485626221,
      "learning_rate": 1.7783649506137862e-06,
      "loss": 0.0095,
      "step": 2515480
    },
    {
      "epoch": 4.116670921623691,
      "grad_norm": 0.13168181478977203,
      "learning_rate": 1.778299058400269e-06,
      "loss": 0.0075,
      "step": 2515500
    },
    {
      "epoch": 4.116703652062345,
      "grad_norm": 0.10470122843980789,
      "learning_rate": 1.778233166186752e-06,
      "loss": 0.0057,
      "step": 2515520
    },
    {
      "epoch": 4.116736382500998,
      "grad_norm": 0.9397165775299072,
      "learning_rate": 1.7781672739732347e-06,
      "loss": 0.0107,
      "step": 2515540
    },
    {
      "epoch": 4.116769112939652,
      "grad_norm": 0.2729246914386749,
      "learning_rate": 1.7781013817597176e-06,
      "loss": 0.0088,
      "step": 2515560
    },
    {
      "epoch": 4.116801843378305,
      "grad_norm": 0.16292700171470642,
      "learning_rate": 1.7780354895462004e-06,
      "loss": 0.0096,
      "step": 2515580
    },
    {
      "epoch": 4.116834573816958,
      "grad_norm": 0.2975587844848633,
      "learning_rate": 1.7779695973326833e-06,
      "loss": 0.0076,
      "step": 2515600
    },
    {
      "epoch": 4.116867304255612,
      "grad_norm": 0.2508552670478821,
      "learning_rate": 1.777903705119166e-06,
      "loss": 0.0068,
      "step": 2515620
    },
    {
      "epoch": 4.116900034694265,
      "grad_norm": 0.17835460603237152,
      "learning_rate": 1.7778378129056492e-06,
      "loss": 0.0077,
      "step": 2515640
    },
    {
      "epoch": 4.116932765132918,
      "grad_norm": 0.3150831162929535,
      "learning_rate": 1.7777719206921322e-06,
      "loss": 0.0106,
      "step": 2515660
    },
    {
      "epoch": 4.116965495571572,
      "grad_norm": 0.2917137145996094,
      "learning_rate": 1.777706028478615e-06,
      "loss": 0.0098,
      "step": 2515680
    },
    {
      "epoch": 4.116998226010225,
      "grad_norm": 0.1083051785826683,
      "learning_rate": 1.7776401362650976e-06,
      "loss": 0.0072,
      "step": 2515700
    },
    {
      "epoch": 4.117030956448878,
      "grad_norm": 0.2326529324054718,
      "learning_rate": 1.7775742440515806e-06,
      "loss": 0.0104,
      "step": 2515720
    },
    {
      "epoch": 4.117063686887532,
      "grad_norm": 0.4494255781173706,
      "learning_rate": 1.7775083518380633e-06,
      "loss": 0.0064,
      "step": 2515740
    },
    {
      "epoch": 4.117096417326185,
      "grad_norm": 0.3958969712257385,
      "learning_rate": 1.7774424596245463e-06,
      "loss": 0.0078,
      "step": 2515760
    },
    {
      "epoch": 4.117129147764838,
      "grad_norm": 0.07150208950042725,
      "learning_rate": 1.777376567411029e-06,
      "loss": 0.0067,
      "step": 2515780
    },
    {
      "epoch": 4.1171618782034916,
      "grad_norm": 0.249755397439003,
      "learning_rate": 1.777310675197512e-06,
      "loss": 0.0076,
      "step": 2515800
    },
    {
      "epoch": 4.117194608642145,
      "grad_norm": 0.20482078194618225,
      "learning_rate": 1.7772447829839951e-06,
      "loss": 0.0114,
      "step": 2515820
    },
    {
      "epoch": 4.117227339080799,
      "grad_norm": 0.23537960648536682,
      "learning_rate": 1.7771788907704779e-06,
      "loss": 0.0107,
      "step": 2515840
    },
    {
      "epoch": 4.1172600695194514,
      "grad_norm": 0.11452997475862503,
      "learning_rate": 1.7771129985569608e-06,
      "loss": 0.0081,
      "step": 2515860
    },
    {
      "epoch": 4.117292799958105,
      "grad_norm": 0.5016213059425354,
      "learning_rate": 1.7770471063434436e-06,
      "loss": 0.0154,
      "step": 2515880
    },
    {
      "epoch": 4.117325530396759,
      "grad_norm": 0.4764876663684845,
      "learning_rate": 1.7769812141299263e-06,
      "loss": 0.0136,
      "step": 2515900
    },
    {
      "epoch": 4.117358260835411,
      "grad_norm": 0.2915777862071991,
      "learning_rate": 1.7769153219164093e-06,
      "loss": 0.0087,
      "step": 2515920
    },
    {
      "epoch": 4.117390991274065,
      "grad_norm": 0.18447229266166687,
      "learning_rate": 1.776849429702892e-06,
      "loss": 0.0093,
      "step": 2515940
    },
    {
      "epoch": 4.1174237217127185,
      "grad_norm": 0.2382851541042328,
      "learning_rate": 1.776783537489375e-06,
      "loss": 0.0138,
      "step": 2515960
    },
    {
      "epoch": 4.117456452151372,
      "grad_norm": 0.14099538326263428,
      "learning_rate": 1.7767176452758577e-06,
      "loss": 0.0078,
      "step": 2515980
    },
    {
      "epoch": 4.117489182590025,
      "grad_norm": 0.15197505056858063,
      "learning_rate": 1.7766517530623409e-06,
      "loss": 0.007,
      "step": 2516000
    },
    {
      "epoch": 4.117521913028678,
      "grad_norm": 0.27087128162384033,
      "learning_rate": 1.7765858608488238e-06,
      "loss": 0.0093,
      "step": 2516020
    },
    {
      "epoch": 4.117554643467332,
      "grad_norm": 0.0929396003484726,
      "learning_rate": 1.7765199686353065e-06,
      "loss": 0.0094,
      "step": 2516040
    },
    {
      "epoch": 4.117587373905985,
      "grad_norm": 0.254561185836792,
      "learning_rate": 1.7764540764217895e-06,
      "loss": 0.0083,
      "step": 2516060
    },
    {
      "epoch": 4.117620104344638,
      "grad_norm": 0.5279591679573059,
      "learning_rate": 1.7763881842082722e-06,
      "loss": 0.0096,
      "step": 2516080
    },
    {
      "epoch": 4.117652834783292,
      "grad_norm": 0.20521143078804016,
      "learning_rate": 1.7763222919947552e-06,
      "loss": 0.0126,
      "step": 2516100
    },
    {
      "epoch": 4.1176855652219455,
      "grad_norm": 0.026230836287140846,
      "learning_rate": 1.776256399781238e-06,
      "loss": 0.0087,
      "step": 2516120
    },
    {
      "epoch": 4.117718295660598,
      "grad_norm": 0.24492911994457245,
      "learning_rate": 1.7761905075677207e-06,
      "loss": 0.008,
      "step": 2516140
    },
    {
      "epoch": 4.117751026099252,
      "grad_norm": 0.2782308757305145,
      "learning_rate": 1.7761246153542036e-06,
      "loss": 0.007,
      "step": 2516160
    },
    {
      "epoch": 4.117783756537905,
      "grad_norm": 0.321833997964859,
      "learning_rate": 1.7760587231406868e-06,
      "loss": 0.0133,
      "step": 2516180
    },
    {
      "epoch": 4.117816486976558,
      "grad_norm": 0.6940553188323975,
      "learning_rate": 1.7759928309271695e-06,
      "loss": 0.0102,
      "step": 2516200
    },
    {
      "epoch": 4.117849217415212,
      "grad_norm": 0.4451323449611664,
      "learning_rate": 1.7759269387136525e-06,
      "loss": 0.0079,
      "step": 2516220
    },
    {
      "epoch": 4.117881947853865,
      "grad_norm": 0.24851204454898834,
      "learning_rate": 1.7758610465001352e-06,
      "loss": 0.0093,
      "step": 2516240
    },
    {
      "epoch": 4.117914678292519,
      "grad_norm": 1.0129417181015015,
      "learning_rate": 1.7757951542866182e-06,
      "loss": 0.0136,
      "step": 2516260
    },
    {
      "epoch": 4.117947408731172,
      "grad_norm": 0.14550577104091644,
      "learning_rate": 1.775729262073101e-06,
      "loss": 0.0054,
      "step": 2516280
    },
    {
      "epoch": 4.117980139169825,
      "grad_norm": 0.2638980448246002,
      "learning_rate": 1.7756633698595839e-06,
      "loss": 0.009,
      "step": 2516300
    },
    {
      "epoch": 4.118012869608479,
      "grad_norm": 0.10630390793085098,
      "learning_rate": 1.7755974776460666e-06,
      "loss": 0.0065,
      "step": 2516320
    },
    {
      "epoch": 4.1180456000471315,
      "grad_norm": 0.055413760244846344,
      "learning_rate": 1.7755315854325495e-06,
      "loss": 0.0109,
      "step": 2516340
    },
    {
      "epoch": 4.118078330485785,
      "grad_norm": 0.22974355518817902,
      "learning_rate": 1.7754656932190325e-06,
      "loss": 0.0073,
      "step": 2516360
    },
    {
      "epoch": 4.118111060924439,
      "grad_norm": 0.17726224660873413,
      "learning_rate": 1.7753998010055155e-06,
      "loss": 0.006,
      "step": 2516380
    },
    {
      "epoch": 4.118143791363092,
      "grad_norm": 0.22632400691509247,
      "learning_rate": 1.7753339087919982e-06,
      "loss": 0.0091,
      "step": 2516400
    },
    {
      "epoch": 4.118176521801745,
      "grad_norm": 0.16165541112422943,
      "learning_rate": 1.7752680165784811e-06,
      "loss": 0.0082,
      "step": 2516420
    },
    {
      "epoch": 4.118209252240399,
      "grad_norm": 0.23616957664489746,
      "learning_rate": 1.7752021243649639e-06,
      "loss": 0.0078,
      "step": 2516440
    },
    {
      "epoch": 4.118241982679052,
      "grad_norm": 0.4405977725982666,
      "learning_rate": 1.7751362321514468e-06,
      "loss": 0.0141,
      "step": 2516460
    },
    {
      "epoch": 4.118274713117705,
      "grad_norm": 0.2038608342409134,
      "learning_rate": 1.7750703399379296e-06,
      "loss": 0.0094,
      "step": 2516480
    },
    {
      "epoch": 4.1183074435563585,
      "grad_norm": 0.3078569173812866,
      "learning_rate": 1.7750044477244125e-06,
      "loss": 0.0093,
      "step": 2516500
    },
    {
      "epoch": 4.118340173995012,
      "grad_norm": 0.41937923431396484,
      "learning_rate": 1.7749385555108955e-06,
      "loss": 0.0073,
      "step": 2516520
    },
    {
      "epoch": 4.118372904433665,
      "grad_norm": 0.46322187781333923,
      "learning_rate": 1.7748726632973784e-06,
      "loss": 0.0123,
      "step": 2516540
    },
    {
      "epoch": 4.118405634872318,
      "grad_norm": 0.3073069155216217,
      "learning_rate": 1.7748067710838612e-06,
      "loss": 0.0123,
      "step": 2516560
    },
    {
      "epoch": 4.118438365310972,
      "grad_norm": 0.13447622954845428,
      "learning_rate": 1.7747408788703441e-06,
      "loss": 0.0079,
      "step": 2516580
    },
    {
      "epoch": 4.1184710957496256,
      "grad_norm": 0.1333315223455429,
      "learning_rate": 1.7746749866568269e-06,
      "loss": 0.007,
      "step": 2516600
    },
    {
      "epoch": 4.118503826188278,
      "grad_norm": 0.1656835973262787,
      "learning_rate": 1.7746090944433098e-06,
      "loss": 0.0082,
      "step": 2516620
    },
    {
      "epoch": 4.118536556626932,
      "grad_norm": 0.21342121064662933,
      "learning_rate": 1.7745432022297925e-06,
      "loss": 0.012,
      "step": 2516640
    },
    {
      "epoch": 4.1185692870655854,
      "grad_norm": 0.16637389361858368,
      "learning_rate": 1.7744773100162755e-06,
      "loss": 0.0059,
      "step": 2516660
    },
    {
      "epoch": 4.118602017504238,
      "grad_norm": 0.17989085614681244,
      "learning_rate": 1.7744114178027582e-06,
      "loss": 0.01,
      "step": 2516680
    },
    {
      "epoch": 4.118634747942892,
      "grad_norm": 0.14776676893234253,
      "learning_rate": 1.7743455255892414e-06,
      "loss": 0.0069,
      "step": 2516700
    },
    {
      "epoch": 4.118667478381545,
      "grad_norm": 0.16231340169906616,
      "learning_rate": 1.7742796333757241e-06,
      "loss": 0.0111,
      "step": 2516720
    },
    {
      "epoch": 4.118700208820199,
      "grad_norm": 0.2142377644777298,
      "learning_rate": 1.774213741162207e-06,
      "loss": 0.0133,
      "step": 2516740
    },
    {
      "epoch": 4.118732939258852,
      "grad_norm": 0.2833593189716339,
      "learning_rate": 1.7741478489486898e-06,
      "loss": 0.0094,
      "step": 2516760
    },
    {
      "epoch": 4.118765669697505,
      "grad_norm": 0.240937277674675,
      "learning_rate": 1.7740819567351728e-06,
      "loss": 0.0081,
      "step": 2516780
    },
    {
      "epoch": 4.118798400136159,
      "grad_norm": 0.158638596534729,
      "learning_rate": 1.7740160645216555e-06,
      "loss": 0.0079,
      "step": 2516800
    },
    {
      "epoch": 4.1188311305748115,
      "grad_norm": 0.3489130735397339,
      "learning_rate": 1.7739501723081385e-06,
      "loss": 0.0083,
      "step": 2516820
    },
    {
      "epoch": 4.118863861013465,
      "grad_norm": 0.3177329897880554,
      "learning_rate": 1.7738842800946212e-06,
      "loss": 0.0108,
      "step": 2516840
    },
    {
      "epoch": 4.118896591452119,
      "grad_norm": 0.11757343262434006,
      "learning_rate": 1.7738183878811042e-06,
      "loss": 0.01,
      "step": 2516860
    },
    {
      "epoch": 4.118929321890772,
      "grad_norm": 0.6889833211898804,
      "learning_rate": 1.7737524956675871e-06,
      "loss": 0.0086,
      "step": 2516880
    },
    {
      "epoch": 4.118962052329425,
      "grad_norm": 0.13846451044082642,
      "learning_rate": 1.77368660345407e-06,
      "loss": 0.014,
      "step": 2516900
    },
    {
      "epoch": 4.118994782768079,
      "grad_norm": 0.17169314622879028,
      "learning_rate": 1.7736207112405528e-06,
      "loss": 0.0059,
      "step": 2516920
    },
    {
      "epoch": 4.119027513206732,
      "grad_norm": 0.9736229777336121,
      "learning_rate": 1.7735548190270358e-06,
      "loss": 0.0078,
      "step": 2516940
    },
    {
      "epoch": 4.119060243645385,
      "grad_norm": 0.12881043553352356,
      "learning_rate": 1.7734889268135185e-06,
      "loss": 0.0097,
      "step": 2516960
    },
    {
      "epoch": 4.1190929740840385,
      "grad_norm": 0.3987056612968445,
      "learning_rate": 1.7734230346000015e-06,
      "loss": 0.009,
      "step": 2516980
    },
    {
      "epoch": 4.119125704522692,
      "grad_norm": 0.15314091742038727,
      "learning_rate": 1.7733571423864842e-06,
      "loss": 0.0109,
      "step": 2517000
    },
    {
      "epoch": 4.119158434961346,
      "grad_norm": 0.24812766909599304,
      "learning_rate": 1.7732912501729671e-06,
      "loss": 0.0073,
      "step": 2517020
    },
    {
      "epoch": 4.119191165399998,
      "grad_norm": 0.35281938314437866,
      "learning_rate": 1.7732253579594499e-06,
      "loss": 0.0103,
      "step": 2517040
    },
    {
      "epoch": 4.119223895838652,
      "grad_norm": 0.49604031443595886,
      "learning_rate": 1.773159465745933e-06,
      "loss": 0.0115,
      "step": 2517060
    },
    {
      "epoch": 4.119256626277306,
      "grad_norm": 0.14261291921138763,
      "learning_rate": 1.773093573532416e-06,
      "loss": 0.0076,
      "step": 2517080
    },
    {
      "epoch": 4.119289356715958,
      "grad_norm": 0.5395607352256775,
      "learning_rate": 1.7730276813188987e-06,
      "loss": 0.0083,
      "step": 2517100
    },
    {
      "epoch": 4.119322087154612,
      "grad_norm": 0.3001807630062103,
      "learning_rate": 1.7729617891053815e-06,
      "loss": 0.0125,
      "step": 2517120
    },
    {
      "epoch": 4.1193548175932655,
      "grad_norm": 0.23626622557640076,
      "learning_rate": 1.7728958968918644e-06,
      "loss": 0.0119,
      "step": 2517140
    },
    {
      "epoch": 4.119387548031919,
      "grad_norm": 0.2662865221500397,
      "learning_rate": 1.7728300046783472e-06,
      "loss": 0.0157,
      "step": 2517160
    },
    {
      "epoch": 4.119420278470572,
      "grad_norm": 0.4142078757286072,
      "learning_rate": 1.7727641124648301e-06,
      "loss": 0.0128,
      "step": 2517180
    },
    {
      "epoch": 4.119453008909225,
      "grad_norm": 0.18981748819351196,
      "learning_rate": 1.7726982202513129e-06,
      "loss": 0.0094,
      "step": 2517200
    },
    {
      "epoch": 4.119485739347879,
      "grad_norm": 0.3064652383327484,
      "learning_rate": 1.772632328037796e-06,
      "loss": 0.0127,
      "step": 2517220
    },
    {
      "epoch": 4.119518469786532,
      "grad_norm": 0.28175824880599976,
      "learning_rate": 1.772566435824279e-06,
      "loss": 0.0082,
      "step": 2517240
    },
    {
      "epoch": 4.119551200225185,
      "grad_norm": 0.2326193004846573,
      "learning_rate": 1.7725005436107617e-06,
      "loss": 0.0091,
      "step": 2517260
    },
    {
      "epoch": 4.119583930663839,
      "grad_norm": 0.2358948290348053,
      "learning_rate": 1.7724346513972447e-06,
      "loss": 0.0101,
      "step": 2517280
    },
    {
      "epoch": 4.1196166611024925,
      "grad_norm": 0.3358345627784729,
      "learning_rate": 1.7723687591837274e-06,
      "loss": 0.0139,
      "step": 2517300
    },
    {
      "epoch": 4.119649391541145,
      "grad_norm": 0.1923382729291916,
      "learning_rate": 1.7723028669702104e-06,
      "loss": 0.0072,
      "step": 2517320
    },
    {
      "epoch": 4.119682121979799,
      "grad_norm": 0.11724275350570679,
      "learning_rate": 1.772236974756693e-06,
      "loss": 0.0112,
      "step": 2517340
    },
    {
      "epoch": 4.119714852418452,
      "grad_norm": 0.2877262532711029,
      "learning_rate": 1.7721710825431758e-06,
      "loss": 0.0075,
      "step": 2517360
    },
    {
      "epoch": 4.119747582857105,
      "grad_norm": 0.06294648349285126,
      "learning_rate": 1.7721051903296588e-06,
      "loss": 0.0064,
      "step": 2517380
    },
    {
      "epoch": 4.119780313295759,
      "grad_norm": 0.2656603753566742,
      "learning_rate": 1.772039298116142e-06,
      "loss": 0.0068,
      "step": 2517400
    },
    {
      "epoch": 4.119813043734412,
      "grad_norm": 0.2592352628707886,
      "learning_rate": 1.7719734059026247e-06,
      "loss": 0.0071,
      "step": 2517420
    },
    {
      "epoch": 4.119845774173066,
      "grad_norm": 0.40407881140708923,
      "learning_rate": 1.7719075136891076e-06,
      "loss": 0.0077,
      "step": 2517440
    },
    {
      "epoch": 4.119878504611719,
      "grad_norm": 0.4764256179332733,
      "learning_rate": 1.7718416214755904e-06,
      "loss": 0.0088,
      "step": 2517460
    },
    {
      "epoch": 4.119911235050372,
      "grad_norm": 0.2142011672258377,
      "learning_rate": 1.7717757292620733e-06,
      "loss": 0.0076,
      "step": 2517480
    },
    {
      "epoch": 4.119943965489026,
      "grad_norm": 0.27453094720840454,
      "learning_rate": 1.771709837048556e-06,
      "loss": 0.0089,
      "step": 2517500
    },
    {
      "epoch": 4.1199766959276785,
      "grad_norm": 0.22491365671157837,
      "learning_rate": 1.771643944835039e-06,
      "loss": 0.0069,
      "step": 2517520
    },
    {
      "epoch": 4.120009426366332,
      "grad_norm": 0.22452260553836823,
      "learning_rate": 1.7715780526215218e-06,
      "loss": 0.0059,
      "step": 2517540
    },
    {
      "epoch": 4.120042156804986,
      "grad_norm": 0.13382276892662048,
      "learning_rate": 1.7715121604080045e-06,
      "loss": 0.0078,
      "step": 2517560
    },
    {
      "epoch": 4.120074887243639,
      "grad_norm": 0.16020603477954865,
      "learning_rate": 1.7714462681944877e-06,
      "loss": 0.0138,
      "step": 2517580
    },
    {
      "epoch": 4.120107617682292,
      "grad_norm": 0.18854567408561707,
      "learning_rate": 1.7713803759809706e-06,
      "loss": 0.0084,
      "step": 2517600
    },
    {
      "epoch": 4.1201403481209455,
      "grad_norm": 0.09339015930891037,
      "learning_rate": 1.7713144837674534e-06,
      "loss": 0.0087,
      "step": 2517620
    },
    {
      "epoch": 4.120173078559599,
      "grad_norm": 0.8519827127456665,
      "learning_rate": 1.7712485915539363e-06,
      "loss": 0.0116,
      "step": 2517640
    },
    {
      "epoch": 4.120205808998252,
      "grad_norm": 0.05672883242368698,
      "learning_rate": 1.771182699340419e-06,
      "loss": 0.0104,
      "step": 2517660
    },
    {
      "epoch": 4.120238539436905,
      "grad_norm": 0.12178866565227509,
      "learning_rate": 1.771116807126902e-06,
      "loss": 0.0125,
      "step": 2517680
    },
    {
      "epoch": 4.120271269875559,
      "grad_norm": 0.1418115496635437,
      "learning_rate": 1.7710509149133847e-06,
      "loss": 0.0098,
      "step": 2517700
    },
    {
      "epoch": 4.120304000314213,
      "grad_norm": 0.2461521327495575,
      "learning_rate": 1.7709850226998677e-06,
      "loss": 0.0091,
      "step": 2517720
    },
    {
      "epoch": 4.120336730752865,
      "grad_norm": 0.19380730390548706,
      "learning_rate": 1.7709191304863504e-06,
      "loss": 0.0078,
      "step": 2517740
    },
    {
      "epoch": 4.120369461191519,
      "grad_norm": 0.35759639739990234,
      "learning_rate": 1.7708532382728336e-06,
      "loss": 0.0089,
      "step": 2517760
    },
    {
      "epoch": 4.1204021916301725,
      "grad_norm": 0.2659982442855835,
      "learning_rate": 1.7707873460593163e-06,
      "loss": 0.0073,
      "step": 2517780
    },
    {
      "epoch": 4.120434922068825,
      "grad_norm": 0.13099946081638336,
      "learning_rate": 1.7707214538457993e-06,
      "loss": 0.0114,
      "step": 2517800
    },
    {
      "epoch": 4.120467652507479,
      "grad_norm": 0.36216631531715393,
      "learning_rate": 1.770655561632282e-06,
      "loss": 0.0102,
      "step": 2517820
    },
    {
      "epoch": 4.120500382946132,
      "grad_norm": 0.053300511091947556,
      "learning_rate": 1.770589669418765e-06,
      "loss": 0.0124,
      "step": 2517840
    },
    {
      "epoch": 4.120533113384786,
      "grad_norm": 0.3010888993740082,
      "learning_rate": 1.7705237772052477e-06,
      "loss": 0.0114,
      "step": 2517860
    },
    {
      "epoch": 4.120565843823439,
      "grad_norm": 0.33342400193214417,
      "learning_rate": 1.7704578849917307e-06,
      "loss": 0.0066,
      "step": 2517880
    },
    {
      "epoch": 4.120598574262092,
      "grad_norm": 0.1212281733751297,
      "learning_rate": 1.7703919927782134e-06,
      "loss": 0.0073,
      "step": 2517900
    },
    {
      "epoch": 4.120631304700746,
      "grad_norm": 0.20204223692417145,
      "learning_rate": 1.7703261005646964e-06,
      "loss": 0.0094,
      "step": 2517920
    },
    {
      "epoch": 4.120664035139399,
      "grad_norm": 0.3689211905002594,
      "learning_rate": 1.7702602083511793e-06,
      "loss": 0.0099,
      "step": 2517940
    },
    {
      "epoch": 4.120696765578052,
      "grad_norm": 0.1670064479112625,
      "learning_rate": 1.7701943161376623e-06,
      "loss": 0.0142,
      "step": 2517960
    },
    {
      "epoch": 4.120729496016706,
      "grad_norm": 0.2257324606180191,
      "learning_rate": 1.770128423924145e-06,
      "loss": 0.0104,
      "step": 2517980
    },
    {
      "epoch": 4.120762226455359,
      "grad_norm": 0.07993827015161514,
      "learning_rate": 1.770062531710628e-06,
      "loss": 0.0082,
      "step": 2518000
    },
    {
      "epoch": 4.120794956894012,
      "grad_norm": 0.28223371505737305,
      "learning_rate": 1.7699966394971107e-06,
      "loss": 0.0119,
      "step": 2518020
    },
    {
      "epoch": 4.120827687332666,
      "grad_norm": 0.08041703701019287,
      "learning_rate": 1.7699307472835936e-06,
      "loss": 0.0054,
      "step": 2518040
    },
    {
      "epoch": 4.120860417771319,
      "grad_norm": 0.07295195013284683,
      "learning_rate": 1.7698648550700764e-06,
      "loss": 0.0086,
      "step": 2518060
    },
    {
      "epoch": 4.120893148209972,
      "grad_norm": 0.30365902185440063,
      "learning_rate": 1.7697989628565593e-06,
      "loss": 0.0082,
      "step": 2518080
    },
    {
      "epoch": 4.120925878648626,
      "grad_norm": 0.13081178069114685,
      "learning_rate": 1.7697330706430423e-06,
      "loss": 0.01,
      "step": 2518100
    },
    {
      "epoch": 4.120958609087279,
      "grad_norm": 0.023436129093170166,
      "learning_rate": 1.7696671784295252e-06,
      "loss": 0.0059,
      "step": 2518120
    },
    {
      "epoch": 4.120991339525933,
      "grad_norm": 0.25201737880706787,
      "learning_rate": 1.769601286216008e-06,
      "loss": 0.013,
      "step": 2518140
    },
    {
      "epoch": 4.1210240699645855,
      "grad_norm": 0.32528406381607056,
      "learning_rate": 1.769535394002491e-06,
      "loss": 0.0108,
      "step": 2518160
    },
    {
      "epoch": 4.121056800403239,
      "grad_norm": 0.1790023148059845,
      "learning_rate": 1.7694695017889737e-06,
      "loss": 0.009,
      "step": 2518180
    },
    {
      "epoch": 4.121089530841893,
      "grad_norm": 0.8490768074989319,
      "learning_rate": 1.7694036095754566e-06,
      "loss": 0.0126,
      "step": 2518200
    },
    {
      "epoch": 4.121122261280545,
      "grad_norm": 0.5410172343254089,
      "learning_rate": 1.7693377173619394e-06,
      "loss": 0.0089,
      "step": 2518220
    },
    {
      "epoch": 4.121154991719199,
      "grad_norm": 0.34970560669898987,
      "learning_rate": 1.7692718251484223e-06,
      "loss": 0.0101,
      "step": 2518240
    },
    {
      "epoch": 4.121187722157853,
      "grad_norm": 0.0948454886674881,
      "learning_rate": 1.769205932934905e-06,
      "loss": 0.0079,
      "step": 2518260
    },
    {
      "epoch": 4.121220452596505,
      "grad_norm": 0.13822387158870697,
      "learning_rate": 1.7691400407213882e-06,
      "loss": 0.007,
      "step": 2518280
    },
    {
      "epoch": 4.121253183035159,
      "grad_norm": 0.06772672384977341,
      "learning_rate": 1.7690741485078712e-06,
      "loss": 0.01,
      "step": 2518300
    },
    {
      "epoch": 4.1212859134738125,
      "grad_norm": 0.1674804985523224,
      "learning_rate": 1.769008256294354e-06,
      "loss": 0.0089,
      "step": 2518320
    },
    {
      "epoch": 4.121318643912466,
      "grad_norm": 0.7963833212852478,
      "learning_rate": 1.7689423640808367e-06,
      "loss": 0.0087,
      "step": 2518340
    },
    {
      "epoch": 4.121351374351119,
      "grad_norm": 0.22552192211151123,
      "learning_rate": 1.7688764718673196e-06,
      "loss": 0.0088,
      "step": 2518360
    },
    {
      "epoch": 4.121384104789772,
      "grad_norm": 0.15765365958213806,
      "learning_rate": 1.7688105796538023e-06,
      "loss": 0.0073,
      "step": 2518380
    },
    {
      "epoch": 4.121416835228426,
      "grad_norm": 0.19580157101154327,
      "learning_rate": 1.7687446874402853e-06,
      "loss": 0.0147,
      "step": 2518400
    },
    {
      "epoch": 4.121449565667079,
      "grad_norm": 0.1702456772327423,
      "learning_rate": 1.768678795226768e-06,
      "loss": 0.0081,
      "step": 2518420
    },
    {
      "epoch": 4.121482296105732,
      "grad_norm": 0.05949726700782776,
      "learning_rate": 1.768612903013251e-06,
      "loss": 0.014,
      "step": 2518440
    },
    {
      "epoch": 4.121515026544386,
      "grad_norm": 0.12875428795814514,
      "learning_rate": 1.7685470107997341e-06,
      "loss": 0.0071,
      "step": 2518460
    },
    {
      "epoch": 4.121547756983039,
      "grad_norm": 0.3209358751773834,
      "learning_rate": 1.7684811185862169e-06,
      "loss": 0.0078,
      "step": 2518480
    },
    {
      "epoch": 4.121580487421692,
      "grad_norm": 0.33512455224990845,
      "learning_rate": 1.7684152263726998e-06,
      "loss": 0.0151,
      "step": 2518500
    },
    {
      "epoch": 4.121613217860346,
      "grad_norm": 0.18780465424060822,
      "learning_rate": 1.7683493341591826e-06,
      "loss": 0.007,
      "step": 2518520
    },
    {
      "epoch": 4.121645948298999,
      "grad_norm": 0.09886226803064346,
      "learning_rate": 1.7682834419456653e-06,
      "loss": 0.0103,
      "step": 2518540
    },
    {
      "epoch": 4.121678678737652,
      "grad_norm": 0.37999945878982544,
      "learning_rate": 1.7682175497321483e-06,
      "loss": 0.0071,
      "step": 2518560
    },
    {
      "epoch": 4.121711409176306,
      "grad_norm": 0.17010533809661865,
      "learning_rate": 1.768151657518631e-06,
      "loss": 0.0129,
      "step": 2518580
    },
    {
      "epoch": 4.121744139614959,
      "grad_norm": 0.1263977289199829,
      "learning_rate": 1.768085765305114e-06,
      "loss": 0.0073,
      "step": 2518600
    },
    {
      "epoch": 4.121776870053613,
      "grad_norm": 0.2747191786766052,
      "learning_rate": 1.7680198730915967e-06,
      "loss": 0.0099,
      "step": 2518620
    },
    {
      "epoch": 4.1218096004922655,
      "grad_norm": 0.6748102307319641,
      "learning_rate": 1.7679539808780799e-06,
      "loss": 0.0105,
      "step": 2518640
    },
    {
      "epoch": 4.121842330930919,
      "grad_norm": 0.736630380153656,
      "learning_rate": 1.7678880886645628e-06,
      "loss": 0.0112,
      "step": 2518660
    },
    {
      "epoch": 4.121875061369573,
      "grad_norm": 0.5457138419151306,
      "learning_rate": 1.7678221964510456e-06,
      "loss": 0.0062,
      "step": 2518680
    },
    {
      "epoch": 4.121907791808225,
      "grad_norm": 0.13250966370105743,
      "learning_rate": 1.7677563042375285e-06,
      "loss": 0.0118,
      "step": 2518700
    },
    {
      "epoch": 4.121940522246879,
      "grad_norm": 0.10451535880565643,
      "learning_rate": 1.7676904120240112e-06,
      "loss": 0.0088,
      "step": 2518720
    },
    {
      "epoch": 4.121973252685533,
      "grad_norm": 0.2009231150150299,
      "learning_rate": 1.7676245198104942e-06,
      "loss": 0.0073,
      "step": 2518740
    },
    {
      "epoch": 4.122005983124186,
      "grad_norm": 0.09785378724336624,
      "learning_rate": 1.767558627596977e-06,
      "loss": 0.0129,
      "step": 2518760
    },
    {
      "epoch": 4.122038713562839,
      "grad_norm": 0.3910430669784546,
      "learning_rate": 1.7674927353834597e-06,
      "loss": 0.0118,
      "step": 2518780
    },
    {
      "epoch": 4.1220714440014925,
      "grad_norm": 0.2235795110464096,
      "learning_rate": 1.7674268431699426e-06,
      "loss": 0.0069,
      "step": 2518800
    },
    {
      "epoch": 4.122104174440146,
      "grad_norm": 0.15563620626926422,
      "learning_rate": 1.7673609509564258e-06,
      "loss": 0.0131,
      "step": 2518820
    },
    {
      "epoch": 4.122136904878799,
      "grad_norm": 0.2841165065765381,
      "learning_rate": 1.7672950587429085e-06,
      "loss": 0.0078,
      "step": 2518840
    },
    {
      "epoch": 4.122169635317452,
      "grad_norm": 0.275677889585495,
      "learning_rate": 1.7672291665293915e-06,
      "loss": 0.0106,
      "step": 2518860
    },
    {
      "epoch": 4.122202365756106,
      "grad_norm": 0.2532963156700134,
      "learning_rate": 1.7671632743158742e-06,
      "loss": 0.0085,
      "step": 2518880
    },
    {
      "epoch": 4.12223509619476,
      "grad_norm": 0.2791796326637268,
      "learning_rate": 1.7670973821023572e-06,
      "loss": 0.0127,
      "step": 2518900
    },
    {
      "epoch": 4.122267826633412,
      "grad_norm": 0.2215910106897354,
      "learning_rate": 1.76703148988884e-06,
      "loss": 0.0087,
      "step": 2518920
    },
    {
      "epoch": 4.122300557072066,
      "grad_norm": 0.13929641246795654,
      "learning_rate": 1.7669655976753229e-06,
      "loss": 0.0081,
      "step": 2518940
    },
    {
      "epoch": 4.1223332875107195,
      "grad_norm": 0.07339242845773697,
      "learning_rate": 1.7668997054618056e-06,
      "loss": 0.0113,
      "step": 2518960
    },
    {
      "epoch": 4.122366017949372,
      "grad_norm": 0.18655221164226532,
      "learning_rate": 1.7668338132482888e-06,
      "loss": 0.0094,
      "step": 2518980
    },
    {
      "epoch": 4.122398748388026,
      "grad_norm": 0.13820616900920868,
      "learning_rate": 1.7667679210347715e-06,
      "loss": 0.0049,
      "step": 2519000
    },
    {
      "epoch": 4.122431478826679,
      "grad_norm": 0.15606847405433655,
      "learning_rate": 1.7667020288212545e-06,
      "loss": 0.0081,
      "step": 2519020
    },
    {
      "epoch": 4.122464209265333,
      "grad_norm": 0.1889684647321701,
      "learning_rate": 1.7666361366077372e-06,
      "loss": 0.0086,
      "step": 2519040
    },
    {
      "epoch": 4.122496939703986,
      "grad_norm": 0.2678545415401459,
      "learning_rate": 1.7665702443942202e-06,
      "loss": 0.0112,
      "step": 2519060
    },
    {
      "epoch": 4.122529670142639,
      "grad_norm": 0.1110696792602539,
      "learning_rate": 1.7665043521807029e-06,
      "loss": 0.0089,
      "step": 2519080
    },
    {
      "epoch": 4.122562400581293,
      "grad_norm": 0.6265450716018677,
      "learning_rate": 1.7664384599671858e-06,
      "loss": 0.0081,
      "step": 2519100
    },
    {
      "epoch": 4.122595131019946,
      "grad_norm": 0.12642726302146912,
      "learning_rate": 1.7663725677536686e-06,
      "loss": 0.0098,
      "step": 2519120
    },
    {
      "epoch": 4.122627861458599,
      "grad_norm": 0.07733567804098129,
      "learning_rate": 1.7663066755401515e-06,
      "loss": 0.0079,
      "step": 2519140
    },
    {
      "epoch": 4.122660591897253,
      "grad_norm": 0.07461399585008621,
      "learning_rate": 1.7662407833266345e-06,
      "loss": 0.0055,
      "step": 2519160
    },
    {
      "epoch": 4.122693322335906,
      "grad_norm": 0.1430552899837494,
      "learning_rate": 1.7661748911131174e-06,
      "loss": 0.0071,
      "step": 2519180
    },
    {
      "epoch": 4.122726052774559,
      "grad_norm": 0.15775221586227417,
      "learning_rate": 1.7661089988996002e-06,
      "loss": 0.0064,
      "step": 2519200
    },
    {
      "epoch": 4.122758783213213,
      "grad_norm": 0.15630589425563812,
      "learning_rate": 1.7660431066860831e-06,
      "loss": 0.0101,
      "step": 2519220
    },
    {
      "epoch": 4.122791513651866,
      "grad_norm": 0.06068416312336922,
      "learning_rate": 1.7659772144725659e-06,
      "loss": 0.0092,
      "step": 2519240
    },
    {
      "epoch": 4.122824244090519,
      "grad_norm": 0.14188852906227112,
      "learning_rate": 1.7659113222590488e-06,
      "loss": 0.008,
      "step": 2519260
    },
    {
      "epoch": 4.1228569745291725,
      "grad_norm": 0.3257627487182617,
      "learning_rate": 1.7658454300455316e-06,
      "loss": 0.0066,
      "step": 2519280
    },
    {
      "epoch": 4.122889704967826,
      "grad_norm": 0.151612788438797,
      "learning_rate": 1.7657795378320145e-06,
      "loss": 0.0103,
      "step": 2519300
    },
    {
      "epoch": 4.12292243540648,
      "grad_norm": 0.20023716986179352,
      "learning_rate": 1.7657136456184973e-06,
      "loss": 0.0092,
      "step": 2519320
    },
    {
      "epoch": 4.122955165845132,
      "grad_norm": 0.10383497178554535,
      "learning_rate": 1.7656477534049804e-06,
      "loss": 0.0096,
      "step": 2519340
    },
    {
      "epoch": 4.122987896283786,
      "grad_norm": 0.2281186729669571,
      "learning_rate": 1.7655818611914632e-06,
      "loss": 0.012,
      "step": 2519360
    },
    {
      "epoch": 4.12302062672244,
      "grad_norm": 0.7848563194274902,
      "learning_rate": 1.7655159689779461e-06,
      "loss": 0.0094,
      "step": 2519380
    },
    {
      "epoch": 4.123053357161092,
      "grad_norm": 0.5399349927902222,
      "learning_rate": 1.7654500767644288e-06,
      "loss": 0.0081,
      "step": 2519400
    },
    {
      "epoch": 4.123086087599746,
      "grad_norm": 0.15327788889408112,
      "learning_rate": 1.7653841845509118e-06,
      "loss": 0.011,
      "step": 2519420
    },
    {
      "epoch": 4.1231188180383995,
      "grad_norm": 0.17601969838142395,
      "learning_rate": 1.7653182923373945e-06,
      "loss": 0.0105,
      "step": 2519440
    },
    {
      "epoch": 4.123151548477053,
      "grad_norm": 0.3009933829307556,
      "learning_rate": 1.7652524001238775e-06,
      "loss": 0.0085,
      "step": 2519460
    },
    {
      "epoch": 4.123184278915706,
      "grad_norm": 0.9684013724327087,
      "learning_rate": 1.7651865079103602e-06,
      "loss": 0.013,
      "step": 2519480
    },
    {
      "epoch": 4.123217009354359,
      "grad_norm": 0.05114123225212097,
      "learning_rate": 1.7651206156968432e-06,
      "loss": 0.0082,
      "step": 2519500
    },
    {
      "epoch": 4.123249739793013,
      "grad_norm": 0.19932417571544647,
      "learning_rate": 1.7650547234833263e-06,
      "loss": 0.0085,
      "step": 2519520
    },
    {
      "epoch": 4.123282470231666,
      "grad_norm": 0.07290231436491013,
      "learning_rate": 1.764988831269809e-06,
      "loss": 0.0082,
      "step": 2519540
    },
    {
      "epoch": 4.123315200670319,
      "grad_norm": 0.4335591197013855,
      "learning_rate": 1.7649229390562918e-06,
      "loss": 0.0193,
      "step": 2519560
    },
    {
      "epoch": 4.123347931108973,
      "grad_norm": 0.06602273881435394,
      "learning_rate": 1.7648570468427748e-06,
      "loss": 0.0109,
      "step": 2519580
    },
    {
      "epoch": 4.1233806615476265,
      "grad_norm": 0.2818892300128937,
      "learning_rate": 1.7647911546292575e-06,
      "loss": 0.008,
      "step": 2519600
    },
    {
      "epoch": 4.123413391986279,
      "grad_norm": 0.08150076866149902,
      "learning_rate": 1.7647252624157405e-06,
      "loss": 0.0128,
      "step": 2519620
    },
    {
      "epoch": 4.123446122424933,
      "grad_norm": 0.17000481486320496,
      "learning_rate": 1.7646593702022232e-06,
      "loss": 0.0152,
      "step": 2519640
    },
    {
      "epoch": 4.123478852863586,
      "grad_norm": 0.3106594383716583,
      "learning_rate": 1.7645934779887062e-06,
      "loss": 0.0071,
      "step": 2519660
    },
    {
      "epoch": 4.123511583302239,
      "grad_norm": 0.46960949897766113,
      "learning_rate": 1.764527585775189e-06,
      "loss": 0.0109,
      "step": 2519680
    },
    {
      "epoch": 4.123544313740893,
      "grad_norm": 0.3631522059440613,
      "learning_rate": 1.764461693561672e-06,
      "loss": 0.0113,
      "step": 2519700
    },
    {
      "epoch": 4.123577044179546,
      "grad_norm": 0.10683251172304153,
      "learning_rate": 1.764395801348155e-06,
      "loss": 0.0085,
      "step": 2519720
    },
    {
      "epoch": 4.123609774618199,
      "grad_norm": 0.1972101777791977,
      "learning_rate": 1.7643299091346378e-06,
      "loss": 0.0072,
      "step": 2519740
    },
    {
      "epoch": 4.123642505056853,
      "grad_norm": 0.28255757689476013,
      "learning_rate": 1.7642640169211205e-06,
      "loss": 0.0076,
      "step": 2519760
    },
    {
      "epoch": 4.123675235495506,
      "grad_norm": 0.23595379292964935,
      "learning_rate": 1.7641981247076034e-06,
      "loss": 0.009,
      "step": 2519780
    },
    {
      "epoch": 4.12370796593416,
      "grad_norm": 0.1408848613500595,
      "learning_rate": 1.7641322324940862e-06,
      "loss": 0.0071,
      "step": 2519800
    },
    {
      "epoch": 4.1237406963728125,
      "grad_norm": 0.1779957264661789,
      "learning_rate": 1.7640663402805691e-06,
      "loss": 0.008,
      "step": 2519820
    },
    {
      "epoch": 4.123773426811466,
      "grad_norm": 0.0867501050233841,
      "learning_rate": 1.7640004480670519e-06,
      "loss": 0.0105,
      "step": 2519840
    },
    {
      "epoch": 4.12380615725012,
      "grad_norm": 0.3262206017971039,
      "learning_rate": 1.763934555853535e-06,
      "loss": 0.0076,
      "step": 2519860
    },
    {
      "epoch": 4.123838887688772,
      "grad_norm": 0.21336054801940918,
      "learning_rate": 1.763868663640018e-06,
      "loss": 0.0079,
      "step": 2519880
    },
    {
      "epoch": 4.123871618127426,
      "grad_norm": 0.8377826809883118,
      "learning_rate": 1.7638027714265007e-06,
      "loss": 0.0097,
      "step": 2519900
    },
    {
      "epoch": 4.12390434856608,
      "grad_norm": 0.16356661915779114,
      "learning_rate": 1.7637368792129837e-06,
      "loss": 0.007,
      "step": 2519920
    },
    {
      "epoch": 4.123937079004733,
      "grad_norm": 0.07888460159301758,
      "learning_rate": 1.7636709869994664e-06,
      "loss": 0.0093,
      "step": 2519940
    },
    {
      "epoch": 4.123969809443386,
      "grad_norm": 0.6755910515785217,
      "learning_rate": 1.7636050947859494e-06,
      "loss": 0.0095,
      "step": 2519960
    },
    {
      "epoch": 4.1240025398820395,
      "grad_norm": 0.2512719929218292,
      "learning_rate": 1.7635392025724321e-06,
      "loss": 0.0118,
      "step": 2519980
    },
    {
      "epoch": 4.124035270320693,
      "grad_norm": 0.06144800782203674,
      "learning_rate": 1.7634733103589148e-06,
      "loss": 0.0054,
      "step": 2520000
    },
    {
      "epoch": 4.124068000759346,
      "grad_norm": 0.47571152448654175,
      "learning_rate": 1.7634074181453978e-06,
      "loss": 0.0103,
      "step": 2520020
    },
    {
      "epoch": 4.124100731197999,
      "grad_norm": 0.4705367386341095,
      "learning_rate": 1.763341525931881e-06,
      "loss": 0.0101,
      "step": 2520040
    },
    {
      "epoch": 4.124133461636653,
      "grad_norm": 0.08870577812194824,
      "learning_rate": 1.7632756337183637e-06,
      "loss": 0.0126,
      "step": 2520060
    },
    {
      "epoch": 4.1241661920753065,
      "grad_norm": 0.10470930486917496,
      "learning_rate": 1.7632097415048467e-06,
      "loss": 0.0096,
      "step": 2520080
    },
    {
      "epoch": 4.124198922513959,
      "grad_norm": 0.137304425239563,
      "learning_rate": 1.7631438492913294e-06,
      "loss": 0.0081,
      "step": 2520100
    },
    {
      "epoch": 4.124231652952613,
      "grad_norm": 0.31753313541412354,
      "learning_rate": 1.7630779570778123e-06,
      "loss": 0.0074,
      "step": 2520120
    },
    {
      "epoch": 4.124264383391266,
      "grad_norm": 0.16820266842842102,
      "learning_rate": 1.763012064864295e-06,
      "loss": 0.0123,
      "step": 2520140
    },
    {
      "epoch": 4.124297113829919,
      "grad_norm": 0.11921633780002594,
      "learning_rate": 1.762946172650778e-06,
      "loss": 0.009,
      "step": 2520160
    },
    {
      "epoch": 4.124329844268573,
      "grad_norm": 0.1585301160812378,
      "learning_rate": 1.7628802804372608e-06,
      "loss": 0.0081,
      "step": 2520180
    },
    {
      "epoch": 4.124362574707226,
      "grad_norm": 0.7498784065246582,
      "learning_rate": 1.7628143882237435e-06,
      "loss": 0.0114,
      "step": 2520200
    },
    {
      "epoch": 4.12439530514588,
      "grad_norm": 0.25351497530937195,
      "learning_rate": 1.7627484960102267e-06,
      "loss": 0.0081,
      "step": 2520220
    },
    {
      "epoch": 4.124428035584533,
      "grad_norm": 0.10373286157846451,
      "learning_rate": 1.7626826037967096e-06,
      "loss": 0.0128,
      "step": 2520240
    },
    {
      "epoch": 4.124460766023186,
      "grad_norm": 0.25239500403404236,
      "learning_rate": 1.7626167115831924e-06,
      "loss": 0.0129,
      "step": 2520260
    },
    {
      "epoch": 4.12449349646184,
      "grad_norm": 0.3446474075317383,
      "learning_rate": 1.7625508193696753e-06,
      "loss": 0.009,
      "step": 2520280
    },
    {
      "epoch": 4.1245262269004925,
      "grad_norm": 0.12854811549186707,
      "learning_rate": 1.762484927156158e-06,
      "loss": 0.0076,
      "step": 2520300
    },
    {
      "epoch": 4.124558957339146,
      "grad_norm": 0.26819881796836853,
      "learning_rate": 1.762419034942641e-06,
      "loss": 0.0093,
      "step": 2520320
    },
    {
      "epoch": 4.1245916877778,
      "grad_norm": 0.11042053997516632,
      "learning_rate": 1.7623531427291238e-06,
      "loss": 0.0081,
      "step": 2520340
    },
    {
      "epoch": 4.124624418216453,
      "grad_norm": 0.11760867387056351,
      "learning_rate": 1.7622872505156067e-06,
      "loss": 0.0068,
      "step": 2520360
    },
    {
      "epoch": 4.124657148655106,
      "grad_norm": 0.34638911485671997,
      "learning_rate": 1.7622213583020894e-06,
      "loss": 0.0102,
      "step": 2520380
    },
    {
      "epoch": 4.12468987909376,
      "grad_norm": 0.5079326629638672,
      "learning_rate": 1.7621554660885726e-06,
      "loss": 0.0105,
      "step": 2520400
    },
    {
      "epoch": 4.124722609532413,
      "grad_norm": 0.16287672519683838,
      "learning_rate": 1.7620895738750553e-06,
      "loss": 0.0075,
      "step": 2520420
    },
    {
      "epoch": 4.124755339971066,
      "grad_norm": 0.2065332680940628,
      "learning_rate": 1.7620236816615383e-06,
      "loss": 0.0052,
      "step": 2520440
    },
    {
      "epoch": 4.1247880704097195,
      "grad_norm": 0.6608378887176514,
      "learning_rate": 1.761957789448021e-06,
      "loss": 0.0088,
      "step": 2520460
    },
    {
      "epoch": 4.124820800848373,
      "grad_norm": 0.20272304117679596,
      "learning_rate": 1.761891897234504e-06,
      "loss": 0.0107,
      "step": 2520480
    },
    {
      "epoch": 4.124853531287027,
      "grad_norm": 0.2046731859445572,
      "learning_rate": 1.7618260050209867e-06,
      "loss": 0.0126,
      "step": 2520500
    },
    {
      "epoch": 4.124886261725679,
      "grad_norm": 0.11030799895524979,
      "learning_rate": 1.7617601128074697e-06,
      "loss": 0.0114,
      "step": 2520520
    },
    {
      "epoch": 4.124918992164333,
      "grad_norm": 0.3611745536327362,
      "learning_rate": 1.7616942205939524e-06,
      "loss": 0.0077,
      "step": 2520540
    },
    {
      "epoch": 4.124951722602987,
      "grad_norm": 0.24273015558719635,
      "learning_rate": 1.7616283283804354e-06,
      "loss": 0.0108,
      "step": 2520560
    },
    {
      "epoch": 4.124984453041639,
      "grad_norm": 0.11147440969944,
      "learning_rate": 1.7615624361669183e-06,
      "loss": 0.0101,
      "step": 2520580
    },
    {
      "epoch": 4.125017183480293,
      "grad_norm": 0.11177405714988708,
      "learning_rate": 1.7614965439534013e-06,
      "loss": 0.0102,
      "step": 2520600
    },
    {
      "epoch": 4.1250499139189465,
      "grad_norm": 0.25489741563796997,
      "learning_rate": 1.761430651739884e-06,
      "loss": 0.0078,
      "step": 2520620
    },
    {
      "epoch": 4.1250826443576,
      "grad_norm": 0.2978282868862152,
      "learning_rate": 1.761364759526367e-06,
      "loss": 0.0098,
      "step": 2520640
    },
    {
      "epoch": 4.125115374796253,
      "grad_norm": 0.1097632572054863,
      "learning_rate": 1.7612988673128497e-06,
      "loss": 0.0072,
      "step": 2520660
    },
    {
      "epoch": 4.125148105234906,
      "grad_norm": 0.5699754953384399,
      "learning_rate": 1.7612329750993327e-06,
      "loss": 0.0101,
      "step": 2520680
    },
    {
      "epoch": 4.12518083567356,
      "grad_norm": 0.5991564989089966,
      "learning_rate": 1.7611670828858154e-06,
      "loss": 0.0093,
      "step": 2520700
    },
    {
      "epoch": 4.125213566112213,
      "grad_norm": 0.6529653668403625,
      "learning_rate": 1.7611011906722984e-06,
      "loss": 0.0102,
      "step": 2520720
    },
    {
      "epoch": 4.125246296550866,
      "grad_norm": 0.33974242210388184,
      "learning_rate": 1.7610352984587813e-06,
      "loss": 0.0108,
      "step": 2520740
    },
    {
      "epoch": 4.12527902698952,
      "grad_norm": 0.3403601050376892,
      "learning_rate": 1.7609694062452643e-06,
      "loss": 0.0095,
      "step": 2520760
    },
    {
      "epoch": 4.1253117574281735,
      "grad_norm": 0.25012338161468506,
      "learning_rate": 1.760903514031747e-06,
      "loss": 0.0047,
      "step": 2520780
    },
    {
      "epoch": 4.125344487866826,
      "grad_norm": 0.24426059424877167,
      "learning_rate": 1.76083762181823e-06,
      "loss": 0.0074,
      "step": 2520800
    },
    {
      "epoch": 4.12537721830548,
      "grad_norm": 0.5531142354011536,
      "learning_rate": 1.7607717296047127e-06,
      "loss": 0.0126,
      "step": 2520820
    },
    {
      "epoch": 4.125409948744133,
      "grad_norm": 0.10987438261508942,
      "learning_rate": 1.7607058373911956e-06,
      "loss": 0.0084,
      "step": 2520840
    },
    {
      "epoch": 4.125442679182786,
      "grad_norm": 0.2202475368976593,
      "learning_rate": 1.7606399451776784e-06,
      "loss": 0.0092,
      "step": 2520860
    },
    {
      "epoch": 4.12547540962144,
      "grad_norm": 0.21842554211616516,
      "learning_rate": 1.7605740529641613e-06,
      "loss": 0.0074,
      "step": 2520880
    },
    {
      "epoch": 4.125508140060093,
      "grad_norm": 0.17557691037654877,
      "learning_rate": 1.760508160750644e-06,
      "loss": 0.0093,
      "step": 2520900
    },
    {
      "epoch": 4.125540870498747,
      "grad_norm": 0.27009376883506775,
      "learning_rate": 1.7604422685371272e-06,
      "loss": 0.0141,
      "step": 2520920
    },
    {
      "epoch": 4.1255736009373996,
      "grad_norm": 0.3627719283103943,
      "learning_rate": 1.7603763763236102e-06,
      "loss": 0.0163,
      "step": 2520940
    },
    {
      "epoch": 4.125606331376053,
      "grad_norm": 0.36681288480758667,
      "learning_rate": 1.760310484110093e-06,
      "loss": 0.0121,
      "step": 2520960
    },
    {
      "epoch": 4.125639061814707,
      "grad_norm": 0.09275560081005096,
      "learning_rate": 1.7602445918965757e-06,
      "loss": 0.01,
      "step": 2520980
    },
    {
      "epoch": 4.1256717922533594,
      "grad_norm": 0.13295209407806396,
      "learning_rate": 1.7601786996830586e-06,
      "loss": 0.0103,
      "step": 2521000
    },
    {
      "epoch": 4.125704522692013,
      "grad_norm": 0.1440664827823639,
      "learning_rate": 1.7601128074695414e-06,
      "loss": 0.0055,
      "step": 2521020
    },
    {
      "epoch": 4.125737253130667,
      "grad_norm": 0.12002016603946686,
      "learning_rate": 1.7600469152560243e-06,
      "loss": 0.0108,
      "step": 2521040
    },
    {
      "epoch": 4.12576998356932,
      "grad_norm": 0.15941688418388367,
      "learning_rate": 1.759981023042507e-06,
      "loss": 0.009,
      "step": 2521060
    },
    {
      "epoch": 4.125802714007973,
      "grad_norm": 0.2996505796909332,
      "learning_rate": 1.75991513082899e-06,
      "loss": 0.0069,
      "step": 2521080
    },
    {
      "epoch": 4.1258354444466265,
      "grad_norm": 0.24387453496456146,
      "learning_rate": 1.7598492386154732e-06,
      "loss": 0.0078,
      "step": 2521100
    },
    {
      "epoch": 4.12586817488528,
      "grad_norm": 0.20964166522026062,
      "learning_rate": 1.759783346401956e-06,
      "loss": 0.0104,
      "step": 2521120
    },
    {
      "epoch": 4.125900905323933,
      "grad_norm": 0.06771013885736465,
      "learning_rate": 1.7597174541884389e-06,
      "loss": 0.0101,
      "step": 2521140
    },
    {
      "epoch": 4.125933635762586,
      "grad_norm": 0.13814722001552582,
      "learning_rate": 1.7596515619749216e-06,
      "loss": 0.0079,
      "step": 2521160
    },
    {
      "epoch": 4.12596636620124,
      "grad_norm": 0.17069076001644135,
      "learning_rate": 1.7595856697614045e-06,
      "loss": 0.0097,
      "step": 2521180
    },
    {
      "epoch": 4.125999096639893,
      "grad_norm": 0.34936073422431946,
      "learning_rate": 1.7595197775478873e-06,
      "loss": 0.0066,
      "step": 2521200
    },
    {
      "epoch": 4.126031827078546,
      "grad_norm": 0.15521308779716492,
      "learning_rate": 1.75945388533437e-06,
      "loss": 0.0081,
      "step": 2521220
    },
    {
      "epoch": 4.1260645575172,
      "grad_norm": 0.4062498211860657,
      "learning_rate": 1.759387993120853e-06,
      "loss": 0.0083,
      "step": 2521240
    },
    {
      "epoch": 4.1260972879558535,
      "grad_norm": 0.1262306272983551,
      "learning_rate": 1.7593221009073357e-06,
      "loss": 0.0121,
      "step": 2521260
    },
    {
      "epoch": 4.126130018394506,
      "grad_norm": 0.14803341031074524,
      "learning_rate": 1.7592562086938189e-06,
      "loss": 0.0071,
      "step": 2521280
    },
    {
      "epoch": 4.12616274883316,
      "grad_norm": 0.07343989610671997,
      "learning_rate": 1.7591903164803018e-06,
      "loss": 0.0112,
      "step": 2521300
    },
    {
      "epoch": 4.126195479271813,
      "grad_norm": 0.22933454811573029,
      "learning_rate": 1.7591244242667846e-06,
      "loss": 0.0093,
      "step": 2521320
    },
    {
      "epoch": 4.126228209710466,
      "grad_norm": 0.34741586446762085,
      "learning_rate": 1.7590585320532675e-06,
      "loss": 0.0081,
      "step": 2521340
    },
    {
      "epoch": 4.12626094014912,
      "grad_norm": 0.36923298239707947,
      "learning_rate": 1.7589926398397503e-06,
      "loss": 0.0077,
      "step": 2521360
    },
    {
      "epoch": 4.126293670587773,
      "grad_norm": 0.2378738820552826,
      "learning_rate": 1.7589267476262332e-06,
      "loss": 0.0108,
      "step": 2521380
    },
    {
      "epoch": 4.126326401026427,
      "grad_norm": 0.14214158058166504,
      "learning_rate": 1.758860855412716e-06,
      "loss": 0.0089,
      "step": 2521400
    },
    {
      "epoch": 4.12635913146508,
      "grad_norm": 0.5219599008560181,
      "learning_rate": 1.7587949631991987e-06,
      "loss": 0.0102,
      "step": 2521420
    },
    {
      "epoch": 4.126391861903733,
      "grad_norm": 0.2638265788555145,
      "learning_rate": 1.7587290709856816e-06,
      "loss": 0.0068,
      "step": 2521440
    },
    {
      "epoch": 4.126424592342387,
      "grad_norm": 0.1483444720506668,
      "learning_rate": 1.7586631787721648e-06,
      "loss": 0.0145,
      "step": 2521460
    },
    {
      "epoch": 4.1264573227810395,
      "grad_norm": 0.2107880413532257,
      "learning_rate": 1.7585972865586475e-06,
      "loss": 0.007,
      "step": 2521480
    },
    {
      "epoch": 4.126490053219693,
      "grad_norm": 0.1923637092113495,
      "learning_rate": 1.7585313943451305e-06,
      "loss": 0.0067,
      "step": 2521500
    },
    {
      "epoch": 4.126522783658347,
      "grad_norm": 0.23852768540382385,
      "learning_rate": 1.7584655021316132e-06,
      "loss": 0.0091,
      "step": 2521520
    },
    {
      "epoch": 4.126555514097,
      "grad_norm": 0.3369525074958801,
      "learning_rate": 1.7583996099180962e-06,
      "loss": 0.0119,
      "step": 2521540
    },
    {
      "epoch": 4.126588244535653,
      "grad_norm": 0.075260229408741,
      "learning_rate": 1.758333717704579e-06,
      "loss": 0.007,
      "step": 2521560
    },
    {
      "epoch": 4.126620974974307,
      "grad_norm": 0.2886284291744232,
      "learning_rate": 1.7582678254910619e-06,
      "loss": 0.0089,
      "step": 2521580
    },
    {
      "epoch": 4.12665370541296,
      "grad_norm": 0.30894702672958374,
      "learning_rate": 1.7582019332775446e-06,
      "loss": 0.0118,
      "step": 2521600
    },
    {
      "epoch": 4.126686435851613,
      "grad_norm": 0.3274219036102295,
      "learning_rate": 1.7581360410640278e-06,
      "loss": 0.0087,
      "step": 2521620
    },
    {
      "epoch": 4.1267191662902665,
      "grad_norm": 0.3195502460002899,
      "learning_rate": 1.7580701488505105e-06,
      "loss": 0.0072,
      "step": 2521640
    },
    {
      "epoch": 4.12675189672892,
      "grad_norm": 0.24025656282901764,
      "learning_rate": 1.7580042566369935e-06,
      "loss": 0.0101,
      "step": 2521660
    },
    {
      "epoch": 4.126784627167574,
      "grad_norm": 0.10742450505495071,
      "learning_rate": 1.7579383644234762e-06,
      "loss": 0.008,
      "step": 2521680
    },
    {
      "epoch": 4.126817357606226,
      "grad_norm": 0.2935532033443451,
      "learning_rate": 1.7578724722099592e-06,
      "loss": 0.0095,
      "step": 2521700
    },
    {
      "epoch": 4.12685008804488,
      "grad_norm": 0.29968753457069397,
      "learning_rate": 1.757806579996442e-06,
      "loss": 0.0076,
      "step": 2521720
    },
    {
      "epoch": 4.1268828184835336,
      "grad_norm": 0.0588383749127388,
      "learning_rate": 1.7577406877829249e-06,
      "loss": 0.0075,
      "step": 2521740
    },
    {
      "epoch": 4.126915548922186,
      "grad_norm": 0.4730244576931,
      "learning_rate": 1.7576747955694076e-06,
      "loss": 0.0112,
      "step": 2521760
    },
    {
      "epoch": 4.12694827936084,
      "grad_norm": 0.2927992045879364,
      "learning_rate": 1.7576089033558905e-06,
      "loss": 0.0114,
      "step": 2521780
    },
    {
      "epoch": 4.1269810097994934,
      "grad_norm": 0.9665529727935791,
      "learning_rate": 1.7575430111423735e-06,
      "loss": 0.0083,
      "step": 2521800
    },
    {
      "epoch": 4.127013740238147,
      "grad_norm": 0.1704109013080597,
      "learning_rate": 1.7574771189288564e-06,
      "loss": 0.0093,
      "step": 2521820
    },
    {
      "epoch": 4.1270464706768,
      "grad_norm": 0.09042907506227493,
      "learning_rate": 1.7574112267153392e-06,
      "loss": 0.0109,
      "step": 2521840
    },
    {
      "epoch": 4.127079201115453,
      "grad_norm": 0.3458721339702606,
      "learning_rate": 1.7573453345018221e-06,
      "loss": 0.0071,
      "step": 2521860
    },
    {
      "epoch": 4.127111931554107,
      "grad_norm": 0.4918555021286011,
      "learning_rate": 1.7572794422883049e-06,
      "loss": 0.0104,
      "step": 2521880
    },
    {
      "epoch": 4.12714466199276,
      "grad_norm": 0.1516096442937851,
      "learning_rate": 1.7572135500747878e-06,
      "loss": 0.0063,
      "step": 2521900
    },
    {
      "epoch": 4.127177392431413,
      "grad_norm": 0.12897875905036926,
      "learning_rate": 1.7571476578612706e-06,
      "loss": 0.0083,
      "step": 2521920
    },
    {
      "epoch": 4.127210122870067,
      "grad_norm": 0.5426676869392395,
      "learning_rate": 1.7570817656477535e-06,
      "loss": 0.0078,
      "step": 2521940
    },
    {
      "epoch": 4.12724285330872,
      "grad_norm": 0.047037724405527115,
      "learning_rate": 1.7570158734342363e-06,
      "loss": 0.0123,
      "step": 2521960
    },
    {
      "epoch": 4.127275583747373,
      "grad_norm": 0.1213529035449028,
      "learning_rate": 1.7569499812207194e-06,
      "loss": 0.0078,
      "step": 2521980
    },
    {
      "epoch": 4.127308314186027,
      "grad_norm": 0.05454562231898308,
      "learning_rate": 1.7568840890072022e-06,
      "loss": 0.0056,
      "step": 2522000
    },
    {
      "epoch": 4.12734104462468,
      "grad_norm": 0.22730380296707153,
      "learning_rate": 1.7568181967936851e-06,
      "loss": 0.0092,
      "step": 2522020
    },
    {
      "epoch": 4.127373775063333,
      "grad_norm": 0.18260130286216736,
      "learning_rate": 1.7567523045801679e-06,
      "loss": 0.0114,
      "step": 2522040
    },
    {
      "epoch": 4.127406505501987,
      "grad_norm": 0.08379184454679489,
      "learning_rate": 1.7566864123666508e-06,
      "loss": 0.008,
      "step": 2522060
    },
    {
      "epoch": 4.12743923594064,
      "grad_norm": 0.11929155141115189,
      "learning_rate": 1.7566205201531335e-06,
      "loss": 0.0053,
      "step": 2522080
    },
    {
      "epoch": 4.127471966379294,
      "grad_norm": 0.27490079402923584,
      "learning_rate": 1.7565546279396165e-06,
      "loss": 0.0113,
      "step": 2522100
    },
    {
      "epoch": 4.1275046968179465,
      "grad_norm": 0.15108558535575867,
      "learning_rate": 1.7564887357260992e-06,
      "loss": 0.0104,
      "step": 2522120
    },
    {
      "epoch": 4.1275374272566,
      "grad_norm": 0.05050918832421303,
      "learning_rate": 1.7564228435125822e-06,
      "loss": 0.0059,
      "step": 2522140
    },
    {
      "epoch": 4.127570157695254,
      "grad_norm": 0.280975878238678,
      "learning_rate": 1.7563569512990654e-06,
      "loss": 0.013,
      "step": 2522160
    },
    {
      "epoch": 4.127602888133906,
      "grad_norm": 0.288931280374527,
      "learning_rate": 1.756291059085548e-06,
      "loss": 0.009,
      "step": 2522180
    },
    {
      "epoch": 4.12763561857256,
      "grad_norm": 0.2255222201347351,
      "learning_rate": 1.7562251668720308e-06,
      "loss": 0.0085,
      "step": 2522200
    },
    {
      "epoch": 4.127668349011214,
      "grad_norm": 0.5732218623161316,
      "learning_rate": 1.7561592746585138e-06,
      "loss": 0.0116,
      "step": 2522220
    },
    {
      "epoch": 4.127701079449867,
      "grad_norm": 0.16055512428283691,
      "learning_rate": 1.7560933824449965e-06,
      "loss": 0.0109,
      "step": 2522240
    },
    {
      "epoch": 4.12773380988852,
      "grad_norm": 0.17305149137973785,
      "learning_rate": 1.7560274902314795e-06,
      "loss": 0.0078,
      "step": 2522260
    },
    {
      "epoch": 4.1277665403271735,
      "grad_norm": 0.08653876930475235,
      "learning_rate": 1.7559615980179622e-06,
      "loss": 0.0099,
      "step": 2522280
    },
    {
      "epoch": 4.127799270765827,
      "grad_norm": 0.37567979097366333,
      "learning_rate": 1.7558957058044452e-06,
      "loss": 0.0117,
      "step": 2522300
    },
    {
      "epoch": 4.12783200120448,
      "grad_norm": 0.30162858963012695,
      "learning_rate": 1.755829813590928e-06,
      "loss": 0.0073,
      "step": 2522320
    },
    {
      "epoch": 4.127864731643133,
      "grad_norm": 0.38650596141815186,
      "learning_rate": 1.755763921377411e-06,
      "loss": 0.0118,
      "step": 2522340
    },
    {
      "epoch": 4.127897462081787,
      "grad_norm": 0.1646844893693924,
      "learning_rate": 1.755698029163894e-06,
      "loss": 0.0078,
      "step": 2522360
    },
    {
      "epoch": 4.127930192520441,
      "grad_norm": 0.23922663927078247,
      "learning_rate": 1.7556321369503768e-06,
      "loss": 0.0081,
      "step": 2522380
    },
    {
      "epoch": 4.127962922959093,
      "grad_norm": 0.2804501950740814,
      "learning_rate": 1.7555662447368595e-06,
      "loss": 0.0069,
      "step": 2522400
    },
    {
      "epoch": 4.127995653397747,
      "grad_norm": 0.0965801328420639,
      "learning_rate": 1.7555003525233425e-06,
      "loss": 0.0099,
      "step": 2522420
    },
    {
      "epoch": 4.1280283838364005,
      "grad_norm": 0.327266663312912,
      "learning_rate": 1.7554344603098252e-06,
      "loss": 0.0122,
      "step": 2522440
    },
    {
      "epoch": 4.128061114275053,
      "grad_norm": 0.42917144298553467,
      "learning_rate": 1.7553685680963081e-06,
      "loss": 0.0083,
      "step": 2522460
    },
    {
      "epoch": 4.128093844713707,
      "grad_norm": 0.05348379164934158,
      "learning_rate": 1.7553026758827909e-06,
      "loss": 0.0088,
      "step": 2522480
    },
    {
      "epoch": 4.12812657515236,
      "grad_norm": 0.24960638582706451,
      "learning_rate": 1.755236783669274e-06,
      "loss": 0.0146,
      "step": 2522500
    },
    {
      "epoch": 4.128159305591014,
      "grad_norm": 0.07323592156171799,
      "learning_rate": 1.755170891455757e-06,
      "loss": 0.0074,
      "step": 2522520
    },
    {
      "epoch": 4.128192036029667,
      "grad_norm": 0.19710269570350647,
      "learning_rate": 1.7551049992422397e-06,
      "loss": 0.0106,
      "step": 2522540
    },
    {
      "epoch": 4.12822476646832,
      "grad_norm": 0.2529127597808838,
      "learning_rate": 1.7550391070287227e-06,
      "loss": 0.0084,
      "step": 2522560
    },
    {
      "epoch": 4.128257496906974,
      "grad_norm": 0.04461219534277916,
      "learning_rate": 1.7549732148152054e-06,
      "loss": 0.0049,
      "step": 2522580
    },
    {
      "epoch": 4.128290227345627,
      "grad_norm": 0.22175438702106476,
      "learning_rate": 1.7549073226016884e-06,
      "loss": 0.0075,
      "step": 2522600
    },
    {
      "epoch": 4.12832295778428,
      "grad_norm": 0.1440257728099823,
      "learning_rate": 1.7548414303881711e-06,
      "loss": 0.0083,
      "step": 2522620
    },
    {
      "epoch": 4.128355688222934,
      "grad_norm": 0.6936763525009155,
      "learning_rate": 1.7547755381746539e-06,
      "loss": 0.0087,
      "step": 2522640
    },
    {
      "epoch": 4.1283884186615865,
      "grad_norm": 0.14205734431743622,
      "learning_rate": 1.7547096459611368e-06,
      "loss": 0.0079,
      "step": 2522660
    },
    {
      "epoch": 4.12842114910024,
      "grad_norm": 0.08109258860349655,
      "learning_rate": 1.75464375374762e-06,
      "loss": 0.0116,
      "step": 2522680
    },
    {
      "epoch": 4.128453879538894,
      "grad_norm": 0.28834572434425354,
      "learning_rate": 1.7545778615341027e-06,
      "loss": 0.0066,
      "step": 2522700
    },
    {
      "epoch": 4.128486609977547,
      "grad_norm": 0.0859130471944809,
      "learning_rate": 1.7545119693205857e-06,
      "loss": 0.01,
      "step": 2522720
    },
    {
      "epoch": 4.1285193404162,
      "grad_norm": 0.3833884596824646,
      "learning_rate": 1.7544460771070684e-06,
      "loss": 0.0079,
      "step": 2522740
    },
    {
      "epoch": 4.1285520708548535,
      "grad_norm": 0.41394954919815063,
      "learning_rate": 1.7543801848935514e-06,
      "loss": 0.0105,
      "step": 2522760
    },
    {
      "epoch": 4.128584801293507,
      "grad_norm": 0.2495218962430954,
      "learning_rate": 1.754314292680034e-06,
      "loss": 0.0108,
      "step": 2522780
    },
    {
      "epoch": 4.128617531732161,
      "grad_norm": 0.33653524518013,
      "learning_rate": 1.754248400466517e-06,
      "loss": 0.0073,
      "step": 2522800
    },
    {
      "epoch": 4.128650262170813,
      "grad_norm": 0.33303168416023254,
      "learning_rate": 1.7541825082529998e-06,
      "loss": 0.0087,
      "step": 2522820
    },
    {
      "epoch": 4.128682992609467,
      "grad_norm": 0.21230794489383698,
      "learning_rate": 1.7541166160394825e-06,
      "loss": 0.0086,
      "step": 2522840
    },
    {
      "epoch": 4.128715723048121,
      "grad_norm": 0.15407125651836395,
      "learning_rate": 1.7540507238259657e-06,
      "loss": 0.0098,
      "step": 2522860
    },
    {
      "epoch": 4.128748453486773,
      "grad_norm": 0.14542578160762787,
      "learning_rate": 1.7539848316124486e-06,
      "loss": 0.0092,
      "step": 2522880
    },
    {
      "epoch": 4.128781183925427,
      "grad_norm": 0.031289517879486084,
      "learning_rate": 1.7539189393989314e-06,
      "loss": 0.0133,
      "step": 2522900
    },
    {
      "epoch": 4.1288139143640805,
      "grad_norm": 0.4854905307292938,
      "learning_rate": 1.7538530471854143e-06,
      "loss": 0.0091,
      "step": 2522920
    },
    {
      "epoch": 4.128846644802733,
      "grad_norm": 0.28606027364730835,
      "learning_rate": 1.753787154971897e-06,
      "loss": 0.0127,
      "step": 2522940
    },
    {
      "epoch": 4.128879375241387,
      "grad_norm": 0.1447579562664032,
      "learning_rate": 1.75372126275838e-06,
      "loss": 0.0087,
      "step": 2522960
    },
    {
      "epoch": 4.12891210568004,
      "grad_norm": 0.18941593170166016,
      "learning_rate": 1.7536553705448628e-06,
      "loss": 0.0159,
      "step": 2522980
    },
    {
      "epoch": 4.128944836118694,
      "grad_norm": 0.16632302105426788,
      "learning_rate": 1.7535894783313457e-06,
      "loss": 0.0106,
      "step": 2523000
    },
    {
      "epoch": 4.128977566557347,
      "grad_norm": 0.33761653304100037,
      "learning_rate": 1.7535235861178285e-06,
      "loss": 0.0124,
      "step": 2523020
    },
    {
      "epoch": 4.129010296996,
      "grad_norm": 0.4752020537853241,
      "learning_rate": 1.7534576939043116e-06,
      "loss": 0.0119,
      "step": 2523040
    },
    {
      "epoch": 4.129043027434654,
      "grad_norm": 0.32472312450408936,
      "learning_rate": 1.7533918016907944e-06,
      "loss": 0.0094,
      "step": 2523060
    },
    {
      "epoch": 4.129075757873307,
      "grad_norm": 0.18370600044727325,
      "learning_rate": 1.7533259094772773e-06,
      "loss": 0.0095,
      "step": 2523080
    },
    {
      "epoch": 4.12910848831196,
      "grad_norm": 0.42348751425743103,
      "learning_rate": 1.75326001726376e-06,
      "loss": 0.0074,
      "step": 2523100
    },
    {
      "epoch": 4.129141218750614,
      "grad_norm": 0.17926684021949768,
      "learning_rate": 1.753194125050243e-06,
      "loss": 0.0093,
      "step": 2523120
    },
    {
      "epoch": 4.129173949189267,
      "grad_norm": 0.35075393319129944,
      "learning_rate": 1.7531282328367257e-06,
      "loss": 0.0108,
      "step": 2523140
    },
    {
      "epoch": 4.12920667962792,
      "grad_norm": 0.3469144105911255,
      "learning_rate": 1.7530623406232087e-06,
      "loss": 0.0134,
      "step": 2523160
    },
    {
      "epoch": 4.129239410066574,
      "grad_norm": 0.344546914100647,
      "learning_rate": 1.7529964484096914e-06,
      "loss": 0.0081,
      "step": 2523180
    },
    {
      "epoch": 4.129272140505227,
      "grad_norm": 0.3343513309955597,
      "learning_rate": 1.7529305561961744e-06,
      "loss": 0.0105,
      "step": 2523200
    },
    {
      "epoch": 4.12930487094388,
      "grad_norm": 0.48135554790496826,
      "learning_rate": 1.7528646639826573e-06,
      "loss": 0.0124,
      "step": 2523220
    },
    {
      "epoch": 4.129337601382534,
      "grad_norm": 0.05923345312476158,
      "learning_rate": 1.7527987717691403e-06,
      "loss": 0.0104,
      "step": 2523240
    },
    {
      "epoch": 4.129370331821187,
      "grad_norm": 0.5706438422203064,
      "learning_rate": 1.752732879555623e-06,
      "loss": 0.0098,
      "step": 2523260
    },
    {
      "epoch": 4.129403062259841,
      "grad_norm": 0.6007322072982788,
      "learning_rate": 1.752666987342106e-06,
      "loss": 0.0077,
      "step": 2523280
    },
    {
      "epoch": 4.1294357926984935,
      "grad_norm": 0.33341366052627563,
      "learning_rate": 1.7526010951285887e-06,
      "loss": 0.0089,
      "step": 2523300
    },
    {
      "epoch": 4.129468523137147,
      "grad_norm": 0.3016536831855774,
      "learning_rate": 1.7525352029150717e-06,
      "loss": 0.0087,
      "step": 2523320
    },
    {
      "epoch": 4.129501253575801,
      "grad_norm": 0.1619395911693573,
      "learning_rate": 1.7524693107015544e-06,
      "loss": 0.0114,
      "step": 2523340
    },
    {
      "epoch": 4.129533984014453,
      "grad_norm": 0.41196584701538086,
      "learning_rate": 1.7524034184880374e-06,
      "loss": 0.009,
      "step": 2523360
    },
    {
      "epoch": 4.129566714453107,
      "grad_norm": 0.25374847650527954,
      "learning_rate": 1.7523375262745203e-06,
      "loss": 0.0085,
      "step": 2523380
    },
    {
      "epoch": 4.129599444891761,
      "grad_norm": 0.2570624053478241,
      "learning_rate": 1.7522716340610033e-06,
      "loss": 0.0109,
      "step": 2523400
    },
    {
      "epoch": 4.129632175330414,
      "grad_norm": 0.366585910320282,
      "learning_rate": 1.752205741847486e-06,
      "loss": 0.0066,
      "step": 2523420
    },
    {
      "epoch": 4.129664905769067,
      "grad_norm": 0.6258923411369324,
      "learning_rate": 1.752139849633969e-06,
      "loss": 0.0065,
      "step": 2523440
    },
    {
      "epoch": 4.1296976362077205,
      "grad_norm": 0.35767462849617004,
      "learning_rate": 1.7520739574204517e-06,
      "loss": 0.0084,
      "step": 2523460
    },
    {
      "epoch": 4.129730366646374,
      "grad_norm": 0.2105429619550705,
      "learning_rate": 1.7520080652069346e-06,
      "loss": 0.0113,
      "step": 2523480
    },
    {
      "epoch": 4.129763097085027,
      "grad_norm": 0.3330303430557251,
      "learning_rate": 1.7519421729934174e-06,
      "loss": 0.0124,
      "step": 2523500
    },
    {
      "epoch": 4.12979582752368,
      "grad_norm": 0.11640841513872147,
      "learning_rate": 1.7518762807799003e-06,
      "loss": 0.0096,
      "step": 2523520
    },
    {
      "epoch": 4.129828557962334,
      "grad_norm": 0.21863754093647003,
      "learning_rate": 1.751810388566383e-06,
      "loss": 0.0058,
      "step": 2523540
    },
    {
      "epoch": 4.1298612884009875,
      "grad_norm": 0.053365450352430344,
      "learning_rate": 1.7517444963528662e-06,
      "loss": 0.0098,
      "step": 2523560
    },
    {
      "epoch": 4.12989401883964,
      "grad_norm": 0.10858410596847534,
      "learning_rate": 1.7516786041393492e-06,
      "loss": 0.0086,
      "step": 2523580
    },
    {
      "epoch": 4.129926749278294,
      "grad_norm": 0.09174346178770065,
      "learning_rate": 1.751612711925832e-06,
      "loss": 0.0068,
      "step": 2523600
    },
    {
      "epoch": 4.129959479716947,
      "grad_norm": 0.08097517490386963,
      "learning_rate": 1.7515468197123147e-06,
      "loss": 0.0108,
      "step": 2523620
    },
    {
      "epoch": 4.1299922101556,
      "grad_norm": 0.03761669248342514,
      "learning_rate": 1.7514809274987976e-06,
      "loss": 0.0069,
      "step": 2523640
    },
    {
      "epoch": 4.130024940594254,
      "grad_norm": 0.2129969298839569,
      "learning_rate": 1.7514150352852804e-06,
      "loss": 0.0061,
      "step": 2523660
    },
    {
      "epoch": 4.130057671032907,
      "grad_norm": 0.1543034166097641,
      "learning_rate": 1.7513491430717633e-06,
      "loss": 0.012,
      "step": 2523680
    },
    {
      "epoch": 4.130090401471561,
      "grad_norm": 0.14172916114330292,
      "learning_rate": 1.751283250858246e-06,
      "loss": 0.0058,
      "step": 2523700
    },
    {
      "epoch": 4.130123131910214,
      "grad_norm": 0.6962503790855408,
      "learning_rate": 1.751217358644729e-06,
      "loss": 0.0108,
      "step": 2523720
    },
    {
      "epoch": 4.130155862348867,
      "grad_norm": 0.2689257264137268,
      "learning_rate": 1.7511514664312122e-06,
      "loss": 0.0112,
      "step": 2523740
    },
    {
      "epoch": 4.130188592787521,
      "grad_norm": 0.17804978787899017,
      "learning_rate": 1.751085574217695e-06,
      "loss": 0.0101,
      "step": 2523760
    },
    {
      "epoch": 4.1302213232261735,
      "grad_norm": 0.370106965303421,
      "learning_rate": 1.7510196820041779e-06,
      "loss": 0.0083,
      "step": 2523780
    },
    {
      "epoch": 4.130254053664827,
      "grad_norm": 0.3986801207065582,
      "learning_rate": 1.7509537897906606e-06,
      "loss": 0.0078,
      "step": 2523800
    },
    {
      "epoch": 4.130286784103481,
      "grad_norm": 0.08951995521783829,
      "learning_rate": 1.7508878975771436e-06,
      "loss": 0.0077,
      "step": 2523820
    },
    {
      "epoch": 4.130319514542134,
      "grad_norm": 0.1369733363389969,
      "learning_rate": 1.7508220053636263e-06,
      "loss": 0.0131,
      "step": 2523840
    },
    {
      "epoch": 4.130352244980787,
      "grad_norm": 0.3367284834384918,
      "learning_rate": 1.750756113150109e-06,
      "loss": 0.0077,
      "step": 2523860
    },
    {
      "epoch": 4.130384975419441,
      "grad_norm": 0.13137568533420563,
      "learning_rate": 1.750690220936592e-06,
      "loss": 0.0111,
      "step": 2523880
    },
    {
      "epoch": 4.130417705858094,
      "grad_norm": 0.06249193847179413,
      "learning_rate": 1.7506243287230747e-06,
      "loss": 0.008,
      "step": 2523900
    },
    {
      "epoch": 4.130450436296747,
      "grad_norm": 0.11941629648208618,
      "learning_rate": 1.7505584365095579e-06,
      "loss": 0.0081,
      "step": 2523920
    },
    {
      "epoch": 4.1304831667354005,
      "grad_norm": 0.10465756803750992,
      "learning_rate": 1.7504925442960408e-06,
      "loss": 0.0064,
      "step": 2523940
    },
    {
      "epoch": 4.130515897174054,
      "grad_norm": 0.3414759933948517,
      "learning_rate": 1.7504266520825236e-06,
      "loss": 0.007,
      "step": 2523960
    },
    {
      "epoch": 4.130548627612708,
      "grad_norm": 0.09515506774187088,
      "learning_rate": 1.7503607598690065e-06,
      "loss": 0.0102,
      "step": 2523980
    },
    {
      "epoch": 4.13058135805136,
      "grad_norm": 0.7105404734611511,
      "learning_rate": 1.7502948676554893e-06,
      "loss": 0.0082,
      "step": 2524000
    },
    {
      "epoch": 4.130614088490014,
      "grad_norm": 0.5929921865463257,
      "learning_rate": 1.7502289754419722e-06,
      "loss": 0.0096,
      "step": 2524020
    },
    {
      "epoch": 4.130646818928668,
      "grad_norm": 0.12585996091365814,
      "learning_rate": 1.750163083228455e-06,
      "loss": 0.0118,
      "step": 2524040
    },
    {
      "epoch": 4.13067954936732,
      "grad_norm": 0.5485764145851135,
      "learning_rate": 1.7500971910149377e-06,
      "loss": 0.0131,
      "step": 2524060
    },
    {
      "epoch": 4.130712279805974,
      "grad_norm": 0.35980990529060364,
      "learning_rate": 1.7500312988014207e-06,
      "loss": 0.0099,
      "step": 2524080
    },
    {
      "epoch": 4.1307450102446275,
      "grad_norm": 0.3295248746871948,
      "learning_rate": 1.7499654065879038e-06,
      "loss": 0.0061,
      "step": 2524100
    },
    {
      "epoch": 4.13077774068328,
      "grad_norm": 0.30661463737487793,
      "learning_rate": 1.7498995143743866e-06,
      "loss": 0.0059,
      "step": 2524120
    },
    {
      "epoch": 4.130810471121934,
      "grad_norm": 0.1855730265378952,
      "learning_rate": 1.7498336221608695e-06,
      "loss": 0.0076,
      "step": 2524140
    },
    {
      "epoch": 4.130843201560587,
      "grad_norm": 0.22645056247711182,
      "learning_rate": 1.7497677299473522e-06,
      "loss": 0.0057,
      "step": 2524160
    },
    {
      "epoch": 4.130875931999241,
      "grad_norm": 0.1426428258419037,
      "learning_rate": 1.7497018377338352e-06,
      "loss": 0.0084,
      "step": 2524180
    },
    {
      "epoch": 4.130908662437894,
      "grad_norm": 0.459915429353714,
      "learning_rate": 1.749635945520318e-06,
      "loss": 0.0085,
      "step": 2524200
    },
    {
      "epoch": 4.130941392876547,
      "grad_norm": 0.1494460254907608,
      "learning_rate": 1.7495700533068009e-06,
      "loss": 0.0135,
      "step": 2524220
    },
    {
      "epoch": 4.130974123315201,
      "grad_norm": 0.28998419642448425,
      "learning_rate": 1.7495041610932836e-06,
      "loss": 0.0077,
      "step": 2524240
    },
    {
      "epoch": 4.1310068537538545,
      "grad_norm": 0.3796786367893219,
      "learning_rate": 1.7494382688797668e-06,
      "loss": 0.0124,
      "step": 2524260
    },
    {
      "epoch": 4.131039584192507,
      "grad_norm": 0.42600294947624207,
      "learning_rate": 1.7493723766662495e-06,
      "loss": 0.0082,
      "step": 2524280
    },
    {
      "epoch": 4.131072314631161,
      "grad_norm": 0.17761678993701935,
      "learning_rate": 1.7493064844527325e-06,
      "loss": 0.0067,
      "step": 2524300
    },
    {
      "epoch": 4.131105045069814,
      "grad_norm": 0.10555288940668106,
      "learning_rate": 1.7492405922392152e-06,
      "loss": 0.0071,
      "step": 2524320
    },
    {
      "epoch": 4.131137775508467,
      "grad_norm": 1.4089494943618774,
      "learning_rate": 1.7491747000256982e-06,
      "loss": 0.0113,
      "step": 2524340
    },
    {
      "epoch": 4.131170505947121,
      "grad_norm": 0.29464593529701233,
      "learning_rate": 1.749108807812181e-06,
      "loss": 0.0086,
      "step": 2524360
    },
    {
      "epoch": 4.131203236385774,
      "grad_norm": 0.4525447487831116,
      "learning_rate": 1.7490429155986639e-06,
      "loss": 0.0094,
      "step": 2524380
    },
    {
      "epoch": 4.131235966824427,
      "grad_norm": 0.03946399316191673,
      "learning_rate": 1.7489770233851466e-06,
      "loss": 0.0091,
      "step": 2524400
    },
    {
      "epoch": 4.1312686972630805,
      "grad_norm": 0.25034379959106445,
      "learning_rate": 1.7489111311716296e-06,
      "loss": 0.0105,
      "step": 2524420
    },
    {
      "epoch": 4.131301427701734,
      "grad_norm": 0.48982828855514526,
      "learning_rate": 1.7488452389581125e-06,
      "loss": 0.0123,
      "step": 2524440
    },
    {
      "epoch": 4.131334158140388,
      "grad_norm": 0.2925196588039398,
      "learning_rate": 1.7487793467445955e-06,
      "loss": 0.0128,
      "step": 2524460
    },
    {
      "epoch": 4.13136688857904,
      "grad_norm": 0.1056746393442154,
      "learning_rate": 1.7487134545310782e-06,
      "loss": 0.0095,
      "step": 2524480
    },
    {
      "epoch": 4.131399619017694,
      "grad_norm": 0.6313177347183228,
      "learning_rate": 1.7486475623175612e-06,
      "loss": 0.0132,
      "step": 2524500
    },
    {
      "epoch": 4.131432349456348,
      "grad_norm": 0.1633373647928238,
      "learning_rate": 1.7485816701040439e-06,
      "loss": 0.0067,
      "step": 2524520
    },
    {
      "epoch": 4.131465079895,
      "grad_norm": 0.15277014672756195,
      "learning_rate": 1.7485157778905268e-06,
      "loss": 0.0098,
      "step": 2524540
    },
    {
      "epoch": 4.131497810333654,
      "grad_norm": 0.11020223051309586,
      "learning_rate": 1.7484498856770096e-06,
      "loss": 0.0069,
      "step": 2524560
    },
    {
      "epoch": 4.1315305407723075,
      "grad_norm": 0.16935637593269348,
      "learning_rate": 1.7483839934634925e-06,
      "loss": 0.012,
      "step": 2524580
    },
    {
      "epoch": 4.131563271210961,
      "grad_norm": 0.2293582409620285,
      "learning_rate": 1.7483181012499753e-06,
      "loss": 0.0069,
      "step": 2524600
    },
    {
      "epoch": 4.131596001649614,
      "grad_norm": 0.17663420736789703,
      "learning_rate": 1.7482522090364584e-06,
      "loss": 0.009,
      "step": 2524620
    },
    {
      "epoch": 4.131628732088267,
      "grad_norm": 0.11506682634353638,
      "learning_rate": 1.7481863168229412e-06,
      "loss": 0.0086,
      "step": 2524640
    },
    {
      "epoch": 4.131661462526921,
      "grad_norm": 0.3374611437320709,
      "learning_rate": 1.7481204246094241e-06,
      "loss": 0.012,
      "step": 2524660
    },
    {
      "epoch": 4.131694192965574,
      "grad_norm": 0.3975512385368347,
      "learning_rate": 1.7480545323959069e-06,
      "loss": 0.009,
      "step": 2524680
    },
    {
      "epoch": 4.131726923404227,
      "grad_norm": 0.15042544901371002,
      "learning_rate": 1.7479886401823898e-06,
      "loss": 0.0089,
      "step": 2524700
    },
    {
      "epoch": 4.131759653842881,
      "grad_norm": 0.17852583527565002,
      "learning_rate": 1.7479227479688726e-06,
      "loss": 0.0087,
      "step": 2524720
    },
    {
      "epoch": 4.1317923842815345,
      "grad_norm": 0.418514609336853,
      "learning_rate": 1.7478568557553555e-06,
      "loss": 0.01,
      "step": 2524740
    },
    {
      "epoch": 4.131825114720187,
      "grad_norm": 0.14762412011623383,
      "learning_rate": 1.7477909635418382e-06,
      "loss": 0.0156,
      "step": 2524760
    },
    {
      "epoch": 4.131857845158841,
      "grad_norm": 0.15851768851280212,
      "learning_rate": 1.7477250713283212e-06,
      "loss": 0.0083,
      "step": 2524780
    },
    {
      "epoch": 4.131890575597494,
      "grad_norm": 0.22927585244178772,
      "learning_rate": 1.7476591791148044e-06,
      "loss": 0.0131,
      "step": 2524800
    },
    {
      "epoch": 4.131923306036147,
      "grad_norm": 0.5409616231918335,
      "learning_rate": 1.747593286901287e-06,
      "loss": 0.0088,
      "step": 2524820
    },
    {
      "epoch": 4.131956036474801,
      "grad_norm": 0.33543381094932556,
      "learning_rate": 1.7475273946877698e-06,
      "loss": 0.0089,
      "step": 2524840
    },
    {
      "epoch": 4.131988766913454,
      "grad_norm": 0.3073705732822418,
      "learning_rate": 1.7474615024742528e-06,
      "loss": 0.0092,
      "step": 2524860
    },
    {
      "epoch": 4.132021497352108,
      "grad_norm": 0.4704696536064148,
      "learning_rate": 1.7473956102607355e-06,
      "loss": 0.0099,
      "step": 2524880
    },
    {
      "epoch": 4.132054227790761,
      "grad_norm": 0.3111906945705414,
      "learning_rate": 1.7473297180472185e-06,
      "loss": 0.0115,
      "step": 2524900
    },
    {
      "epoch": 4.132086958229414,
      "grad_norm": 0.17991095781326294,
      "learning_rate": 1.7472638258337012e-06,
      "loss": 0.0079,
      "step": 2524920
    },
    {
      "epoch": 4.132119688668068,
      "grad_norm": 0.40064528584480286,
      "learning_rate": 1.7471979336201842e-06,
      "loss": 0.0093,
      "step": 2524940
    },
    {
      "epoch": 4.1321524191067205,
      "grad_norm": 0.1904618889093399,
      "learning_rate": 1.747132041406667e-06,
      "loss": 0.0107,
      "step": 2524960
    },
    {
      "epoch": 4.132185149545374,
      "grad_norm": 0.36295926570892334,
      "learning_rate": 1.74706614919315e-06,
      "loss": 0.0077,
      "step": 2524980
    },
    {
      "epoch": 4.132217879984028,
      "grad_norm": 0.36913159489631653,
      "learning_rate": 1.747000256979633e-06,
      "loss": 0.0092,
      "step": 2525000
    },
    {
      "epoch": 4.132250610422681,
      "grad_norm": 0.38039541244506836,
      "learning_rate": 1.7469343647661158e-06,
      "loss": 0.0055,
      "step": 2525020
    },
    {
      "epoch": 4.132283340861334,
      "grad_norm": 0.15388567745685577,
      "learning_rate": 1.7468684725525985e-06,
      "loss": 0.0094,
      "step": 2525040
    },
    {
      "epoch": 4.132316071299988,
      "grad_norm": 0.3597903847694397,
      "learning_rate": 1.7468025803390815e-06,
      "loss": 0.0107,
      "step": 2525060
    },
    {
      "epoch": 4.132348801738641,
      "grad_norm": 0.24078737199306488,
      "learning_rate": 1.7467366881255642e-06,
      "loss": 0.0123,
      "step": 2525080
    },
    {
      "epoch": 4.132381532177294,
      "grad_norm": 0.2506161034107208,
      "learning_rate": 1.7466707959120472e-06,
      "loss": 0.01,
      "step": 2525100
    },
    {
      "epoch": 4.1324142626159475,
      "grad_norm": 0.27808669209480286,
      "learning_rate": 1.74660490369853e-06,
      "loss": 0.0141,
      "step": 2525120
    },
    {
      "epoch": 4.132446993054601,
      "grad_norm": 0.25317028164863586,
      "learning_rate": 1.746539011485013e-06,
      "loss": 0.0126,
      "step": 2525140
    },
    {
      "epoch": 4.132479723493255,
      "grad_norm": 0.18295741081237793,
      "learning_rate": 1.746473119271496e-06,
      "loss": 0.0064,
      "step": 2525160
    },
    {
      "epoch": 4.132512453931907,
      "grad_norm": 0.14725139737129211,
      "learning_rate": 1.7464072270579787e-06,
      "loss": 0.0063,
      "step": 2525180
    },
    {
      "epoch": 4.132545184370561,
      "grad_norm": 0.14853021502494812,
      "learning_rate": 1.7463413348444617e-06,
      "loss": 0.0085,
      "step": 2525200
    },
    {
      "epoch": 4.1325779148092145,
      "grad_norm": 0.17821760475635529,
      "learning_rate": 1.7462754426309444e-06,
      "loss": 0.0062,
      "step": 2525220
    },
    {
      "epoch": 4.132610645247867,
      "grad_norm": 0.20112666487693787,
      "learning_rate": 1.7462095504174274e-06,
      "loss": 0.0106,
      "step": 2525240
    },
    {
      "epoch": 4.132643375686521,
      "grad_norm": 0.2574687898159027,
      "learning_rate": 1.7461436582039101e-06,
      "loss": 0.0085,
      "step": 2525260
    },
    {
      "epoch": 4.132676106125174,
      "grad_norm": 0.2618907392024994,
      "learning_rate": 1.7460777659903929e-06,
      "loss": 0.0093,
      "step": 2525280
    },
    {
      "epoch": 4.132708836563828,
      "grad_norm": 0.07368289679288864,
      "learning_rate": 1.7460118737768758e-06,
      "loss": 0.005,
      "step": 2525300
    },
    {
      "epoch": 4.132741567002481,
      "grad_norm": 0.19317027926445007,
      "learning_rate": 1.745945981563359e-06,
      "loss": 0.008,
      "step": 2525320
    },
    {
      "epoch": 4.132774297441134,
      "grad_norm": 0.39094218611717224,
      "learning_rate": 1.7458800893498417e-06,
      "loss": 0.0104,
      "step": 2525340
    },
    {
      "epoch": 4.132807027879788,
      "grad_norm": 0.42765364050865173,
      "learning_rate": 1.7458141971363247e-06,
      "loss": 0.0098,
      "step": 2525360
    },
    {
      "epoch": 4.132839758318441,
      "grad_norm": 0.2985762357711792,
      "learning_rate": 1.7457483049228074e-06,
      "loss": 0.0076,
      "step": 2525380
    },
    {
      "epoch": 4.132872488757094,
      "grad_norm": 0.19941817224025726,
      "learning_rate": 1.7456824127092904e-06,
      "loss": 0.0124,
      "step": 2525400
    },
    {
      "epoch": 4.132905219195748,
      "grad_norm": 0.19460907578468323,
      "learning_rate": 1.7456165204957731e-06,
      "loss": 0.0077,
      "step": 2525420
    },
    {
      "epoch": 4.132937949634401,
      "grad_norm": 0.2274388074874878,
      "learning_rate": 1.745550628282256e-06,
      "loss": 0.0117,
      "step": 2525440
    },
    {
      "epoch": 4.132970680073054,
      "grad_norm": 0.0983641967177391,
      "learning_rate": 1.7454847360687388e-06,
      "loss": 0.0101,
      "step": 2525460
    },
    {
      "epoch": 4.133003410511708,
      "grad_norm": 0.3934471011161804,
      "learning_rate": 1.7454188438552218e-06,
      "loss": 0.0157,
      "step": 2525480
    },
    {
      "epoch": 4.133036140950361,
      "grad_norm": 0.09224525094032288,
      "learning_rate": 1.7453529516417047e-06,
      "loss": 0.006,
      "step": 2525500
    },
    {
      "epoch": 4.133068871389014,
      "grad_norm": 0.25400856137275696,
      "learning_rate": 1.7452870594281877e-06,
      "loss": 0.01,
      "step": 2525520
    },
    {
      "epoch": 4.133101601827668,
      "grad_norm": 0.062062907963991165,
      "learning_rate": 1.7452211672146704e-06,
      "loss": 0.0137,
      "step": 2525540
    },
    {
      "epoch": 4.133134332266321,
      "grad_norm": 0.19713819026947021,
      "learning_rate": 1.7451552750011533e-06,
      "loss": 0.0117,
      "step": 2525560
    },
    {
      "epoch": 4.133167062704974,
      "grad_norm": 0.30371779203414917,
      "learning_rate": 1.745089382787636e-06,
      "loss": 0.008,
      "step": 2525580
    },
    {
      "epoch": 4.1331997931436275,
      "grad_norm": 0.696520209312439,
      "learning_rate": 1.745023490574119e-06,
      "loss": 0.0162,
      "step": 2525600
    },
    {
      "epoch": 4.133232523582281,
      "grad_norm": 0.4060618579387665,
      "learning_rate": 1.7449575983606018e-06,
      "loss": 0.0068,
      "step": 2525620
    },
    {
      "epoch": 4.133265254020935,
      "grad_norm": 0.157315194606781,
      "learning_rate": 1.7448917061470847e-06,
      "loss": 0.0103,
      "step": 2525640
    },
    {
      "epoch": 4.133297984459587,
      "grad_norm": 0.1337638646364212,
      "learning_rate": 1.7448258139335675e-06,
      "loss": 0.0103,
      "step": 2525660
    },
    {
      "epoch": 4.133330714898241,
      "grad_norm": 0.4373396337032318,
      "learning_rate": 1.7447599217200506e-06,
      "loss": 0.0148,
      "step": 2525680
    },
    {
      "epoch": 4.133363445336895,
      "grad_norm": 0.30226075649261475,
      "learning_rate": 1.7446940295065334e-06,
      "loss": 0.0083,
      "step": 2525700
    },
    {
      "epoch": 4.133396175775548,
      "grad_norm": 0.2609293460845947,
      "learning_rate": 1.7446281372930163e-06,
      "loss": 0.008,
      "step": 2525720
    },
    {
      "epoch": 4.133428906214201,
      "grad_norm": 0.2565600574016571,
      "learning_rate": 1.744562245079499e-06,
      "loss": 0.0107,
      "step": 2525740
    },
    {
      "epoch": 4.1334616366528545,
      "grad_norm": 0.26566755771636963,
      "learning_rate": 1.744496352865982e-06,
      "loss": 0.0102,
      "step": 2525760
    },
    {
      "epoch": 4.133494367091508,
      "grad_norm": 0.16075566411018372,
      "learning_rate": 1.7444304606524648e-06,
      "loss": 0.0079,
      "step": 2525780
    },
    {
      "epoch": 4.133527097530161,
      "grad_norm": 0.41268324851989746,
      "learning_rate": 1.7443645684389477e-06,
      "loss": 0.0084,
      "step": 2525800
    },
    {
      "epoch": 4.133559827968814,
      "grad_norm": 0.2003137320280075,
      "learning_rate": 1.7442986762254304e-06,
      "loss": 0.0095,
      "step": 2525820
    },
    {
      "epoch": 4.133592558407468,
      "grad_norm": 0.20472748577594757,
      "learning_rate": 1.7442327840119136e-06,
      "loss": 0.0077,
      "step": 2525840
    },
    {
      "epoch": 4.133625288846121,
      "grad_norm": 0.24339079856872559,
      "learning_rate": 1.7441668917983963e-06,
      "loss": 0.0052,
      "step": 2525860
    },
    {
      "epoch": 4.133658019284774,
      "grad_norm": 0.3869537115097046,
      "learning_rate": 1.7441009995848793e-06,
      "loss": 0.0092,
      "step": 2525880
    },
    {
      "epoch": 4.133690749723428,
      "grad_norm": 0.46324479579925537,
      "learning_rate": 1.744035107371362e-06,
      "loss": 0.0082,
      "step": 2525900
    },
    {
      "epoch": 4.1337234801620815,
      "grad_norm": 0.21374200284481049,
      "learning_rate": 1.743969215157845e-06,
      "loss": 0.0081,
      "step": 2525920
    },
    {
      "epoch": 4.133756210600734,
      "grad_norm": 0.27950403094291687,
      "learning_rate": 1.7439033229443277e-06,
      "loss": 0.0112,
      "step": 2525940
    },
    {
      "epoch": 4.133788941039388,
      "grad_norm": 0.09539492428302765,
      "learning_rate": 1.7438374307308107e-06,
      "loss": 0.0124,
      "step": 2525960
    },
    {
      "epoch": 4.133821671478041,
      "grad_norm": 0.5332852005958557,
      "learning_rate": 1.7437715385172934e-06,
      "loss": 0.0113,
      "step": 2525980
    },
    {
      "epoch": 4.133854401916694,
      "grad_norm": 0.25899869203567505,
      "learning_rate": 1.7437056463037764e-06,
      "loss": 0.0058,
      "step": 2526000
    },
    {
      "epoch": 4.133887132355348,
      "grad_norm": 0.33034226298332214,
      "learning_rate": 1.7436397540902593e-06,
      "loss": 0.0092,
      "step": 2526020
    },
    {
      "epoch": 4.133919862794001,
      "grad_norm": 0.12001951783895493,
      "learning_rate": 1.7435738618767423e-06,
      "loss": 0.0081,
      "step": 2526040
    },
    {
      "epoch": 4.133952593232655,
      "grad_norm": 0.05796130746603012,
      "learning_rate": 1.743507969663225e-06,
      "loss": 0.0059,
      "step": 2526060
    },
    {
      "epoch": 4.1339853236713076,
      "grad_norm": 0.16302001476287842,
      "learning_rate": 1.743442077449708e-06,
      "loss": 0.0071,
      "step": 2526080
    },
    {
      "epoch": 4.134018054109961,
      "grad_norm": 0.22010833024978638,
      "learning_rate": 1.7433761852361907e-06,
      "loss": 0.0086,
      "step": 2526100
    },
    {
      "epoch": 4.134050784548615,
      "grad_norm": 0.15668129920959473,
      "learning_rate": 1.7433102930226737e-06,
      "loss": 0.01,
      "step": 2526120
    },
    {
      "epoch": 4.1340835149872674,
      "grad_norm": 0.13586729764938354,
      "learning_rate": 1.7432444008091564e-06,
      "loss": 0.0073,
      "step": 2526140
    },
    {
      "epoch": 4.134116245425921,
      "grad_norm": 0.1285160779953003,
      "learning_rate": 1.7431785085956393e-06,
      "loss": 0.0115,
      "step": 2526160
    },
    {
      "epoch": 4.134148975864575,
      "grad_norm": 0.24250735342502594,
      "learning_rate": 1.743112616382122e-06,
      "loss": 0.0075,
      "step": 2526180
    },
    {
      "epoch": 4.134181706303228,
      "grad_norm": 0.07310477644205093,
      "learning_rate": 1.7430467241686053e-06,
      "loss": 0.0135,
      "step": 2526200
    },
    {
      "epoch": 4.134214436741881,
      "grad_norm": 0.20113341510295868,
      "learning_rate": 1.7429808319550882e-06,
      "loss": 0.0081,
      "step": 2526220
    },
    {
      "epoch": 4.1342471671805345,
      "grad_norm": 0.7239835262298584,
      "learning_rate": 1.742914939741571e-06,
      "loss": 0.0112,
      "step": 2526240
    },
    {
      "epoch": 4.134279897619188,
      "grad_norm": 0.10227293521165848,
      "learning_rate": 1.7428490475280537e-06,
      "loss": 0.0058,
      "step": 2526260
    },
    {
      "epoch": 4.134312628057841,
      "grad_norm": 0.4267674386501312,
      "learning_rate": 1.7427831553145366e-06,
      "loss": 0.0091,
      "step": 2526280
    },
    {
      "epoch": 4.134345358496494,
      "grad_norm": 0.5070005059242249,
      "learning_rate": 1.7427172631010194e-06,
      "loss": 0.0091,
      "step": 2526300
    },
    {
      "epoch": 4.134378088935148,
      "grad_norm": 0.30341336131095886,
      "learning_rate": 1.7426513708875023e-06,
      "loss": 0.0099,
      "step": 2526320
    },
    {
      "epoch": 4.134410819373802,
      "grad_norm": 0.07452944666147232,
      "learning_rate": 1.742585478673985e-06,
      "loss": 0.0103,
      "step": 2526340
    },
    {
      "epoch": 4.134443549812454,
      "grad_norm": 0.3147754371166229,
      "learning_rate": 1.742519586460468e-06,
      "loss": 0.0078,
      "step": 2526360
    },
    {
      "epoch": 4.134476280251108,
      "grad_norm": 0.451195627450943,
      "learning_rate": 1.7424536942469512e-06,
      "loss": 0.0092,
      "step": 2526380
    },
    {
      "epoch": 4.1345090106897615,
      "grad_norm": 0.11622083932161331,
      "learning_rate": 1.742387802033434e-06,
      "loss": 0.0086,
      "step": 2526400
    },
    {
      "epoch": 4.134541741128414,
      "grad_norm": 0.4396915137767792,
      "learning_rate": 1.7423219098199169e-06,
      "loss": 0.0102,
      "step": 2526420
    },
    {
      "epoch": 4.134574471567068,
      "grad_norm": 0.17539986968040466,
      "learning_rate": 1.7422560176063996e-06,
      "loss": 0.0078,
      "step": 2526440
    },
    {
      "epoch": 4.134607202005721,
      "grad_norm": 0.16652671992778778,
      "learning_rate": 1.7421901253928826e-06,
      "loss": 0.0082,
      "step": 2526460
    },
    {
      "epoch": 4.134639932444375,
      "grad_norm": 0.09966041147708893,
      "learning_rate": 1.7421242331793653e-06,
      "loss": 0.0154,
      "step": 2526480
    },
    {
      "epoch": 4.134672662883028,
      "grad_norm": 0.3218271732330322,
      "learning_rate": 1.742058340965848e-06,
      "loss": 0.0122,
      "step": 2526500
    },
    {
      "epoch": 4.134705393321681,
      "grad_norm": 0.09294775873422623,
      "learning_rate": 1.741992448752331e-06,
      "loss": 0.0062,
      "step": 2526520
    },
    {
      "epoch": 4.134738123760335,
      "grad_norm": 0.257546603679657,
      "learning_rate": 1.7419265565388137e-06,
      "loss": 0.0074,
      "step": 2526540
    },
    {
      "epoch": 4.134770854198988,
      "grad_norm": 0.3990415930747986,
      "learning_rate": 1.741860664325297e-06,
      "loss": 0.0107,
      "step": 2526560
    },
    {
      "epoch": 4.134803584637641,
      "grad_norm": 0.2940381169319153,
      "learning_rate": 1.7417947721117798e-06,
      "loss": 0.0093,
      "step": 2526580
    },
    {
      "epoch": 4.134836315076295,
      "grad_norm": 0.2990853190422058,
      "learning_rate": 1.7417288798982626e-06,
      "loss": 0.0118,
      "step": 2526600
    },
    {
      "epoch": 4.134869045514948,
      "grad_norm": 0.11349447816610336,
      "learning_rate": 1.7416629876847455e-06,
      "loss": 0.0084,
      "step": 2526620
    },
    {
      "epoch": 4.134901775953601,
      "grad_norm": 0.22369593381881714,
      "learning_rate": 1.7415970954712283e-06,
      "loss": 0.0087,
      "step": 2526640
    },
    {
      "epoch": 4.134934506392255,
      "grad_norm": 0.13694676756858826,
      "learning_rate": 1.7415312032577112e-06,
      "loss": 0.0083,
      "step": 2526660
    },
    {
      "epoch": 4.134967236830908,
      "grad_norm": 0.1665431261062622,
      "learning_rate": 1.741465311044194e-06,
      "loss": 0.007,
      "step": 2526680
    },
    {
      "epoch": 4.134999967269561,
      "grad_norm": 0.10051286965608597,
      "learning_rate": 1.7413994188306767e-06,
      "loss": 0.0107,
      "step": 2526700
    },
    {
      "epoch": 4.135032697708215,
      "grad_norm": 0.5473529100418091,
      "learning_rate": 1.7413335266171599e-06,
      "loss": 0.0097,
      "step": 2526720
    },
    {
      "epoch": 4.135065428146868,
      "grad_norm": 0.1372804492712021,
      "learning_rate": 1.7412676344036428e-06,
      "loss": 0.0147,
      "step": 2526740
    },
    {
      "epoch": 4.135098158585522,
      "grad_norm": 0.09343196451663971,
      "learning_rate": 1.7412017421901256e-06,
      "loss": 0.0126,
      "step": 2526760
    },
    {
      "epoch": 4.1351308890241745,
      "grad_norm": 0.23295952379703522,
      "learning_rate": 1.7411358499766085e-06,
      "loss": 0.009,
      "step": 2526780
    },
    {
      "epoch": 4.135163619462828,
      "grad_norm": 0.32625502347946167,
      "learning_rate": 1.7410699577630913e-06,
      "loss": 0.0074,
      "step": 2526800
    },
    {
      "epoch": 4.135196349901482,
      "grad_norm": 0.81708824634552,
      "learning_rate": 1.7410040655495742e-06,
      "loss": 0.0099,
      "step": 2526820
    },
    {
      "epoch": 4.135229080340134,
      "grad_norm": 0.4630732238292694,
      "learning_rate": 1.740938173336057e-06,
      "loss": 0.0194,
      "step": 2526840
    },
    {
      "epoch": 4.135261810778788,
      "grad_norm": 0.2747921347618103,
      "learning_rate": 1.74087228112254e-06,
      "loss": 0.0122,
      "step": 2526860
    },
    {
      "epoch": 4.1352945412174416,
      "grad_norm": 0.1540222465991974,
      "learning_rate": 1.7408063889090226e-06,
      "loss": 0.0113,
      "step": 2526880
    },
    {
      "epoch": 4.135327271656095,
      "grad_norm": 0.20255759358406067,
      "learning_rate": 1.7407404966955058e-06,
      "loss": 0.0074,
      "step": 2526900
    },
    {
      "epoch": 4.135360002094748,
      "grad_norm": 0.20696820318698883,
      "learning_rate": 1.7406746044819885e-06,
      "loss": 0.0133,
      "step": 2526920
    },
    {
      "epoch": 4.1353927325334014,
      "grad_norm": 0.16457286477088928,
      "learning_rate": 1.7406087122684715e-06,
      "loss": 0.0094,
      "step": 2526940
    },
    {
      "epoch": 4.135425462972055,
      "grad_norm": 0.2769452631473541,
      "learning_rate": 1.7405428200549542e-06,
      "loss": 0.0092,
      "step": 2526960
    },
    {
      "epoch": 4.135458193410708,
      "grad_norm": 0.7907114624977112,
      "learning_rate": 1.7404769278414372e-06,
      "loss": 0.0157,
      "step": 2526980
    },
    {
      "epoch": 4.135490923849361,
      "grad_norm": 0.25539514422416687,
      "learning_rate": 1.74041103562792e-06,
      "loss": 0.0097,
      "step": 2527000
    },
    {
      "epoch": 4.135523654288015,
      "grad_norm": 0.5136005282402039,
      "learning_rate": 1.7403451434144029e-06,
      "loss": 0.0135,
      "step": 2527020
    },
    {
      "epoch": 4.1355563847266685,
      "grad_norm": 0.13345447182655334,
      "learning_rate": 1.7402792512008856e-06,
      "loss": 0.0082,
      "step": 2527040
    },
    {
      "epoch": 4.135589115165321,
      "grad_norm": 0.13638222217559814,
      "learning_rate": 1.7402133589873686e-06,
      "loss": 0.0083,
      "step": 2527060
    },
    {
      "epoch": 4.135621845603975,
      "grad_norm": 0.5975067019462585,
      "learning_rate": 1.7401474667738515e-06,
      "loss": 0.0109,
      "step": 2527080
    },
    {
      "epoch": 4.135654576042628,
      "grad_norm": 0.11530280113220215,
      "learning_rate": 1.7400815745603345e-06,
      "loss": 0.0116,
      "step": 2527100
    },
    {
      "epoch": 4.135687306481281,
      "grad_norm": 0.09237652271986008,
      "learning_rate": 1.7400156823468172e-06,
      "loss": 0.0125,
      "step": 2527120
    },
    {
      "epoch": 4.135720036919935,
      "grad_norm": 0.2019513100385666,
      "learning_rate": 1.7399497901333002e-06,
      "loss": 0.0093,
      "step": 2527140
    },
    {
      "epoch": 4.135752767358588,
      "grad_norm": 0.39088407158851624,
      "learning_rate": 1.739883897919783e-06,
      "loss": 0.0082,
      "step": 2527160
    },
    {
      "epoch": 4.135785497797242,
      "grad_norm": 0.09935244172811508,
      "learning_rate": 1.7398180057062659e-06,
      "loss": 0.0097,
      "step": 2527180
    },
    {
      "epoch": 4.135818228235895,
      "grad_norm": 1.3966026306152344,
      "learning_rate": 1.7397521134927486e-06,
      "loss": 0.0173,
      "step": 2527200
    },
    {
      "epoch": 4.135850958674548,
      "grad_norm": 0.3603748083114624,
      "learning_rate": 1.7396862212792315e-06,
      "loss": 0.0073,
      "step": 2527220
    },
    {
      "epoch": 4.135883689113202,
      "grad_norm": 0.2789715528488159,
      "learning_rate": 1.7396203290657143e-06,
      "loss": 0.0086,
      "step": 2527240
    },
    {
      "epoch": 4.1359164195518545,
      "grad_norm": 0.13988108932971954,
      "learning_rate": 1.7395544368521974e-06,
      "loss": 0.0074,
      "step": 2527260
    },
    {
      "epoch": 4.135949149990508,
      "grad_norm": 0.25566622614860535,
      "learning_rate": 1.7394885446386802e-06,
      "loss": 0.0086,
      "step": 2527280
    },
    {
      "epoch": 4.135981880429162,
      "grad_norm": 0.14000347256660461,
      "learning_rate": 1.7394226524251631e-06,
      "loss": 0.0085,
      "step": 2527300
    },
    {
      "epoch": 4.136014610867814,
      "grad_norm": 0.20525550842285156,
      "learning_rate": 1.7393567602116459e-06,
      "loss": 0.0092,
      "step": 2527320
    },
    {
      "epoch": 4.136047341306468,
      "grad_norm": 0.38116520643234253,
      "learning_rate": 1.7392908679981288e-06,
      "loss": 0.0073,
      "step": 2527340
    },
    {
      "epoch": 4.136080071745122,
      "grad_norm": 0.2138892412185669,
      "learning_rate": 1.7392249757846116e-06,
      "loss": 0.0096,
      "step": 2527360
    },
    {
      "epoch": 4.136112802183775,
      "grad_norm": 0.47591590881347656,
      "learning_rate": 1.7391590835710945e-06,
      "loss": 0.0059,
      "step": 2527380
    },
    {
      "epoch": 4.136145532622428,
      "grad_norm": 0.3577915132045746,
      "learning_rate": 1.7390931913575773e-06,
      "loss": 0.0071,
      "step": 2527400
    },
    {
      "epoch": 4.1361782630610815,
      "grad_norm": 0.7147997617721558,
      "learning_rate": 1.7390272991440602e-06,
      "loss": 0.0089,
      "step": 2527420
    },
    {
      "epoch": 4.136210993499735,
      "grad_norm": 0.10977619886398315,
      "learning_rate": 1.7389614069305434e-06,
      "loss": 0.0075,
      "step": 2527440
    },
    {
      "epoch": 4.136243723938388,
      "grad_norm": 0.8119930624961853,
      "learning_rate": 1.7388955147170261e-06,
      "loss": 0.0098,
      "step": 2527460
    },
    {
      "epoch": 4.136276454377041,
      "grad_norm": 0.3988696336746216,
      "learning_rate": 1.7388296225035089e-06,
      "loss": 0.0071,
      "step": 2527480
    },
    {
      "epoch": 4.136309184815695,
      "grad_norm": 0.4273388683795929,
      "learning_rate": 1.7387637302899918e-06,
      "loss": 0.0062,
      "step": 2527500
    },
    {
      "epoch": 4.136341915254349,
      "grad_norm": 0.03776533156633377,
      "learning_rate": 1.7386978380764745e-06,
      "loss": 0.0073,
      "step": 2527520
    },
    {
      "epoch": 4.136374645693001,
      "grad_norm": 0.22292368113994598,
      "learning_rate": 1.7386319458629575e-06,
      "loss": 0.0119,
      "step": 2527540
    },
    {
      "epoch": 4.136407376131655,
      "grad_norm": 0.16163037717342377,
      "learning_rate": 1.7385660536494402e-06,
      "loss": 0.008,
      "step": 2527560
    },
    {
      "epoch": 4.1364401065703085,
      "grad_norm": 0.23654833436012268,
      "learning_rate": 1.7385001614359232e-06,
      "loss": 0.0071,
      "step": 2527580
    },
    {
      "epoch": 4.136472837008961,
      "grad_norm": 0.13259945809841156,
      "learning_rate": 1.7384342692224064e-06,
      "loss": 0.0095,
      "step": 2527600
    },
    {
      "epoch": 4.136505567447615,
      "grad_norm": 0.34517332911491394,
      "learning_rate": 1.738368377008889e-06,
      "loss": 0.0097,
      "step": 2527620
    },
    {
      "epoch": 4.136538297886268,
      "grad_norm": 0.2696247398853302,
      "learning_rate": 1.738302484795372e-06,
      "loss": 0.012,
      "step": 2527640
    },
    {
      "epoch": 4.136571028324922,
      "grad_norm": 0.23846085369586945,
      "learning_rate": 1.7382365925818548e-06,
      "loss": 0.0075,
      "step": 2527660
    },
    {
      "epoch": 4.136603758763575,
      "grad_norm": 0.32626137137413025,
      "learning_rate": 1.7381707003683375e-06,
      "loss": 0.0084,
      "step": 2527680
    },
    {
      "epoch": 4.136636489202228,
      "grad_norm": 0.12568601965904236,
      "learning_rate": 1.7381048081548205e-06,
      "loss": 0.0094,
      "step": 2527700
    },
    {
      "epoch": 4.136669219640882,
      "grad_norm": 0.458771288394928,
      "learning_rate": 1.7380389159413032e-06,
      "loss": 0.013,
      "step": 2527720
    },
    {
      "epoch": 4.136701950079535,
      "grad_norm": 0.16572748124599457,
      "learning_rate": 1.7379730237277862e-06,
      "loss": 0.01,
      "step": 2527740
    },
    {
      "epoch": 4.136734680518188,
      "grad_norm": 0.11228065192699432,
      "learning_rate": 1.737907131514269e-06,
      "loss": 0.0162,
      "step": 2527760
    },
    {
      "epoch": 4.136767410956842,
      "grad_norm": 0.18644757568836212,
      "learning_rate": 1.737841239300752e-06,
      "loss": 0.0081,
      "step": 2527780
    },
    {
      "epoch": 4.136800141395495,
      "grad_norm": 0.09592968225479126,
      "learning_rate": 1.737775347087235e-06,
      "loss": 0.0069,
      "step": 2527800
    },
    {
      "epoch": 4.136832871834148,
      "grad_norm": 0.14976716041564941,
      "learning_rate": 1.7377094548737178e-06,
      "loss": 0.0089,
      "step": 2527820
    },
    {
      "epoch": 4.136865602272802,
      "grad_norm": 0.4253811240196228,
      "learning_rate": 1.7376435626602007e-06,
      "loss": 0.0142,
      "step": 2527840
    },
    {
      "epoch": 4.136898332711455,
      "grad_norm": 0.13642510771751404,
      "learning_rate": 1.7375776704466835e-06,
      "loss": 0.0112,
      "step": 2527860
    },
    {
      "epoch": 4.136931063150108,
      "grad_norm": 0.25540170073509216,
      "learning_rate": 1.7375117782331664e-06,
      "loss": 0.0089,
      "step": 2527880
    },
    {
      "epoch": 4.1369637935887615,
      "grad_norm": 0.14183270931243896,
      "learning_rate": 1.7374458860196491e-06,
      "loss": 0.0095,
      "step": 2527900
    },
    {
      "epoch": 4.136996524027415,
      "grad_norm": 0.5707359910011292,
      "learning_rate": 1.7373799938061319e-06,
      "loss": 0.0078,
      "step": 2527920
    },
    {
      "epoch": 4.137029254466069,
      "grad_norm": 0.04615972936153412,
      "learning_rate": 1.7373141015926148e-06,
      "loss": 0.0152,
      "step": 2527940
    },
    {
      "epoch": 4.137061984904721,
      "grad_norm": 0.13788984715938568,
      "learning_rate": 1.737248209379098e-06,
      "loss": 0.0068,
      "step": 2527960
    },
    {
      "epoch": 4.137094715343375,
      "grad_norm": 0.2625223398208618,
      "learning_rate": 1.7371823171655807e-06,
      "loss": 0.0063,
      "step": 2527980
    },
    {
      "epoch": 4.137127445782029,
      "grad_norm": 0.1458137482404709,
      "learning_rate": 1.7371164249520637e-06,
      "loss": 0.0091,
      "step": 2528000
    },
    {
      "epoch": 4.137160176220681,
      "grad_norm": 0.14894916117191315,
      "learning_rate": 1.7370505327385464e-06,
      "loss": 0.0067,
      "step": 2528020
    },
    {
      "epoch": 4.137192906659335,
      "grad_norm": 0.1162431463599205,
      "learning_rate": 1.7369846405250294e-06,
      "loss": 0.0122,
      "step": 2528040
    },
    {
      "epoch": 4.1372256370979885,
      "grad_norm": 0.34631502628326416,
      "learning_rate": 1.7369187483115121e-06,
      "loss": 0.0122,
      "step": 2528060
    },
    {
      "epoch": 4.137258367536642,
      "grad_norm": 0.07451117783784866,
      "learning_rate": 1.736852856097995e-06,
      "loss": 0.0096,
      "step": 2528080
    },
    {
      "epoch": 4.137291097975295,
      "grad_norm": 0.31442511081695557,
      "learning_rate": 1.7367869638844778e-06,
      "loss": 0.0131,
      "step": 2528100
    },
    {
      "epoch": 4.137323828413948,
      "grad_norm": 0.2617841362953186,
      "learning_rate": 1.7367210716709608e-06,
      "loss": 0.0139,
      "step": 2528120
    },
    {
      "epoch": 4.137356558852602,
      "grad_norm": 0.5599940419197083,
      "learning_rate": 1.7366551794574437e-06,
      "loss": 0.007,
      "step": 2528140
    },
    {
      "epoch": 4.137389289291255,
      "grad_norm": 0.06343527883291245,
      "learning_rate": 1.7365892872439267e-06,
      "loss": 0.0095,
      "step": 2528160
    },
    {
      "epoch": 4.137422019729908,
      "grad_norm": 0.26137450337409973,
      "learning_rate": 1.7365233950304094e-06,
      "loss": 0.0054,
      "step": 2528180
    },
    {
      "epoch": 4.137454750168562,
      "grad_norm": 0.12602631747722626,
      "learning_rate": 1.7364575028168924e-06,
      "loss": 0.0077,
      "step": 2528200
    },
    {
      "epoch": 4.1374874806072155,
      "grad_norm": 0.3391605317592621,
      "learning_rate": 1.736391610603375e-06,
      "loss": 0.0081,
      "step": 2528220
    },
    {
      "epoch": 4.137520211045868,
      "grad_norm": 0.24792587757110596,
      "learning_rate": 1.736325718389858e-06,
      "loss": 0.0108,
      "step": 2528240
    },
    {
      "epoch": 4.137552941484522,
      "grad_norm": 0.11662918329238892,
      "learning_rate": 1.7362598261763408e-06,
      "loss": 0.0063,
      "step": 2528260
    },
    {
      "epoch": 4.137585671923175,
      "grad_norm": 0.17083238065242767,
      "learning_rate": 1.7361939339628237e-06,
      "loss": 0.0079,
      "step": 2528280
    },
    {
      "epoch": 4.137618402361828,
      "grad_norm": 0.25208938121795654,
      "learning_rate": 1.7361280417493065e-06,
      "loss": 0.0106,
      "step": 2528300
    },
    {
      "epoch": 4.137651132800482,
      "grad_norm": 0.2997196614742279,
      "learning_rate": 1.7360621495357896e-06,
      "loss": 0.0105,
      "step": 2528320
    },
    {
      "epoch": 4.137683863239135,
      "grad_norm": 0.7812262177467346,
      "learning_rate": 1.7359962573222724e-06,
      "loss": 0.0063,
      "step": 2528340
    },
    {
      "epoch": 4.137716593677789,
      "grad_norm": 0.25302600860595703,
      "learning_rate": 1.7359303651087553e-06,
      "loss": 0.009,
      "step": 2528360
    },
    {
      "epoch": 4.137749324116442,
      "grad_norm": 0.5284522771835327,
      "learning_rate": 1.735864472895238e-06,
      "loss": 0.0072,
      "step": 2528380
    },
    {
      "epoch": 4.137782054555095,
      "grad_norm": 0.071539506316185,
      "learning_rate": 1.735798580681721e-06,
      "loss": 0.0101,
      "step": 2528400
    },
    {
      "epoch": 4.137814784993749,
      "grad_norm": 0.2523842453956604,
      "learning_rate": 1.7357326884682038e-06,
      "loss": 0.0069,
      "step": 2528420
    },
    {
      "epoch": 4.1378475154324015,
      "grad_norm": 0.14432474970817566,
      "learning_rate": 1.7356667962546867e-06,
      "loss": 0.0088,
      "step": 2528440
    },
    {
      "epoch": 4.137880245871055,
      "grad_norm": 0.1595458835363388,
      "learning_rate": 1.7356009040411695e-06,
      "loss": 0.0084,
      "step": 2528460
    },
    {
      "epoch": 4.137912976309709,
      "grad_norm": 0.13856609165668488,
      "learning_rate": 1.7355350118276526e-06,
      "loss": 0.0138,
      "step": 2528480
    },
    {
      "epoch": 4.137945706748362,
      "grad_norm": 0.8196695446968079,
      "learning_rate": 1.7354691196141354e-06,
      "loss": 0.0116,
      "step": 2528500
    },
    {
      "epoch": 4.137978437187015,
      "grad_norm": 0.21405157446861267,
      "learning_rate": 1.7354032274006183e-06,
      "loss": 0.0094,
      "step": 2528520
    },
    {
      "epoch": 4.138011167625669,
      "grad_norm": 0.16319260001182556,
      "learning_rate": 1.735337335187101e-06,
      "loss": 0.0092,
      "step": 2528540
    },
    {
      "epoch": 4.138043898064322,
      "grad_norm": 0.2643047571182251,
      "learning_rate": 1.735271442973584e-06,
      "loss": 0.009,
      "step": 2528560
    },
    {
      "epoch": 4.138076628502975,
      "grad_norm": 0.16930943727493286,
      "learning_rate": 1.7352055507600667e-06,
      "loss": 0.01,
      "step": 2528580
    },
    {
      "epoch": 4.1381093589416285,
      "grad_norm": 0.110023632645607,
      "learning_rate": 1.7351396585465497e-06,
      "loss": 0.013,
      "step": 2528600
    },
    {
      "epoch": 4.138142089380282,
      "grad_norm": 0.5974609851837158,
      "learning_rate": 1.7350737663330324e-06,
      "loss": 0.0103,
      "step": 2528620
    },
    {
      "epoch": 4.138174819818936,
      "grad_norm": 0.4344959259033203,
      "learning_rate": 1.7350078741195154e-06,
      "loss": 0.0141,
      "step": 2528640
    },
    {
      "epoch": 4.138207550257588,
      "grad_norm": 0.17762532830238342,
      "learning_rate": 1.7349419819059983e-06,
      "loss": 0.011,
      "step": 2528660
    },
    {
      "epoch": 4.138240280696242,
      "grad_norm": 0.29211363196372986,
      "learning_rate": 1.7348760896924813e-06,
      "loss": 0.0086,
      "step": 2528680
    },
    {
      "epoch": 4.1382730111348955,
      "grad_norm": 0.058771245181560516,
      "learning_rate": 1.734810197478964e-06,
      "loss": 0.01,
      "step": 2528700
    },
    {
      "epoch": 4.138305741573548,
      "grad_norm": 0.07349766045808792,
      "learning_rate": 1.734744305265447e-06,
      "loss": 0.0097,
      "step": 2528720
    },
    {
      "epoch": 4.138338472012202,
      "grad_norm": 0.35980692505836487,
      "learning_rate": 1.7346784130519297e-06,
      "loss": 0.0147,
      "step": 2528740
    },
    {
      "epoch": 4.138371202450855,
      "grad_norm": 0.09793967008590698,
      "learning_rate": 1.7346125208384127e-06,
      "loss": 0.017,
      "step": 2528760
    },
    {
      "epoch": 4.138403932889508,
      "grad_norm": 0.16654813289642334,
      "learning_rate": 1.7345466286248954e-06,
      "loss": 0.0086,
      "step": 2528780
    },
    {
      "epoch": 4.138436663328162,
      "grad_norm": 0.2627749741077423,
      "learning_rate": 1.7344807364113784e-06,
      "loss": 0.0082,
      "step": 2528800
    },
    {
      "epoch": 4.138469393766815,
      "grad_norm": 0.11654888093471527,
      "learning_rate": 1.734414844197861e-06,
      "loss": 0.0112,
      "step": 2528820
    },
    {
      "epoch": 4.138502124205469,
      "grad_norm": 0.6163298487663269,
      "learning_rate": 1.7343489519843443e-06,
      "loss": 0.0108,
      "step": 2528840
    },
    {
      "epoch": 4.138534854644122,
      "grad_norm": 0.415973424911499,
      "learning_rate": 1.7342830597708272e-06,
      "loss": 0.0086,
      "step": 2528860
    },
    {
      "epoch": 4.138567585082775,
      "grad_norm": 0.21133677661418915,
      "learning_rate": 1.73421716755731e-06,
      "loss": 0.0089,
      "step": 2528880
    },
    {
      "epoch": 4.138600315521429,
      "grad_norm": 0.3247852623462677,
      "learning_rate": 1.7341512753437927e-06,
      "loss": 0.0132,
      "step": 2528900
    },
    {
      "epoch": 4.138633045960082,
      "grad_norm": 0.09119772166013718,
      "learning_rate": 1.7340853831302756e-06,
      "loss": 0.0128,
      "step": 2528920
    },
    {
      "epoch": 4.138665776398735,
      "grad_norm": 0.1903010904788971,
      "learning_rate": 1.7340194909167584e-06,
      "loss": 0.0065,
      "step": 2528940
    },
    {
      "epoch": 4.138698506837389,
      "grad_norm": 0.31128260493278503,
      "learning_rate": 1.7339535987032413e-06,
      "loss": 0.0092,
      "step": 2528960
    },
    {
      "epoch": 4.138731237276042,
      "grad_norm": 0.069587841629982,
      "learning_rate": 1.733887706489724e-06,
      "loss": 0.008,
      "step": 2528980
    },
    {
      "epoch": 4.138763967714695,
      "grad_norm": 0.30466827750205994,
      "learning_rate": 1.733821814276207e-06,
      "loss": 0.0068,
      "step": 2529000
    },
    {
      "epoch": 4.138796698153349,
      "grad_norm": 0.5888504385948181,
      "learning_rate": 1.7337559220626902e-06,
      "loss": 0.0061,
      "step": 2529020
    },
    {
      "epoch": 4.138829428592002,
      "grad_norm": 0.14578773081302643,
      "learning_rate": 1.733690029849173e-06,
      "loss": 0.0093,
      "step": 2529040
    },
    {
      "epoch": 4.138862159030655,
      "grad_norm": 0.25286465883255005,
      "learning_rate": 1.7336241376356559e-06,
      "loss": 0.0105,
      "step": 2529060
    },
    {
      "epoch": 4.1388948894693085,
      "grad_norm": 0.34030649065971375,
      "learning_rate": 1.7335582454221386e-06,
      "loss": 0.0101,
      "step": 2529080
    },
    {
      "epoch": 4.138927619907962,
      "grad_norm": 0.6037870645523071,
      "learning_rate": 1.7334923532086216e-06,
      "loss": 0.0067,
      "step": 2529100
    },
    {
      "epoch": 4.138960350346616,
      "grad_norm": 0.24026992917060852,
      "learning_rate": 1.7334264609951043e-06,
      "loss": 0.0068,
      "step": 2529120
    },
    {
      "epoch": 4.138993080785268,
      "grad_norm": 0.1425953507423401,
      "learning_rate": 1.733360568781587e-06,
      "loss": 0.0134,
      "step": 2529140
    },
    {
      "epoch": 4.139025811223922,
      "grad_norm": 0.36573073267936707,
      "learning_rate": 1.73329467656807e-06,
      "loss": 0.0121,
      "step": 2529160
    },
    {
      "epoch": 4.139058541662576,
      "grad_norm": 0.1790042668581009,
      "learning_rate": 1.7332287843545527e-06,
      "loss": 0.0053,
      "step": 2529180
    },
    {
      "epoch": 4.139091272101228,
      "grad_norm": 0.1799904853105545,
      "learning_rate": 1.733162892141036e-06,
      "loss": 0.011,
      "step": 2529200
    },
    {
      "epoch": 4.139124002539882,
      "grad_norm": 0.2886802554130554,
      "learning_rate": 1.7330969999275189e-06,
      "loss": 0.0081,
      "step": 2529220
    },
    {
      "epoch": 4.1391567329785355,
      "grad_norm": 0.6441323161125183,
      "learning_rate": 1.7330311077140016e-06,
      "loss": 0.0112,
      "step": 2529240
    },
    {
      "epoch": 4.139189463417189,
      "grad_norm": 0.14448152482509613,
      "learning_rate": 1.7329652155004846e-06,
      "loss": 0.0119,
      "step": 2529260
    },
    {
      "epoch": 4.139222193855842,
      "grad_norm": 0.2090555727481842,
      "learning_rate": 1.7328993232869673e-06,
      "loss": 0.0078,
      "step": 2529280
    },
    {
      "epoch": 4.139254924294495,
      "grad_norm": 0.0775439515709877,
      "learning_rate": 1.7328334310734502e-06,
      "loss": 0.0064,
      "step": 2529300
    },
    {
      "epoch": 4.139287654733149,
      "grad_norm": 0.06797286868095398,
      "learning_rate": 1.732767538859933e-06,
      "loss": 0.0089,
      "step": 2529320
    },
    {
      "epoch": 4.139320385171802,
      "grad_norm": 0.5960614681243896,
      "learning_rate": 1.7327016466464157e-06,
      "loss": 0.0076,
      "step": 2529340
    },
    {
      "epoch": 4.139353115610455,
      "grad_norm": 0.23445580899715424,
      "learning_rate": 1.7326357544328989e-06,
      "loss": 0.0106,
      "step": 2529360
    },
    {
      "epoch": 4.139385846049109,
      "grad_norm": 0.14670170843601227,
      "learning_rate": 1.7325698622193818e-06,
      "loss": 0.0077,
      "step": 2529380
    },
    {
      "epoch": 4.1394185764877625,
      "grad_norm": 0.14666928350925446,
      "learning_rate": 1.7325039700058646e-06,
      "loss": 0.008,
      "step": 2529400
    },
    {
      "epoch": 4.139451306926415,
      "grad_norm": 0.1662711203098297,
      "learning_rate": 1.7324380777923475e-06,
      "loss": 0.009,
      "step": 2529420
    },
    {
      "epoch": 4.139484037365069,
      "grad_norm": 0.2675095498561859,
      "learning_rate": 1.7323721855788303e-06,
      "loss": 0.0102,
      "step": 2529440
    },
    {
      "epoch": 4.139516767803722,
      "grad_norm": 0.1449136734008789,
      "learning_rate": 1.7323062933653132e-06,
      "loss": 0.0116,
      "step": 2529460
    },
    {
      "epoch": 4.139549498242375,
      "grad_norm": 0.8735433220863342,
      "learning_rate": 1.732240401151796e-06,
      "loss": 0.0081,
      "step": 2529480
    },
    {
      "epoch": 4.139582228681029,
      "grad_norm": 0.27106720209121704,
      "learning_rate": 1.732174508938279e-06,
      "loss": 0.0135,
      "step": 2529500
    },
    {
      "epoch": 4.139614959119682,
      "grad_norm": 0.11238664388656616,
      "learning_rate": 1.7321086167247616e-06,
      "loss": 0.0115,
      "step": 2529520
    },
    {
      "epoch": 4.139647689558336,
      "grad_norm": 0.29466333985328674,
      "learning_rate": 1.7320427245112448e-06,
      "loss": 0.0168,
      "step": 2529540
    },
    {
      "epoch": 4.1396804199969885,
      "grad_norm": 0.5819699764251709,
      "learning_rate": 1.7319768322977276e-06,
      "loss": 0.0097,
      "step": 2529560
    },
    {
      "epoch": 4.139713150435642,
      "grad_norm": 0.2781721353530884,
      "learning_rate": 1.7319109400842105e-06,
      "loss": 0.0097,
      "step": 2529580
    },
    {
      "epoch": 4.139745880874296,
      "grad_norm": 0.17616517841815948,
      "learning_rate": 1.7318450478706932e-06,
      "loss": 0.0052,
      "step": 2529600
    },
    {
      "epoch": 4.139778611312948,
      "grad_norm": 0.21283085644245148,
      "learning_rate": 1.7317791556571762e-06,
      "loss": 0.0088,
      "step": 2529620
    },
    {
      "epoch": 4.139811341751602,
      "grad_norm": 0.10260631889104843,
      "learning_rate": 1.731713263443659e-06,
      "loss": 0.0057,
      "step": 2529640
    },
    {
      "epoch": 4.139844072190256,
      "grad_norm": 0.3835379481315613,
      "learning_rate": 1.7316473712301419e-06,
      "loss": 0.0085,
      "step": 2529660
    },
    {
      "epoch": 4.139876802628909,
      "grad_norm": 0.20771150290966034,
      "learning_rate": 1.7315814790166246e-06,
      "loss": 0.0097,
      "step": 2529680
    },
    {
      "epoch": 4.139909533067562,
      "grad_norm": 0.14964263141155243,
      "learning_rate": 1.7315155868031076e-06,
      "loss": 0.0111,
      "step": 2529700
    },
    {
      "epoch": 4.1399422635062155,
      "grad_norm": 0.4276290833950043,
      "learning_rate": 1.7314496945895905e-06,
      "loss": 0.0082,
      "step": 2529720
    },
    {
      "epoch": 4.139974993944869,
      "grad_norm": 0.1067531630396843,
      "learning_rate": 1.7313838023760735e-06,
      "loss": 0.0104,
      "step": 2529740
    },
    {
      "epoch": 4.140007724383522,
      "grad_norm": 0.12311865389347076,
      "learning_rate": 1.7313179101625562e-06,
      "loss": 0.0137,
      "step": 2529760
    },
    {
      "epoch": 4.140040454822175,
      "grad_norm": 0.24056757986545563,
      "learning_rate": 1.7312520179490392e-06,
      "loss": 0.0077,
      "step": 2529780
    },
    {
      "epoch": 4.140073185260829,
      "grad_norm": 0.08780647069215775,
      "learning_rate": 1.731186125735522e-06,
      "loss": 0.0098,
      "step": 2529800
    },
    {
      "epoch": 4.140105915699483,
      "grad_norm": 0.07201822847127914,
      "learning_rate": 1.7311202335220049e-06,
      "loss": 0.014,
      "step": 2529820
    },
    {
      "epoch": 4.140138646138135,
      "grad_norm": 0.10144670307636261,
      "learning_rate": 1.7310543413084876e-06,
      "loss": 0.0116,
      "step": 2529840
    },
    {
      "epoch": 4.140171376576789,
      "grad_norm": 0.44033437967300415,
      "learning_rate": 1.7309884490949706e-06,
      "loss": 0.0103,
      "step": 2529860
    },
    {
      "epoch": 4.1402041070154425,
      "grad_norm": 0.7033384442329407,
      "learning_rate": 1.7309225568814533e-06,
      "loss": 0.0119,
      "step": 2529880
    },
    {
      "epoch": 4.140236837454095,
      "grad_norm": 0.1891854703426361,
      "learning_rate": 1.7308566646679365e-06,
      "loss": 0.0077,
      "step": 2529900
    },
    {
      "epoch": 4.140269567892749,
      "grad_norm": 0.0980297178030014,
      "learning_rate": 1.7307907724544192e-06,
      "loss": 0.0064,
      "step": 2529920
    },
    {
      "epoch": 4.140302298331402,
      "grad_norm": 0.24312271177768707,
      "learning_rate": 1.7307248802409021e-06,
      "loss": 0.0121,
      "step": 2529940
    },
    {
      "epoch": 4.140335028770056,
      "grad_norm": 0.3661110997200012,
      "learning_rate": 1.7306589880273849e-06,
      "loss": 0.0129,
      "step": 2529960
    },
    {
      "epoch": 4.140367759208709,
      "grad_norm": 0.42492443323135376,
      "learning_rate": 1.7305930958138678e-06,
      "loss": 0.0117,
      "step": 2529980
    },
    {
      "epoch": 4.140400489647362,
      "grad_norm": 0.19103269279003143,
      "learning_rate": 1.7305272036003506e-06,
      "loss": 0.0062,
      "step": 2530000
    },
    {
      "epoch": 4.140433220086016,
      "grad_norm": 0.43087294697761536,
      "learning_rate": 1.7304613113868335e-06,
      "loss": 0.0117,
      "step": 2530020
    },
    {
      "epoch": 4.140465950524669,
      "grad_norm": 0.09333667904138565,
      "learning_rate": 1.7303954191733163e-06,
      "loss": 0.0117,
      "step": 2530040
    },
    {
      "epoch": 4.140498680963322,
      "grad_norm": 0.11345172673463821,
      "learning_rate": 1.7303295269597992e-06,
      "loss": 0.0054,
      "step": 2530060
    },
    {
      "epoch": 4.140531411401976,
      "grad_norm": 0.09785241633653641,
      "learning_rate": 1.7302636347462824e-06,
      "loss": 0.0092,
      "step": 2530080
    },
    {
      "epoch": 4.140564141840629,
      "grad_norm": 0.5111515522003174,
      "learning_rate": 1.7301977425327651e-06,
      "loss": 0.0085,
      "step": 2530100
    },
    {
      "epoch": 4.140596872279282,
      "grad_norm": 0.3274957239627838,
      "learning_rate": 1.7301318503192479e-06,
      "loss": 0.0138,
      "step": 2530120
    },
    {
      "epoch": 4.140629602717936,
      "grad_norm": 0.5833374857902527,
      "learning_rate": 1.7300659581057308e-06,
      "loss": 0.0104,
      "step": 2530140
    },
    {
      "epoch": 4.140662333156589,
      "grad_norm": 0.22268454730510712,
      "learning_rate": 1.7300000658922136e-06,
      "loss": 0.01,
      "step": 2530160
    },
    {
      "epoch": 4.140695063595242,
      "grad_norm": 0.4032396078109741,
      "learning_rate": 1.7299341736786965e-06,
      "loss": 0.0086,
      "step": 2530180
    },
    {
      "epoch": 4.140727794033896,
      "grad_norm": 0.9610629677772522,
      "learning_rate": 1.7298682814651792e-06,
      "loss": 0.0074,
      "step": 2530200
    },
    {
      "epoch": 4.140760524472549,
      "grad_norm": 0.3108672797679901,
      "learning_rate": 1.7298023892516622e-06,
      "loss": 0.0105,
      "step": 2530220
    },
    {
      "epoch": 4.140793254911202,
      "grad_norm": 0.6046691536903381,
      "learning_rate": 1.7297364970381454e-06,
      "loss": 0.0099,
      "step": 2530240
    },
    {
      "epoch": 4.1408259853498555,
      "grad_norm": 0.2597093880176544,
      "learning_rate": 1.729670604824628e-06,
      "loss": 0.01,
      "step": 2530260
    },
    {
      "epoch": 4.140858715788509,
      "grad_norm": 0.3425983488559723,
      "learning_rate": 1.729604712611111e-06,
      "loss": 0.0085,
      "step": 2530280
    },
    {
      "epoch": 4.140891446227163,
      "grad_norm": 0.1834394484758377,
      "learning_rate": 1.7295388203975938e-06,
      "loss": 0.0058,
      "step": 2530300
    },
    {
      "epoch": 4.140924176665815,
      "grad_norm": 0.13900642096996307,
      "learning_rate": 1.7294729281840765e-06,
      "loss": 0.011,
      "step": 2530320
    },
    {
      "epoch": 4.140956907104469,
      "grad_norm": 0.2514221966266632,
      "learning_rate": 1.7294070359705595e-06,
      "loss": 0.0089,
      "step": 2530340
    },
    {
      "epoch": 4.1409896375431225,
      "grad_norm": 0.31731078028678894,
      "learning_rate": 1.7293411437570422e-06,
      "loss": 0.0137,
      "step": 2530360
    },
    {
      "epoch": 4.141022367981776,
      "grad_norm": 0.08385395258665085,
      "learning_rate": 1.7292752515435252e-06,
      "loss": 0.009,
      "step": 2530380
    },
    {
      "epoch": 4.141055098420429,
      "grad_norm": 0.32832616567611694,
      "learning_rate": 1.729209359330008e-06,
      "loss": 0.008,
      "step": 2530400
    },
    {
      "epoch": 4.141087828859082,
      "grad_norm": 0.3862949013710022,
      "learning_rate": 1.729143467116491e-06,
      "loss": 0.0134,
      "step": 2530420
    },
    {
      "epoch": 4.141120559297736,
      "grad_norm": 0.1918637454509735,
      "learning_rate": 1.729077574902974e-06,
      "loss": 0.0111,
      "step": 2530440
    },
    {
      "epoch": 4.141153289736389,
      "grad_norm": 0.5263293385505676,
      "learning_rate": 1.7290116826894568e-06,
      "loss": 0.013,
      "step": 2530460
    },
    {
      "epoch": 4.141186020175042,
      "grad_norm": 0.3582525849342346,
      "learning_rate": 1.7289457904759397e-06,
      "loss": 0.012,
      "step": 2530480
    },
    {
      "epoch": 4.141218750613696,
      "grad_norm": 0.3316361606121063,
      "learning_rate": 1.7288798982624225e-06,
      "loss": 0.0139,
      "step": 2530500
    },
    {
      "epoch": 4.141251481052349,
      "grad_norm": 0.4792872965335846,
      "learning_rate": 1.7288140060489054e-06,
      "loss": 0.0141,
      "step": 2530520
    },
    {
      "epoch": 4.141284211491002,
      "grad_norm": 0.3015107214450836,
      "learning_rate": 1.7287481138353882e-06,
      "loss": 0.0091,
      "step": 2530540
    },
    {
      "epoch": 4.141316941929656,
      "grad_norm": 0.1433485895395279,
      "learning_rate": 1.7286822216218709e-06,
      "loss": 0.0115,
      "step": 2530560
    },
    {
      "epoch": 4.141349672368309,
      "grad_norm": 0.23403160274028778,
      "learning_rate": 1.7286163294083538e-06,
      "loss": 0.0071,
      "step": 2530580
    },
    {
      "epoch": 4.141382402806962,
      "grad_norm": 0.09809217602014542,
      "learning_rate": 1.728550437194837e-06,
      "loss": 0.011,
      "step": 2530600
    },
    {
      "epoch": 4.141415133245616,
      "grad_norm": 0.3160167932510376,
      "learning_rate": 1.7284845449813197e-06,
      "loss": 0.0053,
      "step": 2530620
    },
    {
      "epoch": 4.141447863684269,
      "grad_norm": 0.14001749455928802,
      "learning_rate": 1.7284186527678027e-06,
      "loss": 0.0106,
      "step": 2530640
    },
    {
      "epoch": 4.141480594122922,
      "grad_norm": 0.18058976531028748,
      "learning_rate": 1.7283527605542854e-06,
      "loss": 0.0111,
      "step": 2530660
    },
    {
      "epoch": 4.141513324561576,
      "grad_norm": 0.11615787446498871,
      "learning_rate": 1.7282868683407684e-06,
      "loss": 0.0102,
      "step": 2530680
    },
    {
      "epoch": 4.141546055000229,
      "grad_norm": 0.0849592387676239,
      "learning_rate": 1.7282209761272511e-06,
      "loss": 0.0094,
      "step": 2530700
    },
    {
      "epoch": 4.141578785438883,
      "grad_norm": 0.14020782709121704,
      "learning_rate": 1.728155083913734e-06,
      "loss": 0.0111,
      "step": 2530720
    },
    {
      "epoch": 4.1416115158775355,
      "grad_norm": 0.23862741887569427,
      "learning_rate": 1.7280891917002168e-06,
      "loss": 0.0112,
      "step": 2530740
    },
    {
      "epoch": 4.141644246316189,
      "grad_norm": 0.14874976873397827,
      "learning_rate": 1.7280232994866998e-06,
      "loss": 0.0132,
      "step": 2530760
    },
    {
      "epoch": 4.141676976754843,
      "grad_norm": 0.07338284701108932,
      "learning_rate": 1.7279574072731827e-06,
      "loss": 0.0098,
      "step": 2530780
    },
    {
      "epoch": 4.141709707193495,
      "grad_norm": 0.26293396949768066,
      "learning_rate": 1.7278915150596657e-06,
      "loss": 0.0085,
      "step": 2530800
    },
    {
      "epoch": 4.141742437632149,
      "grad_norm": 0.06455067545175552,
      "learning_rate": 1.7278256228461484e-06,
      "loss": 0.0126,
      "step": 2530820
    },
    {
      "epoch": 4.141775168070803,
      "grad_norm": 0.13509224355220795,
      "learning_rate": 1.7277597306326314e-06,
      "loss": 0.0096,
      "step": 2530840
    },
    {
      "epoch": 4.141807898509456,
      "grad_norm": 0.2033119946718216,
      "learning_rate": 1.7276938384191141e-06,
      "loss": 0.011,
      "step": 2530860
    },
    {
      "epoch": 4.141840628948109,
      "grad_norm": 0.4999692142009735,
      "learning_rate": 1.727627946205597e-06,
      "loss": 0.0056,
      "step": 2530880
    },
    {
      "epoch": 4.1418733593867625,
      "grad_norm": 0.10890737175941467,
      "learning_rate": 1.7275620539920798e-06,
      "loss": 0.0097,
      "step": 2530900
    },
    {
      "epoch": 4.141906089825416,
      "grad_norm": 0.12771610915660858,
      "learning_rate": 1.7274961617785627e-06,
      "loss": 0.0145,
      "step": 2530920
    },
    {
      "epoch": 4.141938820264069,
      "grad_norm": 0.13827435672283173,
      "learning_rate": 1.7274302695650455e-06,
      "loss": 0.0084,
      "step": 2530940
    },
    {
      "epoch": 4.141971550702722,
      "grad_norm": 0.035645630210638046,
      "learning_rate": 1.7273643773515287e-06,
      "loss": 0.008,
      "step": 2530960
    },
    {
      "epoch": 4.142004281141376,
      "grad_norm": 0.4630122482776642,
      "learning_rate": 1.7272984851380114e-06,
      "loss": 0.0127,
      "step": 2530980
    },
    {
      "epoch": 4.14203701158003,
      "grad_norm": 0.19146934151649475,
      "learning_rate": 1.7272325929244943e-06,
      "loss": 0.0077,
      "step": 2531000
    },
    {
      "epoch": 4.142069742018682,
      "grad_norm": 0.1859883815050125,
      "learning_rate": 1.727166700710977e-06,
      "loss": 0.0092,
      "step": 2531020
    },
    {
      "epoch": 4.142102472457336,
      "grad_norm": 0.185709148645401,
      "learning_rate": 1.72710080849746e-06,
      "loss": 0.0063,
      "step": 2531040
    },
    {
      "epoch": 4.1421352028959895,
      "grad_norm": 0.45766380429267883,
      "learning_rate": 1.7270349162839428e-06,
      "loss": 0.0102,
      "step": 2531060
    },
    {
      "epoch": 4.142167933334642,
      "grad_norm": 0.4707889258861542,
      "learning_rate": 1.7269690240704257e-06,
      "loss": 0.0098,
      "step": 2531080
    },
    {
      "epoch": 4.142200663773296,
      "grad_norm": 0.2684285640716553,
      "learning_rate": 1.7269031318569085e-06,
      "loss": 0.0093,
      "step": 2531100
    },
    {
      "epoch": 4.142233394211949,
      "grad_norm": 0.05562213808298111,
      "learning_rate": 1.7268372396433916e-06,
      "loss": 0.0074,
      "step": 2531120
    },
    {
      "epoch": 4.142266124650603,
      "grad_norm": 0.15166090428829193,
      "learning_rate": 1.7267713474298744e-06,
      "loss": 0.0091,
      "step": 2531140
    },
    {
      "epoch": 4.142298855089256,
      "grad_norm": 0.34055188298225403,
      "learning_rate": 1.7267054552163573e-06,
      "loss": 0.0074,
      "step": 2531160
    },
    {
      "epoch": 4.142331585527909,
      "grad_norm": 0.12533420324325562,
      "learning_rate": 1.72663956300284e-06,
      "loss": 0.0097,
      "step": 2531180
    },
    {
      "epoch": 4.142364315966563,
      "grad_norm": 0.14634600281715393,
      "learning_rate": 1.726573670789323e-06,
      "loss": 0.0079,
      "step": 2531200
    },
    {
      "epoch": 4.1423970464052156,
      "grad_norm": 0.21590696275234222,
      "learning_rate": 1.7265077785758058e-06,
      "loss": 0.0063,
      "step": 2531220
    },
    {
      "epoch": 4.142429776843869,
      "grad_norm": 0.20021504163742065,
      "learning_rate": 1.7264418863622887e-06,
      "loss": 0.0078,
      "step": 2531240
    },
    {
      "epoch": 4.142462507282523,
      "grad_norm": 0.3296937644481659,
      "learning_rate": 1.7263759941487714e-06,
      "loss": 0.0085,
      "step": 2531260
    },
    {
      "epoch": 4.142495237721176,
      "grad_norm": 0.0832001268863678,
      "learning_rate": 1.7263101019352544e-06,
      "loss": 0.0097,
      "step": 2531280
    },
    {
      "epoch": 4.142527968159829,
      "grad_norm": 0.3858489394187927,
      "learning_rate": 1.7262442097217376e-06,
      "loss": 0.0075,
      "step": 2531300
    },
    {
      "epoch": 4.142560698598483,
      "grad_norm": 0.2182672768831253,
      "learning_rate": 1.7261783175082203e-06,
      "loss": 0.0081,
      "step": 2531320
    },
    {
      "epoch": 4.142593429037136,
      "grad_norm": 0.12115258723497391,
      "learning_rate": 1.726112425294703e-06,
      "loss": 0.0108,
      "step": 2531340
    },
    {
      "epoch": 4.142626159475789,
      "grad_norm": 0.09306533634662628,
      "learning_rate": 1.726046533081186e-06,
      "loss": 0.0079,
      "step": 2531360
    },
    {
      "epoch": 4.1426588899144425,
      "grad_norm": 0.06237288936972618,
      "learning_rate": 1.7259806408676687e-06,
      "loss": 0.0049,
      "step": 2531380
    },
    {
      "epoch": 4.142691620353096,
      "grad_norm": 0.35351306200027466,
      "learning_rate": 1.7259147486541517e-06,
      "loss": 0.0058,
      "step": 2531400
    },
    {
      "epoch": 4.14272435079175,
      "grad_norm": 0.22079581022262573,
      "learning_rate": 1.7258488564406344e-06,
      "loss": 0.0086,
      "step": 2531420
    },
    {
      "epoch": 4.142757081230402,
      "grad_norm": 0.1501421481370926,
      "learning_rate": 1.7257829642271174e-06,
      "loss": 0.0126,
      "step": 2531440
    },
    {
      "epoch": 4.142789811669056,
      "grad_norm": 0.07879316806793213,
      "learning_rate": 1.7257170720136001e-06,
      "loss": 0.0073,
      "step": 2531460
    },
    {
      "epoch": 4.14282254210771,
      "grad_norm": 0.07190236449241638,
      "learning_rate": 1.7256511798000833e-06,
      "loss": 0.0049,
      "step": 2531480
    },
    {
      "epoch": 4.142855272546362,
      "grad_norm": 0.5269808173179626,
      "learning_rate": 1.7255852875865662e-06,
      "loss": 0.0112,
      "step": 2531500
    },
    {
      "epoch": 4.142888002985016,
      "grad_norm": 0.13567644357681274,
      "learning_rate": 1.725519395373049e-06,
      "loss": 0.0055,
      "step": 2531520
    },
    {
      "epoch": 4.1429207334236695,
      "grad_norm": 0.35352250933647156,
      "learning_rate": 1.7254535031595317e-06,
      "loss": 0.0071,
      "step": 2531540
    },
    {
      "epoch": 4.142953463862323,
      "grad_norm": 0.3654807209968567,
      "learning_rate": 1.7253876109460147e-06,
      "loss": 0.0101,
      "step": 2531560
    },
    {
      "epoch": 4.142986194300976,
      "grad_norm": 0.09737545996904373,
      "learning_rate": 1.7253217187324974e-06,
      "loss": 0.0075,
      "step": 2531580
    },
    {
      "epoch": 4.143018924739629,
      "grad_norm": 0.04008384793996811,
      "learning_rate": 1.7252558265189803e-06,
      "loss": 0.0078,
      "step": 2531600
    },
    {
      "epoch": 4.143051655178283,
      "grad_norm": 0.31432822346687317,
      "learning_rate": 1.725189934305463e-06,
      "loss": 0.0072,
      "step": 2531620
    },
    {
      "epoch": 4.143084385616936,
      "grad_norm": 0.42105168104171753,
      "learning_rate": 1.725124042091946e-06,
      "loss": 0.0078,
      "step": 2531640
    },
    {
      "epoch": 4.143117116055589,
      "grad_norm": 0.2846865653991699,
      "learning_rate": 1.7250581498784292e-06,
      "loss": 0.0118,
      "step": 2531660
    },
    {
      "epoch": 4.143149846494243,
      "grad_norm": 0.12819217145442963,
      "learning_rate": 1.724992257664912e-06,
      "loss": 0.0054,
      "step": 2531680
    },
    {
      "epoch": 4.143182576932896,
      "grad_norm": 0.36280080676078796,
      "learning_rate": 1.7249263654513949e-06,
      "loss": 0.0083,
      "step": 2531700
    },
    {
      "epoch": 4.143215307371549,
      "grad_norm": 0.08825644105672836,
      "learning_rate": 1.7248604732378776e-06,
      "loss": 0.0103,
      "step": 2531720
    },
    {
      "epoch": 4.143248037810203,
      "grad_norm": 0.1944570541381836,
      "learning_rate": 1.7247945810243606e-06,
      "loss": 0.0082,
      "step": 2531740
    },
    {
      "epoch": 4.143280768248856,
      "grad_norm": 0.0795738473534584,
      "learning_rate": 1.7247286888108433e-06,
      "loss": 0.0079,
      "step": 2531760
    },
    {
      "epoch": 4.143313498687509,
      "grad_norm": 0.1640685498714447,
      "learning_rate": 1.724662796597326e-06,
      "loss": 0.0075,
      "step": 2531780
    },
    {
      "epoch": 4.143346229126163,
      "grad_norm": 0.243946835398674,
      "learning_rate": 1.724596904383809e-06,
      "loss": 0.0063,
      "step": 2531800
    },
    {
      "epoch": 4.143378959564816,
      "grad_norm": 0.3570186197757721,
      "learning_rate": 1.7245310121702918e-06,
      "loss": 0.0098,
      "step": 2531820
    },
    {
      "epoch": 4.14341169000347,
      "grad_norm": 0.05758202075958252,
      "learning_rate": 1.724465119956775e-06,
      "loss": 0.0073,
      "step": 2531840
    },
    {
      "epoch": 4.143444420442123,
      "grad_norm": 0.49517184495925903,
      "learning_rate": 1.7243992277432579e-06,
      "loss": 0.0112,
      "step": 2531860
    },
    {
      "epoch": 4.143477150880776,
      "grad_norm": 0.17202404141426086,
      "learning_rate": 1.7243333355297406e-06,
      "loss": 0.0103,
      "step": 2531880
    },
    {
      "epoch": 4.14350988131943,
      "grad_norm": 0.06716157495975494,
      "learning_rate": 1.7242674433162236e-06,
      "loss": 0.008,
      "step": 2531900
    },
    {
      "epoch": 4.1435426117580825,
      "grad_norm": 0.22930459678173065,
      "learning_rate": 1.7242015511027063e-06,
      "loss": 0.008,
      "step": 2531920
    },
    {
      "epoch": 4.143575342196736,
      "grad_norm": 0.3627682328224182,
      "learning_rate": 1.7241356588891893e-06,
      "loss": 0.0118,
      "step": 2531940
    },
    {
      "epoch": 4.14360807263539,
      "grad_norm": 0.25137192010879517,
      "learning_rate": 1.724069766675672e-06,
      "loss": 0.0119,
      "step": 2531960
    },
    {
      "epoch": 4.143640803074042,
      "grad_norm": 0.35685548186302185,
      "learning_rate": 1.7240038744621547e-06,
      "loss": 0.0096,
      "step": 2531980
    },
    {
      "epoch": 4.143673533512696,
      "grad_norm": 0.1984611451625824,
      "learning_rate": 1.723937982248638e-06,
      "loss": 0.0096,
      "step": 2532000
    },
    {
      "epoch": 4.1437062639513496,
      "grad_norm": 0.23081545531749725,
      "learning_rate": 1.7238720900351208e-06,
      "loss": 0.0066,
      "step": 2532020
    },
    {
      "epoch": 4.143738994390003,
      "grad_norm": 0.18849875032901764,
      "learning_rate": 1.7238061978216036e-06,
      "loss": 0.0098,
      "step": 2532040
    },
    {
      "epoch": 4.143771724828656,
      "grad_norm": 0.2843873202800751,
      "learning_rate": 1.7237403056080865e-06,
      "loss": 0.0094,
      "step": 2532060
    },
    {
      "epoch": 4.1438044552673095,
      "grad_norm": 0.08313686400651932,
      "learning_rate": 1.7236744133945693e-06,
      "loss": 0.0087,
      "step": 2532080
    },
    {
      "epoch": 4.143837185705963,
      "grad_norm": 0.4036320745944977,
      "learning_rate": 1.7236085211810522e-06,
      "loss": 0.0127,
      "step": 2532100
    },
    {
      "epoch": 4.143869916144616,
      "grad_norm": 0.2800474464893341,
      "learning_rate": 1.723542628967535e-06,
      "loss": 0.0062,
      "step": 2532120
    },
    {
      "epoch": 4.143902646583269,
      "grad_norm": 0.2684093415737152,
      "learning_rate": 1.723476736754018e-06,
      "loss": 0.0093,
      "step": 2532140
    },
    {
      "epoch": 4.143935377021923,
      "grad_norm": 0.8195027112960815,
      "learning_rate": 1.7234108445405007e-06,
      "loss": 0.0096,
      "step": 2532160
    },
    {
      "epoch": 4.1439681074605765,
      "grad_norm": 0.2664385139942169,
      "learning_rate": 1.7233449523269838e-06,
      "loss": 0.0077,
      "step": 2532180
    },
    {
      "epoch": 4.144000837899229,
      "grad_norm": 0.46527397632598877,
      "learning_rate": 1.7232790601134666e-06,
      "loss": 0.0113,
      "step": 2532200
    },
    {
      "epoch": 4.144033568337883,
      "grad_norm": 0.1424003541469574,
      "learning_rate": 1.7232131678999495e-06,
      "loss": 0.0083,
      "step": 2532220
    },
    {
      "epoch": 4.144066298776536,
      "grad_norm": 0.3571864664554596,
      "learning_rate": 1.7231472756864323e-06,
      "loss": 0.0092,
      "step": 2532240
    },
    {
      "epoch": 4.144099029215189,
      "grad_norm": 0.31114882230758667,
      "learning_rate": 1.7230813834729152e-06,
      "loss": 0.0085,
      "step": 2532260
    },
    {
      "epoch": 4.144131759653843,
      "grad_norm": 0.26384642720222473,
      "learning_rate": 1.723015491259398e-06,
      "loss": 0.0099,
      "step": 2532280
    },
    {
      "epoch": 4.144164490092496,
      "grad_norm": 0.14537087082862854,
      "learning_rate": 1.722949599045881e-06,
      "loss": 0.0093,
      "step": 2532300
    },
    {
      "epoch": 4.14419722053115,
      "grad_norm": 0.1821628361940384,
      "learning_rate": 1.7228837068323636e-06,
      "loss": 0.0083,
      "step": 2532320
    },
    {
      "epoch": 4.144229950969803,
      "grad_norm": 0.39427998661994934,
      "learning_rate": 1.7228178146188466e-06,
      "loss": 0.0099,
      "step": 2532340
    },
    {
      "epoch": 4.144262681408456,
      "grad_norm": 0.35739877820014954,
      "learning_rate": 1.7227519224053295e-06,
      "loss": 0.0122,
      "step": 2532360
    },
    {
      "epoch": 4.14429541184711,
      "grad_norm": 0.283041387796402,
      "learning_rate": 1.7226860301918125e-06,
      "loss": 0.0105,
      "step": 2532380
    },
    {
      "epoch": 4.1443281422857625,
      "grad_norm": 0.093906931579113,
      "learning_rate": 1.7226201379782952e-06,
      "loss": 0.0062,
      "step": 2532400
    },
    {
      "epoch": 4.144360872724416,
      "grad_norm": 0.28797873854637146,
      "learning_rate": 1.7225542457647782e-06,
      "loss": 0.014,
      "step": 2532420
    },
    {
      "epoch": 4.14439360316307,
      "grad_norm": 0.1840502768754959,
      "learning_rate": 1.722488353551261e-06,
      "loss": 0.0109,
      "step": 2532440
    },
    {
      "epoch": 4.144426333601723,
      "grad_norm": 0.18905413150787354,
      "learning_rate": 1.7224224613377439e-06,
      "loss": 0.0116,
      "step": 2532460
    },
    {
      "epoch": 4.144459064040376,
      "grad_norm": 0.306919127702713,
      "learning_rate": 1.7223565691242266e-06,
      "loss": 0.0118,
      "step": 2532480
    },
    {
      "epoch": 4.14449179447903,
      "grad_norm": 0.13645943999290466,
      "learning_rate": 1.7222906769107096e-06,
      "loss": 0.0055,
      "step": 2532500
    },
    {
      "epoch": 4.144524524917683,
      "grad_norm": 0.4494178295135498,
      "learning_rate": 1.7222247846971923e-06,
      "loss": 0.0085,
      "step": 2532520
    },
    {
      "epoch": 4.144557255356336,
      "grad_norm": 0.13224074244499207,
      "learning_rate": 1.7221588924836755e-06,
      "loss": 0.0113,
      "step": 2532540
    },
    {
      "epoch": 4.1445899857949895,
      "grad_norm": 0.3270057141780853,
      "learning_rate": 1.7220930002701582e-06,
      "loss": 0.0062,
      "step": 2532560
    },
    {
      "epoch": 4.144622716233643,
      "grad_norm": 0.5085372924804688,
      "learning_rate": 1.7220271080566412e-06,
      "loss": 0.009,
      "step": 2532580
    },
    {
      "epoch": 4.144655446672297,
      "grad_norm": 0.18461492657661438,
      "learning_rate": 1.721961215843124e-06,
      "loss": 0.0104,
      "step": 2532600
    },
    {
      "epoch": 4.144688177110949,
      "grad_norm": 0.28050482273101807,
      "learning_rate": 1.7218953236296069e-06,
      "loss": 0.0127,
      "step": 2532620
    },
    {
      "epoch": 4.144720907549603,
      "grad_norm": 0.3958558440208435,
      "learning_rate": 1.7218294314160896e-06,
      "loss": 0.0081,
      "step": 2532640
    },
    {
      "epoch": 4.144753637988257,
      "grad_norm": 0.08300738781690598,
      "learning_rate": 1.7217635392025725e-06,
      "loss": 0.0141,
      "step": 2532660
    },
    {
      "epoch": 4.144786368426909,
      "grad_norm": 0.4676346778869629,
      "learning_rate": 1.7216976469890553e-06,
      "loss": 0.0098,
      "step": 2532680
    },
    {
      "epoch": 4.144819098865563,
      "grad_norm": 0.6325018405914307,
      "learning_rate": 1.7216317547755382e-06,
      "loss": 0.011,
      "step": 2532700
    },
    {
      "epoch": 4.1448518293042165,
      "grad_norm": 0.6211917400360107,
      "learning_rate": 1.7215658625620214e-06,
      "loss": 0.0107,
      "step": 2532720
    },
    {
      "epoch": 4.14488455974287,
      "grad_norm": 0.09099319577217102,
      "learning_rate": 1.7214999703485041e-06,
      "loss": 0.0115,
      "step": 2532740
    },
    {
      "epoch": 4.144917290181523,
      "grad_norm": 0.07648853957653046,
      "learning_rate": 1.7214340781349869e-06,
      "loss": 0.0098,
      "step": 2532760
    },
    {
      "epoch": 4.144950020620176,
      "grad_norm": 0.628429651260376,
      "learning_rate": 1.7213681859214698e-06,
      "loss": 0.0114,
      "step": 2532780
    },
    {
      "epoch": 4.14498275105883,
      "grad_norm": 0.52889484167099,
      "learning_rate": 1.7213022937079526e-06,
      "loss": 0.0083,
      "step": 2532800
    },
    {
      "epoch": 4.145015481497483,
      "grad_norm": 0.20360469818115234,
      "learning_rate": 1.7212364014944355e-06,
      "loss": 0.0078,
      "step": 2532820
    },
    {
      "epoch": 4.145048211936136,
      "grad_norm": 0.2668600082397461,
      "learning_rate": 1.7211705092809183e-06,
      "loss": 0.0164,
      "step": 2532840
    },
    {
      "epoch": 4.14508094237479,
      "grad_norm": 0.25528451800346375,
      "learning_rate": 1.7211046170674012e-06,
      "loss": 0.0081,
      "step": 2532860
    },
    {
      "epoch": 4.1451136728134435,
      "grad_norm": 0.11174708604812622,
      "learning_rate": 1.7210387248538844e-06,
      "loss": 0.0104,
      "step": 2532880
    },
    {
      "epoch": 4.145146403252096,
      "grad_norm": 0.21988064050674438,
      "learning_rate": 1.7209728326403671e-06,
      "loss": 0.0059,
      "step": 2532900
    },
    {
      "epoch": 4.14517913369075,
      "grad_norm": 0.1181025505065918,
      "learning_rate": 1.72090694042685e-06,
      "loss": 0.0122,
      "step": 2532920
    },
    {
      "epoch": 4.145211864129403,
      "grad_norm": 0.2266741693019867,
      "learning_rate": 1.7208410482133328e-06,
      "loss": 0.0086,
      "step": 2532940
    },
    {
      "epoch": 4.145244594568056,
      "grad_norm": 0.19577956199645996,
      "learning_rate": 1.7207751559998158e-06,
      "loss": 0.008,
      "step": 2532960
    },
    {
      "epoch": 4.14527732500671,
      "grad_norm": 0.16037757694721222,
      "learning_rate": 1.7207092637862985e-06,
      "loss": 0.0079,
      "step": 2532980
    },
    {
      "epoch": 4.145310055445363,
      "grad_norm": 0.3589434027671814,
      "learning_rate": 1.7206433715727812e-06,
      "loss": 0.0063,
      "step": 2533000
    },
    {
      "epoch": 4.145342785884017,
      "grad_norm": 0.6219384074211121,
      "learning_rate": 1.7205774793592642e-06,
      "loss": 0.0128,
      "step": 2533020
    },
    {
      "epoch": 4.1453755163226695,
      "grad_norm": 0.5855976939201355,
      "learning_rate": 1.720511587145747e-06,
      "loss": 0.0169,
      "step": 2533040
    },
    {
      "epoch": 4.145408246761323,
      "grad_norm": 0.19702117145061493,
      "learning_rate": 1.72044569493223e-06,
      "loss": 0.008,
      "step": 2533060
    },
    {
      "epoch": 4.145440977199977,
      "grad_norm": 0.3282056152820587,
      "learning_rate": 1.720379802718713e-06,
      "loss": 0.0062,
      "step": 2533080
    },
    {
      "epoch": 4.145473707638629,
      "grad_norm": 0.19354388117790222,
      "learning_rate": 1.7203139105051958e-06,
      "loss": 0.0077,
      "step": 2533100
    },
    {
      "epoch": 4.145506438077283,
      "grad_norm": 0.24743057787418365,
      "learning_rate": 1.7202480182916787e-06,
      "loss": 0.0085,
      "step": 2533120
    },
    {
      "epoch": 4.145539168515937,
      "grad_norm": 0.2321668416261673,
      "learning_rate": 1.7201821260781615e-06,
      "loss": 0.0074,
      "step": 2533140
    },
    {
      "epoch": 4.14557189895459,
      "grad_norm": 0.13968618214130402,
      "learning_rate": 1.7201162338646444e-06,
      "loss": 0.0133,
      "step": 2533160
    },
    {
      "epoch": 4.145604629393243,
      "grad_norm": 0.6897426843643188,
      "learning_rate": 1.7200503416511272e-06,
      "loss": 0.0088,
      "step": 2533180
    },
    {
      "epoch": 4.1456373598318965,
      "grad_norm": 0.0869278758764267,
      "learning_rate": 1.71998444943761e-06,
      "loss": 0.0084,
      "step": 2533200
    },
    {
      "epoch": 4.14567009027055,
      "grad_norm": 0.15144699811935425,
      "learning_rate": 1.7199185572240929e-06,
      "loss": 0.0102,
      "step": 2533220
    },
    {
      "epoch": 4.145702820709203,
      "grad_norm": 0.416475385427475,
      "learning_rate": 1.719852665010576e-06,
      "loss": 0.009,
      "step": 2533240
    },
    {
      "epoch": 4.145735551147856,
      "grad_norm": 0.5139749050140381,
      "learning_rate": 1.7197867727970588e-06,
      "loss": 0.0115,
      "step": 2533260
    },
    {
      "epoch": 4.14576828158651,
      "grad_norm": 0.10702270269393921,
      "learning_rate": 1.7197208805835417e-06,
      "loss": 0.0081,
      "step": 2533280
    },
    {
      "epoch": 4.145801012025164,
      "grad_norm": 0.24530074000358582,
      "learning_rate": 1.7196549883700244e-06,
      "loss": 0.0114,
      "step": 2533300
    },
    {
      "epoch": 4.145833742463816,
      "grad_norm": 0.3184646964073181,
      "learning_rate": 1.7195890961565074e-06,
      "loss": 0.0071,
      "step": 2533320
    },
    {
      "epoch": 4.14586647290247,
      "grad_norm": 0.3023677468299866,
      "learning_rate": 1.7195232039429901e-06,
      "loss": 0.0083,
      "step": 2533340
    },
    {
      "epoch": 4.1458992033411235,
      "grad_norm": 0.12608429789543152,
      "learning_rate": 1.719457311729473e-06,
      "loss": 0.011,
      "step": 2533360
    },
    {
      "epoch": 4.145931933779776,
      "grad_norm": 0.0913316160440445,
      "learning_rate": 1.7193914195159558e-06,
      "loss": 0.008,
      "step": 2533380
    },
    {
      "epoch": 4.14596466421843,
      "grad_norm": 0.05973028764128685,
      "learning_rate": 1.7193255273024388e-06,
      "loss": 0.0083,
      "step": 2533400
    },
    {
      "epoch": 4.145997394657083,
      "grad_norm": 0.27529376745224,
      "learning_rate": 1.7192596350889217e-06,
      "loss": 0.0093,
      "step": 2533420
    },
    {
      "epoch": 4.146030125095736,
      "grad_norm": 0.44172871112823486,
      "learning_rate": 1.7191937428754047e-06,
      "loss": 0.0102,
      "step": 2533440
    },
    {
      "epoch": 4.14606285553439,
      "grad_norm": 0.2993534207344055,
      "learning_rate": 1.7191278506618874e-06,
      "loss": 0.0122,
      "step": 2533460
    },
    {
      "epoch": 4.146095585973043,
      "grad_norm": 0.3352852463722229,
      "learning_rate": 1.7190619584483704e-06,
      "loss": 0.0078,
      "step": 2533480
    },
    {
      "epoch": 4.146128316411697,
      "grad_norm": 0.11437826603651047,
      "learning_rate": 1.7189960662348531e-06,
      "loss": 0.0094,
      "step": 2533500
    },
    {
      "epoch": 4.14616104685035,
      "grad_norm": 0.16739045083522797,
      "learning_rate": 1.718930174021336e-06,
      "loss": 0.0074,
      "step": 2533520
    },
    {
      "epoch": 4.146193777289003,
      "grad_norm": 0.23211751878261566,
      "learning_rate": 1.7188642818078188e-06,
      "loss": 0.008,
      "step": 2533540
    },
    {
      "epoch": 4.146226507727657,
      "grad_norm": 0.04225907847285271,
      "learning_rate": 1.7187983895943018e-06,
      "loss": 0.011,
      "step": 2533560
    },
    {
      "epoch": 4.1462592381663095,
      "grad_norm": 0.33076998591423035,
      "learning_rate": 1.7187324973807845e-06,
      "loss": 0.0119,
      "step": 2533580
    },
    {
      "epoch": 4.146291968604963,
      "grad_norm": 0.18917527794837952,
      "learning_rate": 1.7186666051672677e-06,
      "loss": 0.0049,
      "step": 2533600
    },
    {
      "epoch": 4.146324699043617,
      "grad_norm": 0.0862160250544548,
      "learning_rate": 1.7186007129537504e-06,
      "loss": 0.0091,
      "step": 2533620
    },
    {
      "epoch": 4.14635742948227,
      "grad_norm": 0.21190330386161804,
      "learning_rate": 1.7185348207402334e-06,
      "loss": 0.0087,
      "step": 2533640
    },
    {
      "epoch": 4.146390159920923,
      "grad_norm": 0.23628109693527222,
      "learning_rate": 1.718468928526716e-06,
      "loss": 0.0133,
      "step": 2533660
    },
    {
      "epoch": 4.146422890359577,
      "grad_norm": 0.18840835988521576,
      "learning_rate": 1.718403036313199e-06,
      "loss": 0.0081,
      "step": 2533680
    },
    {
      "epoch": 4.14645562079823,
      "grad_norm": 0.1388694941997528,
      "learning_rate": 1.7183371440996818e-06,
      "loss": 0.009,
      "step": 2533700
    },
    {
      "epoch": 4.146488351236883,
      "grad_norm": 0.06687258929014206,
      "learning_rate": 1.7182712518861647e-06,
      "loss": 0.0062,
      "step": 2533720
    },
    {
      "epoch": 4.1465210816755365,
      "grad_norm": 0.18569931387901306,
      "learning_rate": 1.7182053596726475e-06,
      "loss": 0.0109,
      "step": 2533740
    },
    {
      "epoch": 4.14655381211419,
      "grad_norm": 0.18939024209976196,
      "learning_rate": 1.7181394674591306e-06,
      "loss": 0.0114,
      "step": 2533760
    },
    {
      "epoch": 4.146586542552844,
      "grad_norm": 0.08640719205141068,
      "learning_rate": 1.7180735752456134e-06,
      "loss": 0.0082,
      "step": 2533780
    },
    {
      "epoch": 4.146619272991496,
      "grad_norm": 0.461972177028656,
      "learning_rate": 1.7180076830320963e-06,
      "loss": 0.0089,
      "step": 2533800
    },
    {
      "epoch": 4.14665200343015,
      "grad_norm": 0.2644352316856384,
      "learning_rate": 1.717941790818579e-06,
      "loss": 0.0067,
      "step": 2533820
    },
    {
      "epoch": 4.1466847338688035,
      "grad_norm": 0.31498199701309204,
      "learning_rate": 1.717875898605062e-06,
      "loss": 0.0112,
      "step": 2533840
    },
    {
      "epoch": 4.146717464307456,
      "grad_norm": 0.2431482970714569,
      "learning_rate": 1.7178100063915448e-06,
      "loss": 0.0074,
      "step": 2533860
    },
    {
      "epoch": 4.14675019474611,
      "grad_norm": 0.04243215173482895,
      "learning_rate": 1.7177441141780277e-06,
      "loss": 0.0079,
      "step": 2533880
    },
    {
      "epoch": 4.146782925184763,
      "grad_norm": 0.14765657484531403,
      "learning_rate": 1.7176782219645105e-06,
      "loss": 0.0102,
      "step": 2533900
    },
    {
      "epoch": 4.146815655623417,
      "grad_norm": 0.25562402606010437,
      "learning_rate": 1.7176123297509934e-06,
      "loss": 0.0069,
      "step": 2533920
    },
    {
      "epoch": 4.14684838606207,
      "grad_norm": 0.25484225153923035,
      "learning_rate": 1.7175464375374766e-06,
      "loss": 0.0076,
      "step": 2533940
    },
    {
      "epoch": 4.146881116500723,
      "grad_norm": 0.4778938293457031,
      "learning_rate": 1.7174805453239593e-06,
      "loss": 0.013,
      "step": 2533960
    },
    {
      "epoch": 4.146913846939377,
      "grad_norm": 0.32965293526649475,
      "learning_rate": 1.717414653110442e-06,
      "loss": 0.01,
      "step": 2533980
    },
    {
      "epoch": 4.14694657737803,
      "grad_norm": 0.5043191909790039,
      "learning_rate": 1.717348760896925e-06,
      "loss": 0.0094,
      "step": 2534000
    },
    {
      "epoch": 4.146979307816683,
      "grad_norm": 0.2367716282606125,
      "learning_rate": 1.7172828686834077e-06,
      "loss": 0.0144,
      "step": 2534020
    },
    {
      "epoch": 4.147012038255337,
      "grad_norm": 0.31286415457725525,
      "learning_rate": 1.7172169764698907e-06,
      "loss": 0.0066,
      "step": 2534040
    },
    {
      "epoch": 4.14704476869399,
      "grad_norm": 0.4245232045650482,
      "learning_rate": 1.7171510842563734e-06,
      "loss": 0.0078,
      "step": 2534060
    },
    {
      "epoch": 4.147077499132643,
      "grad_norm": 0.21721863746643066,
      "learning_rate": 1.7170851920428564e-06,
      "loss": 0.0104,
      "step": 2534080
    },
    {
      "epoch": 4.147110229571297,
      "grad_norm": 0.17975375056266785,
      "learning_rate": 1.7170192998293391e-06,
      "loss": 0.0086,
      "step": 2534100
    },
    {
      "epoch": 4.14714296000995,
      "grad_norm": 0.17641949653625488,
      "learning_rate": 1.7169534076158223e-06,
      "loss": 0.0114,
      "step": 2534120
    },
    {
      "epoch": 4.147175690448603,
      "grad_norm": 0.14838182926177979,
      "learning_rate": 1.7168875154023052e-06,
      "loss": 0.0092,
      "step": 2534140
    },
    {
      "epoch": 4.147208420887257,
      "grad_norm": 0.0689774751663208,
      "learning_rate": 1.716821623188788e-06,
      "loss": 0.0094,
      "step": 2534160
    },
    {
      "epoch": 4.14724115132591,
      "grad_norm": 0.11927524954080582,
      "learning_rate": 1.7167557309752707e-06,
      "loss": 0.0126,
      "step": 2534180
    },
    {
      "epoch": 4.147273881764564,
      "grad_norm": 0.2517898976802826,
      "learning_rate": 1.7166898387617537e-06,
      "loss": 0.0078,
      "step": 2534200
    },
    {
      "epoch": 4.1473066122032165,
      "grad_norm": 0.477600634098053,
      "learning_rate": 1.7166239465482364e-06,
      "loss": 0.0087,
      "step": 2534220
    },
    {
      "epoch": 4.14733934264187,
      "grad_norm": 0.2555437386035919,
      "learning_rate": 1.7165580543347194e-06,
      "loss": 0.0086,
      "step": 2534240
    },
    {
      "epoch": 4.147372073080524,
      "grad_norm": 0.2544604539871216,
      "learning_rate": 1.716492162121202e-06,
      "loss": 0.0112,
      "step": 2534260
    },
    {
      "epoch": 4.147404803519176,
      "grad_norm": 0.13116230070590973,
      "learning_rate": 1.716426269907685e-06,
      "loss": 0.0097,
      "step": 2534280
    },
    {
      "epoch": 4.14743753395783,
      "grad_norm": 0.24192272126674652,
      "learning_rate": 1.7163603776941682e-06,
      "loss": 0.0104,
      "step": 2534300
    },
    {
      "epoch": 4.147470264396484,
      "grad_norm": 0.37889179587364197,
      "learning_rate": 1.716294485480651e-06,
      "loss": 0.0102,
      "step": 2534320
    },
    {
      "epoch": 4.147502994835137,
      "grad_norm": 0.12327142059803009,
      "learning_rate": 1.716228593267134e-06,
      "loss": 0.0072,
      "step": 2534340
    },
    {
      "epoch": 4.14753572527379,
      "grad_norm": 0.30371734499931335,
      "learning_rate": 1.7161627010536166e-06,
      "loss": 0.0062,
      "step": 2534360
    },
    {
      "epoch": 4.1475684557124435,
      "grad_norm": 0.15217363834381104,
      "learning_rate": 1.7160968088400996e-06,
      "loss": 0.0086,
      "step": 2534380
    },
    {
      "epoch": 4.147601186151097,
      "grad_norm": 0.1479988545179367,
      "learning_rate": 1.7160309166265823e-06,
      "loss": 0.0118,
      "step": 2534400
    },
    {
      "epoch": 4.14763391658975,
      "grad_norm": 0.22649788856506348,
      "learning_rate": 1.715965024413065e-06,
      "loss": 0.0102,
      "step": 2534420
    },
    {
      "epoch": 4.147666647028403,
      "grad_norm": 0.22349262237548828,
      "learning_rate": 1.715899132199548e-06,
      "loss": 0.0092,
      "step": 2534440
    },
    {
      "epoch": 4.147699377467057,
      "grad_norm": 0.3878813683986664,
      "learning_rate": 1.7158332399860312e-06,
      "loss": 0.0101,
      "step": 2534460
    },
    {
      "epoch": 4.147732107905711,
      "grad_norm": 0.2202989161014557,
      "learning_rate": 1.715767347772514e-06,
      "loss": 0.0095,
      "step": 2534480
    },
    {
      "epoch": 4.147764838344363,
      "grad_norm": 0.22438958287239075,
      "learning_rate": 1.7157014555589969e-06,
      "loss": 0.008,
      "step": 2534500
    },
    {
      "epoch": 4.147797568783017,
      "grad_norm": 0.3807966411113739,
      "learning_rate": 1.7156355633454796e-06,
      "loss": 0.0094,
      "step": 2534520
    },
    {
      "epoch": 4.1478302992216705,
      "grad_norm": 0.3061167597770691,
      "learning_rate": 1.7155696711319626e-06,
      "loss": 0.0078,
      "step": 2534540
    },
    {
      "epoch": 4.147863029660323,
      "grad_norm": 0.6916025876998901,
      "learning_rate": 1.7155037789184453e-06,
      "loss": 0.0118,
      "step": 2534560
    },
    {
      "epoch": 4.147895760098977,
      "grad_norm": 0.10605652630329132,
      "learning_rate": 1.7154378867049283e-06,
      "loss": 0.0078,
      "step": 2534580
    },
    {
      "epoch": 4.14792849053763,
      "grad_norm": 0.07379446923732758,
      "learning_rate": 1.715371994491411e-06,
      "loss": 0.0101,
      "step": 2534600
    },
    {
      "epoch": 4.147961220976284,
      "grad_norm": 0.22973164916038513,
      "learning_rate": 1.7153061022778937e-06,
      "loss": 0.0115,
      "step": 2534620
    },
    {
      "epoch": 4.147993951414937,
      "grad_norm": 0.23516206443309784,
      "learning_rate": 1.715240210064377e-06,
      "loss": 0.0088,
      "step": 2534640
    },
    {
      "epoch": 4.14802668185359,
      "grad_norm": 0.7802267670631409,
      "learning_rate": 1.7151743178508599e-06,
      "loss": 0.0106,
      "step": 2534660
    },
    {
      "epoch": 4.148059412292244,
      "grad_norm": 0.5073937773704529,
      "learning_rate": 1.7151084256373426e-06,
      "loss": 0.0087,
      "step": 2534680
    },
    {
      "epoch": 4.1480921427308965,
      "grad_norm": 0.11811534315347672,
      "learning_rate": 1.7150425334238255e-06,
      "loss": 0.0087,
      "step": 2534700
    },
    {
      "epoch": 4.14812487316955,
      "grad_norm": 0.30087727308273315,
      "learning_rate": 1.7149766412103083e-06,
      "loss": 0.0154,
      "step": 2534720
    },
    {
      "epoch": 4.148157603608204,
      "grad_norm": 0.23104190826416016,
      "learning_rate": 1.7149107489967912e-06,
      "loss": 0.0063,
      "step": 2534740
    },
    {
      "epoch": 4.148190334046857,
      "grad_norm": 0.1302877813577652,
      "learning_rate": 1.714844856783274e-06,
      "loss": 0.0065,
      "step": 2534760
    },
    {
      "epoch": 4.14822306448551,
      "grad_norm": 0.24506410956382751,
      "learning_rate": 1.714778964569757e-06,
      "loss": 0.0098,
      "step": 2534780
    },
    {
      "epoch": 4.148255794924164,
      "grad_norm": 0.23285798728466034,
      "learning_rate": 1.7147130723562397e-06,
      "loss": 0.0077,
      "step": 2534800
    },
    {
      "epoch": 4.148288525362817,
      "grad_norm": 0.20195001363754272,
      "learning_rate": 1.7146471801427228e-06,
      "loss": 0.0075,
      "step": 2534820
    },
    {
      "epoch": 4.14832125580147,
      "grad_norm": 0.17068327963352203,
      "learning_rate": 1.7145812879292056e-06,
      "loss": 0.0072,
      "step": 2534840
    },
    {
      "epoch": 4.1483539862401235,
      "grad_norm": 0.39806225895881653,
      "learning_rate": 1.7145153957156885e-06,
      "loss": 0.0076,
      "step": 2534860
    },
    {
      "epoch": 4.148386716678777,
      "grad_norm": 0.29455316066741943,
      "learning_rate": 1.7144495035021713e-06,
      "loss": 0.0097,
      "step": 2534880
    },
    {
      "epoch": 4.14841944711743,
      "grad_norm": 0.2338808923959732,
      "learning_rate": 1.7143836112886542e-06,
      "loss": 0.0121,
      "step": 2534900
    },
    {
      "epoch": 4.148452177556083,
      "grad_norm": 0.21525686979293823,
      "learning_rate": 1.714317719075137e-06,
      "loss": 0.0072,
      "step": 2534920
    },
    {
      "epoch": 4.148484907994737,
      "grad_norm": 1.8160016536712646,
      "learning_rate": 1.71425182686162e-06,
      "loss": 0.0075,
      "step": 2534940
    },
    {
      "epoch": 4.148517638433391,
      "grad_norm": 0.1474851816892624,
      "learning_rate": 1.7141859346481026e-06,
      "loss": 0.0065,
      "step": 2534960
    },
    {
      "epoch": 4.148550368872043,
      "grad_norm": 0.09757820516824722,
      "learning_rate": 1.7141200424345856e-06,
      "loss": 0.0121,
      "step": 2534980
    },
    {
      "epoch": 4.148583099310697,
      "grad_norm": 0.13258393108844757,
      "learning_rate": 1.7140541502210686e-06,
      "loss": 0.0098,
      "step": 2535000
    },
    {
      "epoch": 4.1486158297493505,
      "grad_norm": 0.16613377630710602,
      "learning_rate": 1.7139882580075515e-06,
      "loss": 0.0094,
      "step": 2535020
    },
    {
      "epoch": 4.148648560188004,
      "grad_norm": 0.12757332623004913,
      "learning_rate": 1.7139223657940342e-06,
      "loss": 0.0107,
      "step": 2535040
    },
    {
      "epoch": 4.148681290626657,
      "grad_norm": 0.20999546349048615,
      "learning_rate": 1.7138564735805172e-06,
      "loss": 0.0113,
      "step": 2535060
    },
    {
      "epoch": 4.14871402106531,
      "grad_norm": 0.5144140124320984,
      "learning_rate": 1.713790581367e-06,
      "loss": 0.0099,
      "step": 2535080
    },
    {
      "epoch": 4.148746751503964,
      "grad_norm": 0.16598881781101227,
      "learning_rate": 1.7137246891534829e-06,
      "loss": 0.0119,
      "step": 2535100
    },
    {
      "epoch": 4.148779481942617,
      "grad_norm": 0.1937849074602127,
      "learning_rate": 1.7136587969399656e-06,
      "loss": 0.0095,
      "step": 2535120
    },
    {
      "epoch": 4.14881221238127,
      "grad_norm": 0.057925980538129807,
      "learning_rate": 1.7135929047264486e-06,
      "loss": 0.0107,
      "step": 2535140
    },
    {
      "epoch": 4.148844942819924,
      "grad_norm": 0.17119169235229492,
      "learning_rate": 1.7135270125129313e-06,
      "loss": 0.0111,
      "step": 2535160
    },
    {
      "epoch": 4.148877673258577,
      "grad_norm": 0.10127609223127365,
      "learning_rate": 1.7134611202994145e-06,
      "loss": 0.0065,
      "step": 2535180
    },
    {
      "epoch": 4.14891040369723,
      "grad_norm": 0.33789247274398804,
      "learning_rate": 1.7133952280858972e-06,
      "loss": 0.0071,
      "step": 2535200
    },
    {
      "epoch": 4.148943134135884,
      "grad_norm": 0.3578639328479767,
      "learning_rate": 1.7133293358723802e-06,
      "loss": 0.0065,
      "step": 2535220
    },
    {
      "epoch": 4.148975864574537,
      "grad_norm": 0.22739657759666443,
      "learning_rate": 1.713263443658863e-06,
      "loss": 0.0072,
      "step": 2535240
    },
    {
      "epoch": 4.14900859501319,
      "grad_norm": 0.19174110889434814,
      "learning_rate": 1.7131975514453459e-06,
      "loss": 0.0084,
      "step": 2535260
    },
    {
      "epoch": 4.149041325451844,
      "grad_norm": 0.35504257678985596,
      "learning_rate": 1.7131316592318286e-06,
      "loss": 0.0101,
      "step": 2535280
    },
    {
      "epoch": 4.149074055890497,
      "grad_norm": 0.39924106001853943,
      "learning_rate": 1.7130657670183116e-06,
      "loss": 0.0154,
      "step": 2535300
    },
    {
      "epoch": 4.14910678632915,
      "grad_norm": 0.5294577479362488,
      "learning_rate": 1.7129998748047943e-06,
      "loss": 0.0073,
      "step": 2535320
    },
    {
      "epoch": 4.149139516767804,
      "grad_norm": 0.1801338940858841,
      "learning_rate": 1.7129339825912775e-06,
      "loss": 0.0092,
      "step": 2535340
    },
    {
      "epoch": 4.149172247206457,
      "grad_norm": 0.45752596855163574,
      "learning_rate": 1.7128680903777604e-06,
      "loss": 0.0101,
      "step": 2535360
    },
    {
      "epoch": 4.149204977645111,
      "grad_norm": 0.4278584420681,
      "learning_rate": 1.7128021981642431e-06,
      "loss": 0.011,
      "step": 2535380
    },
    {
      "epoch": 4.1492377080837635,
      "grad_norm": 0.5595760941505432,
      "learning_rate": 1.7127363059507259e-06,
      "loss": 0.0117,
      "step": 2535400
    },
    {
      "epoch": 4.149270438522417,
      "grad_norm": 0.22203429043293,
      "learning_rate": 1.7126704137372088e-06,
      "loss": 0.0107,
      "step": 2535420
    },
    {
      "epoch": 4.149303168961071,
      "grad_norm": 0.44479504227638245,
      "learning_rate": 1.7126045215236916e-06,
      "loss": 0.0094,
      "step": 2535440
    },
    {
      "epoch": 4.149335899399723,
      "grad_norm": 0.1148100346326828,
      "learning_rate": 1.7125386293101745e-06,
      "loss": 0.0109,
      "step": 2535460
    },
    {
      "epoch": 4.149368629838377,
      "grad_norm": 0.31942567229270935,
      "learning_rate": 1.7124727370966573e-06,
      "loss": 0.012,
      "step": 2535480
    },
    {
      "epoch": 4.1494013602770305,
      "grad_norm": 0.1860371232032776,
      "learning_rate": 1.7124068448831402e-06,
      "loss": 0.0081,
      "step": 2535500
    },
    {
      "epoch": 4.149434090715684,
      "grad_norm": 0.6536728143692017,
      "learning_rate": 1.7123409526696234e-06,
      "loss": 0.012,
      "step": 2535520
    },
    {
      "epoch": 4.149466821154337,
      "grad_norm": 0.09976876527070999,
      "learning_rate": 1.7122750604561061e-06,
      "loss": 0.0126,
      "step": 2535540
    },
    {
      "epoch": 4.14949955159299,
      "grad_norm": 0.12290062010288239,
      "learning_rate": 1.712209168242589e-06,
      "loss": 0.0087,
      "step": 2535560
    },
    {
      "epoch": 4.149532282031644,
      "grad_norm": 0.5703703761100769,
      "learning_rate": 1.7121432760290718e-06,
      "loss": 0.0074,
      "step": 2535580
    },
    {
      "epoch": 4.149565012470297,
      "grad_norm": 0.3127206861972809,
      "learning_rate": 1.7120773838155548e-06,
      "loss": 0.0089,
      "step": 2535600
    },
    {
      "epoch": 4.14959774290895,
      "grad_norm": 0.27262142300605774,
      "learning_rate": 1.7120114916020375e-06,
      "loss": 0.009,
      "step": 2535620
    },
    {
      "epoch": 4.149630473347604,
      "grad_norm": 0.16048702597618103,
      "learning_rate": 1.7119455993885202e-06,
      "loss": 0.0049,
      "step": 2535640
    },
    {
      "epoch": 4.1496632037862575,
      "grad_norm": 0.3151363432407379,
      "learning_rate": 1.7118797071750032e-06,
      "loss": 0.0092,
      "step": 2535660
    },
    {
      "epoch": 4.14969593422491,
      "grad_norm": 0.1816035658121109,
      "learning_rate": 1.711813814961486e-06,
      "loss": 0.0089,
      "step": 2535680
    },
    {
      "epoch": 4.149728664663564,
      "grad_norm": 0.1375182420015335,
      "learning_rate": 1.711747922747969e-06,
      "loss": 0.0089,
      "step": 2535700
    },
    {
      "epoch": 4.149761395102217,
      "grad_norm": 0.2882782518863678,
      "learning_rate": 1.711682030534452e-06,
      "loss": 0.013,
      "step": 2535720
    },
    {
      "epoch": 4.14979412554087,
      "grad_norm": 0.310387521982193,
      "learning_rate": 1.7116161383209348e-06,
      "loss": 0.0064,
      "step": 2535740
    },
    {
      "epoch": 4.149826855979524,
      "grad_norm": 0.07139623910188675,
      "learning_rate": 1.7115502461074177e-06,
      "loss": 0.0088,
      "step": 2535760
    },
    {
      "epoch": 4.149859586418177,
      "grad_norm": 0.09661075472831726,
      "learning_rate": 1.7114843538939005e-06,
      "loss": 0.0092,
      "step": 2535780
    },
    {
      "epoch": 4.149892316856831,
      "grad_norm": 0.3342743217945099,
      "learning_rate": 1.7114184616803834e-06,
      "loss": 0.0107,
      "step": 2535800
    },
    {
      "epoch": 4.149925047295484,
      "grad_norm": 0.27713674306869507,
      "learning_rate": 1.7113525694668662e-06,
      "loss": 0.0092,
      "step": 2535820
    },
    {
      "epoch": 4.149957777734137,
      "grad_norm": 0.2776215374469757,
      "learning_rate": 1.711286677253349e-06,
      "loss": 0.0061,
      "step": 2535840
    },
    {
      "epoch": 4.149990508172791,
      "grad_norm": 0.2435033768415451,
      "learning_rate": 1.7112207850398319e-06,
      "loss": 0.0098,
      "step": 2535860
    },
    {
      "epoch": 4.1500232386114435,
      "grad_norm": 0.2196788489818573,
      "learning_rate": 1.711154892826315e-06,
      "loss": 0.0075,
      "step": 2535880
    },
    {
      "epoch": 4.150055969050097,
      "grad_norm": 0.42180976271629333,
      "learning_rate": 1.7110890006127978e-06,
      "loss": 0.0107,
      "step": 2535900
    },
    {
      "epoch": 4.150088699488751,
      "grad_norm": 0.1165786013007164,
      "learning_rate": 1.7110231083992807e-06,
      "loss": 0.0087,
      "step": 2535920
    },
    {
      "epoch": 4.150121429927404,
      "grad_norm": 0.2055443674325943,
      "learning_rate": 1.7109572161857635e-06,
      "loss": 0.0113,
      "step": 2535940
    },
    {
      "epoch": 4.150154160366057,
      "grad_norm": 0.2641375958919525,
      "learning_rate": 1.7108913239722464e-06,
      "loss": 0.012,
      "step": 2535960
    },
    {
      "epoch": 4.150186890804711,
      "grad_norm": 0.18814335763454437,
      "learning_rate": 1.7108254317587292e-06,
      "loss": 0.0118,
      "step": 2535980
    },
    {
      "epoch": 4.150219621243364,
      "grad_norm": 0.3773961067199707,
      "learning_rate": 1.710759539545212e-06,
      "loss": 0.0134,
      "step": 2536000
    },
    {
      "epoch": 4.150252351682017,
      "grad_norm": 0.34074217081069946,
      "learning_rate": 1.7106936473316948e-06,
      "loss": 0.008,
      "step": 2536020
    },
    {
      "epoch": 4.1502850821206705,
      "grad_norm": 0.35948577523231506,
      "learning_rate": 1.7106277551181778e-06,
      "loss": 0.0096,
      "step": 2536040
    },
    {
      "epoch": 4.150317812559324,
      "grad_norm": 0.3107788562774658,
      "learning_rate": 1.7105618629046607e-06,
      "loss": 0.0107,
      "step": 2536060
    },
    {
      "epoch": 4.150350542997978,
      "grad_norm": 0.5646107792854309,
      "learning_rate": 1.7104959706911437e-06,
      "loss": 0.009,
      "step": 2536080
    },
    {
      "epoch": 4.15038327343663,
      "grad_norm": 0.28007540106773376,
      "learning_rate": 1.7104300784776264e-06,
      "loss": 0.0117,
      "step": 2536100
    },
    {
      "epoch": 4.150416003875284,
      "grad_norm": 0.4767910838127136,
      "learning_rate": 1.7103641862641094e-06,
      "loss": 0.0097,
      "step": 2536120
    },
    {
      "epoch": 4.150448734313938,
      "grad_norm": 0.08391114324331284,
      "learning_rate": 1.7102982940505921e-06,
      "loss": 0.0092,
      "step": 2536140
    },
    {
      "epoch": 4.15048146475259,
      "grad_norm": 0.1819237619638443,
      "learning_rate": 1.710232401837075e-06,
      "loss": 0.0109,
      "step": 2536160
    },
    {
      "epoch": 4.150514195191244,
      "grad_norm": 0.38996103405952454,
      "learning_rate": 1.7101665096235578e-06,
      "loss": 0.0064,
      "step": 2536180
    },
    {
      "epoch": 4.1505469256298975,
      "grad_norm": 0.059793271124362946,
      "learning_rate": 1.7101006174100408e-06,
      "loss": 0.0086,
      "step": 2536200
    },
    {
      "epoch": 4.150579656068551,
      "grad_norm": 0.3581989109516144,
      "learning_rate": 1.7100347251965237e-06,
      "loss": 0.008,
      "step": 2536220
    },
    {
      "epoch": 4.150612386507204,
      "grad_norm": 0.2677951753139496,
      "learning_rate": 1.7099688329830067e-06,
      "loss": 0.0128,
      "step": 2536240
    },
    {
      "epoch": 4.150645116945857,
      "grad_norm": 0.25700119137763977,
      "learning_rate": 1.7099029407694894e-06,
      "loss": 0.0077,
      "step": 2536260
    },
    {
      "epoch": 4.150677847384511,
      "grad_norm": 0.04800308123230934,
      "learning_rate": 1.7098370485559724e-06,
      "loss": 0.0098,
      "step": 2536280
    },
    {
      "epoch": 4.150710577823164,
      "grad_norm": 0.16133925318717957,
      "learning_rate": 1.709771156342455e-06,
      "loss": 0.0075,
      "step": 2536300
    },
    {
      "epoch": 4.150743308261817,
      "grad_norm": 0.11074210703372955,
      "learning_rate": 1.709705264128938e-06,
      "loss": 0.0086,
      "step": 2536320
    },
    {
      "epoch": 4.150776038700471,
      "grad_norm": 0.2923094928264618,
      "learning_rate": 1.7096393719154208e-06,
      "loss": 0.0083,
      "step": 2536340
    },
    {
      "epoch": 4.1508087691391236,
      "grad_norm": 0.7762713432312012,
      "learning_rate": 1.7095734797019037e-06,
      "loss": 0.0129,
      "step": 2536360
    },
    {
      "epoch": 4.150841499577777,
      "grad_norm": 0.41571739315986633,
      "learning_rate": 1.7095075874883865e-06,
      "loss": 0.0083,
      "step": 2536380
    },
    {
      "epoch": 4.150874230016431,
      "grad_norm": 0.415812611579895,
      "learning_rate": 1.7094416952748696e-06,
      "loss": 0.0088,
      "step": 2536400
    },
    {
      "epoch": 4.150906960455084,
      "grad_norm": 0.4519861936569214,
      "learning_rate": 1.7093758030613524e-06,
      "loss": 0.0103,
      "step": 2536420
    },
    {
      "epoch": 4.150939690893737,
      "grad_norm": 0.5302689671516418,
      "learning_rate": 1.7093099108478353e-06,
      "loss": 0.0144,
      "step": 2536440
    },
    {
      "epoch": 4.150972421332391,
      "grad_norm": 0.1549912989139557,
      "learning_rate": 1.709244018634318e-06,
      "loss": 0.0078,
      "step": 2536460
    },
    {
      "epoch": 4.151005151771044,
      "grad_norm": 0.07952745258808136,
      "learning_rate": 1.709178126420801e-06,
      "loss": 0.006,
      "step": 2536480
    },
    {
      "epoch": 4.151037882209698,
      "grad_norm": 0.33150872588157654,
      "learning_rate": 1.7091122342072838e-06,
      "loss": 0.008,
      "step": 2536500
    },
    {
      "epoch": 4.1510706126483505,
      "grad_norm": 0.2842226028442383,
      "learning_rate": 1.7090463419937667e-06,
      "loss": 0.0102,
      "step": 2536520
    },
    {
      "epoch": 4.151103343087004,
      "grad_norm": 0.49589282274246216,
      "learning_rate": 1.7089804497802495e-06,
      "loss": 0.0093,
      "step": 2536540
    },
    {
      "epoch": 4.151136073525658,
      "grad_norm": 0.2049710601568222,
      "learning_rate": 1.7089145575667324e-06,
      "loss": 0.0092,
      "step": 2536560
    },
    {
      "epoch": 4.15116880396431,
      "grad_norm": 0.31277939677238464,
      "learning_rate": 1.7088486653532156e-06,
      "loss": 0.0093,
      "step": 2536580
    },
    {
      "epoch": 4.151201534402964,
      "grad_norm": 0.4952768385410309,
      "learning_rate": 1.7087827731396983e-06,
      "loss": 0.0052,
      "step": 2536600
    },
    {
      "epoch": 4.151234264841618,
      "grad_norm": 0.7138606309890747,
      "learning_rate": 1.708716880926181e-06,
      "loss": 0.0065,
      "step": 2536620
    },
    {
      "epoch": 4.15126699528027,
      "grad_norm": 0.03679461404681206,
      "learning_rate": 1.708650988712664e-06,
      "loss": 0.0069,
      "step": 2536640
    },
    {
      "epoch": 4.151299725718924,
      "grad_norm": 0.09049321711063385,
      "learning_rate": 1.7085850964991467e-06,
      "loss": 0.008,
      "step": 2536660
    },
    {
      "epoch": 4.1513324561575775,
      "grad_norm": 0.12835822999477386,
      "learning_rate": 1.7085192042856297e-06,
      "loss": 0.0137,
      "step": 2536680
    },
    {
      "epoch": 4.151365186596231,
      "grad_norm": 0.213968425989151,
      "learning_rate": 1.7084533120721124e-06,
      "loss": 0.0092,
      "step": 2536700
    },
    {
      "epoch": 4.151397917034884,
      "grad_norm": 0.09437903016805649,
      "learning_rate": 1.7083874198585954e-06,
      "loss": 0.0092,
      "step": 2536720
    },
    {
      "epoch": 4.151430647473537,
      "grad_norm": 0.09872827678918839,
      "learning_rate": 1.7083215276450781e-06,
      "loss": 0.0127,
      "step": 2536740
    },
    {
      "epoch": 4.151463377912191,
      "grad_norm": 0.23121857643127441,
      "learning_rate": 1.7082556354315613e-06,
      "loss": 0.0083,
      "step": 2536760
    },
    {
      "epoch": 4.151496108350844,
      "grad_norm": 0.0959576666355133,
      "learning_rate": 1.7081897432180442e-06,
      "loss": 0.0071,
      "step": 2536780
    },
    {
      "epoch": 4.151528838789497,
      "grad_norm": 0.2548765540122986,
      "learning_rate": 1.708123851004527e-06,
      "loss": 0.0066,
      "step": 2536800
    },
    {
      "epoch": 4.151561569228151,
      "grad_norm": 0.08710701763629913,
      "learning_rate": 1.7080579587910097e-06,
      "loss": 0.0062,
      "step": 2536820
    },
    {
      "epoch": 4.1515942996668045,
      "grad_norm": 0.372188925743103,
      "learning_rate": 1.7079920665774927e-06,
      "loss": 0.0079,
      "step": 2536840
    },
    {
      "epoch": 4.151627030105457,
      "grad_norm": 0.18563567101955414,
      "learning_rate": 1.7079261743639754e-06,
      "loss": 0.008,
      "step": 2536860
    },
    {
      "epoch": 4.151659760544111,
      "grad_norm": 0.15816880762577057,
      "learning_rate": 1.7078602821504584e-06,
      "loss": 0.0074,
      "step": 2536880
    },
    {
      "epoch": 4.151692490982764,
      "grad_norm": 0.37546563148498535,
      "learning_rate": 1.7077943899369411e-06,
      "loss": 0.0115,
      "step": 2536900
    },
    {
      "epoch": 4.151725221421417,
      "grad_norm": 0.18738579750061035,
      "learning_rate": 1.707728497723424e-06,
      "loss": 0.0048,
      "step": 2536920
    },
    {
      "epoch": 4.151757951860071,
      "grad_norm": 0.1577771008014679,
      "learning_rate": 1.7076626055099072e-06,
      "loss": 0.009,
      "step": 2536940
    },
    {
      "epoch": 4.151790682298724,
      "grad_norm": 0.08474797010421753,
      "learning_rate": 1.70759671329639e-06,
      "loss": 0.0072,
      "step": 2536960
    },
    {
      "epoch": 4.151823412737378,
      "grad_norm": 0.6032292246818542,
      "learning_rate": 1.707530821082873e-06,
      "loss": 0.0185,
      "step": 2536980
    },
    {
      "epoch": 4.151856143176031,
      "grad_norm": 0.298116534948349,
      "learning_rate": 1.7074649288693557e-06,
      "loss": 0.0142,
      "step": 2537000
    },
    {
      "epoch": 4.151888873614684,
      "grad_norm": 0.2678591012954712,
      "learning_rate": 1.7073990366558386e-06,
      "loss": 0.0097,
      "step": 2537020
    },
    {
      "epoch": 4.151921604053338,
      "grad_norm": 0.18162904679775238,
      "learning_rate": 1.7073331444423213e-06,
      "loss": 0.0072,
      "step": 2537040
    },
    {
      "epoch": 4.1519543344919905,
      "grad_norm": 0.07353945076465607,
      "learning_rate": 1.707267252228804e-06,
      "loss": 0.0062,
      "step": 2537060
    },
    {
      "epoch": 4.151987064930644,
      "grad_norm": 0.2858079671859741,
      "learning_rate": 1.707201360015287e-06,
      "loss": 0.0094,
      "step": 2537080
    },
    {
      "epoch": 4.152019795369298,
      "grad_norm": 0.18517829477787018,
      "learning_rate": 1.7071354678017702e-06,
      "loss": 0.012,
      "step": 2537100
    },
    {
      "epoch": 4.152052525807951,
      "grad_norm": 0.21711906790733337,
      "learning_rate": 1.707069575588253e-06,
      "loss": 0.0109,
      "step": 2537120
    },
    {
      "epoch": 4.152085256246604,
      "grad_norm": 0.17863835394382477,
      "learning_rate": 1.7070036833747359e-06,
      "loss": 0.007,
      "step": 2537140
    },
    {
      "epoch": 4.1521179866852576,
      "grad_norm": 0.17315857112407684,
      "learning_rate": 1.7069377911612186e-06,
      "loss": 0.0112,
      "step": 2537160
    },
    {
      "epoch": 4.152150717123911,
      "grad_norm": 0.3339942991733551,
      "learning_rate": 1.7068718989477016e-06,
      "loss": 0.0122,
      "step": 2537180
    },
    {
      "epoch": 4.152183447562564,
      "grad_norm": 0.07387083768844604,
      "learning_rate": 1.7068060067341843e-06,
      "loss": 0.0097,
      "step": 2537200
    },
    {
      "epoch": 4.1522161780012175,
      "grad_norm": 0.16069628298282623,
      "learning_rate": 1.7067401145206673e-06,
      "loss": 0.008,
      "step": 2537220
    },
    {
      "epoch": 4.152248908439871,
      "grad_norm": 0.10038493573665619,
      "learning_rate": 1.70667422230715e-06,
      "loss": 0.0102,
      "step": 2537240
    },
    {
      "epoch": 4.152281638878525,
      "grad_norm": 0.05162005126476288,
      "learning_rate": 1.706608330093633e-06,
      "loss": 0.0069,
      "step": 2537260
    },
    {
      "epoch": 4.152314369317177,
      "grad_norm": 0.4445277750492096,
      "learning_rate": 1.706542437880116e-06,
      "loss": 0.0087,
      "step": 2537280
    },
    {
      "epoch": 4.152347099755831,
      "grad_norm": 0.09640637785196304,
      "learning_rate": 1.7064765456665989e-06,
      "loss": 0.0062,
      "step": 2537300
    },
    {
      "epoch": 4.1523798301944845,
      "grad_norm": 0.1966915726661682,
      "learning_rate": 1.7064106534530816e-06,
      "loss": 0.0141,
      "step": 2537320
    },
    {
      "epoch": 4.152412560633137,
      "grad_norm": 0.050466448068618774,
      "learning_rate": 1.7063447612395646e-06,
      "loss": 0.0066,
      "step": 2537340
    },
    {
      "epoch": 4.152445291071791,
      "grad_norm": 0.2718709707260132,
      "learning_rate": 1.7062788690260473e-06,
      "loss": 0.0092,
      "step": 2537360
    },
    {
      "epoch": 4.152478021510444,
      "grad_norm": 0.15653365850448608,
      "learning_rate": 1.7062129768125303e-06,
      "loss": 0.0079,
      "step": 2537380
    },
    {
      "epoch": 4.152510751949098,
      "grad_norm": 0.2420295774936676,
      "learning_rate": 1.706147084599013e-06,
      "loss": 0.0145,
      "step": 2537400
    },
    {
      "epoch": 4.152543482387751,
      "grad_norm": 0.23797768354415894,
      "learning_rate": 1.706081192385496e-06,
      "loss": 0.009,
      "step": 2537420
    },
    {
      "epoch": 4.152576212826404,
      "grad_norm": 0.4062839448451996,
      "learning_rate": 1.7060153001719787e-06,
      "loss": 0.0095,
      "step": 2537440
    },
    {
      "epoch": 4.152608943265058,
      "grad_norm": 0.4182264804840088,
      "learning_rate": 1.7059494079584618e-06,
      "loss": 0.0126,
      "step": 2537460
    },
    {
      "epoch": 4.152641673703711,
      "grad_norm": 0.5586190223693848,
      "learning_rate": 1.7058835157449446e-06,
      "loss": 0.0087,
      "step": 2537480
    },
    {
      "epoch": 4.152674404142364,
      "grad_norm": 0.41576194763183594,
      "learning_rate": 1.7058176235314275e-06,
      "loss": 0.0109,
      "step": 2537500
    },
    {
      "epoch": 4.152707134581018,
      "grad_norm": 0.4011673033237457,
      "learning_rate": 1.7057517313179103e-06,
      "loss": 0.0078,
      "step": 2537520
    },
    {
      "epoch": 4.152739865019671,
      "grad_norm": 0.1646193563938141,
      "learning_rate": 1.7056858391043932e-06,
      "loss": 0.0074,
      "step": 2537540
    },
    {
      "epoch": 4.152772595458324,
      "grad_norm": 0.3861582279205322,
      "learning_rate": 1.705619946890876e-06,
      "loss": 0.0067,
      "step": 2537560
    },
    {
      "epoch": 4.152805325896978,
      "grad_norm": 0.3010480999946594,
      "learning_rate": 1.705554054677359e-06,
      "loss": 0.0103,
      "step": 2537580
    },
    {
      "epoch": 4.152838056335631,
      "grad_norm": 0.2070438116788864,
      "learning_rate": 1.7054881624638417e-06,
      "loss": 0.0102,
      "step": 2537600
    },
    {
      "epoch": 4.152870786774284,
      "grad_norm": 0.20068423449993134,
      "learning_rate": 1.7054222702503246e-06,
      "loss": 0.0062,
      "step": 2537620
    },
    {
      "epoch": 4.152903517212938,
      "grad_norm": 0.19516129791736603,
      "learning_rate": 1.7053563780368076e-06,
      "loss": 0.0106,
      "step": 2537640
    },
    {
      "epoch": 4.152936247651591,
      "grad_norm": 0.2110883742570877,
      "learning_rate": 1.7052904858232905e-06,
      "loss": 0.0095,
      "step": 2537660
    },
    {
      "epoch": 4.152968978090245,
      "grad_norm": 0.3876383304595947,
      "learning_rate": 1.7052245936097733e-06,
      "loss": 0.0103,
      "step": 2537680
    },
    {
      "epoch": 4.1530017085288975,
      "grad_norm": 0.20309337973594666,
      "learning_rate": 1.7051587013962562e-06,
      "loss": 0.0101,
      "step": 2537700
    },
    {
      "epoch": 4.153034438967551,
      "grad_norm": 0.21516400575637817,
      "learning_rate": 1.705092809182739e-06,
      "loss": 0.009,
      "step": 2537720
    },
    {
      "epoch": 4.153067169406205,
      "grad_norm": 0.06240496784448624,
      "learning_rate": 1.705026916969222e-06,
      "loss": 0.0081,
      "step": 2537740
    },
    {
      "epoch": 4.153099899844857,
      "grad_norm": 0.32451605796813965,
      "learning_rate": 1.7049610247557046e-06,
      "loss": 0.009,
      "step": 2537760
    },
    {
      "epoch": 4.153132630283511,
      "grad_norm": 0.11104820668697357,
      "learning_rate": 1.7048951325421876e-06,
      "loss": 0.0082,
      "step": 2537780
    },
    {
      "epoch": 4.153165360722165,
      "grad_norm": 0.11870696395635605,
      "learning_rate": 1.7048292403286703e-06,
      "loss": 0.009,
      "step": 2537800
    },
    {
      "epoch": 4.153198091160817,
      "grad_norm": 0.1831192821264267,
      "learning_rate": 1.7047633481151535e-06,
      "loss": 0.0097,
      "step": 2537820
    },
    {
      "epoch": 4.153230821599471,
      "grad_norm": 0.21519505977630615,
      "learning_rate": 1.7046974559016362e-06,
      "loss": 0.0074,
      "step": 2537840
    },
    {
      "epoch": 4.1532635520381245,
      "grad_norm": 0.15502123534679413,
      "learning_rate": 1.7046315636881192e-06,
      "loss": 0.0128,
      "step": 2537860
    },
    {
      "epoch": 4.153296282476778,
      "grad_norm": 0.26211923360824585,
      "learning_rate": 1.704565671474602e-06,
      "loss": 0.014,
      "step": 2537880
    },
    {
      "epoch": 4.153329012915431,
      "grad_norm": 0.24107369780540466,
      "learning_rate": 1.7044997792610849e-06,
      "loss": 0.0091,
      "step": 2537900
    },
    {
      "epoch": 4.153361743354084,
      "grad_norm": 0.16349026560783386,
      "learning_rate": 1.7044338870475676e-06,
      "loss": 0.011,
      "step": 2537920
    },
    {
      "epoch": 4.153394473792738,
      "grad_norm": 0.4337565004825592,
      "learning_rate": 1.7043679948340506e-06,
      "loss": 0.0093,
      "step": 2537940
    },
    {
      "epoch": 4.153427204231392,
      "grad_norm": 0.2864252030849457,
      "learning_rate": 1.7043021026205333e-06,
      "loss": 0.0131,
      "step": 2537960
    },
    {
      "epoch": 4.153459934670044,
      "grad_norm": 0.09604833275079727,
      "learning_rate": 1.7042362104070165e-06,
      "loss": 0.0074,
      "step": 2537980
    },
    {
      "epoch": 4.153492665108698,
      "grad_norm": 0.13049350678920746,
      "learning_rate": 1.7041703181934994e-06,
      "loss": 0.0093,
      "step": 2538000
    },
    {
      "epoch": 4.1535253955473515,
      "grad_norm": 0.8543447256088257,
      "learning_rate": 1.7041044259799822e-06,
      "loss": 0.0095,
      "step": 2538020
    },
    {
      "epoch": 4.153558125986004,
      "grad_norm": 0.19737328588962555,
      "learning_rate": 1.704038533766465e-06,
      "loss": 0.0089,
      "step": 2538040
    },
    {
      "epoch": 4.153590856424658,
      "grad_norm": 0.09815342724323273,
      "learning_rate": 1.7039726415529478e-06,
      "loss": 0.012,
      "step": 2538060
    },
    {
      "epoch": 4.153623586863311,
      "grad_norm": 0.2255447506904602,
      "learning_rate": 1.7039067493394306e-06,
      "loss": 0.0118,
      "step": 2538080
    },
    {
      "epoch": 4.153656317301964,
      "grad_norm": 0.17005060613155365,
      "learning_rate": 1.7038408571259135e-06,
      "loss": 0.0093,
      "step": 2538100
    },
    {
      "epoch": 4.153689047740618,
      "grad_norm": 0.12880729138851166,
      "learning_rate": 1.7037749649123963e-06,
      "loss": 0.0111,
      "step": 2538120
    },
    {
      "epoch": 4.153721778179271,
      "grad_norm": 0.10515792667865753,
      "learning_rate": 1.7037090726988792e-06,
      "loss": 0.0066,
      "step": 2538140
    },
    {
      "epoch": 4.153754508617925,
      "grad_norm": 0.22907395660877228,
      "learning_rate": 1.7036431804853624e-06,
      "loss": 0.0112,
      "step": 2538160
    },
    {
      "epoch": 4.1537872390565775,
      "grad_norm": 0.27592405676841736,
      "learning_rate": 1.7035772882718451e-06,
      "loss": 0.0118,
      "step": 2538180
    },
    {
      "epoch": 4.153819969495231,
      "grad_norm": 0.250262975692749,
      "learning_rate": 1.703511396058328e-06,
      "loss": 0.0077,
      "step": 2538200
    },
    {
      "epoch": 4.153852699933885,
      "grad_norm": 0.7860801815986633,
      "learning_rate": 1.7034455038448108e-06,
      "loss": 0.0074,
      "step": 2538220
    },
    {
      "epoch": 4.153885430372537,
      "grad_norm": 0.33750516176223755,
      "learning_rate": 1.7033796116312938e-06,
      "loss": 0.0136,
      "step": 2538240
    },
    {
      "epoch": 4.153918160811191,
      "grad_norm": 0.14598849415779114,
      "learning_rate": 1.7033137194177765e-06,
      "loss": 0.0073,
      "step": 2538260
    },
    {
      "epoch": 4.153950891249845,
      "grad_norm": 0.32972070574760437,
      "learning_rate": 1.7032478272042593e-06,
      "loss": 0.0123,
      "step": 2538280
    },
    {
      "epoch": 4.153983621688498,
      "grad_norm": 0.18834756314754486,
      "learning_rate": 1.7031819349907422e-06,
      "loss": 0.0088,
      "step": 2538300
    },
    {
      "epoch": 4.154016352127151,
      "grad_norm": 0.10562600940465927,
      "learning_rate": 1.703116042777225e-06,
      "loss": 0.0146,
      "step": 2538320
    },
    {
      "epoch": 4.1540490825658045,
      "grad_norm": 0.1416858583688736,
      "learning_rate": 1.7030501505637081e-06,
      "loss": 0.0062,
      "step": 2538340
    },
    {
      "epoch": 4.154081813004458,
      "grad_norm": 0.46110841631889343,
      "learning_rate": 1.702984258350191e-06,
      "loss": 0.0118,
      "step": 2538360
    },
    {
      "epoch": 4.154114543443111,
      "grad_norm": 0.36043182015419006,
      "learning_rate": 1.7029183661366738e-06,
      "loss": 0.007,
      "step": 2538380
    },
    {
      "epoch": 4.154147273881764,
      "grad_norm": 0.10554535686969757,
      "learning_rate": 1.7028524739231568e-06,
      "loss": 0.0119,
      "step": 2538400
    },
    {
      "epoch": 4.154180004320418,
      "grad_norm": 0.22259287536144257,
      "learning_rate": 1.7027865817096395e-06,
      "loss": 0.0109,
      "step": 2538420
    },
    {
      "epoch": 4.154212734759072,
      "grad_norm": 0.3554295599460602,
      "learning_rate": 1.7027206894961224e-06,
      "loss": 0.0089,
      "step": 2538440
    },
    {
      "epoch": 4.154245465197724,
      "grad_norm": 0.42228907346725464,
      "learning_rate": 1.7026547972826052e-06,
      "loss": 0.0092,
      "step": 2538460
    },
    {
      "epoch": 4.154278195636378,
      "grad_norm": 0.13268879055976868,
      "learning_rate": 1.702588905069088e-06,
      "loss": 0.0119,
      "step": 2538480
    },
    {
      "epoch": 4.1543109260750315,
      "grad_norm": 0.26021239161491394,
      "learning_rate": 1.7025230128555709e-06,
      "loss": 0.0079,
      "step": 2538500
    },
    {
      "epoch": 4.154343656513684,
      "grad_norm": 0.48151499032974243,
      "learning_rate": 1.702457120642054e-06,
      "loss": 0.0106,
      "step": 2538520
    },
    {
      "epoch": 4.154376386952338,
      "grad_norm": 0.2632114589214325,
      "learning_rate": 1.7023912284285368e-06,
      "loss": 0.0123,
      "step": 2538540
    },
    {
      "epoch": 4.154409117390991,
      "grad_norm": 0.2529422342777252,
      "learning_rate": 1.7023253362150197e-06,
      "loss": 0.0094,
      "step": 2538560
    },
    {
      "epoch": 4.154441847829645,
      "grad_norm": 0.18233342468738556,
      "learning_rate": 1.7022594440015025e-06,
      "loss": 0.0067,
      "step": 2538580
    },
    {
      "epoch": 4.154474578268298,
      "grad_norm": 0.05364852771162987,
      "learning_rate": 1.7021935517879854e-06,
      "loss": 0.0063,
      "step": 2538600
    },
    {
      "epoch": 4.154507308706951,
      "grad_norm": 0.09006532281637192,
      "learning_rate": 1.7021276595744682e-06,
      "loss": 0.0098,
      "step": 2538620
    },
    {
      "epoch": 4.154540039145605,
      "grad_norm": 0.26395946741104126,
      "learning_rate": 1.7020617673609511e-06,
      "loss": 0.0086,
      "step": 2538640
    },
    {
      "epoch": 4.154572769584258,
      "grad_norm": 0.4409533739089966,
      "learning_rate": 1.7019958751474339e-06,
      "loss": 0.0125,
      "step": 2538660
    },
    {
      "epoch": 4.154605500022911,
      "grad_norm": 0.4014309346675873,
      "learning_rate": 1.7019299829339168e-06,
      "loss": 0.009,
      "step": 2538680
    },
    {
      "epoch": 4.154638230461565,
      "grad_norm": 0.1952359974384308,
      "learning_rate": 1.7018640907203998e-06,
      "loss": 0.0123,
      "step": 2538700
    },
    {
      "epoch": 4.154670960900218,
      "grad_norm": 0.2720140814781189,
      "learning_rate": 1.7017981985068827e-06,
      "loss": 0.0085,
      "step": 2538720
    },
    {
      "epoch": 4.154703691338871,
      "grad_norm": 0.5180040597915649,
      "learning_rate": 1.7017323062933654e-06,
      "loss": 0.0075,
      "step": 2538740
    },
    {
      "epoch": 4.154736421777525,
      "grad_norm": 0.1850624531507492,
      "learning_rate": 1.7016664140798484e-06,
      "loss": 0.0065,
      "step": 2538760
    },
    {
      "epoch": 4.154769152216178,
      "grad_norm": 0.18897320330142975,
      "learning_rate": 1.7016005218663311e-06,
      "loss": 0.0107,
      "step": 2538780
    },
    {
      "epoch": 4.154801882654831,
      "grad_norm": 0.07186294347047806,
      "learning_rate": 1.701534629652814e-06,
      "loss": 0.0153,
      "step": 2538800
    },
    {
      "epoch": 4.154834613093485,
      "grad_norm": 0.308256596326828,
      "learning_rate": 1.7014687374392968e-06,
      "loss": 0.0064,
      "step": 2538820
    },
    {
      "epoch": 4.154867343532138,
      "grad_norm": 0.22062617540359497,
      "learning_rate": 1.7014028452257798e-06,
      "loss": 0.0109,
      "step": 2538840
    },
    {
      "epoch": 4.154900073970792,
      "grad_norm": 0.5108299851417542,
      "learning_rate": 1.7013369530122627e-06,
      "loss": 0.0092,
      "step": 2538860
    },
    {
      "epoch": 4.1549328044094445,
      "grad_norm": 0.15875521302223206,
      "learning_rate": 1.7012710607987457e-06,
      "loss": 0.0093,
      "step": 2538880
    },
    {
      "epoch": 4.154965534848098,
      "grad_norm": 0.09985960274934769,
      "learning_rate": 1.7012051685852284e-06,
      "loss": 0.009,
      "step": 2538900
    },
    {
      "epoch": 4.154998265286752,
      "grad_norm": 0.10397925227880478,
      "learning_rate": 1.7011392763717114e-06,
      "loss": 0.0086,
      "step": 2538920
    },
    {
      "epoch": 4.155030995725404,
      "grad_norm": 0.5815580487251282,
      "learning_rate": 1.7010733841581941e-06,
      "loss": 0.0083,
      "step": 2538940
    },
    {
      "epoch": 4.155063726164058,
      "grad_norm": 0.13025550544261932,
      "learning_rate": 1.701007491944677e-06,
      "loss": 0.0059,
      "step": 2538960
    },
    {
      "epoch": 4.1550964566027115,
      "grad_norm": 0.14714832603931427,
      "learning_rate": 1.7009415997311598e-06,
      "loss": 0.0076,
      "step": 2538980
    },
    {
      "epoch": 4.155129187041365,
      "grad_norm": 0.3969508409500122,
      "learning_rate": 1.7008757075176428e-06,
      "loss": 0.0084,
      "step": 2539000
    },
    {
      "epoch": 4.155161917480018,
      "grad_norm": 0.6002926826477051,
      "learning_rate": 1.7008098153041255e-06,
      "loss": 0.0089,
      "step": 2539020
    },
    {
      "epoch": 4.155194647918671,
      "grad_norm": 0.3101322650909424,
      "learning_rate": 1.7007439230906087e-06,
      "loss": 0.01,
      "step": 2539040
    },
    {
      "epoch": 4.155227378357325,
      "grad_norm": 0.11184950172901154,
      "learning_rate": 1.7006780308770914e-06,
      "loss": 0.009,
      "step": 2539060
    },
    {
      "epoch": 4.155260108795978,
      "grad_norm": 0.2638698220252991,
      "learning_rate": 1.7006121386635744e-06,
      "loss": 0.0106,
      "step": 2539080
    },
    {
      "epoch": 4.155292839234631,
      "grad_norm": 0.19838090240955353,
      "learning_rate": 1.700546246450057e-06,
      "loss": 0.0092,
      "step": 2539100
    },
    {
      "epoch": 4.155325569673285,
      "grad_norm": 0.2264391928911209,
      "learning_rate": 1.70048035423654e-06,
      "loss": 0.0096,
      "step": 2539120
    },
    {
      "epoch": 4.1553583001119385,
      "grad_norm": 0.2427951544523239,
      "learning_rate": 1.7004144620230228e-06,
      "loss": 0.007,
      "step": 2539140
    },
    {
      "epoch": 4.155391030550591,
      "grad_norm": 0.2681577503681183,
      "learning_rate": 1.7003485698095057e-06,
      "loss": 0.0095,
      "step": 2539160
    },
    {
      "epoch": 4.155423760989245,
      "grad_norm": 0.18233045935630798,
      "learning_rate": 1.7002826775959885e-06,
      "loss": 0.0079,
      "step": 2539180
    },
    {
      "epoch": 4.155456491427898,
      "grad_norm": 0.44975796341896057,
      "learning_rate": 1.7002167853824714e-06,
      "loss": 0.0092,
      "step": 2539200
    },
    {
      "epoch": 4.155489221866551,
      "grad_norm": 0.13901859521865845,
      "learning_rate": 1.7001508931689546e-06,
      "loss": 0.0064,
      "step": 2539220
    },
    {
      "epoch": 4.155521952305205,
      "grad_norm": 0.22931301593780518,
      "learning_rate": 1.7000850009554373e-06,
      "loss": 0.0076,
      "step": 2539240
    },
    {
      "epoch": 4.155554682743858,
      "grad_norm": 0.35175278782844543,
      "learning_rate": 1.70001910874192e-06,
      "loss": 0.0065,
      "step": 2539260
    },
    {
      "epoch": 4.155587413182512,
      "grad_norm": 0.0853227972984314,
      "learning_rate": 1.699953216528403e-06,
      "loss": 0.0089,
      "step": 2539280
    },
    {
      "epoch": 4.155620143621165,
      "grad_norm": 0.1253298968076706,
      "learning_rate": 1.6998873243148858e-06,
      "loss": 0.0078,
      "step": 2539300
    },
    {
      "epoch": 4.155652874059818,
      "grad_norm": 0.15034079551696777,
      "learning_rate": 1.6998214321013687e-06,
      "loss": 0.0083,
      "step": 2539320
    },
    {
      "epoch": 4.155685604498472,
      "grad_norm": 0.2838810086250305,
      "learning_rate": 1.6997555398878515e-06,
      "loss": 0.0137,
      "step": 2539340
    },
    {
      "epoch": 4.1557183349371245,
      "grad_norm": 0.16275250911712646,
      "learning_rate": 1.6996896476743344e-06,
      "loss": 0.0078,
      "step": 2539360
    },
    {
      "epoch": 4.155751065375778,
      "grad_norm": 0.07428350299596786,
      "learning_rate": 1.6996237554608171e-06,
      "loss": 0.0094,
      "step": 2539380
    },
    {
      "epoch": 4.155783795814432,
      "grad_norm": 0.12799575924873352,
      "learning_rate": 1.6995578632473003e-06,
      "loss": 0.0095,
      "step": 2539400
    },
    {
      "epoch": 4.155816526253085,
      "grad_norm": 0.21364521980285645,
      "learning_rate": 1.6994919710337833e-06,
      "loss": 0.0089,
      "step": 2539420
    },
    {
      "epoch": 4.155849256691738,
      "grad_norm": 0.1677134782075882,
      "learning_rate": 1.699426078820266e-06,
      "loss": 0.0074,
      "step": 2539440
    },
    {
      "epoch": 4.155881987130392,
      "grad_norm": 0.17485298216342926,
      "learning_rate": 1.6993601866067487e-06,
      "loss": 0.0089,
      "step": 2539460
    },
    {
      "epoch": 4.155914717569045,
      "grad_norm": 0.13979977369308472,
      "learning_rate": 1.6992942943932317e-06,
      "loss": 0.0077,
      "step": 2539480
    },
    {
      "epoch": 4.155947448007698,
      "grad_norm": 0.08183443546295166,
      "learning_rate": 1.6992284021797144e-06,
      "loss": 0.012,
      "step": 2539500
    },
    {
      "epoch": 4.1559801784463515,
      "grad_norm": 0.6194332838058472,
      "learning_rate": 1.6991625099661974e-06,
      "loss": 0.0098,
      "step": 2539520
    },
    {
      "epoch": 4.156012908885005,
      "grad_norm": 0.14660696685314178,
      "learning_rate": 1.6990966177526801e-06,
      "loss": 0.0089,
      "step": 2539540
    },
    {
      "epoch": 4.156045639323658,
      "grad_norm": 0.3158745765686035,
      "learning_rate": 1.699030725539163e-06,
      "loss": 0.0066,
      "step": 2539560
    },
    {
      "epoch": 4.156078369762311,
      "grad_norm": 0.6192981600761414,
      "learning_rate": 1.6989648333256462e-06,
      "loss": 0.0105,
      "step": 2539580
    },
    {
      "epoch": 4.156111100200965,
      "grad_norm": 0.18747322261333466,
      "learning_rate": 1.698898941112129e-06,
      "loss": 0.0088,
      "step": 2539600
    },
    {
      "epoch": 4.156143830639619,
      "grad_norm": 0.6114217042922974,
      "learning_rate": 1.698833048898612e-06,
      "loss": 0.0071,
      "step": 2539620
    },
    {
      "epoch": 4.156176561078271,
      "grad_norm": 0.275395005941391,
      "learning_rate": 1.6987671566850947e-06,
      "loss": 0.0073,
      "step": 2539640
    },
    {
      "epoch": 4.156209291516925,
      "grad_norm": 0.12714087963104248,
      "learning_rate": 1.6987012644715776e-06,
      "loss": 0.0099,
      "step": 2539660
    },
    {
      "epoch": 4.1562420219555785,
      "grad_norm": 0.40059930086135864,
      "learning_rate": 1.6986353722580604e-06,
      "loss": 0.0096,
      "step": 2539680
    },
    {
      "epoch": 4.156274752394231,
      "grad_norm": 0.30404192209243774,
      "learning_rate": 1.698569480044543e-06,
      "loss": 0.0087,
      "step": 2539700
    },
    {
      "epoch": 4.156307482832885,
      "grad_norm": 0.16318553686141968,
      "learning_rate": 1.698503587831026e-06,
      "loss": 0.0105,
      "step": 2539720
    },
    {
      "epoch": 4.156340213271538,
      "grad_norm": 0.3056926727294922,
      "learning_rate": 1.6984376956175092e-06,
      "loss": 0.0079,
      "step": 2539740
    },
    {
      "epoch": 4.156372943710192,
      "grad_norm": 0.1751459687948227,
      "learning_rate": 1.698371803403992e-06,
      "loss": 0.0113,
      "step": 2539760
    },
    {
      "epoch": 4.156405674148845,
      "grad_norm": 0.3606063425540924,
      "learning_rate": 1.698305911190475e-06,
      "loss": 0.0073,
      "step": 2539780
    },
    {
      "epoch": 4.156438404587498,
      "grad_norm": 0.08992160111665726,
      "learning_rate": 1.6982400189769576e-06,
      "loss": 0.0108,
      "step": 2539800
    },
    {
      "epoch": 4.156471135026152,
      "grad_norm": 0.20459356904029846,
      "learning_rate": 1.6981741267634406e-06,
      "loss": 0.0064,
      "step": 2539820
    },
    {
      "epoch": 4.1565038654648045,
      "grad_norm": 0.14737102389335632,
      "learning_rate": 1.6981082345499233e-06,
      "loss": 0.0083,
      "step": 2539840
    },
    {
      "epoch": 4.156536595903458,
      "grad_norm": 0.13010840117931366,
      "learning_rate": 1.6980423423364063e-06,
      "loss": 0.0132,
      "step": 2539860
    },
    {
      "epoch": 4.156569326342112,
      "grad_norm": 0.3298814594745636,
      "learning_rate": 1.697976450122889e-06,
      "loss": 0.0104,
      "step": 2539880
    },
    {
      "epoch": 4.156602056780765,
      "grad_norm": 0.20128631591796875,
      "learning_rate": 1.697910557909372e-06,
      "loss": 0.0104,
      "step": 2539900
    },
    {
      "epoch": 4.156634787219418,
      "grad_norm": 0.5299994945526123,
      "learning_rate": 1.697844665695855e-06,
      "loss": 0.006,
      "step": 2539920
    },
    {
      "epoch": 4.156667517658072,
      "grad_norm": 0.22227944433689117,
      "learning_rate": 1.6977787734823379e-06,
      "loss": 0.0066,
      "step": 2539940
    },
    {
      "epoch": 4.156700248096725,
      "grad_norm": 0.1610904037952423,
      "learning_rate": 1.6977128812688206e-06,
      "loss": 0.0066,
      "step": 2539960
    },
    {
      "epoch": 4.156732978535378,
      "grad_norm": 0.1820964217185974,
      "learning_rate": 1.6976469890553036e-06,
      "loss": 0.0085,
      "step": 2539980
    },
    {
      "epoch": 4.1567657089740315,
      "grad_norm": 0.1619054675102234,
      "learning_rate": 1.6975810968417863e-06,
      "loss": 0.0059,
      "step": 2540000
    },
    {
      "epoch": 4.156798439412685,
      "grad_norm": 1.0900124311447144,
      "learning_rate": 1.6975152046282693e-06,
      "loss": 0.0127,
      "step": 2540020
    },
    {
      "epoch": 4.156831169851339,
      "grad_norm": 0.34644389152526855,
      "learning_rate": 1.697449312414752e-06,
      "loss": 0.01,
      "step": 2540040
    },
    {
      "epoch": 4.156863900289991,
      "grad_norm": 0.23336490988731384,
      "learning_rate": 1.697383420201235e-06,
      "loss": 0.0092,
      "step": 2540060
    },
    {
      "epoch": 4.156896630728645,
      "grad_norm": 0.24447564780712128,
      "learning_rate": 1.6973175279877177e-06,
      "loss": 0.0091,
      "step": 2540080
    },
    {
      "epoch": 4.156929361167299,
      "grad_norm": 0.2690562605857849,
      "learning_rate": 1.6972516357742009e-06,
      "loss": 0.0101,
      "step": 2540100
    },
    {
      "epoch": 4.156962091605951,
      "grad_norm": 0.12446203082799911,
      "learning_rate": 1.6971857435606836e-06,
      "loss": 0.0082,
      "step": 2540120
    },
    {
      "epoch": 4.156994822044605,
      "grad_norm": 0.12298864871263504,
      "learning_rate": 1.6971198513471665e-06,
      "loss": 0.0079,
      "step": 2540140
    },
    {
      "epoch": 4.1570275524832585,
      "grad_norm": 0.33858829736709595,
      "learning_rate": 1.6970539591336493e-06,
      "loss": 0.0094,
      "step": 2540160
    },
    {
      "epoch": 4.157060282921912,
      "grad_norm": 0.09492526203393936,
      "learning_rate": 1.6969880669201322e-06,
      "loss": 0.0055,
      "step": 2540180
    },
    {
      "epoch": 4.157093013360565,
      "grad_norm": 0.3898647427558899,
      "learning_rate": 1.696922174706615e-06,
      "loss": 0.009,
      "step": 2540200
    },
    {
      "epoch": 4.157125743799218,
      "grad_norm": 0.3166138231754303,
      "learning_rate": 1.696856282493098e-06,
      "loss": 0.0073,
      "step": 2540220
    },
    {
      "epoch": 4.157158474237872,
      "grad_norm": 0.20470641553401947,
      "learning_rate": 1.6967903902795807e-06,
      "loss": 0.0086,
      "step": 2540240
    },
    {
      "epoch": 4.157191204676525,
      "grad_norm": 0.1861993670463562,
      "learning_rate": 1.6967244980660636e-06,
      "loss": 0.0098,
      "step": 2540260
    },
    {
      "epoch": 4.157223935115178,
      "grad_norm": 0.27697354555130005,
      "learning_rate": 1.6966586058525466e-06,
      "loss": 0.0072,
      "step": 2540280
    },
    {
      "epoch": 4.157256665553832,
      "grad_norm": 0.1541631817817688,
      "learning_rate": 1.6965927136390295e-06,
      "loss": 0.0062,
      "step": 2540300
    },
    {
      "epoch": 4.1572893959924855,
      "grad_norm": 0.14529059827327728,
      "learning_rate": 1.6965268214255123e-06,
      "loss": 0.0073,
      "step": 2540320
    },
    {
      "epoch": 4.157322126431138,
      "grad_norm": 0.14465564489364624,
      "learning_rate": 1.6964609292119952e-06,
      "loss": 0.0117,
      "step": 2540340
    },
    {
      "epoch": 4.157354856869792,
      "grad_norm": 0.6005632877349854,
      "learning_rate": 1.696395036998478e-06,
      "loss": 0.0125,
      "step": 2540360
    },
    {
      "epoch": 4.157387587308445,
      "grad_norm": 0.3214987814426422,
      "learning_rate": 1.696329144784961e-06,
      "loss": 0.0092,
      "step": 2540380
    },
    {
      "epoch": 4.157420317747098,
      "grad_norm": 0.3089221715927124,
      "learning_rate": 1.6962632525714436e-06,
      "loss": 0.0077,
      "step": 2540400
    },
    {
      "epoch": 4.157453048185752,
      "grad_norm": 0.16957543790340424,
      "learning_rate": 1.6961973603579266e-06,
      "loss": 0.0075,
      "step": 2540420
    },
    {
      "epoch": 4.157485778624405,
      "grad_norm": 0.11559902876615524,
      "learning_rate": 1.6961314681444093e-06,
      "loss": 0.0072,
      "step": 2540440
    },
    {
      "epoch": 4.157518509063059,
      "grad_norm": 0.2374778538942337,
      "learning_rate": 1.6960655759308925e-06,
      "loss": 0.0075,
      "step": 2540460
    },
    {
      "epoch": 4.157551239501712,
      "grad_norm": 0.1990048885345459,
      "learning_rate": 1.6959996837173752e-06,
      "loss": 0.0089,
      "step": 2540480
    },
    {
      "epoch": 4.157583969940365,
      "grad_norm": 0.4475461542606354,
      "learning_rate": 1.6959337915038582e-06,
      "loss": 0.0114,
      "step": 2540500
    },
    {
      "epoch": 4.157616700379019,
      "grad_norm": 0.2700546979904175,
      "learning_rate": 1.695867899290341e-06,
      "loss": 0.0072,
      "step": 2540520
    },
    {
      "epoch": 4.1576494308176715,
      "grad_norm": 0.13983315229415894,
      "learning_rate": 1.6958020070768239e-06,
      "loss": 0.0097,
      "step": 2540540
    },
    {
      "epoch": 4.157682161256325,
      "grad_norm": 0.6540202498435974,
      "learning_rate": 1.6957361148633066e-06,
      "loss": 0.0074,
      "step": 2540560
    },
    {
      "epoch": 4.157714891694979,
      "grad_norm": 0.3496663272380829,
      "learning_rate": 1.6956702226497896e-06,
      "loss": 0.0081,
      "step": 2540580
    },
    {
      "epoch": 4.157747622133632,
      "grad_norm": 0.28688502311706543,
      "learning_rate": 1.6956043304362723e-06,
      "loss": 0.0105,
      "step": 2540600
    },
    {
      "epoch": 4.157780352572285,
      "grad_norm": 0.2908709943294525,
      "learning_rate": 1.6955384382227555e-06,
      "loss": 0.0069,
      "step": 2540620
    },
    {
      "epoch": 4.1578130830109385,
      "grad_norm": 0.21021197736263275,
      "learning_rate": 1.6954725460092384e-06,
      "loss": 0.0081,
      "step": 2540640
    },
    {
      "epoch": 4.157845813449592,
      "grad_norm": 0.1679278165102005,
      "learning_rate": 1.6954066537957212e-06,
      "loss": 0.0111,
      "step": 2540660
    },
    {
      "epoch": 4.157878543888245,
      "grad_norm": 0.19537334144115448,
      "learning_rate": 1.695340761582204e-06,
      "loss": 0.0063,
      "step": 2540680
    },
    {
      "epoch": 4.157911274326898,
      "grad_norm": 0.2536303997039795,
      "learning_rate": 1.6952748693686869e-06,
      "loss": 0.0069,
      "step": 2540700
    },
    {
      "epoch": 4.157944004765552,
      "grad_norm": 0.3222963809967041,
      "learning_rate": 1.6952089771551696e-06,
      "loss": 0.0097,
      "step": 2540720
    },
    {
      "epoch": 4.157976735204206,
      "grad_norm": 0.2707911729812622,
      "learning_rate": 1.6951430849416526e-06,
      "loss": 0.0102,
      "step": 2540740
    },
    {
      "epoch": 4.158009465642858,
      "grad_norm": 0.08583581447601318,
      "learning_rate": 1.6950771927281353e-06,
      "loss": 0.0091,
      "step": 2540760
    },
    {
      "epoch": 4.158042196081512,
      "grad_norm": 0.20611636340618134,
      "learning_rate": 1.6950113005146182e-06,
      "loss": 0.0125,
      "step": 2540780
    },
    {
      "epoch": 4.1580749265201655,
      "grad_norm": 0.23902294039726257,
      "learning_rate": 1.6949454083011014e-06,
      "loss": 0.0093,
      "step": 2540800
    },
    {
      "epoch": 4.158107656958818,
      "grad_norm": 0.505229115486145,
      "learning_rate": 1.6948795160875841e-06,
      "loss": 0.0084,
      "step": 2540820
    },
    {
      "epoch": 4.158140387397472,
      "grad_norm": 0.25337156653404236,
      "learning_rate": 1.694813623874067e-06,
      "loss": 0.0114,
      "step": 2540840
    },
    {
      "epoch": 4.158173117836125,
      "grad_norm": 0.16365857422351837,
      "learning_rate": 1.6947477316605498e-06,
      "loss": 0.0102,
      "step": 2540860
    },
    {
      "epoch": 4.158205848274779,
      "grad_norm": 0.28320440649986267,
      "learning_rate": 1.6946818394470328e-06,
      "loss": 0.0119,
      "step": 2540880
    },
    {
      "epoch": 4.158238578713432,
      "grad_norm": 0.11700636148452759,
      "learning_rate": 1.6946159472335155e-06,
      "loss": 0.0091,
      "step": 2540900
    },
    {
      "epoch": 4.158271309152085,
      "grad_norm": 0.4486585259437561,
      "learning_rate": 1.6945500550199983e-06,
      "loss": 0.0102,
      "step": 2540920
    },
    {
      "epoch": 4.158304039590739,
      "grad_norm": 0.41045352816581726,
      "learning_rate": 1.6944841628064812e-06,
      "loss": 0.0094,
      "step": 2540940
    },
    {
      "epoch": 4.158336770029392,
      "grad_norm": 0.29353296756744385,
      "learning_rate": 1.694418270592964e-06,
      "loss": 0.0089,
      "step": 2540960
    },
    {
      "epoch": 4.158369500468045,
      "grad_norm": 0.1003570482134819,
      "learning_rate": 1.6943523783794471e-06,
      "loss": 0.0149,
      "step": 2540980
    },
    {
      "epoch": 4.158402230906699,
      "grad_norm": 0.2093055695295334,
      "learning_rate": 1.69428648616593e-06,
      "loss": 0.0085,
      "step": 2541000
    },
    {
      "epoch": 4.1584349613453515,
      "grad_norm": 0.3387924134731293,
      "learning_rate": 1.6942205939524128e-06,
      "loss": 0.0079,
      "step": 2541020
    },
    {
      "epoch": 4.158467691784005,
      "grad_norm": 0.201130673289299,
      "learning_rate": 1.6941547017388958e-06,
      "loss": 0.0117,
      "step": 2541040
    },
    {
      "epoch": 4.158500422222659,
      "grad_norm": 0.28383198380470276,
      "learning_rate": 1.6940888095253785e-06,
      "loss": 0.0083,
      "step": 2541060
    },
    {
      "epoch": 4.158533152661312,
      "grad_norm": 0.5209376811981201,
      "learning_rate": 1.6940229173118615e-06,
      "loss": 0.008,
      "step": 2541080
    },
    {
      "epoch": 4.158565883099965,
      "grad_norm": 0.7822871208190918,
      "learning_rate": 1.6939570250983442e-06,
      "loss": 0.0091,
      "step": 2541100
    },
    {
      "epoch": 4.158598613538619,
      "grad_norm": 0.38048234581947327,
      "learning_rate": 1.693891132884827e-06,
      "loss": 0.0068,
      "step": 2541120
    },
    {
      "epoch": 4.158631343977272,
      "grad_norm": 0.506064236164093,
      "learning_rate": 1.6938252406713099e-06,
      "loss": 0.0104,
      "step": 2541140
    },
    {
      "epoch": 4.158664074415925,
      "grad_norm": 0.03319795802235603,
      "learning_rate": 1.693759348457793e-06,
      "loss": 0.0077,
      "step": 2541160
    },
    {
      "epoch": 4.1586968048545785,
      "grad_norm": 0.2688392996788025,
      "learning_rate": 1.6936934562442758e-06,
      "loss": 0.0078,
      "step": 2541180
    },
    {
      "epoch": 4.158729535293232,
      "grad_norm": 0.13859501481056213,
      "learning_rate": 1.6936275640307587e-06,
      "loss": 0.0077,
      "step": 2541200
    },
    {
      "epoch": 4.158762265731886,
      "grad_norm": 0.18689768016338348,
      "learning_rate": 1.6935616718172415e-06,
      "loss": 0.0104,
      "step": 2541220
    },
    {
      "epoch": 4.158794996170538,
      "grad_norm": 0.6026764512062073,
      "learning_rate": 1.6934957796037244e-06,
      "loss": 0.009,
      "step": 2541240
    },
    {
      "epoch": 4.158827726609192,
      "grad_norm": 0.28332358598709106,
      "learning_rate": 1.6934298873902072e-06,
      "loss": 0.0084,
      "step": 2541260
    },
    {
      "epoch": 4.158860457047846,
      "grad_norm": 0.23286446928977966,
      "learning_rate": 1.6933639951766901e-06,
      "loss": 0.0067,
      "step": 2541280
    },
    {
      "epoch": 4.158893187486498,
      "grad_norm": 0.23306752741336823,
      "learning_rate": 1.6932981029631729e-06,
      "loss": 0.0058,
      "step": 2541300
    },
    {
      "epoch": 4.158925917925152,
      "grad_norm": 0.11968240141868591,
      "learning_rate": 1.6932322107496558e-06,
      "loss": 0.0152,
      "step": 2541320
    },
    {
      "epoch": 4.1589586483638055,
      "grad_norm": 0.27679815888404846,
      "learning_rate": 1.6931663185361388e-06,
      "loss": 0.006,
      "step": 2541340
    },
    {
      "epoch": 4.158991378802459,
      "grad_norm": 0.4521831274032593,
      "learning_rate": 1.6931004263226217e-06,
      "loss": 0.016,
      "step": 2541360
    },
    {
      "epoch": 4.159024109241112,
      "grad_norm": 1.3235070705413818,
      "learning_rate": 1.6930345341091045e-06,
      "loss": 0.0074,
      "step": 2541380
    },
    {
      "epoch": 4.159056839679765,
      "grad_norm": 0.35723263025283813,
      "learning_rate": 1.6929686418955874e-06,
      "loss": 0.0093,
      "step": 2541400
    },
    {
      "epoch": 4.159089570118419,
      "grad_norm": 0.15429773926734924,
      "learning_rate": 1.6929027496820701e-06,
      "loss": 0.0079,
      "step": 2541420
    },
    {
      "epoch": 4.159122300557072,
      "grad_norm": 0.14607463777065277,
      "learning_rate": 1.692836857468553e-06,
      "loss": 0.0062,
      "step": 2541440
    },
    {
      "epoch": 4.159155030995725,
      "grad_norm": 0.134759321808815,
      "learning_rate": 1.6927709652550358e-06,
      "loss": 0.0069,
      "step": 2541460
    },
    {
      "epoch": 4.159187761434379,
      "grad_norm": 0.7657227516174316,
      "learning_rate": 1.6927050730415188e-06,
      "loss": 0.012,
      "step": 2541480
    },
    {
      "epoch": 4.159220491873032,
      "grad_norm": 0.48959916830062866,
      "learning_rate": 1.6926391808280017e-06,
      "loss": 0.0101,
      "step": 2541500
    },
    {
      "epoch": 4.159253222311685,
      "grad_norm": 0.10502559691667557,
      "learning_rate": 1.6925732886144847e-06,
      "loss": 0.006,
      "step": 2541520
    },
    {
      "epoch": 4.159285952750339,
      "grad_norm": 0.1272527575492859,
      "learning_rate": 1.6925073964009674e-06,
      "loss": 0.0116,
      "step": 2541540
    },
    {
      "epoch": 4.159318683188992,
      "grad_norm": 0.7908769845962524,
      "learning_rate": 1.6924415041874504e-06,
      "loss": 0.0112,
      "step": 2541560
    },
    {
      "epoch": 4.159351413627645,
      "grad_norm": 0.18769919872283936,
      "learning_rate": 1.6923756119739331e-06,
      "loss": 0.0103,
      "step": 2541580
    },
    {
      "epoch": 4.159384144066299,
      "grad_norm": 0.2152261584997177,
      "learning_rate": 1.692309719760416e-06,
      "loss": 0.0096,
      "step": 2541600
    },
    {
      "epoch": 4.159416874504952,
      "grad_norm": 0.1629917323589325,
      "learning_rate": 1.6922438275468988e-06,
      "loss": 0.0089,
      "step": 2541620
    },
    {
      "epoch": 4.159449604943606,
      "grad_norm": 0.07579521089792252,
      "learning_rate": 1.6921779353333818e-06,
      "loss": 0.005,
      "step": 2541640
    },
    {
      "epoch": 4.1594823353822585,
      "grad_norm": 0.7672526240348816,
      "learning_rate": 1.6921120431198645e-06,
      "loss": 0.008,
      "step": 2541660
    },
    {
      "epoch": 4.159515065820912,
      "grad_norm": 0.26357513666152954,
      "learning_rate": 1.6920461509063477e-06,
      "loss": 0.0084,
      "step": 2541680
    },
    {
      "epoch": 4.159547796259566,
      "grad_norm": 0.30644503235816956,
      "learning_rate": 1.6919802586928304e-06,
      "loss": 0.009,
      "step": 2541700
    },
    {
      "epoch": 4.159580526698218,
      "grad_norm": 0.23327915370464325,
      "learning_rate": 1.6919143664793134e-06,
      "loss": 0.009,
      "step": 2541720
    },
    {
      "epoch": 4.159613257136872,
      "grad_norm": 0.17385384440422058,
      "learning_rate": 1.691848474265796e-06,
      "loss": 0.0107,
      "step": 2541740
    },
    {
      "epoch": 4.159645987575526,
      "grad_norm": 0.4184066653251648,
      "learning_rate": 1.691782582052279e-06,
      "loss": 0.0115,
      "step": 2541760
    },
    {
      "epoch": 4.159678718014179,
      "grad_norm": 0.17314143478870392,
      "learning_rate": 1.6917166898387618e-06,
      "loss": 0.0101,
      "step": 2541780
    },
    {
      "epoch": 4.159711448452832,
      "grad_norm": 0.44309884309768677,
      "learning_rate": 1.6916507976252447e-06,
      "loss": 0.0067,
      "step": 2541800
    },
    {
      "epoch": 4.1597441788914855,
      "grad_norm": 0.12052404880523682,
      "learning_rate": 1.6915849054117275e-06,
      "loss": 0.0103,
      "step": 2541820
    },
    {
      "epoch": 4.159776909330139,
      "grad_norm": 0.18702737987041473,
      "learning_rate": 1.6915190131982104e-06,
      "loss": 0.0115,
      "step": 2541840
    },
    {
      "epoch": 4.159809639768792,
      "grad_norm": 0.1265457570552826,
      "learning_rate": 1.6914531209846936e-06,
      "loss": 0.0095,
      "step": 2541860
    },
    {
      "epoch": 4.159842370207445,
      "grad_norm": 0.1868198961019516,
      "learning_rate": 1.6913872287711763e-06,
      "loss": 0.0081,
      "step": 2541880
    },
    {
      "epoch": 4.159875100646099,
      "grad_norm": 0.4262300729751587,
      "learning_rate": 1.691321336557659e-06,
      "loss": 0.0135,
      "step": 2541900
    },
    {
      "epoch": 4.159907831084753,
      "grad_norm": 0.24869169294834137,
      "learning_rate": 1.691255444344142e-06,
      "loss": 0.0105,
      "step": 2541920
    },
    {
      "epoch": 4.159940561523405,
      "grad_norm": 0.04739269241690636,
      "learning_rate": 1.6911895521306248e-06,
      "loss": 0.0047,
      "step": 2541940
    },
    {
      "epoch": 4.159973291962059,
      "grad_norm": 0.15791170299053192,
      "learning_rate": 1.6911236599171077e-06,
      "loss": 0.0104,
      "step": 2541960
    },
    {
      "epoch": 4.1600060224007125,
      "grad_norm": 0.14396226406097412,
      "learning_rate": 1.6910577677035905e-06,
      "loss": 0.0068,
      "step": 2541980
    },
    {
      "epoch": 4.160038752839365,
      "grad_norm": 0.15902191400527954,
      "learning_rate": 1.6909918754900734e-06,
      "loss": 0.009,
      "step": 2542000
    },
    {
      "epoch": 4.160071483278019,
      "grad_norm": 0.24364332854747772,
      "learning_rate": 1.6909259832765562e-06,
      "loss": 0.0062,
      "step": 2542020
    },
    {
      "epoch": 4.160104213716672,
      "grad_norm": 0.32146623730659485,
      "learning_rate": 1.6908600910630393e-06,
      "loss": 0.0121,
      "step": 2542040
    },
    {
      "epoch": 4.160136944155326,
      "grad_norm": 0.5609022378921509,
      "learning_rate": 1.6907941988495223e-06,
      "loss": 0.0093,
      "step": 2542060
    },
    {
      "epoch": 4.160169674593979,
      "grad_norm": 0.261859655380249,
      "learning_rate": 1.690728306636005e-06,
      "loss": 0.0124,
      "step": 2542080
    },
    {
      "epoch": 4.160202405032632,
      "grad_norm": 0.33018818497657776,
      "learning_rate": 1.6906624144224877e-06,
      "loss": 0.0124,
      "step": 2542100
    },
    {
      "epoch": 4.160235135471286,
      "grad_norm": 0.1555820256471634,
      "learning_rate": 1.6905965222089707e-06,
      "loss": 0.0071,
      "step": 2542120
    },
    {
      "epoch": 4.160267865909939,
      "grad_norm": 0.1084468886256218,
      "learning_rate": 1.6905306299954534e-06,
      "loss": 0.0064,
      "step": 2542140
    },
    {
      "epoch": 4.160300596348592,
      "grad_norm": 0.37432122230529785,
      "learning_rate": 1.6904647377819364e-06,
      "loss": 0.0097,
      "step": 2542160
    },
    {
      "epoch": 4.160333326787246,
      "grad_norm": 0.4007066786289215,
      "learning_rate": 1.6903988455684191e-06,
      "loss": 0.0056,
      "step": 2542180
    },
    {
      "epoch": 4.160366057225899,
      "grad_norm": 0.2247503697872162,
      "learning_rate": 1.690332953354902e-06,
      "loss": 0.0086,
      "step": 2542200
    },
    {
      "epoch": 4.160398787664552,
      "grad_norm": 0.3366762697696686,
      "learning_rate": 1.6902670611413852e-06,
      "loss": 0.0064,
      "step": 2542220
    },
    {
      "epoch": 4.160431518103206,
      "grad_norm": 0.2639201581478119,
      "learning_rate": 1.690201168927868e-06,
      "loss": 0.0059,
      "step": 2542240
    },
    {
      "epoch": 4.160464248541859,
      "grad_norm": 0.4282551109790802,
      "learning_rate": 1.690135276714351e-06,
      "loss": 0.0095,
      "step": 2542260
    },
    {
      "epoch": 4.160496978980512,
      "grad_norm": 0.3636094927787781,
      "learning_rate": 1.6900693845008337e-06,
      "loss": 0.0132,
      "step": 2542280
    },
    {
      "epoch": 4.160529709419166,
      "grad_norm": 0.2817068099975586,
      "learning_rate": 1.6900034922873166e-06,
      "loss": 0.0117,
      "step": 2542300
    },
    {
      "epoch": 4.160562439857819,
      "grad_norm": 0.5763305425643921,
      "learning_rate": 1.6899376000737994e-06,
      "loss": 0.0096,
      "step": 2542320
    },
    {
      "epoch": 4.160595170296473,
      "grad_norm": 0.365953654050827,
      "learning_rate": 1.6898717078602821e-06,
      "loss": 0.0066,
      "step": 2542340
    },
    {
      "epoch": 4.1606279007351255,
      "grad_norm": 0.3388462960720062,
      "learning_rate": 1.689805815646765e-06,
      "loss": 0.0081,
      "step": 2542360
    },
    {
      "epoch": 4.160660631173779,
      "grad_norm": 0.2391732633113861,
      "learning_rate": 1.6897399234332482e-06,
      "loss": 0.0083,
      "step": 2542380
    },
    {
      "epoch": 4.160693361612433,
      "grad_norm": 0.1465996354818344,
      "learning_rate": 1.689674031219731e-06,
      "loss": 0.0111,
      "step": 2542400
    },
    {
      "epoch": 4.160726092051085,
      "grad_norm": 0.1354081928730011,
      "learning_rate": 1.689608139006214e-06,
      "loss": 0.0125,
      "step": 2542420
    },
    {
      "epoch": 4.160758822489739,
      "grad_norm": 0.1817300170660019,
      "learning_rate": 1.6895422467926967e-06,
      "loss": 0.0078,
      "step": 2542440
    },
    {
      "epoch": 4.1607915529283925,
      "grad_norm": 0.2175704836845398,
      "learning_rate": 1.6894763545791796e-06,
      "loss": 0.0075,
      "step": 2542460
    },
    {
      "epoch": 4.160824283367045,
      "grad_norm": 0.1692802608013153,
      "learning_rate": 1.6894104623656623e-06,
      "loss": 0.0115,
      "step": 2542480
    },
    {
      "epoch": 4.160857013805699,
      "grad_norm": 0.15811611711978912,
      "learning_rate": 1.6893445701521453e-06,
      "loss": 0.008,
      "step": 2542500
    },
    {
      "epoch": 4.160889744244352,
      "grad_norm": 0.8732289671897888,
      "learning_rate": 1.689278677938628e-06,
      "loss": 0.0105,
      "step": 2542520
    },
    {
      "epoch": 4.160922474683006,
      "grad_norm": 0.13567230105400085,
      "learning_rate": 1.689212785725111e-06,
      "loss": 0.0114,
      "step": 2542540
    },
    {
      "epoch": 4.160955205121659,
      "grad_norm": 0.20515106618404388,
      "learning_rate": 1.689146893511594e-06,
      "loss": 0.0076,
      "step": 2542560
    },
    {
      "epoch": 4.160987935560312,
      "grad_norm": 0.24894993007183075,
      "learning_rate": 1.6890810012980769e-06,
      "loss": 0.009,
      "step": 2542580
    },
    {
      "epoch": 4.161020665998966,
      "grad_norm": 0.05107230693101883,
      "learning_rate": 1.6890151090845596e-06,
      "loss": 0.0092,
      "step": 2542600
    },
    {
      "epoch": 4.1610533964376195,
      "grad_norm": 0.2712893486022949,
      "learning_rate": 1.6889492168710426e-06,
      "loss": 0.0094,
      "step": 2542620
    },
    {
      "epoch": 4.161086126876272,
      "grad_norm": 0.6372936964035034,
      "learning_rate": 1.6888833246575253e-06,
      "loss": 0.0126,
      "step": 2542640
    },
    {
      "epoch": 4.161118857314926,
      "grad_norm": 0.14412589371204376,
      "learning_rate": 1.6888174324440083e-06,
      "loss": 0.0043,
      "step": 2542660
    },
    {
      "epoch": 4.161151587753579,
      "grad_norm": 0.09544851630926132,
      "learning_rate": 1.688751540230491e-06,
      "loss": 0.0061,
      "step": 2542680
    },
    {
      "epoch": 4.161184318192232,
      "grad_norm": 0.3931344747543335,
      "learning_rate": 1.688685648016974e-06,
      "loss": 0.0056,
      "step": 2542700
    },
    {
      "epoch": 4.161217048630886,
      "grad_norm": 0.1927151083946228,
      "learning_rate": 1.6886197558034567e-06,
      "loss": 0.009,
      "step": 2542720
    },
    {
      "epoch": 4.161249779069539,
      "grad_norm": 0.076880544424057,
      "learning_rate": 1.6885538635899399e-06,
      "loss": 0.0118,
      "step": 2542740
    },
    {
      "epoch": 4.161282509508192,
      "grad_norm": 0.13844604790210724,
      "learning_rate": 1.6884879713764226e-06,
      "loss": 0.0091,
      "step": 2542760
    },
    {
      "epoch": 4.161315239946846,
      "grad_norm": 0.5763823986053467,
      "learning_rate": 1.6884220791629056e-06,
      "loss": 0.0111,
      "step": 2542780
    },
    {
      "epoch": 4.161347970385499,
      "grad_norm": 0.2861657738685608,
      "learning_rate": 1.6883561869493883e-06,
      "loss": 0.0104,
      "step": 2542800
    },
    {
      "epoch": 4.161380700824153,
      "grad_norm": 0.23836755752563477,
      "learning_rate": 1.6882902947358712e-06,
      "loss": 0.0103,
      "step": 2542820
    },
    {
      "epoch": 4.1614134312628055,
      "grad_norm": 0.06206110492348671,
      "learning_rate": 1.688224402522354e-06,
      "loss": 0.0072,
      "step": 2542840
    },
    {
      "epoch": 4.161446161701459,
      "grad_norm": 0.28439587354660034,
      "learning_rate": 1.688158510308837e-06,
      "loss": 0.0105,
      "step": 2542860
    },
    {
      "epoch": 4.161478892140113,
      "grad_norm": 0.37021327018737793,
      "learning_rate": 1.6880926180953197e-06,
      "loss": 0.0088,
      "step": 2542880
    },
    {
      "epoch": 4.161511622578765,
      "grad_norm": 0.45767199993133545,
      "learning_rate": 1.6880267258818026e-06,
      "loss": 0.0088,
      "step": 2542900
    },
    {
      "epoch": 4.161544353017419,
      "grad_norm": 0.10036439448595047,
      "learning_rate": 1.6879608336682856e-06,
      "loss": 0.0082,
      "step": 2542920
    },
    {
      "epoch": 4.161577083456073,
      "grad_norm": 0.13483910262584686,
      "learning_rate": 1.6878949414547685e-06,
      "loss": 0.0075,
      "step": 2542940
    },
    {
      "epoch": 4.161609813894726,
      "grad_norm": 0.1926261931657791,
      "learning_rate": 1.6878290492412513e-06,
      "loss": 0.0098,
      "step": 2542960
    },
    {
      "epoch": 4.161642544333379,
      "grad_norm": 1.1071807146072388,
      "learning_rate": 1.6877631570277342e-06,
      "loss": 0.0115,
      "step": 2542980
    },
    {
      "epoch": 4.1616752747720325,
      "grad_norm": 0.19042660295963287,
      "learning_rate": 1.687697264814217e-06,
      "loss": 0.0068,
      "step": 2543000
    },
    {
      "epoch": 4.161708005210686,
      "grad_norm": 0.3977263867855072,
      "learning_rate": 1.6876313726007e-06,
      "loss": 0.0131,
      "step": 2543020
    },
    {
      "epoch": 4.161740735649339,
      "grad_norm": 0.13735103607177734,
      "learning_rate": 1.6875654803871827e-06,
      "loss": 0.0096,
      "step": 2543040
    },
    {
      "epoch": 4.161773466087992,
      "grad_norm": 0.1578589826822281,
      "learning_rate": 1.6874995881736656e-06,
      "loss": 0.0106,
      "step": 2543060
    },
    {
      "epoch": 4.161806196526646,
      "grad_norm": 0.14998342096805573,
      "learning_rate": 1.6874336959601488e-06,
      "loss": 0.0082,
      "step": 2543080
    },
    {
      "epoch": 4.1618389269653,
      "grad_norm": 0.1969609558582306,
      "learning_rate": 1.6873678037466315e-06,
      "loss": 0.0094,
      "step": 2543100
    },
    {
      "epoch": 4.161871657403952,
      "grad_norm": 0.3215574324131012,
      "learning_rate": 1.6873019115331143e-06,
      "loss": 0.0103,
      "step": 2543120
    },
    {
      "epoch": 4.161904387842606,
      "grad_norm": 0.4035719931125641,
      "learning_rate": 1.6872360193195972e-06,
      "loss": 0.0085,
      "step": 2543140
    },
    {
      "epoch": 4.1619371182812595,
      "grad_norm": 0.17233407497406006,
      "learning_rate": 1.68717012710608e-06,
      "loss": 0.0135,
      "step": 2543160
    },
    {
      "epoch": 4.161969848719912,
      "grad_norm": 0.2027888149023056,
      "learning_rate": 1.6871042348925629e-06,
      "loss": 0.006,
      "step": 2543180
    },
    {
      "epoch": 4.162002579158566,
      "grad_norm": 0.15286217629909515,
      "learning_rate": 1.6870383426790456e-06,
      "loss": 0.0084,
      "step": 2543200
    },
    {
      "epoch": 4.162035309597219,
      "grad_norm": 0.3839210569858551,
      "learning_rate": 1.6869724504655286e-06,
      "loss": 0.012,
      "step": 2543220
    },
    {
      "epoch": 4.162068040035873,
      "grad_norm": 0.15434330701828003,
      "learning_rate": 1.6869065582520113e-06,
      "loss": 0.0084,
      "step": 2543240
    },
    {
      "epoch": 4.162100770474526,
      "grad_norm": 0.4738502502441406,
      "learning_rate": 1.6868406660384945e-06,
      "loss": 0.0113,
      "step": 2543260
    },
    {
      "epoch": 4.162133500913179,
      "grad_norm": 0.1697186976671219,
      "learning_rate": 1.6867747738249774e-06,
      "loss": 0.0062,
      "step": 2543280
    },
    {
      "epoch": 4.162166231351833,
      "grad_norm": 0.3516773283481598,
      "learning_rate": 1.6867088816114602e-06,
      "loss": 0.0079,
      "step": 2543300
    },
    {
      "epoch": 4.1621989617904855,
      "grad_norm": 0.09731768071651459,
      "learning_rate": 1.686642989397943e-06,
      "loss": 0.0081,
      "step": 2543320
    },
    {
      "epoch": 4.162231692229139,
      "grad_norm": 0.10718391835689545,
      "learning_rate": 1.6865770971844259e-06,
      "loss": 0.0092,
      "step": 2543340
    },
    {
      "epoch": 4.162264422667793,
      "grad_norm": 0.11636144667863846,
      "learning_rate": 1.6865112049709086e-06,
      "loss": 0.0063,
      "step": 2543360
    },
    {
      "epoch": 4.162297153106446,
      "grad_norm": 0.2094033807516098,
      "learning_rate": 1.6864453127573916e-06,
      "loss": 0.0085,
      "step": 2543380
    },
    {
      "epoch": 4.162329883545099,
      "grad_norm": 0.1523628532886505,
      "learning_rate": 1.6863794205438743e-06,
      "loss": 0.0098,
      "step": 2543400
    },
    {
      "epoch": 4.162362613983753,
      "grad_norm": 0.7367880940437317,
      "learning_rate": 1.6863135283303573e-06,
      "loss": 0.0088,
      "step": 2543420
    },
    {
      "epoch": 4.162395344422406,
      "grad_norm": 0.47187137603759766,
      "learning_rate": 1.6862476361168404e-06,
      "loss": 0.0154,
      "step": 2543440
    },
    {
      "epoch": 4.162428074861059,
      "grad_norm": 0.06873849779367447,
      "learning_rate": 1.6861817439033232e-06,
      "loss": 0.0067,
      "step": 2543460
    },
    {
      "epoch": 4.1624608052997125,
      "grad_norm": 0.48895952105522156,
      "learning_rate": 1.6861158516898061e-06,
      "loss": 0.0108,
      "step": 2543480
    },
    {
      "epoch": 4.162493535738366,
      "grad_norm": 0.40972840785980225,
      "learning_rate": 1.6860499594762888e-06,
      "loss": 0.0077,
      "step": 2543500
    },
    {
      "epoch": 4.16252626617702,
      "grad_norm": 0.20177048444747925,
      "learning_rate": 1.6859840672627718e-06,
      "loss": 0.0083,
      "step": 2543520
    },
    {
      "epoch": 4.162558996615672,
      "grad_norm": 0.08145882934331894,
      "learning_rate": 1.6859181750492545e-06,
      "loss": 0.0106,
      "step": 2543540
    },
    {
      "epoch": 4.162591727054326,
      "grad_norm": 0.4319765269756317,
      "learning_rate": 1.6858522828357373e-06,
      "loss": 0.0095,
      "step": 2543560
    },
    {
      "epoch": 4.16262445749298,
      "grad_norm": 0.3603261411190033,
      "learning_rate": 1.6857863906222202e-06,
      "loss": 0.0071,
      "step": 2543580
    },
    {
      "epoch": 4.162657187931632,
      "grad_norm": 0.03911576420068741,
      "learning_rate": 1.685720498408703e-06,
      "loss": 0.0075,
      "step": 2543600
    },
    {
      "epoch": 4.162689918370286,
      "grad_norm": 0.31952396035194397,
      "learning_rate": 1.6856546061951861e-06,
      "loss": 0.0073,
      "step": 2543620
    },
    {
      "epoch": 4.1627226488089395,
      "grad_norm": 0.25382763147354126,
      "learning_rate": 1.685588713981669e-06,
      "loss": 0.0116,
      "step": 2543640
    },
    {
      "epoch": 4.162755379247593,
      "grad_norm": 0.23488619923591614,
      "learning_rate": 1.6855228217681518e-06,
      "loss": 0.0072,
      "step": 2543660
    },
    {
      "epoch": 4.162788109686246,
      "grad_norm": 0.384137362241745,
      "learning_rate": 1.6854569295546348e-06,
      "loss": 0.0104,
      "step": 2543680
    },
    {
      "epoch": 4.162820840124899,
      "grad_norm": 0.5170185565948486,
      "learning_rate": 1.6853910373411175e-06,
      "loss": 0.0085,
      "step": 2543700
    },
    {
      "epoch": 4.162853570563553,
      "grad_norm": 0.11201796680688858,
      "learning_rate": 1.6853251451276005e-06,
      "loss": 0.0061,
      "step": 2543720
    },
    {
      "epoch": 4.162886301002206,
      "grad_norm": 0.06501790136098862,
      "learning_rate": 1.6852592529140832e-06,
      "loss": 0.0119,
      "step": 2543740
    },
    {
      "epoch": 4.162919031440859,
      "grad_norm": 0.211093470454216,
      "learning_rate": 1.685193360700566e-06,
      "loss": 0.0081,
      "step": 2543760
    },
    {
      "epoch": 4.162951761879513,
      "grad_norm": 0.17194506525993347,
      "learning_rate": 1.685127468487049e-06,
      "loss": 0.0082,
      "step": 2543780
    },
    {
      "epoch": 4.1629844923181665,
      "grad_norm": 0.24302738904953003,
      "learning_rate": 1.685061576273532e-06,
      "loss": 0.0096,
      "step": 2543800
    },
    {
      "epoch": 4.163017222756819,
      "grad_norm": 0.0760354995727539,
      "learning_rate": 1.6849956840600148e-06,
      "loss": 0.0069,
      "step": 2543820
    },
    {
      "epoch": 4.163049953195473,
      "grad_norm": 0.1745603382587433,
      "learning_rate": 1.6849297918464978e-06,
      "loss": 0.0103,
      "step": 2543840
    },
    {
      "epoch": 4.163082683634126,
      "grad_norm": 0.35344210267066956,
      "learning_rate": 1.6848638996329805e-06,
      "loss": 0.0089,
      "step": 2543860
    },
    {
      "epoch": 4.163115414072779,
      "grad_norm": 0.4087444245815277,
      "learning_rate": 1.6847980074194634e-06,
      "loss": 0.0098,
      "step": 2543880
    },
    {
      "epoch": 4.163148144511433,
      "grad_norm": 0.17206959426403046,
      "learning_rate": 1.6847321152059462e-06,
      "loss": 0.0131,
      "step": 2543900
    },
    {
      "epoch": 4.163180874950086,
      "grad_norm": 0.07644560188055038,
      "learning_rate": 1.6846662229924291e-06,
      "loss": 0.0068,
      "step": 2543920
    },
    {
      "epoch": 4.163213605388739,
      "grad_norm": 0.2683064043521881,
      "learning_rate": 1.6846003307789119e-06,
      "loss": 0.0114,
      "step": 2543940
    },
    {
      "epoch": 4.163246335827393,
      "grad_norm": 0.18809586763381958,
      "learning_rate": 1.684534438565395e-06,
      "loss": 0.0103,
      "step": 2543960
    },
    {
      "epoch": 4.163279066266046,
      "grad_norm": 0.12502828240394592,
      "learning_rate": 1.6844685463518778e-06,
      "loss": 0.0055,
      "step": 2543980
    },
    {
      "epoch": 4.1633117967047,
      "grad_norm": 0.1192837804555893,
      "learning_rate": 1.6844026541383607e-06,
      "loss": 0.0065,
      "step": 2544000
    },
    {
      "epoch": 4.1633445271433525,
      "grad_norm": 0.1819114089012146,
      "learning_rate": 1.6843367619248435e-06,
      "loss": 0.0087,
      "step": 2544020
    },
    {
      "epoch": 4.163377257582006,
      "grad_norm": 0.14224699139595032,
      "learning_rate": 1.6842708697113264e-06,
      "loss": 0.0094,
      "step": 2544040
    },
    {
      "epoch": 4.16340998802066,
      "grad_norm": 0.3696281611919403,
      "learning_rate": 1.6842049774978092e-06,
      "loss": 0.0125,
      "step": 2544060
    },
    {
      "epoch": 4.163442718459313,
      "grad_norm": 0.1374402791261673,
      "learning_rate": 1.6841390852842921e-06,
      "loss": 0.0076,
      "step": 2544080
    },
    {
      "epoch": 4.163475448897966,
      "grad_norm": 0.721116304397583,
      "learning_rate": 1.6840731930707749e-06,
      "loss": 0.0129,
      "step": 2544100
    },
    {
      "epoch": 4.1635081793366195,
      "grad_norm": 0.08501612395048141,
      "learning_rate": 1.6840073008572578e-06,
      "loss": 0.0116,
      "step": 2544120
    },
    {
      "epoch": 4.163540909775273,
      "grad_norm": 0.3064102530479431,
      "learning_rate": 1.6839414086437408e-06,
      "loss": 0.0094,
      "step": 2544140
    },
    {
      "epoch": 4.163573640213926,
      "grad_norm": 0.28332701325416565,
      "learning_rate": 1.6838755164302237e-06,
      "loss": 0.01,
      "step": 2544160
    },
    {
      "epoch": 4.163606370652579,
      "grad_norm": 0.3032219707965851,
      "learning_rate": 1.6838096242167064e-06,
      "loss": 0.0096,
      "step": 2544180
    },
    {
      "epoch": 4.163639101091233,
      "grad_norm": 0.44873467087745667,
      "learning_rate": 1.6837437320031894e-06,
      "loss": 0.0064,
      "step": 2544200
    },
    {
      "epoch": 4.163671831529886,
      "grad_norm": 0.16017533838748932,
      "learning_rate": 1.6836778397896721e-06,
      "loss": 0.0087,
      "step": 2544220
    },
    {
      "epoch": 4.163704561968539,
      "grad_norm": 0.16579031944274902,
      "learning_rate": 1.683611947576155e-06,
      "loss": 0.0095,
      "step": 2544240
    },
    {
      "epoch": 4.163737292407193,
      "grad_norm": 0.07882101833820343,
      "learning_rate": 1.6835460553626378e-06,
      "loss": 0.0116,
      "step": 2544260
    },
    {
      "epoch": 4.1637700228458465,
      "grad_norm": 0.17543990910053253,
      "learning_rate": 1.6834801631491208e-06,
      "loss": 0.0098,
      "step": 2544280
    },
    {
      "epoch": 4.163802753284499,
      "grad_norm": 0.38397565484046936,
      "learning_rate": 1.6834142709356035e-06,
      "loss": 0.0094,
      "step": 2544300
    },
    {
      "epoch": 4.163835483723153,
      "grad_norm": 0.08491315692663193,
      "learning_rate": 1.6833483787220867e-06,
      "loss": 0.0121,
      "step": 2544320
    },
    {
      "epoch": 4.163868214161806,
      "grad_norm": 0.26371416449546814,
      "learning_rate": 1.6832824865085694e-06,
      "loss": 0.0083,
      "step": 2544340
    },
    {
      "epoch": 4.163900944600459,
      "grad_norm": 0.221784770488739,
      "learning_rate": 1.6832165942950524e-06,
      "loss": 0.0101,
      "step": 2544360
    },
    {
      "epoch": 4.163933675039113,
      "grad_norm": 0.5742854475975037,
      "learning_rate": 1.6831507020815351e-06,
      "loss": 0.016,
      "step": 2544380
    },
    {
      "epoch": 4.163966405477766,
      "grad_norm": 0.10094893723726273,
      "learning_rate": 1.683084809868018e-06,
      "loss": 0.0107,
      "step": 2544400
    },
    {
      "epoch": 4.16399913591642,
      "grad_norm": 0.3105891942977905,
      "learning_rate": 1.6830189176545008e-06,
      "loss": 0.0089,
      "step": 2544420
    },
    {
      "epoch": 4.164031866355073,
      "grad_norm": 0.18890784680843353,
      "learning_rate": 1.6829530254409838e-06,
      "loss": 0.0112,
      "step": 2544440
    },
    {
      "epoch": 4.164064596793726,
      "grad_norm": 0.2279389649629593,
      "learning_rate": 1.6828871332274665e-06,
      "loss": 0.0122,
      "step": 2544460
    },
    {
      "epoch": 4.16409732723238,
      "grad_norm": 0.17775502800941467,
      "learning_rate": 1.6828212410139494e-06,
      "loss": 0.0082,
      "step": 2544480
    },
    {
      "epoch": 4.1641300576710325,
      "grad_norm": 0.11729811877012253,
      "learning_rate": 1.6827553488004326e-06,
      "loss": 0.0087,
      "step": 2544500
    },
    {
      "epoch": 4.164162788109686,
      "grad_norm": 0.14585617184638977,
      "learning_rate": 1.6826894565869153e-06,
      "loss": 0.0077,
      "step": 2544520
    },
    {
      "epoch": 4.16419551854834,
      "grad_norm": 0.26606398820877075,
      "learning_rate": 1.682623564373398e-06,
      "loss": 0.0094,
      "step": 2544540
    },
    {
      "epoch": 4.164228248986993,
      "grad_norm": 0.1542162150144577,
      "learning_rate": 1.682557672159881e-06,
      "loss": 0.0154,
      "step": 2544560
    },
    {
      "epoch": 4.164260979425646,
      "grad_norm": 0.37699875235557556,
      "learning_rate": 1.6824917799463638e-06,
      "loss": 0.0053,
      "step": 2544580
    },
    {
      "epoch": 4.1642937098643,
      "grad_norm": 0.289474755525589,
      "learning_rate": 1.6824258877328467e-06,
      "loss": 0.0092,
      "step": 2544600
    },
    {
      "epoch": 4.164326440302953,
      "grad_norm": 0.342862069606781,
      "learning_rate": 1.6823599955193295e-06,
      "loss": 0.0092,
      "step": 2544620
    },
    {
      "epoch": 4.164359170741606,
      "grad_norm": 0.11118859052658081,
      "learning_rate": 1.6822941033058124e-06,
      "loss": 0.0099,
      "step": 2544640
    },
    {
      "epoch": 4.1643919011802595,
      "grad_norm": 0.2166634351015091,
      "learning_rate": 1.6822282110922952e-06,
      "loss": 0.0085,
      "step": 2544660
    },
    {
      "epoch": 4.164424631618913,
      "grad_norm": 1.0327273607254028,
      "learning_rate": 1.6821623188787783e-06,
      "loss": 0.0113,
      "step": 2544680
    },
    {
      "epoch": 4.164457362057567,
      "grad_norm": 0.0811816155910492,
      "learning_rate": 1.6820964266652613e-06,
      "loss": 0.0073,
      "step": 2544700
    },
    {
      "epoch": 4.164490092496219,
      "grad_norm": 0.15109838545322418,
      "learning_rate": 1.682030534451744e-06,
      "loss": 0.0099,
      "step": 2544720
    },
    {
      "epoch": 4.164522822934873,
      "grad_norm": 0.24365463852882385,
      "learning_rate": 1.6819646422382268e-06,
      "loss": 0.0117,
      "step": 2544740
    },
    {
      "epoch": 4.164555553373527,
      "grad_norm": 0.1514364331960678,
      "learning_rate": 1.6818987500247097e-06,
      "loss": 0.0138,
      "step": 2544760
    },
    {
      "epoch": 4.164588283812179,
      "grad_norm": 0.21014408767223358,
      "learning_rate": 1.6818328578111924e-06,
      "loss": 0.0082,
      "step": 2544780
    },
    {
      "epoch": 4.164621014250833,
      "grad_norm": 0.29336705803871155,
      "learning_rate": 1.6817669655976754e-06,
      "loss": 0.0125,
      "step": 2544800
    },
    {
      "epoch": 4.1646537446894865,
      "grad_norm": 0.28285983204841614,
      "learning_rate": 1.6817010733841581e-06,
      "loss": 0.0134,
      "step": 2544820
    },
    {
      "epoch": 4.16468647512814,
      "grad_norm": 0.156459242105484,
      "learning_rate": 1.6816351811706413e-06,
      "loss": 0.0079,
      "step": 2544840
    },
    {
      "epoch": 4.164719205566793,
      "grad_norm": 0.061765156686306,
      "learning_rate": 1.6815692889571243e-06,
      "loss": 0.0101,
      "step": 2544860
    },
    {
      "epoch": 4.164751936005446,
      "grad_norm": 0.05980536341667175,
      "learning_rate": 1.681503396743607e-06,
      "loss": 0.0096,
      "step": 2544880
    },
    {
      "epoch": 4.1647846664441,
      "grad_norm": 0.3267693817615509,
      "learning_rate": 1.68143750453009e-06,
      "loss": 0.0074,
      "step": 2544900
    },
    {
      "epoch": 4.164817396882753,
      "grad_norm": 0.10738402605056763,
      "learning_rate": 1.6813716123165727e-06,
      "loss": 0.0061,
      "step": 2544920
    },
    {
      "epoch": 4.164850127321406,
      "grad_norm": 0.28695911169052124,
      "learning_rate": 1.6813057201030556e-06,
      "loss": 0.0087,
      "step": 2544940
    },
    {
      "epoch": 4.16488285776006,
      "grad_norm": 0.26509132981300354,
      "learning_rate": 1.6812398278895384e-06,
      "loss": 0.0108,
      "step": 2544960
    },
    {
      "epoch": 4.164915588198713,
      "grad_norm": 0.11988984793424606,
      "learning_rate": 1.6811739356760211e-06,
      "loss": 0.0075,
      "step": 2544980
    },
    {
      "epoch": 4.164948318637366,
      "grad_norm": 0.22848893702030182,
      "learning_rate": 1.681108043462504e-06,
      "loss": 0.0111,
      "step": 2545000
    },
    {
      "epoch": 4.16498104907602,
      "grad_norm": 0.46427467465400696,
      "learning_rate": 1.6810421512489872e-06,
      "loss": 0.0096,
      "step": 2545020
    },
    {
      "epoch": 4.165013779514673,
      "grad_norm": 0.39779147505760193,
      "learning_rate": 1.68097625903547e-06,
      "loss": 0.0107,
      "step": 2545040
    },
    {
      "epoch": 4.165046509953326,
      "grad_norm": 0.1271318793296814,
      "learning_rate": 1.680910366821953e-06,
      "loss": 0.0061,
      "step": 2545060
    },
    {
      "epoch": 4.16507924039198,
      "grad_norm": 0.4345819056034088,
      "learning_rate": 1.6808444746084357e-06,
      "loss": 0.0118,
      "step": 2545080
    },
    {
      "epoch": 4.165111970830633,
      "grad_norm": 0.08524860441684723,
      "learning_rate": 1.6807785823949186e-06,
      "loss": 0.0119,
      "step": 2545100
    },
    {
      "epoch": 4.165144701269287,
      "grad_norm": 0.12783993780612946,
      "learning_rate": 1.6807126901814014e-06,
      "loss": 0.0091,
      "step": 2545120
    },
    {
      "epoch": 4.1651774317079395,
      "grad_norm": 0.10398589074611664,
      "learning_rate": 1.6806467979678843e-06,
      "loss": 0.0127,
      "step": 2545140
    },
    {
      "epoch": 4.165210162146593,
      "grad_norm": 0.37816333770751953,
      "learning_rate": 1.680580905754367e-06,
      "loss": 0.0107,
      "step": 2545160
    },
    {
      "epoch": 4.165242892585247,
      "grad_norm": 0.2618578374385834,
      "learning_rate": 1.68051501354085e-06,
      "loss": 0.0145,
      "step": 2545180
    },
    {
      "epoch": 4.165275623023899,
      "grad_norm": 0.11277100443840027,
      "learning_rate": 1.680449121327333e-06,
      "loss": 0.0079,
      "step": 2545200
    },
    {
      "epoch": 4.165308353462553,
      "grad_norm": 0.04889312759041786,
      "learning_rate": 1.680383229113816e-06,
      "loss": 0.0102,
      "step": 2545220
    },
    {
      "epoch": 4.165341083901207,
      "grad_norm": 0.24931223690509796,
      "learning_rate": 1.6803173369002986e-06,
      "loss": 0.0102,
      "step": 2545240
    },
    {
      "epoch": 4.16537381433986,
      "grad_norm": 0.25189223885536194,
      "learning_rate": 1.6802514446867816e-06,
      "loss": 0.0078,
      "step": 2545260
    },
    {
      "epoch": 4.165406544778513,
      "grad_norm": 0.2127499133348465,
      "learning_rate": 1.6801855524732643e-06,
      "loss": 0.0149,
      "step": 2545280
    },
    {
      "epoch": 4.1654392752171665,
      "grad_norm": 0.29414233565330505,
      "learning_rate": 1.6801196602597473e-06,
      "loss": 0.01,
      "step": 2545300
    },
    {
      "epoch": 4.16547200565582,
      "grad_norm": 0.28008198738098145,
      "learning_rate": 1.68005376804623e-06,
      "loss": 0.0082,
      "step": 2545320
    },
    {
      "epoch": 4.165504736094473,
      "grad_norm": 0.063296377658844,
      "learning_rate": 1.679987875832713e-06,
      "loss": 0.0115,
      "step": 2545340
    },
    {
      "epoch": 4.165537466533126,
      "grad_norm": 0.46070048213005066,
      "learning_rate": 1.6799219836191957e-06,
      "loss": 0.0167,
      "step": 2545360
    },
    {
      "epoch": 4.16557019697178,
      "grad_norm": 0.22070743143558502,
      "learning_rate": 1.6798560914056789e-06,
      "loss": 0.0086,
      "step": 2545380
    },
    {
      "epoch": 4.165602927410433,
      "grad_norm": 0.1419125199317932,
      "learning_rate": 1.6797901991921616e-06,
      "loss": 0.0122,
      "step": 2545400
    },
    {
      "epoch": 4.165635657849086,
      "grad_norm": 0.2317989021539688,
      "learning_rate": 1.6797243069786446e-06,
      "loss": 0.0106,
      "step": 2545420
    },
    {
      "epoch": 4.16566838828774,
      "grad_norm": 0.37188661098480225,
      "learning_rate": 1.6796584147651273e-06,
      "loss": 0.0058,
      "step": 2545440
    },
    {
      "epoch": 4.1657011187263935,
      "grad_norm": 0.46909013390541077,
      "learning_rate": 1.6795925225516103e-06,
      "loss": 0.011,
      "step": 2545460
    },
    {
      "epoch": 4.165733849165046,
      "grad_norm": 0.16283872723579407,
      "learning_rate": 1.679526630338093e-06,
      "loss": 0.0097,
      "step": 2545480
    },
    {
      "epoch": 4.1657665796037,
      "grad_norm": 0.040187858045101166,
      "learning_rate": 1.679460738124576e-06,
      "loss": 0.0083,
      "step": 2545500
    },
    {
      "epoch": 4.165799310042353,
      "grad_norm": 0.3507612645626068,
      "learning_rate": 1.6793948459110587e-06,
      "loss": 0.0144,
      "step": 2545520
    },
    {
      "epoch": 4.165832040481007,
      "grad_norm": 0.10427405685186386,
      "learning_rate": 1.6793289536975416e-06,
      "loss": 0.0104,
      "step": 2545540
    },
    {
      "epoch": 4.16586477091966,
      "grad_norm": 0.17763586342334747,
      "learning_rate": 1.6792630614840246e-06,
      "loss": 0.0105,
      "step": 2545560
    },
    {
      "epoch": 4.165897501358313,
      "grad_norm": 0.12382840365171432,
      "learning_rate": 1.6791971692705075e-06,
      "loss": 0.0089,
      "step": 2545580
    },
    {
      "epoch": 4.165930231796967,
      "grad_norm": 0.4242725074291229,
      "learning_rate": 1.6791312770569903e-06,
      "loss": 0.0096,
      "step": 2545600
    },
    {
      "epoch": 4.16596296223562,
      "grad_norm": 0.3099038004875183,
      "learning_rate": 1.6790653848434732e-06,
      "loss": 0.0121,
      "step": 2545620
    },
    {
      "epoch": 4.165995692674273,
      "grad_norm": 0.2477729320526123,
      "learning_rate": 1.678999492629956e-06,
      "loss": 0.0076,
      "step": 2545640
    },
    {
      "epoch": 4.166028423112927,
      "grad_norm": 0.2257090061903,
      "learning_rate": 1.678933600416439e-06,
      "loss": 0.0109,
      "step": 2545660
    },
    {
      "epoch": 4.1660611535515795,
      "grad_norm": 0.20518450438976288,
      "learning_rate": 1.6788677082029217e-06,
      "loss": 0.0089,
      "step": 2545680
    },
    {
      "epoch": 4.166093883990233,
      "grad_norm": 0.19915257394313812,
      "learning_rate": 1.6788018159894046e-06,
      "loss": 0.0089,
      "step": 2545700
    },
    {
      "epoch": 4.166126614428887,
      "grad_norm": 0.031898051500320435,
      "learning_rate": 1.6787359237758878e-06,
      "loss": 0.0065,
      "step": 2545720
    },
    {
      "epoch": 4.16615934486754,
      "grad_norm": 0.6412889361381531,
      "learning_rate": 1.6786700315623705e-06,
      "loss": 0.0109,
      "step": 2545740
    },
    {
      "epoch": 4.166192075306193,
      "grad_norm": 0.17636413872241974,
      "learning_rate": 1.6786041393488533e-06,
      "loss": 0.007,
      "step": 2545760
    },
    {
      "epoch": 4.1662248057448465,
      "grad_norm": 0.12841133773326874,
      "learning_rate": 1.6785382471353362e-06,
      "loss": 0.0081,
      "step": 2545780
    },
    {
      "epoch": 4.1662575361835,
      "grad_norm": 0.6618142127990723,
      "learning_rate": 1.678472354921819e-06,
      "loss": 0.0068,
      "step": 2545800
    },
    {
      "epoch": 4.166290266622153,
      "grad_norm": 0.29686209559440613,
      "learning_rate": 1.678406462708302e-06,
      "loss": 0.0117,
      "step": 2545820
    },
    {
      "epoch": 4.166322997060806,
      "grad_norm": 0.2053069919347763,
      "learning_rate": 1.6783405704947846e-06,
      "loss": 0.0085,
      "step": 2545840
    },
    {
      "epoch": 4.16635572749946,
      "grad_norm": 0.12243447452783585,
      "learning_rate": 1.6782746782812676e-06,
      "loss": 0.0098,
      "step": 2545860
    },
    {
      "epoch": 4.166388457938114,
      "grad_norm": 0.1534804254770279,
      "learning_rate": 1.6782087860677503e-06,
      "loss": 0.009,
      "step": 2545880
    },
    {
      "epoch": 4.166421188376766,
      "grad_norm": 0.3506163954734802,
      "learning_rate": 1.6781428938542335e-06,
      "loss": 0.0098,
      "step": 2545900
    },
    {
      "epoch": 4.16645391881542,
      "grad_norm": 0.2992973029613495,
      "learning_rate": 1.6780770016407164e-06,
      "loss": 0.0089,
      "step": 2545920
    },
    {
      "epoch": 4.1664866492540735,
      "grad_norm": 0.47153061628341675,
      "learning_rate": 1.6780111094271992e-06,
      "loss": 0.0118,
      "step": 2545940
    },
    {
      "epoch": 4.166519379692726,
      "grad_norm": 0.08486252278089523,
      "learning_rate": 1.677945217213682e-06,
      "loss": 0.0076,
      "step": 2545960
    },
    {
      "epoch": 4.16655211013138,
      "grad_norm": 0.28479525446891785,
      "learning_rate": 1.6778793250001649e-06,
      "loss": 0.0126,
      "step": 2545980
    },
    {
      "epoch": 4.166584840570033,
      "grad_norm": 0.09943514317274094,
      "learning_rate": 1.6778134327866476e-06,
      "loss": 0.0072,
      "step": 2546000
    },
    {
      "epoch": 4.166617571008687,
      "grad_norm": 0.1519172042608261,
      "learning_rate": 1.6777475405731306e-06,
      "loss": 0.0127,
      "step": 2546020
    },
    {
      "epoch": 4.16665030144734,
      "grad_norm": 0.27244818210601807,
      "learning_rate": 1.6776816483596133e-06,
      "loss": 0.009,
      "step": 2546040
    },
    {
      "epoch": 4.166683031885993,
      "grad_norm": 0.21876230835914612,
      "learning_rate": 1.6776157561460963e-06,
      "loss": 0.0087,
      "step": 2546060
    },
    {
      "epoch": 4.166715762324647,
      "grad_norm": 0.23535692691802979,
      "learning_rate": 1.6775498639325794e-06,
      "loss": 0.0106,
      "step": 2546080
    },
    {
      "epoch": 4.1667484927633,
      "grad_norm": 0.12309370189905167,
      "learning_rate": 1.6774839717190622e-06,
      "loss": 0.0077,
      "step": 2546100
    },
    {
      "epoch": 4.166781223201953,
      "grad_norm": 0.333503782749176,
      "learning_rate": 1.6774180795055451e-06,
      "loss": 0.0132,
      "step": 2546120
    },
    {
      "epoch": 4.166813953640607,
      "grad_norm": 0.47779929637908936,
      "learning_rate": 1.6773521872920279e-06,
      "loss": 0.0084,
      "step": 2546140
    },
    {
      "epoch": 4.16684668407926,
      "grad_norm": 0.34823930263519287,
      "learning_rate": 1.6772862950785108e-06,
      "loss": 0.0097,
      "step": 2546160
    },
    {
      "epoch": 4.166879414517913,
      "grad_norm": 0.24284130334854126,
      "learning_rate": 1.6772204028649935e-06,
      "loss": 0.0077,
      "step": 2546180
    },
    {
      "epoch": 4.166912144956567,
      "grad_norm": 0.17043536901474,
      "learning_rate": 1.6771545106514763e-06,
      "loss": 0.0063,
      "step": 2546200
    },
    {
      "epoch": 4.16694487539522,
      "grad_norm": 0.15586179494857788,
      "learning_rate": 1.6770886184379592e-06,
      "loss": 0.0065,
      "step": 2546220
    },
    {
      "epoch": 4.166977605833873,
      "grad_norm": 0.1753830462694168,
      "learning_rate": 1.677022726224442e-06,
      "loss": 0.0098,
      "step": 2546240
    },
    {
      "epoch": 4.167010336272527,
      "grad_norm": 0.11517314612865448,
      "learning_rate": 1.6769568340109251e-06,
      "loss": 0.0069,
      "step": 2546260
    },
    {
      "epoch": 4.16704306671118,
      "grad_norm": 0.09452515095472336,
      "learning_rate": 1.676890941797408e-06,
      "loss": 0.0133,
      "step": 2546280
    },
    {
      "epoch": 4.167075797149834,
      "grad_norm": 0.09260173887014389,
      "learning_rate": 1.6768250495838908e-06,
      "loss": 0.0063,
      "step": 2546300
    },
    {
      "epoch": 4.1671085275884865,
      "grad_norm": 0.09950456768274307,
      "learning_rate": 1.6767591573703738e-06,
      "loss": 0.0072,
      "step": 2546320
    },
    {
      "epoch": 4.16714125802714,
      "grad_norm": 0.21078011393547058,
      "learning_rate": 1.6766932651568565e-06,
      "loss": 0.0086,
      "step": 2546340
    },
    {
      "epoch": 4.167173988465794,
      "grad_norm": 0.26514631509780884,
      "learning_rate": 1.6766273729433395e-06,
      "loss": 0.0081,
      "step": 2546360
    },
    {
      "epoch": 4.167206718904446,
      "grad_norm": 0.15403901040554047,
      "learning_rate": 1.6765614807298222e-06,
      "loss": 0.0157,
      "step": 2546380
    },
    {
      "epoch": 4.1672394493431,
      "grad_norm": 0.1522533893585205,
      "learning_rate": 1.676495588516305e-06,
      "loss": 0.0111,
      "step": 2546400
    },
    {
      "epoch": 4.167272179781754,
      "grad_norm": 0.14557915925979614,
      "learning_rate": 1.676429696302788e-06,
      "loss": 0.0119,
      "step": 2546420
    },
    {
      "epoch": 4.167304910220407,
      "grad_norm": 0.17117011547088623,
      "learning_rate": 1.676363804089271e-06,
      "loss": 0.0106,
      "step": 2546440
    },
    {
      "epoch": 4.16733764065906,
      "grad_norm": 0.08364718407392502,
      "learning_rate": 1.6762979118757538e-06,
      "loss": 0.0071,
      "step": 2546460
    },
    {
      "epoch": 4.1673703710977135,
      "grad_norm": 0.31682369112968445,
      "learning_rate": 1.6762320196622368e-06,
      "loss": 0.0097,
      "step": 2546480
    },
    {
      "epoch": 4.167403101536367,
      "grad_norm": 0.08010636270046234,
      "learning_rate": 1.6761661274487195e-06,
      "loss": 0.0087,
      "step": 2546500
    },
    {
      "epoch": 4.16743583197502,
      "grad_norm": 0.3540790379047394,
      "learning_rate": 1.6761002352352025e-06,
      "loss": 0.0072,
      "step": 2546520
    },
    {
      "epoch": 4.167468562413673,
      "grad_norm": 0.08470888435840607,
      "learning_rate": 1.6760343430216852e-06,
      "loss": 0.0069,
      "step": 2546540
    },
    {
      "epoch": 4.167501292852327,
      "grad_norm": 0.21272803843021393,
      "learning_rate": 1.6759684508081681e-06,
      "loss": 0.0099,
      "step": 2546560
    },
    {
      "epoch": 4.1675340232909805,
      "grad_norm": 0.11572083830833435,
      "learning_rate": 1.6759025585946509e-06,
      "loss": 0.0079,
      "step": 2546580
    },
    {
      "epoch": 4.167566753729633,
      "grad_norm": 0.2511866092681885,
      "learning_rate": 1.675836666381134e-06,
      "loss": 0.0053,
      "step": 2546600
    },
    {
      "epoch": 4.167599484168287,
      "grad_norm": 0.1553780436515808,
      "learning_rate": 1.6757707741676168e-06,
      "loss": 0.0064,
      "step": 2546620
    },
    {
      "epoch": 4.16763221460694,
      "grad_norm": 0.664757251739502,
      "learning_rate": 1.6757048819540997e-06,
      "loss": 0.014,
      "step": 2546640
    },
    {
      "epoch": 4.167664945045593,
      "grad_norm": 0.35098105669021606,
      "learning_rate": 1.6756389897405825e-06,
      "loss": 0.0067,
      "step": 2546660
    },
    {
      "epoch": 4.167697675484247,
      "grad_norm": 0.23334607481956482,
      "learning_rate": 1.6755730975270654e-06,
      "loss": 0.0102,
      "step": 2546680
    },
    {
      "epoch": 4.1677304059229,
      "grad_norm": 0.10911405086517334,
      "learning_rate": 1.6755072053135482e-06,
      "loss": 0.0058,
      "step": 2546700
    },
    {
      "epoch": 4.167763136361554,
      "grad_norm": 0.29427066445350647,
      "learning_rate": 1.6754413131000311e-06,
      "loss": 0.01,
      "step": 2546720
    },
    {
      "epoch": 4.167795866800207,
      "grad_norm": 0.1721949726343155,
      "learning_rate": 1.6753754208865139e-06,
      "loss": 0.0089,
      "step": 2546740
    },
    {
      "epoch": 4.16782859723886,
      "grad_norm": 0.3788105547428131,
      "learning_rate": 1.6753095286729968e-06,
      "loss": 0.0097,
      "step": 2546760
    },
    {
      "epoch": 4.167861327677514,
      "grad_norm": 0.11261873692274094,
      "learning_rate": 1.6752436364594798e-06,
      "loss": 0.0064,
      "step": 2546780
    },
    {
      "epoch": 4.1678940581161665,
      "grad_norm": 0.05168101564049721,
      "learning_rate": 1.6751777442459627e-06,
      "loss": 0.0103,
      "step": 2546800
    },
    {
      "epoch": 4.16792678855482,
      "grad_norm": 0.284111350774765,
      "learning_rate": 1.6751118520324455e-06,
      "loss": 0.0117,
      "step": 2546820
    },
    {
      "epoch": 4.167959518993474,
      "grad_norm": 0.16129562258720398,
      "learning_rate": 1.6750459598189284e-06,
      "loss": 0.0071,
      "step": 2546840
    },
    {
      "epoch": 4.167992249432127,
      "grad_norm": 0.328677237033844,
      "learning_rate": 1.6749800676054111e-06,
      "loss": 0.0074,
      "step": 2546860
    },
    {
      "epoch": 4.16802497987078,
      "grad_norm": 0.03175054118037224,
      "learning_rate": 1.674914175391894e-06,
      "loss": 0.0089,
      "step": 2546880
    },
    {
      "epoch": 4.168057710309434,
      "grad_norm": 0.06355154514312744,
      "learning_rate": 1.6748482831783768e-06,
      "loss": 0.0153,
      "step": 2546900
    },
    {
      "epoch": 4.168090440748087,
      "grad_norm": 0.23172655701637268,
      "learning_rate": 1.6747823909648598e-06,
      "loss": 0.0083,
      "step": 2546920
    },
    {
      "epoch": 4.16812317118674,
      "grad_norm": 0.1260339319705963,
      "learning_rate": 1.6747164987513425e-06,
      "loss": 0.0075,
      "step": 2546940
    },
    {
      "epoch": 4.1681559016253935,
      "grad_norm": 0.19210657477378845,
      "learning_rate": 1.6746506065378257e-06,
      "loss": 0.008,
      "step": 2546960
    },
    {
      "epoch": 4.168188632064047,
      "grad_norm": 0.1974802166223526,
      "learning_rate": 1.6745847143243084e-06,
      "loss": 0.0069,
      "step": 2546980
    },
    {
      "epoch": 4.168221362502701,
      "grad_norm": 0.1536508947610855,
      "learning_rate": 1.6745188221107914e-06,
      "loss": 0.0095,
      "step": 2547000
    },
    {
      "epoch": 4.168254092941353,
      "grad_norm": 0.47220414876937866,
      "learning_rate": 1.6744529298972741e-06,
      "loss": 0.012,
      "step": 2547020
    },
    {
      "epoch": 4.168286823380007,
      "grad_norm": 0.4775489568710327,
      "learning_rate": 1.674387037683757e-06,
      "loss": 0.0084,
      "step": 2547040
    },
    {
      "epoch": 4.168319553818661,
      "grad_norm": 0.15162530541419983,
      "learning_rate": 1.6743211454702398e-06,
      "loss": 0.01,
      "step": 2547060
    },
    {
      "epoch": 4.168352284257313,
      "grad_norm": 0.15731501579284668,
      "learning_rate": 1.6742552532567228e-06,
      "loss": 0.0068,
      "step": 2547080
    },
    {
      "epoch": 4.168385014695967,
      "grad_norm": 0.3516336679458618,
      "learning_rate": 1.6741893610432055e-06,
      "loss": 0.0075,
      "step": 2547100
    },
    {
      "epoch": 4.1684177451346205,
      "grad_norm": 0.4158088266849518,
      "learning_rate": 1.6741234688296885e-06,
      "loss": 0.0093,
      "step": 2547120
    },
    {
      "epoch": 4.168450475573273,
      "grad_norm": 0.2633157968521118,
      "learning_rate": 1.6740575766161716e-06,
      "loss": 0.0114,
      "step": 2547140
    },
    {
      "epoch": 4.168483206011927,
      "grad_norm": 0.06158938631415367,
      "learning_rate": 1.6739916844026544e-06,
      "loss": 0.0107,
      "step": 2547160
    },
    {
      "epoch": 4.16851593645058,
      "grad_norm": 0.27412936091423035,
      "learning_rate": 1.673925792189137e-06,
      "loss": 0.0073,
      "step": 2547180
    },
    {
      "epoch": 4.168548666889234,
      "grad_norm": 0.28934022784233093,
      "learning_rate": 1.67385989997562e-06,
      "loss": 0.0055,
      "step": 2547200
    },
    {
      "epoch": 4.168581397327887,
      "grad_norm": 0.2373509258031845,
      "learning_rate": 1.6737940077621028e-06,
      "loss": 0.0104,
      "step": 2547220
    },
    {
      "epoch": 4.16861412776654,
      "grad_norm": 0.11089643836021423,
      "learning_rate": 1.6737281155485857e-06,
      "loss": 0.0086,
      "step": 2547240
    },
    {
      "epoch": 4.168646858205194,
      "grad_norm": 0.11187182366847992,
      "learning_rate": 1.6736622233350685e-06,
      "loss": 0.011,
      "step": 2547260
    },
    {
      "epoch": 4.168679588643847,
      "grad_norm": 0.23249393701553345,
      "learning_rate": 1.6735963311215514e-06,
      "loss": 0.0116,
      "step": 2547280
    },
    {
      "epoch": 4.1687123190825,
      "grad_norm": 0.1685498058795929,
      "learning_rate": 1.6735304389080342e-06,
      "loss": 0.0068,
      "step": 2547300
    },
    {
      "epoch": 4.168745049521154,
      "grad_norm": 0.17652006447315216,
      "learning_rate": 1.6734645466945173e-06,
      "loss": 0.0065,
      "step": 2547320
    },
    {
      "epoch": 4.168777779959807,
      "grad_norm": 0.4955916404724121,
      "learning_rate": 1.6733986544810003e-06,
      "loss": 0.0079,
      "step": 2547340
    },
    {
      "epoch": 4.16881051039846,
      "grad_norm": 0.7929712533950806,
      "learning_rate": 1.673332762267483e-06,
      "loss": 0.0075,
      "step": 2547360
    },
    {
      "epoch": 4.168843240837114,
      "grad_norm": 0.10620471835136414,
      "learning_rate": 1.673266870053966e-06,
      "loss": 0.0093,
      "step": 2547380
    },
    {
      "epoch": 4.168875971275767,
      "grad_norm": 0.10356371849775314,
      "learning_rate": 1.6732009778404487e-06,
      "loss": 0.012,
      "step": 2547400
    },
    {
      "epoch": 4.16890870171442,
      "grad_norm": 0.07240637391805649,
      "learning_rate": 1.6731350856269315e-06,
      "loss": 0.0077,
      "step": 2547420
    },
    {
      "epoch": 4.168941432153074,
      "grad_norm": 0.1004413515329361,
      "learning_rate": 1.6730691934134144e-06,
      "loss": 0.0107,
      "step": 2547440
    },
    {
      "epoch": 4.168974162591727,
      "grad_norm": 0.2274247407913208,
      "learning_rate": 1.6730033011998972e-06,
      "loss": 0.0107,
      "step": 2547460
    },
    {
      "epoch": 4.169006893030381,
      "grad_norm": 0.8138441443443298,
      "learning_rate": 1.6729374089863803e-06,
      "loss": 0.006,
      "step": 2547480
    },
    {
      "epoch": 4.1690396234690335,
      "grad_norm": 0.1293477863073349,
      "learning_rate": 1.6728715167728633e-06,
      "loss": 0.0085,
      "step": 2547500
    },
    {
      "epoch": 4.169072353907687,
      "grad_norm": 0.1068962961435318,
      "learning_rate": 1.672805624559346e-06,
      "loss": 0.0088,
      "step": 2547520
    },
    {
      "epoch": 4.169105084346341,
      "grad_norm": 0.10296889394521713,
      "learning_rate": 1.672739732345829e-06,
      "loss": 0.0087,
      "step": 2547540
    },
    {
      "epoch": 4.169137814784993,
      "grad_norm": 0.21437908709049225,
      "learning_rate": 1.6726738401323117e-06,
      "loss": 0.0101,
      "step": 2547560
    },
    {
      "epoch": 4.169170545223647,
      "grad_norm": 0.22334134578704834,
      "learning_rate": 1.6726079479187946e-06,
      "loss": 0.0095,
      "step": 2547580
    },
    {
      "epoch": 4.1692032756623005,
      "grad_norm": 0.2619722783565521,
      "learning_rate": 1.6725420557052774e-06,
      "loss": 0.0086,
      "step": 2547600
    },
    {
      "epoch": 4.169236006100954,
      "grad_norm": 0.5818944573402405,
      "learning_rate": 1.6724761634917601e-06,
      "loss": 0.0108,
      "step": 2547620
    },
    {
      "epoch": 4.169268736539607,
      "grad_norm": 0.663521945476532,
      "learning_rate": 1.672410271278243e-06,
      "loss": 0.0062,
      "step": 2547640
    },
    {
      "epoch": 4.16930146697826,
      "grad_norm": 0.2329137623310089,
      "learning_rate": 1.6723443790647262e-06,
      "loss": 0.0109,
      "step": 2547660
    },
    {
      "epoch": 4.169334197416914,
      "grad_norm": 0.23816490173339844,
      "learning_rate": 1.672278486851209e-06,
      "loss": 0.0072,
      "step": 2547680
    },
    {
      "epoch": 4.169366927855567,
      "grad_norm": 0.579088568687439,
      "learning_rate": 1.672212594637692e-06,
      "loss": 0.0129,
      "step": 2547700
    },
    {
      "epoch": 4.16939965829422,
      "grad_norm": 0.37396132946014404,
      "learning_rate": 1.6721467024241747e-06,
      "loss": 0.0105,
      "step": 2547720
    },
    {
      "epoch": 4.169432388732874,
      "grad_norm": 0.18753117322921753,
      "learning_rate": 1.6720808102106576e-06,
      "loss": 0.0071,
      "step": 2547740
    },
    {
      "epoch": 4.1694651191715275,
      "grad_norm": 0.1455792933702469,
      "learning_rate": 1.6720149179971404e-06,
      "loss": 0.0083,
      "step": 2547760
    },
    {
      "epoch": 4.16949784961018,
      "grad_norm": 0.2269269824028015,
      "learning_rate": 1.6719490257836233e-06,
      "loss": 0.0092,
      "step": 2547780
    },
    {
      "epoch": 4.169530580048834,
      "grad_norm": 0.15352624654769897,
      "learning_rate": 1.671883133570106e-06,
      "loss": 0.0086,
      "step": 2547800
    },
    {
      "epoch": 4.169563310487487,
      "grad_norm": 0.1300496906042099,
      "learning_rate": 1.671817241356589e-06,
      "loss": 0.0069,
      "step": 2547820
    },
    {
      "epoch": 4.16959604092614,
      "grad_norm": 0.19634442031383514,
      "learning_rate": 1.671751349143072e-06,
      "loss": 0.0089,
      "step": 2547840
    },
    {
      "epoch": 4.169628771364794,
      "grad_norm": 0.42261505126953125,
      "learning_rate": 1.671685456929555e-06,
      "loss": 0.0081,
      "step": 2547860
    },
    {
      "epoch": 4.169661501803447,
      "grad_norm": 0.07729378342628479,
      "learning_rate": 1.6716195647160377e-06,
      "loss": 0.0089,
      "step": 2547880
    },
    {
      "epoch": 4.169694232242101,
      "grad_norm": 0.2624422609806061,
      "learning_rate": 1.6715536725025206e-06,
      "loss": 0.0121,
      "step": 2547900
    },
    {
      "epoch": 4.169726962680754,
      "grad_norm": 0.2387382984161377,
      "learning_rate": 1.6714877802890033e-06,
      "loss": 0.0081,
      "step": 2547920
    },
    {
      "epoch": 4.169759693119407,
      "grad_norm": 0.26001378893852234,
      "learning_rate": 1.6714218880754863e-06,
      "loss": 0.0091,
      "step": 2547940
    },
    {
      "epoch": 4.169792423558061,
      "grad_norm": 0.42160162329673767,
      "learning_rate": 1.671355995861969e-06,
      "loss": 0.0084,
      "step": 2547960
    },
    {
      "epoch": 4.1698251539967135,
      "grad_norm": 0.16525687277317047,
      "learning_rate": 1.671290103648452e-06,
      "loss": 0.0179,
      "step": 2547980
    },
    {
      "epoch": 4.169857884435367,
      "grad_norm": 0.6166527271270752,
      "learning_rate": 1.6712242114349347e-06,
      "loss": 0.0106,
      "step": 2548000
    },
    {
      "epoch": 4.169890614874021,
      "grad_norm": 0.21864810585975647,
      "learning_rate": 1.6711583192214179e-06,
      "loss": 0.0092,
      "step": 2548020
    },
    {
      "epoch": 4.169923345312674,
      "grad_norm": 0.3089832067489624,
      "learning_rate": 1.6710924270079006e-06,
      "loss": 0.0081,
      "step": 2548040
    },
    {
      "epoch": 4.169956075751327,
      "grad_norm": 0.2808091640472412,
      "learning_rate": 1.6710265347943836e-06,
      "loss": 0.0078,
      "step": 2548060
    },
    {
      "epoch": 4.169988806189981,
      "grad_norm": 0.0980231761932373,
      "learning_rate": 1.6709606425808663e-06,
      "loss": 0.0143,
      "step": 2548080
    },
    {
      "epoch": 4.170021536628634,
      "grad_norm": 0.5301011800765991,
      "learning_rate": 1.6708947503673493e-06,
      "loss": 0.0128,
      "step": 2548100
    },
    {
      "epoch": 4.170054267067287,
      "grad_norm": 0.2601625323295593,
      "learning_rate": 1.670828858153832e-06,
      "loss": 0.0094,
      "step": 2548120
    },
    {
      "epoch": 4.1700869975059405,
      "grad_norm": 0.1425696760416031,
      "learning_rate": 1.670762965940315e-06,
      "loss": 0.0125,
      "step": 2548140
    },
    {
      "epoch": 4.170119727944594,
      "grad_norm": 0.10521586239337921,
      "learning_rate": 1.6706970737267977e-06,
      "loss": 0.0073,
      "step": 2548160
    },
    {
      "epoch": 4.170152458383248,
      "grad_norm": 0.5463671088218689,
      "learning_rate": 1.6706311815132807e-06,
      "loss": 0.0081,
      "step": 2548180
    },
    {
      "epoch": 4.1701851888219,
      "grad_norm": 0.09705411642789841,
      "learning_rate": 1.6705652892997636e-06,
      "loss": 0.0072,
      "step": 2548200
    },
    {
      "epoch": 4.170217919260554,
      "grad_norm": 0.2069646120071411,
      "learning_rate": 1.6704993970862466e-06,
      "loss": 0.0106,
      "step": 2548220
    },
    {
      "epoch": 4.170250649699208,
      "grad_norm": 0.187580406665802,
      "learning_rate": 1.6704335048727293e-06,
      "loss": 0.0107,
      "step": 2548240
    },
    {
      "epoch": 4.17028338013786,
      "grad_norm": 0.18992400169372559,
      "learning_rate": 1.6703676126592122e-06,
      "loss": 0.0084,
      "step": 2548260
    },
    {
      "epoch": 4.170316110576514,
      "grad_norm": 0.18261326849460602,
      "learning_rate": 1.670301720445695e-06,
      "loss": 0.009,
      "step": 2548280
    },
    {
      "epoch": 4.1703488410151675,
      "grad_norm": 0.2710900604724884,
      "learning_rate": 1.670235828232178e-06,
      "loss": 0.0096,
      "step": 2548300
    },
    {
      "epoch": 4.170381571453821,
      "grad_norm": 0.09782695025205612,
      "learning_rate": 1.6701699360186607e-06,
      "loss": 0.0119,
      "step": 2548320
    },
    {
      "epoch": 4.170414301892474,
      "grad_norm": 0.39583006501197815,
      "learning_rate": 1.6701040438051436e-06,
      "loss": 0.0082,
      "step": 2548340
    },
    {
      "epoch": 4.170447032331127,
      "grad_norm": 0.27386561036109924,
      "learning_rate": 1.6700381515916268e-06,
      "loss": 0.0092,
      "step": 2548360
    },
    {
      "epoch": 4.170479762769781,
      "grad_norm": 0.222591832280159,
      "learning_rate": 1.6699722593781095e-06,
      "loss": 0.0088,
      "step": 2548380
    },
    {
      "epoch": 4.170512493208434,
      "grad_norm": 0.09631532430648804,
      "learning_rate": 1.6699063671645923e-06,
      "loss": 0.0097,
      "step": 2548400
    },
    {
      "epoch": 4.170545223647087,
      "grad_norm": 0.20353513956069946,
      "learning_rate": 1.6698404749510752e-06,
      "loss": 0.0125,
      "step": 2548420
    },
    {
      "epoch": 4.170577954085741,
      "grad_norm": 0.3429495096206665,
      "learning_rate": 1.669774582737558e-06,
      "loss": 0.0063,
      "step": 2548440
    },
    {
      "epoch": 4.170610684524394,
      "grad_norm": 0.189832404255867,
      "learning_rate": 1.669708690524041e-06,
      "loss": 0.0073,
      "step": 2548460
    },
    {
      "epoch": 4.170643414963047,
      "grad_norm": 0.2540546655654907,
      "learning_rate": 1.6696427983105237e-06,
      "loss": 0.0053,
      "step": 2548480
    },
    {
      "epoch": 4.170676145401701,
      "grad_norm": 0.20577748119831085,
      "learning_rate": 1.6695769060970066e-06,
      "loss": 0.0122,
      "step": 2548500
    },
    {
      "epoch": 4.170708875840354,
      "grad_norm": 0.09049060195684433,
      "learning_rate": 1.6695110138834893e-06,
      "loss": 0.0094,
      "step": 2548520
    },
    {
      "epoch": 4.170741606279007,
      "grad_norm": 0.2061326652765274,
      "learning_rate": 1.6694451216699725e-06,
      "loss": 0.0082,
      "step": 2548540
    },
    {
      "epoch": 4.170774336717661,
      "grad_norm": 0.23771332204341888,
      "learning_rate": 1.6693792294564555e-06,
      "loss": 0.0093,
      "step": 2548560
    },
    {
      "epoch": 4.170807067156314,
      "grad_norm": 0.19028440117835999,
      "learning_rate": 1.6693133372429382e-06,
      "loss": 0.0111,
      "step": 2548580
    },
    {
      "epoch": 4.170839797594967,
      "grad_norm": 0.3601546883583069,
      "learning_rate": 1.669247445029421e-06,
      "loss": 0.0087,
      "step": 2548600
    },
    {
      "epoch": 4.1708725280336205,
      "grad_norm": 0.21233044564723969,
      "learning_rate": 1.6691815528159039e-06,
      "loss": 0.0137,
      "step": 2548620
    },
    {
      "epoch": 4.170905258472274,
      "grad_norm": 0.27533701062202454,
      "learning_rate": 1.6691156606023866e-06,
      "loss": 0.0114,
      "step": 2548640
    },
    {
      "epoch": 4.170937988910928,
      "grad_norm": 0.13984379172325134,
      "learning_rate": 1.6690497683888696e-06,
      "loss": 0.0056,
      "step": 2548660
    },
    {
      "epoch": 4.17097071934958,
      "grad_norm": 0.7059153318405151,
      "learning_rate": 1.6689838761753523e-06,
      "loss": 0.0053,
      "step": 2548680
    },
    {
      "epoch": 4.171003449788234,
      "grad_norm": 0.11817022413015366,
      "learning_rate": 1.6689179839618353e-06,
      "loss": 0.0089,
      "step": 2548700
    },
    {
      "epoch": 4.171036180226888,
      "grad_norm": 0.4980534017086029,
      "learning_rate": 1.6688520917483184e-06,
      "loss": 0.0103,
      "step": 2548720
    },
    {
      "epoch": 4.171068910665541,
      "grad_norm": 0.17008361220359802,
      "learning_rate": 1.6687861995348012e-06,
      "loss": 0.0084,
      "step": 2548740
    },
    {
      "epoch": 4.171101641104194,
      "grad_norm": 0.09221905469894409,
      "learning_rate": 1.6687203073212841e-06,
      "loss": 0.0081,
      "step": 2548760
    },
    {
      "epoch": 4.1711343715428475,
      "grad_norm": 0.16474199295043945,
      "learning_rate": 1.6686544151077669e-06,
      "loss": 0.008,
      "step": 2548780
    },
    {
      "epoch": 4.171167101981501,
      "grad_norm": 0.14774471521377563,
      "learning_rate": 1.6685885228942498e-06,
      "loss": 0.0133,
      "step": 2548800
    },
    {
      "epoch": 4.171199832420154,
      "grad_norm": 0.16401785612106323,
      "learning_rate": 1.6685226306807326e-06,
      "loss": 0.0064,
      "step": 2548820
    },
    {
      "epoch": 4.171232562858807,
      "grad_norm": 0.3470202684402466,
      "learning_rate": 1.6684567384672153e-06,
      "loss": 0.008,
      "step": 2548840
    },
    {
      "epoch": 4.171265293297461,
      "grad_norm": 0.20342400670051575,
      "learning_rate": 1.6683908462536983e-06,
      "loss": 0.0093,
      "step": 2548860
    },
    {
      "epoch": 4.171298023736114,
      "grad_norm": 0.6910628080368042,
      "learning_rate": 1.668324954040181e-06,
      "loss": 0.0089,
      "step": 2548880
    },
    {
      "epoch": 4.171330754174767,
      "grad_norm": 0.3563805818557739,
      "learning_rate": 1.6682590618266642e-06,
      "loss": 0.0068,
      "step": 2548900
    },
    {
      "epoch": 4.171363484613421,
      "grad_norm": 0.31816065311431885,
      "learning_rate": 1.668193169613147e-06,
      "loss": 0.0075,
      "step": 2548920
    },
    {
      "epoch": 4.1713962150520745,
      "grad_norm": 0.1923930048942566,
      "learning_rate": 1.6681272773996298e-06,
      "loss": 0.0115,
      "step": 2548940
    },
    {
      "epoch": 4.171428945490727,
      "grad_norm": 0.1744890958070755,
      "learning_rate": 1.6680613851861128e-06,
      "loss": 0.0081,
      "step": 2548960
    },
    {
      "epoch": 4.171461675929381,
      "grad_norm": 0.17601242661476135,
      "learning_rate": 1.6679954929725955e-06,
      "loss": 0.0083,
      "step": 2548980
    },
    {
      "epoch": 4.171494406368034,
      "grad_norm": 0.2338847815990448,
      "learning_rate": 1.6679296007590785e-06,
      "loss": 0.0074,
      "step": 2549000
    },
    {
      "epoch": 4.171527136806687,
      "grad_norm": 0.22706057131290436,
      "learning_rate": 1.6678637085455612e-06,
      "loss": 0.0082,
      "step": 2549020
    },
    {
      "epoch": 4.171559867245341,
      "grad_norm": 0.5728304982185364,
      "learning_rate": 1.6677978163320442e-06,
      "loss": 0.01,
      "step": 2549040
    },
    {
      "epoch": 4.171592597683994,
      "grad_norm": 0.26271986961364746,
      "learning_rate": 1.667731924118527e-06,
      "loss": 0.0107,
      "step": 2549060
    },
    {
      "epoch": 4.171625328122648,
      "grad_norm": 0.31845059990882874,
      "learning_rate": 1.66766603190501e-06,
      "loss": 0.0063,
      "step": 2549080
    },
    {
      "epoch": 4.171658058561301,
      "grad_norm": 0.1009163185954094,
      "learning_rate": 1.6676001396914928e-06,
      "loss": 0.0103,
      "step": 2549100
    },
    {
      "epoch": 4.171690788999954,
      "grad_norm": 0.10732538253068924,
      "learning_rate": 1.6675342474779758e-06,
      "loss": 0.0109,
      "step": 2549120
    },
    {
      "epoch": 4.171723519438608,
      "grad_norm": 0.39376893639564514,
      "learning_rate": 1.6674683552644585e-06,
      "loss": 0.0072,
      "step": 2549140
    },
    {
      "epoch": 4.1717562498772605,
      "grad_norm": 0.15547406673431396,
      "learning_rate": 1.6674024630509415e-06,
      "loss": 0.0083,
      "step": 2549160
    },
    {
      "epoch": 4.171788980315914,
      "grad_norm": 0.10018593072891235,
      "learning_rate": 1.6673365708374242e-06,
      "loss": 0.01,
      "step": 2549180
    },
    {
      "epoch": 4.171821710754568,
      "grad_norm": 0.4131527245044708,
      "learning_rate": 1.6672706786239072e-06,
      "loss": 0.0081,
      "step": 2549200
    },
    {
      "epoch": 4.171854441193221,
      "grad_norm": 0.5001488327980042,
      "learning_rate": 1.66720478641039e-06,
      "loss": 0.0128,
      "step": 2549220
    },
    {
      "epoch": 4.171887171631874,
      "grad_norm": 0.2074715793132782,
      "learning_rate": 1.667138894196873e-06,
      "loss": 0.0071,
      "step": 2549240
    },
    {
      "epoch": 4.1719199020705275,
      "grad_norm": 0.2179168164730072,
      "learning_rate": 1.6670730019833558e-06,
      "loss": 0.0087,
      "step": 2549260
    },
    {
      "epoch": 4.171952632509181,
      "grad_norm": 0.31828346848487854,
      "learning_rate": 1.6670071097698387e-06,
      "loss": 0.0057,
      "step": 2549280
    },
    {
      "epoch": 4.171985362947834,
      "grad_norm": 0.20191125571727753,
      "learning_rate": 1.6669412175563215e-06,
      "loss": 0.0063,
      "step": 2549300
    },
    {
      "epoch": 4.172018093386487,
      "grad_norm": 0.28159815073013306,
      "learning_rate": 1.6668753253428044e-06,
      "loss": 0.0098,
      "step": 2549320
    },
    {
      "epoch": 4.172050823825141,
      "grad_norm": 0.25113654136657715,
      "learning_rate": 1.6668094331292872e-06,
      "loss": 0.0075,
      "step": 2549340
    },
    {
      "epoch": 4.172083554263795,
      "grad_norm": 0.164204940199852,
      "learning_rate": 1.6667435409157701e-06,
      "loss": 0.0087,
      "step": 2549360
    },
    {
      "epoch": 4.172116284702447,
      "grad_norm": 0.5359553098678589,
      "learning_rate": 1.6666776487022529e-06,
      "loss": 0.0077,
      "step": 2549380
    },
    {
      "epoch": 4.172149015141101,
      "grad_norm": 0.5556538701057434,
      "learning_rate": 1.6666117564887358e-06,
      "loss": 0.0067,
      "step": 2549400
    },
    {
      "epoch": 4.1721817455797545,
      "grad_norm": 0.14547167718410492,
      "learning_rate": 1.6665458642752188e-06,
      "loss": 0.009,
      "step": 2549420
    },
    {
      "epoch": 4.172214476018407,
      "grad_norm": 0.3142187297344208,
      "learning_rate": 1.6664799720617017e-06,
      "loss": 0.0152,
      "step": 2549440
    },
    {
      "epoch": 4.172247206457061,
      "grad_norm": 1.6163184642791748,
      "learning_rate": 1.6664140798481845e-06,
      "loss": 0.0089,
      "step": 2549460
    },
    {
      "epoch": 4.172279936895714,
      "grad_norm": 0.20023752748966217,
      "learning_rate": 1.6663481876346674e-06,
      "loss": 0.008,
      "step": 2549480
    },
    {
      "epoch": 4.172312667334368,
      "grad_norm": 0.06602344661951065,
      "learning_rate": 1.6662822954211502e-06,
      "loss": 0.0083,
      "step": 2549500
    },
    {
      "epoch": 4.172345397773021,
      "grad_norm": 0.3211405873298645,
      "learning_rate": 1.6662164032076331e-06,
      "loss": 0.0111,
      "step": 2549520
    },
    {
      "epoch": 4.172378128211674,
      "grad_norm": 0.13161355257034302,
      "learning_rate": 1.6661505109941158e-06,
      "loss": 0.0074,
      "step": 2549540
    },
    {
      "epoch": 4.172410858650328,
      "grad_norm": 0.12669414281845093,
      "learning_rate": 1.6660846187805988e-06,
      "loss": 0.0111,
      "step": 2549560
    },
    {
      "epoch": 4.172443589088981,
      "grad_norm": 0.21068164706230164,
      "learning_rate": 1.6660187265670815e-06,
      "loss": 0.0142,
      "step": 2549580
    },
    {
      "epoch": 4.172476319527634,
      "grad_norm": 0.2984287738800049,
      "learning_rate": 1.6659528343535647e-06,
      "loss": 0.0072,
      "step": 2549600
    },
    {
      "epoch": 4.172509049966288,
      "grad_norm": 0.45412129163742065,
      "learning_rate": 1.6658869421400474e-06,
      "loss": 0.0097,
      "step": 2549620
    },
    {
      "epoch": 4.172541780404941,
      "grad_norm": 0.22053079307079315,
      "learning_rate": 1.6658210499265304e-06,
      "loss": 0.0114,
      "step": 2549640
    },
    {
      "epoch": 4.172574510843594,
      "grad_norm": 0.20295466482639313,
      "learning_rate": 1.6657551577130131e-06,
      "loss": 0.0087,
      "step": 2549660
    },
    {
      "epoch": 4.172607241282248,
      "grad_norm": 0.1942872554063797,
      "learning_rate": 1.665689265499496e-06,
      "loss": 0.0072,
      "step": 2549680
    },
    {
      "epoch": 4.172639971720901,
      "grad_norm": 0.6575092673301697,
      "learning_rate": 1.6656233732859788e-06,
      "loss": 0.0115,
      "step": 2549700
    },
    {
      "epoch": 4.172672702159554,
      "grad_norm": 0.14868709444999695,
      "learning_rate": 1.6655574810724618e-06,
      "loss": 0.0108,
      "step": 2549720
    },
    {
      "epoch": 4.172705432598208,
      "grad_norm": 0.08661341667175293,
      "learning_rate": 1.6654915888589445e-06,
      "loss": 0.0095,
      "step": 2549740
    },
    {
      "epoch": 4.172738163036861,
      "grad_norm": 0.22775202989578247,
      "learning_rate": 1.6654256966454275e-06,
      "loss": 0.008,
      "step": 2549760
    },
    {
      "epoch": 4.172770893475515,
      "grad_norm": 0.1127883791923523,
      "learning_rate": 1.6653598044319106e-06,
      "loss": 0.0127,
      "step": 2549780
    },
    {
      "epoch": 4.1728036239141675,
      "grad_norm": 0.0823410302400589,
      "learning_rate": 1.6652939122183934e-06,
      "loss": 0.0096,
      "step": 2549800
    },
    {
      "epoch": 4.172836354352821,
      "grad_norm": 0.4719502925872803,
      "learning_rate": 1.6652280200048761e-06,
      "loss": 0.0074,
      "step": 2549820
    },
    {
      "epoch": 4.172869084791475,
      "grad_norm": 0.12294028699398041,
      "learning_rate": 1.665162127791359e-06,
      "loss": 0.0097,
      "step": 2549840
    },
    {
      "epoch": 4.172901815230127,
      "grad_norm": 0.15771539509296417,
      "learning_rate": 1.6650962355778418e-06,
      "loss": 0.0094,
      "step": 2549860
    },
    {
      "epoch": 4.172934545668781,
      "grad_norm": 0.09799963980913162,
      "learning_rate": 1.6650303433643248e-06,
      "loss": 0.0086,
      "step": 2549880
    },
    {
      "epoch": 4.172967276107435,
      "grad_norm": 0.10765861719846725,
      "learning_rate": 1.6649644511508075e-06,
      "loss": 0.0105,
      "step": 2549900
    },
    {
      "epoch": 4.173000006546088,
      "grad_norm": 0.281341552734375,
      "learning_rate": 1.6648985589372904e-06,
      "loss": 0.0103,
      "step": 2549920
    },
    {
      "epoch": 4.173032736984741,
      "grad_norm": 0.4212259352207184,
      "learning_rate": 1.6648326667237732e-06,
      "loss": 0.0155,
      "step": 2549940
    },
    {
      "epoch": 4.1730654674233945,
      "grad_norm": 0.4933341443538666,
      "learning_rate": 1.6647667745102563e-06,
      "loss": 0.0113,
      "step": 2549960
    },
    {
      "epoch": 4.173098197862048,
      "grad_norm": 0.053882937878370285,
      "learning_rate": 1.6647008822967393e-06,
      "loss": 0.0116,
      "step": 2549980
    },
    {
      "epoch": 4.173130928300701,
      "grad_norm": 0.32363229990005493,
      "learning_rate": 1.664634990083222e-06,
      "loss": 0.0125,
      "step": 2550000
    },
    {
      "epoch": 4.173130928300701,
      "eval_loss": 0.006093584932386875,
      "eval_runtime": 6472.1056,
      "eval_samples_per_second": 158.814,
      "eval_steps_per_second": 15.881,
      "eval_sts-dev_pearson_cosine": 0.9862189138413954,
      "eval_sts-dev_spearman_cosine": 0.896324283652199,
      "step": 2550000
    },
    {
      "epoch": 4.173163658739354,
      "grad_norm": 0.4129864275455475,
      "learning_rate": 1.664569097869705e-06,
      "loss": 0.0074,
      "step": 2550020
    },
    {
      "epoch": 4.173196389178008,
      "grad_norm": 0.08164840191602707,
      "learning_rate": 1.6645032056561877e-06,
      "loss": 0.0067,
      "step": 2550040
    },
    {
      "epoch": 4.173229119616661,
      "grad_norm": 0.1743803322315216,
      "learning_rate": 1.6644373134426705e-06,
      "loss": 0.0094,
      "step": 2550060
    },
    {
      "epoch": 4.173261850055314,
      "grad_norm": 0.21825622022151947,
      "learning_rate": 1.6643714212291534e-06,
      "loss": 0.0105,
      "step": 2550080
    },
    {
      "epoch": 4.173294580493968,
      "grad_norm": 0.12815794348716736,
      "learning_rate": 1.6643055290156362e-06,
      "loss": 0.0091,
      "step": 2550100
    },
    {
      "epoch": 4.173327310932621,
      "grad_norm": 0.3054415285587311,
      "learning_rate": 1.6642396368021193e-06,
      "loss": 0.0113,
      "step": 2550120
    },
    {
      "epoch": 4.173360041371274,
      "grad_norm": 0.3436654806137085,
      "learning_rate": 1.6641737445886023e-06,
      "loss": 0.0136,
      "step": 2550140
    },
    {
      "epoch": 4.173392771809928,
      "grad_norm": 0.1885063648223877,
      "learning_rate": 1.664107852375085e-06,
      "loss": 0.0076,
      "step": 2550160
    },
    {
      "epoch": 4.173425502248581,
      "grad_norm": 0.16485168039798737,
      "learning_rate": 1.664041960161568e-06,
      "loss": 0.0081,
      "step": 2550180
    },
    {
      "epoch": 4.173458232687235,
      "grad_norm": 0.33155474066734314,
      "learning_rate": 1.6639760679480507e-06,
      "loss": 0.0103,
      "step": 2550200
    },
    {
      "epoch": 4.173490963125888,
      "grad_norm": 0.16289499402046204,
      "learning_rate": 1.6639101757345337e-06,
      "loss": 0.0104,
      "step": 2550220
    },
    {
      "epoch": 4.173523693564541,
      "grad_norm": 0.2653627395629883,
      "learning_rate": 1.6638442835210164e-06,
      "loss": 0.0062,
      "step": 2550240
    },
    {
      "epoch": 4.173556424003195,
      "grad_norm": 0.40200647711753845,
      "learning_rate": 1.6637783913074991e-06,
      "loss": 0.0069,
      "step": 2550260
    },
    {
      "epoch": 4.1735891544418475,
      "grad_norm": 0.3085024356842041,
      "learning_rate": 1.663712499093982e-06,
      "loss": 0.0111,
      "step": 2550280
    },
    {
      "epoch": 4.173621884880501,
      "grad_norm": 0.04813402518630028,
      "learning_rate": 1.6636466068804653e-06,
      "loss": 0.0087,
      "step": 2550300
    },
    {
      "epoch": 4.173654615319155,
      "grad_norm": 0.18652230501174927,
      "learning_rate": 1.663580714666948e-06,
      "loss": 0.0112,
      "step": 2550320
    },
    {
      "epoch": 4.173687345757807,
      "grad_norm": 0.2588421702384949,
      "learning_rate": 1.663514822453431e-06,
      "loss": 0.0121,
      "step": 2550340
    },
    {
      "epoch": 4.173720076196461,
      "grad_norm": 0.17756249010562897,
      "learning_rate": 1.6634489302399137e-06,
      "loss": 0.0122,
      "step": 2550360
    },
    {
      "epoch": 4.173752806635115,
      "grad_norm": 0.13348129391670227,
      "learning_rate": 1.6633830380263966e-06,
      "loss": 0.0102,
      "step": 2550380
    },
    {
      "epoch": 4.173785537073768,
      "grad_norm": 0.11223522573709488,
      "learning_rate": 1.6633171458128794e-06,
      "loss": 0.008,
      "step": 2550400
    },
    {
      "epoch": 4.173818267512421,
      "grad_norm": 0.28442513942718506,
      "learning_rate": 1.6632512535993623e-06,
      "loss": 0.0152,
      "step": 2550420
    },
    {
      "epoch": 4.1738509979510745,
      "grad_norm": 0.1997261941432953,
      "learning_rate": 1.663185361385845e-06,
      "loss": 0.0074,
      "step": 2550440
    },
    {
      "epoch": 4.173883728389728,
      "grad_norm": 0.23130840063095093,
      "learning_rate": 1.663119469172328e-06,
      "loss": 0.0078,
      "step": 2550460
    },
    {
      "epoch": 4.173916458828381,
      "grad_norm": 0.19262723624706268,
      "learning_rate": 1.663053576958811e-06,
      "loss": 0.0144,
      "step": 2550480
    },
    {
      "epoch": 4.173949189267034,
      "grad_norm": 0.12297109514474869,
      "learning_rate": 1.662987684745294e-06,
      "loss": 0.0072,
      "step": 2550500
    },
    {
      "epoch": 4.173981919705688,
      "grad_norm": 0.06820202618837357,
      "learning_rate": 1.6629217925317767e-06,
      "loss": 0.0073,
      "step": 2550520
    },
    {
      "epoch": 4.174014650144342,
      "grad_norm": 0.271182656288147,
      "learning_rate": 1.6628559003182596e-06,
      "loss": 0.0086,
      "step": 2550540
    },
    {
      "epoch": 4.174047380582994,
      "grad_norm": 0.12005122750997543,
      "learning_rate": 1.6627900081047424e-06,
      "loss": 0.0054,
      "step": 2550560
    },
    {
      "epoch": 4.174080111021648,
      "grad_norm": 0.049366261810064316,
      "learning_rate": 1.6627241158912253e-06,
      "loss": 0.0139,
      "step": 2550580
    },
    {
      "epoch": 4.1741128414603015,
      "grad_norm": 0.3263077735900879,
      "learning_rate": 1.662658223677708e-06,
      "loss": 0.0082,
      "step": 2550600
    },
    {
      "epoch": 4.174145571898954,
      "grad_norm": 0.08513663709163666,
      "learning_rate": 1.662592331464191e-06,
      "loss": 0.0086,
      "step": 2550620
    },
    {
      "epoch": 4.174178302337608,
      "grad_norm": 0.2545110583305359,
      "learning_rate": 1.6625264392506737e-06,
      "loss": 0.008,
      "step": 2550640
    },
    {
      "epoch": 4.174211032776261,
      "grad_norm": 0.22986572980880737,
      "learning_rate": 1.662460547037157e-06,
      "loss": 0.0092,
      "step": 2550660
    },
    {
      "epoch": 4.174243763214915,
      "grad_norm": 0.41271311044692993,
      "learning_rate": 1.6623946548236396e-06,
      "loss": 0.0095,
      "step": 2550680
    },
    {
      "epoch": 4.174276493653568,
      "grad_norm": 0.13447536528110504,
      "learning_rate": 1.6623287626101226e-06,
      "loss": 0.0151,
      "step": 2550700
    },
    {
      "epoch": 4.174309224092221,
      "grad_norm": 0.2916390299797058,
      "learning_rate": 1.6622628703966053e-06,
      "loss": 0.0094,
      "step": 2550720
    },
    {
      "epoch": 4.174341954530875,
      "grad_norm": 0.1474970281124115,
      "learning_rate": 1.6621969781830883e-06,
      "loss": 0.0123,
      "step": 2550740
    },
    {
      "epoch": 4.174374684969528,
      "grad_norm": 0.4890979528427124,
      "learning_rate": 1.662131085969571e-06,
      "loss": 0.0084,
      "step": 2550760
    },
    {
      "epoch": 4.174407415408181,
      "grad_norm": 0.45407503843307495,
      "learning_rate": 1.662065193756054e-06,
      "loss": 0.0067,
      "step": 2550780
    },
    {
      "epoch": 4.174440145846835,
      "grad_norm": 0.15699325501918793,
      "learning_rate": 1.6619993015425367e-06,
      "loss": 0.0101,
      "step": 2550800
    },
    {
      "epoch": 4.174472876285488,
      "grad_norm": 0.17800362408161163,
      "learning_rate": 1.6619334093290199e-06,
      "loss": 0.0086,
      "step": 2550820
    },
    {
      "epoch": 4.174505606724141,
      "grad_norm": 0.21618829667568207,
      "learning_rate": 1.6618675171155026e-06,
      "loss": 0.0083,
      "step": 2550840
    },
    {
      "epoch": 4.174538337162795,
      "grad_norm": 0.26702529191970825,
      "learning_rate": 1.6618016249019856e-06,
      "loss": 0.0081,
      "step": 2550860
    },
    {
      "epoch": 4.174571067601448,
      "grad_norm": 0.2891235053539276,
      "learning_rate": 1.6617357326884683e-06,
      "loss": 0.0095,
      "step": 2550880
    },
    {
      "epoch": 4.174603798040101,
      "grad_norm": 0.19979612529277802,
      "learning_rate": 1.6616698404749513e-06,
      "loss": 0.0126,
      "step": 2550900
    },
    {
      "epoch": 4.1746365284787545,
      "grad_norm": 0.2836568057537079,
      "learning_rate": 1.661603948261434e-06,
      "loss": 0.0101,
      "step": 2550920
    },
    {
      "epoch": 4.174669258917408,
      "grad_norm": 0.7351651191711426,
      "learning_rate": 1.661538056047917e-06,
      "loss": 0.0076,
      "step": 2550940
    },
    {
      "epoch": 4.174701989356062,
      "grad_norm": 0.17683862149715424,
      "learning_rate": 1.6614721638343997e-06,
      "loss": 0.0073,
      "step": 2550960
    },
    {
      "epoch": 4.174734719794714,
      "grad_norm": 0.18874329328536987,
      "learning_rate": 1.6614062716208826e-06,
      "loss": 0.0129,
      "step": 2550980
    },
    {
      "epoch": 4.174767450233368,
      "grad_norm": 0.22350800037384033,
      "learning_rate": 1.6613403794073658e-06,
      "loss": 0.0073,
      "step": 2551000
    },
    {
      "epoch": 4.174800180672022,
      "grad_norm": 0.19260580837726593,
      "learning_rate": 1.6612744871938485e-06,
      "loss": 0.0099,
      "step": 2551020
    },
    {
      "epoch": 4.174832911110674,
      "grad_norm": 0.1296510398387909,
      "learning_rate": 1.6612085949803313e-06,
      "loss": 0.0057,
      "step": 2551040
    },
    {
      "epoch": 4.174865641549328,
      "grad_norm": 0.3182852864265442,
      "learning_rate": 1.6611427027668142e-06,
      "loss": 0.0136,
      "step": 2551060
    },
    {
      "epoch": 4.1748983719879815,
      "grad_norm": 0.14917966723442078,
      "learning_rate": 1.661076810553297e-06,
      "loss": 0.0096,
      "step": 2551080
    },
    {
      "epoch": 4.174931102426635,
      "grad_norm": 0.1481006145477295,
      "learning_rate": 1.66101091833978e-06,
      "loss": 0.0074,
      "step": 2551100
    },
    {
      "epoch": 4.174963832865288,
      "grad_norm": 0.298095703125,
      "learning_rate": 1.6609450261262627e-06,
      "loss": 0.0065,
      "step": 2551120
    },
    {
      "epoch": 4.174996563303941,
      "grad_norm": 0.058388594537973404,
      "learning_rate": 1.6608791339127456e-06,
      "loss": 0.0083,
      "step": 2551140
    },
    {
      "epoch": 4.175029293742595,
      "grad_norm": 0.294371634721756,
      "learning_rate": 1.6608132416992284e-06,
      "loss": 0.0068,
      "step": 2551160
    },
    {
      "epoch": 4.175062024181248,
      "grad_norm": 0.23179090023040771,
      "learning_rate": 1.6607473494857115e-06,
      "loss": 0.0132,
      "step": 2551180
    },
    {
      "epoch": 4.175094754619901,
      "grad_norm": 0.10481097549200058,
      "learning_rate": 1.6606814572721945e-06,
      "loss": 0.0056,
      "step": 2551200
    },
    {
      "epoch": 4.175127485058555,
      "grad_norm": 0.39242783188819885,
      "learning_rate": 1.6606155650586772e-06,
      "loss": 0.0113,
      "step": 2551220
    },
    {
      "epoch": 4.1751602154972085,
      "grad_norm": 0.06986541301012039,
      "learning_rate": 1.66054967284516e-06,
      "loss": 0.0099,
      "step": 2551240
    },
    {
      "epoch": 4.175192945935861,
      "grad_norm": 0.04375016689300537,
      "learning_rate": 1.660483780631643e-06,
      "loss": 0.0172,
      "step": 2551260
    },
    {
      "epoch": 4.175225676374515,
      "grad_norm": 0.42438873648643494,
      "learning_rate": 1.6604178884181256e-06,
      "loss": 0.0066,
      "step": 2551280
    },
    {
      "epoch": 4.175258406813168,
      "grad_norm": 0.38340502977371216,
      "learning_rate": 1.6603519962046086e-06,
      "loss": 0.0089,
      "step": 2551300
    },
    {
      "epoch": 4.175291137251821,
      "grad_norm": 0.2855359613895416,
      "learning_rate": 1.6602861039910913e-06,
      "loss": 0.0083,
      "step": 2551320
    },
    {
      "epoch": 4.175323867690475,
      "grad_norm": 0.2354705035686493,
      "learning_rate": 1.6602202117775743e-06,
      "loss": 0.0123,
      "step": 2551340
    },
    {
      "epoch": 4.175356598129128,
      "grad_norm": 0.5858416557312012,
      "learning_rate": 1.6601543195640574e-06,
      "loss": 0.0104,
      "step": 2551360
    },
    {
      "epoch": 4.175389328567782,
      "grad_norm": 0.21853898465633392,
      "learning_rate": 1.6600884273505402e-06,
      "loss": 0.0084,
      "step": 2551380
    },
    {
      "epoch": 4.175422059006435,
      "grad_norm": 0.23343269526958466,
      "learning_rate": 1.6600225351370231e-06,
      "loss": 0.0061,
      "step": 2551400
    },
    {
      "epoch": 4.175454789445088,
      "grad_norm": 0.3804628551006317,
      "learning_rate": 1.6599566429235059e-06,
      "loss": 0.0081,
      "step": 2551420
    },
    {
      "epoch": 4.175487519883742,
      "grad_norm": 0.4131748676300049,
      "learning_rate": 1.6598907507099888e-06,
      "loss": 0.0086,
      "step": 2551440
    },
    {
      "epoch": 4.1755202503223945,
      "grad_norm": 0.4670770764350891,
      "learning_rate": 1.6598248584964716e-06,
      "loss": 0.0101,
      "step": 2551460
    },
    {
      "epoch": 4.175552980761048,
      "grad_norm": 0.3860536813735962,
      "learning_rate": 1.6597589662829543e-06,
      "loss": 0.0107,
      "step": 2551480
    },
    {
      "epoch": 4.175585711199702,
      "grad_norm": 0.4815213680267334,
      "learning_rate": 1.6596930740694373e-06,
      "loss": 0.0066,
      "step": 2551500
    },
    {
      "epoch": 4.175618441638354,
      "grad_norm": 0.29996296763420105,
      "learning_rate": 1.65962718185592e-06,
      "loss": 0.0125,
      "step": 2551520
    },
    {
      "epoch": 4.175651172077008,
      "grad_norm": 0.4068642258644104,
      "learning_rate": 1.6595612896424032e-06,
      "loss": 0.0131,
      "step": 2551540
    },
    {
      "epoch": 4.175683902515662,
      "grad_norm": 0.17629851400852203,
      "learning_rate": 1.6594953974288861e-06,
      "loss": 0.0085,
      "step": 2551560
    },
    {
      "epoch": 4.175716632954315,
      "grad_norm": 0.1672157496213913,
      "learning_rate": 1.6594295052153689e-06,
      "loss": 0.0069,
      "step": 2551580
    },
    {
      "epoch": 4.175749363392968,
      "grad_norm": 0.0700613409280777,
      "learning_rate": 1.6593636130018518e-06,
      "loss": 0.013,
      "step": 2551600
    },
    {
      "epoch": 4.1757820938316215,
      "grad_norm": 0.27266308665275574,
      "learning_rate": 1.6592977207883345e-06,
      "loss": 0.0083,
      "step": 2551620
    },
    {
      "epoch": 4.175814824270275,
      "grad_norm": 0.07264405488967896,
      "learning_rate": 1.6592318285748175e-06,
      "loss": 0.0108,
      "step": 2551640
    },
    {
      "epoch": 4.175847554708929,
      "grad_norm": 0.5648015141487122,
      "learning_rate": 1.6591659363613002e-06,
      "loss": 0.0123,
      "step": 2551660
    },
    {
      "epoch": 4.175880285147581,
      "grad_norm": 0.9244750142097473,
      "learning_rate": 1.6591000441477832e-06,
      "loss": 0.0089,
      "step": 2551680
    },
    {
      "epoch": 4.175913015586235,
      "grad_norm": 0.35687386989593506,
      "learning_rate": 1.6590341519342661e-06,
      "loss": 0.0125,
      "step": 2551700
    },
    {
      "epoch": 4.1759457460248885,
      "grad_norm": 0.03768865019083023,
      "learning_rate": 1.658968259720749e-06,
      "loss": 0.0102,
      "step": 2551720
    },
    {
      "epoch": 4.175978476463541,
      "grad_norm": 0.08741139620542526,
      "learning_rate": 1.6589023675072318e-06,
      "loss": 0.0069,
      "step": 2551740
    },
    {
      "epoch": 4.176011206902195,
      "grad_norm": 0.3416028916835785,
      "learning_rate": 1.6588364752937148e-06,
      "loss": 0.0105,
      "step": 2551760
    },
    {
      "epoch": 4.1760439373408484,
      "grad_norm": 0.45672664046287537,
      "learning_rate": 1.6587705830801975e-06,
      "loss": 0.0083,
      "step": 2551780
    },
    {
      "epoch": 4.176076667779501,
      "grad_norm": 0.3967185914516449,
      "learning_rate": 1.6587046908666805e-06,
      "loss": 0.0152,
      "step": 2551800
    },
    {
      "epoch": 4.176109398218155,
      "grad_norm": 0.04384833574295044,
      "learning_rate": 1.6586387986531632e-06,
      "loss": 0.014,
      "step": 2551820
    },
    {
      "epoch": 4.176142128656808,
      "grad_norm": 0.07971368730068207,
      "learning_rate": 1.6585729064396462e-06,
      "loss": 0.0088,
      "step": 2551840
    },
    {
      "epoch": 4.176174859095462,
      "grad_norm": 0.3840072751045227,
      "learning_rate": 1.658507014226129e-06,
      "loss": 0.0103,
      "step": 2551860
    },
    {
      "epoch": 4.176207589534115,
      "grad_norm": 0.21101495623588562,
      "learning_rate": 1.658441122012612e-06,
      "loss": 0.0101,
      "step": 2551880
    },
    {
      "epoch": 4.176240319972768,
      "grad_norm": 0.3029267489910126,
      "learning_rate": 1.6583752297990948e-06,
      "loss": 0.0081,
      "step": 2551900
    },
    {
      "epoch": 4.176273050411422,
      "grad_norm": 0.14197054505348206,
      "learning_rate": 1.6583093375855778e-06,
      "loss": 0.0091,
      "step": 2551920
    },
    {
      "epoch": 4.1763057808500745,
      "grad_norm": 0.06230299919843674,
      "learning_rate": 1.6582434453720605e-06,
      "loss": 0.0098,
      "step": 2551940
    },
    {
      "epoch": 4.176338511288728,
      "grad_norm": 0.32680457830429077,
      "learning_rate": 1.6581775531585435e-06,
      "loss": 0.0049,
      "step": 2551960
    },
    {
      "epoch": 4.176371241727382,
      "grad_norm": 0.08966349065303802,
      "learning_rate": 1.6581116609450262e-06,
      "loss": 0.0127,
      "step": 2551980
    },
    {
      "epoch": 4.176403972166035,
      "grad_norm": 0.10343348979949951,
      "learning_rate": 1.6580457687315091e-06,
      "loss": 0.0098,
      "step": 2552000
    },
    {
      "epoch": 4.176436702604688,
      "grad_norm": 0.32031723856925964,
      "learning_rate": 1.6579798765179919e-06,
      "loss": 0.0099,
      "step": 2552020
    },
    {
      "epoch": 4.176469433043342,
      "grad_norm": 0.06787171959877014,
      "learning_rate": 1.6579139843044748e-06,
      "loss": 0.0069,
      "step": 2552040
    },
    {
      "epoch": 4.176502163481995,
      "grad_norm": 0.08534614741802216,
      "learning_rate": 1.6578480920909578e-06,
      "loss": 0.0061,
      "step": 2552060
    },
    {
      "epoch": 4.176534893920648,
      "grad_norm": 0.31458181142807007,
      "learning_rate": 1.6577821998774407e-06,
      "loss": 0.0101,
      "step": 2552080
    },
    {
      "epoch": 4.1765676243593015,
      "grad_norm": 0.30368703603744507,
      "learning_rate": 1.6577163076639235e-06,
      "loss": 0.0089,
      "step": 2552100
    },
    {
      "epoch": 4.176600354797955,
      "grad_norm": 0.25100910663604736,
      "learning_rate": 1.6576504154504064e-06,
      "loss": 0.014,
      "step": 2552120
    },
    {
      "epoch": 4.176633085236609,
      "grad_norm": 0.05414653941988945,
      "learning_rate": 1.6575845232368892e-06,
      "loss": 0.0053,
      "step": 2552140
    },
    {
      "epoch": 4.176665815675261,
      "grad_norm": 0.293383926153183,
      "learning_rate": 1.6575186310233721e-06,
      "loss": 0.0101,
      "step": 2552160
    },
    {
      "epoch": 4.176698546113915,
      "grad_norm": 0.07412361353635788,
      "learning_rate": 1.6574527388098549e-06,
      "loss": 0.008,
      "step": 2552180
    },
    {
      "epoch": 4.176731276552569,
      "grad_norm": 0.09418945014476776,
      "learning_rate": 1.6573868465963378e-06,
      "loss": 0.0117,
      "step": 2552200
    },
    {
      "epoch": 4.176764006991221,
      "grad_norm": 0.2664591372013092,
      "learning_rate": 1.6573209543828206e-06,
      "loss": 0.0078,
      "step": 2552220
    },
    {
      "epoch": 4.176796737429875,
      "grad_norm": 0.5096360445022583,
      "learning_rate": 1.6572550621693037e-06,
      "loss": 0.0131,
      "step": 2552240
    },
    {
      "epoch": 4.1768294678685285,
      "grad_norm": 0.3391747772693634,
      "learning_rate": 1.6571891699557865e-06,
      "loss": 0.011,
      "step": 2552260
    },
    {
      "epoch": 4.176862198307182,
      "grad_norm": 0.20789064466953278,
      "learning_rate": 1.6571232777422694e-06,
      "loss": 0.0078,
      "step": 2552280
    },
    {
      "epoch": 4.176894928745835,
      "grad_norm": 0.2468797117471695,
      "learning_rate": 1.6570573855287521e-06,
      "loss": 0.0081,
      "step": 2552300
    },
    {
      "epoch": 4.176927659184488,
      "grad_norm": 0.26474595069885254,
      "learning_rate": 1.656991493315235e-06,
      "loss": 0.0088,
      "step": 2552320
    },
    {
      "epoch": 4.176960389623142,
      "grad_norm": 0.3523035943508148,
      "learning_rate": 1.6569256011017178e-06,
      "loss": 0.0085,
      "step": 2552340
    },
    {
      "epoch": 4.176993120061795,
      "grad_norm": 0.44940805435180664,
      "learning_rate": 1.6568597088882008e-06,
      "loss": 0.0114,
      "step": 2552360
    },
    {
      "epoch": 4.177025850500448,
      "grad_norm": 0.2633157968521118,
      "learning_rate": 1.6567938166746835e-06,
      "loss": 0.008,
      "step": 2552380
    },
    {
      "epoch": 4.177058580939102,
      "grad_norm": 0.26876822113990784,
      "learning_rate": 1.6567279244611665e-06,
      "loss": 0.0158,
      "step": 2552400
    },
    {
      "epoch": 4.1770913113777555,
      "grad_norm": 0.13106392323970795,
      "learning_rate": 1.6566620322476496e-06,
      "loss": 0.007,
      "step": 2552420
    },
    {
      "epoch": 4.177124041816408,
      "grad_norm": 0.2945581078529358,
      "learning_rate": 1.6565961400341324e-06,
      "loss": 0.0097,
      "step": 2552440
    },
    {
      "epoch": 4.177156772255062,
      "grad_norm": 0.3803587257862091,
      "learning_rate": 1.6565302478206151e-06,
      "loss": 0.0183,
      "step": 2552460
    },
    {
      "epoch": 4.177189502693715,
      "grad_norm": 0.38637110590934753,
      "learning_rate": 1.656464355607098e-06,
      "loss": 0.0118,
      "step": 2552480
    },
    {
      "epoch": 4.177222233132368,
      "grad_norm": 0.20029330253601074,
      "learning_rate": 1.6563984633935808e-06,
      "loss": 0.0092,
      "step": 2552500
    },
    {
      "epoch": 4.177254963571022,
      "grad_norm": 0.19490647315979004,
      "learning_rate": 1.6563325711800638e-06,
      "loss": 0.01,
      "step": 2552520
    },
    {
      "epoch": 4.177287694009675,
      "grad_norm": 0.39338603615760803,
      "learning_rate": 1.6562666789665465e-06,
      "loss": 0.009,
      "step": 2552540
    },
    {
      "epoch": 4.177320424448329,
      "grad_norm": 0.4131554365158081,
      "learning_rate": 1.6562007867530295e-06,
      "loss": 0.0088,
      "step": 2552560
    },
    {
      "epoch": 4.177353154886982,
      "grad_norm": 0.2801242172718048,
      "learning_rate": 1.6561348945395126e-06,
      "loss": 0.0098,
      "step": 2552580
    },
    {
      "epoch": 4.177385885325635,
      "grad_norm": 0.35772615671157837,
      "learning_rate": 1.6560690023259954e-06,
      "loss": 0.0063,
      "step": 2552600
    },
    {
      "epoch": 4.177418615764289,
      "grad_norm": 0.370471328496933,
      "learning_rate": 1.6560031101124783e-06,
      "loss": 0.0114,
      "step": 2552620
    },
    {
      "epoch": 4.1774513462029415,
      "grad_norm": 0.15182439982891083,
      "learning_rate": 1.655937217898961e-06,
      "loss": 0.01,
      "step": 2552640
    },
    {
      "epoch": 4.177484076641595,
      "grad_norm": 0.44260138273239136,
      "learning_rate": 1.655871325685444e-06,
      "loss": 0.0063,
      "step": 2552660
    },
    {
      "epoch": 4.177516807080249,
      "grad_norm": 0.25308772921562195,
      "learning_rate": 1.6558054334719267e-06,
      "loss": 0.0088,
      "step": 2552680
    },
    {
      "epoch": 4.177549537518902,
      "grad_norm": 0.28405553102493286,
      "learning_rate": 1.6557395412584095e-06,
      "loss": 0.0118,
      "step": 2552700
    },
    {
      "epoch": 4.177582267957555,
      "grad_norm": 0.12664738297462463,
      "learning_rate": 1.6556736490448924e-06,
      "loss": 0.008,
      "step": 2552720
    },
    {
      "epoch": 4.1776149983962085,
      "grad_norm": 0.05799071490764618,
      "learning_rate": 1.6556077568313752e-06,
      "loss": 0.0109,
      "step": 2552740
    },
    {
      "epoch": 4.177647728834862,
      "grad_norm": 0.3567327857017517,
      "learning_rate": 1.6555418646178583e-06,
      "loss": 0.0089,
      "step": 2552760
    },
    {
      "epoch": 4.177680459273515,
      "grad_norm": 0.1440250426530838,
      "learning_rate": 1.6554759724043413e-06,
      "loss": 0.0075,
      "step": 2552780
    },
    {
      "epoch": 4.177713189712168,
      "grad_norm": 0.3263859748840332,
      "learning_rate": 1.655410080190824e-06,
      "loss": 0.0078,
      "step": 2552800
    },
    {
      "epoch": 4.177745920150822,
      "grad_norm": 0.4223531484603882,
      "learning_rate": 1.655344187977307e-06,
      "loss": 0.0131,
      "step": 2552820
    },
    {
      "epoch": 4.177778650589476,
      "grad_norm": 0.1172846257686615,
      "learning_rate": 1.6552782957637897e-06,
      "loss": 0.0115,
      "step": 2552840
    },
    {
      "epoch": 4.177811381028128,
      "grad_norm": 0.1547325700521469,
      "learning_rate": 1.6552124035502727e-06,
      "loss": 0.0063,
      "step": 2552860
    },
    {
      "epoch": 4.177844111466782,
      "grad_norm": 0.12529021501541138,
      "learning_rate": 1.6551465113367554e-06,
      "loss": 0.0121,
      "step": 2552880
    },
    {
      "epoch": 4.1778768419054355,
      "grad_norm": 0.10846824944019318,
      "learning_rate": 1.6550806191232381e-06,
      "loss": 0.0107,
      "step": 2552900
    },
    {
      "epoch": 4.177909572344088,
      "grad_norm": 0.22522681951522827,
      "learning_rate": 1.655014726909721e-06,
      "loss": 0.0078,
      "step": 2552920
    },
    {
      "epoch": 4.177942302782742,
      "grad_norm": 0.08061403036117554,
      "learning_rate": 1.6549488346962043e-06,
      "loss": 0.0057,
      "step": 2552940
    },
    {
      "epoch": 4.177975033221395,
      "grad_norm": 0.09035276621580124,
      "learning_rate": 1.654882942482687e-06,
      "loss": 0.0091,
      "step": 2552960
    },
    {
      "epoch": 4.178007763660049,
      "grad_norm": 0.6719376444816589,
      "learning_rate": 1.65481705026917e-06,
      "loss": 0.0146,
      "step": 2552980
    },
    {
      "epoch": 4.178040494098702,
      "grad_norm": 0.32533758878707886,
      "learning_rate": 1.6547511580556527e-06,
      "loss": 0.013,
      "step": 2553000
    },
    {
      "epoch": 4.178073224537355,
      "grad_norm": 0.04620160907506943,
      "learning_rate": 1.6546852658421356e-06,
      "loss": 0.0063,
      "step": 2553020
    },
    {
      "epoch": 4.178105954976009,
      "grad_norm": 0.6602732539176941,
      "learning_rate": 1.6546193736286184e-06,
      "loss": 0.0089,
      "step": 2553040
    },
    {
      "epoch": 4.178138685414662,
      "grad_norm": 0.3460543155670166,
      "learning_rate": 1.6545534814151013e-06,
      "loss": 0.0118,
      "step": 2553060
    },
    {
      "epoch": 4.178171415853315,
      "grad_norm": 0.19810768961906433,
      "learning_rate": 1.654487589201584e-06,
      "loss": 0.0081,
      "step": 2553080
    },
    {
      "epoch": 4.178204146291969,
      "grad_norm": 0.2251800298690796,
      "learning_rate": 1.654421696988067e-06,
      "loss": 0.0086,
      "step": 2553100
    },
    {
      "epoch": 4.178236876730622,
      "grad_norm": 0.16922059655189514,
      "learning_rate": 1.65435580477455e-06,
      "loss": 0.0065,
      "step": 2553120
    },
    {
      "epoch": 4.178269607169275,
      "grad_norm": 0.1737285852432251,
      "learning_rate": 1.654289912561033e-06,
      "loss": 0.007,
      "step": 2553140
    },
    {
      "epoch": 4.178302337607929,
      "grad_norm": 0.154258131980896,
      "learning_rate": 1.6542240203475157e-06,
      "loss": 0.007,
      "step": 2553160
    },
    {
      "epoch": 4.178335068046582,
      "grad_norm": 0.1522711217403412,
      "learning_rate": 1.6541581281339986e-06,
      "loss": 0.0066,
      "step": 2553180
    },
    {
      "epoch": 4.178367798485235,
      "grad_norm": 0.3189893066883087,
      "learning_rate": 1.6540922359204814e-06,
      "loss": 0.0144,
      "step": 2553200
    },
    {
      "epoch": 4.178400528923889,
      "grad_norm": 0.09802649915218353,
      "learning_rate": 1.6540263437069643e-06,
      "loss": 0.0061,
      "step": 2553220
    },
    {
      "epoch": 4.178433259362542,
      "grad_norm": 0.24657562375068665,
      "learning_rate": 1.653960451493447e-06,
      "loss": 0.0088,
      "step": 2553240
    },
    {
      "epoch": 4.178465989801195,
      "grad_norm": 0.9758524894714355,
      "learning_rate": 1.65389455927993e-06,
      "loss": 0.0125,
      "step": 2553260
    },
    {
      "epoch": 4.1784987202398485,
      "grad_norm": 0.08121660351753235,
      "learning_rate": 1.6538286670664127e-06,
      "loss": 0.0133,
      "step": 2553280
    },
    {
      "epoch": 4.178531450678502,
      "grad_norm": 0.2309417724609375,
      "learning_rate": 1.653762774852896e-06,
      "loss": 0.0107,
      "step": 2553300
    },
    {
      "epoch": 4.178564181117156,
      "grad_norm": 0.2043483853340149,
      "learning_rate": 1.6536968826393786e-06,
      "loss": 0.0069,
      "step": 2553320
    },
    {
      "epoch": 4.178596911555808,
      "grad_norm": 0.08819923549890518,
      "learning_rate": 1.6536309904258616e-06,
      "loss": 0.0118,
      "step": 2553340
    },
    {
      "epoch": 4.178629641994462,
      "grad_norm": 0.14887070655822754,
      "learning_rate": 1.6535650982123443e-06,
      "loss": 0.0139,
      "step": 2553360
    },
    {
      "epoch": 4.178662372433116,
      "grad_norm": 0.41447049379348755,
      "learning_rate": 1.6534992059988273e-06,
      "loss": 0.0122,
      "step": 2553380
    },
    {
      "epoch": 4.178695102871768,
      "grad_norm": 0.28956037759780884,
      "learning_rate": 1.65343331378531e-06,
      "loss": 0.0084,
      "step": 2553400
    },
    {
      "epoch": 4.178727833310422,
      "grad_norm": 0.15629522502422333,
      "learning_rate": 1.653367421571793e-06,
      "loss": 0.0081,
      "step": 2553420
    },
    {
      "epoch": 4.1787605637490755,
      "grad_norm": 0.1771591603755951,
      "learning_rate": 1.6533015293582757e-06,
      "loss": 0.0123,
      "step": 2553440
    },
    {
      "epoch": 4.178793294187729,
      "grad_norm": 0.5260421633720398,
      "learning_rate": 1.6532356371447589e-06,
      "loss": 0.0063,
      "step": 2553460
    },
    {
      "epoch": 4.178826024626382,
      "grad_norm": 0.38898879289627075,
      "learning_rate": 1.6531697449312416e-06,
      "loss": 0.0122,
      "step": 2553480
    },
    {
      "epoch": 4.178858755065035,
      "grad_norm": 0.21828188002109528,
      "learning_rate": 1.6531038527177246e-06,
      "loss": 0.0076,
      "step": 2553500
    },
    {
      "epoch": 4.178891485503689,
      "grad_norm": 0.2066792994737625,
      "learning_rate": 1.6530379605042073e-06,
      "loss": 0.0124,
      "step": 2553520
    },
    {
      "epoch": 4.178924215942342,
      "grad_norm": 0.25384774804115295,
      "learning_rate": 1.6529720682906903e-06,
      "loss": 0.0058,
      "step": 2553540
    },
    {
      "epoch": 4.178956946380995,
      "grad_norm": 0.16757743060588837,
      "learning_rate": 1.652906176077173e-06,
      "loss": 0.0065,
      "step": 2553560
    },
    {
      "epoch": 4.178989676819649,
      "grad_norm": 0.15122535824775696,
      "learning_rate": 1.652840283863656e-06,
      "loss": 0.0158,
      "step": 2553580
    },
    {
      "epoch": 4.179022407258302,
      "grad_norm": 0.14033950865268707,
      "learning_rate": 1.6527743916501387e-06,
      "loss": 0.009,
      "step": 2553600
    },
    {
      "epoch": 4.179055137696955,
      "grad_norm": 0.11272816359996796,
      "learning_rate": 1.6527084994366217e-06,
      "loss": 0.0095,
      "step": 2553620
    },
    {
      "epoch": 4.179087868135609,
      "grad_norm": 0.3906879425048828,
      "learning_rate": 1.6526426072231048e-06,
      "loss": 0.0106,
      "step": 2553640
    },
    {
      "epoch": 4.179120598574262,
      "grad_norm": 0.36928999423980713,
      "learning_rate": 1.6525767150095876e-06,
      "loss": 0.0085,
      "step": 2553660
    },
    {
      "epoch": 4.179153329012915,
      "grad_norm": 0.2925512194633484,
      "learning_rate": 1.6525108227960703e-06,
      "loss": 0.0135,
      "step": 2553680
    },
    {
      "epoch": 4.179186059451569,
      "grad_norm": 0.12469184398651123,
      "learning_rate": 1.6524449305825532e-06,
      "loss": 0.0067,
      "step": 2553700
    },
    {
      "epoch": 4.179218789890222,
      "grad_norm": 0.5177540183067322,
      "learning_rate": 1.652379038369036e-06,
      "loss": 0.0088,
      "step": 2553720
    },
    {
      "epoch": 4.179251520328876,
      "grad_norm": 0.21334271132946014,
      "learning_rate": 1.652313146155519e-06,
      "loss": 0.009,
      "step": 2553740
    },
    {
      "epoch": 4.1792842507675285,
      "grad_norm": 0.05721557140350342,
      "learning_rate": 1.6522472539420017e-06,
      "loss": 0.0072,
      "step": 2553760
    },
    {
      "epoch": 4.179316981206182,
      "grad_norm": 0.04552651196718216,
      "learning_rate": 1.6521813617284846e-06,
      "loss": 0.0105,
      "step": 2553780
    },
    {
      "epoch": 4.179349711644836,
      "grad_norm": 0.17261408269405365,
      "learning_rate": 1.6521154695149674e-06,
      "loss": 0.0074,
      "step": 2553800
    },
    {
      "epoch": 4.179382442083488,
      "grad_norm": 0.17934028804302216,
      "learning_rate": 1.6520495773014505e-06,
      "loss": 0.0067,
      "step": 2553820
    },
    {
      "epoch": 4.179415172522142,
      "grad_norm": 0.30593252182006836,
      "learning_rate": 1.6519836850879335e-06,
      "loss": 0.0153,
      "step": 2553840
    },
    {
      "epoch": 4.179447902960796,
      "grad_norm": 0.2542681396007538,
      "learning_rate": 1.6519177928744162e-06,
      "loss": 0.0108,
      "step": 2553860
    },
    {
      "epoch": 4.179480633399449,
      "grad_norm": 0.4453730285167694,
      "learning_rate": 1.651851900660899e-06,
      "loss": 0.0141,
      "step": 2553880
    },
    {
      "epoch": 4.179513363838102,
      "grad_norm": 0.14479675889015198,
      "learning_rate": 1.651786008447382e-06,
      "loss": 0.0071,
      "step": 2553900
    },
    {
      "epoch": 4.1795460942767555,
      "grad_norm": 0.20787088572978973,
      "learning_rate": 1.6517201162338647e-06,
      "loss": 0.0108,
      "step": 2553920
    },
    {
      "epoch": 4.179578824715409,
      "grad_norm": 0.19183343648910522,
      "learning_rate": 1.6516542240203476e-06,
      "loss": 0.0081,
      "step": 2553940
    },
    {
      "epoch": 4.179611555154062,
      "grad_norm": 0.43628057837486267,
      "learning_rate": 1.6515883318068303e-06,
      "loss": 0.0096,
      "step": 2553960
    },
    {
      "epoch": 4.179644285592715,
      "grad_norm": 0.24087636172771454,
      "learning_rate": 1.6515224395933133e-06,
      "loss": 0.0075,
      "step": 2553980
    },
    {
      "epoch": 4.179677016031369,
      "grad_norm": 0.11982422322034836,
      "learning_rate": 1.6514565473797965e-06,
      "loss": 0.0094,
      "step": 2554000
    },
    {
      "epoch": 4.179709746470023,
      "grad_norm": 0.2940010726451874,
      "learning_rate": 1.6513906551662792e-06,
      "loss": 0.0101,
      "step": 2554020
    },
    {
      "epoch": 4.179742476908675,
      "grad_norm": 0.2506527006626129,
      "learning_rate": 1.6513247629527621e-06,
      "loss": 0.0105,
      "step": 2554040
    },
    {
      "epoch": 4.179775207347329,
      "grad_norm": 0.1668194681406021,
      "learning_rate": 1.6512588707392449e-06,
      "loss": 0.0062,
      "step": 2554060
    },
    {
      "epoch": 4.1798079377859825,
      "grad_norm": 0.44027647376060486,
      "learning_rate": 1.6511929785257278e-06,
      "loss": 0.0132,
      "step": 2554080
    },
    {
      "epoch": 4.179840668224635,
      "grad_norm": 0.5975481867790222,
      "learning_rate": 1.6511270863122106e-06,
      "loss": 0.0056,
      "step": 2554100
    },
    {
      "epoch": 4.179873398663289,
      "grad_norm": 0.06597917526960373,
      "learning_rate": 1.6510611940986933e-06,
      "loss": 0.0106,
      "step": 2554120
    },
    {
      "epoch": 4.179906129101942,
      "grad_norm": 0.18825027346611023,
      "learning_rate": 1.6509953018851763e-06,
      "loss": 0.0055,
      "step": 2554140
    },
    {
      "epoch": 4.179938859540596,
      "grad_norm": 0.09817937761545181,
      "learning_rate": 1.650929409671659e-06,
      "loss": 0.0095,
      "step": 2554160
    },
    {
      "epoch": 4.179971589979249,
      "grad_norm": 0.22581827640533447,
      "learning_rate": 1.6508635174581422e-06,
      "loss": 0.0098,
      "step": 2554180
    },
    {
      "epoch": 4.180004320417902,
      "grad_norm": 0.11020845919847488,
      "learning_rate": 1.6507976252446251e-06,
      "loss": 0.0071,
      "step": 2554200
    },
    {
      "epoch": 4.180037050856556,
      "grad_norm": 0.3033095598220825,
      "learning_rate": 1.6507317330311079e-06,
      "loss": 0.0086,
      "step": 2554220
    },
    {
      "epoch": 4.180069781295209,
      "grad_norm": 0.36458316445350647,
      "learning_rate": 1.6506658408175908e-06,
      "loss": 0.0153,
      "step": 2554240
    },
    {
      "epoch": 4.180102511733862,
      "grad_norm": 0.07761501520872116,
      "learning_rate": 1.6505999486040736e-06,
      "loss": 0.0074,
      "step": 2554260
    },
    {
      "epoch": 4.180135242172516,
      "grad_norm": 0.20457002520561218,
      "learning_rate": 1.6505340563905565e-06,
      "loss": 0.0098,
      "step": 2554280
    },
    {
      "epoch": 4.180167972611169,
      "grad_norm": 0.11687961220741272,
      "learning_rate": 1.6504681641770392e-06,
      "loss": 0.0049,
      "step": 2554300
    },
    {
      "epoch": 4.180200703049822,
      "grad_norm": 0.38228487968444824,
      "learning_rate": 1.6504022719635222e-06,
      "loss": 0.0124,
      "step": 2554320
    },
    {
      "epoch": 4.180233433488476,
      "grad_norm": 0.11870323866605759,
      "learning_rate": 1.6503363797500052e-06,
      "loss": 0.0104,
      "step": 2554340
    },
    {
      "epoch": 4.180266163927129,
      "grad_norm": 0.1656547635793686,
      "learning_rate": 1.650270487536488e-06,
      "loss": 0.0088,
      "step": 2554360
    },
    {
      "epoch": 4.180298894365782,
      "grad_norm": 0.11080410331487656,
      "learning_rate": 1.6502045953229708e-06,
      "loss": 0.0086,
      "step": 2554380
    },
    {
      "epoch": 4.1803316248044355,
      "grad_norm": 0.24932250380516052,
      "learning_rate": 1.6501387031094538e-06,
      "loss": 0.0093,
      "step": 2554400
    },
    {
      "epoch": 4.180364355243089,
      "grad_norm": 0.19410914182662964,
      "learning_rate": 1.6500728108959365e-06,
      "loss": 0.0096,
      "step": 2554420
    },
    {
      "epoch": 4.180397085681743,
      "grad_norm": 0.4386909306049347,
      "learning_rate": 1.6500069186824195e-06,
      "loss": 0.0076,
      "step": 2554440
    },
    {
      "epoch": 4.180429816120395,
      "grad_norm": 0.36597272753715515,
      "learning_rate": 1.6499410264689022e-06,
      "loss": 0.0088,
      "step": 2554460
    },
    {
      "epoch": 4.180462546559049,
      "grad_norm": 0.1867021769285202,
      "learning_rate": 1.6498751342553852e-06,
      "loss": 0.0101,
      "step": 2554480
    },
    {
      "epoch": 4.180495276997703,
      "grad_norm": 0.4541454315185547,
      "learning_rate": 1.649809242041868e-06,
      "loss": 0.0087,
      "step": 2554500
    },
    {
      "epoch": 4.180528007436355,
      "grad_norm": 0.15356512367725372,
      "learning_rate": 1.649743349828351e-06,
      "loss": 0.0089,
      "step": 2554520
    },
    {
      "epoch": 4.180560737875009,
      "grad_norm": 0.15304218232631683,
      "learning_rate": 1.6496774576148338e-06,
      "loss": 0.0083,
      "step": 2554540
    },
    {
      "epoch": 4.1805934683136625,
      "grad_norm": 0.4581451714038849,
      "learning_rate": 1.6496115654013168e-06,
      "loss": 0.0125,
      "step": 2554560
    },
    {
      "epoch": 4.180626198752316,
      "grad_norm": 0.07996417582035065,
      "learning_rate": 1.6495456731877995e-06,
      "loss": 0.0066,
      "step": 2554580
    },
    {
      "epoch": 4.180658929190969,
      "grad_norm": 0.30151912569999695,
      "learning_rate": 1.6494797809742825e-06,
      "loss": 0.0102,
      "step": 2554600
    },
    {
      "epoch": 4.180691659629622,
      "grad_norm": 0.14581741392612457,
      "learning_rate": 1.6494138887607652e-06,
      "loss": 0.0075,
      "step": 2554620
    },
    {
      "epoch": 4.180724390068276,
      "grad_norm": 0.5441003441810608,
      "learning_rate": 1.6493479965472482e-06,
      "loss": 0.0081,
      "step": 2554640
    },
    {
      "epoch": 4.180757120506929,
      "grad_norm": 0.2618129849433899,
      "learning_rate": 1.6492821043337309e-06,
      "loss": 0.0085,
      "step": 2554660
    },
    {
      "epoch": 4.180789850945582,
      "grad_norm": 0.14036644995212555,
      "learning_rate": 1.6492162121202138e-06,
      "loss": 0.0082,
      "step": 2554680
    },
    {
      "epoch": 4.180822581384236,
      "grad_norm": 0.28831058740615845,
      "learning_rate": 1.6491503199066968e-06,
      "loss": 0.0097,
      "step": 2554700
    },
    {
      "epoch": 4.180855311822889,
      "grad_norm": 0.26700127124786377,
      "learning_rate": 1.6490844276931797e-06,
      "loss": 0.0087,
      "step": 2554720
    },
    {
      "epoch": 4.180888042261542,
      "grad_norm": 0.16588592529296875,
      "learning_rate": 1.6490185354796625e-06,
      "loss": 0.0099,
      "step": 2554740
    },
    {
      "epoch": 4.180920772700196,
      "grad_norm": 0.10141799598932266,
      "learning_rate": 1.6489526432661454e-06,
      "loss": 0.0066,
      "step": 2554760
    },
    {
      "epoch": 4.180953503138849,
      "grad_norm": 0.168443500995636,
      "learning_rate": 1.6488867510526282e-06,
      "loss": 0.0101,
      "step": 2554780
    },
    {
      "epoch": 4.180986233577502,
      "grad_norm": 0.22988316416740417,
      "learning_rate": 1.6488208588391111e-06,
      "loss": 0.01,
      "step": 2554800
    },
    {
      "epoch": 4.181018964016156,
      "grad_norm": 0.5739952921867371,
      "learning_rate": 1.6487549666255939e-06,
      "loss": 0.0105,
      "step": 2554820
    },
    {
      "epoch": 4.181051694454809,
      "grad_norm": 0.26316067576408386,
      "learning_rate": 1.6486890744120768e-06,
      "loss": 0.0122,
      "step": 2554840
    },
    {
      "epoch": 4.181084424893462,
      "grad_norm": 0.32072755694389343,
      "learning_rate": 1.6486231821985596e-06,
      "loss": 0.0099,
      "step": 2554860
    },
    {
      "epoch": 4.181117155332116,
      "grad_norm": 0.1420949250459671,
      "learning_rate": 1.6485572899850427e-06,
      "loss": 0.0088,
      "step": 2554880
    },
    {
      "epoch": 4.181149885770769,
      "grad_norm": 0.324238121509552,
      "learning_rate": 1.6484913977715255e-06,
      "loss": 0.0102,
      "step": 2554900
    },
    {
      "epoch": 4.181182616209423,
      "grad_norm": 0.31172654032707214,
      "learning_rate": 1.6484255055580084e-06,
      "loss": 0.0082,
      "step": 2554920
    },
    {
      "epoch": 4.1812153466480755,
      "grad_norm": 0.2572705149650574,
      "learning_rate": 1.6483596133444912e-06,
      "loss": 0.0101,
      "step": 2554940
    },
    {
      "epoch": 4.181248077086729,
      "grad_norm": 0.5910317897796631,
      "learning_rate": 1.6482937211309741e-06,
      "loss": 0.01,
      "step": 2554960
    },
    {
      "epoch": 4.181280807525383,
      "grad_norm": 0.3023409843444824,
      "learning_rate": 1.6482278289174568e-06,
      "loss": 0.0136,
      "step": 2554980
    },
    {
      "epoch": 4.181313537964035,
      "grad_norm": 0.28727394342422485,
      "learning_rate": 1.6481619367039398e-06,
      "loss": 0.0121,
      "step": 2555000
    },
    {
      "epoch": 4.181346268402689,
      "grad_norm": 0.2944599390029907,
      "learning_rate": 1.6480960444904225e-06,
      "loss": 0.0104,
      "step": 2555020
    },
    {
      "epoch": 4.181378998841343,
      "grad_norm": 0.1754215955734253,
      "learning_rate": 1.6480301522769055e-06,
      "loss": 0.0065,
      "step": 2555040
    },
    {
      "epoch": 4.181411729279996,
      "grad_norm": 0.18631142377853394,
      "learning_rate": 1.6479642600633887e-06,
      "loss": 0.0157,
      "step": 2555060
    },
    {
      "epoch": 4.181444459718649,
      "grad_norm": 0.2286129891872406,
      "learning_rate": 1.6478983678498714e-06,
      "loss": 0.0064,
      "step": 2555080
    },
    {
      "epoch": 4.1814771901573025,
      "grad_norm": 0.99270099401474,
      "learning_rate": 1.6478324756363541e-06,
      "loss": 0.0085,
      "step": 2555100
    },
    {
      "epoch": 4.181509920595956,
      "grad_norm": 0.20025767385959625,
      "learning_rate": 1.647766583422837e-06,
      "loss": 0.0113,
      "step": 2555120
    },
    {
      "epoch": 4.181542651034609,
      "grad_norm": 0.4438316226005554,
      "learning_rate": 1.6477006912093198e-06,
      "loss": 0.0132,
      "step": 2555140
    },
    {
      "epoch": 4.181575381473262,
      "grad_norm": 0.3545026183128357,
      "learning_rate": 1.6476347989958028e-06,
      "loss": 0.0064,
      "step": 2555160
    },
    {
      "epoch": 4.181608111911916,
      "grad_norm": 0.34164541959762573,
      "learning_rate": 1.6475689067822855e-06,
      "loss": 0.009,
      "step": 2555180
    },
    {
      "epoch": 4.1816408423505695,
      "grad_norm": 0.3661482632160187,
      "learning_rate": 1.6475030145687685e-06,
      "loss": 0.0107,
      "step": 2555200
    },
    {
      "epoch": 4.181673572789222,
      "grad_norm": 0.22559794783592224,
      "learning_rate": 1.6474371223552516e-06,
      "loss": 0.0101,
      "step": 2555220
    },
    {
      "epoch": 4.181706303227876,
      "grad_norm": 0.21709619462490082,
      "learning_rate": 1.6473712301417344e-06,
      "loss": 0.0095,
      "step": 2555240
    },
    {
      "epoch": 4.181739033666529,
      "grad_norm": 0.21592579782009125,
      "learning_rate": 1.6473053379282173e-06,
      "loss": 0.0087,
      "step": 2555260
    },
    {
      "epoch": 4.181771764105182,
      "grad_norm": 0.223271906375885,
      "learning_rate": 1.6472394457147e-06,
      "loss": 0.01,
      "step": 2555280
    },
    {
      "epoch": 4.181804494543836,
      "grad_norm": 0.16598844528198242,
      "learning_rate": 1.647173553501183e-06,
      "loss": 0.0065,
      "step": 2555300
    },
    {
      "epoch": 4.181837224982489,
      "grad_norm": 0.5091468691825867,
      "learning_rate": 1.6471076612876658e-06,
      "loss": 0.0093,
      "step": 2555320
    },
    {
      "epoch": 4.181869955421143,
      "grad_norm": 0.23604273796081543,
      "learning_rate": 1.6470417690741485e-06,
      "loss": 0.0085,
      "step": 2555340
    },
    {
      "epoch": 4.181902685859796,
      "grad_norm": 0.4014119505882263,
      "learning_rate": 1.6469758768606314e-06,
      "loss": 0.0116,
      "step": 2555360
    },
    {
      "epoch": 4.181935416298449,
      "grad_norm": 0.25274989008903503,
      "learning_rate": 1.6469099846471142e-06,
      "loss": 0.0079,
      "step": 2555380
    },
    {
      "epoch": 4.181968146737103,
      "grad_norm": 0.038120124489068985,
      "learning_rate": 1.6468440924335973e-06,
      "loss": 0.0089,
      "step": 2555400
    },
    {
      "epoch": 4.1820008771757555,
      "grad_norm": 0.16991102695465088,
      "learning_rate": 1.6467782002200803e-06,
      "loss": 0.0096,
      "step": 2555420
    },
    {
      "epoch": 4.182033607614409,
      "grad_norm": 0.3183586001396179,
      "learning_rate": 1.646712308006563e-06,
      "loss": 0.0087,
      "step": 2555440
    },
    {
      "epoch": 4.182066338053063,
      "grad_norm": 0.1190151497721672,
      "learning_rate": 1.646646415793046e-06,
      "loss": 0.0118,
      "step": 2555460
    },
    {
      "epoch": 4.182099068491716,
      "grad_norm": 0.3101310431957245,
      "learning_rate": 1.6465805235795287e-06,
      "loss": 0.0087,
      "step": 2555480
    },
    {
      "epoch": 4.182131798930369,
      "grad_norm": 0.18278080224990845,
      "learning_rate": 1.6465146313660117e-06,
      "loss": 0.0104,
      "step": 2555500
    },
    {
      "epoch": 4.182164529369023,
      "grad_norm": 0.2016928642988205,
      "learning_rate": 1.6464487391524944e-06,
      "loss": 0.0112,
      "step": 2555520
    },
    {
      "epoch": 4.182197259807676,
      "grad_norm": 0.09047175198793411,
      "learning_rate": 1.6463828469389772e-06,
      "loss": 0.0099,
      "step": 2555540
    },
    {
      "epoch": 4.182229990246329,
      "grad_norm": 0.21244698762893677,
      "learning_rate": 1.6463169547254601e-06,
      "loss": 0.0111,
      "step": 2555560
    },
    {
      "epoch": 4.1822627206849825,
      "grad_norm": 0.32614660263061523,
      "learning_rate": 1.6462510625119433e-06,
      "loss": 0.0083,
      "step": 2555580
    },
    {
      "epoch": 4.182295451123636,
      "grad_norm": 0.3024519085884094,
      "learning_rate": 1.646185170298426e-06,
      "loss": 0.0116,
      "step": 2555600
    },
    {
      "epoch": 4.18232818156229,
      "grad_norm": 0.15899735689163208,
      "learning_rate": 1.646119278084909e-06,
      "loss": 0.0131,
      "step": 2555620
    },
    {
      "epoch": 4.182360912000942,
      "grad_norm": 0.11555604636669159,
      "learning_rate": 1.6460533858713917e-06,
      "loss": 0.006,
      "step": 2555640
    },
    {
      "epoch": 4.182393642439596,
      "grad_norm": 0.22399885952472687,
      "learning_rate": 1.6459874936578747e-06,
      "loss": 0.0064,
      "step": 2555660
    },
    {
      "epoch": 4.18242637287825,
      "grad_norm": 0.1352589726448059,
      "learning_rate": 1.6459216014443574e-06,
      "loss": 0.0108,
      "step": 2555680
    },
    {
      "epoch": 4.182459103316902,
      "grad_norm": 0.2987053692340851,
      "learning_rate": 1.6458557092308403e-06,
      "loss": 0.0128,
      "step": 2555700
    },
    {
      "epoch": 4.182491833755556,
      "grad_norm": 0.3687860071659088,
      "learning_rate": 1.645789817017323e-06,
      "loss": 0.0077,
      "step": 2555720
    },
    {
      "epoch": 4.1825245641942095,
      "grad_norm": 0.28969570994377136,
      "learning_rate": 1.645723924803806e-06,
      "loss": 0.0081,
      "step": 2555740
    },
    {
      "epoch": 4.182557294632863,
      "grad_norm": 0.32426002621650696,
      "learning_rate": 1.645658032590289e-06,
      "loss": 0.0101,
      "step": 2555760
    },
    {
      "epoch": 4.182590025071516,
      "grad_norm": 0.48954930901527405,
      "learning_rate": 1.645592140376772e-06,
      "loss": 0.0096,
      "step": 2555780
    },
    {
      "epoch": 4.182622755510169,
      "grad_norm": 0.2762114703655243,
      "learning_rate": 1.6455262481632547e-06,
      "loss": 0.0077,
      "step": 2555800
    },
    {
      "epoch": 4.182655485948823,
      "grad_norm": 0.16085654497146606,
      "learning_rate": 1.6454603559497376e-06,
      "loss": 0.0096,
      "step": 2555820
    },
    {
      "epoch": 4.182688216387476,
      "grad_norm": 0.21141478419303894,
      "learning_rate": 1.6453944637362204e-06,
      "loss": 0.0079,
      "step": 2555840
    },
    {
      "epoch": 4.182720946826129,
      "grad_norm": 0.2994612157344818,
      "learning_rate": 1.6453285715227033e-06,
      "loss": 0.0108,
      "step": 2555860
    },
    {
      "epoch": 4.182753677264783,
      "grad_norm": 0.3743368983268738,
      "learning_rate": 1.645262679309186e-06,
      "loss": 0.0145,
      "step": 2555880
    },
    {
      "epoch": 4.1827864077034365,
      "grad_norm": 0.389241099357605,
      "learning_rate": 1.645196787095669e-06,
      "loss": 0.012,
      "step": 2555900
    },
    {
      "epoch": 4.182819138142089,
      "grad_norm": 0.3499990403652191,
      "learning_rate": 1.6451308948821518e-06,
      "loss": 0.0104,
      "step": 2555920
    },
    {
      "epoch": 4.182851868580743,
      "grad_norm": 0.1734987050294876,
      "learning_rate": 1.645065002668635e-06,
      "loss": 0.0123,
      "step": 2555940
    },
    {
      "epoch": 4.182884599019396,
      "grad_norm": 0.6317214369773865,
      "learning_rate": 1.6449991104551177e-06,
      "loss": 0.0187,
      "step": 2555960
    },
    {
      "epoch": 4.182917329458049,
      "grad_norm": 0.13961069285869598,
      "learning_rate": 1.6449332182416006e-06,
      "loss": 0.0083,
      "step": 2555980
    },
    {
      "epoch": 4.182950059896703,
      "grad_norm": 0.18668276071548462,
      "learning_rate": 1.6448673260280834e-06,
      "loss": 0.0082,
      "step": 2556000
    },
    {
      "epoch": 4.182982790335356,
      "grad_norm": 0.14272554218769073,
      "learning_rate": 1.6448014338145663e-06,
      "loss": 0.0093,
      "step": 2556020
    },
    {
      "epoch": 4.18301552077401,
      "grad_norm": 0.13036948442459106,
      "learning_rate": 1.644735541601049e-06,
      "loss": 0.0099,
      "step": 2556040
    },
    {
      "epoch": 4.1830482512126625,
      "grad_norm": 0.38313084840774536,
      "learning_rate": 1.644669649387532e-06,
      "loss": 0.0074,
      "step": 2556060
    },
    {
      "epoch": 4.183080981651316,
      "grad_norm": 0.29847946763038635,
      "learning_rate": 1.6446037571740147e-06,
      "loss": 0.0133,
      "step": 2556080
    },
    {
      "epoch": 4.18311371208997,
      "grad_norm": 0.3184802234172821,
      "learning_rate": 1.644537864960498e-06,
      "loss": 0.0066,
      "step": 2556100
    },
    {
      "epoch": 4.183146442528622,
      "grad_norm": 0.14928536117076874,
      "learning_rate": 1.6444719727469806e-06,
      "loss": 0.0069,
      "step": 2556120
    },
    {
      "epoch": 4.183179172967276,
      "grad_norm": 0.1086498498916626,
      "learning_rate": 1.6444060805334636e-06,
      "loss": 0.0069,
      "step": 2556140
    },
    {
      "epoch": 4.18321190340593,
      "grad_norm": 0.41046804189682007,
      "learning_rate": 1.6443401883199463e-06,
      "loss": 0.0105,
      "step": 2556160
    },
    {
      "epoch": 4.183244633844582,
      "grad_norm": 0.17531819641590118,
      "learning_rate": 1.6442742961064293e-06,
      "loss": 0.0102,
      "step": 2556180
    },
    {
      "epoch": 4.183277364283236,
      "grad_norm": 0.24226845800876617,
      "learning_rate": 1.644208403892912e-06,
      "loss": 0.0095,
      "step": 2556200
    },
    {
      "epoch": 4.1833100947218895,
      "grad_norm": 0.2005053162574768,
      "learning_rate": 1.644142511679395e-06,
      "loss": 0.0069,
      "step": 2556220
    },
    {
      "epoch": 4.183342825160543,
      "grad_norm": 0.28651729226112366,
      "learning_rate": 1.6440766194658777e-06,
      "loss": 0.009,
      "step": 2556240
    },
    {
      "epoch": 4.183375555599196,
      "grad_norm": 0.07437869161367416,
      "learning_rate": 1.6440107272523607e-06,
      "loss": 0.007,
      "step": 2556260
    },
    {
      "epoch": 4.183408286037849,
      "grad_norm": 0.6226508617401123,
      "learning_rate": 1.6439448350388438e-06,
      "loss": 0.0096,
      "step": 2556280
    },
    {
      "epoch": 4.183441016476503,
      "grad_norm": 0.1556771695613861,
      "learning_rate": 1.6438789428253266e-06,
      "loss": 0.0074,
      "step": 2556300
    },
    {
      "epoch": 4.183473746915157,
      "grad_norm": 0.23731614649295807,
      "learning_rate": 1.6438130506118093e-06,
      "loss": 0.0143,
      "step": 2556320
    },
    {
      "epoch": 4.183506477353809,
      "grad_norm": 0.6071305274963379,
      "learning_rate": 1.6437471583982923e-06,
      "loss": 0.0099,
      "step": 2556340
    },
    {
      "epoch": 4.183539207792463,
      "grad_norm": 0.34460529685020447,
      "learning_rate": 1.643681266184775e-06,
      "loss": 0.0056,
      "step": 2556360
    },
    {
      "epoch": 4.1835719382311165,
      "grad_norm": 0.14081791043281555,
      "learning_rate": 1.643615373971258e-06,
      "loss": 0.0107,
      "step": 2556380
    },
    {
      "epoch": 4.183604668669769,
      "grad_norm": 0.2115364968776703,
      "learning_rate": 1.6435494817577407e-06,
      "loss": 0.0091,
      "step": 2556400
    },
    {
      "epoch": 4.183637399108423,
      "grad_norm": 0.1973928064107895,
      "learning_rate": 1.6434835895442236e-06,
      "loss": 0.0082,
      "step": 2556420
    },
    {
      "epoch": 4.183670129547076,
      "grad_norm": 0.08878731727600098,
      "learning_rate": 1.6434176973307064e-06,
      "loss": 0.0062,
      "step": 2556440
    },
    {
      "epoch": 4.183702859985729,
      "grad_norm": 0.20278716087341309,
      "learning_rate": 1.6433518051171895e-06,
      "loss": 0.0094,
      "step": 2556460
    },
    {
      "epoch": 4.183735590424383,
      "grad_norm": 0.5672487020492554,
      "learning_rate": 1.6432859129036725e-06,
      "loss": 0.0085,
      "step": 2556480
    },
    {
      "epoch": 4.183768320863036,
      "grad_norm": 0.20579805970191956,
      "learning_rate": 1.6432200206901552e-06,
      "loss": 0.0086,
      "step": 2556500
    },
    {
      "epoch": 4.18380105130169,
      "grad_norm": 0.4408644735813141,
      "learning_rate": 1.643154128476638e-06,
      "loss": 0.0069,
      "step": 2556520
    },
    {
      "epoch": 4.183833781740343,
      "grad_norm": 0.25227248668670654,
      "learning_rate": 1.643088236263121e-06,
      "loss": 0.0081,
      "step": 2556540
    },
    {
      "epoch": 4.183866512178996,
      "grad_norm": 0.16728080809116364,
      "learning_rate": 1.6430223440496037e-06,
      "loss": 0.0089,
      "step": 2556560
    },
    {
      "epoch": 4.18389924261765,
      "grad_norm": 0.14466649293899536,
      "learning_rate": 1.6429564518360866e-06,
      "loss": 0.0101,
      "step": 2556580
    },
    {
      "epoch": 4.1839319730563025,
      "grad_norm": 0.1516883671283722,
      "learning_rate": 1.6428905596225694e-06,
      "loss": 0.0095,
      "step": 2556600
    },
    {
      "epoch": 4.183964703494956,
      "grad_norm": 0.500619649887085,
      "learning_rate": 1.6428246674090523e-06,
      "loss": 0.0158,
      "step": 2556620
    },
    {
      "epoch": 4.18399743393361,
      "grad_norm": 0.4160151481628418,
      "learning_rate": 1.6427587751955355e-06,
      "loss": 0.01,
      "step": 2556640
    },
    {
      "epoch": 4.184030164372263,
      "grad_norm": 0.3443698585033417,
      "learning_rate": 1.6426928829820182e-06,
      "loss": 0.0076,
      "step": 2556660
    },
    {
      "epoch": 4.184062894810916,
      "grad_norm": 0.3542906641960144,
      "learning_rate": 1.6426269907685012e-06,
      "loss": 0.0083,
      "step": 2556680
    },
    {
      "epoch": 4.18409562524957,
      "grad_norm": 0.25089511275291443,
      "learning_rate": 1.642561098554984e-06,
      "loss": 0.014,
      "step": 2556700
    },
    {
      "epoch": 4.184128355688223,
      "grad_norm": 0.15228882431983948,
      "learning_rate": 1.6424952063414669e-06,
      "loss": 0.0167,
      "step": 2556720
    },
    {
      "epoch": 4.184161086126876,
      "grad_norm": 0.41771483421325684,
      "learning_rate": 1.6424293141279496e-06,
      "loss": 0.0052,
      "step": 2556740
    },
    {
      "epoch": 4.1841938165655295,
      "grad_norm": 0.18022529780864716,
      "learning_rate": 1.6423634219144323e-06,
      "loss": 0.0086,
      "step": 2556760
    },
    {
      "epoch": 4.184226547004183,
      "grad_norm": 0.2588675618171692,
      "learning_rate": 1.6422975297009153e-06,
      "loss": 0.0098,
      "step": 2556780
    },
    {
      "epoch": 4.184259277442837,
      "grad_norm": 0.27012771368026733,
      "learning_rate": 1.642231637487398e-06,
      "loss": 0.0117,
      "step": 2556800
    },
    {
      "epoch": 4.184292007881489,
      "grad_norm": 0.1452115774154663,
      "learning_rate": 1.6421657452738812e-06,
      "loss": 0.0116,
      "step": 2556820
    },
    {
      "epoch": 4.184324738320143,
      "grad_norm": 0.22783365845680237,
      "learning_rate": 1.6420998530603641e-06,
      "loss": 0.0138,
      "step": 2556840
    },
    {
      "epoch": 4.1843574687587965,
      "grad_norm": 0.09949459880590439,
      "learning_rate": 1.6420339608468469e-06,
      "loss": 0.0075,
      "step": 2556860
    },
    {
      "epoch": 4.184390199197449,
      "grad_norm": 0.1021280512213707,
      "learning_rate": 1.6419680686333298e-06,
      "loss": 0.0051,
      "step": 2556880
    },
    {
      "epoch": 4.184422929636103,
      "grad_norm": 0.3764004111289978,
      "learning_rate": 1.6419021764198126e-06,
      "loss": 0.0111,
      "step": 2556900
    },
    {
      "epoch": 4.1844556600747564,
      "grad_norm": 0.44554850459098816,
      "learning_rate": 1.6418362842062955e-06,
      "loss": 0.012,
      "step": 2556920
    },
    {
      "epoch": 4.18448839051341,
      "grad_norm": 0.23728564381599426,
      "learning_rate": 1.6417703919927783e-06,
      "loss": 0.0082,
      "step": 2556940
    },
    {
      "epoch": 4.184521120952063,
      "grad_norm": 0.1627417504787445,
      "learning_rate": 1.6417044997792612e-06,
      "loss": 0.0101,
      "step": 2556960
    },
    {
      "epoch": 4.184553851390716,
      "grad_norm": 0.20743684470653534,
      "learning_rate": 1.6416386075657442e-06,
      "loss": 0.0093,
      "step": 2556980
    },
    {
      "epoch": 4.18458658182937,
      "grad_norm": 0.5556531548500061,
      "learning_rate": 1.6415727153522271e-06,
      "loss": 0.0111,
      "step": 2557000
    },
    {
      "epoch": 4.184619312268023,
      "grad_norm": 0.31367987394332886,
      "learning_rate": 1.6415068231387099e-06,
      "loss": 0.0093,
      "step": 2557020
    },
    {
      "epoch": 4.184652042706676,
      "grad_norm": 0.13923567533493042,
      "learning_rate": 1.6414409309251928e-06,
      "loss": 0.0101,
      "step": 2557040
    },
    {
      "epoch": 4.18468477314533,
      "grad_norm": 0.09466133266687393,
      "learning_rate": 1.6413750387116755e-06,
      "loss": 0.0118,
      "step": 2557060
    },
    {
      "epoch": 4.184717503583983,
      "grad_norm": 0.060656964778900146,
      "learning_rate": 1.6413091464981585e-06,
      "loss": 0.0086,
      "step": 2557080
    },
    {
      "epoch": 4.184750234022636,
      "grad_norm": 0.18735505640506744,
      "learning_rate": 1.6412432542846412e-06,
      "loss": 0.008,
      "step": 2557100
    },
    {
      "epoch": 4.18478296446129,
      "grad_norm": 0.2231091409921646,
      "learning_rate": 1.6411773620711242e-06,
      "loss": 0.0074,
      "step": 2557120
    },
    {
      "epoch": 4.184815694899943,
      "grad_norm": 0.33737677335739136,
      "learning_rate": 1.641111469857607e-06,
      "loss": 0.0146,
      "step": 2557140
    },
    {
      "epoch": 4.184848425338596,
      "grad_norm": 0.2176259607076645,
      "learning_rate": 1.64104557764409e-06,
      "loss": 0.0089,
      "step": 2557160
    },
    {
      "epoch": 4.18488115577725,
      "grad_norm": 0.0717061311006546,
      "learning_rate": 1.6409796854305728e-06,
      "loss": 0.0079,
      "step": 2557180
    },
    {
      "epoch": 4.184913886215903,
      "grad_norm": 0.14324180781841278,
      "learning_rate": 1.6409137932170558e-06,
      "loss": 0.0068,
      "step": 2557200
    },
    {
      "epoch": 4.184946616654557,
      "grad_norm": 0.7033243775367737,
      "learning_rate": 1.6408479010035385e-06,
      "loss": 0.0132,
      "step": 2557220
    },
    {
      "epoch": 4.1849793470932095,
      "grad_norm": 0.034995000809431076,
      "learning_rate": 1.6407820087900215e-06,
      "loss": 0.0077,
      "step": 2557240
    },
    {
      "epoch": 4.185012077531863,
      "grad_norm": 0.2855982184410095,
      "learning_rate": 1.6407161165765042e-06,
      "loss": 0.0069,
      "step": 2557260
    },
    {
      "epoch": 4.185044807970517,
      "grad_norm": 0.1984378844499588,
      "learning_rate": 1.6406502243629872e-06,
      "loss": 0.0067,
      "step": 2557280
    },
    {
      "epoch": 4.185077538409169,
      "grad_norm": 0.11324532330036163,
      "learning_rate": 1.64058433214947e-06,
      "loss": 0.0056,
      "step": 2557300
    },
    {
      "epoch": 4.185110268847823,
      "grad_norm": 0.11013314127922058,
      "learning_rate": 1.6405184399359529e-06,
      "loss": 0.0097,
      "step": 2557320
    },
    {
      "epoch": 4.185142999286477,
      "grad_norm": 0.14607778191566467,
      "learning_rate": 1.6404525477224358e-06,
      "loss": 0.008,
      "step": 2557340
    },
    {
      "epoch": 4.18517572972513,
      "grad_norm": 0.49599266052246094,
      "learning_rate": 1.6403866555089188e-06,
      "loss": 0.01,
      "step": 2557360
    },
    {
      "epoch": 4.185208460163783,
      "grad_norm": 0.4213326871395111,
      "learning_rate": 1.6403207632954015e-06,
      "loss": 0.0092,
      "step": 2557380
    },
    {
      "epoch": 4.1852411906024365,
      "grad_norm": 0.20947842299938202,
      "learning_rate": 1.6402548710818844e-06,
      "loss": 0.0077,
      "step": 2557400
    },
    {
      "epoch": 4.18527392104109,
      "grad_norm": 0.11845578998327255,
      "learning_rate": 1.6401889788683672e-06,
      "loss": 0.0073,
      "step": 2557420
    },
    {
      "epoch": 4.185306651479743,
      "grad_norm": 0.22535599768161774,
      "learning_rate": 1.6401230866548501e-06,
      "loss": 0.0062,
      "step": 2557440
    },
    {
      "epoch": 4.185339381918396,
      "grad_norm": 0.3245371878147125,
      "learning_rate": 1.6400571944413329e-06,
      "loss": 0.0059,
      "step": 2557460
    },
    {
      "epoch": 4.18537211235705,
      "grad_norm": 0.5621273517608643,
      "learning_rate": 1.6399913022278158e-06,
      "loss": 0.0075,
      "step": 2557480
    },
    {
      "epoch": 4.185404842795704,
      "grad_norm": 0.3911060690879822,
      "learning_rate": 1.6399254100142986e-06,
      "loss": 0.0066,
      "step": 2557500
    },
    {
      "epoch": 4.185437573234356,
      "grad_norm": 0.06003778055310249,
      "learning_rate": 1.6398595178007817e-06,
      "loss": 0.0092,
      "step": 2557520
    },
    {
      "epoch": 4.18547030367301,
      "grad_norm": 0.3315083086490631,
      "learning_rate": 1.6397936255872645e-06,
      "loss": 0.011,
      "step": 2557540
    },
    {
      "epoch": 4.1855030341116635,
      "grad_norm": 0.1174948662519455,
      "learning_rate": 1.6397277333737474e-06,
      "loss": 0.0118,
      "step": 2557560
    },
    {
      "epoch": 4.185535764550316,
      "grad_norm": 0.18278950452804565,
      "learning_rate": 1.6396618411602302e-06,
      "loss": 0.0065,
      "step": 2557580
    },
    {
      "epoch": 4.18556849498897,
      "grad_norm": 0.13565737009048462,
      "learning_rate": 1.6395959489467131e-06,
      "loss": 0.0065,
      "step": 2557600
    },
    {
      "epoch": 4.185601225427623,
      "grad_norm": 0.2913669943809509,
      "learning_rate": 1.6395300567331959e-06,
      "loss": 0.01,
      "step": 2557620
    },
    {
      "epoch": 4.185633955866276,
      "grad_norm": 0.0789947509765625,
      "learning_rate": 1.6394641645196788e-06,
      "loss": 0.0064,
      "step": 2557640
    },
    {
      "epoch": 4.18566668630493,
      "grad_norm": 0.1017959862947464,
      "learning_rate": 1.6393982723061615e-06,
      "loss": 0.008,
      "step": 2557660
    },
    {
      "epoch": 4.185699416743583,
      "grad_norm": 0.18497620522975922,
      "learning_rate": 1.6393323800926445e-06,
      "loss": 0.0093,
      "step": 2557680
    },
    {
      "epoch": 4.185732147182237,
      "grad_norm": 0.846572995185852,
      "learning_rate": 1.6392664878791277e-06,
      "loss": 0.0122,
      "step": 2557700
    },
    {
      "epoch": 4.18576487762089,
      "grad_norm": 0.14887413382530212,
      "learning_rate": 1.6392005956656104e-06,
      "loss": 0.0071,
      "step": 2557720
    },
    {
      "epoch": 4.185797608059543,
      "grad_norm": 0.7244493365287781,
      "learning_rate": 1.6391347034520931e-06,
      "loss": 0.0093,
      "step": 2557740
    },
    {
      "epoch": 4.185830338498197,
      "grad_norm": 0.07008335739374161,
      "learning_rate": 1.639068811238576e-06,
      "loss": 0.0091,
      "step": 2557760
    },
    {
      "epoch": 4.18586306893685,
      "grad_norm": 0.6778724193572998,
      "learning_rate": 1.6390029190250588e-06,
      "loss": 0.0087,
      "step": 2557780
    },
    {
      "epoch": 4.185895799375503,
      "grad_norm": 0.4479947090148926,
      "learning_rate": 1.6389370268115418e-06,
      "loss": 0.0058,
      "step": 2557800
    },
    {
      "epoch": 4.185928529814157,
      "grad_norm": 0.03715872764587402,
      "learning_rate": 1.6388711345980245e-06,
      "loss": 0.0069,
      "step": 2557820
    },
    {
      "epoch": 4.18596126025281,
      "grad_norm": 0.09608175605535507,
      "learning_rate": 1.6388052423845075e-06,
      "loss": 0.0107,
      "step": 2557840
    },
    {
      "epoch": 4.185993990691463,
      "grad_norm": 0.18163615465164185,
      "learning_rate": 1.6387393501709906e-06,
      "loss": 0.0099,
      "step": 2557860
    },
    {
      "epoch": 4.1860267211301165,
      "grad_norm": 0.32311493158340454,
      "learning_rate": 1.6386734579574734e-06,
      "loss": 0.009,
      "step": 2557880
    },
    {
      "epoch": 4.18605945156877,
      "grad_norm": 0.13413545489311218,
      "learning_rate": 1.6386075657439563e-06,
      "loss": 0.0132,
      "step": 2557900
    },
    {
      "epoch": 4.186092182007423,
      "grad_norm": 0.11343928426504135,
      "learning_rate": 1.638541673530439e-06,
      "loss": 0.0074,
      "step": 2557920
    },
    {
      "epoch": 4.186124912446076,
      "grad_norm": 0.11218280345201492,
      "learning_rate": 1.638475781316922e-06,
      "loss": 0.0093,
      "step": 2557940
    },
    {
      "epoch": 4.18615764288473,
      "grad_norm": 0.3094944953918457,
      "learning_rate": 1.6384098891034048e-06,
      "loss": 0.0131,
      "step": 2557960
    },
    {
      "epoch": 4.186190373323384,
      "grad_norm": 0.14290261268615723,
      "learning_rate": 1.6383439968898875e-06,
      "loss": 0.013,
      "step": 2557980
    },
    {
      "epoch": 4.186223103762036,
      "grad_norm": 0.24705620110034943,
      "learning_rate": 1.6382781046763705e-06,
      "loss": 0.013,
      "step": 2558000
    },
    {
      "epoch": 4.18625583420069,
      "grad_norm": 0.20757009088993073,
      "learning_rate": 1.6382122124628532e-06,
      "loss": 0.0081,
      "step": 2558020
    },
    {
      "epoch": 4.1862885646393435,
      "grad_norm": 0.9361110925674438,
      "learning_rate": 1.6381463202493364e-06,
      "loss": 0.0135,
      "step": 2558040
    },
    {
      "epoch": 4.186321295077996,
      "grad_norm": 0.3353850245475769,
      "learning_rate": 1.6380804280358193e-06,
      "loss": 0.0116,
      "step": 2558060
    },
    {
      "epoch": 4.18635402551665,
      "grad_norm": 0.21516890823841095,
      "learning_rate": 1.638014535822302e-06,
      "loss": 0.01,
      "step": 2558080
    },
    {
      "epoch": 4.186386755955303,
      "grad_norm": 0.3073294162750244,
      "learning_rate": 1.637948643608785e-06,
      "loss": 0.007,
      "step": 2558100
    },
    {
      "epoch": 4.186419486393957,
      "grad_norm": 0.5559150576591492,
      "learning_rate": 1.6378827513952677e-06,
      "loss": 0.0138,
      "step": 2558120
    },
    {
      "epoch": 4.18645221683261,
      "grad_norm": 0.13347800076007843,
      "learning_rate": 1.6378168591817507e-06,
      "loss": 0.0107,
      "step": 2558140
    },
    {
      "epoch": 4.186484947271263,
      "grad_norm": 0.5303454995155334,
      "learning_rate": 1.6377509669682334e-06,
      "loss": 0.0106,
      "step": 2558160
    },
    {
      "epoch": 4.186517677709917,
      "grad_norm": 0.10699532926082611,
      "learning_rate": 1.6376850747547162e-06,
      "loss": 0.0089,
      "step": 2558180
    },
    {
      "epoch": 4.18655040814857,
      "grad_norm": 0.1634121686220169,
      "learning_rate": 1.6376191825411991e-06,
      "loss": 0.0078,
      "step": 2558200
    },
    {
      "epoch": 4.186583138587223,
      "grad_norm": 0.34307458996772766,
      "learning_rate": 1.6375532903276823e-06,
      "loss": 0.0078,
      "step": 2558220
    },
    {
      "epoch": 4.186615869025877,
      "grad_norm": 0.1573069989681244,
      "learning_rate": 1.637487398114165e-06,
      "loss": 0.0077,
      "step": 2558240
    },
    {
      "epoch": 4.18664859946453,
      "grad_norm": 0.11389622092247009,
      "learning_rate": 1.637421505900648e-06,
      "loss": 0.0078,
      "step": 2558260
    },
    {
      "epoch": 4.186681329903183,
      "grad_norm": 0.08029929548501968,
      "learning_rate": 1.6373556136871307e-06,
      "loss": 0.0081,
      "step": 2558280
    },
    {
      "epoch": 4.186714060341837,
      "grad_norm": 0.15633155405521393,
      "learning_rate": 1.6372897214736137e-06,
      "loss": 0.0153,
      "step": 2558300
    },
    {
      "epoch": 4.18674679078049,
      "grad_norm": 0.2039722502231598,
      "learning_rate": 1.6372238292600964e-06,
      "loss": 0.0112,
      "step": 2558320
    },
    {
      "epoch": 4.186779521219143,
      "grad_norm": 0.16489258408546448,
      "learning_rate": 1.6371579370465794e-06,
      "loss": 0.0065,
      "step": 2558340
    },
    {
      "epoch": 4.186812251657797,
      "grad_norm": 0.2960878014564514,
      "learning_rate": 1.637092044833062e-06,
      "loss": 0.0065,
      "step": 2558360
    },
    {
      "epoch": 4.18684498209645,
      "grad_norm": 0.1451079398393631,
      "learning_rate": 1.637026152619545e-06,
      "loss": 0.0065,
      "step": 2558380
    },
    {
      "epoch": 4.186877712535104,
      "grad_norm": 0.08830566704273224,
      "learning_rate": 1.636960260406028e-06,
      "loss": 0.0065,
      "step": 2558400
    },
    {
      "epoch": 4.1869104429737565,
      "grad_norm": 0.1807658076286316,
      "learning_rate": 1.636894368192511e-06,
      "loss": 0.0086,
      "step": 2558420
    },
    {
      "epoch": 4.18694317341241,
      "grad_norm": 0.3641328811645508,
      "learning_rate": 1.6368284759789937e-06,
      "loss": 0.0117,
      "step": 2558440
    },
    {
      "epoch": 4.186975903851064,
      "grad_norm": 0.13325625658035278,
      "learning_rate": 1.6367625837654766e-06,
      "loss": 0.0083,
      "step": 2558460
    },
    {
      "epoch": 4.187008634289716,
      "grad_norm": 0.24226514995098114,
      "learning_rate": 1.6366966915519594e-06,
      "loss": 0.0098,
      "step": 2558480
    },
    {
      "epoch": 4.18704136472837,
      "grad_norm": 0.3263857662677765,
      "learning_rate": 1.6366307993384423e-06,
      "loss": 0.0106,
      "step": 2558500
    },
    {
      "epoch": 4.187074095167024,
      "grad_norm": 0.3521743714809418,
      "learning_rate": 1.636564907124925e-06,
      "loss": 0.0091,
      "step": 2558520
    },
    {
      "epoch": 4.187106825605677,
      "grad_norm": 0.39008277654647827,
      "learning_rate": 1.636499014911408e-06,
      "loss": 0.0131,
      "step": 2558540
    },
    {
      "epoch": 4.18713955604433,
      "grad_norm": 0.22356978058815002,
      "learning_rate": 1.6364331226978908e-06,
      "loss": 0.0064,
      "step": 2558560
    },
    {
      "epoch": 4.1871722864829835,
      "grad_norm": 0.11347486823797226,
      "learning_rate": 1.636367230484374e-06,
      "loss": 0.0131,
      "step": 2558580
    },
    {
      "epoch": 4.187205016921637,
      "grad_norm": 0.21826386451721191,
      "learning_rate": 1.6363013382708567e-06,
      "loss": 0.009,
      "step": 2558600
    },
    {
      "epoch": 4.18723774736029,
      "grad_norm": 0.16129671037197113,
      "learning_rate": 1.6362354460573396e-06,
      "loss": 0.007,
      "step": 2558620
    },
    {
      "epoch": 4.187270477798943,
      "grad_norm": 0.49271851778030396,
      "learning_rate": 1.6361695538438224e-06,
      "loss": 0.0079,
      "step": 2558640
    },
    {
      "epoch": 4.187303208237597,
      "grad_norm": 0.13550683856010437,
      "learning_rate": 1.6361036616303053e-06,
      "loss": 0.0075,
      "step": 2558660
    },
    {
      "epoch": 4.1873359386762505,
      "grad_norm": 0.0988888144493103,
      "learning_rate": 1.636037769416788e-06,
      "loss": 0.0047,
      "step": 2558680
    },
    {
      "epoch": 4.187368669114903,
      "grad_norm": 0.0742511972784996,
      "learning_rate": 1.635971877203271e-06,
      "loss": 0.0082,
      "step": 2558700
    },
    {
      "epoch": 4.187401399553557,
      "grad_norm": 0.1721745729446411,
      "learning_rate": 1.6359059849897537e-06,
      "loss": 0.0074,
      "step": 2558720
    },
    {
      "epoch": 4.18743412999221,
      "grad_norm": 0.07725678384304047,
      "learning_rate": 1.635840092776237e-06,
      "loss": 0.0094,
      "step": 2558740
    },
    {
      "epoch": 4.187466860430863,
      "grad_norm": 0.3171665668487549,
      "learning_rate": 1.6357742005627196e-06,
      "loss": 0.0101,
      "step": 2558760
    },
    {
      "epoch": 4.187499590869517,
      "grad_norm": 0.2476929873228073,
      "learning_rate": 1.6357083083492026e-06,
      "loss": 0.0121,
      "step": 2558780
    },
    {
      "epoch": 4.18753232130817,
      "grad_norm": 0.24069342017173767,
      "learning_rate": 1.6356424161356853e-06,
      "loss": 0.0084,
      "step": 2558800
    },
    {
      "epoch": 4.187565051746824,
      "grad_norm": 0.2154485583305359,
      "learning_rate": 1.6355765239221683e-06,
      "loss": 0.0067,
      "step": 2558820
    },
    {
      "epoch": 4.187597782185477,
      "grad_norm": 0.2731674909591675,
      "learning_rate": 1.635510631708651e-06,
      "loss": 0.0067,
      "step": 2558840
    },
    {
      "epoch": 4.18763051262413,
      "grad_norm": 0.19933107495307922,
      "learning_rate": 1.635444739495134e-06,
      "loss": 0.0101,
      "step": 2558860
    },
    {
      "epoch": 4.187663243062784,
      "grad_norm": 0.09908883273601532,
      "learning_rate": 1.6353788472816167e-06,
      "loss": 0.0084,
      "step": 2558880
    },
    {
      "epoch": 4.1876959735014365,
      "grad_norm": 0.3319222629070282,
      "learning_rate": 1.6353129550680997e-06,
      "loss": 0.0114,
      "step": 2558900
    },
    {
      "epoch": 4.18772870394009,
      "grad_norm": 0.3371559977531433,
      "learning_rate": 1.6352470628545828e-06,
      "loss": 0.0161,
      "step": 2558920
    },
    {
      "epoch": 4.187761434378744,
      "grad_norm": 0.18132930994033813,
      "learning_rate": 1.6351811706410656e-06,
      "loss": 0.0085,
      "step": 2558940
    },
    {
      "epoch": 4.187794164817397,
      "grad_norm": 0.29130494594573975,
      "learning_rate": 1.6351152784275483e-06,
      "loss": 0.0114,
      "step": 2558960
    },
    {
      "epoch": 4.18782689525605,
      "grad_norm": 0.14157305657863617,
      "learning_rate": 1.6350493862140313e-06,
      "loss": 0.0058,
      "step": 2558980
    },
    {
      "epoch": 4.187859625694704,
      "grad_norm": 0.8008214235305786,
      "learning_rate": 1.634983494000514e-06,
      "loss": 0.0129,
      "step": 2559000
    },
    {
      "epoch": 4.187892356133357,
      "grad_norm": 0.174437016248703,
      "learning_rate": 1.634917601786997e-06,
      "loss": 0.0098,
      "step": 2559020
    },
    {
      "epoch": 4.18792508657201,
      "grad_norm": 0.10471055656671524,
      "learning_rate": 1.6348517095734797e-06,
      "loss": 0.0063,
      "step": 2559040
    },
    {
      "epoch": 4.1879578170106635,
      "grad_norm": 0.37863537669181824,
      "learning_rate": 1.6347858173599626e-06,
      "loss": 0.0078,
      "step": 2559060
    },
    {
      "epoch": 4.187990547449317,
      "grad_norm": 0.24639908969402313,
      "learning_rate": 1.6347199251464454e-06,
      "loss": 0.0118,
      "step": 2559080
    },
    {
      "epoch": 4.18802327788797,
      "grad_norm": 0.18724054098129272,
      "learning_rate": 1.6346540329329286e-06,
      "loss": 0.01,
      "step": 2559100
    },
    {
      "epoch": 4.188056008326623,
      "grad_norm": 0.1051585003733635,
      "learning_rate": 1.6345881407194115e-06,
      "loss": 0.0089,
      "step": 2559120
    },
    {
      "epoch": 4.188088738765277,
      "grad_norm": 0.5115349292755127,
      "learning_rate": 1.6345222485058942e-06,
      "loss": 0.0078,
      "step": 2559140
    },
    {
      "epoch": 4.188121469203931,
      "grad_norm": 0.19820483028888702,
      "learning_rate": 1.6344563562923772e-06,
      "loss": 0.0111,
      "step": 2559160
    },
    {
      "epoch": 4.188154199642583,
      "grad_norm": 0.3765811324119568,
      "learning_rate": 1.63439046407886e-06,
      "loss": 0.0075,
      "step": 2559180
    },
    {
      "epoch": 4.188186930081237,
      "grad_norm": 0.07991534471511841,
      "learning_rate": 1.6343245718653427e-06,
      "loss": 0.0123,
      "step": 2559200
    },
    {
      "epoch": 4.1882196605198905,
      "grad_norm": 0.17894114553928375,
      "learning_rate": 1.6342586796518256e-06,
      "loss": 0.0086,
      "step": 2559220
    },
    {
      "epoch": 4.188252390958544,
      "grad_norm": 0.21127359569072723,
      "learning_rate": 1.6341927874383084e-06,
      "loss": 0.0091,
      "step": 2559240
    },
    {
      "epoch": 4.188285121397197,
      "grad_norm": 0.29215723276138306,
      "learning_rate": 1.6341268952247913e-06,
      "loss": 0.0077,
      "step": 2559260
    },
    {
      "epoch": 4.18831785183585,
      "grad_norm": 0.35283908247947693,
      "learning_rate": 1.6340610030112745e-06,
      "loss": 0.0082,
      "step": 2559280
    },
    {
      "epoch": 4.188350582274504,
      "grad_norm": 0.2782343327999115,
      "learning_rate": 1.6339951107977572e-06,
      "loss": 0.0088,
      "step": 2559300
    },
    {
      "epoch": 4.188383312713157,
      "grad_norm": 0.4442988634109497,
      "learning_rate": 1.6339292185842402e-06,
      "loss": 0.0083,
      "step": 2559320
    },
    {
      "epoch": 4.18841604315181,
      "grad_norm": 0.5384564399719238,
      "learning_rate": 1.633863326370723e-06,
      "loss": 0.0126,
      "step": 2559340
    },
    {
      "epoch": 4.188448773590464,
      "grad_norm": 0.1007712334394455,
      "learning_rate": 1.6337974341572059e-06,
      "loss": 0.0082,
      "step": 2559360
    },
    {
      "epoch": 4.188481504029117,
      "grad_norm": 0.3768825829029083,
      "learning_rate": 1.6337315419436886e-06,
      "loss": 0.01,
      "step": 2559380
    },
    {
      "epoch": 4.18851423446777,
      "grad_norm": 0.4850327670574188,
      "learning_rate": 1.6336656497301713e-06,
      "loss": 0.0114,
      "step": 2559400
    },
    {
      "epoch": 4.188546964906424,
      "grad_norm": 0.3531835377216339,
      "learning_rate": 1.6335997575166543e-06,
      "loss": 0.0111,
      "step": 2559420
    },
    {
      "epoch": 4.188579695345077,
      "grad_norm": 1.1727800369262695,
      "learning_rate": 1.6335338653031375e-06,
      "loss": 0.0075,
      "step": 2559440
    },
    {
      "epoch": 4.18861242578373,
      "grad_norm": 0.138522207736969,
      "learning_rate": 1.6334679730896202e-06,
      "loss": 0.0056,
      "step": 2559460
    },
    {
      "epoch": 4.188645156222384,
      "grad_norm": 0.45051074028015137,
      "learning_rate": 1.6334020808761031e-06,
      "loss": 0.0099,
      "step": 2559480
    },
    {
      "epoch": 4.188677886661037,
      "grad_norm": 0.9992761611938477,
      "learning_rate": 1.6333361886625859e-06,
      "loss": 0.0116,
      "step": 2559500
    },
    {
      "epoch": 4.18871061709969,
      "grad_norm": 0.3324545621871948,
      "learning_rate": 1.6332702964490688e-06,
      "loss": 0.0099,
      "step": 2559520
    },
    {
      "epoch": 4.1887433475383435,
      "grad_norm": 0.5942549705505371,
      "learning_rate": 1.6332044042355516e-06,
      "loss": 0.0108,
      "step": 2559540
    },
    {
      "epoch": 4.188776077976997,
      "grad_norm": 0.1381569802761078,
      "learning_rate": 1.6331385120220345e-06,
      "loss": 0.012,
      "step": 2559560
    },
    {
      "epoch": 4.188808808415651,
      "grad_norm": 0.1573343575000763,
      "learning_rate": 1.6330726198085173e-06,
      "loss": 0.0105,
      "step": 2559580
    },
    {
      "epoch": 4.188841538854303,
      "grad_norm": 0.20913895964622498,
      "learning_rate": 1.6330067275950002e-06,
      "loss": 0.009,
      "step": 2559600
    },
    {
      "epoch": 4.188874269292957,
      "grad_norm": 0.18611837923526764,
      "learning_rate": 1.6329408353814832e-06,
      "loss": 0.0094,
      "step": 2559620
    },
    {
      "epoch": 4.188906999731611,
      "grad_norm": 0.12479475140571594,
      "learning_rate": 1.6328749431679661e-06,
      "loss": 0.0095,
      "step": 2559640
    },
    {
      "epoch": 4.188939730170263,
      "grad_norm": 0.09430424124002457,
      "learning_rate": 1.6328090509544489e-06,
      "loss": 0.0152,
      "step": 2559660
    },
    {
      "epoch": 4.188972460608917,
      "grad_norm": 0.05662558972835541,
      "learning_rate": 1.6327431587409318e-06,
      "loss": 0.0104,
      "step": 2559680
    },
    {
      "epoch": 4.1890051910475705,
      "grad_norm": 0.146116703748703,
      "learning_rate": 1.6326772665274146e-06,
      "loss": 0.007,
      "step": 2559700
    },
    {
      "epoch": 4.189037921486224,
      "grad_norm": 0.29799288511276245,
      "learning_rate": 1.6326113743138975e-06,
      "loss": 0.0056,
      "step": 2559720
    },
    {
      "epoch": 4.189070651924877,
      "grad_norm": 0.12958884239196777,
      "learning_rate": 1.6325454821003802e-06,
      "loss": 0.0114,
      "step": 2559740
    },
    {
      "epoch": 4.18910338236353,
      "grad_norm": 0.39976462721824646,
      "learning_rate": 1.6324795898868632e-06,
      "loss": 0.0076,
      "step": 2559760
    },
    {
      "epoch": 4.189136112802184,
      "grad_norm": 0.3158850371837616,
      "learning_rate": 1.632413697673346e-06,
      "loss": 0.0065,
      "step": 2559780
    },
    {
      "epoch": 4.189168843240837,
      "grad_norm": 0.07339165359735489,
      "learning_rate": 1.632347805459829e-06,
      "loss": 0.0102,
      "step": 2559800
    },
    {
      "epoch": 4.18920157367949,
      "grad_norm": 0.27177947759628296,
      "learning_rate": 1.6322819132463118e-06,
      "loss": 0.0099,
      "step": 2559820
    },
    {
      "epoch": 4.189234304118144,
      "grad_norm": 0.16861556470394135,
      "learning_rate": 1.6322160210327948e-06,
      "loss": 0.0064,
      "step": 2559840
    },
    {
      "epoch": 4.1892670345567975,
      "grad_norm": 0.5699747800827026,
      "learning_rate": 1.6321501288192775e-06,
      "loss": 0.009,
      "step": 2559860
    },
    {
      "epoch": 4.18929976499545,
      "grad_norm": 0.24274544417858124,
      "learning_rate": 1.6320842366057605e-06,
      "loss": 0.0074,
      "step": 2559880
    },
    {
      "epoch": 4.189332495434104,
      "grad_norm": 0.39074069261550903,
      "learning_rate": 1.6320183443922432e-06,
      "loss": 0.0087,
      "step": 2559900
    },
    {
      "epoch": 4.189365225872757,
      "grad_norm": 0.2018348127603531,
      "learning_rate": 1.6319524521787262e-06,
      "loss": 0.0082,
      "step": 2559920
    },
    {
      "epoch": 4.18939795631141,
      "grad_norm": 0.040452226996421814,
      "learning_rate": 1.631886559965209e-06,
      "loss": 0.0085,
      "step": 2559940
    },
    {
      "epoch": 4.189430686750064,
      "grad_norm": 0.3838880658149719,
      "learning_rate": 1.6318206677516919e-06,
      "loss": 0.0071,
      "step": 2559960
    },
    {
      "epoch": 4.189463417188717,
      "grad_norm": 0.3773820102214813,
      "learning_rate": 1.6317547755381748e-06,
      "loss": 0.0076,
      "step": 2559980
    },
    {
      "epoch": 4.189496147627371,
      "grad_norm": 0.2756357789039612,
      "learning_rate": 1.6316888833246578e-06,
      "loss": 0.0097,
      "step": 2560000
    },
    {
      "epoch": 4.189528878066024,
      "grad_norm": 0.4005860388278961,
      "learning_rate": 1.6316229911111405e-06,
      "loss": 0.0122,
      "step": 2560020
    },
    {
      "epoch": 4.189561608504677,
      "grad_norm": 0.20649106800556183,
      "learning_rate": 1.6315570988976235e-06,
      "loss": 0.006,
      "step": 2560040
    },
    {
      "epoch": 4.189594338943331,
      "grad_norm": 0.16232754290103912,
      "learning_rate": 1.6314912066841062e-06,
      "loss": 0.007,
      "step": 2560060
    },
    {
      "epoch": 4.1896270693819835,
      "grad_norm": 0.21450187265872955,
      "learning_rate": 1.6314253144705892e-06,
      "loss": 0.0094,
      "step": 2560080
    },
    {
      "epoch": 4.189659799820637,
      "grad_norm": 0.08638296276330948,
      "learning_rate": 1.6313594222570719e-06,
      "loss": 0.0083,
      "step": 2560100
    },
    {
      "epoch": 4.189692530259291,
      "grad_norm": 0.17507502436637878,
      "learning_rate": 1.6312935300435548e-06,
      "loss": 0.0055,
      "step": 2560120
    },
    {
      "epoch": 4.189725260697944,
      "grad_norm": 0.2125839740037918,
      "learning_rate": 1.6312276378300376e-06,
      "loss": 0.0059,
      "step": 2560140
    },
    {
      "epoch": 4.189757991136597,
      "grad_norm": 1.0868557691574097,
      "learning_rate": 1.6311617456165207e-06,
      "loss": 0.0112,
      "step": 2560160
    },
    {
      "epoch": 4.189790721575251,
      "grad_norm": 0.4723927974700928,
      "learning_rate": 1.6310958534030035e-06,
      "loss": 0.0092,
      "step": 2560180
    },
    {
      "epoch": 4.189823452013904,
      "grad_norm": 0.11763466894626617,
      "learning_rate": 1.6310299611894864e-06,
      "loss": 0.0101,
      "step": 2560200
    },
    {
      "epoch": 4.189856182452557,
      "grad_norm": 0.1911623477935791,
      "learning_rate": 1.6309640689759692e-06,
      "loss": 0.0106,
      "step": 2560220
    },
    {
      "epoch": 4.1898889128912105,
      "grad_norm": 0.08016879856586456,
      "learning_rate": 1.6308981767624521e-06,
      "loss": 0.0088,
      "step": 2560240
    },
    {
      "epoch": 4.189921643329864,
      "grad_norm": 0.20839719474315643,
      "learning_rate": 1.6308322845489349e-06,
      "loss": 0.0068,
      "step": 2560260
    },
    {
      "epoch": 4.189954373768518,
      "grad_norm": 0.14339645206928253,
      "learning_rate": 1.6307663923354178e-06,
      "loss": 0.0072,
      "step": 2560280
    },
    {
      "epoch": 4.18998710420717,
      "grad_norm": 0.1833990514278412,
      "learning_rate": 1.6307005001219006e-06,
      "loss": 0.0092,
      "step": 2560300
    },
    {
      "epoch": 4.190019834645824,
      "grad_norm": 0.07989616692066193,
      "learning_rate": 1.6306346079083837e-06,
      "loss": 0.0091,
      "step": 2560320
    },
    {
      "epoch": 4.1900525650844775,
      "grad_norm": 0.7039548754692078,
      "learning_rate": 1.6305687156948667e-06,
      "loss": 0.0095,
      "step": 2560340
    },
    {
      "epoch": 4.19008529552313,
      "grad_norm": 0.4644545316696167,
      "learning_rate": 1.6305028234813494e-06,
      "loss": 0.0088,
      "step": 2560360
    },
    {
      "epoch": 4.190118025961784,
      "grad_norm": 0.433319628238678,
      "learning_rate": 1.6304369312678322e-06,
      "loss": 0.0093,
      "step": 2560380
    },
    {
      "epoch": 4.190150756400437,
      "grad_norm": 0.2214149385690689,
      "learning_rate": 1.630371039054315e-06,
      "loss": 0.0073,
      "step": 2560400
    },
    {
      "epoch": 4.190183486839091,
      "grad_norm": 0.3891938030719757,
      "learning_rate": 1.6303051468407978e-06,
      "loss": 0.0085,
      "step": 2560420
    },
    {
      "epoch": 4.190216217277744,
      "grad_norm": 0.3569503724575043,
      "learning_rate": 1.6302392546272808e-06,
      "loss": 0.0126,
      "step": 2560440
    },
    {
      "epoch": 4.190248947716397,
      "grad_norm": 0.17890749871730804,
      "learning_rate": 1.6301733624137635e-06,
      "loss": 0.0118,
      "step": 2560460
    },
    {
      "epoch": 4.190281678155051,
      "grad_norm": 0.05582687631249428,
      "learning_rate": 1.6301074702002465e-06,
      "loss": 0.0132,
      "step": 2560480
    },
    {
      "epoch": 4.190314408593704,
      "grad_norm": 0.5679279565811157,
      "learning_rate": 1.6300415779867297e-06,
      "loss": 0.0121,
      "step": 2560500
    },
    {
      "epoch": 4.190347139032357,
      "grad_norm": 0.17728236317634583,
      "learning_rate": 1.6299756857732124e-06,
      "loss": 0.0108,
      "step": 2560520
    },
    {
      "epoch": 4.190379869471011,
      "grad_norm": 0.14717380702495575,
      "learning_rate": 1.6299097935596953e-06,
      "loss": 0.0123,
      "step": 2560540
    },
    {
      "epoch": 4.190412599909664,
      "grad_norm": 0.3942105174064636,
      "learning_rate": 1.629843901346178e-06,
      "loss": 0.0132,
      "step": 2560560
    },
    {
      "epoch": 4.190445330348317,
      "grad_norm": 0.24037814140319824,
      "learning_rate": 1.629778009132661e-06,
      "loss": 0.0089,
      "step": 2560580
    },
    {
      "epoch": 4.190478060786971,
      "grad_norm": 0.17455585300922394,
      "learning_rate": 1.6297121169191438e-06,
      "loss": 0.012,
      "step": 2560600
    },
    {
      "epoch": 4.190510791225624,
      "grad_norm": 0.33828186988830566,
      "learning_rate": 1.6296462247056265e-06,
      "loss": 0.0079,
      "step": 2560620
    },
    {
      "epoch": 4.190543521664277,
      "grad_norm": 0.35264283418655396,
      "learning_rate": 1.6295803324921095e-06,
      "loss": 0.006,
      "step": 2560640
    },
    {
      "epoch": 4.190576252102931,
      "grad_norm": 0.1461716741323471,
      "learning_rate": 1.6295144402785922e-06,
      "loss": 0.008,
      "step": 2560660
    },
    {
      "epoch": 4.190608982541584,
      "grad_norm": 0.14947929978370667,
      "learning_rate": 1.6294485480650754e-06,
      "loss": 0.0077,
      "step": 2560680
    },
    {
      "epoch": 4.190641712980238,
      "grad_norm": 0.12360657751560211,
      "learning_rate": 1.6293826558515583e-06,
      "loss": 0.0073,
      "step": 2560700
    },
    {
      "epoch": 4.1906744434188905,
      "grad_norm": 0.19655518233776093,
      "learning_rate": 1.629316763638041e-06,
      "loss": 0.01,
      "step": 2560720
    },
    {
      "epoch": 4.190707173857544,
      "grad_norm": 0.4640733301639557,
      "learning_rate": 1.629250871424524e-06,
      "loss": 0.0107,
      "step": 2560740
    },
    {
      "epoch": 4.190739904296198,
      "grad_norm": 0.19214212894439697,
      "learning_rate": 1.6291849792110067e-06,
      "loss": 0.0066,
      "step": 2560760
    },
    {
      "epoch": 4.19077263473485,
      "grad_norm": 0.5101373791694641,
      "learning_rate": 1.6291190869974897e-06,
      "loss": 0.0094,
      "step": 2560780
    },
    {
      "epoch": 4.190805365173504,
      "grad_norm": 0.31339144706726074,
      "learning_rate": 1.6290531947839724e-06,
      "loss": 0.0097,
      "step": 2560800
    },
    {
      "epoch": 4.190838095612158,
      "grad_norm": 0.3979280889034271,
      "learning_rate": 1.6289873025704554e-06,
      "loss": 0.0106,
      "step": 2560820
    },
    {
      "epoch": 4.19087082605081,
      "grad_norm": 0.5168794393539429,
      "learning_rate": 1.6289214103569381e-06,
      "loss": 0.0089,
      "step": 2560840
    },
    {
      "epoch": 4.190903556489464,
      "grad_norm": 0.6433573365211487,
      "learning_rate": 1.6288555181434213e-06,
      "loss": 0.0105,
      "step": 2560860
    },
    {
      "epoch": 4.1909362869281175,
      "grad_norm": 0.2779540419578552,
      "learning_rate": 1.628789625929904e-06,
      "loss": 0.011,
      "step": 2560880
    },
    {
      "epoch": 4.190969017366771,
      "grad_norm": 0.12615740299224854,
      "learning_rate": 1.628723733716387e-06,
      "loss": 0.0091,
      "step": 2560900
    },
    {
      "epoch": 4.191001747805424,
      "grad_norm": 0.21249200403690338,
      "learning_rate": 1.6286578415028697e-06,
      "loss": 0.007,
      "step": 2560920
    },
    {
      "epoch": 4.191034478244077,
      "grad_norm": 0.5374142527580261,
      "learning_rate": 1.6285919492893527e-06,
      "loss": 0.0075,
      "step": 2560940
    },
    {
      "epoch": 4.191067208682731,
      "grad_norm": 0.27927350997924805,
      "learning_rate": 1.6285260570758354e-06,
      "loss": 0.008,
      "step": 2560960
    },
    {
      "epoch": 4.191099939121384,
      "grad_norm": 0.1680246740579605,
      "learning_rate": 1.6284601648623184e-06,
      "loss": 0.0091,
      "step": 2560980
    },
    {
      "epoch": 4.191132669560037,
      "grad_norm": 0.22583843767642975,
      "learning_rate": 1.6283942726488011e-06,
      "loss": 0.0077,
      "step": 2561000
    },
    {
      "epoch": 4.191165399998691,
      "grad_norm": 0.6011762619018555,
      "learning_rate": 1.628328380435284e-06,
      "loss": 0.0098,
      "step": 2561020
    },
    {
      "epoch": 4.1911981304373445,
      "grad_norm": 0.13152454793453217,
      "learning_rate": 1.628262488221767e-06,
      "loss": 0.0087,
      "step": 2561040
    },
    {
      "epoch": 4.191230860875997,
      "grad_norm": 0.09007765352725983,
      "learning_rate": 1.62819659600825e-06,
      "loss": 0.0099,
      "step": 2561060
    },
    {
      "epoch": 4.191263591314651,
      "grad_norm": 0.4623114764690399,
      "learning_rate": 1.6281307037947327e-06,
      "loss": 0.0145,
      "step": 2561080
    },
    {
      "epoch": 4.191296321753304,
      "grad_norm": 0.16238901019096375,
      "learning_rate": 1.6280648115812157e-06,
      "loss": 0.0051,
      "step": 2561100
    },
    {
      "epoch": 4.191329052191957,
      "grad_norm": 0.10861633718013763,
      "learning_rate": 1.6279989193676984e-06,
      "loss": 0.0138,
      "step": 2561120
    },
    {
      "epoch": 4.191361782630611,
      "grad_norm": 0.30340632796287537,
      "learning_rate": 1.6279330271541813e-06,
      "loss": 0.0115,
      "step": 2561140
    },
    {
      "epoch": 4.191394513069264,
      "grad_norm": 0.3030671775341034,
      "learning_rate": 1.627867134940664e-06,
      "loss": 0.0074,
      "step": 2561160
    },
    {
      "epoch": 4.191427243507918,
      "grad_norm": 0.09190385788679123,
      "learning_rate": 1.627801242727147e-06,
      "loss": 0.0075,
      "step": 2561180
    },
    {
      "epoch": 4.1914599739465705,
      "grad_norm": 0.22886504232883453,
      "learning_rate": 1.62773535051363e-06,
      "loss": 0.0096,
      "step": 2561200
    },
    {
      "epoch": 4.191492704385224,
      "grad_norm": 0.4396745562553406,
      "learning_rate": 1.627669458300113e-06,
      "loss": 0.0129,
      "step": 2561220
    },
    {
      "epoch": 4.191525434823878,
      "grad_norm": 0.10694262385368347,
      "learning_rate": 1.6276035660865957e-06,
      "loss": 0.0054,
      "step": 2561240
    },
    {
      "epoch": 4.1915581652625304,
      "grad_norm": 0.14688198268413544,
      "learning_rate": 1.6275376738730786e-06,
      "loss": 0.0083,
      "step": 2561260
    },
    {
      "epoch": 4.191590895701184,
      "grad_norm": 0.2285917103290558,
      "learning_rate": 1.6274717816595614e-06,
      "loss": 0.0071,
      "step": 2561280
    },
    {
      "epoch": 4.191623626139838,
      "grad_norm": 0.3361179828643799,
      "learning_rate": 1.6274058894460443e-06,
      "loss": 0.0083,
      "step": 2561300
    },
    {
      "epoch": 4.191656356578491,
      "grad_norm": 0.10751109570264816,
      "learning_rate": 1.627339997232527e-06,
      "loss": 0.0069,
      "step": 2561320
    },
    {
      "epoch": 4.191689087017144,
      "grad_norm": 0.34941157698631287,
      "learning_rate": 1.62727410501901e-06,
      "loss": 0.0096,
      "step": 2561340
    },
    {
      "epoch": 4.1917218174557975,
      "grad_norm": 0.22975823283195496,
      "learning_rate": 1.6272082128054928e-06,
      "loss": 0.0052,
      "step": 2561360
    },
    {
      "epoch": 4.191754547894451,
      "grad_norm": 0.21225395798683167,
      "learning_rate": 1.627142320591976e-06,
      "loss": 0.0092,
      "step": 2561380
    },
    {
      "epoch": 4.191787278333104,
      "grad_norm": 0.2183598279953003,
      "learning_rate": 1.6270764283784587e-06,
      "loss": 0.0078,
      "step": 2561400
    },
    {
      "epoch": 4.191820008771757,
      "grad_norm": 0.29997518658638,
      "learning_rate": 1.6270105361649416e-06,
      "loss": 0.0094,
      "step": 2561420
    },
    {
      "epoch": 4.191852739210411,
      "grad_norm": 0.43177834153175354,
      "learning_rate": 1.6269446439514243e-06,
      "loss": 0.0092,
      "step": 2561440
    },
    {
      "epoch": 4.191885469649065,
      "grad_norm": 0.16172273457050323,
      "learning_rate": 1.6268787517379073e-06,
      "loss": 0.0118,
      "step": 2561460
    },
    {
      "epoch": 4.191918200087717,
      "grad_norm": 0.14105810225009918,
      "learning_rate": 1.62681285952439e-06,
      "loss": 0.0071,
      "step": 2561480
    },
    {
      "epoch": 4.191950930526371,
      "grad_norm": 0.3927798867225647,
      "learning_rate": 1.626746967310873e-06,
      "loss": 0.0075,
      "step": 2561500
    },
    {
      "epoch": 4.1919836609650245,
      "grad_norm": 0.6531257033348083,
      "learning_rate": 1.6266810750973557e-06,
      "loss": 0.013,
      "step": 2561520
    },
    {
      "epoch": 4.192016391403677,
      "grad_norm": 0.18563103675842285,
      "learning_rate": 1.6266151828838387e-06,
      "loss": 0.0102,
      "step": 2561540
    },
    {
      "epoch": 4.192049121842331,
      "grad_norm": 0.14444328844547272,
      "learning_rate": 1.6265492906703218e-06,
      "loss": 0.0084,
      "step": 2561560
    },
    {
      "epoch": 4.192081852280984,
      "grad_norm": 0.12143493443727493,
      "learning_rate": 1.6264833984568046e-06,
      "loss": 0.0122,
      "step": 2561580
    },
    {
      "epoch": 4.192114582719638,
      "grad_norm": 0.3414393663406372,
      "learning_rate": 1.6264175062432873e-06,
      "loss": 0.0079,
      "step": 2561600
    },
    {
      "epoch": 4.192147313158291,
      "grad_norm": 0.16581343114376068,
      "learning_rate": 1.6263516140297703e-06,
      "loss": 0.0093,
      "step": 2561620
    },
    {
      "epoch": 4.192180043596944,
      "grad_norm": 0.13991479575634003,
      "learning_rate": 1.626285721816253e-06,
      "loss": 0.0095,
      "step": 2561640
    },
    {
      "epoch": 4.192212774035598,
      "grad_norm": 0.15485535562038422,
      "learning_rate": 1.626219829602736e-06,
      "loss": 0.0089,
      "step": 2561660
    },
    {
      "epoch": 4.192245504474251,
      "grad_norm": 0.3984930217266083,
      "learning_rate": 1.6261539373892187e-06,
      "loss": 0.0081,
      "step": 2561680
    },
    {
      "epoch": 4.192278234912904,
      "grad_norm": 0.11263791471719742,
      "learning_rate": 1.6260880451757017e-06,
      "loss": 0.008,
      "step": 2561700
    },
    {
      "epoch": 4.192310965351558,
      "grad_norm": 0.21088124811649323,
      "learning_rate": 1.6260221529621844e-06,
      "loss": 0.0103,
      "step": 2561720
    },
    {
      "epoch": 4.192343695790211,
      "grad_norm": 0.0686073824763298,
      "learning_rate": 1.6259562607486676e-06,
      "loss": 0.0092,
      "step": 2561740
    },
    {
      "epoch": 4.192376426228864,
      "grad_norm": 0.20278064906597137,
      "learning_rate": 1.6258903685351505e-06,
      "loss": 0.0127,
      "step": 2561760
    },
    {
      "epoch": 4.192409156667518,
      "grad_norm": 0.2150200754404068,
      "learning_rate": 1.6258244763216333e-06,
      "loss": 0.0087,
      "step": 2561780
    },
    {
      "epoch": 4.192441887106171,
      "grad_norm": 0.04099459573626518,
      "learning_rate": 1.6257585841081162e-06,
      "loss": 0.0146,
      "step": 2561800
    },
    {
      "epoch": 4.192474617544824,
      "grad_norm": 0.18542620539665222,
      "learning_rate": 1.625692691894599e-06,
      "loss": 0.0074,
      "step": 2561820
    },
    {
      "epoch": 4.192507347983478,
      "grad_norm": 0.17951223254203796,
      "learning_rate": 1.6256267996810817e-06,
      "loss": 0.0062,
      "step": 2561840
    },
    {
      "epoch": 4.192540078422131,
      "grad_norm": 0.31343016028404236,
      "learning_rate": 1.6255609074675646e-06,
      "loss": 0.0119,
      "step": 2561860
    },
    {
      "epoch": 4.192572808860785,
      "grad_norm": 0.2594258487224579,
      "learning_rate": 1.6254950152540474e-06,
      "loss": 0.0092,
      "step": 2561880
    },
    {
      "epoch": 4.1926055392994375,
      "grad_norm": 0.13080637156963348,
      "learning_rate": 1.6254291230405303e-06,
      "loss": 0.0069,
      "step": 2561900
    },
    {
      "epoch": 4.192638269738091,
      "grad_norm": 0.5419875979423523,
      "learning_rate": 1.6253632308270135e-06,
      "loss": 0.0136,
      "step": 2561920
    },
    {
      "epoch": 4.192671000176745,
      "grad_norm": 0.42742693424224854,
      "learning_rate": 1.6252973386134962e-06,
      "loss": 0.014,
      "step": 2561940
    },
    {
      "epoch": 4.192703730615397,
      "grad_norm": 0.13509108126163483,
      "learning_rate": 1.6252314463999792e-06,
      "loss": 0.0099,
      "step": 2561960
    },
    {
      "epoch": 4.192736461054051,
      "grad_norm": 0.2002839297056198,
      "learning_rate": 1.625165554186462e-06,
      "loss": 0.0117,
      "step": 2561980
    },
    {
      "epoch": 4.1927691914927046,
      "grad_norm": 0.07482987642288208,
      "learning_rate": 1.6250996619729449e-06,
      "loss": 0.01,
      "step": 2562000
    },
    {
      "epoch": 4.192801921931358,
      "grad_norm": 0.25109899044036865,
      "learning_rate": 1.6250337697594276e-06,
      "loss": 0.0091,
      "step": 2562020
    },
    {
      "epoch": 4.192834652370011,
      "grad_norm": 0.11672088503837585,
      "learning_rate": 1.6249678775459104e-06,
      "loss": 0.0082,
      "step": 2562040
    },
    {
      "epoch": 4.1928673828086644,
      "grad_norm": 0.1589365005493164,
      "learning_rate": 1.6249019853323933e-06,
      "loss": 0.0098,
      "step": 2562060
    },
    {
      "epoch": 4.192900113247318,
      "grad_norm": 0.04278702661395073,
      "learning_rate": 1.6248360931188765e-06,
      "loss": 0.0063,
      "step": 2562080
    },
    {
      "epoch": 4.192932843685971,
      "grad_norm": 0.14733608067035675,
      "learning_rate": 1.6247702009053592e-06,
      "loss": 0.0072,
      "step": 2562100
    },
    {
      "epoch": 4.192965574124624,
      "grad_norm": 0.17673291265964508,
      "learning_rate": 1.6247043086918422e-06,
      "loss": 0.0091,
      "step": 2562120
    },
    {
      "epoch": 4.192998304563278,
      "grad_norm": 0.40105369687080383,
      "learning_rate": 1.624638416478325e-06,
      "loss": 0.0104,
      "step": 2562140
    },
    {
      "epoch": 4.1930310350019315,
      "grad_norm": 0.5284786224365234,
      "learning_rate": 1.6245725242648078e-06,
      "loss": 0.0092,
      "step": 2562160
    },
    {
      "epoch": 4.193063765440584,
      "grad_norm": 0.3131633698940277,
      "learning_rate": 1.6245066320512906e-06,
      "loss": 0.0067,
      "step": 2562180
    },
    {
      "epoch": 4.193096495879238,
      "grad_norm": 0.41379937529563904,
      "learning_rate": 1.6244407398377735e-06,
      "loss": 0.0092,
      "step": 2562200
    },
    {
      "epoch": 4.193129226317891,
      "grad_norm": 0.21194308996200562,
      "learning_rate": 1.6243748476242563e-06,
      "loss": 0.0088,
      "step": 2562220
    },
    {
      "epoch": 4.193161956756544,
      "grad_norm": 0.11859983205795288,
      "learning_rate": 1.6243089554107392e-06,
      "loss": 0.0142,
      "step": 2562240
    },
    {
      "epoch": 4.193194687195198,
      "grad_norm": 0.4087376296520233,
      "learning_rate": 1.6242430631972222e-06,
      "loss": 0.0104,
      "step": 2562260
    },
    {
      "epoch": 4.193227417633851,
      "grad_norm": 0.107215516269207,
      "learning_rate": 1.6241771709837051e-06,
      "loss": 0.0082,
      "step": 2562280
    },
    {
      "epoch": 4.193260148072504,
      "grad_norm": 0.20817174017429352,
      "learning_rate": 1.6241112787701879e-06,
      "loss": 0.0104,
      "step": 2562300
    },
    {
      "epoch": 4.193292878511158,
      "grad_norm": 0.580838143825531,
      "learning_rate": 1.6240453865566708e-06,
      "loss": 0.013,
      "step": 2562320
    },
    {
      "epoch": 4.193325608949811,
      "grad_norm": 0.08841767907142639,
      "learning_rate": 1.6239794943431536e-06,
      "loss": 0.0074,
      "step": 2562340
    },
    {
      "epoch": 4.193358339388465,
      "grad_norm": 0.056052710860967636,
      "learning_rate": 1.6239136021296365e-06,
      "loss": 0.0098,
      "step": 2562360
    },
    {
      "epoch": 4.1933910698271175,
      "grad_norm": 0.4461725950241089,
      "learning_rate": 1.6238477099161193e-06,
      "loss": 0.0055,
      "step": 2562380
    },
    {
      "epoch": 4.193423800265771,
      "grad_norm": 0.16327042877674103,
      "learning_rate": 1.6237818177026022e-06,
      "loss": 0.0098,
      "step": 2562400
    },
    {
      "epoch": 4.193456530704425,
      "grad_norm": 0.1019497811794281,
      "learning_rate": 1.623715925489085e-06,
      "loss": 0.0112,
      "step": 2562420
    },
    {
      "epoch": 4.193489261143078,
      "grad_norm": 0.2324080467224121,
      "learning_rate": 1.6236500332755681e-06,
      "loss": 0.0085,
      "step": 2562440
    },
    {
      "epoch": 4.193521991581731,
      "grad_norm": 0.5476143956184387,
      "learning_rate": 1.6235841410620509e-06,
      "loss": 0.0074,
      "step": 2562460
    },
    {
      "epoch": 4.193554722020385,
      "grad_norm": 0.2550482153892517,
      "learning_rate": 1.6235182488485338e-06,
      "loss": 0.0088,
      "step": 2562480
    },
    {
      "epoch": 4.193587452459038,
      "grad_norm": 0.24952031672000885,
      "learning_rate": 1.6234523566350165e-06,
      "loss": 0.0073,
      "step": 2562500
    },
    {
      "epoch": 4.193620182897691,
      "grad_norm": 0.3473653197288513,
      "learning_rate": 1.6233864644214995e-06,
      "loss": 0.0088,
      "step": 2562520
    },
    {
      "epoch": 4.1936529133363445,
      "grad_norm": 0.6124002933502197,
      "learning_rate": 1.6233205722079822e-06,
      "loss": 0.0091,
      "step": 2562540
    },
    {
      "epoch": 4.193685643774998,
      "grad_norm": 0.44957661628723145,
      "learning_rate": 1.6232546799944652e-06,
      "loss": 0.0133,
      "step": 2562560
    },
    {
      "epoch": 4.193718374213651,
      "grad_norm": 0.24240116775035858,
      "learning_rate": 1.623188787780948e-06,
      "loss": 0.0044,
      "step": 2562580
    },
    {
      "epoch": 4.193751104652304,
      "grad_norm": 0.17827697098255157,
      "learning_rate": 1.6231228955674309e-06,
      "loss": 0.0131,
      "step": 2562600
    },
    {
      "epoch": 4.193783835090958,
      "grad_norm": 0.5138174891471863,
      "learning_rate": 1.6230570033539138e-06,
      "loss": 0.0081,
      "step": 2562620
    },
    {
      "epoch": 4.193816565529612,
      "grad_norm": 0.2050524353981018,
      "learning_rate": 1.6229911111403968e-06,
      "loss": 0.0102,
      "step": 2562640
    },
    {
      "epoch": 4.193849295968264,
      "grad_norm": 0.13461516797542572,
      "learning_rate": 1.6229252189268795e-06,
      "loss": 0.0084,
      "step": 2562660
    },
    {
      "epoch": 4.193882026406918,
      "grad_norm": 0.12321220338344574,
      "learning_rate": 1.6228593267133625e-06,
      "loss": 0.0098,
      "step": 2562680
    },
    {
      "epoch": 4.1939147568455715,
      "grad_norm": 0.1676526516675949,
      "learning_rate": 1.6227934344998452e-06,
      "loss": 0.0071,
      "step": 2562700
    },
    {
      "epoch": 4.193947487284224,
      "grad_norm": 0.14410428702831268,
      "learning_rate": 1.6227275422863282e-06,
      "loss": 0.0111,
      "step": 2562720
    },
    {
      "epoch": 4.193980217722878,
      "grad_norm": 0.26858532428741455,
      "learning_rate": 1.622661650072811e-06,
      "loss": 0.007,
      "step": 2562740
    },
    {
      "epoch": 4.194012948161531,
      "grad_norm": 0.16771762073040009,
      "learning_rate": 1.6225957578592939e-06,
      "loss": 0.0082,
      "step": 2562760
    },
    {
      "epoch": 4.194045678600185,
      "grad_norm": 0.20745421946048737,
      "learning_rate": 1.6225298656457766e-06,
      "loss": 0.0088,
      "step": 2562780
    },
    {
      "epoch": 4.194078409038838,
      "grad_norm": 0.0644364207983017,
      "learning_rate": 1.6224639734322598e-06,
      "loss": 0.0105,
      "step": 2562800
    },
    {
      "epoch": 4.194111139477491,
      "grad_norm": 0.4884479343891144,
      "learning_rate": 1.6223980812187425e-06,
      "loss": 0.01,
      "step": 2562820
    },
    {
      "epoch": 4.194143869916145,
      "grad_norm": 0.36657682061195374,
      "learning_rate": 1.6223321890052254e-06,
      "loss": 0.005,
      "step": 2562840
    },
    {
      "epoch": 4.194176600354798,
      "grad_norm": 0.2066502720117569,
      "learning_rate": 1.6222662967917082e-06,
      "loss": 0.0076,
      "step": 2562860
    },
    {
      "epoch": 4.194209330793451,
      "grad_norm": 0.18169152736663818,
      "learning_rate": 1.6222004045781911e-06,
      "loss": 0.0089,
      "step": 2562880
    },
    {
      "epoch": 4.194242061232105,
      "grad_norm": 0.19970422983169556,
      "learning_rate": 1.6221345123646739e-06,
      "loss": 0.0088,
      "step": 2562900
    },
    {
      "epoch": 4.194274791670758,
      "grad_norm": 0.238102987408638,
      "learning_rate": 1.6220686201511568e-06,
      "loss": 0.0109,
      "step": 2562920
    },
    {
      "epoch": 4.194307522109411,
      "grad_norm": 0.1364898681640625,
      "learning_rate": 1.6220027279376396e-06,
      "loss": 0.0086,
      "step": 2562940
    },
    {
      "epoch": 4.194340252548065,
      "grad_norm": 0.11734599620103836,
      "learning_rate": 1.6219368357241227e-06,
      "loss": 0.0053,
      "step": 2562960
    },
    {
      "epoch": 4.194372982986718,
      "grad_norm": 0.12574899196624756,
      "learning_rate": 1.6218709435106057e-06,
      "loss": 0.0073,
      "step": 2562980
    },
    {
      "epoch": 4.194405713425371,
      "grad_norm": 0.22716407477855682,
      "learning_rate": 1.6218050512970884e-06,
      "loss": 0.0084,
      "step": 2563000
    },
    {
      "epoch": 4.1944384438640245,
      "grad_norm": 0.4783438742160797,
      "learning_rate": 1.6217391590835712e-06,
      "loss": 0.0134,
      "step": 2563020
    },
    {
      "epoch": 4.194471174302678,
      "grad_norm": 0.11072157323360443,
      "learning_rate": 1.6216732668700541e-06,
      "loss": 0.0089,
      "step": 2563040
    },
    {
      "epoch": 4.194503904741332,
      "grad_norm": 0.2574271261692047,
      "learning_rate": 1.6216073746565369e-06,
      "loss": 0.0073,
      "step": 2563060
    },
    {
      "epoch": 4.194536635179984,
      "grad_norm": 0.20320011675357819,
      "learning_rate": 1.6215414824430198e-06,
      "loss": 0.007,
      "step": 2563080
    },
    {
      "epoch": 4.194569365618638,
      "grad_norm": 0.29106661677360535,
      "learning_rate": 1.6214755902295025e-06,
      "loss": 0.0089,
      "step": 2563100
    },
    {
      "epoch": 4.194602096057292,
      "grad_norm": 0.15614807605743408,
      "learning_rate": 1.6214096980159855e-06,
      "loss": 0.0085,
      "step": 2563120
    },
    {
      "epoch": 4.194634826495944,
      "grad_norm": 0.3310021460056305,
      "learning_rate": 1.6213438058024687e-06,
      "loss": 0.0106,
      "step": 2563140
    },
    {
      "epoch": 4.194667556934598,
      "grad_norm": 0.1832728236913681,
      "learning_rate": 1.6212779135889514e-06,
      "loss": 0.0087,
      "step": 2563160
    },
    {
      "epoch": 4.1947002873732515,
      "grad_norm": 0.06198468059301376,
      "learning_rate": 1.6212120213754344e-06,
      "loss": 0.0105,
      "step": 2563180
    },
    {
      "epoch": 4.194733017811905,
      "grad_norm": 0.34185776114463806,
      "learning_rate": 1.621146129161917e-06,
      "loss": 0.0087,
      "step": 2563200
    },
    {
      "epoch": 4.194765748250558,
      "grad_norm": 0.19500437378883362,
      "learning_rate": 1.6210802369484e-06,
      "loss": 0.0079,
      "step": 2563220
    },
    {
      "epoch": 4.194798478689211,
      "grad_norm": 0.45217078924179077,
      "learning_rate": 1.6210143447348828e-06,
      "loss": 0.0106,
      "step": 2563240
    },
    {
      "epoch": 4.194831209127865,
      "grad_norm": 0.08898103982210159,
      "learning_rate": 1.6209484525213655e-06,
      "loss": 0.0086,
      "step": 2563260
    },
    {
      "epoch": 4.194863939566518,
      "grad_norm": 0.07713217288255692,
      "learning_rate": 1.6208825603078485e-06,
      "loss": 0.0099,
      "step": 2563280
    },
    {
      "epoch": 4.194896670005171,
      "grad_norm": 0.1475183218717575,
      "learning_rate": 1.6208166680943312e-06,
      "loss": 0.0105,
      "step": 2563300
    },
    {
      "epoch": 4.194929400443825,
      "grad_norm": 0.19549226760864258,
      "learning_rate": 1.6207507758808144e-06,
      "loss": 0.0115,
      "step": 2563320
    },
    {
      "epoch": 4.1949621308824785,
      "grad_norm": 0.31728532910346985,
      "learning_rate": 1.6206848836672973e-06,
      "loss": 0.0112,
      "step": 2563340
    },
    {
      "epoch": 4.194994861321131,
      "grad_norm": 0.4101220667362213,
      "learning_rate": 1.62061899145378e-06,
      "loss": 0.0087,
      "step": 2563360
    },
    {
      "epoch": 4.195027591759785,
      "grad_norm": 0.41588687896728516,
      "learning_rate": 1.620553099240263e-06,
      "loss": 0.0092,
      "step": 2563380
    },
    {
      "epoch": 4.195060322198438,
      "grad_norm": 0.2315942943096161,
      "learning_rate": 1.6204872070267458e-06,
      "loss": 0.0076,
      "step": 2563400
    },
    {
      "epoch": 4.195093052637091,
      "grad_norm": 0.3959772288799286,
      "learning_rate": 1.6204213148132287e-06,
      "loss": 0.011,
      "step": 2563420
    },
    {
      "epoch": 4.195125783075745,
      "grad_norm": 0.12210456281900406,
      "learning_rate": 1.6203554225997115e-06,
      "loss": 0.0068,
      "step": 2563440
    },
    {
      "epoch": 4.195158513514398,
      "grad_norm": 0.14905354380607605,
      "learning_rate": 1.6202895303861944e-06,
      "loss": 0.006,
      "step": 2563460
    },
    {
      "epoch": 4.195191243953052,
      "grad_norm": 0.3482235372066498,
      "learning_rate": 1.6202236381726771e-06,
      "loss": 0.0135,
      "step": 2563480
    },
    {
      "epoch": 4.195223974391705,
      "grad_norm": 0.16558480262756348,
      "learning_rate": 1.6201577459591603e-06,
      "loss": 0.0071,
      "step": 2563500
    },
    {
      "epoch": 4.195256704830358,
      "grad_norm": 0.08674091845750809,
      "learning_rate": 1.620091853745643e-06,
      "loss": 0.0134,
      "step": 2563520
    },
    {
      "epoch": 4.195289435269012,
      "grad_norm": 0.4372076392173767,
      "learning_rate": 1.620025961532126e-06,
      "loss": 0.0105,
      "step": 2563540
    },
    {
      "epoch": 4.1953221657076645,
      "grad_norm": 0.11476258188486099,
      "learning_rate": 1.6199600693186087e-06,
      "loss": 0.0074,
      "step": 2563560
    },
    {
      "epoch": 4.195354896146318,
      "grad_norm": 0.338510662317276,
      "learning_rate": 1.6198941771050917e-06,
      "loss": 0.009,
      "step": 2563580
    },
    {
      "epoch": 4.195387626584972,
      "grad_norm": 0.2805310785770416,
      "learning_rate": 1.6198282848915744e-06,
      "loss": 0.0059,
      "step": 2563600
    },
    {
      "epoch": 4.195420357023625,
      "grad_norm": 0.2624432444572449,
      "learning_rate": 1.6197623926780574e-06,
      "loss": 0.0101,
      "step": 2563620
    },
    {
      "epoch": 4.195453087462278,
      "grad_norm": 0.3033173382282257,
      "learning_rate": 1.6196965004645401e-06,
      "loss": 0.0058,
      "step": 2563640
    },
    {
      "epoch": 4.195485817900932,
      "grad_norm": 0.22411487996578217,
      "learning_rate": 1.619630608251023e-06,
      "loss": 0.0131,
      "step": 2563660
    },
    {
      "epoch": 4.195518548339585,
      "grad_norm": 0.2061067819595337,
      "learning_rate": 1.619564716037506e-06,
      "loss": 0.0157,
      "step": 2563680
    },
    {
      "epoch": 4.195551278778238,
      "grad_norm": 0.11877459287643433,
      "learning_rate": 1.619498823823989e-06,
      "loss": 0.0086,
      "step": 2563700
    },
    {
      "epoch": 4.1955840092168915,
      "grad_norm": 0.2013462781906128,
      "learning_rate": 1.6194329316104717e-06,
      "loss": 0.0093,
      "step": 2563720
    },
    {
      "epoch": 4.195616739655545,
      "grad_norm": 0.38096708059310913,
      "learning_rate": 1.6193670393969547e-06,
      "loss": 0.007,
      "step": 2563740
    },
    {
      "epoch": 4.195649470094198,
      "grad_norm": 0.11848483234643936,
      "learning_rate": 1.6193011471834374e-06,
      "loss": 0.0074,
      "step": 2563760
    },
    {
      "epoch": 4.195682200532851,
      "grad_norm": 0.09689509123563766,
      "learning_rate": 1.6192352549699204e-06,
      "loss": 0.0137,
      "step": 2563780
    },
    {
      "epoch": 4.195714930971505,
      "grad_norm": 0.4156533479690552,
      "learning_rate": 1.619169362756403e-06,
      "loss": 0.0092,
      "step": 2563800
    },
    {
      "epoch": 4.1957476614101585,
      "grad_norm": 0.4910154938697815,
      "learning_rate": 1.619103470542886e-06,
      "loss": 0.009,
      "step": 2563820
    },
    {
      "epoch": 4.195780391848811,
      "grad_norm": 0.2895163595676422,
      "learning_rate": 1.619037578329369e-06,
      "loss": 0.0072,
      "step": 2563840
    },
    {
      "epoch": 4.195813122287465,
      "grad_norm": 0.19701558351516724,
      "learning_rate": 1.618971686115852e-06,
      "loss": 0.0103,
      "step": 2563860
    },
    {
      "epoch": 4.195845852726118,
      "grad_norm": 0.15439949929714203,
      "learning_rate": 1.6189057939023347e-06,
      "loss": 0.0082,
      "step": 2563880
    },
    {
      "epoch": 4.195878583164772,
      "grad_norm": 0.04729144647717476,
      "learning_rate": 1.6188399016888176e-06,
      "loss": 0.0074,
      "step": 2563900
    },
    {
      "epoch": 4.195911313603425,
      "grad_norm": 0.2773247957229614,
      "learning_rate": 1.6187740094753004e-06,
      "loss": 0.0096,
      "step": 2563920
    },
    {
      "epoch": 4.195944044042078,
      "grad_norm": 0.1871621012687683,
      "learning_rate": 1.6187081172617833e-06,
      "loss": 0.0123,
      "step": 2563940
    },
    {
      "epoch": 4.195976774480732,
      "grad_norm": 0.05012325569987297,
      "learning_rate": 1.618642225048266e-06,
      "loss": 0.0081,
      "step": 2563960
    },
    {
      "epoch": 4.196009504919385,
      "grad_norm": 0.0628063902258873,
      "learning_rate": 1.618576332834749e-06,
      "loss": 0.0065,
      "step": 2563980
    },
    {
      "epoch": 4.196042235358038,
      "grad_norm": 0.46507665514945984,
      "learning_rate": 1.6185104406212318e-06,
      "loss": 0.0158,
      "step": 2564000
    },
    {
      "epoch": 4.196074965796692,
      "grad_norm": 0.24365520477294922,
      "learning_rate": 1.618444548407715e-06,
      "loss": 0.0082,
      "step": 2564020
    },
    {
      "epoch": 4.1961076962353445,
      "grad_norm": 0.20471669733524323,
      "learning_rate": 1.6183786561941977e-06,
      "loss": 0.0075,
      "step": 2564040
    },
    {
      "epoch": 4.196140426673998,
      "grad_norm": 0.11092328280210495,
      "learning_rate": 1.6183127639806806e-06,
      "loss": 0.0107,
      "step": 2564060
    },
    {
      "epoch": 4.196173157112652,
      "grad_norm": 0.21057915687561035,
      "learning_rate": 1.6182468717671634e-06,
      "loss": 0.0067,
      "step": 2564080
    },
    {
      "epoch": 4.196205887551305,
      "grad_norm": 0.032819122076034546,
      "learning_rate": 1.6181809795536463e-06,
      "loss": 0.0118,
      "step": 2564100
    },
    {
      "epoch": 4.196238617989958,
      "grad_norm": 0.17008036375045776,
      "learning_rate": 1.618115087340129e-06,
      "loss": 0.0091,
      "step": 2564120
    },
    {
      "epoch": 4.196271348428612,
      "grad_norm": 0.22798259556293488,
      "learning_rate": 1.618049195126612e-06,
      "loss": 0.0086,
      "step": 2564140
    },
    {
      "epoch": 4.196304078867265,
      "grad_norm": 0.2388555407524109,
      "learning_rate": 1.6179833029130947e-06,
      "loss": 0.0108,
      "step": 2564160
    },
    {
      "epoch": 4.196336809305918,
      "grad_norm": 0.3253054916858673,
      "learning_rate": 1.6179174106995777e-06,
      "loss": 0.0095,
      "step": 2564180
    },
    {
      "epoch": 4.1963695397445715,
      "grad_norm": 0.4603767693042755,
      "learning_rate": 1.6178515184860609e-06,
      "loss": 0.0108,
      "step": 2564200
    },
    {
      "epoch": 4.196402270183225,
      "grad_norm": 0.3392769694328308,
      "learning_rate": 1.6177856262725436e-06,
      "loss": 0.0101,
      "step": 2564220
    },
    {
      "epoch": 4.196435000621879,
      "grad_norm": 0.2131684124469757,
      "learning_rate": 1.6177197340590263e-06,
      "loss": 0.0086,
      "step": 2564240
    },
    {
      "epoch": 4.196467731060531,
      "grad_norm": 0.10305408388376236,
      "learning_rate": 1.6176538418455093e-06,
      "loss": 0.0106,
      "step": 2564260
    },
    {
      "epoch": 4.196500461499185,
      "grad_norm": 0.11554311215877533,
      "learning_rate": 1.617587949631992e-06,
      "loss": 0.0082,
      "step": 2564280
    },
    {
      "epoch": 4.196533191937839,
      "grad_norm": 0.20131249725818634,
      "learning_rate": 1.617522057418475e-06,
      "loss": 0.0101,
      "step": 2564300
    },
    {
      "epoch": 4.196565922376491,
      "grad_norm": 0.3658001720905304,
      "learning_rate": 1.6174561652049577e-06,
      "loss": 0.0122,
      "step": 2564320
    },
    {
      "epoch": 4.196598652815145,
      "grad_norm": 0.2339421510696411,
      "learning_rate": 1.6173902729914407e-06,
      "loss": 0.0101,
      "step": 2564340
    },
    {
      "epoch": 4.1966313832537985,
      "grad_norm": 0.8083124756813049,
      "learning_rate": 1.6173243807779234e-06,
      "loss": 0.0116,
      "step": 2564360
    },
    {
      "epoch": 4.196664113692452,
      "grad_norm": 0.3625955283641815,
      "learning_rate": 1.6172584885644066e-06,
      "loss": 0.0078,
      "step": 2564380
    },
    {
      "epoch": 4.196696844131105,
      "grad_norm": 0.16903792321681976,
      "learning_rate": 1.6171925963508895e-06,
      "loss": 0.0085,
      "step": 2564400
    },
    {
      "epoch": 4.196729574569758,
      "grad_norm": 0.12771949172019958,
      "learning_rate": 1.6171267041373723e-06,
      "loss": 0.0054,
      "step": 2564420
    },
    {
      "epoch": 4.196762305008412,
      "grad_norm": 0.18336988985538483,
      "learning_rate": 1.6170608119238552e-06,
      "loss": 0.0075,
      "step": 2564440
    },
    {
      "epoch": 4.196795035447065,
      "grad_norm": 0.42524975538253784,
      "learning_rate": 1.616994919710338e-06,
      "loss": 0.0087,
      "step": 2564460
    },
    {
      "epoch": 4.196827765885718,
      "grad_norm": 0.3851957321166992,
      "learning_rate": 1.6169290274968207e-06,
      "loss": 0.008,
      "step": 2564480
    },
    {
      "epoch": 4.196860496324372,
      "grad_norm": 0.2763865292072296,
      "learning_rate": 1.6168631352833036e-06,
      "loss": 0.0097,
      "step": 2564500
    },
    {
      "epoch": 4.1968932267630255,
      "grad_norm": 0.13259364664554596,
      "learning_rate": 1.6167972430697864e-06,
      "loss": 0.0142,
      "step": 2564520
    },
    {
      "epoch": 4.196925957201678,
      "grad_norm": 0.20446908473968506,
      "learning_rate": 1.6167313508562693e-06,
      "loss": 0.007,
      "step": 2564540
    },
    {
      "epoch": 4.196958687640332,
      "grad_norm": 0.2943211793899536,
      "learning_rate": 1.6166654586427525e-06,
      "loss": 0.0101,
      "step": 2564560
    },
    {
      "epoch": 4.196991418078985,
      "grad_norm": 0.20727667212486267,
      "learning_rate": 1.6165995664292352e-06,
      "loss": 0.0068,
      "step": 2564580
    },
    {
      "epoch": 4.197024148517638,
      "grad_norm": 0.4537433087825775,
      "learning_rate": 1.6165336742157182e-06,
      "loss": 0.0077,
      "step": 2564600
    },
    {
      "epoch": 4.197056878956292,
      "grad_norm": 0.2930522561073303,
      "learning_rate": 1.616467782002201e-06,
      "loss": 0.0131,
      "step": 2564620
    },
    {
      "epoch": 4.197089609394945,
      "grad_norm": 0.10139280557632446,
      "learning_rate": 1.6164018897886839e-06,
      "loss": 0.0092,
      "step": 2564640
    },
    {
      "epoch": 4.197122339833599,
      "grad_norm": 0.4734441637992859,
      "learning_rate": 1.6163359975751666e-06,
      "loss": 0.0147,
      "step": 2564660
    },
    {
      "epoch": 4.1971550702722515,
      "grad_norm": 0.06619133800268173,
      "learning_rate": 1.6162701053616494e-06,
      "loss": 0.0114,
      "step": 2564680
    },
    {
      "epoch": 4.197187800710905,
      "grad_norm": 0.1542324274778366,
      "learning_rate": 1.6162042131481323e-06,
      "loss": 0.0074,
      "step": 2564700
    },
    {
      "epoch": 4.197220531149559,
      "grad_norm": 0.1970720738172531,
      "learning_rate": 1.6161383209346155e-06,
      "loss": 0.0086,
      "step": 2564720
    },
    {
      "epoch": 4.197253261588211,
      "grad_norm": 0.1197286918759346,
      "learning_rate": 1.6160724287210982e-06,
      "loss": 0.0084,
      "step": 2564740
    },
    {
      "epoch": 4.197285992026865,
      "grad_norm": 0.24095562100410461,
      "learning_rate": 1.6160065365075812e-06,
      "loss": 0.0111,
      "step": 2564760
    },
    {
      "epoch": 4.197318722465519,
      "grad_norm": 0.20529402792453766,
      "learning_rate": 1.615940644294064e-06,
      "loss": 0.0092,
      "step": 2564780
    },
    {
      "epoch": 4.197351452904172,
      "grad_norm": 0.3719734847545624,
      "learning_rate": 1.6158747520805469e-06,
      "loss": 0.0113,
      "step": 2564800
    },
    {
      "epoch": 4.197384183342825,
      "grad_norm": 0.06239713355898857,
      "learning_rate": 1.6158088598670296e-06,
      "loss": 0.0059,
      "step": 2564820
    },
    {
      "epoch": 4.1974169137814785,
      "grad_norm": 0.0357072651386261,
      "learning_rate": 1.6157429676535126e-06,
      "loss": 0.0084,
      "step": 2564840
    },
    {
      "epoch": 4.197449644220132,
      "grad_norm": 0.1596464365720749,
      "learning_rate": 1.6156770754399953e-06,
      "loss": 0.0081,
      "step": 2564860
    },
    {
      "epoch": 4.197482374658785,
      "grad_norm": 0.11741086095571518,
      "learning_rate": 1.6156111832264782e-06,
      "loss": 0.0094,
      "step": 2564880
    },
    {
      "epoch": 4.197515105097438,
      "grad_norm": 0.05830935388803482,
      "learning_rate": 1.6155452910129612e-06,
      "loss": 0.0084,
      "step": 2564900
    },
    {
      "epoch": 4.197547835536092,
      "grad_norm": 0.5466744899749756,
      "learning_rate": 1.6154793987994441e-06,
      "loss": 0.0138,
      "step": 2564920
    },
    {
      "epoch": 4.197580565974746,
      "grad_norm": 0.30632030963897705,
      "learning_rate": 1.6154135065859269e-06,
      "loss": 0.0092,
      "step": 2564940
    },
    {
      "epoch": 4.197613296413398,
      "grad_norm": 0.4094628393650055,
      "learning_rate": 1.6153476143724098e-06,
      "loss": 0.0068,
      "step": 2564960
    },
    {
      "epoch": 4.197646026852052,
      "grad_norm": 0.15942278504371643,
      "learning_rate": 1.6152817221588926e-06,
      "loss": 0.0104,
      "step": 2564980
    },
    {
      "epoch": 4.1976787572907055,
      "grad_norm": 0.3492216169834137,
      "learning_rate": 1.6152158299453755e-06,
      "loss": 0.0097,
      "step": 2565000
    },
    {
      "epoch": 4.197711487729358,
      "grad_norm": 0.1066063642501831,
      "learning_rate": 1.6151499377318583e-06,
      "loss": 0.0077,
      "step": 2565020
    },
    {
      "epoch": 4.197744218168012,
      "grad_norm": 0.32500484585762024,
      "learning_rate": 1.6150840455183412e-06,
      "loss": 0.0088,
      "step": 2565040
    },
    {
      "epoch": 4.197776948606665,
      "grad_norm": 0.23365262150764465,
      "learning_rate": 1.615018153304824e-06,
      "loss": 0.0094,
      "step": 2565060
    },
    {
      "epoch": 4.197809679045319,
      "grad_norm": 0.3066061735153198,
      "learning_rate": 1.6149522610913071e-06,
      "loss": 0.0074,
      "step": 2565080
    },
    {
      "epoch": 4.197842409483972,
      "grad_norm": 0.1874360591173172,
      "learning_rate": 1.6148863688777899e-06,
      "loss": 0.0102,
      "step": 2565100
    },
    {
      "epoch": 4.197875139922625,
      "grad_norm": 0.16039936244487762,
      "learning_rate": 1.6148204766642728e-06,
      "loss": 0.0055,
      "step": 2565120
    },
    {
      "epoch": 4.197907870361279,
      "grad_norm": 0.2588765323162079,
      "learning_rate": 1.6147545844507556e-06,
      "loss": 0.009,
      "step": 2565140
    },
    {
      "epoch": 4.197940600799932,
      "grad_norm": 0.4259953200817108,
      "learning_rate": 1.6146886922372385e-06,
      "loss": 0.012,
      "step": 2565160
    },
    {
      "epoch": 4.197973331238585,
      "grad_norm": 0.138011634349823,
      "learning_rate": 1.6146228000237212e-06,
      "loss": 0.0143,
      "step": 2565180
    },
    {
      "epoch": 4.198006061677239,
      "grad_norm": 0.09322749823331833,
      "learning_rate": 1.6145569078102042e-06,
      "loss": 0.0092,
      "step": 2565200
    },
    {
      "epoch": 4.1980387921158915,
      "grad_norm": 0.199142724275589,
      "learning_rate": 1.614491015596687e-06,
      "loss": 0.0086,
      "step": 2565220
    },
    {
      "epoch": 4.198071522554545,
      "grad_norm": 0.10403479635715485,
      "learning_rate": 1.6144251233831699e-06,
      "loss": 0.0057,
      "step": 2565240
    },
    {
      "epoch": 4.198104252993199,
      "grad_norm": 0.163699209690094,
      "learning_rate": 1.6143592311696528e-06,
      "loss": 0.0078,
      "step": 2565260
    },
    {
      "epoch": 4.198136983431852,
      "grad_norm": 0.28561970591545105,
      "learning_rate": 1.6142933389561358e-06,
      "loss": 0.0077,
      "step": 2565280
    },
    {
      "epoch": 4.198169713870505,
      "grad_norm": 0.12152916938066483,
      "learning_rate": 1.6142274467426185e-06,
      "loss": 0.0082,
      "step": 2565300
    },
    {
      "epoch": 4.198202444309159,
      "grad_norm": 0.09894222021102905,
      "learning_rate": 1.6141615545291015e-06,
      "loss": 0.0127,
      "step": 2565320
    },
    {
      "epoch": 4.198235174747812,
      "grad_norm": 0.1784931719303131,
      "learning_rate": 1.6140956623155842e-06,
      "loss": 0.009,
      "step": 2565340
    },
    {
      "epoch": 4.198267905186466,
      "grad_norm": 0.22970914840698242,
      "learning_rate": 1.6140297701020672e-06,
      "loss": 0.0061,
      "step": 2565360
    },
    {
      "epoch": 4.1983006356251185,
      "grad_norm": 0.08078210800886154,
      "learning_rate": 1.61396387788855e-06,
      "loss": 0.0122,
      "step": 2565380
    },
    {
      "epoch": 4.198333366063772,
      "grad_norm": 0.13344666361808777,
      "learning_rate": 1.6138979856750329e-06,
      "loss": 0.0089,
      "step": 2565400
    },
    {
      "epoch": 4.198366096502426,
      "grad_norm": 0.07606218755245209,
      "learning_rate": 1.6138320934615156e-06,
      "loss": 0.0087,
      "step": 2565420
    },
    {
      "epoch": 4.198398826941078,
      "grad_norm": 0.22388234734535217,
      "learning_rate": 1.6137662012479988e-06,
      "loss": 0.0067,
      "step": 2565440
    },
    {
      "epoch": 4.198431557379732,
      "grad_norm": 0.18691007792949677,
      "learning_rate": 1.6137003090344815e-06,
      "loss": 0.0075,
      "step": 2565460
    },
    {
      "epoch": 4.1984642878183855,
      "grad_norm": 0.16861078143119812,
      "learning_rate": 1.6136344168209645e-06,
      "loss": 0.0097,
      "step": 2565480
    },
    {
      "epoch": 4.198497018257038,
      "grad_norm": 0.12347337603569031,
      "learning_rate": 1.6135685246074472e-06,
      "loss": 0.008,
      "step": 2565500
    },
    {
      "epoch": 4.198529748695692,
      "grad_norm": 0.40046846866607666,
      "learning_rate": 1.6135026323939301e-06,
      "loss": 0.0065,
      "step": 2565520
    },
    {
      "epoch": 4.198562479134345,
      "grad_norm": 0.4538555145263672,
      "learning_rate": 1.6134367401804129e-06,
      "loss": 0.0105,
      "step": 2565540
    },
    {
      "epoch": 4.198595209572999,
      "grad_norm": 0.4038727581501007,
      "learning_rate": 1.6133708479668958e-06,
      "loss": 0.0122,
      "step": 2565560
    },
    {
      "epoch": 4.198627940011652,
      "grad_norm": 0.1547846645116806,
      "learning_rate": 1.6133049557533786e-06,
      "loss": 0.0054,
      "step": 2565580
    },
    {
      "epoch": 4.198660670450305,
      "grad_norm": 0.17398852109909058,
      "learning_rate": 1.6132390635398617e-06,
      "loss": 0.0072,
      "step": 2565600
    },
    {
      "epoch": 4.198693400888959,
      "grad_norm": 0.19114500284194946,
      "learning_rate": 1.6131731713263447e-06,
      "loss": 0.0069,
      "step": 2565620
    },
    {
      "epoch": 4.198726131327612,
      "grad_norm": 0.24609005451202393,
      "learning_rate": 1.6131072791128274e-06,
      "loss": 0.0097,
      "step": 2565640
    },
    {
      "epoch": 4.198758861766265,
      "grad_norm": 0.25830262899398804,
      "learning_rate": 1.6130413868993102e-06,
      "loss": 0.0079,
      "step": 2565660
    },
    {
      "epoch": 4.198791592204919,
      "grad_norm": 0.09190791100263596,
      "learning_rate": 1.6129754946857931e-06,
      "loss": 0.0097,
      "step": 2565680
    },
    {
      "epoch": 4.198824322643572,
      "grad_norm": 0.26800084114074707,
      "learning_rate": 1.6129096024722759e-06,
      "loss": 0.0099,
      "step": 2565700
    },
    {
      "epoch": 4.198857053082225,
      "grad_norm": 0.19580639898777008,
      "learning_rate": 1.6128437102587588e-06,
      "loss": 0.0068,
      "step": 2565720
    },
    {
      "epoch": 4.198889783520879,
      "grad_norm": 0.5904344916343689,
      "learning_rate": 1.6127778180452416e-06,
      "loss": 0.0112,
      "step": 2565740
    },
    {
      "epoch": 4.198922513959532,
      "grad_norm": 0.22230669856071472,
      "learning_rate": 1.6127119258317245e-06,
      "loss": 0.0086,
      "step": 2565760
    },
    {
      "epoch": 4.198955244398185,
      "grad_norm": 0.2536504566669464,
      "learning_rate": 1.6126460336182077e-06,
      "loss": 0.0071,
      "step": 2565780
    },
    {
      "epoch": 4.198987974836839,
      "grad_norm": 0.32039856910705566,
      "learning_rate": 1.6125801414046904e-06,
      "loss": 0.0165,
      "step": 2565800
    },
    {
      "epoch": 4.199020705275492,
      "grad_norm": 0.4075160324573517,
      "learning_rate": 1.6125142491911734e-06,
      "loss": 0.0108,
      "step": 2565820
    },
    {
      "epoch": 4.199053435714146,
      "grad_norm": 0.28157898783683777,
      "learning_rate": 1.612448356977656e-06,
      "loss": 0.0064,
      "step": 2565840
    },
    {
      "epoch": 4.1990861661527985,
      "grad_norm": 0.18220417201519012,
      "learning_rate": 1.612382464764139e-06,
      "loss": 0.0081,
      "step": 2565860
    },
    {
      "epoch": 4.199118896591452,
      "grad_norm": 0.25226542353630066,
      "learning_rate": 1.6123165725506218e-06,
      "loss": 0.0098,
      "step": 2565880
    },
    {
      "epoch": 4.199151627030106,
      "grad_norm": 0.33577245473861694,
      "learning_rate": 1.6122506803371045e-06,
      "loss": 0.0149,
      "step": 2565900
    },
    {
      "epoch": 4.199184357468758,
      "grad_norm": 0.24608148634433746,
      "learning_rate": 1.6121847881235875e-06,
      "loss": 0.0086,
      "step": 2565920
    },
    {
      "epoch": 4.199217087907412,
      "grad_norm": 0.24623435735702515,
      "learning_rate": 1.6121188959100702e-06,
      "loss": 0.0082,
      "step": 2565940
    },
    {
      "epoch": 4.199249818346066,
      "grad_norm": 0.15954433381557465,
      "learning_rate": 1.6120530036965534e-06,
      "loss": 0.0074,
      "step": 2565960
    },
    {
      "epoch": 4.199282548784719,
      "grad_norm": 0.6872597336769104,
      "learning_rate": 1.6119871114830363e-06,
      "loss": 0.0149,
      "step": 2565980
    },
    {
      "epoch": 4.199315279223372,
      "grad_norm": 0.2512635588645935,
      "learning_rate": 1.611921219269519e-06,
      "loss": 0.0079,
      "step": 2566000
    },
    {
      "epoch": 4.1993480096620255,
      "grad_norm": 0.24364112317562103,
      "learning_rate": 1.611855327056002e-06,
      "loss": 0.0082,
      "step": 2566020
    },
    {
      "epoch": 4.199380740100679,
      "grad_norm": 0.26730215549468994,
      "learning_rate": 1.6117894348424848e-06,
      "loss": 0.0079,
      "step": 2566040
    },
    {
      "epoch": 4.199413470539332,
      "grad_norm": 0.6594322323799133,
      "learning_rate": 1.6117235426289677e-06,
      "loss": 0.0121,
      "step": 2566060
    },
    {
      "epoch": 4.199446200977985,
      "grad_norm": 0.1267094612121582,
      "learning_rate": 1.6116576504154505e-06,
      "loss": 0.0068,
      "step": 2566080
    },
    {
      "epoch": 4.199478931416639,
      "grad_norm": 0.2047104835510254,
      "learning_rate": 1.6115917582019334e-06,
      "loss": 0.0099,
      "step": 2566100
    },
    {
      "epoch": 4.199511661855293,
      "grad_norm": 0.5275930166244507,
      "learning_rate": 1.6115258659884162e-06,
      "loss": 0.0068,
      "step": 2566120
    },
    {
      "epoch": 4.199544392293945,
      "grad_norm": 0.09263282269239426,
      "learning_rate": 1.6114599737748993e-06,
      "loss": 0.0076,
      "step": 2566140
    },
    {
      "epoch": 4.199577122732599,
      "grad_norm": 0.14164026081562042,
      "learning_rate": 1.611394081561382e-06,
      "loss": 0.0084,
      "step": 2566160
    },
    {
      "epoch": 4.1996098531712525,
      "grad_norm": 0.18455448746681213,
      "learning_rate": 1.611328189347865e-06,
      "loss": 0.0073,
      "step": 2566180
    },
    {
      "epoch": 4.199642583609905,
      "grad_norm": 0.3000550866127014,
      "learning_rate": 1.6112622971343477e-06,
      "loss": 0.0077,
      "step": 2566200
    },
    {
      "epoch": 4.199675314048559,
      "grad_norm": 0.1091860756278038,
      "learning_rate": 1.6111964049208307e-06,
      "loss": 0.0104,
      "step": 2566220
    },
    {
      "epoch": 4.199708044487212,
      "grad_norm": 0.06717702746391296,
      "learning_rate": 1.6111305127073134e-06,
      "loss": 0.0074,
      "step": 2566240
    },
    {
      "epoch": 4.199740774925866,
      "grad_norm": 0.2286812961101532,
      "learning_rate": 1.6110646204937964e-06,
      "loss": 0.0065,
      "step": 2566260
    },
    {
      "epoch": 4.199773505364519,
      "grad_norm": 0.1920487880706787,
      "learning_rate": 1.6109987282802791e-06,
      "loss": 0.0126,
      "step": 2566280
    },
    {
      "epoch": 4.199806235803172,
      "grad_norm": 0.36626675724983215,
      "learning_rate": 1.610932836066762e-06,
      "loss": 0.0086,
      "step": 2566300
    },
    {
      "epoch": 4.199838966241826,
      "grad_norm": 0.10205435752868652,
      "learning_rate": 1.610866943853245e-06,
      "loss": 0.014,
      "step": 2566320
    },
    {
      "epoch": 4.1998716966804785,
      "grad_norm": 0.285045862197876,
      "learning_rate": 1.610801051639728e-06,
      "loss": 0.0119,
      "step": 2566340
    },
    {
      "epoch": 4.199904427119132,
      "grad_norm": 0.35854122042655945,
      "learning_rate": 1.6107351594262107e-06,
      "loss": 0.0102,
      "step": 2566360
    },
    {
      "epoch": 4.199937157557786,
      "grad_norm": 0.3212631642818451,
      "learning_rate": 1.6106692672126937e-06,
      "loss": 0.0062,
      "step": 2566380
    },
    {
      "epoch": 4.199969887996439,
      "grad_norm": 0.24275965988636017,
      "learning_rate": 1.6106033749991764e-06,
      "loss": 0.0068,
      "step": 2566400
    },
    {
      "epoch": 4.200002618435092,
      "grad_norm": 0.43249809741973877,
      "learning_rate": 1.6105374827856594e-06,
      "loss": 0.0073,
      "step": 2566420
    },
    {
      "epoch": 4.200035348873746,
      "grad_norm": 0.4553326666355133,
      "learning_rate": 1.6104715905721421e-06,
      "loss": 0.0103,
      "step": 2566440
    },
    {
      "epoch": 4.200068079312399,
      "grad_norm": 0.08085539937019348,
      "learning_rate": 1.610405698358625e-06,
      "loss": 0.0121,
      "step": 2566460
    },
    {
      "epoch": 4.200100809751052,
      "grad_norm": 0.1769091933965683,
      "learning_rate": 1.610339806145108e-06,
      "loss": 0.012,
      "step": 2566480
    },
    {
      "epoch": 4.2001335401897055,
      "grad_norm": 0.348555326461792,
      "learning_rate": 1.610273913931591e-06,
      "loss": 0.0082,
      "step": 2566500
    },
    {
      "epoch": 4.200166270628359,
      "grad_norm": 0.527396559715271,
      "learning_rate": 1.6102080217180737e-06,
      "loss": 0.0086,
      "step": 2566520
    },
    {
      "epoch": 4.200199001067013,
      "grad_norm": 0.12889093160629272,
      "learning_rate": 1.6101421295045567e-06,
      "loss": 0.0117,
      "step": 2566540
    },
    {
      "epoch": 4.200231731505665,
      "grad_norm": 0.08755629509687424,
      "learning_rate": 1.6100762372910394e-06,
      "loss": 0.0097,
      "step": 2566560
    },
    {
      "epoch": 4.200264461944319,
      "grad_norm": 0.16763970255851746,
      "learning_rate": 1.6100103450775223e-06,
      "loss": 0.007,
      "step": 2566580
    },
    {
      "epoch": 4.200297192382973,
      "grad_norm": 0.14259131252765656,
      "learning_rate": 1.609944452864005e-06,
      "loss": 0.0082,
      "step": 2566600
    },
    {
      "epoch": 4.200329922821625,
      "grad_norm": 0.2896086275577545,
      "learning_rate": 1.609878560650488e-06,
      "loss": 0.0077,
      "step": 2566620
    },
    {
      "epoch": 4.200362653260279,
      "grad_norm": 0.09988432377576828,
      "learning_rate": 1.6098126684369708e-06,
      "loss": 0.0095,
      "step": 2566640
    },
    {
      "epoch": 4.2003953836989325,
      "grad_norm": 0.39470434188842773,
      "learning_rate": 1.609746776223454e-06,
      "loss": 0.008,
      "step": 2566660
    },
    {
      "epoch": 4.200428114137586,
      "grad_norm": 0.5122210383415222,
      "learning_rate": 1.6096808840099367e-06,
      "loss": 0.0071,
      "step": 2566680
    },
    {
      "epoch": 4.200460844576239,
      "grad_norm": 0.10678644478321075,
      "learning_rate": 1.6096149917964196e-06,
      "loss": 0.0081,
      "step": 2566700
    },
    {
      "epoch": 4.200493575014892,
      "grad_norm": 0.1781076192855835,
      "learning_rate": 1.6095490995829024e-06,
      "loss": 0.0085,
      "step": 2566720
    },
    {
      "epoch": 4.200526305453546,
      "grad_norm": 0.06718593835830688,
      "learning_rate": 1.6094832073693853e-06,
      "loss": 0.0077,
      "step": 2566740
    },
    {
      "epoch": 4.200559035892199,
      "grad_norm": 0.1622910350561142,
      "learning_rate": 1.609417315155868e-06,
      "loss": 0.0115,
      "step": 2566760
    },
    {
      "epoch": 4.200591766330852,
      "grad_norm": 0.1535528600215912,
      "learning_rate": 1.609351422942351e-06,
      "loss": 0.0114,
      "step": 2566780
    },
    {
      "epoch": 4.200624496769506,
      "grad_norm": 0.2480749487876892,
      "learning_rate": 1.6092855307288338e-06,
      "loss": 0.0108,
      "step": 2566800
    },
    {
      "epoch": 4.2006572272081595,
      "grad_norm": 0.1906326562166214,
      "learning_rate": 1.6092196385153167e-06,
      "loss": 0.0075,
      "step": 2566820
    },
    {
      "epoch": 4.200689957646812,
      "grad_norm": 0.7304900884628296,
      "learning_rate": 1.6091537463017999e-06,
      "loss": 0.0053,
      "step": 2566840
    },
    {
      "epoch": 4.200722688085466,
      "grad_norm": 0.21674661338329315,
      "learning_rate": 1.6090878540882826e-06,
      "loss": 0.0048,
      "step": 2566860
    },
    {
      "epoch": 4.200755418524119,
      "grad_norm": 0.26867976784706116,
      "learning_rate": 1.6090219618747653e-06,
      "loss": 0.0118,
      "step": 2566880
    },
    {
      "epoch": 4.200788148962772,
      "grad_norm": 0.1759447306394577,
      "learning_rate": 1.6089560696612483e-06,
      "loss": 0.0094,
      "step": 2566900
    },
    {
      "epoch": 4.200820879401426,
      "grad_norm": 0.330333948135376,
      "learning_rate": 1.608890177447731e-06,
      "loss": 0.0049,
      "step": 2566920
    },
    {
      "epoch": 4.200853609840079,
      "grad_norm": 0.15110556781291962,
      "learning_rate": 1.608824285234214e-06,
      "loss": 0.0091,
      "step": 2566940
    },
    {
      "epoch": 4.200886340278732,
      "grad_norm": 0.39945054054260254,
      "learning_rate": 1.6087583930206967e-06,
      "loss": 0.0107,
      "step": 2566960
    },
    {
      "epoch": 4.200919070717386,
      "grad_norm": 0.19987404346466064,
      "learning_rate": 1.6086925008071797e-06,
      "loss": 0.0081,
      "step": 2566980
    },
    {
      "epoch": 4.200951801156039,
      "grad_norm": 0.36145058274269104,
      "learning_rate": 1.6086266085936624e-06,
      "loss": 0.0128,
      "step": 2567000
    },
    {
      "epoch": 4.200984531594693,
      "grad_norm": 0.11059459298849106,
      "learning_rate": 1.6085607163801456e-06,
      "loss": 0.0074,
      "step": 2567020
    },
    {
      "epoch": 4.2010172620333455,
      "grad_norm": 0.41633304953575134,
      "learning_rate": 1.6084948241666285e-06,
      "loss": 0.0093,
      "step": 2567040
    },
    {
      "epoch": 4.201049992471999,
      "grad_norm": 0.368086576461792,
      "learning_rate": 1.6084289319531113e-06,
      "loss": 0.008,
      "step": 2567060
    },
    {
      "epoch": 4.201082722910653,
      "grad_norm": 0.13363084197044373,
      "learning_rate": 1.6083630397395942e-06,
      "loss": 0.0096,
      "step": 2567080
    },
    {
      "epoch": 4.201115453349305,
      "grad_norm": 0.15162688493728638,
      "learning_rate": 1.608297147526077e-06,
      "loss": 0.0058,
      "step": 2567100
    },
    {
      "epoch": 4.201148183787959,
      "grad_norm": 0.13381469249725342,
      "learning_rate": 1.6082312553125597e-06,
      "loss": 0.008,
      "step": 2567120
    },
    {
      "epoch": 4.2011809142266126,
      "grad_norm": 0.2512633800506592,
      "learning_rate": 1.6081653630990427e-06,
      "loss": 0.01,
      "step": 2567140
    },
    {
      "epoch": 4.201213644665266,
      "grad_norm": 0.27081021666526794,
      "learning_rate": 1.6080994708855254e-06,
      "loss": 0.008,
      "step": 2567160
    },
    {
      "epoch": 4.201246375103919,
      "grad_norm": 0.19679595530033112,
      "learning_rate": 1.6080335786720083e-06,
      "loss": 0.0063,
      "step": 2567180
    },
    {
      "epoch": 4.2012791055425724,
      "grad_norm": 0.2563166618347168,
      "learning_rate": 1.6079676864584915e-06,
      "loss": 0.0094,
      "step": 2567200
    },
    {
      "epoch": 4.201311835981226,
      "grad_norm": 0.15176904201507568,
      "learning_rate": 1.6079017942449743e-06,
      "loss": 0.0106,
      "step": 2567220
    },
    {
      "epoch": 4.201344566419879,
      "grad_norm": 0.3415049910545349,
      "learning_rate": 1.6078359020314572e-06,
      "loss": 0.0146,
      "step": 2567240
    },
    {
      "epoch": 4.201377296858532,
      "grad_norm": 0.07228453457355499,
      "learning_rate": 1.60777000981794e-06,
      "loss": 0.0082,
      "step": 2567260
    },
    {
      "epoch": 4.201410027297186,
      "grad_norm": 0.08712167292833328,
      "learning_rate": 1.6077041176044229e-06,
      "loss": 0.0084,
      "step": 2567280
    },
    {
      "epoch": 4.2014427577358395,
      "grad_norm": 0.1815863847732544,
      "learning_rate": 1.6076382253909056e-06,
      "loss": 0.0083,
      "step": 2567300
    },
    {
      "epoch": 4.201475488174492,
      "grad_norm": 0.314157634973526,
      "learning_rate": 1.6075723331773884e-06,
      "loss": 0.0111,
      "step": 2567320
    },
    {
      "epoch": 4.201508218613146,
      "grad_norm": 0.09846344590187073,
      "learning_rate": 1.6075064409638713e-06,
      "loss": 0.0053,
      "step": 2567340
    },
    {
      "epoch": 4.201540949051799,
      "grad_norm": 0.6811351776123047,
      "learning_rate": 1.6074405487503545e-06,
      "loss": 0.0132,
      "step": 2567360
    },
    {
      "epoch": 4.201573679490452,
      "grad_norm": 0.09374053031206131,
      "learning_rate": 1.6073746565368372e-06,
      "loss": 0.0103,
      "step": 2567380
    },
    {
      "epoch": 4.201606409929106,
      "grad_norm": 0.2992241382598877,
      "learning_rate": 1.6073087643233202e-06,
      "loss": 0.0096,
      "step": 2567400
    },
    {
      "epoch": 4.201639140367759,
      "grad_norm": 0.09772404283285141,
      "learning_rate": 1.607242872109803e-06,
      "loss": 0.0088,
      "step": 2567420
    },
    {
      "epoch": 4.201671870806413,
      "grad_norm": 0.24178090691566467,
      "learning_rate": 1.6071769798962859e-06,
      "loss": 0.0094,
      "step": 2567440
    },
    {
      "epoch": 4.201704601245066,
      "grad_norm": 0.18465736508369446,
      "learning_rate": 1.6071110876827686e-06,
      "loss": 0.0086,
      "step": 2567460
    },
    {
      "epoch": 4.201737331683719,
      "grad_norm": 0.2715727388858795,
      "learning_rate": 1.6070451954692516e-06,
      "loss": 0.0114,
      "step": 2567480
    },
    {
      "epoch": 4.201770062122373,
      "grad_norm": 0.5832329988479614,
      "learning_rate": 1.6069793032557343e-06,
      "loss": 0.0112,
      "step": 2567500
    },
    {
      "epoch": 4.2018027925610255,
      "grad_norm": 0.3120434284210205,
      "learning_rate": 1.6069134110422173e-06,
      "loss": 0.0134,
      "step": 2567520
    },
    {
      "epoch": 4.201835522999679,
      "grad_norm": 0.23410305380821228,
      "learning_rate": 1.6068475188287002e-06,
      "loss": 0.0088,
      "step": 2567540
    },
    {
      "epoch": 4.201868253438333,
      "grad_norm": 0.5916516184806824,
      "learning_rate": 1.6067816266151832e-06,
      "loss": 0.009,
      "step": 2567560
    },
    {
      "epoch": 4.201900983876986,
      "grad_norm": 0.07765837758779526,
      "learning_rate": 1.606715734401666e-06,
      "loss": 0.0078,
      "step": 2567580
    },
    {
      "epoch": 4.201933714315639,
      "grad_norm": 0.4339340925216675,
      "learning_rate": 1.6066498421881488e-06,
      "loss": 0.0109,
      "step": 2567600
    },
    {
      "epoch": 4.201966444754293,
      "grad_norm": 0.36013859510421753,
      "learning_rate": 1.6065839499746316e-06,
      "loss": 0.0085,
      "step": 2567620
    },
    {
      "epoch": 4.201999175192946,
      "grad_norm": 0.1323661059141159,
      "learning_rate": 1.6065180577611145e-06,
      "loss": 0.0132,
      "step": 2567640
    },
    {
      "epoch": 4.202031905631599,
      "grad_norm": 0.29024457931518555,
      "learning_rate": 1.6064521655475973e-06,
      "loss": 0.0065,
      "step": 2567660
    },
    {
      "epoch": 4.2020646360702525,
      "grad_norm": 0.44897523522377014,
      "learning_rate": 1.6063862733340802e-06,
      "loss": 0.0067,
      "step": 2567680
    },
    {
      "epoch": 4.202097366508906,
      "grad_norm": 0.2557968497276306,
      "learning_rate": 1.606320381120563e-06,
      "loss": 0.0109,
      "step": 2567700
    },
    {
      "epoch": 4.20213009694756,
      "grad_norm": 0.09265433996915817,
      "learning_rate": 1.6062544889070461e-06,
      "loss": 0.0124,
      "step": 2567720
    },
    {
      "epoch": 4.202162827386212,
      "grad_norm": 0.2852216958999634,
      "learning_rate": 1.6061885966935289e-06,
      "loss": 0.0146,
      "step": 2567740
    },
    {
      "epoch": 4.202195557824866,
      "grad_norm": 0.21347187459468842,
      "learning_rate": 1.6061227044800118e-06,
      "loss": 0.0065,
      "step": 2567760
    },
    {
      "epoch": 4.20222828826352,
      "grad_norm": 0.19763332605361938,
      "learning_rate": 1.6060568122664946e-06,
      "loss": 0.009,
      "step": 2567780
    },
    {
      "epoch": 4.202261018702172,
      "grad_norm": 0.2718861699104309,
      "learning_rate": 1.6059909200529775e-06,
      "loss": 0.0106,
      "step": 2567800
    },
    {
      "epoch": 4.202293749140826,
      "grad_norm": 0.2918863892555237,
      "learning_rate": 1.6059250278394603e-06,
      "loss": 0.0133,
      "step": 2567820
    },
    {
      "epoch": 4.2023264795794795,
      "grad_norm": 0.9077256917953491,
      "learning_rate": 1.6058591356259432e-06,
      "loss": 0.0125,
      "step": 2567840
    },
    {
      "epoch": 4.202359210018133,
      "grad_norm": 0.11156340688467026,
      "learning_rate": 1.605793243412426e-06,
      "loss": 0.0084,
      "step": 2567860
    },
    {
      "epoch": 4.202391940456786,
      "grad_norm": 0.16427655518054962,
      "learning_rate": 1.605727351198909e-06,
      "loss": 0.014,
      "step": 2567880
    },
    {
      "epoch": 4.202424670895439,
      "grad_norm": 0.18470363318920135,
      "learning_rate": 1.6056614589853918e-06,
      "loss": 0.0076,
      "step": 2567900
    },
    {
      "epoch": 4.202457401334093,
      "grad_norm": 0.060281623154878616,
      "learning_rate": 1.6055955667718748e-06,
      "loss": 0.0134,
      "step": 2567920
    },
    {
      "epoch": 4.202490131772746,
      "grad_norm": 0.1808042675256729,
      "learning_rate": 1.6055296745583575e-06,
      "loss": 0.0062,
      "step": 2567940
    },
    {
      "epoch": 4.202522862211399,
      "grad_norm": 0.7698687314987183,
      "learning_rate": 1.6054637823448405e-06,
      "loss": 0.0132,
      "step": 2567960
    },
    {
      "epoch": 4.202555592650053,
      "grad_norm": 0.2729038894176483,
      "learning_rate": 1.6053978901313232e-06,
      "loss": 0.0111,
      "step": 2567980
    },
    {
      "epoch": 4.2025883230887064,
      "grad_norm": 0.2625955045223236,
      "learning_rate": 1.6053319979178062e-06,
      "loss": 0.0123,
      "step": 2568000
    },
    {
      "epoch": 4.202621053527359,
      "grad_norm": 0.3205689489841461,
      "learning_rate": 1.605266105704289e-06,
      "loss": 0.0085,
      "step": 2568020
    },
    {
      "epoch": 4.202653783966013,
      "grad_norm": 0.1214631050825119,
      "learning_rate": 1.6052002134907719e-06,
      "loss": 0.0089,
      "step": 2568040
    },
    {
      "epoch": 4.202686514404666,
      "grad_norm": 0.11487498879432678,
      "learning_rate": 1.605134321277255e-06,
      "loss": 0.0101,
      "step": 2568060
    },
    {
      "epoch": 4.202719244843319,
      "grad_norm": 0.20957134664058685,
      "learning_rate": 1.6050684290637378e-06,
      "loss": 0.0101,
      "step": 2568080
    },
    {
      "epoch": 4.202751975281973,
      "grad_norm": 0.41936254501342773,
      "learning_rate": 1.6050025368502205e-06,
      "loss": 0.0089,
      "step": 2568100
    },
    {
      "epoch": 4.202784705720626,
      "grad_norm": 0.10868123173713684,
      "learning_rate": 1.6049366446367035e-06,
      "loss": 0.0079,
      "step": 2568120
    },
    {
      "epoch": 4.20281743615928,
      "grad_norm": 0.1509358286857605,
      "learning_rate": 1.6048707524231862e-06,
      "loss": 0.0071,
      "step": 2568140
    },
    {
      "epoch": 4.2028501665979325,
      "grad_norm": 0.10336264222860336,
      "learning_rate": 1.6048048602096692e-06,
      "loss": 0.0079,
      "step": 2568160
    },
    {
      "epoch": 4.202882897036586,
      "grad_norm": 0.2788877785205841,
      "learning_rate": 1.604738967996152e-06,
      "loss": 0.0092,
      "step": 2568180
    },
    {
      "epoch": 4.20291562747524,
      "grad_norm": 0.2746908366680145,
      "learning_rate": 1.6046730757826349e-06,
      "loss": 0.0084,
      "step": 2568200
    },
    {
      "epoch": 4.202948357913892,
      "grad_norm": 0.20278392732143402,
      "learning_rate": 1.6046071835691176e-06,
      "loss": 0.0068,
      "step": 2568220
    },
    {
      "epoch": 4.202981088352546,
      "grad_norm": 0.6148506999015808,
      "learning_rate": 1.6045412913556008e-06,
      "loss": 0.0071,
      "step": 2568240
    },
    {
      "epoch": 4.2030138187912,
      "grad_norm": 0.21531763672828674,
      "learning_rate": 1.6044753991420837e-06,
      "loss": 0.0108,
      "step": 2568260
    },
    {
      "epoch": 4.203046549229853,
      "grad_norm": 0.36009976267814636,
      "learning_rate": 1.6044095069285664e-06,
      "loss": 0.0111,
      "step": 2568280
    },
    {
      "epoch": 4.203079279668506,
      "grad_norm": 0.19517788290977478,
      "learning_rate": 1.6043436147150492e-06,
      "loss": 0.0073,
      "step": 2568300
    },
    {
      "epoch": 4.2031120101071595,
      "grad_norm": 0.08164974302053452,
      "learning_rate": 1.6042777225015321e-06,
      "loss": 0.007,
      "step": 2568320
    },
    {
      "epoch": 4.203144740545813,
      "grad_norm": 0.21419447660446167,
      "learning_rate": 1.6042118302880149e-06,
      "loss": 0.0128,
      "step": 2568340
    },
    {
      "epoch": 4.203177470984466,
      "grad_norm": 0.07627562433481216,
      "learning_rate": 1.6041459380744978e-06,
      "loss": 0.0094,
      "step": 2568360
    },
    {
      "epoch": 4.203210201423119,
      "grad_norm": 0.4042874872684479,
      "learning_rate": 1.6040800458609806e-06,
      "loss": 0.0075,
      "step": 2568380
    },
    {
      "epoch": 4.203242931861773,
      "grad_norm": 0.226464182138443,
      "learning_rate": 1.6040141536474635e-06,
      "loss": 0.0068,
      "step": 2568400
    },
    {
      "epoch": 4.203275662300426,
      "grad_norm": 0.34679949283599854,
      "learning_rate": 1.6039482614339467e-06,
      "loss": 0.0105,
      "step": 2568420
    },
    {
      "epoch": 4.203308392739079,
      "grad_norm": 0.09733112901449203,
      "learning_rate": 1.6038823692204294e-06,
      "loss": 0.0061,
      "step": 2568440
    },
    {
      "epoch": 4.203341123177733,
      "grad_norm": 0.24656996130943298,
      "learning_rate": 1.6038164770069124e-06,
      "loss": 0.0081,
      "step": 2568460
    },
    {
      "epoch": 4.2033738536163865,
      "grad_norm": 0.24423746764659882,
      "learning_rate": 1.6037505847933951e-06,
      "loss": 0.007,
      "step": 2568480
    },
    {
      "epoch": 4.203406584055039,
      "grad_norm": 0.16859282553195953,
      "learning_rate": 1.603684692579878e-06,
      "loss": 0.0077,
      "step": 2568500
    },
    {
      "epoch": 4.203439314493693,
      "grad_norm": 0.15384019911289215,
      "learning_rate": 1.6036188003663608e-06,
      "loss": 0.0079,
      "step": 2568520
    },
    {
      "epoch": 4.203472044932346,
      "grad_norm": 3.0852363109588623,
      "learning_rate": 1.6035529081528435e-06,
      "loss": 0.0113,
      "step": 2568540
    },
    {
      "epoch": 4.203504775371,
      "grad_norm": 0.1274716556072235,
      "learning_rate": 1.6034870159393265e-06,
      "loss": 0.0089,
      "step": 2568560
    },
    {
      "epoch": 4.203537505809653,
      "grad_norm": 0.2281140834093094,
      "learning_rate": 1.6034211237258092e-06,
      "loss": 0.0062,
      "step": 2568580
    },
    {
      "epoch": 4.203570236248306,
      "grad_norm": 0.684769332408905,
      "learning_rate": 1.6033552315122924e-06,
      "loss": 0.0124,
      "step": 2568600
    },
    {
      "epoch": 4.20360296668696,
      "grad_norm": 0.30461129546165466,
      "learning_rate": 1.6032893392987754e-06,
      "loss": 0.0105,
      "step": 2568620
    },
    {
      "epoch": 4.203635697125613,
      "grad_norm": 0.35659265518188477,
      "learning_rate": 1.603223447085258e-06,
      "loss": 0.014,
      "step": 2568640
    },
    {
      "epoch": 4.203668427564266,
      "grad_norm": 0.3447340428829193,
      "learning_rate": 1.603157554871741e-06,
      "loss": 0.0067,
      "step": 2568660
    },
    {
      "epoch": 4.20370115800292,
      "grad_norm": 0.3675774037837982,
      "learning_rate": 1.6030916626582238e-06,
      "loss": 0.0075,
      "step": 2568680
    },
    {
      "epoch": 4.2037338884415725,
      "grad_norm": 0.08725496381521225,
      "learning_rate": 1.6030257704447067e-06,
      "loss": 0.0104,
      "step": 2568700
    },
    {
      "epoch": 4.203766618880226,
      "grad_norm": 0.18877878785133362,
      "learning_rate": 1.6029598782311895e-06,
      "loss": 0.0087,
      "step": 2568720
    },
    {
      "epoch": 4.20379934931888,
      "grad_norm": 0.06475521624088287,
      "learning_rate": 1.6028939860176724e-06,
      "loss": 0.0079,
      "step": 2568740
    },
    {
      "epoch": 4.203832079757533,
      "grad_norm": 0.20506617426872253,
      "learning_rate": 1.6028280938041552e-06,
      "loss": 0.009,
      "step": 2568760
    },
    {
      "epoch": 4.203864810196186,
      "grad_norm": 0.26599404215812683,
      "learning_rate": 1.6027622015906383e-06,
      "loss": 0.0111,
      "step": 2568780
    },
    {
      "epoch": 4.20389754063484,
      "grad_norm": 0.4276164770126343,
      "learning_rate": 1.602696309377121e-06,
      "loss": 0.0073,
      "step": 2568800
    },
    {
      "epoch": 4.203930271073493,
      "grad_norm": 0.158926859498024,
      "learning_rate": 1.602630417163604e-06,
      "loss": 0.0053,
      "step": 2568820
    },
    {
      "epoch": 4.203963001512146,
      "grad_norm": 0.2252940535545349,
      "learning_rate": 1.6025645249500868e-06,
      "loss": 0.0104,
      "step": 2568840
    },
    {
      "epoch": 4.2039957319507995,
      "grad_norm": 0.5763282179832458,
      "learning_rate": 1.6024986327365697e-06,
      "loss": 0.0114,
      "step": 2568860
    },
    {
      "epoch": 4.204028462389453,
      "grad_norm": 0.6378570199012756,
      "learning_rate": 1.6024327405230524e-06,
      "loss": 0.0104,
      "step": 2568880
    },
    {
      "epoch": 4.204061192828107,
      "grad_norm": 0.1564033329486847,
      "learning_rate": 1.6023668483095354e-06,
      "loss": 0.0073,
      "step": 2568900
    },
    {
      "epoch": 4.204093923266759,
      "grad_norm": 0.16368107497692108,
      "learning_rate": 1.6023009560960181e-06,
      "loss": 0.0096,
      "step": 2568920
    },
    {
      "epoch": 4.204126653705413,
      "grad_norm": 0.20181624591350555,
      "learning_rate": 1.6022350638825013e-06,
      "loss": 0.0082,
      "step": 2568940
    },
    {
      "epoch": 4.2041593841440665,
      "grad_norm": 0.18989552557468414,
      "learning_rate": 1.602169171668984e-06,
      "loss": 0.0115,
      "step": 2568960
    },
    {
      "epoch": 4.204192114582719,
      "grad_norm": 0.4566086530685425,
      "learning_rate": 1.602103279455467e-06,
      "loss": 0.0143,
      "step": 2568980
    },
    {
      "epoch": 4.204224845021373,
      "grad_norm": 0.2554766833782196,
      "learning_rate": 1.6020373872419497e-06,
      "loss": 0.0074,
      "step": 2569000
    },
    {
      "epoch": 4.204257575460026,
      "grad_norm": 0.12587161362171173,
      "learning_rate": 1.6019714950284327e-06,
      "loss": 0.0082,
      "step": 2569020
    },
    {
      "epoch": 4.20429030589868,
      "grad_norm": 0.17835041880607605,
      "learning_rate": 1.6019056028149154e-06,
      "loss": 0.0085,
      "step": 2569040
    },
    {
      "epoch": 4.204323036337333,
      "grad_norm": 0.44237691164016724,
      "learning_rate": 1.6018397106013984e-06,
      "loss": 0.0119,
      "step": 2569060
    },
    {
      "epoch": 4.204355766775986,
      "grad_norm": 0.4107399880886078,
      "learning_rate": 1.6017738183878811e-06,
      "loss": 0.0149,
      "step": 2569080
    },
    {
      "epoch": 4.20438849721464,
      "grad_norm": 0.13055913150310516,
      "learning_rate": 1.601707926174364e-06,
      "loss": 0.0084,
      "step": 2569100
    },
    {
      "epoch": 4.204421227653293,
      "grad_norm": 0.19534865021705627,
      "learning_rate": 1.601642033960847e-06,
      "loss": 0.0077,
      "step": 2569120
    },
    {
      "epoch": 4.204453958091946,
      "grad_norm": 0.4085230529308319,
      "learning_rate": 1.60157614174733e-06,
      "loss": 0.0148,
      "step": 2569140
    },
    {
      "epoch": 4.2044866885306,
      "grad_norm": 0.26925885677337646,
      "learning_rate": 1.6015102495338127e-06,
      "loss": 0.0098,
      "step": 2569160
    },
    {
      "epoch": 4.204519418969253,
      "grad_norm": 0.18465378880500793,
      "learning_rate": 1.6014443573202957e-06,
      "loss": 0.0085,
      "step": 2569180
    },
    {
      "epoch": 4.204552149407906,
      "grad_norm": 0.14242957532405853,
      "learning_rate": 1.6013784651067784e-06,
      "loss": 0.0091,
      "step": 2569200
    },
    {
      "epoch": 4.20458487984656,
      "grad_norm": 0.3095526397228241,
      "learning_rate": 1.6013125728932614e-06,
      "loss": 0.0092,
      "step": 2569220
    },
    {
      "epoch": 4.204617610285213,
      "grad_norm": 0.30934441089630127,
      "learning_rate": 1.601246680679744e-06,
      "loss": 0.0085,
      "step": 2569240
    },
    {
      "epoch": 4.204650340723866,
      "grad_norm": 0.2670319676399231,
      "learning_rate": 1.601180788466227e-06,
      "loss": 0.0088,
      "step": 2569260
    },
    {
      "epoch": 4.20468307116252,
      "grad_norm": 0.25633612275123596,
      "learning_rate": 1.6011148962527098e-06,
      "loss": 0.0074,
      "step": 2569280
    },
    {
      "epoch": 4.204715801601173,
      "grad_norm": 0.13782472908496857,
      "learning_rate": 1.601049004039193e-06,
      "loss": 0.0082,
      "step": 2569300
    },
    {
      "epoch": 4.204748532039827,
      "grad_norm": 0.47409409284591675,
      "learning_rate": 1.6009831118256757e-06,
      "loss": 0.0082,
      "step": 2569320
    },
    {
      "epoch": 4.2047812624784795,
      "grad_norm": 0.1644003689289093,
      "learning_rate": 1.6009172196121586e-06,
      "loss": 0.0084,
      "step": 2569340
    },
    {
      "epoch": 4.204813992917133,
      "grad_norm": 0.11744099110364914,
      "learning_rate": 1.6008513273986414e-06,
      "loss": 0.0115,
      "step": 2569360
    },
    {
      "epoch": 4.204846723355787,
      "grad_norm": 0.22070910036563873,
      "learning_rate": 1.6007854351851243e-06,
      "loss": 0.0077,
      "step": 2569380
    },
    {
      "epoch": 4.204879453794439,
      "grad_norm": 0.1536640226840973,
      "learning_rate": 1.600719542971607e-06,
      "loss": 0.0079,
      "step": 2569400
    },
    {
      "epoch": 4.204912184233093,
      "grad_norm": 0.3163706064224243,
      "learning_rate": 1.60065365075809e-06,
      "loss": 0.0122,
      "step": 2569420
    },
    {
      "epoch": 4.204944914671747,
      "grad_norm": 0.4157364070415497,
      "learning_rate": 1.6005877585445728e-06,
      "loss": 0.0142,
      "step": 2569440
    },
    {
      "epoch": 4.2049776451104,
      "grad_norm": 0.10010027885437012,
      "learning_rate": 1.6005218663310557e-06,
      "loss": 0.0078,
      "step": 2569460
    },
    {
      "epoch": 4.205010375549053,
      "grad_norm": 0.14473602175712585,
      "learning_rate": 1.6004559741175389e-06,
      "loss": 0.0064,
      "step": 2569480
    },
    {
      "epoch": 4.2050431059877065,
      "grad_norm": 0.3210604786872864,
      "learning_rate": 1.6003900819040216e-06,
      "loss": 0.0066,
      "step": 2569500
    },
    {
      "epoch": 4.20507583642636,
      "grad_norm": 0.23356066644191742,
      "learning_rate": 1.6003241896905044e-06,
      "loss": 0.0128,
      "step": 2569520
    },
    {
      "epoch": 4.205108566865013,
      "grad_norm": 0.7439905405044556,
      "learning_rate": 1.6002582974769873e-06,
      "loss": 0.0099,
      "step": 2569540
    },
    {
      "epoch": 4.205141297303666,
      "grad_norm": 0.16785605251789093,
      "learning_rate": 1.60019240526347e-06,
      "loss": 0.0102,
      "step": 2569560
    },
    {
      "epoch": 4.20517402774232,
      "grad_norm": 0.19596077501773834,
      "learning_rate": 1.600126513049953e-06,
      "loss": 0.0104,
      "step": 2569580
    },
    {
      "epoch": 4.205206758180974,
      "grad_norm": 0.2932863235473633,
      "learning_rate": 1.6000606208364357e-06,
      "loss": 0.006,
      "step": 2569600
    },
    {
      "epoch": 4.205239488619626,
      "grad_norm": 0.23634767532348633,
      "learning_rate": 1.5999947286229187e-06,
      "loss": 0.0077,
      "step": 2569620
    },
    {
      "epoch": 4.20527221905828,
      "grad_norm": 0.27790290117263794,
      "learning_rate": 1.5999288364094014e-06,
      "loss": 0.008,
      "step": 2569640
    },
    {
      "epoch": 4.2053049494969335,
      "grad_norm": 0.02685662731528282,
      "learning_rate": 1.5998629441958846e-06,
      "loss": 0.0137,
      "step": 2569660
    },
    {
      "epoch": 4.205337679935586,
      "grad_norm": 0.12412849813699722,
      "learning_rate": 1.5997970519823675e-06,
      "loss": 0.0096,
      "step": 2569680
    },
    {
      "epoch": 4.20537041037424,
      "grad_norm": 0.23695120215415955,
      "learning_rate": 1.5997311597688503e-06,
      "loss": 0.0094,
      "step": 2569700
    },
    {
      "epoch": 4.205403140812893,
      "grad_norm": 0.5517207980155945,
      "learning_rate": 1.5996652675553332e-06,
      "loss": 0.0153,
      "step": 2569720
    },
    {
      "epoch": 4.205435871251547,
      "grad_norm": 0.20636014640331268,
      "learning_rate": 1.599599375341816e-06,
      "loss": 0.0078,
      "step": 2569740
    },
    {
      "epoch": 4.2054686016902,
      "grad_norm": 0.25518569350242615,
      "learning_rate": 1.5995334831282987e-06,
      "loss": 0.0129,
      "step": 2569760
    },
    {
      "epoch": 4.205501332128853,
      "grad_norm": 0.12614361941814423,
      "learning_rate": 1.5994675909147817e-06,
      "loss": 0.0099,
      "step": 2569780
    },
    {
      "epoch": 4.205534062567507,
      "grad_norm": 0.14782962203025818,
      "learning_rate": 1.5994016987012644e-06,
      "loss": 0.0063,
      "step": 2569800
    },
    {
      "epoch": 4.2055667930061595,
      "grad_norm": 0.11066178232431412,
      "learning_rate": 1.5993358064877476e-06,
      "loss": 0.0057,
      "step": 2569820
    },
    {
      "epoch": 4.205599523444813,
      "grad_norm": 0.39879459142684937,
      "learning_rate": 1.5992699142742305e-06,
      "loss": 0.0131,
      "step": 2569840
    },
    {
      "epoch": 4.205632253883467,
      "grad_norm": 0.35186222195625305,
      "learning_rate": 1.5992040220607133e-06,
      "loss": 0.0088,
      "step": 2569860
    },
    {
      "epoch": 4.205664984322119,
      "grad_norm": 0.36781272292137146,
      "learning_rate": 1.5991381298471962e-06,
      "loss": 0.0092,
      "step": 2569880
    },
    {
      "epoch": 4.205697714760773,
      "grad_norm": 0.2652580440044403,
      "learning_rate": 1.599072237633679e-06,
      "loss": 0.0077,
      "step": 2569900
    },
    {
      "epoch": 4.205730445199427,
      "grad_norm": 0.07368647307157516,
      "learning_rate": 1.599006345420162e-06,
      "loss": 0.0088,
      "step": 2569920
    },
    {
      "epoch": 4.20576317563808,
      "grad_norm": 0.26910457015037537,
      "learning_rate": 1.5989404532066446e-06,
      "loss": 0.0097,
      "step": 2569940
    },
    {
      "epoch": 4.205795906076733,
      "grad_norm": 0.3251025676727295,
      "learning_rate": 1.5988745609931274e-06,
      "loss": 0.0069,
      "step": 2569960
    },
    {
      "epoch": 4.2058286365153865,
      "grad_norm": 0.1684996485710144,
      "learning_rate": 1.5988086687796103e-06,
      "loss": 0.0077,
      "step": 2569980
    },
    {
      "epoch": 4.20586136695404,
      "grad_norm": 0.26174062490463257,
      "learning_rate": 1.5987427765660935e-06,
      "loss": 0.0078,
      "step": 2570000
    },
    {
      "epoch": 4.205894097392694,
      "grad_norm": 0.8557156324386597,
      "learning_rate": 1.5986768843525762e-06,
      "loss": 0.009,
      "step": 2570020
    },
    {
      "epoch": 4.205926827831346,
      "grad_norm": 0.5376473069190979,
      "learning_rate": 1.5986109921390592e-06,
      "loss": 0.0091,
      "step": 2570040
    },
    {
      "epoch": 4.20595955827,
      "grad_norm": 0.21596182882785797,
      "learning_rate": 1.598545099925542e-06,
      "loss": 0.0109,
      "step": 2570060
    },
    {
      "epoch": 4.205992288708654,
      "grad_norm": 0.4288347661495209,
      "learning_rate": 1.5984792077120249e-06,
      "loss": 0.0055,
      "step": 2570080
    },
    {
      "epoch": 4.206025019147306,
      "grad_norm": 0.6117822527885437,
      "learning_rate": 1.5984133154985076e-06,
      "loss": 0.0094,
      "step": 2570100
    },
    {
      "epoch": 4.20605774958596,
      "grad_norm": 0.4123106598854065,
      "learning_rate": 1.5983474232849906e-06,
      "loss": 0.0142,
      "step": 2570120
    },
    {
      "epoch": 4.2060904800246135,
      "grad_norm": 0.28603580594062805,
      "learning_rate": 1.5982815310714733e-06,
      "loss": 0.0098,
      "step": 2570140
    },
    {
      "epoch": 4.206123210463266,
      "grad_norm": 0.1625417172908783,
      "learning_rate": 1.5982156388579563e-06,
      "loss": 0.0116,
      "step": 2570160
    },
    {
      "epoch": 4.20615594090192,
      "grad_norm": 0.1127619594335556,
      "learning_rate": 1.5981497466444392e-06,
      "loss": 0.0125,
      "step": 2570180
    },
    {
      "epoch": 4.206188671340573,
      "grad_norm": 0.6291455626487732,
      "learning_rate": 1.5980838544309222e-06,
      "loss": 0.0088,
      "step": 2570200
    },
    {
      "epoch": 4.206221401779227,
      "grad_norm": 0.5294998288154602,
      "learning_rate": 1.598017962217405e-06,
      "loss": 0.0086,
      "step": 2570220
    },
    {
      "epoch": 4.20625413221788,
      "grad_norm": 0.48960942029953003,
      "learning_rate": 1.5979520700038879e-06,
      "loss": 0.0105,
      "step": 2570240
    },
    {
      "epoch": 4.206286862656533,
      "grad_norm": 0.45079734921455383,
      "learning_rate": 1.5978861777903706e-06,
      "loss": 0.0121,
      "step": 2570260
    },
    {
      "epoch": 4.206319593095187,
      "grad_norm": 0.1267690658569336,
      "learning_rate": 1.5978202855768535e-06,
      "loss": 0.0162,
      "step": 2570280
    },
    {
      "epoch": 4.20635232353384,
      "grad_norm": 0.16305652260780334,
      "learning_rate": 1.5977543933633363e-06,
      "loss": 0.0094,
      "step": 2570300
    },
    {
      "epoch": 4.206385053972493,
      "grad_norm": 0.058067675679922104,
      "learning_rate": 1.5976885011498192e-06,
      "loss": 0.0069,
      "step": 2570320
    },
    {
      "epoch": 4.206417784411147,
      "grad_norm": 0.234469935297966,
      "learning_rate": 1.597622608936302e-06,
      "loss": 0.0128,
      "step": 2570340
    },
    {
      "epoch": 4.2064505148498,
      "grad_norm": 0.21765069663524628,
      "learning_rate": 1.5975567167227851e-06,
      "loss": 0.0114,
      "step": 2570360
    },
    {
      "epoch": 4.206483245288453,
      "grad_norm": 0.4099350869655609,
      "learning_rate": 1.5974908245092679e-06,
      "loss": 0.0092,
      "step": 2570380
    },
    {
      "epoch": 4.206515975727107,
      "grad_norm": 0.2242080271244049,
      "learning_rate": 1.5974249322957508e-06,
      "loss": 0.0091,
      "step": 2570400
    },
    {
      "epoch": 4.20654870616576,
      "grad_norm": 1.8141945600509644,
      "learning_rate": 1.5973590400822336e-06,
      "loss": 0.0126,
      "step": 2570420
    },
    {
      "epoch": 4.206581436604413,
      "grad_norm": 0.23230354487895966,
      "learning_rate": 1.5972931478687165e-06,
      "loss": 0.0102,
      "step": 2570440
    },
    {
      "epoch": 4.206614167043067,
      "grad_norm": 0.25481733679771423,
      "learning_rate": 1.5972272556551993e-06,
      "loss": 0.0131,
      "step": 2570460
    },
    {
      "epoch": 4.20664689748172,
      "grad_norm": 0.343578040599823,
      "learning_rate": 1.5971613634416822e-06,
      "loss": 0.0069,
      "step": 2570480
    },
    {
      "epoch": 4.206679627920374,
      "grad_norm": 0.28910037875175476,
      "learning_rate": 1.597095471228165e-06,
      "loss": 0.0099,
      "step": 2570500
    },
    {
      "epoch": 4.2067123583590265,
      "grad_norm": 0.23180997371673584,
      "learning_rate": 1.597029579014648e-06,
      "loss": 0.0096,
      "step": 2570520
    },
    {
      "epoch": 4.20674508879768,
      "grad_norm": 0.32424113154411316,
      "learning_rate": 1.5969636868011309e-06,
      "loss": 0.0104,
      "step": 2570540
    },
    {
      "epoch": 4.206777819236334,
      "grad_norm": 0.17282183468341827,
      "learning_rate": 1.5968977945876138e-06,
      "loss": 0.0081,
      "step": 2570560
    },
    {
      "epoch": 4.206810549674986,
      "grad_norm": 0.5923894047737122,
      "learning_rate": 1.5968319023740966e-06,
      "loss": 0.0058,
      "step": 2570580
    },
    {
      "epoch": 4.20684328011364,
      "grad_norm": 0.3488830029964447,
      "learning_rate": 1.5967660101605795e-06,
      "loss": 0.0147,
      "step": 2570600
    },
    {
      "epoch": 4.2068760105522935,
      "grad_norm": 0.2563028335571289,
      "learning_rate": 1.5967001179470622e-06,
      "loss": 0.0098,
      "step": 2570620
    },
    {
      "epoch": 4.206908740990947,
      "grad_norm": 0.27375394105911255,
      "learning_rate": 1.5966342257335452e-06,
      "loss": 0.011,
      "step": 2570640
    },
    {
      "epoch": 4.2069414714296,
      "grad_norm": 0.2631237208843231,
      "learning_rate": 1.596568333520028e-06,
      "loss": 0.0073,
      "step": 2570660
    },
    {
      "epoch": 4.206974201868253,
      "grad_norm": 0.2275298833847046,
      "learning_rate": 1.5965024413065109e-06,
      "loss": 0.0071,
      "step": 2570680
    },
    {
      "epoch": 4.207006932306907,
      "grad_norm": 0.30044347047805786,
      "learning_rate": 1.596436549092994e-06,
      "loss": 0.0094,
      "step": 2570700
    },
    {
      "epoch": 4.20703966274556,
      "grad_norm": 0.5274991393089294,
      "learning_rate": 1.5963706568794768e-06,
      "loss": 0.0119,
      "step": 2570720
    },
    {
      "epoch": 4.207072393184213,
      "grad_norm": 0.2724684476852417,
      "learning_rate": 1.5963047646659595e-06,
      "loss": 0.0069,
      "step": 2570740
    },
    {
      "epoch": 4.207105123622867,
      "grad_norm": 0.11005134135484695,
      "learning_rate": 1.5962388724524425e-06,
      "loss": 0.0105,
      "step": 2570760
    },
    {
      "epoch": 4.2071378540615205,
      "grad_norm": 0.3216192424297333,
      "learning_rate": 1.5961729802389252e-06,
      "loss": 0.0096,
      "step": 2570780
    },
    {
      "epoch": 4.207170584500173,
      "grad_norm": 0.23033230006694794,
      "learning_rate": 1.5961070880254082e-06,
      "loss": 0.0069,
      "step": 2570800
    },
    {
      "epoch": 4.207203314938827,
      "grad_norm": 0.30268383026123047,
      "learning_rate": 1.596041195811891e-06,
      "loss": 0.0093,
      "step": 2570820
    },
    {
      "epoch": 4.20723604537748,
      "grad_norm": 0.6028051972389221,
      "learning_rate": 1.5959753035983739e-06,
      "loss": 0.0136,
      "step": 2570840
    },
    {
      "epoch": 4.207268775816133,
      "grad_norm": 0.15251798927783966,
      "learning_rate": 1.5959094113848566e-06,
      "loss": 0.0063,
      "step": 2570860
    },
    {
      "epoch": 4.207301506254787,
      "grad_norm": 0.5355291366577148,
      "learning_rate": 1.5958435191713398e-06,
      "loss": 0.0093,
      "step": 2570880
    },
    {
      "epoch": 4.20733423669344,
      "grad_norm": 0.1033792495727539,
      "learning_rate": 1.5957776269578227e-06,
      "loss": 0.0096,
      "step": 2570900
    },
    {
      "epoch": 4.207366967132094,
      "grad_norm": 0.13841891288757324,
      "learning_rate": 1.5957117347443055e-06,
      "loss": 0.0107,
      "step": 2570920
    },
    {
      "epoch": 4.207399697570747,
      "grad_norm": 0.6066703200340271,
      "learning_rate": 1.5956458425307884e-06,
      "loss": 0.0122,
      "step": 2570940
    },
    {
      "epoch": 4.2074324280094,
      "grad_norm": 0.22160185873508453,
      "learning_rate": 1.5955799503172711e-06,
      "loss": 0.0091,
      "step": 2570960
    },
    {
      "epoch": 4.207465158448054,
      "grad_norm": 0.29078930616378784,
      "learning_rate": 1.5955140581037539e-06,
      "loss": 0.0072,
      "step": 2570980
    },
    {
      "epoch": 4.2074978888867065,
      "grad_norm": 0.07216782867908478,
      "learning_rate": 1.5954481658902368e-06,
      "loss": 0.0078,
      "step": 2571000
    },
    {
      "epoch": 4.20753061932536,
      "grad_norm": 0.20333530008792877,
      "learning_rate": 1.5953822736767196e-06,
      "loss": 0.0086,
      "step": 2571020
    },
    {
      "epoch": 4.207563349764014,
      "grad_norm": 0.30948638916015625,
      "learning_rate": 1.5953163814632025e-06,
      "loss": 0.0089,
      "step": 2571040
    },
    {
      "epoch": 4.207596080202667,
      "grad_norm": 0.0692911446094513,
      "learning_rate": 1.5952504892496857e-06,
      "loss": 0.0097,
      "step": 2571060
    },
    {
      "epoch": 4.20762881064132,
      "grad_norm": 0.19568908214569092,
      "learning_rate": 1.5951845970361684e-06,
      "loss": 0.0067,
      "step": 2571080
    },
    {
      "epoch": 4.207661541079974,
      "grad_norm": 0.5414146780967712,
      "learning_rate": 1.5951187048226514e-06,
      "loss": 0.0114,
      "step": 2571100
    },
    {
      "epoch": 4.207694271518627,
      "grad_norm": 0.20337797701358795,
      "learning_rate": 1.5950528126091341e-06,
      "loss": 0.0104,
      "step": 2571120
    },
    {
      "epoch": 4.20772700195728,
      "grad_norm": 0.1262156069278717,
      "learning_rate": 1.594986920395617e-06,
      "loss": 0.0117,
      "step": 2571140
    },
    {
      "epoch": 4.2077597323959335,
      "grad_norm": 0.2343445122241974,
      "learning_rate": 1.5949210281820998e-06,
      "loss": 0.0097,
      "step": 2571160
    },
    {
      "epoch": 4.207792462834587,
      "grad_norm": 0.17464040219783783,
      "learning_rate": 1.5948551359685826e-06,
      "loss": 0.0104,
      "step": 2571180
    },
    {
      "epoch": 4.207825193273241,
      "grad_norm": 0.15678392350673676,
      "learning_rate": 1.5947892437550655e-06,
      "loss": 0.0107,
      "step": 2571200
    },
    {
      "epoch": 4.207857923711893,
      "grad_norm": 0.26228973269462585,
      "learning_rate": 1.5947233515415482e-06,
      "loss": 0.0118,
      "step": 2571220
    },
    {
      "epoch": 4.207890654150547,
      "grad_norm": 0.37070053815841675,
      "learning_rate": 1.5946574593280314e-06,
      "loss": 0.0075,
      "step": 2571240
    },
    {
      "epoch": 4.207923384589201,
      "grad_norm": 0.2661745250225067,
      "learning_rate": 1.5945915671145144e-06,
      "loss": 0.0139,
      "step": 2571260
    },
    {
      "epoch": 4.207956115027853,
      "grad_norm": 0.40284594893455505,
      "learning_rate": 1.594525674900997e-06,
      "loss": 0.0124,
      "step": 2571280
    },
    {
      "epoch": 4.207988845466507,
      "grad_norm": 0.48111480474472046,
      "learning_rate": 1.59445978268748e-06,
      "loss": 0.0071,
      "step": 2571300
    },
    {
      "epoch": 4.2080215759051605,
      "grad_norm": 0.3571912348270416,
      "learning_rate": 1.5943938904739628e-06,
      "loss": 0.0102,
      "step": 2571320
    },
    {
      "epoch": 4.208054306343813,
      "grad_norm": 0.4210638403892517,
      "learning_rate": 1.5943279982604457e-06,
      "loss": 0.0085,
      "step": 2571340
    },
    {
      "epoch": 4.208087036782467,
      "grad_norm": 0.2439815104007721,
      "learning_rate": 1.5942621060469285e-06,
      "loss": 0.0135,
      "step": 2571360
    },
    {
      "epoch": 4.20811976722112,
      "grad_norm": 0.06198691576719284,
      "learning_rate": 1.5941962138334114e-06,
      "loss": 0.008,
      "step": 2571380
    },
    {
      "epoch": 4.208152497659774,
      "grad_norm": 0.370109885931015,
      "learning_rate": 1.5941303216198942e-06,
      "loss": 0.007,
      "step": 2571400
    },
    {
      "epoch": 4.208185228098427,
      "grad_norm": 0.08917287737131119,
      "learning_rate": 1.5940644294063773e-06,
      "loss": 0.0115,
      "step": 2571420
    },
    {
      "epoch": 4.20821795853708,
      "grad_norm": 0.1319301873445511,
      "learning_rate": 1.59399853719286e-06,
      "loss": 0.0083,
      "step": 2571440
    },
    {
      "epoch": 4.208250688975734,
      "grad_norm": 0.20053401589393616,
      "learning_rate": 1.593932644979343e-06,
      "loss": 0.0112,
      "step": 2571460
    },
    {
      "epoch": 4.208283419414387,
      "grad_norm": 0.13739071786403656,
      "learning_rate": 1.5938667527658258e-06,
      "loss": 0.0084,
      "step": 2571480
    },
    {
      "epoch": 4.20831614985304,
      "grad_norm": 0.31287699937820435,
      "learning_rate": 1.5938008605523087e-06,
      "loss": 0.0084,
      "step": 2571500
    },
    {
      "epoch": 4.208348880291694,
      "grad_norm": 0.08808522671461105,
      "learning_rate": 1.5937349683387915e-06,
      "loss": 0.0081,
      "step": 2571520
    },
    {
      "epoch": 4.208381610730347,
      "grad_norm": 0.23568843305110931,
      "learning_rate": 1.5936690761252744e-06,
      "loss": 0.0075,
      "step": 2571540
    },
    {
      "epoch": 4.208414341169,
      "grad_norm": 0.196852445602417,
      "learning_rate": 1.5936031839117572e-06,
      "loss": 0.0117,
      "step": 2571560
    },
    {
      "epoch": 4.208447071607654,
      "grad_norm": 0.11422429978847504,
      "learning_rate": 1.5935372916982403e-06,
      "loss": 0.0096,
      "step": 2571580
    },
    {
      "epoch": 4.208479802046307,
      "grad_norm": 0.12200885266065598,
      "learning_rate": 1.593471399484723e-06,
      "loss": 0.0102,
      "step": 2571600
    },
    {
      "epoch": 4.20851253248496,
      "grad_norm": 0.15450230240821838,
      "learning_rate": 1.593405507271206e-06,
      "loss": 0.0076,
      "step": 2571620
    },
    {
      "epoch": 4.2085452629236135,
      "grad_norm": 0.3104604184627533,
      "learning_rate": 1.5933396150576887e-06,
      "loss": 0.0089,
      "step": 2571640
    },
    {
      "epoch": 4.208577993362267,
      "grad_norm": 0.2510455846786499,
      "learning_rate": 1.5932737228441717e-06,
      "loss": 0.0079,
      "step": 2571660
    },
    {
      "epoch": 4.208610723800921,
      "grad_norm": 0.09819741547107697,
      "learning_rate": 1.5932078306306544e-06,
      "loss": 0.007,
      "step": 2571680
    },
    {
      "epoch": 4.208643454239573,
      "grad_norm": 0.49081724882125854,
      "learning_rate": 1.5931419384171374e-06,
      "loss": 0.0092,
      "step": 2571700
    },
    {
      "epoch": 4.208676184678227,
      "grad_norm": 0.17714932560920715,
      "learning_rate": 1.5930760462036201e-06,
      "loss": 0.0083,
      "step": 2571720
    },
    {
      "epoch": 4.208708915116881,
      "grad_norm": 0.3875007629394531,
      "learning_rate": 1.593010153990103e-06,
      "loss": 0.0084,
      "step": 2571740
    },
    {
      "epoch": 4.208741645555533,
      "grad_norm": 0.2672773599624634,
      "learning_rate": 1.592944261776586e-06,
      "loss": 0.0129,
      "step": 2571760
    },
    {
      "epoch": 4.208774375994187,
      "grad_norm": 0.2647644281387329,
      "learning_rate": 1.592878369563069e-06,
      "loss": 0.0119,
      "step": 2571780
    },
    {
      "epoch": 4.2088071064328405,
      "grad_norm": 0.19696903228759766,
      "learning_rate": 1.5928124773495517e-06,
      "loss": 0.0124,
      "step": 2571800
    },
    {
      "epoch": 4.208839836871494,
      "grad_norm": 0.16677206754684448,
      "learning_rate": 1.5927465851360347e-06,
      "loss": 0.0066,
      "step": 2571820
    },
    {
      "epoch": 4.208872567310147,
      "grad_norm": 0.08051463961601257,
      "learning_rate": 1.5926806929225174e-06,
      "loss": 0.0057,
      "step": 2571840
    },
    {
      "epoch": 4.2089052977488,
      "grad_norm": 0.1270945966243744,
      "learning_rate": 1.5926148007090004e-06,
      "loss": 0.0076,
      "step": 2571860
    },
    {
      "epoch": 4.208938028187454,
      "grad_norm": 0.546312153339386,
      "learning_rate": 1.592548908495483e-06,
      "loss": 0.0104,
      "step": 2571880
    },
    {
      "epoch": 4.208970758626107,
      "grad_norm": 0.4361240267753601,
      "learning_rate": 1.592483016281966e-06,
      "loss": 0.0113,
      "step": 2571900
    },
    {
      "epoch": 4.20900348906476,
      "grad_norm": 0.20620068907737732,
      "learning_rate": 1.5924171240684488e-06,
      "loss": 0.0095,
      "step": 2571920
    },
    {
      "epoch": 4.209036219503414,
      "grad_norm": 0.19512850046157837,
      "learning_rate": 1.592351231854932e-06,
      "loss": 0.0055,
      "step": 2571940
    },
    {
      "epoch": 4.2090689499420675,
      "grad_norm": 0.2527201473712921,
      "learning_rate": 1.5922853396414147e-06,
      "loss": 0.0083,
      "step": 2571960
    },
    {
      "epoch": 4.20910168038072,
      "grad_norm": 0.6943866014480591,
      "learning_rate": 1.5922194474278977e-06,
      "loss": 0.0117,
      "step": 2571980
    },
    {
      "epoch": 4.209134410819374,
      "grad_norm": 0.2942425310611725,
      "learning_rate": 1.5921535552143804e-06,
      "loss": 0.0059,
      "step": 2572000
    },
    {
      "epoch": 4.209167141258027,
      "grad_norm": 0.4614786207675934,
      "learning_rate": 1.5920876630008633e-06,
      "loss": 0.0088,
      "step": 2572020
    },
    {
      "epoch": 4.20919987169668,
      "grad_norm": 0.29532837867736816,
      "learning_rate": 1.592021770787346e-06,
      "loss": 0.0172,
      "step": 2572040
    },
    {
      "epoch": 4.209232602135334,
      "grad_norm": 0.1412540078163147,
      "learning_rate": 1.591955878573829e-06,
      "loss": 0.007,
      "step": 2572060
    },
    {
      "epoch": 4.209265332573987,
      "grad_norm": 0.4108256995677948,
      "learning_rate": 1.5918899863603118e-06,
      "loss": 0.0109,
      "step": 2572080
    },
    {
      "epoch": 4.209298063012641,
      "grad_norm": 0.17512798309326172,
      "learning_rate": 1.5918240941467947e-06,
      "loss": 0.0092,
      "step": 2572100
    },
    {
      "epoch": 4.209330793451294,
      "grad_norm": 0.1205584853887558,
      "learning_rate": 1.5917582019332779e-06,
      "loss": 0.012,
      "step": 2572120
    },
    {
      "epoch": 4.209363523889947,
      "grad_norm": 0.14988909661769867,
      "learning_rate": 1.5916923097197606e-06,
      "loss": 0.0077,
      "step": 2572140
    },
    {
      "epoch": 4.209396254328601,
      "grad_norm": 0.1871686726808548,
      "learning_rate": 1.5916264175062434e-06,
      "loss": 0.0041,
      "step": 2572160
    },
    {
      "epoch": 4.2094289847672535,
      "grad_norm": 0.6807025671005249,
      "learning_rate": 1.5915605252927263e-06,
      "loss": 0.0092,
      "step": 2572180
    },
    {
      "epoch": 4.209461715205907,
      "grad_norm": 0.16145505011081696,
      "learning_rate": 1.591494633079209e-06,
      "loss": 0.007,
      "step": 2572200
    },
    {
      "epoch": 4.209494445644561,
      "grad_norm": 0.14901864528656006,
      "learning_rate": 1.591428740865692e-06,
      "loss": 0.0106,
      "step": 2572220
    },
    {
      "epoch": 4.209527176083214,
      "grad_norm": 0.5942396521568298,
      "learning_rate": 1.5913628486521747e-06,
      "loss": 0.0087,
      "step": 2572240
    },
    {
      "epoch": 4.209559906521867,
      "grad_norm": 0.14990025758743286,
      "learning_rate": 1.5912969564386577e-06,
      "loss": 0.0078,
      "step": 2572260
    },
    {
      "epoch": 4.2095926369605206,
      "grad_norm": 0.12359818816184998,
      "learning_rate": 1.5912310642251404e-06,
      "loss": 0.0142,
      "step": 2572280
    },
    {
      "epoch": 4.209625367399174,
      "grad_norm": 0.6863386631011963,
      "learning_rate": 1.5911651720116236e-06,
      "loss": 0.0092,
      "step": 2572300
    },
    {
      "epoch": 4.209658097837827,
      "grad_norm": 0.10273978859186172,
      "learning_rate": 1.5910992797981066e-06,
      "loss": 0.0105,
      "step": 2572320
    },
    {
      "epoch": 4.2096908282764804,
      "grad_norm": 0.2017902433872223,
      "learning_rate": 1.5910333875845893e-06,
      "loss": 0.0085,
      "step": 2572340
    },
    {
      "epoch": 4.209723558715134,
      "grad_norm": 0.17067120969295502,
      "learning_rate": 1.5909674953710722e-06,
      "loss": 0.0126,
      "step": 2572360
    },
    {
      "epoch": 4.209756289153788,
      "grad_norm": 0.24049898982048035,
      "learning_rate": 1.590901603157555e-06,
      "loss": 0.0123,
      "step": 2572380
    },
    {
      "epoch": 4.20978901959244,
      "grad_norm": 0.41562893986701965,
      "learning_rate": 1.5908357109440377e-06,
      "loss": 0.0096,
      "step": 2572400
    },
    {
      "epoch": 4.209821750031094,
      "grad_norm": 0.057645685970783234,
      "learning_rate": 1.5907698187305207e-06,
      "loss": 0.009,
      "step": 2572420
    },
    {
      "epoch": 4.2098544804697475,
      "grad_norm": 0.19405771791934967,
      "learning_rate": 1.5907039265170034e-06,
      "loss": 0.009,
      "step": 2572440
    },
    {
      "epoch": 4.2098872109084,
      "grad_norm": 0.5621118545532227,
      "learning_rate": 1.5906380343034866e-06,
      "loss": 0.0125,
      "step": 2572460
    },
    {
      "epoch": 4.209919941347054,
      "grad_norm": 0.7766737341880798,
      "learning_rate": 1.5905721420899695e-06,
      "loss": 0.008,
      "step": 2572480
    },
    {
      "epoch": 4.209952671785707,
      "grad_norm": 0.4994679391384125,
      "learning_rate": 1.5905062498764523e-06,
      "loss": 0.0114,
      "step": 2572500
    },
    {
      "epoch": 4.209985402224361,
      "grad_norm": 0.08458109200000763,
      "learning_rate": 1.5904403576629352e-06,
      "loss": 0.0073,
      "step": 2572520
    },
    {
      "epoch": 4.210018132663014,
      "grad_norm": 0.2190840095281601,
      "learning_rate": 1.590374465449418e-06,
      "loss": 0.0091,
      "step": 2572540
    },
    {
      "epoch": 4.210050863101667,
      "grad_norm": 0.1155579686164856,
      "learning_rate": 1.590308573235901e-06,
      "loss": 0.0077,
      "step": 2572560
    },
    {
      "epoch": 4.210083593540321,
      "grad_norm": 0.7791092395782471,
      "learning_rate": 1.5902426810223837e-06,
      "loss": 0.0085,
      "step": 2572580
    },
    {
      "epoch": 4.210116323978974,
      "grad_norm": 0.3730841875076294,
      "learning_rate": 1.5901767888088666e-06,
      "loss": 0.0086,
      "step": 2572600
    },
    {
      "epoch": 4.210149054417627,
      "grad_norm": 0.14997205138206482,
      "learning_rate": 1.5901108965953493e-06,
      "loss": 0.0127,
      "step": 2572620
    },
    {
      "epoch": 4.210181784856281,
      "grad_norm": 0.27642568945884705,
      "learning_rate": 1.5900450043818325e-06,
      "loss": 0.0085,
      "step": 2572640
    },
    {
      "epoch": 4.210214515294934,
      "grad_norm": 0.2515999972820282,
      "learning_rate": 1.5899791121683152e-06,
      "loss": 0.0084,
      "step": 2572660
    },
    {
      "epoch": 4.210247245733587,
      "grad_norm": 0.3906853199005127,
      "learning_rate": 1.5899132199547982e-06,
      "loss": 0.0142,
      "step": 2572680
    },
    {
      "epoch": 4.210279976172241,
      "grad_norm": 0.1560918092727661,
      "learning_rate": 1.589847327741281e-06,
      "loss": 0.0072,
      "step": 2572700
    },
    {
      "epoch": 4.210312706610894,
      "grad_norm": 0.13288982212543488,
      "learning_rate": 1.5897814355277639e-06,
      "loss": 0.0058,
      "step": 2572720
    },
    {
      "epoch": 4.210345437049547,
      "grad_norm": 0.23217713832855225,
      "learning_rate": 1.5897155433142466e-06,
      "loss": 0.0101,
      "step": 2572740
    },
    {
      "epoch": 4.210378167488201,
      "grad_norm": 0.2609565556049347,
      "learning_rate": 1.5896496511007296e-06,
      "loss": 0.0136,
      "step": 2572760
    },
    {
      "epoch": 4.210410897926854,
      "grad_norm": 0.36683663725852966,
      "learning_rate": 1.5895837588872123e-06,
      "loss": 0.0077,
      "step": 2572780
    },
    {
      "epoch": 4.210443628365508,
      "grad_norm": 0.11775314807891846,
      "learning_rate": 1.5895178666736953e-06,
      "loss": 0.0094,
      "step": 2572800
    },
    {
      "epoch": 4.2104763588041605,
      "grad_norm": 0.10149600356817245,
      "learning_rate": 1.5894519744601782e-06,
      "loss": 0.0074,
      "step": 2572820
    },
    {
      "epoch": 4.210509089242814,
      "grad_norm": 0.19597364962100983,
      "learning_rate": 1.5893860822466612e-06,
      "loss": 0.0099,
      "step": 2572840
    },
    {
      "epoch": 4.210541819681468,
      "grad_norm": 0.09985890239477158,
      "learning_rate": 1.589320190033144e-06,
      "loss": 0.0083,
      "step": 2572860
    },
    {
      "epoch": 4.21057455012012,
      "grad_norm": 0.32086485624313354,
      "learning_rate": 1.5892542978196269e-06,
      "loss": 0.0102,
      "step": 2572880
    },
    {
      "epoch": 4.210607280558774,
      "grad_norm": 0.18994322419166565,
      "learning_rate": 1.5891884056061096e-06,
      "loss": 0.0148,
      "step": 2572900
    },
    {
      "epoch": 4.210640010997428,
      "grad_norm": 0.43962639570236206,
      "learning_rate": 1.5891225133925926e-06,
      "loss": 0.0102,
      "step": 2572920
    },
    {
      "epoch": 4.210672741436081,
      "grad_norm": 0.6226228475570679,
      "learning_rate": 1.5890566211790753e-06,
      "loss": 0.01,
      "step": 2572940
    },
    {
      "epoch": 4.210705471874734,
      "grad_norm": 0.5392870306968689,
      "learning_rate": 1.5889907289655583e-06,
      "loss": 0.0165,
      "step": 2572960
    },
    {
      "epoch": 4.2107382023133875,
      "grad_norm": 0.2506019175052643,
      "learning_rate": 1.588924836752041e-06,
      "loss": 0.0063,
      "step": 2572980
    },
    {
      "epoch": 4.210770932752041,
      "grad_norm": 0.1971401870250702,
      "learning_rate": 1.5888589445385242e-06,
      "loss": 0.0105,
      "step": 2573000
    },
    {
      "epoch": 4.210803663190694,
      "grad_norm": 0.2102225422859192,
      "learning_rate": 1.5887930523250069e-06,
      "loss": 0.0112,
      "step": 2573020
    },
    {
      "epoch": 4.210836393629347,
      "grad_norm": 0.23066067695617676,
      "learning_rate": 1.5887271601114898e-06,
      "loss": 0.0068,
      "step": 2573040
    },
    {
      "epoch": 4.210869124068001,
      "grad_norm": 0.3017893135547638,
      "learning_rate": 1.5886612678979726e-06,
      "loss": 0.0076,
      "step": 2573060
    },
    {
      "epoch": 4.210901854506654,
      "grad_norm": 0.27454873919487,
      "learning_rate": 1.5885953756844555e-06,
      "loss": 0.0104,
      "step": 2573080
    },
    {
      "epoch": 4.210934584945307,
      "grad_norm": 0.14953328669071198,
      "learning_rate": 1.5885294834709383e-06,
      "loss": 0.0066,
      "step": 2573100
    },
    {
      "epoch": 4.210967315383961,
      "grad_norm": 0.6891928315162659,
      "learning_rate": 1.5884635912574212e-06,
      "loss": 0.0076,
      "step": 2573120
    },
    {
      "epoch": 4.2110000458226144,
      "grad_norm": 0.2450050711631775,
      "learning_rate": 1.588397699043904e-06,
      "loss": 0.0099,
      "step": 2573140
    },
    {
      "epoch": 4.211032776261267,
      "grad_norm": 0.11275093257427216,
      "learning_rate": 1.588331806830387e-06,
      "loss": 0.0091,
      "step": 2573160
    },
    {
      "epoch": 4.211065506699921,
      "grad_norm": 0.2625747323036194,
      "learning_rate": 1.5882659146168699e-06,
      "loss": 0.0111,
      "step": 2573180
    },
    {
      "epoch": 4.211098237138574,
      "grad_norm": 0.2737007737159729,
      "learning_rate": 1.5882000224033528e-06,
      "loss": 0.0092,
      "step": 2573200
    },
    {
      "epoch": 4.211130967577227,
      "grad_norm": 0.1868468075990677,
      "learning_rate": 1.5881341301898356e-06,
      "loss": 0.0095,
      "step": 2573220
    },
    {
      "epoch": 4.211163698015881,
      "grad_norm": 0.1643075942993164,
      "learning_rate": 1.5880682379763185e-06,
      "loss": 0.0061,
      "step": 2573240
    },
    {
      "epoch": 4.211196428454534,
      "grad_norm": 0.5457753539085388,
      "learning_rate": 1.5880023457628013e-06,
      "loss": 0.0075,
      "step": 2573260
    },
    {
      "epoch": 4.211229158893188,
      "grad_norm": 0.033079106360673904,
      "learning_rate": 1.5879364535492842e-06,
      "loss": 0.008,
      "step": 2573280
    },
    {
      "epoch": 4.2112618893318405,
      "grad_norm": 0.12687744200229645,
      "learning_rate": 1.587870561335767e-06,
      "loss": 0.0075,
      "step": 2573300
    },
    {
      "epoch": 4.211294619770494,
      "grad_norm": 0.11854517459869385,
      "learning_rate": 1.58780466912225e-06,
      "loss": 0.0098,
      "step": 2573320
    },
    {
      "epoch": 4.211327350209148,
      "grad_norm": 0.24932987987995148,
      "learning_rate": 1.587738776908733e-06,
      "loss": 0.0089,
      "step": 2573340
    },
    {
      "epoch": 4.2113600806478,
      "grad_norm": 0.17733554542064667,
      "learning_rate": 1.5876728846952158e-06,
      "loss": 0.0081,
      "step": 2573360
    },
    {
      "epoch": 4.211392811086454,
      "grad_norm": 0.1647806614637375,
      "learning_rate": 1.5876069924816985e-06,
      "loss": 0.0093,
      "step": 2573380
    },
    {
      "epoch": 4.211425541525108,
      "grad_norm": 0.1776733249425888,
      "learning_rate": 1.5875411002681815e-06,
      "loss": 0.0104,
      "step": 2573400
    },
    {
      "epoch": 4.211458271963761,
      "grad_norm": 0.10081898421049118,
      "learning_rate": 1.5874752080546642e-06,
      "loss": 0.0076,
      "step": 2573420
    },
    {
      "epoch": 4.211491002402414,
      "grad_norm": 0.2041962891817093,
      "learning_rate": 1.5874093158411472e-06,
      "loss": 0.0109,
      "step": 2573440
    },
    {
      "epoch": 4.2115237328410675,
      "grad_norm": 0.20458026230335236,
      "learning_rate": 1.58734342362763e-06,
      "loss": 0.0126,
      "step": 2573460
    },
    {
      "epoch": 4.211556463279721,
      "grad_norm": 0.16429391503334045,
      "learning_rate": 1.5872775314141129e-06,
      "loss": 0.0098,
      "step": 2573480
    },
    {
      "epoch": 4.211589193718374,
      "grad_norm": 0.19744616746902466,
      "learning_rate": 1.5872116392005956e-06,
      "loss": 0.0141,
      "step": 2573500
    },
    {
      "epoch": 4.211621924157027,
      "grad_norm": 0.18038487434387207,
      "learning_rate": 1.5871457469870788e-06,
      "loss": 0.0077,
      "step": 2573520
    },
    {
      "epoch": 4.211654654595681,
      "grad_norm": 0.21293297410011292,
      "learning_rate": 1.5870798547735617e-06,
      "loss": 0.0076,
      "step": 2573540
    },
    {
      "epoch": 4.211687385034335,
      "grad_norm": 0.080734983086586,
      "learning_rate": 1.5870139625600445e-06,
      "loss": 0.008,
      "step": 2573560
    },
    {
      "epoch": 4.211720115472987,
      "grad_norm": 0.3652240037918091,
      "learning_rate": 1.5869480703465274e-06,
      "loss": 0.0081,
      "step": 2573580
    },
    {
      "epoch": 4.211752845911641,
      "grad_norm": 0.5018811225891113,
      "learning_rate": 1.5868821781330102e-06,
      "loss": 0.0132,
      "step": 2573600
    },
    {
      "epoch": 4.2117855763502945,
      "grad_norm": 0.19983232021331787,
      "learning_rate": 1.586816285919493e-06,
      "loss": 0.0083,
      "step": 2573620
    },
    {
      "epoch": 4.211818306788947,
      "grad_norm": 0.20937108993530273,
      "learning_rate": 1.5867503937059758e-06,
      "loss": 0.0077,
      "step": 2573640
    },
    {
      "epoch": 4.211851037227601,
      "grad_norm": 0.08813294768333435,
      "learning_rate": 1.5866845014924586e-06,
      "loss": 0.01,
      "step": 2573660
    },
    {
      "epoch": 4.211883767666254,
      "grad_norm": 0.20904330909252167,
      "learning_rate": 1.5866186092789415e-06,
      "loss": 0.009,
      "step": 2573680
    },
    {
      "epoch": 4.211916498104908,
      "grad_norm": 0.41975757479667664,
      "learning_rate": 1.5865527170654247e-06,
      "loss": 0.008,
      "step": 2573700
    },
    {
      "epoch": 4.211949228543561,
      "grad_norm": 0.22010177373886108,
      "learning_rate": 1.5864868248519074e-06,
      "loss": 0.0123,
      "step": 2573720
    },
    {
      "epoch": 4.211981958982214,
      "grad_norm": 0.24794796109199524,
      "learning_rate": 1.5864209326383904e-06,
      "loss": 0.0119,
      "step": 2573740
    },
    {
      "epoch": 4.212014689420868,
      "grad_norm": 0.2647159993648529,
      "learning_rate": 1.5863550404248731e-06,
      "loss": 0.0082,
      "step": 2573760
    },
    {
      "epoch": 4.212047419859521,
      "grad_norm": 0.38363775610923767,
      "learning_rate": 1.586289148211356e-06,
      "loss": 0.0072,
      "step": 2573780
    },
    {
      "epoch": 4.212080150298174,
      "grad_norm": 0.08833673596382141,
      "learning_rate": 1.5862232559978388e-06,
      "loss": 0.0088,
      "step": 2573800
    },
    {
      "epoch": 4.212112880736828,
      "grad_norm": 0.159412682056427,
      "learning_rate": 1.5861573637843216e-06,
      "loss": 0.0089,
      "step": 2573820
    },
    {
      "epoch": 4.212145611175481,
      "grad_norm": 0.3795146942138672,
      "learning_rate": 1.5860914715708045e-06,
      "loss": 0.0111,
      "step": 2573840
    },
    {
      "epoch": 4.212178341614134,
      "grad_norm": 0.17865712940692902,
      "learning_rate": 1.5860255793572873e-06,
      "loss": 0.0064,
      "step": 2573860
    },
    {
      "epoch": 4.212211072052788,
      "grad_norm": 0.2556995153427124,
      "learning_rate": 1.5859596871437704e-06,
      "loss": 0.0078,
      "step": 2573880
    },
    {
      "epoch": 4.212243802491441,
      "grad_norm": 0.34045642614364624,
      "learning_rate": 1.5858937949302534e-06,
      "loss": 0.0086,
      "step": 2573900
    },
    {
      "epoch": 4.212276532930094,
      "grad_norm": 0.23600032925605774,
      "learning_rate": 1.5858279027167361e-06,
      "loss": 0.0076,
      "step": 2573920
    },
    {
      "epoch": 4.212309263368748,
      "grad_norm": 0.4860050678253174,
      "learning_rate": 1.585762010503219e-06,
      "loss": 0.0081,
      "step": 2573940
    },
    {
      "epoch": 4.212341993807401,
      "grad_norm": 0.17844942212104797,
      "learning_rate": 1.5856961182897018e-06,
      "loss": 0.0088,
      "step": 2573960
    },
    {
      "epoch": 4.212374724246055,
      "grad_norm": 0.371811181306839,
      "learning_rate": 1.5856302260761848e-06,
      "loss": 0.016,
      "step": 2573980
    },
    {
      "epoch": 4.2124074546847075,
      "grad_norm": 0.7302453517913818,
      "learning_rate": 1.5855643338626675e-06,
      "loss": 0.0113,
      "step": 2574000
    },
    {
      "epoch": 4.212440185123361,
      "grad_norm": 0.1237836554646492,
      "learning_rate": 1.5854984416491504e-06,
      "loss": 0.0084,
      "step": 2574020
    },
    {
      "epoch": 4.212472915562015,
      "grad_norm": 0.23111781477928162,
      "learning_rate": 1.5854325494356332e-06,
      "loss": 0.0106,
      "step": 2574040
    },
    {
      "epoch": 4.212505646000667,
      "grad_norm": 0.3083568811416626,
      "learning_rate": 1.5853666572221163e-06,
      "loss": 0.0091,
      "step": 2574060
    },
    {
      "epoch": 4.212538376439321,
      "grad_norm": 0.2627733051776886,
      "learning_rate": 1.585300765008599e-06,
      "loss": 0.0105,
      "step": 2574080
    },
    {
      "epoch": 4.2125711068779745,
      "grad_norm": 0.3491552770137787,
      "learning_rate": 1.585234872795082e-06,
      "loss": 0.0115,
      "step": 2574100
    },
    {
      "epoch": 4.212603837316628,
      "grad_norm": 0.10254690796136856,
      "learning_rate": 1.5851689805815648e-06,
      "loss": 0.0112,
      "step": 2574120
    },
    {
      "epoch": 4.212636567755281,
      "grad_norm": 0.1646769642829895,
      "learning_rate": 1.5851030883680477e-06,
      "loss": 0.0088,
      "step": 2574140
    },
    {
      "epoch": 4.212669298193934,
      "grad_norm": 0.09703218191862106,
      "learning_rate": 1.5850371961545305e-06,
      "loss": 0.0078,
      "step": 2574160
    },
    {
      "epoch": 4.212702028632588,
      "grad_norm": 0.10941947996616364,
      "learning_rate": 1.5849713039410134e-06,
      "loss": 0.0109,
      "step": 2574180
    },
    {
      "epoch": 4.212734759071241,
      "grad_norm": 0.304043173789978,
      "learning_rate": 1.5849054117274962e-06,
      "loss": 0.0155,
      "step": 2574200
    },
    {
      "epoch": 4.212767489509894,
      "grad_norm": 0.4153214395046234,
      "learning_rate": 1.5848395195139793e-06,
      "loss": 0.0104,
      "step": 2574220
    },
    {
      "epoch": 4.212800219948548,
      "grad_norm": 0.30441075563430786,
      "learning_rate": 1.584773627300462e-06,
      "loss": 0.0069,
      "step": 2574240
    },
    {
      "epoch": 4.2128329503872015,
      "grad_norm": 0.45347902178764343,
      "learning_rate": 1.584707735086945e-06,
      "loss": 0.0112,
      "step": 2574260
    },
    {
      "epoch": 4.212865680825854,
      "grad_norm": 0.23100608587265015,
      "learning_rate": 1.5846418428734278e-06,
      "loss": 0.0065,
      "step": 2574280
    },
    {
      "epoch": 4.212898411264508,
      "grad_norm": 0.06283916532993317,
      "learning_rate": 1.5845759506599107e-06,
      "loss": 0.0069,
      "step": 2574300
    },
    {
      "epoch": 4.212931141703161,
      "grad_norm": 0.20516124367713928,
      "learning_rate": 1.5845100584463934e-06,
      "loss": 0.0084,
      "step": 2574320
    },
    {
      "epoch": 4.212963872141814,
      "grad_norm": 0.3377413749694824,
      "learning_rate": 1.5844441662328764e-06,
      "loss": 0.0087,
      "step": 2574340
    },
    {
      "epoch": 4.212996602580468,
      "grad_norm": 0.12853381037712097,
      "learning_rate": 1.5843782740193591e-06,
      "loss": 0.0138,
      "step": 2574360
    },
    {
      "epoch": 4.213029333019121,
      "grad_norm": 0.1998896449804306,
      "learning_rate": 1.584312381805842e-06,
      "loss": 0.0095,
      "step": 2574380
    },
    {
      "epoch": 4.213062063457775,
      "grad_norm": 0.08154894411563873,
      "learning_rate": 1.584246489592325e-06,
      "loss": 0.0132,
      "step": 2574400
    },
    {
      "epoch": 4.213094793896428,
      "grad_norm": 0.3049267530441284,
      "learning_rate": 1.584180597378808e-06,
      "loss": 0.0073,
      "step": 2574420
    },
    {
      "epoch": 4.213127524335081,
      "grad_norm": 0.24107980728149414,
      "learning_rate": 1.5841147051652907e-06,
      "loss": 0.006,
      "step": 2574440
    },
    {
      "epoch": 4.213160254773735,
      "grad_norm": 0.16720731556415558,
      "learning_rate": 1.5840488129517737e-06,
      "loss": 0.0093,
      "step": 2574460
    },
    {
      "epoch": 4.2131929852123875,
      "grad_norm": 0.23409754037857056,
      "learning_rate": 1.5839829207382564e-06,
      "loss": 0.008,
      "step": 2574480
    },
    {
      "epoch": 4.213225715651041,
      "grad_norm": 0.33709394931793213,
      "learning_rate": 1.5839170285247394e-06,
      "loss": 0.01,
      "step": 2574500
    },
    {
      "epoch": 4.213258446089695,
      "grad_norm": 0.28074613213539124,
      "learning_rate": 1.5838511363112221e-06,
      "loss": 0.0102,
      "step": 2574520
    },
    {
      "epoch": 4.213291176528347,
      "grad_norm": 0.07013419270515442,
      "learning_rate": 1.583785244097705e-06,
      "loss": 0.0062,
      "step": 2574540
    },
    {
      "epoch": 4.213323906967001,
      "grad_norm": 1.1846494674682617,
      "learning_rate": 1.5837193518841878e-06,
      "loss": 0.0069,
      "step": 2574560
    },
    {
      "epoch": 4.213356637405655,
      "grad_norm": 0.48226600885391235,
      "learning_rate": 1.583653459670671e-06,
      "loss": 0.0065,
      "step": 2574580
    },
    {
      "epoch": 4.213389367844308,
      "grad_norm": 0.10621108114719391,
      "learning_rate": 1.5835875674571537e-06,
      "loss": 0.0121,
      "step": 2574600
    },
    {
      "epoch": 4.213422098282961,
      "grad_norm": 0.16405829787254333,
      "learning_rate": 1.5835216752436367e-06,
      "loss": 0.0067,
      "step": 2574620
    },
    {
      "epoch": 4.2134548287216145,
      "grad_norm": 0.2262117564678192,
      "learning_rate": 1.5834557830301194e-06,
      "loss": 0.0074,
      "step": 2574640
    },
    {
      "epoch": 4.213487559160268,
      "grad_norm": 0.21387150883674622,
      "learning_rate": 1.5833898908166024e-06,
      "loss": 0.0074,
      "step": 2574660
    },
    {
      "epoch": 4.213520289598921,
      "grad_norm": 0.14726287126541138,
      "learning_rate": 1.583323998603085e-06,
      "loss": 0.0134,
      "step": 2574680
    },
    {
      "epoch": 4.213553020037574,
      "grad_norm": 0.37597543001174927,
      "learning_rate": 1.583258106389568e-06,
      "loss": 0.0099,
      "step": 2574700
    },
    {
      "epoch": 4.213585750476228,
      "grad_norm": 0.09676450490951538,
      "learning_rate": 1.5831922141760508e-06,
      "loss": 0.0087,
      "step": 2574720
    },
    {
      "epoch": 4.213618480914882,
      "grad_norm": 0.37028372287750244,
      "learning_rate": 1.5831263219625337e-06,
      "loss": 0.0099,
      "step": 2574740
    },
    {
      "epoch": 4.213651211353534,
      "grad_norm": 0.20060358941555023,
      "learning_rate": 1.583060429749017e-06,
      "loss": 0.0133,
      "step": 2574760
    },
    {
      "epoch": 4.213683941792188,
      "grad_norm": 0.6590564250946045,
      "learning_rate": 1.5829945375354996e-06,
      "loss": 0.0097,
      "step": 2574780
    },
    {
      "epoch": 4.2137166722308415,
      "grad_norm": 0.13159491121768951,
      "learning_rate": 1.5829286453219824e-06,
      "loss": 0.0099,
      "step": 2574800
    },
    {
      "epoch": 4.213749402669494,
      "grad_norm": 0.12078331410884857,
      "learning_rate": 1.5828627531084653e-06,
      "loss": 0.0076,
      "step": 2574820
    },
    {
      "epoch": 4.213782133108148,
      "grad_norm": 0.12403175234794617,
      "learning_rate": 1.582796860894948e-06,
      "loss": 0.0112,
      "step": 2574840
    },
    {
      "epoch": 4.213814863546801,
      "grad_norm": 0.44897201657295227,
      "learning_rate": 1.582730968681431e-06,
      "loss": 0.0088,
      "step": 2574860
    },
    {
      "epoch": 4.213847593985455,
      "grad_norm": 0.3481806814670563,
      "learning_rate": 1.5826650764679138e-06,
      "loss": 0.0115,
      "step": 2574880
    },
    {
      "epoch": 4.213880324424108,
      "grad_norm": 0.17991435527801514,
      "learning_rate": 1.5825991842543967e-06,
      "loss": 0.0096,
      "step": 2574900
    },
    {
      "epoch": 4.213913054862761,
      "grad_norm": 0.11728726327419281,
      "learning_rate": 1.5825332920408795e-06,
      "loss": 0.0072,
      "step": 2574920
    },
    {
      "epoch": 4.213945785301415,
      "grad_norm": 0.4705822467803955,
      "learning_rate": 1.5824673998273626e-06,
      "loss": 0.0085,
      "step": 2574940
    },
    {
      "epoch": 4.2139785157400675,
      "grad_norm": 0.29344862699508667,
      "learning_rate": 1.5824015076138456e-06,
      "loss": 0.0111,
      "step": 2574960
    },
    {
      "epoch": 4.214011246178721,
      "grad_norm": 0.2606111168861389,
      "learning_rate": 1.5823356154003283e-06,
      "loss": 0.0072,
      "step": 2574980
    },
    {
      "epoch": 4.214043976617375,
      "grad_norm": 0.17112335562705994,
      "learning_rate": 1.5822697231868113e-06,
      "loss": 0.0085,
      "step": 2575000
    },
    {
      "epoch": 4.214076707056028,
      "grad_norm": 0.08175766468048096,
      "learning_rate": 1.582203830973294e-06,
      "loss": 0.0116,
      "step": 2575020
    },
    {
      "epoch": 4.214109437494681,
      "grad_norm": 0.15135180950164795,
      "learning_rate": 1.5821379387597767e-06,
      "loss": 0.006,
      "step": 2575040
    },
    {
      "epoch": 4.214142167933335,
      "grad_norm": 0.5789019465446472,
      "learning_rate": 1.5820720465462597e-06,
      "loss": 0.0134,
      "step": 2575060
    },
    {
      "epoch": 4.214174898371988,
      "grad_norm": 0.5477737784385681,
      "learning_rate": 1.5820061543327424e-06,
      "loss": 0.0068,
      "step": 2575080
    },
    {
      "epoch": 4.214207628810641,
      "grad_norm": 0.30334457755088806,
      "learning_rate": 1.5819402621192256e-06,
      "loss": 0.0101,
      "step": 2575100
    },
    {
      "epoch": 4.2142403592492945,
      "grad_norm": 0.09185595065355301,
      "learning_rate": 1.5818743699057085e-06,
      "loss": 0.0126,
      "step": 2575120
    },
    {
      "epoch": 4.214273089687948,
      "grad_norm": 0.19632597267627716,
      "learning_rate": 1.5818084776921913e-06,
      "loss": 0.0074,
      "step": 2575140
    },
    {
      "epoch": 4.214305820126602,
      "grad_norm": 0.30817562341690063,
      "learning_rate": 1.5817425854786742e-06,
      "loss": 0.0122,
      "step": 2575160
    },
    {
      "epoch": 4.214338550565254,
      "grad_norm": 0.4278947412967682,
      "learning_rate": 1.581676693265157e-06,
      "loss": 0.0122,
      "step": 2575180
    },
    {
      "epoch": 4.214371281003908,
      "grad_norm": 0.26650577783584595,
      "learning_rate": 1.58161080105164e-06,
      "loss": 0.0101,
      "step": 2575200
    },
    {
      "epoch": 4.214404011442562,
      "grad_norm": 0.20706476271152496,
      "learning_rate": 1.5815449088381227e-06,
      "loss": 0.0084,
      "step": 2575220
    },
    {
      "epoch": 4.214436741881214,
      "grad_norm": 0.26570257544517517,
      "learning_rate": 1.5814790166246056e-06,
      "loss": 0.0089,
      "step": 2575240
    },
    {
      "epoch": 4.214469472319868,
      "grad_norm": 0.3073257505893707,
      "learning_rate": 1.5814131244110884e-06,
      "loss": 0.0094,
      "step": 2575260
    },
    {
      "epoch": 4.2145022027585215,
      "grad_norm": 0.181036114692688,
      "learning_rate": 1.5813472321975715e-06,
      "loss": 0.0088,
      "step": 2575280
    },
    {
      "epoch": 4.214534933197175,
      "grad_norm": 0.06773597747087479,
      "learning_rate": 1.5812813399840543e-06,
      "loss": 0.0107,
      "step": 2575300
    },
    {
      "epoch": 4.214567663635828,
      "grad_norm": 0.12285743653774261,
      "learning_rate": 1.5812154477705372e-06,
      "loss": 0.0054,
      "step": 2575320
    },
    {
      "epoch": 4.214600394074481,
      "grad_norm": 0.10223572701215744,
      "learning_rate": 1.58114955555702e-06,
      "loss": 0.0088,
      "step": 2575340
    },
    {
      "epoch": 4.214633124513135,
      "grad_norm": 0.325980544090271,
      "learning_rate": 1.581083663343503e-06,
      "loss": 0.0116,
      "step": 2575360
    },
    {
      "epoch": 4.214665854951788,
      "grad_norm": 0.22222085297107697,
      "learning_rate": 1.5810177711299856e-06,
      "loss": 0.0124,
      "step": 2575380
    },
    {
      "epoch": 4.214698585390441,
      "grad_norm": 0.24695762991905212,
      "learning_rate": 1.5809518789164686e-06,
      "loss": 0.0175,
      "step": 2575400
    },
    {
      "epoch": 4.214731315829095,
      "grad_norm": 0.5734165906906128,
      "learning_rate": 1.5808859867029513e-06,
      "loss": 0.008,
      "step": 2575420
    },
    {
      "epoch": 4.2147640462677485,
      "grad_norm": 0.584746241569519,
      "learning_rate": 1.5808200944894343e-06,
      "loss": 0.0082,
      "step": 2575440
    },
    {
      "epoch": 4.214796776706401,
      "grad_norm": 0.2992194592952728,
      "learning_rate": 1.5807542022759172e-06,
      "loss": 0.0093,
      "step": 2575460
    },
    {
      "epoch": 4.214829507145055,
      "grad_norm": 0.3796009123325348,
      "learning_rate": 1.5806883100624002e-06,
      "loss": 0.0082,
      "step": 2575480
    },
    {
      "epoch": 4.214862237583708,
      "grad_norm": 0.16017059981822968,
      "learning_rate": 1.580622417848883e-06,
      "loss": 0.0121,
      "step": 2575500
    },
    {
      "epoch": 4.214894968022361,
      "grad_norm": 0.08458704501390457,
      "learning_rate": 1.5805565256353659e-06,
      "loss": 0.0099,
      "step": 2575520
    },
    {
      "epoch": 4.214927698461015,
      "grad_norm": 0.14628563821315765,
      "learning_rate": 1.5804906334218486e-06,
      "loss": 0.0092,
      "step": 2575540
    },
    {
      "epoch": 4.214960428899668,
      "grad_norm": 0.6434344053268433,
      "learning_rate": 1.5804247412083316e-06,
      "loss": 0.008,
      "step": 2575560
    },
    {
      "epoch": 4.214993159338322,
      "grad_norm": 0.10573425143957138,
      "learning_rate": 1.5803588489948143e-06,
      "loss": 0.008,
      "step": 2575580
    },
    {
      "epoch": 4.215025889776975,
      "grad_norm": 0.17704354226589203,
      "learning_rate": 1.5802929567812973e-06,
      "loss": 0.0056,
      "step": 2575600
    },
    {
      "epoch": 4.215058620215628,
      "grad_norm": 0.24224697053432465,
      "learning_rate": 1.58022706456778e-06,
      "loss": 0.008,
      "step": 2575620
    },
    {
      "epoch": 4.215091350654282,
      "grad_norm": 0.20946335792541504,
      "learning_rate": 1.5801611723542632e-06,
      "loss": 0.0087,
      "step": 2575640
    },
    {
      "epoch": 4.2151240810929345,
      "grad_norm": 0.32889148592948914,
      "learning_rate": 1.580095280140746e-06,
      "loss": 0.0062,
      "step": 2575660
    },
    {
      "epoch": 4.215156811531588,
      "grad_norm": 0.25611573457717896,
      "learning_rate": 1.5800293879272289e-06,
      "loss": 0.011,
      "step": 2575680
    },
    {
      "epoch": 4.215189541970242,
      "grad_norm": 0.27900850772857666,
      "learning_rate": 1.5799634957137116e-06,
      "loss": 0.0135,
      "step": 2575700
    },
    {
      "epoch": 4.215222272408895,
      "grad_norm": 0.214151069521904,
      "learning_rate": 1.5798976035001945e-06,
      "loss": 0.01,
      "step": 2575720
    },
    {
      "epoch": 4.215255002847548,
      "grad_norm": 0.07140924781560898,
      "learning_rate": 1.5798317112866773e-06,
      "loss": 0.0094,
      "step": 2575740
    },
    {
      "epoch": 4.2152877332862015,
      "grad_norm": 0.31566235423088074,
      "learning_rate": 1.5797658190731602e-06,
      "loss": 0.0082,
      "step": 2575760
    },
    {
      "epoch": 4.215320463724855,
      "grad_norm": 0.13915345072746277,
      "learning_rate": 1.579699926859643e-06,
      "loss": 0.0078,
      "step": 2575780
    },
    {
      "epoch": 4.215353194163508,
      "grad_norm": 0.16197247803211212,
      "learning_rate": 1.579634034646126e-06,
      "loss": 0.0076,
      "step": 2575800
    },
    {
      "epoch": 4.215385924602161,
      "grad_norm": 0.062304217368364334,
      "learning_rate": 1.5795681424326089e-06,
      "loss": 0.0102,
      "step": 2575820
    },
    {
      "epoch": 4.215418655040815,
      "grad_norm": 0.25191694498062134,
      "learning_rate": 1.5795022502190918e-06,
      "loss": 0.0104,
      "step": 2575840
    },
    {
      "epoch": 4.215451385479469,
      "grad_norm": 0.14199432730674744,
      "learning_rate": 1.5794363580055746e-06,
      "loss": 0.0127,
      "step": 2575860
    },
    {
      "epoch": 4.215484115918121,
      "grad_norm": 0.151505246758461,
      "learning_rate": 1.5793704657920575e-06,
      "loss": 0.0084,
      "step": 2575880
    },
    {
      "epoch": 4.215516846356775,
      "grad_norm": 0.1730058193206787,
      "learning_rate": 1.5793045735785403e-06,
      "loss": 0.0126,
      "step": 2575900
    },
    {
      "epoch": 4.2155495767954285,
      "grad_norm": 0.2836114764213562,
      "learning_rate": 1.5792386813650232e-06,
      "loss": 0.008,
      "step": 2575920
    },
    {
      "epoch": 4.215582307234081,
      "grad_norm": 0.2895870804786682,
      "learning_rate": 1.579172789151506e-06,
      "loss": 0.0082,
      "step": 2575940
    },
    {
      "epoch": 4.215615037672735,
      "grad_norm": 0.28653138875961304,
      "learning_rate": 1.579106896937989e-06,
      "loss": 0.0106,
      "step": 2575960
    },
    {
      "epoch": 4.215647768111388,
      "grad_norm": 0.2130700647830963,
      "learning_rate": 1.579041004724472e-06,
      "loss": 0.0097,
      "step": 2575980
    },
    {
      "epoch": 4.215680498550041,
      "grad_norm": 0.05001242458820343,
      "learning_rate": 1.5789751125109548e-06,
      "loss": 0.0047,
      "step": 2576000
    },
    {
      "epoch": 4.215713228988695,
      "grad_norm": 0.21342414617538452,
      "learning_rate": 1.5789092202974375e-06,
      "loss": 0.0091,
      "step": 2576020
    },
    {
      "epoch": 4.215745959427348,
      "grad_norm": 0.43523067235946655,
      "learning_rate": 1.5788433280839205e-06,
      "loss": 0.0122,
      "step": 2576040
    },
    {
      "epoch": 4.215778689866002,
      "grad_norm": 0.37168633937835693,
      "learning_rate": 1.5787774358704032e-06,
      "loss": 0.0148,
      "step": 2576060
    },
    {
      "epoch": 4.215811420304655,
      "grad_norm": 0.1082494705915451,
      "learning_rate": 1.5787115436568862e-06,
      "loss": 0.0109,
      "step": 2576080
    },
    {
      "epoch": 4.215844150743308,
      "grad_norm": 0.27442851662635803,
      "learning_rate": 1.578645651443369e-06,
      "loss": 0.0082,
      "step": 2576100
    },
    {
      "epoch": 4.215876881181962,
      "grad_norm": 0.2767787575721741,
      "learning_rate": 1.5785797592298519e-06,
      "loss": 0.0077,
      "step": 2576120
    },
    {
      "epoch": 4.215909611620615,
      "grad_norm": 0.10524153709411621,
      "learning_rate": 1.5785138670163346e-06,
      "loss": 0.0064,
      "step": 2576140
    },
    {
      "epoch": 4.215942342059268,
      "grad_norm": 0.138308584690094,
      "learning_rate": 1.5784479748028178e-06,
      "loss": 0.0068,
      "step": 2576160
    },
    {
      "epoch": 4.215975072497922,
      "grad_norm": 0.5128949880599976,
      "learning_rate": 1.5783820825893007e-06,
      "loss": 0.0108,
      "step": 2576180
    },
    {
      "epoch": 4.216007802936575,
      "grad_norm": 0.14674434065818787,
      "learning_rate": 1.5783161903757835e-06,
      "loss": 0.0082,
      "step": 2576200
    },
    {
      "epoch": 4.216040533375228,
      "grad_norm": 0.23039120435714722,
      "learning_rate": 1.5782502981622664e-06,
      "loss": 0.0095,
      "step": 2576220
    },
    {
      "epoch": 4.216073263813882,
      "grad_norm": 0.2337656021118164,
      "learning_rate": 1.5781844059487492e-06,
      "loss": 0.0084,
      "step": 2576240
    },
    {
      "epoch": 4.216105994252535,
      "grad_norm": 0.1535663902759552,
      "learning_rate": 1.578118513735232e-06,
      "loss": 0.0085,
      "step": 2576260
    },
    {
      "epoch": 4.216138724691188,
      "grad_norm": 0.1914062649011612,
      "learning_rate": 1.5780526215217149e-06,
      "loss": 0.0192,
      "step": 2576280
    },
    {
      "epoch": 4.2161714551298415,
      "grad_norm": 0.10105456411838531,
      "learning_rate": 1.5779867293081976e-06,
      "loss": 0.0095,
      "step": 2576300
    },
    {
      "epoch": 4.216204185568495,
      "grad_norm": 0.07859394699335098,
      "learning_rate": 1.5779208370946806e-06,
      "loss": 0.0082,
      "step": 2576320
    },
    {
      "epoch": 4.216236916007149,
      "grad_norm": 0.12159714847803116,
      "learning_rate": 1.5778549448811637e-06,
      "loss": 0.0085,
      "step": 2576340
    },
    {
      "epoch": 4.216269646445801,
      "grad_norm": 0.5711756348609924,
      "learning_rate": 1.5777890526676465e-06,
      "loss": 0.0093,
      "step": 2576360
    },
    {
      "epoch": 4.216302376884455,
      "grad_norm": 0.13296984136104584,
      "learning_rate": 1.5777231604541294e-06,
      "loss": 0.0086,
      "step": 2576380
    },
    {
      "epoch": 4.216335107323109,
      "grad_norm": 0.1956270933151245,
      "learning_rate": 1.5776572682406121e-06,
      "loss": 0.0068,
      "step": 2576400
    },
    {
      "epoch": 4.216367837761761,
      "grad_norm": 0.20167696475982666,
      "learning_rate": 1.577591376027095e-06,
      "loss": 0.0079,
      "step": 2576420
    },
    {
      "epoch": 4.216400568200415,
      "grad_norm": 0.10270152986049652,
      "learning_rate": 1.5775254838135778e-06,
      "loss": 0.0174,
      "step": 2576440
    },
    {
      "epoch": 4.2164332986390685,
      "grad_norm": 0.5116804242134094,
      "learning_rate": 1.5774595916000606e-06,
      "loss": 0.0113,
      "step": 2576460
    },
    {
      "epoch": 4.216466029077722,
      "grad_norm": 0.24171027541160583,
      "learning_rate": 1.5773936993865435e-06,
      "loss": 0.0109,
      "step": 2576480
    },
    {
      "epoch": 4.216498759516375,
      "grad_norm": 0.7561182379722595,
      "learning_rate": 1.5773278071730263e-06,
      "loss": 0.0124,
      "step": 2576500
    },
    {
      "epoch": 4.216531489955028,
      "grad_norm": 0.20025193691253662,
      "learning_rate": 1.5772619149595094e-06,
      "loss": 0.0122,
      "step": 2576520
    },
    {
      "epoch": 4.216564220393682,
      "grad_norm": 0.14352259039878845,
      "learning_rate": 1.5771960227459924e-06,
      "loss": 0.0058,
      "step": 2576540
    },
    {
      "epoch": 4.216596950832335,
      "grad_norm": 0.13034315407276154,
      "learning_rate": 1.5771301305324751e-06,
      "loss": 0.0112,
      "step": 2576560
    },
    {
      "epoch": 4.216629681270988,
      "grad_norm": 0.17990808188915253,
      "learning_rate": 1.577064238318958e-06,
      "loss": 0.0124,
      "step": 2576580
    },
    {
      "epoch": 4.216662411709642,
      "grad_norm": 0.11544795334339142,
      "learning_rate": 1.5769983461054408e-06,
      "loss": 0.01,
      "step": 2576600
    },
    {
      "epoch": 4.216695142148295,
      "grad_norm": 0.25385645031929016,
      "learning_rate": 1.5769324538919238e-06,
      "loss": 0.0065,
      "step": 2576620
    },
    {
      "epoch": 4.216727872586948,
      "grad_norm": 0.2041160613298416,
      "learning_rate": 1.5768665616784065e-06,
      "loss": 0.007,
      "step": 2576640
    },
    {
      "epoch": 4.216760603025602,
      "grad_norm": 0.1999984234571457,
      "learning_rate": 1.5768006694648895e-06,
      "loss": 0.0126,
      "step": 2576660
    },
    {
      "epoch": 4.216793333464255,
      "grad_norm": 0.045304134488105774,
      "learning_rate": 1.5767347772513724e-06,
      "loss": 0.01,
      "step": 2576680
    },
    {
      "epoch": 4.216826063902908,
      "grad_norm": 0.2691858112812042,
      "learning_rate": 1.5766688850378554e-06,
      "loss": 0.0063,
      "step": 2576700
    },
    {
      "epoch": 4.216858794341562,
      "grad_norm": 0.09406455606222153,
      "learning_rate": 1.576602992824338e-06,
      "loss": 0.0132,
      "step": 2576720
    },
    {
      "epoch": 4.216891524780215,
      "grad_norm": 0.22249700129032135,
      "learning_rate": 1.576537100610821e-06,
      "loss": 0.0078,
      "step": 2576740
    },
    {
      "epoch": 4.216924255218869,
      "grad_norm": 0.1641906201839447,
      "learning_rate": 1.5764712083973038e-06,
      "loss": 0.008,
      "step": 2576760
    },
    {
      "epoch": 4.2169569856575215,
      "grad_norm": 0.3740677237510681,
      "learning_rate": 1.5764053161837867e-06,
      "loss": 0.0088,
      "step": 2576780
    },
    {
      "epoch": 4.216989716096175,
      "grad_norm": 0.2573763430118561,
      "learning_rate": 1.5763394239702695e-06,
      "loss": 0.0067,
      "step": 2576800
    },
    {
      "epoch": 4.217022446534829,
      "grad_norm": 0.05948730185627937,
      "learning_rate": 1.5762735317567524e-06,
      "loss": 0.0076,
      "step": 2576820
    },
    {
      "epoch": 4.217055176973481,
      "grad_norm": 0.14274124801158905,
      "learning_rate": 1.5762076395432352e-06,
      "loss": 0.007,
      "step": 2576840
    },
    {
      "epoch": 4.217087907412135,
      "grad_norm": 0.5035994648933411,
      "learning_rate": 1.5761417473297183e-06,
      "loss": 0.0122,
      "step": 2576860
    },
    {
      "epoch": 4.217120637850789,
      "grad_norm": 0.18694083392620087,
      "learning_rate": 1.576075855116201e-06,
      "loss": 0.0085,
      "step": 2576880
    },
    {
      "epoch": 4.217153368289442,
      "grad_norm": 0.4524804353713989,
      "learning_rate": 1.576009962902684e-06,
      "loss": 0.0064,
      "step": 2576900
    },
    {
      "epoch": 4.217186098728095,
      "grad_norm": NaN,
      "learning_rate": 1.5759440706891668e-06,
      "loss": 0.0094,
      "step": 2576920
    },
    {
      "epoch": 4.2172188291667485,
      "grad_norm": 0.2161259949207306,
      "learning_rate": 1.5758781784756497e-06,
      "loss": 0.016,
      "step": 2576940
    },
    {
      "epoch": 4.217251559605402,
      "grad_norm": 0.28192394971847534,
      "learning_rate": 1.5758122862621325e-06,
      "loss": 0.0165,
      "step": 2576960
    },
    {
      "epoch": 4.217284290044055,
      "grad_norm": 0.1518208235502243,
      "learning_rate": 1.5757463940486154e-06,
      "loss": 0.0102,
      "step": 2576980
    },
    {
      "epoch": 4.217317020482708,
      "grad_norm": 0.2024184614419937,
      "learning_rate": 1.5756805018350981e-06,
      "loss": 0.0079,
      "step": 2577000
    },
    {
      "epoch": 4.217349750921362,
      "grad_norm": 0.23973889648914337,
      "learning_rate": 1.575614609621581e-06,
      "loss": 0.0084,
      "step": 2577020
    },
    {
      "epoch": 4.217382481360016,
      "grad_norm": 0.2640278935432434,
      "learning_rate": 1.575548717408064e-06,
      "loss": 0.0105,
      "step": 2577040
    },
    {
      "epoch": 4.217415211798668,
      "grad_norm": 0.25010931491851807,
      "learning_rate": 1.575482825194547e-06,
      "loss": 0.0088,
      "step": 2577060
    },
    {
      "epoch": 4.217447942237322,
      "grad_norm": 0.13988466560840607,
      "learning_rate": 1.5754169329810297e-06,
      "loss": 0.0115,
      "step": 2577080
    },
    {
      "epoch": 4.2174806726759755,
      "grad_norm": 0.22140640020370483,
      "learning_rate": 1.5753510407675127e-06,
      "loss": 0.0067,
      "step": 2577100
    },
    {
      "epoch": 4.217513403114628,
      "grad_norm": 0.13818076252937317,
      "learning_rate": 1.5752851485539954e-06,
      "loss": 0.0075,
      "step": 2577120
    },
    {
      "epoch": 4.217546133553282,
      "grad_norm": 0.07209181040525436,
      "learning_rate": 1.5752192563404784e-06,
      "loss": 0.0086,
      "step": 2577140
    },
    {
      "epoch": 4.217578863991935,
      "grad_norm": 0.08483992516994476,
      "learning_rate": 1.5751533641269611e-06,
      "loss": 0.0084,
      "step": 2577160
    },
    {
      "epoch": 4.217611594430589,
      "grad_norm": 0.07194613665342331,
      "learning_rate": 1.575087471913444e-06,
      "loss": 0.0072,
      "step": 2577180
    },
    {
      "epoch": 4.217644324869242,
      "grad_norm": 0.11080141365528107,
      "learning_rate": 1.5750215796999268e-06,
      "loss": 0.0091,
      "step": 2577200
    },
    {
      "epoch": 4.217677055307895,
      "grad_norm": 0.11915401369333267,
      "learning_rate": 1.57495568748641e-06,
      "loss": 0.0071,
      "step": 2577220
    },
    {
      "epoch": 4.217709785746549,
      "grad_norm": 0.11022031307220459,
      "learning_rate": 1.5748897952728927e-06,
      "loss": 0.0094,
      "step": 2577240
    },
    {
      "epoch": 4.217742516185202,
      "grad_norm": 0.5270012617111206,
      "learning_rate": 1.5748239030593757e-06,
      "loss": 0.0064,
      "step": 2577260
    },
    {
      "epoch": 4.217775246623855,
      "grad_norm": 0.30048850178718567,
      "learning_rate": 1.5747580108458584e-06,
      "loss": 0.008,
      "step": 2577280
    },
    {
      "epoch": 4.217807977062509,
      "grad_norm": 0.30634069442749023,
      "learning_rate": 1.5746921186323414e-06,
      "loss": 0.0115,
      "step": 2577300
    },
    {
      "epoch": 4.217840707501162,
      "grad_norm": 0.13018567860126495,
      "learning_rate": 1.574626226418824e-06,
      "loss": 0.0063,
      "step": 2577320
    },
    {
      "epoch": 4.217873437939815,
      "grad_norm": 0.904776930809021,
      "learning_rate": 1.574560334205307e-06,
      "loss": 0.0098,
      "step": 2577340
    },
    {
      "epoch": 4.217906168378469,
      "grad_norm": 0.1360659897327423,
      "learning_rate": 1.5744944419917898e-06,
      "loss": 0.011,
      "step": 2577360
    },
    {
      "epoch": 4.217938898817122,
      "grad_norm": 0.26472362875938416,
      "learning_rate": 1.5744285497782727e-06,
      "loss": 0.0123,
      "step": 2577380
    },
    {
      "epoch": 4.217971629255775,
      "grad_norm": 0.5494146943092346,
      "learning_rate": 1.574362657564756e-06,
      "loss": 0.0095,
      "step": 2577400
    },
    {
      "epoch": 4.2180043596944286,
      "grad_norm": 0.19589252769947052,
      "learning_rate": 1.5742967653512386e-06,
      "loss": 0.014,
      "step": 2577420
    },
    {
      "epoch": 4.218037090133082,
      "grad_norm": 0.24065425992012024,
      "learning_rate": 1.5742308731377214e-06,
      "loss": 0.0086,
      "step": 2577440
    },
    {
      "epoch": 4.218069820571735,
      "grad_norm": 0.32392019033432007,
      "learning_rate": 1.5741649809242043e-06,
      "loss": 0.0123,
      "step": 2577460
    },
    {
      "epoch": 4.2181025510103884,
      "grad_norm": 0.21587085723876953,
      "learning_rate": 1.574099088710687e-06,
      "loss": 0.0127,
      "step": 2577480
    },
    {
      "epoch": 4.218135281449042,
      "grad_norm": 0.17842893302440643,
      "learning_rate": 1.57403319649717e-06,
      "loss": 0.0112,
      "step": 2577500
    },
    {
      "epoch": 4.218168011887696,
      "grad_norm": 0.11307986825704575,
      "learning_rate": 1.5739673042836528e-06,
      "loss": 0.0069,
      "step": 2577520
    },
    {
      "epoch": 4.218200742326348,
      "grad_norm": 0.2776855528354645,
      "learning_rate": 1.5739014120701357e-06,
      "loss": 0.0078,
      "step": 2577540
    },
    {
      "epoch": 4.218233472765002,
      "grad_norm": 0.12202806025743484,
      "learning_rate": 1.5738355198566189e-06,
      "loss": 0.0084,
      "step": 2577560
    },
    {
      "epoch": 4.2182662032036555,
      "grad_norm": 0.17016807198524475,
      "learning_rate": 1.5737696276431016e-06,
      "loss": 0.0101,
      "step": 2577580
    },
    {
      "epoch": 4.218298933642309,
      "grad_norm": 0.12221553921699524,
      "learning_rate": 1.5737037354295846e-06,
      "loss": 0.0093,
      "step": 2577600
    },
    {
      "epoch": 4.218331664080962,
      "grad_norm": 0.19025185704231262,
      "learning_rate": 1.5736378432160673e-06,
      "loss": 0.0132,
      "step": 2577620
    },
    {
      "epoch": 4.218364394519615,
      "grad_norm": 0.06807388365268707,
      "learning_rate": 1.5735719510025503e-06,
      "loss": 0.0138,
      "step": 2577640
    },
    {
      "epoch": 4.218397124958269,
      "grad_norm": 0.10239206254482269,
      "learning_rate": 1.573506058789033e-06,
      "loss": 0.0069,
      "step": 2577660
    },
    {
      "epoch": 4.218429855396922,
      "grad_norm": 0.19528856873512268,
      "learning_rate": 1.5734401665755157e-06,
      "loss": 0.0078,
      "step": 2577680
    },
    {
      "epoch": 4.218462585835575,
      "grad_norm": 0.19693182408809662,
      "learning_rate": 1.5733742743619987e-06,
      "loss": 0.011,
      "step": 2577700
    },
    {
      "epoch": 4.218495316274229,
      "grad_norm": 0.4767419397830963,
      "learning_rate": 1.5733083821484814e-06,
      "loss": 0.0084,
      "step": 2577720
    },
    {
      "epoch": 4.218528046712882,
      "grad_norm": 0.19858001172542572,
      "learning_rate": 1.5732424899349646e-06,
      "loss": 0.0069,
      "step": 2577740
    },
    {
      "epoch": 4.218560777151535,
      "grad_norm": 0.6015466451644897,
      "learning_rate": 1.5731765977214476e-06,
      "loss": 0.0087,
      "step": 2577760
    },
    {
      "epoch": 4.218593507590189,
      "grad_norm": 0.1183868944644928,
      "learning_rate": 1.5731107055079303e-06,
      "loss": 0.0099,
      "step": 2577780
    },
    {
      "epoch": 4.218626238028842,
      "grad_norm": 0.05145832896232605,
      "learning_rate": 1.5730448132944132e-06,
      "loss": 0.0072,
      "step": 2577800
    },
    {
      "epoch": 4.218658968467495,
      "grad_norm": 0.09979882091283798,
      "learning_rate": 1.572978921080896e-06,
      "loss": 0.0059,
      "step": 2577820
    },
    {
      "epoch": 4.218691698906149,
      "grad_norm": 0.3586668074131012,
      "learning_rate": 1.572913028867379e-06,
      "loss": 0.0103,
      "step": 2577840
    },
    {
      "epoch": 4.218724429344802,
      "grad_norm": 0.22964349389076233,
      "learning_rate": 1.5728471366538617e-06,
      "loss": 0.0086,
      "step": 2577860
    },
    {
      "epoch": 4.218757159783455,
      "grad_norm": 0.567499041557312,
      "learning_rate": 1.5727812444403446e-06,
      "loss": 0.0128,
      "step": 2577880
    },
    {
      "epoch": 4.218789890222109,
      "grad_norm": 0.13517409563064575,
      "learning_rate": 1.5727153522268274e-06,
      "loss": 0.0103,
      "step": 2577900
    },
    {
      "epoch": 4.218822620660762,
      "grad_norm": 0.27336394786834717,
      "learning_rate": 1.5726494600133105e-06,
      "loss": 0.0117,
      "step": 2577920
    },
    {
      "epoch": 4.218855351099416,
      "grad_norm": 0.17493753135204315,
      "learning_rate": 1.5725835677997933e-06,
      "loss": 0.0112,
      "step": 2577940
    },
    {
      "epoch": 4.2188880815380685,
      "grad_norm": 0.14136134088039398,
      "learning_rate": 1.5725176755862762e-06,
      "loss": 0.0094,
      "step": 2577960
    },
    {
      "epoch": 4.218920811976722,
      "grad_norm": 0.38512253761291504,
      "learning_rate": 1.572451783372759e-06,
      "loss": 0.0067,
      "step": 2577980
    },
    {
      "epoch": 4.218953542415376,
      "grad_norm": 0.13019335269927979,
      "learning_rate": 1.572385891159242e-06,
      "loss": 0.0053,
      "step": 2578000
    },
    {
      "epoch": 4.218986272854028,
      "grad_norm": 0.25436294078826904,
      "learning_rate": 1.5723199989457247e-06,
      "loss": 0.0112,
      "step": 2578020
    },
    {
      "epoch": 4.219019003292682,
      "grad_norm": 0.41192731261253357,
      "learning_rate": 1.5722541067322076e-06,
      "loss": 0.0085,
      "step": 2578040
    },
    {
      "epoch": 4.219051733731336,
      "grad_norm": 0.258306622505188,
      "learning_rate": 1.5721882145186903e-06,
      "loss": 0.0074,
      "step": 2578060
    },
    {
      "epoch": 4.219084464169989,
      "grad_norm": 0.45043423771858215,
      "learning_rate": 1.5721223223051733e-06,
      "loss": 0.0119,
      "step": 2578080
    },
    {
      "epoch": 4.219117194608642,
      "grad_norm": 0.05776823312044144,
      "learning_rate": 1.5720564300916562e-06,
      "loss": 0.0085,
      "step": 2578100
    },
    {
      "epoch": 4.2191499250472955,
      "grad_norm": 0.1773882508277893,
      "learning_rate": 1.5719905378781392e-06,
      "loss": 0.0081,
      "step": 2578120
    },
    {
      "epoch": 4.219182655485949,
      "grad_norm": 0.2417026311159134,
      "learning_rate": 1.571924645664622e-06,
      "loss": 0.0087,
      "step": 2578140
    },
    {
      "epoch": 4.219215385924602,
      "grad_norm": 0.07477176934480667,
      "learning_rate": 1.5718587534511049e-06,
      "loss": 0.0076,
      "step": 2578160
    },
    {
      "epoch": 4.219248116363255,
      "grad_norm": 0.42797133326530457,
      "learning_rate": 1.5717928612375876e-06,
      "loss": 0.0083,
      "step": 2578180
    },
    {
      "epoch": 4.219280846801909,
      "grad_norm": 0.27120277285575867,
      "learning_rate": 1.5717269690240706e-06,
      "loss": 0.0095,
      "step": 2578200
    },
    {
      "epoch": 4.2193135772405626,
      "grad_norm": 0.6981369853019714,
      "learning_rate": 1.5716610768105533e-06,
      "loss": 0.0091,
      "step": 2578220
    },
    {
      "epoch": 4.219346307679215,
      "grad_norm": 0.30000901222229004,
      "learning_rate": 1.5715951845970363e-06,
      "loss": 0.0126,
      "step": 2578240
    },
    {
      "epoch": 4.219379038117869,
      "grad_norm": 0.26891085505485535,
      "learning_rate": 1.571529292383519e-06,
      "loss": 0.0086,
      "step": 2578260
    },
    {
      "epoch": 4.2194117685565224,
      "grad_norm": 0.19765020906925201,
      "learning_rate": 1.5714634001700022e-06,
      "loss": 0.0104,
      "step": 2578280
    },
    {
      "epoch": 4.219444498995175,
      "grad_norm": 0.1422143429517746,
      "learning_rate": 1.571397507956485e-06,
      "loss": 0.0085,
      "step": 2578300
    },
    {
      "epoch": 4.219477229433829,
      "grad_norm": 0.07963521778583527,
      "learning_rate": 1.5713316157429679e-06,
      "loss": 0.0053,
      "step": 2578320
    },
    {
      "epoch": 4.219509959872482,
      "grad_norm": 0.21122203767299652,
      "learning_rate": 1.5712657235294506e-06,
      "loss": 0.0061,
      "step": 2578340
    },
    {
      "epoch": 4.219542690311136,
      "grad_norm": 0.11919108778238297,
      "learning_rate": 1.5711998313159336e-06,
      "loss": 0.0089,
      "step": 2578360
    },
    {
      "epoch": 4.219575420749789,
      "grad_norm": 0.14185774326324463,
      "learning_rate": 1.5711339391024163e-06,
      "loss": 0.0088,
      "step": 2578380
    },
    {
      "epoch": 4.219608151188442,
      "grad_norm": 0.526156485080719,
      "learning_rate": 1.5710680468888992e-06,
      "loss": 0.0068,
      "step": 2578400
    },
    {
      "epoch": 4.219640881627096,
      "grad_norm": 0.13298121094703674,
      "learning_rate": 1.571002154675382e-06,
      "loss": 0.0126,
      "step": 2578420
    },
    {
      "epoch": 4.2196736120657485,
      "grad_norm": 0.13066036999225616,
      "learning_rate": 1.5709362624618652e-06,
      "loss": 0.0092,
      "step": 2578440
    },
    {
      "epoch": 4.219706342504402,
      "grad_norm": 0.23749259114265442,
      "learning_rate": 1.5708703702483479e-06,
      "loss": 0.0113,
      "step": 2578460
    },
    {
      "epoch": 4.219739072943056,
      "grad_norm": 0.5516652464866638,
      "learning_rate": 1.5708044780348308e-06,
      "loss": 0.0144,
      "step": 2578480
    },
    {
      "epoch": 4.219771803381709,
      "grad_norm": 0.18217143416404724,
      "learning_rate": 1.5707385858213136e-06,
      "loss": 0.0072,
      "step": 2578500
    },
    {
      "epoch": 4.219804533820362,
      "grad_norm": 0.06316136568784714,
      "learning_rate": 1.5706726936077965e-06,
      "loss": 0.0145,
      "step": 2578520
    },
    {
      "epoch": 4.219837264259016,
      "grad_norm": 0.2444336861371994,
      "learning_rate": 1.5706068013942793e-06,
      "loss": 0.0115,
      "step": 2578540
    },
    {
      "epoch": 4.219869994697669,
      "grad_norm": 0.11584407836198807,
      "learning_rate": 1.5705409091807622e-06,
      "loss": 0.0141,
      "step": 2578560
    },
    {
      "epoch": 4.219902725136322,
      "grad_norm": 0.45062318444252014,
      "learning_rate": 1.570475016967245e-06,
      "loss": 0.0181,
      "step": 2578580
    },
    {
      "epoch": 4.2199354555749755,
      "grad_norm": 0.48625314235687256,
      "learning_rate": 1.570409124753728e-06,
      "loss": 0.0059,
      "step": 2578600
    },
    {
      "epoch": 4.219968186013629,
      "grad_norm": 0.11954876780509949,
      "learning_rate": 1.570343232540211e-06,
      "loss": 0.0093,
      "step": 2578620
    },
    {
      "epoch": 4.220000916452283,
      "grad_norm": 0.41071033477783203,
      "learning_rate": 1.5702773403266938e-06,
      "loss": 0.01,
      "step": 2578640
    },
    {
      "epoch": 4.220033646890935,
      "grad_norm": 0.25849077105522156,
      "learning_rate": 1.5702114481131766e-06,
      "loss": 0.0081,
      "step": 2578660
    },
    {
      "epoch": 4.220066377329589,
      "grad_norm": 0.19164910912513733,
      "learning_rate": 1.5701455558996595e-06,
      "loss": 0.0096,
      "step": 2578680
    },
    {
      "epoch": 4.220099107768243,
      "grad_norm": 0.24288111925125122,
      "learning_rate": 1.5700796636861423e-06,
      "loss": 0.009,
      "step": 2578700
    },
    {
      "epoch": 4.220131838206895,
      "grad_norm": 0.27472400665283203,
      "learning_rate": 1.5700137714726252e-06,
      "loss": 0.0091,
      "step": 2578720
    },
    {
      "epoch": 4.220164568645549,
      "grad_norm": 0.1528834104537964,
      "learning_rate": 1.569947879259108e-06,
      "loss": 0.0092,
      "step": 2578740
    },
    {
      "epoch": 4.2201972990842025,
      "grad_norm": 0.12808433175086975,
      "learning_rate": 1.5698819870455909e-06,
      "loss": 0.011,
      "step": 2578760
    },
    {
      "epoch": 4.220230029522856,
      "grad_norm": 0.14826051890850067,
      "learning_rate": 1.5698160948320736e-06,
      "loss": 0.0079,
      "step": 2578780
    },
    {
      "epoch": 4.220262759961509,
      "grad_norm": 0.544135570526123,
      "learning_rate": 1.5697502026185568e-06,
      "loss": 0.0114,
      "step": 2578800
    },
    {
      "epoch": 4.220295490400162,
      "grad_norm": 0.507032573223114,
      "learning_rate": 1.5696843104050397e-06,
      "loss": 0.0093,
      "step": 2578820
    },
    {
      "epoch": 4.220328220838816,
      "grad_norm": 0.12911392748355865,
      "learning_rate": 1.5696184181915225e-06,
      "loss": 0.0078,
      "step": 2578840
    },
    {
      "epoch": 4.220360951277469,
      "grad_norm": 0.11973593384027481,
      "learning_rate": 1.5695525259780054e-06,
      "loss": 0.0096,
      "step": 2578860
    },
    {
      "epoch": 4.220393681716122,
      "grad_norm": 0.2670619487762451,
      "learning_rate": 1.5694866337644882e-06,
      "loss": 0.0069,
      "step": 2578880
    },
    {
      "epoch": 4.220426412154776,
      "grad_norm": 0.12115258723497391,
      "learning_rate": 1.569420741550971e-06,
      "loss": 0.0112,
      "step": 2578900
    },
    {
      "epoch": 4.220459142593429,
      "grad_norm": 0.2130388766527176,
      "learning_rate": 1.5693548493374539e-06,
      "loss": 0.0078,
      "step": 2578920
    },
    {
      "epoch": 4.220491873032082,
      "grad_norm": 0.10572496801614761,
      "learning_rate": 1.5692889571239366e-06,
      "loss": 0.0052,
      "step": 2578940
    },
    {
      "epoch": 4.220524603470736,
      "grad_norm": 0.10533148050308228,
      "learning_rate": 1.5692230649104196e-06,
      "loss": 0.0077,
      "step": 2578960
    },
    {
      "epoch": 4.220557333909389,
      "grad_norm": 0.38049888610839844,
      "learning_rate": 1.5691571726969027e-06,
      "loss": 0.0106,
      "step": 2578980
    },
    {
      "epoch": 4.220590064348042,
      "grad_norm": 0.25893282890319824,
      "learning_rate": 1.5690912804833855e-06,
      "loss": 0.0066,
      "step": 2579000
    },
    {
      "epoch": 4.220622794786696,
      "grad_norm": 0.31326058506965637,
      "learning_rate": 1.5690253882698684e-06,
      "loss": 0.0095,
      "step": 2579020
    },
    {
      "epoch": 4.220655525225349,
      "grad_norm": 0.422268271446228,
      "learning_rate": 1.5689594960563512e-06,
      "loss": 0.0088,
      "step": 2579040
    },
    {
      "epoch": 4.220688255664003,
      "grad_norm": 0.053243111819028854,
      "learning_rate": 1.5688936038428341e-06,
      "loss": 0.006,
      "step": 2579060
    },
    {
      "epoch": 4.220720986102656,
      "grad_norm": 0.13598166406154633,
      "learning_rate": 1.5688277116293168e-06,
      "loss": 0.0076,
      "step": 2579080
    },
    {
      "epoch": 4.220753716541309,
      "grad_norm": 0.25982666015625,
      "learning_rate": 1.5687618194157996e-06,
      "loss": 0.008,
      "step": 2579100
    },
    {
      "epoch": 4.220786446979963,
      "grad_norm": 0.578121542930603,
      "learning_rate": 1.5686959272022825e-06,
      "loss": 0.0081,
      "step": 2579120
    },
    {
      "epoch": 4.2208191774186155,
      "grad_norm": 0.13610127568244934,
      "learning_rate": 1.5686300349887653e-06,
      "loss": 0.0064,
      "step": 2579140
    },
    {
      "epoch": 4.220851907857269,
      "grad_norm": 0.27538633346557617,
      "learning_rate": 1.5685641427752484e-06,
      "loss": 0.0083,
      "step": 2579160
    },
    {
      "epoch": 4.220884638295923,
      "grad_norm": 0.12863300740718842,
      "learning_rate": 1.5684982505617314e-06,
      "loss": 0.0095,
      "step": 2579180
    },
    {
      "epoch": 4.220917368734575,
      "grad_norm": 0.32169079780578613,
      "learning_rate": 1.5684323583482141e-06,
      "loss": 0.0084,
      "step": 2579200
    },
    {
      "epoch": 4.220950099173229,
      "grad_norm": 0.19759239256381989,
      "learning_rate": 1.568366466134697e-06,
      "loss": 0.0137,
      "step": 2579220
    },
    {
      "epoch": 4.2209828296118825,
      "grad_norm": 0.35946378111839294,
      "learning_rate": 1.5683005739211798e-06,
      "loss": 0.011,
      "step": 2579240
    },
    {
      "epoch": 4.221015560050536,
      "grad_norm": 0.08521901071071625,
      "learning_rate": 1.5682346817076628e-06,
      "loss": 0.0078,
      "step": 2579260
    },
    {
      "epoch": 4.221048290489189,
      "grad_norm": 0.12582558393478394,
      "learning_rate": 1.5681687894941455e-06,
      "loss": 0.0122,
      "step": 2579280
    },
    {
      "epoch": 4.221081020927842,
      "grad_norm": 0.27356433868408203,
      "learning_rate": 1.5681028972806285e-06,
      "loss": 0.0122,
      "step": 2579300
    },
    {
      "epoch": 4.221113751366496,
      "grad_norm": 0.10521169751882553,
      "learning_rate": 1.5680370050671114e-06,
      "loss": 0.0065,
      "step": 2579320
    },
    {
      "epoch": 4.221146481805149,
      "grad_norm": 0.3317280113697052,
      "learning_rate": 1.5679711128535944e-06,
      "loss": 0.009,
      "step": 2579340
    },
    {
      "epoch": 4.221179212243802,
      "grad_norm": 0.5578175187110901,
      "learning_rate": 1.5679052206400771e-06,
      "loss": 0.0109,
      "step": 2579360
    },
    {
      "epoch": 4.221211942682456,
      "grad_norm": 0.12555910646915436,
      "learning_rate": 1.56783932842656e-06,
      "loss": 0.0078,
      "step": 2579380
    },
    {
      "epoch": 4.2212446731211095,
      "grad_norm": 0.19989243149757385,
      "learning_rate": 1.5677734362130428e-06,
      "loss": 0.0114,
      "step": 2579400
    },
    {
      "epoch": 4.221277403559762,
      "grad_norm": 0.279528945684433,
      "learning_rate": 1.5677075439995258e-06,
      "loss": 0.006,
      "step": 2579420
    },
    {
      "epoch": 4.221310133998416,
      "grad_norm": 0.13320475816726685,
      "learning_rate": 1.5676416517860085e-06,
      "loss": 0.0094,
      "step": 2579440
    },
    {
      "epoch": 4.221342864437069,
      "grad_norm": 0.16034744679927826,
      "learning_rate": 1.5675757595724914e-06,
      "loss": 0.0125,
      "step": 2579460
    },
    {
      "epoch": 4.221375594875722,
      "grad_norm": 0.5094723105430603,
      "learning_rate": 1.5675098673589742e-06,
      "loss": 0.0096,
      "step": 2579480
    },
    {
      "epoch": 4.221408325314376,
      "grad_norm": 0.21071042120456696,
      "learning_rate": 1.5674439751454573e-06,
      "loss": 0.0073,
      "step": 2579500
    },
    {
      "epoch": 4.221441055753029,
      "grad_norm": 0.07370026409626007,
      "learning_rate": 1.56737808293194e-06,
      "loss": 0.007,
      "step": 2579520
    },
    {
      "epoch": 4.221473786191683,
      "grad_norm": 0.19616755843162537,
      "learning_rate": 1.567312190718423e-06,
      "loss": 0.0102,
      "step": 2579540
    },
    {
      "epoch": 4.221506516630336,
      "grad_norm": 0.3969120979309082,
      "learning_rate": 1.5672462985049058e-06,
      "loss": 0.008,
      "step": 2579560
    },
    {
      "epoch": 4.221539247068989,
      "grad_norm": 0.1844053715467453,
      "learning_rate": 1.5671804062913887e-06,
      "loss": 0.0092,
      "step": 2579580
    },
    {
      "epoch": 4.221571977507643,
      "grad_norm": 0.3167138695716858,
      "learning_rate": 1.5671145140778715e-06,
      "loss": 0.0084,
      "step": 2579600
    },
    {
      "epoch": 4.2216047079462955,
      "grad_norm": 0.08252836763858795,
      "learning_rate": 1.5670486218643544e-06,
      "loss": 0.0079,
      "step": 2579620
    },
    {
      "epoch": 4.221637438384949,
      "grad_norm": 0.11321593821048737,
      "learning_rate": 1.5669827296508372e-06,
      "loss": 0.0108,
      "step": 2579640
    },
    {
      "epoch": 4.221670168823603,
      "grad_norm": 0.09665147960186005,
      "learning_rate": 1.5669168374373201e-06,
      "loss": 0.0118,
      "step": 2579660
    },
    {
      "epoch": 4.221702899262256,
      "grad_norm": 0.10362239181995392,
      "learning_rate": 1.566850945223803e-06,
      "loss": 0.0071,
      "step": 2579680
    },
    {
      "epoch": 4.221735629700909,
      "grad_norm": 0.17900149524211884,
      "learning_rate": 1.566785053010286e-06,
      "loss": 0.0103,
      "step": 2579700
    },
    {
      "epoch": 4.221768360139563,
      "grad_norm": 0.7765052914619446,
      "learning_rate": 1.5667191607967688e-06,
      "loss": 0.0097,
      "step": 2579720
    },
    {
      "epoch": 4.221801090578216,
      "grad_norm": 0.2366192489862442,
      "learning_rate": 1.5666532685832517e-06,
      "loss": 0.0148,
      "step": 2579740
    },
    {
      "epoch": 4.221833821016869,
      "grad_norm": 0.38435786962509155,
      "learning_rate": 1.5665873763697344e-06,
      "loss": 0.006,
      "step": 2579760
    },
    {
      "epoch": 4.2218665514555225,
      "grad_norm": 0.28046271204948425,
      "learning_rate": 1.5665214841562174e-06,
      "loss": 0.0111,
      "step": 2579780
    },
    {
      "epoch": 4.221899281894176,
      "grad_norm": 0.2875286936759949,
      "learning_rate": 1.5664555919427001e-06,
      "loss": 0.0104,
      "step": 2579800
    },
    {
      "epoch": 4.22193201233283,
      "grad_norm": 0.07690829038619995,
      "learning_rate": 1.566389699729183e-06,
      "loss": 0.0125,
      "step": 2579820
    },
    {
      "epoch": 4.221964742771482,
      "grad_norm": 0.49774131178855896,
      "learning_rate": 1.5663238075156658e-06,
      "loss": 0.0099,
      "step": 2579840
    },
    {
      "epoch": 4.221997473210136,
      "grad_norm": 0.13313919305801392,
      "learning_rate": 1.566257915302149e-06,
      "loss": 0.0128,
      "step": 2579860
    },
    {
      "epoch": 4.22203020364879,
      "grad_norm": 0.3010176420211792,
      "learning_rate": 1.5661920230886317e-06,
      "loss": 0.0123,
      "step": 2579880
    },
    {
      "epoch": 4.222062934087442,
      "grad_norm": 0.27945971488952637,
      "learning_rate": 1.5661261308751147e-06,
      "loss": 0.0116,
      "step": 2579900
    },
    {
      "epoch": 4.222095664526096,
      "grad_norm": 0.26287421584129333,
      "learning_rate": 1.5660602386615974e-06,
      "loss": 0.0091,
      "step": 2579920
    },
    {
      "epoch": 4.2221283949647495,
      "grad_norm": 0.2636200785636902,
      "learning_rate": 1.5659943464480804e-06,
      "loss": 0.0117,
      "step": 2579940
    },
    {
      "epoch": 4.222161125403403,
      "grad_norm": 0.1962737888097763,
      "learning_rate": 1.5659284542345631e-06,
      "loss": 0.0113,
      "step": 2579960
    },
    {
      "epoch": 4.222193855842056,
      "grad_norm": 0.1762128472328186,
      "learning_rate": 1.565862562021046e-06,
      "loss": 0.0082,
      "step": 2579980
    },
    {
      "epoch": 4.222226586280709,
      "grad_norm": 0.23129162192344666,
      "learning_rate": 1.5657966698075288e-06,
      "loss": 0.0103,
      "step": 2580000
    },
    {
      "epoch": 4.222259316719363,
      "grad_norm": 0.40225350856781006,
      "learning_rate": 1.5657307775940118e-06,
      "loss": 0.0099,
      "step": 2580020
    },
    {
      "epoch": 4.222292047158016,
      "grad_norm": 0.12904420495033264,
      "learning_rate": 1.565664885380495e-06,
      "loss": 0.0067,
      "step": 2580040
    },
    {
      "epoch": 4.222324777596669,
      "grad_norm": 0.43699342012405396,
      "learning_rate": 1.5655989931669777e-06,
      "loss": 0.0077,
      "step": 2580060
    },
    {
      "epoch": 4.222357508035323,
      "grad_norm": 0.27567118406295776,
      "learning_rate": 1.5655331009534604e-06,
      "loss": 0.0097,
      "step": 2580080
    },
    {
      "epoch": 4.222390238473976,
      "grad_norm": 0.2879788875579834,
      "learning_rate": 1.5654672087399434e-06,
      "loss": 0.0114,
      "step": 2580100
    },
    {
      "epoch": 4.222422968912629,
      "grad_norm": 0.12084678560495377,
      "learning_rate": 1.565401316526426e-06,
      "loss": 0.0075,
      "step": 2580120
    },
    {
      "epoch": 4.222455699351283,
      "grad_norm": 0.16935016214847565,
      "learning_rate": 1.565335424312909e-06,
      "loss": 0.0112,
      "step": 2580140
    },
    {
      "epoch": 4.222488429789936,
      "grad_norm": 0.08982691913843155,
      "learning_rate": 1.5652695320993918e-06,
      "loss": 0.0147,
      "step": 2580160
    },
    {
      "epoch": 4.222521160228589,
      "grad_norm": 0.15649165213108063,
      "learning_rate": 1.5652036398858747e-06,
      "loss": 0.0117,
      "step": 2580180
    },
    {
      "epoch": 4.222553890667243,
      "grad_norm": 0.24075038731098175,
      "learning_rate": 1.565137747672358e-06,
      "loss": 0.0156,
      "step": 2580200
    },
    {
      "epoch": 4.222586621105896,
      "grad_norm": 0.3569578230381012,
      "learning_rate": 1.5650718554588406e-06,
      "loss": 0.0136,
      "step": 2580220
    },
    {
      "epoch": 4.22261935154455,
      "grad_norm": 0.1403852105140686,
      "learning_rate": 1.5650059632453236e-06,
      "loss": 0.0094,
      "step": 2580240
    },
    {
      "epoch": 4.2226520819832025,
      "grad_norm": 0.2583155333995819,
      "learning_rate": 1.5649400710318063e-06,
      "loss": 0.0101,
      "step": 2580260
    },
    {
      "epoch": 4.222684812421856,
      "grad_norm": 0.27730703353881836,
      "learning_rate": 1.5648741788182893e-06,
      "loss": 0.0093,
      "step": 2580280
    },
    {
      "epoch": 4.22271754286051,
      "grad_norm": 0.1310190111398697,
      "learning_rate": 1.564808286604772e-06,
      "loss": 0.0072,
      "step": 2580300
    },
    {
      "epoch": 4.222750273299162,
      "grad_norm": 0.2968846559524536,
      "learning_rate": 1.5647423943912548e-06,
      "loss": 0.0091,
      "step": 2580320
    },
    {
      "epoch": 4.222783003737816,
      "grad_norm": 0.5247693061828613,
      "learning_rate": 1.5646765021777377e-06,
      "loss": 0.0081,
      "step": 2580340
    },
    {
      "epoch": 4.22281573417647,
      "grad_norm": 0.172962486743927,
      "learning_rate": 1.5646106099642204e-06,
      "loss": 0.0054,
      "step": 2580360
    },
    {
      "epoch": 4.222848464615123,
      "grad_norm": 0.21976351737976074,
      "learning_rate": 1.5645447177507036e-06,
      "loss": 0.0101,
      "step": 2580380
    },
    {
      "epoch": 4.222881195053776,
      "grad_norm": 0.18396680057048798,
      "learning_rate": 1.5644788255371866e-06,
      "loss": 0.0102,
      "step": 2580400
    },
    {
      "epoch": 4.2229139254924295,
      "grad_norm": 0.194691002368927,
      "learning_rate": 1.5644129333236693e-06,
      "loss": 0.0117,
      "step": 2580420
    },
    {
      "epoch": 4.222946655931083,
      "grad_norm": 0.3363838195800781,
      "learning_rate": 1.5643470411101523e-06,
      "loss": 0.0066,
      "step": 2580440
    },
    {
      "epoch": 4.222979386369736,
      "grad_norm": 0.33526700735092163,
      "learning_rate": 1.564281148896635e-06,
      "loss": 0.0082,
      "step": 2580460
    },
    {
      "epoch": 4.223012116808389,
      "grad_norm": 0.5636516809463501,
      "learning_rate": 1.564215256683118e-06,
      "loss": 0.0069,
      "step": 2580480
    },
    {
      "epoch": 4.223044847247043,
      "grad_norm": 0.44963937997817993,
      "learning_rate": 1.5641493644696007e-06,
      "loss": 0.0073,
      "step": 2580500
    },
    {
      "epoch": 4.223077577685697,
      "grad_norm": 0.38555261492729187,
      "learning_rate": 1.5640834722560836e-06,
      "loss": 0.0116,
      "step": 2580520
    },
    {
      "epoch": 4.223110308124349,
      "grad_norm": 0.35498467087745667,
      "learning_rate": 1.5640175800425664e-06,
      "loss": 0.0123,
      "step": 2580540
    },
    {
      "epoch": 4.223143038563003,
      "grad_norm": 0.49224966764450073,
      "learning_rate": 1.5639516878290495e-06,
      "loss": 0.0087,
      "step": 2580560
    },
    {
      "epoch": 4.2231757690016565,
      "grad_norm": 0.27973976731300354,
      "learning_rate": 1.5638857956155323e-06,
      "loss": 0.0081,
      "step": 2580580
    },
    {
      "epoch": 4.223208499440309,
      "grad_norm": 0.43730926513671875,
      "learning_rate": 1.5638199034020152e-06,
      "loss": 0.0079,
      "step": 2580600
    },
    {
      "epoch": 4.223241229878963,
      "grad_norm": 1.0863829851150513,
      "learning_rate": 1.563754011188498e-06,
      "loss": 0.0105,
      "step": 2580620
    },
    {
      "epoch": 4.223273960317616,
      "grad_norm": 0.13574658334255219,
      "learning_rate": 1.563688118974981e-06,
      "loss": 0.0116,
      "step": 2580640
    },
    {
      "epoch": 4.223306690756269,
      "grad_norm": 0.6416475772857666,
      "learning_rate": 1.5636222267614637e-06,
      "loss": 0.0113,
      "step": 2580660
    },
    {
      "epoch": 4.223339421194923,
      "grad_norm": 0.2747911512851715,
      "learning_rate": 1.5635563345479466e-06,
      "loss": 0.0072,
      "step": 2580680
    },
    {
      "epoch": 4.223372151633576,
      "grad_norm": 0.17769016325473785,
      "learning_rate": 1.5634904423344294e-06,
      "loss": 0.0075,
      "step": 2580700
    },
    {
      "epoch": 4.22340488207223,
      "grad_norm": 0.11199413239955902,
      "learning_rate": 1.5634245501209123e-06,
      "loss": 0.0091,
      "step": 2580720
    },
    {
      "epoch": 4.223437612510883,
      "grad_norm": 0.5161463022232056,
      "learning_rate": 1.5633586579073953e-06,
      "loss": 0.0112,
      "step": 2580740
    },
    {
      "epoch": 4.223470342949536,
      "grad_norm": 0.2357318103313446,
      "learning_rate": 1.5632927656938782e-06,
      "loss": 0.0109,
      "step": 2580760
    },
    {
      "epoch": 4.22350307338819,
      "grad_norm": 0.624849796295166,
      "learning_rate": 1.563226873480361e-06,
      "loss": 0.0073,
      "step": 2580780
    },
    {
      "epoch": 4.2235358038268425,
      "grad_norm": 0.3545793890953064,
      "learning_rate": 1.563160981266844e-06,
      "loss": 0.0132,
      "step": 2580800
    },
    {
      "epoch": 4.223568534265496,
      "grad_norm": 0.23182177543640137,
      "learning_rate": 1.5630950890533266e-06,
      "loss": 0.0087,
      "step": 2580820
    },
    {
      "epoch": 4.22360126470415,
      "grad_norm": 0.3653641939163208,
      "learning_rate": 1.5630291968398096e-06,
      "loss": 0.0092,
      "step": 2580840
    },
    {
      "epoch": 4.223633995142803,
      "grad_norm": 0.13343842327594757,
      "learning_rate": 1.5629633046262923e-06,
      "loss": 0.0083,
      "step": 2580860
    },
    {
      "epoch": 4.223666725581456,
      "grad_norm": 0.3162135183811188,
      "learning_rate": 1.5628974124127753e-06,
      "loss": 0.0098,
      "step": 2580880
    },
    {
      "epoch": 4.2236994560201095,
      "grad_norm": 0.1957070231437683,
      "learning_rate": 1.562831520199258e-06,
      "loss": 0.008,
      "step": 2580900
    },
    {
      "epoch": 4.223732186458763,
      "grad_norm": 0.24851472675800323,
      "learning_rate": 1.5627656279857412e-06,
      "loss": 0.0098,
      "step": 2580920
    },
    {
      "epoch": 4.223764916897416,
      "grad_norm": 0.3971792161464691,
      "learning_rate": 1.562699735772224e-06,
      "loss": 0.0096,
      "step": 2580940
    },
    {
      "epoch": 4.223797647336069,
      "grad_norm": 0.2942838966846466,
      "learning_rate": 1.5626338435587069e-06,
      "loss": 0.0071,
      "step": 2580960
    },
    {
      "epoch": 4.223830377774723,
      "grad_norm": 0.2277361899614334,
      "learning_rate": 1.5625679513451896e-06,
      "loss": 0.0067,
      "step": 2580980
    },
    {
      "epoch": 4.223863108213377,
      "grad_norm": 0.17866134643554688,
      "learning_rate": 1.5625020591316726e-06,
      "loss": 0.008,
      "step": 2581000
    },
    {
      "epoch": 4.223895838652029,
      "grad_norm": 0.3792024552822113,
      "learning_rate": 1.5624361669181553e-06,
      "loss": 0.0079,
      "step": 2581020
    },
    {
      "epoch": 4.223928569090683,
      "grad_norm": 0.48170989751815796,
      "learning_rate": 1.5623702747046383e-06,
      "loss": 0.0098,
      "step": 2581040
    },
    {
      "epoch": 4.2239612995293365,
      "grad_norm": 0.4258577227592468,
      "learning_rate": 1.562304382491121e-06,
      "loss": 0.0101,
      "step": 2581060
    },
    {
      "epoch": 4.223994029967989,
      "grad_norm": 0.5175296068191528,
      "learning_rate": 1.5622384902776042e-06,
      "loss": 0.0088,
      "step": 2581080
    },
    {
      "epoch": 4.224026760406643,
      "grad_norm": 0.13231857120990753,
      "learning_rate": 1.562172598064087e-06,
      "loss": 0.0108,
      "step": 2581100
    },
    {
      "epoch": 4.224059490845296,
      "grad_norm": 0.23601458966732025,
      "learning_rate": 1.5621067058505699e-06,
      "loss": 0.0088,
      "step": 2581120
    },
    {
      "epoch": 4.22409222128395,
      "grad_norm": 0.2617882788181305,
      "learning_rate": 1.5620408136370526e-06,
      "loss": 0.012,
      "step": 2581140
    },
    {
      "epoch": 4.224124951722603,
      "grad_norm": 0.2400309443473816,
      "learning_rate": 1.5619749214235355e-06,
      "loss": 0.0063,
      "step": 2581160
    },
    {
      "epoch": 4.224157682161256,
      "grad_norm": 0.20385921001434326,
      "learning_rate": 1.5619090292100183e-06,
      "loss": 0.0076,
      "step": 2581180
    },
    {
      "epoch": 4.22419041259991,
      "grad_norm": 0.17589350044727325,
      "learning_rate": 1.5618431369965012e-06,
      "loss": 0.0065,
      "step": 2581200
    },
    {
      "epoch": 4.224223143038563,
      "grad_norm": 0.022797590121626854,
      "learning_rate": 1.561777244782984e-06,
      "loss": 0.0097,
      "step": 2581220
    },
    {
      "epoch": 4.224255873477216,
      "grad_norm": 0.3983113467693329,
      "learning_rate": 1.561711352569467e-06,
      "loss": 0.0065,
      "step": 2581240
    },
    {
      "epoch": 4.22428860391587,
      "grad_norm": 0.19967268407344818,
      "learning_rate": 1.56164546035595e-06,
      "loss": 0.0093,
      "step": 2581260
    },
    {
      "epoch": 4.224321334354523,
      "grad_norm": 0.45594510436058044,
      "learning_rate": 1.5615795681424328e-06,
      "loss": 0.0112,
      "step": 2581280
    },
    {
      "epoch": 4.224354064793176,
      "grad_norm": 0.12686553597450256,
      "learning_rate": 1.5615136759289156e-06,
      "loss": 0.0086,
      "step": 2581300
    },
    {
      "epoch": 4.22438679523183,
      "grad_norm": 0.19979141652584076,
      "learning_rate": 1.5614477837153985e-06,
      "loss": 0.0086,
      "step": 2581320
    },
    {
      "epoch": 4.224419525670483,
      "grad_norm": 0.38778501749038696,
      "learning_rate": 1.5613818915018813e-06,
      "loss": 0.0131,
      "step": 2581340
    },
    {
      "epoch": 4.224452256109136,
      "grad_norm": 0.16204002499580383,
      "learning_rate": 1.5613159992883642e-06,
      "loss": 0.0069,
      "step": 2581360
    },
    {
      "epoch": 4.22448498654779,
      "grad_norm": 0.12508682906627655,
      "learning_rate": 1.561250107074847e-06,
      "loss": 0.0073,
      "step": 2581380
    },
    {
      "epoch": 4.224517716986443,
      "grad_norm": 0.10517320036888123,
      "learning_rate": 1.56118421486133e-06,
      "loss": 0.0068,
      "step": 2581400
    },
    {
      "epoch": 4.224550447425097,
      "grad_norm": 0.18214772641658783,
      "learning_rate": 1.5611183226478126e-06,
      "loss": 0.0073,
      "step": 2581420
    },
    {
      "epoch": 4.2245831778637495,
      "grad_norm": 0.24433661997318268,
      "learning_rate": 1.5610524304342958e-06,
      "loss": 0.0098,
      "step": 2581440
    },
    {
      "epoch": 4.224615908302403,
      "grad_norm": 0.15360386669635773,
      "learning_rate": 1.5609865382207788e-06,
      "loss": 0.007,
      "step": 2581460
    },
    {
      "epoch": 4.224648638741057,
      "grad_norm": 0.36679673194885254,
      "learning_rate": 1.5609206460072615e-06,
      "loss": 0.0076,
      "step": 2581480
    },
    {
      "epoch": 4.224681369179709,
      "grad_norm": 0.12747420370578766,
      "learning_rate": 1.5608547537937445e-06,
      "loss": 0.0109,
      "step": 2581500
    },
    {
      "epoch": 4.224714099618363,
      "grad_norm": 0.11794266849756241,
      "learning_rate": 1.5607888615802272e-06,
      "loss": 0.0085,
      "step": 2581520
    },
    {
      "epoch": 4.224746830057017,
      "grad_norm": 0.256449431180954,
      "learning_rate": 1.56072296936671e-06,
      "loss": 0.0102,
      "step": 2581540
    },
    {
      "epoch": 4.22477956049567,
      "grad_norm": 0.5434733629226685,
      "learning_rate": 1.5606570771531929e-06,
      "loss": 0.007,
      "step": 2581560
    },
    {
      "epoch": 4.224812290934323,
      "grad_norm": 0.37524548172950745,
      "learning_rate": 1.5605911849396756e-06,
      "loss": 0.0101,
      "step": 2581580
    },
    {
      "epoch": 4.2248450213729765,
      "grad_norm": 0.47991427779197693,
      "learning_rate": 1.5605252927261586e-06,
      "loss": 0.0094,
      "step": 2581600
    },
    {
      "epoch": 4.22487775181163,
      "grad_norm": 0.14645346999168396,
      "learning_rate": 1.5604594005126417e-06,
      "loss": 0.0091,
      "step": 2581620
    },
    {
      "epoch": 4.224910482250283,
      "grad_norm": 0.5202609896659851,
      "learning_rate": 1.5603935082991245e-06,
      "loss": 0.0127,
      "step": 2581640
    },
    {
      "epoch": 4.224943212688936,
      "grad_norm": 0.1634151041507721,
      "learning_rate": 1.5603276160856074e-06,
      "loss": 0.0062,
      "step": 2581660
    },
    {
      "epoch": 4.22497594312759,
      "grad_norm": 0.26621556282043457,
      "learning_rate": 1.5602617238720902e-06,
      "loss": 0.0069,
      "step": 2581680
    },
    {
      "epoch": 4.2250086735662435,
      "grad_norm": 0.16145837306976318,
      "learning_rate": 1.5601958316585731e-06,
      "loss": 0.0087,
      "step": 2581700
    },
    {
      "epoch": 4.225041404004896,
      "grad_norm": 0.7606035470962524,
      "learning_rate": 1.5601299394450559e-06,
      "loss": 0.0096,
      "step": 2581720
    },
    {
      "epoch": 4.22507413444355,
      "grad_norm": 0.16295818984508514,
      "learning_rate": 1.5600640472315386e-06,
      "loss": 0.0116,
      "step": 2581740
    },
    {
      "epoch": 4.225106864882203,
      "grad_norm": 0.1350051313638687,
      "learning_rate": 1.5599981550180215e-06,
      "loss": 0.0103,
      "step": 2581760
    },
    {
      "epoch": 4.225139595320856,
      "grad_norm": 0.5971418619155884,
      "learning_rate": 1.5599322628045043e-06,
      "loss": 0.0088,
      "step": 2581780
    },
    {
      "epoch": 4.22517232575951,
      "grad_norm": 0.31305044889450073,
      "learning_rate": 1.5598663705909875e-06,
      "loss": 0.0079,
      "step": 2581800
    },
    {
      "epoch": 4.225205056198163,
      "grad_norm": 0.2182558923959732,
      "learning_rate": 1.5598004783774704e-06,
      "loss": 0.0053,
      "step": 2581820
    },
    {
      "epoch": 4.225237786636817,
      "grad_norm": 0.19780083000659943,
      "learning_rate": 1.5597345861639531e-06,
      "loss": 0.0076,
      "step": 2581840
    },
    {
      "epoch": 4.22527051707547,
      "grad_norm": 0.10496008396148682,
      "learning_rate": 1.559668693950436e-06,
      "loss": 0.0061,
      "step": 2581860
    },
    {
      "epoch": 4.225303247514123,
      "grad_norm": 0.3007005751132965,
      "learning_rate": 1.5596028017369188e-06,
      "loss": 0.0094,
      "step": 2581880
    },
    {
      "epoch": 4.225335977952777,
      "grad_norm": 0.5782009959220886,
      "learning_rate": 1.5595369095234018e-06,
      "loss": 0.009,
      "step": 2581900
    },
    {
      "epoch": 4.2253687083914295,
      "grad_norm": 0.2086464911699295,
      "learning_rate": 1.5594710173098845e-06,
      "loss": 0.0152,
      "step": 2581920
    },
    {
      "epoch": 4.225401438830083,
      "grad_norm": 0.3623727560043335,
      "learning_rate": 1.5594051250963675e-06,
      "loss": 0.0088,
      "step": 2581940
    },
    {
      "epoch": 4.225434169268737,
      "grad_norm": 0.4656566381454468,
      "learning_rate": 1.5593392328828504e-06,
      "loss": 0.0098,
      "step": 2581960
    },
    {
      "epoch": 4.22546689970739,
      "grad_norm": 0.23547959327697754,
      "learning_rate": 1.5592733406693334e-06,
      "loss": 0.0062,
      "step": 2581980
    },
    {
      "epoch": 4.225499630146043,
      "grad_norm": 0.13411568105220795,
      "learning_rate": 1.5592074484558161e-06,
      "loss": 0.0083,
      "step": 2582000
    },
    {
      "epoch": 4.225532360584697,
      "grad_norm": 0.3486670255661011,
      "learning_rate": 1.559141556242299e-06,
      "loss": 0.0105,
      "step": 2582020
    },
    {
      "epoch": 4.22556509102335,
      "grad_norm": 0.32392874360084534,
      "learning_rate": 1.5590756640287818e-06,
      "loss": 0.0111,
      "step": 2582040
    },
    {
      "epoch": 4.225597821462003,
      "grad_norm": 0.09772053360939026,
      "learning_rate": 1.5590097718152648e-06,
      "loss": 0.0087,
      "step": 2582060
    },
    {
      "epoch": 4.2256305519006565,
      "grad_norm": 0.1648665815591812,
      "learning_rate": 1.5589438796017475e-06,
      "loss": 0.0069,
      "step": 2582080
    },
    {
      "epoch": 4.22566328233931,
      "grad_norm": 0.2973008453845978,
      "learning_rate": 1.5588779873882305e-06,
      "loss": 0.0111,
      "step": 2582100
    },
    {
      "epoch": 4.225696012777963,
      "grad_norm": 0.35447508096694946,
      "learning_rate": 1.5588120951747132e-06,
      "loss": 0.0091,
      "step": 2582120
    },
    {
      "epoch": 4.225728743216616,
      "grad_norm": 0.13858123123645782,
      "learning_rate": 1.5587462029611964e-06,
      "loss": 0.0134,
      "step": 2582140
    },
    {
      "epoch": 4.22576147365527,
      "grad_norm": 0.4081527590751648,
      "learning_rate": 1.558680310747679e-06,
      "loss": 0.0092,
      "step": 2582160
    },
    {
      "epoch": 4.225794204093924,
      "grad_norm": 0.23701776564121246,
      "learning_rate": 1.558614418534162e-06,
      "loss": 0.0094,
      "step": 2582180
    },
    {
      "epoch": 4.225826934532576,
      "grad_norm": 0.06283306330442429,
      "learning_rate": 1.5585485263206448e-06,
      "loss": 0.0143,
      "step": 2582200
    },
    {
      "epoch": 4.22585966497123,
      "grad_norm": 0.23897172510623932,
      "learning_rate": 1.5584826341071277e-06,
      "loss": 0.0099,
      "step": 2582220
    },
    {
      "epoch": 4.2258923954098835,
      "grad_norm": 0.05627330020070076,
      "learning_rate": 1.5584167418936105e-06,
      "loss": 0.0078,
      "step": 2582240
    },
    {
      "epoch": 4.225925125848537,
      "grad_norm": 0.16851891577243805,
      "learning_rate": 1.5583508496800934e-06,
      "loss": 0.0075,
      "step": 2582260
    },
    {
      "epoch": 4.22595785628719,
      "grad_norm": 0.1927950531244278,
      "learning_rate": 1.5582849574665762e-06,
      "loss": 0.007,
      "step": 2582280
    },
    {
      "epoch": 4.225990586725843,
      "grad_norm": 0.1281706690788269,
      "learning_rate": 1.5582190652530591e-06,
      "loss": 0.0057,
      "step": 2582300
    },
    {
      "epoch": 4.226023317164497,
      "grad_norm": 0.29842400550842285,
      "learning_rate": 1.558153173039542e-06,
      "loss": 0.0088,
      "step": 2582320
    },
    {
      "epoch": 4.22605604760315,
      "grad_norm": 0.13329452276229858,
      "learning_rate": 1.558087280826025e-06,
      "loss": 0.0093,
      "step": 2582340
    },
    {
      "epoch": 4.226088778041803,
      "grad_norm": 0.1369411200284958,
      "learning_rate": 1.5580213886125078e-06,
      "loss": 0.007,
      "step": 2582360
    },
    {
      "epoch": 4.226121508480457,
      "grad_norm": 0.24229539930820465,
      "learning_rate": 1.5579554963989907e-06,
      "loss": 0.0144,
      "step": 2582380
    },
    {
      "epoch": 4.22615423891911,
      "grad_norm": 0.17827577888965607,
      "learning_rate": 1.5578896041854735e-06,
      "loss": 0.0124,
      "step": 2582400
    },
    {
      "epoch": 4.226186969357763,
      "grad_norm": 0.29816243052482605,
      "learning_rate": 1.5578237119719564e-06,
      "loss": 0.0132,
      "step": 2582420
    },
    {
      "epoch": 4.226219699796417,
      "grad_norm": 0.39655670523643494,
      "learning_rate": 1.5577578197584391e-06,
      "loss": 0.0103,
      "step": 2582440
    },
    {
      "epoch": 4.22625243023507,
      "grad_norm": 0.2208663821220398,
      "learning_rate": 1.557691927544922e-06,
      "loss": 0.0089,
      "step": 2582460
    },
    {
      "epoch": 4.226285160673723,
      "grad_norm": 0.5607355237007141,
      "learning_rate": 1.5576260353314048e-06,
      "loss": 0.0087,
      "step": 2582480
    },
    {
      "epoch": 4.226317891112377,
      "grad_norm": 0.16427850723266602,
      "learning_rate": 1.557560143117888e-06,
      "loss": 0.0068,
      "step": 2582500
    },
    {
      "epoch": 4.22635062155103,
      "grad_norm": 0.2536080479621887,
      "learning_rate": 1.5574942509043707e-06,
      "loss": 0.0145,
      "step": 2582520
    },
    {
      "epoch": 4.226383351989683,
      "grad_norm": 0.14821337163448334,
      "learning_rate": 1.5574283586908537e-06,
      "loss": 0.0088,
      "step": 2582540
    },
    {
      "epoch": 4.2264160824283366,
      "grad_norm": 0.05941418185830116,
      "learning_rate": 1.5573624664773364e-06,
      "loss": 0.0101,
      "step": 2582560
    },
    {
      "epoch": 4.22644881286699,
      "grad_norm": 0.14688003063201904,
      "learning_rate": 1.5572965742638194e-06,
      "loss": 0.0096,
      "step": 2582580
    },
    {
      "epoch": 4.226481543305644,
      "grad_norm": 0.27308645844459534,
      "learning_rate": 1.5572306820503021e-06,
      "loss": 0.0096,
      "step": 2582600
    },
    {
      "epoch": 4.2265142737442964,
      "grad_norm": 0.31040599942207336,
      "learning_rate": 1.557164789836785e-06,
      "loss": 0.0115,
      "step": 2582620
    },
    {
      "epoch": 4.22654700418295,
      "grad_norm": 0.38360628485679626,
      "learning_rate": 1.5570988976232678e-06,
      "loss": 0.0069,
      "step": 2582640
    },
    {
      "epoch": 4.226579734621604,
      "grad_norm": 0.3307371735572815,
      "learning_rate": 1.5570330054097508e-06,
      "loss": 0.0142,
      "step": 2582660
    },
    {
      "epoch": 4.226612465060256,
      "grad_norm": 0.4092751443386078,
      "learning_rate": 1.556967113196234e-06,
      "loss": 0.0061,
      "step": 2582680
    },
    {
      "epoch": 4.22664519549891,
      "grad_norm": 0.12137049436569214,
      "learning_rate": 1.5569012209827167e-06,
      "loss": 0.009,
      "step": 2582700
    },
    {
      "epoch": 4.2266779259375635,
      "grad_norm": 0.1513378918170929,
      "learning_rate": 1.5568353287691996e-06,
      "loss": 0.0067,
      "step": 2582720
    },
    {
      "epoch": 4.226710656376217,
      "grad_norm": 0.3692396581172943,
      "learning_rate": 1.5567694365556824e-06,
      "loss": 0.0121,
      "step": 2582740
    },
    {
      "epoch": 4.22674338681487,
      "grad_norm": 0.10405732691287994,
      "learning_rate": 1.556703544342165e-06,
      "loss": 0.0069,
      "step": 2582760
    },
    {
      "epoch": 4.226776117253523,
      "grad_norm": 0.18446354568004608,
      "learning_rate": 1.556637652128648e-06,
      "loss": 0.0048,
      "step": 2582780
    },
    {
      "epoch": 4.226808847692177,
      "grad_norm": 0.2351151406764984,
      "learning_rate": 1.5565717599151308e-06,
      "loss": 0.0123,
      "step": 2582800
    },
    {
      "epoch": 4.22684157813083,
      "grad_norm": 0.16562840342521667,
      "learning_rate": 1.5565058677016137e-06,
      "loss": 0.0075,
      "step": 2582820
    },
    {
      "epoch": 4.226874308569483,
      "grad_norm": 0.08752599358558655,
      "learning_rate": 1.556439975488097e-06,
      "loss": 0.0079,
      "step": 2582840
    },
    {
      "epoch": 4.226907039008137,
      "grad_norm": 0.23725925385951996,
      "learning_rate": 1.5563740832745796e-06,
      "loss": 0.0142,
      "step": 2582860
    },
    {
      "epoch": 4.2269397694467905,
      "grad_norm": 1.0272120237350464,
      "learning_rate": 1.5563081910610626e-06,
      "loss": 0.0121,
      "step": 2582880
    },
    {
      "epoch": 4.226972499885443,
      "grad_norm": 0.12585224211215973,
      "learning_rate": 1.5562422988475453e-06,
      "loss": 0.0077,
      "step": 2582900
    },
    {
      "epoch": 4.227005230324097,
      "grad_norm": 0.12911635637283325,
      "learning_rate": 1.5561764066340283e-06,
      "loss": 0.0125,
      "step": 2582920
    },
    {
      "epoch": 4.22703796076275,
      "grad_norm": 0.2352851778268814,
      "learning_rate": 1.556110514420511e-06,
      "loss": 0.0092,
      "step": 2582940
    },
    {
      "epoch": 4.227070691201403,
      "grad_norm": 0.22448880970478058,
      "learning_rate": 1.5560446222069938e-06,
      "loss": 0.0091,
      "step": 2582960
    },
    {
      "epoch": 4.227103421640057,
      "grad_norm": 0.17453810572624207,
      "learning_rate": 1.5559787299934767e-06,
      "loss": 0.0085,
      "step": 2582980
    },
    {
      "epoch": 4.22713615207871,
      "grad_norm": 0.17086626589298248,
      "learning_rate": 1.5559128377799595e-06,
      "loss": 0.0075,
      "step": 2583000
    },
    {
      "epoch": 4.227168882517364,
      "grad_norm": 0.34787604212760925,
      "learning_rate": 1.5558469455664426e-06,
      "loss": 0.009,
      "step": 2583020
    },
    {
      "epoch": 4.227201612956017,
      "grad_norm": 0.18421748280525208,
      "learning_rate": 1.5557810533529256e-06,
      "loss": 0.0105,
      "step": 2583040
    },
    {
      "epoch": 4.22723434339467,
      "grad_norm": 0.2058771848678589,
      "learning_rate": 1.5557151611394083e-06,
      "loss": 0.0136,
      "step": 2583060
    },
    {
      "epoch": 4.227267073833324,
      "grad_norm": 0.2525031566619873,
      "learning_rate": 1.5556492689258913e-06,
      "loss": 0.0085,
      "step": 2583080
    },
    {
      "epoch": 4.2272998042719765,
      "grad_norm": 0.14909720420837402,
      "learning_rate": 1.555583376712374e-06,
      "loss": 0.0146,
      "step": 2583100
    },
    {
      "epoch": 4.22733253471063,
      "grad_norm": 0.18382388353347778,
      "learning_rate": 1.555517484498857e-06,
      "loss": 0.0106,
      "step": 2583120
    },
    {
      "epoch": 4.227365265149284,
      "grad_norm": 0.09581976383924484,
      "learning_rate": 1.5554515922853397e-06,
      "loss": 0.0101,
      "step": 2583140
    },
    {
      "epoch": 4.227397995587937,
      "grad_norm": 0.07756111025810242,
      "learning_rate": 1.5553857000718226e-06,
      "loss": 0.0142,
      "step": 2583160
    },
    {
      "epoch": 4.22743072602659,
      "grad_norm": 0.12309230118989944,
      "learning_rate": 1.5553198078583054e-06,
      "loss": 0.0083,
      "step": 2583180
    },
    {
      "epoch": 4.227463456465244,
      "grad_norm": 0.2892124056816101,
      "learning_rate": 1.5552539156447886e-06,
      "loss": 0.0098,
      "step": 2583200
    },
    {
      "epoch": 4.227496186903897,
      "grad_norm": 0.10540919005870819,
      "learning_rate": 1.5551880234312713e-06,
      "loss": 0.01,
      "step": 2583220
    },
    {
      "epoch": 4.22752891734255,
      "grad_norm": 0.13341458141803741,
      "learning_rate": 1.5551221312177542e-06,
      "loss": 0.0078,
      "step": 2583240
    },
    {
      "epoch": 4.2275616477812035,
      "grad_norm": 0.10901340842247009,
      "learning_rate": 1.555056239004237e-06,
      "loss": 0.0065,
      "step": 2583260
    },
    {
      "epoch": 4.227594378219857,
      "grad_norm": 0.2823975384235382,
      "learning_rate": 1.55499034679072e-06,
      "loss": 0.0076,
      "step": 2583280
    },
    {
      "epoch": 4.227627108658511,
      "grad_norm": 0.45578786730766296,
      "learning_rate": 1.5549244545772027e-06,
      "loss": 0.0132,
      "step": 2583300
    },
    {
      "epoch": 4.227659839097163,
      "grad_norm": 0.1532667726278305,
      "learning_rate": 1.5548585623636856e-06,
      "loss": 0.0101,
      "step": 2583320
    },
    {
      "epoch": 4.227692569535817,
      "grad_norm": 0.08734907954931259,
      "learning_rate": 1.5547926701501684e-06,
      "loss": 0.0062,
      "step": 2583340
    },
    {
      "epoch": 4.2277252999744706,
      "grad_norm": 0.21241481602191925,
      "learning_rate": 1.5547267779366513e-06,
      "loss": 0.0125,
      "step": 2583360
    },
    {
      "epoch": 4.227758030413123,
      "grad_norm": 0.3337683379650116,
      "learning_rate": 1.5546608857231343e-06,
      "loss": 0.0064,
      "step": 2583380
    },
    {
      "epoch": 4.227790760851777,
      "grad_norm": 0.21927322447299957,
      "learning_rate": 1.5545949935096172e-06,
      "loss": 0.0098,
      "step": 2583400
    },
    {
      "epoch": 4.2278234912904304,
      "grad_norm": 0.24050050973892212,
      "learning_rate": 1.5545291012961e-06,
      "loss": 0.0071,
      "step": 2583420
    },
    {
      "epoch": 4.227856221729084,
      "grad_norm": 0.08696074038743973,
      "learning_rate": 1.554463209082583e-06,
      "loss": 0.0106,
      "step": 2583440
    },
    {
      "epoch": 4.227888952167737,
      "grad_norm": 0.07963992655277252,
      "learning_rate": 1.5543973168690657e-06,
      "loss": 0.0109,
      "step": 2583460
    },
    {
      "epoch": 4.22792168260639,
      "grad_norm": 0.26316434144973755,
      "learning_rate": 1.5543314246555486e-06,
      "loss": 0.009,
      "step": 2583480
    },
    {
      "epoch": 4.227954413045044,
      "grad_norm": 0.38318103551864624,
      "learning_rate": 1.5542655324420313e-06,
      "loss": 0.0104,
      "step": 2583500
    },
    {
      "epoch": 4.227987143483697,
      "grad_norm": 0.16746832430362701,
      "learning_rate": 1.5541996402285143e-06,
      "loss": 0.0118,
      "step": 2583520
    },
    {
      "epoch": 4.22801987392235,
      "grad_norm": 0.17310863733291626,
      "learning_rate": 1.554133748014997e-06,
      "loss": 0.0057,
      "step": 2583540
    },
    {
      "epoch": 4.228052604361004,
      "grad_norm": 0.19417418539524078,
      "learning_rate": 1.5540678558014802e-06,
      "loss": 0.0127,
      "step": 2583560
    },
    {
      "epoch": 4.2280853347996565,
      "grad_norm": 0.24067258834838867,
      "learning_rate": 1.554001963587963e-06,
      "loss": 0.0109,
      "step": 2583580
    },
    {
      "epoch": 4.22811806523831,
      "grad_norm": 0.24481062591075897,
      "learning_rate": 1.5539360713744459e-06,
      "loss": 0.0089,
      "step": 2583600
    },
    {
      "epoch": 4.228150795676964,
      "grad_norm": 0.2768707275390625,
      "learning_rate": 1.5538701791609286e-06,
      "loss": 0.0057,
      "step": 2583620
    },
    {
      "epoch": 4.228183526115617,
      "grad_norm": 0.5083656907081604,
      "learning_rate": 1.5538042869474116e-06,
      "loss": 0.0139,
      "step": 2583640
    },
    {
      "epoch": 4.22821625655427,
      "grad_norm": 0.42336300015449524,
      "learning_rate": 1.5537383947338943e-06,
      "loss": 0.0127,
      "step": 2583660
    },
    {
      "epoch": 4.228248986992924,
      "grad_norm": 0.32100605964660645,
      "learning_rate": 1.5536725025203773e-06,
      "loss": 0.0078,
      "step": 2583680
    },
    {
      "epoch": 4.228281717431577,
      "grad_norm": 0.24804741144180298,
      "learning_rate": 1.55360661030686e-06,
      "loss": 0.0116,
      "step": 2583700
    },
    {
      "epoch": 4.228314447870231,
      "grad_norm": 0.22024238109588623,
      "learning_rate": 1.5535407180933432e-06,
      "loss": 0.0096,
      "step": 2583720
    },
    {
      "epoch": 4.2283471783088835,
      "grad_norm": 0.4041595458984375,
      "learning_rate": 1.553474825879826e-06,
      "loss": 0.012,
      "step": 2583740
    },
    {
      "epoch": 4.228379908747537,
      "grad_norm": 0.6098740100860596,
      "learning_rate": 1.5534089336663089e-06,
      "loss": 0.0069,
      "step": 2583760
    },
    {
      "epoch": 4.228412639186191,
      "grad_norm": 0.1593082994222641,
      "learning_rate": 1.5533430414527916e-06,
      "loss": 0.0077,
      "step": 2583780
    },
    {
      "epoch": 4.228445369624843,
      "grad_norm": 0.11955231428146362,
      "learning_rate": 1.5532771492392746e-06,
      "loss": 0.0101,
      "step": 2583800
    },
    {
      "epoch": 4.228478100063497,
      "grad_norm": 0.17700794339179993,
      "learning_rate": 1.5532112570257573e-06,
      "loss": 0.0078,
      "step": 2583820
    },
    {
      "epoch": 4.228510830502151,
      "grad_norm": 0.20838989317417145,
      "learning_rate": 1.5531453648122402e-06,
      "loss": 0.0128,
      "step": 2583840
    },
    {
      "epoch": 4.228543560940803,
      "grad_norm": 0.2960761785507202,
      "learning_rate": 1.553079472598723e-06,
      "loss": 0.0096,
      "step": 2583860
    },
    {
      "epoch": 4.228576291379457,
      "grad_norm": 0.23670287430286407,
      "learning_rate": 1.553013580385206e-06,
      "loss": 0.0073,
      "step": 2583880
    },
    {
      "epoch": 4.2286090218181105,
      "grad_norm": 0.21343079209327698,
      "learning_rate": 1.552947688171689e-06,
      "loss": 0.0097,
      "step": 2583900
    },
    {
      "epoch": 4.228641752256764,
      "grad_norm": 0.12358453124761581,
      "learning_rate": 1.5528817959581718e-06,
      "loss": 0.0098,
      "step": 2583920
    },
    {
      "epoch": 4.228674482695417,
      "grad_norm": 0.1822502464056015,
      "learning_rate": 1.5528159037446546e-06,
      "loss": 0.014,
      "step": 2583940
    },
    {
      "epoch": 4.22870721313407,
      "grad_norm": 0.45837336778640747,
      "learning_rate": 1.5527500115311375e-06,
      "loss": 0.0105,
      "step": 2583960
    },
    {
      "epoch": 4.228739943572724,
      "grad_norm": 0.5187504291534424,
      "learning_rate": 1.5526841193176203e-06,
      "loss": 0.0074,
      "step": 2583980
    },
    {
      "epoch": 4.228772674011377,
      "grad_norm": 0.2672439217567444,
      "learning_rate": 1.5526182271041032e-06,
      "loss": 0.0102,
      "step": 2584000
    },
    {
      "epoch": 4.22880540445003,
      "grad_norm": 0.4902194142341614,
      "learning_rate": 1.552552334890586e-06,
      "loss": 0.0063,
      "step": 2584020
    },
    {
      "epoch": 4.228838134888684,
      "grad_norm": 0.10042832046747208,
      "learning_rate": 1.552486442677069e-06,
      "loss": 0.0143,
      "step": 2584040
    },
    {
      "epoch": 4.2288708653273375,
      "grad_norm": 0.3862466812133789,
      "learning_rate": 1.5524205504635517e-06,
      "loss": 0.0067,
      "step": 2584060
    },
    {
      "epoch": 4.22890359576599,
      "grad_norm": 0.17061269283294678,
      "learning_rate": 1.5523546582500348e-06,
      "loss": 0.0059,
      "step": 2584080
    },
    {
      "epoch": 4.228936326204644,
      "grad_norm": 0.18913914263248444,
      "learning_rate": 1.5522887660365178e-06,
      "loss": 0.0148,
      "step": 2584100
    },
    {
      "epoch": 4.228969056643297,
      "grad_norm": 0.27739617228507996,
      "learning_rate": 1.5522228738230005e-06,
      "loss": 0.0089,
      "step": 2584120
    },
    {
      "epoch": 4.22900178708195,
      "grad_norm": 0.12623359262943268,
      "learning_rate": 1.5521569816094835e-06,
      "loss": 0.0146,
      "step": 2584140
    },
    {
      "epoch": 4.229034517520604,
      "grad_norm": 0.6183459162712097,
      "learning_rate": 1.5520910893959662e-06,
      "loss": 0.0134,
      "step": 2584160
    },
    {
      "epoch": 4.229067247959257,
      "grad_norm": 0.4846222698688507,
      "learning_rate": 1.552025197182449e-06,
      "loss": 0.008,
      "step": 2584180
    },
    {
      "epoch": 4.229099978397911,
      "grad_norm": 0.46412864327430725,
      "learning_rate": 1.5519593049689319e-06,
      "loss": 0.0121,
      "step": 2584200
    },
    {
      "epoch": 4.229132708836564,
      "grad_norm": 0.5011910796165466,
      "learning_rate": 1.5518934127554146e-06,
      "loss": 0.0114,
      "step": 2584220
    },
    {
      "epoch": 4.229165439275217,
      "grad_norm": 0.21401630342006683,
      "learning_rate": 1.5518275205418976e-06,
      "loss": 0.0101,
      "step": 2584240
    },
    {
      "epoch": 4.229198169713871,
      "grad_norm": 0.15766330063343048,
      "learning_rate": 1.5517616283283807e-06,
      "loss": 0.0113,
      "step": 2584260
    },
    {
      "epoch": 4.2292309001525235,
      "grad_norm": 0.0970623642206192,
      "learning_rate": 1.5516957361148635e-06,
      "loss": 0.0074,
      "step": 2584280
    },
    {
      "epoch": 4.229263630591177,
      "grad_norm": 0.2254248857498169,
      "learning_rate": 1.5516298439013464e-06,
      "loss": 0.0103,
      "step": 2584300
    },
    {
      "epoch": 4.229296361029831,
      "grad_norm": 0.4907442331314087,
      "learning_rate": 1.5515639516878292e-06,
      "loss": 0.0083,
      "step": 2584320
    },
    {
      "epoch": 4.229329091468484,
      "grad_norm": 0.16912032663822174,
      "learning_rate": 1.5514980594743121e-06,
      "loss": 0.0122,
      "step": 2584340
    },
    {
      "epoch": 4.229361821907137,
      "grad_norm": 0.18426114320755005,
      "learning_rate": 1.5514321672607949e-06,
      "loss": 0.0084,
      "step": 2584360
    },
    {
      "epoch": 4.2293945523457905,
      "grad_norm": 0.2793909013271332,
      "learning_rate": 1.5513662750472776e-06,
      "loss": 0.0115,
      "step": 2584380
    },
    {
      "epoch": 4.229427282784444,
      "grad_norm": 0.18139822781085968,
      "learning_rate": 1.5513003828337606e-06,
      "loss": 0.0087,
      "step": 2584400
    },
    {
      "epoch": 4.229460013223097,
      "grad_norm": 0.0696060061454773,
      "learning_rate": 1.5512344906202437e-06,
      "loss": 0.0059,
      "step": 2584420
    },
    {
      "epoch": 4.22949274366175,
      "grad_norm": 0.159481018781662,
      "learning_rate": 1.5511685984067265e-06,
      "loss": 0.0101,
      "step": 2584440
    },
    {
      "epoch": 4.229525474100404,
      "grad_norm": 0.14322462677955627,
      "learning_rate": 1.5511027061932094e-06,
      "loss": 0.0185,
      "step": 2584460
    },
    {
      "epoch": 4.229558204539058,
      "grad_norm": 0.22229059040546417,
      "learning_rate": 1.5510368139796922e-06,
      "loss": 0.0077,
      "step": 2584480
    },
    {
      "epoch": 4.22959093497771,
      "grad_norm": 0.20069563388824463,
      "learning_rate": 1.550970921766175e-06,
      "loss": 0.0111,
      "step": 2584500
    },
    {
      "epoch": 4.229623665416364,
      "grad_norm": 0.05152792111039162,
      "learning_rate": 1.5509050295526578e-06,
      "loss": 0.0042,
      "step": 2584520
    },
    {
      "epoch": 4.2296563958550175,
      "grad_norm": 0.20255737006664276,
      "learning_rate": 1.5508391373391408e-06,
      "loss": 0.0092,
      "step": 2584540
    },
    {
      "epoch": 4.22968912629367,
      "grad_norm": 0.11965250968933105,
      "learning_rate": 1.5507732451256235e-06,
      "loss": 0.0092,
      "step": 2584560
    },
    {
      "epoch": 4.229721856732324,
      "grad_norm": 0.14693768322467804,
      "learning_rate": 1.5507073529121065e-06,
      "loss": 0.007,
      "step": 2584580
    },
    {
      "epoch": 4.229754587170977,
      "grad_norm": 0.6051482558250427,
      "learning_rate": 1.5506414606985894e-06,
      "loss": 0.0098,
      "step": 2584600
    },
    {
      "epoch": 4.229787317609631,
      "grad_norm": 0.11079081147909164,
      "learning_rate": 1.5505755684850724e-06,
      "loss": 0.0125,
      "step": 2584620
    },
    {
      "epoch": 4.229820048048284,
      "grad_norm": 0.03556221351027489,
      "learning_rate": 1.5505096762715551e-06,
      "loss": 0.0151,
      "step": 2584640
    },
    {
      "epoch": 4.229852778486937,
      "grad_norm": 0.21958065032958984,
      "learning_rate": 1.550443784058038e-06,
      "loss": 0.0115,
      "step": 2584660
    },
    {
      "epoch": 4.229885508925591,
      "grad_norm": 0.2743409276008606,
      "learning_rate": 1.5503778918445208e-06,
      "loss": 0.008,
      "step": 2584680
    },
    {
      "epoch": 4.229918239364244,
      "grad_norm": 0.33811864256858826,
      "learning_rate": 1.5503119996310038e-06,
      "loss": 0.0083,
      "step": 2584700
    },
    {
      "epoch": 4.229950969802897,
      "grad_norm": 0.21445997059345245,
      "learning_rate": 1.5502461074174865e-06,
      "loss": 0.0101,
      "step": 2584720
    },
    {
      "epoch": 4.229983700241551,
      "grad_norm": 0.11112194508314133,
      "learning_rate": 1.5501802152039695e-06,
      "loss": 0.007,
      "step": 2584740
    },
    {
      "epoch": 4.230016430680204,
      "grad_norm": 0.0507885105907917,
      "learning_rate": 1.5501143229904522e-06,
      "loss": 0.0045,
      "step": 2584760
    },
    {
      "epoch": 4.230049161118857,
      "grad_norm": 0.17348268628120422,
      "learning_rate": 1.5500484307769354e-06,
      "loss": 0.0083,
      "step": 2584780
    },
    {
      "epoch": 4.230081891557511,
      "grad_norm": 0.17781144380569458,
      "learning_rate": 1.5499825385634181e-06,
      "loss": 0.0099,
      "step": 2584800
    },
    {
      "epoch": 4.230114621996164,
      "grad_norm": 0.0692354366183281,
      "learning_rate": 1.549916646349901e-06,
      "loss": 0.0083,
      "step": 2584820
    },
    {
      "epoch": 4.230147352434817,
      "grad_norm": 0.10308502614498138,
      "learning_rate": 1.5498507541363838e-06,
      "loss": 0.0107,
      "step": 2584840
    },
    {
      "epoch": 4.230180082873471,
      "grad_norm": 0.2030790001153946,
      "learning_rate": 1.5497848619228668e-06,
      "loss": 0.012,
      "step": 2584860
    },
    {
      "epoch": 4.230212813312124,
      "grad_norm": 0.5341060757637024,
      "learning_rate": 1.5497189697093495e-06,
      "loss": 0.0117,
      "step": 2584880
    },
    {
      "epoch": 4.230245543750778,
      "grad_norm": 0.39154455065727234,
      "learning_rate": 1.5496530774958324e-06,
      "loss": 0.009,
      "step": 2584900
    },
    {
      "epoch": 4.2302782741894305,
      "grad_norm": 0.2904173731803894,
      "learning_rate": 1.5495871852823152e-06,
      "loss": 0.012,
      "step": 2584920
    },
    {
      "epoch": 4.230311004628084,
      "grad_norm": 0.15537990629673004,
      "learning_rate": 1.5495212930687981e-06,
      "loss": 0.0171,
      "step": 2584940
    },
    {
      "epoch": 4.230343735066738,
      "grad_norm": 0.115739606320858,
      "learning_rate": 1.549455400855281e-06,
      "loss": 0.0066,
      "step": 2584960
    },
    {
      "epoch": 4.23037646550539,
      "grad_norm": 0.4307159185409546,
      "learning_rate": 1.549389508641764e-06,
      "loss": 0.0113,
      "step": 2584980
    },
    {
      "epoch": 4.230409195944044,
      "grad_norm": 0.05708927661180496,
      "learning_rate": 1.5493236164282468e-06,
      "loss": 0.0158,
      "step": 2585000
    },
    {
      "epoch": 4.230441926382698,
      "grad_norm": 0.12262428551912308,
      "learning_rate": 1.5492577242147297e-06,
      "loss": 0.0089,
      "step": 2585020
    },
    {
      "epoch": 4.23047465682135,
      "grad_norm": 0.1254272311925888,
      "learning_rate": 1.5491918320012125e-06,
      "loss": 0.0111,
      "step": 2585040
    },
    {
      "epoch": 4.230507387260004,
      "grad_norm": 0.5580016374588013,
      "learning_rate": 1.5491259397876954e-06,
      "loss": 0.0084,
      "step": 2585060
    },
    {
      "epoch": 4.2305401176986575,
      "grad_norm": 0.37191638350486755,
      "learning_rate": 1.5490600475741782e-06,
      "loss": 0.0059,
      "step": 2585080
    },
    {
      "epoch": 4.230572848137311,
      "grad_norm": 0.2214985340833664,
      "learning_rate": 1.5489941553606611e-06,
      "loss": 0.0108,
      "step": 2585100
    },
    {
      "epoch": 4.230605578575964,
      "grad_norm": 0.2586888372898102,
      "learning_rate": 1.5489282631471438e-06,
      "loss": 0.0072,
      "step": 2585120
    },
    {
      "epoch": 4.230638309014617,
      "grad_norm": 0.26920926570892334,
      "learning_rate": 1.548862370933627e-06,
      "loss": 0.0086,
      "step": 2585140
    },
    {
      "epoch": 4.230671039453271,
      "grad_norm": 0.4523114562034607,
      "learning_rate": 1.5487964787201098e-06,
      "loss": 0.0147,
      "step": 2585160
    },
    {
      "epoch": 4.2307037698919245,
      "grad_norm": 0.08179689943790436,
      "learning_rate": 1.5487305865065927e-06,
      "loss": 0.0105,
      "step": 2585180
    },
    {
      "epoch": 4.230736500330577,
      "grad_norm": 0.1577453315258026,
      "learning_rate": 1.5486646942930754e-06,
      "loss": 0.0076,
      "step": 2585200
    },
    {
      "epoch": 4.230769230769231,
      "grad_norm": 0.3139204978942871,
      "learning_rate": 1.5485988020795584e-06,
      "loss": 0.0076,
      "step": 2585220
    },
    {
      "epoch": 4.230801961207884,
      "grad_norm": 0.26901406049728394,
      "learning_rate": 1.5485329098660411e-06,
      "loss": 0.007,
      "step": 2585240
    },
    {
      "epoch": 4.230834691646537,
      "grad_norm": 0.16297547519207,
      "learning_rate": 1.548467017652524e-06,
      "loss": 0.0056,
      "step": 2585260
    },
    {
      "epoch": 4.230867422085191,
      "grad_norm": 0.2080228179693222,
      "learning_rate": 1.5484011254390068e-06,
      "loss": 0.0058,
      "step": 2585280
    },
    {
      "epoch": 4.230900152523844,
      "grad_norm": 0.2380935251712799,
      "learning_rate": 1.54833523322549e-06,
      "loss": 0.0093,
      "step": 2585300
    },
    {
      "epoch": 4.230932882962497,
      "grad_norm": 0.07200954109430313,
      "learning_rate": 1.548269341011973e-06,
      "loss": 0.0106,
      "step": 2585320
    },
    {
      "epoch": 4.230965613401151,
      "grad_norm": 0.19405867159366608,
      "learning_rate": 1.5482034487984557e-06,
      "loss": 0.0072,
      "step": 2585340
    },
    {
      "epoch": 4.230998343839804,
      "grad_norm": 0.38889196515083313,
      "learning_rate": 1.5481375565849386e-06,
      "loss": 0.0076,
      "step": 2585360
    },
    {
      "epoch": 4.231031074278458,
      "grad_norm": 0.16175463795661926,
      "learning_rate": 1.5480716643714214e-06,
      "loss": 0.0162,
      "step": 2585380
    },
    {
      "epoch": 4.2310638047171105,
      "grad_norm": 0.1946858912706375,
      "learning_rate": 1.5480057721579041e-06,
      "loss": 0.0106,
      "step": 2585400
    },
    {
      "epoch": 4.231096535155764,
      "grad_norm": 0.6913724541664124,
      "learning_rate": 1.547939879944387e-06,
      "loss": 0.0121,
      "step": 2585420
    },
    {
      "epoch": 4.231129265594418,
      "grad_norm": 0.145081028342247,
      "learning_rate": 1.5478739877308698e-06,
      "loss": 0.0079,
      "step": 2585440
    },
    {
      "epoch": 4.23116199603307,
      "grad_norm": 0.05659569799900055,
      "learning_rate": 1.5478080955173528e-06,
      "loss": 0.0078,
      "step": 2585460
    },
    {
      "epoch": 4.231194726471724,
      "grad_norm": 0.15443497896194458,
      "learning_rate": 1.547742203303836e-06,
      "loss": 0.0099,
      "step": 2585480
    },
    {
      "epoch": 4.231227456910378,
      "grad_norm": 0.5990456342697144,
      "learning_rate": 1.5476763110903187e-06,
      "loss": 0.0069,
      "step": 2585500
    },
    {
      "epoch": 4.231260187349031,
      "grad_norm": 0.1750825047492981,
      "learning_rate": 1.5476104188768016e-06,
      "loss": 0.0079,
      "step": 2585520
    },
    {
      "epoch": 4.231292917787684,
      "grad_norm": 0.051492754369974136,
      "learning_rate": 1.5475445266632843e-06,
      "loss": 0.0102,
      "step": 2585540
    },
    {
      "epoch": 4.2313256482263375,
      "grad_norm": 0.14604070782661438,
      "learning_rate": 1.5474786344497673e-06,
      "loss": 0.0111,
      "step": 2585560
    },
    {
      "epoch": 4.231358378664991,
      "grad_norm": 0.3304482698440552,
      "learning_rate": 1.54741274223625e-06,
      "loss": 0.0089,
      "step": 2585580
    },
    {
      "epoch": 4.231391109103644,
      "grad_norm": 0.06415937095880508,
      "learning_rate": 1.5473468500227328e-06,
      "loss": 0.0133,
      "step": 2585600
    },
    {
      "epoch": 4.231423839542297,
      "grad_norm": 0.1428670436143875,
      "learning_rate": 1.5472809578092157e-06,
      "loss": 0.0082,
      "step": 2585620
    },
    {
      "epoch": 4.231456569980951,
      "grad_norm": 0.1438770294189453,
      "learning_rate": 1.5472150655956985e-06,
      "loss": 0.0114,
      "step": 2585640
    },
    {
      "epoch": 4.231489300419605,
      "grad_norm": 0.062398992478847504,
      "learning_rate": 1.5471491733821816e-06,
      "loss": 0.0087,
      "step": 2585660
    },
    {
      "epoch": 4.231522030858257,
      "grad_norm": 0.3372862637042999,
      "learning_rate": 1.5470832811686646e-06,
      "loss": 0.0085,
      "step": 2585680
    },
    {
      "epoch": 4.231554761296911,
      "grad_norm": 0.5680255889892578,
      "learning_rate": 1.5470173889551473e-06,
      "loss": 0.0089,
      "step": 2585700
    },
    {
      "epoch": 4.2315874917355645,
      "grad_norm": 0.23076826333999634,
      "learning_rate": 1.5469514967416303e-06,
      "loss": 0.0062,
      "step": 2585720
    },
    {
      "epoch": 4.231620222174217,
      "grad_norm": 0.055914800614118576,
      "learning_rate": 1.546885604528113e-06,
      "loss": 0.0087,
      "step": 2585740
    },
    {
      "epoch": 4.231652952612871,
      "grad_norm": 0.40478095412254333,
      "learning_rate": 1.546819712314596e-06,
      "loss": 0.0103,
      "step": 2585760
    },
    {
      "epoch": 4.231685683051524,
      "grad_norm": 0.20668482780456543,
      "learning_rate": 1.5467538201010787e-06,
      "loss": 0.0094,
      "step": 2585780
    },
    {
      "epoch": 4.231718413490178,
      "grad_norm": 0.2134350836277008,
      "learning_rate": 1.5466879278875617e-06,
      "loss": 0.0131,
      "step": 2585800
    },
    {
      "epoch": 4.231751143928831,
      "grad_norm": 0.3106350600719452,
      "learning_rate": 1.5466220356740444e-06,
      "loss": 0.0078,
      "step": 2585820
    },
    {
      "epoch": 4.231783874367484,
      "grad_norm": 0.6810034513473511,
      "learning_rate": 1.5465561434605276e-06,
      "loss": 0.0079,
      "step": 2585840
    },
    {
      "epoch": 4.231816604806138,
      "grad_norm": 0.20034480094909668,
      "learning_rate": 1.5464902512470103e-06,
      "loss": 0.0127,
      "step": 2585860
    },
    {
      "epoch": 4.231849335244791,
      "grad_norm": 0.3008626103401184,
      "learning_rate": 1.5464243590334933e-06,
      "loss": 0.01,
      "step": 2585880
    },
    {
      "epoch": 4.231882065683444,
      "grad_norm": 0.2938206195831299,
      "learning_rate": 1.546358466819976e-06,
      "loss": 0.0086,
      "step": 2585900
    },
    {
      "epoch": 4.231914796122098,
      "grad_norm": 0.2677653133869171,
      "learning_rate": 1.546292574606459e-06,
      "loss": 0.007,
      "step": 2585920
    },
    {
      "epoch": 4.231947526560751,
      "grad_norm": 0.3498486578464508,
      "learning_rate": 1.5462266823929417e-06,
      "loss": 0.0083,
      "step": 2585940
    },
    {
      "epoch": 4.231980256999404,
      "grad_norm": 0.061303671449422836,
      "learning_rate": 1.5461607901794246e-06,
      "loss": 0.0106,
      "step": 2585960
    },
    {
      "epoch": 4.232012987438058,
      "grad_norm": 0.4193769097328186,
      "learning_rate": 1.5460948979659074e-06,
      "loss": 0.0106,
      "step": 2585980
    },
    {
      "epoch": 4.232045717876711,
      "grad_norm": 0.3215706944465637,
      "learning_rate": 1.5460290057523903e-06,
      "loss": 0.0096,
      "step": 2586000
    },
    {
      "epoch": 4.232078448315364,
      "grad_norm": 0.4472244083881378,
      "learning_rate": 1.5459631135388733e-06,
      "loss": 0.0098,
      "step": 2586020
    },
    {
      "epoch": 4.2321111787540175,
      "grad_norm": 0.04137710481882095,
      "learning_rate": 1.5458972213253562e-06,
      "loss": 0.0078,
      "step": 2586040
    },
    {
      "epoch": 4.232143909192671,
      "grad_norm": 0.1326894313097,
      "learning_rate": 1.545831329111839e-06,
      "loss": 0.0074,
      "step": 2586060
    },
    {
      "epoch": 4.232176639631325,
      "grad_norm": 0.1605963110923767,
      "learning_rate": 1.545765436898322e-06,
      "loss": 0.0081,
      "step": 2586080
    },
    {
      "epoch": 4.232209370069977,
      "grad_norm": 0.4152747392654419,
      "learning_rate": 1.5456995446848047e-06,
      "loss": 0.0073,
      "step": 2586100
    },
    {
      "epoch": 4.232242100508631,
      "grad_norm": 0.456297367811203,
      "learning_rate": 1.5456336524712876e-06,
      "loss": 0.0084,
      "step": 2586120
    },
    {
      "epoch": 4.232274830947285,
      "grad_norm": 0.25149548053741455,
      "learning_rate": 1.5455677602577704e-06,
      "loss": 0.0107,
      "step": 2586140
    },
    {
      "epoch": 4.232307561385937,
      "grad_norm": 0.062476444989442825,
      "learning_rate": 1.5455018680442533e-06,
      "loss": 0.0128,
      "step": 2586160
    },
    {
      "epoch": 4.232340291824591,
      "grad_norm": 0.1649169623851776,
      "learning_rate": 1.5454359758307363e-06,
      "loss": 0.0104,
      "step": 2586180
    },
    {
      "epoch": 4.2323730222632445,
      "grad_norm": 0.15776340663433075,
      "learning_rate": 1.5453700836172192e-06,
      "loss": 0.0082,
      "step": 2586200
    },
    {
      "epoch": 4.232405752701898,
      "grad_norm": 0.10760532319545746,
      "learning_rate": 1.545304191403702e-06,
      "loss": 0.0102,
      "step": 2586220
    },
    {
      "epoch": 4.232438483140551,
      "grad_norm": 0.36323627829551697,
      "learning_rate": 1.545238299190185e-06,
      "loss": 0.0085,
      "step": 2586240
    },
    {
      "epoch": 4.232471213579204,
      "grad_norm": 0.14553053677082062,
      "learning_rate": 1.5451724069766676e-06,
      "loss": 0.0063,
      "step": 2586260
    },
    {
      "epoch": 4.232503944017858,
      "grad_norm": 0.10603775084018707,
      "learning_rate": 1.5451065147631506e-06,
      "loss": 0.0091,
      "step": 2586280
    },
    {
      "epoch": 4.232536674456511,
      "grad_norm": 0.6199564337730408,
      "learning_rate": 1.5450406225496333e-06,
      "loss": 0.0122,
      "step": 2586300
    },
    {
      "epoch": 4.232569404895164,
      "grad_norm": 0.3068094551563263,
      "learning_rate": 1.5449747303361163e-06,
      "loss": 0.0072,
      "step": 2586320
    },
    {
      "epoch": 4.232602135333818,
      "grad_norm": 0.3826674222946167,
      "learning_rate": 1.544908838122599e-06,
      "loss": 0.012,
      "step": 2586340
    },
    {
      "epoch": 4.2326348657724715,
      "grad_norm": 0.16830109059810638,
      "learning_rate": 1.5448429459090822e-06,
      "loss": 0.0064,
      "step": 2586360
    },
    {
      "epoch": 4.232667596211124,
      "grad_norm": 0.09533213824033737,
      "learning_rate": 1.544777053695565e-06,
      "loss": 0.0107,
      "step": 2586380
    },
    {
      "epoch": 4.232700326649778,
      "grad_norm": 0.1283249408006668,
      "learning_rate": 1.5447111614820479e-06,
      "loss": 0.0053,
      "step": 2586400
    },
    {
      "epoch": 4.232733057088431,
      "grad_norm": 0.21700838208198547,
      "learning_rate": 1.5446452692685306e-06,
      "loss": 0.009,
      "step": 2586420
    },
    {
      "epoch": 4.232765787527084,
      "grad_norm": 0.19278118014335632,
      "learning_rate": 1.5445793770550136e-06,
      "loss": 0.0054,
      "step": 2586440
    },
    {
      "epoch": 4.232798517965738,
      "grad_norm": 0.37629464268684387,
      "learning_rate": 1.5445134848414963e-06,
      "loss": 0.0112,
      "step": 2586460
    },
    {
      "epoch": 4.232831248404391,
      "grad_norm": 0.730903148651123,
      "learning_rate": 1.5444475926279793e-06,
      "loss": 0.0156,
      "step": 2586480
    },
    {
      "epoch": 4.232863978843045,
      "grad_norm": 0.20488373935222626,
      "learning_rate": 1.544381700414462e-06,
      "loss": 0.0104,
      "step": 2586500
    },
    {
      "epoch": 4.232896709281698,
      "grad_norm": 0.19047485291957855,
      "learning_rate": 1.544315808200945e-06,
      "loss": 0.0093,
      "step": 2586520
    },
    {
      "epoch": 4.232929439720351,
      "grad_norm": 0.33558961749076843,
      "learning_rate": 1.5442499159874281e-06,
      "loss": 0.0091,
      "step": 2586540
    },
    {
      "epoch": 4.232962170159005,
      "grad_norm": 0.14660830795764923,
      "learning_rate": 1.5441840237739109e-06,
      "loss": 0.0112,
      "step": 2586560
    },
    {
      "epoch": 4.2329949005976575,
      "grad_norm": 0.32199037075042725,
      "learning_rate": 1.5441181315603936e-06,
      "loss": 0.0078,
      "step": 2586580
    },
    {
      "epoch": 4.233027631036311,
      "grad_norm": 0.04441847652196884,
      "learning_rate": 1.5440522393468765e-06,
      "loss": 0.0126,
      "step": 2586600
    },
    {
      "epoch": 4.233060361474965,
      "grad_norm": 0.2992459535598755,
      "learning_rate": 1.5439863471333593e-06,
      "loss": 0.0073,
      "step": 2586620
    },
    {
      "epoch": 4.233093091913618,
      "grad_norm": 0.13996665179729462,
      "learning_rate": 1.5439204549198422e-06,
      "loss": 0.0109,
      "step": 2586640
    },
    {
      "epoch": 4.233125822352271,
      "grad_norm": 0.175765722990036,
      "learning_rate": 1.543854562706325e-06,
      "loss": 0.0066,
      "step": 2586660
    },
    {
      "epoch": 4.233158552790925,
      "grad_norm": 0.0814453586935997,
      "learning_rate": 1.543788670492808e-06,
      "loss": 0.0121,
      "step": 2586680
    },
    {
      "epoch": 4.233191283229578,
      "grad_norm": 0.3504563271999359,
      "learning_rate": 1.5437227782792907e-06,
      "loss": 0.0088,
      "step": 2586700
    },
    {
      "epoch": 4.233224013668231,
      "grad_norm": 0.37290725111961365,
      "learning_rate": 1.5436568860657738e-06,
      "loss": 0.0052,
      "step": 2586720
    },
    {
      "epoch": 4.2332567441068845,
      "grad_norm": 0.1647658497095108,
      "learning_rate": 1.5435909938522568e-06,
      "loss": 0.0075,
      "step": 2586740
    },
    {
      "epoch": 4.233289474545538,
      "grad_norm": 0.1607544869184494,
      "learning_rate": 1.5435251016387395e-06,
      "loss": 0.0074,
      "step": 2586760
    },
    {
      "epoch": 4.233322204984191,
      "grad_norm": 0.17594212293624878,
      "learning_rate": 1.5434592094252225e-06,
      "loss": 0.0068,
      "step": 2586780
    },
    {
      "epoch": 4.233354935422844,
      "grad_norm": 0.28638577461242676,
      "learning_rate": 1.5433933172117052e-06,
      "loss": 0.0079,
      "step": 2586800
    },
    {
      "epoch": 4.233387665861498,
      "grad_norm": 0.11950238794088364,
      "learning_rate": 1.543327424998188e-06,
      "loss": 0.0108,
      "step": 2586820
    },
    {
      "epoch": 4.2334203963001515,
      "grad_norm": 0.19231180846691132,
      "learning_rate": 1.543261532784671e-06,
      "loss": 0.0105,
      "step": 2586840
    },
    {
      "epoch": 4.233453126738804,
      "grad_norm": 0.4084605574607849,
      "learning_rate": 1.5431956405711536e-06,
      "loss": 0.0063,
      "step": 2586860
    },
    {
      "epoch": 4.233485857177458,
      "grad_norm": 0.5649709701538086,
      "learning_rate": 1.5431297483576366e-06,
      "loss": 0.0125,
      "step": 2586880
    },
    {
      "epoch": 4.233518587616111,
      "grad_norm": 0.2484842985868454,
      "learning_rate": 1.5430638561441198e-06,
      "loss": 0.0092,
      "step": 2586900
    },
    {
      "epoch": 4.233551318054764,
      "grad_norm": 0.13669444620609283,
      "learning_rate": 1.5429979639306025e-06,
      "loss": 0.0084,
      "step": 2586920
    },
    {
      "epoch": 4.233584048493418,
      "grad_norm": 0.29710638523101807,
      "learning_rate": 1.5429320717170854e-06,
      "loss": 0.0061,
      "step": 2586940
    },
    {
      "epoch": 4.233616778932071,
      "grad_norm": 0.030711937695741653,
      "learning_rate": 1.5428661795035682e-06,
      "loss": 0.0115,
      "step": 2586960
    },
    {
      "epoch": 4.233649509370725,
      "grad_norm": 0.09745762497186661,
      "learning_rate": 1.5428002872900511e-06,
      "loss": 0.0103,
      "step": 2586980
    },
    {
      "epoch": 4.233682239809378,
      "grad_norm": 0.08474363386631012,
      "learning_rate": 1.5427343950765339e-06,
      "loss": 0.0062,
      "step": 2587000
    },
    {
      "epoch": 4.233714970248031,
      "grad_norm": 0.16204310953617096,
      "learning_rate": 1.5426685028630168e-06,
      "loss": 0.0104,
      "step": 2587020
    },
    {
      "epoch": 4.233747700686685,
      "grad_norm": 0.25269854068756104,
      "learning_rate": 1.5426026106494996e-06,
      "loss": 0.0125,
      "step": 2587040
    },
    {
      "epoch": 4.2337804311253375,
      "grad_norm": 0.20709049701690674,
      "learning_rate": 1.5425367184359827e-06,
      "loss": 0.0087,
      "step": 2587060
    },
    {
      "epoch": 4.233813161563991,
      "grad_norm": 0.33930516242980957,
      "learning_rate": 1.5424708262224655e-06,
      "loss": 0.0093,
      "step": 2587080
    },
    {
      "epoch": 4.233845892002645,
      "grad_norm": 0.07096558064222336,
      "learning_rate": 1.5424049340089484e-06,
      "loss": 0.006,
      "step": 2587100
    },
    {
      "epoch": 4.233878622441298,
      "grad_norm": 0.08817104250192642,
      "learning_rate": 1.5423390417954312e-06,
      "loss": 0.0097,
      "step": 2587120
    },
    {
      "epoch": 4.233911352879951,
      "grad_norm": 0.12339898198843002,
      "learning_rate": 1.5422731495819141e-06,
      "loss": 0.0078,
      "step": 2587140
    },
    {
      "epoch": 4.233944083318605,
      "grad_norm": 0.16738685965538025,
      "learning_rate": 1.5422072573683969e-06,
      "loss": 0.0094,
      "step": 2587160
    },
    {
      "epoch": 4.233976813757258,
      "grad_norm": 0.17728236317634583,
      "learning_rate": 1.5421413651548798e-06,
      "loss": 0.0087,
      "step": 2587180
    },
    {
      "epoch": 4.234009544195911,
      "grad_norm": 0.17543525993824005,
      "learning_rate": 1.5420754729413625e-06,
      "loss": 0.0075,
      "step": 2587200
    },
    {
      "epoch": 4.2340422746345645,
      "grad_norm": 0.17974194884300232,
      "learning_rate": 1.5420095807278455e-06,
      "loss": 0.0098,
      "step": 2587220
    },
    {
      "epoch": 4.234075005073218,
      "grad_norm": 0.07683699578046799,
      "learning_rate": 1.5419436885143285e-06,
      "loss": 0.011,
      "step": 2587240
    },
    {
      "epoch": 4.234107735511872,
      "grad_norm": 0.18286366760730743,
      "learning_rate": 1.5418777963008114e-06,
      "loss": 0.0096,
      "step": 2587260
    },
    {
      "epoch": 4.234140465950524,
      "grad_norm": 0.12772177159786224,
      "learning_rate": 1.5418119040872941e-06,
      "loss": 0.0061,
      "step": 2587280
    },
    {
      "epoch": 4.234173196389178,
      "grad_norm": 0.5320173501968384,
      "learning_rate": 1.541746011873777e-06,
      "loss": 0.0086,
      "step": 2587300
    },
    {
      "epoch": 4.234205926827832,
      "grad_norm": 0.29115715622901917,
      "learning_rate": 1.5416801196602598e-06,
      "loss": 0.0073,
      "step": 2587320
    },
    {
      "epoch": 4.234238657266484,
      "grad_norm": 0.15113075077533722,
      "learning_rate": 1.5416142274467428e-06,
      "loss": 0.0094,
      "step": 2587340
    },
    {
      "epoch": 4.234271387705138,
      "grad_norm": 0.2627732753753662,
      "learning_rate": 1.5415483352332255e-06,
      "loss": 0.0111,
      "step": 2587360
    },
    {
      "epoch": 4.2343041181437915,
      "grad_norm": 0.24077482521533966,
      "learning_rate": 1.5414824430197085e-06,
      "loss": 0.0089,
      "step": 2587380
    },
    {
      "epoch": 4.234336848582445,
      "grad_norm": 0.215309739112854,
      "learning_rate": 1.5414165508061912e-06,
      "loss": 0.0085,
      "step": 2587400
    },
    {
      "epoch": 4.234369579021098,
      "grad_norm": 0.31467875838279724,
      "learning_rate": 1.5413506585926744e-06,
      "loss": 0.0133,
      "step": 2587420
    },
    {
      "epoch": 4.234402309459751,
      "grad_norm": 0.6395063400268555,
      "learning_rate": 1.5412847663791571e-06,
      "loss": 0.0065,
      "step": 2587440
    },
    {
      "epoch": 4.234435039898405,
      "grad_norm": 0.2282591015100479,
      "learning_rate": 1.54121887416564e-06,
      "loss": 0.008,
      "step": 2587460
    },
    {
      "epoch": 4.234467770337058,
      "grad_norm": 0.0678895115852356,
      "learning_rate": 1.5411529819521228e-06,
      "loss": 0.0114,
      "step": 2587480
    },
    {
      "epoch": 4.234500500775711,
      "grad_norm": 0.16406363248825073,
      "learning_rate": 1.5410870897386058e-06,
      "loss": 0.0088,
      "step": 2587500
    },
    {
      "epoch": 4.234533231214365,
      "grad_norm": 0.09920912235975266,
      "learning_rate": 1.5410211975250885e-06,
      "loss": 0.0079,
      "step": 2587520
    },
    {
      "epoch": 4.2345659616530185,
      "grad_norm": 0.16379967331886292,
      "learning_rate": 1.5409553053115715e-06,
      "loss": 0.0113,
      "step": 2587540
    },
    {
      "epoch": 4.234598692091671,
      "grad_norm": 0.47276732325553894,
      "learning_rate": 1.5408894130980542e-06,
      "loss": 0.0084,
      "step": 2587560
    },
    {
      "epoch": 4.234631422530325,
      "grad_norm": 0.2434830665588379,
      "learning_rate": 1.5408235208845371e-06,
      "loss": 0.0059,
      "step": 2587580
    },
    {
      "epoch": 4.234664152968978,
      "grad_norm": 0.2869971990585327,
      "learning_rate": 1.54075762867102e-06,
      "loss": 0.0093,
      "step": 2587600
    },
    {
      "epoch": 4.234696883407631,
      "grad_norm": 0.13877157866954803,
      "learning_rate": 1.540691736457503e-06,
      "loss": 0.0063,
      "step": 2587620
    },
    {
      "epoch": 4.234729613846285,
      "grad_norm": 0.06395860761404037,
      "learning_rate": 1.5406258442439858e-06,
      "loss": 0.0081,
      "step": 2587640
    },
    {
      "epoch": 4.234762344284938,
      "grad_norm": 0.43182310461997986,
      "learning_rate": 1.5405599520304687e-06,
      "loss": 0.0097,
      "step": 2587660
    },
    {
      "epoch": 4.234795074723592,
      "grad_norm": 0.47110429406166077,
      "learning_rate": 1.5404940598169515e-06,
      "loss": 0.005,
      "step": 2587680
    },
    {
      "epoch": 4.2348278051622446,
      "grad_norm": 0.0980377271771431,
      "learning_rate": 1.5404281676034344e-06,
      "loss": 0.0073,
      "step": 2587700
    },
    {
      "epoch": 4.234860535600898,
      "grad_norm": 0.25351253151893616,
      "learning_rate": 1.5403622753899172e-06,
      "loss": 0.0105,
      "step": 2587720
    },
    {
      "epoch": 4.234893266039552,
      "grad_norm": 0.053594548255205154,
      "learning_rate": 1.5402963831764001e-06,
      "loss": 0.0049,
      "step": 2587740
    },
    {
      "epoch": 4.2349259964782044,
      "grad_norm": 0.30724453926086426,
      "learning_rate": 1.5402304909628829e-06,
      "loss": 0.0145,
      "step": 2587760
    },
    {
      "epoch": 4.234958726916858,
      "grad_norm": 0.31518465280532837,
      "learning_rate": 1.540164598749366e-06,
      "loss": 0.0102,
      "step": 2587780
    },
    {
      "epoch": 4.234991457355512,
      "grad_norm": 0.1851610243320465,
      "learning_rate": 1.5400987065358488e-06,
      "loss": 0.0076,
      "step": 2587800
    },
    {
      "epoch": 4.235024187794165,
      "grad_norm": 0.1803436130285263,
      "learning_rate": 1.5400328143223317e-06,
      "loss": 0.0084,
      "step": 2587820
    },
    {
      "epoch": 4.235056918232818,
      "grad_norm": 0.2163316309452057,
      "learning_rate": 1.5399669221088145e-06,
      "loss": 0.0072,
      "step": 2587840
    },
    {
      "epoch": 4.2350896486714715,
      "grad_norm": 0.11743572354316711,
      "learning_rate": 1.5399010298952974e-06,
      "loss": 0.0077,
      "step": 2587860
    },
    {
      "epoch": 4.235122379110125,
      "grad_norm": 0.6765616536140442,
      "learning_rate": 1.5398351376817801e-06,
      "loss": 0.0081,
      "step": 2587880
    },
    {
      "epoch": 4.235155109548778,
      "grad_norm": 0.22364680469036102,
      "learning_rate": 1.539769245468263e-06,
      "loss": 0.01,
      "step": 2587900
    },
    {
      "epoch": 4.235187839987431,
      "grad_norm": 0.1252378225326538,
      "learning_rate": 1.5397033532547458e-06,
      "loss": 0.0078,
      "step": 2587920
    },
    {
      "epoch": 4.235220570426085,
      "grad_norm": 0.09021355956792831,
      "learning_rate": 1.539637461041229e-06,
      "loss": 0.0106,
      "step": 2587940
    },
    {
      "epoch": 4.235253300864739,
      "grad_norm": 0.4522588551044464,
      "learning_rate": 1.539571568827712e-06,
      "loss": 0.0082,
      "step": 2587960
    },
    {
      "epoch": 4.235286031303391,
      "grad_norm": 0.10076849162578583,
      "learning_rate": 1.5395056766141947e-06,
      "loss": 0.0096,
      "step": 2587980
    },
    {
      "epoch": 4.235318761742045,
      "grad_norm": 0.39464133977890015,
      "learning_rate": 1.5394397844006776e-06,
      "loss": 0.0098,
      "step": 2588000
    },
    {
      "epoch": 4.2353514921806985,
      "grad_norm": 0.2921207845211029,
      "learning_rate": 1.5393738921871604e-06,
      "loss": 0.0059,
      "step": 2588020
    },
    {
      "epoch": 4.235384222619351,
      "grad_norm": 0.24165141582489014,
      "learning_rate": 1.5393079999736431e-06,
      "loss": 0.008,
      "step": 2588040
    },
    {
      "epoch": 4.235416953058005,
      "grad_norm": 0.3319554328918457,
      "learning_rate": 1.539242107760126e-06,
      "loss": 0.0062,
      "step": 2588060
    },
    {
      "epoch": 4.235449683496658,
      "grad_norm": 0.18111051619052887,
      "learning_rate": 1.5391762155466088e-06,
      "loss": 0.0098,
      "step": 2588080
    },
    {
      "epoch": 4.235482413935312,
      "grad_norm": 0.2506665289402008,
      "learning_rate": 1.5391103233330918e-06,
      "loss": 0.009,
      "step": 2588100
    },
    {
      "epoch": 4.235515144373965,
      "grad_norm": 0.28552812337875366,
      "learning_rate": 1.539044431119575e-06,
      "loss": 0.0057,
      "step": 2588120
    },
    {
      "epoch": 4.235547874812618,
      "grad_norm": 0.2747434079647064,
      "learning_rate": 1.5389785389060577e-06,
      "loss": 0.0118,
      "step": 2588140
    },
    {
      "epoch": 4.235580605251272,
      "grad_norm": 0.13996806740760803,
      "learning_rate": 1.5389126466925406e-06,
      "loss": 0.0078,
      "step": 2588160
    },
    {
      "epoch": 4.235613335689925,
      "grad_norm": 0.1686566323041916,
      "learning_rate": 1.5388467544790234e-06,
      "loss": 0.0111,
      "step": 2588180
    },
    {
      "epoch": 4.235646066128578,
      "grad_norm": 0.13091571629047394,
      "learning_rate": 1.5387808622655063e-06,
      "loss": 0.0059,
      "step": 2588200
    },
    {
      "epoch": 4.235678796567232,
      "grad_norm": 0.4254227876663208,
      "learning_rate": 1.538714970051989e-06,
      "loss": 0.0096,
      "step": 2588220
    },
    {
      "epoch": 4.2357115270058845,
      "grad_norm": 0.16440324485301971,
      "learning_rate": 1.5386490778384718e-06,
      "loss": 0.009,
      "step": 2588240
    },
    {
      "epoch": 4.235744257444538,
      "grad_norm": 0.08282470703125,
      "learning_rate": 1.5385831856249547e-06,
      "loss": 0.0074,
      "step": 2588260
    },
    {
      "epoch": 4.235776987883192,
      "grad_norm": 0.33894482254981995,
      "learning_rate": 1.5385172934114375e-06,
      "loss": 0.007,
      "step": 2588280
    },
    {
      "epoch": 4.235809718321845,
      "grad_norm": 0.26742011308670044,
      "learning_rate": 1.5384514011979206e-06,
      "loss": 0.0071,
      "step": 2588300
    },
    {
      "epoch": 4.235842448760498,
      "grad_norm": 0.22412782907485962,
      "learning_rate": 1.5383855089844036e-06,
      "loss": 0.014,
      "step": 2588320
    },
    {
      "epoch": 4.235875179199152,
      "grad_norm": 0.17495985329151154,
      "learning_rate": 1.5383196167708863e-06,
      "loss": 0.0084,
      "step": 2588340
    },
    {
      "epoch": 4.235907909637805,
      "grad_norm": 0.26544123888015747,
      "learning_rate": 1.5382537245573693e-06,
      "loss": 0.007,
      "step": 2588360
    },
    {
      "epoch": 4.235940640076458,
      "grad_norm": 0.12616878747940063,
      "learning_rate": 1.538187832343852e-06,
      "loss": 0.0088,
      "step": 2588380
    },
    {
      "epoch": 4.2359733705151115,
      "grad_norm": 0.5571750402450562,
      "learning_rate": 1.538121940130335e-06,
      "loss": 0.0076,
      "step": 2588400
    },
    {
      "epoch": 4.236006100953765,
      "grad_norm": 0.22134073078632355,
      "learning_rate": 1.5380560479168177e-06,
      "loss": 0.0105,
      "step": 2588420
    },
    {
      "epoch": 4.236038831392419,
      "grad_norm": 0.4581640362739563,
      "learning_rate": 1.5379901557033007e-06,
      "loss": 0.0106,
      "step": 2588440
    },
    {
      "epoch": 4.236071561831071,
      "grad_norm": 0.18902379274368286,
      "learning_rate": 1.5379242634897834e-06,
      "loss": 0.0089,
      "step": 2588460
    },
    {
      "epoch": 4.236104292269725,
      "grad_norm": 0.20766068994998932,
      "learning_rate": 1.5378583712762666e-06,
      "loss": 0.0097,
      "step": 2588480
    },
    {
      "epoch": 4.2361370227083786,
      "grad_norm": 0.38659048080444336,
      "learning_rate": 1.5377924790627493e-06,
      "loss": 0.0074,
      "step": 2588500
    },
    {
      "epoch": 4.236169753147031,
      "grad_norm": 0.1690596640110016,
      "learning_rate": 1.5377265868492323e-06,
      "loss": 0.0056,
      "step": 2588520
    },
    {
      "epoch": 4.236202483585685,
      "grad_norm": 0.23756037652492523,
      "learning_rate": 1.537660694635715e-06,
      "loss": 0.0103,
      "step": 2588540
    },
    {
      "epoch": 4.2362352140243384,
      "grad_norm": 0.14856714010238647,
      "learning_rate": 1.537594802422198e-06,
      "loss": 0.0113,
      "step": 2588560
    },
    {
      "epoch": 4.236267944462992,
      "grad_norm": 0.20705099403858185,
      "learning_rate": 1.5375289102086807e-06,
      "loss": 0.0103,
      "step": 2588580
    },
    {
      "epoch": 4.236300674901645,
      "grad_norm": 0.1709287315607071,
      "learning_rate": 1.5374630179951636e-06,
      "loss": 0.0099,
      "step": 2588600
    },
    {
      "epoch": 4.236333405340298,
      "grad_norm": 0.14726467430591583,
      "learning_rate": 1.5373971257816464e-06,
      "loss": 0.0068,
      "step": 2588620
    },
    {
      "epoch": 4.236366135778952,
      "grad_norm": 0.08260763436555862,
      "learning_rate": 1.5373312335681293e-06,
      "loss": 0.0072,
      "step": 2588640
    },
    {
      "epoch": 4.236398866217605,
      "grad_norm": 0.08112317323684692,
      "learning_rate": 1.5372653413546123e-06,
      "loss": 0.0105,
      "step": 2588660
    },
    {
      "epoch": 4.236431596656258,
      "grad_norm": 0.15790820121765137,
      "learning_rate": 1.5371994491410952e-06,
      "loss": 0.0094,
      "step": 2588680
    },
    {
      "epoch": 4.236464327094912,
      "grad_norm": 0.05482712388038635,
      "learning_rate": 1.537133556927578e-06,
      "loss": 0.0117,
      "step": 2588700
    },
    {
      "epoch": 4.236497057533565,
      "grad_norm": 0.06205722317099571,
      "learning_rate": 1.537067664714061e-06,
      "loss": 0.011,
      "step": 2588720
    },
    {
      "epoch": 4.236529787972218,
      "grad_norm": 0.48140913248062134,
      "learning_rate": 1.5370017725005437e-06,
      "loss": 0.0127,
      "step": 2588740
    },
    {
      "epoch": 4.236562518410872,
      "grad_norm": 0.21757221221923828,
      "learning_rate": 1.5369358802870266e-06,
      "loss": 0.0118,
      "step": 2588760
    },
    {
      "epoch": 4.236595248849525,
      "grad_norm": 0.36731138825416565,
      "learning_rate": 1.5368699880735094e-06,
      "loss": 0.0148,
      "step": 2588780
    },
    {
      "epoch": 4.236627979288178,
      "grad_norm": 0.08886026591062546,
      "learning_rate": 1.5368040958599923e-06,
      "loss": 0.0083,
      "step": 2588800
    },
    {
      "epoch": 4.236660709726832,
      "grad_norm": 0.46960607171058655,
      "learning_rate": 1.5367382036464753e-06,
      "loss": 0.012,
      "step": 2588820
    },
    {
      "epoch": 4.236693440165485,
      "grad_norm": 0.08037689328193665,
      "learning_rate": 1.5366723114329582e-06,
      "loss": 0.0066,
      "step": 2588840
    },
    {
      "epoch": 4.236726170604139,
      "grad_norm": 0.23667097091674805,
      "learning_rate": 1.536606419219441e-06,
      "loss": 0.0124,
      "step": 2588860
    },
    {
      "epoch": 4.2367589010427915,
      "grad_norm": 0.5138086676597595,
      "learning_rate": 1.536540527005924e-06,
      "loss": 0.0063,
      "step": 2588880
    },
    {
      "epoch": 4.236791631481445,
      "grad_norm": 0.745686948299408,
      "learning_rate": 1.5364746347924066e-06,
      "loss": 0.0094,
      "step": 2588900
    },
    {
      "epoch": 4.236824361920099,
      "grad_norm": 0.44738972187042236,
      "learning_rate": 1.5364087425788896e-06,
      "loss": 0.0108,
      "step": 2588920
    },
    {
      "epoch": 4.236857092358751,
      "grad_norm": 0.09594622999429703,
      "learning_rate": 1.5363428503653723e-06,
      "loss": 0.0088,
      "step": 2588940
    },
    {
      "epoch": 4.236889822797405,
      "grad_norm": 0.5331364870071411,
      "learning_rate": 1.5362769581518553e-06,
      "loss": 0.0093,
      "step": 2588960
    },
    {
      "epoch": 4.236922553236059,
      "grad_norm": 0.755928635597229,
      "learning_rate": 1.536211065938338e-06,
      "loss": 0.0087,
      "step": 2588980
    },
    {
      "epoch": 4.236955283674712,
      "grad_norm": 0.07147257775068283,
      "learning_rate": 1.5361451737248212e-06,
      "loss": 0.0068,
      "step": 2589000
    },
    {
      "epoch": 4.236988014113365,
      "grad_norm": 0.0922817513346672,
      "learning_rate": 1.536079281511304e-06,
      "loss": 0.0094,
      "step": 2589020
    },
    {
      "epoch": 4.2370207445520185,
      "grad_norm": 0.2993188202381134,
      "learning_rate": 1.5360133892977869e-06,
      "loss": 0.0103,
      "step": 2589040
    },
    {
      "epoch": 4.237053474990672,
      "grad_norm": 0.23751433193683624,
      "learning_rate": 1.5359474970842696e-06,
      "loss": 0.0076,
      "step": 2589060
    },
    {
      "epoch": 4.237086205429325,
      "grad_norm": 0.2001550793647766,
      "learning_rate": 1.5358816048707526e-06,
      "loss": 0.0066,
      "step": 2589080
    },
    {
      "epoch": 4.237118935867978,
      "grad_norm": 0.28225618600845337,
      "learning_rate": 1.5358157126572353e-06,
      "loss": 0.0103,
      "step": 2589100
    },
    {
      "epoch": 4.237151666306632,
      "grad_norm": 0.1341029554605484,
      "learning_rate": 1.5357498204437183e-06,
      "loss": 0.0096,
      "step": 2589120
    },
    {
      "epoch": 4.237184396745286,
      "grad_norm": 0.24556341767311096,
      "learning_rate": 1.535683928230201e-06,
      "loss": 0.0106,
      "step": 2589140
    },
    {
      "epoch": 4.237217127183938,
      "grad_norm": 0.6012829542160034,
      "learning_rate": 1.535618036016684e-06,
      "loss": 0.0103,
      "step": 2589160
    },
    {
      "epoch": 4.237249857622592,
      "grad_norm": 0.15421165525913239,
      "learning_rate": 1.5355521438031671e-06,
      "loss": 0.0096,
      "step": 2589180
    },
    {
      "epoch": 4.2372825880612455,
      "grad_norm": 0.3171643018722534,
      "learning_rate": 1.5354862515896499e-06,
      "loss": 0.0082,
      "step": 2589200
    },
    {
      "epoch": 4.237315318499898,
      "grad_norm": 0.23499390482902527,
      "learning_rate": 1.5354203593761326e-06,
      "loss": 0.0053,
      "step": 2589220
    },
    {
      "epoch": 4.237348048938552,
      "grad_norm": 0.6694077849388123,
      "learning_rate": 1.5353544671626156e-06,
      "loss": 0.0077,
      "step": 2589240
    },
    {
      "epoch": 4.237380779377205,
      "grad_norm": 0.07649262994527817,
      "learning_rate": 1.5352885749490983e-06,
      "loss": 0.0082,
      "step": 2589260
    },
    {
      "epoch": 4.237413509815859,
      "grad_norm": 0.06162384897470474,
      "learning_rate": 1.5352226827355812e-06,
      "loss": 0.0102,
      "step": 2589280
    },
    {
      "epoch": 4.237446240254512,
      "grad_norm": 0.5228750109672546,
      "learning_rate": 1.535156790522064e-06,
      "loss": 0.0052,
      "step": 2589300
    },
    {
      "epoch": 4.237478970693165,
      "grad_norm": 0.14194050431251526,
      "learning_rate": 1.535090898308547e-06,
      "loss": 0.01,
      "step": 2589320
    },
    {
      "epoch": 4.237511701131819,
      "grad_norm": 0.36306172609329224,
      "learning_rate": 1.5350250060950297e-06,
      "loss": 0.0079,
      "step": 2589340
    },
    {
      "epoch": 4.237544431570472,
      "grad_norm": 0.3705713450908661,
      "learning_rate": 1.5349591138815128e-06,
      "loss": 0.0076,
      "step": 2589360
    },
    {
      "epoch": 4.237577162009125,
      "grad_norm": 0.19469612836837769,
      "learning_rate": 1.5348932216679958e-06,
      "loss": 0.0069,
      "step": 2589380
    },
    {
      "epoch": 4.237609892447779,
      "grad_norm": 0.05818391218781471,
      "learning_rate": 1.5348273294544785e-06,
      "loss": 0.0064,
      "step": 2589400
    },
    {
      "epoch": 4.237642622886432,
      "grad_norm": 0.17117813229560852,
      "learning_rate": 1.5347614372409615e-06,
      "loss": 0.0172,
      "step": 2589420
    },
    {
      "epoch": 4.237675353325085,
      "grad_norm": 0.39162954688072205,
      "learning_rate": 1.5346955450274442e-06,
      "loss": 0.0071,
      "step": 2589440
    },
    {
      "epoch": 4.237708083763739,
      "grad_norm": 0.08230997622013092,
      "learning_rate": 1.534629652813927e-06,
      "loss": 0.0106,
      "step": 2589460
    },
    {
      "epoch": 4.237740814202392,
      "grad_norm": 0.3308776021003723,
      "learning_rate": 1.53456376060041e-06,
      "loss": 0.0097,
      "step": 2589480
    },
    {
      "epoch": 4.237773544641045,
      "grad_norm": 0.13336384296417236,
      "learning_rate": 1.5344978683868927e-06,
      "loss": 0.0117,
      "step": 2589500
    },
    {
      "epoch": 4.2378062750796985,
      "grad_norm": 0.10444027185440063,
      "learning_rate": 1.5344319761733756e-06,
      "loss": 0.0073,
      "step": 2589520
    },
    {
      "epoch": 4.237839005518352,
      "grad_norm": 0.35213714838027954,
      "learning_rate": 1.5343660839598588e-06,
      "loss": 0.0077,
      "step": 2589540
    },
    {
      "epoch": 4.237871735957006,
      "grad_norm": 0.25933724641799927,
      "learning_rate": 1.5343001917463415e-06,
      "loss": 0.0109,
      "step": 2589560
    },
    {
      "epoch": 4.237904466395658,
      "grad_norm": 0.1844237595796585,
      "learning_rate": 1.5342342995328245e-06,
      "loss": 0.0083,
      "step": 2589580
    },
    {
      "epoch": 4.237937196834312,
      "grad_norm": 0.158376544713974,
      "learning_rate": 1.5341684073193072e-06,
      "loss": 0.0059,
      "step": 2589600
    },
    {
      "epoch": 4.237969927272966,
      "grad_norm": 0.1910346895456314,
      "learning_rate": 1.5341025151057902e-06,
      "loss": 0.0096,
      "step": 2589620
    },
    {
      "epoch": 4.238002657711618,
      "grad_norm": 0.19981735944747925,
      "learning_rate": 1.5340366228922729e-06,
      "loss": 0.0138,
      "step": 2589640
    },
    {
      "epoch": 4.238035388150272,
      "grad_norm": 0.0928754210472107,
      "learning_rate": 1.5339707306787558e-06,
      "loss": 0.0102,
      "step": 2589660
    },
    {
      "epoch": 4.2380681185889255,
      "grad_norm": 0.1393015831708908,
      "learning_rate": 1.5339048384652386e-06,
      "loss": 0.0073,
      "step": 2589680
    },
    {
      "epoch": 4.238100849027578,
      "grad_norm": 0.6414217352867126,
      "learning_rate": 1.5338389462517217e-06,
      "loss": 0.0103,
      "step": 2589700
    },
    {
      "epoch": 4.238133579466232,
      "grad_norm": 0.2639021575450897,
      "learning_rate": 1.5337730540382045e-06,
      "loss": 0.0089,
      "step": 2589720
    },
    {
      "epoch": 4.238166309904885,
      "grad_norm": 0.32297348976135254,
      "learning_rate": 1.5337071618246874e-06,
      "loss": 0.0127,
      "step": 2589740
    },
    {
      "epoch": 4.238199040343539,
      "grad_norm": 0.09337335079908371,
      "learning_rate": 1.5336412696111702e-06,
      "loss": 0.0067,
      "step": 2589760
    },
    {
      "epoch": 4.238231770782192,
      "grad_norm": 0.3186992406845093,
      "learning_rate": 1.5335753773976531e-06,
      "loss": 0.0081,
      "step": 2589780
    },
    {
      "epoch": 4.238264501220845,
      "grad_norm": 0.45836547017097473,
      "learning_rate": 1.5335094851841359e-06,
      "loss": 0.0094,
      "step": 2589800
    },
    {
      "epoch": 4.238297231659499,
      "grad_norm": 0.13391615450382233,
      "learning_rate": 1.5334435929706188e-06,
      "loss": 0.0065,
      "step": 2589820
    },
    {
      "epoch": 4.2383299620981525,
      "grad_norm": 0.39652401208877563,
      "learning_rate": 1.5333777007571016e-06,
      "loss": 0.0138,
      "step": 2589840
    },
    {
      "epoch": 4.238362692536805,
      "grad_norm": 0.31035128235816956,
      "learning_rate": 1.5333118085435845e-06,
      "loss": 0.0119,
      "step": 2589860
    },
    {
      "epoch": 4.238395422975459,
      "grad_norm": 0.08434144407510757,
      "learning_rate": 1.5332459163300675e-06,
      "loss": 0.0064,
      "step": 2589880
    },
    {
      "epoch": 4.238428153414112,
      "grad_norm": 0.20018616318702698,
      "learning_rate": 1.5331800241165504e-06,
      "loss": 0.0119,
      "step": 2589900
    },
    {
      "epoch": 4.238460883852765,
      "grad_norm": 0.42825645208358765,
      "learning_rate": 1.5331141319030332e-06,
      "loss": 0.0092,
      "step": 2589920
    },
    {
      "epoch": 4.238493614291419,
      "grad_norm": 0.22572265565395355,
      "learning_rate": 1.533048239689516e-06,
      "loss": 0.0129,
      "step": 2589940
    },
    {
      "epoch": 4.238526344730072,
      "grad_norm": 0.05765156075358391,
      "learning_rate": 1.5329823474759988e-06,
      "loss": 0.0114,
      "step": 2589960
    },
    {
      "epoch": 4.238559075168725,
      "grad_norm": 0.6201874613761902,
      "learning_rate": 1.5329164552624818e-06,
      "loss": 0.0104,
      "step": 2589980
    },
    {
      "epoch": 4.238591805607379,
      "grad_norm": 0.21259705722332,
      "learning_rate": 1.5328505630489645e-06,
      "loss": 0.0074,
      "step": 2590000
    },
    {
      "epoch": 4.238624536046032,
      "grad_norm": 0.1854465752840042,
      "learning_rate": 1.5327846708354475e-06,
      "loss": 0.008,
      "step": 2590020
    },
    {
      "epoch": 4.238657266484686,
      "grad_norm": 0.26035645604133606,
      "learning_rate": 1.5327187786219302e-06,
      "loss": 0.0104,
      "step": 2590040
    },
    {
      "epoch": 4.2386899969233385,
      "grad_norm": 0.18344226479530334,
      "learning_rate": 1.5326528864084134e-06,
      "loss": 0.0103,
      "step": 2590060
    },
    {
      "epoch": 4.238722727361992,
      "grad_norm": 0.4759429693222046,
      "learning_rate": 1.5325869941948961e-06,
      "loss": 0.0125,
      "step": 2590080
    },
    {
      "epoch": 4.238755457800646,
      "grad_norm": 0.11150193959474564,
      "learning_rate": 1.532521101981379e-06,
      "loss": 0.0073,
      "step": 2590100
    },
    {
      "epoch": 4.238788188239298,
      "grad_norm": 0.21358580887317657,
      "learning_rate": 1.5324552097678618e-06,
      "loss": 0.0125,
      "step": 2590120
    },
    {
      "epoch": 4.238820918677952,
      "grad_norm": 0.08739013224840164,
      "learning_rate": 1.5323893175543448e-06,
      "loss": 0.0104,
      "step": 2590140
    },
    {
      "epoch": 4.238853649116606,
      "grad_norm": 0.18268270790576935,
      "learning_rate": 1.5323234253408275e-06,
      "loss": 0.0066,
      "step": 2590160
    },
    {
      "epoch": 4.238886379555259,
      "grad_norm": 0.07350665330886841,
      "learning_rate": 1.5322575331273105e-06,
      "loss": 0.0065,
      "step": 2590180
    },
    {
      "epoch": 4.238919109993912,
      "grad_norm": 0.17807114124298096,
      "learning_rate": 1.5321916409137932e-06,
      "loss": 0.0099,
      "step": 2590200
    },
    {
      "epoch": 4.2389518404325655,
      "grad_norm": 0.2777601182460785,
      "learning_rate": 1.5321257487002762e-06,
      "loss": 0.0094,
      "step": 2590220
    },
    {
      "epoch": 4.238984570871219,
      "grad_norm": 0.15056471526622772,
      "learning_rate": 1.5320598564867591e-06,
      "loss": 0.0092,
      "step": 2590240
    },
    {
      "epoch": 4.239017301309872,
      "grad_norm": 0.18688268959522247,
      "learning_rate": 1.531993964273242e-06,
      "loss": 0.0102,
      "step": 2590260
    },
    {
      "epoch": 4.239050031748525,
      "grad_norm": 0.09849308431148529,
      "learning_rate": 1.5319280720597248e-06,
      "loss": 0.0112,
      "step": 2590280
    },
    {
      "epoch": 4.239082762187179,
      "grad_norm": 0.141329824924469,
      "learning_rate": 1.5318621798462077e-06,
      "loss": 0.008,
      "step": 2590300
    },
    {
      "epoch": 4.2391154926258325,
      "grad_norm": 0.2540319859981537,
      "learning_rate": 1.5317962876326905e-06,
      "loss": 0.0089,
      "step": 2590320
    },
    {
      "epoch": 4.239148223064485,
      "grad_norm": 0.19617995619773865,
      "learning_rate": 1.5317303954191734e-06,
      "loss": 0.0122,
      "step": 2590340
    },
    {
      "epoch": 4.239180953503139,
      "grad_norm": 0.3447589576244354,
      "learning_rate": 1.5316645032056562e-06,
      "loss": 0.0077,
      "step": 2590360
    },
    {
      "epoch": 4.239213683941792,
      "grad_norm": 0.16001026332378387,
      "learning_rate": 1.5315986109921391e-06,
      "loss": 0.0093,
      "step": 2590380
    },
    {
      "epoch": 4.239246414380445,
      "grad_norm": 0.21104390919208527,
      "learning_rate": 1.5315327187786219e-06,
      "loss": 0.0114,
      "step": 2590400
    },
    {
      "epoch": 4.239279144819099,
      "grad_norm": 0.5708539485931396,
      "learning_rate": 1.531466826565105e-06,
      "loss": 0.0097,
      "step": 2590420
    },
    {
      "epoch": 4.239311875257752,
      "grad_norm": 0.32584941387176514,
      "learning_rate": 1.5314009343515878e-06,
      "loss": 0.0055,
      "step": 2590440
    },
    {
      "epoch": 4.239344605696406,
      "grad_norm": 0.21311229467391968,
      "learning_rate": 1.5313350421380707e-06,
      "loss": 0.009,
      "step": 2590460
    },
    {
      "epoch": 4.239377336135059,
      "grad_norm": 0.1648583561182022,
      "learning_rate": 1.5312691499245535e-06,
      "loss": 0.0079,
      "step": 2590480
    },
    {
      "epoch": 4.239410066573712,
      "grad_norm": 0.08494524657726288,
      "learning_rate": 1.5312032577110364e-06,
      "loss": 0.0082,
      "step": 2590500
    },
    {
      "epoch": 4.239442797012366,
      "grad_norm": 0.21404819190502167,
      "learning_rate": 1.5311373654975192e-06,
      "loss": 0.0119,
      "step": 2590520
    },
    {
      "epoch": 4.2394755274510185,
      "grad_norm": 0.5921867489814758,
      "learning_rate": 1.5310714732840021e-06,
      "loss": 0.0079,
      "step": 2590540
    },
    {
      "epoch": 4.239508257889672,
      "grad_norm": 0.3129068613052368,
      "learning_rate": 1.5310055810704848e-06,
      "loss": 0.0105,
      "step": 2590560
    },
    {
      "epoch": 4.239540988328326,
      "grad_norm": 0.20451921224594116,
      "learning_rate": 1.530939688856968e-06,
      "loss": 0.008,
      "step": 2590580
    },
    {
      "epoch": 4.239573718766979,
      "grad_norm": 0.28910157084465027,
      "learning_rate": 1.530873796643451e-06,
      "loss": 0.0124,
      "step": 2590600
    },
    {
      "epoch": 4.239606449205632,
      "grad_norm": 0.07697376608848572,
      "learning_rate": 1.5308079044299337e-06,
      "loss": 0.0119,
      "step": 2590620
    },
    {
      "epoch": 4.239639179644286,
      "grad_norm": 0.1616797149181366,
      "learning_rate": 1.5307420122164167e-06,
      "loss": 0.0081,
      "step": 2590640
    },
    {
      "epoch": 4.239671910082939,
      "grad_norm": 0.18720245361328125,
      "learning_rate": 1.5306761200028994e-06,
      "loss": 0.0098,
      "step": 2590660
    },
    {
      "epoch": 4.239704640521592,
      "grad_norm": 0.1544356495141983,
      "learning_rate": 1.5306102277893821e-06,
      "loss": 0.007,
      "step": 2590680
    },
    {
      "epoch": 4.2397373709602455,
      "grad_norm": 0.24777983129024506,
      "learning_rate": 1.530544335575865e-06,
      "loss": 0.0104,
      "step": 2590700
    },
    {
      "epoch": 4.239770101398899,
      "grad_norm": 0.2619471251964569,
      "learning_rate": 1.5304784433623478e-06,
      "loss": 0.0107,
      "step": 2590720
    },
    {
      "epoch": 4.239802831837553,
      "grad_norm": 0.0844559296965599,
      "learning_rate": 1.5304125511488308e-06,
      "loss": 0.0095,
      "step": 2590740
    },
    {
      "epoch": 4.239835562276205,
      "grad_norm": 0.131469264626503,
      "learning_rate": 1.530346658935314e-06,
      "loss": 0.0085,
      "step": 2590760
    },
    {
      "epoch": 4.239868292714859,
      "grad_norm": 0.2077670395374298,
      "learning_rate": 1.5302807667217967e-06,
      "loss": 0.0081,
      "step": 2590780
    },
    {
      "epoch": 4.239901023153513,
      "grad_norm": 0.12134639918804169,
      "learning_rate": 1.5302148745082796e-06,
      "loss": 0.0132,
      "step": 2590800
    },
    {
      "epoch": 4.239933753592165,
      "grad_norm": 0.07382318377494812,
      "learning_rate": 1.5301489822947624e-06,
      "loss": 0.0081,
      "step": 2590820
    },
    {
      "epoch": 4.239966484030819,
      "grad_norm": 0.22559350728988647,
      "learning_rate": 1.5300830900812453e-06,
      "loss": 0.0082,
      "step": 2590840
    },
    {
      "epoch": 4.2399992144694725,
      "grad_norm": 0.19644580781459808,
      "learning_rate": 1.530017197867728e-06,
      "loss": 0.0055,
      "step": 2590860
    },
    {
      "epoch": 4.240031944908126,
      "grad_norm": 0.18682675063610077,
      "learning_rate": 1.5299513056542108e-06,
      "loss": 0.0071,
      "step": 2590880
    },
    {
      "epoch": 4.240064675346779,
      "grad_norm": 0.22831778228282928,
      "learning_rate": 1.5298854134406938e-06,
      "loss": 0.0095,
      "step": 2590900
    },
    {
      "epoch": 4.240097405785432,
      "grad_norm": 0.4208735525608063,
      "learning_rate": 1.5298195212271765e-06,
      "loss": 0.0085,
      "step": 2590920
    },
    {
      "epoch": 4.240130136224086,
      "grad_norm": 0.40393877029418945,
      "learning_rate": 1.5297536290136597e-06,
      "loss": 0.0151,
      "step": 2590940
    },
    {
      "epoch": 4.240162866662739,
      "grad_norm": 0.14246846735477448,
      "learning_rate": 1.5296877368001426e-06,
      "loss": 0.0093,
      "step": 2590960
    },
    {
      "epoch": 4.240195597101392,
      "grad_norm": 0.15588457882404327,
      "learning_rate": 1.5296218445866253e-06,
      "loss": 0.0091,
      "step": 2590980
    },
    {
      "epoch": 4.240228327540046,
      "grad_norm": 0.2833327054977417,
      "learning_rate": 1.5295559523731083e-06,
      "loss": 0.0059,
      "step": 2591000
    },
    {
      "epoch": 4.2402610579786995,
      "grad_norm": 0.24666012823581696,
      "learning_rate": 1.529490060159591e-06,
      "loss": 0.0083,
      "step": 2591020
    },
    {
      "epoch": 4.240293788417352,
      "grad_norm": 0.11227726936340332,
      "learning_rate": 1.529424167946074e-06,
      "loss": 0.0103,
      "step": 2591040
    },
    {
      "epoch": 4.240326518856006,
      "grad_norm": 0.304143488407135,
      "learning_rate": 1.5293582757325567e-06,
      "loss": 0.007,
      "step": 2591060
    },
    {
      "epoch": 4.240359249294659,
      "grad_norm": 0.26232779026031494,
      "learning_rate": 1.5292923835190397e-06,
      "loss": 0.0099,
      "step": 2591080
    },
    {
      "epoch": 4.240391979733312,
      "grad_norm": 0.12990407645702362,
      "learning_rate": 1.5292264913055224e-06,
      "loss": 0.0084,
      "step": 2591100
    },
    {
      "epoch": 4.240424710171966,
      "grad_norm": 0.39469876885414124,
      "learning_rate": 1.5291605990920056e-06,
      "loss": 0.0084,
      "step": 2591120
    },
    {
      "epoch": 4.240457440610619,
      "grad_norm": 0.11886382102966309,
      "learning_rate": 1.5290947068784883e-06,
      "loss": 0.0117,
      "step": 2591140
    },
    {
      "epoch": 4.240490171049272,
      "grad_norm": 0.2709430158138275,
      "learning_rate": 1.5290288146649713e-06,
      "loss": 0.01,
      "step": 2591160
    },
    {
      "epoch": 4.2405229014879255,
      "grad_norm": 0.6050872206687927,
      "learning_rate": 1.528962922451454e-06,
      "loss": 0.0103,
      "step": 2591180
    },
    {
      "epoch": 4.240555631926579,
      "grad_norm": 0.4809059798717499,
      "learning_rate": 1.528897030237937e-06,
      "loss": 0.0089,
      "step": 2591200
    },
    {
      "epoch": 4.240588362365233,
      "grad_norm": 0.0721428170800209,
      "learning_rate": 1.5288311380244197e-06,
      "loss": 0.0077,
      "step": 2591220
    },
    {
      "epoch": 4.240621092803885,
      "grad_norm": 0.17459340393543243,
      "learning_rate": 1.5287652458109027e-06,
      "loss": 0.006,
      "step": 2591240
    },
    {
      "epoch": 4.240653823242539,
      "grad_norm": 0.29036974906921387,
      "learning_rate": 1.5286993535973854e-06,
      "loss": 0.0089,
      "step": 2591260
    },
    {
      "epoch": 4.240686553681193,
      "grad_norm": 0.5330584049224854,
      "learning_rate": 1.5286334613838683e-06,
      "loss": 0.012,
      "step": 2591280
    },
    {
      "epoch": 4.240719284119846,
      "grad_norm": 0.12066533416509628,
      "learning_rate": 1.5285675691703513e-06,
      "loss": 0.0098,
      "step": 2591300
    },
    {
      "epoch": 4.240752014558499,
      "grad_norm": 0.12733323872089386,
      "learning_rate": 1.5285016769568343e-06,
      "loss": 0.0084,
      "step": 2591320
    },
    {
      "epoch": 4.2407847449971525,
      "grad_norm": 0.14641231298446655,
      "learning_rate": 1.528435784743317e-06,
      "loss": 0.0099,
      "step": 2591340
    },
    {
      "epoch": 4.240817475435806,
      "grad_norm": 0.20201605558395386,
      "learning_rate": 1.5283698925298e-06,
      "loss": 0.0073,
      "step": 2591360
    },
    {
      "epoch": 4.240850205874459,
      "grad_norm": 0.17613628506660461,
      "learning_rate": 1.5283040003162827e-06,
      "loss": 0.0105,
      "step": 2591380
    },
    {
      "epoch": 4.240882936313112,
      "grad_norm": 0.19554899632930756,
      "learning_rate": 1.5282381081027656e-06,
      "loss": 0.0107,
      "step": 2591400
    },
    {
      "epoch": 4.240915666751766,
      "grad_norm": 0.3444315195083618,
      "learning_rate": 1.5281722158892484e-06,
      "loss": 0.0085,
      "step": 2591420
    },
    {
      "epoch": 4.240948397190419,
      "grad_norm": 0.18308551609516144,
      "learning_rate": 1.5281063236757313e-06,
      "loss": 0.0088,
      "step": 2591440
    },
    {
      "epoch": 4.240981127629072,
      "grad_norm": 0.2424076497554779,
      "learning_rate": 1.5280404314622143e-06,
      "loss": 0.007,
      "step": 2591460
    },
    {
      "epoch": 4.241013858067726,
      "grad_norm": 0.13530148565769196,
      "learning_rate": 1.5279745392486972e-06,
      "loss": 0.0094,
      "step": 2591480
    },
    {
      "epoch": 4.2410465885063795,
      "grad_norm": 0.1417393833398819,
      "learning_rate": 1.52790864703518e-06,
      "loss": 0.0112,
      "step": 2591500
    },
    {
      "epoch": 4.241079318945032,
      "grad_norm": 0.26847201585769653,
      "learning_rate": 1.527842754821663e-06,
      "loss": 0.009,
      "step": 2591520
    },
    {
      "epoch": 4.241112049383686,
      "grad_norm": 0.5945264101028442,
      "learning_rate": 1.5277768626081457e-06,
      "loss": 0.0084,
      "step": 2591540
    },
    {
      "epoch": 4.241144779822339,
      "grad_norm": 0.6719282865524292,
      "learning_rate": 1.5277109703946286e-06,
      "loss": 0.007,
      "step": 2591560
    },
    {
      "epoch": 4.241177510260992,
      "grad_norm": 0.39467963576316833,
      "learning_rate": 1.5276450781811114e-06,
      "loss": 0.012,
      "step": 2591580
    },
    {
      "epoch": 4.241210240699646,
      "grad_norm": 0.09689111262559891,
      "learning_rate": 1.5275791859675943e-06,
      "loss": 0.0084,
      "step": 2591600
    },
    {
      "epoch": 4.241242971138299,
      "grad_norm": 0.3608560860157013,
      "learning_rate": 1.527513293754077e-06,
      "loss": 0.0063,
      "step": 2591620
    },
    {
      "epoch": 4.241275701576953,
      "grad_norm": 0.09000103175640106,
      "learning_rate": 1.5274474015405602e-06,
      "loss": 0.008,
      "step": 2591640
    },
    {
      "epoch": 4.241308432015606,
      "grad_norm": 0.18223729729652405,
      "learning_rate": 1.527381509327043e-06,
      "loss": 0.0098,
      "step": 2591660
    },
    {
      "epoch": 4.241341162454259,
      "grad_norm": 0.18148837983608246,
      "learning_rate": 1.527315617113526e-06,
      "loss": 0.0087,
      "step": 2591680
    },
    {
      "epoch": 4.241373892892913,
      "grad_norm": 0.3971845209598541,
      "learning_rate": 1.5272497249000086e-06,
      "loss": 0.0148,
      "step": 2591700
    },
    {
      "epoch": 4.2414066233315655,
      "grad_norm": 0.19841057062149048,
      "learning_rate": 1.5271838326864916e-06,
      "loss": 0.0108,
      "step": 2591720
    },
    {
      "epoch": 4.241439353770219,
      "grad_norm": 0.2527744174003601,
      "learning_rate": 1.5271179404729743e-06,
      "loss": 0.0113,
      "step": 2591740
    },
    {
      "epoch": 4.241472084208873,
      "grad_norm": 0.37237748503685,
      "learning_rate": 1.5270520482594573e-06,
      "loss": 0.0109,
      "step": 2591760
    },
    {
      "epoch": 4.241504814647526,
      "grad_norm": 0.18264886736869812,
      "learning_rate": 1.52698615604594e-06,
      "loss": 0.0066,
      "step": 2591780
    },
    {
      "epoch": 4.241537545086179,
      "grad_norm": 0.30059096217155457,
      "learning_rate": 1.526920263832423e-06,
      "loss": 0.007,
      "step": 2591800
    },
    {
      "epoch": 4.241570275524833,
      "grad_norm": 0.18438051640987396,
      "learning_rate": 1.5268543716189061e-06,
      "loss": 0.0118,
      "step": 2591820
    },
    {
      "epoch": 4.241603005963486,
      "grad_norm": 0.2631853222846985,
      "learning_rate": 1.5267884794053889e-06,
      "loss": 0.0107,
      "step": 2591840
    },
    {
      "epoch": 4.241635736402139,
      "grad_norm": 0.3973073661327362,
      "learning_rate": 1.5267225871918716e-06,
      "loss": 0.0097,
      "step": 2591860
    },
    {
      "epoch": 4.2416684668407925,
      "grad_norm": 0.3526718318462372,
      "learning_rate": 1.5266566949783546e-06,
      "loss": 0.0124,
      "step": 2591880
    },
    {
      "epoch": 4.241701197279446,
      "grad_norm": 0.23943451046943665,
      "learning_rate": 1.5265908027648373e-06,
      "loss": 0.0091,
      "step": 2591900
    },
    {
      "epoch": 4.2417339277181,
      "grad_norm": 0.18951362371444702,
      "learning_rate": 1.5265249105513203e-06,
      "loss": 0.0073,
      "step": 2591920
    },
    {
      "epoch": 4.241766658156752,
      "grad_norm": 0.06349314749240875,
      "learning_rate": 1.526459018337803e-06,
      "loss": 0.0096,
      "step": 2591940
    },
    {
      "epoch": 4.241799388595406,
      "grad_norm": 0.16597402095794678,
      "learning_rate": 1.526393126124286e-06,
      "loss": 0.008,
      "step": 2591960
    },
    {
      "epoch": 4.2418321190340595,
      "grad_norm": 0.1116458848118782,
      "learning_rate": 1.5263272339107687e-06,
      "loss": 0.0082,
      "step": 2591980
    },
    {
      "epoch": 4.241864849472712,
      "grad_norm": 0.31136658787727356,
      "learning_rate": 1.5262613416972519e-06,
      "loss": 0.0103,
      "step": 2592000
    },
    {
      "epoch": 4.241897579911366,
      "grad_norm": 0.05379737168550491,
      "learning_rate": 1.5261954494837348e-06,
      "loss": 0.0085,
      "step": 2592020
    },
    {
      "epoch": 4.241930310350019,
      "grad_norm": 0.08070411533117294,
      "learning_rate": 1.5261295572702175e-06,
      "loss": 0.0094,
      "step": 2592040
    },
    {
      "epoch": 4.241963040788673,
      "grad_norm": 0.16718702018260956,
      "learning_rate": 1.5260636650567005e-06,
      "loss": 0.013,
      "step": 2592060
    },
    {
      "epoch": 4.241995771227326,
      "grad_norm": 0.35928910970687866,
      "learning_rate": 1.5259977728431832e-06,
      "loss": 0.0088,
      "step": 2592080
    },
    {
      "epoch": 4.242028501665979,
      "grad_norm": 0.08427014946937561,
      "learning_rate": 1.525931880629666e-06,
      "loss": 0.0071,
      "step": 2592100
    },
    {
      "epoch": 4.242061232104633,
      "grad_norm": 0.23351450264453888,
      "learning_rate": 1.525865988416149e-06,
      "loss": 0.0133,
      "step": 2592120
    },
    {
      "epoch": 4.242093962543286,
      "grad_norm": 0.1337081789970398,
      "learning_rate": 1.5258000962026317e-06,
      "loss": 0.0078,
      "step": 2592140
    },
    {
      "epoch": 4.242126692981939,
      "grad_norm": 0.2973692715167999,
      "learning_rate": 1.5257342039891146e-06,
      "loss": 0.0085,
      "step": 2592160
    },
    {
      "epoch": 4.242159423420593,
      "grad_norm": 0.42240381240844727,
      "learning_rate": 1.5256683117755978e-06,
      "loss": 0.0112,
      "step": 2592180
    },
    {
      "epoch": 4.242192153859246,
      "grad_norm": 0.5107331275939941,
      "learning_rate": 1.5256024195620805e-06,
      "loss": 0.0159,
      "step": 2592200
    },
    {
      "epoch": 4.242224884297899,
      "grad_norm": 0.25183019042015076,
      "learning_rate": 1.5255365273485635e-06,
      "loss": 0.0086,
      "step": 2592220
    },
    {
      "epoch": 4.242257614736553,
      "grad_norm": 0.05934299901127815,
      "learning_rate": 1.5254706351350462e-06,
      "loss": 0.0092,
      "step": 2592240
    },
    {
      "epoch": 4.242290345175206,
      "grad_norm": 0.19959282875061035,
      "learning_rate": 1.5254047429215292e-06,
      "loss": 0.007,
      "step": 2592260
    },
    {
      "epoch": 4.242323075613859,
      "grad_norm": 0.13001923263072968,
      "learning_rate": 1.525338850708012e-06,
      "loss": 0.0061,
      "step": 2592280
    },
    {
      "epoch": 4.242355806052513,
      "grad_norm": 0.1966256946325302,
      "learning_rate": 1.5252729584944949e-06,
      "loss": 0.0102,
      "step": 2592300
    },
    {
      "epoch": 4.242388536491166,
      "grad_norm": 0.1383514553308487,
      "learning_rate": 1.5252070662809776e-06,
      "loss": 0.0073,
      "step": 2592320
    },
    {
      "epoch": 4.24242126692982,
      "grad_norm": 0.10410328954458237,
      "learning_rate": 1.5251411740674608e-06,
      "loss": 0.0066,
      "step": 2592340
    },
    {
      "epoch": 4.2424539973684725,
      "grad_norm": 0.25153860449790955,
      "learning_rate": 1.5250752818539435e-06,
      "loss": 0.0076,
      "step": 2592360
    },
    {
      "epoch": 4.242486727807126,
      "grad_norm": 0.448228120803833,
      "learning_rate": 1.5250093896404264e-06,
      "loss": 0.012,
      "step": 2592380
    },
    {
      "epoch": 4.24251945824578,
      "grad_norm": 0.1436627358198166,
      "learning_rate": 1.5249434974269092e-06,
      "loss": 0.0059,
      "step": 2592400
    },
    {
      "epoch": 4.242552188684432,
      "grad_norm": 0.1739593893289566,
      "learning_rate": 1.5248776052133921e-06,
      "loss": 0.0066,
      "step": 2592420
    },
    {
      "epoch": 4.242584919123086,
      "grad_norm": 0.2414546012878418,
      "learning_rate": 1.5248117129998749e-06,
      "loss": 0.005,
      "step": 2592440
    },
    {
      "epoch": 4.24261764956174,
      "grad_norm": 0.10020004212856293,
      "learning_rate": 1.5247458207863578e-06,
      "loss": 0.0129,
      "step": 2592460
    },
    {
      "epoch": 4.242650380000393,
      "grad_norm": 0.3475329875946045,
      "learning_rate": 1.5246799285728406e-06,
      "loss": 0.0081,
      "step": 2592480
    },
    {
      "epoch": 4.242683110439046,
      "grad_norm": 0.10450766980648041,
      "learning_rate": 1.5246140363593235e-06,
      "loss": 0.0073,
      "step": 2592500
    },
    {
      "epoch": 4.2427158408776995,
      "grad_norm": 0.31011447310447693,
      "learning_rate": 1.5245481441458065e-06,
      "loss": 0.0148,
      "step": 2592520
    },
    {
      "epoch": 4.242748571316353,
      "grad_norm": 0.601746141910553,
      "learning_rate": 1.5244822519322894e-06,
      "loss": 0.0112,
      "step": 2592540
    },
    {
      "epoch": 4.242781301755006,
      "grad_norm": 0.797684371471405,
      "learning_rate": 1.5244163597187722e-06,
      "loss": 0.0091,
      "step": 2592560
    },
    {
      "epoch": 4.242814032193659,
      "grad_norm": 0.3305127024650574,
      "learning_rate": 1.5243504675052551e-06,
      "loss": 0.0136,
      "step": 2592580
    },
    {
      "epoch": 4.242846762632313,
      "grad_norm": 0.1240328848361969,
      "learning_rate": 1.5242845752917379e-06,
      "loss": 0.006,
      "step": 2592600
    },
    {
      "epoch": 4.242879493070966,
      "grad_norm": 0.16878201067447662,
      "learning_rate": 1.5242186830782208e-06,
      "loss": 0.0109,
      "step": 2592620
    },
    {
      "epoch": 4.242912223509619,
      "grad_norm": 0.1998298466205597,
      "learning_rate": 1.5241527908647035e-06,
      "loss": 0.0084,
      "step": 2592640
    },
    {
      "epoch": 4.242944953948273,
      "grad_norm": 0.32063889503479004,
      "learning_rate": 1.5240868986511865e-06,
      "loss": 0.0093,
      "step": 2592660
    },
    {
      "epoch": 4.2429776843869265,
      "grad_norm": 0.3206738531589508,
      "learning_rate": 1.5240210064376692e-06,
      "loss": 0.0122,
      "step": 2592680
    },
    {
      "epoch": 4.243010414825579,
      "grad_norm": 0.16779278218746185,
      "learning_rate": 1.5239551142241524e-06,
      "loss": 0.0109,
      "step": 2592700
    },
    {
      "epoch": 4.243043145264233,
      "grad_norm": 0.20667538046836853,
      "learning_rate": 1.5238892220106351e-06,
      "loss": 0.006,
      "step": 2592720
    },
    {
      "epoch": 4.243075875702886,
      "grad_norm": 0.23273450136184692,
      "learning_rate": 1.523823329797118e-06,
      "loss": 0.0077,
      "step": 2592740
    },
    {
      "epoch": 4.24310860614154,
      "grad_norm": 0.3554569184780121,
      "learning_rate": 1.5237574375836008e-06,
      "loss": 0.0078,
      "step": 2592760
    },
    {
      "epoch": 4.243141336580193,
      "grad_norm": 0.01646708883345127,
      "learning_rate": 1.5236915453700838e-06,
      "loss": 0.0074,
      "step": 2592780
    },
    {
      "epoch": 4.243174067018846,
      "grad_norm": 0.0989440456032753,
      "learning_rate": 1.5236256531565665e-06,
      "loss": 0.0101,
      "step": 2592800
    },
    {
      "epoch": 4.2432067974575,
      "grad_norm": 0.4123024046421051,
      "learning_rate": 1.5235597609430495e-06,
      "loss": 0.0089,
      "step": 2592820
    },
    {
      "epoch": 4.2432395278961526,
      "grad_norm": 0.0701294019818306,
      "learning_rate": 1.5234938687295322e-06,
      "loss": 0.0094,
      "step": 2592840
    },
    {
      "epoch": 4.243272258334806,
      "grad_norm": 0.39750590920448303,
      "learning_rate": 1.5234279765160152e-06,
      "loss": 0.0122,
      "step": 2592860
    },
    {
      "epoch": 4.24330498877346,
      "grad_norm": 0.2373049259185791,
      "learning_rate": 1.5233620843024981e-06,
      "loss": 0.0084,
      "step": 2592880
    },
    {
      "epoch": 4.2433377192121124,
      "grad_norm": 0.24366609752178192,
      "learning_rate": 1.523296192088981e-06,
      "loss": 0.0096,
      "step": 2592900
    },
    {
      "epoch": 4.243370449650766,
      "grad_norm": 0.21220223605632782,
      "learning_rate": 1.5232302998754638e-06,
      "loss": 0.0063,
      "step": 2592920
    },
    {
      "epoch": 4.24340318008942,
      "grad_norm": 0.18122802674770355,
      "learning_rate": 1.5231644076619468e-06,
      "loss": 0.0098,
      "step": 2592940
    },
    {
      "epoch": 4.243435910528073,
      "grad_norm": 0.3233509957790375,
      "learning_rate": 1.5230985154484295e-06,
      "loss": 0.0087,
      "step": 2592960
    },
    {
      "epoch": 4.243468640966726,
      "grad_norm": 0.13998043537139893,
      "learning_rate": 1.5230326232349125e-06,
      "loss": 0.0058,
      "step": 2592980
    },
    {
      "epoch": 4.2435013714053795,
      "grad_norm": 0.17009343206882477,
      "learning_rate": 1.5229667310213952e-06,
      "loss": 0.0069,
      "step": 2593000
    },
    {
      "epoch": 4.243534101844033,
      "grad_norm": 0.2578273117542267,
      "learning_rate": 1.5229008388078781e-06,
      "loss": 0.0078,
      "step": 2593020
    },
    {
      "epoch": 4.243566832282686,
      "grad_norm": 0.3386576473712921,
      "learning_rate": 1.5228349465943613e-06,
      "loss": 0.01,
      "step": 2593040
    },
    {
      "epoch": 4.243599562721339,
      "grad_norm": 0.021499505266547203,
      "learning_rate": 1.522769054380844e-06,
      "loss": 0.0041,
      "step": 2593060
    },
    {
      "epoch": 4.243632293159993,
      "grad_norm": 0.5966932773590088,
      "learning_rate": 1.5227031621673268e-06,
      "loss": 0.0125,
      "step": 2593080
    },
    {
      "epoch": 4.243665023598647,
      "grad_norm": 0.12653256952762604,
      "learning_rate": 1.5226372699538097e-06,
      "loss": 0.013,
      "step": 2593100
    },
    {
      "epoch": 4.243697754037299,
      "grad_norm": 0.22611171007156372,
      "learning_rate": 1.5225713777402925e-06,
      "loss": 0.0126,
      "step": 2593120
    },
    {
      "epoch": 4.243730484475953,
      "grad_norm": 0.1263650804758072,
      "learning_rate": 1.5225054855267754e-06,
      "loss": 0.0074,
      "step": 2593140
    },
    {
      "epoch": 4.2437632149146065,
      "grad_norm": 0.23105072975158691,
      "learning_rate": 1.5224395933132582e-06,
      "loss": 0.0115,
      "step": 2593160
    },
    {
      "epoch": 4.243795945353259,
      "grad_norm": 0.1341988891363144,
      "learning_rate": 1.5223737010997411e-06,
      "loss": 0.0067,
      "step": 2593180
    },
    {
      "epoch": 4.243828675791913,
      "grad_norm": 0.40277859568595886,
      "learning_rate": 1.5223078088862239e-06,
      "loss": 0.0072,
      "step": 2593200
    },
    {
      "epoch": 4.243861406230566,
      "grad_norm": 0.0936964601278305,
      "learning_rate": 1.522241916672707e-06,
      "loss": 0.0075,
      "step": 2593220
    },
    {
      "epoch": 4.24389413666922,
      "grad_norm": 0.09408608824014664,
      "learning_rate": 1.52217602445919e-06,
      "loss": 0.014,
      "step": 2593240
    },
    {
      "epoch": 4.243926867107873,
      "grad_norm": 0.31096646189689636,
      "learning_rate": 1.5221101322456727e-06,
      "loss": 0.0096,
      "step": 2593260
    },
    {
      "epoch": 4.243959597546526,
      "grad_norm": 0.23301613330841064,
      "learning_rate": 1.5220442400321557e-06,
      "loss": 0.011,
      "step": 2593280
    },
    {
      "epoch": 4.24399232798518,
      "grad_norm": 0.1593572199344635,
      "learning_rate": 1.5219783478186384e-06,
      "loss": 0.0084,
      "step": 2593300
    },
    {
      "epoch": 4.244025058423833,
      "grad_norm": 0.3533666133880615,
      "learning_rate": 1.5219124556051211e-06,
      "loss": 0.011,
      "step": 2593320
    },
    {
      "epoch": 4.244057788862486,
      "grad_norm": 0.2528364956378937,
      "learning_rate": 1.521846563391604e-06,
      "loss": 0.0171,
      "step": 2593340
    },
    {
      "epoch": 4.24409051930114,
      "grad_norm": 0.05576116219162941,
      "learning_rate": 1.5217806711780868e-06,
      "loss": 0.0097,
      "step": 2593360
    },
    {
      "epoch": 4.244123249739793,
      "grad_norm": 0.15042580664157867,
      "learning_rate": 1.5217147789645698e-06,
      "loss": 0.0119,
      "step": 2593380
    },
    {
      "epoch": 4.244155980178446,
      "grad_norm": 0.4319334924221039,
      "learning_rate": 1.521648886751053e-06,
      "loss": 0.0091,
      "step": 2593400
    },
    {
      "epoch": 4.2441887106171,
      "grad_norm": 0.21076373755931854,
      "learning_rate": 1.5215829945375357e-06,
      "loss": 0.0118,
      "step": 2593420
    },
    {
      "epoch": 4.244221441055753,
      "grad_norm": 1.6337244510650635,
      "learning_rate": 1.5215171023240186e-06,
      "loss": 0.0092,
      "step": 2593440
    },
    {
      "epoch": 4.244254171494406,
      "grad_norm": 0.10150425136089325,
      "learning_rate": 1.5214512101105014e-06,
      "loss": 0.0117,
      "step": 2593460
    },
    {
      "epoch": 4.24428690193306,
      "grad_norm": 0.15647345781326294,
      "learning_rate": 1.5213853178969843e-06,
      "loss": 0.0095,
      "step": 2593480
    },
    {
      "epoch": 4.244319632371713,
      "grad_norm": 0.12686002254486084,
      "learning_rate": 1.521319425683467e-06,
      "loss": 0.0073,
      "step": 2593500
    },
    {
      "epoch": 4.244352362810367,
      "grad_norm": 0.1890726089477539,
      "learning_rate": 1.5212535334699498e-06,
      "loss": 0.0057,
      "step": 2593520
    },
    {
      "epoch": 4.2443850932490195,
      "grad_norm": 0.2865816652774811,
      "learning_rate": 1.5211876412564328e-06,
      "loss": 0.0076,
      "step": 2593540
    },
    {
      "epoch": 4.244417823687673,
      "grad_norm": 0.19180582463741302,
      "learning_rate": 1.5211217490429155e-06,
      "loss": 0.0164,
      "step": 2593560
    },
    {
      "epoch": 4.244450554126327,
      "grad_norm": 0.24000367522239685,
      "learning_rate": 1.5210558568293987e-06,
      "loss": 0.007,
      "step": 2593580
    },
    {
      "epoch": 4.244483284564979,
      "grad_norm": 0.7400784492492676,
      "learning_rate": 1.5209899646158816e-06,
      "loss": 0.0126,
      "step": 2593600
    },
    {
      "epoch": 4.244516015003633,
      "grad_norm": 0.27888360619544983,
      "learning_rate": 1.5209240724023644e-06,
      "loss": 0.0083,
      "step": 2593620
    },
    {
      "epoch": 4.2445487454422866,
      "grad_norm": 0.27984505891799927,
      "learning_rate": 1.5208581801888473e-06,
      "loss": 0.0092,
      "step": 2593640
    },
    {
      "epoch": 4.24458147588094,
      "grad_norm": 0.0716734379529953,
      "learning_rate": 1.52079228797533e-06,
      "loss": 0.011,
      "step": 2593660
    },
    {
      "epoch": 4.244614206319593,
      "grad_norm": 0.232204407453537,
      "learning_rate": 1.520726395761813e-06,
      "loss": 0.007,
      "step": 2593680
    },
    {
      "epoch": 4.2446469367582464,
      "grad_norm": 0.12874838709831238,
      "learning_rate": 1.5206605035482957e-06,
      "loss": 0.014,
      "step": 2593700
    },
    {
      "epoch": 4.2446796671969,
      "grad_norm": 0.13666807115077972,
      "learning_rate": 1.5205946113347787e-06,
      "loss": 0.0077,
      "step": 2593720
    },
    {
      "epoch": 4.244712397635553,
      "grad_norm": 0.16900552809238434,
      "learning_rate": 1.5205287191212614e-06,
      "loss": 0.0117,
      "step": 2593740
    },
    {
      "epoch": 4.244745128074206,
      "grad_norm": 0.19864705204963684,
      "learning_rate": 1.5204628269077446e-06,
      "loss": 0.0119,
      "step": 2593760
    },
    {
      "epoch": 4.24477785851286,
      "grad_norm": 0.05541883036494255,
      "learning_rate": 1.5203969346942273e-06,
      "loss": 0.0099,
      "step": 2593780
    },
    {
      "epoch": 4.2448105889515135,
      "grad_norm": 0.16544196009635925,
      "learning_rate": 1.5203310424807103e-06,
      "loss": 0.0107,
      "step": 2593800
    },
    {
      "epoch": 4.244843319390166,
      "grad_norm": 0.1630023866891861,
      "learning_rate": 1.520265150267193e-06,
      "loss": 0.0124,
      "step": 2593820
    },
    {
      "epoch": 4.24487604982882,
      "grad_norm": 0.12928830087184906,
      "learning_rate": 1.520199258053676e-06,
      "loss": 0.0085,
      "step": 2593840
    },
    {
      "epoch": 4.244908780267473,
      "grad_norm": 0.1965702325105667,
      "learning_rate": 1.5201333658401587e-06,
      "loss": 0.0129,
      "step": 2593860
    },
    {
      "epoch": 4.244941510706126,
      "grad_norm": 0.15278363227844238,
      "learning_rate": 1.5200674736266417e-06,
      "loss": 0.0122,
      "step": 2593880
    },
    {
      "epoch": 4.24497424114478,
      "grad_norm": 0.22420310974121094,
      "learning_rate": 1.5200015814131244e-06,
      "loss": 0.0075,
      "step": 2593900
    },
    {
      "epoch": 4.245006971583433,
      "grad_norm": 0.24577511847019196,
      "learning_rate": 1.5199356891996076e-06,
      "loss": 0.0085,
      "step": 2593920
    },
    {
      "epoch": 4.245039702022087,
      "grad_norm": 0.12947537004947662,
      "learning_rate": 1.5198697969860903e-06,
      "loss": 0.0053,
      "step": 2593940
    },
    {
      "epoch": 4.24507243246074,
      "grad_norm": 0.20873968303203583,
      "learning_rate": 1.5198039047725733e-06,
      "loss": 0.0092,
      "step": 2593960
    },
    {
      "epoch": 4.245105162899393,
      "grad_norm": 0.19217252731323242,
      "learning_rate": 1.519738012559056e-06,
      "loss": 0.0096,
      "step": 2593980
    },
    {
      "epoch": 4.245137893338047,
      "grad_norm": 0.3258827328681946,
      "learning_rate": 1.519672120345539e-06,
      "loss": 0.0083,
      "step": 2594000
    },
    {
      "epoch": 4.2451706237766995,
      "grad_norm": 0.1320674568414688,
      "learning_rate": 1.5196062281320217e-06,
      "loss": 0.0076,
      "step": 2594020
    },
    {
      "epoch": 4.245203354215353,
      "grad_norm": 0.5802741646766663,
      "learning_rate": 1.5195403359185046e-06,
      "loss": 0.0161,
      "step": 2594040
    },
    {
      "epoch": 4.245236084654007,
      "grad_norm": 0.20422108471393585,
      "learning_rate": 1.5194744437049874e-06,
      "loss": 0.0066,
      "step": 2594060
    },
    {
      "epoch": 4.24526881509266,
      "grad_norm": 1.04729425907135,
      "learning_rate": 1.5194085514914703e-06,
      "loss": 0.0089,
      "step": 2594080
    },
    {
      "epoch": 4.245301545531313,
      "grad_norm": 0.38029348850250244,
      "learning_rate": 1.5193426592779533e-06,
      "loss": 0.0074,
      "step": 2594100
    },
    {
      "epoch": 4.245334275969967,
      "grad_norm": 0.3182590901851654,
      "learning_rate": 1.5192767670644362e-06,
      "loss": 0.0095,
      "step": 2594120
    },
    {
      "epoch": 4.24536700640862,
      "grad_norm": 0.305448442697525,
      "learning_rate": 1.519210874850919e-06,
      "loss": 0.013,
      "step": 2594140
    },
    {
      "epoch": 4.245399736847273,
      "grad_norm": 0.29024985432624817,
      "learning_rate": 1.519144982637402e-06,
      "loss": 0.0117,
      "step": 2594160
    },
    {
      "epoch": 4.2454324672859265,
      "grad_norm": 0.07698836922645569,
      "learning_rate": 1.5190790904238847e-06,
      "loss": 0.0106,
      "step": 2594180
    },
    {
      "epoch": 4.24546519772458,
      "grad_norm": 0.20053397119045258,
      "learning_rate": 1.5190131982103676e-06,
      "loss": 0.0114,
      "step": 2594200
    },
    {
      "epoch": 4.245497928163234,
      "grad_norm": 0.0954848974943161,
      "learning_rate": 1.5189473059968504e-06,
      "loss": 0.0106,
      "step": 2594220
    },
    {
      "epoch": 4.245530658601886,
      "grad_norm": 0.19849109649658203,
      "learning_rate": 1.5188814137833333e-06,
      "loss": 0.0129,
      "step": 2594240
    },
    {
      "epoch": 4.24556338904054,
      "grad_norm": 0.2892729341983795,
      "learning_rate": 1.518815521569816e-06,
      "loss": 0.0067,
      "step": 2594260
    },
    {
      "epoch": 4.245596119479194,
      "grad_norm": 0.21069678664207458,
      "learning_rate": 1.5187496293562992e-06,
      "loss": 0.0116,
      "step": 2594280
    },
    {
      "epoch": 4.245628849917846,
      "grad_norm": 0.20894907414913177,
      "learning_rate": 1.518683737142782e-06,
      "loss": 0.0098,
      "step": 2594300
    },
    {
      "epoch": 4.2456615803565,
      "grad_norm": 0.13324108719825745,
      "learning_rate": 1.518617844929265e-06,
      "loss": 0.0074,
      "step": 2594320
    },
    {
      "epoch": 4.2456943107951535,
      "grad_norm": 0.27627235651016235,
      "learning_rate": 1.5185519527157476e-06,
      "loss": 0.0115,
      "step": 2594340
    },
    {
      "epoch": 4.245727041233806,
      "grad_norm": 0.08113621920347214,
      "learning_rate": 1.5184860605022306e-06,
      "loss": 0.0071,
      "step": 2594360
    },
    {
      "epoch": 4.24575977167246,
      "grad_norm": 0.2569465637207031,
      "learning_rate": 1.5184201682887133e-06,
      "loss": 0.0098,
      "step": 2594380
    },
    {
      "epoch": 4.245792502111113,
      "grad_norm": 0.12526185810565948,
      "learning_rate": 1.5183542760751963e-06,
      "loss": 0.0083,
      "step": 2594400
    },
    {
      "epoch": 4.245825232549767,
      "grad_norm": 0.15243959426879883,
      "learning_rate": 1.518288383861679e-06,
      "loss": 0.0097,
      "step": 2594420
    },
    {
      "epoch": 4.24585796298842,
      "grad_norm": 0.11777444183826447,
      "learning_rate": 1.518222491648162e-06,
      "loss": 0.0132,
      "step": 2594440
    },
    {
      "epoch": 4.245890693427073,
      "grad_norm": 0.2434346079826355,
      "learning_rate": 1.5181565994346451e-06,
      "loss": 0.0112,
      "step": 2594460
    },
    {
      "epoch": 4.245923423865727,
      "grad_norm": 0.1452742964029312,
      "learning_rate": 1.5180907072211279e-06,
      "loss": 0.0088,
      "step": 2594480
    },
    {
      "epoch": 4.24595615430438,
      "grad_norm": 0.2772679328918457,
      "learning_rate": 1.5180248150076108e-06,
      "loss": 0.0142,
      "step": 2594500
    },
    {
      "epoch": 4.245988884743033,
      "grad_norm": 1.1632952690124512,
      "learning_rate": 1.5179589227940936e-06,
      "loss": 0.0092,
      "step": 2594520
    },
    {
      "epoch": 4.246021615181687,
      "grad_norm": 0.09039673209190369,
      "learning_rate": 1.5178930305805763e-06,
      "loss": 0.0071,
      "step": 2594540
    },
    {
      "epoch": 4.24605434562034,
      "grad_norm": 0.30089643597602844,
      "learning_rate": 1.5178271383670593e-06,
      "loss": 0.0092,
      "step": 2594560
    },
    {
      "epoch": 4.246087076058993,
      "grad_norm": 0.10332779586315155,
      "learning_rate": 1.517761246153542e-06,
      "loss": 0.0066,
      "step": 2594580
    },
    {
      "epoch": 4.246119806497647,
      "grad_norm": 0.2754308879375458,
      "learning_rate": 1.517695353940025e-06,
      "loss": 0.0137,
      "step": 2594600
    },
    {
      "epoch": 4.2461525369363,
      "grad_norm": 0.14142869412899017,
      "learning_rate": 1.5176294617265077e-06,
      "loss": 0.0117,
      "step": 2594620
    },
    {
      "epoch": 4.246185267374953,
      "grad_norm": 0.5741734504699707,
      "learning_rate": 1.5175635695129909e-06,
      "loss": 0.0093,
      "step": 2594640
    },
    {
      "epoch": 4.2462179978136065,
      "grad_norm": 0.10931207984685898,
      "learning_rate": 1.5174976772994738e-06,
      "loss": 0.0089,
      "step": 2594660
    },
    {
      "epoch": 4.24625072825226,
      "grad_norm": 0.1805538833141327,
      "learning_rate": 1.5174317850859566e-06,
      "loss": 0.0083,
      "step": 2594680
    },
    {
      "epoch": 4.246283458690914,
      "grad_norm": 0.0745219737291336,
      "learning_rate": 1.5173658928724395e-06,
      "loss": 0.0092,
      "step": 2594700
    },
    {
      "epoch": 4.246316189129566,
      "grad_norm": 0.15528114140033722,
      "learning_rate": 1.5173000006589222e-06,
      "loss": 0.0066,
      "step": 2594720
    },
    {
      "epoch": 4.24634891956822,
      "grad_norm": 0.3189740777015686,
      "learning_rate": 1.517234108445405e-06,
      "loss": 0.0134,
      "step": 2594740
    },
    {
      "epoch": 4.246381650006874,
      "grad_norm": 0.22676461935043335,
      "learning_rate": 1.517168216231888e-06,
      "loss": 0.0108,
      "step": 2594760
    },
    {
      "epoch": 4.246414380445526,
      "grad_norm": 0.29953375458717346,
      "learning_rate": 1.5171023240183707e-06,
      "loss": 0.0085,
      "step": 2594780
    },
    {
      "epoch": 4.24644711088418,
      "grad_norm": 0.08185907453298569,
      "learning_rate": 1.5170364318048538e-06,
      "loss": 0.0097,
      "step": 2594800
    },
    {
      "epoch": 4.2464798413228335,
      "grad_norm": 0.1694325953722,
      "learning_rate": 1.5169705395913368e-06,
      "loss": 0.0101,
      "step": 2594820
    },
    {
      "epoch": 4.246512571761487,
      "grad_norm": 0.12930099666118622,
      "learning_rate": 1.5169046473778195e-06,
      "loss": 0.0082,
      "step": 2594840
    },
    {
      "epoch": 4.24654530220014,
      "grad_norm": 0.306607723236084,
      "learning_rate": 1.5168387551643025e-06,
      "loss": 0.0148,
      "step": 2594860
    },
    {
      "epoch": 4.246578032638793,
      "grad_norm": 0.212474063038826,
      "learning_rate": 1.5167728629507852e-06,
      "loss": 0.0139,
      "step": 2594880
    },
    {
      "epoch": 4.246610763077447,
      "grad_norm": 0.20093035697937012,
      "learning_rate": 1.5167069707372682e-06,
      "loss": 0.0082,
      "step": 2594900
    },
    {
      "epoch": 4.2466434935161,
      "grad_norm": 0.7847955822944641,
      "learning_rate": 1.516641078523751e-06,
      "loss": 0.0084,
      "step": 2594920
    },
    {
      "epoch": 4.246676223954753,
      "grad_norm": 0.10879378020763397,
      "learning_rate": 1.5165751863102339e-06,
      "loss": 0.0103,
      "step": 2594940
    },
    {
      "epoch": 4.246708954393407,
      "grad_norm": 0.41737523674964905,
      "learning_rate": 1.5165092940967166e-06,
      "loss": 0.0075,
      "step": 2594960
    },
    {
      "epoch": 4.2467416848320605,
      "grad_norm": 0.29015660285949707,
      "learning_rate": 1.5164434018831998e-06,
      "loss": 0.0069,
      "step": 2594980
    },
    {
      "epoch": 4.246774415270713,
      "grad_norm": 0.2574407160282135,
      "learning_rate": 1.5163775096696825e-06,
      "loss": 0.0096,
      "step": 2595000
    },
    {
      "epoch": 4.246807145709367,
      "grad_norm": 0.6343698501586914,
      "learning_rate": 1.5163116174561655e-06,
      "loss": 0.0151,
      "step": 2595020
    },
    {
      "epoch": 4.24683987614802,
      "grad_norm": 0.08620613813400269,
      "learning_rate": 1.5162457252426482e-06,
      "loss": 0.0083,
      "step": 2595040
    },
    {
      "epoch": 4.246872606586673,
      "grad_norm": 0.19314436614513397,
      "learning_rate": 1.5161798330291311e-06,
      "loss": 0.016,
      "step": 2595060
    },
    {
      "epoch": 4.246905337025327,
      "grad_norm": 0.571587860584259,
      "learning_rate": 1.5161139408156139e-06,
      "loss": 0.0083,
      "step": 2595080
    },
    {
      "epoch": 4.24693806746398,
      "grad_norm": 0.0773412436246872,
      "learning_rate": 1.5160480486020968e-06,
      "loss": 0.0106,
      "step": 2595100
    },
    {
      "epoch": 4.246970797902634,
      "grad_norm": 0.2318008691072464,
      "learning_rate": 1.5159821563885796e-06,
      "loss": 0.0107,
      "step": 2595120
    },
    {
      "epoch": 4.247003528341287,
      "grad_norm": 0.09720000624656677,
      "learning_rate": 1.5159162641750625e-06,
      "loss": 0.0099,
      "step": 2595140
    },
    {
      "epoch": 4.24703625877994,
      "grad_norm": 4.224024295806885,
      "learning_rate": 1.5158503719615455e-06,
      "loss": 0.0138,
      "step": 2595160
    },
    {
      "epoch": 4.247068989218594,
      "grad_norm": 0.3303861618041992,
      "learning_rate": 1.5157844797480284e-06,
      "loss": 0.0076,
      "step": 2595180
    },
    {
      "epoch": 4.2471017196572465,
      "grad_norm": 0.46811625361442566,
      "learning_rate": 1.5157185875345112e-06,
      "loss": 0.0082,
      "step": 2595200
    },
    {
      "epoch": 4.2471344500959,
      "grad_norm": 0.09474781900644302,
      "learning_rate": 1.5156526953209941e-06,
      "loss": 0.0155,
      "step": 2595220
    },
    {
      "epoch": 4.247167180534554,
      "grad_norm": 0.17403830587863922,
      "learning_rate": 1.5155868031074769e-06,
      "loss": 0.0074,
      "step": 2595240
    },
    {
      "epoch": 4.247199910973207,
      "grad_norm": 0.44892698526382446,
      "learning_rate": 1.5155209108939598e-06,
      "loss": 0.0075,
      "step": 2595260
    },
    {
      "epoch": 4.24723264141186,
      "grad_norm": 0.03639284148812294,
      "learning_rate": 1.5154550186804426e-06,
      "loss": 0.0071,
      "step": 2595280
    },
    {
      "epoch": 4.247265371850514,
      "grad_norm": 0.19340534508228302,
      "learning_rate": 1.5153891264669255e-06,
      "loss": 0.0096,
      "step": 2595300
    },
    {
      "epoch": 4.247298102289167,
      "grad_norm": 0.05806939676403999,
      "learning_rate": 1.5153232342534082e-06,
      "loss": 0.0074,
      "step": 2595320
    },
    {
      "epoch": 4.24733083272782,
      "grad_norm": 0.20196962356567383,
      "learning_rate": 1.5152573420398914e-06,
      "loss": 0.0108,
      "step": 2595340
    },
    {
      "epoch": 4.2473635631664735,
      "grad_norm": 0.21215787529945374,
      "learning_rate": 1.5151914498263742e-06,
      "loss": 0.0102,
      "step": 2595360
    },
    {
      "epoch": 4.247396293605127,
      "grad_norm": 0.15843281149864197,
      "learning_rate": 1.515125557612857e-06,
      "loss": 0.0096,
      "step": 2595380
    },
    {
      "epoch": 4.247429024043781,
      "grad_norm": 0.2565014660358429,
      "learning_rate": 1.5150596653993398e-06,
      "loss": 0.0173,
      "step": 2595400
    },
    {
      "epoch": 4.247461754482433,
      "grad_norm": 0.18557587265968323,
      "learning_rate": 1.5149937731858228e-06,
      "loss": 0.0082,
      "step": 2595420
    },
    {
      "epoch": 4.247494484921087,
      "grad_norm": 0.19294686615467072,
      "learning_rate": 1.5149278809723055e-06,
      "loss": 0.0063,
      "step": 2595440
    },
    {
      "epoch": 4.2475272153597405,
      "grad_norm": 0.17505675554275513,
      "learning_rate": 1.5148619887587885e-06,
      "loss": 0.0108,
      "step": 2595460
    },
    {
      "epoch": 4.247559945798393,
      "grad_norm": 0.11758922040462494,
      "learning_rate": 1.5147960965452712e-06,
      "loss": 0.0078,
      "step": 2595480
    },
    {
      "epoch": 4.247592676237047,
      "grad_norm": 0.12714242935180664,
      "learning_rate": 1.5147302043317542e-06,
      "loss": 0.0101,
      "step": 2595500
    },
    {
      "epoch": 4.2476254066757,
      "grad_norm": 0.2964421808719635,
      "learning_rate": 1.5146643121182371e-06,
      "loss": 0.0092,
      "step": 2595520
    },
    {
      "epoch": 4.247658137114354,
      "grad_norm": 0.29420745372772217,
      "learning_rate": 1.51459841990472e-06,
      "loss": 0.0074,
      "step": 2595540
    },
    {
      "epoch": 4.247690867553007,
      "grad_norm": 0.08111481368541718,
      "learning_rate": 1.5145325276912028e-06,
      "loss": 0.0127,
      "step": 2595560
    },
    {
      "epoch": 4.24772359799166,
      "grad_norm": 0.2164716273546219,
      "learning_rate": 1.5144666354776858e-06,
      "loss": 0.0085,
      "step": 2595580
    },
    {
      "epoch": 4.247756328430314,
      "grad_norm": 0.30980822443962097,
      "learning_rate": 1.5144007432641685e-06,
      "loss": 0.0081,
      "step": 2595600
    },
    {
      "epoch": 4.247789058868967,
      "grad_norm": 0.4441327452659607,
      "learning_rate": 1.5143348510506515e-06,
      "loss": 0.0104,
      "step": 2595620
    },
    {
      "epoch": 4.24782178930762,
      "grad_norm": 0.24436329305171967,
      "learning_rate": 1.5142689588371342e-06,
      "loss": 0.014,
      "step": 2595640
    },
    {
      "epoch": 4.247854519746274,
      "grad_norm": 0.08809265494346619,
      "learning_rate": 1.5142030666236172e-06,
      "loss": 0.0045,
      "step": 2595660
    },
    {
      "epoch": 4.247887250184927,
      "grad_norm": 0.4718145728111267,
      "learning_rate": 1.5141371744101003e-06,
      "loss": 0.0102,
      "step": 2595680
    },
    {
      "epoch": 4.24791998062358,
      "grad_norm": 0.7077084183692932,
      "learning_rate": 1.514071282196583e-06,
      "loss": 0.0089,
      "step": 2595700
    },
    {
      "epoch": 4.247952711062234,
      "grad_norm": 0.14012335240840912,
      "learning_rate": 1.5140053899830658e-06,
      "loss": 0.0096,
      "step": 2595720
    },
    {
      "epoch": 4.247985441500887,
      "grad_norm": 0.3444232642650604,
      "learning_rate": 1.5139394977695487e-06,
      "loss": 0.006,
      "step": 2595740
    },
    {
      "epoch": 4.24801817193954,
      "grad_norm": 0.3937820494174957,
      "learning_rate": 1.5138736055560315e-06,
      "loss": 0.0102,
      "step": 2595760
    },
    {
      "epoch": 4.248050902378194,
      "grad_norm": 0.22517919540405273,
      "learning_rate": 1.5138077133425144e-06,
      "loss": 0.0098,
      "step": 2595780
    },
    {
      "epoch": 4.248083632816847,
      "grad_norm": 0.1386110931634903,
      "learning_rate": 1.5137418211289972e-06,
      "loss": 0.0063,
      "step": 2595800
    },
    {
      "epoch": 4.2481163632555,
      "grad_norm": 0.1520204395055771,
      "learning_rate": 1.5136759289154801e-06,
      "loss": 0.0085,
      "step": 2595820
    },
    {
      "epoch": 4.2481490936941535,
      "grad_norm": 0.2506507933139801,
      "learning_rate": 1.5136100367019629e-06,
      "loss": 0.0099,
      "step": 2595840
    },
    {
      "epoch": 4.248181824132807,
      "grad_norm": 0.13741986453533173,
      "learning_rate": 1.513544144488446e-06,
      "loss": 0.0088,
      "step": 2595860
    },
    {
      "epoch": 4.248214554571461,
      "grad_norm": 0.27639055252075195,
      "learning_rate": 1.513478252274929e-06,
      "loss": 0.0184,
      "step": 2595880
    },
    {
      "epoch": 4.248247285010113,
      "grad_norm": 0.371368408203125,
      "learning_rate": 1.5134123600614117e-06,
      "loss": 0.0104,
      "step": 2595900
    },
    {
      "epoch": 4.248280015448767,
      "grad_norm": 0.081553615629673,
      "learning_rate": 1.5133464678478947e-06,
      "loss": 0.01,
      "step": 2595920
    },
    {
      "epoch": 4.248312745887421,
      "grad_norm": 0.13145136833190918,
      "learning_rate": 1.5132805756343774e-06,
      "loss": 0.0074,
      "step": 2595940
    },
    {
      "epoch": 4.248345476326074,
      "grad_norm": 0.14013038575649261,
      "learning_rate": 1.5132146834208602e-06,
      "loss": 0.0114,
      "step": 2595960
    },
    {
      "epoch": 4.248378206764727,
      "grad_norm": 0.19131676852703094,
      "learning_rate": 1.5131487912073431e-06,
      "loss": 0.0089,
      "step": 2595980
    },
    {
      "epoch": 4.2484109372033805,
      "grad_norm": 0.08330237865447998,
      "learning_rate": 1.5130828989938258e-06,
      "loss": 0.0115,
      "step": 2596000
    },
    {
      "epoch": 4.248443667642034,
      "grad_norm": 0.15448175370693207,
      "learning_rate": 1.5130170067803088e-06,
      "loss": 0.0082,
      "step": 2596020
    },
    {
      "epoch": 4.248476398080687,
      "grad_norm": 0.27312350273132324,
      "learning_rate": 1.512951114566792e-06,
      "loss": 0.0104,
      "step": 2596040
    },
    {
      "epoch": 4.24850912851934,
      "grad_norm": 0.07352757453918457,
      "learning_rate": 1.5128852223532747e-06,
      "loss": 0.0067,
      "step": 2596060
    },
    {
      "epoch": 4.248541858957994,
      "grad_norm": 0.1684902310371399,
      "learning_rate": 1.5128193301397577e-06,
      "loss": 0.0091,
      "step": 2596080
    },
    {
      "epoch": 4.248574589396647,
      "grad_norm": 0.1105450764298439,
      "learning_rate": 1.5127534379262404e-06,
      "loss": 0.0117,
      "step": 2596100
    },
    {
      "epoch": 4.2486073198353,
      "grad_norm": 0.3403169512748718,
      "learning_rate": 1.5126875457127233e-06,
      "loss": 0.0091,
      "step": 2596120
    },
    {
      "epoch": 4.248640050273954,
      "grad_norm": 0.0813777819275856,
      "learning_rate": 1.512621653499206e-06,
      "loss": 0.0102,
      "step": 2596140
    },
    {
      "epoch": 4.2486727807126075,
      "grad_norm": 0.26277875900268555,
      "learning_rate": 1.5125557612856888e-06,
      "loss": 0.0129,
      "step": 2596160
    },
    {
      "epoch": 4.24870551115126,
      "grad_norm": 0.14057676494121552,
      "learning_rate": 1.5124898690721718e-06,
      "loss": 0.0085,
      "step": 2596180
    },
    {
      "epoch": 4.248738241589914,
      "grad_norm": 0.39034196734428406,
      "learning_rate": 1.5124239768586545e-06,
      "loss": 0.0098,
      "step": 2596200
    },
    {
      "epoch": 4.248770972028567,
      "grad_norm": 0.2402833104133606,
      "learning_rate": 1.5123580846451377e-06,
      "loss": 0.0123,
      "step": 2596220
    },
    {
      "epoch": 4.24880370246722,
      "grad_norm": 0.16968899965286255,
      "learning_rate": 1.5122921924316206e-06,
      "loss": 0.0055,
      "step": 2596240
    },
    {
      "epoch": 4.248836432905874,
      "grad_norm": 0.3027327060699463,
      "learning_rate": 1.5122263002181034e-06,
      "loss": 0.0091,
      "step": 2596260
    },
    {
      "epoch": 4.248869163344527,
      "grad_norm": 0.3684659004211426,
      "learning_rate": 1.5121604080045863e-06,
      "loss": 0.009,
      "step": 2596280
    },
    {
      "epoch": 4.248901893783181,
      "grad_norm": 0.5298458337783813,
      "learning_rate": 1.512094515791069e-06,
      "loss": 0.0106,
      "step": 2596300
    },
    {
      "epoch": 4.2489346242218335,
      "grad_norm": 0.2487122267484665,
      "learning_rate": 1.512028623577552e-06,
      "loss": 0.0083,
      "step": 2596320
    },
    {
      "epoch": 4.248967354660487,
      "grad_norm": 0.14018020033836365,
      "learning_rate": 1.5119627313640348e-06,
      "loss": 0.0123,
      "step": 2596340
    },
    {
      "epoch": 4.249000085099141,
      "grad_norm": 0.26403293013572693,
      "learning_rate": 1.5118968391505177e-06,
      "loss": 0.0109,
      "step": 2596360
    },
    {
      "epoch": 4.249032815537793,
      "grad_norm": 0.2174237221479416,
      "learning_rate": 1.5118309469370004e-06,
      "loss": 0.0082,
      "step": 2596380
    },
    {
      "epoch": 4.249065545976447,
      "grad_norm": 0.4681810736656189,
      "learning_rate": 1.5117650547234836e-06,
      "loss": 0.0085,
      "step": 2596400
    },
    {
      "epoch": 4.249098276415101,
      "grad_norm": 0.19576947391033173,
      "learning_rate": 1.5116991625099663e-06,
      "loss": 0.0142,
      "step": 2596420
    },
    {
      "epoch": 4.249131006853754,
      "grad_norm": 0.13328930735588074,
      "learning_rate": 1.5116332702964493e-06,
      "loss": 0.0074,
      "step": 2596440
    },
    {
      "epoch": 4.249163737292407,
      "grad_norm": 0.07652805745601654,
      "learning_rate": 1.511567378082932e-06,
      "loss": 0.0109,
      "step": 2596460
    },
    {
      "epoch": 4.2491964677310605,
      "grad_norm": 0.38948529958724976,
      "learning_rate": 1.511501485869415e-06,
      "loss": 0.0061,
      "step": 2596480
    },
    {
      "epoch": 4.249229198169714,
      "grad_norm": 0.09971777349710464,
      "learning_rate": 1.5114355936558977e-06,
      "loss": 0.0082,
      "step": 2596500
    },
    {
      "epoch": 4.249261928608367,
      "grad_norm": 0.2470836490392685,
      "learning_rate": 1.5113697014423807e-06,
      "loss": 0.0068,
      "step": 2596520
    },
    {
      "epoch": 4.24929465904702,
      "grad_norm": 0.8594002723693848,
      "learning_rate": 1.5113038092288634e-06,
      "loss": 0.0101,
      "step": 2596540
    },
    {
      "epoch": 4.249327389485674,
      "grad_norm": 0.4677552282810211,
      "learning_rate": 1.5112379170153466e-06,
      "loss": 0.0061,
      "step": 2596560
    },
    {
      "epoch": 4.249360119924328,
      "grad_norm": 0.3501158058643341,
      "learning_rate": 1.5111720248018293e-06,
      "loss": 0.0156,
      "step": 2596580
    },
    {
      "epoch": 4.24939285036298,
      "grad_norm": 0.1723618507385254,
      "learning_rate": 1.5111061325883123e-06,
      "loss": 0.0078,
      "step": 2596600
    },
    {
      "epoch": 4.249425580801634,
      "grad_norm": 0.357673317193985,
      "learning_rate": 1.511040240374795e-06,
      "loss": 0.0081,
      "step": 2596620
    },
    {
      "epoch": 4.2494583112402875,
      "grad_norm": 0.2634187340736389,
      "learning_rate": 1.510974348161278e-06,
      "loss": 0.0099,
      "step": 2596640
    },
    {
      "epoch": 4.24949104167894,
      "grad_norm": 0.39007651805877686,
      "learning_rate": 1.5109084559477607e-06,
      "loss": 0.0107,
      "step": 2596660
    },
    {
      "epoch": 4.249523772117594,
      "grad_norm": 0.8954223990440369,
      "learning_rate": 1.5108425637342437e-06,
      "loss": 0.0086,
      "step": 2596680
    },
    {
      "epoch": 4.249556502556247,
      "grad_norm": 0.2071547657251358,
      "learning_rate": 1.5107766715207264e-06,
      "loss": 0.0083,
      "step": 2596700
    },
    {
      "epoch": 4.249589232994901,
      "grad_norm": 0.2968750298023224,
      "learning_rate": 1.5107107793072093e-06,
      "loss": 0.0108,
      "step": 2596720
    },
    {
      "epoch": 4.249621963433554,
      "grad_norm": 0.12256374210119247,
      "learning_rate": 1.5106448870936923e-06,
      "loss": 0.0117,
      "step": 2596740
    },
    {
      "epoch": 4.249654693872207,
      "grad_norm": 1.788724660873413,
      "learning_rate": 1.5105789948801753e-06,
      "loss": 0.0141,
      "step": 2596760
    },
    {
      "epoch": 4.249687424310861,
      "grad_norm": 0.2234165221452713,
      "learning_rate": 1.510513102666658e-06,
      "loss": 0.0063,
      "step": 2596780
    },
    {
      "epoch": 4.249720154749514,
      "grad_norm": 0.1506877988576889,
      "learning_rate": 1.510447210453141e-06,
      "loss": 0.0075,
      "step": 2596800
    },
    {
      "epoch": 4.249752885188167,
      "grad_norm": 0.16015389561653137,
      "learning_rate": 1.5103813182396237e-06,
      "loss": 0.0119,
      "step": 2596820
    },
    {
      "epoch": 4.249785615626821,
      "grad_norm": 0.36034247279167175,
      "learning_rate": 1.5103154260261066e-06,
      "loss": 0.011,
      "step": 2596840
    },
    {
      "epoch": 4.249818346065474,
      "grad_norm": 0.12299586087465286,
      "learning_rate": 1.5102495338125894e-06,
      "loss": 0.0074,
      "step": 2596860
    },
    {
      "epoch": 4.249851076504127,
      "grad_norm": 0.4128148555755615,
      "learning_rate": 1.5101836415990723e-06,
      "loss": 0.0088,
      "step": 2596880
    },
    {
      "epoch": 4.249883806942781,
      "grad_norm": 0.17998455464839935,
      "learning_rate": 1.510117749385555e-06,
      "loss": 0.0065,
      "step": 2596900
    },
    {
      "epoch": 4.249916537381434,
      "grad_norm": 0.2365991473197937,
      "learning_rate": 1.5100518571720382e-06,
      "loss": 0.0078,
      "step": 2596920
    },
    {
      "epoch": 4.249949267820087,
      "grad_norm": 0.3771764934062958,
      "learning_rate": 1.509985964958521e-06,
      "loss": 0.0083,
      "step": 2596940
    },
    {
      "epoch": 4.249981998258741,
      "grad_norm": 0.3466845750808716,
      "learning_rate": 1.509920072745004e-06,
      "loss": 0.0095,
      "step": 2596960
    },
    {
      "epoch": 4.250014728697394,
      "grad_norm": 0.388759970664978,
      "learning_rate": 1.5098541805314867e-06,
      "loss": 0.0058,
      "step": 2596980
    },
    {
      "epoch": 4.250047459136048,
      "grad_norm": 0.10317466408014297,
      "learning_rate": 1.5097882883179696e-06,
      "loss": 0.007,
      "step": 2597000
    },
    {
      "epoch": 4.2500801895747005,
      "grad_norm": 0.7991026639938354,
      "learning_rate": 1.5097223961044523e-06,
      "loss": 0.0075,
      "step": 2597020
    },
    {
      "epoch": 4.250112920013354,
      "grad_norm": 0.14830590784549713,
      "learning_rate": 1.5096565038909353e-06,
      "loss": 0.0098,
      "step": 2597040
    },
    {
      "epoch": 4.250145650452008,
      "grad_norm": 0.1747744381427765,
      "learning_rate": 1.509590611677418e-06,
      "loss": 0.0106,
      "step": 2597060
    },
    {
      "epoch": 4.25017838089066,
      "grad_norm": 0.14541932940483093,
      "learning_rate": 1.509524719463901e-06,
      "loss": 0.0123,
      "step": 2597080
    },
    {
      "epoch": 4.250211111329314,
      "grad_norm": 0.16256849467754364,
      "learning_rate": 1.5094588272503842e-06,
      "loss": 0.0132,
      "step": 2597100
    },
    {
      "epoch": 4.2502438417679675,
      "grad_norm": 0.27123674750328064,
      "learning_rate": 1.509392935036867e-06,
      "loss": 0.0077,
      "step": 2597120
    },
    {
      "epoch": 4.250276572206621,
      "grad_norm": 0.3506162464618683,
      "learning_rate": 1.5093270428233498e-06,
      "loss": 0.0086,
      "step": 2597140
    },
    {
      "epoch": 4.250309302645274,
      "grad_norm": 0.13841207325458527,
      "learning_rate": 1.5092611506098326e-06,
      "loss": 0.0075,
      "step": 2597160
    },
    {
      "epoch": 4.250342033083927,
      "grad_norm": 0.17134974896907806,
      "learning_rate": 1.5091952583963153e-06,
      "loss": 0.0081,
      "step": 2597180
    },
    {
      "epoch": 4.250374763522581,
      "grad_norm": 0.15515896677970886,
      "learning_rate": 1.5091293661827983e-06,
      "loss": 0.0059,
      "step": 2597200
    },
    {
      "epoch": 4.250407493961234,
      "grad_norm": 0.39300015568733215,
      "learning_rate": 1.509063473969281e-06,
      "loss": 0.0061,
      "step": 2597220
    },
    {
      "epoch": 4.250440224399887,
      "grad_norm": 0.1213618814945221,
      "learning_rate": 1.508997581755764e-06,
      "loss": 0.0104,
      "step": 2597240
    },
    {
      "epoch": 4.250472954838541,
      "grad_norm": 0.24276167154312134,
      "learning_rate": 1.5089316895422467e-06,
      "loss": 0.0054,
      "step": 2597260
    },
    {
      "epoch": 4.250505685277194,
      "grad_norm": 0.5059400796890259,
      "learning_rate": 1.5088657973287299e-06,
      "loss": 0.0103,
      "step": 2597280
    },
    {
      "epoch": 4.250538415715847,
      "grad_norm": 0.28686732053756714,
      "learning_rate": 1.5087999051152128e-06,
      "loss": 0.0086,
      "step": 2597300
    },
    {
      "epoch": 4.250571146154501,
      "grad_norm": 0.5659241080284119,
      "learning_rate": 1.5087340129016956e-06,
      "loss": 0.0098,
      "step": 2597320
    },
    {
      "epoch": 4.250603876593154,
      "grad_norm": 0.16047297418117523,
      "learning_rate": 1.5086681206881785e-06,
      "loss": 0.0082,
      "step": 2597340
    },
    {
      "epoch": 4.250636607031807,
      "grad_norm": 0.08661287277936935,
      "learning_rate": 1.5086022284746613e-06,
      "loss": 0.0071,
      "step": 2597360
    },
    {
      "epoch": 4.250669337470461,
      "grad_norm": 0.3223237991333008,
      "learning_rate": 1.508536336261144e-06,
      "loss": 0.0094,
      "step": 2597380
    },
    {
      "epoch": 4.250702067909114,
      "grad_norm": 0.217718705534935,
      "learning_rate": 1.508470444047627e-06,
      "loss": 0.0128,
      "step": 2597400
    },
    {
      "epoch": 4.250734798347768,
      "grad_norm": 0.17534947395324707,
      "learning_rate": 1.5084045518341097e-06,
      "loss": 0.0101,
      "step": 2597420
    },
    {
      "epoch": 4.250767528786421,
      "grad_norm": 0.14121735095977783,
      "learning_rate": 1.5083386596205928e-06,
      "loss": 0.0072,
      "step": 2597440
    },
    {
      "epoch": 4.250800259225074,
      "grad_norm": 0.9745044112205505,
      "learning_rate": 1.5082727674070758e-06,
      "loss": 0.0103,
      "step": 2597460
    },
    {
      "epoch": 4.250832989663728,
      "grad_norm": 0.33050432801246643,
      "learning_rate": 1.5082068751935585e-06,
      "loss": 0.0118,
      "step": 2597480
    },
    {
      "epoch": 4.2508657201023805,
      "grad_norm": 0.21459925174713135,
      "learning_rate": 1.5081409829800415e-06,
      "loss": 0.0075,
      "step": 2597500
    },
    {
      "epoch": 4.250898450541034,
      "grad_norm": 0.32914289832115173,
      "learning_rate": 1.5080750907665242e-06,
      "loss": 0.0078,
      "step": 2597520
    },
    {
      "epoch": 4.250931180979688,
      "grad_norm": 0.304713636636734,
      "learning_rate": 1.5080091985530072e-06,
      "loss": 0.0083,
      "step": 2597540
    },
    {
      "epoch": 4.25096391141834,
      "grad_norm": 0.2480839043855667,
      "learning_rate": 1.50794330633949e-06,
      "loss": 0.0067,
      "step": 2597560
    },
    {
      "epoch": 4.250996641856994,
      "grad_norm": 0.6966927647590637,
      "learning_rate": 1.5078774141259729e-06,
      "loss": 0.0117,
      "step": 2597580
    },
    {
      "epoch": 4.251029372295648,
      "grad_norm": 0.7139354944229126,
      "learning_rate": 1.5078115219124556e-06,
      "loss": 0.0119,
      "step": 2597600
    },
    {
      "epoch": 4.251062102734301,
      "grad_norm": 2.6381924152374268,
      "learning_rate": 1.5077456296989388e-06,
      "loss": 0.0073,
      "step": 2597620
    },
    {
      "epoch": 4.251094833172954,
      "grad_norm": 0.33753839135169983,
      "learning_rate": 1.5076797374854215e-06,
      "loss": 0.0106,
      "step": 2597640
    },
    {
      "epoch": 4.2511275636116075,
      "grad_norm": 0.2569151520729065,
      "learning_rate": 1.5076138452719045e-06,
      "loss": 0.0082,
      "step": 2597660
    },
    {
      "epoch": 4.251160294050261,
      "grad_norm": 0.1212029829621315,
      "learning_rate": 1.5075479530583872e-06,
      "loss": 0.0099,
      "step": 2597680
    },
    {
      "epoch": 4.251193024488914,
      "grad_norm": 0.08778513222932816,
      "learning_rate": 1.5074820608448702e-06,
      "loss": 0.0117,
      "step": 2597700
    },
    {
      "epoch": 4.251225754927567,
      "grad_norm": 0.8479941487312317,
      "learning_rate": 1.507416168631353e-06,
      "loss": 0.0082,
      "step": 2597720
    },
    {
      "epoch": 4.251258485366221,
      "grad_norm": 0.24790319800376892,
      "learning_rate": 1.5073502764178359e-06,
      "loss": 0.0125,
      "step": 2597740
    },
    {
      "epoch": 4.251291215804875,
      "grad_norm": 0.29791831970214844,
      "learning_rate": 1.5072843842043186e-06,
      "loss": 0.007,
      "step": 2597760
    },
    {
      "epoch": 4.251323946243527,
      "grad_norm": 0.05834118276834488,
      "learning_rate": 1.5072184919908015e-06,
      "loss": 0.0094,
      "step": 2597780
    },
    {
      "epoch": 4.251356676682181,
      "grad_norm": 0.15355545282363892,
      "learning_rate": 1.5071525997772845e-06,
      "loss": 0.0097,
      "step": 2597800
    },
    {
      "epoch": 4.2513894071208345,
      "grad_norm": 0.24557608366012573,
      "learning_rate": 1.5070867075637674e-06,
      "loss": 0.0082,
      "step": 2597820
    },
    {
      "epoch": 4.251422137559487,
      "grad_norm": 0.37670791149139404,
      "learning_rate": 1.5070208153502502e-06,
      "loss": 0.009,
      "step": 2597840
    },
    {
      "epoch": 4.251454867998141,
      "grad_norm": 0.07344602048397064,
      "learning_rate": 1.5069549231367331e-06,
      "loss": 0.0066,
      "step": 2597860
    },
    {
      "epoch": 4.251487598436794,
      "grad_norm": 0.16923049092292786,
      "learning_rate": 1.5068890309232159e-06,
      "loss": 0.0075,
      "step": 2597880
    },
    {
      "epoch": 4.251520328875448,
      "grad_norm": 0.26859337091445923,
      "learning_rate": 1.5068231387096988e-06,
      "loss": 0.0096,
      "step": 2597900
    },
    {
      "epoch": 4.251553059314101,
      "grad_norm": 0.30937737226486206,
      "learning_rate": 1.5067572464961816e-06,
      "loss": 0.0169,
      "step": 2597920
    },
    {
      "epoch": 4.251585789752754,
      "grad_norm": 0.1371598094701767,
      "learning_rate": 1.5066913542826645e-06,
      "loss": 0.0114,
      "step": 2597940
    },
    {
      "epoch": 4.251618520191408,
      "grad_norm": 0.1503189206123352,
      "learning_rate": 1.5066254620691473e-06,
      "loss": 0.0085,
      "step": 2597960
    },
    {
      "epoch": 4.2516512506300606,
      "grad_norm": 0.39000222086906433,
      "learning_rate": 1.5065595698556304e-06,
      "loss": 0.0135,
      "step": 2597980
    },
    {
      "epoch": 4.251683981068714,
      "grad_norm": 0.21401965618133545,
      "learning_rate": 1.5064936776421132e-06,
      "loss": 0.0079,
      "step": 2598000
    },
    {
      "epoch": 4.251716711507368,
      "grad_norm": 0.21355809271335602,
      "learning_rate": 1.5064277854285961e-06,
      "loss": 0.0129,
      "step": 2598020
    },
    {
      "epoch": 4.251749441946021,
      "grad_norm": 0.18301451206207275,
      "learning_rate": 1.5063618932150789e-06,
      "loss": 0.011,
      "step": 2598040
    },
    {
      "epoch": 4.251782172384674,
      "grad_norm": 0.2640332579612732,
      "learning_rate": 1.5062960010015618e-06,
      "loss": 0.0073,
      "step": 2598060
    },
    {
      "epoch": 4.251814902823328,
      "grad_norm": 0.364117830991745,
      "learning_rate": 1.5062301087880445e-06,
      "loss": 0.014,
      "step": 2598080
    },
    {
      "epoch": 4.251847633261981,
      "grad_norm": 0.09072691947221756,
      "learning_rate": 1.5061642165745275e-06,
      "loss": 0.0078,
      "step": 2598100
    },
    {
      "epoch": 4.251880363700634,
      "grad_norm": 0.07629933208227158,
      "learning_rate": 1.5060983243610102e-06,
      "loss": 0.0122,
      "step": 2598120
    },
    {
      "epoch": 4.2519130941392875,
      "grad_norm": Infinity,
      "learning_rate": 1.5060324321474932e-06,
      "loss": 0.0057,
      "step": 2598140
    },
    {
      "epoch": 4.251945824577941,
      "grad_norm": 0.10351510345935822,
      "learning_rate": 1.5059665399339761e-06,
      "loss": 0.0073,
      "step": 2598160
    },
    {
      "epoch": 4.251978555016595,
      "grad_norm": 0.215328186750412,
      "learning_rate": 1.505900647720459e-06,
      "loss": 0.0098,
      "step": 2598180
    },
    {
      "epoch": 4.252011285455247,
      "grad_norm": 0.3195681571960449,
      "learning_rate": 1.5058347555069418e-06,
      "loss": 0.01,
      "step": 2598200
    },
    {
      "epoch": 4.252044015893901,
      "grad_norm": 0.3584448993206024,
      "learning_rate": 1.5057688632934248e-06,
      "loss": 0.0085,
      "step": 2598220
    },
    {
      "epoch": 4.252076746332555,
      "grad_norm": 0.6363696455955505,
      "learning_rate": 1.5057029710799075e-06,
      "loss": 0.0078,
      "step": 2598240
    },
    {
      "epoch": 4.252109476771207,
      "grad_norm": 0.06404871493577957,
      "learning_rate": 1.5056370788663905e-06,
      "loss": 0.0111,
      "step": 2598260
    },
    {
      "epoch": 4.252142207209861,
      "grad_norm": 0.2818003296852112,
      "learning_rate": 1.5055711866528732e-06,
      "loss": 0.0096,
      "step": 2598280
    },
    {
      "epoch": 4.2521749376485145,
      "grad_norm": 0.05486944317817688,
      "learning_rate": 1.5055052944393562e-06,
      "loss": 0.0081,
      "step": 2598300
    },
    {
      "epoch": 4.252207668087168,
      "grad_norm": 0.1870797872543335,
      "learning_rate": 1.5054394022258393e-06,
      "loss": 0.0063,
      "step": 2598320
    },
    {
      "epoch": 4.252240398525821,
      "grad_norm": 1.5441040992736816,
      "learning_rate": 1.505373510012322e-06,
      "loss": 0.0166,
      "step": 2598340
    },
    {
      "epoch": 4.252273128964474,
      "grad_norm": 0.413690447807312,
      "learning_rate": 1.5053076177988048e-06,
      "loss": 0.0123,
      "step": 2598360
    },
    {
      "epoch": 4.252305859403128,
      "grad_norm": 0.1813618391752243,
      "learning_rate": 1.5052417255852878e-06,
      "loss": 0.0068,
      "step": 2598380
    },
    {
      "epoch": 4.252338589841781,
      "grad_norm": 0.0744633823633194,
      "learning_rate": 1.5051758333717705e-06,
      "loss": 0.0054,
      "step": 2598400
    },
    {
      "epoch": 4.252371320280434,
      "grad_norm": 0.29475605487823486,
      "learning_rate": 1.5051099411582534e-06,
      "loss": 0.0105,
      "step": 2598420
    },
    {
      "epoch": 4.252404050719088,
      "grad_norm": 0.12364999949932098,
      "learning_rate": 1.5050440489447362e-06,
      "loss": 0.0116,
      "step": 2598440
    },
    {
      "epoch": 4.2524367811577415,
      "grad_norm": 0.20489351451396942,
      "learning_rate": 1.5049781567312191e-06,
      "loss": 0.0088,
      "step": 2598460
    },
    {
      "epoch": 4.252469511596394,
      "grad_norm": 0.3144262731075287,
      "learning_rate": 1.5049122645177019e-06,
      "loss": 0.0154,
      "step": 2598480
    },
    {
      "epoch": 4.252502242035048,
      "grad_norm": 0.31655555963516235,
      "learning_rate": 1.504846372304185e-06,
      "loss": 0.0099,
      "step": 2598500
    },
    {
      "epoch": 4.252534972473701,
      "grad_norm": 0.17569606006145477,
      "learning_rate": 1.504780480090668e-06,
      "loss": 0.0087,
      "step": 2598520
    },
    {
      "epoch": 4.252567702912354,
      "grad_norm": 0.5155979990959167,
      "learning_rate": 1.5047145878771507e-06,
      "loss": 0.0092,
      "step": 2598540
    },
    {
      "epoch": 4.252600433351008,
      "grad_norm": 0.9212320446968079,
      "learning_rate": 1.5046486956636337e-06,
      "loss": 0.0072,
      "step": 2598560
    },
    {
      "epoch": 4.252633163789661,
      "grad_norm": 0.1988283097743988,
      "learning_rate": 1.5045828034501164e-06,
      "loss": 0.01,
      "step": 2598580
    },
    {
      "epoch": 4.252665894228315,
      "grad_norm": 0.2287992388010025,
      "learning_rate": 1.5045169112365992e-06,
      "loss": 0.0057,
      "step": 2598600
    },
    {
      "epoch": 4.252698624666968,
      "grad_norm": 0.15386748313903809,
      "learning_rate": 1.5044510190230821e-06,
      "loss": 0.0065,
      "step": 2598620
    },
    {
      "epoch": 4.252731355105621,
      "grad_norm": 0.47303858399391174,
      "learning_rate": 1.5043851268095649e-06,
      "loss": 0.0113,
      "step": 2598640
    },
    {
      "epoch": 4.252764085544275,
      "grad_norm": 0.365181565284729,
      "learning_rate": 1.5043192345960478e-06,
      "loss": 0.009,
      "step": 2598660
    },
    {
      "epoch": 4.2527968159829275,
      "grad_norm": 0.10741693526506424,
      "learning_rate": 1.504253342382531e-06,
      "loss": 0.0083,
      "step": 2598680
    },
    {
      "epoch": 4.252829546421581,
      "grad_norm": 0.07185299694538116,
      "learning_rate": 1.5041874501690137e-06,
      "loss": 0.0053,
      "step": 2598700
    },
    {
      "epoch": 4.252862276860235,
      "grad_norm": 0.23570142686367035,
      "learning_rate": 1.5041215579554967e-06,
      "loss": 0.0091,
      "step": 2598720
    },
    {
      "epoch": 4.252895007298887,
      "grad_norm": 0.23994186520576477,
      "learning_rate": 1.5040556657419794e-06,
      "loss": 0.0086,
      "step": 2598740
    },
    {
      "epoch": 4.252927737737541,
      "grad_norm": 0.4412033259868622,
      "learning_rate": 1.5039897735284624e-06,
      "loss": 0.0073,
      "step": 2598760
    },
    {
      "epoch": 4.2529604681761946,
      "grad_norm": 0.33060815930366516,
      "learning_rate": 1.503923881314945e-06,
      "loss": 0.0085,
      "step": 2598780
    },
    {
      "epoch": 4.252993198614848,
      "grad_norm": 0.14268803596496582,
      "learning_rate": 1.503857989101428e-06,
      "loss": 0.0109,
      "step": 2598800
    },
    {
      "epoch": 4.253025929053501,
      "grad_norm": 0.1360331028699875,
      "learning_rate": 1.5037920968879108e-06,
      "loss": 0.0072,
      "step": 2598820
    },
    {
      "epoch": 4.2530586594921544,
      "grad_norm": 0.15998812019824982,
      "learning_rate": 1.5037262046743935e-06,
      "loss": 0.0082,
      "step": 2598840
    },
    {
      "epoch": 4.253091389930808,
      "grad_norm": 0.12597329914569855,
      "learning_rate": 1.5036603124608767e-06,
      "loss": 0.0089,
      "step": 2598860
    },
    {
      "epoch": 4.253124120369462,
      "grad_norm": 0.6007862091064453,
      "learning_rate": 1.5035944202473596e-06,
      "loss": 0.0106,
      "step": 2598880
    },
    {
      "epoch": 4.253156850808114,
      "grad_norm": 0.21739082038402557,
      "learning_rate": 1.5035285280338424e-06,
      "loss": 0.0078,
      "step": 2598900
    },
    {
      "epoch": 4.253189581246768,
      "grad_norm": 0.40940093994140625,
      "learning_rate": 1.5034626358203253e-06,
      "loss": 0.0091,
      "step": 2598920
    },
    {
      "epoch": 4.2532223116854215,
      "grad_norm": 0.1476621776819229,
      "learning_rate": 1.503396743606808e-06,
      "loss": 0.0073,
      "step": 2598940
    },
    {
      "epoch": 4.253255042124074,
      "grad_norm": 0.21244868636131287,
      "learning_rate": 1.503330851393291e-06,
      "loss": 0.0077,
      "step": 2598960
    },
    {
      "epoch": 4.253287772562728,
      "grad_norm": 0.9996280670166016,
      "learning_rate": 1.5032649591797738e-06,
      "loss": 0.0074,
      "step": 2598980
    },
    {
      "epoch": 4.253320503001381,
      "grad_norm": 0.25789523124694824,
      "learning_rate": 1.5031990669662567e-06,
      "loss": 0.0091,
      "step": 2599000
    },
    {
      "epoch": 4.253353233440034,
      "grad_norm": 0.21038506925106049,
      "learning_rate": 1.5031331747527395e-06,
      "loss": 0.0095,
      "step": 2599020
    },
    {
      "epoch": 4.253385963878688,
      "grad_norm": 0.15930293500423431,
      "learning_rate": 1.5030672825392226e-06,
      "loss": 0.0074,
      "step": 2599040
    },
    {
      "epoch": 4.253418694317341,
      "grad_norm": 0.17404372990131378,
      "learning_rate": 1.5030013903257054e-06,
      "loss": 0.01,
      "step": 2599060
    },
    {
      "epoch": 4.253451424755995,
      "grad_norm": 0.15525390207767487,
      "learning_rate": 1.5029354981121883e-06,
      "loss": 0.0087,
      "step": 2599080
    },
    {
      "epoch": 4.253484155194648,
      "grad_norm": 0.4514303207397461,
      "learning_rate": 1.502869605898671e-06,
      "loss": 0.01,
      "step": 2599100
    },
    {
      "epoch": 4.253516885633301,
      "grad_norm": 0.43269169330596924,
      "learning_rate": 1.502803713685154e-06,
      "loss": 0.0115,
      "step": 2599120
    },
    {
      "epoch": 4.253549616071955,
      "grad_norm": 0.4269585907459259,
      "learning_rate": 1.5027378214716367e-06,
      "loss": 0.0086,
      "step": 2599140
    },
    {
      "epoch": 4.253582346510608,
      "grad_norm": 0.30562624335289,
      "learning_rate": 1.5026719292581197e-06,
      "loss": 0.0148,
      "step": 2599160
    },
    {
      "epoch": 4.253615076949261,
      "grad_norm": 0.26197296380996704,
      "learning_rate": 1.5026060370446024e-06,
      "loss": 0.0076,
      "step": 2599180
    },
    {
      "epoch": 4.253647807387915,
      "grad_norm": 0.21413156390190125,
      "learning_rate": 1.5025401448310856e-06,
      "loss": 0.0103,
      "step": 2599200
    },
    {
      "epoch": 4.253680537826568,
      "grad_norm": 0.12226516008377075,
      "learning_rate": 1.5024742526175683e-06,
      "loss": 0.0089,
      "step": 2599220
    },
    {
      "epoch": 4.253713268265221,
      "grad_norm": 0.33242741227149963,
      "learning_rate": 1.5024083604040513e-06,
      "loss": 0.0067,
      "step": 2599240
    },
    {
      "epoch": 4.253745998703875,
      "grad_norm": 0.2045939713716507,
      "learning_rate": 1.502342468190534e-06,
      "loss": 0.0085,
      "step": 2599260
    },
    {
      "epoch": 4.253778729142528,
      "grad_norm": 0.1100408211350441,
      "learning_rate": 1.502276575977017e-06,
      "loss": 0.006,
      "step": 2599280
    },
    {
      "epoch": 4.253811459581181,
      "grad_norm": 0.43453356623649597,
      "learning_rate": 1.5022106837634997e-06,
      "loss": 0.0086,
      "step": 2599300
    },
    {
      "epoch": 4.2538441900198345,
      "grad_norm": 0.19858166575431824,
      "learning_rate": 1.5021447915499827e-06,
      "loss": 0.0094,
      "step": 2599320
    },
    {
      "epoch": 4.253876920458488,
      "grad_norm": 0.4289824366569519,
      "learning_rate": 1.5020788993364654e-06,
      "loss": 0.011,
      "step": 2599340
    },
    {
      "epoch": 4.253909650897142,
      "grad_norm": 0.08222739398479462,
      "learning_rate": 1.5020130071229484e-06,
      "loss": 0.0101,
      "step": 2599360
    },
    {
      "epoch": 4.253942381335794,
      "grad_norm": 0.1571006327867508,
      "learning_rate": 1.5019471149094313e-06,
      "loss": 0.0087,
      "step": 2599380
    },
    {
      "epoch": 4.253975111774448,
      "grad_norm": 0.07354068756103516,
      "learning_rate": 1.5018812226959143e-06,
      "loss": 0.0097,
      "step": 2599400
    },
    {
      "epoch": 4.254007842213102,
      "grad_norm": 0.2052668035030365,
      "learning_rate": 1.501815330482397e-06,
      "loss": 0.0083,
      "step": 2599420
    },
    {
      "epoch": 4.254040572651754,
      "grad_norm": 0.15449365973472595,
      "learning_rate": 1.50174943826888e-06,
      "loss": 0.0094,
      "step": 2599440
    },
    {
      "epoch": 4.254073303090408,
      "grad_norm": 0.25560271739959717,
      "learning_rate": 1.5016835460553627e-06,
      "loss": 0.0116,
      "step": 2599460
    },
    {
      "epoch": 4.2541060335290615,
      "grad_norm": 0.36706534028053284,
      "learning_rate": 1.5016176538418456e-06,
      "loss": 0.0117,
      "step": 2599480
    },
    {
      "epoch": 4.254138763967715,
      "grad_norm": 0.2233567237854004,
      "learning_rate": 1.5015517616283284e-06,
      "loss": 0.0059,
      "step": 2599500
    },
    {
      "epoch": 4.254171494406368,
      "grad_norm": 0.18732993304729462,
      "learning_rate": 1.5014858694148113e-06,
      "loss": 0.0153,
      "step": 2599520
    },
    {
      "epoch": 4.254204224845021,
      "grad_norm": 0.11089032143354416,
      "learning_rate": 1.501419977201294e-06,
      "loss": 0.0062,
      "step": 2599540
    },
    {
      "epoch": 4.254236955283675,
      "grad_norm": 0.2893344759941101,
      "learning_rate": 1.5013540849877772e-06,
      "loss": 0.0142,
      "step": 2599560
    },
    {
      "epoch": 4.254269685722328,
      "grad_norm": 0.07866028696298599,
      "learning_rate": 1.50128819277426e-06,
      "loss": 0.01,
      "step": 2599580
    },
    {
      "epoch": 4.254302416160981,
      "grad_norm": 0.1063971221446991,
      "learning_rate": 1.501222300560743e-06,
      "loss": 0.0087,
      "step": 2599600
    },
    {
      "epoch": 4.254335146599635,
      "grad_norm": 0.09060989320278168,
      "learning_rate": 1.5011564083472257e-06,
      "loss": 0.0106,
      "step": 2599620
    },
    {
      "epoch": 4.2543678770382884,
      "grad_norm": 0.19130729138851166,
      "learning_rate": 1.5010905161337086e-06,
      "loss": 0.0087,
      "step": 2599640
    },
    {
      "epoch": 4.254400607476941,
      "grad_norm": 0.3503887951374054,
      "learning_rate": 1.5010246239201914e-06,
      "loss": 0.0101,
      "step": 2599660
    },
    {
      "epoch": 4.254433337915595,
      "grad_norm": 0.6294217705726624,
      "learning_rate": 1.5009587317066743e-06,
      "loss": 0.008,
      "step": 2599680
    },
    {
      "epoch": 4.254466068354248,
      "grad_norm": 0.19583095610141754,
      "learning_rate": 1.500892839493157e-06,
      "loss": 0.0071,
      "step": 2599700
    },
    {
      "epoch": 4.254498798792901,
      "grad_norm": 0.3062536418437958,
      "learning_rate": 1.50082694727964e-06,
      "loss": 0.0078,
      "step": 2599720
    },
    {
      "epoch": 4.254531529231555,
      "grad_norm": 0.061869971454143524,
      "learning_rate": 1.5007610550661232e-06,
      "loss": 0.0071,
      "step": 2599740
    },
    {
      "epoch": 4.254564259670208,
      "grad_norm": 0.4172190725803375,
      "learning_rate": 1.500695162852606e-06,
      "loss": 0.0108,
      "step": 2599760
    },
    {
      "epoch": 4.254596990108862,
      "grad_norm": 0.10566229373216629,
      "learning_rate": 1.5006292706390889e-06,
      "loss": 0.0082,
      "step": 2599780
    },
    {
      "epoch": 4.2546297205475145,
      "grad_norm": 0.7678902745246887,
      "learning_rate": 1.5005633784255716e-06,
      "loss": 0.0136,
      "step": 2599800
    },
    {
      "epoch": 4.254662450986168,
      "grad_norm": 0.25524356961250305,
      "learning_rate": 1.5004974862120543e-06,
      "loss": 0.01,
      "step": 2599820
    },
    {
      "epoch": 4.254695181424822,
      "grad_norm": 0.10112173110246658,
      "learning_rate": 1.5004315939985373e-06,
      "loss": 0.0075,
      "step": 2599840
    },
    {
      "epoch": 4.254727911863474,
      "grad_norm": 0.1891370713710785,
      "learning_rate": 1.50036570178502e-06,
      "loss": 0.0124,
      "step": 2599860
    },
    {
      "epoch": 4.254760642302128,
      "grad_norm": 0.40320518612861633,
      "learning_rate": 1.500299809571503e-06,
      "loss": 0.0082,
      "step": 2599880
    },
    {
      "epoch": 4.254793372740782,
      "grad_norm": 0.20294833183288574,
      "learning_rate": 1.5002339173579857e-06,
      "loss": 0.0102,
      "step": 2599900
    },
    {
      "epoch": 4.254826103179435,
      "grad_norm": 0.2914011478424072,
      "learning_rate": 1.5001680251444689e-06,
      "loss": 0.0142,
      "step": 2599920
    },
    {
      "epoch": 4.254858833618088,
      "grad_norm": 0.19816072285175323,
      "learning_rate": 1.5001021329309518e-06,
      "loss": 0.0079,
      "step": 2599940
    },
    {
      "epoch": 4.2548915640567415,
      "grad_norm": 0.15419329702854156,
      "learning_rate": 1.5000362407174346e-06,
      "loss": 0.0072,
      "step": 2599960
    },
    {
      "epoch": 4.254924294495395,
      "grad_norm": 0.30775710940361023,
      "learning_rate": 1.4999703485039175e-06,
      "loss": 0.0077,
      "step": 2599980
    },
    {
      "epoch": 4.254957024934048,
      "grad_norm": 0.10439653694629669,
      "learning_rate": 1.4999044562904003e-06,
      "loss": 0.0148,
      "step": 2600000
    },
    {
      "epoch": 4.254957024934048,
      "eval_loss": 0.005996686406433582,
      "eval_runtime": 6470.8698,
      "eval_samples_per_second": 158.844,
      "eval_steps_per_second": 15.884,
      "eval_sts-dev_pearson_cosine": 0.986411667861422,
      "eval_sts-dev_spearman_cosine": 0.896479345576279,
      "step": 2600000
    },
    {
      "epoch": 4.254989755372701,
      "grad_norm": 0.3227784335613251,
      "learning_rate": 1.499838564076883e-06,
      "loss": 0.0071,
      "step": 2600020
    },
    {
      "epoch": 4.255022485811355,
      "grad_norm": 0.15537405014038086,
      "learning_rate": 1.499772671863366e-06,
      "loss": 0.0082,
      "step": 2600040
    },
    {
      "epoch": 4.255055216250009,
      "grad_norm": 0.40331292152404785,
      "learning_rate": 1.4997067796498487e-06,
      "loss": 0.0129,
      "step": 2600060
    },
    {
      "epoch": 4.255087946688661,
      "grad_norm": 0.15489691495895386,
      "learning_rate": 1.4996408874363319e-06,
      "loss": 0.0072,
      "step": 2600080
    },
    {
      "epoch": 4.255120677127315,
      "grad_norm": 0.4398527443408966,
      "learning_rate": 1.4995749952228148e-06,
      "loss": 0.0102,
      "step": 2600100
    },
    {
      "epoch": 4.2551534075659685,
      "grad_norm": 0.3079877197742462,
      "learning_rate": 1.4995091030092976e-06,
      "loss": 0.0106,
      "step": 2600120
    },
    {
      "epoch": 4.255186138004621,
      "grad_norm": 0.591844379901886,
      "learning_rate": 1.4994432107957805e-06,
      "loss": 0.0088,
      "step": 2600140
    },
    {
      "epoch": 4.255218868443275,
      "grad_norm": 0.4405900239944458,
      "learning_rate": 1.4993773185822632e-06,
      "loss": 0.0105,
      "step": 2600160
    },
    {
      "epoch": 4.255251598881928,
      "grad_norm": 0.1620301604270935,
      "learning_rate": 1.4993114263687462e-06,
      "loss": 0.0098,
      "step": 2600180
    },
    {
      "epoch": 4.255284329320581,
      "grad_norm": 0.12533727288246155,
      "learning_rate": 1.499245534155229e-06,
      "loss": 0.0073,
      "step": 2600200
    },
    {
      "epoch": 4.255317059759235,
      "grad_norm": 0.30716365575790405,
      "learning_rate": 1.4991796419417119e-06,
      "loss": 0.0099,
      "step": 2600220
    },
    {
      "epoch": 4.255349790197888,
      "grad_norm": 0.549890398979187,
      "learning_rate": 1.4991137497281946e-06,
      "loss": 0.0124,
      "step": 2600240
    },
    {
      "epoch": 4.255382520636542,
      "grad_norm": 0.16254284977912903,
      "learning_rate": 1.4990478575146778e-06,
      "loss": 0.0059,
      "step": 2600260
    },
    {
      "epoch": 4.255415251075195,
      "grad_norm": 1.2456930875778198,
      "learning_rate": 1.4989819653011605e-06,
      "loss": 0.0077,
      "step": 2600280
    },
    {
      "epoch": 4.255447981513848,
      "grad_norm": 0.3420133888721466,
      "learning_rate": 1.4989160730876435e-06,
      "loss": 0.0112,
      "step": 2600300
    },
    {
      "epoch": 4.255480711952502,
      "grad_norm": 0.8095391988754272,
      "learning_rate": 1.4988501808741262e-06,
      "loss": 0.0115,
      "step": 2600320
    },
    {
      "epoch": 4.255513442391155,
      "grad_norm": 0.22327181696891785,
      "learning_rate": 1.4987842886606092e-06,
      "loss": 0.0072,
      "step": 2600340
    },
    {
      "epoch": 4.255546172829808,
      "grad_norm": 0.08514045923948288,
      "learning_rate": 1.498718396447092e-06,
      "loss": 0.0067,
      "step": 2600360
    },
    {
      "epoch": 4.255578903268462,
      "grad_norm": 0.14186277985572815,
      "learning_rate": 1.4986525042335749e-06,
      "loss": 0.0086,
      "step": 2600380
    },
    {
      "epoch": 4.255611633707115,
      "grad_norm": 0.4494892656803131,
      "learning_rate": 1.4985866120200576e-06,
      "loss": 0.0133,
      "step": 2600400
    },
    {
      "epoch": 4.255644364145768,
      "grad_norm": 0.43329957127571106,
      "learning_rate": 1.4985207198065406e-06,
      "loss": 0.0097,
      "step": 2600420
    },
    {
      "epoch": 4.255677094584422,
      "grad_norm": 0.0892934501171112,
      "learning_rate": 1.4984548275930235e-06,
      "loss": 0.0073,
      "step": 2600440
    },
    {
      "epoch": 4.255709825023075,
      "grad_norm": 0.3679283857345581,
      "learning_rate": 1.4983889353795065e-06,
      "loss": 0.0105,
      "step": 2600460
    },
    {
      "epoch": 4.255742555461728,
      "grad_norm": 0.32734668254852295,
      "learning_rate": 1.4983230431659892e-06,
      "loss": 0.0062,
      "step": 2600480
    },
    {
      "epoch": 4.2557752859003815,
      "grad_norm": 0.09979258477687836,
      "learning_rate": 1.4982571509524721e-06,
      "loss": 0.0113,
      "step": 2600500
    },
    {
      "epoch": 4.255808016339035,
      "grad_norm": 0.07338917255401611,
      "learning_rate": 1.4981912587389549e-06,
      "loss": 0.0076,
      "step": 2600520
    },
    {
      "epoch": 4.255840746777689,
      "grad_norm": 0.07273515313863754,
      "learning_rate": 1.4981253665254378e-06,
      "loss": 0.0067,
      "step": 2600540
    },
    {
      "epoch": 4.255873477216341,
      "grad_norm": 0.6173761487007141,
      "learning_rate": 1.4980594743119206e-06,
      "loss": 0.0107,
      "step": 2600560
    },
    {
      "epoch": 4.255906207654995,
      "grad_norm": 0.1633664220571518,
      "learning_rate": 1.4979935820984035e-06,
      "loss": 0.0091,
      "step": 2600580
    },
    {
      "epoch": 4.2559389380936485,
      "grad_norm": 0.6572149991989136,
      "learning_rate": 1.4979276898848863e-06,
      "loss": 0.013,
      "step": 2600600
    },
    {
      "epoch": 4.255971668532302,
      "grad_norm": 0.3444197177886963,
      "learning_rate": 1.4978617976713694e-06,
      "loss": 0.0068,
      "step": 2600620
    },
    {
      "epoch": 4.256004398970955,
      "grad_norm": 0.17332960665225983,
      "learning_rate": 1.4977959054578522e-06,
      "loss": 0.0081,
      "step": 2600640
    },
    {
      "epoch": 4.256037129409608,
      "grad_norm": 0.20000678300857544,
      "learning_rate": 1.4977300132443351e-06,
      "loss": 0.0067,
      "step": 2600660
    },
    {
      "epoch": 4.256069859848262,
      "grad_norm": 0.22830310463905334,
      "learning_rate": 1.4976641210308179e-06,
      "loss": 0.0083,
      "step": 2600680
    },
    {
      "epoch": 4.256102590286915,
      "grad_norm": 0.619851291179657,
      "learning_rate": 1.4975982288173008e-06,
      "loss": 0.01,
      "step": 2600700
    },
    {
      "epoch": 4.256135320725568,
      "grad_norm": 0.3113965392112732,
      "learning_rate": 1.4975323366037836e-06,
      "loss": 0.0105,
      "step": 2600720
    },
    {
      "epoch": 4.256168051164222,
      "grad_norm": 0.2719542980194092,
      "learning_rate": 1.4974664443902665e-06,
      "loss": 0.0107,
      "step": 2600740
    },
    {
      "epoch": 4.256200781602875,
      "grad_norm": 0.5464512705802917,
      "learning_rate": 1.4974005521767492e-06,
      "loss": 0.0109,
      "step": 2600760
    },
    {
      "epoch": 4.256233512041528,
      "grad_norm": 0.3940833508968353,
      "learning_rate": 1.4973346599632322e-06,
      "loss": 0.0105,
      "step": 2600780
    },
    {
      "epoch": 4.256266242480182,
      "grad_norm": 0.3672524094581604,
      "learning_rate": 1.4972687677497151e-06,
      "loss": 0.0058,
      "step": 2600800
    },
    {
      "epoch": 4.256298972918835,
      "grad_norm": 0.26035282015800476,
      "learning_rate": 1.497202875536198e-06,
      "loss": 0.0076,
      "step": 2600820
    },
    {
      "epoch": 4.256331703357488,
      "grad_norm": 0.3654175102710724,
      "learning_rate": 1.4971369833226808e-06,
      "loss": 0.0095,
      "step": 2600840
    },
    {
      "epoch": 4.256364433796142,
      "grad_norm": 0.6100460290908813,
      "learning_rate": 1.4970710911091638e-06,
      "loss": 0.011,
      "step": 2600860
    },
    {
      "epoch": 4.256397164234795,
      "grad_norm": 0.5945279002189636,
      "learning_rate": 1.4970051988956465e-06,
      "loss": 0.0083,
      "step": 2600880
    },
    {
      "epoch": 4.256429894673448,
      "grad_norm": 0.056457649916410446,
      "learning_rate": 1.4969393066821295e-06,
      "loss": 0.0088,
      "step": 2600900
    },
    {
      "epoch": 4.256462625112102,
      "grad_norm": 0.3013734817504883,
      "learning_rate": 1.4968734144686122e-06,
      "loss": 0.0083,
      "step": 2600920
    },
    {
      "epoch": 4.256495355550755,
      "grad_norm": 0.4866048991680145,
      "learning_rate": 1.4968075222550952e-06,
      "loss": 0.0124,
      "step": 2600940
    },
    {
      "epoch": 4.256528085989409,
      "grad_norm": 0.21810628473758698,
      "learning_rate": 1.4967416300415783e-06,
      "loss": 0.0095,
      "step": 2600960
    },
    {
      "epoch": 4.2565608164280615,
      "grad_norm": 0.14616195857524872,
      "learning_rate": 1.496675737828061e-06,
      "loss": 0.0087,
      "step": 2600980
    },
    {
      "epoch": 4.256593546866715,
      "grad_norm": 0.3151485323905945,
      "learning_rate": 1.4966098456145438e-06,
      "loss": 0.0139,
      "step": 2601000
    },
    {
      "epoch": 4.256626277305369,
      "grad_norm": 0.0637829527258873,
      "learning_rate": 1.4965439534010268e-06,
      "loss": 0.0084,
      "step": 2601020
    },
    {
      "epoch": 4.256659007744021,
      "grad_norm": 0.3457753360271454,
      "learning_rate": 1.4964780611875095e-06,
      "loss": 0.0148,
      "step": 2601040
    },
    {
      "epoch": 4.256691738182675,
      "grad_norm": 0.15143658220767975,
      "learning_rate": 1.4964121689739925e-06,
      "loss": 0.0061,
      "step": 2601060
    },
    {
      "epoch": 4.256724468621329,
      "grad_norm": 0.2269037365913391,
      "learning_rate": 1.4963462767604752e-06,
      "loss": 0.0098,
      "step": 2601080
    },
    {
      "epoch": 4.256757199059982,
      "grad_norm": 0.5078712701797485,
      "learning_rate": 1.4962803845469582e-06,
      "loss": 0.0097,
      "step": 2601100
    },
    {
      "epoch": 4.256789929498635,
      "grad_norm": 0.16831563413143158,
      "learning_rate": 1.4962144923334409e-06,
      "loss": 0.0096,
      "step": 2601120
    },
    {
      "epoch": 4.2568226599372885,
      "grad_norm": 0.3861437737941742,
      "learning_rate": 1.496148600119924e-06,
      "loss": 0.0145,
      "step": 2601140
    },
    {
      "epoch": 4.256855390375942,
      "grad_norm": 0.23397544026374817,
      "learning_rate": 1.496082707906407e-06,
      "loss": 0.0075,
      "step": 2601160
    },
    {
      "epoch": 4.256888120814595,
      "grad_norm": 0.30431613326072693,
      "learning_rate": 1.4960168156928897e-06,
      "loss": 0.0108,
      "step": 2601180
    },
    {
      "epoch": 4.256920851253248,
      "grad_norm": 0.23202867805957794,
      "learning_rate": 1.4959509234793727e-06,
      "loss": 0.0103,
      "step": 2601200
    },
    {
      "epoch": 4.256953581691902,
      "grad_norm": 0.03705417364835739,
      "learning_rate": 1.4958850312658554e-06,
      "loss": 0.0085,
      "step": 2601220
    },
    {
      "epoch": 4.256986312130556,
      "grad_norm": 0.17184865474700928,
      "learning_rate": 1.4958191390523382e-06,
      "loss": 0.0069,
      "step": 2601240
    },
    {
      "epoch": 4.257019042569208,
      "grad_norm": 0.3806796371936798,
      "learning_rate": 1.4957532468388211e-06,
      "loss": 0.0087,
      "step": 2601260
    },
    {
      "epoch": 4.257051773007862,
      "grad_norm": 0.07870713621377945,
      "learning_rate": 1.4956873546253039e-06,
      "loss": 0.0096,
      "step": 2601280
    },
    {
      "epoch": 4.2570845034465155,
      "grad_norm": 0.08188523352146149,
      "learning_rate": 1.4956214624117868e-06,
      "loss": 0.0064,
      "step": 2601300
    },
    {
      "epoch": 4.257117233885168,
      "grad_norm": 0.06335031986236572,
      "learning_rate": 1.49555557019827e-06,
      "loss": 0.0074,
      "step": 2601320
    },
    {
      "epoch": 4.257149964323822,
      "grad_norm": 0.22114144265651703,
      "learning_rate": 1.4954896779847527e-06,
      "loss": 0.0081,
      "step": 2601340
    },
    {
      "epoch": 4.257182694762475,
      "grad_norm": 0.09667860716581345,
      "learning_rate": 1.4954237857712357e-06,
      "loss": 0.0117,
      "step": 2601360
    },
    {
      "epoch": 4.257215425201129,
      "grad_norm": 0.26274874806404114,
      "learning_rate": 1.4953578935577184e-06,
      "loss": 0.0088,
      "step": 2601380
    },
    {
      "epoch": 4.257248155639782,
      "grad_norm": 0.025227336212992668,
      "learning_rate": 1.4952920013442014e-06,
      "loss": 0.0102,
      "step": 2601400
    },
    {
      "epoch": 4.257280886078435,
      "grad_norm": 0.19942577183246613,
      "learning_rate": 1.495226109130684e-06,
      "loss": 0.0101,
      "step": 2601420
    },
    {
      "epoch": 4.257313616517089,
      "grad_norm": 0.4515269100666046,
      "learning_rate": 1.495160216917167e-06,
      "loss": 0.0123,
      "step": 2601440
    },
    {
      "epoch": 4.2573463469557415,
      "grad_norm": 0.1585126370191574,
      "learning_rate": 1.4950943247036498e-06,
      "loss": 0.0105,
      "step": 2601460
    },
    {
      "epoch": 4.257379077394395,
      "grad_norm": 0.1583409309387207,
      "learning_rate": 1.4950284324901325e-06,
      "loss": 0.0118,
      "step": 2601480
    },
    {
      "epoch": 4.257411807833049,
      "grad_norm": 0.18770521879196167,
      "learning_rate": 1.4949625402766157e-06,
      "loss": 0.0085,
      "step": 2601500
    },
    {
      "epoch": 4.257444538271702,
      "grad_norm": 1.0167615413665771,
      "learning_rate": 1.4948966480630986e-06,
      "loss": 0.0081,
      "step": 2601520
    },
    {
      "epoch": 4.257477268710355,
      "grad_norm": 0.29042425751686096,
      "learning_rate": 1.4948307558495814e-06,
      "loss": 0.0069,
      "step": 2601540
    },
    {
      "epoch": 4.257509999149009,
      "grad_norm": 0.23347711563110352,
      "learning_rate": 1.4947648636360643e-06,
      "loss": 0.0083,
      "step": 2601560
    },
    {
      "epoch": 4.257542729587662,
      "grad_norm": 0.6883378624916077,
      "learning_rate": 1.494698971422547e-06,
      "loss": 0.0109,
      "step": 2601580
    },
    {
      "epoch": 4.257575460026315,
      "grad_norm": 0.6359086036682129,
      "learning_rate": 1.49463307920903e-06,
      "loss": 0.0122,
      "step": 2601600
    },
    {
      "epoch": 4.2576081904649685,
      "grad_norm": 0.27491095662117004,
      "learning_rate": 1.4945671869955128e-06,
      "loss": 0.0088,
      "step": 2601620
    },
    {
      "epoch": 4.257640920903622,
      "grad_norm": 0.2968543767929077,
      "learning_rate": 1.4945012947819957e-06,
      "loss": 0.0127,
      "step": 2601640
    },
    {
      "epoch": 4.257673651342275,
      "grad_norm": 0.12955570220947266,
      "learning_rate": 1.4944354025684787e-06,
      "loss": 0.0152,
      "step": 2601660
    },
    {
      "epoch": 4.257706381780928,
      "grad_norm": 0.4133068919181824,
      "learning_rate": 1.4943695103549616e-06,
      "loss": 0.0096,
      "step": 2601680
    },
    {
      "epoch": 4.257739112219582,
      "grad_norm": 0.17526690661907196,
      "learning_rate": 1.4943036181414444e-06,
      "loss": 0.0072,
      "step": 2601700
    },
    {
      "epoch": 4.257771842658236,
      "grad_norm": 0.3812103271484375,
      "learning_rate": 1.4942377259279273e-06,
      "loss": 0.0077,
      "step": 2601720
    },
    {
      "epoch": 4.257804573096888,
      "grad_norm": 0.07692957669496536,
      "learning_rate": 1.49417183371441e-06,
      "loss": 0.0079,
      "step": 2601740
    },
    {
      "epoch": 4.257837303535542,
      "grad_norm": 0.14212845265865326,
      "learning_rate": 1.494105941500893e-06,
      "loss": 0.0073,
      "step": 2601760
    },
    {
      "epoch": 4.2578700339741955,
      "grad_norm": 0.06224972382187843,
      "learning_rate": 1.4940400492873757e-06,
      "loss": 0.0074,
      "step": 2601780
    },
    {
      "epoch": 4.257902764412849,
      "grad_norm": 0.3560888171195984,
      "learning_rate": 1.4939741570738587e-06,
      "loss": 0.0077,
      "step": 2601800
    },
    {
      "epoch": 4.257935494851502,
      "grad_norm": 0.17243598401546478,
      "learning_rate": 1.4939082648603414e-06,
      "loss": 0.0088,
      "step": 2601820
    },
    {
      "epoch": 4.257968225290155,
      "grad_norm": 0.23522181808948517,
      "learning_rate": 1.4938423726468246e-06,
      "loss": 0.0084,
      "step": 2601840
    },
    {
      "epoch": 4.258000955728809,
      "grad_norm": 0.3493460416793823,
      "learning_rate": 1.4937764804333073e-06,
      "loss": 0.0098,
      "step": 2601860
    },
    {
      "epoch": 4.258033686167462,
      "grad_norm": 0.20441076159477234,
      "learning_rate": 1.4937105882197903e-06,
      "loss": 0.0087,
      "step": 2601880
    },
    {
      "epoch": 4.258066416606115,
      "grad_norm": 0.23653243482112885,
      "learning_rate": 1.493644696006273e-06,
      "loss": 0.0077,
      "step": 2601900
    },
    {
      "epoch": 4.258099147044769,
      "grad_norm": 0.12732066214084625,
      "learning_rate": 1.493578803792756e-06,
      "loss": 0.0084,
      "step": 2601920
    },
    {
      "epoch": 4.258131877483422,
      "grad_norm": 0.17657974362373352,
      "learning_rate": 1.4935129115792387e-06,
      "loss": 0.009,
      "step": 2601940
    },
    {
      "epoch": 4.258164607922075,
      "grad_norm": 0.36371704936027527,
      "learning_rate": 1.4934470193657217e-06,
      "loss": 0.0081,
      "step": 2601960
    },
    {
      "epoch": 4.258197338360729,
      "grad_norm": 0.5806627869606018,
      "learning_rate": 1.4933811271522044e-06,
      "loss": 0.0077,
      "step": 2601980
    },
    {
      "epoch": 4.258230068799382,
      "grad_norm": 0.4468185305595398,
      "learning_rate": 1.4933152349386874e-06,
      "loss": 0.0106,
      "step": 2602000
    },
    {
      "epoch": 4.258262799238035,
      "grad_norm": 0.2748583257198334,
      "learning_rate": 1.4932493427251703e-06,
      "loss": 0.0095,
      "step": 2602020
    },
    {
      "epoch": 4.258295529676689,
      "grad_norm": 0.22783850133419037,
      "learning_rate": 1.4931834505116533e-06,
      "loss": 0.0087,
      "step": 2602040
    },
    {
      "epoch": 4.258328260115342,
      "grad_norm": 0.183487668633461,
      "learning_rate": 1.493117558298136e-06,
      "loss": 0.0128,
      "step": 2602060
    },
    {
      "epoch": 4.258360990553996,
      "grad_norm": 0.3315974175930023,
      "learning_rate": 1.493051666084619e-06,
      "loss": 0.0125,
      "step": 2602080
    },
    {
      "epoch": 4.258393720992649,
      "grad_norm": 0.3753478229045868,
      "learning_rate": 1.4929857738711017e-06,
      "loss": 0.0122,
      "step": 2602100
    },
    {
      "epoch": 4.258426451431302,
      "grad_norm": 0.3941431939601898,
      "learning_rate": 1.4929198816575847e-06,
      "loss": 0.0116,
      "step": 2602120
    },
    {
      "epoch": 4.258459181869956,
      "grad_norm": 0.5496563911437988,
      "learning_rate": 1.4928539894440674e-06,
      "loss": 0.0087,
      "step": 2602140
    },
    {
      "epoch": 4.2584919123086085,
      "grad_norm": 0.17085282504558563,
      "learning_rate": 1.4927880972305503e-06,
      "loss": 0.0083,
      "step": 2602160
    },
    {
      "epoch": 4.258524642747262,
      "grad_norm": 0.1892334222793579,
      "learning_rate": 1.492722205017033e-06,
      "loss": 0.0066,
      "step": 2602180
    },
    {
      "epoch": 4.258557373185916,
      "grad_norm": 0.4942946135997772,
      "learning_rate": 1.4926563128035162e-06,
      "loss": 0.0136,
      "step": 2602200
    },
    {
      "epoch": 4.258590103624568,
      "grad_norm": 0.4103468060493469,
      "learning_rate": 1.492590420589999e-06,
      "loss": 0.0081,
      "step": 2602220
    },
    {
      "epoch": 4.258622834063222,
      "grad_norm": 0.47144582867622375,
      "learning_rate": 1.492524528376482e-06,
      "loss": 0.0058,
      "step": 2602240
    },
    {
      "epoch": 4.2586555645018755,
      "grad_norm": 0.23837526142597198,
      "learning_rate": 1.4924586361629647e-06,
      "loss": 0.0109,
      "step": 2602260
    },
    {
      "epoch": 4.258688294940529,
      "grad_norm": 0.15381431579589844,
      "learning_rate": 1.4923927439494476e-06,
      "loss": 0.0119,
      "step": 2602280
    },
    {
      "epoch": 4.258721025379182,
      "grad_norm": 0.02899683266878128,
      "learning_rate": 1.4923268517359304e-06,
      "loss": 0.0056,
      "step": 2602300
    },
    {
      "epoch": 4.258753755817835,
      "grad_norm": 0.15909673273563385,
      "learning_rate": 1.4922609595224133e-06,
      "loss": 0.0074,
      "step": 2602320
    },
    {
      "epoch": 4.258786486256489,
      "grad_norm": 0.399724543094635,
      "learning_rate": 1.492195067308896e-06,
      "loss": 0.0078,
      "step": 2602340
    },
    {
      "epoch": 4.258819216695142,
      "grad_norm": 0.32454049587249756,
      "learning_rate": 1.492129175095379e-06,
      "loss": 0.0069,
      "step": 2602360
    },
    {
      "epoch": 4.258851947133795,
      "grad_norm": 0.22143986821174622,
      "learning_rate": 1.4920632828818622e-06,
      "loss": 0.0079,
      "step": 2602380
    },
    {
      "epoch": 4.258884677572449,
      "grad_norm": 0.08336521685123444,
      "learning_rate": 1.491997390668345e-06,
      "loss": 0.0097,
      "step": 2602400
    },
    {
      "epoch": 4.2589174080111025,
      "grad_norm": 0.4651048481464386,
      "learning_rate": 1.4919314984548279e-06,
      "loss": 0.0089,
      "step": 2602420
    },
    {
      "epoch": 4.258950138449755,
      "grad_norm": 0.2214018702507019,
      "learning_rate": 1.4918656062413106e-06,
      "loss": 0.0063,
      "step": 2602440
    },
    {
      "epoch": 4.258982868888409,
      "grad_norm": 0.2207966148853302,
      "learning_rate": 1.4917997140277933e-06,
      "loss": 0.0092,
      "step": 2602460
    },
    {
      "epoch": 4.259015599327062,
      "grad_norm": 0.08387713134288788,
      "learning_rate": 1.4917338218142763e-06,
      "loss": 0.0083,
      "step": 2602480
    },
    {
      "epoch": 4.259048329765715,
      "grad_norm": 0.5038619041442871,
      "learning_rate": 1.491667929600759e-06,
      "loss": 0.0086,
      "step": 2602500
    },
    {
      "epoch": 4.259081060204369,
      "grad_norm": 0.3084646761417389,
      "learning_rate": 1.491602037387242e-06,
      "loss": 0.0083,
      "step": 2602520
    },
    {
      "epoch": 4.259113790643022,
      "grad_norm": 0.1682870090007782,
      "learning_rate": 1.4915361451737252e-06,
      "loss": 0.0119,
      "step": 2602540
    },
    {
      "epoch": 4.259146521081676,
      "grad_norm": 0.2735094428062439,
      "learning_rate": 1.4914702529602079e-06,
      "loss": 0.0083,
      "step": 2602560
    },
    {
      "epoch": 4.259179251520329,
      "grad_norm": 0.12077794224023819,
      "learning_rate": 1.4914043607466908e-06,
      "loss": 0.0113,
      "step": 2602580
    },
    {
      "epoch": 4.259211981958982,
      "grad_norm": 0.39110296964645386,
      "learning_rate": 1.4913384685331736e-06,
      "loss": 0.0083,
      "step": 2602600
    },
    {
      "epoch": 4.259244712397636,
      "grad_norm": 0.09312349557876587,
      "learning_rate": 1.4912725763196565e-06,
      "loss": 0.0096,
      "step": 2602620
    },
    {
      "epoch": 4.2592774428362885,
      "grad_norm": 0.3183676600456238,
      "learning_rate": 1.4912066841061393e-06,
      "loss": 0.0088,
      "step": 2602640
    },
    {
      "epoch": 4.259310173274942,
      "grad_norm": 0.2645263671875,
      "learning_rate": 1.491140791892622e-06,
      "loss": 0.0086,
      "step": 2602660
    },
    {
      "epoch": 4.259342903713596,
      "grad_norm": 0.3338841497898102,
      "learning_rate": 1.491074899679105e-06,
      "loss": 0.0076,
      "step": 2602680
    },
    {
      "epoch": 4.259375634152249,
      "grad_norm": 0.1261933594942093,
      "learning_rate": 1.4910090074655877e-06,
      "loss": 0.0096,
      "step": 2602700
    },
    {
      "epoch": 4.259408364590902,
      "grad_norm": 0.20545999705791473,
      "learning_rate": 1.4909431152520709e-06,
      "loss": 0.0088,
      "step": 2602720
    },
    {
      "epoch": 4.259441095029556,
      "grad_norm": 0.23104441165924072,
      "learning_rate": 1.4908772230385538e-06,
      "loss": 0.0084,
      "step": 2602740
    },
    {
      "epoch": 4.259473825468209,
      "grad_norm": 0.21366296708583832,
      "learning_rate": 1.4908113308250366e-06,
      "loss": 0.005,
      "step": 2602760
    },
    {
      "epoch": 4.259506555906862,
      "grad_norm": 0.26143935322761536,
      "learning_rate": 1.4907454386115195e-06,
      "loss": 0.0095,
      "step": 2602780
    },
    {
      "epoch": 4.2595392863455155,
      "grad_norm": 0.26040157675743103,
      "learning_rate": 1.4906795463980023e-06,
      "loss": 0.0073,
      "step": 2602800
    },
    {
      "epoch": 4.259572016784169,
      "grad_norm": 0.11392100900411606,
      "learning_rate": 1.4906136541844852e-06,
      "loss": 0.0061,
      "step": 2602820
    },
    {
      "epoch": 4.259604747222823,
      "grad_norm": 0.19486857950687408,
      "learning_rate": 1.490547761970968e-06,
      "loss": 0.0062,
      "step": 2602840
    },
    {
      "epoch": 4.259637477661475,
      "grad_norm": 0.19715183973312378,
      "learning_rate": 1.490481869757451e-06,
      "loss": 0.0108,
      "step": 2602860
    },
    {
      "epoch": 4.259670208100129,
      "grad_norm": 0.11607275158166885,
      "learning_rate": 1.4904159775439336e-06,
      "loss": 0.0054,
      "step": 2602880
    },
    {
      "epoch": 4.259702938538783,
      "grad_norm": 0.08792082220315933,
      "learning_rate": 1.4903500853304168e-06,
      "loss": 0.0079,
      "step": 2602900
    },
    {
      "epoch": 4.259735668977435,
      "grad_norm": 0.46742507815361023,
      "learning_rate": 1.4902841931168995e-06,
      "loss": 0.0093,
      "step": 2602920
    },
    {
      "epoch": 4.259768399416089,
      "grad_norm": 0.1048688217997551,
      "learning_rate": 1.4902183009033825e-06,
      "loss": 0.0083,
      "step": 2602940
    },
    {
      "epoch": 4.2598011298547425,
      "grad_norm": 0.05925052613019943,
      "learning_rate": 1.4901524086898652e-06,
      "loss": 0.0089,
      "step": 2602960
    },
    {
      "epoch": 4.259833860293396,
      "grad_norm": 0.1708684116601944,
      "learning_rate": 1.4900865164763482e-06,
      "loss": 0.0133,
      "step": 2602980
    },
    {
      "epoch": 4.259866590732049,
      "grad_norm": 0.3630346357822418,
      "learning_rate": 1.490020624262831e-06,
      "loss": 0.0077,
      "step": 2603000
    },
    {
      "epoch": 4.259899321170702,
      "grad_norm": 0.3979022204875946,
      "learning_rate": 1.4899547320493139e-06,
      "loss": 0.0091,
      "step": 2603020
    },
    {
      "epoch": 4.259932051609356,
      "grad_norm": 0.09719301760196686,
      "learning_rate": 1.4898888398357966e-06,
      "loss": 0.0081,
      "step": 2603040
    },
    {
      "epoch": 4.259964782048009,
      "grad_norm": 0.14733333885669708,
      "learning_rate": 1.4898229476222796e-06,
      "loss": 0.0085,
      "step": 2603060
    },
    {
      "epoch": 4.259997512486662,
      "grad_norm": 0.20884405076503754,
      "learning_rate": 1.4897570554087625e-06,
      "loss": 0.0086,
      "step": 2603080
    },
    {
      "epoch": 4.260030242925316,
      "grad_norm": 0.07398412376642227,
      "learning_rate": 1.4896911631952455e-06,
      "loss": 0.0085,
      "step": 2603100
    },
    {
      "epoch": 4.2600629733639686,
      "grad_norm": 0.137146458029747,
      "learning_rate": 1.4896252709817282e-06,
      "loss": 0.0078,
      "step": 2603120
    },
    {
      "epoch": 4.260095703802622,
      "grad_norm": 0.26950007677078247,
      "learning_rate": 1.4895593787682112e-06,
      "loss": 0.0141,
      "step": 2603140
    },
    {
      "epoch": 4.260128434241276,
      "grad_norm": 0.3989239037036896,
      "learning_rate": 1.489493486554694e-06,
      "loss": 0.009,
      "step": 2603160
    },
    {
      "epoch": 4.260161164679929,
      "grad_norm": 0.3411606550216675,
      "learning_rate": 1.4894275943411768e-06,
      "loss": 0.0104,
      "step": 2603180
    },
    {
      "epoch": 4.260193895118582,
      "grad_norm": 0.2104453444480896,
      "learning_rate": 1.4893617021276596e-06,
      "loss": 0.0086,
      "step": 2603200
    },
    {
      "epoch": 4.260226625557236,
      "grad_norm": 0.1534784436225891,
      "learning_rate": 1.4892958099141425e-06,
      "loss": 0.0109,
      "step": 2603220
    },
    {
      "epoch": 4.260259355995889,
      "grad_norm": 0.17286987602710724,
      "learning_rate": 1.4892299177006253e-06,
      "loss": 0.0067,
      "step": 2603240
    },
    {
      "epoch": 4.260292086434543,
      "grad_norm": 0.34363383054733276,
      "learning_rate": 1.4891640254871084e-06,
      "loss": 0.0114,
      "step": 2603260
    },
    {
      "epoch": 4.2603248168731955,
      "grad_norm": 0.11545542627573013,
      "learning_rate": 1.4890981332735912e-06,
      "loss": 0.0063,
      "step": 2603280
    },
    {
      "epoch": 4.260357547311849,
      "grad_norm": 0.16739031672477722,
      "learning_rate": 1.4890322410600741e-06,
      "loss": 0.0076,
      "step": 2603300
    },
    {
      "epoch": 4.260390277750503,
      "grad_norm": 0.7301487922668457,
      "learning_rate": 1.4889663488465569e-06,
      "loss": 0.0102,
      "step": 2603320
    },
    {
      "epoch": 4.260423008189155,
      "grad_norm": 0.5337967872619629,
      "learning_rate": 1.4889004566330398e-06,
      "loss": 0.0079,
      "step": 2603340
    },
    {
      "epoch": 4.260455738627809,
      "grad_norm": 0.10959601402282715,
      "learning_rate": 1.4888345644195226e-06,
      "loss": 0.0081,
      "step": 2603360
    },
    {
      "epoch": 4.260488469066463,
      "grad_norm": 0.6020537614822388,
      "learning_rate": 1.4887686722060055e-06,
      "loss": 0.0129,
      "step": 2603380
    },
    {
      "epoch": 4.260521199505115,
      "grad_norm": 0.27913418412208557,
      "learning_rate": 1.4887027799924883e-06,
      "loss": 0.0085,
      "step": 2603400
    },
    {
      "epoch": 4.260553929943769,
      "grad_norm": 0.15661127865314484,
      "learning_rate": 1.4886368877789714e-06,
      "loss": 0.0091,
      "step": 2603420
    },
    {
      "epoch": 4.2605866603824225,
      "grad_norm": 0.3617088496685028,
      "learning_rate": 1.4885709955654542e-06,
      "loss": 0.0097,
      "step": 2603440
    },
    {
      "epoch": 4.260619390821076,
      "grad_norm": 0.12060538679361343,
      "learning_rate": 1.4885051033519371e-06,
      "loss": 0.0123,
      "step": 2603460
    },
    {
      "epoch": 4.260652121259729,
      "grad_norm": 0.18018287420272827,
      "learning_rate": 1.4884392111384199e-06,
      "loss": 0.0065,
      "step": 2603480
    },
    {
      "epoch": 4.260684851698382,
      "grad_norm": 0.12233356386423111,
      "learning_rate": 1.4883733189249028e-06,
      "loss": 0.0087,
      "step": 2603500
    },
    {
      "epoch": 4.260717582137036,
      "grad_norm": 0.20873573422431946,
      "learning_rate": 1.4883074267113855e-06,
      "loss": 0.0096,
      "step": 2603520
    },
    {
      "epoch": 4.26075031257569,
      "grad_norm": 0.1447901874780655,
      "learning_rate": 1.4882415344978685e-06,
      "loss": 0.0103,
      "step": 2603540
    },
    {
      "epoch": 4.260783043014342,
      "grad_norm": 0.10382899641990662,
      "learning_rate": 1.4881756422843512e-06,
      "loss": 0.0127,
      "step": 2603560
    },
    {
      "epoch": 4.260815773452996,
      "grad_norm": 0.34837788343429565,
      "learning_rate": 1.4881097500708342e-06,
      "loss": 0.0086,
      "step": 2603580
    },
    {
      "epoch": 4.2608485038916495,
      "grad_norm": 0.5174976587295532,
      "learning_rate": 1.4880438578573173e-06,
      "loss": 0.0083,
      "step": 2603600
    },
    {
      "epoch": 4.260881234330302,
      "grad_norm": 0.15982238948345184,
      "learning_rate": 1.4879779656438e-06,
      "loss": 0.0063,
      "step": 2603620
    },
    {
      "epoch": 4.260913964768956,
      "grad_norm": 0.20484128594398499,
      "learning_rate": 1.4879120734302828e-06,
      "loss": 0.0113,
      "step": 2603640
    },
    {
      "epoch": 4.260946695207609,
      "grad_norm": 0.14763574302196503,
      "learning_rate": 1.4878461812167658e-06,
      "loss": 0.0074,
      "step": 2603660
    },
    {
      "epoch": 4.260979425646262,
      "grad_norm": 0.24098318815231323,
      "learning_rate": 1.4877802890032485e-06,
      "loss": 0.0145,
      "step": 2603680
    },
    {
      "epoch": 4.261012156084916,
      "grad_norm": 0.22871102392673492,
      "learning_rate": 1.4877143967897315e-06,
      "loss": 0.0058,
      "step": 2603700
    },
    {
      "epoch": 4.261044886523569,
      "grad_norm": 0.3038635849952698,
      "learning_rate": 1.4876485045762142e-06,
      "loss": 0.0106,
      "step": 2603720
    },
    {
      "epoch": 4.261077616962223,
      "grad_norm": 0.2168973982334137,
      "learning_rate": 1.4875826123626972e-06,
      "loss": 0.0115,
      "step": 2603740
    },
    {
      "epoch": 4.261110347400876,
      "grad_norm": 0.1981421411037445,
      "learning_rate": 1.48751672014918e-06,
      "loss": 0.0087,
      "step": 2603760
    },
    {
      "epoch": 4.261143077839529,
      "grad_norm": 0.3605337142944336,
      "learning_rate": 1.487450827935663e-06,
      "loss": 0.0149,
      "step": 2603780
    },
    {
      "epoch": 4.261175808278183,
      "grad_norm": 0.11922101676464081,
      "learning_rate": 1.487384935722146e-06,
      "loss": 0.0102,
      "step": 2603800
    },
    {
      "epoch": 4.2612085387168355,
      "grad_norm": 0.18165823817253113,
      "learning_rate": 1.4873190435086288e-06,
      "loss": 0.0077,
      "step": 2603820
    },
    {
      "epoch": 4.261241269155489,
      "grad_norm": 0.15174534916877747,
      "learning_rate": 1.4872531512951117e-06,
      "loss": 0.0092,
      "step": 2603840
    },
    {
      "epoch": 4.261273999594143,
      "grad_norm": 0.15207532048225403,
      "learning_rate": 1.4871872590815944e-06,
      "loss": 0.0116,
      "step": 2603860
    },
    {
      "epoch": 4.261306730032796,
      "grad_norm": 0.3056078255176544,
      "learning_rate": 1.4871213668680772e-06,
      "loss": 0.0074,
      "step": 2603880
    },
    {
      "epoch": 4.261339460471449,
      "grad_norm": 0.1510191261768341,
      "learning_rate": 1.4870554746545601e-06,
      "loss": 0.0078,
      "step": 2603900
    },
    {
      "epoch": 4.2613721909101026,
      "grad_norm": 0.33777761459350586,
      "learning_rate": 1.4869895824410429e-06,
      "loss": 0.0083,
      "step": 2603920
    },
    {
      "epoch": 4.261404921348756,
      "grad_norm": 0.383858859539032,
      "learning_rate": 1.4869236902275258e-06,
      "loss": 0.0072,
      "step": 2603940
    },
    {
      "epoch": 4.261437651787409,
      "grad_norm": 0.3391401767730713,
      "learning_rate": 1.486857798014009e-06,
      "loss": 0.0118,
      "step": 2603960
    },
    {
      "epoch": 4.2614703822260624,
      "grad_norm": 0.19142621755599976,
      "learning_rate": 1.4867919058004917e-06,
      "loss": 0.0096,
      "step": 2603980
    },
    {
      "epoch": 4.261503112664716,
      "grad_norm": 0.3608824908733368,
      "learning_rate": 1.4867260135869747e-06,
      "loss": 0.0056,
      "step": 2604000
    },
    {
      "epoch": 4.26153584310337,
      "grad_norm": 0.12931248545646667,
      "learning_rate": 1.4866601213734574e-06,
      "loss": 0.0094,
      "step": 2604020
    },
    {
      "epoch": 4.261568573542022,
      "grad_norm": 0.2572975158691406,
      "learning_rate": 1.4865942291599404e-06,
      "loss": 0.0085,
      "step": 2604040
    },
    {
      "epoch": 4.261601303980676,
      "grad_norm": 0.23004330694675446,
      "learning_rate": 1.4865283369464231e-06,
      "loss": 0.0107,
      "step": 2604060
    },
    {
      "epoch": 4.2616340344193295,
      "grad_norm": 0.15673203766345978,
      "learning_rate": 1.486462444732906e-06,
      "loss": 0.0128,
      "step": 2604080
    },
    {
      "epoch": 4.261666764857982,
      "grad_norm": 0.5303133726119995,
      "learning_rate": 1.4863965525193888e-06,
      "loss": 0.0084,
      "step": 2604100
    },
    {
      "epoch": 4.261699495296636,
      "grad_norm": 0.2288585603237152,
      "learning_rate": 1.4863306603058715e-06,
      "loss": 0.0092,
      "step": 2604120
    },
    {
      "epoch": 4.261732225735289,
      "grad_norm": 0.11936305463314056,
      "learning_rate": 1.4862647680923547e-06,
      "loss": 0.0091,
      "step": 2604140
    },
    {
      "epoch": 4.261764956173943,
      "grad_norm": 0.43018871545791626,
      "learning_rate": 1.4861988758788377e-06,
      "loss": 0.0118,
      "step": 2604160
    },
    {
      "epoch": 4.261797686612596,
      "grad_norm": 0.19443121552467346,
      "learning_rate": 1.4861329836653204e-06,
      "loss": 0.0064,
      "step": 2604180
    },
    {
      "epoch": 4.261830417051249,
      "grad_norm": 0.20157258212566376,
      "learning_rate": 1.4860670914518034e-06,
      "loss": 0.0086,
      "step": 2604200
    },
    {
      "epoch": 4.261863147489903,
      "grad_norm": 0.31269872188568115,
      "learning_rate": 1.486001199238286e-06,
      "loss": 0.0116,
      "step": 2604220
    },
    {
      "epoch": 4.261895877928556,
      "grad_norm": 0.2544630467891693,
      "learning_rate": 1.485935307024769e-06,
      "loss": 0.0109,
      "step": 2604240
    },
    {
      "epoch": 4.261928608367209,
      "grad_norm": 0.1518402099609375,
      "learning_rate": 1.4858694148112518e-06,
      "loss": 0.006,
      "step": 2604260
    },
    {
      "epoch": 4.261961338805863,
      "grad_norm": 0.5737155079841614,
      "learning_rate": 1.4858035225977347e-06,
      "loss": 0.0109,
      "step": 2604280
    },
    {
      "epoch": 4.261994069244516,
      "grad_norm": 0.5063631534576416,
      "learning_rate": 1.4857376303842177e-06,
      "loss": 0.0093,
      "step": 2604300
    },
    {
      "epoch": 4.262026799683169,
      "grad_norm": 0.24399659037590027,
      "learning_rate": 1.4856717381707006e-06,
      "loss": 0.007,
      "step": 2604320
    },
    {
      "epoch": 4.262059530121823,
      "grad_norm": 0.368882417678833,
      "learning_rate": 1.4856058459571834e-06,
      "loss": 0.0134,
      "step": 2604340
    },
    {
      "epoch": 4.262092260560476,
      "grad_norm": 0.3596512973308563,
      "learning_rate": 1.4855399537436663e-06,
      "loss": 0.0106,
      "step": 2604360
    },
    {
      "epoch": 4.262124990999129,
      "grad_norm": 0.1823146641254425,
      "learning_rate": 1.485474061530149e-06,
      "loss": 0.0078,
      "step": 2604380
    },
    {
      "epoch": 4.262157721437783,
      "grad_norm": 0.19057472050189972,
      "learning_rate": 1.485408169316632e-06,
      "loss": 0.0076,
      "step": 2604400
    },
    {
      "epoch": 4.262190451876436,
      "grad_norm": 0.1220807209610939,
      "learning_rate": 1.4853422771031148e-06,
      "loss": 0.0123,
      "step": 2604420
    },
    {
      "epoch": 4.26222318231509,
      "grad_norm": 0.15640245378017426,
      "learning_rate": 1.4852763848895977e-06,
      "loss": 0.0084,
      "step": 2604440
    },
    {
      "epoch": 4.2622559127537425,
      "grad_norm": 0.2705236077308655,
      "learning_rate": 1.4852104926760805e-06,
      "loss": 0.0085,
      "step": 2604460
    },
    {
      "epoch": 4.262288643192396,
      "grad_norm": 0.15283456444740295,
      "learning_rate": 1.4851446004625636e-06,
      "loss": 0.0086,
      "step": 2604480
    },
    {
      "epoch": 4.26232137363105,
      "grad_norm": 0.40063413977622986,
      "learning_rate": 1.4850787082490464e-06,
      "loss": 0.0077,
      "step": 2604500
    },
    {
      "epoch": 4.262354104069702,
      "grad_norm": 0.3083113133907318,
      "learning_rate": 1.4850128160355293e-06,
      "loss": 0.0152,
      "step": 2604520
    },
    {
      "epoch": 4.262386834508356,
      "grad_norm": 0.19113105535507202,
      "learning_rate": 1.484946923822012e-06,
      "loss": 0.0118,
      "step": 2604540
    },
    {
      "epoch": 4.26241956494701,
      "grad_norm": 0.3377140760421753,
      "learning_rate": 1.484881031608495e-06,
      "loss": 0.01,
      "step": 2604560
    },
    {
      "epoch": 4.262452295385663,
      "grad_norm": 0.09112755209207535,
      "learning_rate": 1.4848151393949777e-06,
      "loss": 0.0088,
      "step": 2604580
    },
    {
      "epoch": 4.262485025824316,
      "grad_norm": 0.2520093619823456,
      "learning_rate": 1.4847492471814607e-06,
      "loss": 0.0087,
      "step": 2604600
    },
    {
      "epoch": 4.2625177562629695,
      "grad_norm": 0.315836638212204,
      "learning_rate": 1.4846833549679434e-06,
      "loss": 0.0063,
      "step": 2604620
    },
    {
      "epoch": 4.262550486701623,
      "grad_norm": 0.16634871065616608,
      "learning_rate": 1.4846174627544264e-06,
      "loss": 0.0091,
      "step": 2604640
    },
    {
      "epoch": 4.262583217140276,
      "grad_norm": 0.3200230002403259,
      "learning_rate": 1.4845515705409093e-06,
      "loss": 0.0098,
      "step": 2604660
    },
    {
      "epoch": 4.262615947578929,
      "grad_norm": 0.157027930021286,
      "learning_rate": 1.4844856783273923e-06,
      "loss": 0.0134,
      "step": 2604680
    },
    {
      "epoch": 4.262648678017583,
      "grad_norm": 0.4225926101207733,
      "learning_rate": 1.484419786113875e-06,
      "loss": 0.0091,
      "step": 2604700
    },
    {
      "epoch": 4.2626814084562366,
      "grad_norm": 0.14513005316257477,
      "learning_rate": 1.484353893900358e-06,
      "loss": 0.0098,
      "step": 2604720
    },
    {
      "epoch": 4.262714138894889,
      "grad_norm": 0.1767074316740036,
      "learning_rate": 1.4842880016868407e-06,
      "loss": 0.0112,
      "step": 2604740
    },
    {
      "epoch": 4.262746869333543,
      "grad_norm": 0.4148978590965271,
      "learning_rate": 1.4842221094733237e-06,
      "loss": 0.0098,
      "step": 2604760
    },
    {
      "epoch": 4.2627795997721964,
      "grad_norm": 0.42808663845062256,
      "learning_rate": 1.4841562172598064e-06,
      "loss": 0.0133,
      "step": 2604780
    },
    {
      "epoch": 4.262812330210849,
      "grad_norm": 0.10343755781650543,
      "learning_rate": 1.4840903250462894e-06,
      "loss": 0.0077,
      "step": 2604800
    },
    {
      "epoch": 4.262845060649503,
      "grad_norm": 0.423789918422699,
      "learning_rate": 1.484024432832772e-06,
      "loss": 0.0121,
      "step": 2604820
    },
    {
      "epoch": 4.262877791088156,
      "grad_norm": 0.23952627182006836,
      "learning_rate": 1.4839585406192553e-06,
      "loss": 0.011,
      "step": 2604840
    },
    {
      "epoch": 4.262910521526809,
      "grad_norm": 0.22546520829200745,
      "learning_rate": 1.483892648405738e-06,
      "loss": 0.0096,
      "step": 2604860
    },
    {
      "epoch": 4.262943251965463,
      "grad_norm": 0.1327800303697586,
      "learning_rate": 1.483826756192221e-06,
      "loss": 0.0108,
      "step": 2604880
    },
    {
      "epoch": 4.262975982404116,
      "grad_norm": 0.1810431331396103,
      "learning_rate": 1.4837608639787037e-06,
      "loss": 0.0088,
      "step": 2604900
    },
    {
      "epoch": 4.26300871284277,
      "grad_norm": 0.2360415905714035,
      "learning_rate": 1.4836949717651866e-06,
      "loss": 0.0091,
      "step": 2604920
    },
    {
      "epoch": 4.2630414432814225,
      "grad_norm": 0.10924546420574188,
      "learning_rate": 1.4836290795516694e-06,
      "loss": 0.0158,
      "step": 2604940
    },
    {
      "epoch": 4.263074173720076,
      "grad_norm": 0.07332548499107361,
      "learning_rate": 1.4835631873381523e-06,
      "loss": 0.0109,
      "step": 2604960
    },
    {
      "epoch": 4.26310690415873,
      "grad_norm": 0.10928279161453247,
      "learning_rate": 1.483497295124635e-06,
      "loss": 0.0109,
      "step": 2604980
    },
    {
      "epoch": 4.263139634597383,
      "grad_norm": 0.4710118770599365,
      "learning_rate": 1.483431402911118e-06,
      "loss": 0.0129,
      "step": 2605000
    },
    {
      "epoch": 4.263172365036036,
      "grad_norm": 0.18253804743289948,
      "learning_rate": 1.4833655106976012e-06,
      "loss": 0.0103,
      "step": 2605020
    },
    {
      "epoch": 4.26320509547469,
      "grad_norm": 0.4301680624485016,
      "learning_rate": 1.483299618484084e-06,
      "loss": 0.0105,
      "step": 2605040
    },
    {
      "epoch": 4.263237825913343,
      "grad_norm": 0.20623856782913208,
      "learning_rate": 1.4832337262705669e-06,
      "loss": 0.0091,
      "step": 2605060
    },
    {
      "epoch": 4.263270556351996,
      "grad_norm": 0.17804314196109772,
      "learning_rate": 1.4831678340570496e-06,
      "loss": 0.0089,
      "step": 2605080
    },
    {
      "epoch": 4.2633032867906495,
      "grad_norm": 0.17113550007343292,
      "learning_rate": 1.4831019418435324e-06,
      "loss": 0.0124,
      "step": 2605100
    },
    {
      "epoch": 4.263336017229303,
      "grad_norm": 0.3764917254447937,
      "learning_rate": 1.4830360496300153e-06,
      "loss": 0.0063,
      "step": 2605120
    },
    {
      "epoch": 4.263368747667956,
      "grad_norm": 0.3198655843734741,
      "learning_rate": 1.482970157416498e-06,
      "loss": 0.0106,
      "step": 2605140
    },
    {
      "epoch": 4.263401478106609,
      "grad_norm": 0.04286790266633034,
      "learning_rate": 1.482904265202981e-06,
      "loss": 0.0072,
      "step": 2605160
    },
    {
      "epoch": 4.263434208545263,
      "grad_norm": 0.3036644160747528,
      "learning_rate": 1.4828383729894642e-06,
      "loss": 0.0134,
      "step": 2605180
    },
    {
      "epoch": 4.263466938983917,
      "grad_norm": 0.2590968608856201,
      "learning_rate": 1.482772480775947e-06,
      "loss": 0.0094,
      "step": 2605200
    },
    {
      "epoch": 4.263499669422569,
      "grad_norm": 0.24586062133312225,
      "learning_rate": 1.4827065885624299e-06,
      "loss": 0.0101,
      "step": 2605220
    },
    {
      "epoch": 4.263532399861223,
      "grad_norm": 0.10420514643192291,
      "learning_rate": 1.4826406963489126e-06,
      "loss": 0.0077,
      "step": 2605240
    },
    {
      "epoch": 4.2635651302998765,
      "grad_norm": 0.4208305776119232,
      "learning_rate": 1.4825748041353955e-06,
      "loss": 0.0137,
      "step": 2605260
    },
    {
      "epoch": 4.26359786073853,
      "grad_norm": 0.24398183822631836,
      "learning_rate": 1.4825089119218783e-06,
      "loss": 0.0095,
      "step": 2605280
    },
    {
      "epoch": 4.263630591177183,
      "grad_norm": 0.28985509276390076,
      "learning_rate": 1.482443019708361e-06,
      "loss": 0.0108,
      "step": 2605300
    },
    {
      "epoch": 4.263663321615836,
      "grad_norm": 0.3653247654438019,
      "learning_rate": 1.482377127494844e-06,
      "loss": 0.0132,
      "step": 2605320
    },
    {
      "epoch": 4.26369605205449,
      "grad_norm": 0.18517407774925232,
      "learning_rate": 1.4823112352813267e-06,
      "loss": 0.0091,
      "step": 2605340
    },
    {
      "epoch": 4.263728782493143,
      "grad_norm": 0.36747846007347107,
      "learning_rate": 1.4822453430678099e-06,
      "loss": 0.008,
      "step": 2605360
    },
    {
      "epoch": 4.263761512931796,
      "grad_norm": 0.09466331452131271,
      "learning_rate": 1.4821794508542928e-06,
      "loss": 0.0079,
      "step": 2605380
    },
    {
      "epoch": 4.26379424337045,
      "grad_norm": 0.1864478886127472,
      "learning_rate": 1.4821135586407756e-06,
      "loss": 0.0102,
      "step": 2605400
    },
    {
      "epoch": 4.263826973809103,
      "grad_norm": 0.19727195799350739,
      "learning_rate": 1.4820476664272585e-06,
      "loss": 0.0073,
      "step": 2605420
    },
    {
      "epoch": 4.263859704247756,
      "grad_norm": 0.44736239314079285,
      "learning_rate": 1.4819817742137413e-06,
      "loss": 0.0083,
      "step": 2605440
    },
    {
      "epoch": 4.26389243468641,
      "grad_norm": 0.13391943275928497,
      "learning_rate": 1.4819158820002242e-06,
      "loss": 0.0098,
      "step": 2605460
    },
    {
      "epoch": 4.263925165125063,
      "grad_norm": 0.3752986192703247,
      "learning_rate": 1.481849989786707e-06,
      "loss": 0.0096,
      "step": 2605480
    },
    {
      "epoch": 4.263957895563716,
      "grad_norm": 0.3266690969467163,
      "learning_rate": 1.48178409757319e-06,
      "loss": 0.0127,
      "step": 2605500
    },
    {
      "epoch": 4.26399062600237,
      "grad_norm": 0.11997612565755844,
      "learning_rate": 1.4817182053596726e-06,
      "loss": 0.0117,
      "step": 2605520
    },
    {
      "epoch": 4.264023356441023,
      "grad_norm": 0.3470092713832855,
      "learning_rate": 1.4816523131461558e-06,
      "loss": 0.015,
      "step": 2605540
    },
    {
      "epoch": 4.264056086879676,
      "grad_norm": 0.12043318152427673,
      "learning_rate": 1.4815864209326385e-06,
      "loss": 0.01,
      "step": 2605560
    },
    {
      "epoch": 4.26408881731833,
      "grad_norm": 0.15296953916549683,
      "learning_rate": 1.4815205287191215e-06,
      "loss": 0.0134,
      "step": 2605580
    },
    {
      "epoch": 4.264121547756983,
      "grad_norm": 0.16209374368190765,
      "learning_rate": 1.4814546365056042e-06,
      "loss": 0.0128,
      "step": 2605600
    },
    {
      "epoch": 4.264154278195637,
      "grad_norm": 0.26009371876716614,
      "learning_rate": 1.4813887442920872e-06,
      "loss": 0.0069,
      "step": 2605620
    },
    {
      "epoch": 4.2641870086342895,
      "grad_norm": 0.0970119759440422,
      "learning_rate": 1.48132285207857e-06,
      "loss": 0.0084,
      "step": 2605640
    },
    {
      "epoch": 4.264219739072943,
      "grad_norm": 0.17175592482089996,
      "learning_rate": 1.4812569598650529e-06,
      "loss": 0.0086,
      "step": 2605660
    },
    {
      "epoch": 4.264252469511597,
      "grad_norm": 0.4527466893196106,
      "learning_rate": 1.4811910676515356e-06,
      "loss": 0.0142,
      "step": 2605680
    },
    {
      "epoch": 4.264285199950249,
      "grad_norm": 0.3018457293510437,
      "learning_rate": 1.4811251754380186e-06,
      "loss": 0.0092,
      "step": 2605700
    },
    {
      "epoch": 4.264317930388903,
      "grad_norm": 0.1725832223892212,
      "learning_rate": 1.4810592832245015e-06,
      "loss": 0.0067,
      "step": 2605720
    },
    {
      "epoch": 4.2643506608275565,
      "grad_norm": 0.22840820252895355,
      "learning_rate": 1.4809933910109845e-06,
      "loss": 0.0075,
      "step": 2605740
    },
    {
      "epoch": 4.26438339126621,
      "grad_norm": 0.37940722703933716,
      "learning_rate": 1.4809274987974672e-06,
      "loss": 0.0088,
      "step": 2605760
    },
    {
      "epoch": 4.264416121704863,
      "grad_norm": 0.27802419662475586,
      "learning_rate": 1.4808616065839502e-06,
      "loss": 0.0078,
      "step": 2605780
    },
    {
      "epoch": 4.264448852143516,
      "grad_norm": 0.21720346808433533,
      "learning_rate": 1.480795714370433e-06,
      "loss": 0.0109,
      "step": 2605800
    },
    {
      "epoch": 4.26448158258217,
      "grad_norm": 0.35120144486427307,
      "learning_rate": 1.4807298221569159e-06,
      "loss": 0.0133,
      "step": 2605820
    },
    {
      "epoch": 4.264514313020823,
      "grad_norm": 0.6705139875411987,
      "learning_rate": 1.4806639299433986e-06,
      "loss": 0.0114,
      "step": 2605840
    },
    {
      "epoch": 4.264547043459476,
      "grad_norm": 0.47708672285079956,
      "learning_rate": 1.4805980377298816e-06,
      "loss": 0.011,
      "step": 2605860
    },
    {
      "epoch": 4.26457977389813,
      "grad_norm": 0.25930023193359375,
      "learning_rate": 1.4805321455163643e-06,
      "loss": 0.0063,
      "step": 2605880
    },
    {
      "epoch": 4.2646125043367835,
      "grad_norm": 0.1831667423248291,
      "learning_rate": 1.4804662533028475e-06,
      "loss": 0.0072,
      "step": 2605900
    },
    {
      "epoch": 4.264645234775436,
      "grad_norm": 0.560624361038208,
      "learning_rate": 1.4804003610893302e-06,
      "loss": 0.0121,
      "step": 2605920
    },
    {
      "epoch": 4.26467796521409,
      "grad_norm": 0.23355205357074738,
      "learning_rate": 1.4803344688758131e-06,
      "loss": 0.0088,
      "step": 2605940
    },
    {
      "epoch": 4.264710695652743,
      "grad_norm": 0.17825856804847717,
      "learning_rate": 1.4802685766622959e-06,
      "loss": 0.0114,
      "step": 2605960
    },
    {
      "epoch": 4.264743426091396,
      "grad_norm": 0.11079849302768707,
      "learning_rate": 1.4802026844487788e-06,
      "loss": 0.0102,
      "step": 2605980
    },
    {
      "epoch": 4.26477615653005,
      "grad_norm": 0.11083267629146576,
      "learning_rate": 1.4801367922352616e-06,
      "loss": 0.01,
      "step": 2606000
    },
    {
      "epoch": 4.264808886968703,
      "grad_norm": 0.45993512868881226,
      "learning_rate": 1.4800709000217445e-06,
      "loss": 0.009,
      "step": 2606020
    },
    {
      "epoch": 4.264841617407357,
      "grad_norm": 0.16630619764328003,
      "learning_rate": 1.4800050078082273e-06,
      "loss": 0.0089,
      "step": 2606040
    },
    {
      "epoch": 4.26487434784601,
      "grad_norm": 0.09355348348617554,
      "learning_rate": 1.4799391155947104e-06,
      "loss": 0.008,
      "step": 2606060
    },
    {
      "epoch": 4.264907078284663,
      "grad_norm": 0.6797126531600952,
      "learning_rate": 1.4798732233811932e-06,
      "loss": 0.0075,
      "step": 2606080
    },
    {
      "epoch": 4.264939808723317,
      "grad_norm": 0.3417940139770508,
      "learning_rate": 1.4798073311676761e-06,
      "loss": 0.0099,
      "step": 2606100
    },
    {
      "epoch": 4.2649725391619695,
      "grad_norm": 0.10309181362390518,
      "learning_rate": 1.4797414389541589e-06,
      "loss": 0.0122,
      "step": 2606120
    },
    {
      "epoch": 4.265005269600623,
      "grad_norm": 0.20844516158103943,
      "learning_rate": 1.4796755467406418e-06,
      "loss": 0.0061,
      "step": 2606140
    },
    {
      "epoch": 4.265038000039277,
      "grad_norm": 0.18097911775112152,
      "learning_rate": 1.4796096545271246e-06,
      "loss": 0.0097,
      "step": 2606160
    },
    {
      "epoch": 4.26507073047793,
      "grad_norm": 0.410652220249176,
      "learning_rate": 1.4795437623136075e-06,
      "loss": 0.0098,
      "step": 2606180
    },
    {
      "epoch": 4.265103460916583,
      "grad_norm": 0.07006294280290604,
      "learning_rate": 1.4794778701000902e-06,
      "loss": 0.0089,
      "step": 2606200
    },
    {
      "epoch": 4.265136191355237,
      "grad_norm": 0.4863455295562744,
      "learning_rate": 1.4794119778865732e-06,
      "loss": 0.008,
      "step": 2606220
    },
    {
      "epoch": 4.26516892179389,
      "grad_norm": 0.16782978177070618,
      "learning_rate": 1.4793460856730564e-06,
      "loss": 0.0085,
      "step": 2606240
    },
    {
      "epoch": 4.265201652232543,
      "grad_norm": 0.3797822594642639,
      "learning_rate": 1.479280193459539e-06,
      "loss": 0.0095,
      "step": 2606260
    },
    {
      "epoch": 4.2652343826711965,
      "grad_norm": 0.20957084000110626,
      "learning_rate": 1.479214301246022e-06,
      "loss": 0.0091,
      "step": 2606280
    },
    {
      "epoch": 4.26526711310985,
      "grad_norm": 0.5161478519439697,
      "learning_rate": 1.4791484090325048e-06,
      "loss": 0.0132,
      "step": 2606300
    },
    {
      "epoch": 4.265299843548503,
      "grad_norm": 0.34404271841049194,
      "learning_rate": 1.4790825168189875e-06,
      "loss": 0.0079,
      "step": 2606320
    },
    {
      "epoch": 4.265332573987156,
      "grad_norm": 0.23908884823322296,
      "learning_rate": 1.4790166246054705e-06,
      "loss": 0.0098,
      "step": 2606340
    },
    {
      "epoch": 4.26536530442581,
      "grad_norm": 0.1639571487903595,
      "learning_rate": 1.4789507323919532e-06,
      "loss": 0.0069,
      "step": 2606360
    },
    {
      "epoch": 4.265398034864464,
      "grad_norm": 0.09034236520528793,
      "learning_rate": 1.4788848401784362e-06,
      "loss": 0.0069,
      "step": 2606380
    },
    {
      "epoch": 4.265430765303116,
      "grad_norm": 0.1311791092157364,
      "learning_rate": 1.478818947964919e-06,
      "loss": 0.0127,
      "step": 2606400
    },
    {
      "epoch": 4.26546349574177,
      "grad_norm": 0.18792298436164856,
      "learning_rate": 1.478753055751402e-06,
      "loss": 0.006,
      "step": 2606420
    },
    {
      "epoch": 4.2654962261804235,
      "grad_norm": 0.4597950577735901,
      "learning_rate": 1.478687163537885e-06,
      "loss": 0.0091,
      "step": 2606440
    },
    {
      "epoch": 4.265528956619077,
      "grad_norm": 0.08958031237125397,
      "learning_rate": 1.4786212713243678e-06,
      "loss": 0.0081,
      "step": 2606460
    },
    {
      "epoch": 4.26556168705773,
      "grad_norm": 0.35744374990463257,
      "learning_rate": 1.4785553791108507e-06,
      "loss": 0.0124,
      "step": 2606480
    },
    {
      "epoch": 4.265594417496383,
      "grad_norm": 0.03344961628317833,
      "learning_rate": 1.4784894868973335e-06,
      "loss": 0.0069,
      "step": 2606500
    },
    {
      "epoch": 4.265627147935037,
      "grad_norm": 0.20292280614376068,
      "learning_rate": 1.4784235946838162e-06,
      "loss": 0.0119,
      "step": 2606520
    },
    {
      "epoch": 4.26565987837369,
      "grad_norm": 0.33068278431892395,
      "learning_rate": 1.4783577024702991e-06,
      "loss": 0.0098,
      "step": 2606540
    },
    {
      "epoch": 4.265692608812343,
      "grad_norm": 0.15184424817562103,
      "learning_rate": 1.4782918102567819e-06,
      "loss": 0.0105,
      "step": 2606560
    },
    {
      "epoch": 4.265725339250997,
      "grad_norm": 0.2795689105987549,
      "learning_rate": 1.4782259180432648e-06,
      "loss": 0.0087,
      "step": 2606580
    },
    {
      "epoch": 4.2657580696896495,
      "grad_norm": 0.125715970993042,
      "learning_rate": 1.478160025829748e-06,
      "loss": 0.012,
      "step": 2606600
    },
    {
      "epoch": 4.265790800128303,
      "grad_norm": 0.19343777000904083,
      "learning_rate": 1.4780941336162307e-06,
      "loss": 0.0072,
      "step": 2606620
    },
    {
      "epoch": 4.265823530566957,
      "grad_norm": 0.23265397548675537,
      "learning_rate": 1.4780282414027137e-06,
      "loss": 0.0111,
      "step": 2606640
    },
    {
      "epoch": 4.26585626100561,
      "grad_norm": 0.2788643538951874,
      "learning_rate": 1.4779623491891964e-06,
      "loss": 0.0097,
      "step": 2606660
    },
    {
      "epoch": 4.265888991444263,
      "grad_norm": 0.3743145763874054,
      "learning_rate": 1.4778964569756794e-06,
      "loss": 0.0067,
      "step": 2606680
    },
    {
      "epoch": 4.265921721882917,
      "grad_norm": 0.1331159621477127,
      "learning_rate": 1.4778305647621621e-06,
      "loss": 0.0101,
      "step": 2606700
    },
    {
      "epoch": 4.26595445232157,
      "grad_norm": 0.36119940876960754,
      "learning_rate": 1.477764672548645e-06,
      "loss": 0.0079,
      "step": 2606720
    },
    {
      "epoch": 4.265987182760224,
      "grad_norm": 0.1955288201570511,
      "learning_rate": 1.4776987803351278e-06,
      "loss": 0.0069,
      "step": 2606740
    },
    {
      "epoch": 4.2660199131988765,
      "grad_norm": 0.21989870071411133,
      "learning_rate": 1.4776328881216106e-06,
      "loss": 0.008,
      "step": 2606760
    },
    {
      "epoch": 4.26605264363753,
      "grad_norm": 0.17315168678760529,
      "learning_rate": 1.4775669959080937e-06,
      "loss": 0.0077,
      "step": 2606780
    },
    {
      "epoch": 4.266085374076184,
      "grad_norm": 0.15209290385246277,
      "learning_rate": 1.4775011036945767e-06,
      "loss": 0.0063,
      "step": 2606800
    },
    {
      "epoch": 4.266118104514836,
      "grad_norm": 0.26715001463890076,
      "learning_rate": 1.4774352114810594e-06,
      "loss": 0.0135,
      "step": 2606820
    },
    {
      "epoch": 4.26615083495349,
      "grad_norm": 0.13191907107830048,
      "learning_rate": 1.4773693192675424e-06,
      "loss": 0.0093,
      "step": 2606840
    },
    {
      "epoch": 4.266183565392144,
      "grad_norm": 0.32738128304481506,
      "learning_rate": 1.477303427054025e-06,
      "loss": 0.0081,
      "step": 2606860
    },
    {
      "epoch": 4.266216295830796,
      "grad_norm": 0.7752569913864136,
      "learning_rate": 1.477237534840508e-06,
      "loss": 0.0131,
      "step": 2606880
    },
    {
      "epoch": 4.26624902626945,
      "grad_norm": 0.14490129053592682,
      "learning_rate": 1.4771716426269908e-06,
      "loss": 0.0051,
      "step": 2606900
    },
    {
      "epoch": 4.2662817567081035,
      "grad_norm": 0.1637343168258667,
      "learning_rate": 1.4771057504134737e-06,
      "loss": 0.0075,
      "step": 2606920
    },
    {
      "epoch": 4.266314487146757,
      "grad_norm": 0.11404173076152802,
      "learning_rate": 1.4770398581999567e-06,
      "loss": 0.0082,
      "step": 2606940
    },
    {
      "epoch": 4.26634721758541,
      "grad_norm": 0.19009815156459808,
      "learning_rate": 1.4769739659864396e-06,
      "loss": 0.0104,
      "step": 2606960
    },
    {
      "epoch": 4.266379948024063,
      "grad_norm": 0.2620452642440796,
      "learning_rate": 1.4769080737729224e-06,
      "loss": 0.0086,
      "step": 2606980
    },
    {
      "epoch": 4.266412678462717,
      "grad_norm": 0.47028928995132446,
      "learning_rate": 1.4768421815594053e-06,
      "loss": 0.0093,
      "step": 2607000
    },
    {
      "epoch": 4.26644540890137,
      "grad_norm": 0.16682785749435425,
      "learning_rate": 1.476776289345888e-06,
      "loss": 0.0071,
      "step": 2607020
    },
    {
      "epoch": 4.266478139340023,
      "grad_norm": 0.3818802535533905,
      "learning_rate": 1.476710397132371e-06,
      "loss": 0.0108,
      "step": 2607040
    },
    {
      "epoch": 4.266510869778677,
      "grad_norm": 0.2037438303232193,
      "learning_rate": 1.4766445049188538e-06,
      "loss": 0.0097,
      "step": 2607060
    },
    {
      "epoch": 4.2665436002173305,
      "grad_norm": 0.3211073875427246,
      "learning_rate": 1.4765786127053367e-06,
      "loss": 0.0155,
      "step": 2607080
    },
    {
      "epoch": 4.266576330655983,
      "grad_norm": 0.10819941759109497,
      "learning_rate": 1.4765127204918195e-06,
      "loss": 0.0113,
      "step": 2607100
    },
    {
      "epoch": 4.266609061094637,
      "grad_norm": 0.16575288772583008,
      "learning_rate": 1.4764468282783026e-06,
      "loss": 0.0073,
      "step": 2607120
    },
    {
      "epoch": 4.26664179153329,
      "grad_norm": 0.2762625515460968,
      "learning_rate": 1.4763809360647854e-06,
      "loss": 0.007,
      "step": 2607140
    },
    {
      "epoch": 4.266674521971943,
      "grad_norm": 0.23619157075881958,
      "learning_rate": 1.4763150438512683e-06,
      "loss": 0.0074,
      "step": 2607160
    },
    {
      "epoch": 4.266707252410597,
      "grad_norm": 0.28669488430023193,
      "learning_rate": 1.476249151637751e-06,
      "loss": 0.0086,
      "step": 2607180
    },
    {
      "epoch": 4.26673998284925,
      "grad_norm": 0.129556342959404,
      "learning_rate": 1.476183259424234e-06,
      "loss": 0.0096,
      "step": 2607200
    },
    {
      "epoch": 4.266772713287904,
      "grad_norm": 0.297154039144516,
      "learning_rate": 1.4761173672107167e-06,
      "loss": 0.0109,
      "step": 2607220
    },
    {
      "epoch": 4.266805443726557,
      "grad_norm": 0.20580460131168365,
      "learning_rate": 1.4760514749971997e-06,
      "loss": 0.0104,
      "step": 2607240
    },
    {
      "epoch": 4.26683817416521,
      "grad_norm": 0.39794522523880005,
      "learning_rate": 1.4759855827836824e-06,
      "loss": 0.0066,
      "step": 2607260
    },
    {
      "epoch": 4.266870904603864,
      "grad_norm": 0.31838560104370117,
      "learning_rate": 1.4759196905701654e-06,
      "loss": 0.0073,
      "step": 2607280
    },
    {
      "epoch": 4.2669036350425165,
      "grad_norm": 0.12974533438682556,
      "learning_rate": 1.4758537983566483e-06,
      "loss": 0.0047,
      "step": 2607300
    },
    {
      "epoch": 4.26693636548117,
      "grad_norm": 0.2828749120235443,
      "learning_rate": 1.4757879061431313e-06,
      "loss": 0.0082,
      "step": 2607320
    },
    {
      "epoch": 4.266969095919824,
      "grad_norm": 0.18453162908554077,
      "learning_rate": 1.475722013929614e-06,
      "loss": 0.0103,
      "step": 2607340
    },
    {
      "epoch": 4.267001826358477,
      "grad_norm": 0.12865091860294342,
      "learning_rate": 1.475656121716097e-06,
      "loss": 0.0082,
      "step": 2607360
    },
    {
      "epoch": 4.26703455679713,
      "grad_norm": 0.17069809138774872,
      "learning_rate": 1.4755902295025797e-06,
      "loss": 0.0105,
      "step": 2607380
    },
    {
      "epoch": 4.2670672872357835,
      "grad_norm": 0.22409343719482422,
      "learning_rate": 1.4755243372890627e-06,
      "loss": 0.0077,
      "step": 2607400
    },
    {
      "epoch": 4.267100017674437,
      "grad_norm": 0.15970765054225922,
      "learning_rate": 1.4754584450755454e-06,
      "loss": 0.0089,
      "step": 2607420
    },
    {
      "epoch": 4.26713274811309,
      "grad_norm": 0.49303874373435974,
      "learning_rate": 1.4753925528620284e-06,
      "loss": 0.0119,
      "step": 2607440
    },
    {
      "epoch": 4.267165478551743,
      "grad_norm": 0.28365087509155273,
      "learning_rate": 1.4753266606485111e-06,
      "loss": 0.0093,
      "step": 2607460
    },
    {
      "epoch": 4.267198208990397,
      "grad_norm": 0.30782055854797363,
      "learning_rate": 1.4752607684349943e-06,
      "loss": 0.0101,
      "step": 2607480
    },
    {
      "epoch": 4.267230939429051,
      "grad_norm": 0.140935480594635,
      "learning_rate": 1.475194876221477e-06,
      "loss": 0.0077,
      "step": 2607500
    },
    {
      "epoch": 4.267263669867703,
      "grad_norm": 0.5438224077224731,
      "learning_rate": 1.47512898400796e-06,
      "loss": 0.0109,
      "step": 2607520
    },
    {
      "epoch": 4.267296400306357,
      "grad_norm": 0.061599764972925186,
      "learning_rate": 1.4750630917944427e-06,
      "loss": 0.0094,
      "step": 2607540
    },
    {
      "epoch": 4.2673291307450105,
      "grad_norm": 0.16826419532299042,
      "learning_rate": 1.4749971995809257e-06,
      "loss": 0.0072,
      "step": 2607560
    },
    {
      "epoch": 4.267361861183663,
      "grad_norm": 0.11632008105516434,
      "learning_rate": 1.4749313073674084e-06,
      "loss": 0.0083,
      "step": 2607580
    },
    {
      "epoch": 4.267394591622317,
      "grad_norm": 0.08188357204198837,
      "learning_rate": 1.4748654151538913e-06,
      "loss": 0.007,
      "step": 2607600
    },
    {
      "epoch": 4.26742732206097,
      "grad_norm": 0.1963082253932953,
      "learning_rate": 1.474799522940374e-06,
      "loss": 0.0113,
      "step": 2607620
    },
    {
      "epoch": 4.267460052499624,
      "grad_norm": 0.42179352045059204,
      "learning_rate": 1.474733630726857e-06,
      "loss": 0.0069,
      "step": 2607640
    },
    {
      "epoch": 4.267492782938277,
      "grad_norm": 0.14431282877922058,
      "learning_rate": 1.4746677385133402e-06,
      "loss": 0.0097,
      "step": 2607660
    },
    {
      "epoch": 4.26752551337693,
      "grad_norm": 0.07396247237920761,
      "learning_rate": 1.474601846299823e-06,
      "loss": 0.0064,
      "step": 2607680
    },
    {
      "epoch": 4.267558243815584,
      "grad_norm": 0.1966385394334793,
      "learning_rate": 1.4745359540863059e-06,
      "loss": 0.0071,
      "step": 2607700
    },
    {
      "epoch": 4.267590974254237,
      "grad_norm": 0.24306988716125488,
      "learning_rate": 1.4744700618727886e-06,
      "loss": 0.0069,
      "step": 2607720
    },
    {
      "epoch": 4.26762370469289,
      "grad_norm": 0.05154484510421753,
      "learning_rate": 1.4744041696592714e-06,
      "loss": 0.0089,
      "step": 2607740
    },
    {
      "epoch": 4.267656435131544,
      "grad_norm": 0.14407092332839966,
      "learning_rate": 1.4743382774457543e-06,
      "loss": 0.0081,
      "step": 2607760
    },
    {
      "epoch": 4.2676891655701965,
      "grad_norm": 1.1020596027374268,
      "learning_rate": 1.474272385232237e-06,
      "loss": 0.0143,
      "step": 2607780
    },
    {
      "epoch": 4.26772189600885,
      "grad_norm": 0.3316502869129181,
      "learning_rate": 1.47420649301872e-06,
      "loss": 0.0106,
      "step": 2607800
    },
    {
      "epoch": 4.267754626447504,
      "grad_norm": 0.1599394530057907,
      "learning_rate": 1.4741406008052032e-06,
      "loss": 0.0083,
      "step": 2607820
    },
    {
      "epoch": 4.267787356886157,
      "grad_norm": 0.10400810092687607,
      "learning_rate": 1.474074708591686e-06,
      "loss": 0.0063,
      "step": 2607840
    },
    {
      "epoch": 4.26782008732481,
      "grad_norm": 1.082684874534607,
      "learning_rate": 1.4740088163781689e-06,
      "loss": 0.0095,
      "step": 2607860
    },
    {
      "epoch": 4.267852817763464,
      "grad_norm": 0.20931987464427948,
      "learning_rate": 1.4739429241646516e-06,
      "loss": 0.0093,
      "step": 2607880
    },
    {
      "epoch": 4.267885548202117,
      "grad_norm": 0.14713317155838013,
      "learning_rate": 1.4738770319511346e-06,
      "loss": 0.0113,
      "step": 2607900
    },
    {
      "epoch": 4.267918278640771,
      "grad_norm": 0.265746533870697,
      "learning_rate": 1.4738111397376173e-06,
      "loss": 0.0091,
      "step": 2607920
    },
    {
      "epoch": 4.2679510090794235,
      "grad_norm": 0.3002847135066986,
      "learning_rate": 1.4737452475241e-06,
      "loss": 0.0141,
      "step": 2607940
    },
    {
      "epoch": 4.267983739518077,
      "grad_norm": 0.34475207328796387,
      "learning_rate": 1.473679355310583e-06,
      "loss": 0.0079,
      "step": 2607960
    },
    {
      "epoch": 4.268016469956731,
      "grad_norm": 0.07639706879854202,
      "learning_rate": 1.4736134630970657e-06,
      "loss": 0.0092,
      "step": 2607980
    },
    {
      "epoch": 4.268049200395383,
      "grad_norm": 0.18878844380378723,
      "learning_rate": 1.4735475708835489e-06,
      "loss": 0.0067,
      "step": 2608000
    },
    {
      "epoch": 4.268081930834037,
      "grad_norm": 0.10209067910909653,
      "learning_rate": 1.4734816786700318e-06,
      "loss": 0.0099,
      "step": 2608020
    },
    {
      "epoch": 4.268114661272691,
      "grad_norm": 0.28843170404434204,
      "learning_rate": 1.4734157864565146e-06,
      "loss": 0.0087,
      "step": 2608040
    },
    {
      "epoch": 4.268147391711343,
      "grad_norm": 0.6790910959243774,
      "learning_rate": 1.4733498942429975e-06,
      "loss": 0.01,
      "step": 2608060
    },
    {
      "epoch": 4.268180122149997,
      "grad_norm": 0.1848931759595871,
      "learning_rate": 1.4732840020294803e-06,
      "loss": 0.015,
      "step": 2608080
    },
    {
      "epoch": 4.2682128525886505,
      "grad_norm": 0.09120865166187286,
      "learning_rate": 1.4732181098159632e-06,
      "loss": 0.0078,
      "step": 2608100
    },
    {
      "epoch": 4.268245583027304,
      "grad_norm": 0.06241372227668762,
      "learning_rate": 1.473152217602446e-06,
      "loss": 0.0109,
      "step": 2608120
    },
    {
      "epoch": 4.268278313465957,
      "grad_norm": 0.1407051980495453,
      "learning_rate": 1.473086325388929e-06,
      "loss": 0.0056,
      "step": 2608140
    },
    {
      "epoch": 4.26831104390461,
      "grad_norm": 0.19177821278572083,
      "learning_rate": 1.4730204331754117e-06,
      "loss": 0.0088,
      "step": 2608160
    },
    {
      "epoch": 4.268343774343264,
      "grad_norm": 0.21236126124858856,
      "learning_rate": 1.4729545409618948e-06,
      "loss": 0.0093,
      "step": 2608180
    },
    {
      "epoch": 4.2683765047819175,
      "grad_norm": 0.17977377772331238,
      "learning_rate": 1.4728886487483776e-06,
      "loss": 0.0073,
      "step": 2608200
    },
    {
      "epoch": 4.26840923522057,
      "grad_norm": 0.09972655773162842,
      "learning_rate": 1.4728227565348605e-06,
      "loss": 0.0086,
      "step": 2608220
    },
    {
      "epoch": 4.268441965659224,
      "grad_norm": 0.18747837841510773,
      "learning_rate": 1.4727568643213433e-06,
      "loss": 0.0067,
      "step": 2608240
    },
    {
      "epoch": 4.268474696097877,
      "grad_norm": 0.0642366036772728,
      "learning_rate": 1.4726909721078262e-06,
      "loss": 0.0051,
      "step": 2608260
    },
    {
      "epoch": 4.26850742653653,
      "grad_norm": 0.32248035073280334,
      "learning_rate": 1.472625079894309e-06,
      "loss": 0.0117,
      "step": 2608280
    },
    {
      "epoch": 4.268540156975184,
      "grad_norm": 0.23225921392440796,
      "learning_rate": 1.4725591876807919e-06,
      "loss": 0.0155,
      "step": 2608300
    },
    {
      "epoch": 4.268572887413837,
      "grad_norm": 0.5566606521606445,
      "learning_rate": 1.4724932954672746e-06,
      "loss": 0.0103,
      "step": 2608320
    },
    {
      "epoch": 4.26860561785249,
      "grad_norm": 0.15814414620399475,
      "learning_rate": 1.4724274032537576e-06,
      "loss": 0.0094,
      "step": 2608340
    },
    {
      "epoch": 4.268638348291144,
      "grad_norm": 0.13329890370368958,
      "learning_rate": 1.4723615110402405e-06,
      "loss": 0.0077,
      "step": 2608360
    },
    {
      "epoch": 4.268671078729797,
      "grad_norm": 0.28562307357788086,
      "learning_rate": 1.4722956188267235e-06,
      "loss": 0.0085,
      "step": 2608380
    },
    {
      "epoch": 4.268703809168451,
      "grad_norm": 0.21448969841003418,
      "learning_rate": 1.4722297266132062e-06,
      "loss": 0.0069,
      "step": 2608400
    },
    {
      "epoch": 4.2687365396071035,
      "grad_norm": 0.0897359624505043,
      "learning_rate": 1.4721638343996892e-06,
      "loss": 0.0116,
      "step": 2608420
    },
    {
      "epoch": 4.268769270045757,
      "grad_norm": 0.6269552707672119,
      "learning_rate": 1.472097942186172e-06,
      "loss": 0.0135,
      "step": 2608440
    },
    {
      "epoch": 4.268802000484411,
      "grad_norm": 0.3757430613040924,
      "learning_rate": 1.4720320499726549e-06,
      "loss": 0.0113,
      "step": 2608460
    },
    {
      "epoch": 4.268834730923063,
      "grad_norm": 0.27882152795791626,
      "learning_rate": 1.4719661577591376e-06,
      "loss": 0.0049,
      "step": 2608480
    },
    {
      "epoch": 4.268867461361717,
      "grad_norm": 0.20657755434513092,
      "learning_rate": 1.4719002655456206e-06,
      "loss": 0.0076,
      "step": 2608500
    },
    {
      "epoch": 4.268900191800371,
      "grad_norm": 0.3764539957046509,
      "learning_rate": 1.4718343733321033e-06,
      "loss": 0.0112,
      "step": 2608520
    },
    {
      "epoch": 4.268932922239024,
      "grad_norm": 0.1622070074081421,
      "learning_rate": 1.4717684811185865e-06,
      "loss": 0.0143,
      "step": 2608540
    },
    {
      "epoch": 4.268965652677677,
      "grad_norm": 0.053918756544589996,
      "learning_rate": 1.4717025889050692e-06,
      "loss": 0.0053,
      "step": 2608560
    },
    {
      "epoch": 4.2689983831163305,
      "grad_norm": 0.046792153269052505,
      "learning_rate": 1.4716366966915522e-06,
      "loss": 0.0093,
      "step": 2608580
    },
    {
      "epoch": 4.269031113554984,
      "grad_norm": 0.1132749542593956,
      "learning_rate": 1.471570804478035e-06,
      "loss": 0.0104,
      "step": 2608600
    },
    {
      "epoch": 4.269063843993637,
      "grad_norm": 0.1309986263513565,
      "learning_rate": 1.4715049122645178e-06,
      "loss": 0.0069,
      "step": 2608620
    },
    {
      "epoch": 4.26909657443229,
      "grad_norm": 1.174574613571167,
      "learning_rate": 1.4714390200510006e-06,
      "loss": 0.0087,
      "step": 2608640
    },
    {
      "epoch": 4.269129304870944,
      "grad_norm": 0.22612757980823517,
      "learning_rate": 1.4713731278374835e-06,
      "loss": 0.008,
      "step": 2608660
    },
    {
      "epoch": 4.269162035309598,
      "grad_norm": 0.29910239577293396,
      "learning_rate": 1.4713072356239663e-06,
      "loss": 0.012,
      "step": 2608680
    },
    {
      "epoch": 4.26919476574825,
      "grad_norm": 0.23582372069358826,
      "learning_rate": 1.4712413434104494e-06,
      "loss": 0.011,
      "step": 2608700
    },
    {
      "epoch": 4.269227496186904,
      "grad_norm": 0.141267329454422,
      "learning_rate": 1.4711754511969322e-06,
      "loss": 0.0125,
      "step": 2608720
    },
    {
      "epoch": 4.2692602266255575,
      "grad_norm": 0.53107750415802,
      "learning_rate": 1.4711095589834151e-06,
      "loss": 0.0069,
      "step": 2608740
    },
    {
      "epoch": 4.26929295706421,
      "grad_norm": 0.08775622397661209,
      "learning_rate": 1.4710436667698979e-06,
      "loss": 0.006,
      "step": 2608760
    },
    {
      "epoch": 4.269325687502864,
      "grad_norm": 0.5147798657417297,
      "learning_rate": 1.4709777745563808e-06,
      "loss": 0.0099,
      "step": 2608780
    },
    {
      "epoch": 4.269358417941517,
      "grad_norm": 0.20260080695152283,
      "learning_rate": 1.4709118823428636e-06,
      "loss": 0.0082,
      "step": 2608800
    },
    {
      "epoch": 4.269391148380171,
      "grad_norm": 0.3236468434333801,
      "learning_rate": 1.4708459901293465e-06,
      "loss": 0.008,
      "step": 2608820
    },
    {
      "epoch": 4.269423878818824,
      "grad_norm": 0.15842664241790771,
      "learning_rate": 1.4707800979158293e-06,
      "loss": 0.0078,
      "step": 2608840
    },
    {
      "epoch": 4.269456609257477,
      "grad_norm": 0.38122424483299255,
      "learning_rate": 1.4707142057023122e-06,
      "loss": 0.0108,
      "step": 2608860
    },
    {
      "epoch": 4.269489339696131,
      "grad_norm": 0.4486795961856842,
      "learning_rate": 1.4706483134887954e-06,
      "loss": 0.0105,
      "step": 2608880
    },
    {
      "epoch": 4.269522070134784,
      "grad_norm": 0.26056939363479614,
      "learning_rate": 1.4705824212752781e-06,
      "loss": 0.0083,
      "step": 2608900
    },
    {
      "epoch": 4.269554800573437,
      "grad_norm": 0.24741488695144653,
      "learning_rate": 1.470516529061761e-06,
      "loss": 0.0071,
      "step": 2608920
    },
    {
      "epoch": 4.269587531012091,
      "grad_norm": 0.24508880078792572,
      "learning_rate": 1.4704506368482438e-06,
      "loss": 0.0123,
      "step": 2608940
    },
    {
      "epoch": 4.269620261450744,
      "grad_norm": 0.24464736878871918,
      "learning_rate": 1.4703847446347265e-06,
      "loss": 0.0095,
      "step": 2608960
    },
    {
      "epoch": 4.269652991889397,
      "grad_norm": 0.2250148504972458,
      "learning_rate": 1.4703188524212095e-06,
      "loss": 0.0147,
      "step": 2608980
    },
    {
      "epoch": 4.269685722328051,
      "grad_norm": 0.2467096596956253,
      "learning_rate": 1.4702529602076922e-06,
      "loss": 0.0085,
      "step": 2609000
    },
    {
      "epoch": 4.269718452766704,
      "grad_norm": 0.19522809982299805,
      "learning_rate": 1.4701870679941752e-06,
      "loss": 0.0142,
      "step": 2609020
    },
    {
      "epoch": 4.269751183205357,
      "grad_norm": 0.3861441910266876,
      "learning_rate": 1.470121175780658e-06,
      "loss": 0.0062,
      "step": 2609040
    },
    {
      "epoch": 4.2697839136440106,
      "grad_norm": 0.5142433643341064,
      "learning_rate": 1.470055283567141e-06,
      "loss": 0.0099,
      "step": 2609060
    },
    {
      "epoch": 4.269816644082664,
      "grad_norm": 0.1477338671684265,
      "learning_rate": 1.469989391353624e-06,
      "loss": 0.0069,
      "step": 2609080
    },
    {
      "epoch": 4.269849374521318,
      "grad_norm": 0.605373203754425,
      "learning_rate": 1.4699234991401068e-06,
      "loss": 0.0087,
      "step": 2609100
    },
    {
      "epoch": 4.2698821049599704,
      "grad_norm": 0.1804630607366562,
      "learning_rate": 1.4698576069265897e-06,
      "loss": 0.0102,
      "step": 2609120
    },
    {
      "epoch": 4.269914835398624,
      "grad_norm": 0.47304660081863403,
      "learning_rate": 1.4697917147130725e-06,
      "loss": 0.0144,
      "step": 2609140
    },
    {
      "epoch": 4.269947565837278,
      "grad_norm": 0.319148451089859,
      "learning_rate": 1.4697258224995552e-06,
      "loss": 0.009,
      "step": 2609160
    },
    {
      "epoch": 4.26998029627593,
      "grad_norm": 0.13008911907672882,
      "learning_rate": 1.4696599302860382e-06,
      "loss": 0.0096,
      "step": 2609180
    },
    {
      "epoch": 4.270013026714584,
      "grad_norm": 0.1996401995420456,
      "learning_rate": 1.469594038072521e-06,
      "loss": 0.0136,
      "step": 2609200
    },
    {
      "epoch": 4.2700457571532375,
      "grad_norm": 0.21204479038715363,
      "learning_rate": 1.4695281458590039e-06,
      "loss": 0.009,
      "step": 2609220
    },
    {
      "epoch": 4.27007848759189,
      "grad_norm": 0.2492552250623703,
      "learning_rate": 1.469462253645487e-06,
      "loss": 0.0084,
      "step": 2609240
    },
    {
      "epoch": 4.270111218030544,
      "grad_norm": 0.34302157163619995,
      "learning_rate": 1.4693963614319698e-06,
      "loss": 0.0079,
      "step": 2609260
    },
    {
      "epoch": 4.270143948469197,
      "grad_norm": 0.22454801201820374,
      "learning_rate": 1.4693304692184527e-06,
      "loss": 0.0067,
      "step": 2609280
    },
    {
      "epoch": 4.270176678907851,
      "grad_norm": 1.2321434020996094,
      "learning_rate": 1.4692645770049354e-06,
      "loss": 0.0104,
      "step": 2609300
    },
    {
      "epoch": 4.270209409346504,
      "grad_norm": 0.09008380025625229,
      "learning_rate": 1.4691986847914184e-06,
      "loss": 0.0073,
      "step": 2609320
    },
    {
      "epoch": 4.270242139785157,
      "grad_norm": 0.5812004208564758,
      "learning_rate": 1.4691327925779011e-06,
      "loss": 0.0084,
      "step": 2609340
    },
    {
      "epoch": 4.270274870223811,
      "grad_norm": 0.30188167095184326,
      "learning_rate": 1.469066900364384e-06,
      "loss": 0.0062,
      "step": 2609360
    },
    {
      "epoch": 4.2703076006624645,
      "grad_norm": 0.23471379280090332,
      "learning_rate": 1.4690010081508668e-06,
      "loss": 0.0107,
      "step": 2609380
    },
    {
      "epoch": 4.270340331101117,
      "grad_norm": 0.09213593602180481,
      "learning_rate": 1.4689351159373496e-06,
      "loss": 0.0085,
      "step": 2609400
    },
    {
      "epoch": 4.270373061539771,
      "grad_norm": 0.35225921869277954,
      "learning_rate": 1.4688692237238327e-06,
      "loss": 0.0063,
      "step": 2609420
    },
    {
      "epoch": 4.270405791978424,
      "grad_norm": 0.18988993763923645,
      "learning_rate": 1.4688033315103157e-06,
      "loss": 0.0078,
      "step": 2609440
    },
    {
      "epoch": 4.270438522417077,
      "grad_norm": 0.27474457025527954,
      "learning_rate": 1.4687374392967984e-06,
      "loss": 0.0092,
      "step": 2609460
    },
    {
      "epoch": 4.270471252855731,
      "grad_norm": 0.2680261731147766,
      "learning_rate": 1.4686715470832814e-06,
      "loss": 0.0087,
      "step": 2609480
    },
    {
      "epoch": 4.270503983294384,
      "grad_norm": 0.2264888435602188,
      "learning_rate": 1.4686056548697641e-06,
      "loss": 0.0075,
      "step": 2609500
    },
    {
      "epoch": 4.270536713733037,
      "grad_norm": 0.16073939204216003,
      "learning_rate": 1.468539762656247e-06,
      "loss": 0.0083,
      "step": 2609520
    },
    {
      "epoch": 4.270569444171691,
      "grad_norm": 0.445757120847702,
      "learning_rate": 1.4684738704427298e-06,
      "loss": 0.0131,
      "step": 2609540
    },
    {
      "epoch": 4.270602174610344,
      "grad_norm": 0.08543327450752258,
      "learning_rate": 1.4684079782292128e-06,
      "loss": 0.0088,
      "step": 2609560
    },
    {
      "epoch": 4.270634905048998,
      "grad_norm": 0.08397193253040314,
      "learning_rate": 1.4683420860156957e-06,
      "loss": 0.0105,
      "step": 2609580
    },
    {
      "epoch": 4.2706676354876505,
      "grad_norm": 0.16676294803619385,
      "learning_rate": 1.4682761938021787e-06,
      "loss": 0.0069,
      "step": 2609600
    },
    {
      "epoch": 4.270700365926304,
      "grad_norm": 0.24495986104011536,
      "learning_rate": 1.4682103015886614e-06,
      "loss": 0.011,
      "step": 2609620
    },
    {
      "epoch": 4.270733096364958,
      "grad_norm": 0.8399665951728821,
      "learning_rate": 1.4681444093751443e-06,
      "loss": 0.0096,
      "step": 2609640
    },
    {
      "epoch": 4.270765826803611,
      "grad_norm": 0.4409242272377014,
      "learning_rate": 1.468078517161627e-06,
      "loss": 0.0097,
      "step": 2609660
    },
    {
      "epoch": 4.270798557242264,
      "grad_norm": 0.11972390860319138,
      "learning_rate": 1.46801262494811e-06,
      "loss": 0.0114,
      "step": 2609680
    },
    {
      "epoch": 4.270831287680918,
      "grad_norm": 0.08618192374706268,
      "learning_rate": 1.4679467327345928e-06,
      "loss": 0.0064,
      "step": 2609700
    },
    {
      "epoch": 4.270864018119571,
      "grad_norm": 0.09566039592027664,
      "learning_rate": 1.4678808405210757e-06,
      "loss": 0.0133,
      "step": 2609720
    },
    {
      "epoch": 4.270896748558224,
      "grad_norm": 0.12303692102432251,
      "learning_rate": 1.4678149483075585e-06,
      "loss": 0.0136,
      "step": 2609740
    },
    {
      "epoch": 4.2709294789968775,
      "grad_norm": 0.14362549781799316,
      "learning_rate": 1.4677490560940416e-06,
      "loss": 0.0088,
      "step": 2609760
    },
    {
      "epoch": 4.270962209435531,
      "grad_norm": 0.19943299889564514,
      "learning_rate": 1.4676831638805244e-06,
      "loss": 0.0057,
      "step": 2609780
    },
    {
      "epoch": 4.270994939874184,
      "grad_norm": 0.04340583086013794,
      "learning_rate": 1.4676172716670073e-06,
      "loss": 0.0089,
      "step": 2609800
    },
    {
      "epoch": 4.271027670312837,
      "grad_norm": 0.17220403254032135,
      "learning_rate": 1.46755137945349e-06,
      "loss": 0.0092,
      "step": 2609820
    },
    {
      "epoch": 4.271060400751491,
      "grad_norm": 0.20101870596408844,
      "learning_rate": 1.467485487239973e-06,
      "loss": 0.0085,
      "step": 2609840
    },
    {
      "epoch": 4.2710931311901446,
      "grad_norm": 0.14064612984657288,
      "learning_rate": 1.4674195950264558e-06,
      "loss": 0.0079,
      "step": 2609860
    },
    {
      "epoch": 4.271125861628797,
      "grad_norm": 0.19433358311653137,
      "learning_rate": 1.4673537028129387e-06,
      "loss": 0.0065,
      "step": 2609880
    },
    {
      "epoch": 4.271158592067451,
      "grad_norm": 0.16019713878631592,
      "learning_rate": 1.4672878105994214e-06,
      "loss": 0.0118,
      "step": 2609900
    },
    {
      "epoch": 4.2711913225061044,
      "grad_norm": 0.30389344692230225,
      "learning_rate": 1.4672219183859044e-06,
      "loss": 0.0066,
      "step": 2609920
    },
    {
      "epoch": 4.271224052944757,
      "grad_norm": 0.10317397117614746,
      "learning_rate": 1.4671560261723874e-06,
      "loss": 0.0092,
      "step": 2609940
    },
    {
      "epoch": 4.271256783383411,
      "grad_norm": 0.15002116560935974,
      "learning_rate": 1.4670901339588703e-06,
      "loss": 0.0079,
      "step": 2609960
    },
    {
      "epoch": 4.271289513822064,
      "grad_norm": 0.6806030869483948,
      "learning_rate": 1.467024241745353e-06,
      "loss": 0.0087,
      "step": 2609980
    },
    {
      "epoch": 4.271322244260718,
      "grad_norm": 0.04037733003497124,
      "learning_rate": 1.466958349531836e-06,
      "loss": 0.0111,
      "step": 2610000
    },
    {
      "epoch": 4.271354974699371,
      "grad_norm": 0.14559690654277802,
      "learning_rate": 1.4668924573183187e-06,
      "loss": 0.0106,
      "step": 2610020
    },
    {
      "epoch": 4.271387705138024,
      "grad_norm": 0.10228773206472397,
      "learning_rate": 1.4668265651048017e-06,
      "loss": 0.0068,
      "step": 2610040
    },
    {
      "epoch": 4.271420435576678,
      "grad_norm": 0.40829896926879883,
      "learning_rate": 1.4667606728912844e-06,
      "loss": 0.0116,
      "step": 2610060
    },
    {
      "epoch": 4.2714531660153305,
      "grad_norm": 0.10092774778604507,
      "learning_rate": 1.4666947806777674e-06,
      "loss": 0.006,
      "step": 2610080
    },
    {
      "epoch": 4.271485896453984,
      "grad_norm": 0.1024036705493927,
      "learning_rate": 1.4666288884642501e-06,
      "loss": 0.0118,
      "step": 2610100
    },
    {
      "epoch": 4.271518626892638,
      "grad_norm": 0.09015796333551407,
      "learning_rate": 1.4665629962507333e-06,
      "loss": 0.007,
      "step": 2610120
    },
    {
      "epoch": 4.271551357331291,
      "grad_norm": 0.33261892199516296,
      "learning_rate": 1.466497104037216e-06,
      "loss": 0.0066,
      "step": 2610140
    },
    {
      "epoch": 4.271584087769944,
      "grad_norm": 0.16331854462623596,
      "learning_rate": 1.466431211823699e-06,
      "loss": 0.01,
      "step": 2610160
    },
    {
      "epoch": 4.271616818208598,
      "grad_norm": 0.22798310220241547,
      "learning_rate": 1.4663653196101817e-06,
      "loss": 0.0084,
      "step": 2610180
    },
    {
      "epoch": 4.271649548647251,
      "grad_norm": 0.3385009467601776,
      "learning_rate": 1.4662994273966647e-06,
      "loss": 0.008,
      "step": 2610200
    },
    {
      "epoch": 4.271682279085904,
      "grad_norm": 0.22755171358585358,
      "learning_rate": 1.4662335351831474e-06,
      "loss": 0.0146,
      "step": 2610220
    },
    {
      "epoch": 4.2717150095245575,
      "grad_norm": 0.27488407492637634,
      "learning_rate": 1.4661676429696304e-06,
      "loss": 0.0081,
      "step": 2610240
    },
    {
      "epoch": 4.271747739963211,
      "grad_norm": 0.3151397109031677,
      "learning_rate": 1.466101750756113e-06,
      "loss": 0.0092,
      "step": 2610260
    },
    {
      "epoch": 4.271780470401865,
      "grad_norm": 0.40829259157180786,
      "learning_rate": 1.4660358585425963e-06,
      "loss": 0.0105,
      "step": 2610280
    },
    {
      "epoch": 4.271813200840517,
      "grad_norm": 0.10843690484762192,
      "learning_rate": 1.4659699663290792e-06,
      "loss": 0.0088,
      "step": 2610300
    },
    {
      "epoch": 4.271845931279171,
      "grad_norm": 0.21668967604637146,
      "learning_rate": 1.465904074115562e-06,
      "loss": 0.0069,
      "step": 2610320
    },
    {
      "epoch": 4.271878661717825,
      "grad_norm": 0.10099266469478607,
      "learning_rate": 1.465838181902045e-06,
      "loss": 0.0098,
      "step": 2610340
    },
    {
      "epoch": 4.271911392156477,
      "grad_norm": 0.13784490525722504,
      "learning_rate": 1.4657722896885276e-06,
      "loss": 0.0083,
      "step": 2610360
    },
    {
      "epoch": 4.271944122595131,
      "grad_norm": 0.05973856523633003,
      "learning_rate": 1.4657063974750104e-06,
      "loss": 0.0108,
      "step": 2610380
    },
    {
      "epoch": 4.2719768530337845,
      "grad_norm": 0.1873743087053299,
      "learning_rate": 1.4656405052614933e-06,
      "loss": 0.0099,
      "step": 2610400
    },
    {
      "epoch": 4.272009583472438,
      "grad_norm": 0.370509535074234,
      "learning_rate": 1.465574613047976e-06,
      "loss": 0.0104,
      "step": 2610420
    },
    {
      "epoch": 4.272042313911091,
      "grad_norm": 0.36155325174331665,
      "learning_rate": 1.465508720834459e-06,
      "loss": 0.0095,
      "step": 2610440
    },
    {
      "epoch": 4.272075044349744,
      "grad_norm": 0.29532262682914734,
      "learning_rate": 1.4654428286209422e-06,
      "loss": 0.0089,
      "step": 2610460
    },
    {
      "epoch": 4.272107774788398,
      "grad_norm": 0.18289139866828918,
      "learning_rate": 1.465376936407425e-06,
      "loss": 0.0103,
      "step": 2610480
    },
    {
      "epoch": 4.272140505227051,
      "grad_norm": 0.30939605832099915,
      "learning_rate": 1.4653110441939079e-06,
      "loss": 0.0092,
      "step": 2610500
    },
    {
      "epoch": 4.272173235665704,
      "grad_norm": 0.4647730886936188,
      "learning_rate": 1.4652451519803906e-06,
      "loss": 0.0066,
      "step": 2610520
    },
    {
      "epoch": 4.272205966104358,
      "grad_norm": 0.40598535537719727,
      "learning_rate": 1.4651792597668736e-06,
      "loss": 0.0082,
      "step": 2610540
    },
    {
      "epoch": 4.2722386965430115,
      "grad_norm": 0.42253103852272034,
      "learning_rate": 1.4651133675533563e-06,
      "loss": 0.0069,
      "step": 2610560
    },
    {
      "epoch": 4.272271426981664,
      "grad_norm": 0.209339901804924,
      "learning_rate": 1.4650474753398393e-06,
      "loss": 0.0079,
      "step": 2610580
    },
    {
      "epoch": 4.272304157420318,
      "grad_norm": 0.1394234299659729,
      "learning_rate": 1.464981583126322e-06,
      "loss": 0.0115,
      "step": 2610600
    },
    {
      "epoch": 4.272336887858971,
      "grad_norm": 0.31550583243370056,
      "learning_rate": 1.4649156909128047e-06,
      "loss": 0.0106,
      "step": 2610620
    },
    {
      "epoch": 4.272369618297624,
      "grad_norm": 0.40344828367233276,
      "learning_rate": 1.464849798699288e-06,
      "loss": 0.0091,
      "step": 2610640
    },
    {
      "epoch": 4.272402348736278,
      "grad_norm": 0.21229279041290283,
      "learning_rate": 1.4647839064857709e-06,
      "loss": 0.0089,
      "step": 2610660
    },
    {
      "epoch": 4.272435079174931,
      "grad_norm": 0.06205571070313454,
      "learning_rate": 1.4647180142722536e-06,
      "loss": 0.0067,
      "step": 2610680
    },
    {
      "epoch": 4.272467809613585,
      "grad_norm": 0.06986464560031891,
      "learning_rate": 1.4646521220587365e-06,
      "loss": 0.01,
      "step": 2610700
    },
    {
      "epoch": 4.272500540052238,
      "grad_norm": 0.1457417607307434,
      "learning_rate": 1.4645862298452193e-06,
      "loss": 0.0053,
      "step": 2610720
    },
    {
      "epoch": 4.272533270490891,
      "grad_norm": 0.16102737188339233,
      "learning_rate": 1.4645203376317022e-06,
      "loss": 0.013,
      "step": 2610740
    },
    {
      "epoch": 4.272566000929545,
      "grad_norm": 0.20287685096263885,
      "learning_rate": 1.464454445418185e-06,
      "loss": 0.0075,
      "step": 2610760
    },
    {
      "epoch": 4.2725987313681975,
      "grad_norm": 0.4109516739845276,
      "learning_rate": 1.464388553204668e-06,
      "loss": 0.0083,
      "step": 2610780
    },
    {
      "epoch": 4.272631461806851,
      "grad_norm": 0.1714663803577423,
      "learning_rate": 1.4643226609911507e-06,
      "loss": 0.0076,
      "step": 2610800
    },
    {
      "epoch": 4.272664192245505,
      "grad_norm": 0.18396885693073273,
      "learning_rate": 1.4642567687776338e-06,
      "loss": 0.0091,
      "step": 2610820
    },
    {
      "epoch": 4.272696922684158,
      "grad_norm": 0.10814658552408218,
      "learning_rate": 1.4641908765641166e-06,
      "loss": 0.0075,
      "step": 2610840
    },
    {
      "epoch": 4.272729653122811,
      "grad_norm": 0.3240831792354584,
      "learning_rate": 1.4641249843505995e-06,
      "loss": 0.0126,
      "step": 2610860
    },
    {
      "epoch": 4.2727623835614645,
      "grad_norm": 0.36520686745643616,
      "learning_rate": 1.4640590921370823e-06,
      "loss": 0.0073,
      "step": 2610880
    },
    {
      "epoch": 4.272795114000118,
      "grad_norm": 0.33265453577041626,
      "learning_rate": 1.4639931999235652e-06,
      "loss": 0.01,
      "step": 2610900
    },
    {
      "epoch": 4.272827844438771,
      "grad_norm": 0.22664512693881989,
      "learning_rate": 1.463927307710048e-06,
      "loss": 0.014,
      "step": 2610920
    },
    {
      "epoch": 4.272860574877424,
      "grad_norm": 0.21745260059833527,
      "learning_rate": 1.463861415496531e-06,
      "loss": 0.0084,
      "step": 2610940
    },
    {
      "epoch": 4.272893305316078,
      "grad_norm": 0.3268372118473053,
      "learning_rate": 1.4637955232830136e-06,
      "loss": 0.0091,
      "step": 2610960
    },
    {
      "epoch": 4.272926035754731,
      "grad_norm": 0.12203221023082733,
      "learning_rate": 1.4637296310694966e-06,
      "loss": 0.0074,
      "step": 2610980
    },
    {
      "epoch": 4.272958766193384,
      "grad_norm": 0.4221394658088684,
      "learning_rate": 1.4636637388559795e-06,
      "loss": 0.0087,
      "step": 2611000
    },
    {
      "epoch": 4.272991496632038,
      "grad_norm": 0.07837715744972229,
      "learning_rate": 1.4635978466424625e-06,
      "loss": 0.0081,
      "step": 2611020
    },
    {
      "epoch": 4.2730242270706915,
      "grad_norm": 0.23800191283226013,
      "learning_rate": 1.4635319544289452e-06,
      "loss": 0.0085,
      "step": 2611040
    },
    {
      "epoch": 4.273056957509344,
      "grad_norm": 0.13315601646900177,
      "learning_rate": 1.4634660622154282e-06,
      "loss": 0.0097,
      "step": 2611060
    },
    {
      "epoch": 4.273089687947998,
      "grad_norm": 0.19924375414848328,
      "learning_rate": 1.463400170001911e-06,
      "loss": 0.0101,
      "step": 2611080
    },
    {
      "epoch": 4.273122418386651,
      "grad_norm": 0.3223366141319275,
      "learning_rate": 1.4633342777883939e-06,
      "loss": 0.0088,
      "step": 2611100
    },
    {
      "epoch": 4.273155148825305,
      "grad_norm": 0.4920785427093506,
      "learning_rate": 1.4632683855748766e-06,
      "loss": 0.0097,
      "step": 2611120
    },
    {
      "epoch": 4.273187879263958,
      "grad_norm": 0.21128828823566437,
      "learning_rate": 1.4632024933613596e-06,
      "loss": 0.0136,
      "step": 2611140
    },
    {
      "epoch": 4.273220609702611,
      "grad_norm": 0.2049926519393921,
      "learning_rate": 1.4631366011478425e-06,
      "loss": 0.0054,
      "step": 2611160
    },
    {
      "epoch": 4.273253340141265,
      "grad_norm": 0.2224792093038559,
      "learning_rate": 1.4630707089343255e-06,
      "loss": 0.0065,
      "step": 2611180
    },
    {
      "epoch": 4.273286070579918,
      "grad_norm": 0.18026326596736908,
      "learning_rate": 1.4630048167208082e-06,
      "loss": 0.0119,
      "step": 2611200
    },
    {
      "epoch": 4.273318801018571,
      "grad_norm": 0.372455894947052,
      "learning_rate": 1.4629389245072912e-06,
      "loss": 0.0085,
      "step": 2611220
    },
    {
      "epoch": 4.273351531457225,
      "grad_norm": 0.4151265025138855,
      "learning_rate": 1.462873032293774e-06,
      "loss": 0.0084,
      "step": 2611240
    },
    {
      "epoch": 4.2733842618958775,
      "grad_norm": 0.12600617110729218,
      "learning_rate": 1.4628071400802569e-06,
      "loss": 0.0064,
      "step": 2611260
    },
    {
      "epoch": 4.273416992334531,
      "grad_norm": 0.21178914606571198,
      "learning_rate": 1.4627412478667396e-06,
      "loss": 0.0076,
      "step": 2611280
    },
    {
      "epoch": 4.273449722773185,
      "grad_norm": 0.37575870752334595,
      "learning_rate": 1.4626753556532225e-06,
      "loss": 0.0139,
      "step": 2611300
    },
    {
      "epoch": 4.273482453211838,
      "grad_norm": 0.3259734511375427,
      "learning_rate": 1.4626094634397053e-06,
      "loss": 0.0114,
      "step": 2611320
    },
    {
      "epoch": 4.273515183650491,
      "grad_norm": 0.39995503425598145,
      "learning_rate": 1.4625435712261885e-06,
      "loss": 0.0176,
      "step": 2611340
    },
    {
      "epoch": 4.273547914089145,
      "grad_norm": 0.13653168082237244,
      "learning_rate": 1.4624776790126712e-06,
      "loss": 0.0123,
      "step": 2611360
    },
    {
      "epoch": 4.273580644527798,
      "grad_norm": 0.05850823223590851,
      "learning_rate": 1.4624117867991541e-06,
      "loss": 0.0099,
      "step": 2611380
    },
    {
      "epoch": 4.273613374966451,
      "grad_norm": 0.15051445364952087,
      "learning_rate": 1.4623458945856369e-06,
      "loss": 0.012,
      "step": 2611400
    },
    {
      "epoch": 4.2736461054051045,
      "grad_norm": 0.19579605758190155,
      "learning_rate": 1.4622800023721198e-06,
      "loss": 0.0092,
      "step": 2611420
    },
    {
      "epoch": 4.273678835843758,
      "grad_norm": 0.11791010946035385,
      "learning_rate": 1.4622141101586026e-06,
      "loss": 0.0079,
      "step": 2611440
    },
    {
      "epoch": 4.273711566282412,
      "grad_norm": 0.0761762484908104,
      "learning_rate": 1.4621482179450855e-06,
      "loss": 0.0075,
      "step": 2611460
    },
    {
      "epoch": 4.273744296721064,
      "grad_norm": 0.2210012823343277,
      "learning_rate": 1.4620823257315683e-06,
      "loss": 0.0053,
      "step": 2611480
    },
    {
      "epoch": 4.273777027159718,
      "grad_norm": 0.21801871061325073,
      "learning_rate": 1.4620164335180512e-06,
      "loss": 0.0088,
      "step": 2611500
    },
    {
      "epoch": 4.273809757598372,
      "grad_norm": 0.22413809597492218,
      "learning_rate": 1.4619505413045344e-06,
      "loss": 0.0105,
      "step": 2611520
    },
    {
      "epoch": 4.273842488037024,
      "grad_norm": 0.18397678434848785,
      "learning_rate": 1.4618846490910171e-06,
      "loss": 0.0109,
      "step": 2611540
    },
    {
      "epoch": 4.273875218475678,
      "grad_norm": 0.30070406198501587,
      "learning_rate": 1.4618187568775e-06,
      "loss": 0.01,
      "step": 2611560
    },
    {
      "epoch": 4.2739079489143315,
      "grad_norm": 0.01587172970175743,
      "learning_rate": 1.4617528646639828e-06,
      "loss": 0.0107,
      "step": 2611580
    },
    {
      "epoch": 4.273940679352985,
      "grad_norm": 0.15697132050991058,
      "learning_rate": 1.4616869724504656e-06,
      "loss": 0.0119,
      "step": 2611600
    },
    {
      "epoch": 4.273973409791638,
      "grad_norm": 0.17540942132472992,
      "learning_rate": 1.4616210802369485e-06,
      "loss": 0.0065,
      "step": 2611620
    },
    {
      "epoch": 4.274006140230291,
      "grad_norm": 0.3709440529346466,
      "learning_rate": 1.4615551880234312e-06,
      "loss": 0.0075,
      "step": 2611640
    },
    {
      "epoch": 4.274038870668945,
      "grad_norm": 2.542112350463867,
      "learning_rate": 1.4614892958099142e-06,
      "loss": 0.0127,
      "step": 2611660
    },
    {
      "epoch": 4.274071601107598,
      "grad_norm": 0.17801572382450104,
      "learning_rate": 1.461423403596397e-06,
      "loss": 0.0066,
      "step": 2611680
    },
    {
      "epoch": 4.274104331546251,
      "grad_norm": 0.4352790415287018,
      "learning_rate": 1.46135751138288e-06,
      "loss": 0.009,
      "step": 2611700
    },
    {
      "epoch": 4.274137061984905,
      "grad_norm": 0.37720438838005066,
      "learning_rate": 1.461291619169363e-06,
      "loss": 0.0087,
      "step": 2611720
    },
    {
      "epoch": 4.274169792423558,
      "grad_norm": 0.16947273910045624,
      "learning_rate": 1.4612257269558458e-06,
      "loss": 0.0071,
      "step": 2611740
    },
    {
      "epoch": 4.274202522862211,
      "grad_norm": 0.26737526059150696,
      "learning_rate": 1.4611598347423287e-06,
      "loss": 0.0123,
      "step": 2611760
    },
    {
      "epoch": 4.274235253300865,
      "grad_norm": 0.22737643122673035,
      "learning_rate": 1.4610939425288115e-06,
      "loss": 0.0085,
      "step": 2611780
    },
    {
      "epoch": 4.274267983739518,
      "grad_norm": 0.08563204854726791,
      "learning_rate": 1.4610280503152942e-06,
      "loss": 0.0058,
      "step": 2611800
    },
    {
      "epoch": 4.274300714178171,
      "grad_norm": 0.14290329813957214,
      "learning_rate": 1.4609621581017772e-06,
      "loss": 0.0075,
      "step": 2611820
    },
    {
      "epoch": 4.274333444616825,
      "grad_norm": 0.21870717406272888,
      "learning_rate": 1.46089626588826e-06,
      "loss": 0.0076,
      "step": 2611840
    },
    {
      "epoch": 4.274366175055478,
      "grad_norm": 0.4479549527168274,
      "learning_rate": 1.4608303736747429e-06,
      "loss": 0.0063,
      "step": 2611860
    },
    {
      "epoch": 4.274398905494132,
      "grad_norm": 0.1479375809431076,
      "learning_rate": 1.460764481461226e-06,
      "loss": 0.0117,
      "step": 2611880
    },
    {
      "epoch": 4.2744316359327845,
      "grad_norm": 0.2419763058423996,
      "learning_rate": 1.4606985892477088e-06,
      "loss": 0.0165,
      "step": 2611900
    },
    {
      "epoch": 4.274464366371438,
      "grad_norm": 0.31056612730026245,
      "learning_rate": 1.4606326970341917e-06,
      "loss": 0.0148,
      "step": 2611920
    },
    {
      "epoch": 4.274497096810092,
      "grad_norm": 0.130302295088768,
      "learning_rate": 1.4605668048206745e-06,
      "loss": 0.0065,
      "step": 2611940
    },
    {
      "epoch": 4.274529827248744,
      "grad_norm": 0.09377875179052353,
      "learning_rate": 1.4605009126071574e-06,
      "loss": 0.0101,
      "step": 2611960
    },
    {
      "epoch": 4.274562557687398,
      "grad_norm": 0.1508205533027649,
      "learning_rate": 1.4604350203936401e-06,
      "loss": 0.008,
      "step": 2611980
    },
    {
      "epoch": 4.274595288126052,
      "grad_norm": 0.36541733145713806,
      "learning_rate": 1.460369128180123e-06,
      "loss": 0.0105,
      "step": 2612000
    },
    {
      "epoch": 4.274628018564705,
      "grad_norm": 0.4448338747024536,
      "learning_rate": 1.4603032359666058e-06,
      "loss": 0.0085,
      "step": 2612020
    },
    {
      "epoch": 4.274660749003358,
      "grad_norm": 0.3358120918273926,
      "learning_rate": 1.460237343753089e-06,
      "loss": 0.0075,
      "step": 2612040
    },
    {
      "epoch": 4.2746934794420115,
      "grad_norm": 0.14673610031604767,
      "learning_rate": 1.4601714515395717e-06,
      "loss": 0.0082,
      "step": 2612060
    },
    {
      "epoch": 4.274726209880665,
      "grad_norm": 0.1745055615901947,
      "learning_rate": 1.4601055593260547e-06,
      "loss": 0.0103,
      "step": 2612080
    },
    {
      "epoch": 4.274758940319318,
      "grad_norm": 0.19683553278446198,
      "learning_rate": 1.4600396671125374e-06,
      "loss": 0.0056,
      "step": 2612100
    },
    {
      "epoch": 4.274791670757971,
      "grad_norm": 0.15230359137058258,
      "learning_rate": 1.4599737748990204e-06,
      "loss": 0.0098,
      "step": 2612120
    },
    {
      "epoch": 4.274824401196625,
      "grad_norm": 0.21728135645389557,
      "learning_rate": 1.4599078826855031e-06,
      "loss": 0.0068,
      "step": 2612140
    },
    {
      "epoch": 4.274857131635279,
      "grad_norm": 0.07616138458251953,
      "learning_rate": 1.459841990471986e-06,
      "loss": 0.007,
      "step": 2612160
    },
    {
      "epoch": 4.274889862073931,
      "grad_norm": 0.4412126839160919,
      "learning_rate": 1.4597760982584688e-06,
      "loss": 0.0105,
      "step": 2612180
    },
    {
      "epoch": 4.274922592512585,
      "grad_norm": 0.20844554901123047,
      "learning_rate": 1.4597102060449518e-06,
      "loss": 0.0076,
      "step": 2612200
    },
    {
      "epoch": 4.2749553229512385,
      "grad_norm": 0.3185330927371979,
      "learning_rate": 1.4596443138314347e-06,
      "loss": 0.009,
      "step": 2612220
    },
    {
      "epoch": 4.274988053389891,
      "grad_norm": 0.13139748573303223,
      "learning_rate": 1.4595784216179177e-06,
      "loss": 0.0171,
      "step": 2612240
    },
    {
      "epoch": 4.275020783828545,
      "grad_norm": 0.07299575954675674,
      "learning_rate": 1.4595125294044004e-06,
      "loss": 0.0109,
      "step": 2612260
    },
    {
      "epoch": 4.275053514267198,
      "grad_norm": 0.067063108086586,
      "learning_rate": 1.4594466371908834e-06,
      "loss": 0.0088,
      "step": 2612280
    },
    {
      "epoch": 4.275086244705852,
      "grad_norm": 0.13253110647201538,
      "learning_rate": 1.459380744977366e-06,
      "loss": 0.0078,
      "step": 2612300
    },
    {
      "epoch": 4.275118975144505,
      "grad_norm": 0.23853150010108948,
      "learning_rate": 1.459314852763849e-06,
      "loss": 0.0099,
      "step": 2612320
    },
    {
      "epoch": 4.275151705583158,
      "grad_norm": 0.4690153896808624,
      "learning_rate": 1.4592489605503318e-06,
      "loss": 0.0103,
      "step": 2612340
    },
    {
      "epoch": 4.275184436021812,
      "grad_norm": 0.5334571003913879,
      "learning_rate": 1.4591830683368147e-06,
      "loss": 0.0088,
      "step": 2612360
    },
    {
      "epoch": 4.275217166460465,
      "grad_norm": 0.18446655571460724,
      "learning_rate": 1.4591171761232975e-06,
      "loss": 0.0069,
      "step": 2612380
    },
    {
      "epoch": 4.275249896899118,
      "grad_norm": 0.6920446157455444,
      "learning_rate": 1.4590512839097806e-06,
      "loss": 0.0122,
      "step": 2612400
    },
    {
      "epoch": 4.275282627337772,
      "grad_norm": 0.14990608394145966,
      "learning_rate": 1.4589853916962634e-06,
      "loss": 0.0071,
      "step": 2612420
    },
    {
      "epoch": 4.2753153577764245,
      "grad_norm": 0.5797408819198608,
      "learning_rate": 1.4589194994827463e-06,
      "loss": 0.0117,
      "step": 2612440
    },
    {
      "epoch": 4.275348088215078,
      "grad_norm": 0.20991337299346924,
      "learning_rate": 1.458853607269229e-06,
      "loss": 0.0079,
      "step": 2612460
    },
    {
      "epoch": 4.275380818653732,
      "grad_norm": 0.3349592089653015,
      "learning_rate": 1.458787715055712e-06,
      "loss": 0.0075,
      "step": 2612480
    },
    {
      "epoch": 4.275413549092385,
      "grad_norm": 0.14120937883853912,
      "learning_rate": 1.4587218228421948e-06,
      "loss": 0.0118,
      "step": 2612500
    },
    {
      "epoch": 4.275446279531038,
      "grad_norm": 0.2826552093029022,
      "learning_rate": 1.4586559306286777e-06,
      "loss": 0.0066,
      "step": 2612520
    },
    {
      "epoch": 4.2754790099696915,
      "grad_norm": 0.04645747318863869,
      "learning_rate": 1.4585900384151605e-06,
      "loss": 0.0061,
      "step": 2612540
    },
    {
      "epoch": 4.275511740408345,
      "grad_norm": 0.2605859339237213,
      "learning_rate": 1.4585241462016434e-06,
      "loss": 0.0088,
      "step": 2612560
    },
    {
      "epoch": 4.275544470846999,
      "grad_norm": 0.7122051119804382,
      "learning_rate": 1.4584582539881264e-06,
      "loss": 0.0107,
      "step": 2612580
    },
    {
      "epoch": 4.275577201285651,
      "grad_norm": 0.2663221061229706,
      "learning_rate": 1.4583923617746093e-06,
      "loss": 0.0102,
      "step": 2612600
    },
    {
      "epoch": 4.275609931724305,
      "grad_norm": 0.23894721269607544,
      "learning_rate": 1.458326469561092e-06,
      "loss": 0.0092,
      "step": 2612620
    },
    {
      "epoch": 4.275642662162959,
      "grad_norm": 0.1483364999294281,
      "learning_rate": 1.458260577347575e-06,
      "loss": 0.0063,
      "step": 2612640
    },
    {
      "epoch": 4.275675392601611,
      "grad_norm": 0.24577972292900085,
      "learning_rate": 1.4581946851340577e-06,
      "loss": 0.0077,
      "step": 2612660
    },
    {
      "epoch": 4.275708123040265,
      "grad_norm": 0.12324025481939316,
      "learning_rate": 1.4581287929205407e-06,
      "loss": 0.0085,
      "step": 2612680
    },
    {
      "epoch": 4.2757408534789185,
      "grad_norm": 0.32091397047042847,
      "learning_rate": 1.4580629007070234e-06,
      "loss": 0.0093,
      "step": 2612700
    },
    {
      "epoch": 4.275773583917571,
      "grad_norm": 0.12095008045434952,
      "learning_rate": 1.4579970084935064e-06,
      "loss": 0.0125,
      "step": 2612720
    },
    {
      "epoch": 4.275806314356225,
      "grad_norm": 0.5433627367019653,
      "learning_rate": 1.4579311162799891e-06,
      "loss": 0.0128,
      "step": 2612740
    },
    {
      "epoch": 4.275839044794878,
      "grad_norm": 0.391190767288208,
      "learning_rate": 1.4578652240664723e-06,
      "loss": 0.0112,
      "step": 2612760
    },
    {
      "epoch": 4.275871775233532,
      "grad_norm": 0.6729639172554016,
      "learning_rate": 1.457799331852955e-06,
      "loss": 0.0133,
      "step": 2612780
    },
    {
      "epoch": 4.275904505672185,
      "grad_norm": 0.4154665768146515,
      "learning_rate": 1.457733439639438e-06,
      "loss": 0.0085,
      "step": 2612800
    },
    {
      "epoch": 4.275937236110838,
      "grad_norm": 0.1259513944387436,
      "learning_rate": 1.4576675474259207e-06,
      "loss": 0.0102,
      "step": 2612820
    },
    {
      "epoch": 4.275969966549492,
      "grad_norm": 0.08780184388160706,
      "learning_rate": 1.4576016552124037e-06,
      "loss": 0.0079,
      "step": 2612840
    },
    {
      "epoch": 4.2760026969881455,
      "grad_norm": 0.5573586225509644,
      "learning_rate": 1.4575357629988864e-06,
      "loss": 0.0074,
      "step": 2612860
    },
    {
      "epoch": 4.276035427426798,
      "grad_norm": 0.19916124641895294,
      "learning_rate": 1.4574698707853694e-06,
      "loss": 0.0067,
      "step": 2612880
    },
    {
      "epoch": 4.276068157865452,
      "grad_norm": 0.2979084849357605,
      "learning_rate": 1.457403978571852e-06,
      "loss": 0.0113,
      "step": 2612900
    },
    {
      "epoch": 4.276100888304105,
      "grad_norm": 0.2700852155685425,
      "learning_rate": 1.4573380863583353e-06,
      "loss": 0.0091,
      "step": 2612920
    },
    {
      "epoch": 4.276133618742758,
      "grad_norm": 0.15753862261772156,
      "learning_rate": 1.4572721941448182e-06,
      "loss": 0.0146,
      "step": 2612940
    },
    {
      "epoch": 4.276166349181412,
      "grad_norm": 0.06315365433692932,
      "learning_rate": 1.457206301931301e-06,
      "loss": 0.0106,
      "step": 2612960
    },
    {
      "epoch": 4.276199079620065,
      "grad_norm": 0.6069176197052002,
      "learning_rate": 1.457140409717784e-06,
      "loss": 0.0085,
      "step": 2612980
    },
    {
      "epoch": 4.276231810058718,
      "grad_norm": 0.3310045003890991,
      "learning_rate": 1.4570745175042666e-06,
      "loss": 0.0081,
      "step": 2613000
    },
    {
      "epoch": 4.276264540497372,
      "grad_norm": 0.7597686648368835,
      "learning_rate": 1.4570086252907494e-06,
      "loss": 0.0104,
      "step": 2613020
    },
    {
      "epoch": 4.276297270936025,
      "grad_norm": 0.10725335031747818,
      "learning_rate": 1.4569427330772323e-06,
      "loss": 0.0091,
      "step": 2613040
    },
    {
      "epoch": 4.276330001374679,
      "grad_norm": 0.33274009823799133,
      "learning_rate": 1.456876840863715e-06,
      "loss": 0.0117,
      "step": 2613060
    },
    {
      "epoch": 4.2763627318133315,
      "grad_norm": 0.2152624875307083,
      "learning_rate": 1.456810948650198e-06,
      "loss": 0.0115,
      "step": 2613080
    },
    {
      "epoch": 4.276395462251985,
      "grad_norm": 0.1446882039308548,
      "learning_rate": 1.4567450564366812e-06,
      "loss": 0.0115,
      "step": 2613100
    },
    {
      "epoch": 4.276428192690639,
      "grad_norm": 0.20599515736103058,
      "learning_rate": 1.456679164223164e-06,
      "loss": 0.0083,
      "step": 2613120
    },
    {
      "epoch": 4.276460923129291,
      "grad_norm": 0.25541746616363525,
      "learning_rate": 1.4566132720096469e-06,
      "loss": 0.0127,
      "step": 2613140
    },
    {
      "epoch": 4.276493653567945,
      "grad_norm": 0.3612145185470581,
      "learning_rate": 1.4565473797961296e-06,
      "loss": 0.0094,
      "step": 2613160
    },
    {
      "epoch": 4.276526384006599,
      "grad_norm": 0.5217035412788391,
      "learning_rate": 1.4564814875826126e-06,
      "loss": 0.0133,
      "step": 2613180
    },
    {
      "epoch": 4.276559114445252,
      "grad_norm": 0.1740013211965561,
      "learning_rate": 1.4564155953690953e-06,
      "loss": 0.0088,
      "step": 2613200
    },
    {
      "epoch": 4.276591844883905,
      "grad_norm": 0.35709014534950256,
      "learning_rate": 1.4563497031555783e-06,
      "loss": 0.014,
      "step": 2613220
    },
    {
      "epoch": 4.2766245753225585,
      "grad_norm": 0.47898373007774353,
      "learning_rate": 1.456283810942061e-06,
      "loss": 0.0056,
      "step": 2613240
    },
    {
      "epoch": 4.276657305761212,
      "grad_norm": 0.14582966268062592,
      "learning_rate": 1.4562179187285437e-06,
      "loss": 0.0058,
      "step": 2613260
    },
    {
      "epoch": 4.276690036199865,
      "grad_norm": 0.31417301297187805,
      "learning_rate": 1.456152026515027e-06,
      "loss": 0.0084,
      "step": 2613280
    },
    {
      "epoch": 4.276722766638518,
      "grad_norm": 0.10501682758331299,
      "learning_rate": 1.4560861343015099e-06,
      "loss": 0.005,
      "step": 2613300
    },
    {
      "epoch": 4.276755497077172,
      "grad_norm": 0.05532434582710266,
      "learning_rate": 1.4560202420879926e-06,
      "loss": 0.0092,
      "step": 2613320
    },
    {
      "epoch": 4.2767882275158255,
      "grad_norm": 0.23381204903125763,
      "learning_rate": 1.4559543498744756e-06,
      "loss": 0.0084,
      "step": 2613340
    },
    {
      "epoch": 4.276820957954478,
      "grad_norm": 0.25878390669822693,
      "learning_rate": 1.4558884576609583e-06,
      "loss": 0.0067,
      "step": 2613360
    },
    {
      "epoch": 4.276853688393132,
      "grad_norm": 0.3640865087509155,
      "learning_rate": 1.4558225654474412e-06,
      "loss": 0.0078,
      "step": 2613380
    },
    {
      "epoch": 4.276886418831785,
      "grad_norm": 0.3875509798526764,
      "learning_rate": 1.455756673233924e-06,
      "loss": 0.0083,
      "step": 2613400
    },
    {
      "epoch": 4.276919149270438,
      "grad_norm": 0.6454323530197144,
      "learning_rate": 1.455690781020407e-06,
      "loss": 0.0125,
      "step": 2613420
    },
    {
      "epoch": 4.276951879709092,
      "grad_norm": 0.2688184976577759,
      "learning_rate": 1.4556248888068897e-06,
      "loss": 0.009,
      "step": 2613440
    },
    {
      "epoch": 4.276984610147745,
      "grad_norm": 0.10815474390983582,
      "learning_rate": 1.4555589965933728e-06,
      "loss": 0.0076,
      "step": 2613460
    },
    {
      "epoch": 4.277017340586399,
      "grad_norm": 0.4971250891685486,
      "learning_rate": 1.4554931043798556e-06,
      "loss": 0.0088,
      "step": 2613480
    },
    {
      "epoch": 4.277050071025052,
      "grad_norm": 0.15133732557296753,
      "learning_rate": 1.4554272121663385e-06,
      "loss": 0.0078,
      "step": 2613500
    },
    {
      "epoch": 4.277082801463705,
      "grad_norm": 0.11743241548538208,
      "learning_rate": 1.4553613199528213e-06,
      "loss": 0.0121,
      "step": 2613520
    },
    {
      "epoch": 4.277115531902359,
      "grad_norm": 0.7518404126167297,
      "learning_rate": 1.4552954277393042e-06,
      "loss": 0.0115,
      "step": 2613540
    },
    {
      "epoch": 4.2771482623410115,
      "grad_norm": 0.1972910761833191,
      "learning_rate": 1.455229535525787e-06,
      "loss": 0.0089,
      "step": 2613560
    },
    {
      "epoch": 4.277180992779665,
      "grad_norm": 0.8586506843566895,
      "learning_rate": 1.45516364331227e-06,
      "loss": 0.0136,
      "step": 2613580
    },
    {
      "epoch": 4.277213723218319,
      "grad_norm": 0.3190729320049286,
      "learning_rate": 1.4550977510987527e-06,
      "loss": 0.0088,
      "step": 2613600
    },
    {
      "epoch": 4.277246453656972,
      "grad_norm": 0.1169830933213234,
      "learning_rate": 1.4550318588852356e-06,
      "loss": 0.0077,
      "step": 2613620
    },
    {
      "epoch": 4.277279184095625,
      "grad_norm": 0.096001997590065,
      "learning_rate": 1.4549659666717186e-06,
      "loss": 0.0101,
      "step": 2613640
    },
    {
      "epoch": 4.277311914534279,
      "grad_norm": 0.05756850913167,
      "learning_rate": 1.4549000744582015e-06,
      "loss": 0.0125,
      "step": 2613660
    },
    {
      "epoch": 4.277344644972932,
      "grad_norm": 0.30274659395217896,
      "learning_rate": 1.4548341822446842e-06,
      "loss": 0.0071,
      "step": 2613680
    },
    {
      "epoch": 4.277377375411585,
      "grad_norm": 0.5922231078147888,
      "learning_rate": 1.4547682900311672e-06,
      "loss": 0.0165,
      "step": 2613700
    },
    {
      "epoch": 4.2774101058502385,
      "grad_norm": 0.25365567207336426,
      "learning_rate": 1.45470239781765e-06,
      "loss": 0.0087,
      "step": 2613720
    },
    {
      "epoch": 4.277442836288892,
      "grad_norm": 0.1081952229142189,
      "learning_rate": 1.4546365056041329e-06,
      "loss": 0.009,
      "step": 2613740
    },
    {
      "epoch": 4.277475566727546,
      "grad_norm": 0.212809219956398,
      "learning_rate": 1.4545706133906156e-06,
      "loss": 0.0092,
      "step": 2613760
    },
    {
      "epoch": 4.277508297166198,
      "grad_norm": 0.15008990466594696,
      "learning_rate": 1.4545047211770986e-06,
      "loss": 0.0104,
      "step": 2613780
    },
    {
      "epoch": 4.277541027604852,
      "grad_norm": 0.685828447341919,
      "learning_rate": 1.4544388289635815e-06,
      "loss": 0.0084,
      "step": 2613800
    },
    {
      "epoch": 4.277573758043506,
      "grad_norm": 0.3156285583972931,
      "learning_rate": 1.4543729367500645e-06,
      "loss": 0.0109,
      "step": 2613820
    },
    {
      "epoch": 4.277606488482158,
      "grad_norm": 0.11221179366111755,
      "learning_rate": 1.4543070445365472e-06,
      "loss": 0.0085,
      "step": 2613840
    },
    {
      "epoch": 4.277639218920812,
      "grad_norm": 0.3239067792892456,
      "learning_rate": 1.4542411523230302e-06,
      "loss": 0.0108,
      "step": 2613860
    },
    {
      "epoch": 4.2776719493594655,
      "grad_norm": 0.33391159772872925,
      "learning_rate": 1.454175260109513e-06,
      "loss": 0.0105,
      "step": 2613880
    },
    {
      "epoch": 4.277704679798118,
      "grad_norm": 0.3422410190105438,
      "learning_rate": 1.4541093678959959e-06,
      "loss": 0.0085,
      "step": 2613900
    },
    {
      "epoch": 4.277737410236772,
      "grad_norm": 0.04642082378268242,
      "learning_rate": 1.4540434756824786e-06,
      "loss": 0.0098,
      "step": 2613920
    },
    {
      "epoch": 4.277770140675425,
      "grad_norm": 0.10615411400794983,
      "learning_rate": 1.4539775834689616e-06,
      "loss": 0.0086,
      "step": 2613940
    },
    {
      "epoch": 4.277802871114079,
      "grad_norm": 0.22451263666152954,
      "learning_rate": 1.4539116912554443e-06,
      "loss": 0.0107,
      "step": 2613960
    },
    {
      "epoch": 4.277835601552732,
      "grad_norm": 0.14373177289962769,
      "learning_rate": 1.4538457990419275e-06,
      "loss": 0.01,
      "step": 2613980
    },
    {
      "epoch": 4.277868331991385,
      "grad_norm": 0.5940423607826233,
      "learning_rate": 1.4537799068284102e-06,
      "loss": 0.0104,
      "step": 2614000
    },
    {
      "epoch": 4.277901062430039,
      "grad_norm": 0.24647720158100128,
      "learning_rate": 1.4537140146148932e-06,
      "loss": 0.0144,
      "step": 2614020
    },
    {
      "epoch": 4.2779337928686925,
      "grad_norm": 0.173190638422966,
      "learning_rate": 1.4536481224013759e-06,
      "loss": 0.0077,
      "step": 2614040
    },
    {
      "epoch": 4.277966523307345,
      "grad_norm": 0.23187853395938873,
      "learning_rate": 1.4535822301878588e-06,
      "loss": 0.009,
      "step": 2614060
    },
    {
      "epoch": 4.277999253745999,
      "grad_norm": 0.19969750940799713,
      "learning_rate": 1.4535163379743416e-06,
      "loss": 0.0083,
      "step": 2614080
    },
    {
      "epoch": 4.278031984184652,
      "grad_norm": 0.20384395122528076,
      "learning_rate": 1.4534504457608245e-06,
      "loss": 0.0118,
      "step": 2614100
    },
    {
      "epoch": 4.278064714623305,
      "grad_norm": 0.2706773579120636,
      "learning_rate": 1.4533845535473073e-06,
      "loss": 0.0111,
      "step": 2614120
    },
    {
      "epoch": 4.278097445061959,
      "grad_norm": 0.18638436496257782,
      "learning_rate": 1.4533186613337902e-06,
      "loss": 0.0111,
      "step": 2614140
    },
    {
      "epoch": 4.278130175500612,
      "grad_norm": 0.1524437516927719,
      "learning_rate": 1.4532527691202734e-06,
      "loss": 0.0077,
      "step": 2614160
    },
    {
      "epoch": 4.278162905939265,
      "grad_norm": 0.20717033743858337,
      "learning_rate": 1.4531868769067561e-06,
      "loss": 0.0079,
      "step": 2614180
    },
    {
      "epoch": 4.2781956363779186,
      "grad_norm": 0.41117408871650696,
      "learning_rate": 1.453120984693239e-06,
      "loss": 0.0082,
      "step": 2614200
    },
    {
      "epoch": 4.278228366816572,
      "grad_norm": 0.10797210782766342,
      "learning_rate": 1.4530550924797218e-06,
      "loss": 0.0102,
      "step": 2614220
    },
    {
      "epoch": 4.278261097255226,
      "grad_norm": 0.25817030668258667,
      "learning_rate": 1.4529892002662046e-06,
      "loss": 0.0074,
      "step": 2614240
    },
    {
      "epoch": 4.2782938276938784,
      "grad_norm": 0.08435238152742386,
      "learning_rate": 1.4529233080526875e-06,
      "loss": 0.0067,
      "step": 2614260
    },
    {
      "epoch": 4.278326558132532,
      "grad_norm": 0.34810543060302734,
      "learning_rate": 1.4528574158391703e-06,
      "loss": 0.0073,
      "step": 2614280
    },
    {
      "epoch": 4.278359288571186,
      "grad_norm": 0.18326199054718018,
      "learning_rate": 1.4527915236256532e-06,
      "loss": 0.0066,
      "step": 2614300
    },
    {
      "epoch": 4.278392019009839,
      "grad_norm": 0.06677331030368805,
      "learning_rate": 1.452725631412136e-06,
      "loss": 0.0128,
      "step": 2614320
    },
    {
      "epoch": 4.278424749448492,
      "grad_norm": 0.7933924198150635,
      "learning_rate": 1.4526597391986191e-06,
      "loss": 0.007,
      "step": 2614340
    },
    {
      "epoch": 4.2784574798871455,
      "grad_norm": 0.4546512961387634,
      "learning_rate": 1.452593846985102e-06,
      "loss": 0.0074,
      "step": 2614360
    },
    {
      "epoch": 4.278490210325799,
      "grad_norm": 0.05792642757296562,
      "learning_rate": 1.4525279547715848e-06,
      "loss": 0.0083,
      "step": 2614380
    },
    {
      "epoch": 4.278522940764452,
      "grad_norm": 0.16192853450775146,
      "learning_rate": 1.4524620625580677e-06,
      "loss": 0.0101,
      "step": 2614400
    },
    {
      "epoch": 4.278555671203105,
      "grad_norm": 0.11367084085941315,
      "learning_rate": 1.4523961703445505e-06,
      "loss": 0.0074,
      "step": 2614420
    },
    {
      "epoch": 4.278588401641759,
      "grad_norm": 0.25599405169487,
      "learning_rate": 1.4523302781310332e-06,
      "loss": 0.0075,
      "step": 2614440
    },
    {
      "epoch": 4.278621132080412,
      "grad_norm": 0.22642621397972107,
      "learning_rate": 1.4522643859175162e-06,
      "loss": 0.01,
      "step": 2614460
    },
    {
      "epoch": 4.278653862519065,
      "grad_norm": 0.14385047554969788,
      "learning_rate": 1.452198493703999e-06,
      "loss": 0.0084,
      "step": 2614480
    },
    {
      "epoch": 4.278686592957719,
      "grad_norm": 0.24450044333934784,
      "learning_rate": 1.4521326014904819e-06,
      "loss": 0.0097,
      "step": 2614500
    },
    {
      "epoch": 4.2787193233963725,
      "grad_norm": 0.1937263011932373,
      "learning_rate": 1.452066709276965e-06,
      "loss": 0.0072,
      "step": 2614520
    },
    {
      "epoch": 4.278752053835025,
      "grad_norm": 0.07549912482500076,
      "learning_rate": 1.4520008170634478e-06,
      "loss": 0.0105,
      "step": 2614540
    },
    {
      "epoch": 4.278784784273679,
      "grad_norm": 0.12065018713474274,
      "learning_rate": 1.4519349248499307e-06,
      "loss": 0.0142,
      "step": 2614560
    },
    {
      "epoch": 4.278817514712332,
      "grad_norm": 0.06795597821474075,
      "learning_rate": 1.4518690326364135e-06,
      "loss": 0.0073,
      "step": 2614580
    },
    {
      "epoch": 4.278850245150985,
      "grad_norm": 0.1730530709028244,
      "learning_rate": 1.4518031404228964e-06,
      "loss": 0.009,
      "step": 2614600
    },
    {
      "epoch": 4.278882975589639,
      "grad_norm": 0.2089926302433014,
      "learning_rate": 1.4517372482093792e-06,
      "loss": 0.0055,
      "step": 2614620
    },
    {
      "epoch": 4.278915706028292,
      "grad_norm": 0.21987859904766083,
      "learning_rate": 1.4516713559958621e-06,
      "loss": 0.0134,
      "step": 2614640
    },
    {
      "epoch": 4.278948436466946,
      "grad_norm": 0.05052679404616356,
      "learning_rate": 1.4516054637823448e-06,
      "loss": 0.0102,
      "step": 2614660
    },
    {
      "epoch": 4.278981166905599,
      "grad_norm": 0.35981911420822144,
      "learning_rate": 1.451539571568828e-06,
      "loss": 0.0123,
      "step": 2614680
    },
    {
      "epoch": 4.279013897344252,
      "grad_norm": 0.08728601783514023,
      "learning_rate": 1.4514736793553108e-06,
      "loss": 0.0091,
      "step": 2614700
    },
    {
      "epoch": 4.279046627782906,
      "grad_norm": 0.43634846806526184,
      "learning_rate": 1.4514077871417937e-06,
      "loss": 0.011,
      "step": 2614720
    },
    {
      "epoch": 4.2790793582215585,
      "grad_norm": 0.33956459164619446,
      "learning_rate": 1.4513418949282764e-06,
      "loss": 0.0048,
      "step": 2614740
    },
    {
      "epoch": 4.279112088660212,
      "grad_norm": 0.04557104408740997,
      "learning_rate": 1.4512760027147594e-06,
      "loss": 0.0096,
      "step": 2614760
    },
    {
      "epoch": 4.279144819098866,
      "grad_norm": 0.5536532998085022,
      "learning_rate": 1.4512101105012421e-06,
      "loss": 0.013,
      "step": 2614780
    },
    {
      "epoch": 4.279177549537519,
      "grad_norm": 0.29280519485473633,
      "learning_rate": 1.451144218287725e-06,
      "loss": 0.0116,
      "step": 2614800
    },
    {
      "epoch": 4.279210279976172,
      "grad_norm": 0.1546606421470642,
      "learning_rate": 1.4510783260742078e-06,
      "loss": 0.0078,
      "step": 2614820
    },
    {
      "epoch": 4.279243010414826,
      "grad_norm": 0.044279225170612335,
      "learning_rate": 1.4510124338606908e-06,
      "loss": 0.0137,
      "step": 2614840
    },
    {
      "epoch": 4.279275740853479,
      "grad_norm": 0.09477963298559189,
      "learning_rate": 1.4509465416471737e-06,
      "loss": 0.0073,
      "step": 2614860
    },
    {
      "epoch": 4.279308471292132,
      "grad_norm": 0.2230660766363144,
      "learning_rate": 1.4508806494336567e-06,
      "loss": 0.0083,
      "step": 2614880
    },
    {
      "epoch": 4.2793412017307855,
      "grad_norm": 0.08786050975322723,
      "learning_rate": 1.4508147572201394e-06,
      "loss": 0.0065,
      "step": 2614900
    },
    {
      "epoch": 4.279373932169439,
      "grad_norm": 1.14667809009552,
      "learning_rate": 1.4507488650066224e-06,
      "loss": 0.0086,
      "step": 2614920
    },
    {
      "epoch": 4.279406662608093,
      "grad_norm": 0.21579314768314362,
      "learning_rate": 1.4506829727931051e-06,
      "loss": 0.0099,
      "step": 2614940
    },
    {
      "epoch": 4.279439393046745,
      "grad_norm": 0.31954121589660645,
      "learning_rate": 1.450617080579588e-06,
      "loss": 0.0092,
      "step": 2614960
    },
    {
      "epoch": 4.279472123485399,
      "grad_norm": 0.08659664541482925,
      "learning_rate": 1.4505511883660708e-06,
      "loss": 0.0083,
      "step": 2614980
    },
    {
      "epoch": 4.2795048539240526,
      "grad_norm": 0.25372514128685,
      "learning_rate": 1.4504852961525538e-06,
      "loss": 0.01,
      "step": 2615000
    },
    {
      "epoch": 4.279537584362705,
      "grad_norm": 0.32881632447242737,
      "learning_rate": 1.4504194039390365e-06,
      "loss": 0.0059,
      "step": 2615020
    },
    {
      "epoch": 4.279570314801359,
      "grad_norm": 0.43519386649131775,
      "learning_rate": 1.4503535117255197e-06,
      "loss": 0.0076,
      "step": 2615040
    },
    {
      "epoch": 4.2796030452400124,
      "grad_norm": 0.27329736948013306,
      "learning_rate": 1.4502876195120024e-06,
      "loss": 0.008,
      "step": 2615060
    },
    {
      "epoch": 4.279635775678666,
      "grad_norm": 0.04457807168364525,
      "learning_rate": 1.4502217272984853e-06,
      "loss": 0.0068,
      "step": 2615080
    },
    {
      "epoch": 4.279668506117319,
      "grad_norm": 0.1923820823431015,
      "learning_rate": 1.450155835084968e-06,
      "loss": 0.0077,
      "step": 2615100
    },
    {
      "epoch": 4.279701236555972,
      "grad_norm": 0.4890753924846649,
      "learning_rate": 1.450089942871451e-06,
      "loss": 0.0113,
      "step": 2615120
    },
    {
      "epoch": 4.279733966994626,
      "grad_norm": 0.38030797243118286,
      "learning_rate": 1.4500240506579338e-06,
      "loss": 0.0092,
      "step": 2615140
    },
    {
      "epoch": 4.279766697433279,
      "grad_norm": 0.13290190696716309,
      "learning_rate": 1.4499581584444167e-06,
      "loss": 0.011,
      "step": 2615160
    },
    {
      "epoch": 4.279799427871932,
      "grad_norm": 0.1512264609336853,
      "learning_rate": 1.4498922662308995e-06,
      "loss": 0.0082,
      "step": 2615180
    },
    {
      "epoch": 4.279832158310586,
      "grad_norm": 0.5449579954147339,
      "learning_rate": 1.4498263740173824e-06,
      "loss": 0.01,
      "step": 2615200
    },
    {
      "epoch": 4.279864888749239,
      "grad_norm": 0.5659292340278625,
      "learning_rate": 1.4497604818038654e-06,
      "loss": 0.01,
      "step": 2615220
    },
    {
      "epoch": 4.279897619187892,
      "grad_norm": 0.11047221720218658,
      "learning_rate": 1.4496945895903483e-06,
      "loss": 0.0089,
      "step": 2615240
    },
    {
      "epoch": 4.279930349626546,
      "grad_norm": 0.1547086089849472,
      "learning_rate": 1.449628697376831e-06,
      "loss": 0.0108,
      "step": 2615260
    },
    {
      "epoch": 4.279963080065199,
      "grad_norm": 0.15975362062454224,
      "learning_rate": 1.449562805163314e-06,
      "loss": 0.0103,
      "step": 2615280
    },
    {
      "epoch": 4.279995810503852,
      "grad_norm": 0.18114478886127472,
      "learning_rate": 1.4494969129497968e-06,
      "loss": 0.0093,
      "step": 2615300
    },
    {
      "epoch": 4.280028540942506,
      "grad_norm": 0.15180960297584534,
      "learning_rate": 1.4494310207362797e-06,
      "loss": 0.0113,
      "step": 2615320
    },
    {
      "epoch": 4.280061271381159,
      "grad_norm": 0.19310103356838226,
      "learning_rate": 1.4493651285227624e-06,
      "loss": 0.009,
      "step": 2615340
    },
    {
      "epoch": 4.280094001819812,
      "grad_norm": 0.40282562375068665,
      "learning_rate": 1.4492992363092454e-06,
      "loss": 0.0067,
      "step": 2615360
    },
    {
      "epoch": 4.2801267322584655,
      "grad_norm": 0.13217587769031525,
      "learning_rate": 1.4492333440957281e-06,
      "loss": 0.0099,
      "step": 2615380
    },
    {
      "epoch": 4.280159462697119,
      "grad_norm": 0.12000957876443863,
      "learning_rate": 1.4491674518822113e-06,
      "loss": 0.0083,
      "step": 2615400
    },
    {
      "epoch": 4.280192193135773,
      "grad_norm": 0.12118139117956161,
      "learning_rate": 1.449101559668694e-06,
      "loss": 0.0167,
      "step": 2615420
    },
    {
      "epoch": 4.280224923574425,
      "grad_norm": 0.43943408131599426,
      "learning_rate": 1.449035667455177e-06,
      "loss": 0.0096,
      "step": 2615440
    },
    {
      "epoch": 4.280257654013079,
      "grad_norm": 0.34767019748687744,
      "learning_rate": 1.4489697752416597e-06,
      "loss": 0.0114,
      "step": 2615460
    },
    {
      "epoch": 4.280290384451733,
      "grad_norm": 0.2963997721672058,
      "learning_rate": 1.4489038830281427e-06,
      "loss": 0.0127,
      "step": 2615480
    },
    {
      "epoch": 4.280323114890386,
      "grad_norm": 0.1176132783293724,
      "learning_rate": 1.4488379908146254e-06,
      "loss": 0.0059,
      "step": 2615500
    },
    {
      "epoch": 4.280355845329039,
      "grad_norm": 0.0792560875415802,
      "learning_rate": 1.4487720986011084e-06,
      "loss": 0.008,
      "step": 2615520
    },
    {
      "epoch": 4.2803885757676925,
      "grad_norm": 0.7022693753242493,
      "learning_rate": 1.4487062063875911e-06,
      "loss": 0.0126,
      "step": 2615540
    },
    {
      "epoch": 4.280421306206346,
      "grad_norm": 0.24770669639110565,
      "learning_rate": 1.4486403141740743e-06,
      "loss": 0.0076,
      "step": 2615560
    },
    {
      "epoch": 4.280454036644999,
      "grad_norm": 0.20077604055404663,
      "learning_rate": 1.4485744219605572e-06,
      "loss": 0.0085,
      "step": 2615580
    },
    {
      "epoch": 4.280486767083652,
      "grad_norm": 0.2515724301338196,
      "learning_rate": 1.44850852974704e-06,
      "loss": 0.0086,
      "step": 2615600
    },
    {
      "epoch": 4.280519497522306,
      "grad_norm": 0.2683149576187134,
      "learning_rate": 1.448442637533523e-06,
      "loss": 0.0091,
      "step": 2615620
    },
    {
      "epoch": 4.280552227960959,
      "grad_norm": 0.12137095630168915,
      "learning_rate": 1.4483767453200057e-06,
      "loss": 0.0084,
      "step": 2615640
    },
    {
      "epoch": 4.280584958399612,
      "grad_norm": 0.5086819529533386,
      "learning_rate": 1.4483108531064884e-06,
      "loss": 0.0088,
      "step": 2615660
    },
    {
      "epoch": 4.280617688838266,
      "grad_norm": 0.1822439581155777,
      "learning_rate": 1.4482449608929714e-06,
      "loss": 0.0102,
      "step": 2615680
    },
    {
      "epoch": 4.2806504192769195,
      "grad_norm": 0.30384811758995056,
      "learning_rate": 1.448179068679454e-06,
      "loss": 0.0156,
      "step": 2615700
    },
    {
      "epoch": 4.280683149715572,
      "grad_norm": 0.44571584463119507,
      "learning_rate": 1.448113176465937e-06,
      "loss": 0.0103,
      "step": 2615720
    },
    {
      "epoch": 4.280715880154226,
      "grad_norm": 0.33367088437080383,
      "learning_rate": 1.4480472842524202e-06,
      "loss": 0.0133,
      "step": 2615740
    },
    {
      "epoch": 4.280748610592879,
      "grad_norm": 0.23466305434703827,
      "learning_rate": 1.447981392038903e-06,
      "loss": 0.0091,
      "step": 2615760
    },
    {
      "epoch": 4.280781341031533,
      "grad_norm": 0.38803476095199585,
      "learning_rate": 1.447915499825386e-06,
      "loss": 0.0072,
      "step": 2615780
    },
    {
      "epoch": 4.280814071470186,
      "grad_norm": 0.20044226944446564,
      "learning_rate": 1.4478496076118686e-06,
      "loss": 0.0113,
      "step": 2615800
    },
    {
      "epoch": 4.280846801908839,
      "grad_norm": 0.5170036554336548,
      "learning_rate": 1.4477837153983516e-06,
      "loss": 0.0057,
      "step": 2615820
    },
    {
      "epoch": 4.280879532347493,
      "grad_norm": 0.10656622052192688,
      "learning_rate": 1.4477178231848343e-06,
      "loss": 0.0126,
      "step": 2615840
    },
    {
      "epoch": 4.280912262786146,
      "grad_norm": 0.11759799718856812,
      "learning_rate": 1.4476519309713173e-06,
      "loss": 0.0103,
      "step": 2615860
    },
    {
      "epoch": 4.280944993224799,
      "grad_norm": 0.15848767757415771,
      "learning_rate": 1.4475860387578e-06,
      "loss": 0.0104,
      "step": 2615880
    },
    {
      "epoch": 4.280977723663453,
      "grad_norm": 0.19548803567886353,
      "learning_rate": 1.4475201465442828e-06,
      "loss": 0.0151,
      "step": 2615900
    },
    {
      "epoch": 4.2810104541021055,
      "grad_norm": 0.12313124537467957,
      "learning_rate": 1.447454254330766e-06,
      "loss": 0.0077,
      "step": 2615920
    },
    {
      "epoch": 4.281043184540759,
      "grad_norm": 0.23178762197494507,
      "learning_rate": 1.4473883621172489e-06,
      "loss": 0.0106,
      "step": 2615940
    },
    {
      "epoch": 4.281075914979413,
      "grad_norm": 0.15271128714084625,
      "learning_rate": 1.4473224699037316e-06,
      "loss": 0.0067,
      "step": 2615960
    },
    {
      "epoch": 4.281108645418066,
      "grad_norm": 0.12020926177501678,
      "learning_rate": 1.4472565776902146e-06,
      "loss": 0.0104,
      "step": 2615980
    },
    {
      "epoch": 4.281141375856719,
      "grad_norm": 0.08518093824386597,
      "learning_rate": 1.4471906854766973e-06,
      "loss": 0.0091,
      "step": 2616000
    },
    {
      "epoch": 4.2811741062953725,
      "grad_norm": 0.19899433851242065,
      "learning_rate": 1.4471247932631803e-06,
      "loss": 0.0083,
      "step": 2616020
    },
    {
      "epoch": 4.281206836734026,
      "grad_norm": 0.804345428943634,
      "learning_rate": 1.447058901049663e-06,
      "loss": 0.0107,
      "step": 2616040
    },
    {
      "epoch": 4.281239567172679,
      "grad_norm": 0.07980503141880035,
      "learning_rate": 1.446993008836146e-06,
      "loss": 0.0081,
      "step": 2616060
    },
    {
      "epoch": 4.281272297611332,
      "grad_norm": 0.2780320942401886,
      "learning_rate": 1.4469271166226287e-06,
      "loss": 0.0106,
      "step": 2616080
    },
    {
      "epoch": 4.281305028049986,
      "grad_norm": 0.08184216171503067,
      "learning_rate": 1.4468612244091119e-06,
      "loss": 0.0095,
      "step": 2616100
    },
    {
      "epoch": 4.28133775848864,
      "grad_norm": 0.2565007507801056,
      "learning_rate": 1.4467953321955946e-06,
      "loss": 0.0102,
      "step": 2616120
    },
    {
      "epoch": 4.281370488927292,
      "grad_norm": 0.11613889783620834,
      "learning_rate": 1.4467294399820775e-06,
      "loss": 0.0057,
      "step": 2616140
    },
    {
      "epoch": 4.281403219365946,
      "grad_norm": 0.14962273836135864,
      "learning_rate": 1.4466635477685603e-06,
      "loss": 0.0096,
      "step": 2616160
    },
    {
      "epoch": 4.2814359498045995,
      "grad_norm": 0.0765044093132019,
      "learning_rate": 1.4465976555550432e-06,
      "loss": 0.0094,
      "step": 2616180
    },
    {
      "epoch": 4.281468680243252,
      "grad_norm": 0.24239970743656158,
      "learning_rate": 1.446531763341526e-06,
      "loss": 0.0085,
      "step": 2616200
    },
    {
      "epoch": 4.281501410681906,
      "grad_norm": 0.19886372983455658,
      "learning_rate": 1.446465871128009e-06,
      "loss": 0.0085,
      "step": 2616220
    },
    {
      "epoch": 4.281534141120559,
      "grad_norm": 0.20390407741069794,
      "learning_rate": 1.4463999789144917e-06,
      "loss": 0.007,
      "step": 2616240
    },
    {
      "epoch": 4.281566871559213,
      "grad_norm": 0.20117758214473724,
      "learning_rate": 1.4463340867009746e-06,
      "loss": 0.0127,
      "step": 2616260
    },
    {
      "epoch": 4.281599601997866,
      "grad_norm": 0.31809985637664795,
      "learning_rate": 1.4462681944874576e-06,
      "loss": 0.0096,
      "step": 2616280
    },
    {
      "epoch": 4.281632332436519,
      "grad_norm": 0.2981865406036377,
      "learning_rate": 1.4462023022739405e-06,
      "loss": 0.0073,
      "step": 2616300
    },
    {
      "epoch": 4.281665062875173,
      "grad_norm": 0.072099968791008,
      "learning_rate": 1.4461364100604233e-06,
      "loss": 0.0095,
      "step": 2616320
    },
    {
      "epoch": 4.281697793313826,
      "grad_norm": 0.054292261600494385,
      "learning_rate": 1.4460705178469062e-06,
      "loss": 0.0086,
      "step": 2616340
    },
    {
      "epoch": 4.281730523752479,
      "grad_norm": 0.1923930048942566,
      "learning_rate": 1.446004625633389e-06,
      "loss": 0.0132,
      "step": 2616360
    },
    {
      "epoch": 4.281763254191133,
      "grad_norm": 0.06787093728780746,
      "learning_rate": 1.445938733419872e-06,
      "loss": 0.0123,
      "step": 2616380
    },
    {
      "epoch": 4.281795984629786,
      "grad_norm": 0.289031982421875,
      "learning_rate": 1.4458728412063546e-06,
      "loss": 0.0084,
      "step": 2616400
    },
    {
      "epoch": 4.281828715068439,
      "grad_norm": 0.3280222713947296,
      "learning_rate": 1.4458069489928376e-06,
      "loss": 0.0062,
      "step": 2616420
    },
    {
      "epoch": 4.281861445507093,
      "grad_norm": 0.40231600403785706,
      "learning_rate": 1.4457410567793205e-06,
      "loss": 0.0104,
      "step": 2616440
    },
    {
      "epoch": 4.281894175945746,
      "grad_norm": 0.21676208078861237,
      "learning_rate": 1.4456751645658035e-06,
      "loss": 0.0093,
      "step": 2616460
    },
    {
      "epoch": 4.281926906384399,
      "grad_norm": 0.11049530655145645,
      "learning_rate": 1.4456092723522862e-06,
      "loss": 0.0105,
      "step": 2616480
    },
    {
      "epoch": 4.281959636823053,
      "grad_norm": 0.0656030997633934,
      "learning_rate": 1.4455433801387692e-06,
      "loss": 0.0105,
      "step": 2616500
    },
    {
      "epoch": 4.281992367261706,
      "grad_norm": 0.6929419040679932,
      "learning_rate": 1.445477487925252e-06,
      "loss": 0.0131,
      "step": 2616520
    },
    {
      "epoch": 4.28202509770036,
      "grad_norm": 0.24406428635120392,
      "learning_rate": 1.4454115957117349e-06,
      "loss": 0.0098,
      "step": 2616540
    },
    {
      "epoch": 4.2820578281390125,
      "grad_norm": 0.3416273891925812,
      "learning_rate": 1.4453457034982176e-06,
      "loss": 0.0117,
      "step": 2616560
    },
    {
      "epoch": 4.282090558577666,
      "grad_norm": 0.21905672550201416,
      "learning_rate": 1.4452798112847006e-06,
      "loss": 0.0067,
      "step": 2616580
    },
    {
      "epoch": 4.28212328901632,
      "grad_norm": 0.269377201795578,
      "learning_rate": 1.4452139190711833e-06,
      "loss": 0.0067,
      "step": 2616600
    },
    {
      "epoch": 4.282156019454972,
      "grad_norm": 0.14841249585151672,
      "learning_rate": 1.4451480268576665e-06,
      "loss": 0.0078,
      "step": 2616620
    },
    {
      "epoch": 4.282188749893626,
      "grad_norm": 0.20778490602970123,
      "learning_rate": 1.4450821346441492e-06,
      "loss": 0.0109,
      "step": 2616640
    },
    {
      "epoch": 4.28222148033228,
      "grad_norm": 0.05543004348874092,
      "learning_rate": 1.4450162424306322e-06,
      "loss": 0.0055,
      "step": 2616660
    },
    {
      "epoch": 4.282254210770933,
      "grad_norm": 0.16749033331871033,
      "learning_rate": 1.444950350217115e-06,
      "loss": 0.0061,
      "step": 2616680
    },
    {
      "epoch": 4.282286941209586,
      "grad_norm": 0.18602699041366577,
      "learning_rate": 1.4448844580035979e-06,
      "loss": 0.0077,
      "step": 2616700
    },
    {
      "epoch": 4.2823196716482395,
      "grad_norm": 0.2034422755241394,
      "learning_rate": 1.4448185657900806e-06,
      "loss": 0.0074,
      "step": 2616720
    },
    {
      "epoch": 4.282352402086893,
      "grad_norm": 0.17949360609054565,
      "learning_rate": 1.4447526735765635e-06,
      "loss": 0.0115,
      "step": 2616740
    },
    {
      "epoch": 4.282385132525546,
      "grad_norm": 0.14350897073745728,
      "learning_rate": 1.4446867813630463e-06,
      "loss": 0.0087,
      "step": 2616760
    },
    {
      "epoch": 4.282417862964199,
      "grad_norm": 0.13711418211460114,
      "learning_rate": 1.4446208891495292e-06,
      "loss": 0.0073,
      "step": 2616780
    },
    {
      "epoch": 4.282450593402853,
      "grad_norm": 0.36356183886528015,
      "learning_rate": 1.4445549969360124e-06,
      "loss": 0.0087,
      "step": 2616800
    },
    {
      "epoch": 4.2824833238415065,
      "grad_norm": 0.17374767363071442,
      "learning_rate": 1.4444891047224951e-06,
      "loss": 0.0096,
      "step": 2616820
    },
    {
      "epoch": 4.282516054280159,
      "grad_norm": 0.324001669883728,
      "learning_rate": 1.444423212508978e-06,
      "loss": 0.007,
      "step": 2616840
    },
    {
      "epoch": 4.282548784718813,
      "grad_norm": 0.17394785583019257,
      "learning_rate": 1.4443573202954608e-06,
      "loss": 0.0073,
      "step": 2616860
    },
    {
      "epoch": 4.282581515157466,
      "grad_norm": 0.20786966383457184,
      "learning_rate": 1.4442914280819436e-06,
      "loss": 0.0102,
      "step": 2616880
    },
    {
      "epoch": 4.282614245596119,
      "grad_norm": 0.31269997358322144,
      "learning_rate": 1.4442255358684265e-06,
      "loss": 0.0097,
      "step": 2616900
    },
    {
      "epoch": 4.282646976034773,
      "grad_norm": 0.08501763641834259,
      "learning_rate": 1.4441596436549093e-06,
      "loss": 0.0088,
      "step": 2616920
    },
    {
      "epoch": 4.282679706473426,
      "grad_norm": 0.1847470998764038,
      "learning_rate": 1.4440937514413922e-06,
      "loss": 0.0068,
      "step": 2616940
    },
    {
      "epoch": 4.28271243691208,
      "grad_norm": 0.0793222188949585,
      "learning_rate": 1.444027859227875e-06,
      "loss": 0.0092,
      "step": 2616960
    },
    {
      "epoch": 4.282745167350733,
      "grad_norm": 0.20295417308807373,
      "learning_rate": 1.4439619670143581e-06,
      "loss": 0.0092,
      "step": 2616980
    },
    {
      "epoch": 4.282777897789386,
      "grad_norm": 0.20691438019275665,
      "learning_rate": 1.443896074800841e-06,
      "loss": 0.0105,
      "step": 2617000
    },
    {
      "epoch": 4.28281062822804,
      "grad_norm": 0.057224903255701065,
      "learning_rate": 1.4438301825873238e-06,
      "loss": 0.0102,
      "step": 2617020
    },
    {
      "epoch": 4.2828433586666925,
      "grad_norm": 0.44488978385925293,
      "learning_rate": 1.4437642903738068e-06,
      "loss": 0.012,
      "step": 2617040
    },
    {
      "epoch": 4.282876089105346,
      "grad_norm": 0.22979125380516052,
      "learning_rate": 1.4436983981602895e-06,
      "loss": 0.012,
      "step": 2617060
    },
    {
      "epoch": 4.282908819544,
      "grad_norm": 0.1652367264032364,
      "learning_rate": 1.4436325059467722e-06,
      "loss": 0.0066,
      "step": 2617080
    },
    {
      "epoch": 4.282941549982652,
      "grad_norm": 0.4412885904312134,
      "learning_rate": 1.4435666137332552e-06,
      "loss": 0.01,
      "step": 2617100
    },
    {
      "epoch": 4.282974280421306,
      "grad_norm": 0.3253869414329529,
      "learning_rate": 1.443500721519738e-06,
      "loss": 0.0113,
      "step": 2617120
    },
    {
      "epoch": 4.28300701085996,
      "grad_norm": 0.32954108715057373,
      "learning_rate": 1.4434348293062209e-06,
      "loss": 0.0096,
      "step": 2617140
    },
    {
      "epoch": 4.283039741298613,
      "grad_norm": 0.18631544709205627,
      "learning_rate": 1.443368937092704e-06,
      "loss": 0.0137,
      "step": 2617160
    },
    {
      "epoch": 4.283072471737266,
      "grad_norm": 0.14303646981716156,
      "learning_rate": 1.4433030448791868e-06,
      "loss": 0.0082,
      "step": 2617180
    },
    {
      "epoch": 4.2831052021759195,
      "grad_norm": 0.41363030672073364,
      "learning_rate": 1.4432371526656697e-06,
      "loss": 0.0133,
      "step": 2617200
    },
    {
      "epoch": 4.283137932614573,
      "grad_norm": 0.12543992698192596,
      "learning_rate": 1.4431712604521525e-06,
      "loss": 0.0081,
      "step": 2617220
    },
    {
      "epoch": 4.283170663053227,
      "grad_norm": 0.3340130150318146,
      "learning_rate": 1.4431053682386354e-06,
      "loss": 0.0099,
      "step": 2617240
    },
    {
      "epoch": 4.283203393491879,
      "grad_norm": 0.5998038649559021,
      "learning_rate": 1.4430394760251182e-06,
      "loss": 0.0138,
      "step": 2617260
    },
    {
      "epoch": 4.283236123930533,
      "grad_norm": 0.1783187985420227,
      "learning_rate": 1.4429735838116011e-06,
      "loss": 0.0089,
      "step": 2617280
    },
    {
      "epoch": 4.283268854369187,
      "grad_norm": 0.1380978673696518,
      "learning_rate": 1.4429076915980839e-06,
      "loss": 0.0135,
      "step": 2617300
    },
    {
      "epoch": 4.283301584807839,
      "grad_norm": 0.26979008316993713,
      "learning_rate": 1.442841799384567e-06,
      "loss": 0.0101,
      "step": 2617320
    },
    {
      "epoch": 4.283334315246493,
      "grad_norm": 0.38525158166885376,
      "learning_rate": 1.4427759071710498e-06,
      "loss": 0.0066,
      "step": 2617340
    },
    {
      "epoch": 4.2833670456851465,
      "grad_norm": 0.5427358746528625,
      "learning_rate": 1.4427100149575327e-06,
      "loss": 0.0073,
      "step": 2617360
    },
    {
      "epoch": 4.283399776123799,
      "grad_norm": 0.4208977520465851,
      "learning_rate": 1.4426441227440155e-06,
      "loss": 0.0074,
      "step": 2617380
    },
    {
      "epoch": 4.283432506562453,
      "grad_norm": 0.05688565969467163,
      "learning_rate": 1.4425782305304984e-06,
      "loss": 0.0072,
      "step": 2617400
    },
    {
      "epoch": 4.283465237001106,
      "grad_norm": 0.10018540173768997,
      "learning_rate": 1.4425123383169811e-06,
      "loss": 0.0065,
      "step": 2617420
    },
    {
      "epoch": 4.28349796743976,
      "grad_norm": 0.296733558177948,
      "learning_rate": 1.442446446103464e-06,
      "loss": 0.0152,
      "step": 2617440
    },
    {
      "epoch": 4.283530697878413,
      "grad_norm": 0.4309537708759308,
      "learning_rate": 1.4423805538899468e-06,
      "loss": 0.0114,
      "step": 2617460
    },
    {
      "epoch": 4.283563428317066,
      "grad_norm": 0.171661838889122,
      "learning_rate": 1.4423146616764298e-06,
      "loss": 0.0092,
      "step": 2617480
    },
    {
      "epoch": 4.28359615875572,
      "grad_norm": 0.27477458119392395,
      "learning_rate": 1.4422487694629127e-06,
      "loss": 0.0104,
      "step": 2617500
    },
    {
      "epoch": 4.283628889194373,
      "grad_norm": 0.32100433111190796,
      "learning_rate": 1.4421828772493957e-06,
      "loss": 0.0129,
      "step": 2617520
    },
    {
      "epoch": 4.283661619633026,
      "grad_norm": 0.6547892093658447,
      "learning_rate": 1.4421169850358784e-06,
      "loss": 0.0095,
      "step": 2617540
    },
    {
      "epoch": 4.28369435007168,
      "grad_norm": 0.24862147867679596,
      "learning_rate": 1.4420510928223614e-06,
      "loss": 0.0071,
      "step": 2617560
    },
    {
      "epoch": 4.283727080510333,
      "grad_norm": 0.2190781980752945,
      "learning_rate": 1.4419852006088441e-06,
      "loss": 0.0115,
      "step": 2617580
    },
    {
      "epoch": 4.283759810948986,
      "grad_norm": 0.18600745499134064,
      "learning_rate": 1.441919308395327e-06,
      "loss": 0.0061,
      "step": 2617600
    },
    {
      "epoch": 4.28379254138764,
      "grad_norm": 0.3359116017818451,
      "learning_rate": 1.4418534161818098e-06,
      "loss": 0.0104,
      "step": 2617620
    },
    {
      "epoch": 4.283825271826293,
      "grad_norm": 0.1756298840045929,
      "learning_rate": 1.4417875239682928e-06,
      "loss": 0.0109,
      "step": 2617640
    },
    {
      "epoch": 4.283858002264946,
      "grad_norm": 0.2507665753364563,
      "learning_rate": 1.4417216317547755e-06,
      "loss": 0.0092,
      "step": 2617660
    },
    {
      "epoch": 4.2838907327035995,
      "grad_norm": 0.06779811531305313,
      "learning_rate": 1.4416557395412587e-06,
      "loss": 0.0093,
      "step": 2617680
    },
    {
      "epoch": 4.283923463142253,
      "grad_norm": 0.08224938809871674,
      "learning_rate": 1.4415898473277414e-06,
      "loss": 0.0092,
      "step": 2617700
    },
    {
      "epoch": 4.283956193580907,
      "grad_norm": 0.09291590005159378,
      "learning_rate": 1.4415239551142244e-06,
      "loss": 0.0054,
      "step": 2617720
    },
    {
      "epoch": 4.283988924019559,
      "grad_norm": 0.34032514691352844,
      "learning_rate": 1.441458062900707e-06,
      "loss": 0.0151,
      "step": 2617740
    },
    {
      "epoch": 4.284021654458213,
      "grad_norm": 0.22509285807609558,
      "learning_rate": 1.44139217068719e-06,
      "loss": 0.009,
      "step": 2617760
    },
    {
      "epoch": 4.284054384896867,
      "grad_norm": 0.26117023825645447,
      "learning_rate": 1.4413262784736728e-06,
      "loss": 0.0078,
      "step": 2617780
    },
    {
      "epoch": 4.284087115335519,
      "grad_norm": 0.4667571783065796,
      "learning_rate": 1.4412603862601557e-06,
      "loss": 0.0093,
      "step": 2617800
    },
    {
      "epoch": 4.284119845774173,
      "grad_norm": 0.019820256158709526,
      "learning_rate": 1.4411944940466385e-06,
      "loss": 0.0102,
      "step": 2617820
    },
    {
      "epoch": 4.2841525762128265,
      "grad_norm": 0.056756384670734406,
      "learning_rate": 1.4411286018331214e-06,
      "loss": 0.0049,
      "step": 2617840
    },
    {
      "epoch": 4.28418530665148,
      "grad_norm": 0.1948583424091339,
      "learning_rate": 1.4410627096196044e-06,
      "loss": 0.0098,
      "step": 2617860
    },
    {
      "epoch": 4.284218037090133,
      "grad_norm": 0.35303837060928345,
      "learning_rate": 1.4409968174060873e-06,
      "loss": 0.0089,
      "step": 2617880
    },
    {
      "epoch": 4.284250767528786,
      "grad_norm": 0.2242901623249054,
      "learning_rate": 1.44093092519257e-06,
      "loss": 0.0131,
      "step": 2617900
    },
    {
      "epoch": 4.28428349796744,
      "grad_norm": 0.5276578664779663,
      "learning_rate": 1.440865032979053e-06,
      "loss": 0.0107,
      "step": 2617920
    },
    {
      "epoch": 4.284316228406093,
      "grad_norm": 0.06488031893968582,
      "learning_rate": 1.4407991407655358e-06,
      "loss": 0.0084,
      "step": 2617940
    },
    {
      "epoch": 4.284348958844746,
      "grad_norm": 0.12586438655853271,
      "learning_rate": 1.4407332485520187e-06,
      "loss": 0.0093,
      "step": 2617960
    },
    {
      "epoch": 4.2843816892834,
      "grad_norm": 0.18358568847179413,
      "learning_rate": 1.4406673563385015e-06,
      "loss": 0.0063,
      "step": 2617980
    },
    {
      "epoch": 4.2844144197220535,
      "grad_norm": 0.19763167202472687,
      "learning_rate": 1.4406014641249844e-06,
      "loss": 0.0077,
      "step": 2618000
    },
    {
      "epoch": 4.284447150160706,
      "grad_norm": 0.5535642504692078,
      "learning_rate": 1.4405355719114671e-06,
      "loss": 0.0072,
      "step": 2618020
    },
    {
      "epoch": 4.28447988059936,
      "grad_norm": 0.16712743043899536,
      "learning_rate": 1.4404696796979503e-06,
      "loss": 0.0083,
      "step": 2618040
    },
    {
      "epoch": 4.284512611038013,
      "grad_norm": 0.2486886978149414,
      "learning_rate": 1.4404037874844333e-06,
      "loss": 0.0091,
      "step": 2618060
    },
    {
      "epoch": 4.284545341476666,
      "grad_norm": 0.38948869705200195,
      "learning_rate": 1.440337895270916e-06,
      "loss": 0.0076,
      "step": 2618080
    },
    {
      "epoch": 4.28457807191532,
      "grad_norm": 0.1948399394750595,
      "learning_rate": 1.4402720030573987e-06,
      "loss": 0.0078,
      "step": 2618100
    },
    {
      "epoch": 4.284610802353973,
      "grad_norm": 0.18515798449516296,
      "learning_rate": 1.4402061108438817e-06,
      "loss": 0.008,
      "step": 2618120
    },
    {
      "epoch": 4.284643532792627,
      "grad_norm": 0.14520196616649628,
      "learning_rate": 1.4401402186303644e-06,
      "loss": 0.013,
      "step": 2618140
    },
    {
      "epoch": 4.28467626323128,
      "grad_norm": 0.15111276507377625,
      "learning_rate": 1.4400743264168474e-06,
      "loss": 0.0087,
      "step": 2618160
    },
    {
      "epoch": 4.284708993669933,
      "grad_norm": 0.11434466391801834,
      "learning_rate": 1.4400084342033301e-06,
      "loss": 0.0083,
      "step": 2618180
    },
    {
      "epoch": 4.284741724108587,
      "grad_norm": 0.2674539387226105,
      "learning_rate": 1.4399425419898133e-06,
      "loss": 0.0084,
      "step": 2618200
    },
    {
      "epoch": 4.2847744545472395,
      "grad_norm": 0.09229370206594467,
      "learning_rate": 1.4398766497762962e-06,
      "loss": 0.008,
      "step": 2618220
    },
    {
      "epoch": 4.284807184985893,
      "grad_norm": 0.3673308789730072,
      "learning_rate": 1.439810757562779e-06,
      "loss": 0.0094,
      "step": 2618240
    },
    {
      "epoch": 4.284839915424547,
      "grad_norm": 0.2260955274105072,
      "learning_rate": 1.439744865349262e-06,
      "loss": 0.0127,
      "step": 2618260
    },
    {
      "epoch": 4.2848726458632,
      "grad_norm": 0.5791577100753784,
      "learning_rate": 1.4396789731357447e-06,
      "loss": 0.0075,
      "step": 2618280
    },
    {
      "epoch": 4.284905376301853,
      "grad_norm": 0.42747583985328674,
      "learning_rate": 1.4396130809222274e-06,
      "loss": 0.0086,
      "step": 2618300
    },
    {
      "epoch": 4.284938106740507,
      "grad_norm": 0.17171117663383484,
      "learning_rate": 1.4395471887087104e-06,
      "loss": 0.0065,
      "step": 2618320
    },
    {
      "epoch": 4.28497083717916,
      "grad_norm": 0.21213701367378235,
      "learning_rate": 1.439481296495193e-06,
      "loss": 0.014,
      "step": 2618340
    },
    {
      "epoch": 4.285003567617813,
      "grad_norm": 0.12914393842220306,
      "learning_rate": 1.439415404281676e-06,
      "loss": 0.0089,
      "step": 2618360
    },
    {
      "epoch": 4.2850362980564665,
      "grad_norm": 0.16357046365737915,
      "learning_rate": 1.4393495120681592e-06,
      "loss": 0.0088,
      "step": 2618380
    },
    {
      "epoch": 4.28506902849512,
      "grad_norm": 0.07554356008768082,
      "learning_rate": 1.439283619854642e-06,
      "loss": 0.009,
      "step": 2618400
    },
    {
      "epoch": 4.285101758933774,
      "grad_norm": 0.2738840878009796,
      "learning_rate": 1.439217727641125e-06,
      "loss": 0.0083,
      "step": 2618420
    },
    {
      "epoch": 4.285134489372426,
      "grad_norm": 0.6438072323799133,
      "learning_rate": 1.4391518354276076e-06,
      "loss": 0.0078,
      "step": 2618440
    },
    {
      "epoch": 4.28516721981108,
      "grad_norm": 0.6125614047050476,
      "learning_rate": 1.4390859432140906e-06,
      "loss": 0.0095,
      "step": 2618460
    },
    {
      "epoch": 4.2851999502497335,
      "grad_norm": 0.3696343004703522,
      "learning_rate": 1.4390200510005733e-06,
      "loss": 0.0109,
      "step": 2618480
    },
    {
      "epoch": 4.285232680688386,
      "grad_norm": 0.2496040314435959,
      "learning_rate": 1.4389541587870563e-06,
      "loss": 0.0074,
      "step": 2618500
    },
    {
      "epoch": 4.28526541112704,
      "grad_norm": 0.17874811589717865,
      "learning_rate": 1.438888266573539e-06,
      "loss": 0.0154,
      "step": 2618520
    },
    {
      "epoch": 4.285298141565693,
      "grad_norm": 0.35274064540863037,
      "learning_rate": 1.4388223743600218e-06,
      "loss": 0.0074,
      "step": 2618540
    },
    {
      "epoch": 4.285330872004346,
      "grad_norm": 0.24094592034816742,
      "learning_rate": 1.438756482146505e-06,
      "loss": 0.0129,
      "step": 2618560
    },
    {
      "epoch": 4.285363602443,
      "grad_norm": 0.06504935771226883,
      "learning_rate": 1.4386905899329879e-06,
      "loss": 0.0077,
      "step": 2618580
    },
    {
      "epoch": 4.285396332881653,
      "grad_norm": 0.15695427358150482,
      "learning_rate": 1.4386246977194706e-06,
      "loss": 0.0135,
      "step": 2618600
    },
    {
      "epoch": 4.285429063320307,
      "grad_norm": 0.3080559968948364,
      "learning_rate": 1.4385588055059536e-06,
      "loss": 0.0144,
      "step": 2618620
    },
    {
      "epoch": 4.28546179375896,
      "grad_norm": 0.4251953959465027,
      "learning_rate": 1.4384929132924363e-06,
      "loss": 0.0106,
      "step": 2618640
    },
    {
      "epoch": 4.285494524197613,
      "grad_norm": 0.19712430238723755,
      "learning_rate": 1.4384270210789193e-06,
      "loss": 0.0073,
      "step": 2618660
    },
    {
      "epoch": 4.285527254636267,
      "grad_norm": 0.0724126398563385,
      "learning_rate": 1.438361128865402e-06,
      "loss": 0.0096,
      "step": 2618680
    },
    {
      "epoch": 4.28555998507492,
      "grad_norm": 0.11406070739030838,
      "learning_rate": 1.438295236651885e-06,
      "loss": 0.01,
      "step": 2618700
    },
    {
      "epoch": 4.285592715513573,
      "grad_norm": 0.28000348806381226,
      "learning_rate": 1.4382293444383677e-06,
      "loss": 0.0092,
      "step": 2618720
    },
    {
      "epoch": 4.285625445952227,
      "grad_norm": 0.8555408716201782,
      "learning_rate": 1.4381634522248509e-06,
      "loss": 0.0132,
      "step": 2618740
    },
    {
      "epoch": 4.28565817639088,
      "grad_norm": 0.5098962187767029,
      "learning_rate": 1.4380975600113336e-06,
      "loss": 0.0068,
      "step": 2618760
    },
    {
      "epoch": 4.285690906829533,
      "grad_norm": 0.09842903167009354,
      "learning_rate": 1.4380316677978166e-06,
      "loss": 0.0109,
      "step": 2618780
    },
    {
      "epoch": 4.285723637268187,
      "grad_norm": 0.09075689315795898,
      "learning_rate": 1.4379657755842993e-06,
      "loss": 0.0065,
      "step": 2618800
    },
    {
      "epoch": 4.28575636770684,
      "grad_norm": 0.0585191436111927,
      "learning_rate": 1.4378998833707822e-06,
      "loss": 0.0093,
      "step": 2618820
    },
    {
      "epoch": 4.285789098145493,
      "grad_norm": 0.284195214509964,
      "learning_rate": 1.437833991157265e-06,
      "loss": 0.0058,
      "step": 2618840
    },
    {
      "epoch": 4.2858218285841465,
      "grad_norm": 0.15569978952407837,
      "learning_rate": 1.437768098943748e-06,
      "loss": 0.0083,
      "step": 2618860
    },
    {
      "epoch": 4.2858545590228,
      "grad_norm": 0.43069562315940857,
      "learning_rate": 1.4377022067302307e-06,
      "loss": 0.0124,
      "step": 2618880
    },
    {
      "epoch": 4.285887289461454,
      "grad_norm": 0.09511999040842056,
      "learning_rate": 1.4376363145167138e-06,
      "loss": 0.0071,
      "step": 2618900
    },
    {
      "epoch": 4.285920019900106,
      "grad_norm": 0.2337442934513092,
      "learning_rate": 1.4375704223031966e-06,
      "loss": 0.0081,
      "step": 2618920
    },
    {
      "epoch": 4.28595275033876,
      "grad_norm": 0.3379322290420532,
      "learning_rate": 1.4375045300896795e-06,
      "loss": 0.0099,
      "step": 2618940
    },
    {
      "epoch": 4.285985480777414,
      "grad_norm": 0.4379580020904541,
      "learning_rate": 1.4374386378761623e-06,
      "loss": 0.0081,
      "step": 2618960
    },
    {
      "epoch": 4.286018211216067,
      "grad_norm": 0.28826719522476196,
      "learning_rate": 1.4373727456626452e-06,
      "loss": 0.0119,
      "step": 2618980
    },
    {
      "epoch": 4.28605094165472,
      "grad_norm": 0.21081021428108215,
      "learning_rate": 1.437306853449128e-06,
      "loss": 0.0112,
      "step": 2619000
    },
    {
      "epoch": 4.2860836720933735,
      "grad_norm": 0.07836813479661942,
      "learning_rate": 1.437240961235611e-06,
      "loss": 0.0056,
      "step": 2619020
    },
    {
      "epoch": 4.286116402532027,
      "grad_norm": 0.3082050681114197,
      "learning_rate": 1.4371750690220937e-06,
      "loss": 0.018,
      "step": 2619040
    },
    {
      "epoch": 4.28614913297068,
      "grad_norm": 0.6549033522605896,
      "learning_rate": 1.4371091768085766e-06,
      "loss": 0.0072,
      "step": 2619060
    },
    {
      "epoch": 4.286181863409333,
      "grad_norm": 0.1835658848285675,
      "learning_rate": 1.4370432845950596e-06,
      "loss": 0.0081,
      "step": 2619080
    },
    {
      "epoch": 4.286214593847987,
      "grad_norm": 0.19697213172912598,
      "learning_rate": 1.4369773923815425e-06,
      "loss": 0.0101,
      "step": 2619100
    },
    {
      "epoch": 4.28624732428664,
      "grad_norm": 0.11949849128723145,
      "learning_rate": 1.4369115001680252e-06,
      "loss": 0.0089,
      "step": 2619120
    },
    {
      "epoch": 4.286280054725293,
      "grad_norm": 0.2881557047367096,
      "learning_rate": 1.4368456079545082e-06,
      "loss": 0.0151,
      "step": 2619140
    },
    {
      "epoch": 4.286312785163947,
      "grad_norm": 0.2989611327648163,
      "learning_rate": 1.436779715740991e-06,
      "loss": 0.0074,
      "step": 2619160
    },
    {
      "epoch": 4.2863455156026005,
      "grad_norm": 0.336320698261261,
      "learning_rate": 1.4367138235274739e-06,
      "loss": 0.0074,
      "step": 2619180
    },
    {
      "epoch": 4.286378246041253,
      "grad_norm": 0.16806943714618683,
      "learning_rate": 1.4366479313139566e-06,
      "loss": 0.0076,
      "step": 2619200
    },
    {
      "epoch": 4.286410976479907,
      "grad_norm": 0.22250741720199585,
      "learning_rate": 1.4365820391004396e-06,
      "loss": 0.0073,
      "step": 2619220
    },
    {
      "epoch": 4.28644370691856,
      "grad_norm": 0.5417260527610779,
      "learning_rate": 1.4365161468869223e-06,
      "loss": 0.0075,
      "step": 2619240
    },
    {
      "epoch": 4.286476437357213,
      "grad_norm": 0.1801992952823639,
      "learning_rate": 1.4364502546734055e-06,
      "loss": 0.0096,
      "step": 2619260
    },
    {
      "epoch": 4.286509167795867,
      "grad_norm": 0.07976279407739639,
      "learning_rate": 1.4363843624598882e-06,
      "loss": 0.0126,
      "step": 2619280
    },
    {
      "epoch": 4.28654189823452,
      "grad_norm": 0.18270888924598694,
      "learning_rate": 1.4363184702463712e-06,
      "loss": 0.0107,
      "step": 2619300
    },
    {
      "epoch": 4.286574628673174,
      "grad_norm": 0.6405348777770996,
      "learning_rate": 1.436252578032854e-06,
      "loss": 0.011,
      "step": 2619320
    },
    {
      "epoch": 4.2866073591118266,
      "grad_norm": 0.39702609181404114,
      "learning_rate": 1.4361866858193369e-06,
      "loss": 0.0078,
      "step": 2619340
    },
    {
      "epoch": 4.28664008955048,
      "grad_norm": 0.18965856730937958,
      "learning_rate": 1.4361207936058196e-06,
      "loss": 0.0088,
      "step": 2619360
    },
    {
      "epoch": 4.286672819989134,
      "grad_norm": 0.17260350286960602,
      "learning_rate": 1.4360549013923026e-06,
      "loss": 0.0115,
      "step": 2619380
    },
    {
      "epoch": 4.2867055504277864,
      "grad_norm": 0.1446772813796997,
      "learning_rate": 1.4359890091787853e-06,
      "loss": 0.0106,
      "step": 2619400
    },
    {
      "epoch": 4.28673828086644,
      "grad_norm": 0.032087381929159164,
      "learning_rate": 1.4359231169652682e-06,
      "loss": 0.0121,
      "step": 2619420
    },
    {
      "epoch": 4.286771011305094,
      "grad_norm": 0.6146745681762695,
      "learning_rate": 1.4358572247517514e-06,
      "loss": 0.0128,
      "step": 2619440
    },
    {
      "epoch": 4.286803741743747,
      "grad_norm": 0.23311303555965424,
      "learning_rate": 1.4357913325382342e-06,
      "loss": 0.0086,
      "step": 2619460
    },
    {
      "epoch": 4.2868364721824,
      "grad_norm": 0.44438228011131287,
      "learning_rate": 1.435725440324717e-06,
      "loss": 0.0067,
      "step": 2619480
    },
    {
      "epoch": 4.2868692026210535,
      "grad_norm": 0.35191062092781067,
      "learning_rate": 1.4356595481111998e-06,
      "loss": 0.009,
      "step": 2619500
    },
    {
      "epoch": 4.286901933059707,
      "grad_norm": 0.30625268816947937,
      "learning_rate": 1.4355936558976826e-06,
      "loss": 0.0071,
      "step": 2619520
    },
    {
      "epoch": 4.28693466349836,
      "grad_norm": 0.518202006816864,
      "learning_rate": 1.4355277636841655e-06,
      "loss": 0.0125,
      "step": 2619540
    },
    {
      "epoch": 4.286967393937013,
      "grad_norm": 0.20786763727664948,
      "learning_rate": 1.4354618714706483e-06,
      "loss": 0.0076,
      "step": 2619560
    },
    {
      "epoch": 4.287000124375667,
      "grad_norm": 0.10937619209289551,
      "learning_rate": 1.4353959792571312e-06,
      "loss": 0.0084,
      "step": 2619580
    },
    {
      "epoch": 4.287032854814321,
      "grad_norm": 0.15797743201255798,
      "learning_rate": 1.435330087043614e-06,
      "loss": 0.0076,
      "step": 2619600
    },
    {
      "epoch": 4.287065585252973,
      "grad_norm": 0.14211437106132507,
      "learning_rate": 1.4352641948300971e-06,
      "loss": 0.0086,
      "step": 2619620
    },
    {
      "epoch": 4.287098315691627,
      "grad_norm": 0.19689947366714478,
      "learning_rate": 1.43519830261658e-06,
      "loss": 0.0116,
      "step": 2619640
    },
    {
      "epoch": 4.2871310461302805,
      "grad_norm": 0.34893620014190674,
      "learning_rate": 1.4351324104030628e-06,
      "loss": 0.0101,
      "step": 2619660
    },
    {
      "epoch": 4.287163776568933,
      "grad_norm": 0.16288556158542633,
      "learning_rate": 1.4350665181895458e-06,
      "loss": 0.0084,
      "step": 2619680
    },
    {
      "epoch": 4.287196507007587,
      "grad_norm": 0.4032246470451355,
      "learning_rate": 1.4350006259760285e-06,
      "loss": 0.0084,
      "step": 2619700
    },
    {
      "epoch": 4.28722923744624,
      "grad_norm": 0.4359998106956482,
      "learning_rate": 1.4349347337625113e-06,
      "loss": 0.0126,
      "step": 2619720
    },
    {
      "epoch": 4.287261967884894,
      "grad_norm": 0.056626997888088226,
      "learning_rate": 1.4348688415489942e-06,
      "loss": 0.0079,
      "step": 2619740
    },
    {
      "epoch": 4.287294698323547,
      "grad_norm": 0.7190895080566406,
      "learning_rate": 1.434802949335477e-06,
      "loss": 0.0111,
      "step": 2619760
    },
    {
      "epoch": 4.2873274287622,
      "grad_norm": 0.19175857305526733,
      "learning_rate": 1.43473705712196e-06,
      "loss": 0.0106,
      "step": 2619780
    },
    {
      "epoch": 4.287360159200854,
      "grad_norm": 0.08088439702987671,
      "learning_rate": 1.434671164908443e-06,
      "loss": 0.0062,
      "step": 2619800
    },
    {
      "epoch": 4.287392889639507,
      "grad_norm": 0.10829873383045197,
      "learning_rate": 1.4346052726949258e-06,
      "loss": 0.0099,
      "step": 2619820
    },
    {
      "epoch": 4.28742562007816,
      "grad_norm": 0.1063651591539383,
      "learning_rate": 1.4345393804814087e-06,
      "loss": 0.0085,
      "step": 2619840
    },
    {
      "epoch": 4.287458350516814,
      "grad_norm": 0.17180965840816498,
      "learning_rate": 1.4344734882678915e-06,
      "loss": 0.008,
      "step": 2619860
    },
    {
      "epoch": 4.287491080955467,
      "grad_norm": 0.9884955883026123,
      "learning_rate": 1.4344075960543744e-06,
      "loss": 0.0096,
      "step": 2619880
    },
    {
      "epoch": 4.28752381139412,
      "grad_norm": 0.4545135200023651,
      "learning_rate": 1.4343417038408572e-06,
      "loss": 0.0129,
      "step": 2619900
    },
    {
      "epoch": 4.287556541832774,
      "grad_norm": 0.23018404841423035,
      "learning_rate": 1.4342758116273401e-06,
      "loss": 0.0084,
      "step": 2619920
    },
    {
      "epoch": 4.287589272271427,
      "grad_norm": 0.3654709458351135,
      "learning_rate": 1.4342099194138229e-06,
      "loss": 0.0115,
      "step": 2619940
    },
    {
      "epoch": 4.28762200271008,
      "grad_norm": 0.2019844949245453,
      "learning_rate": 1.434144027200306e-06,
      "loss": 0.0044,
      "step": 2619960
    },
    {
      "epoch": 4.287654733148734,
      "grad_norm": 0.518639087677002,
      "learning_rate": 1.4340781349867888e-06,
      "loss": 0.008,
      "step": 2619980
    },
    {
      "epoch": 4.287687463587387,
      "grad_norm": 0.5234628915786743,
      "learning_rate": 1.4340122427732717e-06,
      "loss": 0.0116,
      "step": 2620000
    },
    {
      "epoch": 4.28772019402604,
      "grad_norm": 0.260925829410553,
      "learning_rate": 1.4339463505597545e-06,
      "loss": 0.0059,
      "step": 2620020
    },
    {
      "epoch": 4.2877529244646935,
      "grad_norm": 0.377754271030426,
      "learning_rate": 1.4338804583462374e-06,
      "loss": 0.0164,
      "step": 2620040
    },
    {
      "epoch": 4.287785654903347,
      "grad_norm": 0.3557788133621216,
      "learning_rate": 1.4338145661327202e-06,
      "loss": 0.0068,
      "step": 2620060
    },
    {
      "epoch": 4.287818385342001,
      "grad_norm": 0.11087044328451157,
      "learning_rate": 1.4337486739192031e-06,
      "loss": 0.0064,
      "step": 2620080
    },
    {
      "epoch": 4.287851115780653,
      "grad_norm": 0.2614234983921051,
      "learning_rate": 1.4336827817056858e-06,
      "loss": 0.0062,
      "step": 2620100
    },
    {
      "epoch": 4.287883846219307,
      "grad_norm": 0.06390871107578278,
      "learning_rate": 1.4336168894921688e-06,
      "loss": 0.0099,
      "step": 2620120
    },
    {
      "epoch": 4.2879165766579606,
      "grad_norm": 0.13553008437156677,
      "learning_rate": 1.4335509972786517e-06,
      "loss": 0.0071,
      "step": 2620140
    },
    {
      "epoch": 4.287949307096614,
      "grad_norm": 0.33526739478111267,
      "learning_rate": 1.4334851050651347e-06,
      "loss": 0.0078,
      "step": 2620160
    },
    {
      "epoch": 4.287982037535267,
      "grad_norm": 0.20830245316028595,
      "learning_rate": 1.4334192128516174e-06,
      "loss": 0.0087,
      "step": 2620180
    },
    {
      "epoch": 4.2880147679739204,
      "grad_norm": 0.12023237347602844,
      "learning_rate": 1.4333533206381004e-06,
      "loss": 0.0093,
      "step": 2620200
    },
    {
      "epoch": 4.288047498412574,
      "grad_norm": 0.10627567023038864,
      "learning_rate": 1.4332874284245831e-06,
      "loss": 0.0088,
      "step": 2620220
    },
    {
      "epoch": 4.288080228851227,
      "grad_norm": 0.17097805440425873,
      "learning_rate": 1.433221536211066e-06,
      "loss": 0.0072,
      "step": 2620240
    },
    {
      "epoch": 4.28811295928988,
      "grad_norm": 0.05492469295859337,
      "learning_rate": 1.4331556439975488e-06,
      "loss": 0.0081,
      "step": 2620260
    },
    {
      "epoch": 4.288145689728534,
      "grad_norm": 0.21096186339855194,
      "learning_rate": 1.4330897517840318e-06,
      "loss": 0.0128,
      "step": 2620280
    },
    {
      "epoch": 4.288178420167187,
      "grad_norm": 1.499115228652954,
      "learning_rate": 1.4330238595705145e-06,
      "loss": 0.0108,
      "step": 2620300
    },
    {
      "epoch": 4.28821115060584,
      "grad_norm": 0.15706205368041992,
      "learning_rate": 1.4329579673569977e-06,
      "loss": 0.0092,
      "step": 2620320
    },
    {
      "epoch": 4.288243881044494,
      "grad_norm": 0.23940196633338928,
      "learning_rate": 1.4328920751434804e-06,
      "loss": 0.0094,
      "step": 2620340
    },
    {
      "epoch": 4.288276611483147,
      "grad_norm": 0.43198123574256897,
      "learning_rate": 1.4328261829299634e-06,
      "loss": 0.0122,
      "step": 2620360
    },
    {
      "epoch": 4.2883093419218,
      "grad_norm": 0.16166920959949493,
      "learning_rate": 1.4327602907164461e-06,
      "loss": 0.0164,
      "step": 2620380
    },
    {
      "epoch": 4.288342072360454,
      "grad_norm": 0.4588073194026947,
      "learning_rate": 1.432694398502929e-06,
      "loss": 0.0133,
      "step": 2620400
    },
    {
      "epoch": 4.288374802799107,
      "grad_norm": 0.1699734479188919,
      "learning_rate": 1.4326285062894118e-06,
      "loss": 0.0085,
      "step": 2620420
    },
    {
      "epoch": 4.288407533237761,
      "grad_norm": 0.09924479573965073,
      "learning_rate": 1.4325626140758948e-06,
      "loss": 0.0057,
      "step": 2620440
    },
    {
      "epoch": 4.288440263676414,
      "grad_norm": 0.46043482422828674,
      "learning_rate": 1.4324967218623775e-06,
      "loss": 0.0123,
      "step": 2620460
    },
    {
      "epoch": 4.288472994115067,
      "grad_norm": 0.16793809831142426,
      "learning_rate": 1.4324308296488604e-06,
      "loss": 0.0056,
      "step": 2620480
    },
    {
      "epoch": 4.288505724553721,
      "grad_norm": 0.19281503558158875,
      "learning_rate": 1.4323649374353434e-06,
      "loss": 0.0103,
      "step": 2620500
    },
    {
      "epoch": 4.2885384549923735,
      "grad_norm": 0.39580556750297546,
      "learning_rate": 1.4322990452218263e-06,
      "loss": 0.0104,
      "step": 2620520
    },
    {
      "epoch": 4.288571185431027,
      "grad_norm": 0.5009986758232117,
      "learning_rate": 1.432233153008309e-06,
      "loss": 0.0089,
      "step": 2620540
    },
    {
      "epoch": 4.288603915869681,
      "grad_norm": 0.38103923201560974,
      "learning_rate": 1.432167260794792e-06,
      "loss": 0.0105,
      "step": 2620560
    },
    {
      "epoch": 4.288636646308333,
      "grad_norm": 0.6293689012527466,
      "learning_rate": 1.4321013685812748e-06,
      "loss": 0.0115,
      "step": 2620580
    },
    {
      "epoch": 4.288669376746987,
      "grad_norm": 0.23173725605010986,
      "learning_rate": 1.4320354763677577e-06,
      "loss": 0.0107,
      "step": 2620600
    },
    {
      "epoch": 4.288702107185641,
      "grad_norm": 0.25118717551231384,
      "learning_rate": 1.4319695841542405e-06,
      "loss": 0.0084,
      "step": 2620620
    },
    {
      "epoch": 4.288734837624294,
      "grad_norm": 0.040039513260126114,
      "learning_rate": 1.4319036919407234e-06,
      "loss": 0.0086,
      "step": 2620640
    },
    {
      "epoch": 4.288767568062947,
      "grad_norm": 0.11249709129333496,
      "learning_rate": 1.4318377997272066e-06,
      "loss": 0.0069,
      "step": 2620660
    },
    {
      "epoch": 4.2888002985016005,
      "grad_norm": 0.10094226896762848,
      "learning_rate": 1.4317719075136893e-06,
      "loss": 0.009,
      "step": 2620680
    },
    {
      "epoch": 4.288833028940254,
      "grad_norm": 0.4321695566177368,
      "learning_rate": 1.4317060153001723e-06,
      "loss": 0.006,
      "step": 2620700
    },
    {
      "epoch": 4.288865759378907,
      "grad_norm": 0.11015329509973526,
      "learning_rate": 1.431640123086655e-06,
      "loss": 0.0069,
      "step": 2620720
    },
    {
      "epoch": 4.28889848981756,
      "grad_norm": 0.322460412979126,
      "learning_rate": 1.4315742308731378e-06,
      "loss": 0.0083,
      "step": 2620740
    },
    {
      "epoch": 4.288931220256214,
      "grad_norm": 0.23007304966449738,
      "learning_rate": 1.4315083386596207e-06,
      "loss": 0.006,
      "step": 2620760
    },
    {
      "epoch": 4.288963950694868,
      "grad_norm": 0.28473615646362305,
      "learning_rate": 1.4314424464461034e-06,
      "loss": 0.0065,
      "step": 2620780
    },
    {
      "epoch": 4.28899668113352,
      "grad_norm": 0.08517942577600479,
      "learning_rate": 1.4313765542325864e-06,
      "loss": 0.0135,
      "step": 2620800
    },
    {
      "epoch": 4.289029411572174,
      "grad_norm": 0.40227973461151123,
      "learning_rate": 1.4313106620190691e-06,
      "loss": 0.0102,
      "step": 2620820
    },
    {
      "epoch": 4.2890621420108275,
      "grad_norm": 0.15397502481937408,
      "learning_rate": 1.4312447698055523e-06,
      "loss": 0.0065,
      "step": 2620840
    },
    {
      "epoch": 4.28909487244948,
      "grad_norm": 0.2107899934053421,
      "learning_rate": 1.4311788775920353e-06,
      "loss": 0.0071,
      "step": 2620860
    },
    {
      "epoch": 4.289127602888134,
      "grad_norm": 0.28472575545310974,
      "learning_rate": 1.431112985378518e-06,
      "loss": 0.0087,
      "step": 2620880
    },
    {
      "epoch": 4.289160333326787,
      "grad_norm": 0.19049787521362305,
      "learning_rate": 1.431047093165001e-06,
      "loss": 0.0174,
      "step": 2620900
    },
    {
      "epoch": 4.289193063765441,
      "grad_norm": 0.5166947245597839,
      "learning_rate": 1.4309812009514837e-06,
      "loss": 0.0095,
      "step": 2620920
    },
    {
      "epoch": 4.289225794204094,
      "grad_norm": 0.1974065899848938,
      "learning_rate": 1.4309153087379664e-06,
      "loss": 0.0115,
      "step": 2620940
    },
    {
      "epoch": 4.289258524642747,
      "grad_norm": 0.41729462146759033,
      "learning_rate": 1.4308494165244494e-06,
      "loss": 0.0092,
      "step": 2620960
    },
    {
      "epoch": 4.289291255081401,
      "grad_norm": 0.14090900123119354,
      "learning_rate": 1.4307835243109321e-06,
      "loss": 0.0112,
      "step": 2620980
    },
    {
      "epoch": 4.289323985520054,
      "grad_norm": 0.12243400514125824,
      "learning_rate": 1.430717632097415e-06,
      "loss": 0.0091,
      "step": 2621000
    },
    {
      "epoch": 4.289356715958707,
      "grad_norm": 0.22010482847690582,
      "learning_rate": 1.4306517398838982e-06,
      "loss": 0.0093,
      "step": 2621020
    },
    {
      "epoch": 4.289389446397361,
      "grad_norm": 0.1238744929432869,
      "learning_rate": 1.430585847670381e-06,
      "loss": 0.0056,
      "step": 2621040
    },
    {
      "epoch": 4.289422176836014,
      "grad_norm": 0.25966253876686096,
      "learning_rate": 1.430519955456864e-06,
      "loss": 0.006,
      "step": 2621060
    },
    {
      "epoch": 4.289454907274667,
      "grad_norm": 0.08884765952825546,
      "learning_rate": 1.4304540632433467e-06,
      "loss": 0.0142,
      "step": 2621080
    },
    {
      "epoch": 4.289487637713321,
      "grad_norm": 0.5399922728538513,
      "learning_rate": 1.4303881710298296e-06,
      "loss": 0.0124,
      "step": 2621100
    },
    {
      "epoch": 4.289520368151974,
      "grad_norm": 0.40419432520866394,
      "learning_rate": 1.4303222788163123e-06,
      "loss": 0.0104,
      "step": 2621120
    },
    {
      "epoch": 4.289553098590627,
      "grad_norm": 0.18977011740207672,
      "learning_rate": 1.4302563866027953e-06,
      "loss": 0.0084,
      "step": 2621140
    },
    {
      "epoch": 4.2895858290292805,
      "grad_norm": 0.184505432844162,
      "learning_rate": 1.430190494389278e-06,
      "loss": 0.0094,
      "step": 2621160
    },
    {
      "epoch": 4.289618559467934,
      "grad_norm": 2.751375913619995,
      "learning_rate": 1.4301246021757608e-06,
      "loss": 0.0076,
      "step": 2621180
    },
    {
      "epoch": 4.289651289906588,
      "grad_norm": 0.3596791625022888,
      "learning_rate": 1.430058709962244e-06,
      "loss": 0.0084,
      "step": 2621200
    },
    {
      "epoch": 4.28968402034524,
      "grad_norm": 0.29501089453697205,
      "learning_rate": 1.429992817748727e-06,
      "loss": 0.008,
      "step": 2621220
    },
    {
      "epoch": 4.289716750783894,
      "grad_norm": 0.2895287275314331,
      "learning_rate": 1.4299269255352096e-06,
      "loss": 0.0092,
      "step": 2621240
    },
    {
      "epoch": 4.289749481222548,
      "grad_norm": 0.19633428752422333,
      "learning_rate": 1.4298610333216926e-06,
      "loss": 0.0078,
      "step": 2621260
    },
    {
      "epoch": 4.2897822116612,
      "grad_norm": 0.32838863134384155,
      "learning_rate": 1.4297951411081753e-06,
      "loss": 0.0118,
      "step": 2621280
    },
    {
      "epoch": 4.289814942099854,
      "grad_norm": 0.2845717668533325,
      "learning_rate": 1.4297292488946583e-06,
      "loss": 0.015,
      "step": 2621300
    },
    {
      "epoch": 4.2898476725385075,
      "grad_norm": 0.1243610829114914,
      "learning_rate": 1.429663356681141e-06,
      "loss": 0.0096,
      "step": 2621320
    },
    {
      "epoch": 4.289880402977161,
      "grad_norm": 0.3166123032569885,
      "learning_rate": 1.429597464467624e-06,
      "loss": 0.01,
      "step": 2621340
    },
    {
      "epoch": 4.289913133415814,
      "grad_norm": 0.4305533468723297,
      "learning_rate": 1.4295315722541067e-06,
      "loss": 0.0085,
      "step": 2621360
    },
    {
      "epoch": 4.289945863854467,
      "grad_norm": 0.38467302918434143,
      "learning_rate": 1.4294656800405899e-06,
      "loss": 0.0072,
      "step": 2621380
    },
    {
      "epoch": 4.289978594293121,
      "grad_norm": 0.16505354642868042,
      "learning_rate": 1.4293997878270726e-06,
      "loss": 0.0068,
      "step": 2621400
    },
    {
      "epoch": 4.290011324731774,
      "grad_norm": 0.13608212769031525,
      "learning_rate": 1.4293338956135556e-06,
      "loss": 0.0083,
      "step": 2621420
    },
    {
      "epoch": 4.290044055170427,
      "grad_norm": 0.49022966623306274,
      "learning_rate": 1.4292680034000383e-06,
      "loss": 0.0129,
      "step": 2621440
    },
    {
      "epoch": 4.290076785609081,
      "grad_norm": 0.2945195436477661,
      "learning_rate": 1.4292021111865213e-06,
      "loss": 0.0163,
      "step": 2621460
    },
    {
      "epoch": 4.290109516047734,
      "grad_norm": 0.11054132878780365,
      "learning_rate": 1.429136218973004e-06,
      "loss": 0.005,
      "step": 2621480
    },
    {
      "epoch": 4.290142246486387,
      "grad_norm": 1.1664031744003296,
      "learning_rate": 1.429070326759487e-06,
      "loss": 0.0136,
      "step": 2621500
    },
    {
      "epoch": 4.290174976925041,
      "grad_norm": 0.1669846624135971,
      "learning_rate": 1.4290044345459697e-06,
      "loss": 0.0056,
      "step": 2621520
    },
    {
      "epoch": 4.290207707363694,
      "grad_norm": 0.1347692757844925,
      "learning_rate": 1.4289385423324528e-06,
      "loss": 0.0095,
      "step": 2621540
    },
    {
      "epoch": 4.290240437802347,
      "grad_norm": 0.3134009838104248,
      "learning_rate": 1.4288726501189356e-06,
      "loss": 0.0094,
      "step": 2621560
    },
    {
      "epoch": 4.290273168241001,
      "grad_norm": 0.7964878678321838,
      "learning_rate": 1.4288067579054185e-06,
      "loss": 0.0171,
      "step": 2621580
    },
    {
      "epoch": 4.290305898679654,
      "grad_norm": 1.2855684757232666,
      "learning_rate": 1.4287408656919013e-06,
      "loss": 0.0086,
      "step": 2621600
    },
    {
      "epoch": 4.290338629118308,
      "grad_norm": 0.10695696622133255,
      "learning_rate": 1.4286749734783842e-06,
      "loss": 0.0106,
      "step": 2621620
    },
    {
      "epoch": 4.290371359556961,
      "grad_norm": 0.2616387903690338,
      "learning_rate": 1.428609081264867e-06,
      "loss": 0.0115,
      "step": 2621640
    },
    {
      "epoch": 4.290404089995614,
      "grad_norm": 0.03523683920502663,
      "learning_rate": 1.42854318905135e-06,
      "loss": 0.0095,
      "step": 2621660
    },
    {
      "epoch": 4.290436820434268,
      "grad_norm": 0.31758689880371094,
      "learning_rate": 1.4284772968378327e-06,
      "loss": 0.0064,
      "step": 2621680
    },
    {
      "epoch": 4.2904695508729205,
      "grad_norm": 0.4466569423675537,
      "learning_rate": 1.4284114046243156e-06,
      "loss": 0.0087,
      "step": 2621700
    },
    {
      "epoch": 4.290502281311574,
      "grad_norm": 0.6352417469024658,
      "learning_rate": 1.4283455124107986e-06,
      "loss": 0.0125,
      "step": 2621720
    },
    {
      "epoch": 4.290535011750228,
      "grad_norm": 0.11703075468540192,
      "learning_rate": 1.4282796201972815e-06,
      "loss": 0.0144,
      "step": 2621740
    },
    {
      "epoch": 4.29056774218888,
      "grad_norm": 0.2190723419189453,
      "learning_rate": 1.4282137279837643e-06,
      "loss": 0.0094,
      "step": 2621760
    },
    {
      "epoch": 4.290600472627534,
      "grad_norm": 0.10461422801017761,
      "learning_rate": 1.4281478357702472e-06,
      "loss": 0.0107,
      "step": 2621780
    },
    {
      "epoch": 4.290633203066188,
      "grad_norm": 0.9482490420341492,
      "learning_rate": 1.42808194355673e-06,
      "loss": 0.0087,
      "step": 2621800
    },
    {
      "epoch": 4.290665933504841,
      "grad_norm": 0.25835010409355164,
      "learning_rate": 1.428016051343213e-06,
      "loss": 0.0065,
      "step": 2621820
    },
    {
      "epoch": 4.290698663943494,
      "grad_norm": 0.38209304213523865,
      "learning_rate": 1.4279501591296956e-06,
      "loss": 0.0106,
      "step": 2621840
    },
    {
      "epoch": 4.2907313943821475,
      "grad_norm": 0.34473639726638794,
      "learning_rate": 1.4278842669161786e-06,
      "loss": 0.0075,
      "step": 2621860
    },
    {
      "epoch": 4.290764124820801,
      "grad_norm": 0.19407068192958832,
      "learning_rate": 1.4278183747026613e-06,
      "loss": 0.0121,
      "step": 2621880
    },
    {
      "epoch": 4.290796855259455,
      "grad_norm": 2.3627700805664062,
      "learning_rate": 1.4277524824891445e-06,
      "loss": 0.0076,
      "step": 2621900
    },
    {
      "epoch": 4.290829585698107,
      "grad_norm": 0.5271826982498169,
      "learning_rate": 1.4276865902756272e-06,
      "loss": 0.0058,
      "step": 2621920
    },
    {
      "epoch": 4.290862316136761,
      "grad_norm": 0.7312009930610657,
      "learning_rate": 1.4276206980621102e-06,
      "loss": 0.0117,
      "step": 2621940
    },
    {
      "epoch": 4.2908950465754145,
      "grad_norm": 0.09194231033325195,
      "learning_rate": 1.427554805848593e-06,
      "loss": 0.0086,
      "step": 2621960
    },
    {
      "epoch": 4.290927777014067,
      "grad_norm": 0.13378803431987762,
      "learning_rate": 1.4274889136350759e-06,
      "loss": 0.0089,
      "step": 2621980
    },
    {
      "epoch": 4.290960507452721,
      "grad_norm": 0.2798287868499756,
      "learning_rate": 1.4274230214215586e-06,
      "loss": 0.0108,
      "step": 2622000
    },
    {
      "epoch": 4.290993237891374,
      "grad_norm": 0.14399375021457672,
      "learning_rate": 1.4273571292080416e-06,
      "loss": 0.0082,
      "step": 2622020
    },
    {
      "epoch": 4.291025968330027,
      "grad_norm": 0.041850414127111435,
      "learning_rate": 1.4272912369945243e-06,
      "loss": 0.0136,
      "step": 2622040
    },
    {
      "epoch": 4.291058698768681,
      "grad_norm": 0.13759659230709076,
      "learning_rate": 1.4272253447810073e-06,
      "loss": 0.0083,
      "step": 2622060
    },
    {
      "epoch": 4.291091429207334,
      "grad_norm": 0.15717215836048126,
      "learning_rate": 1.4271594525674904e-06,
      "loss": 0.0082,
      "step": 2622080
    },
    {
      "epoch": 4.291124159645988,
      "grad_norm": 0.2918180525302887,
      "learning_rate": 1.4270935603539732e-06,
      "loss": 0.0136,
      "step": 2622100
    },
    {
      "epoch": 4.291156890084641,
      "grad_norm": 0.24906115233898163,
      "learning_rate": 1.4270276681404561e-06,
      "loss": 0.0079,
      "step": 2622120
    },
    {
      "epoch": 4.291189620523294,
      "grad_norm": 0.4449140727519989,
      "learning_rate": 1.4269617759269389e-06,
      "loss": 0.0111,
      "step": 2622140
    },
    {
      "epoch": 4.291222350961948,
      "grad_norm": 0.16765137016773224,
      "learning_rate": 1.4268958837134216e-06,
      "loss": 0.0089,
      "step": 2622160
    },
    {
      "epoch": 4.2912550814006005,
      "grad_norm": 0.2259238064289093,
      "learning_rate": 1.4268299914999045e-06,
      "loss": 0.011,
      "step": 2622180
    },
    {
      "epoch": 4.291287811839254,
      "grad_norm": 0.15653926134109497,
      "learning_rate": 1.4267640992863873e-06,
      "loss": 0.0074,
      "step": 2622200
    },
    {
      "epoch": 4.291320542277908,
      "grad_norm": 0.2551618814468384,
      "learning_rate": 1.4266982070728702e-06,
      "loss": 0.0129,
      "step": 2622220
    },
    {
      "epoch": 4.291353272716561,
      "grad_norm": 0.177446648478508,
      "learning_rate": 1.426632314859353e-06,
      "loss": 0.0075,
      "step": 2622240
    },
    {
      "epoch": 4.291386003155214,
      "grad_norm": 0.2774340510368347,
      "learning_rate": 1.4265664226458361e-06,
      "loss": 0.0112,
      "step": 2622260
    },
    {
      "epoch": 4.291418733593868,
      "grad_norm": 0.18668626248836517,
      "learning_rate": 1.426500530432319e-06,
      "loss": 0.0082,
      "step": 2622280
    },
    {
      "epoch": 4.291451464032521,
      "grad_norm": 0.1944916993379593,
      "learning_rate": 1.4264346382188018e-06,
      "loss": 0.0074,
      "step": 2622300
    },
    {
      "epoch": 4.291484194471174,
      "grad_norm": 0.19681575894355774,
      "learning_rate": 1.4263687460052848e-06,
      "loss": 0.0068,
      "step": 2622320
    },
    {
      "epoch": 4.2915169249098275,
      "grad_norm": 0.19213443994522095,
      "learning_rate": 1.4263028537917675e-06,
      "loss": 0.0067,
      "step": 2622340
    },
    {
      "epoch": 4.291549655348481,
      "grad_norm": 0.19733601808547974,
      "learning_rate": 1.4262369615782505e-06,
      "loss": 0.014,
      "step": 2622360
    },
    {
      "epoch": 4.291582385787135,
      "grad_norm": 0.1747843623161316,
      "learning_rate": 1.4261710693647332e-06,
      "loss": 0.0076,
      "step": 2622380
    },
    {
      "epoch": 4.291615116225787,
      "grad_norm": 0.8775355219841003,
      "learning_rate": 1.426105177151216e-06,
      "loss": 0.0111,
      "step": 2622400
    },
    {
      "epoch": 4.291647846664441,
      "grad_norm": 0.16754598915576935,
      "learning_rate": 1.4260392849376991e-06,
      "loss": 0.0087,
      "step": 2622420
    },
    {
      "epoch": 4.291680577103095,
      "grad_norm": 0.10200139880180359,
      "learning_rate": 1.425973392724182e-06,
      "loss": 0.0048,
      "step": 2622440
    },
    {
      "epoch": 4.291713307541747,
      "grad_norm": 0.1973116397857666,
      "learning_rate": 1.4259075005106648e-06,
      "loss": 0.0088,
      "step": 2622460
    },
    {
      "epoch": 4.291746037980401,
      "grad_norm": 0.3983471691608429,
      "learning_rate": 1.4258416082971478e-06,
      "loss": 0.0104,
      "step": 2622480
    },
    {
      "epoch": 4.2917787684190545,
      "grad_norm": 0.17729373276233673,
      "learning_rate": 1.4257757160836305e-06,
      "loss": 0.007,
      "step": 2622500
    },
    {
      "epoch": 4.291811498857708,
      "grad_norm": 0.2708541452884674,
      "learning_rate": 1.4257098238701134e-06,
      "loss": 0.0076,
      "step": 2622520
    },
    {
      "epoch": 4.291844229296361,
      "grad_norm": 0.1824415773153305,
      "learning_rate": 1.4256439316565962e-06,
      "loss": 0.0063,
      "step": 2622540
    },
    {
      "epoch": 4.291876959735014,
      "grad_norm": 0.17178265750408173,
      "learning_rate": 1.4255780394430791e-06,
      "loss": 0.0134,
      "step": 2622560
    },
    {
      "epoch": 4.291909690173668,
      "grad_norm": 0.138187438249588,
      "learning_rate": 1.4255121472295619e-06,
      "loss": 0.0116,
      "step": 2622580
    },
    {
      "epoch": 4.291942420612321,
      "grad_norm": 0.5108861327171326,
      "learning_rate": 1.425446255016045e-06,
      "loss": 0.0127,
      "step": 2622600
    },
    {
      "epoch": 4.291975151050974,
      "grad_norm": 0.5534672737121582,
      "learning_rate": 1.4253803628025278e-06,
      "loss": 0.0091,
      "step": 2622620
    },
    {
      "epoch": 4.292007881489628,
      "grad_norm": 0.2623455226421356,
      "learning_rate": 1.4253144705890107e-06,
      "loss": 0.0139,
      "step": 2622640
    },
    {
      "epoch": 4.2920406119282815,
      "grad_norm": 0.1140926405787468,
      "learning_rate": 1.4252485783754935e-06,
      "loss": 0.0069,
      "step": 2622660
    },
    {
      "epoch": 4.292073342366934,
      "grad_norm": 0.6649526953697205,
      "learning_rate": 1.4251826861619764e-06,
      "loss": 0.008,
      "step": 2622680
    },
    {
      "epoch": 4.292106072805588,
      "grad_norm": 0.12243839353322983,
      "learning_rate": 1.4251167939484592e-06,
      "loss": 0.0106,
      "step": 2622700
    },
    {
      "epoch": 4.292138803244241,
      "grad_norm": 0.4366234242916107,
      "learning_rate": 1.4250509017349421e-06,
      "loss": 0.0073,
      "step": 2622720
    },
    {
      "epoch": 4.292171533682894,
      "grad_norm": 0.22948133945465088,
      "learning_rate": 1.4249850095214249e-06,
      "loss": 0.0105,
      "step": 2622740
    },
    {
      "epoch": 4.292204264121548,
      "grad_norm": 0.225971519947052,
      "learning_rate": 1.4249191173079078e-06,
      "loss": 0.0061,
      "step": 2622760
    },
    {
      "epoch": 4.292236994560201,
      "grad_norm": 0.47691792249679565,
      "learning_rate": 1.4248532250943908e-06,
      "loss": 0.0156,
      "step": 2622780
    },
    {
      "epoch": 4.292269724998855,
      "grad_norm": 0.12484484165906906,
      "learning_rate": 1.4247873328808737e-06,
      "loss": 0.009,
      "step": 2622800
    },
    {
      "epoch": 4.2923024554375075,
      "grad_norm": 0.3787398040294647,
      "learning_rate": 1.4247214406673565e-06,
      "loss": 0.0081,
      "step": 2622820
    },
    {
      "epoch": 4.292335185876161,
      "grad_norm": 0.24383047223091125,
      "learning_rate": 1.4246555484538394e-06,
      "loss": 0.0088,
      "step": 2622840
    },
    {
      "epoch": 4.292367916314815,
      "grad_norm": 0.6229495406150818,
      "learning_rate": 1.4245896562403221e-06,
      "loss": 0.0099,
      "step": 2622860
    },
    {
      "epoch": 4.292400646753467,
      "grad_norm": 0.8295838236808777,
      "learning_rate": 1.424523764026805e-06,
      "loss": 0.0111,
      "step": 2622880
    },
    {
      "epoch": 4.292433377192121,
      "grad_norm": 0.08182569593191147,
      "learning_rate": 1.4244578718132878e-06,
      "loss": 0.0147,
      "step": 2622900
    },
    {
      "epoch": 4.292466107630775,
      "grad_norm": 0.3126528263092041,
      "learning_rate": 1.4243919795997708e-06,
      "loss": 0.0056,
      "step": 2622920
    },
    {
      "epoch": 4.292498838069427,
      "grad_norm": 0.2947416603565216,
      "learning_rate": 1.4243260873862535e-06,
      "loss": 0.0137,
      "step": 2622940
    },
    {
      "epoch": 4.292531568508081,
      "grad_norm": 0.38416311144828796,
      "learning_rate": 1.4242601951727367e-06,
      "loss": 0.0107,
      "step": 2622960
    },
    {
      "epoch": 4.2925642989467345,
      "grad_norm": 0.12982498109340668,
      "learning_rate": 1.4241943029592194e-06,
      "loss": 0.0152,
      "step": 2622980
    },
    {
      "epoch": 4.292597029385388,
      "grad_norm": 0.1948888897895813,
      "learning_rate": 1.4241284107457024e-06,
      "loss": 0.0098,
      "step": 2623000
    },
    {
      "epoch": 4.292629759824041,
      "grad_norm": 0.10557042807340622,
      "learning_rate": 1.4240625185321851e-06,
      "loss": 0.0129,
      "step": 2623020
    },
    {
      "epoch": 4.292662490262694,
      "grad_norm": 0.07973618060350418,
      "learning_rate": 1.423996626318668e-06,
      "loss": 0.0089,
      "step": 2623040
    },
    {
      "epoch": 4.292695220701348,
      "grad_norm": 0.23101820051670074,
      "learning_rate": 1.4239307341051508e-06,
      "loss": 0.0117,
      "step": 2623060
    },
    {
      "epoch": 4.292727951140002,
      "grad_norm": 0.09596505761146545,
      "learning_rate": 1.4238648418916338e-06,
      "loss": 0.0093,
      "step": 2623080
    },
    {
      "epoch": 4.292760681578654,
      "grad_norm": 0.16958172619342804,
      "learning_rate": 1.4237989496781165e-06,
      "loss": 0.0089,
      "step": 2623100
    },
    {
      "epoch": 4.292793412017308,
      "grad_norm": 0.3150230348110199,
      "learning_rate": 1.4237330574645995e-06,
      "loss": 0.0069,
      "step": 2623120
    },
    {
      "epoch": 4.2928261424559615,
      "grad_norm": 0.2021370679140091,
      "learning_rate": 1.4236671652510824e-06,
      "loss": 0.0073,
      "step": 2623140
    },
    {
      "epoch": 4.292858872894614,
      "grad_norm": 0.09790292382240295,
      "learning_rate": 1.4236012730375654e-06,
      "loss": 0.0162,
      "step": 2623160
    },
    {
      "epoch": 4.292891603333268,
      "grad_norm": 0.33149129152297974,
      "learning_rate": 1.423535380824048e-06,
      "loss": 0.0058,
      "step": 2623180
    },
    {
      "epoch": 4.292924333771921,
      "grad_norm": 0.1598992496728897,
      "learning_rate": 1.423469488610531e-06,
      "loss": 0.0103,
      "step": 2623200
    },
    {
      "epoch": 4.292957064210574,
      "grad_norm": 0.4145726263523102,
      "learning_rate": 1.4234035963970138e-06,
      "loss": 0.0071,
      "step": 2623220
    },
    {
      "epoch": 4.292989794649228,
      "grad_norm": 0.3397746980190277,
      "learning_rate": 1.4233377041834967e-06,
      "loss": 0.0123,
      "step": 2623240
    },
    {
      "epoch": 4.293022525087881,
      "grad_norm": 0.2667883038520813,
      "learning_rate": 1.4232718119699795e-06,
      "loss": 0.0083,
      "step": 2623260
    },
    {
      "epoch": 4.293055255526535,
      "grad_norm": 0.36598047614097595,
      "learning_rate": 1.4232059197564624e-06,
      "loss": 0.0075,
      "step": 2623280
    },
    {
      "epoch": 4.293087985965188,
      "grad_norm": 0.059214696288108826,
      "learning_rate": 1.4231400275429456e-06,
      "loss": 0.0102,
      "step": 2623300
    },
    {
      "epoch": 4.293120716403841,
      "grad_norm": 0.08592381328344345,
      "learning_rate": 1.4230741353294283e-06,
      "loss": 0.0073,
      "step": 2623320
    },
    {
      "epoch": 4.293153446842495,
      "grad_norm": 0.40664759278297424,
      "learning_rate": 1.4230082431159113e-06,
      "loss": 0.0089,
      "step": 2623340
    },
    {
      "epoch": 4.293186177281148,
      "grad_norm": 0.22556659579277039,
      "learning_rate": 1.422942350902394e-06,
      "loss": 0.0115,
      "step": 2623360
    },
    {
      "epoch": 4.293218907719801,
      "grad_norm": 0.23775529861450195,
      "learning_rate": 1.4228764586888768e-06,
      "loss": 0.0095,
      "step": 2623380
    },
    {
      "epoch": 4.293251638158455,
      "grad_norm": 0.3987143337726593,
      "learning_rate": 1.4228105664753597e-06,
      "loss": 0.0146,
      "step": 2623400
    },
    {
      "epoch": 4.293284368597108,
      "grad_norm": 0.024928461760282516,
      "learning_rate": 1.4227446742618425e-06,
      "loss": 0.0069,
      "step": 2623420
    },
    {
      "epoch": 4.293317099035761,
      "grad_norm": 0.48743966221809387,
      "learning_rate": 1.4226787820483254e-06,
      "loss": 0.0078,
      "step": 2623440
    },
    {
      "epoch": 4.293349829474415,
      "grad_norm": 0.32907891273498535,
      "learning_rate": 1.4226128898348081e-06,
      "loss": 0.006,
      "step": 2623460
    },
    {
      "epoch": 4.293382559913068,
      "grad_norm": 0.4065207540988922,
      "learning_rate": 1.4225469976212913e-06,
      "loss": 0.0078,
      "step": 2623480
    },
    {
      "epoch": 4.293415290351721,
      "grad_norm": 0.49815890192985535,
      "learning_rate": 1.4224811054077743e-06,
      "loss": 0.0079,
      "step": 2623500
    },
    {
      "epoch": 4.2934480207903745,
      "grad_norm": 0.1908789575099945,
      "learning_rate": 1.422415213194257e-06,
      "loss": 0.0093,
      "step": 2623520
    },
    {
      "epoch": 4.293480751229028,
      "grad_norm": 0.3489437401294708,
      "learning_rate": 1.42234932098074e-06,
      "loss": 0.0079,
      "step": 2623540
    },
    {
      "epoch": 4.293513481667682,
      "grad_norm": 0.12205337733030319,
      "learning_rate": 1.4222834287672227e-06,
      "loss": 0.0054,
      "step": 2623560
    },
    {
      "epoch": 4.293546212106334,
      "grad_norm": 0.5163812041282654,
      "learning_rate": 1.4222175365537054e-06,
      "loss": 0.0076,
      "step": 2623580
    },
    {
      "epoch": 4.293578942544988,
      "grad_norm": 0.24147562682628632,
      "learning_rate": 1.4221516443401884e-06,
      "loss": 0.011,
      "step": 2623600
    },
    {
      "epoch": 4.2936116729836415,
      "grad_norm": 0.25341933965682983,
      "learning_rate": 1.4220857521266711e-06,
      "loss": 0.0103,
      "step": 2623620
    },
    {
      "epoch": 4.293644403422294,
      "grad_norm": 0.23383784294128418,
      "learning_rate": 1.422019859913154e-06,
      "loss": 0.0121,
      "step": 2623640
    },
    {
      "epoch": 4.293677133860948,
      "grad_norm": 0.0658140555024147,
      "learning_rate": 1.4219539676996372e-06,
      "loss": 0.0088,
      "step": 2623660
    },
    {
      "epoch": 4.293709864299601,
      "grad_norm": 0.15681391954421997,
      "learning_rate": 1.42188807548612e-06,
      "loss": 0.0148,
      "step": 2623680
    },
    {
      "epoch": 4.293742594738255,
      "grad_norm": 0.3242960572242737,
      "learning_rate": 1.421822183272603e-06,
      "loss": 0.0095,
      "step": 2623700
    },
    {
      "epoch": 4.293775325176908,
      "grad_norm": 0.10445846617221832,
      "learning_rate": 1.4217562910590857e-06,
      "loss": 0.0111,
      "step": 2623720
    },
    {
      "epoch": 4.293808055615561,
      "grad_norm": 0.15689301490783691,
      "learning_rate": 1.4216903988455686e-06,
      "loss": 0.0076,
      "step": 2623740
    },
    {
      "epoch": 4.293840786054215,
      "grad_norm": 0.328196257352829,
      "learning_rate": 1.4216245066320514e-06,
      "loss": 0.0108,
      "step": 2623760
    },
    {
      "epoch": 4.293873516492868,
      "grad_norm": 0.11733490973711014,
      "learning_rate": 1.4215586144185343e-06,
      "loss": 0.0067,
      "step": 2623780
    },
    {
      "epoch": 4.293906246931521,
      "grad_norm": 0.20325379073619843,
      "learning_rate": 1.421492722205017e-06,
      "loss": 0.0109,
      "step": 2623800
    },
    {
      "epoch": 4.293938977370175,
      "grad_norm": 0.1772395223379135,
      "learning_rate": 1.4214268299914998e-06,
      "loss": 0.0081,
      "step": 2623820
    },
    {
      "epoch": 4.293971707808828,
      "grad_norm": 0.35503077507019043,
      "learning_rate": 1.421360937777983e-06,
      "loss": 0.0085,
      "step": 2623840
    },
    {
      "epoch": 4.294004438247481,
      "grad_norm": 0.2608054280281067,
      "learning_rate": 1.421295045564466e-06,
      "loss": 0.0082,
      "step": 2623860
    },
    {
      "epoch": 4.294037168686135,
      "grad_norm": 0.052218131721019745,
      "learning_rate": 1.4212291533509486e-06,
      "loss": 0.011,
      "step": 2623880
    },
    {
      "epoch": 4.294069899124788,
      "grad_norm": 0.19731907546520233,
      "learning_rate": 1.4211632611374316e-06,
      "loss": 0.0079,
      "step": 2623900
    },
    {
      "epoch": 4.294102629563441,
      "grad_norm": 0.24929742515087128,
      "learning_rate": 1.4210973689239143e-06,
      "loss": 0.0111,
      "step": 2623920
    },
    {
      "epoch": 4.294135360002095,
      "grad_norm": 0.26169711351394653,
      "learning_rate": 1.4210314767103973e-06,
      "loss": 0.0058,
      "step": 2623940
    },
    {
      "epoch": 4.294168090440748,
      "grad_norm": 0.3666750490665436,
      "learning_rate": 1.42096558449688e-06,
      "loss": 0.009,
      "step": 2623960
    },
    {
      "epoch": 4.294200820879402,
      "grad_norm": 0.07766184210777283,
      "learning_rate": 1.420899692283363e-06,
      "loss": 0.0098,
      "step": 2623980
    },
    {
      "epoch": 4.2942335513180545,
      "grad_norm": 0.17333360016345978,
      "learning_rate": 1.4208338000698457e-06,
      "loss": 0.0051,
      "step": 2624000
    },
    {
      "epoch": 4.294266281756708,
      "grad_norm": 0.4048529267311096,
      "learning_rate": 1.4207679078563289e-06,
      "loss": 0.008,
      "step": 2624020
    },
    {
      "epoch": 4.294299012195362,
      "grad_norm": 0.09186400473117828,
      "learning_rate": 1.4207020156428116e-06,
      "loss": 0.0126,
      "step": 2624040
    },
    {
      "epoch": 4.294331742634014,
      "grad_norm": 0.06019987165927887,
      "learning_rate": 1.4206361234292946e-06,
      "loss": 0.0049,
      "step": 2624060
    },
    {
      "epoch": 4.294364473072668,
      "grad_norm": 0.1463903784751892,
      "learning_rate": 1.4205702312157773e-06,
      "loss": 0.0067,
      "step": 2624080
    },
    {
      "epoch": 4.294397203511322,
      "grad_norm": 0.5624643564224243,
      "learning_rate": 1.4205043390022603e-06,
      "loss": 0.0108,
      "step": 2624100
    },
    {
      "epoch": 4.294429933949975,
      "grad_norm": 0.3878478407859802,
      "learning_rate": 1.420438446788743e-06,
      "loss": 0.0091,
      "step": 2624120
    },
    {
      "epoch": 4.294462664388628,
      "grad_norm": 0.22971664369106293,
      "learning_rate": 1.420372554575226e-06,
      "loss": 0.0112,
      "step": 2624140
    },
    {
      "epoch": 4.2944953948272815,
      "grad_norm": 0.11358608305454254,
      "learning_rate": 1.4203066623617087e-06,
      "loss": 0.0155,
      "step": 2624160
    },
    {
      "epoch": 4.294528125265935,
      "grad_norm": 0.2289983630180359,
      "learning_rate": 1.4202407701481919e-06,
      "loss": 0.0097,
      "step": 2624180
    },
    {
      "epoch": 4.294560855704588,
      "grad_norm": 0.2543078362941742,
      "learning_rate": 1.4201748779346746e-06,
      "loss": 0.0071,
      "step": 2624200
    },
    {
      "epoch": 4.294593586143241,
      "grad_norm": 0.1850663721561432,
      "learning_rate": 1.4201089857211576e-06,
      "loss": 0.0109,
      "step": 2624220
    },
    {
      "epoch": 4.294626316581895,
      "grad_norm": 0.5092349648475647,
      "learning_rate": 1.4200430935076403e-06,
      "loss": 0.0101,
      "step": 2624240
    },
    {
      "epoch": 4.294659047020549,
      "grad_norm": 0.3954554796218872,
      "learning_rate": 1.4199772012941232e-06,
      "loss": 0.0141,
      "step": 2624260
    },
    {
      "epoch": 4.294691777459201,
      "grad_norm": 0.13342206180095673,
      "learning_rate": 1.419911309080606e-06,
      "loss": 0.0101,
      "step": 2624280
    },
    {
      "epoch": 4.294724507897855,
      "grad_norm": 0.08470290899276733,
      "learning_rate": 1.419845416867089e-06,
      "loss": 0.006,
      "step": 2624300
    },
    {
      "epoch": 4.2947572383365085,
      "grad_norm": 0.1700187623500824,
      "learning_rate": 1.4197795246535717e-06,
      "loss": 0.0107,
      "step": 2624320
    },
    {
      "epoch": 4.294789968775161,
      "grad_norm": 0.13559836149215698,
      "learning_rate": 1.4197136324400546e-06,
      "loss": 0.0101,
      "step": 2624340
    },
    {
      "epoch": 4.294822699213815,
      "grad_norm": 0.11741587519645691,
      "learning_rate": 1.4196477402265376e-06,
      "loss": 0.0099,
      "step": 2624360
    },
    {
      "epoch": 4.294855429652468,
      "grad_norm": 0.5687644481658936,
      "learning_rate": 1.4195818480130205e-06,
      "loss": 0.0084,
      "step": 2624380
    },
    {
      "epoch": 4.294888160091122,
      "grad_norm": 0.09592776000499725,
      "learning_rate": 1.4195159557995033e-06,
      "loss": 0.0109,
      "step": 2624400
    },
    {
      "epoch": 4.294920890529775,
      "grad_norm": 0.12663765251636505,
      "learning_rate": 1.4194500635859862e-06,
      "loss": 0.0097,
      "step": 2624420
    },
    {
      "epoch": 4.294953620968428,
      "grad_norm": 0.16189076006412506,
      "learning_rate": 1.419384171372469e-06,
      "loss": 0.0086,
      "step": 2624440
    },
    {
      "epoch": 4.294986351407082,
      "grad_norm": 0.14402322471141815,
      "learning_rate": 1.419318279158952e-06,
      "loss": 0.0095,
      "step": 2624460
    },
    {
      "epoch": 4.2950190818457346,
      "grad_norm": 0.35543903708457947,
      "learning_rate": 1.4192523869454347e-06,
      "loss": 0.0098,
      "step": 2624480
    },
    {
      "epoch": 4.295051812284388,
      "grad_norm": 0.20457723736763,
      "learning_rate": 1.4191864947319176e-06,
      "loss": 0.0085,
      "step": 2624500
    },
    {
      "epoch": 4.295084542723042,
      "grad_norm": 0.1360054910182953,
      "learning_rate": 1.4191206025184003e-06,
      "loss": 0.0097,
      "step": 2624520
    },
    {
      "epoch": 4.295117273161695,
      "grad_norm": 0.15837310254573822,
      "learning_rate": 1.4190547103048835e-06,
      "loss": 0.0091,
      "step": 2624540
    },
    {
      "epoch": 4.295150003600348,
      "grad_norm": 0.5265401601791382,
      "learning_rate": 1.4189888180913662e-06,
      "loss": 0.0087,
      "step": 2624560
    },
    {
      "epoch": 4.295182734039002,
      "grad_norm": 0.14908114075660706,
      "learning_rate": 1.4189229258778492e-06,
      "loss": 0.0079,
      "step": 2624580
    },
    {
      "epoch": 4.295215464477655,
      "grad_norm": 0.1394001841545105,
      "learning_rate": 1.418857033664332e-06,
      "loss": 0.0058,
      "step": 2624600
    },
    {
      "epoch": 4.295248194916308,
      "grad_norm": 0.14741961658000946,
      "learning_rate": 1.4187911414508149e-06,
      "loss": 0.0056,
      "step": 2624620
    },
    {
      "epoch": 4.2952809253549615,
      "grad_norm": 0.14073781669139862,
      "learning_rate": 1.4187252492372976e-06,
      "loss": 0.0106,
      "step": 2624640
    },
    {
      "epoch": 4.295313655793615,
      "grad_norm": 0.27816545963287354,
      "learning_rate": 1.4186593570237806e-06,
      "loss": 0.0105,
      "step": 2624660
    },
    {
      "epoch": 4.295346386232268,
      "grad_norm": 0.13008977472782135,
      "learning_rate": 1.4185934648102633e-06,
      "loss": 0.0058,
      "step": 2624680
    },
    {
      "epoch": 4.295379116670921,
      "grad_norm": 0.2193828523159027,
      "learning_rate": 1.4185275725967463e-06,
      "loss": 0.0091,
      "step": 2624700
    },
    {
      "epoch": 4.295411847109575,
      "grad_norm": 0.07075494527816772,
      "learning_rate": 1.4184616803832294e-06,
      "loss": 0.0071,
      "step": 2624720
    },
    {
      "epoch": 4.295444577548229,
      "grad_norm": 0.07198450714349747,
      "learning_rate": 1.4183957881697122e-06,
      "loss": 0.0088,
      "step": 2624740
    },
    {
      "epoch": 4.295477307986881,
      "grad_norm": 0.06948522478342056,
      "learning_rate": 1.4183298959561951e-06,
      "loss": 0.0052,
      "step": 2624760
    },
    {
      "epoch": 4.295510038425535,
      "grad_norm": 0.21483242511749268,
      "learning_rate": 1.4182640037426779e-06,
      "loss": 0.0088,
      "step": 2624780
    },
    {
      "epoch": 4.2955427688641885,
      "grad_norm": 0.11792270839214325,
      "learning_rate": 1.4181981115291606e-06,
      "loss": 0.0103,
      "step": 2624800
    },
    {
      "epoch": 4.295575499302842,
      "grad_norm": 0.26154640316963196,
      "learning_rate": 1.4181322193156436e-06,
      "loss": 0.0076,
      "step": 2624820
    },
    {
      "epoch": 4.295608229741495,
      "grad_norm": 0.09008044004440308,
      "learning_rate": 1.4180663271021263e-06,
      "loss": 0.0076,
      "step": 2624840
    },
    {
      "epoch": 4.295640960180148,
      "grad_norm": 0.21446166932582855,
      "learning_rate": 1.4180004348886092e-06,
      "loss": 0.0107,
      "step": 2624860
    },
    {
      "epoch": 4.295673690618802,
      "grad_norm": 0.2535569369792938,
      "learning_rate": 1.417934542675092e-06,
      "loss": 0.0079,
      "step": 2624880
    },
    {
      "epoch": 4.295706421057455,
      "grad_norm": 0.26846808195114136,
      "learning_rate": 1.4178686504615751e-06,
      "loss": 0.0091,
      "step": 2624900
    },
    {
      "epoch": 4.295739151496108,
      "grad_norm": 0.11748959869146347,
      "learning_rate": 1.417802758248058e-06,
      "loss": 0.0091,
      "step": 2624920
    },
    {
      "epoch": 4.295771881934762,
      "grad_norm": 0.23525996506214142,
      "learning_rate": 1.4177368660345408e-06,
      "loss": 0.0084,
      "step": 2624940
    },
    {
      "epoch": 4.295804612373415,
      "grad_norm": 0.07770693302154541,
      "learning_rate": 1.4176709738210238e-06,
      "loss": 0.0103,
      "step": 2624960
    },
    {
      "epoch": 4.295837342812068,
      "grad_norm": 0.17708539962768555,
      "learning_rate": 1.4176050816075065e-06,
      "loss": 0.0075,
      "step": 2624980
    },
    {
      "epoch": 4.295870073250722,
      "grad_norm": 0.14456923305988312,
      "learning_rate": 1.4175391893939895e-06,
      "loss": 0.0073,
      "step": 2625000
    },
    {
      "epoch": 4.295902803689375,
      "grad_norm": 0.17586050927639008,
      "learning_rate": 1.4174732971804722e-06,
      "loss": 0.0083,
      "step": 2625020
    },
    {
      "epoch": 4.295935534128028,
      "grad_norm": 0.2621002793312073,
      "learning_rate": 1.417407404966955e-06,
      "loss": 0.0105,
      "step": 2625040
    },
    {
      "epoch": 4.295968264566682,
      "grad_norm": 0.25646287202835083,
      "learning_rate": 1.4173415127534381e-06,
      "loss": 0.0105,
      "step": 2625060
    },
    {
      "epoch": 4.296000995005335,
      "grad_norm": 0.471755713224411,
      "learning_rate": 1.417275620539921e-06,
      "loss": 0.0079,
      "step": 2625080
    },
    {
      "epoch": 4.296033725443989,
      "grad_norm": 0.7709492444992065,
      "learning_rate": 1.4172097283264038e-06,
      "loss": 0.0086,
      "step": 2625100
    },
    {
      "epoch": 4.296066455882642,
      "grad_norm": 0.4766939580440521,
      "learning_rate": 1.4171438361128868e-06,
      "loss": 0.0089,
      "step": 2625120
    },
    {
      "epoch": 4.296099186321295,
      "grad_norm": 0.34371045231819153,
      "learning_rate": 1.4170779438993695e-06,
      "loss": 0.0164,
      "step": 2625140
    },
    {
      "epoch": 4.296131916759949,
      "grad_norm": 0.12458100914955139,
      "learning_rate": 1.4170120516858525e-06,
      "loss": 0.0103,
      "step": 2625160
    },
    {
      "epoch": 4.2961646471986015,
      "grad_norm": 0.1584232598543167,
      "learning_rate": 1.4169461594723352e-06,
      "loss": 0.0084,
      "step": 2625180
    },
    {
      "epoch": 4.296197377637255,
      "grad_norm": 0.2686781883239746,
      "learning_rate": 1.4168802672588182e-06,
      "loss": 0.0083,
      "step": 2625200
    },
    {
      "epoch": 4.296230108075909,
      "grad_norm": 0.4204404652118683,
      "learning_rate": 1.4168143750453009e-06,
      "loss": 0.0126,
      "step": 2625220
    },
    {
      "epoch": 4.296262838514561,
      "grad_norm": 0.14127367734909058,
      "learning_rate": 1.416748482831784e-06,
      "loss": 0.0084,
      "step": 2625240
    },
    {
      "epoch": 4.296295568953215,
      "grad_norm": 0.10822773724794388,
      "learning_rate": 1.4166825906182668e-06,
      "loss": 0.0065,
      "step": 2625260
    },
    {
      "epoch": 4.2963282993918686,
      "grad_norm": 0.5832738876342773,
      "learning_rate": 1.4166166984047497e-06,
      "loss": 0.0121,
      "step": 2625280
    },
    {
      "epoch": 4.296361029830522,
      "grad_norm": 0.11799635738134384,
      "learning_rate": 1.4165508061912325e-06,
      "loss": 0.0117,
      "step": 2625300
    },
    {
      "epoch": 4.296393760269175,
      "grad_norm": 0.5843645930290222,
      "learning_rate": 1.4164849139777154e-06,
      "loss": 0.0123,
      "step": 2625320
    },
    {
      "epoch": 4.2964264907078284,
      "grad_norm": 0.46070289611816406,
      "learning_rate": 1.4164190217641982e-06,
      "loss": 0.0115,
      "step": 2625340
    },
    {
      "epoch": 4.296459221146482,
      "grad_norm": 0.42733487486839294,
      "learning_rate": 1.4163531295506811e-06,
      "loss": 0.0127,
      "step": 2625360
    },
    {
      "epoch": 4.296491951585135,
      "grad_norm": 0.10776252299547195,
      "learning_rate": 1.4162872373371639e-06,
      "loss": 0.0055,
      "step": 2625380
    },
    {
      "epoch": 4.296524682023788,
      "grad_norm": 0.14520494639873505,
      "learning_rate": 1.4162213451236468e-06,
      "loss": 0.007,
      "step": 2625400
    },
    {
      "epoch": 4.296557412462442,
      "grad_norm": 0.5319310426712036,
      "learning_rate": 1.4161554529101298e-06,
      "loss": 0.0073,
      "step": 2625420
    },
    {
      "epoch": 4.2965901429010955,
      "grad_norm": 0.6058721542358398,
      "learning_rate": 1.4160895606966127e-06,
      "loss": 0.0112,
      "step": 2625440
    },
    {
      "epoch": 4.296622873339748,
      "grad_norm": 0.15222875773906708,
      "learning_rate": 1.4160236684830955e-06,
      "loss": 0.0065,
      "step": 2625460
    },
    {
      "epoch": 4.296655603778402,
      "grad_norm": 0.2314796894788742,
      "learning_rate": 1.4159577762695784e-06,
      "loss": 0.0075,
      "step": 2625480
    },
    {
      "epoch": 4.296688334217055,
      "grad_norm": 0.11822456121444702,
      "learning_rate": 1.4158918840560612e-06,
      "loss": 0.0062,
      "step": 2625500
    },
    {
      "epoch": 4.296721064655708,
      "grad_norm": 0.4101858139038086,
      "learning_rate": 1.415825991842544e-06,
      "loss": 0.0077,
      "step": 2625520
    },
    {
      "epoch": 4.296753795094362,
      "grad_norm": 0.06771422177553177,
      "learning_rate": 1.4157600996290268e-06,
      "loss": 0.0136,
      "step": 2625540
    },
    {
      "epoch": 4.296786525533015,
      "grad_norm": 0.34742504358291626,
      "learning_rate": 1.4156942074155098e-06,
      "loss": 0.0102,
      "step": 2625560
    },
    {
      "epoch": 4.296819255971669,
      "grad_norm": 0.45640555024147034,
      "learning_rate": 1.4156283152019925e-06,
      "loss": 0.0104,
      "step": 2625580
    },
    {
      "epoch": 4.296851986410322,
      "grad_norm": 0.20836500823497772,
      "learning_rate": 1.4155624229884757e-06,
      "loss": 0.0088,
      "step": 2625600
    },
    {
      "epoch": 4.296884716848975,
      "grad_norm": 0.10922230780124664,
      "learning_rate": 1.4154965307749584e-06,
      "loss": 0.011,
      "step": 2625620
    },
    {
      "epoch": 4.296917447287629,
      "grad_norm": 0.14449933171272278,
      "learning_rate": 1.4154306385614414e-06,
      "loss": 0.0123,
      "step": 2625640
    },
    {
      "epoch": 4.2969501777262815,
      "grad_norm": 0.21615174412727356,
      "learning_rate": 1.4153647463479241e-06,
      "loss": 0.0097,
      "step": 2625660
    },
    {
      "epoch": 4.296982908164935,
      "grad_norm": 0.2088339775800705,
      "learning_rate": 1.415298854134407e-06,
      "loss": 0.0092,
      "step": 2625680
    },
    {
      "epoch": 4.297015638603589,
      "grad_norm": 0.47345930337905884,
      "learning_rate": 1.4152329619208898e-06,
      "loss": 0.0117,
      "step": 2625700
    },
    {
      "epoch": 4.297048369042242,
      "grad_norm": 0.14684733748435974,
      "learning_rate": 1.4151670697073728e-06,
      "loss": 0.0101,
      "step": 2625720
    },
    {
      "epoch": 4.297081099480895,
      "grad_norm": 0.3125949800014496,
      "learning_rate": 1.4151011774938555e-06,
      "loss": 0.0124,
      "step": 2625740
    },
    {
      "epoch": 4.297113829919549,
      "grad_norm": 0.4450751841068268,
      "learning_rate": 1.4150352852803385e-06,
      "loss": 0.0109,
      "step": 2625760
    },
    {
      "epoch": 4.297146560358202,
      "grad_norm": 0.2776382565498352,
      "learning_rate": 1.4149693930668214e-06,
      "loss": 0.0102,
      "step": 2625780
    },
    {
      "epoch": 4.297179290796855,
      "grad_norm": 0.8420187830924988,
      "learning_rate": 1.4149035008533044e-06,
      "loss": 0.0128,
      "step": 2625800
    },
    {
      "epoch": 4.2972120212355085,
      "grad_norm": 0.2939705550670624,
      "learning_rate": 1.4148376086397871e-06,
      "loss": 0.012,
      "step": 2625820
    },
    {
      "epoch": 4.297244751674162,
      "grad_norm": 0.3137911260128021,
      "learning_rate": 1.41477171642627e-06,
      "loss": 0.0088,
      "step": 2625840
    },
    {
      "epoch": 4.297277482112816,
      "grad_norm": 0.20501463115215302,
      "learning_rate": 1.4147058242127528e-06,
      "loss": 0.0052,
      "step": 2625860
    },
    {
      "epoch": 4.297310212551468,
      "grad_norm": 0.26698049902915955,
      "learning_rate": 1.4146399319992357e-06,
      "loss": 0.0109,
      "step": 2625880
    },
    {
      "epoch": 4.297342942990122,
      "grad_norm": 0.19781385362148285,
      "learning_rate": 1.4145740397857185e-06,
      "loss": 0.0102,
      "step": 2625900
    },
    {
      "epoch": 4.297375673428776,
      "grad_norm": 0.14043612778186798,
      "learning_rate": 1.4145081475722014e-06,
      "loss": 0.0078,
      "step": 2625920
    },
    {
      "epoch": 4.297408403867428,
      "grad_norm": 0.1827927678823471,
      "learning_rate": 1.4144422553586846e-06,
      "loss": 0.009,
      "step": 2625940
    },
    {
      "epoch": 4.297441134306082,
      "grad_norm": 0.32226377725601196,
      "learning_rate": 1.4143763631451673e-06,
      "loss": 0.0124,
      "step": 2625960
    },
    {
      "epoch": 4.2974738647447355,
      "grad_norm": 0.18149495124816895,
      "learning_rate": 1.4143104709316503e-06,
      "loss": 0.009,
      "step": 2625980
    },
    {
      "epoch": 4.297506595183389,
      "grad_norm": 0.2914436161518097,
      "learning_rate": 1.414244578718133e-06,
      "loss": 0.0081,
      "step": 2626000
    },
    {
      "epoch": 4.297539325622042,
      "grad_norm": 0.1891370713710785,
      "learning_rate": 1.4141786865046158e-06,
      "loss": 0.0096,
      "step": 2626020
    },
    {
      "epoch": 4.297572056060695,
      "grad_norm": 1.027124047279358,
      "learning_rate": 1.4141127942910987e-06,
      "loss": 0.009,
      "step": 2626040
    },
    {
      "epoch": 4.297604786499349,
      "grad_norm": 0.17940658330917358,
      "learning_rate": 1.4140469020775815e-06,
      "loss": 0.0086,
      "step": 2626060
    },
    {
      "epoch": 4.297637516938002,
      "grad_norm": 0.24524541199207306,
      "learning_rate": 1.4139810098640644e-06,
      "loss": 0.0079,
      "step": 2626080
    },
    {
      "epoch": 4.297670247376655,
      "grad_norm": 0.05026579648256302,
      "learning_rate": 1.4139151176505472e-06,
      "loss": 0.0086,
      "step": 2626100
    },
    {
      "epoch": 4.297702977815309,
      "grad_norm": 0.3041371703147888,
      "learning_rate": 1.4138492254370303e-06,
      "loss": 0.0086,
      "step": 2626120
    },
    {
      "epoch": 4.297735708253962,
      "grad_norm": 0.0793888047337532,
      "learning_rate": 1.4137833332235133e-06,
      "loss": 0.008,
      "step": 2626140
    },
    {
      "epoch": 4.297768438692615,
      "grad_norm": 0.4191626012325287,
      "learning_rate": 1.413717441009996e-06,
      "loss": 0.0131,
      "step": 2626160
    },
    {
      "epoch": 4.297801169131269,
      "grad_norm": 0.2773641049861908,
      "learning_rate": 1.413651548796479e-06,
      "loss": 0.0077,
      "step": 2626180
    },
    {
      "epoch": 4.297833899569922,
      "grad_norm": 0.1952514797449112,
      "learning_rate": 1.4135856565829617e-06,
      "loss": 0.0133,
      "step": 2626200
    },
    {
      "epoch": 4.297866630008575,
      "grad_norm": 0.4026212692260742,
      "learning_rate": 1.4135197643694444e-06,
      "loss": 0.0072,
      "step": 2626220
    },
    {
      "epoch": 4.297899360447229,
      "grad_norm": 0.502405047416687,
      "learning_rate": 1.4134538721559274e-06,
      "loss": 0.0082,
      "step": 2626240
    },
    {
      "epoch": 4.297932090885882,
      "grad_norm": 0.1799265593290329,
      "learning_rate": 1.4133879799424101e-06,
      "loss": 0.0073,
      "step": 2626260
    },
    {
      "epoch": 4.297964821324536,
      "grad_norm": 0.32713082432746887,
      "learning_rate": 1.413322087728893e-06,
      "loss": 0.0072,
      "step": 2626280
    },
    {
      "epoch": 4.2979975517631885,
      "grad_norm": 0.31744906306266785,
      "learning_rate": 1.4132561955153762e-06,
      "loss": 0.0113,
      "step": 2626300
    },
    {
      "epoch": 4.298030282201842,
      "grad_norm": 0.1225207969546318,
      "learning_rate": 1.413190303301859e-06,
      "loss": 0.0118,
      "step": 2626320
    },
    {
      "epoch": 4.298063012640496,
      "grad_norm": 0.15977789461612701,
      "learning_rate": 1.413124411088342e-06,
      "loss": 0.0065,
      "step": 2626340
    },
    {
      "epoch": 4.298095743079148,
      "grad_norm": 0.36038216948509216,
      "learning_rate": 1.4130585188748247e-06,
      "loss": 0.0114,
      "step": 2626360
    },
    {
      "epoch": 4.298128473517802,
      "grad_norm": 0.2686925232410431,
      "learning_rate": 1.4129926266613076e-06,
      "loss": 0.0108,
      "step": 2626380
    },
    {
      "epoch": 4.298161203956456,
      "grad_norm": 0.4873201847076416,
      "learning_rate": 1.4129267344477904e-06,
      "loss": 0.007,
      "step": 2626400
    },
    {
      "epoch": 4.298193934395108,
      "grad_norm": 0.04295917600393295,
      "learning_rate": 1.4128608422342733e-06,
      "loss": 0.011,
      "step": 2626420
    },
    {
      "epoch": 4.298226664833762,
      "grad_norm": 0.4057770371437073,
      "learning_rate": 1.412794950020756e-06,
      "loss": 0.0107,
      "step": 2626440
    },
    {
      "epoch": 4.2982593952724155,
      "grad_norm": 0.12897492945194244,
      "learning_rate": 1.4127290578072388e-06,
      "loss": 0.0063,
      "step": 2626460
    },
    {
      "epoch": 4.298292125711069,
      "grad_norm": 0.21301434934139252,
      "learning_rate": 1.412663165593722e-06,
      "loss": 0.0115,
      "step": 2626480
    },
    {
      "epoch": 4.298324856149722,
      "grad_norm": 0.3478308618068695,
      "learning_rate": 1.412597273380205e-06,
      "loss": 0.0086,
      "step": 2626500
    },
    {
      "epoch": 4.298357586588375,
      "grad_norm": 0.2756693363189697,
      "learning_rate": 1.4125313811666877e-06,
      "loss": 0.0089,
      "step": 2626520
    },
    {
      "epoch": 4.298390317027029,
      "grad_norm": 0.04994606599211693,
      "learning_rate": 1.4124654889531706e-06,
      "loss": 0.0079,
      "step": 2626540
    },
    {
      "epoch": 4.298423047465683,
      "grad_norm": 0.1973164677619934,
      "learning_rate": 1.4123995967396533e-06,
      "loss": 0.0055,
      "step": 2626560
    },
    {
      "epoch": 4.298455777904335,
      "grad_norm": 0.18390655517578125,
      "learning_rate": 1.4123337045261363e-06,
      "loss": 0.0139,
      "step": 2626580
    },
    {
      "epoch": 4.298488508342989,
      "grad_norm": 0.41301417350769043,
      "learning_rate": 1.412267812312619e-06,
      "loss": 0.0077,
      "step": 2626600
    },
    {
      "epoch": 4.2985212387816425,
      "grad_norm": 0.7545034885406494,
      "learning_rate": 1.412201920099102e-06,
      "loss": 0.0079,
      "step": 2626620
    },
    {
      "epoch": 4.298553969220295,
      "grad_norm": 0.14933264255523682,
      "learning_rate": 1.412136027885585e-06,
      "loss": 0.0054,
      "step": 2626640
    },
    {
      "epoch": 4.298586699658949,
      "grad_norm": 0.6440715193748474,
      "learning_rate": 1.4120701356720679e-06,
      "loss": 0.0108,
      "step": 2626660
    },
    {
      "epoch": 4.298619430097602,
      "grad_norm": 0.16613292694091797,
      "learning_rate": 1.4120042434585506e-06,
      "loss": 0.0082,
      "step": 2626680
    },
    {
      "epoch": 4.298652160536255,
      "grad_norm": 0.4389766454696655,
      "learning_rate": 1.4119383512450336e-06,
      "loss": 0.0085,
      "step": 2626700
    },
    {
      "epoch": 4.298684890974909,
      "grad_norm": 0.15001621842384338,
      "learning_rate": 1.4118724590315163e-06,
      "loss": 0.0167,
      "step": 2626720
    },
    {
      "epoch": 4.298717621413562,
      "grad_norm": 0.4277879595756531,
      "learning_rate": 1.4118065668179993e-06,
      "loss": 0.0134,
      "step": 2626740
    },
    {
      "epoch": 4.298750351852216,
      "grad_norm": 0.29873690009117126,
      "learning_rate": 1.411740674604482e-06,
      "loss": 0.0106,
      "step": 2626760
    },
    {
      "epoch": 4.298783082290869,
      "grad_norm": 0.09070111066102982,
      "learning_rate": 1.411674782390965e-06,
      "loss": 0.0118,
      "step": 2626780
    },
    {
      "epoch": 4.298815812729522,
      "grad_norm": 0.5883406400680542,
      "learning_rate": 1.4116088901774477e-06,
      "loss": 0.0096,
      "step": 2626800
    },
    {
      "epoch": 4.298848543168176,
      "grad_norm": 0.2597724497318268,
      "learning_rate": 1.4115429979639309e-06,
      "loss": 0.0151,
      "step": 2626820
    },
    {
      "epoch": 4.2988812736068285,
      "grad_norm": 0.19929884374141693,
      "learning_rate": 1.4114771057504136e-06,
      "loss": 0.0074,
      "step": 2626840
    },
    {
      "epoch": 4.298914004045482,
      "grad_norm": 0.2832704782485962,
      "learning_rate": 1.4114112135368966e-06,
      "loss": 0.0087,
      "step": 2626860
    },
    {
      "epoch": 4.298946734484136,
      "grad_norm": 0.25055086612701416,
      "learning_rate": 1.4113453213233793e-06,
      "loss": 0.0122,
      "step": 2626880
    },
    {
      "epoch": 4.298979464922789,
      "grad_norm": 0.20677877962589264,
      "learning_rate": 1.4112794291098623e-06,
      "loss": 0.0075,
      "step": 2626900
    },
    {
      "epoch": 4.299012195361442,
      "grad_norm": 0.1587819755077362,
      "learning_rate": 1.411213536896345e-06,
      "loss": 0.008,
      "step": 2626920
    },
    {
      "epoch": 4.299044925800096,
      "grad_norm": 0.25433704257011414,
      "learning_rate": 1.411147644682828e-06,
      "loss": 0.0076,
      "step": 2626940
    },
    {
      "epoch": 4.299077656238749,
      "grad_norm": 0.22484302520751953,
      "learning_rate": 1.4110817524693107e-06,
      "loss": 0.0079,
      "step": 2626960
    },
    {
      "epoch": 4.299110386677402,
      "grad_norm": 0.24331454932689667,
      "learning_rate": 1.4110158602557936e-06,
      "loss": 0.0057,
      "step": 2626980
    },
    {
      "epoch": 4.2991431171160555,
      "grad_norm": 0.08367276936769485,
      "learning_rate": 1.4109499680422766e-06,
      "loss": 0.008,
      "step": 2627000
    },
    {
      "epoch": 4.299175847554709,
      "grad_norm": 1.0450750589370728,
      "learning_rate": 1.4108840758287595e-06,
      "loss": 0.01,
      "step": 2627020
    },
    {
      "epoch": 4.299208577993363,
      "grad_norm": 0.15308904647827148,
      "learning_rate": 1.4108181836152423e-06,
      "loss": 0.0077,
      "step": 2627040
    },
    {
      "epoch": 4.299241308432015,
      "grad_norm": 0.5849051475524902,
      "learning_rate": 1.4107522914017252e-06,
      "loss": 0.0131,
      "step": 2627060
    },
    {
      "epoch": 4.299274038870669,
      "grad_norm": 0.18257364630699158,
      "learning_rate": 1.410686399188208e-06,
      "loss": 0.006,
      "step": 2627080
    },
    {
      "epoch": 4.2993067693093225,
      "grad_norm": 0.17232240736484528,
      "learning_rate": 1.410620506974691e-06,
      "loss": 0.0115,
      "step": 2627100
    },
    {
      "epoch": 4.299339499747975,
      "grad_norm": 0.3461354970932007,
      "learning_rate": 1.4105546147611737e-06,
      "loss": 0.0096,
      "step": 2627120
    },
    {
      "epoch": 4.299372230186629,
      "grad_norm": 0.28409767150878906,
      "learning_rate": 1.4104887225476566e-06,
      "loss": 0.0074,
      "step": 2627140
    },
    {
      "epoch": 4.299404960625282,
      "grad_norm": 0.5767233967781067,
      "learning_rate": 1.4104228303341394e-06,
      "loss": 0.0077,
      "step": 2627160
    },
    {
      "epoch": 4.299437691063936,
      "grad_norm": 0.24246962368488312,
      "learning_rate": 1.4103569381206225e-06,
      "loss": 0.0091,
      "step": 2627180
    },
    {
      "epoch": 4.299470421502589,
      "grad_norm": 0.06742766499519348,
      "learning_rate": 1.4102910459071053e-06,
      "loss": 0.0083,
      "step": 2627200
    },
    {
      "epoch": 4.299503151941242,
      "grad_norm": 0.44428953528404236,
      "learning_rate": 1.4102251536935882e-06,
      "loss": 0.0117,
      "step": 2627220
    },
    {
      "epoch": 4.299535882379896,
      "grad_norm": 0.4220871329307556,
      "learning_rate": 1.410159261480071e-06,
      "loss": 0.0063,
      "step": 2627240
    },
    {
      "epoch": 4.299568612818549,
      "grad_norm": 0.4367847442626953,
      "learning_rate": 1.410093369266554e-06,
      "loss": 0.0129,
      "step": 2627260
    },
    {
      "epoch": 4.299601343257202,
      "grad_norm": 0.15034440159797668,
      "learning_rate": 1.4100274770530366e-06,
      "loss": 0.0068,
      "step": 2627280
    },
    {
      "epoch": 4.299634073695856,
      "grad_norm": 0.36256468296051025,
      "learning_rate": 1.4099615848395196e-06,
      "loss": 0.0077,
      "step": 2627300
    },
    {
      "epoch": 4.299666804134509,
      "grad_norm": 0.23386527597904205,
      "learning_rate": 1.4098956926260023e-06,
      "loss": 0.0091,
      "step": 2627320
    },
    {
      "epoch": 4.299699534573162,
      "grad_norm": 0.2066255658864975,
      "learning_rate": 1.4098298004124853e-06,
      "loss": 0.007,
      "step": 2627340
    },
    {
      "epoch": 4.299732265011816,
      "grad_norm": 0.18709935247898102,
      "learning_rate": 1.4097639081989684e-06,
      "loss": 0.0087,
      "step": 2627360
    },
    {
      "epoch": 4.299764995450469,
      "grad_norm": 0.4537818729877472,
      "learning_rate": 1.4096980159854512e-06,
      "loss": 0.0119,
      "step": 2627380
    },
    {
      "epoch": 4.299797725889122,
      "grad_norm": 0.06558765470981598,
      "learning_rate": 1.4096321237719341e-06,
      "loss": 0.0076,
      "step": 2627400
    },
    {
      "epoch": 4.299830456327776,
      "grad_norm": 0.4920240640640259,
      "learning_rate": 1.4095662315584169e-06,
      "loss": 0.0118,
      "step": 2627420
    },
    {
      "epoch": 4.299863186766429,
      "grad_norm": 0.3632703721523285,
      "learning_rate": 1.4095003393448996e-06,
      "loss": 0.0094,
      "step": 2627440
    },
    {
      "epoch": 4.299895917205083,
      "grad_norm": 0.3389129936695099,
      "learning_rate": 1.4094344471313826e-06,
      "loss": 0.0087,
      "step": 2627460
    },
    {
      "epoch": 4.2999286476437355,
      "grad_norm": 0.08479679375886917,
      "learning_rate": 1.4093685549178653e-06,
      "loss": 0.01,
      "step": 2627480
    },
    {
      "epoch": 4.299961378082389,
      "grad_norm": 0.22256936132907867,
      "learning_rate": 1.4093026627043483e-06,
      "loss": 0.0117,
      "step": 2627500
    },
    {
      "epoch": 4.299994108521043,
      "grad_norm": 0.11236820369958878,
      "learning_rate": 1.4092367704908314e-06,
      "loss": 0.0144,
      "step": 2627520
    },
    {
      "epoch": 4.300026838959695,
      "grad_norm": 0.24489793181419373,
      "learning_rate": 1.4091708782773142e-06,
      "loss": 0.0108,
      "step": 2627540
    },
    {
      "epoch": 4.300059569398349,
      "grad_norm": 0.23392610251903534,
      "learning_rate": 1.4091049860637971e-06,
      "loss": 0.0093,
      "step": 2627560
    },
    {
      "epoch": 4.300092299837003,
      "grad_norm": 0.21736659109592438,
      "learning_rate": 1.4090390938502799e-06,
      "loss": 0.0048,
      "step": 2627580
    },
    {
      "epoch": 4.300125030275655,
      "grad_norm": 0.43200045824050903,
      "learning_rate": 1.4089732016367628e-06,
      "loss": 0.0091,
      "step": 2627600
    },
    {
      "epoch": 4.300157760714309,
      "grad_norm": 0.23489448428153992,
      "learning_rate": 1.4089073094232455e-06,
      "loss": 0.0079,
      "step": 2627620
    },
    {
      "epoch": 4.3001904911529625,
      "grad_norm": 0.6492862701416016,
      "learning_rate": 1.4088414172097285e-06,
      "loss": 0.0101,
      "step": 2627640
    },
    {
      "epoch": 4.300223221591616,
      "grad_norm": 0.36005064845085144,
      "learning_rate": 1.4087755249962112e-06,
      "loss": 0.011,
      "step": 2627660
    },
    {
      "epoch": 4.300255952030269,
      "grad_norm": 0.2026229053735733,
      "learning_rate": 1.408709632782694e-06,
      "loss": 0.0095,
      "step": 2627680
    },
    {
      "epoch": 4.300288682468922,
      "grad_norm": 0.18748442828655243,
      "learning_rate": 1.4086437405691771e-06,
      "loss": 0.01,
      "step": 2627700
    },
    {
      "epoch": 4.300321412907576,
      "grad_norm": 0.11222429573535919,
      "learning_rate": 1.40857784835566e-06,
      "loss": 0.0044,
      "step": 2627720
    },
    {
      "epoch": 4.30035414334623,
      "grad_norm": 0.11071042716503143,
      "learning_rate": 1.4085119561421428e-06,
      "loss": 0.0065,
      "step": 2627740
    },
    {
      "epoch": 4.300386873784882,
      "grad_norm": 0.11297962814569473,
      "learning_rate": 1.4084460639286258e-06,
      "loss": 0.0118,
      "step": 2627760
    },
    {
      "epoch": 4.300419604223536,
      "grad_norm": 0.11458001285791397,
      "learning_rate": 1.4083801717151085e-06,
      "loss": 0.005,
      "step": 2627780
    },
    {
      "epoch": 4.3004523346621895,
      "grad_norm": 0.08741842210292816,
      "learning_rate": 1.4083142795015915e-06,
      "loss": 0.0117,
      "step": 2627800
    },
    {
      "epoch": 4.300485065100842,
      "grad_norm": 0.34619808197021484,
      "learning_rate": 1.4082483872880742e-06,
      "loss": 0.0063,
      "step": 2627820
    },
    {
      "epoch": 4.300517795539496,
      "grad_norm": 0.11552777886390686,
      "learning_rate": 1.4081824950745572e-06,
      "loss": 0.0082,
      "step": 2627840
    },
    {
      "epoch": 4.300550525978149,
      "grad_norm": 0.1655571162700653,
      "learning_rate": 1.40811660286104e-06,
      "loss": 0.0068,
      "step": 2627860
    },
    {
      "epoch": 4.300583256416802,
      "grad_norm": 0.19759276509284973,
      "learning_rate": 1.408050710647523e-06,
      "loss": 0.0079,
      "step": 2627880
    },
    {
      "epoch": 4.300615986855456,
      "grad_norm": 0.5248704552650452,
      "learning_rate": 1.4079848184340058e-06,
      "loss": 0.0084,
      "step": 2627900
    },
    {
      "epoch": 4.300648717294109,
      "grad_norm": 0.13062533736228943,
      "learning_rate": 1.4079189262204888e-06,
      "loss": 0.0112,
      "step": 2627920
    },
    {
      "epoch": 4.300681447732763,
      "grad_norm": 0.28801384568214417,
      "learning_rate": 1.4078530340069715e-06,
      "loss": 0.0101,
      "step": 2627940
    },
    {
      "epoch": 4.3007141781714155,
      "grad_norm": 0.06636903434991837,
      "learning_rate": 1.4077871417934544e-06,
      "loss": 0.0064,
      "step": 2627960
    },
    {
      "epoch": 4.300746908610069,
      "grad_norm": 0.554664134979248,
      "learning_rate": 1.4077212495799372e-06,
      "loss": 0.0084,
      "step": 2627980
    },
    {
      "epoch": 4.300779639048723,
      "grad_norm": 0.3794456124305725,
      "learning_rate": 1.4076553573664201e-06,
      "loss": 0.0087,
      "step": 2628000
    },
    {
      "epoch": 4.300812369487376,
      "grad_norm": 0.4982714354991913,
      "learning_rate": 1.4075894651529029e-06,
      "loss": 0.0071,
      "step": 2628020
    },
    {
      "epoch": 4.300845099926029,
      "grad_norm": 0.20782460272312164,
      "learning_rate": 1.4075235729393858e-06,
      "loss": 0.0124,
      "step": 2628040
    },
    {
      "epoch": 4.300877830364683,
      "grad_norm": 0.05707379803061485,
      "learning_rate": 1.4074576807258688e-06,
      "loss": 0.0082,
      "step": 2628060
    },
    {
      "epoch": 4.300910560803336,
      "grad_norm": 0.11553484201431274,
      "learning_rate": 1.4073917885123517e-06,
      "loss": 0.0099,
      "step": 2628080
    },
    {
      "epoch": 4.300943291241989,
      "grad_norm": 0.2633667588233948,
      "learning_rate": 1.4073258962988345e-06,
      "loss": 0.0058,
      "step": 2628100
    },
    {
      "epoch": 4.3009760216806425,
      "grad_norm": 0.30590128898620605,
      "learning_rate": 1.4072600040853174e-06,
      "loss": 0.0076,
      "step": 2628120
    },
    {
      "epoch": 4.301008752119296,
      "grad_norm": 0.16739459335803986,
      "learning_rate": 1.4071941118718002e-06,
      "loss": 0.0088,
      "step": 2628140
    },
    {
      "epoch": 4.301041482557949,
      "grad_norm": 0.2685529887676239,
      "learning_rate": 1.4071282196582831e-06,
      "loss": 0.0083,
      "step": 2628160
    },
    {
      "epoch": 4.301074212996602,
      "grad_norm": 0.19153867661952972,
      "learning_rate": 1.4070623274447659e-06,
      "loss": 0.0123,
      "step": 2628180
    },
    {
      "epoch": 4.301106943435256,
      "grad_norm": 0.3359125852584839,
      "learning_rate": 1.4069964352312488e-06,
      "loss": 0.0115,
      "step": 2628200
    },
    {
      "epoch": 4.30113967387391,
      "grad_norm": 0.20922048389911652,
      "learning_rate": 1.4069305430177315e-06,
      "loss": 0.0095,
      "step": 2628220
    },
    {
      "epoch": 4.301172404312562,
      "grad_norm": 0.3121957778930664,
      "learning_rate": 1.4068646508042147e-06,
      "loss": 0.009,
      "step": 2628240
    },
    {
      "epoch": 4.301205134751216,
      "grad_norm": 0.4892089366912842,
      "learning_rate": 1.4067987585906974e-06,
      "loss": 0.0095,
      "step": 2628260
    },
    {
      "epoch": 4.3012378651898695,
      "grad_norm": 0.34568655490875244,
      "learning_rate": 1.4067328663771804e-06,
      "loss": 0.0072,
      "step": 2628280
    },
    {
      "epoch": 4.301270595628522,
      "grad_norm": 0.3502736985683441,
      "learning_rate": 1.4066669741636631e-06,
      "loss": 0.0082,
      "step": 2628300
    },
    {
      "epoch": 4.301303326067176,
      "grad_norm": 0.13821113109588623,
      "learning_rate": 1.406601081950146e-06,
      "loss": 0.0069,
      "step": 2628320
    },
    {
      "epoch": 4.301336056505829,
      "grad_norm": 0.3459879159927368,
      "learning_rate": 1.4065351897366288e-06,
      "loss": 0.0079,
      "step": 2628340
    },
    {
      "epoch": 4.301368786944483,
      "grad_norm": 0.40946996212005615,
      "learning_rate": 1.4064692975231118e-06,
      "loss": 0.006,
      "step": 2628360
    },
    {
      "epoch": 4.301401517383136,
      "grad_norm": 0.33901992440223694,
      "learning_rate": 1.4064034053095945e-06,
      "loss": 0.012,
      "step": 2628380
    },
    {
      "epoch": 4.301434247821789,
      "grad_norm": 0.24179305136203766,
      "learning_rate": 1.4063375130960777e-06,
      "loss": 0.0102,
      "step": 2628400
    },
    {
      "epoch": 4.301466978260443,
      "grad_norm": 0.3222414553165436,
      "learning_rate": 1.4062716208825604e-06,
      "loss": 0.013,
      "step": 2628420
    },
    {
      "epoch": 4.301499708699096,
      "grad_norm": 0.13046486675739288,
      "learning_rate": 1.4062057286690434e-06,
      "loss": 0.0098,
      "step": 2628440
    },
    {
      "epoch": 4.301532439137749,
      "grad_norm": 0.29529789090156555,
      "learning_rate": 1.4061398364555261e-06,
      "loss": 0.0126,
      "step": 2628460
    },
    {
      "epoch": 4.301565169576403,
      "grad_norm": 0.3746805191040039,
      "learning_rate": 1.406073944242009e-06,
      "loss": 0.0089,
      "step": 2628480
    },
    {
      "epoch": 4.301597900015056,
      "grad_norm": 0.16189786791801453,
      "learning_rate": 1.4060080520284918e-06,
      "loss": 0.011,
      "step": 2628500
    },
    {
      "epoch": 4.301630630453709,
      "grad_norm": 0.17357753217220306,
      "learning_rate": 1.4059421598149748e-06,
      "loss": 0.0079,
      "step": 2628520
    },
    {
      "epoch": 4.301663360892363,
      "grad_norm": 0.0952010378241539,
      "learning_rate": 1.4058762676014575e-06,
      "loss": 0.0094,
      "step": 2628540
    },
    {
      "epoch": 4.301696091331016,
      "grad_norm": 0.23802195489406586,
      "learning_rate": 1.4058103753879405e-06,
      "loss": 0.0106,
      "step": 2628560
    },
    {
      "epoch": 4.301728821769669,
      "grad_norm": 0.1727951169013977,
      "learning_rate": 1.4057444831744236e-06,
      "loss": 0.0078,
      "step": 2628580
    },
    {
      "epoch": 4.301761552208323,
      "grad_norm": 0.16724921762943268,
      "learning_rate": 1.4056785909609064e-06,
      "loss": 0.0131,
      "step": 2628600
    },
    {
      "epoch": 4.301794282646976,
      "grad_norm": 0.22895969450473785,
      "learning_rate": 1.4056126987473893e-06,
      "loss": 0.0061,
      "step": 2628620
    },
    {
      "epoch": 4.30182701308563,
      "grad_norm": 0.28581926226615906,
      "learning_rate": 1.405546806533872e-06,
      "loss": 0.014,
      "step": 2628640
    },
    {
      "epoch": 4.3018597435242825,
      "grad_norm": 0.10283727198839188,
      "learning_rate": 1.4054809143203548e-06,
      "loss": 0.0088,
      "step": 2628660
    },
    {
      "epoch": 4.301892473962936,
      "grad_norm": 0.557300329208374,
      "learning_rate": 1.4054150221068377e-06,
      "loss": 0.0076,
      "step": 2628680
    },
    {
      "epoch": 4.30192520440159,
      "grad_norm": 0.3292423486709595,
      "learning_rate": 1.4053491298933205e-06,
      "loss": 0.009,
      "step": 2628700
    },
    {
      "epoch": 4.301957934840242,
      "grad_norm": 0.0657791793346405,
      "learning_rate": 1.4052832376798034e-06,
      "loss": 0.0089,
      "step": 2628720
    },
    {
      "epoch": 4.301990665278896,
      "grad_norm": 0.31093457341194153,
      "learning_rate": 1.4052173454662862e-06,
      "loss": 0.0098,
      "step": 2628740
    },
    {
      "epoch": 4.3020233957175495,
      "grad_norm": 0.41140255331993103,
      "learning_rate": 1.4051514532527693e-06,
      "loss": 0.0121,
      "step": 2628760
    },
    {
      "epoch": 4.302056126156203,
      "grad_norm": 0.8064634203910828,
      "learning_rate": 1.4050855610392523e-06,
      "loss": 0.0096,
      "step": 2628780
    },
    {
      "epoch": 4.302088856594856,
      "grad_norm": 0.13447530567646027,
      "learning_rate": 1.405019668825735e-06,
      "loss": 0.01,
      "step": 2628800
    },
    {
      "epoch": 4.302121587033509,
      "grad_norm": 0.20790864527225494,
      "learning_rate": 1.404953776612218e-06,
      "loss": 0.0066,
      "step": 2628820
    },
    {
      "epoch": 4.302154317472163,
      "grad_norm": 0.06984365731477737,
      "learning_rate": 1.4048878843987007e-06,
      "loss": 0.0077,
      "step": 2628840
    },
    {
      "epoch": 4.302187047910816,
      "grad_norm": 0.3143981099128723,
      "learning_rate": 1.4048219921851835e-06,
      "loss": 0.0099,
      "step": 2628860
    },
    {
      "epoch": 4.302219778349469,
      "grad_norm": 0.25816047191619873,
      "learning_rate": 1.4047560999716664e-06,
      "loss": 0.0089,
      "step": 2628880
    },
    {
      "epoch": 4.302252508788123,
      "grad_norm": 0.5529329776763916,
      "learning_rate": 1.4046902077581491e-06,
      "loss": 0.0156,
      "step": 2628900
    },
    {
      "epoch": 4.3022852392267765,
      "grad_norm": 0.17031696438789368,
      "learning_rate": 1.404624315544632e-06,
      "loss": 0.0091,
      "step": 2628920
    },
    {
      "epoch": 4.302317969665429,
      "grad_norm": 0.2976253628730774,
      "learning_rate": 1.4045584233311153e-06,
      "loss": 0.009,
      "step": 2628940
    },
    {
      "epoch": 4.302350700104083,
      "grad_norm": 0.1860472559928894,
      "learning_rate": 1.404492531117598e-06,
      "loss": 0.0058,
      "step": 2628960
    },
    {
      "epoch": 4.302383430542736,
      "grad_norm": 0.28441905975341797,
      "learning_rate": 1.404426638904081e-06,
      "loss": 0.0113,
      "step": 2628980
    },
    {
      "epoch": 4.302416160981389,
      "grad_norm": 0.1208849847316742,
      "learning_rate": 1.4043607466905637e-06,
      "loss": 0.0074,
      "step": 2629000
    },
    {
      "epoch": 4.302448891420043,
      "grad_norm": 0.09021767973899841,
      "learning_rate": 1.4042948544770466e-06,
      "loss": 0.0075,
      "step": 2629020
    },
    {
      "epoch": 4.302481621858696,
      "grad_norm": 0.30996397137641907,
      "learning_rate": 1.4042289622635294e-06,
      "loss": 0.0125,
      "step": 2629040
    },
    {
      "epoch": 4.302514352297349,
      "grad_norm": 0.291932612657547,
      "learning_rate": 1.4041630700500123e-06,
      "loss": 0.0076,
      "step": 2629060
    },
    {
      "epoch": 4.302547082736003,
      "grad_norm": 0.3926922678947449,
      "learning_rate": 1.404097177836495e-06,
      "loss": 0.0133,
      "step": 2629080
    },
    {
      "epoch": 4.302579813174656,
      "grad_norm": 0.6157454252243042,
      "learning_rate": 1.4040312856229778e-06,
      "loss": 0.0102,
      "step": 2629100
    },
    {
      "epoch": 4.30261254361331,
      "grad_norm": 0.17952503263950348,
      "learning_rate": 1.403965393409461e-06,
      "loss": 0.0104,
      "step": 2629120
    },
    {
      "epoch": 4.3026452740519625,
      "grad_norm": 0.3062426745891571,
      "learning_rate": 1.403899501195944e-06,
      "loss": 0.0121,
      "step": 2629140
    },
    {
      "epoch": 4.302678004490616,
      "grad_norm": 0.16696785390377045,
      "learning_rate": 1.4038336089824267e-06,
      "loss": 0.0068,
      "step": 2629160
    },
    {
      "epoch": 4.30271073492927,
      "grad_norm": 0.10492584109306335,
      "learning_rate": 1.4037677167689096e-06,
      "loss": 0.0159,
      "step": 2629180
    },
    {
      "epoch": 4.302743465367923,
      "grad_norm": 0.15766438841819763,
      "learning_rate": 1.4037018245553924e-06,
      "loss": 0.0077,
      "step": 2629200
    },
    {
      "epoch": 4.302776195806576,
      "grad_norm": 0.15832790732383728,
      "learning_rate": 1.4036359323418753e-06,
      "loss": 0.0078,
      "step": 2629220
    },
    {
      "epoch": 4.30280892624523,
      "grad_norm": 0.16305509209632874,
      "learning_rate": 1.403570040128358e-06,
      "loss": 0.0098,
      "step": 2629240
    },
    {
      "epoch": 4.302841656683883,
      "grad_norm": 0.17029818892478943,
      "learning_rate": 1.403504147914841e-06,
      "loss": 0.0097,
      "step": 2629260
    },
    {
      "epoch": 4.302874387122536,
      "grad_norm": 0.24917514622211456,
      "learning_rate": 1.403438255701324e-06,
      "loss": 0.0114,
      "step": 2629280
    },
    {
      "epoch": 4.3029071175611895,
      "grad_norm": 0.5162044167518616,
      "learning_rate": 1.403372363487807e-06,
      "loss": 0.0064,
      "step": 2629300
    },
    {
      "epoch": 4.302939847999843,
      "grad_norm": 0.22770754992961884,
      "learning_rate": 1.4033064712742896e-06,
      "loss": 0.0076,
      "step": 2629320
    },
    {
      "epoch": 4.302972578438496,
      "grad_norm": 0.4065494239330292,
      "learning_rate": 1.4032405790607726e-06,
      "loss": 0.0119,
      "step": 2629340
    },
    {
      "epoch": 4.303005308877149,
      "grad_norm": 0.3745168447494507,
      "learning_rate": 1.4031746868472553e-06,
      "loss": 0.0087,
      "step": 2629360
    },
    {
      "epoch": 4.303038039315803,
      "grad_norm": 0.31082773208618164,
      "learning_rate": 1.4031087946337383e-06,
      "loss": 0.0066,
      "step": 2629380
    },
    {
      "epoch": 4.303070769754457,
      "grad_norm": 0.12977035343647003,
      "learning_rate": 1.403042902420221e-06,
      "loss": 0.0058,
      "step": 2629400
    },
    {
      "epoch": 4.303103500193109,
      "grad_norm": 0.3512972295284271,
      "learning_rate": 1.402977010206704e-06,
      "loss": 0.0077,
      "step": 2629420
    },
    {
      "epoch": 4.303136230631763,
      "grad_norm": 0.31605473160743713,
      "learning_rate": 1.4029111179931867e-06,
      "loss": 0.0089,
      "step": 2629440
    },
    {
      "epoch": 4.3031689610704165,
      "grad_norm": 0.12486691027879715,
      "learning_rate": 1.4028452257796699e-06,
      "loss": 0.0084,
      "step": 2629460
    },
    {
      "epoch": 4.30320169150907,
      "grad_norm": 0.6605022549629211,
      "learning_rate": 1.4027793335661526e-06,
      "loss": 0.0141,
      "step": 2629480
    },
    {
      "epoch": 4.303234421947723,
      "grad_norm": 0.6403170824050903,
      "learning_rate": 1.4027134413526356e-06,
      "loss": 0.0147,
      "step": 2629500
    },
    {
      "epoch": 4.303267152386376,
      "grad_norm": 0.21659445762634277,
      "learning_rate": 1.4026475491391183e-06,
      "loss": 0.0077,
      "step": 2629520
    },
    {
      "epoch": 4.30329988282503,
      "grad_norm": 0.26033809781074524,
      "learning_rate": 1.4025816569256013e-06,
      "loss": 0.0107,
      "step": 2629540
    },
    {
      "epoch": 4.303332613263683,
      "grad_norm": 0.3554435074329376,
      "learning_rate": 1.402515764712084e-06,
      "loss": 0.0117,
      "step": 2629560
    },
    {
      "epoch": 4.303365343702336,
      "grad_norm": 0.16671043634414673,
      "learning_rate": 1.402449872498567e-06,
      "loss": 0.0085,
      "step": 2629580
    },
    {
      "epoch": 4.30339807414099,
      "grad_norm": 0.06291521340608597,
      "learning_rate": 1.4023839802850497e-06,
      "loss": 0.0074,
      "step": 2629600
    },
    {
      "epoch": 4.3034308045796426,
      "grad_norm": 0.055493004620075226,
      "learning_rate": 1.4023180880715326e-06,
      "loss": 0.0075,
      "step": 2629620
    },
    {
      "epoch": 4.303463535018296,
      "grad_norm": 0.15166501700878143,
      "learning_rate": 1.4022521958580156e-06,
      "loss": 0.0084,
      "step": 2629640
    },
    {
      "epoch": 4.30349626545695,
      "grad_norm": 0.13222184777259827,
      "learning_rate": 1.4021863036444985e-06,
      "loss": 0.0084,
      "step": 2629660
    },
    {
      "epoch": 4.303528995895603,
      "grad_norm": 0.3972357511520386,
      "learning_rate": 1.4021204114309813e-06,
      "loss": 0.0065,
      "step": 2629680
    },
    {
      "epoch": 4.303561726334256,
      "grad_norm": 0.47731438279151917,
      "learning_rate": 1.4020545192174642e-06,
      "loss": 0.0077,
      "step": 2629700
    },
    {
      "epoch": 4.30359445677291,
      "grad_norm": 0.16298124194145203,
      "learning_rate": 1.401988627003947e-06,
      "loss": 0.0093,
      "step": 2629720
    },
    {
      "epoch": 4.303627187211563,
      "grad_norm": 0.21983566880226135,
      "learning_rate": 1.40192273479043e-06,
      "loss": 0.0069,
      "step": 2629740
    },
    {
      "epoch": 4.303659917650216,
      "grad_norm": 0.30258801579475403,
      "learning_rate": 1.4018568425769127e-06,
      "loss": 0.0079,
      "step": 2629760
    },
    {
      "epoch": 4.3036926480888695,
      "grad_norm": 0.3215146064758301,
      "learning_rate": 1.4017909503633956e-06,
      "loss": 0.0075,
      "step": 2629780
    },
    {
      "epoch": 4.303725378527523,
      "grad_norm": 0.08756539225578308,
      "learning_rate": 1.4017250581498784e-06,
      "loss": 0.0092,
      "step": 2629800
    },
    {
      "epoch": 4.303758108966177,
      "grad_norm": 0.1686224490404129,
      "learning_rate": 1.4016591659363615e-06,
      "loss": 0.0093,
      "step": 2629820
    },
    {
      "epoch": 4.303790839404829,
      "grad_norm": 0.4986540377140045,
      "learning_rate": 1.4015932737228445e-06,
      "loss": 0.0118,
      "step": 2629840
    },
    {
      "epoch": 4.303823569843483,
      "grad_norm": 0.6079303026199341,
      "learning_rate": 1.4015273815093272e-06,
      "loss": 0.0072,
      "step": 2629860
    },
    {
      "epoch": 4.303856300282137,
      "grad_norm": 0.27454429864883423,
      "learning_rate": 1.40146148929581e-06,
      "loss": 0.0068,
      "step": 2629880
    },
    {
      "epoch": 4.303889030720789,
      "grad_norm": 0.2235439121723175,
      "learning_rate": 1.401395597082293e-06,
      "loss": 0.0082,
      "step": 2629900
    },
    {
      "epoch": 4.303921761159443,
      "grad_norm": 0.08285695314407349,
      "learning_rate": 1.4013297048687756e-06,
      "loss": 0.0081,
      "step": 2629920
    },
    {
      "epoch": 4.3039544915980965,
      "grad_norm": 0.060703061521053314,
      "learning_rate": 1.4012638126552586e-06,
      "loss": 0.0072,
      "step": 2629940
    },
    {
      "epoch": 4.30398722203675,
      "grad_norm": 0.1264924705028534,
      "learning_rate": 1.4011979204417413e-06,
      "loss": 0.0096,
      "step": 2629960
    },
    {
      "epoch": 4.304019952475403,
      "grad_norm": 0.2969714403152466,
      "learning_rate": 1.4011320282282243e-06,
      "loss": 0.0073,
      "step": 2629980
    },
    {
      "epoch": 4.304052682914056,
      "grad_norm": 0.3251379430294037,
      "learning_rate": 1.4010661360147075e-06,
      "loss": 0.0087,
      "step": 2630000
    },
    {
      "epoch": 4.30408541335271,
      "grad_norm": 0.20145206153392792,
      "learning_rate": 1.4010002438011902e-06,
      "loss": 0.0127,
      "step": 2630020
    },
    {
      "epoch": 4.304118143791363,
      "grad_norm": 0.30704379081726074,
      "learning_rate": 1.4009343515876731e-06,
      "loss": 0.0062,
      "step": 2630040
    },
    {
      "epoch": 4.304150874230016,
      "grad_norm": 0.12262583523988724,
      "learning_rate": 1.4008684593741559e-06,
      "loss": 0.0067,
      "step": 2630060
    },
    {
      "epoch": 4.30418360466867,
      "grad_norm": 0.6586158871650696,
      "learning_rate": 1.4008025671606386e-06,
      "loss": 0.0095,
      "step": 2630080
    },
    {
      "epoch": 4.3042163351073235,
      "grad_norm": 0.34565311670303345,
      "learning_rate": 1.4007366749471216e-06,
      "loss": 0.007,
      "step": 2630100
    },
    {
      "epoch": 4.304249065545976,
      "grad_norm": 0.25492385029792786,
      "learning_rate": 1.4006707827336043e-06,
      "loss": 0.0077,
      "step": 2630120
    },
    {
      "epoch": 4.30428179598463,
      "grad_norm": 0.059564996510744095,
      "learning_rate": 1.4006048905200873e-06,
      "loss": 0.0067,
      "step": 2630140
    },
    {
      "epoch": 4.304314526423283,
      "grad_norm": 0.28381505608558655,
      "learning_rate": 1.4005389983065704e-06,
      "loss": 0.012,
      "step": 2630160
    },
    {
      "epoch": 4.304347256861936,
      "grad_norm": 0.06188994646072388,
      "learning_rate": 1.4004731060930532e-06,
      "loss": 0.0086,
      "step": 2630180
    },
    {
      "epoch": 4.30437998730059,
      "grad_norm": 0.4229414463043213,
      "learning_rate": 1.4004072138795361e-06,
      "loss": 0.009,
      "step": 2630200
    },
    {
      "epoch": 4.304412717739243,
      "grad_norm": 0.305385559797287,
      "learning_rate": 1.4003413216660189e-06,
      "loss": 0.0076,
      "step": 2630220
    },
    {
      "epoch": 4.304445448177897,
      "grad_norm": 0.10121921449899673,
      "learning_rate": 1.4002754294525018e-06,
      "loss": 0.0059,
      "step": 2630240
    },
    {
      "epoch": 4.30447817861655,
      "grad_norm": 0.5038895010948181,
      "learning_rate": 1.4002095372389846e-06,
      "loss": 0.0078,
      "step": 2630260
    },
    {
      "epoch": 4.304510909055203,
      "grad_norm": 0.3283756375312805,
      "learning_rate": 1.4001436450254675e-06,
      "loss": 0.0114,
      "step": 2630280
    },
    {
      "epoch": 4.304543639493857,
      "grad_norm": 0.20730577409267426,
      "learning_rate": 1.4000777528119502e-06,
      "loss": 0.0072,
      "step": 2630300
    },
    {
      "epoch": 4.3045763699325095,
      "grad_norm": 0.2526349723339081,
      "learning_rate": 1.400011860598433e-06,
      "loss": 0.0093,
      "step": 2630320
    },
    {
      "epoch": 4.304609100371163,
      "grad_norm": 0.07925397157669067,
      "learning_rate": 1.3999459683849161e-06,
      "loss": 0.0081,
      "step": 2630340
    },
    {
      "epoch": 4.304641830809817,
      "grad_norm": 0.12108653038740158,
      "learning_rate": 1.399880076171399e-06,
      "loss": 0.0105,
      "step": 2630360
    },
    {
      "epoch": 4.30467456124847,
      "grad_norm": 0.13869386911392212,
      "learning_rate": 1.3998141839578818e-06,
      "loss": 0.0087,
      "step": 2630380
    },
    {
      "epoch": 4.304707291687123,
      "grad_norm": 0.11322671920061111,
      "learning_rate": 1.3997482917443648e-06,
      "loss": 0.0112,
      "step": 2630400
    },
    {
      "epoch": 4.3047400221257766,
      "grad_norm": 0.27671903371810913,
      "learning_rate": 1.3996823995308475e-06,
      "loss": 0.0093,
      "step": 2630420
    },
    {
      "epoch": 4.30477275256443,
      "grad_norm": 0.2814132869243622,
      "learning_rate": 1.3996165073173305e-06,
      "loss": 0.009,
      "step": 2630440
    },
    {
      "epoch": 4.304805483003083,
      "grad_norm": 0.17846985161304474,
      "learning_rate": 1.3995506151038132e-06,
      "loss": 0.0062,
      "step": 2630460
    },
    {
      "epoch": 4.3048382134417364,
      "grad_norm": 0.16772377490997314,
      "learning_rate": 1.3994847228902962e-06,
      "loss": 0.0093,
      "step": 2630480
    },
    {
      "epoch": 4.30487094388039,
      "grad_norm": 0.12231168150901794,
      "learning_rate": 1.399418830676779e-06,
      "loss": 0.0091,
      "step": 2630500
    },
    {
      "epoch": 4.304903674319044,
      "grad_norm": 0.37817713618278503,
      "learning_rate": 1.399352938463262e-06,
      "loss": 0.0137,
      "step": 2630520
    },
    {
      "epoch": 4.304936404757696,
      "grad_norm": 0.23726876080036163,
      "learning_rate": 1.3992870462497448e-06,
      "loss": 0.0069,
      "step": 2630540
    },
    {
      "epoch": 4.30496913519635,
      "grad_norm": 0.4869949221611023,
      "learning_rate": 1.3992211540362278e-06,
      "loss": 0.0095,
      "step": 2630560
    },
    {
      "epoch": 4.3050018656350035,
      "grad_norm": 0.20442932844161987,
      "learning_rate": 1.3991552618227105e-06,
      "loss": 0.0059,
      "step": 2630580
    },
    {
      "epoch": 4.305034596073656,
      "grad_norm": 0.1269894540309906,
      "learning_rate": 1.3990893696091935e-06,
      "loss": 0.0088,
      "step": 2630600
    },
    {
      "epoch": 4.30506732651231,
      "grad_norm": 0.6404164433479309,
      "learning_rate": 1.3990234773956762e-06,
      "loss": 0.0097,
      "step": 2630620
    },
    {
      "epoch": 4.305100056950963,
      "grad_norm": 0.358227401971817,
      "learning_rate": 1.3989575851821591e-06,
      "loss": 0.0116,
      "step": 2630640
    },
    {
      "epoch": 4.305132787389617,
      "grad_norm": 0.15656627714633942,
      "learning_rate": 1.3988916929686419e-06,
      "loss": 0.0123,
      "step": 2630660
    },
    {
      "epoch": 4.30516551782827,
      "grad_norm": 0.10798176378011703,
      "learning_rate": 1.3988258007551248e-06,
      "loss": 0.0094,
      "step": 2630680
    },
    {
      "epoch": 4.305198248266923,
      "grad_norm": 0.05411382019519806,
      "learning_rate": 1.3987599085416078e-06,
      "loss": 0.0085,
      "step": 2630700
    },
    {
      "epoch": 4.305230978705577,
      "grad_norm": 0.46115127205848694,
      "learning_rate": 1.3986940163280907e-06,
      "loss": 0.0134,
      "step": 2630720
    },
    {
      "epoch": 4.30526370914423,
      "grad_norm": 0.12359654903411865,
      "learning_rate": 1.3986281241145735e-06,
      "loss": 0.012,
      "step": 2630740
    },
    {
      "epoch": 4.305296439582883,
      "grad_norm": 0.481723427772522,
      "learning_rate": 1.3985622319010564e-06,
      "loss": 0.0128,
      "step": 2630760
    },
    {
      "epoch": 4.305329170021537,
      "grad_norm": 0.45096468925476074,
      "learning_rate": 1.3984963396875392e-06,
      "loss": 0.0097,
      "step": 2630780
    },
    {
      "epoch": 4.3053619004601895,
      "grad_norm": 0.08811011165380478,
      "learning_rate": 1.3984304474740221e-06,
      "loss": 0.0043,
      "step": 2630800
    },
    {
      "epoch": 4.305394630898843,
      "grad_norm": 0.522966206073761,
      "learning_rate": 1.3983645552605049e-06,
      "loss": 0.0079,
      "step": 2630820
    },
    {
      "epoch": 4.305427361337497,
      "grad_norm": 0.2405634969472885,
      "learning_rate": 1.3982986630469878e-06,
      "loss": 0.0063,
      "step": 2630840
    },
    {
      "epoch": 4.30546009177615,
      "grad_norm": 0.09495680779218674,
      "learning_rate": 1.3982327708334706e-06,
      "loss": 0.0094,
      "step": 2630860
    },
    {
      "epoch": 4.305492822214803,
      "grad_norm": 0.25223350524902344,
      "learning_rate": 1.3981668786199537e-06,
      "loss": 0.0076,
      "step": 2630880
    },
    {
      "epoch": 4.305525552653457,
      "grad_norm": 0.29813528060913086,
      "learning_rate": 1.3981009864064365e-06,
      "loss": 0.0106,
      "step": 2630900
    },
    {
      "epoch": 4.30555828309211,
      "grad_norm": 0.3243032693862915,
      "learning_rate": 1.3980350941929194e-06,
      "loss": 0.0096,
      "step": 2630920
    },
    {
      "epoch": 4.305591013530764,
      "grad_norm": 0.3044542968273163,
      "learning_rate": 1.3979692019794022e-06,
      "loss": 0.006,
      "step": 2630940
    },
    {
      "epoch": 4.3056237439694165,
      "grad_norm": 0.42819374799728394,
      "learning_rate": 1.397903309765885e-06,
      "loss": 0.0134,
      "step": 2630960
    },
    {
      "epoch": 4.30565647440807,
      "grad_norm": 0.3485570251941681,
      "learning_rate": 1.3978374175523678e-06,
      "loss": 0.0106,
      "step": 2630980
    },
    {
      "epoch": 4.305689204846724,
      "grad_norm": 0.4304218888282776,
      "learning_rate": 1.3977715253388508e-06,
      "loss": 0.0092,
      "step": 2631000
    },
    {
      "epoch": 4.305721935285376,
      "grad_norm": 0.05062103644013405,
      "learning_rate": 1.3977056331253335e-06,
      "loss": 0.0093,
      "step": 2631020
    },
    {
      "epoch": 4.30575466572403,
      "grad_norm": 0.34885019063949585,
      "learning_rate": 1.3976397409118167e-06,
      "loss": 0.0093,
      "step": 2631040
    },
    {
      "epoch": 4.305787396162684,
      "grad_norm": 0.2554420828819275,
      "learning_rate": 1.3975738486982994e-06,
      "loss": 0.0073,
      "step": 2631060
    },
    {
      "epoch": 4.305820126601336,
      "grad_norm": 0.16057713329792023,
      "learning_rate": 1.3975079564847824e-06,
      "loss": 0.0079,
      "step": 2631080
    },
    {
      "epoch": 4.30585285703999,
      "grad_norm": 0.10685594379901886,
      "learning_rate": 1.3974420642712651e-06,
      "loss": 0.0091,
      "step": 2631100
    },
    {
      "epoch": 4.3058855874786435,
      "grad_norm": 0.29229164123535156,
      "learning_rate": 1.397376172057748e-06,
      "loss": 0.0077,
      "step": 2631120
    },
    {
      "epoch": 4.305918317917297,
      "grad_norm": 0.5826478004455566,
      "learning_rate": 1.3973102798442308e-06,
      "loss": 0.0113,
      "step": 2631140
    },
    {
      "epoch": 4.30595104835595,
      "grad_norm": 0.2234116792678833,
      "learning_rate": 1.3972443876307138e-06,
      "loss": 0.0062,
      "step": 2631160
    },
    {
      "epoch": 4.305983778794603,
      "grad_norm": 0.6624338030815125,
      "learning_rate": 1.3971784954171965e-06,
      "loss": 0.0087,
      "step": 2631180
    },
    {
      "epoch": 4.306016509233257,
      "grad_norm": 0.2172601819038391,
      "learning_rate": 1.3971126032036795e-06,
      "loss": 0.0095,
      "step": 2631200
    },
    {
      "epoch": 4.30604923967191,
      "grad_norm": 0.17519326508045197,
      "learning_rate": 1.3970467109901626e-06,
      "loss": 0.0089,
      "step": 2631220
    },
    {
      "epoch": 4.306081970110563,
      "grad_norm": 0.214316263794899,
      "learning_rate": 1.3969808187766454e-06,
      "loss": 0.0119,
      "step": 2631240
    },
    {
      "epoch": 4.306114700549217,
      "grad_norm": 0.401958703994751,
      "learning_rate": 1.3969149265631283e-06,
      "loss": 0.008,
      "step": 2631260
    },
    {
      "epoch": 4.3061474309878705,
      "grad_norm": 0.6446911096572876,
      "learning_rate": 1.396849034349611e-06,
      "loss": 0.0128,
      "step": 2631280
    },
    {
      "epoch": 4.306180161426523,
      "grad_norm": 0.6492939591407776,
      "learning_rate": 1.3967831421360938e-06,
      "loss": 0.01,
      "step": 2631300
    },
    {
      "epoch": 4.306212891865177,
      "grad_norm": 0.9083209037780762,
      "learning_rate": 1.3967172499225767e-06,
      "loss": 0.0103,
      "step": 2631320
    },
    {
      "epoch": 4.30624562230383,
      "grad_norm": 0.3194040358066559,
      "learning_rate": 1.3966513577090595e-06,
      "loss": 0.0048,
      "step": 2631340
    },
    {
      "epoch": 4.306278352742483,
      "grad_norm": 0.3400713801383972,
      "learning_rate": 1.3965854654955424e-06,
      "loss": 0.0082,
      "step": 2631360
    },
    {
      "epoch": 4.306311083181137,
      "grad_norm": 0.09541655331850052,
      "learning_rate": 1.3965195732820252e-06,
      "loss": 0.0087,
      "step": 2631380
    },
    {
      "epoch": 4.30634381361979,
      "grad_norm": 0.09991076588630676,
      "learning_rate": 1.3964536810685083e-06,
      "loss": 0.0092,
      "step": 2631400
    },
    {
      "epoch": 4.306376544058444,
      "grad_norm": 0.4213064908981323,
      "learning_rate": 1.3963877888549913e-06,
      "loss": 0.0083,
      "step": 2631420
    },
    {
      "epoch": 4.3064092744970965,
      "grad_norm": 0.1732291430234909,
      "learning_rate": 1.396321896641474e-06,
      "loss": 0.0111,
      "step": 2631440
    },
    {
      "epoch": 4.30644200493575,
      "grad_norm": 0.35709890723228455,
      "learning_rate": 1.396256004427957e-06,
      "loss": 0.0114,
      "step": 2631460
    },
    {
      "epoch": 4.306474735374404,
      "grad_norm": 0.15274979174137115,
      "learning_rate": 1.3961901122144397e-06,
      "loss": 0.0089,
      "step": 2631480
    },
    {
      "epoch": 4.306507465813056,
      "grad_norm": 0.23836298286914825,
      "learning_rate": 1.3961242200009225e-06,
      "loss": 0.0091,
      "step": 2631500
    },
    {
      "epoch": 4.30654019625171,
      "grad_norm": 0.17494548857212067,
      "learning_rate": 1.3960583277874054e-06,
      "loss": 0.0118,
      "step": 2631520
    },
    {
      "epoch": 4.306572926690364,
      "grad_norm": 0.1714784950017929,
      "learning_rate": 1.3959924355738882e-06,
      "loss": 0.0071,
      "step": 2631540
    },
    {
      "epoch": 4.306605657129017,
      "grad_norm": 0.38840436935424805,
      "learning_rate": 1.3959265433603711e-06,
      "loss": 0.0081,
      "step": 2631560
    },
    {
      "epoch": 4.30663838756767,
      "grad_norm": 0.3117368221282959,
      "learning_rate": 1.3958606511468543e-06,
      "loss": 0.0119,
      "step": 2631580
    },
    {
      "epoch": 4.3066711180063235,
      "grad_norm": 0.09481090307235718,
      "learning_rate": 1.395794758933337e-06,
      "loss": 0.0114,
      "step": 2631600
    },
    {
      "epoch": 4.306703848444977,
      "grad_norm": 0.6169362664222717,
      "learning_rate": 1.39572886671982e-06,
      "loss": 0.0102,
      "step": 2631620
    },
    {
      "epoch": 4.30673657888363,
      "grad_norm": 0.189359650015831,
      "learning_rate": 1.3956629745063027e-06,
      "loss": 0.0083,
      "step": 2631640
    },
    {
      "epoch": 4.306769309322283,
      "grad_norm": 0.3515624701976776,
      "learning_rate": 1.3955970822927857e-06,
      "loss": 0.008,
      "step": 2631660
    },
    {
      "epoch": 4.306802039760937,
      "grad_norm": 0.1313236504793167,
      "learning_rate": 1.3955311900792684e-06,
      "loss": 0.008,
      "step": 2631680
    },
    {
      "epoch": 4.306834770199591,
      "grad_norm": 0.09274075925350189,
      "learning_rate": 1.3954652978657513e-06,
      "loss": 0.0106,
      "step": 2631700
    },
    {
      "epoch": 4.306867500638243,
      "grad_norm": 0.1610349714756012,
      "learning_rate": 1.395399405652234e-06,
      "loss": 0.0093,
      "step": 2631720
    },
    {
      "epoch": 4.306900231076897,
      "grad_norm": 0.31376710534095764,
      "learning_rate": 1.3953335134387168e-06,
      "loss": 0.0109,
      "step": 2631740
    },
    {
      "epoch": 4.3069329615155505,
      "grad_norm": 0.31906262040138245,
      "learning_rate": 1.3952676212252e-06,
      "loss": 0.0098,
      "step": 2631760
    },
    {
      "epoch": 4.306965691954203,
      "grad_norm": 0.14107747375965118,
      "learning_rate": 1.395201729011683e-06,
      "loss": 0.0081,
      "step": 2631780
    },
    {
      "epoch": 4.306998422392857,
      "grad_norm": 0.32672908902168274,
      "learning_rate": 1.3951358367981657e-06,
      "loss": 0.0099,
      "step": 2631800
    },
    {
      "epoch": 4.30703115283151,
      "grad_norm": 0.15485043823719025,
      "learning_rate": 1.3950699445846486e-06,
      "loss": 0.0059,
      "step": 2631820
    },
    {
      "epoch": 4.307063883270164,
      "grad_norm": 0.19125330448150635,
      "learning_rate": 1.3950040523711314e-06,
      "loss": 0.0074,
      "step": 2631840
    },
    {
      "epoch": 4.307096613708817,
      "grad_norm": 0.22186626493930817,
      "learning_rate": 1.3949381601576143e-06,
      "loss": 0.0112,
      "step": 2631860
    },
    {
      "epoch": 4.30712934414747,
      "grad_norm": 0.09535738080739975,
      "learning_rate": 1.394872267944097e-06,
      "loss": 0.0094,
      "step": 2631880
    },
    {
      "epoch": 4.307162074586124,
      "grad_norm": 0.22734954953193665,
      "learning_rate": 1.39480637573058e-06,
      "loss": 0.0077,
      "step": 2631900
    },
    {
      "epoch": 4.307194805024777,
      "grad_norm": 0.2668752670288086,
      "learning_rate": 1.394740483517063e-06,
      "loss": 0.0136,
      "step": 2631920
    },
    {
      "epoch": 4.30722753546343,
      "grad_norm": 0.6671692132949829,
      "learning_rate": 1.394674591303546e-06,
      "loss": 0.0074,
      "step": 2631940
    },
    {
      "epoch": 4.307260265902084,
      "grad_norm": 0.09130392968654633,
      "learning_rate": 1.3946086990900287e-06,
      "loss": 0.0085,
      "step": 2631960
    },
    {
      "epoch": 4.307292996340737,
      "grad_norm": 0.17465336620807648,
      "learning_rate": 1.3945428068765116e-06,
      "loss": 0.0104,
      "step": 2631980
    },
    {
      "epoch": 4.30732572677939,
      "grad_norm": 0.28311046957969666,
      "learning_rate": 1.3944769146629943e-06,
      "loss": 0.0104,
      "step": 2632000
    },
    {
      "epoch": 4.307358457218044,
      "grad_norm": 0.3355170488357544,
      "learning_rate": 1.3944110224494773e-06,
      "loss": 0.0066,
      "step": 2632020
    },
    {
      "epoch": 4.307391187656697,
      "grad_norm": 0.13738922774791718,
      "learning_rate": 1.39434513023596e-06,
      "loss": 0.0073,
      "step": 2632040
    },
    {
      "epoch": 4.30742391809535,
      "grad_norm": 0.34933972358703613,
      "learning_rate": 1.394279238022443e-06,
      "loss": 0.0097,
      "step": 2632060
    },
    {
      "epoch": 4.307456648534004,
      "grad_norm": 0.12091284990310669,
      "learning_rate": 1.3942133458089257e-06,
      "loss": 0.0083,
      "step": 2632080
    },
    {
      "epoch": 4.307489378972657,
      "grad_norm": 0.18503177165985107,
      "learning_rate": 1.3941474535954089e-06,
      "loss": 0.0063,
      "step": 2632100
    },
    {
      "epoch": 4.307522109411311,
      "grad_norm": 0.16256004571914673,
      "learning_rate": 1.3940815613818916e-06,
      "loss": 0.0079,
      "step": 2632120
    },
    {
      "epoch": 4.3075548398499635,
      "grad_norm": 0.15816594660282135,
      "learning_rate": 1.3940156691683746e-06,
      "loss": 0.0072,
      "step": 2632140
    },
    {
      "epoch": 4.307587570288617,
      "grad_norm": 0.2313460111618042,
      "learning_rate": 1.3939497769548573e-06,
      "loss": 0.0082,
      "step": 2632160
    },
    {
      "epoch": 4.307620300727271,
      "grad_norm": 0.10935898870229721,
      "learning_rate": 1.3938838847413403e-06,
      "loss": 0.0094,
      "step": 2632180
    },
    {
      "epoch": 4.307653031165923,
      "grad_norm": 0.38199383020401,
      "learning_rate": 1.393817992527823e-06,
      "loss": 0.0155,
      "step": 2632200
    },
    {
      "epoch": 4.307685761604577,
      "grad_norm": 1.3341474533081055,
      "learning_rate": 1.393752100314306e-06,
      "loss": 0.0082,
      "step": 2632220
    },
    {
      "epoch": 4.3077184920432305,
      "grad_norm": 0.32696014642715454,
      "learning_rate": 1.3936862081007887e-06,
      "loss": 0.0066,
      "step": 2632240
    },
    {
      "epoch": 4.307751222481883,
      "grad_norm": 0.16076073050498962,
      "learning_rate": 1.3936203158872717e-06,
      "loss": 0.0079,
      "step": 2632260
    },
    {
      "epoch": 4.307783952920537,
      "grad_norm": 0.10412407666444778,
      "learning_rate": 1.3935544236737546e-06,
      "loss": 0.0066,
      "step": 2632280
    },
    {
      "epoch": 4.30781668335919,
      "grad_norm": 0.04762724041938782,
      "learning_rate": 1.3934885314602376e-06,
      "loss": 0.0063,
      "step": 2632300
    },
    {
      "epoch": 4.307849413797844,
      "grad_norm": 0.34290337562561035,
      "learning_rate": 1.3934226392467203e-06,
      "loss": 0.011,
      "step": 2632320
    },
    {
      "epoch": 4.307882144236497,
      "grad_norm": 0.11896660923957825,
      "learning_rate": 1.3933567470332033e-06,
      "loss": 0.0169,
      "step": 2632340
    },
    {
      "epoch": 4.30791487467515,
      "grad_norm": 0.2490084022283554,
      "learning_rate": 1.393290854819686e-06,
      "loss": 0.0075,
      "step": 2632360
    },
    {
      "epoch": 4.307947605113804,
      "grad_norm": 0.2824920415878296,
      "learning_rate": 1.393224962606169e-06,
      "loss": 0.013,
      "step": 2632380
    },
    {
      "epoch": 4.3079803355524575,
      "grad_norm": 0.36488935351371765,
      "learning_rate": 1.3931590703926517e-06,
      "loss": 0.0112,
      "step": 2632400
    },
    {
      "epoch": 4.30801306599111,
      "grad_norm": 0.1880701333284378,
      "learning_rate": 1.3930931781791346e-06,
      "loss": 0.0085,
      "step": 2632420
    },
    {
      "epoch": 4.308045796429764,
      "grad_norm": 0.2448655068874359,
      "learning_rate": 1.3930272859656174e-06,
      "loss": 0.0081,
      "step": 2632440
    },
    {
      "epoch": 4.308078526868417,
      "grad_norm": 0.16372816264629364,
      "learning_rate": 1.3929613937521005e-06,
      "loss": 0.0107,
      "step": 2632460
    },
    {
      "epoch": 4.30811125730707,
      "grad_norm": 0.1442141830921173,
      "learning_rate": 1.3928955015385835e-06,
      "loss": 0.0067,
      "step": 2632480
    },
    {
      "epoch": 4.308143987745724,
      "grad_norm": 0.3105299472808838,
      "learning_rate": 1.3928296093250662e-06,
      "loss": 0.0093,
      "step": 2632500
    },
    {
      "epoch": 4.308176718184377,
      "grad_norm": 0.1770043671131134,
      "learning_rate": 1.392763717111549e-06,
      "loss": 0.0074,
      "step": 2632520
    },
    {
      "epoch": 4.30820944862303,
      "grad_norm": 0.2147296816110611,
      "learning_rate": 1.392697824898032e-06,
      "loss": 0.0077,
      "step": 2632540
    },
    {
      "epoch": 4.308242179061684,
      "grad_norm": 0.09168046712875366,
      "learning_rate": 1.3926319326845147e-06,
      "loss": 0.0087,
      "step": 2632560
    },
    {
      "epoch": 4.308274909500337,
      "grad_norm": 0.4672239124774933,
      "learning_rate": 1.3925660404709976e-06,
      "loss": 0.0119,
      "step": 2632580
    },
    {
      "epoch": 4.308307639938991,
      "grad_norm": 0.1786791980266571,
      "learning_rate": 1.3925001482574804e-06,
      "loss": 0.0118,
      "step": 2632600
    },
    {
      "epoch": 4.3083403703776435,
      "grad_norm": 0.25547483563423157,
      "learning_rate": 1.3924342560439633e-06,
      "loss": 0.0122,
      "step": 2632620
    },
    {
      "epoch": 4.308373100816297,
      "grad_norm": 0.25116869807243347,
      "learning_rate": 1.3923683638304465e-06,
      "loss": 0.0127,
      "step": 2632640
    },
    {
      "epoch": 4.308405831254951,
      "grad_norm": 0.17997407913208008,
      "learning_rate": 1.3923024716169292e-06,
      "loss": 0.0106,
      "step": 2632660
    },
    {
      "epoch": 4.308438561693604,
      "grad_norm": 0.07322253286838531,
      "learning_rate": 1.3922365794034122e-06,
      "loss": 0.0123,
      "step": 2632680
    },
    {
      "epoch": 4.308471292132257,
      "grad_norm": 0.12171895056962967,
      "learning_rate": 1.392170687189895e-06,
      "loss": 0.0122,
      "step": 2632700
    },
    {
      "epoch": 4.308504022570911,
      "grad_norm": 0.16465243697166443,
      "learning_rate": 1.3921047949763776e-06,
      "loss": 0.0065,
      "step": 2632720
    },
    {
      "epoch": 4.308536753009564,
      "grad_norm": 0.3508445918560028,
      "learning_rate": 1.3920389027628606e-06,
      "loss": 0.0133,
      "step": 2632740
    },
    {
      "epoch": 4.308569483448217,
      "grad_norm": 0.1648816019296646,
      "learning_rate": 1.3919730105493433e-06,
      "loss": 0.0123,
      "step": 2632760
    },
    {
      "epoch": 4.3086022138868705,
      "grad_norm": 0.1370803415775299,
      "learning_rate": 1.3919071183358263e-06,
      "loss": 0.0062,
      "step": 2632780
    },
    {
      "epoch": 4.308634944325524,
      "grad_norm": 0.24994446337223053,
      "learning_rate": 1.3918412261223094e-06,
      "loss": 0.0077,
      "step": 2632800
    },
    {
      "epoch": 4.308667674764177,
      "grad_norm": 0.40333297848701477,
      "learning_rate": 1.3917753339087922e-06,
      "loss": 0.0069,
      "step": 2632820
    },
    {
      "epoch": 4.30870040520283,
      "grad_norm": 0.15751133859157562,
      "learning_rate": 1.3917094416952751e-06,
      "loss": 0.0108,
      "step": 2632840
    },
    {
      "epoch": 4.308733135641484,
      "grad_norm": 0.195917010307312,
      "learning_rate": 1.3916435494817579e-06,
      "loss": 0.0127,
      "step": 2632860
    },
    {
      "epoch": 4.308765866080138,
      "grad_norm": 0.3620143234729767,
      "learning_rate": 1.3915776572682408e-06,
      "loss": 0.0081,
      "step": 2632880
    },
    {
      "epoch": 4.30879859651879,
      "grad_norm": 0.26405787467956543,
      "learning_rate": 1.3915117650547236e-06,
      "loss": 0.0073,
      "step": 2632900
    },
    {
      "epoch": 4.308831326957444,
      "grad_norm": 0.08470389991998672,
      "learning_rate": 1.3914458728412065e-06,
      "loss": 0.0057,
      "step": 2632920
    },
    {
      "epoch": 4.3088640573960975,
      "grad_norm": 0.0937715619802475,
      "learning_rate": 1.3913799806276893e-06,
      "loss": 0.0061,
      "step": 2632940
    },
    {
      "epoch": 4.30889678783475,
      "grad_norm": 0.09505987912416458,
      "learning_rate": 1.391314088414172e-06,
      "loss": 0.0078,
      "step": 2632960
    },
    {
      "epoch": 4.308929518273404,
      "grad_norm": 0.11081866174936295,
      "learning_rate": 1.3912481962006552e-06,
      "loss": 0.0056,
      "step": 2632980
    },
    {
      "epoch": 4.308962248712057,
      "grad_norm": 0.2609643042087555,
      "learning_rate": 1.3911823039871381e-06,
      "loss": 0.0177,
      "step": 2633000
    },
    {
      "epoch": 4.308994979150711,
      "grad_norm": 0.132020503282547,
      "learning_rate": 1.3911164117736208e-06,
      "loss": 0.0128,
      "step": 2633020
    },
    {
      "epoch": 4.309027709589364,
      "grad_norm": 0.5764157176017761,
      "learning_rate": 1.3910505195601038e-06,
      "loss": 0.0145,
      "step": 2633040
    },
    {
      "epoch": 4.309060440028017,
      "grad_norm": 0.36559855937957764,
      "learning_rate": 1.3909846273465865e-06,
      "loss": 0.0091,
      "step": 2633060
    },
    {
      "epoch": 4.309093170466671,
      "grad_norm": 0.09782177209854126,
      "learning_rate": 1.3909187351330695e-06,
      "loss": 0.0113,
      "step": 2633080
    },
    {
      "epoch": 4.3091259009053235,
      "grad_norm": 0.29152536392211914,
      "learning_rate": 1.3908528429195522e-06,
      "loss": 0.0079,
      "step": 2633100
    },
    {
      "epoch": 4.309158631343977,
      "grad_norm": 0.22279508411884308,
      "learning_rate": 1.3907869507060352e-06,
      "loss": 0.0049,
      "step": 2633120
    },
    {
      "epoch": 4.309191361782631,
      "grad_norm": 0.4426174461841583,
      "learning_rate": 1.390721058492518e-06,
      "loss": 0.0101,
      "step": 2633140
    },
    {
      "epoch": 4.309224092221284,
      "grad_norm": 0.2255174070596695,
      "learning_rate": 1.390655166279001e-06,
      "loss": 0.0085,
      "step": 2633160
    },
    {
      "epoch": 4.309256822659937,
      "grad_norm": 0.22885465621948242,
      "learning_rate": 1.3905892740654838e-06,
      "loss": 0.0116,
      "step": 2633180
    },
    {
      "epoch": 4.309289553098591,
      "grad_norm": 0.3873712718486786,
      "learning_rate": 1.3905233818519668e-06,
      "loss": 0.0097,
      "step": 2633200
    },
    {
      "epoch": 4.309322283537244,
      "grad_norm": 0.1972377896308899,
      "learning_rate": 1.3904574896384495e-06,
      "loss": 0.0078,
      "step": 2633220
    },
    {
      "epoch": 4.309355013975897,
      "grad_norm": 0.24605922400951385,
      "learning_rate": 1.3903915974249325e-06,
      "loss": 0.0097,
      "step": 2633240
    },
    {
      "epoch": 4.3093877444145505,
      "grad_norm": 0.23157615959644318,
      "learning_rate": 1.3903257052114152e-06,
      "loss": 0.0075,
      "step": 2633260
    },
    {
      "epoch": 4.309420474853204,
      "grad_norm": 0.39908701181411743,
      "learning_rate": 1.3902598129978982e-06,
      "loss": 0.0087,
      "step": 2633280
    },
    {
      "epoch": 4.309453205291858,
      "grad_norm": 0.33722546696662903,
      "learning_rate": 1.390193920784381e-06,
      "loss": 0.007,
      "step": 2633300
    },
    {
      "epoch": 4.30948593573051,
      "grad_norm": 0.16758261620998383,
      "learning_rate": 1.3901280285708639e-06,
      "loss": 0.0097,
      "step": 2633320
    },
    {
      "epoch": 4.309518666169164,
      "grad_norm": 0.23386725783348083,
      "learning_rate": 1.3900621363573468e-06,
      "loss": 0.0116,
      "step": 2633340
    },
    {
      "epoch": 4.309551396607818,
      "grad_norm": 0.2465420365333557,
      "learning_rate": 1.3899962441438298e-06,
      "loss": 0.0088,
      "step": 2633360
    },
    {
      "epoch": 4.30958412704647,
      "grad_norm": 0.7829108834266663,
      "learning_rate": 1.3899303519303125e-06,
      "loss": 0.0084,
      "step": 2633380
    },
    {
      "epoch": 4.309616857485124,
      "grad_norm": 0.16246604919433594,
      "learning_rate": 1.3898644597167954e-06,
      "loss": 0.0085,
      "step": 2633400
    },
    {
      "epoch": 4.3096495879237775,
      "grad_norm": 0.3279304802417755,
      "learning_rate": 1.3897985675032782e-06,
      "loss": 0.0083,
      "step": 2633420
    },
    {
      "epoch": 4.309682318362431,
      "grad_norm": 0.11591684073209763,
      "learning_rate": 1.3897326752897611e-06,
      "loss": 0.0091,
      "step": 2633440
    },
    {
      "epoch": 4.309715048801084,
      "grad_norm": 0.11651929467916489,
      "learning_rate": 1.3896667830762439e-06,
      "loss": 0.008,
      "step": 2633460
    },
    {
      "epoch": 4.309747779239737,
      "grad_norm": 0.14833088219165802,
      "learning_rate": 1.3896008908627268e-06,
      "loss": 0.0086,
      "step": 2633480
    },
    {
      "epoch": 4.309780509678391,
      "grad_norm": 0.03427091985940933,
      "learning_rate": 1.3895349986492096e-06,
      "loss": 0.0101,
      "step": 2633500
    },
    {
      "epoch": 4.309813240117044,
      "grad_norm": 0.3628225028514862,
      "learning_rate": 1.3894691064356927e-06,
      "loss": 0.0105,
      "step": 2633520
    },
    {
      "epoch": 4.309845970555697,
      "grad_norm": 0.4979262948036194,
      "learning_rate": 1.3894032142221755e-06,
      "loss": 0.0116,
      "step": 2633540
    },
    {
      "epoch": 4.309878700994351,
      "grad_norm": 0.16593775153160095,
      "learning_rate": 1.3893373220086584e-06,
      "loss": 0.0063,
      "step": 2633560
    },
    {
      "epoch": 4.3099114314330045,
      "grad_norm": 0.2515893280506134,
      "learning_rate": 1.3892714297951412e-06,
      "loss": 0.0115,
      "step": 2633580
    },
    {
      "epoch": 4.309944161871657,
      "grad_norm": 0.19427180290222168,
      "learning_rate": 1.3892055375816241e-06,
      "loss": 0.0069,
      "step": 2633600
    },
    {
      "epoch": 4.309976892310311,
      "grad_norm": 0.2623339593410492,
      "learning_rate": 1.3891396453681069e-06,
      "loss": 0.0125,
      "step": 2633620
    },
    {
      "epoch": 4.310009622748964,
      "grad_norm": 0.05661526322364807,
      "learning_rate": 1.3890737531545898e-06,
      "loss": 0.0132,
      "step": 2633640
    },
    {
      "epoch": 4.310042353187617,
      "grad_norm": 0.15938100218772888,
      "learning_rate": 1.3890078609410725e-06,
      "loss": 0.0121,
      "step": 2633660
    },
    {
      "epoch": 4.310075083626271,
      "grad_norm": 0.12756264209747314,
      "learning_rate": 1.3889419687275557e-06,
      "loss": 0.0126,
      "step": 2633680
    },
    {
      "epoch": 4.310107814064924,
      "grad_norm": 0.10500328242778778,
      "learning_rate": 1.3888760765140384e-06,
      "loss": 0.0085,
      "step": 2633700
    },
    {
      "epoch": 4.310140544503577,
      "grad_norm": 0.29842954874038696,
      "learning_rate": 1.3888101843005214e-06,
      "loss": 0.0057,
      "step": 2633720
    },
    {
      "epoch": 4.310173274942231,
      "grad_norm": 0.14470696449279785,
      "learning_rate": 1.3887442920870041e-06,
      "loss": 0.0113,
      "step": 2633740
    },
    {
      "epoch": 4.310206005380884,
      "grad_norm": 0.1581989973783493,
      "learning_rate": 1.388678399873487e-06,
      "loss": 0.0066,
      "step": 2633760
    },
    {
      "epoch": 4.310238735819538,
      "grad_norm": 0.20527265965938568,
      "learning_rate": 1.3886125076599698e-06,
      "loss": 0.0113,
      "step": 2633780
    },
    {
      "epoch": 4.3102714662581905,
      "grad_norm": 0.3208184540271759,
      "learning_rate": 1.3885466154464528e-06,
      "loss": 0.0157,
      "step": 2633800
    },
    {
      "epoch": 4.310304196696844,
      "grad_norm": 0.4638202488422394,
      "learning_rate": 1.3884807232329355e-06,
      "loss": 0.0091,
      "step": 2633820
    },
    {
      "epoch": 4.310336927135498,
      "grad_norm": 0.4119557738304138,
      "learning_rate": 1.3884148310194185e-06,
      "loss": 0.0108,
      "step": 2633840
    },
    {
      "epoch": 4.310369657574151,
      "grad_norm": 0.10706513375043869,
      "learning_rate": 1.3883489388059016e-06,
      "loss": 0.0069,
      "step": 2633860
    },
    {
      "epoch": 4.310402388012804,
      "grad_norm": 0.33234530687332153,
      "learning_rate": 1.3882830465923844e-06,
      "loss": 0.0123,
      "step": 2633880
    },
    {
      "epoch": 4.3104351184514575,
      "grad_norm": 0.3219057619571686,
      "learning_rate": 1.3882171543788673e-06,
      "loss": 0.0084,
      "step": 2633900
    },
    {
      "epoch": 4.310467848890111,
      "grad_norm": 0.20483511686325073,
      "learning_rate": 1.38815126216535e-06,
      "loss": 0.0067,
      "step": 2633920
    },
    {
      "epoch": 4.310500579328764,
      "grad_norm": 0.3092086613178253,
      "learning_rate": 1.3880853699518328e-06,
      "loss": 0.0087,
      "step": 2633940
    },
    {
      "epoch": 4.310533309767417,
      "grad_norm": 0.16902759671211243,
      "learning_rate": 1.3880194777383158e-06,
      "loss": 0.0099,
      "step": 2633960
    },
    {
      "epoch": 4.310566040206071,
      "grad_norm": 0.3879280388355255,
      "learning_rate": 1.3879535855247985e-06,
      "loss": 0.0077,
      "step": 2633980
    },
    {
      "epoch": 4.310598770644724,
      "grad_norm": 0.45062100887298584,
      "learning_rate": 1.3878876933112814e-06,
      "loss": 0.0103,
      "step": 2634000
    },
    {
      "epoch": 4.310631501083377,
      "grad_norm": 0.1841209977865219,
      "learning_rate": 1.3878218010977642e-06,
      "loss": 0.0089,
      "step": 2634020
    },
    {
      "epoch": 4.310664231522031,
      "grad_norm": 0.33449819684028625,
      "learning_rate": 1.3877559088842474e-06,
      "loss": 0.0087,
      "step": 2634040
    },
    {
      "epoch": 4.3106969619606845,
      "grad_norm": 0.317513108253479,
      "learning_rate": 1.3876900166707303e-06,
      "loss": 0.0094,
      "step": 2634060
    },
    {
      "epoch": 4.310729692399337,
      "grad_norm": 0.02402541972696781,
      "learning_rate": 1.387624124457213e-06,
      "loss": 0.0062,
      "step": 2634080
    },
    {
      "epoch": 4.310762422837991,
      "grad_norm": 0.3472994565963745,
      "learning_rate": 1.387558232243696e-06,
      "loss": 0.0079,
      "step": 2634100
    },
    {
      "epoch": 4.310795153276644,
      "grad_norm": 0.2522318661212921,
      "learning_rate": 1.3874923400301787e-06,
      "loss": 0.007,
      "step": 2634120
    },
    {
      "epoch": 4.310827883715298,
      "grad_norm": 0.11137837916612625,
      "learning_rate": 1.3874264478166617e-06,
      "loss": 0.0086,
      "step": 2634140
    },
    {
      "epoch": 4.310860614153951,
      "grad_norm": 0.10092520713806152,
      "learning_rate": 1.3873605556031444e-06,
      "loss": 0.0119,
      "step": 2634160
    },
    {
      "epoch": 4.310893344592604,
      "grad_norm": 0.11768490076065063,
      "learning_rate": 1.3872946633896272e-06,
      "loss": 0.0084,
      "step": 2634180
    },
    {
      "epoch": 4.310926075031258,
      "grad_norm": 0.20314382016658783,
      "learning_rate": 1.3872287711761101e-06,
      "loss": 0.006,
      "step": 2634200
    },
    {
      "epoch": 4.310958805469911,
      "grad_norm": 0.05986682325601578,
      "learning_rate": 1.3871628789625933e-06,
      "loss": 0.0055,
      "step": 2634220
    },
    {
      "epoch": 4.310991535908564,
      "grad_norm": 0.3675563931465149,
      "learning_rate": 1.387096986749076e-06,
      "loss": 0.0086,
      "step": 2634240
    },
    {
      "epoch": 4.311024266347218,
      "grad_norm": 0.13624925911426544,
      "learning_rate": 1.387031094535559e-06,
      "loss": 0.0059,
      "step": 2634260
    },
    {
      "epoch": 4.3110569967858705,
      "grad_norm": 0.3757510781288147,
      "learning_rate": 1.3869652023220417e-06,
      "loss": 0.0071,
      "step": 2634280
    },
    {
      "epoch": 4.311089727224524,
      "grad_norm": 0.07203968614339828,
      "learning_rate": 1.3868993101085247e-06,
      "loss": 0.0084,
      "step": 2634300
    },
    {
      "epoch": 4.311122457663178,
      "grad_norm": 0.2173498570919037,
      "learning_rate": 1.3868334178950074e-06,
      "loss": 0.0072,
      "step": 2634320
    },
    {
      "epoch": 4.311155188101831,
      "grad_norm": 0.19373224675655365,
      "learning_rate": 1.3867675256814904e-06,
      "loss": 0.0112,
      "step": 2634340
    },
    {
      "epoch": 4.311187918540484,
      "grad_norm": 0.3275863528251648,
      "learning_rate": 1.386701633467973e-06,
      "loss": 0.0088,
      "step": 2634360
    },
    {
      "epoch": 4.311220648979138,
      "grad_norm": 0.258381724357605,
      "learning_rate": 1.3866357412544558e-06,
      "loss": 0.0062,
      "step": 2634380
    },
    {
      "epoch": 4.311253379417791,
      "grad_norm": 0.30944159626960754,
      "learning_rate": 1.386569849040939e-06,
      "loss": 0.0084,
      "step": 2634400
    },
    {
      "epoch": 4.311286109856444,
      "grad_norm": 0.7011997699737549,
      "learning_rate": 1.386503956827422e-06,
      "loss": 0.0121,
      "step": 2634420
    },
    {
      "epoch": 4.3113188402950975,
      "grad_norm": 0.38711991906166077,
      "learning_rate": 1.3864380646139047e-06,
      "loss": 0.0088,
      "step": 2634440
    },
    {
      "epoch": 4.311351570733751,
      "grad_norm": 1.4407275915145874,
      "learning_rate": 1.3863721724003876e-06,
      "loss": 0.0104,
      "step": 2634460
    },
    {
      "epoch": 4.311384301172405,
      "grad_norm": 0.30126601457595825,
      "learning_rate": 1.3863062801868704e-06,
      "loss": 0.0103,
      "step": 2634480
    },
    {
      "epoch": 4.311417031611057,
      "grad_norm": 0.16708879172801971,
      "learning_rate": 1.3862403879733533e-06,
      "loss": 0.0082,
      "step": 2634500
    },
    {
      "epoch": 4.311449762049711,
      "grad_norm": 0.1824602335691452,
      "learning_rate": 1.386174495759836e-06,
      "loss": 0.0107,
      "step": 2634520
    },
    {
      "epoch": 4.311482492488365,
      "grad_norm": 0.17887644469738007,
      "learning_rate": 1.386108603546319e-06,
      "loss": 0.0071,
      "step": 2634540
    },
    {
      "epoch": 4.311515222927017,
      "grad_norm": 0.224748894572258,
      "learning_rate": 1.386042711332802e-06,
      "loss": 0.0123,
      "step": 2634560
    },
    {
      "epoch": 4.311547953365671,
      "grad_norm": 0.18661659955978394,
      "learning_rate": 1.385976819119285e-06,
      "loss": 0.0128,
      "step": 2634580
    },
    {
      "epoch": 4.3115806838043245,
      "grad_norm": 0.3902915418148041,
      "learning_rate": 1.3859109269057677e-06,
      "loss": 0.0112,
      "step": 2634600
    },
    {
      "epoch": 4.311613414242978,
      "grad_norm": 0.3755488693714142,
      "learning_rate": 1.3858450346922506e-06,
      "loss": 0.0129,
      "step": 2634620
    },
    {
      "epoch": 4.311646144681631,
      "grad_norm": 0.08729760348796844,
      "learning_rate": 1.3857791424787334e-06,
      "loss": 0.0092,
      "step": 2634640
    },
    {
      "epoch": 4.311678875120284,
      "grad_norm": 0.28207817673683167,
      "learning_rate": 1.3857132502652163e-06,
      "loss": 0.0114,
      "step": 2634660
    },
    {
      "epoch": 4.311711605558938,
      "grad_norm": 0.2585209906101227,
      "learning_rate": 1.385647358051699e-06,
      "loss": 0.0055,
      "step": 2634680
    },
    {
      "epoch": 4.311744335997591,
      "grad_norm": 0.13778208196163177,
      "learning_rate": 1.385581465838182e-06,
      "loss": 0.0088,
      "step": 2634700
    },
    {
      "epoch": 4.311777066436244,
      "grad_norm": 0.1389317363500595,
      "learning_rate": 1.3855155736246647e-06,
      "loss": 0.0142,
      "step": 2634720
    },
    {
      "epoch": 4.311809796874898,
      "grad_norm": 0.12613524496555328,
      "learning_rate": 1.385449681411148e-06,
      "loss": 0.0072,
      "step": 2634740
    },
    {
      "epoch": 4.311842527313551,
      "grad_norm": 0.4183788299560547,
      "learning_rate": 1.3853837891976306e-06,
      "loss": 0.0098,
      "step": 2634760
    },
    {
      "epoch": 4.311875257752204,
      "grad_norm": 0.2611384093761444,
      "learning_rate": 1.3853178969841136e-06,
      "loss": 0.0079,
      "step": 2634780
    },
    {
      "epoch": 4.311907988190858,
      "grad_norm": 0.6832208037376404,
      "learning_rate": 1.3852520047705963e-06,
      "loss": 0.0092,
      "step": 2634800
    },
    {
      "epoch": 4.311940718629511,
      "grad_norm": 0.19163160026073456,
      "learning_rate": 1.3851861125570793e-06,
      "loss": 0.0073,
      "step": 2634820
    },
    {
      "epoch": 4.311973449068164,
      "grad_norm": 0.2867547273635864,
      "learning_rate": 1.385120220343562e-06,
      "loss": 0.0071,
      "step": 2634840
    },
    {
      "epoch": 4.312006179506818,
      "grad_norm": 0.19960655272006989,
      "learning_rate": 1.385054328130045e-06,
      "loss": 0.0109,
      "step": 2634860
    },
    {
      "epoch": 4.312038909945471,
      "grad_norm": 0.16156314313411713,
      "learning_rate": 1.3849884359165277e-06,
      "loss": 0.0122,
      "step": 2634880
    },
    {
      "epoch": 4.312071640384125,
      "grad_norm": 0.38298919796943665,
      "learning_rate": 1.3849225437030107e-06,
      "loss": 0.0082,
      "step": 2634900
    },
    {
      "epoch": 4.3121043708227775,
      "grad_norm": 0.23560293018817902,
      "learning_rate": 1.3848566514894936e-06,
      "loss": 0.0091,
      "step": 2634920
    },
    {
      "epoch": 4.312137101261431,
      "grad_norm": 0.28648272156715393,
      "learning_rate": 1.3847907592759766e-06,
      "loss": 0.0088,
      "step": 2634940
    },
    {
      "epoch": 4.312169831700085,
      "grad_norm": 0.28536859154701233,
      "learning_rate": 1.3847248670624593e-06,
      "loss": 0.0103,
      "step": 2634960
    },
    {
      "epoch": 4.312202562138737,
      "grad_norm": 0.3677480220794678,
      "learning_rate": 1.3846589748489423e-06,
      "loss": 0.012,
      "step": 2634980
    },
    {
      "epoch": 4.312235292577391,
      "grad_norm": 0.39179810881614685,
      "learning_rate": 1.384593082635425e-06,
      "loss": 0.0078,
      "step": 2635000
    },
    {
      "epoch": 4.312268023016045,
      "grad_norm": 0.1195029616355896,
      "learning_rate": 1.384527190421908e-06,
      "loss": 0.0081,
      "step": 2635020
    },
    {
      "epoch": 4.312300753454698,
      "grad_norm": 0.1395532786846161,
      "learning_rate": 1.3844612982083907e-06,
      "loss": 0.01,
      "step": 2635040
    },
    {
      "epoch": 4.312333483893351,
      "grad_norm": 0.3370278775691986,
      "learning_rate": 1.3843954059948736e-06,
      "loss": 0.0118,
      "step": 2635060
    },
    {
      "epoch": 4.3123662143320045,
      "grad_norm": 0.16138111054897308,
      "learning_rate": 1.3843295137813564e-06,
      "loss": 0.0097,
      "step": 2635080
    },
    {
      "epoch": 4.312398944770658,
      "grad_norm": 0.35852381587028503,
      "learning_rate": 1.3842636215678395e-06,
      "loss": 0.0085,
      "step": 2635100
    },
    {
      "epoch": 4.312431675209311,
      "grad_norm": 0.23176990449428558,
      "learning_rate": 1.3841977293543225e-06,
      "loss": 0.0063,
      "step": 2635120
    },
    {
      "epoch": 4.312464405647964,
      "grad_norm": 0.39140889048576355,
      "learning_rate": 1.3841318371408052e-06,
      "loss": 0.016,
      "step": 2635140
    },
    {
      "epoch": 4.312497136086618,
      "grad_norm": 0.04124049097299576,
      "learning_rate": 1.384065944927288e-06,
      "loss": 0.0123,
      "step": 2635160
    },
    {
      "epoch": 4.312529866525271,
      "grad_norm": 0.1999664157629013,
      "learning_rate": 1.384000052713771e-06,
      "loss": 0.0074,
      "step": 2635180
    },
    {
      "epoch": 4.312562596963924,
      "grad_norm": 0.1805664300918579,
      "learning_rate": 1.3839341605002537e-06,
      "loss": 0.0096,
      "step": 2635200
    },
    {
      "epoch": 4.312595327402578,
      "grad_norm": 0.1806786209344864,
      "learning_rate": 1.3838682682867366e-06,
      "loss": 0.0061,
      "step": 2635220
    },
    {
      "epoch": 4.3126280578412315,
      "grad_norm": 0.10532199591398239,
      "learning_rate": 1.3838023760732194e-06,
      "loss": 0.0068,
      "step": 2635240
    },
    {
      "epoch": 4.312660788279884,
      "grad_norm": 0.07855362445116043,
      "learning_rate": 1.3837364838597025e-06,
      "loss": 0.0089,
      "step": 2635260
    },
    {
      "epoch": 4.312693518718538,
      "grad_norm": 0.29820168018341064,
      "learning_rate": 1.3836705916461855e-06,
      "loss": 0.0096,
      "step": 2635280
    },
    {
      "epoch": 4.312726249157191,
      "grad_norm": 0.22617274522781372,
      "learning_rate": 1.3836046994326682e-06,
      "loss": 0.0072,
      "step": 2635300
    },
    {
      "epoch": 4.312758979595845,
      "grad_norm": 0.14237117767333984,
      "learning_rate": 1.3835388072191512e-06,
      "loss": 0.0046,
      "step": 2635320
    },
    {
      "epoch": 4.312791710034498,
      "grad_norm": 0.6033092141151428,
      "learning_rate": 1.383472915005634e-06,
      "loss": 0.0084,
      "step": 2635340
    },
    {
      "epoch": 4.312824440473151,
      "grad_norm": 0.11874760687351227,
      "learning_rate": 1.3834070227921166e-06,
      "loss": 0.009,
      "step": 2635360
    },
    {
      "epoch": 4.312857170911805,
      "grad_norm": 0.2667620778083801,
      "learning_rate": 1.3833411305785996e-06,
      "loss": 0.0085,
      "step": 2635380
    },
    {
      "epoch": 4.312889901350458,
      "grad_norm": 0.1284886747598648,
      "learning_rate": 1.3832752383650823e-06,
      "loss": 0.0107,
      "step": 2635400
    },
    {
      "epoch": 4.312922631789111,
      "grad_norm": 0.288499116897583,
      "learning_rate": 1.3832093461515653e-06,
      "loss": 0.011,
      "step": 2635420
    },
    {
      "epoch": 4.312955362227765,
      "grad_norm": 0.36480385065078735,
      "learning_rate": 1.3831434539380485e-06,
      "loss": 0.0127,
      "step": 2635440
    },
    {
      "epoch": 4.3129880926664175,
      "grad_norm": 0.3912937045097351,
      "learning_rate": 1.3830775617245312e-06,
      "loss": 0.0112,
      "step": 2635460
    },
    {
      "epoch": 4.313020823105071,
      "grad_norm": 0.1628178209066391,
      "learning_rate": 1.3830116695110141e-06,
      "loss": 0.0087,
      "step": 2635480
    },
    {
      "epoch": 4.313053553543725,
      "grad_norm": 0.34172186255455017,
      "learning_rate": 1.3829457772974969e-06,
      "loss": 0.0157,
      "step": 2635500
    },
    {
      "epoch": 4.313086283982378,
      "grad_norm": 0.09554692357778549,
      "learning_rate": 1.3828798850839798e-06,
      "loss": 0.0073,
      "step": 2635520
    },
    {
      "epoch": 4.313119014421031,
      "grad_norm": 0.6212758421897888,
      "learning_rate": 1.3828139928704626e-06,
      "loss": 0.01,
      "step": 2635540
    },
    {
      "epoch": 4.3131517448596846,
      "grad_norm": 0.23870176076889038,
      "learning_rate": 1.3827481006569455e-06,
      "loss": 0.0075,
      "step": 2635560
    },
    {
      "epoch": 4.313184475298338,
      "grad_norm": 0.3039054572582245,
      "learning_rate": 1.3826822084434283e-06,
      "loss": 0.0077,
      "step": 2635580
    },
    {
      "epoch": 4.313217205736992,
      "grad_norm": 0.0742277279496193,
      "learning_rate": 1.382616316229911e-06,
      "loss": 0.0079,
      "step": 2635600
    },
    {
      "epoch": 4.3132499361756445,
      "grad_norm": 0.6486314535140991,
      "learning_rate": 1.3825504240163942e-06,
      "loss": 0.0145,
      "step": 2635620
    },
    {
      "epoch": 4.313282666614298,
      "grad_norm": 0.2160673886537552,
      "learning_rate": 1.3824845318028771e-06,
      "loss": 0.0081,
      "step": 2635640
    },
    {
      "epoch": 4.313315397052952,
      "grad_norm": 0.23637013137340546,
      "learning_rate": 1.3824186395893599e-06,
      "loss": 0.0104,
      "step": 2635660
    },
    {
      "epoch": 4.313348127491604,
      "grad_norm": 0.0758199393749237,
      "learning_rate": 1.3823527473758428e-06,
      "loss": 0.01,
      "step": 2635680
    },
    {
      "epoch": 4.313380857930258,
      "grad_norm": 1.0194299221038818,
      "learning_rate": 1.3822868551623256e-06,
      "loss": 0.0142,
      "step": 2635700
    },
    {
      "epoch": 4.3134135883689115,
      "grad_norm": 0.1069083958864212,
      "learning_rate": 1.3822209629488085e-06,
      "loss": 0.0077,
      "step": 2635720
    },
    {
      "epoch": 4.313446318807564,
      "grad_norm": 0.5426058769226074,
      "learning_rate": 1.3821550707352912e-06,
      "loss": 0.0081,
      "step": 2635740
    },
    {
      "epoch": 4.313479049246218,
      "grad_norm": 0.037562258541584015,
      "learning_rate": 1.3820891785217742e-06,
      "loss": 0.0066,
      "step": 2635760
    },
    {
      "epoch": 4.313511779684871,
      "grad_norm": 0.4443370997905731,
      "learning_rate": 1.382023286308257e-06,
      "loss": 0.0087,
      "step": 2635780
    },
    {
      "epoch": 4.313544510123525,
      "grad_norm": 0.13561740517616272,
      "learning_rate": 1.38195739409474e-06,
      "loss": 0.0078,
      "step": 2635800
    },
    {
      "epoch": 4.313577240562178,
      "grad_norm": 0.2912459075450897,
      "learning_rate": 1.3818915018812228e-06,
      "loss": 0.0075,
      "step": 2635820
    },
    {
      "epoch": 4.313609971000831,
      "grad_norm": 0.2065121829509735,
      "learning_rate": 1.3818256096677058e-06,
      "loss": 0.0113,
      "step": 2635840
    },
    {
      "epoch": 4.313642701439485,
      "grad_norm": 0.29104262590408325,
      "learning_rate": 1.3817597174541885e-06,
      "loss": 0.0076,
      "step": 2635860
    },
    {
      "epoch": 4.313675431878138,
      "grad_norm": 0.2435942441225052,
      "learning_rate": 1.3816938252406715e-06,
      "loss": 0.0137,
      "step": 2635880
    },
    {
      "epoch": 4.313708162316791,
      "grad_norm": 0.24780218303203583,
      "learning_rate": 1.3816279330271542e-06,
      "loss": 0.0093,
      "step": 2635900
    },
    {
      "epoch": 4.313740892755445,
      "grad_norm": 0.22336024045944214,
      "learning_rate": 1.3815620408136372e-06,
      "loss": 0.01,
      "step": 2635920
    },
    {
      "epoch": 4.313773623194098,
      "grad_norm": 0.2124146968126297,
      "learning_rate": 1.38149614860012e-06,
      "loss": 0.0121,
      "step": 2635940
    },
    {
      "epoch": 4.313806353632751,
      "grad_norm": 0.2007746696472168,
      "learning_rate": 1.3814302563866029e-06,
      "loss": 0.0124,
      "step": 2635960
    },
    {
      "epoch": 4.313839084071405,
      "grad_norm": 0.1473720222711563,
      "learning_rate": 1.3813643641730858e-06,
      "loss": 0.0099,
      "step": 2635980
    },
    {
      "epoch": 4.313871814510058,
      "grad_norm": 0.106648288667202,
      "learning_rate": 1.3812984719595688e-06,
      "loss": 0.0059,
      "step": 2636000
    },
    {
      "epoch": 4.313904544948711,
      "grad_norm": 0.369892954826355,
      "learning_rate": 1.3812325797460515e-06,
      "loss": 0.0095,
      "step": 2636020
    },
    {
      "epoch": 4.313937275387365,
      "grad_norm": 0.09814347326755524,
      "learning_rate": 1.3811666875325345e-06,
      "loss": 0.0101,
      "step": 2636040
    },
    {
      "epoch": 4.313970005826018,
      "grad_norm": 0.8593887686729431,
      "learning_rate": 1.3811007953190172e-06,
      "loss": 0.0115,
      "step": 2636060
    },
    {
      "epoch": 4.314002736264672,
      "grad_norm": 0.28400132060050964,
      "learning_rate": 1.3810349031055001e-06,
      "loss": 0.0073,
      "step": 2636080
    },
    {
      "epoch": 4.3140354667033245,
      "grad_norm": 0.30397120118141174,
      "learning_rate": 1.3809690108919829e-06,
      "loss": 0.0073,
      "step": 2636100
    },
    {
      "epoch": 4.314068197141978,
      "grad_norm": 0.10097472369670868,
      "learning_rate": 1.3809031186784658e-06,
      "loss": 0.0038,
      "step": 2636120
    },
    {
      "epoch": 4.314100927580632,
      "grad_norm": 0.07668039202690125,
      "learning_rate": 1.3808372264649488e-06,
      "loss": 0.0084,
      "step": 2636140
    },
    {
      "epoch": 4.314133658019284,
      "grad_norm": 0.2715075612068176,
      "learning_rate": 1.3807713342514317e-06,
      "loss": 0.0136,
      "step": 2636160
    },
    {
      "epoch": 4.314166388457938,
      "grad_norm": 0.17060518264770508,
      "learning_rate": 1.3807054420379145e-06,
      "loss": 0.0073,
      "step": 2636180
    },
    {
      "epoch": 4.314199118896592,
      "grad_norm": 0.5091115832328796,
      "learning_rate": 1.3806395498243974e-06,
      "loss": 0.0103,
      "step": 2636200
    },
    {
      "epoch": 4.314231849335245,
      "grad_norm": 0.3109552264213562,
      "learning_rate": 1.3805736576108802e-06,
      "loss": 0.0096,
      "step": 2636220
    },
    {
      "epoch": 4.314264579773898,
      "grad_norm": 0.5088462829589844,
      "learning_rate": 1.3805077653973631e-06,
      "loss": 0.0153,
      "step": 2636240
    },
    {
      "epoch": 4.3142973102125515,
      "grad_norm": 0.14406412839889526,
      "learning_rate": 1.3804418731838459e-06,
      "loss": 0.0143,
      "step": 2636260
    },
    {
      "epoch": 4.314330040651205,
      "grad_norm": 0.1758163720369339,
      "learning_rate": 1.3803759809703288e-06,
      "loss": 0.0112,
      "step": 2636280
    },
    {
      "epoch": 4.314362771089858,
      "grad_norm": 0.366276353597641,
      "learning_rate": 1.3803100887568116e-06,
      "loss": 0.0059,
      "step": 2636300
    },
    {
      "epoch": 4.314395501528511,
      "grad_norm": 0.18388529121875763,
      "learning_rate": 1.3802441965432947e-06,
      "loss": 0.007,
      "step": 2636320
    },
    {
      "epoch": 4.314428231967165,
      "grad_norm": 0.6236804127693176,
      "learning_rate": 1.3801783043297775e-06,
      "loss": 0.0098,
      "step": 2636340
    },
    {
      "epoch": 4.314460962405819,
      "grad_norm": 0.27430468797683716,
      "learning_rate": 1.3801124121162604e-06,
      "loss": 0.0082,
      "step": 2636360
    },
    {
      "epoch": 4.314493692844471,
      "grad_norm": 0.20568816363811493,
      "learning_rate": 1.3800465199027431e-06,
      "loss": 0.0073,
      "step": 2636380
    },
    {
      "epoch": 4.314526423283125,
      "grad_norm": 0.4214814007282257,
      "learning_rate": 1.379980627689226e-06,
      "loss": 0.0071,
      "step": 2636400
    },
    {
      "epoch": 4.3145591537217785,
      "grad_norm": 0.1392512023448944,
      "learning_rate": 1.3799147354757088e-06,
      "loss": 0.0103,
      "step": 2636420
    },
    {
      "epoch": 4.314591884160431,
      "grad_norm": 0.18212097883224487,
      "learning_rate": 1.3798488432621918e-06,
      "loss": 0.0113,
      "step": 2636440
    },
    {
      "epoch": 4.314624614599085,
      "grad_norm": 0.1368253529071808,
      "learning_rate": 1.3797829510486745e-06,
      "loss": 0.0125,
      "step": 2636460
    },
    {
      "epoch": 4.314657345037738,
      "grad_norm": 0.30115026235580444,
      "learning_rate": 1.3797170588351575e-06,
      "loss": 0.0067,
      "step": 2636480
    },
    {
      "epoch": 4.314690075476392,
      "grad_norm": 0.5053278207778931,
      "learning_rate": 1.3796511666216406e-06,
      "loss": 0.0106,
      "step": 2636500
    },
    {
      "epoch": 4.314722805915045,
      "grad_norm": 0.3980344533920288,
      "learning_rate": 1.3795852744081234e-06,
      "loss": 0.0077,
      "step": 2636520
    },
    {
      "epoch": 4.314755536353698,
      "grad_norm": 0.5560427308082581,
      "learning_rate": 1.3795193821946063e-06,
      "loss": 0.0141,
      "step": 2636540
    },
    {
      "epoch": 4.314788266792352,
      "grad_norm": 0.3897537291049957,
      "learning_rate": 1.379453489981089e-06,
      "loss": 0.0129,
      "step": 2636560
    },
    {
      "epoch": 4.3148209972310045,
      "grad_norm": 0.1038084477186203,
      "learning_rate": 1.3793875977675718e-06,
      "loss": 0.0073,
      "step": 2636580
    },
    {
      "epoch": 4.314853727669658,
      "grad_norm": 0.6475068926811218,
      "learning_rate": 1.3793217055540548e-06,
      "loss": 0.0111,
      "step": 2636600
    },
    {
      "epoch": 4.314886458108312,
      "grad_norm": 0.126069113612175,
      "learning_rate": 1.3792558133405375e-06,
      "loss": 0.0085,
      "step": 2636620
    },
    {
      "epoch": 4.314919188546964,
      "grad_norm": 0.17213326692581177,
      "learning_rate": 1.3791899211270205e-06,
      "loss": 0.0077,
      "step": 2636640
    },
    {
      "epoch": 4.314951918985618,
      "grad_norm": 0.06983192265033722,
      "learning_rate": 1.3791240289135032e-06,
      "loss": 0.0086,
      "step": 2636660
    },
    {
      "epoch": 4.314984649424272,
      "grad_norm": 0.26714402437210083,
      "learning_rate": 1.3790581366999864e-06,
      "loss": 0.0072,
      "step": 2636680
    },
    {
      "epoch": 4.315017379862925,
      "grad_norm": 0.22646906971931458,
      "learning_rate": 1.3789922444864693e-06,
      "loss": 0.0069,
      "step": 2636700
    },
    {
      "epoch": 4.315050110301578,
      "grad_norm": 0.2547260820865631,
      "learning_rate": 1.378926352272952e-06,
      "loss": 0.0111,
      "step": 2636720
    },
    {
      "epoch": 4.3150828407402315,
      "grad_norm": 0.07923459261655807,
      "learning_rate": 1.378860460059435e-06,
      "loss": 0.0094,
      "step": 2636740
    },
    {
      "epoch": 4.315115571178885,
      "grad_norm": 0.10846833884716034,
      "learning_rate": 1.3787945678459177e-06,
      "loss": 0.0095,
      "step": 2636760
    },
    {
      "epoch": 4.315148301617539,
      "grad_norm": 0.25773686170578003,
      "learning_rate": 1.3787286756324007e-06,
      "loss": 0.0094,
      "step": 2636780
    },
    {
      "epoch": 4.315181032056191,
      "grad_norm": 0.2140043079853058,
      "learning_rate": 1.3786627834188834e-06,
      "loss": 0.0067,
      "step": 2636800
    },
    {
      "epoch": 4.315213762494845,
      "grad_norm": 0.23981259763240814,
      "learning_rate": 1.3785968912053662e-06,
      "loss": 0.0088,
      "step": 2636820
    },
    {
      "epoch": 4.315246492933499,
      "grad_norm": 0.18265549838542938,
      "learning_rate": 1.3785309989918491e-06,
      "loss": 0.0107,
      "step": 2636840
    },
    {
      "epoch": 4.315279223372151,
      "grad_norm": 0.4246740937232971,
      "learning_rate": 1.3784651067783323e-06,
      "loss": 0.0088,
      "step": 2636860
    },
    {
      "epoch": 4.315311953810805,
      "grad_norm": 0.1551523208618164,
      "learning_rate": 1.378399214564815e-06,
      "loss": 0.0087,
      "step": 2636880
    },
    {
      "epoch": 4.3153446842494585,
      "grad_norm": 0.2582201063632965,
      "learning_rate": 1.378333322351298e-06,
      "loss": 0.008,
      "step": 2636900
    },
    {
      "epoch": 4.315377414688111,
      "grad_norm": 0.23118989169597626,
      "learning_rate": 1.3782674301377807e-06,
      "loss": 0.0071,
      "step": 2636920
    },
    {
      "epoch": 4.315410145126765,
      "grad_norm": 0.09660688042640686,
      "learning_rate": 1.3782015379242637e-06,
      "loss": 0.0057,
      "step": 2636940
    },
    {
      "epoch": 4.315442875565418,
      "grad_norm": 0.13674919307231903,
      "learning_rate": 1.3781356457107464e-06,
      "loss": 0.0094,
      "step": 2636960
    },
    {
      "epoch": 4.315475606004072,
      "grad_norm": 0.2860938012599945,
      "learning_rate": 1.3780697534972294e-06,
      "loss": 0.0122,
      "step": 2636980
    },
    {
      "epoch": 4.315508336442725,
      "grad_norm": 0.29797691106796265,
      "learning_rate": 1.378003861283712e-06,
      "loss": 0.0117,
      "step": 2637000
    },
    {
      "epoch": 4.315541066881378,
      "grad_norm": 0.0764661580324173,
      "learning_rate": 1.3779379690701953e-06,
      "loss": 0.0107,
      "step": 2637020
    },
    {
      "epoch": 4.315573797320032,
      "grad_norm": 0.3161547780036926,
      "learning_rate": 1.377872076856678e-06,
      "loss": 0.0096,
      "step": 2637040
    },
    {
      "epoch": 4.3156065277586855,
      "grad_norm": 0.13096553087234497,
      "learning_rate": 1.377806184643161e-06,
      "loss": 0.0069,
      "step": 2637060
    },
    {
      "epoch": 4.315639258197338,
      "grad_norm": 0.07805491238832474,
      "learning_rate": 1.3777402924296437e-06,
      "loss": 0.0073,
      "step": 2637080
    },
    {
      "epoch": 4.315671988635992,
      "grad_norm": 0.25404882431030273,
      "learning_rate": 1.3776744002161267e-06,
      "loss": 0.0109,
      "step": 2637100
    },
    {
      "epoch": 4.315704719074645,
      "grad_norm": 0.2137603759765625,
      "learning_rate": 1.3776085080026094e-06,
      "loss": 0.0063,
      "step": 2637120
    },
    {
      "epoch": 4.315737449513298,
      "grad_norm": 0.41513511538505554,
      "learning_rate": 1.3775426157890923e-06,
      "loss": 0.0087,
      "step": 2637140
    },
    {
      "epoch": 4.315770179951952,
      "grad_norm": 0.5717189311981201,
      "learning_rate": 1.377476723575575e-06,
      "loss": 0.0109,
      "step": 2637160
    },
    {
      "epoch": 4.315802910390605,
      "grad_norm": 0.28334981203079224,
      "learning_rate": 1.377410831362058e-06,
      "loss": 0.0113,
      "step": 2637180
    },
    {
      "epoch": 4.315835640829258,
      "grad_norm": 0.14158841967582703,
      "learning_rate": 1.377344939148541e-06,
      "loss": 0.0092,
      "step": 2637200
    },
    {
      "epoch": 4.315868371267912,
      "grad_norm": 0.0911068394780159,
      "learning_rate": 1.377279046935024e-06,
      "loss": 0.0083,
      "step": 2637220
    },
    {
      "epoch": 4.315901101706565,
      "grad_norm": 0.4218587577342987,
      "learning_rate": 1.3772131547215067e-06,
      "loss": 0.0077,
      "step": 2637240
    },
    {
      "epoch": 4.315933832145219,
      "grad_norm": 0.3469727039337158,
      "learning_rate": 1.3771472625079896e-06,
      "loss": 0.0083,
      "step": 2637260
    },
    {
      "epoch": 4.3159665625838715,
      "grad_norm": 0.3824107348918915,
      "learning_rate": 1.3770813702944724e-06,
      "loss": 0.0073,
      "step": 2637280
    },
    {
      "epoch": 4.315999293022525,
      "grad_norm": 0.24562983214855194,
      "learning_rate": 1.3770154780809553e-06,
      "loss": 0.0091,
      "step": 2637300
    },
    {
      "epoch": 4.316032023461179,
      "grad_norm": 0.2187531441450119,
      "learning_rate": 1.376949585867438e-06,
      "loss": 0.0055,
      "step": 2637320
    },
    {
      "epoch": 4.316064753899831,
      "grad_norm": 0.2667783498764038,
      "learning_rate": 1.376883693653921e-06,
      "loss": 0.0098,
      "step": 2637340
    },
    {
      "epoch": 4.316097484338485,
      "grad_norm": 0.17151489853858948,
      "learning_rate": 1.3768178014404037e-06,
      "loss": 0.0088,
      "step": 2637360
    },
    {
      "epoch": 4.3161302147771385,
      "grad_norm": 0.23140230774879456,
      "learning_rate": 1.376751909226887e-06,
      "loss": 0.0068,
      "step": 2637380
    },
    {
      "epoch": 4.316162945215792,
      "grad_norm": 0.14858227968215942,
      "learning_rate": 1.3766860170133697e-06,
      "loss": 0.009,
      "step": 2637400
    },
    {
      "epoch": 4.316195675654445,
      "grad_norm": 0.5842164754867554,
      "learning_rate": 1.3766201247998526e-06,
      "loss": 0.0087,
      "step": 2637420
    },
    {
      "epoch": 4.316228406093098,
      "grad_norm": 0.0855911523103714,
      "learning_rate": 1.3765542325863353e-06,
      "loss": 0.0105,
      "step": 2637440
    },
    {
      "epoch": 4.316261136531752,
      "grad_norm": 0.13914014399051666,
      "learning_rate": 1.3764883403728183e-06,
      "loss": 0.0056,
      "step": 2637460
    },
    {
      "epoch": 4.316293866970405,
      "grad_norm": 0.13749192655086517,
      "learning_rate": 1.376422448159301e-06,
      "loss": 0.0061,
      "step": 2637480
    },
    {
      "epoch": 4.316326597409058,
      "grad_norm": 0.9867081642150879,
      "learning_rate": 1.376356555945784e-06,
      "loss": 0.0102,
      "step": 2637500
    },
    {
      "epoch": 4.316359327847712,
      "grad_norm": 0.22851069271564484,
      "learning_rate": 1.3762906637322667e-06,
      "loss": 0.0074,
      "step": 2637520
    },
    {
      "epoch": 4.3163920582863655,
      "grad_norm": 0.13008087873458862,
      "learning_rate": 1.3762247715187497e-06,
      "loss": 0.0059,
      "step": 2637540
    },
    {
      "epoch": 4.316424788725018,
      "grad_norm": 0.24280674755573273,
      "learning_rate": 1.3761588793052326e-06,
      "loss": 0.0047,
      "step": 2637560
    },
    {
      "epoch": 4.316457519163672,
      "grad_norm": 0.4914540648460388,
      "learning_rate": 1.3760929870917156e-06,
      "loss": 0.007,
      "step": 2637580
    },
    {
      "epoch": 4.316490249602325,
      "grad_norm": 0.2687729299068451,
      "learning_rate": 1.3760270948781983e-06,
      "loss": 0.0101,
      "step": 2637600
    },
    {
      "epoch": 4.316522980040978,
      "grad_norm": 0.4585195779800415,
      "learning_rate": 1.3759612026646813e-06,
      "loss": 0.0066,
      "step": 2637620
    },
    {
      "epoch": 4.316555710479632,
      "grad_norm": 0.7699733376502991,
      "learning_rate": 1.375895310451164e-06,
      "loss": 0.0083,
      "step": 2637640
    },
    {
      "epoch": 4.316588440918285,
      "grad_norm": 0.25229334831237793,
      "learning_rate": 1.375829418237647e-06,
      "loss": 0.0108,
      "step": 2637660
    },
    {
      "epoch": 4.316621171356939,
      "grad_norm": 0.19636942446231842,
      "learning_rate": 1.3757635260241297e-06,
      "loss": 0.0065,
      "step": 2637680
    },
    {
      "epoch": 4.316653901795592,
      "grad_norm": 0.14308586716651917,
      "learning_rate": 1.3756976338106127e-06,
      "loss": 0.0091,
      "step": 2637700
    },
    {
      "epoch": 4.316686632234245,
      "grad_norm": 0.1524253785610199,
      "learning_rate": 1.3756317415970954e-06,
      "loss": 0.0079,
      "step": 2637720
    },
    {
      "epoch": 4.316719362672899,
      "grad_norm": 0.1349051147699356,
      "learning_rate": 1.3755658493835786e-06,
      "loss": 0.0133,
      "step": 2637740
    },
    {
      "epoch": 4.3167520931115515,
      "grad_norm": 0.3437933325767517,
      "learning_rate": 1.3754999571700615e-06,
      "loss": 0.0098,
      "step": 2637760
    },
    {
      "epoch": 4.316784823550205,
      "grad_norm": 0.21326223015785217,
      "learning_rate": 1.3754340649565442e-06,
      "loss": 0.0049,
      "step": 2637780
    },
    {
      "epoch": 4.316817553988859,
      "grad_norm": 0.1007518321275711,
      "learning_rate": 1.375368172743027e-06,
      "loss": 0.0127,
      "step": 2637800
    },
    {
      "epoch": 4.316850284427512,
      "grad_norm": 0.18586373329162598,
      "learning_rate": 1.37530228052951e-06,
      "loss": 0.0137,
      "step": 2637820
    },
    {
      "epoch": 4.316883014866165,
      "grad_norm": 0.4891197681427002,
      "learning_rate": 1.3752363883159927e-06,
      "loss": 0.0127,
      "step": 2637840
    },
    {
      "epoch": 4.316915745304819,
      "grad_norm": 0.30593860149383545,
      "learning_rate": 1.3751704961024756e-06,
      "loss": 0.0088,
      "step": 2637860
    },
    {
      "epoch": 4.316948475743472,
      "grad_norm": 0.750989556312561,
      "learning_rate": 1.3751046038889584e-06,
      "loss": 0.0136,
      "step": 2637880
    },
    {
      "epoch": 4.316981206182125,
      "grad_norm": 0.1658755987882614,
      "learning_rate": 1.3750387116754415e-06,
      "loss": 0.008,
      "step": 2637900
    },
    {
      "epoch": 4.3170139366207785,
      "grad_norm": 0.08014898747205734,
      "learning_rate": 1.3749728194619245e-06,
      "loss": 0.0089,
      "step": 2637920
    },
    {
      "epoch": 4.317046667059432,
      "grad_norm": 0.5040479302406311,
      "learning_rate": 1.3749069272484072e-06,
      "loss": 0.0139,
      "step": 2637940
    },
    {
      "epoch": 4.317079397498086,
      "grad_norm": 0.14417767524719238,
      "learning_rate": 1.3748410350348902e-06,
      "loss": 0.0086,
      "step": 2637960
    },
    {
      "epoch": 4.317112127936738,
      "grad_norm": 0.1298421025276184,
      "learning_rate": 1.374775142821373e-06,
      "loss": 0.0074,
      "step": 2637980
    },
    {
      "epoch": 4.317144858375392,
      "grad_norm": 0.08925607800483704,
      "learning_rate": 1.3747092506078557e-06,
      "loss": 0.008,
      "step": 2638000
    },
    {
      "epoch": 4.317177588814046,
      "grad_norm": 1.094535231590271,
      "learning_rate": 1.3746433583943386e-06,
      "loss": 0.0116,
      "step": 2638020
    },
    {
      "epoch": 4.317210319252698,
      "grad_norm": 0.10464571416378021,
      "learning_rate": 1.3745774661808213e-06,
      "loss": 0.0083,
      "step": 2638040
    },
    {
      "epoch": 4.317243049691352,
      "grad_norm": 0.15619562566280365,
      "learning_rate": 1.3745115739673043e-06,
      "loss": 0.0112,
      "step": 2638060
    },
    {
      "epoch": 4.3172757801300055,
      "grad_norm": 0.31838715076446533,
      "learning_rate": 1.3744456817537875e-06,
      "loss": 0.0066,
      "step": 2638080
    },
    {
      "epoch": 4.317308510568659,
      "grad_norm": 0.05728403478860855,
      "learning_rate": 1.3743797895402702e-06,
      "loss": 0.0074,
      "step": 2638100
    },
    {
      "epoch": 4.317341241007312,
      "grad_norm": 0.5728105306625366,
      "learning_rate": 1.3743138973267532e-06,
      "loss": 0.0081,
      "step": 2638120
    },
    {
      "epoch": 4.317373971445965,
      "grad_norm": 0.33850976824760437,
      "learning_rate": 1.3742480051132359e-06,
      "loss": 0.0092,
      "step": 2638140
    },
    {
      "epoch": 4.317406701884619,
      "grad_norm": 0.3943972885608673,
      "learning_rate": 1.3741821128997188e-06,
      "loss": 0.0088,
      "step": 2638160
    },
    {
      "epoch": 4.317439432323272,
      "grad_norm": 0.5213943719863892,
      "learning_rate": 1.3741162206862016e-06,
      "loss": 0.0075,
      "step": 2638180
    },
    {
      "epoch": 4.317472162761925,
      "grad_norm": 0.20016929507255554,
      "learning_rate": 1.3740503284726845e-06,
      "loss": 0.0077,
      "step": 2638200
    },
    {
      "epoch": 4.317504893200579,
      "grad_norm": 0.22698825597763062,
      "learning_rate": 1.3739844362591673e-06,
      "loss": 0.0064,
      "step": 2638220
    },
    {
      "epoch": 4.317537623639232,
      "grad_norm": 0.11331141740083694,
      "learning_rate": 1.37391854404565e-06,
      "loss": 0.0098,
      "step": 2638240
    },
    {
      "epoch": 4.317570354077885,
      "grad_norm": 0.1750287115573883,
      "learning_rate": 1.3738526518321332e-06,
      "loss": 0.0094,
      "step": 2638260
    },
    {
      "epoch": 4.317603084516539,
      "grad_norm": 0.4095965325832367,
      "learning_rate": 1.3737867596186161e-06,
      "loss": 0.0074,
      "step": 2638280
    },
    {
      "epoch": 4.317635814955192,
      "grad_norm": 0.11069744825363159,
      "learning_rate": 1.3737208674050989e-06,
      "loss": 0.0088,
      "step": 2638300
    },
    {
      "epoch": 4.317668545393845,
      "grad_norm": 0.17252598702907562,
      "learning_rate": 1.3736549751915818e-06,
      "loss": 0.0079,
      "step": 2638320
    },
    {
      "epoch": 4.317701275832499,
      "grad_norm": 0.10856535285711288,
      "learning_rate": 1.3735890829780646e-06,
      "loss": 0.0081,
      "step": 2638340
    },
    {
      "epoch": 4.317734006271152,
      "grad_norm": 0.5281258821487427,
      "learning_rate": 1.3735231907645475e-06,
      "loss": 0.0064,
      "step": 2638360
    },
    {
      "epoch": 4.317766736709805,
      "grad_norm": 0.18267200887203217,
      "learning_rate": 1.3734572985510303e-06,
      "loss": 0.0068,
      "step": 2638380
    },
    {
      "epoch": 4.3177994671484585,
      "grad_norm": 0.11432163417339325,
      "learning_rate": 1.3733914063375132e-06,
      "loss": 0.0127,
      "step": 2638400
    },
    {
      "epoch": 4.317832197587112,
      "grad_norm": 0.4246198832988739,
      "learning_rate": 1.373325514123996e-06,
      "loss": 0.0129,
      "step": 2638420
    },
    {
      "epoch": 4.317864928025766,
      "grad_norm": 0.04389233514666557,
      "learning_rate": 1.3732596219104791e-06,
      "loss": 0.0053,
      "step": 2638440
    },
    {
      "epoch": 4.317897658464418,
      "grad_norm": 0.2876884639263153,
      "learning_rate": 1.3731937296969618e-06,
      "loss": 0.0108,
      "step": 2638460
    },
    {
      "epoch": 4.317930388903072,
      "grad_norm": 0.33746543526649475,
      "learning_rate": 1.3731278374834448e-06,
      "loss": 0.012,
      "step": 2638480
    },
    {
      "epoch": 4.317963119341726,
      "grad_norm": 0.10883895307779312,
      "learning_rate": 1.3730619452699275e-06,
      "loss": 0.0097,
      "step": 2638500
    },
    {
      "epoch": 4.317995849780379,
      "grad_norm": 0.026999780908226967,
      "learning_rate": 1.3729960530564105e-06,
      "loss": 0.0099,
      "step": 2638520
    },
    {
      "epoch": 4.318028580219032,
      "grad_norm": 0.26292452216148376,
      "learning_rate": 1.3729301608428932e-06,
      "loss": 0.0075,
      "step": 2638540
    },
    {
      "epoch": 4.3180613106576855,
      "grad_norm": 0.06384516507387161,
      "learning_rate": 1.3728642686293762e-06,
      "loss": 0.0078,
      "step": 2638560
    },
    {
      "epoch": 4.318094041096339,
      "grad_norm": 0.1320912390947342,
      "learning_rate": 1.372798376415859e-06,
      "loss": 0.0077,
      "step": 2638580
    },
    {
      "epoch": 4.318126771534992,
      "grad_norm": 0.14308299124240875,
      "learning_rate": 1.3727324842023419e-06,
      "loss": 0.0113,
      "step": 2638600
    },
    {
      "epoch": 4.318159501973645,
      "grad_norm": 0.19280225038528442,
      "learning_rate": 1.3726665919888248e-06,
      "loss": 0.0093,
      "step": 2638620
    },
    {
      "epoch": 4.318192232412299,
      "grad_norm": 0.09102527797222137,
      "learning_rate": 1.3726006997753078e-06,
      "loss": 0.0079,
      "step": 2638640
    },
    {
      "epoch": 4.318224962850952,
      "grad_norm": 0.14841167628765106,
      "learning_rate": 1.3725348075617905e-06,
      "loss": 0.0146,
      "step": 2638660
    },
    {
      "epoch": 4.318257693289605,
      "grad_norm": 0.41187912225723267,
      "learning_rate": 1.3724689153482735e-06,
      "loss": 0.0121,
      "step": 2638680
    },
    {
      "epoch": 4.318290423728259,
      "grad_norm": 0.2755061089992523,
      "learning_rate": 1.3724030231347562e-06,
      "loss": 0.0068,
      "step": 2638700
    },
    {
      "epoch": 4.3183231541669125,
      "grad_norm": 0.40987148880958557,
      "learning_rate": 1.3723371309212392e-06,
      "loss": 0.0137,
      "step": 2638720
    },
    {
      "epoch": 4.318355884605565,
      "grad_norm": 0.16127941012382507,
      "learning_rate": 1.372271238707722e-06,
      "loss": 0.0102,
      "step": 2638740
    },
    {
      "epoch": 4.318388615044219,
      "grad_norm": 0.14645101130008698,
      "learning_rate": 1.3722053464942048e-06,
      "loss": 0.0079,
      "step": 2638760
    },
    {
      "epoch": 4.318421345482872,
      "grad_norm": 0.10262848436832428,
      "learning_rate": 1.3721394542806878e-06,
      "loss": 0.009,
      "step": 2638780
    },
    {
      "epoch": 4.318454075921526,
      "grad_norm": 0.18772350251674652,
      "learning_rate": 1.3720735620671708e-06,
      "loss": 0.008,
      "step": 2638800
    },
    {
      "epoch": 4.318486806360179,
      "grad_norm": 0.10877809673547745,
      "learning_rate": 1.3720076698536535e-06,
      "loss": 0.0105,
      "step": 2638820
    },
    {
      "epoch": 4.318519536798832,
      "grad_norm": 0.10974832624197006,
      "learning_rate": 1.3719417776401364e-06,
      "loss": 0.0095,
      "step": 2638840
    },
    {
      "epoch": 4.318552267237486,
      "grad_norm": 0.26963353157043457,
      "learning_rate": 1.3718758854266192e-06,
      "loss": 0.0082,
      "step": 2638860
    },
    {
      "epoch": 4.318584997676139,
      "grad_norm": 0.15824663639068604,
      "learning_rate": 1.3718099932131021e-06,
      "loss": 0.01,
      "step": 2638880
    },
    {
      "epoch": 4.318617728114792,
      "grad_norm": 1.0729135274887085,
      "learning_rate": 1.3717441009995849e-06,
      "loss": 0.0107,
      "step": 2638900
    },
    {
      "epoch": 4.318650458553446,
      "grad_norm": 0.17600952088832855,
      "learning_rate": 1.3716782087860678e-06,
      "loss": 0.0055,
      "step": 2638920
    },
    {
      "epoch": 4.3186831889920985,
      "grad_norm": 0.2609829604625702,
      "learning_rate": 1.3716123165725506e-06,
      "loss": 0.0062,
      "step": 2638940
    },
    {
      "epoch": 4.318715919430752,
      "grad_norm": 0.34346750378608704,
      "learning_rate": 1.3715464243590337e-06,
      "loss": 0.0078,
      "step": 2638960
    },
    {
      "epoch": 4.318748649869406,
      "grad_norm": 0.47338902950286865,
      "learning_rate": 1.3714805321455165e-06,
      "loss": 0.0081,
      "step": 2638980
    },
    {
      "epoch": 4.318781380308059,
      "grad_norm": 0.26245608925819397,
      "learning_rate": 1.3714146399319994e-06,
      "loss": 0.009,
      "step": 2639000
    },
    {
      "epoch": 4.318814110746712,
      "grad_norm": 0.28674691915512085,
      "learning_rate": 1.3713487477184822e-06,
      "loss": 0.0124,
      "step": 2639020
    },
    {
      "epoch": 4.3188468411853655,
      "grad_norm": 0.13903376460075378,
      "learning_rate": 1.3712828555049651e-06,
      "loss": 0.008,
      "step": 2639040
    },
    {
      "epoch": 4.318879571624019,
      "grad_norm": 0.2651362717151642,
      "learning_rate": 1.3712169632914479e-06,
      "loss": 0.0076,
      "step": 2639060
    },
    {
      "epoch": 4.318912302062672,
      "grad_norm": 0.32256901264190674,
      "learning_rate": 1.3711510710779308e-06,
      "loss": 0.0139,
      "step": 2639080
    },
    {
      "epoch": 4.318945032501325,
      "grad_norm": 0.2618047297000885,
      "learning_rate": 1.3710851788644135e-06,
      "loss": 0.0087,
      "step": 2639100
    },
    {
      "epoch": 4.318977762939979,
      "grad_norm": 0.2773617208003998,
      "learning_rate": 1.3710192866508965e-06,
      "loss": 0.0106,
      "step": 2639120
    },
    {
      "epoch": 4.319010493378633,
      "grad_norm": 0.06624346971511841,
      "learning_rate": 1.3709533944373797e-06,
      "loss": 0.0081,
      "step": 2639140
    },
    {
      "epoch": 4.319043223817285,
      "grad_norm": 0.11378282308578491,
      "learning_rate": 1.3708875022238624e-06,
      "loss": 0.0076,
      "step": 2639160
    },
    {
      "epoch": 4.319075954255939,
      "grad_norm": 0.25827786326408386,
      "learning_rate": 1.3708216100103453e-06,
      "loss": 0.0072,
      "step": 2639180
    },
    {
      "epoch": 4.3191086846945925,
      "grad_norm": 0.08932697772979736,
      "learning_rate": 1.370755717796828e-06,
      "loss": 0.0104,
      "step": 2639200
    },
    {
      "epoch": 4.319141415133245,
      "grad_norm": 0.7859391570091248,
      "learning_rate": 1.3706898255833108e-06,
      "loss": 0.0062,
      "step": 2639220
    },
    {
      "epoch": 4.319174145571899,
      "grad_norm": 0.5724394917488098,
      "learning_rate": 1.3706239333697938e-06,
      "loss": 0.0089,
      "step": 2639240
    },
    {
      "epoch": 4.319206876010552,
      "grad_norm": 0.0691683292388916,
      "learning_rate": 1.3705580411562765e-06,
      "loss": 0.0101,
      "step": 2639260
    },
    {
      "epoch": 4.319239606449206,
      "grad_norm": 0.5162161588668823,
      "learning_rate": 1.3704921489427595e-06,
      "loss": 0.0089,
      "step": 2639280
    },
    {
      "epoch": 4.319272336887859,
      "grad_norm": 0.21556586027145386,
      "learning_rate": 1.3704262567292422e-06,
      "loss": 0.0075,
      "step": 2639300
    },
    {
      "epoch": 4.319305067326512,
      "grad_norm": 0.10497136414051056,
      "learning_rate": 1.3703603645157254e-06,
      "loss": 0.0101,
      "step": 2639320
    },
    {
      "epoch": 4.319337797765166,
      "grad_norm": 0.13784842193126678,
      "learning_rate": 1.3702944723022083e-06,
      "loss": 0.009,
      "step": 2639340
    },
    {
      "epoch": 4.319370528203819,
      "grad_norm": 0.22891539335250854,
      "learning_rate": 1.370228580088691e-06,
      "loss": 0.0066,
      "step": 2639360
    },
    {
      "epoch": 4.319403258642472,
      "grad_norm": 0.05364147946238518,
      "learning_rate": 1.370162687875174e-06,
      "loss": 0.0046,
      "step": 2639380
    },
    {
      "epoch": 4.319435989081126,
      "grad_norm": 0.27483928203582764,
      "learning_rate": 1.3700967956616568e-06,
      "loss": 0.011,
      "step": 2639400
    },
    {
      "epoch": 4.319468719519779,
      "grad_norm": 0.11727865785360336,
      "learning_rate": 1.3700309034481397e-06,
      "loss": 0.0067,
      "step": 2639420
    },
    {
      "epoch": 4.319501449958432,
      "grad_norm": 0.34352144598960876,
      "learning_rate": 1.3699650112346224e-06,
      "loss": 0.0066,
      "step": 2639440
    },
    {
      "epoch": 4.319534180397086,
      "grad_norm": 0.15975797176361084,
      "learning_rate": 1.3698991190211052e-06,
      "loss": 0.0081,
      "step": 2639460
    },
    {
      "epoch": 4.319566910835739,
      "grad_norm": 0.14479821920394897,
      "learning_rate": 1.3698332268075881e-06,
      "loss": 0.0093,
      "step": 2639480
    },
    {
      "epoch": 4.319599641274392,
      "grad_norm": 0.5141196250915527,
      "learning_rate": 1.3697673345940713e-06,
      "loss": 0.01,
      "step": 2639500
    },
    {
      "epoch": 4.319632371713046,
      "grad_norm": 0.3215985894203186,
      "learning_rate": 1.369701442380554e-06,
      "loss": 0.0118,
      "step": 2639520
    },
    {
      "epoch": 4.319665102151699,
      "grad_norm": 0.17650052905082703,
      "learning_rate": 1.369635550167037e-06,
      "loss": 0.0106,
      "step": 2639540
    },
    {
      "epoch": 4.319697832590353,
      "grad_norm": 0.2562812268733978,
      "learning_rate": 1.3695696579535197e-06,
      "loss": 0.0085,
      "step": 2639560
    },
    {
      "epoch": 4.3197305630290055,
      "grad_norm": 0.0492359958589077,
      "learning_rate": 1.3695037657400027e-06,
      "loss": 0.0094,
      "step": 2639580
    },
    {
      "epoch": 4.319763293467659,
      "grad_norm": 0.31276726722717285,
      "learning_rate": 1.3694378735264854e-06,
      "loss": 0.0076,
      "step": 2639600
    },
    {
      "epoch": 4.319796023906313,
      "grad_norm": 0.6599594354629517,
      "learning_rate": 1.3693719813129684e-06,
      "loss": 0.0127,
      "step": 2639620
    },
    {
      "epoch": 4.319828754344965,
      "grad_norm": 0.25320810079574585,
      "learning_rate": 1.3693060890994511e-06,
      "loss": 0.0094,
      "step": 2639640
    },
    {
      "epoch": 4.319861484783619,
      "grad_norm": 0.11665824055671692,
      "learning_rate": 1.3692401968859343e-06,
      "loss": 0.0111,
      "step": 2639660
    },
    {
      "epoch": 4.319894215222273,
      "grad_norm": 0.22250334918498993,
      "learning_rate": 1.369174304672417e-06,
      "loss": 0.0067,
      "step": 2639680
    },
    {
      "epoch": 4.319926945660926,
      "grad_norm": 0.06124911084771156,
      "learning_rate": 1.3691084124589e-06,
      "loss": 0.009,
      "step": 2639700
    },
    {
      "epoch": 4.319959676099579,
      "grad_norm": 0.3289323151111603,
      "learning_rate": 1.3690425202453827e-06,
      "loss": 0.0069,
      "step": 2639720
    },
    {
      "epoch": 4.3199924065382325,
      "grad_norm": 0.10445082187652588,
      "learning_rate": 1.3689766280318657e-06,
      "loss": 0.0102,
      "step": 2639740
    },
    {
      "epoch": 4.320025136976886,
      "grad_norm": 0.17579002678394318,
      "learning_rate": 1.3689107358183484e-06,
      "loss": 0.0087,
      "step": 2639760
    },
    {
      "epoch": 4.320057867415539,
      "grad_norm": 0.8213610053062439,
      "learning_rate": 1.3688448436048314e-06,
      "loss": 0.008,
      "step": 2639780
    },
    {
      "epoch": 4.320090597854192,
      "grad_norm": 0.1896369308233261,
      "learning_rate": 1.368778951391314e-06,
      "loss": 0.007,
      "step": 2639800
    },
    {
      "epoch": 4.320123328292846,
      "grad_norm": 0.3073245882987976,
      "learning_rate": 1.368713059177797e-06,
      "loss": 0.0063,
      "step": 2639820
    },
    {
      "epoch": 4.320156058731499,
      "grad_norm": 0.6351561546325684,
      "learning_rate": 1.36864716696428e-06,
      "loss": 0.0094,
      "step": 2639840
    },
    {
      "epoch": 4.320188789170152,
      "grad_norm": 0.34593382477760315,
      "learning_rate": 1.368581274750763e-06,
      "loss": 0.0141,
      "step": 2639860
    },
    {
      "epoch": 4.320221519608806,
      "grad_norm": 0.5342280864715576,
      "learning_rate": 1.3685153825372457e-06,
      "loss": 0.0112,
      "step": 2639880
    },
    {
      "epoch": 4.320254250047459,
      "grad_norm": 0.12211666256189346,
      "learning_rate": 1.3684494903237286e-06,
      "loss": 0.0065,
      "step": 2639900
    },
    {
      "epoch": 4.320286980486112,
      "grad_norm": 0.07437147945165634,
      "learning_rate": 1.3683835981102114e-06,
      "loss": 0.0072,
      "step": 2639920
    },
    {
      "epoch": 4.320319710924766,
      "grad_norm": 0.16934539377689362,
      "learning_rate": 1.3683177058966943e-06,
      "loss": 0.0073,
      "step": 2639940
    },
    {
      "epoch": 4.320352441363419,
      "grad_norm": 0.46152329444885254,
      "learning_rate": 1.368251813683177e-06,
      "loss": 0.0099,
      "step": 2639960
    },
    {
      "epoch": 4.320385171802073,
      "grad_norm": 0.14272251725196838,
      "learning_rate": 1.36818592146966e-06,
      "loss": 0.0093,
      "step": 2639980
    },
    {
      "epoch": 4.320417902240726,
      "grad_norm": 0.4536438286304474,
      "learning_rate": 1.3681200292561428e-06,
      "loss": 0.0069,
      "step": 2640000
    },
    {
      "epoch": 4.320450632679379,
      "grad_norm": 0.1976911872625351,
      "learning_rate": 1.368054137042626e-06,
      "loss": 0.0092,
      "step": 2640020
    },
    {
      "epoch": 4.320483363118033,
      "grad_norm": 0.14229682087898254,
      "learning_rate": 1.3679882448291087e-06,
      "loss": 0.01,
      "step": 2640040
    },
    {
      "epoch": 4.3205160935566855,
      "grad_norm": 0.17451204359531403,
      "learning_rate": 1.3679223526155916e-06,
      "loss": 0.0075,
      "step": 2640060
    },
    {
      "epoch": 4.320548823995339,
      "grad_norm": 0.2075566202402115,
      "learning_rate": 1.3678564604020744e-06,
      "loss": 0.0112,
      "step": 2640080
    },
    {
      "epoch": 4.320581554433993,
      "grad_norm": 0.09571433067321777,
      "learning_rate": 1.3677905681885573e-06,
      "loss": 0.0085,
      "step": 2640100
    },
    {
      "epoch": 4.320614284872645,
      "grad_norm": 0.4600728750228882,
      "learning_rate": 1.36772467597504e-06,
      "loss": 0.0077,
      "step": 2640120
    },
    {
      "epoch": 4.320647015311299,
      "grad_norm": 0.2364538162946701,
      "learning_rate": 1.367658783761523e-06,
      "loss": 0.0113,
      "step": 2640140
    },
    {
      "epoch": 4.320679745749953,
      "grad_norm": 1.5294548273086548,
      "learning_rate": 1.3675928915480057e-06,
      "loss": 0.0072,
      "step": 2640160
    },
    {
      "epoch": 4.320712476188606,
      "grad_norm": 0.25026988983154297,
      "learning_rate": 1.3675269993344887e-06,
      "loss": 0.0134,
      "step": 2640180
    },
    {
      "epoch": 4.320745206627259,
      "grad_norm": 0.11226194351911545,
      "learning_rate": 1.3674611071209716e-06,
      "loss": 0.0084,
      "step": 2640200
    },
    {
      "epoch": 4.3207779370659125,
      "grad_norm": 0.44300124049186707,
      "learning_rate": 1.3673952149074546e-06,
      "loss": 0.0087,
      "step": 2640220
    },
    {
      "epoch": 4.320810667504566,
      "grad_norm": 0.03990461304783821,
      "learning_rate": 1.3673293226939373e-06,
      "loss": 0.0073,
      "step": 2640240
    },
    {
      "epoch": 4.32084339794322,
      "grad_norm": 0.3485698699951172,
      "learning_rate": 1.3672634304804203e-06,
      "loss": 0.0179,
      "step": 2640260
    },
    {
      "epoch": 4.320876128381872,
      "grad_norm": 0.31193700432777405,
      "learning_rate": 1.367197538266903e-06,
      "loss": 0.0123,
      "step": 2640280
    },
    {
      "epoch": 4.320908858820526,
      "grad_norm": 0.4478323757648468,
      "learning_rate": 1.367131646053386e-06,
      "loss": 0.0087,
      "step": 2640300
    },
    {
      "epoch": 4.32094158925918,
      "grad_norm": 0.10903068631887436,
      "learning_rate": 1.3670657538398687e-06,
      "loss": 0.0081,
      "step": 2640320
    },
    {
      "epoch": 4.320974319697832,
      "grad_norm": 0.5255460143089294,
      "learning_rate": 1.3669998616263517e-06,
      "loss": 0.01,
      "step": 2640340
    },
    {
      "epoch": 4.321007050136486,
      "grad_norm": 0.41324087977409363,
      "learning_rate": 1.3669339694128344e-06,
      "loss": 0.011,
      "step": 2640360
    },
    {
      "epoch": 4.3210397805751395,
      "grad_norm": 0.22541671991348267,
      "learning_rate": 1.3668680771993176e-06,
      "loss": 0.0075,
      "step": 2640380
    },
    {
      "epoch": 4.321072511013792,
      "grad_norm": 0.24802984297275543,
      "learning_rate": 1.3668021849858005e-06,
      "loss": 0.0112,
      "step": 2640400
    },
    {
      "epoch": 4.321105241452446,
      "grad_norm": 0.05673404783010483,
      "learning_rate": 1.3667362927722833e-06,
      "loss": 0.0061,
      "step": 2640420
    },
    {
      "epoch": 4.321137971891099,
      "grad_norm": 0.2038033902645111,
      "learning_rate": 1.366670400558766e-06,
      "loss": 0.01,
      "step": 2640440
    },
    {
      "epoch": 4.321170702329753,
      "grad_norm": 0.21627365052700043,
      "learning_rate": 1.366604508345249e-06,
      "loss": 0.0106,
      "step": 2640460
    },
    {
      "epoch": 4.321203432768406,
      "grad_norm": 0.40354403853416443,
      "learning_rate": 1.3665386161317317e-06,
      "loss": 0.0061,
      "step": 2640480
    },
    {
      "epoch": 4.321236163207059,
      "grad_norm": 0.056023009121418,
      "learning_rate": 1.3664727239182146e-06,
      "loss": 0.0099,
      "step": 2640500
    },
    {
      "epoch": 4.321268893645713,
      "grad_norm": 0.2046113908290863,
      "learning_rate": 1.3664068317046974e-06,
      "loss": 0.0085,
      "step": 2640520
    },
    {
      "epoch": 4.321301624084366,
      "grad_norm": 0.5248158574104309,
      "learning_rate": 1.3663409394911805e-06,
      "loss": 0.011,
      "step": 2640540
    },
    {
      "epoch": 4.321334354523019,
      "grad_norm": 0.08479277044534683,
      "learning_rate": 1.3662750472776635e-06,
      "loss": 0.0091,
      "step": 2640560
    },
    {
      "epoch": 4.321367084961673,
      "grad_norm": 0.29805880784988403,
      "learning_rate": 1.3662091550641462e-06,
      "loss": 0.012,
      "step": 2640580
    },
    {
      "epoch": 4.321399815400326,
      "grad_norm": 0.4825737178325653,
      "learning_rate": 1.3661432628506292e-06,
      "loss": 0.0073,
      "step": 2640600
    },
    {
      "epoch": 4.321432545838979,
      "grad_norm": 0.16465012729167938,
      "learning_rate": 1.366077370637112e-06,
      "loss": 0.0092,
      "step": 2640620
    },
    {
      "epoch": 4.321465276277633,
      "grad_norm": 0.07043758779764175,
      "learning_rate": 1.3660114784235947e-06,
      "loss": 0.0066,
      "step": 2640640
    },
    {
      "epoch": 4.321498006716286,
      "grad_norm": 0.2181338369846344,
      "learning_rate": 1.3659455862100776e-06,
      "loss": 0.0088,
      "step": 2640660
    },
    {
      "epoch": 4.321530737154939,
      "grad_norm": 0.1744944155216217,
      "learning_rate": 1.3658796939965604e-06,
      "loss": 0.0095,
      "step": 2640680
    },
    {
      "epoch": 4.3215634675935926,
      "grad_norm": 0.2515020966529846,
      "learning_rate": 1.3658138017830433e-06,
      "loss": 0.0065,
      "step": 2640700
    },
    {
      "epoch": 4.321596198032246,
      "grad_norm": 0.19263750314712524,
      "learning_rate": 1.3657479095695265e-06,
      "loss": 0.0083,
      "step": 2640720
    },
    {
      "epoch": 4.3216289284709,
      "grad_norm": 0.07871830463409424,
      "learning_rate": 1.3656820173560092e-06,
      "loss": 0.0078,
      "step": 2640740
    },
    {
      "epoch": 4.3216616589095525,
      "grad_norm": 0.22407260537147522,
      "learning_rate": 1.3656161251424922e-06,
      "loss": 0.0093,
      "step": 2640760
    },
    {
      "epoch": 4.321694389348206,
      "grad_norm": 0.1730659306049347,
      "learning_rate": 1.365550232928975e-06,
      "loss": 0.0081,
      "step": 2640780
    },
    {
      "epoch": 4.32172711978686,
      "grad_norm": 0.0690227821469307,
      "learning_rate": 1.3654843407154579e-06,
      "loss": 0.0084,
      "step": 2640800
    },
    {
      "epoch": 4.321759850225512,
      "grad_norm": 0.31153902411460876,
      "learning_rate": 1.3654184485019406e-06,
      "loss": 0.0063,
      "step": 2640820
    },
    {
      "epoch": 4.321792580664166,
      "grad_norm": 0.18910253047943115,
      "learning_rate": 1.3653525562884235e-06,
      "loss": 0.0108,
      "step": 2640840
    },
    {
      "epoch": 4.3218253111028195,
      "grad_norm": 0.18032260239124298,
      "learning_rate": 1.3652866640749063e-06,
      "loss": 0.0069,
      "step": 2640860
    },
    {
      "epoch": 4.321858041541473,
      "grad_norm": 0.27481794357299805,
      "learning_rate": 1.365220771861389e-06,
      "loss": 0.009,
      "step": 2640880
    },
    {
      "epoch": 4.321890771980126,
      "grad_norm": 0.09132584929466248,
      "learning_rate": 1.3651548796478722e-06,
      "loss": 0.0108,
      "step": 2640900
    },
    {
      "epoch": 4.321923502418779,
      "grad_norm": 0.336291640996933,
      "learning_rate": 1.3650889874343551e-06,
      "loss": 0.0147,
      "step": 2640920
    },
    {
      "epoch": 4.321956232857433,
      "grad_norm": 0.3889521360397339,
      "learning_rate": 1.3650230952208379e-06,
      "loss": 0.0089,
      "step": 2640940
    },
    {
      "epoch": 4.321988963296086,
      "grad_norm": 0.1904873102903366,
      "learning_rate": 1.3649572030073208e-06,
      "loss": 0.0086,
      "step": 2640960
    },
    {
      "epoch": 4.322021693734739,
      "grad_norm": 0.0893479660153389,
      "learning_rate": 1.3648913107938036e-06,
      "loss": 0.0069,
      "step": 2640980
    },
    {
      "epoch": 4.322054424173393,
      "grad_norm": 0.28618165850639343,
      "learning_rate": 1.3648254185802865e-06,
      "loss": 0.0081,
      "step": 2641000
    },
    {
      "epoch": 4.3220871546120465,
      "grad_norm": 0.7191420793533325,
      "learning_rate": 1.3647595263667693e-06,
      "loss": 0.0102,
      "step": 2641020
    },
    {
      "epoch": 4.322119885050699,
      "grad_norm": 0.0607924647629261,
      "learning_rate": 1.3646936341532522e-06,
      "loss": 0.0113,
      "step": 2641040
    },
    {
      "epoch": 4.322152615489353,
      "grad_norm": 0.4208102524280548,
      "learning_rate": 1.364627741939735e-06,
      "loss": 0.0142,
      "step": 2641060
    },
    {
      "epoch": 4.322185345928006,
      "grad_norm": 0.031581055372953415,
      "learning_rate": 1.3645618497262181e-06,
      "loss": 0.0064,
      "step": 2641080
    },
    {
      "epoch": 4.322218076366659,
      "grad_norm": 0.21031314134597778,
      "learning_rate": 1.3644959575127009e-06,
      "loss": 0.0067,
      "step": 2641100
    },
    {
      "epoch": 4.322250806805313,
      "grad_norm": 0.1926705241203308,
      "learning_rate": 1.3644300652991838e-06,
      "loss": 0.0148,
      "step": 2641120
    },
    {
      "epoch": 4.322283537243966,
      "grad_norm": 0.11305516958236694,
      "learning_rate": 1.3643641730856665e-06,
      "loss": 0.007,
      "step": 2641140
    },
    {
      "epoch": 4.32231626768262,
      "grad_norm": 0.14853522181510925,
      "learning_rate": 1.3642982808721495e-06,
      "loss": 0.0092,
      "step": 2641160
    },
    {
      "epoch": 4.322348998121273,
      "grad_norm": 0.17189526557922363,
      "learning_rate": 1.3642323886586322e-06,
      "loss": 0.0096,
      "step": 2641180
    },
    {
      "epoch": 4.322381728559926,
      "grad_norm": 0.8631097078323364,
      "learning_rate": 1.3641664964451152e-06,
      "loss": 0.0089,
      "step": 2641200
    },
    {
      "epoch": 4.32241445899858,
      "grad_norm": 0.1996517777442932,
      "learning_rate": 1.364100604231598e-06,
      "loss": 0.0122,
      "step": 2641220
    },
    {
      "epoch": 4.3224471894372325,
      "grad_norm": 0.26998963952064514,
      "learning_rate": 1.3640347120180809e-06,
      "loss": 0.008,
      "step": 2641240
    },
    {
      "epoch": 4.322479919875886,
      "grad_norm": 0.18821685016155243,
      "learning_rate": 1.3639688198045638e-06,
      "loss": 0.0108,
      "step": 2641260
    },
    {
      "epoch": 4.32251265031454,
      "grad_norm": 0.16313205659389496,
      "learning_rate": 1.3639029275910468e-06,
      "loss": 0.0086,
      "step": 2641280
    },
    {
      "epoch": 4.322545380753192,
      "grad_norm": 0.2789755165576935,
      "learning_rate": 1.3638370353775295e-06,
      "loss": 0.0089,
      "step": 2641300
    },
    {
      "epoch": 4.322578111191846,
      "grad_norm": 0.12547916173934937,
      "learning_rate": 1.3637711431640125e-06,
      "loss": 0.0092,
      "step": 2641320
    },
    {
      "epoch": 4.3226108416305,
      "grad_norm": 0.2656703293323517,
      "learning_rate": 1.3637052509504952e-06,
      "loss": 0.0055,
      "step": 2641340
    },
    {
      "epoch": 4.322643572069153,
      "grad_norm": 0.18582946062088013,
      "learning_rate": 1.3636393587369782e-06,
      "loss": 0.01,
      "step": 2641360
    },
    {
      "epoch": 4.322676302507806,
      "grad_norm": 0.1264522224664688,
      "learning_rate": 1.363573466523461e-06,
      "loss": 0.0064,
      "step": 2641380
    },
    {
      "epoch": 4.3227090329464595,
      "grad_norm": 0.5194578170776367,
      "learning_rate": 1.3635075743099439e-06,
      "loss": 0.0066,
      "step": 2641400
    },
    {
      "epoch": 4.322741763385113,
      "grad_norm": 0.28136149048805237,
      "learning_rate": 1.3634416820964268e-06,
      "loss": 0.0113,
      "step": 2641420
    },
    {
      "epoch": 4.322774493823767,
      "grad_norm": 0.23325103521347046,
      "learning_rate": 1.3633757898829098e-06,
      "loss": 0.0105,
      "step": 2641440
    },
    {
      "epoch": 4.322807224262419,
      "grad_norm": 0.3124794065952301,
      "learning_rate": 1.3633098976693925e-06,
      "loss": 0.0131,
      "step": 2641460
    },
    {
      "epoch": 4.322839954701073,
      "grad_norm": 0.08115176856517792,
      "learning_rate": 1.3632440054558755e-06,
      "loss": 0.0077,
      "step": 2641480
    },
    {
      "epoch": 4.322872685139727,
      "grad_norm": 0.7134796380996704,
      "learning_rate": 1.3631781132423582e-06,
      "loss": 0.0155,
      "step": 2641500
    },
    {
      "epoch": 4.322905415578379,
      "grad_norm": 0.26824283599853516,
      "learning_rate": 1.3631122210288411e-06,
      "loss": 0.0101,
      "step": 2641520
    },
    {
      "epoch": 4.322938146017033,
      "grad_norm": 0.37548118829727173,
      "learning_rate": 1.3630463288153239e-06,
      "loss": 0.0156,
      "step": 2641540
    },
    {
      "epoch": 4.3229708764556865,
      "grad_norm": 0.2340039163827896,
      "learning_rate": 1.3629804366018068e-06,
      "loss": 0.0075,
      "step": 2641560
    },
    {
      "epoch": 4.323003606894339,
      "grad_norm": 0.22674085199832916,
      "learning_rate": 1.3629145443882896e-06,
      "loss": 0.0103,
      "step": 2641580
    },
    {
      "epoch": 4.323036337332993,
      "grad_norm": 0.06464364379644394,
      "learning_rate": 1.3628486521747727e-06,
      "loss": 0.0111,
      "step": 2641600
    },
    {
      "epoch": 4.323069067771646,
      "grad_norm": 0.04760250821709633,
      "learning_rate": 1.3627827599612557e-06,
      "loss": 0.0062,
      "step": 2641620
    },
    {
      "epoch": 4.3231017982103,
      "grad_norm": 0.12777306139469147,
      "learning_rate": 1.3627168677477384e-06,
      "loss": 0.0065,
      "step": 2641640
    },
    {
      "epoch": 4.323134528648953,
      "grad_norm": 0.32086631655693054,
      "learning_rate": 1.3626509755342212e-06,
      "loss": 0.0067,
      "step": 2641660
    },
    {
      "epoch": 4.323167259087606,
      "grad_norm": 0.21393516659736633,
      "learning_rate": 1.3625850833207041e-06,
      "loss": 0.0141,
      "step": 2641680
    },
    {
      "epoch": 4.32319998952626,
      "grad_norm": 0.40206414461135864,
      "learning_rate": 1.3625191911071869e-06,
      "loss": 0.0069,
      "step": 2641700
    },
    {
      "epoch": 4.323232719964913,
      "grad_norm": 0.2852543890476227,
      "learning_rate": 1.3624532988936698e-06,
      "loss": 0.0088,
      "step": 2641720
    },
    {
      "epoch": 4.323265450403566,
      "grad_norm": 0.2925545275211334,
      "learning_rate": 1.3623874066801526e-06,
      "loss": 0.012,
      "step": 2641740
    },
    {
      "epoch": 4.32329818084222,
      "grad_norm": 0.05897599086165428,
      "learning_rate": 1.3623215144666355e-06,
      "loss": 0.0111,
      "step": 2641760
    },
    {
      "epoch": 4.323330911280873,
      "grad_norm": 0.6515049934387207,
      "learning_rate": 1.3622556222531187e-06,
      "loss": 0.0108,
      "step": 2641780
    },
    {
      "epoch": 4.323363641719526,
      "grad_norm": 0.16530536115169525,
      "learning_rate": 1.3621897300396014e-06,
      "loss": 0.0079,
      "step": 2641800
    },
    {
      "epoch": 4.32339637215818,
      "grad_norm": 0.19827605783939362,
      "learning_rate": 1.3621238378260844e-06,
      "loss": 0.0061,
      "step": 2641820
    },
    {
      "epoch": 4.323429102596833,
      "grad_norm": 0.16017909348011017,
      "learning_rate": 1.362057945612567e-06,
      "loss": 0.0063,
      "step": 2641840
    },
    {
      "epoch": 4.323461833035486,
      "grad_norm": 0.545569658279419,
      "learning_rate": 1.3619920533990498e-06,
      "loss": 0.0103,
      "step": 2641860
    },
    {
      "epoch": 4.3234945634741395,
      "grad_norm": 0.10424293577671051,
      "learning_rate": 1.3619261611855328e-06,
      "loss": 0.0095,
      "step": 2641880
    },
    {
      "epoch": 4.323527293912793,
      "grad_norm": 0.36497652530670166,
      "learning_rate": 1.3618602689720155e-06,
      "loss": 0.0067,
      "step": 2641900
    },
    {
      "epoch": 4.323560024351447,
      "grad_norm": 0.36671632528305054,
      "learning_rate": 1.3617943767584985e-06,
      "loss": 0.0113,
      "step": 2641920
    },
    {
      "epoch": 4.323592754790099,
      "grad_norm": 0.13582243025302887,
      "learning_rate": 1.3617284845449812e-06,
      "loss": 0.0065,
      "step": 2641940
    },
    {
      "epoch": 4.323625485228753,
      "grad_norm": 0.09074344485998154,
      "learning_rate": 1.3616625923314644e-06,
      "loss": 0.0082,
      "step": 2641960
    },
    {
      "epoch": 4.323658215667407,
      "grad_norm": 0.3172341585159302,
      "learning_rate": 1.3615967001179473e-06,
      "loss": 0.0053,
      "step": 2641980
    },
    {
      "epoch": 4.323690946106059,
      "grad_norm": 0.22957228124141693,
      "learning_rate": 1.36153080790443e-06,
      "loss": 0.0133,
      "step": 2642000
    },
    {
      "epoch": 4.323723676544713,
      "grad_norm": 0.27184370160102844,
      "learning_rate": 1.361464915690913e-06,
      "loss": 0.0119,
      "step": 2642020
    },
    {
      "epoch": 4.3237564069833665,
      "grad_norm": 0.2966715693473816,
      "learning_rate": 1.3613990234773958e-06,
      "loss": 0.0105,
      "step": 2642040
    },
    {
      "epoch": 4.32378913742202,
      "grad_norm": 0.3507045805454254,
      "learning_rate": 1.3613331312638787e-06,
      "loss": 0.0082,
      "step": 2642060
    },
    {
      "epoch": 4.323821867860673,
      "grad_norm": 0.3956371247768402,
      "learning_rate": 1.3612672390503615e-06,
      "loss": 0.006,
      "step": 2642080
    },
    {
      "epoch": 4.323854598299326,
      "grad_norm": 0.8510723114013672,
      "learning_rate": 1.3612013468368442e-06,
      "loss": 0.0084,
      "step": 2642100
    },
    {
      "epoch": 4.32388732873798,
      "grad_norm": 0.10925082862377167,
      "learning_rate": 1.3611354546233271e-06,
      "loss": 0.0076,
      "step": 2642120
    },
    {
      "epoch": 4.323920059176633,
      "grad_norm": 0.11055903881788254,
      "learning_rate": 1.3610695624098103e-06,
      "loss": 0.0076,
      "step": 2642140
    },
    {
      "epoch": 4.323952789615286,
      "grad_norm": 0.18243153393268585,
      "learning_rate": 1.361003670196293e-06,
      "loss": 0.0104,
      "step": 2642160
    },
    {
      "epoch": 4.32398552005394,
      "grad_norm": 0.12771740555763245,
      "learning_rate": 1.360937777982776e-06,
      "loss": 0.0106,
      "step": 2642180
    },
    {
      "epoch": 4.3240182504925935,
      "grad_norm": 0.1262969672679901,
      "learning_rate": 1.3608718857692587e-06,
      "loss": 0.0086,
      "step": 2642200
    },
    {
      "epoch": 4.324050980931246,
      "grad_norm": 0.5070704221725464,
      "learning_rate": 1.3608059935557417e-06,
      "loss": 0.0112,
      "step": 2642220
    },
    {
      "epoch": 4.3240837113699,
      "grad_norm": 0.34036022424697876,
      "learning_rate": 1.3607401013422244e-06,
      "loss": 0.008,
      "step": 2642240
    },
    {
      "epoch": 4.324116441808553,
      "grad_norm": 0.11729545891284943,
      "learning_rate": 1.3606742091287074e-06,
      "loss": 0.0079,
      "step": 2642260
    },
    {
      "epoch": 4.324149172247206,
      "grad_norm": 0.38108307123184204,
      "learning_rate": 1.3606083169151901e-06,
      "loss": 0.0132,
      "step": 2642280
    },
    {
      "epoch": 4.32418190268586,
      "grad_norm": 0.29049912095069885,
      "learning_rate": 1.3605424247016733e-06,
      "loss": 0.0081,
      "step": 2642300
    },
    {
      "epoch": 4.324214633124513,
      "grad_norm": 0.11863541603088379,
      "learning_rate": 1.360476532488156e-06,
      "loss": 0.0092,
      "step": 2642320
    },
    {
      "epoch": 4.324247363563167,
      "grad_norm": 0.10868410021066666,
      "learning_rate": 1.360410640274639e-06,
      "loss": 0.0101,
      "step": 2642340
    },
    {
      "epoch": 4.32428009400182,
      "grad_norm": 0.06673026084899902,
      "learning_rate": 1.3603447480611217e-06,
      "loss": 0.0104,
      "step": 2642360
    },
    {
      "epoch": 4.324312824440473,
      "grad_norm": 0.1341218203306198,
      "learning_rate": 1.3602788558476047e-06,
      "loss": 0.0101,
      "step": 2642380
    },
    {
      "epoch": 4.324345554879127,
      "grad_norm": 0.12953689694404602,
      "learning_rate": 1.3602129636340874e-06,
      "loss": 0.0115,
      "step": 2642400
    },
    {
      "epoch": 4.3243782853177795,
      "grad_norm": 0.27911362051963806,
      "learning_rate": 1.3601470714205704e-06,
      "loss": 0.0102,
      "step": 2642420
    },
    {
      "epoch": 4.324411015756433,
      "grad_norm": 0.25922921299934387,
      "learning_rate": 1.360081179207053e-06,
      "loss": 0.0087,
      "step": 2642440
    },
    {
      "epoch": 4.324443746195087,
      "grad_norm": 0.3621373474597931,
      "learning_rate": 1.360015286993536e-06,
      "loss": 0.0084,
      "step": 2642460
    },
    {
      "epoch": 4.32447647663374,
      "grad_norm": 0.21323969960212708,
      "learning_rate": 1.359949394780019e-06,
      "loss": 0.0096,
      "step": 2642480
    },
    {
      "epoch": 4.324509207072393,
      "grad_norm": 0.35249534249305725,
      "learning_rate": 1.359883502566502e-06,
      "loss": 0.0081,
      "step": 2642500
    },
    {
      "epoch": 4.3245419375110465,
      "grad_norm": 0.5858892798423767,
      "learning_rate": 1.3598176103529847e-06,
      "loss": 0.009,
      "step": 2642520
    },
    {
      "epoch": 4.3245746679497,
      "grad_norm": 0.0984266996383667,
      "learning_rate": 1.3597517181394676e-06,
      "loss": 0.012,
      "step": 2642540
    },
    {
      "epoch": 4.324607398388353,
      "grad_norm": 0.11770013719797134,
      "learning_rate": 1.3596858259259504e-06,
      "loss": 0.0081,
      "step": 2642560
    },
    {
      "epoch": 4.324640128827006,
      "grad_norm": 0.06441918015480042,
      "learning_rate": 1.3596199337124333e-06,
      "loss": 0.0068,
      "step": 2642580
    },
    {
      "epoch": 4.32467285926566,
      "grad_norm": 0.12243374437093735,
      "learning_rate": 1.359554041498916e-06,
      "loss": 0.0093,
      "step": 2642600
    },
    {
      "epoch": 4.324705589704314,
      "grad_norm": 0.2146022915840149,
      "learning_rate": 1.359488149285399e-06,
      "loss": 0.0087,
      "step": 2642620
    },
    {
      "epoch": 4.324738320142966,
      "grad_norm": 0.06701955199241638,
      "learning_rate": 1.3594222570718818e-06,
      "loss": 0.0095,
      "step": 2642640
    },
    {
      "epoch": 4.32477105058162,
      "grad_norm": 0.17080065608024597,
      "learning_rate": 1.359356364858365e-06,
      "loss": 0.0076,
      "step": 2642660
    },
    {
      "epoch": 4.3248037810202735,
      "grad_norm": 0.8723108768463135,
      "learning_rate": 1.3592904726448477e-06,
      "loss": 0.0083,
      "step": 2642680
    },
    {
      "epoch": 4.324836511458926,
      "grad_norm": 0.1390688270330429,
      "learning_rate": 1.3592245804313306e-06,
      "loss": 0.0094,
      "step": 2642700
    },
    {
      "epoch": 4.32486924189758,
      "grad_norm": 0.16860806941986084,
      "learning_rate": 1.3591586882178134e-06,
      "loss": 0.0106,
      "step": 2642720
    },
    {
      "epoch": 4.324901972336233,
      "grad_norm": 0.18299038708209991,
      "learning_rate": 1.3590927960042963e-06,
      "loss": 0.0087,
      "step": 2642740
    },
    {
      "epoch": 4.324934702774886,
      "grad_norm": 0.3084390163421631,
      "learning_rate": 1.359026903790779e-06,
      "loss": 0.0072,
      "step": 2642760
    },
    {
      "epoch": 4.32496743321354,
      "grad_norm": 0.13890618085861206,
      "learning_rate": 1.358961011577262e-06,
      "loss": 0.0096,
      "step": 2642780
    },
    {
      "epoch": 4.325000163652193,
      "grad_norm": 0.16063277423381805,
      "learning_rate": 1.3588951193637447e-06,
      "loss": 0.0072,
      "step": 2642800
    },
    {
      "epoch": 4.325032894090847,
      "grad_norm": 0.08689382672309875,
      "learning_rate": 1.3588292271502277e-06,
      "loss": 0.0081,
      "step": 2642820
    },
    {
      "epoch": 4.3250656245295,
      "grad_norm": 0.18104571104049683,
      "learning_rate": 1.3587633349367107e-06,
      "loss": 0.0099,
      "step": 2642840
    },
    {
      "epoch": 4.325098354968153,
      "grad_norm": 0.22600147128105164,
      "learning_rate": 1.3586974427231936e-06,
      "loss": 0.0109,
      "step": 2642860
    },
    {
      "epoch": 4.325131085406807,
      "grad_norm": 0.07093106955289841,
      "learning_rate": 1.3586315505096763e-06,
      "loss": 0.0091,
      "step": 2642880
    },
    {
      "epoch": 4.32516381584546,
      "grad_norm": 0.4882013499736786,
      "learning_rate": 1.3585656582961593e-06,
      "loss": 0.0092,
      "step": 2642900
    },
    {
      "epoch": 4.325196546284113,
      "grad_norm": 0.08193125575780869,
      "learning_rate": 1.358499766082642e-06,
      "loss": 0.0079,
      "step": 2642920
    },
    {
      "epoch": 4.325229276722767,
      "grad_norm": 0.27171772718429565,
      "learning_rate": 1.358433873869125e-06,
      "loss": 0.0075,
      "step": 2642940
    },
    {
      "epoch": 4.32526200716142,
      "grad_norm": 0.3983982801437378,
      "learning_rate": 1.3583679816556077e-06,
      "loss": 0.0148,
      "step": 2642960
    },
    {
      "epoch": 4.325294737600073,
      "grad_norm": 0.28927239775657654,
      "learning_rate": 1.3583020894420907e-06,
      "loss": 0.0107,
      "step": 2642980
    },
    {
      "epoch": 4.325327468038727,
      "grad_norm": 0.12475060671567917,
      "learning_rate": 1.3582361972285734e-06,
      "loss": 0.0081,
      "step": 2643000
    },
    {
      "epoch": 4.32536019847738,
      "grad_norm": 0.2862342894077301,
      "learning_rate": 1.3581703050150566e-06,
      "loss": 0.0112,
      "step": 2643020
    },
    {
      "epoch": 4.325392928916033,
      "grad_norm": 0.15399231016635895,
      "learning_rate": 1.3581044128015395e-06,
      "loss": 0.0085,
      "step": 2643040
    },
    {
      "epoch": 4.3254256593546865,
      "grad_norm": 0.5276956558227539,
      "learning_rate": 1.3580385205880223e-06,
      "loss": 0.0079,
      "step": 2643060
    },
    {
      "epoch": 4.32545838979334,
      "grad_norm": 0.03753326088190079,
      "learning_rate": 1.357972628374505e-06,
      "loss": 0.0076,
      "step": 2643080
    },
    {
      "epoch": 4.325491120231994,
      "grad_norm": 0.10751093178987503,
      "learning_rate": 1.357906736160988e-06,
      "loss": 0.0074,
      "step": 2643100
    },
    {
      "epoch": 4.325523850670646,
      "grad_norm": 0.05197570100426674,
      "learning_rate": 1.3578408439474707e-06,
      "loss": 0.0142,
      "step": 2643120
    },
    {
      "epoch": 4.3255565811093,
      "grad_norm": 0.17041341960430145,
      "learning_rate": 1.3577749517339537e-06,
      "loss": 0.0113,
      "step": 2643140
    },
    {
      "epoch": 4.325589311547954,
      "grad_norm": 0.0838339552283287,
      "learning_rate": 1.3577090595204364e-06,
      "loss": 0.0067,
      "step": 2643160
    },
    {
      "epoch": 4.325622041986607,
      "grad_norm": 0.3540865182876587,
      "learning_rate": 1.3576431673069196e-06,
      "loss": 0.0101,
      "step": 2643180
    },
    {
      "epoch": 4.32565477242526,
      "grad_norm": 0.228994220495224,
      "learning_rate": 1.3575772750934025e-06,
      "loss": 0.0127,
      "step": 2643200
    },
    {
      "epoch": 4.3256875028639135,
      "grad_norm": 0.23246362805366516,
      "learning_rate": 1.3575113828798852e-06,
      "loss": 0.0084,
      "step": 2643220
    },
    {
      "epoch": 4.325720233302567,
      "grad_norm": 0.2969540059566498,
      "learning_rate": 1.3574454906663682e-06,
      "loss": 0.0084,
      "step": 2643240
    },
    {
      "epoch": 4.32575296374122,
      "grad_norm": 0.354464054107666,
      "learning_rate": 1.357379598452851e-06,
      "loss": 0.0116,
      "step": 2643260
    },
    {
      "epoch": 4.325785694179873,
      "grad_norm": 0.03732561320066452,
      "learning_rate": 1.3573137062393337e-06,
      "loss": 0.0097,
      "step": 2643280
    },
    {
      "epoch": 4.325818424618527,
      "grad_norm": 0.5400552153587341,
      "learning_rate": 1.3572478140258166e-06,
      "loss": 0.0134,
      "step": 2643300
    },
    {
      "epoch": 4.32585115505718,
      "grad_norm": 0.3103787899017334,
      "learning_rate": 1.3571819218122994e-06,
      "loss": 0.0099,
      "step": 2643320
    },
    {
      "epoch": 4.325883885495833,
      "grad_norm": 0.20399220287799835,
      "learning_rate": 1.3571160295987823e-06,
      "loss": 0.0121,
      "step": 2643340
    },
    {
      "epoch": 4.325916615934487,
      "grad_norm": 0.17927074432373047,
      "learning_rate": 1.3570501373852655e-06,
      "loss": 0.0095,
      "step": 2643360
    },
    {
      "epoch": 4.32594934637314,
      "grad_norm": 0.3327597975730896,
      "learning_rate": 1.3569842451717482e-06,
      "loss": 0.009,
      "step": 2643380
    },
    {
      "epoch": 4.325982076811793,
      "grad_norm": 0.41212019324302673,
      "learning_rate": 1.3569183529582312e-06,
      "loss": 0.0104,
      "step": 2643400
    },
    {
      "epoch": 4.326014807250447,
      "grad_norm": 0.22443480789661407,
      "learning_rate": 1.356852460744714e-06,
      "loss": 0.01,
      "step": 2643420
    },
    {
      "epoch": 4.3260475376891,
      "grad_norm": 0.1520453840494156,
      "learning_rate": 1.3567865685311969e-06,
      "loss": 0.0154,
      "step": 2643440
    },
    {
      "epoch": 4.326080268127753,
      "grad_norm": 0.12653949856758118,
      "learning_rate": 1.3567206763176796e-06,
      "loss": 0.0104,
      "step": 2643460
    },
    {
      "epoch": 4.326112998566407,
      "grad_norm": 0.03746639937162399,
      "learning_rate": 1.3566547841041626e-06,
      "loss": 0.0092,
      "step": 2643480
    },
    {
      "epoch": 4.32614572900506,
      "grad_norm": 0.26910915970802307,
      "learning_rate": 1.3565888918906453e-06,
      "loss": 0.0089,
      "step": 2643500
    },
    {
      "epoch": 4.326178459443714,
      "grad_norm": 0.18163061141967773,
      "learning_rate": 1.356522999677128e-06,
      "loss": 0.009,
      "step": 2643520
    },
    {
      "epoch": 4.3262111898823665,
      "grad_norm": 0.13934050500392914,
      "learning_rate": 1.3564571074636112e-06,
      "loss": 0.0094,
      "step": 2643540
    },
    {
      "epoch": 4.32624392032102,
      "grad_norm": 0.16033954918384552,
      "learning_rate": 1.3563912152500942e-06,
      "loss": 0.0166,
      "step": 2643560
    },
    {
      "epoch": 4.326276650759674,
      "grad_norm": 0.32877492904663086,
      "learning_rate": 1.3563253230365769e-06,
      "loss": 0.0101,
      "step": 2643580
    },
    {
      "epoch": 4.326309381198326,
      "grad_norm": 0.24880041182041168,
      "learning_rate": 1.3562594308230598e-06,
      "loss": 0.0099,
      "step": 2643600
    },
    {
      "epoch": 4.32634211163698,
      "grad_norm": 0.30617451667785645,
      "learning_rate": 1.3561935386095426e-06,
      "loss": 0.0158,
      "step": 2643620
    },
    {
      "epoch": 4.326374842075634,
      "grad_norm": 0.10830774158239365,
      "learning_rate": 1.3561276463960255e-06,
      "loss": 0.007,
      "step": 2643640
    },
    {
      "epoch": 4.326407572514287,
      "grad_norm": 0.2625664174556732,
      "learning_rate": 1.3560617541825083e-06,
      "loss": 0.0089,
      "step": 2643660
    },
    {
      "epoch": 4.32644030295294,
      "grad_norm": 0.18497183918952942,
      "learning_rate": 1.3559958619689912e-06,
      "loss": 0.0061,
      "step": 2643680
    },
    {
      "epoch": 4.3264730333915935,
      "grad_norm": 0.45211437344551086,
      "learning_rate": 1.355929969755474e-06,
      "loss": 0.0095,
      "step": 2643700
    },
    {
      "epoch": 4.326505763830247,
      "grad_norm": 0.08372373133897781,
      "learning_rate": 1.3558640775419571e-06,
      "loss": 0.0102,
      "step": 2643720
    },
    {
      "epoch": 4.3265384942689,
      "grad_norm": 0.2539072036743164,
      "learning_rate": 1.3557981853284399e-06,
      "loss": 0.006,
      "step": 2643740
    },
    {
      "epoch": 4.326571224707553,
      "grad_norm": 0.12354408204555511,
      "learning_rate": 1.3557322931149228e-06,
      "loss": 0.0104,
      "step": 2643760
    },
    {
      "epoch": 4.326603955146207,
      "grad_norm": 0.15729287266731262,
      "learning_rate": 1.3556664009014056e-06,
      "loss": 0.0072,
      "step": 2643780
    },
    {
      "epoch": 4.326636685584861,
      "grad_norm": 0.12285306304693222,
      "learning_rate": 1.3556005086878885e-06,
      "loss": 0.012,
      "step": 2643800
    },
    {
      "epoch": 4.326669416023513,
      "grad_norm": 0.25023719668388367,
      "learning_rate": 1.3555346164743713e-06,
      "loss": 0.0091,
      "step": 2643820
    },
    {
      "epoch": 4.326702146462167,
      "grad_norm": 0.37514013051986694,
      "learning_rate": 1.3554687242608542e-06,
      "loss": 0.0079,
      "step": 2643840
    },
    {
      "epoch": 4.3267348769008205,
      "grad_norm": 0.26187756657600403,
      "learning_rate": 1.355402832047337e-06,
      "loss": 0.0088,
      "step": 2643860
    },
    {
      "epoch": 4.326767607339473,
      "grad_norm": 0.23157131671905518,
      "learning_rate": 1.35533693983382e-06,
      "loss": 0.0105,
      "step": 2643880
    },
    {
      "epoch": 4.326800337778127,
      "grad_norm": 0.07680458575487137,
      "learning_rate": 1.3552710476203028e-06,
      "loss": 0.0085,
      "step": 2643900
    },
    {
      "epoch": 4.32683306821678,
      "grad_norm": 0.19718559086322784,
      "learning_rate": 1.3552051554067858e-06,
      "loss": 0.0109,
      "step": 2643920
    },
    {
      "epoch": 4.326865798655434,
      "grad_norm": 0.058340977877378464,
      "learning_rate": 1.3551392631932685e-06,
      "loss": 0.0111,
      "step": 2643940
    },
    {
      "epoch": 4.326898529094087,
      "grad_norm": 0.18104508519172668,
      "learning_rate": 1.3550733709797515e-06,
      "loss": 0.0089,
      "step": 2643960
    },
    {
      "epoch": 4.32693125953274,
      "grad_norm": 0.0975596010684967,
      "learning_rate": 1.3550074787662342e-06,
      "loss": 0.0093,
      "step": 2643980
    },
    {
      "epoch": 4.326963989971394,
      "grad_norm": 0.3119635283946991,
      "learning_rate": 1.3549415865527172e-06,
      "loss": 0.011,
      "step": 2644000
    },
    {
      "epoch": 4.326996720410047,
      "grad_norm": 0.31379520893096924,
      "learning_rate": 1.3548756943392e-06,
      "loss": 0.0081,
      "step": 2644020
    },
    {
      "epoch": 4.3270294508487,
      "grad_norm": 0.3051436245441437,
      "learning_rate": 1.3548098021256829e-06,
      "loss": 0.0079,
      "step": 2644040
    },
    {
      "epoch": 4.327062181287354,
      "grad_norm": 0.23718270659446716,
      "learning_rate": 1.3547439099121658e-06,
      "loss": 0.0124,
      "step": 2644060
    },
    {
      "epoch": 4.327094911726007,
      "grad_norm": 0.29632216691970825,
      "learning_rate": 1.3546780176986488e-06,
      "loss": 0.0089,
      "step": 2644080
    },
    {
      "epoch": 4.32712764216466,
      "grad_norm": 0.1551143229007721,
      "learning_rate": 1.3546121254851315e-06,
      "loss": 0.0128,
      "step": 2644100
    },
    {
      "epoch": 4.327160372603314,
      "grad_norm": 0.2064381092786789,
      "learning_rate": 1.3545462332716145e-06,
      "loss": 0.0106,
      "step": 2644120
    },
    {
      "epoch": 4.327193103041967,
      "grad_norm": 0.1451456993818283,
      "learning_rate": 1.3544803410580972e-06,
      "loss": 0.0111,
      "step": 2644140
    },
    {
      "epoch": 4.32722583348062,
      "grad_norm": 0.16430111229419708,
      "learning_rate": 1.3544144488445802e-06,
      "loss": 0.0098,
      "step": 2644160
    },
    {
      "epoch": 4.3272585639192735,
      "grad_norm": 0.09551916271448135,
      "learning_rate": 1.354348556631063e-06,
      "loss": 0.0092,
      "step": 2644180
    },
    {
      "epoch": 4.327291294357927,
      "grad_norm": 0.45912811160087585,
      "learning_rate": 1.3542826644175458e-06,
      "loss": 0.0123,
      "step": 2644200
    },
    {
      "epoch": 4.327324024796581,
      "grad_norm": 0.15680411458015442,
      "learning_rate": 1.3542167722040286e-06,
      "loss": 0.0088,
      "step": 2644220
    },
    {
      "epoch": 4.327356755235233,
      "grad_norm": 0.17521227896213531,
      "learning_rate": 1.3541508799905118e-06,
      "loss": 0.0069,
      "step": 2644240
    },
    {
      "epoch": 4.327389485673887,
      "grad_norm": 0.1404026448726654,
      "learning_rate": 1.3540849877769947e-06,
      "loss": 0.0083,
      "step": 2644260
    },
    {
      "epoch": 4.327422216112541,
      "grad_norm": 0.22237759828567505,
      "learning_rate": 1.3540190955634774e-06,
      "loss": 0.0091,
      "step": 2644280
    },
    {
      "epoch": 4.327454946551193,
      "grad_norm": 0.2620050311088562,
      "learning_rate": 1.3539532033499602e-06,
      "loss": 0.0087,
      "step": 2644300
    },
    {
      "epoch": 4.327487676989847,
      "grad_norm": 0.13118945062160492,
      "learning_rate": 1.3538873111364431e-06,
      "loss": 0.0079,
      "step": 2644320
    },
    {
      "epoch": 4.3275204074285005,
      "grad_norm": 0.0982956662774086,
      "learning_rate": 1.3538214189229259e-06,
      "loss": 0.0078,
      "step": 2644340
    },
    {
      "epoch": 4.327553137867154,
      "grad_norm": 0.0977378636598587,
      "learning_rate": 1.3537555267094088e-06,
      "loss": 0.0087,
      "step": 2644360
    },
    {
      "epoch": 4.327585868305807,
      "grad_norm": 0.6834329962730408,
      "learning_rate": 1.3536896344958916e-06,
      "loss": 0.0093,
      "step": 2644380
    },
    {
      "epoch": 4.32761859874446,
      "grad_norm": 0.271199494600296,
      "learning_rate": 1.3536237422823745e-06,
      "loss": 0.007,
      "step": 2644400
    },
    {
      "epoch": 4.327651329183114,
      "grad_norm": 0.6381902694702148,
      "learning_rate": 1.3535578500688577e-06,
      "loss": 0.0168,
      "step": 2644420
    },
    {
      "epoch": 4.327684059621767,
      "grad_norm": 0.205429345369339,
      "learning_rate": 1.3534919578553404e-06,
      "loss": 0.0165,
      "step": 2644440
    },
    {
      "epoch": 4.32771679006042,
      "grad_norm": 0.11829043924808502,
      "learning_rate": 1.3534260656418234e-06,
      "loss": 0.0078,
      "step": 2644460
    },
    {
      "epoch": 4.327749520499074,
      "grad_norm": 0.2467673420906067,
      "learning_rate": 1.3533601734283061e-06,
      "loss": 0.0109,
      "step": 2644480
    },
    {
      "epoch": 4.327782250937727,
      "grad_norm": 0.23955167829990387,
      "learning_rate": 1.3532942812147888e-06,
      "loss": 0.0081,
      "step": 2644500
    },
    {
      "epoch": 4.32781498137638,
      "grad_norm": 0.4507831931114197,
      "learning_rate": 1.3532283890012718e-06,
      "loss": 0.0126,
      "step": 2644520
    },
    {
      "epoch": 4.327847711815034,
      "grad_norm": 0.08411109447479248,
      "learning_rate": 1.3531624967877545e-06,
      "loss": 0.009,
      "step": 2644540
    },
    {
      "epoch": 4.327880442253687,
      "grad_norm": 0.09504342079162598,
      "learning_rate": 1.3530966045742375e-06,
      "loss": 0.0075,
      "step": 2644560
    },
    {
      "epoch": 4.32791317269234,
      "grad_norm": 0.18785841763019562,
      "learning_rate": 1.3530307123607202e-06,
      "loss": 0.0082,
      "step": 2644580
    },
    {
      "epoch": 4.327945903130994,
      "grad_norm": 0.3202352225780487,
      "learning_rate": 1.3529648201472034e-06,
      "loss": 0.0092,
      "step": 2644600
    },
    {
      "epoch": 4.327978633569647,
      "grad_norm": 0.17849482595920563,
      "learning_rate": 1.3528989279336863e-06,
      "loss": 0.0084,
      "step": 2644620
    },
    {
      "epoch": 4.328011364008301,
      "grad_norm": 0.15655837953090668,
      "learning_rate": 1.352833035720169e-06,
      "loss": 0.0066,
      "step": 2644640
    },
    {
      "epoch": 4.328044094446954,
      "grad_norm": 0.2686392068862915,
      "learning_rate": 1.352767143506652e-06,
      "loss": 0.0103,
      "step": 2644660
    },
    {
      "epoch": 4.328076824885607,
      "grad_norm": 0.37321317195892334,
      "learning_rate": 1.3527012512931348e-06,
      "loss": 0.0059,
      "step": 2644680
    },
    {
      "epoch": 4.328109555324261,
      "grad_norm": 0.16189274191856384,
      "learning_rate": 1.3526353590796177e-06,
      "loss": 0.0109,
      "step": 2644700
    },
    {
      "epoch": 4.3281422857629135,
      "grad_norm": 0.06641446053981781,
      "learning_rate": 1.3525694668661005e-06,
      "loss": 0.0083,
      "step": 2644720
    },
    {
      "epoch": 4.328175016201567,
      "grad_norm": 0.1207408607006073,
      "learning_rate": 1.3525035746525832e-06,
      "loss": 0.0076,
      "step": 2644740
    },
    {
      "epoch": 4.328207746640221,
      "grad_norm": 0.3285142183303833,
      "learning_rate": 1.3524376824390664e-06,
      "loss": 0.0073,
      "step": 2644760
    },
    {
      "epoch": 4.328240477078873,
      "grad_norm": 0.42545637488365173,
      "learning_rate": 1.3523717902255493e-06,
      "loss": 0.0108,
      "step": 2644780
    },
    {
      "epoch": 4.328273207517527,
      "grad_norm": 0.2197120040655136,
      "learning_rate": 1.352305898012032e-06,
      "loss": 0.0069,
      "step": 2644800
    },
    {
      "epoch": 4.328305937956181,
      "grad_norm": 0.44752562046051025,
      "learning_rate": 1.352240005798515e-06,
      "loss": 0.0137,
      "step": 2644820
    },
    {
      "epoch": 4.328338668394834,
      "grad_norm": 0.14875595271587372,
      "learning_rate": 1.3521741135849978e-06,
      "loss": 0.0063,
      "step": 2644840
    },
    {
      "epoch": 4.328371398833487,
      "grad_norm": 0.0572197362780571,
      "learning_rate": 1.3521082213714807e-06,
      "loss": 0.0065,
      "step": 2644860
    },
    {
      "epoch": 4.3284041292721405,
      "grad_norm": 0.27614742517471313,
      "learning_rate": 1.3520423291579634e-06,
      "loss": 0.0077,
      "step": 2644880
    },
    {
      "epoch": 4.328436859710794,
      "grad_norm": 0.46990931034088135,
      "learning_rate": 1.3519764369444464e-06,
      "loss": 0.0132,
      "step": 2644900
    },
    {
      "epoch": 4.328469590149448,
      "grad_norm": 0.09704186022281647,
      "learning_rate": 1.3519105447309291e-06,
      "loss": 0.0148,
      "step": 2644920
    },
    {
      "epoch": 4.3285023205881,
      "grad_norm": 0.10790839046239853,
      "learning_rate": 1.3518446525174123e-06,
      "loss": 0.0114,
      "step": 2644940
    },
    {
      "epoch": 4.328535051026754,
      "grad_norm": 0.2627246677875519,
      "learning_rate": 1.351778760303895e-06,
      "loss": 0.008,
      "step": 2644960
    },
    {
      "epoch": 4.3285677814654075,
      "grad_norm": 0.11042114347219467,
      "learning_rate": 1.351712868090378e-06,
      "loss": 0.0079,
      "step": 2644980
    },
    {
      "epoch": 4.32860051190406,
      "grad_norm": 0.20035701990127563,
      "learning_rate": 1.3516469758768607e-06,
      "loss": 0.0096,
      "step": 2645000
    },
    {
      "epoch": 4.328633242342714,
      "grad_norm": 0.6018006205558777,
      "learning_rate": 1.3515810836633437e-06,
      "loss": 0.0079,
      "step": 2645020
    },
    {
      "epoch": 4.328665972781367,
      "grad_norm": 0.11253579705953598,
      "learning_rate": 1.3515151914498264e-06,
      "loss": 0.0089,
      "step": 2645040
    },
    {
      "epoch": 4.32869870322002,
      "grad_norm": 0.13724206387996674,
      "learning_rate": 1.3514492992363094e-06,
      "loss": 0.006,
      "step": 2645060
    },
    {
      "epoch": 4.328731433658674,
      "grad_norm": 0.11878258734941483,
      "learning_rate": 1.3513834070227921e-06,
      "loss": 0.0092,
      "step": 2645080
    },
    {
      "epoch": 4.328764164097327,
      "grad_norm": 0.0995527058839798,
      "learning_rate": 1.351317514809275e-06,
      "loss": 0.0102,
      "step": 2645100
    },
    {
      "epoch": 4.328796894535981,
      "grad_norm": 0.30463266372680664,
      "learning_rate": 1.351251622595758e-06,
      "loss": 0.0182,
      "step": 2645120
    },
    {
      "epoch": 4.328829624974634,
      "grad_norm": 0.24050800502300262,
      "learning_rate": 1.351185730382241e-06,
      "loss": 0.0089,
      "step": 2645140
    },
    {
      "epoch": 4.328862355413287,
      "grad_norm": 0.2697768807411194,
      "learning_rate": 1.3511198381687237e-06,
      "loss": 0.0079,
      "step": 2645160
    },
    {
      "epoch": 4.328895085851941,
      "grad_norm": 0.16811099648475647,
      "learning_rate": 1.3510539459552067e-06,
      "loss": 0.0113,
      "step": 2645180
    },
    {
      "epoch": 4.3289278162905935,
      "grad_norm": 0.45464542508125305,
      "learning_rate": 1.3509880537416894e-06,
      "loss": 0.0068,
      "step": 2645200
    },
    {
      "epoch": 4.328960546729247,
      "grad_norm": 0.08081190288066864,
      "learning_rate": 1.3509221615281724e-06,
      "loss": 0.0081,
      "step": 2645220
    },
    {
      "epoch": 4.328993277167901,
      "grad_norm": 0.10903619974851608,
      "learning_rate": 1.350856269314655e-06,
      "loss": 0.0071,
      "step": 2645240
    },
    {
      "epoch": 4.329026007606554,
      "grad_norm": 0.14719967544078827,
      "learning_rate": 1.350790377101138e-06,
      "loss": 0.0094,
      "step": 2645260
    },
    {
      "epoch": 4.329058738045207,
      "grad_norm": 0.34344783425331116,
      "learning_rate": 1.3507244848876208e-06,
      "loss": 0.015,
      "step": 2645280
    },
    {
      "epoch": 4.329091468483861,
      "grad_norm": 0.2999991476535797,
      "learning_rate": 1.350658592674104e-06,
      "loss": 0.0098,
      "step": 2645300
    },
    {
      "epoch": 4.329124198922514,
      "grad_norm": 0.06039665639400482,
      "learning_rate": 1.3505927004605867e-06,
      "loss": 0.0096,
      "step": 2645320
    },
    {
      "epoch": 4.329156929361167,
      "grad_norm": 0.34515419602394104,
      "learning_rate": 1.3505268082470696e-06,
      "loss": 0.0079,
      "step": 2645340
    },
    {
      "epoch": 4.3291896597998205,
      "grad_norm": 0.20111995935440063,
      "learning_rate": 1.3504609160335524e-06,
      "loss": 0.0074,
      "step": 2645360
    },
    {
      "epoch": 4.329222390238474,
      "grad_norm": 0.6507815718650818,
      "learning_rate": 1.3503950238200353e-06,
      "loss": 0.013,
      "step": 2645380
    },
    {
      "epoch": 4.329255120677128,
      "grad_norm": 0.13401593267917633,
      "learning_rate": 1.350329131606518e-06,
      "loss": 0.0078,
      "step": 2645400
    },
    {
      "epoch": 4.32928785111578,
      "grad_norm": 2.3729875087738037,
      "learning_rate": 1.350263239393001e-06,
      "loss": 0.008,
      "step": 2645420
    },
    {
      "epoch": 4.329320581554434,
      "grad_norm": 0.3247526288032532,
      "learning_rate": 1.3501973471794838e-06,
      "loss": 0.0103,
      "step": 2645440
    },
    {
      "epoch": 4.329353311993088,
      "grad_norm": 0.1442420333623886,
      "learning_rate": 1.3501314549659667e-06,
      "loss": 0.0101,
      "step": 2645460
    },
    {
      "epoch": 4.32938604243174,
      "grad_norm": 0.3138825595378876,
      "learning_rate": 1.3500655627524497e-06,
      "loss": 0.0079,
      "step": 2645480
    },
    {
      "epoch": 4.329418772870394,
      "grad_norm": 0.2935122847557068,
      "learning_rate": 1.3499996705389326e-06,
      "loss": 0.0063,
      "step": 2645500
    },
    {
      "epoch": 4.3294515033090475,
      "grad_norm": 0.111166812479496,
      "learning_rate": 1.3499337783254154e-06,
      "loss": 0.0089,
      "step": 2645520
    },
    {
      "epoch": 4.329484233747701,
      "grad_norm": 0.38950955867767334,
      "learning_rate": 1.3498678861118983e-06,
      "loss": 0.0075,
      "step": 2645540
    },
    {
      "epoch": 4.329516964186354,
      "grad_norm": 0.11494900286197662,
      "learning_rate": 1.349801993898381e-06,
      "loss": 0.0071,
      "step": 2645560
    },
    {
      "epoch": 4.329549694625007,
      "grad_norm": 0.1597401350736618,
      "learning_rate": 1.349736101684864e-06,
      "loss": 0.0083,
      "step": 2645580
    },
    {
      "epoch": 4.329582425063661,
      "grad_norm": 0.15252307057380676,
      "learning_rate": 1.3496702094713467e-06,
      "loss": 0.0071,
      "step": 2645600
    },
    {
      "epoch": 4.329615155502314,
      "grad_norm": 0.39300331473350525,
      "learning_rate": 1.3496043172578297e-06,
      "loss": 0.0125,
      "step": 2645620
    },
    {
      "epoch": 4.329647885940967,
      "grad_norm": 0.32736432552337646,
      "learning_rate": 1.3495384250443129e-06,
      "loss": 0.0073,
      "step": 2645640
    },
    {
      "epoch": 4.329680616379621,
      "grad_norm": 0.14936430752277374,
      "learning_rate": 1.3494725328307956e-06,
      "loss": 0.0082,
      "step": 2645660
    },
    {
      "epoch": 4.3297133468182745,
      "grad_norm": 0.2972288429737091,
      "learning_rate": 1.3494066406172785e-06,
      "loss": 0.007,
      "step": 2645680
    },
    {
      "epoch": 4.329746077256927,
      "grad_norm": 0.2380729615688324,
      "learning_rate": 1.3493407484037613e-06,
      "loss": 0.0076,
      "step": 2645700
    },
    {
      "epoch": 4.329778807695581,
      "grad_norm": 0.13330326974391937,
      "learning_rate": 1.349274856190244e-06,
      "loss": 0.0137,
      "step": 2645720
    },
    {
      "epoch": 4.329811538134234,
      "grad_norm": 0.1015978530049324,
      "learning_rate": 1.349208963976727e-06,
      "loss": 0.0078,
      "step": 2645740
    },
    {
      "epoch": 4.329844268572887,
      "grad_norm": 0.11217129230499268,
      "learning_rate": 1.3491430717632097e-06,
      "loss": 0.0097,
      "step": 2645760
    },
    {
      "epoch": 4.329876999011541,
      "grad_norm": 0.36861681938171387,
      "learning_rate": 1.3490771795496927e-06,
      "loss": 0.0108,
      "step": 2645780
    },
    {
      "epoch": 4.329909729450194,
      "grad_norm": 0.07192590832710266,
      "learning_rate": 1.3490112873361754e-06,
      "loss": 0.0096,
      "step": 2645800
    },
    {
      "epoch": 4.329942459888848,
      "grad_norm": 0.26597070693969727,
      "learning_rate": 1.3489453951226586e-06,
      "loss": 0.0077,
      "step": 2645820
    },
    {
      "epoch": 4.329975190327501,
      "grad_norm": 0.20257188379764557,
      "learning_rate": 1.3488795029091415e-06,
      "loss": 0.0143,
      "step": 2645840
    },
    {
      "epoch": 4.330007920766154,
      "grad_norm": 0.05253675580024719,
      "learning_rate": 1.3488136106956243e-06,
      "loss": 0.0086,
      "step": 2645860
    },
    {
      "epoch": 4.330040651204808,
      "grad_norm": 0.24043422937393188,
      "learning_rate": 1.3487477184821072e-06,
      "loss": 0.0111,
      "step": 2645880
    },
    {
      "epoch": 4.3300733816434605,
      "grad_norm": 0.0735263004899025,
      "learning_rate": 1.34868182626859e-06,
      "loss": 0.0092,
      "step": 2645900
    },
    {
      "epoch": 4.330106112082114,
      "grad_norm": 0.08561215549707413,
      "learning_rate": 1.348615934055073e-06,
      "loss": 0.0072,
      "step": 2645920
    },
    {
      "epoch": 4.330138842520768,
      "grad_norm": 0.4183056652545929,
      "learning_rate": 1.3485500418415556e-06,
      "loss": 0.0131,
      "step": 2645940
    },
    {
      "epoch": 4.33017157295942,
      "grad_norm": 0.4289590120315552,
      "learning_rate": 1.3484841496280384e-06,
      "loss": 0.0084,
      "step": 2645960
    },
    {
      "epoch": 4.330204303398074,
      "grad_norm": 0.3025253713130951,
      "learning_rate": 1.3484182574145213e-06,
      "loss": 0.0115,
      "step": 2645980
    },
    {
      "epoch": 4.3302370338367275,
      "grad_norm": 0.30426838994026184,
      "learning_rate": 1.3483523652010045e-06,
      "loss": 0.0115,
      "step": 2646000
    },
    {
      "epoch": 4.330269764275381,
      "grad_norm": 0.5452393293380737,
      "learning_rate": 1.3482864729874872e-06,
      "loss": 0.0147,
      "step": 2646020
    },
    {
      "epoch": 4.330302494714034,
      "grad_norm": 0.32248756289482117,
      "learning_rate": 1.3482205807739702e-06,
      "loss": 0.0121,
      "step": 2646040
    },
    {
      "epoch": 4.330335225152687,
      "grad_norm": 0.33261027932167053,
      "learning_rate": 1.348154688560453e-06,
      "loss": 0.0106,
      "step": 2646060
    },
    {
      "epoch": 4.330367955591341,
      "grad_norm": 0.5377525687217712,
      "learning_rate": 1.3480887963469359e-06,
      "loss": 0.0101,
      "step": 2646080
    },
    {
      "epoch": 4.330400686029995,
      "grad_norm": 0.09960006177425385,
      "learning_rate": 1.3480229041334186e-06,
      "loss": 0.0064,
      "step": 2646100
    },
    {
      "epoch": 4.330433416468647,
      "grad_norm": 0.06893680989742279,
      "learning_rate": 1.3479570119199016e-06,
      "loss": 0.0099,
      "step": 2646120
    },
    {
      "epoch": 4.330466146907301,
      "grad_norm": 0.4084551930427551,
      "learning_rate": 1.3478911197063843e-06,
      "loss": 0.0103,
      "step": 2646140
    },
    {
      "epoch": 4.3304988773459545,
      "grad_norm": 0.45605918765068054,
      "learning_rate": 1.347825227492867e-06,
      "loss": 0.0094,
      "step": 2646160
    },
    {
      "epoch": 4.330531607784607,
      "grad_norm": 0.34068748354911804,
      "learning_rate": 1.3477593352793502e-06,
      "loss": 0.0093,
      "step": 2646180
    },
    {
      "epoch": 4.330564338223261,
      "grad_norm": 0.1758568286895752,
      "learning_rate": 1.3476934430658332e-06,
      "loss": 0.0057,
      "step": 2646200
    },
    {
      "epoch": 4.330597068661914,
      "grad_norm": 0.2205708920955658,
      "learning_rate": 1.347627550852316e-06,
      "loss": 0.0124,
      "step": 2646220
    },
    {
      "epoch": 4.330629799100567,
      "grad_norm": 0.06232447549700737,
      "learning_rate": 1.3475616586387989e-06,
      "loss": 0.0074,
      "step": 2646240
    },
    {
      "epoch": 4.330662529539221,
      "grad_norm": 0.21944303810596466,
      "learning_rate": 1.3474957664252816e-06,
      "loss": 0.0067,
      "step": 2646260
    },
    {
      "epoch": 4.330695259977874,
      "grad_norm": 0.4665617346763611,
      "learning_rate": 1.3474298742117645e-06,
      "loss": 0.0102,
      "step": 2646280
    },
    {
      "epoch": 4.330727990416528,
      "grad_norm": 0.27409347891807556,
      "learning_rate": 1.3473639819982473e-06,
      "loss": 0.0063,
      "step": 2646300
    },
    {
      "epoch": 4.330760720855181,
      "grad_norm": 0.14077065885066986,
      "learning_rate": 1.3472980897847302e-06,
      "loss": 0.007,
      "step": 2646320
    },
    {
      "epoch": 4.330793451293834,
      "grad_norm": 0.21232149004936218,
      "learning_rate": 1.347232197571213e-06,
      "loss": 0.0089,
      "step": 2646340
    },
    {
      "epoch": 4.330826181732488,
      "grad_norm": 0.1279313862323761,
      "learning_rate": 1.3471663053576961e-06,
      "loss": 0.0082,
      "step": 2646360
    },
    {
      "epoch": 4.330858912171141,
      "grad_norm": 1.309517741203308,
      "learning_rate": 1.3471004131441789e-06,
      "loss": 0.0116,
      "step": 2646380
    },
    {
      "epoch": 4.330891642609794,
      "grad_norm": 0.2117556631565094,
      "learning_rate": 1.3470345209306618e-06,
      "loss": 0.0083,
      "step": 2646400
    },
    {
      "epoch": 4.330924373048448,
      "grad_norm": 0.2524981200695038,
      "learning_rate": 1.3469686287171446e-06,
      "loss": 0.009,
      "step": 2646420
    },
    {
      "epoch": 4.330957103487101,
      "grad_norm": 0.36620450019836426,
      "learning_rate": 1.3469027365036275e-06,
      "loss": 0.0059,
      "step": 2646440
    },
    {
      "epoch": 4.330989833925754,
      "grad_norm": 0.19294947385787964,
      "learning_rate": 1.3468368442901103e-06,
      "loss": 0.0091,
      "step": 2646460
    },
    {
      "epoch": 4.331022564364408,
      "grad_norm": 0.23299145698547363,
      "learning_rate": 1.3467709520765932e-06,
      "loss": 0.012,
      "step": 2646480
    },
    {
      "epoch": 4.331055294803061,
      "grad_norm": 0.07998877018690109,
      "learning_rate": 1.346705059863076e-06,
      "loss": 0.0102,
      "step": 2646500
    },
    {
      "epoch": 4.331088025241714,
      "grad_norm": 0.3019804060459137,
      "learning_rate": 1.3466391676495591e-06,
      "loss": 0.0102,
      "step": 2646520
    },
    {
      "epoch": 4.3311207556803675,
      "grad_norm": 0.3082999289035797,
      "learning_rate": 1.3465732754360419e-06,
      "loss": 0.0068,
      "step": 2646540
    },
    {
      "epoch": 4.331153486119021,
      "grad_norm": 0.23593905568122864,
      "learning_rate": 1.3465073832225248e-06,
      "loss": 0.0086,
      "step": 2646560
    },
    {
      "epoch": 4.331186216557675,
      "grad_norm": 0.22516487538814545,
      "learning_rate": 1.3464414910090075e-06,
      "loss": 0.0103,
      "step": 2646580
    },
    {
      "epoch": 4.331218946996327,
      "grad_norm": 0.1862471103668213,
      "learning_rate": 1.3463755987954905e-06,
      "loss": 0.0079,
      "step": 2646600
    },
    {
      "epoch": 4.331251677434981,
      "grad_norm": 0.21696996688842773,
      "learning_rate": 1.3463097065819732e-06,
      "loss": 0.0145,
      "step": 2646620
    },
    {
      "epoch": 4.331284407873635,
      "grad_norm": 0.43257206678390503,
      "learning_rate": 1.3462438143684562e-06,
      "loss": 0.014,
      "step": 2646640
    },
    {
      "epoch": 4.331317138312287,
      "grad_norm": 0.18518763780593872,
      "learning_rate": 1.346177922154939e-06,
      "loss": 0.0101,
      "step": 2646660
    },
    {
      "epoch": 4.331349868750941,
      "grad_norm": 0.1506124585866928,
      "learning_rate": 1.3461120299414219e-06,
      "loss": 0.0089,
      "step": 2646680
    },
    {
      "epoch": 4.3313825991895945,
      "grad_norm": 0.5940453410148621,
      "learning_rate": 1.3460461377279048e-06,
      "loss": 0.0094,
      "step": 2646700
    },
    {
      "epoch": 4.331415329628248,
      "grad_norm": 0.3350251019001007,
      "learning_rate": 1.3459802455143878e-06,
      "loss": 0.0075,
      "step": 2646720
    },
    {
      "epoch": 4.331448060066901,
      "grad_norm": 0.2646643817424774,
      "learning_rate": 1.3459143533008705e-06,
      "loss": 0.0102,
      "step": 2646740
    },
    {
      "epoch": 4.331480790505554,
      "grad_norm": 0.10929659008979797,
      "learning_rate": 1.3458484610873535e-06,
      "loss": 0.006,
      "step": 2646760
    },
    {
      "epoch": 4.331513520944208,
      "grad_norm": 0.22488291561603546,
      "learning_rate": 1.3457825688738362e-06,
      "loss": 0.0086,
      "step": 2646780
    },
    {
      "epoch": 4.331546251382861,
      "grad_norm": 0.13025495409965515,
      "learning_rate": 1.3457166766603192e-06,
      "loss": 0.0109,
      "step": 2646800
    },
    {
      "epoch": 4.331578981821514,
      "grad_norm": 0.35457149147987366,
      "learning_rate": 1.345650784446802e-06,
      "loss": 0.0105,
      "step": 2646820
    },
    {
      "epoch": 4.331611712260168,
      "grad_norm": 0.42970383167266846,
      "learning_rate": 1.3455848922332849e-06,
      "loss": 0.0138,
      "step": 2646840
    },
    {
      "epoch": 4.331644442698821,
      "grad_norm": 0.11598078161478043,
      "learning_rate": 1.3455190000197676e-06,
      "loss": 0.0119,
      "step": 2646860
    },
    {
      "epoch": 4.331677173137474,
      "grad_norm": 0.39028462767601013,
      "learning_rate": 1.3454531078062508e-06,
      "loss": 0.0111,
      "step": 2646880
    },
    {
      "epoch": 4.331709903576128,
      "grad_norm": 0.20699435472488403,
      "learning_rate": 1.3453872155927337e-06,
      "loss": 0.0121,
      "step": 2646900
    },
    {
      "epoch": 4.331742634014781,
      "grad_norm": 0.3226875066757202,
      "learning_rate": 1.3453213233792165e-06,
      "loss": 0.0099,
      "step": 2646920
    },
    {
      "epoch": 4.331775364453434,
      "grad_norm": 0.2287016361951828,
      "learning_rate": 1.3452554311656992e-06,
      "loss": 0.0075,
      "step": 2646940
    },
    {
      "epoch": 4.331808094892088,
      "grad_norm": 0.3565830886363983,
      "learning_rate": 1.3451895389521821e-06,
      "loss": 0.0113,
      "step": 2646960
    },
    {
      "epoch": 4.331840825330741,
      "grad_norm": 0.7675545811653137,
      "learning_rate": 1.3451236467386649e-06,
      "loss": 0.012,
      "step": 2646980
    },
    {
      "epoch": 4.331873555769395,
      "grad_norm": 0.31780293583869934,
      "learning_rate": 1.3450577545251478e-06,
      "loss": 0.011,
      "step": 2647000
    },
    {
      "epoch": 4.3319062862080475,
      "grad_norm": 0.2299385815858841,
      "learning_rate": 1.3449918623116306e-06,
      "loss": 0.0082,
      "step": 2647020
    },
    {
      "epoch": 4.331939016646701,
      "grad_norm": 0.2143879383802414,
      "learning_rate": 1.3449259700981135e-06,
      "loss": 0.0071,
      "step": 2647040
    },
    {
      "epoch": 4.331971747085355,
      "grad_norm": 0.1780938357114792,
      "learning_rate": 1.3448600778845967e-06,
      "loss": 0.0078,
      "step": 2647060
    },
    {
      "epoch": 4.332004477524007,
      "grad_norm": 0.16840511560440063,
      "learning_rate": 1.3447941856710794e-06,
      "loss": 0.0088,
      "step": 2647080
    },
    {
      "epoch": 4.332037207962661,
      "grad_norm": 0.1495267003774643,
      "learning_rate": 1.3447282934575624e-06,
      "loss": 0.0076,
      "step": 2647100
    },
    {
      "epoch": 4.332069938401315,
      "grad_norm": 0.3011412024497986,
      "learning_rate": 1.3446624012440451e-06,
      "loss": 0.0071,
      "step": 2647120
    },
    {
      "epoch": 4.332102668839968,
      "grad_norm": 0.24161721765995026,
      "learning_rate": 1.3445965090305279e-06,
      "loss": 0.0067,
      "step": 2647140
    },
    {
      "epoch": 4.332135399278621,
      "grad_norm": 0.2925795912742615,
      "learning_rate": 1.3445306168170108e-06,
      "loss": 0.0056,
      "step": 2647160
    },
    {
      "epoch": 4.3321681297172745,
      "grad_norm": 0.36036956310272217,
      "learning_rate": 1.3444647246034936e-06,
      "loss": 0.0099,
      "step": 2647180
    },
    {
      "epoch": 4.332200860155928,
      "grad_norm": 0.3516834080219269,
      "learning_rate": 1.3443988323899765e-06,
      "loss": 0.0084,
      "step": 2647200
    },
    {
      "epoch": 4.332233590594581,
      "grad_norm": 0.25804403424263,
      "learning_rate": 1.3443329401764592e-06,
      "loss": 0.0099,
      "step": 2647220
    },
    {
      "epoch": 4.332266321033234,
      "grad_norm": 0.15609236061573029,
      "learning_rate": 1.3442670479629424e-06,
      "loss": 0.0111,
      "step": 2647240
    },
    {
      "epoch": 4.332299051471888,
      "grad_norm": 0.24608460068702698,
      "learning_rate": 1.3442011557494254e-06,
      "loss": 0.0109,
      "step": 2647260
    },
    {
      "epoch": 4.332331781910542,
      "grad_norm": 0.49146518111228943,
      "learning_rate": 1.344135263535908e-06,
      "loss": 0.0107,
      "step": 2647280
    },
    {
      "epoch": 4.332364512349194,
      "grad_norm": 0.2096172422170639,
      "learning_rate": 1.344069371322391e-06,
      "loss": 0.0099,
      "step": 2647300
    },
    {
      "epoch": 4.332397242787848,
      "grad_norm": 0.08349289000034332,
      "learning_rate": 1.3440034791088738e-06,
      "loss": 0.0067,
      "step": 2647320
    },
    {
      "epoch": 4.3324299732265015,
      "grad_norm": 0.13667123019695282,
      "learning_rate": 1.3439375868953567e-06,
      "loss": 0.0099,
      "step": 2647340
    },
    {
      "epoch": 4.332462703665154,
      "grad_norm": 0.21385188400745392,
      "learning_rate": 1.3438716946818395e-06,
      "loss": 0.0106,
      "step": 2647360
    },
    {
      "epoch": 4.332495434103808,
      "grad_norm": 0.14948780834674835,
      "learning_rate": 1.3438058024683222e-06,
      "loss": 0.0066,
      "step": 2647380
    },
    {
      "epoch": 4.332528164542461,
      "grad_norm": 0.07071106880903244,
      "learning_rate": 1.3437399102548054e-06,
      "loss": 0.0068,
      "step": 2647400
    },
    {
      "epoch": 4.332560894981114,
      "grad_norm": 0.47731447219848633,
      "learning_rate": 1.3436740180412883e-06,
      "loss": 0.0071,
      "step": 2647420
    },
    {
      "epoch": 4.332593625419768,
      "grad_norm": 0.12891125679016113,
      "learning_rate": 1.343608125827771e-06,
      "loss": 0.0059,
      "step": 2647440
    },
    {
      "epoch": 4.332626355858421,
      "grad_norm": 0.20397932827472687,
      "learning_rate": 1.343542233614254e-06,
      "loss": 0.0072,
      "step": 2647460
    },
    {
      "epoch": 4.332659086297075,
      "grad_norm": 0.13913056254386902,
      "learning_rate": 1.3434763414007368e-06,
      "loss": 0.0115,
      "step": 2647480
    },
    {
      "epoch": 4.332691816735728,
      "grad_norm": 0.22429072856903076,
      "learning_rate": 1.3434104491872197e-06,
      "loss": 0.013,
      "step": 2647500
    },
    {
      "epoch": 4.332724547174381,
      "grad_norm": 0.17326435446739197,
      "learning_rate": 1.3433445569737025e-06,
      "loss": 0.0115,
      "step": 2647520
    },
    {
      "epoch": 4.332757277613035,
      "grad_norm": 0.25236549973487854,
      "learning_rate": 1.3432786647601854e-06,
      "loss": 0.0067,
      "step": 2647540
    },
    {
      "epoch": 4.332790008051688,
      "grad_norm": 0.09942176192998886,
      "learning_rate": 1.3432127725466681e-06,
      "loss": 0.0144,
      "step": 2647560
    },
    {
      "epoch": 4.332822738490341,
      "grad_norm": 0.19079020619392395,
      "learning_rate": 1.3431468803331513e-06,
      "loss": 0.0075,
      "step": 2647580
    },
    {
      "epoch": 4.332855468928995,
      "grad_norm": 0.08841685950756073,
      "learning_rate": 1.343080988119634e-06,
      "loss": 0.0054,
      "step": 2647600
    },
    {
      "epoch": 4.332888199367648,
      "grad_norm": 0.16525524854660034,
      "learning_rate": 1.343015095906117e-06,
      "loss": 0.0092,
      "step": 2647620
    },
    {
      "epoch": 4.332920929806301,
      "grad_norm": 0.1456121802330017,
      "learning_rate": 1.3429492036925997e-06,
      "loss": 0.0058,
      "step": 2647640
    },
    {
      "epoch": 4.3329536602449545,
      "grad_norm": 0.41626089811325073,
      "learning_rate": 1.3428833114790827e-06,
      "loss": 0.0117,
      "step": 2647660
    },
    {
      "epoch": 4.332986390683608,
      "grad_norm": 0.1394147276878357,
      "learning_rate": 1.3428174192655654e-06,
      "loss": 0.0088,
      "step": 2647680
    },
    {
      "epoch": 4.333019121122261,
      "grad_norm": 0.32390540838241577,
      "learning_rate": 1.3427515270520484e-06,
      "loss": 0.0075,
      "step": 2647700
    },
    {
      "epoch": 4.333051851560914,
      "grad_norm": 0.17527785897254944,
      "learning_rate": 1.3426856348385311e-06,
      "loss": 0.0077,
      "step": 2647720
    },
    {
      "epoch": 4.333084581999568,
      "grad_norm": 0.1778860092163086,
      "learning_rate": 1.342619742625014e-06,
      "loss": 0.0157,
      "step": 2647740
    },
    {
      "epoch": 4.333117312438222,
      "grad_norm": 0.1257351040840149,
      "learning_rate": 1.342553850411497e-06,
      "loss": 0.0064,
      "step": 2647760
    },
    {
      "epoch": 4.333150042876874,
      "grad_norm": 0.3178040683269501,
      "learning_rate": 1.34248795819798e-06,
      "loss": 0.0088,
      "step": 2647780
    },
    {
      "epoch": 4.333182773315528,
      "grad_norm": 0.14486315846443176,
      "learning_rate": 1.3424220659844627e-06,
      "loss": 0.009,
      "step": 2647800
    },
    {
      "epoch": 4.3332155037541815,
      "grad_norm": 0.28249430656433105,
      "learning_rate": 1.3423561737709457e-06,
      "loss": 0.0108,
      "step": 2647820
    },
    {
      "epoch": 4.333248234192835,
      "grad_norm": 0.08416550606489182,
      "learning_rate": 1.3422902815574284e-06,
      "loss": 0.0051,
      "step": 2647840
    },
    {
      "epoch": 4.333280964631488,
      "grad_norm": 0.18326988816261292,
      "learning_rate": 1.3422243893439114e-06,
      "loss": 0.0148,
      "step": 2647860
    },
    {
      "epoch": 4.333313695070141,
      "grad_norm": 0.3931366801261902,
      "learning_rate": 1.342158497130394e-06,
      "loss": 0.0123,
      "step": 2647880
    },
    {
      "epoch": 4.333346425508795,
      "grad_norm": 0.12847863137722015,
      "learning_rate": 1.342092604916877e-06,
      "loss": 0.0103,
      "step": 2647900
    },
    {
      "epoch": 4.333379155947448,
      "grad_norm": 0.14422719180583954,
      "learning_rate": 1.3420267127033598e-06,
      "loss": 0.0088,
      "step": 2647920
    },
    {
      "epoch": 4.333411886386101,
      "grad_norm": 0.19832006096839905,
      "learning_rate": 1.341960820489843e-06,
      "loss": 0.0093,
      "step": 2647940
    },
    {
      "epoch": 4.333444616824755,
      "grad_norm": 0.16000334918498993,
      "learning_rate": 1.3418949282763257e-06,
      "loss": 0.0091,
      "step": 2647960
    },
    {
      "epoch": 4.333477347263408,
      "grad_norm": 0.1469813734292984,
      "learning_rate": 1.3418290360628086e-06,
      "loss": 0.0088,
      "step": 2647980
    },
    {
      "epoch": 4.333510077702061,
      "grad_norm": 0.0717206597328186,
      "learning_rate": 1.3417631438492914e-06,
      "loss": 0.0075,
      "step": 2648000
    },
    {
      "epoch": 4.333542808140715,
      "grad_norm": 0.2934105396270752,
      "learning_rate": 1.3416972516357743e-06,
      "loss": 0.0075,
      "step": 2648020
    },
    {
      "epoch": 4.333575538579368,
      "grad_norm": 0.41346192359924316,
      "learning_rate": 1.341631359422257e-06,
      "loss": 0.0096,
      "step": 2648040
    },
    {
      "epoch": 4.333608269018021,
      "grad_norm": 0.10175254195928574,
      "learning_rate": 1.34156546720874e-06,
      "loss": 0.0089,
      "step": 2648060
    },
    {
      "epoch": 4.333640999456675,
      "grad_norm": 0.05677688121795654,
      "learning_rate": 1.3414995749952228e-06,
      "loss": 0.0079,
      "step": 2648080
    },
    {
      "epoch": 4.333673729895328,
      "grad_norm": 0.1563078761100769,
      "learning_rate": 1.3414336827817057e-06,
      "loss": 0.0088,
      "step": 2648100
    },
    {
      "epoch": 4.333706460333981,
      "grad_norm": 0.5309476852416992,
      "learning_rate": 1.3413677905681887e-06,
      "loss": 0.0089,
      "step": 2648120
    },
    {
      "epoch": 4.333739190772635,
      "grad_norm": 0.5393126606941223,
      "learning_rate": 1.3413018983546716e-06,
      "loss": 0.0113,
      "step": 2648140
    },
    {
      "epoch": 4.333771921211288,
      "grad_norm": 0.31740090250968933,
      "learning_rate": 1.3412360061411544e-06,
      "loss": 0.0085,
      "step": 2648160
    },
    {
      "epoch": 4.333804651649942,
      "grad_norm": 0.13506685197353363,
      "learning_rate": 1.3411701139276373e-06,
      "loss": 0.0071,
      "step": 2648180
    },
    {
      "epoch": 4.3338373820885945,
      "grad_norm": 0.06941203027963638,
      "learning_rate": 1.34110422171412e-06,
      "loss": 0.0093,
      "step": 2648200
    },
    {
      "epoch": 4.333870112527248,
      "grad_norm": 0.24120472371578217,
      "learning_rate": 1.341038329500603e-06,
      "loss": 0.0061,
      "step": 2648220
    },
    {
      "epoch": 4.333902842965902,
      "grad_norm": 0.1854948103427887,
      "learning_rate": 1.3409724372870857e-06,
      "loss": 0.0104,
      "step": 2648240
    },
    {
      "epoch": 4.333935573404554,
      "grad_norm": 0.2546674907207489,
      "learning_rate": 1.3409065450735687e-06,
      "loss": 0.0107,
      "step": 2648260
    },
    {
      "epoch": 4.333968303843208,
      "grad_norm": 0.3516649305820465,
      "learning_rate": 1.3408406528600519e-06,
      "loss": 0.0094,
      "step": 2648280
    },
    {
      "epoch": 4.334001034281862,
      "grad_norm": 0.058982379734516144,
      "learning_rate": 1.3407747606465346e-06,
      "loss": 0.0063,
      "step": 2648300
    },
    {
      "epoch": 4.334033764720515,
      "grad_norm": 0.18187715113162994,
      "learning_rate": 1.3407088684330176e-06,
      "loss": 0.0071,
      "step": 2648320
    },
    {
      "epoch": 4.334066495159168,
      "grad_norm": 0.3716220557689667,
      "learning_rate": 1.3406429762195003e-06,
      "loss": 0.0102,
      "step": 2648340
    },
    {
      "epoch": 4.3340992255978215,
      "grad_norm": 0.15531831979751587,
      "learning_rate": 1.340577084005983e-06,
      "loss": 0.008,
      "step": 2648360
    },
    {
      "epoch": 4.334131956036475,
      "grad_norm": 0.5414766073226929,
      "learning_rate": 1.340511191792466e-06,
      "loss": 0.0103,
      "step": 2648380
    },
    {
      "epoch": 4.334164686475128,
      "grad_norm": 0.20496580004692078,
      "learning_rate": 1.3404452995789487e-06,
      "loss": 0.0154,
      "step": 2648400
    },
    {
      "epoch": 4.334197416913781,
      "grad_norm": 0.2562382221221924,
      "learning_rate": 1.3403794073654317e-06,
      "loss": 0.01,
      "step": 2648420
    },
    {
      "epoch": 4.334230147352435,
      "grad_norm": 0.15775679051876068,
      "learning_rate": 1.3403135151519144e-06,
      "loss": 0.0097,
      "step": 2648440
    },
    {
      "epoch": 4.3342628777910885,
      "grad_norm": 0.13078545033931732,
      "learning_rate": 1.3402476229383976e-06,
      "loss": 0.0087,
      "step": 2648460
    },
    {
      "epoch": 4.334295608229741,
      "grad_norm": 0.12475097924470901,
      "learning_rate": 1.3401817307248805e-06,
      "loss": 0.0083,
      "step": 2648480
    },
    {
      "epoch": 4.334328338668395,
      "grad_norm": 0.4813344478607178,
      "learning_rate": 1.3401158385113633e-06,
      "loss": 0.0066,
      "step": 2648500
    },
    {
      "epoch": 4.334361069107048,
      "grad_norm": 0.3861619532108307,
      "learning_rate": 1.3400499462978462e-06,
      "loss": 0.0079,
      "step": 2648520
    },
    {
      "epoch": 4.334393799545701,
      "grad_norm": 0.16495555639266968,
      "learning_rate": 1.339984054084329e-06,
      "loss": 0.0085,
      "step": 2648540
    },
    {
      "epoch": 4.334426529984355,
      "grad_norm": 0.20491883158683777,
      "learning_rate": 1.339918161870812e-06,
      "loss": 0.0053,
      "step": 2648560
    },
    {
      "epoch": 4.334459260423008,
      "grad_norm": 0.1204332485795021,
      "learning_rate": 1.3398522696572947e-06,
      "loss": 0.0109,
      "step": 2648580
    },
    {
      "epoch": 4.334491990861662,
      "grad_norm": 0.20863360166549683,
      "learning_rate": 1.3397863774437774e-06,
      "loss": 0.0079,
      "step": 2648600
    },
    {
      "epoch": 4.334524721300315,
      "grad_norm": 0.5486417412757874,
      "learning_rate": 1.3397204852302603e-06,
      "loss": 0.0085,
      "step": 2648620
    },
    {
      "epoch": 4.334557451738968,
      "grad_norm": 0.1840437948703766,
      "learning_rate": 1.3396545930167435e-06,
      "loss": 0.0079,
      "step": 2648640
    },
    {
      "epoch": 4.334590182177622,
      "grad_norm": 0.30800339579582214,
      "learning_rate": 1.3395887008032262e-06,
      "loss": 0.0083,
      "step": 2648660
    },
    {
      "epoch": 4.3346229126162745,
      "grad_norm": 0.17707718908786774,
      "learning_rate": 1.3395228085897092e-06,
      "loss": 0.0075,
      "step": 2648680
    },
    {
      "epoch": 4.334655643054928,
      "grad_norm": 0.2389179915189743,
      "learning_rate": 1.339456916376192e-06,
      "loss": 0.0094,
      "step": 2648700
    },
    {
      "epoch": 4.334688373493582,
      "grad_norm": 0.22495537996292114,
      "learning_rate": 1.3393910241626749e-06,
      "loss": 0.0088,
      "step": 2648720
    },
    {
      "epoch": 4.334721103932235,
      "grad_norm": 0.1050058901309967,
      "learning_rate": 1.3393251319491576e-06,
      "loss": 0.0064,
      "step": 2648740
    },
    {
      "epoch": 4.334753834370888,
      "grad_norm": 0.137791246175766,
      "learning_rate": 1.3392592397356406e-06,
      "loss": 0.0055,
      "step": 2648760
    },
    {
      "epoch": 4.334786564809542,
      "grad_norm": 0.09209803491830826,
      "learning_rate": 1.3391933475221233e-06,
      "loss": 0.0188,
      "step": 2648780
    },
    {
      "epoch": 4.334819295248195,
      "grad_norm": 0.3980531692504883,
      "learning_rate": 1.339127455308606e-06,
      "loss": 0.0095,
      "step": 2648800
    },
    {
      "epoch": 4.334852025686848,
      "grad_norm": 0.0903560072183609,
      "learning_rate": 1.3390615630950892e-06,
      "loss": 0.0061,
      "step": 2648820
    },
    {
      "epoch": 4.3348847561255015,
      "grad_norm": 0.09019642323255539,
      "learning_rate": 1.3389956708815722e-06,
      "loss": 0.0103,
      "step": 2648840
    },
    {
      "epoch": 4.334917486564155,
      "grad_norm": 0.14924779534339905,
      "learning_rate": 1.338929778668055e-06,
      "loss": 0.0076,
      "step": 2648860
    },
    {
      "epoch": 4.334950217002808,
      "grad_norm": 0.28917303681373596,
      "learning_rate": 1.3388638864545379e-06,
      "loss": 0.0145,
      "step": 2648880
    },
    {
      "epoch": 4.334982947441461,
      "grad_norm": 0.14967060089111328,
      "learning_rate": 1.3387979942410206e-06,
      "loss": 0.011,
      "step": 2648900
    },
    {
      "epoch": 4.335015677880115,
      "grad_norm": 0.7256864905357361,
      "learning_rate": 1.3387321020275036e-06,
      "loss": 0.0133,
      "step": 2648920
    },
    {
      "epoch": 4.335048408318769,
      "grad_norm": 0.4011385440826416,
      "learning_rate": 1.3386662098139863e-06,
      "loss": 0.0088,
      "step": 2648940
    },
    {
      "epoch": 4.335081138757421,
      "grad_norm": 0.17727839946746826,
      "learning_rate": 1.3386003176004692e-06,
      "loss": 0.0122,
      "step": 2648960
    },
    {
      "epoch": 4.335113869196075,
      "grad_norm": 0.42489197850227356,
      "learning_rate": 1.338534425386952e-06,
      "loss": 0.0103,
      "step": 2648980
    },
    {
      "epoch": 4.3351465996347285,
      "grad_norm": 0.13089978694915771,
      "learning_rate": 1.3384685331734352e-06,
      "loss": 0.0084,
      "step": 2649000
    },
    {
      "epoch": 4.335179330073382,
      "grad_norm": 0.20191077888011932,
      "learning_rate": 1.3384026409599179e-06,
      "loss": 0.0081,
      "step": 2649020
    },
    {
      "epoch": 4.335212060512035,
      "grad_norm": 0.16094015538692474,
      "learning_rate": 1.3383367487464008e-06,
      "loss": 0.0113,
      "step": 2649040
    },
    {
      "epoch": 4.335244790950688,
      "grad_norm": 0.28381088376045227,
      "learning_rate": 1.3382708565328836e-06,
      "loss": 0.0106,
      "step": 2649060
    },
    {
      "epoch": 4.335277521389342,
      "grad_norm": 0.22637006640434265,
      "learning_rate": 1.3382049643193665e-06,
      "loss": 0.0088,
      "step": 2649080
    },
    {
      "epoch": 4.335310251827995,
      "grad_norm": 0.2833192050457001,
      "learning_rate": 1.3381390721058493e-06,
      "loss": 0.0084,
      "step": 2649100
    },
    {
      "epoch": 4.335342982266648,
      "grad_norm": 0.42623579502105713,
      "learning_rate": 1.3380731798923322e-06,
      "loss": 0.009,
      "step": 2649120
    },
    {
      "epoch": 4.335375712705302,
      "grad_norm": 0.026001309975981712,
      "learning_rate": 1.338007287678815e-06,
      "loss": 0.0076,
      "step": 2649140
    },
    {
      "epoch": 4.335408443143955,
      "grad_norm": 0.12390082329511642,
      "learning_rate": 1.3379413954652981e-06,
      "loss": 0.0074,
      "step": 2649160
    },
    {
      "epoch": 4.335441173582608,
      "grad_norm": 0.21105867624282837,
      "learning_rate": 1.3378755032517809e-06,
      "loss": 0.0092,
      "step": 2649180
    },
    {
      "epoch": 4.335473904021262,
      "grad_norm": 0.07035619765520096,
      "learning_rate": 1.3378096110382638e-06,
      "loss": 0.011,
      "step": 2649200
    },
    {
      "epoch": 4.335506634459915,
      "grad_norm": 0.1929466426372528,
      "learning_rate": 1.3377437188247466e-06,
      "loss": 0.0092,
      "step": 2649220
    },
    {
      "epoch": 4.335539364898568,
      "grad_norm": 0.12069085985422134,
      "learning_rate": 1.3376778266112295e-06,
      "loss": 0.0066,
      "step": 2649240
    },
    {
      "epoch": 4.335572095337222,
      "grad_norm": 0.13196013867855072,
      "learning_rate": 1.3376119343977122e-06,
      "loss": 0.0082,
      "step": 2649260
    },
    {
      "epoch": 4.335604825775875,
      "grad_norm": 0.19836881756782532,
      "learning_rate": 1.3375460421841952e-06,
      "loss": 0.0074,
      "step": 2649280
    },
    {
      "epoch": 4.335637556214529,
      "grad_norm": 0.44576770067214966,
      "learning_rate": 1.337480149970678e-06,
      "loss": 0.0066,
      "step": 2649300
    },
    {
      "epoch": 4.3356702866531815,
      "grad_norm": 0.38301602005958557,
      "learning_rate": 1.3374142577571609e-06,
      "loss": 0.0102,
      "step": 2649320
    },
    {
      "epoch": 4.335703017091835,
      "grad_norm": 0.18583731353282928,
      "learning_rate": 1.3373483655436438e-06,
      "loss": 0.0133,
      "step": 2649340
    },
    {
      "epoch": 4.335735747530489,
      "grad_norm": 0.1544712483882904,
      "learning_rate": 1.3372824733301268e-06,
      "loss": 0.0095,
      "step": 2649360
    },
    {
      "epoch": 4.335768477969141,
      "grad_norm": 0.05478117614984512,
      "learning_rate": 1.3372165811166095e-06,
      "loss": 0.0074,
      "step": 2649380
    },
    {
      "epoch": 4.335801208407795,
      "grad_norm": 0.2510063052177429,
      "learning_rate": 1.3371506889030925e-06,
      "loss": 0.0086,
      "step": 2649400
    },
    {
      "epoch": 4.335833938846449,
      "grad_norm": 0.37396523356437683,
      "learning_rate": 1.3370847966895752e-06,
      "loss": 0.0068,
      "step": 2649420
    },
    {
      "epoch": 4.335866669285101,
      "grad_norm": 0.06794410198926926,
      "learning_rate": 1.3370189044760582e-06,
      "loss": 0.0069,
      "step": 2649440
    },
    {
      "epoch": 4.335899399723755,
      "grad_norm": 0.1268257200717926,
      "learning_rate": 1.336953012262541e-06,
      "loss": 0.0105,
      "step": 2649460
    },
    {
      "epoch": 4.3359321301624085,
      "grad_norm": 0.7064073085784912,
      "learning_rate": 1.3368871200490239e-06,
      "loss": 0.0118,
      "step": 2649480
    },
    {
      "epoch": 4.335964860601062,
      "grad_norm": 0.24059249460697174,
      "learning_rate": 1.3368212278355066e-06,
      "loss": 0.0087,
      "step": 2649500
    },
    {
      "epoch": 4.335997591039715,
      "grad_norm": 0.39056453108787537,
      "learning_rate": 1.3367553356219898e-06,
      "loss": 0.0084,
      "step": 2649520
    },
    {
      "epoch": 4.336030321478368,
      "grad_norm": 0.2365145981311798,
      "learning_rate": 1.3366894434084727e-06,
      "loss": 0.0106,
      "step": 2649540
    },
    {
      "epoch": 4.336063051917022,
      "grad_norm": 0.4762071967124939,
      "learning_rate": 1.3366235511949555e-06,
      "loss": 0.009,
      "step": 2649560
    },
    {
      "epoch": 4.336095782355675,
      "grad_norm": 0.28578001260757446,
      "learning_rate": 1.3365576589814382e-06,
      "loss": 0.0091,
      "step": 2649580
    },
    {
      "epoch": 4.336128512794328,
      "grad_norm": 0.08107783645391464,
      "learning_rate": 1.3364917667679212e-06,
      "loss": 0.0053,
      "step": 2649600
    },
    {
      "epoch": 4.336161243232982,
      "grad_norm": 0.1275886595249176,
      "learning_rate": 1.3364258745544039e-06,
      "loss": 0.0102,
      "step": 2649620
    },
    {
      "epoch": 4.3361939736716355,
      "grad_norm": 0.23310799896717072,
      "learning_rate": 1.3363599823408868e-06,
      "loss": 0.0085,
      "step": 2649640
    },
    {
      "epoch": 4.336226704110288,
      "grad_norm": 0.2542467415332794,
      "learning_rate": 1.3362940901273696e-06,
      "loss": 0.0063,
      "step": 2649660
    },
    {
      "epoch": 4.336259434548942,
      "grad_norm": 0.38335657119750977,
      "learning_rate": 1.3362281979138525e-06,
      "loss": 0.0056,
      "step": 2649680
    },
    {
      "epoch": 4.336292164987595,
      "grad_norm": 0.386821448802948,
      "learning_rate": 1.3361623057003357e-06,
      "loss": 0.0081,
      "step": 2649700
    },
    {
      "epoch": 4.336324895426248,
      "grad_norm": 0.09602554887533188,
      "learning_rate": 1.3360964134868184e-06,
      "loss": 0.0092,
      "step": 2649720
    },
    {
      "epoch": 4.336357625864902,
      "grad_norm": 0.2195776402950287,
      "learning_rate": 1.3360305212733014e-06,
      "loss": 0.0094,
      "step": 2649740
    },
    {
      "epoch": 4.336390356303555,
      "grad_norm": 0.3475624620914459,
      "learning_rate": 1.3359646290597841e-06,
      "loss": 0.0129,
      "step": 2649760
    },
    {
      "epoch": 4.336423086742209,
      "grad_norm": 0.14074969291687012,
      "learning_rate": 1.3358987368462669e-06,
      "loss": 0.0085,
      "step": 2649780
    },
    {
      "epoch": 4.336455817180862,
      "grad_norm": 0.607252299785614,
      "learning_rate": 1.3358328446327498e-06,
      "loss": 0.0093,
      "step": 2649800
    },
    {
      "epoch": 4.336488547619515,
      "grad_norm": 0.5457423329353333,
      "learning_rate": 1.3357669524192326e-06,
      "loss": 0.0076,
      "step": 2649820
    },
    {
      "epoch": 4.336521278058169,
      "grad_norm": 0.2290368527173996,
      "learning_rate": 1.3357010602057155e-06,
      "loss": 0.008,
      "step": 2649840
    },
    {
      "epoch": 4.3365540084968215,
      "grad_norm": 0.20686352252960205,
      "learning_rate": 1.3356351679921983e-06,
      "loss": 0.0082,
      "step": 2649860
    },
    {
      "epoch": 4.336586738935475,
      "grad_norm": 0.05812897905707359,
      "learning_rate": 1.3355692757786814e-06,
      "loss": 0.0059,
      "step": 2649880
    },
    {
      "epoch": 4.336619469374129,
      "grad_norm": 0.4426916241645813,
      "learning_rate": 1.3355033835651644e-06,
      "loss": 0.0096,
      "step": 2649900
    },
    {
      "epoch": 4.336652199812782,
      "grad_norm": 0.8129120469093323,
      "learning_rate": 1.3354374913516471e-06,
      "loss": 0.009,
      "step": 2649920
    },
    {
      "epoch": 4.336684930251435,
      "grad_norm": 0.16757093369960785,
      "learning_rate": 1.33537159913813e-06,
      "loss": 0.0108,
      "step": 2649940
    },
    {
      "epoch": 4.336717660690089,
      "grad_norm": 0.15682439506053925,
      "learning_rate": 1.3353057069246128e-06,
      "loss": 0.0069,
      "step": 2649960
    },
    {
      "epoch": 4.336750391128742,
      "grad_norm": 0.1365128606557846,
      "learning_rate": 1.3352398147110958e-06,
      "loss": 0.0089,
      "step": 2649980
    },
    {
      "epoch": 4.336783121567395,
      "grad_norm": 0.2737486660480499,
      "learning_rate": 1.3351739224975785e-06,
      "loss": 0.0082,
      "step": 2650000
    },
    {
      "epoch": 4.336783121567395,
      "eval_loss": 0.0059598954394459724,
      "eval_runtime": 6458.0435,
      "eval_samples_per_second": 159.159,
      "eval_steps_per_second": 15.916,
      "eval_sts-dev_pearson_cosine": 0.9865275371024901,
      "eval_sts-dev_spearman_cosine": 0.89661251064595,
      "step": 2650000
    },
    {
      "epoch": 4.3368158520060485,
      "grad_norm": 0.33374056220054626,
      "learning_rate": 1.3351080302840612e-06,
      "loss": 0.0136,
      "step": 2650020
    },
    {
      "epoch": 4.336848582444702,
      "grad_norm": 0.20647215843200684,
      "learning_rate": 1.3350421380705444e-06,
      "loss": 0.012,
      "step": 2650040
    },
    {
      "epoch": 4.336881312883356,
      "grad_norm": 0.07828110456466675,
      "learning_rate": 1.3349762458570273e-06,
      "loss": 0.0075,
      "step": 2650060
    },
    {
      "epoch": 4.336914043322008,
      "grad_norm": 0.40467968583106995,
      "learning_rate": 1.33491035364351e-06,
      "loss": 0.0083,
      "step": 2650080
    },
    {
      "epoch": 4.336946773760662,
      "grad_norm": 0.18833911418914795,
      "learning_rate": 1.334844461429993e-06,
      "loss": 0.0072,
      "step": 2650100
    },
    {
      "epoch": 4.3369795041993155,
      "grad_norm": 0.29669350385665894,
      "learning_rate": 1.3347785692164758e-06,
      "loss": 0.012,
      "step": 2650120
    },
    {
      "epoch": 4.337012234637968,
      "grad_norm": 0.11897400766611099,
      "learning_rate": 1.3347126770029587e-06,
      "loss": 0.0081,
      "step": 2650140
    },
    {
      "epoch": 4.337044965076622,
      "grad_norm": 0.37849169969558716,
      "learning_rate": 1.3346467847894415e-06,
      "loss": 0.0086,
      "step": 2650160
    },
    {
      "epoch": 4.337077695515275,
      "grad_norm": 0.3146859109401703,
      "learning_rate": 1.3345808925759244e-06,
      "loss": 0.0063,
      "step": 2650180
    },
    {
      "epoch": 4.337110425953929,
      "grad_norm": 0.11841592192649841,
      "learning_rate": 1.3345150003624072e-06,
      "loss": 0.0107,
      "step": 2650200
    },
    {
      "epoch": 4.337143156392582,
      "grad_norm": 0.6295311450958252,
      "learning_rate": 1.3344491081488903e-06,
      "loss": 0.0095,
      "step": 2650220
    },
    {
      "epoch": 4.337175886831235,
      "grad_norm": 0.11637365072965622,
      "learning_rate": 1.334383215935373e-06,
      "loss": 0.0116,
      "step": 2650240
    },
    {
      "epoch": 4.337208617269889,
      "grad_norm": 0.30650997161865234,
      "learning_rate": 1.334317323721856e-06,
      "loss": 0.007,
      "step": 2650260
    },
    {
      "epoch": 4.337241347708542,
      "grad_norm": 0.11481792479753494,
      "learning_rate": 1.3342514315083388e-06,
      "loss": 0.0076,
      "step": 2650280
    },
    {
      "epoch": 4.337274078147195,
      "grad_norm": 0.2953457832336426,
      "learning_rate": 1.3341855392948217e-06,
      "loss": 0.0092,
      "step": 2650300
    },
    {
      "epoch": 4.337306808585849,
      "grad_norm": 0.07610448449850082,
      "learning_rate": 1.3341196470813044e-06,
      "loss": 0.0114,
      "step": 2650320
    },
    {
      "epoch": 4.337339539024502,
      "grad_norm": 0.33959129452705383,
      "learning_rate": 1.3340537548677874e-06,
      "loss": 0.0126,
      "step": 2650340
    },
    {
      "epoch": 4.337372269463155,
      "grad_norm": 0.47248753905296326,
      "learning_rate": 1.3339878626542701e-06,
      "loss": 0.0143,
      "step": 2650360
    },
    {
      "epoch": 4.337404999901809,
      "grad_norm": 0.42725658416748047,
      "learning_rate": 1.333921970440753e-06,
      "loss": 0.009,
      "step": 2650380
    },
    {
      "epoch": 4.337437730340462,
      "grad_norm": 0.15380451083183289,
      "learning_rate": 1.333856078227236e-06,
      "loss": 0.0074,
      "step": 2650400
    },
    {
      "epoch": 4.337470460779115,
      "grad_norm": 0.5893048644065857,
      "learning_rate": 1.333790186013719e-06,
      "loss": 0.0098,
      "step": 2650420
    },
    {
      "epoch": 4.337503191217769,
      "grad_norm": 0.4843299686908722,
      "learning_rate": 1.3337242938002017e-06,
      "loss": 0.0151,
      "step": 2650440
    },
    {
      "epoch": 4.337535921656422,
      "grad_norm": 0.244973286986351,
      "learning_rate": 1.3336584015866847e-06,
      "loss": 0.0074,
      "step": 2650460
    },
    {
      "epoch": 4.337568652095076,
      "grad_norm": 0.7243137359619141,
      "learning_rate": 1.3335925093731674e-06,
      "loss": 0.0076,
      "step": 2650480
    },
    {
      "epoch": 4.3376013825337285,
      "grad_norm": 0.09050119668245316,
      "learning_rate": 1.3335266171596504e-06,
      "loss": 0.0084,
      "step": 2650500
    },
    {
      "epoch": 4.337634112972382,
      "grad_norm": 0.08732454478740692,
      "learning_rate": 1.3334607249461331e-06,
      "loss": 0.0077,
      "step": 2650520
    },
    {
      "epoch": 4.337666843411036,
      "grad_norm": 0.12626631557941437,
      "learning_rate": 1.333394832732616e-06,
      "loss": 0.0119,
      "step": 2650540
    },
    {
      "epoch": 4.337699573849688,
      "grad_norm": 0.14065471291542053,
      "learning_rate": 1.3333289405190988e-06,
      "loss": 0.0069,
      "step": 2650560
    },
    {
      "epoch": 4.337732304288342,
      "grad_norm": 0.20080813765525818,
      "learning_rate": 1.333263048305582e-06,
      "loss": 0.005,
      "step": 2650580
    },
    {
      "epoch": 4.337765034726996,
      "grad_norm": 0.1884503811597824,
      "learning_rate": 1.3331971560920647e-06,
      "loss": 0.008,
      "step": 2650600
    },
    {
      "epoch": 4.337797765165648,
      "grad_norm": 0.2952122092247009,
      "learning_rate": 1.3331312638785477e-06,
      "loss": 0.0122,
      "step": 2650620
    },
    {
      "epoch": 4.337830495604302,
      "grad_norm": 0.11480168253183365,
      "learning_rate": 1.3330653716650304e-06,
      "loss": 0.0097,
      "step": 2650640
    },
    {
      "epoch": 4.3378632260429555,
      "grad_norm": 0.21674500405788422,
      "learning_rate": 1.3329994794515133e-06,
      "loss": 0.0075,
      "step": 2650660
    },
    {
      "epoch": 4.337895956481609,
      "grad_norm": 0.13974839448928833,
      "learning_rate": 1.332933587237996e-06,
      "loss": 0.007,
      "step": 2650680
    },
    {
      "epoch": 4.337928686920262,
      "grad_norm": 0.4293220639228821,
      "learning_rate": 1.332867695024479e-06,
      "loss": 0.01,
      "step": 2650700
    },
    {
      "epoch": 4.337961417358915,
      "grad_norm": 0.3125801086425781,
      "learning_rate": 1.3328018028109618e-06,
      "loss": 0.0083,
      "step": 2650720
    },
    {
      "epoch": 4.337994147797569,
      "grad_norm": 0.2340846210718155,
      "learning_rate": 1.3327359105974447e-06,
      "loss": 0.0059,
      "step": 2650740
    },
    {
      "epoch": 4.338026878236223,
      "grad_norm": 0.1861630529165268,
      "learning_rate": 1.3326700183839277e-06,
      "loss": 0.01,
      "step": 2650760
    },
    {
      "epoch": 4.338059608674875,
      "grad_norm": 0.17808589339256287,
      "learning_rate": 1.3326041261704106e-06,
      "loss": 0.0073,
      "step": 2650780
    },
    {
      "epoch": 4.338092339113529,
      "grad_norm": 0.19049574434757233,
      "learning_rate": 1.3325382339568934e-06,
      "loss": 0.0096,
      "step": 2650800
    },
    {
      "epoch": 4.3381250695521825,
      "grad_norm": 0.600664496421814,
      "learning_rate": 1.3324723417433763e-06,
      "loss": 0.0078,
      "step": 2650820
    },
    {
      "epoch": 4.338157799990835,
      "grad_norm": 0.12735767662525177,
      "learning_rate": 1.332406449529859e-06,
      "loss": 0.0098,
      "step": 2650840
    },
    {
      "epoch": 4.338190530429489,
      "grad_norm": 0.32924091815948486,
      "learning_rate": 1.332340557316342e-06,
      "loss": 0.0111,
      "step": 2650860
    },
    {
      "epoch": 4.338223260868142,
      "grad_norm": 0.24117863178253174,
      "learning_rate": 1.3322746651028248e-06,
      "loss": 0.0086,
      "step": 2650880
    },
    {
      "epoch": 4.338255991306795,
      "grad_norm": 0.2221502959728241,
      "learning_rate": 1.3322087728893077e-06,
      "loss": 0.0074,
      "step": 2650900
    },
    {
      "epoch": 4.338288721745449,
      "grad_norm": 0.17853029072284698,
      "learning_rate": 1.3321428806757909e-06,
      "loss": 0.008,
      "step": 2650920
    },
    {
      "epoch": 4.338321452184102,
      "grad_norm": 0.2280963808298111,
      "learning_rate": 1.3320769884622736e-06,
      "loss": 0.0093,
      "step": 2650940
    },
    {
      "epoch": 4.338354182622756,
      "grad_norm": 0.47581714391708374,
      "learning_rate": 1.3320110962487566e-06,
      "loss": 0.014,
      "step": 2650960
    },
    {
      "epoch": 4.338386913061409,
      "grad_norm": 0.2984488308429718,
      "learning_rate": 1.3319452040352393e-06,
      "loss": 0.0081,
      "step": 2650980
    },
    {
      "epoch": 4.338419643500062,
      "grad_norm": 0.18250037729740143,
      "learning_rate": 1.331879311821722e-06,
      "loss": 0.0105,
      "step": 2651000
    },
    {
      "epoch": 4.338452373938716,
      "grad_norm": 0.20606981217861176,
      "learning_rate": 1.331813419608205e-06,
      "loss": 0.0109,
      "step": 2651020
    },
    {
      "epoch": 4.3384851043773685,
      "grad_norm": 0.26438459753990173,
      "learning_rate": 1.3317475273946877e-06,
      "loss": 0.0068,
      "step": 2651040
    },
    {
      "epoch": 4.338517834816022,
      "grad_norm": 0.10707559436559677,
      "learning_rate": 1.3316816351811707e-06,
      "loss": 0.0065,
      "step": 2651060
    },
    {
      "epoch": 4.338550565254676,
      "grad_norm": 0.09165006875991821,
      "learning_rate": 1.3316157429676534e-06,
      "loss": 0.0087,
      "step": 2651080
    },
    {
      "epoch": 4.338583295693329,
      "grad_norm": 0.15487360954284668,
      "learning_rate": 1.3315498507541366e-06,
      "loss": 0.0101,
      "step": 2651100
    },
    {
      "epoch": 4.338616026131982,
      "grad_norm": 0.09777654707431793,
      "learning_rate": 1.3314839585406195e-06,
      "loss": 0.0109,
      "step": 2651120
    },
    {
      "epoch": 4.3386487565706355,
      "grad_norm": 0.2277837097644806,
      "learning_rate": 1.3314180663271023e-06,
      "loss": 0.0058,
      "step": 2651140
    },
    {
      "epoch": 4.338681487009289,
      "grad_norm": 0.31202489137649536,
      "learning_rate": 1.3313521741135852e-06,
      "loss": 0.0069,
      "step": 2651160
    },
    {
      "epoch": 4.338714217447942,
      "grad_norm": 0.24328894913196564,
      "learning_rate": 1.331286281900068e-06,
      "loss": 0.0065,
      "step": 2651180
    },
    {
      "epoch": 4.338746947886595,
      "grad_norm": 0.32239866256713867,
      "learning_rate": 1.331220389686551e-06,
      "loss": 0.0098,
      "step": 2651200
    },
    {
      "epoch": 4.338779678325249,
      "grad_norm": 0.5605549812316895,
      "learning_rate": 1.3311544974730337e-06,
      "loss": 0.0122,
      "step": 2651220
    },
    {
      "epoch": 4.338812408763903,
      "grad_norm": 0.156882181763649,
      "learning_rate": 1.3310886052595164e-06,
      "loss": 0.0114,
      "step": 2651240
    },
    {
      "epoch": 4.338845139202555,
      "grad_norm": 0.08404605090618134,
      "learning_rate": 1.3310227130459994e-06,
      "loss": 0.0114,
      "step": 2651260
    },
    {
      "epoch": 4.338877869641209,
      "grad_norm": 0.5374959111213684,
      "learning_rate": 1.3309568208324825e-06,
      "loss": 0.0099,
      "step": 2651280
    },
    {
      "epoch": 4.3389106000798625,
      "grad_norm": 0.42803463339805603,
      "learning_rate": 1.3308909286189653e-06,
      "loss": 0.0087,
      "step": 2651300
    },
    {
      "epoch": 4.338943330518515,
      "grad_norm": 0.19247743487358093,
      "learning_rate": 1.3308250364054482e-06,
      "loss": 0.0152,
      "step": 2651320
    },
    {
      "epoch": 4.338976060957169,
      "grad_norm": 0.48427650332450867,
      "learning_rate": 1.330759144191931e-06,
      "loss": 0.0117,
      "step": 2651340
    },
    {
      "epoch": 4.339008791395822,
      "grad_norm": 0.20487649738788605,
      "learning_rate": 1.330693251978414e-06,
      "loss": 0.0094,
      "step": 2651360
    },
    {
      "epoch": 4.339041521834476,
      "grad_norm": 0.11754590272903442,
      "learning_rate": 1.3306273597648966e-06,
      "loss": 0.0131,
      "step": 2651380
    },
    {
      "epoch": 4.339074252273129,
      "grad_norm": 0.1629844307899475,
      "learning_rate": 1.3305614675513796e-06,
      "loss": 0.0127,
      "step": 2651400
    },
    {
      "epoch": 4.339106982711782,
      "grad_norm": 0.23936675488948822,
      "learning_rate": 1.3304955753378623e-06,
      "loss": 0.0075,
      "step": 2651420
    },
    {
      "epoch": 4.339139713150436,
      "grad_norm": 0.2536149322986603,
      "learning_rate": 1.330429683124345e-06,
      "loss": 0.0096,
      "step": 2651440
    },
    {
      "epoch": 4.339172443589089,
      "grad_norm": 0.23241381347179413,
      "learning_rate": 1.3303637909108282e-06,
      "loss": 0.0109,
      "step": 2651460
    },
    {
      "epoch": 4.339205174027742,
      "grad_norm": 0.17909112572669983,
      "learning_rate": 1.3302978986973112e-06,
      "loss": 0.0065,
      "step": 2651480
    },
    {
      "epoch": 4.339237904466396,
      "grad_norm": 0.33603325486183167,
      "learning_rate": 1.330232006483794e-06,
      "loss": 0.0119,
      "step": 2651500
    },
    {
      "epoch": 4.339270634905049,
      "grad_norm": 0.14846932888031006,
      "learning_rate": 1.3301661142702769e-06,
      "loss": 0.0141,
      "step": 2651520
    },
    {
      "epoch": 4.339303365343702,
      "grad_norm": 0.2644641399383545,
      "learning_rate": 1.3301002220567596e-06,
      "loss": 0.0085,
      "step": 2651540
    },
    {
      "epoch": 4.339336095782356,
      "grad_norm": 0.36309176683425903,
      "learning_rate": 1.3300343298432426e-06,
      "loss": 0.0091,
      "step": 2651560
    },
    {
      "epoch": 4.339368826221009,
      "grad_norm": 0.14220528304576874,
      "learning_rate": 1.3299684376297253e-06,
      "loss": 0.0106,
      "step": 2651580
    },
    {
      "epoch": 4.339401556659662,
      "grad_norm": 0.07861469686031342,
      "learning_rate": 1.3299025454162083e-06,
      "loss": 0.0097,
      "step": 2651600
    },
    {
      "epoch": 4.339434287098316,
      "grad_norm": 0.11774987727403641,
      "learning_rate": 1.329836653202691e-06,
      "loss": 0.0047,
      "step": 2651620
    },
    {
      "epoch": 4.339467017536969,
      "grad_norm": 0.22947102785110474,
      "learning_rate": 1.3297707609891742e-06,
      "loss": 0.0069,
      "step": 2651640
    },
    {
      "epoch": 4.339499747975623,
      "grad_norm": 0.6411157250404358,
      "learning_rate": 1.329704868775657e-06,
      "loss": 0.0134,
      "step": 2651660
    },
    {
      "epoch": 4.3395324784142755,
      "grad_norm": 0.392051637172699,
      "learning_rate": 1.3296389765621399e-06,
      "loss": 0.0078,
      "step": 2651680
    },
    {
      "epoch": 4.339565208852929,
      "grad_norm": 0.34072795510292053,
      "learning_rate": 1.3295730843486226e-06,
      "loss": 0.0056,
      "step": 2651700
    },
    {
      "epoch": 4.339597939291583,
      "grad_norm": 0.3186964690685272,
      "learning_rate": 1.3295071921351055e-06,
      "loss": 0.0081,
      "step": 2651720
    },
    {
      "epoch": 4.339630669730235,
      "grad_norm": 0.29065197706222534,
      "learning_rate": 1.3294412999215883e-06,
      "loss": 0.0146,
      "step": 2651740
    },
    {
      "epoch": 4.339663400168889,
      "grad_norm": 0.2023794949054718,
      "learning_rate": 1.3293754077080712e-06,
      "loss": 0.0105,
      "step": 2651760
    },
    {
      "epoch": 4.339696130607543,
      "grad_norm": 0.15807203948497772,
      "learning_rate": 1.329309515494554e-06,
      "loss": 0.015,
      "step": 2651780
    },
    {
      "epoch": 4.339728861046196,
      "grad_norm": 0.5685482621192932,
      "learning_rate": 1.3292436232810371e-06,
      "loss": 0.0122,
      "step": 2651800
    },
    {
      "epoch": 4.339761591484849,
      "grad_norm": 0.1431763619184494,
      "learning_rate": 1.3291777310675199e-06,
      "loss": 0.008,
      "step": 2651820
    },
    {
      "epoch": 4.3397943219235025,
      "grad_norm": 0.4433968663215637,
      "learning_rate": 1.3291118388540028e-06,
      "loss": 0.0088,
      "step": 2651840
    },
    {
      "epoch": 4.339827052362156,
      "grad_norm": 0.1765243411064148,
      "learning_rate": 1.3290459466404856e-06,
      "loss": 0.0085,
      "step": 2651860
    },
    {
      "epoch": 4.339859782800809,
      "grad_norm": 0.17080658674240112,
      "learning_rate": 1.3289800544269685e-06,
      "loss": 0.0074,
      "step": 2651880
    },
    {
      "epoch": 4.339892513239462,
      "grad_norm": 0.19884046912193298,
      "learning_rate": 1.3289141622134513e-06,
      "loss": 0.0089,
      "step": 2651900
    },
    {
      "epoch": 4.339925243678116,
      "grad_norm": 0.17075695097446442,
      "learning_rate": 1.3288482699999342e-06,
      "loss": 0.0093,
      "step": 2651920
    },
    {
      "epoch": 4.3399579741167695,
      "grad_norm": 0.1496511548757553,
      "learning_rate": 1.328782377786417e-06,
      "loss": 0.0085,
      "step": 2651940
    },
    {
      "epoch": 4.339990704555422,
      "grad_norm": 0.20628772675991058,
      "learning_rate": 1.3287164855729e-06,
      "loss": 0.0105,
      "step": 2651960
    },
    {
      "epoch": 4.340023434994076,
      "grad_norm": 0.1791459023952484,
      "learning_rate": 1.3286505933593829e-06,
      "loss": 0.0094,
      "step": 2651980
    },
    {
      "epoch": 4.340056165432729,
      "grad_norm": 0.09905976057052612,
      "learning_rate": 1.3285847011458658e-06,
      "loss": 0.0122,
      "step": 2652000
    },
    {
      "epoch": 4.340088895871382,
      "grad_norm": 0.20492783188819885,
      "learning_rate": 1.3285188089323485e-06,
      "loss": 0.013,
      "step": 2652020
    },
    {
      "epoch": 4.340121626310036,
      "grad_norm": 0.28349143266677856,
      "learning_rate": 1.3284529167188315e-06,
      "loss": 0.0092,
      "step": 2652040
    },
    {
      "epoch": 4.340154356748689,
      "grad_norm": 0.8953453302383423,
      "learning_rate": 1.3283870245053142e-06,
      "loss": 0.0079,
      "step": 2652060
    },
    {
      "epoch": 4.340187087187342,
      "grad_norm": 0.07123653590679169,
      "learning_rate": 1.3283211322917972e-06,
      "loss": 0.0072,
      "step": 2652080
    },
    {
      "epoch": 4.340219817625996,
      "grad_norm": 0.16143251955509186,
      "learning_rate": 1.32825524007828e-06,
      "loss": 0.0146,
      "step": 2652100
    },
    {
      "epoch": 4.340252548064649,
      "grad_norm": 0.25694745779037476,
      "learning_rate": 1.3281893478647629e-06,
      "loss": 0.0087,
      "step": 2652120
    },
    {
      "epoch": 4.340285278503303,
      "grad_norm": 0.29867660999298096,
      "learning_rate": 1.3281234556512456e-06,
      "loss": 0.0086,
      "step": 2652140
    },
    {
      "epoch": 4.3403180089419555,
      "grad_norm": 0.11296714842319489,
      "learning_rate": 1.3280575634377288e-06,
      "loss": 0.0136,
      "step": 2652160
    },
    {
      "epoch": 4.340350739380609,
      "grad_norm": 0.37704816460609436,
      "learning_rate": 1.3279916712242117e-06,
      "loss": 0.0065,
      "step": 2652180
    },
    {
      "epoch": 4.340383469819263,
      "grad_norm": 0.13505108654499054,
      "learning_rate": 1.3279257790106945e-06,
      "loss": 0.0108,
      "step": 2652200
    },
    {
      "epoch": 4.340416200257916,
      "grad_norm": 0.20250552892684937,
      "learning_rate": 1.3278598867971772e-06,
      "loss": 0.0101,
      "step": 2652220
    },
    {
      "epoch": 4.340448930696569,
      "grad_norm": 0.0747651606798172,
      "learning_rate": 1.3277939945836602e-06,
      "loss": 0.0074,
      "step": 2652240
    },
    {
      "epoch": 4.340481661135223,
      "grad_norm": 0.15732793509960175,
      "learning_rate": 1.327728102370143e-06,
      "loss": 0.0089,
      "step": 2652260
    },
    {
      "epoch": 4.340514391573876,
      "grad_norm": 0.18843556940555573,
      "learning_rate": 1.3276622101566259e-06,
      "loss": 0.0111,
      "step": 2652280
    },
    {
      "epoch": 4.340547122012529,
      "grad_norm": 0.31045445799827576,
      "learning_rate": 1.3275963179431086e-06,
      "loss": 0.0114,
      "step": 2652300
    },
    {
      "epoch": 4.3405798524511825,
      "grad_norm": 0.5239478349685669,
      "learning_rate": 1.3275304257295915e-06,
      "loss": 0.0083,
      "step": 2652320
    },
    {
      "epoch": 4.340612582889836,
      "grad_norm": 0.7873443365097046,
      "learning_rate": 1.3274645335160747e-06,
      "loss": 0.0122,
      "step": 2652340
    },
    {
      "epoch": 4.340645313328489,
      "grad_norm": 0.6882040500640869,
      "learning_rate": 1.3273986413025575e-06,
      "loss": 0.012,
      "step": 2652360
    },
    {
      "epoch": 4.340678043767142,
      "grad_norm": 0.12391238659620285,
      "learning_rate": 1.3273327490890404e-06,
      "loss": 0.0103,
      "step": 2652380
    },
    {
      "epoch": 4.340710774205796,
      "grad_norm": 0.36888644099235535,
      "learning_rate": 1.3272668568755231e-06,
      "loss": 0.0064,
      "step": 2652400
    },
    {
      "epoch": 4.34074350464445,
      "grad_norm": 0.2547222673892975,
      "learning_rate": 1.3272009646620059e-06,
      "loss": 0.0096,
      "step": 2652420
    },
    {
      "epoch": 4.340776235083102,
      "grad_norm": 0.1479184776544571,
      "learning_rate": 1.3271350724484888e-06,
      "loss": 0.0079,
      "step": 2652440
    },
    {
      "epoch": 4.340808965521756,
      "grad_norm": 1.0549308061599731,
      "learning_rate": 1.3270691802349716e-06,
      "loss": 0.0087,
      "step": 2652460
    },
    {
      "epoch": 4.3408416959604095,
      "grad_norm": 0.08380308747291565,
      "learning_rate": 1.3270032880214545e-06,
      "loss": 0.0109,
      "step": 2652480
    },
    {
      "epoch": 4.340874426399063,
      "grad_norm": 0.19419054687023163,
      "learning_rate": 1.3269373958079377e-06,
      "loss": 0.0117,
      "step": 2652500
    },
    {
      "epoch": 4.340907156837716,
      "grad_norm": 0.2526025176048279,
      "learning_rate": 1.3268715035944204e-06,
      "loss": 0.0123,
      "step": 2652520
    },
    {
      "epoch": 4.340939887276369,
      "grad_norm": 0.23840799927711487,
      "learning_rate": 1.3268056113809034e-06,
      "loss": 0.0117,
      "step": 2652540
    },
    {
      "epoch": 4.340972617715023,
      "grad_norm": 0.29344913363456726,
      "learning_rate": 1.3267397191673861e-06,
      "loss": 0.0083,
      "step": 2652560
    },
    {
      "epoch": 4.341005348153676,
      "grad_norm": 0.3962934911251068,
      "learning_rate": 1.326673826953869e-06,
      "loss": 0.0102,
      "step": 2652580
    },
    {
      "epoch": 4.341038078592329,
      "grad_norm": 0.5036739110946655,
      "learning_rate": 1.3266079347403518e-06,
      "loss": 0.0084,
      "step": 2652600
    },
    {
      "epoch": 4.341070809030983,
      "grad_norm": 0.2266106754541397,
      "learning_rate": 1.3265420425268348e-06,
      "loss": 0.008,
      "step": 2652620
    },
    {
      "epoch": 4.341103539469636,
      "grad_norm": 0.3165634870529175,
      "learning_rate": 1.3264761503133175e-06,
      "loss": 0.0107,
      "step": 2652640
    },
    {
      "epoch": 4.341136269908289,
      "grad_norm": 0.31621161103248596,
      "learning_rate": 1.3264102580998002e-06,
      "loss": 0.0089,
      "step": 2652660
    },
    {
      "epoch": 4.341169000346943,
      "grad_norm": 0.16568470001220703,
      "learning_rate": 1.3263443658862834e-06,
      "loss": 0.0115,
      "step": 2652680
    },
    {
      "epoch": 4.341201730785596,
      "grad_norm": 0.5527498722076416,
      "learning_rate": 1.3262784736727664e-06,
      "loss": 0.0136,
      "step": 2652700
    },
    {
      "epoch": 4.341234461224249,
      "grad_norm": 0.2646622657775879,
      "learning_rate": 1.326212581459249e-06,
      "loss": 0.0053,
      "step": 2652720
    },
    {
      "epoch": 4.341267191662903,
      "grad_norm": 0.5113509893417358,
      "learning_rate": 1.326146689245732e-06,
      "loss": 0.0119,
      "step": 2652740
    },
    {
      "epoch": 4.341299922101556,
      "grad_norm": 0.29633307456970215,
      "learning_rate": 1.3260807970322148e-06,
      "loss": 0.0093,
      "step": 2652760
    },
    {
      "epoch": 4.341332652540209,
      "grad_norm": 0.16460280120372772,
      "learning_rate": 1.3260149048186977e-06,
      "loss": 0.0116,
      "step": 2652780
    },
    {
      "epoch": 4.3413653829788625,
      "grad_norm": 0.28905752301216125,
      "learning_rate": 1.3259490126051805e-06,
      "loss": 0.012,
      "step": 2652800
    },
    {
      "epoch": 4.341398113417516,
      "grad_norm": 0.23771125078201294,
      "learning_rate": 1.3258831203916634e-06,
      "loss": 0.0067,
      "step": 2652820
    },
    {
      "epoch": 4.34143084385617,
      "grad_norm": 0.5979459285736084,
      "learning_rate": 1.3258172281781462e-06,
      "loss": 0.0104,
      "step": 2652840
    },
    {
      "epoch": 4.341463574294822,
      "grad_norm": 0.4091849625110626,
      "learning_rate": 1.3257513359646293e-06,
      "loss": 0.0081,
      "step": 2652860
    },
    {
      "epoch": 4.341496304733476,
      "grad_norm": 0.19043317437171936,
      "learning_rate": 1.325685443751112e-06,
      "loss": 0.0095,
      "step": 2652880
    },
    {
      "epoch": 4.34152903517213,
      "grad_norm": 0.14595787227153778,
      "learning_rate": 1.325619551537595e-06,
      "loss": 0.0102,
      "step": 2652900
    },
    {
      "epoch": 4.341561765610782,
      "grad_norm": 0.20215678215026855,
      "learning_rate": 1.3255536593240778e-06,
      "loss": 0.0112,
      "step": 2652920
    },
    {
      "epoch": 4.341594496049436,
      "grad_norm": 0.6205164790153503,
      "learning_rate": 1.3254877671105607e-06,
      "loss": 0.0098,
      "step": 2652940
    },
    {
      "epoch": 4.3416272264880895,
      "grad_norm": 0.1780509352684021,
      "learning_rate": 1.3254218748970435e-06,
      "loss": 0.0082,
      "step": 2652960
    },
    {
      "epoch": 4.341659956926743,
      "grad_norm": 0.18959000706672668,
      "learning_rate": 1.3253559826835264e-06,
      "loss": 0.0125,
      "step": 2652980
    },
    {
      "epoch": 4.341692687365396,
      "grad_norm": 0.264477401971817,
      "learning_rate": 1.3252900904700091e-06,
      "loss": 0.0092,
      "step": 2653000
    },
    {
      "epoch": 4.341725417804049,
      "grad_norm": 0.2973388135433197,
      "learning_rate": 1.325224198256492e-06,
      "loss": 0.0095,
      "step": 2653020
    },
    {
      "epoch": 4.341758148242703,
      "grad_norm": 0.43175625801086426,
      "learning_rate": 1.325158306042975e-06,
      "loss": 0.0131,
      "step": 2653040
    },
    {
      "epoch": 4.341790878681356,
      "grad_norm": 0.7138897180557251,
      "learning_rate": 1.325092413829458e-06,
      "loss": 0.0135,
      "step": 2653060
    },
    {
      "epoch": 4.341823609120009,
      "grad_norm": 0.34566614031791687,
      "learning_rate": 1.3250265216159407e-06,
      "loss": 0.0127,
      "step": 2653080
    },
    {
      "epoch": 4.341856339558663,
      "grad_norm": 0.2829243242740631,
      "learning_rate": 1.3249606294024237e-06,
      "loss": 0.0089,
      "step": 2653100
    },
    {
      "epoch": 4.3418890699973165,
      "grad_norm": 0.4048694968223572,
      "learning_rate": 1.3248947371889064e-06,
      "loss": 0.0088,
      "step": 2653120
    },
    {
      "epoch": 4.341921800435969,
      "grad_norm": 0.050010085105895996,
      "learning_rate": 1.3248288449753894e-06,
      "loss": 0.0086,
      "step": 2653140
    },
    {
      "epoch": 4.341954530874623,
      "grad_norm": 0.1464281976222992,
      "learning_rate": 1.3247629527618721e-06,
      "loss": 0.0121,
      "step": 2653160
    },
    {
      "epoch": 4.341987261313276,
      "grad_norm": 0.22641243040561676,
      "learning_rate": 1.324697060548355e-06,
      "loss": 0.0094,
      "step": 2653180
    },
    {
      "epoch": 4.342019991751929,
      "grad_norm": 0.29778966307640076,
      "learning_rate": 1.3246311683348378e-06,
      "loss": 0.0088,
      "step": 2653200
    },
    {
      "epoch": 4.342052722190583,
      "grad_norm": 0.25395357608795166,
      "learning_rate": 1.324565276121321e-06,
      "loss": 0.0095,
      "step": 2653220
    },
    {
      "epoch": 4.342085452629236,
      "grad_norm": 0.21528339385986328,
      "learning_rate": 1.3244993839078037e-06,
      "loss": 0.0174,
      "step": 2653240
    },
    {
      "epoch": 4.34211818306789,
      "grad_norm": 0.25598758459091187,
      "learning_rate": 1.3244334916942867e-06,
      "loss": 0.008,
      "step": 2653260
    },
    {
      "epoch": 4.342150913506543,
      "grad_norm": 0.10319702327251434,
      "learning_rate": 1.3243675994807694e-06,
      "loss": 0.0106,
      "step": 2653280
    },
    {
      "epoch": 4.342183643945196,
      "grad_norm": 0.2822124660015106,
      "learning_rate": 1.3243017072672524e-06,
      "loss": 0.0102,
      "step": 2653300
    },
    {
      "epoch": 4.34221637438385,
      "grad_norm": 0.2172473818063736,
      "learning_rate": 1.324235815053735e-06,
      "loss": 0.01,
      "step": 2653320
    },
    {
      "epoch": 4.3422491048225025,
      "grad_norm": 0.6425915956497192,
      "learning_rate": 1.324169922840218e-06,
      "loss": 0.0091,
      "step": 2653340
    },
    {
      "epoch": 4.342281835261156,
      "grad_norm": 0.5914556980133057,
      "learning_rate": 1.3241040306267008e-06,
      "loss": 0.0106,
      "step": 2653360
    },
    {
      "epoch": 4.34231456569981,
      "grad_norm": 0.5919675827026367,
      "learning_rate": 1.324038138413184e-06,
      "loss": 0.0119,
      "step": 2653380
    },
    {
      "epoch": 4.342347296138463,
      "grad_norm": 0.4833762049674988,
      "learning_rate": 1.323972246199667e-06,
      "loss": 0.0089,
      "step": 2653400
    },
    {
      "epoch": 4.342380026577116,
      "grad_norm": 0.05947083234786987,
      "learning_rate": 1.3239063539861496e-06,
      "loss": 0.0115,
      "step": 2653420
    },
    {
      "epoch": 4.34241275701577,
      "grad_norm": 0.08874371647834778,
      "learning_rate": 1.3238404617726324e-06,
      "loss": 0.012,
      "step": 2653440
    },
    {
      "epoch": 4.342445487454423,
      "grad_norm": 0.11852716654539108,
      "learning_rate": 1.3237745695591153e-06,
      "loss": 0.0133,
      "step": 2653460
    },
    {
      "epoch": 4.342478217893076,
      "grad_norm": 0.20738962292671204,
      "learning_rate": 1.323708677345598e-06,
      "loss": 0.0094,
      "step": 2653480
    },
    {
      "epoch": 4.3425109483317295,
      "grad_norm": 0.21257762610912323,
      "learning_rate": 1.323642785132081e-06,
      "loss": 0.0112,
      "step": 2653500
    },
    {
      "epoch": 4.342543678770383,
      "grad_norm": 0.22852425277233124,
      "learning_rate": 1.3235768929185638e-06,
      "loss": 0.0091,
      "step": 2653520
    },
    {
      "epoch": 4.342576409209036,
      "grad_norm": 0.42703747749328613,
      "learning_rate": 1.3235110007050467e-06,
      "loss": 0.0094,
      "step": 2653540
    },
    {
      "epoch": 4.342609139647689,
      "grad_norm": 0.3826711177825928,
      "learning_rate": 1.3234451084915299e-06,
      "loss": 0.0091,
      "step": 2653560
    },
    {
      "epoch": 4.342641870086343,
      "grad_norm": 0.2843964397907257,
      "learning_rate": 1.3233792162780126e-06,
      "loss": 0.0144,
      "step": 2653580
    },
    {
      "epoch": 4.3426746005249965,
      "grad_norm": 0.16167843341827393,
      "learning_rate": 1.3233133240644956e-06,
      "loss": 0.0077,
      "step": 2653600
    },
    {
      "epoch": 4.342707330963649,
      "grad_norm": 0.1570909172296524,
      "learning_rate": 1.3232474318509783e-06,
      "loss": 0.0109,
      "step": 2653620
    },
    {
      "epoch": 4.342740061402303,
      "grad_norm": 0.16905930638313293,
      "learning_rate": 1.323181539637461e-06,
      "loss": 0.0072,
      "step": 2653640
    },
    {
      "epoch": 4.342772791840956,
      "grad_norm": 0.6899138689041138,
      "learning_rate": 1.323115647423944e-06,
      "loss": 0.0125,
      "step": 2653660
    },
    {
      "epoch": 4.34280552227961,
      "grad_norm": 0.16633963584899902,
      "learning_rate": 1.3230497552104267e-06,
      "loss": 0.0091,
      "step": 2653680
    },
    {
      "epoch": 4.342838252718263,
      "grad_norm": 0.17622928321361542,
      "learning_rate": 1.3229838629969097e-06,
      "loss": 0.0076,
      "step": 2653700
    },
    {
      "epoch": 4.342870983156916,
      "grad_norm": 0.1331174373626709,
      "learning_rate": 1.3229179707833924e-06,
      "loss": 0.0095,
      "step": 2653720
    },
    {
      "epoch": 4.34290371359557,
      "grad_norm": 0.30502042174339294,
      "learning_rate": 1.3228520785698756e-06,
      "loss": 0.0089,
      "step": 2653740
    },
    {
      "epoch": 4.342936444034223,
      "grad_norm": 0.09978701919317245,
      "learning_rate": 1.3227861863563586e-06,
      "loss": 0.007,
      "step": 2653760
    },
    {
      "epoch": 4.342969174472876,
      "grad_norm": 0.19439293444156647,
      "learning_rate": 1.3227202941428413e-06,
      "loss": 0.0122,
      "step": 2653780
    },
    {
      "epoch": 4.34300190491153,
      "grad_norm": 0.06253641098737717,
      "learning_rate": 1.3226544019293242e-06,
      "loss": 0.0159,
      "step": 2653800
    },
    {
      "epoch": 4.3430346353501825,
      "grad_norm": 0.2482212781906128,
      "learning_rate": 1.322588509715807e-06,
      "loss": 0.0078,
      "step": 2653820
    },
    {
      "epoch": 4.343067365788836,
      "grad_norm": 0.24521470069885254,
      "learning_rate": 1.32252261750229e-06,
      "loss": 0.0136,
      "step": 2653840
    },
    {
      "epoch": 4.34310009622749,
      "grad_norm": 0.2722020745277405,
      "learning_rate": 1.3224567252887727e-06,
      "loss": 0.0082,
      "step": 2653860
    },
    {
      "epoch": 4.343132826666143,
      "grad_norm": 0.21689943969249725,
      "learning_rate": 1.3223908330752554e-06,
      "loss": 0.0055,
      "step": 2653880
    },
    {
      "epoch": 4.343165557104796,
      "grad_norm": 0.1345365345478058,
      "learning_rate": 1.3223249408617384e-06,
      "loss": 0.0081,
      "step": 2653900
    },
    {
      "epoch": 4.34319828754345,
      "grad_norm": 0.12289629131555557,
      "learning_rate": 1.3222590486482215e-06,
      "loss": 0.0059,
      "step": 2653920
    },
    {
      "epoch": 4.343231017982103,
      "grad_norm": 0.3307855427265167,
      "learning_rate": 1.3221931564347043e-06,
      "loss": 0.0105,
      "step": 2653940
    },
    {
      "epoch": 4.343263748420757,
      "grad_norm": 0.24006982147693634,
      "learning_rate": 1.3221272642211872e-06,
      "loss": 0.0102,
      "step": 2653960
    },
    {
      "epoch": 4.3432964788594095,
      "grad_norm": 0.3079862892627716,
      "learning_rate": 1.32206137200767e-06,
      "loss": 0.0108,
      "step": 2653980
    },
    {
      "epoch": 4.343329209298063,
      "grad_norm": 0.25386881828308105,
      "learning_rate": 1.321995479794153e-06,
      "loss": 0.0086,
      "step": 2654000
    },
    {
      "epoch": 4.343361939736717,
      "grad_norm": 0.3297194242477417,
      "learning_rate": 1.3219295875806356e-06,
      "loss": 0.0045,
      "step": 2654020
    },
    {
      "epoch": 4.343394670175369,
      "grad_norm": 0.28560972213745117,
      "learning_rate": 1.3218636953671186e-06,
      "loss": 0.0155,
      "step": 2654040
    },
    {
      "epoch": 4.343427400614023,
      "grad_norm": 0.3925970494747162,
      "learning_rate": 1.3217978031536013e-06,
      "loss": 0.0102,
      "step": 2654060
    },
    {
      "epoch": 4.343460131052677,
      "grad_norm": 0.08767562359571457,
      "learning_rate": 1.321731910940084e-06,
      "loss": 0.0086,
      "step": 2654080
    },
    {
      "epoch": 4.343492861491329,
      "grad_norm": 0.163851797580719,
      "learning_rate": 1.3216660187265672e-06,
      "loss": 0.0096,
      "step": 2654100
    },
    {
      "epoch": 4.343525591929983,
      "grad_norm": 0.20777395367622375,
      "learning_rate": 1.3216001265130502e-06,
      "loss": 0.0099,
      "step": 2654120
    },
    {
      "epoch": 4.3435583223686365,
      "grad_norm": 0.2041507065296173,
      "learning_rate": 1.321534234299533e-06,
      "loss": 0.0092,
      "step": 2654140
    },
    {
      "epoch": 4.34359105280729,
      "grad_norm": 0.26093941926956177,
      "learning_rate": 1.3214683420860159e-06,
      "loss": 0.0145,
      "step": 2654160
    },
    {
      "epoch": 4.343623783245943,
      "grad_norm": 0.07363690435886383,
      "learning_rate": 1.3214024498724986e-06,
      "loss": 0.0096,
      "step": 2654180
    },
    {
      "epoch": 4.343656513684596,
      "grad_norm": 0.1999393105506897,
      "learning_rate": 1.3213365576589816e-06,
      "loss": 0.0073,
      "step": 2654200
    },
    {
      "epoch": 4.34368924412325,
      "grad_norm": 0.3144610524177551,
      "learning_rate": 1.3212706654454643e-06,
      "loss": 0.0138,
      "step": 2654220
    },
    {
      "epoch": 4.343721974561903,
      "grad_norm": 0.07126357406377792,
      "learning_rate": 1.3212047732319473e-06,
      "loss": 0.0116,
      "step": 2654240
    },
    {
      "epoch": 4.343754705000556,
      "grad_norm": 0.2829074263572693,
      "learning_rate": 1.3211388810184302e-06,
      "loss": 0.0113,
      "step": 2654260
    },
    {
      "epoch": 4.34378743543921,
      "grad_norm": 0.3281378746032715,
      "learning_rate": 1.3210729888049132e-06,
      "loss": 0.0103,
      "step": 2654280
    },
    {
      "epoch": 4.3438201658778635,
      "grad_norm": 0.20790161192417145,
      "learning_rate": 1.321007096591396e-06,
      "loss": 0.0083,
      "step": 2654300
    },
    {
      "epoch": 4.343852896316516,
      "grad_norm": 0.288913756608963,
      "learning_rate": 1.3209412043778789e-06,
      "loss": 0.0068,
      "step": 2654320
    },
    {
      "epoch": 4.34388562675517,
      "grad_norm": 0.36271893978118896,
      "learning_rate": 1.3208753121643616e-06,
      "loss": 0.0113,
      "step": 2654340
    },
    {
      "epoch": 4.343918357193823,
      "grad_norm": 0.8770581483840942,
      "learning_rate": 1.3208094199508446e-06,
      "loss": 0.0168,
      "step": 2654360
    },
    {
      "epoch": 4.343951087632476,
      "grad_norm": 0.11205528676509857,
      "learning_rate": 1.3207435277373273e-06,
      "loss": 0.0106,
      "step": 2654380
    },
    {
      "epoch": 4.34398381807113,
      "grad_norm": 0.17864564061164856,
      "learning_rate": 1.3206776355238102e-06,
      "loss": 0.009,
      "step": 2654400
    },
    {
      "epoch": 4.344016548509783,
      "grad_norm": 0.10370869189500809,
      "learning_rate": 1.320611743310293e-06,
      "loss": 0.0063,
      "step": 2654420
    },
    {
      "epoch": 4.344049278948437,
      "grad_norm": 0.3200914263725281,
      "learning_rate": 1.3205458510967761e-06,
      "loss": 0.0056,
      "step": 2654440
    },
    {
      "epoch": 4.3440820093870895,
      "grad_norm": 0.23713527619838715,
      "learning_rate": 1.3204799588832589e-06,
      "loss": 0.0101,
      "step": 2654460
    },
    {
      "epoch": 4.344114739825743,
      "grad_norm": 0.1716606467962265,
      "learning_rate": 1.3204140666697418e-06,
      "loss": 0.0107,
      "step": 2654480
    },
    {
      "epoch": 4.344147470264397,
      "grad_norm": 0.21263109147548676,
      "learning_rate": 1.3203481744562246e-06,
      "loss": 0.0077,
      "step": 2654500
    },
    {
      "epoch": 4.344180200703049,
      "grad_norm": 0.4476194977760315,
      "learning_rate": 1.3202822822427075e-06,
      "loss": 0.0106,
      "step": 2654520
    },
    {
      "epoch": 4.344212931141703,
      "grad_norm": 0.10941849648952484,
      "learning_rate": 1.3202163900291903e-06,
      "loss": 0.008,
      "step": 2654540
    },
    {
      "epoch": 4.344245661580357,
      "grad_norm": 0.3762139678001404,
      "learning_rate": 1.3201504978156732e-06,
      "loss": 0.0094,
      "step": 2654560
    },
    {
      "epoch": 4.34427839201901,
      "grad_norm": 0.1725628525018692,
      "learning_rate": 1.320084605602156e-06,
      "loss": 0.0082,
      "step": 2654580
    },
    {
      "epoch": 4.344311122457663,
      "grad_norm": 0.1276698112487793,
      "learning_rate": 1.320018713388639e-06,
      "loss": 0.011,
      "step": 2654600
    },
    {
      "epoch": 4.3443438528963165,
      "grad_norm": 0.2109733372926712,
      "learning_rate": 1.3199528211751219e-06,
      "loss": 0.0099,
      "step": 2654620
    },
    {
      "epoch": 4.34437658333497,
      "grad_norm": 0.05852101370692253,
      "learning_rate": 1.3198869289616048e-06,
      "loss": 0.0073,
      "step": 2654640
    },
    {
      "epoch": 4.344409313773623,
      "grad_norm": 0.07065924257040024,
      "learning_rate": 1.3198210367480876e-06,
      "loss": 0.0071,
      "step": 2654660
    },
    {
      "epoch": 4.344442044212276,
      "grad_norm": 0.3190189599990845,
      "learning_rate": 1.3197551445345705e-06,
      "loss": 0.01,
      "step": 2654680
    },
    {
      "epoch": 4.34447477465093,
      "grad_norm": 0.26580068469047546,
      "learning_rate": 1.3196892523210532e-06,
      "loss": 0.0082,
      "step": 2654700
    },
    {
      "epoch": 4.344507505089584,
      "grad_norm": 0.31221047043800354,
      "learning_rate": 1.3196233601075362e-06,
      "loss": 0.0142,
      "step": 2654720
    },
    {
      "epoch": 4.344540235528236,
      "grad_norm": 0.28975576162338257,
      "learning_rate": 1.319557467894019e-06,
      "loss": 0.0119,
      "step": 2654740
    },
    {
      "epoch": 4.34457296596689,
      "grad_norm": 0.1911740005016327,
      "learning_rate": 1.3194915756805019e-06,
      "loss": 0.0098,
      "step": 2654760
    },
    {
      "epoch": 4.3446056964055435,
      "grad_norm": 0.1619403064250946,
      "learning_rate": 1.3194256834669846e-06,
      "loss": 0.0068,
      "step": 2654780
    },
    {
      "epoch": 4.344638426844196,
      "grad_norm": 0.17305602133274078,
      "learning_rate": 1.3193597912534678e-06,
      "loss": 0.0119,
      "step": 2654800
    },
    {
      "epoch": 4.34467115728285,
      "grad_norm": 0.27731969952583313,
      "learning_rate": 1.3192938990399507e-06,
      "loss": 0.0089,
      "step": 2654820
    },
    {
      "epoch": 4.344703887721503,
      "grad_norm": 0.3310924768447876,
      "learning_rate": 1.3192280068264335e-06,
      "loss": 0.0079,
      "step": 2654840
    },
    {
      "epoch": 4.344736618160157,
      "grad_norm": 0.23906481266021729,
      "learning_rate": 1.3191621146129162e-06,
      "loss": 0.0093,
      "step": 2654860
    },
    {
      "epoch": 4.34476934859881,
      "grad_norm": 1.107404351234436,
      "learning_rate": 1.3190962223993992e-06,
      "loss": 0.0078,
      "step": 2654880
    },
    {
      "epoch": 4.344802079037463,
      "grad_norm": 0.27817851305007935,
      "learning_rate": 1.319030330185882e-06,
      "loss": 0.0095,
      "step": 2654900
    },
    {
      "epoch": 4.344834809476117,
      "grad_norm": 0.28155970573425293,
      "learning_rate": 1.3189644379723649e-06,
      "loss": 0.0071,
      "step": 2654920
    },
    {
      "epoch": 4.34486753991477,
      "grad_norm": 0.3278081715106964,
      "learning_rate": 1.3188985457588476e-06,
      "loss": 0.0126,
      "step": 2654940
    },
    {
      "epoch": 4.344900270353423,
      "grad_norm": 0.25610658526420593,
      "learning_rate": 1.3188326535453306e-06,
      "loss": 0.0073,
      "step": 2654960
    },
    {
      "epoch": 4.344933000792077,
      "grad_norm": 0.13679009675979614,
      "learning_rate": 1.3187667613318137e-06,
      "loss": 0.0101,
      "step": 2654980
    },
    {
      "epoch": 4.3449657312307295,
      "grad_norm": 0.2680121064186096,
      "learning_rate": 1.3187008691182965e-06,
      "loss": 0.0067,
      "step": 2655000
    },
    {
      "epoch": 4.344998461669383,
      "grad_norm": 0.3661746382713318,
      "learning_rate": 1.3186349769047794e-06,
      "loss": 0.0092,
      "step": 2655020
    },
    {
      "epoch": 4.345031192108037,
      "grad_norm": 0.1573420912027359,
      "learning_rate": 1.3185690846912622e-06,
      "loss": 0.014,
      "step": 2655040
    },
    {
      "epoch": 4.34506392254669,
      "grad_norm": 0.1370518058538437,
      "learning_rate": 1.3185031924777449e-06,
      "loss": 0.0059,
      "step": 2655060
    },
    {
      "epoch": 4.345096652985343,
      "grad_norm": 0.24583163857460022,
      "learning_rate": 1.3184373002642278e-06,
      "loss": 0.0089,
      "step": 2655080
    },
    {
      "epoch": 4.345129383423997,
      "grad_norm": 0.7137806415557861,
      "learning_rate": 1.3183714080507106e-06,
      "loss": 0.0111,
      "step": 2655100
    },
    {
      "epoch": 4.34516211386265,
      "grad_norm": 0.13487713038921356,
      "learning_rate": 1.3183055158371935e-06,
      "loss": 0.0077,
      "step": 2655120
    },
    {
      "epoch": 4.345194844301304,
      "grad_norm": 0.23107768595218658,
      "learning_rate": 1.3182396236236767e-06,
      "loss": 0.0141,
      "step": 2655140
    },
    {
      "epoch": 4.3452275747399565,
      "grad_norm": 0.31946995854377747,
      "learning_rate": 1.3181737314101594e-06,
      "loss": 0.0091,
      "step": 2655160
    },
    {
      "epoch": 4.34526030517861,
      "grad_norm": 0.39445099234580994,
      "learning_rate": 1.3181078391966424e-06,
      "loss": 0.0143,
      "step": 2655180
    },
    {
      "epoch": 4.345293035617264,
      "grad_norm": 0.4912779927253723,
      "learning_rate": 1.3180419469831251e-06,
      "loss": 0.0113,
      "step": 2655200
    },
    {
      "epoch": 4.345325766055916,
      "grad_norm": 0.445199579000473,
      "learning_rate": 1.317976054769608e-06,
      "loss": 0.0139,
      "step": 2655220
    },
    {
      "epoch": 4.34535849649457,
      "grad_norm": 0.09268566966056824,
      "learning_rate": 1.3179101625560908e-06,
      "loss": 0.0133,
      "step": 2655240
    },
    {
      "epoch": 4.3453912269332235,
      "grad_norm": 0.8250933885574341,
      "learning_rate": 1.3178442703425738e-06,
      "loss": 0.0107,
      "step": 2655260
    },
    {
      "epoch": 4.345423957371876,
      "grad_norm": 0.185590922832489,
      "learning_rate": 1.3177783781290565e-06,
      "loss": 0.0067,
      "step": 2655280
    },
    {
      "epoch": 4.34545668781053,
      "grad_norm": 0.29299846291542053,
      "learning_rate": 1.3177124859155393e-06,
      "loss": 0.0076,
      "step": 2655300
    },
    {
      "epoch": 4.3454894182491834,
      "grad_norm": 0.05251799151301384,
      "learning_rate": 1.3176465937020224e-06,
      "loss": 0.0089,
      "step": 2655320
    },
    {
      "epoch": 4.345522148687837,
      "grad_norm": 0.38868582248687744,
      "learning_rate": 1.3175807014885054e-06,
      "loss": 0.0093,
      "step": 2655340
    },
    {
      "epoch": 4.34555487912649,
      "grad_norm": 0.8250215649604797,
      "learning_rate": 1.317514809274988e-06,
      "loss": 0.0158,
      "step": 2655360
    },
    {
      "epoch": 4.345587609565143,
      "grad_norm": 0.08108813315629959,
      "learning_rate": 1.317448917061471e-06,
      "loss": 0.0091,
      "step": 2655380
    },
    {
      "epoch": 4.345620340003797,
      "grad_norm": 0.05274774134159088,
      "learning_rate": 1.3173830248479538e-06,
      "loss": 0.0066,
      "step": 2655400
    },
    {
      "epoch": 4.3456530704424505,
      "grad_norm": 0.18003509938716888,
      "learning_rate": 1.3173171326344367e-06,
      "loss": 0.0072,
      "step": 2655420
    },
    {
      "epoch": 4.345685800881103,
      "grad_norm": 0.5454428195953369,
      "learning_rate": 1.3172512404209195e-06,
      "loss": 0.0095,
      "step": 2655440
    },
    {
      "epoch": 4.345718531319757,
      "grad_norm": 0.16854752600193024,
      "learning_rate": 1.3171853482074024e-06,
      "loss": 0.0067,
      "step": 2655460
    },
    {
      "epoch": 4.34575126175841,
      "grad_norm": 0.10234026610851288,
      "learning_rate": 1.3171194559938852e-06,
      "loss": 0.0078,
      "step": 2655480
    },
    {
      "epoch": 4.345783992197063,
      "grad_norm": 0.18861128389835358,
      "learning_rate": 1.3170535637803683e-06,
      "loss": 0.0159,
      "step": 2655500
    },
    {
      "epoch": 4.345816722635717,
      "grad_norm": 0.16200867295265198,
      "learning_rate": 1.316987671566851e-06,
      "loss": 0.0094,
      "step": 2655520
    },
    {
      "epoch": 4.34584945307437,
      "grad_norm": 0.30190345644950867,
      "learning_rate": 1.316921779353334e-06,
      "loss": 0.0168,
      "step": 2655540
    },
    {
      "epoch": 4.345882183513023,
      "grad_norm": 0.30355900526046753,
      "learning_rate": 1.3168558871398168e-06,
      "loss": 0.0096,
      "step": 2655560
    },
    {
      "epoch": 4.345914913951677,
      "grad_norm": 0.14640438556671143,
      "learning_rate": 1.3167899949262997e-06,
      "loss": 0.0065,
      "step": 2655580
    },
    {
      "epoch": 4.34594764439033,
      "grad_norm": 0.21056726574897766,
      "learning_rate": 1.3167241027127825e-06,
      "loss": 0.009,
      "step": 2655600
    },
    {
      "epoch": 4.345980374828984,
      "grad_norm": 0.23449872434139252,
      "learning_rate": 1.3166582104992654e-06,
      "loss": 0.0099,
      "step": 2655620
    },
    {
      "epoch": 4.3460131052676365,
      "grad_norm": 0.23412229120731354,
      "learning_rate": 1.3165923182857482e-06,
      "loss": 0.013,
      "step": 2655640
    },
    {
      "epoch": 4.34604583570629,
      "grad_norm": 0.35479259490966797,
      "learning_rate": 1.3165264260722311e-06,
      "loss": 0.01,
      "step": 2655660
    },
    {
      "epoch": 4.346078566144944,
      "grad_norm": 0.20601043105125427,
      "learning_rate": 1.316460533858714e-06,
      "loss": 0.0084,
      "step": 2655680
    },
    {
      "epoch": 4.346111296583596,
      "grad_norm": 0.8150526285171509,
      "learning_rate": 1.316394641645197e-06,
      "loss": 0.0089,
      "step": 2655700
    },
    {
      "epoch": 4.34614402702225,
      "grad_norm": 0.29855918884277344,
      "learning_rate": 1.3163287494316798e-06,
      "loss": 0.0102,
      "step": 2655720
    },
    {
      "epoch": 4.346176757460904,
      "grad_norm": 0.28411534428596497,
      "learning_rate": 1.3162628572181627e-06,
      "loss": 0.0114,
      "step": 2655740
    },
    {
      "epoch": 4.346209487899557,
      "grad_norm": 0.5641933083534241,
      "learning_rate": 1.3161969650046454e-06,
      "loss": 0.0076,
      "step": 2655760
    },
    {
      "epoch": 4.34624221833821,
      "grad_norm": 0.08343802392482758,
      "learning_rate": 1.3161310727911284e-06,
      "loss": 0.0095,
      "step": 2655780
    },
    {
      "epoch": 4.3462749487768635,
      "grad_norm": 0.10981421917676926,
      "learning_rate": 1.3160651805776111e-06,
      "loss": 0.0086,
      "step": 2655800
    },
    {
      "epoch": 4.346307679215517,
      "grad_norm": 0.11894068866968155,
      "learning_rate": 1.315999288364094e-06,
      "loss": 0.0085,
      "step": 2655820
    },
    {
      "epoch": 4.34634040965417,
      "grad_norm": 0.07921843975782394,
      "learning_rate": 1.3159333961505768e-06,
      "loss": 0.0092,
      "step": 2655840
    },
    {
      "epoch": 4.346373140092823,
      "grad_norm": 0.19900347292423248,
      "learning_rate": 1.31586750393706e-06,
      "loss": 0.0099,
      "step": 2655860
    },
    {
      "epoch": 4.346405870531477,
      "grad_norm": 0.2912503480911255,
      "learning_rate": 1.3158016117235427e-06,
      "loss": 0.0075,
      "step": 2655880
    },
    {
      "epoch": 4.346438600970131,
      "grad_norm": 0.09849748760461807,
      "learning_rate": 1.3157357195100257e-06,
      "loss": 0.0091,
      "step": 2655900
    },
    {
      "epoch": 4.346471331408783,
      "grad_norm": 0.17966800928115845,
      "learning_rate": 1.3156698272965084e-06,
      "loss": 0.008,
      "step": 2655920
    },
    {
      "epoch": 4.346504061847437,
      "grad_norm": 0.26284924149513245,
      "learning_rate": 1.3156039350829914e-06,
      "loss": 0.0097,
      "step": 2655940
    },
    {
      "epoch": 4.3465367922860905,
      "grad_norm": 0.2524690330028534,
      "learning_rate": 1.3155380428694741e-06,
      "loss": 0.0076,
      "step": 2655960
    },
    {
      "epoch": 4.346569522724743,
      "grad_norm": 0.307784765958786,
      "learning_rate": 1.315472150655957e-06,
      "loss": 0.0121,
      "step": 2655980
    },
    {
      "epoch": 4.346602253163397,
      "grad_norm": 0.3094483017921448,
      "learning_rate": 1.3154062584424398e-06,
      "loss": 0.0127,
      "step": 2656000
    },
    {
      "epoch": 4.34663498360205,
      "grad_norm": 0.4512327313423157,
      "learning_rate": 1.315340366228923e-06,
      "loss": 0.0121,
      "step": 2656020
    },
    {
      "epoch": 4.346667714040704,
      "grad_norm": 0.17728465795516968,
      "learning_rate": 1.315274474015406e-06,
      "loss": 0.0053,
      "step": 2656040
    },
    {
      "epoch": 4.346700444479357,
      "grad_norm": 0.320046603679657,
      "learning_rate": 1.3152085818018887e-06,
      "loss": 0.0104,
      "step": 2656060
    },
    {
      "epoch": 4.34673317491801,
      "grad_norm": 0.10464300215244293,
      "learning_rate": 1.3151426895883714e-06,
      "loss": 0.0084,
      "step": 2656080
    },
    {
      "epoch": 4.346765905356664,
      "grad_norm": 0.21132034063339233,
      "learning_rate": 1.3150767973748543e-06,
      "loss": 0.0068,
      "step": 2656100
    },
    {
      "epoch": 4.346798635795317,
      "grad_norm": 0.10422661155462265,
      "learning_rate": 1.315010905161337e-06,
      "loss": 0.0089,
      "step": 2656120
    },
    {
      "epoch": 4.34683136623397,
      "grad_norm": 0.1278727799654007,
      "learning_rate": 1.31494501294782e-06,
      "loss": 0.0133,
      "step": 2656140
    },
    {
      "epoch": 4.346864096672624,
      "grad_norm": 0.3587697744369507,
      "learning_rate": 1.3148791207343028e-06,
      "loss": 0.0145,
      "step": 2656160
    },
    {
      "epoch": 4.346896827111277,
      "grad_norm": 0.17060832679271698,
      "learning_rate": 1.3148132285207857e-06,
      "loss": 0.0073,
      "step": 2656180
    },
    {
      "epoch": 4.34692955754993,
      "grad_norm": 0.3680512309074402,
      "learning_rate": 1.3147473363072689e-06,
      "loss": 0.0073,
      "step": 2656200
    },
    {
      "epoch": 4.346962287988584,
      "grad_norm": 0.1683357059955597,
      "learning_rate": 1.3146814440937516e-06,
      "loss": 0.0062,
      "step": 2656220
    },
    {
      "epoch": 4.346995018427237,
      "grad_norm": 0.2536293864250183,
      "learning_rate": 1.3146155518802346e-06,
      "loss": 0.0087,
      "step": 2656240
    },
    {
      "epoch": 4.34702774886589,
      "grad_norm": 0.08147318661212921,
      "learning_rate": 1.3145496596667173e-06,
      "loss": 0.009,
      "step": 2656260
    },
    {
      "epoch": 4.3470604793045435,
      "grad_norm": 0.33342453837394714,
      "learning_rate": 1.3144837674532e-06,
      "loss": 0.0087,
      "step": 2656280
    },
    {
      "epoch": 4.347093209743197,
      "grad_norm": 0.2939963638782501,
      "learning_rate": 1.314417875239683e-06,
      "loss": 0.0076,
      "step": 2656300
    },
    {
      "epoch": 4.347125940181851,
      "grad_norm": 0.1124635562300682,
      "learning_rate": 1.3143519830261658e-06,
      "loss": 0.008,
      "step": 2656320
    },
    {
      "epoch": 4.347158670620503,
      "grad_norm": 0.3246454894542694,
      "learning_rate": 1.3142860908126487e-06,
      "loss": 0.013,
      "step": 2656340
    },
    {
      "epoch": 4.347191401059157,
      "grad_norm": 0.1649160087108612,
      "learning_rate": 1.3142201985991314e-06,
      "loss": 0.0097,
      "step": 2656360
    },
    {
      "epoch": 4.347224131497811,
      "grad_norm": 0.32619091868400574,
      "learning_rate": 1.3141543063856146e-06,
      "loss": 0.0104,
      "step": 2656380
    },
    {
      "epoch": 4.347256861936463,
      "grad_norm": 0.2557936906814575,
      "learning_rate": 1.3140884141720976e-06,
      "loss": 0.0077,
      "step": 2656400
    },
    {
      "epoch": 4.347289592375117,
      "grad_norm": 0.5938275456428528,
      "learning_rate": 1.3140225219585803e-06,
      "loss": 0.0084,
      "step": 2656420
    },
    {
      "epoch": 4.3473223228137705,
      "grad_norm": 0.11247040331363678,
      "learning_rate": 1.3139566297450633e-06,
      "loss": 0.0129,
      "step": 2656440
    },
    {
      "epoch": 4.347355053252423,
      "grad_norm": 0.49250301718711853,
      "learning_rate": 1.313890737531546e-06,
      "loss": 0.0127,
      "step": 2656460
    },
    {
      "epoch": 4.347387783691077,
      "grad_norm": 0.09930413961410522,
      "learning_rate": 1.313824845318029e-06,
      "loss": 0.013,
      "step": 2656480
    },
    {
      "epoch": 4.34742051412973,
      "grad_norm": 0.1942627876996994,
      "learning_rate": 1.3137589531045117e-06,
      "loss": 0.0075,
      "step": 2656500
    },
    {
      "epoch": 4.347453244568384,
      "grad_norm": 0.1958824098110199,
      "learning_rate": 1.3136930608909944e-06,
      "loss": 0.0065,
      "step": 2656520
    },
    {
      "epoch": 4.347485975007037,
      "grad_norm": 0.07270275801420212,
      "learning_rate": 1.3136271686774774e-06,
      "loss": 0.0096,
      "step": 2656540
    },
    {
      "epoch": 4.34751870544569,
      "grad_norm": 0.15874499082565308,
      "learning_rate": 1.3135612764639605e-06,
      "loss": 0.0094,
      "step": 2656560
    },
    {
      "epoch": 4.347551435884344,
      "grad_norm": 0.14579373598098755,
      "learning_rate": 1.3134953842504433e-06,
      "loss": 0.0083,
      "step": 2656580
    },
    {
      "epoch": 4.3475841663229975,
      "grad_norm": 0.2187991589307785,
      "learning_rate": 1.3134294920369262e-06,
      "loss": 0.0109,
      "step": 2656600
    },
    {
      "epoch": 4.34761689676165,
      "grad_norm": 0.14707963168621063,
      "learning_rate": 1.313363599823409e-06,
      "loss": 0.0103,
      "step": 2656620
    },
    {
      "epoch": 4.347649627200304,
      "grad_norm": 0.11687172949314117,
      "learning_rate": 1.313297707609892e-06,
      "loss": 0.0068,
      "step": 2656640
    },
    {
      "epoch": 4.347682357638957,
      "grad_norm": 0.24548973143100739,
      "learning_rate": 1.3132318153963747e-06,
      "loss": 0.0053,
      "step": 2656660
    },
    {
      "epoch": 4.34771508807761,
      "grad_norm": 0.38862231373786926,
      "learning_rate": 1.3131659231828576e-06,
      "loss": 0.0085,
      "step": 2656680
    },
    {
      "epoch": 4.347747818516264,
      "grad_norm": 0.3318473994731903,
      "learning_rate": 1.3131000309693404e-06,
      "loss": 0.01,
      "step": 2656700
    },
    {
      "epoch": 4.347780548954917,
      "grad_norm": 0.19520431756973267,
      "learning_rate": 1.313034138755823e-06,
      "loss": 0.0107,
      "step": 2656720
    },
    {
      "epoch": 4.34781327939357,
      "grad_norm": 0.36922162771224976,
      "learning_rate": 1.3129682465423063e-06,
      "loss": 0.0076,
      "step": 2656740
    },
    {
      "epoch": 4.347846009832224,
      "grad_norm": 0.279028058052063,
      "learning_rate": 1.3129023543287892e-06,
      "loss": 0.0076,
      "step": 2656760
    },
    {
      "epoch": 4.347878740270877,
      "grad_norm": 1.0283294916152954,
      "learning_rate": 1.312836462115272e-06,
      "loss": 0.0109,
      "step": 2656780
    },
    {
      "epoch": 4.347911470709531,
      "grad_norm": 0.2643623650074005,
      "learning_rate": 1.312770569901755e-06,
      "loss": 0.0146,
      "step": 2656800
    },
    {
      "epoch": 4.3479442011481835,
      "grad_norm": 0.11264308542013168,
      "learning_rate": 1.3127046776882376e-06,
      "loss": 0.0055,
      "step": 2656820
    },
    {
      "epoch": 4.347976931586837,
      "grad_norm": 0.18371447920799255,
      "learning_rate": 1.3126387854747206e-06,
      "loss": 0.0138,
      "step": 2656840
    },
    {
      "epoch": 4.348009662025491,
      "grad_norm": 0.11314947158098221,
      "learning_rate": 1.3125728932612033e-06,
      "loss": 0.0121,
      "step": 2656860
    },
    {
      "epoch": 4.348042392464144,
      "grad_norm": 0.17486552894115448,
      "learning_rate": 1.3125070010476863e-06,
      "loss": 0.0055,
      "step": 2656880
    },
    {
      "epoch": 4.348075122902797,
      "grad_norm": 0.1767406016588211,
      "learning_rate": 1.3124411088341692e-06,
      "loss": 0.0088,
      "step": 2656900
    },
    {
      "epoch": 4.348107853341451,
      "grad_norm": 0.23932458460330963,
      "learning_rate": 1.3123752166206522e-06,
      "loss": 0.0091,
      "step": 2656920
    },
    {
      "epoch": 4.348140583780104,
      "grad_norm": 0.278335839509964,
      "learning_rate": 1.312309324407135e-06,
      "loss": 0.0095,
      "step": 2656940
    },
    {
      "epoch": 4.348173314218757,
      "grad_norm": 0.16861945390701294,
      "learning_rate": 1.3122434321936179e-06,
      "loss": 0.0079,
      "step": 2656960
    },
    {
      "epoch": 4.3482060446574105,
      "grad_norm": 0.16889303922653198,
      "learning_rate": 1.3121775399801006e-06,
      "loss": 0.0118,
      "step": 2656980
    },
    {
      "epoch": 4.348238775096064,
      "grad_norm": 0.1164737418293953,
      "learning_rate": 1.3121116477665836e-06,
      "loss": 0.0082,
      "step": 2657000
    },
    {
      "epoch": 4.348271505534717,
      "grad_norm": 0.40570518374443054,
      "learning_rate": 1.3120457555530663e-06,
      "loss": 0.0092,
      "step": 2657020
    },
    {
      "epoch": 4.34830423597337,
      "grad_norm": 0.5090230703353882,
      "learning_rate": 1.3119798633395493e-06,
      "loss": 0.0109,
      "step": 2657040
    },
    {
      "epoch": 4.348336966412024,
      "grad_norm": 0.32271072268486023,
      "learning_rate": 1.311913971126032e-06,
      "loss": 0.0076,
      "step": 2657060
    },
    {
      "epoch": 4.3483696968506775,
      "grad_norm": 0.11807886511087418,
      "learning_rate": 1.3118480789125152e-06,
      "loss": 0.0054,
      "step": 2657080
    },
    {
      "epoch": 4.34840242728933,
      "grad_norm": 0.0889405906200409,
      "learning_rate": 1.311782186698998e-06,
      "loss": 0.008,
      "step": 2657100
    },
    {
      "epoch": 4.348435157727984,
      "grad_norm": 0.43255195021629333,
      "learning_rate": 1.3117162944854809e-06,
      "loss": 0.0126,
      "step": 2657120
    },
    {
      "epoch": 4.348467888166637,
      "grad_norm": 0.20391759276390076,
      "learning_rate": 1.3116504022719636e-06,
      "loss": 0.007,
      "step": 2657140
    },
    {
      "epoch": 4.34850061860529,
      "grad_norm": 0.08002155274152756,
      "learning_rate": 1.3115845100584465e-06,
      "loss": 0.0068,
      "step": 2657160
    },
    {
      "epoch": 4.348533349043944,
      "grad_norm": 0.06737244129180908,
      "learning_rate": 1.3115186178449293e-06,
      "loss": 0.0065,
      "step": 2657180
    },
    {
      "epoch": 4.348566079482597,
      "grad_norm": 0.20241189002990723,
      "learning_rate": 1.3114527256314122e-06,
      "loss": 0.0066,
      "step": 2657200
    },
    {
      "epoch": 4.348598809921251,
      "grad_norm": 0.3654628098011017,
      "learning_rate": 1.311386833417895e-06,
      "loss": 0.0069,
      "step": 2657220
    },
    {
      "epoch": 4.348631540359904,
      "grad_norm": 0.3098846971988678,
      "learning_rate": 1.311320941204378e-06,
      "loss": 0.0077,
      "step": 2657240
    },
    {
      "epoch": 4.348664270798557,
      "grad_norm": 0.02165396697819233,
      "learning_rate": 1.3112550489908609e-06,
      "loss": 0.0057,
      "step": 2657260
    },
    {
      "epoch": 4.348697001237211,
      "grad_norm": 0.207201287150383,
      "learning_rate": 1.3111891567773438e-06,
      "loss": 0.0089,
      "step": 2657280
    },
    {
      "epoch": 4.3487297316758635,
      "grad_norm": 0.2651427090167999,
      "learning_rate": 1.3111232645638266e-06,
      "loss": 0.0114,
      "step": 2657300
    },
    {
      "epoch": 4.348762462114517,
      "grad_norm": 0.291917085647583,
      "learning_rate": 1.3110573723503095e-06,
      "loss": 0.0096,
      "step": 2657320
    },
    {
      "epoch": 4.348795192553171,
      "grad_norm": 0.1875288039445877,
      "learning_rate": 1.3109914801367923e-06,
      "loss": 0.0115,
      "step": 2657340
    },
    {
      "epoch": 4.348827922991824,
      "grad_norm": 0.09714274108409882,
      "learning_rate": 1.3109255879232752e-06,
      "loss": 0.0079,
      "step": 2657360
    },
    {
      "epoch": 4.348860653430477,
      "grad_norm": 0.43284404277801514,
      "learning_rate": 1.310859695709758e-06,
      "loss": 0.0093,
      "step": 2657380
    },
    {
      "epoch": 4.348893383869131,
      "grad_norm": 0.10283536463975906,
      "learning_rate": 1.310793803496241e-06,
      "loss": 0.0063,
      "step": 2657400
    },
    {
      "epoch": 4.348926114307784,
      "grad_norm": 0.1643829345703125,
      "learning_rate": 1.3107279112827236e-06,
      "loss": 0.0059,
      "step": 2657420
    },
    {
      "epoch": 4.348958844746437,
      "grad_norm": 0.1688450574874878,
      "learning_rate": 1.3106620190692068e-06,
      "loss": 0.0085,
      "step": 2657440
    },
    {
      "epoch": 4.3489915751850905,
      "grad_norm": 0.2408478856086731,
      "learning_rate": 1.3105961268556898e-06,
      "loss": 0.0133,
      "step": 2657460
    },
    {
      "epoch": 4.349024305623744,
      "grad_norm": 0.3419577181339264,
      "learning_rate": 1.3105302346421725e-06,
      "loss": 0.0063,
      "step": 2657480
    },
    {
      "epoch": 4.349057036062398,
      "grad_norm": 0.2686263918876648,
      "learning_rate": 1.3104643424286552e-06,
      "loss": 0.0132,
      "step": 2657500
    },
    {
      "epoch": 4.34908976650105,
      "grad_norm": 0.48767831921577454,
      "learning_rate": 1.3103984502151382e-06,
      "loss": 0.0093,
      "step": 2657520
    },
    {
      "epoch": 4.349122496939704,
      "grad_norm": 0.08807080239057541,
      "learning_rate": 1.310332558001621e-06,
      "loss": 0.0077,
      "step": 2657540
    },
    {
      "epoch": 4.349155227378358,
      "grad_norm": 0.6580416560173035,
      "learning_rate": 1.3102666657881039e-06,
      "loss": 0.0108,
      "step": 2657560
    },
    {
      "epoch": 4.34918795781701,
      "grad_norm": 0.12974804639816284,
      "learning_rate": 1.3102007735745866e-06,
      "loss": 0.0076,
      "step": 2657580
    },
    {
      "epoch": 4.349220688255664,
      "grad_norm": 0.29458853602409363,
      "learning_rate": 1.3101348813610696e-06,
      "loss": 0.0098,
      "step": 2657600
    },
    {
      "epoch": 4.3492534186943175,
      "grad_norm": 0.36391758918762207,
      "learning_rate": 1.3100689891475527e-06,
      "loss": 0.0083,
      "step": 2657620
    },
    {
      "epoch": 4.349286149132971,
      "grad_norm": 0.23349761962890625,
      "learning_rate": 1.3100030969340355e-06,
      "loss": 0.0119,
      "step": 2657640
    },
    {
      "epoch": 4.349318879571624,
      "grad_norm": 0.0750998854637146,
      "learning_rate": 1.3099372047205184e-06,
      "loss": 0.01,
      "step": 2657660
    },
    {
      "epoch": 4.349351610010277,
      "grad_norm": 0.07149965316057205,
      "learning_rate": 1.3098713125070012e-06,
      "loss": 0.0072,
      "step": 2657680
    },
    {
      "epoch": 4.349384340448931,
      "grad_norm": 0.3075006902217865,
      "learning_rate": 1.3098054202934841e-06,
      "loss": 0.0086,
      "step": 2657700
    },
    {
      "epoch": 4.349417070887584,
      "grad_norm": 0.05206923186779022,
      "learning_rate": 1.3097395280799669e-06,
      "loss": 0.0061,
      "step": 2657720
    },
    {
      "epoch": 4.349449801326237,
      "grad_norm": 0.35326191782951355,
      "learning_rate": 1.3096736358664496e-06,
      "loss": 0.0143,
      "step": 2657740
    },
    {
      "epoch": 4.349482531764891,
      "grad_norm": 0.29918429255485535,
      "learning_rate": 1.3096077436529325e-06,
      "loss": 0.0093,
      "step": 2657760
    },
    {
      "epoch": 4.3495152622035445,
      "grad_norm": 0.19144710898399353,
      "learning_rate": 1.3095418514394157e-06,
      "loss": 0.0082,
      "step": 2657780
    },
    {
      "epoch": 4.349547992642197,
      "grad_norm": 0.3411364257335663,
      "learning_rate": 1.3094759592258984e-06,
      "loss": 0.0122,
      "step": 2657800
    },
    {
      "epoch": 4.349580723080851,
      "grad_norm": 0.20852099359035492,
      "learning_rate": 1.3094100670123814e-06,
      "loss": 0.0086,
      "step": 2657820
    },
    {
      "epoch": 4.349613453519504,
      "grad_norm": 0.12250693142414093,
      "learning_rate": 1.3093441747988641e-06,
      "loss": 0.0079,
      "step": 2657840
    },
    {
      "epoch": 4.349646183958157,
      "grad_norm": 0.5252413153648376,
      "learning_rate": 1.309278282585347e-06,
      "loss": 0.0069,
      "step": 2657860
    },
    {
      "epoch": 4.349678914396811,
      "grad_norm": 0.3809281587600708,
      "learning_rate": 1.3092123903718298e-06,
      "loss": 0.0137,
      "step": 2657880
    },
    {
      "epoch": 4.349711644835464,
      "grad_norm": 0.1861937791109085,
      "learning_rate": 1.3091464981583128e-06,
      "loss": 0.0095,
      "step": 2657900
    },
    {
      "epoch": 4.349744375274118,
      "grad_norm": 0.14653749763965607,
      "learning_rate": 1.3090806059447955e-06,
      "loss": 0.0103,
      "step": 2657920
    },
    {
      "epoch": 4.3497771057127705,
      "grad_norm": 0.3966936469078064,
      "learning_rate": 1.3090147137312783e-06,
      "loss": 0.0093,
      "step": 2657940
    },
    {
      "epoch": 4.349809836151424,
      "grad_norm": 0.14524638652801514,
      "learning_rate": 1.3089488215177614e-06,
      "loss": 0.0091,
      "step": 2657960
    },
    {
      "epoch": 4.349842566590078,
      "grad_norm": 0.5392890572547913,
      "learning_rate": 1.3088829293042444e-06,
      "loss": 0.0133,
      "step": 2657980
    },
    {
      "epoch": 4.34987529702873,
      "grad_norm": 0.17489810287952423,
      "learning_rate": 1.3088170370907271e-06,
      "loss": 0.006,
      "step": 2658000
    },
    {
      "epoch": 4.349908027467384,
      "grad_norm": 0.11360017955303192,
      "learning_rate": 1.30875114487721e-06,
      "loss": 0.0098,
      "step": 2658020
    },
    {
      "epoch": 4.349940757906038,
      "grad_norm": 0.41750192642211914,
      "learning_rate": 1.3086852526636928e-06,
      "loss": 0.0103,
      "step": 2658040
    },
    {
      "epoch": 4.349973488344691,
      "grad_norm": 0.1988721638917923,
      "learning_rate": 1.3086193604501758e-06,
      "loss": 0.0106,
      "step": 2658060
    },
    {
      "epoch": 4.350006218783344,
      "grad_norm": 0.03147977963089943,
      "learning_rate": 1.3085534682366585e-06,
      "loss": 0.0084,
      "step": 2658080
    },
    {
      "epoch": 4.3500389492219975,
      "grad_norm": 0.5143508315086365,
      "learning_rate": 1.3084875760231415e-06,
      "loss": 0.0081,
      "step": 2658100
    },
    {
      "epoch": 4.350071679660651,
      "grad_norm": 0.0655655488371849,
      "learning_rate": 1.3084216838096242e-06,
      "loss": 0.0066,
      "step": 2658120
    },
    {
      "epoch": 4.350104410099304,
      "grad_norm": 0.21861594915390015,
      "learning_rate": 1.3083557915961074e-06,
      "loss": 0.0091,
      "step": 2658140
    },
    {
      "epoch": 4.350137140537957,
      "grad_norm": 0.3930906653404236,
      "learning_rate": 1.30828989938259e-06,
      "loss": 0.0113,
      "step": 2658160
    },
    {
      "epoch": 4.350169870976611,
      "grad_norm": 0.13757579028606415,
      "learning_rate": 1.308224007169073e-06,
      "loss": 0.0095,
      "step": 2658180
    },
    {
      "epoch": 4.350202601415264,
      "grad_norm": 0.14962440729141235,
      "learning_rate": 1.3081581149555558e-06,
      "loss": 0.0066,
      "step": 2658200
    },
    {
      "epoch": 4.350235331853917,
      "grad_norm": 0.2828698456287384,
      "learning_rate": 1.3080922227420387e-06,
      "loss": 0.0093,
      "step": 2658220
    },
    {
      "epoch": 4.350268062292571,
      "grad_norm": 0.14992229640483856,
      "learning_rate": 1.3080263305285215e-06,
      "loss": 0.0077,
      "step": 2658240
    },
    {
      "epoch": 4.3503007927312245,
      "grad_norm": 0.10060688108205795,
      "learning_rate": 1.3079604383150044e-06,
      "loss": 0.0076,
      "step": 2658260
    },
    {
      "epoch": 4.350333523169877,
      "grad_norm": 0.17928539216518402,
      "learning_rate": 1.3078945461014872e-06,
      "loss": 0.0097,
      "step": 2658280
    },
    {
      "epoch": 4.350366253608531,
      "grad_norm": 0.11870956420898438,
      "learning_rate": 1.3078286538879701e-06,
      "loss": 0.0054,
      "step": 2658300
    },
    {
      "epoch": 4.350398984047184,
      "grad_norm": 0.6243616938591003,
      "learning_rate": 1.307762761674453e-06,
      "loss": 0.0102,
      "step": 2658320
    },
    {
      "epoch": 4.350431714485838,
      "grad_norm": 0.882490336894989,
      "learning_rate": 1.307696869460936e-06,
      "loss": 0.0105,
      "step": 2658340
    },
    {
      "epoch": 4.350464444924491,
      "grad_norm": 0.3304876983165741,
      "learning_rate": 1.3076309772474188e-06,
      "loss": 0.0116,
      "step": 2658360
    },
    {
      "epoch": 4.350497175363144,
      "grad_norm": 0.1706889122724533,
      "learning_rate": 1.3075650850339017e-06,
      "loss": 0.009,
      "step": 2658380
    },
    {
      "epoch": 4.350529905801798,
      "grad_norm": 0.06485595554113388,
      "learning_rate": 1.3074991928203845e-06,
      "loss": 0.0203,
      "step": 2658400
    },
    {
      "epoch": 4.350562636240451,
      "grad_norm": 0.15230906009674072,
      "learning_rate": 1.3074333006068674e-06,
      "loss": 0.0066,
      "step": 2658420
    },
    {
      "epoch": 4.350595366679104,
      "grad_norm": 0.14981505274772644,
      "learning_rate": 1.3073674083933501e-06,
      "loss": 0.0136,
      "step": 2658440
    },
    {
      "epoch": 4.350628097117758,
      "grad_norm": 0.18581955134868622,
      "learning_rate": 1.307301516179833e-06,
      "loss": 0.0089,
      "step": 2658460
    },
    {
      "epoch": 4.3506608275564105,
      "grad_norm": 0.19730937480926514,
      "learning_rate": 1.3072356239663158e-06,
      "loss": 0.0104,
      "step": 2658480
    },
    {
      "epoch": 4.350693557995064,
      "grad_norm": 0.10911183059215546,
      "learning_rate": 1.307169731752799e-06,
      "loss": 0.0073,
      "step": 2658500
    },
    {
      "epoch": 4.350726288433718,
      "grad_norm": 0.10097289830446243,
      "learning_rate": 1.3071038395392817e-06,
      "loss": 0.0092,
      "step": 2658520
    },
    {
      "epoch": 4.350759018872371,
      "grad_norm": 0.4917392134666443,
      "learning_rate": 1.3070379473257647e-06,
      "loss": 0.0076,
      "step": 2658540
    },
    {
      "epoch": 4.350791749311024,
      "grad_norm": 0.3822026252746582,
      "learning_rate": 1.3069720551122474e-06,
      "loss": 0.0077,
      "step": 2658560
    },
    {
      "epoch": 4.350824479749678,
      "grad_norm": 0.36353787779808044,
      "learning_rate": 1.3069061628987304e-06,
      "loss": 0.0096,
      "step": 2658580
    },
    {
      "epoch": 4.350857210188331,
      "grad_norm": 0.11558140069246292,
      "learning_rate": 1.3068402706852131e-06,
      "loss": 0.0096,
      "step": 2658600
    },
    {
      "epoch": 4.350889940626985,
      "grad_norm": 0.3942325711250305,
      "learning_rate": 1.306774378471696e-06,
      "loss": 0.0072,
      "step": 2658620
    },
    {
      "epoch": 4.3509226710656375,
      "grad_norm": 0.2432781457901001,
      "learning_rate": 1.3067084862581788e-06,
      "loss": 0.007,
      "step": 2658640
    },
    {
      "epoch": 4.350955401504291,
      "grad_norm": 0.22590067982673645,
      "learning_rate": 1.306642594044662e-06,
      "loss": 0.0061,
      "step": 2658660
    },
    {
      "epoch": 4.350988131942945,
      "grad_norm": 0.13900381326675415,
      "learning_rate": 1.306576701831145e-06,
      "loss": 0.0071,
      "step": 2658680
    },
    {
      "epoch": 4.351020862381597,
      "grad_norm": 0.2581966519355774,
      "learning_rate": 1.3065108096176277e-06,
      "loss": 0.0084,
      "step": 2658700
    },
    {
      "epoch": 4.351053592820251,
      "grad_norm": 0.20149248838424683,
      "learning_rate": 1.3064449174041104e-06,
      "loss": 0.0118,
      "step": 2658720
    },
    {
      "epoch": 4.3510863232589045,
      "grad_norm": 0.20542919635772705,
      "learning_rate": 1.3063790251905934e-06,
      "loss": 0.0159,
      "step": 2658740
    },
    {
      "epoch": 4.351119053697557,
      "grad_norm": 0.30179017782211304,
      "learning_rate": 1.306313132977076e-06,
      "loss": 0.0115,
      "step": 2658760
    },
    {
      "epoch": 4.351151784136211,
      "grad_norm": 0.32044851779937744,
      "learning_rate": 1.306247240763559e-06,
      "loss": 0.008,
      "step": 2658780
    },
    {
      "epoch": 4.351184514574864,
      "grad_norm": 0.07951007783412933,
      "learning_rate": 1.3061813485500418e-06,
      "loss": 0.0105,
      "step": 2658800
    },
    {
      "epoch": 4.351217245013518,
      "grad_norm": 0.25147613883018494,
      "learning_rate": 1.3061154563365247e-06,
      "loss": 0.0071,
      "step": 2658820
    },
    {
      "epoch": 4.351249975452171,
      "grad_norm": 0.3153103291988373,
      "learning_rate": 1.306049564123008e-06,
      "loss": 0.0122,
      "step": 2658840
    },
    {
      "epoch": 4.351282705890824,
      "grad_norm": 0.18953737616539001,
      "learning_rate": 1.3059836719094906e-06,
      "loss": 0.0095,
      "step": 2658860
    },
    {
      "epoch": 4.351315436329478,
      "grad_norm": 0.17993107438087463,
      "learning_rate": 1.3059177796959736e-06,
      "loss": 0.0097,
      "step": 2658880
    },
    {
      "epoch": 4.351348166768131,
      "grad_norm": 0.12619821727275848,
      "learning_rate": 1.3058518874824563e-06,
      "loss": 0.01,
      "step": 2658900
    },
    {
      "epoch": 4.351380897206784,
      "grad_norm": 0.4697582423686981,
      "learning_rate": 1.305785995268939e-06,
      "loss": 0.0067,
      "step": 2658920
    },
    {
      "epoch": 4.351413627645438,
      "grad_norm": 0.35488584637641907,
      "learning_rate": 1.305720103055422e-06,
      "loss": 0.0106,
      "step": 2658940
    },
    {
      "epoch": 4.351446358084091,
      "grad_norm": 0.34786662459373474,
      "learning_rate": 1.3056542108419048e-06,
      "loss": 0.0129,
      "step": 2658960
    },
    {
      "epoch": 4.351479088522744,
      "grad_norm": 0.1146516352891922,
      "learning_rate": 1.3055883186283877e-06,
      "loss": 0.009,
      "step": 2658980
    },
    {
      "epoch": 4.351511818961398,
      "grad_norm": 0.2300647497177124,
      "learning_rate": 1.3055224264148705e-06,
      "loss": 0.0067,
      "step": 2659000
    },
    {
      "epoch": 4.351544549400051,
      "grad_norm": 0.15431469678878784,
      "learning_rate": 1.3054565342013536e-06,
      "loss": 0.0083,
      "step": 2659020
    },
    {
      "epoch": 4.351577279838704,
      "grad_norm": 0.20543518662452698,
      "learning_rate": 1.3053906419878366e-06,
      "loss": 0.0124,
      "step": 2659040
    },
    {
      "epoch": 4.351610010277358,
      "grad_norm": 0.11606497317552567,
      "learning_rate": 1.3053247497743193e-06,
      "loss": 0.0096,
      "step": 2659060
    },
    {
      "epoch": 4.351642740716011,
      "grad_norm": 0.07313107699155807,
      "learning_rate": 1.3052588575608023e-06,
      "loss": 0.0085,
      "step": 2659080
    },
    {
      "epoch": 4.351675471154665,
      "grad_norm": 0.4356180727481842,
      "learning_rate": 1.305192965347285e-06,
      "loss": 0.0086,
      "step": 2659100
    },
    {
      "epoch": 4.3517082015933175,
      "grad_norm": 0.3049607574939728,
      "learning_rate": 1.305127073133768e-06,
      "loss": 0.0104,
      "step": 2659120
    },
    {
      "epoch": 4.351740932031971,
      "grad_norm": 0.44392621517181396,
      "learning_rate": 1.3050611809202507e-06,
      "loss": 0.0154,
      "step": 2659140
    },
    {
      "epoch": 4.351773662470625,
      "grad_norm": 0.10046383738517761,
      "learning_rate": 1.3049952887067334e-06,
      "loss": 0.0052,
      "step": 2659160
    },
    {
      "epoch": 4.351806392909277,
      "grad_norm": 0.24836833775043488,
      "learning_rate": 1.3049293964932164e-06,
      "loss": 0.0099,
      "step": 2659180
    },
    {
      "epoch": 4.351839123347931,
      "grad_norm": 0.19885537028312683,
      "learning_rate": 1.3048635042796995e-06,
      "loss": 0.0076,
      "step": 2659200
    },
    {
      "epoch": 4.351871853786585,
      "grad_norm": 0.4697747826576233,
      "learning_rate": 1.3047976120661823e-06,
      "loss": 0.0077,
      "step": 2659220
    },
    {
      "epoch": 4.351904584225238,
      "grad_norm": 0.1049443706870079,
      "learning_rate": 1.3047317198526652e-06,
      "loss": 0.01,
      "step": 2659240
    },
    {
      "epoch": 4.351937314663891,
      "grad_norm": 0.34245580434799194,
      "learning_rate": 1.304665827639148e-06,
      "loss": 0.0109,
      "step": 2659260
    },
    {
      "epoch": 4.3519700451025445,
      "grad_norm": 0.22567610442638397,
      "learning_rate": 1.304599935425631e-06,
      "loss": 0.0089,
      "step": 2659280
    },
    {
      "epoch": 4.352002775541198,
      "grad_norm": 0.5122858285903931,
      "learning_rate": 1.3045340432121137e-06,
      "loss": 0.0087,
      "step": 2659300
    },
    {
      "epoch": 4.352035505979851,
      "grad_norm": 0.1326749175786972,
      "learning_rate": 1.3044681509985966e-06,
      "loss": 0.0091,
      "step": 2659320
    },
    {
      "epoch": 4.352068236418504,
      "grad_norm": 0.23345482349395752,
      "learning_rate": 1.3044022587850794e-06,
      "loss": 0.0136,
      "step": 2659340
    },
    {
      "epoch": 4.352100966857158,
      "grad_norm": 0.2628231644630432,
      "learning_rate": 1.304336366571562e-06,
      "loss": 0.008,
      "step": 2659360
    },
    {
      "epoch": 4.352133697295812,
      "grad_norm": 0.06771662831306458,
      "learning_rate": 1.3042704743580453e-06,
      "loss": 0.0086,
      "step": 2659380
    },
    {
      "epoch": 4.352166427734464,
      "grad_norm": 0.1823098510503769,
      "learning_rate": 1.3042045821445282e-06,
      "loss": 0.0108,
      "step": 2659400
    },
    {
      "epoch": 4.352199158173118,
      "grad_norm": 0.21313777565956116,
      "learning_rate": 1.304138689931011e-06,
      "loss": 0.0068,
      "step": 2659420
    },
    {
      "epoch": 4.3522318886117715,
      "grad_norm": 0.22928792238235474,
      "learning_rate": 1.304072797717494e-06,
      "loss": 0.0056,
      "step": 2659440
    },
    {
      "epoch": 4.352264619050424,
      "grad_norm": 0.14125756919384003,
      "learning_rate": 1.3040069055039766e-06,
      "loss": 0.0125,
      "step": 2659460
    },
    {
      "epoch": 4.352297349489078,
      "grad_norm": 0.07082004845142365,
      "learning_rate": 1.3039410132904596e-06,
      "loss": 0.0072,
      "step": 2659480
    },
    {
      "epoch": 4.352330079927731,
      "grad_norm": 0.19697846472263336,
      "learning_rate": 1.3038751210769423e-06,
      "loss": 0.0115,
      "step": 2659500
    },
    {
      "epoch": 4.352362810366385,
      "grad_norm": 0.1452861726284027,
      "learning_rate": 1.3038092288634253e-06,
      "loss": 0.0083,
      "step": 2659520
    },
    {
      "epoch": 4.352395540805038,
      "grad_norm": 0.12037626653909683,
      "learning_rate": 1.3037433366499082e-06,
      "loss": 0.012,
      "step": 2659540
    },
    {
      "epoch": 4.352428271243691,
      "grad_norm": 0.14530380070209503,
      "learning_rate": 1.3036774444363912e-06,
      "loss": 0.0072,
      "step": 2659560
    },
    {
      "epoch": 4.352461001682345,
      "grad_norm": 0.1480659544467926,
      "learning_rate": 1.303611552222874e-06,
      "loss": 0.0077,
      "step": 2659580
    },
    {
      "epoch": 4.3524937321209975,
      "grad_norm": 0.14722023904323578,
      "learning_rate": 1.3035456600093569e-06,
      "loss": 0.0113,
      "step": 2659600
    },
    {
      "epoch": 4.352526462559651,
      "grad_norm": 0.09688951820135117,
      "learning_rate": 1.3034797677958396e-06,
      "loss": 0.0075,
      "step": 2659620
    },
    {
      "epoch": 4.352559192998305,
      "grad_norm": 0.14875447750091553,
      "learning_rate": 1.3034138755823226e-06,
      "loss": 0.0121,
      "step": 2659640
    },
    {
      "epoch": 4.352591923436957,
      "grad_norm": 0.21333295106887817,
      "learning_rate": 1.3033479833688053e-06,
      "loss": 0.0076,
      "step": 2659660
    },
    {
      "epoch": 4.352624653875611,
      "grad_norm": 1.365991473197937,
      "learning_rate": 1.3032820911552883e-06,
      "loss": 0.0094,
      "step": 2659680
    },
    {
      "epoch": 4.352657384314265,
      "grad_norm": 0.4443615674972534,
      "learning_rate": 1.303216198941771e-06,
      "loss": 0.0066,
      "step": 2659700
    },
    {
      "epoch": 4.352690114752918,
      "grad_norm": 0.06753858178853989,
      "learning_rate": 1.3031503067282542e-06,
      "loss": 0.0069,
      "step": 2659720
    },
    {
      "epoch": 4.352722845191571,
      "grad_norm": 0.21295253932476044,
      "learning_rate": 1.303084414514737e-06,
      "loss": 0.0085,
      "step": 2659740
    },
    {
      "epoch": 4.3527555756302245,
      "grad_norm": 0.2601534426212311,
      "learning_rate": 1.3030185223012199e-06,
      "loss": 0.0065,
      "step": 2659760
    },
    {
      "epoch": 4.352788306068878,
      "grad_norm": 0.7933477163314819,
      "learning_rate": 1.3029526300877026e-06,
      "loss": 0.0102,
      "step": 2659780
    },
    {
      "epoch": 4.352821036507532,
      "grad_norm": 0.2159263640642166,
      "learning_rate": 1.3028867378741856e-06,
      "loss": 0.0083,
      "step": 2659800
    },
    {
      "epoch": 4.352853766946184,
      "grad_norm": 0.11894822865724564,
      "learning_rate": 1.3028208456606683e-06,
      "loss": 0.0077,
      "step": 2659820
    },
    {
      "epoch": 4.352886497384838,
      "grad_norm": 0.27107733488082886,
      "learning_rate": 1.3027549534471512e-06,
      "loss": 0.0069,
      "step": 2659840
    },
    {
      "epoch": 4.352919227823492,
      "grad_norm": 0.26682141423225403,
      "learning_rate": 1.302689061233634e-06,
      "loss": 0.0109,
      "step": 2659860
    },
    {
      "epoch": 4.352951958262144,
      "grad_norm": 0.42626991868019104,
      "learning_rate": 1.302623169020117e-06,
      "loss": 0.0134,
      "step": 2659880
    },
    {
      "epoch": 4.352984688700798,
      "grad_norm": 0.20338642597198486,
      "learning_rate": 1.3025572768065999e-06,
      "loss": 0.0093,
      "step": 2659900
    },
    {
      "epoch": 4.3530174191394515,
      "grad_norm": 0.21873824298381805,
      "learning_rate": 1.3024913845930828e-06,
      "loss": 0.007,
      "step": 2659920
    },
    {
      "epoch": 4.353050149578104,
      "grad_norm": 0.3522131145000458,
      "learning_rate": 1.3024254923795656e-06,
      "loss": 0.0113,
      "step": 2659940
    },
    {
      "epoch": 4.353082880016758,
      "grad_norm": 0.05239614471793175,
      "learning_rate": 1.3023596001660485e-06,
      "loss": 0.0082,
      "step": 2659960
    },
    {
      "epoch": 4.353115610455411,
      "grad_norm": 0.39096924662590027,
      "learning_rate": 1.3022937079525313e-06,
      "loss": 0.0075,
      "step": 2659980
    },
    {
      "epoch": 4.353148340894065,
      "grad_norm": 0.3878544867038727,
      "learning_rate": 1.3022278157390142e-06,
      "loss": 0.0087,
      "step": 2660000
    },
    {
      "epoch": 4.353181071332718,
      "grad_norm": 0.09744326025247574,
      "learning_rate": 1.302161923525497e-06,
      "loss": 0.0092,
      "step": 2660020
    },
    {
      "epoch": 4.353213801771371,
      "grad_norm": 0.4048362970352173,
      "learning_rate": 1.30209603131198e-06,
      "loss": 0.0067,
      "step": 2660040
    },
    {
      "epoch": 4.353246532210025,
      "grad_norm": 0.29912886023521423,
      "learning_rate": 1.3020301390984627e-06,
      "loss": 0.0077,
      "step": 2660060
    },
    {
      "epoch": 4.3532792626486785,
      "grad_norm": 0.508970320224762,
      "learning_rate": 1.3019642468849458e-06,
      "loss": 0.0073,
      "step": 2660080
    },
    {
      "epoch": 4.353311993087331,
      "grad_norm": 0.33046308159828186,
      "learning_rate": 1.3018983546714288e-06,
      "loss": 0.012,
      "step": 2660100
    },
    {
      "epoch": 4.353344723525985,
      "grad_norm": 0.22435040771961212,
      "learning_rate": 1.3018324624579115e-06,
      "loss": 0.0086,
      "step": 2660120
    },
    {
      "epoch": 4.353377453964638,
      "grad_norm": 0.659983217716217,
      "learning_rate": 1.3017665702443942e-06,
      "loss": 0.0156,
      "step": 2660140
    },
    {
      "epoch": 4.353410184403291,
      "grad_norm": 0.12109127640724182,
      "learning_rate": 1.3017006780308772e-06,
      "loss": 0.0064,
      "step": 2660160
    },
    {
      "epoch": 4.353442914841945,
      "grad_norm": 0.21106432378292084,
      "learning_rate": 1.30163478581736e-06,
      "loss": 0.006,
      "step": 2660180
    },
    {
      "epoch": 4.353475645280598,
      "grad_norm": 0.07512245327234268,
      "learning_rate": 1.3015688936038429e-06,
      "loss": 0.0061,
      "step": 2660200
    },
    {
      "epoch": 4.353508375719251,
      "grad_norm": 0.09789950400590897,
      "learning_rate": 1.3015030013903256e-06,
      "loss": 0.0103,
      "step": 2660220
    },
    {
      "epoch": 4.353541106157905,
      "grad_norm": 0.12029583752155304,
      "learning_rate": 1.3014371091768088e-06,
      "loss": 0.0081,
      "step": 2660240
    },
    {
      "epoch": 4.353573836596558,
      "grad_norm": 0.16503706574440002,
      "learning_rate": 1.3013712169632917e-06,
      "loss": 0.0135,
      "step": 2660260
    },
    {
      "epoch": 4.353606567035212,
      "grad_norm": 0.14207187294960022,
      "learning_rate": 1.3013053247497745e-06,
      "loss": 0.0097,
      "step": 2660280
    },
    {
      "epoch": 4.3536392974738645,
      "grad_norm": 0.17065154016017914,
      "learning_rate": 1.3012394325362574e-06,
      "loss": 0.0103,
      "step": 2660300
    },
    {
      "epoch": 4.353672027912518,
      "grad_norm": 0.25641176104545593,
      "learning_rate": 1.3011735403227402e-06,
      "loss": 0.0071,
      "step": 2660320
    },
    {
      "epoch": 4.353704758351172,
      "grad_norm": 0.3956719934940338,
      "learning_rate": 1.3011076481092231e-06,
      "loss": 0.0063,
      "step": 2660340
    },
    {
      "epoch": 4.353737488789824,
      "grad_norm": 0.6197268962860107,
      "learning_rate": 1.3010417558957059e-06,
      "loss": 0.0099,
      "step": 2660360
    },
    {
      "epoch": 4.353770219228478,
      "grad_norm": 0.1660591959953308,
      "learning_rate": 1.3009758636821886e-06,
      "loss": 0.0057,
      "step": 2660380
    },
    {
      "epoch": 4.3538029496671315,
      "grad_norm": 0.1623406708240509,
      "learning_rate": 1.3009099714686716e-06,
      "loss": 0.0147,
      "step": 2660400
    },
    {
      "epoch": 4.353835680105785,
      "grad_norm": 0.22908209264278412,
      "learning_rate": 1.3008440792551547e-06,
      "loss": 0.0081,
      "step": 2660420
    },
    {
      "epoch": 4.353868410544438,
      "grad_norm": 0.11529592424631119,
      "learning_rate": 1.3007781870416375e-06,
      "loss": 0.0069,
      "step": 2660440
    },
    {
      "epoch": 4.3539011409830914,
      "grad_norm": 0.24254155158996582,
      "learning_rate": 1.3007122948281204e-06,
      "loss": 0.0109,
      "step": 2660460
    },
    {
      "epoch": 4.353933871421745,
      "grad_norm": 0.17377015948295593,
      "learning_rate": 1.3006464026146032e-06,
      "loss": 0.0071,
      "step": 2660480
    },
    {
      "epoch": 4.353966601860398,
      "grad_norm": 0.10956689715385437,
      "learning_rate": 1.300580510401086e-06,
      "loss": 0.007,
      "step": 2660500
    },
    {
      "epoch": 4.353999332299051,
      "grad_norm": 0.12650631368160248,
      "learning_rate": 1.3005146181875688e-06,
      "loss": 0.0099,
      "step": 2660520
    },
    {
      "epoch": 4.354032062737705,
      "grad_norm": 0.16983315348625183,
      "learning_rate": 1.3004487259740518e-06,
      "loss": 0.0085,
      "step": 2660540
    },
    {
      "epoch": 4.3540647931763585,
      "grad_norm": 0.08916213363409042,
      "learning_rate": 1.3003828337605345e-06,
      "loss": 0.0064,
      "step": 2660560
    },
    {
      "epoch": 4.354097523615011,
      "grad_norm": 0.13730047643184662,
      "learning_rate": 1.3003169415470173e-06,
      "loss": 0.0092,
      "step": 2660580
    },
    {
      "epoch": 4.354130254053665,
      "grad_norm": 0.0709889680147171,
      "learning_rate": 1.3002510493335004e-06,
      "loss": 0.009,
      "step": 2660600
    },
    {
      "epoch": 4.354162984492318,
      "grad_norm": 0.11794634908437729,
      "learning_rate": 1.3001851571199834e-06,
      "loss": 0.0111,
      "step": 2660620
    },
    {
      "epoch": 4.354195714930971,
      "grad_norm": 0.32352641224861145,
      "learning_rate": 1.3001192649064661e-06,
      "loss": 0.0089,
      "step": 2660640
    },
    {
      "epoch": 4.354228445369625,
      "grad_norm": 0.09767930954694748,
      "learning_rate": 1.300053372692949e-06,
      "loss": 0.0067,
      "step": 2660660
    },
    {
      "epoch": 4.354261175808278,
      "grad_norm": 0.251920223236084,
      "learning_rate": 1.2999874804794318e-06,
      "loss": 0.0097,
      "step": 2660680
    },
    {
      "epoch": 4.354293906246932,
      "grad_norm": 0.2133483588695526,
      "learning_rate": 1.2999215882659148e-06,
      "loss": 0.01,
      "step": 2660700
    },
    {
      "epoch": 4.354326636685585,
      "grad_norm": 0.21541514992713928,
      "learning_rate": 1.2998556960523975e-06,
      "loss": 0.0113,
      "step": 2660720
    },
    {
      "epoch": 4.354359367124238,
      "grad_norm": 0.7319387793540955,
      "learning_rate": 1.2997898038388805e-06,
      "loss": 0.0103,
      "step": 2660740
    },
    {
      "epoch": 4.354392097562892,
      "grad_norm": 0.3859289586544037,
      "learning_rate": 1.2997239116253632e-06,
      "loss": 0.0111,
      "step": 2660760
    },
    {
      "epoch": 4.3544248280015445,
      "grad_norm": 0.0516272597014904,
      "learning_rate": 1.2996580194118464e-06,
      "loss": 0.0066,
      "step": 2660780
    },
    {
      "epoch": 4.354457558440198,
      "grad_norm": 0.21913863718509674,
      "learning_rate": 1.299592127198329e-06,
      "loss": 0.0077,
      "step": 2660800
    },
    {
      "epoch": 4.354490288878852,
      "grad_norm": 0.29482024908065796,
      "learning_rate": 1.299526234984812e-06,
      "loss": 0.0095,
      "step": 2660820
    },
    {
      "epoch": 4.354523019317505,
      "grad_norm": 0.3457231819629669,
      "learning_rate": 1.2994603427712948e-06,
      "loss": 0.0096,
      "step": 2660840
    },
    {
      "epoch": 4.354555749756158,
      "grad_norm": 0.46198979020118713,
      "learning_rate": 1.2993944505577777e-06,
      "loss": 0.0073,
      "step": 2660860
    },
    {
      "epoch": 4.354588480194812,
      "grad_norm": 0.14851754903793335,
      "learning_rate": 1.2993285583442605e-06,
      "loss": 0.0103,
      "step": 2660880
    },
    {
      "epoch": 4.354621210633465,
      "grad_norm": 0.07557210326194763,
      "learning_rate": 1.2992626661307434e-06,
      "loss": 0.0067,
      "step": 2660900
    },
    {
      "epoch": 4.354653941072118,
      "grad_norm": 0.0835685133934021,
      "learning_rate": 1.2991967739172262e-06,
      "loss": 0.008,
      "step": 2660920
    },
    {
      "epoch": 4.3546866715107715,
      "grad_norm": 0.1283881664276123,
      "learning_rate": 1.2991308817037091e-06,
      "loss": 0.0072,
      "step": 2660940
    },
    {
      "epoch": 4.354719401949425,
      "grad_norm": 0.183692067861557,
      "learning_rate": 1.299064989490192e-06,
      "loss": 0.0094,
      "step": 2660960
    },
    {
      "epoch": 4.354752132388079,
      "grad_norm": 0.16405649483203888,
      "learning_rate": 1.298999097276675e-06,
      "loss": 0.0104,
      "step": 2660980
    },
    {
      "epoch": 4.354784862826731,
      "grad_norm": 0.14877600967884064,
      "learning_rate": 1.2989332050631578e-06,
      "loss": 0.0065,
      "step": 2661000
    },
    {
      "epoch": 4.354817593265385,
      "grad_norm": 0.08803140372037888,
      "learning_rate": 1.2988673128496407e-06,
      "loss": 0.0062,
      "step": 2661020
    },
    {
      "epoch": 4.354850323704039,
      "grad_norm": 0.18498872220516205,
      "learning_rate": 1.2988014206361235e-06,
      "loss": 0.006,
      "step": 2661040
    },
    {
      "epoch": 4.354883054142691,
      "grad_norm": 0.17858673632144928,
      "learning_rate": 1.2987355284226064e-06,
      "loss": 0.0074,
      "step": 2661060
    },
    {
      "epoch": 4.354915784581345,
      "grad_norm": 0.7142941951751709,
      "learning_rate": 1.2986696362090892e-06,
      "loss": 0.0119,
      "step": 2661080
    },
    {
      "epoch": 4.3549485150199985,
      "grad_norm": 0.3060733377933502,
      "learning_rate": 1.2986037439955721e-06,
      "loss": 0.01,
      "step": 2661100
    },
    {
      "epoch": 4.354981245458651,
      "grad_norm": 0.11071962118148804,
      "learning_rate": 1.298537851782055e-06,
      "loss": 0.0096,
      "step": 2661120
    },
    {
      "epoch": 4.355013975897305,
      "grad_norm": 0.13307307660579681,
      "learning_rate": 1.298471959568538e-06,
      "loss": 0.0093,
      "step": 2661140
    },
    {
      "epoch": 4.355046706335958,
      "grad_norm": 0.18119926750659943,
      "learning_rate": 1.2984060673550207e-06,
      "loss": 0.0077,
      "step": 2661160
    },
    {
      "epoch": 4.355079436774612,
      "grad_norm": 0.1358289271593094,
      "learning_rate": 1.2983401751415037e-06,
      "loss": 0.0073,
      "step": 2661180
    },
    {
      "epoch": 4.355112167213265,
      "grad_norm": 0.1205252930521965,
      "learning_rate": 1.2982742829279864e-06,
      "loss": 0.0106,
      "step": 2661200
    },
    {
      "epoch": 4.355144897651918,
      "grad_norm": 0.3670389950275421,
      "learning_rate": 1.2982083907144694e-06,
      "loss": 0.0101,
      "step": 2661220
    },
    {
      "epoch": 4.355177628090572,
      "grad_norm": 0.22664842009544373,
      "learning_rate": 1.2981424985009521e-06,
      "loss": 0.0071,
      "step": 2661240
    },
    {
      "epoch": 4.3552103585292254,
      "grad_norm": 0.10093836486339569,
      "learning_rate": 1.298076606287435e-06,
      "loss": 0.0113,
      "step": 2661260
    },
    {
      "epoch": 4.355243088967878,
      "grad_norm": 0.07026000320911407,
      "learning_rate": 1.2980107140739178e-06,
      "loss": 0.0117,
      "step": 2661280
    },
    {
      "epoch": 4.355275819406532,
      "grad_norm": 0.04669805243611336,
      "learning_rate": 1.297944821860401e-06,
      "loss": 0.009,
      "step": 2661300
    },
    {
      "epoch": 4.355308549845185,
      "grad_norm": 0.1805957704782486,
      "learning_rate": 1.297878929646884e-06,
      "loss": 0.01,
      "step": 2661320
    },
    {
      "epoch": 4.355341280283838,
      "grad_norm": 0.23504726588726044,
      "learning_rate": 1.2978130374333667e-06,
      "loss": 0.0049,
      "step": 2661340
    },
    {
      "epoch": 4.355374010722492,
      "grad_norm": 0.36439913511276245,
      "learning_rate": 1.2977471452198494e-06,
      "loss": 0.0078,
      "step": 2661360
    },
    {
      "epoch": 4.355406741161145,
      "grad_norm": 0.23311468958854675,
      "learning_rate": 1.2976812530063324e-06,
      "loss": 0.0078,
      "step": 2661380
    },
    {
      "epoch": 4.355439471599798,
      "grad_norm": 0.9372432827949524,
      "learning_rate": 1.2976153607928151e-06,
      "loss": 0.0068,
      "step": 2661400
    },
    {
      "epoch": 4.3554722020384515,
      "grad_norm": 0.155970498919487,
      "learning_rate": 1.297549468579298e-06,
      "loss": 0.0084,
      "step": 2661420
    },
    {
      "epoch": 4.355504932477105,
      "grad_norm": 0.9406988620758057,
      "learning_rate": 1.2974835763657808e-06,
      "loss": 0.0095,
      "step": 2661440
    },
    {
      "epoch": 4.355537662915759,
      "grad_norm": 0.3031079173088074,
      "learning_rate": 1.2974176841522638e-06,
      "loss": 0.0097,
      "step": 2661460
    },
    {
      "epoch": 4.355570393354411,
      "grad_norm": 0.07206898927688599,
      "learning_rate": 1.297351791938747e-06,
      "loss": 0.0037,
      "step": 2661480
    },
    {
      "epoch": 4.355603123793065,
      "grad_norm": 0.17958933115005493,
      "learning_rate": 1.2972858997252297e-06,
      "loss": 0.0063,
      "step": 2661500
    },
    {
      "epoch": 4.355635854231719,
      "grad_norm": 0.23220883309841156,
      "learning_rate": 1.2972200075117126e-06,
      "loss": 0.0082,
      "step": 2661520
    },
    {
      "epoch": 4.355668584670372,
      "grad_norm": 0.17058706283569336,
      "learning_rate": 1.2971541152981953e-06,
      "loss": 0.0085,
      "step": 2661540
    },
    {
      "epoch": 4.355701315109025,
      "grad_norm": 0.2151726335287094,
      "learning_rate": 1.297088223084678e-06,
      "loss": 0.0114,
      "step": 2661560
    },
    {
      "epoch": 4.3557340455476785,
      "grad_norm": 0.07066462188959122,
      "learning_rate": 1.297022330871161e-06,
      "loss": 0.0054,
      "step": 2661580
    },
    {
      "epoch": 4.355766775986332,
      "grad_norm": 0.17850728332996368,
      "learning_rate": 1.2969564386576438e-06,
      "loss": 0.0077,
      "step": 2661600
    },
    {
      "epoch": 4.355799506424985,
      "grad_norm": 0.11879372596740723,
      "learning_rate": 1.2968905464441267e-06,
      "loss": 0.0049,
      "step": 2661620
    },
    {
      "epoch": 4.355832236863638,
      "grad_norm": 0.3149089813232422,
      "learning_rate": 1.2968246542306095e-06,
      "loss": 0.0077,
      "step": 2661640
    },
    {
      "epoch": 4.355864967302292,
      "grad_norm": 0.3098202347755432,
      "learning_rate": 1.2967587620170926e-06,
      "loss": 0.0114,
      "step": 2661660
    },
    {
      "epoch": 4.355897697740945,
      "grad_norm": 0.2726348638534546,
      "learning_rate": 1.2966928698035756e-06,
      "loss": 0.0074,
      "step": 2661680
    },
    {
      "epoch": 4.355930428179598,
      "grad_norm": 0.26264989376068115,
      "learning_rate": 1.2966269775900583e-06,
      "loss": 0.0106,
      "step": 2661700
    },
    {
      "epoch": 4.355963158618252,
      "grad_norm": 0.21033920347690582,
      "learning_rate": 1.2965610853765413e-06,
      "loss": 0.0089,
      "step": 2661720
    },
    {
      "epoch": 4.3559958890569055,
      "grad_norm": 0.08096085488796234,
      "learning_rate": 1.296495193163024e-06,
      "loss": 0.0092,
      "step": 2661740
    },
    {
      "epoch": 4.356028619495558,
      "grad_norm": 0.3275934159755707,
      "learning_rate": 1.296429300949507e-06,
      "loss": 0.0087,
      "step": 2661760
    },
    {
      "epoch": 4.356061349934212,
      "grad_norm": 0.352460116147995,
      "learning_rate": 1.2963634087359897e-06,
      "loss": 0.0088,
      "step": 2661780
    },
    {
      "epoch": 4.356094080372865,
      "grad_norm": 0.10053136199712753,
      "learning_rate": 1.2962975165224724e-06,
      "loss": 0.0075,
      "step": 2661800
    },
    {
      "epoch": 4.356126810811518,
      "grad_norm": 0.3370497226715088,
      "learning_rate": 1.2962316243089554e-06,
      "loss": 0.0088,
      "step": 2661820
    },
    {
      "epoch": 4.356159541250172,
      "grad_norm": 0.5301944017410278,
      "learning_rate": 1.2961657320954386e-06,
      "loss": 0.0094,
      "step": 2661840
    },
    {
      "epoch": 4.356192271688825,
      "grad_norm": 0.22291767597198486,
      "learning_rate": 1.2960998398819213e-06,
      "loss": 0.0079,
      "step": 2661860
    },
    {
      "epoch": 4.356225002127479,
      "grad_norm": 0.28187432885169983,
      "learning_rate": 1.2960339476684042e-06,
      "loss": 0.0086,
      "step": 2661880
    },
    {
      "epoch": 4.356257732566132,
      "grad_norm": 0.04558568075299263,
      "learning_rate": 1.295968055454887e-06,
      "loss": 0.007,
      "step": 2661900
    },
    {
      "epoch": 4.356290463004785,
      "grad_norm": 0.25270983576774597,
      "learning_rate": 1.29590216324137e-06,
      "loss": 0.0077,
      "step": 2661920
    },
    {
      "epoch": 4.356323193443439,
      "grad_norm": 0.21185614168643951,
      "learning_rate": 1.2958362710278527e-06,
      "loss": 0.0105,
      "step": 2661940
    },
    {
      "epoch": 4.3563559238820915,
      "grad_norm": 0.27995702624320984,
      "learning_rate": 1.2957703788143356e-06,
      "loss": 0.011,
      "step": 2661960
    },
    {
      "epoch": 4.356388654320745,
      "grad_norm": 0.2787754535675049,
      "learning_rate": 1.2957044866008184e-06,
      "loss": 0.0064,
      "step": 2661980
    },
    {
      "epoch": 4.356421384759399,
      "grad_norm": 0.18335610628128052,
      "learning_rate": 1.2956385943873015e-06,
      "loss": 0.0114,
      "step": 2662000
    },
    {
      "epoch": 4.356454115198052,
      "grad_norm": 0.28962811827659607,
      "learning_rate": 1.2955727021737843e-06,
      "loss": 0.0093,
      "step": 2662020
    },
    {
      "epoch": 4.356486845636705,
      "grad_norm": 0.2668962776660919,
      "learning_rate": 1.2955068099602672e-06,
      "loss": 0.0081,
      "step": 2662040
    },
    {
      "epoch": 4.356519576075359,
      "grad_norm": 0.3034093976020813,
      "learning_rate": 1.29544091774675e-06,
      "loss": 0.0086,
      "step": 2662060
    },
    {
      "epoch": 4.356552306514012,
      "grad_norm": 0.2596152424812317,
      "learning_rate": 1.295375025533233e-06,
      "loss": 0.0078,
      "step": 2662080
    },
    {
      "epoch": 4.356585036952665,
      "grad_norm": 0.2797272503376007,
      "learning_rate": 1.2953091333197157e-06,
      "loss": 0.0086,
      "step": 2662100
    },
    {
      "epoch": 4.3566177673913185,
      "grad_norm": 0.08210176229476929,
      "learning_rate": 1.2952432411061986e-06,
      "loss": 0.0084,
      "step": 2662120
    },
    {
      "epoch": 4.356650497829972,
      "grad_norm": 0.46794065833091736,
      "learning_rate": 1.2951773488926813e-06,
      "loss": 0.0119,
      "step": 2662140
    },
    {
      "epoch": 4.356683228268626,
      "grad_norm": 0.09983203560113907,
      "learning_rate": 1.2951114566791643e-06,
      "loss": 0.0093,
      "step": 2662160
    },
    {
      "epoch": 4.356715958707278,
      "grad_norm": 0.3308180868625641,
      "learning_rate": 1.2950455644656473e-06,
      "loss": 0.006,
      "step": 2662180
    },
    {
      "epoch": 4.356748689145932,
      "grad_norm": 0.37741443514823914,
      "learning_rate": 1.2949796722521302e-06,
      "loss": 0.0113,
      "step": 2662200
    },
    {
      "epoch": 4.3567814195845855,
      "grad_norm": 0.20513544976711273,
      "learning_rate": 1.294913780038613e-06,
      "loss": 0.0086,
      "step": 2662220
    },
    {
      "epoch": 4.356814150023238,
      "grad_norm": 0.49738630652427673,
      "learning_rate": 1.294847887825096e-06,
      "loss": 0.0089,
      "step": 2662240
    },
    {
      "epoch": 4.356846880461892,
      "grad_norm": 0.3416459858417511,
      "learning_rate": 1.2947819956115786e-06,
      "loss": 0.007,
      "step": 2662260
    },
    {
      "epoch": 4.356879610900545,
      "grad_norm": 0.08090194314718246,
      "learning_rate": 1.2947161033980616e-06,
      "loss": 0.0076,
      "step": 2662280
    },
    {
      "epoch": 4.356912341339199,
      "grad_norm": 0.12718352675437927,
      "learning_rate": 1.2946502111845443e-06,
      "loss": 0.0079,
      "step": 2662300
    },
    {
      "epoch": 4.356945071777852,
      "grad_norm": 0.18145538866519928,
      "learning_rate": 1.2945843189710273e-06,
      "loss": 0.0097,
      "step": 2662320
    },
    {
      "epoch": 4.356977802216505,
      "grad_norm": null,
      "learning_rate": 1.29451842675751e-06,
      "loss": 0.0103,
      "step": 2662340
    },
    {
      "epoch": 4.357010532655159,
      "grad_norm": 0.30106669664382935,
      "learning_rate": 1.2944525345439932e-06,
      "loss": 0.0103,
      "step": 2662360
    },
    {
      "epoch": 4.357043263093812,
      "grad_norm": 0.1840599626302719,
      "learning_rate": 1.294386642330476e-06,
      "loss": 0.0092,
      "step": 2662380
    },
    {
      "epoch": 4.357075993532465,
      "grad_norm": 0.21525819599628448,
      "learning_rate": 1.2943207501169589e-06,
      "loss": 0.0076,
      "step": 2662400
    },
    {
      "epoch": 4.357108723971119,
      "grad_norm": 0.1362966150045395,
      "learning_rate": 1.2942548579034416e-06,
      "loss": 0.0089,
      "step": 2662420
    },
    {
      "epoch": 4.357141454409772,
      "grad_norm": 0.3008873760700226,
      "learning_rate": 1.2941889656899246e-06,
      "loss": 0.0064,
      "step": 2662440
    },
    {
      "epoch": 4.357174184848425,
      "grad_norm": 0.532775342464447,
      "learning_rate": 1.2941230734764073e-06,
      "loss": 0.0082,
      "step": 2662460
    },
    {
      "epoch": 4.357206915287079,
      "grad_norm": 0.12234214693307877,
      "learning_rate": 1.2940571812628903e-06,
      "loss": 0.007,
      "step": 2662480
    },
    {
      "epoch": 4.357239645725732,
      "grad_norm": 0.11847022920846939,
      "learning_rate": 1.293991289049373e-06,
      "loss": 0.01,
      "step": 2662500
    },
    {
      "epoch": 4.357272376164385,
      "grad_norm": 0.27989912033081055,
      "learning_rate": 1.293925396835856e-06,
      "loss": 0.012,
      "step": 2662520
    },
    {
      "epoch": 4.357305106603039,
      "grad_norm": 0.08644954115152359,
      "learning_rate": 1.293859504622339e-06,
      "loss": 0.008,
      "step": 2662540
    },
    {
      "epoch": 4.357337837041692,
      "grad_norm": 0.3676488697528839,
      "learning_rate": 1.2937936124088218e-06,
      "loss": 0.011,
      "step": 2662560
    },
    {
      "epoch": 4.357370567480345,
      "grad_norm": 0.1013743132352829,
      "learning_rate": 1.2937277201953046e-06,
      "loss": 0.0078,
      "step": 2662580
    },
    {
      "epoch": 4.3574032979189985,
      "grad_norm": 0.3469694256782532,
      "learning_rate": 1.2936618279817875e-06,
      "loss": 0.0089,
      "step": 2662600
    },
    {
      "epoch": 4.357436028357652,
      "grad_norm": 0.4079306721687317,
      "learning_rate": 1.2935959357682703e-06,
      "loss": 0.0101,
      "step": 2662620
    },
    {
      "epoch": 4.357468758796306,
      "grad_norm": 0.19265183806419373,
      "learning_rate": 1.2935300435547532e-06,
      "loss": 0.0103,
      "step": 2662640
    },
    {
      "epoch": 4.357501489234958,
      "grad_norm": 0.10771748423576355,
      "learning_rate": 1.293464151341236e-06,
      "loss": 0.0073,
      "step": 2662660
    },
    {
      "epoch": 4.357534219673612,
      "grad_norm": 0.4462454319000244,
      "learning_rate": 1.293398259127719e-06,
      "loss": 0.0082,
      "step": 2662680
    },
    {
      "epoch": 4.357566950112266,
      "grad_norm": 0.10989390313625336,
      "learning_rate": 1.2933323669142017e-06,
      "loss": 0.0113,
      "step": 2662700
    },
    {
      "epoch": 4.357599680550919,
      "grad_norm": 0.06516308337450027,
      "learning_rate": 1.2932664747006848e-06,
      "loss": 0.0082,
      "step": 2662720
    },
    {
      "epoch": 4.357632410989572,
      "grad_norm": 0.442231148481369,
      "learning_rate": 1.2932005824871678e-06,
      "loss": 0.0088,
      "step": 2662740
    },
    {
      "epoch": 4.3576651414282255,
      "grad_norm": 0.47890350222587585,
      "learning_rate": 1.2931346902736505e-06,
      "loss": 0.0163,
      "step": 2662760
    },
    {
      "epoch": 4.357697871866879,
      "grad_norm": 0.1900368332862854,
      "learning_rate": 1.2930687980601333e-06,
      "loss": 0.0048,
      "step": 2662780
    },
    {
      "epoch": 4.357730602305532,
      "grad_norm": 0.11964180320501328,
      "learning_rate": 1.2930029058466162e-06,
      "loss": 0.0068,
      "step": 2662800
    },
    {
      "epoch": 4.357763332744185,
      "grad_norm": 0.15784555673599243,
      "learning_rate": 1.292937013633099e-06,
      "loss": 0.0111,
      "step": 2662820
    },
    {
      "epoch": 4.357796063182839,
      "grad_norm": 0.39575234055519104,
      "learning_rate": 1.292871121419582e-06,
      "loss": 0.0086,
      "step": 2662840
    },
    {
      "epoch": 4.357828793621492,
      "grad_norm": 0.10165847837924957,
      "learning_rate": 1.2928052292060646e-06,
      "loss": 0.0079,
      "step": 2662860
    },
    {
      "epoch": 4.357861524060145,
      "grad_norm": 0.2722109258174896,
      "learning_rate": 1.2927393369925478e-06,
      "loss": 0.0067,
      "step": 2662880
    },
    {
      "epoch": 4.357894254498799,
      "grad_norm": 0.2702338397502899,
      "learning_rate": 1.2926734447790308e-06,
      "loss": 0.0071,
      "step": 2662900
    },
    {
      "epoch": 4.3579269849374525,
      "grad_norm": 0.325641006231308,
      "learning_rate": 1.2926075525655135e-06,
      "loss": 0.0119,
      "step": 2662920
    },
    {
      "epoch": 4.357959715376105,
      "grad_norm": 0.4138306975364685,
      "learning_rate": 1.2925416603519964e-06,
      "loss": 0.0105,
      "step": 2662940
    },
    {
      "epoch": 4.357992445814759,
      "grad_norm": 0.1085595041513443,
      "learning_rate": 1.2924757681384792e-06,
      "loss": 0.0136,
      "step": 2662960
    },
    {
      "epoch": 4.358025176253412,
      "grad_norm": 0.3458186686038971,
      "learning_rate": 1.2924098759249621e-06,
      "loss": 0.0093,
      "step": 2662980
    },
    {
      "epoch": 4.358057906692066,
      "grad_norm": 0.19617468118667603,
      "learning_rate": 1.2923439837114449e-06,
      "loss": 0.012,
      "step": 2663000
    },
    {
      "epoch": 4.358090637130719,
      "grad_norm": 0.5157734155654907,
      "learning_rate": 1.2922780914979276e-06,
      "loss": 0.0084,
      "step": 2663020
    },
    {
      "epoch": 4.358123367569372,
      "grad_norm": 0.4460710287094116,
      "learning_rate": 1.2922121992844106e-06,
      "loss": 0.0126,
      "step": 2663040
    },
    {
      "epoch": 4.358156098008026,
      "grad_norm": 0.602007269859314,
      "learning_rate": 1.2921463070708937e-06,
      "loss": 0.0107,
      "step": 2663060
    },
    {
      "epoch": 4.3581888284466785,
      "grad_norm": 0.1651587337255478,
      "learning_rate": 1.2920804148573765e-06,
      "loss": 0.0085,
      "step": 2663080
    },
    {
      "epoch": 4.358221558885332,
      "grad_norm": 0.14560416340827942,
      "learning_rate": 1.2920145226438594e-06,
      "loss": 0.0057,
      "step": 2663100
    },
    {
      "epoch": 4.358254289323986,
      "grad_norm": 0.06432047486305237,
      "learning_rate": 1.2919486304303422e-06,
      "loss": 0.0059,
      "step": 2663120
    },
    {
      "epoch": 4.358287019762638,
      "grad_norm": 0.0887683555483818,
      "learning_rate": 1.2918827382168251e-06,
      "loss": 0.0122,
      "step": 2663140
    },
    {
      "epoch": 4.358319750201292,
      "grad_norm": 0.9578642845153809,
      "learning_rate": 1.2918168460033079e-06,
      "loss": 0.0105,
      "step": 2663160
    },
    {
      "epoch": 4.358352480639946,
      "grad_norm": 0.18147309124469757,
      "learning_rate": 1.2917509537897908e-06,
      "loss": 0.0082,
      "step": 2663180
    },
    {
      "epoch": 4.358385211078599,
      "grad_norm": 0.17340657114982605,
      "learning_rate": 1.2916850615762735e-06,
      "loss": 0.0126,
      "step": 2663200
    },
    {
      "epoch": 4.358417941517252,
      "grad_norm": 0.1670636087656021,
      "learning_rate": 1.2916191693627563e-06,
      "loss": 0.0091,
      "step": 2663220
    },
    {
      "epoch": 4.3584506719559055,
      "grad_norm": 0.17327409982681274,
      "learning_rate": 1.2915532771492394e-06,
      "loss": 0.0071,
      "step": 2663240
    },
    {
      "epoch": 4.358483402394559,
      "grad_norm": 0.14221854507923126,
      "learning_rate": 1.2914873849357224e-06,
      "loss": 0.0099,
      "step": 2663260
    },
    {
      "epoch": 4.358516132833212,
      "grad_norm": 0.1859121173620224,
      "learning_rate": 1.2914214927222051e-06,
      "loss": 0.0079,
      "step": 2663280
    },
    {
      "epoch": 4.358548863271865,
      "grad_norm": 0.2271367460489273,
      "learning_rate": 1.291355600508688e-06,
      "loss": 0.0083,
      "step": 2663300
    },
    {
      "epoch": 4.358581593710519,
      "grad_norm": 0.2319485992193222,
      "learning_rate": 1.2912897082951708e-06,
      "loss": 0.0123,
      "step": 2663320
    },
    {
      "epoch": 4.358614324149173,
      "grad_norm": 0.3995506167411804,
      "learning_rate": 1.2912238160816538e-06,
      "loss": 0.008,
      "step": 2663340
    },
    {
      "epoch": 4.358647054587825,
      "grad_norm": 0.07238762080669403,
      "learning_rate": 1.2911579238681365e-06,
      "loss": 0.008,
      "step": 2663360
    },
    {
      "epoch": 4.358679785026479,
      "grad_norm": 0.0773954838514328,
      "learning_rate": 1.2910920316546195e-06,
      "loss": 0.0072,
      "step": 2663380
    },
    {
      "epoch": 4.3587125154651325,
      "grad_norm": 0.1371738463640213,
      "learning_rate": 1.2910261394411022e-06,
      "loss": 0.014,
      "step": 2663400
    },
    {
      "epoch": 4.358745245903785,
      "grad_norm": 0.2689439058303833,
      "learning_rate": 1.2909602472275854e-06,
      "loss": 0.0084,
      "step": 2663420
    },
    {
      "epoch": 4.358777976342439,
      "grad_norm": 0.17593280971050262,
      "learning_rate": 1.2908943550140681e-06,
      "loss": 0.0113,
      "step": 2663440
    },
    {
      "epoch": 4.358810706781092,
      "grad_norm": 0.29398810863494873,
      "learning_rate": 1.290828462800551e-06,
      "loss": 0.0072,
      "step": 2663460
    },
    {
      "epoch": 4.358843437219746,
      "grad_norm": 0.07224420458078384,
      "learning_rate": 1.2907625705870338e-06,
      "loss": 0.0082,
      "step": 2663480
    },
    {
      "epoch": 4.358876167658399,
      "grad_norm": 0.2787233889102936,
      "learning_rate": 1.2906966783735168e-06,
      "loss": 0.0082,
      "step": 2663500
    },
    {
      "epoch": 4.358908898097052,
      "grad_norm": 0.6182164549827576,
      "learning_rate": 1.2906307861599995e-06,
      "loss": 0.0082,
      "step": 2663520
    },
    {
      "epoch": 4.358941628535706,
      "grad_norm": 0.14350301027297974,
      "learning_rate": 1.2905648939464824e-06,
      "loss": 0.0077,
      "step": 2663540
    },
    {
      "epoch": 4.358974358974359,
      "grad_norm": 0.16599582135677338,
      "learning_rate": 1.2904990017329652e-06,
      "loss": 0.0092,
      "step": 2663560
    },
    {
      "epoch": 4.359007089413012,
      "grad_norm": 0.28495198488235474,
      "learning_rate": 1.2904331095194481e-06,
      "loss": 0.0074,
      "step": 2663580
    },
    {
      "epoch": 4.359039819851666,
      "grad_norm": 0.16309119760990143,
      "learning_rate": 1.290367217305931e-06,
      "loss": 0.0112,
      "step": 2663600
    },
    {
      "epoch": 4.359072550290319,
      "grad_norm": 0.13799497485160828,
      "learning_rate": 1.290301325092414e-06,
      "loss": 0.0086,
      "step": 2663620
    },
    {
      "epoch": 4.359105280728972,
      "grad_norm": 0.07228326797485352,
      "learning_rate": 1.2902354328788968e-06,
      "loss": 0.011,
      "step": 2663640
    },
    {
      "epoch": 4.359138011167626,
      "grad_norm": 0.20259548723697662,
      "learning_rate": 1.2901695406653797e-06,
      "loss": 0.0078,
      "step": 2663660
    },
    {
      "epoch": 4.359170741606279,
      "grad_norm": 0.24666938185691833,
      "learning_rate": 1.2901036484518625e-06,
      "loss": 0.0099,
      "step": 2663680
    },
    {
      "epoch": 4.359203472044932,
      "grad_norm": 0.1606719195842743,
      "learning_rate": 1.2900377562383454e-06,
      "loss": 0.0097,
      "step": 2663700
    },
    {
      "epoch": 4.359236202483586,
      "grad_norm": 0.2496635913848877,
      "learning_rate": 1.2899718640248282e-06,
      "loss": 0.005,
      "step": 2663720
    },
    {
      "epoch": 4.359268932922239,
      "grad_norm": 0.050905801355838776,
      "learning_rate": 1.2899059718113111e-06,
      "loss": 0.0082,
      "step": 2663740
    },
    {
      "epoch": 4.359301663360893,
      "grad_norm": 0.20129583775997162,
      "learning_rate": 1.289840079597794e-06,
      "loss": 0.0101,
      "step": 2663760
    },
    {
      "epoch": 4.3593343937995455,
      "grad_norm": 0.25142940878868103,
      "learning_rate": 1.289774187384277e-06,
      "loss": 0.0069,
      "step": 2663780
    },
    {
      "epoch": 4.359367124238199,
      "grad_norm": 0.4981909990310669,
      "learning_rate": 1.2897082951707598e-06,
      "loss": 0.0099,
      "step": 2663800
    },
    {
      "epoch": 4.359399854676853,
      "grad_norm": 0.5325832366943359,
      "learning_rate": 1.2896424029572427e-06,
      "loss": 0.0105,
      "step": 2663820
    },
    {
      "epoch": 4.359432585115505,
      "grad_norm": 0.43189454078674316,
      "learning_rate": 1.2895765107437255e-06,
      "loss": 0.0105,
      "step": 2663840
    },
    {
      "epoch": 4.359465315554159,
      "grad_norm": 0.12816834449768066,
      "learning_rate": 1.2895106185302084e-06,
      "loss": 0.0128,
      "step": 2663860
    },
    {
      "epoch": 4.3594980459928125,
      "grad_norm": 0.32735973596572876,
      "learning_rate": 1.2894447263166911e-06,
      "loss": 0.009,
      "step": 2663880
    },
    {
      "epoch": 4.359530776431466,
      "grad_norm": 0.1896384060382843,
      "learning_rate": 1.289378834103174e-06,
      "loss": 0.0132,
      "step": 2663900
    },
    {
      "epoch": 4.359563506870119,
      "grad_norm": 0.13924641907215118,
      "learning_rate": 1.2893129418896568e-06,
      "loss": 0.0072,
      "step": 2663920
    },
    {
      "epoch": 4.359596237308772,
      "grad_norm": 0.3049612045288086,
      "learning_rate": 1.28924704967614e-06,
      "loss": 0.0098,
      "step": 2663940
    },
    {
      "epoch": 4.359628967747426,
      "grad_norm": 0.533937931060791,
      "learning_rate": 1.289181157462623e-06,
      "loss": 0.0144,
      "step": 2663960
    },
    {
      "epoch": 4.359661698186079,
      "grad_norm": 0.039975594729185104,
      "learning_rate": 1.2891152652491057e-06,
      "loss": 0.0073,
      "step": 2663980
    },
    {
      "epoch": 4.359694428624732,
      "grad_norm": 0.2734209895133972,
      "learning_rate": 1.2890493730355884e-06,
      "loss": 0.0094,
      "step": 2664000
    },
    {
      "epoch": 4.359727159063386,
      "grad_norm": 0.19399027526378632,
      "learning_rate": 1.2889834808220714e-06,
      "loss": 0.0145,
      "step": 2664020
    },
    {
      "epoch": 4.3597598895020395,
      "grad_norm": 0.3452916145324707,
      "learning_rate": 1.2889175886085541e-06,
      "loss": 0.0092,
      "step": 2664040
    },
    {
      "epoch": 4.359792619940692,
      "grad_norm": 0.2196362167596817,
      "learning_rate": 1.288851696395037e-06,
      "loss": 0.0079,
      "step": 2664060
    },
    {
      "epoch": 4.359825350379346,
      "grad_norm": 0.1414870321750641,
      "learning_rate": 1.2887858041815198e-06,
      "loss": 0.0077,
      "step": 2664080
    },
    {
      "epoch": 4.359858080817999,
      "grad_norm": 0.12354959547519684,
      "learning_rate": 1.2887199119680028e-06,
      "loss": 0.0095,
      "step": 2664100
    },
    {
      "epoch": 4.359890811256652,
      "grad_norm": 0.22813840210437775,
      "learning_rate": 1.288654019754486e-06,
      "loss": 0.0121,
      "step": 2664120
    },
    {
      "epoch": 4.359923541695306,
      "grad_norm": 0.06865508109331131,
      "learning_rate": 1.2885881275409687e-06,
      "loss": 0.0062,
      "step": 2664140
    },
    {
      "epoch": 4.359956272133959,
      "grad_norm": 0.16857250034809113,
      "learning_rate": 1.2885222353274516e-06,
      "loss": 0.0104,
      "step": 2664160
    },
    {
      "epoch": 4.359989002572613,
      "grad_norm": 0.6931039094924927,
      "learning_rate": 1.2884563431139344e-06,
      "loss": 0.0127,
      "step": 2664180
    },
    {
      "epoch": 4.360021733011266,
      "grad_norm": 0.26067861914634705,
      "learning_rate": 1.288390450900417e-06,
      "loss": 0.0065,
      "step": 2664200
    },
    {
      "epoch": 4.360054463449919,
      "grad_norm": 0.1450720876455307,
      "learning_rate": 1.2883245586869e-06,
      "loss": 0.0089,
      "step": 2664220
    },
    {
      "epoch": 4.360087193888573,
      "grad_norm": 0.18756118416786194,
      "learning_rate": 1.2882586664733828e-06,
      "loss": 0.0129,
      "step": 2664240
    },
    {
      "epoch": 4.3601199243272255,
      "grad_norm": 0.13404212892055511,
      "learning_rate": 1.2881927742598657e-06,
      "loss": 0.0103,
      "step": 2664260
    },
    {
      "epoch": 4.360152654765879,
      "grad_norm": 0.240838423371315,
      "learning_rate": 1.2881268820463485e-06,
      "loss": 0.0128,
      "step": 2664280
    },
    {
      "epoch": 4.360185385204533,
      "grad_norm": 0.23954829573631287,
      "learning_rate": 1.2880609898328316e-06,
      "loss": 0.0085,
      "step": 2664300
    },
    {
      "epoch": 4.360218115643185,
      "grad_norm": 0.1438618004322052,
      "learning_rate": 1.2879950976193146e-06,
      "loss": 0.007,
      "step": 2664320
    },
    {
      "epoch": 4.360250846081839,
      "grad_norm": 0.20412692427635193,
      "learning_rate": 1.2879292054057973e-06,
      "loss": 0.0088,
      "step": 2664340
    },
    {
      "epoch": 4.360283576520493,
      "grad_norm": 0.13233619928359985,
      "learning_rate": 1.2878633131922803e-06,
      "loss": 0.0103,
      "step": 2664360
    },
    {
      "epoch": 4.360316306959146,
      "grad_norm": 0.11676052212715149,
      "learning_rate": 1.287797420978763e-06,
      "loss": 0.0057,
      "step": 2664380
    },
    {
      "epoch": 4.360349037397799,
      "grad_norm": 0.36098387837409973,
      "learning_rate": 1.287731528765246e-06,
      "loss": 0.0131,
      "step": 2664400
    },
    {
      "epoch": 4.3603817678364525,
      "grad_norm": 0.2662762701511383,
      "learning_rate": 1.2876656365517287e-06,
      "loss": 0.011,
      "step": 2664420
    },
    {
      "epoch": 4.360414498275106,
      "grad_norm": 0.5177237391471863,
      "learning_rate": 1.2875997443382115e-06,
      "loss": 0.0078,
      "step": 2664440
    },
    {
      "epoch": 4.36044722871376,
      "grad_norm": 0.4154108464717865,
      "learning_rate": 1.2875338521246944e-06,
      "loss": 0.0119,
      "step": 2664460
    },
    {
      "epoch": 4.360479959152412,
      "grad_norm": 0.4447835385799408,
      "learning_rate": 1.2874679599111776e-06,
      "loss": 0.0113,
      "step": 2664480
    },
    {
      "epoch": 4.360512689591066,
      "grad_norm": 0.2588707208633423,
      "learning_rate": 1.2874020676976603e-06,
      "loss": 0.0092,
      "step": 2664500
    },
    {
      "epoch": 4.36054542002972,
      "grad_norm": 0.30782610177993774,
      "learning_rate": 1.2873361754841433e-06,
      "loss": 0.0092,
      "step": 2664520
    },
    {
      "epoch": 4.360578150468372,
      "grad_norm": 0.2810705304145813,
      "learning_rate": 1.287270283270626e-06,
      "loss": 0.0071,
      "step": 2664540
    },
    {
      "epoch": 4.360610880907026,
      "grad_norm": 0.16648028790950775,
      "learning_rate": 1.287204391057109e-06,
      "loss": 0.0077,
      "step": 2664560
    },
    {
      "epoch": 4.3606436113456795,
      "grad_norm": 0.30938026309013367,
      "learning_rate": 1.2871384988435917e-06,
      "loss": 0.0106,
      "step": 2664580
    },
    {
      "epoch": 4.360676341784332,
      "grad_norm": 0.203080415725708,
      "learning_rate": 1.2870726066300746e-06,
      "loss": 0.0075,
      "step": 2664600
    },
    {
      "epoch": 4.360709072222986,
      "grad_norm": 0.3124188780784607,
      "learning_rate": 1.2870067144165574e-06,
      "loss": 0.0152,
      "step": 2664620
    },
    {
      "epoch": 4.360741802661639,
      "grad_norm": 0.06311357021331787,
      "learning_rate": 1.2869408222030405e-06,
      "loss": 0.0083,
      "step": 2664640
    },
    {
      "epoch": 4.360774533100293,
      "grad_norm": 0.37682121992111206,
      "learning_rate": 1.2868749299895233e-06,
      "loss": 0.0145,
      "step": 2664660
    },
    {
      "epoch": 4.360807263538946,
      "grad_norm": 0.2398374080657959,
      "learning_rate": 1.2868090377760062e-06,
      "loss": 0.0087,
      "step": 2664680
    },
    {
      "epoch": 4.360839993977599,
      "grad_norm": 0.2689495384693146,
      "learning_rate": 1.286743145562489e-06,
      "loss": 0.01,
      "step": 2664700
    },
    {
      "epoch": 4.360872724416253,
      "grad_norm": 0.2888389825820923,
      "learning_rate": 1.286677253348972e-06,
      "loss": 0.0066,
      "step": 2664720
    },
    {
      "epoch": 4.3609054548549055,
      "grad_norm": 0.34673985838890076,
      "learning_rate": 1.2866113611354547e-06,
      "loss": 0.0079,
      "step": 2664740
    },
    {
      "epoch": 4.360938185293559,
      "grad_norm": 0.1920464187860489,
      "learning_rate": 1.2865454689219376e-06,
      "loss": 0.0071,
      "step": 2664760
    },
    {
      "epoch": 4.360970915732213,
      "grad_norm": 0.17262159287929535,
      "learning_rate": 1.2864795767084204e-06,
      "loss": 0.0115,
      "step": 2664780
    },
    {
      "epoch": 4.361003646170866,
      "grad_norm": 0.4016750752925873,
      "learning_rate": 1.2864136844949033e-06,
      "loss": 0.0098,
      "step": 2664800
    },
    {
      "epoch": 4.361036376609519,
      "grad_norm": 0.41124269366264343,
      "learning_rate": 1.2863477922813863e-06,
      "loss": 0.0091,
      "step": 2664820
    },
    {
      "epoch": 4.361069107048173,
      "grad_norm": 0.44397619366645813,
      "learning_rate": 1.2862819000678692e-06,
      "loss": 0.0107,
      "step": 2664840
    },
    {
      "epoch": 4.361101837486826,
      "grad_norm": 0.5142433047294617,
      "learning_rate": 1.286216007854352e-06,
      "loss": 0.0075,
      "step": 2664860
    },
    {
      "epoch": 4.361134567925479,
      "grad_norm": 0.3243106007575989,
      "learning_rate": 1.286150115640835e-06,
      "loss": 0.0088,
      "step": 2664880
    },
    {
      "epoch": 4.3611672983641325,
      "grad_norm": 0.34535840153694153,
      "learning_rate": 1.2860842234273176e-06,
      "loss": 0.0124,
      "step": 2664900
    },
    {
      "epoch": 4.361200028802786,
      "grad_norm": 0.22154340147972107,
      "learning_rate": 1.2860183312138006e-06,
      "loss": 0.0115,
      "step": 2664920
    },
    {
      "epoch": 4.36123275924144,
      "grad_norm": 0.2615903913974762,
      "learning_rate": 1.2859524390002833e-06,
      "loss": 0.0092,
      "step": 2664940
    },
    {
      "epoch": 4.361265489680092,
      "grad_norm": 0.19944344460964203,
      "learning_rate": 1.2858865467867663e-06,
      "loss": 0.0077,
      "step": 2664960
    },
    {
      "epoch": 4.361298220118746,
      "grad_norm": 0.19008341431617737,
      "learning_rate": 1.285820654573249e-06,
      "loss": 0.0118,
      "step": 2664980
    },
    {
      "epoch": 4.3613309505574,
      "grad_norm": 0.3722706735134125,
      "learning_rate": 1.2857547623597322e-06,
      "loss": 0.0092,
      "step": 2665000
    },
    {
      "epoch": 4.361363680996052,
      "grad_norm": 0.09229009598493576,
      "learning_rate": 1.285688870146215e-06,
      "loss": 0.0057,
      "step": 2665020
    },
    {
      "epoch": 4.361396411434706,
      "grad_norm": 0.22474786639213562,
      "learning_rate": 1.2856229779326979e-06,
      "loss": 0.0063,
      "step": 2665040
    },
    {
      "epoch": 4.3614291418733595,
      "grad_norm": 0.11949018388986588,
      "learning_rate": 1.2855570857191806e-06,
      "loss": 0.0069,
      "step": 2665060
    },
    {
      "epoch": 4.361461872312013,
      "grad_norm": 0.40723806619644165,
      "learning_rate": 1.2854911935056636e-06,
      "loss": 0.0098,
      "step": 2665080
    },
    {
      "epoch": 4.361494602750666,
      "grad_norm": 0.17795082926750183,
      "learning_rate": 1.2854253012921463e-06,
      "loss": 0.0111,
      "step": 2665100
    },
    {
      "epoch": 4.361527333189319,
      "grad_norm": 0.09054192900657654,
      "learning_rate": 1.2853594090786293e-06,
      "loss": 0.0097,
      "step": 2665120
    },
    {
      "epoch": 4.361560063627973,
      "grad_norm": 0.5283997058868408,
      "learning_rate": 1.285293516865112e-06,
      "loss": 0.0086,
      "step": 2665140
    },
    {
      "epoch": 4.361592794066626,
      "grad_norm": 0.6631073951721191,
      "learning_rate": 1.285227624651595e-06,
      "loss": 0.0093,
      "step": 2665160
    },
    {
      "epoch": 4.361625524505279,
      "grad_norm": 0.06918893754482269,
      "learning_rate": 1.2851617324380781e-06,
      "loss": 0.0064,
      "step": 2665180
    },
    {
      "epoch": 4.361658254943933,
      "grad_norm": 0.17956380546092987,
      "learning_rate": 1.2850958402245609e-06,
      "loss": 0.0079,
      "step": 2665200
    },
    {
      "epoch": 4.3616909853825865,
      "grad_norm": 0.6822400689125061,
      "learning_rate": 1.2850299480110436e-06,
      "loss": 0.0085,
      "step": 2665220
    },
    {
      "epoch": 4.361723715821239,
      "grad_norm": 0.5055719017982483,
      "learning_rate": 1.2849640557975266e-06,
      "loss": 0.0109,
      "step": 2665240
    },
    {
      "epoch": 4.361756446259893,
      "grad_norm": 0.20269422233104706,
      "learning_rate": 1.2848981635840093e-06,
      "loss": 0.0062,
      "step": 2665260
    },
    {
      "epoch": 4.361789176698546,
      "grad_norm": 0.27065515518188477,
      "learning_rate": 1.2848322713704922e-06,
      "loss": 0.0098,
      "step": 2665280
    },
    {
      "epoch": 4.361821907137199,
      "grad_norm": 0.5400820374488831,
      "learning_rate": 1.284766379156975e-06,
      "loss": 0.0072,
      "step": 2665300
    },
    {
      "epoch": 4.361854637575853,
      "grad_norm": 0.18476635217666626,
      "learning_rate": 1.284700486943458e-06,
      "loss": 0.0102,
      "step": 2665320
    },
    {
      "epoch": 4.361887368014506,
      "grad_norm": 0.21424807608127594,
      "learning_rate": 1.2846345947299407e-06,
      "loss": 0.0092,
      "step": 2665340
    },
    {
      "epoch": 4.36192009845316,
      "grad_norm": 0.16884273290634155,
      "learning_rate": 1.2845687025164238e-06,
      "loss": 0.0095,
      "step": 2665360
    },
    {
      "epoch": 4.361952828891813,
      "grad_norm": 0.18992669880390167,
      "learning_rate": 1.2845028103029068e-06,
      "loss": 0.0068,
      "step": 2665380
    },
    {
      "epoch": 4.361985559330466,
      "grad_norm": 1.3969906568527222,
      "learning_rate": 1.2844369180893895e-06,
      "loss": 0.0095,
      "step": 2665400
    },
    {
      "epoch": 4.36201828976912,
      "grad_norm": 0.15655241906642914,
      "learning_rate": 1.2843710258758723e-06,
      "loss": 0.0122,
      "step": 2665420
    },
    {
      "epoch": 4.3620510202077725,
      "grad_norm": 0.3087361752986908,
      "learning_rate": 1.2843051336623552e-06,
      "loss": 0.0096,
      "step": 2665440
    },
    {
      "epoch": 4.362083750646426,
      "grad_norm": 0.697098433971405,
      "learning_rate": 1.284239241448838e-06,
      "loss": 0.0105,
      "step": 2665460
    },
    {
      "epoch": 4.36211648108508,
      "grad_norm": 0.8455808162689209,
      "learning_rate": 1.284173349235321e-06,
      "loss": 0.0087,
      "step": 2665480
    },
    {
      "epoch": 4.362149211523733,
      "grad_norm": 0.28090494871139526,
      "learning_rate": 1.2841074570218036e-06,
      "loss": 0.0128,
      "step": 2665500
    },
    {
      "epoch": 4.362181941962386,
      "grad_norm": 0.14772574603557587,
      "learning_rate": 1.2840415648082868e-06,
      "loss": 0.0115,
      "step": 2665520
    },
    {
      "epoch": 4.3622146724010396,
      "grad_norm": 0.05813070014119148,
      "learning_rate": 1.2839756725947698e-06,
      "loss": 0.0092,
      "step": 2665540
    },
    {
      "epoch": 4.362247402839693,
      "grad_norm": 0.13520076870918274,
      "learning_rate": 1.2839097803812525e-06,
      "loss": 0.0106,
      "step": 2665560
    },
    {
      "epoch": 4.362280133278346,
      "grad_norm": 0.1743641048669815,
      "learning_rate": 1.2838438881677355e-06,
      "loss": 0.0083,
      "step": 2665580
    },
    {
      "epoch": 4.3623128637169994,
      "grad_norm": 0.16009415686130524,
      "learning_rate": 1.2837779959542182e-06,
      "loss": 0.0091,
      "step": 2665600
    },
    {
      "epoch": 4.362345594155653,
      "grad_norm": 0.15034352242946625,
      "learning_rate": 1.2837121037407011e-06,
      "loss": 0.0054,
      "step": 2665620
    },
    {
      "epoch": 4.362378324594307,
      "grad_norm": 0.2110854536294937,
      "learning_rate": 1.2836462115271839e-06,
      "loss": 0.0061,
      "step": 2665640
    },
    {
      "epoch": 4.362411055032959,
      "grad_norm": 0.11356008052825928,
      "learning_rate": 1.2835803193136666e-06,
      "loss": 0.0126,
      "step": 2665660
    },
    {
      "epoch": 4.362443785471613,
      "grad_norm": 0.0958426371216774,
      "learning_rate": 1.2835144271001496e-06,
      "loss": 0.0086,
      "step": 2665680
    },
    {
      "epoch": 4.3624765159102665,
      "grad_norm": 0.880466639995575,
      "learning_rate": 1.2834485348866327e-06,
      "loss": 0.0116,
      "step": 2665700
    },
    {
      "epoch": 4.362509246348919,
      "grad_norm": 0.42667239904403687,
      "learning_rate": 1.2833826426731155e-06,
      "loss": 0.009,
      "step": 2665720
    },
    {
      "epoch": 4.362541976787573,
      "grad_norm": 0.3700115382671356,
      "learning_rate": 1.2833167504595984e-06,
      "loss": 0.0099,
      "step": 2665740
    },
    {
      "epoch": 4.362574707226226,
      "grad_norm": 0.3624793291091919,
      "learning_rate": 1.2832508582460812e-06,
      "loss": 0.0083,
      "step": 2665760
    },
    {
      "epoch": 4.362607437664879,
      "grad_norm": 0.14785242080688477,
      "learning_rate": 1.2831849660325641e-06,
      "loss": 0.0082,
      "step": 2665780
    },
    {
      "epoch": 4.362640168103533,
      "grad_norm": 0.12264008074998856,
      "learning_rate": 1.2831190738190469e-06,
      "loss": 0.0072,
      "step": 2665800
    },
    {
      "epoch": 4.362672898542186,
      "grad_norm": 0.07937292754650116,
      "learning_rate": 1.2830531816055298e-06,
      "loss": 0.0091,
      "step": 2665820
    },
    {
      "epoch": 4.36270562898084,
      "grad_norm": 0.15230295062065125,
      "learning_rate": 1.2829872893920126e-06,
      "loss": 0.0111,
      "step": 2665840
    },
    {
      "epoch": 4.362738359419493,
      "grad_norm": 0.4448256194591522,
      "learning_rate": 1.2829213971784953e-06,
      "loss": 0.0118,
      "step": 2665860
    },
    {
      "epoch": 4.362771089858146,
      "grad_norm": 0.1713096797466278,
      "learning_rate": 1.2828555049649785e-06,
      "loss": 0.0093,
      "step": 2665880
    },
    {
      "epoch": 4.3628038202968,
      "grad_norm": 0.36047089099884033,
      "learning_rate": 1.2827896127514614e-06,
      "loss": 0.0091,
      "step": 2665900
    },
    {
      "epoch": 4.362836550735453,
      "grad_norm": 0.5084154009819031,
      "learning_rate": 1.2827237205379441e-06,
      "loss": 0.0106,
      "step": 2665920
    },
    {
      "epoch": 4.362869281174106,
      "grad_norm": 0.3315044939517975,
      "learning_rate": 1.282657828324427e-06,
      "loss": 0.0071,
      "step": 2665940
    },
    {
      "epoch": 4.36290201161276,
      "grad_norm": 0.16598109900951385,
      "learning_rate": 1.2825919361109098e-06,
      "loss": 0.0096,
      "step": 2665960
    },
    {
      "epoch": 4.362934742051413,
      "grad_norm": 0.24935199320316315,
      "learning_rate": 1.2825260438973928e-06,
      "loss": 0.0087,
      "step": 2665980
    },
    {
      "epoch": 4.362967472490066,
      "grad_norm": 0.21297475695610046,
      "learning_rate": 1.2824601516838755e-06,
      "loss": 0.009,
      "step": 2666000
    },
    {
      "epoch": 4.36300020292872,
      "grad_norm": 0.1799367517232895,
      "learning_rate": 1.2823942594703585e-06,
      "loss": 0.0085,
      "step": 2666020
    },
    {
      "epoch": 4.363032933367373,
      "grad_norm": 0.07785575836896896,
      "learning_rate": 1.2823283672568412e-06,
      "loss": 0.0089,
      "step": 2666040
    },
    {
      "epoch": 4.363065663806026,
      "grad_norm": 0.07511522620916367,
      "learning_rate": 1.2822624750433244e-06,
      "loss": 0.0059,
      "step": 2666060
    },
    {
      "epoch": 4.3630983942446795,
      "grad_norm": 0.8690464496612549,
      "learning_rate": 1.2821965828298071e-06,
      "loss": 0.0124,
      "step": 2666080
    },
    {
      "epoch": 4.363131124683333,
      "grad_norm": 0.12490913271903992,
      "learning_rate": 1.28213069061629e-06,
      "loss": 0.0068,
      "step": 2666100
    },
    {
      "epoch": 4.363163855121987,
      "grad_norm": 0.3233288526535034,
      "learning_rate": 1.2820647984027728e-06,
      "loss": 0.0053,
      "step": 2666120
    },
    {
      "epoch": 4.363196585560639,
      "grad_norm": 0.19392666220664978,
      "learning_rate": 1.2819989061892558e-06,
      "loss": 0.0109,
      "step": 2666140
    },
    {
      "epoch": 4.363229315999293,
      "grad_norm": 0.13637588918209076,
      "learning_rate": 1.2819330139757385e-06,
      "loss": 0.0085,
      "step": 2666160
    },
    {
      "epoch": 4.363262046437947,
      "grad_norm": 0.2465481013059616,
      "learning_rate": 1.2818671217622215e-06,
      "loss": 0.0114,
      "step": 2666180
    },
    {
      "epoch": 4.3632947768766,
      "grad_norm": 0.17230059206485748,
      "learning_rate": 1.2818012295487042e-06,
      "loss": 0.0112,
      "step": 2666200
    },
    {
      "epoch": 4.363327507315253,
      "grad_norm": 0.5729560852050781,
      "learning_rate": 1.2817353373351872e-06,
      "loss": 0.0082,
      "step": 2666220
    },
    {
      "epoch": 4.3633602377539065,
      "grad_norm": 0.30930179357528687,
      "learning_rate": 1.28166944512167e-06,
      "loss": 0.0084,
      "step": 2666240
    },
    {
      "epoch": 4.36339296819256,
      "grad_norm": 0.6674811840057373,
      "learning_rate": 1.281603552908153e-06,
      "loss": 0.012,
      "step": 2666260
    },
    {
      "epoch": 4.363425698631213,
      "grad_norm": 0.3774508535861969,
      "learning_rate": 1.2815376606946358e-06,
      "loss": 0.0084,
      "step": 2666280
    },
    {
      "epoch": 4.363458429069866,
      "grad_norm": 0.25647926330566406,
      "learning_rate": 1.2814717684811187e-06,
      "loss": 0.0058,
      "step": 2666300
    },
    {
      "epoch": 4.36349115950852,
      "grad_norm": 0.3451472520828247,
      "learning_rate": 1.2814058762676015e-06,
      "loss": 0.0077,
      "step": 2666320
    },
    {
      "epoch": 4.363523889947173,
      "grad_norm": 0.155326247215271,
      "learning_rate": 1.2813399840540844e-06,
      "loss": 0.0094,
      "step": 2666340
    },
    {
      "epoch": 4.363556620385826,
      "grad_norm": 0.09034890681505203,
      "learning_rate": 1.2812740918405672e-06,
      "loss": 0.0059,
      "step": 2666360
    },
    {
      "epoch": 4.36358935082448,
      "grad_norm": 0.1207762137055397,
      "learning_rate": 1.2812081996270501e-06,
      "loss": 0.0077,
      "step": 2666380
    },
    {
      "epoch": 4.3636220812631334,
      "grad_norm": 0.24986973404884338,
      "learning_rate": 1.281142307413533e-06,
      "loss": 0.0165,
      "step": 2666400
    },
    {
      "epoch": 4.363654811701786,
      "grad_norm": 0.2786218523979187,
      "learning_rate": 1.281076415200016e-06,
      "loss": 0.0071,
      "step": 2666420
    },
    {
      "epoch": 4.36368754214044,
      "grad_norm": 0.2886165976524353,
      "learning_rate": 1.2810105229864988e-06,
      "loss": 0.0083,
      "step": 2666440
    },
    {
      "epoch": 4.363720272579093,
      "grad_norm": 0.12602373957633972,
      "learning_rate": 1.2809446307729817e-06,
      "loss": 0.0123,
      "step": 2666460
    },
    {
      "epoch": 4.363753003017746,
      "grad_norm": 0.14113423228263855,
      "learning_rate": 1.2808787385594645e-06,
      "loss": 0.0118,
      "step": 2666480
    },
    {
      "epoch": 4.3637857334564,
      "grad_norm": 0.15181580185890198,
      "learning_rate": 1.2808128463459474e-06,
      "loss": 0.0091,
      "step": 2666500
    },
    {
      "epoch": 4.363818463895053,
      "grad_norm": 0.3528255224227905,
      "learning_rate": 1.2807469541324302e-06,
      "loss": 0.013,
      "step": 2666520
    },
    {
      "epoch": 4.363851194333707,
      "grad_norm": 0.32910844683647156,
      "learning_rate": 1.280681061918913e-06,
      "loss": 0.0211,
      "step": 2666540
    },
    {
      "epoch": 4.3638839247723595,
      "grad_norm": 0.2705238461494446,
      "learning_rate": 1.2806151697053958e-06,
      "loss": 0.0094,
      "step": 2666560
    },
    {
      "epoch": 4.363916655211013,
      "grad_norm": 0.32255464792251587,
      "learning_rate": 1.280549277491879e-06,
      "loss": 0.0113,
      "step": 2666580
    },
    {
      "epoch": 4.363949385649667,
      "grad_norm": 0.3245677351951599,
      "learning_rate": 1.280483385278362e-06,
      "loss": 0.0062,
      "step": 2666600
    },
    {
      "epoch": 4.363982116088319,
      "grad_norm": 0.14411360025405884,
      "learning_rate": 1.2804174930648447e-06,
      "loss": 0.0117,
      "step": 2666620
    },
    {
      "epoch": 4.364014846526973,
      "grad_norm": 0.06557340174913406,
      "learning_rate": 1.2803516008513274e-06,
      "loss": 0.0086,
      "step": 2666640
    },
    {
      "epoch": 4.364047576965627,
      "grad_norm": 0.24434895813465118,
      "learning_rate": 1.2802857086378104e-06,
      "loss": 0.0105,
      "step": 2666660
    },
    {
      "epoch": 4.36408030740428,
      "grad_norm": 0.24364598095417023,
      "learning_rate": 1.2802198164242931e-06,
      "loss": 0.0064,
      "step": 2666680
    },
    {
      "epoch": 4.364113037842933,
      "grad_norm": 0.24092204868793488,
      "learning_rate": 1.280153924210776e-06,
      "loss": 0.0057,
      "step": 2666700
    },
    {
      "epoch": 4.3641457682815865,
      "grad_norm": 0.12319650501012802,
      "learning_rate": 1.2800880319972588e-06,
      "loss": 0.0072,
      "step": 2666720
    },
    {
      "epoch": 4.36417849872024,
      "grad_norm": 0.10170228779315948,
      "learning_rate": 1.2800221397837418e-06,
      "loss": 0.0091,
      "step": 2666740
    },
    {
      "epoch": 4.364211229158893,
      "grad_norm": 0.09803275763988495,
      "learning_rate": 1.279956247570225e-06,
      "loss": 0.0143,
      "step": 2666760
    },
    {
      "epoch": 4.364243959597546,
      "grad_norm": 0.19551819562911987,
      "learning_rate": 1.2798903553567077e-06,
      "loss": 0.0087,
      "step": 2666780
    },
    {
      "epoch": 4.3642766900362,
      "grad_norm": 0.12763866782188416,
      "learning_rate": 1.2798244631431906e-06,
      "loss": 0.0066,
      "step": 2666800
    },
    {
      "epoch": 4.364309420474854,
      "grad_norm": 0.3133808374404907,
      "learning_rate": 1.2797585709296734e-06,
      "loss": 0.0113,
      "step": 2666820
    },
    {
      "epoch": 4.364342150913506,
      "grad_norm": 0.4396143853664398,
      "learning_rate": 1.2796926787161561e-06,
      "loss": 0.008,
      "step": 2666840
    },
    {
      "epoch": 4.36437488135216,
      "grad_norm": 0.37805458903312683,
      "learning_rate": 1.279626786502639e-06,
      "loss": 0.0078,
      "step": 2666860
    },
    {
      "epoch": 4.3644076117908135,
      "grad_norm": 0.15479621291160583,
      "learning_rate": 1.2795608942891218e-06,
      "loss": 0.0102,
      "step": 2666880
    },
    {
      "epoch": 4.364440342229466,
      "grad_norm": 0.42012640833854675,
      "learning_rate": 1.2794950020756047e-06,
      "loss": 0.009,
      "step": 2666900
    },
    {
      "epoch": 4.36447307266812,
      "grad_norm": 0.5210442543029785,
      "learning_rate": 1.2794291098620875e-06,
      "loss": 0.008,
      "step": 2666920
    },
    {
      "epoch": 4.364505803106773,
      "grad_norm": 0.2863600552082062,
      "learning_rate": 1.2793632176485707e-06,
      "loss": 0.0115,
      "step": 2666940
    },
    {
      "epoch": 4.364538533545427,
      "grad_norm": 0.15286864340305328,
      "learning_rate": 1.2792973254350536e-06,
      "loss": 0.0105,
      "step": 2666960
    },
    {
      "epoch": 4.36457126398408,
      "grad_norm": 0.5963016748428345,
      "learning_rate": 1.2792314332215363e-06,
      "loss": 0.0152,
      "step": 2666980
    },
    {
      "epoch": 4.364603994422733,
      "grad_norm": 0.13186095654964447,
      "learning_rate": 1.2791655410080193e-06,
      "loss": 0.0112,
      "step": 2667000
    },
    {
      "epoch": 4.364636724861387,
      "grad_norm": 0.08444175869226456,
      "learning_rate": 1.279099648794502e-06,
      "loss": 0.0099,
      "step": 2667020
    },
    {
      "epoch": 4.36466945530004,
      "grad_norm": 0.0990641638636589,
      "learning_rate": 1.279033756580985e-06,
      "loss": 0.0207,
      "step": 2667040
    },
    {
      "epoch": 4.364702185738693,
      "grad_norm": 0.3953950107097626,
      "learning_rate": 1.2789678643674677e-06,
      "loss": 0.013,
      "step": 2667060
    },
    {
      "epoch": 4.364734916177347,
      "grad_norm": 0.23257455229759216,
      "learning_rate": 1.2789019721539505e-06,
      "loss": 0.0106,
      "step": 2667080
    },
    {
      "epoch": 4.364767646616,
      "grad_norm": 0.19428543746471405,
      "learning_rate": 1.2788360799404334e-06,
      "loss": 0.0081,
      "step": 2667100
    },
    {
      "epoch": 4.364800377054653,
      "grad_norm": 0.08546119928359985,
      "learning_rate": 1.2787701877269166e-06,
      "loss": 0.0145,
      "step": 2667120
    },
    {
      "epoch": 4.364833107493307,
      "grad_norm": 0.13633456826210022,
      "learning_rate": 1.2787042955133993e-06,
      "loss": 0.0101,
      "step": 2667140
    },
    {
      "epoch": 4.36486583793196,
      "grad_norm": 0.234308660030365,
      "learning_rate": 1.2786384032998823e-06,
      "loss": 0.012,
      "step": 2667160
    },
    {
      "epoch": 4.364898568370613,
      "grad_norm": 0.24572135508060455,
      "learning_rate": 1.278572511086365e-06,
      "loss": 0.0081,
      "step": 2667180
    },
    {
      "epoch": 4.364931298809267,
      "grad_norm": 0.07320711016654968,
      "learning_rate": 1.278506618872848e-06,
      "loss": 0.0058,
      "step": 2667200
    },
    {
      "epoch": 4.36496402924792,
      "grad_norm": 0.1572830080986023,
      "learning_rate": 1.2784407266593307e-06,
      "loss": 0.0089,
      "step": 2667220
    },
    {
      "epoch": 4.364996759686573,
      "grad_norm": 0.2176382839679718,
      "learning_rate": 1.2783748344458137e-06,
      "loss": 0.0076,
      "step": 2667240
    },
    {
      "epoch": 4.3650294901252265,
      "grad_norm": 0.19822625815868378,
      "learning_rate": 1.2783089422322964e-06,
      "loss": 0.0126,
      "step": 2667260
    },
    {
      "epoch": 4.36506222056388,
      "grad_norm": 0.17777541279792786,
      "learning_rate": 1.2782430500187796e-06,
      "loss": 0.0117,
      "step": 2667280
    },
    {
      "epoch": 4.365094951002534,
      "grad_norm": 0.48767349123954773,
      "learning_rate": 1.2781771578052623e-06,
      "loss": 0.008,
      "step": 2667300
    },
    {
      "epoch": 4.365127681441186,
      "grad_norm": 0.17636185884475708,
      "learning_rate": 1.2781112655917452e-06,
      "loss": 0.0125,
      "step": 2667320
    },
    {
      "epoch": 4.36516041187984,
      "grad_norm": 0.7903162240982056,
      "learning_rate": 1.278045373378228e-06,
      "loss": 0.0118,
      "step": 2667340
    },
    {
      "epoch": 4.3651931423184935,
      "grad_norm": 0.11282648146152496,
      "learning_rate": 1.277979481164711e-06,
      "loss": 0.0113,
      "step": 2667360
    },
    {
      "epoch": 4.365225872757147,
      "grad_norm": 0.48746174573898315,
      "learning_rate": 1.2779135889511937e-06,
      "loss": 0.0158,
      "step": 2667380
    },
    {
      "epoch": 4.3652586031958,
      "grad_norm": 0.4599892199039459,
      "learning_rate": 1.2778476967376766e-06,
      "loss": 0.0067,
      "step": 2667400
    },
    {
      "epoch": 4.365291333634453,
      "grad_norm": 0.49925974011421204,
      "learning_rate": 1.2777818045241594e-06,
      "loss": 0.0113,
      "step": 2667420
    },
    {
      "epoch": 4.365324064073107,
      "grad_norm": 0.2771844267845154,
      "learning_rate": 1.2777159123106423e-06,
      "loss": 0.0166,
      "step": 2667440
    },
    {
      "epoch": 4.36535679451176,
      "grad_norm": 0.39696791768074036,
      "learning_rate": 1.2776500200971253e-06,
      "loss": 0.0073,
      "step": 2667460
    },
    {
      "epoch": 4.365389524950413,
      "grad_norm": 0.5991905331611633,
      "learning_rate": 1.2775841278836082e-06,
      "loss": 0.0086,
      "step": 2667480
    },
    {
      "epoch": 4.365422255389067,
      "grad_norm": 0.32018980383872986,
      "learning_rate": 1.277518235670091e-06,
      "loss": 0.0056,
      "step": 2667500
    },
    {
      "epoch": 4.36545498582772,
      "grad_norm": 0.15407013893127441,
      "learning_rate": 1.277452343456574e-06,
      "loss": 0.0134,
      "step": 2667520
    },
    {
      "epoch": 4.365487716266373,
      "grad_norm": 0.1987883597612381,
      "learning_rate": 1.2773864512430567e-06,
      "loss": 0.0067,
      "step": 2667540
    },
    {
      "epoch": 4.365520446705027,
      "grad_norm": 0.32871997356414795,
      "learning_rate": 1.2773205590295396e-06,
      "loss": 0.0089,
      "step": 2667560
    },
    {
      "epoch": 4.36555317714368,
      "grad_norm": 0.10698252171278,
      "learning_rate": 1.2772546668160223e-06,
      "loss": 0.0088,
      "step": 2667580
    },
    {
      "epoch": 4.365585907582333,
      "grad_norm": 0.7408015727996826,
      "learning_rate": 1.2771887746025053e-06,
      "loss": 0.0129,
      "step": 2667600
    },
    {
      "epoch": 4.365618638020987,
      "grad_norm": 0.1744043529033661,
      "learning_rate": 1.277122882388988e-06,
      "loss": 0.0066,
      "step": 2667620
    },
    {
      "epoch": 4.36565136845964,
      "grad_norm": 0.15977755188941956,
      "learning_rate": 1.2770569901754712e-06,
      "loss": 0.0126,
      "step": 2667640
    },
    {
      "epoch": 4.365684098898294,
      "grad_norm": 0.1543755680322647,
      "learning_rate": 1.276991097961954e-06,
      "loss": 0.0079,
      "step": 2667660
    },
    {
      "epoch": 4.365716829336947,
      "grad_norm": 0.09036378562450409,
      "learning_rate": 1.2769252057484369e-06,
      "loss": 0.0068,
      "step": 2667680
    },
    {
      "epoch": 4.3657495597756,
      "grad_norm": 0.44558048248291016,
      "learning_rate": 1.2768593135349196e-06,
      "loss": 0.0086,
      "step": 2667700
    },
    {
      "epoch": 4.365782290214254,
      "grad_norm": 0.0978182703256607,
      "learning_rate": 1.2767934213214026e-06,
      "loss": 0.0079,
      "step": 2667720
    },
    {
      "epoch": 4.3658150206529065,
      "grad_norm": 0.2274623066186905,
      "learning_rate": 1.2767275291078853e-06,
      "loss": 0.0106,
      "step": 2667740
    },
    {
      "epoch": 4.36584775109156,
      "grad_norm": 0.3477606475353241,
      "learning_rate": 1.2766616368943683e-06,
      "loss": 0.0096,
      "step": 2667760
    },
    {
      "epoch": 4.365880481530214,
      "grad_norm": 0.46563151478767395,
      "learning_rate": 1.276595744680851e-06,
      "loss": 0.0095,
      "step": 2667780
    },
    {
      "epoch": 4.365913211968866,
      "grad_norm": 0.16452008485794067,
      "learning_rate": 1.276529852467334e-06,
      "loss": 0.0088,
      "step": 2667800
    },
    {
      "epoch": 4.36594594240752,
      "grad_norm": 0.5305641293525696,
      "learning_rate": 1.2764639602538171e-06,
      "loss": 0.0111,
      "step": 2667820
    },
    {
      "epoch": 4.365978672846174,
      "grad_norm": 0.07162755727767944,
      "learning_rate": 1.2763980680402999e-06,
      "loss": 0.0068,
      "step": 2667840
    },
    {
      "epoch": 4.366011403284827,
      "grad_norm": 0.4767194092273712,
      "learning_rate": 1.2763321758267826e-06,
      "loss": 0.0081,
      "step": 2667860
    },
    {
      "epoch": 4.36604413372348,
      "grad_norm": 0.1636149138212204,
      "learning_rate": 1.2762662836132656e-06,
      "loss": 0.0077,
      "step": 2667880
    },
    {
      "epoch": 4.3660768641621335,
      "grad_norm": 0.19446061551570892,
      "learning_rate": 1.2762003913997483e-06,
      "loss": 0.0149,
      "step": 2667900
    },
    {
      "epoch": 4.366109594600787,
      "grad_norm": 0.11599549651145935,
      "learning_rate": 1.2761344991862313e-06,
      "loss": 0.0092,
      "step": 2667920
    },
    {
      "epoch": 4.36614232503944,
      "grad_norm": 0.32116255164146423,
      "learning_rate": 1.276068606972714e-06,
      "loss": 0.0105,
      "step": 2667940
    },
    {
      "epoch": 4.366175055478093,
      "grad_norm": 0.208008274435997,
      "learning_rate": 1.276002714759197e-06,
      "loss": 0.0085,
      "step": 2667960
    },
    {
      "epoch": 4.366207785916747,
      "grad_norm": 0.4073244631290436,
      "learning_rate": 1.2759368225456797e-06,
      "loss": 0.0064,
      "step": 2667980
    },
    {
      "epoch": 4.366240516355401,
      "grad_norm": 0.09303648769855499,
      "learning_rate": 1.2758709303321628e-06,
      "loss": 0.009,
      "step": 2668000
    },
    {
      "epoch": 4.366273246794053,
      "grad_norm": 0.13731661438941956,
      "learning_rate": 1.2758050381186458e-06,
      "loss": 0.0072,
      "step": 2668020
    },
    {
      "epoch": 4.366305977232707,
      "grad_norm": 0.14311198890209198,
      "learning_rate": 1.2757391459051285e-06,
      "loss": 0.0072,
      "step": 2668040
    },
    {
      "epoch": 4.3663387076713605,
      "grad_norm": 0.23642383515834808,
      "learning_rate": 1.2756732536916113e-06,
      "loss": 0.0074,
      "step": 2668060
    },
    {
      "epoch": 4.366371438110013,
      "grad_norm": 0.3171756863594055,
      "learning_rate": 1.2756073614780942e-06,
      "loss": 0.0082,
      "step": 2668080
    },
    {
      "epoch": 4.366404168548667,
      "grad_norm": 0.3198099732398987,
      "learning_rate": 1.275541469264577e-06,
      "loss": 0.0092,
      "step": 2668100
    },
    {
      "epoch": 4.36643689898732,
      "grad_norm": 0.1784115582704544,
      "learning_rate": 1.27547557705106e-06,
      "loss": 0.0057,
      "step": 2668120
    },
    {
      "epoch": 4.366469629425974,
      "grad_norm": 0.09714942425489426,
      "learning_rate": 1.2754096848375427e-06,
      "loss": 0.0094,
      "step": 2668140
    },
    {
      "epoch": 4.366502359864627,
      "grad_norm": 0.13655206561088562,
      "learning_rate": 1.2753437926240258e-06,
      "loss": 0.0058,
      "step": 2668160
    },
    {
      "epoch": 4.36653509030328,
      "grad_norm": 0.17042842507362366,
      "learning_rate": 1.2752779004105088e-06,
      "loss": 0.0056,
      "step": 2668180
    },
    {
      "epoch": 4.366567820741934,
      "grad_norm": 0.22930006682872772,
      "learning_rate": 1.2752120081969915e-06,
      "loss": 0.0131,
      "step": 2668200
    },
    {
      "epoch": 4.3666005511805865,
      "grad_norm": 0.20458747446537018,
      "learning_rate": 1.2751461159834745e-06,
      "loss": 0.0096,
      "step": 2668220
    },
    {
      "epoch": 4.36663328161924,
      "grad_norm": 0.27559536695480347,
      "learning_rate": 1.2750802237699572e-06,
      "loss": 0.0078,
      "step": 2668240
    },
    {
      "epoch": 4.366666012057894,
      "grad_norm": 0.24950630962848663,
      "learning_rate": 1.2750143315564402e-06,
      "loss": 0.0077,
      "step": 2668260
    },
    {
      "epoch": 4.366698742496547,
      "grad_norm": 0.15361031889915466,
      "learning_rate": 1.274948439342923e-06,
      "loss": 0.0078,
      "step": 2668280
    },
    {
      "epoch": 4.3667314729352,
      "grad_norm": 0.15044787526130676,
      "learning_rate": 1.2748825471294056e-06,
      "loss": 0.0063,
      "step": 2668300
    },
    {
      "epoch": 4.366764203373854,
      "grad_norm": 0.20246347784996033,
      "learning_rate": 1.2748166549158886e-06,
      "loss": 0.0078,
      "step": 2668320
    },
    {
      "epoch": 4.366796933812507,
      "grad_norm": 0.2791934311389923,
      "learning_rate": 1.2747507627023718e-06,
      "loss": 0.0071,
      "step": 2668340
    },
    {
      "epoch": 4.36682966425116,
      "grad_norm": 0.10546968877315521,
      "learning_rate": 1.2746848704888545e-06,
      "loss": 0.01,
      "step": 2668360
    },
    {
      "epoch": 4.3668623946898135,
      "grad_norm": 0.19438907504081726,
      "learning_rate": 1.2746189782753374e-06,
      "loss": 0.0077,
      "step": 2668380
    },
    {
      "epoch": 4.366895125128467,
      "grad_norm": 0.18218956887722015,
      "learning_rate": 1.2745530860618202e-06,
      "loss": 0.0098,
      "step": 2668400
    },
    {
      "epoch": 4.366927855567121,
      "grad_norm": 0.17769892513751984,
      "learning_rate": 1.2744871938483031e-06,
      "loss": 0.0078,
      "step": 2668420
    },
    {
      "epoch": 4.366960586005773,
      "grad_norm": 0.16335484385490417,
      "learning_rate": 1.2744213016347859e-06,
      "loss": 0.0067,
      "step": 2668440
    },
    {
      "epoch": 4.366993316444427,
      "grad_norm": 0.1305624544620514,
      "learning_rate": 1.2743554094212688e-06,
      "loss": 0.0078,
      "step": 2668460
    },
    {
      "epoch": 4.367026046883081,
      "grad_norm": 0.14200370013713837,
      "learning_rate": 1.2742895172077516e-06,
      "loss": 0.0095,
      "step": 2668480
    },
    {
      "epoch": 4.367058777321733,
      "grad_norm": 0.22714902460575104,
      "learning_rate": 1.2742236249942343e-06,
      "loss": 0.0095,
      "step": 2668500
    },
    {
      "epoch": 4.367091507760387,
      "grad_norm": 0.27454257011413574,
      "learning_rate": 1.2741577327807175e-06,
      "loss": 0.0082,
      "step": 2668520
    },
    {
      "epoch": 4.3671242381990405,
      "grad_norm": 0.41390570998191833,
      "learning_rate": 1.2740918405672004e-06,
      "loss": 0.0119,
      "step": 2668540
    },
    {
      "epoch": 4.367156968637694,
      "grad_norm": 0.14248961210250854,
      "learning_rate": 1.2740259483536832e-06,
      "loss": 0.0087,
      "step": 2668560
    },
    {
      "epoch": 4.367189699076347,
      "grad_norm": 0.19091877341270447,
      "learning_rate": 1.2739600561401661e-06,
      "loss": 0.0121,
      "step": 2668580
    },
    {
      "epoch": 4.367222429515,
      "grad_norm": 1.3680317401885986,
      "learning_rate": 1.2738941639266489e-06,
      "loss": 0.0135,
      "step": 2668600
    },
    {
      "epoch": 4.367255159953654,
      "grad_norm": 0.04862620681524277,
      "learning_rate": 1.2738282717131318e-06,
      "loss": 0.0087,
      "step": 2668620
    },
    {
      "epoch": 4.367287890392307,
      "grad_norm": 0.3316795229911804,
      "learning_rate": 1.2737623794996145e-06,
      "loss": 0.011,
      "step": 2668640
    },
    {
      "epoch": 4.36732062083096,
      "grad_norm": 0.09287059307098389,
      "learning_rate": 1.2736964872860975e-06,
      "loss": 0.0099,
      "step": 2668660
    },
    {
      "epoch": 4.367353351269614,
      "grad_norm": 0.7257905006408691,
      "learning_rate": 1.2736305950725802e-06,
      "loss": 0.0098,
      "step": 2668680
    },
    {
      "epoch": 4.367386081708267,
      "grad_norm": 0.15677201747894287,
      "learning_rate": 1.2735647028590634e-06,
      "loss": 0.0163,
      "step": 2668700
    },
    {
      "epoch": 4.36741881214692,
      "grad_norm": 0.4154847264289856,
      "learning_rate": 1.2734988106455461e-06,
      "loss": 0.0097,
      "step": 2668720
    },
    {
      "epoch": 4.367451542585574,
      "grad_norm": 0.3084239065647125,
      "learning_rate": 1.273432918432029e-06,
      "loss": 0.006,
      "step": 2668740
    },
    {
      "epoch": 4.367484273024227,
      "grad_norm": 0.24936525523662567,
      "learning_rate": 1.2733670262185118e-06,
      "loss": 0.0088,
      "step": 2668760
    },
    {
      "epoch": 4.36751700346288,
      "grad_norm": 0.27823591232299805,
      "learning_rate": 1.2733011340049948e-06,
      "loss": 0.0057,
      "step": 2668780
    },
    {
      "epoch": 4.367549733901534,
      "grad_norm": 0.2563747763633728,
      "learning_rate": 1.2732352417914775e-06,
      "loss": 0.0085,
      "step": 2668800
    },
    {
      "epoch": 4.367582464340187,
      "grad_norm": 0.27829882502555847,
      "learning_rate": 1.2731693495779605e-06,
      "loss": 0.0107,
      "step": 2668820
    },
    {
      "epoch": 4.367615194778841,
      "grad_norm": 0.3467239439487457,
      "learning_rate": 1.2731034573644432e-06,
      "loss": 0.0087,
      "step": 2668840
    },
    {
      "epoch": 4.367647925217494,
      "grad_norm": 0.10713919997215271,
      "learning_rate": 1.2730375651509264e-06,
      "loss": 0.0091,
      "step": 2668860
    },
    {
      "epoch": 4.367680655656147,
      "grad_norm": 0.1130153089761734,
      "learning_rate": 1.2729716729374091e-06,
      "loss": 0.0091,
      "step": 2668880
    },
    {
      "epoch": 4.367713386094801,
      "grad_norm": 0.08688383549451828,
      "learning_rate": 1.272905780723892e-06,
      "loss": 0.0065,
      "step": 2668900
    },
    {
      "epoch": 4.3677461165334535,
      "grad_norm": 0.25844669342041016,
      "learning_rate": 1.2728398885103748e-06,
      "loss": 0.0083,
      "step": 2668920
    },
    {
      "epoch": 4.367778846972107,
      "grad_norm": 0.028722718358039856,
      "learning_rate": 1.2727739962968578e-06,
      "loss": 0.0097,
      "step": 2668940
    },
    {
      "epoch": 4.367811577410761,
      "grad_norm": 0.14196008443832397,
      "learning_rate": 1.2727081040833405e-06,
      "loss": 0.0142,
      "step": 2668960
    },
    {
      "epoch": 4.367844307849413,
      "grad_norm": 0.27972546219825745,
      "learning_rate": 1.2726422118698234e-06,
      "loss": 0.0077,
      "step": 2668980
    },
    {
      "epoch": 4.367877038288067,
      "grad_norm": 0.31012582778930664,
      "learning_rate": 1.2725763196563062e-06,
      "loss": 0.0091,
      "step": 2669000
    },
    {
      "epoch": 4.3679097687267205,
      "grad_norm": 0.23184603452682495,
      "learning_rate": 1.2725104274427891e-06,
      "loss": 0.0101,
      "step": 2669020
    },
    {
      "epoch": 4.367942499165374,
      "grad_norm": 0.29033949971199036,
      "learning_rate": 1.272444535229272e-06,
      "loss": 0.0092,
      "step": 2669040
    },
    {
      "epoch": 4.367975229604027,
      "grad_norm": 0.24393971264362335,
      "learning_rate": 1.272378643015755e-06,
      "loss": 0.006,
      "step": 2669060
    },
    {
      "epoch": 4.36800796004268,
      "grad_norm": 0.057150669395923615,
      "learning_rate": 1.2723127508022378e-06,
      "loss": 0.0084,
      "step": 2669080
    },
    {
      "epoch": 4.368040690481334,
      "grad_norm": 0.18331201374530792,
      "learning_rate": 1.2722468585887207e-06,
      "loss": 0.0079,
      "step": 2669100
    },
    {
      "epoch": 4.368073420919988,
      "grad_norm": 0.06464269012212753,
      "learning_rate": 1.2721809663752035e-06,
      "loss": 0.0106,
      "step": 2669120
    },
    {
      "epoch": 4.36810615135864,
      "grad_norm": 0.17649365961551666,
      "learning_rate": 1.2721150741616864e-06,
      "loss": 0.005,
      "step": 2669140
    },
    {
      "epoch": 4.368138881797294,
      "grad_norm": 0.16573171317577362,
      "learning_rate": 1.2720491819481692e-06,
      "loss": 0.0115,
      "step": 2669160
    },
    {
      "epoch": 4.3681716122359475,
      "grad_norm": 0.2231760174036026,
      "learning_rate": 1.2719832897346521e-06,
      "loss": 0.0088,
      "step": 2669180
    },
    {
      "epoch": 4.3682043426746,
      "grad_norm": 0.1389179527759552,
      "learning_rate": 1.2719173975211349e-06,
      "loss": 0.0068,
      "step": 2669200
    },
    {
      "epoch": 4.368237073113254,
      "grad_norm": 0.49088677763938904,
      "learning_rate": 1.271851505307618e-06,
      "loss": 0.0067,
      "step": 2669220
    },
    {
      "epoch": 4.368269803551907,
      "grad_norm": 0.3847266137599945,
      "learning_rate": 1.271785613094101e-06,
      "loss": 0.01,
      "step": 2669240
    },
    {
      "epoch": 4.36830253399056,
      "grad_norm": 0.2261643409729004,
      "learning_rate": 1.2717197208805837e-06,
      "loss": 0.0072,
      "step": 2669260
    },
    {
      "epoch": 4.368335264429214,
      "grad_norm": 0.19109581410884857,
      "learning_rate": 1.2716538286670664e-06,
      "loss": 0.0127,
      "step": 2669280
    },
    {
      "epoch": 4.368367994867867,
      "grad_norm": 0.17812734842300415,
      "learning_rate": 1.2715879364535494e-06,
      "loss": 0.0128,
      "step": 2669300
    },
    {
      "epoch": 4.368400725306521,
      "grad_norm": 0.3154836893081665,
      "learning_rate": 1.2715220442400321e-06,
      "loss": 0.0082,
      "step": 2669320
    },
    {
      "epoch": 4.368433455745174,
      "grad_norm": 0.39547306299209595,
      "learning_rate": 1.271456152026515e-06,
      "loss": 0.0167,
      "step": 2669340
    },
    {
      "epoch": 4.368466186183827,
      "grad_norm": 0.10905737429857254,
      "learning_rate": 1.2713902598129978e-06,
      "loss": 0.0086,
      "step": 2669360
    },
    {
      "epoch": 4.368498916622481,
      "grad_norm": 0.27639809250831604,
      "learning_rate": 1.2713243675994808e-06,
      "loss": 0.0056,
      "step": 2669380
    },
    {
      "epoch": 4.3685316470611335,
      "grad_norm": 0.11118700355291367,
      "learning_rate": 1.271258475385964e-06,
      "loss": 0.007,
      "step": 2669400
    },
    {
      "epoch": 4.368564377499787,
      "grad_norm": 0.8215851783752441,
      "learning_rate": 1.2711925831724467e-06,
      "loss": 0.0122,
      "step": 2669420
    },
    {
      "epoch": 4.368597107938441,
      "grad_norm": 0.12167519330978394,
      "learning_rate": 1.2711266909589296e-06,
      "loss": 0.0083,
      "step": 2669440
    },
    {
      "epoch": 4.368629838377094,
      "grad_norm": 0.1992769092321396,
      "learning_rate": 1.2710607987454124e-06,
      "loss": 0.0082,
      "step": 2669460
    },
    {
      "epoch": 4.368662568815747,
      "grad_norm": 0.09381593763828278,
      "learning_rate": 1.2709949065318953e-06,
      "loss": 0.0085,
      "step": 2669480
    },
    {
      "epoch": 4.368695299254401,
      "grad_norm": 0.18414488434791565,
      "learning_rate": 1.270929014318378e-06,
      "loss": 0.0111,
      "step": 2669500
    },
    {
      "epoch": 4.368728029693054,
      "grad_norm": 0.2749162018299103,
      "learning_rate": 1.2708631221048608e-06,
      "loss": 0.01,
      "step": 2669520
    },
    {
      "epoch": 4.368760760131707,
      "grad_norm": 0.1775885671377182,
      "learning_rate": 1.2707972298913438e-06,
      "loss": 0.0165,
      "step": 2669540
    },
    {
      "epoch": 4.3687934905703605,
      "grad_norm": 0.1807984709739685,
      "learning_rate": 1.2707313376778265e-06,
      "loss": 0.0083,
      "step": 2669560
    },
    {
      "epoch": 4.368826221009014,
      "grad_norm": 0.23482991755008698,
      "learning_rate": 1.2706654454643097e-06,
      "loss": 0.0079,
      "step": 2669580
    },
    {
      "epoch": 4.368858951447668,
      "grad_norm": 0.12124400585889816,
      "learning_rate": 1.2705995532507926e-06,
      "loss": 0.0055,
      "step": 2669600
    },
    {
      "epoch": 4.36889168188632,
      "grad_norm": 0.2571104168891907,
      "learning_rate": 1.2705336610372754e-06,
      "loss": 0.0116,
      "step": 2669620
    },
    {
      "epoch": 4.368924412324974,
      "grad_norm": 0.25041401386260986,
      "learning_rate": 1.2704677688237583e-06,
      "loss": 0.0078,
      "step": 2669640
    },
    {
      "epoch": 4.368957142763628,
      "grad_norm": 0.3713242709636688,
      "learning_rate": 1.270401876610241e-06,
      "loss": 0.0101,
      "step": 2669660
    },
    {
      "epoch": 4.36898987320228,
      "grad_norm": 0.30066269636154175,
      "learning_rate": 1.270335984396724e-06,
      "loss": 0.0133,
      "step": 2669680
    },
    {
      "epoch": 4.369022603640934,
      "grad_norm": 0.23076024651527405,
      "learning_rate": 1.2702700921832067e-06,
      "loss": 0.008,
      "step": 2669700
    },
    {
      "epoch": 4.3690553340795875,
      "grad_norm": 0.36100709438323975,
      "learning_rate": 1.2702041999696895e-06,
      "loss": 0.0085,
      "step": 2669720
    },
    {
      "epoch": 4.369088064518241,
      "grad_norm": 0.44672852754592896,
      "learning_rate": 1.2701383077561726e-06,
      "loss": 0.0058,
      "step": 2669740
    },
    {
      "epoch": 4.369120794956894,
      "grad_norm": 0.3792712092399597,
      "learning_rate": 1.2700724155426556e-06,
      "loss": 0.0069,
      "step": 2669760
    },
    {
      "epoch": 4.369153525395547,
      "grad_norm": 0.28838247060775757,
      "learning_rate": 1.2700065233291383e-06,
      "loss": 0.0118,
      "step": 2669780
    },
    {
      "epoch": 4.369186255834201,
      "grad_norm": 0.2809664309024811,
      "learning_rate": 1.2699406311156213e-06,
      "loss": 0.006,
      "step": 2669800
    },
    {
      "epoch": 4.369218986272854,
      "grad_norm": 0.567575216293335,
      "learning_rate": 1.269874738902104e-06,
      "loss": 0.0097,
      "step": 2669820
    },
    {
      "epoch": 4.369251716711507,
      "grad_norm": 0.6773225665092468,
      "learning_rate": 1.269808846688587e-06,
      "loss": 0.0123,
      "step": 2669840
    },
    {
      "epoch": 4.369284447150161,
      "grad_norm": 0.12522877752780914,
      "learning_rate": 1.2697429544750697e-06,
      "loss": 0.0063,
      "step": 2669860
    },
    {
      "epoch": 4.369317177588814,
      "grad_norm": 0.45142385363578796,
      "learning_rate": 1.2696770622615527e-06,
      "loss": 0.0091,
      "step": 2669880
    },
    {
      "epoch": 4.369349908027467,
      "grad_norm": 0.19965267181396484,
      "learning_rate": 1.2696111700480354e-06,
      "loss": 0.0086,
      "step": 2669900
    },
    {
      "epoch": 4.369382638466121,
      "grad_norm": 0.6952590346336365,
      "learning_rate": 1.2695452778345186e-06,
      "loss": 0.0093,
      "step": 2669920
    },
    {
      "epoch": 4.369415368904774,
      "grad_norm": 0.18440470099449158,
      "learning_rate": 1.2694793856210013e-06,
      "loss": 0.0081,
      "step": 2669940
    },
    {
      "epoch": 4.369448099343427,
      "grad_norm": 0.11761767417192459,
      "learning_rate": 1.2694134934074843e-06,
      "loss": 0.0119,
      "step": 2669960
    },
    {
      "epoch": 4.369480829782081,
      "grad_norm": 0.12053502351045609,
      "learning_rate": 1.269347601193967e-06,
      "loss": 0.009,
      "step": 2669980
    },
    {
      "epoch": 4.369513560220734,
      "grad_norm": 0.3395122289657593,
      "learning_rate": 1.26928170898045e-06,
      "loss": 0.0118,
      "step": 2670000
    },
    {
      "epoch": 4.369546290659388,
      "grad_norm": 0.23142914474010468,
      "learning_rate": 1.2692158167669327e-06,
      "loss": 0.0082,
      "step": 2670020
    },
    {
      "epoch": 4.3695790210980405,
      "grad_norm": 0.12185437232255936,
      "learning_rate": 1.2691499245534156e-06,
      "loss": 0.008,
      "step": 2670040
    },
    {
      "epoch": 4.369611751536694,
      "grad_norm": 0.10594211518764496,
      "learning_rate": 1.2690840323398984e-06,
      "loss": 0.0096,
      "step": 2670060
    },
    {
      "epoch": 4.369644481975348,
      "grad_norm": 0.3033984303474426,
      "learning_rate": 1.2690181401263813e-06,
      "loss": 0.0072,
      "step": 2670080
    },
    {
      "epoch": 4.369677212414,
      "grad_norm": 0.3868124783039093,
      "learning_rate": 1.2689522479128643e-06,
      "loss": 0.0105,
      "step": 2670100
    },
    {
      "epoch": 4.369709942852654,
      "grad_norm": 0.05091642588376999,
      "learning_rate": 1.2688863556993472e-06,
      "loss": 0.0064,
      "step": 2670120
    },
    {
      "epoch": 4.369742673291308,
      "grad_norm": 0.2110859602689743,
      "learning_rate": 1.26882046348583e-06,
      "loss": 0.0116,
      "step": 2670140
    },
    {
      "epoch": 4.369775403729961,
      "grad_norm": 0.7450819611549377,
      "learning_rate": 1.268754571272313e-06,
      "loss": 0.0126,
      "step": 2670160
    },
    {
      "epoch": 4.369808134168614,
      "grad_norm": 0.2195814698934555,
      "learning_rate": 1.2686886790587957e-06,
      "loss": 0.0098,
      "step": 2670180
    },
    {
      "epoch": 4.3698408646072675,
      "grad_norm": 0.2114325761795044,
      "learning_rate": 1.2686227868452786e-06,
      "loss": 0.0084,
      "step": 2670200
    },
    {
      "epoch": 4.369873595045921,
      "grad_norm": 0.3992978632450104,
      "learning_rate": 1.2685568946317614e-06,
      "loss": 0.0071,
      "step": 2670220
    },
    {
      "epoch": 4.369906325484574,
      "grad_norm": 0.3333752751350403,
      "learning_rate": 1.2684910024182443e-06,
      "loss": 0.0118,
      "step": 2670240
    },
    {
      "epoch": 4.369939055923227,
      "grad_norm": 0.21052014827728271,
      "learning_rate": 1.268425110204727e-06,
      "loss": 0.0085,
      "step": 2670260
    },
    {
      "epoch": 4.369971786361881,
      "grad_norm": 0.4315575361251831,
      "learning_rate": 1.2683592179912102e-06,
      "loss": 0.0068,
      "step": 2670280
    },
    {
      "epoch": 4.370004516800535,
      "grad_norm": 0.42012035846710205,
      "learning_rate": 1.268293325777693e-06,
      "loss": 0.0125,
      "step": 2670300
    },
    {
      "epoch": 4.370037247239187,
      "grad_norm": 0.3011070191860199,
      "learning_rate": 1.268227433564176e-06,
      "loss": 0.0112,
      "step": 2670320
    },
    {
      "epoch": 4.370069977677841,
      "grad_norm": 0.1572616696357727,
      "learning_rate": 1.2681615413506586e-06,
      "loss": 0.0086,
      "step": 2670340
    },
    {
      "epoch": 4.3701027081164945,
      "grad_norm": 0.491671621799469,
      "learning_rate": 1.2680956491371416e-06,
      "loss": 0.0115,
      "step": 2670360
    },
    {
      "epoch": 4.370135438555147,
      "grad_norm": 0.36719831824302673,
      "learning_rate": 1.2680297569236243e-06,
      "loss": 0.0089,
      "step": 2670380
    },
    {
      "epoch": 4.370168168993801,
      "grad_norm": 0.19704759120941162,
      "learning_rate": 1.2679638647101073e-06,
      "loss": 0.0115,
      "step": 2670400
    },
    {
      "epoch": 4.370200899432454,
      "grad_norm": 0.34560728073120117,
      "learning_rate": 1.26789797249659e-06,
      "loss": 0.0123,
      "step": 2670420
    },
    {
      "epoch": 4.370233629871107,
      "grad_norm": 0.13029105961322784,
      "learning_rate": 1.267832080283073e-06,
      "loss": 0.0083,
      "step": 2670440
    },
    {
      "epoch": 4.370266360309761,
      "grad_norm": 0.4356772005558014,
      "learning_rate": 1.2677661880695561e-06,
      "loss": 0.0075,
      "step": 2670460
    },
    {
      "epoch": 4.370299090748414,
      "grad_norm": 0.0677621141076088,
      "learning_rate": 1.2677002958560389e-06,
      "loss": 0.0121,
      "step": 2670480
    },
    {
      "epoch": 4.370331821187068,
      "grad_norm": 0.13673032820224762,
      "learning_rate": 1.2676344036425216e-06,
      "loss": 0.0109,
      "step": 2670500
    },
    {
      "epoch": 4.370364551625721,
      "grad_norm": 0.212533637881279,
      "learning_rate": 1.2675685114290046e-06,
      "loss": 0.0122,
      "step": 2670520
    },
    {
      "epoch": 4.370397282064374,
      "grad_norm": 0.03236347809433937,
      "learning_rate": 1.2675026192154873e-06,
      "loss": 0.0093,
      "step": 2670540
    },
    {
      "epoch": 4.370430012503028,
      "grad_norm": 0.2083866000175476,
      "learning_rate": 1.2674367270019703e-06,
      "loss": 0.0087,
      "step": 2670560
    },
    {
      "epoch": 4.370462742941681,
      "grad_norm": 0.31320855021476746,
      "learning_rate": 1.267370834788453e-06,
      "loss": 0.0099,
      "step": 2670580
    },
    {
      "epoch": 4.370495473380334,
      "grad_norm": 0.3008778691291809,
      "learning_rate": 1.267304942574936e-06,
      "loss": 0.0141,
      "step": 2670600
    },
    {
      "epoch": 4.370528203818988,
      "grad_norm": 0.18984167277812958,
      "learning_rate": 1.2672390503614191e-06,
      "loss": 0.0108,
      "step": 2670620
    },
    {
      "epoch": 4.370560934257641,
      "grad_norm": 0.1819920837879181,
      "learning_rate": 1.2671731581479019e-06,
      "loss": 0.0097,
      "step": 2670640
    },
    {
      "epoch": 4.370593664696294,
      "grad_norm": 0.14946487545967102,
      "learning_rate": 1.2671072659343848e-06,
      "loss": 0.0105,
      "step": 2670660
    },
    {
      "epoch": 4.3706263951349476,
      "grad_norm": 0.08603036403656006,
      "learning_rate": 1.2670413737208675e-06,
      "loss": 0.0091,
      "step": 2670680
    },
    {
      "epoch": 4.370659125573601,
      "grad_norm": 0.32408928871154785,
      "learning_rate": 1.2669754815073503e-06,
      "loss": 0.0149,
      "step": 2670700
    },
    {
      "epoch": 4.370691856012254,
      "grad_norm": 0.16295459866523743,
      "learning_rate": 1.2669095892938332e-06,
      "loss": 0.0117,
      "step": 2670720
    },
    {
      "epoch": 4.3707245864509074,
      "grad_norm": 0.40175899863243103,
      "learning_rate": 1.266843697080316e-06,
      "loss": 0.0099,
      "step": 2670740
    },
    {
      "epoch": 4.370757316889561,
      "grad_norm": 0.1426563411951065,
      "learning_rate": 1.266777804866799e-06,
      "loss": 0.0078,
      "step": 2670760
    },
    {
      "epoch": 4.370790047328215,
      "grad_norm": 0.15280789136886597,
      "learning_rate": 1.2667119126532817e-06,
      "loss": 0.01,
      "step": 2670780
    },
    {
      "epoch": 4.370822777766867,
      "grad_norm": 0.14598581194877625,
      "learning_rate": 1.2666460204397648e-06,
      "loss": 0.0087,
      "step": 2670800
    },
    {
      "epoch": 4.370855508205521,
      "grad_norm": 0.4330284893512726,
      "learning_rate": 1.2665801282262478e-06,
      "loss": 0.0086,
      "step": 2670820
    },
    {
      "epoch": 4.3708882386441745,
      "grad_norm": 0.21628107130527496,
      "learning_rate": 1.2665142360127305e-06,
      "loss": 0.0069,
      "step": 2670840
    },
    {
      "epoch": 4.370920969082827,
      "grad_norm": 0.17187638580799103,
      "learning_rate": 1.2664483437992135e-06,
      "loss": 0.0119,
      "step": 2670860
    },
    {
      "epoch": 4.370953699521481,
      "grad_norm": 1.3660019636154175,
      "learning_rate": 1.2663824515856962e-06,
      "loss": 0.0087,
      "step": 2670880
    },
    {
      "epoch": 4.370986429960134,
      "grad_norm": 0.3373379111289978,
      "learning_rate": 1.2663165593721792e-06,
      "loss": 0.0094,
      "step": 2670900
    },
    {
      "epoch": 4.371019160398788,
      "grad_norm": 0.2917706370353699,
      "learning_rate": 1.266250667158662e-06,
      "loss": 0.0094,
      "step": 2670920
    },
    {
      "epoch": 4.371051890837441,
      "grad_norm": 0.044335346668958664,
      "learning_rate": 1.2661847749451446e-06,
      "loss": 0.0085,
      "step": 2670940
    },
    {
      "epoch": 4.371084621276094,
      "grad_norm": 0.12476355582475662,
      "learning_rate": 1.2661188827316276e-06,
      "loss": 0.0061,
      "step": 2670960
    },
    {
      "epoch": 4.371117351714748,
      "grad_norm": 0.47094160318374634,
      "learning_rate": 1.2660529905181108e-06,
      "loss": 0.0089,
      "step": 2670980
    },
    {
      "epoch": 4.371150082153401,
      "grad_norm": 0.24932557344436646,
      "learning_rate": 1.2659870983045935e-06,
      "loss": 0.0142,
      "step": 2671000
    },
    {
      "epoch": 4.371182812592054,
      "grad_norm": 0.19449038803577423,
      "learning_rate": 1.2659212060910765e-06,
      "loss": 0.007,
      "step": 2671020
    },
    {
      "epoch": 4.371215543030708,
      "grad_norm": 0.19960784912109375,
      "learning_rate": 1.2658553138775592e-06,
      "loss": 0.0106,
      "step": 2671040
    },
    {
      "epoch": 4.371248273469361,
      "grad_norm": 0.6687707901000977,
      "learning_rate": 1.2657894216640421e-06,
      "loss": 0.018,
      "step": 2671060
    },
    {
      "epoch": 4.371281003908014,
      "grad_norm": 0.5128897428512573,
      "learning_rate": 1.2657235294505249e-06,
      "loss": 0.0065,
      "step": 2671080
    },
    {
      "epoch": 4.371313734346668,
      "grad_norm": 0.18830542266368866,
      "learning_rate": 1.2656576372370078e-06,
      "loss": 0.0083,
      "step": 2671100
    },
    {
      "epoch": 4.371346464785321,
      "grad_norm": 0.357393741607666,
      "learning_rate": 1.2655917450234906e-06,
      "loss": 0.01,
      "step": 2671120
    },
    {
      "epoch": 4.371379195223974,
      "grad_norm": 0.17451059818267822,
      "learning_rate": 1.2655258528099733e-06,
      "loss": 0.0072,
      "step": 2671140
    },
    {
      "epoch": 4.371411925662628,
      "grad_norm": 0.6142624020576477,
      "learning_rate": 1.2654599605964565e-06,
      "loss": 0.0092,
      "step": 2671160
    },
    {
      "epoch": 4.371444656101281,
      "grad_norm": 0.6377134919166565,
      "learning_rate": 1.2653940683829394e-06,
      "loss": 0.012,
      "step": 2671180
    },
    {
      "epoch": 4.371477386539935,
      "grad_norm": 0.12202882766723633,
      "learning_rate": 1.2653281761694222e-06,
      "loss": 0.0112,
      "step": 2671200
    },
    {
      "epoch": 4.3715101169785875,
      "grad_norm": 0.13589759171009064,
      "learning_rate": 1.2652622839559051e-06,
      "loss": 0.0113,
      "step": 2671220
    },
    {
      "epoch": 4.371542847417241,
      "grad_norm": 0.30113813281059265,
      "learning_rate": 1.2651963917423879e-06,
      "loss": 0.0091,
      "step": 2671240
    },
    {
      "epoch": 4.371575577855895,
      "grad_norm": 0.2419261485338211,
      "learning_rate": 1.2651304995288708e-06,
      "loss": 0.014,
      "step": 2671260
    },
    {
      "epoch": 4.371608308294547,
      "grad_norm": 0.11362350732088089,
      "learning_rate": 1.2650646073153536e-06,
      "loss": 0.0118,
      "step": 2671280
    },
    {
      "epoch": 4.371641038733201,
      "grad_norm": 0.21408438682556152,
      "learning_rate": 1.2649987151018365e-06,
      "loss": 0.0114,
      "step": 2671300
    },
    {
      "epoch": 4.371673769171855,
      "grad_norm": 0.08267677575349808,
      "learning_rate": 1.2649328228883192e-06,
      "loss": 0.011,
      "step": 2671320
    },
    {
      "epoch": 4.371706499610508,
      "grad_norm": 0.3623538017272949,
      "learning_rate": 1.2648669306748024e-06,
      "loss": 0.0112,
      "step": 2671340
    },
    {
      "epoch": 4.371739230049161,
      "grad_norm": 0.17698681354522705,
      "learning_rate": 1.2648010384612851e-06,
      "loss": 0.0075,
      "step": 2671360
    },
    {
      "epoch": 4.3717719604878145,
      "grad_norm": 0.630734384059906,
      "learning_rate": 1.264735146247768e-06,
      "loss": 0.0054,
      "step": 2671380
    },
    {
      "epoch": 4.371804690926468,
      "grad_norm": 0.14507745206356049,
      "learning_rate": 1.2646692540342508e-06,
      "loss": 0.0094,
      "step": 2671400
    },
    {
      "epoch": 4.371837421365121,
      "grad_norm": 0.22969010472297668,
      "learning_rate": 1.2646033618207338e-06,
      "loss": 0.0082,
      "step": 2671420
    },
    {
      "epoch": 4.371870151803774,
      "grad_norm": 0.10422276705503464,
      "learning_rate": 1.2645374696072165e-06,
      "loss": 0.0062,
      "step": 2671440
    },
    {
      "epoch": 4.371902882242428,
      "grad_norm": 0.522238552570343,
      "learning_rate": 1.2644715773936995e-06,
      "loss": 0.0119,
      "step": 2671460
    },
    {
      "epoch": 4.3719356126810816,
      "grad_norm": 0.4641372859477997,
      "learning_rate": 1.2644056851801822e-06,
      "loss": 0.0126,
      "step": 2671480
    },
    {
      "epoch": 4.371968343119734,
      "grad_norm": 0.3116009533405304,
      "learning_rate": 1.2643397929666654e-06,
      "loss": 0.0132,
      "step": 2671500
    },
    {
      "epoch": 4.372001073558388,
      "grad_norm": 0.11622822284698486,
      "learning_rate": 1.2642739007531481e-06,
      "loss": 0.0089,
      "step": 2671520
    },
    {
      "epoch": 4.3720338039970414,
      "grad_norm": 0.10745575278997421,
      "learning_rate": 1.264208008539631e-06,
      "loss": 0.0083,
      "step": 2671540
    },
    {
      "epoch": 4.372066534435694,
      "grad_norm": 0.08824466168880463,
      "learning_rate": 1.2641421163261138e-06,
      "loss": 0.0111,
      "step": 2671560
    },
    {
      "epoch": 4.372099264874348,
      "grad_norm": 0.36751335859298706,
      "learning_rate": 1.2640762241125968e-06,
      "loss": 0.0141,
      "step": 2671580
    },
    {
      "epoch": 4.372131995313001,
      "grad_norm": 0.20112526416778564,
      "learning_rate": 1.2640103318990795e-06,
      "loss": 0.0099,
      "step": 2671600
    },
    {
      "epoch": 4.372164725751655,
      "grad_norm": 0.172600656747818,
      "learning_rate": 1.2639444396855625e-06,
      "loss": 0.011,
      "step": 2671620
    },
    {
      "epoch": 4.372197456190308,
      "grad_norm": 0.2467796504497528,
      "learning_rate": 1.2638785474720452e-06,
      "loss": 0.009,
      "step": 2671640
    },
    {
      "epoch": 4.372230186628961,
      "grad_norm": 0.07967234402894974,
      "learning_rate": 1.2638126552585281e-06,
      "loss": 0.0088,
      "step": 2671660
    },
    {
      "epoch": 4.372262917067615,
      "grad_norm": 0.28043630719184875,
      "learning_rate": 1.263746763045011e-06,
      "loss": 0.0092,
      "step": 2671680
    },
    {
      "epoch": 4.3722956475062675,
      "grad_norm": 0.08428459614515305,
      "learning_rate": 1.263680870831494e-06,
      "loss": 0.0075,
      "step": 2671700
    },
    {
      "epoch": 4.372328377944921,
      "grad_norm": 1.1833854913711548,
      "learning_rate": 1.2636149786179768e-06,
      "loss": 0.0149,
      "step": 2671720
    },
    {
      "epoch": 4.372361108383575,
      "grad_norm": 0.12161047756671906,
      "learning_rate": 1.2635490864044597e-06,
      "loss": 0.0083,
      "step": 2671740
    },
    {
      "epoch": 4.372393838822228,
      "grad_norm": 0.235249325633049,
      "learning_rate": 1.2634831941909425e-06,
      "loss": 0.01,
      "step": 2671760
    },
    {
      "epoch": 4.372426569260881,
      "grad_norm": 0.24677562713623047,
      "learning_rate": 1.2634173019774254e-06,
      "loss": 0.0096,
      "step": 2671780
    },
    {
      "epoch": 4.372459299699535,
      "grad_norm": 0.12203992903232574,
      "learning_rate": 1.2633514097639082e-06,
      "loss": 0.0055,
      "step": 2671800
    },
    {
      "epoch": 4.372492030138188,
      "grad_norm": 0.3120405972003937,
      "learning_rate": 1.2632855175503911e-06,
      "loss": 0.0123,
      "step": 2671820
    },
    {
      "epoch": 4.372524760576841,
      "grad_norm": 0.1489708125591278,
      "learning_rate": 1.2632196253368739e-06,
      "loss": 0.011,
      "step": 2671840
    },
    {
      "epoch": 4.3725574910154945,
      "grad_norm": 0.38770925998687744,
      "learning_rate": 1.263153733123357e-06,
      "loss": 0.0114,
      "step": 2671860
    },
    {
      "epoch": 4.372590221454148,
      "grad_norm": 0.5308681130409241,
      "learning_rate": 1.26308784090984e-06,
      "loss": 0.0065,
      "step": 2671880
    },
    {
      "epoch": 4.372622951892801,
      "grad_norm": 0.3200910985469818,
      "learning_rate": 1.2630219486963227e-06,
      "loss": 0.0108,
      "step": 2671900
    },
    {
      "epoch": 4.372655682331454,
      "grad_norm": 0.21103453636169434,
      "learning_rate": 1.2629560564828055e-06,
      "loss": 0.013,
      "step": 2671920
    },
    {
      "epoch": 4.372688412770108,
      "grad_norm": 0.2405044138431549,
      "learning_rate": 1.2628901642692884e-06,
      "loss": 0.0094,
      "step": 2671940
    },
    {
      "epoch": 4.372721143208762,
      "grad_norm": 0.11425832659006119,
      "learning_rate": 1.2628242720557712e-06,
      "loss": 0.0089,
      "step": 2671960
    },
    {
      "epoch": 4.372753873647414,
      "grad_norm": 0.1280488222837448,
      "learning_rate": 1.262758379842254e-06,
      "loss": 0.0137,
      "step": 2671980
    },
    {
      "epoch": 4.372786604086068,
      "grad_norm": 0.5328369140625,
      "learning_rate": 1.2626924876287368e-06,
      "loss": 0.0115,
      "step": 2672000
    },
    {
      "epoch": 4.3728193345247215,
      "grad_norm": 0.13941171765327454,
      "learning_rate": 1.2626265954152198e-06,
      "loss": 0.011,
      "step": 2672020
    },
    {
      "epoch": 4.372852064963375,
      "grad_norm": 0.3105655908584595,
      "learning_rate": 1.262560703201703e-06,
      "loss": 0.0095,
      "step": 2672040
    },
    {
      "epoch": 4.372884795402028,
      "grad_norm": 0.3441420793533325,
      "learning_rate": 1.2624948109881857e-06,
      "loss": 0.0075,
      "step": 2672060
    },
    {
      "epoch": 4.372917525840681,
      "grad_norm": 0.2625279128551483,
      "learning_rate": 1.2624289187746686e-06,
      "loss": 0.0113,
      "step": 2672080
    },
    {
      "epoch": 4.372950256279335,
      "grad_norm": 0.47304868698120117,
      "learning_rate": 1.2623630265611514e-06,
      "loss": 0.0092,
      "step": 2672100
    },
    {
      "epoch": 4.372982986717988,
      "grad_norm": 0.7563245892524719,
      "learning_rate": 1.2622971343476343e-06,
      "loss": 0.0102,
      "step": 2672120
    },
    {
      "epoch": 4.373015717156641,
      "grad_norm": 0.20663441717624664,
      "learning_rate": 1.262231242134117e-06,
      "loss": 0.0136,
      "step": 2672140
    },
    {
      "epoch": 4.373048447595295,
      "grad_norm": 0.06708867847919464,
      "learning_rate": 1.2621653499205998e-06,
      "loss": 0.0057,
      "step": 2672160
    },
    {
      "epoch": 4.373081178033948,
      "grad_norm": 0.7568283081054688,
      "learning_rate": 1.2620994577070828e-06,
      "loss": 0.0143,
      "step": 2672180
    },
    {
      "epoch": 4.373113908472601,
      "grad_norm": 0.4392610192298889,
      "learning_rate": 1.2620335654935655e-06,
      "loss": 0.0124,
      "step": 2672200
    },
    {
      "epoch": 4.373146638911255,
      "grad_norm": 0.13150295615196228,
      "learning_rate": 1.2619676732800487e-06,
      "loss": 0.006,
      "step": 2672220
    },
    {
      "epoch": 4.373179369349908,
      "grad_norm": 0.2886855900287628,
      "learning_rate": 1.2619017810665316e-06,
      "loss": 0.0069,
      "step": 2672240
    },
    {
      "epoch": 4.373212099788561,
      "grad_norm": 0.15147387981414795,
      "learning_rate": 1.2618358888530144e-06,
      "loss": 0.0123,
      "step": 2672260
    },
    {
      "epoch": 4.373244830227215,
      "grad_norm": 0.6933910846710205,
      "learning_rate": 1.2617699966394973e-06,
      "loss": 0.0075,
      "step": 2672280
    },
    {
      "epoch": 4.373277560665868,
      "grad_norm": 0.155575230717659,
      "learning_rate": 1.26170410442598e-06,
      "loss": 0.0088,
      "step": 2672300
    },
    {
      "epoch": 4.373310291104522,
      "grad_norm": 0.165333092212677,
      "learning_rate": 1.261638212212463e-06,
      "loss": 0.0126,
      "step": 2672320
    },
    {
      "epoch": 4.373343021543175,
      "grad_norm": 0.24734501540660858,
      "learning_rate": 1.2615723199989457e-06,
      "loss": 0.0139,
      "step": 2672340
    },
    {
      "epoch": 4.373375751981828,
      "grad_norm": 0.06958729028701782,
      "learning_rate": 1.2615064277854285e-06,
      "loss": 0.0104,
      "step": 2672360
    },
    {
      "epoch": 4.373408482420482,
      "grad_norm": 0.5806494951248169,
      "learning_rate": 1.2614405355719116e-06,
      "loss": 0.014,
      "step": 2672380
    },
    {
      "epoch": 4.3734412128591345,
      "grad_norm": 0.30570900440216064,
      "learning_rate": 1.2613746433583946e-06,
      "loss": 0.007,
      "step": 2672400
    },
    {
      "epoch": 4.373473943297788,
      "grad_norm": 0.44332483410835266,
      "learning_rate": 1.2613087511448773e-06,
      "loss": 0.0114,
      "step": 2672420
    },
    {
      "epoch": 4.373506673736442,
      "grad_norm": 0.2578599452972412,
      "learning_rate": 1.2612428589313603e-06,
      "loss": 0.0093,
      "step": 2672440
    },
    {
      "epoch": 4.373539404175094,
      "grad_norm": 0.1899930238723755,
      "learning_rate": 1.261176966717843e-06,
      "loss": 0.0086,
      "step": 2672460
    },
    {
      "epoch": 4.373572134613748,
      "grad_norm": 0.34818488359451294,
      "learning_rate": 1.261111074504326e-06,
      "loss": 0.0069,
      "step": 2672480
    },
    {
      "epoch": 4.3736048650524015,
      "grad_norm": 0.25133058428764343,
      "learning_rate": 1.2610451822908087e-06,
      "loss": 0.0087,
      "step": 2672500
    },
    {
      "epoch": 4.373637595491055,
      "grad_norm": 0.21284417808055878,
      "learning_rate": 1.2609792900772917e-06,
      "loss": 0.0077,
      "step": 2672520
    },
    {
      "epoch": 4.373670325929708,
      "grad_norm": 0.15210548043251038,
      "learning_rate": 1.2609133978637744e-06,
      "loss": 0.0091,
      "step": 2672540
    },
    {
      "epoch": 4.373703056368361,
      "grad_norm": 0.5122213959693909,
      "learning_rate": 1.2608475056502576e-06,
      "loss": 0.0074,
      "step": 2672560
    },
    {
      "epoch": 4.373735786807015,
      "grad_norm": 0.3610532283782959,
      "learning_rate": 1.2607816134367403e-06,
      "loss": 0.0173,
      "step": 2672580
    },
    {
      "epoch": 4.373768517245668,
      "grad_norm": 0.38581010699272156,
      "learning_rate": 1.2607157212232233e-06,
      "loss": 0.0072,
      "step": 2672600
    },
    {
      "epoch": 4.373801247684321,
      "grad_norm": 0.4273453652858734,
      "learning_rate": 1.260649829009706e-06,
      "loss": 0.0121,
      "step": 2672620
    },
    {
      "epoch": 4.373833978122975,
      "grad_norm": 0.5526641607284546,
      "learning_rate": 1.260583936796189e-06,
      "loss": 0.0087,
      "step": 2672640
    },
    {
      "epoch": 4.3738667085616285,
      "grad_norm": 0.1678839921951294,
      "learning_rate": 1.2605180445826717e-06,
      "loss": 0.0092,
      "step": 2672660
    },
    {
      "epoch": 4.373899439000281,
      "grad_norm": 0.3284186124801636,
      "learning_rate": 1.2604521523691547e-06,
      "loss": 0.0089,
      "step": 2672680
    },
    {
      "epoch": 4.373932169438935,
      "grad_norm": 0.362108439207077,
      "learning_rate": 1.2603862601556374e-06,
      "loss": 0.0069,
      "step": 2672700
    },
    {
      "epoch": 4.373964899877588,
      "grad_norm": 0.16592948138713837,
      "learning_rate": 1.2603203679421203e-06,
      "loss": 0.0082,
      "step": 2672720
    },
    {
      "epoch": 4.373997630316241,
      "grad_norm": 0.08288318663835526,
      "learning_rate": 1.2602544757286033e-06,
      "loss": 0.0077,
      "step": 2672740
    },
    {
      "epoch": 4.374030360754895,
      "grad_norm": 0.4645596742630005,
      "learning_rate": 1.2601885835150862e-06,
      "loss": 0.0053,
      "step": 2672760
    },
    {
      "epoch": 4.374063091193548,
      "grad_norm": 0.1359541416168213,
      "learning_rate": 1.260122691301569e-06,
      "loss": 0.0105,
      "step": 2672780
    },
    {
      "epoch": 4.374095821632202,
      "grad_norm": 0.2016822099685669,
      "learning_rate": 1.260056799088052e-06,
      "loss": 0.0053,
      "step": 2672800
    },
    {
      "epoch": 4.374128552070855,
      "grad_norm": 0.22363847494125366,
      "learning_rate": 1.2599909068745347e-06,
      "loss": 0.0115,
      "step": 2672820
    },
    {
      "epoch": 4.374161282509508,
      "grad_norm": 0.6007851958274841,
      "learning_rate": 1.2599250146610176e-06,
      "loss": 0.0098,
      "step": 2672840
    },
    {
      "epoch": 4.374194012948162,
      "grad_norm": 0.15487858653068542,
      "learning_rate": 1.2598591224475004e-06,
      "loss": 0.0056,
      "step": 2672860
    },
    {
      "epoch": 4.3742267433868145,
      "grad_norm": 0.09071490168571472,
      "learning_rate": 1.2597932302339833e-06,
      "loss": 0.0092,
      "step": 2672880
    },
    {
      "epoch": 4.374259473825468,
      "grad_norm": 0.2023857831954956,
      "learning_rate": 1.259727338020466e-06,
      "loss": 0.0091,
      "step": 2672900
    },
    {
      "epoch": 4.374292204264122,
      "grad_norm": 0.04719282686710358,
      "learning_rate": 1.2596614458069492e-06,
      "loss": 0.0092,
      "step": 2672920
    },
    {
      "epoch": 4.374324934702775,
      "grad_norm": 0.21097557246685028,
      "learning_rate": 1.259595553593432e-06,
      "loss": 0.0081,
      "step": 2672940
    },
    {
      "epoch": 4.374357665141428,
      "grad_norm": 0.2672337591648102,
      "learning_rate": 1.259529661379915e-06,
      "loss": 0.0085,
      "step": 2672960
    },
    {
      "epoch": 4.374390395580082,
      "grad_norm": 0.08120209723711014,
      "learning_rate": 1.2594637691663977e-06,
      "loss": 0.0091,
      "step": 2672980
    },
    {
      "epoch": 4.374423126018735,
      "grad_norm": 0.09155858308076859,
      "learning_rate": 1.2593978769528806e-06,
      "loss": 0.013,
      "step": 2673000
    },
    {
      "epoch": 4.374455856457388,
      "grad_norm": 0.1991375833749771,
      "learning_rate": 1.2593319847393633e-06,
      "loss": 0.0071,
      "step": 2673020
    },
    {
      "epoch": 4.3744885868960415,
      "grad_norm": 0.8062921166419983,
      "learning_rate": 1.2592660925258463e-06,
      "loss": 0.0158,
      "step": 2673040
    },
    {
      "epoch": 4.374521317334695,
      "grad_norm": 0.2309863269329071,
      "learning_rate": 1.259200200312329e-06,
      "loss": 0.0094,
      "step": 2673060
    },
    {
      "epoch": 4.374554047773349,
      "grad_norm": 0.19456101953983307,
      "learning_rate": 1.259134308098812e-06,
      "loss": 0.0068,
      "step": 2673080
    },
    {
      "epoch": 4.374586778212001,
      "grad_norm": 0.19337674975395203,
      "learning_rate": 1.2590684158852952e-06,
      "loss": 0.0096,
      "step": 2673100
    },
    {
      "epoch": 4.374619508650655,
      "grad_norm": 0.06463561952114105,
      "learning_rate": 1.2590025236717779e-06,
      "loss": 0.0083,
      "step": 2673120
    },
    {
      "epoch": 4.374652239089309,
      "grad_norm": 0.1971050649881363,
      "learning_rate": 1.2589366314582606e-06,
      "loss": 0.0122,
      "step": 2673140
    },
    {
      "epoch": 4.374684969527961,
      "grad_norm": 0.11915799230337143,
      "learning_rate": 1.2588707392447436e-06,
      "loss": 0.0063,
      "step": 2673160
    },
    {
      "epoch": 4.374717699966615,
      "grad_norm": 0.1267828792333603,
      "learning_rate": 1.2588048470312263e-06,
      "loss": 0.0098,
      "step": 2673180
    },
    {
      "epoch": 4.3747504304052685,
      "grad_norm": 0.19727182388305664,
      "learning_rate": 1.2587389548177093e-06,
      "loss": 0.0099,
      "step": 2673200
    },
    {
      "epoch": 4.374783160843922,
      "grad_norm": 0.5210342407226562,
      "learning_rate": 1.258673062604192e-06,
      "loss": 0.012,
      "step": 2673220
    },
    {
      "epoch": 4.374815891282575,
      "grad_norm": 0.3660704791545868,
      "learning_rate": 1.258607170390675e-06,
      "loss": 0.0123,
      "step": 2673240
    },
    {
      "epoch": 4.374848621721228,
      "grad_norm": 0.45695582032203674,
      "learning_rate": 1.2585412781771581e-06,
      "loss": 0.0085,
      "step": 2673260
    },
    {
      "epoch": 4.374881352159882,
      "grad_norm": 0.3312704563140869,
      "learning_rate": 1.2584753859636409e-06,
      "loss": 0.0083,
      "step": 2673280
    },
    {
      "epoch": 4.374914082598535,
      "grad_norm": 0.6901868581771851,
      "learning_rate": 1.2584094937501238e-06,
      "loss": 0.0162,
      "step": 2673300
    },
    {
      "epoch": 4.374946813037188,
      "grad_norm": 0.3897765278816223,
      "learning_rate": 1.2583436015366066e-06,
      "loss": 0.0099,
      "step": 2673320
    },
    {
      "epoch": 4.374979543475842,
      "grad_norm": 0.23150552809238434,
      "learning_rate": 1.2582777093230893e-06,
      "loss": 0.0115,
      "step": 2673340
    },
    {
      "epoch": 4.3750122739144945,
      "grad_norm": 0.25180307030677795,
      "learning_rate": 1.2582118171095723e-06,
      "loss": 0.0068,
      "step": 2673360
    },
    {
      "epoch": 4.375045004353148,
      "grad_norm": 0.45554330945014954,
      "learning_rate": 1.258145924896055e-06,
      "loss": 0.0126,
      "step": 2673380
    },
    {
      "epoch": 4.375077734791802,
      "grad_norm": 0.13386164605617523,
      "learning_rate": 1.258080032682538e-06,
      "loss": 0.007,
      "step": 2673400
    },
    {
      "epoch": 4.375110465230455,
      "grad_norm": 0.29243817925453186,
      "learning_rate": 1.2580141404690207e-06,
      "loss": 0.0074,
      "step": 2673420
    },
    {
      "epoch": 4.375143195669108,
      "grad_norm": 0.7430369853973389,
      "learning_rate": 1.2579482482555038e-06,
      "loss": 0.0088,
      "step": 2673440
    },
    {
      "epoch": 4.375175926107762,
      "grad_norm": 0.20777761936187744,
      "learning_rate": 1.2578823560419868e-06,
      "loss": 0.0108,
      "step": 2673460
    },
    {
      "epoch": 4.375208656546415,
      "grad_norm": 0.09441972523927689,
      "learning_rate": 1.2578164638284695e-06,
      "loss": 0.0066,
      "step": 2673480
    },
    {
      "epoch": 4.375241386985069,
      "grad_norm": 0.15308143198490143,
      "learning_rate": 1.2577505716149525e-06,
      "loss": 0.0099,
      "step": 2673500
    },
    {
      "epoch": 4.3752741174237215,
      "grad_norm": 0.5331951975822449,
      "learning_rate": 1.2576846794014352e-06,
      "loss": 0.0104,
      "step": 2673520
    },
    {
      "epoch": 4.375306847862375,
      "grad_norm": 0.07794950902462006,
      "learning_rate": 1.2576187871879182e-06,
      "loss": 0.0081,
      "step": 2673540
    },
    {
      "epoch": 4.375339578301029,
      "grad_norm": 0.5869662165641785,
      "learning_rate": 1.257552894974401e-06,
      "loss": 0.0078,
      "step": 2673560
    },
    {
      "epoch": 4.375372308739681,
      "grad_norm": 0.21969810128211975,
      "learning_rate": 1.2574870027608837e-06,
      "loss": 0.0082,
      "step": 2673580
    },
    {
      "epoch": 4.375405039178335,
      "grad_norm": 0.10428419709205627,
      "learning_rate": 1.2574211105473666e-06,
      "loss": 0.0057,
      "step": 2673600
    },
    {
      "epoch": 4.375437769616989,
      "grad_norm": 0.31364408135414124,
      "learning_rate": 1.2573552183338498e-06,
      "loss": 0.0141,
      "step": 2673620
    },
    {
      "epoch": 4.375470500055641,
      "grad_norm": 0.22536879777908325,
      "learning_rate": 1.2572893261203325e-06,
      "loss": 0.0086,
      "step": 2673640
    },
    {
      "epoch": 4.375503230494295,
      "grad_norm": 0.23291580379009247,
      "learning_rate": 1.2572234339068155e-06,
      "loss": 0.0153,
      "step": 2673660
    },
    {
      "epoch": 4.3755359609329485,
      "grad_norm": 0.2767915427684784,
      "learning_rate": 1.2571575416932982e-06,
      "loss": 0.0067,
      "step": 2673680
    },
    {
      "epoch": 4.375568691371602,
      "grad_norm": 0.10563521832227707,
      "learning_rate": 1.2570916494797812e-06,
      "loss": 0.0053,
      "step": 2673700
    },
    {
      "epoch": 4.375601421810255,
      "grad_norm": 0.13646312057971954,
      "learning_rate": 1.257025757266264e-06,
      "loss": 0.0073,
      "step": 2673720
    },
    {
      "epoch": 4.375634152248908,
      "grad_norm": 0.17486494779586792,
      "learning_rate": 1.2569598650527468e-06,
      "loss": 0.0058,
      "step": 2673740
    },
    {
      "epoch": 4.375666882687562,
      "grad_norm": 0.27292299270629883,
      "learning_rate": 1.2568939728392296e-06,
      "loss": 0.009,
      "step": 2673760
    },
    {
      "epoch": 4.375699613126216,
      "grad_norm": 0.27992820739746094,
      "learning_rate": 1.2568280806257125e-06,
      "loss": 0.0097,
      "step": 2673780
    },
    {
      "epoch": 4.375732343564868,
      "grad_norm": 0.2381121665239334,
      "learning_rate": 1.2567621884121955e-06,
      "loss": 0.0059,
      "step": 2673800
    },
    {
      "epoch": 4.375765074003522,
      "grad_norm": 0.15751619637012482,
      "learning_rate": 1.2566962961986784e-06,
      "loss": 0.0081,
      "step": 2673820
    },
    {
      "epoch": 4.3757978044421755,
      "grad_norm": 0.2501155138015747,
      "learning_rate": 1.2566304039851612e-06,
      "loss": 0.0066,
      "step": 2673840
    },
    {
      "epoch": 4.375830534880828,
      "grad_norm": 0.5000048875808716,
      "learning_rate": 1.2565645117716441e-06,
      "loss": 0.0091,
      "step": 2673860
    },
    {
      "epoch": 4.375863265319482,
      "grad_norm": 0.45363619923591614,
      "learning_rate": 1.2564986195581269e-06,
      "loss": 0.0112,
      "step": 2673880
    },
    {
      "epoch": 4.375895995758135,
      "grad_norm": 0.9358358979225159,
      "learning_rate": 1.2564327273446098e-06,
      "loss": 0.0126,
      "step": 2673900
    },
    {
      "epoch": 4.375928726196788,
      "grad_norm": 0.10757263749837875,
      "learning_rate": 1.2563668351310926e-06,
      "loss": 0.0083,
      "step": 2673920
    },
    {
      "epoch": 4.375961456635442,
      "grad_norm": 0.5759833455085754,
      "learning_rate": 1.2563009429175755e-06,
      "loss": 0.0139,
      "step": 2673940
    },
    {
      "epoch": 4.375994187074095,
      "grad_norm": 0.248989999294281,
      "learning_rate": 1.2562350507040583e-06,
      "loss": 0.012,
      "step": 2673960
    },
    {
      "epoch": 4.376026917512749,
      "grad_norm": 0.41106879711151123,
      "learning_rate": 1.2561691584905414e-06,
      "loss": 0.0098,
      "step": 2673980
    },
    {
      "epoch": 4.376059647951402,
      "grad_norm": 0.10930688679218292,
      "learning_rate": 1.2561032662770242e-06,
      "loss": 0.0074,
      "step": 2674000
    },
    {
      "epoch": 4.376092378390055,
      "grad_norm": 0.2999124526977539,
      "learning_rate": 1.2560373740635071e-06,
      "loss": 0.0067,
      "step": 2674020
    },
    {
      "epoch": 4.376125108828709,
      "grad_norm": 1.2195868492126465,
      "learning_rate": 1.2559714818499898e-06,
      "loss": 0.0116,
      "step": 2674040
    },
    {
      "epoch": 4.3761578392673615,
      "grad_norm": 0.906676173210144,
      "learning_rate": 1.2559055896364728e-06,
      "loss": 0.0086,
      "step": 2674060
    },
    {
      "epoch": 4.376190569706015,
      "grad_norm": 0.16564421355724335,
      "learning_rate": 1.2558396974229555e-06,
      "loss": 0.0107,
      "step": 2674080
    },
    {
      "epoch": 4.376223300144669,
      "grad_norm": 0.21908296644687653,
      "learning_rate": 1.2557738052094385e-06,
      "loss": 0.0076,
      "step": 2674100
    },
    {
      "epoch": 4.376256030583322,
      "grad_norm": 0.30188992619514465,
      "learning_rate": 1.2557079129959212e-06,
      "loss": 0.0104,
      "step": 2674120
    },
    {
      "epoch": 4.376288761021975,
      "grad_norm": 0.1554987132549286,
      "learning_rate": 1.2556420207824044e-06,
      "loss": 0.0057,
      "step": 2674140
    },
    {
      "epoch": 4.3763214914606285,
      "grad_norm": 0.18817314505577087,
      "learning_rate": 1.2555761285688871e-06,
      "loss": 0.0106,
      "step": 2674160
    },
    {
      "epoch": 4.376354221899282,
      "grad_norm": 0.07109710574150085,
      "learning_rate": 1.25551023635537e-06,
      "loss": 0.0092,
      "step": 2674180
    },
    {
      "epoch": 4.376386952337935,
      "grad_norm": 0.2872244417667389,
      "learning_rate": 1.2554443441418528e-06,
      "loss": 0.0077,
      "step": 2674200
    },
    {
      "epoch": 4.376419682776588,
      "grad_norm": 0.13047532737255096,
      "learning_rate": 1.2553784519283358e-06,
      "loss": 0.0071,
      "step": 2674220
    },
    {
      "epoch": 4.376452413215242,
      "grad_norm": 0.8858090043067932,
      "learning_rate": 1.2553125597148185e-06,
      "loss": 0.0115,
      "step": 2674240
    },
    {
      "epoch": 4.376485143653896,
      "grad_norm": 0.05417129024863243,
      "learning_rate": 1.2552466675013015e-06,
      "loss": 0.0097,
      "step": 2674260
    },
    {
      "epoch": 4.376517874092548,
      "grad_norm": 0.16632862389087677,
      "learning_rate": 1.2551807752877842e-06,
      "loss": 0.0069,
      "step": 2674280
    },
    {
      "epoch": 4.376550604531202,
      "grad_norm": 0.20028847455978394,
      "learning_rate": 1.2551148830742672e-06,
      "loss": 0.0057,
      "step": 2674300
    },
    {
      "epoch": 4.3765833349698555,
      "grad_norm": 0.17529752850532532,
      "learning_rate": 1.2550489908607501e-06,
      "loss": 0.0078,
      "step": 2674320
    },
    {
      "epoch": 4.376616065408508,
      "grad_norm": 0.17711825668811798,
      "learning_rate": 1.254983098647233e-06,
      "loss": 0.0079,
      "step": 2674340
    },
    {
      "epoch": 4.376648795847162,
      "grad_norm": 0.36893266439437866,
      "learning_rate": 1.2549172064337158e-06,
      "loss": 0.0086,
      "step": 2674360
    },
    {
      "epoch": 4.376681526285815,
      "grad_norm": 0.23761145770549774,
      "learning_rate": 1.2548513142201988e-06,
      "loss": 0.0096,
      "step": 2674380
    },
    {
      "epoch": 4.376714256724469,
      "grad_norm": 0.19990015029907227,
      "learning_rate": 1.2547854220066815e-06,
      "loss": 0.0105,
      "step": 2674400
    },
    {
      "epoch": 4.376746987163122,
      "grad_norm": 0.43782559037208557,
      "learning_rate": 1.2547195297931644e-06,
      "loss": 0.0058,
      "step": 2674420
    },
    {
      "epoch": 4.376779717601775,
      "grad_norm": 0.2779862880706787,
      "learning_rate": 1.2546536375796472e-06,
      "loss": 0.0069,
      "step": 2674440
    },
    {
      "epoch": 4.376812448040429,
      "grad_norm": 0.06474369764328003,
      "learning_rate": 1.2545877453661301e-06,
      "loss": 0.0059,
      "step": 2674460
    },
    {
      "epoch": 4.376845178479082,
      "grad_norm": 0.21537409722805023,
      "learning_rate": 1.2545218531526129e-06,
      "loss": 0.0055,
      "step": 2674480
    },
    {
      "epoch": 4.376877908917735,
      "grad_norm": 0.09570877254009247,
      "learning_rate": 1.254455960939096e-06,
      "loss": 0.0141,
      "step": 2674500
    },
    {
      "epoch": 4.376910639356389,
      "grad_norm": 0.10778966546058655,
      "learning_rate": 1.254390068725579e-06,
      "loss": 0.01,
      "step": 2674520
    },
    {
      "epoch": 4.376943369795042,
      "grad_norm": 0.2403375804424286,
      "learning_rate": 1.2543241765120617e-06,
      "loss": 0.008,
      "step": 2674540
    },
    {
      "epoch": 4.376976100233695,
      "grad_norm": 0.3245629668235779,
      "learning_rate": 1.2542582842985445e-06,
      "loss": 0.0101,
      "step": 2674560
    },
    {
      "epoch": 4.377008830672349,
      "grad_norm": 0.16113759577274323,
      "learning_rate": 1.2541923920850274e-06,
      "loss": 0.0094,
      "step": 2674580
    },
    {
      "epoch": 4.377041561111002,
      "grad_norm": 0.7357262969017029,
      "learning_rate": 1.2541264998715102e-06,
      "loss": 0.0076,
      "step": 2674600
    },
    {
      "epoch": 4.377074291549655,
      "grad_norm": 0.5130102634429932,
      "learning_rate": 1.2540606076579931e-06,
      "loss": 0.0143,
      "step": 2674620
    },
    {
      "epoch": 4.377107021988309,
      "grad_norm": 0.06183710694313049,
      "learning_rate": 1.2539947154444759e-06,
      "loss": 0.0086,
      "step": 2674640
    },
    {
      "epoch": 4.377139752426962,
      "grad_norm": 0.17586560547351837,
      "learning_rate": 1.2539288232309588e-06,
      "loss": 0.0066,
      "step": 2674660
    },
    {
      "epoch": 4.377172482865616,
      "grad_norm": 0.2077845185995102,
      "learning_rate": 1.253862931017442e-06,
      "loss": 0.0096,
      "step": 2674680
    },
    {
      "epoch": 4.3772052133042685,
      "grad_norm": 0.12504719197750092,
      "learning_rate": 1.2537970388039247e-06,
      "loss": 0.006,
      "step": 2674700
    },
    {
      "epoch": 4.377237943742922,
      "grad_norm": 0.11058948189020157,
      "learning_rate": 1.2537311465904077e-06,
      "loss": 0.0072,
      "step": 2674720
    },
    {
      "epoch": 4.377270674181576,
      "grad_norm": 0.11962122470140457,
      "learning_rate": 1.2536652543768904e-06,
      "loss": 0.0116,
      "step": 2674740
    },
    {
      "epoch": 4.377303404620228,
      "grad_norm": 0.4275937080383301,
      "learning_rate": 1.2535993621633733e-06,
      "loss": 0.0083,
      "step": 2674760
    },
    {
      "epoch": 4.377336135058882,
      "grad_norm": 0.18435169756412506,
      "learning_rate": 1.253533469949856e-06,
      "loss": 0.0107,
      "step": 2674780
    },
    {
      "epoch": 4.377368865497536,
      "grad_norm": 0.0662115067243576,
      "learning_rate": 1.2534675777363388e-06,
      "loss": 0.0085,
      "step": 2674800
    },
    {
      "epoch": 4.377401595936188,
      "grad_norm": 0.4706669747829437,
      "learning_rate": 1.2534016855228218e-06,
      "loss": 0.0057,
      "step": 2674820
    },
    {
      "epoch": 4.377434326374842,
      "grad_norm": 0.09599440544843674,
      "learning_rate": 1.2533357933093045e-06,
      "loss": 0.0102,
      "step": 2674840
    },
    {
      "epoch": 4.3774670568134955,
      "grad_norm": 0.16419033706188202,
      "learning_rate": 1.2532699010957877e-06,
      "loss": 0.0101,
      "step": 2674860
    },
    {
      "epoch": 4.377499787252149,
      "grad_norm": 0.1481764167547226,
      "learning_rate": 1.2532040088822706e-06,
      "loss": 0.0127,
      "step": 2674880
    },
    {
      "epoch": 4.377532517690802,
      "grad_norm": 0.15988662838935852,
      "learning_rate": 1.2531381166687534e-06,
      "loss": 0.0101,
      "step": 2674900
    },
    {
      "epoch": 4.377565248129455,
      "grad_norm": 0.06093277037143707,
      "learning_rate": 1.2530722244552363e-06,
      "loss": 0.005,
      "step": 2674920
    },
    {
      "epoch": 4.377597978568109,
      "grad_norm": 0.17900577187538147,
      "learning_rate": 1.253006332241719e-06,
      "loss": 0.0081,
      "step": 2674940
    },
    {
      "epoch": 4.3776307090067625,
      "grad_norm": 0.10741467773914337,
      "learning_rate": 1.252940440028202e-06,
      "loss": 0.0072,
      "step": 2674960
    },
    {
      "epoch": 4.377663439445415,
      "grad_norm": 0.24094349145889282,
      "learning_rate": 1.2528745478146848e-06,
      "loss": 0.006,
      "step": 2674980
    },
    {
      "epoch": 4.377696169884069,
      "grad_norm": 0.2666289210319519,
      "learning_rate": 1.2528086556011675e-06,
      "loss": 0.0081,
      "step": 2675000
    },
    {
      "epoch": 4.377728900322722,
      "grad_norm": 0.16596214473247528,
      "learning_rate": 1.2527427633876507e-06,
      "loss": 0.0073,
      "step": 2675020
    },
    {
      "epoch": 4.377761630761375,
      "grad_norm": 0.09473925083875656,
      "learning_rate": 1.2526768711741336e-06,
      "loss": 0.0105,
      "step": 2675040
    },
    {
      "epoch": 4.377794361200029,
      "grad_norm": 0.5579687356948853,
      "learning_rate": 1.2526109789606164e-06,
      "loss": 0.0074,
      "step": 2675060
    },
    {
      "epoch": 4.377827091638682,
      "grad_norm": 0.07436419278383255,
      "learning_rate": 1.2525450867470993e-06,
      "loss": 0.012,
      "step": 2675080
    },
    {
      "epoch": 4.377859822077335,
      "grad_norm": 0.4895350933074951,
      "learning_rate": 1.252479194533582e-06,
      "loss": 0.0094,
      "step": 2675100
    },
    {
      "epoch": 4.377892552515989,
      "grad_norm": 0.49663397669792175,
      "learning_rate": 1.252413302320065e-06,
      "loss": 0.0096,
      "step": 2675120
    },
    {
      "epoch": 4.377925282954642,
      "grad_norm": 0.4216550588607788,
      "learning_rate": 1.2523474101065477e-06,
      "loss": 0.0096,
      "step": 2675140
    },
    {
      "epoch": 4.377958013393296,
      "grad_norm": 0.552750825881958,
      "learning_rate": 1.2522815178930307e-06,
      "loss": 0.0069,
      "step": 2675160
    },
    {
      "epoch": 4.3779907438319485,
      "grad_norm": 0.3908432424068451,
      "learning_rate": 1.2522156256795134e-06,
      "loss": 0.012,
      "step": 2675180
    },
    {
      "epoch": 4.378023474270602,
      "grad_norm": 0.06732980906963348,
      "learning_rate": 1.2521497334659966e-06,
      "loss": 0.0089,
      "step": 2675200
    },
    {
      "epoch": 4.378056204709256,
      "grad_norm": 0.08051468431949615,
      "learning_rate": 1.2520838412524793e-06,
      "loss": 0.0124,
      "step": 2675220
    },
    {
      "epoch": 4.378088935147909,
      "grad_norm": 0.13154758512973785,
      "learning_rate": 1.2520179490389623e-06,
      "loss": 0.0077,
      "step": 2675240
    },
    {
      "epoch": 4.378121665586562,
      "grad_norm": 0.1583671271800995,
      "learning_rate": 1.251952056825445e-06,
      "loss": 0.009,
      "step": 2675260
    },
    {
      "epoch": 4.378154396025216,
      "grad_norm": 0.34286221861839294,
      "learning_rate": 1.251886164611928e-06,
      "loss": 0.009,
      "step": 2675280
    },
    {
      "epoch": 4.378187126463869,
      "grad_norm": 0.2826860845088959,
      "learning_rate": 1.2518202723984107e-06,
      "loss": 0.0074,
      "step": 2675300
    },
    {
      "epoch": 4.378219856902522,
      "grad_norm": 0.11343871057033539,
      "learning_rate": 1.2517543801848937e-06,
      "loss": 0.0114,
      "step": 2675320
    },
    {
      "epoch": 4.3782525873411755,
      "grad_norm": 0.33612895011901855,
      "learning_rate": 1.2516884879713764e-06,
      "loss": 0.0089,
      "step": 2675340
    },
    {
      "epoch": 4.378285317779829,
      "grad_norm": 0.13362818956375122,
      "learning_rate": 1.2516225957578594e-06,
      "loss": 0.0134,
      "step": 2675360
    },
    {
      "epoch": 4.378318048218482,
      "grad_norm": 0.8195638060569763,
      "learning_rate": 1.2515567035443423e-06,
      "loss": 0.0085,
      "step": 2675380
    },
    {
      "epoch": 4.378350778657135,
      "grad_norm": 0.6487324833869934,
      "learning_rate": 1.2514908113308253e-06,
      "loss": 0.01,
      "step": 2675400
    },
    {
      "epoch": 4.378383509095789,
      "grad_norm": 0.2738122344017029,
      "learning_rate": 1.251424919117308e-06,
      "loss": 0.0066,
      "step": 2675420
    },
    {
      "epoch": 4.378416239534443,
      "grad_norm": 0.33495479822158813,
      "learning_rate": 1.251359026903791e-06,
      "loss": 0.0073,
      "step": 2675440
    },
    {
      "epoch": 4.378448969973095,
      "grad_norm": 0.1380080282688141,
      "learning_rate": 1.2512931346902737e-06,
      "loss": 0.0097,
      "step": 2675460
    },
    {
      "epoch": 4.378481700411749,
      "grad_norm": 0.10966183990240097,
      "learning_rate": 1.2512272424767566e-06,
      "loss": 0.0078,
      "step": 2675480
    },
    {
      "epoch": 4.3785144308504025,
      "grad_norm": 0.3499739170074463,
      "learning_rate": 1.2511613502632394e-06,
      "loss": 0.01,
      "step": 2675500
    },
    {
      "epoch": 4.378547161289055,
      "grad_norm": 0.10686659812927246,
      "learning_rate": 1.2510954580497223e-06,
      "loss": 0.0095,
      "step": 2675520
    },
    {
      "epoch": 4.378579891727709,
      "grad_norm": 0.18782579898834229,
      "learning_rate": 1.251029565836205e-06,
      "loss": 0.0101,
      "step": 2675540
    },
    {
      "epoch": 4.378612622166362,
      "grad_norm": 0.41319021582603455,
      "learning_rate": 1.2509636736226882e-06,
      "loss": 0.0125,
      "step": 2675560
    },
    {
      "epoch": 4.378645352605016,
      "grad_norm": 0.29820284247398376,
      "learning_rate": 1.250897781409171e-06,
      "loss": 0.0124,
      "step": 2675580
    },
    {
      "epoch": 4.378678083043669,
      "grad_norm": 0.5098422765731812,
      "learning_rate": 1.250831889195654e-06,
      "loss": 0.0167,
      "step": 2675600
    },
    {
      "epoch": 4.378710813482322,
      "grad_norm": 0.10305575281381607,
      "learning_rate": 1.2507659969821367e-06,
      "loss": 0.0074,
      "step": 2675620
    },
    {
      "epoch": 4.378743543920976,
      "grad_norm": 0.17542707920074463,
      "learning_rate": 1.2507001047686196e-06,
      "loss": 0.0119,
      "step": 2675640
    },
    {
      "epoch": 4.378776274359629,
      "grad_norm": 0.2947365343570709,
      "learning_rate": 1.2506342125551024e-06,
      "loss": 0.0095,
      "step": 2675660
    },
    {
      "epoch": 4.378809004798282,
      "grad_norm": 0.235593780875206,
      "learning_rate": 1.2505683203415853e-06,
      "loss": 0.0117,
      "step": 2675680
    },
    {
      "epoch": 4.378841735236936,
      "grad_norm": 0.23547422885894775,
      "learning_rate": 1.250502428128068e-06,
      "loss": 0.0107,
      "step": 2675700
    },
    {
      "epoch": 4.378874465675589,
      "grad_norm": 0.15234173834323883,
      "learning_rate": 1.250436535914551e-06,
      "loss": 0.0091,
      "step": 2675720
    },
    {
      "epoch": 4.378907196114242,
      "grad_norm": 0.157954603433609,
      "learning_rate": 1.2503706437010342e-06,
      "loss": 0.0078,
      "step": 2675740
    },
    {
      "epoch": 4.378939926552896,
      "grad_norm": 0.17485250532627106,
      "learning_rate": 1.250304751487517e-06,
      "loss": 0.0081,
      "step": 2675760
    },
    {
      "epoch": 4.378972656991549,
      "grad_norm": 0.23396043479442596,
      "learning_rate": 1.2502388592739996e-06,
      "loss": 0.0072,
      "step": 2675780
    },
    {
      "epoch": 4.379005387430202,
      "grad_norm": 0.04078822210431099,
      "learning_rate": 1.2501729670604826e-06,
      "loss": 0.006,
      "step": 2675800
    },
    {
      "epoch": 4.3790381178688556,
      "grad_norm": 0.08789792656898499,
      "learning_rate": 1.2501070748469653e-06,
      "loss": 0.0116,
      "step": 2675820
    },
    {
      "epoch": 4.379070848307509,
      "grad_norm": 0.3536515533924103,
      "learning_rate": 1.2500411826334483e-06,
      "loss": 0.0076,
      "step": 2675840
    },
    {
      "epoch": 4.379103578746163,
      "grad_norm": 0.14817680418491364,
      "learning_rate": 1.2499752904199312e-06,
      "loss": 0.0066,
      "step": 2675860
    },
    {
      "epoch": 4.3791363091848154,
      "grad_norm": 0.4156963527202606,
      "learning_rate": 1.249909398206414e-06,
      "loss": 0.0108,
      "step": 2675880
    },
    {
      "epoch": 4.379169039623469,
      "grad_norm": 0.6408653855323792,
      "learning_rate": 1.249843505992897e-06,
      "loss": 0.0065,
      "step": 2675900
    },
    {
      "epoch": 4.379201770062123,
      "grad_norm": 0.16902759671211243,
      "learning_rate": 1.2497776137793797e-06,
      "loss": 0.0093,
      "step": 2675920
    },
    {
      "epoch": 4.379234500500775,
      "grad_norm": 0.2737821936607361,
      "learning_rate": 1.2497117215658626e-06,
      "loss": 0.0083,
      "step": 2675940
    },
    {
      "epoch": 4.379267230939429,
      "grad_norm": 0.48277994990348816,
      "learning_rate": 1.2496458293523456e-06,
      "loss": 0.0071,
      "step": 2675960
    },
    {
      "epoch": 4.3792999613780825,
      "grad_norm": 0.2013150006532669,
      "learning_rate": 1.2495799371388283e-06,
      "loss": 0.0077,
      "step": 2675980
    },
    {
      "epoch": 4.379332691816736,
      "grad_norm": 0.4831368029117584,
      "learning_rate": 1.2495140449253113e-06,
      "loss": 0.0079,
      "step": 2676000
    },
    {
      "epoch": 4.379365422255389,
      "grad_norm": 0.08634217083454132,
      "learning_rate": 1.249448152711794e-06,
      "loss": 0.0058,
      "step": 2676020
    },
    {
      "epoch": 4.379398152694042,
      "grad_norm": 0.16796062886714935,
      "learning_rate": 1.2493822604982772e-06,
      "loss": 0.0075,
      "step": 2676040
    },
    {
      "epoch": 4.379430883132696,
      "grad_norm": 0.11476041376590729,
      "learning_rate": 1.24931636828476e-06,
      "loss": 0.0088,
      "step": 2676060
    },
    {
      "epoch": 4.379463613571349,
      "grad_norm": 0.41843467950820923,
      "learning_rate": 1.2492504760712429e-06,
      "loss": 0.0117,
      "step": 2676080
    },
    {
      "epoch": 4.379496344010002,
      "grad_norm": 0.1860441267490387,
      "learning_rate": 1.2491845838577256e-06,
      "loss": 0.0117,
      "step": 2676100
    },
    {
      "epoch": 4.379529074448656,
      "grad_norm": 0.3103184401988983,
      "learning_rate": 1.2491186916442085e-06,
      "loss": 0.0081,
      "step": 2676120
    },
    {
      "epoch": 4.3795618048873095,
      "grad_norm": 0.09078618139028549,
      "learning_rate": 1.2490527994306915e-06,
      "loss": 0.0074,
      "step": 2676140
    },
    {
      "epoch": 4.379594535325962,
      "grad_norm": 0.12865643203258514,
      "learning_rate": 1.2489869072171742e-06,
      "loss": 0.0092,
      "step": 2676160
    },
    {
      "epoch": 4.379627265764616,
      "grad_norm": 0.09947176277637482,
      "learning_rate": 1.2489210150036572e-06,
      "loss": 0.0073,
      "step": 2676180
    },
    {
      "epoch": 4.379659996203269,
      "grad_norm": 0.26365533471107483,
      "learning_rate": 1.24885512279014e-06,
      "loss": 0.0079,
      "step": 2676200
    },
    {
      "epoch": 4.379692726641922,
      "grad_norm": 0.14526422321796417,
      "learning_rate": 1.2487892305766229e-06,
      "loss": 0.0094,
      "step": 2676220
    },
    {
      "epoch": 4.379725457080576,
      "grad_norm": 0.2538575232028961,
      "learning_rate": 1.2487233383631058e-06,
      "loss": 0.0084,
      "step": 2676240
    },
    {
      "epoch": 4.379758187519229,
      "grad_norm": 0.4162568747997284,
      "learning_rate": 1.2486574461495886e-06,
      "loss": 0.0141,
      "step": 2676260
    },
    {
      "epoch": 4.379790917957882,
      "grad_norm": 0.14517705142498016,
      "learning_rate": 1.2485915539360715e-06,
      "loss": 0.0117,
      "step": 2676280
    },
    {
      "epoch": 4.379823648396536,
      "grad_norm": 0.40778547525405884,
      "learning_rate": 1.2485256617225545e-06,
      "loss": 0.0126,
      "step": 2676300
    },
    {
      "epoch": 4.379856378835189,
      "grad_norm": 0.07869579643011093,
      "learning_rate": 1.2484597695090372e-06,
      "loss": 0.0117,
      "step": 2676320
    },
    {
      "epoch": 4.379889109273843,
      "grad_norm": 0.10364103317260742,
      "learning_rate": 1.2483938772955202e-06,
      "loss": 0.0079,
      "step": 2676340
    },
    {
      "epoch": 4.3799218397124955,
      "grad_norm": 0.17576107382774353,
      "learning_rate": 1.248327985082003e-06,
      "loss": 0.0115,
      "step": 2676360
    },
    {
      "epoch": 4.379954570151149,
      "grad_norm": 0.3365277051925659,
      "learning_rate": 1.2482620928684859e-06,
      "loss": 0.0079,
      "step": 2676380
    },
    {
      "epoch": 4.379987300589803,
      "grad_norm": 0.2415761798620224,
      "learning_rate": 1.2481962006549688e-06,
      "loss": 0.01,
      "step": 2676400
    },
    {
      "epoch": 4.380020031028456,
      "grad_norm": 0.26759669184684753,
      "learning_rate": 1.2481303084414515e-06,
      "loss": 0.0106,
      "step": 2676420
    },
    {
      "epoch": 4.380052761467109,
      "grad_norm": 0.2280879020690918,
      "learning_rate": 1.2480644162279345e-06,
      "loss": 0.0095,
      "step": 2676440
    },
    {
      "epoch": 4.380085491905763,
      "grad_norm": 0.10355162620544434,
      "learning_rate": 1.2479985240144172e-06,
      "loss": 0.0106,
      "step": 2676460
    },
    {
      "epoch": 4.380118222344416,
      "grad_norm": 0.4850555658340454,
      "learning_rate": 1.2479326318009002e-06,
      "loss": 0.0088,
      "step": 2676480
    },
    {
      "epoch": 4.380150952783069,
      "grad_norm": 0.22275765240192413,
      "learning_rate": 1.2478667395873831e-06,
      "loss": 0.0101,
      "step": 2676500
    },
    {
      "epoch": 4.3801836832217225,
      "grad_norm": 0.18081508576869965,
      "learning_rate": 1.2478008473738659e-06,
      "loss": 0.0101,
      "step": 2676520
    },
    {
      "epoch": 4.380216413660376,
      "grad_norm": 0.46558740735054016,
      "learning_rate": 1.2477349551603488e-06,
      "loss": 0.0112,
      "step": 2676540
    },
    {
      "epoch": 4.380249144099029,
      "grad_norm": 0.327679842710495,
      "learning_rate": 1.2476690629468318e-06,
      "loss": 0.013,
      "step": 2676560
    },
    {
      "epoch": 4.380281874537682,
      "grad_norm": 0.2498105764389038,
      "learning_rate": 1.2476031707333145e-06,
      "loss": 0.008,
      "step": 2676580
    },
    {
      "epoch": 4.380314604976336,
      "grad_norm": 0.41545143723487854,
      "learning_rate": 1.2475372785197975e-06,
      "loss": 0.0136,
      "step": 2676600
    },
    {
      "epoch": 4.3803473354149896,
      "grad_norm": 0.5485363006591797,
      "learning_rate": 1.2474713863062802e-06,
      "loss": 0.0107,
      "step": 2676620
    },
    {
      "epoch": 4.380380065853642,
      "grad_norm": 0.1895475685596466,
      "learning_rate": 1.2474054940927632e-06,
      "loss": 0.0062,
      "step": 2676640
    },
    {
      "epoch": 4.380412796292296,
      "grad_norm": 0.4339028298854828,
      "learning_rate": 1.2473396018792461e-06,
      "loss": 0.0076,
      "step": 2676660
    },
    {
      "epoch": 4.3804455267309494,
      "grad_norm": 0.11345105618238449,
      "learning_rate": 1.2472737096657289e-06,
      "loss": 0.0105,
      "step": 2676680
    },
    {
      "epoch": 4.380478257169603,
      "grad_norm": 0.1685975342988968,
      "learning_rate": 1.2472078174522118e-06,
      "loss": 0.0108,
      "step": 2676700
    },
    {
      "epoch": 4.380510987608256,
      "grad_norm": 0.28567591309547424,
      "learning_rate": 1.2471419252386946e-06,
      "loss": 0.0083,
      "step": 2676720
    },
    {
      "epoch": 4.380543718046909,
      "grad_norm": 0.13250936567783356,
      "learning_rate": 1.2470760330251775e-06,
      "loss": 0.0084,
      "step": 2676740
    },
    {
      "epoch": 4.380576448485563,
      "grad_norm": 0.21218296885490417,
      "learning_rate": 1.2470101408116605e-06,
      "loss": 0.0104,
      "step": 2676760
    },
    {
      "epoch": 4.380609178924216,
      "grad_norm": 0.4422377347946167,
      "learning_rate": 1.2469442485981432e-06,
      "loss": 0.0112,
      "step": 2676780
    },
    {
      "epoch": 4.380641909362869,
      "grad_norm": 0.19154933094978333,
      "learning_rate": 1.2468783563846261e-06,
      "loss": 0.0066,
      "step": 2676800
    },
    {
      "epoch": 4.380674639801523,
      "grad_norm": 0.09142256528139114,
      "learning_rate": 1.2468124641711089e-06,
      "loss": 0.0062,
      "step": 2676820
    },
    {
      "epoch": 4.3807073702401755,
      "grad_norm": 0.2787415087223053,
      "learning_rate": 1.2467465719575918e-06,
      "loss": 0.0098,
      "step": 2676840
    },
    {
      "epoch": 4.380740100678829,
      "grad_norm": 0.23303399980068207,
      "learning_rate": 1.2466806797440748e-06,
      "loss": 0.0064,
      "step": 2676860
    },
    {
      "epoch": 4.380772831117483,
      "grad_norm": 0.20367500185966492,
      "learning_rate": 1.2466147875305575e-06,
      "loss": 0.0093,
      "step": 2676880
    },
    {
      "epoch": 4.380805561556136,
      "grad_norm": 0.23104923963546753,
      "learning_rate": 1.2465488953170405e-06,
      "loss": 0.0105,
      "step": 2676900
    },
    {
      "epoch": 4.380838291994789,
      "grad_norm": 0.33422747254371643,
      "learning_rate": 1.2464830031035234e-06,
      "loss": 0.0083,
      "step": 2676920
    },
    {
      "epoch": 4.380871022433443,
      "grad_norm": 0.20123246312141418,
      "learning_rate": 1.2464171108900062e-06,
      "loss": 0.006,
      "step": 2676940
    },
    {
      "epoch": 4.380903752872096,
      "grad_norm": 0.28997355699539185,
      "learning_rate": 1.2463512186764891e-06,
      "loss": 0.0082,
      "step": 2676960
    },
    {
      "epoch": 4.380936483310749,
      "grad_norm": 0.4662390947341919,
      "learning_rate": 1.2462853264629719e-06,
      "loss": 0.0104,
      "step": 2676980
    },
    {
      "epoch": 4.3809692137494025,
      "grad_norm": 0.21690671145915985,
      "learning_rate": 1.2462194342494548e-06,
      "loss": 0.0079,
      "step": 2677000
    },
    {
      "epoch": 4.381001944188056,
      "grad_norm": 0.3039989769458771,
      "learning_rate": 1.2461535420359378e-06,
      "loss": 0.0072,
      "step": 2677020
    },
    {
      "epoch": 4.38103467462671,
      "grad_norm": 0.15159067511558533,
      "learning_rate": 1.2460876498224205e-06,
      "loss": 0.0078,
      "step": 2677040
    },
    {
      "epoch": 4.381067405065362,
      "grad_norm": 0.05926143005490303,
      "learning_rate": 1.2460217576089035e-06,
      "loss": 0.0086,
      "step": 2677060
    },
    {
      "epoch": 4.381100135504016,
      "grad_norm": 0.10523903369903564,
      "learning_rate": 1.2459558653953862e-06,
      "loss": 0.0075,
      "step": 2677080
    },
    {
      "epoch": 4.38113286594267,
      "grad_norm": 0.4125993251800537,
      "learning_rate": 1.2458899731818691e-06,
      "loss": 0.0133,
      "step": 2677100
    },
    {
      "epoch": 4.381165596381322,
      "grad_norm": 0.31322988867759705,
      "learning_rate": 1.245824080968352e-06,
      "loss": 0.0118,
      "step": 2677120
    },
    {
      "epoch": 4.381198326819976,
      "grad_norm": 0.08496017754077911,
      "learning_rate": 1.2457581887548348e-06,
      "loss": 0.0085,
      "step": 2677140
    },
    {
      "epoch": 4.3812310572586295,
      "grad_norm": 0.31584274768829346,
      "learning_rate": 1.2456922965413178e-06,
      "loss": 0.008,
      "step": 2677160
    },
    {
      "epoch": 4.381263787697283,
      "grad_norm": 0.16524074971675873,
      "learning_rate": 1.2456264043278007e-06,
      "loss": 0.0108,
      "step": 2677180
    },
    {
      "epoch": 4.381296518135936,
      "grad_norm": 0.12995587289333344,
      "learning_rate": 1.2455605121142835e-06,
      "loss": 0.0094,
      "step": 2677200
    },
    {
      "epoch": 4.381329248574589,
      "grad_norm": 0.46204671263694763,
      "learning_rate": 1.2454946199007664e-06,
      "loss": 0.0096,
      "step": 2677220
    },
    {
      "epoch": 4.381361979013243,
      "grad_norm": 0.18208539485931396,
      "learning_rate": 1.2454287276872492e-06,
      "loss": 0.0098,
      "step": 2677240
    },
    {
      "epoch": 4.381394709451896,
      "grad_norm": 0.18394625186920166,
      "learning_rate": 1.2453628354737323e-06,
      "loss": 0.0103,
      "step": 2677260
    },
    {
      "epoch": 4.381427439890549,
      "grad_norm": 0.17139172554016113,
      "learning_rate": 1.245296943260215e-06,
      "loss": 0.0095,
      "step": 2677280
    },
    {
      "epoch": 4.381460170329203,
      "grad_norm": 0.11230050027370453,
      "learning_rate": 1.2452310510466978e-06,
      "loss": 0.0099,
      "step": 2677300
    },
    {
      "epoch": 4.3814929007678565,
      "grad_norm": 0.04705962538719177,
      "learning_rate": 1.2451651588331808e-06,
      "loss": 0.0096,
      "step": 2677320
    },
    {
      "epoch": 4.381525631206509,
      "grad_norm": 0.048957910388708115,
      "learning_rate": 1.2450992666196635e-06,
      "loss": 0.0084,
      "step": 2677340
    },
    {
      "epoch": 4.381558361645163,
      "grad_norm": 0.24095791578292847,
      "learning_rate": 1.2450333744061467e-06,
      "loss": 0.0075,
      "step": 2677360
    },
    {
      "epoch": 4.381591092083816,
      "grad_norm": 0.6366411447525024,
      "learning_rate": 1.2449674821926294e-06,
      "loss": 0.0111,
      "step": 2677380
    },
    {
      "epoch": 4.381623822522469,
      "grad_norm": 0.6545272469520569,
      "learning_rate": 1.2449015899791124e-06,
      "loss": 0.0096,
      "step": 2677400
    },
    {
      "epoch": 4.381656552961123,
      "grad_norm": 0.3814339339733124,
      "learning_rate": 1.244835697765595e-06,
      "loss": 0.0059,
      "step": 2677420
    },
    {
      "epoch": 4.381689283399776,
      "grad_norm": 0.6845202445983887,
      "learning_rate": 1.244769805552078e-06,
      "loss": 0.0073,
      "step": 2677440
    },
    {
      "epoch": 4.38172201383843,
      "grad_norm": 0.19221647083759308,
      "learning_rate": 1.244703913338561e-06,
      "loss": 0.0089,
      "step": 2677460
    },
    {
      "epoch": 4.381754744277083,
      "grad_norm": 0.353582501411438,
      "learning_rate": 1.2446380211250437e-06,
      "loss": 0.0098,
      "step": 2677480
    },
    {
      "epoch": 4.381787474715736,
      "grad_norm": 0.20021982491016388,
      "learning_rate": 1.2445721289115267e-06,
      "loss": 0.0074,
      "step": 2677500
    },
    {
      "epoch": 4.38182020515439,
      "grad_norm": 0.19281432032585144,
      "learning_rate": 1.2445062366980094e-06,
      "loss": 0.0088,
      "step": 2677520
    },
    {
      "epoch": 4.3818529355930425,
      "grad_norm": 0.2466648519039154,
      "learning_rate": 1.2444403444844924e-06,
      "loss": 0.0057,
      "step": 2677540
    },
    {
      "epoch": 4.381885666031696,
      "grad_norm": 0.19439063966274261,
      "learning_rate": 1.2443744522709753e-06,
      "loss": 0.0112,
      "step": 2677560
    },
    {
      "epoch": 4.38191839647035,
      "grad_norm": 0.597744882106781,
      "learning_rate": 1.244308560057458e-06,
      "loss": 0.0105,
      "step": 2677580
    },
    {
      "epoch": 4.381951126909003,
      "grad_norm": 0.08147678524255753,
      "learning_rate": 1.244242667843941e-06,
      "loss": 0.0092,
      "step": 2677600
    },
    {
      "epoch": 4.381983857347656,
      "grad_norm": 0.1561093032360077,
      "learning_rate": 1.244176775630424e-06,
      "loss": 0.0123,
      "step": 2677620
    },
    {
      "epoch": 4.3820165877863095,
      "grad_norm": 0.2540493905544281,
      "learning_rate": 1.2441108834169067e-06,
      "loss": 0.0078,
      "step": 2677640
    },
    {
      "epoch": 4.382049318224963,
      "grad_norm": 0.27524039149284363,
      "learning_rate": 1.2440449912033897e-06,
      "loss": 0.0085,
      "step": 2677660
    },
    {
      "epoch": 4.382082048663616,
      "grad_norm": 0.1528725028038025,
      "learning_rate": 1.2439790989898724e-06,
      "loss": 0.0063,
      "step": 2677680
    },
    {
      "epoch": 4.382114779102269,
      "grad_norm": 0.24217185378074646,
      "learning_rate": 1.2439132067763554e-06,
      "loss": 0.0091,
      "step": 2677700
    },
    {
      "epoch": 4.382147509540923,
      "grad_norm": 0.20587629079818726,
      "learning_rate": 1.2438473145628383e-06,
      "loss": 0.0101,
      "step": 2677720
    },
    {
      "epoch": 4.382180239979577,
      "grad_norm": 0.5108029842376709,
      "learning_rate": 1.243781422349321e-06,
      "loss": 0.006,
      "step": 2677740
    },
    {
      "epoch": 4.382212970418229,
      "grad_norm": 0.36319440603256226,
      "learning_rate": 1.243715530135804e-06,
      "loss": 0.0092,
      "step": 2677760
    },
    {
      "epoch": 4.382245700856883,
      "grad_norm": 0.3541046977043152,
      "learning_rate": 1.2436496379222867e-06,
      "loss": 0.009,
      "step": 2677780
    },
    {
      "epoch": 4.3822784312955365,
      "grad_norm": 0.13041087985038757,
      "learning_rate": 1.2435837457087697e-06,
      "loss": 0.0089,
      "step": 2677800
    },
    {
      "epoch": 4.382311161734189,
      "grad_norm": 0.21515503525733948,
      "learning_rate": 1.2435178534952526e-06,
      "loss": 0.0136,
      "step": 2677820
    },
    {
      "epoch": 4.382343892172843,
      "grad_norm": 0.16970305144786835,
      "learning_rate": 1.2434519612817354e-06,
      "loss": 0.012,
      "step": 2677840
    },
    {
      "epoch": 4.382376622611496,
      "grad_norm": 0.5646638870239258,
      "learning_rate": 1.2433860690682183e-06,
      "loss": 0.0067,
      "step": 2677860
    },
    {
      "epoch": 4.38240935305015,
      "grad_norm": 0.7295994162559509,
      "learning_rate": 1.2433201768547013e-06,
      "loss": 0.0097,
      "step": 2677880
    },
    {
      "epoch": 4.382442083488803,
      "grad_norm": 0.19882827997207642,
      "learning_rate": 1.243254284641184e-06,
      "loss": 0.0122,
      "step": 2677900
    },
    {
      "epoch": 4.382474813927456,
      "grad_norm": 0.12085028737783432,
      "learning_rate": 1.243188392427667e-06,
      "loss": 0.0108,
      "step": 2677920
    },
    {
      "epoch": 4.38250754436611,
      "grad_norm": 0.0898870974779129,
      "learning_rate": 1.2431225002141497e-06,
      "loss": 0.0102,
      "step": 2677940
    },
    {
      "epoch": 4.382540274804763,
      "grad_norm": 0.3963358700275421,
      "learning_rate": 1.2430566080006327e-06,
      "loss": 0.0082,
      "step": 2677960
    },
    {
      "epoch": 4.382573005243416,
      "grad_norm": 0.18014487624168396,
      "learning_rate": 1.2429907157871156e-06,
      "loss": 0.0087,
      "step": 2677980
    },
    {
      "epoch": 4.38260573568207,
      "grad_norm": 0.13473881781101227,
      "learning_rate": 1.2429248235735984e-06,
      "loss": 0.0074,
      "step": 2678000
    },
    {
      "epoch": 4.3826384661207225,
      "grad_norm": 0.5187764763832092,
      "learning_rate": 1.2428589313600813e-06,
      "loss": 0.0134,
      "step": 2678020
    },
    {
      "epoch": 4.382671196559376,
      "grad_norm": 0.32350262999534607,
      "learning_rate": 1.242793039146564e-06,
      "loss": 0.0091,
      "step": 2678040
    },
    {
      "epoch": 4.38270392699803,
      "grad_norm": 0.11277582496404648,
      "learning_rate": 1.242727146933047e-06,
      "loss": 0.0102,
      "step": 2678060
    },
    {
      "epoch": 4.382736657436683,
      "grad_norm": 0.17743293941020966,
      "learning_rate": 1.24266125471953e-06,
      "loss": 0.0078,
      "step": 2678080
    },
    {
      "epoch": 4.382769387875336,
      "grad_norm": 0.1808960735797882,
      "learning_rate": 1.2425953625060127e-06,
      "loss": 0.0113,
      "step": 2678100
    },
    {
      "epoch": 4.38280211831399,
      "grad_norm": 0.3069412112236023,
      "learning_rate": 1.2425294702924956e-06,
      "loss": 0.0134,
      "step": 2678120
    },
    {
      "epoch": 4.382834848752643,
      "grad_norm": 0.27786189317703247,
      "learning_rate": 1.2424635780789786e-06,
      "loss": 0.0066,
      "step": 2678140
    },
    {
      "epoch": 4.382867579191297,
      "grad_norm": 0.07925011962652206,
      "learning_rate": 1.2423976858654613e-06,
      "loss": 0.0089,
      "step": 2678160
    },
    {
      "epoch": 4.3829003096299495,
      "grad_norm": 0.15517394244670868,
      "learning_rate": 1.2423317936519443e-06,
      "loss": 0.006,
      "step": 2678180
    },
    {
      "epoch": 4.382933040068603,
      "grad_norm": 0.2776808440685272,
      "learning_rate": 1.242265901438427e-06,
      "loss": 0.0068,
      "step": 2678200
    },
    {
      "epoch": 4.382965770507257,
      "grad_norm": 0.27069705724716187,
      "learning_rate": 1.24220000922491e-06,
      "loss": 0.0093,
      "step": 2678220
    },
    {
      "epoch": 4.382998500945909,
      "grad_norm": 0.35776597261428833,
      "learning_rate": 1.242134117011393e-06,
      "loss": 0.0073,
      "step": 2678240
    },
    {
      "epoch": 4.383031231384563,
      "grad_norm": 0.15191416442394257,
      "learning_rate": 1.2420682247978757e-06,
      "loss": 0.009,
      "step": 2678260
    },
    {
      "epoch": 4.383063961823217,
      "grad_norm": 0.08088363707065582,
      "learning_rate": 1.2420023325843586e-06,
      "loss": 0.0121,
      "step": 2678280
    },
    {
      "epoch": 4.383096692261869,
      "grad_norm": 0.13780510425567627,
      "learning_rate": 1.2419364403708414e-06,
      "loss": 0.0066,
      "step": 2678300
    },
    {
      "epoch": 4.383129422700523,
      "grad_norm": 0.07400362193584442,
      "learning_rate": 1.2418705481573243e-06,
      "loss": 0.0109,
      "step": 2678320
    },
    {
      "epoch": 4.3831621531391765,
      "grad_norm": 0.13435859978199005,
      "learning_rate": 1.2418046559438073e-06,
      "loss": 0.007,
      "step": 2678340
    },
    {
      "epoch": 4.38319488357783,
      "grad_norm": 0.5252230763435364,
      "learning_rate": 1.24173876373029e-06,
      "loss": 0.0125,
      "step": 2678360
    },
    {
      "epoch": 4.383227614016483,
      "grad_norm": 0.17627379298210144,
      "learning_rate": 1.241672871516773e-06,
      "loss": 0.0124,
      "step": 2678380
    },
    {
      "epoch": 4.383260344455136,
      "grad_norm": 0.1524137556552887,
      "learning_rate": 1.2416069793032557e-06,
      "loss": 0.0091,
      "step": 2678400
    },
    {
      "epoch": 4.38329307489379,
      "grad_norm": 0.29350006580352783,
      "learning_rate": 1.2415410870897387e-06,
      "loss": 0.0094,
      "step": 2678420
    },
    {
      "epoch": 4.3833258053324435,
      "grad_norm": 0.2479066699743271,
      "learning_rate": 1.2414751948762216e-06,
      "loss": 0.0075,
      "step": 2678440
    },
    {
      "epoch": 4.383358535771096,
      "grad_norm": 0.17869818210601807,
      "learning_rate": 1.2414093026627043e-06,
      "loss": 0.0161,
      "step": 2678460
    },
    {
      "epoch": 4.38339126620975,
      "grad_norm": 0.25071170926094055,
      "learning_rate": 1.2413434104491873e-06,
      "loss": 0.0085,
      "step": 2678480
    },
    {
      "epoch": 4.383423996648403,
      "grad_norm": 0.27724236249923706,
      "learning_rate": 1.2412775182356702e-06,
      "loss": 0.0079,
      "step": 2678500
    },
    {
      "epoch": 4.383456727087056,
      "grad_norm": 0.12016088515520096,
      "learning_rate": 1.241211626022153e-06,
      "loss": 0.0092,
      "step": 2678520
    },
    {
      "epoch": 4.38348945752571,
      "grad_norm": 0.2573292553424835,
      "learning_rate": 1.241145733808636e-06,
      "loss": 0.0098,
      "step": 2678540
    },
    {
      "epoch": 4.383522187964363,
      "grad_norm": 0.152311310172081,
      "learning_rate": 1.2410798415951187e-06,
      "loss": 0.0097,
      "step": 2678560
    },
    {
      "epoch": 4.383554918403016,
      "grad_norm": 0.11149299144744873,
      "learning_rate": 1.2410139493816018e-06,
      "loss": 0.0101,
      "step": 2678580
    },
    {
      "epoch": 4.38358764884167,
      "grad_norm": 0.18399900197982788,
      "learning_rate": 1.2409480571680846e-06,
      "loss": 0.0051,
      "step": 2678600
    },
    {
      "epoch": 4.383620379280323,
      "grad_norm": 0.16406138241291046,
      "learning_rate": 1.2408821649545673e-06,
      "loss": 0.0076,
      "step": 2678620
    },
    {
      "epoch": 4.383653109718977,
      "grad_norm": 0.4604381322860718,
      "learning_rate": 1.2408162727410503e-06,
      "loss": 0.0143,
      "step": 2678640
    },
    {
      "epoch": 4.3836858401576295,
      "grad_norm": 0.23655152320861816,
      "learning_rate": 1.240750380527533e-06,
      "loss": 0.0071,
      "step": 2678660
    },
    {
      "epoch": 4.383718570596283,
      "grad_norm": 0.11040381342172623,
      "learning_rate": 1.2406844883140162e-06,
      "loss": 0.0105,
      "step": 2678680
    },
    {
      "epoch": 4.383751301034937,
      "grad_norm": 0.4715735614299774,
      "learning_rate": 1.240618596100499e-06,
      "loss": 0.0059,
      "step": 2678700
    },
    {
      "epoch": 4.383784031473589,
      "grad_norm": 0.19442327320575714,
      "learning_rate": 1.2405527038869819e-06,
      "loss": 0.0068,
      "step": 2678720
    },
    {
      "epoch": 4.383816761912243,
      "grad_norm": 0.2936640679836273,
      "learning_rate": 1.2404868116734646e-06,
      "loss": 0.0116,
      "step": 2678740
    },
    {
      "epoch": 4.383849492350897,
      "grad_norm": 0.18317387998104095,
      "learning_rate": 1.2404209194599476e-06,
      "loss": 0.0071,
      "step": 2678760
    },
    {
      "epoch": 4.38388222278955,
      "grad_norm": 0.3071843683719635,
      "learning_rate": 1.2403550272464305e-06,
      "loss": 0.0096,
      "step": 2678780
    },
    {
      "epoch": 4.383914953228203,
      "grad_norm": 0.20054854452610016,
      "learning_rate": 1.2402891350329132e-06,
      "loss": 0.0083,
      "step": 2678800
    },
    {
      "epoch": 4.3839476836668565,
      "grad_norm": 0.12267983704805374,
      "learning_rate": 1.2402232428193962e-06,
      "loss": 0.0098,
      "step": 2678820
    },
    {
      "epoch": 4.38398041410551,
      "grad_norm": 0.06310953199863434,
      "learning_rate": 1.240157350605879e-06,
      "loss": 0.0066,
      "step": 2678840
    },
    {
      "epoch": 4.384013144544163,
      "grad_norm": 0.02109321765601635,
      "learning_rate": 1.2400914583923619e-06,
      "loss": 0.0089,
      "step": 2678860
    },
    {
      "epoch": 4.384045874982816,
      "grad_norm": 0.15878665447235107,
      "learning_rate": 1.2400255661788448e-06,
      "loss": 0.0109,
      "step": 2678880
    },
    {
      "epoch": 4.38407860542147,
      "grad_norm": 0.13956840336322784,
      "learning_rate": 1.2399596739653276e-06,
      "loss": 0.0091,
      "step": 2678900
    },
    {
      "epoch": 4.384111335860124,
      "grad_norm": 0.3146795630455017,
      "learning_rate": 1.2398937817518105e-06,
      "loss": 0.0091,
      "step": 2678920
    },
    {
      "epoch": 4.384144066298776,
      "grad_norm": 0.3548833727836609,
      "learning_rate": 1.2398278895382935e-06,
      "loss": 0.0088,
      "step": 2678940
    },
    {
      "epoch": 4.38417679673743,
      "grad_norm": 0.38531559705734253,
      "learning_rate": 1.2397619973247762e-06,
      "loss": 0.0083,
      "step": 2678960
    },
    {
      "epoch": 4.3842095271760835,
      "grad_norm": 0.10308771580457687,
      "learning_rate": 1.2396961051112592e-06,
      "loss": 0.01,
      "step": 2678980
    },
    {
      "epoch": 4.384242257614736,
      "grad_norm": 0.26902467012405396,
      "learning_rate": 1.239630212897742e-06,
      "loss": 0.0123,
      "step": 2679000
    },
    {
      "epoch": 4.38427498805339,
      "grad_norm": 0.2189837098121643,
      "learning_rate": 1.2395643206842249e-06,
      "loss": 0.0076,
      "step": 2679020
    },
    {
      "epoch": 4.384307718492043,
      "grad_norm": 0.3568318486213684,
      "learning_rate": 1.2394984284707078e-06,
      "loss": 0.0122,
      "step": 2679040
    },
    {
      "epoch": 4.384340448930697,
      "grad_norm": 0.1550581306219101,
      "learning_rate": 1.2394325362571906e-06,
      "loss": 0.0079,
      "step": 2679060
    },
    {
      "epoch": 4.38437317936935,
      "grad_norm": 0.2562357485294342,
      "learning_rate": 1.2393666440436735e-06,
      "loss": 0.0078,
      "step": 2679080
    },
    {
      "epoch": 4.384405909808003,
      "grad_norm": 0.21185851097106934,
      "learning_rate": 1.2393007518301563e-06,
      "loss": 0.009,
      "step": 2679100
    },
    {
      "epoch": 4.384438640246657,
      "grad_norm": 0.19691413640975952,
      "learning_rate": 1.2392348596166392e-06,
      "loss": 0.0097,
      "step": 2679120
    },
    {
      "epoch": 4.38447137068531,
      "grad_norm": 0.20261012017726898,
      "learning_rate": 1.2391689674031222e-06,
      "loss": 0.015,
      "step": 2679140
    },
    {
      "epoch": 4.384504101123963,
      "grad_norm": 0.18494439125061035,
      "learning_rate": 1.2391030751896049e-06,
      "loss": 0.0075,
      "step": 2679160
    },
    {
      "epoch": 4.384536831562617,
      "grad_norm": 0.24623143672943115,
      "learning_rate": 1.2390371829760878e-06,
      "loss": 0.01,
      "step": 2679180
    },
    {
      "epoch": 4.38456956200127,
      "grad_norm": 0.1342303603887558,
      "learning_rate": 1.2389712907625708e-06,
      "loss": 0.0084,
      "step": 2679200
    },
    {
      "epoch": 4.384602292439923,
      "grad_norm": 0.10186116397380829,
      "learning_rate": 1.2389053985490535e-06,
      "loss": 0.0118,
      "step": 2679220
    },
    {
      "epoch": 4.384635022878577,
      "grad_norm": 0.1610802710056305,
      "learning_rate": 1.2388395063355365e-06,
      "loss": 0.0079,
      "step": 2679240
    },
    {
      "epoch": 4.38466775331723,
      "grad_norm": 0.21958526968955994,
      "learning_rate": 1.2387736141220192e-06,
      "loss": 0.0124,
      "step": 2679260
    },
    {
      "epoch": 4.384700483755883,
      "grad_norm": 0.4462353587150574,
      "learning_rate": 1.2387077219085022e-06,
      "loss": 0.0118,
      "step": 2679280
    },
    {
      "epoch": 4.3847332141945365,
      "grad_norm": 0.13208550214767456,
      "learning_rate": 1.2386418296949851e-06,
      "loss": 0.0074,
      "step": 2679300
    },
    {
      "epoch": 4.38476594463319,
      "grad_norm": 0.681566596031189,
      "learning_rate": 1.2385759374814679e-06,
      "loss": 0.0123,
      "step": 2679320
    },
    {
      "epoch": 4.384798675071844,
      "grad_norm": 0.19616326689720154,
      "learning_rate": 1.2385100452679508e-06,
      "loss": 0.0065,
      "step": 2679340
    },
    {
      "epoch": 4.384831405510496,
      "grad_norm": 0.48762497305870056,
      "learning_rate": 1.2384441530544336e-06,
      "loss": 0.009,
      "step": 2679360
    },
    {
      "epoch": 4.38486413594915,
      "grad_norm": 0.2884407639503479,
      "learning_rate": 1.2383782608409165e-06,
      "loss": 0.0105,
      "step": 2679380
    },
    {
      "epoch": 4.384896866387804,
      "grad_norm": 0.25515711307525635,
      "learning_rate": 1.2383123686273995e-06,
      "loss": 0.0083,
      "step": 2679400
    },
    {
      "epoch": 4.384929596826456,
      "grad_norm": 0.11118761450052261,
      "learning_rate": 1.2382464764138822e-06,
      "loss": 0.0108,
      "step": 2679420
    },
    {
      "epoch": 4.38496232726511,
      "grad_norm": 0.362527459859848,
      "learning_rate": 1.2381805842003652e-06,
      "loss": 0.0089,
      "step": 2679440
    },
    {
      "epoch": 4.3849950577037635,
      "grad_norm": 0.24087250232696533,
      "learning_rate": 1.2381146919868481e-06,
      "loss": 0.0071,
      "step": 2679460
    },
    {
      "epoch": 4.385027788142416,
      "grad_norm": 0.10216233879327774,
      "learning_rate": 1.2380487997733308e-06,
      "loss": 0.0074,
      "step": 2679480
    },
    {
      "epoch": 4.38506051858107,
      "grad_norm": 0.0657673254609108,
      "learning_rate": 1.2379829075598138e-06,
      "loss": 0.0046,
      "step": 2679500
    },
    {
      "epoch": 4.385093249019723,
      "grad_norm": 0.4398542642593384,
      "learning_rate": 1.2379170153462965e-06,
      "loss": 0.0107,
      "step": 2679520
    },
    {
      "epoch": 4.385125979458377,
      "grad_norm": 0.6317284107208252,
      "learning_rate": 1.2378511231327795e-06,
      "loss": 0.0152,
      "step": 2679540
    },
    {
      "epoch": 4.38515870989703,
      "grad_norm": 0.31578972935676575,
      "learning_rate": 1.2377852309192624e-06,
      "loss": 0.0099,
      "step": 2679560
    },
    {
      "epoch": 4.385191440335683,
      "grad_norm": 0.37992116808891296,
      "learning_rate": 1.2377193387057452e-06,
      "loss": 0.006,
      "step": 2679580
    },
    {
      "epoch": 4.385224170774337,
      "grad_norm": 0.16100969910621643,
      "learning_rate": 1.2376534464922281e-06,
      "loss": 0.0091,
      "step": 2679600
    },
    {
      "epoch": 4.3852569012129905,
      "grad_norm": 0.3721902072429657,
      "learning_rate": 1.2375875542787109e-06,
      "loss": 0.009,
      "step": 2679620
    },
    {
      "epoch": 4.385289631651643,
      "grad_norm": 0.20782554149627686,
      "learning_rate": 1.2375216620651938e-06,
      "loss": 0.0059,
      "step": 2679640
    },
    {
      "epoch": 4.385322362090297,
      "grad_norm": 0.16629725694656372,
      "learning_rate": 1.2374557698516768e-06,
      "loss": 0.0096,
      "step": 2679660
    },
    {
      "epoch": 4.38535509252895,
      "grad_norm": 0.4339062571525574,
      "learning_rate": 1.2373898776381595e-06,
      "loss": 0.0081,
      "step": 2679680
    },
    {
      "epoch": 4.385387822967603,
      "grad_norm": 0.5768550634384155,
      "learning_rate": 1.2373239854246425e-06,
      "loss": 0.008,
      "step": 2679700
    },
    {
      "epoch": 4.385420553406257,
      "grad_norm": 0.10621384531259537,
      "learning_rate": 1.2372580932111252e-06,
      "loss": 0.0107,
      "step": 2679720
    },
    {
      "epoch": 4.38545328384491,
      "grad_norm": 0.2654082477092743,
      "learning_rate": 1.2371922009976082e-06,
      "loss": 0.0095,
      "step": 2679740
    },
    {
      "epoch": 4.385486014283563,
      "grad_norm": 0.15947632491588593,
      "learning_rate": 1.2371263087840911e-06,
      "loss": 0.0072,
      "step": 2679760
    },
    {
      "epoch": 4.385518744722217,
      "grad_norm": 0.09609196335077286,
      "learning_rate": 1.2370604165705738e-06,
      "loss": 0.0109,
      "step": 2679780
    },
    {
      "epoch": 4.38555147516087,
      "grad_norm": 0.09098035097122192,
      "learning_rate": 1.2369945243570568e-06,
      "loss": 0.0097,
      "step": 2679800
    },
    {
      "epoch": 4.385584205599524,
      "grad_norm": 0.1854187399148941,
      "learning_rate": 1.2369286321435398e-06,
      "loss": 0.0086,
      "step": 2679820
    },
    {
      "epoch": 4.3856169360381765,
      "grad_norm": 0.1535186916589737,
      "learning_rate": 1.2368627399300225e-06,
      "loss": 0.0104,
      "step": 2679840
    },
    {
      "epoch": 4.38564966647683,
      "grad_norm": 0.2515038251876831,
      "learning_rate": 1.2367968477165054e-06,
      "loss": 0.011,
      "step": 2679860
    },
    {
      "epoch": 4.385682396915484,
      "grad_norm": 0.5915703773498535,
      "learning_rate": 1.2367309555029882e-06,
      "loss": 0.0113,
      "step": 2679880
    },
    {
      "epoch": 4.385715127354137,
      "grad_norm": 0.11299513280391693,
      "learning_rate": 1.2366650632894713e-06,
      "loss": 0.0089,
      "step": 2679900
    },
    {
      "epoch": 4.38574785779279,
      "grad_norm": 0.2701158821582794,
      "learning_rate": 1.236599171075954e-06,
      "loss": 0.0106,
      "step": 2679920
    },
    {
      "epoch": 4.385780588231444,
      "grad_norm": 0.20286719501018524,
      "learning_rate": 1.2365332788624368e-06,
      "loss": 0.0082,
      "step": 2679940
    },
    {
      "epoch": 4.385813318670097,
      "grad_norm": 0.11449624598026276,
      "learning_rate": 1.2364673866489198e-06,
      "loss": 0.0074,
      "step": 2679960
    },
    {
      "epoch": 4.38584604910875,
      "grad_norm": 0.23956099152565002,
      "learning_rate": 1.2364014944354025e-06,
      "loss": 0.013,
      "step": 2679980
    },
    {
      "epoch": 4.3858787795474035,
      "grad_norm": 0.33255478739738464,
      "learning_rate": 1.2363356022218857e-06,
      "loss": 0.0077,
      "step": 2680000
    },
    {
      "epoch": 4.385911509986057,
      "grad_norm": 0.2787662446498871,
      "learning_rate": 1.2362697100083684e-06,
      "loss": 0.0093,
      "step": 2680020
    },
    {
      "epoch": 4.38594424042471,
      "grad_norm": 0.42423662543296814,
      "learning_rate": 1.2362038177948514e-06,
      "loss": 0.0095,
      "step": 2680040
    },
    {
      "epoch": 4.385976970863363,
      "grad_norm": 0.08585114032030106,
      "learning_rate": 1.2361379255813341e-06,
      "loss": 0.0093,
      "step": 2680060
    },
    {
      "epoch": 4.386009701302017,
      "grad_norm": 0.21420109272003174,
      "learning_rate": 1.236072033367817e-06,
      "loss": 0.0133,
      "step": 2680080
    },
    {
      "epoch": 4.3860424317406705,
      "grad_norm": 0.1512991040945053,
      "learning_rate": 1.2360061411543e-06,
      "loss": 0.0093,
      "step": 2680100
    },
    {
      "epoch": 4.386075162179323,
      "grad_norm": 0.5492339134216309,
      "learning_rate": 1.2359402489407828e-06,
      "loss": 0.0069,
      "step": 2680120
    },
    {
      "epoch": 4.386107892617977,
      "grad_norm": 0.09167704731225967,
      "learning_rate": 1.2358743567272657e-06,
      "loss": 0.0105,
      "step": 2680140
    },
    {
      "epoch": 4.38614062305663,
      "grad_norm": 0.15402644872665405,
      "learning_rate": 1.2358084645137484e-06,
      "loss": 0.0089,
      "step": 2680160
    },
    {
      "epoch": 4.386173353495283,
      "grad_norm": 0.3274058401584625,
      "learning_rate": 1.2357425723002314e-06,
      "loss": 0.0102,
      "step": 2680180
    },
    {
      "epoch": 4.386206083933937,
      "grad_norm": 0.27238306403160095,
      "learning_rate": 1.2356766800867143e-06,
      "loss": 0.0084,
      "step": 2680200
    },
    {
      "epoch": 4.38623881437259,
      "grad_norm": 0.1522742509841919,
      "learning_rate": 1.235610787873197e-06,
      "loss": 0.0076,
      "step": 2680220
    },
    {
      "epoch": 4.386271544811244,
      "grad_norm": 0.5321296453475952,
      "learning_rate": 1.23554489565968e-06,
      "loss": 0.0138,
      "step": 2680240
    },
    {
      "epoch": 4.386304275249897,
      "grad_norm": 0.11511722952127457,
      "learning_rate": 1.235479003446163e-06,
      "loss": 0.0063,
      "step": 2680260
    },
    {
      "epoch": 4.38633700568855,
      "grad_norm": 0.20191997289657593,
      "learning_rate": 1.2354131112326457e-06,
      "loss": 0.0066,
      "step": 2680280
    },
    {
      "epoch": 4.386369736127204,
      "grad_norm": 0.051664941012859344,
      "learning_rate": 1.2353472190191287e-06,
      "loss": 0.0073,
      "step": 2680300
    },
    {
      "epoch": 4.3864024665658565,
      "grad_norm": 0.368790864944458,
      "learning_rate": 1.2352813268056114e-06,
      "loss": 0.0098,
      "step": 2680320
    },
    {
      "epoch": 4.38643519700451,
      "grad_norm": 0.25703105330467224,
      "learning_rate": 1.2352154345920944e-06,
      "loss": 0.0091,
      "step": 2680340
    },
    {
      "epoch": 4.386467927443164,
      "grad_norm": 0.16971315443515778,
      "learning_rate": 1.2351495423785773e-06,
      "loss": 0.01,
      "step": 2680360
    },
    {
      "epoch": 4.386500657881817,
      "grad_norm": 0.14797891676425934,
      "learning_rate": 1.23508365016506e-06,
      "loss": 0.0074,
      "step": 2680380
    },
    {
      "epoch": 4.38653338832047,
      "grad_norm": 0.24555398523807526,
      "learning_rate": 1.235017757951543e-06,
      "loss": 0.0092,
      "step": 2680400
    },
    {
      "epoch": 4.386566118759124,
      "grad_norm": 0.10690432041883469,
      "learning_rate": 1.2349518657380258e-06,
      "loss": 0.0086,
      "step": 2680420
    },
    {
      "epoch": 4.386598849197777,
      "grad_norm": 0.3969445526599884,
      "learning_rate": 1.2348859735245087e-06,
      "loss": 0.0137,
      "step": 2680440
    },
    {
      "epoch": 4.38663157963643,
      "grad_norm": 0.14852763712406158,
      "learning_rate": 1.2348200813109917e-06,
      "loss": 0.0089,
      "step": 2680460
    },
    {
      "epoch": 4.3866643100750835,
      "grad_norm": 0.4856070876121521,
      "learning_rate": 1.2347541890974744e-06,
      "loss": 0.0091,
      "step": 2680480
    },
    {
      "epoch": 4.386697040513737,
      "grad_norm": 0.1783725470304489,
      "learning_rate": 1.2346882968839573e-06,
      "loss": 0.0143,
      "step": 2680500
    },
    {
      "epoch": 4.386729770952391,
      "grad_norm": 0.5390346646308899,
      "learning_rate": 1.2346224046704403e-06,
      "loss": 0.0115,
      "step": 2680520
    },
    {
      "epoch": 4.386762501391043,
      "grad_norm": 0.08233104646205902,
      "learning_rate": 1.234556512456923e-06,
      "loss": 0.0077,
      "step": 2680540
    },
    {
      "epoch": 4.386795231829697,
      "grad_norm": 0.409553587436676,
      "learning_rate": 1.234490620243406e-06,
      "loss": 0.0078,
      "step": 2680560
    },
    {
      "epoch": 4.386827962268351,
      "grad_norm": 0.09834412485361099,
      "learning_rate": 1.2344247280298887e-06,
      "loss": 0.0103,
      "step": 2680580
    },
    {
      "epoch": 4.386860692707003,
      "grad_norm": 0.08444394171237946,
      "learning_rate": 1.2343588358163717e-06,
      "loss": 0.0132,
      "step": 2680600
    },
    {
      "epoch": 4.386893423145657,
      "grad_norm": 0.25960710644721985,
      "learning_rate": 1.2342929436028546e-06,
      "loss": 0.0084,
      "step": 2680620
    },
    {
      "epoch": 4.3869261535843105,
      "grad_norm": 0.23076604306697845,
      "learning_rate": 1.2342270513893374e-06,
      "loss": 0.0081,
      "step": 2680640
    },
    {
      "epoch": 4.386958884022964,
      "grad_norm": 0.42088279128074646,
      "learning_rate": 1.2341611591758203e-06,
      "loss": 0.0095,
      "step": 2680660
    },
    {
      "epoch": 4.386991614461617,
      "grad_norm": 0.5663739442825317,
      "learning_rate": 1.234095266962303e-06,
      "loss": 0.0083,
      "step": 2680680
    },
    {
      "epoch": 4.38702434490027,
      "grad_norm": 0.2998652458190918,
      "learning_rate": 1.234029374748786e-06,
      "loss": 0.0088,
      "step": 2680700
    },
    {
      "epoch": 4.387057075338924,
      "grad_norm": 0.18651694059371948,
      "learning_rate": 1.233963482535269e-06,
      "loss": 0.0115,
      "step": 2680720
    },
    {
      "epoch": 4.387089805777577,
      "grad_norm": 0.24242515861988068,
      "learning_rate": 1.2338975903217517e-06,
      "loss": 0.012,
      "step": 2680740
    },
    {
      "epoch": 4.38712253621623,
      "grad_norm": 1.0305646657943726,
      "learning_rate": 1.2338316981082347e-06,
      "loss": 0.0119,
      "step": 2680760
    },
    {
      "epoch": 4.387155266654884,
      "grad_norm": 0.212349534034729,
      "learning_rate": 1.2337658058947176e-06,
      "loss": 0.0076,
      "step": 2680780
    },
    {
      "epoch": 4.3871879970935375,
      "grad_norm": 0.0642305240035057,
      "learning_rate": 1.2336999136812004e-06,
      "loss": 0.0091,
      "step": 2680800
    },
    {
      "epoch": 4.38722072753219,
      "grad_norm": 0.2565463185310364,
      "learning_rate": 1.2336340214676833e-06,
      "loss": 0.0062,
      "step": 2680820
    },
    {
      "epoch": 4.387253457970844,
      "grad_norm": 0.4576609134674072,
      "learning_rate": 1.233568129254166e-06,
      "loss": 0.009,
      "step": 2680840
    },
    {
      "epoch": 4.387286188409497,
      "grad_norm": 0.39690521359443665,
      "learning_rate": 1.233502237040649e-06,
      "loss": 0.0085,
      "step": 2680860
    },
    {
      "epoch": 4.38731891884815,
      "grad_norm": 0.23535498976707458,
      "learning_rate": 1.233436344827132e-06,
      "loss": 0.0106,
      "step": 2680880
    },
    {
      "epoch": 4.387351649286804,
      "grad_norm": 0.3144150376319885,
      "learning_rate": 1.2333704526136147e-06,
      "loss": 0.0072,
      "step": 2680900
    },
    {
      "epoch": 4.387384379725457,
      "grad_norm": 0.23207947611808777,
      "learning_rate": 1.2333045604000976e-06,
      "loss": 0.0123,
      "step": 2680920
    },
    {
      "epoch": 4.38741711016411,
      "grad_norm": 0.284018874168396,
      "learning_rate": 1.2332386681865804e-06,
      "loss": 0.009,
      "step": 2680940
    },
    {
      "epoch": 4.3874498406027636,
      "grad_norm": 0.23018355667591095,
      "learning_rate": 1.2331727759730633e-06,
      "loss": 0.008,
      "step": 2680960
    },
    {
      "epoch": 4.387482571041417,
      "grad_norm": 0.4566575586795807,
      "learning_rate": 1.2331068837595463e-06,
      "loss": 0.0081,
      "step": 2680980
    },
    {
      "epoch": 4.387515301480071,
      "grad_norm": 0.09564270079135895,
      "learning_rate": 1.233040991546029e-06,
      "loss": 0.006,
      "step": 2681000
    },
    {
      "epoch": 4.3875480319187234,
      "grad_norm": 0.4433310031890869,
      "learning_rate": 1.232975099332512e-06,
      "loss": 0.0152,
      "step": 2681020
    },
    {
      "epoch": 4.387580762357377,
      "grad_norm": 0.12254456430673599,
      "learning_rate": 1.2329092071189947e-06,
      "loss": 0.0086,
      "step": 2681040
    },
    {
      "epoch": 4.387613492796031,
      "grad_norm": 0.1999272108078003,
      "learning_rate": 1.2328433149054777e-06,
      "loss": 0.0095,
      "step": 2681060
    },
    {
      "epoch": 4.387646223234684,
      "grad_norm": 0.2360253632068634,
      "learning_rate": 1.2327774226919606e-06,
      "loss": 0.0125,
      "step": 2681080
    },
    {
      "epoch": 4.387678953673337,
      "grad_norm": 0.25388386845588684,
      "learning_rate": 1.2327115304784434e-06,
      "loss": 0.0093,
      "step": 2681100
    },
    {
      "epoch": 4.3877116841119905,
      "grad_norm": 0.07107578963041306,
      "learning_rate": 1.2326456382649263e-06,
      "loss": 0.0097,
      "step": 2681120
    },
    {
      "epoch": 4.387744414550644,
      "grad_norm": 0.2294481247663498,
      "learning_rate": 1.2325797460514093e-06,
      "loss": 0.0088,
      "step": 2681140
    },
    {
      "epoch": 4.387777144989297,
      "grad_norm": 0.2514398396015167,
      "learning_rate": 1.232513853837892e-06,
      "loss": 0.0081,
      "step": 2681160
    },
    {
      "epoch": 4.38780987542795,
      "grad_norm": 0.13454324007034302,
      "learning_rate": 1.232447961624375e-06,
      "loss": 0.0081,
      "step": 2681180
    },
    {
      "epoch": 4.387842605866604,
      "grad_norm": 0.3651191294193268,
      "learning_rate": 1.2323820694108577e-06,
      "loss": 0.0101,
      "step": 2681200
    },
    {
      "epoch": 4.387875336305257,
      "grad_norm": 0.11051806062459946,
      "learning_rate": 1.2323161771973409e-06,
      "loss": 0.0094,
      "step": 2681220
    },
    {
      "epoch": 4.38790806674391,
      "grad_norm": 0.5024678111076355,
      "learning_rate": 1.2322502849838236e-06,
      "loss": 0.0088,
      "step": 2681240
    },
    {
      "epoch": 4.387940797182564,
      "grad_norm": 0.3600355088710785,
      "learning_rate": 1.2321843927703065e-06,
      "loss": 0.0071,
      "step": 2681260
    },
    {
      "epoch": 4.3879735276212175,
      "grad_norm": 0.3625011146068573,
      "learning_rate": 1.2321185005567893e-06,
      "loss": 0.0094,
      "step": 2681280
    },
    {
      "epoch": 4.38800625805987,
      "grad_norm": 0.0728825107216835,
      "learning_rate": 1.232052608343272e-06,
      "loss": 0.0059,
      "step": 2681300
    },
    {
      "epoch": 4.388038988498524,
      "grad_norm": 0.5033314824104309,
      "learning_rate": 1.2319867161297552e-06,
      "loss": 0.0075,
      "step": 2681320
    },
    {
      "epoch": 4.388071718937177,
      "grad_norm": 0.10035203397274017,
      "learning_rate": 1.231920823916238e-06,
      "loss": 0.0092,
      "step": 2681340
    },
    {
      "epoch": 4.388104449375831,
      "grad_norm": 0.33761337399482727,
      "learning_rate": 1.2318549317027209e-06,
      "loss": 0.009,
      "step": 2681360
    },
    {
      "epoch": 4.388137179814484,
      "grad_norm": 0.43351101875305176,
      "learning_rate": 1.2317890394892036e-06,
      "loss": 0.0084,
      "step": 2681380
    },
    {
      "epoch": 4.388169910253137,
      "grad_norm": 0.264149010181427,
      "learning_rate": 1.2317231472756866e-06,
      "loss": 0.007,
      "step": 2681400
    },
    {
      "epoch": 4.388202640691791,
      "grad_norm": 0.21370279788970947,
      "learning_rate": 1.2316572550621695e-06,
      "loss": 0.0089,
      "step": 2681420
    },
    {
      "epoch": 4.388235371130444,
      "grad_norm": 0.24342688918113708,
      "learning_rate": 1.2315913628486523e-06,
      "loss": 0.0109,
      "step": 2681440
    },
    {
      "epoch": 4.388268101569097,
      "grad_norm": 0.11970183998346329,
      "learning_rate": 1.2315254706351352e-06,
      "loss": 0.0047,
      "step": 2681460
    },
    {
      "epoch": 4.388300832007751,
      "grad_norm": 0.2293037623167038,
      "learning_rate": 1.231459578421618e-06,
      "loss": 0.0076,
      "step": 2681480
    },
    {
      "epoch": 4.3883335624464035,
      "grad_norm": 0.09783323109149933,
      "learning_rate": 1.231393686208101e-06,
      "loss": 0.0072,
      "step": 2681500
    },
    {
      "epoch": 4.388366292885057,
      "grad_norm": 0.1428854763507843,
      "learning_rate": 1.2313277939945839e-06,
      "loss": 0.0057,
      "step": 2681520
    },
    {
      "epoch": 4.388399023323711,
      "grad_norm": 0.3721349537372589,
      "learning_rate": 1.2312619017810666e-06,
      "loss": 0.0093,
      "step": 2681540
    },
    {
      "epoch": 4.388431753762364,
      "grad_norm": 0.24748972058296204,
      "learning_rate": 1.2311960095675495e-06,
      "loss": 0.0114,
      "step": 2681560
    },
    {
      "epoch": 4.388464484201017,
      "grad_norm": 0.081231988966465,
      "learning_rate": 1.2311301173540325e-06,
      "loss": 0.0084,
      "step": 2681580
    },
    {
      "epoch": 4.388497214639671,
      "grad_norm": 0.19875362515449524,
      "learning_rate": 1.2310642251405152e-06,
      "loss": 0.0111,
      "step": 2681600
    },
    {
      "epoch": 4.388529945078324,
      "grad_norm": 0.09289738535881042,
      "learning_rate": 1.2309983329269982e-06,
      "loss": 0.0107,
      "step": 2681620
    },
    {
      "epoch": 4.388562675516977,
      "grad_norm": 0.24136362969875336,
      "learning_rate": 1.230932440713481e-06,
      "loss": 0.0097,
      "step": 2681640
    },
    {
      "epoch": 4.3885954059556305,
      "grad_norm": 0.36600595712661743,
      "learning_rate": 1.2308665484999639e-06,
      "loss": 0.011,
      "step": 2681660
    },
    {
      "epoch": 4.388628136394284,
      "grad_norm": 0.2015722543001175,
      "learning_rate": 1.2308006562864468e-06,
      "loss": 0.0119,
      "step": 2681680
    },
    {
      "epoch": 4.388660866832938,
      "grad_norm": 0.1267555207014084,
      "learning_rate": 1.2307347640729296e-06,
      "loss": 0.0132,
      "step": 2681700
    },
    {
      "epoch": 4.38869359727159,
      "grad_norm": 0.4094616174697876,
      "learning_rate": 1.2306688718594125e-06,
      "loss": 0.0109,
      "step": 2681720
    },
    {
      "epoch": 4.388726327710244,
      "grad_norm": 0.20707161724567413,
      "learning_rate": 1.2306029796458953e-06,
      "loss": 0.0108,
      "step": 2681740
    },
    {
      "epoch": 4.3887590581488976,
      "grad_norm": 0.5230227112770081,
      "learning_rate": 1.2305370874323782e-06,
      "loss": 0.0084,
      "step": 2681760
    },
    {
      "epoch": 4.38879178858755,
      "grad_norm": 0.3077232241630554,
      "learning_rate": 1.2304711952188612e-06,
      "loss": 0.0074,
      "step": 2681780
    },
    {
      "epoch": 4.388824519026204,
      "grad_norm": 0.14349082112312317,
      "learning_rate": 1.230405303005344e-06,
      "loss": 0.0114,
      "step": 2681800
    },
    {
      "epoch": 4.3888572494648574,
      "grad_norm": 0.13195259869098663,
      "learning_rate": 1.2303394107918269e-06,
      "loss": 0.0071,
      "step": 2681820
    },
    {
      "epoch": 4.388889979903511,
      "grad_norm": 0.1751110851764679,
      "learning_rate": 1.2302735185783098e-06,
      "loss": 0.0146,
      "step": 2681840
    },
    {
      "epoch": 4.388922710342164,
      "grad_norm": 0.15594467520713806,
      "learning_rate": 1.2302076263647925e-06,
      "loss": 0.0134,
      "step": 2681860
    },
    {
      "epoch": 4.388955440780817,
      "grad_norm": 0.2626103162765503,
      "learning_rate": 1.2301417341512755e-06,
      "loss": 0.0055,
      "step": 2681880
    },
    {
      "epoch": 4.388988171219471,
      "grad_norm": 0.03935869410634041,
      "learning_rate": 1.2300758419377582e-06,
      "loss": 0.006,
      "step": 2681900
    },
    {
      "epoch": 4.389020901658124,
      "grad_norm": 0.37635573744773865,
      "learning_rate": 1.2300099497242412e-06,
      "loss": 0.0077,
      "step": 2681920
    },
    {
      "epoch": 4.389053632096777,
      "grad_norm": 0.3797900676727295,
      "learning_rate": 1.2299440575107241e-06,
      "loss": 0.0149,
      "step": 2681940
    },
    {
      "epoch": 4.389086362535431,
      "grad_norm": 0.1282677799463272,
      "learning_rate": 1.2298781652972069e-06,
      "loss": 0.0086,
      "step": 2681960
    },
    {
      "epoch": 4.389119092974084,
      "grad_norm": 0.10574788600206375,
      "learning_rate": 1.2298122730836898e-06,
      "loss": 0.0116,
      "step": 2681980
    },
    {
      "epoch": 4.389151823412737,
      "grad_norm": 0.6545779705047607,
      "learning_rate": 1.2297463808701726e-06,
      "loss": 0.0139,
      "step": 2682000
    },
    {
      "epoch": 4.389184553851391,
      "grad_norm": 0.6868723034858704,
      "learning_rate": 1.2296804886566555e-06,
      "loss": 0.0083,
      "step": 2682020
    },
    {
      "epoch": 4.389217284290044,
      "grad_norm": 0.12924571335315704,
      "learning_rate": 1.2296145964431385e-06,
      "loss": 0.0078,
      "step": 2682040
    },
    {
      "epoch": 4.389250014728697,
      "grad_norm": 0.15957669913768768,
      "learning_rate": 1.2295487042296212e-06,
      "loss": 0.0067,
      "step": 2682060
    },
    {
      "epoch": 4.389282745167351,
      "grad_norm": 0.10738988220691681,
      "learning_rate": 1.2294828120161042e-06,
      "loss": 0.0078,
      "step": 2682080
    },
    {
      "epoch": 4.389315475606004,
      "grad_norm": 0.1379663646221161,
      "learning_rate": 1.2294169198025871e-06,
      "loss": 0.0074,
      "step": 2682100
    },
    {
      "epoch": 4.389348206044658,
      "grad_norm": 0.1738957315683365,
      "learning_rate": 1.2293510275890699e-06,
      "loss": 0.012,
      "step": 2682120
    },
    {
      "epoch": 4.3893809364833105,
      "grad_norm": 0.675556480884552,
      "learning_rate": 1.2292851353755528e-06,
      "loss": 0.0163,
      "step": 2682140
    },
    {
      "epoch": 4.389413666921964,
      "grad_norm": 0.1474568247795105,
      "learning_rate": 1.2292192431620355e-06,
      "loss": 0.0097,
      "step": 2682160
    },
    {
      "epoch": 4.389446397360618,
      "grad_norm": 0.22648602724075317,
      "learning_rate": 1.2291533509485185e-06,
      "loss": 0.0107,
      "step": 2682180
    },
    {
      "epoch": 4.38947912779927,
      "grad_norm": 0.28602737188339233,
      "learning_rate": 1.2290874587350015e-06,
      "loss": 0.0082,
      "step": 2682200
    },
    {
      "epoch": 4.389511858237924,
      "grad_norm": 0.22132104635238647,
      "learning_rate": 1.2290215665214842e-06,
      "loss": 0.0121,
      "step": 2682220
    },
    {
      "epoch": 4.389544588676578,
      "grad_norm": 0.3875625431537628,
      "learning_rate": 1.2289556743079671e-06,
      "loss": 0.0119,
      "step": 2682240
    },
    {
      "epoch": 4.389577319115231,
      "grad_norm": 0.162700355052948,
      "learning_rate": 1.2288897820944499e-06,
      "loss": 0.0091,
      "step": 2682260
    },
    {
      "epoch": 4.389610049553884,
      "grad_norm": 0.2892039120197296,
      "learning_rate": 1.2288238898809328e-06,
      "loss": 0.0078,
      "step": 2682280
    },
    {
      "epoch": 4.3896427799925375,
      "grad_norm": 0.387363076210022,
      "learning_rate": 1.2287579976674158e-06,
      "loss": 0.0076,
      "step": 2682300
    },
    {
      "epoch": 4.389675510431191,
      "grad_norm": 0.13290299475193024,
      "learning_rate": 1.2286921054538985e-06,
      "loss": 0.0105,
      "step": 2682320
    },
    {
      "epoch": 4.389708240869844,
      "grad_norm": 0.14664672315120697,
      "learning_rate": 1.2286262132403815e-06,
      "loss": 0.0119,
      "step": 2682340
    },
    {
      "epoch": 4.389740971308497,
      "grad_norm": 0.3536660373210907,
      "learning_rate": 1.2285603210268642e-06,
      "loss": 0.0058,
      "step": 2682360
    },
    {
      "epoch": 4.389773701747151,
      "grad_norm": 0.2670799195766449,
      "learning_rate": 1.2284944288133472e-06,
      "loss": 0.0125,
      "step": 2682380
    },
    {
      "epoch": 4.389806432185804,
      "grad_norm": 0.09307252615690231,
      "learning_rate": 1.2284285365998301e-06,
      "loss": 0.0084,
      "step": 2682400
    },
    {
      "epoch": 4.389839162624457,
      "grad_norm": 0.417468398809433,
      "learning_rate": 1.2283626443863129e-06,
      "loss": 0.0072,
      "step": 2682420
    },
    {
      "epoch": 4.389871893063111,
      "grad_norm": 0.17281164228916168,
      "learning_rate": 1.2282967521727958e-06,
      "loss": 0.0125,
      "step": 2682440
    },
    {
      "epoch": 4.3899046235017645,
      "grad_norm": 0.42776548862457275,
      "learning_rate": 1.2282308599592788e-06,
      "loss": 0.011,
      "step": 2682460
    },
    {
      "epoch": 4.389937353940417,
      "grad_norm": 0.05777335539460182,
      "learning_rate": 1.2281649677457615e-06,
      "loss": 0.0077,
      "step": 2682480
    },
    {
      "epoch": 4.389970084379071,
      "grad_norm": 0.0985647514462471,
      "learning_rate": 1.2280990755322445e-06,
      "loss": 0.0079,
      "step": 2682500
    },
    {
      "epoch": 4.390002814817724,
      "grad_norm": 0.40846434235572815,
      "learning_rate": 1.2280331833187272e-06,
      "loss": 0.012,
      "step": 2682520
    },
    {
      "epoch": 4.390035545256378,
      "grad_norm": 0.24398775398731232,
      "learning_rate": 1.2279672911052104e-06,
      "loss": 0.0111,
      "step": 2682540
    },
    {
      "epoch": 4.390068275695031,
      "grad_norm": 0.8573319911956787,
      "learning_rate": 1.227901398891693e-06,
      "loss": 0.0107,
      "step": 2682560
    },
    {
      "epoch": 4.390101006133684,
      "grad_norm": 0.15641912817955017,
      "learning_rate": 1.227835506678176e-06,
      "loss": 0.0071,
      "step": 2682580
    },
    {
      "epoch": 4.390133736572338,
      "grad_norm": 0.374595582485199,
      "learning_rate": 1.2277696144646588e-06,
      "loss": 0.0061,
      "step": 2682600
    },
    {
      "epoch": 4.390166467010991,
      "grad_norm": 0.21729567646980286,
      "learning_rate": 1.2277037222511415e-06,
      "loss": 0.0073,
      "step": 2682620
    },
    {
      "epoch": 4.390199197449644,
      "grad_norm": 0.1661292165517807,
      "learning_rate": 1.2276378300376247e-06,
      "loss": 0.0084,
      "step": 2682640
    },
    {
      "epoch": 4.390231927888298,
      "grad_norm": 0.09335500001907349,
      "learning_rate": 1.2275719378241074e-06,
      "loss": 0.0072,
      "step": 2682660
    },
    {
      "epoch": 4.3902646583269505,
      "grad_norm": 0.4149848222732544,
      "learning_rate": 1.2275060456105904e-06,
      "loss": 0.0083,
      "step": 2682680
    },
    {
      "epoch": 4.390297388765604,
      "grad_norm": 0.1568555235862732,
      "learning_rate": 1.2274401533970731e-06,
      "loss": 0.0084,
      "step": 2682700
    },
    {
      "epoch": 4.390330119204258,
      "grad_norm": 0.3310619592666626,
      "learning_rate": 1.227374261183556e-06,
      "loss": 0.0114,
      "step": 2682720
    },
    {
      "epoch": 4.390362849642911,
      "grad_norm": 0.244994655251503,
      "learning_rate": 1.227308368970039e-06,
      "loss": 0.0074,
      "step": 2682740
    },
    {
      "epoch": 4.390395580081564,
      "grad_norm": 0.21025174856185913,
      "learning_rate": 1.2272424767565218e-06,
      "loss": 0.0079,
      "step": 2682760
    },
    {
      "epoch": 4.3904283105202175,
      "grad_norm": 0.17483608424663544,
      "learning_rate": 1.2271765845430047e-06,
      "loss": 0.0075,
      "step": 2682780
    },
    {
      "epoch": 4.390461040958871,
      "grad_norm": 0.21884170174598694,
      "learning_rate": 1.2271106923294875e-06,
      "loss": 0.0062,
      "step": 2682800
    },
    {
      "epoch": 4.390493771397525,
      "grad_norm": 0.4099138677120209,
      "learning_rate": 1.2270448001159704e-06,
      "loss": 0.0098,
      "step": 2682820
    },
    {
      "epoch": 4.390526501836177,
      "grad_norm": 0.046399980783462524,
      "learning_rate": 1.2269789079024534e-06,
      "loss": 0.0085,
      "step": 2682840
    },
    {
      "epoch": 4.390559232274831,
      "grad_norm": 0.16983714699745178,
      "learning_rate": 1.226913015688936e-06,
      "loss": 0.0102,
      "step": 2682860
    },
    {
      "epoch": 4.390591962713485,
      "grad_norm": 0.14995045959949493,
      "learning_rate": 1.226847123475419e-06,
      "loss": 0.0129,
      "step": 2682880
    },
    {
      "epoch": 4.390624693152137,
      "grad_norm": 0.14917968213558197,
      "learning_rate": 1.226781231261902e-06,
      "loss": 0.007,
      "step": 2682900
    },
    {
      "epoch": 4.390657423590791,
      "grad_norm": 0.2919211685657501,
      "learning_rate": 1.2267153390483847e-06,
      "loss": 0.0069,
      "step": 2682920
    },
    {
      "epoch": 4.3906901540294445,
      "grad_norm": 0.17054475843906403,
      "learning_rate": 1.2266494468348677e-06,
      "loss": 0.0072,
      "step": 2682940
    },
    {
      "epoch": 4.390722884468097,
      "grad_norm": 0.19697688519954681,
      "learning_rate": 1.2265835546213504e-06,
      "loss": 0.0088,
      "step": 2682960
    },
    {
      "epoch": 4.390755614906751,
      "grad_norm": 0.06070950627326965,
      "learning_rate": 1.2265176624078334e-06,
      "loss": 0.0068,
      "step": 2682980
    },
    {
      "epoch": 4.390788345345404,
      "grad_norm": 0.19755682349205017,
      "learning_rate": 1.2264517701943163e-06,
      "loss": 0.0086,
      "step": 2683000
    },
    {
      "epoch": 4.390821075784058,
      "grad_norm": 0.2308725267648697,
      "learning_rate": 1.226385877980799e-06,
      "loss": 0.0069,
      "step": 2683020
    },
    {
      "epoch": 4.390853806222711,
      "grad_norm": 0.5951986908912659,
      "learning_rate": 1.226319985767282e-06,
      "loss": 0.0112,
      "step": 2683040
    },
    {
      "epoch": 4.390886536661364,
      "grad_norm": 0.6127519607543945,
      "learning_rate": 1.2262540935537648e-06,
      "loss": 0.0104,
      "step": 2683060
    },
    {
      "epoch": 4.390919267100018,
      "grad_norm": 0.17741639912128448,
      "learning_rate": 1.2261882013402477e-06,
      "loss": 0.0114,
      "step": 2683080
    },
    {
      "epoch": 4.390951997538671,
      "grad_norm": 0.24443043768405914,
      "learning_rate": 1.2261223091267307e-06,
      "loss": 0.009,
      "step": 2683100
    },
    {
      "epoch": 4.390984727977324,
      "grad_norm": 0.18530935049057007,
      "learning_rate": 1.2260564169132134e-06,
      "loss": 0.0108,
      "step": 2683120
    },
    {
      "epoch": 4.391017458415978,
      "grad_norm": 0.18419615924358368,
      "learning_rate": 1.2259905246996964e-06,
      "loss": 0.0119,
      "step": 2683140
    },
    {
      "epoch": 4.391050188854631,
      "grad_norm": 0.09987708926200867,
      "learning_rate": 1.2259246324861793e-06,
      "loss": 0.0059,
      "step": 2683160
    },
    {
      "epoch": 4.391082919293284,
      "grad_norm": 0.4445270895957947,
      "learning_rate": 1.225858740272662e-06,
      "loss": 0.0094,
      "step": 2683180
    },
    {
      "epoch": 4.391115649731938,
      "grad_norm": 0.240110382437706,
      "learning_rate": 1.225792848059145e-06,
      "loss": 0.0083,
      "step": 2683200
    },
    {
      "epoch": 4.391148380170591,
      "grad_norm": 0.30783310532569885,
      "learning_rate": 1.2257269558456277e-06,
      "loss": 0.0095,
      "step": 2683220
    },
    {
      "epoch": 4.391181110609244,
      "grad_norm": 0.23647508025169373,
      "learning_rate": 1.2256610636321107e-06,
      "loss": 0.0096,
      "step": 2683240
    },
    {
      "epoch": 4.391213841047898,
      "grad_norm": 0.27211031317710876,
      "learning_rate": 1.2255951714185936e-06,
      "loss": 0.0082,
      "step": 2683260
    },
    {
      "epoch": 4.391246571486551,
      "grad_norm": 0.12991929054260254,
      "learning_rate": 1.2255292792050764e-06,
      "loss": 0.0108,
      "step": 2683280
    },
    {
      "epoch": 4.391279301925205,
      "grad_norm": 0.3093882203102112,
      "learning_rate": 1.2254633869915593e-06,
      "loss": 0.0078,
      "step": 2683300
    },
    {
      "epoch": 4.3913120323638575,
      "grad_norm": 0.10256851464509964,
      "learning_rate": 1.225397494778042e-06,
      "loss": 0.0084,
      "step": 2683320
    },
    {
      "epoch": 4.391344762802511,
      "grad_norm": 0.3411833345890045,
      "learning_rate": 1.225331602564525e-06,
      "loss": 0.01,
      "step": 2683340
    },
    {
      "epoch": 4.391377493241165,
      "grad_norm": 0.14659860730171204,
      "learning_rate": 1.225265710351008e-06,
      "loss": 0.0055,
      "step": 2683360
    },
    {
      "epoch": 4.391410223679817,
      "grad_norm": 0.6990838646888733,
      "learning_rate": 1.2251998181374907e-06,
      "loss": 0.0105,
      "step": 2683380
    },
    {
      "epoch": 4.391442954118471,
      "grad_norm": 0.21486125886440277,
      "learning_rate": 1.2251339259239737e-06,
      "loss": 0.009,
      "step": 2683400
    },
    {
      "epoch": 4.391475684557125,
      "grad_norm": 0.2107197344303131,
      "learning_rate": 1.2250680337104566e-06,
      "loss": 0.0082,
      "step": 2683420
    },
    {
      "epoch": 4.391508414995778,
      "grad_norm": 0.1626502126455307,
      "learning_rate": 1.2250021414969394e-06,
      "loss": 0.0072,
      "step": 2683440
    },
    {
      "epoch": 4.391541145434431,
      "grad_norm": 0.1578192412853241,
      "learning_rate": 1.2249362492834223e-06,
      "loss": 0.0061,
      "step": 2683460
    },
    {
      "epoch": 4.3915738758730845,
      "grad_norm": 0.6478304266929626,
      "learning_rate": 1.224870357069905e-06,
      "loss": 0.0121,
      "step": 2683480
    },
    {
      "epoch": 4.391606606311738,
      "grad_norm": 0.36046576499938965,
      "learning_rate": 1.224804464856388e-06,
      "loss": 0.009,
      "step": 2683500
    },
    {
      "epoch": 4.391639336750391,
      "grad_norm": 0.10651466250419617,
      "learning_rate": 1.224738572642871e-06,
      "loss": 0.0067,
      "step": 2683520
    },
    {
      "epoch": 4.391672067189044,
      "grad_norm": 0.27107155323028564,
      "learning_rate": 1.2246726804293537e-06,
      "loss": 0.0086,
      "step": 2683540
    },
    {
      "epoch": 4.391704797627698,
      "grad_norm": 0.12509775161743164,
      "learning_rate": 1.2246067882158366e-06,
      "loss": 0.009,
      "step": 2683560
    },
    {
      "epoch": 4.3917375280663515,
      "grad_norm": 0.2872051000595093,
      "learning_rate": 1.2245408960023194e-06,
      "loss": 0.0091,
      "step": 2683580
    },
    {
      "epoch": 4.391770258505004,
      "grad_norm": 0.225351944565773,
      "learning_rate": 1.2244750037888023e-06,
      "loss": 0.0124,
      "step": 2683600
    },
    {
      "epoch": 4.391802988943658,
      "grad_norm": 0.23654915392398834,
      "learning_rate": 1.2244091115752853e-06,
      "loss": 0.0092,
      "step": 2683620
    },
    {
      "epoch": 4.391835719382311,
      "grad_norm": 0.19248944520950317,
      "learning_rate": 1.224343219361768e-06,
      "loss": 0.0082,
      "step": 2683640
    },
    {
      "epoch": 4.391868449820964,
      "grad_norm": 0.2876532971858978,
      "learning_rate": 1.224277327148251e-06,
      "loss": 0.0123,
      "step": 2683660
    },
    {
      "epoch": 4.391901180259618,
      "grad_norm": 0.10050006210803986,
      "learning_rate": 1.2242114349347337e-06,
      "loss": 0.0116,
      "step": 2683680
    },
    {
      "epoch": 4.391933910698271,
      "grad_norm": 0.10958985239267349,
      "learning_rate": 1.2241455427212167e-06,
      "loss": 0.0075,
      "step": 2683700
    },
    {
      "epoch": 4.391966641136925,
      "grad_norm": 0.23885418474674225,
      "learning_rate": 1.2240796505076996e-06,
      "loss": 0.0096,
      "step": 2683720
    },
    {
      "epoch": 4.391999371575578,
      "grad_norm": 0.15386562049388885,
      "learning_rate": 1.2240137582941824e-06,
      "loss": 0.0147,
      "step": 2683740
    },
    {
      "epoch": 4.392032102014231,
      "grad_norm": 0.16152916848659515,
      "learning_rate": 1.2239478660806653e-06,
      "loss": 0.0096,
      "step": 2683760
    },
    {
      "epoch": 4.392064832452885,
      "grad_norm": 0.08287844806909561,
      "learning_rate": 1.2238819738671483e-06,
      "loss": 0.0055,
      "step": 2683780
    },
    {
      "epoch": 4.3920975628915375,
      "grad_norm": 0.1270882785320282,
      "learning_rate": 1.223816081653631e-06,
      "loss": 0.0116,
      "step": 2683800
    },
    {
      "epoch": 4.392130293330191,
      "grad_norm": 0.07351557910442352,
      "learning_rate": 1.223750189440114e-06,
      "loss": 0.0096,
      "step": 2683820
    },
    {
      "epoch": 4.392163023768845,
      "grad_norm": 0.5418263077735901,
      "learning_rate": 1.2236842972265967e-06,
      "loss": 0.0119,
      "step": 2683840
    },
    {
      "epoch": 4.392195754207498,
      "grad_norm": 0.2612815201282501,
      "learning_rate": 1.2236184050130799e-06,
      "loss": 0.0075,
      "step": 2683860
    },
    {
      "epoch": 4.392228484646151,
      "grad_norm": 0.3385528326034546,
      "learning_rate": 1.2235525127995626e-06,
      "loss": 0.0108,
      "step": 2683880
    },
    {
      "epoch": 4.392261215084805,
      "grad_norm": 0.13723008334636688,
      "learning_rate": 1.2234866205860456e-06,
      "loss": 0.0116,
      "step": 2683900
    },
    {
      "epoch": 4.392293945523458,
      "grad_norm": 0.14798155426979065,
      "learning_rate": 1.2234207283725283e-06,
      "loss": 0.0059,
      "step": 2683920
    },
    {
      "epoch": 4.392326675962111,
      "grad_norm": 0.2339538335800171,
      "learning_rate": 1.223354836159011e-06,
      "loss": 0.0082,
      "step": 2683940
    },
    {
      "epoch": 4.3923594064007645,
      "grad_norm": 0.6632216572761536,
      "learning_rate": 1.2232889439454942e-06,
      "loss": 0.011,
      "step": 2683960
    },
    {
      "epoch": 4.392392136839418,
      "grad_norm": 0.4926770329475403,
      "learning_rate": 1.223223051731977e-06,
      "loss": 0.009,
      "step": 2683980
    },
    {
      "epoch": 4.392424867278072,
      "grad_norm": 0.1908583641052246,
      "learning_rate": 1.2231571595184599e-06,
      "loss": 0.0099,
      "step": 2684000
    },
    {
      "epoch": 4.392457597716724,
      "grad_norm": 0.36292919516563416,
      "learning_rate": 1.2230912673049426e-06,
      "loss": 0.0074,
      "step": 2684020
    },
    {
      "epoch": 4.392490328155378,
      "grad_norm": 0.47422733902931213,
      "learning_rate": 1.2230253750914256e-06,
      "loss": 0.0094,
      "step": 2684040
    },
    {
      "epoch": 4.392523058594032,
      "grad_norm": 0.4020082354545593,
      "learning_rate": 1.2229594828779085e-06,
      "loss": 0.0084,
      "step": 2684060
    },
    {
      "epoch": 4.392555789032684,
      "grad_norm": 0.31158381700515747,
      "learning_rate": 1.2228935906643913e-06,
      "loss": 0.0092,
      "step": 2684080
    },
    {
      "epoch": 4.392588519471338,
      "grad_norm": 0.1533764749765396,
      "learning_rate": 1.2228276984508742e-06,
      "loss": 0.0079,
      "step": 2684100
    },
    {
      "epoch": 4.3926212499099915,
      "grad_norm": 0.03881422057747841,
      "learning_rate": 1.222761806237357e-06,
      "loss": 0.0078,
      "step": 2684120
    },
    {
      "epoch": 4.392653980348644,
      "grad_norm": 0.09816155582666397,
      "learning_rate": 1.22269591402384e-06,
      "loss": 0.0097,
      "step": 2684140
    },
    {
      "epoch": 4.392686710787298,
      "grad_norm": 0.4820549488067627,
      "learning_rate": 1.2226300218103229e-06,
      "loss": 0.0124,
      "step": 2684160
    },
    {
      "epoch": 4.392719441225951,
      "grad_norm": 0.14050263166427612,
      "learning_rate": 1.2225641295968056e-06,
      "loss": 0.009,
      "step": 2684180
    },
    {
      "epoch": 4.392752171664605,
      "grad_norm": 0.29097479581832886,
      "learning_rate": 1.2224982373832886e-06,
      "loss": 0.0083,
      "step": 2684200
    },
    {
      "epoch": 4.392784902103258,
      "grad_norm": 0.6090983748435974,
      "learning_rate": 1.2224323451697715e-06,
      "loss": 0.0082,
      "step": 2684220
    },
    {
      "epoch": 4.392817632541911,
      "grad_norm": 0.4215409755706787,
      "learning_rate": 1.2223664529562542e-06,
      "loss": 0.0152,
      "step": 2684240
    },
    {
      "epoch": 4.392850362980565,
      "grad_norm": 0.30172961950302124,
      "learning_rate": 1.2223005607427372e-06,
      "loss": 0.0124,
      "step": 2684260
    },
    {
      "epoch": 4.3928830934192185,
      "grad_norm": 0.15032443404197693,
      "learning_rate": 1.22223466852922e-06,
      "loss": 0.0096,
      "step": 2684280
    },
    {
      "epoch": 4.392915823857871,
      "grad_norm": 0.21212150156497955,
      "learning_rate": 1.2221687763157029e-06,
      "loss": 0.0107,
      "step": 2684300
    },
    {
      "epoch": 4.392948554296525,
      "grad_norm": 0.8018659353256226,
      "learning_rate": 1.2221028841021858e-06,
      "loss": 0.0098,
      "step": 2684320
    },
    {
      "epoch": 4.392981284735178,
      "grad_norm": 0.4893840551376343,
      "learning_rate": 1.2220369918886686e-06,
      "loss": 0.008,
      "step": 2684340
    },
    {
      "epoch": 4.393014015173831,
      "grad_norm": 0.22577004134655,
      "learning_rate": 1.2219710996751515e-06,
      "loss": 0.0076,
      "step": 2684360
    },
    {
      "epoch": 4.393046745612485,
      "grad_norm": 0.1666528284549713,
      "learning_rate": 1.2219052074616343e-06,
      "loss": 0.0087,
      "step": 2684380
    },
    {
      "epoch": 4.393079476051138,
      "grad_norm": 0.08393606543540955,
      "learning_rate": 1.2218393152481172e-06,
      "loss": 0.0101,
      "step": 2684400
    },
    {
      "epoch": 4.393112206489791,
      "grad_norm": 0.16515974700450897,
      "learning_rate": 1.2217734230346002e-06,
      "loss": 0.0103,
      "step": 2684420
    },
    {
      "epoch": 4.3931449369284445,
      "grad_norm": 0.3679382801055908,
      "learning_rate": 1.221707530821083e-06,
      "loss": 0.0087,
      "step": 2684440
    },
    {
      "epoch": 4.393177667367098,
      "grad_norm": 0.12581796944141388,
      "learning_rate": 1.2216416386075659e-06,
      "loss": 0.0074,
      "step": 2684460
    },
    {
      "epoch": 4.393210397805752,
      "grad_norm": 0.1593499779701233,
      "learning_rate": 1.2215757463940488e-06,
      "loss": 0.0076,
      "step": 2684480
    },
    {
      "epoch": 4.393243128244404,
      "grad_norm": 0.3307301998138428,
      "learning_rate": 1.2215098541805316e-06,
      "loss": 0.0102,
      "step": 2684500
    },
    {
      "epoch": 4.393275858683058,
      "grad_norm": 0.1550767868757248,
      "learning_rate": 1.2214439619670145e-06,
      "loss": 0.0092,
      "step": 2684520
    },
    {
      "epoch": 4.393308589121712,
      "grad_norm": 0.09761473536491394,
      "learning_rate": 1.2213780697534972e-06,
      "loss": 0.0085,
      "step": 2684540
    },
    {
      "epoch": 4.393341319560364,
      "grad_norm": 0.05044788867235184,
      "learning_rate": 1.2213121775399802e-06,
      "loss": 0.0118,
      "step": 2684560
    },
    {
      "epoch": 4.393374049999018,
      "grad_norm": 0.1407213658094406,
      "learning_rate": 1.2212462853264632e-06,
      "loss": 0.0118,
      "step": 2684580
    },
    {
      "epoch": 4.3934067804376715,
      "grad_norm": 0.14554306864738464,
      "learning_rate": 1.2211803931129459e-06,
      "loss": 0.0131,
      "step": 2684600
    },
    {
      "epoch": 4.393439510876325,
      "grad_norm": 0.08868785947561264,
      "learning_rate": 1.2211145008994288e-06,
      "loss": 0.0085,
      "step": 2684620
    },
    {
      "epoch": 4.393472241314978,
      "grad_norm": 0.04447324946522713,
      "learning_rate": 1.2210486086859116e-06,
      "loss": 0.0087,
      "step": 2684640
    },
    {
      "epoch": 4.393504971753631,
      "grad_norm": 0.14260514080524445,
      "learning_rate": 1.2209827164723945e-06,
      "loss": 0.0127,
      "step": 2684660
    },
    {
      "epoch": 4.393537702192285,
      "grad_norm": 0.8792786002159119,
      "learning_rate": 1.2209168242588775e-06,
      "loss": 0.0105,
      "step": 2684680
    },
    {
      "epoch": 4.393570432630938,
      "grad_norm": 0.3911783993244171,
      "learning_rate": 1.2208509320453602e-06,
      "loss": 0.0092,
      "step": 2684700
    },
    {
      "epoch": 4.393603163069591,
      "grad_norm": 0.4146575331687927,
      "learning_rate": 1.2207850398318432e-06,
      "loss": 0.0071,
      "step": 2684720
    },
    {
      "epoch": 4.393635893508245,
      "grad_norm": 0.20206224918365479,
      "learning_rate": 1.2207191476183261e-06,
      "loss": 0.0092,
      "step": 2684740
    },
    {
      "epoch": 4.3936686239468985,
      "grad_norm": 0.33521151542663574,
      "learning_rate": 1.2206532554048089e-06,
      "loss": 0.0093,
      "step": 2684760
    },
    {
      "epoch": 4.393701354385551,
      "grad_norm": 0.2838490605354309,
      "learning_rate": 1.2205873631912918e-06,
      "loss": 0.0075,
      "step": 2684780
    },
    {
      "epoch": 4.393734084824205,
      "grad_norm": 0.6788820028305054,
      "learning_rate": 1.2205214709777746e-06,
      "loss": 0.0094,
      "step": 2684800
    },
    {
      "epoch": 4.393766815262858,
      "grad_norm": 0.11011723428964615,
      "learning_rate": 1.2204555787642575e-06,
      "loss": 0.0088,
      "step": 2684820
    },
    {
      "epoch": 4.393799545701511,
      "grad_norm": 0.6740853190422058,
      "learning_rate": 1.2203896865507405e-06,
      "loss": 0.0079,
      "step": 2684840
    },
    {
      "epoch": 4.393832276140165,
      "grad_norm": 0.471329927444458,
      "learning_rate": 1.2203237943372232e-06,
      "loss": 0.0112,
      "step": 2684860
    },
    {
      "epoch": 4.393865006578818,
      "grad_norm": 0.5809304118156433,
      "learning_rate": 1.2202579021237062e-06,
      "loss": 0.0087,
      "step": 2684880
    },
    {
      "epoch": 4.393897737017472,
      "grad_norm": 0.0408288910984993,
      "learning_rate": 1.2201920099101889e-06,
      "loss": 0.0099,
      "step": 2684900
    },
    {
      "epoch": 4.393930467456125,
      "grad_norm": 0.37009474635124207,
      "learning_rate": 1.2201261176966718e-06,
      "loss": 0.0099,
      "step": 2684920
    },
    {
      "epoch": 4.393963197894778,
      "grad_norm": 0.4239959716796875,
      "learning_rate": 1.2200602254831548e-06,
      "loss": 0.0081,
      "step": 2684940
    },
    {
      "epoch": 4.393995928333432,
      "grad_norm": 0.687611997127533,
      "learning_rate": 1.2199943332696375e-06,
      "loss": 0.017,
      "step": 2684960
    },
    {
      "epoch": 4.3940286587720845,
      "grad_norm": 0.18035206198692322,
      "learning_rate": 1.2199284410561205e-06,
      "loss": 0.0103,
      "step": 2684980
    },
    {
      "epoch": 4.394061389210738,
      "grad_norm": 0.3979664146900177,
      "learning_rate": 1.2198625488426032e-06,
      "loss": 0.0126,
      "step": 2685000
    },
    {
      "epoch": 4.394094119649392,
      "grad_norm": 0.08576424419879913,
      "learning_rate": 1.2197966566290862e-06,
      "loss": 0.0072,
      "step": 2685020
    },
    {
      "epoch": 4.394126850088045,
      "grad_norm": 0.7378429770469666,
      "learning_rate": 1.2197307644155691e-06,
      "loss": 0.0118,
      "step": 2685040
    },
    {
      "epoch": 4.394159580526698,
      "grad_norm": 0.2032434493303299,
      "learning_rate": 1.2196648722020519e-06,
      "loss": 0.0152,
      "step": 2685060
    },
    {
      "epoch": 4.394192310965352,
      "grad_norm": 0.1899988204240799,
      "learning_rate": 1.2195989799885348e-06,
      "loss": 0.0081,
      "step": 2685080
    },
    {
      "epoch": 4.394225041404005,
      "grad_norm": 0.5428319573402405,
      "learning_rate": 1.2195330877750178e-06,
      "loss": 0.0119,
      "step": 2685100
    },
    {
      "epoch": 4.394257771842658,
      "grad_norm": 0.4089304506778717,
      "learning_rate": 1.2194671955615005e-06,
      "loss": 0.0113,
      "step": 2685120
    },
    {
      "epoch": 4.3942905022813115,
      "grad_norm": 0.027636608108878136,
      "learning_rate": 1.2194013033479835e-06,
      "loss": 0.0113,
      "step": 2685140
    },
    {
      "epoch": 4.394323232719965,
      "grad_norm": 0.5678505301475525,
      "learning_rate": 1.2193354111344662e-06,
      "loss": 0.0121,
      "step": 2685160
    },
    {
      "epoch": 4.394355963158619,
      "grad_norm": 0.13984465599060059,
      "learning_rate": 1.2192695189209494e-06,
      "loss": 0.007,
      "step": 2685180
    },
    {
      "epoch": 4.394388693597271,
      "grad_norm": 0.31073087453842163,
      "learning_rate": 1.2192036267074321e-06,
      "loss": 0.0104,
      "step": 2685200
    },
    {
      "epoch": 4.394421424035925,
      "grad_norm": 0.2890399098396301,
      "learning_rate": 1.219137734493915e-06,
      "loss": 0.015,
      "step": 2685220
    },
    {
      "epoch": 4.3944541544745785,
      "grad_norm": 0.30196309089660645,
      "learning_rate": 1.2190718422803978e-06,
      "loss": 0.0091,
      "step": 2685240
    },
    {
      "epoch": 4.394486884913231,
      "grad_norm": 0.09470062702894211,
      "learning_rate": 1.2190059500668805e-06,
      "loss": 0.0074,
      "step": 2685260
    },
    {
      "epoch": 4.394519615351885,
      "grad_norm": 0.22639551758766174,
      "learning_rate": 1.2189400578533637e-06,
      "loss": 0.0054,
      "step": 2685280
    },
    {
      "epoch": 4.394552345790538,
      "grad_norm": 0.29216915369033813,
      "learning_rate": 1.2188741656398464e-06,
      "loss": 0.0087,
      "step": 2685300
    },
    {
      "epoch": 4.394585076229192,
      "grad_norm": 0.17670279741287231,
      "learning_rate": 1.2188082734263294e-06,
      "loss": 0.0091,
      "step": 2685320
    },
    {
      "epoch": 4.394617806667845,
      "grad_norm": 0.3745977580547333,
      "learning_rate": 1.2187423812128121e-06,
      "loss": 0.0094,
      "step": 2685340
    },
    {
      "epoch": 4.394650537106498,
      "grad_norm": 0.37780874967575073,
      "learning_rate": 1.218676488999295e-06,
      "loss": 0.0087,
      "step": 2685360
    },
    {
      "epoch": 4.394683267545152,
      "grad_norm": 0.1829063892364502,
      "learning_rate": 1.218610596785778e-06,
      "loss": 0.0104,
      "step": 2685380
    },
    {
      "epoch": 4.394715997983805,
      "grad_norm": 0.28101521730422974,
      "learning_rate": 1.2185447045722608e-06,
      "loss": 0.0065,
      "step": 2685400
    },
    {
      "epoch": 4.394748728422458,
      "grad_norm": 0.2287619411945343,
      "learning_rate": 1.2184788123587437e-06,
      "loss": 0.014,
      "step": 2685420
    },
    {
      "epoch": 4.394781458861112,
      "grad_norm": 0.11199299991130829,
      "learning_rate": 1.2184129201452267e-06,
      "loss": 0.0053,
      "step": 2685440
    },
    {
      "epoch": 4.394814189299765,
      "grad_norm": 0.15705667436122894,
      "learning_rate": 1.2183470279317094e-06,
      "loss": 0.0094,
      "step": 2685460
    },
    {
      "epoch": 4.394846919738418,
      "grad_norm": 0.17633908987045288,
      "learning_rate": 1.2182811357181924e-06,
      "loss": 0.0124,
      "step": 2685480
    },
    {
      "epoch": 4.394879650177072,
      "grad_norm": 0.15167778730392456,
      "learning_rate": 1.2182152435046751e-06,
      "loss": 0.0101,
      "step": 2685500
    },
    {
      "epoch": 4.394912380615725,
      "grad_norm": 0.18215954303741455,
      "learning_rate": 1.218149351291158e-06,
      "loss": 0.0121,
      "step": 2685520
    },
    {
      "epoch": 4.394945111054378,
      "grad_norm": 0.09514117240905762,
      "learning_rate": 1.218083459077641e-06,
      "loss": 0.013,
      "step": 2685540
    },
    {
      "epoch": 4.394977841493032,
      "grad_norm": 0.17319631576538086,
      "learning_rate": 1.2180175668641238e-06,
      "loss": 0.0061,
      "step": 2685560
    },
    {
      "epoch": 4.395010571931685,
      "grad_norm": 0.20113348960876465,
      "learning_rate": 1.2179516746506067e-06,
      "loss": 0.0107,
      "step": 2685580
    },
    {
      "epoch": 4.395043302370338,
      "grad_norm": 0.13972420990467072,
      "learning_rate": 1.2178857824370894e-06,
      "loss": 0.0083,
      "step": 2685600
    },
    {
      "epoch": 4.3950760328089915,
      "grad_norm": 0.5200448036193848,
      "learning_rate": 1.2178198902235724e-06,
      "loss": 0.0087,
      "step": 2685620
    },
    {
      "epoch": 4.395108763247645,
      "grad_norm": 0.25643953680992126,
      "learning_rate": 1.2177539980100553e-06,
      "loss": 0.0111,
      "step": 2685640
    },
    {
      "epoch": 4.395141493686299,
      "grad_norm": 0.411828875541687,
      "learning_rate": 1.217688105796538e-06,
      "loss": 0.0102,
      "step": 2685660
    },
    {
      "epoch": 4.395174224124951,
      "grad_norm": 0.23153550922870636,
      "learning_rate": 1.217622213583021e-06,
      "loss": 0.0068,
      "step": 2685680
    },
    {
      "epoch": 4.395206954563605,
      "grad_norm": 0.15139159560203552,
      "learning_rate": 1.2175563213695038e-06,
      "loss": 0.0127,
      "step": 2685700
    },
    {
      "epoch": 4.395239685002259,
      "grad_norm": 0.19063325226306915,
      "learning_rate": 1.2174904291559867e-06,
      "loss": 0.0079,
      "step": 2685720
    },
    {
      "epoch": 4.395272415440912,
      "grad_norm": 0.10941086709499359,
      "learning_rate": 1.2174245369424697e-06,
      "loss": 0.0081,
      "step": 2685740
    },
    {
      "epoch": 4.395305145879565,
      "grad_norm": 0.11488062143325806,
      "learning_rate": 1.2173586447289524e-06,
      "loss": 0.0062,
      "step": 2685760
    },
    {
      "epoch": 4.3953378763182185,
      "grad_norm": 0.15769512951374054,
      "learning_rate": 1.2172927525154354e-06,
      "loss": 0.0111,
      "step": 2685780
    },
    {
      "epoch": 4.395370606756872,
      "grad_norm": 0.15152110159397125,
      "learning_rate": 1.2172268603019183e-06,
      "loss": 0.0074,
      "step": 2685800
    },
    {
      "epoch": 4.395403337195525,
      "grad_norm": 0.5289398431777954,
      "learning_rate": 1.217160968088401e-06,
      "loss": 0.0122,
      "step": 2685820
    },
    {
      "epoch": 4.395436067634178,
      "grad_norm": 0.16773095726966858,
      "learning_rate": 1.217095075874884e-06,
      "loss": 0.0141,
      "step": 2685840
    },
    {
      "epoch": 4.395468798072832,
      "grad_norm": 0.2906998097896576,
      "learning_rate": 1.2170291836613668e-06,
      "loss": 0.0146,
      "step": 2685860
    },
    {
      "epoch": 4.395501528511485,
      "grad_norm": 0.21235410869121552,
      "learning_rate": 1.2169632914478497e-06,
      "loss": 0.017,
      "step": 2685880
    },
    {
      "epoch": 4.395534258950138,
      "grad_norm": 0.261916846036911,
      "learning_rate": 1.2168973992343327e-06,
      "loss": 0.0085,
      "step": 2685900
    },
    {
      "epoch": 4.395566989388792,
      "grad_norm": 0.30298224091529846,
      "learning_rate": 1.2168315070208154e-06,
      "loss": 0.0082,
      "step": 2685920
    },
    {
      "epoch": 4.3955997198274455,
      "grad_norm": 0.1832360327243805,
      "learning_rate": 1.2167656148072983e-06,
      "loss": 0.011,
      "step": 2685940
    },
    {
      "epoch": 4.395632450266098,
      "grad_norm": 0.13306526839733124,
      "learning_rate": 1.216699722593781e-06,
      "loss": 0.01,
      "step": 2685960
    },
    {
      "epoch": 4.395665180704752,
      "grad_norm": 0.1340281218290329,
      "learning_rate": 1.216633830380264e-06,
      "loss": 0.0076,
      "step": 2685980
    },
    {
      "epoch": 4.395697911143405,
      "grad_norm": 0.3970368504524231,
      "learning_rate": 1.216567938166747e-06,
      "loss": 0.0102,
      "step": 2686000
    },
    {
      "epoch": 4.395730641582059,
      "grad_norm": 0.20160843431949615,
      "learning_rate": 1.2165020459532297e-06,
      "loss": 0.0073,
      "step": 2686020
    },
    {
      "epoch": 4.395763372020712,
      "grad_norm": 0.40317079424858093,
      "learning_rate": 1.2164361537397127e-06,
      "loss": 0.0078,
      "step": 2686040
    },
    {
      "epoch": 4.395796102459365,
      "grad_norm": 0.17949631810188293,
      "learning_rate": 1.2163702615261956e-06,
      "loss": 0.0091,
      "step": 2686060
    },
    {
      "epoch": 4.395828832898019,
      "grad_norm": 0.05355178192257881,
      "learning_rate": 1.2163043693126784e-06,
      "loss": 0.0076,
      "step": 2686080
    },
    {
      "epoch": 4.3958615633366716,
      "grad_norm": 0.26594921946525574,
      "learning_rate": 1.2162384770991613e-06,
      "loss": 0.0103,
      "step": 2686100
    },
    {
      "epoch": 4.395894293775325,
      "grad_norm": 0.38205134868621826,
      "learning_rate": 1.216172584885644e-06,
      "loss": 0.0104,
      "step": 2686120
    },
    {
      "epoch": 4.395927024213979,
      "grad_norm": 0.27693235874176025,
      "learning_rate": 1.216106692672127e-06,
      "loss": 0.0108,
      "step": 2686140
    },
    {
      "epoch": 4.3959597546526314,
      "grad_norm": 0.22157545387744904,
      "learning_rate": 1.21604080045861e-06,
      "loss": 0.0111,
      "step": 2686160
    },
    {
      "epoch": 4.395992485091285,
      "grad_norm": 0.23602887988090515,
      "learning_rate": 1.2159749082450927e-06,
      "loss": 0.0071,
      "step": 2686180
    },
    {
      "epoch": 4.396025215529939,
      "grad_norm": 0.057253796607255936,
      "learning_rate": 1.2159090160315757e-06,
      "loss": 0.0071,
      "step": 2686200
    },
    {
      "epoch": 4.396057945968592,
      "grad_norm": 0.08208160847425461,
      "learning_rate": 1.2158431238180584e-06,
      "loss": 0.0097,
      "step": 2686220
    },
    {
      "epoch": 4.396090676407245,
      "grad_norm": 0.19468016922473907,
      "learning_rate": 1.2157772316045413e-06,
      "loss": 0.0062,
      "step": 2686240
    },
    {
      "epoch": 4.3961234068458985,
      "grad_norm": 0.1275988221168518,
      "learning_rate": 1.2157113393910243e-06,
      "loss": 0.0072,
      "step": 2686260
    },
    {
      "epoch": 4.396156137284552,
      "grad_norm": 0.4215734004974365,
      "learning_rate": 1.215645447177507e-06,
      "loss": 0.0091,
      "step": 2686280
    },
    {
      "epoch": 4.396188867723205,
      "grad_norm": 0.0946902260184288,
      "learning_rate": 1.21557955496399e-06,
      "loss": 0.0063,
      "step": 2686300
    },
    {
      "epoch": 4.396221598161858,
      "grad_norm": 0.132740318775177,
      "learning_rate": 1.215513662750473e-06,
      "loss": 0.0107,
      "step": 2686320
    },
    {
      "epoch": 4.396254328600512,
      "grad_norm": 0.17304857075214386,
      "learning_rate": 1.2154477705369557e-06,
      "loss": 0.0098,
      "step": 2686340
    },
    {
      "epoch": 4.396287059039166,
      "grad_norm": 0.2729671597480774,
      "learning_rate": 1.2153818783234386e-06,
      "loss": 0.0081,
      "step": 2686360
    },
    {
      "epoch": 4.396319789477818,
      "grad_norm": 0.21108701825141907,
      "learning_rate": 1.2153159861099214e-06,
      "loss": 0.0091,
      "step": 2686380
    },
    {
      "epoch": 4.396352519916472,
      "grad_norm": 0.2892322242259979,
      "learning_rate": 1.2152500938964043e-06,
      "loss": 0.0063,
      "step": 2686400
    },
    {
      "epoch": 4.3963852503551255,
      "grad_norm": 0.22135961055755615,
      "learning_rate": 1.2151842016828873e-06,
      "loss": 0.01,
      "step": 2686420
    },
    {
      "epoch": 4.396417980793778,
      "grad_norm": 0.19518762826919556,
      "learning_rate": 1.21511830946937e-06,
      "loss": 0.0104,
      "step": 2686440
    },
    {
      "epoch": 4.396450711232432,
      "grad_norm": 0.09717855602502823,
      "learning_rate": 1.215052417255853e-06,
      "loss": 0.0098,
      "step": 2686460
    },
    {
      "epoch": 4.396483441671085,
      "grad_norm": 0.31440281867980957,
      "learning_rate": 1.2149865250423357e-06,
      "loss": 0.0084,
      "step": 2686480
    },
    {
      "epoch": 4.396516172109739,
      "grad_norm": 0.5355858206748962,
      "learning_rate": 1.2149206328288189e-06,
      "loss": 0.0092,
      "step": 2686500
    },
    {
      "epoch": 4.396548902548392,
      "grad_norm": 0.09754358232021332,
      "learning_rate": 1.2148547406153016e-06,
      "loss": 0.009,
      "step": 2686520
    },
    {
      "epoch": 4.396581632987045,
      "grad_norm": 0.33441147208213806,
      "learning_rate": 1.2147888484017846e-06,
      "loss": 0.0128,
      "step": 2686540
    },
    {
      "epoch": 4.396614363425699,
      "grad_norm": 0.22138923406600952,
      "learning_rate": 1.2147229561882673e-06,
      "loss": 0.0132,
      "step": 2686560
    },
    {
      "epoch": 4.396647093864352,
      "grad_norm": 0.14640355110168457,
      "learning_rate": 1.21465706397475e-06,
      "loss": 0.0074,
      "step": 2686580
    },
    {
      "epoch": 4.396679824303005,
      "grad_norm": 0.6291572451591492,
      "learning_rate": 1.2145911717612332e-06,
      "loss": 0.014,
      "step": 2686600
    },
    {
      "epoch": 4.396712554741659,
      "grad_norm": 0.2687976360321045,
      "learning_rate": 1.214525279547716e-06,
      "loss": 0.0084,
      "step": 2686620
    },
    {
      "epoch": 4.396745285180312,
      "grad_norm": 0.15240806341171265,
      "learning_rate": 1.214459387334199e-06,
      "loss": 0.0113,
      "step": 2686640
    },
    {
      "epoch": 4.396778015618965,
      "grad_norm": 0.12326120585203171,
      "learning_rate": 1.2143934951206816e-06,
      "loss": 0.0063,
      "step": 2686660
    },
    {
      "epoch": 4.396810746057619,
      "grad_norm": 0.07243454456329346,
      "learning_rate": 1.2143276029071646e-06,
      "loss": 0.0107,
      "step": 2686680
    },
    {
      "epoch": 4.396843476496272,
      "grad_norm": 0.09231147170066833,
      "learning_rate": 1.2142617106936475e-06,
      "loss": 0.0089,
      "step": 2686700
    },
    {
      "epoch": 4.396876206934925,
      "grad_norm": 0.1380157470703125,
      "learning_rate": 1.2141958184801303e-06,
      "loss": 0.0057,
      "step": 2686720
    },
    {
      "epoch": 4.396908937373579,
      "grad_norm": 0.47149109840393066,
      "learning_rate": 1.2141299262666132e-06,
      "loss": 0.0084,
      "step": 2686740
    },
    {
      "epoch": 4.396941667812232,
      "grad_norm": 0.22672468423843384,
      "learning_rate": 1.2140640340530962e-06,
      "loss": 0.0074,
      "step": 2686760
    },
    {
      "epoch": 4.396974398250886,
      "grad_norm": 0.15596681833267212,
      "learning_rate": 1.213998141839579e-06,
      "loss": 0.0113,
      "step": 2686780
    },
    {
      "epoch": 4.3970071286895385,
      "grad_norm": 0.5189408659934998,
      "learning_rate": 1.2139322496260619e-06,
      "loss": 0.0065,
      "step": 2686800
    },
    {
      "epoch": 4.397039859128192,
      "grad_norm": 0.24035872519016266,
      "learning_rate": 1.2138663574125446e-06,
      "loss": 0.005,
      "step": 2686820
    },
    {
      "epoch": 4.397072589566846,
      "grad_norm": 0.2979322373867035,
      "learning_rate": 1.2138004651990276e-06,
      "loss": 0.0108,
      "step": 2686840
    },
    {
      "epoch": 4.397105320005498,
      "grad_norm": 0.23935802280902863,
      "learning_rate": 1.2137345729855105e-06,
      "loss": 0.0073,
      "step": 2686860
    },
    {
      "epoch": 4.397138050444152,
      "grad_norm": 0.7610305547714233,
      "learning_rate": 1.2136686807719933e-06,
      "loss": 0.008,
      "step": 2686880
    },
    {
      "epoch": 4.3971707808828056,
      "grad_norm": 0.3776581287384033,
      "learning_rate": 1.2136027885584762e-06,
      "loss": 0.0113,
      "step": 2686900
    },
    {
      "epoch": 4.397203511321459,
      "grad_norm": 0.3187156915664673,
      "learning_rate": 1.213536896344959e-06,
      "loss": 0.0067,
      "step": 2686920
    },
    {
      "epoch": 4.397236241760112,
      "grad_norm": 0.2495560646057129,
      "learning_rate": 1.213471004131442e-06,
      "loss": 0.0067,
      "step": 2686940
    },
    {
      "epoch": 4.3972689721987654,
      "grad_norm": 0.21923920512199402,
      "learning_rate": 1.2134051119179249e-06,
      "loss": 0.0066,
      "step": 2686960
    },
    {
      "epoch": 4.397301702637419,
      "grad_norm": 0.44774696230888367,
      "learning_rate": 1.2133392197044076e-06,
      "loss": 0.0083,
      "step": 2686980
    },
    {
      "epoch": 4.397334433076072,
      "grad_norm": 0.11928381025791168,
      "learning_rate": 1.2132733274908905e-06,
      "loss": 0.008,
      "step": 2687000
    },
    {
      "epoch": 4.397367163514725,
      "grad_norm": 0.23000383377075195,
      "learning_rate": 1.2132074352773733e-06,
      "loss": 0.0114,
      "step": 2687020
    },
    {
      "epoch": 4.397399893953379,
      "grad_norm": 0.04320961609482765,
      "learning_rate": 1.2131415430638562e-06,
      "loss": 0.0076,
      "step": 2687040
    },
    {
      "epoch": 4.397432624392032,
      "grad_norm": 0.49307918548583984,
      "learning_rate": 1.2130756508503392e-06,
      "loss": 0.0057,
      "step": 2687060
    },
    {
      "epoch": 4.397465354830685,
      "grad_norm": 0.07113152742385864,
      "learning_rate": 1.213009758636822e-06,
      "loss": 0.0093,
      "step": 2687080
    },
    {
      "epoch": 4.397498085269339,
      "grad_norm": 0.5703151822090149,
      "learning_rate": 1.2129438664233049e-06,
      "loss": 0.0135,
      "step": 2687100
    },
    {
      "epoch": 4.397530815707992,
      "grad_norm": 0.1335262805223465,
      "learning_rate": 1.2128779742097878e-06,
      "loss": 0.0079,
      "step": 2687120
    },
    {
      "epoch": 4.397563546146645,
      "grad_norm": 0.14525696635246277,
      "learning_rate": 1.2128120819962706e-06,
      "loss": 0.0067,
      "step": 2687140
    },
    {
      "epoch": 4.397596276585299,
      "grad_norm": 0.08189623057842255,
      "learning_rate": 1.2127461897827535e-06,
      "loss": 0.0104,
      "step": 2687160
    },
    {
      "epoch": 4.397629007023952,
      "grad_norm": 0.22120459377765656,
      "learning_rate": 1.2126802975692363e-06,
      "loss": 0.0086,
      "step": 2687180
    },
    {
      "epoch": 4.397661737462606,
      "grad_norm": 0.20808689296245575,
      "learning_rate": 1.2126144053557192e-06,
      "loss": 0.0076,
      "step": 2687200
    },
    {
      "epoch": 4.397694467901259,
      "grad_norm": 0.37135425209999084,
      "learning_rate": 1.2125485131422022e-06,
      "loss": 0.0093,
      "step": 2687220
    },
    {
      "epoch": 4.397727198339912,
      "grad_norm": 0.2186373472213745,
      "learning_rate": 1.212482620928685e-06,
      "loss": 0.0112,
      "step": 2687240
    },
    {
      "epoch": 4.397759928778566,
      "grad_norm": 0.35147741436958313,
      "learning_rate": 1.2124167287151679e-06,
      "loss": 0.0123,
      "step": 2687260
    },
    {
      "epoch": 4.3977926592172185,
      "grad_norm": 0.05554278939962387,
      "learning_rate": 1.2123508365016506e-06,
      "loss": 0.0112,
      "step": 2687280
    },
    {
      "epoch": 4.397825389655872,
      "grad_norm": 0.4021327495574951,
      "learning_rate": 1.2122849442881335e-06,
      "loss": 0.0076,
      "step": 2687300
    },
    {
      "epoch": 4.397858120094526,
      "grad_norm": 0.12642177939414978,
      "learning_rate": 1.2122190520746165e-06,
      "loss": 0.0067,
      "step": 2687320
    },
    {
      "epoch": 4.397890850533178,
      "grad_norm": 0.23315608501434326,
      "learning_rate": 1.2121531598610992e-06,
      "loss": 0.0102,
      "step": 2687340
    },
    {
      "epoch": 4.397923580971832,
      "grad_norm": 0.20935818552970886,
      "learning_rate": 1.2120872676475822e-06,
      "loss": 0.0112,
      "step": 2687360
    },
    {
      "epoch": 4.397956311410486,
      "grad_norm": 0.17497840523719788,
      "learning_rate": 1.2120213754340651e-06,
      "loss": 0.0095,
      "step": 2687380
    },
    {
      "epoch": 4.397989041849139,
      "grad_norm": 0.21820269525051117,
      "learning_rate": 1.2119554832205479e-06,
      "loss": 0.0092,
      "step": 2687400
    },
    {
      "epoch": 4.398021772287792,
      "grad_norm": 0.2055552452802658,
      "learning_rate": 1.2118895910070308e-06,
      "loss": 0.0148,
      "step": 2687420
    },
    {
      "epoch": 4.3980545027264455,
      "grad_norm": 0.08525407314300537,
      "learning_rate": 1.2118236987935136e-06,
      "loss": 0.0106,
      "step": 2687440
    },
    {
      "epoch": 4.398087233165099,
      "grad_norm": 0.2378547340631485,
      "learning_rate": 1.2117578065799965e-06,
      "loss": 0.0084,
      "step": 2687460
    },
    {
      "epoch": 4.398119963603753,
      "grad_norm": 0.42725425958633423,
      "learning_rate": 1.2116919143664795e-06,
      "loss": 0.0103,
      "step": 2687480
    },
    {
      "epoch": 4.398152694042405,
      "grad_norm": 0.11901474744081497,
      "learning_rate": 1.2116260221529622e-06,
      "loss": 0.0101,
      "step": 2687500
    },
    {
      "epoch": 4.398185424481059,
      "grad_norm": 0.05612282454967499,
      "learning_rate": 1.2115601299394452e-06,
      "loss": 0.0106,
      "step": 2687520
    },
    {
      "epoch": 4.398218154919713,
      "grad_norm": 0.20265354216098785,
      "learning_rate": 1.211494237725928e-06,
      "loss": 0.006,
      "step": 2687540
    },
    {
      "epoch": 4.398250885358365,
      "grad_norm": 0.13166935741901398,
      "learning_rate": 1.2114283455124109e-06,
      "loss": 0.0081,
      "step": 2687560
    },
    {
      "epoch": 4.398283615797019,
      "grad_norm": 0.04930755868554115,
      "learning_rate": 1.2113624532988938e-06,
      "loss": 0.0101,
      "step": 2687580
    },
    {
      "epoch": 4.3983163462356725,
      "grad_norm": 0.19158682227134705,
      "learning_rate": 1.2112965610853765e-06,
      "loss": 0.0085,
      "step": 2687600
    },
    {
      "epoch": 4.398349076674325,
      "grad_norm": 0.2653065621852875,
      "learning_rate": 1.2112306688718595e-06,
      "loss": 0.0077,
      "step": 2687620
    },
    {
      "epoch": 4.398381807112979,
      "grad_norm": 0.1401437669992447,
      "learning_rate": 1.2111647766583424e-06,
      "loss": 0.0102,
      "step": 2687640
    },
    {
      "epoch": 4.398414537551632,
      "grad_norm": 0.3313012421131134,
      "learning_rate": 1.2110988844448252e-06,
      "loss": 0.0101,
      "step": 2687660
    },
    {
      "epoch": 4.398447267990286,
      "grad_norm": 0.6798673868179321,
      "learning_rate": 1.2110329922313081e-06,
      "loss": 0.0155,
      "step": 2687680
    },
    {
      "epoch": 4.398479998428939,
      "grad_norm": 0.16445733606815338,
      "learning_rate": 1.2109671000177909e-06,
      "loss": 0.0079,
      "step": 2687700
    },
    {
      "epoch": 4.398512728867592,
      "grad_norm": 0.16990162432193756,
      "learning_rate": 1.2109012078042738e-06,
      "loss": 0.0114,
      "step": 2687720
    },
    {
      "epoch": 4.398545459306246,
      "grad_norm": 0.7944162487983704,
      "learning_rate": 1.2108353155907568e-06,
      "loss": 0.011,
      "step": 2687740
    },
    {
      "epoch": 4.398578189744899,
      "grad_norm": 0.17376156151294708,
      "learning_rate": 1.2107694233772395e-06,
      "loss": 0.0059,
      "step": 2687760
    },
    {
      "epoch": 4.398610920183552,
      "grad_norm": 0.17828026413917542,
      "learning_rate": 1.2107035311637225e-06,
      "loss": 0.0105,
      "step": 2687780
    },
    {
      "epoch": 4.398643650622206,
      "grad_norm": 0.050760410726070404,
      "learning_rate": 1.2106376389502052e-06,
      "loss": 0.0055,
      "step": 2687800
    },
    {
      "epoch": 4.398676381060859,
      "grad_norm": 0.0921824723482132,
      "learning_rate": 1.2105717467366884e-06,
      "loss": 0.0071,
      "step": 2687820
    },
    {
      "epoch": 4.398709111499512,
      "grad_norm": 0.1717800796031952,
      "learning_rate": 1.2105058545231711e-06,
      "loss": 0.007,
      "step": 2687840
    },
    {
      "epoch": 4.398741841938166,
      "grad_norm": 0.22145302593708038,
      "learning_rate": 1.210439962309654e-06,
      "loss": 0.0083,
      "step": 2687860
    },
    {
      "epoch": 4.398774572376819,
      "grad_norm": 0.19189894199371338,
      "learning_rate": 1.2103740700961368e-06,
      "loss": 0.0094,
      "step": 2687880
    },
    {
      "epoch": 4.398807302815472,
      "grad_norm": 0.3165581524372101,
      "learning_rate": 1.2103081778826195e-06,
      "loss": 0.0079,
      "step": 2687900
    },
    {
      "epoch": 4.3988400332541255,
      "grad_norm": 0.12690533697605133,
      "learning_rate": 1.2102422856691027e-06,
      "loss": 0.0062,
      "step": 2687920
    },
    {
      "epoch": 4.398872763692779,
      "grad_norm": 0.6491551995277405,
      "learning_rate": 1.2101763934555855e-06,
      "loss": 0.0089,
      "step": 2687940
    },
    {
      "epoch": 4.398905494131433,
      "grad_norm": 0.28169670701026917,
      "learning_rate": 1.2101105012420684e-06,
      "loss": 0.009,
      "step": 2687960
    },
    {
      "epoch": 4.398938224570085,
      "grad_norm": 0.4182243347167969,
      "learning_rate": 1.2100446090285511e-06,
      "loss": 0.0073,
      "step": 2687980
    },
    {
      "epoch": 4.398970955008739,
      "grad_norm": 0.3674106001853943,
      "learning_rate": 1.209978716815034e-06,
      "loss": 0.0066,
      "step": 2688000
    },
    {
      "epoch": 4.399003685447393,
      "grad_norm": 0.15931183099746704,
      "learning_rate": 1.209912824601517e-06,
      "loss": 0.0083,
      "step": 2688020
    },
    {
      "epoch": 4.399036415886045,
      "grad_norm": 0.6970563530921936,
      "learning_rate": 1.2098469323879998e-06,
      "loss": 0.0076,
      "step": 2688040
    },
    {
      "epoch": 4.399069146324699,
      "grad_norm": 0.15503133833408356,
      "learning_rate": 1.2097810401744827e-06,
      "loss": 0.006,
      "step": 2688060
    },
    {
      "epoch": 4.3991018767633525,
      "grad_norm": 0.13594293594360352,
      "learning_rate": 1.2097151479609657e-06,
      "loss": 0.0087,
      "step": 2688080
    },
    {
      "epoch": 4.399134607202006,
      "grad_norm": 0.19259963929653168,
      "learning_rate": 1.2096492557474484e-06,
      "loss": 0.0071,
      "step": 2688100
    },
    {
      "epoch": 4.399167337640659,
      "grad_norm": 0.1010715514421463,
      "learning_rate": 1.2095833635339314e-06,
      "loss": 0.008,
      "step": 2688120
    },
    {
      "epoch": 4.399200068079312,
      "grad_norm": 0.255428671836853,
      "learning_rate": 1.2095174713204141e-06,
      "loss": 0.0099,
      "step": 2688140
    },
    {
      "epoch": 4.399232798517966,
      "grad_norm": 0.2680053114891052,
      "learning_rate": 1.209451579106897e-06,
      "loss": 0.0114,
      "step": 2688160
    },
    {
      "epoch": 4.399265528956619,
      "grad_norm": 0.1714124232530594,
      "learning_rate": 1.20938568689338e-06,
      "loss": 0.0049,
      "step": 2688180
    },
    {
      "epoch": 4.399298259395272,
      "grad_norm": 0.26208439469337463,
      "learning_rate": 1.2093197946798628e-06,
      "loss": 0.0074,
      "step": 2688200
    },
    {
      "epoch": 4.399330989833926,
      "grad_norm": 0.21572168171405792,
      "learning_rate": 1.2092539024663457e-06,
      "loss": 0.0119,
      "step": 2688220
    },
    {
      "epoch": 4.3993637202725795,
      "grad_norm": 0.32101500034332275,
      "learning_rate": 1.2091880102528285e-06,
      "loss": 0.0064,
      "step": 2688240
    },
    {
      "epoch": 4.399396450711232,
      "grad_norm": 0.36138930916786194,
      "learning_rate": 1.2091221180393114e-06,
      "loss": 0.0099,
      "step": 2688260
    },
    {
      "epoch": 4.399429181149886,
      "grad_norm": 0.36979424953460693,
      "learning_rate": 1.2090562258257944e-06,
      "loss": 0.009,
      "step": 2688280
    },
    {
      "epoch": 4.399461911588539,
      "grad_norm": 0.3815266489982605,
      "learning_rate": 1.208990333612277e-06,
      "loss": 0.0098,
      "step": 2688300
    },
    {
      "epoch": 4.399494642027192,
      "grad_norm": 0.34526464343070984,
      "learning_rate": 1.20892444139876e-06,
      "loss": 0.0099,
      "step": 2688320
    },
    {
      "epoch": 4.399527372465846,
      "grad_norm": 0.22580356895923615,
      "learning_rate": 1.2088585491852428e-06,
      "loss": 0.0089,
      "step": 2688340
    },
    {
      "epoch": 4.399560102904499,
      "grad_norm": 0.3654707670211792,
      "learning_rate": 1.2087926569717257e-06,
      "loss": 0.0061,
      "step": 2688360
    },
    {
      "epoch": 4.399592833343153,
      "grad_norm": 0.07855218648910522,
      "learning_rate": 1.2087267647582087e-06,
      "loss": 0.0082,
      "step": 2688380
    },
    {
      "epoch": 4.399625563781806,
      "grad_norm": 0.14762699604034424,
      "learning_rate": 1.2086608725446914e-06,
      "loss": 0.0059,
      "step": 2688400
    },
    {
      "epoch": 4.399658294220459,
      "grad_norm": 0.0984513908624649,
      "learning_rate": 1.2085949803311744e-06,
      "loss": 0.0074,
      "step": 2688420
    },
    {
      "epoch": 4.399691024659113,
      "grad_norm": 0.26244643330574036,
      "learning_rate": 1.2085290881176573e-06,
      "loss": 0.0094,
      "step": 2688440
    },
    {
      "epoch": 4.3997237550977655,
      "grad_norm": 0.2031886875629425,
      "learning_rate": 1.20846319590414e-06,
      "loss": 0.0112,
      "step": 2688460
    },
    {
      "epoch": 4.399756485536419,
      "grad_norm": 0.08476865291595459,
      "learning_rate": 1.208397303690623e-06,
      "loss": 0.0089,
      "step": 2688480
    },
    {
      "epoch": 4.399789215975073,
      "grad_norm": 0.2183593064546585,
      "learning_rate": 1.2083314114771058e-06,
      "loss": 0.0128,
      "step": 2688500
    },
    {
      "epoch": 4.399821946413725,
      "grad_norm": 0.18404284119606018,
      "learning_rate": 1.2082655192635887e-06,
      "loss": 0.0077,
      "step": 2688520
    },
    {
      "epoch": 4.399854676852379,
      "grad_norm": 0.5434592366218567,
      "learning_rate": 1.2081996270500717e-06,
      "loss": 0.0149,
      "step": 2688540
    },
    {
      "epoch": 4.399887407291033,
      "grad_norm": 0.14191068708896637,
      "learning_rate": 1.2081337348365544e-06,
      "loss": 0.0097,
      "step": 2688560
    },
    {
      "epoch": 4.399920137729686,
      "grad_norm": 0.2626549303531647,
      "learning_rate": 1.2080678426230374e-06,
      "loss": 0.0066,
      "step": 2688580
    },
    {
      "epoch": 4.399952868168339,
      "grad_norm": 0.10625007748603821,
      "learning_rate": 1.20800195040952e-06,
      "loss": 0.0072,
      "step": 2688600
    },
    {
      "epoch": 4.3999855986069925,
      "grad_norm": 0.34457266330718994,
      "learning_rate": 1.207936058196003e-06,
      "loss": 0.0066,
      "step": 2688620
    },
    {
      "epoch": 4.400018329045646,
      "grad_norm": 0.10712811350822449,
      "learning_rate": 1.207870165982486e-06,
      "loss": 0.0115,
      "step": 2688640
    },
    {
      "epoch": 4.4000510594843,
      "grad_norm": 0.24322615563869476,
      "learning_rate": 1.2078042737689687e-06,
      "loss": 0.0058,
      "step": 2688660
    },
    {
      "epoch": 4.400083789922952,
      "grad_norm": 0.3509339392185211,
      "learning_rate": 1.2077383815554517e-06,
      "loss": 0.0078,
      "step": 2688680
    },
    {
      "epoch": 4.400116520361606,
      "grad_norm": 0.19648510217666626,
      "learning_rate": 1.2076724893419346e-06,
      "loss": 0.0081,
      "step": 2688700
    },
    {
      "epoch": 4.4001492508002595,
      "grad_norm": 0.3699682056903839,
      "learning_rate": 1.2076065971284174e-06,
      "loss": 0.0117,
      "step": 2688720
    },
    {
      "epoch": 4.400181981238912,
      "grad_norm": 0.3258121907711029,
      "learning_rate": 1.2075407049149003e-06,
      "loss": 0.007,
      "step": 2688740
    },
    {
      "epoch": 4.400214711677566,
      "grad_norm": 0.2242903709411621,
      "learning_rate": 1.207474812701383e-06,
      "loss": 0.0051,
      "step": 2688760
    },
    {
      "epoch": 4.400247442116219,
      "grad_norm": 0.22104550898075104,
      "learning_rate": 1.207408920487866e-06,
      "loss": 0.01,
      "step": 2688780
    },
    {
      "epoch": 4.400280172554872,
      "grad_norm": 0.12098925560712814,
      "learning_rate": 1.207343028274349e-06,
      "loss": 0.0065,
      "step": 2688800
    },
    {
      "epoch": 4.400312902993526,
      "grad_norm": 0.5119901299476624,
      "learning_rate": 1.2072771360608317e-06,
      "loss": 0.0112,
      "step": 2688820
    },
    {
      "epoch": 4.400345633432179,
      "grad_norm": 0.38694342970848083,
      "learning_rate": 1.2072112438473147e-06,
      "loss": 0.0107,
      "step": 2688840
    },
    {
      "epoch": 4.400378363870833,
      "grad_norm": 0.21710897982120514,
      "learning_rate": 1.2071453516337974e-06,
      "loss": 0.0091,
      "step": 2688860
    },
    {
      "epoch": 4.400411094309486,
      "grad_norm": 0.22355490922927856,
      "learning_rate": 1.2070794594202804e-06,
      "loss": 0.0051,
      "step": 2688880
    },
    {
      "epoch": 4.400443824748139,
      "grad_norm": 0.25575515627861023,
      "learning_rate": 1.2070135672067633e-06,
      "loss": 0.0118,
      "step": 2688900
    },
    {
      "epoch": 4.400476555186793,
      "grad_norm": 0.19938895106315613,
      "learning_rate": 1.206947674993246e-06,
      "loss": 0.0084,
      "step": 2688920
    },
    {
      "epoch": 4.400509285625446,
      "grad_norm": 0.43328890204429626,
      "learning_rate": 1.206881782779729e-06,
      "loss": 0.0091,
      "step": 2688940
    },
    {
      "epoch": 4.400542016064099,
      "grad_norm": 0.1822817623615265,
      "learning_rate": 1.206815890566212e-06,
      "loss": 0.0086,
      "step": 2688960
    },
    {
      "epoch": 4.400574746502753,
      "grad_norm": 0.16557128727436066,
      "learning_rate": 1.2067499983526947e-06,
      "loss": 0.0073,
      "step": 2688980
    },
    {
      "epoch": 4.400607476941406,
      "grad_norm": 0.5457091331481934,
      "learning_rate": 1.2066841061391776e-06,
      "loss": 0.0123,
      "step": 2689000
    },
    {
      "epoch": 4.400640207380059,
      "grad_norm": 0.045578956604003906,
      "learning_rate": 1.2066182139256604e-06,
      "loss": 0.0109,
      "step": 2689020
    },
    {
      "epoch": 4.400672937818713,
      "grad_norm": 0.6763023138046265,
      "learning_rate": 1.2065523217121433e-06,
      "loss": 0.0105,
      "step": 2689040
    },
    {
      "epoch": 4.400705668257366,
      "grad_norm": 0.38790634274482727,
      "learning_rate": 1.2064864294986263e-06,
      "loss": 0.0105,
      "step": 2689060
    },
    {
      "epoch": 4.400738398696019,
      "grad_norm": 0.08103910833597183,
      "learning_rate": 1.206420537285109e-06,
      "loss": 0.0053,
      "step": 2689080
    },
    {
      "epoch": 4.4007711291346725,
      "grad_norm": 0.157232403755188,
      "learning_rate": 1.206354645071592e-06,
      "loss": 0.0087,
      "step": 2689100
    },
    {
      "epoch": 4.400803859573326,
      "grad_norm": 0.19357722997665405,
      "learning_rate": 1.2062887528580747e-06,
      "loss": 0.0124,
      "step": 2689120
    },
    {
      "epoch": 4.40083659001198,
      "grad_norm": 0.06838799268007278,
      "learning_rate": 1.2062228606445579e-06,
      "loss": 0.0058,
      "step": 2689140
    },
    {
      "epoch": 4.400869320450632,
      "grad_norm": 0.21253645420074463,
      "learning_rate": 1.2061569684310406e-06,
      "loss": 0.0066,
      "step": 2689160
    },
    {
      "epoch": 4.400902050889286,
      "grad_norm": 0.10605953633785248,
      "learning_rate": 1.2060910762175236e-06,
      "loss": 0.0047,
      "step": 2689180
    },
    {
      "epoch": 4.40093478132794,
      "grad_norm": 0.30874669551849365,
      "learning_rate": 1.2060251840040063e-06,
      "loss": 0.0067,
      "step": 2689200
    },
    {
      "epoch": 4.400967511766592,
      "grad_norm": 0.41834691166877747,
      "learning_rate": 1.205959291790489e-06,
      "loss": 0.0067,
      "step": 2689220
    },
    {
      "epoch": 4.401000242205246,
      "grad_norm": 0.349225252866745,
      "learning_rate": 1.2058933995769722e-06,
      "loss": 0.008,
      "step": 2689240
    },
    {
      "epoch": 4.4010329726438995,
      "grad_norm": 0.12260212004184723,
      "learning_rate": 1.205827507363455e-06,
      "loss": 0.008,
      "step": 2689260
    },
    {
      "epoch": 4.401065703082553,
      "grad_norm": 0.12845231592655182,
      "learning_rate": 1.205761615149938e-06,
      "loss": 0.006,
      "step": 2689280
    },
    {
      "epoch": 4.401098433521206,
      "grad_norm": 0.04663322865962982,
      "learning_rate": 1.2056957229364206e-06,
      "loss": 0.0068,
      "step": 2689300
    },
    {
      "epoch": 4.401131163959859,
      "grad_norm": 0.18282756209373474,
      "learning_rate": 1.2056298307229036e-06,
      "loss": 0.0111,
      "step": 2689320
    },
    {
      "epoch": 4.401163894398513,
      "grad_norm": 0.2572399973869324,
      "learning_rate": 1.2055639385093866e-06,
      "loss": 0.0114,
      "step": 2689340
    },
    {
      "epoch": 4.401196624837166,
      "grad_norm": 0.30008038878440857,
      "learning_rate": 1.2054980462958693e-06,
      "loss": 0.0072,
      "step": 2689360
    },
    {
      "epoch": 4.401229355275819,
      "grad_norm": 0.15689446032047272,
      "learning_rate": 1.2054321540823522e-06,
      "loss": 0.0081,
      "step": 2689380
    },
    {
      "epoch": 4.401262085714473,
      "grad_norm": 0.16690106689929962,
      "learning_rate": 1.2053662618688352e-06,
      "loss": 0.0115,
      "step": 2689400
    },
    {
      "epoch": 4.4012948161531265,
      "grad_norm": 0.15109612047672272,
      "learning_rate": 1.205300369655318e-06,
      "loss": 0.0103,
      "step": 2689420
    },
    {
      "epoch": 4.401327546591779,
      "grad_norm": 0.20670239627361298,
      "learning_rate": 1.2052344774418009e-06,
      "loss": 0.0072,
      "step": 2689440
    },
    {
      "epoch": 4.401360277030433,
      "grad_norm": 0.12548065185546875,
      "learning_rate": 1.2051685852282836e-06,
      "loss": 0.0074,
      "step": 2689460
    },
    {
      "epoch": 4.401393007469086,
      "grad_norm": 0.19694192707538605,
      "learning_rate": 1.2051026930147666e-06,
      "loss": 0.0059,
      "step": 2689480
    },
    {
      "epoch": 4.401425737907739,
      "grad_norm": 0.43162667751312256,
      "learning_rate": 1.2050368008012495e-06,
      "loss": 0.0086,
      "step": 2689500
    },
    {
      "epoch": 4.401458468346393,
      "grad_norm": 0.08707918226718903,
      "learning_rate": 1.2049709085877323e-06,
      "loss": 0.0098,
      "step": 2689520
    },
    {
      "epoch": 4.401491198785046,
      "grad_norm": 0.07480176538228989,
      "learning_rate": 1.2049050163742152e-06,
      "loss": 0.0088,
      "step": 2689540
    },
    {
      "epoch": 4.4015239292237,
      "grad_norm": 0.252739816904068,
      "learning_rate": 1.204839124160698e-06,
      "loss": 0.0093,
      "step": 2689560
    },
    {
      "epoch": 4.4015566596623525,
      "grad_norm": 0.10891098529100418,
      "learning_rate": 1.204773231947181e-06,
      "loss": 0.0089,
      "step": 2689580
    },
    {
      "epoch": 4.401589390101006,
      "grad_norm": 0.14868035912513733,
      "learning_rate": 1.2047073397336639e-06,
      "loss": 0.0056,
      "step": 2689600
    },
    {
      "epoch": 4.40162212053966,
      "grad_norm": 0.5664507746696472,
      "learning_rate": 1.2046414475201466e-06,
      "loss": 0.0145,
      "step": 2689620
    },
    {
      "epoch": 4.401654850978312,
      "grad_norm": 0.146095871925354,
      "learning_rate": 1.2045755553066296e-06,
      "loss": 0.008,
      "step": 2689640
    },
    {
      "epoch": 4.401687581416966,
      "grad_norm": 0.06115484982728958,
      "learning_rate": 1.2045096630931123e-06,
      "loss": 0.007,
      "step": 2689660
    },
    {
      "epoch": 4.40172031185562,
      "grad_norm": 0.1576426476240158,
      "learning_rate": 1.2044437708795952e-06,
      "loss": 0.0078,
      "step": 2689680
    },
    {
      "epoch": 4.401753042294273,
      "grad_norm": 0.16368776559829712,
      "learning_rate": 1.2043778786660782e-06,
      "loss": 0.0088,
      "step": 2689700
    },
    {
      "epoch": 4.401785772732926,
      "grad_norm": 0.17540331184864044,
      "learning_rate": 1.204311986452561e-06,
      "loss": 0.007,
      "step": 2689720
    },
    {
      "epoch": 4.4018185031715795,
      "grad_norm": 0.1584102213382721,
      "learning_rate": 1.2042460942390439e-06,
      "loss": 0.0121,
      "step": 2689740
    },
    {
      "epoch": 4.401851233610233,
      "grad_norm": 0.1392831653356552,
      "learning_rate": 1.2041802020255268e-06,
      "loss": 0.0077,
      "step": 2689760
    },
    {
      "epoch": 4.401883964048886,
      "grad_norm": 0.19547972083091736,
      "learning_rate": 1.2041143098120096e-06,
      "loss": 0.0112,
      "step": 2689780
    },
    {
      "epoch": 4.401916694487539,
      "grad_norm": 0.2578560709953308,
      "learning_rate": 1.2040484175984925e-06,
      "loss": 0.0109,
      "step": 2689800
    },
    {
      "epoch": 4.401949424926193,
      "grad_norm": 0.1494985669851303,
      "learning_rate": 1.2039825253849753e-06,
      "loss": 0.0102,
      "step": 2689820
    },
    {
      "epoch": 4.401982155364847,
      "grad_norm": 0.4818970561027527,
      "learning_rate": 1.2039166331714582e-06,
      "loss": 0.0133,
      "step": 2689840
    },
    {
      "epoch": 4.402014885803499,
      "grad_norm": 0.17985425889492035,
      "learning_rate": 1.2038507409579412e-06,
      "loss": 0.0089,
      "step": 2689860
    },
    {
      "epoch": 4.402047616242153,
      "grad_norm": 0.13888512551784515,
      "learning_rate": 1.203784848744424e-06,
      "loss": 0.0105,
      "step": 2689880
    },
    {
      "epoch": 4.4020803466808065,
      "grad_norm": 0.32496941089630127,
      "learning_rate": 1.2037189565309069e-06,
      "loss": 0.009,
      "step": 2689900
    },
    {
      "epoch": 4.402113077119459,
      "grad_norm": 0.46160537004470825,
      "learning_rate": 1.2036530643173896e-06,
      "loss": 0.0095,
      "step": 2689920
    },
    {
      "epoch": 4.402145807558113,
      "grad_norm": 0.3260713517665863,
      "learning_rate": 1.2035871721038726e-06,
      "loss": 0.0075,
      "step": 2689940
    },
    {
      "epoch": 4.402178537996766,
      "grad_norm": 0.2489311695098877,
      "learning_rate": 1.2035212798903555e-06,
      "loss": 0.0108,
      "step": 2689960
    },
    {
      "epoch": 4.402211268435419,
      "grad_norm": 0.1933056116104126,
      "learning_rate": 1.2034553876768382e-06,
      "loss": 0.0092,
      "step": 2689980
    },
    {
      "epoch": 4.402243998874073,
      "grad_norm": 0.4084579646587372,
      "learning_rate": 1.2033894954633212e-06,
      "loss": 0.0096,
      "step": 2690000
    },
    {
      "epoch": 4.402276729312726,
      "grad_norm": 0.14031559228897095,
      "learning_rate": 1.2033236032498041e-06,
      "loss": 0.0081,
      "step": 2690020
    },
    {
      "epoch": 4.40230945975138,
      "grad_norm": 0.1325770914554596,
      "learning_rate": 1.2032577110362869e-06,
      "loss": 0.008,
      "step": 2690040
    },
    {
      "epoch": 4.402342190190033,
      "grad_norm": 0.4786839783191681,
      "learning_rate": 1.2031918188227698e-06,
      "loss": 0.0186,
      "step": 2690060
    },
    {
      "epoch": 4.402374920628686,
      "grad_norm": 0.13103918731212616,
      "learning_rate": 1.2031259266092526e-06,
      "loss": 0.0071,
      "step": 2690080
    },
    {
      "epoch": 4.40240765106734,
      "grad_norm": 0.6004688143730164,
      "learning_rate": 1.2030600343957355e-06,
      "loss": 0.0095,
      "step": 2690100
    },
    {
      "epoch": 4.402440381505993,
      "grad_norm": 0.33447685837745667,
      "learning_rate": 1.2029941421822185e-06,
      "loss": 0.0096,
      "step": 2690120
    },
    {
      "epoch": 4.402473111944646,
      "grad_norm": 0.17472705245018005,
      "learning_rate": 1.2029282499687012e-06,
      "loss": 0.0116,
      "step": 2690140
    },
    {
      "epoch": 4.4025058423833,
      "grad_norm": 0.16733318567276,
      "learning_rate": 1.2028623577551842e-06,
      "loss": 0.0095,
      "step": 2690160
    },
    {
      "epoch": 4.402538572821953,
      "grad_norm": 0.10438748449087143,
      "learning_rate": 1.202796465541667e-06,
      "loss": 0.0069,
      "step": 2690180
    },
    {
      "epoch": 4.402571303260606,
      "grad_norm": 0.26526644825935364,
      "learning_rate": 1.2027305733281499e-06,
      "loss": 0.0101,
      "step": 2690200
    },
    {
      "epoch": 4.40260403369926,
      "grad_norm": 0.20021198689937592,
      "learning_rate": 1.2026646811146328e-06,
      "loss": 0.0122,
      "step": 2690220
    },
    {
      "epoch": 4.402636764137913,
      "grad_norm": 0.10360635817050934,
      "learning_rate": 1.2025987889011156e-06,
      "loss": 0.0101,
      "step": 2690240
    },
    {
      "epoch": 4.402669494576566,
      "grad_norm": 0.11713017523288727,
      "learning_rate": 1.2025328966875985e-06,
      "loss": 0.0119,
      "step": 2690260
    },
    {
      "epoch": 4.4027022250152195,
      "grad_norm": 0.16218091547489166,
      "learning_rate": 1.2024670044740815e-06,
      "loss": 0.0154,
      "step": 2690280
    },
    {
      "epoch": 4.402734955453873,
      "grad_norm": 0.18184642493724823,
      "learning_rate": 1.2024011122605642e-06,
      "loss": 0.0106,
      "step": 2690300
    },
    {
      "epoch": 4.402767685892527,
      "grad_norm": 0.236213818192482,
      "learning_rate": 1.2023352200470472e-06,
      "loss": 0.0045,
      "step": 2690320
    },
    {
      "epoch": 4.402800416331179,
      "grad_norm": 0.20422004163265228,
      "learning_rate": 1.2022693278335299e-06,
      "loss": 0.0095,
      "step": 2690340
    },
    {
      "epoch": 4.402833146769833,
      "grad_norm": 0.11734103411436081,
      "learning_rate": 1.2022034356200128e-06,
      "loss": 0.0089,
      "step": 2690360
    },
    {
      "epoch": 4.4028658772084865,
      "grad_norm": 0.37349411845207214,
      "learning_rate": 1.2021375434064958e-06,
      "loss": 0.0063,
      "step": 2690380
    },
    {
      "epoch": 4.40289860764714,
      "grad_norm": 0.24094799160957336,
      "learning_rate": 1.2020716511929785e-06,
      "loss": 0.0072,
      "step": 2690400
    },
    {
      "epoch": 4.402931338085793,
      "grad_norm": 0.22552691400051117,
      "learning_rate": 1.2020057589794615e-06,
      "loss": 0.0143,
      "step": 2690420
    },
    {
      "epoch": 4.402964068524446,
      "grad_norm": 0.15170499682426453,
      "learning_rate": 1.2019398667659442e-06,
      "loss": 0.0071,
      "step": 2690440
    },
    {
      "epoch": 4.4029967989631,
      "grad_norm": 0.17093469202518463,
      "learning_rate": 1.2018739745524274e-06,
      "loss": 0.01,
      "step": 2690460
    },
    {
      "epoch": 4.403029529401753,
      "grad_norm": 0.5503678917884827,
      "learning_rate": 1.2018080823389101e-06,
      "loss": 0.0108,
      "step": 2690480
    },
    {
      "epoch": 4.403062259840406,
      "grad_norm": 0.38766852021217346,
      "learning_rate": 1.201742190125393e-06,
      "loss": 0.011,
      "step": 2690500
    },
    {
      "epoch": 4.40309499027906,
      "grad_norm": 0.2138081043958664,
      "learning_rate": 1.2016762979118758e-06,
      "loss": 0.0091,
      "step": 2690520
    },
    {
      "epoch": 4.403127720717713,
      "grad_norm": 0.41681408882141113,
      "learning_rate": 1.2016104056983586e-06,
      "loss": 0.0107,
      "step": 2690540
    },
    {
      "epoch": 4.403160451156366,
      "grad_norm": 0.19676461815834045,
      "learning_rate": 1.2015445134848417e-06,
      "loss": 0.0088,
      "step": 2690560
    },
    {
      "epoch": 4.40319318159502,
      "grad_norm": 0.790722668170929,
      "learning_rate": 1.2014786212713245e-06,
      "loss": 0.0116,
      "step": 2690580
    },
    {
      "epoch": 4.403225912033673,
      "grad_norm": 0.15972404181957245,
      "learning_rate": 1.2014127290578074e-06,
      "loss": 0.0108,
      "step": 2690600
    },
    {
      "epoch": 4.403258642472326,
      "grad_norm": 0.23546825349330902,
      "learning_rate": 1.2013468368442902e-06,
      "loss": 0.0096,
      "step": 2690620
    },
    {
      "epoch": 4.40329137291098,
      "grad_norm": 0.13334503769874573,
      "learning_rate": 1.201280944630773e-06,
      "loss": 0.0098,
      "step": 2690640
    },
    {
      "epoch": 4.403324103349633,
      "grad_norm": 0.22194387018680573,
      "learning_rate": 1.201215052417256e-06,
      "loss": 0.0078,
      "step": 2690660
    },
    {
      "epoch": 4.403356833788286,
      "grad_norm": 0.3655262291431427,
      "learning_rate": 1.2011491602037388e-06,
      "loss": 0.01,
      "step": 2690680
    },
    {
      "epoch": 4.40338956422694,
      "grad_norm": 0.18409374356269836,
      "learning_rate": 1.2010832679902217e-06,
      "loss": 0.0079,
      "step": 2690700
    },
    {
      "epoch": 4.403422294665593,
      "grad_norm": 0.14432737231254578,
      "learning_rate": 1.2010173757767047e-06,
      "loss": 0.0104,
      "step": 2690720
    },
    {
      "epoch": 4.403455025104247,
      "grad_norm": 0.30055612325668335,
      "learning_rate": 1.2009514835631874e-06,
      "loss": 0.0062,
      "step": 2690740
    },
    {
      "epoch": 4.4034877555428995,
      "grad_norm": 0.7987812757492065,
      "learning_rate": 1.2008855913496704e-06,
      "loss": 0.0112,
      "step": 2690760
    },
    {
      "epoch": 4.403520485981553,
      "grad_norm": 0.12799856066703796,
      "learning_rate": 1.2008196991361531e-06,
      "loss": 0.0081,
      "step": 2690780
    },
    {
      "epoch": 4.403553216420207,
      "grad_norm": 0.26838383078575134,
      "learning_rate": 1.200753806922636e-06,
      "loss": 0.0094,
      "step": 2690800
    },
    {
      "epoch": 4.403585946858859,
      "grad_norm": 0.09593075513839722,
      "learning_rate": 1.200687914709119e-06,
      "loss": 0.0072,
      "step": 2690820
    },
    {
      "epoch": 4.403618677297513,
      "grad_norm": 0.16660906374454498,
      "learning_rate": 1.2006220224956018e-06,
      "loss": 0.01,
      "step": 2690840
    },
    {
      "epoch": 4.403651407736167,
      "grad_norm": 0.32171300053596497,
      "learning_rate": 1.2005561302820847e-06,
      "loss": 0.0123,
      "step": 2690860
    },
    {
      "epoch": 4.40368413817482,
      "grad_norm": 0.7520900368690491,
      "learning_rate": 1.2004902380685675e-06,
      "loss": 0.0113,
      "step": 2690880
    },
    {
      "epoch": 4.403716868613473,
      "grad_norm": 0.15412560105323792,
      "learning_rate": 1.2004243458550504e-06,
      "loss": 0.0078,
      "step": 2690900
    },
    {
      "epoch": 4.4037495990521265,
      "grad_norm": 0.5184964537620544,
      "learning_rate": 1.2003584536415334e-06,
      "loss": 0.0111,
      "step": 2690920
    },
    {
      "epoch": 4.40378232949078,
      "grad_norm": 0.2509670853614807,
      "learning_rate": 1.2002925614280161e-06,
      "loss": 0.0072,
      "step": 2690940
    },
    {
      "epoch": 4.403815059929433,
      "grad_norm": 0.2323077917098999,
      "learning_rate": 1.200226669214499e-06,
      "loss": 0.0148,
      "step": 2690960
    },
    {
      "epoch": 4.403847790368086,
      "grad_norm": 0.19217103719711304,
      "learning_rate": 1.2001607770009818e-06,
      "loss": 0.0156,
      "step": 2690980
    },
    {
      "epoch": 4.40388052080674,
      "grad_norm": 0.36770251393318176,
      "learning_rate": 1.2000948847874647e-06,
      "loss": 0.0126,
      "step": 2691000
    },
    {
      "epoch": 4.403913251245394,
      "grad_norm": 0.11565960198640823,
      "learning_rate": 1.2000289925739477e-06,
      "loss": 0.0065,
      "step": 2691020
    },
    {
      "epoch": 4.403945981684046,
      "grad_norm": 0.3670443892478943,
      "learning_rate": 1.1999631003604304e-06,
      "loss": 0.0103,
      "step": 2691040
    },
    {
      "epoch": 4.4039787121227,
      "grad_norm": 0.06874346733093262,
      "learning_rate": 1.1998972081469134e-06,
      "loss": 0.0084,
      "step": 2691060
    },
    {
      "epoch": 4.4040114425613535,
      "grad_norm": 0.21461503207683563,
      "learning_rate": 1.1998313159333963e-06,
      "loss": 0.0095,
      "step": 2691080
    },
    {
      "epoch": 4.404044173000006,
      "grad_norm": 0.08815731108188629,
      "learning_rate": 1.199765423719879e-06,
      "loss": 0.0083,
      "step": 2691100
    },
    {
      "epoch": 4.40407690343866,
      "grad_norm": 0.05244392529129982,
      "learning_rate": 1.199699531506362e-06,
      "loss": 0.0068,
      "step": 2691120
    },
    {
      "epoch": 4.404109633877313,
      "grad_norm": 0.10522261261940002,
      "learning_rate": 1.1996336392928448e-06,
      "loss": 0.0061,
      "step": 2691140
    },
    {
      "epoch": 4.404142364315967,
      "grad_norm": 0.21945154666900635,
      "learning_rate": 1.1995677470793277e-06,
      "loss": 0.0133,
      "step": 2691160
    },
    {
      "epoch": 4.40417509475462,
      "grad_norm": 0.15132540464401245,
      "learning_rate": 1.1995018548658107e-06,
      "loss": 0.0077,
      "step": 2691180
    },
    {
      "epoch": 4.404207825193273,
      "grad_norm": 0.0902826115489006,
      "learning_rate": 1.1994359626522934e-06,
      "loss": 0.0075,
      "step": 2691200
    },
    {
      "epoch": 4.404240555631927,
      "grad_norm": 0.5232030153274536,
      "learning_rate": 1.1993700704387764e-06,
      "loss": 0.0109,
      "step": 2691220
    },
    {
      "epoch": 4.4042732860705796,
      "grad_norm": 0.09533815085887909,
      "learning_rate": 1.1993041782252591e-06,
      "loss": 0.0067,
      "step": 2691240
    },
    {
      "epoch": 4.404306016509233,
      "grad_norm": 0.5172936916351318,
      "learning_rate": 1.199238286011742e-06,
      "loss": 0.0106,
      "step": 2691260
    },
    {
      "epoch": 4.404338746947887,
      "grad_norm": 0.37969791889190674,
      "learning_rate": 1.199172393798225e-06,
      "loss": 0.0069,
      "step": 2691280
    },
    {
      "epoch": 4.40437147738654,
      "grad_norm": 0.08434402197599411,
      "learning_rate": 1.1991065015847078e-06,
      "loss": 0.0096,
      "step": 2691300
    },
    {
      "epoch": 4.404404207825193,
      "grad_norm": 0.45011216402053833,
      "learning_rate": 1.1990406093711907e-06,
      "loss": 0.009,
      "step": 2691320
    },
    {
      "epoch": 4.404436938263847,
      "grad_norm": 0.31889933347702026,
      "learning_rate": 1.1989747171576737e-06,
      "loss": 0.0096,
      "step": 2691340
    },
    {
      "epoch": 4.4044696687025,
      "grad_norm": 0.2933593988418579,
      "learning_rate": 1.1989088249441564e-06,
      "loss": 0.008,
      "step": 2691360
    },
    {
      "epoch": 4.404502399141153,
      "grad_norm": 0.18880881369113922,
      "learning_rate": 1.1988429327306393e-06,
      "loss": 0.0105,
      "step": 2691380
    },
    {
      "epoch": 4.4045351295798065,
      "grad_norm": 0.27338388562202454,
      "learning_rate": 1.198777040517122e-06,
      "loss": 0.0077,
      "step": 2691400
    },
    {
      "epoch": 4.40456786001846,
      "grad_norm": 0.39359545707702637,
      "learning_rate": 1.198711148303605e-06,
      "loss": 0.0105,
      "step": 2691420
    },
    {
      "epoch": 4.404600590457114,
      "grad_norm": 0.3136296570301056,
      "learning_rate": 1.198645256090088e-06,
      "loss": 0.0079,
      "step": 2691440
    },
    {
      "epoch": 4.404633320895766,
      "grad_norm": 0.14323320984840393,
      "learning_rate": 1.1985793638765707e-06,
      "loss": 0.0067,
      "step": 2691460
    },
    {
      "epoch": 4.40466605133442,
      "grad_norm": 0.3324543535709381,
      "learning_rate": 1.1985134716630537e-06,
      "loss": 0.0068,
      "step": 2691480
    },
    {
      "epoch": 4.404698781773074,
      "grad_norm": 0.15624544024467468,
      "learning_rate": 1.1984475794495364e-06,
      "loss": 0.0076,
      "step": 2691500
    },
    {
      "epoch": 4.404731512211726,
      "grad_norm": 0.17328067123889923,
      "learning_rate": 1.1983816872360194e-06,
      "loss": 0.0092,
      "step": 2691520
    },
    {
      "epoch": 4.40476424265038,
      "grad_norm": 0.08223628252744675,
      "learning_rate": 1.1983157950225023e-06,
      "loss": 0.0102,
      "step": 2691540
    },
    {
      "epoch": 4.4047969730890335,
      "grad_norm": 0.35006099939346313,
      "learning_rate": 1.198249902808985e-06,
      "loss": 0.0077,
      "step": 2691560
    },
    {
      "epoch": 4.404829703527687,
      "grad_norm": 0.14660312235355377,
      "learning_rate": 1.198184010595468e-06,
      "loss": 0.0065,
      "step": 2691580
    },
    {
      "epoch": 4.40486243396634,
      "grad_norm": 0.43690407276153564,
      "learning_rate": 1.198118118381951e-06,
      "loss": 0.0125,
      "step": 2691600
    },
    {
      "epoch": 4.404895164404993,
      "grad_norm": 0.24924474954605103,
      "learning_rate": 1.1980522261684337e-06,
      "loss": 0.006,
      "step": 2691620
    },
    {
      "epoch": 4.404927894843647,
      "grad_norm": 0.3856784403324127,
      "learning_rate": 1.1979863339549167e-06,
      "loss": 0.012,
      "step": 2691640
    },
    {
      "epoch": 4.4049606252823,
      "grad_norm": 0.12557491660118103,
      "learning_rate": 1.1979204417413994e-06,
      "loss": 0.0087,
      "step": 2691660
    },
    {
      "epoch": 4.404993355720953,
      "grad_norm": 0.0907035693526268,
      "learning_rate": 1.1978545495278823e-06,
      "loss": 0.0082,
      "step": 2691680
    },
    {
      "epoch": 4.405026086159607,
      "grad_norm": 0.386046826839447,
      "learning_rate": 1.1977886573143653e-06,
      "loss": 0.0082,
      "step": 2691700
    },
    {
      "epoch": 4.40505881659826,
      "grad_norm": 0.11868248879909515,
      "learning_rate": 1.197722765100848e-06,
      "loss": 0.0095,
      "step": 2691720
    },
    {
      "epoch": 4.405091547036913,
      "grad_norm": 0.171188086271286,
      "learning_rate": 1.197656872887331e-06,
      "loss": 0.0084,
      "step": 2691740
    },
    {
      "epoch": 4.405124277475567,
      "grad_norm": 0.08461946994066238,
      "learning_rate": 1.1975909806738137e-06,
      "loss": 0.011,
      "step": 2691760
    },
    {
      "epoch": 4.40515700791422,
      "grad_norm": 0.2721656858921051,
      "learning_rate": 1.1975250884602969e-06,
      "loss": 0.0131,
      "step": 2691780
    },
    {
      "epoch": 4.405189738352873,
      "grad_norm": 0.07942508161067963,
      "learning_rate": 1.1974591962467796e-06,
      "loss": 0.0083,
      "step": 2691800
    },
    {
      "epoch": 4.405222468791527,
      "grad_norm": 0.40848612785339355,
      "learning_rate": 1.1973933040332626e-06,
      "loss": 0.0123,
      "step": 2691820
    },
    {
      "epoch": 4.40525519923018,
      "grad_norm": 0.19614095985889435,
      "learning_rate": 1.1973274118197453e-06,
      "loss": 0.0115,
      "step": 2691840
    },
    {
      "epoch": 4.405287929668834,
      "grad_norm": 0.31872546672821045,
      "learning_rate": 1.197261519606228e-06,
      "loss": 0.0092,
      "step": 2691860
    },
    {
      "epoch": 4.405320660107487,
      "grad_norm": 0.13304263353347778,
      "learning_rate": 1.1971956273927112e-06,
      "loss": 0.0092,
      "step": 2691880
    },
    {
      "epoch": 4.40535339054614,
      "grad_norm": 0.23938657343387604,
      "learning_rate": 1.197129735179194e-06,
      "loss": 0.007,
      "step": 2691900
    },
    {
      "epoch": 4.405386120984794,
      "grad_norm": 0.13096262514591217,
      "learning_rate": 1.197063842965677e-06,
      "loss": 0.0107,
      "step": 2691920
    },
    {
      "epoch": 4.4054188514234465,
      "grad_norm": 0.07997513562440872,
      "learning_rate": 1.1969979507521597e-06,
      "loss": 0.0076,
      "step": 2691940
    },
    {
      "epoch": 4.4054515818621,
      "grad_norm": 0.5891764760017395,
      "learning_rate": 1.1969320585386426e-06,
      "loss": 0.0099,
      "step": 2691960
    },
    {
      "epoch": 4.405484312300754,
      "grad_norm": 0.5509078502655029,
      "learning_rate": 1.1968661663251256e-06,
      "loss": 0.0156,
      "step": 2691980
    },
    {
      "epoch": 4.405517042739406,
      "grad_norm": 0.19858311116695404,
      "learning_rate": 1.1968002741116083e-06,
      "loss": 0.0075,
      "step": 2692000
    },
    {
      "epoch": 4.40554977317806,
      "grad_norm": 0.26500198245048523,
      "learning_rate": 1.1967343818980913e-06,
      "loss": 0.0114,
      "step": 2692020
    },
    {
      "epoch": 4.4055825036167136,
      "grad_norm": 0.08396407961845398,
      "learning_rate": 1.1966684896845742e-06,
      "loss": 0.0095,
      "step": 2692040
    },
    {
      "epoch": 4.405615234055367,
      "grad_norm": 0.17694558203220367,
      "learning_rate": 1.196602597471057e-06,
      "loss": 0.009,
      "step": 2692060
    },
    {
      "epoch": 4.40564796449402,
      "grad_norm": 0.20800518989562988,
      "learning_rate": 1.19653670525754e-06,
      "loss": 0.011,
      "step": 2692080
    },
    {
      "epoch": 4.4056806949326734,
      "grad_norm": 0.5377690196037292,
      "learning_rate": 1.1964708130440226e-06,
      "loss": 0.0069,
      "step": 2692100
    },
    {
      "epoch": 4.405713425371327,
      "grad_norm": 0.09911812096834183,
      "learning_rate": 1.1964049208305056e-06,
      "loss": 0.0104,
      "step": 2692120
    },
    {
      "epoch": 4.405746155809981,
      "grad_norm": 0.3153802752494812,
      "learning_rate": 1.1963390286169885e-06,
      "loss": 0.0113,
      "step": 2692140
    },
    {
      "epoch": 4.405778886248633,
      "grad_norm": 0.08159932494163513,
      "learning_rate": 1.1962731364034713e-06,
      "loss": 0.0066,
      "step": 2692160
    },
    {
      "epoch": 4.405811616687287,
      "grad_norm": 0.24367746710777283,
      "learning_rate": 1.1962072441899542e-06,
      "loss": 0.0103,
      "step": 2692180
    },
    {
      "epoch": 4.4058443471259405,
      "grad_norm": 0.14788080751895905,
      "learning_rate": 1.196141351976437e-06,
      "loss": 0.0076,
      "step": 2692200
    },
    {
      "epoch": 4.405877077564593,
      "grad_norm": 0.14827902615070343,
      "learning_rate": 1.19607545976292e-06,
      "loss": 0.0076,
      "step": 2692220
    },
    {
      "epoch": 4.405909808003247,
      "grad_norm": 0.41242527961730957,
      "learning_rate": 1.1960095675494029e-06,
      "loss": 0.0096,
      "step": 2692240
    },
    {
      "epoch": 4.4059425384419,
      "grad_norm": 0.3441316783428192,
      "learning_rate": 1.1959436753358856e-06,
      "loss": 0.0083,
      "step": 2692260
    },
    {
      "epoch": 4.405975268880553,
      "grad_norm": 0.05780987814068794,
      "learning_rate": 1.1958777831223686e-06,
      "loss": 0.0068,
      "step": 2692280
    },
    {
      "epoch": 4.406007999319207,
      "grad_norm": 0.4487411081790924,
      "learning_rate": 1.1958118909088513e-06,
      "loss": 0.0075,
      "step": 2692300
    },
    {
      "epoch": 4.40604072975786,
      "grad_norm": 0.22012032568454742,
      "learning_rate": 1.1957459986953343e-06,
      "loss": 0.0149,
      "step": 2692320
    },
    {
      "epoch": 4.406073460196514,
      "grad_norm": 0.15866784751415253,
      "learning_rate": 1.1956801064818172e-06,
      "loss": 0.0089,
      "step": 2692340
    },
    {
      "epoch": 4.406106190635167,
      "grad_norm": 0.35007572174072266,
      "learning_rate": 1.1956142142683e-06,
      "loss": 0.006,
      "step": 2692360
    },
    {
      "epoch": 4.40613892107382,
      "grad_norm": 0.1999916285276413,
      "learning_rate": 1.195548322054783e-06,
      "loss": 0.0075,
      "step": 2692380
    },
    {
      "epoch": 4.406171651512474,
      "grad_norm": 0.10850345343351364,
      "learning_rate": 1.1954824298412658e-06,
      "loss": 0.0092,
      "step": 2692400
    },
    {
      "epoch": 4.4062043819511265,
      "grad_norm": 0.34815624356269836,
      "learning_rate": 1.1954165376277486e-06,
      "loss": 0.0068,
      "step": 2692420
    },
    {
      "epoch": 4.40623711238978,
      "grad_norm": 0.22983768582344055,
      "learning_rate": 1.1953506454142315e-06,
      "loss": 0.0104,
      "step": 2692440
    },
    {
      "epoch": 4.406269842828434,
      "grad_norm": 0.7025943398475647,
      "learning_rate": 1.1952847532007143e-06,
      "loss": 0.0118,
      "step": 2692460
    },
    {
      "epoch": 4.406302573267087,
      "grad_norm": 0.1091424748301506,
      "learning_rate": 1.1952188609871972e-06,
      "loss": 0.009,
      "step": 2692480
    },
    {
      "epoch": 4.40633530370574,
      "grad_norm": 0.18210482597351074,
      "learning_rate": 1.1951529687736802e-06,
      "loss": 0.0108,
      "step": 2692500
    },
    {
      "epoch": 4.406368034144394,
      "grad_norm": 0.17936275899410248,
      "learning_rate": 1.195087076560163e-06,
      "loss": 0.0127,
      "step": 2692520
    },
    {
      "epoch": 4.406400764583047,
      "grad_norm": 0.15076151490211487,
      "learning_rate": 1.1950211843466459e-06,
      "loss": 0.0128,
      "step": 2692540
    },
    {
      "epoch": 4.4064334950217,
      "grad_norm": 0.434184193611145,
      "learning_rate": 1.1949552921331286e-06,
      "loss": 0.0099,
      "step": 2692560
    },
    {
      "epoch": 4.4064662254603535,
      "grad_norm": 0.36298683285713196,
      "learning_rate": 1.1948893999196116e-06,
      "loss": 0.0144,
      "step": 2692580
    },
    {
      "epoch": 4.406498955899007,
      "grad_norm": 0.3566218614578247,
      "learning_rate": 1.1948235077060945e-06,
      "loss": 0.0116,
      "step": 2692600
    },
    {
      "epoch": 4.406531686337661,
      "grad_norm": 0.2515887916088104,
      "learning_rate": 1.1947576154925773e-06,
      "loss": 0.0119,
      "step": 2692620
    },
    {
      "epoch": 4.406564416776313,
      "grad_norm": 0.34008604288101196,
      "learning_rate": 1.1946917232790602e-06,
      "loss": 0.0069,
      "step": 2692640
    },
    {
      "epoch": 4.406597147214967,
      "grad_norm": 0.12195843458175659,
      "learning_rate": 1.1946258310655432e-06,
      "loss": 0.0101,
      "step": 2692660
    },
    {
      "epoch": 4.406629877653621,
      "grad_norm": 0.13784174621105194,
      "learning_rate": 1.194559938852026e-06,
      "loss": 0.0077,
      "step": 2692680
    },
    {
      "epoch": 4.406662608092273,
      "grad_norm": 0.14969396591186523,
      "learning_rate": 1.1944940466385089e-06,
      "loss": 0.0049,
      "step": 2692700
    },
    {
      "epoch": 4.406695338530927,
      "grad_norm": 0.32898855209350586,
      "learning_rate": 1.1944281544249916e-06,
      "loss": 0.0087,
      "step": 2692720
    },
    {
      "epoch": 4.4067280689695805,
      "grad_norm": 0.6920906901359558,
      "learning_rate": 1.1943622622114745e-06,
      "loss": 0.0108,
      "step": 2692740
    },
    {
      "epoch": 4.406760799408234,
      "grad_norm": 0.29523858428001404,
      "learning_rate": 1.1942963699979575e-06,
      "loss": 0.0069,
      "step": 2692760
    },
    {
      "epoch": 4.406793529846887,
      "grad_norm": 0.2731686532497406,
      "learning_rate": 1.1942304777844402e-06,
      "loss": 0.0075,
      "step": 2692780
    },
    {
      "epoch": 4.40682626028554,
      "grad_norm": 0.16750337183475494,
      "learning_rate": 1.1941645855709232e-06,
      "loss": 0.0067,
      "step": 2692800
    },
    {
      "epoch": 4.406858990724194,
      "grad_norm": 0.052085600793361664,
      "learning_rate": 1.194098693357406e-06,
      "loss": 0.0121,
      "step": 2692820
    },
    {
      "epoch": 4.406891721162847,
      "grad_norm": 0.14582093060016632,
      "learning_rate": 1.1940328011438889e-06,
      "loss": 0.0082,
      "step": 2692840
    },
    {
      "epoch": 4.4069244516015,
      "grad_norm": 0.03703390061855316,
      "learning_rate": 1.1939669089303718e-06,
      "loss": 0.0087,
      "step": 2692860
    },
    {
      "epoch": 4.406957182040154,
      "grad_norm": 0.17482508718967438,
      "learning_rate": 1.1939010167168546e-06,
      "loss": 0.007,
      "step": 2692880
    },
    {
      "epoch": 4.4069899124788074,
      "grad_norm": 0.07697873562574387,
      "learning_rate": 1.1938351245033375e-06,
      "loss": 0.0075,
      "step": 2692900
    },
    {
      "epoch": 4.40702264291746,
      "grad_norm": 0.2756149172782898,
      "learning_rate": 1.1937692322898205e-06,
      "loss": 0.0063,
      "step": 2692920
    },
    {
      "epoch": 4.407055373356114,
      "grad_norm": 0.1337508261203766,
      "learning_rate": 1.1937033400763032e-06,
      "loss": 0.0074,
      "step": 2692940
    },
    {
      "epoch": 4.407088103794767,
      "grad_norm": 0.1944059580564499,
      "learning_rate": 1.1936374478627862e-06,
      "loss": 0.0066,
      "step": 2692960
    },
    {
      "epoch": 4.40712083423342,
      "grad_norm": 0.3766777813434601,
      "learning_rate": 1.193571555649269e-06,
      "loss": 0.0139,
      "step": 2692980
    },
    {
      "epoch": 4.407153564672074,
      "grad_norm": 0.187510147690773,
      "learning_rate": 1.1935056634357519e-06,
      "loss": 0.0073,
      "step": 2693000
    },
    {
      "epoch": 4.407186295110727,
      "grad_norm": 0.0881812646985054,
      "learning_rate": 1.1934397712222348e-06,
      "loss": 0.0085,
      "step": 2693020
    },
    {
      "epoch": 4.407219025549381,
      "grad_norm": 0.350739061832428,
      "learning_rate": 1.1933738790087178e-06,
      "loss": 0.0069,
      "step": 2693040
    },
    {
      "epoch": 4.4072517559880335,
      "grad_norm": 0.24993737041950226,
      "learning_rate": 1.1933079867952005e-06,
      "loss": 0.0147,
      "step": 2693060
    },
    {
      "epoch": 4.407284486426687,
      "grad_norm": 0.6177244186401367,
      "learning_rate": 1.1932420945816832e-06,
      "loss": 0.0126,
      "step": 2693080
    },
    {
      "epoch": 4.407317216865341,
      "grad_norm": 0.16689938306808472,
      "learning_rate": 1.1931762023681664e-06,
      "loss": 0.0089,
      "step": 2693100
    },
    {
      "epoch": 4.407349947303993,
      "grad_norm": 0.47084322571754456,
      "learning_rate": 1.1931103101546491e-06,
      "loss": 0.0118,
      "step": 2693120
    },
    {
      "epoch": 4.407382677742647,
      "grad_norm": 0.33950191736221313,
      "learning_rate": 1.193044417941132e-06,
      "loss": 0.0102,
      "step": 2693140
    },
    {
      "epoch": 4.407415408181301,
      "grad_norm": 0.16596387326717377,
      "learning_rate": 1.1929785257276148e-06,
      "loss": 0.0094,
      "step": 2693160
    },
    {
      "epoch": 4.407448138619953,
      "grad_norm": 0.42766159772872925,
      "learning_rate": 1.1929126335140976e-06,
      "loss": 0.0091,
      "step": 2693180
    },
    {
      "epoch": 4.407480869058607,
      "grad_norm": 0.10383470356464386,
      "learning_rate": 1.1928467413005807e-06,
      "loss": 0.0089,
      "step": 2693200
    },
    {
      "epoch": 4.4075135994972605,
      "grad_norm": 0.4763985574245453,
      "learning_rate": 1.1927808490870635e-06,
      "loss": 0.0088,
      "step": 2693220
    },
    {
      "epoch": 4.407546329935914,
      "grad_norm": 0.22182871401309967,
      "learning_rate": 1.1927149568735464e-06,
      "loss": 0.0097,
      "step": 2693240
    },
    {
      "epoch": 4.407579060374567,
      "grad_norm": 0.2128346860408783,
      "learning_rate": 1.1926490646600292e-06,
      "loss": 0.0091,
      "step": 2693260
    },
    {
      "epoch": 4.40761179081322,
      "grad_norm": 0.06698043644428253,
      "learning_rate": 1.1925831724465121e-06,
      "loss": 0.0107,
      "step": 2693280
    },
    {
      "epoch": 4.407644521251874,
      "grad_norm": 0.3858911097049713,
      "learning_rate": 1.192517280232995e-06,
      "loss": 0.015,
      "step": 2693300
    },
    {
      "epoch": 4.407677251690528,
      "grad_norm": 0.034002579748630524,
      "learning_rate": 1.1924513880194778e-06,
      "loss": 0.0099,
      "step": 2693320
    },
    {
      "epoch": 4.40770998212918,
      "grad_norm": 0.2266678512096405,
      "learning_rate": 1.1923854958059608e-06,
      "loss": 0.0071,
      "step": 2693340
    },
    {
      "epoch": 4.407742712567834,
      "grad_norm": 0.16609741747379303,
      "learning_rate": 1.1923196035924437e-06,
      "loss": 0.0089,
      "step": 2693360
    },
    {
      "epoch": 4.4077754430064875,
      "grad_norm": 0.4440920352935791,
      "learning_rate": 1.1922537113789264e-06,
      "loss": 0.0077,
      "step": 2693380
    },
    {
      "epoch": 4.40780817344514,
      "grad_norm": 0.25623106956481934,
      "learning_rate": 1.1921878191654094e-06,
      "loss": 0.0062,
      "step": 2693400
    },
    {
      "epoch": 4.407840903883794,
      "grad_norm": 1.3180131912231445,
      "learning_rate": 1.1921219269518921e-06,
      "loss": 0.0086,
      "step": 2693420
    },
    {
      "epoch": 4.407873634322447,
      "grad_norm": 0.10776577144861221,
      "learning_rate": 1.192056034738375e-06,
      "loss": 0.0099,
      "step": 2693440
    },
    {
      "epoch": 4.4079063647611,
      "grad_norm": 0.18456341326236725,
      "learning_rate": 1.191990142524858e-06,
      "loss": 0.0053,
      "step": 2693460
    },
    {
      "epoch": 4.407939095199754,
      "grad_norm": 0.045119285583496094,
      "learning_rate": 1.1919242503113408e-06,
      "loss": 0.0084,
      "step": 2693480
    },
    {
      "epoch": 4.407971825638407,
      "grad_norm": 0.7174906134605408,
      "learning_rate": 1.1918583580978237e-06,
      "loss": 0.0092,
      "step": 2693500
    },
    {
      "epoch": 4.408004556077061,
      "grad_norm": 0.41899633407592773,
      "learning_rate": 1.1917924658843065e-06,
      "loss": 0.0091,
      "step": 2693520
    },
    {
      "epoch": 4.408037286515714,
      "grad_norm": 0.2916584312915802,
      "learning_rate": 1.1917265736707894e-06,
      "loss": 0.0125,
      "step": 2693540
    },
    {
      "epoch": 4.408070016954367,
      "grad_norm": 0.4349871873855591,
      "learning_rate": 1.1916606814572724e-06,
      "loss": 0.0084,
      "step": 2693560
    },
    {
      "epoch": 4.408102747393021,
      "grad_norm": 0.40756943821907043,
      "learning_rate": 1.1915947892437551e-06,
      "loss": 0.0069,
      "step": 2693580
    },
    {
      "epoch": 4.408135477831674,
      "grad_norm": 0.26751112937927246,
      "learning_rate": 1.191528897030238e-06,
      "loss": 0.0076,
      "step": 2693600
    },
    {
      "epoch": 4.408168208270327,
      "grad_norm": 0.37580543756484985,
      "learning_rate": 1.1914630048167208e-06,
      "loss": 0.0076,
      "step": 2693620
    },
    {
      "epoch": 4.408200938708981,
      "grad_norm": 0.31833916902542114,
      "learning_rate": 1.1913971126032038e-06,
      "loss": 0.0148,
      "step": 2693640
    },
    {
      "epoch": 4.408233669147634,
      "grad_norm": 0.23237226903438568,
      "learning_rate": 1.1913312203896867e-06,
      "loss": 0.0067,
      "step": 2693660
    },
    {
      "epoch": 4.408266399586287,
      "grad_norm": 0.3743514120578766,
      "learning_rate": 1.1912653281761695e-06,
      "loss": 0.0087,
      "step": 2693680
    },
    {
      "epoch": 4.408299130024941,
      "grad_norm": 0.2672274708747864,
      "learning_rate": 1.1911994359626524e-06,
      "loss": 0.0062,
      "step": 2693700
    },
    {
      "epoch": 4.408331860463594,
      "grad_norm": 0.2522486448287964,
      "learning_rate": 1.1911335437491354e-06,
      "loss": 0.0115,
      "step": 2693720
    },
    {
      "epoch": 4.408364590902247,
      "grad_norm": 0.10241475701332092,
      "learning_rate": 1.191067651535618e-06,
      "loss": 0.0092,
      "step": 2693740
    },
    {
      "epoch": 4.4083973213409005,
      "grad_norm": 0.18830865621566772,
      "learning_rate": 1.191001759322101e-06,
      "loss": 0.0094,
      "step": 2693760
    },
    {
      "epoch": 4.408430051779554,
      "grad_norm": 0.3678663969039917,
      "learning_rate": 1.1909358671085838e-06,
      "loss": 0.0064,
      "step": 2693780
    },
    {
      "epoch": 4.408462782218208,
      "grad_norm": 0.29826581478118896,
      "learning_rate": 1.1908699748950667e-06,
      "loss": 0.0088,
      "step": 2693800
    },
    {
      "epoch": 4.40849551265686,
      "grad_norm": 0.19297848641872406,
      "learning_rate": 1.1908040826815497e-06,
      "loss": 0.0084,
      "step": 2693820
    },
    {
      "epoch": 4.408528243095514,
      "grad_norm": 0.1279257833957672,
      "learning_rate": 1.1907381904680324e-06,
      "loss": 0.0072,
      "step": 2693840
    },
    {
      "epoch": 4.4085609735341675,
      "grad_norm": 0.20176254212856293,
      "learning_rate": 1.1906722982545154e-06,
      "loss": 0.0065,
      "step": 2693860
    },
    {
      "epoch": 4.40859370397282,
      "grad_norm": 0.42852362990379333,
      "learning_rate": 1.1906064060409981e-06,
      "loss": 0.0085,
      "step": 2693880
    },
    {
      "epoch": 4.408626434411474,
      "grad_norm": 0.3159581422805786,
      "learning_rate": 1.190540513827481e-06,
      "loss": 0.0066,
      "step": 2693900
    },
    {
      "epoch": 4.408659164850127,
      "grad_norm": 0.13486000895500183,
      "learning_rate": 1.190474621613964e-06,
      "loss": 0.0062,
      "step": 2693920
    },
    {
      "epoch": 4.408691895288781,
      "grad_norm": 0.25785908102989197,
      "learning_rate": 1.1904087294004468e-06,
      "loss": 0.012,
      "step": 2693940
    },
    {
      "epoch": 4.408724625727434,
      "grad_norm": 0.15768226981163025,
      "learning_rate": 1.1903428371869297e-06,
      "loss": 0.0096,
      "step": 2693960
    },
    {
      "epoch": 4.408757356166087,
      "grad_norm": 0.13874006271362305,
      "learning_rate": 1.1902769449734127e-06,
      "loss": 0.0082,
      "step": 2693980
    },
    {
      "epoch": 4.408790086604741,
      "grad_norm": 0.44973641633987427,
      "learning_rate": 1.1902110527598954e-06,
      "loss": 0.0109,
      "step": 2694000
    },
    {
      "epoch": 4.408822817043394,
      "grad_norm": 0.07216031849384308,
      "learning_rate": 1.1901451605463784e-06,
      "loss": 0.0086,
      "step": 2694020
    },
    {
      "epoch": 4.408855547482047,
      "grad_norm": 0.3051030933856964,
      "learning_rate": 1.190079268332861e-06,
      "loss": 0.0067,
      "step": 2694040
    },
    {
      "epoch": 4.408888277920701,
      "grad_norm": 0.1325814127922058,
      "learning_rate": 1.190013376119344e-06,
      "loss": 0.0113,
      "step": 2694060
    },
    {
      "epoch": 4.408921008359354,
      "grad_norm": 0.3701440095901489,
      "learning_rate": 1.189947483905827e-06,
      "loss": 0.0075,
      "step": 2694080
    },
    {
      "epoch": 4.408953738798007,
      "grad_norm": 0.15701161324977875,
      "learning_rate": 1.1898815916923097e-06,
      "loss": 0.0114,
      "step": 2694100
    },
    {
      "epoch": 4.408986469236661,
      "grad_norm": 0.3593209981918335,
      "learning_rate": 1.1898156994787927e-06,
      "loss": 0.007,
      "step": 2694120
    },
    {
      "epoch": 4.409019199675314,
      "grad_norm": 0.40076515078544617,
      "learning_rate": 1.1897498072652754e-06,
      "loss": 0.0094,
      "step": 2694140
    },
    {
      "epoch": 4.409051930113967,
      "grad_norm": 0.18365275859832764,
      "learning_rate": 1.1896839150517584e-06,
      "loss": 0.0133,
      "step": 2694160
    },
    {
      "epoch": 4.409084660552621,
      "grad_norm": 0.1341625154018402,
      "learning_rate": 1.1896180228382413e-06,
      "loss": 0.0058,
      "step": 2694180
    },
    {
      "epoch": 4.409117390991274,
      "grad_norm": 0.2101936787366867,
      "learning_rate": 1.189552130624724e-06,
      "loss": 0.0115,
      "step": 2694200
    },
    {
      "epoch": 4.409150121429928,
      "grad_norm": 0.3504299819469452,
      "learning_rate": 1.189486238411207e-06,
      "loss": 0.0082,
      "step": 2694220
    },
    {
      "epoch": 4.4091828518685805,
      "grad_norm": 0.2603839933872223,
      "learning_rate": 1.18942034619769e-06,
      "loss": 0.0074,
      "step": 2694240
    },
    {
      "epoch": 4.409215582307234,
      "grad_norm": 0.2883602976799011,
      "learning_rate": 1.1893544539841727e-06,
      "loss": 0.0064,
      "step": 2694260
    },
    {
      "epoch": 4.409248312745888,
      "grad_norm": 0.2920501232147217,
      "learning_rate": 1.1892885617706557e-06,
      "loss": 0.0091,
      "step": 2694280
    },
    {
      "epoch": 4.40928104318454,
      "grad_norm": 0.8623109459877014,
      "learning_rate": 1.1892226695571384e-06,
      "loss": 0.0095,
      "step": 2694300
    },
    {
      "epoch": 4.409313773623194,
      "grad_norm": 0.17522773146629333,
      "learning_rate": 1.1891567773436214e-06,
      "loss": 0.0107,
      "step": 2694320
    },
    {
      "epoch": 4.409346504061848,
      "grad_norm": 0.4084024429321289,
      "learning_rate": 1.1890908851301043e-06,
      "loss": 0.0192,
      "step": 2694340
    },
    {
      "epoch": 4.409379234500501,
      "grad_norm": 0.25723689794540405,
      "learning_rate": 1.1890249929165873e-06,
      "loss": 0.0076,
      "step": 2694360
    },
    {
      "epoch": 4.409411964939154,
      "grad_norm": 0.21938633918762207,
      "learning_rate": 1.18895910070307e-06,
      "loss": 0.009,
      "step": 2694380
    },
    {
      "epoch": 4.4094446953778075,
      "grad_norm": 0.08266430348157883,
      "learning_rate": 1.1888932084895527e-06,
      "loss": 0.0116,
      "step": 2694400
    },
    {
      "epoch": 4.409477425816461,
      "grad_norm": 0.16997899115085602,
      "learning_rate": 1.188827316276036e-06,
      "loss": 0.0096,
      "step": 2694420
    },
    {
      "epoch": 4.409510156255114,
      "grad_norm": 0.1565922498703003,
      "learning_rate": 1.1887614240625186e-06,
      "loss": 0.0067,
      "step": 2694440
    },
    {
      "epoch": 4.409542886693767,
      "grad_norm": 0.10428474098443985,
      "learning_rate": 1.1886955318490016e-06,
      "loss": 0.0114,
      "step": 2694460
    },
    {
      "epoch": 4.409575617132421,
      "grad_norm": 0.2687550187110901,
      "learning_rate": 1.1886296396354843e-06,
      "loss": 0.0103,
      "step": 2694480
    },
    {
      "epoch": 4.409608347571075,
      "grad_norm": 0.625918447971344,
      "learning_rate": 1.1885637474219673e-06,
      "loss": 0.0129,
      "step": 2694500
    },
    {
      "epoch": 4.409641078009727,
      "grad_norm": 0.06257610768079758,
      "learning_rate": 1.1884978552084502e-06,
      "loss": 0.0088,
      "step": 2694520
    },
    {
      "epoch": 4.409673808448381,
      "grad_norm": 0.32034552097320557,
      "learning_rate": 1.188431962994933e-06,
      "loss": 0.0109,
      "step": 2694540
    },
    {
      "epoch": 4.4097065388870345,
      "grad_norm": 0.10845422744750977,
      "learning_rate": 1.188366070781416e-06,
      "loss": 0.0115,
      "step": 2694560
    },
    {
      "epoch": 4.409739269325687,
      "grad_norm": 0.36059197783470154,
      "learning_rate": 1.1883001785678987e-06,
      "loss": 0.0062,
      "step": 2694580
    },
    {
      "epoch": 4.409771999764341,
      "grad_norm": 0.22048470377922058,
      "learning_rate": 1.1882342863543816e-06,
      "loss": 0.0059,
      "step": 2694600
    },
    {
      "epoch": 4.409804730202994,
      "grad_norm": 0.5101717114448547,
      "learning_rate": 1.1881683941408646e-06,
      "loss": 0.0091,
      "step": 2694620
    },
    {
      "epoch": 4.409837460641647,
      "grad_norm": 0.3589330315589905,
      "learning_rate": 1.1881025019273473e-06,
      "loss": 0.0097,
      "step": 2694640
    },
    {
      "epoch": 4.409870191080301,
      "grad_norm": 0.3786972463130951,
      "learning_rate": 1.1880366097138303e-06,
      "loss": 0.0085,
      "step": 2694660
    },
    {
      "epoch": 4.409902921518954,
      "grad_norm": 0.30950942635536194,
      "learning_rate": 1.1879707175003132e-06,
      "loss": 0.0051,
      "step": 2694680
    },
    {
      "epoch": 4.409935651957608,
      "grad_norm": 0.21985453367233276,
      "learning_rate": 1.187904825286796e-06,
      "loss": 0.0079,
      "step": 2694700
    },
    {
      "epoch": 4.4099683823962605,
      "grad_norm": 0.21103915572166443,
      "learning_rate": 1.187838933073279e-06,
      "loss": 0.0079,
      "step": 2694720
    },
    {
      "epoch": 4.410001112834914,
      "grad_norm": 0.24181659519672394,
      "learning_rate": 1.1877730408597616e-06,
      "loss": 0.0117,
      "step": 2694740
    },
    {
      "epoch": 4.410033843273568,
      "grad_norm": 0.16608500480651855,
      "learning_rate": 1.1877071486462446e-06,
      "loss": 0.0088,
      "step": 2694760
    },
    {
      "epoch": 4.410066573712221,
      "grad_norm": 0.1209823414683342,
      "learning_rate": 1.1876412564327275e-06,
      "loss": 0.0064,
      "step": 2694780
    },
    {
      "epoch": 4.410099304150874,
      "grad_norm": 0.4377480745315552,
      "learning_rate": 1.1875753642192103e-06,
      "loss": 0.0111,
      "step": 2694800
    },
    {
      "epoch": 4.410132034589528,
      "grad_norm": 0.10939483344554901,
      "learning_rate": 1.1875094720056932e-06,
      "loss": 0.0093,
      "step": 2694820
    },
    {
      "epoch": 4.410164765028181,
      "grad_norm": 0.30060315132141113,
      "learning_rate": 1.187443579792176e-06,
      "loss": 0.0063,
      "step": 2694840
    },
    {
      "epoch": 4.410197495466834,
      "grad_norm": 0.16226421296596527,
      "learning_rate": 1.187377687578659e-06,
      "loss": 0.0077,
      "step": 2694860
    },
    {
      "epoch": 4.4102302259054875,
      "grad_norm": 0.20064865052700043,
      "learning_rate": 1.1873117953651419e-06,
      "loss": 0.0061,
      "step": 2694880
    },
    {
      "epoch": 4.410262956344141,
      "grad_norm": 0.08340156823396683,
      "learning_rate": 1.1872459031516246e-06,
      "loss": 0.0071,
      "step": 2694900
    },
    {
      "epoch": 4.410295686782794,
      "grad_norm": 0.1412534862756729,
      "learning_rate": 1.1871800109381076e-06,
      "loss": 0.0097,
      "step": 2694920
    },
    {
      "epoch": 4.410328417221447,
      "grad_norm": 0.0918704941868782,
      "learning_rate": 1.1871141187245905e-06,
      "loss": 0.0059,
      "step": 2694940
    },
    {
      "epoch": 4.410361147660101,
      "grad_norm": 0.14161866903305054,
      "learning_rate": 1.1870482265110733e-06,
      "loss": 0.0102,
      "step": 2694960
    },
    {
      "epoch": 4.410393878098755,
      "grad_norm": 0.14641419053077698,
      "learning_rate": 1.1869823342975562e-06,
      "loss": 0.0085,
      "step": 2694980
    },
    {
      "epoch": 4.410426608537407,
      "grad_norm": 0.12406525015830994,
      "learning_rate": 1.186916442084039e-06,
      "loss": 0.0085,
      "step": 2695000
    },
    {
      "epoch": 4.410459338976061,
      "grad_norm": 0.06919819116592407,
      "learning_rate": 1.186850549870522e-06,
      "loss": 0.0099,
      "step": 2695020
    },
    {
      "epoch": 4.4104920694147145,
      "grad_norm": 0.474772185087204,
      "learning_rate": 1.1867846576570049e-06,
      "loss": 0.0085,
      "step": 2695040
    },
    {
      "epoch": 4.410524799853368,
      "grad_norm": 0.2243560254573822,
      "learning_rate": 1.1867187654434876e-06,
      "loss": 0.0116,
      "step": 2695060
    },
    {
      "epoch": 4.410557530292021,
      "grad_norm": 0.1264423280954361,
      "learning_rate": 1.1866528732299706e-06,
      "loss": 0.0067,
      "step": 2695080
    },
    {
      "epoch": 4.410590260730674,
      "grad_norm": 0.20372268557548523,
      "learning_rate": 1.1865869810164533e-06,
      "loss": 0.014,
      "step": 2695100
    },
    {
      "epoch": 4.410622991169328,
      "grad_norm": 0.19034945964813232,
      "learning_rate": 1.1865210888029362e-06,
      "loss": 0.0138,
      "step": 2695120
    },
    {
      "epoch": 4.410655721607981,
      "grad_norm": 0.3660264313220978,
      "learning_rate": 1.1864551965894192e-06,
      "loss": 0.0079,
      "step": 2695140
    },
    {
      "epoch": 4.410688452046634,
      "grad_norm": 0.30735403299331665,
      "learning_rate": 1.186389304375902e-06,
      "loss": 0.0081,
      "step": 2695160
    },
    {
      "epoch": 4.410721182485288,
      "grad_norm": 0.30837541818618774,
      "learning_rate": 1.1863234121623849e-06,
      "loss": 0.0128,
      "step": 2695180
    },
    {
      "epoch": 4.410753912923941,
      "grad_norm": 0.2382049560546875,
      "learning_rate": 1.1862575199488676e-06,
      "loss": 0.0075,
      "step": 2695200
    },
    {
      "epoch": 4.410786643362594,
      "grad_norm": Infinity,
      "learning_rate": 1.1861916277353506e-06,
      "loss": 0.0087,
      "step": 2695220
    },
    {
      "epoch": 4.410819373801248,
      "grad_norm": 0.26161789894104004,
      "learning_rate": 1.1861257355218335e-06,
      "loss": 0.0064,
      "step": 2695240
    },
    {
      "epoch": 4.410852104239901,
      "grad_norm": 0.12946641445159912,
      "learning_rate": 1.1860598433083163e-06,
      "loss": 0.0071,
      "step": 2695260
    },
    {
      "epoch": 4.410884834678554,
      "grad_norm": 0.32128193974494934,
      "learning_rate": 1.1859939510947992e-06,
      "loss": 0.0079,
      "step": 2695280
    },
    {
      "epoch": 4.410917565117208,
      "grad_norm": 0.1742032766342163,
      "learning_rate": 1.1859280588812822e-06,
      "loss": 0.0078,
      "step": 2695300
    },
    {
      "epoch": 4.410950295555861,
      "grad_norm": 0.19781121611595154,
      "learning_rate": 1.185862166667765e-06,
      "loss": 0.0086,
      "step": 2695320
    },
    {
      "epoch": 4.410983025994514,
      "grad_norm": 0.5282923579216003,
      "learning_rate": 1.1857962744542479e-06,
      "loss": 0.0195,
      "step": 2695340
    },
    {
      "epoch": 4.411015756433168,
      "grad_norm": 0.9944713115692139,
      "learning_rate": 1.1857303822407306e-06,
      "loss": 0.0109,
      "step": 2695360
    },
    {
      "epoch": 4.411048486871821,
      "grad_norm": 0.08147440105676651,
      "learning_rate": 1.1856644900272136e-06,
      "loss": 0.0064,
      "step": 2695380
    },
    {
      "epoch": 4.411081217310475,
      "grad_norm": 0.10457878559827805,
      "learning_rate": 1.1855985978136965e-06,
      "loss": 0.0064,
      "step": 2695400
    },
    {
      "epoch": 4.4111139477491275,
      "grad_norm": 0.13850592076778412,
      "learning_rate": 1.1855327056001792e-06,
      "loss": 0.0084,
      "step": 2695420
    },
    {
      "epoch": 4.411146678187781,
      "grad_norm": 0.14981809258460999,
      "learning_rate": 1.1854668133866622e-06,
      "loss": 0.0077,
      "step": 2695440
    },
    {
      "epoch": 4.411179408626435,
      "grad_norm": 0.19366098940372467,
      "learning_rate": 1.185400921173145e-06,
      "loss": 0.0116,
      "step": 2695460
    },
    {
      "epoch": 4.411212139065087,
      "grad_norm": 0.2007869929075241,
      "learning_rate": 1.1853350289596279e-06,
      "loss": 0.0078,
      "step": 2695480
    },
    {
      "epoch": 4.411244869503741,
      "grad_norm": 0.09027596563100815,
      "learning_rate": 1.1852691367461108e-06,
      "loss": 0.0082,
      "step": 2695500
    },
    {
      "epoch": 4.4112775999423945,
      "grad_norm": 0.46188217401504517,
      "learning_rate": 1.1852032445325936e-06,
      "loss": 0.0075,
      "step": 2695520
    },
    {
      "epoch": 4.411310330381048,
      "grad_norm": 0.18147122859954834,
      "learning_rate": 1.1851373523190765e-06,
      "loss": 0.0087,
      "step": 2695540
    },
    {
      "epoch": 4.411343060819701,
      "grad_norm": 0.2841951847076416,
      "learning_rate": 1.1850714601055595e-06,
      "loss": 0.0081,
      "step": 2695560
    },
    {
      "epoch": 4.411375791258354,
      "grad_norm": 0.27709534764289856,
      "learning_rate": 1.1850055678920422e-06,
      "loss": 0.0088,
      "step": 2695580
    },
    {
      "epoch": 4.411408521697008,
      "grad_norm": 0.18678243458271027,
      "learning_rate": 1.1849396756785252e-06,
      "loss": 0.0087,
      "step": 2695600
    },
    {
      "epoch": 4.411441252135661,
      "grad_norm": 0.4638694226741791,
      "learning_rate": 1.184873783465008e-06,
      "loss": 0.0144,
      "step": 2695620
    },
    {
      "epoch": 4.411473982574314,
      "grad_norm": 0.21454596519470215,
      "learning_rate": 1.1848078912514909e-06,
      "loss": 0.0169,
      "step": 2695640
    },
    {
      "epoch": 4.411506713012968,
      "grad_norm": 0.07238538563251495,
      "learning_rate": 1.1847419990379738e-06,
      "loss": 0.0105,
      "step": 2695660
    },
    {
      "epoch": 4.4115394434516215,
      "grad_norm": 0.2753142714500427,
      "learning_rate": 1.1846761068244568e-06,
      "loss": 0.0121,
      "step": 2695680
    },
    {
      "epoch": 4.411572173890274,
      "grad_norm": 0.15244793891906738,
      "learning_rate": 1.1846102146109395e-06,
      "loss": 0.0101,
      "step": 2695700
    },
    {
      "epoch": 4.411604904328928,
      "grad_norm": 0.11140358448028564,
      "learning_rate": 1.1845443223974222e-06,
      "loss": 0.0111,
      "step": 2695720
    },
    {
      "epoch": 4.411637634767581,
      "grad_norm": 0.18017034232616425,
      "learning_rate": 1.1844784301839054e-06,
      "loss": 0.0086,
      "step": 2695740
    },
    {
      "epoch": 4.411670365206234,
      "grad_norm": 0.10306936502456665,
      "learning_rate": 1.1844125379703881e-06,
      "loss": 0.0059,
      "step": 2695760
    },
    {
      "epoch": 4.411703095644888,
      "grad_norm": 0.1268351823091507,
      "learning_rate": 1.184346645756871e-06,
      "loss": 0.0071,
      "step": 2695780
    },
    {
      "epoch": 4.411735826083541,
      "grad_norm": 0.13288617134094238,
      "learning_rate": 1.1842807535433538e-06,
      "loss": 0.0092,
      "step": 2695800
    },
    {
      "epoch": 4.411768556522195,
      "grad_norm": 0.3574557900428772,
      "learning_rate": 1.1842148613298368e-06,
      "loss": 0.012,
      "step": 2695820
    },
    {
      "epoch": 4.411801286960848,
      "grad_norm": 0.17247244715690613,
      "learning_rate": 1.1841489691163197e-06,
      "loss": 0.0094,
      "step": 2695840
    },
    {
      "epoch": 4.411834017399501,
      "grad_norm": 0.19589367508888245,
      "learning_rate": 1.1840830769028025e-06,
      "loss": 0.0104,
      "step": 2695860
    },
    {
      "epoch": 4.411866747838155,
      "grad_norm": 0.1811472624540329,
      "learning_rate": 1.1840171846892854e-06,
      "loss": 0.0085,
      "step": 2695880
    },
    {
      "epoch": 4.4118994782768075,
      "grad_norm": 0.11771752685308456,
      "learning_rate": 1.1839512924757682e-06,
      "loss": 0.0063,
      "step": 2695900
    },
    {
      "epoch": 4.411932208715461,
      "grad_norm": 0.17892412841320038,
      "learning_rate": 1.1838854002622511e-06,
      "loss": 0.0073,
      "step": 2695920
    },
    {
      "epoch": 4.411964939154115,
      "grad_norm": 0.16595907509326935,
      "learning_rate": 1.183819508048734e-06,
      "loss": 0.0117,
      "step": 2695940
    },
    {
      "epoch": 4.411997669592768,
      "grad_norm": 0.15386398136615753,
      "learning_rate": 1.1837536158352168e-06,
      "loss": 0.0122,
      "step": 2695960
    },
    {
      "epoch": 4.412030400031421,
      "grad_norm": 0.2543710172176361,
      "learning_rate": 1.1836877236216998e-06,
      "loss": 0.0092,
      "step": 2695980
    },
    {
      "epoch": 4.412063130470075,
      "grad_norm": 0.6443891525268555,
      "learning_rate": 1.1836218314081827e-06,
      "loss": 0.01,
      "step": 2696000
    },
    {
      "epoch": 4.412095860908728,
      "grad_norm": 0.32211020588874817,
      "learning_rate": 1.1835559391946655e-06,
      "loss": 0.0107,
      "step": 2696020
    },
    {
      "epoch": 4.412128591347381,
      "grad_norm": 0.20021960139274597,
      "learning_rate": 1.1834900469811484e-06,
      "loss": 0.0063,
      "step": 2696040
    },
    {
      "epoch": 4.4121613217860345,
      "grad_norm": 0.09592857956886292,
      "learning_rate": 1.1834241547676312e-06,
      "loss": 0.0083,
      "step": 2696060
    },
    {
      "epoch": 4.412194052224688,
      "grad_norm": 0.9128332138061523,
      "learning_rate": 1.183358262554114e-06,
      "loss": 0.0114,
      "step": 2696080
    },
    {
      "epoch": 4.412226782663341,
      "grad_norm": 0.14788725972175598,
      "learning_rate": 1.183292370340597e-06,
      "loss": 0.0059,
      "step": 2696100
    },
    {
      "epoch": 4.412259513101994,
      "grad_norm": 0.19644682109355927,
      "learning_rate": 1.1832264781270798e-06,
      "loss": 0.0106,
      "step": 2696120
    },
    {
      "epoch": 4.412292243540648,
      "grad_norm": 0.09837284684181213,
      "learning_rate": 1.1831605859135627e-06,
      "loss": 0.0092,
      "step": 2696140
    },
    {
      "epoch": 4.412324973979302,
      "grad_norm": 0.1720091551542282,
      "learning_rate": 1.1830946937000455e-06,
      "loss": 0.0096,
      "step": 2696160
    },
    {
      "epoch": 4.412357704417954,
      "grad_norm": 0.5561909675598145,
      "learning_rate": 1.1830288014865284e-06,
      "loss": 0.0108,
      "step": 2696180
    },
    {
      "epoch": 4.412390434856608,
      "grad_norm": 0.4636140763759613,
      "learning_rate": 1.1829629092730114e-06,
      "loss": 0.0114,
      "step": 2696200
    },
    {
      "epoch": 4.4124231652952615,
      "grad_norm": 0.3861018121242523,
      "learning_rate": 1.1828970170594941e-06,
      "loss": 0.0116,
      "step": 2696220
    },
    {
      "epoch": 4.412455895733915,
      "grad_norm": 0.129571795463562,
      "learning_rate": 1.182831124845977e-06,
      "loss": 0.0119,
      "step": 2696240
    },
    {
      "epoch": 4.412488626172568,
      "grad_norm": 0.14882118999958038,
      "learning_rate": 1.18276523263246e-06,
      "loss": 0.0112,
      "step": 2696260
    },
    {
      "epoch": 4.412521356611221,
      "grad_norm": 0.2744408845901489,
      "learning_rate": 1.1826993404189428e-06,
      "loss": 0.0093,
      "step": 2696280
    },
    {
      "epoch": 4.412554087049875,
      "grad_norm": 0.11407763510942459,
      "learning_rate": 1.1826334482054257e-06,
      "loss": 0.0098,
      "step": 2696300
    },
    {
      "epoch": 4.412586817488528,
      "grad_norm": 0.22101081907749176,
      "learning_rate": 1.1825675559919085e-06,
      "loss": 0.0171,
      "step": 2696320
    },
    {
      "epoch": 4.412619547927181,
      "grad_norm": 0.23863273859024048,
      "learning_rate": 1.1825016637783914e-06,
      "loss": 0.0051,
      "step": 2696340
    },
    {
      "epoch": 4.412652278365835,
      "grad_norm": 0.19710449874401093,
      "learning_rate": 1.1824357715648744e-06,
      "loss": 0.0081,
      "step": 2696360
    },
    {
      "epoch": 4.4126850088044876,
      "grad_norm": 0.13229158520698547,
      "learning_rate": 1.182369879351357e-06,
      "loss": 0.0086,
      "step": 2696380
    },
    {
      "epoch": 4.412717739243141,
      "grad_norm": 0.45542478561401367,
      "learning_rate": 1.18230398713784e-06,
      "loss": 0.0106,
      "step": 2696400
    },
    {
      "epoch": 4.412750469681795,
      "grad_norm": 0.40462198853492737,
      "learning_rate": 1.1822380949243228e-06,
      "loss": 0.0061,
      "step": 2696420
    },
    {
      "epoch": 4.412783200120448,
      "grad_norm": 0.11810764670372009,
      "learning_rate": 1.1821722027108057e-06,
      "loss": 0.0087,
      "step": 2696440
    },
    {
      "epoch": 4.412815930559101,
      "grad_norm": 0.3516695499420166,
      "learning_rate": 1.1821063104972887e-06,
      "loss": 0.0074,
      "step": 2696460
    },
    {
      "epoch": 4.412848660997755,
      "grad_norm": 0.3935922384262085,
      "learning_rate": 1.1820404182837714e-06,
      "loss": 0.0112,
      "step": 2696480
    },
    {
      "epoch": 4.412881391436408,
      "grad_norm": 0.26438871026039124,
      "learning_rate": 1.1819745260702544e-06,
      "loss": 0.011,
      "step": 2696500
    },
    {
      "epoch": 4.412914121875062,
      "grad_norm": 0.20873771607875824,
      "learning_rate": 1.1819086338567371e-06,
      "loss": 0.0103,
      "step": 2696520
    },
    {
      "epoch": 4.4129468523137145,
      "grad_norm": 0.18835754692554474,
      "learning_rate": 1.18184274164322e-06,
      "loss": 0.0111,
      "step": 2696540
    },
    {
      "epoch": 4.412979582752368,
      "grad_norm": 0.3141980767250061,
      "learning_rate": 1.181776849429703e-06,
      "loss": 0.0106,
      "step": 2696560
    },
    {
      "epoch": 4.413012313191022,
      "grad_norm": 0.187279611825943,
      "learning_rate": 1.1817109572161858e-06,
      "loss": 0.0121,
      "step": 2696580
    },
    {
      "epoch": 4.413045043629674,
      "grad_norm": 0.0534881092607975,
      "learning_rate": 1.1816450650026687e-06,
      "loss": 0.0087,
      "step": 2696600
    },
    {
      "epoch": 4.413077774068328,
      "grad_norm": 0.7606180906295776,
      "learning_rate": 1.1815791727891517e-06,
      "loss": 0.0099,
      "step": 2696620
    },
    {
      "epoch": 4.413110504506982,
      "grad_norm": 0.10722800344228745,
      "learning_rate": 1.1815132805756344e-06,
      "loss": 0.0087,
      "step": 2696640
    },
    {
      "epoch": 4.413143234945634,
      "grad_norm": 0.16419938206672668,
      "learning_rate": 1.1814473883621174e-06,
      "loss": 0.0077,
      "step": 2696660
    },
    {
      "epoch": 4.413175965384288,
      "grad_norm": 0.2780739367008209,
      "learning_rate": 1.1813814961486001e-06,
      "loss": 0.0145,
      "step": 2696680
    },
    {
      "epoch": 4.4132086958229415,
      "grad_norm": 0.18221381306648254,
      "learning_rate": 1.181315603935083e-06,
      "loss": 0.0091,
      "step": 2696700
    },
    {
      "epoch": 4.413241426261595,
      "grad_norm": 0.11407160758972168,
      "learning_rate": 1.181249711721566e-06,
      "loss": 0.0106,
      "step": 2696720
    },
    {
      "epoch": 4.413274156700248,
      "grad_norm": 0.18343572318553925,
      "learning_rate": 1.1811838195080487e-06,
      "loss": 0.0107,
      "step": 2696740
    },
    {
      "epoch": 4.413306887138901,
      "grad_norm": 0.587300717830658,
      "learning_rate": 1.1811179272945317e-06,
      "loss": 0.0078,
      "step": 2696760
    },
    {
      "epoch": 4.413339617577555,
      "grad_norm": 0.11100917309522629,
      "learning_rate": 1.1810520350810144e-06,
      "loss": 0.0064,
      "step": 2696780
    },
    {
      "epoch": 4.413372348016208,
      "grad_norm": 0.21200166642665863,
      "learning_rate": 1.1809861428674974e-06,
      "loss": 0.0101,
      "step": 2696800
    },
    {
      "epoch": 4.413405078454861,
      "grad_norm": 0.4575185179710388,
      "learning_rate": 1.1809202506539803e-06,
      "loss": 0.011,
      "step": 2696820
    },
    {
      "epoch": 4.413437808893515,
      "grad_norm": 0.09411090612411499,
      "learning_rate": 1.180854358440463e-06,
      "loss": 0.0054,
      "step": 2696840
    },
    {
      "epoch": 4.4134705393321685,
      "grad_norm": 0.33961471915245056,
      "learning_rate": 1.180788466226946e-06,
      "loss": 0.0108,
      "step": 2696860
    },
    {
      "epoch": 4.413503269770821,
      "grad_norm": 0.26552093029022217,
      "learning_rate": 1.180722574013429e-06,
      "loss": 0.0073,
      "step": 2696880
    },
    {
      "epoch": 4.413536000209475,
      "grad_norm": 0.39024990797042847,
      "learning_rate": 1.1806566817999117e-06,
      "loss": 0.0108,
      "step": 2696900
    },
    {
      "epoch": 4.413568730648128,
      "grad_norm": 0.28394803404808044,
      "learning_rate": 1.1805907895863947e-06,
      "loss": 0.0087,
      "step": 2696920
    },
    {
      "epoch": 4.413601461086781,
      "grad_norm": 0.0787888839840889,
      "learning_rate": 1.1805248973728774e-06,
      "loss": 0.0104,
      "step": 2696940
    },
    {
      "epoch": 4.413634191525435,
      "grad_norm": 0.26708289980888367,
      "learning_rate": 1.1804590051593604e-06,
      "loss": 0.0074,
      "step": 2696960
    },
    {
      "epoch": 4.413666921964088,
      "grad_norm": 0.897735059261322,
      "learning_rate": 1.1803931129458433e-06,
      "loss": 0.0134,
      "step": 2696980
    },
    {
      "epoch": 4.413699652402742,
      "grad_norm": 0.14261175692081451,
      "learning_rate": 1.1803272207323263e-06,
      "loss": 0.0117,
      "step": 2697000
    },
    {
      "epoch": 4.413732382841395,
      "grad_norm": 0.1793278157711029,
      "learning_rate": 1.180261328518809e-06,
      "loss": 0.0088,
      "step": 2697020
    },
    {
      "epoch": 4.413765113280048,
      "grad_norm": 0.20143389701843262,
      "learning_rate": 1.1801954363052918e-06,
      "loss": 0.0089,
      "step": 2697040
    },
    {
      "epoch": 4.413797843718702,
      "grad_norm": 0.29829224944114685,
      "learning_rate": 1.180129544091775e-06,
      "loss": 0.0084,
      "step": 2697060
    },
    {
      "epoch": 4.4138305741573545,
      "grad_norm": 0.09342899918556213,
      "learning_rate": 1.1800636518782577e-06,
      "loss": 0.0081,
      "step": 2697080
    },
    {
      "epoch": 4.413863304596008,
      "grad_norm": 0.25224167108535767,
      "learning_rate": 1.1799977596647406e-06,
      "loss": 0.0076,
      "step": 2697100
    },
    {
      "epoch": 4.413896035034662,
      "grad_norm": 0.15431861579418182,
      "learning_rate": 1.1799318674512233e-06,
      "loss": 0.0083,
      "step": 2697120
    },
    {
      "epoch": 4.413928765473315,
      "grad_norm": 0.0370585136115551,
      "learning_rate": 1.1798659752377063e-06,
      "loss": 0.0084,
      "step": 2697140
    },
    {
      "epoch": 4.413961495911968,
      "grad_norm": 0.2380986511707306,
      "learning_rate": 1.1798000830241892e-06,
      "loss": 0.0081,
      "step": 2697160
    },
    {
      "epoch": 4.4139942263506216,
      "grad_norm": 0.20060285925865173,
      "learning_rate": 1.179734190810672e-06,
      "loss": 0.0065,
      "step": 2697180
    },
    {
      "epoch": 4.414026956789275,
      "grad_norm": 0.6658860445022583,
      "learning_rate": 1.179668298597155e-06,
      "loss": 0.0087,
      "step": 2697200
    },
    {
      "epoch": 4.414059687227928,
      "grad_norm": 0.39604756236076355,
      "learning_rate": 1.1796024063836377e-06,
      "loss": 0.0101,
      "step": 2697220
    },
    {
      "epoch": 4.4140924176665814,
      "grad_norm": 0.3597464859485626,
      "learning_rate": 1.1795365141701206e-06,
      "loss": 0.0076,
      "step": 2697240
    },
    {
      "epoch": 4.414125148105235,
      "grad_norm": 0.1508665531873703,
      "learning_rate": 1.1794706219566036e-06,
      "loss": 0.0069,
      "step": 2697260
    },
    {
      "epoch": 4.414157878543889,
      "grad_norm": 0.7023807764053345,
      "learning_rate": 1.1794047297430863e-06,
      "loss": 0.0079,
      "step": 2697280
    },
    {
      "epoch": 4.414190608982541,
      "grad_norm": 0.03958578407764435,
      "learning_rate": 1.1793388375295693e-06,
      "loss": 0.0034,
      "step": 2697300
    },
    {
      "epoch": 4.414223339421195,
      "grad_norm": 0.16400864720344543,
      "learning_rate": 1.1792729453160522e-06,
      "loss": 0.0167,
      "step": 2697320
    },
    {
      "epoch": 4.4142560698598485,
      "grad_norm": 0.11728457361459732,
      "learning_rate": 1.179207053102535e-06,
      "loss": 0.0115,
      "step": 2697340
    },
    {
      "epoch": 4.414288800298501,
      "grad_norm": 0.32699844241142273,
      "learning_rate": 1.179141160889018e-06,
      "loss": 0.0075,
      "step": 2697360
    },
    {
      "epoch": 4.414321530737155,
      "grad_norm": 0.10709276795387268,
      "learning_rate": 1.1790752686755007e-06,
      "loss": 0.0093,
      "step": 2697380
    },
    {
      "epoch": 4.414354261175808,
      "grad_norm": 0.16019876301288605,
      "learning_rate": 1.1790093764619836e-06,
      "loss": 0.0068,
      "step": 2697400
    },
    {
      "epoch": 4.414386991614462,
      "grad_norm": 0.20179885625839233,
      "learning_rate": 1.1789434842484666e-06,
      "loss": 0.0096,
      "step": 2697420
    },
    {
      "epoch": 4.414419722053115,
      "grad_norm": 0.24010662734508514,
      "learning_rate": 1.1788775920349493e-06,
      "loss": 0.0097,
      "step": 2697440
    },
    {
      "epoch": 4.414452452491768,
      "grad_norm": 0.43228161334991455,
      "learning_rate": 1.1788116998214323e-06,
      "loss": 0.0095,
      "step": 2697460
    },
    {
      "epoch": 4.414485182930422,
      "grad_norm": 0.20350104570388794,
      "learning_rate": 1.178745807607915e-06,
      "loss": 0.0114,
      "step": 2697480
    },
    {
      "epoch": 4.414517913369075,
      "grad_norm": 0.22852683067321777,
      "learning_rate": 1.178679915394398e-06,
      "loss": 0.0056,
      "step": 2697500
    },
    {
      "epoch": 4.414550643807728,
      "grad_norm": 0.44873329997062683,
      "learning_rate": 1.1786140231808809e-06,
      "loss": 0.0102,
      "step": 2697520
    },
    {
      "epoch": 4.414583374246382,
      "grad_norm": 0.40440717339515686,
      "learning_rate": 1.1785481309673636e-06,
      "loss": 0.0075,
      "step": 2697540
    },
    {
      "epoch": 4.414616104685035,
      "grad_norm": 0.3233703374862671,
      "learning_rate": 1.1784822387538466e-06,
      "loss": 0.0078,
      "step": 2697560
    },
    {
      "epoch": 4.414648835123688,
      "grad_norm": 0.8746769428253174,
      "learning_rate": 1.1784163465403295e-06,
      "loss": 0.0113,
      "step": 2697580
    },
    {
      "epoch": 4.414681565562342,
      "grad_norm": 0.20467528700828552,
      "learning_rate": 1.1783504543268123e-06,
      "loss": 0.0087,
      "step": 2697600
    },
    {
      "epoch": 4.414714296000995,
      "grad_norm": 0.2462197244167328,
      "learning_rate": 1.1782845621132952e-06,
      "loss": 0.0076,
      "step": 2697620
    },
    {
      "epoch": 4.414747026439648,
      "grad_norm": 0.3576398193836212,
      "learning_rate": 1.178218669899778e-06,
      "loss": 0.0095,
      "step": 2697640
    },
    {
      "epoch": 4.414779756878302,
      "grad_norm": 0.3358035683631897,
      "learning_rate": 1.178152777686261e-06,
      "loss": 0.0066,
      "step": 2697660
    },
    {
      "epoch": 4.414812487316955,
      "grad_norm": 0.36305177211761475,
      "learning_rate": 1.1780868854727439e-06,
      "loss": 0.0104,
      "step": 2697680
    },
    {
      "epoch": 4.414845217755609,
      "grad_norm": 0.2533411979675293,
      "learning_rate": 1.1780209932592266e-06,
      "loss": 0.0109,
      "step": 2697700
    },
    {
      "epoch": 4.4148779481942615,
      "grad_norm": 0.13662078976631165,
      "learning_rate": 1.1779551010457096e-06,
      "loss": 0.0088,
      "step": 2697720
    },
    {
      "epoch": 4.414910678632915,
      "grad_norm": 0.28161296248435974,
      "learning_rate": 1.1778892088321923e-06,
      "loss": 0.0077,
      "step": 2697740
    },
    {
      "epoch": 4.414943409071569,
      "grad_norm": 0.3556438386440277,
      "learning_rate": 1.1778233166186753e-06,
      "loss": 0.0099,
      "step": 2697760
    },
    {
      "epoch": 4.414976139510221,
      "grad_norm": 0.14265896379947662,
      "learning_rate": 1.1777574244051582e-06,
      "loss": 0.0094,
      "step": 2697780
    },
    {
      "epoch": 4.415008869948875,
      "grad_norm": 0.13052310049533844,
      "learning_rate": 1.177691532191641e-06,
      "loss": 0.0075,
      "step": 2697800
    },
    {
      "epoch": 4.415041600387529,
      "grad_norm": 0.15692274272441864,
      "learning_rate": 1.177625639978124e-06,
      "loss": 0.0075,
      "step": 2697820
    },
    {
      "epoch": 4.415074330826181,
      "grad_norm": 0.15538851916790009,
      "learning_rate": 1.1775597477646066e-06,
      "loss": 0.0067,
      "step": 2697840
    },
    {
      "epoch": 4.415107061264835,
      "grad_norm": 0.22867707908153534,
      "learning_rate": 1.1774938555510896e-06,
      "loss": 0.0089,
      "step": 2697860
    },
    {
      "epoch": 4.4151397917034885,
      "grad_norm": 0.3439691960811615,
      "learning_rate": 1.1774279633375725e-06,
      "loss": 0.0108,
      "step": 2697880
    },
    {
      "epoch": 4.415172522142142,
      "grad_norm": 0.3950843811035156,
      "learning_rate": 1.1773620711240553e-06,
      "loss": 0.0187,
      "step": 2697900
    },
    {
      "epoch": 4.415205252580795,
      "grad_norm": 0.24299661815166473,
      "learning_rate": 1.1772961789105382e-06,
      "loss": 0.0113,
      "step": 2697920
    },
    {
      "epoch": 4.415237983019448,
      "grad_norm": 0.2618773281574249,
      "learning_rate": 1.1772302866970212e-06,
      "loss": 0.008,
      "step": 2697940
    },
    {
      "epoch": 4.415270713458102,
      "grad_norm": 0.44611603021621704,
      "learning_rate": 1.177164394483504e-06,
      "loss": 0.0077,
      "step": 2697960
    },
    {
      "epoch": 4.4153034438967556,
      "grad_norm": 0.2096695452928543,
      "learning_rate": 1.1770985022699869e-06,
      "loss": 0.0118,
      "step": 2697980
    },
    {
      "epoch": 4.415336174335408,
      "grad_norm": 0.15268297493457794,
      "learning_rate": 1.1770326100564696e-06,
      "loss": 0.0103,
      "step": 2698000
    },
    {
      "epoch": 4.415368904774062,
      "grad_norm": 0.13121819496154785,
      "learning_rate": 1.1769667178429526e-06,
      "loss": 0.0087,
      "step": 2698020
    },
    {
      "epoch": 4.4154016352127154,
      "grad_norm": 0.2694849669933319,
      "learning_rate": 1.1769008256294355e-06,
      "loss": 0.0122,
      "step": 2698040
    },
    {
      "epoch": 4.415434365651368,
      "grad_norm": 0.1918911188840866,
      "learning_rate": 1.1768349334159183e-06,
      "loss": 0.0065,
      "step": 2698060
    },
    {
      "epoch": 4.415467096090022,
      "grad_norm": 0.28292545676231384,
      "learning_rate": 1.1767690412024012e-06,
      "loss": 0.0107,
      "step": 2698080
    },
    {
      "epoch": 4.415499826528675,
      "grad_norm": 0.3947354853153229,
      "learning_rate": 1.176703148988884e-06,
      "loss": 0.0083,
      "step": 2698100
    },
    {
      "epoch": 4.415532556967328,
      "grad_norm": 0.3620864450931549,
      "learning_rate": 1.176637256775367e-06,
      "loss": 0.0165,
      "step": 2698120
    },
    {
      "epoch": 4.415565287405982,
      "grad_norm": 0.2018805295228958,
      "learning_rate": 1.1765713645618498e-06,
      "loss": 0.0075,
      "step": 2698140
    },
    {
      "epoch": 4.415598017844635,
      "grad_norm": 0.09847333282232285,
      "learning_rate": 1.1765054723483326e-06,
      "loss": 0.0161,
      "step": 2698160
    },
    {
      "epoch": 4.415630748283289,
      "grad_norm": 0.1794617474079132,
      "learning_rate": 1.1764395801348155e-06,
      "loss": 0.0127,
      "step": 2698180
    },
    {
      "epoch": 4.4156634787219415,
      "grad_norm": 0.19694167375564575,
      "learning_rate": 1.1763736879212985e-06,
      "loss": 0.0088,
      "step": 2698200
    },
    {
      "epoch": 4.415696209160595,
      "grad_norm": 0.13482306897640228,
      "learning_rate": 1.1763077957077812e-06,
      "loss": 0.0068,
      "step": 2698220
    },
    {
      "epoch": 4.415728939599249,
      "grad_norm": 0.16091401875019073,
      "learning_rate": 1.1762419034942642e-06,
      "loss": 0.0086,
      "step": 2698240
    },
    {
      "epoch": 4.415761670037901,
      "grad_norm": 0.2362547665834427,
      "learning_rate": 1.176176011280747e-06,
      "loss": 0.0085,
      "step": 2698260
    },
    {
      "epoch": 4.415794400476555,
      "grad_norm": 0.35744762420654297,
      "learning_rate": 1.1761101190672299e-06,
      "loss": 0.0092,
      "step": 2698280
    },
    {
      "epoch": 4.415827130915209,
      "grad_norm": 0.13886426389217377,
      "learning_rate": 1.1760442268537128e-06,
      "loss": 0.0074,
      "step": 2698300
    },
    {
      "epoch": 4.415859861353862,
      "grad_norm": 0.20391707122325897,
      "learning_rate": 1.1759783346401958e-06,
      "loss": 0.0074,
      "step": 2698320
    },
    {
      "epoch": 4.415892591792515,
      "grad_norm": 0.11715307086706161,
      "learning_rate": 1.1759124424266785e-06,
      "loss": 0.0115,
      "step": 2698340
    },
    {
      "epoch": 4.4159253222311685,
      "grad_norm": 0.40610170364379883,
      "learning_rate": 1.1758465502131613e-06,
      "loss": 0.0095,
      "step": 2698360
    },
    {
      "epoch": 4.415958052669822,
      "grad_norm": 0.7417101860046387,
      "learning_rate": 1.1757806579996444e-06,
      "loss": 0.0097,
      "step": 2698380
    },
    {
      "epoch": 4.415990783108475,
      "grad_norm": 0.06367681920528412,
      "learning_rate": 1.1757147657861272e-06,
      "loss": 0.0092,
      "step": 2698400
    },
    {
      "epoch": 4.416023513547128,
      "grad_norm": 0.09650950878858566,
      "learning_rate": 1.1756488735726101e-06,
      "loss": 0.0063,
      "step": 2698420
    },
    {
      "epoch": 4.416056243985782,
      "grad_norm": 0.3235473036766052,
      "learning_rate": 1.1755829813590929e-06,
      "loss": 0.0063,
      "step": 2698440
    },
    {
      "epoch": 4.416088974424436,
      "grad_norm": 0.1804923713207245,
      "learning_rate": 1.1755170891455758e-06,
      "loss": 0.0084,
      "step": 2698460
    },
    {
      "epoch": 4.416121704863088,
      "grad_norm": 0.2987454831600189,
      "learning_rate": 1.1754511969320588e-06,
      "loss": 0.013,
      "step": 2698480
    },
    {
      "epoch": 4.416154435301742,
      "grad_norm": 0.3811657428741455,
      "learning_rate": 1.1753853047185415e-06,
      "loss": 0.0095,
      "step": 2698500
    },
    {
      "epoch": 4.4161871657403955,
      "grad_norm": 0.36615580320358276,
      "learning_rate": 1.1753194125050244e-06,
      "loss": 0.0068,
      "step": 2698520
    },
    {
      "epoch": 4.416219896179048,
      "grad_norm": 0.26409274339675903,
      "learning_rate": 1.1752535202915072e-06,
      "loss": 0.0103,
      "step": 2698540
    },
    {
      "epoch": 4.416252626617702,
      "grad_norm": 0.10428211092948914,
      "learning_rate": 1.1751876280779901e-06,
      "loss": 0.0116,
      "step": 2698560
    },
    {
      "epoch": 4.416285357056355,
      "grad_norm": 0.4644670784473419,
      "learning_rate": 1.175121735864473e-06,
      "loss": 0.0072,
      "step": 2698580
    },
    {
      "epoch": 4.416318087495009,
      "grad_norm": 0.2826478183269501,
      "learning_rate": 1.1750558436509558e-06,
      "loss": 0.0093,
      "step": 2698600
    },
    {
      "epoch": 4.416350817933662,
      "grad_norm": 0.5219681262969971,
      "learning_rate": 1.1749899514374388e-06,
      "loss": 0.0116,
      "step": 2698620
    },
    {
      "epoch": 4.416383548372315,
      "grad_norm": 0.12731775641441345,
      "learning_rate": 1.1749240592239217e-06,
      "loss": 0.0091,
      "step": 2698640
    },
    {
      "epoch": 4.416416278810969,
      "grad_norm": 0.42595455050468445,
      "learning_rate": 1.1748581670104045e-06,
      "loss": 0.0099,
      "step": 2698660
    },
    {
      "epoch": 4.416449009249622,
      "grad_norm": 0.1546032726764679,
      "learning_rate": 1.1747922747968874e-06,
      "loss": 0.0093,
      "step": 2698680
    },
    {
      "epoch": 4.416481739688275,
      "grad_norm": 0.16146568953990936,
      "learning_rate": 1.1747263825833702e-06,
      "loss": 0.013,
      "step": 2698700
    },
    {
      "epoch": 4.416514470126929,
      "grad_norm": 0.10505197942256927,
      "learning_rate": 1.1746604903698531e-06,
      "loss": 0.0129,
      "step": 2698720
    },
    {
      "epoch": 4.416547200565582,
      "grad_norm": 0.18107202649116516,
      "learning_rate": 1.174594598156336e-06,
      "loss": 0.0073,
      "step": 2698740
    },
    {
      "epoch": 4.416579931004235,
      "grad_norm": 0.1413949579000473,
      "learning_rate": 1.1745287059428188e-06,
      "loss": 0.0059,
      "step": 2698760
    },
    {
      "epoch": 4.416612661442889,
      "grad_norm": 0.2622201442718506,
      "learning_rate": 1.1744628137293018e-06,
      "loss": 0.0079,
      "step": 2698780
    },
    {
      "epoch": 4.416645391881542,
      "grad_norm": 0.09613332897424698,
      "learning_rate": 1.1743969215157845e-06,
      "loss": 0.0097,
      "step": 2698800
    },
    {
      "epoch": 4.416678122320195,
      "grad_norm": 0.4690084457397461,
      "learning_rate": 1.1743310293022674e-06,
      "loss": 0.007,
      "step": 2698820
    },
    {
      "epoch": 4.416710852758849,
      "grad_norm": 0.5664509534835815,
      "learning_rate": 1.1742651370887504e-06,
      "loss": 0.0214,
      "step": 2698840
    },
    {
      "epoch": 4.416743583197502,
      "grad_norm": 0.4446832537651062,
      "learning_rate": 1.1741992448752331e-06,
      "loss": 0.0069,
      "step": 2698860
    },
    {
      "epoch": 4.416776313636156,
      "grad_norm": 0.1851148158311844,
      "learning_rate": 1.174133352661716e-06,
      "loss": 0.0119,
      "step": 2698880
    },
    {
      "epoch": 4.4168090440748085,
      "grad_norm": 0.07595918327569962,
      "learning_rate": 1.174067460448199e-06,
      "loss": 0.0064,
      "step": 2698900
    },
    {
      "epoch": 4.416841774513462,
      "grad_norm": 0.13148120045661926,
      "learning_rate": 1.1740015682346818e-06,
      "loss": 0.0083,
      "step": 2698920
    },
    {
      "epoch": 4.416874504952116,
      "grad_norm": 0.29652315378189087,
      "learning_rate": 1.1739356760211647e-06,
      "loss": 0.0099,
      "step": 2698940
    },
    {
      "epoch": 4.416907235390768,
      "grad_norm": 0.4530346095561981,
      "learning_rate": 1.1738697838076475e-06,
      "loss": 0.009,
      "step": 2698960
    },
    {
      "epoch": 4.416939965829422,
      "grad_norm": 0.07742294669151306,
      "learning_rate": 1.1738038915941304e-06,
      "loss": 0.0102,
      "step": 2698980
    },
    {
      "epoch": 4.4169726962680755,
      "grad_norm": 0.17912894487380981,
      "learning_rate": 1.1737379993806134e-06,
      "loss": 0.0067,
      "step": 2699000
    },
    {
      "epoch": 4.417005426706729,
      "grad_norm": 0.04093894362449646,
      "learning_rate": 1.1736721071670961e-06,
      "loss": 0.0063,
      "step": 2699020
    },
    {
      "epoch": 4.417038157145382,
      "grad_norm": 0.10488045960664749,
      "learning_rate": 1.173606214953579e-06,
      "loss": 0.0088,
      "step": 2699040
    },
    {
      "epoch": 4.417070887584035,
      "grad_norm": 0.19589102268218994,
      "learning_rate": 1.1735403227400618e-06,
      "loss": 0.0056,
      "step": 2699060
    },
    {
      "epoch": 4.417103618022689,
      "grad_norm": 0.09677908569574356,
      "learning_rate": 1.1734744305265448e-06,
      "loss": 0.0118,
      "step": 2699080
    },
    {
      "epoch": 4.417136348461342,
      "grad_norm": 0.0878569483757019,
      "learning_rate": 1.1734085383130277e-06,
      "loss": 0.0084,
      "step": 2699100
    },
    {
      "epoch": 4.417169078899995,
      "grad_norm": 0.5910446643829346,
      "learning_rate": 1.1733426460995104e-06,
      "loss": 0.0112,
      "step": 2699120
    },
    {
      "epoch": 4.417201809338649,
      "grad_norm": 0.2124592661857605,
      "learning_rate": 1.1732767538859934e-06,
      "loss": 0.0059,
      "step": 2699140
    },
    {
      "epoch": 4.4172345397773025,
      "grad_norm": 0.31397584080696106,
      "learning_rate": 1.1732108616724761e-06,
      "loss": 0.008,
      "step": 2699160
    },
    {
      "epoch": 4.417267270215955,
      "grad_norm": 0.04260013625025749,
      "learning_rate": 1.173144969458959e-06,
      "loss": 0.009,
      "step": 2699180
    },
    {
      "epoch": 4.417300000654609,
      "grad_norm": 0.37389057874679565,
      "learning_rate": 1.173079077245442e-06,
      "loss": 0.0129,
      "step": 2699200
    },
    {
      "epoch": 4.417332731093262,
      "grad_norm": 0.49892523884773254,
      "learning_rate": 1.1730131850319248e-06,
      "loss": 0.0134,
      "step": 2699220
    },
    {
      "epoch": 4.417365461531915,
      "grad_norm": 0.26707252860069275,
      "learning_rate": 1.1729472928184077e-06,
      "loss": 0.0115,
      "step": 2699240
    },
    {
      "epoch": 4.417398191970569,
      "grad_norm": 0.3517119288444519,
      "learning_rate": 1.1728814006048907e-06,
      "loss": 0.0143,
      "step": 2699260
    },
    {
      "epoch": 4.417430922409222,
      "grad_norm": 0.6618213057518005,
      "learning_rate": 1.1728155083913734e-06,
      "loss": 0.01,
      "step": 2699280
    },
    {
      "epoch": 4.417463652847875,
      "grad_norm": 0.3487141728401184,
      "learning_rate": 1.1727496161778564e-06,
      "loss": 0.0116,
      "step": 2699300
    },
    {
      "epoch": 4.417496383286529,
      "grad_norm": 0.24326255917549133,
      "learning_rate": 1.1726837239643391e-06,
      "loss": 0.0089,
      "step": 2699320
    },
    {
      "epoch": 4.417529113725182,
      "grad_norm": 0.2221618890762329,
      "learning_rate": 1.172617831750822e-06,
      "loss": 0.0077,
      "step": 2699340
    },
    {
      "epoch": 4.417561844163836,
      "grad_norm": 0.12964002788066864,
      "learning_rate": 1.172551939537305e-06,
      "loss": 0.009,
      "step": 2699360
    },
    {
      "epoch": 4.4175945746024885,
      "grad_norm": 0.056219037622213364,
      "learning_rate": 1.1724860473237878e-06,
      "loss": 0.0093,
      "step": 2699380
    },
    {
      "epoch": 4.417627305041142,
      "grad_norm": 0.3154914975166321,
      "learning_rate": 1.1724201551102707e-06,
      "loss": 0.0097,
      "step": 2699400
    },
    {
      "epoch": 4.417660035479796,
      "grad_norm": 0.18319720029830933,
      "learning_rate": 1.1723542628967535e-06,
      "loss": 0.0099,
      "step": 2699420
    },
    {
      "epoch": 4.417692765918449,
      "grad_norm": 0.36656880378723145,
      "learning_rate": 1.1722883706832364e-06,
      "loss": 0.0118,
      "step": 2699440
    },
    {
      "epoch": 4.417725496357102,
      "grad_norm": 0.47881773114204407,
      "learning_rate": 1.1722224784697194e-06,
      "loss": 0.0076,
      "step": 2699460
    },
    {
      "epoch": 4.417758226795756,
      "grad_norm": 0.1553255319595337,
      "learning_rate": 1.172156586256202e-06,
      "loss": 0.0079,
      "step": 2699480
    },
    {
      "epoch": 4.417790957234409,
      "grad_norm": 0.8435018062591553,
      "learning_rate": 1.172090694042685e-06,
      "loss": 0.0109,
      "step": 2699500
    },
    {
      "epoch": 4.417823687673062,
      "grad_norm": 0.3621063232421875,
      "learning_rate": 1.172024801829168e-06,
      "loss": 0.0077,
      "step": 2699520
    },
    {
      "epoch": 4.4178564181117155,
      "grad_norm": 0.18387973308563232,
      "learning_rate": 1.1719589096156507e-06,
      "loss": 0.0076,
      "step": 2699540
    },
    {
      "epoch": 4.417889148550369,
      "grad_norm": 0.3200840950012207,
      "learning_rate": 1.1718930174021337e-06,
      "loss": 0.0099,
      "step": 2699560
    },
    {
      "epoch": 4.417921878989022,
      "grad_norm": 0.07868938148021698,
      "learning_rate": 1.1718271251886164e-06,
      "loss": 0.0103,
      "step": 2699580
    },
    {
      "epoch": 4.417954609427675,
      "grad_norm": 0.21703015267848969,
      "learning_rate": 1.1717612329750994e-06,
      "loss": 0.0096,
      "step": 2699600
    },
    {
      "epoch": 4.417987339866329,
      "grad_norm": 0.36564207077026367,
      "learning_rate": 1.1716953407615823e-06,
      "loss": 0.0068,
      "step": 2699620
    },
    {
      "epoch": 4.418020070304983,
      "grad_norm": 0.12999972701072693,
      "learning_rate": 1.1716294485480653e-06,
      "loss": 0.0071,
      "step": 2699640
    },
    {
      "epoch": 4.418052800743635,
      "grad_norm": 0.29572367668151855,
      "learning_rate": 1.171563556334548e-06,
      "loss": 0.0107,
      "step": 2699660
    },
    {
      "epoch": 4.418085531182289,
      "grad_norm": 0.34631603956222534,
      "learning_rate": 1.1714976641210308e-06,
      "loss": 0.0084,
      "step": 2699680
    },
    {
      "epoch": 4.4181182616209425,
      "grad_norm": 0.1191139742732048,
      "learning_rate": 1.171431771907514e-06,
      "loss": 0.0098,
      "step": 2699700
    },
    {
      "epoch": 4.418150992059596,
      "grad_norm": 0.06952661275863647,
      "learning_rate": 1.1713658796939967e-06,
      "loss": 0.0096,
      "step": 2699720
    },
    {
      "epoch": 4.418183722498249,
      "grad_norm": 0.029164765030145645,
      "learning_rate": 1.1712999874804796e-06,
      "loss": 0.0076,
      "step": 2699740
    },
    {
      "epoch": 4.418216452936902,
      "grad_norm": 0.16722938418388367,
      "learning_rate": 1.1712340952669624e-06,
      "loss": 0.0086,
      "step": 2699760
    },
    {
      "epoch": 4.418249183375556,
      "grad_norm": 0.21247448027133942,
      "learning_rate": 1.1711682030534453e-06,
      "loss": 0.0077,
      "step": 2699780
    },
    {
      "epoch": 4.418281913814209,
      "grad_norm": 0.24330046772956848,
      "learning_rate": 1.1711023108399283e-06,
      "loss": 0.0129,
      "step": 2699800
    },
    {
      "epoch": 4.418314644252862,
      "grad_norm": 0.15437282621860504,
      "learning_rate": 1.171036418626411e-06,
      "loss": 0.0111,
      "step": 2699820
    },
    {
      "epoch": 4.418347374691516,
      "grad_norm": 0.12493103742599487,
      "learning_rate": 1.170970526412894e-06,
      "loss": 0.0066,
      "step": 2699840
    },
    {
      "epoch": 4.4183801051301685,
      "grad_norm": 0.11528737843036652,
      "learning_rate": 1.1709046341993767e-06,
      "loss": 0.0086,
      "step": 2699860
    },
    {
      "epoch": 4.418412835568822,
      "grad_norm": 0.15675778687000275,
      "learning_rate": 1.1708387419858596e-06,
      "loss": 0.0133,
      "step": 2699880
    },
    {
      "epoch": 4.418445566007476,
      "grad_norm": 0.07822974771261215,
      "learning_rate": 1.1707728497723426e-06,
      "loss": 0.0087,
      "step": 2699900
    },
    {
      "epoch": 4.418478296446129,
      "grad_norm": 0.6773834228515625,
      "learning_rate": 1.1707069575588253e-06,
      "loss": 0.0115,
      "step": 2699920
    },
    {
      "epoch": 4.418511026884782,
      "grad_norm": 0.07288488000631332,
      "learning_rate": 1.1706410653453083e-06,
      "loss": 0.014,
      "step": 2699940
    },
    {
      "epoch": 4.418543757323436,
      "grad_norm": 0.28398385643959045,
      "learning_rate": 1.1705751731317912e-06,
      "loss": 0.0082,
      "step": 2699960
    },
    {
      "epoch": 4.418576487762089,
      "grad_norm": 0.07050122320652008,
      "learning_rate": 1.170509280918274e-06,
      "loss": 0.0067,
      "step": 2699980
    },
    {
      "epoch": 4.418609218200742,
      "grad_norm": 0.07975517958402634,
      "learning_rate": 1.170443388704757e-06,
      "loss": 0.0138,
      "step": 2700000
    },
    {
      "epoch": 4.418609218200742,
      "eval_loss": 0.005889034830033779,
      "eval_runtime": 6488.0747,
      "eval_samples_per_second": 158.423,
      "eval_steps_per_second": 15.842,
      "eval_sts-dev_pearson_cosine": 0.9866720286521835,
      "eval_sts-dev_spearman_cosine": 0.8966148060980689,
      "step": 2700000
    },
    {
      "epoch": 4.4186419486393955,
      "grad_norm": 0.5297145843505859,
      "learning_rate": 1.1703774964912397e-06,
      "loss": 0.0077,
      "step": 2700020
    },
    {
      "epoch": 4.418674679078049,
      "grad_norm": 0.15269272029399872,
      "learning_rate": 1.1703116042777226e-06,
      "loss": 0.0063,
      "step": 2700040
    },
    {
      "epoch": 4.418707409516703,
      "grad_norm": 0.5325776934623718,
      "learning_rate": 1.1702457120642056e-06,
      "loss": 0.0107,
      "step": 2700060
    },
    {
      "epoch": 4.418740139955355,
      "grad_norm": 0.15652136504650116,
      "learning_rate": 1.1701798198506883e-06,
      "loss": 0.0065,
      "step": 2700080
    },
    {
      "epoch": 4.418772870394009,
      "grad_norm": 0.22590678930282593,
      "learning_rate": 1.1701139276371713e-06,
      "loss": 0.0083,
      "step": 2700100
    },
    {
      "epoch": 4.418805600832663,
      "grad_norm": 0.26260989904403687,
      "learning_rate": 1.170048035423654e-06,
      "loss": 0.0099,
      "step": 2700120
    },
    {
      "epoch": 4.418838331271315,
      "grad_norm": 0.2593151032924652,
      "learning_rate": 1.169982143210137e-06,
      "loss": 0.0085,
      "step": 2700140
    },
    {
      "epoch": 4.418871061709969,
      "grad_norm": 0.1548573225736618,
      "learning_rate": 1.16991625099662e-06,
      "loss": 0.0091,
      "step": 2700160
    },
    {
      "epoch": 4.4189037921486225,
      "grad_norm": 0.1353396475315094,
      "learning_rate": 1.1698503587831026e-06,
      "loss": 0.0114,
      "step": 2700180
    },
    {
      "epoch": 4.418936522587276,
      "grad_norm": 0.09195762872695923,
      "learning_rate": 1.1697844665695856e-06,
      "loss": 0.0072,
      "step": 2700200
    },
    {
      "epoch": 4.418969253025929,
      "grad_norm": 0.6487134695053101,
      "learning_rate": 1.1697185743560685e-06,
      "loss": 0.0082,
      "step": 2700220
    },
    {
      "epoch": 4.419001983464582,
      "grad_norm": 0.2792055606842041,
      "learning_rate": 1.1696526821425513e-06,
      "loss": 0.008,
      "step": 2700240
    },
    {
      "epoch": 4.419034713903236,
      "grad_norm": 0.16125436127185822,
      "learning_rate": 1.1695867899290342e-06,
      "loss": 0.008,
      "step": 2700260
    },
    {
      "epoch": 4.419067444341889,
      "grad_norm": 0.16004955768585205,
      "learning_rate": 1.169520897715517e-06,
      "loss": 0.0074,
      "step": 2700280
    },
    {
      "epoch": 4.419100174780542,
      "grad_norm": 0.16310620307922363,
      "learning_rate": 1.169455005502e-06,
      "loss": 0.0089,
      "step": 2700300
    },
    {
      "epoch": 4.419132905219196,
      "grad_norm": 0.08884573727846146,
      "learning_rate": 1.1693891132884829e-06,
      "loss": 0.0098,
      "step": 2700320
    },
    {
      "epoch": 4.4191656356578495,
      "grad_norm": 0.2783878445625305,
      "learning_rate": 1.1693232210749656e-06,
      "loss": 0.0071,
      "step": 2700340
    },
    {
      "epoch": 4.419198366096502,
      "grad_norm": 0.1593024730682373,
      "learning_rate": 1.1692573288614486e-06,
      "loss": 0.0119,
      "step": 2700360
    },
    {
      "epoch": 4.419231096535156,
      "grad_norm": 0.17764979600906372,
      "learning_rate": 1.1691914366479313e-06,
      "loss": 0.0082,
      "step": 2700380
    },
    {
      "epoch": 4.419263826973809,
      "grad_norm": 0.20770326256752014,
      "learning_rate": 1.1691255444344143e-06,
      "loss": 0.0098,
      "step": 2700400
    },
    {
      "epoch": 4.419296557412462,
      "grad_norm": 0.5424600839614868,
      "learning_rate": 1.1690596522208972e-06,
      "loss": 0.0085,
      "step": 2700420
    },
    {
      "epoch": 4.419329287851116,
      "grad_norm": 0.18277566134929657,
      "learning_rate": 1.16899376000738e-06,
      "loss": 0.0113,
      "step": 2700440
    },
    {
      "epoch": 4.419362018289769,
      "grad_norm": 0.2971656918525696,
      "learning_rate": 1.168927867793863e-06,
      "loss": 0.0085,
      "step": 2700460
    },
    {
      "epoch": 4.419394748728423,
      "grad_norm": 0.3115695118904114,
      "learning_rate": 1.1688619755803456e-06,
      "loss": 0.0066,
      "step": 2700480
    },
    {
      "epoch": 4.419427479167076,
      "grad_norm": 0.11085252463817596,
      "learning_rate": 1.1687960833668286e-06,
      "loss": 0.0067,
      "step": 2700500
    },
    {
      "epoch": 4.419460209605729,
      "grad_norm": 0.222184419631958,
      "learning_rate": 1.1687301911533115e-06,
      "loss": 0.0063,
      "step": 2700520
    },
    {
      "epoch": 4.419492940044383,
      "grad_norm": 0.22915129363536835,
      "learning_rate": 1.1686642989397943e-06,
      "loss": 0.0091,
      "step": 2700540
    },
    {
      "epoch": 4.4195256704830355,
      "grad_norm": 0.15471868216991425,
      "learning_rate": 1.1685984067262772e-06,
      "loss": 0.0081,
      "step": 2700560
    },
    {
      "epoch": 4.419558400921689,
      "grad_norm": 0.3654625713825226,
      "learning_rate": 1.1685325145127602e-06,
      "loss": 0.0059,
      "step": 2700580
    },
    {
      "epoch": 4.419591131360343,
      "grad_norm": 0.13391198217868805,
      "learning_rate": 1.168466622299243e-06,
      "loss": 0.0052,
      "step": 2700600
    },
    {
      "epoch": 4.419623861798996,
      "grad_norm": 0.2723800241947174,
      "learning_rate": 1.1684007300857259e-06,
      "loss": 0.0091,
      "step": 2700620
    },
    {
      "epoch": 4.419656592237649,
      "grad_norm": 0.8183175325393677,
      "learning_rate": 1.1683348378722086e-06,
      "loss": 0.0089,
      "step": 2700640
    },
    {
      "epoch": 4.4196893226763025,
      "grad_norm": 0.29368165135383606,
      "learning_rate": 1.1682689456586916e-06,
      "loss": 0.0087,
      "step": 2700660
    },
    {
      "epoch": 4.419722053114956,
      "grad_norm": 0.4449767768383026,
      "learning_rate": 1.1682030534451745e-06,
      "loss": 0.0052,
      "step": 2700680
    },
    {
      "epoch": 4.419754783553609,
      "grad_norm": 0.19779929518699646,
      "learning_rate": 1.1681371612316573e-06,
      "loss": 0.0093,
      "step": 2700700
    },
    {
      "epoch": 4.419787513992262,
      "grad_norm": 0.2295062392950058,
      "learning_rate": 1.1680712690181402e-06,
      "loss": 0.0111,
      "step": 2700720
    },
    {
      "epoch": 4.419820244430916,
      "grad_norm": 0.27999261021614075,
      "learning_rate": 1.168005376804623e-06,
      "loss": 0.0094,
      "step": 2700740
    },
    {
      "epoch": 4.419852974869569,
      "grad_norm": 0.30772969126701355,
      "learning_rate": 1.167939484591106e-06,
      "loss": 0.0079,
      "step": 2700760
    },
    {
      "epoch": 4.419885705308222,
      "grad_norm": 0.07394464313983917,
      "learning_rate": 1.1678735923775889e-06,
      "loss": 0.0093,
      "step": 2700780
    },
    {
      "epoch": 4.419918435746876,
      "grad_norm": 0.1214461475610733,
      "learning_rate": 1.1678077001640716e-06,
      "loss": 0.011,
      "step": 2700800
    },
    {
      "epoch": 4.4199511661855295,
      "grad_norm": 0.11535327881574631,
      "learning_rate": 1.1677418079505546e-06,
      "loss": 0.0057,
      "step": 2700820
    },
    {
      "epoch": 4.419983896624182,
      "grad_norm": 1.585950493812561,
      "learning_rate": 1.1676759157370375e-06,
      "loss": 0.0068,
      "step": 2700840
    },
    {
      "epoch": 4.420016627062836,
      "grad_norm": 0.3055320382118225,
      "learning_rate": 1.1676100235235202e-06,
      "loss": 0.01,
      "step": 2700860
    },
    {
      "epoch": 4.420049357501489,
      "grad_norm": 0.03600439429283142,
      "learning_rate": 1.1675441313100032e-06,
      "loss": 0.0085,
      "step": 2700880
    },
    {
      "epoch": 4.420082087940143,
      "grad_norm": 0.24265366792678833,
      "learning_rate": 1.167478239096486e-06,
      "loss": 0.0071,
      "step": 2700900
    },
    {
      "epoch": 4.420114818378796,
      "grad_norm": 0.4846067428588867,
      "learning_rate": 1.1674123468829689e-06,
      "loss": 0.0085,
      "step": 2700920
    },
    {
      "epoch": 4.420147548817449,
      "grad_norm": 0.31028512120246887,
      "learning_rate": 1.1673464546694518e-06,
      "loss": 0.0098,
      "step": 2700940
    },
    {
      "epoch": 4.420180279256103,
      "grad_norm": 0.2713164687156677,
      "learning_rate": 1.1672805624559348e-06,
      "loss": 0.0102,
      "step": 2700960
    },
    {
      "epoch": 4.420213009694756,
      "grad_norm": 0.17846965789794922,
      "learning_rate": 1.1672146702424175e-06,
      "loss": 0.0111,
      "step": 2700980
    },
    {
      "epoch": 4.420245740133409,
      "grad_norm": 0.6400724649429321,
      "learning_rate": 1.1671487780289003e-06,
      "loss": 0.0101,
      "step": 2701000
    },
    {
      "epoch": 4.420278470572063,
      "grad_norm": 0.23398064076900482,
      "learning_rate": 1.1670828858153834e-06,
      "loss": 0.0096,
      "step": 2701020
    },
    {
      "epoch": 4.4203112010107155,
      "grad_norm": 0.3682302236557007,
      "learning_rate": 1.1670169936018662e-06,
      "loss": 0.0118,
      "step": 2701040
    },
    {
      "epoch": 4.420343931449369,
      "grad_norm": 0.10656028985977173,
      "learning_rate": 1.1669511013883491e-06,
      "loss": 0.0077,
      "step": 2701060
    },
    {
      "epoch": 4.420376661888023,
      "grad_norm": 0.18116967380046844,
      "learning_rate": 1.1668852091748319e-06,
      "loss": 0.0124,
      "step": 2701080
    },
    {
      "epoch": 4.420409392326676,
      "grad_norm": 0.34345874190330505,
      "learning_rate": 1.1668193169613148e-06,
      "loss": 0.0117,
      "step": 2701100
    },
    {
      "epoch": 4.420442122765329,
      "grad_norm": 0.11062581092119217,
      "learning_rate": 1.1667534247477978e-06,
      "loss": 0.0096,
      "step": 2701120
    },
    {
      "epoch": 4.420474853203983,
      "grad_norm": 0.33718934655189514,
      "learning_rate": 1.1666875325342805e-06,
      "loss": 0.0104,
      "step": 2701140
    },
    {
      "epoch": 4.420507583642636,
      "grad_norm": 0.48310205340385437,
      "learning_rate": 1.1666216403207635e-06,
      "loss": 0.0104,
      "step": 2701160
    },
    {
      "epoch": 4.42054031408129,
      "grad_norm": 0.21601195633411407,
      "learning_rate": 1.1665557481072462e-06,
      "loss": 0.008,
      "step": 2701180
    },
    {
      "epoch": 4.4205730445199425,
      "grad_norm": 0.18865413963794708,
      "learning_rate": 1.1664898558937291e-06,
      "loss": 0.0067,
      "step": 2701200
    },
    {
      "epoch": 4.420605774958596,
      "grad_norm": 0.503261387348175,
      "learning_rate": 1.166423963680212e-06,
      "loss": 0.0103,
      "step": 2701220
    },
    {
      "epoch": 4.42063850539725,
      "grad_norm": 0.41905972361564636,
      "learning_rate": 1.1663580714666948e-06,
      "loss": 0.009,
      "step": 2701240
    },
    {
      "epoch": 4.420671235835902,
      "grad_norm": 0.2769213318824768,
      "learning_rate": 1.1662921792531778e-06,
      "loss": 0.0092,
      "step": 2701260
    },
    {
      "epoch": 4.420703966274556,
      "grad_norm": 0.2533377707004547,
      "learning_rate": 1.1662262870396607e-06,
      "loss": 0.0125,
      "step": 2701280
    },
    {
      "epoch": 4.42073669671321,
      "grad_norm": 0.27339673042297363,
      "learning_rate": 1.1661603948261435e-06,
      "loss": 0.0068,
      "step": 2701300
    },
    {
      "epoch": 4.420769427151862,
      "grad_norm": 0.33696773648262024,
      "learning_rate": 1.1660945026126264e-06,
      "loss": 0.0071,
      "step": 2701320
    },
    {
      "epoch": 4.420802157590516,
      "grad_norm": 0.25889262557029724,
      "learning_rate": 1.1660286103991092e-06,
      "loss": 0.007,
      "step": 2701340
    },
    {
      "epoch": 4.4208348880291695,
      "grad_norm": 0.1654757708311081,
      "learning_rate": 1.1659627181855921e-06,
      "loss": 0.0103,
      "step": 2701360
    },
    {
      "epoch": 4.420867618467823,
      "grad_norm": 0.14387854933738708,
      "learning_rate": 1.165896825972075e-06,
      "loss": 0.0129,
      "step": 2701380
    },
    {
      "epoch": 4.420900348906476,
      "grad_norm": 0.2809809744358063,
      "learning_rate": 1.1658309337585578e-06,
      "loss": 0.0074,
      "step": 2701400
    },
    {
      "epoch": 4.420933079345129,
      "grad_norm": 0.12796315550804138,
      "learning_rate": 1.1657650415450408e-06,
      "loss": 0.0098,
      "step": 2701420
    },
    {
      "epoch": 4.420965809783783,
      "grad_norm": 0.1879235953092575,
      "learning_rate": 1.1656991493315235e-06,
      "loss": 0.0101,
      "step": 2701440
    },
    {
      "epoch": 4.420998540222436,
      "grad_norm": 0.1940000355243683,
      "learning_rate": 1.1656332571180065e-06,
      "loss": 0.0079,
      "step": 2701460
    },
    {
      "epoch": 4.421031270661089,
      "grad_norm": 0.07404254376888275,
      "learning_rate": 1.1655673649044894e-06,
      "loss": 0.0088,
      "step": 2701480
    },
    {
      "epoch": 4.421064001099743,
      "grad_norm": 0.33500292897224426,
      "learning_rate": 1.1655014726909721e-06,
      "loss": 0.0065,
      "step": 2701500
    },
    {
      "epoch": 4.421096731538396,
      "grad_norm": 0.4652925431728363,
      "learning_rate": 1.165435580477455e-06,
      "loss": 0.0077,
      "step": 2701520
    },
    {
      "epoch": 4.421129461977049,
      "grad_norm": 0.322115033864975,
      "learning_rate": 1.165369688263938e-06,
      "loss": 0.0063,
      "step": 2701540
    },
    {
      "epoch": 4.421162192415703,
      "grad_norm": 0.174587219953537,
      "learning_rate": 1.1653037960504208e-06,
      "loss": 0.0065,
      "step": 2701560
    },
    {
      "epoch": 4.421194922854356,
      "grad_norm": 0.19500957429409027,
      "learning_rate": 1.1652379038369037e-06,
      "loss": 0.0099,
      "step": 2701580
    },
    {
      "epoch": 4.421227653293009,
      "grad_norm": 0.1623481810092926,
      "learning_rate": 1.1651720116233865e-06,
      "loss": 0.0105,
      "step": 2701600
    },
    {
      "epoch": 4.421260383731663,
      "grad_norm": 0.1807912290096283,
      "learning_rate": 1.1651061194098694e-06,
      "loss": 0.0041,
      "step": 2701620
    },
    {
      "epoch": 4.421293114170316,
      "grad_norm": 0.4746701717376709,
      "learning_rate": 1.1650402271963524e-06,
      "loss": 0.0098,
      "step": 2701640
    },
    {
      "epoch": 4.42132584460897,
      "grad_norm": 0.38076040148735046,
      "learning_rate": 1.1649743349828351e-06,
      "loss": 0.0083,
      "step": 2701660
    },
    {
      "epoch": 4.4213585750476225,
      "grad_norm": 0.28264009952545166,
      "learning_rate": 1.164908442769318e-06,
      "loss": 0.0097,
      "step": 2701680
    },
    {
      "epoch": 4.421391305486276,
      "grad_norm": 0.2564244270324707,
      "learning_rate": 1.1648425505558008e-06,
      "loss": 0.0078,
      "step": 2701700
    },
    {
      "epoch": 4.42142403592493,
      "grad_norm": 0.19442205131053925,
      "learning_rate": 1.1647766583422838e-06,
      "loss": 0.0056,
      "step": 2701720
    },
    {
      "epoch": 4.421456766363582,
      "grad_norm": 0.4161520004272461,
      "learning_rate": 1.1647107661287667e-06,
      "loss": 0.006,
      "step": 2701740
    },
    {
      "epoch": 4.421489496802236,
      "grad_norm": 0.3509644865989685,
      "learning_rate": 1.1646448739152495e-06,
      "loss": 0.0081,
      "step": 2701760
    },
    {
      "epoch": 4.42152222724089,
      "grad_norm": 0.20400643348693848,
      "learning_rate": 1.1645789817017324e-06,
      "loss": 0.0098,
      "step": 2701780
    },
    {
      "epoch": 4.421554957679543,
      "grad_norm": 0.19314387440681458,
      "learning_rate": 1.1645130894882152e-06,
      "loss": 0.0075,
      "step": 2701800
    },
    {
      "epoch": 4.421587688118196,
      "grad_norm": 0.4043801724910736,
      "learning_rate": 1.164447197274698e-06,
      "loss": 0.0093,
      "step": 2701820
    },
    {
      "epoch": 4.4216204185568495,
      "grad_norm": 0.22957530617713928,
      "learning_rate": 1.164381305061181e-06,
      "loss": 0.0112,
      "step": 2701840
    },
    {
      "epoch": 4.421653148995503,
      "grad_norm": 0.17526961863040924,
      "learning_rate": 1.1643154128476638e-06,
      "loss": 0.0071,
      "step": 2701860
    },
    {
      "epoch": 4.421685879434156,
      "grad_norm": 0.3317698836326599,
      "learning_rate": 1.1642495206341467e-06,
      "loss": 0.0114,
      "step": 2701880
    },
    {
      "epoch": 4.421718609872809,
      "grad_norm": 0.09561486542224884,
      "learning_rate": 1.1641836284206297e-06,
      "loss": 0.0096,
      "step": 2701900
    },
    {
      "epoch": 4.421751340311463,
      "grad_norm": 0.08525500446557999,
      "learning_rate": 1.1641177362071124e-06,
      "loss": 0.0062,
      "step": 2701920
    },
    {
      "epoch": 4.421784070750117,
      "grad_norm": 0.3578491806983948,
      "learning_rate": 1.1640518439935954e-06,
      "loss": 0.0094,
      "step": 2701940
    },
    {
      "epoch": 4.421816801188769,
      "grad_norm": 0.3073475658893585,
      "learning_rate": 1.1639859517800781e-06,
      "loss": 0.0117,
      "step": 2701960
    },
    {
      "epoch": 4.421849531627423,
      "grad_norm": 0.14133863151073456,
      "learning_rate": 1.163920059566561e-06,
      "loss": 0.0079,
      "step": 2701980
    },
    {
      "epoch": 4.4218822620660765,
      "grad_norm": 0.3947320878505707,
      "learning_rate": 1.163854167353044e-06,
      "loss": 0.0101,
      "step": 2702000
    },
    {
      "epoch": 4.421914992504729,
      "grad_norm": 0.10162081569433212,
      "learning_rate": 1.1637882751395268e-06,
      "loss": 0.0117,
      "step": 2702020
    },
    {
      "epoch": 4.421947722943383,
      "grad_norm": 0.21278129518032074,
      "learning_rate": 1.1637223829260097e-06,
      "loss": 0.0072,
      "step": 2702040
    },
    {
      "epoch": 4.421980453382036,
      "grad_norm": 0.23042571544647217,
      "learning_rate": 1.1636564907124925e-06,
      "loss": 0.0094,
      "step": 2702060
    },
    {
      "epoch": 4.42201318382069,
      "grad_norm": 0.3738589882850647,
      "learning_rate": 1.1635905984989754e-06,
      "loss": 0.0128,
      "step": 2702080
    },
    {
      "epoch": 4.422045914259343,
      "grad_norm": 0.4724830090999603,
      "learning_rate": 1.1635247062854584e-06,
      "loss": 0.0083,
      "step": 2702100
    },
    {
      "epoch": 4.422078644697996,
      "grad_norm": 0.27377963066101074,
      "learning_rate": 1.163458814071941e-06,
      "loss": 0.0052,
      "step": 2702120
    },
    {
      "epoch": 4.42211137513665,
      "grad_norm": 0.4982771873474121,
      "learning_rate": 1.163392921858424e-06,
      "loss": 0.0113,
      "step": 2702140
    },
    {
      "epoch": 4.422144105575303,
      "grad_norm": 0.07002799212932587,
      "learning_rate": 1.163327029644907e-06,
      "loss": 0.0074,
      "step": 2702160
    },
    {
      "epoch": 4.422176836013956,
      "grad_norm": 0.15022116899490356,
      "learning_rate": 1.1632611374313897e-06,
      "loss": 0.0164,
      "step": 2702180
    },
    {
      "epoch": 4.42220956645261,
      "grad_norm": 0.6440767049789429,
      "learning_rate": 1.1631952452178727e-06,
      "loss": 0.0065,
      "step": 2702200
    },
    {
      "epoch": 4.4222422968912625,
      "grad_norm": 0.10246169567108154,
      "learning_rate": 1.1631293530043554e-06,
      "loss": 0.0057,
      "step": 2702220
    },
    {
      "epoch": 4.422275027329916,
      "grad_norm": 0.2741706371307373,
      "learning_rate": 1.1630634607908386e-06,
      "loss": 0.0079,
      "step": 2702240
    },
    {
      "epoch": 4.42230775776857,
      "grad_norm": 0.23063786327838898,
      "learning_rate": 1.1629975685773213e-06,
      "loss": 0.0076,
      "step": 2702260
    },
    {
      "epoch": 4.422340488207223,
      "grad_norm": 0.3507612943649292,
      "learning_rate": 1.1629316763638043e-06,
      "loss": 0.0097,
      "step": 2702280
    },
    {
      "epoch": 4.422373218645876,
      "grad_norm": 0.15578043460845947,
      "learning_rate": 1.162865784150287e-06,
      "loss": 0.0092,
      "step": 2702300
    },
    {
      "epoch": 4.4224059490845296,
      "grad_norm": 0.21316106617450714,
      "learning_rate": 1.1627998919367698e-06,
      "loss": 0.016,
      "step": 2702320
    },
    {
      "epoch": 4.422438679523183,
      "grad_norm": 0.44523900747299194,
      "learning_rate": 1.162733999723253e-06,
      "loss": 0.0094,
      "step": 2702340
    },
    {
      "epoch": 4.422471409961837,
      "grad_norm": 0.14576159417629242,
      "learning_rate": 1.1626681075097357e-06,
      "loss": 0.0073,
      "step": 2702360
    },
    {
      "epoch": 4.4225041404004894,
      "grad_norm": 0.2362249344587326,
      "learning_rate": 1.1626022152962186e-06,
      "loss": 0.0117,
      "step": 2702380
    },
    {
      "epoch": 4.422536870839143,
      "grad_norm": 0.23237012326717377,
      "learning_rate": 1.1625363230827014e-06,
      "loss": 0.0093,
      "step": 2702400
    },
    {
      "epoch": 4.422569601277797,
      "grad_norm": 0.7555650472640991,
      "learning_rate": 1.1624704308691843e-06,
      "loss": 0.0099,
      "step": 2702420
    },
    {
      "epoch": 4.422602331716449,
      "grad_norm": 0.11841884255409241,
      "learning_rate": 1.1624045386556673e-06,
      "loss": 0.0076,
      "step": 2702440
    },
    {
      "epoch": 4.422635062155103,
      "grad_norm": 0.10279695689678192,
      "learning_rate": 1.16233864644215e-06,
      "loss": 0.0137,
      "step": 2702460
    },
    {
      "epoch": 4.4226677925937565,
      "grad_norm": 0.3835427761077881,
      "learning_rate": 1.162272754228633e-06,
      "loss": 0.0142,
      "step": 2702480
    },
    {
      "epoch": 4.422700523032409,
      "grad_norm": 0.2997531592845917,
      "learning_rate": 1.1622068620151157e-06,
      "loss": 0.0079,
      "step": 2702500
    },
    {
      "epoch": 4.422733253471063,
      "grad_norm": 0.25768589973449707,
      "learning_rate": 1.1621409698015987e-06,
      "loss": 0.0094,
      "step": 2702520
    },
    {
      "epoch": 4.422765983909716,
      "grad_norm": 0.23287196457386017,
      "learning_rate": 1.1620750775880816e-06,
      "loss": 0.0112,
      "step": 2702540
    },
    {
      "epoch": 4.42279871434837,
      "grad_norm": 0.20390291512012482,
      "learning_rate": 1.1620091853745643e-06,
      "loss": 0.0175,
      "step": 2702560
    },
    {
      "epoch": 4.422831444787023,
      "grad_norm": 0.08451960980892181,
      "learning_rate": 1.1619432931610473e-06,
      "loss": 0.0109,
      "step": 2702580
    },
    {
      "epoch": 4.422864175225676,
      "grad_norm": 0.46113452315330505,
      "learning_rate": 1.1618774009475302e-06,
      "loss": 0.0094,
      "step": 2702600
    },
    {
      "epoch": 4.42289690566433,
      "grad_norm": 0.3066851794719696,
      "learning_rate": 1.161811508734013e-06,
      "loss": 0.0116,
      "step": 2702620
    },
    {
      "epoch": 4.4229296361029835,
      "grad_norm": 0.44703739881515503,
      "learning_rate": 1.161745616520496e-06,
      "loss": 0.011,
      "step": 2702640
    },
    {
      "epoch": 4.422962366541636,
      "grad_norm": 0.12154973298311234,
      "learning_rate": 1.1616797243069787e-06,
      "loss": 0.0099,
      "step": 2702660
    },
    {
      "epoch": 4.42299509698029,
      "grad_norm": 0.09657204896211624,
      "learning_rate": 1.1616138320934616e-06,
      "loss": 0.0113,
      "step": 2702680
    },
    {
      "epoch": 4.423027827418943,
      "grad_norm": 0.14791975915431976,
      "learning_rate": 1.1615479398799446e-06,
      "loss": 0.0076,
      "step": 2702700
    },
    {
      "epoch": 4.423060557857596,
      "grad_norm": 0.06778421252965927,
      "learning_rate": 1.1614820476664273e-06,
      "loss": 0.0122,
      "step": 2702720
    },
    {
      "epoch": 4.42309328829625,
      "grad_norm": 0.07696855813264847,
      "learning_rate": 1.1614161554529103e-06,
      "loss": 0.0076,
      "step": 2702740
    },
    {
      "epoch": 4.423126018734903,
      "grad_norm": 0.36956945061683655,
      "learning_rate": 1.161350263239393e-06,
      "loss": 0.0076,
      "step": 2702760
    },
    {
      "epoch": 4.423158749173556,
      "grad_norm": 0.1327878087759018,
      "learning_rate": 1.161284371025876e-06,
      "loss": 0.0057,
      "step": 2702780
    },
    {
      "epoch": 4.42319147961221,
      "grad_norm": 0.09130855649709702,
      "learning_rate": 1.161218478812359e-06,
      "loss": 0.0049,
      "step": 2702800
    },
    {
      "epoch": 4.423224210050863,
      "grad_norm": 0.20285911858081818,
      "learning_rate": 1.1611525865988417e-06,
      "loss": 0.0098,
      "step": 2702820
    },
    {
      "epoch": 4.423256940489517,
      "grad_norm": 0.35599377751350403,
      "learning_rate": 1.1610866943853246e-06,
      "loss": 0.0103,
      "step": 2702840
    },
    {
      "epoch": 4.4232896709281695,
      "grad_norm": 0.4410525858402252,
      "learning_rate": 1.1610208021718076e-06,
      "loss": 0.0143,
      "step": 2702860
    },
    {
      "epoch": 4.423322401366823,
      "grad_norm": 0.4688246548175812,
      "learning_rate": 1.1609549099582903e-06,
      "loss": 0.011,
      "step": 2702880
    },
    {
      "epoch": 4.423355131805477,
      "grad_norm": 0.20149822533130646,
      "learning_rate": 1.1608890177447732e-06,
      "loss": 0.008,
      "step": 2702900
    },
    {
      "epoch": 4.423387862244129,
      "grad_norm": 0.2780429720878601,
      "learning_rate": 1.160823125531256e-06,
      "loss": 0.0105,
      "step": 2702920
    },
    {
      "epoch": 4.423420592682783,
      "grad_norm": 0.1488049328327179,
      "learning_rate": 1.160757233317739e-06,
      "loss": 0.0079,
      "step": 2702940
    },
    {
      "epoch": 4.423453323121437,
      "grad_norm": 0.2604138255119324,
      "learning_rate": 1.1606913411042219e-06,
      "loss": 0.0076,
      "step": 2702960
    },
    {
      "epoch": 4.42348605356009,
      "grad_norm": 0.1754283905029297,
      "learning_rate": 1.1606254488907046e-06,
      "loss": 0.0062,
      "step": 2702980
    },
    {
      "epoch": 4.423518783998743,
      "grad_norm": 0.12555184960365295,
      "learning_rate": 1.1605595566771876e-06,
      "loss": 0.0101,
      "step": 2703000
    },
    {
      "epoch": 4.4235515144373965,
      "grad_norm": 0.20287589728832245,
      "learning_rate": 1.1604936644636703e-06,
      "loss": 0.0077,
      "step": 2703020
    },
    {
      "epoch": 4.42358424487605,
      "grad_norm": 0.24436621367931366,
      "learning_rate": 1.1604277722501533e-06,
      "loss": 0.0083,
      "step": 2703040
    },
    {
      "epoch": 4.423616975314703,
      "grad_norm": 0.5680069923400879,
      "learning_rate": 1.1603618800366362e-06,
      "loss": 0.0117,
      "step": 2703060
    },
    {
      "epoch": 4.423649705753356,
      "grad_norm": 0.18638445436954498,
      "learning_rate": 1.160295987823119e-06,
      "loss": 0.0104,
      "step": 2703080
    },
    {
      "epoch": 4.42368243619201,
      "grad_norm": 0.13209111988544464,
      "learning_rate": 1.160230095609602e-06,
      "loss": 0.0072,
      "step": 2703100
    },
    {
      "epoch": 4.4237151666306636,
      "grad_norm": 0.15719056129455566,
      "learning_rate": 1.1601642033960849e-06,
      "loss": 0.0093,
      "step": 2703120
    },
    {
      "epoch": 4.423747897069316,
      "grad_norm": 0.09918858110904694,
      "learning_rate": 1.1600983111825676e-06,
      "loss": 0.0068,
      "step": 2703140
    },
    {
      "epoch": 4.42378062750797,
      "grad_norm": 0.05762442573904991,
      "learning_rate": 1.1600324189690506e-06,
      "loss": 0.0096,
      "step": 2703160
    },
    {
      "epoch": 4.4238133579466234,
      "grad_norm": 0.29374054074287415,
      "learning_rate": 1.1599665267555333e-06,
      "loss": 0.0061,
      "step": 2703180
    },
    {
      "epoch": 4.423846088385276,
      "grad_norm": 0.08090005069971085,
      "learning_rate": 1.1599006345420163e-06,
      "loss": 0.0111,
      "step": 2703200
    },
    {
      "epoch": 4.42387881882393,
      "grad_norm": 0.22179536521434784,
      "learning_rate": 1.1598347423284992e-06,
      "loss": 0.0073,
      "step": 2703220
    },
    {
      "epoch": 4.423911549262583,
      "grad_norm": 0.25113821029663086,
      "learning_rate": 1.159768850114982e-06,
      "loss": 0.0074,
      "step": 2703240
    },
    {
      "epoch": 4.423944279701237,
      "grad_norm": 0.13072030246257782,
      "learning_rate": 1.1597029579014649e-06,
      "loss": 0.0061,
      "step": 2703260
    },
    {
      "epoch": 4.42397701013989,
      "grad_norm": 0.41870442032814026,
      "learning_rate": 1.1596370656879476e-06,
      "loss": 0.009,
      "step": 2703280
    },
    {
      "epoch": 4.424009740578543,
      "grad_norm": 0.15207213163375854,
      "learning_rate": 1.1595711734744306e-06,
      "loss": 0.0059,
      "step": 2703300
    },
    {
      "epoch": 4.424042471017197,
      "grad_norm": 0.25123047828674316,
      "learning_rate": 1.1595052812609135e-06,
      "loss": 0.0166,
      "step": 2703320
    },
    {
      "epoch": 4.4240752014558495,
      "grad_norm": 0.29505568742752075,
      "learning_rate": 1.1594393890473963e-06,
      "loss": 0.0097,
      "step": 2703340
    },
    {
      "epoch": 4.424107931894503,
      "grad_norm": 0.47826167941093445,
      "learning_rate": 1.1593734968338792e-06,
      "loss": 0.0103,
      "step": 2703360
    },
    {
      "epoch": 4.424140662333157,
      "grad_norm": 0.25308793783187866,
      "learning_rate": 1.159307604620362e-06,
      "loss": 0.0098,
      "step": 2703380
    },
    {
      "epoch": 4.42417339277181,
      "grad_norm": 0.13480372726917267,
      "learning_rate": 1.159241712406845e-06,
      "loss": 0.0071,
      "step": 2703400
    },
    {
      "epoch": 4.424206123210463,
      "grad_norm": 0.11974184215068817,
      "learning_rate": 1.1591758201933279e-06,
      "loss": 0.0111,
      "step": 2703420
    },
    {
      "epoch": 4.424238853649117,
      "grad_norm": 0.15895909070968628,
      "learning_rate": 1.1591099279798106e-06,
      "loss": 0.011,
      "step": 2703440
    },
    {
      "epoch": 4.42427158408777,
      "grad_norm": 0.17246030271053314,
      "learning_rate": 1.1590440357662936e-06,
      "loss": 0.0106,
      "step": 2703460
    },
    {
      "epoch": 4.424304314526423,
      "grad_norm": 0.16609318554401398,
      "learning_rate": 1.1589781435527765e-06,
      "loss": 0.0126,
      "step": 2703480
    },
    {
      "epoch": 4.4243370449650765,
      "grad_norm": 0.08635160326957703,
      "learning_rate": 1.1589122513392593e-06,
      "loss": 0.0125,
      "step": 2703500
    },
    {
      "epoch": 4.42436977540373,
      "grad_norm": 0.1020728275179863,
      "learning_rate": 1.1588463591257422e-06,
      "loss": 0.0115,
      "step": 2703520
    },
    {
      "epoch": 4.424402505842384,
      "grad_norm": 0.14109356701374054,
      "learning_rate": 1.158780466912225e-06,
      "loss": 0.0102,
      "step": 2703540
    },
    {
      "epoch": 4.424435236281036,
      "grad_norm": 0.2229529619216919,
      "learning_rate": 1.1587145746987081e-06,
      "loss": 0.01,
      "step": 2703560
    },
    {
      "epoch": 4.42446796671969,
      "grad_norm": 0.17644038796424866,
      "learning_rate": 1.1586486824851908e-06,
      "loss": 0.0067,
      "step": 2703580
    },
    {
      "epoch": 4.424500697158344,
      "grad_norm": 0.3092227280139923,
      "learning_rate": 1.1585827902716738e-06,
      "loss": 0.008,
      "step": 2703600
    },
    {
      "epoch": 4.424533427596996,
      "grad_norm": 0.1670031100511551,
      "learning_rate": 1.1585168980581565e-06,
      "loss": 0.007,
      "step": 2703620
    },
    {
      "epoch": 4.42456615803565,
      "grad_norm": 0.2528105676174164,
      "learning_rate": 1.1584510058446393e-06,
      "loss": 0.01,
      "step": 2703640
    },
    {
      "epoch": 4.4245988884743035,
      "grad_norm": 0.24182939529418945,
      "learning_rate": 1.1583851136311224e-06,
      "loss": 0.0082,
      "step": 2703660
    },
    {
      "epoch": 4.424631618912957,
      "grad_norm": 0.11709821224212646,
      "learning_rate": 1.1583192214176052e-06,
      "loss": 0.0084,
      "step": 2703680
    },
    {
      "epoch": 4.42466434935161,
      "grad_norm": 0.2257583737373352,
      "learning_rate": 1.1582533292040881e-06,
      "loss": 0.0084,
      "step": 2703700
    },
    {
      "epoch": 4.424697079790263,
      "grad_norm": 0.2164882868528366,
      "learning_rate": 1.1581874369905709e-06,
      "loss": 0.0099,
      "step": 2703720
    },
    {
      "epoch": 4.424729810228917,
      "grad_norm": 0.0642772763967514,
      "learning_rate": 1.1581215447770538e-06,
      "loss": 0.0074,
      "step": 2703740
    },
    {
      "epoch": 4.42476254066757,
      "grad_norm": 0.2268613874912262,
      "learning_rate": 1.1580556525635368e-06,
      "loss": 0.0083,
      "step": 2703760
    },
    {
      "epoch": 4.424795271106223,
      "grad_norm": 0.2701435983181,
      "learning_rate": 1.1579897603500195e-06,
      "loss": 0.0054,
      "step": 2703780
    },
    {
      "epoch": 4.424828001544877,
      "grad_norm": 0.11170415580272675,
      "learning_rate": 1.1579238681365025e-06,
      "loss": 0.0086,
      "step": 2703800
    },
    {
      "epoch": 4.4248607319835305,
      "grad_norm": 0.2748070955276489,
      "learning_rate": 1.1578579759229852e-06,
      "loss": 0.0081,
      "step": 2703820
    },
    {
      "epoch": 4.424893462422183,
      "grad_norm": 0.24406559765338898,
      "learning_rate": 1.1577920837094682e-06,
      "loss": 0.0149,
      "step": 2703840
    },
    {
      "epoch": 4.424926192860837,
      "grad_norm": 0.05990525707602501,
      "learning_rate": 1.1577261914959511e-06,
      "loss": 0.0076,
      "step": 2703860
    },
    {
      "epoch": 4.42495892329949,
      "grad_norm": 0.19304805994033813,
      "learning_rate": 1.1576602992824338e-06,
      "loss": 0.0075,
      "step": 2703880
    },
    {
      "epoch": 4.424991653738143,
      "grad_norm": 0.24777095019817352,
      "learning_rate": 1.1575944070689168e-06,
      "loss": 0.0074,
      "step": 2703900
    },
    {
      "epoch": 4.425024384176797,
      "grad_norm": 0.4387950897216797,
      "learning_rate": 1.1575285148553998e-06,
      "loss": 0.0095,
      "step": 2703920
    },
    {
      "epoch": 4.42505711461545,
      "grad_norm": 0.11478173732757568,
      "learning_rate": 1.1574626226418825e-06,
      "loss": 0.0113,
      "step": 2703940
    },
    {
      "epoch": 4.425089845054103,
      "grad_norm": 0.05308901518583298,
      "learning_rate": 1.1573967304283654e-06,
      "loss": 0.0061,
      "step": 2703960
    },
    {
      "epoch": 4.425122575492757,
      "grad_norm": 0.12143149226903915,
      "learning_rate": 1.1573308382148482e-06,
      "loss": 0.0107,
      "step": 2703980
    },
    {
      "epoch": 4.42515530593141,
      "grad_norm": 0.1692231297492981,
      "learning_rate": 1.1572649460013311e-06,
      "loss": 0.0093,
      "step": 2704000
    },
    {
      "epoch": 4.425188036370064,
      "grad_norm": 0.17537878453731537,
      "learning_rate": 1.157199053787814e-06,
      "loss": 0.0091,
      "step": 2704020
    },
    {
      "epoch": 4.4252207668087165,
      "grad_norm": 0.053172871470451355,
      "learning_rate": 1.1571331615742968e-06,
      "loss": 0.0051,
      "step": 2704040
    },
    {
      "epoch": 4.42525349724737,
      "grad_norm": 0.29207557439804077,
      "learning_rate": 1.1570672693607798e-06,
      "loss": 0.0077,
      "step": 2704060
    },
    {
      "epoch": 4.425286227686024,
      "grad_norm": 0.14847201108932495,
      "learning_rate": 1.1570013771472625e-06,
      "loss": 0.0142,
      "step": 2704080
    },
    {
      "epoch": 4.425318958124677,
      "grad_norm": 0.23954813182353973,
      "learning_rate": 1.1569354849337455e-06,
      "loss": 0.0051,
      "step": 2704100
    },
    {
      "epoch": 4.42535168856333,
      "grad_norm": 0.2746380567550659,
      "learning_rate": 1.1568695927202284e-06,
      "loss": 0.0096,
      "step": 2704120
    },
    {
      "epoch": 4.4253844190019835,
      "grad_norm": 0.19714206457138062,
      "learning_rate": 1.1568037005067112e-06,
      "loss": 0.0087,
      "step": 2704140
    },
    {
      "epoch": 4.425417149440637,
      "grad_norm": 0.37605759501457214,
      "learning_rate": 1.1567378082931941e-06,
      "loss": 0.0103,
      "step": 2704160
    },
    {
      "epoch": 4.42544987987929,
      "grad_norm": 0.24022774398326874,
      "learning_rate": 1.156671916079677e-06,
      "loss": 0.0116,
      "step": 2704180
    },
    {
      "epoch": 4.425482610317943,
      "grad_norm": 0.3209766149520874,
      "learning_rate": 1.1566060238661598e-06,
      "loss": 0.015,
      "step": 2704200
    },
    {
      "epoch": 4.425515340756597,
      "grad_norm": 0.19200415909290314,
      "learning_rate": 1.1565401316526428e-06,
      "loss": 0.0097,
      "step": 2704220
    },
    {
      "epoch": 4.42554807119525,
      "grad_norm": 0.5160360336303711,
      "learning_rate": 1.1564742394391255e-06,
      "loss": 0.0096,
      "step": 2704240
    },
    {
      "epoch": 4.425580801633903,
      "grad_norm": 0.41789495944976807,
      "learning_rate": 1.1564083472256084e-06,
      "loss": 0.0077,
      "step": 2704260
    },
    {
      "epoch": 4.425613532072557,
      "grad_norm": 0.2402772605419159,
      "learning_rate": 1.1563424550120914e-06,
      "loss": 0.0105,
      "step": 2704280
    },
    {
      "epoch": 4.4256462625112105,
      "grad_norm": 0.3459969460964203,
      "learning_rate": 1.1562765627985741e-06,
      "loss": 0.0096,
      "step": 2704300
    },
    {
      "epoch": 4.425678992949863,
      "grad_norm": 0.5203526020050049,
      "learning_rate": 1.156210670585057e-06,
      "loss": 0.0089,
      "step": 2704320
    },
    {
      "epoch": 4.425711723388517,
      "grad_norm": 0.3568667471408844,
      "learning_rate": 1.1561447783715398e-06,
      "loss": 0.0088,
      "step": 2704340
    },
    {
      "epoch": 4.42574445382717,
      "grad_norm": 0.13328716158866882,
      "learning_rate": 1.1560788861580228e-06,
      "loss": 0.0092,
      "step": 2704360
    },
    {
      "epoch": 4.425777184265823,
      "grad_norm": 0.20641322433948517,
      "learning_rate": 1.1560129939445057e-06,
      "loss": 0.007,
      "step": 2704380
    },
    {
      "epoch": 4.425809914704477,
      "grad_norm": 0.18255628645420074,
      "learning_rate": 1.1559471017309885e-06,
      "loss": 0.0069,
      "step": 2704400
    },
    {
      "epoch": 4.42584264514313,
      "grad_norm": 0.17876258492469788,
      "learning_rate": 1.1558812095174714e-06,
      "loss": 0.0108,
      "step": 2704420
    },
    {
      "epoch": 4.425875375581784,
      "grad_norm": 0.3431960344314575,
      "learning_rate": 1.1558153173039544e-06,
      "loss": 0.0058,
      "step": 2704440
    },
    {
      "epoch": 4.425908106020437,
      "grad_norm": 0.28832200169563293,
      "learning_rate": 1.1557494250904371e-06,
      "loss": 0.0151,
      "step": 2704460
    },
    {
      "epoch": 4.42594083645909,
      "grad_norm": 0.18966776132583618,
      "learning_rate": 1.15568353287692e-06,
      "loss": 0.0116,
      "step": 2704480
    },
    {
      "epoch": 4.425973566897744,
      "grad_norm": 0.13891659677028656,
      "learning_rate": 1.1556176406634028e-06,
      "loss": 0.0103,
      "step": 2704500
    },
    {
      "epoch": 4.4260062973363965,
      "grad_norm": 0.3893338143825531,
      "learning_rate": 1.1555517484498858e-06,
      "loss": 0.0097,
      "step": 2704520
    },
    {
      "epoch": 4.42603902777505,
      "grad_norm": 0.1985263079404831,
      "learning_rate": 1.1554858562363687e-06,
      "loss": 0.0067,
      "step": 2704540
    },
    {
      "epoch": 4.426071758213704,
      "grad_norm": 0.11317642778158188,
      "learning_rate": 1.1554199640228514e-06,
      "loss": 0.0075,
      "step": 2704560
    },
    {
      "epoch": 4.426104488652357,
      "grad_norm": 0.4409865736961365,
      "learning_rate": 1.1553540718093344e-06,
      "loss": 0.0118,
      "step": 2704580
    },
    {
      "epoch": 4.42613721909101,
      "grad_norm": 0.18114984035491943,
      "learning_rate": 1.1552881795958171e-06,
      "loss": 0.0077,
      "step": 2704600
    },
    {
      "epoch": 4.426169949529664,
      "grad_norm": 0.375761479139328,
      "learning_rate": 1.1552222873823e-06,
      "loss": 0.0116,
      "step": 2704620
    },
    {
      "epoch": 4.426202679968317,
      "grad_norm": 0.11938291788101196,
      "learning_rate": 1.155156395168783e-06,
      "loss": 0.0066,
      "step": 2704640
    },
    {
      "epoch": 4.42623541040697,
      "grad_norm": 0.41647908091545105,
      "learning_rate": 1.1550905029552658e-06,
      "loss": 0.0085,
      "step": 2704660
    },
    {
      "epoch": 4.4262681408456235,
      "grad_norm": 0.43997642397880554,
      "learning_rate": 1.1550246107417487e-06,
      "loss": 0.0127,
      "step": 2704680
    },
    {
      "epoch": 4.426300871284277,
      "grad_norm": 0.24554477632045746,
      "learning_rate": 1.1549587185282315e-06,
      "loss": 0.0101,
      "step": 2704700
    },
    {
      "epoch": 4.426333601722931,
      "grad_norm": 0.2518295645713806,
      "learning_rate": 1.1548928263147144e-06,
      "loss": 0.0081,
      "step": 2704720
    },
    {
      "epoch": 4.426366332161583,
      "grad_norm": 0.22909554839134216,
      "learning_rate": 1.1548269341011974e-06,
      "loss": 0.011,
      "step": 2704740
    },
    {
      "epoch": 4.426399062600237,
      "grad_norm": 0.027694914489984512,
      "learning_rate": 1.1547610418876801e-06,
      "loss": 0.0061,
      "step": 2704760
    },
    {
      "epoch": 4.426431793038891,
      "grad_norm": 0.21226269006729126,
      "learning_rate": 1.154695149674163e-06,
      "loss": 0.007,
      "step": 2704780
    },
    {
      "epoch": 4.426464523477543,
      "grad_norm": 0.2887396514415741,
      "learning_rate": 1.154629257460646e-06,
      "loss": 0.0099,
      "step": 2704800
    },
    {
      "epoch": 4.426497253916197,
      "grad_norm": 0.11841760575771332,
      "learning_rate": 1.154563365247129e-06,
      "loss": 0.0105,
      "step": 2704820
    },
    {
      "epoch": 4.4265299843548505,
      "grad_norm": 0.31418004631996155,
      "learning_rate": 1.1544974730336117e-06,
      "loss": 0.0117,
      "step": 2704840
    },
    {
      "epoch": 4.426562714793504,
      "grad_norm": 0.34241628646850586,
      "learning_rate": 1.1544315808200944e-06,
      "loss": 0.0093,
      "step": 2704860
    },
    {
      "epoch": 4.426595445232157,
      "grad_norm": 0.0825171023607254,
      "learning_rate": 1.1543656886065776e-06,
      "loss": 0.0104,
      "step": 2704880
    },
    {
      "epoch": 4.42662817567081,
      "grad_norm": 0.35522493720054626,
      "learning_rate": 1.1542997963930604e-06,
      "loss": 0.0114,
      "step": 2704900
    },
    {
      "epoch": 4.426660906109464,
      "grad_norm": 0.18856653571128845,
      "learning_rate": 1.1542339041795433e-06,
      "loss": 0.0078,
      "step": 2704920
    },
    {
      "epoch": 4.426693636548117,
      "grad_norm": 0.40144863724708557,
      "learning_rate": 1.154168011966026e-06,
      "loss": 0.0057,
      "step": 2704940
    },
    {
      "epoch": 4.42672636698677,
      "grad_norm": 0.2672041654586792,
      "learning_rate": 1.1541021197525088e-06,
      "loss": 0.0057,
      "step": 2704960
    },
    {
      "epoch": 4.426759097425424,
      "grad_norm": 0.2896764874458313,
      "learning_rate": 1.154036227538992e-06,
      "loss": 0.01,
      "step": 2704980
    },
    {
      "epoch": 4.426791827864077,
      "grad_norm": 0.11968879401683807,
      "learning_rate": 1.1539703353254747e-06,
      "loss": 0.0076,
      "step": 2705000
    },
    {
      "epoch": 4.42682455830273,
      "grad_norm": 0.20298954844474792,
      "learning_rate": 1.1539044431119576e-06,
      "loss": 0.0109,
      "step": 2705020
    },
    {
      "epoch": 4.426857288741384,
      "grad_norm": 0.17708948254585266,
      "learning_rate": 1.1538385508984404e-06,
      "loss": 0.0049,
      "step": 2705040
    },
    {
      "epoch": 4.426890019180037,
      "grad_norm": 0.2357461303472519,
      "learning_rate": 1.1537726586849233e-06,
      "loss": 0.0106,
      "step": 2705060
    },
    {
      "epoch": 4.42692274961869,
      "grad_norm": 0.48070845007896423,
      "learning_rate": 1.1537067664714063e-06,
      "loss": 0.0099,
      "step": 2705080
    },
    {
      "epoch": 4.426955480057344,
      "grad_norm": 0.28177034854888916,
      "learning_rate": 1.153640874257889e-06,
      "loss": 0.0109,
      "step": 2705100
    },
    {
      "epoch": 4.426988210495997,
      "grad_norm": 0.06796935945749283,
      "learning_rate": 1.153574982044372e-06,
      "loss": 0.009,
      "step": 2705120
    },
    {
      "epoch": 4.427020940934651,
      "grad_norm": 0.13711975514888763,
      "learning_rate": 1.1535090898308547e-06,
      "loss": 0.0076,
      "step": 2705140
    },
    {
      "epoch": 4.4270536713733035,
      "grad_norm": 0.202272430062294,
      "learning_rate": 1.1534431976173377e-06,
      "loss": 0.0084,
      "step": 2705160
    },
    {
      "epoch": 4.427086401811957,
      "grad_norm": 0.1393507570028305,
      "learning_rate": 1.1533773054038206e-06,
      "loss": 0.014,
      "step": 2705180
    },
    {
      "epoch": 4.427119132250611,
      "grad_norm": 0.13348117470741272,
      "learning_rate": 1.1533114131903034e-06,
      "loss": 0.0109,
      "step": 2705200
    },
    {
      "epoch": 4.427151862689263,
      "grad_norm": 0.24257348477840424,
      "learning_rate": 1.1532455209767863e-06,
      "loss": 0.0103,
      "step": 2705220
    },
    {
      "epoch": 4.427184593127917,
      "grad_norm": 0.1352018564939499,
      "learning_rate": 1.1531796287632693e-06,
      "loss": 0.0146,
      "step": 2705240
    },
    {
      "epoch": 4.427217323566571,
      "grad_norm": 0.3581346869468689,
      "learning_rate": 1.153113736549752e-06,
      "loss": 0.0049,
      "step": 2705260
    },
    {
      "epoch": 4.427250054005224,
      "grad_norm": 0.40183985233306885,
      "learning_rate": 1.153047844336235e-06,
      "loss": 0.008,
      "step": 2705280
    },
    {
      "epoch": 4.427282784443877,
      "grad_norm": 0.7348836660385132,
      "learning_rate": 1.1529819521227177e-06,
      "loss": 0.0139,
      "step": 2705300
    },
    {
      "epoch": 4.4273155148825305,
      "grad_norm": 0.7000883221626282,
      "learning_rate": 1.1529160599092006e-06,
      "loss": 0.011,
      "step": 2705320
    },
    {
      "epoch": 4.427348245321184,
      "grad_norm": 0.13493458926677704,
      "learning_rate": 1.1528501676956836e-06,
      "loss": 0.0068,
      "step": 2705340
    },
    {
      "epoch": 4.427380975759837,
      "grad_norm": 0.10204774886369705,
      "learning_rate": 1.1527842754821663e-06,
      "loss": 0.0065,
      "step": 2705360
    },
    {
      "epoch": 4.42741370619849,
      "grad_norm": 0.26119285821914673,
      "learning_rate": 1.1527183832686493e-06,
      "loss": 0.0109,
      "step": 2705380
    },
    {
      "epoch": 4.427446436637144,
      "grad_norm": 1.0122119188308716,
      "learning_rate": 1.152652491055132e-06,
      "loss": 0.0094,
      "step": 2705400
    },
    {
      "epoch": 4.427479167075797,
      "grad_norm": 0.15417663753032684,
      "learning_rate": 1.152586598841615e-06,
      "loss": 0.007,
      "step": 2705420
    },
    {
      "epoch": 4.42751189751445,
      "grad_norm": 0.24327346682548523,
      "learning_rate": 1.152520706628098e-06,
      "loss": 0.0091,
      "step": 2705440
    },
    {
      "epoch": 4.427544627953104,
      "grad_norm": 0.5501616597175598,
      "learning_rate": 1.1524548144145807e-06,
      "loss": 0.0118,
      "step": 2705460
    },
    {
      "epoch": 4.4275773583917575,
      "grad_norm": 0.5340140461921692,
      "learning_rate": 1.1523889222010636e-06,
      "loss": 0.0142,
      "step": 2705480
    },
    {
      "epoch": 4.42761008883041,
      "grad_norm": 0.33421942591667175,
      "learning_rate": 1.1523230299875466e-06,
      "loss": 0.0074,
      "step": 2705500
    },
    {
      "epoch": 4.427642819269064,
      "grad_norm": 0.1574985533952713,
      "learning_rate": 1.1522571377740293e-06,
      "loss": 0.0108,
      "step": 2705520
    },
    {
      "epoch": 4.427675549707717,
      "grad_norm": 0.13007768988609314,
      "learning_rate": 1.1521912455605123e-06,
      "loss": 0.0098,
      "step": 2705540
    },
    {
      "epoch": 4.427708280146371,
      "grad_norm": 0.40914231538772583,
      "learning_rate": 1.152125353346995e-06,
      "loss": 0.0056,
      "step": 2705560
    },
    {
      "epoch": 4.427741010585024,
      "grad_norm": 0.13407598435878754,
      "learning_rate": 1.152059461133478e-06,
      "loss": 0.0107,
      "step": 2705580
    },
    {
      "epoch": 4.427773741023677,
      "grad_norm": 0.5144072771072388,
      "learning_rate": 1.151993568919961e-06,
      "loss": 0.009,
      "step": 2705600
    },
    {
      "epoch": 4.427806471462331,
      "grad_norm": 0.5222956538200378,
      "learning_rate": 1.1519276767064436e-06,
      "loss": 0.0073,
      "step": 2705620
    },
    {
      "epoch": 4.427839201900984,
      "grad_norm": 0.5709604024887085,
      "learning_rate": 1.1518617844929266e-06,
      "loss": 0.0067,
      "step": 2705640
    },
    {
      "epoch": 4.427871932339637,
      "grad_norm": 0.3186815679073334,
      "learning_rate": 1.1517958922794093e-06,
      "loss": 0.0081,
      "step": 2705660
    },
    {
      "epoch": 4.427904662778291,
      "grad_norm": 0.6263177394866943,
      "learning_rate": 1.1517300000658923e-06,
      "loss": 0.0096,
      "step": 2705680
    },
    {
      "epoch": 4.4279373932169435,
      "grad_norm": 0.30762219429016113,
      "learning_rate": 1.1516641078523752e-06,
      "loss": 0.0105,
      "step": 2705700
    },
    {
      "epoch": 4.427970123655597,
      "grad_norm": 0.2950326204299927,
      "learning_rate": 1.151598215638858e-06,
      "loss": 0.0087,
      "step": 2705720
    },
    {
      "epoch": 4.428002854094251,
      "grad_norm": 0.29216325283050537,
      "learning_rate": 1.151532323425341e-06,
      "loss": 0.0108,
      "step": 2705740
    },
    {
      "epoch": 4.428035584532904,
      "grad_norm": 0.0807695984840393,
      "learning_rate": 1.1514664312118239e-06,
      "loss": 0.0087,
      "step": 2705760
    },
    {
      "epoch": 4.428068314971557,
      "grad_norm": 0.0685546025633812,
      "learning_rate": 1.1514005389983066e-06,
      "loss": 0.0102,
      "step": 2705780
    },
    {
      "epoch": 4.4281010454102105,
      "grad_norm": 0.0658806785941124,
      "learning_rate": 1.1513346467847896e-06,
      "loss": 0.0078,
      "step": 2705800
    },
    {
      "epoch": 4.428133775848864,
      "grad_norm": 0.16702218353748322,
      "learning_rate": 1.1512687545712723e-06,
      "loss": 0.0066,
      "step": 2705820
    },
    {
      "epoch": 4.428166506287518,
      "grad_norm": 0.28499117493629456,
      "learning_rate": 1.1512028623577553e-06,
      "loss": 0.01,
      "step": 2705840
    },
    {
      "epoch": 4.42819923672617,
      "grad_norm": 0.28595608472824097,
      "learning_rate": 1.1511369701442382e-06,
      "loss": 0.011,
      "step": 2705860
    },
    {
      "epoch": 4.428231967164824,
      "grad_norm": 1.6308947801589966,
      "learning_rate": 1.151071077930721e-06,
      "loss": 0.0065,
      "step": 2705880
    },
    {
      "epoch": 4.428264697603478,
      "grad_norm": 0.3407549560070038,
      "learning_rate": 1.151005185717204e-06,
      "loss": 0.0083,
      "step": 2705900
    },
    {
      "epoch": 4.42829742804213,
      "grad_norm": 0.28421565890312195,
      "learning_rate": 1.1509392935036866e-06,
      "loss": 0.0083,
      "step": 2705920
    },
    {
      "epoch": 4.428330158480784,
      "grad_norm": 0.6303917169570923,
      "learning_rate": 1.1508734012901696e-06,
      "loss": 0.0086,
      "step": 2705940
    },
    {
      "epoch": 4.4283628889194375,
      "grad_norm": 0.2347637563943863,
      "learning_rate": 1.1508075090766525e-06,
      "loss": 0.0084,
      "step": 2705960
    },
    {
      "epoch": 4.42839561935809,
      "grad_norm": 0.40884843468666077,
      "learning_rate": 1.1507416168631353e-06,
      "loss": 0.0075,
      "step": 2705980
    },
    {
      "epoch": 4.428428349796744,
      "grad_norm": 0.04634622111916542,
      "learning_rate": 1.1506757246496182e-06,
      "loss": 0.0074,
      "step": 2706000
    },
    {
      "epoch": 4.428461080235397,
      "grad_norm": 0.672582745552063,
      "learning_rate": 1.150609832436101e-06,
      "loss": 0.01,
      "step": 2706020
    },
    {
      "epoch": 4.428493810674051,
      "grad_norm": 0.3079632818698883,
      "learning_rate": 1.150543940222584e-06,
      "loss": 0.0069,
      "step": 2706040
    },
    {
      "epoch": 4.428526541112704,
      "grad_norm": 0.46242812275886536,
      "learning_rate": 1.1504780480090669e-06,
      "loss": 0.01,
      "step": 2706060
    },
    {
      "epoch": 4.428559271551357,
      "grad_norm": 0.685554027557373,
      "learning_rate": 1.1504121557955496e-06,
      "loss": 0.0075,
      "step": 2706080
    },
    {
      "epoch": 4.428592001990011,
      "grad_norm": 0.14027619361877441,
      "learning_rate": 1.1503462635820326e-06,
      "loss": 0.0117,
      "step": 2706100
    },
    {
      "epoch": 4.428624732428664,
      "grad_norm": 0.23984050750732422,
      "learning_rate": 1.1502803713685155e-06,
      "loss": 0.0071,
      "step": 2706120
    },
    {
      "epoch": 4.428657462867317,
      "grad_norm": 0.19480091333389282,
      "learning_rate": 1.1502144791549985e-06,
      "loss": 0.0074,
      "step": 2706140
    },
    {
      "epoch": 4.428690193305971,
      "grad_norm": 0.1808856874704361,
      "learning_rate": 1.1501485869414812e-06,
      "loss": 0.0121,
      "step": 2706160
    },
    {
      "epoch": 4.428722923744624,
      "grad_norm": 0.18857677280902863,
      "learning_rate": 1.150082694727964e-06,
      "loss": 0.005,
      "step": 2706180
    },
    {
      "epoch": 4.428755654183277,
      "grad_norm": 0.2156457155942917,
      "learning_rate": 1.1500168025144471e-06,
      "loss": 0.007,
      "step": 2706200
    },
    {
      "epoch": 4.428788384621931,
      "grad_norm": 0.43406689167022705,
      "learning_rate": 1.1499509103009299e-06,
      "loss": 0.0145,
      "step": 2706220
    },
    {
      "epoch": 4.428821115060584,
      "grad_norm": 0.29870107769966125,
      "learning_rate": 1.1498850180874128e-06,
      "loss": 0.009,
      "step": 2706240
    },
    {
      "epoch": 4.428853845499237,
      "grad_norm": 0.1640656292438507,
      "learning_rate": 1.1498191258738955e-06,
      "loss": 0.0094,
      "step": 2706260
    },
    {
      "epoch": 4.428886575937891,
      "grad_norm": 0.16884037852287292,
      "learning_rate": 1.1497532336603783e-06,
      "loss": 0.0108,
      "step": 2706280
    },
    {
      "epoch": 4.428919306376544,
      "grad_norm": 0.09266433864831924,
      "learning_rate": 1.1496873414468615e-06,
      "loss": 0.008,
      "step": 2706300
    },
    {
      "epoch": 4.428952036815198,
      "grad_norm": 0.5543876886367798,
      "learning_rate": 1.1496214492333442e-06,
      "loss": 0.0106,
      "step": 2706320
    },
    {
      "epoch": 4.4289847672538505,
      "grad_norm": 2.2810511589050293,
      "learning_rate": 1.1495555570198271e-06,
      "loss": 0.0064,
      "step": 2706340
    },
    {
      "epoch": 4.429017497692504,
      "grad_norm": 0.457059770822525,
      "learning_rate": 1.1494896648063099e-06,
      "loss": 0.0106,
      "step": 2706360
    },
    {
      "epoch": 4.429050228131158,
      "grad_norm": 0.20337653160095215,
      "learning_rate": 1.1494237725927928e-06,
      "loss": 0.0161,
      "step": 2706380
    },
    {
      "epoch": 4.42908295856981,
      "grad_norm": 0.20808562636375427,
      "learning_rate": 1.1493578803792758e-06,
      "loss": 0.0082,
      "step": 2706400
    },
    {
      "epoch": 4.429115689008464,
      "grad_norm": 0.23907555639743805,
      "learning_rate": 1.1492919881657585e-06,
      "loss": 0.0084,
      "step": 2706420
    },
    {
      "epoch": 4.429148419447118,
      "grad_norm": 0.2170337736606598,
      "learning_rate": 1.1492260959522415e-06,
      "loss": 0.0108,
      "step": 2706440
    },
    {
      "epoch": 4.429181149885771,
      "grad_norm": 0.518701434135437,
      "learning_rate": 1.1491602037387242e-06,
      "loss": 0.0114,
      "step": 2706460
    },
    {
      "epoch": 4.429213880324424,
      "grad_norm": 0.4458765387535095,
      "learning_rate": 1.1490943115252072e-06,
      "loss": 0.0123,
      "step": 2706480
    },
    {
      "epoch": 4.4292466107630775,
      "grad_norm": 0.4609488546848297,
      "learning_rate": 1.1490284193116901e-06,
      "loss": 0.0094,
      "step": 2706500
    },
    {
      "epoch": 4.429279341201731,
      "grad_norm": 0.11573633551597595,
      "learning_rate": 1.1489625270981729e-06,
      "loss": 0.012,
      "step": 2706520
    },
    {
      "epoch": 4.429312071640384,
      "grad_norm": 0.1536780446767807,
      "learning_rate": 1.1488966348846558e-06,
      "loss": 0.0078,
      "step": 2706540
    },
    {
      "epoch": 4.429344802079037,
      "grad_norm": 0.2212977111339569,
      "learning_rate": 1.1488307426711388e-06,
      "loss": 0.0089,
      "step": 2706560
    },
    {
      "epoch": 4.429377532517691,
      "grad_norm": 0.1107340082526207,
      "learning_rate": 1.1487648504576215e-06,
      "loss": 0.0052,
      "step": 2706580
    },
    {
      "epoch": 4.4294102629563445,
      "grad_norm": 0.23795929551124573,
      "learning_rate": 1.1486989582441045e-06,
      "loss": 0.0127,
      "step": 2706600
    },
    {
      "epoch": 4.429442993394997,
      "grad_norm": 0.03748365119099617,
      "learning_rate": 1.1486330660305872e-06,
      "loss": 0.0107,
      "step": 2706620
    },
    {
      "epoch": 4.429475723833651,
      "grad_norm": 0.31259143352508545,
      "learning_rate": 1.1485671738170701e-06,
      "loss": 0.0135,
      "step": 2706640
    },
    {
      "epoch": 4.429508454272304,
      "grad_norm": 0.2501693367958069,
      "learning_rate": 1.148501281603553e-06,
      "loss": 0.0118,
      "step": 2706660
    },
    {
      "epoch": 4.429541184710957,
      "grad_norm": 0.04051528126001358,
      "learning_rate": 1.1484353893900358e-06,
      "loss": 0.0136,
      "step": 2706680
    },
    {
      "epoch": 4.429573915149611,
      "grad_norm": 0.4071231484413147,
      "learning_rate": 1.1483694971765188e-06,
      "loss": 0.0081,
      "step": 2706700
    },
    {
      "epoch": 4.429606645588264,
      "grad_norm": 0.2565321922302246,
      "learning_rate": 1.1483036049630015e-06,
      "loss": 0.0127,
      "step": 2706720
    },
    {
      "epoch": 4.429639376026918,
      "grad_norm": 0.19897162914276123,
      "learning_rate": 1.1482377127494845e-06,
      "loss": 0.0079,
      "step": 2706740
    },
    {
      "epoch": 4.429672106465571,
      "grad_norm": 0.15931186079978943,
      "learning_rate": 1.1481718205359674e-06,
      "loss": 0.0102,
      "step": 2706760
    },
    {
      "epoch": 4.429704836904224,
      "grad_norm": 0.6280803084373474,
      "learning_rate": 1.1481059283224502e-06,
      "loss": 0.011,
      "step": 2706780
    },
    {
      "epoch": 4.429737567342878,
      "grad_norm": 0.16454237699508667,
      "learning_rate": 1.1480400361089331e-06,
      "loss": 0.0095,
      "step": 2706800
    },
    {
      "epoch": 4.4297702977815305,
      "grad_norm": 0.20983929932117462,
      "learning_rate": 1.147974143895416e-06,
      "loss": 0.0076,
      "step": 2706820
    },
    {
      "epoch": 4.429803028220184,
      "grad_norm": 0.41336992383003235,
      "learning_rate": 1.1479082516818988e-06,
      "loss": 0.0124,
      "step": 2706840
    },
    {
      "epoch": 4.429835758658838,
      "grad_norm": 0.27697810530662537,
      "learning_rate": 1.1478423594683818e-06,
      "loss": 0.0098,
      "step": 2706860
    },
    {
      "epoch": 4.42986848909749,
      "grad_norm": 0.21281088888645172,
      "learning_rate": 1.1477764672548645e-06,
      "loss": 0.0095,
      "step": 2706880
    },
    {
      "epoch": 4.429901219536144,
      "grad_norm": 0.24919138848781586,
      "learning_rate": 1.1477105750413475e-06,
      "loss": 0.0092,
      "step": 2706900
    },
    {
      "epoch": 4.429933949974798,
      "grad_norm": 0.19776824116706848,
      "learning_rate": 1.1476446828278304e-06,
      "loss": 0.0092,
      "step": 2706920
    },
    {
      "epoch": 4.429966680413451,
      "grad_norm": 0.19417189061641693,
      "learning_rate": 1.1475787906143131e-06,
      "loss": 0.0094,
      "step": 2706940
    },
    {
      "epoch": 4.429999410852104,
      "grad_norm": 0.26263394951820374,
      "learning_rate": 1.147512898400796e-06,
      "loss": 0.0098,
      "step": 2706960
    },
    {
      "epoch": 4.4300321412907575,
      "grad_norm": 0.2797594368457794,
      "learning_rate": 1.1474470061872788e-06,
      "loss": 0.0092,
      "step": 2706980
    },
    {
      "epoch": 4.430064871729411,
      "grad_norm": 0.2579750716686249,
      "learning_rate": 1.1473811139737618e-06,
      "loss": 0.0064,
      "step": 2707000
    },
    {
      "epoch": 4.430097602168065,
      "grad_norm": 0.36849504709243774,
      "learning_rate": 1.1473152217602447e-06,
      "loss": 0.0074,
      "step": 2707020
    },
    {
      "epoch": 4.430130332606717,
      "grad_norm": 0.3753449618816376,
      "learning_rate": 1.1472493295467275e-06,
      "loss": 0.0092,
      "step": 2707040
    },
    {
      "epoch": 4.430163063045371,
      "grad_norm": 0.2503838539123535,
      "learning_rate": 1.1471834373332104e-06,
      "loss": 0.0114,
      "step": 2707060
    },
    {
      "epoch": 4.430195793484025,
      "grad_norm": 0.4466360807418823,
      "learning_rate": 1.1471175451196934e-06,
      "loss": 0.0118,
      "step": 2707080
    },
    {
      "epoch": 4.430228523922677,
      "grad_norm": 0.4175889790058136,
      "learning_rate": 1.1470516529061761e-06,
      "loss": 0.0126,
      "step": 2707100
    },
    {
      "epoch": 4.430261254361331,
      "grad_norm": 0.1764553040266037,
      "learning_rate": 1.146985760692659e-06,
      "loss": 0.0064,
      "step": 2707120
    },
    {
      "epoch": 4.4302939847999845,
      "grad_norm": 0.2288772165775299,
      "learning_rate": 1.1469198684791418e-06,
      "loss": 0.0075,
      "step": 2707140
    },
    {
      "epoch": 4.430326715238637,
      "grad_norm": 0.22741587460041046,
      "learning_rate": 1.1468539762656248e-06,
      "loss": 0.0083,
      "step": 2707160
    },
    {
      "epoch": 4.430359445677291,
      "grad_norm": 0.2663058638572693,
      "learning_rate": 1.1467880840521077e-06,
      "loss": 0.0127,
      "step": 2707180
    },
    {
      "epoch": 4.430392176115944,
      "grad_norm": 0.14763908088207245,
      "learning_rate": 1.1467221918385905e-06,
      "loss": 0.0101,
      "step": 2707200
    },
    {
      "epoch": 4.430424906554598,
      "grad_norm": 0.05010484158992767,
      "learning_rate": 1.1466562996250734e-06,
      "loss": 0.0083,
      "step": 2707220
    },
    {
      "epoch": 4.430457636993251,
      "grad_norm": 0.1700458973646164,
      "learning_rate": 1.1465904074115561e-06,
      "loss": 0.0099,
      "step": 2707240
    },
    {
      "epoch": 4.430490367431904,
      "grad_norm": 0.6267514824867249,
      "learning_rate": 1.146524515198039e-06,
      "loss": 0.0124,
      "step": 2707260
    },
    {
      "epoch": 4.430523097870558,
      "grad_norm": 0.14336171746253967,
      "learning_rate": 1.146458622984522e-06,
      "loss": 0.0065,
      "step": 2707280
    },
    {
      "epoch": 4.4305558283092115,
      "grad_norm": 0.47609686851501465,
      "learning_rate": 1.1463927307710048e-06,
      "loss": 0.01,
      "step": 2707300
    },
    {
      "epoch": 4.430588558747864,
      "grad_norm": 0.24279046058654785,
      "learning_rate": 1.1463268385574877e-06,
      "loss": 0.0112,
      "step": 2707320
    },
    {
      "epoch": 4.430621289186518,
      "grad_norm": 0.1535770297050476,
      "learning_rate": 1.1462609463439705e-06,
      "loss": 0.0124,
      "step": 2707340
    },
    {
      "epoch": 4.430654019625171,
      "grad_norm": 0.09900081902742386,
      "learning_rate": 1.1461950541304534e-06,
      "loss": 0.0073,
      "step": 2707360
    },
    {
      "epoch": 4.430686750063824,
      "grad_norm": 0.1856042742729187,
      "learning_rate": 1.1461291619169364e-06,
      "loss": 0.0075,
      "step": 2707380
    },
    {
      "epoch": 4.430719480502478,
      "grad_norm": 0.2555597722530365,
      "learning_rate": 1.1460632697034191e-06,
      "loss": 0.0073,
      "step": 2707400
    },
    {
      "epoch": 4.430752210941131,
      "grad_norm": 0.27220767736434937,
      "learning_rate": 1.145997377489902e-06,
      "loss": 0.0091,
      "step": 2707420
    },
    {
      "epoch": 4.430784941379784,
      "grad_norm": 0.15060627460479736,
      "learning_rate": 1.145931485276385e-06,
      "loss": 0.0074,
      "step": 2707440
    },
    {
      "epoch": 4.4308176718184376,
      "grad_norm": 0.43511509895324707,
      "learning_rate": 1.145865593062868e-06,
      "loss": 0.0139,
      "step": 2707460
    },
    {
      "epoch": 4.430850402257091,
      "grad_norm": 0.12495692074298859,
      "learning_rate": 1.1457997008493507e-06,
      "loss": 0.0083,
      "step": 2707480
    },
    {
      "epoch": 4.430883132695745,
      "grad_norm": 0.15455040335655212,
      "learning_rate": 1.1457338086358335e-06,
      "loss": 0.0109,
      "step": 2707500
    },
    {
      "epoch": 4.4309158631343974,
      "grad_norm": 0.20724186301231384,
      "learning_rate": 1.1456679164223166e-06,
      "loss": 0.0066,
      "step": 2707520
    },
    {
      "epoch": 4.430948593573051,
      "grad_norm": 0.17171816527843475,
      "learning_rate": 1.1456020242087994e-06,
      "loss": 0.0101,
      "step": 2707540
    },
    {
      "epoch": 4.430981324011705,
      "grad_norm": 0.2651541829109192,
      "learning_rate": 1.1455361319952823e-06,
      "loss": 0.0092,
      "step": 2707560
    },
    {
      "epoch": 4.431014054450357,
      "grad_norm": 1.5204018354415894,
      "learning_rate": 1.145470239781765e-06,
      "loss": 0.0083,
      "step": 2707580
    },
    {
      "epoch": 4.431046784889011,
      "grad_norm": 0.21483205258846283,
      "learning_rate": 1.1454043475682478e-06,
      "loss": 0.0085,
      "step": 2707600
    },
    {
      "epoch": 4.4310795153276645,
      "grad_norm": 0.298929899930954,
      "learning_rate": 1.145338455354731e-06,
      "loss": 0.013,
      "step": 2707620
    },
    {
      "epoch": 4.431112245766318,
      "grad_norm": 0.15403828024864197,
      "learning_rate": 1.1452725631412137e-06,
      "loss": 0.0124,
      "step": 2707640
    },
    {
      "epoch": 4.431144976204971,
      "grad_norm": 0.2916939854621887,
      "learning_rate": 1.1452066709276966e-06,
      "loss": 0.01,
      "step": 2707660
    },
    {
      "epoch": 4.431177706643624,
      "grad_norm": 0.07393229007720947,
      "learning_rate": 1.1451407787141794e-06,
      "loss": 0.0108,
      "step": 2707680
    },
    {
      "epoch": 4.431210437082278,
      "grad_norm": 0.1154308021068573,
      "learning_rate": 1.1450748865006623e-06,
      "loss": 0.0073,
      "step": 2707700
    },
    {
      "epoch": 4.431243167520931,
      "grad_norm": 0.30028900504112244,
      "learning_rate": 1.1450089942871453e-06,
      "loss": 0.0133,
      "step": 2707720
    },
    {
      "epoch": 4.431275897959584,
      "grad_norm": 0.37023985385894775,
      "learning_rate": 1.144943102073628e-06,
      "loss": 0.0116,
      "step": 2707740
    },
    {
      "epoch": 4.431308628398238,
      "grad_norm": 0.05171871557831764,
      "learning_rate": 1.144877209860111e-06,
      "loss": 0.0134,
      "step": 2707760
    },
    {
      "epoch": 4.4313413588368915,
      "grad_norm": 0.1947762370109558,
      "learning_rate": 1.1448113176465937e-06,
      "loss": 0.0156,
      "step": 2707780
    },
    {
      "epoch": 4.431374089275544,
      "grad_norm": 0.087248295545578,
      "learning_rate": 1.1447454254330767e-06,
      "loss": 0.013,
      "step": 2707800
    },
    {
      "epoch": 4.431406819714198,
      "grad_norm": 0.4704149663448334,
      "learning_rate": 1.1446795332195596e-06,
      "loss": 0.0079,
      "step": 2707820
    },
    {
      "epoch": 4.431439550152851,
      "grad_norm": 0.22563782334327698,
      "learning_rate": 1.1446136410060424e-06,
      "loss": 0.0115,
      "step": 2707840
    },
    {
      "epoch": 4.431472280591504,
      "grad_norm": 0.2943112254142761,
      "learning_rate": 1.1445477487925253e-06,
      "loss": 0.0103,
      "step": 2707860
    },
    {
      "epoch": 4.431505011030158,
      "grad_norm": 0.07782665640115738,
      "learning_rate": 1.1444818565790083e-06,
      "loss": 0.0079,
      "step": 2707880
    },
    {
      "epoch": 4.431537741468811,
      "grad_norm": 0.30235543847084045,
      "learning_rate": 1.144415964365491e-06,
      "loss": 0.0088,
      "step": 2707900
    },
    {
      "epoch": 4.431570471907465,
      "grad_norm": 0.5453686118125916,
      "learning_rate": 1.144350072151974e-06,
      "loss": 0.0092,
      "step": 2707920
    },
    {
      "epoch": 4.431603202346118,
      "grad_norm": 0.06557110697031021,
      "learning_rate": 1.1442841799384567e-06,
      "loss": 0.0066,
      "step": 2707940
    },
    {
      "epoch": 4.431635932784771,
      "grad_norm": 0.16173726320266724,
      "learning_rate": 1.1442182877249397e-06,
      "loss": 0.0114,
      "step": 2707960
    },
    {
      "epoch": 4.431668663223425,
      "grad_norm": 0.1756526380777359,
      "learning_rate": 1.1441523955114226e-06,
      "loss": 0.0104,
      "step": 2707980
    },
    {
      "epoch": 4.4317013936620775,
      "grad_norm": 0.31186026334762573,
      "learning_rate": 1.1440865032979053e-06,
      "loss": 0.0058,
      "step": 2708000
    },
    {
      "epoch": 4.431734124100731,
      "grad_norm": 0.07417794317007065,
      "learning_rate": 1.1440206110843883e-06,
      "loss": 0.0098,
      "step": 2708020
    },
    {
      "epoch": 4.431766854539385,
      "grad_norm": 0.27500125765800476,
      "learning_rate": 1.143954718870871e-06,
      "loss": 0.0068,
      "step": 2708040
    },
    {
      "epoch": 4.431799584978038,
      "grad_norm": 0.18031036853790283,
      "learning_rate": 1.143888826657354e-06,
      "loss": 0.0146,
      "step": 2708060
    },
    {
      "epoch": 4.431832315416691,
      "grad_norm": 0.23361103236675262,
      "learning_rate": 1.143822934443837e-06,
      "loss": 0.0059,
      "step": 2708080
    },
    {
      "epoch": 4.431865045855345,
      "grad_norm": 0.17425648868083954,
      "learning_rate": 1.1437570422303197e-06,
      "loss": 0.0107,
      "step": 2708100
    },
    {
      "epoch": 4.431897776293998,
      "grad_norm": 0.17845697700977325,
      "learning_rate": 1.1436911500168026e-06,
      "loss": 0.0118,
      "step": 2708120
    },
    {
      "epoch": 4.431930506732651,
      "grad_norm": 0.4043292701244354,
      "learning_rate": 1.1436252578032856e-06,
      "loss": 0.0132,
      "step": 2708140
    },
    {
      "epoch": 4.4319632371713045,
      "grad_norm": 0.386107474565506,
      "learning_rate": 1.1435593655897683e-06,
      "loss": 0.0072,
      "step": 2708160
    },
    {
      "epoch": 4.431995967609958,
      "grad_norm": 0.1893954575061798,
      "learning_rate": 1.1434934733762513e-06,
      "loss": 0.0109,
      "step": 2708180
    },
    {
      "epoch": 4.432028698048612,
      "grad_norm": 0.26208579540252686,
      "learning_rate": 1.143427581162734e-06,
      "loss": 0.008,
      "step": 2708200
    },
    {
      "epoch": 4.432061428487264,
      "grad_norm": 0.3317038416862488,
      "learning_rate": 1.143361688949217e-06,
      "loss": 0.0066,
      "step": 2708220
    },
    {
      "epoch": 4.432094158925918,
      "grad_norm": 0.12962903082370758,
      "learning_rate": 1.1432957967357e-06,
      "loss": 0.0086,
      "step": 2708240
    },
    {
      "epoch": 4.4321268893645716,
      "grad_norm": 0.3212120532989502,
      "learning_rate": 1.1432299045221827e-06,
      "loss": 0.0111,
      "step": 2708260
    },
    {
      "epoch": 4.432159619803224,
      "grad_norm": 0.10551556199789047,
      "learning_rate": 1.1431640123086656e-06,
      "loss": 0.0101,
      "step": 2708280
    },
    {
      "epoch": 4.432192350241878,
      "grad_norm": 0.34244632720947266,
      "learning_rate": 1.1430981200951483e-06,
      "loss": 0.0094,
      "step": 2708300
    },
    {
      "epoch": 4.4322250806805314,
      "grad_norm": 0.2199675440788269,
      "learning_rate": 1.1430322278816313e-06,
      "loss": 0.0065,
      "step": 2708320
    },
    {
      "epoch": 4.432257811119184,
      "grad_norm": 0.17954900860786438,
      "learning_rate": 1.1429663356681142e-06,
      "loss": 0.0066,
      "step": 2708340
    },
    {
      "epoch": 4.432290541557838,
      "grad_norm": 0.2534450590610504,
      "learning_rate": 1.142900443454597e-06,
      "loss": 0.0071,
      "step": 2708360
    },
    {
      "epoch": 4.432323271996491,
      "grad_norm": 0.32828161120414734,
      "learning_rate": 1.14283455124108e-06,
      "loss": 0.0096,
      "step": 2708380
    },
    {
      "epoch": 4.432356002435145,
      "grad_norm": 0.24564266204833984,
      "learning_rate": 1.1427686590275629e-06,
      "loss": 0.0082,
      "step": 2708400
    },
    {
      "epoch": 4.432388732873798,
      "grad_norm": 0.22309477627277374,
      "learning_rate": 1.1427027668140456e-06,
      "loss": 0.0064,
      "step": 2708420
    },
    {
      "epoch": 4.432421463312451,
      "grad_norm": 0.13803695142269135,
      "learning_rate": 1.1426368746005286e-06,
      "loss": 0.0079,
      "step": 2708440
    },
    {
      "epoch": 4.432454193751105,
      "grad_norm": 0.1857786476612091,
      "learning_rate": 1.1425709823870113e-06,
      "loss": 0.0085,
      "step": 2708460
    },
    {
      "epoch": 4.432486924189758,
      "grad_norm": 0.08007746189832687,
      "learning_rate": 1.1425050901734943e-06,
      "loss": 0.0072,
      "step": 2708480
    },
    {
      "epoch": 4.432519654628411,
      "grad_norm": 0.3297215700149536,
      "learning_rate": 1.1424391979599772e-06,
      "loss": 0.0098,
      "step": 2708500
    },
    {
      "epoch": 4.432552385067065,
      "grad_norm": 0.3886224925518036,
      "learning_rate": 1.14237330574646e-06,
      "loss": 0.0113,
      "step": 2708520
    },
    {
      "epoch": 4.432585115505718,
      "grad_norm": 0.39268749952316284,
      "learning_rate": 1.142307413532943e-06,
      "loss": 0.0087,
      "step": 2708540
    },
    {
      "epoch": 4.432617845944371,
      "grad_norm": 0.12075049430131912,
      "learning_rate": 1.1422415213194257e-06,
      "loss": 0.0041,
      "step": 2708560
    },
    {
      "epoch": 4.432650576383025,
      "grad_norm": 0.403636634349823,
      "learning_rate": 1.1421756291059086e-06,
      "loss": 0.0105,
      "step": 2708580
    },
    {
      "epoch": 4.432683306821678,
      "grad_norm": 0.22907379269599915,
      "learning_rate": 1.1421097368923916e-06,
      "loss": 0.0138,
      "step": 2708600
    },
    {
      "epoch": 4.432716037260331,
      "grad_norm": 0.06545327603816986,
      "learning_rate": 1.1420438446788743e-06,
      "loss": 0.007,
      "step": 2708620
    },
    {
      "epoch": 4.4327487676989845,
      "grad_norm": 0.36039096117019653,
      "learning_rate": 1.1419779524653572e-06,
      "loss": 0.0097,
      "step": 2708640
    },
    {
      "epoch": 4.432781498137638,
      "grad_norm": 0.14203853905200958,
      "learning_rate": 1.14191206025184e-06,
      "loss": 0.0094,
      "step": 2708660
    },
    {
      "epoch": 4.432814228576292,
      "grad_norm": 0.14132550358772278,
      "learning_rate": 1.141846168038323e-06,
      "loss": 0.0063,
      "step": 2708680
    },
    {
      "epoch": 4.432846959014944,
      "grad_norm": 0.21116583049297333,
      "learning_rate": 1.1417802758248059e-06,
      "loss": 0.0157,
      "step": 2708700
    },
    {
      "epoch": 4.432879689453598,
      "grad_norm": 0.04312703013420105,
      "learning_rate": 1.1417143836112886e-06,
      "loss": 0.0114,
      "step": 2708720
    },
    {
      "epoch": 4.432912419892252,
      "grad_norm": 0.11726192384958267,
      "learning_rate": 1.1416484913977716e-06,
      "loss": 0.0091,
      "step": 2708740
    },
    {
      "epoch": 4.432945150330905,
      "grad_norm": 0.10774017870426178,
      "learning_rate": 1.1415825991842545e-06,
      "loss": 0.0071,
      "step": 2708760
    },
    {
      "epoch": 4.432977880769558,
      "grad_norm": 0.2552632987499237,
      "learning_rate": 1.1415167069707375e-06,
      "loss": 0.009,
      "step": 2708780
    },
    {
      "epoch": 4.4330106112082115,
      "grad_norm": 0.33194950222969055,
      "learning_rate": 1.1414508147572202e-06,
      "loss": 0.01,
      "step": 2708800
    },
    {
      "epoch": 4.433043341646865,
      "grad_norm": 0.5137142539024353,
      "learning_rate": 1.141384922543703e-06,
      "loss": 0.0106,
      "step": 2708820
    },
    {
      "epoch": 4.433076072085518,
      "grad_norm": 0.11400297284126282,
      "learning_rate": 1.1413190303301861e-06,
      "loss": 0.0091,
      "step": 2708840
    },
    {
      "epoch": 4.433108802524171,
      "grad_norm": 0.21692033112049103,
      "learning_rate": 1.1412531381166689e-06,
      "loss": 0.008,
      "step": 2708860
    },
    {
      "epoch": 4.433141532962825,
      "grad_norm": 0.23283950984477997,
      "learning_rate": 1.1411872459031518e-06,
      "loss": 0.0107,
      "step": 2708880
    },
    {
      "epoch": 4.433174263401478,
      "grad_norm": 0.44073379039764404,
      "learning_rate": 1.1411213536896346e-06,
      "loss": 0.0099,
      "step": 2708900
    },
    {
      "epoch": 4.433206993840131,
      "grad_norm": 0.10445107519626617,
      "learning_rate": 1.1410554614761173e-06,
      "loss": 0.0087,
      "step": 2708920
    },
    {
      "epoch": 4.433239724278785,
      "grad_norm": 0.12018617987632751,
      "learning_rate": 1.1409895692626005e-06,
      "loss": 0.0088,
      "step": 2708940
    },
    {
      "epoch": 4.4332724547174385,
      "grad_norm": 0.29417043924331665,
      "learning_rate": 1.1409236770490832e-06,
      "loss": 0.0111,
      "step": 2708960
    },
    {
      "epoch": 4.433305185156091,
      "grad_norm": 0.40959954261779785,
      "learning_rate": 1.1408577848355662e-06,
      "loss": 0.01,
      "step": 2708980
    },
    {
      "epoch": 4.433337915594745,
      "grad_norm": 0.34401264786720276,
      "learning_rate": 1.1407918926220489e-06,
      "loss": 0.0099,
      "step": 2709000
    },
    {
      "epoch": 4.433370646033398,
      "grad_norm": 0.26097947359085083,
      "learning_rate": 1.1407260004085318e-06,
      "loss": 0.0079,
      "step": 2709020
    },
    {
      "epoch": 4.433403376472051,
      "grad_norm": 0.21479423344135284,
      "learning_rate": 1.1406601081950148e-06,
      "loss": 0.0076,
      "step": 2709040
    },
    {
      "epoch": 4.433436106910705,
      "grad_norm": 0.5325578451156616,
      "learning_rate": 1.1405942159814975e-06,
      "loss": 0.0067,
      "step": 2709060
    },
    {
      "epoch": 4.433468837349358,
      "grad_norm": 0.10146979987621307,
      "learning_rate": 1.1405283237679805e-06,
      "loss": 0.0085,
      "step": 2709080
    },
    {
      "epoch": 4.433501567788012,
      "grad_norm": 1.7576876878738403,
      "learning_rate": 1.1404624315544632e-06,
      "loss": 0.0131,
      "step": 2709100
    },
    {
      "epoch": 4.433534298226665,
      "grad_norm": 0.24409160017967224,
      "learning_rate": 1.1403965393409462e-06,
      "loss": 0.0091,
      "step": 2709120
    },
    {
      "epoch": 4.433567028665318,
      "grad_norm": 0.07335909456014633,
      "learning_rate": 1.1403306471274291e-06,
      "loss": 0.0093,
      "step": 2709140
    },
    {
      "epoch": 4.433599759103972,
      "grad_norm": 0.2198246568441391,
      "learning_rate": 1.1402647549139119e-06,
      "loss": 0.01,
      "step": 2709160
    },
    {
      "epoch": 4.4336324895426245,
      "grad_norm": 0.0970158576965332,
      "learning_rate": 1.1401988627003948e-06,
      "loss": 0.0079,
      "step": 2709180
    },
    {
      "epoch": 4.433665219981278,
      "grad_norm": 0.12919871509075165,
      "learning_rate": 1.1401329704868778e-06,
      "loss": 0.0061,
      "step": 2709200
    },
    {
      "epoch": 4.433697950419932,
      "grad_norm": 0.21175748109817505,
      "learning_rate": 1.1400670782733605e-06,
      "loss": 0.0085,
      "step": 2709220
    },
    {
      "epoch": 4.433730680858585,
      "grad_norm": 0.2428782880306244,
      "learning_rate": 1.1400011860598435e-06,
      "loss": 0.0088,
      "step": 2709240
    },
    {
      "epoch": 4.433763411297238,
      "grad_norm": 0.44006145000457764,
      "learning_rate": 1.1399352938463262e-06,
      "loss": 0.0109,
      "step": 2709260
    },
    {
      "epoch": 4.4337961417358915,
      "grad_norm": 0.08474333584308624,
      "learning_rate": 1.1398694016328092e-06,
      "loss": 0.0066,
      "step": 2709280
    },
    {
      "epoch": 4.433828872174545,
      "grad_norm": 0.12129797041416168,
      "learning_rate": 1.1398035094192921e-06,
      "loss": 0.0107,
      "step": 2709300
    },
    {
      "epoch": 4.433861602613198,
      "grad_norm": 0.14827461540699005,
      "learning_rate": 1.1397376172057748e-06,
      "loss": 0.006,
      "step": 2709320
    },
    {
      "epoch": 4.433894333051851,
      "grad_norm": 0.35898470878601074,
      "learning_rate": 1.1396717249922578e-06,
      "loss": 0.011,
      "step": 2709340
    },
    {
      "epoch": 4.433927063490505,
      "grad_norm": 0.7753132581710815,
      "learning_rate": 1.1396058327787405e-06,
      "loss": 0.0073,
      "step": 2709360
    },
    {
      "epoch": 4.433959793929159,
      "grad_norm": 0.07734347879886627,
      "learning_rate": 1.1395399405652235e-06,
      "loss": 0.0115,
      "step": 2709380
    },
    {
      "epoch": 4.433992524367811,
      "grad_norm": 0.24647817015647888,
      "learning_rate": 1.1394740483517064e-06,
      "loss": 0.0065,
      "step": 2709400
    },
    {
      "epoch": 4.434025254806465,
      "grad_norm": 0.5981988906860352,
      "learning_rate": 1.1394081561381892e-06,
      "loss": 0.0067,
      "step": 2709420
    },
    {
      "epoch": 4.4340579852451185,
      "grad_norm": 0.18606989085674286,
      "learning_rate": 1.1393422639246721e-06,
      "loss": 0.0086,
      "step": 2709440
    },
    {
      "epoch": 4.434090715683771,
      "grad_norm": Infinity,
      "learning_rate": 1.139276371711155e-06,
      "loss": 0.0126,
      "step": 2709460
    },
    {
      "epoch": 4.434123446122425,
      "grad_norm": 0.19494011998176575,
      "learning_rate": 1.1392104794976378e-06,
      "loss": 0.0067,
      "step": 2709480
    },
    {
      "epoch": 4.434156176561078,
      "grad_norm": 0.40418973565101624,
      "learning_rate": 1.1391445872841208e-06,
      "loss": 0.0106,
      "step": 2709500
    },
    {
      "epoch": 4.434188906999732,
      "grad_norm": 0.7660145163536072,
      "learning_rate": 1.1390786950706035e-06,
      "loss": 0.0089,
      "step": 2709520
    },
    {
      "epoch": 4.434221637438385,
      "grad_norm": 0.11379973590373993,
      "learning_rate": 1.1390128028570865e-06,
      "loss": 0.0122,
      "step": 2709540
    },
    {
      "epoch": 4.434254367877038,
      "grad_norm": 0.49644115567207336,
      "learning_rate": 1.1389469106435694e-06,
      "loss": 0.0087,
      "step": 2709560
    },
    {
      "epoch": 4.434287098315692,
      "grad_norm": 0.2692751884460449,
      "learning_rate": 1.1388810184300522e-06,
      "loss": 0.0088,
      "step": 2709580
    },
    {
      "epoch": 4.434319828754345,
      "grad_norm": 0.13635829091072083,
      "learning_rate": 1.1388151262165351e-06,
      "loss": 0.0081,
      "step": 2709600
    },
    {
      "epoch": 4.434352559192998,
      "grad_norm": 0.17306172847747803,
      "learning_rate": 1.1387492340030178e-06,
      "loss": 0.0093,
      "step": 2709620
    },
    {
      "epoch": 4.434385289631652,
      "grad_norm": 0.1706412434577942,
      "learning_rate": 1.1386833417895008e-06,
      "loss": 0.0058,
      "step": 2709640
    },
    {
      "epoch": 4.434418020070305,
      "grad_norm": 0.271165132522583,
      "learning_rate": 1.1386174495759838e-06,
      "loss": 0.0103,
      "step": 2709660
    },
    {
      "epoch": 4.434450750508958,
      "grad_norm": 0.21758292615413666,
      "learning_rate": 1.1385515573624665e-06,
      "loss": 0.0076,
      "step": 2709680
    },
    {
      "epoch": 4.434483480947612,
      "grad_norm": 0.11880796402692795,
      "learning_rate": 1.1384856651489494e-06,
      "loss": 0.0055,
      "step": 2709700
    },
    {
      "epoch": 4.434516211386265,
      "grad_norm": 0.22238804399967194,
      "learning_rate": 1.1384197729354324e-06,
      "loss": 0.0097,
      "step": 2709720
    },
    {
      "epoch": 4.434548941824918,
      "grad_norm": 0.36890992522239685,
      "learning_rate": 1.1383538807219151e-06,
      "loss": 0.0085,
      "step": 2709740
    },
    {
      "epoch": 4.434581672263572,
      "grad_norm": 0.2867332398891449,
      "learning_rate": 1.138287988508398e-06,
      "loss": 0.0076,
      "step": 2709760
    },
    {
      "epoch": 4.434614402702225,
      "grad_norm": 0.2741233706474304,
      "learning_rate": 1.1382220962948808e-06,
      "loss": 0.0093,
      "step": 2709780
    },
    {
      "epoch": 4.434647133140878,
      "grad_norm": 0.11032521724700928,
      "learning_rate": 1.1381562040813638e-06,
      "loss": 0.0099,
      "step": 2709800
    },
    {
      "epoch": 4.4346798635795315,
      "grad_norm": 0.1867162436246872,
      "learning_rate": 1.1380903118678467e-06,
      "loss": 0.0098,
      "step": 2709820
    },
    {
      "epoch": 4.434712594018185,
      "grad_norm": 0.1649850755929947,
      "learning_rate": 1.1380244196543295e-06,
      "loss": 0.0082,
      "step": 2709840
    },
    {
      "epoch": 4.434745324456839,
      "grad_norm": 0.11817733198404312,
      "learning_rate": 1.1379585274408124e-06,
      "loss": 0.0076,
      "step": 2709860
    },
    {
      "epoch": 4.434778054895491,
      "grad_norm": 0.2093755006790161,
      "learning_rate": 1.1378926352272952e-06,
      "loss": 0.0155,
      "step": 2709880
    },
    {
      "epoch": 4.434810785334145,
      "grad_norm": 0.35717087984085083,
      "learning_rate": 1.1378267430137781e-06,
      "loss": 0.014,
      "step": 2709900
    },
    {
      "epoch": 4.434843515772799,
      "grad_norm": 0.2883475124835968,
      "learning_rate": 1.137760850800261e-06,
      "loss": 0.0074,
      "step": 2709920
    },
    {
      "epoch": 4.434876246211452,
      "grad_norm": 0.13338541984558105,
      "learning_rate": 1.1376949585867438e-06,
      "loss": 0.0088,
      "step": 2709940
    },
    {
      "epoch": 4.434908976650105,
      "grad_norm": 0.09643658995628357,
      "learning_rate": 1.1376290663732268e-06,
      "loss": 0.0091,
      "step": 2709960
    },
    {
      "epoch": 4.4349417070887585,
      "grad_norm": 0.1332012414932251,
      "learning_rate": 1.1375631741597095e-06,
      "loss": 0.012,
      "step": 2709980
    },
    {
      "epoch": 4.434974437527412,
      "grad_norm": 0.1564231514930725,
      "learning_rate": 1.1374972819461924e-06,
      "loss": 0.0071,
      "step": 2710000
    },
    {
      "epoch": 4.435007167966065,
      "grad_norm": 0.3304568827152252,
      "learning_rate": 1.1374313897326754e-06,
      "loss": 0.0073,
      "step": 2710020
    },
    {
      "epoch": 4.435039898404718,
      "grad_norm": 0.2586846947669983,
      "learning_rate": 1.1373654975191581e-06,
      "loss": 0.0083,
      "step": 2710040
    },
    {
      "epoch": 4.435072628843372,
      "grad_norm": 0.23486125469207764,
      "learning_rate": 1.137299605305641e-06,
      "loss": 0.0091,
      "step": 2710060
    },
    {
      "epoch": 4.435105359282025,
      "grad_norm": 0.39455854892730713,
      "learning_rate": 1.137233713092124e-06,
      "loss": 0.0127,
      "step": 2710080
    },
    {
      "epoch": 4.435138089720678,
      "grad_norm": 0.0978633463382721,
      "learning_rate": 1.137167820878607e-06,
      "loss": 0.008,
      "step": 2710100
    },
    {
      "epoch": 4.435170820159332,
      "grad_norm": 0.21074862778186798,
      "learning_rate": 1.1371019286650897e-06,
      "loss": 0.0067,
      "step": 2710120
    },
    {
      "epoch": 4.435203550597985,
      "grad_norm": 0.11860985308885574,
      "learning_rate": 1.1370360364515725e-06,
      "loss": 0.0066,
      "step": 2710140
    },
    {
      "epoch": 4.435236281036638,
      "grad_norm": 0.26139187812805176,
      "learning_rate": 1.1369701442380556e-06,
      "loss": 0.0109,
      "step": 2710160
    },
    {
      "epoch": 4.435269011475292,
      "grad_norm": 0.2149762064218521,
      "learning_rate": 1.1369042520245384e-06,
      "loss": 0.0077,
      "step": 2710180
    },
    {
      "epoch": 4.435301741913945,
      "grad_norm": 0.1314714401960373,
      "learning_rate": 1.1368383598110213e-06,
      "loss": 0.0111,
      "step": 2710200
    },
    {
      "epoch": 4.435334472352599,
      "grad_norm": 0.2701452672481537,
      "learning_rate": 1.136772467597504e-06,
      "loss": 0.0064,
      "step": 2710220
    },
    {
      "epoch": 4.435367202791252,
      "grad_norm": 0.13429135084152222,
      "learning_rate": 1.1367065753839868e-06,
      "loss": 0.01,
      "step": 2710240
    },
    {
      "epoch": 4.435399933229905,
      "grad_norm": 0.46100151538848877,
      "learning_rate": 1.13664068317047e-06,
      "loss": 0.0069,
      "step": 2710260
    },
    {
      "epoch": 4.435432663668559,
      "grad_norm": 0.27751049399375916,
      "learning_rate": 1.1365747909569527e-06,
      "loss": 0.0113,
      "step": 2710280
    },
    {
      "epoch": 4.4354653941072115,
      "grad_norm": 0.3741323947906494,
      "learning_rate": 1.1365088987434357e-06,
      "loss": 0.0094,
      "step": 2710300
    },
    {
      "epoch": 4.435498124545865,
      "grad_norm": 0.17864146828651428,
      "learning_rate": 1.1364430065299184e-06,
      "loss": 0.0098,
      "step": 2710320
    },
    {
      "epoch": 4.435530854984519,
      "grad_norm": 0.22905591130256653,
      "learning_rate": 1.1363771143164014e-06,
      "loss": 0.0091,
      "step": 2710340
    },
    {
      "epoch": 4.435563585423171,
      "grad_norm": 0.15810883045196533,
      "learning_rate": 1.1363112221028843e-06,
      "loss": 0.0067,
      "step": 2710360
    },
    {
      "epoch": 4.435596315861825,
      "grad_norm": 0.29126790165901184,
      "learning_rate": 1.136245329889367e-06,
      "loss": 0.0075,
      "step": 2710380
    },
    {
      "epoch": 4.435629046300479,
      "grad_norm": 0.6035855412483215,
      "learning_rate": 1.13617943767585e-06,
      "loss": 0.0105,
      "step": 2710400
    },
    {
      "epoch": 4.435661776739132,
      "grad_norm": 0.11680036783218384,
      "learning_rate": 1.1361135454623327e-06,
      "loss": 0.0076,
      "step": 2710420
    },
    {
      "epoch": 4.435694507177785,
      "grad_norm": 0.04828936606645584,
      "learning_rate": 1.1360476532488157e-06,
      "loss": 0.0122,
      "step": 2710440
    },
    {
      "epoch": 4.4357272376164385,
      "grad_norm": 0.6549502611160278,
      "learning_rate": 1.1359817610352986e-06,
      "loss": 0.0118,
      "step": 2710460
    },
    {
      "epoch": 4.435759968055092,
      "grad_norm": 0.19993606209754944,
      "learning_rate": 1.1359158688217814e-06,
      "loss": 0.0099,
      "step": 2710480
    },
    {
      "epoch": 4.435792698493745,
      "grad_norm": 0.23965904116630554,
      "learning_rate": 1.1358499766082643e-06,
      "loss": 0.0097,
      "step": 2710500
    },
    {
      "epoch": 4.435825428932398,
      "grad_norm": 0.31394705176353455,
      "learning_rate": 1.1357840843947473e-06,
      "loss": 0.0097,
      "step": 2710520
    },
    {
      "epoch": 4.435858159371052,
      "grad_norm": 0.2753456234931946,
      "learning_rate": 1.13571819218123e-06,
      "loss": 0.012,
      "step": 2710540
    },
    {
      "epoch": 4.435890889809706,
      "grad_norm": 0.2351934313774109,
      "learning_rate": 1.135652299967713e-06,
      "loss": 0.0118,
      "step": 2710560
    },
    {
      "epoch": 4.435923620248358,
      "grad_norm": 0.3958755433559418,
      "learning_rate": 1.1355864077541957e-06,
      "loss": 0.0145,
      "step": 2710580
    },
    {
      "epoch": 4.435956350687012,
      "grad_norm": 0.5088492035865784,
      "learning_rate": 1.1355205155406787e-06,
      "loss": 0.014,
      "step": 2710600
    },
    {
      "epoch": 4.4359890811256655,
      "grad_norm": 0.30554258823394775,
      "learning_rate": 1.1354546233271616e-06,
      "loss": 0.0083,
      "step": 2710620
    },
    {
      "epoch": 4.436021811564318,
      "grad_norm": 0.22075901925563812,
      "learning_rate": 1.1353887311136444e-06,
      "loss": 0.0082,
      "step": 2710640
    },
    {
      "epoch": 4.436054542002972,
      "grad_norm": 0.12749812006950378,
      "learning_rate": 1.1353228389001273e-06,
      "loss": 0.0121,
      "step": 2710660
    },
    {
      "epoch": 4.436087272441625,
      "grad_norm": 0.46692314743995667,
      "learning_rate": 1.13525694668661e-06,
      "loss": 0.0089,
      "step": 2710680
    },
    {
      "epoch": 4.436120002880279,
      "grad_norm": 0.225311279296875,
      "learning_rate": 1.135191054473093e-06,
      "loss": 0.0076,
      "step": 2710700
    },
    {
      "epoch": 4.436152733318932,
      "grad_norm": 0.31334638595581055,
      "learning_rate": 1.135125162259576e-06,
      "loss": 0.0079,
      "step": 2710720
    },
    {
      "epoch": 4.436185463757585,
      "grad_norm": 0.204436793923378,
      "learning_rate": 1.1350592700460587e-06,
      "loss": 0.0081,
      "step": 2710740
    },
    {
      "epoch": 4.436218194196239,
      "grad_norm": 0.14383722841739655,
      "learning_rate": 1.1349933778325416e-06,
      "loss": 0.007,
      "step": 2710760
    },
    {
      "epoch": 4.436250924634892,
      "grad_norm": 0.2559093236923218,
      "learning_rate": 1.1349274856190246e-06,
      "loss": 0.0104,
      "step": 2710780
    },
    {
      "epoch": 4.436283655073545,
      "grad_norm": 0.3155601918697357,
      "learning_rate": 1.1348615934055073e-06,
      "loss": 0.0108,
      "step": 2710800
    },
    {
      "epoch": 4.436316385512199,
      "grad_norm": 0.5286800265312195,
      "learning_rate": 1.1347957011919903e-06,
      "loss": 0.0086,
      "step": 2710820
    },
    {
      "epoch": 4.436349115950852,
      "grad_norm": 0.4876595139503479,
      "learning_rate": 1.134729808978473e-06,
      "loss": 0.006,
      "step": 2710840
    },
    {
      "epoch": 4.436381846389505,
      "grad_norm": 0.18293283879756927,
      "learning_rate": 1.134663916764956e-06,
      "loss": 0.0071,
      "step": 2710860
    },
    {
      "epoch": 4.436414576828159,
      "grad_norm": 0.23692987859249115,
      "learning_rate": 1.134598024551439e-06,
      "loss": 0.0078,
      "step": 2710880
    },
    {
      "epoch": 4.436447307266812,
      "grad_norm": 0.09987641125917435,
      "learning_rate": 1.1345321323379217e-06,
      "loss": 0.0081,
      "step": 2710900
    },
    {
      "epoch": 4.436480037705465,
      "grad_norm": 0.32750993967056274,
      "learning_rate": 1.1344662401244046e-06,
      "loss": 0.0072,
      "step": 2710920
    },
    {
      "epoch": 4.4365127681441185,
      "grad_norm": 0.2637329697608948,
      "learning_rate": 1.1344003479108874e-06,
      "loss": 0.0155,
      "step": 2710940
    },
    {
      "epoch": 4.436545498582772,
      "grad_norm": 0.2845720946788788,
      "learning_rate": 1.1343344556973703e-06,
      "loss": 0.005,
      "step": 2710960
    },
    {
      "epoch": 4.436578229021426,
      "grad_norm": 0.2975284457206726,
      "learning_rate": 1.1342685634838533e-06,
      "loss": 0.0102,
      "step": 2710980
    },
    {
      "epoch": 4.436610959460078,
      "grad_norm": 0.2510274052619934,
      "learning_rate": 1.134202671270336e-06,
      "loss": 0.0104,
      "step": 2711000
    },
    {
      "epoch": 4.436643689898732,
      "grad_norm": 0.29896900057792664,
      "learning_rate": 1.134136779056819e-06,
      "loss": 0.0083,
      "step": 2711020
    },
    {
      "epoch": 4.436676420337386,
      "grad_norm": 0.3145878314971924,
      "learning_rate": 1.134070886843302e-06,
      "loss": 0.0102,
      "step": 2711040
    },
    {
      "epoch": 4.436709150776038,
      "grad_norm": 0.13566724956035614,
      "learning_rate": 1.1340049946297846e-06,
      "loss": 0.0076,
      "step": 2711060
    },
    {
      "epoch": 4.436741881214692,
      "grad_norm": 0.1940474957227707,
      "learning_rate": 1.1339391024162676e-06,
      "loss": 0.0085,
      "step": 2711080
    },
    {
      "epoch": 4.4367746116533455,
      "grad_norm": 0.30166104435920715,
      "learning_rate": 1.1338732102027503e-06,
      "loss": 0.0102,
      "step": 2711100
    },
    {
      "epoch": 4.436807342091999,
      "grad_norm": 0.13591736555099487,
      "learning_rate": 1.1338073179892333e-06,
      "loss": 0.0103,
      "step": 2711120
    },
    {
      "epoch": 4.436840072530652,
      "grad_norm": 0.10326092690229416,
      "learning_rate": 1.1337414257757162e-06,
      "loss": 0.0064,
      "step": 2711140
    },
    {
      "epoch": 4.436872802969305,
      "grad_norm": 0.4999353587627411,
      "learning_rate": 1.133675533562199e-06,
      "loss": 0.0102,
      "step": 2711160
    },
    {
      "epoch": 4.436905533407959,
      "grad_norm": 0.2942371368408203,
      "learning_rate": 1.133609641348682e-06,
      "loss": 0.0123,
      "step": 2711180
    },
    {
      "epoch": 4.436938263846612,
      "grad_norm": 0.27987876534461975,
      "learning_rate": 1.1335437491351647e-06,
      "loss": 0.0069,
      "step": 2711200
    },
    {
      "epoch": 4.436970994285265,
      "grad_norm": 0.05605821684002876,
      "learning_rate": 1.1334778569216476e-06,
      "loss": 0.0103,
      "step": 2711220
    },
    {
      "epoch": 4.437003724723919,
      "grad_norm": 0.10241734981536865,
      "learning_rate": 1.1334119647081306e-06,
      "loss": 0.0067,
      "step": 2711240
    },
    {
      "epoch": 4.4370364551625725,
      "grad_norm": 0.22263820469379425,
      "learning_rate": 1.1333460724946133e-06,
      "loss": 0.0072,
      "step": 2711260
    },
    {
      "epoch": 4.437069185601225,
      "grad_norm": 0.1056601032614708,
      "learning_rate": 1.1332801802810963e-06,
      "loss": 0.0158,
      "step": 2711280
    },
    {
      "epoch": 4.437101916039879,
      "grad_norm": 0.18211162090301514,
      "learning_rate": 1.1332142880675792e-06,
      "loss": 0.0077,
      "step": 2711300
    },
    {
      "epoch": 4.437134646478532,
      "grad_norm": 0.3550761938095093,
      "learning_rate": 1.133148395854062e-06,
      "loss": 0.01,
      "step": 2711320
    },
    {
      "epoch": 4.437167376917185,
      "grad_norm": 0.2041037678718567,
      "learning_rate": 1.133082503640545e-06,
      "loss": 0.0049,
      "step": 2711340
    },
    {
      "epoch": 4.437200107355839,
      "grad_norm": 0.05896298587322235,
      "learning_rate": 1.1330166114270276e-06,
      "loss": 0.0104,
      "step": 2711360
    },
    {
      "epoch": 4.437232837794492,
      "grad_norm": 0.24585898220539093,
      "learning_rate": 1.1329507192135106e-06,
      "loss": 0.0129,
      "step": 2711380
    },
    {
      "epoch": 4.437265568233146,
      "grad_norm": 0.33020448684692383,
      "learning_rate": 1.1328848269999935e-06,
      "loss": 0.006,
      "step": 2711400
    },
    {
      "epoch": 4.437298298671799,
      "grad_norm": 0.11024932563304901,
      "learning_rate": 1.1328189347864765e-06,
      "loss": 0.0097,
      "step": 2711420
    },
    {
      "epoch": 4.437331029110452,
      "grad_norm": 0.4683779180049896,
      "learning_rate": 1.1327530425729592e-06,
      "loss": 0.0164,
      "step": 2711440
    },
    {
      "epoch": 4.437363759549106,
      "grad_norm": 0.31368905305862427,
      "learning_rate": 1.132687150359442e-06,
      "loss": 0.0076,
      "step": 2711460
    },
    {
      "epoch": 4.4373964899877585,
      "grad_norm": 0.060528624802827835,
      "learning_rate": 1.1326212581459251e-06,
      "loss": 0.0105,
      "step": 2711480
    },
    {
      "epoch": 4.437429220426412,
      "grad_norm": 0.11874731630086899,
      "learning_rate": 1.1325553659324079e-06,
      "loss": 0.0091,
      "step": 2711500
    },
    {
      "epoch": 4.437461950865066,
      "grad_norm": 0.3358610272407532,
      "learning_rate": 1.1324894737188908e-06,
      "loss": 0.0087,
      "step": 2711520
    },
    {
      "epoch": 4.437494681303718,
      "grad_norm": 0.13606804609298706,
      "learning_rate": 1.1324235815053736e-06,
      "loss": 0.0074,
      "step": 2711540
    },
    {
      "epoch": 4.437527411742372,
      "grad_norm": 0.0900639221072197,
      "learning_rate": 1.1323576892918563e-06,
      "loss": 0.0059,
      "step": 2711560
    },
    {
      "epoch": 4.437560142181026,
      "grad_norm": 0.09653298556804657,
      "learning_rate": 1.1322917970783395e-06,
      "loss": 0.0059,
      "step": 2711580
    },
    {
      "epoch": 4.437592872619679,
      "grad_norm": 0.23475773632526398,
      "learning_rate": 1.1322259048648222e-06,
      "loss": 0.009,
      "step": 2711600
    },
    {
      "epoch": 4.437625603058332,
      "grad_norm": 0.1377171277999878,
      "learning_rate": 1.1321600126513052e-06,
      "loss": 0.0102,
      "step": 2711620
    },
    {
      "epoch": 4.4376583334969855,
      "grad_norm": 0.12821246683597565,
      "learning_rate": 1.132094120437788e-06,
      "loss": 0.0058,
      "step": 2711640
    },
    {
      "epoch": 4.437691063935639,
      "grad_norm": 0.11153452843427658,
      "learning_rate": 1.1320282282242709e-06,
      "loss": 0.0059,
      "step": 2711660
    },
    {
      "epoch": 4.437723794374293,
      "grad_norm": 0.5481055974960327,
      "learning_rate": 1.1319623360107538e-06,
      "loss": 0.0117,
      "step": 2711680
    },
    {
      "epoch": 4.437756524812945,
      "grad_norm": 0.1828891485929489,
      "learning_rate": 1.1318964437972365e-06,
      "loss": 0.0086,
      "step": 2711700
    },
    {
      "epoch": 4.437789255251599,
      "grad_norm": 0.07336077094078064,
      "learning_rate": 1.1318305515837195e-06,
      "loss": 0.0057,
      "step": 2711720
    },
    {
      "epoch": 4.4378219856902525,
      "grad_norm": 0.34489524364471436,
      "learning_rate": 1.1317646593702025e-06,
      "loss": 0.015,
      "step": 2711740
    },
    {
      "epoch": 4.437854716128905,
      "grad_norm": 0.3725477457046509,
      "learning_rate": 1.1316987671566852e-06,
      "loss": 0.0117,
      "step": 2711760
    },
    {
      "epoch": 4.437887446567559,
      "grad_norm": 0.16329437494277954,
      "learning_rate": 1.1316328749431681e-06,
      "loss": 0.0064,
      "step": 2711780
    },
    {
      "epoch": 4.437920177006212,
      "grad_norm": 0.30574938654899597,
      "learning_rate": 1.1315669827296509e-06,
      "loss": 0.0132,
      "step": 2711800
    },
    {
      "epoch": 4.437952907444865,
      "grad_norm": 0.2990790903568268,
      "learning_rate": 1.1315010905161338e-06,
      "loss": 0.0076,
      "step": 2711820
    },
    {
      "epoch": 4.437985637883519,
      "grad_norm": 0.3467067778110504,
      "learning_rate": 1.1314351983026168e-06,
      "loss": 0.0073,
      "step": 2711840
    },
    {
      "epoch": 4.438018368322172,
      "grad_norm": 0.6583622694015503,
      "learning_rate": 1.1313693060890995e-06,
      "loss": 0.0086,
      "step": 2711860
    },
    {
      "epoch": 4.438051098760826,
      "grad_norm": 0.5599386096000671,
      "learning_rate": 1.1313034138755825e-06,
      "loss": 0.0092,
      "step": 2711880
    },
    {
      "epoch": 4.438083829199479,
      "grad_norm": 0.5613085031509399,
      "learning_rate": 1.1312375216620652e-06,
      "loss": 0.0111,
      "step": 2711900
    },
    {
      "epoch": 4.438116559638132,
      "grad_norm": 0.03480073809623718,
      "learning_rate": 1.1311716294485482e-06,
      "loss": 0.0078,
      "step": 2711920
    },
    {
      "epoch": 4.438149290076786,
      "grad_norm": 0.19241975247859955,
      "learning_rate": 1.1311057372350311e-06,
      "loss": 0.0116,
      "step": 2711940
    },
    {
      "epoch": 4.438182020515439,
      "grad_norm": 0.288457989692688,
      "learning_rate": 1.1310398450215139e-06,
      "loss": 0.0135,
      "step": 2711960
    },
    {
      "epoch": 4.438214750954092,
      "grad_norm": 0.6087725162506104,
      "learning_rate": 1.1309739528079968e-06,
      "loss": 0.0056,
      "step": 2711980
    },
    {
      "epoch": 4.438247481392746,
      "grad_norm": 0.06001351401209831,
      "learning_rate": 1.1309080605944795e-06,
      "loss": 0.0065,
      "step": 2712000
    },
    {
      "epoch": 4.438280211831399,
      "grad_norm": 0.07889225333929062,
      "learning_rate": 1.1308421683809625e-06,
      "loss": 0.0081,
      "step": 2712020
    },
    {
      "epoch": 4.438312942270052,
      "grad_norm": 0.11420165747404099,
      "learning_rate": 1.1307762761674455e-06,
      "loss": 0.0127,
      "step": 2712040
    },
    {
      "epoch": 4.438345672708706,
      "grad_norm": 0.2446233630180359,
      "learning_rate": 1.1307103839539282e-06,
      "loss": 0.0125,
      "step": 2712060
    },
    {
      "epoch": 4.438378403147359,
      "grad_norm": 0.3869556486606598,
      "learning_rate": 1.1306444917404111e-06,
      "loss": 0.0089,
      "step": 2712080
    },
    {
      "epoch": 4.438411133586012,
      "grad_norm": 0.34635549783706665,
      "learning_rate": 1.130578599526894e-06,
      "loss": 0.0076,
      "step": 2712100
    },
    {
      "epoch": 4.4384438640246655,
      "grad_norm": 0.24975810945034027,
      "learning_rate": 1.1305127073133768e-06,
      "loss": 0.0106,
      "step": 2712120
    },
    {
      "epoch": 4.438476594463319,
      "grad_norm": 0.7262377142906189,
      "learning_rate": 1.1304468150998598e-06,
      "loss": 0.0092,
      "step": 2712140
    },
    {
      "epoch": 4.438509324901973,
      "grad_norm": 0.15934406220912933,
      "learning_rate": 1.1303809228863425e-06,
      "loss": 0.0067,
      "step": 2712160
    },
    {
      "epoch": 4.438542055340625,
      "grad_norm": 0.07583829760551453,
      "learning_rate": 1.1303150306728255e-06,
      "loss": 0.0085,
      "step": 2712180
    },
    {
      "epoch": 4.438574785779279,
      "grad_norm": 0.1668800413608551,
      "learning_rate": 1.1302491384593084e-06,
      "loss": 0.0081,
      "step": 2712200
    },
    {
      "epoch": 4.438607516217933,
      "grad_norm": 0.37356430292129517,
      "learning_rate": 1.1301832462457912e-06,
      "loss": 0.0067,
      "step": 2712220
    },
    {
      "epoch": 4.438640246656585,
      "grad_norm": 0.23193630576133728,
      "learning_rate": 1.1301173540322741e-06,
      "loss": 0.0076,
      "step": 2712240
    },
    {
      "epoch": 4.438672977095239,
      "grad_norm": 0.07733768224716187,
      "learning_rate": 1.1300514618187569e-06,
      "loss": 0.0098,
      "step": 2712260
    },
    {
      "epoch": 4.4387057075338925,
      "grad_norm": 0.10812842845916748,
      "learning_rate": 1.1299855696052398e-06,
      "loss": 0.0113,
      "step": 2712280
    },
    {
      "epoch": 4.438738437972546,
      "grad_norm": 0.23031747341156006,
      "learning_rate": 1.1299196773917228e-06,
      "loss": 0.008,
      "step": 2712300
    },
    {
      "epoch": 4.438771168411199,
      "grad_norm": 0.17589254677295685,
      "learning_rate": 1.1298537851782055e-06,
      "loss": 0.0101,
      "step": 2712320
    },
    {
      "epoch": 4.438803898849852,
      "grad_norm": 0.31087106466293335,
      "learning_rate": 1.1297878929646885e-06,
      "loss": 0.0066,
      "step": 2712340
    },
    {
      "epoch": 4.438836629288506,
      "grad_norm": 0.5440537929534912,
      "learning_rate": 1.1297220007511714e-06,
      "loss": 0.0122,
      "step": 2712360
    },
    {
      "epoch": 4.438869359727159,
      "grad_norm": 0.10212678462266922,
      "learning_rate": 1.1296561085376541e-06,
      "loss": 0.0075,
      "step": 2712380
    },
    {
      "epoch": 4.438902090165812,
      "grad_norm": 0.38014093041419983,
      "learning_rate": 1.129590216324137e-06,
      "loss": 0.0083,
      "step": 2712400
    },
    {
      "epoch": 4.438934820604466,
      "grad_norm": 1.1234418153762817,
      "learning_rate": 1.1295243241106198e-06,
      "loss": 0.0127,
      "step": 2712420
    },
    {
      "epoch": 4.4389675510431195,
      "grad_norm": 0.4387832283973694,
      "learning_rate": 1.1294584318971028e-06,
      "loss": 0.0096,
      "step": 2712440
    },
    {
      "epoch": 4.439000281481772,
      "grad_norm": 0.1144653856754303,
      "learning_rate": 1.1293925396835857e-06,
      "loss": 0.0064,
      "step": 2712460
    },
    {
      "epoch": 4.439033011920426,
      "grad_norm": 0.31108808517456055,
      "learning_rate": 1.1293266474700685e-06,
      "loss": 0.01,
      "step": 2712480
    },
    {
      "epoch": 4.439065742359079,
      "grad_norm": 0.5968168377876282,
      "learning_rate": 1.1292607552565514e-06,
      "loss": 0.011,
      "step": 2712500
    },
    {
      "epoch": 4.439098472797732,
      "grad_norm": 0.19601261615753174,
      "learning_rate": 1.1291948630430342e-06,
      "loss": 0.0062,
      "step": 2712520
    },
    {
      "epoch": 4.439131203236386,
      "grad_norm": 0.20701970160007477,
      "learning_rate": 1.1291289708295171e-06,
      "loss": 0.0084,
      "step": 2712540
    },
    {
      "epoch": 4.439163933675039,
      "grad_norm": 0.09138984978199005,
      "learning_rate": 1.129063078616e-06,
      "loss": 0.0113,
      "step": 2712560
    },
    {
      "epoch": 4.439196664113693,
      "grad_norm": 0.29922059178352356,
      "learning_rate": 1.1289971864024828e-06,
      "loss": 0.0093,
      "step": 2712580
    },
    {
      "epoch": 4.4392293945523456,
      "grad_norm": 0.23087839782238007,
      "learning_rate": 1.1289312941889658e-06,
      "loss": 0.0079,
      "step": 2712600
    },
    {
      "epoch": 4.439262124990999,
      "grad_norm": 0.1466122716665268,
      "learning_rate": 1.1288654019754487e-06,
      "loss": 0.0107,
      "step": 2712620
    },
    {
      "epoch": 4.439294855429653,
      "grad_norm": 0.3157464563846588,
      "learning_rate": 1.1287995097619315e-06,
      "loss": 0.0104,
      "step": 2712640
    },
    {
      "epoch": 4.4393275858683054,
      "grad_norm": 0.16607432067394257,
      "learning_rate": 1.1287336175484144e-06,
      "loss": 0.012,
      "step": 2712660
    },
    {
      "epoch": 4.439360316306959,
      "grad_norm": 1.6231935024261475,
      "learning_rate": 1.1286677253348971e-06,
      "loss": 0.0098,
      "step": 2712680
    },
    {
      "epoch": 4.439393046745613,
      "grad_norm": 0.05498651787638664,
      "learning_rate": 1.12860183312138e-06,
      "loss": 0.0064,
      "step": 2712700
    },
    {
      "epoch": 4.439425777184266,
      "grad_norm": 0.25623175501823425,
      "learning_rate": 1.128535940907863e-06,
      "loss": 0.0201,
      "step": 2712720
    },
    {
      "epoch": 4.439458507622919,
      "grad_norm": 0.2581372559070587,
      "learning_rate": 1.128470048694346e-06,
      "loss": 0.0088,
      "step": 2712740
    },
    {
      "epoch": 4.4394912380615725,
      "grad_norm": 0.48738420009613037,
      "learning_rate": 1.1284041564808287e-06,
      "loss": 0.0058,
      "step": 2712760
    },
    {
      "epoch": 4.439523968500226,
      "grad_norm": 0.12336570769548416,
      "learning_rate": 1.1283382642673115e-06,
      "loss": 0.0109,
      "step": 2712780
    },
    {
      "epoch": 4.439556698938879,
      "grad_norm": 0.12460614740848541,
      "learning_rate": 1.1282723720537946e-06,
      "loss": 0.0098,
      "step": 2712800
    },
    {
      "epoch": 4.439589429377532,
      "grad_norm": 0.15241673588752747,
      "learning_rate": 1.1282064798402774e-06,
      "loss": 0.0094,
      "step": 2712820
    },
    {
      "epoch": 4.439622159816186,
      "grad_norm": 0.6992957592010498,
      "learning_rate": 1.1281405876267603e-06,
      "loss": 0.007,
      "step": 2712840
    },
    {
      "epoch": 4.43965489025484,
      "grad_norm": 0.17256253957748413,
      "learning_rate": 1.128074695413243e-06,
      "loss": 0.0101,
      "step": 2712860
    },
    {
      "epoch": 4.439687620693492,
      "grad_norm": 0.18018470704555511,
      "learning_rate": 1.1280088031997258e-06,
      "loss": 0.0073,
      "step": 2712880
    },
    {
      "epoch": 4.439720351132146,
      "grad_norm": 0.12210837006568909,
      "learning_rate": 1.127942910986209e-06,
      "loss": 0.0113,
      "step": 2712900
    },
    {
      "epoch": 4.4397530815707995,
      "grad_norm": 0.1514071226119995,
      "learning_rate": 1.1278770187726917e-06,
      "loss": 0.0068,
      "step": 2712920
    },
    {
      "epoch": 4.439785812009452,
      "grad_norm": 0.3357190191745758,
      "learning_rate": 1.1278111265591747e-06,
      "loss": 0.0106,
      "step": 2712940
    },
    {
      "epoch": 4.439818542448106,
      "grad_norm": 0.32261261343955994,
      "learning_rate": 1.1277452343456574e-06,
      "loss": 0.01,
      "step": 2712960
    },
    {
      "epoch": 4.439851272886759,
      "grad_norm": 0.3780442178249359,
      "learning_rate": 1.1276793421321404e-06,
      "loss": 0.0112,
      "step": 2712980
    },
    {
      "epoch": 4.439884003325412,
      "grad_norm": 0.40154317021369934,
      "learning_rate": 1.1276134499186233e-06,
      "loss": 0.0108,
      "step": 2713000
    },
    {
      "epoch": 4.439916733764066,
      "grad_norm": 0.36314746737480164,
      "learning_rate": 1.127547557705106e-06,
      "loss": 0.0138,
      "step": 2713020
    },
    {
      "epoch": 4.439949464202719,
      "grad_norm": 0.13411983847618103,
      "learning_rate": 1.127481665491589e-06,
      "loss": 0.0122,
      "step": 2713040
    },
    {
      "epoch": 4.439982194641373,
      "grad_norm": 0.13056360185146332,
      "learning_rate": 1.127415773278072e-06,
      "loss": 0.0115,
      "step": 2713060
    },
    {
      "epoch": 4.440014925080026,
      "grad_norm": 0.1903223693370819,
      "learning_rate": 1.1273498810645547e-06,
      "loss": 0.0093,
      "step": 2713080
    },
    {
      "epoch": 4.440047655518679,
      "grad_norm": 0.1538049429655075,
      "learning_rate": 1.1272839888510376e-06,
      "loss": 0.0072,
      "step": 2713100
    },
    {
      "epoch": 4.440080385957333,
      "grad_norm": 0.11292855441570282,
      "learning_rate": 1.1272180966375204e-06,
      "loss": 0.0079,
      "step": 2713120
    },
    {
      "epoch": 4.440113116395986,
      "grad_norm": 0.08305690437555313,
      "learning_rate": 1.1271522044240033e-06,
      "loss": 0.0057,
      "step": 2713140
    },
    {
      "epoch": 4.440145846834639,
      "grad_norm": 0.20148099958896637,
      "learning_rate": 1.1270863122104863e-06,
      "loss": 0.0085,
      "step": 2713160
    },
    {
      "epoch": 4.440178577273293,
      "grad_norm": 0.38418659567832947,
      "learning_rate": 1.127020419996969e-06,
      "loss": 0.0103,
      "step": 2713180
    },
    {
      "epoch": 4.440211307711946,
      "grad_norm": 0.2926473617553711,
      "learning_rate": 1.126954527783452e-06,
      "loss": 0.0099,
      "step": 2713200
    },
    {
      "epoch": 4.440244038150599,
      "grad_norm": 0.2808294892311096,
      "learning_rate": 1.1268886355699347e-06,
      "loss": 0.0093,
      "step": 2713220
    },
    {
      "epoch": 4.440276768589253,
      "grad_norm": 0.12375478446483612,
      "learning_rate": 1.1268227433564177e-06,
      "loss": 0.0126,
      "step": 2713240
    },
    {
      "epoch": 4.440309499027906,
      "grad_norm": 0.07284417748451233,
      "learning_rate": 1.1267568511429006e-06,
      "loss": 0.0095,
      "step": 2713260
    },
    {
      "epoch": 4.440342229466559,
      "grad_norm": 0.17990440130233765,
      "learning_rate": 1.1266909589293834e-06,
      "loss": 0.0064,
      "step": 2713280
    },
    {
      "epoch": 4.4403749599052125,
      "grad_norm": 0.3850801885128021,
      "learning_rate": 1.1266250667158663e-06,
      "loss": 0.0099,
      "step": 2713300
    },
    {
      "epoch": 4.440407690343866,
      "grad_norm": 0.08388780057430267,
      "learning_rate": 1.126559174502349e-06,
      "loss": 0.0123,
      "step": 2713320
    },
    {
      "epoch": 4.44044042078252,
      "grad_norm": 0.36276036500930786,
      "learning_rate": 1.126493282288832e-06,
      "loss": 0.0067,
      "step": 2713340
    },
    {
      "epoch": 4.440473151221172,
      "grad_norm": 0.21437907218933105,
      "learning_rate": 1.126427390075315e-06,
      "loss": 0.0119,
      "step": 2713360
    },
    {
      "epoch": 4.440505881659826,
      "grad_norm": 0.05846840515732765,
      "learning_rate": 1.1263614978617977e-06,
      "loss": 0.0094,
      "step": 2713380
    },
    {
      "epoch": 4.4405386120984796,
      "grad_norm": 0.13158831000328064,
      "learning_rate": 1.1262956056482806e-06,
      "loss": 0.0079,
      "step": 2713400
    },
    {
      "epoch": 4.440571342537133,
      "grad_norm": 0.24007326364517212,
      "learning_rate": 1.1262297134347636e-06,
      "loss": 0.0131,
      "step": 2713420
    },
    {
      "epoch": 4.440604072975786,
      "grad_norm": 0.3379132151603699,
      "learning_rate": 1.1261638212212463e-06,
      "loss": 0.007,
      "step": 2713440
    },
    {
      "epoch": 4.4406368034144394,
      "grad_norm": 0.07589634507894516,
      "learning_rate": 1.1260979290077293e-06,
      "loss": 0.0094,
      "step": 2713460
    },
    {
      "epoch": 4.440669533853093,
      "grad_norm": 0.20893919467926025,
      "learning_rate": 1.126032036794212e-06,
      "loss": 0.0073,
      "step": 2713480
    },
    {
      "epoch": 4.440702264291746,
      "grad_norm": 0.33207598328590393,
      "learning_rate": 1.125966144580695e-06,
      "loss": 0.008,
      "step": 2713500
    },
    {
      "epoch": 4.440734994730399,
      "grad_norm": 0.08331683278083801,
      "learning_rate": 1.125900252367178e-06,
      "loss": 0.0102,
      "step": 2713520
    },
    {
      "epoch": 4.440767725169053,
      "grad_norm": 0.15685710310935974,
      "learning_rate": 1.1258343601536607e-06,
      "loss": 0.0087,
      "step": 2713540
    },
    {
      "epoch": 4.440800455607706,
      "grad_norm": 0.1094147339463234,
      "learning_rate": 1.1257684679401436e-06,
      "loss": 0.0064,
      "step": 2713560
    },
    {
      "epoch": 4.440833186046359,
      "grad_norm": 0.26164910197257996,
      "learning_rate": 1.1257025757266264e-06,
      "loss": 0.0146,
      "step": 2713580
    },
    {
      "epoch": 4.440865916485013,
      "grad_norm": 0.32123100757598877,
      "learning_rate": 1.1256366835131093e-06,
      "loss": 0.0066,
      "step": 2713600
    },
    {
      "epoch": 4.440898646923666,
      "grad_norm": 0.17545582354068756,
      "learning_rate": 1.1255707912995923e-06,
      "loss": 0.0127,
      "step": 2713620
    },
    {
      "epoch": 4.440931377362319,
      "grad_norm": 0.1558254063129425,
      "learning_rate": 1.125504899086075e-06,
      "loss": 0.0081,
      "step": 2713640
    },
    {
      "epoch": 4.440964107800973,
      "grad_norm": 0.0930480882525444,
      "learning_rate": 1.125439006872558e-06,
      "loss": 0.0102,
      "step": 2713660
    },
    {
      "epoch": 4.440996838239626,
      "grad_norm": 0.11665327101945877,
      "learning_rate": 1.125373114659041e-06,
      "loss": 0.0083,
      "step": 2713680
    },
    {
      "epoch": 4.441029568678279,
      "grad_norm": 0.25022658705711365,
      "learning_rate": 1.1253072224455237e-06,
      "loss": 0.0081,
      "step": 2713700
    },
    {
      "epoch": 4.441062299116933,
      "grad_norm": 0.07172758132219315,
      "learning_rate": 1.1252413302320066e-06,
      "loss": 0.01,
      "step": 2713720
    },
    {
      "epoch": 4.441095029555586,
      "grad_norm": 0.09289761632680893,
      "learning_rate": 1.1251754380184893e-06,
      "loss": 0.0108,
      "step": 2713740
    },
    {
      "epoch": 4.44112775999424,
      "grad_norm": 0.1489613950252533,
      "learning_rate": 1.1251095458049723e-06,
      "loss": 0.0076,
      "step": 2713760
    },
    {
      "epoch": 4.4411604904328925,
      "grad_norm": 0.32815155386924744,
      "learning_rate": 1.1250436535914552e-06,
      "loss": 0.0079,
      "step": 2713780
    },
    {
      "epoch": 4.441193220871546,
      "grad_norm": 0.13123294711112976,
      "learning_rate": 1.124977761377938e-06,
      "loss": 0.0114,
      "step": 2713800
    },
    {
      "epoch": 4.4412259513102,
      "grad_norm": 0.10836289077997208,
      "learning_rate": 1.124911869164421e-06,
      "loss": 0.0087,
      "step": 2713820
    },
    {
      "epoch": 4.441258681748852,
      "grad_norm": 0.0491119883954525,
      "learning_rate": 1.1248459769509037e-06,
      "loss": 0.0088,
      "step": 2713840
    },
    {
      "epoch": 4.441291412187506,
      "grad_norm": 0.392260879278183,
      "learning_rate": 1.1247800847373866e-06,
      "loss": 0.0087,
      "step": 2713860
    },
    {
      "epoch": 4.44132414262616,
      "grad_norm": 0.1387714147567749,
      "learning_rate": 1.1247141925238696e-06,
      "loss": 0.0061,
      "step": 2713880
    },
    {
      "epoch": 4.441356873064813,
      "grad_norm": 0.25429272651672363,
      "learning_rate": 1.1246483003103523e-06,
      "loss": 0.0087,
      "step": 2713900
    },
    {
      "epoch": 4.441389603503466,
      "grad_norm": 0.12161888927221298,
      "learning_rate": 1.1245824080968353e-06,
      "loss": 0.0064,
      "step": 2713920
    },
    {
      "epoch": 4.4414223339421195,
      "grad_norm": 0.23537063598632812,
      "learning_rate": 1.1245165158833182e-06,
      "loss": 0.0133,
      "step": 2713940
    },
    {
      "epoch": 4.441455064380773,
      "grad_norm": 0.26589658856391907,
      "learning_rate": 1.124450623669801e-06,
      "loss": 0.0062,
      "step": 2713960
    },
    {
      "epoch": 4.441487794819426,
      "grad_norm": 0.06303151696920395,
      "learning_rate": 1.124384731456284e-06,
      "loss": 0.0099,
      "step": 2713980
    },
    {
      "epoch": 4.441520525258079,
      "grad_norm": 0.09184611588716507,
      "learning_rate": 1.1243188392427667e-06,
      "loss": 0.0117,
      "step": 2714000
    },
    {
      "epoch": 4.441553255696733,
      "grad_norm": 0.3599680960178375,
      "learning_rate": 1.1242529470292496e-06,
      "loss": 0.0082,
      "step": 2714020
    },
    {
      "epoch": 4.441585986135387,
      "grad_norm": 0.17110969126224518,
      "learning_rate": 1.1241870548157326e-06,
      "loss": 0.0115,
      "step": 2714040
    },
    {
      "epoch": 4.441618716574039,
      "grad_norm": 0.2605617046356201,
      "learning_rate": 1.1241211626022155e-06,
      "loss": 0.0087,
      "step": 2714060
    },
    {
      "epoch": 4.441651447012693,
      "grad_norm": 0.2495923489332199,
      "learning_rate": 1.1240552703886982e-06,
      "loss": 0.0105,
      "step": 2714080
    },
    {
      "epoch": 4.4416841774513465,
      "grad_norm": 0.27432411909103394,
      "learning_rate": 1.123989378175181e-06,
      "loss": 0.0099,
      "step": 2714100
    },
    {
      "epoch": 4.441716907889999,
      "grad_norm": 0.2370741218328476,
      "learning_rate": 1.1239234859616642e-06,
      "loss": 0.0109,
      "step": 2714120
    },
    {
      "epoch": 4.441749638328653,
      "grad_norm": 0.15437328815460205,
      "learning_rate": 1.1238575937481469e-06,
      "loss": 0.0105,
      "step": 2714140
    },
    {
      "epoch": 4.441782368767306,
      "grad_norm": 0.16076582670211792,
      "learning_rate": 1.1237917015346298e-06,
      "loss": 0.0087,
      "step": 2714160
    },
    {
      "epoch": 4.44181509920596,
      "grad_norm": 0.3111182451248169,
      "learning_rate": 1.1237258093211126e-06,
      "loss": 0.0091,
      "step": 2714180
    },
    {
      "epoch": 4.441847829644613,
      "grad_norm": 0.15717485547065735,
      "learning_rate": 1.1236599171075953e-06,
      "loss": 0.0147,
      "step": 2714200
    },
    {
      "epoch": 4.441880560083266,
      "grad_norm": 0.2276793122291565,
      "learning_rate": 1.1235940248940785e-06,
      "loss": 0.0077,
      "step": 2714220
    },
    {
      "epoch": 4.44191329052192,
      "grad_norm": 0.0978340283036232,
      "learning_rate": 1.1235281326805612e-06,
      "loss": 0.0098,
      "step": 2714240
    },
    {
      "epoch": 4.441946020960573,
      "grad_norm": 0.2192392647266388,
      "learning_rate": 1.1234622404670442e-06,
      "loss": 0.0064,
      "step": 2714260
    },
    {
      "epoch": 4.441978751399226,
      "grad_norm": 0.21849286556243896,
      "learning_rate": 1.123396348253527e-06,
      "loss": 0.0064,
      "step": 2714280
    },
    {
      "epoch": 4.44201148183788,
      "grad_norm": 0.07259415090084076,
      "learning_rate": 1.1233304560400099e-06,
      "loss": 0.011,
      "step": 2714300
    },
    {
      "epoch": 4.442044212276533,
      "grad_norm": 0.13291504979133606,
      "learning_rate": 1.1232645638264928e-06,
      "loss": 0.0108,
      "step": 2714320
    },
    {
      "epoch": 4.442076942715186,
      "grad_norm": 0.10312046110630035,
      "learning_rate": 1.1231986716129756e-06,
      "loss": 0.0092,
      "step": 2714340
    },
    {
      "epoch": 4.44210967315384,
      "grad_norm": 0.3785494267940521,
      "learning_rate": 1.1231327793994585e-06,
      "loss": 0.0069,
      "step": 2714360
    },
    {
      "epoch": 4.442142403592493,
      "grad_norm": 0.053446460515260696,
      "learning_rate": 1.1230668871859415e-06,
      "loss": 0.0123,
      "step": 2714380
    },
    {
      "epoch": 4.442175134031146,
      "grad_norm": 0.1918882131576538,
      "learning_rate": 1.1230009949724242e-06,
      "loss": 0.0078,
      "step": 2714400
    },
    {
      "epoch": 4.4422078644697995,
      "grad_norm": 0.35854730010032654,
      "learning_rate": 1.1229351027589072e-06,
      "loss": 0.0082,
      "step": 2714420
    },
    {
      "epoch": 4.442240594908453,
      "grad_norm": 0.07190285623073578,
      "learning_rate": 1.1228692105453899e-06,
      "loss": 0.0093,
      "step": 2714440
    },
    {
      "epoch": 4.442273325347106,
      "grad_norm": 0.17116454243659973,
      "learning_rate": 1.1228033183318728e-06,
      "loss": 0.0051,
      "step": 2714460
    },
    {
      "epoch": 4.442306055785759,
      "grad_norm": 0.3200087547302246,
      "learning_rate": 1.1227374261183558e-06,
      "loss": 0.0061,
      "step": 2714480
    },
    {
      "epoch": 4.442338786224413,
      "grad_norm": 0.13940708339214325,
      "learning_rate": 1.1226715339048385e-06,
      "loss": 0.0069,
      "step": 2714500
    },
    {
      "epoch": 4.442371516663067,
      "grad_norm": 0.11360447108745575,
      "learning_rate": 1.1226056416913215e-06,
      "loss": 0.0101,
      "step": 2714520
    },
    {
      "epoch": 4.442404247101719,
      "grad_norm": 0.4875680208206177,
      "learning_rate": 1.1225397494778042e-06,
      "loss": 0.0064,
      "step": 2714540
    },
    {
      "epoch": 4.442436977540373,
      "grad_norm": 0.06935756653547287,
      "learning_rate": 1.1224738572642872e-06,
      "loss": 0.0088,
      "step": 2714560
    },
    {
      "epoch": 4.4424697079790265,
      "grad_norm": 0.2292943149805069,
      "learning_rate": 1.1224079650507701e-06,
      "loss": 0.0086,
      "step": 2714580
    },
    {
      "epoch": 4.44250243841768,
      "grad_norm": 0.48280054330825806,
      "learning_rate": 1.1223420728372529e-06,
      "loss": 0.0083,
      "step": 2714600
    },
    {
      "epoch": 4.442535168856333,
      "grad_norm": 0.1748484969139099,
      "learning_rate": 1.1222761806237358e-06,
      "loss": 0.006,
      "step": 2714620
    },
    {
      "epoch": 4.442567899294986,
      "grad_norm": 0.07193991541862488,
      "learning_rate": 1.1222102884102186e-06,
      "loss": 0.0081,
      "step": 2714640
    },
    {
      "epoch": 4.44260062973364,
      "grad_norm": 0.6085957884788513,
      "learning_rate": 1.1221443961967015e-06,
      "loss": 0.0067,
      "step": 2714660
    },
    {
      "epoch": 4.442633360172293,
      "grad_norm": 0.1729053109884262,
      "learning_rate": 1.1220785039831845e-06,
      "loss": 0.0072,
      "step": 2714680
    },
    {
      "epoch": 4.442666090610946,
      "grad_norm": 0.1546190083026886,
      "learning_rate": 1.1220126117696672e-06,
      "loss": 0.0084,
      "step": 2714700
    },
    {
      "epoch": 4.4426988210496,
      "grad_norm": 0.35195010900497437,
      "learning_rate": 1.1219467195561502e-06,
      "loss": 0.0152,
      "step": 2714720
    },
    {
      "epoch": 4.442731551488253,
      "grad_norm": 0.18769516050815582,
      "learning_rate": 1.121880827342633e-06,
      "loss": 0.0084,
      "step": 2714740
    },
    {
      "epoch": 4.442764281926906,
      "grad_norm": 0.3144029676914215,
      "learning_rate": 1.1218149351291158e-06,
      "loss": 0.0079,
      "step": 2714760
    },
    {
      "epoch": 4.44279701236556,
      "grad_norm": 0.11106829345226288,
      "learning_rate": 1.1217490429155988e-06,
      "loss": 0.0111,
      "step": 2714780
    },
    {
      "epoch": 4.442829742804213,
      "grad_norm": 0.1412351131439209,
      "learning_rate": 1.1216831507020815e-06,
      "loss": 0.0079,
      "step": 2714800
    },
    {
      "epoch": 4.442862473242866,
      "grad_norm": 0.3258523643016815,
      "learning_rate": 1.1216172584885645e-06,
      "loss": 0.0118,
      "step": 2714820
    },
    {
      "epoch": 4.44289520368152,
      "grad_norm": 0.7293763756752014,
      "learning_rate": 1.1215513662750474e-06,
      "loss": 0.0072,
      "step": 2714840
    },
    {
      "epoch": 4.442927934120173,
      "grad_norm": 0.29373306035995483,
      "learning_rate": 1.1214854740615302e-06,
      "loss": 0.0092,
      "step": 2714860
    },
    {
      "epoch": 4.442960664558827,
      "grad_norm": 0.19532985985279083,
      "learning_rate": 1.1214195818480131e-06,
      "loss": 0.0076,
      "step": 2714880
    },
    {
      "epoch": 4.44299339499748,
      "grad_norm": 0.6145823001861572,
      "learning_rate": 1.1213536896344959e-06,
      "loss": 0.0096,
      "step": 2714900
    },
    {
      "epoch": 4.443026125436133,
      "grad_norm": 0.3345824182033539,
      "learning_rate": 1.1212877974209788e-06,
      "loss": 0.0072,
      "step": 2714920
    },
    {
      "epoch": 4.443058855874787,
      "grad_norm": 0.1055607721209526,
      "learning_rate": 1.1212219052074618e-06,
      "loss": 0.0091,
      "step": 2714940
    },
    {
      "epoch": 4.4430915863134395,
      "grad_norm": 0.24296417832374573,
      "learning_rate": 1.1211560129939445e-06,
      "loss": 0.0088,
      "step": 2714960
    },
    {
      "epoch": 4.443124316752093,
      "grad_norm": 1.3964437246322632,
      "learning_rate": 1.1210901207804275e-06,
      "loss": 0.0078,
      "step": 2714980
    },
    {
      "epoch": 4.443157047190747,
      "grad_norm": 0.10308637470006943,
      "learning_rate": 1.1210242285669104e-06,
      "loss": 0.007,
      "step": 2715000
    },
    {
      "epoch": 4.443189777629399,
      "grad_norm": 0.051923010498285294,
      "learning_rate": 1.1209583363533932e-06,
      "loss": 0.0105,
      "step": 2715020
    },
    {
      "epoch": 4.443222508068053,
      "grad_norm": 0.21865937113761902,
      "learning_rate": 1.1208924441398761e-06,
      "loss": 0.0087,
      "step": 2715040
    },
    {
      "epoch": 4.443255238506707,
      "grad_norm": 0.2269376516342163,
      "learning_rate": 1.1208265519263588e-06,
      "loss": 0.0083,
      "step": 2715060
    },
    {
      "epoch": 4.44328796894536,
      "grad_norm": 0.04995274543762207,
      "learning_rate": 1.1207606597128418e-06,
      "loss": 0.0086,
      "step": 2715080
    },
    {
      "epoch": 4.443320699384013,
      "grad_norm": 0.11604636162519455,
      "learning_rate": 1.1206947674993248e-06,
      "loss": 0.0089,
      "step": 2715100
    },
    {
      "epoch": 4.4433534298226665,
      "grad_norm": 0.28280359506607056,
      "learning_rate": 1.1206288752858075e-06,
      "loss": 0.0133,
      "step": 2715120
    },
    {
      "epoch": 4.44338616026132,
      "grad_norm": 0.20241166651248932,
      "learning_rate": 1.1205629830722904e-06,
      "loss": 0.0118,
      "step": 2715140
    },
    {
      "epoch": 4.443418890699973,
      "grad_norm": 0.1753654032945633,
      "learning_rate": 1.1204970908587732e-06,
      "loss": 0.0098,
      "step": 2715160
    },
    {
      "epoch": 4.443451621138626,
      "grad_norm": 0.14258898794651031,
      "learning_rate": 1.1204311986452561e-06,
      "loss": 0.006,
      "step": 2715180
    },
    {
      "epoch": 4.44348435157728,
      "grad_norm": 0.04421296715736389,
      "learning_rate": 1.120365306431739e-06,
      "loss": 0.0064,
      "step": 2715200
    },
    {
      "epoch": 4.4435170820159335,
      "grad_norm": 0.2506178021430969,
      "learning_rate": 1.1202994142182218e-06,
      "loss": 0.0125,
      "step": 2715220
    },
    {
      "epoch": 4.443549812454586,
      "grad_norm": 0.29450523853302,
      "learning_rate": 1.1202335220047048e-06,
      "loss": 0.0168,
      "step": 2715240
    },
    {
      "epoch": 4.44358254289324,
      "grad_norm": 0.6703157424926758,
      "learning_rate": 1.1201676297911877e-06,
      "loss": 0.0103,
      "step": 2715260
    },
    {
      "epoch": 4.443615273331893,
      "grad_norm": 0.3618015944957733,
      "learning_rate": 1.1201017375776705e-06,
      "loss": 0.0143,
      "step": 2715280
    },
    {
      "epoch": 4.443648003770546,
      "grad_norm": 0.2384507656097412,
      "learning_rate": 1.1200358453641534e-06,
      "loss": 0.009,
      "step": 2715300
    },
    {
      "epoch": 4.4436807342092,
      "grad_norm": 0.33124643564224243,
      "learning_rate": 1.1199699531506362e-06,
      "loss": 0.0081,
      "step": 2715320
    },
    {
      "epoch": 4.443713464647853,
      "grad_norm": 0.14047175645828247,
      "learning_rate": 1.1199040609371191e-06,
      "loss": 0.0072,
      "step": 2715340
    },
    {
      "epoch": 4.443746195086507,
      "grad_norm": 0.09480836242437363,
      "learning_rate": 1.119838168723602e-06,
      "loss": 0.0076,
      "step": 2715360
    },
    {
      "epoch": 4.44377892552516,
      "grad_norm": 0.14317502081394196,
      "learning_rate": 1.119772276510085e-06,
      "loss": 0.0066,
      "step": 2715380
    },
    {
      "epoch": 4.443811655963813,
      "grad_norm": 0.370137095451355,
      "learning_rate": 1.1197063842965678e-06,
      "loss": 0.0099,
      "step": 2715400
    },
    {
      "epoch": 4.443844386402467,
      "grad_norm": 0.1054164469242096,
      "learning_rate": 1.1196404920830505e-06,
      "loss": 0.0093,
      "step": 2715420
    },
    {
      "epoch": 4.4438771168411195,
      "grad_norm": 0.3257908225059509,
      "learning_rate": 1.1195745998695337e-06,
      "loss": 0.0087,
      "step": 2715440
    },
    {
      "epoch": 4.443909847279773,
      "grad_norm": 0.16189996898174286,
      "learning_rate": 1.1195087076560164e-06,
      "loss": 0.0147,
      "step": 2715460
    },
    {
      "epoch": 4.443942577718427,
      "grad_norm": 0.2654288411140442,
      "learning_rate": 1.1194428154424993e-06,
      "loss": 0.0077,
      "step": 2715480
    },
    {
      "epoch": 4.44397530815708,
      "grad_norm": 0.21317966282367706,
      "learning_rate": 1.119376923228982e-06,
      "loss": 0.0077,
      "step": 2715500
    },
    {
      "epoch": 4.444008038595733,
      "grad_norm": 0.40422704815864563,
      "learning_rate": 1.1193110310154648e-06,
      "loss": 0.0077,
      "step": 2715520
    },
    {
      "epoch": 4.444040769034387,
      "grad_norm": 0.3215864598751068,
      "learning_rate": 1.119245138801948e-06,
      "loss": 0.0129,
      "step": 2715540
    },
    {
      "epoch": 4.44407349947304,
      "grad_norm": 0.786089301109314,
      "learning_rate": 1.1191792465884307e-06,
      "loss": 0.0117,
      "step": 2715560
    },
    {
      "epoch": 4.444106229911693,
      "grad_norm": 0.9178720712661743,
      "learning_rate": 1.1191133543749137e-06,
      "loss": 0.0128,
      "step": 2715580
    },
    {
      "epoch": 4.4441389603503465,
      "grad_norm": 0.5673655271530151,
      "learning_rate": 1.1190474621613964e-06,
      "loss": 0.0089,
      "step": 2715600
    },
    {
      "epoch": 4.444171690789,
      "grad_norm": 0.47315993905067444,
      "learning_rate": 1.1189815699478794e-06,
      "loss": 0.0049,
      "step": 2715620
    },
    {
      "epoch": 4.444204421227654,
      "grad_norm": 0.19024334847927094,
      "learning_rate": 1.1189156777343623e-06,
      "loss": 0.0094,
      "step": 2715640
    },
    {
      "epoch": 4.444237151666306,
      "grad_norm": 0.07700739055871964,
      "learning_rate": 1.118849785520845e-06,
      "loss": 0.0114,
      "step": 2715660
    },
    {
      "epoch": 4.44426988210496,
      "grad_norm": 0.06705942004919052,
      "learning_rate": 1.118783893307328e-06,
      "loss": 0.0103,
      "step": 2715680
    },
    {
      "epoch": 4.444302612543614,
      "grad_norm": 0.11135196685791016,
      "learning_rate": 1.118718001093811e-06,
      "loss": 0.0082,
      "step": 2715700
    },
    {
      "epoch": 4.444335342982266,
      "grad_norm": 0.23234988749027252,
      "learning_rate": 1.1186521088802937e-06,
      "loss": 0.0116,
      "step": 2715720
    },
    {
      "epoch": 4.44436807342092,
      "grad_norm": 0.522384524345398,
      "learning_rate": 1.1185862166667767e-06,
      "loss": 0.0112,
      "step": 2715740
    },
    {
      "epoch": 4.4444008038595735,
      "grad_norm": 0.2316732406616211,
      "learning_rate": 1.1185203244532594e-06,
      "loss": 0.0075,
      "step": 2715760
    },
    {
      "epoch": 4.444433534298227,
      "grad_norm": 0.13172273337841034,
      "learning_rate": 1.1184544322397423e-06,
      "loss": 0.0107,
      "step": 2715780
    },
    {
      "epoch": 4.44446626473688,
      "grad_norm": 0.07454761117696762,
      "learning_rate": 1.1183885400262253e-06,
      "loss": 0.0067,
      "step": 2715800
    },
    {
      "epoch": 4.444498995175533,
      "grad_norm": 0.5273052453994751,
      "learning_rate": 1.118322647812708e-06,
      "loss": 0.009,
      "step": 2715820
    },
    {
      "epoch": 4.444531725614187,
      "grad_norm": 0.30709582567214966,
      "learning_rate": 1.118256755599191e-06,
      "loss": 0.0059,
      "step": 2715840
    },
    {
      "epoch": 4.44456445605284,
      "grad_norm": 0.22023296356201172,
      "learning_rate": 1.1181908633856737e-06,
      "loss": 0.0137,
      "step": 2715860
    },
    {
      "epoch": 4.444597186491493,
      "grad_norm": 0.27264779806137085,
      "learning_rate": 1.1181249711721567e-06,
      "loss": 0.0087,
      "step": 2715880
    },
    {
      "epoch": 4.444629916930147,
      "grad_norm": 0.13180050253868103,
      "learning_rate": 1.1180590789586396e-06,
      "loss": 0.0098,
      "step": 2715900
    },
    {
      "epoch": 4.4446626473688,
      "grad_norm": 0.32078781723976135,
      "learning_rate": 1.1179931867451224e-06,
      "loss": 0.0099,
      "step": 2715920
    },
    {
      "epoch": 4.444695377807453,
      "grad_norm": 0.16031500697135925,
      "learning_rate": 1.1179272945316053e-06,
      "loss": 0.0069,
      "step": 2715940
    },
    {
      "epoch": 4.444728108246107,
      "grad_norm": 0.22082139551639557,
      "learning_rate": 1.117861402318088e-06,
      "loss": 0.0105,
      "step": 2715960
    },
    {
      "epoch": 4.44476083868476,
      "grad_norm": 0.043723225593566895,
      "learning_rate": 1.117795510104571e-06,
      "loss": 0.0101,
      "step": 2715980
    },
    {
      "epoch": 4.444793569123413,
      "grad_norm": 0.22435037791728973,
      "learning_rate": 1.117729617891054e-06,
      "loss": 0.0077,
      "step": 2716000
    },
    {
      "epoch": 4.444826299562067,
      "grad_norm": 0.25835517048835754,
      "learning_rate": 1.1176637256775367e-06,
      "loss": 0.0118,
      "step": 2716020
    },
    {
      "epoch": 4.44485903000072,
      "grad_norm": 0.08174624294042587,
      "learning_rate": 1.1175978334640197e-06,
      "loss": 0.0077,
      "step": 2716040
    },
    {
      "epoch": 4.444891760439374,
      "grad_norm": 0.2727476954460144,
      "learning_rate": 1.1175319412505026e-06,
      "loss": 0.0109,
      "step": 2716060
    },
    {
      "epoch": 4.4449244908780265,
      "grad_norm": 0.10365934669971466,
      "learning_rate": 1.1174660490369854e-06,
      "loss": 0.0088,
      "step": 2716080
    },
    {
      "epoch": 4.44495722131668,
      "grad_norm": 0.15368705987930298,
      "learning_rate": 1.1174001568234683e-06,
      "loss": 0.0083,
      "step": 2716100
    },
    {
      "epoch": 4.444989951755334,
      "grad_norm": 0.34548845887184143,
      "learning_rate": 1.117334264609951e-06,
      "loss": 0.0108,
      "step": 2716120
    },
    {
      "epoch": 4.445022682193986,
      "grad_norm": 0.4908898174762726,
      "learning_rate": 1.117268372396434e-06,
      "loss": 0.0079,
      "step": 2716140
    },
    {
      "epoch": 4.44505541263264,
      "grad_norm": 0.10747223347425461,
      "learning_rate": 1.117202480182917e-06,
      "loss": 0.0062,
      "step": 2716160
    },
    {
      "epoch": 4.445088143071294,
      "grad_norm": 0.13582339882850647,
      "learning_rate": 1.1171365879693997e-06,
      "loss": 0.0094,
      "step": 2716180
    },
    {
      "epoch": 4.445120873509946,
      "grad_norm": 0.21352548897266388,
      "learning_rate": 1.1170706957558826e-06,
      "loss": 0.0181,
      "step": 2716200
    },
    {
      "epoch": 4.4451536039486,
      "grad_norm": 0.6617630124092102,
      "learning_rate": 1.1170048035423654e-06,
      "loss": 0.014,
      "step": 2716220
    },
    {
      "epoch": 4.4451863343872535,
      "grad_norm": 0.1413683295249939,
      "learning_rate": 1.1169389113288483e-06,
      "loss": 0.015,
      "step": 2716240
    },
    {
      "epoch": 4.445219064825907,
      "grad_norm": 0.1733495593070984,
      "learning_rate": 1.1168730191153313e-06,
      "loss": 0.0107,
      "step": 2716260
    },
    {
      "epoch": 4.44525179526456,
      "grad_norm": 0.10376463085412979,
      "learning_rate": 1.116807126901814e-06,
      "loss": 0.0099,
      "step": 2716280
    },
    {
      "epoch": 4.445284525703213,
      "grad_norm": 0.2126210331916809,
      "learning_rate": 1.116741234688297e-06,
      "loss": 0.0067,
      "step": 2716300
    },
    {
      "epoch": 4.445317256141867,
      "grad_norm": 0.307290643453598,
      "learning_rate": 1.11667534247478e-06,
      "loss": 0.0101,
      "step": 2716320
    },
    {
      "epoch": 4.445349986580521,
      "grad_norm": 0.11405208706855774,
      "learning_rate": 1.1166094502612627e-06,
      "loss": 0.0112,
      "step": 2716340
    },
    {
      "epoch": 4.445382717019173,
      "grad_norm": 0.2742723524570465,
      "learning_rate": 1.1165435580477456e-06,
      "loss": 0.0081,
      "step": 2716360
    },
    {
      "epoch": 4.445415447457827,
      "grad_norm": 0.2183755785226822,
      "learning_rate": 1.1164776658342284e-06,
      "loss": 0.0089,
      "step": 2716380
    },
    {
      "epoch": 4.4454481778964805,
      "grad_norm": 0.2674165964126587,
      "learning_rate": 1.1164117736207113e-06,
      "loss": 0.0076,
      "step": 2716400
    },
    {
      "epoch": 4.445480908335133,
      "grad_norm": 0.3493378162384033,
      "learning_rate": 1.1163458814071943e-06,
      "loss": 0.0078,
      "step": 2716420
    },
    {
      "epoch": 4.445513638773787,
      "grad_norm": 0.2533623278141022,
      "learning_rate": 1.116279989193677e-06,
      "loss": 0.0141,
      "step": 2716440
    },
    {
      "epoch": 4.44554636921244,
      "grad_norm": 0.1047648936510086,
      "learning_rate": 1.11621409698016e-06,
      "loss": 0.0097,
      "step": 2716460
    },
    {
      "epoch": 4.445579099651093,
      "grad_norm": 0.07357329875230789,
      "learning_rate": 1.1161482047666427e-06,
      "loss": 0.0049,
      "step": 2716480
    },
    {
      "epoch": 4.445611830089747,
      "grad_norm": 0.3707807660102844,
      "learning_rate": 1.1160823125531256e-06,
      "loss": 0.0098,
      "step": 2716500
    },
    {
      "epoch": 4.4456445605284,
      "grad_norm": 0.20905494689941406,
      "learning_rate": 1.1160164203396086e-06,
      "loss": 0.0057,
      "step": 2716520
    },
    {
      "epoch": 4.445677290967054,
      "grad_norm": 0.08365929126739502,
      "learning_rate": 1.1159505281260913e-06,
      "loss": 0.0125,
      "step": 2716540
    },
    {
      "epoch": 4.445710021405707,
      "grad_norm": 0.3242993652820587,
      "learning_rate": 1.1158846359125743e-06,
      "loss": 0.0079,
      "step": 2716560
    },
    {
      "epoch": 4.44574275184436,
      "grad_norm": 0.2287491410970688,
      "learning_rate": 1.1158187436990572e-06,
      "loss": 0.0089,
      "step": 2716580
    },
    {
      "epoch": 4.445775482283014,
      "grad_norm": 0.12479038536548615,
      "learning_rate": 1.1157528514855402e-06,
      "loss": 0.0102,
      "step": 2716600
    },
    {
      "epoch": 4.4458082127216665,
      "grad_norm": 0.07873556762933731,
      "learning_rate": 1.115686959272023e-06,
      "loss": 0.0162,
      "step": 2716620
    },
    {
      "epoch": 4.44584094316032,
      "grad_norm": 0.11954396218061447,
      "learning_rate": 1.1156210670585057e-06,
      "loss": 0.012,
      "step": 2716640
    },
    {
      "epoch": 4.445873673598974,
      "grad_norm": 0.3705401122570038,
      "learning_rate": 1.1155551748449886e-06,
      "loss": 0.0118,
      "step": 2716660
    },
    {
      "epoch": 4.445906404037627,
      "grad_norm": 0.1685929298400879,
      "learning_rate": 1.1154892826314716e-06,
      "loss": 0.0106,
      "step": 2716680
    },
    {
      "epoch": 4.44593913447628,
      "grad_norm": 0.3576066493988037,
      "learning_rate": 1.1154233904179545e-06,
      "loss": 0.0074,
      "step": 2716700
    },
    {
      "epoch": 4.445971864914934,
      "grad_norm": 0.07699235528707504,
      "learning_rate": 1.1153574982044373e-06,
      "loss": 0.0092,
      "step": 2716720
    },
    {
      "epoch": 4.446004595353587,
      "grad_norm": 0.06569716334342957,
      "learning_rate": 1.11529160599092e-06,
      "loss": 0.007,
      "step": 2716740
    },
    {
      "epoch": 4.44603732579224,
      "grad_norm": 0.4846785366535187,
      "learning_rate": 1.1152257137774032e-06,
      "loss": 0.0162,
      "step": 2716760
    },
    {
      "epoch": 4.4460700562308935,
      "grad_norm": 0.46290597319602966,
      "learning_rate": 1.115159821563886e-06,
      "loss": 0.0067,
      "step": 2716780
    },
    {
      "epoch": 4.446102786669547,
      "grad_norm": 0.33853140473365784,
      "learning_rate": 1.1150939293503689e-06,
      "loss": 0.0078,
      "step": 2716800
    },
    {
      "epoch": 4.446135517108201,
      "grad_norm": 0.2205723524093628,
      "learning_rate": 1.1150280371368516e-06,
      "loss": 0.0085,
      "step": 2716820
    },
    {
      "epoch": 4.446168247546853,
      "grad_norm": 0.08567209541797638,
      "learning_rate": 1.1149621449233343e-06,
      "loss": 0.0095,
      "step": 2716840
    },
    {
      "epoch": 4.446200977985507,
      "grad_norm": 0.2735172510147095,
      "learning_rate": 1.1148962527098175e-06,
      "loss": 0.0149,
      "step": 2716860
    },
    {
      "epoch": 4.4462337084241605,
      "grad_norm": 0.6156272888183594,
      "learning_rate": 1.1148303604963002e-06,
      "loss": 0.0096,
      "step": 2716880
    },
    {
      "epoch": 4.446266438862813,
      "grad_norm": 0.28114351630210876,
      "learning_rate": 1.1147644682827832e-06,
      "loss": 0.0089,
      "step": 2716900
    },
    {
      "epoch": 4.446299169301467,
      "grad_norm": 0.17680276930332184,
      "learning_rate": 1.114698576069266e-06,
      "loss": 0.0084,
      "step": 2716920
    },
    {
      "epoch": 4.44633189974012,
      "grad_norm": 0.4491787850856781,
      "learning_rate": 1.1146326838557489e-06,
      "loss": 0.0138,
      "step": 2716940
    },
    {
      "epoch": 4.446364630178774,
      "grad_norm": 0.10268409550189972,
      "learning_rate": 1.1145667916422318e-06,
      "loss": 0.0104,
      "step": 2716960
    },
    {
      "epoch": 4.446397360617427,
      "grad_norm": 0.21774926781654358,
      "learning_rate": 1.1145008994287146e-06,
      "loss": 0.0113,
      "step": 2716980
    },
    {
      "epoch": 4.44643009105608,
      "grad_norm": 0.6408517360687256,
      "learning_rate": 1.1144350072151975e-06,
      "loss": 0.0101,
      "step": 2717000
    },
    {
      "epoch": 4.446462821494734,
      "grad_norm": 0.2706526815891266,
      "learning_rate": 1.1143691150016805e-06,
      "loss": 0.0092,
      "step": 2717020
    },
    {
      "epoch": 4.446495551933387,
      "grad_norm": 0.13028275966644287,
      "learning_rate": 1.1143032227881632e-06,
      "loss": 0.0066,
      "step": 2717040
    },
    {
      "epoch": 4.44652828237204,
      "grad_norm": 0.2087884098291397,
      "learning_rate": 1.1142373305746462e-06,
      "loss": 0.0106,
      "step": 2717060
    },
    {
      "epoch": 4.446561012810694,
      "grad_norm": 0.07776790112257004,
      "learning_rate": 1.114171438361129e-06,
      "loss": 0.0076,
      "step": 2717080
    },
    {
      "epoch": 4.446593743249347,
      "grad_norm": 0.5142175555229187,
      "learning_rate": 1.1141055461476119e-06,
      "loss": 0.0092,
      "step": 2717100
    },
    {
      "epoch": 4.446626473688,
      "grad_norm": 0.10609211027622223,
      "learning_rate": 1.1140396539340948e-06,
      "loss": 0.009,
      "step": 2717120
    },
    {
      "epoch": 4.446659204126654,
      "grad_norm": 0.29911157488822937,
      "learning_rate": 1.1139737617205775e-06,
      "loss": 0.0098,
      "step": 2717140
    },
    {
      "epoch": 4.446691934565307,
      "grad_norm": 0.8167626261711121,
      "learning_rate": 1.1139078695070605e-06,
      "loss": 0.0138,
      "step": 2717160
    },
    {
      "epoch": 4.44672466500396,
      "grad_norm": 0.2940078377723694,
      "learning_rate": 1.1138419772935432e-06,
      "loss": 0.0072,
      "step": 2717180
    },
    {
      "epoch": 4.446757395442614,
      "grad_norm": 0.08865515887737274,
      "learning_rate": 1.1137760850800262e-06,
      "loss": 0.0059,
      "step": 2717200
    },
    {
      "epoch": 4.446790125881267,
      "grad_norm": 0.1340998411178589,
      "learning_rate": 1.1137101928665091e-06,
      "loss": 0.0074,
      "step": 2717220
    },
    {
      "epoch": 4.446822856319921,
      "grad_norm": 0.17226392030715942,
      "learning_rate": 1.1136443006529919e-06,
      "loss": 0.0079,
      "step": 2717240
    },
    {
      "epoch": 4.4468555867585735,
      "grad_norm": 0.632908046245575,
      "learning_rate": 1.1135784084394748e-06,
      "loss": 0.0114,
      "step": 2717260
    },
    {
      "epoch": 4.446888317197227,
      "grad_norm": 0.33996593952178955,
      "learning_rate": 1.1135125162259576e-06,
      "loss": 0.0073,
      "step": 2717280
    },
    {
      "epoch": 4.446921047635881,
      "grad_norm": 0.13863427937030792,
      "learning_rate": 1.1134466240124405e-06,
      "loss": 0.0056,
      "step": 2717300
    },
    {
      "epoch": 4.446953778074533,
      "grad_norm": 0.29787808656692505,
      "learning_rate": 1.1133807317989235e-06,
      "loss": 0.0096,
      "step": 2717320
    },
    {
      "epoch": 4.446986508513187,
      "grad_norm": 0.38506951928138733,
      "learning_rate": 1.1133148395854062e-06,
      "loss": 0.0118,
      "step": 2717340
    },
    {
      "epoch": 4.447019238951841,
      "grad_norm": 0.6738842725753784,
      "learning_rate": 1.1132489473718892e-06,
      "loss": 0.0095,
      "step": 2717360
    },
    {
      "epoch": 4.447051969390494,
      "grad_norm": 0.29205629229545593,
      "learning_rate": 1.1131830551583721e-06,
      "loss": 0.0081,
      "step": 2717380
    },
    {
      "epoch": 4.447084699829147,
      "grad_norm": 0.061105724424123764,
      "learning_rate": 1.1131171629448549e-06,
      "loss": 0.0076,
      "step": 2717400
    },
    {
      "epoch": 4.4471174302678005,
      "grad_norm": 0.3357928991317749,
      "learning_rate": 1.1130512707313378e-06,
      "loss": 0.0109,
      "step": 2717420
    },
    {
      "epoch": 4.447150160706454,
      "grad_norm": 0.0859166607260704,
      "learning_rate": 1.1129853785178205e-06,
      "loss": 0.0097,
      "step": 2717440
    },
    {
      "epoch": 4.447182891145107,
      "grad_norm": 0.2831228971481323,
      "learning_rate": 1.1129194863043035e-06,
      "loss": 0.0075,
      "step": 2717460
    },
    {
      "epoch": 4.44721562158376,
      "grad_norm": 0.07596661150455475,
      "learning_rate": 1.1128535940907865e-06,
      "loss": 0.0079,
      "step": 2717480
    },
    {
      "epoch": 4.447248352022414,
      "grad_norm": 0.12899170815944672,
      "learning_rate": 1.1127877018772692e-06,
      "loss": 0.0067,
      "step": 2717500
    },
    {
      "epoch": 4.447281082461068,
      "grad_norm": 0.22834104299545288,
      "learning_rate": 1.1127218096637521e-06,
      "loss": 0.0082,
      "step": 2717520
    },
    {
      "epoch": 4.44731381289972,
      "grad_norm": 0.40408170223236084,
      "learning_rate": 1.1126559174502349e-06,
      "loss": 0.0067,
      "step": 2717540
    },
    {
      "epoch": 4.447346543338374,
      "grad_norm": 0.3189259171485901,
      "learning_rate": 1.1125900252367178e-06,
      "loss": 0.0073,
      "step": 2717560
    },
    {
      "epoch": 4.4473792737770275,
      "grad_norm": 0.31425225734710693,
      "learning_rate": 1.1125241330232008e-06,
      "loss": 0.0079,
      "step": 2717580
    },
    {
      "epoch": 4.44741200421568,
      "grad_norm": 0.285230427980423,
      "learning_rate": 1.1124582408096835e-06,
      "loss": 0.0091,
      "step": 2717600
    },
    {
      "epoch": 4.447444734654334,
      "grad_norm": 0.21197910606861115,
      "learning_rate": 1.1123923485961665e-06,
      "loss": 0.0098,
      "step": 2717620
    },
    {
      "epoch": 4.447477465092987,
      "grad_norm": 0.2679455876350403,
      "learning_rate": 1.1123264563826494e-06,
      "loss": 0.0093,
      "step": 2717640
    },
    {
      "epoch": 4.44751019553164,
      "grad_norm": 0.18682751059532166,
      "learning_rate": 1.1122605641691322e-06,
      "loss": 0.0094,
      "step": 2717660
    },
    {
      "epoch": 4.447542925970294,
      "grad_norm": 0.2025873064994812,
      "learning_rate": 1.1121946719556151e-06,
      "loss": 0.0145,
      "step": 2717680
    },
    {
      "epoch": 4.447575656408947,
      "grad_norm": 0.2379341870546341,
      "learning_rate": 1.1121287797420979e-06,
      "loss": 0.0073,
      "step": 2717700
    },
    {
      "epoch": 4.447608386847601,
      "grad_norm": 0.05534716695547104,
      "learning_rate": 1.1120628875285808e-06,
      "loss": 0.0097,
      "step": 2717720
    },
    {
      "epoch": 4.4476411172862536,
      "grad_norm": 0.25598177313804626,
      "learning_rate": 1.1119969953150638e-06,
      "loss": 0.0082,
      "step": 2717740
    },
    {
      "epoch": 4.447673847724907,
      "grad_norm": 0.4894174039363861,
      "learning_rate": 1.1119311031015465e-06,
      "loss": 0.0074,
      "step": 2717760
    },
    {
      "epoch": 4.447706578163561,
      "grad_norm": 0.4486464858055115,
      "learning_rate": 1.1118652108880295e-06,
      "loss": 0.012,
      "step": 2717780
    },
    {
      "epoch": 4.447739308602214,
      "grad_norm": 0.16268689930438995,
      "learning_rate": 1.1117993186745122e-06,
      "loss": 0.0101,
      "step": 2717800
    },
    {
      "epoch": 4.447772039040867,
      "grad_norm": 0.11203169822692871,
      "learning_rate": 1.1117334264609951e-06,
      "loss": 0.0055,
      "step": 2717820
    },
    {
      "epoch": 4.447804769479521,
      "grad_norm": 0.34911754727363586,
      "learning_rate": 1.111667534247478e-06,
      "loss": 0.0128,
      "step": 2717840
    },
    {
      "epoch": 4.447837499918174,
      "grad_norm": 0.36702555418014526,
      "learning_rate": 1.1116016420339608e-06,
      "loss": 0.0116,
      "step": 2717860
    },
    {
      "epoch": 4.447870230356827,
      "grad_norm": 0.42235198616981506,
      "learning_rate": 1.1115357498204438e-06,
      "loss": 0.0098,
      "step": 2717880
    },
    {
      "epoch": 4.4479029607954805,
      "grad_norm": 0.12434236705303192,
      "learning_rate": 1.1114698576069267e-06,
      "loss": 0.0086,
      "step": 2717900
    },
    {
      "epoch": 4.447935691234134,
      "grad_norm": 0.14244790375232697,
      "learning_rate": 1.1114039653934097e-06,
      "loss": 0.0097,
      "step": 2717920
    },
    {
      "epoch": 4.447968421672787,
      "grad_norm": 0.11895663291215897,
      "learning_rate": 1.1113380731798924e-06,
      "loss": 0.0065,
      "step": 2717940
    },
    {
      "epoch": 4.44800115211144,
      "grad_norm": 0.3665267527103424,
      "learning_rate": 1.1112721809663752e-06,
      "loss": 0.0105,
      "step": 2717960
    },
    {
      "epoch": 4.448033882550094,
      "grad_norm": 0.2966555953025818,
      "learning_rate": 1.1112062887528581e-06,
      "loss": 0.012,
      "step": 2717980
    },
    {
      "epoch": 4.448066612988748,
      "grad_norm": 0.25624221563339233,
      "learning_rate": 1.111140396539341e-06,
      "loss": 0.0108,
      "step": 2718000
    },
    {
      "epoch": 4.4480993434274,
      "grad_norm": 0.2830445468425751,
      "learning_rate": 1.111074504325824e-06,
      "loss": 0.0085,
      "step": 2718020
    },
    {
      "epoch": 4.448132073866054,
      "grad_norm": 0.3854595422744751,
      "learning_rate": 1.1110086121123068e-06,
      "loss": 0.0082,
      "step": 2718040
    },
    {
      "epoch": 4.4481648043047075,
      "grad_norm": 0.2618546783924103,
      "learning_rate": 1.1109427198987895e-06,
      "loss": 0.0115,
      "step": 2718060
    },
    {
      "epoch": 4.44819753474336,
      "grad_norm": 0.5536642074584961,
      "learning_rate": 1.1108768276852727e-06,
      "loss": 0.0154,
      "step": 2718080
    },
    {
      "epoch": 4.448230265182014,
      "grad_norm": 0.4143003523349762,
      "learning_rate": 1.1108109354717554e-06,
      "loss": 0.0067,
      "step": 2718100
    },
    {
      "epoch": 4.448262995620667,
      "grad_norm": 0.23106853663921356,
      "learning_rate": 1.1107450432582384e-06,
      "loss": 0.0078,
      "step": 2718120
    },
    {
      "epoch": 4.448295726059321,
      "grad_norm": 0.15589368343353271,
      "learning_rate": 1.110679151044721e-06,
      "loss": 0.0117,
      "step": 2718140
    },
    {
      "epoch": 4.448328456497974,
      "grad_norm": 0.38351765275001526,
      "learning_rate": 1.1106132588312038e-06,
      "loss": 0.01,
      "step": 2718160
    },
    {
      "epoch": 4.448361186936627,
      "grad_norm": 0.22101975977420807,
      "learning_rate": 1.110547366617687e-06,
      "loss": 0.0121,
      "step": 2718180
    },
    {
      "epoch": 4.448393917375281,
      "grad_norm": 0.44587573409080505,
      "learning_rate": 1.1104814744041697e-06,
      "loss": 0.0107,
      "step": 2718200
    },
    {
      "epoch": 4.448426647813934,
      "grad_norm": 0.17888601124286652,
      "learning_rate": 1.1104155821906527e-06,
      "loss": 0.0093,
      "step": 2718220
    },
    {
      "epoch": 4.448459378252587,
      "grad_norm": 0.27975475788116455,
      "learning_rate": 1.1103496899771354e-06,
      "loss": 0.0113,
      "step": 2718240
    },
    {
      "epoch": 4.448492108691241,
      "grad_norm": 0.08984225988388062,
      "learning_rate": 1.1102837977636184e-06,
      "loss": 0.0071,
      "step": 2718260
    },
    {
      "epoch": 4.448524839129894,
      "grad_norm": 0.3675670027732849,
      "learning_rate": 1.1102179055501013e-06,
      "loss": 0.0061,
      "step": 2718280
    },
    {
      "epoch": 4.448557569568547,
      "grad_norm": 0.27870458364486694,
      "learning_rate": 1.110152013336584e-06,
      "loss": 0.0104,
      "step": 2718300
    },
    {
      "epoch": 4.448590300007201,
      "grad_norm": 0.1322915107011795,
      "learning_rate": 1.110086121123067e-06,
      "loss": 0.0094,
      "step": 2718320
    },
    {
      "epoch": 4.448623030445854,
      "grad_norm": 0.279656320810318,
      "learning_rate": 1.11002022890955e-06,
      "loss": 0.0081,
      "step": 2718340
    },
    {
      "epoch": 4.448655760884507,
      "grad_norm": 0.2805132567882538,
      "learning_rate": 1.1099543366960327e-06,
      "loss": 0.0088,
      "step": 2718360
    },
    {
      "epoch": 4.448688491323161,
      "grad_norm": 0.14298304915428162,
      "learning_rate": 1.1098884444825157e-06,
      "loss": 0.008,
      "step": 2718380
    },
    {
      "epoch": 4.448721221761814,
      "grad_norm": 0.14747872948646545,
      "learning_rate": 1.1098225522689984e-06,
      "loss": 0.0141,
      "step": 2718400
    },
    {
      "epoch": 4.448753952200468,
      "grad_norm": 0.449670672416687,
      "learning_rate": 1.1097566600554814e-06,
      "loss": 0.0055,
      "step": 2718420
    },
    {
      "epoch": 4.4487866826391205,
      "grad_norm": 0.3805029094219208,
      "learning_rate": 1.1096907678419643e-06,
      "loss": 0.009,
      "step": 2718440
    },
    {
      "epoch": 4.448819413077774,
      "grad_norm": 0.31004706025123596,
      "learning_rate": 1.109624875628447e-06,
      "loss": 0.0114,
      "step": 2718460
    },
    {
      "epoch": 4.448852143516428,
      "grad_norm": 0.2750982344150543,
      "learning_rate": 1.10955898341493e-06,
      "loss": 0.0133,
      "step": 2718480
    },
    {
      "epoch": 4.44888487395508,
      "grad_norm": 0.1463395357131958,
      "learning_rate": 1.1094930912014127e-06,
      "loss": 0.0193,
      "step": 2718500
    },
    {
      "epoch": 4.448917604393734,
      "grad_norm": 0.16663004457950592,
      "learning_rate": 1.1094271989878957e-06,
      "loss": 0.0088,
      "step": 2718520
    },
    {
      "epoch": 4.4489503348323876,
      "grad_norm": 0.19955089688301086,
      "learning_rate": 1.1093613067743786e-06,
      "loss": 0.013,
      "step": 2718540
    },
    {
      "epoch": 4.448983065271041,
      "grad_norm": 0.21889819204807281,
      "learning_rate": 1.1092954145608614e-06,
      "loss": 0.0077,
      "step": 2718560
    },
    {
      "epoch": 4.449015795709694,
      "grad_norm": 0.3673797845840454,
      "learning_rate": 1.1092295223473443e-06,
      "loss": 0.0112,
      "step": 2718580
    },
    {
      "epoch": 4.4490485261483474,
      "grad_norm": 0.19278688728809357,
      "learning_rate": 1.109163630133827e-06,
      "loss": 0.0087,
      "step": 2718600
    },
    {
      "epoch": 4.449081256587001,
      "grad_norm": 0.06080608814954758,
      "learning_rate": 1.10909773792031e-06,
      "loss": 0.012,
      "step": 2718620
    },
    {
      "epoch": 4.449113987025654,
      "grad_norm": 0.47071292996406555,
      "learning_rate": 1.109031845706793e-06,
      "loss": 0.0072,
      "step": 2718640
    },
    {
      "epoch": 4.449146717464307,
      "grad_norm": 0.17719905078411102,
      "learning_rate": 1.1089659534932757e-06,
      "loss": 0.0084,
      "step": 2718660
    },
    {
      "epoch": 4.449179447902961,
      "grad_norm": 0.25194069743156433,
      "learning_rate": 1.1089000612797587e-06,
      "loss": 0.0084,
      "step": 2718680
    },
    {
      "epoch": 4.4492121783416145,
      "grad_norm": 0.15140537917613983,
      "learning_rate": 1.1088341690662416e-06,
      "loss": 0.0077,
      "step": 2718700
    },
    {
      "epoch": 4.449244908780267,
      "grad_norm": 0.24757763743400574,
      "learning_rate": 1.1087682768527244e-06,
      "loss": 0.0077,
      "step": 2718720
    },
    {
      "epoch": 4.449277639218921,
      "grad_norm": 0.06948879361152649,
      "learning_rate": 1.1087023846392073e-06,
      "loss": 0.0094,
      "step": 2718740
    },
    {
      "epoch": 4.449310369657574,
      "grad_norm": 0.18125075101852417,
      "learning_rate": 1.10863649242569e-06,
      "loss": 0.0082,
      "step": 2718760
    },
    {
      "epoch": 4.449343100096227,
      "grad_norm": 0.04360027238726616,
      "learning_rate": 1.108570600212173e-06,
      "loss": 0.0067,
      "step": 2718780
    },
    {
      "epoch": 4.449375830534881,
      "grad_norm": 0.5393138527870178,
      "learning_rate": 1.108504707998656e-06,
      "loss": 0.0127,
      "step": 2718800
    },
    {
      "epoch": 4.449408560973534,
      "grad_norm": 0.5093534588813782,
      "learning_rate": 1.1084388157851387e-06,
      "loss": 0.0115,
      "step": 2718820
    },
    {
      "epoch": 4.449441291412188,
      "grad_norm": 0.11716014891862869,
      "learning_rate": 1.1083729235716216e-06,
      "loss": 0.0093,
      "step": 2718840
    },
    {
      "epoch": 4.449474021850841,
      "grad_norm": 0.3286055624485016,
      "learning_rate": 1.1083070313581044e-06,
      "loss": 0.0091,
      "step": 2718860
    },
    {
      "epoch": 4.449506752289494,
      "grad_norm": 0.28493690490722656,
      "learning_rate": 1.1082411391445873e-06,
      "loss": 0.0117,
      "step": 2718880
    },
    {
      "epoch": 4.449539482728148,
      "grad_norm": 0.18768972158432007,
      "learning_rate": 1.1081752469310703e-06,
      "loss": 0.0082,
      "step": 2718900
    },
    {
      "epoch": 4.4495722131668005,
      "grad_norm": 0.2888304889202118,
      "learning_rate": 1.108109354717553e-06,
      "loss": 0.0127,
      "step": 2718920
    },
    {
      "epoch": 4.449604943605454,
      "grad_norm": 0.3248338997364044,
      "learning_rate": 1.108043462504036e-06,
      "loss": 0.0052,
      "step": 2718940
    },
    {
      "epoch": 4.449637674044108,
      "grad_norm": 0.12716253101825714,
      "learning_rate": 1.107977570290519e-06,
      "loss": 0.009,
      "step": 2718960
    },
    {
      "epoch": 4.449670404482761,
      "grad_norm": 0.18384051322937012,
      "learning_rate": 1.1079116780770017e-06,
      "loss": 0.0095,
      "step": 2718980
    },
    {
      "epoch": 4.449703134921414,
      "grad_norm": 0.08009595423936844,
      "learning_rate": 1.1078457858634846e-06,
      "loss": 0.0055,
      "step": 2719000
    },
    {
      "epoch": 4.449735865360068,
      "grad_norm": 0.13424676656723022,
      "learning_rate": 1.1077798936499674e-06,
      "loss": 0.0104,
      "step": 2719020
    },
    {
      "epoch": 4.449768595798721,
      "grad_norm": 0.11023808270692825,
      "learning_rate": 1.1077140014364503e-06,
      "loss": 0.0061,
      "step": 2719040
    },
    {
      "epoch": 4.449801326237374,
      "grad_norm": 0.7913536429405212,
      "learning_rate": 1.1076481092229333e-06,
      "loss": 0.0125,
      "step": 2719060
    },
    {
      "epoch": 4.4498340566760275,
      "grad_norm": 0.2789595127105713,
      "learning_rate": 1.107582217009416e-06,
      "loss": 0.0118,
      "step": 2719080
    },
    {
      "epoch": 4.449866787114681,
      "grad_norm": 0.443981409072876,
      "learning_rate": 1.107516324795899e-06,
      "loss": 0.0138,
      "step": 2719100
    },
    {
      "epoch": 4.449899517553334,
      "grad_norm": 0.1970883458852768,
      "learning_rate": 1.1074504325823817e-06,
      "loss": 0.0116,
      "step": 2719120
    },
    {
      "epoch": 4.449932247991987,
      "grad_norm": 0.2714154124259949,
      "learning_rate": 1.1073845403688646e-06,
      "loss": 0.0075,
      "step": 2719140
    },
    {
      "epoch": 4.449964978430641,
      "grad_norm": 0.19901667535305023,
      "learning_rate": 1.1073186481553476e-06,
      "loss": 0.0105,
      "step": 2719160
    },
    {
      "epoch": 4.449997708869295,
      "grad_norm": 0.3838621973991394,
      "learning_rate": 1.1072527559418303e-06,
      "loss": 0.0061,
      "step": 2719180
    },
    {
      "epoch": 4.450030439307947,
      "grad_norm": 0.11147040873765945,
      "learning_rate": 1.1071868637283133e-06,
      "loss": 0.0083,
      "step": 2719200
    },
    {
      "epoch": 4.450063169746601,
      "grad_norm": 0.10166918486356735,
      "learning_rate": 1.1071209715147962e-06,
      "loss": 0.0081,
      "step": 2719220
    },
    {
      "epoch": 4.4500959001852545,
      "grad_norm": 0.15228523313999176,
      "learning_rate": 1.1070550793012792e-06,
      "loss": 0.0084,
      "step": 2719240
    },
    {
      "epoch": 4.450128630623908,
      "grad_norm": 0.0767621099948883,
      "learning_rate": 1.106989187087762e-06,
      "loss": 0.0082,
      "step": 2719260
    },
    {
      "epoch": 4.450161361062561,
      "grad_norm": 0.13766522705554962,
      "learning_rate": 1.1069232948742447e-06,
      "loss": 0.0074,
      "step": 2719280
    },
    {
      "epoch": 4.450194091501214,
      "grad_norm": 0.2488720715045929,
      "learning_rate": 1.1068574026607276e-06,
      "loss": 0.0063,
      "step": 2719300
    },
    {
      "epoch": 4.450226821939868,
      "grad_norm": 0.3663955628871918,
      "learning_rate": 1.1067915104472106e-06,
      "loss": 0.0076,
      "step": 2719320
    },
    {
      "epoch": 4.450259552378521,
      "grad_norm": 0.19821129739284515,
      "learning_rate": 1.1067256182336935e-06,
      "loss": 0.0056,
      "step": 2719340
    },
    {
      "epoch": 4.450292282817174,
      "grad_norm": 0.3298276662826538,
      "learning_rate": 1.1066597260201763e-06,
      "loss": 0.0096,
      "step": 2719360
    },
    {
      "epoch": 4.450325013255828,
      "grad_norm": 0.5395799875259399,
      "learning_rate": 1.106593833806659e-06,
      "loss": 0.0088,
      "step": 2719380
    },
    {
      "epoch": 4.450357743694481,
      "grad_norm": 0.31044721603393555,
      "learning_rate": 1.1065279415931422e-06,
      "loss": 0.0096,
      "step": 2719400
    },
    {
      "epoch": 4.450390474133134,
      "grad_norm": 0.11874764412641525,
      "learning_rate": 1.106462049379625e-06,
      "loss": 0.0118,
      "step": 2719420
    },
    {
      "epoch": 4.450423204571788,
      "grad_norm": 0.2526727616786957,
      "learning_rate": 1.1063961571661079e-06,
      "loss": 0.0088,
      "step": 2719440
    },
    {
      "epoch": 4.450455935010441,
      "grad_norm": 0.23541398346424103,
      "learning_rate": 1.1063302649525906e-06,
      "loss": 0.01,
      "step": 2719460
    },
    {
      "epoch": 4.450488665449094,
      "grad_norm": 0.0974186509847641,
      "learning_rate": 1.1062643727390736e-06,
      "loss": 0.0086,
      "step": 2719480
    },
    {
      "epoch": 4.450521395887748,
      "grad_norm": 0.20183688402175903,
      "learning_rate": 1.1061984805255565e-06,
      "loss": 0.0068,
      "step": 2719500
    },
    {
      "epoch": 4.450554126326401,
      "grad_norm": 0.26451289653778076,
      "learning_rate": 1.1061325883120392e-06,
      "loss": 0.0129,
      "step": 2719520
    },
    {
      "epoch": 4.450586856765055,
      "grad_norm": 0.08314495533704758,
      "learning_rate": 1.1060666960985222e-06,
      "loss": 0.0096,
      "step": 2719540
    },
    {
      "epoch": 4.4506195872037075,
      "grad_norm": 0.25642767548561096,
      "learning_rate": 1.106000803885005e-06,
      "loss": 0.0062,
      "step": 2719560
    },
    {
      "epoch": 4.450652317642361,
      "grad_norm": 0.10082893073558807,
      "learning_rate": 1.1059349116714879e-06,
      "loss": 0.0068,
      "step": 2719580
    },
    {
      "epoch": 4.450685048081015,
      "grad_norm": 0.23973728716373444,
      "learning_rate": 1.1058690194579708e-06,
      "loss": 0.0075,
      "step": 2719600
    },
    {
      "epoch": 4.450717778519667,
      "grad_norm": 0.5211926698684692,
      "learning_rate": 1.1058031272444536e-06,
      "loss": 0.0121,
      "step": 2719620
    },
    {
      "epoch": 4.450750508958321,
      "grad_norm": 0.4798237681388855,
      "learning_rate": 1.1057372350309365e-06,
      "loss": 0.0071,
      "step": 2719640
    },
    {
      "epoch": 4.450783239396975,
      "grad_norm": 0.24606852233409882,
      "learning_rate": 1.1056713428174195e-06,
      "loss": 0.0097,
      "step": 2719660
    },
    {
      "epoch": 4.450815969835627,
      "grad_norm": 0.19546149671077728,
      "learning_rate": 1.1056054506039022e-06,
      "loss": 0.0076,
      "step": 2719680
    },
    {
      "epoch": 4.450848700274281,
      "grad_norm": 0.16246695816516876,
      "learning_rate": 1.1055395583903852e-06,
      "loss": 0.0081,
      "step": 2719700
    },
    {
      "epoch": 4.4508814307129345,
      "grad_norm": 0.32318127155303955,
      "learning_rate": 1.105473666176868e-06,
      "loss": 0.0071,
      "step": 2719720
    },
    {
      "epoch": 4.450914161151588,
      "grad_norm": 0.21572506427764893,
      "learning_rate": 1.1054077739633509e-06,
      "loss": 0.0117,
      "step": 2719740
    },
    {
      "epoch": 4.450946891590241,
      "grad_norm": 0.0676712691783905,
      "learning_rate": 1.1053418817498338e-06,
      "loss": 0.0097,
      "step": 2719760
    },
    {
      "epoch": 4.450979622028894,
      "grad_norm": 0.07933397591114044,
      "learning_rate": 1.1052759895363166e-06,
      "loss": 0.0065,
      "step": 2719780
    },
    {
      "epoch": 4.451012352467548,
      "grad_norm": 0.10073932260274887,
      "learning_rate": 1.1052100973227995e-06,
      "loss": 0.0102,
      "step": 2719800
    },
    {
      "epoch": 4.451045082906201,
      "grad_norm": 0.23088014125823975,
      "learning_rate": 1.1051442051092822e-06,
      "loss": 0.0101,
      "step": 2719820
    },
    {
      "epoch": 4.451077813344854,
      "grad_norm": 0.1124839037656784,
      "learning_rate": 1.1050783128957652e-06,
      "loss": 0.0071,
      "step": 2719840
    },
    {
      "epoch": 4.451110543783508,
      "grad_norm": 0.2156178057193756,
      "learning_rate": 1.1050124206822482e-06,
      "loss": 0.0133,
      "step": 2719860
    },
    {
      "epoch": 4.4511432742221615,
      "grad_norm": 0.1372671276330948,
      "learning_rate": 1.1049465284687309e-06,
      "loss": 0.008,
      "step": 2719880
    },
    {
      "epoch": 4.451176004660814,
      "grad_norm": 0.1141744926571846,
      "learning_rate": 1.1048806362552138e-06,
      "loss": 0.0102,
      "step": 2719900
    },
    {
      "epoch": 4.451208735099468,
      "grad_norm": 0.15782929956912994,
      "learning_rate": 1.1048147440416968e-06,
      "loss": 0.0086,
      "step": 2719920
    },
    {
      "epoch": 4.451241465538121,
      "grad_norm": 0.5433392524719238,
      "learning_rate": 1.1047488518281795e-06,
      "loss": 0.0098,
      "step": 2719940
    },
    {
      "epoch": 4.451274195976774,
      "grad_norm": 0.1584022492170334,
      "learning_rate": 1.1046829596146625e-06,
      "loss": 0.0084,
      "step": 2719960
    },
    {
      "epoch": 4.451306926415428,
      "grad_norm": 0.5120691061019897,
      "learning_rate": 1.1046170674011452e-06,
      "loss": 0.0069,
      "step": 2719980
    },
    {
      "epoch": 4.451339656854081,
      "grad_norm": 0.058385759592056274,
      "learning_rate": 1.1045511751876282e-06,
      "loss": 0.0102,
      "step": 2720000
    },
    {
      "epoch": 4.451372387292735,
      "grad_norm": 0.34402868151664734,
      "learning_rate": 1.1044852829741111e-06,
      "loss": 0.0085,
      "step": 2720020
    },
    {
      "epoch": 4.451405117731388,
      "grad_norm": 0.2134018838405609,
      "learning_rate": 1.1044193907605939e-06,
      "loss": 0.0049,
      "step": 2720040
    },
    {
      "epoch": 4.451437848170041,
      "grad_norm": 0.16090115904808044,
      "learning_rate": 1.1043534985470768e-06,
      "loss": 0.0068,
      "step": 2720060
    },
    {
      "epoch": 4.451470578608695,
      "grad_norm": 0.229648619890213,
      "learning_rate": 1.1042876063335596e-06,
      "loss": 0.01,
      "step": 2720080
    },
    {
      "epoch": 4.4515033090473475,
      "grad_norm": 0.23712843656539917,
      "learning_rate": 1.1042217141200425e-06,
      "loss": 0.007,
      "step": 2720100
    },
    {
      "epoch": 4.451536039486001,
      "grad_norm": 0.26676681637763977,
      "learning_rate": 1.1041558219065255e-06,
      "loss": 0.0073,
      "step": 2720120
    },
    {
      "epoch": 4.451568769924655,
      "grad_norm": 0.07408235967159271,
      "learning_rate": 1.1040899296930082e-06,
      "loss": 0.0099,
      "step": 2720140
    },
    {
      "epoch": 4.451601500363308,
      "grad_norm": 0.8535014390945435,
      "learning_rate": 1.1040240374794912e-06,
      "loss": 0.0092,
      "step": 2720160
    },
    {
      "epoch": 4.451634230801961,
      "grad_norm": 0.31459182500839233,
      "learning_rate": 1.1039581452659739e-06,
      "loss": 0.0134,
      "step": 2720180
    },
    {
      "epoch": 4.451666961240615,
      "grad_norm": 0.33137592673301697,
      "learning_rate": 1.1038922530524568e-06,
      "loss": 0.007,
      "step": 2720200
    },
    {
      "epoch": 4.451699691679268,
      "grad_norm": 0.1436498463153839,
      "learning_rate": 1.1038263608389398e-06,
      "loss": 0.0109,
      "step": 2720220
    },
    {
      "epoch": 4.451732422117921,
      "grad_norm": 0.21225589513778687,
      "learning_rate": 1.1037604686254225e-06,
      "loss": 0.0083,
      "step": 2720240
    },
    {
      "epoch": 4.4517651525565745,
      "grad_norm": 0.6730862855911255,
      "learning_rate": 1.1036945764119055e-06,
      "loss": 0.0147,
      "step": 2720260
    },
    {
      "epoch": 4.451797882995228,
      "grad_norm": 0.054320208728313446,
      "learning_rate": 1.1036286841983884e-06,
      "loss": 0.0099,
      "step": 2720280
    },
    {
      "epoch": 4.451830613433882,
      "grad_norm": 0.12651260197162628,
      "learning_rate": 1.1035627919848712e-06,
      "loss": 0.0111,
      "step": 2720300
    },
    {
      "epoch": 4.451863343872534,
      "grad_norm": 0.0926741287112236,
      "learning_rate": 1.1034968997713541e-06,
      "loss": 0.0068,
      "step": 2720320
    },
    {
      "epoch": 4.451896074311188,
      "grad_norm": 0.17592814564704895,
      "learning_rate": 1.1034310075578369e-06,
      "loss": 0.0086,
      "step": 2720340
    },
    {
      "epoch": 4.4519288047498415,
      "grad_norm": 0.14773690700531006,
      "learning_rate": 1.1033651153443198e-06,
      "loss": 0.0104,
      "step": 2720360
    },
    {
      "epoch": 4.451961535188494,
      "grad_norm": 0.1387905478477478,
      "learning_rate": 1.1032992231308028e-06,
      "loss": 0.009,
      "step": 2720380
    },
    {
      "epoch": 4.451994265627148,
      "grad_norm": 0.18124982714653015,
      "learning_rate": 1.1032333309172855e-06,
      "loss": 0.0105,
      "step": 2720400
    },
    {
      "epoch": 4.452026996065801,
      "grad_norm": 0.3060310482978821,
      "learning_rate": 1.1031674387037685e-06,
      "loss": 0.0104,
      "step": 2720420
    },
    {
      "epoch": 4.452059726504455,
      "grad_norm": 0.04199871048331261,
      "learning_rate": 1.1031015464902512e-06,
      "loss": 0.008,
      "step": 2720440
    },
    {
      "epoch": 4.452092456943108,
      "grad_norm": 1.0526269674301147,
      "learning_rate": 1.1030356542767342e-06,
      "loss": 0.0102,
      "step": 2720460
    },
    {
      "epoch": 4.452125187381761,
      "grad_norm": 0.15241007506847382,
      "learning_rate": 1.102969762063217e-06,
      "loss": 0.0083,
      "step": 2720480
    },
    {
      "epoch": 4.452157917820415,
      "grad_norm": 0.21275222301483154,
      "learning_rate": 1.1029038698496998e-06,
      "loss": 0.0102,
      "step": 2720500
    },
    {
      "epoch": 4.452190648259068,
      "grad_norm": 0.08927235007286072,
      "learning_rate": 1.1028379776361828e-06,
      "loss": 0.0096,
      "step": 2720520
    },
    {
      "epoch": 4.452223378697721,
      "grad_norm": 0.31617656350135803,
      "learning_rate": 1.1027720854226657e-06,
      "loss": 0.0083,
      "step": 2720540
    },
    {
      "epoch": 4.452256109136375,
      "grad_norm": 0.31829628348350525,
      "learning_rate": 1.1027061932091487e-06,
      "loss": 0.0144,
      "step": 2720560
    },
    {
      "epoch": 4.4522888395750275,
      "grad_norm": 0.14047041535377502,
      "learning_rate": 1.1026403009956314e-06,
      "loss": 0.0064,
      "step": 2720580
    },
    {
      "epoch": 4.452321570013681,
      "grad_norm": 0.4506893455982208,
      "learning_rate": 1.1025744087821142e-06,
      "loss": 0.0148,
      "step": 2720600
    },
    {
      "epoch": 4.452354300452335,
      "grad_norm": 0.1762802004814148,
      "learning_rate": 1.1025085165685971e-06,
      "loss": 0.0079,
      "step": 2720620
    },
    {
      "epoch": 4.452387030890988,
      "grad_norm": 0.35070738196372986,
      "learning_rate": 1.10244262435508e-06,
      "loss": 0.0091,
      "step": 2720640
    },
    {
      "epoch": 4.452419761329641,
      "grad_norm": 0.04869979992508888,
      "learning_rate": 1.102376732141563e-06,
      "loss": 0.0104,
      "step": 2720660
    },
    {
      "epoch": 4.452452491768295,
      "grad_norm": 0.06808311492204666,
      "learning_rate": 1.1023108399280458e-06,
      "loss": 0.0078,
      "step": 2720680
    },
    {
      "epoch": 4.452485222206948,
      "grad_norm": 0.17528288066387177,
      "learning_rate": 1.1022449477145285e-06,
      "loss": 0.0129,
      "step": 2720700
    },
    {
      "epoch": 4.452517952645602,
      "grad_norm": 0.20405344665050507,
      "learning_rate": 1.1021790555010117e-06,
      "loss": 0.0065,
      "step": 2720720
    },
    {
      "epoch": 4.4525506830842545,
      "grad_norm": 0.2045295387506485,
      "learning_rate": 1.1021131632874944e-06,
      "loss": 0.0118,
      "step": 2720740
    },
    {
      "epoch": 4.452583413522908,
      "grad_norm": 0.11575386673212051,
      "learning_rate": 1.1020472710739774e-06,
      "loss": 0.0062,
      "step": 2720760
    },
    {
      "epoch": 4.452616143961562,
      "grad_norm": 0.11744403094053268,
      "learning_rate": 1.1019813788604601e-06,
      "loss": 0.0066,
      "step": 2720780
    },
    {
      "epoch": 4.452648874400214,
      "grad_norm": 0.14270508289337158,
      "learning_rate": 1.101915486646943e-06,
      "loss": 0.0132,
      "step": 2720800
    },
    {
      "epoch": 4.452681604838868,
      "grad_norm": 0.16741670668125153,
      "learning_rate": 1.101849594433426e-06,
      "loss": 0.0126,
      "step": 2720820
    },
    {
      "epoch": 4.452714335277522,
      "grad_norm": 0.33614668250083923,
      "learning_rate": 1.1017837022199088e-06,
      "loss": 0.0079,
      "step": 2720840
    },
    {
      "epoch": 4.452747065716174,
      "grad_norm": 0.39877238869667053,
      "learning_rate": 1.1017178100063917e-06,
      "loss": 0.0098,
      "step": 2720860
    },
    {
      "epoch": 4.452779796154828,
      "grad_norm": 0.19229336082935333,
      "learning_rate": 1.1016519177928744e-06,
      "loss": 0.0065,
      "step": 2720880
    },
    {
      "epoch": 4.4528125265934815,
      "grad_norm": 0.15743136405944824,
      "learning_rate": 1.1015860255793574e-06,
      "loss": 0.015,
      "step": 2720900
    },
    {
      "epoch": 4.452845257032135,
      "grad_norm": 0.25610747933387756,
      "learning_rate": 1.1015201333658403e-06,
      "loss": 0.0061,
      "step": 2720920
    },
    {
      "epoch": 4.452877987470788,
      "grad_norm": 0.4194723963737488,
      "learning_rate": 1.101454241152323e-06,
      "loss": 0.012,
      "step": 2720940
    },
    {
      "epoch": 4.452910717909441,
      "grad_norm": 0.25375959277153015,
      "learning_rate": 1.101388348938806e-06,
      "loss": 0.0069,
      "step": 2720960
    },
    {
      "epoch": 4.452943448348095,
      "grad_norm": 0.09211806952953339,
      "learning_rate": 1.101322456725289e-06,
      "loss": 0.0069,
      "step": 2720980
    },
    {
      "epoch": 4.452976178786749,
      "grad_norm": 0.09080462157726288,
      "learning_rate": 1.1012565645117717e-06,
      "loss": 0.012,
      "step": 2721000
    },
    {
      "epoch": 4.453008909225401,
      "grad_norm": 0.10645576566457748,
      "learning_rate": 1.1011906722982547e-06,
      "loss": 0.0095,
      "step": 2721020
    },
    {
      "epoch": 4.453041639664055,
      "grad_norm": 0.12227282673120499,
      "learning_rate": 1.1011247800847374e-06,
      "loss": 0.0096,
      "step": 2721040
    },
    {
      "epoch": 4.4530743701027085,
      "grad_norm": 0.1791793704032898,
      "learning_rate": 1.1010588878712204e-06,
      "loss": 0.0132,
      "step": 2721060
    },
    {
      "epoch": 4.453107100541361,
      "grad_norm": 0.1250353902578354,
      "learning_rate": 1.1009929956577033e-06,
      "loss": 0.0121,
      "step": 2721080
    },
    {
      "epoch": 4.453139830980015,
      "grad_norm": 0.25063273310661316,
      "learning_rate": 1.100927103444186e-06,
      "loss": 0.0085,
      "step": 2721100
    },
    {
      "epoch": 4.453172561418668,
      "grad_norm": 0.10818953067064285,
      "learning_rate": 1.100861211230669e-06,
      "loss": 0.0063,
      "step": 2721120
    },
    {
      "epoch": 4.453205291857321,
      "grad_norm": 0.17371031641960144,
      "learning_rate": 1.1007953190171518e-06,
      "loss": 0.0066,
      "step": 2721140
    },
    {
      "epoch": 4.453238022295975,
      "grad_norm": 0.42983388900756836,
      "learning_rate": 1.1007294268036347e-06,
      "loss": 0.0073,
      "step": 2721160
    },
    {
      "epoch": 4.453270752734628,
      "grad_norm": 0.05047757178544998,
      "learning_rate": 1.1006635345901177e-06,
      "loss": 0.01,
      "step": 2721180
    },
    {
      "epoch": 4.453303483173282,
      "grad_norm": 0.2170916646718979,
      "learning_rate": 1.1005976423766004e-06,
      "loss": 0.0104,
      "step": 2721200
    },
    {
      "epoch": 4.4533362136119345,
      "grad_norm": 0.12971194088459015,
      "learning_rate": 1.1005317501630833e-06,
      "loss": 0.0079,
      "step": 2721220
    },
    {
      "epoch": 4.453368944050588,
      "grad_norm": 0.058750659227371216,
      "learning_rate": 1.1004658579495663e-06,
      "loss": 0.0114,
      "step": 2721240
    },
    {
      "epoch": 4.453401674489242,
      "grad_norm": 0.1509813368320465,
      "learning_rate": 1.100399965736049e-06,
      "loss": 0.0102,
      "step": 2721260
    },
    {
      "epoch": 4.453434404927894,
      "grad_norm": 0.20059962570667267,
      "learning_rate": 1.100334073522532e-06,
      "loss": 0.0103,
      "step": 2721280
    },
    {
      "epoch": 4.453467135366548,
      "grad_norm": 0.33099666237831116,
      "learning_rate": 1.1002681813090147e-06,
      "loss": 0.0101,
      "step": 2721300
    },
    {
      "epoch": 4.453499865805202,
      "grad_norm": 0.20051485300064087,
      "learning_rate": 1.1002022890954977e-06,
      "loss": 0.0076,
      "step": 2721320
    },
    {
      "epoch": 4.453532596243855,
      "grad_norm": 0.08709393441677094,
      "learning_rate": 1.1001363968819806e-06,
      "loss": 0.0077,
      "step": 2721340
    },
    {
      "epoch": 4.453565326682508,
      "grad_norm": 0.5946020483970642,
      "learning_rate": 1.1000705046684634e-06,
      "loss": 0.0098,
      "step": 2721360
    },
    {
      "epoch": 4.4535980571211615,
      "grad_norm": 0.41425591707229614,
      "learning_rate": 1.1000046124549463e-06,
      "loss": 0.0112,
      "step": 2721380
    },
    {
      "epoch": 4.453630787559815,
      "grad_norm": 0.34403514862060547,
      "learning_rate": 1.099938720241429e-06,
      "loss": 0.011,
      "step": 2721400
    },
    {
      "epoch": 4.453663517998468,
      "grad_norm": 0.17134855687618256,
      "learning_rate": 1.099872828027912e-06,
      "loss": 0.0105,
      "step": 2721420
    },
    {
      "epoch": 4.453696248437121,
      "grad_norm": 0.38663458824157715,
      "learning_rate": 1.099806935814395e-06,
      "loss": 0.0079,
      "step": 2721440
    },
    {
      "epoch": 4.453728978875775,
      "grad_norm": 0.33767879009246826,
      "learning_rate": 1.0997410436008777e-06,
      "loss": 0.007,
      "step": 2721460
    },
    {
      "epoch": 4.453761709314429,
      "grad_norm": 0.4313328266143799,
      "learning_rate": 1.0996751513873607e-06,
      "loss": 0.0077,
      "step": 2721480
    },
    {
      "epoch": 4.453794439753081,
      "grad_norm": 0.19758276641368866,
      "learning_rate": 1.0996092591738434e-06,
      "loss": 0.0059,
      "step": 2721500
    },
    {
      "epoch": 4.453827170191735,
      "grad_norm": 0.23138868808746338,
      "learning_rate": 1.0995433669603263e-06,
      "loss": 0.0113,
      "step": 2721520
    },
    {
      "epoch": 4.4538599006303885,
      "grad_norm": 0.03811050206422806,
      "learning_rate": 1.0994774747468093e-06,
      "loss": 0.0092,
      "step": 2721540
    },
    {
      "epoch": 4.453892631069041,
      "grad_norm": 0.18528494238853455,
      "learning_rate": 1.099411582533292e-06,
      "loss": 0.0085,
      "step": 2721560
    },
    {
      "epoch": 4.453925361507695,
      "grad_norm": 0.5532967448234558,
      "learning_rate": 1.099345690319775e-06,
      "loss": 0.0107,
      "step": 2721580
    },
    {
      "epoch": 4.453958091946348,
      "grad_norm": 0.2665795087814331,
      "learning_rate": 1.099279798106258e-06,
      "loss": 0.0092,
      "step": 2721600
    },
    {
      "epoch": 4.453990822385002,
      "grad_norm": 0.22655409574508667,
      "learning_rate": 1.0992139058927407e-06,
      "loss": 0.0091,
      "step": 2721620
    },
    {
      "epoch": 4.454023552823655,
      "grad_norm": 0.17112593352794647,
      "learning_rate": 1.0991480136792236e-06,
      "loss": 0.0065,
      "step": 2721640
    },
    {
      "epoch": 4.454056283262308,
      "grad_norm": 0.15275225043296814,
      "learning_rate": 1.0990821214657064e-06,
      "loss": 0.008,
      "step": 2721660
    },
    {
      "epoch": 4.454089013700962,
      "grad_norm": 0.5749959349632263,
      "learning_rate": 1.0990162292521893e-06,
      "loss": 0.0114,
      "step": 2721680
    },
    {
      "epoch": 4.454121744139615,
      "grad_norm": 0.1199348047375679,
      "learning_rate": 1.0989503370386723e-06,
      "loss": 0.0094,
      "step": 2721700
    },
    {
      "epoch": 4.454154474578268,
      "grad_norm": 1.0951708555221558,
      "learning_rate": 1.098884444825155e-06,
      "loss": 0.0141,
      "step": 2721720
    },
    {
      "epoch": 4.454187205016922,
      "grad_norm": 0.355503112077713,
      "learning_rate": 1.098818552611638e-06,
      "loss": 0.0098,
      "step": 2721740
    },
    {
      "epoch": 4.454219935455575,
      "grad_norm": 0.08044411242008209,
      "learning_rate": 1.0987526603981207e-06,
      "loss": 0.0107,
      "step": 2721760
    },
    {
      "epoch": 4.454252665894228,
      "grad_norm": 0.4013921320438385,
      "learning_rate": 1.0986867681846037e-06,
      "loss": 0.0118,
      "step": 2721780
    },
    {
      "epoch": 4.454285396332882,
      "grad_norm": 0.316832959651947,
      "learning_rate": 1.0986208759710866e-06,
      "loss": 0.0138,
      "step": 2721800
    },
    {
      "epoch": 4.454318126771535,
      "grad_norm": 0.10284244269132614,
      "learning_rate": 1.0985549837575694e-06,
      "loss": 0.0095,
      "step": 2721820
    },
    {
      "epoch": 4.454350857210188,
      "grad_norm": 0.24544987082481384,
      "learning_rate": 1.0984890915440523e-06,
      "loss": 0.0094,
      "step": 2721840
    },
    {
      "epoch": 4.454383587648842,
      "grad_norm": 0.2450844645500183,
      "learning_rate": 1.0984231993305353e-06,
      "loss": 0.0086,
      "step": 2721860
    },
    {
      "epoch": 4.454416318087495,
      "grad_norm": 0.22681088745594025,
      "learning_rate": 1.0983573071170182e-06,
      "loss": 0.0053,
      "step": 2721880
    },
    {
      "epoch": 4.454449048526149,
      "grad_norm": 0.9720516204833984,
      "learning_rate": 1.098291414903501e-06,
      "loss": 0.0093,
      "step": 2721900
    },
    {
      "epoch": 4.4544817789648015,
      "grad_norm": 0.45081087946891785,
      "learning_rate": 1.0982255226899837e-06,
      "loss": 0.008,
      "step": 2721920
    },
    {
      "epoch": 4.454514509403455,
      "grad_norm": 0.23276188969612122,
      "learning_rate": 1.0981596304764666e-06,
      "loss": 0.0101,
      "step": 2721940
    },
    {
      "epoch": 4.454547239842109,
      "grad_norm": 0.1229214072227478,
      "learning_rate": 1.0980937382629496e-06,
      "loss": 0.0068,
      "step": 2721960
    },
    {
      "epoch": 4.454579970280761,
      "grad_norm": 0.6249932050704956,
      "learning_rate": 1.0980278460494325e-06,
      "loss": 0.0111,
      "step": 2721980
    },
    {
      "epoch": 4.454612700719415,
      "grad_norm": 0.21783019602298737,
      "learning_rate": 1.0979619538359153e-06,
      "loss": 0.0062,
      "step": 2722000
    },
    {
      "epoch": 4.4546454311580685,
      "grad_norm": 0.060202956199645996,
      "learning_rate": 1.097896061622398e-06,
      "loss": 0.0092,
      "step": 2722020
    },
    {
      "epoch": 4.454678161596721,
      "grad_norm": 0.14279553294181824,
      "learning_rate": 1.0978301694088812e-06,
      "loss": 0.0139,
      "step": 2722040
    },
    {
      "epoch": 4.454710892035375,
      "grad_norm": 0.2245418131351471,
      "learning_rate": 1.097764277195364e-06,
      "loss": 0.0151,
      "step": 2722060
    },
    {
      "epoch": 4.454743622474028,
      "grad_norm": 0.1796305775642395,
      "learning_rate": 1.0976983849818469e-06,
      "loss": 0.0094,
      "step": 2722080
    },
    {
      "epoch": 4.454776352912682,
      "grad_norm": 0.21241427958011627,
      "learning_rate": 1.0976324927683296e-06,
      "loss": 0.011,
      "step": 2722100
    },
    {
      "epoch": 4.454809083351335,
      "grad_norm": 0.2434375286102295,
      "learning_rate": 1.0975666005548126e-06,
      "loss": 0.0102,
      "step": 2722120
    },
    {
      "epoch": 4.454841813789988,
      "grad_norm": 0.29677489399909973,
      "learning_rate": 1.0975007083412955e-06,
      "loss": 0.0062,
      "step": 2722140
    },
    {
      "epoch": 4.454874544228642,
      "grad_norm": 0.40547996759414673,
      "learning_rate": 1.0974348161277783e-06,
      "loss": 0.0116,
      "step": 2722160
    },
    {
      "epoch": 4.4549072746672955,
      "grad_norm": 0.23350635170936584,
      "learning_rate": 1.0973689239142612e-06,
      "loss": 0.0078,
      "step": 2722180
    },
    {
      "epoch": 4.454940005105948,
      "grad_norm": 0.4746961295604706,
      "learning_rate": 1.097303031700744e-06,
      "loss": 0.01,
      "step": 2722200
    },
    {
      "epoch": 4.454972735544602,
      "grad_norm": 0.18995928764343262,
      "learning_rate": 1.097237139487227e-06,
      "loss": 0.0116,
      "step": 2722220
    },
    {
      "epoch": 4.455005465983255,
      "grad_norm": 0.18473005294799805,
      "learning_rate": 1.0971712472737099e-06,
      "loss": 0.0108,
      "step": 2722240
    },
    {
      "epoch": 4.455038196421908,
      "grad_norm": 0.12935493886470795,
      "learning_rate": 1.0971053550601926e-06,
      "loss": 0.0118,
      "step": 2722260
    },
    {
      "epoch": 4.455070926860562,
      "grad_norm": 0.19362425804138184,
      "learning_rate": 1.0970394628466755e-06,
      "loss": 0.0056,
      "step": 2722280
    },
    {
      "epoch": 4.455103657299215,
      "grad_norm": 0.13978472352027893,
      "learning_rate": 1.0969735706331585e-06,
      "loss": 0.0095,
      "step": 2722300
    },
    {
      "epoch": 4.455136387737868,
      "grad_norm": 0.6943541169166565,
      "learning_rate": 1.0969076784196412e-06,
      "loss": 0.0082,
      "step": 2722320
    },
    {
      "epoch": 4.455169118176522,
      "grad_norm": 0.5926822423934937,
      "learning_rate": 1.0968417862061242e-06,
      "loss": 0.0131,
      "step": 2722340
    },
    {
      "epoch": 4.455201848615175,
      "grad_norm": 0.03581605479121208,
      "learning_rate": 1.096775893992607e-06,
      "loss": 0.0098,
      "step": 2722360
    },
    {
      "epoch": 4.455234579053829,
      "grad_norm": 0.07760917395353317,
      "learning_rate": 1.0967100017790899e-06,
      "loss": 0.0065,
      "step": 2722380
    },
    {
      "epoch": 4.4552673094924815,
      "grad_norm": 0.08497109264135361,
      "learning_rate": 1.0966441095655728e-06,
      "loss": 0.0088,
      "step": 2722400
    },
    {
      "epoch": 4.455300039931135,
      "grad_norm": 0.1284797191619873,
      "learning_rate": 1.0965782173520556e-06,
      "loss": 0.009,
      "step": 2722420
    },
    {
      "epoch": 4.455332770369789,
      "grad_norm": 0.03901861235499382,
      "learning_rate": 1.0965123251385385e-06,
      "loss": 0.0079,
      "step": 2722440
    },
    {
      "epoch": 4.455365500808442,
      "grad_norm": 0.284631609916687,
      "learning_rate": 1.0964464329250213e-06,
      "loss": 0.0084,
      "step": 2722460
    },
    {
      "epoch": 4.455398231247095,
      "grad_norm": 0.19786155223846436,
      "learning_rate": 1.0963805407115042e-06,
      "loss": 0.009,
      "step": 2722480
    },
    {
      "epoch": 4.455430961685749,
      "grad_norm": 0.3285730481147766,
      "learning_rate": 1.0963146484979872e-06,
      "loss": 0.0098,
      "step": 2722500
    },
    {
      "epoch": 4.455463692124402,
      "grad_norm": 0.441962331533432,
      "learning_rate": 1.09624875628447e-06,
      "loss": 0.0131,
      "step": 2722520
    },
    {
      "epoch": 4.455496422563055,
      "grad_norm": 0.1954222172498703,
      "learning_rate": 1.0961828640709529e-06,
      "loss": 0.0117,
      "step": 2722540
    },
    {
      "epoch": 4.4555291530017085,
      "grad_norm": 0.10396675765514374,
      "learning_rate": 1.0961169718574358e-06,
      "loss": 0.0112,
      "step": 2722560
    },
    {
      "epoch": 4.455561883440362,
      "grad_norm": 0.12149722874164581,
      "learning_rate": 1.0960510796439185e-06,
      "loss": 0.0104,
      "step": 2722580
    },
    {
      "epoch": 4.455594613879015,
      "grad_norm": 0.47131314873695374,
      "learning_rate": 1.0959851874304015e-06,
      "loss": 0.0078,
      "step": 2722600
    },
    {
      "epoch": 4.455627344317668,
      "grad_norm": 0.23449908196926117,
      "learning_rate": 1.0959192952168842e-06,
      "loss": 0.01,
      "step": 2722620
    },
    {
      "epoch": 4.455660074756322,
      "grad_norm": 0.3724523186683655,
      "learning_rate": 1.0958534030033672e-06,
      "loss": 0.0108,
      "step": 2722640
    },
    {
      "epoch": 4.455692805194976,
      "grad_norm": 0.09737374633550644,
      "learning_rate": 1.0957875107898501e-06,
      "loss": 0.0089,
      "step": 2722660
    },
    {
      "epoch": 4.455725535633628,
      "grad_norm": 0.24853600561618805,
      "learning_rate": 1.0957216185763329e-06,
      "loss": 0.0095,
      "step": 2722680
    },
    {
      "epoch": 4.455758266072282,
      "grad_norm": 0.43860483169555664,
      "learning_rate": 1.0956557263628158e-06,
      "loss": 0.0114,
      "step": 2722700
    },
    {
      "epoch": 4.4557909965109355,
      "grad_norm": 0.4181737005710602,
      "learning_rate": 1.0955898341492986e-06,
      "loss": 0.0065,
      "step": 2722720
    },
    {
      "epoch": 4.455823726949588,
      "grad_norm": 0.14400914311408997,
      "learning_rate": 1.0955239419357815e-06,
      "loss": 0.0081,
      "step": 2722740
    },
    {
      "epoch": 4.455856457388242,
      "grad_norm": 0.11406178772449493,
      "learning_rate": 1.0954580497222645e-06,
      "loss": 0.0085,
      "step": 2722760
    },
    {
      "epoch": 4.455889187826895,
      "grad_norm": 0.06983522325754166,
      "learning_rate": 1.0953921575087472e-06,
      "loss": 0.0077,
      "step": 2722780
    },
    {
      "epoch": 4.455921918265549,
      "grad_norm": 0.6379736065864563,
      "learning_rate": 1.0953262652952302e-06,
      "loss": 0.0106,
      "step": 2722800
    },
    {
      "epoch": 4.455954648704202,
      "grad_norm": 0.26857703924179077,
      "learning_rate": 1.095260373081713e-06,
      "loss": 0.0106,
      "step": 2722820
    },
    {
      "epoch": 4.455987379142855,
      "grad_norm": 0.20018140971660614,
      "learning_rate": 1.0951944808681959e-06,
      "loss": 0.0165,
      "step": 2722840
    },
    {
      "epoch": 4.456020109581509,
      "grad_norm": 0.5040633082389832,
      "learning_rate": 1.0951285886546788e-06,
      "loss": 0.0083,
      "step": 2722860
    },
    {
      "epoch": 4.4560528400201616,
      "grad_norm": 0.15485769510269165,
      "learning_rate": 1.0950626964411615e-06,
      "loss": 0.0088,
      "step": 2722880
    },
    {
      "epoch": 4.456085570458815,
      "grad_norm": 0.39951762557029724,
      "learning_rate": 1.0949968042276445e-06,
      "loss": 0.0072,
      "step": 2722900
    },
    {
      "epoch": 4.456118300897469,
      "grad_norm": 0.2037792056798935,
      "learning_rate": 1.0949309120141274e-06,
      "loss": 0.0106,
      "step": 2722920
    },
    {
      "epoch": 4.456151031336122,
      "grad_norm": 0.3181985914707184,
      "learning_rate": 1.0948650198006102e-06,
      "loss": 0.0098,
      "step": 2722940
    },
    {
      "epoch": 4.456183761774775,
      "grad_norm": 0.1501154750585556,
      "learning_rate": 1.0947991275870931e-06,
      "loss": 0.0086,
      "step": 2722960
    },
    {
      "epoch": 4.456216492213429,
      "grad_norm": 0.39814645051956177,
      "learning_rate": 1.0947332353735759e-06,
      "loss": 0.0079,
      "step": 2722980
    },
    {
      "epoch": 4.456249222652082,
      "grad_norm": 0.7705731987953186,
      "learning_rate": 1.0946673431600588e-06,
      "loss": 0.0133,
      "step": 2723000
    },
    {
      "epoch": 4.456281953090735,
      "grad_norm": 0.550381600856781,
      "learning_rate": 1.0946014509465418e-06,
      "loss": 0.0102,
      "step": 2723020
    },
    {
      "epoch": 4.4563146835293885,
      "grad_norm": 0.17668288946151733,
      "learning_rate": 1.0945355587330245e-06,
      "loss": 0.0073,
      "step": 2723040
    },
    {
      "epoch": 4.456347413968042,
      "grad_norm": 0.20260460674762726,
      "learning_rate": 1.0944696665195075e-06,
      "loss": 0.0069,
      "step": 2723060
    },
    {
      "epoch": 4.456380144406696,
      "grad_norm": 0.14583635330200195,
      "learning_rate": 1.0944037743059902e-06,
      "loss": 0.0094,
      "step": 2723080
    },
    {
      "epoch": 4.456412874845348,
      "grad_norm": 0.18758289515972137,
      "learning_rate": 1.0943378820924732e-06,
      "loss": 0.0106,
      "step": 2723100
    },
    {
      "epoch": 4.456445605284002,
      "grad_norm": 0.59814453125,
      "learning_rate": 1.0942719898789561e-06,
      "loss": 0.0084,
      "step": 2723120
    },
    {
      "epoch": 4.456478335722656,
      "grad_norm": 0.1684730350971222,
      "learning_rate": 1.0942060976654389e-06,
      "loss": 0.0069,
      "step": 2723140
    },
    {
      "epoch": 4.456511066161308,
      "grad_norm": 0.22343944013118744,
      "learning_rate": 1.0941402054519218e-06,
      "loss": 0.0094,
      "step": 2723160
    },
    {
      "epoch": 4.456543796599962,
      "grad_norm": 0.25505003333091736,
      "learning_rate": 1.0940743132384048e-06,
      "loss": 0.0061,
      "step": 2723180
    },
    {
      "epoch": 4.4565765270386155,
      "grad_norm": 0.265745609998703,
      "learning_rate": 1.0940084210248877e-06,
      "loss": 0.0104,
      "step": 2723200
    },
    {
      "epoch": 4.456609257477269,
      "grad_norm": 0.15177416801452637,
      "learning_rate": 1.0939425288113705e-06,
      "loss": 0.0116,
      "step": 2723220
    },
    {
      "epoch": 4.456641987915922,
      "grad_norm": 0.12339483201503754,
      "learning_rate": 1.0938766365978532e-06,
      "loss": 0.0104,
      "step": 2723240
    },
    {
      "epoch": 4.456674718354575,
      "grad_norm": 0.1571032553911209,
      "learning_rate": 1.0938107443843361e-06,
      "loss": 0.0072,
      "step": 2723260
    },
    {
      "epoch": 4.456707448793229,
      "grad_norm": 0.236354261636734,
      "learning_rate": 1.093744852170819e-06,
      "loss": 0.0121,
      "step": 2723280
    },
    {
      "epoch": 4.456740179231882,
      "grad_norm": 0.27520254254341125,
      "learning_rate": 1.093678959957302e-06,
      "loss": 0.0091,
      "step": 2723300
    },
    {
      "epoch": 4.456772909670535,
      "grad_norm": 0.4911995530128479,
      "learning_rate": 1.0936130677437848e-06,
      "loss": 0.0064,
      "step": 2723320
    },
    {
      "epoch": 4.456805640109189,
      "grad_norm": 0.2890198528766632,
      "learning_rate": 1.0935471755302675e-06,
      "loss": 0.0122,
      "step": 2723340
    },
    {
      "epoch": 4.4568383705478425,
      "grad_norm": 0.2023663967847824,
      "learning_rate": 1.0934812833167507e-06,
      "loss": 0.013,
      "step": 2723360
    },
    {
      "epoch": 4.456871100986495,
      "grad_norm": 0.1644384115934372,
      "learning_rate": 1.0934153911032334e-06,
      "loss": 0.0056,
      "step": 2723380
    },
    {
      "epoch": 4.456903831425149,
      "grad_norm": 0.35815054178237915,
      "learning_rate": 1.0933494988897164e-06,
      "loss": 0.0098,
      "step": 2723400
    },
    {
      "epoch": 4.456936561863802,
      "grad_norm": 0.4418851137161255,
      "learning_rate": 1.0932836066761991e-06,
      "loss": 0.0094,
      "step": 2723420
    },
    {
      "epoch": 4.456969292302455,
      "grad_norm": 0.3765667676925659,
      "learning_rate": 1.093217714462682e-06,
      "loss": 0.0084,
      "step": 2723440
    },
    {
      "epoch": 4.457002022741109,
      "grad_norm": 0.10734022408723831,
      "learning_rate": 1.093151822249165e-06,
      "loss": 0.0082,
      "step": 2723460
    },
    {
      "epoch": 4.457034753179762,
      "grad_norm": 0.13528400659561157,
      "learning_rate": 1.0930859300356478e-06,
      "loss": 0.0078,
      "step": 2723480
    },
    {
      "epoch": 4.457067483618415,
      "grad_norm": 0.1541460156440735,
      "learning_rate": 1.0930200378221307e-06,
      "loss": 0.0088,
      "step": 2723500
    },
    {
      "epoch": 4.457100214057069,
      "grad_norm": 0.1859157830476761,
      "learning_rate": 1.0929541456086135e-06,
      "loss": 0.0064,
      "step": 2723520
    },
    {
      "epoch": 4.457132944495722,
      "grad_norm": 0.22701101005077362,
      "learning_rate": 1.0928882533950964e-06,
      "loss": 0.0063,
      "step": 2723540
    },
    {
      "epoch": 4.457165674934376,
      "grad_norm": 0.13127148151397705,
      "learning_rate": 1.0928223611815794e-06,
      "loss": 0.0103,
      "step": 2723560
    },
    {
      "epoch": 4.4571984053730285,
      "grad_norm": 0.07043656706809998,
      "learning_rate": 1.092756468968062e-06,
      "loss": 0.0094,
      "step": 2723580
    },
    {
      "epoch": 4.457231135811682,
      "grad_norm": 0.22011347115039825,
      "learning_rate": 1.092690576754545e-06,
      "loss": 0.0107,
      "step": 2723600
    },
    {
      "epoch": 4.457263866250336,
      "grad_norm": 0.18699786067008972,
      "learning_rate": 1.092624684541028e-06,
      "loss": 0.005,
      "step": 2723620
    },
    {
      "epoch": 4.457296596688989,
      "grad_norm": 0.09837023168802261,
      "learning_rate": 1.0925587923275107e-06,
      "loss": 0.0081,
      "step": 2723640
    },
    {
      "epoch": 4.457329327127642,
      "grad_norm": 0.2886776030063629,
      "learning_rate": 1.0924929001139937e-06,
      "loss": 0.0064,
      "step": 2723660
    },
    {
      "epoch": 4.4573620575662956,
      "grad_norm": 0.12518586218357086,
      "learning_rate": 1.0924270079004764e-06,
      "loss": 0.01,
      "step": 2723680
    },
    {
      "epoch": 4.457394788004949,
      "grad_norm": 0.19583643972873688,
      "learning_rate": 1.0923611156869594e-06,
      "loss": 0.0077,
      "step": 2723700
    },
    {
      "epoch": 4.457427518443602,
      "grad_norm": 0.2149156779050827,
      "learning_rate": 1.0922952234734423e-06,
      "loss": 0.0117,
      "step": 2723720
    },
    {
      "epoch": 4.4574602488822554,
      "grad_norm": 0.19850240647792816,
      "learning_rate": 1.092229331259925e-06,
      "loss": 0.0082,
      "step": 2723740
    },
    {
      "epoch": 4.457492979320909,
      "grad_norm": 0.0977746769785881,
      "learning_rate": 1.092163439046408e-06,
      "loss": 0.0073,
      "step": 2723760
    },
    {
      "epoch": 4.457525709759562,
      "grad_norm": 0.3471772372722626,
      "learning_rate": 1.0920975468328908e-06,
      "loss": 0.0086,
      "step": 2723780
    },
    {
      "epoch": 4.457558440198215,
      "grad_norm": 0.11592422425746918,
      "learning_rate": 1.0920316546193737e-06,
      "loss": 0.0073,
      "step": 2723800
    },
    {
      "epoch": 4.457591170636869,
      "grad_norm": 0.1830911487340927,
      "learning_rate": 1.0919657624058567e-06,
      "loss": 0.0077,
      "step": 2723820
    },
    {
      "epoch": 4.4576239010755225,
      "grad_norm": 0.21081754565238953,
      "learning_rate": 1.0918998701923394e-06,
      "loss": 0.0099,
      "step": 2723840
    },
    {
      "epoch": 4.457656631514175,
      "grad_norm": 0.09986703097820282,
      "learning_rate": 1.0918339779788224e-06,
      "loss": 0.0103,
      "step": 2723860
    },
    {
      "epoch": 4.457689361952829,
      "grad_norm": 0.3502763509750366,
      "learning_rate": 1.0917680857653053e-06,
      "loss": 0.0139,
      "step": 2723880
    },
    {
      "epoch": 4.457722092391482,
      "grad_norm": 0.43388012051582336,
      "learning_rate": 1.091702193551788e-06,
      "loss": 0.0072,
      "step": 2723900
    },
    {
      "epoch": 4.457754822830136,
      "grad_norm": 0.19058814644813538,
      "learning_rate": 1.091636301338271e-06,
      "loss": 0.0144,
      "step": 2723920
    },
    {
      "epoch": 4.457787553268789,
      "grad_norm": 0.29912540316581726,
      "learning_rate": 1.0915704091247537e-06,
      "loss": 0.0139,
      "step": 2723940
    },
    {
      "epoch": 4.457820283707442,
      "grad_norm": 0.179646834731102,
      "learning_rate": 1.0915045169112367e-06,
      "loss": 0.0123,
      "step": 2723960
    },
    {
      "epoch": 4.457853014146096,
      "grad_norm": 0.19989891350269318,
      "learning_rate": 1.0914386246977196e-06,
      "loss": 0.0127,
      "step": 2723980
    },
    {
      "epoch": 4.457885744584749,
      "grad_norm": 0.35210397839546204,
      "learning_rate": 1.0913727324842024e-06,
      "loss": 0.0086,
      "step": 2724000
    },
    {
      "epoch": 4.457918475023402,
      "grad_norm": 0.19485178589820862,
      "learning_rate": 1.0913068402706853e-06,
      "loss": 0.0105,
      "step": 2724020
    },
    {
      "epoch": 4.457951205462056,
      "grad_norm": 0.07777135074138641,
      "learning_rate": 1.091240948057168e-06,
      "loss": 0.0079,
      "step": 2724040
    },
    {
      "epoch": 4.4579839359007085,
      "grad_norm": 0.07429666817188263,
      "learning_rate": 1.091175055843651e-06,
      "loss": 0.0093,
      "step": 2724060
    },
    {
      "epoch": 4.458016666339362,
      "grad_norm": 0.045799802988767624,
      "learning_rate": 1.091109163630134e-06,
      "loss": 0.0093,
      "step": 2724080
    },
    {
      "epoch": 4.458049396778016,
      "grad_norm": 0.3445689380168915,
      "learning_rate": 1.0910432714166167e-06,
      "loss": 0.0068,
      "step": 2724100
    },
    {
      "epoch": 4.458082127216669,
      "grad_norm": 0.22306224703788757,
      "learning_rate": 1.0909773792030997e-06,
      "loss": 0.0066,
      "step": 2724120
    },
    {
      "epoch": 4.458114857655322,
      "grad_norm": 0.10115017741918564,
      "learning_rate": 1.0909114869895824e-06,
      "loss": 0.0102,
      "step": 2724140
    },
    {
      "epoch": 4.458147588093976,
      "grad_norm": 0.2028343230485916,
      "learning_rate": 1.0908455947760654e-06,
      "loss": 0.0091,
      "step": 2724160
    },
    {
      "epoch": 4.458180318532629,
      "grad_norm": 0.22648420929908752,
      "learning_rate": 1.0907797025625483e-06,
      "loss": 0.0077,
      "step": 2724180
    },
    {
      "epoch": 4.458213048971282,
      "grad_norm": 0.30788499116897583,
      "learning_rate": 1.090713810349031e-06,
      "loss": 0.011,
      "step": 2724200
    },
    {
      "epoch": 4.4582457794099355,
      "grad_norm": 0.22602479159832,
      "learning_rate": 1.090647918135514e-06,
      "loss": 0.0071,
      "step": 2724220
    },
    {
      "epoch": 4.458278509848589,
      "grad_norm": 0.24396538734436035,
      "learning_rate": 1.090582025921997e-06,
      "loss": 0.01,
      "step": 2724240
    },
    {
      "epoch": 4.458311240287243,
      "grad_norm": 0.32855692505836487,
      "learning_rate": 1.0905161337084797e-06,
      "loss": 0.0089,
      "step": 2724260
    },
    {
      "epoch": 4.458343970725895,
      "grad_norm": 0.15287402272224426,
      "learning_rate": 1.0904502414949626e-06,
      "loss": 0.0088,
      "step": 2724280
    },
    {
      "epoch": 4.458376701164549,
      "grad_norm": 0.4460858404636383,
      "learning_rate": 1.0903843492814454e-06,
      "loss": 0.0074,
      "step": 2724300
    },
    {
      "epoch": 4.458409431603203,
      "grad_norm": 0.629802942276001,
      "learning_rate": 1.0903184570679283e-06,
      "loss": 0.0081,
      "step": 2724320
    },
    {
      "epoch": 4.458442162041855,
      "grad_norm": 0.33137890696525574,
      "learning_rate": 1.0902525648544113e-06,
      "loss": 0.0053,
      "step": 2724340
    },
    {
      "epoch": 4.458474892480509,
      "grad_norm": 0.5063884258270264,
      "learning_rate": 1.090186672640894e-06,
      "loss": 0.0073,
      "step": 2724360
    },
    {
      "epoch": 4.4585076229191625,
      "grad_norm": 0.11592172086238861,
      "learning_rate": 1.090120780427377e-06,
      "loss": 0.0104,
      "step": 2724380
    },
    {
      "epoch": 4.458540353357816,
      "grad_norm": 0.18880531191825867,
      "learning_rate": 1.0900548882138597e-06,
      "loss": 0.0093,
      "step": 2724400
    },
    {
      "epoch": 4.458573083796469,
      "grad_norm": 0.1390402466058731,
      "learning_rate": 1.0899889960003427e-06,
      "loss": 0.0108,
      "step": 2724420
    },
    {
      "epoch": 4.458605814235122,
      "grad_norm": 0.32520392537117004,
      "learning_rate": 1.0899231037868256e-06,
      "loss": 0.0091,
      "step": 2724440
    },
    {
      "epoch": 4.458638544673776,
      "grad_norm": 0.11585886031389236,
      "learning_rate": 1.0898572115733084e-06,
      "loss": 0.0071,
      "step": 2724460
    },
    {
      "epoch": 4.458671275112429,
      "grad_norm": 0.46309319138526917,
      "learning_rate": 1.0897913193597913e-06,
      "loss": 0.0103,
      "step": 2724480
    },
    {
      "epoch": 4.458704005551082,
      "grad_norm": 0.3384445607662201,
      "learning_rate": 1.0897254271462743e-06,
      "loss": 0.0099,
      "step": 2724500
    },
    {
      "epoch": 4.458736735989736,
      "grad_norm": 0.15052585303783417,
      "learning_rate": 1.0896595349327572e-06,
      "loss": 0.005,
      "step": 2724520
    },
    {
      "epoch": 4.4587694664283894,
      "grad_norm": 0.38214269280433655,
      "learning_rate": 1.08959364271924e-06,
      "loss": 0.0061,
      "step": 2724540
    },
    {
      "epoch": 4.458802196867042,
      "grad_norm": 0.33389681577682495,
      "learning_rate": 1.0895277505057227e-06,
      "loss": 0.0107,
      "step": 2724560
    },
    {
      "epoch": 4.458834927305696,
      "grad_norm": 0.5308367609977722,
      "learning_rate": 1.0894618582922056e-06,
      "loss": 0.0111,
      "step": 2724580
    },
    {
      "epoch": 4.458867657744349,
      "grad_norm": 0.21213676035404205,
      "learning_rate": 1.0893959660786886e-06,
      "loss": 0.0074,
      "step": 2724600
    },
    {
      "epoch": 4.458900388183002,
      "grad_norm": 0.09848575294017792,
      "learning_rate": 1.0893300738651716e-06,
      "loss": 0.0075,
      "step": 2724620
    },
    {
      "epoch": 4.458933118621656,
      "grad_norm": 0.08076409995555878,
      "learning_rate": 1.0892641816516543e-06,
      "loss": 0.0082,
      "step": 2724640
    },
    {
      "epoch": 4.458965849060309,
      "grad_norm": 0.40023958683013916,
      "learning_rate": 1.089198289438137e-06,
      "loss": 0.0108,
      "step": 2724660
    },
    {
      "epoch": 4.458998579498963,
      "grad_norm": 0.06339818239212036,
      "learning_rate": 1.0891323972246202e-06,
      "loss": 0.0111,
      "step": 2724680
    },
    {
      "epoch": 4.4590313099376155,
      "grad_norm": 0.11386791616678238,
      "learning_rate": 1.089066505011103e-06,
      "loss": 0.0066,
      "step": 2724700
    },
    {
      "epoch": 4.459064040376269,
      "grad_norm": 0.17646856606006622,
      "learning_rate": 1.0890006127975859e-06,
      "loss": 0.0179,
      "step": 2724720
    },
    {
      "epoch": 4.459096770814923,
      "grad_norm": 0.6985676884651184,
      "learning_rate": 1.0889347205840686e-06,
      "loss": 0.0098,
      "step": 2724740
    },
    {
      "epoch": 4.459129501253575,
      "grad_norm": 0.1434895396232605,
      "learning_rate": 1.0888688283705516e-06,
      "loss": 0.0079,
      "step": 2724760
    },
    {
      "epoch": 4.459162231692229,
      "grad_norm": 0.11504171043634415,
      "learning_rate": 1.0888029361570345e-06,
      "loss": 0.0052,
      "step": 2724780
    },
    {
      "epoch": 4.459194962130883,
      "grad_norm": 0.18411226570606232,
      "learning_rate": 1.0887370439435173e-06,
      "loss": 0.0097,
      "step": 2724800
    },
    {
      "epoch": 4.459227692569536,
      "grad_norm": 0.3021971881389618,
      "learning_rate": 1.0886711517300002e-06,
      "loss": 0.0067,
      "step": 2724820
    },
    {
      "epoch": 4.459260423008189,
      "grad_norm": 0.1054452434182167,
      "learning_rate": 1.088605259516483e-06,
      "loss": 0.0082,
      "step": 2724840
    },
    {
      "epoch": 4.4592931534468425,
      "grad_norm": 0.20940084755420685,
      "learning_rate": 1.088539367302966e-06,
      "loss": 0.0087,
      "step": 2724860
    },
    {
      "epoch": 4.459325883885496,
      "grad_norm": 0.18629449605941772,
      "learning_rate": 1.0884734750894489e-06,
      "loss": 0.0083,
      "step": 2724880
    },
    {
      "epoch": 4.459358614324149,
      "grad_norm": 0.7920154929161072,
      "learning_rate": 1.0884075828759316e-06,
      "loss": 0.0123,
      "step": 2724900
    },
    {
      "epoch": 4.459391344762802,
      "grad_norm": 0.08571338653564453,
      "learning_rate": 1.0883416906624146e-06,
      "loss": 0.0092,
      "step": 2724920
    },
    {
      "epoch": 4.459424075201456,
      "grad_norm": 0.1566402167081833,
      "learning_rate": 1.0882757984488975e-06,
      "loss": 0.0067,
      "step": 2724940
    },
    {
      "epoch": 4.45945680564011,
      "grad_norm": 0.28966224193573,
      "learning_rate": 1.0882099062353802e-06,
      "loss": 0.0093,
      "step": 2724960
    },
    {
      "epoch": 4.459489536078762,
      "grad_norm": 0.17743141949176788,
      "learning_rate": 1.0881440140218632e-06,
      "loss": 0.0064,
      "step": 2724980
    },
    {
      "epoch": 4.459522266517416,
      "grad_norm": 0.15002211928367615,
      "learning_rate": 1.088078121808346e-06,
      "loss": 0.0061,
      "step": 2725000
    },
    {
      "epoch": 4.4595549969560695,
      "grad_norm": 0.14296461641788483,
      "learning_rate": 1.0880122295948289e-06,
      "loss": 0.0052,
      "step": 2725020
    },
    {
      "epoch": 4.459587727394722,
      "grad_norm": 0.27671775221824646,
      "learning_rate": 1.0879463373813118e-06,
      "loss": 0.012,
      "step": 2725040
    },
    {
      "epoch": 4.459620457833376,
      "grad_norm": 0.7653356790542603,
      "learning_rate": 1.0878804451677946e-06,
      "loss": 0.0089,
      "step": 2725060
    },
    {
      "epoch": 4.459653188272029,
      "grad_norm": 0.3107094168663025,
      "learning_rate": 1.0878145529542775e-06,
      "loss": 0.0122,
      "step": 2725080
    },
    {
      "epoch": 4.459685918710683,
      "grad_norm": 0.12474311143159866,
      "learning_rate": 1.0877486607407603e-06,
      "loss": 0.0145,
      "step": 2725100
    },
    {
      "epoch": 4.459718649149336,
      "grad_norm": 0.22145487368106842,
      "learning_rate": 1.0876827685272432e-06,
      "loss": 0.0109,
      "step": 2725120
    },
    {
      "epoch": 4.459751379587989,
      "grad_norm": 0.1683414727449417,
      "learning_rate": 1.0876168763137262e-06,
      "loss": 0.0155,
      "step": 2725140
    },
    {
      "epoch": 4.459784110026643,
      "grad_norm": 0.409790962934494,
      "learning_rate": 1.087550984100209e-06,
      "loss": 0.0109,
      "step": 2725160
    },
    {
      "epoch": 4.459816840465296,
      "grad_norm": 0.1073550209403038,
      "learning_rate": 1.0874850918866919e-06,
      "loss": 0.0126,
      "step": 2725180
    },
    {
      "epoch": 4.459849570903949,
      "grad_norm": 0.12264152616262436,
      "learning_rate": 1.0874191996731748e-06,
      "loss": 0.0133,
      "step": 2725200
    },
    {
      "epoch": 4.459882301342603,
      "grad_norm": 0.1382703334093094,
      "learning_rate": 1.0873533074596576e-06,
      "loss": 0.0073,
      "step": 2725220
    },
    {
      "epoch": 4.4599150317812555,
      "grad_norm": 0.5241022706031799,
      "learning_rate": 1.0872874152461405e-06,
      "loss": 0.0142,
      "step": 2725240
    },
    {
      "epoch": 4.459947762219909,
      "grad_norm": 0.6913233399391174,
      "learning_rate": 1.0872215230326232e-06,
      "loss": 0.0125,
      "step": 2725260
    },
    {
      "epoch": 4.459980492658563,
      "grad_norm": 0.12054935842752457,
      "learning_rate": 1.0871556308191062e-06,
      "loss": 0.0135,
      "step": 2725280
    },
    {
      "epoch": 4.460013223097216,
      "grad_norm": 0.2333635836839676,
      "learning_rate": 1.0870897386055891e-06,
      "loss": 0.0102,
      "step": 2725300
    },
    {
      "epoch": 4.460045953535869,
      "grad_norm": 0.2449057549238205,
      "learning_rate": 1.0870238463920719e-06,
      "loss": 0.0093,
      "step": 2725320
    },
    {
      "epoch": 4.460078683974523,
      "grad_norm": 0.16829824447631836,
      "learning_rate": 1.0869579541785548e-06,
      "loss": 0.0098,
      "step": 2725340
    },
    {
      "epoch": 4.460111414413176,
      "grad_norm": 0.09364306181669235,
      "learning_rate": 1.0868920619650376e-06,
      "loss": 0.0098,
      "step": 2725360
    },
    {
      "epoch": 4.46014414485183,
      "grad_norm": 0.47890567779541016,
      "learning_rate": 1.0868261697515205e-06,
      "loss": 0.0078,
      "step": 2725380
    },
    {
      "epoch": 4.4601768752904825,
      "grad_norm": 0.23461678624153137,
      "learning_rate": 1.0867602775380035e-06,
      "loss": 0.0143,
      "step": 2725400
    },
    {
      "epoch": 4.460209605729136,
      "grad_norm": 0.20436148345470428,
      "learning_rate": 1.0866943853244862e-06,
      "loss": 0.0083,
      "step": 2725420
    },
    {
      "epoch": 4.46024233616779,
      "grad_norm": 0.15470924973487854,
      "learning_rate": 1.0866284931109692e-06,
      "loss": 0.007,
      "step": 2725440
    },
    {
      "epoch": 4.460275066606442,
      "grad_norm": 0.29696398973464966,
      "learning_rate": 1.086562600897452e-06,
      "loss": 0.0138,
      "step": 2725460
    },
    {
      "epoch": 4.460307797045096,
      "grad_norm": 0.07508084923028946,
      "learning_rate": 1.0864967086839349e-06,
      "loss": 0.0141,
      "step": 2725480
    },
    {
      "epoch": 4.4603405274837495,
      "grad_norm": 0.33513525128364563,
      "learning_rate": 1.0864308164704178e-06,
      "loss": 0.0087,
      "step": 2725500
    },
    {
      "epoch": 4.460373257922402,
      "grad_norm": 0.31642940640449524,
      "learning_rate": 1.0863649242569006e-06,
      "loss": 0.0102,
      "step": 2725520
    },
    {
      "epoch": 4.460405988361056,
      "grad_norm": 0.29225388169288635,
      "learning_rate": 1.0862990320433835e-06,
      "loss": 0.008,
      "step": 2725540
    },
    {
      "epoch": 4.460438718799709,
      "grad_norm": 0.2281600534915924,
      "learning_rate": 1.0862331398298665e-06,
      "loss": 0.0082,
      "step": 2725560
    },
    {
      "epoch": 4.460471449238363,
      "grad_norm": 0.4841071665287018,
      "learning_rate": 1.0861672476163492e-06,
      "loss": 0.0088,
      "step": 2725580
    },
    {
      "epoch": 4.460504179677016,
      "grad_norm": 0.1500466763973236,
      "learning_rate": 1.0861013554028322e-06,
      "loss": 0.0151,
      "step": 2725600
    },
    {
      "epoch": 4.460536910115669,
      "grad_norm": 0.19699762761592865,
      "learning_rate": 1.0860354631893149e-06,
      "loss": 0.0097,
      "step": 2725620
    },
    {
      "epoch": 4.460569640554323,
      "grad_norm": 0.5013997554779053,
      "learning_rate": 1.0859695709757978e-06,
      "loss": 0.0117,
      "step": 2725640
    },
    {
      "epoch": 4.4606023709929765,
      "grad_norm": 0.20527732372283936,
      "learning_rate": 1.0859036787622808e-06,
      "loss": 0.0091,
      "step": 2725660
    },
    {
      "epoch": 4.460635101431629,
      "grad_norm": 0.3420987129211426,
      "learning_rate": 1.0858377865487635e-06,
      "loss": 0.0077,
      "step": 2725680
    },
    {
      "epoch": 4.460667831870283,
      "grad_norm": 0.11121194809675217,
      "learning_rate": 1.0857718943352465e-06,
      "loss": 0.0094,
      "step": 2725700
    },
    {
      "epoch": 4.460700562308936,
      "grad_norm": 0.38969892263412476,
      "learning_rate": 1.0857060021217292e-06,
      "loss": 0.0129,
      "step": 2725720
    },
    {
      "epoch": 4.460733292747589,
      "grad_norm": 0.15416644513607025,
      "learning_rate": 1.0856401099082122e-06,
      "loss": 0.0085,
      "step": 2725740
    },
    {
      "epoch": 4.460766023186243,
      "grad_norm": 0.3889580965042114,
      "learning_rate": 1.0855742176946951e-06,
      "loss": 0.008,
      "step": 2725760
    },
    {
      "epoch": 4.460798753624896,
      "grad_norm": 0.16824018955230713,
      "learning_rate": 1.0855083254811779e-06,
      "loss": 0.0075,
      "step": 2725780
    },
    {
      "epoch": 4.460831484063549,
      "grad_norm": 0.3941391110420227,
      "learning_rate": 1.0854424332676608e-06,
      "loss": 0.0086,
      "step": 2725800
    },
    {
      "epoch": 4.460864214502203,
      "grad_norm": 0.24836312234401703,
      "learning_rate": 1.0853765410541438e-06,
      "loss": 0.0129,
      "step": 2725820
    },
    {
      "epoch": 4.460896944940856,
      "grad_norm": 0.10819056630134583,
      "learning_rate": 1.0853106488406267e-06,
      "loss": 0.0091,
      "step": 2725840
    },
    {
      "epoch": 4.46092967537951,
      "grad_norm": 0.23083406686782837,
      "learning_rate": 1.0852447566271095e-06,
      "loss": 0.0103,
      "step": 2725860
    },
    {
      "epoch": 4.4609624058181625,
      "grad_norm": 0.2991134524345398,
      "learning_rate": 1.0851788644135922e-06,
      "loss": 0.0121,
      "step": 2725880
    },
    {
      "epoch": 4.460995136256816,
      "grad_norm": 0.3290199637413025,
      "learning_rate": 1.0851129722000752e-06,
      "loss": 0.0082,
      "step": 2725900
    },
    {
      "epoch": 4.46102786669547,
      "grad_norm": 0.6677100658416748,
      "learning_rate": 1.085047079986558e-06,
      "loss": 0.0065,
      "step": 2725920
    },
    {
      "epoch": 4.461060597134122,
      "grad_norm": 0.1641179472208023,
      "learning_rate": 1.084981187773041e-06,
      "loss": 0.0092,
      "step": 2725940
    },
    {
      "epoch": 4.461093327572776,
      "grad_norm": 0.15761640667915344,
      "learning_rate": 1.0849152955595238e-06,
      "loss": 0.0093,
      "step": 2725960
    },
    {
      "epoch": 4.46112605801143,
      "grad_norm": 0.32017385959625244,
      "learning_rate": 1.0848494033460065e-06,
      "loss": 0.0059,
      "step": 2725980
    },
    {
      "epoch": 4.461158788450083,
      "grad_norm": 0.18546725809574127,
      "learning_rate": 1.0847835111324897e-06,
      "loss": 0.0111,
      "step": 2726000
    },
    {
      "epoch": 4.461191518888736,
      "grad_norm": 0.1426311582326889,
      "learning_rate": 1.0847176189189724e-06,
      "loss": 0.0105,
      "step": 2726020
    },
    {
      "epoch": 4.4612242493273895,
      "grad_norm": 0.07080230116844177,
      "learning_rate": 1.0846517267054554e-06,
      "loss": 0.0132,
      "step": 2726040
    },
    {
      "epoch": 4.461256979766043,
      "grad_norm": 0.0988469123840332,
      "learning_rate": 1.0845858344919381e-06,
      "loss": 0.0106,
      "step": 2726060
    },
    {
      "epoch": 4.461289710204696,
      "grad_norm": 0.4058282673358917,
      "learning_rate": 1.084519942278421e-06,
      "loss": 0.0131,
      "step": 2726080
    },
    {
      "epoch": 4.461322440643349,
      "grad_norm": 0.40541812777519226,
      "learning_rate": 1.084454050064904e-06,
      "loss": 0.0085,
      "step": 2726100
    },
    {
      "epoch": 4.461355171082003,
      "grad_norm": 0.2044178545475006,
      "learning_rate": 1.0843881578513868e-06,
      "loss": 0.0154,
      "step": 2726120
    },
    {
      "epoch": 4.461387901520657,
      "grad_norm": 0.13590264320373535,
      "learning_rate": 1.0843222656378697e-06,
      "loss": 0.0084,
      "step": 2726140
    },
    {
      "epoch": 4.461420631959309,
      "grad_norm": 0.13559338450431824,
      "learning_rate": 1.0842563734243525e-06,
      "loss": 0.015,
      "step": 2726160
    },
    {
      "epoch": 4.461453362397963,
      "grad_norm": 0.141990065574646,
      "learning_rate": 1.0841904812108354e-06,
      "loss": 0.0072,
      "step": 2726180
    },
    {
      "epoch": 4.4614860928366165,
      "grad_norm": 0.767862856388092,
      "learning_rate": 1.0841245889973184e-06,
      "loss": 0.0075,
      "step": 2726200
    },
    {
      "epoch": 4.461518823275269,
      "grad_norm": 0.27015867829322815,
      "learning_rate": 1.0840586967838011e-06,
      "loss": 0.0063,
      "step": 2726220
    },
    {
      "epoch": 4.461551553713923,
      "grad_norm": 0.23782864212989807,
      "learning_rate": 1.083992804570284e-06,
      "loss": 0.0118,
      "step": 2726240
    },
    {
      "epoch": 4.461584284152576,
      "grad_norm": 0.5705509185791016,
      "learning_rate": 1.083926912356767e-06,
      "loss": 0.0147,
      "step": 2726260
    },
    {
      "epoch": 4.46161701459123,
      "grad_norm": 0.05476357415318489,
      "learning_rate": 1.0838610201432497e-06,
      "loss": 0.0078,
      "step": 2726280
    },
    {
      "epoch": 4.461649745029883,
      "grad_norm": 0.30193910002708435,
      "learning_rate": 1.0837951279297327e-06,
      "loss": 0.0074,
      "step": 2726300
    },
    {
      "epoch": 4.461682475468536,
      "grad_norm": 0.23761935532093048,
      "learning_rate": 1.0837292357162154e-06,
      "loss": 0.0065,
      "step": 2726320
    },
    {
      "epoch": 4.46171520590719,
      "grad_norm": 0.10426720976829529,
      "learning_rate": 1.0836633435026984e-06,
      "loss": 0.0084,
      "step": 2726340
    },
    {
      "epoch": 4.4617479363458425,
      "grad_norm": 0.1160731241106987,
      "learning_rate": 1.0835974512891813e-06,
      "loss": 0.0109,
      "step": 2726360
    },
    {
      "epoch": 4.461780666784496,
      "grad_norm": 0.36437490582466125,
      "learning_rate": 1.083531559075664e-06,
      "loss": 0.0135,
      "step": 2726380
    },
    {
      "epoch": 4.46181339722315,
      "grad_norm": 0.20748785138130188,
      "learning_rate": 1.083465666862147e-06,
      "loss": 0.0096,
      "step": 2726400
    },
    {
      "epoch": 4.461846127661803,
      "grad_norm": 0.3330425024032593,
      "learning_rate": 1.0833997746486298e-06,
      "loss": 0.0088,
      "step": 2726420
    },
    {
      "epoch": 4.461878858100456,
      "grad_norm": 0.7231074571609497,
      "learning_rate": 1.0833338824351127e-06,
      "loss": 0.0059,
      "step": 2726440
    },
    {
      "epoch": 4.46191158853911,
      "grad_norm": 0.1326683759689331,
      "learning_rate": 1.0832679902215957e-06,
      "loss": 0.0105,
      "step": 2726460
    },
    {
      "epoch": 4.461944318977763,
      "grad_norm": 0.26237785816192627,
      "learning_rate": 1.0832020980080784e-06,
      "loss": 0.01,
      "step": 2726480
    },
    {
      "epoch": 4.461977049416416,
      "grad_norm": 0.16412866115570068,
      "learning_rate": 1.0831362057945614e-06,
      "loss": 0.0072,
      "step": 2726500
    },
    {
      "epoch": 4.4620097798550695,
      "grad_norm": 0.8659104704856873,
      "learning_rate": 1.0830703135810443e-06,
      "loss": 0.0079,
      "step": 2726520
    },
    {
      "epoch": 4.462042510293723,
      "grad_norm": 0.13928017020225525,
      "learning_rate": 1.083004421367527e-06,
      "loss": 0.0091,
      "step": 2726540
    },
    {
      "epoch": 4.462075240732377,
      "grad_norm": 0.08512851595878601,
      "learning_rate": 1.08293852915401e-06,
      "loss": 0.0107,
      "step": 2726560
    },
    {
      "epoch": 4.462107971171029,
      "grad_norm": 0.31516796350479126,
      "learning_rate": 1.0828726369404928e-06,
      "loss": 0.0112,
      "step": 2726580
    },
    {
      "epoch": 4.462140701609683,
      "grad_norm": 0.1635739505290985,
      "learning_rate": 1.0828067447269757e-06,
      "loss": 0.0102,
      "step": 2726600
    },
    {
      "epoch": 4.462173432048337,
      "grad_norm": 0.06741786748170853,
      "learning_rate": 1.0827408525134587e-06,
      "loss": 0.0048,
      "step": 2726620
    },
    {
      "epoch": 4.462206162486989,
      "grad_norm": 0.11675385385751724,
      "learning_rate": 1.0826749602999414e-06,
      "loss": 0.0128,
      "step": 2726640
    },
    {
      "epoch": 4.462238892925643,
      "grad_norm": 0.4441366493701935,
      "learning_rate": 1.0826090680864243e-06,
      "loss": 0.0079,
      "step": 2726660
    },
    {
      "epoch": 4.4622716233642965,
      "grad_norm": 0.11291974782943726,
      "learning_rate": 1.082543175872907e-06,
      "loss": 0.0083,
      "step": 2726680
    },
    {
      "epoch": 4.462304353802949,
      "grad_norm": 0.34072577953338623,
      "learning_rate": 1.08247728365939e-06,
      "loss": 0.0089,
      "step": 2726700
    },
    {
      "epoch": 4.462337084241603,
      "grad_norm": 0.11082575470209122,
      "learning_rate": 1.082411391445873e-06,
      "loss": 0.0117,
      "step": 2726720
    },
    {
      "epoch": 4.462369814680256,
      "grad_norm": 0.11440970748662949,
      "learning_rate": 1.0823454992323557e-06,
      "loss": 0.0134,
      "step": 2726740
    },
    {
      "epoch": 4.46240254511891,
      "grad_norm": 0.2695731520652771,
      "learning_rate": 1.0822796070188387e-06,
      "loss": 0.0091,
      "step": 2726760
    },
    {
      "epoch": 4.462435275557563,
      "grad_norm": 0.23364630341529846,
      "learning_rate": 1.0822137148053214e-06,
      "loss": 0.0063,
      "step": 2726780
    },
    {
      "epoch": 4.462468005996216,
      "grad_norm": 0.09477120637893677,
      "learning_rate": 1.0821478225918044e-06,
      "loss": 0.0115,
      "step": 2726800
    },
    {
      "epoch": 4.46250073643487,
      "grad_norm": 0.11512509733438492,
      "learning_rate": 1.0820819303782873e-06,
      "loss": 0.0086,
      "step": 2726820
    },
    {
      "epoch": 4.4625334668735235,
      "grad_norm": 0.24619190394878387,
      "learning_rate": 1.08201603816477e-06,
      "loss": 0.006,
      "step": 2726840
    },
    {
      "epoch": 4.462566197312176,
      "grad_norm": 0.3205813765525818,
      "learning_rate": 1.081950145951253e-06,
      "loss": 0.0117,
      "step": 2726860
    },
    {
      "epoch": 4.46259892775083,
      "grad_norm": 0.07957245409488678,
      "learning_rate": 1.081884253737736e-06,
      "loss": 0.0064,
      "step": 2726880
    },
    {
      "epoch": 4.462631658189483,
      "grad_norm": 0.353040874004364,
      "learning_rate": 1.0818183615242187e-06,
      "loss": 0.0065,
      "step": 2726900
    },
    {
      "epoch": 4.462664388628136,
      "grad_norm": 0.32018718123435974,
      "learning_rate": 1.0817524693107017e-06,
      "loss": 0.009,
      "step": 2726920
    },
    {
      "epoch": 4.46269711906679,
      "grad_norm": 0.12659768760204315,
      "learning_rate": 1.0816865770971844e-06,
      "loss": 0.0066,
      "step": 2726940
    },
    {
      "epoch": 4.462729849505443,
      "grad_norm": 0.5166477560997009,
      "learning_rate": 1.0816206848836673e-06,
      "loss": 0.011,
      "step": 2726960
    },
    {
      "epoch": 4.462762579944096,
      "grad_norm": 0.23096224665641785,
      "learning_rate": 1.0815547926701503e-06,
      "loss": 0.008,
      "step": 2726980
    },
    {
      "epoch": 4.46279531038275,
      "grad_norm": 0.3106488883495331,
      "learning_rate": 1.081488900456633e-06,
      "loss": 0.0137,
      "step": 2727000
    },
    {
      "epoch": 4.462828040821403,
      "grad_norm": 0.2354038655757904,
      "learning_rate": 1.081423008243116e-06,
      "loss": 0.0095,
      "step": 2727020
    },
    {
      "epoch": 4.462860771260057,
      "grad_norm": 0.15543818473815918,
      "learning_rate": 1.0813571160295987e-06,
      "loss": 0.01,
      "step": 2727040
    },
    {
      "epoch": 4.4628935016987095,
      "grad_norm": 0.1947753131389618,
      "learning_rate": 1.0812912238160817e-06,
      "loss": 0.0102,
      "step": 2727060
    },
    {
      "epoch": 4.462926232137363,
      "grad_norm": 0.21353428065776825,
      "learning_rate": 1.0812253316025646e-06,
      "loss": 0.009,
      "step": 2727080
    },
    {
      "epoch": 4.462958962576017,
      "grad_norm": 0.17224569618701935,
      "learning_rate": 1.0811594393890474e-06,
      "loss": 0.0077,
      "step": 2727100
    },
    {
      "epoch": 4.46299169301467,
      "grad_norm": 0.16300565004348755,
      "learning_rate": 1.0810935471755303e-06,
      "loss": 0.0088,
      "step": 2727120
    },
    {
      "epoch": 4.463024423453323,
      "grad_norm": 0.5693866610527039,
      "learning_rate": 1.0810276549620133e-06,
      "loss": 0.0132,
      "step": 2727140
    },
    {
      "epoch": 4.4630571538919765,
      "grad_norm": 0.051326651126146317,
      "learning_rate": 1.0809617627484962e-06,
      "loss": 0.0071,
      "step": 2727160
    },
    {
      "epoch": 4.46308988433063,
      "grad_norm": 0.43714290857315063,
      "learning_rate": 1.080895870534979e-06,
      "loss": 0.0096,
      "step": 2727180
    },
    {
      "epoch": 4.463122614769283,
      "grad_norm": 0.26148349046707153,
      "learning_rate": 1.0808299783214617e-06,
      "loss": 0.0059,
      "step": 2727200
    },
    {
      "epoch": 4.463155345207936,
      "grad_norm": 0.10869539529085159,
      "learning_rate": 1.0807640861079447e-06,
      "loss": 0.0087,
      "step": 2727220
    },
    {
      "epoch": 4.46318807564659,
      "grad_norm": 0.4116392731666565,
      "learning_rate": 1.0806981938944276e-06,
      "loss": 0.0089,
      "step": 2727240
    },
    {
      "epoch": 4.463220806085243,
      "grad_norm": 0.10462965071201324,
      "learning_rate": 1.0806323016809106e-06,
      "loss": 0.0092,
      "step": 2727260
    },
    {
      "epoch": 4.463253536523896,
      "grad_norm": 0.3116236627101898,
      "learning_rate": 1.0805664094673933e-06,
      "loss": 0.0062,
      "step": 2727280
    },
    {
      "epoch": 4.46328626696255,
      "grad_norm": 0.8668280839920044,
      "learning_rate": 1.080500517253876e-06,
      "loss": 0.0084,
      "step": 2727300
    },
    {
      "epoch": 4.4633189974012035,
      "grad_norm": 0.16435398161411285,
      "learning_rate": 1.0804346250403592e-06,
      "loss": 0.0092,
      "step": 2727320
    },
    {
      "epoch": 4.463351727839856,
      "grad_norm": 0.10014130920171738,
      "learning_rate": 1.080368732826842e-06,
      "loss": 0.01,
      "step": 2727340
    },
    {
      "epoch": 4.46338445827851,
      "grad_norm": 0.04957225173711777,
      "learning_rate": 1.080302840613325e-06,
      "loss": 0.0101,
      "step": 2727360
    },
    {
      "epoch": 4.463417188717163,
      "grad_norm": 0.295919269323349,
      "learning_rate": 1.0802369483998076e-06,
      "loss": 0.0079,
      "step": 2727380
    },
    {
      "epoch": 4.463449919155816,
      "grad_norm": 0.06454864144325256,
      "learning_rate": 1.0801710561862906e-06,
      "loss": 0.0067,
      "step": 2727400
    },
    {
      "epoch": 4.46348264959447,
      "grad_norm": 0.40343207120895386,
      "learning_rate": 1.0801051639727735e-06,
      "loss": 0.0104,
      "step": 2727420
    },
    {
      "epoch": 4.463515380033123,
      "grad_norm": 0.324407160282135,
      "learning_rate": 1.0800392717592563e-06,
      "loss": 0.0107,
      "step": 2727440
    },
    {
      "epoch": 4.463548110471777,
      "grad_norm": 0.28825780749320984,
      "learning_rate": 1.0799733795457392e-06,
      "loss": 0.0109,
      "step": 2727460
    },
    {
      "epoch": 4.46358084091043,
      "grad_norm": 0.15134088695049286,
      "learning_rate": 1.079907487332222e-06,
      "loss": 0.0072,
      "step": 2727480
    },
    {
      "epoch": 4.463613571349083,
      "grad_norm": 0.400773823261261,
      "learning_rate": 1.079841595118705e-06,
      "loss": 0.0082,
      "step": 2727500
    },
    {
      "epoch": 4.463646301787737,
      "grad_norm": 0.13153664767742157,
      "learning_rate": 1.0797757029051879e-06,
      "loss": 0.0107,
      "step": 2727520
    },
    {
      "epoch": 4.4636790322263895,
      "grad_norm": 0.2240007072687149,
      "learning_rate": 1.0797098106916706e-06,
      "loss": 0.0116,
      "step": 2727540
    },
    {
      "epoch": 4.463711762665043,
      "grad_norm": 0.2112564742565155,
      "learning_rate": 1.0796439184781536e-06,
      "loss": 0.0081,
      "step": 2727560
    },
    {
      "epoch": 4.463744493103697,
      "grad_norm": 0.17149658501148224,
      "learning_rate": 1.0795780262646365e-06,
      "loss": 0.0079,
      "step": 2727580
    },
    {
      "epoch": 4.46377722354235,
      "grad_norm": 0.25035086274147034,
      "learning_rate": 1.0795121340511193e-06,
      "loss": 0.008,
      "step": 2727600
    },
    {
      "epoch": 4.463809953981003,
      "grad_norm": 0.20791397988796234,
      "learning_rate": 1.0794462418376022e-06,
      "loss": 0.0096,
      "step": 2727620
    },
    {
      "epoch": 4.463842684419657,
      "grad_norm": 0.2619830369949341,
      "learning_rate": 1.079380349624085e-06,
      "loss": 0.0131,
      "step": 2727640
    },
    {
      "epoch": 4.46387541485831,
      "grad_norm": 0.11845724284648895,
      "learning_rate": 1.079314457410568e-06,
      "loss": 0.0094,
      "step": 2727660
    },
    {
      "epoch": 4.463908145296963,
      "grad_norm": 0.10592304170131683,
      "learning_rate": 1.0792485651970508e-06,
      "loss": 0.0076,
      "step": 2727680
    },
    {
      "epoch": 4.4639408757356165,
      "grad_norm": 0.16660813987255096,
      "learning_rate": 1.0791826729835336e-06,
      "loss": 0.0088,
      "step": 2727700
    },
    {
      "epoch": 4.46397360617427,
      "grad_norm": 0.5833410620689392,
      "learning_rate": 1.0791167807700165e-06,
      "loss": 0.0073,
      "step": 2727720
    },
    {
      "epoch": 4.464006336612924,
      "grad_norm": 0.0961773470044136,
      "learning_rate": 1.0790508885564993e-06,
      "loss": 0.0099,
      "step": 2727740
    },
    {
      "epoch": 4.464039067051576,
      "grad_norm": 0.19629479944705963,
      "learning_rate": 1.0789849963429822e-06,
      "loss": 0.0092,
      "step": 2727760
    },
    {
      "epoch": 4.46407179749023,
      "grad_norm": 0.134980246424675,
      "learning_rate": 1.0789191041294652e-06,
      "loss": 0.0057,
      "step": 2727780
    },
    {
      "epoch": 4.464104527928884,
      "grad_norm": 0.12247654050588608,
      "learning_rate": 1.078853211915948e-06,
      "loss": 0.0138,
      "step": 2727800
    },
    {
      "epoch": 4.464137258367536,
      "grad_norm": 0.28003111481666565,
      "learning_rate": 1.0787873197024309e-06,
      "loss": 0.0086,
      "step": 2727820
    },
    {
      "epoch": 4.46416998880619,
      "grad_norm": 0.14278188347816467,
      "learning_rate": 1.0787214274889138e-06,
      "loss": 0.0068,
      "step": 2727840
    },
    {
      "epoch": 4.4642027192448435,
      "grad_norm": 0.1722487211227417,
      "learning_rate": 1.0786555352753966e-06,
      "loss": 0.009,
      "step": 2727860
    },
    {
      "epoch": 4.464235449683497,
      "grad_norm": 0.15657217800617218,
      "learning_rate": 1.0785896430618795e-06,
      "loss": 0.0085,
      "step": 2727880
    },
    {
      "epoch": 4.46426818012215,
      "grad_norm": 0.1909288465976715,
      "learning_rate": 1.0785237508483623e-06,
      "loss": 0.0093,
      "step": 2727900
    },
    {
      "epoch": 4.464300910560803,
      "grad_norm": 0.17919613420963287,
      "learning_rate": 1.0784578586348452e-06,
      "loss": 0.0088,
      "step": 2727920
    },
    {
      "epoch": 4.464333640999457,
      "grad_norm": 0.12115289270877838,
      "learning_rate": 1.0783919664213282e-06,
      "loss": 0.009,
      "step": 2727940
    },
    {
      "epoch": 4.46436637143811,
      "grad_norm": 0.2562602758407593,
      "learning_rate": 1.078326074207811e-06,
      "loss": 0.006,
      "step": 2727960
    },
    {
      "epoch": 4.464399101876763,
      "grad_norm": 0.09549117833375931,
      "learning_rate": 1.0782601819942939e-06,
      "loss": 0.0066,
      "step": 2727980
    },
    {
      "epoch": 4.464431832315417,
      "grad_norm": 0.17586539685726166,
      "learning_rate": 1.0781942897807766e-06,
      "loss": 0.0114,
      "step": 2728000
    },
    {
      "epoch": 4.46446456275407,
      "grad_norm": 0.3126366436481476,
      "learning_rate": 1.0781283975672595e-06,
      "loss": 0.0092,
      "step": 2728020
    },
    {
      "epoch": 4.464497293192723,
      "grad_norm": 0.16848592460155487,
      "learning_rate": 1.0780625053537425e-06,
      "loss": 0.0088,
      "step": 2728040
    },
    {
      "epoch": 4.464530023631377,
      "grad_norm": 0.11220210045576096,
      "learning_rate": 1.0779966131402252e-06,
      "loss": 0.0095,
      "step": 2728060
    },
    {
      "epoch": 4.46456275407003,
      "grad_norm": 0.4614231586456299,
      "learning_rate": 1.0779307209267082e-06,
      "loss": 0.012,
      "step": 2728080
    },
    {
      "epoch": 4.464595484508683,
      "grad_norm": 0.2507772445678711,
      "learning_rate": 1.0778648287131911e-06,
      "loss": 0.0074,
      "step": 2728100
    },
    {
      "epoch": 4.464628214947337,
      "grad_norm": 0.11966496706008911,
      "learning_rate": 1.0777989364996739e-06,
      "loss": 0.0085,
      "step": 2728120
    },
    {
      "epoch": 4.46466094538599,
      "grad_norm": 0.12688732147216797,
      "learning_rate": 1.0777330442861568e-06,
      "loss": 0.0098,
      "step": 2728140
    },
    {
      "epoch": 4.464693675824643,
      "grad_norm": 0.06671027839183807,
      "learning_rate": 1.0776671520726396e-06,
      "loss": 0.0088,
      "step": 2728160
    },
    {
      "epoch": 4.4647264062632965,
      "grad_norm": 0.32256242632865906,
      "learning_rate": 1.0776012598591225e-06,
      "loss": 0.0047,
      "step": 2728180
    },
    {
      "epoch": 4.46475913670195,
      "grad_norm": 0.4526457190513611,
      "learning_rate": 1.0775353676456055e-06,
      "loss": 0.012,
      "step": 2728200
    },
    {
      "epoch": 4.464791867140604,
      "grad_norm": 0.14872385561466217,
      "learning_rate": 1.0774694754320882e-06,
      "loss": 0.0137,
      "step": 2728220
    },
    {
      "epoch": 4.464824597579256,
      "grad_norm": 0.16794812679290771,
      "learning_rate": 1.0774035832185712e-06,
      "loss": 0.0069,
      "step": 2728240
    },
    {
      "epoch": 4.46485732801791,
      "grad_norm": 0.5069239735603333,
      "learning_rate": 1.077337691005054e-06,
      "loss": 0.0083,
      "step": 2728260
    },
    {
      "epoch": 4.464890058456564,
      "grad_norm": 0.3164243698120117,
      "learning_rate": 1.0772717987915369e-06,
      "loss": 0.0122,
      "step": 2728280
    },
    {
      "epoch": 4.464922788895217,
      "grad_norm": 0.2645949721336365,
      "learning_rate": 1.0772059065780198e-06,
      "loss": 0.0075,
      "step": 2728300
    },
    {
      "epoch": 4.46495551933387,
      "grad_norm": 0.24081869423389435,
      "learning_rate": 1.0771400143645025e-06,
      "loss": 0.0088,
      "step": 2728320
    },
    {
      "epoch": 4.4649882497725235,
      "grad_norm": 0.34426257014274597,
      "learning_rate": 1.0770741221509855e-06,
      "loss": 0.0096,
      "step": 2728340
    },
    {
      "epoch": 4.465020980211177,
      "grad_norm": 0.12855264544487,
      "learning_rate": 1.0770082299374682e-06,
      "loss": 0.0166,
      "step": 2728360
    },
    {
      "epoch": 4.46505371064983,
      "grad_norm": 0.3257683515548706,
      "learning_rate": 1.0769423377239514e-06,
      "loss": 0.009,
      "step": 2728380
    },
    {
      "epoch": 4.465086441088483,
      "grad_norm": 0.22232213616371155,
      "learning_rate": 1.0768764455104341e-06,
      "loss": 0.008,
      "step": 2728400
    },
    {
      "epoch": 4.465119171527137,
      "grad_norm": 0.025011377409100533,
      "learning_rate": 1.0768105532969169e-06,
      "loss": 0.0146,
      "step": 2728420
    },
    {
      "epoch": 4.46515190196579,
      "grad_norm": 0.1726304292678833,
      "learning_rate": 1.0767446610833998e-06,
      "loss": 0.0066,
      "step": 2728440
    },
    {
      "epoch": 4.465184632404443,
      "grad_norm": 0.24530117213726044,
      "learning_rate": 1.0766787688698828e-06,
      "loss": 0.0085,
      "step": 2728460
    },
    {
      "epoch": 4.465217362843097,
      "grad_norm": 0.13126030564308167,
      "learning_rate": 1.0766128766563657e-06,
      "loss": 0.0082,
      "step": 2728480
    },
    {
      "epoch": 4.4652500932817505,
      "grad_norm": 0.20763054490089417,
      "learning_rate": 1.0765469844428485e-06,
      "loss": 0.0113,
      "step": 2728500
    },
    {
      "epoch": 4.465282823720403,
      "grad_norm": 0.2347032129764557,
      "learning_rate": 1.0764810922293312e-06,
      "loss": 0.0173,
      "step": 2728520
    },
    {
      "epoch": 4.465315554159057,
      "grad_norm": 0.0627707988023758,
      "learning_rate": 1.0764152000158144e-06,
      "loss": 0.0071,
      "step": 2728540
    },
    {
      "epoch": 4.46534828459771,
      "grad_norm": 0.06402541697025299,
      "learning_rate": 1.0763493078022971e-06,
      "loss": 0.0076,
      "step": 2728560
    },
    {
      "epoch": 4.465381015036364,
      "grad_norm": 0.5060054659843445,
      "learning_rate": 1.07628341558878e-06,
      "loss": 0.0094,
      "step": 2728580
    },
    {
      "epoch": 4.465413745475017,
      "grad_norm": 0.2306138575077057,
      "learning_rate": 1.0762175233752628e-06,
      "loss": 0.0106,
      "step": 2728600
    },
    {
      "epoch": 4.46544647591367,
      "grad_norm": 0.5100239515304565,
      "learning_rate": 1.0761516311617455e-06,
      "loss": 0.009,
      "step": 2728620
    },
    {
      "epoch": 4.465479206352324,
      "grad_norm": 0.21097953617572784,
      "learning_rate": 1.0760857389482287e-06,
      "loss": 0.0101,
      "step": 2728640
    },
    {
      "epoch": 4.465511936790977,
      "grad_norm": 0.2837631106376648,
      "learning_rate": 1.0760198467347114e-06,
      "loss": 0.0104,
      "step": 2728660
    },
    {
      "epoch": 4.46554466722963,
      "grad_norm": 0.49592578411102295,
      "learning_rate": 1.0759539545211944e-06,
      "loss": 0.0066,
      "step": 2728680
    },
    {
      "epoch": 4.465577397668284,
      "grad_norm": 0.22727583348751068,
      "learning_rate": 1.0758880623076771e-06,
      "loss": 0.0083,
      "step": 2728700
    },
    {
      "epoch": 4.4656101281069365,
      "grad_norm": 0.09185774624347687,
      "learning_rate": 1.07582217009416e-06,
      "loss": 0.009,
      "step": 2728720
    },
    {
      "epoch": 4.46564285854559,
      "grad_norm": 0.2530839443206787,
      "learning_rate": 1.075756277880643e-06,
      "loss": 0.0088,
      "step": 2728740
    },
    {
      "epoch": 4.465675588984244,
      "grad_norm": 0.1782544106245041,
      "learning_rate": 1.0756903856671258e-06,
      "loss": 0.0069,
      "step": 2728760
    },
    {
      "epoch": 4.465708319422897,
      "grad_norm": 0.36981233954429626,
      "learning_rate": 1.0756244934536087e-06,
      "loss": 0.0118,
      "step": 2728780
    },
    {
      "epoch": 4.46574104986155,
      "grad_norm": 0.504366397857666,
      "learning_rate": 1.0755586012400915e-06,
      "loss": 0.0071,
      "step": 2728800
    },
    {
      "epoch": 4.4657737803002036,
      "grad_norm": 0.1450776904821396,
      "learning_rate": 1.0754927090265744e-06,
      "loss": 0.014,
      "step": 2728820
    },
    {
      "epoch": 4.465806510738857,
      "grad_norm": 0.05127067491412163,
      "learning_rate": 1.0754268168130574e-06,
      "loss": 0.0087,
      "step": 2728840
    },
    {
      "epoch": 4.46583924117751,
      "grad_norm": 0.179127499461174,
      "learning_rate": 1.0753609245995401e-06,
      "loss": 0.0058,
      "step": 2728860
    },
    {
      "epoch": 4.4658719716161634,
      "grad_norm": 0.09574957937002182,
      "learning_rate": 1.075295032386023e-06,
      "loss": 0.0091,
      "step": 2728880
    },
    {
      "epoch": 4.465904702054817,
      "grad_norm": 0.19645515084266663,
      "learning_rate": 1.075229140172506e-06,
      "loss": 0.0078,
      "step": 2728900
    },
    {
      "epoch": 4.465937432493471,
      "grad_norm": 0.134914368391037,
      "learning_rate": 1.0751632479589888e-06,
      "loss": 0.0079,
      "step": 2728920
    },
    {
      "epoch": 4.465970162932123,
      "grad_norm": 0.14787784218788147,
      "learning_rate": 1.0750973557454717e-06,
      "loss": 0.0064,
      "step": 2728940
    },
    {
      "epoch": 4.466002893370777,
      "grad_norm": 0.535964846611023,
      "learning_rate": 1.0750314635319545e-06,
      "loss": 0.0092,
      "step": 2728960
    },
    {
      "epoch": 4.4660356238094305,
      "grad_norm": 0.6642784476280212,
      "learning_rate": 1.0749655713184374e-06,
      "loss": 0.0094,
      "step": 2728980
    },
    {
      "epoch": 4.466068354248083,
      "grad_norm": 0.18638162314891815,
      "learning_rate": 1.0748996791049204e-06,
      "loss": 0.0067,
      "step": 2729000
    },
    {
      "epoch": 4.466101084686737,
      "grad_norm": 0.6107308268547058,
      "learning_rate": 1.074833786891403e-06,
      "loss": 0.0082,
      "step": 2729020
    },
    {
      "epoch": 4.46613381512539,
      "grad_norm": 0.3363141119480133,
      "learning_rate": 1.074767894677886e-06,
      "loss": 0.0072,
      "step": 2729040
    },
    {
      "epoch": 4.466166545564044,
      "grad_norm": 0.33710426092147827,
      "learning_rate": 1.0747020024643688e-06,
      "loss": 0.0107,
      "step": 2729060
    },
    {
      "epoch": 4.466199276002697,
      "grad_norm": 0.21023420989513397,
      "learning_rate": 1.0746361102508517e-06,
      "loss": 0.0083,
      "step": 2729080
    },
    {
      "epoch": 4.46623200644135,
      "grad_norm": 0.25498077273368835,
      "learning_rate": 1.0745702180373347e-06,
      "loss": 0.0128,
      "step": 2729100
    },
    {
      "epoch": 4.466264736880004,
      "grad_norm": 0.25304722785949707,
      "learning_rate": 1.0745043258238174e-06,
      "loss": 0.0081,
      "step": 2729120
    },
    {
      "epoch": 4.466297467318657,
      "grad_norm": 0.4193922281265259,
      "learning_rate": 1.0744384336103004e-06,
      "loss": 0.0089,
      "step": 2729140
    },
    {
      "epoch": 4.46633019775731,
      "grad_norm": 0.4270179271697998,
      "learning_rate": 1.0743725413967833e-06,
      "loss": 0.0111,
      "step": 2729160
    },
    {
      "epoch": 4.466362928195964,
      "grad_norm": 0.6989090442657471,
      "learning_rate": 1.074306649183266e-06,
      "loss": 0.0113,
      "step": 2729180
    },
    {
      "epoch": 4.466395658634617,
      "grad_norm": 0.20666441321372986,
      "learning_rate": 1.074240756969749e-06,
      "loss": 0.016,
      "step": 2729200
    },
    {
      "epoch": 4.46642838907327,
      "grad_norm": 0.12907719612121582,
      "learning_rate": 1.0741748647562318e-06,
      "loss": 0.0069,
      "step": 2729220
    },
    {
      "epoch": 4.466461119511924,
      "grad_norm": 0.189304381608963,
      "learning_rate": 1.0741089725427147e-06,
      "loss": 0.0111,
      "step": 2729240
    },
    {
      "epoch": 4.466493849950577,
      "grad_norm": 0.1664131134748459,
      "learning_rate": 1.0740430803291977e-06,
      "loss": 0.0076,
      "step": 2729260
    },
    {
      "epoch": 4.46652658038923,
      "grad_norm": 0.24997057020664215,
      "learning_rate": 1.0739771881156804e-06,
      "loss": 0.007,
      "step": 2729280
    },
    {
      "epoch": 4.466559310827884,
      "grad_norm": 0.24811387062072754,
      "learning_rate": 1.0739112959021634e-06,
      "loss": 0.0101,
      "step": 2729300
    },
    {
      "epoch": 4.466592041266537,
      "grad_norm": 0.25149106979370117,
      "learning_rate": 1.073845403688646e-06,
      "loss": 0.0096,
      "step": 2729320
    },
    {
      "epoch": 4.466624771705191,
      "grad_norm": 0.23134154081344604,
      "learning_rate": 1.073779511475129e-06,
      "loss": 0.0074,
      "step": 2729340
    },
    {
      "epoch": 4.4666575021438435,
      "grad_norm": 0.11898908764123917,
      "learning_rate": 1.073713619261612e-06,
      "loss": 0.0064,
      "step": 2729360
    },
    {
      "epoch": 4.466690232582497,
      "grad_norm": 0.13252131640911102,
      "learning_rate": 1.0736477270480947e-06,
      "loss": 0.0108,
      "step": 2729380
    },
    {
      "epoch": 4.466722963021151,
      "grad_norm": 0.23811496794223785,
      "learning_rate": 1.0735818348345777e-06,
      "loss": 0.0098,
      "step": 2729400
    },
    {
      "epoch": 4.466755693459803,
      "grad_norm": 0.26483216881752014,
      "learning_rate": 1.0735159426210606e-06,
      "loss": 0.0079,
      "step": 2729420
    },
    {
      "epoch": 4.466788423898457,
      "grad_norm": 0.5007882714271545,
      "learning_rate": 1.0734500504075434e-06,
      "loss": 0.0123,
      "step": 2729440
    },
    {
      "epoch": 4.466821154337111,
      "grad_norm": 0.09489022940397263,
      "learning_rate": 1.0733841581940263e-06,
      "loss": 0.0091,
      "step": 2729460
    },
    {
      "epoch": 4.466853884775764,
      "grad_norm": 0.2401197850704193,
      "learning_rate": 1.073318265980509e-06,
      "loss": 0.0122,
      "step": 2729480
    },
    {
      "epoch": 4.466886615214417,
      "grad_norm": 0.21740731596946716,
      "learning_rate": 1.073252373766992e-06,
      "loss": 0.0084,
      "step": 2729500
    },
    {
      "epoch": 4.4669193456530705,
      "grad_norm": 0.1382293999195099,
      "learning_rate": 1.073186481553475e-06,
      "loss": 0.0102,
      "step": 2729520
    },
    {
      "epoch": 4.466952076091724,
      "grad_norm": 0.15678690373897552,
      "learning_rate": 1.0731205893399577e-06,
      "loss": 0.0103,
      "step": 2729540
    },
    {
      "epoch": 4.466984806530377,
      "grad_norm": 0.1958988755941391,
      "learning_rate": 1.0730546971264407e-06,
      "loss": 0.0071,
      "step": 2729560
    },
    {
      "epoch": 4.46701753696903,
      "grad_norm": 0.0615982711315155,
      "learning_rate": 1.0729888049129234e-06,
      "loss": 0.0115,
      "step": 2729580
    },
    {
      "epoch": 4.467050267407684,
      "grad_norm": 0.19540445506572723,
      "learning_rate": 1.0729229126994064e-06,
      "loss": 0.0057,
      "step": 2729600
    },
    {
      "epoch": 4.467082997846337,
      "grad_norm": 0.05733576416969299,
      "learning_rate": 1.0728570204858893e-06,
      "loss": 0.0118,
      "step": 2729620
    },
    {
      "epoch": 4.46711572828499,
      "grad_norm": 0.44322121143341064,
      "learning_rate": 1.072791128272372e-06,
      "loss": 0.0122,
      "step": 2729640
    },
    {
      "epoch": 4.467148458723644,
      "grad_norm": 0.08010096848011017,
      "learning_rate": 1.072725236058855e-06,
      "loss": 0.0089,
      "step": 2729660
    },
    {
      "epoch": 4.4671811891622974,
      "grad_norm": 0.5312322974205017,
      "learning_rate": 1.0726593438453377e-06,
      "loss": 0.0078,
      "step": 2729680
    },
    {
      "epoch": 4.46721391960095,
      "grad_norm": 0.33785948157310486,
      "learning_rate": 1.072593451631821e-06,
      "loss": 0.0081,
      "step": 2729700
    },
    {
      "epoch": 4.467246650039604,
      "grad_norm": 0.09626010805368423,
      "learning_rate": 1.0725275594183036e-06,
      "loss": 0.0165,
      "step": 2729720
    },
    {
      "epoch": 4.467279380478257,
      "grad_norm": 0.10831387341022491,
      "learning_rate": 1.0724616672047864e-06,
      "loss": 0.009,
      "step": 2729740
    },
    {
      "epoch": 4.467312110916911,
      "grad_norm": 0.11034364253282547,
      "learning_rate": 1.0723957749912693e-06,
      "loss": 0.0097,
      "step": 2729760
    },
    {
      "epoch": 4.467344841355564,
      "grad_norm": 0.6651979684829712,
      "learning_rate": 1.0723298827777523e-06,
      "loss": 0.0088,
      "step": 2729780
    },
    {
      "epoch": 4.467377571794217,
      "grad_norm": 0.052731890231370926,
      "learning_rate": 1.0722639905642352e-06,
      "loss": 0.0099,
      "step": 2729800
    },
    {
      "epoch": 4.467410302232871,
      "grad_norm": 0.0813133716583252,
      "learning_rate": 1.072198098350718e-06,
      "loss": 0.0099,
      "step": 2729820
    },
    {
      "epoch": 4.4674430326715235,
      "grad_norm": 0.24099260568618774,
      "learning_rate": 1.0721322061372007e-06,
      "loss": 0.0064,
      "step": 2729840
    },
    {
      "epoch": 4.467475763110177,
      "grad_norm": 0.12544308602809906,
      "learning_rate": 1.0720663139236839e-06,
      "loss": 0.0056,
      "step": 2729860
    },
    {
      "epoch": 4.467508493548831,
      "grad_norm": 0.4986360967159271,
      "learning_rate": 1.0720004217101666e-06,
      "loss": 0.0108,
      "step": 2729880
    },
    {
      "epoch": 4.467541223987483,
      "grad_norm": 0.30217602849006653,
      "learning_rate": 1.0719345294966496e-06,
      "loss": 0.0099,
      "step": 2729900
    },
    {
      "epoch": 4.467573954426137,
      "grad_norm": 0.29491162300109863,
      "learning_rate": 1.0718686372831323e-06,
      "loss": 0.0074,
      "step": 2729920
    },
    {
      "epoch": 4.467606684864791,
      "grad_norm": 0.1567993462085724,
      "learning_rate": 1.071802745069615e-06,
      "loss": 0.0075,
      "step": 2729940
    },
    {
      "epoch": 4.467639415303444,
      "grad_norm": 0.2056734561920166,
      "learning_rate": 1.0717368528560982e-06,
      "loss": 0.0098,
      "step": 2729960
    },
    {
      "epoch": 4.467672145742097,
      "grad_norm": 0.15495683252811432,
      "learning_rate": 1.071670960642581e-06,
      "loss": 0.0065,
      "step": 2729980
    },
    {
      "epoch": 4.4677048761807505,
      "grad_norm": 0.20295006036758423,
      "learning_rate": 1.071605068429064e-06,
      "loss": 0.0072,
      "step": 2730000
    },
    {
      "epoch": 4.467737606619404,
      "grad_norm": 0.12653784453868866,
      "learning_rate": 1.0715391762155466e-06,
      "loss": 0.0096,
      "step": 2730020
    },
    {
      "epoch": 4.467770337058058,
      "grad_norm": 0.211142435669899,
      "learning_rate": 1.0714732840020296e-06,
      "loss": 0.015,
      "step": 2730040
    },
    {
      "epoch": 4.46780306749671,
      "grad_norm": 0.21274025738239288,
      "learning_rate": 1.0714073917885125e-06,
      "loss": 0.0065,
      "step": 2730060
    },
    {
      "epoch": 4.467835797935364,
      "grad_norm": 0.13942620158195496,
      "learning_rate": 1.0713414995749953e-06,
      "loss": 0.0091,
      "step": 2730080
    },
    {
      "epoch": 4.467868528374018,
      "grad_norm": 0.1686965376138687,
      "learning_rate": 1.0712756073614782e-06,
      "loss": 0.0088,
      "step": 2730100
    },
    {
      "epoch": 4.46790125881267,
      "grad_norm": 0.573573887348175,
      "learning_rate": 1.071209715147961e-06,
      "loss": 0.0097,
      "step": 2730120
    },
    {
      "epoch": 4.467933989251324,
      "grad_norm": 0.06376133114099503,
      "learning_rate": 1.071143822934444e-06,
      "loss": 0.0096,
      "step": 2730140
    },
    {
      "epoch": 4.4679667196899775,
      "grad_norm": 0.6675567030906677,
      "learning_rate": 1.0710779307209269e-06,
      "loss": 0.0194,
      "step": 2730160
    },
    {
      "epoch": 4.46799945012863,
      "grad_norm": 0.10284282267093658,
      "learning_rate": 1.0710120385074096e-06,
      "loss": 0.011,
      "step": 2730180
    },
    {
      "epoch": 4.468032180567284,
      "grad_norm": 0.09461339563131332,
      "learning_rate": 1.0709461462938926e-06,
      "loss": 0.0085,
      "step": 2730200
    },
    {
      "epoch": 4.468064911005937,
      "grad_norm": 0.2510150074958801,
      "learning_rate": 1.0708802540803755e-06,
      "loss": 0.0141,
      "step": 2730220
    },
    {
      "epoch": 4.468097641444591,
      "grad_norm": 0.27394554018974304,
      "learning_rate": 1.0708143618668583e-06,
      "loss": 0.012,
      "step": 2730240
    },
    {
      "epoch": 4.468130371883244,
      "grad_norm": 0.11851586401462555,
      "learning_rate": 1.0707484696533412e-06,
      "loss": 0.0098,
      "step": 2730260
    },
    {
      "epoch": 4.468163102321897,
      "grad_norm": 0.08238229155540466,
      "learning_rate": 1.070682577439824e-06,
      "loss": 0.0115,
      "step": 2730280
    },
    {
      "epoch": 4.468195832760551,
      "grad_norm": 0.08960111439228058,
      "learning_rate": 1.070616685226307e-06,
      "loss": 0.0096,
      "step": 2730300
    },
    {
      "epoch": 4.468228563199204,
      "grad_norm": 0.44425737857818604,
      "learning_rate": 1.0705507930127899e-06,
      "loss": 0.0123,
      "step": 2730320
    },
    {
      "epoch": 4.468261293637857,
      "grad_norm": 0.24876509606838226,
      "learning_rate": 1.0704849007992726e-06,
      "loss": 0.012,
      "step": 2730340
    },
    {
      "epoch": 4.468294024076511,
      "grad_norm": 0.3541799485683441,
      "learning_rate": 1.0704190085857556e-06,
      "loss": 0.0107,
      "step": 2730360
    },
    {
      "epoch": 4.468326754515164,
      "grad_norm": 0.23364531993865967,
      "learning_rate": 1.0703531163722383e-06,
      "loss": 0.0069,
      "step": 2730380
    },
    {
      "epoch": 4.468359484953817,
      "grad_norm": 0.5008835196495056,
      "learning_rate": 1.0702872241587212e-06,
      "loss": 0.0079,
      "step": 2730400
    },
    {
      "epoch": 4.468392215392471,
      "grad_norm": 0.12050376832485199,
      "learning_rate": 1.0702213319452042e-06,
      "loss": 0.008,
      "step": 2730420
    },
    {
      "epoch": 4.468424945831124,
      "grad_norm": 0.19555319845676422,
      "learning_rate": 1.070155439731687e-06,
      "loss": 0.0101,
      "step": 2730440
    },
    {
      "epoch": 4.468457676269777,
      "grad_norm": 0.18970410525798798,
      "learning_rate": 1.0700895475181699e-06,
      "loss": 0.0068,
      "step": 2730460
    },
    {
      "epoch": 4.468490406708431,
      "grad_norm": 0.3597351908683777,
      "learning_rate": 1.0700236553046528e-06,
      "loss": 0.0108,
      "step": 2730480
    },
    {
      "epoch": 4.468523137147084,
      "grad_norm": 0.2097158282995224,
      "learning_rate": 1.0699577630911356e-06,
      "loss": 0.0069,
      "step": 2730500
    },
    {
      "epoch": 4.468555867585738,
      "grad_norm": 0.3139147460460663,
      "learning_rate": 1.0698918708776185e-06,
      "loss": 0.0102,
      "step": 2730520
    },
    {
      "epoch": 4.4685885980243905,
      "grad_norm": 0.12053675204515457,
      "learning_rate": 1.0698259786641013e-06,
      "loss": 0.0109,
      "step": 2730540
    },
    {
      "epoch": 4.468621328463044,
      "grad_norm": 0.2402937114238739,
      "learning_rate": 1.0697600864505842e-06,
      "loss": 0.0102,
      "step": 2730560
    },
    {
      "epoch": 4.468654058901698,
      "grad_norm": 0.1820792406797409,
      "learning_rate": 1.0696941942370672e-06,
      "loss": 0.0081,
      "step": 2730580
    },
    {
      "epoch": 4.46868678934035,
      "grad_norm": 0.09394075721502304,
      "learning_rate": 1.06962830202355e-06,
      "loss": 0.0091,
      "step": 2730600
    },
    {
      "epoch": 4.468719519779004,
      "grad_norm": 0.4006786644458771,
      "learning_rate": 1.0695624098100329e-06,
      "loss": 0.0128,
      "step": 2730620
    },
    {
      "epoch": 4.4687522502176575,
      "grad_norm": 0.0605122409760952,
      "learning_rate": 1.0694965175965156e-06,
      "loss": 0.0047,
      "step": 2730640
    },
    {
      "epoch": 4.468784980656311,
      "grad_norm": 0.12147551029920578,
      "learning_rate": 1.0694306253829986e-06,
      "loss": 0.0104,
      "step": 2730660
    },
    {
      "epoch": 4.468817711094964,
      "grad_norm": 0.17618101835250854,
      "learning_rate": 1.0693647331694815e-06,
      "loss": 0.0113,
      "step": 2730680
    },
    {
      "epoch": 4.468850441533617,
      "grad_norm": 0.2216302454471588,
      "learning_rate": 1.0692988409559642e-06,
      "loss": 0.008,
      "step": 2730700
    },
    {
      "epoch": 4.468883171972271,
      "grad_norm": 0.34807705879211426,
      "learning_rate": 1.0692329487424472e-06,
      "loss": 0.0088,
      "step": 2730720
    },
    {
      "epoch": 4.468915902410924,
      "grad_norm": 0.14910274744033813,
      "learning_rate": 1.0691670565289301e-06,
      "loss": 0.0099,
      "step": 2730740
    },
    {
      "epoch": 4.468948632849577,
      "grad_norm": 0.148690864443779,
      "learning_rate": 1.0691011643154129e-06,
      "loss": 0.0131,
      "step": 2730760
    },
    {
      "epoch": 4.468981363288231,
      "grad_norm": 0.22418081760406494,
      "learning_rate": 1.0690352721018958e-06,
      "loss": 0.008,
      "step": 2730780
    },
    {
      "epoch": 4.4690140937268845,
      "grad_norm": 0.28720852732658386,
      "learning_rate": 1.0689693798883786e-06,
      "loss": 0.0111,
      "step": 2730800
    },
    {
      "epoch": 4.469046824165537,
      "grad_norm": 0.499999076128006,
      "learning_rate": 1.0689034876748615e-06,
      "loss": 0.0068,
      "step": 2730820
    },
    {
      "epoch": 4.469079554604191,
      "grad_norm": 0.3600617051124573,
      "learning_rate": 1.0688375954613445e-06,
      "loss": 0.0114,
      "step": 2730840
    },
    {
      "epoch": 4.469112285042844,
      "grad_norm": 0.12232235074043274,
      "learning_rate": 1.0687717032478272e-06,
      "loss": 0.0122,
      "step": 2730860
    },
    {
      "epoch": 4.469145015481497,
      "grad_norm": 0.232757568359375,
      "learning_rate": 1.0687058110343102e-06,
      "loss": 0.0085,
      "step": 2730880
    },
    {
      "epoch": 4.469177745920151,
      "grad_norm": 0.14867082238197327,
      "learning_rate": 1.068639918820793e-06,
      "loss": 0.0139,
      "step": 2730900
    },
    {
      "epoch": 4.469210476358804,
      "grad_norm": 0.4104264974594116,
      "learning_rate": 1.0685740266072759e-06,
      "loss": 0.0051,
      "step": 2730920
    },
    {
      "epoch": 4.469243206797458,
      "grad_norm": 0.34491395950317383,
      "learning_rate": 1.0685081343937588e-06,
      "loss": 0.0063,
      "step": 2730940
    },
    {
      "epoch": 4.469275937236111,
      "grad_norm": 0.21159549057483673,
      "learning_rate": 1.0684422421802416e-06,
      "loss": 0.0085,
      "step": 2730960
    },
    {
      "epoch": 4.469308667674764,
      "grad_norm": 0.14666788280010223,
      "learning_rate": 1.0683763499667245e-06,
      "loss": 0.0066,
      "step": 2730980
    },
    {
      "epoch": 4.469341398113418,
      "grad_norm": 0.09401142597198486,
      "learning_rate": 1.0683104577532072e-06,
      "loss": 0.0088,
      "step": 2731000
    },
    {
      "epoch": 4.4693741285520705,
      "grad_norm": 0.7169638872146606,
      "learning_rate": 1.0682445655396904e-06,
      "loss": 0.0097,
      "step": 2731020
    },
    {
      "epoch": 4.469406858990724,
      "grad_norm": 0.4145790636539459,
      "learning_rate": 1.0681786733261731e-06,
      "loss": 0.0096,
      "step": 2731040
    },
    {
      "epoch": 4.469439589429378,
      "grad_norm": 0.3357074558734894,
      "learning_rate": 1.0681127811126559e-06,
      "loss": 0.0141,
      "step": 2731060
    },
    {
      "epoch": 4.469472319868031,
      "grad_norm": 0.054379723966121674,
      "learning_rate": 1.0680468888991388e-06,
      "loss": 0.0155,
      "step": 2731080
    },
    {
      "epoch": 4.469505050306684,
      "grad_norm": 0.33076542615890503,
      "learning_rate": 1.0679809966856218e-06,
      "loss": 0.0081,
      "step": 2731100
    },
    {
      "epoch": 4.469537780745338,
      "grad_norm": 0.13588321208953857,
      "learning_rate": 1.0679151044721047e-06,
      "loss": 0.014,
      "step": 2731120
    },
    {
      "epoch": 4.469570511183991,
      "grad_norm": 0.15976949036121368,
      "learning_rate": 1.0678492122585875e-06,
      "loss": 0.007,
      "step": 2731140
    },
    {
      "epoch": 4.469603241622644,
      "grad_norm": 0.3190488815307617,
      "learning_rate": 1.0677833200450702e-06,
      "loss": 0.0092,
      "step": 2731160
    },
    {
      "epoch": 4.4696359720612975,
      "grad_norm": 0.1376868635416031,
      "learning_rate": 1.0677174278315534e-06,
      "loss": 0.0071,
      "step": 2731180
    },
    {
      "epoch": 4.469668702499951,
      "grad_norm": 0.46767204999923706,
      "learning_rate": 1.0676515356180361e-06,
      "loss": 0.0074,
      "step": 2731200
    },
    {
      "epoch": 4.469701432938605,
      "grad_norm": 0.36411118507385254,
      "learning_rate": 1.067585643404519e-06,
      "loss": 0.0086,
      "step": 2731220
    },
    {
      "epoch": 4.469734163377257,
      "grad_norm": 0.16270938515663147,
      "learning_rate": 1.0675197511910018e-06,
      "loss": 0.007,
      "step": 2731240
    },
    {
      "epoch": 4.469766893815911,
      "grad_norm": 0.14584431052207947,
      "learning_rate": 1.0674538589774846e-06,
      "loss": 0.0096,
      "step": 2731260
    },
    {
      "epoch": 4.469799624254565,
      "grad_norm": 0.4669395387172699,
      "learning_rate": 1.0673879667639677e-06,
      "loss": 0.0081,
      "step": 2731280
    },
    {
      "epoch": 4.469832354693217,
      "grad_norm": 0.08311239629983902,
      "learning_rate": 1.0673220745504505e-06,
      "loss": 0.0148,
      "step": 2731300
    },
    {
      "epoch": 4.469865085131871,
      "grad_norm": 0.3845486342906952,
      "learning_rate": 1.0672561823369334e-06,
      "loss": 0.0056,
      "step": 2731320
    },
    {
      "epoch": 4.4698978155705245,
      "grad_norm": 0.20332904160022736,
      "learning_rate": 1.0671902901234162e-06,
      "loss": 0.0085,
      "step": 2731340
    },
    {
      "epoch": 4.469930546009177,
      "grad_norm": 0.11698983609676361,
      "learning_rate": 1.067124397909899e-06,
      "loss": 0.0088,
      "step": 2731360
    },
    {
      "epoch": 4.469963276447831,
      "grad_norm": 0.09579025954008102,
      "learning_rate": 1.067058505696382e-06,
      "loss": 0.0066,
      "step": 2731380
    },
    {
      "epoch": 4.469996006886484,
      "grad_norm": 0.0964546725153923,
      "learning_rate": 1.0669926134828648e-06,
      "loss": 0.0057,
      "step": 2731400
    },
    {
      "epoch": 4.470028737325138,
      "grad_norm": 0.23814791440963745,
      "learning_rate": 1.0669267212693477e-06,
      "loss": 0.0145,
      "step": 2731420
    },
    {
      "epoch": 4.470061467763791,
      "grad_norm": 0.20834125578403473,
      "learning_rate": 1.0668608290558305e-06,
      "loss": 0.0077,
      "step": 2731440
    },
    {
      "epoch": 4.470094198202444,
      "grad_norm": 0.13792695105075836,
      "learning_rate": 1.0667949368423134e-06,
      "loss": 0.0123,
      "step": 2731460
    },
    {
      "epoch": 4.470126928641098,
      "grad_norm": 0.14575329422950745,
      "learning_rate": 1.0667290446287964e-06,
      "loss": 0.0084,
      "step": 2731480
    },
    {
      "epoch": 4.470159659079751,
      "grad_norm": 0.3402646780014038,
      "learning_rate": 1.0666631524152791e-06,
      "loss": 0.0093,
      "step": 2731500
    },
    {
      "epoch": 4.470192389518404,
      "grad_norm": 0.22424300014972687,
      "learning_rate": 1.066597260201762e-06,
      "loss": 0.0084,
      "step": 2731520
    },
    {
      "epoch": 4.470225119957058,
      "grad_norm": 0.04011143371462822,
      "learning_rate": 1.066531367988245e-06,
      "loss": 0.0104,
      "step": 2731540
    },
    {
      "epoch": 4.470257850395711,
      "grad_norm": 0.5314168930053711,
      "learning_rate": 1.0664654757747278e-06,
      "loss": 0.01,
      "step": 2731560
    },
    {
      "epoch": 4.470290580834364,
      "grad_norm": 0.08271382749080658,
      "learning_rate": 1.0663995835612107e-06,
      "loss": 0.0124,
      "step": 2731580
    },
    {
      "epoch": 4.470323311273018,
      "grad_norm": 0.5669974684715271,
      "learning_rate": 1.0663336913476935e-06,
      "loss": 0.0104,
      "step": 2731600
    },
    {
      "epoch": 4.470356041711671,
      "grad_norm": 0.18067117035388947,
      "learning_rate": 1.0662677991341764e-06,
      "loss": 0.0087,
      "step": 2731620
    },
    {
      "epoch": 4.470388772150324,
      "grad_norm": 0.07350385934114456,
      "learning_rate": 1.0662019069206594e-06,
      "loss": 0.0126,
      "step": 2731640
    },
    {
      "epoch": 4.4704215025889775,
      "grad_norm": 0.20827879011631012,
      "learning_rate": 1.066136014707142e-06,
      "loss": 0.0111,
      "step": 2731660
    },
    {
      "epoch": 4.470454233027631,
      "grad_norm": 0.5361643433570862,
      "learning_rate": 1.066070122493625e-06,
      "loss": 0.0083,
      "step": 2731680
    },
    {
      "epoch": 4.470486963466285,
      "grad_norm": 0.13764053583145142,
      "learning_rate": 1.0660042302801078e-06,
      "loss": 0.0086,
      "step": 2731700
    },
    {
      "epoch": 4.470519693904937,
      "grad_norm": 0.28105732798576355,
      "learning_rate": 1.0659383380665907e-06,
      "loss": 0.0081,
      "step": 2731720
    },
    {
      "epoch": 4.470552424343591,
      "grad_norm": 0.1697646826505661,
      "learning_rate": 1.0658724458530737e-06,
      "loss": 0.0104,
      "step": 2731740
    },
    {
      "epoch": 4.470585154782245,
      "grad_norm": 0.08823767304420471,
      "learning_rate": 1.0658065536395564e-06,
      "loss": 0.0117,
      "step": 2731760
    },
    {
      "epoch": 4.470617885220898,
      "grad_norm": 0.13044393062591553,
      "learning_rate": 1.0657406614260394e-06,
      "loss": 0.0092,
      "step": 2731780
    },
    {
      "epoch": 4.470650615659551,
      "grad_norm": 0.1894761621952057,
      "learning_rate": 1.0656747692125223e-06,
      "loss": 0.008,
      "step": 2731800
    },
    {
      "epoch": 4.4706833460982045,
      "grad_norm": 0.21935665607452393,
      "learning_rate": 1.065608876999005e-06,
      "loss": 0.0117,
      "step": 2731820
    },
    {
      "epoch": 4.470716076536858,
      "grad_norm": 0.0717075914144516,
      "learning_rate": 1.065542984785488e-06,
      "loss": 0.0092,
      "step": 2731840
    },
    {
      "epoch": 4.470748806975511,
      "grad_norm": 0.41495856642723083,
      "learning_rate": 1.0654770925719708e-06,
      "loss": 0.0111,
      "step": 2731860
    },
    {
      "epoch": 4.470781537414164,
      "grad_norm": 0.10964392870664597,
      "learning_rate": 1.0654112003584537e-06,
      "loss": 0.0103,
      "step": 2731880
    },
    {
      "epoch": 4.470814267852818,
      "grad_norm": 0.5555100440979004,
      "learning_rate": 1.0653453081449367e-06,
      "loss": 0.0118,
      "step": 2731900
    },
    {
      "epoch": 4.470846998291471,
      "grad_norm": 0.3755062520503998,
      "learning_rate": 1.0652794159314194e-06,
      "loss": 0.0111,
      "step": 2731920
    },
    {
      "epoch": 4.470879728730124,
      "grad_norm": 0.19546565413475037,
      "learning_rate": 1.0652135237179024e-06,
      "loss": 0.0186,
      "step": 2731940
    },
    {
      "epoch": 4.470912459168778,
      "grad_norm": 0.1289505958557129,
      "learning_rate": 1.0651476315043851e-06,
      "loss": 0.0108,
      "step": 2731960
    },
    {
      "epoch": 4.4709451896074315,
      "grad_norm": 0.8102694153785706,
      "learning_rate": 1.065081739290868e-06,
      "loss": 0.0119,
      "step": 2731980
    },
    {
      "epoch": 4.470977920046084,
      "grad_norm": 0.09175857156515121,
      "learning_rate": 1.065015847077351e-06,
      "loss": 0.0083,
      "step": 2732000
    },
    {
      "epoch": 4.471010650484738,
      "grad_norm": 0.19510161876678467,
      "learning_rate": 1.0649499548638337e-06,
      "loss": 0.0068,
      "step": 2732020
    },
    {
      "epoch": 4.471043380923391,
      "grad_norm": 0.2575889825820923,
      "learning_rate": 1.0648840626503167e-06,
      "loss": 0.0086,
      "step": 2732040
    },
    {
      "epoch": 4.471076111362044,
      "grad_norm": 0.4640182554721832,
      "learning_rate": 1.0648181704367997e-06,
      "loss": 0.0067,
      "step": 2732060
    },
    {
      "epoch": 4.471108841800698,
      "grad_norm": 0.19620944559574127,
      "learning_rate": 1.0647522782232824e-06,
      "loss": 0.0076,
      "step": 2732080
    },
    {
      "epoch": 4.471141572239351,
      "grad_norm": 0.32273972034454346,
      "learning_rate": 1.0646863860097653e-06,
      "loss": 0.009,
      "step": 2732100
    },
    {
      "epoch": 4.471174302678005,
      "grad_norm": 0.2632173001766205,
      "learning_rate": 1.064620493796248e-06,
      "loss": 0.0063,
      "step": 2732120
    },
    {
      "epoch": 4.471207033116658,
      "grad_norm": 0.08076146245002747,
      "learning_rate": 1.064554601582731e-06,
      "loss": 0.0056,
      "step": 2732140
    },
    {
      "epoch": 4.471239763555311,
      "grad_norm": 0.41850265860557556,
      "learning_rate": 1.064488709369214e-06,
      "loss": 0.0117,
      "step": 2732160
    },
    {
      "epoch": 4.471272493993965,
      "grad_norm": 0.3703158497810364,
      "learning_rate": 1.0644228171556967e-06,
      "loss": 0.0105,
      "step": 2732180
    },
    {
      "epoch": 4.4713052244326175,
      "grad_norm": 0.2545933723449707,
      "learning_rate": 1.0643569249421797e-06,
      "loss": 0.008,
      "step": 2732200
    },
    {
      "epoch": 4.471337954871271,
      "grad_norm": 0.2902764678001404,
      "learning_rate": 1.0642910327286624e-06,
      "loss": 0.0105,
      "step": 2732220
    },
    {
      "epoch": 4.471370685309925,
      "grad_norm": 0.3887861967086792,
      "learning_rate": 1.0642251405151454e-06,
      "loss": 0.0075,
      "step": 2732240
    },
    {
      "epoch": 4.471403415748578,
      "grad_norm": 0.046124204993247986,
      "learning_rate": 1.0641592483016283e-06,
      "loss": 0.0087,
      "step": 2732260
    },
    {
      "epoch": 4.471436146187231,
      "grad_norm": 0.2212468981742859,
      "learning_rate": 1.064093356088111e-06,
      "loss": 0.0113,
      "step": 2732280
    },
    {
      "epoch": 4.4714688766258845,
      "grad_norm": 0.23912395536899567,
      "learning_rate": 1.064027463874594e-06,
      "loss": 0.0074,
      "step": 2732300
    },
    {
      "epoch": 4.471501607064538,
      "grad_norm": 0.2366466224193573,
      "learning_rate": 1.0639615716610768e-06,
      "loss": 0.0104,
      "step": 2732320
    },
    {
      "epoch": 4.471534337503191,
      "grad_norm": 0.15642336010932922,
      "learning_rate": 1.06389567944756e-06,
      "loss": 0.0084,
      "step": 2732340
    },
    {
      "epoch": 4.471567067941844,
      "grad_norm": 0.21136502921581268,
      "learning_rate": 1.0638297872340427e-06,
      "loss": 0.0072,
      "step": 2732360
    },
    {
      "epoch": 4.471599798380498,
      "grad_norm": 0.5085830688476562,
      "learning_rate": 1.0637638950205254e-06,
      "loss": 0.0115,
      "step": 2732380
    },
    {
      "epoch": 4.471632528819152,
      "grad_norm": 0.1282346099615097,
      "learning_rate": 1.0636980028070083e-06,
      "loss": 0.0083,
      "step": 2732400
    },
    {
      "epoch": 4.471665259257804,
      "grad_norm": 0.1321125030517578,
      "learning_rate": 1.0636321105934913e-06,
      "loss": 0.0058,
      "step": 2732420
    },
    {
      "epoch": 4.471697989696458,
      "grad_norm": 0.46835458278656006,
      "learning_rate": 1.0635662183799742e-06,
      "loss": 0.0111,
      "step": 2732440
    },
    {
      "epoch": 4.4717307201351115,
      "grad_norm": 0.06951884925365448,
      "learning_rate": 1.063500326166457e-06,
      "loss": 0.0106,
      "step": 2732460
    },
    {
      "epoch": 4.471763450573764,
      "grad_norm": 0.22524091601371765,
      "learning_rate": 1.0634344339529397e-06,
      "loss": 0.0082,
      "step": 2732480
    },
    {
      "epoch": 4.471796181012418,
      "grad_norm": 0.29627418518066406,
      "learning_rate": 1.0633685417394229e-06,
      "loss": 0.0105,
      "step": 2732500
    },
    {
      "epoch": 4.471828911451071,
      "grad_norm": 0.15758201479911804,
      "learning_rate": 1.0633026495259056e-06,
      "loss": 0.0084,
      "step": 2732520
    },
    {
      "epoch": 4.471861641889725,
      "grad_norm": 0.1479244977235794,
      "learning_rate": 1.0632367573123886e-06,
      "loss": 0.0097,
      "step": 2732540
    },
    {
      "epoch": 4.471894372328378,
      "grad_norm": 0.5431399345397949,
      "learning_rate": 1.0631708650988713e-06,
      "loss": 0.0128,
      "step": 2732560
    },
    {
      "epoch": 4.471927102767031,
      "grad_norm": 0.11192021518945694,
      "learning_rate": 1.063104972885354e-06,
      "loss": 0.0075,
      "step": 2732580
    },
    {
      "epoch": 4.471959833205685,
      "grad_norm": 0.2681872248649597,
      "learning_rate": 1.0630390806718372e-06,
      "loss": 0.0111,
      "step": 2732600
    },
    {
      "epoch": 4.471992563644338,
      "grad_norm": 0.31616517901420593,
      "learning_rate": 1.06297318845832e-06,
      "loss": 0.0119,
      "step": 2732620
    },
    {
      "epoch": 4.472025294082991,
      "grad_norm": 0.11110453307628632,
      "learning_rate": 1.062907296244803e-06,
      "loss": 0.0103,
      "step": 2732640
    },
    {
      "epoch": 4.472058024521645,
      "grad_norm": 0.2173832207918167,
      "learning_rate": 1.0628414040312857e-06,
      "loss": 0.0093,
      "step": 2732660
    },
    {
      "epoch": 4.472090754960298,
      "grad_norm": 0.3073577582836151,
      "learning_rate": 1.0627755118177686e-06,
      "loss": 0.0091,
      "step": 2732680
    },
    {
      "epoch": 4.472123485398951,
      "grad_norm": 0.16190578043460846,
      "learning_rate": 1.0627096196042516e-06,
      "loss": 0.0059,
      "step": 2732700
    },
    {
      "epoch": 4.472156215837605,
      "grad_norm": 0.36763909459114075,
      "learning_rate": 1.0626437273907343e-06,
      "loss": 0.0066,
      "step": 2732720
    },
    {
      "epoch": 4.472188946276258,
      "grad_norm": 0.1273951232433319,
      "learning_rate": 1.0625778351772173e-06,
      "loss": 0.0094,
      "step": 2732740
    },
    {
      "epoch": 4.472221676714911,
      "grad_norm": 0.3657326102256775,
      "learning_rate": 1.0625119429637e-06,
      "loss": 0.0142,
      "step": 2732760
    },
    {
      "epoch": 4.472254407153565,
      "grad_norm": 0.3339027166366577,
      "learning_rate": 1.062446050750183e-06,
      "loss": 0.0088,
      "step": 2732780
    },
    {
      "epoch": 4.472287137592218,
      "grad_norm": 0.20281988382339478,
      "learning_rate": 1.0623801585366659e-06,
      "loss": 0.0096,
      "step": 2732800
    },
    {
      "epoch": 4.472319868030871,
      "grad_norm": 0.5687304735183716,
      "learning_rate": 1.0623142663231486e-06,
      "loss": 0.0071,
      "step": 2732820
    },
    {
      "epoch": 4.4723525984695245,
      "grad_norm": 0.139723539352417,
      "learning_rate": 1.0622483741096316e-06,
      "loss": 0.009,
      "step": 2732840
    },
    {
      "epoch": 4.472385328908178,
      "grad_norm": 0.7717164158821106,
      "learning_rate": 1.0621824818961145e-06,
      "loss": 0.0092,
      "step": 2732860
    },
    {
      "epoch": 4.472418059346832,
      "grad_norm": 0.45427533984184265,
      "learning_rate": 1.0621165896825973e-06,
      "loss": 0.0098,
      "step": 2732880
    },
    {
      "epoch": 4.472450789785484,
      "grad_norm": 0.10717371851205826,
      "learning_rate": 1.0620506974690802e-06,
      "loss": 0.0078,
      "step": 2732900
    },
    {
      "epoch": 4.472483520224138,
      "grad_norm": 0.9075036644935608,
      "learning_rate": 1.061984805255563e-06,
      "loss": 0.0124,
      "step": 2732920
    },
    {
      "epoch": 4.472516250662792,
      "grad_norm": 0.25725501775741577,
      "learning_rate": 1.061918913042046e-06,
      "loss": 0.0062,
      "step": 2732940
    },
    {
      "epoch": 4.472548981101445,
      "grad_norm": 0.1703554093837738,
      "learning_rate": 1.0618530208285289e-06,
      "loss": 0.0058,
      "step": 2732960
    },
    {
      "epoch": 4.472581711540098,
      "grad_norm": 0.16808831691741943,
      "learning_rate": 1.0617871286150116e-06,
      "loss": 0.0082,
      "step": 2732980
    },
    {
      "epoch": 4.4726144419787515,
      "grad_norm": 0.515021562576294,
      "learning_rate": 1.0617212364014946e-06,
      "loss": 0.0074,
      "step": 2733000
    },
    {
      "epoch": 4.472647172417405,
      "grad_norm": 0.20655982196331024,
      "learning_rate": 1.0616553441879773e-06,
      "loss": 0.0083,
      "step": 2733020
    },
    {
      "epoch": 4.472679902856058,
      "grad_norm": 0.20202766358852386,
      "learning_rate": 1.0615894519744603e-06,
      "loss": 0.0123,
      "step": 2733040
    },
    {
      "epoch": 4.472712633294711,
      "grad_norm": 0.15358871221542358,
      "learning_rate": 1.0615235597609432e-06,
      "loss": 0.0138,
      "step": 2733060
    },
    {
      "epoch": 4.472745363733365,
      "grad_norm": 0.22178684175014496,
      "learning_rate": 1.061457667547426e-06,
      "loss": 0.0105,
      "step": 2733080
    },
    {
      "epoch": 4.472778094172018,
      "grad_norm": 0.7610567808151245,
      "learning_rate": 1.061391775333909e-06,
      "loss": 0.0093,
      "step": 2733100
    },
    {
      "epoch": 4.472810824610671,
      "grad_norm": 0.4416065216064453,
      "learning_rate": 1.0613258831203918e-06,
      "loss": 0.0093,
      "step": 2733120
    },
    {
      "epoch": 4.472843555049325,
      "grad_norm": 0.23434044420719147,
      "learning_rate": 1.0612599909068746e-06,
      "loss": 0.0107,
      "step": 2733140
    },
    {
      "epoch": 4.472876285487978,
      "grad_norm": 0.2822933495044708,
      "learning_rate": 1.0611940986933575e-06,
      "loss": 0.0087,
      "step": 2733160
    },
    {
      "epoch": 4.472909015926631,
      "grad_norm": 0.1546078622341156,
      "learning_rate": 1.0611282064798403e-06,
      "loss": 0.007,
      "step": 2733180
    },
    {
      "epoch": 4.472941746365285,
      "grad_norm": 0.11718416213989258,
      "learning_rate": 1.0610623142663232e-06,
      "loss": 0.0089,
      "step": 2733200
    },
    {
      "epoch": 4.472974476803938,
      "grad_norm": 0.32148003578186035,
      "learning_rate": 1.0609964220528062e-06,
      "loss": 0.0092,
      "step": 2733220
    },
    {
      "epoch": 4.473007207242592,
      "grad_norm": 0.4727218747138977,
      "learning_rate": 1.060930529839289e-06,
      "loss": 0.0093,
      "step": 2733240
    },
    {
      "epoch": 4.473039937681245,
      "grad_norm": 0.3550797402858734,
      "learning_rate": 1.0608646376257719e-06,
      "loss": 0.0124,
      "step": 2733260
    },
    {
      "epoch": 4.473072668119898,
      "grad_norm": 0.4335278272628784,
      "learning_rate": 1.0607987454122546e-06,
      "loss": 0.0103,
      "step": 2733280
    },
    {
      "epoch": 4.473105398558552,
      "grad_norm": 0.42892754077911377,
      "learning_rate": 1.0607328531987376e-06,
      "loss": 0.0126,
      "step": 2733300
    },
    {
      "epoch": 4.4731381289972045,
      "grad_norm": 0.07046382129192352,
      "learning_rate": 1.0606669609852205e-06,
      "loss": 0.0089,
      "step": 2733320
    },
    {
      "epoch": 4.473170859435858,
      "grad_norm": 0.25370463728904724,
      "learning_rate": 1.0606010687717033e-06,
      "loss": 0.0092,
      "step": 2733340
    },
    {
      "epoch": 4.473203589874512,
      "grad_norm": 0.17464236915111542,
      "learning_rate": 1.0605351765581862e-06,
      "loss": 0.0142,
      "step": 2733360
    },
    {
      "epoch": 4.473236320313164,
      "grad_norm": 0.12678830325603485,
      "learning_rate": 1.0604692843446692e-06,
      "loss": 0.0167,
      "step": 2733380
    },
    {
      "epoch": 4.473269050751818,
      "grad_norm": 0.20227263867855072,
      "learning_rate": 1.060403392131152e-06,
      "loss": 0.0072,
      "step": 2733400
    },
    {
      "epoch": 4.473301781190472,
      "grad_norm": 0.2283247411251068,
      "learning_rate": 1.0603374999176348e-06,
      "loss": 0.0132,
      "step": 2733420
    },
    {
      "epoch": 4.473334511629125,
      "grad_norm": 0.08913178741931915,
      "learning_rate": 1.0602716077041176e-06,
      "loss": 0.0085,
      "step": 2733440
    },
    {
      "epoch": 4.473367242067778,
      "grad_norm": 0.20341874659061432,
      "learning_rate": 1.0602057154906005e-06,
      "loss": 0.0111,
      "step": 2733460
    },
    {
      "epoch": 4.4733999725064315,
      "grad_norm": 0.24447639286518097,
      "learning_rate": 1.0601398232770835e-06,
      "loss": 0.0066,
      "step": 2733480
    },
    {
      "epoch": 4.473432702945085,
      "grad_norm": 0.1993623822927475,
      "learning_rate": 1.0600739310635662e-06,
      "loss": 0.0069,
      "step": 2733500
    },
    {
      "epoch": 4.473465433383738,
      "grad_norm": 0.15156064927577972,
      "learning_rate": 1.0600080388500492e-06,
      "loss": 0.0102,
      "step": 2733520
    },
    {
      "epoch": 4.473498163822391,
      "grad_norm": 0.15493100881576538,
      "learning_rate": 1.059942146636532e-06,
      "loss": 0.008,
      "step": 2733540
    },
    {
      "epoch": 4.473530894261045,
      "grad_norm": 0.11804452538490295,
      "learning_rate": 1.0598762544230149e-06,
      "loss": 0.0078,
      "step": 2733560
    },
    {
      "epoch": 4.473563624699699,
      "grad_norm": 0.4839746654033661,
      "learning_rate": 1.0598103622094978e-06,
      "loss": 0.0158,
      "step": 2733580
    },
    {
      "epoch": 4.473596355138351,
      "grad_norm": 0.19040200114250183,
      "learning_rate": 1.0597444699959806e-06,
      "loss": 0.0108,
      "step": 2733600
    },
    {
      "epoch": 4.473629085577005,
      "grad_norm": 0.20362000167369843,
      "learning_rate": 1.0596785777824635e-06,
      "loss": 0.0082,
      "step": 2733620
    },
    {
      "epoch": 4.4736618160156585,
      "grad_norm": 0.24699260294437408,
      "learning_rate": 1.0596126855689463e-06,
      "loss": 0.0119,
      "step": 2733640
    },
    {
      "epoch": 4.473694546454311,
      "grad_norm": 0.23067329823970795,
      "learning_rate": 1.0595467933554294e-06,
      "loss": 0.0077,
      "step": 2733660
    },
    {
      "epoch": 4.473727276892965,
      "grad_norm": 0.08146851509809494,
      "learning_rate": 1.0594809011419122e-06,
      "loss": 0.0088,
      "step": 2733680
    },
    {
      "epoch": 4.473760007331618,
      "grad_norm": 0.338533878326416,
      "learning_rate": 1.059415008928395e-06,
      "loss": 0.0067,
      "step": 2733700
    },
    {
      "epoch": 4.473792737770272,
      "grad_norm": 0.2105988711118698,
      "learning_rate": 1.0593491167148779e-06,
      "loss": 0.0086,
      "step": 2733720
    },
    {
      "epoch": 4.473825468208925,
      "grad_norm": 0.14523440599441528,
      "learning_rate": 1.0592832245013608e-06,
      "loss": 0.0102,
      "step": 2733740
    },
    {
      "epoch": 4.473858198647578,
      "grad_norm": 0.2552890181541443,
      "learning_rate": 1.0592173322878438e-06,
      "loss": 0.0074,
      "step": 2733760
    },
    {
      "epoch": 4.473890929086232,
      "grad_norm": 0.05208651348948479,
      "learning_rate": 1.0591514400743265e-06,
      "loss": 0.0062,
      "step": 2733780
    },
    {
      "epoch": 4.473923659524885,
      "grad_norm": 0.45742812752723694,
      "learning_rate": 1.0590855478608092e-06,
      "loss": 0.005,
      "step": 2733800
    },
    {
      "epoch": 4.473956389963538,
      "grad_norm": 0.3099704682826996,
      "learning_rate": 1.0590196556472924e-06,
      "loss": 0.0071,
      "step": 2733820
    },
    {
      "epoch": 4.473989120402192,
      "grad_norm": 0.17500931024551392,
      "learning_rate": 1.0589537634337751e-06,
      "loss": 0.0073,
      "step": 2733840
    },
    {
      "epoch": 4.474021850840845,
      "grad_norm": 0.09271314740180969,
      "learning_rate": 1.058887871220258e-06,
      "loss": 0.0069,
      "step": 2733860
    },
    {
      "epoch": 4.474054581279498,
      "grad_norm": 0.2387930452823639,
      "learning_rate": 1.0588219790067408e-06,
      "loss": 0.0073,
      "step": 2733880
    },
    {
      "epoch": 4.474087311718152,
      "grad_norm": 0.23707321286201477,
      "learning_rate": 1.0587560867932236e-06,
      "loss": 0.0073,
      "step": 2733900
    },
    {
      "epoch": 4.474120042156805,
      "grad_norm": 0.32824912667274475,
      "learning_rate": 1.0586901945797067e-06,
      "loss": 0.0097,
      "step": 2733920
    },
    {
      "epoch": 4.474152772595458,
      "grad_norm": 0.2522982060909271,
      "learning_rate": 1.0586243023661895e-06,
      "loss": 0.0081,
      "step": 2733940
    },
    {
      "epoch": 4.4741855030341116,
      "grad_norm": 0.3733927607536316,
      "learning_rate": 1.0585584101526724e-06,
      "loss": 0.0099,
      "step": 2733960
    },
    {
      "epoch": 4.474218233472765,
      "grad_norm": 0.177601158618927,
      "learning_rate": 1.0584925179391552e-06,
      "loss": 0.0064,
      "step": 2733980
    },
    {
      "epoch": 4.474250963911419,
      "grad_norm": 0.3666217625141144,
      "learning_rate": 1.0584266257256381e-06,
      "loss": 0.0076,
      "step": 2734000
    },
    {
      "epoch": 4.4742836943500714,
      "grad_norm": 0.47949686646461487,
      "learning_rate": 1.058360733512121e-06,
      "loss": 0.0091,
      "step": 2734020
    },
    {
      "epoch": 4.474316424788725,
      "grad_norm": 0.06580694019794464,
      "learning_rate": 1.0582948412986038e-06,
      "loss": 0.0054,
      "step": 2734040
    },
    {
      "epoch": 4.474349155227379,
      "grad_norm": 0.36731889843940735,
      "learning_rate": 1.0582289490850868e-06,
      "loss": 0.0087,
      "step": 2734060
    },
    {
      "epoch": 4.474381885666031,
      "grad_norm": 0.37102338671684265,
      "learning_rate": 1.0581630568715695e-06,
      "loss": 0.0084,
      "step": 2734080
    },
    {
      "epoch": 4.474414616104685,
      "grad_norm": 0.06445804238319397,
      "learning_rate": 1.0580971646580524e-06,
      "loss": 0.0064,
      "step": 2734100
    },
    {
      "epoch": 4.4744473465433385,
      "grad_norm": 0.22081662714481354,
      "learning_rate": 1.0580312724445354e-06,
      "loss": 0.0076,
      "step": 2734120
    },
    {
      "epoch": 4.474480076981992,
      "grad_norm": 0.10514412820339203,
      "learning_rate": 1.0579653802310181e-06,
      "loss": 0.0089,
      "step": 2734140
    },
    {
      "epoch": 4.474512807420645,
      "grad_norm": 0.11608689278364182,
      "learning_rate": 1.057899488017501e-06,
      "loss": 0.006,
      "step": 2734160
    },
    {
      "epoch": 4.474545537859298,
      "grad_norm": 0.22231131792068481,
      "learning_rate": 1.057833595803984e-06,
      "loss": 0.007,
      "step": 2734180
    },
    {
      "epoch": 4.474578268297952,
      "grad_norm": 0.19098882377147675,
      "learning_rate": 1.0577677035904668e-06,
      "loss": 0.008,
      "step": 2734200
    },
    {
      "epoch": 4.474610998736605,
      "grad_norm": 0.15493369102478027,
      "learning_rate": 1.0577018113769497e-06,
      "loss": 0.0068,
      "step": 2734220
    },
    {
      "epoch": 4.474643729175258,
      "grad_norm": 0.32508379220962524,
      "learning_rate": 1.0576359191634325e-06,
      "loss": 0.0088,
      "step": 2734240
    },
    {
      "epoch": 4.474676459613912,
      "grad_norm": 0.3869037330150604,
      "learning_rate": 1.0575700269499154e-06,
      "loss": 0.0093,
      "step": 2734260
    },
    {
      "epoch": 4.474709190052565,
      "grad_norm": 0.20593927800655365,
      "learning_rate": 1.0575041347363984e-06,
      "loss": 0.0075,
      "step": 2734280
    },
    {
      "epoch": 4.474741920491218,
      "grad_norm": 0.18573497235774994,
      "learning_rate": 1.0574382425228811e-06,
      "loss": 0.0083,
      "step": 2734300
    },
    {
      "epoch": 4.474774650929872,
      "grad_norm": 0.2311495691537857,
      "learning_rate": 1.057372350309364e-06,
      "loss": 0.0084,
      "step": 2734320
    },
    {
      "epoch": 4.474807381368525,
      "grad_norm": 0.25301051139831543,
      "learning_rate": 1.0573064580958468e-06,
      "loss": 0.0126,
      "step": 2734340
    },
    {
      "epoch": 4.474840111807178,
      "grad_norm": 2.6110548973083496,
      "learning_rate": 1.0572405658823298e-06,
      "loss": 0.0071,
      "step": 2734360
    },
    {
      "epoch": 4.474872842245832,
      "grad_norm": 0.16649003326892853,
      "learning_rate": 1.0571746736688127e-06,
      "loss": 0.0072,
      "step": 2734380
    },
    {
      "epoch": 4.474905572684485,
      "grad_norm": 0.22968707978725433,
      "learning_rate": 1.0571087814552954e-06,
      "loss": 0.0083,
      "step": 2734400
    },
    {
      "epoch": 4.474938303123139,
      "grad_norm": 0.4526348114013672,
      "learning_rate": 1.0570428892417784e-06,
      "loss": 0.0145,
      "step": 2734420
    },
    {
      "epoch": 4.474971033561792,
      "grad_norm": 0.15350167453289032,
      "learning_rate": 1.0569769970282614e-06,
      "loss": 0.0081,
      "step": 2734440
    },
    {
      "epoch": 4.475003764000445,
      "grad_norm": 0.17007116973400116,
      "learning_rate": 1.056911104814744e-06,
      "loss": 0.0073,
      "step": 2734460
    },
    {
      "epoch": 4.475036494439099,
      "grad_norm": 0.23081843554973602,
      "learning_rate": 1.056845212601227e-06,
      "loss": 0.0118,
      "step": 2734480
    },
    {
      "epoch": 4.4750692248777515,
      "grad_norm": 0.16653688251972198,
      "learning_rate": 1.0567793203877098e-06,
      "loss": 0.0089,
      "step": 2734500
    },
    {
      "epoch": 4.475101955316405,
      "grad_norm": 0.17732647061347961,
      "learning_rate": 1.0567134281741927e-06,
      "loss": 0.0073,
      "step": 2734520
    },
    {
      "epoch": 4.475134685755059,
      "grad_norm": 0.09237460047006607,
      "learning_rate": 1.0566475359606757e-06,
      "loss": 0.009,
      "step": 2734540
    },
    {
      "epoch": 4.475167416193711,
      "grad_norm": 0.5993859171867371,
      "learning_rate": 1.0565816437471584e-06,
      "loss": 0.0104,
      "step": 2734560
    },
    {
      "epoch": 4.475200146632365,
      "grad_norm": 0.38202419877052307,
      "learning_rate": 1.0565157515336414e-06,
      "loss": 0.0069,
      "step": 2734580
    },
    {
      "epoch": 4.475232877071019,
      "grad_norm": 0.1471043825149536,
      "learning_rate": 1.0564498593201241e-06,
      "loss": 0.0098,
      "step": 2734600
    },
    {
      "epoch": 4.475265607509672,
      "grad_norm": 0.20673130452632904,
      "learning_rate": 1.056383967106607e-06,
      "loss": 0.0118,
      "step": 2734620
    },
    {
      "epoch": 4.475298337948325,
      "grad_norm": 0.2645373046398163,
      "learning_rate": 1.05631807489309e-06,
      "loss": 0.0083,
      "step": 2734640
    },
    {
      "epoch": 4.4753310683869785,
      "grad_norm": 0.4523371160030365,
      "learning_rate": 1.0562521826795728e-06,
      "loss": 0.0102,
      "step": 2734660
    },
    {
      "epoch": 4.475363798825632,
      "grad_norm": 0.12001454830169678,
      "learning_rate": 1.0561862904660557e-06,
      "loss": 0.0071,
      "step": 2734680
    },
    {
      "epoch": 4.475396529264286,
      "grad_norm": 0.14539898931980133,
      "learning_rate": 1.0561203982525387e-06,
      "loss": 0.0063,
      "step": 2734700
    },
    {
      "epoch": 4.475429259702938,
      "grad_norm": 0.1013115793466568,
      "learning_rate": 1.0560545060390214e-06,
      "loss": 0.01,
      "step": 2734720
    },
    {
      "epoch": 4.475461990141592,
      "grad_norm": 0.12303081154823303,
      "learning_rate": 1.0559886138255044e-06,
      "loss": 0.0079,
      "step": 2734740
    },
    {
      "epoch": 4.4754947205802456,
      "grad_norm": 0.08536496758460999,
      "learning_rate": 1.055922721611987e-06,
      "loss": 0.0132,
      "step": 2734760
    },
    {
      "epoch": 4.475527451018898,
      "grad_norm": 0.4155751168727875,
      "learning_rate": 1.05585682939847e-06,
      "loss": 0.0072,
      "step": 2734780
    },
    {
      "epoch": 4.475560181457552,
      "grad_norm": 0.38644981384277344,
      "learning_rate": 1.055790937184953e-06,
      "loss": 0.0112,
      "step": 2734800
    },
    {
      "epoch": 4.4755929118962055,
      "grad_norm": 0.163898766040802,
      "learning_rate": 1.0557250449714357e-06,
      "loss": 0.0088,
      "step": 2734820
    },
    {
      "epoch": 4.475625642334858,
      "grad_norm": 0.16199195384979248,
      "learning_rate": 1.0556591527579187e-06,
      "loss": 0.0099,
      "step": 2734840
    },
    {
      "epoch": 4.475658372773512,
      "grad_norm": 0.12598805129528046,
      "learning_rate": 1.0555932605444014e-06,
      "loss": 0.0067,
      "step": 2734860
    },
    {
      "epoch": 4.475691103212165,
      "grad_norm": 0.29826804995536804,
      "learning_rate": 1.0555273683308844e-06,
      "loss": 0.0142,
      "step": 2734880
    },
    {
      "epoch": 4.475723833650819,
      "grad_norm": 0.05365582928061485,
      "learning_rate": 1.0554614761173673e-06,
      "loss": 0.0124,
      "step": 2734900
    },
    {
      "epoch": 4.475756564089472,
      "grad_norm": 0.07719714939594269,
      "learning_rate": 1.05539558390385e-06,
      "loss": 0.0074,
      "step": 2734920
    },
    {
      "epoch": 4.475789294528125,
      "grad_norm": 0.10816539824008942,
      "learning_rate": 1.055329691690333e-06,
      "loss": 0.0065,
      "step": 2734940
    },
    {
      "epoch": 4.475822024966779,
      "grad_norm": 0.3021535873413086,
      "learning_rate": 1.0552637994768158e-06,
      "loss": 0.0078,
      "step": 2734960
    },
    {
      "epoch": 4.4758547554054315,
      "grad_norm": 0.1780978888273239,
      "learning_rate": 1.055197907263299e-06,
      "loss": 0.0087,
      "step": 2734980
    },
    {
      "epoch": 4.475887485844085,
      "grad_norm": 0.1780533343553543,
      "learning_rate": 1.0551320150497817e-06,
      "loss": 0.008,
      "step": 2735000
    },
    {
      "epoch": 4.475920216282739,
      "grad_norm": 0.15720678865909576,
      "learning_rate": 1.0550661228362644e-06,
      "loss": 0.0066,
      "step": 2735020
    },
    {
      "epoch": 4.475952946721392,
      "grad_norm": 0.044461071491241455,
      "learning_rate": 1.0550002306227474e-06,
      "loss": 0.0119,
      "step": 2735040
    },
    {
      "epoch": 4.475985677160045,
      "grad_norm": 0.4528014361858368,
      "learning_rate": 1.0549343384092303e-06,
      "loss": 0.0088,
      "step": 2735060
    },
    {
      "epoch": 4.476018407598699,
      "grad_norm": 0.7364296317100525,
      "learning_rate": 1.0548684461957133e-06,
      "loss": 0.0111,
      "step": 2735080
    },
    {
      "epoch": 4.476051138037352,
      "grad_norm": 0.11902789026498795,
      "learning_rate": 1.054802553982196e-06,
      "loss": 0.0086,
      "step": 2735100
    },
    {
      "epoch": 4.476083868476005,
      "grad_norm": 0.16650550067424774,
      "learning_rate": 1.0547366617686787e-06,
      "loss": 0.0094,
      "step": 2735120
    },
    {
      "epoch": 4.4761165989146585,
      "grad_norm": 0.09245248138904572,
      "learning_rate": 1.054670769555162e-06,
      "loss": 0.0156,
      "step": 2735140
    },
    {
      "epoch": 4.476149329353312,
      "grad_norm": 0.20115582644939423,
      "learning_rate": 1.0546048773416446e-06,
      "loss": 0.0081,
      "step": 2735160
    },
    {
      "epoch": 4.476182059791966,
      "grad_norm": 0.36618226766586304,
      "learning_rate": 1.0545389851281276e-06,
      "loss": 0.0117,
      "step": 2735180
    },
    {
      "epoch": 4.476214790230618,
      "grad_norm": 0.1360979974269867,
      "learning_rate": 1.0544730929146103e-06,
      "loss": 0.0088,
      "step": 2735200
    },
    {
      "epoch": 4.476247520669272,
      "grad_norm": 0.1172589585185051,
      "learning_rate": 1.054407200701093e-06,
      "loss": 0.0116,
      "step": 2735220
    },
    {
      "epoch": 4.476280251107926,
      "grad_norm": 0.4654143750667572,
      "learning_rate": 1.0543413084875762e-06,
      "loss": 0.015,
      "step": 2735240
    },
    {
      "epoch": 4.476312981546578,
      "grad_norm": 0.1501954197883606,
      "learning_rate": 1.054275416274059e-06,
      "loss": 0.0133,
      "step": 2735260
    },
    {
      "epoch": 4.476345711985232,
      "grad_norm": 0.8819582462310791,
      "learning_rate": 1.054209524060542e-06,
      "loss": 0.0102,
      "step": 2735280
    },
    {
      "epoch": 4.4763784424238855,
      "grad_norm": 0.21992924809455872,
      "learning_rate": 1.0541436318470247e-06,
      "loss": 0.0107,
      "step": 2735300
    },
    {
      "epoch": 4.476411172862539,
      "grad_norm": 0.17568212747573853,
      "learning_rate": 1.0540777396335076e-06,
      "loss": 0.0077,
      "step": 2735320
    },
    {
      "epoch": 4.476443903301192,
      "grad_norm": 0.103603795170784,
      "learning_rate": 1.0540118474199906e-06,
      "loss": 0.0121,
      "step": 2735340
    },
    {
      "epoch": 4.476476633739845,
      "grad_norm": 0.2755499482154846,
      "learning_rate": 1.0539459552064733e-06,
      "loss": 0.0061,
      "step": 2735360
    },
    {
      "epoch": 4.476509364178499,
      "grad_norm": 0.321199506521225,
      "learning_rate": 1.0538800629929563e-06,
      "loss": 0.0065,
      "step": 2735380
    },
    {
      "epoch": 4.476542094617152,
      "grad_norm": 0.20536014437675476,
      "learning_rate": 1.053814170779439e-06,
      "loss": 0.0073,
      "step": 2735400
    },
    {
      "epoch": 4.476574825055805,
      "grad_norm": 0.0999189019203186,
      "learning_rate": 1.053748278565922e-06,
      "loss": 0.007,
      "step": 2735420
    },
    {
      "epoch": 4.476607555494459,
      "grad_norm": 0.8694691061973572,
      "learning_rate": 1.053682386352405e-06,
      "loss": 0.009,
      "step": 2735440
    },
    {
      "epoch": 4.4766402859331125,
      "grad_norm": 0.39809054136276245,
      "learning_rate": 1.0536164941388876e-06,
      "loss": 0.0083,
      "step": 2735460
    },
    {
      "epoch": 4.476673016371765,
      "grad_norm": 0.19125990569591522,
      "learning_rate": 1.0535506019253706e-06,
      "loss": 0.007,
      "step": 2735480
    },
    {
      "epoch": 4.476705746810419,
      "grad_norm": 0.17642123997211456,
      "learning_rate": 1.0534847097118535e-06,
      "loss": 0.0099,
      "step": 2735500
    },
    {
      "epoch": 4.476738477249072,
      "grad_norm": 0.526563286781311,
      "learning_rate": 1.0534188174983363e-06,
      "loss": 0.0098,
      "step": 2735520
    },
    {
      "epoch": 4.476771207687725,
      "grad_norm": 0.2778600752353668,
      "learning_rate": 1.0533529252848192e-06,
      "loss": 0.0076,
      "step": 2735540
    },
    {
      "epoch": 4.476803938126379,
      "grad_norm": 0.7966904640197754,
      "learning_rate": 1.053287033071302e-06,
      "loss": 0.0081,
      "step": 2735560
    },
    {
      "epoch": 4.476836668565032,
      "grad_norm": 0.39363065361976624,
      "learning_rate": 1.053221140857785e-06,
      "loss": 0.0169,
      "step": 2735580
    },
    {
      "epoch": 4.476869399003686,
      "grad_norm": 0.09456635266542435,
      "learning_rate": 1.0531552486442679e-06,
      "loss": 0.0082,
      "step": 2735600
    },
    {
      "epoch": 4.476902129442339,
      "grad_norm": 0.25970426201820374,
      "learning_rate": 1.0530893564307506e-06,
      "loss": 0.011,
      "step": 2735620
    },
    {
      "epoch": 4.476934859880992,
      "grad_norm": 0.2804218530654907,
      "learning_rate": 1.0530234642172336e-06,
      "loss": 0.0113,
      "step": 2735640
    },
    {
      "epoch": 4.476967590319646,
      "grad_norm": 0.4481894075870514,
      "learning_rate": 1.0529575720037163e-06,
      "loss": 0.0081,
      "step": 2735660
    },
    {
      "epoch": 4.4770003207582985,
      "grad_norm": 0.45008236169815063,
      "learning_rate": 1.0528916797901993e-06,
      "loss": 0.0091,
      "step": 2735680
    },
    {
      "epoch": 4.477033051196952,
      "grad_norm": 0.23896758258342743,
      "learning_rate": 1.0528257875766822e-06,
      "loss": 0.01,
      "step": 2735700
    },
    {
      "epoch": 4.477065781635606,
      "grad_norm": 0.2677880823612213,
      "learning_rate": 1.052759895363165e-06,
      "loss": 0.0085,
      "step": 2735720
    },
    {
      "epoch": 4.477098512074258,
      "grad_norm": 0.45222222805023193,
      "learning_rate": 1.052694003149648e-06,
      "loss": 0.0077,
      "step": 2735740
    },
    {
      "epoch": 4.477131242512912,
      "grad_norm": 0.2929491698741913,
      "learning_rate": 1.0526281109361309e-06,
      "loss": 0.0082,
      "step": 2735760
    },
    {
      "epoch": 4.4771639729515655,
      "grad_norm": 0.22553089261054993,
      "learning_rate": 1.0525622187226136e-06,
      "loss": 0.0104,
      "step": 2735780
    },
    {
      "epoch": 4.477196703390219,
      "grad_norm": 0.3692772090435028,
      "learning_rate": 1.0524963265090965e-06,
      "loss": 0.0097,
      "step": 2735800
    },
    {
      "epoch": 4.477229433828872,
      "grad_norm": 0.14913469552993774,
      "learning_rate": 1.0524304342955793e-06,
      "loss": 0.0103,
      "step": 2735820
    },
    {
      "epoch": 4.477262164267525,
      "grad_norm": 0.3222977817058563,
      "learning_rate": 1.0523645420820622e-06,
      "loss": 0.0113,
      "step": 2735840
    },
    {
      "epoch": 4.477294894706179,
      "grad_norm": 0.21876639127731323,
      "learning_rate": 1.0522986498685452e-06,
      "loss": 0.0092,
      "step": 2735860
    },
    {
      "epoch": 4.477327625144833,
      "grad_norm": 0.4448319673538208,
      "learning_rate": 1.052232757655028e-06,
      "loss": 0.0111,
      "step": 2735880
    },
    {
      "epoch": 4.477360355583485,
      "grad_norm": 2.4843170642852783,
      "learning_rate": 1.0521668654415109e-06,
      "loss": 0.0087,
      "step": 2735900
    },
    {
      "epoch": 4.477393086022139,
      "grad_norm": 0.17837965488433838,
      "learning_rate": 1.0521009732279936e-06,
      "loss": 0.0073,
      "step": 2735920
    },
    {
      "epoch": 4.4774258164607925,
      "grad_norm": 0.2319146692752838,
      "learning_rate": 1.0520350810144766e-06,
      "loss": 0.0072,
      "step": 2735940
    },
    {
      "epoch": 4.477458546899445,
      "grad_norm": 0.5980377197265625,
      "learning_rate": 1.0519691888009595e-06,
      "loss": 0.0109,
      "step": 2735960
    },
    {
      "epoch": 4.477491277338099,
      "grad_norm": 0.44665321707725525,
      "learning_rate": 1.0519032965874423e-06,
      "loss": 0.0064,
      "step": 2735980
    },
    {
      "epoch": 4.477524007776752,
      "grad_norm": 0.14793142676353455,
      "learning_rate": 1.0518374043739252e-06,
      "loss": 0.0123,
      "step": 2736000
    },
    {
      "epoch": 4.477556738215405,
      "grad_norm": 0.2361869513988495,
      "learning_rate": 1.0517715121604082e-06,
      "loss": 0.0149,
      "step": 2736020
    },
    {
      "epoch": 4.477589468654059,
      "grad_norm": 0.21073779463768005,
      "learning_rate": 1.051705619946891e-06,
      "loss": 0.006,
      "step": 2736040
    },
    {
      "epoch": 4.477622199092712,
      "grad_norm": 0.14219450950622559,
      "learning_rate": 1.0516397277333739e-06,
      "loss": 0.0071,
      "step": 2736060
    },
    {
      "epoch": 4.477654929531366,
      "grad_norm": 0.21582306921482086,
      "learning_rate": 1.0515738355198566e-06,
      "loss": 0.0102,
      "step": 2736080
    },
    {
      "epoch": 4.477687659970019,
      "grad_norm": 0.1986542046070099,
      "learning_rate": 1.0515079433063396e-06,
      "loss": 0.0128,
      "step": 2736100
    },
    {
      "epoch": 4.477720390408672,
      "grad_norm": 0.10710074007511139,
      "learning_rate": 1.0514420510928225e-06,
      "loss": 0.0052,
      "step": 2736120
    },
    {
      "epoch": 4.477753120847326,
      "grad_norm": 0.0894518792629242,
      "learning_rate": 1.0513761588793052e-06,
      "loss": 0.0115,
      "step": 2736140
    },
    {
      "epoch": 4.477785851285979,
      "grad_norm": 0.4929606020450592,
      "learning_rate": 1.0513102666657882e-06,
      "loss": 0.0105,
      "step": 2736160
    },
    {
      "epoch": 4.477818581724632,
      "grad_norm": 0.31703728437423706,
      "learning_rate": 1.051244374452271e-06,
      "loss": 0.0096,
      "step": 2736180
    },
    {
      "epoch": 4.477851312163286,
      "grad_norm": 0.12018442153930664,
      "learning_rate": 1.0511784822387539e-06,
      "loss": 0.0115,
      "step": 2736200
    },
    {
      "epoch": 4.477884042601939,
      "grad_norm": 0.4764375388622284,
      "learning_rate": 1.0511125900252368e-06,
      "loss": 0.0101,
      "step": 2736220
    },
    {
      "epoch": 4.477916773040592,
      "grad_norm": 0.19420906901359558,
      "learning_rate": 1.0510466978117196e-06,
      "loss": 0.0065,
      "step": 2736240
    },
    {
      "epoch": 4.477949503479246,
      "grad_norm": 0.6132861375808716,
      "learning_rate": 1.0509808055982025e-06,
      "loss": 0.0105,
      "step": 2736260
    },
    {
      "epoch": 4.477982233917899,
      "grad_norm": 0.08230528980493546,
      "learning_rate": 1.0509149133846855e-06,
      "loss": 0.0116,
      "step": 2736280
    },
    {
      "epoch": 4.478014964356552,
      "grad_norm": 0.10091898590326309,
      "learning_rate": 1.0508490211711684e-06,
      "loss": 0.0066,
      "step": 2736300
    },
    {
      "epoch": 4.4780476947952055,
      "grad_norm": 0.21829693019390106,
      "learning_rate": 1.0507831289576512e-06,
      "loss": 0.0105,
      "step": 2736320
    },
    {
      "epoch": 4.478080425233859,
      "grad_norm": 0.16050885617733002,
      "learning_rate": 1.050717236744134e-06,
      "loss": 0.0069,
      "step": 2736340
    },
    {
      "epoch": 4.478113155672513,
      "grad_norm": 0.1305338740348816,
      "learning_rate": 1.0506513445306169e-06,
      "loss": 0.0065,
      "step": 2736360
    },
    {
      "epoch": 4.478145886111165,
      "grad_norm": 0.6447890996932983,
      "learning_rate": 1.0505854523170998e-06,
      "loss": 0.0098,
      "step": 2736380
    },
    {
      "epoch": 4.478178616549819,
      "grad_norm": 0.3327498733997345,
      "learning_rate": 1.0505195601035828e-06,
      "loss": 0.0123,
      "step": 2736400
    },
    {
      "epoch": 4.478211346988473,
      "grad_norm": 0.3550935387611389,
      "learning_rate": 1.0504536678900655e-06,
      "loss": 0.0107,
      "step": 2736420
    },
    {
      "epoch": 4.478244077427125,
      "grad_norm": 0.1499669998884201,
      "learning_rate": 1.0503877756765482e-06,
      "loss": 0.0136,
      "step": 2736440
    },
    {
      "epoch": 4.478276807865779,
      "grad_norm": 0.2090637981891632,
      "learning_rate": 1.0503218834630314e-06,
      "loss": 0.0054,
      "step": 2736460
    },
    {
      "epoch": 4.4783095383044325,
      "grad_norm": 0.35801592469215393,
      "learning_rate": 1.0502559912495141e-06,
      "loss": 0.0058,
      "step": 2736480
    },
    {
      "epoch": 4.478342268743086,
      "grad_norm": 0.2791461944580078,
      "learning_rate": 1.050190099035997e-06,
      "loss": 0.0079,
      "step": 2736500
    },
    {
      "epoch": 4.478374999181739,
      "grad_norm": 0.45713862776756287,
      "learning_rate": 1.0501242068224798e-06,
      "loss": 0.0065,
      "step": 2736520
    },
    {
      "epoch": 4.478407729620392,
      "grad_norm": 0.1910773515701294,
      "learning_rate": 1.0500583146089626e-06,
      "loss": 0.0067,
      "step": 2736540
    },
    {
      "epoch": 4.478440460059046,
      "grad_norm": 0.07822483032941818,
      "learning_rate": 1.0499924223954457e-06,
      "loss": 0.0153,
      "step": 2736560
    },
    {
      "epoch": 4.478473190497699,
      "grad_norm": 0.2491060495376587,
      "learning_rate": 1.0499265301819285e-06,
      "loss": 0.0079,
      "step": 2736580
    },
    {
      "epoch": 4.478505920936352,
      "grad_norm": 0.6123270988464355,
      "learning_rate": 1.0498606379684114e-06,
      "loss": 0.0102,
      "step": 2736600
    },
    {
      "epoch": 4.478538651375006,
      "grad_norm": 0.09368022531270981,
      "learning_rate": 1.0497947457548942e-06,
      "loss": 0.0075,
      "step": 2736620
    },
    {
      "epoch": 4.478571381813659,
      "grad_norm": 0.2448500543832779,
      "learning_rate": 1.0497288535413771e-06,
      "loss": 0.0079,
      "step": 2736640
    },
    {
      "epoch": 4.478604112252312,
      "grad_norm": 0.4306603670120239,
      "learning_rate": 1.04966296132786e-06,
      "loss": 0.008,
      "step": 2736660
    },
    {
      "epoch": 4.478636842690966,
      "grad_norm": 0.09544548392295837,
      "learning_rate": 1.0495970691143428e-06,
      "loss": 0.0094,
      "step": 2736680
    },
    {
      "epoch": 4.478669573129619,
      "grad_norm": 0.2911871671676636,
      "learning_rate": 1.0495311769008258e-06,
      "loss": 0.0088,
      "step": 2736700
    },
    {
      "epoch": 4.478702303568272,
      "grad_norm": 0.5416072010993958,
      "learning_rate": 1.0494652846873087e-06,
      "loss": 0.0054,
      "step": 2736720
    },
    {
      "epoch": 4.478735034006926,
      "grad_norm": 0.17809134721755981,
      "learning_rate": 1.0493993924737915e-06,
      "loss": 0.0124,
      "step": 2736740
    },
    {
      "epoch": 4.478767764445579,
      "grad_norm": 0.0678967759013176,
      "learning_rate": 1.0493335002602744e-06,
      "loss": 0.0081,
      "step": 2736760
    },
    {
      "epoch": 4.478800494884233,
      "grad_norm": 0.08364753425121307,
      "learning_rate": 1.0492676080467571e-06,
      "loss": 0.0091,
      "step": 2736780
    },
    {
      "epoch": 4.4788332253228855,
      "grad_norm": 0.3635638356208801,
      "learning_rate": 1.04920171583324e-06,
      "loss": 0.0138,
      "step": 2736800
    },
    {
      "epoch": 4.478865955761539,
      "grad_norm": 0.07350331544876099,
      "learning_rate": 1.049135823619723e-06,
      "loss": 0.0064,
      "step": 2736820
    },
    {
      "epoch": 4.478898686200193,
      "grad_norm": 0.18011854588985443,
      "learning_rate": 1.0490699314062058e-06,
      "loss": 0.0088,
      "step": 2736840
    },
    {
      "epoch": 4.478931416638845,
      "grad_norm": 0.14208608865737915,
      "learning_rate": 1.0490040391926887e-06,
      "loss": 0.0066,
      "step": 2736860
    },
    {
      "epoch": 4.478964147077499,
      "grad_norm": 0.1559199094772339,
      "learning_rate": 1.0489381469791715e-06,
      "loss": 0.009,
      "step": 2736880
    },
    {
      "epoch": 4.478996877516153,
      "grad_norm": 0.05103651434183121,
      "learning_rate": 1.0488722547656544e-06,
      "loss": 0.0077,
      "step": 2736900
    },
    {
      "epoch": 4.479029607954806,
      "grad_norm": 0.28392842411994934,
      "learning_rate": 1.0488063625521374e-06,
      "loss": 0.0071,
      "step": 2736920
    },
    {
      "epoch": 4.479062338393459,
      "grad_norm": 0.14809051156044006,
      "learning_rate": 1.0487404703386201e-06,
      "loss": 0.0113,
      "step": 2736940
    },
    {
      "epoch": 4.4790950688321125,
      "grad_norm": 0.3364938497543335,
      "learning_rate": 1.048674578125103e-06,
      "loss": 0.0116,
      "step": 2736960
    },
    {
      "epoch": 4.479127799270766,
      "grad_norm": 0.1554037183523178,
      "learning_rate": 1.0486086859115858e-06,
      "loss": 0.0092,
      "step": 2736980
    },
    {
      "epoch": 4.479160529709419,
      "grad_norm": 0.24888122081756592,
      "learning_rate": 1.0485427936980688e-06,
      "loss": 0.0098,
      "step": 2737000
    },
    {
      "epoch": 4.479193260148072,
      "grad_norm": 0.28925004601478577,
      "learning_rate": 1.0484769014845517e-06,
      "loss": 0.0089,
      "step": 2737020
    },
    {
      "epoch": 4.479225990586726,
      "grad_norm": 0.11258590221405029,
      "learning_rate": 1.0484110092710345e-06,
      "loss": 0.0072,
      "step": 2737040
    },
    {
      "epoch": 4.47925872102538,
      "grad_norm": 0.05093542858958244,
      "learning_rate": 1.0483451170575174e-06,
      "loss": 0.0088,
      "step": 2737060
    },
    {
      "epoch": 4.479291451464032,
      "grad_norm": 0.06944967061281204,
      "learning_rate": 1.0482792248440004e-06,
      "loss": 0.008,
      "step": 2737080
    },
    {
      "epoch": 4.479324181902686,
      "grad_norm": 0.2115062028169632,
      "learning_rate": 1.048213332630483e-06,
      "loss": 0.0138,
      "step": 2737100
    },
    {
      "epoch": 4.4793569123413395,
      "grad_norm": 0.2404269278049469,
      "learning_rate": 1.048147440416966e-06,
      "loss": 0.0106,
      "step": 2737120
    },
    {
      "epoch": 4.479389642779992,
      "grad_norm": 0.22889000177383423,
      "learning_rate": 1.0480815482034488e-06,
      "loss": 0.006,
      "step": 2737140
    },
    {
      "epoch": 4.479422373218646,
      "grad_norm": 0.2677694261074066,
      "learning_rate": 1.0480156559899317e-06,
      "loss": 0.0066,
      "step": 2737160
    },
    {
      "epoch": 4.479455103657299,
      "grad_norm": 0.28527942299842834,
      "learning_rate": 1.0479497637764147e-06,
      "loss": 0.0087,
      "step": 2737180
    },
    {
      "epoch": 4.479487834095953,
      "grad_norm": 0.20016878843307495,
      "learning_rate": 1.0478838715628974e-06,
      "loss": 0.0125,
      "step": 2737200
    },
    {
      "epoch": 4.479520564534606,
      "grad_norm": 0.25366470217704773,
      "learning_rate": 1.0478179793493804e-06,
      "loss": 0.0092,
      "step": 2737220
    },
    {
      "epoch": 4.479553294973259,
      "grad_norm": 0.30520734190940857,
      "learning_rate": 1.0477520871358631e-06,
      "loss": 0.0094,
      "step": 2737240
    },
    {
      "epoch": 4.479586025411913,
      "grad_norm": 0.21897433698177338,
      "learning_rate": 1.047686194922346e-06,
      "loss": 0.0101,
      "step": 2737260
    },
    {
      "epoch": 4.479618755850566,
      "grad_norm": 0.25072622299194336,
      "learning_rate": 1.047620302708829e-06,
      "loss": 0.0137,
      "step": 2737280
    },
    {
      "epoch": 4.479651486289219,
      "grad_norm": 0.05006203427910805,
      "learning_rate": 1.0475544104953118e-06,
      "loss": 0.0096,
      "step": 2737300
    },
    {
      "epoch": 4.479684216727873,
      "grad_norm": 0.1942254602909088,
      "learning_rate": 1.0474885182817947e-06,
      "loss": 0.0109,
      "step": 2737320
    },
    {
      "epoch": 4.479716947166526,
      "grad_norm": 0.42003974318504333,
      "learning_rate": 1.0474226260682777e-06,
      "loss": 0.0089,
      "step": 2737340
    },
    {
      "epoch": 4.479749677605179,
      "grad_norm": 0.1031239703297615,
      "learning_rate": 1.0473567338547604e-06,
      "loss": 0.0071,
      "step": 2737360
    },
    {
      "epoch": 4.479782408043833,
      "grad_norm": 0.49880433082580566,
      "learning_rate": 1.0472908416412434e-06,
      "loss": 0.0134,
      "step": 2737380
    },
    {
      "epoch": 4.479815138482486,
      "grad_norm": 0.18673495948314667,
      "learning_rate": 1.047224949427726e-06,
      "loss": 0.0068,
      "step": 2737400
    },
    {
      "epoch": 4.479847868921139,
      "grad_norm": 0.14841820299625397,
      "learning_rate": 1.047159057214209e-06,
      "loss": 0.0114,
      "step": 2737420
    },
    {
      "epoch": 4.4798805993597925,
      "grad_norm": 0.24336668848991394,
      "learning_rate": 1.047093165000692e-06,
      "loss": 0.0062,
      "step": 2737440
    },
    {
      "epoch": 4.479913329798446,
      "grad_norm": 0.1352444738149643,
      "learning_rate": 1.0470272727871747e-06,
      "loss": 0.0115,
      "step": 2737460
    },
    {
      "epoch": 4.479946060237099,
      "grad_norm": 0.25510796904563904,
      "learning_rate": 1.0469613805736577e-06,
      "loss": 0.0119,
      "step": 2737480
    },
    {
      "epoch": 4.479978790675752,
      "grad_norm": 0.15328769385814667,
      "learning_rate": 1.0468954883601404e-06,
      "loss": 0.0078,
      "step": 2737500
    },
    {
      "epoch": 4.480011521114406,
      "grad_norm": 0.16127319633960724,
      "learning_rate": 1.0468295961466234e-06,
      "loss": 0.0127,
      "step": 2737520
    },
    {
      "epoch": 4.48004425155306,
      "grad_norm": 0.674744188785553,
      "learning_rate": 1.0467637039331063e-06,
      "loss": 0.0071,
      "step": 2737540
    },
    {
      "epoch": 4.480076981991712,
      "grad_norm": 0.11694301664829254,
      "learning_rate": 1.046697811719589e-06,
      "loss": 0.0114,
      "step": 2737560
    },
    {
      "epoch": 4.480109712430366,
      "grad_norm": 0.9138103723526001,
      "learning_rate": 1.046631919506072e-06,
      "loss": 0.0083,
      "step": 2737580
    },
    {
      "epoch": 4.4801424428690195,
      "grad_norm": 0.2795090675354004,
      "learning_rate": 1.046566027292555e-06,
      "loss": 0.0072,
      "step": 2737600
    },
    {
      "epoch": 4.480175173307673,
      "grad_norm": 0.47806334495544434,
      "learning_rate": 1.046500135079038e-06,
      "loss": 0.0132,
      "step": 2737620
    },
    {
      "epoch": 4.480207903746326,
      "grad_norm": 0.26036763191223145,
      "learning_rate": 1.0464342428655207e-06,
      "loss": 0.0098,
      "step": 2737640
    },
    {
      "epoch": 4.480240634184979,
      "grad_norm": 0.1222936287522316,
      "learning_rate": 1.0463683506520034e-06,
      "loss": 0.0083,
      "step": 2737660
    },
    {
      "epoch": 4.480273364623633,
      "grad_norm": 0.27657508850097656,
      "learning_rate": 1.0463024584384864e-06,
      "loss": 0.0116,
      "step": 2737680
    },
    {
      "epoch": 4.480306095062286,
      "grad_norm": 0.7324917912483215,
      "learning_rate": 1.0462365662249693e-06,
      "loss": 0.0136,
      "step": 2737700
    },
    {
      "epoch": 4.480338825500939,
      "grad_norm": 0.12190455943346024,
      "learning_rate": 1.0461706740114523e-06,
      "loss": 0.013,
      "step": 2737720
    },
    {
      "epoch": 4.480371555939593,
      "grad_norm": 0.12689286470413208,
      "learning_rate": 1.046104781797935e-06,
      "loss": 0.0078,
      "step": 2737740
    },
    {
      "epoch": 4.480404286378246,
      "grad_norm": 0.5843165516853333,
      "learning_rate": 1.0460388895844177e-06,
      "loss": 0.0091,
      "step": 2737760
    },
    {
      "epoch": 4.480437016816899,
      "grad_norm": 0.11973510682582855,
      "learning_rate": 1.045972997370901e-06,
      "loss": 0.0098,
      "step": 2737780
    },
    {
      "epoch": 4.480469747255553,
      "grad_norm": 0.26024872064590454,
      "learning_rate": 1.0459071051573837e-06,
      "loss": 0.0133,
      "step": 2737800
    },
    {
      "epoch": 4.480502477694206,
      "grad_norm": 0.2822594940662384,
      "learning_rate": 1.0458412129438666e-06,
      "loss": 0.011,
      "step": 2737820
    },
    {
      "epoch": 4.480535208132859,
      "grad_norm": 0.07402191311120987,
      "learning_rate": 1.0457753207303493e-06,
      "loss": 0.0089,
      "step": 2737840
    },
    {
      "epoch": 4.480567938571513,
      "grad_norm": 0.7651915550231934,
      "learning_rate": 1.045709428516832e-06,
      "loss": 0.0081,
      "step": 2737860
    },
    {
      "epoch": 4.480600669010166,
      "grad_norm": 0.24589082598686218,
      "learning_rate": 1.0456435363033152e-06,
      "loss": 0.0091,
      "step": 2737880
    },
    {
      "epoch": 4.480633399448819,
      "grad_norm": 0.5808185338973999,
      "learning_rate": 1.045577644089798e-06,
      "loss": 0.0118,
      "step": 2737900
    },
    {
      "epoch": 4.480666129887473,
      "grad_norm": 0.11022761464118958,
      "learning_rate": 1.045511751876281e-06,
      "loss": 0.0102,
      "step": 2737920
    },
    {
      "epoch": 4.480698860326126,
      "grad_norm": 0.28574588894844055,
      "learning_rate": 1.0454458596627637e-06,
      "loss": 0.0081,
      "step": 2737940
    },
    {
      "epoch": 4.48073159076478,
      "grad_norm": 0.36984798312187195,
      "learning_rate": 1.0453799674492466e-06,
      "loss": 0.0115,
      "step": 2737960
    },
    {
      "epoch": 4.4807643212034325,
      "grad_norm": 0.3387196958065033,
      "learning_rate": 1.0453140752357296e-06,
      "loss": 0.0082,
      "step": 2737980
    },
    {
      "epoch": 4.480797051642086,
      "grad_norm": 0.32710033655166626,
      "learning_rate": 1.0452481830222123e-06,
      "loss": 0.0058,
      "step": 2738000
    },
    {
      "epoch": 4.48082978208074,
      "grad_norm": 0.06363051384687424,
      "learning_rate": 1.0451822908086953e-06,
      "loss": 0.01,
      "step": 2738020
    },
    {
      "epoch": 4.480862512519392,
      "grad_norm": 0.2253708839416504,
      "learning_rate": 1.0451163985951782e-06,
      "loss": 0.0085,
      "step": 2738040
    },
    {
      "epoch": 4.480895242958046,
      "grad_norm": 0.22586168348789215,
      "learning_rate": 1.045050506381661e-06,
      "loss": 0.0082,
      "step": 2738060
    },
    {
      "epoch": 4.4809279733967,
      "grad_norm": 0.053291428834199905,
      "learning_rate": 1.044984614168144e-06,
      "loss": 0.0069,
      "step": 2738080
    },
    {
      "epoch": 4.480960703835353,
      "grad_norm": 0.43108612298965454,
      "learning_rate": 1.0449187219546267e-06,
      "loss": 0.011,
      "step": 2738100
    },
    {
      "epoch": 4.480993434274006,
      "grad_norm": 0.16682370007038116,
      "learning_rate": 1.0448528297411096e-06,
      "loss": 0.0097,
      "step": 2738120
    },
    {
      "epoch": 4.4810261647126595,
      "grad_norm": 0.09065508842468262,
      "learning_rate": 1.0447869375275926e-06,
      "loss": 0.0088,
      "step": 2738140
    },
    {
      "epoch": 4.481058895151313,
      "grad_norm": 0.1330816000699997,
      "learning_rate": 1.0447210453140753e-06,
      "loss": 0.0089,
      "step": 2738160
    },
    {
      "epoch": 4.481091625589966,
      "grad_norm": 0.25278881192207336,
      "learning_rate": 1.0446551531005582e-06,
      "loss": 0.0119,
      "step": 2738180
    },
    {
      "epoch": 4.481124356028619,
      "grad_norm": 0.45717450976371765,
      "learning_rate": 1.044589260887041e-06,
      "loss": 0.008,
      "step": 2738200
    },
    {
      "epoch": 4.481157086467273,
      "grad_norm": 0.13961215317249298,
      "learning_rate": 1.044523368673524e-06,
      "loss": 0.0093,
      "step": 2738220
    },
    {
      "epoch": 4.4811898169059265,
      "grad_norm": 0.24045778810977936,
      "learning_rate": 1.0444574764600069e-06,
      "loss": 0.0093,
      "step": 2738240
    },
    {
      "epoch": 4.481222547344579,
      "grad_norm": 0.0932065024971962,
      "learning_rate": 1.0443915842464896e-06,
      "loss": 0.0066,
      "step": 2738260
    },
    {
      "epoch": 4.481255277783233,
      "grad_norm": 0.42395085096359253,
      "learning_rate": 1.0443256920329726e-06,
      "loss": 0.0088,
      "step": 2738280
    },
    {
      "epoch": 4.481288008221886,
      "grad_norm": 0.10448738187551498,
      "learning_rate": 1.0442597998194553e-06,
      "loss": 0.0107,
      "step": 2738300
    },
    {
      "epoch": 4.481320738660539,
      "grad_norm": 0.1942800134420395,
      "learning_rate": 1.0441939076059383e-06,
      "loss": 0.0057,
      "step": 2738320
    },
    {
      "epoch": 4.481353469099193,
      "grad_norm": 0.31726446747779846,
      "learning_rate": 1.0441280153924212e-06,
      "loss": 0.0081,
      "step": 2738340
    },
    {
      "epoch": 4.481386199537846,
      "grad_norm": 0.135144904255867,
      "learning_rate": 1.044062123178904e-06,
      "loss": 0.0074,
      "step": 2738360
    },
    {
      "epoch": 4.4814189299765,
      "grad_norm": 0.35814905166625977,
      "learning_rate": 1.043996230965387e-06,
      "loss": 0.0108,
      "step": 2738380
    },
    {
      "epoch": 4.481451660415153,
      "grad_norm": 0.06485506892204285,
      "learning_rate": 1.0439303387518699e-06,
      "loss": 0.0153,
      "step": 2738400
    },
    {
      "epoch": 4.481484390853806,
      "grad_norm": 0.12298528850078583,
      "learning_rate": 1.0438644465383526e-06,
      "loss": 0.0151,
      "step": 2738420
    },
    {
      "epoch": 4.48151712129246,
      "grad_norm": 0.32838308811187744,
      "learning_rate": 1.0437985543248356e-06,
      "loss": 0.009,
      "step": 2738440
    },
    {
      "epoch": 4.4815498517311125,
      "grad_norm": 0.25043290853500366,
      "learning_rate": 1.0437326621113183e-06,
      "loss": 0.0084,
      "step": 2738460
    },
    {
      "epoch": 4.481582582169766,
      "grad_norm": 0.12534771859645844,
      "learning_rate": 1.0436667698978013e-06,
      "loss": 0.012,
      "step": 2738480
    },
    {
      "epoch": 4.48161531260842,
      "grad_norm": 0.1349957436323166,
      "learning_rate": 1.0436008776842842e-06,
      "loss": 0.0111,
      "step": 2738500
    },
    {
      "epoch": 4.481648043047073,
      "grad_norm": 0.5024054050445557,
      "learning_rate": 1.043534985470767e-06,
      "loss": 0.0097,
      "step": 2738520
    },
    {
      "epoch": 4.481680773485726,
      "grad_norm": 0.18738192319869995,
      "learning_rate": 1.0434690932572499e-06,
      "loss": 0.0089,
      "step": 2738540
    },
    {
      "epoch": 4.48171350392438,
      "grad_norm": 0.23107358813285828,
      "learning_rate": 1.0434032010437326e-06,
      "loss": 0.0049,
      "step": 2738560
    },
    {
      "epoch": 4.481746234363033,
      "grad_norm": 0.4038480818271637,
      "learning_rate": 1.0433373088302156e-06,
      "loss": 0.0074,
      "step": 2738580
    },
    {
      "epoch": 4.481778964801686,
      "grad_norm": 0.5199598073959351,
      "learning_rate": 1.0432714166166985e-06,
      "loss": 0.0104,
      "step": 2738600
    },
    {
      "epoch": 4.4818116952403395,
      "grad_norm": 0.5434711575508118,
      "learning_rate": 1.0432055244031813e-06,
      "loss": 0.0107,
      "step": 2738620
    },
    {
      "epoch": 4.481844425678993,
      "grad_norm": 0.2480260580778122,
      "learning_rate": 1.0431396321896642e-06,
      "loss": 0.0131,
      "step": 2738640
    },
    {
      "epoch": 4.481877156117647,
      "grad_norm": 0.13479731976985931,
      "learning_rate": 1.0430737399761472e-06,
      "loss": 0.0098,
      "step": 2738660
    },
    {
      "epoch": 4.481909886556299,
      "grad_norm": 0.3623051345348358,
      "learning_rate": 1.04300784776263e-06,
      "loss": 0.0141,
      "step": 2738680
    },
    {
      "epoch": 4.481942616994953,
      "grad_norm": 0.388156920671463,
      "learning_rate": 1.0429419555491129e-06,
      "loss": 0.0128,
      "step": 2738700
    },
    {
      "epoch": 4.481975347433607,
      "grad_norm": 0.06471598148345947,
      "learning_rate": 1.0428760633355956e-06,
      "loss": 0.0078,
      "step": 2738720
    },
    {
      "epoch": 4.482008077872259,
      "grad_norm": 0.27337250113487244,
      "learning_rate": 1.0428101711220786e-06,
      "loss": 0.0078,
      "step": 2738740
    },
    {
      "epoch": 4.482040808310913,
      "grad_norm": 0.3155645728111267,
      "learning_rate": 1.0427442789085615e-06,
      "loss": 0.0075,
      "step": 2738760
    },
    {
      "epoch": 4.4820735387495665,
      "grad_norm": 0.21469107270240784,
      "learning_rate": 1.0426783866950443e-06,
      "loss": 0.0119,
      "step": 2738780
    },
    {
      "epoch": 4.48210626918822,
      "grad_norm": 0.4134194552898407,
      "learning_rate": 1.0426124944815272e-06,
      "loss": 0.0094,
      "step": 2738800
    },
    {
      "epoch": 4.482138999626873,
      "grad_norm": 0.4169333577156067,
      "learning_rate": 1.04254660226801e-06,
      "loss": 0.0137,
      "step": 2738820
    },
    {
      "epoch": 4.482171730065526,
      "grad_norm": 0.14624550938606262,
      "learning_rate": 1.042480710054493e-06,
      "loss": 0.0092,
      "step": 2738840
    },
    {
      "epoch": 4.48220446050418,
      "grad_norm": 0.22674058377742767,
      "learning_rate": 1.0424148178409758e-06,
      "loss": 0.0093,
      "step": 2738860
    },
    {
      "epoch": 4.482237190942833,
      "grad_norm": 0.10723436623811722,
      "learning_rate": 1.0423489256274586e-06,
      "loss": 0.0078,
      "step": 2738880
    },
    {
      "epoch": 4.482269921381486,
      "grad_norm": 0.14607588946819305,
      "learning_rate": 1.0422830334139415e-06,
      "loss": 0.0065,
      "step": 2738900
    },
    {
      "epoch": 4.48230265182014,
      "grad_norm": 0.7345736026763916,
      "learning_rate": 1.0422171412004245e-06,
      "loss": 0.0076,
      "step": 2738920
    },
    {
      "epoch": 4.482335382258793,
      "grad_norm": 0.10492657124996185,
      "learning_rate": 1.0421512489869074e-06,
      "loss": 0.0123,
      "step": 2738940
    },
    {
      "epoch": 4.482368112697446,
      "grad_norm": 0.18159087002277374,
      "learning_rate": 1.0420853567733902e-06,
      "loss": 0.0078,
      "step": 2738960
    },
    {
      "epoch": 4.4824008431361,
      "grad_norm": 0.02808516100049019,
      "learning_rate": 1.042019464559873e-06,
      "loss": 0.0113,
      "step": 2738980
    },
    {
      "epoch": 4.482433573574753,
      "grad_norm": 0.33363640308380127,
      "learning_rate": 1.0419535723463559e-06,
      "loss": 0.0098,
      "step": 2739000
    },
    {
      "epoch": 4.482466304013406,
      "grad_norm": 1.159225344657898,
      "learning_rate": 1.0418876801328388e-06,
      "loss": 0.0122,
      "step": 2739020
    },
    {
      "epoch": 4.48249903445206,
      "grad_norm": 0.30552998185157776,
      "learning_rate": 1.0418217879193218e-06,
      "loss": 0.0075,
      "step": 2739040
    },
    {
      "epoch": 4.482531764890713,
      "grad_norm": 0.27196836471557617,
      "learning_rate": 1.0417558957058045e-06,
      "loss": 0.006,
      "step": 2739060
    },
    {
      "epoch": 4.482564495329367,
      "grad_norm": 0.13609617948532104,
      "learning_rate": 1.0416900034922873e-06,
      "loss": 0.0077,
      "step": 2739080
    },
    {
      "epoch": 4.4825972257680196,
      "grad_norm": 0.21615466475486755,
      "learning_rate": 1.0416241112787704e-06,
      "loss": 0.0063,
      "step": 2739100
    },
    {
      "epoch": 4.482629956206673,
      "grad_norm": 0.19742852449417114,
      "learning_rate": 1.0415582190652532e-06,
      "loss": 0.0059,
      "step": 2739120
    },
    {
      "epoch": 4.482662686645327,
      "grad_norm": 0.13844330608844757,
      "learning_rate": 1.0414923268517361e-06,
      "loss": 0.0093,
      "step": 2739140
    },
    {
      "epoch": 4.4826954170839795,
      "grad_norm": 0.5128076076507568,
      "learning_rate": 1.0414264346382188e-06,
      "loss": 0.0099,
      "step": 2739160
    },
    {
      "epoch": 4.482728147522633,
      "grad_norm": 0.2695828080177307,
      "learning_rate": 1.0413605424247016e-06,
      "loss": 0.0103,
      "step": 2739180
    },
    {
      "epoch": 4.482760877961287,
      "grad_norm": 0.2685997188091278,
      "learning_rate": 1.0412946502111848e-06,
      "loss": 0.0086,
      "step": 2739200
    },
    {
      "epoch": 4.482793608399939,
      "grad_norm": 0.3693004250526428,
      "learning_rate": 1.0412287579976675e-06,
      "loss": 0.0091,
      "step": 2739220
    },
    {
      "epoch": 4.482826338838593,
      "grad_norm": 0.27035731077194214,
      "learning_rate": 1.0411628657841504e-06,
      "loss": 0.0094,
      "step": 2739240
    },
    {
      "epoch": 4.4828590692772465,
      "grad_norm": 0.05964244529604912,
      "learning_rate": 1.0410969735706332e-06,
      "loss": 0.0046,
      "step": 2739260
    },
    {
      "epoch": 4.4828917997159,
      "grad_norm": 0.1594972312450409,
      "learning_rate": 1.0410310813571161e-06,
      "loss": 0.0101,
      "step": 2739280
    },
    {
      "epoch": 4.482924530154553,
      "grad_norm": 0.2098856270313263,
      "learning_rate": 1.040965189143599e-06,
      "loss": 0.01,
      "step": 2739300
    },
    {
      "epoch": 4.482957260593206,
      "grad_norm": 0.3151934742927551,
      "learning_rate": 1.0408992969300818e-06,
      "loss": 0.0091,
      "step": 2739320
    },
    {
      "epoch": 4.48298999103186,
      "grad_norm": 0.27438294887542725,
      "learning_rate": 1.0408334047165648e-06,
      "loss": 0.0084,
      "step": 2739340
    },
    {
      "epoch": 4.483022721470514,
      "grad_norm": 0.22705139219760895,
      "learning_rate": 1.0407675125030477e-06,
      "loss": 0.0123,
      "step": 2739360
    },
    {
      "epoch": 4.483055451909166,
      "grad_norm": 0.3423738479614258,
      "learning_rate": 1.0407016202895305e-06,
      "loss": 0.0082,
      "step": 2739380
    },
    {
      "epoch": 4.48308818234782,
      "grad_norm": 0.20668518543243408,
      "learning_rate": 1.0406357280760134e-06,
      "loss": 0.0059,
      "step": 2739400
    },
    {
      "epoch": 4.4831209127864735,
      "grad_norm": 0.10802148282527924,
      "learning_rate": 1.0405698358624962e-06,
      "loss": 0.0061,
      "step": 2739420
    },
    {
      "epoch": 4.483153643225126,
      "grad_norm": 0.07532582432031631,
      "learning_rate": 1.0405039436489791e-06,
      "loss": 0.0071,
      "step": 2739440
    },
    {
      "epoch": 4.48318637366378,
      "grad_norm": 0.08759181201457977,
      "learning_rate": 1.040438051435462e-06,
      "loss": 0.0093,
      "step": 2739460
    },
    {
      "epoch": 4.483219104102433,
      "grad_norm": 0.1283036470413208,
      "learning_rate": 1.0403721592219448e-06,
      "loss": 0.0055,
      "step": 2739480
    },
    {
      "epoch": 4.483251834541086,
      "grad_norm": 0.9055656790733337,
      "learning_rate": 1.0403062670084278e-06,
      "loss": 0.0124,
      "step": 2739500
    },
    {
      "epoch": 4.48328456497974,
      "grad_norm": 0.26755329966545105,
      "learning_rate": 1.0402403747949105e-06,
      "loss": 0.0083,
      "step": 2739520
    },
    {
      "epoch": 4.483317295418393,
      "grad_norm": 0.20081959664821625,
      "learning_rate": 1.0401744825813934e-06,
      "loss": 0.0099,
      "step": 2739540
    },
    {
      "epoch": 4.483350025857047,
      "grad_norm": 0.30558496713638306,
      "learning_rate": 1.0401085903678764e-06,
      "loss": 0.0091,
      "step": 2739560
    },
    {
      "epoch": 4.4833827562957,
      "grad_norm": 0.3695316016674042,
      "learning_rate": 1.0400426981543591e-06,
      "loss": 0.0082,
      "step": 2739580
    },
    {
      "epoch": 4.483415486734353,
      "grad_norm": 0.13009873032569885,
      "learning_rate": 1.039976805940842e-06,
      "loss": 0.0087,
      "step": 2739600
    },
    {
      "epoch": 4.483448217173007,
      "grad_norm": 0.16983278095722198,
      "learning_rate": 1.0399109137273248e-06,
      "loss": 0.0079,
      "step": 2739620
    },
    {
      "epoch": 4.4834809476116595,
      "grad_norm": 0.2602365016937256,
      "learning_rate": 1.0398450215138078e-06,
      "loss": 0.0096,
      "step": 2739640
    },
    {
      "epoch": 4.483513678050313,
      "grad_norm": 0.5147441029548645,
      "learning_rate": 1.0397791293002907e-06,
      "loss": 0.01,
      "step": 2739660
    },
    {
      "epoch": 4.483546408488967,
      "grad_norm": 0.37023043632507324,
      "learning_rate": 1.0397132370867735e-06,
      "loss": 0.0164,
      "step": 2739680
    },
    {
      "epoch": 4.48357913892762,
      "grad_norm": 0.12301598489284515,
      "learning_rate": 1.0396473448732564e-06,
      "loss": 0.0123,
      "step": 2739700
    },
    {
      "epoch": 4.483611869366273,
      "grad_norm": 0.13469310104846954,
      "learning_rate": 1.0395814526597394e-06,
      "loss": 0.0111,
      "step": 2739720
    },
    {
      "epoch": 4.483644599804927,
      "grad_norm": 0.11799625307321548,
      "learning_rate": 1.0395155604462221e-06,
      "loss": 0.0081,
      "step": 2739740
    },
    {
      "epoch": 4.48367733024358,
      "grad_norm": 0.11712673306465149,
      "learning_rate": 1.039449668232705e-06,
      "loss": 0.0048,
      "step": 2739760
    },
    {
      "epoch": 4.483710060682233,
      "grad_norm": 0.17086997628211975,
      "learning_rate": 1.0393837760191878e-06,
      "loss": 0.0087,
      "step": 2739780
    },
    {
      "epoch": 4.4837427911208865,
      "grad_norm": 0.19157542288303375,
      "learning_rate": 1.0393178838056708e-06,
      "loss": 0.0083,
      "step": 2739800
    },
    {
      "epoch": 4.48377552155954,
      "grad_norm": 0.03025338426232338,
      "learning_rate": 1.0392519915921537e-06,
      "loss": 0.0117,
      "step": 2739820
    },
    {
      "epoch": 4.483808251998194,
      "grad_norm": 0.2981933653354645,
      "learning_rate": 1.0391860993786364e-06,
      "loss": 0.009,
      "step": 2739840
    },
    {
      "epoch": 4.483840982436846,
      "grad_norm": 0.18339096009731293,
      "learning_rate": 1.0391202071651194e-06,
      "loss": 0.0088,
      "step": 2739860
    },
    {
      "epoch": 4.4838737128755,
      "grad_norm": 0.08595520257949829,
      "learning_rate": 1.0390543149516021e-06,
      "loss": 0.0104,
      "step": 2739880
    },
    {
      "epoch": 4.483906443314154,
      "grad_norm": 0.4230414628982544,
      "learning_rate": 1.038988422738085e-06,
      "loss": 0.0079,
      "step": 2739900
    },
    {
      "epoch": 4.483939173752806,
      "grad_norm": 0.46786150336265564,
      "learning_rate": 1.038922530524568e-06,
      "loss": 0.0112,
      "step": 2739920
    },
    {
      "epoch": 4.48397190419146,
      "grad_norm": 0.09700841456651688,
      "learning_rate": 1.0388566383110508e-06,
      "loss": 0.0088,
      "step": 2739940
    },
    {
      "epoch": 4.4840046346301135,
      "grad_norm": 0.2751782536506653,
      "learning_rate": 1.0387907460975337e-06,
      "loss": 0.0088,
      "step": 2739960
    },
    {
      "epoch": 4.484037365068767,
      "grad_norm": 0.13797840476036072,
      "learning_rate": 1.0387248538840167e-06,
      "loss": 0.0112,
      "step": 2739980
    },
    {
      "epoch": 4.48407009550742,
      "grad_norm": 0.2296345829963684,
      "learning_rate": 1.0386589616704994e-06,
      "loss": 0.0097,
      "step": 2740000
    },
    {
      "epoch": 4.484102825946073,
      "grad_norm": 0.4452921152114868,
      "learning_rate": 1.0385930694569824e-06,
      "loss": 0.0085,
      "step": 2740020
    },
    {
      "epoch": 4.484135556384727,
      "grad_norm": 0.3422412574291229,
      "learning_rate": 1.0385271772434651e-06,
      "loss": 0.0116,
      "step": 2740040
    },
    {
      "epoch": 4.48416828682338,
      "grad_norm": 0.149959534406662,
      "learning_rate": 1.038461285029948e-06,
      "loss": 0.0072,
      "step": 2740060
    },
    {
      "epoch": 4.484201017262033,
      "grad_norm": 0.3271738886833191,
      "learning_rate": 1.038395392816431e-06,
      "loss": 0.0104,
      "step": 2740080
    },
    {
      "epoch": 4.484233747700687,
      "grad_norm": 0.13808341324329376,
      "learning_rate": 1.0383295006029138e-06,
      "loss": 0.0076,
      "step": 2740100
    },
    {
      "epoch": 4.48426647813934,
      "grad_norm": 0.27616986632347107,
      "learning_rate": 1.0382636083893967e-06,
      "loss": 0.0072,
      "step": 2740120
    },
    {
      "epoch": 4.484299208577993,
      "grad_norm": 0.30971086025238037,
      "learning_rate": 1.0381977161758794e-06,
      "loss": 0.0073,
      "step": 2740140
    },
    {
      "epoch": 4.484331939016647,
      "grad_norm": 0.26268208026885986,
      "learning_rate": 1.0381318239623624e-06,
      "loss": 0.0071,
      "step": 2740160
    },
    {
      "epoch": 4.4843646694553,
      "grad_norm": 0.08064347505569458,
      "learning_rate": 1.0380659317488454e-06,
      "loss": 0.0101,
      "step": 2740180
    },
    {
      "epoch": 4.484397399893953,
      "grad_norm": 0.10371595621109009,
      "learning_rate": 1.038000039535328e-06,
      "loss": 0.0095,
      "step": 2740200
    },
    {
      "epoch": 4.484430130332607,
      "grad_norm": 0.1692536175251007,
      "learning_rate": 1.037934147321811e-06,
      "loss": 0.0067,
      "step": 2740220
    },
    {
      "epoch": 4.48446286077126,
      "grad_norm": 0.091439388692379,
      "learning_rate": 1.037868255108294e-06,
      "loss": 0.0079,
      "step": 2740240
    },
    {
      "epoch": 4.484495591209914,
      "grad_norm": 0.2942936420440674,
      "learning_rate": 1.037802362894777e-06,
      "loss": 0.0097,
      "step": 2740260
    },
    {
      "epoch": 4.4845283216485665,
      "grad_norm": 0.3374093770980835,
      "learning_rate": 1.0377364706812597e-06,
      "loss": 0.0074,
      "step": 2740280
    },
    {
      "epoch": 4.48456105208722,
      "grad_norm": 0.048434678465127945,
      "learning_rate": 1.0376705784677424e-06,
      "loss": 0.0089,
      "step": 2740300
    },
    {
      "epoch": 4.484593782525874,
      "grad_norm": 0.19814041256904602,
      "learning_rate": 1.0376046862542254e-06,
      "loss": 0.0086,
      "step": 2740320
    },
    {
      "epoch": 4.484626512964526,
      "grad_norm": 0.41747549176216125,
      "learning_rate": 1.0375387940407083e-06,
      "loss": 0.0062,
      "step": 2740340
    },
    {
      "epoch": 4.48465924340318,
      "grad_norm": 0.2176051139831543,
      "learning_rate": 1.0374729018271913e-06,
      "loss": 0.0094,
      "step": 2740360
    },
    {
      "epoch": 4.484691973841834,
      "grad_norm": 0.12450326979160309,
      "learning_rate": 1.037407009613674e-06,
      "loss": 0.0136,
      "step": 2740380
    },
    {
      "epoch": 4.484724704280486,
      "grad_norm": 0.2914062440395355,
      "learning_rate": 1.0373411174001568e-06,
      "loss": 0.0105,
      "step": 2740400
    },
    {
      "epoch": 4.48475743471914,
      "grad_norm": 0.16226191818714142,
      "learning_rate": 1.03727522518664e-06,
      "loss": 0.0057,
      "step": 2740420
    },
    {
      "epoch": 4.4847901651577935,
      "grad_norm": 0.1113104373216629,
      "learning_rate": 1.0372093329731227e-06,
      "loss": 0.005,
      "step": 2740440
    },
    {
      "epoch": 4.484822895596447,
      "grad_norm": 0.36529305577278137,
      "learning_rate": 1.0371434407596056e-06,
      "loss": 0.0084,
      "step": 2740460
    },
    {
      "epoch": 4.4848556260351,
      "grad_norm": 0.20626534521579742,
      "learning_rate": 1.0370775485460884e-06,
      "loss": 0.0053,
      "step": 2740480
    },
    {
      "epoch": 4.484888356473753,
      "grad_norm": 0.19126935303211212,
      "learning_rate": 1.037011656332571e-06,
      "loss": 0.0077,
      "step": 2740500
    },
    {
      "epoch": 4.484921086912407,
      "grad_norm": 0.04669778421521187,
      "learning_rate": 1.0369457641190543e-06,
      "loss": 0.0054,
      "step": 2740520
    },
    {
      "epoch": 4.484953817351061,
      "grad_norm": 0.09910544008016586,
      "learning_rate": 1.036879871905537e-06,
      "loss": 0.0094,
      "step": 2740540
    },
    {
      "epoch": 4.484986547789713,
      "grad_norm": 0.13643091917037964,
      "learning_rate": 1.03681397969202e-06,
      "loss": 0.0077,
      "step": 2740560
    },
    {
      "epoch": 4.485019278228367,
      "grad_norm": 0.1180025264620781,
      "learning_rate": 1.0367480874785027e-06,
      "loss": 0.009,
      "step": 2740580
    },
    {
      "epoch": 4.4850520086670205,
      "grad_norm": 0.29221823811531067,
      "learning_rate": 1.0366821952649856e-06,
      "loss": 0.0088,
      "step": 2740600
    },
    {
      "epoch": 4.485084739105673,
      "grad_norm": 0.4052414000034332,
      "learning_rate": 1.0366163030514686e-06,
      "loss": 0.0067,
      "step": 2740620
    },
    {
      "epoch": 4.485117469544327,
      "grad_norm": 0.5251622200012207,
      "learning_rate": 1.0365504108379513e-06,
      "loss": 0.0076,
      "step": 2740640
    },
    {
      "epoch": 4.48515019998298,
      "grad_norm": 0.35400882363319397,
      "learning_rate": 1.0364845186244343e-06,
      "loss": 0.0109,
      "step": 2740660
    },
    {
      "epoch": 4.485182930421633,
      "grad_norm": 0.09199876338243484,
      "learning_rate": 1.0364186264109172e-06,
      "loss": 0.0099,
      "step": 2740680
    },
    {
      "epoch": 4.485215660860287,
      "grad_norm": 0.03245062381029129,
      "learning_rate": 1.0363527341974e-06,
      "loss": 0.0103,
      "step": 2740700
    },
    {
      "epoch": 4.48524839129894,
      "grad_norm": 0.5015099048614502,
      "learning_rate": 1.036286841983883e-06,
      "loss": 0.0099,
      "step": 2740720
    },
    {
      "epoch": 4.485281121737594,
      "grad_norm": 0.4077928066253662,
      "learning_rate": 1.0362209497703657e-06,
      "loss": 0.0066,
      "step": 2740740
    },
    {
      "epoch": 4.485313852176247,
      "grad_norm": 0.539411723613739,
      "learning_rate": 1.0361550575568486e-06,
      "loss": 0.0094,
      "step": 2740760
    },
    {
      "epoch": 4.4853465826149,
      "grad_norm": 0.2856702208518982,
      "learning_rate": 1.0360891653433316e-06,
      "loss": 0.0084,
      "step": 2740780
    },
    {
      "epoch": 4.485379313053554,
      "grad_norm": 0.09924715012311935,
      "learning_rate": 1.0360232731298143e-06,
      "loss": 0.0122,
      "step": 2740800
    },
    {
      "epoch": 4.485412043492207,
      "grad_norm": 0.4932560920715332,
      "learning_rate": 1.0359573809162973e-06,
      "loss": 0.0115,
      "step": 2740820
    },
    {
      "epoch": 4.48544477393086,
      "grad_norm": 0.18398281931877136,
      "learning_rate": 1.03589148870278e-06,
      "loss": 0.0086,
      "step": 2740840
    },
    {
      "epoch": 4.485477504369514,
      "grad_norm": 0.22840213775634766,
      "learning_rate": 1.035825596489263e-06,
      "loss": 0.0127,
      "step": 2740860
    },
    {
      "epoch": 4.485510234808167,
      "grad_norm": 0.14242081344127655,
      "learning_rate": 1.035759704275746e-06,
      "loss": 0.0113,
      "step": 2740880
    },
    {
      "epoch": 4.48554296524682,
      "grad_norm": 0.25755372643470764,
      "learning_rate": 1.0356938120622286e-06,
      "loss": 0.0058,
      "step": 2740900
    },
    {
      "epoch": 4.4855756956854735,
      "grad_norm": 0.24130989611148834,
      "learning_rate": 1.0356279198487116e-06,
      "loss": 0.009,
      "step": 2740920
    },
    {
      "epoch": 4.485608426124127,
      "grad_norm": 0.22379302978515625,
      "learning_rate": 1.0355620276351943e-06,
      "loss": 0.0066,
      "step": 2740940
    },
    {
      "epoch": 4.48564115656278,
      "grad_norm": 0.14053523540496826,
      "learning_rate": 1.0354961354216773e-06,
      "loss": 0.0072,
      "step": 2740960
    },
    {
      "epoch": 4.485673887001433,
      "grad_norm": 0.2211042195558548,
      "learning_rate": 1.0354302432081602e-06,
      "loss": 0.0065,
      "step": 2740980
    },
    {
      "epoch": 4.485706617440087,
      "grad_norm": 0.2607446014881134,
      "learning_rate": 1.035364350994643e-06,
      "loss": 0.0086,
      "step": 2741000
    },
    {
      "epoch": 4.485739347878741,
      "grad_norm": 0.12858618795871735,
      "learning_rate": 1.035298458781126e-06,
      "loss": 0.007,
      "step": 2741020
    },
    {
      "epoch": 4.485772078317393,
      "grad_norm": 0.33917081356048584,
      "learning_rate": 1.0352325665676089e-06,
      "loss": 0.01,
      "step": 2741040
    },
    {
      "epoch": 4.485804808756047,
      "grad_norm": 0.8958448171615601,
      "learning_rate": 1.0351666743540916e-06,
      "loss": 0.0115,
      "step": 2741060
    },
    {
      "epoch": 4.4858375391947005,
      "grad_norm": 0.8815833926200867,
      "learning_rate": 1.0351007821405746e-06,
      "loss": 0.0087,
      "step": 2741080
    },
    {
      "epoch": 4.485870269633353,
      "grad_norm": 0.3750631809234619,
      "learning_rate": 1.0350348899270573e-06,
      "loss": 0.0082,
      "step": 2741100
    },
    {
      "epoch": 4.485903000072007,
      "grad_norm": 0.2463122308254242,
      "learning_rate": 1.0349689977135403e-06,
      "loss": 0.0098,
      "step": 2741120
    },
    {
      "epoch": 4.48593573051066,
      "grad_norm": 0.21293967962265015,
      "learning_rate": 1.0349031055000232e-06,
      "loss": 0.0093,
      "step": 2741140
    },
    {
      "epoch": 4.485968460949314,
      "grad_norm": 0.29382357001304626,
      "learning_rate": 1.034837213286506e-06,
      "loss": 0.0123,
      "step": 2741160
    },
    {
      "epoch": 4.486001191387967,
      "grad_norm": 0.0871955081820488,
      "learning_rate": 1.034771321072989e-06,
      "loss": 0.0094,
      "step": 2741180
    },
    {
      "epoch": 4.48603392182662,
      "grad_norm": 0.25334227085113525,
      "learning_rate": 1.0347054288594716e-06,
      "loss": 0.0106,
      "step": 2741200
    },
    {
      "epoch": 4.486066652265274,
      "grad_norm": 0.24775037169456482,
      "learning_rate": 1.0346395366459546e-06,
      "loss": 0.0096,
      "step": 2741220
    },
    {
      "epoch": 4.486099382703927,
      "grad_norm": 0.11961741745471954,
      "learning_rate": 1.0345736444324375e-06,
      "loss": 0.0072,
      "step": 2741240
    },
    {
      "epoch": 4.48613211314258,
      "grad_norm": 0.19265908002853394,
      "learning_rate": 1.0345077522189203e-06,
      "loss": 0.0054,
      "step": 2741260
    },
    {
      "epoch": 4.486164843581234,
      "grad_norm": 0.14604203402996063,
      "learning_rate": 1.0344418600054032e-06,
      "loss": 0.0098,
      "step": 2741280
    },
    {
      "epoch": 4.486197574019887,
      "grad_norm": 0.08566971868276596,
      "learning_rate": 1.0343759677918862e-06,
      "loss": 0.007,
      "step": 2741300
    },
    {
      "epoch": 4.48623030445854,
      "grad_norm": 0.13863587379455566,
      "learning_rate": 1.034310075578369e-06,
      "loss": 0.0095,
      "step": 2741320
    },
    {
      "epoch": 4.486263034897194,
      "grad_norm": 0.2556961476802826,
      "learning_rate": 1.0342441833648519e-06,
      "loss": 0.0107,
      "step": 2741340
    },
    {
      "epoch": 4.486295765335847,
      "grad_norm": 0.2399311065673828,
      "learning_rate": 1.0341782911513346e-06,
      "loss": 0.0099,
      "step": 2741360
    },
    {
      "epoch": 4.4863284957745,
      "grad_norm": 0.16234751045703888,
      "learning_rate": 1.0341123989378176e-06,
      "loss": 0.008,
      "step": 2741380
    },
    {
      "epoch": 4.486361226213154,
      "grad_norm": 0.5994575023651123,
      "learning_rate": 1.0340465067243005e-06,
      "loss": 0.0094,
      "step": 2741400
    },
    {
      "epoch": 4.486393956651807,
      "grad_norm": 0.29076582193374634,
      "learning_rate": 1.0339806145107833e-06,
      "loss": 0.0106,
      "step": 2741420
    },
    {
      "epoch": 4.486426687090461,
      "grad_norm": 0.14880648255348206,
      "learning_rate": 1.0339147222972662e-06,
      "loss": 0.015,
      "step": 2741440
    },
    {
      "epoch": 4.4864594175291135,
      "grad_norm": 0.2687433362007141,
      "learning_rate": 1.033848830083749e-06,
      "loss": 0.0047,
      "step": 2741460
    },
    {
      "epoch": 4.486492147967767,
      "grad_norm": 0.15773490071296692,
      "learning_rate": 1.0337829378702321e-06,
      "loss": 0.008,
      "step": 2741480
    },
    {
      "epoch": 4.486524878406421,
      "grad_norm": 0.7410125136375427,
      "learning_rate": 1.0337170456567149e-06,
      "loss": 0.0084,
      "step": 2741500
    },
    {
      "epoch": 4.486557608845073,
      "grad_norm": 0.33476659655570984,
      "learning_rate": 1.0336511534431976e-06,
      "loss": 0.0079,
      "step": 2741520
    },
    {
      "epoch": 4.486590339283727,
      "grad_norm": 0.08153920620679855,
      "learning_rate": 1.0335852612296805e-06,
      "loss": 0.0106,
      "step": 2741540
    },
    {
      "epoch": 4.486623069722381,
      "grad_norm": 0.48745131492614746,
      "learning_rate": 1.0335193690161635e-06,
      "loss": 0.0152,
      "step": 2741560
    },
    {
      "epoch": 4.486655800161034,
      "grad_norm": 0.22651658952236176,
      "learning_rate": 1.0334534768026465e-06,
      "loss": 0.009,
      "step": 2741580
    },
    {
      "epoch": 4.486688530599687,
      "grad_norm": 0.14302225410938263,
      "learning_rate": 1.0333875845891292e-06,
      "loss": 0.0086,
      "step": 2741600
    },
    {
      "epoch": 4.4867212610383405,
      "grad_norm": 0.1907256543636322,
      "learning_rate": 1.033321692375612e-06,
      "loss": 0.0061,
      "step": 2741620
    },
    {
      "epoch": 4.486753991476994,
      "grad_norm": 0.3718072175979614,
      "learning_rate": 1.0332558001620949e-06,
      "loss": 0.0079,
      "step": 2741640
    },
    {
      "epoch": 4.486786721915647,
      "grad_norm": 0.19898299872875214,
      "learning_rate": 1.0331899079485778e-06,
      "loss": 0.0092,
      "step": 2741660
    },
    {
      "epoch": 4.4868194523543,
      "grad_norm": 0.4205179512500763,
      "learning_rate": 1.0331240157350608e-06,
      "loss": 0.0081,
      "step": 2741680
    },
    {
      "epoch": 4.486852182792954,
      "grad_norm": 0.4507186710834503,
      "learning_rate": 1.0330581235215435e-06,
      "loss": 0.0077,
      "step": 2741700
    },
    {
      "epoch": 4.4868849132316075,
      "grad_norm": 0.5733230710029602,
      "learning_rate": 1.0329922313080263e-06,
      "loss": 0.0119,
      "step": 2741720
    },
    {
      "epoch": 4.48691764367026,
      "grad_norm": 0.2941514849662781,
      "learning_rate": 1.0329263390945094e-06,
      "loss": 0.0122,
      "step": 2741740
    },
    {
      "epoch": 4.486950374108914,
      "grad_norm": 0.4090133011341095,
      "learning_rate": 1.0328604468809922e-06,
      "loss": 0.0108,
      "step": 2741760
    },
    {
      "epoch": 4.486983104547567,
      "grad_norm": 0.3518930673599243,
      "learning_rate": 1.0327945546674751e-06,
      "loss": 0.0075,
      "step": 2741780
    },
    {
      "epoch": 4.48701583498622,
      "grad_norm": 0.3810108006000519,
      "learning_rate": 1.0327286624539579e-06,
      "loss": 0.0061,
      "step": 2741800
    },
    {
      "epoch": 4.487048565424874,
      "grad_norm": 0.14612720906734467,
      "learning_rate": 1.0326627702404406e-06,
      "loss": 0.0067,
      "step": 2741820
    },
    {
      "epoch": 4.487081295863527,
      "grad_norm": 0.3917023837566376,
      "learning_rate": 1.0325968780269238e-06,
      "loss": 0.0091,
      "step": 2741840
    },
    {
      "epoch": 4.48711402630218,
      "grad_norm": 0.04948849976062775,
      "learning_rate": 1.0325309858134065e-06,
      "loss": 0.0066,
      "step": 2741860
    },
    {
      "epoch": 4.487146756740834,
      "grad_norm": 0.1269901841878891,
      "learning_rate": 1.0324650935998895e-06,
      "loss": 0.0079,
      "step": 2741880
    },
    {
      "epoch": 4.487179487179487,
      "grad_norm": 0.17912915349006653,
      "learning_rate": 1.0323992013863722e-06,
      "loss": 0.0091,
      "step": 2741900
    },
    {
      "epoch": 4.487212217618141,
      "grad_norm": 0.17437197268009186,
      "learning_rate": 1.0323333091728551e-06,
      "loss": 0.0113,
      "step": 2741920
    },
    {
      "epoch": 4.4872449480567935,
      "grad_norm": 0.3888433277606964,
      "learning_rate": 1.032267416959338e-06,
      "loss": 0.0127,
      "step": 2741940
    },
    {
      "epoch": 4.487277678495447,
      "grad_norm": 0.9101288318634033,
      "learning_rate": 1.0322015247458208e-06,
      "loss": 0.011,
      "step": 2741960
    },
    {
      "epoch": 4.487310408934101,
      "grad_norm": 0.3913220167160034,
      "learning_rate": 1.0321356325323038e-06,
      "loss": 0.0117,
      "step": 2741980
    },
    {
      "epoch": 4.487343139372754,
      "grad_norm": 0.593207836151123,
      "learning_rate": 1.0320697403187867e-06,
      "loss": 0.0088,
      "step": 2742000
    },
    {
      "epoch": 4.487375869811407,
      "grad_norm": 0.27836379408836365,
      "learning_rate": 1.0320038481052695e-06,
      "loss": 0.0096,
      "step": 2742020
    },
    {
      "epoch": 4.487408600250061,
      "grad_norm": 0.10050296783447266,
      "learning_rate": 1.0319379558917524e-06,
      "loss": 0.0135,
      "step": 2742040
    },
    {
      "epoch": 4.487441330688714,
      "grad_norm": 0.05510731786489487,
      "learning_rate": 1.0318720636782352e-06,
      "loss": 0.0099,
      "step": 2742060
    },
    {
      "epoch": 4.487474061127367,
      "grad_norm": 0.14049415290355682,
      "learning_rate": 1.0318061714647181e-06,
      "loss": 0.0091,
      "step": 2742080
    },
    {
      "epoch": 4.4875067915660205,
      "grad_norm": 0.2169312685728073,
      "learning_rate": 1.031740279251201e-06,
      "loss": 0.0117,
      "step": 2742100
    },
    {
      "epoch": 4.487539522004674,
      "grad_norm": 0.11399419605731964,
      "learning_rate": 1.0316743870376838e-06,
      "loss": 0.0061,
      "step": 2742120
    },
    {
      "epoch": 4.487572252443327,
      "grad_norm": 0.21379804611206055,
      "learning_rate": 1.0316084948241668e-06,
      "loss": 0.009,
      "step": 2742140
    },
    {
      "epoch": 4.48760498288198,
      "grad_norm": 0.5514007210731506,
      "learning_rate": 1.0315426026106495e-06,
      "loss": 0.0116,
      "step": 2742160
    },
    {
      "epoch": 4.487637713320634,
      "grad_norm": 0.31592586636543274,
      "learning_rate": 1.0314767103971325e-06,
      "loss": 0.0106,
      "step": 2742180
    },
    {
      "epoch": 4.487670443759288,
      "grad_norm": 0.26009225845336914,
      "learning_rate": 1.0314108181836154e-06,
      "loss": 0.0081,
      "step": 2742200
    },
    {
      "epoch": 4.48770317419794,
      "grad_norm": 0.15537536144256592,
      "learning_rate": 1.0313449259700981e-06,
      "loss": 0.0137,
      "step": 2742220
    },
    {
      "epoch": 4.487735904636594,
      "grad_norm": 0.5851733684539795,
      "learning_rate": 1.031279033756581e-06,
      "loss": 0.006,
      "step": 2742240
    },
    {
      "epoch": 4.4877686350752475,
      "grad_norm": 0.11801334470510483,
      "learning_rate": 1.0312131415430638e-06,
      "loss": 0.0085,
      "step": 2742260
    },
    {
      "epoch": 4.487801365513901,
      "grad_norm": 0.13954591751098633,
      "learning_rate": 1.0311472493295468e-06,
      "loss": 0.0081,
      "step": 2742280
    },
    {
      "epoch": 4.487834095952554,
      "grad_norm": 0.17022563517093658,
      "learning_rate": 1.0310813571160297e-06,
      "loss": 0.0066,
      "step": 2742300
    },
    {
      "epoch": 4.487866826391207,
      "grad_norm": 0.3101479113101959,
      "learning_rate": 1.0310154649025125e-06,
      "loss": 0.0081,
      "step": 2742320
    },
    {
      "epoch": 4.487899556829861,
      "grad_norm": 0.27565452456474304,
      "learning_rate": 1.0309495726889954e-06,
      "loss": 0.0064,
      "step": 2742340
    },
    {
      "epoch": 4.487932287268514,
      "grad_norm": 0.33114588260650635,
      "learning_rate": 1.0308836804754784e-06,
      "loss": 0.0111,
      "step": 2742360
    },
    {
      "epoch": 4.487965017707167,
      "grad_norm": 0.24898101389408112,
      "learning_rate": 1.0308177882619611e-06,
      "loss": 0.0107,
      "step": 2742380
    },
    {
      "epoch": 4.487997748145821,
      "grad_norm": 0.17971299588680267,
      "learning_rate": 1.030751896048444e-06,
      "loss": 0.0085,
      "step": 2742400
    },
    {
      "epoch": 4.488030478584474,
      "grad_norm": 0.3118128776550293,
      "learning_rate": 1.0306860038349268e-06,
      "loss": 0.012,
      "step": 2742420
    },
    {
      "epoch": 4.488063209023127,
      "grad_norm": 0.637735903263092,
      "learning_rate": 1.0306201116214098e-06,
      "loss": 0.01,
      "step": 2742440
    },
    {
      "epoch": 4.488095939461781,
      "grad_norm": 0.20128421485424042,
      "learning_rate": 1.0305542194078927e-06,
      "loss": 0.008,
      "step": 2742460
    },
    {
      "epoch": 4.488128669900434,
      "grad_norm": 0.11780761182308197,
      "learning_rate": 1.0304883271943755e-06,
      "loss": 0.0064,
      "step": 2742480
    },
    {
      "epoch": 4.488161400339087,
      "grad_norm": 0.3750409483909607,
      "learning_rate": 1.0304224349808584e-06,
      "loss": 0.0063,
      "step": 2742500
    },
    {
      "epoch": 4.488194130777741,
      "grad_norm": 0.17632558941841125,
      "learning_rate": 1.0303565427673411e-06,
      "loss": 0.0071,
      "step": 2742520
    },
    {
      "epoch": 4.488226861216394,
      "grad_norm": 0.25683268904685974,
      "learning_rate": 1.030290650553824e-06,
      "loss": 0.0081,
      "step": 2742540
    },
    {
      "epoch": 4.488259591655047,
      "grad_norm": 0.8149376511573792,
      "learning_rate": 1.030224758340307e-06,
      "loss": 0.0071,
      "step": 2742560
    },
    {
      "epoch": 4.4882923220937005,
      "grad_norm": 0.18895652890205383,
      "learning_rate": 1.0301588661267898e-06,
      "loss": 0.0075,
      "step": 2742580
    },
    {
      "epoch": 4.488325052532354,
      "grad_norm": 0.263126939535141,
      "learning_rate": 1.0300929739132727e-06,
      "loss": 0.0123,
      "step": 2742600
    },
    {
      "epoch": 4.488357782971008,
      "grad_norm": 0.29611825942993164,
      "learning_rate": 1.0300270816997557e-06,
      "loss": 0.0084,
      "step": 2742620
    },
    {
      "epoch": 4.48839051340966,
      "grad_norm": 0.23934508860111237,
      "learning_rate": 1.0299611894862384e-06,
      "loss": 0.0122,
      "step": 2742640
    },
    {
      "epoch": 4.488423243848314,
      "grad_norm": 0.5513468980789185,
      "learning_rate": 1.0298952972727214e-06,
      "loss": 0.0133,
      "step": 2742660
    },
    {
      "epoch": 4.488455974286968,
      "grad_norm": 0.2984282672405243,
      "learning_rate": 1.0298294050592041e-06,
      "loss": 0.0095,
      "step": 2742680
    },
    {
      "epoch": 4.48848870472562,
      "grad_norm": 0.4308203160762787,
      "learning_rate": 1.029763512845687e-06,
      "loss": 0.0123,
      "step": 2742700
    },
    {
      "epoch": 4.488521435164274,
      "grad_norm": 0.12590932846069336,
      "learning_rate": 1.02969762063217e-06,
      "loss": 0.0098,
      "step": 2742720
    },
    {
      "epoch": 4.4885541656029275,
      "grad_norm": 0.141411691904068,
      "learning_rate": 1.0296317284186528e-06,
      "loss": 0.0083,
      "step": 2742740
    },
    {
      "epoch": 4.488586896041581,
      "grad_norm": 0.23556511104106903,
      "learning_rate": 1.0295658362051357e-06,
      "loss": 0.0069,
      "step": 2742760
    },
    {
      "epoch": 4.488619626480234,
      "grad_norm": 0.30109497904777527,
      "learning_rate": 1.0294999439916185e-06,
      "loss": 0.0088,
      "step": 2742780
    },
    {
      "epoch": 4.488652356918887,
      "grad_norm": 0.12888450920581818,
      "learning_rate": 1.0294340517781016e-06,
      "loss": 0.0109,
      "step": 2742800
    },
    {
      "epoch": 4.488685087357541,
      "grad_norm": 0.3521900177001953,
      "learning_rate": 1.0293681595645844e-06,
      "loss": 0.0133,
      "step": 2742820
    },
    {
      "epoch": 4.488717817796194,
      "grad_norm": 0.25483644008636475,
      "learning_rate": 1.029302267351067e-06,
      "loss": 0.0092,
      "step": 2742840
    },
    {
      "epoch": 4.488750548234847,
      "grad_norm": 0.10777823626995087,
      "learning_rate": 1.02923637513755e-06,
      "loss": 0.0075,
      "step": 2742860
    },
    {
      "epoch": 4.488783278673501,
      "grad_norm": 0.19596146047115326,
      "learning_rate": 1.029170482924033e-06,
      "loss": 0.0098,
      "step": 2742880
    },
    {
      "epoch": 4.4888160091121545,
      "grad_norm": 0.2834562659263611,
      "learning_rate": 1.029104590710516e-06,
      "loss": 0.0073,
      "step": 2742900
    },
    {
      "epoch": 4.488848739550807,
      "grad_norm": 0.6788370013237,
      "learning_rate": 1.0290386984969987e-06,
      "loss": 0.0083,
      "step": 2742920
    },
    {
      "epoch": 4.488881469989461,
      "grad_norm": 0.8395939469337463,
      "learning_rate": 1.0289728062834814e-06,
      "loss": 0.0154,
      "step": 2742940
    },
    {
      "epoch": 4.488914200428114,
      "grad_norm": 0.7869133353233337,
      "learning_rate": 1.0289069140699644e-06,
      "loss": 0.0067,
      "step": 2742960
    },
    {
      "epoch": 4.488946930866767,
      "grad_norm": 0.3539069592952728,
      "learning_rate": 1.0288410218564473e-06,
      "loss": 0.0069,
      "step": 2742980
    },
    {
      "epoch": 4.488979661305421,
      "grad_norm": 0.10091181099414825,
      "learning_rate": 1.0287751296429303e-06,
      "loss": 0.0094,
      "step": 2743000
    },
    {
      "epoch": 4.489012391744074,
      "grad_norm": 0.181895449757576,
      "learning_rate": 1.028709237429413e-06,
      "loss": 0.0086,
      "step": 2743020
    },
    {
      "epoch": 4.489045122182728,
      "grad_norm": 0.11519423127174377,
      "learning_rate": 1.0286433452158958e-06,
      "loss": 0.0125,
      "step": 2743040
    },
    {
      "epoch": 4.489077852621381,
      "grad_norm": 0.3018166422843933,
      "learning_rate": 1.028577453002379e-06,
      "loss": 0.0068,
      "step": 2743060
    },
    {
      "epoch": 4.489110583060034,
      "grad_norm": 0.1647893637418747,
      "learning_rate": 1.0285115607888617e-06,
      "loss": 0.0051,
      "step": 2743080
    },
    {
      "epoch": 4.489143313498688,
      "grad_norm": 0.15759523212909698,
      "learning_rate": 1.0284456685753446e-06,
      "loss": 0.0071,
      "step": 2743100
    },
    {
      "epoch": 4.4891760439373405,
      "grad_norm": 0.3345235586166382,
      "learning_rate": 1.0283797763618274e-06,
      "loss": 0.0065,
      "step": 2743120
    },
    {
      "epoch": 4.489208774375994,
      "grad_norm": 0.3723246455192566,
      "learning_rate": 1.02831388414831e-06,
      "loss": 0.0104,
      "step": 2743140
    },
    {
      "epoch": 4.489241504814648,
      "grad_norm": 0.19150906801223755,
      "learning_rate": 1.0282479919347933e-06,
      "loss": 0.0085,
      "step": 2743160
    },
    {
      "epoch": 4.489274235253301,
      "grad_norm": 0.3867441415786743,
      "learning_rate": 1.028182099721276e-06,
      "loss": 0.008,
      "step": 2743180
    },
    {
      "epoch": 4.489306965691954,
      "grad_norm": 0.4088573753833771,
      "learning_rate": 1.028116207507759e-06,
      "loss": 0.011,
      "step": 2743200
    },
    {
      "epoch": 4.489339696130608,
      "grad_norm": 0.1373523771762848,
      "learning_rate": 1.0280503152942417e-06,
      "loss": 0.0089,
      "step": 2743220
    },
    {
      "epoch": 4.489372426569261,
      "grad_norm": 0.16646479070186615,
      "learning_rate": 1.0279844230807246e-06,
      "loss": 0.009,
      "step": 2743240
    },
    {
      "epoch": 4.489405157007914,
      "grad_norm": 0.42689815163612366,
      "learning_rate": 1.0279185308672076e-06,
      "loss": 0.0114,
      "step": 2743260
    },
    {
      "epoch": 4.4894378874465675,
      "grad_norm": 1.550241470336914,
      "learning_rate": 1.0278526386536903e-06,
      "loss": 0.0108,
      "step": 2743280
    },
    {
      "epoch": 4.489470617885221,
      "grad_norm": 0.369662344455719,
      "learning_rate": 1.0277867464401733e-06,
      "loss": 0.0065,
      "step": 2743300
    },
    {
      "epoch": 4.489503348323874,
      "grad_norm": 0.3077125549316406,
      "learning_rate": 1.0277208542266562e-06,
      "loss": 0.0138,
      "step": 2743320
    },
    {
      "epoch": 4.489536078762527,
      "grad_norm": 0.30923452973365784,
      "learning_rate": 1.027654962013139e-06,
      "loss": 0.0089,
      "step": 2743340
    },
    {
      "epoch": 4.489568809201181,
      "grad_norm": 0.20482082664966583,
      "learning_rate": 1.027589069799622e-06,
      "loss": 0.0091,
      "step": 2743360
    },
    {
      "epoch": 4.4896015396398345,
      "grad_norm": 0.2790738344192505,
      "learning_rate": 1.0275231775861047e-06,
      "loss": 0.0087,
      "step": 2743380
    },
    {
      "epoch": 4.489634270078487,
      "grad_norm": 0.22446124255657196,
      "learning_rate": 1.0274572853725876e-06,
      "loss": 0.0066,
      "step": 2743400
    },
    {
      "epoch": 4.489667000517141,
      "grad_norm": 0.26683664321899414,
      "learning_rate": 1.0273913931590706e-06,
      "loss": 0.006,
      "step": 2743420
    },
    {
      "epoch": 4.489699730955794,
      "grad_norm": 0.1472352147102356,
      "learning_rate": 1.0273255009455533e-06,
      "loss": 0.0069,
      "step": 2743440
    },
    {
      "epoch": 4.489732461394448,
      "grad_norm": 0.1237841323018074,
      "learning_rate": 1.0272596087320363e-06,
      "loss": 0.0126,
      "step": 2743460
    },
    {
      "epoch": 4.489765191833101,
      "grad_norm": 0.11758794635534286,
      "learning_rate": 1.027193716518519e-06,
      "loss": 0.0081,
      "step": 2743480
    },
    {
      "epoch": 4.489797922271754,
      "grad_norm": 0.13517925143241882,
      "learning_rate": 1.027127824305002e-06,
      "loss": 0.0092,
      "step": 2743500
    },
    {
      "epoch": 4.489830652710408,
      "grad_norm": 0.20002596080303192,
      "learning_rate": 1.027061932091485e-06,
      "loss": 0.008,
      "step": 2743520
    },
    {
      "epoch": 4.489863383149061,
      "grad_norm": 0.3962298631668091,
      "learning_rate": 1.0269960398779677e-06,
      "loss": 0.0099,
      "step": 2743540
    },
    {
      "epoch": 4.489896113587714,
      "grad_norm": 0.0994565486907959,
      "learning_rate": 1.0269301476644506e-06,
      "loss": 0.0117,
      "step": 2743560
    },
    {
      "epoch": 4.489928844026368,
      "grad_norm": 0.5848577618598938,
      "learning_rate": 1.0268642554509333e-06,
      "loss": 0.0074,
      "step": 2743580
    },
    {
      "epoch": 4.4899615744650205,
      "grad_norm": 0.12441778182983398,
      "learning_rate": 1.0267983632374163e-06,
      "loss": 0.0097,
      "step": 2743600
    },
    {
      "epoch": 4.489994304903674,
      "grad_norm": 0.32896193861961365,
      "learning_rate": 1.0267324710238992e-06,
      "loss": 0.0082,
      "step": 2743620
    },
    {
      "epoch": 4.490027035342328,
      "grad_norm": 0.041822656989097595,
      "learning_rate": 1.026666578810382e-06,
      "loss": 0.008,
      "step": 2743640
    },
    {
      "epoch": 4.490059765780981,
      "grad_norm": 0.24908824265003204,
      "learning_rate": 1.026600686596865e-06,
      "loss": 0.0095,
      "step": 2743660
    },
    {
      "epoch": 4.490092496219634,
      "grad_norm": 0.12362024933099747,
      "learning_rate": 1.0265347943833479e-06,
      "loss": 0.0065,
      "step": 2743680
    },
    {
      "epoch": 4.490125226658288,
      "grad_norm": 0.48814892768859863,
      "learning_rate": 1.0264689021698306e-06,
      "loss": 0.0082,
      "step": 2743700
    },
    {
      "epoch": 4.490157957096941,
      "grad_norm": 0.2901630699634552,
      "learning_rate": 1.0264030099563136e-06,
      "loss": 0.0138,
      "step": 2743720
    },
    {
      "epoch": 4.490190687535595,
      "grad_norm": 0.06797217577695847,
      "learning_rate": 1.0263371177427963e-06,
      "loss": 0.0077,
      "step": 2743740
    },
    {
      "epoch": 4.4902234179742475,
      "grad_norm": 0.15932981669902802,
      "learning_rate": 1.0262712255292793e-06,
      "loss": 0.008,
      "step": 2743760
    },
    {
      "epoch": 4.490256148412901,
      "grad_norm": 0.1912536770105362,
      "learning_rate": 1.0262053333157622e-06,
      "loss": 0.0109,
      "step": 2743780
    },
    {
      "epoch": 4.490288878851555,
      "grad_norm": 0.04769037663936615,
      "learning_rate": 1.026139441102245e-06,
      "loss": 0.0088,
      "step": 2743800
    },
    {
      "epoch": 4.490321609290207,
      "grad_norm": 0.5561068654060364,
      "learning_rate": 1.026073548888728e-06,
      "loss": 0.0094,
      "step": 2743820
    },
    {
      "epoch": 4.490354339728861,
      "grad_norm": 0.4469715654850006,
      "learning_rate": 1.0260076566752107e-06,
      "loss": 0.0094,
      "step": 2743840
    },
    {
      "epoch": 4.490387070167515,
      "grad_norm": 0.16429659724235535,
      "learning_rate": 1.0259417644616936e-06,
      "loss": 0.0098,
      "step": 2743860
    },
    {
      "epoch": 4.490419800606167,
      "grad_norm": 0.767571747303009,
      "learning_rate": 1.0258758722481766e-06,
      "loss": 0.0111,
      "step": 2743880
    },
    {
      "epoch": 4.490452531044821,
      "grad_norm": 0.20496013760566711,
      "learning_rate": 1.0258099800346593e-06,
      "loss": 0.0094,
      "step": 2743900
    },
    {
      "epoch": 4.4904852614834745,
      "grad_norm": 0.15083779394626617,
      "learning_rate": 1.0257440878211422e-06,
      "loss": 0.0076,
      "step": 2743920
    },
    {
      "epoch": 4.490517991922128,
      "grad_norm": 0.3674751818180084,
      "learning_rate": 1.0256781956076252e-06,
      "loss": 0.0078,
      "step": 2743940
    },
    {
      "epoch": 4.490550722360781,
      "grad_norm": 0.22842608392238617,
      "learning_rate": 1.025612303394108e-06,
      "loss": 0.0095,
      "step": 2743960
    },
    {
      "epoch": 4.490583452799434,
      "grad_norm": 0.289119690656662,
      "learning_rate": 1.0255464111805909e-06,
      "loss": 0.0075,
      "step": 2743980
    },
    {
      "epoch": 4.490616183238088,
      "grad_norm": 0.2604652941226959,
      "learning_rate": 1.0254805189670736e-06,
      "loss": 0.0115,
      "step": 2744000
    },
    {
      "epoch": 4.490648913676741,
      "grad_norm": 0.3228606581687927,
      "learning_rate": 1.0254146267535566e-06,
      "loss": 0.0069,
      "step": 2744020
    },
    {
      "epoch": 4.490681644115394,
      "grad_norm": 0.22310473024845123,
      "learning_rate": 1.0253487345400395e-06,
      "loss": 0.0066,
      "step": 2744040
    },
    {
      "epoch": 4.490714374554048,
      "grad_norm": 0.17523744702339172,
      "learning_rate": 1.0252828423265223e-06,
      "loss": 0.0082,
      "step": 2744060
    },
    {
      "epoch": 4.4907471049927015,
      "grad_norm": 0.6366909742355347,
      "learning_rate": 1.0252169501130052e-06,
      "loss": 0.0072,
      "step": 2744080
    },
    {
      "epoch": 4.490779835431354,
      "grad_norm": 0.25712302327156067,
      "learning_rate": 1.025151057899488e-06,
      "loss": 0.009,
      "step": 2744100
    },
    {
      "epoch": 4.490812565870008,
      "grad_norm": 0.41496729850769043,
      "learning_rate": 1.0250851656859711e-06,
      "loss": 0.0078,
      "step": 2744120
    },
    {
      "epoch": 4.490845296308661,
      "grad_norm": 0.29619452357292175,
      "learning_rate": 1.0250192734724539e-06,
      "loss": 0.0094,
      "step": 2744140
    },
    {
      "epoch": 4.490878026747314,
      "grad_norm": 0.3060853183269501,
      "learning_rate": 1.0249533812589366e-06,
      "loss": 0.0085,
      "step": 2744160
    },
    {
      "epoch": 4.490910757185968,
      "grad_norm": 0.04701339825987816,
      "learning_rate": 1.0248874890454196e-06,
      "loss": 0.0096,
      "step": 2744180
    },
    {
      "epoch": 4.490943487624621,
      "grad_norm": 0.045847337692976,
      "learning_rate": 1.0248215968319025e-06,
      "loss": 0.006,
      "step": 2744200
    },
    {
      "epoch": 4.490976218063275,
      "grad_norm": 0.19868697226047516,
      "learning_rate": 1.0247557046183855e-06,
      "loss": 0.01,
      "step": 2744220
    },
    {
      "epoch": 4.4910089485019276,
      "grad_norm": 0.251473605632782,
      "learning_rate": 1.0246898124048682e-06,
      "loss": 0.0105,
      "step": 2744240
    },
    {
      "epoch": 4.491041678940581,
      "grad_norm": 0.03602084517478943,
      "learning_rate": 1.024623920191351e-06,
      "loss": 0.0087,
      "step": 2744260
    },
    {
      "epoch": 4.491074409379235,
      "grad_norm": 0.16999514400959015,
      "learning_rate": 1.0245580279778339e-06,
      "loss": 0.0096,
      "step": 2744280
    },
    {
      "epoch": 4.4911071398178875,
      "grad_norm": 0.43554332852363586,
      "learning_rate": 1.0244921357643168e-06,
      "loss": 0.012,
      "step": 2744300
    },
    {
      "epoch": 4.491139870256541,
      "grad_norm": 0.09942013025283813,
      "learning_rate": 1.0244262435507998e-06,
      "loss": 0.0098,
      "step": 2744320
    },
    {
      "epoch": 4.491172600695195,
      "grad_norm": 0.2197510302066803,
      "learning_rate": 1.0243603513372825e-06,
      "loss": 0.0057,
      "step": 2744340
    },
    {
      "epoch": 4.491205331133848,
      "grad_norm": 0.48581090569496155,
      "learning_rate": 1.0242944591237653e-06,
      "loss": 0.0096,
      "step": 2744360
    },
    {
      "epoch": 4.491238061572501,
      "grad_norm": 0.05426562950015068,
      "learning_rate": 1.0242285669102484e-06,
      "loss": 0.0104,
      "step": 2744380
    },
    {
      "epoch": 4.4912707920111545,
      "grad_norm": 0.17029322683811188,
      "learning_rate": 1.0241626746967312e-06,
      "loss": 0.0105,
      "step": 2744400
    },
    {
      "epoch": 4.491303522449808,
      "grad_norm": 0.2537519633769989,
      "learning_rate": 1.0240967824832141e-06,
      "loss": 0.0083,
      "step": 2744420
    },
    {
      "epoch": 4.491336252888461,
      "grad_norm": 0.27343299984931946,
      "learning_rate": 1.0240308902696969e-06,
      "loss": 0.009,
      "step": 2744440
    },
    {
      "epoch": 4.491368983327114,
      "grad_norm": 0.2297646850347519,
      "learning_rate": 1.0239649980561798e-06,
      "loss": 0.0114,
      "step": 2744460
    },
    {
      "epoch": 4.491401713765768,
      "grad_norm": 0.22053325176239014,
      "learning_rate": 1.0238991058426628e-06,
      "loss": 0.0101,
      "step": 2744480
    },
    {
      "epoch": 4.491434444204422,
      "grad_norm": 0.15268787741661072,
      "learning_rate": 1.0238332136291455e-06,
      "loss": 0.0147,
      "step": 2744500
    },
    {
      "epoch": 4.491467174643074,
      "grad_norm": 0.1770443618297577,
      "learning_rate": 1.0237673214156285e-06,
      "loss": 0.0107,
      "step": 2744520
    },
    {
      "epoch": 4.491499905081728,
      "grad_norm": 0.1671815812587738,
      "learning_rate": 1.0237014292021112e-06,
      "loss": 0.0055,
      "step": 2744540
    },
    {
      "epoch": 4.4915326355203815,
      "grad_norm": 0.28605180978775024,
      "learning_rate": 1.0236355369885942e-06,
      "loss": 0.0124,
      "step": 2744560
    },
    {
      "epoch": 4.491565365959034,
      "grad_norm": 0.20246578752994537,
      "learning_rate": 1.0235696447750771e-06,
      "loss": 0.0095,
      "step": 2744580
    },
    {
      "epoch": 4.491598096397688,
      "grad_norm": 0.18430304527282715,
      "learning_rate": 1.0235037525615598e-06,
      "loss": 0.0086,
      "step": 2744600
    },
    {
      "epoch": 4.491630826836341,
      "grad_norm": 0.19562941789627075,
      "learning_rate": 1.0234378603480428e-06,
      "loss": 0.0076,
      "step": 2744620
    },
    {
      "epoch": 4.491663557274995,
      "grad_norm": 0.12470049411058426,
      "learning_rate": 1.0233719681345257e-06,
      "loss": 0.0127,
      "step": 2744640
    },
    {
      "epoch": 4.491696287713648,
      "grad_norm": 0.0913977175951004,
      "learning_rate": 1.0233060759210085e-06,
      "loss": 0.0102,
      "step": 2744660
    },
    {
      "epoch": 4.491729018152301,
      "grad_norm": 0.33757224678993225,
      "learning_rate": 1.0232401837074914e-06,
      "loss": 0.0069,
      "step": 2744680
    },
    {
      "epoch": 4.491761748590955,
      "grad_norm": 0.14378491044044495,
      "learning_rate": 1.0231742914939742e-06,
      "loss": 0.0089,
      "step": 2744700
    },
    {
      "epoch": 4.491794479029608,
      "grad_norm": 0.15430784225463867,
      "learning_rate": 1.0231083992804571e-06,
      "loss": 0.0085,
      "step": 2744720
    },
    {
      "epoch": 4.491827209468261,
      "grad_norm": 0.34299150109291077,
      "learning_rate": 1.02304250706694e-06,
      "loss": 0.0079,
      "step": 2744740
    },
    {
      "epoch": 4.491859939906915,
      "grad_norm": 0.9992644786834717,
      "learning_rate": 1.0229766148534228e-06,
      "loss": 0.0071,
      "step": 2744760
    },
    {
      "epoch": 4.491892670345568,
      "grad_norm": 0.11879384517669678,
      "learning_rate": 1.0229107226399058e-06,
      "loss": 0.0093,
      "step": 2744780
    },
    {
      "epoch": 4.491925400784221,
      "grad_norm": 0.34489041566848755,
      "learning_rate": 1.0228448304263885e-06,
      "loss": 0.0098,
      "step": 2744800
    },
    {
      "epoch": 4.491958131222875,
      "grad_norm": 0.3041260540485382,
      "learning_rate": 1.0227789382128715e-06,
      "loss": 0.0128,
      "step": 2744820
    },
    {
      "epoch": 4.491990861661528,
      "grad_norm": 0.22317266464233398,
      "learning_rate": 1.0227130459993544e-06,
      "loss": 0.011,
      "step": 2744840
    },
    {
      "epoch": 4.492023592100181,
      "grad_norm": 0.6635573506355286,
      "learning_rate": 1.0226471537858372e-06,
      "loss": 0.0123,
      "step": 2744860
    },
    {
      "epoch": 4.492056322538835,
      "grad_norm": 0.20820173621177673,
      "learning_rate": 1.0225812615723201e-06,
      "loss": 0.0089,
      "step": 2744880
    },
    {
      "epoch": 4.492089052977488,
      "grad_norm": 0.09857779741287231,
      "learning_rate": 1.022515369358803e-06,
      "loss": 0.008,
      "step": 2744900
    },
    {
      "epoch": 4.492121783416142,
      "grad_norm": 0.4588702917098999,
      "learning_rate": 1.0224494771452858e-06,
      "loss": 0.0105,
      "step": 2744920
    },
    {
      "epoch": 4.4921545138547945,
      "grad_norm": 0.35381296277046204,
      "learning_rate": 1.0223835849317688e-06,
      "loss": 0.0096,
      "step": 2744940
    },
    {
      "epoch": 4.492187244293448,
      "grad_norm": 0.08912906050682068,
      "learning_rate": 1.0223176927182515e-06,
      "loss": 0.0092,
      "step": 2744960
    },
    {
      "epoch": 4.492219974732102,
      "grad_norm": 0.4088441729545593,
      "learning_rate": 1.0222518005047344e-06,
      "loss": 0.0091,
      "step": 2744980
    },
    {
      "epoch": 4.492252705170754,
      "grad_norm": 0.3960193693637848,
      "learning_rate": 1.0221859082912174e-06,
      "loss": 0.0136,
      "step": 2745000
    },
    {
      "epoch": 4.492285435609408,
      "grad_norm": 0.18732911348342896,
      "learning_rate": 1.0221200160777001e-06,
      "loss": 0.0092,
      "step": 2745020
    },
    {
      "epoch": 4.492318166048062,
      "grad_norm": 0.14882710576057434,
      "learning_rate": 1.022054123864183e-06,
      "loss": 0.0079,
      "step": 2745040
    },
    {
      "epoch": 4.492350896486714,
      "grad_norm": 0.7122255563735962,
      "learning_rate": 1.0219882316506658e-06,
      "loss": 0.0126,
      "step": 2745060
    },
    {
      "epoch": 4.492383626925368,
      "grad_norm": 0.17746461927890778,
      "learning_rate": 1.0219223394371488e-06,
      "loss": 0.0084,
      "step": 2745080
    },
    {
      "epoch": 4.4924163573640215,
      "grad_norm": 0.6267271637916565,
      "learning_rate": 1.0218564472236317e-06,
      "loss": 0.0098,
      "step": 2745100
    },
    {
      "epoch": 4.492449087802675,
      "grad_norm": 0.09902419149875641,
      "learning_rate": 1.0217905550101145e-06,
      "loss": 0.0067,
      "step": 2745120
    },
    {
      "epoch": 4.492481818241328,
      "grad_norm": 0.23930273950099945,
      "learning_rate": 1.0217246627965974e-06,
      "loss": 0.0074,
      "step": 2745140
    },
    {
      "epoch": 4.492514548679981,
      "grad_norm": 0.26309555768966675,
      "learning_rate": 1.0216587705830802e-06,
      "loss": 0.0123,
      "step": 2745160
    },
    {
      "epoch": 4.492547279118635,
      "grad_norm": 0.10212460160255432,
      "learning_rate": 1.0215928783695631e-06,
      "loss": 0.0111,
      "step": 2745180
    },
    {
      "epoch": 4.4925800095572885,
      "grad_norm": 0.7820900678634644,
      "learning_rate": 1.021526986156046e-06,
      "loss": 0.0159,
      "step": 2745200
    },
    {
      "epoch": 4.492612739995941,
      "grad_norm": 0.5533556342124939,
      "learning_rate": 1.0214610939425288e-06,
      "loss": 0.0054,
      "step": 2745220
    },
    {
      "epoch": 4.492645470434595,
      "grad_norm": 0.09385844320058823,
      "learning_rate": 1.0213952017290118e-06,
      "loss": 0.0109,
      "step": 2745240
    },
    {
      "epoch": 4.492678200873248,
      "grad_norm": 0.25505223870277405,
      "learning_rate": 1.0213293095154947e-06,
      "loss": 0.0102,
      "step": 2745260
    },
    {
      "epoch": 4.492710931311901,
      "grad_norm": 0.19398696720600128,
      "learning_rate": 1.0212634173019774e-06,
      "loss": 0.0079,
      "step": 2745280
    },
    {
      "epoch": 4.492743661750555,
      "grad_norm": 0.3844138979911804,
      "learning_rate": 1.0211975250884604e-06,
      "loss": 0.013,
      "step": 2745300
    },
    {
      "epoch": 4.492776392189208,
      "grad_norm": 0.09166307747364044,
      "learning_rate": 1.0211316328749431e-06,
      "loss": 0.0062,
      "step": 2745320
    },
    {
      "epoch": 4.492809122627861,
      "grad_norm": 0.27315711975097656,
      "learning_rate": 1.021065740661426e-06,
      "loss": 0.0089,
      "step": 2745340
    },
    {
      "epoch": 4.492841853066515,
      "grad_norm": 0.2281273901462555,
      "learning_rate": 1.020999848447909e-06,
      "loss": 0.008,
      "step": 2745360
    },
    {
      "epoch": 4.492874583505168,
      "grad_norm": 0.3165992498397827,
      "learning_rate": 1.0209339562343918e-06,
      "loss": 0.0103,
      "step": 2745380
    },
    {
      "epoch": 4.492907313943822,
      "grad_norm": 0.0722498670220375,
      "learning_rate": 1.0208680640208747e-06,
      "loss": 0.0111,
      "step": 2745400
    },
    {
      "epoch": 4.4929400443824745,
      "grad_norm": 0.09734132885932922,
      "learning_rate": 1.0208021718073575e-06,
      "loss": 0.0136,
      "step": 2745420
    },
    {
      "epoch": 4.492972774821128,
      "grad_norm": 0.3192872405052185,
      "learning_rate": 1.0207362795938406e-06,
      "loss": 0.0089,
      "step": 2745440
    },
    {
      "epoch": 4.493005505259782,
      "grad_norm": 0.28339889645576477,
      "learning_rate": 1.0206703873803234e-06,
      "loss": 0.0055,
      "step": 2745460
    },
    {
      "epoch": 4.493038235698435,
      "grad_norm": 0.18568505346775055,
      "learning_rate": 1.0206044951668061e-06,
      "loss": 0.0085,
      "step": 2745480
    },
    {
      "epoch": 4.493070966137088,
      "grad_norm": 0.12844333052635193,
      "learning_rate": 1.020538602953289e-06,
      "loss": 0.0095,
      "step": 2745500
    },
    {
      "epoch": 4.493103696575742,
      "grad_norm": 0.12257406860589981,
      "learning_rate": 1.020472710739772e-06,
      "loss": 0.0097,
      "step": 2745520
    },
    {
      "epoch": 4.493136427014395,
      "grad_norm": 0.20769809186458588,
      "learning_rate": 1.020406818526255e-06,
      "loss": 0.0076,
      "step": 2745540
    },
    {
      "epoch": 4.493169157453048,
      "grad_norm": 0.35776662826538086,
      "learning_rate": 1.0203409263127377e-06,
      "loss": 0.0112,
      "step": 2745560
    },
    {
      "epoch": 4.4932018878917015,
      "grad_norm": 0.130308598279953,
      "learning_rate": 1.0202750340992204e-06,
      "loss": 0.0099,
      "step": 2745580
    },
    {
      "epoch": 4.493234618330355,
      "grad_norm": 0.24440477788448334,
      "learning_rate": 1.0202091418857034e-06,
      "loss": 0.0091,
      "step": 2745600
    },
    {
      "epoch": 4.493267348769008,
      "grad_norm": 0.09615697711706161,
      "learning_rate": 1.0201432496721863e-06,
      "loss": 0.0067,
      "step": 2745620
    },
    {
      "epoch": 4.493300079207661,
      "grad_norm": 0.2627505362033844,
      "learning_rate": 1.0200773574586693e-06,
      "loss": 0.0079,
      "step": 2745640
    },
    {
      "epoch": 4.493332809646315,
      "grad_norm": 0.21542960405349731,
      "learning_rate": 1.020011465245152e-06,
      "loss": 0.0099,
      "step": 2745660
    },
    {
      "epoch": 4.493365540084969,
      "grad_norm": 0.5026883482933044,
      "learning_rate": 1.0199455730316348e-06,
      "loss": 0.0089,
      "step": 2745680
    },
    {
      "epoch": 4.493398270523621,
      "grad_norm": 0.12044601142406464,
      "learning_rate": 1.019879680818118e-06,
      "loss": 0.0084,
      "step": 2745700
    },
    {
      "epoch": 4.493431000962275,
      "grad_norm": 0.3780139982700348,
      "learning_rate": 1.0198137886046007e-06,
      "loss": 0.0081,
      "step": 2745720
    },
    {
      "epoch": 4.4934637314009285,
      "grad_norm": 0.1347549557685852,
      "learning_rate": 1.0197478963910836e-06,
      "loss": 0.0054,
      "step": 2745740
    },
    {
      "epoch": 4.493496461839581,
      "grad_norm": 0.14372144639492035,
      "learning_rate": 1.0196820041775664e-06,
      "loss": 0.0077,
      "step": 2745760
    },
    {
      "epoch": 4.493529192278235,
      "grad_norm": 0.12226448953151703,
      "learning_rate": 1.0196161119640493e-06,
      "loss": 0.0088,
      "step": 2745780
    },
    {
      "epoch": 4.493561922716888,
      "grad_norm": 0.27972331643104553,
      "learning_rate": 1.0195502197505323e-06,
      "loss": 0.0152,
      "step": 2745800
    },
    {
      "epoch": 4.493594653155542,
      "grad_norm": 0.17458927631378174,
      "learning_rate": 1.019484327537015e-06,
      "loss": 0.0101,
      "step": 2745820
    },
    {
      "epoch": 4.493627383594195,
      "grad_norm": 0.07497600466012955,
      "learning_rate": 1.019418435323498e-06,
      "loss": 0.0066,
      "step": 2745840
    },
    {
      "epoch": 4.493660114032848,
      "grad_norm": 0.21464169025421143,
      "learning_rate": 1.0193525431099807e-06,
      "loss": 0.009,
      "step": 2745860
    },
    {
      "epoch": 4.493692844471502,
      "grad_norm": 0.5235052108764648,
      "learning_rate": 1.0192866508964637e-06,
      "loss": 0.0105,
      "step": 2745880
    },
    {
      "epoch": 4.493725574910155,
      "grad_norm": 0.20153452455997467,
      "learning_rate": 1.0192207586829466e-06,
      "loss": 0.0087,
      "step": 2745900
    },
    {
      "epoch": 4.493758305348808,
      "grad_norm": 0.05094612389802933,
      "learning_rate": 1.0191548664694294e-06,
      "loss": 0.0102,
      "step": 2745920
    },
    {
      "epoch": 4.493791035787462,
      "grad_norm": 0.2753596305847168,
      "learning_rate": 1.0190889742559123e-06,
      "loss": 0.007,
      "step": 2745940
    },
    {
      "epoch": 4.493823766226115,
      "grad_norm": 0.2814740240573883,
      "learning_rate": 1.0190230820423953e-06,
      "loss": 0.0134,
      "step": 2745960
    },
    {
      "epoch": 4.493856496664768,
      "grad_norm": 0.44664573669433594,
      "learning_rate": 1.018957189828878e-06,
      "loss": 0.01,
      "step": 2745980
    },
    {
      "epoch": 4.493889227103422,
      "grad_norm": 0.6017796993255615,
      "learning_rate": 1.018891297615361e-06,
      "loss": 0.0124,
      "step": 2746000
    },
    {
      "epoch": 4.493921957542075,
      "grad_norm": 0.08118116110563278,
      "learning_rate": 1.0188254054018437e-06,
      "loss": 0.0121,
      "step": 2746020
    },
    {
      "epoch": 4.493954687980728,
      "grad_norm": 0.453535258769989,
      "learning_rate": 1.0187595131883266e-06,
      "loss": 0.0086,
      "step": 2746040
    },
    {
      "epoch": 4.4939874184193815,
      "grad_norm": 0.10545217245817184,
      "learning_rate": 1.0186936209748096e-06,
      "loss": 0.0076,
      "step": 2746060
    },
    {
      "epoch": 4.494020148858035,
      "grad_norm": 0.17181912064552307,
      "learning_rate": 1.0186277287612923e-06,
      "loss": 0.008,
      "step": 2746080
    },
    {
      "epoch": 4.494052879296689,
      "grad_norm": 0.3532341420650482,
      "learning_rate": 1.0185618365477753e-06,
      "loss": 0.0085,
      "step": 2746100
    },
    {
      "epoch": 4.494085609735341,
      "grad_norm": 0.2816438674926758,
      "learning_rate": 1.018495944334258e-06,
      "loss": 0.0086,
      "step": 2746120
    },
    {
      "epoch": 4.494118340173995,
      "grad_norm": 0.2007157951593399,
      "learning_rate": 1.018430052120741e-06,
      "loss": 0.0061,
      "step": 2746140
    },
    {
      "epoch": 4.494151070612649,
      "grad_norm": 0.5107843279838562,
      "learning_rate": 1.018364159907224e-06,
      "loss": 0.0146,
      "step": 2746160
    },
    {
      "epoch": 4.494183801051301,
      "grad_norm": 0.2810191810131073,
      "learning_rate": 1.0182982676937067e-06,
      "loss": 0.0084,
      "step": 2746180
    },
    {
      "epoch": 4.494216531489955,
      "grad_norm": 0.2381327748298645,
      "learning_rate": 1.0182323754801896e-06,
      "loss": 0.007,
      "step": 2746200
    },
    {
      "epoch": 4.4942492619286085,
      "grad_norm": 0.3124386668205261,
      "learning_rate": 1.0181664832666726e-06,
      "loss": 0.0104,
      "step": 2746220
    },
    {
      "epoch": 4.494281992367262,
      "grad_norm": 0.16229930520057678,
      "learning_rate": 1.0181005910531553e-06,
      "loss": 0.0087,
      "step": 2746240
    },
    {
      "epoch": 4.494314722805915,
      "grad_norm": 0.16988661885261536,
      "learning_rate": 1.0180346988396383e-06,
      "loss": 0.0113,
      "step": 2746260
    },
    {
      "epoch": 4.494347453244568,
      "grad_norm": 0.23134727776050568,
      "learning_rate": 1.017968806626121e-06,
      "loss": 0.0085,
      "step": 2746280
    },
    {
      "epoch": 4.494380183683222,
      "grad_norm": 0.7143962383270264,
      "learning_rate": 1.017902914412604e-06,
      "loss": 0.0102,
      "step": 2746300
    },
    {
      "epoch": 4.494412914121875,
      "grad_norm": 0.07508967816829681,
      "learning_rate": 1.017837022199087e-06,
      "loss": 0.0113,
      "step": 2746320
    },
    {
      "epoch": 4.494445644560528,
      "grad_norm": 0.18988992273807526,
      "learning_rate": 1.0177711299855696e-06,
      "loss": 0.0079,
      "step": 2746340
    },
    {
      "epoch": 4.494478374999182,
      "grad_norm": 0.2268795222043991,
      "learning_rate": 1.0177052377720526e-06,
      "loss": 0.0096,
      "step": 2746360
    },
    {
      "epoch": 4.4945111054378355,
      "grad_norm": 0.302713006734848,
      "learning_rate": 1.0176393455585353e-06,
      "loss": 0.0091,
      "step": 2746380
    },
    {
      "epoch": 4.494543835876488,
      "grad_norm": 0.6170119047164917,
      "learning_rate": 1.0175734533450183e-06,
      "loss": 0.011,
      "step": 2746400
    },
    {
      "epoch": 4.494576566315142,
      "grad_norm": 0.34883445501327515,
      "learning_rate": 1.0175075611315012e-06,
      "loss": 0.0064,
      "step": 2746420
    },
    {
      "epoch": 4.494609296753795,
      "grad_norm": 0.4604252278804779,
      "learning_rate": 1.017441668917984e-06,
      "loss": 0.0077,
      "step": 2746440
    },
    {
      "epoch": 4.494642027192448,
      "grad_norm": 0.16963377594947815,
      "learning_rate": 1.017375776704467e-06,
      "loss": 0.0094,
      "step": 2746460
    },
    {
      "epoch": 4.494674757631102,
      "grad_norm": 0.11743604391813278,
      "learning_rate": 1.0173098844909497e-06,
      "loss": 0.0075,
      "step": 2746480
    },
    {
      "epoch": 4.494707488069755,
      "grad_norm": 0.10024631768465042,
      "learning_rate": 1.0172439922774326e-06,
      "loss": 0.0075,
      "step": 2746500
    },
    {
      "epoch": 4.494740218508408,
      "grad_norm": 0.18222232162952423,
      "learning_rate": 1.0171781000639156e-06,
      "loss": 0.0133,
      "step": 2746520
    },
    {
      "epoch": 4.494772948947062,
      "grad_norm": 0.23939672112464905,
      "learning_rate": 1.0171122078503983e-06,
      "loss": 0.0096,
      "step": 2746540
    },
    {
      "epoch": 4.494805679385715,
      "grad_norm": 0.4330388009548187,
      "learning_rate": 1.0170463156368813e-06,
      "loss": 0.0137,
      "step": 2746560
    },
    {
      "epoch": 4.494838409824369,
      "grad_norm": 0.3671148419380188,
      "learning_rate": 1.0169804234233642e-06,
      "loss": 0.0073,
      "step": 2746580
    },
    {
      "epoch": 4.4948711402630215,
      "grad_norm": 0.07407654076814651,
      "learning_rate": 1.016914531209847e-06,
      "loss": 0.0042,
      "step": 2746600
    },
    {
      "epoch": 4.494903870701675,
      "grad_norm": 0.4119618535041809,
      "learning_rate": 1.01684863899633e-06,
      "loss": 0.0103,
      "step": 2746620
    },
    {
      "epoch": 4.494936601140329,
      "grad_norm": 0.19170992076396942,
      "learning_rate": 1.0167827467828126e-06,
      "loss": 0.0062,
      "step": 2746640
    },
    {
      "epoch": 4.494969331578982,
      "grad_norm": 0.08498701453208923,
      "learning_rate": 1.0167168545692956e-06,
      "loss": 0.0087,
      "step": 2746660
    },
    {
      "epoch": 4.495002062017635,
      "grad_norm": 0.0910864919424057,
      "learning_rate": 1.0166509623557785e-06,
      "loss": 0.005,
      "step": 2746680
    },
    {
      "epoch": 4.495034792456289,
      "grad_norm": 0.3361699879169464,
      "learning_rate": 1.0165850701422613e-06,
      "loss": 0.0099,
      "step": 2746700
    },
    {
      "epoch": 4.495067522894942,
      "grad_norm": 0.4096865952014923,
      "learning_rate": 1.0165191779287442e-06,
      "loss": 0.0111,
      "step": 2746720
    },
    {
      "epoch": 4.495100253333595,
      "grad_norm": 0.21590784192085266,
      "learning_rate": 1.016453285715227e-06,
      "loss": 0.0087,
      "step": 2746740
    },
    {
      "epoch": 4.4951329837722485,
      "grad_norm": 0.10335633158683777,
      "learning_rate": 1.0163873935017101e-06,
      "loss": 0.0116,
      "step": 2746760
    },
    {
      "epoch": 4.495165714210902,
      "grad_norm": 1.3901904821395874,
      "learning_rate": 1.0163215012881929e-06,
      "loss": 0.0122,
      "step": 2746780
    },
    {
      "epoch": 4.495198444649555,
      "grad_norm": 0.1060551106929779,
      "learning_rate": 1.0162556090746756e-06,
      "loss": 0.0121,
      "step": 2746800
    },
    {
      "epoch": 4.495231175088208,
      "grad_norm": 0.13558362424373627,
      "learning_rate": 1.0161897168611586e-06,
      "loss": 0.0066,
      "step": 2746820
    },
    {
      "epoch": 4.495263905526862,
      "grad_norm": 0.36136743426322937,
      "learning_rate": 1.0161238246476415e-06,
      "loss": 0.0068,
      "step": 2746840
    },
    {
      "epoch": 4.4952966359655155,
      "grad_norm": 0.13099980354309082,
      "learning_rate": 1.0160579324341245e-06,
      "loss": 0.0086,
      "step": 2746860
    },
    {
      "epoch": 4.495329366404168,
      "grad_norm": 0.12148035317659378,
      "learning_rate": 1.0159920402206072e-06,
      "loss": 0.0047,
      "step": 2746880
    },
    {
      "epoch": 4.495362096842822,
      "grad_norm": 0.22167539596557617,
      "learning_rate": 1.01592614800709e-06,
      "loss": 0.0066,
      "step": 2746900
    },
    {
      "epoch": 4.495394827281475,
      "grad_norm": 0.17582352459430695,
      "learning_rate": 1.015860255793573e-06,
      "loss": 0.0125,
      "step": 2746920
    },
    {
      "epoch": 4.495427557720129,
      "grad_norm": 0.54816073179245,
      "learning_rate": 1.0157943635800559e-06,
      "loss": 0.0068,
      "step": 2746940
    },
    {
      "epoch": 4.495460288158782,
      "grad_norm": 0.08543837070465088,
      "learning_rate": 1.0157284713665388e-06,
      "loss": 0.0065,
      "step": 2746960
    },
    {
      "epoch": 4.495493018597435,
      "grad_norm": 0.3294435441493988,
      "learning_rate": 1.0156625791530215e-06,
      "loss": 0.0095,
      "step": 2746980
    },
    {
      "epoch": 4.495525749036089,
      "grad_norm": 0.25411880016326904,
      "learning_rate": 1.0155966869395043e-06,
      "loss": 0.0148,
      "step": 2747000
    },
    {
      "epoch": 4.495558479474742,
      "grad_norm": 0.4779517650604248,
      "learning_rate": 1.0155307947259874e-06,
      "loss": 0.0106,
      "step": 2747020
    },
    {
      "epoch": 4.495591209913395,
      "grad_norm": 0.1050528883934021,
      "learning_rate": 1.0154649025124702e-06,
      "loss": 0.0075,
      "step": 2747040
    },
    {
      "epoch": 4.495623940352049,
      "grad_norm": 0.4319743812084198,
      "learning_rate": 1.0153990102989531e-06,
      "loss": 0.0077,
      "step": 2747060
    },
    {
      "epoch": 4.4956566707907015,
      "grad_norm": 0.06602593511343002,
      "learning_rate": 1.0153331180854359e-06,
      "loss": 0.0077,
      "step": 2747080
    },
    {
      "epoch": 4.495689401229355,
      "grad_norm": 0.10864195972681046,
      "learning_rate": 1.0152672258719188e-06,
      "loss": 0.0181,
      "step": 2747100
    },
    {
      "epoch": 4.495722131668009,
      "grad_norm": 0.39412233233451843,
      "learning_rate": 1.0152013336584018e-06,
      "loss": 0.0071,
      "step": 2747120
    },
    {
      "epoch": 4.495754862106662,
      "grad_norm": 0.3579288125038147,
      "learning_rate": 1.0151354414448845e-06,
      "loss": 0.0072,
      "step": 2747140
    },
    {
      "epoch": 4.495787592545315,
      "grad_norm": 0.15547730028629303,
      "learning_rate": 1.0150695492313675e-06,
      "loss": 0.0086,
      "step": 2747160
    },
    {
      "epoch": 4.495820322983969,
      "grad_norm": 0.2615582346916199,
      "learning_rate": 1.0150036570178502e-06,
      "loss": 0.0089,
      "step": 2747180
    },
    {
      "epoch": 4.495853053422622,
      "grad_norm": 0.16088898479938507,
      "learning_rate": 1.0149377648043332e-06,
      "loss": 0.0069,
      "step": 2747200
    },
    {
      "epoch": 4.495885783861275,
      "grad_norm": 0.3796556293964386,
      "learning_rate": 1.0148718725908161e-06,
      "loss": 0.01,
      "step": 2747220
    },
    {
      "epoch": 4.4959185142999285,
      "grad_norm": 0.16277781128883362,
      "learning_rate": 1.0148059803772989e-06,
      "loss": 0.0078,
      "step": 2747240
    },
    {
      "epoch": 4.495951244738582,
      "grad_norm": 0.10712382197380066,
      "learning_rate": 1.0147400881637818e-06,
      "loss": 0.0088,
      "step": 2747260
    },
    {
      "epoch": 4.495983975177236,
      "grad_norm": 0.1868840605020523,
      "learning_rate": 1.0146741959502648e-06,
      "loss": 0.0099,
      "step": 2747280
    },
    {
      "epoch": 4.496016705615888,
      "grad_norm": 0.264369398355484,
      "learning_rate": 1.0146083037367475e-06,
      "loss": 0.0065,
      "step": 2747300
    },
    {
      "epoch": 4.496049436054542,
      "grad_norm": 0.08178197592496872,
      "learning_rate": 1.0145424115232305e-06,
      "loss": 0.0106,
      "step": 2747320
    },
    {
      "epoch": 4.496082166493196,
      "grad_norm": 0.1841471791267395,
      "learning_rate": 1.0144765193097132e-06,
      "loss": 0.0104,
      "step": 2747340
    },
    {
      "epoch": 4.496114896931848,
      "grad_norm": 0.3343685567378998,
      "learning_rate": 1.0144106270961961e-06,
      "loss": 0.0097,
      "step": 2747360
    },
    {
      "epoch": 4.496147627370502,
      "grad_norm": 0.14599859714508057,
      "learning_rate": 1.014344734882679e-06,
      "loss": 0.0047,
      "step": 2747380
    },
    {
      "epoch": 4.4961803578091555,
      "grad_norm": 0.22960905730724335,
      "learning_rate": 1.0142788426691618e-06,
      "loss": 0.0091,
      "step": 2747400
    },
    {
      "epoch": 4.496213088247809,
      "grad_norm": 0.3144291937351227,
      "learning_rate": 1.0142129504556448e-06,
      "loss": 0.0103,
      "step": 2747420
    },
    {
      "epoch": 4.496245818686462,
      "grad_norm": 0.14108897745609283,
      "learning_rate": 1.0141470582421275e-06,
      "loss": 0.0125,
      "step": 2747440
    },
    {
      "epoch": 4.496278549125115,
      "grad_norm": 0.16515691578388214,
      "learning_rate": 1.0140811660286105e-06,
      "loss": 0.0123,
      "step": 2747460
    },
    {
      "epoch": 4.496311279563769,
      "grad_norm": 0.2243368774652481,
      "learning_rate": 1.0140152738150934e-06,
      "loss": 0.0069,
      "step": 2747480
    },
    {
      "epoch": 4.496344010002422,
      "grad_norm": 0.2952187955379486,
      "learning_rate": 1.0139493816015762e-06,
      "loss": 0.0117,
      "step": 2747500
    },
    {
      "epoch": 4.496376740441075,
      "grad_norm": 0.14913630485534668,
      "learning_rate": 1.0138834893880591e-06,
      "loss": 0.0097,
      "step": 2747520
    },
    {
      "epoch": 4.496409470879729,
      "grad_norm": 0.051372040063142776,
      "learning_rate": 1.013817597174542e-06,
      "loss": 0.009,
      "step": 2747540
    },
    {
      "epoch": 4.4964422013183825,
      "grad_norm": 0.15309038758277893,
      "learning_rate": 1.0137517049610248e-06,
      "loss": 0.0087,
      "step": 2747560
    },
    {
      "epoch": 4.496474931757035,
      "grad_norm": 0.2047232836484909,
      "learning_rate": 1.0136858127475078e-06,
      "loss": 0.0076,
      "step": 2747580
    },
    {
      "epoch": 4.496507662195689,
      "grad_norm": 0.126896932721138,
      "learning_rate": 1.0136199205339905e-06,
      "loss": 0.0114,
      "step": 2747600
    },
    {
      "epoch": 4.496540392634342,
      "grad_norm": 0.09613282978534698,
      "learning_rate": 1.0135540283204735e-06,
      "loss": 0.0119,
      "step": 2747620
    },
    {
      "epoch": 4.496573123072995,
      "grad_norm": 0.30675601959228516,
      "learning_rate": 1.0134881361069564e-06,
      "loss": 0.0097,
      "step": 2747640
    },
    {
      "epoch": 4.496605853511649,
      "grad_norm": 0.894512951374054,
      "learning_rate": 1.0134222438934391e-06,
      "loss": 0.0139,
      "step": 2747660
    },
    {
      "epoch": 4.496638583950302,
      "grad_norm": 0.491696834564209,
      "learning_rate": 1.013356351679922e-06,
      "loss": 0.0119,
      "step": 2747680
    },
    {
      "epoch": 4.496671314388956,
      "grad_norm": 0.25691473484039307,
      "learning_rate": 1.0132904594664048e-06,
      "loss": 0.0166,
      "step": 2747700
    },
    {
      "epoch": 4.4967040448276085,
      "grad_norm": 0.1923263967037201,
      "learning_rate": 1.0132245672528878e-06,
      "loss": 0.0065,
      "step": 2747720
    },
    {
      "epoch": 4.496736775266262,
      "grad_norm": 0.13151633739471436,
      "learning_rate": 1.0131586750393707e-06,
      "loss": 0.0114,
      "step": 2747740
    },
    {
      "epoch": 4.496769505704916,
      "grad_norm": 0.4224051535129547,
      "learning_rate": 1.0130927828258535e-06,
      "loss": 0.0099,
      "step": 2747760
    },
    {
      "epoch": 4.496802236143568,
      "grad_norm": 0.21806290745735168,
      "learning_rate": 1.0130268906123364e-06,
      "loss": 0.0066,
      "step": 2747780
    },
    {
      "epoch": 4.496834966582222,
      "grad_norm": 0.13860727846622467,
      "learning_rate": 1.0129609983988192e-06,
      "loss": 0.0082,
      "step": 2747800
    },
    {
      "epoch": 4.496867697020876,
      "grad_norm": 0.32131487131118774,
      "learning_rate": 1.0128951061853021e-06,
      "loss": 0.0086,
      "step": 2747820
    },
    {
      "epoch": 4.496900427459529,
      "grad_norm": 0.28350090980529785,
      "learning_rate": 1.012829213971785e-06,
      "loss": 0.0062,
      "step": 2747840
    },
    {
      "epoch": 4.496933157898182,
      "grad_norm": 0.16723760962486267,
      "learning_rate": 1.0127633217582678e-06,
      "loss": 0.0059,
      "step": 2747860
    },
    {
      "epoch": 4.4969658883368355,
      "grad_norm": 0.13041551411151886,
      "learning_rate": 1.0126974295447508e-06,
      "loss": 0.0069,
      "step": 2747880
    },
    {
      "epoch": 4.496998618775489,
      "grad_norm": 0.4148126542568207,
      "learning_rate": 1.0126315373312337e-06,
      "loss": 0.0064,
      "step": 2747900
    },
    {
      "epoch": 4.497031349214142,
      "grad_norm": 0.3060178756713867,
      "learning_rate": 1.0125656451177165e-06,
      "loss": 0.0116,
      "step": 2747920
    },
    {
      "epoch": 4.497064079652795,
      "grad_norm": 0.07554584741592407,
      "learning_rate": 1.0124997529041994e-06,
      "loss": 0.0065,
      "step": 2747940
    },
    {
      "epoch": 4.497096810091449,
      "grad_norm": 0.22186340391635895,
      "learning_rate": 1.0124338606906821e-06,
      "loss": 0.0124,
      "step": 2747960
    },
    {
      "epoch": 4.497129540530102,
      "grad_norm": 0.24558769166469574,
      "learning_rate": 1.012367968477165e-06,
      "loss": 0.0088,
      "step": 2747980
    },
    {
      "epoch": 4.497162270968755,
      "grad_norm": 0.08563616126775742,
      "learning_rate": 1.012302076263648e-06,
      "loss": 0.008,
      "step": 2748000
    },
    {
      "epoch": 4.497195001407409,
      "grad_norm": 0.7502302527427673,
      "learning_rate": 1.0122361840501308e-06,
      "loss": 0.0102,
      "step": 2748020
    },
    {
      "epoch": 4.4972277318460625,
      "grad_norm": 0.1442059725522995,
      "learning_rate": 1.0121702918366137e-06,
      "loss": 0.0095,
      "step": 2748040
    },
    {
      "epoch": 4.497260462284715,
      "grad_norm": 0.13541492819786072,
      "learning_rate": 1.0121043996230965e-06,
      "loss": 0.0113,
      "step": 2748060
    },
    {
      "epoch": 4.497293192723369,
      "grad_norm": 0.1886928826570511,
      "learning_rate": 1.0120385074095796e-06,
      "loss": 0.008,
      "step": 2748080
    },
    {
      "epoch": 4.497325923162022,
      "grad_norm": 0.377023845911026,
      "learning_rate": 1.0119726151960624e-06,
      "loss": 0.0077,
      "step": 2748100
    },
    {
      "epoch": 4.497358653600676,
      "grad_norm": 0.3288734257221222,
      "learning_rate": 1.0119067229825451e-06,
      "loss": 0.012,
      "step": 2748120
    },
    {
      "epoch": 4.497391384039329,
      "grad_norm": 0.25533968210220337,
      "learning_rate": 1.011840830769028e-06,
      "loss": 0.0108,
      "step": 2748140
    },
    {
      "epoch": 4.497424114477982,
      "grad_norm": 0.5640857815742493,
      "learning_rate": 1.011774938555511e-06,
      "loss": 0.0126,
      "step": 2748160
    },
    {
      "epoch": 4.497456844916636,
      "grad_norm": 0.18669861555099487,
      "learning_rate": 1.011709046341994e-06,
      "loss": 0.0086,
      "step": 2748180
    },
    {
      "epoch": 4.497489575355289,
      "grad_norm": 0.28113120794296265,
      "learning_rate": 1.0116431541284767e-06,
      "loss": 0.01,
      "step": 2748200
    },
    {
      "epoch": 4.497522305793942,
      "grad_norm": 0.09849728643894196,
      "learning_rate": 1.0115772619149595e-06,
      "loss": 0.0094,
      "step": 2748220
    },
    {
      "epoch": 4.497555036232596,
      "grad_norm": 0.14230674505233765,
      "learning_rate": 1.0115113697014424e-06,
      "loss": 0.0125,
      "step": 2748240
    },
    {
      "epoch": 4.4975877666712485,
      "grad_norm": 0.8187994956970215,
      "learning_rate": 1.0114454774879254e-06,
      "loss": 0.0107,
      "step": 2748260
    },
    {
      "epoch": 4.497620497109902,
      "grad_norm": 0.7103429436683655,
      "learning_rate": 1.0113795852744083e-06,
      "loss": 0.0088,
      "step": 2748280
    },
    {
      "epoch": 4.497653227548556,
      "grad_norm": 0.24231575429439545,
      "learning_rate": 1.011313693060891e-06,
      "loss": 0.0065,
      "step": 2748300
    },
    {
      "epoch": 4.497685957987209,
      "grad_norm": 0.09060464054346085,
      "learning_rate": 1.0112478008473738e-06,
      "loss": 0.0071,
      "step": 2748320
    },
    {
      "epoch": 4.497718688425862,
      "grad_norm": 0.16870318353176117,
      "learning_rate": 1.011181908633857e-06,
      "loss": 0.0053,
      "step": 2748340
    },
    {
      "epoch": 4.497751418864516,
      "grad_norm": 0.07986805588006973,
      "learning_rate": 1.0111160164203397e-06,
      "loss": 0.0075,
      "step": 2748360
    },
    {
      "epoch": 4.497784149303169,
      "grad_norm": 0.9124820828437805,
      "learning_rate": 1.0110501242068226e-06,
      "loss": 0.0126,
      "step": 2748380
    },
    {
      "epoch": 4.497816879741823,
      "grad_norm": 0.14877194166183472,
      "learning_rate": 1.0109842319933054e-06,
      "loss": 0.0063,
      "step": 2748400
    },
    {
      "epoch": 4.4978496101804755,
      "grad_norm": 0.25552254915237427,
      "learning_rate": 1.0109183397797883e-06,
      "loss": 0.0132,
      "step": 2748420
    },
    {
      "epoch": 4.497882340619129,
      "grad_norm": 0.20855168998241425,
      "learning_rate": 1.0108524475662713e-06,
      "loss": 0.0073,
      "step": 2748440
    },
    {
      "epoch": 4.497915071057783,
      "grad_norm": 0.49557486176490784,
      "learning_rate": 1.010786555352754e-06,
      "loss": 0.006,
      "step": 2748460
    },
    {
      "epoch": 4.497947801496435,
      "grad_norm": 0.3059966266155243,
      "learning_rate": 1.010720663139237e-06,
      "loss": 0.008,
      "step": 2748480
    },
    {
      "epoch": 4.497980531935089,
      "grad_norm": 0.0691787376999855,
      "learning_rate": 1.0106547709257197e-06,
      "loss": 0.0175,
      "step": 2748500
    },
    {
      "epoch": 4.4980132623737425,
      "grad_norm": 0.5050315856933594,
      "learning_rate": 1.0105888787122027e-06,
      "loss": 0.0109,
      "step": 2748520
    },
    {
      "epoch": 4.498045992812395,
      "grad_norm": 0.15598756074905396,
      "learning_rate": 1.0105229864986856e-06,
      "loss": 0.008,
      "step": 2748540
    },
    {
      "epoch": 4.498078723251049,
      "grad_norm": 0.2802722752094269,
      "learning_rate": 1.0104570942851684e-06,
      "loss": 0.0117,
      "step": 2748560
    },
    {
      "epoch": 4.498111453689702,
      "grad_norm": 0.22557511925697327,
      "learning_rate": 1.0103912020716513e-06,
      "loss": 0.0079,
      "step": 2748580
    },
    {
      "epoch": 4.498144184128356,
      "grad_norm": 0.953313410282135,
      "learning_rate": 1.0103253098581343e-06,
      "loss": 0.0121,
      "step": 2748600
    },
    {
      "epoch": 4.498176914567009,
      "grad_norm": 0.15898047387599945,
      "learning_rate": 1.010259417644617e-06,
      "loss": 0.0114,
      "step": 2748620
    },
    {
      "epoch": 4.498209645005662,
      "grad_norm": 0.17804564535617828,
      "learning_rate": 1.0101935254311e-06,
      "loss": 0.0082,
      "step": 2748640
    },
    {
      "epoch": 4.498242375444316,
      "grad_norm": 0.1867142915725708,
      "learning_rate": 1.0101276332175827e-06,
      "loss": 0.0059,
      "step": 2748660
    },
    {
      "epoch": 4.498275105882969,
      "grad_norm": 0.22606222331523895,
      "learning_rate": 1.0100617410040656e-06,
      "loss": 0.012,
      "step": 2748680
    },
    {
      "epoch": 4.498307836321622,
      "grad_norm": 0.2794802486896515,
      "learning_rate": 1.0099958487905486e-06,
      "loss": 0.0106,
      "step": 2748700
    },
    {
      "epoch": 4.498340566760276,
      "grad_norm": 0.30773472785949707,
      "learning_rate": 1.0099299565770313e-06,
      "loss": 0.0126,
      "step": 2748720
    },
    {
      "epoch": 4.498373297198929,
      "grad_norm": 0.18906766176223755,
      "learning_rate": 1.0098640643635143e-06,
      "loss": 0.0101,
      "step": 2748740
    },
    {
      "epoch": 4.498406027637582,
      "grad_norm": 0.30800631642341614,
      "learning_rate": 1.009798172149997e-06,
      "loss": 0.0066,
      "step": 2748760
    },
    {
      "epoch": 4.498438758076236,
      "grad_norm": 0.1904924511909485,
      "learning_rate": 1.00973227993648e-06,
      "loss": 0.0087,
      "step": 2748780
    },
    {
      "epoch": 4.498471488514889,
      "grad_norm": 0.11475500464439392,
      "learning_rate": 1.009666387722963e-06,
      "loss": 0.0114,
      "step": 2748800
    },
    {
      "epoch": 4.498504218953542,
      "grad_norm": 0.11531804502010345,
      "learning_rate": 1.0096004955094457e-06,
      "loss": 0.0095,
      "step": 2748820
    },
    {
      "epoch": 4.498536949392196,
      "grad_norm": 0.17790761590003967,
      "learning_rate": 1.0095346032959286e-06,
      "loss": 0.0059,
      "step": 2748840
    },
    {
      "epoch": 4.498569679830849,
      "grad_norm": 0.11868337541818619,
      "learning_rate": 1.0094687110824116e-06,
      "loss": 0.01,
      "step": 2748860
    },
    {
      "epoch": 4.498602410269503,
      "grad_norm": 0.13734520971775055,
      "learning_rate": 1.0094028188688943e-06,
      "loss": 0.0053,
      "step": 2748880
    },
    {
      "epoch": 4.4986351407081555,
      "grad_norm": 0.09289567917585373,
      "learning_rate": 1.0093369266553773e-06,
      "loss": 0.0128,
      "step": 2748900
    },
    {
      "epoch": 4.498667871146809,
      "grad_norm": 0.1632017195224762,
      "learning_rate": 1.00927103444186e-06,
      "loss": 0.0107,
      "step": 2748920
    },
    {
      "epoch": 4.498700601585463,
      "grad_norm": 0.0998767539858818,
      "learning_rate": 1.009205142228343e-06,
      "loss": 0.0098,
      "step": 2748940
    },
    {
      "epoch": 4.498733332024115,
      "grad_norm": 0.2221866101026535,
      "learning_rate": 1.009139250014826e-06,
      "loss": 0.0092,
      "step": 2748960
    },
    {
      "epoch": 4.498766062462769,
      "grad_norm": 0.752214789390564,
      "learning_rate": 1.0090733578013086e-06,
      "loss": 0.0141,
      "step": 2748980
    },
    {
      "epoch": 4.498798792901423,
      "grad_norm": 0.31767538189888,
      "learning_rate": 1.0090074655877916e-06,
      "loss": 0.0113,
      "step": 2749000
    },
    {
      "epoch": 4.498831523340076,
      "grad_norm": 0.27037423849105835,
      "learning_rate": 1.0089415733742743e-06,
      "loss": 0.0097,
      "step": 2749020
    },
    {
      "epoch": 4.498864253778729,
      "grad_norm": 0.492012083530426,
      "learning_rate": 1.0088756811607573e-06,
      "loss": 0.0109,
      "step": 2749040
    },
    {
      "epoch": 4.4988969842173825,
      "grad_norm": 0.20803703367710114,
      "learning_rate": 1.0088097889472402e-06,
      "loss": 0.008,
      "step": 2749060
    },
    {
      "epoch": 4.498929714656036,
      "grad_norm": 0.3992602229118347,
      "learning_rate": 1.008743896733723e-06,
      "loss": 0.0093,
      "step": 2749080
    },
    {
      "epoch": 4.498962445094689,
      "grad_norm": 0.2182895988225937,
      "learning_rate": 1.008678004520206e-06,
      "loss": 0.0111,
      "step": 2749100
    },
    {
      "epoch": 4.498995175533342,
      "grad_norm": 0.3060224950313568,
      "learning_rate": 1.0086121123066887e-06,
      "loss": 0.0091,
      "step": 2749120
    },
    {
      "epoch": 4.499027905971996,
      "grad_norm": 0.3845639228820801,
      "learning_rate": 1.0085462200931716e-06,
      "loss": 0.0085,
      "step": 2749140
    },
    {
      "epoch": 4.49906063641065,
      "grad_norm": 0.18790748715400696,
      "learning_rate": 1.0084803278796546e-06,
      "loss": 0.0134,
      "step": 2749160
    },
    {
      "epoch": 4.499093366849302,
      "grad_norm": 0.3761288821697235,
      "learning_rate": 1.0084144356661373e-06,
      "loss": 0.008,
      "step": 2749180
    },
    {
      "epoch": 4.499126097287956,
      "grad_norm": 0.14022479951381683,
      "learning_rate": 1.0083485434526203e-06,
      "loss": 0.0076,
      "step": 2749200
    },
    {
      "epoch": 4.4991588277266095,
      "grad_norm": 0.06805311888456345,
      "learning_rate": 1.0082826512391032e-06,
      "loss": 0.0055,
      "step": 2749220
    },
    {
      "epoch": 4.499191558165262,
      "grad_norm": 0.1479489654302597,
      "learning_rate": 1.008216759025586e-06,
      "loss": 0.0078,
      "step": 2749240
    },
    {
      "epoch": 4.499224288603916,
      "grad_norm": 0.07852624356746674,
      "learning_rate": 1.008150866812069e-06,
      "loss": 0.0061,
      "step": 2749260
    },
    {
      "epoch": 4.499257019042569,
      "grad_norm": 0.2954331636428833,
      "learning_rate": 1.0080849745985517e-06,
      "loss": 0.0122,
      "step": 2749280
    },
    {
      "epoch": 4.499289749481223,
      "grad_norm": 0.14853809773921967,
      "learning_rate": 1.0080190823850346e-06,
      "loss": 0.0063,
      "step": 2749300
    },
    {
      "epoch": 4.499322479919876,
      "grad_norm": 0.3656325042247772,
      "learning_rate": 1.0079531901715176e-06,
      "loss": 0.0094,
      "step": 2749320
    },
    {
      "epoch": 4.499355210358529,
      "grad_norm": 0.33416661620140076,
      "learning_rate": 1.0078872979580003e-06,
      "loss": 0.0078,
      "step": 2749340
    },
    {
      "epoch": 4.499387940797183,
      "grad_norm": 0.3909456729888916,
      "learning_rate": 1.0078214057444832e-06,
      "loss": 0.0095,
      "step": 2749360
    },
    {
      "epoch": 4.499420671235836,
      "grad_norm": 0.5314805507659912,
      "learning_rate": 1.007755513530966e-06,
      "loss": 0.009,
      "step": 2749380
    },
    {
      "epoch": 4.499453401674489,
      "grad_norm": 0.08462448418140411,
      "learning_rate": 1.0076896213174491e-06,
      "loss": 0.0086,
      "step": 2749400
    },
    {
      "epoch": 4.499486132113143,
      "grad_norm": 0.149398073554039,
      "learning_rate": 1.0076237291039319e-06,
      "loss": 0.0083,
      "step": 2749420
    },
    {
      "epoch": 4.4995188625517955,
      "grad_norm": 0.17295652627944946,
      "learning_rate": 1.0075578368904146e-06,
      "loss": 0.0086,
      "step": 2749440
    },
    {
      "epoch": 4.499551592990449,
      "grad_norm": 0.17301291227340698,
      "learning_rate": 1.0074919446768976e-06,
      "loss": 0.0101,
      "step": 2749460
    },
    {
      "epoch": 4.499584323429103,
      "grad_norm": 0.26498711109161377,
      "learning_rate": 1.0074260524633805e-06,
      "loss": 0.0079,
      "step": 2749480
    },
    {
      "epoch": 4.499617053867756,
      "grad_norm": 0.2038106918334961,
      "learning_rate": 1.0073601602498635e-06,
      "loss": 0.0081,
      "step": 2749500
    },
    {
      "epoch": 4.499649784306409,
      "grad_norm": 0.2430940866470337,
      "learning_rate": 1.0072942680363462e-06,
      "loss": 0.0121,
      "step": 2749520
    },
    {
      "epoch": 4.4996825147450625,
      "grad_norm": 0.20255550742149353,
      "learning_rate": 1.007228375822829e-06,
      "loss": 0.0071,
      "step": 2749540
    },
    {
      "epoch": 4.499715245183716,
      "grad_norm": 0.19130869209766388,
      "learning_rate": 1.007162483609312e-06,
      "loss": 0.0083,
      "step": 2749560
    },
    {
      "epoch": 4.49974797562237,
      "grad_norm": 0.2828381359577179,
      "learning_rate": 1.0070965913957949e-06,
      "loss": 0.0084,
      "step": 2749580
    },
    {
      "epoch": 4.499780706061022,
      "grad_norm": 0.13220441341400146,
      "learning_rate": 1.0070306991822778e-06,
      "loss": 0.0067,
      "step": 2749600
    },
    {
      "epoch": 4.499813436499676,
      "grad_norm": 0.11498601734638214,
      "learning_rate": 1.0069648069687606e-06,
      "loss": 0.0093,
      "step": 2749620
    },
    {
      "epoch": 4.49984616693833,
      "grad_norm": 0.7657076120376587,
      "learning_rate": 1.0068989147552433e-06,
      "loss": 0.0086,
      "step": 2749640
    },
    {
      "epoch": 4.499878897376982,
      "grad_norm": 0.13274158537387848,
      "learning_rate": 1.0068330225417265e-06,
      "loss": 0.0092,
      "step": 2749660
    },
    {
      "epoch": 4.499911627815636,
      "grad_norm": 0.13219475746154785,
      "learning_rate": 1.0067671303282092e-06,
      "loss": 0.0082,
      "step": 2749680
    },
    {
      "epoch": 4.4999443582542895,
      "grad_norm": 0.11601941287517548,
      "learning_rate": 1.0067012381146922e-06,
      "loss": 0.0062,
      "step": 2749700
    },
    {
      "epoch": 4.499977088692942,
      "grad_norm": 0.5861018896102905,
      "learning_rate": 1.0066353459011749e-06,
      "loss": 0.0084,
      "step": 2749720
    },
    {
      "epoch": 4.500009819131596,
      "grad_norm": 0.16564351320266724,
      "learning_rate": 1.0065694536876578e-06,
      "loss": 0.0061,
      "step": 2749740
    },
    {
      "epoch": 4.500042549570249,
      "grad_norm": 0.574104368686676,
      "learning_rate": 1.0065035614741408e-06,
      "loss": 0.0072,
      "step": 2749760
    },
    {
      "epoch": 4.500075280008903,
      "grad_norm": 0.39904657006263733,
      "learning_rate": 1.0064376692606235e-06,
      "loss": 0.0097,
      "step": 2749780
    },
    {
      "epoch": 4.500108010447556,
      "grad_norm": 0.08309947699308395,
      "learning_rate": 1.0063717770471065e-06,
      "loss": 0.0089,
      "step": 2749800
    },
    {
      "epoch": 4.500140740886209,
      "grad_norm": 0.27603307366371155,
      "learning_rate": 1.0063058848335892e-06,
      "loss": 0.0103,
      "step": 2749820
    },
    {
      "epoch": 4.500173471324863,
      "grad_norm": 0.41664817929267883,
      "learning_rate": 1.0062399926200722e-06,
      "loss": 0.012,
      "step": 2749840
    },
    {
      "epoch": 4.5002062017635165,
      "grad_norm": 0.6144501566886902,
      "learning_rate": 1.0061741004065551e-06,
      "loss": 0.011,
      "step": 2749860
    },
    {
      "epoch": 4.500238932202169,
      "grad_norm": 0.1852063536643982,
      "learning_rate": 1.0061082081930379e-06,
      "loss": 0.0094,
      "step": 2749880
    },
    {
      "epoch": 4.500271662640823,
      "grad_norm": 0.2344789355993271,
      "learning_rate": 1.0060423159795208e-06,
      "loss": 0.0079,
      "step": 2749900
    },
    {
      "epoch": 4.500304393079476,
      "grad_norm": 0.11598967760801315,
      "learning_rate": 1.0059764237660038e-06,
      "loss": 0.0081,
      "step": 2749920
    },
    {
      "epoch": 4.500337123518129,
      "grad_norm": 0.0623653270304203,
      "learning_rate": 1.0059105315524865e-06,
      "loss": 0.0102,
      "step": 2749940
    },
    {
      "epoch": 4.500369853956783,
      "grad_norm": 0.3291660249233246,
      "learning_rate": 1.0058446393389695e-06,
      "loss": 0.0108,
      "step": 2749960
    },
    {
      "epoch": 4.500402584395436,
      "grad_norm": 0.17097783088684082,
      "learning_rate": 1.0057787471254522e-06,
      "loss": 0.0057,
      "step": 2749980
    },
    {
      "epoch": 4.500435314834089,
      "grad_norm": 0.17752501368522644,
      "learning_rate": 1.0057128549119352e-06,
      "loss": 0.0069,
      "step": 2750000
    },
    {
      "epoch": 4.500435314834089,
      "eval_loss": 0.0058248648419976234,
      "eval_runtime": 6510.3554,
      "eval_samples_per_second": 157.88,
      "eval_steps_per_second": 15.788,
      "eval_sts-dev_pearson_cosine": 0.9867944490224981,
      "eval_sts-dev_spearman_cosine": 0.8966847217761135,
      "step": 2750000
    },
    {
      "epoch": 4.500468045272743,
      "grad_norm": 0.23960979282855988,
      "learning_rate": 1.005646962698418e-06,
      "loss": 0.0111,
      "step": 2750020
    },
    {
      "epoch": 4.500500775711396,
      "grad_norm": 0.252033531665802,
      "learning_rate": 1.0055810704849008e-06,
      "loss": 0.0101,
      "step": 2750040
    },
    {
      "epoch": 4.50053350615005,
      "grad_norm": 0.27103757858276367,
      "learning_rate": 1.0055151782713838e-06,
      "loss": 0.0111,
      "step": 2750060
    },
    {
      "epoch": 4.5005662365887025,
      "grad_norm": 0.21103358268737793,
      "learning_rate": 1.0054492860578665e-06,
      "loss": 0.0076,
      "step": 2750080
    },
    {
      "epoch": 4.500598967027356,
      "grad_norm": 0.15871384739875793,
      "learning_rate": 1.0053833938443495e-06,
      "loss": 0.0064,
      "step": 2750100
    },
    {
      "epoch": 4.50063169746601,
      "grad_norm": 0.10778642445802689,
      "learning_rate": 1.0053175016308324e-06,
      "loss": 0.006,
      "step": 2750120
    },
    {
      "epoch": 4.500664427904663,
      "grad_norm": 0.08078746497631073,
      "learning_rate": 1.0052516094173152e-06,
      "loss": 0.0074,
      "step": 2750140
    },
    {
      "epoch": 4.500697158343316,
      "grad_norm": 0.1225539967417717,
      "learning_rate": 1.0051857172037981e-06,
      "loss": 0.0092,
      "step": 2750160
    },
    {
      "epoch": 4.50072988878197,
      "grad_norm": 0.16244390606880188,
      "learning_rate": 1.005119824990281e-06,
      "loss": 0.0063,
      "step": 2750180
    },
    {
      "epoch": 4.500762619220623,
      "grad_norm": 0.4464563727378845,
      "learning_rate": 1.0050539327767638e-06,
      "loss": 0.0086,
      "step": 2750200
    },
    {
      "epoch": 4.500795349659276,
      "grad_norm": 0.45701998472213745,
      "learning_rate": 1.0049880405632468e-06,
      "loss": 0.0088,
      "step": 2750220
    },
    {
      "epoch": 4.5008280800979295,
      "grad_norm": 0.20941786468029022,
      "learning_rate": 1.0049221483497295e-06,
      "loss": 0.0097,
      "step": 2750240
    },
    {
      "epoch": 4.500860810536583,
      "grad_norm": 0.4877129793167114,
      "learning_rate": 1.0048562561362125e-06,
      "loss": 0.0061,
      "step": 2750260
    },
    {
      "epoch": 4.500893540975236,
      "grad_norm": 0.2609328627586365,
      "learning_rate": 1.0047903639226954e-06,
      "loss": 0.0113,
      "step": 2750280
    },
    {
      "epoch": 4.500926271413889,
      "grad_norm": 0.3453957140445709,
      "learning_rate": 1.0047244717091782e-06,
      "loss": 0.0096,
      "step": 2750300
    },
    {
      "epoch": 4.500959001852543,
      "grad_norm": 0.19031639397144318,
      "learning_rate": 1.0046585794956611e-06,
      "loss": 0.0067,
      "step": 2750320
    },
    {
      "epoch": 4.5009917322911965,
      "grad_norm": 0.4291768968105316,
      "learning_rate": 1.0045926872821438e-06,
      "loss": 0.0088,
      "step": 2750340
    },
    {
      "epoch": 4.501024462729849,
      "grad_norm": 0.20140962302684784,
      "learning_rate": 1.0045267950686268e-06,
      "loss": 0.0112,
      "step": 2750360
    },
    {
      "epoch": 4.501057193168503,
      "grad_norm": 0.07298044115304947,
      "learning_rate": 1.0044609028551097e-06,
      "loss": 0.0066,
      "step": 2750380
    },
    {
      "epoch": 4.501089923607156,
      "grad_norm": 0.052661120891571045,
      "learning_rate": 1.0043950106415925e-06,
      "loss": 0.0074,
      "step": 2750400
    },
    {
      "epoch": 4.501122654045809,
      "grad_norm": 0.163263738155365,
      "learning_rate": 1.0043291184280754e-06,
      "loss": 0.0137,
      "step": 2750420
    },
    {
      "epoch": 4.501155384484463,
      "grad_norm": 0.43644315004348755,
      "learning_rate": 1.0042632262145582e-06,
      "loss": 0.0065,
      "step": 2750440
    },
    {
      "epoch": 4.501188114923116,
      "grad_norm": 0.044746533036231995,
      "learning_rate": 1.0041973340010411e-06,
      "loss": 0.0137,
      "step": 2750460
    },
    {
      "epoch": 4.50122084536177,
      "grad_norm": 0.23627547919750214,
      "learning_rate": 1.004131441787524e-06,
      "loss": 0.0137,
      "step": 2750480
    },
    {
      "epoch": 4.501253575800423,
      "grad_norm": 0.11409400403499603,
      "learning_rate": 1.0040655495740068e-06,
      "loss": 0.019,
      "step": 2750500
    },
    {
      "epoch": 4.501286306239076,
      "grad_norm": 0.4903882145881653,
      "learning_rate": 1.0039996573604898e-06,
      "loss": 0.0083,
      "step": 2750520
    },
    {
      "epoch": 4.50131903667773,
      "grad_norm": 0.12675803899765015,
      "learning_rate": 1.0039337651469727e-06,
      "loss": 0.0089,
      "step": 2750540
    },
    {
      "epoch": 4.5013517671163825,
      "grad_norm": 0.2053440362215042,
      "learning_rate": 1.0038678729334555e-06,
      "loss": 0.0111,
      "step": 2750560
    },
    {
      "epoch": 4.501384497555036,
      "grad_norm": 0.37227126955986023,
      "learning_rate": 1.0038019807199384e-06,
      "loss": 0.0086,
      "step": 2750580
    },
    {
      "epoch": 4.50141722799369,
      "grad_norm": 0.16956844925880432,
      "learning_rate": 1.0037360885064212e-06,
      "loss": 0.0066,
      "step": 2750600
    },
    {
      "epoch": 4.501449958432342,
      "grad_norm": 0.05053415521979332,
      "learning_rate": 1.0036701962929041e-06,
      "loss": 0.0121,
      "step": 2750620
    },
    {
      "epoch": 4.501482688870996,
      "grad_norm": 0.13315404951572418,
      "learning_rate": 1.003604304079387e-06,
      "loss": 0.0086,
      "step": 2750640
    },
    {
      "epoch": 4.50151541930965,
      "grad_norm": 0.27872222661972046,
      "learning_rate": 1.0035384118658698e-06,
      "loss": 0.0096,
      "step": 2750660
    },
    {
      "epoch": 4.501548149748303,
      "grad_norm": 0.05554213747382164,
      "learning_rate": 1.0034725196523528e-06,
      "loss": 0.0057,
      "step": 2750680
    },
    {
      "epoch": 4.501580880186956,
      "grad_norm": 0.4189029932022095,
      "learning_rate": 1.0034066274388355e-06,
      "loss": 0.0083,
      "step": 2750700
    },
    {
      "epoch": 4.5016136106256095,
      "grad_norm": 0.234755739569664,
      "learning_rate": 1.0033407352253187e-06,
      "loss": 0.011,
      "step": 2750720
    },
    {
      "epoch": 4.501646341064263,
      "grad_norm": 0.20681169629096985,
      "learning_rate": 1.0032748430118014e-06,
      "loss": 0.0111,
      "step": 2750740
    },
    {
      "epoch": 4.501679071502917,
      "grad_norm": 0.1433100700378418,
      "learning_rate": 1.0032089507982841e-06,
      "loss": 0.0136,
      "step": 2750760
    },
    {
      "epoch": 4.501711801941569,
      "grad_norm": 0.2744928002357483,
      "learning_rate": 1.003143058584767e-06,
      "loss": 0.0064,
      "step": 2750780
    },
    {
      "epoch": 4.501744532380223,
      "grad_norm": 0.03892466425895691,
      "learning_rate": 1.00307716637125e-06,
      "loss": 0.0069,
      "step": 2750800
    },
    {
      "epoch": 4.501777262818877,
      "grad_norm": 0.2611144185066223,
      "learning_rate": 1.003011274157733e-06,
      "loss": 0.0118,
      "step": 2750820
    },
    {
      "epoch": 4.501809993257529,
      "grad_norm": 0.05150464549660683,
      "learning_rate": 1.0029453819442157e-06,
      "loss": 0.0068,
      "step": 2750840
    },
    {
      "epoch": 4.501842723696183,
      "grad_norm": 0.08463296294212341,
      "learning_rate": 1.0028794897306985e-06,
      "loss": 0.0089,
      "step": 2750860
    },
    {
      "epoch": 4.5018754541348365,
      "grad_norm": 0.21731218695640564,
      "learning_rate": 1.0028135975171814e-06,
      "loss": 0.0123,
      "step": 2750880
    },
    {
      "epoch": 4.501908184573489,
      "grad_norm": 0.27857765555381775,
      "learning_rate": 1.0027477053036644e-06,
      "loss": 0.0086,
      "step": 2750900
    },
    {
      "epoch": 4.501940915012143,
      "grad_norm": 0.6038357019424438,
      "learning_rate": 1.0026818130901473e-06,
      "loss": 0.0109,
      "step": 2750920
    },
    {
      "epoch": 4.501973645450796,
      "grad_norm": 0.4445798695087433,
      "learning_rate": 1.00261592087663e-06,
      "loss": 0.006,
      "step": 2750940
    },
    {
      "epoch": 4.50200637588945,
      "grad_norm": 0.21584627032279968,
      "learning_rate": 1.0025500286631128e-06,
      "loss": 0.0105,
      "step": 2750960
    },
    {
      "epoch": 4.502039106328103,
      "grad_norm": 0.2726495563983917,
      "learning_rate": 1.002484136449596e-06,
      "loss": 0.0076,
      "step": 2750980
    },
    {
      "epoch": 4.502071836766756,
      "grad_norm": 0.163423553109169,
      "learning_rate": 1.0024182442360787e-06,
      "loss": 0.0048,
      "step": 2751000
    },
    {
      "epoch": 4.50210456720541,
      "grad_norm": 0.2739301323890686,
      "learning_rate": 1.0023523520225617e-06,
      "loss": 0.0104,
      "step": 2751020
    },
    {
      "epoch": 4.5021372976440635,
      "grad_norm": 0.3769128918647766,
      "learning_rate": 1.0022864598090444e-06,
      "loss": 0.0059,
      "step": 2751040
    },
    {
      "epoch": 4.502170028082716,
      "grad_norm": 0.21248914301395416,
      "learning_rate": 1.0022205675955273e-06,
      "loss": 0.0106,
      "step": 2751060
    },
    {
      "epoch": 4.50220275852137,
      "grad_norm": 0.3625505864620209,
      "learning_rate": 1.0021546753820103e-06,
      "loss": 0.008,
      "step": 2751080
    },
    {
      "epoch": 4.502235488960023,
      "grad_norm": 0.3209110498428345,
      "learning_rate": 1.002088783168493e-06,
      "loss": 0.0064,
      "step": 2751100
    },
    {
      "epoch": 4.502268219398676,
      "grad_norm": 0.4038916230201721,
      "learning_rate": 1.002022890954976e-06,
      "loss": 0.0117,
      "step": 2751120
    },
    {
      "epoch": 4.50230094983733,
      "grad_norm": 0.2321247160434723,
      "learning_rate": 1.0019569987414587e-06,
      "loss": 0.0097,
      "step": 2751140
    },
    {
      "epoch": 4.502333680275983,
      "grad_norm": 0.31166139245033264,
      "learning_rate": 1.0018911065279417e-06,
      "loss": 0.0089,
      "step": 2751160
    },
    {
      "epoch": 4.502366410714636,
      "grad_norm": 0.0618007518351078,
      "learning_rate": 1.0018252143144246e-06,
      "loss": 0.008,
      "step": 2751180
    },
    {
      "epoch": 4.5023991411532895,
      "grad_norm": 0.20971329510211945,
      "learning_rate": 1.0017593221009074e-06,
      "loss": 0.0084,
      "step": 2751200
    },
    {
      "epoch": 4.502431871591943,
      "grad_norm": 0.35693058371543884,
      "learning_rate": 1.0016934298873903e-06,
      "loss": 0.009,
      "step": 2751220
    },
    {
      "epoch": 4.502464602030597,
      "grad_norm": 0.2441423237323761,
      "learning_rate": 1.0016275376738733e-06,
      "loss": 0.0086,
      "step": 2751240
    },
    {
      "epoch": 4.502497332469249,
      "grad_norm": 0.34503382444381714,
      "learning_rate": 1.001561645460356e-06,
      "loss": 0.0109,
      "step": 2751260
    },
    {
      "epoch": 4.502530062907903,
      "grad_norm": 0.38257071375846863,
      "learning_rate": 1.001495753246839e-06,
      "loss": 0.0129,
      "step": 2751280
    },
    {
      "epoch": 4.502562793346557,
      "grad_norm": 0.11083158105611801,
      "learning_rate": 1.0014298610333217e-06,
      "loss": 0.015,
      "step": 2751300
    },
    {
      "epoch": 4.50259552378521,
      "grad_norm": 0.10167685151100159,
      "learning_rate": 1.0013639688198047e-06,
      "loss": 0.0081,
      "step": 2751320
    },
    {
      "epoch": 4.502628254223863,
      "grad_norm": 0.41104304790496826,
      "learning_rate": 1.0012980766062876e-06,
      "loss": 0.01,
      "step": 2751340
    },
    {
      "epoch": 4.5026609846625165,
      "grad_norm": 0.565333366394043,
      "learning_rate": 1.0012321843927703e-06,
      "loss": 0.0072,
      "step": 2751360
    },
    {
      "epoch": 4.50269371510117,
      "grad_norm": 0.6768184900283813,
      "learning_rate": 1.0011662921792533e-06,
      "loss": 0.0093,
      "step": 2751380
    },
    {
      "epoch": 4.502726445539823,
      "grad_norm": 0.31389909982681274,
      "learning_rate": 1.001100399965736e-06,
      "loss": 0.0084,
      "step": 2751400
    },
    {
      "epoch": 4.502759175978476,
      "grad_norm": 0.1451379507780075,
      "learning_rate": 1.001034507752219e-06,
      "loss": 0.008,
      "step": 2751420
    },
    {
      "epoch": 4.50279190641713,
      "grad_norm": 0.14915791153907776,
      "learning_rate": 1.000968615538702e-06,
      "loss": 0.0083,
      "step": 2751440
    },
    {
      "epoch": 4.502824636855783,
      "grad_norm": 0.1586666852235794,
      "learning_rate": 1.0009027233251847e-06,
      "loss": 0.0063,
      "step": 2751460
    },
    {
      "epoch": 4.502857367294436,
      "grad_norm": 0.2938358187675476,
      "learning_rate": 1.0008368311116676e-06,
      "loss": 0.0109,
      "step": 2751480
    },
    {
      "epoch": 4.50289009773309,
      "grad_norm": 0.15440508723258972,
      "learning_rate": 1.0007709388981506e-06,
      "loss": 0.0114,
      "step": 2751500
    },
    {
      "epoch": 4.5029228281717435,
      "grad_norm": 0.564956784248352,
      "learning_rate": 1.0007050466846333e-06,
      "loss": 0.0073,
      "step": 2751520
    },
    {
      "epoch": 4.502955558610396,
      "grad_norm": 0.3057907223701477,
      "learning_rate": 1.0006391544711163e-06,
      "loss": 0.0071,
      "step": 2751540
    },
    {
      "epoch": 4.50298828904905,
      "grad_norm": 0.1390414834022522,
      "learning_rate": 1.000573262257599e-06,
      "loss": 0.0092,
      "step": 2751560
    },
    {
      "epoch": 4.503021019487703,
      "grad_norm": 0.37478604912757874,
      "learning_rate": 1.000507370044082e-06,
      "loss": 0.0087,
      "step": 2751580
    },
    {
      "epoch": 4.503053749926357,
      "grad_norm": 0.34371787309646606,
      "learning_rate": 1.000441477830565e-06,
      "loss": 0.0076,
      "step": 2751600
    },
    {
      "epoch": 4.50308648036501,
      "grad_norm": 0.11227267980575562,
      "learning_rate": 1.0003755856170477e-06,
      "loss": 0.0054,
      "step": 2751620
    },
    {
      "epoch": 4.503119210803663,
      "grad_norm": 0.4567071199417114,
      "learning_rate": 1.0003096934035306e-06,
      "loss": 0.0116,
      "step": 2751640
    },
    {
      "epoch": 4.503151941242317,
      "grad_norm": 0.3142823874950409,
      "learning_rate": 1.0002438011900134e-06,
      "loss": 0.0093,
      "step": 2751660
    },
    {
      "epoch": 4.50318467168097,
      "grad_norm": 0.2538169026374817,
      "learning_rate": 1.0001779089764963e-06,
      "loss": 0.0095,
      "step": 2751680
    },
    {
      "epoch": 4.503217402119623,
      "grad_norm": 0.22948217391967773,
      "learning_rate": 1.0001120167629793e-06,
      "loss": 0.0081,
      "step": 2751700
    },
    {
      "epoch": 4.503250132558277,
      "grad_norm": 0.21352140605449677,
      "learning_rate": 1.000046124549462e-06,
      "loss": 0.0126,
      "step": 2751720
    },
    {
      "epoch": 4.5032828629969295,
      "grad_norm": 0.22435159981250763,
      "learning_rate": 9.99980232335945e-07,
      "loss": 0.0076,
      "step": 2751740
    },
    {
      "epoch": 4.503315593435583,
      "grad_norm": 0.19586649537086487,
      "learning_rate": 9.999143401224277e-07,
      "loss": 0.0086,
      "step": 2751760
    },
    {
      "epoch": 4.503348323874237,
      "grad_norm": 0.2394704669713974,
      "learning_rate": 9.998484479089106e-07,
      "loss": 0.013,
      "step": 2751780
    },
    {
      "epoch": 4.50338105431289,
      "grad_norm": 0.25064852833747864,
      "learning_rate": 9.997825556953936e-07,
      "loss": 0.0111,
      "step": 2751800
    },
    {
      "epoch": 4.503413784751543,
      "grad_norm": 0.17960478365421295,
      "learning_rate": 9.997166634818763e-07,
      "loss": 0.0081,
      "step": 2751820
    },
    {
      "epoch": 4.503446515190197,
      "grad_norm": 0.5086544752120972,
      "learning_rate": 9.996507712683593e-07,
      "loss": 0.0117,
      "step": 2751840
    },
    {
      "epoch": 4.50347924562885,
      "grad_norm": 0.399920791387558,
      "learning_rate": 9.995848790548422e-07,
      "loss": 0.0072,
      "step": 2751860
    },
    {
      "epoch": 4.503511976067504,
      "grad_norm": 0.1177312359213829,
      "learning_rate": 9.99518986841325e-07,
      "loss": 0.0063,
      "step": 2751880
    },
    {
      "epoch": 4.5035447065061565,
      "grad_norm": 0.28746387362480164,
      "learning_rate": 9.99453094627808e-07,
      "loss": 0.0102,
      "step": 2751900
    },
    {
      "epoch": 4.50357743694481,
      "grad_norm": 0.1513209342956543,
      "learning_rate": 9.993872024142907e-07,
      "loss": 0.0084,
      "step": 2751920
    },
    {
      "epoch": 4.503610167383464,
      "grad_norm": 0.3372337818145752,
      "learning_rate": 9.993213102007736e-07,
      "loss": 0.0099,
      "step": 2751940
    },
    {
      "epoch": 4.503642897822116,
      "grad_norm": 0.06021983176469803,
      "learning_rate": 9.992554179872566e-07,
      "loss": 0.0071,
      "step": 2751960
    },
    {
      "epoch": 4.50367562826077,
      "grad_norm": 1.7233023643493652,
      "learning_rate": 9.991895257737393e-07,
      "loss": 0.0111,
      "step": 2751980
    },
    {
      "epoch": 4.5037083586994235,
      "grad_norm": 0.17196954786777496,
      "learning_rate": 9.991236335602223e-07,
      "loss": 0.0045,
      "step": 2752000
    },
    {
      "epoch": 4.503741089138076,
      "grad_norm": 0.12192583829164505,
      "learning_rate": 9.99057741346705e-07,
      "loss": 0.0114,
      "step": 2752020
    },
    {
      "epoch": 4.50377381957673,
      "grad_norm": 0.21385566890239716,
      "learning_rate": 9.989918491331882e-07,
      "loss": 0.0099,
      "step": 2752040
    },
    {
      "epoch": 4.503806550015383,
      "grad_norm": 0.044436704367399216,
      "learning_rate": 9.98925956919671e-07,
      "loss": 0.008,
      "step": 2752060
    },
    {
      "epoch": 4.503839280454037,
      "grad_norm": 0.10937951505184174,
      "learning_rate": 9.988600647061536e-07,
      "loss": 0.0105,
      "step": 2752080
    },
    {
      "epoch": 4.50387201089269,
      "grad_norm": 0.20618531107902527,
      "learning_rate": 9.987941724926366e-07,
      "loss": 0.0065,
      "step": 2752100
    },
    {
      "epoch": 4.503904741331343,
      "grad_norm": 0.32324478030204773,
      "learning_rate": 9.987282802791195e-07,
      "loss": 0.0077,
      "step": 2752120
    },
    {
      "epoch": 4.503937471769997,
      "grad_norm": 0.1942174881696701,
      "learning_rate": 9.986623880656025e-07,
      "loss": 0.008,
      "step": 2752140
    },
    {
      "epoch": 4.50397020220865,
      "grad_norm": 0.31602734327316284,
      "learning_rate": 9.985964958520852e-07,
      "loss": 0.0074,
      "step": 2752160
    },
    {
      "epoch": 4.504002932647303,
      "grad_norm": 0.33962053060531616,
      "learning_rate": 9.98530603638568e-07,
      "loss": 0.0069,
      "step": 2752180
    },
    {
      "epoch": 4.504035663085957,
      "grad_norm": 0.23851390182971954,
      "learning_rate": 9.98464711425051e-07,
      "loss": 0.0099,
      "step": 2752200
    },
    {
      "epoch": 4.50406839352461,
      "grad_norm": 0.3555622398853302,
      "learning_rate": 9.983988192115339e-07,
      "loss": 0.0113,
      "step": 2752220
    },
    {
      "epoch": 4.504101123963263,
      "grad_norm": 0.1433742791414261,
      "learning_rate": 9.983329269980168e-07,
      "loss": 0.0121,
      "step": 2752240
    },
    {
      "epoch": 4.504133854401917,
      "grad_norm": 0.1340998262166977,
      "learning_rate": 9.982670347844996e-07,
      "loss": 0.006,
      "step": 2752260
    },
    {
      "epoch": 4.50416658484057,
      "grad_norm": 0.09596399962902069,
      "learning_rate": 9.982011425709823e-07,
      "loss": 0.0099,
      "step": 2752280
    },
    {
      "epoch": 4.504199315279223,
      "grad_norm": 0.18110865354537964,
      "learning_rate": 9.981352503574655e-07,
      "loss": 0.0103,
      "step": 2752300
    },
    {
      "epoch": 4.504232045717877,
      "grad_norm": 0.06700250506401062,
      "learning_rate": 9.980693581439482e-07,
      "loss": 0.0108,
      "step": 2752320
    },
    {
      "epoch": 4.50426477615653,
      "grad_norm": 0.17104515433311462,
      "learning_rate": 9.980034659304312e-07,
      "loss": 0.0074,
      "step": 2752340
    },
    {
      "epoch": 4.504297506595183,
      "grad_norm": 0.17793633043766022,
      "learning_rate": 9.97937573716914e-07,
      "loss": 0.0085,
      "step": 2752360
    },
    {
      "epoch": 4.5043302370338365,
      "grad_norm": 0.4495207667350769,
      "learning_rate": 9.978716815033969e-07,
      "loss": 0.0085,
      "step": 2752380
    },
    {
      "epoch": 4.50436296747249,
      "grad_norm": 0.455985963344574,
      "learning_rate": 9.978057892898798e-07,
      "loss": 0.0123,
      "step": 2752400
    },
    {
      "epoch": 4.504395697911144,
      "grad_norm": 0.26377078890800476,
      "learning_rate": 9.977398970763625e-07,
      "loss": 0.0151,
      "step": 2752420
    },
    {
      "epoch": 4.504428428349796,
      "grad_norm": 0.46290698647499084,
      "learning_rate": 9.976740048628455e-07,
      "loss": 0.0091,
      "step": 2752440
    },
    {
      "epoch": 4.50446115878845,
      "grad_norm": 0.1825365275144577,
      "learning_rate": 9.976081126493282e-07,
      "loss": 0.006,
      "step": 2752460
    },
    {
      "epoch": 4.504493889227104,
      "grad_norm": 0.28749364614486694,
      "learning_rate": 9.975422204358112e-07,
      "loss": 0.0085,
      "step": 2752480
    },
    {
      "epoch": 4.504526619665757,
      "grad_norm": 0.04488099738955498,
      "learning_rate": 9.974763282222941e-07,
      "loss": 0.0099,
      "step": 2752500
    },
    {
      "epoch": 4.50455935010441,
      "grad_norm": 0.17587679624557495,
      "learning_rate": 9.974104360087769e-07,
      "loss": 0.0089,
      "step": 2752520
    },
    {
      "epoch": 4.5045920805430635,
      "grad_norm": 0.3270739018917084,
      "learning_rate": 9.973445437952598e-07,
      "loss": 0.0095,
      "step": 2752540
    },
    {
      "epoch": 4.504624810981717,
      "grad_norm": 1.050123691558838,
      "learning_rate": 9.972786515817428e-07,
      "loss": 0.0048,
      "step": 2752560
    },
    {
      "epoch": 4.50465754142037,
      "grad_norm": 0.5364603400230408,
      "learning_rate": 9.972127593682255e-07,
      "loss": 0.0082,
      "step": 2752580
    },
    {
      "epoch": 4.504690271859023,
      "grad_norm": 0.2244616448879242,
      "learning_rate": 9.971468671547085e-07,
      "loss": 0.0086,
      "step": 2752600
    },
    {
      "epoch": 4.504723002297677,
      "grad_norm": 0.13679081201553345,
      "learning_rate": 9.970809749411912e-07,
      "loss": 0.0076,
      "step": 2752620
    },
    {
      "epoch": 4.50475573273633,
      "grad_norm": 0.2717210352420807,
      "learning_rate": 9.970150827276742e-07,
      "loss": 0.0069,
      "step": 2752640
    },
    {
      "epoch": 4.504788463174983,
      "grad_norm": 0.10029902309179306,
      "learning_rate": 9.969491905141571e-07,
      "loss": 0.0085,
      "step": 2752660
    },
    {
      "epoch": 4.504821193613637,
      "grad_norm": 0.6028158664703369,
      "learning_rate": 9.968832983006399e-07,
      "loss": 0.0122,
      "step": 2752680
    },
    {
      "epoch": 4.5048539240522905,
      "grad_norm": 0.14044508337974548,
      "learning_rate": 9.968174060871228e-07,
      "loss": 0.0113,
      "step": 2752700
    },
    {
      "epoch": 4.504886654490943,
      "grad_norm": 0.08653733879327774,
      "learning_rate": 9.967515138736055e-07,
      "loss": 0.0077,
      "step": 2752720
    },
    {
      "epoch": 4.504919384929597,
      "grad_norm": 0.17311887443065643,
      "learning_rate": 9.966856216600885e-07,
      "loss": 0.0104,
      "step": 2752740
    },
    {
      "epoch": 4.50495211536825,
      "grad_norm": 0.2587561011314392,
      "learning_rate": 9.966197294465714e-07,
      "loss": 0.0114,
      "step": 2752760
    },
    {
      "epoch": 4.504984845806904,
      "grad_norm": 0.20815037190914154,
      "learning_rate": 9.965538372330542e-07,
      "loss": 0.0079,
      "step": 2752780
    },
    {
      "epoch": 4.505017576245557,
      "grad_norm": 0.9996413588523865,
      "learning_rate": 9.964879450195371e-07,
      "loss": 0.0078,
      "step": 2752800
    },
    {
      "epoch": 4.50505030668421,
      "grad_norm": 0.18588770925998688,
      "learning_rate": 9.9642205280602e-07,
      "loss": 0.0158,
      "step": 2752820
    },
    {
      "epoch": 4.505083037122864,
      "grad_norm": 0.4554304778575897,
      "learning_rate": 9.963561605925028e-07,
      "loss": 0.007,
      "step": 2752840
    },
    {
      "epoch": 4.5051157675615165,
      "grad_norm": 0.28554150462150574,
      "learning_rate": 9.962902683789858e-07,
      "loss": 0.0105,
      "step": 2752860
    },
    {
      "epoch": 4.50514849800017,
      "grad_norm": 0.1120912954211235,
      "learning_rate": 9.962243761654685e-07,
      "loss": 0.008,
      "step": 2752880
    },
    {
      "epoch": 4.505181228438824,
      "grad_norm": 0.09417477250099182,
      "learning_rate": 9.961584839519515e-07,
      "loss": 0.0083,
      "step": 2752900
    },
    {
      "epoch": 4.505213958877476,
      "grad_norm": 0.4038432836532593,
      "learning_rate": 9.960925917384344e-07,
      "loss": 0.0109,
      "step": 2752920
    },
    {
      "epoch": 4.50524668931613,
      "grad_norm": 0.1671094298362732,
      "learning_rate": 9.960266995249172e-07,
      "loss": 0.0093,
      "step": 2752940
    },
    {
      "epoch": 4.505279419754784,
      "grad_norm": 0.11899836361408234,
      "learning_rate": 9.959608073114001e-07,
      "loss": 0.0089,
      "step": 2752960
    },
    {
      "epoch": 4.505312150193437,
      "grad_norm": 0.14879348874092102,
      "learning_rate": 9.958949150978829e-07,
      "loss": 0.0133,
      "step": 2752980
    },
    {
      "epoch": 4.50534488063209,
      "grad_norm": 0.075489841401577,
      "learning_rate": 9.958290228843658e-07,
      "loss": 0.0113,
      "step": 2753000
    },
    {
      "epoch": 4.5053776110707435,
      "grad_norm": 0.2765970528125763,
      "learning_rate": 9.957631306708488e-07,
      "loss": 0.009,
      "step": 2753020
    },
    {
      "epoch": 4.505410341509397,
      "grad_norm": 0.26854121685028076,
      "learning_rate": 9.956972384573315e-07,
      "loss": 0.0093,
      "step": 2753040
    },
    {
      "epoch": 4.505443071948051,
      "grad_norm": 0.22414757311344147,
      "learning_rate": 9.956313462438145e-07,
      "loss": 0.0144,
      "step": 2753060
    },
    {
      "epoch": 4.505475802386703,
      "grad_norm": 0.37032657861709595,
      "learning_rate": 9.955654540302974e-07,
      "loss": 0.0128,
      "step": 2753080
    },
    {
      "epoch": 4.505508532825357,
      "grad_norm": 0.24064446985721588,
      "learning_rate": 9.954995618167801e-07,
      "loss": 0.0096,
      "step": 2753100
    },
    {
      "epoch": 4.505541263264011,
      "grad_norm": 0.09884997457265854,
      "learning_rate": 9.95433669603263e-07,
      "loss": 0.0147,
      "step": 2753120
    },
    {
      "epoch": 4.505573993702663,
      "grad_norm": 0.24872085452079773,
      "learning_rate": 9.953677773897458e-07,
      "loss": 0.0081,
      "step": 2753140
    },
    {
      "epoch": 4.505606724141317,
      "grad_norm": 0.17788831889629364,
      "learning_rate": 9.953018851762288e-07,
      "loss": 0.0111,
      "step": 2753160
    },
    {
      "epoch": 4.5056394545799705,
      "grad_norm": 0.15788725018501282,
      "learning_rate": 9.952359929627117e-07,
      "loss": 0.0113,
      "step": 2753180
    },
    {
      "epoch": 4.505672185018623,
      "grad_norm": 0.20520876348018646,
      "learning_rate": 9.951701007491945e-07,
      "loss": 0.0067,
      "step": 2753200
    },
    {
      "epoch": 4.505704915457277,
      "grad_norm": 0.18268585205078125,
      "learning_rate": 9.951042085356774e-07,
      "loss": 0.0094,
      "step": 2753220
    },
    {
      "epoch": 4.50573764589593,
      "grad_norm": 0.4717426002025604,
      "learning_rate": 9.950383163221602e-07,
      "loss": 0.0105,
      "step": 2753240
    },
    {
      "epoch": 4.505770376334584,
      "grad_norm": 0.2503024637699127,
      "learning_rate": 9.949724241086433e-07,
      "loss": 0.0089,
      "step": 2753260
    },
    {
      "epoch": 4.505803106773237,
      "grad_norm": 0.49188604950904846,
      "learning_rate": 9.94906531895126e-07,
      "loss": 0.0142,
      "step": 2753280
    },
    {
      "epoch": 4.50583583721189,
      "grad_norm": 0.3197062015533447,
      "learning_rate": 9.948406396816088e-07,
      "loss": 0.0085,
      "step": 2753300
    },
    {
      "epoch": 4.505868567650544,
      "grad_norm": 0.17272266745567322,
      "learning_rate": 9.947747474680918e-07,
      "loss": 0.0104,
      "step": 2753320
    },
    {
      "epoch": 4.5059012980891975,
      "grad_norm": 0.16384488344192505,
      "learning_rate": 9.947088552545745e-07,
      "loss": 0.0072,
      "step": 2753340
    },
    {
      "epoch": 4.50593402852785,
      "grad_norm": 0.44541507959365845,
      "learning_rate": 9.946429630410577e-07,
      "loss": 0.0128,
      "step": 2753360
    },
    {
      "epoch": 4.505966758966504,
      "grad_norm": 0.048793256282806396,
      "learning_rate": 9.945770708275404e-07,
      "loss": 0.0056,
      "step": 2753380
    },
    {
      "epoch": 4.505999489405157,
      "grad_norm": 0.1272253394126892,
      "learning_rate": 9.945111786140231e-07,
      "loss": 0.0126,
      "step": 2753400
    },
    {
      "epoch": 4.50603221984381,
      "grad_norm": 0.4501135051250458,
      "learning_rate": 9.94445286400506e-07,
      "loss": 0.0124,
      "step": 2753420
    },
    {
      "epoch": 4.506064950282464,
      "grad_norm": 0.10539767891168594,
      "learning_rate": 9.94379394186989e-07,
      "loss": 0.009,
      "step": 2753440
    },
    {
      "epoch": 4.506097680721117,
      "grad_norm": 0.1878681629896164,
      "learning_rate": 9.94313501973472e-07,
      "loss": 0.0079,
      "step": 2753460
    },
    {
      "epoch": 4.50613041115977,
      "grad_norm": 0.2728188931941986,
      "learning_rate": 9.942476097599547e-07,
      "loss": 0.0083,
      "step": 2753480
    },
    {
      "epoch": 4.506163141598424,
      "grad_norm": 0.29804593324661255,
      "learning_rate": 9.941817175464375e-07,
      "loss": 0.0075,
      "step": 2753500
    },
    {
      "epoch": 4.506195872037077,
      "grad_norm": 0.38853076100349426,
      "learning_rate": 9.941158253329206e-07,
      "loss": 0.0089,
      "step": 2753520
    },
    {
      "epoch": 4.506228602475731,
      "grad_norm": 0.22309046983718872,
      "learning_rate": 9.940499331194034e-07,
      "loss": 0.0083,
      "step": 2753540
    },
    {
      "epoch": 4.5062613329143835,
      "grad_norm": 0.31385332345962524,
      "learning_rate": 9.939840409058863e-07,
      "loss": 0.011,
      "step": 2753560
    },
    {
      "epoch": 4.506294063353037,
      "grad_norm": 0.06395959109067917,
      "learning_rate": 9.93918148692369e-07,
      "loss": 0.0089,
      "step": 2753580
    },
    {
      "epoch": 4.506326793791691,
      "grad_norm": 0.24477073550224304,
      "learning_rate": 9.938522564788518e-07,
      "loss": 0.0111,
      "step": 2753600
    },
    {
      "epoch": 4.506359524230343,
      "grad_norm": 0.2371567189693451,
      "learning_rate": 9.93786364265335e-07,
      "loss": 0.0124,
      "step": 2753620
    },
    {
      "epoch": 4.506392254668997,
      "grad_norm": 0.36252397298812866,
      "learning_rate": 9.937204720518177e-07,
      "loss": 0.012,
      "step": 2753640
    },
    {
      "epoch": 4.5064249851076505,
      "grad_norm": 0.3952195346355438,
      "learning_rate": 9.936545798383007e-07,
      "loss": 0.009,
      "step": 2753660
    },
    {
      "epoch": 4.506457715546304,
      "grad_norm": 0.1742820143699646,
      "learning_rate": 9.935886876247834e-07,
      "loss": 0.0077,
      "step": 2753680
    },
    {
      "epoch": 4.506490445984957,
      "grad_norm": 0.27567508816719055,
      "learning_rate": 9.935227954112664e-07,
      "loss": 0.0069,
      "step": 2753700
    },
    {
      "epoch": 4.50652317642361,
      "grad_norm": 0.547845721244812,
      "learning_rate": 9.934569031977493e-07,
      "loss": 0.0104,
      "step": 2753720
    },
    {
      "epoch": 4.506555906862264,
      "grad_norm": 0.21014030277729034,
      "learning_rate": 9.93391010984232e-07,
      "loss": 0.0086,
      "step": 2753740
    },
    {
      "epoch": 4.506588637300917,
      "grad_norm": 0.4620748460292816,
      "learning_rate": 9.93325118770715e-07,
      "loss": 0.0094,
      "step": 2753760
    },
    {
      "epoch": 4.50662136773957,
      "grad_norm": 0.3236251175403595,
      "learning_rate": 9.932592265571977e-07,
      "loss": 0.0111,
      "step": 2753780
    },
    {
      "epoch": 4.506654098178224,
      "grad_norm": 0.16531209647655487,
      "learning_rate": 9.931933343436807e-07,
      "loss": 0.0078,
      "step": 2753800
    },
    {
      "epoch": 4.506686828616877,
      "grad_norm": 0.211192786693573,
      "learning_rate": 9.931274421301636e-07,
      "loss": 0.0134,
      "step": 2753820
    },
    {
      "epoch": 4.50671955905553,
      "grad_norm": 0.23660972714424133,
      "learning_rate": 9.930615499166464e-07,
      "loss": 0.0132,
      "step": 2753840
    },
    {
      "epoch": 4.506752289494184,
      "grad_norm": 0.06962835788726807,
      "learning_rate": 9.929956577031293e-07,
      "loss": 0.0069,
      "step": 2753860
    },
    {
      "epoch": 4.506785019932837,
      "grad_norm": 0.19495123624801636,
      "learning_rate": 9.929297654896123e-07,
      "loss": 0.0055,
      "step": 2753880
    },
    {
      "epoch": 4.50681775037149,
      "grad_norm": 0.23661650717258453,
      "learning_rate": 9.92863873276095e-07,
      "loss": 0.0072,
      "step": 2753900
    },
    {
      "epoch": 4.506850480810144,
      "grad_norm": 0.2923257648944855,
      "learning_rate": 9.92797981062578e-07,
      "loss": 0.009,
      "step": 2753920
    },
    {
      "epoch": 4.506883211248797,
      "grad_norm": 0.41501399874687195,
      "learning_rate": 9.927320888490607e-07,
      "loss": 0.0089,
      "step": 2753940
    },
    {
      "epoch": 4.506915941687451,
      "grad_norm": 0.2827739417552948,
      "learning_rate": 9.926661966355437e-07,
      "loss": 0.0099,
      "step": 2753960
    },
    {
      "epoch": 4.506948672126104,
      "grad_norm": 0.09647302329540253,
      "learning_rate": 9.926003044220266e-07,
      "loss": 0.0092,
      "step": 2753980
    },
    {
      "epoch": 4.506981402564757,
      "grad_norm": 0.16934046149253845,
      "learning_rate": 9.925344122085094e-07,
      "loss": 0.0115,
      "step": 2754000
    },
    {
      "epoch": 4.507014133003411,
      "grad_norm": 0.43802472949028015,
      "learning_rate": 9.924685199949923e-07,
      "loss": 0.0099,
      "step": 2754020
    },
    {
      "epoch": 4.5070468634420635,
      "grad_norm": 0.1932181417942047,
      "learning_rate": 9.92402627781475e-07,
      "loss": 0.0113,
      "step": 2754040
    },
    {
      "epoch": 4.507079593880717,
      "grad_norm": 0.13065128028392792,
      "learning_rate": 9.92336735567958e-07,
      "loss": 0.0109,
      "step": 2754060
    },
    {
      "epoch": 4.507112324319371,
      "grad_norm": 0.18126165866851807,
      "learning_rate": 9.92270843354441e-07,
      "loss": 0.0087,
      "step": 2754080
    },
    {
      "epoch": 4.507145054758023,
      "grad_norm": 0.052728138864040375,
      "learning_rate": 9.922049511409237e-07,
      "loss": 0.007,
      "step": 2754100
    },
    {
      "epoch": 4.507177785196677,
      "grad_norm": 0.2448452264070511,
      "learning_rate": 9.921390589274066e-07,
      "loss": 0.0059,
      "step": 2754120
    },
    {
      "epoch": 4.507210515635331,
      "grad_norm": 0.08403259515762329,
      "learning_rate": 9.920731667138896e-07,
      "loss": 0.0088,
      "step": 2754140
    },
    {
      "epoch": 4.507243246073984,
      "grad_norm": 0.6489580869674683,
      "learning_rate": 9.920072745003723e-07,
      "loss": 0.0076,
      "step": 2754160
    },
    {
      "epoch": 4.507275976512637,
      "grad_norm": 0.09103427827358246,
      "learning_rate": 9.919413822868553e-07,
      "loss": 0.0061,
      "step": 2754180
    },
    {
      "epoch": 4.5073087069512905,
      "grad_norm": 0.40563416481018066,
      "learning_rate": 9.91875490073338e-07,
      "loss": 0.0129,
      "step": 2754200
    },
    {
      "epoch": 4.507341437389944,
      "grad_norm": 0.09006199240684509,
      "learning_rate": 9.91809597859821e-07,
      "loss": 0.0069,
      "step": 2754220
    },
    {
      "epoch": 4.507374167828598,
      "grad_norm": 0.17924435436725616,
      "learning_rate": 9.91743705646304e-07,
      "loss": 0.0097,
      "step": 2754240
    },
    {
      "epoch": 4.50740689826725,
      "grad_norm": 0.310483455657959,
      "learning_rate": 9.916778134327867e-07,
      "loss": 0.0093,
      "step": 2754260
    },
    {
      "epoch": 4.507439628705904,
      "grad_norm": 0.26450425386428833,
      "learning_rate": 9.916119212192696e-07,
      "loss": 0.0103,
      "step": 2754280
    },
    {
      "epoch": 4.507472359144558,
      "grad_norm": 0.28107208013534546,
      "learning_rate": 9.915460290057524e-07,
      "loss": 0.0093,
      "step": 2754300
    },
    {
      "epoch": 4.50750508958321,
      "grad_norm": 0.7691182494163513,
      "learning_rate": 9.914801367922353e-07,
      "loss": 0.0105,
      "step": 2754320
    },
    {
      "epoch": 4.507537820021864,
      "grad_norm": 0.15424975752830505,
      "learning_rate": 9.914142445787183e-07,
      "loss": 0.0087,
      "step": 2754340
    },
    {
      "epoch": 4.5075705504605175,
      "grad_norm": 0.12030571699142456,
      "learning_rate": 9.91348352365201e-07,
      "loss": 0.0083,
      "step": 2754360
    },
    {
      "epoch": 4.50760328089917,
      "grad_norm": 0.08079643547534943,
      "learning_rate": 9.91282460151684e-07,
      "loss": 0.0072,
      "step": 2754380
    },
    {
      "epoch": 4.507636011337824,
      "grad_norm": 0.09514231979846954,
      "learning_rate": 9.91216567938167e-07,
      "loss": 0.0064,
      "step": 2754400
    },
    {
      "epoch": 4.507668741776477,
      "grad_norm": 0.10876680910587311,
      "learning_rate": 9.911506757246496e-07,
      "loss": 0.0128,
      "step": 2754420
    },
    {
      "epoch": 4.507701472215131,
      "grad_norm": 0.19436125457286835,
      "learning_rate": 9.910847835111326e-07,
      "loss": 0.0102,
      "step": 2754440
    },
    {
      "epoch": 4.507734202653784,
      "grad_norm": Infinity,
      "learning_rate": 9.910188912976153e-07,
      "loss": 0.0103,
      "step": 2754460
    },
    {
      "epoch": 4.507766933092437,
      "grad_norm": 0.2062331736087799,
      "learning_rate": 9.909529990840983e-07,
      "loss": 0.0065,
      "step": 2754480
    },
    {
      "epoch": 4.507799663531091,
      "grad_norm": 0.4175878167152405,
      "learning_rate": 9.908871068705812e-07,
      "loss": 0.0121,
      "step": 2754500
    },
    {
      "epoch": 4.5078323939697444,
      "grad_norm": 0.33858272433280945,
      "learning_rate": 9.90821214657064e-07,
      "loss": 0.008,
      "step": 2754520
    },
    {
      "epoch": 4.507865124408397,
      "grad_norm": 0.29907330870628357,
      "learning_rate": 9.90755322443547e-07,
      "loss": 0.0088,
      "step": 2754540
    },
    {
      "epoch": 4.507897854847051,
      "grad_norm": 0.08330679684877396,
      "learning_rate": 9.906894302300297e-07,
      "loss": 0.011,
      "step": 2754560
    },
    {
      "epoch": 4.507930585285704,
      "grad_norm": 0.14009103178977966,
      "learning_rate": 9.906235380165128e-07,
      "loss": 0.0079,
      "step": 2754580
    },
    {
      "epoch": 4.507963315724357,
      "grad_norm": 0.04622255265712738,
      "learning_rate": 9.905576458029956e-07,
      "loss": 0.0064,
      "step": 2754600
    },
    {
      "epoch": 4.507996046163011,
      "grad_norm": 0.05779384449124336,
      "learning_rate": 9.904917535894783e-07,
      "loss": 0.0088,
      "step": 2754620
    },
    {
      "epoch": 4.508028776601664,
      "grad_norm": 0.18019390106201172,
      "learning_rate": 9.904258613759613e-07,
      "loss": 0.0079,
      "step": 2754640
    },
    {
      "epoch": 4.508061507040317,
      "grad_norm": 0.10767221450805664,
      "learning_rate": 9.90359969162444e-07,
      "loss": 0.0063,
      "step": 2754660
    },
    {
      "epoch": 4.5080942374789705,
      "grad_norm": 0.3117491900920868,
      "learning_rate": 9.902940769489272e-07,
      "loss": 0.0074,
      "step": 2754680
    },
    {
      "epoch": 4.508126967917624,
      "grad_norm": 0.1961991935968399,
      "learning_rate": 9.9022818473541e-07,
      "loss": 0.0133,
      "step": 2754700
    },
    {
      "epoch": 4.508159698356278,
      "grad_norm": 0.17874611914157867,
      "learning_rate": 9.901622925218926e-07,
      "loss": 0.0071,
      "step": 2754720
    },
    {
      "epoch": 4.50819242879493,
      "grad_norm": 0.48510977625846863,
      "learning_rate": 9.900964003083756e-07,
      "loss": 0.0102,
      "step": 2754740
    },
    {
      "epoch": 4.508225159233584,
      "grad_norm": 0.2650481164455414,
      "learning_rate": 9.900305080948586e-07,
      "loss": 0.0118,
      "step": 2754760
    },
    {
      "epoch": 4.508257889672238,
      "grad_norm": 0.08902162313461304,
      "learning_rate": 9.899646158813415e-07,
      "loss": 0.0125,
      "step": 2754780
    },
    {
      "epoch": 4.508290620110891,
      "grad_norm": 0.2777624726295471,
      "learning_rate": 9.898987236678242e-07,
      "loss": 0.0063,
      "step": 2754800
    },
    {
      "epoch": 4.508323350549544,
      "grad_norm": 0.25707581639289856,
      "learning_rate": 9.89832831454307e-07,
      "loss": 0.0092,
      "step": 2754820
    },
    {
      "epoch": 4.5083560809881975,
      "grad_norm": 0.10528714954853058,
      "learning_rate": 9.897669392407901e-07,
      "loss": 0.0082,
      "step": 2754840
    },
    {
      "epoch": 4.508388811426851,
      "grad_norm": 0.3419966995716095,
      "learning_rate": 9.897010470272729e-07,
      "loss": 0.0102,
      "step": 2754860
    },
    {
      "epoch": 4.508421541865504,
      "grad_norm": 0.7143625020980835,
      "learning_rate": 9.896351548137558e-07,
      "loss": 0.0127,
      "step": 2754880
    },
    {
      "epoch": 4.508454272304157,
      "grad_norm": 0.2512744963169098,
      "learning_rate": 9.895692626002386e-07,
      "loss": 0.0075,
      "step": 2754900
    },
    {
      "epoch": 4.508487002742811,
      "grad_norm": 0.9394142031669617,
      "learning_rate": 9.895033703867213e-07,
      "loss": 0.0088,
      "step": 2754920
    },
    {
      "epoch": 4.508519733181464,
      "grad_norm": 0.14494770765304565,
      "learning_rate": 9.894374781732045e-07,
      "loss": 0.012,
      "step": 2754940
    },
    {
      "epoch": 4.508552463620117,
      "grad_norm": 0.11031524091959,
      "learning_rate": 9.893715859596872e-07,
      "loss": 0.008,
      "step": 2754960
    },
    {
      "epoch": 4.508585194058771,
      "grad_norm": 0.1766463816165924,
      "learning_rate": 9.893056937461702e-07,
      "loss": 0.008,
      "step": 2754980
    },
    {
      "epoch": 4.5086179244974245,
      "grad_norm": 0.21230752766132355,
      "learning_rate": 9.89239801532653e-07,
      "loss": 0.0072,
      "step": 2755000
    },
    {
      "epoch": 4.508650654936077,
      "grad_norm": 0.1792128086090088,
      "learning_rate": 9.891739093191359e-07,
      "loss": 0.0081,
      "step": 2755020
    },
    {
      "epoch": 4.508683385374731,
      "grad_norm": 0.08661755174398422,
      "learning_rate": 9.891080171056188e-07,
      "loss": 0.0064,
      "step": 2755040
    },
    {
      "epoch": 4.508716115813384,
      "grad_norm": 0.2252742201089859,
      "learning_rate": 9.890421248921016e-07,
      "loss": 0.0071,
      "step": 2755060
    },
    {
      "epoch": 4.508748846252037,
      "grad_norm": 0.20236818492412567,
      "learning_rate": 9.889762326785845e-07,
      "loss": 0.0085,
      "step": 2755080
    },
    {
      "epoch": 4.508781576690691,
      "grad_norm": 0.06973189115524292,
      "learning_rate": 9.889103404650672e-07,
      "loss": 0.007,
      "step": 2755100
    },
    {
      "epoch": 4.508814307129344,
      "grad_norm": 0.06530994921922684,
      "learning_rate": 9.888444482515502e-07,
      "loss": 0.0073,
      "step": 2755120
    },
    {
      "epoch": 4.508847037567998,
      "grad_norm": 0.13635119795799255,
      "learning_rate": 9.887785560380331e-07,
      "loss": 0.0096,
      "step": 2755140
    },
    {
      "epoch": 4.508879768006651,
      "grad_norm": 0.2792965769767761,
      "learning_rate": 9.887126638245159e-07,
      "loss": 0.0068,
      "step": 2755160
    },
    {
      "epoch": 4.508912498445304,
      "grad_norm": 0.1563417613506317,
      "learning_rate": 9.886467716109988e-07,
      "loss": 0.0102,
      "step": 2755180
    },
    {
      "epoch": 4.508945228883958,
      "grad_norm": 0.39747944474220276,
      "learning_rate": 9.885808793974818e-07,
      "loss": 0.0085,
      "step": 2755200
    },
    {
      "epoch": 4.5089779593226105,
      "grad_norm": 0.14266273379325867,
      "learning_rate": 9.885149871839645e-07,
      "loss": 0.0073,
      "step": 2755220
    },
    {
      "epoch": 4.509010689761264,
      "grad_norm": 0.5776770114898682,
      "learning_rate": 9.884490949704475e-07,
      "loss": 0.0067,
      "step": 2755240
    },
    {
      "epoch": 4.509043420199918,
      "grad_norm": 0.3156503438949585,
      "learning_rate": 9.883832027569302e-07,
      "loss": 0.0082,
      "step": 2755260
    },
    {
      "epoch": 4.50907615063857,
      "grad_norm": 0.2966861426830292,
      "learning_rate": 9.883173105434132e-07,
      "loss": 0.0089,
      "step": 2755280
    },
    {
      "epoch": 4.509108881077224,
      "grad_norm": 0.4549582600593567,
      "learning_rate": 9.882514183298961e-07,
      "loss": 0.0092,
      "step": 2755300
    },
    {
      "epoch": 4.509141611515878,
      "grad_norm": 0.1250847429037094,
      "learning_rate": 9.881855261163789e-07,
      "loss": 0.0099,
      "step": 2755320
    },
    {
      "epoch": 4.509174341954531,
      "grad_norm": 0.18375425040721893,
      "learning_rate": 9.881196339028618e-07,
      "loss": 0.0051,
      "step": 2755340
    },
    {
      "epoch": 4.509207072393184,
      "grad_norm": 0.4976489245891571,
      "learning_rate": 9.880537416893446e-07,
      "loss": 0.0095,
      "step": 2755360
    },
    {
      "epoch": 4.5092398028318375,
      "grad_norm": 0.49136990308761597,
      "learning_rate": 9.879878494758275e-07,
      "loss": 0.0083,
      "step": 2755380
    },
    {
      "epoch": 4.509272533270491,
      "grad_norm": 0.3077358305454254,
      "learning_rate": 9.879219572623105e-07,
      "loss": 0.0086,
      "step": 2755400
    },
    {
      "epoch": 4.509305263709145,
      "grad_norm": 0.22785574197769165,
      "learning_rate": 9.878560650487932e-07,
      "loss": 0.0068,
      "step": 2755420
    },
    {
      "epoch": 4.509337994147797,
      "grad_norm": 0.11445420980453491,
      "learning_rate": 9.877901728352762e-07,
      "loss": 0.0053,
      "step": 2755440
    },
    {
      "epoch": 4.509370724586451,
      "grad_norm": 0.3349209427833557,
      "learning_rate": 9.87724280621759e-07,
      "loss": 0.011,
      "step": 2755460
    },
    {
      "epoch": 4.5094034550251045,
      "grad_norm": 0.1763233095407486,
      "learning_rate": 9.876583884082418e-07,
      "loss": 0.0087,
      "step": 2755480
    },
    {
      "epoch": 4.509436185463757,
      "grad_norm": 0.13328000903129578,
      "learning_rate": 9.875924961947248e-07,
      "loss": 0.0109,
      "step": 2755500
    },
    {
      "epoch": 4.509468915902411,
      "grad_norm": 0.2796929180622101,
      "learning_rate": 9.875266039812075e-07,
      "loss": 0.0085,
      "step": 2755520
    },
    {
      "epoch": 4.509501646341064,
      "grad_norm": 0.43207839131355286,
      "learning_rate": 9.874607117676905e-07,
      "loss": 0.0101,
      "step": 2755540
    },
    {
      "epoch": 4.509534376779717,
      "grad_norm": 0.23838675022125244,
      "learning_rate": 9.873948195541734e-07,
      "loss": 0.0105,
      "step": 2755560
    },
    {
      "epoch": 4.509567107218371,
      "grad_norm": 0.07313672453165054,
      "learning_rate": 9.873289273406562e-07,
      "loss": 0.009,
      "step": 2755580
    },
    {
      "epoch": 4.509599837657024,
      "grad_norm": 0.1477109044790268,
      "learning_rate": 9.872630351271391e-07,
      "loss": 0.0055,
      "step": 2755600
    },
    {
      "epoch": 4.509632568095678,
      "grad_norm": 0.15434959530830383,
      "learning_rate": 9.871971429136219e-07,
      "loss": 0.0121,
      "step": 2755620
    },
    {
      "epoch": 4.509665298534331,
      "grad_norm": 0.295305073261261,
      "learning_rate": 9.871312507001048e-07,
      "loss": 0.0089,
      "step": 2755640
    },
    {
      "epoch": 4.509698028972984,
      "grad_norm": 0.2463492900133133,
      "learning_rate": 9.870653584865878e-07,
      "loss": 0.0091,
      "step": 2755660
    },
    {
      "epoch": 4.509730759411638,
      "grad_norm": 0.29894232749938965,
      "learning_rate": 9.869994662730705e-07,
      "loss": 0.0142,
      "step": 2755680
    },
    {
      "epoch": 4.509763489850291,
      "grad_norm": 0.29800888895988464,
      "learning_rate": 9.869335740595535e-07,
      "loss": 0.0084,
      "step": 2755700
    },
    {
      "epoch": 4.509796220288944,
      "grad_norm": 0.42777588963508606,
      "learning_rate": 9.868676818460364e-07,
      "loss": 0.0093,
      "step": 2755720
    },
    {
      "epoch": 4.509828950727598,
      "grad_norm": 0.18101978302001953,
      "learning_rate": 9.868017896325192e-07,
      "loss": 0.0128,
      "step": 2755740
    },
    {
      "epoch": 4.509861681166251,
      "grad_norm": 0.14859500527381897,
      "learning_rate": 9.86735897419002e-07,
      "loss": 0.0109,
      "step": 2755760
    },
    {
      "epoch": 4.509894411604904,
      "grad_norm": 0.23158591985702515,
      "learning_rate": 9.866700052054848e-07,
      "loss": 0.009,
      "step": 2755780
    },
    {
      "epoch": 4.509927142043558,
      "grad_norm": 0.2481963336467743,
      "learning_rate": 9.866041129919678e-07,
      "loss": 0.0059,
      "step": 2755800
    },
    {
      "epoch": 4.509959872482211,
      "grad_norm": 0.2480047345161438,
      "learning_rate": 9.865382207784507e-07,
      "loss": 0.0071,
      "step": 2755820
    },
    {
      "epoch": 4.509992602920864,
      "grad_norm": 0.17323587834835052,
      "learning_rate": 9.864723285649335e-07,
      "loss": 0.0088,
      "step": 2755840
    },
    {
      "epoch": 4.5100253333595175,
      "grad_norm": 0.20184555649757385,
      "learning_rate": 9.864064363514164e-07,
      "loss": 0.0088,
      "step": 2755860
    },
    {
      "epoch": 4.510058063798171,
      "grad_norm": 0.2656659781932831,
      "learning_rate": 9.863405441378992e-07,
      "loss": 0.0098,
      "step": 2755880
    },
    {
      "epoch": 4.510090794236825,
      "grad_norm": 0.39001086354255676,
      "learning_rate": 9.862746519243823e-07,
      "loss": 0.0065,
      "step": 2755900
    },
    {
      "epoch": 4.510123524675477,
      "grad_norm": 0.11995697766542435,
      "learning_rate": 9.86208759710865e-07,
      "loss": 0.0074,
      "step": 2755920
    },
    {
      "epoch": 4.510156255114131,
      "grad_norm": 0.1852882355451584,
      "learning_rate": 9.861428674973478e-07,
      "loss": 0.006,
      "step": 2755940
    },
    {
      "epoch": 4.510188985552785,
      "grad_norm": 0.22643445432186127,
      "learning_rate": 9.860769752838308e-07,
      "loss": 0.0095,
      "step": 2755960
    },
    {
      "epoch": 4.510221715991438,
      "grad_norm": 0.21360838413238525,
      "learning_rate": 9.860110830703135e-07,
      "loss": 0.0107,
      "step": 2755980
    },
    {
      "epoch": 4.510254446430091,
      "grad_norm": 0.038362257182598114,
      "learning_rate": 9.859451908567967e-07,
      "loss": 0.0098,
      "step": 2756000
    },
    {
      "epoch": 4.5102871768687445,
      "grad_norm": 0.14216266572475433,
      "learning_rate": 9.858792986432794e-07,
      "loss": 0.0122,
      "step": 2756020
    },
    {
      "epoch": 4.510319907307398,
      "grad_norm": 0.14739137887954712,
      "learning_rate": 9.858134064297622e-07,
      "loss": 0.0076,
      "step": 2756040
    },
    {
      "epoch": 4.510352637746051,
      "grad_norm": 0.7716923952102661,
      "learning_rate": 9.857475142162451e-07,
      "loss": 0.01,
      "step": 2756060
    },
    {
      "epoch": 4.510385368184704,
      "grad_norm": 0.2808796763420105,
      "learning_rate": 9.85681622002728e-07,
      "loss": 0.0146,
      "step": 2756080
    },
    {
      "epoch": 4.510418098623358,
      "grad_norm": 0.20208340883255005,
      "learning_rate": 9.85615729789211e-07,
      "loss": 0.0139,
      "step": 2756100
    },
    {
      "epoch": 4.510450829062011,
      "grad_norm": 0.21182453632354736,
      "learning_rate": 9.855498375756937e-07,
      "loss": 0.007,
      "step": 2756120
    },
    {
      "epoch": 4.510483559500664,
      "grad_norm": 0.18783393502235413,
      "learning_rate": 9.854839453621765e-07,
      "loss": 0.0084,
      "step": 2756140
    },
    {
      "epoch": 4.510516289939318,
      "grad_norm": 0.278802752494812,
      "learning_rate": 9.854180531486597e-07,
      "loss": 0.0098,
      "step": 2756160
    },
    {
      "epoch": 4.5105490203779715,
      "grad_norm": 0.17240160703659058,
      "learning_rate": 9.853521609351424e-07,
      "loss": 0.0094,
      "step": 2756180
    },
    {
      "epoch": 4.510581750816624,
      "grad_norm": 0.12535084784030914,
      "learning_rate": 9.852862687216253e-07,
      "loss": 0.0099,
      "step": 2756200
    },
    {
      "epoch": 4.510614481255278,
      "grad_norm": 0.2457321584224701,
      "learning_rate": 9.85220376508108e-07,
      "loss": 0.0097,
      "step": 2756220
    },
    {
      "epoch": 4.510647211693931,
      "grad_norm": 0.33949077129364014,
      "learning_rate": 9.85154484294591e-07,
      "loss": 0.0068,
      "step": 2756240
    },
    {
      "epoch": 4.510679942132585,
      "grad_norm": 0.15679876506328583,
      "learning_rate": 9.85088592081074e-07,
      "loss": 0.0057,
      "step": 2756260
    },
    {
      "epoch": 4.510712672571238,
      "grad_norm": 0.1177336797118187,
      "learning_rate": 9.850226998675567e-07,
      "loss": 0.0112,
      "step": 2756280
    },
    {
      "epoch": 4.510745403009891,
      "grad_norm": 0.15596936643123627,
      "learning_rate": 9.849568076540397e-07,
      "loss": 0.0082,
      "step": 2756300
    },
    {
      "epoch": 4.510778133448545,
      "grad_norm": 0.1238209679722786,
      "learning_rate": 9.848909154405224e-07,
      "loss": 0.0058,
      "step": 2756320
    },
    {
      "epoch": 4.5108108638871975,
      "grad_norm": 0.1850469559431076,
      "learning_rate": 9.848250232270054e-07,
      "loss": 0.0056,
      "step": 2756340
    },
    {
      "epoch": 4.510843594325851,
      "grad_norm": 0.12122880667448044,
      "learning_rate": 9.847591310134883e-07,
      "loss": 0.0065,
      "step": 2756360
    },
    {
      "epoch": 4.510876324764505,
      "grad_norm": 0.24881130456924438,
      "learning_rate": 9.84693238799971e-07,
      "loss": 0.0076,
      "step": 2756380
    },
    {
      "epoch": 4.510909055203157,
      "grad_norm": 0.2110578715801239,
      "learning_rate": 9.84627346586454e-07,
      "loss": 0.0077,
      "step": 2756400
    },
    {
      "epoch": 4.510941785641811,
      "grad_norm": 0.5422016978263855,
      "learning_rate": 9.845614543729368e-07,
      "loss": 0.0082,
      "step": 2756420
    },
    {
      "epoch": 4.510974516080465,
      "grad_norm": 0.09007488936185837,
      "learning_rate": 9.844955621594197e-07,
      "loss": 0.0111,
      "step": 2756440
    },
    {
      "epoch": 4.511007246519118,
      "grad_norm": 1.1360392570495605,
      "learning_rate": 9.844296699459027e-07,
      "loss": 0.0104,
      "step": 2756460
    },
    {
      "epoch": 4.511039976957771,
      "grad_norm": 0.4298051595687866,
      "learning_rate": 9.843637777323854e-07,
      "loss": 0.0105,
      "step": 2756480
    },
    {
      "epoch": 4.5110727073964245,
      "grad_norm": 0.11837472021579742,
      "learning_rate": 9.842978855188683e-07,
      "loss": 0.0066,
      "step": 2756500
    },
    {
      "epoch": 4.511105437835078,
      "grad_norm": 0.39282894134521484,
      "learning_rate": 9.842319933053513e-07,
      "loss": 0.0097,
      "step": 2756520
    },
    {
      "epoch": 4.511138168273731,
      "grad_norm": 0.2146289348602295,
      "learning_rate": 9.84166101091834e-07,
      "loss": 0.0078,
      "step": 2756540
    },
    {
      "epoch": 4.511170898712384,
      "grad_norm": 0.1882907748222351,
      "learning_rate": 9.84100208878317e-07,
      "loss": 0.0095,
      "step": 2756560
    },
    {
      "epoch": 4.511203629151038,
      "grad_norm": 0.25188127160072327,
      "learning_rate": 9.840343166647997e-07,
      "loss": 0.0102,
      "step": 2756580
    },
    {
      "epoch": 4.511236359589692,
      "grad_norm": 0.12098170816898346,
      "learning_rate": 9.839684244512827e-07,
      "loss": 0.0113,
      "step": 2756600
    },
    {
      "epoch": 4.511269090028344,
      "grad_norm": 0.06820762157440186,
      "learning_rate": 9.839025322377656e-07,
      "loss": 0.0099,
      "step": 2756620
    },
    {
      "epoch": 4.511301820466998,
      "grad_norm": 0.38429057598114014,
      "learning_rate": 9.838366400242484e-07,
      "loss": 0.0117,
      "step": 2756640
    },
    {
      "epoch": 4.5113345509056515,
      "grad_norm": 0.08095736056566238,
      "learning_rate": 9.837707478107313e-07,
      "loss": 0.0064,
      "step": 2756660
    },
    {
      "epoch": 4.511367281344304,
      "grad_norm": 0.33328163623809814,
      "learning_rate": 9.83704855597214e-07,
      "loss": 0.0106,
      "step": 2756680
    },
    {
      "epoch": 4.511400011782958,
      "grad_norm": 0.05508866533637047,
      "learning_rate": 9.83638963383697e-07,
      "loss": 0.0073,
      "step": 2756700
    },
    {
      "epoch": 4.511432742221611,
      "grad_norm": 0.26260215044021606,
      "learning_rate": 9.8357307117018e-07,
      "loss": 0.0094,
      "step": 2756720
    },
    {
      "epoch": 4.511465472660264,
      "grad_norm": 0.5387105941772461,
      "learning_rate": 9.835071789566627e-07,
      "loss": 0.01,
      "step": 2756740
    },
    {
      "epoch": 4.511498203098918,
      "grad_norm": 0.2647955119609833,
      "learning_rate": 9.834412867431457e-07,
      "loss": 0.0099,
      "step": 2756760
    },
    {
      "epoch": 4.511530933537571,
      "grad_norm": 0.5679001212120056,
      "learning_rate": 9.833753945296286e-07,
      "loss": 0.0093,
      "step": 2756780
    },
    {
      "epoch": 4.511563663976225,
      "grad_norm": 0.26936694979667664,
      "learning_rate": 9.833095023161113e-07,
      "loss": 0.0094,
      "step": 2756800
    },
    {
      "epoch": 4.511596394414878,
      "grad_norm": 0.09971721470355988,
      "learning_rate": 9.832436101025943e-07,
      "loss": 0.0095,
      "step": 2756820
    },
    {
      "epoch": 4.511629124853531,
      "grad_norm": 0.2611037790775299,
      "learning_rate": 9.83177717889077e-07,
      "loss": 0.0107,
      "step": 2756840
    },
    {
      "epoch": 4.511661855292185,
      "grad_norm": 0.1095520630478859,
      "learning_rate": 9.8311182567556e-07,
      "loss": 0.0065,
      "step": 2756860
    },
    {
      "epoch": 4.511694585730838,
      "grad_norm": 0.4283754229545593,
      "learning_rate": 9.83045933462043e-07,
      "loss": 0.0091,
      "step": 2756880
    },
    {
      "epoch": 4.511727316169491,
      "grad_norm": 0.17999550700187683,
      "learning_rate": 9.829800412485257e-07,
      "loss": 0.0108,
      "step": 2756900
    },
    {
      "epoch": 4.511760046608145,
      "grad_norm": 0.20673012733459473,
      "learning_rate": 9.829141490350086e-07,
      "loss": 0.0067,
      "step": 2756920
    },
    {
      "epoch": 4.511792777046798,
      "grad_norm": 0.25077930092811584,
      "learning_rate": 9.828482568214914e-07,
      "loss": 0.0073,
      "step": 2756940
    },
    {
      "epoch": 4.511825507485451,
      "grad_norm": 0.11900176852941513,
      "learning_rate": 9.827823646079743e-07,
      "loss": 0.0111,
      "step": 2756960
    },
    {
      "epoch": 4.511858237924105,
      "grad_norm": 0.278398722410202,
      "learning_rate": 9.827164723944573e-07,
      "loss": 0.0097,
      "step": 2756980
    },
    {
      "epoch": 4.511890968362758,
      "grad_norm": 0.08754470944404602,
      "learning_rate": 9.8265058018094e-07,
      "loss": 0.0115,
      "step": 2757000
    },
    {
      "epoch": 4.511923698801411,
      "grad_norm": 0.09031342715024948,
      "learning_rate": 9.82584687967423e-07,
      "loss": 0.0064,
      "step": 2757020
    },
    {
      "epoch": 4.5119564292400645,
      "grad_norm": 0.255287230014801,
      "learning_rate": 9.82518795753906e-07,
      "loss": 0.0078,
      "step": 2757040
    },
    {
      "epoch": 4.511989159678718,
      "grad_norm": 0.29342374205589294,
      "learning_rate": 9.824529035403887e-07,
      "loss": 0.0089,
      "step": 2757060
    },
    {
      "epoch": 4.512021890117372,
      "grad_norm": 0.2999356985092163,
      "learning_rate": 9.823870113268716e-07,
      "loss": 0.013,
      "step": 2757080
    },
    {
      "epoch": 4.512054620556024,
      "grad_norm": 2.9151251316070557,
      "learning_rate": 9.823211191133543e-07,
      "loss": 0.0079,
      "step": 2757100
    },
    {
      "epoch": 4.512087350994678,
      "grad_norm": 0.5931690335273743,
      "learning_rate": 9.822552268998373e-07,
      "loss": 0.0165,
      "step": 2757120
    },
    {
      "epoch": 4.5121200814333315,
      "grad_norm": 0.14460685849189758,
      "learning_rate": 9.821893346863203e-07,
      "loss": 0.009,
      "step": 2757140
    },
    {
      "epoch": 4.512152811871985,
      "grad_norm": 0.3806256353855133,
      "learning_rate": 9.82123442472803e-07,
      "loss": 0.0108,
      "step": 2757160
    },
    {
      "epoch": 4.512185542310638,
      "grad_norm": 0.2269514948129654,
      "learning_rate": 9.82057550259286e-07,
      "loss": 0.0067,
      "step": 2757180
    },
    {
      "epoch": 4.512218272749291,
      "grad_norm": 0.6072512865066528,
      "learning_rate": 9.819916580457687e-07,
      "loss": 0.0081,
      "step": 2757200
    },
    {
      "epoch": 4.512251003187945,
      "grad_norm": 0.5721203684806824,
      "learning_rate": 9.819257658322518e-07,
      "loss": 0.0093,
      "step": 2757220
    },
    {
      "epoch": 4.512283733626598,
      "grad_norm": 0.27049925923347473,
      "learning_rate": 9.818598736187346e-07,
      "loss": 0.0104,
      "step": 2757240
    },
    {
      "epoch": 4.512316464065251,
      "grad_norm": 0.17765769362449646,
      "learning_rate": 9.817939814052173e-07,
      "loss": 0.0131,
      "step": 2757260
    },
    {
      "epoch": 4.512349194503905,
      "grad_norm": 0.16911454498767853,
      "learning_rate": 9.817280891917003e-07,
      "loss": 0.0116,
      "step": 2757280
    },
    {
      "epoch": 4.512381924942558,
      "grad_norm": 0.3616674840450287,
      "learning_rate": 9.81662196978183e-07,
      "loss": 0.0155,
      "step": 2757300
    },
    {
      "epoch": 4.512414655381211,
      "grad_norm": 0.09544882923364639,
      "learning_rate": 9.815963047646662e-07,
      "loss": 0.0084,
      "step": 2757320
    },
    {
      "epoch": 4.512447385819865,
      "grad_norm": 0.17721936106681824,
      "learning_rate": 9.81530412551149e-07,
      "loss": 0.0116,
      "step": 2757340
    },
    {
      "epoch": 4.512480116258518,
      "grad_norm": 0.11623997986316681,
      "learning_rate": 9.814645203376317e-07,
      "loss": 0.0049,
      "step": 2757360
    },
    {
      "epoch": 4.512512846697171,
      "grad_norm": 0.8081315755844116,
      "learning_rate": 9.813986281241146e-07,
      "loss": 0.0123,
      "step": 2757380
    },
    {
      "epoch": 4.512545577135825,
      "grad_norm": 0.15945583581924438,
      "learning_rate": 9.813327359105976e-07,
      "loss": 0.0088,
      "step": 2757400
    },
    {
      "epoch": 4.512578307574478,
      "grad_norm": 0.1260315626859665,
      "learning_rate": 9.812668436970805e-07,
      "loss": 0.0049,
      "step": 2757420
    },
    {
      "epoch": 4.512611038013132,
      "grad_norm": 0.5219770669937134,
      "learning_rate": 9.812009514835633e-07,
      "loss": 0.0129,
      "step": 2757440
    },
    {
      "epoch": 4.512643768451785,
      "grad_norm": 0.5347686409950256,
      "learning_rate": 9.81135059270046e-07,
      "loss": 0.011,
      "step": 2757460
    },
    {
      "epoch": 4.512676498890438,
      "grad_norm": 0.17336037755012512,
      "learning_rate": 9.810691670565292e-07,
      "loss": 0.0071,
      "step": 2757480
    },
    {
      "epoch": 4.512709229329092,
      "grad_norm": 0.07610159367322922,
      "learning_rate": 9.81003274843012e-07,
      "loss": 0.0109,
      "step": 2757500
    },
    {
      "epoch": 4.5127419597677445,
      "grad_norm": 0.12745699286460876,
      "learning_rate": 9.809373826294948e-07,
      "loss": 0.0062,
      "step": 2757520
    },
    {
      "epoch": 4.512774690206398,
      "grad_norm": 0.08366389572620392,
      "learning_rate": 9.808714904159776e-07,
      "loss": 0.0092,
      "step": 2757540
    },
    {
      "epoch": 4.512807420645052,
      "grad_norm": 0.23813043534755707,
      "learning_rate": 9.808055982024605e-07,
      "loss": 0.0078,
      "step": 2757560
    },
    {
      "epoch": 4.512840151083704,
      "grad_norm": 0.2828308641910553,
      "learning_rate": 9.807397059889435e-07,
      "loss": 0.006,
      "step": 2757580
    },
    {
      "epoch": 4.512872881522358,
      "grad_norm": 0.19328343868255615,
      "learning_rate": 9.806738137754262e-07,
      "loss": 0.0065,
      "step": 2757600
    },
    {
      "epoch": 4.512905611961012,
      "grad_norm": 0.08735086768865585,
      "learning_rate": 9.806079215619092e-07,
      "loss": 0.0118,
      "step": 2757620
    },
    {
      "epoch": 4.512938342399665,
      "grad_norm": 0.5129972100257874,
      "learning_rate": 9.80542029348392e-07,
      "loss": 0.0086,
      "step": 2757640
    },
    {
      "epoch": 4.512971072838318,
      "grad_norm": 0.41409212350845337,
      "learning_rate": 9.804761371348749e-07,
      "loss": 0.0095,
      "step": 2757660
    },
    {
      "epoch": 4.5130038032769715,
      "grad_norm": 0.24947014451026917,
      "learning_rate": 9.804102449213578e-07,
      "loss": 0.009,
      "step": 2757680
    },
    {
      "epoch": 4.513036533715625,
      "grad_norm": 0.29806599020957947,
      "learning_rate": 9.803443527078406e-07,
      "loss": 0.0122,
      "step": 2757700
    },
    {
      "epoch": 4.513069264154279,
      "grad_norm": 0.07441602647304535,
      "learning_rate": 9.802784604943235e-07,
      "loss": 0.0071,
      "step": 2757720
    },
    {
      "epoch": 4.513101994592931,
      "grad_norm": 0.1121915653347969,
      "learning_rate": 9.802125682808063e-07,
      "loss": 0.0083,
      "step": 2757740
    },
    {
      "epoch": 4.513134725031585,
      "grad_norm": 0.1500156670808792,
      "learning_rate": 9.801466760672892e-07,
      "loss": 0.0115,
      "step": 2757760
    },
    {
      "epoch": 4.513167455470239,
      "grad_norm": 0.15116144716739655,
      "learning_rate": 9.800807838537722e-07,
      "loss": 0.0082,
      "step": 2757780
    },
    {
      "epoch": 4.513200185908891,
      "grad_norm": 0.11573655903339386,
      "learning_rate": 9.80014891640255e-07,
      "loss": 0.0095,
      "step": 2757800
    },
    {
      "epoch": 4.513232916347545,
      "grad_norm": 0.0874679908156395,
      "learning_rate": 9.799489994267379e-07,
      "loss": 0.0069,
      "step": 2757820
    },
    {
      "epoch": 4.5132656467861985,
      "grad_norm": 0.08047472685575485,
      "learning_rate": 9.798831072132208e-07,
      "loss": 0.0073,
      "step": 2757840
    },
    {
      "epoch": 4.513298377224851,
      "grad_norm": 0.5519540905952454,
      "learning_rate": 9.798172149997035e-07,
      "loss": 0.0094,
      "step": 2757860
    },
    {
      "epoch": 4.513331107663505,
      "grad_norm": 0.1832415908575058,
      "learning_rate": 9.797513227861865e-07,
      "loss": 0.0089,
      "step": 2757880
    },
    {
      "epoch": 4.513363838102158,
      "grad_norm": 0.24874252080917358,
      "learning_rate": 9.796854305726692e-07,
      "loss": 0.0076,
      "step": 2757900
    },
    {
      "epoch": 4.513396568540812,
      "grad_norm": 0.24705912172794342,
      "learning_rate": 9.796195383591522e-07,
      "loss": 0.009,
      "step": 2757920
    },
    {
      "epoch": 4.513429298979465,
      "grad_norm": 0.1535305678844452,
      "learning_rate": 9.795536461456351e-07,
      "loss": 0.0082,
      "step": 2757940
    },
    {
      "epoch": 4.513462029418118,
      "grad_norm": 0.13344822824001312,
      "learning_rate": 9.794877539321179e-07,
      "loss": 0.0069,
      "step": 2757960
    },
    {
      "epoch": 4.513494759856772,
      "grad_norm": 0.274025022983551,
      "learning_rate": 9.794218617186008e-07,
      "loss": 0.0095,
      "step": 2757980
    },
    {
      "epoch": 4.513527490295425,
      "grad_norm": 0.204701766371727,
      "learning_rate": 9.793559695050836e-07,
      "loss": 0.0089,
      "step": 2758000
    },
    {
      "epoch": 4.513560220734078,
      "grad_norm": 0.3146364688873291,
      "learning_rate": 9.792900772915665e-07,
      "loss": 0.0074,
      "step": 2758020
    },
    {
      "epoch": 4.513592951172732,
      "grad_norm": 0.34034886956214905,
      "learning_rate": 9.792241850780495e-07,
      "loss": 0.015,
      "step": 2758040
    },
    {
      "epoch": 4.513625681611385,
      "grad_norm": 0.3618488013744354,
      "learning_rate": 9.791582928645322e-07,
      "loss": 0.0072,
      "step": 2758060
    },
    {
      "epoch": 4.513658412050038,
      "grad_norm": 0.19250953197479248,
      "learning_rate": 9.790924006510152e-07,
      "loss": 0.007,
      "step": 2758080
    },
    {
      "epoch": 4.513691142488692,
      "grad_norm": 0.2422751933336258,
      "learning_rate": 9.790265084374981e-07,
      "loss": 0.0086,
      "step": 2758100
    },
    {
      "epoch": 4.513723872927345,
      "grad_norm": 0.6062301397323608,
      "learning_rate": 9.789606162239809e-07,
      "loss": 0.0108,
      "step": 2758120
    },
    {
      "epoch": 4.513756603365998,
      "grad_norm": 0.38856372237205505,
      "learning_rate": 9.788947240104638e-07,
      "loss": 0.0089,
      "step": 2758140
    },
    {
      "epoch": 4.5137893338046515,
      "grad_norm": 0.26643750071525574,
      "learning_rate": 9.788288317969465e-07,
      "loss": 0.0111,
      "step": 2758160
    },
    {
      "epoch": 4.513822064243305,
      "grad_norm": 0.19194306433200836,
      "learning_rate": 9.787629395834295e-07,
      "loss": 0.0093,
      "step": 2758180
    },
    {
      "epoch": 4.513854794681959,
      "grad_norm": 0.19915828108787537,
      "learning_rate": 9.786970473699124e-07,
      "loss": 0.0074,
      "step": 2758200
    },
    {
      "epoch": 4.513887525120611,
      "grad_norm": 0.1657387763261795,
      "learning_rate": 9.786311551563952e-07,
      "loss": 0.0066,
      "step": 2758220
    },
    {
      "epoch": 4.513920255559265,
      "grad_norm": 0.48229414224624634,
      "learning_rate": 9.785652629428781e-07,
      "loss": 0.0091,
      "step": 2758240
    },
    {
      "epoch": 4.513952985997919,
      "grad_norm": 0.18468044698238373,
      "learning_rate": 9.784993707293609e-07,
      "loss": 0.014,
      "step": 2758260
    },
    {
      "epoch": 4.513985716436571,
      "grad_norm": 0.1358259916305542,
      "learning_rate": 9.784334785158438e-07,
      "loss": 0.0114,
      "step": 2758280
    },
    {
      "epoch": 4.514018446875225,
      "grad_norm": 0.26282504200935364,
      "learning_rate": 9.783675863023268e-07,
      "loss": 0.0172,
      "step": 2758300
    },
    {
      "epoch": 4.5140511773138785,
      "grad_norm": 0.8288707137107849,
      "learning_rate": 9.783016940888095e-07,
      "loss": 0.0068,
      "step": 2758320
    },
    {
      "epoch": 4.514083907752532,
      "grad_norm": 0.1579282581806183,
      "learning_rate": 9.782358018752925e-07,
      "loss": 0.0076,
      "step": 2758340
    },
    {
      "epoch": 4.514116638191185,
      "grad_norm": 0.1623525321483612,
      "learning_rate": 9.781699096617754e-07,
      "loss": 0.0109,
      "step": 2758360
    },
    {
      "epoch": 4.514149368629838,
      "grad_norm": 0.3004780411720276,
      "learning_rate": 9.781040174482582e-07,
      "loss": 0.0116,
      "step": 2758380
    },
    {
      "epoch": 4.514182099068492,
      "grad_norm": 0.5310767292976379,
      "learning_rate": 9.780381252347411e-07,
      "loss": 0.0071,
      "step": 2758400
    },
    {
      "epoch": 4.514214829507145,
      "grad_norm": 0.640277624130249,
      "learning_rate": 9.779722330212239e-07,
      "loss": 0.009,
      "step": 2758420
    },
    {
      "epoch": 4.514247559945798,
      "grad_norm": 0.09228400141000748,
      "learning_rate": 9.779063408077068e-07,
      "loss": 0.0145,
      "step": 2758440
    },
    {
      "epoch": 4.514280290384452,
      "grad_norm": 0.2997547686100006,
      "learning_rate": 9.778404485941898e-07,
      "loss": 0.0087,
      "step": 2758460
    },
    {
      "epoch": 4.514313020823105,
      "grad_norm": 0.14264385402202606,
      "learning_rate": 9.777745563806725e-07,
      "loss": 0.0087,
      "step": 2758480
    },
    {
      "epoch": 4.514345751261758,
      "grad_norm": 0.12345903366804123,
      "learning_rate": 9.777086641671554e-07,
      "loss": 0.0099,
      "step": 2758500
    },
    {
      "epoch": 4.514378481700412,
      "grad_norm": 0.11645007133483887,
      "learning_rate": 9.776427719536382e-07,
      "loss": 0.0096,
      "step": 2758520
    },
    {
      "epoch": 4.514411212139065,
      "grad_norm": 0.12129537761211395,
      "learning_rate": 9.775768797401214e-07,
      "loss": 0.0069,
      "step": 2758540
    },
    {
      "epoch": 4.514443942577718,
      "grad_norm": 0.1989305168390274,
      "learning_rate": 9.77510987526604e-07,
      "loss": 0.0116,
      "step": 2758560
    },
    {
      "epoch": 4.514476673016372,
      "grad_norm": 0.31407326459884644,
      "learning_rate": 9.774450953130868e-07,
      "loss": 0.0061,
      "step": 2758580
    },
    {
      "epoch": 4.514509403455025,
      "grad_norm": 0.26632174849510193,
      "learning_rate": 9.773792030995698e-07,
      "loss": 0.0087,
      "step": 2758600
    },
    {
      "epoch": 4.514542133893679,
      "grad_norm": 0.389678955078125,
      "learning_rate": 9.773133108860525e-07,
      "loss": 0.014,
      "step": 2758620
    },
    {
      "epoch": 4.514574864332332,
      "grad_norm": 0.09815489500761032,
      "learning_rate": 9.772474186725357e-07,
      "loss": 0.0117,
      "step": 2758640
    },
    {
      "epoch": 4.514607594770985,
      "grad_norm": 0.9492883682250977,
      "learning_rate": 9.771815264590184e-07,
      "loss": 0.0065,
      "step": 2758660
    },
    {
      "epoch": 4.514640325209639,
      "grad_norm": 0.42813828587532043,
      "learning_rate": 9.771156342455012e-07,
      "loss": 0.0119,
      "step": 2758680
    },
    {
      "epoch": 4.5146730556482915,
      "grad_norm": 0.2975485324859619,
      "learning_rate": 9.770497420319841e-07,
      "loss": 0.0147,
      "step": 2758700
    },
    {
      "epoch": 4.514705786086945,
      "grad_norm": 0.416524738073349,
      "learning_rate": 9.76983849818467e-07,
      "loss": 0.0114,
      "step": 2758720
    },
    {
      "epoch": 4.514738516525599,
      "grad_norm": 0.1348382830619812,
      "learning_rate": 9.7691795760495e-07,
      "loss": 0.0073,
      "step": 2758740
    },
    {
      "epoch": 4.514771246964251,
      "grad_norm": 0.22935718297958374,
      "learning_rate": 9.768520653914328e-07,
      "loss": 0.0116,
      "step": 2758760
    },
    {
      "epoch": 4.514803977402905,
      "grad_norm": 0.2766818702220917,
      "learning_rate": 9.767861731779155e-07,
      "loss": 0.0096,
      "step": 2758780
    },
    {
      "epoch": 4.5148367078415585,
      "grad_norm": 0.13616535067558289,
      "learning_rate": 9.767202809643987e-07,
      "loss": 0.0064,
      "step": 2758800
    },
    {
      "epoch": 4.514869438280212,
      "grad_norm": 0.29940274357795715,
      "learning_rate": 9.766543887508814e-07,
      "loss": 0.0054,
      "step": 2758820
    },
    {
      "epoch": 4.514902168718865,
      "grad_norm": 0.35262978076934814,
      "learning_rate": 9.765884965373644e-07,
      "loss": 0.0132,
      "step": 2758840
    },
    {
      "epoch": 4.5149348991575184,
      "grad_norm": 0.5413727164268494,
      "learning_rate": 9.76522604323847e-07,
      "loss": 0.0093,
      "step": 2758860
    },
    {
      "epoch": 4.514967629596172,
      "grad_norm": 0.3400821089744568,
      "learning_rate": 9.7645671211033e-07,
      "loss": 0.0075,
      "step": 2758880
    },
    {
      "epoch": 4.515000360034826,
      "grad_norm": 0.23555821180343628,
      "learning_rate": 9.76390819896813e-07,
      "loss": 0.0119,
      "step": 2758900
    },
    {
      "epoch": 4.515033090473478,
      "grad_norm": 0.04083294793963432,
      "learning_rate": 9.763249276832957e-07,
      "loss": 0.0068,
      "step": 2758920
    },
    {
      "epoch": 4.515065820912132,
      "grad_norm": 0.08172743022441864,
      "learning_rate": 9.762590354697787e-07,
      "loss": 0.0127,
      "step": 2758940
    },
    {
      "epoch": 4.5150985513507855,
      "grad_norm": 0.12467627227306366,
      "learning_rate": 9.761931432562614e-07,
      "loss": 0.0056,
      "step": 2758960
    },
    {
      "epoch": 4.515131281789438,
      "grad_norm": 0.2276102602481842,
      "learning_rate": 9.761272510427444e-07,
      "loss": 0.0059,
      "step": 2758980
    },
    {
      "epoch": 4.515164012228092,
      "grad_norm": 0.43326249718666077,
      "learning_rate": 9.760613588292273e-07,
      "loss": 0.0074,
      "step": 2759000
    },
    {
      "epoch": 4.515196742666745,
      "grad_norm": 0.2617201507091522,
      "learning_rate": 9.7599546661571e-07,
      "loss": 0.0082,
      "step": 2759020
    },
    {
      "epoch": 4.515229473105398,
      "grad_norm": 0.15910620987415314,
      "learning_rate": 9.75929574402193e-07,
      "loss": 0.0096,
      "step": 2759040
    },
    {
      "epoch": 4.515262203544052,
      "grad_norm": 0.1679627001285553,
      "learning_rate": 9.758636821886758e-07,
      "loss": 0.0089,
      "step": 2759060
    },
    {
      "epoch": 4.515294933982705,
      "grad_norm": 0.23597633838653564,
      "learning_rate": 9.757977899751587e-07,
      "loss": 0.0065,
      "step": 2759080
    },
    {
      "epoch": 4.515327664421359,
      "grad_norm": 0.08021244406700134,
      "learning_rate": 9.757318977616417e-07,
      "loss": 0.0055,
      "step": 2759100
    },
    {
      "epoch": 4.515360394860012,
      "grad_norm": 0.18730273842811584,
      "learning_rate": 9.756660055481244e-07,
      "loss": 0.0153,
      "step": 2759120
    },
    {
      "epoch": 4.515393125298665,
      "grad_norm": 0.2905201315879822,
      "learning_rate": 9.756001133346074e-07,
      "loss": 0.0103,
      "step": 2759140
    },
    {
      "epoch": 4.515425855737319,
      "grad_norm": 0.11362575739622116,
      "learning_rate": 9.755342211210903e-07,
      "loss": 0.0068,
      "step": 2759160
    },
    {
      "epoch": 4.515458586175972,
      "grad_norm": 0.2787049114704132,
      "learning_rate": 9.75468328907573e-07,
      "loss": 0.0065,
      "step": 2759180
    },
    {
      "epoch": 4.515491316614625,
      "grad_norm": 0.37685129046440125,
      "learning_rate": 9.75402436694056e-07,
      "loss": 0.0085,
      "step": 2759200
    },
    {
      "epoch": 4.515524047053279,
      "grad_norm": 0.4139568507671356,
      "learning_rate": 9.753365444805387e-07,
      "loss": 0.0082,
      "step": 2759220
    },
    {
      "epoch": 4.515556777491932,
      "grad_norm": 0.08351843059062958,
      "learning_rate": 9.752706522670217e-07,
      "loss": 0.011,
      "step": 2759240
    },
    {
      "epoch": 4.515589507930585,
      "grad_norm": 0.29829999804496765,
      "learning_rate": 9.752047600535046e-07,
      "loss": 0.0096,
      "step": 2759260
    },
    {
      "epoch": 4.515622238369239,
      "grad_norm": 0.5097896456718445,
      "learning_rate": 9.751388678399874e-07,
      "loss": 0.009,
      "step": 2759280
    },
    {
      "epoch": 4.515654968807892,
      "grad_norm": 0.5470505952835083,
      "learning_rate": 9.750729756264703e-07,
      "loss": 0.0073,
      "step": 2759300
    },
    {
      "epoch": 4.515687699246545,
      "grad_norm": 0.09825414419174194,
      "learning_rate": 9.75007083412953e-07,
      "loss": 0.0089,
      "step": 2759320
    },
    {
      "epoch": 4.5157204296851985,
      "grad_norm": 0.16332966089248657,
      "learning_rate": 9.74941191199436e-07,
      "loss": 0.0119,
      "step": 2759340
    },
    {
      "epoch": 4.515753160123852,
      "grad_norm": 0.20650869607925415,
      "learning_rate": 9.74875298985919e-07,
      "loss": 0.0098,
      "step": 2759360
    },
    {
      "epoch": 4.515785890562506,
      "grad_norm": 0.08355312794446945,
      "learning_rate": 9.748094067724017e-07,
      "loss": 0.0064,
      "step": 2759380
    },
    {
      "epoch": 4.515818621001158,
      "grad_norm": 0.17896907031536102,
      "learning_rate": 9.747435145588847e-07,
      "loss": 0.0064,
      "step": 2759400
    },
    {
      "epoch": 4.515851351439812,
      "grad_norm": 0.15352311730384827,
      "learning_rate": 9.746776223453676e-07,
      "loss": 0.0073,
      "step": 2759420
    },
    {
      "epoch": 4.515884081878466,
      "grad_norm": 0.3336969017982483,
      "learning_rate": 9.746117301318504e-07,
      "loss": 0.0111,
      "step": 2759440
    },
    {
      "epoch": 4.515916812317119,
      "grad_norm": 0.20777088403701782,
      "learning_rate": 9.745458379183333e-07,
      "loss": 0.01,
      "step": 2759460
    },
    {
      "epoch": 4.515949542755772,
      "grad_norm": 0.13294023275375366,
      "learning_rate": 9.74479945704816e-07,
      "loss": 0.0085,
      "step": 2759480
    },
    {
      "epoch": 4.5159822731944255,
      "grad_norm": 0.468416690826416,
      "learning_rate": 9.74414053491299e-07,
      "loss": 0.0109,
      "step": 2759500
    },
    {
      "epoch": 4.516015003633079,
      "grad_norm": 0.12358200550079346,
      "learning_rate": 9.74348161277782e-07,
      "loss": 0.0098,
      "step": 2759520
    },
    {
      "epoch": 4.516047734071732,
      "grad_norm": 0.15970249474048615,
      "learning_rate": 9.742822690642647e-07,
      "loss": 0.008,
      "step": 2759540
    },
    {
      "epoch": 4.516080464510385,
      "grad_norm": 0.32850950956344604,
      "learning_rate": 9.742163768507476e-07,
      "loss": 0.0082,
      "step": 2759560
    },
    {
      "epoch": 4.516113194949039,
      "grad_norm": 0.4216965138912201,
      "learning_rate": 9.741504846372304e-07,
      "loss": 0.0084,
      "step": 2759580
    },
    {
      "epoch": 4.516145925387692,
      "grad_norm": 0.13015440106391907,
      "learning_rate": 9.740845924237133e-07,
      "loss": 0.0081,
      "step": 2759600
    },
    {
      "epoch": 4.516178655826345,
      "grad_norm": 0.24325931072235107,
      "learning_rate": 9.740187002101963e-07,
      "loss": 0.0093,
      "step": 2759620
    },
    {
      "epoch": 4.516211386264999,
      "grad_norm": 0.1443231999874115,
      "learning_rate": 9.73952807996679e-07,
      "loss": 0.0081,
      "step": 2759640
    },
    {
      "epoch": 4.5162441167036524,
      "grad_norm": 0.4135916829109192,
      "learning_rate": 9.73886915783162e-07,
      "loss": 0.008,
      "step": 2759660
    },
    {
      "epoch": 4.516276847142305,
      "grad_norm": 0.404989629983902,
      "learning_rate": 9.73821023569645e-07,
      "loss": 0.0069,
      "step": 2759680
    },
    {
      "epoch": 4.516309577580959,
      "grad_norm": 0.27551019191741943,
      "learning_rate": 9.737551313561277e-07,
      "loss": 0.0091,
      "step": 2759700
    },
    {
      "epoch": 4.516342308019612,
      "grad_norm": 0.1449727565050125,
      "learning_rate": 9.736892391426106e-07,
      "loss": 0.0111,
      "step": 2759720
    },
    {
      "epoch": 4.516375038458265,
      "grad_norm": 0.5912784934043884,
      "learning_rate": 9.736233469290934e-07,
      "loss": 0.0134,
      "step": 2759740
    },
    {
      "epoch": 4.516407768896919,
      "grad_norm": 0.1604272574186325,
      "learning_rate": 9.735574547155763e-07,
      "loss": 0.008,
      "step": 2759760
    },
    {
      "epoch": 4.516440499335572,
      "grad_norm": 0.2801377773284912,
      "learning_rate": 9.734915625020593e-07,
      "loss": 0.0071,
      "step": 2759780
    },
    {
      "epoch": 4.516473229774226,
      "grad_norm": 0.25499776005744934,
      "learning_rate": 9.73425670288542e-07,
      "loss": 0.007,
      "step": 2759800
    },
    {
      "epoch": 4.5165059602128785,
      "grad_norm": 0.1424407660961151,
      "learning_rate": 9.73359778075025e-07,
      "loss": 0.0145,
      "step": 2759820
    },
    {
      "epoch": 4.516538690651532,
      "grad_norm": 0.15912026166915894,
      "learning_rate": 9.732938858615077e-07,
      "loss": 0.0073,
      "step": 2759840
    },
    {
      "epoch": 4.516571421090186,
      "grad_norm": 0.09492119401693344,
      "learning_rate": 9.732279936479909e-07,
      "loss": 0.0122,
      "step": 2759860
    },
    {
      "epoch": 4.516604151528838,
      "grad_norm": 0.05482623353600502,
      "learning_rate": 9.731621014344736e-07,
      "loss": 0.0071,
      "step": 2759880
    },
    {
      "epoch": 4.516636881967492,
      "grad_norm": 0.2524491250514984,
      "learning_rate": 9.730962092209563e-07,
      "loss": 0.0044,
      "step": 2759900
    },
    {
      "epoch": 4.516669612406146,
      "grad_norm": 0.25041452050209045,
      "learning_rate": 9.730303170074393e-07,
      "loss": 0.0063,
      "step": 2759920
    },
    {
      "epoch": 4.516702342844798,
      "grad_norm": 0.11073962599039078,
      "learning_rate": 9.72964424793922e-07,
      "loss": 0.0111,
      "step": 2759940
    },
    {
      "epoch": 4.516735073283452,
      "grad_norm": 0.5616933703422546,
      "learning_rate": 9.728985325804052e-07,
      "loss": 0.0123,
      "step": 2759960
    },
    {
      "epoch": 4.5167678037221055,
      "grad_norm": 0.28116071224212646,
      "learning_rate": 9.72832640366888e-07,
      "loss": 0.011,
      "step": 2759980
    },
    {
      "epoch": 4.516800534160759,
      "grad_norm": 0.41595199704170227,
      "learning_rate": 9.727667481533707e-07,
      "loss": 0.0126,
      "step": 2760000
    },
    {
      "epoch": 4.516833264599412,
      "grad_norm": 0.2725200653076172,
      "learning_rate": 9.727008559398536e-07,
      "loss": 0.0063,
      "step": 2760020
    },
    {
      "epoch": 4.516865995038065,
      "grad_norm": 0.28072693943977356,
      "learning_rate": 9.726349637263366e-07,
      "loss": 0.0067,
      "step": 2760040
    },
    {
      "epoch": 4.516898725476719,
      "grad_norm": 0.12481975555419922,
      "learning_rate": 9.725690715128195e-07,
      "loss": 0.0114,
      "step": 2760060
    },
    {
      "epoch": 4.516931455915373,
      "grad_norm": 0.19585020840168,
      "learning_rate": 9.725031792993023e-07,
      "loss": 0.012,
      "step": 2760080
    },
    {
      "epoch": 4.516964186354025,
      "grad_norm": 0.23802463710308075,
      "learning_rate": 9.72437287085785e-07,
      "loss": 0.0081,
      "step": 2760100
    },
    {
      "epoch": 4.516996916792679,
      "grad_norm": 0.10638504475355148,
      "learning_rate": 9.723713948722682e-07,
      "loss": 0.0063,
      "step": 2760120
    },
    {
      "epoch": 4.5170296472313325,
      "grad_norm": 0.1928030550479889,
      "learning_rate": 9.72305502658751e-07,
      "loss": 0.0123,
      "step": 2760140
    },
    {
      "epoch": 4.517062377669985,
      "grad_norm": 0.2764502763748169,
      "learning_rate": 9.722396104452339e-07,
      "loss": 0.0119,
      "step": 2760160
    },
    {
      "epoch": 4.517095108108639,
      "grad_norm": 0.2987583875656128,
      "learning_rate": 9.721737182317166e-07,
      "loss": 0.0098,
      "step": 2760180
    },
    {
      "epoch": 4.517127838547292,
      "grad_norm": 0.33121052384376526,
      "learning_rate": 9.721078260181996e-07,
      "loss": 0.011,
      "step": 2760200
    },
    {
      "epoch": 4.517160568985945,
      "grad_norm": 0.5050812363624573,
      "learning_rate": 9.720419338046825e-07,
      "loss": 0.0082,
      "step": 2760220
    },
    {
      "epoch": 4.517193299424599,
      "grad_norm": 0.38098984956741333,
      "learning_rate": 9.719760415911652e-07,
      "loss": 0.0137,
      "step": 2760240
    },
    {
      "epoch": 4.517226029863252,
      "grad_norm": 0.3102976381778717,
      "learning_rate": 9.719101493776482e-07,
      "loss": 0.0078,
      "step": 2760260
    },
    {
      "epoch": 4.517258760301906,
      "grad_norm": 0.05207561329007149,
      "learning_rate": 9.71844257164131e-07,
      "loss": 0.0103,
      "step": 2760280
    },
    {
      "epoch": 4.517291490740559,
      "grad_norm": 0.03918486088514328,
      "learning_rate": 9.717783649506139e-07,
      "loss": 0.0128,
      "step": 2760300
    },
    {
      "epoch": 4.517324221179212,
      "grad_norm": 0.12760654091835022,
      "learning_rate": 9.717124727370968e-07,
      "loss": 0.0075,
      "step": 2760320
    },
    {
      "epoch": 4.517356951617866,
      "grad_norm": 0.258728951215744,
      "learning_rate": 9.716465805235796e-07,
      "loss": 0.0079,
      "step": 2760340
    },
    {
      "epoch": 4.517389682056519,
      "grad_norm": 1.2360658645629883,
      "learning_rate": 9.715806883100625e-07,
      "loss": 0.0108,
      "step": 2760360
    },
    {
      "epoch": 4.517422412495172,
      "grad_norm": 0.39841020107269287,
      "learning_rate": 9.715147960965453e-07,
      "loss": 0.0083,
      "step": 2760380
    },
    {
      "epoch": 4.517455142933826,
      "grad_norm": 0.3656860888004303,
      "learning_rate": 9.714489038830282e-07,
      "loss": 0.0101,
      "step": 2760400
    },
    {
      "epoch": 4.517487873372479,
      "grad_norm": 0.06938669085502625,
      "learning_rate": 9.713830116695112e-07,
      "loss": 0.0122,
      "step": 2760420
    },
    {
      "epoch": 4.517520603811132,
      "grad_norm": 0.28394559025764465,
      "learning_rate": 9.71317119455994e-07,
      "loss": 0.0083,
      "step": 2760440
    },
    {
      "epoch": 4.517553334249786,
      "grad_norm": 0.18524053692817688,
      "learning_rate": 9.712512272424769e-07,
      "loss": 0.0078,
      "step": 2760460
    },
    {
      "epoch": 4.517586064688439,
      "grad_norm": 0.14149945974349976,
      "learning_rate": 9.711853350289598e-07,
      "loss": 0.0088,
      "step": 2760480
    },
    {
      "epoch": 4.517618795127092,
      "grad_norm": 0.1590142697095871,
      "learning_rate": 9.711194428154426e-07,
      "loss": 0.0057,
      "step": 2760500
    },
    {
      "epoch": 4.5176515255657455,
      "grad_norm": 0.23838093876838684,
      "learning_rate": 9.710535506019255e-07,
      "loss": 0.0113,
      "step": 2760520
    },
    {
      "epoch": 4.517684256004399,
      "grad_norm": 0.1247694194316864,
      "learning_rate": 9.709876583884082e-07,
      "loss": 0.0068,
      "step": 2760540
    },
    {
      "epoch": 4.517716986443053,
      "grad_norm": 0.2235725373029709,
      "learning_rate": 9.709217661748912e-07,
      "loss": 0.0091,
      "step": 2760560
    },
    {
      "epoch": 4.517749716881705,
      "grad_norm": 0.5190343856811523,
      "learning_rate": 9.708558739613741e-07,
      "loss": 0.0087,
      "step": 2760580
    },
    {
      "epoch": 4.517782447320359,
      "grad_norm": 0.18508468568325043,
      "learning_rate": 9.707899817478569e-07,
      "loss": 0.0098,
      "step": 2760600
    },
    {
      "epoch": 4.5178151777590125,
      "grad_norm": 0.25624871253967285,
      "learning_rate": 9.707240895343398e-07,
      "loss": 0.0107,
      "step": 2760620
    },
    {
      "epoch": 4.517847908197666,
      "grad_norm": 0.17571638524532318,
      "learning_rate": 9.706581973208226e-07,
      "loss": 0.0065,
      "step": 2760640
    },
    {
      "epoch": 4.517880638636319,
      "grad_norm": 0.25098100304603577,
      "learning_rate": 9.705923051073055e-07,
      "loss": 0.0059,
      "step": 2760660
    },
    {
      "epoch": 4.517913369074972,
      "grad_norm": 0.2560904026031494,
      "learning_rate": 9.705264128937885e-07,
      "loss": 0.0101,
      "step": 2760680
    },
    {
      "epoch": 4.517946099513626,
      "grad_norm": 0.38786420226097107,
      "learning_rate": 9.704605206802712e-07,
      "loss": 0.008,
      "step": 2760700
    },
    {
      "epoch": 4.517978829952279,
      "grad_norm": 0.2665887773036957,
      "learning_rate": 9.703946284667542e-07,
      "loss": 0.0135,
      "step": 2760720
    },
    {
      "epoch": 4.518011560390932,
      "grad_norm": 0.13199689984321594,
      "learning_rate": 9.703287362532371e-07,
      "loss": 0.0086,
      "step": 2760740
    },
    {
      "epoch": 4.518044290829586,
      "grad_norm": 0.34719955921173096,
      "learning_rate": 9.702628440397199e-07,
      "loss": 0.0087,
      "step": 2760760
    },
    {
      "epoch": 4.518077021268239,
      "grad_norm": 0.12013840675354004,
      "learning_rate": 9.701969518262028e-07,
      "loss": 0.0116,
      "step": 2760780
    },
    {
      "epoch": 4.518109751706892,
      "grad_norm": 0.23910555243492126,
      "learning_rate": 9.701310596126856e-07,
      "loss": 0.0099,
      "step": 2760800
    },
    {
      "epoch": 4.518142482145546,
      "grad_norm": 0.08721229434013367,
      "learning_rate": 9.700651673991685e-07,
      "loss": 0.0079,
      "step": 2760820
    },
    {
      "epoch": 4.518175212584199,
      "grad_norm": 0.3417045474052429,
      "learning_rate": 9.699992751856515e-07,
      "loss": 0.0156,
      "step": 2760840
    },
    {
      "epoch": 4.518207943022852,
      "grad_norm": 0.3722684383392334,
      "learning_rate": 9.699333829721342e-07,
      "loss": 0.0084,
      "step": 2760860
    },
    {
      "epoch": 4.518240673461506,
      "grad_norm": 1.1179468631744385,
      "learning_rate": 9.698674907586171e-07,
      "loss": 0.0121,
      "step": 2760880
    },
    {
      "epoch": 4.518273403900159,
      "grad_norm": 2.3194918632507324,
      "learning_rate": 9.698015985450999e-07,
      "loss": 0.0092,
      "step": 2760900
    },
    {
      "epoch": 4.518306134338813,
      "grad_norm": 0.40574103593826294,
      "learning_rate": 9.697357063315828e-07,
      "loss": 0.0101,
      "step": 2760920
    },
    {
      "epoch": 4.518338864777466,
      "grad_norm": 0.13587120175361633,
      "learning_rate": 9.696698141180658e-07,
      "loss": 0.0065,
      "step": 2760940
    },
    {
      "epoch": 4.518371595216119,
      "grad_norm": 0.10690806061029434,
      "learning_rate": 9.696039219045485e-07,
      "loss": 0.0076,
      "step": 2760960
    },
    {
      "epoch": 4.518404325654773,
      "grad_norm": 0.16634905338287354,
      "learning_rate": 9.695380296910315e-07,
      "loss": 0.0095,
      "step": 2760980
    },
    {
      "epoch": 4.5184370560934255,
      "grad_norm": 0.12260537594556808,
      "learning_rate": 9.694721374775144e-07,
      "loss": 0.0058,
      "step": 2761000
    },
    {
      "epoch": 4.518469786532079,
      "grad_norm": 0.2756166458129883,
      "learning_rate": 9.694062452639972e-07,
      "loss": 0.0063,
      "step": 2761020
    },
    {
      "epoch": 4.518502516970733,
      "grad_norm": 0.1550752967596054,
      "learning_rate": 9.693403530504801e-07,
      "loss": 0.0079,
      "step": 2761040
    },
    {
      "epoch": 4.518535247409385,
      "grad_norm": 0.1500614881515503,
      "learning_rate": 9.692744608369629e-07,
      "loss": 0.0091,
      "step": 2761060
    },
    {
      "epoch": 4.518567977848039,
      "grad_norm": 0.3221522271633148,
      "learning_rate": 9.692085686234458e-07,
      "loss": 0.0107,
      "step": 2761080
    },
    {
      "epoch": 4.518600708286693,
      "grad_norm": 0.13623930513858795,
      "learning_rate": 9.691426764099288e-07,
      "loss": 0.0098,
      "step": 2761100
    },
    {
      "epoch": 4.518633438725346,
      "grad_norm": 0.3579215109348297,
      "learning_rate": 9.690767841964115e-07,
      "loss": 0.0119,
      "step": 2761120
    },
    {
      "epoch": 4.518666169163999,
      "grad_norm": 0.10245547443628311,
      "learning_rate": 9.690108919828945e-07,
      "loss": 0.0108,
      "step": 2761140
    },
    {
      "epoch": 4.5186988996026525,
      "grad_norm": 0.03123767301440239,
      "learning_rate": 9.689449997693772e-07,
      "loss": 0.0085,
      "step": 2761160
    },
    {
      "epoch": 4.518731630041306,
      "grad_norm": 0.3678888976573944,
      "learning_rate": 9.688791075558604e-07,
      "loss": 0.0097,
      "step": 2761180
    },
    {
      "epoch": 4.518764360479959,
      "grad_norm": 0.19439169764518738,
      "learning_rate": 9.68813215342343e-07,
      "loss": 0.0086,
      "step": 2761200
    },
    {
      "epoch": 4.518797090918612,
      "grad_norm": 0.11160384863615036,
      "learning_rate": 9.687473231288258e-07,
      "loss": 0.0097,
      "step": 2761220
    },
    {
      "epoch": 4.518829821357266,
      "grad_norm": 0.4070564806461334,
      "learning_rate": 9.686814309153088e-07,
      "loss": 0.0094,
      "step": 2761240
    },
    {
      "epoch": 4.51886255179592,
      "grad_norm": 0.2197129726409912,
      "learning_rate": 9.686155387017917e-07,
      "loss": 0.0093,
      "step": 2761260
    },
    {
      "epoch": 4.518895282234572,
      "grad_norm": 0.18682172894477844,
      "learning_rate": 9.685496464882747e-07,
      "loss": 0.0065,
      "step": 2761280
    },
    {
      "epoch": 4.518928012673226,
      "grad_norm": 0.5260144472122192,
      "learning_rate": 9.684837542747574e-07,
      "loss": 0.0063,
      "step": 2761300
    },
    {
      "epoch": 4.5189607431118795,
      "grad_norm": 0.03970731049776077,
      "learning_rate": 9.684178620612402e-07,
      "loss": 0.0072,
      "step": 2761320
    },
    {
      "epoch": 4.518993473550532,
      "grad_norm": 0.10819274187088013,
      "learning_rate": 9.683519698477231e-07,
      "loss": 0.0089,
      "step": 2761340
    },
    {
      "epoch": 4.519026203989186,
      "grad_norm": 0.0707656741142273,
      "learning_rate": 9.68286077634206e-07,
      "loss": 0.0089,
      "step": 2761360
    },
    {
      "epoch": 4.519058934427839,
      "grad_norm": 0.9198505878448486,
      "learning_rate": 9.68220185420689e-07,
      "loss": 0.0121,
      "step": 2761380
    },
    {
      "epoch": 4.519091664866492,
      "grad_norm": 0.19509698450565338,
      "learning_rate": 9.681542932071718e-07,
      "loss": 0.0067,
      "step": 2761400
    },
    {
      "epoch": 4.519124395305146,
      "grad_norm": 0.1700165718793869,
      "learning_rate": 9.680884009936545e-07,
      "loss": 0.0055,
      "step": 2761420
    },
    {
      "epoch": 4.519157125743799,
      "grad_norm": 0.05634600296616554,
      "learning_rate": 9.680225087801377e-07,
      "loss": 0.0117,
      "step": 2761440
    },
    {
      "epoch": 4.519189856182453,
      "grad_norm": 0.13838738203048706,
      "learning_rate": 9.679566165666204e-07,
      "loss": 0.0094,
      "step": 2761460
    },
    {
      "epoch": 4.5192225866211055,
      "grad_norm": 0.32245391607284546,
      "learning_rate": 9.678907243531034e-07,
      "loss": 0.0113,
      "step": 2761480
    },
    {
      "epoch": 4.519255317059759,
      "grad_norm": 0.08474216610193253,
      "learning_rate": 9.67824832139586e-07,
      "loss": 0.0071,
      "step": 2761500
    },
    {
      "epoch": 4.519288047498413,
      "grad_norm": 0.33439984917640686,
      "learning_rate": 9.67758939926069e-07,
      "loss": 0.0077,
      "step": 2761520
    },
    {
      "epoch": 4.519320777937066,
      "grad_norm": 0.21086744964122772,
      "learning_rate": 9.67693047712552e-07,
      "loss": 0.0083,
      "step": 2761540
    },
    {
      "epoch": 4.519353508375719,
      "grad_norm": 0.11729324609041214,
      "learning_rate": 9.676271554990347e-07,
      "loss": 0.0078,
      "step": 2761560
    },
    {
      "epoch": 4.519386238814373,
      "grad_norm": 0.3158906400203705,
      "learning_rate": 9.675612632855177e-07,
      "loss": 0.0079,
      "step": 2761580
    },
    {
      "epoch": 4.519418969253026,
      "grad_norm": 0.6184436082839966,
      "learning_rate": 9.674953710720004e-07,
      "loss": 0.0087,
      "step": 2761600
    },
    {
      "epoch": 4.519451699691679,
      "grad_norm": 0.1981612592935562,
      "learning_rate": 9.674294788584834e-07,
      "loss": 0.0065,
      "step": 2761620
    },
    {
      "epoch": 4.5194844301303325,
      "grad_norm": 0.08826849609613419,
      "learning_rate": 9.673635866449663e-07,
      "loss": 0.0051,
      "step": 2761640
    },
    {
      "epoch": 4.519517160568986,
      "grad_norm": 0.1736760139465332,
      "learning_rate": 9.67297694431449e-07,
      "loss": 0.0091,
      "step": 2761660
    },
    {
      "epoch": 4.519549891007639,
      "grad_norm": 0.4916604161262512,
      "learning_rate": 9.67231802217932e-07,
      "loss": 0.0072,
      "step": 2761680
    },
    {
      "epoch": 4.519582621446292,
      "grad_norm": 0.18709401786327362,
      "learning_rate": 9.67165910004415e-07,
      "loss": 0.0106,
      "step": 2761700
    },
    {
      "epoch": 4.519615351884946,
      "grad_norm": 0.12317834794521332,
      "learning_rate": 9.671000177908977e-07,
      "loss": 0.0088,
      "step": 2761720
    },
    {
      "epoch": 4.5196480823236,
      "grad_norm": 0.2790333926677704,
      "learning_rate": 9.670341255773807e-07,
      "loss": 0.0082,
      "step": 2761740
    },
    {
      "epoch": 4.519680812762252,
      "grad_norm": 0.7323877215385437,
      "learning_rate": 9.669682333638634e-07,
      "loss": 0.009,
      "step": 2761760
    },
    {
      "epoch": 4.519713543200906,
      "grad_norm": 0.1593593806028366,
      "learning_rate": 9.669023411503464e-07,
      "loss": 0.0064,
      "step": 2761780
    },
    {
      "epoch": 4.5197462736395595,
      "grad_norm": 0.17793187499046326,
      "learning_rate": 9.668364489368293e-07,
      "loss": 0.0058,
      "step": 2761800
    },
    {
      "epoch": 4.519779004078213,
      "grad_norm": 0.2724141478538513,
      "learning_rate": 9.66770556723312e-07,
      "loss": 0.0146,
      "step": 2761820
    },
    {
      "epoch": 4.519811734516866,
      "grad_norm": 0.36513417959213257,
      "learning_rate": 9.66704664509795e-07,
      "loss": 0.0091,
      "step": 2761840
    },
    {
      "epoch": 4.519844464955519,
      "grad_norm": 0.33905425667762756,
      "learning_rate": 9.666387722962777e-07,
      "loss": 0.0081,
      "step": 2761860
    },
    {
      "epoch": 4.519877195394173,
      "grad_norm": 0.10814842581748962,
      "learning_rate": 9.665728800827607e-07,
      "loss": 0.0069,
      "step": 2761880
    },
    {
      "epoch": 4.519909925832826,
      "grad_norm": 0.021733634173870087,
      "learning_rate": 9.665069878692437e-07,
      "loss": 0.0079,
      "step": 2761900
    },
    {
      "epoch": 4.519942656271479,
      "grad_norm": 0.05066456273198128,
      "learning_rate": 9.664410956557264e-07,
      "loss": 0.0083,
      "step": 2761920
    },
    {
      "epoch": 4.519975386710133,
      "grad_norm": 0.43186646699905396,
      "learning_rate": 9.663752034422093e-07,
      "loss": 0.0074,
      "step": 2761940
    },
    {
      "epoch": 4.520008117148786,
      "grad_norm": 0.10646849870681763,
      "learning_rate": 9.66309311228692e-07,
      "loss": 0.0075,
      "step": 2761960
    },
    {
      "epoch": 4.520040847587439,
      "grad_norm": 0.25179120898246765,
      "learning_rate": 9.66243419015175e-07,
      "loss": 0.0109,
      "step": 2761980
    },
    {
      "epoch": 4.520073578026093,
      "grad_norm": 0.09604327380657196,
      "learning_rate": 9.66177526801658e-07,
      "loss": 0.0061,
      "step": 2762000
    },
    {
      "epoch": 4.520106308464746,
      "grad_norm": 0.239254429936409,
      "learning_rate": 9.661116345881407e-07,
      "loss": 0.009,
      "step": 2762020
    },
    {
      "epoch": 4.520139038903399,
      "grad_norm": 0.24530041217803955,
      "learning_rate": 9.660457423746237e-07,
      "loss": 0.0101,
      "step": 2762040
    },
    {
      "epoch": 4.520171769342053,
      "grad_norm": 0.33129507303237915,
      "learning_rate": 9.659798501611066e-07,
      "loss": 0.0085,
      "step": 2762060
    },
    {
      "epoch": 4.520204499780706,
      "grad_norm": 0.1314552128314972,
      "learning_rate": 9.659139579475894e-07,
      "loss": 0.0079,
      "step": 2762080
    },
    {
      "epoch": 4.52023723021936,
      "grad_norm": 0.45537012815475464,
      "learning_rate": 9.658480657340723e-07,
      "loss": 0.0093,
      "step": 2762100
    },
    {
      "epoch": 4.520269960658013,
      "grad_norm": 0.058768194168806076,
      "learning_rate": 9.65782173520555e-07,
      "loss": 0.0095,
      "step": 2762120
    },
    {
      "epoch": 4.520302691096666,
      "grad_norm": 0.1399495154619217,
      "learning_rate": 9.65716281307038e-07,
      "loss": 0.0061,
      "step": 2762140
    },
    {
      "epoch": 4.52033542153532,
      "grad_norm": 0.04328256472945213,
      "learning_rate": 9.65650389093521e-07,
      "loss": 0.006,
      "step": 2762160
    },
    {
      "epoch": 4.5203681519739725,
      "grad_norm": 0.3127134144306183,
      "learning_rate": 9.655844968800037e-07,
      "loss": 0.0099,
      "step": 2762180
    },
    {
      "epoch": 4.520400882412626,
      "grad_norm": 0.3499537706375122,
      "learning_rate": 9.655186046664867e-07,
      "loss": 0.0069,
      "step": 2762200
    },
    {
      "epoch": 4.52043361285128,
      "grad_norm": 0.46744248270988464,
      "learning_rate": 9.654527124529694e-07,
      "loss": 0.012,
      "step": 2762220
    },
    {
      "epoch": 4.520466343289932,
      "grad_norm": 0.3897733986377716,
      "learning_rate": 9.653868202394523e-07,
      "loss": 0.0061,
      "step": 2762240
    },
    {
      "epoch": 4.520499073728586,
      "grad_norm": 0.2117551565170288,
      "learning_rate": 9.653209280259353e-07,
      "loss": 0.0186,
      "step": 2762260
    },
    {
      "epoch": 4.5205318041672395,
      "grad_norm": 0.23980821669101715,
      "learning_rate": 9.65255035812418e-07,
      "loss": 0.0093,
      "step": 2762280
    },
    {
      "epoch": 4.520564534605893,
      "grad_norm": 0.1514725685119629,
      "learning_rate": 9.65189143598901e-07,
      "loss": 0.0068,
      "step": 2762300
    },
    {
      "epoch": 4.520597265044546,
      "grad_norm": 0.23921486735343933,
      "learning_rate": 9.65123251385384e-07,
      "loss": 0.0073,
      "step": 2762320
    },
    {
      "epoch": 4.520629995483199,
      "grad_norm": 0.1737498939037323,
      "learning_rate": 9.650573591718667e-07,
      "loss": 0.009,
      "step": 2762340
    },
    {
      "epoch": 4.520662725921853,
      "grad_norm": 0.45013612508773804,
      "learning_rate": 9.649914669583496e-07,
      "loss": 0.0121,
      "step": 2762360
    },
    {
      "epoch": 4.520695456360507,
      "grad_norm": 0.47983038425445557,
      "learning_rate": 9.649255747448324e-07,
      "loss": 0.0088,
      "step": 2762380
    },
    {
      "epoch": 4.520728186799159,
      "grad_norm": 0.16682042181491852,
      "learning_rate": 9.648596825313153e-07,
      "loss": 0.0093,
      "step": 2762400
    },
    {
      "epoch": 4.520760917237813,
      "grad_norm": 0.08574187010526657,
      "learning_rate": 9.647937903177983e-07,
      "loss": 0.0075,
      "step": 2762420
    },
    {
      "epoch": 4.5207936476764665,
      "grad_norm": 0.3291454017162323,
      "learning_rate": 9.64727898104281e-07,
      "loss": 0.008,
      "step": 2762440
    },
    {
      "epoch": 4.520826378115119,
      "grad_norm": 0.3215116858482361,
      "learning_rate": 9.64662005890764e-07,
      "loss": 0.0125,
      "step": 2762460
    },
    {
      "epoch": 4.520859108553773,
      "grad_norm": 0.17102769017219543,
      "learning_rate": 9.645961136772467e-07,
      "loss": 0.0125,
      "step": 2762480
    },
    {
      "epoch": 4.520891838992426,
      "grad_norm": 0.06714092940092087,
      "learning_rate": 9.645302214637299e-07,
      "loss": 0.0107,
      "step": 2762500
    },
    {
      "epoch": 4.520924569431079,
      "grad_norm": 0.2369004338979721,
      "learning_rate": 9.644643292502126e-07,
      "loss": 0.0104,
      "step": 2762520
    },
    {
      "epoch": 4.520957299869733,
      "grad_norm": 0.25019946694374084,
      "learning_rate": 9.643984370366953e-07,
      "loss": 0.0082,
      "step": 2762540
    },
    {
      "epoch": 4.520990030308386,
      "grad_norm": 0.16377651691436768,
      "learning_rate": 9.643325448231783e-07,
      "loss": 0.0088,
      "step": 2762560
    },
    {
      "epoch": 4.52102276074704,
      "grad_norm": 0.23581796884536743,
      "learning_rate": 9.642666526096613e-07,
      "loss": 0.0096,
      "step": 2762580
    },
    {
      "epoch": 4.521055491185693,
      "grad_norm": 0.5638183355331421,
      "learning_rate": 9.642007603961442e-07,
      "loss": 0.0092,
      "step": 2762600
    },
    {
      "epoch": 4.521088221624346,
      "grad_norm": 0.1936511993408203,
      "learning_rate": 9.64134868182627e-07,
      "loss": 0.0086,
      "step": 2762620
    },
    {
      "epoch": 4.521120952063,
      "grad_norm": 0.15637698769569397,
      "learning_rate": 9.640689759691097e-07,
      "loss": 0.0084,
      "step": 2762640
    },
    {
      "epoch": 4.5211536825016525,
      "grad_norm": 0.13485099375247955,
      "learning_rate": 9.640030837555926e-07,
      "loss": 0.012,
      "step": 2762660
    },
    {
      "epoch": 4.521186412940306,
      "grad_norm": 0.07888782024383545,
      "learning_rate": 9.639371915420756e-07,
      "loss": 0.0097,
      "step": 2762680
    },
    {
      "epoch": 4.52121914337896,
      "grad_norm": 0.1208520159125328,
      "learning_rate": 9.638712993285585e-07,
      "loss": 0.0095,
      "step": 2762700
    },
    {
      "epoch": 4.521251873817613,
      "grad_norm": 0.10817322880029678,
      "learning_rate": 9.638054071150413e-07,
      "loss": 0.0066,
      "step": 2762720
    },
    {
      "epoch": 4.521284604256266,
      "grad_norm": 0.31841427087783813,
      "learning_rate": 9.63739514901524e-07,
      "loss": 0.0056,
      "step": 2762740
    },
    {
      "epoch": 4.52131733469492,
      "grad_norm": 0.08738496899604797,
      "learning_rate": 9.636736226880072e-07,
      "loss": 0.009,
      "step": 2762760
    },
    {
      "epoch": 4.521350065133573,
      "grad_norm": 0.16260352730751038,
      "learning_rate": 9.6360773047449e-07,
      "loss": 0.0106,
      "step": 2762780
    },
    {
      "epoch": 4.521382795572226,
      "grad_norm": 0.13515064120292664,
      "learning_rate": 9.635418382609729e-07,
      "loss": 0.0092,
      "step": 2762800
    },
    {
      "epoch": 4.5214155260108795,
      "grad_norm": 0.12990112602710724,
      "learning_rate": 9.634759460474556e-07,
      "loss": 0.0063,
      "step": 2762820
    },
    {
      "epoch": 4.521448256449533,
      "grad_norm": 0.06941644102334976,
      "learning_rate": 9.634100538339386e-07,
      "loss": 0.0087,
      "step": 2762840
    },
    {
      "epoch": 4.521480986888186,
      "grad_norm": 0.5351901054382324,
      "learning_rate": 9.633441616204215e-07,
      "loss": 0.0111,
      "step": 2762860
    },
    {
      "epoch": 4.521513717326839,
      "grad_norm": 0.16760361194610596,
      "learning_rate": 9.632782694069043e-07,
      "loss": 0.0122,
      "step": 2762880
    },
    {
      "epoch": 4.521546447765493,
      "grad_norm": 0.9603168368339539,
      "learning_rate": 9.632123771933872e-07,
      "loss": 0.0083,
      "step": 2762900
    },
    {
      "epoch": 4.521579178204147,
      "grad_norm": 0.05361726135015488,
      "learning_rate": 9.6314648497987e-07,
      "loss": 0.0067,
      "step": 2762920
    },
    {
      "epoch": 4.521611908642799,
      "grad_norm": 0.11713149398565292,
      "learning_rate": 9.63080592766353e-07,
      "loss": 0.0086,
      "step": 2762940
    },
    {
      "epoch": 4.521644639081453,
      "grad_norm": 0.10817436128854752,
      "learning_rate": 9.630147005528358e-07,
      "loss": 0.0083,
      "step": 2762960
    },
    {
      "epoch": 4.5216773695201065,
      "grad_norm": 0.2743324339389801,
      "learning_rate": 9.629488083393186e-07,
      "loss": 0.0074,
      "step": 2762980
    },
    {
      "epoch": 4.52171009995876,
      "grad_norm": 0.3562873601913452,
      "learning_rate": 9.628829161258015e-07,
      "loss": 0.0075,
      "step": 2763000
    },
    {
      "epoch": 4.521742830397413,
      "grad_norm": 0.177815780043602,
      "learning_rate": 9.628170239122845e-07,
      "loss": 0.0097,
      "step": 2763020
    },
    {
      "epoch": 4.521775560836066,
      "grad_norm": 0.3551255166530609,
      "learning_rate": 9.627511316987672e-07,
      "loss": 0.0104,
      "step": 2763040
    },
    {
      "epoch": 4.52180829127472,
      "grad_norm": 0.17915387451648712,
      "learning_rate": 9.626852394852502e-07,
      "loss": 0.0108,
      "step": 2763060
    },
    {
      "epoch": 4.521841021713373,
      "grad_norm": 0.32482457160949707,
      "learning_rate": 9.62619347271733e-07,
      "loss": 0.0091,
      "step": 2763080
    },
    {
      "epoch": 4.521873752152026,
      "grad_norm": 0.19402074813842773,
      "learning_rate": 9.625534550582159e-07,
      "loss": 0.0079,
      "step": 2763100
    },
    {
      "epoch": 4.52190648259068,
      "grad_norm": 0.0789121612906456,
      "learning_rate": 9.624875628446988e-07,
      "loss": 0.01,
      "step": 2763120
    },
    {
      "epoch": 4.5219392130293325,
      "grad_norm": 0.2205391228199005,
      "learning_rate": 9.624216706311816e-07,
      "loss": 0.0124,
      "step": 2763140
    },
    {
      "epoch": 4.521971943467986,
      "grad_norm": 0.32697027921676636,
      "learning_rate": 9.623557784176645e-07,
      "loss": 0.0117,
      "step": 2763160
    },
    {
      "epoch": 4.52200467390664,
      "grad_norm": 0.612450361251831,
      "learning_rate": 9.622898862041473e-07,
      "loss": 0.0154,
      "step": 2763180
    },
    {
      "epoch": 4.522037404345293,
      "grad_norm": 0.09332486242055893,
      "learning_rate": 9.622239939906302e-07,
      "loss": 0.0068,
      "step": 2763200
    },
    {
      "epoch": 4.522070134783946,
      "grad_norm": 0.1545708328485489,
      "learning_rate": 9.621581017771132e-07,
      "loss": 0.0071,
      "step": 2763220
    },
    {
      "epoch": 4.5221028652226,
      "grad_norm": 0.24796724319458008,
      "learning_rate": 9.62092209563596e-07,
      "loss": 0.0107,
      "step": 2763240
    },
    {
      "epoch": 4.522135595661253,
      "grad_norm": 0.3034394383430481,
      "learning_rate": 9.620263173500788e-07,
      "loss": 0.0069,
      "step": 2763260
    },
    {
      "epoch": 4.522168326099907,
      "grad_norm": 0.08777768164873123,
      "learning_rate": 9.619604251365616e-07,
      "loss": 0.0076,
      "step": 2763280
    },
    {
      "epoch": 4.5222010565385595,
      "grad_norm": 0.14511868357658386,
      "learning_rate": 9.618945329230445e-07,
      "loss": 0.0065,
      "step": 2763300
    },
    {
      "epoch": 4.522233786977213,
      "grad_norm": 0.22738274931907654,
      "learning_rate": 9.618286407095275e-07,
      "loss": 0.009,
      "step": 2763320
    },
    {
      "epoch": 4.522266517415867,
      "grad_norm": 0.08796822279691696,
      "learning_rate": 9.617627484960102e-07,
      "loss": 0.0084,
      "step": 2763340
    },
    {
      "epoch": 4.522299247854519,
      "grad_norm": 0.12626545131206512,
      "learning_rate": 9.616968562824932e-07,
      "loss": 0.0095,
      "step": 2763360
    },
    {
      "epoch": 4.522331978293173,
      "grad_norm": 0.11308380216360092,
      "learning_rate": 9.616309640689761e-07,
      "loss": 0.0093,
      "step": 2763380
    },
    {
      "epoch": 4.522364708731827,
      "grad_norm": 0.04383019357919693,
      "learning_rate": 9.615650718554589e-07,
      "loss": 0.0076,
      "step": 2763400
    },
    {
      "epoch": 4.522397439170479,
      "grad_norm": 0.13659867644309998,
      "learning_rate": 9.614991796419418e-07,
      "loss": 0.0089,
      "step": 2763420
    },
    {
      "epoch": 4.522430169609133,
      "grad_norm": 0.06441091746091843,
      "learning_rate": 9.614332874284246e-07,
      "loss": 0.0128,
      "step": 2763440
    },
    {
      "epoch": 4.5224629000477865,
      "grad_norm": 0.21017444133758545,
      "learning_rate": 9.613673952149075e-07,
      "loss": 0.0101,
      "step": 2763460
    },
    {
      "epoch": 4.52249563048644,
      "grad_norm": 0.281381219625473,
      "learning_rate": 9.613015030013905e-07,
      "loss": 0.0084,
      "step": 2763480
    },
    {
      "epoch": 4.522528360925093,
      "grad_norm": 0.615982174873352,
      "learning_rate": 9.612356107878732e-07,
      "loss": 0.0058,
      "step": 2763500
    },
    {
      "epoch": 4.522561091363746,
      "grad_norm": 0.9022316932678223,
      "learning_rate": 9.611697185743562e-07,
      "loss": 0.0103,
      "step": 2763520
    },
    {
      "epoch": 4.5225938218024,
      "grad_norm": 0.3219982385635376,
      "learning_rate": 9.61103826360839e-07,
      "loss": 0.0115,
      "step": 2763540
    },
    {
      "epoch": 4.522626552241054,
      "grad_norm": 0.2062518149614334,
      "learning_rate": 9.610379341473219e-07,
      "loss": 0.0093,
      "step": 2763560
    },
    {
      "epoch": 4.522659282679706,
      "grad_norm": 0.33444616198539734,
      "learning_rate": 9.609720419338048e-07,
      "loss": 0.01,
      "step": 2763580
    },
    {
      "epoch": 4.52269201311836,
      "grad_norm": 0.1775389313697815,
      "learning_rate": 9.609061497202875e-07,
      "loss": 0.0103,
      "step": 2763600
    },
    {
      "epoch": 4.5227247435570135,
      "grad_norm": 0.35146239399909973,
      "learning_rate": 9.608402575067705e-07,
      "loss": 0.0153,
      "step": 2763620
    },
    {
      "epoch": 4.522757473995666,
      "grad_norm": 0.26320919394493103,
      "learning_rate": 9.607743652932534e-07,
      "loss": 0.0117,
      "step": 2763640
    },
    {
      "epoch": 4.52279020443432,
      "grad_norm": 0.18189483880996704,
      "learning_rate": 9.607084730797362e-07,
      "loss": 0.0072,
      "step": 2763660
    },
    {
      "epoch": 4.522822934872973,
      "grad_norm": 0.18170753121376038,
      "learning_rate": 9.606425808662191e-07,
      "loss": 0.0131,
      "step": 2763680
    },
    {
      "epoch": 4.522855665311626,
      "grad_norm": 0.11887519806623459,
      "learning_rate": 9.605766886527019e-07,
      "loss": 0.0092,
      "step": 2763700
    },
    {
      "epoch": 4.52288839575028,
      "grad_norm": 0.30097609758377075,
      "learning_rate": 9.605107964391848e-07,
      "loss": 0.009,
      "step": 2763720
    },
    {
      "epoch": 4.522921126188933,
      "grad_norm": 0.2495119273662567,
      "learning_rate": 9.604449042256678e-07,
      "loss": 0.0073,
      "step": 2763740
    },
    {
      "epoch": 4.522953856627587,
      "grad_norm": 0.6719539165496826,
      "learning_rate": 9.603790120121505e-07,
      "loss": 0.0101,
      "step": 2763760
    },
    {
      "epoch": 4.52298658706624,
      "grad_norm": 0.08221389353275299,
      "learning_rate": 9.603131197986335e-07,
      "loss": 0.0128,
      "step": 2763780
    },
    {
      "epoch": 4.523019317504893,
      "grad_norm": 0.42196157574653625,
      "learning_rate": 9.602472275851162e-07,
      "loss": 0.0084,
      "step": 2763800
    },
    {
      "epoch": 4.523052047943547,
      "grad_norm": 0.3412477970123291,
      "learning_rate": 9.601813353715994e-07,
      "loss": 0.0127,
      "step": 2763820
    },
    {
      "epoch": 4.5230847783822,
      "grad_norm": 0.15605880320072174,
      "learning_rate": 9.601154431580821e-07,
      "loss": 0.0088,
      "step": 2763840
    },
    {
      "epoch": 4.523117508820853,
      "grad_norm": 0.17692194879055023,
      "learning_rate": 9.600495509445649e-07,
      "loss": 0.0126,
      "step": 2763860
    },
    {
      "epoch": 4.523150239259507,
      "grad_norm": 0.26498836278915405,
      "learning_rate": 9.599836587310478e-07,
      "loss": 0.0163,
      "step": 2763880
    },
    {
      "epoch": 4.52318296969816,
      "grad_norm": 0.8498570919036865,
      "learning_rate": 9.599177665175308e-07,
      "loss": 0.0086,
      "step": 2763900
    },
    {
      "epoch": 4.523215700136813,
      "grad_norm": 0.08495436608791351,
      "learning_rate": 9.598518743040137e-07,
      "loss": 0.0067,
      "step": 2763920
    },
    {
      "epoch": 4.5232484305754665,
      "grad_norm": 0.28710776567459106,
      "learning_rate": 9.597859820904964e-07,
      "loss": 0.0072,
      "step": 2763940
    },
    {
      "epoch": 4.52328116101412,
      "grad_norm": 0.12907783687114716,
      "learning_rate": 9.597200898769792e-07,
      "loss": 0.0094,
      "step": 2763960
    },
    {
      "epoch": 4.523313891452773,
      "grad_norm": 0.22313684225082397,
      "learning_rate": 9.596541976634621e-07,
      "loss": 0.006,
      "step": 2763980
    },
    {
      "epoch": 4.5233466218914264,
      "grad_norm": 0.17338906228542328,
      "learning_rate": 9.59588305449945e-07,
      "loss": 0.0087,
      "step": 2764000
    },
    {
      "epoch": 4.52337935233008,
      "grad_norm": 0.5171828866004944,
      "learning_rate": 9.59522413236428e-07,
      "loss": 0.0102,
      "step": 2764020
    },
    {
      "epoch": 4.523412082768734,
      "grad_norm": 0.3046255111694336,
      "learning_rate": 9.594565210229108e-07,
      "loss": 0.01,
      "step": 2764040
    },
    {
      "epoch": 4.523444813207386,
      "grad_norm": 0.43998998403549194,
      "learning_rate": 9.593906288093935e-07,
      "loss": 0.0091,
      "step": 2764060
    },
    {
      "epoch": 4.52347754364604,
      "grad_norm": 0.11717692017555237,
      "learning_rate": 9.593247365958767e-07,
      "loss": 0.0085,
      "step": 2764080
    },
    {
      "epoch": 4.5235102740846935,
      "grad_norm": 0.4307568371295929,
      "learning_rate": 9.592588443823594e-07,
      "loss": 0.0084,
      "step": 2764100
    },
    {
      "epoch": 4.523543004523346,
      "grad_norm": 0.17211605608463287,
      "learning_rate": 9.591929521688424e-07,
      "loss": 0.0082,
      "step": 2764120
    },
    {
      "epoch": 4.523575734962,
      "grad_norm": 0.28401049971580505,
      "learning_rate": 9.591270599553251e-07,
      "loss": 0.0098,
      "step": 2764140
    },
    {
      "epoch": 4.523608465400653,
      "grad_norm": 0.21415789425373077,
      "learning_rate": 9.59061167741808e-07,
      "loss": 0.012,
      "step": 2764160
    },
    {
      "epoch": 4.523641195839307,
      "grad_norm": 0.38509345054626465,
      "learning_rate": 9.58995275528291e-07,
      "loss": 0.008,
      "step": 2764180
    },
    {
      "epoch": 4.52367392627796,
      "grad_norm": 0.3477105498313904,
      "learning_rate": 9.589293833147738e-07,
      "loss": 0.0062,
      "step": 2764200
    },
    {
      "epoch": 4.523706656716613,
      "grad_norm": 0.15791991353034973,
      "learning_rate": 9.588634911012567e-07,
      "loss": 0.0092,
      "step": 2764220
    },
    {
      "epoch": 4.523739387155267,
      "grad_norm": 0.3929615020751953,
      "learning_rate": 9.587975988877394e-07,
      "loss": 0.0098,
      "step": 2764240
    },
    {
      "epoch": 4.52377211759392,
      "grad_norm": 0.18802139163017273,
      "learning_rate": 9.587317066742224e-07,
      "loss": 0.0069,
      "step": 2764260
    },
    {
      "epoch": 4.523804848032573,
      "grad_norm": 0.19244998693466187,
      "learning_rate": 9.586658144607054e-07,
      "loss": 0.0068,
      "step": 2764280
    },
    {
      "epoch": 4.523837578471227,
      "grad_norm": 0.08350425213575363,
      "learning_rate": 9.58599922247188e-07,
      "loss": 0.0121,
      "step": 2764300
    },
    {
      "epoch": 4.5238703089098795,
      "grad_norm": 0.2924001216888428,
      "learning_rate": 9.58534030033671e-07,
      "loss": 0.0092,
      "step": 2764320
    },
    {
      "epoch": 4.523903039348533,
      "grad_norm": 0.4012549817562103,
      "learning_rate": 9.58468137820154e-07,
      "loss": 0.0099,
      "step": 2764340
    },
    {
      "epoch": 4.523935769787187,
      "grad_norm": 0.05584762617945671,
      "learning_rate": 9.584022456066367e-07,
      "loss": 0.0072,
      "step": 2764360
    },
    {
      "epoch": 4.52396850022584,
      "grad_norm": 0.43441563844680786,
      "learning_rate": 9.583363533931197e-07,
      "loss": 0.0097,
      "step": 2764380
    },
    {
      "epoch": 4.524001230664493,
      "grad_norm": 0.24571050703525543,
      "learning_rate": 9.582704611796024e-07,
      "loss": 0.0084,
      "step": 2764400
    },
    {
      "epoch": 4.524033961103147,
      "grad_norm": 0.920385479927063,
      "learning_rate": 9.582045689660854e-07,
      "loss": 0.0129,
      "step": 2764420
    },
    {
      "epoch": 4.5240666915418,
      "grad_norm": 0.2918563187122345,
      "learning_rate": 9.581386767525683e-07,
      "loss": 0.0138,
      "step": 2764440
    },
    {
      "epoch": 4.524099421980454,
      "grad_norm": 0.22341559827327728,
      "learning_rate": 9.58072784539051e-07,
      "loss": 0.0119,
      "step": 2764460
    },
    {
      "epoch": 4.5241321524191065,
      "grad_norm": 0.24546736478805542,
      "learning_rate": 9.58006892325534e-07,
      "loss": 0.0093,
      "step": 2764480
    },
    {
      "epoch": 4.52416488285776,
      "grad_norm": 0.3895542025566101,
      "learning_rate": 9.579410001120168e-07,
      "loss": 0.0099,
      "step": 2764500
    },
    {
      "epoch": 4.524197613296414,
      "grad_norm": 0.30164358019828796,
      "learning_rate": 9.578751078984997e-07,
      "loss": 0.0068,
      "step": 2764520
    },
    {
      "epoch": 4.524230343735066,
      "grad_norm": 0.12492159008979797,
      "learning_rate": 9.578092156849827e-07,
      "loss": 0.0111,
      "step": 2764540
    },
    {
      "epoch": 4.52426307417372,
      "grad_norm": 0.5141633749008179,
      "learning_rate": 9.577433234714654e-07,
      "loss": 0.0074,
      "step": 2764560
    },
    {
      "epoch": 4.524295804612374,
      "grad_norm": 0.3829978108406067,
      "learning_rate": 9.576774312579484e-07,
      "loss": 0.0093,
      "step": 2764580
    },
    {
      "epoch": 4.524328535051026,
      "grad_norm": 0.0863197073340416,
      "learning_rate": 9.57611539044431e-07,
      "loss": 0.0092,
      "step": 2764600
    },
    {
      "epoch": 4.52436126548968,
      "grad_norm": 0.19379135966300964,
      "learning_rate": 9.57545646830914e-07,
      "loss": 0.0055,
      "step": 2764620
    },
    {
      "epoch": 4.5243939959283335,
      "grad_norm": 0.269504576921463,
      "learning_rate": 9.57479754617397e-07,
      "loss": 0.0074,
      "step": 2764640
    },
    {
      "epoch": 4.524426726366987,
      "grad_norm": 0.035308219492435455,
      "learning_rate": 9.574138624038797e-07,
      "loss": 0.0075,
      "step": 2764660
    },
    {
      "epoch": 4.52445945680564,
      "grad_norm": 0.2577877640724182,
      "learning_rate": 9.573479701903627e-07,
      "loss": 0.0056,
      "step": 2764680
    },
    {
      "epoch": 4.524492187244293,
      "grad_norm": 0.10692832618951797,
      "learning_rate": 9.572820779768456e-07,
      "loss": 0.0086,
      "step": 2764700
    },
    {
      "epoch": 4.524524917682947,
      "grad_norm": 0.2931654751300812,
      "learning_rate": 9.572161857633284e-07,
      "loss": 0.0096,
      "step": 2764720
    },
    {
      "epoch": 4.5245576481216006,
      "grad_norm": 0.2176680564880371,
      "learning_rate": 9.571502935498113e-07,
      "loss": 0.0114,
      "step": 2764740
    },
    {
      "epoch": 4.524590378560253,
      "grad_norm": 0.25931382179260254,
      "learning_rate": 9.57084401336294e-07,
      "loss": 0.0099,
      "step": 2764760
    },
    {
      "epoch": 4.524623108998907,
      "grad_norm": 0.232699915766716,
      "learning_rate": 9.57018509122777e-07,
      "loss": 0.0089,
      "step": 2764780
    },
    {
      "epoch": 4.5246558394375604,
      "grad_norm": 0.320679634809494,
      "learning_rate": 9.5695261690926e-07,
      "loss": 0.0093,
      "step": 2764800
    },
    {
      "epoch": 4.524688569876213,
      "grad_norm": 0.20960068702697754,
      "learning_rate": 9.568867246957427e-07,
      "loss": 0.0082,
      "step": 2764820
    },
    {
      "epoch": 4.524721300314867,
      "grad_norm": 0.05877875164151192,
      "learning_rate": 9.568208324822257e-07,
      "loss": 0.0066,
      "step": 2764840
    },
    {
      "epoch": 4.52475403075352,
      "grad_norm": 0.13149695098400116,
      "learning_rate": 9.567549402687084e-07,
      "loss": 0.0103,
      "step": 2764860
    },
    {
      "epoch": 4.524786761192173,
      "grad_norm": 0.12176360189914703,
      "learning_rate": 9.566890480551914e-07,
      "loss": 0.0082,
      "step": 2764880
    },
    {
      "epoch": 4.524819491630827,
      "grad_norm": 0.0748777762055397,
      "learning_rate": 9.566231558416743e-07,
      "loss": 0.0101,
      "step": 2764900
    },
    {
      "epoch": 4.52485222206948,
      "grad_norm": 0.20511780679225922,
      "learning_rate": 9.56557263628157e-07,
      "loss": 0.01,
      "step": 2764920
    },
    {
      "epoch": 4.524884952508134,
      "grad_norm": 0.09918373078107834,
      "learning_rate": 9.5649137141464e-07,
      "loss": 0.0091,
      "step": 2764940
    },
    {
      "epoch": 4.5249176829467865,
      "grad_norm": 0.07974895089864731,
      "learning_rate": 9.56425479201123e-07,
      "loss": 0.0094,
      "step": 2764960
    },
    {
      "epoch": 4.52495041338544,
      "grad_norm": 0.07000157237052917,
      "learning_rate": 9.563595869876057e-07,
      "loss": 0.0076,
      "step": 2764980
    },
    {
      "epoch": 4.524983143824094,
      "grad_norm": 0.24729350209236145,
      "learning_rate": 9.562936947740886e-07,
      "loss": 0.0118,
      "step": 2765000
    },
    {
      "epoch": 4.525015874262747,
      "grad_norm": 0.16315831243991852,
      "learning_rate": 9.562278025605714e-07,
      "loss": 0.006,
      "step": 2765020
    },
    {
      "epoch": 4.5250486047014,
      "grad_norm": 0.14439521729946136,
      "learning_rate": 9.561619103470543e-07,
      "loss": 0.0086,
      "step": 2765040
    },
    {
      "epoch": 4.525081335140054,
      "grad_norm": 0.36179792881011963,
      "learning_rate": 9.560960181335373e-07,
      "loss": 0.0116,
      "step": 2765060
    },
    {
      "epoch": 4.525114065578707,
      "grad_norm": 0.22504092752933502,
      "learning_rate": 9.5603012592002e-07,
      "loss": 0.01,
      "step": 2765080
    },
    {
      "epoch": 4.52514679601736,
      "grad_norm": 0.4514230787754059,
      "learning_rate": 9.55964233706503e-07,
      "loss": 0.0097,
      "step": 2765100
    },
    {
      "epoch": 4.5251795264560135,
      "grad_norm": 0.17890377342700958,
      "learning_rate": 9.558983414929857e-07,
      "loss": 0.0072,
      "step": 2765120
    },
    {
      "epoch": 4.525212256894667,
      "grad_norm": 0.31252482533454895,
      "learning_rate": 9.558324492794689e-07,
      "loss": 0.0095,
      "step": 2765140
    },
    {
      "epoch": 4.52524498733332,
      "grad_norm": 0.23007094860076904,
      "learning_rate": 9.557665570659516e-07,
      "loss": 0.0143,
      "step": 2765160
    },
    {
      "epoch": 4.525277717771973,
      "grad_norm": 0.2550065517425537,
      "learning_rate": 9.557006648524344e-07,
      "loss": 0.0068,
      "step": 2765180
    },
    {
      "epoch": 4.525310448210627,
      "grad_norm": 0.39295271039009094,
      "learning_rate": 9.556347726389173e-07,
      "loss": 0.0093,
      "step": 2765200
    },
    {
      "epoch": 4.525343178649281,
      "grad_norm": 0.060068875551223755,
      "learning_rate": 9.555688804254003e-07,
      "loss": 0.0071,
      "step": 2765220
    },
    {
      "epoch": 4.525375909087933,
      "grad_norm": 0.5109375715255737,
      "learning_rate": 9.555029882118832e-07,
      "loss": 0.0122,
      "step": 2765240
    },
    {
      "epoch": 4.525408639526587,
      "grad_norm": 0.26057395339012146,
      "learning_rate": 9.55437095998366e-07,
      "loss": 0.0088,
      "step": 2765260
    },
    {
      "epoch": 4.5254413699652405,
      "grad_norm": 0.4185628294944763,
      "learning_rate": 9.553712037848487e-07,
      "loss": 0.014,
      "step": 2765280
    },
    {
      "epoch": 4.525474100403894,
      "grad_norm": 0.25581908226013184,
      "learning_rate": 9.553053115713316e-07,
      "loss": 0.0105,
      "step": 2765300
    },
    {
      "epoch": 4.525506830842547,
      "grad_norm": 0.639748215675354,
      "learning_rate": 9.552394193578146e-07,
      "loss": 0.0095,
      "step": 2765320
    },
    {
      "epoch": 4.5255395612812,
      "grad_norm": 0.2601177394390106,
      "learning_rate": 9.551735271442975e-07,
      "loss": 0.0106,
      "step": 2765340
    },
    {
      "epoch": 4.525572291719854,
      "grad_norm": 0.2668340504169464,
      "learning_rate": 9.551076349307803e-07,
      "loss": 0.0067,
      "step": 2765360
    },
    {
      "epoch": 4.525605022158507,
      "grad_norm": 0.19421248137950897,
      "learning_rate": 9.55041742717263e-07,
      "loss": 0.0064,
      "step": 2765380
    },
    {
      "epoch": 4.52563775259716,
      "grad_norm": 0.10884202271699905,
      "learning_rate": 9.549758505037462e-07,
      "loss": 0.008,
      "step": 2765400
    },
    {
      "epoch": 4.525670483035814,
      "grad_norm": 0.06499826908111572,
      "learning_rate": 9.54909958290229e-07,
      "loss": 0.0093,
      "step": 2765420
    },
    {
      "epoch": 4.525703213474467,
      "grad_norm": 0.5171065926551819,
      "learning_rate": 9.548440660767119e-07,
      "loss": 0.0073,
      "step": 2765440
    },
    {
      "epoch": 4.52573594391312,
      "grad_norm": 0.406289279460907,
      "learning_rate": 9.547781738631946e-07,
      "loss": 0.011,
      "step": 2765460
    },
    {
      "epoch": 4.525768674351774,
      "grad_norm": 0.19227273762226105,
      "learning_rate": 9.547122816496776e-07,
      "loss": 0.0135,
      "step": 2765480
    },
    {
      "epoch": 4.525801404790427,
      "grad_norm": 0.4798585772514343,
      "learning_rate": 9.546463894361605e-07,
      "loss": 0.0103,
      "step": 2765500
    },
    {
      "epoch": 4.52583413522908,
      "grad_norm": 0.08239517360925674,
      "learning_rate": 9.545804972226433e-07,
      "loss": 0.0063,
      "step": 2765520
    },
    {
      "epoch": 4.525866865667734,
      "grad_norm": 0.1420920491218567,
      "learning_rate": 9.545146050091262e-07,
      "loss": 0.0118,
      "step": 2765540
    },
    {
      "epoch": 4.525899596106387,
      "grad_norm": 0.11410782486200333,
      "learning_rate": 9.54448712795609e-07,
      "loss": 0.0077,
      "step": 2765560
    },
    {
      "epoch": 4.525932326545041,
      "grad_norm": 0.2726444602012634,
      "learning_rate": 9.54382820582092e-07,
      "loss": 0.0086,
      "step": 2765580
    },
    {
      "epoch": 4.525965056983694,
      "grad_norm": 0.2179480791091919,
      "learning_rate": 9.543169283685749e-07,
      "loss": 0.0119,
      "step": 2765600
    },
    {
      "epoch": 4.525997787422347,
      "grad_norm": 0.11952930688858032,
      "learning_rate": 9.542510361550576e-07,
      "loss": 0.0129,
      "step": 2765620
    },
    {
      "epoch": 4.526030517861001,
      "grad_norm": 0.18042558431625366,
      "learning_rate": 9.541851439415405e-07,
      "loss": 0.0098,
      "step": 2765640
    },
    {
      "epoch": 4.5260632482996535,
      "grad_norm": 0.20714931190013885,
      "learning_rate": 9.541192517280235e-07,
      "loss": 0.0084,
      "step": 2765660
    },
    {
      "epoch": 4.526095978738307,
      "grad_norm": 0.23890946805477142,
      "learning_rate": 9.540533595145062e-07,
      "loss": 0.0099,
      "step": 2765680
    },
    {
      "epoch": 4.526128709176961,
      "grad_norm": 0.2529621720314026,
      "learning_rate": 9.539874673009892e-07,
      "loss": 0.0111,
      "step": 2765700
    },
    {
      "epoch": 4.526161439615613,
      "grad_norm": 0.10887911170721054,
      "learning_rate": 9.53921575087472e-07,
      "loss": 0.0067,
      "step": 2765720
    },
    {
      "epoch": 4.526194170054267,
      "grad_norm": 0.4116917848587036,
      "learning_rate": 9.538556828739549e-07,
      "loss": 0.0071,
      "step": 2765740
    },
    {
      "epoch": 4.5262269004929205,
      "grad_norm": 0.19783827662467957,
      "learning_rate": 9.537897906604378e-07,
      "loss": 0.0078,
      "step": 2765760
    },
    {
      "epoch": 4.526259630931574,
      "grad_norm": 0.35427889227867126,
      "learning_rate": 9.537238984469206e-07,
      "loss": 0.0071,
      "step": 2765780
    },
    {
      "epoch": 4.526292361370227,
      "grad_norm": 0.4104066789150238,
      "learning_rate": 9.536580062334034e-07,
      "loss": 0.0095,
      "step": 2765800
    },
    {
      "epoch": 4.52632509180888,
      "grad_norm": 0.37177568674087524,
      "learning_rate": 9.535921140198863e-07,
      "loss": 0.0089,
      "step": 2765820
    },
    {
      "epoch": 4.526357822247534,
      "grad_norm": 0.4564191401004791,
      "learning_rate": 9.535262218063693e-07,
      "loss": 0.007,
      "step": 2765840
    },
    {
      "epoch": 4.526390552686187,
      "grad_norm": 0.1325100213289261,
      "learning_rate": 9.534603295928522e-07,
      "loss": 0.0077,
      "step": 2765860
    },
    {
      "epoch": 4.52642328312484,
      "grad_norm": 0.1112026572227478,
      "learning_rate": 9.533944373793349e-07,
      "loss": 0.0094,
      "step": 2765880
    },
    {
      "epoch": 4.526456013563494,
      "grad_norm": 0.17597001791000366,
      "learning_rate": 9.533285451658178e-07,
      "loss": 0.012,
      "step": 2765900
    },
    {
      "epoch": 4.5264887440021475,
      "grad_norm": 0.10230668634176254,
      "learning_rate": 9.532626529523006e-07,
      "loss": 0.0067,
      "step": 2765920
    },
    {
      "epoch": 4.5265214744408,
      "grad_norm": 0.8141870498657227,
      "learning_rate": 9.531967607387837e-07,
      "loss": 0.012,
      "step": 2765940
    },
    {
      "epoch": 4.526554204879454,
      "grad_norm": 0.23252327740192413,
      "learning_rate": 9.531308685252665e-07,
      "loss": 0.0063,
      "step": 2765960
    },
    {
      "epoch": 4.526586935318107,
      "grad_norm": 0.3243968188762665,
      "learning_rate": 9.530649763117493e-07,
      "loss": 0.0115,
      "step": 2765980
    },
    {
      "epoch": 4.52661966575676,
      "grad_norm": 0.44836530089378357,
      "learning_rate": 9.529990840982321e-07,
      "loss": 0.0113,
      "step": 2766000
    },
    {
      "epoch": 4.526652396195414,
      "grad_norm": 0.18442288041114807,
      "learning_rate": 9.529331918847151e-07,
      "loss": 0.0095,
      "step": 2766020
    },
    {
      "epoch": 4.526685126634067,
      "grad_norm": 0.13639885187149048,
      "learning_rate": 9.52867299671198e-07,
      "loss": 0.0162,
      "step": 2766040
    },
    {
      "epoch": 4.52671785707272,
      "grad_norm": 0.47961699962615967,
      "learning_rate": 9.528014074576808e-07,
      "loss": 0.0129,
      "step": 2766060
    },
    {
      "epoch": 4.526750587511374,
      "grad_norm": 0.5179714560508728,
      "learning_rate": 9.527355152441637e-07,
      "loss": 0.0071,
      "step": 2766080
    },
    {
      "epoch": 4.526783317950027,
      "grad_norm": 0.044699542224407196,
      "learning_rate": 9.526696230306466e-07,
      "loss": 0.0099,
      "step": 2766100
    },
    {
      "epoch": 4.526816048388681,
      "grad_norm": 0.2994244396686554,
      "learning_rate": 9.526037308171295e-07,
      "loss": 0.0069,
      "step": 2766120
    },
    {
      "epoch": 4.5268487788273335,
      "grad_norm": 0.05987853556871414,
      "learning_rate": 9.525378386036123e-07,
      "loss": 0.0062,
      "step": 2766140
    },
    {
      "epoch": 4.526881509265987,
      "grad_norm": 0.633255660533905,
      "learning_rate": 9.524719463900952e-07,
      "loss": 0.0103,
      "step": 2766160
    },
    {
      "epoch": 4.526914239704641,
      "grad_norm": 0.35742905735969543,
      "learning_rate": 9.52406054176578e-07,
      "loss": 0.0114,
      "step": 2766180
    },
    {
      "epoch": 4.526946970143294,
      "grad_norm": 0.24395157396793365,
      "learning_rate": 9.52340161963061e-07,
      "loss": 0.0137,
      "step": 2766200
    },
    {
      "epoch": 4.526979700581947,
      "grad_norm": 0.16607818007469177,
      "learning_rate": 9.522742697495438e-07,
      "loss": 0.0138,
      "step": 2766220
    },
    {
      "epoch": 4.527012431020601,
      "grad_norm": 0.3725433051586151,
      "learning_rate": 9.522083775360267e-07,
      "loss": 0.0095,
      "step": 2766240
    },
    {
      "epoch": 4.527045161459254,
      "grad_norm": 0.08582376688718796,
      "learning_rate": 9.521424853225095e-07,
      "loss": 0.0089,
      "step": 2766260
    },
    {
      "epoch": 4.527077891897907,
      "grad_norm": 0.128021702170372,
      "learning_rate": 9.520765931089925e-07,
      "loss": 0.0091,
      "step": 2766280
    },
    {
      "epoch": 4.5271106223365605,
      "grad_norm": 0.38207438588142395,
      "learning_rate": 9.520107008954753e-07,
      "loss": 0.0084,
      "step": 2766300
    },
    {
      "epoch": 4.527143352775214,
      "grad_norm": 0.5524125695228577,
      "learning_rate": 9.519448086819581e-07,
      "loss": 0.0121,
      "step": 2766320
    },
    {
      "epoch": 4.527176083213867,
      "grad_norm": 0.2645949423313141,
      "learning_rate": 9.51878916468441e-07,
      "loss": 0.0088,
      "step": 2766340
    },
    {
      "epoch": 4.52720881365252,
      "grad_norm": 0.16174477338790894,
      "learning_rate": 9.518130242549238e-07,
      "loss": 0.0074,
      "step": 2766360
    },
    {
      "epoch": 4.527241544091174,
      "grad_norm": 0.10273681581020355,
      "learning_rate": 9.517471320414068e-07,
      "loss": 0.0085,
      "step": 2766380
    },
    {
      "epoch": 4.527274274529828,
      "grad_norm": 0.36091935634613037,
      "learning_rate": 9.516812398278896e-07,
      "loss": 0.0114,
      "step": 2766400
    },
    {
      "epoch": 4.52730700496848,
      "grad_norm": 0.34452030062675476,
      "learning_rate": 9.516153476143725e-07,
      "loss": 0.0093,
      "step": 2766420
    },
    {
      "epoch": 4.527339735407134,
      "grad_norm": 0.18811403214931488,
      "learning_rate": 9.515494554008553e-07,
      "loss": 0.0071,
      "step": 2766440
    },
    {
      "epoch": 4.5273724658457875,
      "grad_norm": 0.17895646393299103,
      "learning_rate": 9.514835631873383e-07,
      "loss": 0.0116,
      "step": 2766460
    },
    {
      "epoch": 4.527405196284441,
      "grad_norm": 0.1464870274066925,
      "learning_rate": 9.514176709738211e-07,
      "loss": 0.009,
      "step": 2766480
    },
    {
      "epoch": 4.527437926723094,
      "grad_norm": 0.13162995874881744,
      "learning_rate": 9.51351778760304e-07,
      "loss": 0.0061,
      "step": 2766500
    },
    {
      "epoch": 4.527470657161747,
      "grad_norm": 0.27369633316993713,
      "learning_rate": 9.512858865467868e-07,
      "loss": 0.0076,
      "step": 2766520
    },
    {
      "epoch": 4.527503387600401,
      "grad_norm": 0.09231135249137878,
      "learning_rate": 9.512199943332698e-07,
      "loss": 0.0136,
      "step": 2766540
    },
    {
      "epoch": 4.527536118039054,
      "grad_norm": 0.2376706302165985,
      "learning_rate": 9.511541021197526e-07,
      "loss": 0.0095,
      "step": 2766560
    },
    {
      "epoch": 4.527568848477707,
      "grad_norm": 0.14098887145519257,
      "learning_rate": 9.510882099062355e-07,
      "loss": 0.0089,
      "step": 2766580
    },
    {
      "epoch": 4.527601578916361,
      "grad_norm": 0.39521923661231995,
      "learning_rate": 9.510223176927183e-07,
      "loss": 0.0094,
      "step": 2766600
    },
    {
      "epoch": 4.5276343093550135,
      "grad_norm": 0.10169047862291336,
      "learning_rate": 9.509564254792011e-07,
      "loss": 0.0048,
      "step": 2766620
    },
    {
      "epoch": 4.527667039793667,
      "grad_norm": 0.6898884177207947,
      "learning_rate": 9.508905332656841e-07,
      "loss": 0.0078,
      "step": 2766640
    },
    {
      "epoch": 4.527699770232321,
      "grad_norm": 0.08104211837053299,
      "learning_rate": 9.508246410521669e-07,
      "loss": 0.0095,
      "step": 2766660
    },
    {
      "epoch": 4.527732500670974,
      "grad_norm": 0.41075703501701355,
      "learning_rate": 9.507587488386498e-07,
      "loss": 0.0129,
      "step": 2766680
    },
    {
      "epoch": 4.527765231109627,
      "grad_norm": 0.08878044784069061,
      "learning_rate": 9.506928566251326e-07,
      "loss": 0.0089,
      "step": 2766700
    },
    {
      "epoch": 4.527797961548281,
      "grad_norm": 0.11138787120580673,
      "learning_rate": 9.506269644116156e-07,
      "loss": 0.0083,
      "step": 2766720
    },
    {
      "epoch": 4.527830691986934,
      "grad_norm": 0.6389071345329285,
      "learning_rate": 9.505610721980984e-07,
      "loss": 0.0087,
      "step": 2766740
    },
    {
      "epoch": 4.527863422425588,
      "grad_norm": 0.6635768413543701,
      "learning_rate": 9.504951799845813e-07,
      "loss": 0.0126,
      "step": 2766760
    },
    {
      "epoch": 4.5278961528642405,
      "grad_norm": 0.2712951898574829,
      "learning_rate": 9.504292877710641e-07,
      "loss": 0.0058,
      "step": 2766780
    },
    {
      "epoch": 4.527928883302894,
      "grad_norm": 0.32191160321235657,
      "learning_rate": 9.50363395557547e-07,
      "loss": 0.0063,
      "step": 2766800
    },
    {
      "epoch": 4.527961613741548,
      "grad_norm": 0.0853150337934494,
      "learning_rate": 9.502975033440299e-07,
      "loss": 0.0064,
      "step": 2766820
    },
    {
      "epoch": 4.5279943441802,
      "grad_norm": 0.2038855403661728,
      "learning_rate": 9.502316111305128e-07,
      "loss": 0.0113,
      "step": 2766840
    },
    {
      "epoch": 4.528027074618854,
      "grad_norm": 0.18016667664051056,
      "learning_rate": 9.501657189169956e-07,
      "loss": 0.0112,
      "step": 2766860
    },
    {
      "epoch": 4.528059805057508,
      "grad_norm": 0.15870964527130127,
      "learning_rate": 9.500998267034785e-07,
      "loss": 0.007,
      "step": 2766880
    },
    {
      "epoch": 4.52809253549616,
      "grad_norm": 0.26560065150260925,
      "learning_rate": 9.500339344899614e-07,
      "loss": 0.0093,
      "step": 2766900
    },
    {
      "epoch": 4.528125265934814,
      "grad_norm": 0.22941695153713226,
      "learning_rate": 9.499680422764443e-07,
      "loss": 0.0095,
      "step": 2766920
    },
    {
      "epoch": 4.5281579963734675,
      "grad_norm": 0.15210889279842377,
      "learning_rate": 9.499021500629271e-07,
      "loss": 0.0055,
      "step": 2766940
    },
    {
      "epoch": 4.528190726812121,
      "grad_norm": 0.15735599398612976,
      "learning_rate": 9.4983625784941e-07,
      "loss": 0.0087,
      "step": 2766960
    },
    {
      "epoch": 4.528223457250774,
      "grad_norm": 0.17715466022491455,
      "learning_rate": 9.497703656358929e-07,
      "loss": 0.005,
      "step": 2766980
    },
    {
      "epoch": 4.528256187689427,
      "grad_norm": 0.551064670085907,
      "learning_rate": 9.497044734223757e-07,
      "loss": 0.0072,
      "step": 2767000
    },
    {
      "epoch": 4.528288918128081,
      "grad_norm": 0.26862984895706177,
      "learning_rate": 9.496385812088586e-07,
      "loss": 0.0102,
      "step": 2767020
    },
    {
      "epoch": 4.528321648566735,
      "grad_norm": 0.12237753719091415,
      "learning_rate": 9.495726889953414e-07,
      "loss": 0.0057,
      "step": 2767040
    },
    {
      "epoch": 4.528354379005387,
      "grad_norm": 0.3348245322704315,
      "learning_rate": 9.495067967818243e-07,
      "loss": 0.0079,
      "step": 2767060
    },
    {
      "epoch": 4.528387109444041,
      "grad_norm": 0.47700363397598267,
      "learning_rate": 9.494409045683072e-07,
      "loss": 0.0086,
      "step": 2767080
    },
    {
      "epoch": 4.5284198398826945,
      "grad_norm": 0.15146778523921967,
      "learning_rate": 9.493750123547901e-07,
      "loss": 0.0091,
      "step": 2767100
    },
    {
      "epoch": 4.528452570321347,
      "grad_norm": 0.3559490740299225,
      "learning_rate": 9.493091201412729e-07,
      "loss": 0.0092,
      "step": 2767120
    },
    {
      "epoch": 4.528485300760001,
      "grad_norm": 0.38287845253944397,
      "learning_rate": 9.492432279277558e-07,
      "loss": 0.0095,
      "step": 2767140
    },
    {
      "epoch": 4.528518031198654,
      "grad_norm": 0.301432728767395,
      "learning_rate": 9.491773357142388e-07,
      "loss": 0.0086,
      "step": 2767160
    },
    {
      "epoch": 4.528550761637307,
      "grad_norm": 0.1945628821849823,
      "learning_rate": 9.491114435007217e-07,
      "loss": 0.0138,
      "step": 2767180
    },
    {
      "epoch": 4.528583492075961,
      "grad_norm": 0.5348422527313232,
      "learning_rate": 9.490455512872044e-07,
      "loss": 0.0069,
      "step": 2767200
    },
    {
      "epoch": 4.528616222514614,
      "grad_norm": 0.14706680178642273,
      "learning_rate": 9.489796590736873e-07,
      "loss": 0.0081,
      "step": 2767220
    },
    {
      "epoch": 4.528648952953268,
      "grad_norm": 0.16087928414344788,
      "learning_rate": 9.489137668601701e-07,
      "loss": 0.0109,
      "step": 2767240
    },
    {
      "epoch": 4.528681683391921,
      "grad_norm": 0.22215524315834045,
      "learning_rate": 9.488478746466532e-07,
      "loss": 0.0123,
      "step": 2767260
    },
    {
      "epoch": 4.528714413830574,
      "grad_norm": 0.2019631713628769,
      "learning_rate": 9.48781982433136e-07,
      "loss": 0.011,
      "step": 2767280
    },
    {
      "epoch": 4.528747144269228,
      "grad_norm": 0.365604043006897,
      "learning_rate": 9.487160902196189e-07,
      "loss": 0.0099,
      "step": 2767300
    },
    {
      "epoch": 4.5287798747078805,
      "grad_norm": 0.29601818323135376,
      "learning_rate": 9.486501980061016e-07,
      "loss": 0.008,
      "step": 2767320
    },
    {
      "epoch": 4.528812605146534,
      "grad_norm": 0.141038715839386,
      "learning_rate": 9.485843057925847e-07,
      "loss": 0.0088,
      "step": 2767340
    },
    {
      "epoch": 4.528845335585188,
      "grad_norm": 0.15855352580547333,
      "learning_rate": 9.485184135790675e-07,
      "loss": 0.009,
      "step": 2767360
    },
    {
      "epoch": 4.528878066023841,
      "grad_norm": 0.048621222376823425,
      "learning_rate": 9.484525213655503e-07,
      "loss": 0.0089,
      "step": 2767380
    },
    {
      "epoch": 4.528910796462494,
      "grad_norm": 0.14035741984844208,
      "learning_rate": 9.483866291520332e-07,
      "loss": 0.0102,
      "step": 2767400
    },
    {
      "epoch": 4.5289435269011475,
      "grad_norm": 0.3957550525665283,
      "learning_rate": 9.483207369385161e-07,
      "loss": 0.0079,
      "step": 2767420
    },
    {
      "epoch": 4.528976257339801,
      "grad_norm": 0.17473852634429932,
      "learning_rate": 9.48254844724999e-07,
      "loss": 0.0128,
      "step": 2767440
    },
    {
      "epoch": 4.529008987778454,
      "grad_norm": 0.233509823679924,
      "learning_rate": 9.481889525114818e-07,
      "loss": 0.0092,
      "step": 2767460
    },
    {
      "epoch": 4.529041718217107,
      "grad_norm": 0.05213266238570213,
      "learning_rate": 9.481230602979647e-07,
      "loss": 0.0059,
      "step": 2767480
    },
    {
      "epoch": 4.529074448655761,
      "grad_norm": 0.3935294449329376,
      "learning_rate": 9.480571680844475e-07,
      "loss": 0.0085,
      "step": 2767500
    },
    {
      "epoch": 4.529107179094414,
      "grad_norm": 0.32212767004966736,
      "learning_rate": 9.479912758709305e-07,
      "loss": 0.0117,
      "step": 2767520
    },
    {
      "epoch": 4.529139909533067,
      "grad_norm": 0.17720770835876465,
      "learning_rate": 9.479253836574133e-07,
      "loss": 0.0117,
      "step": 2767540
    },
    {
      "epoch": 4.529172639971721,
      "grad_norm": 0.22603698074817657,
      "learning_rate": 9.478594914438962e-07,
      "loss": 0.0111,
      "step": 2767560
    },
    {
      "epoch": 4.5292053704103745,
      "grad_norm": 0.1153513714671135,
      "learning_rate": 9.47793599230379e-07,
      "loss": 0.009,
      "step": 2767580
    },
    {
      "epoch": 4.529238100849027,
      "grad_norm": 0.12366089224815369,
      "learning_rate": 9.47727707016862e-07,
      "loss": 0.0095,
      "step": 2767600
    },
    {
      "epoch": 4.529270831287681,
      "grad_norm": 0.3623489737510681,
      "learning_rate": 9.476618148033448e-07,
      "loss": 0.0087,
      "step": 2767620
    },
    {
      "epoch": 4.529303561726334,
      "grad_norm": 0.10197169333696365,
      "learning_rate": 9.475959225898277e-07,
      "loss": 0.0072,
      "step": 2767640
    },
    {
      "epoch": 4.529336292164988,
      "grad_norm": 0.18134675920009613,
      "learning_rate": 9.475300303763105e-07,
      "loss": 0.0072,
      "step": 2767660
    },
    {
      "epoch": 4.529369022603641,
      "grad_norm": 0.5237841010093689,
      "learning_rate": 9.474641381627933e-07,
      "loss": 0.0106,
      "step": 2767680
    },
    {
      "epoch": 4.529401753042294,
      "grad_norm": 0.08839677274227142,
      "learning_rate": 9.473982459492763e-07,
      "loss": 0.0066,
      "step": 2767700
    },
    {
      "epoch": 4.529434483480948,
      "grad_norm": 0.37420937418937683,
      "learning_rate": 9.473323537357591e-07,
      "loss": 0.01,
      "step": 2767720
    },
    {
      "epoch": 4.529467213919601,
      "grad_norm": 0.5281408429145813,
      "learning_rate": 9.47266461522242e-07,
      "loss": 0.0083,
      "step": 2767740
    },
    {
      "epoch": 4.529499944358254,
      "grad_norm": 0.1875302940607071,
      "learning_rate": 9.472005693087248e-07,
      "loss": 0.0063,
      "step": 2767760
    },
    {
      "epoch": 4.529532674796908,
      "grad_norm": 0.3319566547870636,
      "learning_rate": 9.471346770952078e-07,
      "loss": 0.0105,
      "step": 2767780
    },
    {
      "epoch": 4.5295654052355605,
      "grad_norm": 0.1515091508626938,
      "learning_rate": 9.470687848816906e-07,
      "loss": 0.0074,
      "step": 2767800
    },
    {
      "epoch": 4.529598135674214,
      "grad_norm": 0.07899294793605804,
      "learning_rate": 9.470028926681735e-07,
      "loss": 0.0102,
      "step": 2767820
    },
    {
      "epoch": 4.529630866112868,
      "grad_norm": 0.10084892064332962,
      "learning_rate": 9.469370004546563e-07,
      "loss": 0.0083,
      "step": 2767840
    },
    {
      "epoch": 4.529663596551521,
      "grad_norm": 0.39271649718284607,
      "learning_rate": 9.468711082411393e-07,
      "loss": 0.0088,
      "step": 2767860
    },
    {
      "epoch": 4.529696326990174,
      "grad_norm": 0.07347025722265244,
      "learning_rate": 9.468052160276221e-07,
      "loss": 0.006,
      "step": 2767880
    },
    {
      "epoch": 4.529729057428828,
      "grad_norm": 0.2329302877187729,
      "learning_rate": 9.46739323814105e-07,
      "loss": 0.0098,
      "step": 2767900
    },
    {
      "epoch": 4.529761787867481,
      "grad_norm": 0.23778562247753143,
      "learning_rate": 9.466734316005878e-07,
      "loss": 0.0058,
      "step": 2767920
    },
    {
      "epoch": 4.529794518306135,
      "grad_norm": 0.1910969316959381,
      "learning_rate": 9.466075393870707e-07,
      "loss": 0.0058,
      "step": 2767940
    },
    {
      "epoch": 4.5298272487447875,
      "grad_norm": 0.23473405838012695,
      "learning_rate": 9.465416471735536e-07,
      "loss": 0.0129,
      "step": 2767960
    },
    {
      "epoch": 4.529859979183441,
      "grad_norm": 0.27648651599884033,
      "learning_rate": 9.464757549600365e-07,
      "loss": 0.0075,
      "step": 2767980
    },
    {
      "epoch": 4.529892709622095,
      "grad_norm": 0.2250983715057373,
      "learning_rate": 9.464098627465193e-07,
      "loss": 0.0089,
      "step": 2768000
    },
    {
      "epoch": 4.529925440060747,
      "grad_norm": 0.18193191289901733,
      "learning_rate": 9.463439705330021e-07,
      "loss": 0.0162,
      "step": 2768020
    },
    {
      "epoch": 4.529958170499401,
      "grad_norm": 0.18410730361938477,
      "learning_rate": 9.462780783194851e-07,
      "loss": 0.0103,
      "step": 2768040
    },
    {
      "epoch": 4.529990900938055,
      "grad_norm": 0.20534135401248932,
      "learning_rate": 9.462121861059679e-07,
      "loss": 0.0081,
      "step": 2768060
    },
    {
      "epoch": 4.530023631376707,
      "grad_norm": 0.2010130137205124,
      "learning_rate": 9.461462938924508e-07,
      "loss": 0.011,
      "step": 2768080
    },
    {
      "epoch": 4.530056361815361,
      "grad_norm": 0.2543880343437195,
      "learning_rate": 9.460804016789336e-07,
      "loss": 0.0092,
      "step": 2768100
    },
    {
      "epoch": 4.5300890922540145,
      "grad_norm": 0.314878910779953,
      "learning_rate": 9.460145094654165e-07,
      "loss": 0.0131,
      "step": 2768120
    },
    {
      "epoch": 4.530121822692668,
      "grad_norm": 0.28649380803108215,
      "learning_rate": 9.459486172518994e-07,
      "loss": 0.0057,
      "step": 2768140
    },
    {
      "epoch": 4.530154553131321,
      "grad_norm": 0.06611499190330505,
      "learning_rate": 9.458827250383823e-07,
      "loss": 0.0056,
      "step": 2768160
    },
    {
      "epoch": 4.530187283569974,
      "grad_norm": 0.1199335902929306,
      "learning_rate": 9.458168328248651e-07,
      "loss": 0.0105,
      "step": 2768180
    },
    {
      "epoch": 4.530220014008628,
      "grad_norm": 0.1674925833940506,
      "learning_rate": 9.45750940611348e-07,
      "loss": 0.0068,
      "step": 2768200
    },
    {
      "epoch": 4.5302527444472815,
      "grad_norm": 0.2820975184440613,
      "learning_rate": 9.456850483978309e-07,
      "loss": 0.0072,
      "step": 2768220
    },
    {
      "epoch": 4.530285474885934,
      "grad_norm": 0.2595824599266052,
      "learning_rate": 9.456191561843138e-07,
      "loss": 0.008,
      "step": 2768240
    },
    {
      "epoch": 4.530318205324588,
      "grad_norm": 0.34707877039909363,
      "learning_rate": 9.455532639707966e-07,
      "loss": 0.0085,
      "step": 2768260
    },
    {
      "epoch": 4.530350935763241,
      "grad_norm": 0.18628278374671936,
      "learning_rate": 9.454873717572795e-07,
      "loss": 0.0096,
      "step": 2768280
    },
    {
      "epoch": 4.530383666201894,
      "grad_norm": 0.2093835324048996,
      "learning_rate": 9.454214795437624e-07,
      "loss": 0.0105,
      "step": 2768300
    },
    {
      "epoch": 4.530416396640548,
      "grad_norm": 0.18572810292243958,
      "learning_rate": 9.453555873302453e-07,
      "loss": 0.0072,
      "step": 2768320
    },
    {
      "epoch": 4.530449127079201,
      "grad_norm": 0.11790421605110168,
      "learning_rate": 9.452896951167281e-07,
      "loss": 0.008,
      "step": 2768340
    },
    {
      "epoch": 4.530481857517854,
      "grad_norm": 0.29574644565582275,
      "learning_rate": 9.452238029032109e-07,
      "loss": 0.0123,
      "step": 2768360
    },
    {
      "epoch": 4.530514587956508,
      "grad_norm": 0.34711411595344543,
      "learning_rate": 9.451579106896938e-07,
      "loss": 0.0087,
      "step": 2768380
    },
    {
      "epoch": 4.530547318395161,
      "grad_norm": 0.49622926115989685,
      "learning_rate": 9.450920184761768e-07,
      "loss": 0.0119,
      "step": 2768400
    },
    {
      "epoch": 4.530580048833815,
      "grad_norm": 0.3493582606315613,
      "learning_rate": 9.450261262626596e-07,
      "loss": 0.0073,
      "step": 2768420
    },
    {
      "epoch": 4.5306127792724675,
      "grad_norm": 0.1247624084353447,
      "learning_rate": 9.449602340491424e-07,
      "loss": 0.0081,
      "step": 2768440
    },
    {
      "epoch": 4.530645509711121,
      "grad_norm": 0.16975019872188568,
      "learning_rate": 9.448943418356253e-07,
      "loss": 0.011,
      "step": 2768460
    },
    {
      "epoch": 4.530678240149775,
      "grad_norm": 0.3097149133682251,
      "learning_rate": 9.448284496221083e-07,
      "loss": 0.0097,
      "step": 2768480
    },
    {
      "epoch": 4.530710970588428,
      "grad_norm": 0.20042684674263,
      "learning_rate": 9.447625574085912e-07,
      "loss": 0.0121,
      "step": 2768500
    },
    {
      "epoch": 4.530743701027081,
      "grad_norm": 0.25087639689445496,
      "learning_rate": 9.446966651950739e-07,
      "loss": 0.0063,
      "step": 2768520
    },
    {
      "epoch": 4.530776431465735,
      "grad_norm": 0.1441127359867096,
      "learning_rate": 9.446307729815568e-07,
      "loss": 0.0115,
      "step": 2768540
    },
    {
      "epoch": 4.530809161904388,
      "grad_norm": 0.5375674962997437,
      "learning_rate": 9.445648807680396e-07,
      "loss": 0.0102,
      "step": 2768560
    },
    {
      "epoch": 4.530841892343041,
      "grad_norm": 0.09597793966531754,
      "learning_rate": 9.444989885545227e-07,
      "loss": 0.0081,
      "step": 2768580
    },
    {
      "epoch": 4.5308746227816945,
      "grad_norm": 0.44815537333488464,
      "learning_rate": 9.444330963410055e-07,
      "loss": 0.0099,
      "step": 2768600
    },
    {
      "epoch": 4.530907353220348,
      "grad_norm": 0.21289211511611938,
      "learning_rate": 9.443672041274884e-07,
      "loss": 0.0155,
      "step": 2768620
    },
    {
      "epoch": 4.530940083659001,
      "grad_norm": 0.1096951961517334,
      "learning_rate": 9.443013119139711e-07,
      "loss": 0.011,
      "step": 2768640
    },
    {
      "epoch": 4.530972814097654,
      "grad_norm": 0.42754191160202026,
      "learning_rate": 9.442354197004542e-07,
      "loss": 0.0087,
      "step": 2768660
    },
    {
      "epoch": 4.531005544536308,
      "grad_norm": 0.153911754488945,
      "learning_rate": 9.44169527486937e-07,
      "loss": 0.0069,
      "step": 2768680
    },
    {
      "epoch": 4.531038274974962,
      "grad_norm": 0.5093327760696411,
      "learning_rate": 9.441036352734198e-07,
      "loss": 0.0104,
      "step": 2768700
    },
    {
      "epoch": 4.531071005413614,
      "grad_norm": 0.20973768830299377,
      "learning_rate": 9.440377430599027e-07,
      "loss": 0.0072,
      "step": 2768720
    },
    {
      "epoch": 4.531103735852268,
      "grad_norm": 0.4212479591369629,
      "learning_rate": 9.439718508463856e-07,
      "loss": 0.0092,
      "step": 2768740
    },
    {
      "epoch": 4.5311364662909215,
      "grad_norm": 0.2193680852651596,
      "learning_rate": 9.439059586328685e-07,
      "loss": 0.0106,
      "step": 2768760
    },
    {
      "epoch": 4.531169196729574,
      "grad_norm": 0.2137192338705063,
      "learning_rate": 9.438400664193513e-07,
      "loss": 0.0084,
      "step": 2768780
    },
    {
      "epoch": 4.531201927168228,
      "grad_norm": 0.16934342682361603,
      "learning_rate": 9.437741742058342e-07,
      "loss": 0.0079,
      "step": 2768800
    },
    {
      "epoch": 4.531234657606881,
      "grad_norm": 0.17857013642787933,
      "learning_rate": 9.43708281992317e-07,
      "loss": 0.0092,
      "step": 2768820
    },
    {
      "epoch": 4.531267388045535,
      "grad_norm": 0.5105690956115723,
      "learning_rate": 9.436423897788e-07,
      "loss": 0.0164,
      "step": 2768840
    },
    {
      "epoch": 4.531300118484188,
      "grad_norm": 0.10390512645244598,
      "learning_rate": 9.435764975652828e-07,
      "loss": 0.0087,
      "step": 2768860
    },
    {
      "epoch": 4.531332848922841,
      "grad_norm": 0.44908615946769714,
      "learning_rate": 9.435106053517657e-07,
      "loss": 0.0111,
      "step": 2768880
    },
    {
      "epoch": 4.531365579361495,
      "grad_norm": 0.08588232845067978,
      "learning_rate": 9.434447131382485e-07,
      "loss": 0.0066,
      "step": 2768900
    },
    {
      "epoch": 4.531398309800148,
      "grad_norm": 0.23186612129211426,
      "learning_rate": 9.433788209247315e-07,
      "loss": 0.0098,
      "step": 2768920
    },
    {
      "epoch": 4.531431040238801,
      "grad_norm": 0.1447322815656662,
      "learning_rate": 9.433129287112143e-07,
      "loss": 0.0119,
      "step": 2768940
    },
    {
      "epoch": 4.531463770677455,
      "grad_norm": 0.2467629462480545,
      "learning_rate": 9.432470364976972e-07,
      "loss": 0.007,
      "step": 2768960
    },
    {
      "epoch": 4.5314965011161075,
      "grad_norm": 0.0747857466340065,
      "learning_rate": 9.4318114428418e-07,
      "loss": 0.0062,
      "step": 2768980
    },
    {
      "epoch": 4.531529231554761,
      "grad_norm": 0.38424795866012573,
      "learning_rate": 9.431152520706628e-07,
      "loss": 0.009,
      "step": 2769000
    },
    {
      "epoch": 4.531561961993415,
      "grad_norm": 0.07187311351299286,
      "learning_rate": 9.430493598571458e-07,
      "loss": 0.0111,
      "step": 2769020
    },
    {
      "epoch": 4.531594692432068,
      "grad_norm": 0.08453285694122314,
      "learning_rate": 9.429834676436286e-07,
      "loss": 0.0072,
      "step": 2769040
    },
    {
      "epoch": 4.531627422870721,
      "grad_norm": 0.6730343699455261,
      "learning_rate": 9.429175754301115e-07,
      "loss": 0.0103,
      "step": 2769060
    },
    {
      "epoch": 4.5316601533093746,
      "grad_norm": 0.2327510267496109,
      "learning_rate": 9.428516832165943e-07,
      "loss": 0.0121,
      "step": 2769080
    },
    {
      "epoch": 4.531692883748028,
      "grad_norm": 0.2061867117881775,
      "learning_rate": 9.427857910030773e-07,
      "loss": 0.0108,
      "step": 2769100
    },
    {
      "epoch": 4.531725614186682,
      "grad_norm": 0.2527298927307129,
      "learning_rate": 9.427198987895601e-07,
      "loss": 0.0073,
      "step": 2769120
    },
    {
      "epoch": 4.5317583446253344,
      "grad_norm": 1.0182712078094482,
      "learning_rate": 9.42654006576043e-07,
      "loss": 0.0093,
      "step": 2769140
    },
    {
      "epoch": 4.531791075063988,
      "grad_norm": 0.321382999420166,
      "learning_rate": 9.425881143625258e-07,
      "loss": 0.0076,
      "step": 2769160
    },
    {
      "epoch": 4.531823805502642,
      "grad_norm": 0.03769989311695099,
      "learning_rate": 9.425222221490088e-07,
      "loss": 0.0078,
      "step": 2769180
    },
    {
      "epoch": 4.531856535941294,
      "grad_norm": 0.24495847523212433,
      "learning_rate": 9.424563299354916e-07,
      "loss": 0.0067,
      "step": 2769200
    },
    {
      "epoch": 4.531889266379948,
      "grad_norm": 0.31540217995643616,
      "learning_rate": 9.423904377219745e-07,
      "loss": 0.0073,
      "step": 2769220
    },
    {
      "epoch": 4.5319219968186015,
      "grad_norm": 0.22278547286987305,
      "learning_rate": 9.423245455084573e-07,
      "loss": 0.0086,
      "step": 2769240
    },
    {
      "epoch": 4.531954727257254,
      "grad_norm": 0.9092972278594971,
      "learning_rate": 9.422586532949402e-07,
      "loss": 0.0163,
      "step": 2769260
    },
    {
      "epoch": 4.531987457695908,
      "grad_norm": 0.14398160576820374,
      "learning_rate": 9.421927610814231e-07,
      "loss": 0.0085,
      "step": 2769280
    },
    {
      "epoch": 4.532020188134561,
      "grad_norm": 0.11182963848114014,
      "learning_rate": 9.42126868867906e-07,
      "loss": 0.0127,
      "step": 2769300
    },
    {
      "epoch": 4.532052918573215,
      "grad_norm": 0.3337860107421875,
      "learning_rate": 9.420609766543888e-07,
      "loss": 0.0082,
      "step": 2769320
    },
    {
      "epoch": 4.532085649011868,
      "grad_norm": 0.1406746655702591,
      "learning_rate": 9.419950844408716e-07,
      "loss": 0.007,
      "step": 2769340
    },
    {
      "epoch": 4.532118379450521,
      "grad_norm": 0.22080184519290924,
      "learning_rate": 9.419291922273546e-07,
      "loss": 0.0097,
      "step": 2769360
    },
    {
      "epoch": 4.532151109889175,
      "grad_norm": 0.17902003228664398,
      "learning_rate": 9.418633000138374e-07,
      "loss": 0.0053,
      "step": 2769380
    },
    {
      "epoch": 4.5321838403278285,
      "grad_norm": 0.3705770671367645,
      "learning_rate": 9.417974078003203e-07,
      "loss": 0.0088,
      "step": 2769400
    },
    {
      "epoch": 4.532216570766481,
      "grad_norm": 0.27654558420181274,
      "learning_rate": 9.417315155868031e-07,
      "loss": 0.0063,
      "step": 2769420
    },
    {
      "epoch": 4.532249301205135,
      "grad_norm": 0.2832413911819458,
      "learning_rate": 9.416656233732861e-07,
      "loss": 0.007,
      "step": 2769440
    },
    {
      "epoch": 4.532282031643788,
      "grad_norm": 0.2181328535079956,
      "learning_rate": 9.415997311597689e-07,
      "loss": 0.0134,
      "step": 2769460
    },
    {
      "epoch": 4.532314762082441,
      "grad_norm": 0.05377146229147911,
      "learning_rate": 9.415338389462518e-07,
      "loss": 0.0107,
      "step": 2769480
    },
    {
      "epoch": 4.532347492521095,
      "grad_norm": 0.5961909294128418,
      "learning_rate": 9.414679467327346e-07,
      "loss": 0.0076,
      "step": 2769500
    },
    {
      "epoch": 4.532380222959748,
      "grad_norm": 0.48659762740135193,
      "learning_rate": 9.414020545192175e-07,
      "loss": 0.0104,
      "step": 2769520
    },
    {
      "epoch": 4.532412953398401,
      "grad_norm": 0.15554557740688324,
      "learning_rate": 9.413361623057004e-07,
      "loss": 0.0088,
      "step": 2769540
    },
    {
      "epoch": 4.532445683837055,
      "grad_norm": 0.9325969815254211,
      "learning_rate": 9.412702700921833e-07,
      "loss": 0.008,
      "step": 2769560
    },
    {
      "epoch": 4.532478414275708,
      "grad_norm": 0.3168345093727112,
      "learning_rate": 9.412043778786661e-07,
      "loss": 0.0104,
      "step": 2769580
    },
    {
      "epoch": 4.532511144714362,
      "grad_norm": 0.14674115180969238,
      "learning_rate": 9.41138485665149e-07,
      "loss": 0.0088,
      "step": 2769600
    },
    {
      "epoch": 4.5325438751530145,
      "grad_norm": 0.26855501532554626,
      "learning_rate": 9.410725934516319e-07,
      "loss": 0.0082,
      "step": 2769620
    },
    {
      "epoch": 4.532576605591668,
      "grad_norm": 0.11958692967891693,
      "learning_rate": 9.410067012381148e-07,
      "loss": 0.0059,
      "step": 2769640
    },
    {
      "epoch": 4.532609336030322,
      "grad_norm": 0.24372515082359314,
      "learning_rate": 9.409408090245976e-07,
      "loss": 0.0081,
      "step": 2769660
    },
    {
      "epoch": 4.532642066468975,
      "grad_norm": 0.14444124698638916,
      "learning_rate": 9.408749168110804e-07,
      "loss": 0.0097,
      "step": 2769680
    },
    {
      "epoch": 4.532674796907628,
      "grad_norm": 0.3876151740550995,
      "learning_rate": 9.408090245975633e-07,
      "loss": 0.0096,
      "step": 2769700
    },
    {
      "epoch": 4.532707527346282,
      "grad_norm": 0.08556906133890152,
      "learning_rate": 9.407431323840464e-07,
      "loss": 0.0116,
      "step": 2769720
    },
    {
      "epoch": 4.532740257784935,
      "grad_norm": 0.14048798382282257,
      "learning_rate": 9.406772401705291e-07,
      "loss": 0.0096,
      "step": 2769740
    },
    {
      "epoch": 4.532772988223588,
      "grad_norm": 0.8695741295814514,
      "learning_rate": 9.406113479570119e-07,
      "loss": 0.0097,
      "step": 2769760
    },
    {
      "epoch": 4.5328057186622415,
      "grad_norm": 0.40504398941993713,
      "learning_rate": 9.405454557434948e-07,
      "loss": 0.0064,
      "step": 2769780
    },
    {
      "epoch": 4.532838449100895,
      "grad_norm": 0.18941879272460938,
      "learning_rate": 9.404795635299778e-07,
      "loss": 0.0083,
      "step": 2769800
    },
    {
      "epoch": 4.532871179539548,
      "grad_norm": 0.6317160725593567,
      "learning_rate": 9.404136713164607e-07,
      "loss": 0.0113,
      "step": 2769820
    },
    {
      "epoch": 4.532903909978201,
      "grad_norm": 0.3525349199771881,
      "learning_rate": 9.403477791029434e-07,
      "loss": 0.0056,
      "step": 2769840
    },
    {
      "epoch": 4.532936640416855,
      "grad_norm": 0.06610780209302902,
      "learning_rate": 9.402818868894263e-07,
      "loss": 0.0062,
      "step": 2769860
    },
    {
      "epoch": 4.5329693708555086,
      "grad_norm": 0.14217855036258698,
      "learning_rate": 9.402159946759093e-07,
      "loss": 0.0081,
      "step": 2769880
    },
    {
      "epoch": 4.533002101294161,
      "grad_norm": 0.8004207015037537,
      "learning_rate": 9.401501024623922e-07,
      "loss": 0.0109,
      "step": 2769900
    },
    {
      "epoch": 4.533034831732815,
      "grad_norm": 0.05518092215061188,
      "learning_rate": 9.40084210248875e-07,
      "loss": 0.0055,
      "step": 2769920
    },
    {
      "epoch": 4.5330675621714684,
      "grad_norm": 0.24515441060066223,
      "learning_rate": 9.400183180353579e-07,
      "loss": 0.0073,
      "step": 2769940
    },
    {
      "epoch": 4.533100292610122,
      "grad_norm": 0.24460455775260925,
      "learning_rate": 9.399524258218406e-07,
      "loss": 0.0101,
      "step": 2769960
    },
    {
      "epoch": 4.533133023048775,
      "grad_norm": 0.15516065061092377,
      "learning_rate": 9.398865336083237e-07,
      "loss": 0.0076,
      "step": 2769980
    },
    {
      "epoch": 4.533165753487428,
      "grad_norm": 0.20922450721263885,
      "learning_rate": 9.398206413948065e-07,
      "loss": 0.007,
      "step": 2770000
    },
    {
      "epoch": 4.533198483926082,
      "grad_norm": 0.17300936579704285,
      "learning_rate": 9.397547491812894e-07,
      "loss": 0.0089,
      "step": 2770020
    },
    {
      "epoch": 4.533231214364735,
      "grad_norm": 0.4214388430118561,
      "learning_rate": 9.396888569677722e-07,
      "loss": 0.0099,
      "step": 2770040
    },
    {
      "epoch": 4.533263944803388,
      "grad_norm": 0.5939456224441528,
      "learning_rate": 9.396229647542551e-07,
      "loss": 0.0095,
      "step": 2770060
    },
    {
      "epoch": 4.533296675242042,
      "grad_norm": 0.5057453513145447,
      "learning_rate": 9.39557072540738e-07,
      "loss": 0.0074,
      "step": 2770080
    },
    {
      "epoch": 4.5333294056806945,
      "grad_norm": 0.20431861281394958,
      "learning_rate": 9.394911803272208e-07,
      "loss": 0.0065,
      "step": 2770100
    },
    {
      "epoch": 4.533362136119348,
      "grad_norm": 0.2447570413351059,
      "learning_rate": 9.394252881137037e-07,
      "loss": 0.0103,
      "step": 2770120
    },
    {
      "epoch": 4.533394866558002,
      "grad_norm": 0.1354321539402008,
      "learning_rate": 9.393593959001865e-07,
      "loss": 0.0064,
      "step": 2770140
    },
    {
      "epoch": 4.533427596996655,
      "grad_norm": 0.22991226613521576,
      "learning_rate": 9.392935036866695e-07,
      "loss": 0.0081,
      "step": 2770160
    },
    {
      "epoch": 4.533460327435308,
      "grad_norm": 0.18160675466060638,
      "learning_rate": 9.392276114731523e-07,
      "loss": 0.0057,
      "step": 2770180
    },
    {
      "epoch": 4.533493057873962,
      "grad_norm": 0.5457542538642883,
      "learning_rate": 9.391617192596352e-07,
      "loss": 0.009,
      "step": 2770200
    },
    {
      "epoch": 4.533525788312615,
      "grad_norm": 0.4812980592250824,
      "learning_rate": 9.39095827046118e-07,
      "loss": 0.0092,
      "step": 2770220
    },
    {
      "epoch": 4.533558518751268,
      "grad_norm": 0.13521786034107208,
      "learning_rate": 9.39029934832601e-07,
      "loss": 0.009,
      "step": 2770240
    },
    {
      "epoch": 4.5335912491899215,
      "grad_norm": 0.6821891069412231,
      "learning_rate": 9.389640426190838e-07,
      "loss": 0.0078,
      "step": 2770260
    },
    {
      "epoch": 4.533623979628575,
      "grad_norm": 0.2815551161766052,
      "learning_rate": 9.388981504055667e-07,
      "loss": 0.0127,
      "step": 2770280
    },
    {
      "epoch": 4.533656710067229,
      "grad_norm": 0.28728872537612915,
      "learning_rate": 9.388322581920495e-07,
      "loss": 0.0076,
      "step": 2770300
    },
    {
      "epoch": 4.533689440505881,
      "grad_norm": 0.19436955451965332,
      "learning_rate": 9.387663659785325e-07,
      "loss": 0.0119,
      "step": 2770320
    },
    {
      "epoch": 4.533722170944535,
      "grad_norm": 0.06968085467815399,
      "learning_rate": 9.387004737650153e-07,
      "loss": 0.0121,
      "step": 2770340
    },
    {
      "epoch": 4.533754901383189,
      "grad_norm": 0.21335098147392273,
      "learning_rate": 9.386345815514982e-07,
      "loss": 0.0085,
      "step": 2770360
    },
    {
      "epoch": 4.533787631821841,
      "grad_norm": 0.16283191740512848,
      "learning_rate": 9.38568689337981e-07,
      "loss": 0.0087,
      "step": 2770380
    },
    {
      "epoch": 4.533820362260495,
      "grad_norm": 0.13296739757061005,
      "learning_rate": 9.385027971244638e-07,
      "loss": 0.0083,
      "step": 2770400
    },
    {
      "epoch": 4.5338530926991485,
      "grad_norm": 0.15272696316242218,
      "learning_rate": 9.384369049109468e-07,
      "loss": 0.0096,
      "step": 2770420
    },
    {
      "epoch": 4.533885823137801,
      "grad_norm": 0.26630452275276184,
      "learning_rate": 9.383710126974296e-07,
      "loss": 0.0093,
      "step": 2770440
    },
    {
      "epoch": 4.533918553576455,
      "grad_norm": 0.42736518383026123,
      "learning_rate": 9.383051204839125e-07,
      "loss": 0.0124,
      "step": 2770460
    },
    {
      "epoch": 4.533951284015108,
      "grad_norm": 0.3374854624271393,
      "learning_rate": 9.382392282703953e-07,
      "loss": 0.0087,
      "step": 2770480
    },
    {
      "epoch": 4.533984014453762,
      "grad_norm": 0.20365838706493378,
      "learning_rate": 9.381733360568783e-07,
      "loss": 0.0068,
      "step": 2770500
    },
    {
      "epoch": 4.534016744892415,
      "grad_norm": 0.7221630811691284,
      "learning_rate": 9.381074438433611e-07,
      "loss": 0.0105,
      "step": 2770520
    },
    {
      "epoch": 4.534049475331068,
      "grad_norm": 0.12352869659662247,
      "learning_rate": 9.38041551629844e-07,
      "loss": 0.006,
      "step": 2770540
    },
    {
      "epoch": 4.534082205769722,
      "grad_norm": 0.2551485598087311,
      "learning_rate": 9.379756594163268e-07,
      "loss": 0.0128,
      "step": 2770560
    },
    {
      "epoch": 4.5341149362083755,
      "grad_norm": 0.15831373631954193,
      "learning_rate": 9.379097672028097e-07,
      "loss": 0.0096,
      "step": 2770580
    },
    {
      "epoch": 4.534147666647028,
      "grad_norm": 0.3558620810508728,
      "learning_rate": 9.378438749892926e-07,
      "loss": 0.0082,
      "step": 2770600
    },
    {
      "epoch": 4.534180397085682,
      "grad_norm": 0.08946449309587479,
      "learning_rate": 9.377779827757755e-07,
      "loss": 0.0097,
      "step": 2770620
    },
    {
      "epoch": 4.534213127524335,
      "grad_norm": 0.3173362612724304,
      "learning_rate": 9.377120905622583e-07,
      "loss": 0.006,
      "step": 2770640
    },
    {
      "epoch": 4.534245857962988,
      "grad_norm": 0.13174070417881012,
      "learning_rate": 9.376461983487412e-07,
      "loss": 0.0071,
      "step": 2770660
    },
    {
      "epoch": 4.534278588401642,
      "grad_norm": 0.14905788004398346,
      "learning_rate": 9.375803061352241e-07,
      "loss": 0.0079,
      "step": 2770680
    },
    {
      "epoch": 4.534311318840295,
      "grad_norm": 0.10434416681528091,
      "learning_rate": 9.37514413921707e-07,
      "loss": 0.0087,
      "step": 2770700
    },
    {
      "epoch": 4.534344049278948,
      "grad_norm": 0.23548750579357147,
      "learning_rate": 9.374485217081898e-07,
      "loss": 0.0082,
      "step": 2770720
    },
    {
      "epoch": 4.534376779717602,
      "grad_norm": 0.1821376234292984,
      "learning_rate": 9.373826294946726e-07,
      "loss": 0.0061,
      "step": 2770740
    },
    {
      "epoch": 4.534409510156255,
      "grad_norm": 0.16647425293922424,
      "learning_rate": 9.373167372811556e-07,
      "loss": 0.0116,
      "step": 2770760
    },
    {
      "epoch": 4.534442240594909,
      "grad_norm": 0.10193588584661484,
      "learning_rate": 9.372508450676384e-07,
      "loss": 0.0073,
      "step": 2770780
    },
    {
      "epoch": 4.5344749710335615,
      "grad_norm": 0.18116018176078796,
      "learning_rate": 9.371849528541213e-07,
      "loss": 0.0092,
      "step": 2770800
    },
    {
      "epoch": 4.534507701472215,
      "grad_norm": 0.08324993401765823,
      "learning_rate": 9.371190606406041e-07,
      "loss": 0.0063,
      "step": 2770820
    },
    {
      "epoch": 4.534540431910869,
      "grad_norm": 0.18721118569374084,
      "learning_rate": 9.37053168427087e-07,
      "loss": 0.009,
      "step": 2770840
    },
    {
      "epoch": 4.534573162349522,
      "grad_norm": 0.2831173539161682,
      "learning_rate": 9.369872762135699e-07,
      "loss": 0.0062,
      "step": 2770860
    },
    {
      "epoch": 4.534605892788175,
      "grad_norm": 0.36778339743614197,
      "learning_rate": 9.369213840000528e-07,
      "loss": 0.007,
      "step": 2770880
    },
    {
      "epoch": 4.5346386232268285,
      "grad_norm": 0.1736099123954773,
      "learning_rate": 9.368554917865356e-07,
      "loss": 0.0115,
      "step": 2770900
    },
    {
      "epoch": 4.534671353665482,
      "grad_norm": 0.16258656978607178,
      "learning_rate": 9.367895995730185e-07,
      "loss": 0.008,
      "step": 2770920
    },
    {
      "epoch": 4.534704084104135,
      "grad_norm": 0.23600395023822784,
      "learning_rate": 9.367237073595014e-07,
      "loss": 0.0058,
      "step": 2770940
    },
    {
      "epoch": 4.534736814542788,
      "grad_norm": 0.2445087432861328,
      "learning_rate": 9.366578151459843e-07,
      "loss": 0.0085,
      "step": 2770960
    },
    {
      "epoch": 4.534769544981442,
      "grad_norm": 0.18278822302818298,
      "learning_rate": 9.365919229324671e-07,
      "loss": 0.0135,
      "step": 2770980
    },
    {
      "epoch": 4.534802275420095,
      "grad_norm": 0.1386926770210266,
      "learning_rate": 9.3652603071895e-07,
      "loss": 0.011,
      "step": 2771000
    },
    {
      "epoch": 4.534835005858748,
      "grad_norm": 0.5459724068641663,
      "learning_rate": 9.364601385054328e-07,
      "loss": 0.0067,
      "step": 2771020
    },
    {
      "epoch": 4.534867736297402,
      "grad_norm": 0.19810734689235687,
      "learning_rate": 9.363942462919159e-07,
      "loss": 0.0074,
      "step": 2771040
    },
    {
      "epoch": 4.5349004667360555,
      "grad_norm": 0.2298058122396469,
      "learning_rate": 9.363283540783986e-07,
      "loss": 0.0078,
      "step": 2771060
    },
    {
      "epoch": 4.534933197174708,
      "grad_norm": 0.28441551327705383,
      "learning_rate": 9.362624618648814e-07,
      "loss": 0.0095,
      "step": 2771080
    },
    {
      "epoch": 4.534965927613362,
      "grad_norm": 0.15425899624824524,
      "learning_rate": 9.361965696513643e-07,
      "loss": 0.0084,
      "step": 2771100
    },
    {
      "epoch": 4.534998658052015,
      "grad_norm": 0.09713822603225708,
      "learning_rate": 9.361306774378473e-07,
      "loss": 0.0128,
      "step": 2771120
    },
    {
      "epoch": 4.535031388490669,
      "grad_norm": 0.19572384655475616,
      "learning_rate": 9.360647852243302e-07,
      "loss": 0.005,
      "step": 2771140
    },
    {
      "epoch": 4.535064118929322,
      "grad_norm": 0.2726752758026123,
      "learning_rate": 9.359988930108129e-07,
      "loss": 0.0093,
      "step": 2771160
    },
    {
      "epoch": 4.535096849367975,
      "grad_norm": 0.37813282012939453,
      "learning_rate": 9.359330007972958e-07,
      "loss": 0.0096,
      "step": 2771180
    },
    {
      "epoch": 4.535129579806629,
      "grad_norm": 0.48176807165145874,
      "learning_rate": 9.358671085837788e-07,
      "loss": 0.007,
      "step": 2771200
    },
    {
      "epoch": 4.535162310245282,
      "grad_norm": 0.1145629957318306,
      "learning_rate": 9.358012163702617e-07,
      "loss": 0.0048,
      "step": 2771220
    },
    {
      "epoch": 4.535195040683935,
      "grad_norm": 0.20046985149383545,
      "learning_rate": 9.357353241567445e-07,
      "loss": 0.0073,
      "step": 2771240
    },
    {
      "epoch": 4.535227771122589,
      "grad_norm": 0.1458316296339035,
      "learning_rate": 9.356694319432274e-07,
      "loss": 0.0091,
      "step": 2771260
    },
    {
      "epoch": 4.5352605015612415,
      "grad_norm": 0.7431470155715942,
      "learning_rate": 9.356035397297101e-07,
      "loss": 0.0065,
      "step": 2771280
    },
    {
      "epoch": 4.535293231999895,
      "grad_norm": 0.16488218307495117,
      "learning_rate": 9.355376475161932e-07,
      "loss": 0.0092,
      "step": 2771300
    },
    {
      "epoch": 4.535325962438549,
      "grad_norm": 0.20353366434574127,
      "learning_rate": 9.35471755302676e-07,
      "loss": 0.0093,
      "step": 2771320
    },
    {
      "epoch": 4.535358692877202,
      "grad_norm": 0.4039164185523987,
      "learning_rate": 9.354058630891589e-07,
      "loss": 0.0102,
      "step": 2771340
    },
    {
      "epoch": 4.535391423315855,
      "grad_norm": 0.12911738455295563,
      "learning_rate": 9.353399708756417e-07,
      "loss": 0.0115,
      "step": 2771360
    },
    {
      "epoch": 4.535424153754509,
      "grad_norm": 0.33119767904281616,
      "learning_rate": 9.352740786621247e-07,
      "loss": 0.0135,
      "step": 2771380
    },
    {
      "epoch": 4.535456884193162,
      "grad_norm": 0.16108937561511993,
      "learning_rate": 9.352081864486075e-07,
      "loss": 0.0082,
      "step": 2771400
    },
    {
      "epoch": 4.535489614631816,
      "grad_norm": 0.4254303276538849,
      "learning_rate": 9.351422942350903e-07,
      "loss": 0.0113,
      "step": 2771420
    },
    {
      "epoch": 4.5355223450704685,
      "grad_norm": 0.10766750574111938,
      "learning_rate": 9.350764020215732e-07,
      "loss": 0.0068,
      "step": 2771440
    },
    {
      "epoch": 4.535555075509122,
      "grad_norm": 0.09641966968774796,
      "learning_rate": 9.35010509808056e-07,
      "loss": 0.0084,
      "step": 2771460
    },
    {
      "epoch": 4.535587805947776,
      "grad_norm": 0.6367719173431396,
      "learning_rate": 9.34944617594539e-07,
      "loss": 0.0095,
      "step": 2771480
    },
    {
      "epoch": 4.535620536386428,
      "grad_norm": 0.3123980462551117,
      "learning_rate": 9.348787253810218e-07,
      "loss": 0.0106,
      "step": 2771500
    },
    {
      "epoch": 4.535653266825082,
      "grad_norm": 0.09898693114519119,
      "learning_rate": 9.348128331675047e-07,
      "loss": 0.0081,
      "step": 2771520
    },
    {
      "epoch": 4.535685997263736,
      "grad_norm": 0.07749482244253159,
      "learning_rate": 9.347469409539875e-07,
      "loss": 0.0088,
      "step": 2771540
    },
    {
      "epoch": 4.535718727702388,
      "grad_norm": 0.15678590536117554,
      "learning_rate": 9.346810487404705e-07,
      "loss": 0.0078,
      "step": 2771560
    },
    {
      "epoch": 4.535751458141042,
      "grad_norm": 0.39862483739852905,
      "learning_rate": 9.346151565269533e-07,
      "loss": 0.0105,
      "step": 2771580
    },
    {
      "epoch": 4.5357841885796955,
      "grad_norm": 0.24176402390003204,
      "learning_rate": 9.345492643134362e-07,
      "loss": 0.0117,
      "step": 2771600
    },
    {
      "epoch": 4.535816919018349,
      "grad_norm": 0.09489624947309494,
      "learning_rate": 9.34483372099919e-07,
      "loss": 0.0107,
      "step": 2771620
    },
    {
      "epoch": 4.535849649457002,
      "grad_norm": 0.08038410544395447,
      "learning_rate": 9.34417479886402e-07,
      "loss": 0.0095,
      "step": 2771640
    },
    {
      "epoch": 4.535882379895655,
      "grad_norm": 0.11385875940322876,
      "learning_rate": 9.343515876728848e-07,
      "loss": 0.0131,
      "step": 2771660
    },
    {
      "epoch": 4.535915110334309,
      "grad_norm": 0.27222713828086853,
      "learning_rate": 9.342856954593677e-07,
      "loss": 0.0059,
      "step": 2771680
    },
    {
      "epoch": 4.5359478407729625,
      "grad_norm": 0.11569041758775711,
      "learning_rate": 9.342198032458505e-07,
      "loss": 0.0106,
      "step": 2771700
    },
    {
      "epoch": 4.535980571211615,
      "grad_norm": 0.1362934559583664,
      "learning_rate": 9.341539110323333e-07,
      "loss": 0.0063,
      "step": 2771720
    },
    {
      "epoch": 4.536013301650269,
      "grad_norm": 0.10054583102464676,
      "learning_rate": 9.340880188188163e-07,
      "loss": 0.0126,
      "step": 2771740
    },
    {
      "epoch": 4.536046032088922,
      "grad_norm": 0.11580352485179901,
      "learning_rate": 9.340221266052991e-07,
      "loss": 0.0094,
      "step": 2771760
    },
    {
      "epoch": 4.536078762527575,
      "grad_norm": 0.17298266291618347,
      "learning_rate": 9.33956234391782e-07,
      "loss": 0.009,
      "step": 2771780
    },
    {
      "epoch": 4.536111492966229,
      "grad_norm": 0.3160451650619507,
      "learning_rate": 9.338903421782648e-07,
      "loss": 0.0061,
      "step": 2771800
    },
    {
      "epoch": 4.536144223404882,
      "grad_norm": 0.26570218801498413,
      "learning_rate": 9.338244499647478e-07,
      "loss": 0.0092,
      "step": 2771820
    },
    {
      "epoch": 4.536176953843535,
      "grad_norm": 0.1959438920021057,
      "learning_rate": 9.337585577512306e-07,
      "loss": 0.0083,
      "step": 2771840
    },
    {
      "epoch": 4.536209684282189,
      "grad_norm": 0.06685253977775574,
      "learning_rate": 9.336926655377135e-07,
      "loss": 0.0096,
      "step": 2771860
    },
    {
      "epoch": 4.536242414720842,
      "grad_norm": 0.38376384973526,
      "learning_rate": 9.336267733241963e-07,
      "loss": 0.0083,
      "step": 2771880
    },
    {
      "epoch": 4.536275145159496,
      "grad_norm": 0.11680275946855545,
      "learning_rate": 9.335608811106792e-07,
      "loss": 0.0116,
      "step": 2771900
    },
    {
      "epoch": 4.5363078755981485,
      "grad_norm": 0.13661009073257446,
      "learning_rate": 9.334949888971621e-07,
      "loss": 0.0091,
      "step": 2771920
    },
    {
      "epoch": 4.536340606036802,
      "grad_norm": 0.334206223487854,
      "learning_rate": 9.33429096683645e-07,
      "loss": 0.0095,
      "step": 2771940
    },
    {
      "epoch": 4.536373336475456,
      "grad_norm": 0.20586563646793365,
      "learning_rate": 9.333632044701278e-07,
      "loss": 0.011,
      "step": 2771960
    },
    {
      "epoch": 4.536406066914108,
      "grad_norm": 0.6116489171981812,
      "learning_rate": 9.332973122566107e-07,
      "loss": 0.0082,
      "step": 2771980
    },
    {
      "epoch": 4.536438797352762,
      "grad_norm": 0.5176841020584106,
      "learning_rate": 9.332314200430936e-07,
      "loss": 0.0088,
      "step": 2772000
    },
    {
      "epoch": 4.536471527791416,
      "grad_norm": 0.8157743215560913,
      "learning_rate": 9.331655278295765e-07,
      "loss": 0.0115,
      "step": 2772020
    },
    {
      "epoch": 4.536504258230069,
      "grad_norm": 0.1748114377260208,
      "learning_rate": 9.330996356160593e-07,
      "loss": 0.0114,
      "step": 2772040
    },
    {
      "epoch": 4.536536988668722,
      "grad_norm": 0.20271505415439606,
      "learning_rate": 9.330337434025421e-07,
      "loss": 0.0106,
      "step": 2772060
    },
    {
      "epoch": 4.5365697191073755,
      "grad_norm": 0.16351686418056488,
      "learning_rate": 9.329678511890251e-07,
      "loss": 0.0085,
      "step": 2772080
    },
    {
      "epoch": 4.536602449546029,
      "grad_norm": 0.2778760492801666,
      "learning_rate": 9.329019589755079e-07,
      "loss": 0.0099,
      "step": 2772100
    },
    {
      "epoch": 4.536635179984682,
      "grad_norm": 0.08414915949106216,
      "learning_rate": 9.328360667619908e-07,
      "loss": 0.0046,
      "step": 2772120
    },
    {
      "epoch": 4.536667910423335,
      "grad_norm": 0.07060065120458603,
      "learning_rate": 9.327701745484736e-07,
      "loss": 0.0067,
      "step": 2772140
    },
    {
      "epoch": 4.536700640861989,
      "grad_norm": 0.09667432308197021,
      "learning_rate": 9.327042823349565e-07,
      "loss": 0.0085,
      "step": 2772160
    },
    {
      "epoch": 4.536733371300642,
      "grad_norm": 0.21073311567306519,
      "learning_rate": 9.326383901214394e-07,
      "loss": 0.0112,
      "step": 2772180
    },
    {
      "epoch": 4.536766101739295,
      "grad_norm": 0.3250685930252075,
      "learning_rate": 9.325724979079223e-07,
      "loss": 0.0083,
      "step": 2772200
    },
    {
      "epoch": 4.536798832177949,
      "grad_norm": 0.24854223430156708,
      "learning_rate": 9.325066056944051e-07,
      "loss": 0.0086,
      "step": 2772220
    },
    {
      "epoch": 4.5368315626166025,
      "grad_norm": 0.06613104045391083,
      "learning_rate": 9.32440713480888e-07,
      "loss": 0.0097,
      "step": 2772240
    },
    {
      "epoch": 4.536864293055255,
      "grad_norm": 0.4776412844657898,
      "learning_rate": 9.323748212673709e-07,
      "loss": 0.0112,
      "step": 2772260
    },
    {
      "epoch": 4.536897023493909,
      "grad_norm": 0.4227040708065033,
      "learning_rate": 9.323089290538538e-07,
      "loss": 0.0098,
      "step": 2772280
    },
    {
      "epoch": 4.536929753932562,
      "grad_norm": 0.8466404676437378,
      "learning_rate": 9.322430368403366e-07,
      "loss": 0.0084,
      "step": 2772300
    },
    {
      "epoch": 4.536962484371216,
      "grad_norm": 0.11931319534778595,
      "learning_rate": 9.321771446268195e-07,
      "loss": 0.0098,
      "step": 2772320
    },
    {
      "epoch": 4.536995214809869,
      "grad_norm": 0.1996062994003296,
      "learning_rate": 9.321112524133023e-07,
      "loss": 0.0081,
      "step": 2772340
    },
    {
      "epoch": 4.537027945248522,
      "grad_norm": 0.1456909477710724,
      "learning_rate": 9.320453601997854e-07,
      "loss": 0.007,
      "step": 2772360
    },
    {
      "epoch": 4.537060675687176,
      "grad_norm": 0.749937117099762,
      "learning_rate": 9.319794679862681e-07,
      "loss": 0.0112,
      "step": 2772380
    },
    {
      "epoch": 4.537093406125829,
      "grad_norm": 0.0917230173945427,
      "learning_rate": 9.319135757727509e-07,
      "loss": 0.0061,
      "step": 2772400
    },
    {
      "epoch": 4.537126136564482,
      "grad_norm": 0.21918176114559174,
      "learning_rate": 9.318476835592338e-07,
      "loss": 0.0128,
      "step": 2772420
    },
    {
      "epoch": 4.537158867003136,
      "grad_norm": 0.34298354387283325,
      "learning_rate": 9.317817913457168e-07,
      "loss": 0.0112,
      "step": 2772440
    },
    {
      "epoch": 4.5371915974417885,
      "grad_norm": 0.11011755466461182,
      "learning_rate": 9.317158991321997e-07,
      "loss": 0.0098,
      "step": 2772460
    },
    {
      "epoch": 4.537224327880442,
      "grad_norm": 0.15057511627674103,
      "learning_rate": 9.316500069186824e-07,
      "loss": 0.0124,
      "step": 2772480
    },
    {
      "epoch": 4.537257058319096,
      "grad_norm": 0.15323282778263092,
      "learning_rate": 9.315841147051653e-07,
      "loss": 0.0121,
      "step": 2772500
    },
    {
      "epoch": 4.537289788757749,
      "grad_norm": 0.3711795508861542,
      "learning_rate": 9.315182224916483e-07,
      "loss": 0.0076,
      "step": 2772520
    },
    {
      "epoch": 4.537322519196402,
      "grad_norm": 0.2750523090362549,
      "learning_rate": 9.314523302781312e-07,
      "loss": 0.0091,
      "step": 2772540
    },
    {
      "epoch": 4.5373552496350555,
      "grad_norm": 0.07657910883426666,
      "learning_rate": 9.31386438064614e-07,
      "loss": 0.0086,
      "step": 2772560
    },
    {
      "epoch": 4.537387980073709,
      "grad_norm": 0.21349750459194183,
      "learning_rate": 9.313205458510969e-07,
      "loss": 0.0064,
      "step": 2772580
    },
    {
      "epoch": 4.537420710512363,
      "grad_norm": 0.5586839318275452,
      "learning_rate": 9.312546536375796e-07,
      "loss": 0.007,
      "step": 2772600
    },
    {
      "epoch": 4.537453440951015,
      "grad_norm": 0.3084239065647125,
      "learning_rate": 9.311887614240627e-07,
      "loss": 0.0084,
      "step": 2772620
    },
    {
      "epoch": 4.537486171389669,
      "grad_norm": 0.20645776391029358,
      "learning_rate": 9.311228692105455e-07,
      "loss": 0.009,
      "step": 2772640
    },
    {
      "epoch": 4.537518901828323,
      "grad_norm": 0.08606580644845963,
      "learning_rate": 9.310569769970284e-07,
      "loss": 0.0163,
      "step": 2772660
    },
    {
      "epoch": 4.537551632266975,
      "grad_norm": 0.3532640337944031,
      "learning_rate": 9.309910847835112e-07,
      "loss": 0.0096,
      "step": 2772680
    },
    {
      "epoch": 4.537584362705629,
      "grad_norm": 0.2130102515220642,
      "learning_rate": 9.309251925699942e-07,
      "loss": 0.0093,
      "step": 2772700
    },
    {
      "epoch": 4.5376170931442825,
      "grad_norm": 1.1042627096176147,
      "learning_rate": 9.30859300356477e-07,
      "loss": 0.0162,
      "step": 2772720
    },
    {
      "epoch": 4.537649823582935,
      "grad_norm": 0.20683355629444122,
      "learning_rate": 9.307934081429599e-07,
      "loss": 0.0111,
      "step": 2772740
    },
    {
      "epoch": 4.537682554021589,
      "grad_norm": 0.1409408152103424,
      "learning_rate": 9.307275159294427e-07,
      "loss": 0.0076,
      "step": 2772760
    },
    {
      "epoch": 4.537715284460242,
      "grad_norm": 0.3129924237728119,
      "learning_rate": 9.306616237159255e-07,
      "loss": 0.0116,
      "step": 2772780
    },
    {
      "epoch": 4.537748014898896,
      "grad_norm": 0.18059970438480377,
      "learning_rate": 9.305957315024085e-07,
      "loss": 0.007,
      "step": 2772800
    },
    {
      "epoch": 4.537780745337549,
      "grad_norm": 0.1434175968170166,
      "learning_rate": 9.305298392888913e-07,
      "loss": 0.0081,
      "step": 2772820
    },
    {
      "epoch": 4.537813475776202,
      "grad_norm": 0.1751699000597,
      "learning_rate": 9.304639470753742e-07,
      "loss": 0.0101,
      "step": 2772840
    },
    {
      "epoch": 4.537846206214856,
      "grad_norm": 0.046160344034433365,
      "learning_rate": 9.30398054861857e-07,
      "loss": 0.0086,
      "step": 2772860
    },
    {
      "epoch": 4.5378789366535095,
      "grad_norm": 0.6039935946464539,
      "learning_rate": 9.3033216264834e-07,
      "loss": 0.0096,
      "step": 2772880
    },
    {
      "epoch": 4.537911667092162,
      "grad_norm": 0.14737765491008759,
      "learning_rate": 9.302662704348228e-07,
      "loss": 0.009,
      "step": 2772900
    },
    {
      "epoch": 4.537944397530816,
      "grad_norm": 0.38020771741867065,
      "learning_rate": 9.302003782213057e-07,
      "loss": 0.0075,
      "step": 2772920
    },
    {
      "epoch": 4.537977127969469,
      "grad_norm": 0.218405082821846,
      "learning_rate": 9.301344860077885e-07,
      "loss": 0.0059,
      "step": 2772940
    },
    {
      "epoch": 4.538009858408122,
      "grad_norm": 0.16220176219940186,
      "learning_rate": 9.300685937942715e-07,
      "loss": 0.0084,
      "step": 2772960
    },
    {
      "epoch": 4.538042588846776,
      "grad_norm": 0.1494297832250595,
      "learning_rate": 9.300027015807543e-07,
      "loss": 0.011,
      "step": 2772980
    },
    {
      "epoch": 4.538075319285429,
      "grad_norm": 0.30806201696395874,
      "learning_rate": 9.299368093672372e-07,
      "loss": 0.0074,
      "step": 2773000
    },
    {
      "epoch": 4.538108049724082,
      "grad_norm": 0.1085597351193428,
      "learning_rate": 9.2987091715372e-07,
      "loss": 0.0078,
      "step": 2773020
    },
    {
      "epoch": 4.538140780162736,
      "grad_norm": 0.5104992389678955,
      "learning_rate": 9.298050249402029e-07,
      "loss": 0.0088,
      "step": 2773040
    },
    {
      "epoch": 4.538173510601389,
      "grad_norm": 0.2348262518644333,
      "learning_rate": 9.297391327266858e-07,
      "loss": 0.0124,
      "step": 2773060
    },
    {
      "epoch": 4.538206241040043,
      "grad_norm": 0.06081469729542732,
      "learning_rate": 9.296732405131687e-07,
      "loss": 0.0072,
      "step": 2773080
    },
    {
      "epoch": 4.5382389714786955,
      "grad_norm": 0.15274392068386078,
      "learning_rate": 9.296073482996515e-07,
      "loss": 0.0067,
      "step": 2773100
    },
    {
      "epoch": 4.538271701917349,
      "grad_norm": 0.19341078400611877,
      "learning_rate": 9.295414560861343e-07,
      "loss": 0.0088,
      "step": 2773120
    },
    {
      "epoch": 4.538304432356003,
      "grad_norm": 0.10216710716485977,
      "learning_rate": 9.294755638726173e-07,
      "loss": 0.0137,
      "step": 2773140
    },
    {
      "epoch": 4.538337162794656,
      "grad_norm": 0.17214220762252808,
      "learning_rate": 9.294096716591001e-07,
      "loss": 0.0069,
      "step": 2773160
    },
    {
      "epoch": 4.538369893233309,
      "grad_norm": 0.23431730270385742,
      "learning_rate": 9.29343779445583e-07,
      "loss": 0.0101,
      "step": 2773180
    },
    {
      "epoch": 4.538402623671963,
      "grad_norm": 0.03352062776684761,
      "learning_rate": 9.292778872320658e-07,
      "loss": 0.008,
      "step": 2773200
    },
    {
      "epoch": 4.538435354110616,
      "grad_norm": 0.5616298317909241,
      "learning_rate": 9.292119950185487e-07,
      "loss": 0.0074,
      "step": 2773220
    },
    {
      "epoch": 4.538468084549269,
      "grad_norm": 0.1765642911195755,
      "learning_rate": 9.291461028050316e-07,
      "loss": 0.0057,
      "step": 2773240
    },
    {
      "epoch": 4.5385008149879225,
      "grad_norm": 0.3087719678878784,
      "learning_rate": 9.290802105915145e-07,
      "loss": 0.0081,
      "step": 2773260
    },
    {
      "epoch": 4.538533545426576,
      "grad_norm": 0.3425047993659973,
      "learning_rate": 9.290143183779973e-07,
      "loss": 0.0062,
      "step": 2773280
    },
    {
      "epoch": 4.538566275865229,
      "grad_norm": 0.35420969128608704,
      "learning_rate": 9.289484261644802e-07,
      "loss": 0.0068,
      "step": 2773300
    },
    {
      "epoch": 4.538599006303882,
      "grad_norm": 0.07552214711904526,
      "learning_rate": 9.288825339509631e-07,
      "loss": 0.0079,
      "step": 2773320
    },
    {
      "epoch": 4.538631736742536,
      "grad_norm": 0.10890128463506699,
      "learning_rate": 9.28816641737446e-07,
      "loss": 0.0072,
      "step": 2773340
    },
    {
      "epoch": 4.5386644671811895,
      "grad_norm": 0.3051174581050873,
      "learning_rate": 9.287507495239288e-07,
      "loss": 0.0096,
      "step": 2773360
    },
    {
      "epoch": 4.538697197619842,
      "grad_norm": 0.09749256074428558,
      "learning_rate": 9.286848573104117e-07,
      "loss": 0.0083,
      "step": 2773380
    },
    {
      "epoch": 4.538729928058496,
      "grad_norm": 0.2460186928510666,
      "learning_rate": 9.286189650968946e-07,
      "loss": 0.0183,
      "step": 2773400
    },
    {
      "epoch": 4.538762658497149,
      "grad_norm": 0.13920822739601135,
      "learning_rate": 9.285530728833774e-07,
      "loss": 0.0104,
      "step": 2773420
    },
    {
      "epoch": 4.538795388935802,
      "grad_norm": 0.21647529304027557,
      "learning_rate": 9.284871806698603e-07,
      "loss": 0.0123,
      "step": 2773440
    },
    {
      "epoch": 4.538828119374456,
      "grad_norm": 0.21788997948169708,
      "learning_rate": 9.284212884563431e-07,
      "loss": 0.0124,
      "step": 2773460
    },
    {
      "epoch": 4.538860849813109,
      "grad_norm": 0.16592249274253845,
      "learning_rate": 9.28355396242826e-07,
      "loss": 0.0106,
      "step": 2773480
    },
    {
      "epoch": 4.538893580251763,
      "grad_norm": 1.4470570087432861,
      "learning_rate": 9.282895040293089e-07,
      "loss": 0.0106,
      "step": 2773500
    },
    {
      "epoch": 4.538926310690416,
      "grad_norm": 0.6928408741950989,
      "learning_rate": 9.282236118157918e-07,
      "loss": 0.0077,
      "step": 2773520
    },
    {
      "epoch": 4.538959041129069,
      "grad_norm": 0.2547268569469452,
      "learning_rate": 9.281577196022746e-07,
      "loss": 0.0077,
      "step": 2773540
    },
    {
      "epoch": 4.538991771567723,
      "grad_norm": 0.12525545060634613,
      "learning_rate": 9.280918273887575e-07,
      "loss": 0.0124,
      "step": 2773560
    },
    {
      "epoch": 4.5390245020063755,
      "grad_norm": 0.3474106788635254,
      "learning_rate": 9.280259351752404e-07,
      "loss": 0.0117,
      "step": 2773580
    },
    {
      "epoch": 4.539057232445029,
      "grad_norm": 0.21904043853282928,
      "learning_rate": 9.279600429617233e-07,
      "loss": 0.0067,
      "step": 2773600
    },
    {
      "epoch": 4.539089962883683,
      "grad_norm": 0.13492825627326965,
      "learning_rate": 9.278941507482061e-07,
      "loss": 0.0099,
      "step": 2773620
    },
    {
      "epoch": 4.539122693322335,
      "grad_norm": 0.322417289018631,
      "learning_rate": 9.27828258534689e-07,
      "loss": 0.009,
      "step": 2773640
    },
    {
      "epoch": 4.539155423760989,
      "grad_norm": 0.22545431554317474,
      "learning_rate": 9.277623663211718e-07,
      "loss": 0.0109,
      "step": 2773660
    },
    {
      "epoch": 4.539188154199643,
      "grad_norm": 0.17666508257389069,
      "learning_rate": 9.276964741076549e-07,
      "loss": 0.0089,
      "step": 2773680
    },
    {
      "epoch": 4.539220884638296,
      "grad_norm": 0.3668561577796936,
      "learning_rate": 9.276305818941376e-07,
      "loss": 0.0086,
      "step": 2773700
    },
    {
      "epoch": 4.539253615076949,
      "grad_norm": 0.13250049948692322,
      "learning_rate": 9.275646896806205e-07,
      "loss": 0.01,
      "step": 2773720
    },
    {
      "epoch": 4.5392863455156025,
      "grad_norm": 0.1789097636938095,
      "learning_rate": 9.274987974671033e-07,
      "loss": 0.0071,
      "step": 2773740
    },
    {
      "epoch": 4.539319075954256,
      "grad_norm": 0.5556817054748535,
      "learning_rate": 9.274329052535864e-07,
      "loss": 0.0115,
      "step": 2773760
    },
    {
      "epoch": 4.53935180639291,
      "grad_norm": 0.15505681931972504,
      "learning_rate": 9.273670130400692e-07,
      "loss": 0.0054,
      "step": 2773780
    },
    {
      "epoch": 4.539384536831562,
      "grad_norm": 0.7733812928199768,
      "learning_rate": 9.273011208265519e-07,
      "loss": 0.0103,
      "step": 2773800
    },
    {
      "epoch": 4.539417267270216,
      "grad_norm": 0.09377186745405197,
      "learning_rate": 9.272352286130348e-07,
      "loss": 0.0088,
      "step": 2773820
    },
    {
      "epoch": 4.53944999770887,
      "grad_norm": 0.3529850244522095,
      "learning_rate": 9.271693363995178e-07,
      "loss": 0.009,
      "step": 2773840
    },
    {
      "epoch": 4.539482728147522,
      "grad_norm": 0.14196696877479553,
      "learning_rate": 9.271034441860007e-07,
      "loss": 0.0067,
      "step": 2773860
    },
    {
      "epoch": 4.539515458586176,
      "grad_norm": 0.12940126657485962,
      "learning_rate": 9.270375519724835e-07,
      "loss": 0.0064,
      "step": 2773880
    },
    {
      "epoch": 4.5395481890248295,
      "grad_norm": 0.49136272072792053,
      "learning_rate": 9.269716597589664e-07,
      "loss": 0.0097,
      "step": 2773900
    },
    {
      "epoch": 4.539580919463482,
      "grad_norm": 0.10137531161308289,
      "learning_rate": 9.269057675454491e-07,
      "loss": 0.0133,
      "step": 2773920
    },
    {
      "epoch": 4.539613649902136,
      "grad_norm": 0.08506134897470474,
      "learning_rate": 9.268398753319322e-07,
      "loss": 0.0082,
      "step": 2773940
    },
    {
      "epoch": 4.539646380340789,
      "grad_norm": 0.13651816546916962,
      "learning_rate": 9.26773983118415e-07,
      "loss": 0.007,
      "step": 2773960
    },
    {
      "epoch": 4.539679110779443,
      "grad_norm": 0.19740837812423706,
      "learning_rate": 9.267080909048979e-07,
      "loss": 0.008,
      "step": 2773980
    },
    {
      "epoch": 4.539711841218096,
      "grad_norm": 0.047255877405405045,
      "learning_rate": 9.266421986913807e-07,
      "loss": 0.0131,
      "step": 2774000
    },
    {
      "epoch": 4.539744571656749,
      "grad_norm": 0.4351581633090973,
      "learning_rate": 9.265763064778637e-07,
      "loss": 0.0102,
      "step": 2774020
    },
    {
      "epoch": 4.539777302095403,
      "grad_norm": 0.17995446920394897,
      "learning_rate": 9.265104142643465e-07,
      "loss": 0.0062,
      "step": 2774040
    },
    {
      "epoch": 4.5398100325340565,
      "grad_norm": 0.057685889303684235,
      "learning_rate": 9.264445220508294e-07,
      "loss": 0.0064,
      "step": 2774060
    },
    {
      "epoch": 4.539842762972709,
      "grad_norm": 0.6072912812232971,
      "learning_rate": 9.263786298373122e-07,
      "loss": 0.0089,
      "step": 2774080
    },
    {
      "epoch": 4.539875493411363,
      "grad_norm": 0.4706825017929077,
      "learning_rate": 9.26312737623795e-07,
      "loss": 0.0085,
      "step": 2774100
    },
    {
      "epoch": 4.539908223850016,
      "grad_norm": 0.10193707793951035,
      "learning_rate": 9.26246845410278e-07,
      "loss": 0.0065,
      "step": 2774120
    },
    {
      "epoch": 4.539940954288669,
      "grad_norm": 0.17708328366279602,
      "learning_rate": 9.261809531967608e-07,
      "loss": 0.009,
      "step": 2774140
    },
    {
      "epoch": 4.539973684727323,
      "grad_norm": 0.1435728520154953,
      "learning_rate": 9.261150609832437e-07,
      "loss": 0.0095,
      "step": 2774160
    },
    {
      "epoch": 4.540006415165976,
      "grad_norm": 0.3385700583457947,
      "learning_rate": 9.260491687697265e-07,
      "loss": 0.0103,
      "step": 2774180
    },
    {
      "epoch": 4.540039145604629,
      "grad_norm": 0.2835890054702759,
      "learning_rate": 9.259832765562095e-07,
      "loss": 0.0074,
      "step": 2774200
    },
    {
      "epoch": 4.5400718760432826,
      "grad_norm": 0.079976886510849,
      "learning_rate": 9.259173843426923e-07,
      "loss": 0.0116,
      "step": 2774220
    },
    {
      "epoch": 4.540104606481936,
      "grad_norm": 0.3009222447872162,
      "learning_rate": 9.258514921291752e-07,
      "loss": 0.0071,
      "step": 2774240
    },
    {
      "epoch": 4.54013733692059,
      "grad_norm": 0.31813710927963257,
      "learning_rate": 9.25785599915658e-07,
      "loss": 0.0103,
      "step": 2774260
    },
    {
      "epoch": 4.5401700673592424,
      "grad_norm": 0.3990073800086975,
      "learning_rate": 9.25719707702141e-07,
      "loss": 0.0105,
      "step": 2774280
    },
    {
      "epoch": 4.540202797797896,
      "grad_norm": 0.07377947866916656,
      "learning_rate": 9.256538154886238e-07,
      "loss": 0.0066,
      "step": 2774300
    },
    {
      "epoch": 4.54023552823655,
      "grad_norm": 0.04754260927438736,
      "learning_rate": 9.255879232751067e-07,
      "loss": 0.0074,
      "step": 2774320
    },
    {
      "epoch": 4.540268258675203,
      "grad_norm": 0.21771161258220673,
      "learning_rate": 9.255220310615895e-07,
      "loss": 0.0096,
      "step": 2774340
    },
    {
      "epoch": 4.540300989113856,
      "grad_norm": 0.09879253804683685,
      "learning_rate": 9.254561388480724e-07,
      "loss": 0.0098,
      "step": 2774360
    },
    {
      "epoch": 4.5403337195525095,
      "grad_norm": 0.28828689455986023,
      "learning_rate": 9.253902466345553e-07,
      "loss": 0.0085,
      "step": 2774380
    },
    {
      "epoch": 4.540366449991163,
      "grad_norm": 0.455659955739975,
      "learning_rate": 9.253243544210382e-07,
      "loss": 0.0101,
      "step": 2774400
    },
    {
      "epoch": 4.540399180429816,
      "grad_norm": 0.14997555315494537,
      "learning_rate": 9.25258462207521e-07,
      "loss": 0.0122,
      "step": 2774420
    },
    {
      "epoch": 4.540431910868469,
      "grad_norm": 0.2745482623577118,
      "learning_rate": 9.251925699940038e-07,
      "loss": 0.0128,
      "step": 2774440
    },
    {
      "epoch": 4.540464641307123,
      "grad_norm": 0.2937474250793457,
      "learning_rate": 9.251266777804868e-07,
      "loss": 0.0083,
      "step": 2774460
    },
    {
      "epoch": 4.540497371745776,
      "grad_norm": 0.640874981880188,
      "learning_rate": 9.250607855669696e-07,
      "loss": 0.0121,
      "step": 2774480
    },
    {
      "epoch": 4.540530102184429,
      "grad_norm": 0.2841639816761017,
      "learning_rate": 9.249948933534525e-07,
      "loss": 0.0153,
      "step": 2774500
    },
    {
      "epoch": 4.540562832623083,
      "grad_norm": 0.0791693702340126,
      "learning_rate": 9.249290011399353e-07,
      "loss": 0.0101,
      "step": 2774520
    },
    {
      "epoch": 4.5405955630617365,
      "grad_norm": 0.4265328645706177,
      "learning_rate": 9.248631089264182e-07,
      "loss": 0.0104,
      "step": 2774540
    },
    {
      "epoch": 4.540628293500389,
      "grad_norm": 0.5271322131156921,
      "learning_rate": 9.247972167129011e-07,
      "loss": 0.0125,
      "step": 2774560
    },
    {
      "epoch": 4.540661023939043,
      "grad_norm": 0.18507911264896393,
      "learning_rate": 9.24731324499384e-07,
      "loss": 0.0129,
      "step": 2774580
    },
    {
      "epoch": 4.540693754377696,
      "grad_norm": 0.216740682721138,
      "learning_rate": 9.246654322858668e-07,
      "loss": 0.0094,
      "step": 2774600
    },
    {
      "epoch": 4.54072648481635,
      "grad_norm": 0.1822424679994583,
      "learning_rate": 9.245995400723497e-07,
      "loss": 0.0093,
      "step": 2774620
    },
    {
      "epoch": 4.540759215255003,
      "grad_norm": 0.27873167395591736,
      "learning_rate": 9.245336478588326e-07,
      "loss": 0.0054,
      "step": 2774640
    },
    {
      "epoch": 4.540791945693656,
      "grad_norm": 0.09954333305358887,
      "learning_rate": 9.244677556453155e-07,
      "loss": 0.0088,
      "step": 2774660
    },
    {
      "epoch": 4.54082467613231,
      "grad_norm": 0.3227408528327942,
      "learning_rate": 9.244018634317983e-07,
      "loss": 0.0082,
      "step": 2774680
    },
    {
      "epoch": 4.540857406570963,
      "grad_norm": 0.27509942650794983,
      "learning_rate": 9.243359712182812e-07,
      "loss": 0.0117,
      "step": 2774700
    },
    {
      "epoch": 4.540890137009616,
      "grad_norm": 0.37166827917099,
      "learning_rate": 9.242700790047641e-07,
      "loss": 0.0081,
      "step": 2774720
    },
    {
      "epoch": 4.54092286744827,
      "grad_norm": 0.12113810330629349,
      "learning_rate": 9.24204186791247e-07,
      "loss": 0.0143,
      "step": 2774740
    },
    {
      "epoch": 4.5409555978869225,
      "grad_norm": 0.08656764775514603,
      "learning_rate": 9.241382945777298e-07,
      "loss": 0.0059,
      "step": 2774760
    },
    {
      "epoch": 4.540988328325576,
      "grad_norm": 0.17532148957252502,
      "learning_rate": 9.240724023642126e-07,
      "loss": 0.0082,
      "step": 2774780
    },
    {
      "epoch": 4.54102105876423,
      "grad_norm": 0.4064079821109772,
      "learning_rate": 9.240065101506955e-07,
      "loss": 0.0075,
      "step": 2774800
    },
    {
      "epoch": 4.541053789202883,
      "grad_norm": 0.39860793948173523,
      "learning_rate": 9.239406179371784e-07,
      "loss": 0.0082,
      "step": 2774820
    },
    {
      "epoch": 4.541086519641536,
      "grad_norm": 0.2928934395313263,
      "learning_rate": 9.238747257236613e-07,
      "loss": 0.0081,
      "step": 2774840
    },
    {
      "epoch": 4.54111925008019,
      "grad_norm": 0.28789275884628296,
      "learning_rate": 9.238088335101441e-07,
      "loss": 0.0077,
      "step": 2774860
    },
    {
      "epoch": 4.541151980518843,
      "grad_norm": 0.1802225410938263,
      "learning_rate": 9.23742941296627e-07,
      "loss": 0.0104,
      "step": 2774880
    },
    {
      "epoch": 4.541184710957496,
      "grad_norm": 0.15697911381721497,
      "learning_rate": 9.236770490831099e-07,
      "loss": 0.0065,
      "step": 2774900
    },
    {
      "epoch": 4.5412174413961495,
      "grad_norm": 0.39489027857780457,
      "learning_rate": 9.236111568695928e-07,
      "loss": 0.0078,
      "step": 2774920
    },
    {
      "epoch": 4.541250171834803,
      "grad_norm": 0.1111091896891594,
      "learning_rate": 9.235452646560756e-07,
      "loss": 0.0086,
      "step": 2774940
    },
    {
      "epoch": 4.541282902273457,
      "grad_norm": 0.2869287133216858,
      "learning_rate": 9.234793724425585e-07,
      "loss": 0.0082,
      "step": 2774960
    },
    {
      "epoch": 4.541315632712109,
      "grad_norm": 0.1442311555147171,
      "learning_rate": 9.234134802290413e-07,
      "loss": 0.0084,
      "step": 2774980
    },
    {
      "epoch": 4.541348363150763,
      "grad_norm": 0.2069898098707199,
      "learning_rate": 9.233475880155244e-07,
      "loss": 0.0048,
      "step": 2775000
    },
    {
      "epoch": 4.5413810935894166,
      "grad_norm": 0.1485687792301178,
      "learning_rate": 9.232816958020071e-07,
      "loss": 0.0096,
      "step": 2775020
    },
    {
      "epoch": 4.541413824028069,
      "grad_norm": 0.07190097868442535,
      "learning_rate": 9.2321580358849e-07,
      "loss": 0.0092,
      "step": 2775040
    },
    {
      "epoch": 4.541446554466723,
      "grad_norm": 0.22404663264751434,
      "learning_rate": 9.231499113749728e-07,
      "loss": 0.0053,
      "step": 2775060
    },
    {
      "epoch": 4.5414792849053764,
      "grad_norm": 0.17547842860221863,
      "learning_rate": 9.230840191614559e-07,
      "loss": 0.0094,
      "step": 2775080
    },
    {
      "epoch": 4.541512015344029,
      "grad_norm": 0.7316001653671265,
      "learning_rate": 9.230181269479387e-07,
      "loss": 0.0118,
      "step": 2775100
    },
    {
      "epoch": 4.541544745782683,
      "grad_norm": 0.43084004521369934,
      "learning_rate": 9.229522347344214e-07,
      "loss": 0.0087,
      "step": 2775120
    },
    {
      "epoch": 4.541577476221336,
      "grad_norm": 0.270669162273407,
      "learning_rate": 9.228863425209043e-07,
      "loss": 0.0091,
      "step": 2775140
    },
    {
      "epoch": 4.54161020665999,
      "grad_norm": 0.17096775770187378,
      "learning_rate": 9.228204503073873e-07,
      "loss": 0.0056,
      "step": 2775160
    },
    {
      "epoch": 4.541642937098643,
      "grad_norm": 0.32525235414505005,
      "learning_rate": 9.227545580938702e-07,
      "loss": 0.0094,
      "step": 2775180
    },
    {
      "epoch": 4.541675667537296,
      "grad_norm": 0.2814806401729584,
      "learning_rate": 9.22688665880353e-07,
      "loss": 0.008,
      "step": 2775200
    },
    {
      "epoch": 4.54170839797595,
      "grad_norm": 0.12338802218437195,
      "learning_rate": 9.226227736668359e-07,
      "loss": 0.0051,
      "step": 2775220
    },
    {
      "epoch": 4.541741128414603,
      "grad_norm": 1.0478029251098633,
      "learning_rate": 9.225568814533186e-07,
      "loss": 0.0103,
      "step": 2775240
    },
    {
      "epoch": 4.541773858853256,
      "grad_norm": 0.12469371408224106,
      "learning_rate": 9.224909892398017e-07,
      "loss": 0.0068,
      "step": 2775260
    },
    {
      "epoch": 4.54180658929191,
      "grad_norm": 0.18537874519824982,
      "learning_rate": 9.224250970262845e-07,
      "loss": 0.009,
      "step": 2775280
    },
    {
      "epoch": 4.541839319730563,
      "grad_norm": 0.5342023372650146,
      "learning_rate": 9.223592048127674e-07,
      "loss": 0.0089,
      "step": 2775300
    },
    {
      "epoch": 4.541872050169216,
      "grad_norm": 0.1762915551662445,
      "learning_rate": 9.222933125992502e-07,
      "loss": 0.0069,
      "step": 2775320
    },
    {
      "epoch": 4.54190478060787,
      "grad_norm": 0.2877011001110077,
      "learning_rate": 9.222274203857332e-07,
      "loss": 0.0083,
      "step": 2775340
    },
    {
      "epoch": 4.541937511046523,
      "grad_norm": 0.1808341145515442,
      "learning_rate": 9.22161528172216e-07,
      "loss": 0.0082,
      "step": 2775360
    },
    {
      "epoch": 4.541970241485176,
      "grad_norm": 0.5392516255378723,
      "learning_rate": 9.220956359586989e-07,
      "loss": 0.0097,
      "step": 2775380
    },
    {
      "epoch": 4.5420029719238295,
      "grad_norm": 0.2944735884666443,
      "learning_rate": 9.220297437451817e-07,
      "loss": 0.0104,
      "step": 2775400
    },
    {
      "epoch": 4.542035702362483,
      "grad_norm": 0.505646288394928,
      "learning_rate": 9.219638515316646e-07,
      "loss": 0.0095,
      "step": 2775420
    },
    {
      "epoch": 4.542068432801137,
      "grad_norm": 0.09581106901168823,
      "learning_rate": 9.218979593181475e-07,
      "loss": 0.0071,
      "step": 2775440
    },
    {
      "epoch": 4.542101163239789,
      "grad_norm": 0.45711079239845276,
      "learning_rate": 9.218320671046304e-07,
      "loss": 0.0119,
      "step": 2775460
    },
    {
      "epoch": 4.542133893678443,
      "grad_norm": 0.27694666385650635,
      "learning_rate": 9.217661748911132e-07,
      "loss": 0.0083,
      "step": 2775480
    },
    {
      "epoch": 4.542166624117097,
      "grad_norm": 0.21417535841464996,
      "learning_rate": 9.21700282677596e-07,
      "loss": 0.0066,
      "step": 2775500
    },
    {
      "epoch": 4.54219935455575,
      "grad_norm": 0.040287479758262634,
      "learning_rate": 9.21634390464079e-07,
      "loss": 0.0078,
      "step": 2775520
    },
    {
      "epoch": 4.542232084994403,
      "grad_norm": 0.20390863716602325,
      "learning_rate": 9.215684982505618e-07,
      "loss": 0.0093,
      "step": 2775540
    },
    {
      "epoch": 4.5422648154330565,
      "grad_norm": 0.1754622906446457,
      "learning_rate": 9.215026060370447e-07,
      "loss": 0.0099,
      "step": 2775560
    },
    {
      "epoch": 4.54229754587171,
      "grad_norm": 0.1071845144033432,
      "learning_rate": 9.214367138235275e-07,
      "loss": 0.0083,
      "step": 2775580
    },
    {
      "epoch": 4.542330276310363,
      "grad_norm": 0.3029816746711731,
      "learning_rate": 9.213708216100105e-07,
      "loss": 0.0114,
      "step": 2775600
    },
    {
      "epoch": 4.542363006749016,
      "grad_norm": 0.3357853293418884,
      "learning_rate": 9.213049293964933e-07,
      "loss": 0.012,
      "step": 2775620
    },
    {
      "epoch": 4.54239573718767,
      "grad_norm": 0.20886102318763733,
      "learning_rate": 9.212390371829762e-07,
      "loss": 0.0057,
      "step": 2775640
    },
    {
      "epoch": 4.542428467626323,
      "grad_norm": 0.13581538200378418,
      "learning_rate": 9.21173144969459e-07,
      "loss": 0.0087,
      "step": 2775660
    },
    {
      "epoch": 4.542461198064976,
      "grad_norm": 0.11609266698360443,
      "learning_rate": 9.211072527559419e-07,
      "loss": 0.0062,
      "step": 2775680
    },
    {
      "epoch": 4.54249392850363,
      "grad_norm": 0.13154415786266327,
      "learning_rate": 9.210413605424248e-07,
      "loss": 0.0076,
      "step": 2775700
    },
    {
      "epoch": 4.5425266589422835,
      "grad_norm": 0.21785679459571838,
      "learning_rate": 9.209754683289077e-07,
      "loss": 0.0063,
      "step": 2775720
    },
    {
      "epoch": 4.542559389380936,
      "grad_norm": 0.12050934135913849,
      "learning_rate": 9.209095761153905e-07,
      "loss": 0.0124,
      "step": 2775740
    },
    {
      "epoch": 4.54259211981959,
      "grad_norm": 0.526320219039917,
      "learning_rate": 9.208436839018734e-07,
      "loss": 0.0119,
      "step": 2775760
    },
    {
      "epoch": 4.542624850258243,
      "grad_norm": 0.25331413745880127,
      "learning_rate": 9.207777916883563e-07,
      "loss": 0.0081,
      "step": 2775780
    },
    {
      "epoch": 4.542657580696897,
      "grad_norm": 0.5933270454406738,
      "learning_rate": 9.207118994748391e-07,
      "loss": 0.0098,
      "step": 2775800
    },
    {
      "epoch": 4.54269031113555,
      "grad_norm": 0.29730042815208435,
      "learning_rate": 9.20646007261322e-07,
      "loss": 0.008,
      "step": 2775820
    },
    {
      "epoch": 4.542723041574203,
      "grad_norm": 0.30977723002433777,
      "learning_rate": 9.205801150478048e-07,
      "loss": 0.0111,
      "step": 2775840
    },
    {
      "epoch": 4.542755772012857,
      "grad_norm": 0.13369357585906982,
      "learning_rate": 9.205142228342877e-07,
      "loss": 0.0068,
      "step": 2775860
    },
    {
      "epoch": 4.54278850245151,
      "grad_norm": 0.32391098141670227,
      "learning_rate": 9.204483306207706e-07,
      "loss": 0.0083,
      "step": 2775880
    },
    {
      "epoch": 4.542821232890163,
      "grad_norm": 0.23739098012447357,
      "learning_rate": 9.203824384072535e-07,
      "loss": 0.007,
      "step": 2775900
    },
    {
      "epoch": 4.542853963328817,
      "grad_norm": 0.1330758035182953,
      "learning_rate": 9.203165461937363e-07,
      "loss": 0.0085,
      "step": 2775920
    },
    {
      "epoch": 4.5428866937674695,
      "grad_norm": 0.22278890013694763,
      "learning_rate": 9.202506539802192e-07,
      "loss": 0.0075,
      "step": 2775940
    },
    {
      "epoch": 4.542919424206123,
      "grad_norm": 0.3098922669887543,
      "learning_rate": 9.201847617667021e-07,
      "loss": 0.0096,
      "step": 2775960
    },
    {
      "epoch": 4.542952154644777,
      "grad_norm": 0.10680245608091354,
      "learning_rate": 9.20118869553185e-07,
      "loss": 0.0094,
      "step": 2775980
    },
    {
      "epoch": 4.54298488508343,
      "grad_norm": 0.17714743316173553,
      "learning_rate": 9.200529773396678e-07,
      "loss": 0.0069,
      "step": 2776000
    },
    {
      "epoch": 4.543017615522083,
      "grad_norm": 0.18679282069206238,
      "learning_rate": 9.199870851261507e-07,
      "loss": 0.0103,
      "step": 2776020
    },
    {
      "epoch": 4.5430503459607365,
      "grad_norm": 0.237636536359787,
      "learning_rate": 9.199211929126336e-07,
      "loss": 0.0077,
      "step": 2776040
    },
    {
      "epoch": 4.54308307639939,
      "grad_norm": 0.14187949895858765,
      "learning_rate": 9.198553006991165e-07,
      "loss": 0.0087,
      "step": 2776060
    },
    {
      "epoch": 4.543115806838044,
      "grad_norm": 0.13315445184707642,
      "learning_rate": 9.197894084855993e-07,
      "loss": 0.0093,
      "step": 2776080
    },
    {
      "epoch": 4.543148537276696,
      "grad_norm": 0.30931785702705383,
      "learning_rate": 9.197235162720822e-07,
      "loss": 0.0099,
      "step": 2776100
    },
    {
      "epoch": 4.54318126771535,
      "grad_norm": 0.08795715868473053,
      "learning_rate": 9.19657624058565e-07,
      "loss": 0.0113,
      "step": 2776120
    },
    {
      "epoch": 4.543213998154004,
      "grad_norm": 0.2049238383769989,
      "learning_rate": 9.19591731845048e-07,
      "loss": 0.0048,
      "step": 2776140
    },
    {
      "epoch": 4.543246728592656,
      "grad_norm": 0.2711467742919922,
      "learning_rate": 9.195258396315308e-07,
      "loss": 0.0086,
      "step": 2776160
    },
    {
      "epoch": 4.54327945903131,
      "grad_norm": 0.4565386474132538,
      "learning_rate": 9.194599474180136e-07,
      "loss": 0.0083,
      "step": 2776180
    },
    {
      "epoch": 4.5433121894699635,
      "grad_norm": 0.09643808007240295,
      "learning_rate": 9.193940552044965e-07,
      "loss": 0.0074,
      "step": 2776200
    },
    {
      "epoch": 4.543344919908616,
      "grad_norm": 0.1504109799861908,
      "learning_rate": 9.193281629909794e-07,
      "loss": 0.008,
      "step": 2776220
    },
    {
      "epoch": 4.54337765034727,
      "grad_norm": 0.19442681968212128,
      "learning_rate": 9.192622707774623e-07,
      "loss": 0.0127,
      "step": 2776240
    },
    {
      "epoch": 4.543410380785923,
      "grad_norm": 0.22184111177921295,
      "learning_rate": 9.191963785639451e-07,
      "loss": 0.0063,
      "step": 2776260
    },
    {
      "epoch": 4.543443111224577,
      "grad_norm": 0.44229623675346375,
      "learning_rate": 9.19130486350428e-07,
      "loss": 0.0099,
      "step": 2776280
    },
    {
      "epoch": 4.54347584166323,
      "grad_norm": 0.1287996619939804,
      "learning_rate": 9.190645941369108e-07,
      "loss": 0.0083,
      "step": 2776300
    },
    {
      "epoch": 4.543508572101883,
      "grad_norm": 0.5403745174407959,
      "learning_rate": 9.189987019233939e-07,
      "loss": 0.0098,
      "step": 2776320
    },
    {
      "epoch": 4.543541302540537,
      "grad_norm": 0.13352948427200317,
      "learning_rate": 9.189328097098766e-07,
      "loss": 0.0065,
      "step": 2776340
    },
    {
      "epoch": 4.54357403297919,
      "grad_norm": 0.26549339294433594,
      "learning_rate": 9.188669174963595e-07,
      "loss": 0.0132,
      "step": 2776360
    },
    {
      "epoch": 4.543606763417843,
      "grad_norm": 0.1287345439195633,
      "learning_rate": 9.188010252828423e-07,
      "loss": 0.0084,
      "step": 2776380
    },
    {
      "epoch": 4.543639493856497,
      "grad_norm": 0.36503490805625916,
      "learning_rate": 9.187351330693254e-07,
      "loss": 0.006,
      "step": 2776400
    },
    {
      "epoch": 4.54367222429515,
      "grad_norm": 0.2317294329404831,
      "learning_rate": 9.186692408558082e-07,
      "loss": 0.0101,
      "step": 2776420
    },
    {
      "epoch": 4.543704954733803,
      "grad_norm": 0.2508731484413147,
      "learning_rate": 9.186033486422911e-07,
      "loss": 0.0095,
      "step": 2776440
    },
    {
      "epoch": 4.543737685172457,
      "grad_norm": 0.26116943359375,
      "learning_rate": 9.185374564287738e-07,
      "loss": 0.0084,
      "step": 2776460
    },
    {
      "epoch": 4.54377041561111,
      "grad_norm": 0.4723951816558838,
      "learning_rate": 9.184715642152569e-07,
      "loss": 0.0085,
      "step": 2776480
    },
    {
      "epoch": 4.543803146049763,
      "grad_norm": 0.25905635952949524,
      "learning_rate": 9.184056720017397e-07,
      "loss": 0.012,
      "step": 2776500
    },
    {
      "epoch": 4.543835876488417,
      "grad_norm": 0.34655997157096863,
      "learning_rate": 9.183397797882225e-07,
      "loss": 0.0098,
      "step": 2776520
    },
    {
      "epoch": 4.54386860692707,
      "grad_norm": 0.27632105350494385,
      "learning_rate": 9.182738875747054e-07,
      "loss": 0.0147,
      "step": 2776540
    },
    {
      "epoch": 4.543901337365723,
      "grad_norm": 0.6353728771209717,
      "learning_rate": 9.182079953611881e-07,
      "loss": 0.0084,
      "step": 2776560
    },
    {
      "epoch": 4.5439340678043765,
      "grad_norm": 0.10031554847955704,
      "learning_rate": 9.181421031476712e-07,
      "loss": 0.0082,
      "step": 2776580
    },
    {
      "epoch": 4.54396679824303,
      "grad_norm": 0.19340834021568298,
      "learning_rate": 9.18076210934154e-07,
      "loss": 0.0091,
      "step": 2776600
    },
    {
      "epoch": 4.543999528681684,
      "grad_norm": 0.23870477080345154,
      "learning_rate": 9.180103187206369e-07,
      "loss": 0.0075,
      "step": 2776620
    },
    {
      "epoch": 4.544032259120336,
      "grad_norm": 0.14225038886070251,
      "learning_rate": 9.179444265071197e-07,
      "loss": 0.0055,
      "step": 2776640
    },
    {
      "epoch": 4.54406498955899,
      "grad_norm": 0.30180642008781433,
      "learning_rate": 9.178785342936027e-07,
      "loss": 0.0129,
      "step": 2776660
    },
    {
      "epoch": 4.544097719997644,
      "grad_norm": 0.5029536485671997,
      "learning_rate": 9.178126420800855e-07,
      "loss": 0.0085,
      "step": 2776680
    },
    {
      "epoch": 4.544130450436297,
      "grad_norm": 0.13893751800060272,
      "learning_rate": 9.177467498665684e-07,
      "loss": 0.01,
      "step": 2776700
    },
    {
      "epoch": 4.54416318087495,
      "grad_norm": 0.20373576879501343,
      "learning_rate": 9.176808576530512e-07,
      "loss": 0.0079,
      "step": 2776720
    },
    {
      "epoch": 4.5441959113136035,
      "grad_norm": 0.09956914931535721,
      "learning_rate": 9.176149654395341e-07,
      "loss": 0.0056,
      "step": 2776740
    },
    {
      "epoch": 4.544228641752257,
      "grad_norm": 0.2092190980911255,
      "learning_rate": 9.17549073226017e-07,
      "loss": 0.0071,
      "step": 2776760
    },
    {
      "epoch": 4.54426137219091,
      "grad_norm": 0.22517693042755127,
      "learning_rate": 9.174831810124999e-07,
      "loss": 0.0074,
      "step": 2776780
    },
    {
      "epoch": 4.544294102629563,
      "grad_norm": 0.4084075093269348,
      "learning_rate": 9.174172887989827e-07,
      "loss": 0.0084,
      "step": 2776800
    },
    {
      "epoch": 4.544326833068217,
      "grad_norm": 0.20168210566043854,
      "learning_rate": 9.173513965854655e-07,
      "loss": 0.0077,
      "step": 2776820
    },
    {
      "epoch": 4.54435956350687,
      "grad_norm": 0.4220431447029114,
      "learning_rate": 9.172855043719485e-07,
      "loss": 0.0126,
      "step": 2776840
    },
    {
      "epoch": 4.544392293945523,
      "grad_norm": 0.33853575587272644,
      "learning_rate": 9.172196121584313e-07,
      "loss": 0.0116,
      "step": 2776860
    },
    {
      "epoch": 4.544425024384177,
      "grad_norm": 0.566678524017334,
      "learning_rate": 9.171537199449142e-07,
      "loss": 0.0101,
      "step": 2776880
    },
    {
      "epoch": 4.54445775482283,
      "grad_norm": 0.18314562737941742,
      "learning_rate": 9.17087827731397e-07,
      "loss": 0.0064,
      "step": 2776900
    },
    {
      "epoch": 4.544490485261483,
      "grad_norm": 0.32194292545318604,
      "learning_rate": 9.1702193551788e-07,
      "loss": 0.0084,
      "step": 2776920
    },
    {
      "epoch": 4.544523215700137,
      "grad_norm": 0.4704594612121582,
      "learning_rate": 9.169560433043628e-07,
      "loss": 0.011,
      "step": 2776940
    },
    {
      "epoch": 4.54455594613879,
      "grad_norm": 0.13035404682159424,
      "learning_rate": 9.168901510908457e-07,
      "loss": 0.0071,
      "step": 2776960
    },
    {
      "epoch": 4.544588676577444,
      "grad_norm": 0.20055313408374786,
      "learning_rate": 9.168242588773285e-07,
      "loss": 0.0075,
      "step": 2776980
    },
    {
      "epoch": 4.544621407016097,
      "grad_norm": 0.08876042813062668,
      "learning_rate": 9.167583666638114e-07,
      "loss": 0.0052,
      "step": 2777000
    },
    {
      "epoch": 4.54465413745475,
      "grad_norm": 0.16257891058921814,
      "learning_rate": 9.166924744502943e-07,
      "loss": 0.014,
      "step": 2777020
    },
    {
      "epoch": 4.544686867893404,
      "grad_norm": 0.19838100671768188,
      "learning_rate": 9.166265822367772e-07,
      "loss": 0.0058,
      "step": 2777040
    },
    {
      "epoch": 4.5447195983320565,
      "grad_norm": 0.35473790764808655,
      "learning_rate": 9.1656069002326e-07,
      "loss": 0.0068,
      "step": 2777060
    },
    {
      "epoch": 4.54475232877071,
      "grad_norm": 0.8105637431144714,
      "learning_rate": 9.164947978097429e-07,
      "loss": 0.0073,
      "step": 2777080
    },
    {
      "epoch": 4.544785059209364,
      "grad_norm": 0.773367166519165,
      "learning_rate": 9.164289055962258e-07,
      "loss": 0.0123,
      "step": 2777100
    },
    {
      "epoch": 4.544817789648016,
      "grad_norm": 0.29472407698631287,
      "learning_rate": 9.163630133827087e-07,
      "loss": 0.0064,
      "step": 2777120
    },
    {
      "epoch": 4.54485052008667,
      "grad_norm": 0.1240481585264206,
      "learning_rate": 9.162971211691915e-07,
      "loss": 0.008,
      "step": 2777140
    },
    {
      "epoch": 4.544883250525324,
      "grad_norm": 0.764859139919281,
      "learning_rate": 9.162312289556743e-07,
      "loss": 0.0077,
      "step": 2777160
    },
    {
      "epoch": 4.544915980963977,
      "grad_norm": 0.15176503360271454,
      "learning_rate": 9.161653367421572e-07,
      "loss": 0.007,
      "step": 2777180
    },
    {
      "epoch": 4.54494871140263,
      "grad_norm": 0.17031781375408173,
      "learning_rate": 9.160994445286401e-07,
      "loss": 0.0048,
      "step": 2777200
    },
    {
      "epoch": 4.5449814418412835,
      "grad_norm": 0.13211818039417267,
      "learning_rate": 9.16033552315123e-07,
      "loss": 0.0086,
      "step": 2777220
    },
    {
      "epoch": 4.545014172279937,
      "grad_norm": 0.3166426420211792,
      "learning_rate": 9.159676601016058e-07,
      "loss": 0.0063,
      "step": 2777240
    },
    {
      "epoch": 4.545046902718591,
      "grad_norm": 0.06174888461828232,
      "learning_rate": 9.159017678880887e-07,
      "loss": 0.0065,
      "step": 2777260
    },
    {
      "epoch": 4.545079633157243,
      "grad_norm": 0.19243617355823517,
      "learning_rate": 9.158358756745716e-07,
      "loss": 0.0091,
      "step": 2777280
    },
    {
      "epoch": 4.545112363595897,
      "grad_norm": 0.41060778498649597,
      "learning_rate": 9.157699834610545e-07,
      "loss": 0.0072,
      "step": 2777300
    },
    {
      "epoch": 4.545145094034551,
      "grad_norm": 0.37551990151405334,
      "learning_rate": 9.157040912475373e-07,
      "loss": 0.0088,
      "step": 2777320
    },
    {
      "epoch": 4.545177824473203,
      "grad_norm": 0.29861003160476685,
      "learning_rate": 9.156381990340202e-07,
      "loss": 0.0082,
      "step": 2777340
    },
    {
      "epoch": 4.545210554911857,
      "grad_norm": 0.26452985405921936,
      "learning_rate": 9.155723068205031e-07,
      "loss": 0.0116,
      "step": 2777360
    },
    {
      "epoch": 4.5452432853505105,
      "grad_norm": 0.48723727464675903,
      "learning_rate": 9.15506414606986e-07,
      "loss": 0.0082,
      "step": 2777380
    },
    {
      "epoch": 4.545276015789163,
      "grad_norm": 0.37126582860946655,
      "learning_rate": 9.154405223934688e-07,
      "loss": 0.0082,
      "step": 2777400
    },
    {
      "epoch": 4.545308746227817,
      "grad_norm": 0.2120804339647293,
      "learning_rate": 9.153746301799517e-07,
      "loss": 0.0094,
      "step": 2777420
    },
    {
      "epoch": 4.54534147666647,
      "grad_norm": 0.21002031862735748,
      "learning_rate": 9.153087379664345e-07,
      "loss": 0.0078,
      "step": 2777440
    },
    {
      "epoch": 4.545374207105124,
      "grad_norm": 0.1833166927099228,
      "learning_rate": 9.152428457529175e-07,
      "loss": 0.0103,
      "step": 2777460
    },
    {
      "epoch": 4.545406937543777,
      "grad_norm": 0.4084382951259613,
      "learning_rate": 9.151769535394003e-07,
      "loss": 0.013,
      "step": 2777480
    },
    {
      "epoch": 4.54543966798243,
      "grad_norm": 0.20229989290237427,
      "learning_rate": 9.151110613258831e-07,
      "loss": 0.0102,
      "step": 2777500
    },
    {
      "epoch": 4.545472398421084,
      "grad_norm": 0.09128523617982864,
      "learning_rate": 9.15045169112366e-07,
      "loss": 0.0067,
      "step": 2777520
    },
    {
      "epoch": 4.5455051288597375,
      "grad_norm": 0.15655997395515442,
      "learning_rate": 9.149792768988489e-07,
      "loss": 0.0102,
      "step": 2777540
    },
    {
      "epoch": 4.54553785929839,
      "grad_norm": 0.24906925857067108,
      "learning_rate": 9.149133846853318e-07,
      "loss": 0.0068,
      "step": 2777560
    },
    {
      "epoch": 4.545570589737044,
      "grad_norm": 0.16877442598342896,
      "learning_rate": 9.148474924718146e-07,
      "loss": 0.0098,
      "step": 2777580
    },
    {
      "epoch": 4.545603320175697,
      "grad_norm": 0.12268182635307312,
      "learning_rate": 9.147816002582975e-07,
      "loss": 0.0086,
      "step": 2777600
    },
    {
      "epoch": 4.54563605061435,
      "grad_norm": 0.2024545818567276,
      "learning_rate": 9.147157080447803e-07,
      "loss": 0.0051,
      "step": 2777620
    },
    {
      "epoch": 4.545668781053004,
      "grad_norm": 0.25199466943740845,
      "learning_rate": 9.146498158312634e-07,
      "loss": 0.0076,
      "step": 2777640
    },
    {
      "epoch": 4.545701511491657,
      "grad_norm": 0.46400710940361023,
      "learning_rate": 9.145839236177461e-07,
      "loss": 0.0117,
      "step": 2777660
    },
    {
      "epoch": 4.54573424193031,
      "grad_norm": 0.12603142857551575,
      "learning_rate": 9.14518031404229e-07,
      "loss": 0.0115,
      "step": 2777680
    },
    {
      "epoch": 4.5457669723689635,
      "grad_norm": 0.07004761695861816,
      "learning_rate": 9.144521391907118e-07,
      "loss": 0.0065,
      "step": 2777700
    },
    {
      "epoch": 4.545799702807617,
      "grad_norm": 0.31098246574401855,
      "learning_rate": 9.143862469771949e-07,
      "loss": 0.0115,
      "step": 2777720
    },
    {
      "epoch": 4.545832433246271,
      "grad_norm": 0.17920316755771637,
      "learning_rate": 9.143203547636777e-07,
      "loss": 0.0095,
      "step": 2777740
    },
    {
      "epoch": 4.545865163684923,
      "grad_norm": 0.17266525328159332,
      "learning_rate": 9.142544625501606e-07,
      "loss": 0.013,
      "step": 2777760
    },
    {
      "epoch": 4.545897894123577,
      "grad_norm": 0.29246965050697327,
      "learning_rate": 9.141885703366433e-07,
      "loss": 0.0085,
      "step": 2777780
    },
    {
      "epoch": 4.545930624562231,
      "grad_norm": 0.16438841819763184,
      "learning_rate": 9.141226781231264e-07,
      "loss": 0.013,
      "step": 2777800
    },
    {
      "epoch": 4.545963355000884,
      "grad_norm": 0.1546965092420578,
      "learning_rate": 9.140567859096092e-07,
      "loss": 0.0105,
      "step": 2777820
    },
    {
      "epoch": 4.545996085439537,
      "grad_norm": 0.07178381085395813,
      "learning_rate": 9.13990893696092e-07,
      "loss": 0.0158,
      "step": 2777840
    },
    {
      "epoch": 4.5460288158781905,
      "grad_norm": 0.1873258352279663,
      "learning_rate": 9.139250014825749e-07,
      "loss": 0.0097,
      "step": 2777860
    },
    {
      "epoch": 4.546061546316844,
      "grad_norm": 0.21656575798988342,
      "learning_rate": 9.138591092690576e-07,
      "loss": 0.0063,
      "step": 2777880
    },
    {
      "epoch": 4.546094276755497,
      "grad_norm": 0.13290315866470337,
      "learning_rate": 9.137932170555407e-07,
      "loss": 0.0081,
      "step": 2777900
    },
    {
      "epoch": 4.54612700719415,
      "grad_norm": 0.44919875264167786,
      "learning_rate": 9.137273248420235e-07,
      "loss": 0.0072,
      "step": 2777920
    },
    {
      "epoch": 4.546159737632804,
      "grad_norm": 0.14506062865257263,
      "learning_rate": 9.136614326285064e-07,
      "loss": 0.0081,
      "step": 2777940
    },
    {
      "epoch": 4.546192468071457,
      "grad_norm": 0.23909208178520203,
      "learning_rate": 9.135955404149892e-07,
      "loss": 0.0101,
      "step": 2777960
    },
    {
      "epoch": 4.54622519851011,
      "grad_norm": 0.3623044490814209,
      "learning_rate": 9.135296482014722e-07,
      "loss": 0.015,
      "step": 2777980
    },
    {
      "epoch": 4.546257928948764,
      "grad_norm": 0.11593817174434662,
      "learning_rate": 9.13463755987955e-07,
      "loss": 0.008,
      "step": 2778000
    },
    {
      "epoch": 4.546290659387417,
      "grad_norm": 0.2886870503425598,
      "learning_rate": 9.133978637744379e-07,
      "loss": 0.0093,
      "step": 2778020
    },
    {
      "epoch": 4.54632338982607,
      "grad_norm": 0.06390059739351273,
      "learning_rate": 9.133319715609207e-07,
      "loss": 0.0102,
      "step": 2778040
    },
    {
      "epoch": 4.546356120264724,
      "grad_norm": 0.1770695447921753,
      "learning_rate": 9.132660793474037e-07,
      "loss": 0.0074,
      "step": 2778060
    },
    {
      "epoch": 4.546388850703377,
      "grad_norm": 0.09708330780267715,
      "learning_rate": 9.132001871338865e-07,
      "loss": 0.0129,
      "step": 2778080
    },
    {
      "epoch": 4.54642158114203,
      "grad_norm": 0.11569332331418991,
      "learning_rate": 9.131342949203694e-07,
      "loss": 0.0076,
      "step": 2778100
    },
    {
      "epoch": 4.546454311580684,
      "grad_norm": 0.28105077147483826,
      "learning_rate": 9.130684027068522e-07,
      "loss": 0.0095,
      "step": 2778120
    },
    {
      "epoch": 4.546487042019337,
      "grad_norm": 0.1436508595943451,
      "learning_rate": 9.13002510493335e-07,
      "loss": 0.0065,
      "step": 2778140
    },
    {
      "epoch": 4.546519772457991,
      "grad_norm": 0.14581924676895142,
      "learning_rate": 9.12936618279818e-07,
      "loss": 0.008,
      "step": 2778160
    },
    {
      "epoch": 4.546552502896644,
      "grad_norm": 0.2855346202850342,
      "learning_rate": 9.128707260663008e-07,
      "loss": 0.0125,
      "step": 2778180
    },
    {
      "epoch": 4.546585233335297,
      "grad_norm": 2.17726469039917,
      "learning_rate": 9.128048338527837e-07,
      "loss": 0.0072,
      "step": 2778200
    },
    {
      "epoch": 4.546617963773951,
      "grad_norm": 0.1380564570426941,
      "learning_rate": 9.127389416392665e-07,
      "loss": 0.0092,
      "step": 2778220
    },
    {
      "epoch": 4.5466506942126035,
      "grad_norm": 0.22047367691993713,
      "learning_rate": 9.126730494257495e-07,
      "loss": 0.0082,
      "step": 2778240
    },
    {
      "epoch": 4.546683424651257,
      "grad_norm": 0.301469624042511,
      "learning_rate": 9.126071572122323e-07,
      "loss": 0.0084,
      "step": 2778260
    },
    {
      "epoch": 4.546716155089911,
      "grad_norm": 0.10328158736228943,
      "learning_rate": 9.125412649987152e-07,
      "loss": 0.0053,
      "step": 2778280
    },
    {
      "epoch": 4.546748885528563,
      "grad_norm": 0.13641120493412018,
      "learning_rate": 9.12475372785198e-07,
      "loss": 0.0084,
      "step": 2778300
    },
    {
      "epoch": 4.546781615967217,
      "grad_norm": 0.2875831723213196,
      "learning_rate": 9.124094805716809e-07,
      "loss": 0.006,
      "step": 2778320
    },
    {
      "epoch": 4.546814346405871,
      "grad_norm": 0.0939428061246872,
      "learning_rate": 9.123435883581638e-07,
      "loss": 0.0097,
      "step": 2778340
    },
    {
      "epoch": 4.546847076844524,
      "grad_norm": 0.2542368173599243,
      "learning_rate": 9.122776961446467e-07,
      "loss": 0.006,
      "step": 2778360
    },
    {
      "epoch": 4.546879807283177,
      "grad_norm": 0.19953690469264984,
      "learning_rate": 9.122118039311295e-07,
      "loss": 0.0098,
      "step": 2778380
    },
    {
      "epoch": 4.5469125377218305,
      "grad_norm": 0.06606951355934143,
      "learning_rate": 9.121459117176124e-07,
      "loss": 0.0119,
      "step": 2778400
    },
    {
      "epoch": 4.546945268160484,
      "grad_norm": 0.17233744263648987,
      "learning_rate": 9.120800195040953e-07,
      "loss": 0.0046,
      "step": 2778420
    },
    {
      "epoch": 4.546977998599138,
      "grad_norm": 0.40308335423469543,
      "learning_rate": 9.120141272905782e-07,
      "loss": 0.0089,
      "step": 2778440
    },
    {
      "epoch": 4.54701072903779,
      "grad_norm": 0.27038222551345825,
      "learning_rate": 9.11948235077061e-07,
      "loss": 0.0081,
      "step": 2778460
    },
    {
      "epoch": 4.547043459476444,
      "grad_norm": 0.42139166593551636,
      "learning_rate": 9.118823428635439e-07,
      "loss": 0.0081,
      "step": 2778480
    },
    {
      "epoch": 4.5470761899150975,
      "grad_norm": 0.1553608924150467,
      "learning_rate": 9.118164506500268e-07,
      "loss": 0.0106,
      "step": 2778500
    },
    {
      "epoch": 4.54710892035375,
      "grad_norm": 0.263094425201416,
      "learning_rate": 9.117505584365096e-07,
      "loss": 0.0116,
      "step": 2778520
    },
    {
      "epoch": 4.547141650792404,
      "grad_norm": 0.14275553822517395,
      "learning_rate": 9.116846662229925e-07,
      "loss": 0.0071,
      "step": 2778540
    },
    {
      "epoch": 4.547174381231057,
      "grad_norm": 0.19805096089839935,
      "learning_rate": 9.116187740094753e-07,
      "loss": 0.0094,
      "step": 2778560
    },
    {
      "epoch": 4.54720711166971,
      "grad_norm": 0.3917768597602844,
      "learning_rate": 9.115528817959582e-07,
      "loss": 0.0092,
      "step": 2778580
    },
    {
      "epoch": 4.547239842108364,
      "grad_norm": 0.21927574276924133,
      "learning_rate": 9.114869895824411e-07,
      "loss": 0.0097,
      "step": 2778600
    },
    {
      "epoch": 4.547272572547017,
      "grad_norm": 0.05932849645614624,
      "learning_rate": 9.11421097368924e-07,
      "loss": 0.0081,
      "step": 2778620
    },
    {
      "epoch": 4.547305302985671,
      "grad_norm": 0.0585884153842926,
      "learning_rate": 9.113552051554068e-07,
      "loss": 0.0057,
      "step": 2778640
    },
    {
      "epoch": 4.547338033424324,
      "grad_norm": 0.24762532114982605,
      "learning_rate": 9.112893129418897e-07,
      "loss": 0.0078,
      "step": 2778660
    },
    {
      "epoch": 4.547370763862977,
      "grad_norm": 0.5800890326499939,
      "learning_rate": 9.112234207283726e-07,
      "loss": 0.0096,
      "step": 2778680
    },
    {
      "epoch": 4.547403494301631,
      "grad_norm": 0.18050774931907654,
      "learning_rate": 9.111575285148555e-07,
      "loss": 0.0064,
      "step": 2778700
    },
    {
      "epoch": 4.547436224740284,
      "grad_norm": 0.09771517664194107,
      "learning_rate": 9.110916363013383e-07,
      "loss": 0.0071,
      "step": 2778720
    },
    {
      "epoch": 4.547468955178937,
      "grad_norm": 0.1262224316596985,
      "learning_rate": 9.110257440878212e-07,
      "loss": 0.0106,
      "step": 2778740
    },
    {
      "epoch": 4.547501685617591,
      "grad_norm": 0.0966736227273941,
      "learning_rate": 9.10959851874304e-07,
      "loss": 0.0053,
      "step": 2778760
    },
    {
      "epoch": 4.547534416056244,
      "grad_norm": 0.12682068347930908,
      "learning_rate": 9.10893959660787e-07,
      "loss": 0.0056,
      "step": 2778780
    },
    {
      "epoch": 4.547567146494897,
      "grad_norm": 0.3624838590621948,
      "learning_rate": 9.108280674472698e-07,
      "loss": 0.0083,
      "step": 2778800
    },
    {
      "epoch": 4.547599876933551,
      "grad_norm": 0.14772529900074005,
      "learning_rate": 9.107621752337527e-07,
      "loss": 0.0077,
      "step": 2778820
    },
    {
      "epoch": 4.547632607372204,
      "grad_norm": 0.2195611596107483,
      "learning_rate": 9.106962830202355e-07,
      "loss": 0.0076,
      "step": 2778840
    },
    {
      "epoch": 4.547665337810857,
      "grad_norm": 0.4677811563014984,
      "learning_rate": 9.106303908067184e-07,
      "loss": 0.0099,
      "step": 2778860
    },
    {
      "epoch": 4.5476980682495105,
      "grad_norm": 0.025106798857450485,
      "learning_rate": 9.105644985932013e-07,
      "loss": 0.0076,
      "step": 2778880
    },
    {
      "epoch": 4.547730798688164,
      "grad_norm": 0.7387397885322571,
      "learning_rate": 9.104986063796841e-07,
      "loss": 0.0119,
      "step": 2778900
    },
    {
      "epoch": 4.547763529126818,
      "grad_norm": 0.049325648695230484,
      "learning_rate": 9.10432714166167e-07,
      "loss": 0.0096,
      "step": 2778920
    },
    {
      "epoch": 4.54779625956547,
      "grad_norm": 0.20911464095115662,
      "learning_rate": 9.1036682195265e-07,
      "loss": 0.0119,
      "step": 2778940
    },
    {
      "epoch": 4.547828990004124,
      "grad_norm": 0.1395590901374817,
      "learning_rate": 9.103009297391329e-07,
      "loss": 0.01,
      "step": 2778960
    },
    {
      "epoch": 4.547861720442778,
      "grad_norm": 0.07062701135873795,
      "learning_rate": 9.102350375256156e-07,
      "loss": 0.007,
      "step": 2778980
    },
    {
      "epoch": 4.547894450881431,
      "grad_norm": 0.15319523215293884,
      "learning_rate": 9.101691453120985e-07,
      "loss": 0.0054,
      "step": 2779000
    },
    {
      "epoch": 4.547927181320084,
      "grad_norm": 0.11965199559926987,
      "learning_rate": 9.101032530985813e-07,
      "loss": 0.0079,
      "step": 2779020
    },
    {
      "epoch": 4.5479599117587375,
      "grad_norm": 0.3236900866031647,
      "learning_rate": 9.100373608850644e-07,
      "loss": 0.0088,
      "step": 2779040
    },
    {
      "epoch": 4.547992642197391,
      "grad_norm": 0.14097122848033905,
      "learning_rate": 9.099714686715472e-07,
      "loss": 0.006,
      "step": 2779060
    },
    {
      "epoch": 4.548025372636044,
      "grad_norm": 0.17855411767959595,
      "learning_rate": 9.099055764580301e-07,
      "loss": 0.0078,
      "step": 2779080
    },
    {
      "epoch": 4.548058103074697,
      "grad_norm": 0.20042596757411957,
      "learning_rate": 9.098396842445128e-07,
      "loss": 0.0082,
      "step": 2779100
    },
    {
      "epoch": 4.548090833513351,
      "grad_norm": 0.7117547392845154,
      "learning_rate": 9.097737920309959e-07,
      "loss": 0.0088,
      "step": 2779120
    },
    {
      "epoch": 4.548123563952004,
      "grad_norm": 0.3649044632911682,
      "learning_rate": 9.097078998174787e-07,
      "loss": 0.0073,
      "step": 2779140
    },
    {
      "epoch": 4.548156294390657,
      "grad_norm": 0.1923641860485077,
      "learning_rate": 9.096420076039616e-07,
      "loss": 0.004,
      "step": 2779160
    },
    {
      "epoch": 4.548189024829311,
      "grad_norm": 0.29945850372314453,
      "learning_rate": 9.095761153904444e-07,
      "loss": 0.0083,
      "step": 2779180
    },
    {
      "epoch": 4.5482217552679645,
      "grad_norm": 0.24047724902629852,
      "learning_rate": 9.095102231769271e-07,
      "loss": 0.0114,
      "step": 2779200
    },
    {
      "epoch": 4.548254485706617,
      "grad_norm": 0.19196276366710663,
      "learning_rate": 9.094443309634102e-07,
      "loss": 0.0094,
      "step": 2779220
    },
    {
      "epoch": 4.548287216145271,
      "grad_norm": 0.21142147481441498,
      "learning_rate": 9.09378438749893e-07,
      "loss": 0.0081,
      "step": 2779240
    },
    {
      "epoch": 4.548319946583924,
      "grad_norm": 0.29855507612228394,
      "learning_rate": 9.093125465363759e-07,
      "loss": 0.0066,
      "step": 2779260
    },
    {
      "epoch": 4.548352677022578,
      "grad_norm": 0.3251974284648895,
      "learning_rate": 9.092466543228587e-07,
      "loss": 0.0159,
      "step": 2779280
    },
    {
      "epoch": 4.548385407461231,
      "grad_norm": 0.26384979486465454,
      "learning_rate": 9.091807621093417e-07,
      "loss": 0.0088,
      "step": 2779300
    },
    {
      "epoch": 4.548418137899884,
      "grad_norm": 0.2054576575756073,
      "learning_rate": 9.091148698958245e-07,
      "loss": 0.0092,
      "step": 2779320
    },
    {
      "epoch": 4.548450868338538,
      "grad_norm": 0.08368539065122604,
      "learning_rate": 9.090489776823074e-07,
      "loss": 0.0055,
      "step": 2779340
    },
    {
      "epoch": 4.5484835987771906,
      "grad_norm": 0.07794005423784256,
      "learning_rate": 9.089830854687902e-07,
      "loss": 0.006,
      "step": 2779360
    },
    {
      "epoch": 4.548516329215844,
      "grad_norm": 0.5676934719085693,
      "learning_rate": 9.089171932552732e-07,
      "loss": 0.0115,
      "step": 2779380
    },
    {
      "epoch": 4.548549059654498,
      "grad_norm": 0.2567698657512665,
      "learning_rate": 9.08851301041756e-07,
      "loss": 0.0112,
      "step": 2779400
    },
    {
      "epoch": 4.5485817900931504,
      "grad_norm": 0.3422296643257141,
      "learning_rate": 9.087854088282389e-07,
      "loss": 0.0067,
      "step": 2779420
    },
    {
      "epoch": 4.548614520531804,
      "grad_norm": 0.2535293996334076,
      "learning_rate": 9.087195166147217e-07,
      "loss": 0.0068,
      "step": 2779440
    },
    {
      "epoch": 4.548647250970458,
      "grad_norm": 0.14219489693641663,
      "learning_rate": 9.086536244012046e-07,
      "loss": 0.014,
      "step": 2779460
    },
    {
      "epoch": 4.548679981409111,
      "grad_norm": 0.9303498864173889,
      "learning_rate": 9.085877321876875e-07,
      "loss": 0.017,
      "step": 2779480
    },
    {
      "epoch": 4.548712711847764,
      "grad_norm": 0.09653032571077347,
      "learning_rate": 9.085218399741704e-07,
      "loss": 0.0054,
      "step": 2779500
    },
    {
      "epoch": 4.5487454422864175,
      "grad_norm": 0.20115861296653748,
      "learning_rate": 9.084559477606532e-07,
      "loss": 0.0076,
      "step": 2779520
    },
    {
      "epoch": 4.548778172725071,
      "grad_norm": 0.19964975118637085,
      "learning_rate": 9.08390055547136e-07,
      "loss": 0.0071,
      "step": 2779540
    },
    {
      "epoch": 4.548810903163724,
      "grad_norm": 0.2460954338312149,
      "learning_rate": 9.08324163333619e-07,
      "loss": 0.0122,
      "step": 2779560
    },
    {
      "epoch": 4.548843633602377,
      "grad_norm": 0.4811050593852997,
      "learning_rate": 9.082582711201018e-07,
      "loss": 0.0084,
      "step": 2779580
    },
    {
      "epoch": 4.548876364041031,
      "grad_norm": 0.18187406659126282,
      "learning_rate": 9.081923789065847e-07,
      "loss": 0.0081,
      "step": 2779600
    },
    {
      "epoch": 4.548909094479685,
      "grad_norm": 0.2343655526638031,
      "learning_rate": 9.081264866930675e-07,
      "loss": 0.0055,
      "step": 2779620
    },
    {
      "epoch": 4.548941824918337,
      "grad_norm": 0.06053284555673599,
      "learning_rate": 9.080605944795504e-07,
      "loss": 0.0093,
      "step": 2779640
    },
    {
      "epoch": 4.548974555356991,
      "grad_norm": 0.36400318145751953,
      "learning_rate": 9.079947022660333e-07,
      "loss": 0.0124,
      "step": 2779660
    },
    {
      "epoch": 4.5490072857956445,
      "grad_norm": 0.5188305974006653,
      "learning_rate": 9.079288100525162e-07,
      "loss": 0.0129,
      "step": 2779680
    },
    {
      "epoch": 4.549040016234297,
      "grad_norm": 0.11732455343008041,
      "learning_rate": 9.07862917838999e-07,
      "loss": 0.0063,
      "step": 2779700
    },
    {
      "epoch": 4.549072746672951,
      "grad_norm": 0.7048858404159546,
      "learning_rate": 9.077970256254819e-07,
      "loss": 0.0098,
      "step": 2779720
    },
    {
      "epoch": 4.549105477111604,
      "grad_norm": 0.12054401636123657,
      "learning_rate": 9.077311334119648e-07,
      "loss": 0.0145,
      "step": 2779740
    },
    {
      "epoch": 4.549138207550257,
      "grad_norm": 0.22683589160442352,
      "learning_rate": 9.076652411984477e-07,
      "loss": 0.0095,
      "step": 2779760
    },
    {
      "epoch": 4.549170937988911,
      "grad_norm": 0.1644076704978943,
      "learning_rate": 9.075993489849305e-07,
      "loss": 0.0106,
      "step": 2779780
    },
    {
      "epoch": 4.549203668427564,
      "grad_norm": 0.49040305614471436,
      "learning_rate": 9.075334567714134e-07,
      "loss": 0.0122,
      "step": 2779800
    },
    {
      "epoch": 4.549236398866218,
      "grad_norm": 0.08513109385967255,
      "learning_rate": 9.074675645578963e-07,
      "loss": 0.0052,
      "step": 2779820
    },
    {
      "epoch": 4.549269129304871,
      "grad_norm": 0.23847633600234985,
      "learning_rate": 9.074016723443792e-07,
      "loss": 0.005,
      "step": 2779840
    },
    {
      "epoch": 4.549301859743524,
      "grad_norm": 0.6972562074661255,
      "learning_rate": 9.07335780130862e-07,
      "loss": 0.0112,
      "step": 2779860
    },
    {
      "epoch": 4.549334590182178,
      "grad_norm": 0.11790519952774048,
      "learning_rate": 9.072698879173448e-07,
      "loss": 0.0102,
      "step": 2779880
    },
    {
      "epoch": 4.549367320620831,
      "grad_norm": 0.2597094774246216,
      "learning_rate": 9.072039957038277e-07,
      "loss": 0.0063,
      "step": 2779900
    },
    {
      "epoch": 4.549400051059484,
      "grad_norm": 0.07958129793405533,
      "learning_rate": 9.071381034903106e-07,
      "loss": 0.0068,
      "step": 2779920
    },
    {
      "epoch": 4.549432781498138,
      "grad_norm": 0.16370001435279846,
      "learning_rate": 9.070722112767935e-07,
      "loss": 0.0062,
      "step": 2779940
    },
    {
      "epoch": 4.549465511936791,
      "grad_norm": 0.30283689498901367,
      "learning_rate": 9.070063190632763e-07,
      "loss": 0.0129,
      "step": 2779960
    },
    {
      "epoch": 4.549498242375444,
      "grad_norm": 0.31741106510162354,
      "learning_rate": 9.069404268497592e-07,
      "loss": 0.0085,
      "step": 2779980
    },
    {
      "epoch": 4.549530972814098,
      "grad_norm": 0.10391131043434143,
      "learning_rate": 9.068745346362421e-07,
      "loss": 0.0085,
      "step": 2780000
    },
    {
      "epoch": 4.549563703252751,
      "grad_norm": 0.23956221342086792,
      "learning_rate": 9.06808642422725e-07,
      "loss": 0.0118,
      "step": 2780020
    },
    {
      "epoch": 4.549596433691404,
      "grad_norm": 0.3296182453632355,
      "learning_rate": 9.067427502092078e-07,
      "loss": 0.0087,
      "step": 2780040
    },
    {
      "epoch": 4.5496291641300575,
      "grad_norm": 0.30599406361579895,
      "learning_rate": 9.066768579956907e-07,
      "loss": 0.0091,
      "step": 2780060
    },
    {
      "epoch": 4.549661894568711,
      "grad_norm": 0.12110985815525055,
      "learning_rate": 9.066109657821735e-07,
      "loss": 0.0105,
      "step": 2780080
    },
    {
      "epoch": 4.549694625007365,
      "grad_norm": 0.22674259543418884,
      "learning_rate": 9.065450735686565e-07,
      "loss": 0.0071,
      "step": 2780100
    },
    {
      "epoch": 4.549727355446017,
      "grad_norm": 0.12446972727775574,
      "learning_rate": 9.064791813551393e-07,
      "loss": 0.008,
      "step": 2780120
    },
    {
      "epoch": 4.549760085884671,
      "grad_norm": 0.062004297971725464,
      "learning_rate": 9.064132891416222e-07,
      "loss": 0.0088,
      "step": 2780140
    },
    {
      "epoch": 4.5497928163233246,
      "grad_norm": 0.5025310516357422,
      "learning_rate": 9.06347396928105e-07,
      "loss": 0.0117,
      "step": 2780160
    },
    {
      "epoch": 4.549825546761978,
      "grad_norm": 0.2864334285259247,
      "learning_rate": 9.062815047145881e-07,
      "loss": 0.0155,
      "step": 2780180
    },
    {
      "epoch": 4.549858277200631,
      "grad_norm": 0.1499887853860855,
      "learning_rate": 9.062156125010708e-07,
      "loss": 0.0082,
      "step": 2780200
    },
    {
      "epoch": 4.5498910076392844,
      "grad_norm": 0.26357707381248474,
      "learning_rate": 9.061497202875536e-07,
      "loss": 0.0095,
      "step": 2780220
    },
    {
      "epoch": 4.549923738077938,
      "grad_norm": 0.18689589202404022,
      "learning_rate": 9.060838280740365e-07,
      "loss": 0.0097,
      "step": 2780240
    },
    {
      "epoch": 4.549956468516591,
      "grad_norm": 0.0875420942902565,
      "learning_rate": 9.060179358605195e-07,
      "loss": 0.0081,
      "step": 2780260
    },
    {
      "epoch": 4.549989198955244,
      "grad_norm": 0.1749868243932724,
      "learning_rate": 9.059520436470024e-07,
      "loss": 0.0105,
      "step": 2780280
    },
    {
      "epoch": 4.550021929393898,
      "grad_norm": 0.09355167299509048,
      "learning_rate": 9.058861514334851e-07,
      "loss": 0.0119,
      "step": 2780300
    },
    {
      "epoch": 4.550054659832551,
      "grad_norm": 0.16581277549266815,
      "learning_rate": 9.05820259219968e-07,
      "loss": 0.0074,
      "step": 2780320
    },
    {
      "epoch": 4.550087390271204,
      "grad_norm": 0.1473209261894226,
      "learning_rate": 9.057543670064508e-07,
      "loss": 0.0062,
      "step": 2780340
    },
    {
      "epoch": 4.550120120709858,
      "grad_norm": 0.28844520449638367,
      "learning_rate": 9.056884747929339e-07,
      "loss": 0.0103,
      "step": 2780360
    },
    {
      "epoch": 4.550152851148511,
      "grad_norm": 0.11496509611606598,
      "learning_rate": 9.056225825794167e-07,
      "loss": 0.01,
      "step": 2780380
    },
    {
      "epoch": 4.550185581587164,
      "grad_norm": 0.11615832895040512,
      "learning_rate": 9.055566903658996e-07,
      "loss": 0.0107,
      "step": 2780400
    },
    {
      "epoch": 4.550218312025818,
      "grad_norm": 0.5932843089103699,
      "learning_rate": 9.054907981523823e-07,
      "loss": 0.0078,
      "step": 2780420
    },
    {
      "epoch": 4.550251042464471,
      "grad_norm": 0.4232432246208191,
      "learning_rate": 9.054249059388654e-07,
      "loss": 0.0103,
      "step": 2780440
    },
    {
      "epoch": 4.550283772903125,
      "grad_norm": 0.10176080465316772,
      "learning_rate": 9.053590137253482e-07,
      "loss": 0.0069,
      "step": 2780460
    },
    {
      "epoch": 4.550316503341778,
      "grad_norm": 0.11259365826845169,
      "learning_rate": 9.052931215118311e-07,
      "loss": 0.0065,
      "step": 2780480
    },
    {
      "epoch": 4.550349233780431,
      "grad_norm": 0.10310423374176025,
      "learning_rate": 9.052272292983139e-07,
      "loss": 0.0071,
      "step": 2780500
    },
    {
      "epoch": 4.550381964219085,
      "grad_norm": 0.2684672176837921,
      "learning_rate": 9.051613370847966e-07,
      "loss": 0.0093,
      "step": 2780520
    },
    {
      "epoch": 4.5504146946577375,
      "grad_norm": 0.3592071831226349,
      "learning_rate": 9.050954448712797e-07,
      "loss": 0.0111,
      "step": 2780540
    },
    {
      "epoch": 4.550447425096391,
      "grad_norm": 0.10229210555553436,
      "learning_rate": 9.050295526577625e-07,
      "loss": 0.0064,
      "step": 2780560
    },
    {
      "epoch": 4.550480155535045,
      "grad_norm": 0.2980652451515198,
      "learning_rate": 9.049636604442454e-07,
      "loss": 0.0102,
      "step": 2780580
    },
    {
      "epoch": 4.550512885973697,
      "grad_norm": 0.34743431210517883,
      "learning_rate": 9.048977682307282e-07,
      "loss": 0.0123,
      "step": 2780600
    },
    {
      "epoch": 4.550545616412351,
      "grad_norm": 0.11276202648878098,
      "learning_rate": 9.048318760172112e-07,
      "loss": 0.0097,
      "step": 2780620
    },
    {
      "epoch": 4.550578346851005,
      "grad_norm": 0.2094440758228302,
      "learning_rate": 9.04765983803694e-07,
      "loss": 0.007,
      "step": 2780640
    },
    {
      "epoch": 4.550611077289658,
      "grad_norm": 0.08179505914449692,
      "learning_rate": 9.047000915901769e-07,
      "loss": 0.0065,
      "step": 2780660
    },
    {
      "epoch": 4.550643807728311,
      "grad_norm": 0.37041717767715454,
      "learning_rate": 9.046341993766597e-07,
      "loss": 0.0097,
      "step": 2780680
    },
    {
      "epoch": 4.5506765381669645,
      "grad_norm": 0.322949081659317,
      "learning_rate": 9.045683071631427e-07,
      "loss": 0.0073,
      "step": 2780700
    },
    {
      "epoch": 4.550709268605618,
      "grad_norm": 0.16070491075515747,
      "learning_rate": 9.045024149496255e-07,
      "loss": 0.0091,
      "step": 2780720
    },
    {
      "epoch": 4.550741999044272,
      "grad_norm": 0.19327355921268463,
      "learning_rate": 9.044365227361084e-07,
      "loss": 0.011,
      "step": 2780740
    },
    {
      "epoch": 4.550774729482924,
      "grad_norm": 0.09862715005874634,
      "learning_rate": 9.043706305225912e-07,
      "loss": 0.0122,
      "step": 2780760
    },
    {
      "epoch": 4.550807459921578,
      "grad_norm": 0.2129536271095276,
      "learning_rate": 9.043047383090741e-07,
      "loss": 0.0106,
      "step": 2780780
    },
    {
      "epoch": 4.550840190360232,
      "grad_norm": 0.3678467273712158,
      "learning_rate": 9.04238846095557e-07,
      "loss": 0.0072,
      "step": 2780800
    },
    {
      "epoch": 4.550872920798884,
      "grad_norm": 0.2581535875797272,
      "learning_rate": 9.041729538820399e-07,
      "loss": 0.0084,
      "step": 2780820
    },
    {
      "epoch": 4.550905651237538,
      "grad_norm": 0.1585254818201065,
      "learning_rate": 9.041070616685227e-07,
      "loss": 0.0104,
      "step": 2780840
    },
    {
      "epoch": 4.5509383816761915,
      "grad_norm": 0.25423717498779297,
      "learning_rate": 9.040411694550056e-07,
      "loss": 0.0102,
      "step": 2780860
    },
    {
      "epoch": 4.550971112114844,
      "grad_norm": 0.0839305967092514,
      "learning_rate": 9.039752772414885e-07,
      "loss": 0.0088,
      "step": 2780880
    },
    {
      "epoch": 4.551003842553498,
      "grad_norm": 0.07040392607450485,
      "learning_rate": 9.039093850279713e-07,
      "loss": 0.0088,
      "step": 2780900
    },
    {
      "epoch": 4.551036572992151,
      "grad_norm": 0.5621727705001831,
      "learning_rate": 9.038434928144542e-07,
      "loss": 0.0088,
      "step": 2780920
    },
    {
      "epoch": 4.551069303430805,
      "grad_norm": 0.6291999816894531,
      "learning_rate": 9.03777600600937e-07,
      "loss": 0.0127,
      "step": 2780940
    },
    {
      "epoch": 4.551102033869458,
      "grad_norm": 0.10334242880344391,
      "learning_rate": 9.037117083874199e-07,
      "loss": 0.0059,
      "step": 2780960
    },
    {
      "epoch": 4.551134764308111,
      "grad_norm": 0.20793132483959198,
      "learning_rate": 9.036458161739028e-07,
      "loss": 0.0071,
      "step": 2780980
    },
    {
      "epoch": 4.551167494746765,
      "grad_norm": 0.7115402817726135,
      "learning_rate": 9.035799239603857e-07,
      "loss": 0.0093,
      "step": 2781000
    },
    {
      "epoch": 4.551200225185418,
      "grad_norm": 0.12410930544137955,
      "learning_rate": 9.035140317468685e-07,
      "loss": 0.0103,
      "step": 2781020
    },
    {
      "epoch": 4.551232955624071,
      "grad_norm": 0.13942286372184753,
      "learning_rate": 9.034481395333514e-07,
      "loss": 0.008,
      "step": 2781040
    },
    {
      "epoch": 4.551265686062725,
      "grad_norm": 0.12774810194969177,
      "learning_rate": 9.033822473198343e-07,
      "loss": 0.0089,
      "step": 2781060
    },
    {
      "epoch": 4.551298416501378,
      "grad_norm": 0.30060893297195435,
      "learning_rate": 9.033163551063172e-07,
      "loss": 0.0127,
      "step": 2781080
    },
    {
      "epoch": 4.551331146940031,
      "grad_norm": 0.43570148944854736,
      "learning_rate": 9.032504628928e-07,
      "loss": 0.0102,
      "step": 2781100
    },
    {
      "epoch": 4.551363877378685,
      "grad_norm": 0.8311673402786255,
      "learning_rate": 9.031845706792829e-07,
      "loss": 0.0118,
      "step": 2781120
    },
    {
      "epoch": 4.551396607817338,
      "grad_norm": 1.402959942817688,
      "learning_rate": 9.031186784657658e-07,
      "loss": 0.0138,
      "step": 2781140
    },
    {
      "epoch": 4.551429338255991,
      "grad_norm": 0.22675776481628418,
      "learning_rate": 9.030527862522487e-07,
      "loss": 0.0057,
      "step": 2781160
    },
    {
      "epoch": 4.5514620686946445,
      "grad_norm": 0.5407457947731018,
      "learning_rate": 9.029868940387315e-07,
      "loss": 0.0071,
      "step": 2781180
    },
    {
      "epoch": 4.551494799133298,
      "grad_norm": 0.11576667428016663,
      "learning_rate": 9.029210018252144e-07,
      "loss": 0.0067,
      "step": 2781200
    },
    {
      "epoch": 4.551527529571951,
      "grad_norm": 0.13220173120498657,
      "learning_rate": 9.028551096116972e-07,
      "loss": 0.0136,
      "step": 2781220
    },
    {
      "epoch": 4.551560260010604,
      "grad_norm": 0.2076866626739502,
      "learning_rate": 9.027892173981801e-07,
      "loss": 0.0122,
      "step": 2781240
    },
    {
      "epoch": 4.551592990449258,
      "grad_norm": 0.08823966234922409,
      "learning_rate": 9.02723325184663e-07,
      "loss": 0.011,
      "step": 2781260
    },
    {
      "epoch": 4.551625720887912,
      "grad_norm": 0.1475098729133606,
      "learning_rate": 9.026574329711458e-07,
      "loss": 0.0196,
      "step": 2781280
    },
    {
      "epoch": 4.551658451326564,
      "grad_norm": 0.3771020472049713,
      "learning_rate": 9.025915407576287e-07,
      "loss": 0.0086,
      "step": 2781300
    },
    {
      "epoch": 4.551691181765218,
      "grad_norm": 0.2002878487110138,
      "learning_rate": 9.025256485441116e-07,
      "loss": 0.0094,
      "step": 2781320
    },
    {
      "epoch": 4.5517239122038715,
      "grad_norm": 0.1683526188135147,
      "learning_rate": 9.024597563305945e-07,
      "loss": 0.0098,
      "step": 2781340
    },
    {
      "epoch": 4.551756642642525,
      "grad_norm": 0.2501786947250366,
      "learning_rate": 9.023938641170773e-07,
      "loss": 0.0078,
      "step": 2781360
    },
    {
      "epoch": 4.551789373081178,
      "grad_norm": 0.16251850128173828,
      "learning_rate": 9.023279719035602e-07,
      "loss": 0.009,
      "step": 2781380
    },
    {
      "epoch": 4.551822103519831,
      "grad_norm": 0.2514520585536957,
      "learning_rate": 9.02262079690043e-07,
      "loss": 0.0142,
      "step": 2781400
    },
    {
      "epoch": 4.551854833958485,
      "grad_norm": 0.3566577136516571,
      "learning_rate": 9.02196187476526e-07,
      "loss": 0.0084,
      "step": 2781420
    },
    {
      "epoch": 4.551887564397138,
      "grad_norm": 0.19590426981449127,
      "learning_rate": 9.021302952630088e-07,
      "loss": 0.0069,
      "step": 2781440
    },
    {
      "epoch": 4.551920294835791,
      "grad_norm": 0.32895827293395996,
      "learning_rate": 9.020644030494917e-07,
      "loss": 0.0095,
      "step": 2781460
    },
    {
      "epoch": 4.551953025274445,
      "grad_norm": 0.33342018723487854,
      "learning_rate": 9.019985108359745e-07,
      "loss": 0.0075,
      "step": 2781480
    },
    {
      "epoch": 4.551985755713098,
      "grad_norm": 0.18336771428585052,
      "learning_rate": 9.019326186224576e-07,
      "loss": 0.011,
      "step": 2781500
    },
    {
      "epoch": 4.552018486151751,
      "grad_norm": 0.40301790833473206,
      "learning_rate": 9.018667264089403e-07,
      "loss": 0.0124,
      "step": 2781520
    },
    {
      "epoch": 4.552051216590405,
      "grad_norm": 0.09871964901685715,
      "learning_rate": 9.018008341954231e-07,
      "loss": 0.0135,
      "step": 2781540
    },
    {
      "epoch": 4.552083947029058,
      "grad_norm": 0.16517898440361023,
      "learning_rate": 9.01734941981906e-07,
      "loss": 0.0073,
      "step": 2781560
    },
    {
      "epoch": 4.552116677467711,
      "grad_norm": 0.08202096074819565,
      "learning_rate": 9.01669049768389e-07,
      "loss": 0.0054,
      "step": 2781580
    },
    {
      "epoch": 4.552149407906365,
      "grad_norm": 0.10618589073419571,
      "learning_rate": 9.016031575548719e-07,
      "loss": 0.0052,
      "step": 2781600
    },
    {
      "epoch": 4.552182138345018,
      "grad_norm": 0.15479786694049835,
      "learning_rate": 9.015372653413546e-07,
      "loss": 0.0092,
      "step": 2781620
    },
    {
      "epoch": 4.552214868783672,
      "grad_norm": 0.2066870629787445,
      "learning_rate": 9.014713731278375e-07,
      "loss": 0.0069,
      "step": 2781640
    },
    {
      "epoch": 4.552247599222325,
      "grad_norm": 0.18100301921367645,
      "learning_rate": 9.014054809143203e-07,
      "loss": 0.0056,
      "step": 2781660
    },
    {
      "epoch": 4.552280329660978,
      "grad_norm": 0.24246786534786224,
      "learning_rate": 9.013395887008034e-07,
      "loss": 0.0123,
      "step": 2781680
    },
    {
      "epoch": 4.552313060099632,
      "grad_norm": 0.43149518966674805,
      "learning_rate": 9.012736964872862e-07,
      "loss": 0.0113,
      "step": 2781700
    },
    {
      "epoch": 4.5523457905382845,
      "grad_norm": 0.4219453036785126,
      "learning_rate": 9.012078042737691e-07,
      "loss": 0.0113,
      "step": 2781720
    },
    {
      "epoch": 4.552378520976938,
      "grad_norm": 0.26305091381073,
      "learning_rate": 9.011419120602518e-07,
      "loss": 0.0074,
      "step": 2781740
    },
    {
      "epoch": 4.552411251415592,
      "grad_norm": 0.32935503125190735,
      "learning_rate": 9.010760198467349e-07,
      "loss": 0.0095,
      "step": 2781760
    },
    {
      "epoch": 4.552443981854244,
      "grad_norm": 0.1259058266878128,
      "learning_rate": 9.010101276332177e-07,
      "loss": 0.0102,
      "step": 2781780
    },
    {
      "epoch": 4.552476712292898,
      "grad_norm": 0.5435097217559814,
      "learning_rate": 9.009442354197006e-07,
      "loss": 0.0139,
      "step": 2781800
    },
    {
      "epoch": 4.552509442731552,
      "grad_norm": 0.09498222172260284,
      "learning_rate": 9.008783432061834e-07,
      "loss": 0.01,
      "step": 2781820
    },
    {
      "epoch": 4.552542173170205,
      "grad_norm": 0.3265797793865204,
      "learning_rate": 9.008124509926662e-07,
      "loss": 0.012,
      "step": 2781840
    },
    {
      "epoch": 4.552574903608858,
      "grad_norm": 0.1330137997865677,
      "learning_rate": 9.007465587791492e-07,
      "loss": 0.0125,
      "step": 2781860
    },
    {
      "epoch": 4.5526076340475115,
      "grad_norm": 0.3367719054222107,
      "learning_rate": 9.006806665656321e-07,
      "loss": 0.0093,
      "step": 2781880
    },
    {
      "epoch": 4.552640364486165,
      "grad_norm": 0.14908337593078613,
      "learning_rate": 9.006147743521149e-07,
      "loss": 0.0065,
      "step": 2781900
    },
    {
      "epoch": 4.552673094924819,
      "grad_norm": 0.21982397139072418,
      "learning_rate": 9.005488821385977e-07,
      "loss": 0.0115,
      "step": 2781920
    },
    {
      "epoch": 4.552705825363471,
      "grad_norm": 0.2407592236995697,
      "learning_rate": 9.004829899250807e-07,
      "loss": 0.0065,
      "step": 2781940
    },
    {
      "epoch": 4.552738555802125,
      "grad_norm": 0.09939315915107727,
      "learning_rate": 9.004170977115635e-07,
      "loss": 0.0057,
      "step": 2781960
    },
    {
      "epoch": 4.5527712862407785,
      "grad_norm": 0.1777951866388321,
      "learning_rate": 9.003512054980464e-07,
      "loss": 0.0108,
      "step": 2781980
    },
    {
      "epoch": 4.552804016679431,
      "grad_norm": 0.13638922572135925,
      "learning_rate": 9.002853132845292e-07,
      "loss": 0.0077,
      "step": 2782000
    },
    {
      "epoch": 4.552836747118085,
      "grad_norm": 0.13353990018367767,
      "learning_rate": 9.002194210710122e-07,
      "loss": 0.0103,
      "step": 2782020
    },
    {
      "epoch": 4.552869477556738,
      "grad_norm": 0.2807942032814026,
      "learning_rate": 9.00153528857495e-07,
      "loss": 0.0061,
      "step": 2782040
    },
    {
      "epoch": 4.552902207995391,
      "grad_norm": 0.1311851441860199,
      "learning_rate": 9.000876366439779e-07,
      "loss": 0.0102,
      "step": 2782060
    },
    {
      "epoch": 4.552934938434045,
      "grad_norm": 0.22793221473693848,
      "learning_rate": 9.000217444304607e-07,
      "loss": 0.0081,
      "step": 2782080
    },
    {
      "epoch": 4.552967668872698,
      "grad_norm": 0.3514380156993866,
      "learning_rate": 8.999558522169436e-07,
      "loss": 0.0074,
      "step": 2782100
    },
    {
      "epoch": 4.553000399311352,
      "grad_norm": 0.3359951674938202,
      "learning_rate": 8.998899600034265e-07,
      "loss": 0.0105,
      "step": 2782120
    },
    {
      "epoch": 4.553033129750005,
      "grad_norm": 0.09308863431215286,
      "learning_rate": 8.998240677899094e-07,
      "loss": 0.0068,
      "step": 2782140
    },
    {
      "epoch": 4.553065860188658,
      "grad_norm": 0.08125334978103638,
      "learning_rate": 8.997581755763922e-07,
      "loss": 0.0062,
      "step": 2782160
    },
    {
      "epoch": 4.553098590627312,
      "grad_norm": 0.25158634781837463,
      "learning_rate": 8.996922833628751e-07,
      "loss": 0.0115,
      "step": 2782180
    },
    {
      "epoch": 4.553131321065965,
      "grad_norm": 0.3517726957798004,
      "learning_rate": 8.99626391149358e-07,
      "loss": 0.0065,
      "step": 2782200
    },
    {
      "epoch": 4.553164051504618,
      "grad_norm": 0.12925007939338684,
      "learning_rate": 8.995604989358409e-07,
      "loss": 0.0081,
      "step": 2782220
    },
    {
      "epoch": 4.553196781943272,
      "grad_norm": 0.29501473903656006,
      "learning_rate": 8.994946067223237e-07,
      "loss": 0.0056,
      "step": 2782240
    },
    {
      "epoch": 4.553229512381925,
      "grad_norm": 0.1936822235584259,
      "learning_rate": 8.994287145088065e-07,
      "loss": 0.0075,
      "step": 2782260
    },
    {
      "epoch": 4.553262242820578,
      "grad_norm": 0.8648727536201477,
      "learning_rate": 8.993628222952894e-07,
      "loss": 0.0088,
      "step": 2782280
    },
    {
      "epoch": 4.553294973259232,
      "grad_norm": 0.16079619526863098,
      "learning_rate": 8.992969300817723e-07,
      "loss": 0.007,
      "step": 2782300
    },
    {
      "epoch": 4.553327703697885,
      "grad_norm": 0.24017469584941864,
      "learning_rate": 8.992310378682552e-07,
      "loss": 0.0126,
      "step": 2782320
    },
    {
      "epoch": 4.553360434136538,
      "grad_norm": 0.17605146765708923,
      "learning_rate": 8.99165145654738e-07,
      "loss": 0.01,
      "step": 2782340
    },
    {
      "epoch": 4.5533931645751915,
      "grad_norm": 0.0952436625957489,
      "learning_rate": 8.990992534412209e-07,
      "loss": 0.0113,
      "step": 2782360
    },
    {
      "epoch": 4.553425895013845,
      "grad_norm": 0.23227114975452423,
      "learning_rate": 8.990333612277038e-07,
      "loss": 0.009,
      "step": 2782380
    },
    {
      "epoch": 4.553458625452499,
      "grad_norm": 0.2741394340991974,
      "learning_rate": 8.989674690141867e-07,
      "loss": 0.0074,
      "step": 2782400
    },
    {
      "epoch": 4.553491355891151,
      "grad_norm": 0.15255631506443024,
      "learning_rate": 8.989015768006695e-07,
      "loss": 0.0089,
      "step": 2782420
    },
    {
      "epoch": 4.553524086329805,
      "grad_norm": 0.3864966332912445,
      "learning_rate": 8.988356845871524e-07,
      "loss": 0.0154,
      "step": 2782440
    },
    {
      "epoch": 4.553556816768459,
      "grad_norm": 0.6333925127983093,
      "learning_rate": 8.987697923736353e-07,
      "loss": 0.0093,
      "step": 2782460
    },
    {
      "epoch": 4.553589547207111,
      "grad_norm": 0.14392302930355072,
      "learning_rate": 8.987039001601182e-07,
      "loss": 0.006,
      "step": 2782480
    },
    {
      "epoch": 4.553622277645765,
      "grad_norm": 0.5605431795120239,
      "learning_rate": 8.98638007946601e-07,
      "loss": 0.0069,
      "step": 2782500
    },
    {
      "epoch": 4.5536550080844185,
      "grad_norm": 0.6825703978538513,
      "learning_rate": 8.985721157330839e-07,
      "loss": 0.0094,
      "step": 2782520
    },
    {
      "epoch": 4.553687738523072,
      "grad_norm": 0.13294540345668793,
      "learning_rate": 8.985062235195667e-07,
      "loss": 0.0076,
      "step": 2782540
    },
    {
      "epoch": 4.553720468961725,
      "grad_norm": 0.2257111668586731,
      "learning_rate": 8.984403313060497e-07,
      "loss": 0.0159,
      "step": 2782560
    },
    {
      "epoch": 4.553753199400378,
      "grad_norm": 0.5959573984146118,
      "learning_rate": 8.983744390925325e-07,
      "loss": 0.0103,
      "step": 2782580
    },
    {
      "epoch": 4.553785929839032,
      "grad_norm": 0.18874900043010712,
      "learning_rate": 8.983085468790153e-07,
      "loss": 0.0071,
      "step": 2782600
    },
    {
      "epoch": 4.553818660277685,
      "grad_norm": 0.16134510934352875,
      "learning_rate": 8.982426546654982e-07,
      "loss": 0.0087,
      "step": 2782620
    },
    {
      "epoch": 4.553851390716338,
      "grad_norm": 0.11682307720184326,
      "learning_rate": 8.981767624519811e-07,
      "loss": 0.0139,
      "step": 2782640
    },
    {
      "epoch": 4.553884121154992,
      "grad_norm": 0.6782570481300354,
      "learning_rate": 8.98110870238464e-07,
      "loss": 0.0102,
      "step": 2782660
    },
    {
      "epoch": 4.553916851593645,
      "grad_norm": 0.3210829198360443,
      "learning_rate": 8.980449780249468e-07,
      "loss": 0.0103,
      "step": 2782680
    },
    {
      "epoch": 4.553949582032298,
      "grad_norm": 0.1611684113740921,
      "learning_rate": 8.979790858114297e-07,
      "loss": 0.0108,
      "step": 2782700
    },
    {
      "epoch": 4.553982312470952,
      "grad_norm": 0.07282435894012451,
      "learning_rate": 8.979131935979125e-07,
      "loss": 0.007,
      "step": 2782720
    },
    {
      "epoch": 4.554015042909605,
      "grad_norm": 0.27458104491233826,
      "learning_rate": 8.978473013843955e-07,
      "loss": 0.0079,
      "step": 2782740
    },
    {
      "epoch": 4.554047773348258,
      "grad_norm": 0.4482520818710327,
      "learning_rate": 8.977814091708783e-07,
      "loss": 0.0129,
      "step": 2782760
    },
    {
      "epoch": 4.554080503786912,
      "grad_norm": 0.3330483138561249,
      "learning_rate": 8.977155169573612e-07,
      "loss": 0.0066,
      "step": 2782780
    },
    {
      "epoch": 4.554113234225565,
      "grad_norm": 0.346190869808197,
      "learning_rate": 8.97649624743844e-07,
      "loss": 0.0091,
      "step": 2782800
    },
    {
      "epoch": 4.554145964664219,
      "grad_norm": 0.2433764785528183,
      "learning_rate": 8.975837325303271e-07,
      "loss": 0.0106,
      "step": 2782820
    },
    {
      "epoch": 4.5541786951028715,
      "grad_norm": 0.08296769112348557,
      "learning_rate": 8.975178403168098e-07,
      "loss": 0.0105,
      "step": 2782840
    },
    {
      "epoch": 4.554211425541525,
      "grad_norm": 0.1847536861896515,
      "learning_rate": 8.974519481032927e-07,
      "loss": 0.0068,
      "step": 2782860
    },
    {
      "epoch": 4.554244155980179,
      "grad_norm": 0.24923445284366608,
      "learning_rate": 8.973860558897755e-07,
      "loss": 0.0089,
      "step": 2782880
    },
    {
      "epoch": 4.554276886418831,
      "grad_norm": 0.5258945822715759,
      "learning_rate": 8.973201636762586e-07,
      "loss": 0.0088,
      "step": 2782900
    },
    {
      "epoch": 4.554309616857485,
      "grad_norm": 0.11150237917900085,
      "learning_rate": 8.972542714627414e-07,
      "loss": 0.0089,
      "step": 2782920
    },
    {
      "epoch": 4.554342347296139,
      "grad_norm": 0.12773877382278442,
      "learning_rate": 8.971883792492241e-07,
      "loss": 0.0116,
      "step": 2782940
    },
    {
      "epoch": 4.554375077734791,
      "grad_norm": 0.20261916518211365,
      "learning_rate": 8.97122487035707e-07,
      "loss": 0.0069,
      "step": 2782960
    },
    {
      "epoch": 4.554407808173445,
      "grad_norm": 0.049822233617305756,
      "learning_rate": 8.970565948221898e-07,
      "loss": 0.0092,
      "step": 2782980
    },
    {
      "epoch": 4.5544405386120985,
      "grad_norm": 0.1856275200843811,
      "learning_rate": 8.969907026086729e-07,
      "loss": 0.006,
      "step": 2783000
    },
    {
      "epoch": 4.554473269050752,
      "grad_norm": 0.6404744386672974,
      "learning_rate": 8.969248103951557e-07,
      "loss": 0.0096,
      "step": 2783020
    },
    {
      "epoch": 4.554505999489405,
      "grad_norm": 0.3803772032260895,
      "learning_rate": 8.968589181816386e-07,
      "loss": 0.0132,
      "step": 2783040
    },
    {
      "epoch": 4.554538729928058,
      "grad_norm": 0.3708445131778717,
      "learning_rate": 8.967930259681213e-07,
      "loss": 0.0103,
      "step": 2783060
    },
    {
      "epoch": 4.554571460366712,
      "grad_norm": 0.2736089527606964,
      "learning_rate": 8.967271337546044e-07,
      "loss": 0.0114,
      "step": 2783080
    },
    {
      "epoch": 4.554604190805366,
      "grad_norm": 0.6254018545150757,
      "learning_rate": 8.966612415410872e-07,
      "loss": 0.0074,
      "step": 2783100
    },
    {
      "epoch": 4.554636921244018,
      "grad_norm": 0.057524606585502625,
      "learning_rate": 8.965953493275701e-07,
      "loss": 0.0086,
      "step": 2783120
    },
    {
      "epoch": 4.554669651682672,
      "grad_norm": 0.24516992270946503,
      "learning_rate": 8.965294571140529e-07,
      "loss": 0.0112,
      "step": 2783140
    },
    {
      "epoch": 4.5547023821213255,
      "grad_norm": 0.04143839329481125,
      "learning_rate": 8.964635649005357e-07,
      "loss": 0.0065,
      "step": 2783160
    },
    {
      "epoch": 4.554735112559978,
      "grad_norm": 0.28443217277526855,
      "learning_rate": 8.963976726870187e-07,
      "loss": 0.0117,
      "step": 2783180
    },
    {
      "epoch": 4.554767842998632,
      "grad_norm": 0.2548203766345978,
      "learning_rate": 8.963317804735016e-07,
      "loss": 0.0097,
      "step": 2783200
    },
    {
      "epoch": 4.554800573437285,
      "grad_norm": 0.10286926478147507,
      "learning_rate": 8.962658882599844e-07,
      "loss": 0.0085,
      "step": 2783220
    },
    {
      "epoch": 4.554833303875938,
      "grad_norm": 0.4703271985054016,
      "learning_rate": 8.961999960464673e-07,
      "loss": 0.01,
      "step": 2783240
    },
    {
      "epoch": 4.554866034314592,
      "grad_norm": 0.12946344912052155,
      "learning_rate": 8.961341038329502e-07,
      "loss": 0.0102,
      "step": 2783260
    },
    {
      "epoch": 4.554898764753245,
      "grad_norm": 0.5492000579833984,
      "learning_rate": 8.96068211619433e-07,
      "loss": 0.0082,
      "step": 2783280
    },
    {
      "epoch": 4.554931495191899,
      "grad_norm": 0.20654484629631042,
      "learning_rate": 8.960023194059159e-07,
      "loss": 0.0071,
      "step": 2783300
    },
    {
      "epoch": 4.554964225630552,
      "grad_norm": 0.08163054287433624,
      "learning_rate": 8.959364271923987e-07,
      "loss": 0.0064,
      "step": 2783320
    },
    {
      "epoch": 4.554996956069205,
      "grad_norm": 0.04450220614671707,
      "learning_rate": 8.958705349788817e-07,
      "loss": 0.0071,
      "step": 2783340
    },
    {
      "epoch": 4.555029686507859,
      "grad_norm": 0.1007198840379715,
      "learning_rate": 8.958046427653645e-07,
      "loss": 0.0116,
      "step": 2783360
    },
    {
      "epoch": 4.555062416946512,
      "grad_norm": 0.45322567224502563,
      "learning_rate": 8.957387505518474e-07,
      "loss": 0.0081,
      "step": 2783380
    },
    {
      "epoch": 4.555095147385165,
      "grad_norm": 0.1384464055299759,
      "learning_rate": 8.956728583383302e-07,
      "loss": 0.0075,
      "step": 2783400
    },
    {
      "epoch": 4.555127877823819,
      "grad_norm": 0.134374737739563,
      "learning_rate": 8.956069661248131e-07,
      "loss": 0.0105,
      "step": 2783420
    },
    {
      "epoch": 4.555160608262472,
      "grad_norm": 0.19979000091552734,
      "learning_rate": 8.95541073911296e-07,
      "loss": 0.0127,
      "step": 2783440
    },
    {
      "epoch": 4.555193338701125,
      "grad_norm": 0.35366684198379517,
      "learning_rate": 8.954751816977789e-07,
      "loss": 0.0101,
      "step": 2783460
    },
    {
      "epoch": 4.555226069139779,
      "grad_norm": 0.07362179458141327,
      "learning_rate": 8.954092894842617e-07,
      "loss": 0.0069,
      "step": 2783480
    },
    {
      "epoch": 4.555258799578432,
      "grad_norm": 0.09202504903078079,
      "learning_rate": 8.953433972707446e-07,
      "loss": 0.0098,
      "step": 2783500
    },
    {
      "epoch": 4.555291530017085,
      "grad_norm": 0.11351165175437927,
      "learning_rate": 8.952775050572275e-07,
      "loss": 0.0076,
      "step": 2783520
    },
    {
      "epoch": 4.5553242604557385,
      "grad_norm": 0.14997394382953644,
      "learning_rate": 8.952116128437104e-07,
      "loss": 0.008,
      "step": 2783540
    },
    {
      "epoch": 4.555356990894392,
      "grad_norm": 0.07918696105480194,
      "learning_rate": 8.951457206301932e-07,
      "loss": 0.0073,
      "step": 2783560
    },
    {
      "epoch": 4.555389721333046,
      "grad_norm": 0.10625166445970535,
      "learning_rate": 8.95079828416676e-07,
      "loss": 0.0075,
      "step": 2783580
    },
    {
      "epoch": 4.555422451771698,
      "grad_norm": 0.10315070301294327,
      "learning_rate": 8.950139362031589e-07,
      "loss": 0.0063,
      "step": 2783600
    },
    {
      "epoch": 4.555455182210352,
      "grad_norm": 0.13812203705310822,
      "learning_rate": 8.949480439896418e-07,
      "loss": 0.0142,
      "step": 2783620
    },
    {
      "epoch": 4.5554879126490055,
      "grad_norm": 0.16711771488189697,
      "learning_rate": 8.948821517761247e-07,
      "loss": 0.0098,
      "step": 2783640
    },
    {
      "epoch": 4.555520643087659,
      "grad_norm": 0.2551621198654175,
      "learning_rate": 8.948162595626075e-07,
      "loss": 0.01,
      "step": 2783660
    },
    {
      "epoch": 4.555553373526312,
      "grad_norm": 0.1345612108707428,
      "learning_rate": 8.947503673490904e-07,
      "loss": 0.0098,
      "step": 2783680
    },
    {
      "epoch": 4.555586103964965,
      "grad_norm": 0.14073406159877777,
      "learning_rate": 8.946844751355733e-07,
      "loss": 0.0108,
      "step": 2783700
    },
    {
      "epoch": 4.555618834403619,
      "grad_norm": 0.6061204075813293,
      "learning_rate": 8.946185829220562e-07,
      "loss": 0.0111,
      "step": 2783720
    },
    {
      "epoch": 4.555651564842272,
      "grad_norm": 0.11677836626768112,
      "learning_rate": 8.94552690708539e-07,
      "loss": 0.0102,
      "step": 2783740
    },
    {
      "epoch": 4.555684295280925,
      "grad_norm": 0.07215578109025955,
      "learning_rate": 8.944867984950219e-07,
      "loss": 0.0083,
      "step": 2783760
    },
    {
      "epoch": 4.555717025719579,
      "grad_norm": 0.34331050515174866,
      "learning_rate": 8.944209062815048e-07,
      "loss": 0.0065,
      "step": 2783780
    },
    {
      "epoch": 4.555749756158232,
      "grad_norm": 0.25936582684516907,
      "learning_rate": 8.943550140679877e-07,
      "loss": 0.0134,
      "step": 2783800
    },
    {
      "epoch": 4.555782486596885,
      "grad_norm": 0.3222157657146454,
      "learning_rate": 8.942891218544705e-07,
      "loss": 0.0067,
      "step": 2783820
    },
    {
      "epoch": 4.555815217035539,
      "grad_norm": 0.37096506357192993,
      "learning_rate": 8.942232296409534e-07,
      "loss": 0.0097,
      "step": 2783840
    },
    {
      "epoch": 4.555847947474192,
      "grad_norm": 0.09289103001356125,
      "learning_rate": 8.941573374274362e-07,
      "loss": 0.0133,
      "step": 2783860
    },
    {
      "epoch": 4.555880677912845,
      "grad_norm": 0.1240987777709961,
      "learning_rate": 8.940914452139192e-07,
      "loss": 0.0103,
      "step": 2783880
    },
    {
      "epoch": 4.555913408351499,
      "grad_norm": 0.1948278248310089,
      "learning_rate": 8.94025553000402e-07,
      "loss": 0.0073,
      "step": 2783900
    },
    {
      "epoch": 4.555946138790152,
      "grad_norm": 0.47158166766166687,
      "learning_rate": 8.939596607868848e-07,
      "loss": 0.0083,
      "step": 2783920
    },
    {
      "epoch": 4.555978869228805,
      "grad_norm": 0.2581845223903656,
      "learning_rate": 8.938937685733677e-07,
      "loss": 0.009,
      "step": 2783940
    },
    {
      "epoch": 4.556011599667459,
      "grad_norm": 0.18444299697875977,
      "learning_rate": 8.938278763598506e-07,
      "loss": 0.0056,
      "step": 2783960
    },
    {
      "epoch": 4.556044330106112,
      "grad_norm": 0.3169567584991455,
      "learning_rate": 8.937619841463335e-07,
      "loss": 0.0083,
      "step": 2783980
    },
    {
      "epoch": 4.556077060544766,
      "grad_norm": 0.09795153141021729,
      "learning_rate": 8.936960919328163e-07,
      "loss": 0.0108,
      "step": 2784000
    },
    {
      "epoch": 4.5561097909834185,
      "grad_norm": 0.1598959118127823,
      "learning_rate": 8.936301997192992e-07,
      "loss": 0.0061,
      "step": 2784020
    },
    {
      "epoch": 4.556142521422072,
      "grad_norm": 0.18468767404556274,
      "learning_rate": 8.93564307505782e-07,
      "loss": 0.0121,
      "step": 2784040
    },
    {
      "epoch": 4.556175251860726,
      "grad_norm": 0.17010651528835297,
      "learning_rate": 8.93498415292265e-07,
      "loss": 0.0092,
      "step": 2784060
    },
    {
      "epoch": 4.556207982299378,
      "grad_norm": 0.12558649480342865,
      "learning_rate": 8.934325230787478e-07,
      "loss": 0.0071,
      "step": 2784080
    },
    {
      "epoch": 4.556240712738032,
      "grad_norm": 0.0908375233411789,
      "learning_rate": 8.933666308652307e-07,
      "loss": 0.0133,
      "step": 2784100
    },
    {
      "epoch": 4.556273443176686,
      "grad_norm": 0.22910362482070923,
      "learning_rate": 8.933007386517135e-07,
      "loss": 0.0065,
      "step": 2784120
    },
    {
      "epoch": 4.556306173615338,
      "grad_norm": 0.0692303329706192,
      "learning_rate": 8.932348464381966e-07,
      "loss": 0.0066,
      "step": 2784140
    },
    {
      "epoch": 4.556338904053992,
      "grad_norm": 0.39632299542427063,
      "learning_rate": 8.931689542246793e-07,
      "loss": 0.0067,
      "step": 2784160
    },
    {
      "epoch": 4.5563716344926455,
      "grad_norm": 0.15815383195877075,
      "learning_rate": 8.931030620111622e-07,
      "loss": 0.01,
      "step": 2784180
    },
    {
      "epoch": 4.556404364931299,
      "grad_norm": 0.5903995633125305,
      "learning_rate": 8.93037169797645e-07,
      "loss": 0.0065,
      "step": 2784200
    },
    {
      "epoch": 4.556437095369952,
      "grad_norm": 0.28439027070999146,
      "learning_rate": 8.929712775841281e-07,
      "loss": 0.0056,
      "step": 2784220
    },
    {
      "epoch": 4.556469825808605,
      "grad_norm": 0.327251136302948,
      "learning_rate": 8.929053853706109e-07,
      "loss": 0.0104,
      "step": 2784240
    },
    {
      "epoch": 4.556502556247259,
      "grad_norm": 0.15325582027435303,
      "learning_rate": 8.928394931570936e-07,
      "loss": 0.0072,
      "step": 2784260
    },
    {
      "epoch": 4.556535286685913,
      "grad_norm": 0.2416306585073471,
      "learning_rate": 8.927736009435765e-07,
      "loss": 0.0092,
      "step": 2784280
    },
    {
      "epoch": 4.556568017124565,
      "grad_norm": 0.24618692696094513,
      "learning_rate": 8.927077087300593e-07,
      "loss": 0.0098,
      "step": 2784300
    },
    {
      "epoch": 4.556600747563219,
      "grad_norm": 0.08838625252246857,
      "learning_rate": 8.926418165165424e-07,
      "loss": 0.0057,
      "step": 2784320
    },
    {
      "epoch": 4.5566334780018725,
      "grad_norm": 0.21028432250022888,
      "learning_rate": 8.925759243030252e-07,
      "loss": 0.0084,
      "step": 2784340
    },
    {
      "epoch": 4.556666208440525,
      "grad_norm": 0.21807335317134857,
      "learning_rate": 8.925100320895081e-07,
      "loss": 0.0099,
      "step": 2784360
    },
    {
      "epoch": 4.556698938879179,
      "grad_norm": 0.13564686477184296,
      "learning_rate": 8.924441398759908e-07,
      "loss": 0.0099,
      "step": 2784380
    },
    {
      "epoch": 4.556731669317832,
      "grad_norm": 0.07574596256017685,
      "learning_rate": 8.923782476624739e-07,
      "loss": 0.0053,
      "step": 2784400
    },
    {
      "epoch": 4.556764399756485,
      "grad_norm": 0.36705297231674194,
      "learning_rate": 8.923123554489567e-07,
      "loss": 0.009,
      "step": 2784420
    },
    {
      "epoch": 4.556797130195139,
      "grad_norm": 0.2076410949230194,
      "learning_rate": 8.922464632354396e-07,
      "loss": 0.0091,
      "step": 2784440
    },
    {
      "epoch": 4.556829860633792,
      "grad_norm": 0.08222568035125732,
      "learning_rate": 8.921805710219224e-07,
      "loss": 0.0093,
      "step": 2784460
    },
    {
      "epoch": 4.556862591072446,
      "grad_norm": 0.31510141491889954,
      "learning_rate": 8.921146788084053e-07,
      "loss": 0.0128,
      "step": 2784480
    },
    {
      "epoch": 4.5568953215110986,
      "grad_norm": 0.23158961534500122,
      "learning_rate": 8.920487865948882e-07,
      "loss": 0.0081,
      "step": 2784500
    },
    {
      "epoch": 4.556928051949752,
      "grad_norm": 0.13091647624969482,
      "learning_rate": 8.919828943813711e-07,
      "loss": 0.0091,
      "step": 2784520
    },
    {
      "epoch": 4.556960782388406,
      "grad_norm": 0.2971668243408203,
      "learning_rate": 8.919170021678539e-07,
      "loss": 0.0095,
      "step": 2784540
    },
    {
      "epoch": 4.556993512827059,
      "grad_norm": 0.2881043255329132,
      "learning_rate": 8.918511099543368e-07,
      "loss": 0.009,
      "step": 2784560
    },
    {
      "epoch": 4.557026243265712,
      "grad_norm": 0.1321939378976822,
      "learning_rate": 8.917852177408197e-07,
      "loss": 0.0108,
      "step": 2784580
    },
    {
      "epoch": 4.557058973704366,
      "grad_norm": 0.09201191365718842,
      "learning_rate": 8.917193255273026e-07,
      "loss": 0.0114,
      "step": 2784600
    },
    {
      "epoch": 4.557091704143019,
      "grad_norm": 0.18644998967647552,
      "learning_rate": 8.916534333137854e-07,
      "loss": 0.0099,
      "step": 2784620
    },
    {
      "epoch": 4.557124434581672,
      "grad_norm": 0.08299210667610168,
      "learning_rate": 8.915875411002682e-07,
      "loss": 0.0074,
      "step": 2784640
    },
    {
      "epoch": 4.5571571650203255,
      "grad_norm": 0.1833534985780716,
      "learning_rate": 8.915216488867512e-07,
      "loss": 0.0095,
      "step": 2784660
    },
    {
      "epoch": 4.557189895458979,
      "grad_norm": 0.1463867574930191,
      "learning_rate": 8.91455756673234e-07,
      "loss": 0.0073,
      "step": 2784680
    },
    {
      "epoch": 4.557222625897632,
      "grad_norm": 0.2970832288265228,
      "learning_rate": 8.913898644597169e-07,
      "loss": 0.0104,
      "step": 2784700
    },
    {
      "epoch": 4.557255356336285,
      "grad_norm": 0.19705238938331604,
      "learning_rate": 8.913239722461997e-07,
      "loss": 0.0104,
      "step": 2784720
    },
    {
      "epoch": 4.557288086774939,
      "grad_norm": 0.24384556710720062,
      "learning_rate": 8.912580800326826e-07,
      "loss": 0.0094,
      "step": 2784740
    },
    {
      "epoch": 4.557320817213593,
      "grad_norm": 0.10411256551742554,
      "learning_rate": 8.911921878191655e-07,
      "loss": 0.0075,
      "step": 2784760
    },
    {
      "epoch": 4.557353547652245,
      "grad_norm": 0.5308232307434082,
      "learning_rate": 8.911262956056484e-07,
      "loss": 0.0092,
      "step": 2784780
    },
    {
      "epoch": 4.557386278090899,
      "grad_norm": 0.10117635130882263,
      "learning_rate": 8.910604033921312e-07,
      "loss": 0.0059,
      "step": 2784800
    },
    {
      "epoch": 4.5574190085295525,
      "grad_norm": 0.13593581318855286,
      "learning_rate": 8.909945111786141e-07,
      "loss": 0.0075,
      "step": 2784820
    },
    {
      "epoch": 4.557451738968206,
      "grad_norm": 0.14580464363098145,
      "learning_rate": 8.90928618965097e-07,
      "loss": 0.0109,
      "step": 2784840
    },
    {
      "epoch": 4.557484469406859,
      "grad_norm": 0.3594348430633545,
      "learning_rate": 8.908627267515799e-07,
      "loss": 0.0103,
      "step": 2784860
    },
    {
      "epoch": 4.557517199845512,
      "grad_norm": 0.3225221633911133,
      "learning_rate": 8.907968345380627e-07,
      "loss": 0.0075,
      "step": 2784880
    },
    {
      "epoch": 4.557549930284166,
      "grad_norm": 0.15690822899341583,
      "learning_rate": 8.907309423245456e-07,
      "loss": 0.0072,
      "step": 2784900
    },
    {
      "epoch": 4.557582660722819,
      "grad_norm": 0.2628081738948822,
      "learning_rate": 8.906650501110284e-07,
      "loss": 0.0098,
      "step": 2784920
    },
    {
      "epoch": 4.557615391161472,
      "grad_norm": 0.23965828120708466,
      "learning_rate": 8.905991578975114e-07,
      "loss": 0.0123,
      "step": 2784940
    },
    {
      "epoch": 4.557648121600126,
      "grad_norm": 0.3617445230484009,
      "learning_rate": 8.905332656839942e-07,
      "loss": 0.0098,
      "step": 2784960
    },
    {
      "epoch": 4.557680852038779,
      "grad_norm": 0.19514228403568268,
      "learning_rate": 8.90467373470477e-07,
      "loss": 0.0065,
      "step": 2784980
    },
    {
      "epoch": 4.557713582477432,
      "grad_norm": 0.12202239781618118,
      "learning_rate": 8.904014812569599e-07,
      "loss": 0.0087,
      "step": 2785000
    },
    {
      "epoch": 4.557746312916086,
      "grad_norm": 0.27582553029060364,
      "learning_rate": 8.903355890434428e-07,
      "loss": 0.0096,
      "step": 2785020
    },
    {
      "epoch": 4.557779043354739,
      "grad_norm": 0.44007712602615356,
      "learning_rate": 8.902696968299257e-07,
      "loss": 0.0105,
      "step": 2785040
    },
    {
      "epoch": 4.557811773793392,
      "grad_norm": 0.22474467754364014,
      "learning_rate": 8.902038046164085e-07,
      "loss": 0.0089,
      "step": 2785060
    },
    {
      "epoch": 4.557844504232046,
      "grad_norm": 0.32383599877357483,
      "learning_rate": 8.901379124028914e-07,
      "loss": 0.008,
      "step": 2785080
    },
    {
      "epoch": 4.557877234670699,
      "grad_norm": 0.39408570528030396,
      "learning_rate": 8.900720201893743e-07,
      "loss": 0.009,
      "step": 2785100
    },
    {
      "epoch": 4.557909965109353,
      "grad_norm": 0.17310047149658203,
      "learning_rate": 8.900061279758572e-07,
      "loss": 0.0102,
      "step": 2785120
    },
    {
      "epoch": 4.557942695548006,
      "grad_norm": 0.3355605900287628,
      "learning_rate": 8.8994023576234e-07,
      "loss": 0.0059,
      "step": 2785140
    },
    {
      "epoch": 4.557975425986659,
      "grad_norm": 0.29695194959640503,
      "learning_rate": 8.898743435488229e-07,
      "loss": 0.0079,
      "step": 2785160
    },
    {
      "epoch": 4.558008156425313,
      "grad_norm": 0.23282790184020996,
      "learning_rate": 8.898084513353057e-07,
      "loss": 0.0109,
      "step": 2785180
    },
    {
      "epoch": 4.5580408868639655,
      "grad_norm": 0.1459609419107437,
      "learning_rate": 8.897425591217887e-07,
      "loss": 0.0127,
      "step": 2785200
    },
    {
      "epoch": 4.558073617302619,
      "grad_norm": 0.20712630450725555,
      "learning_rate": 8.896766669082715e-07,
      "loss": 0.0087,
      "step": 2785220
    },
    {
      "epoch": 4.558106347741273,
      "grad_norm": 0.16196347773075104,
      "learning_rate": 8.896107746947544e-07,
      "loss": 0.0079,
      "step": 2785240
    },
    {
      "epoch": 4.558139078179925,
      "grad_norm": 0.2726144790649414,
      "learning_rate": 8.895448824812372e-07,
      "loss": 0.0069,
      "step": 2785260
    },
    {
      "epoch": 4.558171808618579,
      "grad_norm": 1.0086897611618042,
      "learning_rate": 8.894789902677202e-07,
      "loss": 0.0081,
      "step": 2785280
    },
    {
      "epoch": 4.5582045390572326,
      "grad_norm": 0.23419544100761414,
      "learning_rate": 8.89413098054203e-07,
      "loss": 0.01,
      "step": 2785300
    },
    {
      "epoch": 4.558237269495886,
      "grad_norm": 0.27838659286499023,
      "learning_rate": 8.893472058406858e-07,
      "loss": 0.0126,
      "step": 2785320
    },
    {
      "epoch": 4.558269999934539,
      "grad_norm": 0.44853445887565613,
      "learning_rate": 8.892813136271687e-07,
      "loss": 0.0134,
      "step": 2785340
    },
    {
      "epoch": 4.5583027303731924,
      "grad_norm": 0.3794483244419098,
      "learning_rate": 8.892154214136515e-07,
      "loss": 0.0097,
      "step": 2785360
    },
    {
      "epoch": 4.558335460811846,
      "grad_norm": 0.3451662063598633,
      "learning_rate": 8.891495292001345e-07,
      "loss": 0.0066,
      "step": 2785380
    },
    {
      "epoch": 4.5583681912505,
      "grad_norm": 0.35832977294921875,
      "learning_rate": 8.890836369866173e-07,
      "loss": 0.0069,
      "step": 2785400
    },
    {
      "epoch": 4.558400921689152,
      "grad_norm": 0.1352517306804657,
      "learning_rate": 8.890177447731002e-07,
      "loss": 0.0153,
      "step": 2785420
    },
    {
      "epoch": 4.558433652127806,
      "grad_norm": 0.1325768381357193,
      "learning_rate": 8.88951852559583e-07,
      "loss": 0.0094,
      "step": 2785440
    },
    {
      "epoch": 4.5584663825664595,
      "grad_norm": 0.2921397387981415,
      "learning_rate": 8.888859603460661e-07,
      "loss": 0.0122,
      "step": 2785460
    },
    {
      "epoch": 4.558499113005112,
      "grad_norm": 0.18996086716651917,
      "learning_rate": 8.888200681325488e-07,
      "loss": 0.0083,
      "step": 2785480
    },
    {
      "epoch": 4.558531843443766,
      "grad_norm": 0.21687600016593933,
      "learning_rate": 8.887541759190317e-07,
      "loss": 0.007,
      "step": 2785500
    },
    {
      "epoch": 4.558564573882419,
      "grad_norm": 0.5501542091369629,
      "learning_rate": 8.886882837055145e-07,
      "loss": 0.0173,
      "step": 2785520
    },
    {
      "epoch": 4.558597304321072,
      "grad_norm": 0.45598897337913513,
      "learning_rate": 8.886223914919976e-07,
      "loss": 0.0103,
      "step": 2785540
    },
    {
      "epoch": 4.558630034759726,
      "grad_norm": 0.2552469074726105,
      "learning_rate": 8.885564992784804e-07,
      "loss": 0.0102,
      "step": 2785560
    },
    {
      "epoch": 4.558662765198379,
      "grad_norm": 0.11276909708976746,
      "learning_rate": 8.884906070649632e-07,
      "loss": 0.0076,
      "step": 2785580
    },
    {
      "epoch": 4.558695495637033,
      "grad_norm": 0.09528594464063644,
      "learning_rate": 8.88424714851446e-07,
      "loss": 0.0077,
      "step": 2785600
    },
    {
      "epoch": 4.558728226075686,
      "grad_norm": 0.23705439269542694,
      "learning_rate": 8.883588226379288e-07,
      "loss": 0.0086,
      "step": 2785620
    },
    {
      "epoch": 4.558760956514339,
      "grad_norm": 0.5131982564926147,
      "learning_rate": 8.882929304244119e-07,
      "loss": 0.008,
      "step": 2785640
    },
    {
      "epoch": 4.558793686952993,
      "grad_norm": 0.14773476123809814,
      "learning_rate": 8.882270382108947e-07,
      "loss": 0.0083,
      "step": 2785660
    },
    {
      "epoch": 4.5588264173916455,
      "grad_norm": 0.2940996587276459,
      "learning_rate": 8.881611459973776e-07,
      "loss": 0.0068,
      "step": 2785680
    },
    {
      "epoch": 4.558859147830299,
      "grad_norm": 0.4982542097568512,
      "learning_rate": 8.880952537838603e-07,
      "loss": 0.0082,
      "step": 2785700
    },
    {
      "epoch": 4.558891878268953,
      "grad_norm": 0.17788830399513245,
      "learning_rate": 8.880293615703434e-07,
      "loss": 0.0131,
      "step": 2785720
    },
    {
      "epoch": 4.558924608707606,
      "grad_norm": 0.17081432044506073,
      "learning_rate": 8.879634693568262e-07,
      "loss": 0.0099,
      "step": 2785740
    },
    {
      "epoch": 4.558957339146259,
      "grad_norm": 0.31489047408103943,
      "learning_rate": 8.878975771433091e-07,
      "loss": 0.008,
      "step": 2785760
    },
    {
      "epoch": 4.558990069584913,
      "grad_norm": null,
      "learning_rate": 8.878316849297919e-07,
      "loss": 0.01,
      "step": 2785780
    },
    {
      "epoch": 4.559022800023566,
      "grad_norm": 0.33479195833206177,
      "learning_rate": 8.877657927162748e-07,
      "loss": 0.008,
      "step": 2785800
    },
    {
      "epoch": 4.559055530462219,
      "grad_norm": 0.2720983624458313,
      "learning_rate": 8.876999005027577e-07,
      "loss": 0.0082,
      "step": 2785820
    },
    {
      "epoch": 4.5590882609008725,
      "grad_norm": 0.12442053854465485,
      "learning_rate": 8.876340082892406e-07,
      "loss": 0.008,
      "step": 2785840
    },
    {
      "epoch": 4.559120991339526,
      "grad_norm": 0.17625874280929565,
      "learning_rate": 8.875681160757234e-07,
      "loss": 0.0067,
      "step": 2785860
    },
    {
      "epoch": 4.559153721778179,
      "grad_norm": 0.3260957896709442,
      "learning_rate": 8.875022238622063e-07,
      "loss": 0.0126,
      "step": 2785880
    },
    {
      "epoch": 4.559186452216832,
      "grad_norm": 0.26627179980278015,
      "learning_rate": 8.874363316486892e-07,
      "loss": 0.0116,
      "step": 2785900
    },
    {
      "epoch": 4.559219182655486,
      "grad_norm": 0.11531572043895721,
      "learning_rate": 8.873704394351721e-07,
      "loss": 0.0061,
      "step": 2785920
    },
    {
      "epoch": 4.55925191309414,
      "grad_norm": 0.30755385756492615,
      "learning_rate": 8.873045472216549e-07,
      "loss": 0.011,
      "step": 2785940
    },
    {
      "epoch": 4.559284643532792,
      "grad_norm": 0.1579490453004837,
      "learning_rate": 8.872386550081378e-07,
      "loss": 0.0073,
      "step": 2785960
    },
    {
      "epoch": 4.559317373971446,
      "grad_norm": 0.0337863564491272,
      "learning_rate": 8.871727627946207e-07,
      "loss": 0.0102,
      "step": 2785980
    },
    {
      "epoch": 4.5593501044100995,
      "grad_norm": 0.24435332417488098,
      "learning_rate": 8.871068705811035e-07,
      "loss": 0.0097,
      "step": 2786000
    },
    {
      "epoch": 4.559382834848753,
      "grad_norm": 0.23455466330051422,
      "learning_rate": 8.870409783675864e-07,
      "loss": 0.0098,
      "step": 2786020
    },
    {
      "epoch": 4.559415565287406,
      "grad_norm": 0.02363632060587406,
      "learning_rate": 8.869750861540692e-07,
      "loss": 0.0069,
      "step": 2786040
    },
    {
      "epoch": 4.559448295726059,
      "grad_norm": 0.37966087460517883,
      "learning_rate": 8.869091939405521e-07,
      "loss": 0.0164,
      "step": 2786060
    },
    {
      "epoch": 4.559481026164713,
      "grad_norm": 0.14976060390472412,
      "learning_rate": 8.86843301727035e-07,
      "loss": 0.0105,
      "step": 2786080
    },
    {
      "epoch": 4.559513756603366,
      "grad_norm": 0.10913066565990448,
      "learning_rate": 8.867774095135179e-07,
      "loss": 0.0071,
      "step": 2786100
    },
    {
      "epoch": 4.559546487042019,
      "grad_norm": 0.2321336716413498,
      "learning_rate": 8.867115173000007e-07,
      "loss": 0.0096,
      "step": 2786120
    },
    {
      "epoch": 4.559579217480673,
      "grad_norm": 0.09195034950971603,
      "learning_rate": 8.866456250864836e-07,
      "loss": 0.0126,
      "step": 2786140
    },
    {
      "epoch": 4.559611947919326,
      "grad_norm": 0.5017042756080627,
      "learning_rate": 8.865797328729665e-07,
      "loss": 0.0123,
      "step": 2786160
    },
    {
      "epoch": 4.559644678357979,
      "grad_norm": 0.06784657388925552,
      "learning_rate": 8.865138406594494e-07,
      "loss": 0.0081,
      "step": 2786180
    },
    {
      "epoch": 4.559677408796633,
      "grad_norm": 0.34660208225250244,
      "learning_rate": 8.864479484459322e-07,
      "loss": 0.0103,
      "step": 2786200
    },
    {
      "epoch": 4.559710139235286,
      "grad_norm": 0.4606421887874603,
      "learning_rate": 8.863820562324151e-07,
      "loss": 0.0095,
      "step": 2786220
    },
    {
      "epoch": 4.559742869673939,
      "grad_norm": 0.1595180779695511,
      "learning_rate": 8.86316164018898e-07,
      "loss": 0.0079,
      "step": 2786240
    },
    {
      "epoch": 4.559775600112593,
      "grad_norm": 0.18718603253364563,
      "learning_rate": 8.862502718053809e-07,
      "loss": 0.0115,
      "step": 2786260
    },
    {
      "epoch": 4.559808330551246,
      "grad_norm": 0.272890567779541,
      "learning_rate": 8.861843795918637e-07,
      "loss": 0.0068,
      "step": 2786280
    },
    {
      "epoch": 4.5598410609899,
      "grad_norm": 0.22228273749351501,
      "learning_rate": 8.861184873783465e-07,
      "loss": 0.0104,
      "step": 2786300
    },
    {
      "epoch": 4.5598737914285525,
      "grad_norm": 0.250288724899292,
      "learning_rate": 8.860525951648294e-07,
      "loss": 0.0085,
      "step": 2786320
    },
    {
      "epoch": 4.559906521867206,
      "grad_norm": 0.18402421474456787,
      "learning_rate": 8.859867029513123e-07,
      "loss": 0.0074,
      "step": 2786340
    },
    {
      "epoch": 4.55993925230586,
      "grad_norm": 0.22552265226840973,
      "learning_rate": 8.859208107377952e-07,
      "loss": 0.0077,
      "step": 2786360
    },
    {
      "epoch": 4.559971982744512,
      "grad_norm": 0.5334559679031372,
      "learning_rate": 8.85854918524278e-07,
      "loss": 0.0101,
      "step": 2786380
    },
    {
      "epoch": 4.560004713183166,
      "grad_norm": 0.0927945077419281,
      "learning_rate": 8.857890263107609e-07,
      "loss": 0.0061,
      "step": 2786400
    },
    {
      "epoch": 4.56003744362182,
      "grad_norm": 0.042127978056669235,
      "learning_rate": 8.857231340972438e-07,
      "loss": 0.0098,
      "step": 2786420
    },
    {
      "epoch": 4.560070174060472,
      "grad_norm": 0.17908143997192383,
      "learning_rate": 8.856572418837267e-07,
      "loss": 0.0095,
      "step": 2786440
    },
    {
      "epoch": 4.560102904499126,
      "grad_norm": 0.13393355906009674,
      "learning_rate": 8.855913496702095e-07,
      "loss": 0.0082,
      "step": 2786460
    },
    {
      "epoch": 4.5601356349377795,
      "grad_norm": 0.2533733546733856,
      "learning_rate": 8.855254574566924e-07,
      "loss": 0.0089,
      "step": 2786480
    },
    {
      "epoch": 4.560168365376433,
      "grad_norm": 0.1739569455385208,
      "learning_rate": 8.854595652431752e-07,
      "loss": 0.0052,
      "step": 2786500
    },
    {
      "epoch": 4.560201095815086,
      "grad_norm": 0.09286949038505554,
      "learning_rate": 8.853936730296582e-07,
      "loss": 0.0067,
      "step": 2786520
    },
    {
      "epoch": 4.560233826253739,
      "grad_norm": 0.11099044233560562,
      "learning_rate": 8.85327780816141e-07,
      "loss": 0.0087,
      "step": 2786540
    },
    {
      "epoch": 4.560266556692393,
      "grad_norm": 0.1333361715078354,
      "learning_rate": 8.852618886026239e-07,
      "loss": 0.0112,
      "step": 2786560
    },
    {
      "epoch": 4.560299287131047,
      "grad_norm": 0.15406332910060883,
      "learning_rate": 8.851959963891067e-07,
      "loss": 0.0084,
      "step": 2786580
    },
    {
      "epoch": 4.560332017569699,
      "grad_norm": 0.12214978039264679,
      "learning_rate": 8.851301041755897e-07,
      "loss": 0.0114,
      "step": 2786600
    },
    {
      "epoch": 4.560364748008353,
      "grad_norm": 0.28098806738853455,
      "learning_rate": 8.850642119620725e-07,
      "loss": 0.0069,
      "step": 2786620
    },
    {
      "epoch": 4.5603974784470065,
      "grad_norm": 0.1907731592655182,
      "learning_rate": 8.849983197485553e-07,
      "loss": 0.0067,
      "step": 2786640
    },
    {
      "epoch": 4.560430208885659,
      "grad_norm": 0.13071534037590027,
      "learning_rate": 8.849324275350382e-07,
      "loss": 0.0092,
      "step": 2786660
    },
    {
      "epoch": 4.560462939324313,
      "grad_norm": 0.20295001566410065,
      "learning_rate": 8.848665353215211e-07,
      "loss": 0.0098,
      "step": 2786680
    },
    {
      "epoch": 4.560495669762966,
      "grad_norm": 0.3120012879371643,
      "learning_rate": 8.84800643108004e-07,
      "loss": 0.0072,
      "step": 2786700
    },
    {
      "epoch": 4.560528400201619,
      "grad_norm": 0.18732649087905884,
      "learning_rate": 8.847347508944868e-07,
      "loss": 0.0105,
      "step": 2786720
    },
    {
      "epoch": 4.560561130640273,
      "grad_norm": 0.08979639410972595,
      "learning_rate": 8.846688586809697e-07,
      "loss": 0.0118,
      "step": 2786740
    },
    {
      "epoch": 4.560593861078926,
      "grad_norm": 0.15266761183738708,
      "learning_rate": 8.846029664674525e-07,
      "loss": 0.009,
      "step": 2786760
    },
    {
      "epoch": 4.56062659151758,
      "grad_norm": 0.16515572369098663,
      "learning_rate": 8.845370742539356e-07,
      "loss": 0.0096,
      "step": 2786780
    },
    {
      "epoch": 4.560659321956233,
      "grad_norm": 0.39775925874710083,
      "learning_rate": 8.844711820404183e-07,
      "loss": 0.0063,
      "step": 2786800
    },
    {
      "epoch": 4.560692052394886,
      "grad_norm": 0.18759393692016602,
      "learning_rate": 8.844052898269012e-07,
      "loss": 0.0069,
      "step": 2786820
    },
    {
      "epoch": 4.56072478283354,
      "grad_norm": 0.3593360483646393,
      "learning_rate": 8.84339397613384e-07,
      "loss": 0.0133,
      "step": 2786840
    },
    {
      "epoch": 4.560757513272193,
      "grad_norm": 0.15887583792209625,
      "learning_rate": 8.842735053998671e-07,
      "loss": 0.0061,
      "step": 2786860
    },
    {
      "epoch": 4.560790243710846,
      "grad_norm": 0.18280041217803955,
      "learning_rate": 8.842076131863499e-07,
      "loss": 0.011,
      "step": 2786880
    },
    {
      "epoch": 4.5608229741495,
      "grad_norm": 0.2625141441822052,
      "learning_rate": 8.841417209728327e-07,
      "loss": 0.0082,
      "step": 2786900
    },
    {
      "epoch": 4.560855704588153,
      "grad_norm": 0.2352040857076645,
      "learning_rate": 8.840758287593155e-07,
      "loss": 0.0104,
      "step": 2786920
    },
    {
      "epoch": 4.560888435026806,
      "grad_norm": 0.15121063590049744,
      "learning_rate": 8.840099365457984e-07,
      "loss": 0.0088,
      "step": 2786940
    },
    {
      "epoch": 4.56092116546546,
      "grad_norm": 0.47794413566589355,
      "learning_rate": 8.839440443322814e-07,
      "loss": 0.0078,
      "step": 2786960
    },
    {
      "epoch": 4.560953895904113,
      "grad_norm": 0.3243998885154724,
      "learning_rate": 8.838781521187643e-07,
      "loss": 0.0081,
      "step": 2786980
    },
    {
      "epoch": 4.560986626342766,
      "grad_norm": 0.16784368455410004,
      "learning_rate": 8.838122599052471e-07,
      "loss": 0.0106,
      "step": 2787000
    },
    {
      "epoch": 4.5610193567814195,
      "grad_norm": 0.10618173331022263,
      "learning_rate": 8.837463676917298e-07,
      "loss": 0.011,
      "step": 2787020
    },
    {
      "epoch": 4.561052087220073,
      "grad_norm": 0.3610519766807556,
      "learning_rate": 8.836804754782129e-07,
      "loss": 0.007,
      "step": 2787040
    },
    {
      "epoch": 4.561084817658727,
      "grad_norm": 0.40590715408325195,
      "learning_rate": 8.836145832646957e-07,
      "loss": 0.0105,
      "step": 2787060
    },
    {
      "epoch": 4.561117548097379,
      "grad_norm": 0.26545459032058716,
      "learning_rate": 8.835486910511786e-07,
      "loss": 0.0118,
      "step": 2787080
    },
    {
      "epoch": 4.561150278536033,
      "grad_norm": 0.597059428691864,
      "learning_rate": 8.834827988376614e-07,
      "loss": 0.0096,
      "step": 2787100
    },
    {
      "epoch": 4.5611830089746865,
      "grad_norm": 0.15464068949222565,
      "learning_rate": 8.834169066241444e-07,
      "loss": 0.0073,
      "step": 2787120
    },
    {
      "epoch": 4.561215739413339,
      "grad_norm": 0.4402679204940796,
      "learning_rate": 8.833510144106272e-07,
      "loss": 0.0073,
      "step": 2787140
    },
    {
      "epoch": 4.561248469851993,
      "grad_norm": 0.20889146625995636,
      "learning_rate": 8.832851221971101e-07,
      "loss": 0.0076,
      "step": 2787160
    },
    {
      "epoch": 4.561281200290646,
      "grad_norm": 0.13300403952598572,
      "learning_rate": 8.832192299835929e-07,
      "loss": 0.0064,
      "step": 2787180
    },
    {
      "epoch": 4.5613139307293,
      "grad_norm": 0.2753862738609314,
      "learning_rate": 8.831533377700758e-07,
      "loss": 0.0087,
      "step": 2787200
    },
    {
      "epoch": 4.561346661167953,
      "grad_norm": 0.44564488530158997,
      "learning_rate": 8.830874455565587e-07,
      "loss": 0.008,
      "step": 2787220
    },
    {
      "epoch": 4.561379391606606,
      "grad_norm": 0.24437181651592255,
      "learning_rate": 8.830215533430416e-07,
      "loss": 0.0089,
      "step": 2787240
    },
    {
      "epoch": 4.56141212204526,
      "grad_norm": 0.15613068640232086,
      "learning_rate": 8.829556611295244e-07,
      "loss": 0.0091,
      "step": 2787260
    },
    {
      "epoch": 4.561444852483913,
      "grad_norm": 0.7408144474029541,
      "learning_rate": 8.828897689160073e-07,
      "loss": 0.0105,
      "step": 2787280
    },
    {
      "epoch": 4.561477582922566,
      "grad_norm": 0.13379210233688354,
      "learning_rate": 8.828238767024902e-07,
      "loss": 0.0065,
      "step": 2787300
    },
    {
      "epoch": 4.56151031336122,
      "grad_norm": 0.16329063475131989,
      "learning_rate": 8.827579844889731e-07,
      "loss": 0.0068,
      "step": 2787320
    },
    {
      "epoch": 4.5615430437998725,
      "grad_norm": 0.14711447060108185,
      "learning_rate": 8.826920922754559e-07,
      "loss": 0.0119,
      "step": 2787340
    },
    {
      "epoch": 4.561575774238526,
      "grad_norm": 0.02873585931956768,
      "learning_rate": 8.826262000619387e-07,
      "loss": 0.0117,
      "step": 2787360
    },
    {
      "epoch": 4.56160850467718,
      "grad_norm": 0.058453720062971115,
      "learning_rate": 8.825603078484216e-07,
      "loss": 0.0077,
      "step": 2787380
    },
    {
      "epoch": 4.561641235115833,
      "grad_norm": 0.1988321840763092,
      "learning_rate": 8.824944156349045e-07,
      "loss": 0.0092,
      "step": 2787400
    },
    {
      "epoch": 4.561673965554486,
      "grad_norm": 0.3052233159542084,
      "learning_rate": 8.824285234213874e-07,
      "loss": 0.0098,
      "step": 2787420
    },
    {
      "epoch": 4.56170669599314,
      "grad_norm": 0.13657426834106445,
      "learning_rate": 8.823626312078702e-07,
      "loss": 0.0091,
      "step": 2787440
    },
    {
      "epoch": 4.561739426431793,
      "grad_norm": 0.10392740368843079,
      "learning_rate": 8.822967389943531e-07,
      "loss": 0.011,
      "step": 2787460
    },
    {
      "epoch": 4.561772156870447,
      "grad_norm": 0.10808748751878738,
      "learning_rate": 8.82230846780836e-07,
      "loss": 0.0066,
      "step": 2787480
    },
    {
      "epoch": 4.5618048873090995,
      "grad_norm": 0.37087059020996094,
      "learning_rate": 8.821649545673189e-07,
      "loss": 0.0081,
      "step": 2787500
    },
    {
      "epoch": 4.561837617747753,
      "grad_norm": 0.12087919563055038,
      "learning_rate": 8.820990623538017e-07,
      "loss": 0.0108,
      "step": 2787520
    },
    {
      "epoch": 4.561870348186407,
      "grad_norm": 0.2609297037124634,
      "learning_rate": 8.820331701402846e-07,
      "loss": 0.0105,
      "step": 2787540
    },
    {
      "epoch": 4.561903078625059,
      "grad_norm": 0.1315355896949768,
      "learning_rate": 8.819672779267675e-07,
      "loss": 0.0078,
      "step": 2787560
    },
    {
      "epoch": 4.561935809063713,
      "grad_norm": 0.3586387634277344,
      "learning_rate": 8.819013857132504e-07,
      "loss": 0.0073,
      "step": 2787580
    },
    {
      "epoch": 4.561968539502367,
      "grad_norm": 0.33945345878601074,
      "learning_rate": 8.818354934997332e-07,
      "loss": 0.006,
      "step": 2787600
    },
    {
      "epoch": 4.562001269941019,
      "grad_norm": 0.15663844347000122,
      "learning_rate": 8.817696012862161e-07,
      "loss": 0.0062,
      "step": 2787620
    },
    {
      "epoch": 4.562034000379673,
      "grad_norm": 0.14266982674598694,
      "learning_rate": 8.817037090726989e-07,
      "loss": 0.0123,
      "step": 2787640
    },
    {
      "epoch": 4.5620667308183265,
      "grad_norm": 0.11844164878129959,
      "learning_rate": 8.816378168591819e-07,
      "loss": 0.0071,
      "step": 2787660
    },
    {
      "epoch": 4.56209946125698,
      "grad_norm": 0.09010723233222961,
      "learning_rate": 8.815719246456647e-07,
      "loss": 0.0087,
      "step": 2787680
    },
    {
      "epoch": 4.562132191695633,
      "grad_norm": 0.5382252335548401,
      "learning_rate": 8.815060324321475e-07,
      "loss": 0.013,
      "step": 2787700
    },
    {
      "epoch": 4.562164922134286,
      "grad_norm": 0.2250075787305832,
      "learning_rate": 8.814401402186304e-07,
      "loss": 0.0073,
      "step": 2787720
    },
    {
      "epoch": 4.56219765257294,
      "grad_norm": 0.20307056605815887,
      "learning_rate": 8.813742480051133e-07,
      "loss": 0.0122,
      "step": 2787740
    },
    {
      "epoch": 4.562230383011594,
      "grad_norm": 0.6994633674621582,
      "learning_rate": 8.813083557915962e-07,
      "loss": 0.0112,
      "step": 2787760
    },
    {
      "epoch": 4.562263113450246,
      "grad_norm": 0.35269418358802795,
      "learning_rate": 8.81242463578079e-07,
      "loss": 0.0119,
      "step": 2787780
    },
    {
      "epoch": 4.5622958438889,
      "grad_norm": 0.426653116941452,
      "learning_rate": 8.811765713645619e-07,
      "loss": 0.0139,
      "step": 2787800
    },
    {
      "epoch": 4.5623285743275535,
      "grad_norm": 0.24689728021621704,
      "learning_rate": 8.811106791510447e-07,
      "loss": 0.0092,
      "step": 2787820
    },
    {
      "epoch": 4.562361304766206,
      "grad_norm": 0.2531437873840332,
      "learning_rate": 8.810447869375277e-07,
      "loss": 0.0094,
      "step": 2787840
    },
    {
      "epoch": 4.56239403520486,
      "grad_norm": 0.4651447534561157,
      "learning_rate": 8.809788947240105e-07,
      "loss": 0.0121,
      "step": 2787860
    },
    {
      "epoch": 4.562426765643513,
      "grad_norm": 0.10862347483634949,
      "learning_rate": 8.809130025104934e-07,
      "loss": 0.0067,
      "step": 2787880
    },
    {
      "epoch": 4.562459496082166,
      "grad_norm": 0.2082882672548294,
      "learning_rate": 8.808471102969762e-07,
      "loss": 0.009,
      "step": 2787900
    },
    {
      "epoch": 4.56249222652082,
      "grad_norm": 0.26330748200416565,
      "learning_rate": 8.807812180834592e-07,
      "loss": 0.0095,
      "step": 2787920
    },
    {
      "epoch": 4.562524956959473,
      "grad_norm": 0.2974461019039154,
      "learning_rate": 8.80715325869942e-07,
      "loss": 0.0111,
      "step": 2787940
    },
    {
      "epoch": 4.562557687398127,
      "grad_norm": 0.3124679625034332,
      "learning_rate": 8.806494336564249e-07,
      "loss": 0.0121,
      "step": 2787960
    },
    {
      "epoch": 4.5625904178367795,
      "grad_norm": 0.08004723489284515,
      "learning_rate": 8.805835414429077e-07,
      "loss": 0.0058,
      "step": 2787980
    },
    {
      "epoch": 4.562623148275433,
      "grad_norm": 0.7097325325012207,
      "learning_rate": 8.805176492293907e-07,
      "loss": 0.0124,
      "step": 2788000
    },
    {
      "epoch": 4.562655878714087,
      "grad_norm": 0.09977046400308609,
      "learning_rate": 8.804517570158735e-07,
      "loss": 0.0064,
      "step": 2788020
    },
    {
      "epoch": 4.56268860915274,
      "grad_norm": 0.23223070800304413,
      "learning_rate": 8.803858648023563e-07,
      "loss": 0.007,
      "step": 2788040
    },
    {
      "epoch": 4.562721339591393,
      "grad_norm": 0.1406613141298294,
      "learning_rate": 8.803199725888392e-07,
      "loss": 0.0071,
      "step": 2788060
    },
    {
      "epoch": 4.562754070030047,
      "grad_norm": 0.11316744983196259,
      "learning_rate": 8.80254080375322e-07,
      "loss": 0.0078,
      "step": 2788080
    },
    {
      "epoch": 4.5627868004687,
      "grad_norm": 0.1878216415643692,
      "learning_rate": 8.801881881618051e-07,
      "loss": 0.0073,
      "step": 2788100
    },
    {
      "epoch": 4.562819530907353,
      "grad_norm": 0.42279401421546936,
      "learning_rate": 8.801222959482878e-07,
      "loss": 0.009,
      "step": 2788120
    },
    {
      "epoch": 4.5628522613460065,
      "grad_norm": 0.0658816546201706,
      "learning_rate": 8.800564037347707e-07,
      "loss": 0.0094,
      "step": 2788140
    },
    {
      "epoch": 4.56288499178466,
      "grad_norm": 0.09676293283700943,
      "learning_rate": 8.799905115212535e-07,
      "loss": 0.0083,
      "step": 2788160
    },
    {
      "epoch": 4.562917722223313,
      "grad_norm": 0.1311476081609726,
      "learning_rate": 8.799246193077366e-07,
      "loss": 0.0101,
      "step": 2788180
    },
    {
      "epoch": 4.562950452661966,
      "grad_norm": 0.11901311576366425,
      "learning_rate": 8.798587270942194e-07,
      "loss": 0.0076,
      "step": 2788200
    },
    {
      "epoch": 4.56298318310062,
      "grad_norm": 0.09400355815887451,
      "learning_rate": 8.797928348807023e-07,
      "loss": 0.0086,
      "step": 2788220
    },
    {
      "epoch": 4.563015913539274,
      "grad_norm": 0.41059598326683044,
      "learning_rate": 8.79726942667185e-07,
      "loss": 0.0068,
      "step": 2788240
    },
    {
      "epoch": 4.563048643977926,
      "grad_norm": 0.305563360452652,
      "learning_rate": 8.796610504536679e-07,
      "loss": 0.0073,
      "step": 2788260
    },
    {
      "epoch": 4.56308137441658,
      "grad_norm": 0.24737238883972168,
      "learning_rate": 8.795951582401509e-07,
      "loss": 0.0126,
      "step": 2788280
    },
    {
      "epoch": 4.5631141048552335,
      "grad_norm": 0.1466684192419052,
      "learning_rate": 8.795292660266338e-07,
      "loss": 0.0133,
      "step": 2788300
    },
    {
      "epoch": 4.563146835293887,
      "grad_norm": 0.1506378948688507,
      "learning_rate": 8.794633738131166e-07,
      "loss": 0.0068,
      "step": 2788320
    },
    {
      "epoch": 4.56317956573254,
      "grad_norm": 0.11641872674226761,
      "learning_rate": 8.793974815995993e-07,
      "loss": 0.0066,
      "step": 2788340
    },
    {
      "epoch": 4.563212296171193,
      "grad_norm": 0.3292914628982544,
      "learning_rate": 8.793315893860824e-07,
      "loss": 0.0137,
      "step": 2788360
    },
    {
      "epoch": 4.563245026609847,
      "grad_norm": 0.1360923945903778,
      "learning_rate": 8.792656971725652e-07,
      "loss": 0.0057,
      "step": 2788380
    },
    {
      "epoch": 4.5632777570485,
      "grad_norm": 0.13909868896007538,
      "learning_rate": 8.791998049590481e-07,
      "loss": 0.009,
      "step": 2788400
    },
    {
      "epoch": 4.563310487487153,
      "grad_norm": 0.25796353816986084,
      "learning_rate": 8.791339127455309e-07,
      "loss": 0.0107,
      "step": 2788420
    },
    {
      "epoch": 4.563343217925807,
      "grad_norm": 0.10298033803701401,
      "learning_rate": 8.790680205320139e-07,
      "loss": 0.0103,
      "step": 2788440
    },
    {
      "epoch": 4.56337594836446,
      "grad_norm": 0.13348542153835297,
      "learning_rate": 8.790021283184967e-07,
      "loss": 0.0048,
      "step": 2788460
    },
    {
      "epoch": 4.563408678803113,
      "grad_norm": 0.27178600430488586,
      "learning_rate": 8.789362361049796e-07,
      "loss": 0.0078,
      "step": 2788480
    },
    {
      "epoch": 4.563441409241767,
      "grad_norm": 0.1657385379076004,
      "learning_rate": 8.788703438914624e-07,
      "loss": 0.0081,
      "step": 2788500
    },
    {
      "epoch": 4.56347413968042,
      "grad_norm": 0.2970704734325409,
      "learning_rate": 8.788044516779453e-07,
      "loss": 0.0075,
      "step": 2788520
    },
    {
      "epoch": 4.563506870119073,
      "grad_norm": 0.22909113764762878,
      "learning_rate": 8.787385594644282e-07,
      "loss": 0.0118,
      "step": 2788540
    },
    {
      "epoch": 4.563539600557727,
      "grad_norm": 0.5779082179069519,
      "learning_rate": 8.786726672509111e-07,
      "loss": 0.0069,
      "step": 2788560
    },
    {
      "epoch": 4.56357233099638,
      "grad_norm": 0.15938439965248108,
      "learning_rate": 8.786067750373939e-07,
      "loss": 0.0077,
      "step": 2788580
    },
    {
      "epoch": 4.563605061435033,
      "grad_norm": 0.20013867318630219,
      "learning_rate": 8.785408828238768e-07,
      "loss": 0.0092,
      "step": 2788600
    },
    {
      "epoch": 4.563637791873687,
      "grad_norm": 0.285372257232666,
      "learning_rate": 8.784749906103597e-07,
      "loss": 0.0153,
      "step": 2788620
    },
    {
      "epoch": 4.56367052231234,
      "grad_norm": 0.21800002455711365,
      "learning_rate": 8.784090983968426e-07,
      "loss": 0.008,
      "step": 2788640
    },
    {
      "epoch": 4.563703252750994,
      "grad_norm": 0.22757935523986816,
      "learning_rate": 8.783432061833254e-07,
      "loss": 0.012,
      "step": 2788660
    },
    {
      "epoch": 4.5637359831896465,
      "grad_norm": 0.389920711517334,
      "learning_rate": 8.782773139698082e-07,
      "loss": 0.0079,
      "step": 2788680
    },
    {
      "epoch": 4.5637687136283,
      "grad_norm": 0.10127411037683487,
      "learning_rate": 8.782114217562911e-07,
      "loss": 0.0099,
      "step": 2788700
    },
    {
      "epoch": 4.563801444066954,
      "grad_norm": 0.43514111638069153,
      "learning_rate": 8.78145529542774e-07,
      "loss": 0.0093,
      "step": 2788720
    },
    {
      "epoch": 4.563834174505606,
      "grad_norm": 0.18255309760570526,
      "learning_rate": 8.780796373292569e-07,
      "loss": 0.0078,
      "step": 2788740
    },
    {
      "epoch": 4.56386690494426,
      "grad_norm": 0.04614142328500748,
      "learning_rate": 8.780137451157397e-07,
      "loss": 0.0097,
      "step": 2788760
    },
    {
      "epoch": 4.5638996353829135,
      "grad_norm": 0.570538341999054,
      "learning_rate": 8.779478529022226e-07,
      "loss": 0.0115,
      "step": 2788780
    },
    {
      "epoch": 4.563932365821566,
      "grad_norm": 0.28181424736976624,
      "learning_rate": 8.778819606887055e-07,
      "loss": 0.0074,
      "step": 2788800
    },
    {
      "epoch": 4.56396509626022,
      "grad_norm": 0.09314655512571335,
      "learning_rate": 8.778160684751884e-07,
      "loss": 0.0127,
      "step": 2788820
    },
    {
      "epoch": 4.563997826698873,
      "grad_norm": 0.09398584067821503,
      "learning_rate": 8.777501762616712e-07,
      "loss": 0.0088,
      "step": 2788840
    },
    {
      "epoch": 4.564030557137527,
      "grad_norm": 0.0907466933131218,
      "learning_rate": 8.776842840481541e-07,
      "loss": 0.0085,
      "step": 2788860
    },
    {
      "epoch": 4.56406328757618,
      "grad_norm": 0.12578946352005005,
      "learning_rate": 8.77618391834637e-07,
      "loss": 0.0091,
      "step": 2788880
    },
    {
      "epoch": 4.564096018014833,
      "grad_norm": 0.22205297648906708,
      "learning_rate": 8.775524996211199e-07,
      "loss": 0.0087,
      "step": 2788900
    },
    {
      "epoch": 4.564128748453487,
      "grad_norm": 0.19859804213047028,
      "learning_rate": 8.774866074076027e-07,
      "loss": 0.0073,
      "step": 2788920
    },
    {
      "epoch": 4.5641614788921405,
      "grad_norm": 0.18759775161743164,
      "learning_rate": 8.774207151940856e-07,
      "loss": 0.0066,
      "step": 2788940
    },
    {
      "epoch": 4.564194209330793,
      "grad_norm": 0.42738258838653564,
      "learning_rate": 8.773548229805684e-07,
      "loss": 0.0079,
      "step": 2788960
    },
    {
      "epoch": 4.564226939769447,
      "grad_norm": 0.4042890667915344,
      "learning_rate": 8.772889307670514e-07,
      "loss": 0.0109,
      "step": 2788980
    },
    {
      "epoch": 4.5642596702081,
      "grad_norm": 0.0644797533750534,
      "learning_rate": 8.772230385535342e-07,
      "loss": 0.0113,
      "step": 2789000
    },
    {
      "epoch": 4.564292400646753,
      "grad_norm": 0.10888988524675369,
      "learning_rate": 8.77157146340017e-07,
      "loss": 0.0115,
      "step": 2789020
    },
    {
      "epoch": 4.564325131085407,
      "grad_norm": 0.4616808295249939,
      "learning_rate": 8.770912541264999e-07,
      "loss": 0.0081,
      "step": 2789040
    },
    {
      "epoch": 4.56435786152406,
      "grad_norm": 0.3117871880531311,
      "learning_rate": 8.770253619129828e-07,
      "loss": 0.0066,
      "step": 2789060
    },
    {
      "epoch": 4.564390591962713,
      "grad_norm": 0.16319729387760162,
      "learning_rate": 8.769594696994657e-07,
      "loss": 0.0104,
      "step": 2789080
    },
    {
      "epoch": 4.564423322401367,
      "grad_norm": 0.34680652618408203,
      "learning_rate": 8.768935774859485e-07,
      "loss": 0.0112,
      "step": 2789100
    },
    {
      "epoch": 4.56445605284002,
      "grad_norm": 0.3600635230541229,
      "learning_rate": 8.768276852724314e-07,
      "loss": 0.0077,
      "step": 2789120
    },
    {
      "epoch": 4.564488783278674,
      "grad_norm": 0.3689819574356079,
      "learning_rate": 8.767617930589142e-07,
      "loss": 0.0112,
      "step": 2789140
    },
    {
      "epoch": 4.5645215137173265,
      "grad_norm": 0.08943018317222595,
      "learning_rate": 8.766959008453972e-07,
      "loss": 0.0064,
      "step": 2789160
    },
    {
      "epoch": 4.56455424415598,
      "grad_norm": 0.3931765556335449,
      "learning_rate": 8.7663000863188e-07,
      "loss": 0.0118,
      "step": 2789180
    },
    {
      "epoch": 4.564586974594634,
      "grad_norm": 0.11146712303161621,
      "learning_rate": 8.765641164183629e-07,
      "loss": 0.0072,
      "step": 2789200
    },
    {
      "epoch": 4.564619705033287,
      "grad_norm": 0.3907001316547394,
      "learning_rate": 8.764982242048457e-07,
      "loss": 0.0058,
      "step": 2789220
    },
    {
      "epoch": 4.56465243547194,
      "grad_norm": 0.322910338640213,
      "learning_rate": 8.764323319913287e-07,
      "loss": 0.0112,
      "step": 2789240
    },
    {
      "epoch": 4.564685165910594,
      "grad_norm": 0.35813000798225403,
      "learning_rate": 8.763664397778115e-07,
      "loss": 0.0119,
      "step": 2789260
    },
    {
      "epoch": 4.564717896349247,
      "grad_norm": 0.5667366981506348,
      "learning_rate": 8.763005475642944e-07,
      "loss": 0.0137,
      "step": 2789280
    },
    {
      "epoch": 4.5647506267879,
      "grad_norm": 0.26736345887184143,
      "learning_rate": 8.762346553507772e-07,
      "loss": 0.0088,
      "step": 2789300
    },
    {
      "epoch": 4.5647833572265535,
      "grad_norm": 0.11386262625455856,
      "learning_rate": 8.761687631372602e-07,
      "loss": 0.0064,
      "step": 2789320
    },
    {
      "epoch": 4.564816087665207,
      "grad_norm": 0.3727302551269531,
      "learning_rate": 8.76102870923743e-07,
      "loss": 0.0107,
      "step": 2789340
    },
    {
      "epoch": 4.56484881810386,
      "grad_norm": 0.21445325016975403,
      "learning_rate": 8.760369787102258e-07,
      "loss": 0.0096,
      "step": 2789360
    },
    {
      "epoch": 4.564881548542513,
      "grad_norm": 0.09385821968317032,
      "learning_rate": 8.759710864967087e-07,
      "loss": 0.0071,
      "step": 2789380
    },
    {
      "epoch": 4.564914278981167,
      "grad_norm": 0.6846277713775635,
      "learning_rate": 8.759051942831915e-07,
      "loss": 0.0108,
      "step": 2789400
    },
    {
      "epoch": 4.564947009419821,
      "grad_norm": 0.2932111620903015,
      "learning_rate": 8.758393020696746e-07,
      "loss": 0.01,
      "step": 2789420
    },
    {
      "epoch": 4.564979739858473,
      "grad_norm": 0.16119855642318726,
      "learning_rate": 8.757734098561573e-07,
      "loss": 0.0096,
      "step": 2789440
    },
    {
      "epoch": 4.565012470297127,
      "grad_norm": 0.04480158910155296,
      "learning_rate": 8.757075176426402e-07,
      "loss": 0.0151,
      "step": 2789460
    },
    {
      "epoch": 4.5650452007357805,
      "grad_norm": 0.1435980647802353,
      "learning_rate": 8.75641625429123e-07,
      "loss": 0.0061,
      "step": 2789480
    },
    {
      "epoch": 4.565077931174434,
      "grad_norm": 0.4524775445461273,
      "learning_rate": 8.755757332156061e-07,
      "loss": 0.0079,
      "step": 2789500
    },
    {
      "epoch": 4.565110661613087,
      "grad_norm": 0.10581448674201965,
      "learning_rate": 8.755098410020889e-07,
      "loss": 0.0069,
      "step": 2789520
    },
    {
      "epoch": 4.56514339205174,
      "grad_norm": 0.5326070785522461,
      "learning_rate": 8.754439487885718e-07,
      "loss": 0.0097,
      "step": 2789540
    },
    {
      "epoch": 4.565176122490394,
      "grad_norm": 0.33977463841438293,
      "learning_rate": 8.753780565750545e-07,
      "loss": 0.0107,
      "step": 2789560
    },
    {
      "epoch": 4.565208852929047,
      "grad_norm": 0.11471454799175262,
      "learning_rate": 8.753121643615374e-07,
      "loss": 0.0121,
      "step": 2789580
    },
    {
      "epoch": 4.5652415833677,
      "grad_norm": 0.45120421051979065,
      "learning_rate": 8.752462721480204e-07,
      "loss": 0.0069,
      "step": 2789600
    },
    {
      "epoch": 4.565274313806354,
      "grad_norm": 0.08762525767087936,
      "learning_rate": 8.751803799345033e-07,
      "loss": 0.007,
      "step": 2789620
    },
    {
      "epoch": 4.5653070442450066,
      "grad_norm": 0.19685418903827667,
      "learning_rate": 8.751144877209861e-07,
      "loss": 0.0106,
      "step": 2789640
    },
    {
      "epoch": 4.56533977468366,
      "grad_norm": 0.24374285340309143,
      "learning_rate": 8.750485955074688e-07,
      "loss": 0.0097,
      "step": 2789660
    },
    {
      "epoch": 4.565372505122314,
      "grad_norm": 0.11647647619247437,
      "learning_rate": 8.749827032939519e-07,
      "loss": 0.0076,
      "step": 2789680
    },
    {
      "epoch": 4.565405235560967,
      "grad_norm": 0.5964150428771973,
      "learning_rate": 8.749168110804348e-07,
      "loss": 0.007,
      "step": 2789700
    },
    {
      "epoch": 4.56543796599962,
      "grad_norm": 0.2667236328125,
      "learning_rate": 8.748509188669176e-07,
      "loss": 0.0096,
      "step": 2789720
    },
    {
      "epoch": 4.565470696438274,
      "grad_norm": 0.3523222506046295,
      "learning_rate": 8.747850266534004e-07,
      "loss": 0.01,
      "step": 2789740
    },
    {
      "epoch": 4.565503426876927,
      "grad_norm": 0.38489922881126404,
      "learning_rate": 8.747191344398834e-07,
      "loss": 0.0107,
      "step": 2789760
    },
    {
      "epoch": 4.565536157315581,
      "grad_norm": 0.06953243911266327,
      "learning_rate": 8.746532422263662e-07,
      "loss": 0.0095,
      "step": 2789780
    },
    {
      "epoch": 4.5655688877542335,
      "grad_norm": 0.11026772856712341,
      "learning_rate": 8.745873500128491e-07,
      "loss": 0.0083,
      "step": 2789800
    },
    {
      "epoch": 4.565601618192887,
      "grad_norm": 0.18422192335128784,
      "learning_rate": 8.745214577993319e-07,
      "loss": 0.0064,
      "step": 2789820
    },
    {
      "epoch": 4.565634348631541,
      "grad_norm": 0.19794310629367828,
      "learning_rate": 8.744555655858148e-07,
      "loss": 0.0053,
      "step": 2789840
    },
    {
      "epoch": 4.565667079070193,
      "grad_norm": 0.14178673923015594,
      "learning_rate": 8.743896733722977e-07,
      "loss": 0.0095,
      "step": 2789860
    },
    {
      "epoch": 4.565699809508847,
      "grad_norm": 0.11977697163820267,
      "learning_rate": 8.743237811587806e-07,
      "loss": 0.0112,
      "step": 2789880
    },
    {
      "epoch": 4.565732539947501,
      "grad_norm": 0.20450004935264587,
      "learning_rate": 8.742578889452634e-07,
      "loss": 0.0086,
      "step": 2789900
    },
    {
      "epoch": 4.565765270386153,
      "grad_norm": 0.16455818712711334,
      "learning_rate": 8.741919967317463e-07,
      "loss": 0.0062,
      "step": 2789920
    },
    {
      "epoch": 4.565798000824807,
      "grad_norm": 0.7838162779808044,
      "learning_rate": 8.741261045182292e-07,
      "loss": 0.0059,
      "step": 2789940
    },
    {
      "epoch": 4.5658307312634605,
      "grad_norm": 0.3123602867126465,
      "learning_rate": 8.740602123047121e-07,
      "loss": 0.0075,
      "step": 2789960
    },
    {
      "epoch": 4.565863461702114,
      "grad_norm": 0.21057236194610596,
      "learning_rate": 8.739943200911949e-07,
      "loss": 0.0067,
      "step": 2789980
    },
    {
      "epoch": 4.565896192140767,
      "grad_norm": 0.6781186461448669,
      "learning_rate": 8.739284278776778e-07,
      "loss": 0.009,
      "step": 2790000
    },
    {
      "epoch": 4.56592892257942,
      "grad_norm": 0.28157466650009155,
      "learning_rate": 8.738625356641606e-07,
      "loss": 0.0099,
      "step": 2790020
    },
    {
      "epoch": 4.565961653018074,
      "grad_norm": 0.12043967097997665,
      "learning_rate": 8.737966434506436e-07,
      "loss": 0.0068,
      "step": 2790040
    },
    {
      "epoch": 4.565994383456727,
      "grad_norm": 0.14961594343185425,
      "learning_rate": 8.737307512371264e-07,
      "loss": 0.0151,
      "step": 2790060
    },
    {
      "epoch": 4.56602711389538,
      "grad_norm": 0.08429136127233505,
      "learning_rate": 8.736648590236092e-07,
      "loss": 0.0084,
      "step": 2790080
    },
    {
      "epoch": 4.566059844334034,
      "grad_norm": 0.14174094796180725,
      "learning_rate": 8.735989668100921e-07,
      "loss": 0.011,
      "step": 2790100
    },
    {
      "epoch": 4.5660925747726875,
      "grad_norm": 0.2204970419406891,
      "learning_rate": 8.73533074596575e-07,
      "loss": 0.0124,
      "step": 2790120
    },
    {
      "epoch": 4.56612530521134,
      "grad_norm": 0.1863802969455719,
      "learning_rate": 8.734671823830579e-07,
      "loss": 0.0063,
      "step": 2790140
    },
    {
      "epoch": 4.566158035649994,
      "grad_norm": 0.42765381932258606,
      "learning_rate": 8.734012901695407e-07,
      "loss": 0.0086,
      "step": 2790160
    },
    {
      "epoch": 4.566190766088647,
      "grad_norm": 0.23724500834941864,
      "learning_rate": 8.733353979560236e-07,
      "loss": 0.0066,
      "step": 2790180
    },
    {
      "epoch": 4.5662234965273,
      "grad_norm": 0.1161375492811203,
      "learning_rate": 8.732695057425065e-07,
      "loss": 0.0101,
      "step": 2790200
    },
    {
      "epoch": 4.566256226965954,
      "grad_norm": 0.31605881452560425,
      "learning_rate": 8.732036135289894e-07,
      "loss": 0.009,
      "step": 2790220
    },
    {
      "epoch": 4.566288957404607,
      "grad_norm": 0.3043108284473419,
      "learning_rate": 8.731377213154722e-07,
      "loss": 0.0062,
      "step": 2790240
    },
    {
      "epoch": 4.56632168784326,
      "grad_norm": 0.5203803777694702,
      "learning_rate": 8.730718291019551e-07,
      "loss": 0.0084,
      "step": 2790260
    },
    {
      "epoch": 4.566354418281914,
      "grad_norm": 0.08764611184597015,
      "learning_rate": 8.730059368884379e-07,
      "loss": 0.0083,
      "step": 2790280
    },
    {
      "epoch": 4.566387148720567,
      "grad_norm": 0.1385737955570221,
      "learning_rate": 8.729400446749209e-07,
      "loss": 0.0102,
      "step": 2790300
    },
    {
      "epoch": 4.566419879159221,
      "grad_norm": 0.03912360221147537,
      "learning_rate": 8.728741524614037e-07,
      "loss": 0.0084,
      "step": 2790320
    },
    {
      "epoch": 4.5664526095978735,
      "grad_norm": 0.11842964589595795,
      "learning_rate": 8.728082602478866e-07,
      "loss": 0.0104,
      "step": 2790340
    },
    {
      "epoch": 4.566485340036527,
      "grad_norm": 0.24367225170135498,
      "learning_rate": 8.727423680343694e-07,
      "loss": 0.0133,
      "step": 2790360
    },
    {
      "epoch": 4.566518070475181,
      "grad_norm": 0.1904153823852539,
      "learning_rate": 8.726764758208524e-07,
      "loss": 0.0069,
      "step": 2790380
    },
    {
      "epoch": 4.566550800913834,
      "grad_norm": 0.24573257565498352,
      "learning_rate": 8.726105836073352e-07,
      "loss": 0.0132,
      "step": 2790400
    },
    {
      "epoch": 4.566583531352487,
      "grad_norm": 0.26916834712028503,
      "learning_rate": 8.72544691393818e-07,
      "loss": 0.0115,
      "step": 2790420
    },
    {
      "epoch": 4.5666162617911406,
      "grad_norm": 0.2748889923095703,
      "learning_rate": 8.724787991803009e-07,
      "loss": 0.0063,
      "step": 2790440
    },
    {
      "epoch": 4.566648992229794,
      "grad_norm": 0.4041934907436371,
      "learning_rate": 8.724129069667837e-07,
      "loss": 0.0082,
      "step": 2790460
    },
    {
      "epoch": 4.566681722668447,
      "grad_norm": 0.2667151689529419,
      "learning_rate": 8.723470147532667e-07,
      "loss": 0.0126,
      "step": 2790480
    },
    {
      "epoch": 4.5667144531071004,
      "grad_norm": 0.33677607774734497,
      "learning_rate": 8.722811225397495e-07,
      "loss": 0.0095,
      "step": 2790500
    },
    {
      "epoch": 4.566747183545754,
      "grad_norm": 0.3612561821937561,
      "learning_rate": 8.722152303262324e-07,
      "loss": 0.0082,
      "step": 2790520
    },
    {
      "epoch": 4.566779913984407,
      "grad_norm": 0.14415781199932098,
      "learning_rate": 8.721493381127152e-07,
      "loss": 0.0065,
      "step": 2790540
    },
    {
      "epoch": 4.56681264442306,
      "grad_norm": 0.16036586463451385,
      "learning_rate": 8.720834458991982e-07,
      "loss": 0.0083,
      "step": 2790560
    },
    {
      "epoch": 4.566845374861714,
      "grad_norm": 0.21021822094917297,
      "learning_rate": 8.72017553685681e-07,
      "loss": 0.0125,
      "step": 2790580
    },
    {
      "epoch": 4.5668781053003675,
      "grad_norm": 0.16092397272586823,
      "learning_rate": 8.719516614721639e-07,
      "loss": 0.0067,
      "step": 2790600
    },
    {
      "epoch": 4.56691083573902,
      "grad_norm": 0.1914091557264328,
      "learning_rate": 8.718857692586467e-07,
      "loss": 0.0095,
      "step": 2790620
    },
    {
      "epoch": 4.566943566177674,
      "grad_norm": 0.38891640305519104,
      "learning_rate": 8.718198770451297e-07,
      "loss": 0.0089,
      "step": 2790640
    },
    {
      "epoch": 4.566976296616327,
      "grad_norm": 0.12879517674446106,
      "learning_rate": 8.717539848316125e-07,
      "loss": 0.0103,
      "step": 2790660
    },
    {
      "epoch": 4.567009027054981,
      "grad_norm": 0.292117178440094,
      "learning_rate": 8.716880926180954e-07,
      "loss": 0.0105,
      "step": 2790680
    },
    {
      "epoch": 4.567041757493634,
      "grad_norm": 0.17149335145950317,
      "learning_rate": 8.716222004045782e-07,
      "loss": 0.0118,
      "step": 2790700
    },
    {
      "epoch": 4.567074487932287,
      "grad_norm": 0.07635338604450226,
      "learning_rate": 8.71556308191061e-07,
      "loss": 0.01,
      "step": 2790720
    },
    {
      "epoch": 4.567107218370941,
      "grad_norm": 0.4286089241504669,
      "learning_rate": 8.714904159775441e-07,
      "loss": 0.0151,
      "step": 2790740
    },
    {
      "epoch": 4.567139948809594,
      "grad_norm": 0.20679686963558197,
      "learning_rate": 8.714245237640268e-07,
      "loss": 0.0099,
      "step": 2790760
    },
    {
      "epoch": 4.567172679248247,
      "grad_norm": 0.08926668763160706,
      "learning_rate": 8.713586315505097e-07,
      "loss": 0.0066,
      "step": 2790780
    },
    {
      "epoch": 4.567205409686901,
      "grad_norm": 0.16656708717346191,
      "learning_rate": 8.712927393369925e-07,
      "loss": 0.0137,
      "step": 2790800
    },
    {
      "epoch": 4.5672381401255535,
      "grad_norm": 0.7128491401672363,
      "learning_rate": 8.712268471234756e-07,
      "loss": 0.0152,
      "step": 2790820
    },
    {
      "epoch": 4.567270870564207,
      "grad_norm": 0.13781629502773285,
      "learning_rate": 8.711609549099584e-07,
      "loss": 0.0077,
      "step": 2790840
    },
    {
      "epoch": 4.567303601002861,
      "grad_norm": 0.669314444065094,
      "learning_rate": 8.710950626964413e-07,
      "loss": 0.0079,
      "step": 2790860
    },
    {
      "epoch": 4.567336331441514,
      "grad_norm": 0.03835030272603035,
      "learning_rate": 8.71029170482924e-07,
      "loss": 0.0077,
      "step": 2790880
    },
    {
      "epoch": 4.567369061880167,
      "grad_norm": 0.34993186593055725,
      "learning_rate": 8.709632782694069e-07,
      "loss": 0.0099,
      "step": 2790900
    },
    {
      "epoch": 4.567401792318821,
      "grad_norm": 0.14661012589931488,
      "learning_rate": 8.708973860558899e-07,
      "loss": 0.011,
      "step": 2790920
    },
    {
      "epoch": 4.567434522757474,
      "grad_norm": 0.30512160062789917,
      "learning_rate": 8.708314938423728e-07,
      "loss": 0.0099,
      "step": 2790940
    },
    {
      "epoch": 4.567467253196128,
      "grad_norm": 0.3486894369125366,
      "learning_rate": 8.707656016288556e-07,
      "loss": 0.0091,
      "step": 2790960
    },
    {
      "epoch": 4.5674999836347805,
      "grad_norm": 0.5318489074707031,
      "learning_rate": 8.706997094153384e-07,
      "loss": 0.0085,
      "step": 2790980
    },
    {
      "epoch": 4.567532714073434,
      "grad_norm": 0.2989487051963806,
      "learning_rate": 8.706338172018214e-07,
      "loss": 0.0072,
      "step": 2791000
    },
    {
      "epoch": 4.567565444512088,
      "grad_norm": 0.19334299862384796,
      "learning_rate": 8.705679249883043e-07,
      "loss": 0.0082,
      "step": 2791020
    },
    {
      "epoch": 4.56759817495074,
      "grad_norm": 0.09677940607070923,
      "learning_rate": 8.705020327747871e-07,
      "loss": 0.0108,
      "step": 2791040
    },
    {
      "epoch": 4.567630905389394,
      "grad_norm": 0.125058114528656,
      "learning_rate": 8.7043614056127e-07,
      "loss": 0.0092,
      "step": 2791060
    },
    {
      "epoch": 4.567663635828048,
      "grad_norm": 0.888062059879303,
      "learning_rate": 8.703702483477529e-07,
      "loss": 0.0099,
      "step": 2791080
    },
    {
      "epoch": 4.5676963662667,
      "grad_norm": 0.16869963705539703,
      "learning_rate": 8.703043561342357e-07,
      "loss": 0.0095,
      "step": 2791100
    },
    {
      "epoch": 4.567729096705354,
      "grad_norm": 0.5557068586349487,
      "learning_rate": 8.702384639207186e-07,
      "loss": 0.0088,
      "step": 2791120
    },
    {
      "epoch": 4.5677618271440075,
      "grad_norm": 0.20050854980945587,
      "learning_rate": 8.701725717072014e-07,
      "loss": 0.007,
      "step": 2791140
    },
    {
      "epoch": 4.567794557582661,
      "grad_norm": 0.13273277878761292,
      "learning_rate": 8.701066794936843e-07,
      "loss": 0.0063,
      "step": 2791160
    },
    {
      "epoch": 4.567827288021314,
      "grad_norm": 0.22157132625579834,
      "learning_rate": 8.700407872801672e-07,
      "loss": 0.0095,
      "step": 2791180
    },
    {
      "epoch": 4.567860018459967,
      "grad_norm": 0.2936851978302002,
      "learning_rate": 8.699748950666501e-07,
      "loss": 0.0091,
      "step": 2791200
    },
    {
      "epoch": 4.567892748898621,
      "grad_norm": 0.23288054764270782,
      "learning_rate": 8.699090028531329e-07,
      "loss": 0.0095,
      "step": 2791220
    },
    {
      "epoch": 4.5679254793372746,
      "grad_norm": 0.13996241986751556,
      "learning_rate": 8.698431106396158e-07,
      "loss": 0.0074,
      "step": 2791240
    },
    {
      "epoch": 4.567958209775927,
      "grad_norm": 0.1870604008436203,
      "learning_rate": 8.697772184260987e-07,
      "loss": 0.0122,
      "step": 2791260
    },
    {
      "epoch": 4.567990940214581,
      "grad_norm": 0.3253101110458374,
      "learning_rate": 8.697113262125816e-07,
      "loss": 0.0071,
      "step": 2791280
    },
    {
      "epoch": 4.5680236706532344,
      "grad_norm": 0.48040100932121277,
      "learning_rate": 8.696454339990644e-07,
      "loss": 0.0075,
      "step": 2791300
    },
    {
      "epoch": 4.568056401091887,
      "grad_norm": 0.31095823645591736,
      "learning_rate": 8.695795417855473e-07,
      "loss": 0.0077,
      "step": 2791320
    },
    {
      "epoch": 4.568089131530541,
      "grad_norm": 0.40511417388916016,
      "learning_rate": 8.695136495720301e-07,
      "loss": 0.0076,
      "step": 2791340
    },
    {
      "epoch": 4.568121861969194,
      "grad_norm": 0.19382549822330475,
      "learning_rate": 8.694477573585131e-07,
      "loss": 0.0073,
      "step": 2791360
    },
    {
      "epoch": 4.568154592407847,
      "grad_norm": 0.2397826462984085,
      "learning_rate": 8.693818651449959e-07,
      "loss": 0.0086,
      "step": 2791380
    },
    {
      "epoch": 4.568187322846501,
      "grad_norm": 0.2949214279651642,
      "learning_rate": 8.693159729314787e-07,
      "loss": 0.0079,
      "step": 2791400
    },
    {
      "epoch": 4.568220053285154,
      "grad_norm": 0.11753042787313461,
      "learning_rate": 8.692500807179616e-07,
      "loss": 0.0076,
      "step": 2791420
    },
    {
      "epoch": 4.568252783723808,
      "grad_norm": 0.3880411684513092,
      "learning_rate": 8.691841885044445e-07,
      "loss": 0.0076,
      "step": 2791440
    },
    {
      "epoch": 4.5682855141624605,
      "grad_norm": 1.1608664989471436,
      "learning_rate": 8.691182962909274e-07,
      "loss": 0.0136,
      "step": 2791460
    },
    {
      "epoch": 4.568318244601114,
      "grad_norm": 0.1978664994239807,
      "learning_rate": 8.690524040774102e-07,
      "loss": 0.0065,
      "step": 2791480
    },
    {
      "epoch": 4.568350975039768,
      "grad_norm": 0.09387259185314178,
      "learning_rate": 8.689865118638931e-07,
      "loss": 0.0043,
      "step": 2791500
    },
    {
      "epoch": 4.568383705478421,
      "grad_norm": 0.4361993074417114,
      "learning_rate": 8.68920619650376e-07,
      "loss": 0.0109,
      "step": 2791520
    },
    {
      "epoch": 4.568416435917074,
      "grad_norm": 0.12605327367782593,
      "learning_rate": 8.688547274368589e-07,
      "loss": 0.0091,
      "step": 2791540
    },
    {
      "epoch": 4.568449166355728,
      "grad_norm": 0.3334692418575287,
      "learning_rate": 8.687888352233417e-07,
      "loss": 0.0063,
      "step": 2791560
    },
    {
      "epoch": 4.568481896794381,
      "grad_norm": 0.5877456665039062,
      "learning_rate": 8.687229430098246e-07,
      "loss": 0.0163,
      "step": 2791580
    },
    {
      "epoch": 4.568514627233034,
      "grad_norm": 0.19821839034557343,
      "learning_rate": 8.686570507963074e-07,
      "loss": 0.0106,
      "step": 2791600
    },
    {
      "epoch": 4.5685473576716875,
      "grad_norm": 0.15616241097450256,
      "learning_rate": 8.685911585827904e-07,
      "loss": 0.0124,
      "step": 2791620
    },
    {
      "epoch": 4.568580088110341,
      "grad_norm": 0.5547083020210266,
      "learning_rate": 8.685252663692732e-07,
      "loss": 0.0094,
      "step": 2791640
    },
    {
      "epoch": 4.568612818548994,
      "grad_norm": 0.17959226667881012,
      "learning_rate": 8.684593741557561e-07,
      "loss": 0.0117,
      "step": 2791660
    },
    {
      "epoch": 4.568645548987647,
      "grad_norm": 0.4492635726928711,
      "learning_rate": 8.683934819422389e-07,
      "loss": 0.0131,
      "step": 2791680
    },
    {
      "epoch": 4.568678279426301,
      "grad_norm": 0.21160098910331726,
      "learning_rate": 8.683275897287219e-07,
      "loss": 0.011,
      "step": 2791700
    },
    {
      "epoch": 4.568711009864955,
      "grad_norm": 0.6235433220863342,
      "learning_rate": 8.682616975152047e-07,
      "loss": 0.0112,
      "step": 2791720
    },
    {
      "epoch": 4.568743740303607,
      "grad_norm": 0.3693901002407074,
      "learning_rate": 8.681958053016875e-07,
      "loss": 0.0169,
      "step": 2791740
    },
    {
      "epoch": 4.568776470742261,
      "grad_norm": 0.14448367059230804,
      "learning_rate": 8.681299130881704e-07,
      "loss": 0.0089,
      "step": 2791760
    },
    {
      "epoch": 4.5688092011809145,
      "grad_norm": 0.3195209205150604,
      "learning_rate": 8.680640208746532e-07,
      "loss": 0.0106,
      "step": 2791780
    },
    {
      "epoch": 4.568841931619567,
      "grad_norm": 0.23245131969451904,
      "learning_rate": 8.679981286611362e-07,
      "loss": 0.0078,
      "step": 2791800
    },
    {
      "epoch": 4.568874662058221,
      "grad_norm": 0.13784758746623993,
      "learning_rate": 8.67932236447619e-07,
      "loss": 0.0105,
      "step": 2791820
    },
    {
      "epoch": 4.568907392496874,
      "grad_norm": 0.06497623026371002,
      "learning_rate": 8.678663442341019e-07,
      "loss": 0.0058,
      "step": 2791840
    },
    {
      "epoch": 4.568940122935528,
      "grad_norm": 0.35507237911224365,
      "learning_rate": 8.678004520205847e-07,
      "loss": 0.0096,
      "step": 2791860
    },
    {
      "epoch": 4.568972853374181,
      "grad_norm": 0.29907989501953125,
      "learning_rate": 8.677345598070677e-07,
      "loss": 0.0109,
      "step": 2791880
    },
    {
      "epoch": 4.569005583812834,
      "grad_norm": 0.743642270565033,
      "learning_rate": 8.676686675935505e-07,
      "loss": 0.0152,
      "step": 2791900
    },
    {
      "epoch": 4.569038314251488,
      "grad_norm": 0.20298407971858978,
      "learning_rate": 8.676027753800334e-07,
      "loss": 0.0054,
      "step": 2791920
    },
    {
      "epoch": 4.569071044690141,
      "grad_norm": 0.07612485438585281,
      "learning_rate": 8.675368831665162e-07,
      "loss": 0.0087,
      "step": 2791940
    },
    {
      "epoch": 4.569103775128794,
      "grad_norm": 0.16593410074710846,
      "learning_rate": 8.674709909529992e-07,
      "loss": 0.0126,
      "step": 2791960
    },
    {
      "epoch": 4.569136505567448,
      "grad_norm": 0.124489925801754,
      "learning_rate": 8.67405098739482e-07,
      "loss": 0.0066,
      "step": 2791980
    },
    {
      "epoch": 4.5691692360061005,
      "grad_norm": 0.10532230138778687,
      "learning_rate": 8.673392065259649e-07,
      "loss": 0.0088,
      "step": 2792000
    },
    {
      "epoch": 4.569201966444754,
      "grad_norm": 0.34032461047172546,
      "learning_rate": 8.672733143124477e-07,
      "loss": 0.0057,
      "step": 2792020
    },
    {
      "epoch": 4.569234696883408,
      "grad_norm": 0.1692848801612854,
      "learning_rate": 8.672074220989305e-07,
      "loss": 0.0064,
      "step": 2792040
    },
    {
      "epoch": 4.569267427322061,
      "grad_norm": 0.15129396319389343,
      "learning_rate": 8.671415298854136e-07,
      "loss": 0.0083,
      "step": 2792060
    },
    {
      "epoch": 4.569300157760714,
      "grad_norm": 0.26504001021385193,
      "learning_rate": 8.670756376718963e-07,
      "loss": 0.0124,
      "step": 2792080
    },
    {
      "epoch": 4.569332888199368,
      "grad_norm": 0.043672557920217514,
      "learning_rate": 8.670097454583792e-07,
      "loss": 0.0071,
      "step": 2792100
    },
    {
      "epoch": 4.569365618638021,
      "grad_norm": 0.28244590759277344,
      "learning_rate": 8.66943853244862e-07,
      "loss": 0.0078,
      "step": 2792120
    },
    {
      "epoch": 4.569398349076675,
      "grad_norm": 0.40624839067459106,
      "learning_rate": 8.668779610313451e-07,
      "loss": 0.0092,
      "step": 2792140
    },
    {
      "epoch": 4.5694310795153275,
      "grad_norm": 0.1499868929386139,
      "learning_rate": 8.668120688178279e-07,
      "loss": 0.0073,
      "step": 2792160
    },
    {
      "epoch": 4.569463809953981,
      "grad_norm": 0.22373449802398682,
      "learning_rate": 8.667461766043108e-07,
      "loss": 0.0081,
      "step": 2792180
    },
    {
      "epoch": 4.569496540392635,
      "grad_norm": 0.22146570682525635,
      "learning_rate": 8.666802843907935e-07,
      "loss": 0.0087,
      "step": 2792200
    },
    {
      "epoch": 4.569529270831287,
      "grad_norm": 0.21553678810596466,
      "learning_rate": 8.666143921772764e-07,
      "loss": 0.0089,
      "step": 2792220
    },
    {
      "epoch": 4.569562001269941,
      "grad_norm": 0.2036632001399994,
      "learning_rate": 8.665484999637594e-07,
      "loss": 0.0076,
      "step": 2792240
    },
    {
      "epoch": 4.5695947317085945,
      "grad_norm": 0.13774847984313965,
      "learning_rate": 8.664826077502423e-07,
      "loss": 0.006,
      "step": 2792260
    },
    {
      "epoch": 4.569627462147247,
      "grad_norm": 0.03750521317124367,
      "learning_rate": 8.664167155367251e-07,
      "loss": 0.009,
      "step": 2792280
    },
    {
      "epoch": 4.569660192585901,
      "grad_norm": 0.08959506452083588,
      "learning_rate": 8.663508233232079e-07,
      "loss": 0.0066,
      "step": 2792300
    },
    {
      "epoch": 4.569692923024554,
      "grad_norm": 0.38520869612693787,
      "learning_rate": 8.662849311096909e-07,
      "loss": 0.0078,
      "step": 2792320
    },
    {
      "epoch": 4.569725653463208,
      "grad_norm": 0.2095208764076233,
      "learning_rate": 8.662190388961738e-07,
      "loss": 0.0103,
      "step": 2792340
    },
    {
      "epoch": 4.569758383901861,
      "grad_norm": 0.14738963544368744,
      "learning_rate": 8.661531466826566e-07,
      "loss": 0.0062,
      "step": 2792360
    },
    {
      "epoch": 4.569791114340514,
      "grad_norm": 0.24886228144168854,
      "learning_rate": 8.660872544691395e-07,
      "loss": 0.0083,
      "step": 2792380
    },
    {
      "epoch": 4.569823844779168,
      "grad_norm": 0.047349900007247925,
      "learning_rate": 8.660213622556224e-07,
      "loss": 0.0077,
      "step": 2792400
    },
    {
      "epoch": 4.5698565752178215,
      "grad_norm": 0.16441333293914795,
      "learning_rate": 8.659554700421053e-07,
      "loss": 0.0055,
      "step": 2792420
    },
    {
      "epoch": 4.569889305656474,
      "grad_norm": 0.07641758769750595,
      "learning_rate": 8.658895778285881e-07,
      "loss": 0.0118,
      "step": 2792440
    },
    {
      "epoch": 4.569922036095128,
      "grad_norm": 0.3285079002380371,
      "learning_rate": 8.658236856150709e-07,
      "loss": 0.0071,
      "step": 2792460
    },
    {
      "epoch": 4.569954766533781,
      "grad_norm": 0.23912008106708527,
      "learning_rate": 8.657577934015538e-07,
      "loss": 0.0071,
      "step": 2792480
    },
    {
      "epoch": 4.569987496972434,
      "grad_norm": 0.22463679313659668,
      "learning_rate": 8.656919011880367e-07,
      "loss": 0.0066,
      "step": 2792500
    },
    {
      "epoch": 4.570020227411088,
      "grad_norm": 0.20578601956367493,
      "learning_rate": 8.656260089745196e-07,
      "loss": 0.0085,
      "step": 2792520
    },
    {
      "epoch": 4.570052957849741,
      "grad_norm": 0.06682273000478745,
      "learning_rate": 8.655601167610024e-07,
      "loss": 0.0056,
      "step": 2792540
    },
    {
      "epoch": 4.570085688288394,
      "grad_norm": 0.18744012713432312,
      "learning_rate": 8.654942245474853e-07,
      "loss": 0.0094,
      "step": 2792560
    },
    {
      "epoch": 4.570118418727048,
      "grad_norm": 0.4880426824092865,
      "learning_rate": 8.654283323339682e-07,
      "loss": 0.01,
      "step": 2792580
    },
    {
      "epoch": 4.570151149165701,
      "grad_norm": 0.13976800441741943,
      "learning_rate": 8.653624401204511e-07,
      "loss": 0.01,
      "step": 2792600
    },
    {
      "epoch": 4.570183879604355,
      "grad_norm": 0.22601251304149628,
      "learning_rate": 8.652965479069339e-07,
      "loss": 0.0067,
      "step": 2792620
    },
    {
      "epoch": 4.5702166100430075,
      "grad_norm": 0.35614854097366333,
      "learning_rate": 8.652306556934168e-07,
      "loss": 0.0097,
      "step": 2792640
    },
    {
      "epoch": 4.570249340481661,
      "grad_norm": 0.2886641323566437,
      "learning_rate": 8.651647634798996e-07,
      "loss": 0.0138,
      "step": 2792660
    },
    {
      "epoch": 4.570282070920315,
      "grad_norm": 0.45184946060180664,
      "learning_rate": 8.650988712663826e-07,
      "loss": 0.0123,
      "step": 2792680
    },
    {
      "epoch": 4.570314801358968,
      "grad_norm": 0.15425829589366913,
      "learning_rate": 8.650329790528654e-07,
      "loss": 0.0123,
      "step": 2792700
    },
    {
      "epoch": 4.570347531797621,
      "grad_norm": 0.835263192653656,
      "learning_rate": 8.649670868393483e-07,
      "loss": 0.0127,
      "step": 2792720
    },
    {
      "epoch": 4.570380262236275,
      "grad_norm": 0.2805609107017517,
      "learning_rate": 8.649011946258311e-07,
      "loss": 0.0096,
      "step": 2792740
    },
    {
      "epoch": 4.570412992674928,
      "grad_norm": 0.21582859754562378,
      "learning_rate": 8.64835302412314e-07,
      "loss": 0.0095,
      "step": 2792760
    },
    {
      "epoch": 4.570445723113581,
      "grad_norm": 0.15410302579402924,
      "learning_rate": 8.647694101987969e-07,
      "loss": 0.0121,
      "step": 2792780
    },
    {
      "epoch": 4.5704784535522345,
      "grad_norm": 0.12715166807174683,
      "learning_rate": 8.647035179852797e-07,
      "loss": 0.0086,
      "step": 2792800
    },
    {
      "epoch": 4.570511183990888,
      "grad_norm": 0.3795759975910187,
      "learning_rate": 8.646376257717626e-07,
      "loss": 0.0126,
      "step": 2792820
    },
    {
      "epoch": 4.570543914429541,
      "grad_norm": 0.30771908164024353,
      "learning_rate": 8.645717335582455e-07,
      "loss": 0.0094,
      "step": 2792840
    },
    {
      "epoch": 4.570576644868194,
      "grad_norm": 0.25769564509391785,
      "learning_rate": 8.645058413447284e-07,
      "loss": 0.0074,
      "step": 2792860
    },
    {
      "epoch": 4.570609375306848,
      "grad_norm": 0.1142253428697586,
      "learning_rate": 8.644399491312112e-07,
      "loss": 0.0069,
      "step": 2792880
    },
    {
      "epoch": 4.570642105745502,
      "grad_norm": 0.16516755521297455,
      "learning_rate": 8.643740569176941e-07,
      "loss": 0.0066,
      "step": 2792900
    },
    {
      "epoch": 4.570674836184154,
      "grad_norm": 0.34992516040802,
      "learning_rate": 8.643081647041769e-07,
      "loss": 0.0073,
      "step": 2792920
    },
    {
      "epoch": 4.570707566622808,
      "grad_norm": 0.3356342017650604,
      "learning_rate": 8.642422724906599e-07,
      "loss": 0.0052,
      "step": 2792940
    },
    {
      "epoch": 4.5707402970614615,
      "grad_norm": 0.2584711015224457,
      "learning_rate": 8.641763802771427e-07,
      "loss": 0.0092,
      "step": 2792960
    },
    {
      "epoch": 4.570773027500115,
      "grad_norm": 0.2947838604450226,
      "learning_rate": 8.641104880636256e-07,
      "loss": 0.0078,
      "step": 2792980
    },
    {
      "epoch": 4.570805757938768,
      "grad_norm": 0.3244018852710724,
      "learning_rate": 8.640445958501084e-07,
      "loss": 0.008,
      "step": 2793000
    },
    {
      "epoch": 4.570838488377421,
      "grad_norm": 0.3864147663116455,
      "learning_rate": 8.639787036365914e-07,
      "loss": 0.0132,
      "step": 2793020
    },
    {
      "epoch": 4.570871218816075,
      "grad_norm": 0.0613090880215168,
      "learning_rate": 8.639128114230742e-07,
      "loss": 0.0091,
      "step": 2793040
    },
    {
      "epoch": 4.570903949254728,
      "grad_norm": 0.33746960759162903,
      "learning_rate": 8.638469192095571e-07,
      "loss": 0.0079,
      "step": 2793060
    },
    {
      "epoch": 4.570936679693381,
      "grad_norm": 0.30535247921943665,
      "learning_rate": 8.637810269960399e-07,
      "loss": 0.0082,
      "step": 2793080
    },
    {
      "epoch": 4.570969410132035,
      "grad_norm": 0.31128397583961487,
      "learning_rate": 8.637151347825227e-07,
      "loss": 0.0099,
      "step": 2793100
    },
    {
      "epoch": 4.5710021405706875,
      "grad_norm": 0.3141927123069763,
      "learning_rate": 8.636492425690057e-07,
      "loss": 0.0072,
      "step": 2793120
    },
    {
      "epoch": 4.571034871009341,
      "grad_norm": 0.10177918523550034,
      "learning_rate": 8.635833503554885e-07,
      "loss": 0.0079,
      "step": 2793140
    },
    {
      "epoch": 4.571067601447995,
      "grad_norm": 0.1442985236644745,
      "learning_rate": 8.635174581419714e-07,
      "loss": 0.0084,
      "step": 2793160
    },
    {
      "epoch": 4.571100331886648,
      "grad_norm": 0.31922775506973267,
      "learning_rate": 8.634515659284542e-07,
      "loss": 0.0114,
      "step": 2793180
    },
    {
      "epoch": 4.571133062325301,
      "grad_norm": 0.259581983089447,
      "learning_rate": 8.633856737149372e-07,
      "loss": 0.0116,
      "step": 2793200
    },
    {
      "epoch": 4.571165792763955,
      "grad_norm": 0.2139378935098648,
      "learning_rate": 8.6331978150142e-07,
      "loss": 0.0084,
      "step": 2793220
    },
    {
      "epoch": 4.571198523202608,
      "grad_norm": 0.05399949103593826,
      "learning_rate": 8.632538892879029e-07,
      "loss": 0.0102,
      "step": 2793240
    },
    {
      "epoch": 4.571231253641261,
      "grad_norm": 0.039397500455379486,
      "learning_rate": 8.631879970743857e-07,
      "loss": 0.0101,
      "step": 2793260
    },
    {
      "epoch": 4.5712639840799145,
      "grad_norm": 0.09281682223081589,
      "learning_rate": 8.631221048608688e-07,
      "loss": 0.0123,
      "step": 2793280
    },
    {
      "epoch": 4.571296714518568,
      "grad_norm": 0.10064839571714401,
      "learning_rate": 8.630562126473515e-07,
      "loss": 0.0127,
      "step": 2793300
    },
    {
      "epoch": 4.571329444957222,
      "grad_norm": 0.7493306398391724,
      "learning_rate": 8.629903204338344e-07,
      "loss": 0.0139,
      "step": 2793320
    },
    {
      "epoch": 4.571362175395874,
      "grad_norm": 0.16548733413219452,
      "learning_rate": 8.629244282203172e-07,
      "loss": 0.0081,
      "step": 2793340
    },
    {
      "epoch": 4.571394905834528,
      "grad_norm": 0.27686819434165955,
      "learning_rate": 8.628585360068001e-07,
      "loss": 0.0105,
      "step": 2793360
    },
    {
      "epoch": 4.571427636273182,
      "grad_norm": 0.14005057513713837,
      "learning_rate": 8.627926437932831e-07,
      "loss": 0.0082,
      "step": 2793380
    },
    {
      "epoch": 4.571460366711834,
      "grad_norm": 0.08166623115539551,
      "learning_rate": 8.627267515797659e-07,
      "loss": 0.0089,
      "step": 2793400
    },
    {
      "epoch": 4.571493097150488,
      "grad_norm": 0.1174202412366867,
      "learning_rate": 8.626608593662487e-07,
      "loss": 0.011,
      "step": 2793420
    },
    {
      "epoch": 4.5715258275891415,
      "grad_norm": 0.13859644532203674,
      "learning_rate": 8.625949671527315e-07,
      "loss": 0.0074,
      "step": 2793440
    },
    {
      "epoch": 4.571558558027794,
      "grad_norm": 0.11488407850265503,
      "learning_rate": 8.625290749392146e-07,
      "loss": 0.0121,
      "step": 2793460
    },
    {
      "epoch": 4.571591288466448,
      "grad_norm": 0.18299521505832672,
      "learning_rate": 8.624631827256974e-07,
      "loss": 0.0114,
      "step": 2793480
    },
    {
      "epoch": 4.571624018905101,
      "grad_norm": 0.15465262532234192,
      "learning_rate": 8.623972905121803e-07,
      "loss": 0.0081,
      "step": 2793500
    },
    {
      "epoch": 4.571656749343755,
      "grad_norm": 0.47550955414772034,
      "learning_rate": 8.62331398298663e-07,
      "loss": 0.0153,
      "step": 2793520
    },
    {
      "epoch": 4.571689479782408,
      "grad_norm": 0.12441585958003998,
      "learning_rate": 8.622655060851459e-07,
      "loss": 0.0067,
      "step": 2793540
    },
    {
      "epoch": 4.571722210221061,
      "grad_norm": 0.3815068006515503,
      "learning_rate": 8.621996138716289e-07,
      "loss": 0.0072,
      "step": 2793560
    },
    {
      "epoch": 4.571754940659715,
      "grad_norm": 0.3344568908214569,
      "learning_rate": 8.621337216581118e-07,
      "loss": 0.0067,
      "step": 2793580
    },
    {
      "epoch": 4.5717876710983685,
      "grad_norm": 0.1632823348045349,
      "learning_rate": 8.620678294445946e-07,
      "loss": 0.0089,
      "step": 2793600
    },
    {
      "epoch": 4.571820401537021,
      "grad_norm": 0.21612465381622314,
      "learning_rate": 8.620019372310774e-07,
      "loss": 0.0111,
      "step": 2793620
    },
    {
      "epoch": 4.571853131975675,
      "grad_norm": 0.11072216928005219,
      "learning_rate": 8.619360450175604e-07,
      "loss": 0.0063,
      "step": 2793640
    },
    {
      "epoch": 4.571885862414328,
      "grad_norm": 0.1761271208524704,
      "learning_rate": 8.618701528040433e-07,
      "loss": 0.0065,
      "step": 2793660
    },
    {
      "epoch": 4.571918592852981,
      "grad_norm": 0.32728368043899536,
      "learning_rate": 8.618042605905261e-07,
      "loss": 0.009,
      "step": 2793680
    },
    {
      "epoch": 4.571951323291635,
      "grad_norm": 0.3928895592689514,
      "learning_rate": 8.61738368377009e-07,
      "loss": 0.0098,
      "step": 2793700
    },
    {
      "epoch": 4.571984053730288,
      "grad_norm": 0.22615449130535126,
      "learning_rate": 8.616724761634919e-07,
      "loss": 0.0064,
      "step": 2793720
    },
    {
      "epoch": 4.572016784168941,
      "grad_norm": 0.31366872787475586,
      "learning_rate": 8.616065839499748e-07,
      "loss": 0.0076,
      "step": 2793740
    },
    {
      "epoch": 4.572049514607595,
      "grad_norm": 0.3100851774215698,
      "learning_rate": 8.615406917364576e-07,
      "loss": 0.0086,
      "step": 2793760
    },
    {
      "epoch": 4.572082245046248,
      "grad_norm": 0.17563322186470032,
      "learning_rate": 8.614747995229404e-07,
      "loss": 0.0122,
      "step": 2793780
    },
    {
      "epoch": 4.572114975484902,
      "grad_norm": 0.055781882256269455,
      "learning_rate": 8.614089073094233e-07,
      "loss": 0.0052,
      "step": 2793800
    },
    {
      "epoch": 4.5721477059235545,
      "grad_norm": 0.08130291849374771,
      "learning_rate": 8.613430150959062e-07,
      "loss": 0.0119,
      "step": 2793820
    },
    {
      "epoch": 4.572180436362208,
      "grad_norm": 0.26513320207595825,
      "learning_rate": 8.612771228823891e-07,
      "loss": 0.0065,
      "step": 2793840
    },
    {
      "epoch": 4.572213166800862,
      "grad_norm": 0.5428846478462219,
      "learning_rate": 8.612112306688719e-07,
      "loss": 0.0091,
      "step": 2793860
    },
    {
      "epoch": 4.572245897239515,
      "grad_norm": 0.09525465220212936,
      "learning_rate": 8.611453384553548e-07,
      "loss": 0.0082,
      "step": 2793880
    },
    {
      "epoch": 4.572278627678168,
      "grad_norm": 0.42996880412101746,
      "learning_rate": 8.610794462418377e-07,
      "loss": 0.0095,
      "step": 2793900
    },
    {
      "epoch": 4.5723113581168215,
      "grad_norm": 0.6420886516571045,
      "learning_rate": 8.610135540283206e-07,
      "loss": 0.0081,
      "step": 2793920
    },
    {
      "epoch": 4.572344088555475,
      "grad_norm": 0.5692915916442871,
      "learning_rate": 8.609476618148034e-07,
      "loss": 0.0125,
      "step": 2793940
    },
    {
      "epoch": 4.572376818994128,
      "grad_norm": 0.22380848228931427,
      "learning_rate": 8.608817696012863e-07,
      "loss": 0.0082,
      "step": 2793960
    },
    {
      "epoch": 4.572409549432781,
      "grad_norm": 0.3646572232246399,
      "learning_rate": 8.608158773877691e-07,
      "loss": 0.0059,
      "step": 2793980
    },
    {
      "epoch": 4.572442279871435,
      "grad_norm": 0.6043976545333862,
      "learning_rate": 8.607499851742521e-07,
      "loss": 0.0191,
      "step": 2794000
    },
    {
      "epoch": 4.572475010310088,
      "grad_norm": 0.8493872880935669,
      "learning_rate": 8.606840929607349e-07,
      "loss": 0.007,
      "step": 2794020
    },
    {
      "epoch": 4.572507740748741,
      "grad_norm": 0.15252763032913208,
      "learning_rate": 8.606182007472178e-07,
      "loss": 0.0106,
      "step": 2794040
    },
    {
      "epoch": 4.572540471187395,
      "grad_norm": 0.10692160576581955,
      "learning_rate": 8.605523085337006e-07,
      "loss": 0.006,
      "step": 2794060
    },
    {
      "epoch": 4.5725732016260485,
      "grad_norm": 0.1835317313671112,
      "learning_rate": 8.604864163201836e-07,
      "loss": 0.0077,
      "step": 2794080
    },
    {
      "epoch": 4.572605932064701,
      "grad_norm": 0.1432838886976242,
      "learning_rate": 8.604205241066664e-07,
      "loss": 0.0084,
      "step": 2794100
    },
    {
      "epoch": 4.572638662503355,
      "grad_norm": 0.1212163046002388,
      "learning_rate": 8.603546318931492e-07,
      "loss": 0.0103,
      "step": 2794120
    },
    {
      "epoch": 4.572671392942008,
      "grad_norm": 0.20709416270256042,
      "learning_rate": 8.602887396796321e-07,
      "loss": 0.0094,
      "step": 2794140
    },
    {
      "epoch": 4.572704123380662,
      "grad_norm": 0.360164999961853,
      "learning_rate": 8.60222847466115e-07,
      "loss": 0.007,
      "step": 2794160
    },
    {
      "epoch": 4.572736853819315,
      "grad_norm": 0.1232956051826477,
      "learning_rate": 8.601569552525979e-07,
      "loss": 0.0057,
      "step": 2794180
    },
    {
      "epoch": 4.572769584257968,
      "grad_norm": 0.2563116252422333,
      "learning_rate": 8.600910630390807e-07,
      "loss": 0.0083,
      "step": 2794200
    },
    {
      "epoch": 4.572802314696622,
      "grad_norm": 0.3814637362957001,
      "learning_rate": 8.600251708255636e-07,
      "loss": 0.0077,
      "step": 2794220
    },
    {
      "epoch": 4.572835045135275,
      "grad_norm": 0.33911436796188354,
      "learning_rate": 8.599592786120464e-07,
      "loss": 0.0082,
      "step": 2794240
    },
    {
      "epoch": 4.572867775573928,
      "grad_norm": 0.31471362709999084,
      "learning_rate": 8.598933863985294e-07,
      "loss": 0.008,
      "step": 2794260
    },
    {
      "epoch": 4.572900506012582,
      "grad_norm": 0.16131670773029327,
      "learning_rate": 8.598274941850122e-07,
      "loss": 0.0124,
      "step": 2794280
    },
    {
      "epoch": 4.5729332364512345,
      "grad_norm": 0.2636575698852539,
      "learning_rate": 8.597616019714951e-07,
      "loss": 0.0095,
      "step": 2794300
    },
    {
      "epoch": 4.572965966889888,
      "grad_norm": 0.1217908039689064,
      "learning_rate": 8.596957097579779e-07,
      "loss": 0.0076,
      "step": 2794320
    },
    {
      "epoch": 4.572998697328542,
      "grad_norm": 0.06312809884548187,
      "learning_rate": 8.596298175444609e-07,
      "loss": 0.012,
      "step": 2794340
    },
    {
      "epoch": 4.573031427767195,
      "grad_norm": 0.34425461292266846,
      "learning_rate": 8.595639253309437e-07,
      "loss": 0.0089,
      "step": 2794360
    },
    {
      "epoch": 4.573064158205848,
      "grad_norm": 0.14304840564727783,
      "learning_rate": 8.594980331174266e-07,
      "loss": 0.0135,
      "step": 2794380
    },
    {
      "epoch": 4.573096888644502,
      "grad_norm": 0.0420503169298172,
      "learning_rate": 8.594321409039094e-07,
      "loss": 0.0061,
      "step": 2794400
    },
    {
      "epoch": 4.573129619083155,
      "grad_norm": 0.23667563498020172,
      "learning_rate": 8.593662486903922e-07,
      "loss": 0.0082,
      "step": 2794420
    },
    {
      "epoch": 4.573162349521809,
      "grad_norm": 0.13586007058620453,
      "learning_rate": 8.593003564768752e-07,
      "loss": 0.0107,
      "step": 2794440
    },
    {
      "epoch": 4.5731950799604615,
      "grad_norm": 0.09998909384012222,
      "learning_rate": 8.59234464263358e-07,
      "loss": 0.0063,
      "step": 2794460
    },
    {
      "epoch": 4.573227810399115,
      "grad_norm": 0.5041571259498596,
      "learning_rate": 8.591685720498409e-07,
      "loss": 0.0101,
      "step": 2794480
    },
    {
      "epoch": 4.573260540837769,
      "grad_norm": 0.1095544770359993,
      "learning_rate": 8.591026798363237e-07,
      "loss": 0.0073,
      "step": 2794500
    },
    {
      "epoch": 4.573293271276421,
      "grad_norm": 0.15009990334510803,
      "learning_rate": 8.590367876228067e-07,
      "loss": 0.0077,
      "step": 2794520
    },
    {
      "epoch": 4.573326001715075,
      "grad_norm": 0.14389637112617493,
      "learning_rate": 8.589708954092895e-07,
      "loss": 0.012,
      "step": 2794540
    },
    {
      "epoch": 4.573358732153729,
      "grad_norm": 0.5793533325195312,
      "learning_rate": 8.589050031957724e-07,
      "loss": 0.0123,
      "step": 2794560
    },
    {
      "epoch": 4.573391462592381,
      "grad_norm": 0.09929300844669342,
      "learning_rate": 8.588391109822552e-07,
      "loss": 0.0065,
      "step": 2794580
    },
    {
      "epoch": 4.573424193031035,
      "grad_norm": 0.14185352623462677,
      "learning_rate": 8.587732187687383e-07,
      "loss": 0.0129,
      "step": 2794600
    },
    {
      "epoch": 4.5734569234696885,
      "grad_norm": 0.0682762935757637,
      "learning_rate": 8.58707326555221e-07,
      "loss": 0.0068,
      "step": 2794620
    },
    {
      "epoch": 4.573489653908342,
      "grad_norm": 0.16574084758758545,
      "learning_rate": 8.586414343417039e-07,
      "loss": 0.0095,
      "step": 2794640
    },
    {
      "epoch": 4.573522384346995,
      "grad_norm": 0.12820319831371307,
      "learning_rate": 8.585755421281867e-07,
      "loss": 0.0088,
      "step": 2794660
    },
    {
      "epoch": 4.573555114785648,
      "grad_norm": 0.2322666198015213,
      "learning_rate": 8.585096499146696e-07,
      "loss": 0.0102,
      "step": 2794680
    },
    {
      "epoch": 4.573587845224302,
      "grad_norm": 0.10945775359869003,
      "learning_rate": 8.584437577011526e-07,
      "loss": 0.0114,
      "step": 2794700
    },
    {
      "epoch": 4.573620575662955,
      "grad_norm": 0.179832324385643,
      "learning_rate": 8.583778654876354e-07,
      "loss": 0.0131,
      "step": 2794720
    },
    {
      "epoch": 4.573653306101608,
      "grad_norm": 0.11583545058965683,
      "learning_rate": 8.583119732741182e-07,
      "loss": 0.0057,
      "step": 2794740
    },
    {
      "epoch": 4.573686036540262,
      "grad_norm": 0.5194060206413269,
      "learning_rate": 8.58246081060601e-07,
      "loss": 0.0082,
      "step": 2794760
    },
    {
      "epoch": 4.573718766978915,
      "grad_norm": 0.12497668713331223,
      "learning_rate": 8.581801888470841e-07,
      "loss": 0.009,
      "step": 2794780
    },
    {
      "epoch": 4.573751497417568,
      "grad_norm": 0.1619855761528015,
      "learning_rate": 8.58114296633567e-07,
      "loss": 0.0108,
      "step": 2794800
    },
    {
      "epoch": 4.573784227856222,
      "grad_norm": 0.32763129472732544,
      "learning_rate": 8.580484044200498e-07,
      "loss": 0.0086,
      "step": 2794820
    },
    {
      "epoch": 4.573816958294875,
      "grad_norm": 0.17049625515937805,
      "learning_rate": 8.579825122065325e-07,
      "loss": 0.0062,
      "step": 2794840
    },
    {
      "epoch": 4.573849688733528,
      "grad_norm": 0.14192426204681396,
      "learning_rate": 8.579166199930156e-07,
      "loss": 0.0072,
      "step": 2794860
    },
    {
      "epoch": 4.573882419172182,
      "grad_norm": 0.15962979197502136,
      "learning_rate": 8.578507277794984e-07,
      "loss": 0.0096,
      "step": 2794880
    },
    {
      "epoch": 4.573915149610835,
      "grad_norm": 0.23838932812213898,
      "learning_rate": 8.577848355659813e-07,
      "loss": 0.0116,
      "step": 2794900
    },
    {
      "epoch": 4.573947880049488,
      "grad_norm": 0.3791740834712982,
      "learning_rate": 8.577189433524641e-07,
      "loss": 0.0092,
      "step": 2794920
    },
    {
      "epoch": 4.5739806104881415,
      "grad_norm": 0.5099547505378723,
      "learning_rate": 8.576530511389469e-07,
      "loss": 0.0099,
      "step": 2794940
    },
    {
      "epoch": 4.574013340926795,
      "grad_norm": 0.14626862108707428,
      "learning_rate": 8.575871589254299e-07,
      "loss": 0.0062,
      "step": 2794960
    },
    {
      "epoch": 4.574046071365449,
      "grad_norm": 0.22098451852798462,
      "learning_rate": 8.575212667119128e-07,
      "loss": 0.0064,
      "step": 2794980
    },
    {
      "epoch": 4.574078801804101,
      "grad_norm": 0.10433738678693771,
      "learning_rate": 8.574553744983956e-07,
      "loss": 0.0072,
      "step": 2795000
    },
    {
      "epoch": 4.574111532242755,
      "grad_norm": 0.1231275126338005,
      "learning_rate": 8.573894822848785e-07,
      "loss": 0.0125,
      "step": 2795020
    },
    {
      "epoch": 4.574144262681409,
      "grad_norm": 0.30384811758995056,
      "learning_rate": 8.573235900713614e-07,
      "loss": 0.0066,
      "step": 2795040
    },
    {
      "epoch": 4.574176993120062,
      "grad_norm": 0.2703852951526642,
      "learning_rate": 8.572576978578443e-07,
      "loss": 0.0083,
      "step": 2795060
    },
    {
      "epoch": 4.574209723558715,
      "grad_norm": 0.5369864702224731,
      "learning_rate": 8.571918056443271e-07,
      "loss": 0.0121,
      "step": 2795080
    },
    {
      "epoch": 4.5742424539973685,
      "grad_norm": 0.2962401509284973,
      "learning_rate": 8.5712591343081e-07,
      "loss": 0.0063,
      "step": 2795100
    },
    {
      "epoch": 4.574275184436022,
      "grad_norm": 0.36628901958465576,
      "learning_rate": 8.570600212172928e-07,
      "loss": 0.0089,
      "step": 2795120
    },
    {
      "epoch": 4.574307914874675,
      "grad_norm": 0.2957017123699188,
      "learning_rate": 8.569941290037758e-07,
      "loss": 0.0074,
      "step": 2795140
    },
    {
      "epoch": 4.574340645313328,
      "grad_norm": 0.15150323510169983,
      "learning_rate": 8.569282367902586e-07,
      "loss": 0.0086,
      "step": 2795160
    },
    {
      "epoch": 4.574373375751982,
      "grad_norm": 0.1578933745622635,
      "learning_rate": 8.568623445767414e-07,
      "loss": 0.0077,
      "step": 2795180
    },
    {
      "epoch": 4.574406106190635,
      "grad_norm": 0.26430743932724,
      "learning_rate": 8.567964523632243e-07,
      "loss": 0.0083,
      "step": 2795200
    },
    {
      "epoch": 4.574438836629288,
      "grad_norm": 0.3149270713329315,
      "learning_rate": 8.567305601497072e-07,
      "loss": 0.0087,
      "step": 2795220
    },
    {
      "epoch": 4.574471567067942,
      "grad_norm": 0.24480323493480682,
      "learning_rate": 8.566646679361901e-07,
      "loss": 0.006,
      "step": 2795240
    },
    {
      "epoch": 4.5745042975065955,
      "grad_norm": 0.08414986729621887,
      "learning_rate": 8.565987757226729e-07,
      "loss": 0.007,
      "step": 2795260
    },
    {
      "epoch": 4.574537027945248,
      "grad_norm": 0.0886513814330101,
      "learning_rate": 8.565328835091558e-07,
      "loss": 0.0074,
      "step": 2795280
    },
    {
      "epoch": 4.574569758383902,
      "grad_norm": 0.11855657398700714,
      "learning_rate": 8.564669912956387e-07,
      "loss": 0.0073,
      "step": 2795300
    },
    {
      "epoch": 4.574602488822555,
      "grad_norm": 0.3934829831123352,
      "learning_rate": 8.564010990821216e-07,
      "loss": 0.0138,
      "step": 2795320
    },
    {
      "epoch": 4.574635219261209,
      "grad_norm": 0.4203007221221924,
      "learning_rate": 8.563352068686044e-07,
      "loss": 0.0068,
      "step": 2795340
    },
    {
      "epoch": 4.574667949699862,
      "grad_norm": 0.1861833781003952,
      "learning_rate": 8.562693146550873e-07,
      "loss": 0.0082,
      "step": 2795360
    },
    {
      "epoch": 4.574700680138515,
      "grad_norm": 0.2677600383758545,
      "learning_rate": 8.562034224415701e-07,
      "loss": 0.0098,
      "step": 2795380
    },
    {
      "epoch": 4.574733410577169,
      "grad_norm": 0.20165863633155823,
      "learning_rate": 8.561375302280531e-07,
      "loss": 0.0059,
      "step": 2795400
    },
    {
      "epoch": 4.574766141015822,
      "grad_norm": 0.1009690910577774,
      "learning_rate": 8.560716380145359e-07,
      "loss": 0.0131,
      "step": 2795420
    },
    {
      "epoch": 4.574798871454475,
      "grad_norm": 0.12733136117458344,
      "learning_rate": 8.560057458010188e-07,
      "loss": 0.0107,
      "step": 2795440
    },
    {
      "epoch": 4.574831601893129,
      "grad_norm": 0.2579169273376465,
      "learning_rate": 8.559398535875016e-07,
      "loss": 0.0106,
      "step": 2795460
    },
    {
      "epoch": 4.5748643323317815,
      "grad_norm": 0.27666640281677246,
      "learning_rate": 8.558739613739846e-07,
      "loss": 0.0089,
      "step": 2795480
    },
    {
      "epoch": 4.574897062770435,
      "grad_norm": 0.29259559512138367,
      "learning_rate": 8.558080691604674e-07,
      "loss": 0.0122,
      "step": 2795500
    },
    {
      "epoch": 4.574929793209089,
      "grad_norm": 0.15295112133026123,
      "learning_rate": 8.557421769469502e-07,
      "loss": 0.0084,
      "step": 2795520
    },
    {
      "epoch": 4.574962523647742,
      "grad_norm": 0.10031785070896149,
      "learning_rate": 8.556762847334331e-07,
      "loss": 0.008,
      "step": 2795540
    },
    {
      "epoch": 4.574995254086395,
      "grad_norm": 0.2893044948577881,
      "learning_rate": 8.556103925199159e-07,
      "loss": 0.0122,
      "step": 2795560
    },
    {
      "epoch": 4.5750279845250486,
      "grad_norm": 0.3127500116825104,
      "learning_rate": 8.555445003063989e-07,
      "loss": 0.0108,
      "step": 2795580
    },
    {
      "epoch": 4.575060714963702,
      "grad_norm": 0.4208897352218628,
      "learning_rate": 8.554786080928817e-07,
      "loss": 0.0092,
      "step": 2795600
    },
    {
      "epoch": 4.575093445402356,
      "grad_norm": 0.37513720989227295,
      "learning_rate": 8.554127158793646e-07,
      "loss": 0.0087,
      "step": 2795620
    },
    {
      "epoch": 4.5751261758410084,
      "grad_norm": 0.8090199828147888,
      "learning_rate": 8.553468236658474e-07,
      "loss": 0.0115,
      "step": 2795640
    },
    {
      "epoch": 4.575158906279662,
      "grad_norm": 0.22653639316558838,
      "learning_rate": 8.552809314523304e-07,
      "loss": 0.0074,
      "step": 2795660
    },
    {
      "epoch": 4.575191636718316,
      "grad_norm": 0.8299139142036438,
      "learning_rate": 8.552150392388132e-07,
      "loss": 0.0089,
      "step": 2795680
    },
    {
      "epoch": 4.575224367156968,
      "grad_norm": 0.30599692463874817,
      "learning_rate": 8.551491470252961e-07,
      "loss": 0.0069,
      "step": 2795700
    },
    {
      "epoch": 4.575257097595622,
      "grad_norm": 0.09965121001005173,
      "learning_rate": 8.550832548117789e-07,
      "loss": 0.0096,
      "step": 2795720
    },
    {
      "epoch": 4.5752898280342755,
      "grad_norm": 0.24350635707378387,
      "learning_rate": 8.550173625982619e-07,
      "loss": 0.0062,
      "step": 2795740
    },
    {
      "epoch": 4.575322558472928,
      "grad_norm": 0.3493785560131073,
      "learning_rate": 8.549514703847447e-07,
      "loss": 0.0171,
      "step": 2795760
    },
    {
      "epoch": 4.575355288911582,
      "grad_norm": 0.27983391284942627,
      "learning_rate": 8.548855781712276e-07,
      "loss": 0.0096,
      "step": 2795780
    },
    {
      "epoch": 4.575388019350235,
      "grad_norm": 0.24824334681034088,
      "learning_rate": 8.548196859577104e-07,
      "loss": 0.0078,
      "step": 2795800
    },
    {
      "epoch": 4.575420749788889,
      "grad_norm": 0.1765233874320984,
      "learning_rate": 8.547537937441932e-07,
      "loss": 0.0085,
      "step": 2795820
    },
    {
      "epoch": 4.575453480227542,
      "grad_norm": 0.5040559768676758,
      "learning_rate": 8.546879015306762e-07,
      "loss": 0.012,
      "step": 2795840
    },
    {
      "epoch": 4.575486210666195,
      "grad_norm": 0.2057829201221466,
      "learning_rate": 8.54622009317159e-07,
      "loss": 0.0082,
      "step": 2795860
    },
    {
      "epoch": 4.575518941104849,
      "grad_norm": 0.275895893573761,
      "learning_rate": 8.545561171036419e-07,
      "loss": 0.0057,
      "step": 2795880
    },
    {
      "epoch": 4.5755516715435025,
      "grad_norm": 0.10771764814853668,
      "learning_rate": 8.544902248901247e-07,
      "loss": 0.0056,
      "step": 2795900
    },
    {
      "epoch": 4.575584401982155,
      "grad_norm": 0.11805884540081024,
      "learning_rate": 8.544243326766078e-07,
      "loss": 0.0089,
      "step": 2795920
    },
    {
      "epoch": 4.575617132420809,
      "grad_norm": 0.07127711176872253,
      "learning_rate": 8.543584404630905e-07,
      "loss": 0.0069,
      "step": 2795940
    },
    {
      "epoch": 4.575649862859462,
      "grad_norm": 0.1917358934879303,
      "learning_rate": 8.542925482495734e-07,
      "loss": 0.0079,
      "step": 2795960
    },
    {
      "epoch": 4.575682593298115,
      "grad_norm": 0.22577112913131714,
      "learning_rate": 8.542266560360562e-07,
      "loss": 0.0084,
      "step": 2795980
    },
    {
      "epoch": 4.575715323736769,
      "grad_norm": 0.07990460097789764,
      "learning_rate": 8.541607638225391e-07,
      "loss": 0.0082,
      "step": 2796000
    },
    {
      "epoch": 4.575748054175422,
      "grad_norm": 0.0855359137058258,
      "learning_rate": 8.540948716090221e-07,
      "loss": 0.0085,
      "step": 2796020
    },
    {
      "epoch": 4.575780784614075,
      "grad_norm": 0.08936852216720581,
      "learning_rate": 8.540289793955049e-07,
      "loss": 0.0096,
      "step": 2796040
    },
    {
      "epoch": 4.575813515052729,
      "grad_norm": 0.10770754516124725,
      "learning_rate": 8.539630871819877e-07,
      "loss": 0.0078,
      "step": 2796060
    },
    {
      "epoch": 4.575846245491382,
      "grad_norm": 0.3474956154823303,
      "learning_rate": 8.538971949684706e-07,
      "loss": 0.0122,
      "step": 2796080
    },
    {
      "epoch": 4.575878975930036,
      "grad_norm": 0.37160730361938477,
      "learning_rate": 8.538313027549536e-07,
      "loss": 0.008,
      "step": 2796100
    },
    {
      "epoch": 4.5759117063686885,
      "grad_norm": 0.5620415806770325,
      "learning_rate": 8.537654105414365e-07,
      "loss": 0.0104,
      "step": 2796120
    },
    {
      "epoch": 4.575944436807342,
      "grad_norm": 0.1414635181427002,
      "learning_rate": 8.536995183279193e-07,
      "loss": 0.0182,
      "step": 2796140
    },
    {
      "epoch": 4.575977167245996,
      "grad_norm": 0.5122722387313843,
      "learning_rate": 8.53633626114402e-07,
      "loss": 0.0111,
      "step": 2796160
    },
    {
      "epoch": 4.576009897684648,
      "grad_norm": 0.1939098984003067,
      "learning_rate": 8.535677339008851e-07,
      "loss": 0.01,
      "step": 2796180
    },
    {
      "epoch": 4.576042628123302,
      "grad_norm": 0.5467237234115601,
      "learning_rate": 8.535018416873679e-07,
      "loss": 0.0103,
      "step": 2796200
    },
    {
      "epoch": 4.576075358561956,
      "grad_norm": 0.2588191032409668,
      "learning_rate": 8.534359494738508e-07,
      "loss": 0.0066,
      "step": 2796220
    },
    {
      "epoch": 4.576108089000609,
      "grad_norm": 0.24331134557724,
      "learning_rate": 8.533700572603336e-07,
      "loss": 0.0115,
      "step": 2796240
    },
    {
      "epoch": 4.576140819439262,
      "grad_norm": 0.3436109721660614,
      "learning_rate": 8.533041650468165e-07,
      "loss": 0.0087,
      "step": 2796260
    },
    {
      "epoch": 4.5761735498779155,
      "grad_norm": 0.5146169662475586,
      "learning_rate": 8.532382728332994e-07,
      "loss": 0.0095,
      "step": 2796280
    },
    {
      "epoch": 4.576206280316569,
      "grad_norm": 0.5044733881950378,
      "learning_rate": 8.531723806197823e-07,
      "loss": 0.0102,
      "step": 2796300
    },
    {
      "epoch": 4.576239010755222,
      "grad_norm": 0.29912635684013367,
      "learning_rate": 8.531064884062651e-07,
      "loss": 0.0085,
      "step": 2796320
    },
    {
      "epoch": 4.576271741193875,
      "grad_norm": 0.07035772502422333,
      "learning_rate": 8.53040596192748e-07,
      "loss": 0.0121,
      "step": 2796340
    },
    {
      "epoch": 4.576304471632529,
      "grad_norm": 0.3491250276565552,
      "learning_rate": 8.529747039792309e-07,
      "loss": 0.0118,
      "step": 2796360
    },
    {
      "epoch": 4.576337202071182,
      "grad_norm": 0.23464609682559967,
      "learning_rate": 8.529088117657138e-07,
      "loss": 0.0071,
      "step": 2796380
    },
    {
      "epoch": 4.576369932509835,
      "grad_norm": 0.0938594788312912,
      "learning_rate": 8.528429195521966e-07,
      "loss": 0.0103,
      "step": 2796400
    },
    {
      "epoch": 4.576402662948489,
      "grad_norm": 0.8331987857818604,
      "learning_rate": 8.527770273386795e-07,
      "loss": 0.0153,
      "step": 2796420
    },
    {
      "epoch": 4.5764353933871424,
      "grad_norm": 0.4197022318840027,
      "learning_rate": 8.527111351251623e-07,
      "loss": 0.0089,
      "step": 2796440
    },
    {
      "epoch": 4.576468123825795,
      "grad_norm": 0.12557071447372437,
      "learning_rate": 8.526452429116453e-07,
      "loss": 0.0074,
      "step": 2796460
    },
    {
      "epoch": 4.576500854264449,
      "grad_norm": 0.13431265950202942,
      "learning_rate": 8.525793506981281e-07,
      "loss": 0.0093,
      "step": 2796480
    },
    {
      "epoch": 4.576533584703102,
      "grad_norm": 0.1583477407693863,
      "learning_rate": 8.52513458484611e-07,
      "loss": 0.0094,
      "step": 2796500
    },
    {
      "epoch": 4.576566315141756,
      "grad_norm": 0.2009701132774353,
      "learning_rate": 8.524475662710938e-07,
      "loss": 0.0072,
      "step": 2796520
    },
    {
      "epoch": 4.576599045580409,
      "grad_norm": 0.15621106326580048,
      "learning_rate": 8.523816740575767e-07,
      "loss": 0.0045,
      "step": 2796540
    },
    {
      "epoch": 4.576631776019062,
      "grad_norm": 0.09733212739229202,
      "learning_rate": 8.523157818440596e-07,
      "loss": 0.0075,
      "step": 2796560
    },
    {
      "epoch": 4.576664506457716,
      "grad_norm": 3.235421895980835,
      "learning_rate": 8.522498896305424e-07,
      "loss": 0.0117,
      "step": 2796580
    },
    {
      "epoch": 4.5766972368963685,
      "grad_norm": 0.21446678042411804,
      "learning_rate": 8.521839974170253e-07,
      "loss": 0.0084,
      "step": 2796600
    },
    {
      "epoch": 4.576729967335022,
      "grad_norm": 0.27546611428260803,
      "learning_rate": 8.521181052035082e-07,
      "loss": 0.0111,
      "step": 2796620
    },
    {
      "epoch": 4.576762697773676,
      "grad_norm": 0.14391852915287018,
      "learning_rate": 8.520522129899911e-07,
      "loss": 0.0066,
      "step": 2796640
    },
    {
      "epoch": 4.576795428212328,
      "grad_norm": 0.28312885761260986,
      "learning_rate": 8.519863207764739e-07,
      "loss": 0.0093,
      "step": 2796660
    },
    {
      "epoch": 4.576828158650982,
      "grad_norm": 0.1782890409231186,
      "learning_rate": 8.519204285629568e-07,
      "loss": 0.0083,
      "step": 2796680
    },
    {
      "epoch": 4.576860889089636,
      "grad_norm": 0.3662523925304413,
      "learning_rate": 8.518545363494396e-07,
      "loss": 0.007,
      "step": 2796700
    },
    {
      "epoch": 4.576893619528289,
      "grad_norm": 0.28555572032928467,
      "learning_rate": 8.517886441359226e-07,
      "loss": 0.0083,
      "step": 2796720
    },
    {
      "epoch": 4.576926349966942,
      "grad_norm": 0.05644448101520538,
      "learning_rate": 8.517227519224054e-07,
      "loss": 0.0078,
      "step": 2796740
    },
    {
      "epoch": 4.5769590804055955,
      "grad_norm": 0.22466734051704407,
      "learning_rate": 8.516568597088883e-07,
      "loss": 0.0087,
      "step": 2796760
    },
    {
      "epoch": 4.576991810844249,
      "grad_norm": 0.546538233757019,
      "learning_rate": 8.515909674953711e-07,
      "loss": 0.0077,
      "step": 2796780
    },
    {
      "epoch": 4.577024541282903,
      "grad_norm": 0.33453264832496643,
      "learning_rate": 8.515250752818541e-07,
      "loss": 0.0123,
      "step": 2796800
    },
    {
      "epoch": 4.577057271721555,
      "grad_norm": 0.3750620484352112,
      "learning_rate": 8.514591830683369e-07,
      "loss": 0.01,
      "step": 2796820
    },
    {
      "epoch": 4.577090002160209,
      "grad_norm": 0.38485291600227356,
      "learning_rate": 8.513932908548197e-07,
      "loss": 0.0057,
      "step": 2796840
    },
    {
      "epoch": 4.577122732598863,
      "grad_norm": 0.141770601272583,
      "learning_rate": 8.513273986413026e-07,
      "loss": 0.0109,
      "step": 2796860
    },
    {
      "epoch": 4.577155463037515,
      "grad_norm": 0.15619750320911407,
      "learning_rate": 8.512615064277854e-07,
      "loss": 0.0113,
      "step": 2796880
    },
    {
      "epoch": 4.577188193476169,
      "grad_norm": 0.10801037400960922,
      "learning_rate": 8.511956142142684e-07,
      "loss": 0.0107,
      "step": 2796900
    },
    {
      "epoch": 4.5772209239148225,
      "grad_norm": 0.07360944896936417,
      "learning_rate": 8.511297220007512e-07,
      "loss": 0.0085,
      "step": 2796920
    },
    {
      "epoch": 4.577253654353475,
      "grad_norm": 0.1590241640806198,
      "learning_rate": 8.510638297872341e-07,
      "loss": 0.0087,
      "step": 2796940
    },
    {
      "epoch": 4.577286384792129,
      "grad_norm": 0.1484447866678238,
      "learning_rate": 8.509979375737169e-07,
      "loss": 0.0091,
      "step": 2796960
    },
    {
      "epoch": 4.577319115230782,
      "grad_norm": 0.17864160239696503,
      "learning_rate": 8.509320453601999e-07,
      "loss": 0.0154,
      "step": 2796980
    },
    {
      "epoch": 4.577351845669436,
      "grad_norm": 0.2135714292526245,
      "learning_rate": 8.508661531466827e-07,
      "loss": 0.0081,
      "step": 2797000
    },
    {
      "epoch": 4.577384576108089,
      "grad_norm": 0.37423276901245117,
      "learning_rate": 8.508002609331656e-07,
      "loss": 0.0094,
      "step": 2797020
    },
    {
      "epoch": 4.577417306546742,
      "grad_norm": 0.25906187295913696,
      "learning_rate": 8.507343687196484e-07,
      "loss": 0.0084,
      "step": 2797040
    },
    {
      "epoch": 4.577450036985396,
      "grad_norm": 0.12005581706762314,
      "learning_rate": 8.506684765061314e-07,
      "loss": 0.01,
      "step": 2797060
    },
    {
      "epoch": 4.5774827674240495,
      "grad_norm": 0.23281452059745789,
      "learning_rate": 8.506025842926142e-07,
      "loss": 0.0078,
      "step": 2797080
    },
    {
      "epoch": 4.577515497862702,
      "grad_norm": 0.4987455904483795,
      "learning_rate": 8.505366920790971e-07,
      "loss": 0.0103,
      "step": 2797100
    },
    {
      "epoch": 4.577548228301356,
      "grad_norm": 0.3029378652572632,
      "learning_rate": 8.504707998655799e-07,
      "loss": 0.0065,
      "step": 2797120
    },
    {
      "epoch": 4.577580958740009,
      "grad_norm": 0.19933494925498962,
      "learning_rate": 8.504049076520627e-07,
      "loss": 0.012,
      "step": 2797140
    },
    {
      "epoch": 4.577613689178662,
      "grad_norm": 0.18993701040744781,
      "learning_rate": 8.503390154385457e-07,
      "loss": 0.0069,
      "step": 2797160
    },
    {
      "epoch": 4.577646419617316,
      "grad_norm": 0.27125510573387146,
      "learning_rate": 8.502731232250285e-07,
      "loss": 0.0089,
      "step": 2797180
    },
    {
      "epoch": 4.577679150055969,
      "grad_norm": 0.21232007443904877,
      "learning_rate": 8.502072310115114e-07,
      "loss": 0.0076,
      "step": 2797200
    },
    {
      "epoch": 4.577711880494622,
      "grad_norm": 0.12887164950370789,
      "learning_rate": 8.501413387979942e-07,
      "loss": 0.0104,
      "step": 2797220
    },
    {
      "epoch": 4.577744610933276,
      "grad_norm": 0.8525039553642273,
      "learning_rate": 8.500754465844773e-07,
      "loss": 0.0081,
      "step": 2797240
    },
    {
      "epoch": 4.577777341371929,
      "grad_norm": 0.23596540093421936,
      "learning_rate": 8.5000955437096e-07,
      "loss": 0.0093,
      "step": 2797260
    },
    {
      "epoch": 4.577810071810583,
      "grad_norm": 0.31979674100875854,
      "learning_rate": 8.499436621574429e-07,
      "loss": 0.0105,
      "step": 2797280
    },
    {
      "epoch": 4.5778428022492355,
      "grad_norm": 0.15065094828605652,
      "learning_rate": 8.498777699439257e-07,
      "loss": 0.0067,
      "step": 2797300
    },
    {
      "epoch": 4.577875532687889,
      "grad_norm": 0.1683569997549057,
      "learning_rate": 8.498118777304086e-07,
      "loss": 0.0056,
      "step": 2797320
    },
    {
      "epoch": 4.577908263126543,
      "grad_norm": 0.30754855275154114,
      "learning_rate": 8.497459855168916e-07,
      "loss": 0.0081,
      "step": 2797340
    },
    {
      "epoch": 4.577940993565196,
      "grad_norm": 0.2157239466905594,
      "learning_rate": 8.496800933033744e-07,
      "loss": 0.0124,
      "step": 2797360
    },
    {
      "epoch": 4.577973724003849,
      "grad_norm": 0.5036658048629761,
      "learning_rate": 8.496142010898572e-07,
      "loss": 0.0091,
      "step": 2797380
    },
    {
      "epoch": 4.5780064544425025,
      "grad_norm": 0.10157345980405807,
      "learning_rate": 8.495483088763401e-07,
      "loss": 0.0093,
      "step": 2797400
    },
    {
      "epoch": 4.578039184881156,
      "grad_norm": 0.3786163628101349,
      "learning_rate": 8.494824166628231e-07,
      "loss": 0.0085,
      "step": 2797420
    },
    {
      "epoch": 4.578071915319809,
      "grad_norm": 0.16103863716125488,
      "learning_rate": 8.49416524449306e-07,
      "loss": 0.0097,
      "step": 2797440
    },
    {
      "epoch": 4.578104645758462,
      "grad_norm": 0.3020216226577759,
      "learning_rate": 8.493506322357888e-07,
      "loss": 0.0096,
      "step": 2797460
    },
    {
      "epoch": 4.578137376197116,
      "grad_norm": 0.4723089635372162,
      "learning_rate": 8.492847400222715e-07,
      "loss": 0.0106,
      "step": 2797480
    },
    {
      "epoch": 4.578170106635769,
      "grad_norm": 0.24340759217739105,
      "learning_rate": 8.492188478087546e-07,
      "loss": 0.0101,
      "step": 2797500
    },
    {
      "epoch": 4.578202837074422,
      "grad_norm": 0.4372606873512268,
      "learning_rate": 8.491529555952375e-07,
      "loss": 0.0066,
      "step": 2797520
    },
    {
      "epoch": 4.578235567513076,
      "grad_norm": 0.21045291423797607,
      "learning_rate": 8.490870633817203e-07,
      "loss": 0.0081,
      "step": 2797540
    },
    {
      "epoch": 4.5782682979517295,
      "grad_norm": 0.325880765914917,
      "learning_rate": 8.490211711682031e-07,
      "loss": 0.0103,
      "step": 2797560
    },
    {
      "epoch": 4.578301028390382,
      "grad_norm": 0.20442193746566772,
      "learning_rate": 8.48955278954686e-07,
      "loss": 0.0067,
      "step": 2797580
    },
    {
      "epoch": 4.578333758829036,
      "grad_norm": 0.5074124932289124,
      "learning_rate": 8.488893867411689e-07,
      "loss": 0.0066,
      "step": 2797600
    },
    {
      "epoch": 4.578366489267689,
      "grad_norm": 0.341037780046463,
      "learning_rate": 8.488234945276518e-07,
      "loss": 0.0092,
      "step": 2797620
    },
    {
      "epoch": 4.578399219706342,
      "grad_norm": 0.272483766078949,
      "learning_rate": 8.487576023141346e-07,
      "loss": 0.0095,
      "step": 2797640
    },
    {
      "epoch": 4.578431950144996,
      "grad_norm": 0.1213662177324295,
      "learning_rate": 8.486917101006175e-07,
      "loss": 0.0092,
      "step": 2797660
    },
    {
      "epoch": 4.578464680583649,
      "grad_norm": 0.23443593084812164,
      "learning_rate": 8.486258178871004e-07,
      "loss": 0.0081,
      "step": 2797680
    },
    {
      "epoch": 4.578497411022303,
      "grad_norm": 0.1673559993505478,
      "learning_rate": 8.485599256735833e-07,
      "loss": 0.0055,
      "step": 2797700
    },
    {
      "epoch": 4.578530141460956,
      "grad_norm": 0.24387137591838837,
      "learning_rate": 8.484940334600661e-07,
      "loss": 0.0075,
      "step": 2797720
    },
    {
      "epoch": 4.578562871899609,
      "grad_norm": 0.04686471074819565,
      "learning_rate": 8.48428141246549e-07,
      "loss": 0.0111,
      "step": 2797740
    },
    {
      "epoch": 4.578595602338263,
      "grad_norm": 0.36541369557380676,
      "learning_rate": 8.483622490330318e-07,
      "loss": 0.0071,
      "step": 2797760
    },
    {
      "epoch": 4.5786283327769155,
      "grad_norm": 0.10502370446920395,
      "learning_rate": 8.482963568195148e-07,
      "loss": 0.0109,
      "step": 2797780
    },
    {
      "epoch": 4.578661063215569,
      "grad_norm": 0.3146038353443146,
      "learning_rate": 8.482304646059976e-07,
      "loss": 0.0087,
      "step": 2797800
    },
    {
      "epoch": 4.578693793654223,
      "grad_norm": 0.563322126865387,
      "learning_rate": 8.481645723924805e-07,
      "loss": 0.0084,
      "step": 2797820
    },
    {
      "epoch": 4.578726524092875,
      "grad_norm": 0.16697701811790466,
      "learning_rate": 8.480986801789633e-07,
      "loss": 0.0066,
      "step": 2797840
    },
    {
      "epoch": 4.578759254531529,
      "grad_norm": 0.2793494462966919,
      "learning_rate": 8.480327879654462e-07,
      "loss": 0.0091,
      "step": 2797860
    },
    {
      "epoch": 4.578791984970183,
      "grad_norm": 0.48948344588279724,
      "learning_rate": 8.479668957519291e-07,
      "loss": 0.0105,
      "step": 2797880
    },
    {
      "epoch": 4.578824715408836,
      "grad_norm": 0.14522792398929596,
      "learning_rate": 8.479010035384119e-07,
      "loss": 0.0084,
      "step": 2797900
    },
    {
      "epoch": 4.578857445847489,
      "grad_norm": 0.16793431341648102,
      "learning_rate": 8.478351113248948e-07,
      "loss": 0.0079,
      "step": 2797920
    },
    {
      "epoch": 4.5788901762861425,
      "grad_norm": 0.12465176731348038,
      "learning_rate": 8.477692191113777e-07,
      "loss": 0.0105,
      "step": 2797940
    },
    {
      "epoch": 4.578922906724796,
      "grad_norm": 0.22275495529174805,
      "learning_rate": 8.477033268978606e-07,
      "loss": 0.0076,
      "step": 2797960
    },
    {
      "epoch": 4.57895563716345,
      "grad_norm": 0.34846031665802,
      "learning_rate": 8.476374346843434e-07,
      "loss": 0.0086,
      "step": 2797980
    },
    {
      "epoch": 4.578988367602102,
      "grad_norm": 0.3071342408657074,
      "learning_rate": 8.475715424708263e-07,
      "loss": 0.0103,
      "step": 2798000
    },
    {
      "epoch": 4.579021098040756,
      "grad_norm": 0.2523309588432312,
      "learning_rate": 8.475056502573091e-07,
      "loss": 0.0086,
      "step": 2798020
    },
    {
      "epoch": 4.57905382847941,
      "grad_norm": 0.06822613626718521,
      "learning_rate": 8.474397580437921e-07,
      "loss": 0.0124,
      "step": 2798040
    },
    {
      "epoch": 4.579086558918062,
      "grad_norm": 0.213129922747612,
      "learning_rate": 8.473738658302749e-07,
      "loss": 0.009,
      "step": 2798060
    },
    {
      "epoch": 4.579119289356716,
      "grad_norm": 0.09981849789619446,
      "learning_rate": 8.473079736167578e-07,
      "loss": 0.0103,
      "step": 2798080
    },
    {
      "epoch": 4.5791520197953695,
      "grad_norm": 0.33110007643699646,
      "learning_rate": 8.472420814032406e-07,
      "loss": 0.0089,
      "step": 2798100
    },
    {
      "epoch": 4.579184750234022,
      "grad_norm": 0.6283954381942749,
      "learning_rate": 8.471761891897236e-07,
      "loss": 0.0134,
      "step": 2798120
    },
    {
      "epoch": 4.579217480672676,
      "grad_norm": 0.1450708508491516,
      "learning_rate": 8.471102969762064e-07,
      "loss": 0.0095,
      "step": 2798140
    },
    {
      "epoch": 4.579250211111329,
      "grad_norm": 0.5434058904647827,
      "learning_rate": 8.470444047626893e-07,
      "loss": 0.0135,
      "step": 2798160
    },
    {
      "epoch": 4.579282941549983,
      "grad_norm": 0.5574594736099243,
      "learning_rate": 8.469785125491721e-07,
      "loss": 0.0115,
      "step": 2798180
    },
    {
      "epoch": 4.579315671988636,
      "grad_norm": 0.19709676504135132,
      "learning_rate": 8.469126203356549e-07,
      "loss": 0.0092,
      "step": 2798200
    },
    {
      "epoch": 4.579348402427289,
      "grad_norm": 0.11381644755601883,
      "learning_rate": 8.468467281221379e-07,
      "loss": 0.0094,
      "step": 2798220
    },
    {
      "epoch": 4.579381132865943,
      "grad_norm": 0.18351560831069946,
      "learning_rate": 8.467808359086207e-07,
      "loss": 0.011,
      "step": 2798240
    },
    {
      "epoch": 4.579413863304596,
      "grad_norm": 0.4405650198459625,
      "learning_rate": 8.467149436951036e-07,
      "loss": 0.0095,
      "step": 2798260
    },
    {
      "epoch": 4.579446593743249,
      "grad_norm": 0.30825427174568176,
      "learning_rate": 8.466490514815864e-07,
      "loss": 0.012,
      "step": 2798280
    },
    {
      "epoch": 4.579479324181903,
      "grad_norm": 0.08135644346475601,
      "learning_rate": 8.465831592680694e-07,
      "loss": 0.0104,
      "step": 2798300
    },
    {
      "epoch": 4.579512054620556,
      "grad_norm": 0.31360793113708496,
      "learning_rate": 8.465172670545522e-07,
      "loss": 0.007,
      "step": 2798320
    },
    {
      "epoch": 4.579544785059209,
      "grad_norm": 0.148416668176651,
      "learning_rate": 8.464513748410351e-07,
      "loss": 0.0079,
      "step": 2798340
    },
    {
      "epoch": 4.579577515497863,
      "grad_norm": 0.10855833441019058,
      "learning_rate": 8.463854826275179e-07,
      "loss": 0.0078,
      "step": 2798360
    },
    {
      "epoch": 4.579610245936516,
      "grad_norm": 0.2659025490283966,
      "learning_rate": 8.463195904140009e-07,
      "loss": 0.011,
      "step": 2798380
    },
    {
      "epoch": 4.579642976375169,
      "grad_norm": 0.4550458788871765,
      "learning_rate": 8.462536982004837e-07,
      "loss": 0.0135,
      "step": 2798400
    },
    {
      "epoch": 4.5796757068138225,
      "grad_norm": 0.23736849427223206,
      "learning_rate": 8.461878059869666e-07,
      "loss": 0.0097,
      "step": 2798420
    },
    {
      "epoch": 4.579708437252476,
      "grad_norm": 0.16190363466739655,
      "learning_rate": 8.461219137734494e-07,
      "loss": 0.0059,
      "step": 2798440
    },
    {
      "epoch": 4.57974116769113,
      "grad_norm": 0.30242660641670227,
      "learning_rate": 8.460560215599323e-07,
      "loss": 0.0098,
      "step": 2798460
    },
    {
      "epoch": 4.579773898129782,
      "grad_norm": 0.1383299082517624,
      "learning_rate": 8.459901293464152e-07,
      "loss": 0.0067,
      "step": 2798480
    },
    {
      "epoch": 4.579806628568436,
      "grad_norm": 0.07200786471366882,
      "learning_rate": 8.45924237132898e-07,
      "loss": 0.0069,
      "step": 2798500
    },
    {
      "epoch": 4.57983935900709,
      "grad_norm": 0.12443810701370239,
      "learning_rate": 8.458583449193809e-07,
      "loss": 0.0075,
      "step": 2798520
    },
    {
      "epoch": 4.579872089445743,
      "grad_norm": 0.10685667395591736,
      "learning_rate": 8.457924527058637e-07,
      "loss": 0.016,
      "step": 2798540
    },
    {
      "epoch": 4.579904819884396,
      "grad_norm": 0.09716368466615677,
      "learning_rate": 8.457265604923468e-07,
      "loss": 0.0059,
      "step": 2798560
    },
    {
      "epoch": 4.5799375503230495,
      "grad_norm": 0.3703635036945343,
      "learning_rate": 8.456606682788295e-07,
      "loss": 0.0087,
      "step": 2798580
    },
    {
      "epoch": 4.579970280761703,
      "grad_norm": 0.3894268870353699,
      "learning_rate": 8.455947760653124e-07,
      "loss": 0.0068,
      "step": 2798600
    },
    {
      "epoch": 4.580003011200356,
      "grad_norm": 0.2402559518814087,
      "learning_rate": 8.455288838517952e-07,
      "loss": 0.0086,
      "step": 2798620
    },
    {
      "epoch": 4.580035741639009,
      "grad_norm": 0.1736317276954651,
      "learning_rate": 8.454629916382781e-07,
      "loss": 0.0089,
      "step": 2798640
    },
    {
      "epoch": 4.580068472077663,
      "grad_norm": 0.1376233994960785,
      "learning_rate": 8.453970994247611e-07,
      "loss": 0.0099,
      "step": 2798660
    },
    {
      "epoch": 4.580101202516316,
      "grad_norm": 0.5715422034263611,
      "learning_rate": 8.453312072112439e-07,
      "loss": 0.0125,
      "step": 2798680
    },
    {
      "epoch": 4.580133932954969,
      "grad_norm": 0.13669700920581818,
      "learning_rate": 8.452653149977267e-07,
      "loss": 0.0106,
      "step": 2798700
    },
    {
      "epoch": 4.580166663393623,
      "grad_norm": 0.36948782205581665,
      "learning_rate": 8.451994227842096e-07,
      "loss": 0.0061,
      "step": 2798720
    },
    {
      "epoch": 4.5801993938322765,
      "grad_norm": 0.4197779893875122,
      "learning_rate": 8.451335305706926e-07,
      "loss": 0.0096,
      "step": 2798740
    },
    {
      "epoch": 4.580232124270929,
      "grad_norm": 0.4415247142314911,
      "learning_rate": 8.450676383571755e-07,
      "loss": 0.0116,
      "step": 2798760
    },
    {
      "epoch": 4.580264854709583,
      "grad_norm": 0.2238118052482605,
      "learning_rate": 8.450017461436583e-07,
      "loss": 0.009,
      "step": 2798780
    },
    {
      "epoch": 4.580297585148236,
      "grad_norm": 0.05725156143307686,
      "learning_rate": 8.449358539301411e-07,
      "loss": 0.0067,
      "step": 2798800
    },
    {
      "epoch": 4.58033031558689,
      "grad_norm": 0.27225175499916077,
      "learning_rate": 8.448699617166241e-07,
      "loss": 0.0106,
      "step": 2798820
    },
    {
      "epoch": 4.580363046025543,
      "grad_norm": 0.24940581619739532,
      "learning_rate": 8.44804069503107e-07,
      "loss": 0.0089,
      "step": 2798840
    },
    {
      "epoch": 4.580395776464196,
      "grad_norm": 0.14150123298168182,
      "learning_rate": 8.447381772895898e-07,
      "loss": 0.0098,
      "step": 2798860
    },
    {
      "epoch": 4.58042850690285,
      "grad_norm": 0.3010196089744568,
      "learning_rate": 8.446722850760726e-07,
      "loss": 0.0078,
      "step": 2798880
    },
    {
      "epoch": 4.580461237341503,
      "grad_norm": 0.22708553075790405,
      "learning_rate": 8.446063928625555e-07,
      "loss": 0.0092,
      "step": 2798900
    },
    {
      "epoch": 4.580493967780156,
      "grad_norm": 0.08679009974002838,
      "learning_rate": 8.445405006490384e-07,
      "loss": 0.009,
      "step": 2798920
    },
    {
      "epoch": 4.58052669821881,
      "grad_norm": 0.16313256323337555,
      "learning_rate": 8.444746084355213e-07,
      "loss": 0.0063,
      "step": 2798940
    },
    {
      "epoch": 4.5805594286574625,
      "grad_norm": 0.25258684158325195,
      "learning_rate": 8.444087162220041e-07,
      "loss": 0.0091,
      "step": 2798960
    },
    {
      "epoch": 4.580592159096116,
      "grad_norm": 0.1955389529466629,
      "learning_rate": 8.44342824008487e-07,
      "loss": 0.0061,
      "step": 2798980
    },
    {
      "epoch": 4.58062488953477,
      "grad_norm": 0.1241338923573494,
      "learning_rate": 8.442769317949699e-07,
      "loss": 0.006,
      "step": 2799000
    },
    {
      "epoch": 4.580657619973423,
      "grad_norm": 0.18484215438365936,
      "learning_rate": 8.442110395814528e-07,
      "loss": 0.0091,
      "step": 2799020
    },
    {
      "epoch": 4.580690350412076,
      "grad_norm": 0.18132515251636505,
      "learning_rate": 8.441451473679356e-07,
      "loss": 0.0054,
      "step": 2799040
    },
    {
      "epoch": 4.5807230808507295,
      "grad_norm": 0.38250041007995605,
      "learning_rate": 8.440792551544185e-07,
      "loss": 0.0109,
      "step": 2799060
    },
    {
      "epoch": 4.580755811289383,
      "grad_norm": 0.30072200298309326,
      "learning_rate": 8.440133629409013e-07,
      "loss": 0.0072,
      "step": 2799080
    },
    {
      "epoch": 4.580788541728037,
      "grad_norm": 0.26055312156677246,
      "learning_rate": 8.439474707273843e-07,
      "loss": 0.0073,
      "step": 2799100
    },
    {
      "epoch": 4.580821272166689,
      "grad_norm": 0.3384019434452057,
      "learning_rate": 8.438815785138671e-07,
      "loss": 0.0049,
      "step": 2799120
    },
    {
      "epoch": 4.580854002605343,
      "grad_norm": 0.1814143806695938,
      "learning_rate": 8.4381568630035e-07,
      "loss": 0.0078,
      "step": 2799140
    },
    {
      "epoch": 4.580886733043997,
      "grad_norm": 0.13783107697963715,
      "learning_rate": 8.437497940868328e-07,
      "loss": 0.0102,
      "step": 2799160
    },
    {
      "epoch": 4.580919463482649,
      "grad_norm": 0.3316013216972351,
      "learning_rate": 8.436839018733158e-07,
      "loss": 0.0076,
      "step": 2799180
    },
    {
      "epoch": 4.580952193921303,
      "grad_norm": 0.29376286268234253,
      "learning_rate": 8.436180096597986e-07,
      "loss": 0.0055,
      "step": 2799200
    },
    {
      "epoch": 4.5809849243599565,
      "grad_norm": 0.11630215495824814,
      "learning_rate": 8.435521174462814e-07,
      "loss": 0.0068,
      "step": 2799220
    },
    {
      "epoch": 4.581017654798609,
      "grad_norm": 0.31777632236480713,
      "learning_rate": 8.434862252327643e-07,
      "loss": 0.0098,
      "step": 2799240
    },
    {
      "epoch": 4.581050385237263,
      "grad_norm": 0.17168880999088287,
      "learning_rate": 8.434203330192472e-07,
      "loss": 0.0083,
      "step": 2799260
    },
    {
      "epoch": 4.581083115675916,
      "grad_norm": 1.043515920639038,
      "learning_rate": 8.433544408057301e-07,
      "loss": 0.0143,
      "step": 2799280
    },
    {
      "epoch": 4.58111584611457,
      "grad_norm": 0.3610817492008209,
      "learning_rate": 8.432885485922129e-07,
      "loss": 0.0091,
      "step": 2799300
    },
    {
      "epoch": 4.581148576553223,
      "grad_norm": 0.24587306380271912,
      "learning_rate": 8.432226563786958e-07,
      "loss": 0.0098,
      "step": 2799320
    },
    {
      "epoch": 4.581181306991876,
      "grad_norm": 0.21901048719882965,
      "learning_rate": 8.431567641651786e-07,
      "loss": 0.0051,
      "step": 2799340
    },
    {
      "epoch": 4.58121403743053,
      "grad_norm": 0.5926290154457092,
      "learning_rate": 8.430908719516616e-07,
      "loss": 0.0091,
      "step": 2799360
    },
    {
      "epoch": 4.581246767869183,
      "grad_norm": 0.21084551513195038,
      "learning_rate": 8.430249797381444e-07,
      "loss": 0.009,
      "step": 2799380
    },
    {
      "epoch": 4.581279498307836,
      "grad_norm": 0.361528605222702,
      "learning_rate": 8.429590875246273e-07,
      "loss": 0.0093,
      "step": 2799400
    },
    {
      "epoch": 4.58131222874649,
      "grad_norm": 0.2657642066478729,
      "learning_rate": 8.428931953111101e-07,
      "loss": 0.0093,
      "step": 2799420
    },
    {
      "epoch": 4.581344959185143,
      "grad_norm": 0.1482401043176651,
      "learning_rate": 8.428273030975931e-07,
      "loss": 0.0083,
      "step": 2799440
    },
    {
      "epoch": 4.581377689623796,
      "grad_norm": 0.2195213884115219,
      "learning_rate": 8.427614108840759e-07,
      "loss": 0.0093,
      "step": 2799460
    },
    {
      "epoch": 4.58141042006245,
      "grad_norm": 0.6354950070381165,
      "learning_rate": 8.426955186705588e-07,
      "loss": 0.0081,
      "step": 2799480
    },
    {
      "epoch": 4.581443150501103,
      "grad_norm": 0.07469897717237473,
      "learning_rate": 8.426296264570416e-07,
      "loss": 0.0087,
      "step": 2799500
    },
    {
      "epoch": 4.581475880939756,
      "grad_norm": 0.16714179515838623,
      "learning_rate": 8.425637342435244e-07,
      "loss": 0.0082,
      "step": 2799520
    },
    {
      "epoch": 4.58150861137841,
      "grad_norm": 0.10004879534244537,
      "learning_rate": 8.424978420300074e-07,
      "loss": 0.0074,
      "step": 2799540
    },
    {
      "epoch": 4.581541341817063,
      "grad_norm": 0.14125338196754456,
      "learning_rate": 8.424319498164902e-07,
      "loss": 0.0104,
      "step": 2799560
    },
    {
      "epoch": 4.581574072255716,
      "grad_norm": 0.34105974435806274,
      "learning_rate": 8.423660576029731e-07,
      "loss": 0.0086,
      "step": 2799580
    },
    {
      "epoch": 4.5816068026943695,
      "grad_norm": 0.07233743369579315,
      "learning_rate": 8.423001653894559e-07,
      "loss": 0.0078,
      "step": 2799600
    },
    {
      "epoch": 4.581639533133023,
      "grad_norm": 0.5365777611732483,
      "learning_rate": 8.422342731759389e-07,
      "loss": 0.0105,
      "step": 2799620
    },
    {
      "epoch": 4.581672263571677,
      "grad_norm": 0.164903923869133,
      "learning_rate": 8.421683809624217e-07,
      "loss": 0.0087,
      "step": 2799640
    },
    {
      "epoch": 4.581704994010329,
      "grad_norm": 0.7475622892379761,
      "learning_rate": 8.421024887489046e-07,
      "loss": 0.0118,
      "step": 2799660
    },
    {
      "epoch": 4.581737724448983,
      "grad_norm": 0.14806470274925232,
      "learning_rate": 8.420365965353874e-07,
      "loss": 0.0078,
      "step": 2799680
    },
    {
      "epoch": 4.581770454887637,
      "grad_norm": 0.13594724237918854,
      "learning_rate": 8.419707043218704e-07,
      "loss": 0.0065,
      "step": 2799700
    },
    {
      "epoch": 4.58180318532629,
      "grad_norm": 0.2634870409965515,
      "learning_rate": 8.419048121083532e-07,
      "loss": 0.0075,
      "step": 2799720
    },
    {
      "epoch": 4.581835915764943,
      "grad_norm": 0.2589693069458008,
      "learning_rate": 8.418389198948361e-07,
      "loss": 0.0093,
      "step": 2799740
    },
    {
      "epoch": 4.5818686462035965,
      "grad_norm": 0.4006165862083435,
      "learning_rate": 8.417730276813189e-07,
      "loss": 0.0105,
      "step": 2799760
    },
    {
      "epoch": 4.58190137664225,
      "grad_norm": 0.24920673668384552,
      "learning_rate": 8.417071354678018e-07,
      "loss": 0.0095,
      "step": 2799780
    },
    {
      "epoch": 4.581934107080903,
      "grad_norm": 0.2559358477592468,
      "learning_rate": 8.416412432542847e-07,
      "loss": 0.0117,
      "step": 2799800
    },
    {
      "epoch": 4.581966837519556,
      "grad_norm": 0.12606742978096008,
      "learning_rate": 8.415753510407676e-07,
      "loss": 0.0078,
      "step": 2799820
    },
    {
      "epoch": 4.58199956795821,
      "grad_norm": 0.27504128217697144,
      "learning_rate": 8.415094588272504e-07,
      "loss": 0.0088,
      "step": 2799840
    },
    {
      "epoch": 4.582032298396863,
      "grad_norm": 0.23473824560642242,
      "learning_rate": 8.414435666137332e-07,
      "loss": 0.0124,
      "step": 2799860
    },
    {
      "epoch": 4.582065028835516,
      "grad_norm": 0.2164478749036789,
      "learning_rate": 8.413776744002163e-07,
      "loss": 0.0068,
      "step": 2799880
    },
    {
      "epoch": 4.58209775927417,
      "grad_norm": 0.1253233104944229,
      "learning_rate": 8.41311782186699e-07,
      "loss": 0.0111,
      "step": 2799900
    },
    {
      "epoch": 4.582130489712823,
      "grad_norm": 0.3207494616508484,
      "learning_rate": 8.412458899731819e-07,
      "loss": 0.0089,
      "step": 2799920
    },
    {
      "epoch": 4.582163220151476,
      "grad_norm": 0.17573504149913788,
      "learning_rate": 8.411799977596647e-07,
      "loss": 0.0064,
      "step": 2799940
    },
    {
      "epoch": 4.58219595059013,
      "grad_norm": 0.11503812670707703,
      "learning_rate": 8.411141055461476e-07,
      "loss": 0.0097,
      "step": 2799960
    },
    {
      "epoch": 4.582228681028783,
      "grad_norm": 0.2646492123603821,
      "learning_rate": 8.410482133326306e-07,
      "loss": 0.0078,
      "step": 2799980
    },
    {
      "epoch": 4.582261411467437,
      "grad_norm": 0.07836981862783432,
      "learning_rate": 8.409823211191134e-07,
      "loss": 0.0118,
      "step": 2800000
    },
    {
      "epoch": 4.582261411467437,
      "eval_loss": 0.0057740346528589725,
      "eval_runtime": 6484.9161,
      "eval_samples_per_second": 158.5,
      "eval_steps_per_second": 15.85,
      "eval_sts-dev_pearson_cosine": 0.9869647703446673,
      "eval_sts-dev_spearman_cosine": 0.8968677061363108,
      "step": 2800000
    },
    {
      "epoch": 4.58229414190609,
      "grad_norm": 0.08462262153625488,
      "learning_rate": 8.409164289055962e-07,
      "loss": 0.0101,
      "step": 2800020
    },
    {
      "epoch": 4.582326872344743,
      "grad_norm": 0.27065885066986084,
      "learning_rate": 8.408505366920791e-07,
      "loss": 0.0078,
      "step": 2800040
    },
    {
      "epoch": 4.582359602783397,
      "grad_norm": 0.27860966324806213,
      "learning_rate": 8.407846444785621e-07,
      "loss": 0.01,
      "step": 2800060
    },
    {
      "epoch": 4.5823923332220495,
      "grad_norm": 0.19687838852405548,
      "learning_rate": 8.40718752265045e-07,
      "loss": 0.0071,
      "step": 2800080
    },
    {
      "epoch": 4.582425063660703,
      "grad_norm": 0.11334919184446335,
      "learning_rate": 8.406528600515278e-07,
      "loss": 0.0094,
      "step": 2800100
    },
    {
      "epoch": 4.582457794099357,
      "grad_norm": 0.13276171684265137,
      "learning_rate": 8.405869678380106e-07,
      "loss": 0.0078,
      "step": 2800120
    },
    {
      "epoch": 4.582490524538009,
      "grad_norm": 0.350598007440567,
      "learning_rate": 8.405210756244936e-07,
      "loss": 0.0061,
      "step": 2800140
    },
    {
      "epoch": 4.582523254976663,
      "grad_norm": 0.19582439959049225,
      "learning_rate": 8.404551834109765e-07,
      "loss": 0.0037,
      "step": 2800160
    },
    {
      "epoch": 4.582555985415317,
      "grad_norm": 0.5398004651069641,
      "learning_rate": 8.403892911974593e-07,
      "loss": 0.0101,
      "step": 2800180
    },
    {
      "epoch": 4.58258871585397,
      "grad_norm": 0.29929694533348083,
      "learning_rate": 8.403233989839422e-07,
      "loss": 0.0077,
      "step": 2800200
    },
    {
      "epoch": 4.582621446292623,
      "grad_norm": 0.018582602962851524,
      "learning_rate": 8.40257506770425e-07,
      "loss": 0.0082,
      "step": 2800220
    },
    {
      "epoch": 4.5826541767312765,
      "grad_norm": 0.3631620407104492,
      "learning_rate": 8.40191614556908e-07,
      "loss": 0.0085,
      "step": 2800240
    },
    {
      "epoch": 4.58268690716993,
      "grad_norm": 1.159348487854004,
      "learning_rate": 8.401257223433908e-07,
      "loss": 0.0085,
      "step": 2800260
    },
    {
      "epoch": 4.582719637608584,
      "grad_norm": 0.35218530893325806,
      "learning_rate": 8.400598301298736e-07,
      "loss": 0.0097,
      "step": 2800280
    },
    {
      "epoch": 4.582752368047236,
      "grad_norm": 0.5510445237159729,
      "learning_rate": 8.399939379163565e-07,
      "loss": 0.0091,
      "step": 2800300
    },
    {
      "epoch": 4.58278509848589,
      "grad_norm": 0.5790178179740906,
      "learning_rate": 8.399280457028394e-07,
      "loss": 0.0156,
      "step": 2800320
    },
    {
      "epoch": 4.582817828924544,
      "grad_norm": 0.3185933530330658,
      "learning_rate": 8.398621534893223e-07,
      "loss": 0.0069,
      "step": 2800340
    },
    {
      "epoch": 4.582850559363196,
      "grad_norm": 0.4970322549343109,
      "learning_rate": 8.397962612758051e-07,
      "loss": 0.0087,
      "step": 2800360
    },
    {
      "epoch": 4.58288328980185,
      "grad_norm": 0.23523162305355072,
      "learning_rate": 8.39730369062288e-07,
      "loss": 0.0063,
      "step": 2800380
    },
    {
      "epoch": 4.5829160202405035,
      "grad_norm": 0.2460121512413025,
      "learning_rate": 8.396644768487708e-07,
      "loss": 0.0107,
      "step": 2800400
    },
    {
      "epoch": 4.582948750679156,
      "grad_norm": 0.4066908061504364,
      "learning_rate": 8.395985846352538e-07,
      "loss": 0.0084,
      "step": 2800420
    },
    {
      "epoch": 4.58298148111781,
      "grad_norm": 0.21858897805213928,
      "learning_rate": 8.395326924217366e-07,
      "loss": 0.0108,
      "step": 2800440
    },
    {
      "epoch": 4.583014211556463,
      "grad_norm": 0.1429239958524704,
      "learning_rate": 8.394668002082195e-07,
      "loss": 0.0122,
      "step": 2800460
    },
    {
      "epoch": 4.583046941995117,
      "grad_norm": 0.06643950939178467,
      "learning_rate": 8.394009079947023e-07,
      "loss": 0.0069,
      "step": 2800480
    },
    {
      "epoch": 4.58307967243377,
      "grad_norm": 0.10859444737434387,
      "learning_rate": 8.393350157811853e-07,
      "loss": 0.0083,
      "step": 2800500
    },
    {
      "epoch": 4.583112402872423,
      "grad_norm": 0.5886083841323853,
      "learning_rate": 8.392691235676681e-07,
      "loss": 0.0086,
      "step": 2800520
    },
    {
      "epoch": 4.583145133311077,
      "grad_norm": 0.14442406594753265,
      "learning_rate": 8.39203231354151e-07,
      "loss": 0.0112,
      "step": 2800540
    },
    {
      "epoch": 4.5831778637497305,
      "grad_norm": 0.2356259524822235,
      "learning_rate": 8.391373391406338e-07,
      "loss": 0.0084,
      "step": 2800560
    },
    {
      "epoch": 4.583210594188383,
      "grad_norm": 0.2979503870010376,
      "learning_rate": 8.390714469271167e-07,
      "loss": 0.0089,
      "step": 2800580
    },
    {
      "epoch": 4.583243324627037,
      "grad_norm": 0.14424295723438263,
      "learning_rate": 8.390055547135996e-07,
      "loss": 0.0054,
      "step": 2800600
    },
    {
      "epoch": 4.58327605506569,
      "grad_norm": 0.0701633170247078,
      "learning_rate": 8.389396625000824e-07,
      "loss": 0.0109,
      "step": 2800620
    },
    {
      "epoch": 4.583308785504343,
      "grad_norm": 0.7229858636856079,
      "learning_rate": 8.388737702865653e-07,
      "loss": 0.0122,
      "step": 2800640
    },
    {
      "epoch": 4.583341515942997,
      "grad_norm": 0.08971599489450455,
      "learning_rate": 8.388078780730481e-07,
      "loss": 0.0107,
      "step": 2800660
    },
    {
      "epoch": 4.58337424638165,
      "grad_norm": 0.3429662585258484,
      "learning_rate": 8.387419858595311e-07,
      "loss": 0.0085,
      "step": 2800680
    },
    {
      "epoch": 4.583406976820303,
      "grad_norm": 0.25998926162719727,
      "learning_rate": 8.386760936460139e-07,
      "loss": 0.0087,
      "step": 2800700
    },
    {
      "epoch": 4.5834397072589566,
      "grad_norm": 0.34594646096229553,
      "learning_rate": 8.386102014324968e-07,
      "loss": 0.0122,
      "step": 2800720
    },
    {
      "epoch": 4.58347243769761,
      "grad_norm": 0.17453673481941223,
      "learning_rate": 8.385443092189796e-07,
      "loss": 0.0078,
      "step": 2800740
    },
    {
      "epoch": 4.583505168136264,
      "grad_norm": 0.20710860192775726,
      "learning_rate": 8.384784170054626e-07,
      "loss": 0.0119,
      "step": 2800760
    },
    {
      "epoch": 4.5835378985749164,
      "grad_norm": 0.8130276203155518,
      "learning_rate": 8.384125247919454e-07,
      "loss": 0.0096,
      "step": 2800780
    },
    {
      "epoch": 4.58357062901357,
      "grad_norm": 0.05893740430474281,
      "learning_rate": 8.383466325784283e-07,
      "loss": 0.0092,
      "step": 2800800
    },
    {
      "epoch": 4.583603359452224,
      "grad_norm": 0.360022634267807,
      "learning_rate": 8.382807403649111e-07,
      "loss": 0.0098,
      "step": 2800820
    },
    {
      "epoch": 4.583636089890876,
      "grad_norm": 0.18113510310649872,
      "learning_rate": 8.38214848151394e-07,
      "loss": 0.0101,
      "step": 2800840
    },
    {
      "epoch": 4.58366882032953,
      "grad_norm": 0.4324992299079895,
      "learning_rate": 8.381489559378769e-07,
      "loss": 0.0067,
      "step": 2800860
    },
    {
      "epoch": 4.5837015507681835,
      "grad_norm": 0.8396599888801575,
      "learning_rate": 8.380830637243598e-07,
      "loss": 0.0063,
      "step": 2800880
    },
    {
      "epoch": 4.583734281206837,
      "grad_norm": 0.1446884721517563,
      "learning_rate": 8.380171715108426e-07,
      "loss": 0.0111,
      "step": 2800900
    },
    {
      "epoch": 4.58376701164549,
      "grad_norm": 0.2821241319179535,
      "learning_rate": 8.379512792973254e-07,
      "loss": 0.0048,
      "step": 2800920
    },
    {
      "epoch": 4.583799742084143,
      "grad_norm": 0.3182818293571472,
      "learning_rate": 8.378853870838084e-07,
      "loss": 0.0113,
      "step": 2800940
    },
    {
      "epoch": 4.583832472522797,
      "grad_norm": 0.15941005945205688,
      "learning_rate": 8.378194948702912e-07,
      "loss": 0.0091,
      "step": 2800960
    },
    {
      "epoch": 4.58386520296145,
      "grad_norm": 0.34536540508270264,
      "learning_rate": 8.377536026567741e-07,
      "loss": 0.009,
      "step": 2800980
    },
    {
      "epoch": 4.583897933400103,
      "grad_norm": 0.19740134477615356,
      "learning_rate": 8.376877104432569e-07,
      "loss": 0.0087,
      "step": 2801000
    },
    {
      "epoch": 4.583930663838757,
      "grad_norm": 0.07949091494083405,
      "learning_rate": 8.376218182297399e-07,
      "loss": 0.011,
      "step": 2801020
    },
    {
      "epoch": 4.58396339427741,
      "grad_norm": 0.27902472019195557,
      "learning_rate": 8.375559260162227e-07,
      "loss": 0.011,
      "step": 2801040
    },
    {
      "epoch": 4.583996124716063,
      "grad_norm": 0.23238369822502136,
      "learning_rate": 8.374900338027056e-07,
      "loss": 0.0073,
      "step": 2801060
    },
    {
      "epoch": 4.584028855154717,
      "grad_norm": 0.38317975401878357,
      "learning_rate": 8.374241415891884e-07,
      "loss": 0.0078,
      "step": 2801080
    },
    {
      "epoch": 4.58406158559337,
      "grad_norm": 0.07172255963087082,
      "learning_rate": 8.373582493756713e-07,
      "loss": 0.0071,
      "step": 2801100
    },
    {
      "epoch": 4.584094316032023,
      "grad_norm": 0.2709565758705139,
      "learning_rate": 8.372923571621542e-07,
      "loss": 0.0081,
      "step": 2801120
    },
    {
      "epoch": 4.584127046470677,
      "grad_norm": 0.19863829016685486,
      "learning_rate": 8.372264649486371e-07,
      "loss": 0.0066,
      "step": 2801140
    },
    {
      "epoch": 4.58415977690933,
      "grad_norm": 0.6124093532562256,
      "learning_rate": 8.371605727351199e-07,
      "loss": 0.0095,
      "step": 2801160
    },
    {
      "epoch": 4.584192507347984,
      "grad_norm": 0.6676921844482422,
      "learning_rate": 8.370946805216028e-07,
      "loss": 0.0113,
      "step": 2801180
    },
    {
      "epoch": 4.584225237786637,
      "grad_norm": 0.14418122172355652,
      "learning_rate": 8.370287883080858e-07,
      "loss": 0.0069,
      "step": 2801200
    },
    {
      "epoch": 4.58425796822529,
      "grad_norm": 0.3126775622367859,
      "learning_rate": 8.369628960945686e-07,
      "loss": 0.0076,
      "step": 2801220
    },
    {
      "epoch": 4.584290698663944,
      "grad_norm": 0.28991612792015076,
      "learning_rate": 8.368970038810514e-07,
      "loss": 0.0084,
      "step": 2801240
    },
    {
      "epoch": 4.5843234291025965,
      "grad_norm": 0.25245401263237,
      "learning_rate": 8.368311116675342e-07,
      "loss": 0.0109,
      "step": 2801260
    },
    {
      "epoch": 4.58435615954125,
      "grad_norm": 0.336640864610672,
      "learning_rate": 8.367652194540171e-07,
      "loss": 0.0097,
      "step": 2801280
    },
    {
      "epoch": 4.584388889979904,
      "grad_norm": 0.2593940198421478,
      "learning_rate": 8.366993272405001e-07,
      "loss": 0.0073,
      "step": 2801300
    },
    {
      "epoch": 4.584421620418556,
      "grad_norm": 0.24431981146335602,
      "learning_rate": 8.36633435026983e-07,
      "loss": 0.0071,
      "step": 2801320
    },
    {
      "epoch": 4.58445435085721,
      "grad_norm": 0.3904688060283661,
      "learning_rate": 8.365675428134657e-07,
      "loss": 0.0072,
      "step": 2801340
    },
    {
      "epoch": 4.584487081295864,
      "grad_norm": 0.23152601718902588,
      "learning_rate": 8.365016505999486e-07,
      "loss": 0.012,
      "step": 2801360
    },
    {
      "epoch": 4.584519811734517,
      "grad_norm": 0.1851998269557953,
      "learning_rate": 8.364357583864316e-07,
      "loss": 0.0114,
      "step": 2801380
    },
    {
      "epoch": 4.58455254217317,
      "grad_norm": 0.32271522283554077,
      "learning_rate": 8.363698661729145e-07,
      "loss": 0.0072,
      "step": 2801400
    },
    {
      "epoch": 4.5845852726118235,
      "grad_norm": 0.31894370913505554,
      "learning_rate": 8.363039739593973e-07,
      "loss": 0.0071,
      "step": 2801420
    },
    {
      "epoch": 4.584618003050477,
      "grad_norm": 0.4555204510688782,
      "learning_rate": 8.362380817458801e-07,
      "loss": 0.0125,
      "step": 2801440
    },
    {
      "epoch": 4.584650733489131,
      "grad_norm": 0.08912689983844757,
      "learning_rate": 8.361721895323631e-07,
      "loss": 0.0094,
      "step": 2801460
    },
    {
      "epoch": 4.584683463927783,
      "grad_norm": 0.053110357373952866,
      "learning_rate": 8.36106297318846e-07,
      "loss": 0.005,
      "step": 2801480
    },
    {
      "epoch": 4.584716194366437,
      "grad_norm": 0.06802941858768463,
      "learning_rate": 8.360404051053288e-07,
      "loss": 0.009,
      "step": 2801500
    },
    {
      "epoch": 4.5847489248050906,
      "grad_norm": 0.18197931349277496,
      "learning_rate": 8.359745128918117e-07,
      "loss": 0.0086,
      "step": 2801520
    },
    {
      "epoch": 4.584781655243743,
      "grad_norm": 0.20469126105308533,
      "learning_rate": 8.359086206782945e-07,
      "loss": 0.0092,
      "step": 2801540
    },
    {
      "epoch": 4.584814385682397,
      "grad_norm": 0.34672224521636963,
      "learning_rate": 8.358427284647775e-07,
      "loss": 0.0154,
      "step": 2801560
    },
    {
      "epoch": 4.5848471161210504,
      "grad_norm": 0.280656099319458,
      "learning_rate": 8.357768362512603e-07,
      "loss": 0.0088,
      "step": 2801580
    },
    {
      "epoch": 4.584879846559703,
      "grad_norm": 0.11664705723524094,
      "learning_rate": 8.357109440377431e-07,
      "loss": 0.0074,
      "step": 2801600
    },
    {
      "epoch": 4.584912576998357,
      "grad_norm": 0.4742625057697296,
      "learning_rate": 8.35645051824226e-07,
      "loss": 0.0073,
      "step": 2801620
    },
    {
      "epoch": 4.58494530743701,
      "grad_norm": 0.4075719118118286,
      "learning_rate": 8.355791596107089e-07,
      "loss": 0.0069,
      "step": 2801640
    },
    {
      "epoch": 4.584978037875664,
      "grad_norm": 0.10896168649196625,
      "learning_rate": 8.355132673971918e-07,
      "loss": 0.0119,
      "step": 2801660
    },
    {
      "epoch": 4.585010768314317,
      "grad_norm": 0.06155209243297577,
      "learning_rate": 8.354473751836746e-07,
      "loss": 0.01,
      "step": 2801680
    },
    {
      "epoch": 4.58504349875297,
      "grad_norm": 0.12292972952127457,
      "learning_rate": 8.353814829701575e-07,
      "loss": 0.0064,
      "step": 2801700
    },
    {
      "epoch": 4.585076229191624,
      "grad_norm": 0.2882732152938843,
      "learning_rate": 8.353155907566403e-07,
      "loss": 0.0162,
      "step": 2801720
    },
    {
      "epoch": 4.585108959630277,
      "grad_norm": 0.5128956437110901,
      "learning_rate": 8.352496985431233e-07,
      "loss": 0.0073,
      "step": 2801740
    },
    {
      "epoch": 4.58514169006893,
      "grad_norm": 0.301072895526886,
      "learning_rate": 8.351838063296061e-07,
      "loss": 0.0079,
      "step": 2801760
    },
    {
      "epoch": 4.585174420507584,
      "grad_norm": 0.1343696415424347,
      "learning_rate": 8.35117914116089e-07,
      "loss": 0.0087,
      "step": 2801780
    },
    {
      "epoch": 4.585207150946237,
      "grad_norm": 0.37088510394096375,
      "learning_rate": 8.350520219025718e-07,
      "loss": 0.0145,
      "step": 2801800
    },
    {
      "epoch": 4.58523988138489,
      "grad_norm": 0.13134683668613434,
      "learning_rate": 8.349861296890548e-07,
      "loss": 0.009,
      "step": 2801820
    },
    {
      "epoch": 4.585272611823544,
      "grad_norm": 0.34209322929382324,
      "learning_rate": 8.349202374755376e-07,
      "loss": 0.0071,
      "step": 2801840
    },
    {
      "epoch": 4.585305342262197,
      "grad_norm": 0.5113734006881714,
      "learning_rate": 8.348543452620205e-07,
      "loss": 0.0065,
      "step": 2801860
    },
    {
      "epoch": 4.58533807270085,
      "grad_norm": 0.10696320235729218,
      "learning_rate": 8.347884530485033e-07,
      "loss": 0.0071,
      "step": 2801880
    },
    {
      "epoch": 4.5853708031395035,
      "grad_norm": 0.1349644958972931,
      "learning_rate": 8.347225608349863e-07,
      "loss": 0.0075,
      "step": 2801900
    },
    {
      "epoch": 4.585403533578157,
      "grad_norm": 0.1982681155204773,
      "learning_rate": 8.346566686214691e-07,
      "loss": 0.0087,
      "step": 2801920
    },
    {
      "epoch": 4.585436264016811,
      "grad_norm": 0.10046347230672836,
      "learning_rate": 8.345907764079519e-07,
      "loss": 0.0085,
      "step": 2801940
    },
    {
      "epoch": 4.585468994455463,
      "grad_norm": 0.18633811175823212,
      "learning_rate": 8.345248841944348e-07,
      "loss": 0.0084,
      "step": 2801960
    },
    {
      "epoch": 4.585501724894117,
      "grad_norm": 0.7899816036224365,
      "learning_rate": 8.344589919809176e-07,
      "loss": 0.0119,
      "step": 2801980
    },
    {
      "epoch": 4.585534455332771,
      "grad_norm": 0.5614283680915833,
      "learning_rate": 8.343930997674006e-07,
      "loss": 0.0128,
      "step": 2802000
    },
    {
      "epoch": 4.585567185771424,
      "grad_norm": 0.3637479543685913,
      "learning_rate": 8.343272075538834e-07,
      "loss": 0.0103,
      "step": 2802020
    },
    {
      "epoch": 4.585599916210077,
      "grad_norm": 0.38725781440734863,
      "learning_rate": 8.342613153403663e-07,
      "loss": 0.0085,
      "step": 2802040
    },
    {
      "epoch": 4.5856326466487305,
      "grad_norm": 0.2688564658164978,
      "learning_rate": 8.341954231268491e-07,
      "loss": 0.0086,
      "step": 2802060
    },
    {
      "epoch": 4.585665377087384,
      "grad_norm": 0.30154135823249817,
      "learning_rate": 8.341295309133321e-07,
      "loss": 0.0105,
      "step": 2802080
    },
    {
      "epoch": 4.585698107526037,
      "grad_norm": 0.19887098670005798,
      "learning_rate": 8.340636386998149e-07,
      "loss": 0.0102,
      "step": 2802100
    },
    {
      "epoch": 4.58573083796469,
      "grad_norm": 0.2679700255393982,
      "learning_rate": 8.339977464862978e-07,
      "loss": 0.0081,
      "step": 2802120
    },
    {
      "epoch": 4.585763568403344,
      "grad_norm": 0.09771914035081863,
      "learning_rate": 8.339318542727806e-07,
      "loss": 0.0116,
      "step": 2802140
    },
    {
      "epoch": 4.585796298841997,
      "grad_norm": 0.20361250638961792,
      "learning_rate": 8.338659620592635e-07,
      "loss": 0.0098,
      "step": 2802160
    },
    {
      "epoch": 4.58582902928065,
      "grad_norm": 0.11440009623765945,
      "learning_rate": 8.338000698457464e-07,
      "loss": 0.0081,
      "step": 2802180
    },
    {
      "epoch": 4.585861759719304,
      "grad_norm": 0.3505445718765259,
      "learning_rate": 8.337341776322293e-07,
      "loss": 0.0095,
      "step": 2802200
    },
    {
      "epoch": 4.5858944901579575,
      "grad_norm": 0.1933465600013733,
      "learning_rate": 8.336682854187121e-07,
      "loss": 0.0056,
      "step": 2802220
    },
    {
      "epoch": 4.58592722059661,
      "grad_norm": 0.31136447191238403,
      "learning_rate": 8.33602393205195e-07,
      "loss": 0.0086,
      "step": 2802240
    },
    {
      "epoch": 4.585959951035264,
      "grad_norm": 0.0701831579208374,
      "learning_rate": 8.335365009916779e-07,
      "loss": 0.0073,
      "step": 2802260
    },
    {
      "epoch": 4.585992681473917,
      "grad_norm": 0.0918937548995018,
      "learning_rate": 8.334706087781607e-07,
      "loss": 0.0073,
      "step": 2802280
    },
    {
      "epoch": 4.58602541191257,
      "grad_norm": 0.1084175780415535,
      "learning_rate": 8.334047165646436e-07,
      "loss": 0.0078,
      "step": 2802300
    },
    {
      "epoch": 4.586058142351224,
      "grad_norm": 0.1677696853876114,
      "learning_rate": 8.333388243511264e-07,
      "loss": 0.0069,
      "step": 2802320
    },
    {
      "epoch": 4.586090872789877,
      "grad_norm": 0.08944061398506165,
      "learning_rate": 8.332729321376094e-07,
      "loss": 0.0063,
      "step": 2802340
    },
    {
      "epoch": 4.586123603228531,
      "grad_norm": 0.21698744595050812,
      "learning_rate": 8.332070399240922e-07,
      "loss": 0.0089,
      "step": 2802360
    },
    {
      "epoch": 4.586156333667184,
      "grad_norm": 0.16972316801548004,
      "learning_rate": 8.331411477105751e-07,
      "loss": 0.0081,
      "step": 2802380
    },
    {
      "epoch": 4.586189064105837,
      "grad_norm": 0.15311875939369202,
      "learning_rate": 8.330752554970579e-07,
      "loss": 0.0108,
      "step": 2802400
    },
    {
      "epoch": 4.586221794544491,
      "grad_norm": 0.17320622503757477,
      "learning_rate": 8.330093632835408e-07,
      "loss": 0.0082,
      "step": 2802420
    },
    {
      "epoch": 4.5862545249831435,
      "grad_norm": 0.21212820708751678,
      "learning_rate": 8.329434710700237e-07,
      "loss": 0.0062,
      "step": 2802440
    },
    {
      "epoch": 4.586287255421797,
      "grad_norm": 0.44052746891975403,
      "learning_rate": 8.328775788565066e-07,
      "loss": 0.0075,
      "step": 2802460
    },
    {
      "epoch": 4.586319985860451,
      "grad_norm": 0.23780781030654907,
      "learning_rate": 8.328116866429894e-07,
      "loss": 0.0091,
      "step": 2802480
    },
    {
      "epoch": 4.586352716299103,
      "grad_norm": 0.13283923268318176,
      "learning_rate": 8.327457944294723e-07,
      "loss": 0.0063,
      "step": 2802500
    },
    {
      "epoch": 4.586385446737757,
      "grad_norm": 0.23262879252433777,
      "learning_rate": 8.326799022159553e-07,
      "loss": 0.0074,
      "step": 2802520
    },
    {
      "epoch": 4.5864181771764105,
      "grad_norm": 0.153578981757164,
      "learning_rate": 8.326140100024381e-07,
      "loss": 0.0105,
      "step": 2802540
    },
    {
      "epoch": 4.586450907615064,
      "grad_norm": 0.1224304661154747,
      "learning_rate": 8.325481177889209e-07,
      "loss": 0.0084,
      "step": 2802560
    },
    {
      "epoch": 4.586483638053717,
      "grad_norm": 0.07514771819114685,
      "learning_rate": 8.324822255754037e-07,
      "loss": 0.007,
      "step": 2802580
    },
    {
      "epoch": 4.58651636849237,
      "grad_norm": 0.2004520297050476,
      "learning_rate": 8.324163333618866e-07,
      "loss": 0.0091,
      "step": 2802600
    },
    {
      "epoch": 4.586549098931024,
      "grad_norm": 0.44200101494789124,
      "learning_rate": 8.323504411483696e-07,
      "loss": 0.0084,
      "step": 2802620
    },
    {
      "epoch": 4.586581829369678,
      "grad_norm": 0.44545650482177734,
      "learning_rate": 8.322845489348525e-07,
      "loss": 0.0095,
      "step": 2802640
    },
    {
      "epoch": 4.58661455980833,
      "grad_norm": 0.2746375501155853,
      "learning_rate": 8.322186567213352e-07,
      "loss": 0.0081,
      "step": 2802660
    },
    {
      "epoch": 4.586647290246984,
      "grad_norm": 0.04390132054686546,
      "learning_rate": 8.321527645078181e-07,
      "loss": 0.007,
      "step": 2802680
    },
    {
      "epoch": 4.5866800206856375,
      "grad_norm": 0.2613223195075989,
      "learning_rate": 8.320868722943011e-07,
      "loss": 0.0087,
      "step": 2802700
    },
    {
      "epoch": 4.58671275112429,
      "grad_norm": 0.372368186712265,
      "learning_rate": 8.32020980080784e-07,
      "loss": 0.0071,
      "step": 2802720
    },
    {
      "epoch": 4.586745481562944,
      "grad_norm": 0.47223708033561707,
      "learning_rate": 8.319550878672668e-07,
      "loss": 0.0133,
      "step": 2802740
    },
    {
      "epoch": 4.586778212001597,
      "grad_norm": 0.40561971068382263,
      "learning_rate": 8.318891956537496e-07,
      "loss": 0.0067,
      "step": 2802760
    },
    {
      "epoch": 4.58681094244025,
      "grad_norm": 0.09067041426897049,
      "learning_rate": 8.318233034402326e-07,
      "loss": 0.0096,
      "step": 2802780
    },
    {
      "epoch": 4.586843672878904,
      "grad_norm": 0.3743295967578888,
      "learning_rate": 8.317574112267155e-07,
      "loss": 0.0122,
      "step": 2802800
    },
    {
      "epoch": 4.586876403317557,
      "grad_norm": 0.3173096179962158,
      "learning_rate": 8.316915190131983e-07,
      "loss": 0.0097,
      "step": 2802820
    },
    {
      "epoch": 4.586909133756211,
      "grad_norm": 0.08101057261228561,
      "learning_rate": 8.316256267996812e-07,
      "loss": 0.008,
      "step": 2802840
    },
    {
      "epoch": 4.586941864194864,
      "grad_norm": 0.5435785055160522,
      "learning_rate": 8.31559734586164e-07,
      "loss": 0.011,
      "step": 2802860
    },
    {
      "epoch": 4.586974594633517,
      "grad_norm": 0.3821641802787781,
      "learning_rate": 8.31493842372647e-07,
      "loss": 0.0099,
      "step": 2802880
    },
    {
      "epoch": 4.587007325072171,
      "grad_norm": 0.07214146107435226,
      "learning_rate": 8.314279501591298e-07,
      "loss": 0.0069,
      "step": 2802900
    },
    {
      "epoch": 4.587040055510824,
      "grad_norm": 0.494634211063385,
      "learning_rate": 8.313620579456127e-07,
      "loss": 0.0055,
      "step": 2802920
    },
    {
      "epoch": 4.587072785949477,
      "grad_norm": 0.8657129406929016,
      "learning_rate": 8.312961657320955e-07,
      "loss": 0.0131,
      "step": 2802940
    },
    {
      "epoch": 4.587105516388131,
      "grad_norm": 0.19855529069900513,
      "learning_rate": 8.312302735185784e-07,
      "loss": 0.0048,
      "step": 2802960
    },
    {
      "epoch": 4.587138246826784,
      "grad_norm": 0.06456920504570007,
      "learning_rate": 8.311643813050613e-07,
      "loss": 0.0083,
      "step": 2802980
    },
    {
      "epoch": 4.587170977265437,
      "grad_norm": 0.5575113296508789,
      "learning_rate": 8.310984890915441e-07,
      "loss": 0.0102,
      "step": 2803000
    },
    {
      "epoch": 4.587203707704091,
      "grad_norm": 0.45037153363227844,
      "learning_rate": 8.31032596878027e-07,
      "loss": 0.0107,
      "step": 2803020
    },
    {
      "epoch": 4.587236438142744,
      "grad_norm": 0.07102359086275101,
      "learning_rate": 8.309667046645099e-07,
      "loss": 0.0064,
      "step": 2803040
    },
    {
      "epoch": 4.587269168581397,
      "grad_norm": 0.7818114161491394,
      "learning_rate": 8.309008124509928e-07,
      "loss": 0.0137,
      "step": 2803060
    },
    {
      "epoch": 4.5873018990200505,
      "grad_norm": 0.4478709399700165,
      "learning_rate": 8.308349202374756e-07,
      "loss": 0.0114,
      "step": 2803080
    },
    {
      "epoch": 4.587334629458704,
      "grad_norm": 0.27609941363334656,
      "learning_rate": 8.307690280239585e-07,
      "loss": 0.011,
      "step": 2803100
    },
    {
      "epoch": 4.587367359897358,
      "grad_norm": 0.24098138511180878,
      "learning_rate": 8.307031358104413e-07,
      "loss": 0.0095,
      "step": 2803120
    },
    {
      "epoch": 4.58740009033601,
      "grad_norm": 0.12954923510551453,
      "learning_rate": 8.306372435969243e-07,
      "loss": 0.0107,
      "step": 2803140
    },
    {
      "epoch": 4.587432820774664,
      "grad_norm": 0.11590361595153809,
      "learning_rate": 8.305713513834071e-07,
      "loss": 0.0083,
      "step": 2803160
    },
    {
      "epoch": 4.587465551213318,
      "grad_norm": 0.13093534111976624,
      "learning_rate": 8.3050545916989e-07,
      "loss": 0.0083,
      "step": 2803180
    },
    {
      "epoch": 4.587498281651971,
      "grad_norm": 0.19701337814331055,
      "learning_rate": 8.304395669563728e-07,
      "loss": 0.0082,
      "step": 2803200
    },
    {
      "epoch": 4.587531012090624,
      "grad_norm": 0.08070772141218185,
      "learning_rate": 8.303736747428558e-07,
      "loss": 0.0075,
      "step": 2803220
    },
    {
      "epoch": 4.5875637425292775,
      "grad_norm": 0.49227118492126465,
      "learning_rate": 8.303077825293386e-07,
      "loss": 0.0095,
      "step": 2803240
    },
    {
      "epoch": 4.587596472967931,
      "grad_norm": 0.6144070625305176,
      "learning_rate": 8.302418903158215e-07,
      "loss": 0.0113,
      "step": 2803260
    },
    {
      "epoch": 4.587629203406584,
      "grad_norm": 0.17506712675094604,
      "learning_rate": 8.301759981023043e-07,
      "loss": 0.0105,
      "step": 2803280
    },
    {
      "epoch": 4.587661933845237,
      "grad_norm": 0.2973390519618988,
      "learning_rate": 8.301101058887871e-07,
      "loss": 0.0065,
      "step": 2803300
    },
    {
      "epoch": 4.587694664283891,
      "grad_norm": 0.13456043601036072,
      "learning_rate": 8.300442136752701e-07,
      "loss": 0.0062,
      "step": 2803320
    },
    {
      "epoch": 4.587727394722544,
      "grad_norm": 0.4371543824672699,
      "learning_rate": 8.299783214617529e-07,
      "loss": 0.0052,
      "step": 2803340
    },
    {
      "epoch": 4.587760125161197,
      "grad_norm": 0.09670910239219666,
      "learning_rate": 8.299124292482358e-07,
      "loss": 0.0044,
      "step": 2803360
    },
    {
      "epoch": 4.587792855599851,
      "grad_norm": 0.19418863952159882,
      "learning_rate": 8.298465370347186e-07,
      "loss": 0.0091,
      "step": 2803380
    },
    {
      "epoch": 4.587825586038504,
      "grad_norm": 0.07065626978874207,
      "learning_rate": 8.297806448212016e-07,
      "loss": 0.0096,
      "step": 2803400
    },
    {
      "epoch": 4.587858316477157,
      "grad_norm": 0.1085166409611702,
      "learning_rate": 8.297147526076844e-07,
      "loss": 0.0071,
      "step": 2803420
    },
    {
      "epoch": 4.587891046915811,
      "grad_norm": 0.3830486536026001,
      "learning_rate": 8.296488603941673e-07,
      "loss": 0.0101,
      "step": 2803440
    },
    {
      "epoch": 4.587923777354464,
      "grad_norm": 0.3183971345424652,
      "learning_rate": 8.295829681806501e-07,
      "loss": 0.0112,
      "step": 2803460
    },
    {
      "epoch": 4.587956507793118,
      "grad_norm": 0.27668291330337524,
      "learning_rate": 8.295170759671331e-07,
      "loss": 0.0065,
      "step": 2803480
    },
    {
      "epoch": 4.587989238231771,
      "grad_norm": 0.4782138764858246,
      "learning_rate": 8.294511837536159e-07,
      "loss": 0.0139,
      "step": 2803500
    },
    {
      "epoch": 4.588021968670424,
      "grad_norm": 0.31064876914024353,
      "learning_rate": 8.293852915400988e-07,
      "loss": 0.0068,
      "step": 2803520
    },
    {
      "epoch": 4.588054699109078,
      "grad_norm": 0.30716779828071594,
      "learning_rate": 8.293193993265816e-07,
      "loss": 0.0102,
      "step": 2803540
    },
    {
      "epoch": 4.5880874295477305,
      "grad_norm": 0.16923968493938446,
      "learning_rate": 8.292535071130645e-07,
      "loss": 0.0082,
      "step": 2803560
    },
    {
      "epoch": 4.588120159986384,
      "grad_norm": 0.2013855278491974,
      "learning_rate": 8.291876148995474e-07,
      "loss": 0.0084,
      "step": 2803580
    },
    {
      "epoch": 4.588152890425038,
      "grad_norm": 0.22838951647281647,
      "learning_rate": 8.291217226860303e-07,
      "loss": 0.0123,
      "step": 2803600
    },
    {
      "epoch": 4.58818562086369,
      "grad_norm": 0.10003957897424698,
      "learning_rate": 8.290558304725131e-07,
      "loss": 0.0088,
      "step": 2803620
    },
    {
      "epoch": 4.588218351302344,
      "grad_norm": 0.2166978120803833,
      "learning_rate": 8.289899382589959e-07,
      "loss": 0.0071,
      "step": 2803640
    },
    {
      "epoch": 4.588251081740998,
      "grad_norm": 0.16327230632305145,
      "learning_rate": 8.289240460454789e-07,
      "loss": 0.0087,
      "step": 2803660
    },
    {
      "epoch": 4.588283812179651,
      "grad_norm": 0.2461082488298416,
      "learning_rate": 8.288581538319617e-07,
      "loss": 0.0074,
      "step": 2803680
    },
    {
      "epoch": 4.588316542618304,
      "grad_norm": 0.35190603137016296,
      "learning_rate": 8.287922616184446e-07,
      "loss": 0.0112,
      "step": 2803700
    },
    {
      "epoch": 4.5883492730569575,
      "grad_norm": 0.5000491738319397,
      "learning_rate": 8.287263694049274e-07,
      "loss": 0.0079,
      "step": 2803720
    },
    {
      "epoch": 4.588382003495611,
      "grad_norm": 0.21747782826423645,
      "learning_rate": 8.286604771914103e-07,
      "loss": 0.0144,
      "step": 2803740
    },
    {
      "epoch": 4.588414733934264,
      "grad_norm": 0.66438227891922,
      "learning_rate": 8.285945849778932e-07,
      "loss": 0.011,
      "step": 2803760
    },
    {
      "epoch": 4.588447464372917,
      "grad_norm": 0.575291633605957,
      "learning_rate": 8.285286927643761e-07,
      "loss": 0.0092,
      "step": 2803780
    },
    {
      "epoch": 4.588480194811571,
      "grad_norm": 0.07213713228702545,
      "learning_rate": 8.284628005508589e-07,
      "loss": 0.0074,
      "step": 2803800
    },
    {
      "epoch": 4.588512925250225,
      "grad_norm": 0.5089598298072815,
      "learning_rate": 8.283969083373418e-07,
      "loss": 0.0072,
      "step": 2803820
    },
    {
      "epoch": 4.588545655688877,
      "grad_norm": 0.21428148448467255,
      "learning_rate": 8.283310161238248e-07,
      "loss": 0.0098,
      "step": 2803840
    },
    {
      "epoch": 4.588578386127531,
      "grad_norm": 0.0571950264275074,
      "learning_rate": 8.282651239103076e-07,
      "loss": 0.0103,
      "step": 2803860
    },
    {
      "epoch": 4.5886111165661845,
      "grad_norm": 0.2470971643924713,
      "learning_rate": 8.281992316967904e-07,
      "loss": 0.0145,
      "step": 2803880
    },
    {
      "epoch": 4.588643847004837,
      "grad_norm": 0.19543106853961945,
      "learning_rate": 8.281333394832733e-07,
      "loss": 0.0133,
      "step": 2803900
    },
    {
      "epoch": 4.588676577443491,
      "grad_norm": 0.19237329065799713,
      "learning_rate": 8.280674472697563e-07,
      "loss": 0.0109,
      "step": 2803920
    },
    {
      "epoch": 4.588709307882144,
      "grad_norm": 0.137668177485466,
      "learning_rate": 8.280015550562392e-07,
      "loss": 0.0081,
      "step": 2803940
    },
    {
      "epoch": 4.588742038320797,
      "grad_norm": 0.4547135829925537,
      "learning_rate": 8.27935662842722e-07,
      "loss": 0.0096,
      "step": 2803960
    },
    {
      "epoch": 4.588774768759451,
      "grad_norm": 0.7063772082328796,
      "learning_rate": 8.278697706292047e-07,
      "loss": 0.0073,
      "step": 2803980
    },
    {
      "epoch": 4.588807499198104,
      "grad_norm": 0.10116250067949295,
      "learning_rate": 8.278038784156876e-07,
      "loss": 0.0087,
      "step": 2804000
    },
    {
      "epoch": 4.588840229636758,
      "grad_norm": 0.12477196753025055,
      "learning_rate": 8.277379862021706e-07,
      "loss": 0.0088,
      "step": 2804020
    },
    {
      "epoch": 4.588872960075411,
      "grad_norm": 0.12786374986171722,
      "learning_rate": 8.276720939886535e-07,
      "loss": 0.0093,
      "step": 2804040
    },
    {
      "epoch": 4.588905690514064,
      "grad_norm": 0.18612712621688843,
      "learning_rate": 8.276062017751363e-07,
      "loss": 0.0093,
      "step": 2804060
    },
    {
      "epoch": 4.588938420952718,
      "grad_norm": 0.21329107880592346,
      "learning_rate": 8.275403095616191e-07,
      "loss": 0.0065,
      "step": 2804080
    },
    {
      "epoch": 4.588971151391371,
      "grad_norm": 0.14815810322761536,
      "learning_rate": 8.274744173481021e-07,
      "loss": 0.0067,
      "step": 2804100
    },
    {
      "epoch": 4.589003881830024,
      "grad_norm": 0.5508118867874146,
      "learning_rate": 8.27408525134585e-07,
      "loss": 0.0081,
      "step": 2804120
    },
    {
      "epoch": 4.589036612268678,
      "grad_norm": 0.4461163282394409,
      "learning_rate": 8.273426329210678e-07,
      "loss": 0.0089,
      "step": 2804140
    },
    {
      "epoch": 4.589069342707331,
      "grad_norm": 0.31192371249198914,
      "learning_rate": 8.272767407075507e-07,
      "loss": 0.0089,
      "step": 2804160
    },
    {
      "epoch": 4.589102073145984,
      "grad_norm": 0.27139776945114136,
      "learning_rate": 8.272108484940335e-07,
      "loss": 0.0064,
      "step": 2804180
    },
    {
      "epoch": 4.5891348035846375,
      "grad_norm": 0.26515012979507446,
      "learning_rate": 8.271449562805165e-07,
      "loss": 0.008,
      "step": 2804200
    },
    {
      "epoch": 4.589167534023291,
      "grad_norm": 0.4362209439277649,
      "learning_rate": 8.270790640669993e-07,
      "loss": 0.0055,
      "step": 2804220
    },
    {
      "epoch": 4.589200264461944,
      "grad_norm": 0.20337562263011932,
      "learning_rate": 8.270131718534822e-07,
      "loss": 0.0107,
      "step": 2804240
    },
    {
      "epoch": 4.589232994900597,
      "grad_norm": 0.11929809302091599,
      "learning_rate": 8.26947279639965e-07,
      "loss": 0.011,
      "step": 2804260
    },
    {
      "epoch": 4.589265725339251,
      "grad_norm": 0.537815511226654,
      "learning_rate": 8.26881387426448e-07,
      "loss": 0.0092,
      "step": 2804280
    },
    {
      "epoch": 4.589298455777905,
      "grad_norm": 0.10003802925348282,
      "learning_rate": 8.268154952129308e-07,
      "loss": 0.0077,
      "step": 2804300
    },
    {
      "epoch": 4.589331186216557,
      "grad_norm": 0.3980928957462311,
      "learning_rate": 8.267496029994136e-07,
      "loss": 0.011,
      "step": 2804320
    },
    {
      "epoch": 4.589363916655211,
      "grad_norm": 0.1597924828529358,
      "learning_rate": 8.266837107858965e-07,
      "loss": 0.0062,
      "step": 2804340
    },
    {
      "epoch": 4.5893966470938645,
      "grad_norm": 0.38410747051239014,
      "learning_rate": 8.266178185723794e-07,
      "loss": 0.009,
      "step": 2804360
    },
    {
      "epoch": 4.589429377532518,
      "grad_norm": 0.33612507581710815,
      "learning_rate": 8.265519263588623e-07,
      "loss": 0.0082,
      "step": 2804380
    },
    {
      "epoch": 4.589462107971171,
      "grad_norm": 0.24885185062885284,
      "learning_rate": 8.264860341453451e-07,
      "loss": 0.0063,
      "step": 2804400
    },
    {
      "epoch": 4.589494838409824,
      "grad_norm": 0.14429409801959991,
      "learning_rate": 8.26420141931828e-07,
      "loss": 0.0076,
      "step": 2804420
    },
    {
      "epoch": 4.589527568848478,
      "grad_norm": 0.08393698930740356,
      "learning_rate": 8.263542497183108e-07,
      "loss": 0.0099,
      "step": 2804440
    },
    {
      "epoch": 4.589560299287131,
      "grad_norm": 0.13443872332572937,
      "learning_rate": 8.262883575047938e-07,
      "loss": 0.0061,
      "step": 2804460
    },
    {
      "epoch": 4.589593029725784,
      "grad_norm": 0.09403500705957413,
      "learning_rate": 8.262224652912766e-07,
      "loss": 0.0067,
      "step": 2804480
    },
    {
      "epoch": 4.589625760164438,
      "grad_norm": 0.13108114898204803,
      "learning_rate": 8.261565730777595e-07,
      "loss": 0.0117,
      "step": 2804500
    },
    {
      "epoch": 4.589658490603091,
      "grad_norm": 0.24015545845031738,
      "learning_rate": 8.260906808642423e-07,
      "loss": 0.0072,
      "step": 2804520
    },
    {
      "epoch": 4.589691221041744,
      "grad_norm": 0.26164060831069946,
      "learning_rate": 8.260247886507253e-07,
      "loss": 0.0064,
      "step": 2804540
    },
    {
      "epoch": 4.589723951480398,
      "grad_norm": 0.1935764104127884,
      "learning_rate": 8.259588964372081e-07,
      "loss": 0.0052,
      "step": 2804560
    },
    {
      "epoch": 4.589756681919051,
      "grad_norm": 0.34744757413864136,
      "learning_rate": 8.25893004223691e-07,
      "loss": 0.0126,
      "step": 2804580
    },
    {
      "epoch": 4.589789412357704,
      "grad_norm": 0.6317238211631775,
      "learning_rate": 8.258271120101738e-07,
      "loss": 0.0125,
      "step": 2804600
    },
    {
      "epoch": 4.589822142796358,
      "grad_norm": 0.2712700068950653,
      "learning_rate": 8.257612197966566e-07,
      "loss": 0.0106,
      "step": 2804620
    },
    {
      "epoch": 4.589854873235011,
      "grad_norm": 0.29960954189300537,
      "learning_rate": 8.256953275831396e-07,
      "loss": 0.0126,
      "step": 2804640
    },
    {
      "epoch": 4.589887603673665,
      "grad_norm": 0.31101804971694946,
      "learning_rate": 8.256294353696224e-07,
      "loss": 0.0135,
      "step": 2804660
    },
    {
      "epoch": 4.589920334112318,
      "grad_norm": 0.2744770050048828,
      "learning_rate": 8.255635431561053e-07,
      "loss": 0.0107,
      "step": 2804680
    },
    {
      "epoch": 4.589953064550971,
      "grad_norm": 0.2992047667503357,
      "learning_rate": 8.254976509425881e-07,
      "loss": 0.0079,
      "step": 2804700
    },
    {
      "epoch": 4.589985794989625,
      "grad_norm": 0.24085000157356262,
      "learning_rate": 8.254317587290711e-07,
      "loss": 0.0091,
      "step": 2804720
    },
    {
      "epoch": 4.5900185254282775,
      "grad_norm": 0.7868033647537231,
      "learning_rate": 8.253658665155539e-07,
      "loss": 0.0084,
      "step": 2804740
    },
    {
      "epoch": 4.590051255866931,
      "grad_norm": 0.1558433175086975,
      "learning_rate": 8.252999743020368e-07,
      "loss": 0.0096,
      "step": 2804760
    },
    {
      "epoch": 4.590083986305585,
      "grad_norm": 0.13407307863235474,
      "learning_rate": 8.252340820885196e-07,
      "loss": 0.0154,
      "step": 2804780
    },
    {
      "epoch": 4.590116716744237,
      "grad_norm": 0.21921426057815552,
      "learning_rate": 8.251681898750026e-07,
      "loss": 0.008,
      "step": 2804800
    },
    {
      "epoch": 4.590149447182891,
      "grad_norm": 0.5977235436439514,
      "learning_rate": 8.251022976614854e-07,
      "loss": 0.0096,
      "step": 2804820
    },
    {
      "epoch": 4.590182177621545,
      "grad_norm": 0.6025971174240112,
      "learning_rate": 8.250364054479683e-07,
      "loss": 0.0141,
      "step": 2804840
    },
    {
      "epoch": 4.590214908060198,
      "grad_norm": 0.3471773862838745,
      "learning_rate": 8.249705132344511e-07,
      "loss": 0.0077,
      "step": 2804860
    },
    {
      "epoch": 4.590247638498851,
      "grad_norm": 0.09591616690158844,
      "learning_rate": 8.24904621020934e-07,
      "loss": 0.008,
      "step": 2804880
    },
    {
      "epoch": 4.5902803689375045,
      "grad_norm": 0.15068097412586212,
      "learning_rate": 8.248387288074169e-07,
      "loss": 0.0082,
      "step": 2804900
    },
    {
      "epoch": 4.590313099376158,
      "grad_norm": 0.06891550868749619,
      "learning_rate": 8.247728365938998e-07,
      "loss": 0.0107,
      "step": 2804920
    },
    {
      "epoch": 4.590345829814812,
      "grad_norm": 0.1436806172132492,
      "learning_rate": 8.247069443803826e-07,
      "loss": 0.0105,
      "step": 2804940
    },
    {
      "epoch": 4.590378560253464,
      "grad_norm": 0.16727745532989502,
      "learning_rate": 8.246410521668654e-07,
      "loss": 0.0073,
      "step": 2804960
    },
    {
      "epoch": 4.590411290692118,
      "grad_norm": 0.16644181311130524,
      "learning_rate": 8.245751599533484e-07,
      "loss": 0.0116,
      "step": 2804980
    },
    {
      "epoch": 4.5904440211307715,
      "grad_norm": 0.37218132615089417,
      "learning_rate": 8.245092677398312e-07,
      "loss": 0.0115,
      "step": 2805000
    },
    {
      "epoch": 4.590476751569424,
      "grad_norm": 0.20887643098831177,
      "learning_rate": 8.244433755263141e-07,
      "loss": 0.0102,
      "step": 2805020
    },
    {
      "epoch": 4.590509482008078,
      "grad_norm": 0.27923786640167236,
      "learning_rate": 8.243774833127969e-07,
      "loss": 0.0105,
      "step": 2805040
    },
    {
      "epoch": 4.590542212446731,
      "grad_norm": 0.17860616743564606,
      "learning_rate": 8.243115910992798e-07,
      "loss": 0.0071,
      "step": 2805060
    },
    {
      "epoch": 4.590574942885384,
      "grad_norm": 0.1848534792661667,
      "learning_rate": 8.242456988857627e-07,
      "loss": 0.0076,
      "step": 2805080
    },
    {
      "epoch": 4.590607673324038,
      "grad_norm": 0.47190380096435547,
      "learning_rate": 8.241798066722456e-07,
      "loss": 0.0089,
      "step": 2805100
    },
    {
      "epoch": 4.590640403762691,
      "grad_norm": 0.14386431872844696,
      "learning_rate": 8.241139144587284e-07,
      "loss": 0.0063,
      "step": 2805120
    },
    {
      "epoch": 4.590673134201345,
      "grad_norm": 0.23157258331775665,
      "learning_rate": 8.240480222452113e-07,
      "loss": 0.0074,
      "step": 2805140
    },
    {
      "epoch": 4.590705864639998,
      "grad_norm": 0.1948450803756714,
      "learning_rate": 8.239821300316943e-07,
      "loss": 0.0121,
      "step": 2805160
    },
    {
      "epoch": 4.590738595078651,
      "grad_norm": 0.07584010809659958,
      "learning_rate": 8.239162378181771e-07,
      "loss": 0.0097,
      "step": 2805180
    },
    {
      "epoch": 4.590771325517305,
      "grad_norm": 0.15100924670696259,
      "learning_rate": 8.238503456046599e-07,
      "loss": 0.0108,
      "step": 2805200
    },
    {
      "epoch": 4.590804055955958,
      "grad_norm": 0.27012062072753906,
      "learning_rate": 8.237844533911428e-07,
      "loss": 0.005,
      "step": 2805220
    },
    {
      "epoch": 4.590836786394611,
      "grad_norm": 0.5167903900146484,
      "learning_rate": 8.237185611776258e-07,
      "loss": 0.0109,
      "step": 2805240
    },
    {
      "epoch": 4.590869516833265,
      "grad_norm": 0.5220171213150024,
      "learning_rate": 8.236526689641087e-07,
      "loss": 0.0081,
      "step": 2805260
    },
    {
      "epoch": 4.590902247271918,
      "grad_norm": 0.24988147616386414,
      "learning_rate": 8.235867767505915e-07,
      "loss": 0.0074,
      "step": 2805280
    },
    {
      "epoch": 4.590934977710571,
      "grad_norm": 0.8618581891059875,
      "learning_rate": 8.235208845370742e-07,
      "loss": 0.0123,
      "step": 2805300
    },
    {
      "epoch": 4.590967708149225,
      "grad_norm": 0.5437560081481934,
      "learning_rate": 8.234549923235571e-07,
      "loss": 0.0123,
      "step": 2805320
    },
    {
      "epoch": 4.591000438587878,
      "grad_norm": 0.21438108384609222,
      "learning_rate": 8.233891001100401e-07,
      "loss": 0.0054,
      "step": 2805340
    },
    {
      "epoch": 4.591033169026531,
      "grad_norm": 0.33491894602775574,
      "learning_rate": 8.23323207896523e-07,
      "loss": 0.007,
      "step": 2805360
    },
    {
      "epoch": 4.5910658994651845,
      "grad_norm": 0.07541446387767792,
      "learning_rate": 8.232573156830058e-07,
      "loss": 0.009,
      "step": 2805380
    },
    {
      "epoch": 4.591098629903838,
      "grad_norm": 0.1836031824350357,
      "learning_rate": 8.231914234694886e-07,
      "loss": 0.0101,
      "step": 2805400
    },
    {
      "epoch": 4.591131360342492,
      "grad_norm": 0.39162346720695496,
      "learning_rate": 8.231255312559716e-07,
      "loss": 0.0135,
      "step": 2805420
    },
    {
      "epoch": 4.591164090781144,
      "grad_norm": 0.25532594323158264,
      "learning_rate": 8.230596390424545e-07,
      "loss": 0.0067,
      "step": 2805440
    },
    {
      "epoch": 4.591196821219798,
      "grad_norm": 0.20426827669143677,
      "learning_rate": 8.229937468289373e-07,
      "loss": 0.0099,
      "step": 2805460
    },
    {
      "epoch": 4.591229551658452,
      "grad_norm": 0.1546318680047989,
      "learning_rate": 8.229278546154202e-07,
      "loss": 0.0104,
      "step": 2805480
    },
    {
      "epoch": 4.591262282097104,
      "grad_norm": 0.20282529294490814,
      "learning_rate": 8.22861962401903e-07,
      "loss": 0.0107,
      "step": 2805500
    },
    {
      "epoch": 4.591295012535758,
      "grad_norm": 0.09741468727588654,
      "learning_rate": 8.22796070188386e-07,
      "loss": 0.0103,
      "step": 2805520
    },
    {
      "epoch": 4.5913277429744115,
      "grad_norm": 0.5660802721977234,
      "learning_rate": 8.227301779748688e-07,
      "loss": 0.0124,
      "step": 2805540
    },
    {
      "epoch": 4.591360473413065,
      "grad_norm": 0.3695880174636841,
      "learning_rate": 8.226642857613517e-07,
      "loss": 0.0108,
      "step": 2805560
    },
    {
      "epoch": 4.591393203851718,
      "grad_norm": 0.27036339044570923,
      "learning_rate": 8.225983935478345e-07,
      "loss": 0.0122,
      "step": 2805580
    },
    {
      "epoch": 4.591425934290371,
      "grad_norm": 0.36767420172691345,
      "learning_rate": 8.225325013343175e-07,
      "loss": 0.0067,
      "step": 2805600
    },
    {
      "epoch": 4.591458664729025,
      "grad_norm": 0.5582630038261414,
      "learning_rate": 8.224666091208003e-07,
      "loss": 0.0115,
      "step": 2805620
    },
    {
      "epoch": 4.591491395167678,
      "grad_norm": 0.14655141532421112,
      "learning_rate": 8.224007169072832e-07,
      "loss": 0.0097,
      "step": 2805640
    },
    {
      "epoch": 4.591524125606331,
      "grad_norm": 0.08492089062929153,
      "learning_rate": 8.22334824693766e-07,
      "loss": 0.007,
      "step": 2805660
    },
    {
      "epoch": 4.591556856044985,
      "grad_norm": 0.3756372034549713,
      "learning_rate": 8.22268932480249e-07,
      "loss": 0.0107,
      "step": 2805680
    },
    {
      "epoch": 4.591589586483638,
      "grad_norm": 0.12935058772563934,
      "learning_rate": 8.222030402667318e-07,
      "loss": 0.0069,
      "step": 2805700
    },
    {
      "epoch": 4.591622316922291,
      "grad_norm": 0.253801554441452,
      "learning_rate": 8.221371480532146e-07,
      "loss": 0.0058,
      "step": 2805720
    },
    {
      "epoch": 4.591655047360945,
      "grad_norm": 0.2295628935098648,
      "learning_rate": 8.220712558396975e-07,
      "loss": 0.0118,
      "step": 2805740
    },
    {
      "epoch": 4.591687777799598,
      "grad_norm": 0.27748942375183105,
      "learning_rate": 8.220053636261803e-07,
      "loss": 0.0119,
      "step": 2805760
    },
    {
      "epoch": 4.591720508238251,
      "grad_norm": 0.1794806867837906,
      "learning_rate": 8.219394714126633e-07,
      "loss": 0.0054,
      "step": 2805780
    },
    {
      "epoch": 4.591753238676905,
      "grad_norm": 0.22873325645923615,
      "learning_rate": 8.218735791991461e-07,
      "loss": 0.0125,
      "step": 2805800
    },
    {
      "epoch": 4.591785969115558,
      "grad_norm": 0.1421624720096588,
      "learning_rate": 8.21807686985629e-07,
      "loss": 0.0124,
      "step": 2805820
    },
    {
      "epoch": 4.591818699554212,
      "grad_norm": 0.17067325115203857,
      "learning_rate": 8.217417947721118e-07,
      "loss": 0.0113,
      "step": 2805840
    },
    {
      "epoch": 4.5918514299928646,
      "grad_norm": 0.328185111284256,
      "learning_rate": 8.216759025585948e-07,
      "loss": 0.0126,
      "step": 2805860
    },
    {
      "epoch": 4.591884160431518,
      "grad_norm": 0.07927440106868744,
      "learning_rate": 8.216100103450776e-07,
      "loss": 0.0067,
      "step": 2805880
    },
    {
      "epoch": 4.591916890870172,
      "grad_norm": 0.1183101162314415,
      "learning_rate": 8.215441181315605e-07,
      "loss": 0.0101,
      "step": 2805900
    },
    {
      "epoch": 4.5919496213088244,
      "grad_norm": 0.13383585214614868,
      "learning_rate": 8.214782259180433e-07,
      "loss": 0.0094,
      "step": 2805920
    },
    {
      "epoch": 4.591982351747478,
      "grad_norm": 0.24685810506343842,
      "learning_rate": 8.214123337045262e-07,
      "loss": 0.0068,
      "step": 2805940
    },
    {
      "epoch": 4.592015082186132,
      "grad_norm": 0.06694892048835754,
      "learning_rate": 8.213464414910091e-07,
      "loss": 0.0092,
      "step": 2805960
    },
    {
      "epoch": 4.592047812624784,
      "grad_norm": 0.21420776844024658,
      "learning_rate": 8.21280549277492e-07,
      "loss": 0.0088,
      "step": 2805980
    },
    {
      "epoch": 4.592080543063438,
      "grad_norm": 0.1414584517478943,
      "learning_rate": 8.212146570639748e-07,
      "loss": 0.0107,
      "step": 2806000
    },
    {
      "epoch": 4.5921132735020915,
      "grad_norm": 0.09382737427949905,
      "learning_rate": 8.211487648504576e-07,
      "loss": 0.0043,
      "step": 2806020
    },
    {
      "epoch": 4.592146003940745,
      "grad_norm": 0.1339249610900879,
      "learning_rate": 8.210828726369406e-07,
      "loss": 0.005,
      "step": 2806040
    },
    {
      "epoch": 4.592178734379398,
      "grad_norm": 0.4406444728374481,
      "learning_rate": 8.210169804234234e-07,
      "loss": 0.0072,
      "step": 2806060
    },
    {
      "epoch": 4.592211464818051,
      "grad_norm": 0.3470019996166229,
      "learning_rate": 8.209510882099063e-07,
      "loss": 0.0135,
      "step": 2806080
    },
    {
      "epoch": 4.592244195256705,
      "grad_norm": 0.056533027440309525,
      "learning_rate": 8.208851959963891e-07,
      "loss": 0.0082,
      "step": 2806100
    },
    {
      "epoch": 4.592276925695359,
      "grad_norm": 0.2567732036113739,
      "learning_rate": 8.208193037828721e-07,
      "loss": 0.0118,
      "step": 2806120
    },
    {
      "epoch": 4.592309656134011,
      "grad_norm": 0.30881747603416443,
      "learning_rate": 8.207534115693549e-07,
      "loss": 0.0134,
      "step": 2806140
    },
    {
      "epoch": 4.592342386572665,
      "grad_norm": 0.5211969614028931,
      "learning_rate": 8.206875193558378e-07,
      "loss": 0.0083,
      "step": 2806160
    },
    {
      "epoch": 4.5923751170113185,
      "grad_norm": 0.13326485455036163,
      "learning_rate": 8.206216271423206e-07,
      "loss": 0.0075,
      "step": 2806180
    },
    {
      "epoch": 4.592407847449971,
      "grad_norm": 0.2689182460308075,
      "learning_rate": 8.205557349288035e-07,
      "loss": 0.0131,
      "step": 2806200
    },
    {
      "epoch": 4.592440577888625,
      "grad_norm": 0.15768875181674957,
      "learning_rate": 8.204898427152864e-07,
      "loss": 0.008,
      "step": 2806220
    },
    {
      "epoch": 4.592473308327278,
      "grad_norm": 0.2079474925994873,
      "learning_rate": 8.204239505017693e-07,
      "loss": 0.0158,
      "step": 2806240
    },
    {
      "epoch": 4.592506038765931,
      "grad_norm": 0.29880326986312866,
      "learning_rate": 8.203580582882521e-07,
      "loss": 0.0106,
      "step": 2806260
    },
    {
      "epoch": 4.592538769204585,
      "grad_norm": 0.08051925152540207,
      "learning_rate": 8.20292166074735e-07,
      "loss": 0.0103,
      "step": 2806280
    },
    {
      "epoch": 4.592571499643238,
      "grad_norm": 0.2017747312784195,
      "learning_rate": 8.202262738612179e-07,
      "loss": 0.0132,
      "step": 2806300
    },
    {
      "epoch": 4.592604230081892,
      "grad_norm": 0.28383076190948486,
      "learning_rate": 8.201603816477007e-07,
      "loss": 0.008,
      "step": 2806320
    },
    {
      "epoch": 4.592636960520545,
      "grad_norm": 0.21888218820095062,
      "learning_rate": 8.200944894341836e-07,
      "loss": 0.0136,
      "step": 2806340
    },
    {
      "epoch": 4.592669690959198,
      "grad_norm": 0.21503733098506927,
      "learning_rate": 8.200285972206664e-07,
      "loss": 0.0063,
      "step": 2806360
    },
    {
      "epoch": 4.592702421397852,
      "grad_norm": 0.13628236949443817,
      "learning_rate": 8.199627050071493e-07,
      "loss": 0.0115,
      "step": 2806380
    },
    {
      "epoch": 4.592735151836505,
      "grad_norm": 0.26972824335098267,
      "learning_rate": 8.198968127936322e-07,
      "loss": 0.0057,
      "step": 2806400
    },
    {
      "epoch": 4.592767882275158,
      "grad_norm": 0.7987851500511169,
      "learning_rate": 8.198309205801151e-07,
      "loss": 0.0141,
      "step": 2806420
    },
    {
      "epoch": 4.592800612713812,
      "grad_norm": 0.3881513774394989,
      "learning_rate": 8.197650283665979e-07,
      "loss": 0.0102,
      "step": 2806440
    },
    {
      "epoch": 4.592833343152465,
      "grad_norm": 0.1657000035047531,
      "learning_rate": 8.196991361530808e-07,
      "loss": 0.0073,
      "step": 2806460
    },
    {
      "epoch": 4.592866073591118,
      "grad_norm": 0.5037235617637634,
      "learning_rate": 8.196332439395638e-07,
      "loss": 0.0116,
      "step": 2806480
    },
    {
      "epoch": 4.592898804029772,
      "grad_norm": 0.3434637188911438,
      "learning_rate": 8.195673517260466e-07,
      "loss": 0.0077,
      "step": 2806500
    },
    {
      "epoch": 4.592931534468425,
      "grad_norm": 0.23424746096134186,
      "learning_rate": 8.195014595125294e-07,
      "loss": 0.0103,
      "step": 2806520
    },
    {
      "epoch": 4.592964264907078,
      "grad_norm": 0.23134343326091766,
      "learning_rate": 8.194355672990123e-07,
      "loss": 0.0088,
      "step": 2806540
    },
    {
      "epoch": 4.5929969953457315,
      "grad_norm": 0.49698546528816223,
      "learning_rate": 8.193696750854953e-07,
      "loss": 0.0097,
      "step": 2806560
    },
    {
      "epoch": 4.593029725784385,
      "grad_norm": 0.4388567805290222,
      "learning_rate": 8.193037828719782e-07,
      "loss": 0.0122,
      "step": 2806580
    },
    {
      "epoch": 4.593062456223039,
      "grad_norm": 0.17102523148059845,
      "learning_rate": 8.19237890658461e-07,
      "loss": 0.0063,
      "step": 2806600
    },
    {
      "epoch": 4.593095186661691,
      "grad_norm": 0.15608716011047363,
      "learning_rate": 8.191719984449438e-07,
      "loss": 0.011,
      "step": 2806620
    },
    {
      "epoch": 4.593127917100345,
      "grad_norm": 0.11271625012159348,
      "learning_rate": 8.191061062314266e-07,
      "loss": 0.0102,
      "step": 2806640
    },
    {
      "epoch": 4.5931606475389986,
      "grad_norm": 0.1524909883737564,
      "learning_rate": 8.190402140179097e-07,
      "loss": 0.0067,
      "step": 2806660
    },
    {
      "epoch": 4.593193377977652,
      "grad_norm": 0.4233601987361908,
      "learning_rate": 8.189743218043925e-07,
      "loss": 0.0089,
      "step": 2806680
    },
    {
      "epoch": 4.593226108416305,
      "grad_norm": 0.20927274227142334,
      "learning_rate": 8.189084295908753e-07,
      "loss": 0.0089,
      "step": 2806700
    },
    {
      "epoch": 4.5932588388549584,
      "grad_norm": 0.10154155641794205,
      "learning_rate": 8.188425373773581e-07,
      "loss": 0.0089,
      "step": 2806720
    },
    {
      "epoch": 4.593291569293612,
      "grad_norm": 0.3073653280735016,
      "learning_rate": 8.187766451638411e-07,
      "loss": 0.0088,
      "step": 2806740
    },
    {
      "epoch": 4.593324299732265,
      "grad_norm": 0.6402963399887085,
      "learning_rate": 8.18710752950324e-07,
      "loss": 0.0111,
      "step": 2806760
    },
    {
      "epoch": 4.593357030170918,
      "grad_norm": 0.5679101943969727,
      "learning_rate": 8.186448607368068e-07,
      "loss": 0.009,
      "step": 2806780
    },
    {
      "epoch": 4.593389760609572,
      "grad_norm": 0.26584941148757935,
      "learning_rate": 8.185789685232897e-07,
      "loss": 0.0056,
      "step": 2806800
    },
    {
      "epoch": 4.593422491048225,
      "grad_norm": 0.2443036437034607,
      "learning_rate": 8.185130763097725e-07,
      "loss": 0.0115,
      "step": 2806820
    },
    {
      "epoch": 4.593455221486878,
      "grad_norm": 0.20334883034229279,
      "learning_rate": 8.184471840962555e-07,
      "loss": 0.0069,
      "step": 2806840
    },
    {
      "epoch": 4.593487951925532,
      "grad_norm": 0.11540180444717407,
      "learning_rate": 8.183812918827383e-07,
      "loss": 0.0076,
      "step": 2806860
    },
    {
      "epoch": 4.593520682364185,
      "grad_norm": 0.16183152794837952,
      "learning_rate": 8.183153996692212e-07,
      "loss": 0.0082,
      "step": 2806880
    },
    {
      "epoch": 4.593553412802838,
      "grad_norm": 0.455888032913208,
      "learning_rate": 8.18249507455704e-07,
      "loss": 0.0098,
      "step": 2806900
    },
    {
      "epoch": 4.593586143241492,
      "grad_norm": 0.1948651373386383,
      "learning_rate": 8.18183615242187e-07,
      "loss": 0.0068,
      "step": 2806920
    },
    {
      "epoch": 4.593618873680145,
      "grad_norm": 0.1288096159696579,
      "learning_rate": 8.181177230286698e-07,
      "loss": 0.0095,
      "step": 2806940
    },
    {
      "epoch": 4.593651604118798,
      "grad_norm": 0.08734503388404846,
      "learning_rate": 8.180518308151527e-07,
      "loss": 0.0071,
      "step": 2806960
    },
    {
      "epoch": 4.593684334557452,
      "grad_norm": 0.19874487817287445,
      "learning_rate": 8.179859386016355e-07,
      "loss": 0.0073,
      "step": 2806980
    },
    {
      "epoch": 4.593717064996105,
      "grad_norm": 0.11738751828670502,
      "learning_rate": 8.179200463881185e-07,
      "loss": 0.0094,
      "step": 2807000
    },
    {
      "epoch": 4.593749795434759,
      "grad_norm": 0.2999987304210663,
      "learning_rate": 8.178541541746013e-07,
      "loss": 0.0104,
      "step": 2807020
    },
    {
      "epoch": 4.5937825258734115,
      "grad_norm": 0.3170737326145172,
      "learning_rate": 8.177882619610841e-07,
      "loss": 0.0093,
      "step": 2807040
    },
    {
      "epoch": 4.593815256312065,
      "grad_norm": 0.16782927513122559,
      "learning_rate": 8.17722369747567e-07,
      "loss": 0.0071,
      "step": 2807060
    },
    {
      "epoch": 4.593847986750719,
      "grad_norm": 0.07551243156194687,
      "learning_rate": 8.176564775340498e-07,
      "loss": 0.0107,
      "step": 2807080
    },
    {
      "epoch": 4.593880717189371,
      "grad_norm": 0.42926350235939026,
      "learning_rate": 8.175905853205328e-07,
      "loss": 0.0087,
      "step": 2807100
    },
    {
      "epoch": 4.593913447628025,
      "grad_norm": 0.37664902210235596,
      "learning_rate": 8.175246931070156e-07,
      "loss": 0.0074,
      "step": 2807120
    },
    {
      "epoch": 4.593946178066679,
      "grad_norm": 0.31966015696525574,
      "learning_rate": 8.174588008934985e-07,
      "loss": 0.0079,
      "step": 2807140
    },
    {
      "epoch": 4.593978908505331,
      "grad_norm": 0.23373639583587646,
      "learning_rate": 8.173929086799813e-07,
      "loss": 0.0085,
      "step": 2807160
    },
    {
      "epoch": 4.594011638943985,
      "grad_norm": 0.3611396551132202,
      "learning_rate": 8.173270164664643e-07,
      "loss": 0.0091,
      "step": 2807180
    },
    {
      "epoch": 4.5940443693826385,
      "grad_norm": 0.09190524369478226,
      "learning_rate": 8.172611242529471e-07,
      "loss": 0.0082,
      "step": 2807200
    },
    {
      "epoch": 4.594077099821292,
      "grad_norm": 0.2973744869232178,
      "learning_rate": 8.1719523203943e-07,
      "loss": 0.0109,
      "step": 2807220
    },
    {
      "epoch": 4.594109830259945,
      "grad_norm": 0.1421372890472412,
      "learning_rate": 8.171293398259128e-07,
      "loss": 0.0052,
      "step": 2807240
    },
    {
      "epoch": 4.594142560698598,
      "grad_norm": 0.4612343907356262,
      "learning_rate": 8.170634476123957e-07,
      "loss": 0.0084,
      "step": 2807260
    },
    {
      "epoch": 4.594175291137252,
      "grad_norm": 0.10959582030773163,
      "learning_rate": 8.169975553988786e-07,
      "loss": 0.0086,
      "step": 2807280
    },
    {
      "epoch": 4.594208021575906,
      "grad_norm": 0.2532556653022766,
      "learning_rate": 8.169316631853615e-07,
      "loss": 0.0164,
      "step": 2807300
    },
    {
      "epoch": 4.594240752014558,
      "grad_norm": 0.07162921130657196,
      "learning_rate": 8.168657709718443e-07,
      "loss": 0.0086,
      "step": 2807320
    },
    {
      "epoch": 4.594273482453212,
      "grad_norm": 0.10702096670866013,
      "learning_rate": 8.167998787583271e-07,
      "loss": 0.0114,
      "step": 2807340
    },
    {
      "epoch": 4.5943062128918655,
      "grad_norm": 0.26634883880615234,
      "learning_rate": 8.167339865448101e-07,
      "loss": 0.0102,
      "step": 2807360
    },
    {
      "epoch": 4.594338943330518,
      "grad_norm": 0.32341328263282776,
      "learning_rate": 8.166680943312929e-07,
      "loss": 0.0117,
      "step": 2807380
    },
    {
      "epoch": 4.594371673769172,
      "grad_norm": 0.19024774432182312,
      "learning_rate": 8.166022021177758e-07,
      "loss": 0.0108,
      "step": 2807400
    },
    {
      "epoch": 4.594404404207825,
      "grad_norm": 0.3197639584541321,
      "learning_rate": 8.165363099042586e-07,
      "loss": 0.0086,
      "step": 2807420
    },
    {
      "epoch": 4.594437134646478,
      "grad_norm": 0.2996399998664856,
      "learning_rate": 8.164704176907416e-07,
      "loss": 0.0077,
      "step": 2807440
    },
    {
      "epoch": 4.594469865085132,
      "grad_norm": 0.472842812538147,
      "learning_rate": 8.164045254772244e-07,
      "loss": 0.0091,
      "step": 2807460
    },
    {
      "epoch": 4.594502595523785,
      "grad_norm": 0.2568559944629669,
      "learning_rate": 8.163386332637073e-07,
      "loss": 0.0093,
      "step": 2807480
    },
    {
      "epoch": 4.594535325962439,
      "grad_norm": 0.3541485071182251,
      "learning_rate": 8.162727410501901e-07,
      "loss": 0.0116,
      "step": 2807500
    },
    {
      "epoch": 4.594568056401092,
      "grad_norm": 0.5418105125427246,
      "learning_rate": 8.16206848836673e-07,
      "loss": 0.0075,
      "step": 2807520
    },
    {
      "epoch": 4.594600786839745,
      "grad_norm": 0.16083109378814697,
      "learning_rate": 8.161409566231559e-07,
      "loss": 0.0062,
      "step": 2807540
    },
    {
      "epoch": 4.594633517278399,
      "grad_norm": 0.3647290766239166,
      "learning_rate": 8.160750644096388e-07,
      "loss": 0.008,
      "step": 2807560
    },
    {
      "epoch": 4.594666247717052,
      "grad_norm": 0.27311623096466064,
      "learning_rate": 8.160091721961216e-07,
      "loss": 0.0097,
      "step": 2807580
    },
    {
      "epoch": 4.594698978155705,
      "grad_norm": 0.11383707821369171,
      "learning_rate": 8.159432799826045e-07,
      "loss": 0.0084,
      "step": 2807600
    },
    {
      "epoch": 4.594731708594359,
      "grad_norm": 0.6646621227264404,
      "learning_rate": 8.158773877690874e-07,
      "loss": 0.0136,
      "step": 2807620
    },
    {
      "epoch": 4.594764439033012,
      "grad_norm": 0.23445038497447968,
      "learning_rate": 8.158114955555703e-07,
      "loss": 0.0094,
      "step": 2807640
    },
    {
      "epoch": 4.594797169471665,
      "grad_norm": 0.2868245244026184,
      "learning_rate": 8.157456033420531e-07,
      "loss": 0.0091,
      "step": 2807660
    },
    {
      "epoch": 4.5948298999103185,
      "grad_norm": 0.1585753858089447,
      "learning_rate": 8.156797111285359e-07,
      "loss": 0.0058,
      "step": 2807680
    },
    {
      "epoch": 4.594862630348972,
      "grad_norm": 0.12783917784690857,
      "learning_rate": 8.156138189150188e-07,
      "loss": 0.0065,
      "step": 2807700
    },
    {
      "epoch": 4.594895360787625,
      "grad_norm": 0.1642632931470871,
      "learning_rate": 8.155479267015017e-07,
      "loss": 0.0083,
      "step": 2807720
    },
    {
      "epoch": 4.594928091226278,
      "grad_norm": 0.27520254254341125,
      "learning_rate": 8.154820344879846e-07,
      "loss": 0.0081,
      "step": 2807740
    },
    {
      "epoch": 4.594960821664932,
      "grad_norm": 0.46452796459198,
      "learning_rate": 8.154161422744674e-07,
      "loss": 0.008,
      "step": 2807760
    },
    {
      "epoch": 4.594993552103586,
      "grad_norm": 0.1657845675945282,
      "learning_rate": 8.153502500609503e-07,
      "loss": 0.0107,
      "step": 2807780
    },
    {
      "epoch": 4.595026282542238,
      "grad_norm": 0.08984313160181046,
      "learning_rate": 8.152843578474333e-07,
      "loss": 0.0065,
      "step": 2807800
    },
    {
      "epoch": 4.595059012980892,
      "grad_norm": 0.4564405083656311,
      "learning_rate": 8.152184656339161e-07,
      "loss": 0.0139,
      "step": 2807820
    },
    {
      "epoch": 4.5950917434195455,
      "grad_norm": 0.13894033432006836,
      "learning_rate": 8.151525734203989e-07,
      "loss": 0.0064,
      "step": 2807840
    },
    {
      "epoch": 4.595124473858199,
      "grad_norm": 0.5965827703475952,
      "learning_rate": 8.150866812068818e-07,
      "loss": 0.0173,
      "step": 2807860
    },
    {
      "epoch": 4.595157204296852,
      "grad_norm": 0.22670140862464905,
      "learning_rate": 8.150207889933648e-07,
      "loss": 0.0063,
      "step": 2807880
    },
    {
      "epoch": 4.595189934735505,
      "grad_norm": 0.41623881459236145,
      "learning_rate": 8.149548967798477e-07,
      "loss": 0.007,
      "step": 2807900
    },
    {
      "epoch": 4.595222665174159,
      "grad_norm": 0.4363887310028076,
      "learning_rate": 8.148890045663305e-07,
      "loss": 0.0092,
      "step": 2807920
    },
    {
      "epoch": 4.595255395612812,
      "grad_norm": 0.14814209938049316,
      "learning_rate": 8.148231123528133e-07,
      "loss": 0.0096,
      "step": 2807940
    },
    {
      "epoch": 4.595288126051465,
      "grad_norm": 0.24682500958442688,
      "learning_rate": 8.147572201392961e-07,
      "loss": 0.0105,
      "step": 2807960
    },
    {
      "epoch": 4.595320856490119,
      "grad_norm": 0.25107425451278687,
      "learning_rate": 8.146913279257792e-07,
      "loss": 0.0087,
      "step": 2807980
    },
    {
      "epoch": 4.595353586928772,
      "grad_norm": 0.25823596119880676,
      "learning_rate": 8.14625435712262e-07,
      "loss": 0.0113,
      "step": 2808000
    },
    {
      "epoch": 4.595386317367425,
      "grad_norm": 0.21162214875221252,
      "learning_rate": 8.145595434987449e-07,
      "loss": 0.0097,
      "step": 2808020
    },
    {
      "epoch": 4.595419047806079,
      "grad_norm": 0.165422260761261,
      "learning_rate": 8.144936512852277e-07,
      "loss": 0.0073,
      "step": 2808040
    },
    {
      "epoch": 4.595451778244732,
      "grad_norm": 0.18625037372112274,
      "learning_rate": 8.144277590717106e-07,
      "loss": 0.012,
      "step": 2808060
    },
    {
      "epoch": 4.595484508683385,
      "grad_norm": 0.31003105640411377,
      "learning_rate": 8.143618668581935e-07,
      "loss": 0.0072,
      "step": 2808080
    },
    {
      "epoch": 4.595517239122039,
      "grad_norm": 0.1476174294948578,
      "learning_rate": 8.142959746446763e-07,
      "loss": 0.0074,
      "step": 2808100
    },
    {
      "epoch": 4.595549969560692,
      "grad_norm": 0.06676090508699417,
      "learning_rate": 8.142300824311592e-07,
      "loss": 0.0124,
      "step": 2808120
    },
    {
      "epoch": 4.595582699999346,
      "grad_norm": 0.33848774433135986,
      "learning_rate": 8.14164190217642e-07,
      "loss": 0.0078,
      "step": 2808140
    },
    {
      "epoch": 4.595615430437999,
      "grad_norm": 0.19027550518512726,
      "learning_rate": 8.14098298004125e-07,
      "loss": 0.0055,
      "step": 2808160
    },
    {
      "epoch": 4.595648160876652,
      "grad_norm": 0.2924867868423462,
      "learning_rate": 8.140324057906078e-07,
      "loss": 0.0101,
      "step": 2808180
    },
    {
      "epoch": 4.595680891315306,
      "grad_norm": 0.14927977323532104,
      "learning_rate": 8.139665135770907e-07,
      "loss": 0.0133,
      "step": 2808200
    },
    {
      "epoch": 4.5957136217539585,
      "grad_norm": 0.2328682541847229,
      "learning_rate": 8.139006213635735e-07,
      "loss": 0.0099,
      "step": 2808220
    },
    {
      "epoch": 4.595746352192612,
      "grad_norm": 0.1570349633693695,
      "learning_rate": 8.138347291500565e-07,
      "loss": 0.0078,
      "step": 2808240
    },
    {
      "epoch": 4.595779082631266,
      "grad_norm": 0.24260875582695007,
      "learning_rate": 8.137688369365393e-07,
      "loss": 0.0104,
      "step": 2808260
    },
    {
      "epoch": 4.595811813069918,
      "grad_norm": 0.12036096304655075,
      "learning_rate": 8.137029447230222e-07,
      "loss": 0.0093,
      "step": 2808280
    },
    {
      "epoch": 4.595844543508572,
      "grad_norm": 0.1451442688703537,
      "learning_rate": 8.13637052509505e-07,
      "loss": 0.0074,
      "step": 2808300
    },
    {
      "epoch": 4.595877273947226,
      "grad_norm": 0.41723203659057617,
      "learning_rate": 8.13571160295988e-07,
      "loss": 0.0078,
      "step": 2808320
    },
    {
      "epoch": 4.595910004385879,
      "grad_norm": 0.06150299310684204,
      "learning_rate": 8.135052680824708e-07,
      "loss": 0.0116,
      "step": 2808340
    },
    {
      "epoch": 4.595942734824532,
      "grad_norm": 0.21064577996730804,
      "learning_rate": 8.134393758689536e-07,
      "loss": 0.0088,
      "step": 2808360
    },
    {
      "epoch": 4.5959754652631855,
      "grad_norm": 0.27469494938850403,
      "learning_rate": 8.133734836554365e-07,
      "loss": 0.0065,
      "step": 2808380
    },
    {
      "epoch": 4.596008195701839,
      "grad_norm": 0.39625319838523865,
      "learning_rate": 8.133075914419193e-07,
      "loss": 0.0113,
      "step": 2808400
    },
    {
      "epoch": 4.596040926140492,
      "grad_norm": 0.37410813570022583,
      "learning_rate": 8.132416992284023e-07,
      "loss": 0.0097,
      "step": 2808420
    },
    {
      "epoch": 4.596073656579145,
      "grad_norm": 0.19732074439525604,
      "learning_rate": 8.131758070148851e-07,
      "loss": 0.0099,
      "step": 2808440
    },
    {
      "epoch": 4.596106387017799,
      "grad_norm": 0.387318879365921,
      "learning_rate": 8.13109914801368e-07,
      "loss": 0.009,
      "step": 2808460
    },
    {
      "epoch": 4.5961391174564525,
      "grad_norm": 0.20686082541942596,
      "learning_rate": 8.130440225878508e-07,
      "loss": 0.0065,
      "step": 2808480
    },
    {
      "epoch": 4.596171847895105,
      "grad_norm": 0.1649172455072403,
      "learning_rate": 8.129781303743338e-07,
      "loss": 0.0068,
      "step": 2808500
    },
    {
      "epoch": 4.596204578333759,
      "grad_norm": 0.1485898345708847,
      "learning_rate": 8.129122381608166e-07,
      "loss": 0.0075,
      "step": 2808520
    },
    {
      "epoch": 4.596237308772412,
      "grad_norm": 0.08058926463127136,
      "learning_rate": 8.128463459472995e-07,
      "loss": 0.0141,
      "step": 2808540
    },
    {
      "epoch": 4.596270039211065,
      "grad_norm": 0.2582065761089325,
      "learning_rate": 8.127804537337823e-07,
      "loss": 0.0088,
      "step": 2808560
    },
    {
      "epoch": 4.596302769649719,
      "grad_norm": 0.15177440643310547,
      "learning_rate": 8.127145615202652e-07,
      "loss": 0.0083,
      "step": 2808580
    },
    {
      "epoch": 4.596335500088372,
      "grad_norm": 0.41166236996650696,
      "learning_rate": 8.126486693067481e-07,
      "loss": 0.0072,
      "step": 2808600
    },
    {
      "epoch": 4.596368230527025,
      "grad_norm": 0.11029994487762451,
      "learning_rate": 8.12582777093231e-07,
      "loss": 0.0104,
      "step": 2808620
    },
    {
      "epoch": 4.596400960965679,
      "grad_norm": 0.07402864098548889,
      "learning_rate": 8.125168848797138e-07,
      "loss": 0.0091,
      "step": 2808640
    },
    {
      "epoch": 4.596433691404332,
      "grad_norm": 0.323635071516037,
      "learning_rate": 8.124509926661967e-07,
      "loss": 0.0093,
      "step": 2808660
    },
    {
      "epoch": 4.596466421842986,
      "grad_norm": 0.11832933872938156,
      "learning_rate": 8.123851004526796e-07,
      "loss": 0.011,
      "step": 2808680
    },
    {
      "epoch": 4.5964991522816385,
      "grad_norm": 0.16496197879314423,
      "learning_rate": 8.123192082391624e-07,
      "loss": 0.012,
      "step": 2808700
    },
    {
      "epoch": 4.596531882720292,
      "grad_norm": 0.7385041117668152,
      "learning_rate": 8.122533160256453e-07,
      "loss": 0.0102,
      "step": 2808720
    },
    {
      "epoch": 4.596564613158946,
      "grad_norm": 0.0736120417714119,
      "learning_rate": 8.121874238121281e-07,
      "loss": 0.0105,
      "step": 2808740
    },
    {
      "epoch": 4.596597343597599,
      "grad_norm": 0.21015171706676483,
      "learning_rate": 8.121215315986111e-07,
      "loss": 0.008,
      "step": 2808760
    },
    {
      "epoch": 4.596630074036252,
      "grad_norm": 0.22536157071590424,
      "learning_rate": 8.120556393850939e-07,
      "loss": 0.01,
      "step": 2808780
    },
    {
      "epoch": 4.596662804474906,
      "grad_norm": 0.2520465552806854,
      "learning_rate": 8.119897471715768e-07,
      "loss": 0.0098,
      "step": 2808800
    },
    {
      "epoch": 4.596695534913559,
      "grad_norm": 0.14806661009788513,
      "learning_rate": 8.119238549580596e-07,
      "loss": 0.0087,
      "step": 2808820
    },
    {
      "epoch": 4.596728265352212,
      "grad_norm": 0.04597130045294762,
      "learning_rate": 8.118579627445425e-07,
      "loss": 0.0083,
      "step": 2808840
    },
    {
      "epoch": 4.5967609957908655,
      "grad_norm": 0.3087999224662781,
      "learning_rate": 8.117920705310254e-07,
      "loss": 0.0069,
      "step": 2808860
    },
    {
      "epoch": 4.596793726229519,
      "grad_norm": 0.10458170622587204,
      "learning_rate": 8.117261783175083e-07,
      "loss": 0.0104,
      "step": 2808880
    },
    {
      "epoch": 4.596826456668172,
      "grad_norm": 0.33661261200904846,
      "learning_rate": 8.116602861039911e-07,
      "loss": 0.0065,
      "step": 2808900
    },
    {
      "epoch": 4.596859187106825,
      "grad_norm": 0.13234132528305054,
      "learning_rate": 8.11594393890474e-07,
      "loss": 0.0113,
      "step": 2808920
    },
    {
      "epoch": 4.596891917545479,
      "grad_norm": 0.1720302253961563,
      "learning_rate": 8.115285016769569e-07,
      "loss": 0.0143,
      "step": 2808940
    },
    {
      "epoch": 4.596924647984133,
      "grad_norm": 0.06644067168235779,
      "learning_rate": 8.114626094634398e-07,
      "loss": 0.0073,
      "step": 2808960
    },
    {
      "epoch": 4.596957378422785,
      "grad_norm": 0.20762653648853302,
      "learning_rate": 8.113967172499226e-07,
      "loss": 0.0138,
      "step": 2808980
    },
    {
      "epoch": 4.596990108861439,
      "grad_norm": 0.17572572827339172,
      "learning_rate": 8.113308250364055e-07,
      "loss": 0.0084,
      "step": 2809000
    },
    {
      "epoch": 4.5970228393000925,
      "grad_norm": 0.5235667824745178,
      "learning_rate": 8.112649328228883e-07,
      "loss": 0.0093,
      "step": 2809020
    },
    {
      "epoch": 4.597055569738746,
      "grad_norm": 0.14951461553573608,
      "learning_rate": 8.111990406093712e-07,
      "loss": 0.0096,
      "step": 2809040
    },
    {
      "epoch": 4.597088300177399,
      "grad_norm": 0.14711330831050873,
      "learning_rate": 8.111331483958541e-07,
      "loss": 0.0081,
      "step": 2809060
    },
    {
      "epoch": 4.597121030616052,
      "grad_norm": 0.36040744185447693,
      "learning_rate": 8.110672561823369e-07,
      "loss": 0.0086,
      "step": 2809080
    },
    {
      "epoch": 4.597153761054706,
      "grad_norm": 0.08988771587610245,
      "learning_rate": 8.110013639688198e-07,
      "loss": 0.0078,
      "step": 2809100
    },
    {
      "epoch": 4.597186491493359,
      "grad_norm": 0.21573588252067566,
      "learning_rate": 8.109354717553028e-07,
      "loss": 0.0093,
      "step": 2809120
    },
    {
      "epoch": 4.597219221932012,
      "grad_norm": 0.17951835691928864,
      "learning_rate": 8.108695795417856e-07,
      "loss": 0.0092,
      "step": 2809140
    },
    {
      "epoch": 4.597251952370666,
      "grad_norm": 0.1124655157327652,
      "learning_rate": 8.108036873282684e-07,
      "loss": 0.0056,
      "step": 2809160
    },
    {
      "epoch": 4.597284682809319,
      "grad_norm": 0.12982867658138275,
      "learning_rate": 8.107377951147513e-07,
      "loss": 0.0101,
      "step": 2809180
    },
    {
      "epoch": 4.597317413247972,
      "grad_norm": 0.18958784639835358,
      "learning_rate": 8.106719029012343e-07,
      "loss": 0.0106,
      "step": 2809200
    },
    {
      "epoch": 4.597350143686626,
      "grad_norm": 0.23366667330265045,
      "learning_rate": 8.106060106877172e-07,
      "loss": 0.0096,
      "step": 2809220
    },
    {
      "epoch": 4.597382874125279,
      "grad_norm": 0.5019532442092896,
      "learning_rate": 8.105401184742e-07,
      "loss": 0.0056,
      "step": 2809240
    },
    {
      "epoch": 4.597415604563932,
      "grad_norm": 0.19994212687015533,
      "learning_rate": 8.104742262606828e-07,
      "loss": 0.0077,
      "step": 2809260
    },
    {
      "epoch": 4.597448335002586,
      "grad_norm": 0.3352225720882416,
      "learning_rate": 8.104083340471656e-07,
      "loss": 0.0073,
      "step": 2809280
    },
    {
      "epoch": 4.597481065441239,
      "grad_norm": 0.2267441302537918,
      "learning_rate": 8.103424418336487e-07,
      "loss": 0.0097,
      "step": 2809300
    },
    {
      "epoch": 4.597513795879893,
      "grad_norm": 0.1845676749944687,
      "learning_rate": 8.102765496201315e-07,
      "loss": 0.0061,
      "step": 2809320
    },
    {
      "epoch": 4.5975465263185455,
      "grad_norm": 0.18809828162193298,
      "learning_rate": 8.102106574066144e-07,
      "loss": 0.0098,
      "step": 2809340
    },
    {
      "epoch": 4.597579256757199,
      "grad_norm": 0.24391604959964752,
      "learning_rate": 8.101447651930972e-07,
      "loss": 0.0114,
      "step": 2809360
    },
    {
      "epoch": 4.597611987195853,
      "grad_norm": 0.15244580805301666,
      "learning_rate": 8.100788729795802e-07,
      "loss": 0.0079,
      "step": 2809380
    },
    {
      "epoch": 4.597644717634505,
      "grad_norm": 0.11046545207500458,
      "learning_rate": 8.10012980766063e-07,
      "loss": 0.0098,
      "step": 2809400
    },
    {
      "epoch": 4.597677448073159,
      "grad_norm": 0.14412850141525269,
      "learning_rate": 8.099470885525458e-07,
      "loss": 0.0087,
      "step": 2809420
    },
    {
      "epoch": 4.597710178511813,
      "grad_norm": 0.41355571150779724,
      "learning_rate": 8.098811963390287e-07,
      "loss": 0.0078,
      "step": 2809440
    },
    {
      "epoch": 4.597742908950465,
      "grad_norm": 0.1457449346780777,
      "learning_rate": 8.098153041255115e-07,
      "loss": 0.009,
      "step": 2809460
    },
    {
      "epoch": 4.597775639389119,
      "grad_norm": 0.13068938255310059,
      "learning_rate": 8.097494119119945e-07,
      "loss": 0.0068,
      "step": 2809480
    },
    {
      "epoch": 4.5978083698277725,
      "grad_norm": 0.16867990791797638,
      "learning_rate": 8.096835196984773e-07,
      "loss": 0.012,
      "step": 2809500
    },
    {
      "epoch": 4.597841100266426,
      "grad_norm": 0.204071506857872,
      "learning_rate": 8.096176274849602e-07,
      "loss": 0.0086,
      "step": 2809520
    },
    {
      "epoch": 4.597873830705079,
      "grad_norm": 0.30559563636779785,
      "learning_rate": 8.09551735271443e-07,
      "loss": 0.0065,
      "step": 2809540
    },
    {
      "epoch": 4.597906561143732,
      "grad_norm": 0.22005631029605865,
      "learning_rate": 8.09485843057926e-07,
      "loss": 0.0095,
      "step": 2809560
    },
    {
      "epoch": 4.597939291582386,
      "grad_norm": 0.23357486724853516,
      "learning_rate": 8.094199508444088e-07,
      "loss": 0.0093,
      "step": 2809580
    },
    {
      "epoch": 4.59797202202104,
      "grad_norm": 0.24657602608203888,
      "learning_rate": 8.093540586308917e-07,
      "loss": 0.0099,
      "step": 2809600
    },
    {
      "epoch": 4.598004752459692,
      "grad_norm": 0.19707529246807098,
      "learning_rate": 8.092881664173745e-07,
      "loss": 0.0058,
      "step": 2809620
    },
    {
      "epoch": 4.598037482898346,
      "grad_norm": 0.22847731411457062,
      "learning_rate": 8.092222742038575e-07,
      "loss": 0.0126,
      "step": 2809640
    },
    {
      "epoch": 4.5980702133369995,
      "grad_norm": 0.2871752679347992,
      "learning_rate": 8.091563819903403e-07,
      "loss": 0.0087,
      "step": 2809660
    },
    {
      "epoch": 4.598102943775652,
      "grad_norm": 0.07419804483652115,
      "learning_rate": 8.090904897768232e-07,
      "loss": 0.0099,
      "step": 2809680
    },
    {
      "epoch": 4.598135674214306,
      "grad_norm": 0.2625192105770111,
      "learning_rate": 8.09024597563306e-07,
      "loss": 0.0104,
      "step": 2809700
    },
    {
      "epoch": 4.598168404652959,
      "grad_norm": 0.0420583039522171,
      "learning_rate": 8.089587053497888e-07,
      "loss": 0.009,
      "step": 2809720
    },
    {
      "epoch": 4.598201135091612,
      "grad_norm": 0.4260207414627075,
      "learning_rate": 8.088928131362718e-07,
      "loss": 0.0084,
      "step": 2809740
    },
    {
      "epoch": 4.598233865530266,
      "grad_norm": 0.7644304037094116,
      "learning_rate": 8.088269209227546e-07,
      "loss": 0.0101,
      "step": 2809760
    },
    {
      "epoch": 4.598266595968919,
      "grad_norm": 0.14467492699623108,
      "learning_rate": 8.087610287092375e-07,
      "loss": 0.0107,
      "step": 2809780
    },
    {
      "epoch": 4.598299326407573,
      "grad_norm": 0.6543405055999756,
      "learning_rate": 8.086951364957203e-07,
      "loss": 0.0096,
      "step": 2809800
    },
    {
      "epoch": 4.598332056846226,
      "grad_norm": 0.39928221702575684,
      "learning_rate": 8.086292442822033e-07,
      "loss": 0.0117,
      "step": 2809820
    },
    {
      "epoch": 4.598364787284879,
      "grad_norm": 0.22597166895866394,
      "learning_rate": 8.085633520686861e-07,
      "loss": 0.0078,
      "step": 2809840
    },
    {
      "epoch": 4.598397517723533,
      "grad_norm": 0.6475209593772888,
      "learning_rate": 8.08497459855169e-07,
      "loss": 0.01,
      "step": 2809860
    },
    {
      "epoch": 4.5984302481621855,
      "grad_norm": 0.7873013019561768,
      "learning_rate": 8.084315676416518e-07,
      "loss": 0.0123,
      "step": 2809880
    },
    {
      "epoch": 4.598462978600839,
      "grad_norm": 0.21076448261737823,
      "learning_rate": 8.083656754281347e-07,
      "loss": 0.011,
      "step": 2809900
    },
    {
      "epoch": 4.598495709039493,
      "grad_norm": 0.0556517094373703,
      "learning_rate": 8.082997832146176e-07,
      "loss": 0.0109,
      "step": 2809920
    },
    {
      "epoch": 4.598528439478146,
      "grad_norm": 0.09162163734436035,
      "learning_rate": 8.082338910011005e-07,
      "loss": 0.0107,
      "step": 2809940
    },
    {
      "epoch": 4.598561169916799,
      "grad_norm": 0.30262815952301025,
      "learning_rate": 8.081679987875833e-07,
      "loss": 0.011,
      "step": 2809960
    },
    {
      "epoch": 4.598593900355453,
      "grad_norm": 0.4479289650917053,
      "learning_rate": 8.081021065740662e-07,
      "loss": 0.0096,
      "step": 2809980
    },
    {
      "epoch": 4.598626630794106,
      "grad_norm": 0.389871746301651,
      "learning_rate": 8.080362143605491e-07,
      "loss": 0.0066,
      "step": 2810000
    },
    {
      "epoch": 4.598659361232759,
      "grad_norm": 0.42747220396995544,
      "learning_rate": 8.07970322147032e-07,
      "loss": 0.0083,
      "step": 2810020
    },
    {
      "epoch": 4.5986920916714125,
      "grad_norm": 0.17863398790359497,
      "learning_rate": 8.079044299335148e-07,
      "loss": 0.008,
      "step": 2810040
    },
    {
      "epoch": 4.598724822110066,
      "grad_norm": 0.19647595286369324,
      "learning_rate": 8.078385377199976e-07,
      "loss": 0.0087,
      "step": 2810060
    },
    {
      "epoch": 4.598757552548719,
      "grad_norm": 0.16472847759723663,
      "learning_rate": 8.077726455064806e-07,
      "loss": 0.0081,
      "step": 2810080
    },
    {
      "epoch": 4.598790282987372,
      "grad_norm": 0.20614716410636902,
      "learning_rate": 8.077067532929634e-07,
      "loss": 0.0096,
      "step": 2810100
    },
    {
      "epoch": 4.598823013426026,
      "grad_norm": 0.7085899114608765,
      "learning_rate": 8.076408610794463e-07,
      "loss": 0.0105,
      "step": 2810120
    },
    {
      "epoch": 4.5988557438646795,
      "grad_norm": 0.3426344096660614,
      "learning_rate": 8.075749688659291e-07,
      "loss": 0.0085,
      "step": 2810140
    },
    {
      "epoch": 4.598888474303332,
      "grad_norm": 0.14655420184135437,
      "learning_rate": 8.07509076652412e-07,
      "loss": 0.0134,
      "step": 2810160
    },
    {
      "epoch": 4.598921204741986,
      "grad_norm": 0.27854764461517334,
      "learning_rate": 8.074431844388949e-07,
      "loss": 0.0091,
      "step": 2810180
    },
    {
      "epoch": 4.598953935180639,
      "grad_norm": 0.784451961517334,
      "learning_rate": 8.073772922253778e-07,
      "loss": 0.0089,
      "step": 2810200
    },
    {
      "epoch": 4.598986665619293,
      "grad_norm": 0.2605743706226349,
      "learning_rate": 8.073114000118606e-07,
      "loss": 0.0089,
      "step": 2810220
    },
    {
      "epoch": 4.599019396057946,
      "grad_norm": 0.2837074398994446,
      "learning_rate": 8.072455077983435e-07,
      "loss": 0.0104,
      "step": 2810240
    },
    {
      "epoch": 4.599052126496599,
      "grad_norm": 0.10757782310247421,
      "learning_rate": 8.071796155848264e-07,
      "loss": 0.0065,
      "step": 2810260
    },
    {
      "epoch": 4.599084856935253,
      "grad_norm": 0.18686233460903168,
      "learning_rate": 8.071137233713093e-07,
      "loss": 0.0102,
      "step": 2810280
    },
    {
      "epoch": 4.599117587373906,
      "grad_norm": 0.11476597189903259,
      "learning_rate": 8.070478311577921e-07,
      "loss": 0.0117,
      "step": 2810300
    },
    {
      "epoch": 4.599150317812559,
      "grad_norm": 0.23392683267593384,
      "learning_rate": 8.06981938944275e-07,
      "loss": 0.0112,
      "step": 2810320
    },
    {
      "epoch": 4.599183048251213,
      "grad_norm": 0.20128919184207916,
      "learning_rate": 8.069160467307578e-07,
      "loss": 0.0083,
      "step": 2810340
    },
    {
      "epoch": 4.5992157786898655,
      "grad_norm": 0.20179332792758942,
      "learning_rate": 8.068501545172408e-07,
      "loss": 0.0091,
      "step": 2810360
    },
    {
      "epoch": 4.599248509128519,
      "grad_norm": 0.17449431121349335,
      "learning_rate": 8.067842623037236e-07,
      "loss": 0.0073,
      "step": 2810380
    },
    {
      "epoch": 4.599281239567173,
      "grad_norm": 0.2890693247318268,
      "learning_rate": 8.067183700902064e-07,
      "loss": 0.009,
      "step": 2810400
    },
    {
      "epoch": 4.599313970005826,
      "grad_norm": 0.24348431825637817,
      "learning_rate": 8.066524778766893e-07,
      "loss": 0.0058,
      "step": 2810420
    },
    {
      "epoch": 4.599346700444479,
      "grad_norm": 0.3155883550643921,
      "learning_rate": 8.065865856631723e-07,
      "loss": 0.0108,
      "step": 2810440
    },
    {
      "epoch": 4.599379430883133,
      "grad_norm": 0.08725456893444061,
      "learning_rate": 8.065206934496551e-07,
      "loss": 0.0085,
      "step": 2810460
    },
    {
      "epoch": 4.599412161321786,
      "grad_norm": 0.19204023480415344,
      "learning_rate": 8.064548012361379e-07,
      "loss": 0.0096,
      "step": 2810480
    },
    {
      "epoch": 4.59944489176044,
      "grad_norm": 0.12959659099578857,
      "learning_rate": 8.063889090226208e-07,
      "loss": 0.0077,
      "step": 2810500
    },
    {
      "epoch": 4.5994776221990925,
      "grad_norm": 0.3492696285247803,
      "learning_rate": 8.063230168091038e-07,
      "loss": 0.0083,
      "step": 2810520
    },
    {
      "epoch": 4.599510352637746,
      "grad_norm": 0.2561986744403839,
      "learning_rate": 8.062571245955867e-07,
      "loss": 0.0093,
      "step": 2810540
    },
    {
      "epoch": 4.5995430830764,
      "grad_norm": 0.3594859540462494,
      "learning_rate": 8.061912323820695e-07,
      "loss": 0.0103,
      "step": 2810560
    },
    {
      "epoch": 4.599575813515052,
      "grad_norm": 0.42324963212013245,
      "learning_rate": 8.061253401685523e-07,
      "loss": 0.0116,
      "step": 2810580
    },
    {
      "epoch": 4.599608543953706,
      "grad_norm": 0.45254287123680115,
      "learning_rate": 8.060594479550351e-07,
      "loss": 0.0085,
      "step": 2810600
    },
    {
      "epoch": 4.59964127439236,
      "grad_norm": 0.27440428733825684,
      "learning_rate": 8.059935557415182e-07,
      "loss": 0.0096,
      "step": 2810620
    },
    {
      "epoch": 4.599674004831012,
      "grad_norm": 0.10697028785943985,
      "learning_rate": 8.05927663528001e-07,
      "loss": 0.0084,
      "step": 2810640
    },
    {
      "epoch": 4.599706735269666,
      "grad_norm": 0.5161225199699402,
      "learning_rate": 8.058617713144839e-07,
      "loss": 0.0092,
      "step": 2810660
    },
    {
      "epoch": 4.5997394657083195,
      "grad_norm": 0.23518739640712738,
      "learning_rate": 8.057958791009667e-07,
      "loss": 0.0137,
      "step": 2810680
    },
    {
      "epoch": 4.599772196146973,
      "grad_norm": 0.3081737458705902,
      "learning_rate": 8.057299868874497e-07,
      "loss": 0.0102,
      "step": 2810700
    },
    {
      "epoch": 4.599804926585626,
      "grad_norm": 0.27089589834213257,
      "learning_rate": 8.056640946739325e-07,
      "loss": 0.008,
      "step": 2810720
    },
    {
      "epoch": 4.599837657024279,
      "grad_norm": 0.38533762097358704,
      "learning_rate": 8.055982024604153e-07,
      "loss": 0.0103,
      "step": 2810740
    },
    {
      "epoch": 4.599870387462933,
      "grad_norm": 0.6538569331169128,
      "learning_rate": 8.055323102468982e-07,
      "loss": 0.0059,
      "step": 2810760
    },
    {
      "epoch": 4.599903117901587,
      "grad_norm": 0.1466427445411682,
      "learning_rate": 8.05466418033381e-07,
      "loss": 0.0118,
      "step": 2810780
    },
    {
      "epoch": 4.599935848340239,
      "grad_norm": 0.18273243308067322,
      "learning_rate": 8.05400525819864e-07,
      "loss": 0.006,
      "step": 2810800
    },
    {
      "epoch": 4.599968578778893,
      "grad_norm": 0.3082602620124817,
      "learning_rate": 8.053346336063468e-07,
      "loss": 0.01,
      "step": 2810820
    },
    {
      "epoch": 4.6000013092175465,
      "grad_norm": 0.06150272861123085,
      "learning_rate": 8.052687413928297e-07,
      "loss": 0.0097,
      "step": 2810840
    },
    {
      "epoch": 4.600034039656199,
      "grad_norm": 0.2886333763599396,
      "learning_rate": 8.052028491793125e-07,
      "loss": 0.0132,
      "step": 2810860
    },
    {
      "epoch": 4.600066770094853,
      "grad_norm": 0.24766814708709717,
      "learning_rate": 8.051369569657955e-07,
      "loss": 0.0073,
      "step": 2810880
    },
    {
      "epoch": 4.600099500533506,
      "grad_norm": 0.21598684787750244,
      "learning_rate": 8.050710647522783e-07,
      "loss": 0.0107,
      "step": 2810900
    },
    {
      "epoch": 4.600132230972159,
      "grad_norm": 0.45131486654281616,
      "learning_rate": 8.050051725387612e-07,
      "loss": 0.0081,
      "step": 2810920
    },
    {
      "epoch": 4.600164961410813,
      "grad_norm": 0.28546828031539917,
      "learning_rate": 8.04939280325244e-07,
      "loss": 0.0116,
      "step": 2810940
    },
    {
      "epoch": 4.600197691849466,
      "grad_norm": 0.05403418466448784,
      "learning_rate": 8.04873388111727e-07,
      "loss": 0.0105,
      "step": 2810960
    },
    {
      "epoch": 4.60023042228812,
      "grad_norm": 0.1896420419216156,
      "learning_rate": 8.048074958982098e-07,
      "loss": 0.0081,
      "step": 2810980
    },
    {
      "epoch": 4.6002631527267726,
      "grad_norm": 0.19706128537654877,
      "learning_rate": 8.047416036846927e-07,
      "loss": 0.0098,
      "step": 2811000
    },
    {
      "epoch": 4.600295883165426,
      "grad_norm": 0.5716948509216309,
      "learning_rate": 8.046757114711755e-07,
      "loss": 0.0113,
      "step": 2811020
    },
    {
      "epoch": 4.60032861360408,
      "grad_norm": 0.21171607077121735,
      "learning_rate": 8.046098192576584e-07,
      "loss": 0.0067,
      "step": 2811040
    },
    {
      "epoch": 4.600361344042733,
      "grad_norm": 0.24744021892547607,
      "learning_rate": 8.045439270441413e-07,
      "loss": 0.0074,
      "step": 2811060
    },
    {
      "epoch": 4.600394074481386,
      "grad_norm": 0.47548016905784607,
      "learning_rate": 8.044780348306241e-07,
      "loss": 0.0068,
      "step": 2811080
    },
    {
      "epoch": 4.60042680492004,
      "grad_norm": 0.16123256087303162,
      "learning_rate": 8.04412142617107e-07,
      "loss": 0.009,
      "step": 2811100
    },
    {
      "epoch": 4.600459535358693,
      "grad_norm": 0.20684492588043213,
      "learning_rate": 8.043462504035898e-07,
      "loss": 0.0086,
      "step": 2811120
    },
    {
      "epoch": 4.600492265797346,
      "grad_norm": 0.2858218252658844,
      "learning_rate": 8.042803581900728e-07,
      "loss": 0.008,
      "step": 2811140
    },
    {
      "epoch": 4.6005249962359995,
      "grad_norm": 0.24122680723667145,
      "learning_rate": 8.042144659765556e-07,
      "loss": 0.0098,
      "step": 2811160
    },
    {
      "epoch": 4.600557726674653,
      "grad_norm": 0.03936680033802986,
      "learning_rate": 8.041485737630385e-07,
      "loss": 0.0061,
      "step": 2811180
    },
    {
      "epoch": 4.600590457113306,
      "grad_norm": 0.2377828061580658,
      "learning_rate": 8.040826815495213e-07,
      "loss": 0.0067,
      "step": 2811200
    },
    {
      "epoch": 4.600623187551959,
      "grad_norm": 0.2945185601711273,
      "learning_rate": 8.040167893360042e-07,
      "loss": 0.0093,
      "step": 2811220
    },
    {
      "epoch": 4.600655917990613,
      "grad_norm": 0.3116539418697357,
      "learning_rate": 8.039508971224871e-07,
      "loss": 0.0084,
      "step": 2811240
    },
    {
      "epoch": 4.600688648429267,
      "grad_norm": 0.055248111486434937,
      "learning_rate": 8.0388500490897e-07,
      "loss": 0.0085,
      "step": 2811260
    },
    {
      "epoch": 4.600721378867919,
      "grad_norm": 0.2892777621746063,
      "learning_rate": 8.038191126954528e-07,
      "loss": 0.007,
      "step": 2811280
    },
    {
      "epoch": 4.600754109306573,
      "grad_norm": 0.4948984980583191,
      "learning_rate": 8.037532204819357e-07,
      "loss": 0.0081,
      "step": 2811300
    },
    {
      "epoch": 4.6007868397452265,
      "grad_norm": 0.27761808037757874,
      "learning_rate": 8.036873282684186e-07,
      "loss": 0.0066,
      "step": 2811320
    },
    {
      "epoch": 4.60081957018388,
      "grad_norm": 0.368507444858551,
      "learning_rate": 8.036214360549015e-07,
      "loss": 0.011,
      "step": 2811340
    },
    {
      "epoch": 4.600852300622533,
      "grad_norm": 0.2003839910030365,
      "learning_rate": 8.035555438413843e-07,
      "loss": 0.013,
      "step": 2811360
    },
    {
      "epoch": 4.600885031061186,
      "grad_norm": 1.057888150215149,
      "learning_rate": 8.034896516278672e-07,
      "loss": 0.0091,
      "step": 2811380
    },
    {
      "epoch": 4.60091776149984,
      "grad_norm": 0.13571202754974365,
      "learning_rate": 8.034237594143501e-07,
      "loss": 0.0083,
      "step": 2811400
    },
    {
      "epoch": 4.600950491938493,
      "grad_norm": 0.2743152678012848,
      "learning_rate": 8.03357867200833e-07,
      "loss": 0.009,
      "step": 2811420
    },
    {
      "epoch": 4.600983222377146,
      "grad_norm": 0.35883957147598267,
      "learning_rate": 8.032919749873158e-07,
      "loss": 0.0055,
      "step": 2811440
    },
    {
      "epoch": 4.6010159528158,
      "grad_norm": 0.07723790407180786,
      "learning_rate": 8.032260827737986e-07,
      "loss": 0.008,
      "step": 2811460
    },
    {
      "epoch": 4.601048683254453,
      "grad_norm": 0.31255555152893066,
      "learning_rate": 8.031601905602815e-07,
      "loss": 0.01,
      "step": 2811480
    },
    {
      "epoch": 4.601081413693106,
      "grad_norm": 0.6695748567581177,
      "learning_rate": 8.030942983467644e-07,
      "loss": 0.0055,
      "step": 2811500
    },
    {
      "epoch": 4.60111414413176,
      "grad_norm": 0.10108940303325653,
      "learning_rate": 8.030284061332473e-07,
      "loss": 0.0076,
      "step": 2811520
    },
    {
      "epoch": 4.601146874570413,
      "grad_norm": 0.18097494542598724,
      "learning_rate": 8.029625139197301e-07,
      "loss": 0.0097,
      "step": 2811540
    },
    {
      "epoch": 4.601179605009066,
      "grad_norm": 0.4999811351299286,
      "learning_rate": 8.02896621706213e-07,
      "loss": 0.0067,
      "step": 2811560
    },
    {
      "epoch": 4.60121233544772,
      "grad_norm": 0.4020523726940155,
      "learning_rate": 8.028307294926959e-07,
      "loss": 0.007,
      "step": 2811580
    },
    {
      "epoch": 4.601245065886373,
      "grad_norm": 0.10219227522611618,
      "learning_rate": 8.027648372791788e-07,
      "loss": 0.0103,
      "step": 2811600
    },
    {
      "epoch": 4.601277796325026,
      "grad_norm": 0.595045804977417,
      "learning_rate": 8.026989450656616e-07,
      "loss": 0.0084,
      "step": 2811620
    },
    {
      "epoch": 4.60131052676368,
      "grad_norm": 0.2627300024032593,
      "learning_rate": 8.026330528521445e-07,
      "loss": 0.0098,
      "step": 2811640
    },
    {
      "epoch": 4.601343257202333,
      "grad_norm": 0.21347230672836304,
      "learning_rate": 8.025671606386275e-07,
      "loss": 0.0096,
      "step": 2811660
    },
    {
      "epoch": 4.601375987640987,
      "grad_norm": 0.11125126481056213,
      "learning_rate": 8.025012684251103e-07,
      "loss": 0.0081,
      "step": 2811680
    },
    {
      "epoch": 4.6014087180796395,
      "grad_norm": 0.10418780148029327,
      "learning_rate": 8.024353762115931e-07,
      "loss": 0.0135,
      "step": 2811700
    },
    {
      "epoch": 4.601441448518293,
      "grad_norm": 0.37066519260406494,
      "learning_rate": 8.02369483998076e-07,
      "loss": 0.0131,
      "step": 2811720
    },
    {
      "epoch": 4.601474178956947,
      "grad_norm": 0.4213149845600128,
      "learning_rate": 8.023035917845588e-07,
      "loss": 0.013,
      "step": 2811740
    },
    {
      "epoch": 4.601506909395599,
      "grad_norm": 0.5233662128448486,
      "learning_rate": 8.022376995710419e-07,
      "loss": 0.01,
      "step": 2811760
    },
    {
      "epoch": 4.601539639834253,
      "grad_norm": 0.33916521072387695,
      "learning_rate": 8.021718073575246e-07,
      "loss": 0.0128,
      "step": 2811780
    },
    {
      "epoch": 4.6015723702729066,
      "grad_norm": 0.08396244794130325,
      "learning_rate": 8.021059151440074e-07,
      "loss": 0.0088,
      "step": 2811800
    },
    {
      "epoch": 4.601605100711559,
      "grad_norm": 0.16573704779148102,
      "learning_rate": 8.020400229304903e-07,
      "loss": 0.0059,
      "step": 2811820
    },
    {
      "epoch": 4.601637831150213,
      "grad_norm": 0.19937226176261902,
      "learning_rate": 8.019741307169733e-07,
      "loss": 0.0062,
      "step": 2811840
    },
    {
      "epoch": 4.6016705615888664,
      "grad_norm": 0.3068931996822357,
      "learning_rate": 8.019082385034562e-07,
      "loss": 0.0101,
      "step": 2811860
    },
    {
      "epoch": 4.60170329202752,
      "grad_norm": 0.10206592828035355,
      "learning_rate": 8.01842346289939e-07,
      "loss": 0.0092,
      "step": 2811880
    },
    {
      "epoch": 4.601736022466173,
      "grad_norm": 0.3463327884674072,
      "learning_rate": 8.017764540764218e-07,
      "loss": 0.0066,
      "step": 2811900
    },
    {
      "epoch": 4.601768752904826,
      "grad_norm": 0.32508519291877747,
      "learning_rate": 8.017105618629046e-07,
      "loss": 0.0052,
      "step": 2811920
    },
    {
      "epoch": 4.60180148334348,
      "grad_norm": 0.53475022315979,
      "learning_rate": 8.016446696493877e-07,
      "loss": 0.0096,
      "step": 2811940
    },
    {
      "epoch": 4.6018342137821335,
      "grad_norm": 0.20759405195713043,
      "learning_rate": 8.015787774358705e-07,
      "loss": 0.0079,
      "step": 2811960
    },
    {
      "epoch": 4.601866944220786,
      "grad_norm": 0.17422080039978027,
      "learning_rate": 8.015128852223534e-07,
      "loss": 0.0094,
      "step": 2811980
    },
    {
      "epoch": 4.60189967465944,
      "grad_norm": 0.12370754778385162,
      "learning_rate": 8.014469930088362e-07,
      "loss": 0.0097,
      "step": 2812000
    },
    {
      "epoch": 4.601932405098093,
      "grad_norm": 0.22967670857906342,
      "learning_rate": 8.013811007953192e-07,
      "loss": 0.0072,
      "step": 2812020
    },
    {
      "epoch": 4.601965135536746,
      "grad_norm": 0.09289825707674026,
      "learning_rate": 8.01315208581802e-07,
      "loss": 0.0096,
      "step": 2812040
    },
    {
      "epoch": 4.6019978659754,
      "grad_norm": 0.20789557695388794,
      "learning_rate": 8.012493163682849e-07,
      "loss": 0.0113,
      "step": 2812060
    },
    {
      "epoch": 4.602030596414053,
      "grad_norm": 0.15767338871955872,
      "learning_rate": 8.011834241547677e-07,
      "loss": 0.0106,
      "step": 2812080
    },
    {
      "epoch": 4.602063326852706,
      "grad_norm": 0.20190496742725372,
      "learning_rate": 8.011175319412507e-07,
      "loss": 0.0074,
      "step": 2812100
    },
    {
      "epoch": 4.60209605729136,
      "grad_norm": 0.12372089922428131,
      "learning_rate": 8.010516397277335e-07,
      "loss": 0.0121,
      "step": 2812120
    },
    {
      "epoch": 4.602128787730013,
      "grad_norm": 0.17515745759010315,
      "learning_rate": 8.009857475142163e-07,
      "loss": 0.0102,
      "step": 2812140
    },
    {
      "epoch": 4.602161518168667,
      "grad_norm": 0.3027280569076538,
      "learning_rate": 8.009198553006992e-07,
      "loss": 0.0073,
      "step": 2812160
    },
    {
      "epoch": 4.6021942486073195,
      "grad_norm": 0.09589473158121109,
      "learning_rate": 8.00853963087182e-07,
      "loss": 0.0108,
      "step": 2812180
    },
    {
      "epoch": 4.602226979045973,
      "grad_norm": 0.15003879368305206,
      "learning_rate": 8.00788070873665e-07,
      "loss": 0.006,
      "step": 2812200
    },
    {
      "epoch": 4.602259709484627,
      "grad_norm": 0.12165278196334839,
      "learning_rate": 8.007221786601478e-07,
      "loss": 0.0097,
      "step": 2812220
    },
    {
      "epoch": 4.60229243992328,
      "grad_norm": 0.11505909264087677,
      "learning_rate": 8.006562864466307e-07,
      "loss": 0.0065,
      "step": 2812240
    },
    {
      "epoch": 4.602325170361933,
      "grad_norm": 0.22452867031097412,
      "learning_rate": 8.005903942331135e-07,
      "loss": 0.0128,
      "step": 2812260
    },
    {
      "epoch": 4.602357900800587,
      "grad_norm": 0.43052175641059875,
      "learning_rate": 8.005245020195965e-07,
      "loss": 0.0097,
      "step": 2812280
    },
    {
      "epoch": 4.60239063123924,
      "grad_norm": 0.5313474535942078,
      "learning_rate": 8.004586098060793e-07,
      "loss": 0.0124,
      "step": 2812300
    },
    {
      "epoch": 4.602423361677893,
      "grad_norm": 0.05294255539774895,
      "learning_rate": 8.003927175925622e-07,
      "loss": 0.0101,
      "step": 2812320
    },
    {
      "epoch": 4.6024560921165465,
      "grad_norm": 0.1207168847322464,
      "learning_rate": 8.00326825379045e-07,
      "loss": 0.0098,
      "step": 2812340
    },
    {
      "epoch": 4.6024888225552,
      "grad_norm": 0.18797282874584198,
      "learning_rate": 8.002609331655279e-07,
      "loss": 0.0103,
      "step": 2812360
    },
    {
      "epoch": 4.602521552993853,
      "grad_norm": 0.44536709785461426,
      "learning_rate": 8.001950409520108e-07,
      "loss": 0.0136,
      "step": 2812380
    },
    {
      "epoch": 4.602554283432506,
      "grad_norm": 0.2033020257949829,
      "learning_rate": 8.001291487384937e-07,
      "loss": 0.011,
      "step": 2812400
    },
    {
      "epoch": 4.60258701387116,
      "grad_norm": 0.23496653139591217,
      "learning_rate": 8.000632565249765e-07,
      "loss": 0.0072,
      "step": 2812420
    },
    {
      "epoch": 4.602619744309814,
      "grad_norm": 0.2756226062774658,
      "learning_rate": 7.999973643114593e-07,
      "loss": 0.0121,
      "step": 2812440
    },
    {
      "epoch": 4.602652474748466,
      "grad_norm": 0.843993604183197,
      "learning_rate": 7.999314720979423e-07,
      "loss": 0.0126,
      "step": 2812460
    },
    {
      "epoch": 4.60268520518712,
      "grad_norm": 0.2402869462966919,
      "learning_rate": 7.998655798844251e-07,
      "loss": 0.0075,
      "step": 2812480
    },
    {
      "epoch": 4.6027179356257735,
      "grad_norm": 0.48290082812309265,
      "learning_rate": 7.99799687670908e-07,
      "loss": 0.009,
      "step": 2812500
    },
    {
      "epoch": 4.602750666064427,
      "grad_norm": 0.2463110089302063,
      "learning_rate": 7.997337954573908e-07,
      "loss": 0.0077,
      "step": 2812520
    },
    {
      "epoch": 4.60278339650308,
      "grad_norm": 0.08911997824907303,
      "learning_rate": 7.996679032438738e-07,
      "loss": 0.01,
      "step": 2812540
    },
    {
      "epoch": 4.602816126941733,
      "grad_norm": 0.14067837595939636,
      "learning_rate": 7.996020110303566e-07,
      "loss": 0.0081,
      "step": 2812560
    },
    {
      "epoch": 4.602848857380387,
      "grad_norm": 0.15554630756378174,
      "learning_rate": 7.995361188168395e-07,
      "loss": 0.0106,
      "step": 2812580
    },
    {
      "epoch": 4.60288158781904,
      "grad_norm": 0.12681519985198975,
      "learning_rate": 7.994702266033223e-07,
      "loss": 0.0068,
      "step": 2812600
    },
    {
      "epoch": 4.602914318257693,
      "grad_norm": 0.11666001379489899,
      "learning_rate": 7.994043343898052e-07,
      "loss": 0.0139,
      "step": 2812620
    },
    {
      "epoch": 4.602947048696347,
      "grad_norm": 0.3073326349258423,
      "learning_rate": 7.993384421762881e-07,
      "loss": 0.0105,
      "step": 2812640
    },
    {
      "epoch": 4.602979779135,
      "grad_norm": 0.5123400092124939,
      "learning_rate": 7.99272549962771e-07,
      "loss": 0.0091,
      "step": 2812660
    },
    {
      "epoch": 4.603012509573653,
      "grad_norm": 0.17834234237670898,
      "learning_rate": 7.992066577492538e-07,
      "loss": 0.0071,
      "step": 2812680
    },
    {
      "epoch": 4.603045240012307,
      "grad_norm": 0.28399771451950073,
      "learning_rate": 7.991407655357367e-07,
      "loss": 0.0105,
      "step": 2812700
    },
    {
      "epoch": 4.60307797045096,
      "grad_norm": 0.15482424199581146,
      "learning_rate": 7.990748733222196e-07,
      "loss": 0.0123,
      "step": 2812720
    },
    {
      "epoch": 4.603110700889613,
      "grad_norm": 0.11715758591890335,
      "learning_rate": 7.990089811087025e-07,
      "loss": 0.0086,
      "step": 2812740
    },
    {
      "epoch": 4.603143431328267,
      "grad_norm": 0.42739591002464294,
      "learning_rate": 7.989430888951853e-07,
      "loss": 0.008,
      "step": 2812760
    },
    {
      "epoch": 4.60317616176692,
      "grad_norm": 0.44570955634117126,
      "learning_rate": 7.988771966816681e-07,
      "loss": 0.0091,
      "step": 2812780
    },
    {
      "epoch": 4.603208892205574,
      "grad_norm": 0.06955770403146744,
      "learning_rate": 7.98811304468151e-07,
      "loss": 0.0091,
      "step": 2812800
    },
    {
      "epoch": 4.6032416226442265,
      "grad_norm": 0.1996307671070099,
      "learning_rate": 7.987454122546339e-07,
      "loss": 0.0102,
      "step": 2812820
    },
    {
      "epoch": 4.60327435308288,
      "grad_norm": 0.17093046009540558,
      "learning_rate": 7.986795200411168e-07,
      "loss": 0.0097,
      "step": 2812840
    },
    {
      "epoch": 4.603307083521534,
      "grad_norm": 0.39631953835487366,
      "learning_rate": 7.986136278275996e-07,
      "loss": 0.0105,
      "step": 2812860
    },
    {
      "epoch": 4.603339813960186,
      "grad_norm": 0.31198859214782715,
      "learning_rate": 7.985477356140825e-07,
      "loss": 0.0085,
      "step": 2812880
    },
    {
      "epoch": 4.60337254439884,
      "grad_norm": 0.11537162959575653,
      "learning_rate": 7.984818434005654e-07,
      "loss": 0.0085,
      "step": 2812900
    },
    {
      "epoch": 4.603405274837494,
      "grad_norm": 0.3252415657043457,
      "learning_rate": 7.984159511870483e-07,
      "loss": 0.0063,
      "step": 2812920
    },
    {
      "epoch": 4.603438005276146,
      "grad_norm": 0.43967771530151367,
      "learning_rate": 7.983500589735311e-07,
      "loss": 0.0117,
      "step": 2812940
    },
    {
      "epoch": 4.6034707357148,
      "grad_norm": 0.23564273118972778,
      "learning_rate": 7.98284166760014e-07,
      "loss": 0.0079,
      "step": 2812960
    },
    {
      "epoch": 4.6035034661534535,
      "grad_norm": 0.05289963632822037,
      "learning_rate": 7.98218274546497e-07,
      "loss": 0.0069,
      "step": 2812980
    },
    {
      "epoch": 4.603536196592107,
      "grad_norm": 0.5230827927589417,
      "learning_rate": 7.981523823329798e-07,
      "loss": 0.0112,
      "step": 2813000
    },
    {
      "epoch": 4.60356892703076,
      "grad_norm": 0.23166337609291077,
      "learning_rate": 7.980864901194626e-07,
      "loss": 0.007,
      "step": 2813020
    },
    {
      "epoch": 4.603601657469413,
      "grad_norm": 0.32943740487098694,
      "learning_rate": 7.980205979059455e-07,
      "loss": 0.0095,
      "step": 2813040
    },
    {
      "epoch": 4.603634387908067,
      "grad_norm": 0.16881129145622253,
      "learning_rate": 7.979547056924283e-07,
      "loss": 0.0102,
      "step": 2813060
    },
    {
      "epoch": 4.60366711834672,
      "grad_norm": 0.07164452970027924,
      "learning_rate": 7.978888134789114e-07,
      "loss": 0.0075,
      "step": 2813080
    },
    {
      "epoch": 4.603699848785373,
      "grad_norm": 0.21140442788600922,
      "learning_rate": 7.978229212653942e-07,
      "loss": 0.0064,
      "step": 2813100
    },
    {
      "epoch": 4.603732579224027,
      "grad_norm": 0.42290613055229187,
      "learning_rate": 7.977570290518769e-07,
      "loss": 0.0106,
      "step": 2813120
    },
    {
      "epoch": 4.6037653096626805,
      "grad_norm": 0.36871835589408875,
      "learning_rate": 7.976911368383598e-07,
      "loss": 0.0094,
      "step": 2813140
    },
    {
      "epoch": 4.603798040101333,
      "grad_norm": 0.2968079149723053,
      "learning_rate": 7.976252446248428e-07,
      "loss": 0.0076,
      "step": 2813160
    },
    {
      "epoch": 4.603830770539987,
      "grad_norm": 0.24530518054962158,
      "learning_rate": 7.975593524113257e-07,
      "loss": 0.0083,
      "step": 2813180
    },
    {
      "epoch": 4.60386350097864,
      "grad_norm": 0.19270682334899902,
      "learning_rate": 7.974934601978085e-07,
      "loss": 0.0145,
      "step": 2813200
    },
    {
      "epoch": 4.603896231417293,
      "grad_norm": 0.21654458343982697,
      "learning_rate": 7.974275679842913e-07,
      "loss": 0.0072,
      "step": 2813220
    },
    {
      "epoch": 4.603928961855947,
      "grad_norm": 0.07479185611009598,
      "learning_rate": 7.973616757707741e-07,
      "loss": 0.0091,
      "step": 2813240
    },
    {
      "epoch": 4.6039616922946,
      "grad_norm": 0.2013458013534546,
      "learning_rate": 7.972957835572572e-07,
      "loss": 0.0049,
      "step": 2813260
    },
    {
      "epoch": 4.603994422733253,
      "grad_norm": 0.030772419646382332,
      "learning_rate": 7.9722989134374e-07,
      "loss": 0.0117,
      "step": 2813280
    },
    {
      "epoch": 4.604027153171907,
      "grad_norm": 0.5883369445800781,
      "learning_rate": 7.971639991302229e-07,
      "loss": 0.0104,
      "step": 2813300
    },
    {
      "epoch": 4.60405988361056,
      "grad_norm": 0.25063204765319824,
      "learning_rate": 7.970981069167057e-07,
      "loss": 0.0067,
      "step": 2813320
    },
    {
      "epoch": 4.604092614049214,
      "grad_norm": 0.09003731608390808,
      "learning_rate": 7.970322147031887e-07,
      "loss": 0.0114,
      "step": 2813340
    },
    {
      "epoch": 4.6041253444878665,
      "grad_norm": 0.22972650825977325,
      "learning_rate": 7.969663224896715e-07,
      "loss": 0.0094,
      "step": 2813360
    },
    {
      "epoch": 4.60415807492652,
      "grad_norm": 0.39568719267845154,
      "learning_rate": 7.969004302761544e-07,
      "loss": 0.0087,
      "step": 2813380
    },
    {
      "epoch": 4.604190805365174,
      "grad_norm": 0.2838527262210846,
      "learning_rate": 7.968345380626372e-07,
      "loss": 0.0114,
      "step": 2813400
    },
    {
      "epoch": 4.604223535803827,
      "grad_norm": 0.17585206031799316,
      "learning_rate": 7.967686458491202e-07,
      "loss": 0.0079,
      "step": 2813420
    },
    {
      "epoch": 4.60425626624248,
      "grad_norm": 0.043372247368097305,
      "learning_rate": 7.96702753635603e-07,
      "loss": 0.0059,
      "step": 2813440
    },
    {
      "epoch": 4.604288996681134,
      "grad_norm": 0.1806822121143341,
      "learning_rate": 7.966368614220858e-07,
      "loss": 0.0063,
      "step": 2813460
    },
    {
      "epoch": 4.604321727119787,
      "grad_norm": 0.1169009655714035,
      "learning_rate": 7.965709692085687e-07,
      "loss": 0.0102,
      "step": 2813480
    },
    {
      "epoch": 4.60435445755844,
      "grad_norm": 0.15553681552410126,
      "learning_rate": 7.965050769950515e-07,
      "loss": 0.0094,
      "step": 2813500
    },
    {
      "epoch": 4.6043871879970935,
      "grad_norm": 0.20267489552497864,
      "learning_rate": 7.964391847815345e-07,
      "loss": 0.0101,
      "step": 2813520
    },
    {
      "epoch": 4.604419918435747,
      "grad_norm": 0.47581014037132263,
      "learning_rate": 7.963732925680173e-07,
      "loss": 0.0124,
      "step": 2813540
    },
    {
      "epoch": 4.6044526488744,
      "grad_norm": 0.13107085227966309,
      "learning_rate": 7.963074003545002e-07,
      "loss": 0.0129,
      "step": 2813560
    },
    {
      "epoch": 4.604485379313053,
      "grad_norm": 0.5085695385932922,
      "learning_rate": 7.96241508140983e-07,
      "loss": 0.0128,
      "step": 2813580
    },
    {
      "epoch": 4.604518109751707,
      "grad_norm": 0.145112544298172,
      "learning_rate": 7.96175615927466e-07,
      "loss": 0.0091,
      "step": 2813600
    },
    {
      "epoch": 4.6045508401903605,
      "grad_norm": 0.18820562958717346,
      "learning_rate": 7.961097237139488e-07,
      "loss": 0.0124,
      "step": 2813620
    },
    {
      "epoch": 4.604583570629013,
      "grad_norm": 0.25387826561927795,
      "learning_rate": 7.960438315004317e-07,
      "loss": 0.0095,
      "step": 2813640
    },
    {
      "epoch": 4.604616301067667,
      "grad_norm": 0.1370968073606491,
      "learning_rate": 7.959779392869145e-07,
      "loss": 0.0081,
      "step": 2813660
    },
    {
      "epoch": 4.60464903150632,
      "grad_norm": 0.28574827313423157,
      "learning_rate": 7.959120470733974e-07,
      "loss": 0.0083,
      "step": 2813680
    },
    {
      "epoch": 4.604681761944974,
      "grad_norm": 0.1293254941701889,
      "learning_rate": 7.958461548598803e-07,
      "loss": 0.0112,
      "step": 2813700
    },
    {
      "epoch": 4.604714492383627,
      "grad_norm": 0.11633031815290451,
      "learning_rate": 7.957802626463632e-07,
      "loss": 0.0103,
      "step": 2813720
    },
    {
      "epoch": 4.60474722282228,
      "grad_norm": 0.11095298826694489,
      "learning_rate": 7.95714370432846e-07,
      "loss": 0.0091,
      "step": 2813740
    },
    {
      "epoch": 4.604779953260934,
      "grad_norm": 0.07461947202682495,
      "learning_rate": 7.956484782193289e-07,
      "loss": 0.0105,
      "step": 2813760
    },
    {
      "epoch": 4.604812683699587,
      "grad_norm": 0.15462516248226166,
      "learning_rate": 7.955825860058118e-07,
      "loss": 0.007,
      "step": 2813780
    },
    {
      "epoch": 4.60484541413824,
      "grad_norm": 0.1725146770477295,
      "learning_rate": 7.955166937922946e-07,
      "loss": 0.0058,
      "step": 2813800
    },
    {
      "epoch": 4.604878144576894,
      "grad_norm": 0.2796455919742584,
      "learning_rate": 7.954508015787775e-07,
      "loss": 0.0121,
      "step": 2813820
    },
    {
      "epoch": 4.6049108750155465,
      "grad_norm": 0.15417563915252686,
      "learning_rate": 7.953849093652603e-07,
      "loss": 0.0093,
      "step": 2813840
    },
    {
      "epoch": 4.6049436054542,
      "grad_norm": 0.20143482089042664,
      "learning_rate": 7.953190171517433e-07,
      "loss": 0.009,
      "step": 2813860
    },
    {
      "epoch": 4.604976335892854,
      "grad_norm": 0.5872358679771423,
      "learning_rate": 7.952531249382261e-07,
      "loss": 0.0085,
      "step": 2813880
    },
    {
      "epoch": 4.605009066331507,
      "grad_norm": 0.1596977263689041,
      "learning_rate": 7.95187232724709e-07,
      "loss": 0.0086,
      "step": 2813900
    },
    {
      "epoch": 4.60504179677016,
      "grad_norm": 0.2786332666873932,
      "learning_rate": 7.951213405111918e-07,
      "loss": 0.0123,
      "step": 2813920
    },
    {
      "epoch": 4.605074527208814,
      "grad_norm": 0.29857921600341797,
      "learning_rate": 7.950554482976747e-07,
      "loss": 0.0112,
      "step": 2813940
    },
    {
      "epoch": 4.605107257647467,
      "grad_norm": 0.1307927370071411,
      "learning_rate": 7.949895560841576e-07,
      "loss": 0.01,
      "step": 2813960
    },
    {
      "epoch": 4.605139988086121,
      "grad_norm": 0.34818485379219055,
      "learning_rate": 7.949236638706405e-07,
      "loss": 0.0098,
      "step": 2813980
    },
    {
      "epoch": 4.6051727185247735,
      "grad_norm": 0.36560338735580444,
      "learning_rate": 7.948577716571233e-07,
      "loss": 0.0078,
      "step": 2814000
    },
    {
      "epoch": 4.605205448963427,
      "grad_norm": 0.17821526527404785,
      "learning_rate": 7.947918794436062e-07,
      "loss": 0.0088,
      "step": 2814020
    },
    {
      "epoch": 4.605238179402081,
      "grad_norm": 0.12855857610702515,
      "learning_rate": 7.947259872300891e-07,
      "loss": 0.0135,
      "step": 2814040
    },
    {
      "epoch": 4.605270909840733,
      "grad_norm": 0.17980603873729706,
      "learning_rate": 7.94660095016572e-07,
      "loss": 0.0053,
      "step": 2814060
    },
    {
      "epoch": 4.605303640279387,
      "grad_norm": 0.13248923420906067,
      "learning_rate": 7.945942028030548e-07,
      "loss": 0.005,
      "step": 2814080
    },
    {
      "epoch": 4.605336370718041,
      "grad_norm": 0.11521759629249573,
      "learning_rate": 7.945283105895376e-07,
      "loss": 0.013,
      "step": 2814100
    },
    {
      "epoch": 4.605369101156693,
      "grad_norm": 0.3386673033237457,
      "learning_rate": 7.944624183760205e-07,
      "loss": 0.0079,
      "step": 2814120
    },
    {
      "epoch": 4.605401831595347,
      "grad_norm": 0.2124234288930893,
      "learning_rate": 7.943965261625034e-07,
      "loss": 0.006,
      "step": 2814140
    },
    {
      "epoch": 4.6054345620340005,
      "grad_norm": 0.08759051561355591,
      "learning_rate": 7.943306339489863e-07,
      "loss": 0.0102,
      "step": 2814160
    },
    {
      "epoch": 4.605467292472654,
      "grad_norm": 0.23064221441745758,
      "learning_rate": 7.942647417354691e-07,
      "loss": 0.0062,
      "step": 2814180
    },
    {
      "epoch": 4.605500022911307,
      "grad_norm": 0.3023616373538971,
      "learning_rate": 7.94198849521952e-07,
      "loss": 0.0092,
      "step": 2814200
    },
    {
      "epoch": 4.60553275334996,
      "grad_norm": 0.1744392365217209,
      "learning_rate": 7.941329573084349e-07,
      "loss": 0.0112,
      "step": 2814220
    },
    {
      "epoch": 4.605565483788614,
      "grad_norm": 0.1345314085483551,
      "learning_rate": 7.940670650949178e-07,
      "loss": 0.007,
      "step": 2814240
    },
    {
      "epoch": 4.605598214227268,
      "grad_norm": 0.39626380801200867,
      "learning_rate": 7.940011728814006e-07,
      "loss": 0.0076,
      "step": 2814260
    },
    {
      "epoch": 4.60563094466592,
      "grad_norm": 0.11735810339450836,
      "learning_rate": 7.939352806678835e-07,
      "loss": 0.007,
      "step": 2814280
    },
    {
      "epoch": 4.605663675104574,
      "grad_norm": 0.36887454986572266,
      "learning_rate": 7.938693884543665e-07,
      "loss": 0.0135,
      "step": 2814300
    },
    {
      "epoch": 4.6056964055432275,
      "grad_norm": 0.24510258436203003,
      "learning_rate": 7.938034962408493e-07,
      "loss": 0.0065,
      "step": 2814320
    },
    {
      "epoch": 4.60572913598188,
      "grad_norm": 0.5043384432792664,
      "learning_rate": 7.937376040273321e-07,
      "loss": 0.0188,
      "step": 2814340
    },
    {
      "epoch": 4.605761866420534,
      "grad_norm": 0.5164551138877869,
      "learning_rate": 7.93671711813815e-07,
      "loss": 0.0076,
      "step": 2814360
    },
    {
      "epoch": 4.605794596859187,
      "grad_norm": 0.24343860149383545,
      "learning_rate": 7.936058196002978e-07,
      "loss": 0.008,
      "step": 2814380
    },
    {
      "epoch": 4.60582732729784,
      "grad_norm": 0.24961961805820465,
      "learning_rate": 7.935399273867809e-07,
      "loss": 0.0094,
      "step": 2814400
    },
    {
      "epoch": 4.605860057736494,
      "grad_norm": 0.25402337312698364,
      "learning_rate": 7.934740351732637e-07,
      "loss": 0.0072,
      "step": 2814420
    },
    {
      "epoch": 4.605892788175147,
      "grad_norm": 0.2837139368057251,
      "learning_rate": 7.934081429597464e-07,
      "loss": 0.0078,
      "step": 2814440
    },
    {
      "epoch": 4.605925518613801,
      "grad_norm": 0.18219530582427979,
      "learning_rate": 7.933422507462293e-07,
      "loss": 0.0086,
      "step": 2814460
    },
    {
      "epoch": 4.6059582490524535,
      "grad_norm": 0.1681194007396698,
      "learning_rate": 7.932763585327124e-07,
      "loss": 0.0049,
      "step": 2814480
    },
    {
      "epoch": 4.605990979491107,
      "grad_norm": 0.07347887009382248,
      "learning_rate": 7.932104663191952e-07,
      "loss": 0.0069,
      "step": 2814500
    },
    {
      "epoch": 4.606023709929761,
      "grad_norm": 0.2056543231010437,
      "learning_rate": 7.93144574105678e-07,
      "loss": 0.0076,
      "step": 2814520
    },
    {
      "epoch": 4.606056440368413,
      "grad_norm": 0.16599895060062408,
      "learning_rate": 7.930786818921608e-07,
      "loss": 0.0121,
      "step": 2814540
    },
    {
      "epoch": 4.606089170807067,
      "grad_norm": 0.1348867118358612,
      "learning_rate": 7.930127896786436e-07,
      "loss": 0.0115,
      "step": 2814560
    },
    {
      "epoch": 4.606121901245721,
      "grad_norm": 0.36306285858154297,
      "learning_rate": 7.929468974651267e-07,
      "loss": 0.0129,
      "step": 2814580
    },
    {
      "epoch": 4.606154631684374,
      "grad_norm": 0.1994277536869049,
      "learning_rate": 7.928810052516095e-07,
      "loss": 0.0108,
      "step": 2814600
    },
    {
      "epoch": 4.606187362123027,
      "grad_norm": 0.499531090259552,
      "learning_rate": 7.928151130380924e-07,
      "loss": 0.0073,
      "step": 2814620
    },
    {
      "epoch": 4.6062200925616805,
      "grad_norm": 0.31112509965896606,
      "learning_rate": 7.927492208245752e-07,
      "loss": 0.0076,
      "step": 2814640
    },
    {
      "epoch": 4.606252823000334,
      "grad_norm": 0.05597236379981041,
      "learning_rate": 7.926833286110582e-07,
      "loss": 0.0077,
      "step": 2814660
    },
    {
      "epoch": 4.606285553438987,
      "grad_norm": 0.11229410022497177,
      "learning_rate": 7.92617436397541e-07,
      "loss": 0.0066,
      "step": 2814680
    },
    {
      "epoch": 4.60631828387764,
      "grad_norm": 0.2894169092178345,
      "learning_rate": 7.925515441840239e-07,
      "loss": 0.0089,
      "step": 2814700
    },
    {
      "epoch": 4.606351014316294,
      "grad_norm": 0.3059711754322052,
      "learning_rate": 7.924856519705067e-07,
      "loss": 0.0071,
      "step": 2814720
    },
    {
      "epoch": 4.606383744754947,
      "grad_norm": 0.19318725168704987,
      "learning_rate": 7.924197597569897e-07,
      "loss": 0.0079,
      "step": 2814740
    },
    {
      "epoch": 4.6064164751936,
      "grad_norm": 0.5901792645454407,
      "learning_rate": 7.923538675434725e-07,
      "loss": 0.0076,
      "step": 2814760
    },
    {
      "epoch": 4.606449205632254,
      "grad_norm": 0.2807154953479767,
      "learning_rate": 7.922879753299554e-07,
      "loss": 0.0066,
      "step": 2814780
    },
    {
      "epoch": 4.6064819360709075,
      "grad_norm": 0.229817733168602,
      "learning_rate": 7.922220831164382e-07,
      "loss": 0.0112,
      "step": 2814800
    },
    {
      "epoch": 4.60651466650956,
      "grad_norm": 0.33320820331573486,
      "learning_rate": 7.92156190902921e-07,
      "loss": 0.0115,
      "step": 2814820
    },
    {
      "epoch": 4.606547396948214,
      "grad_norm": 0.3157689571380615,
      "learning_rate": 7.92090298689404e-07,
      "loss": 0.0109,
      "step": 2814840
    },
    {
      "epoch": 4.606580127386867,
      "grad_norm": 0.1478433758020401,
      "learning_rate": 7.920244064758868e-07,
      "loss": 0.0068,
      "step": 2814860
    },
    {
      "epoch": 4.606612857825521,
      "grad_norm": 0.1615726202726364,
      "learning_rate": 7.919585142623697e-07,
      "loss": 0.0129,
      "step": 2814880
    },
    {
      "epoch": 4.606645588264174,
      "grad_norm": 0.10202181339263916,
      "learning_rate": 7.918926220488525e-07,
      "loss": 0.0095,
      "step": 2814900
    },
    {
      "epoch": 4.606678318702827,
      "grad_norm": 0.441513329744339,
      "learning_rate": 7.918267298353355e-07,
      "loss": 0.011,
      "step": 2814920
    },
    {
      "epoch": 4.606711049141481,
      "grad_norm": 0.7694718837738037,
      "learning_rate": 7.917608376218183e-07,
      "loss": 0.0137,
      "step": 2814940
    },
    {
      "epoch": 4.606743779580134,
      "grad_norm": 0.3228704035282135,
      "learning_rate": 7.916949454083012e-07,
      "loss": 0.0101,
      "step": 2814960
    },
    {
      "epoch": 4.606776510018787,
      "grad_norm": 0.16128914058208466,
      "learning_rate": 7.91629053194784e-07,
      "loss": 0.006,
      "step": 2814980
    },
    {
      "epoch": 4.606809240457441,
      "grad_norm": 0.2858455181121826,
      "learning_rate": 7.915631609812669e-07,
      "loss": 0.0106,
      "step": 2815000
    },
    {
      "epoch": 4.6068419708960935,
      "grad_norm": 0.40639352798461914,
      "learning_rate": 7.914972687677498e-07,
      "loss": 0.0127,
      "step": 2815020
    },
    {
      "epoch": 4.606874701334747,
      "grad_norm": 0.10348756611347198,
      "learning_rate": 7.914313765542327e-07,
      "loss": 0.01,
      "step": 2815040
    },
    {
      "epoch": 4.606907431773401,
      "grad_norm": 0.12264682352542877,
      "learning_rate": 7.913654843407155e-07,
      "loss": 0.0086,
      "step": 2815060
    },
    {
      "epoch": 4.606940162212054,
      "grad_norm": 0.1228218525648117,
      "learning_rate": 7.912995921271984e-07,
      "loss": 0.0071,
      "step": 2815080
    },
    {
      "epoch": 4.606972892650707,
      "grad_norm": 0.2599031627178192,
      "learning_rate": 7.912336999136813e-07,
      "loss": 0.0096,
      "step": 2815100
    },
    {
      "epoch": 4.607005623089361,
      "grad_norm": 0.10649794340133667,
      "learning_rate": 7.911678077001642e-07,
      "loss": 0.0094,
      "step": 2815120
    },
    {
      "epoch": 4.607038353528014,
      "grad_norm": 0.16531261801719666,
      "learning_rate": 7.91101915486647e-07,
      "loss": 0.0067,
      "step": 2815140
    },
    {
      "epoch": 4.607071083966668,
      "grad_norm": 0.12003995478153229,
      "learning_rate": 7.910360232731298e-07,
      "loss": 0.0104,
      "step": 2815160
    },
    {
      "epoch": 4.6071038144053205,
      "grad_norm": 0.10484261065721512,
      "learning_rate": 7.909701310596128e-07,
      "loss": 0.0093,
      "step": 2815180
    },
    {
      "epoch": 4.607136544843974,
      "grad_norm": 0.14608511328697205,
      "learning_rate": 7.909042388460956e-07,
      "loss": 0.0126,
      "step": 2815200
    },
    {
      "epoch": 4.607169275282628,
      "grad_norm": 0.3877408504486084,
      "learning_rate": 7.908383466325785e-07,
      "loss": 0.0076,
      "step": 2815220
    },
    {
      "epoch": 4.60720200572128,
      "grad_norm": 0.11673003435134888,
      "learning_rate": 7.907724544190613e-07,
      "loss": 0.0049,
      "step": 2815240
    },
    {
      "epoch": 4.607234736159934,
      "grad_norm": 0.3696441948413849,
      "learning_rate": 7.907065622055442e-07,
      "loss": 0.0089,
      "step": 2815260
    },
    {
      "epoch": 4.6072674665985875,
      "grad_norm": 0.13137371838092804,
      "learning_rate": 7.906406699920271e-07,
      "loss": 0.0115,
      "step": 2815280
    },
    {
      "epoch": 4.60730019703724,
      "grad_norm": 0.13347379863262177,
      "learning_rate": 7.9057477777851e-07,
      "loss": 0.0092,
      "step": 2815300
    },
    {
      "epoch": 4.607332927475894,
      "grad_norm": 0.13799713551998138,
      "learning_rate": 7.905088855649928e-07,
      "loss": 0.0161,
      "step": 2815320
    },
    {
      "epoch": 4.607365657914547,
      "grad_norm": 0.09605546295642853,
      "learning_rate": 7.904429933514757e-07,
      "loss": 0.0076,
      "step": 2815340
    },
    {
      "epoch": 4.607398388353201,
      "grad_norm": 0.3113187253475189,
      "learning_rate": 7.903771011379586e-07,
      "loss": 0.0086,
      "step": 2815360
    },
    {
      "epoch": 4.607431118791854,
      "grad_norm": 0.45772087574005127,
      "learning_rate": 7.903112089244415e-07,
      "loss": 0.0155,
      "step": 2815380
    },
    {
      "epoch": 4.607463849230507,
      "grad_norm": 0.09403490275144577,
      "learning_rate": 7.902453167109243e-07,
      "loss": 0.0063,
      "step": 2815400
    },
    {
      "epoch": 4.607496579669161,
      "grad_norm": 0.2018536478281021,
      "learning_rate": 7.901794244974072e-07,
      "loss": 0.0088,
      "step": 2815420
    },
    {
      "epoch": 4.6075293101078145,
      "grad_norm": 0.247991681098938,
      "learning_rate": 7.9011353228389e-07,
      "loss": 0.0092,
      "step": 2815440
    },
    {
      "epoch": 4.607562040546467,
      "grad_norm": 0.14230859279632568,
      "learning_rate": 7.90047640070373e-07,
      "loss": 0.0074,
      "step": 2815460
    },
    {
      "epoch": 4.607594770985121,
      "grad_norm": 0.18731030821800232,
      "learning_rate": 7.899817478568558e-07,
      "loss": 0.0164,
      "step": 2815480
    },
    {
      "epoch": 4.607627501423774,
      "grad_norm": 0.3469918370246887,
      "learning_rate": 7.899158556433386e-07,
      "loss": 0.0101,
      "step": 2815500
    },
    {
      "epoch": 4.607660231862427,
      "grad_norm": 0.13709178566932678,
      "learning_rate": 7.898499634298215e-07,
      "loss": 0.009,
      "step": 2815520
    },
    {
      "epoch": 4.607692962301081,
      "grad_norm": 0.24744638800621033,
      "learning_rate": 7.897840712163044e-07,
      "loss": 0.009,
      "step": 2815540
    },
    {
      "epoch": 4.607725692739734,
      "grad_norm": 0.11777477711439133,
      "learning_rate": 7.897181790027873e-07,
      "loss": 0.0124,
      "step": 2815560
    },
    {
      "epoch": 4.607758423178387,
      "grad_norm": 0.053791340440511703,
      "learning_rate": 7.896522867892701e-07,
      "loss": 0.0072,
      "step": 2815580
    },
    {
      "epoch": 4.607791153617041,
      "grad_norm": 0.13024082779884338,
      "learning_rate": 7.89586394575753e-07,
      "loss": 0.0074,
      "step": 2815600
    },
    {
      "epoch": 4.607823884055694,
      "grad_norm": 0.25564509630203247,
      "learning_rate": 7.89520502362236e-07,
      "loss": 0.008,
      "step": 2815620
    },
    {
      "epoch": 4.607856614494348,
      "grad_norm": 0.21483835577964783,
      "learning_rate": 7.894546101487188e-07,
      "loss": 0.0069,
      "step": 2815640
    },
    {
      "epoch": 4.6078893449330005,
      "grad_norm": 0.17288488149642944,
      "learning_rate": 7.893887179352016e-07,
      "loss": 0.0134,
      "step": 2815660
    },
    {
      "epoch": 4.607922075371654,
      "grad_norm": 0.3102428913116455,
      "learning_rate": 7.893228257216845e-07,
      "loss": 0.0074,
      "step": 2815680
    },
    {
      "epoch": 4.607954805810308,
      "grad_norm": 0.28958725929260254,
      "learning_rate": 7.892569335081673e-07,
      "loss": 0.0109,
      "step": 2815700
    },
    {
      "epoch": 4.607987536248961,
      "grad_norm": 0.2539598345756531,
      "learning_rate": 7.891910412946504e-07,
      "loss": 0.0101,
      "step": 2815720
    },
    {
      "epoch": 4.608020266687614,
      "grad_norm": 0.4434680938720703,
      "learning_rate": 7.891251490811332e-07,
      "loss": 0.0116,
      "step": 2815740
    },
    {
      "epoch": 4.608052997126268,
      "grad_norm": 0.2677520215511322,
      "learning_rate": 7.89059256867616e-07,
      "loss": 0.0083,
      "step": 2815760
    },
    {
      "epoch": 4.608085727564921,
      "grad_norm": 0.07956846803426743,
      "learning_rate": 7.889933646540988e-07,
      "loss": 0.0092,
      "step": 2815780
    },
    {
      "epoch": 4.608118458003574,
      "grad_norm": 0.2674520015716553,
      "learning_rate": 7.889274724405819e-07,
      "loss": 0.0098,
      "step": 2815800
    },
    {
      "epoch": 4.6081511884422275,
      "grad_norm": 0.19667008519172668,
      "learning_rate": 7.888615802270647e-07,
      "loss": 0.0082,
      "step": 2815820
    },
    {
      "epoch": 4.608183918880881,
      "grad_norm": 0.11144516617059708,
      "learning_rate": 7.887956880135475e-07,
      "loss": 0.0096,
      "step": 2815840
    },
    {
      "epoch": 4.608216649319534,
      "grad_norm": 0.19262655079364777,
      "learning_rate": 7.887297958000303e-07,
      "loss": 0.0094,
      "step": 2815860
    },
    {
      "epoch": 4.608249379758187,
      "grad_norm": 0.09474383294582367,
      "learning_rate": 7.886639035865131e-07,
      "loss": 0.0108,
      "step": 2815880
    },
    {
      "epoch": 4.608282110196841,
      "grad_norm": 0.2955544590950012,
      "learning_rate": 7.885980113729962e-07,
      "loss": 0.0075,
      "step": 2815900
    },
    {
      "epoch": 4.608314840635495,
      "grad_norm": 0.10697610676288605,
      "learning_rate": 7.88532119159479e-07,
      "loss": 0.0091,
      "step": 2815920
    },
    {
      "epoch": 4.608347571074147,
      "grad_norm": 0.15602901577949524,
      "learning_rate": 7.884662269459619e-07,
      "loss": 0.0091,
      "step": 2815940
    },
    {
      "epoch": 4.608380301512801,
      "grad_norm": 0.2941121459007263,
      "learning_rate": 7.884003347324447e-07,
      "loss": 0.0083,
      "step": 2815960
    },
    {
      "epoch": 4.6084130319514545,
      "grad_norm": 0.362496554851532,
      "learning_rate": 7.883344425189277e-07,
      "loss": 0.01,
      "step": 2815980
    },
    {
      "epoch": 4.608445762390107,
      "grad_norm": 0.1543329805135727,
      "learning_rate": 7.882685503054105e-07,
      "loss": 0.0079,
      "step": 2816000
    },
    {
      "epoch": 4.608478492828761,
      "grad_norm": 0.1300354301929474,
      "learning_rate": 7.882026580918934e-07,
      "loss": 0.0055,
      "step": 2816020
    },
    {
      "epoch": 4.608511223267414,
      "grad_norm": 0.20773635804653168,
      "learning_rate": 7.881367658783762e-07,
      "loss": 0.0059,
      "step": 2816040
    },
    {
      "epoch": 4.608543953706068,
      "grad_norm": 0.20385751128196716,
      "learning_rate": 7.880708736648592e-07,
      "loss": 0.0078,
      "step": 2816060
    },
    {
      "epoch": 4.608576684144721,
      "grad_norm": 0.120792455971241,
      "learning_rate": 7.88004981451342e-07,
      "loss": 0.0073,
      "step": 2816080
    },
    {
      "epoch": 4.608609414583374,
      "grad_norm": 0.0960957482457161,
      "learning_rate": 7.879390892378249e-07,
      "loss": 0.0068,
      "step": 2816100
    },
    {
      "epoch": 4.608642145022028,
      "grad_norm": 0.41312602162361145,
      "learning_rate": 7.878731970243077e-07,
      "loss": 0.0112,
      "step": 2816120
    },
    {
      "epoch": 4.6086748754606806,
      "grad_norm": 0.1480792611837387,
      "learning_rate": 7.878073048107906e-07,
      "loss": 0.0098,
      "step": 2816140
    },
    {
      "epoch": 4.608707605899334,
      "grad_norm": 0.5434986352920532,
      "learning_rate": 7.877414125972735e-07,
      "loss": 0.0119,
      "step": 2816160
    },
    {
      "epoch": 4.608740336337988,
      "grad_norm": 0.10765203833580017,
      "learning_rate": 7.876755203837563e-07,
      "loss": 0.0084,
      "step": 2816180
    },
    {
      "epoch": 4.6087730667766404,
      "grad_norm": 0.228554368019104,
      "learning_rate": 7.876096281702392e-07,
      "loss": 0.0095,
      "step": 2816200
    },
    {
      "epoch": 4.608805797215294,
      "grad_norm": 0.47490110993385315,
      "learning_rate": 7.87543735956722e-07,
      "loss": 0.0057,
      "step": 2816220
    },
    {
      "epoch": 4.608838527653948,
      "grad_norm": 0.12451425939798355,
      "learning_rate": 7.87477843743205e-07,
      "loss": 0.0084,
      "step": 2816240
    },
    {
      "epoch": 4.608871258092601,
      "grad_norm": 0.09830360114574432,
      "learning_rate": 7.874119515296878e-07,
      "loss": 0.0081,
      "step": 2816260
    },
    {
      "epoch": 4.608903988531254,
      "grad_norm": 0.10569895803928375,
      "learning_rate": 7.873460593161707e-07,
      "loss": 0.0101,
      "step": 2816280
    },
    {
      "epoch": 4.6089367189699075,
      "grad_norm": 0.37638330459594727,
      "learning_rate": 7.872801671026535e-07,
      "loss": 0.0112,
      "step": 2816300
    },
    {
      "epoch": 4.608969449408561,
      "grad_norm": 0.09578198194503784,
      "learning_rate": 7.872142748891364e-07,
      "loss": 0.0154,
      "step": 2816320
    },
    {
      "epoch": 4.609002179847215,
      "grad_norm": 0.31006520986557007,
      "learning_rate": 7.871483826756193e-07,
      "loss": 0.005,
      "step": 2816340
    },
    {
      "epoch": 4.609034910285867,
      "grad_norm": 0.506203293800354,
      "learning_rate": 7.870824904621022e-07,
      "loss": 0.0104,
      "step": 2816360
    },
    {
      "epoch": 4.609067640724521,
      "grad_norm": 0.7664933800697327,
      "learning_rate": 7.87016598248585e-07,
      "loss": 0.009,
      "step": 2816380
    },
    {
      "epoch": 4.609100371163175,
      "grad_norm": 0.28233957290649414,
      "learning_rate": 7.869507060350679e-07,
      "loss": 0.0093,
      "step": 2816400
    },
    {
      "epoch": 4.609133101601827,
      "grad_norm": 0.273018479347229,
      "learning_rate": 7.868848138215508e-07,
      "loss": 0.0084,
      "step": 2816420
    },
    {
      "epoch": 4.609165832040481,
      "grad_norm": 0.6337496638298035,
      "learning_rate": 7.868189216080337e-07,
      "loss": 0.0125,
      "step": 2816440
    },
    {
      "epoch": 4.6091985624791345,
      "grad_norm": 0.41618970036506653,
      "learning_rate": 7.867530293945165e-07,
      "loss": 0.009,
      "step": 2816460
    },
    {
      "epoch": 4.609231292917787,
      "grad_norm": 0.7530671954154968,
      "learning_rate": 7.866871371809993e-07,
      "loss": 0.0153,
      "step": 2816480
    },
    {
      "epoch": 4.609264023356441,
      "grad_norm": 0.1916070133447647,
      "learning_rate": 7.866212449674823e-07,
      "loss": 0.0055,
      "step": 2816500
    },
    {
      "epoch": 4.609296753795094,
      "grad_norm": 0.4547617733478546,
      "learning_rate": 7.865553527539651e-07,
      "loss": 0.0112,
      "step": 2816520
    },
    {
      "epoch": 4.609329484233748,
      "grad_norm": 0.5740886926651001,
      "learning_rate": 7.86489460540448e-07,
      "loss": 0.0097,
      "step": 2816540
    },
    {
      "epoch": 4.609362214672401,
      "grad_norm": 0.3473072946071625,
      "learning_rate": 7.864235683269308e-07,
      "loss": 0.0086,
      "step": 2816560
    },
    {
      "epoch": 4.609394945111054,
      "grad_norm": 0.29983949661254883,
      "learning_rate": 7.863576761134137e-07,
      "loss": 0.0099,
      "step": 2816580
    },
    {
      "epoch": 4.609427675549708,
      "grad_norm": 0.12179062515497208,
      "learning_rate": 7.862917838998966e-07,
      "loss": 0.009,
      "step": 2816600
    },
    {
      "epoch": 4.6094604059883615,
      "grad_norm": 0.21093730628490448,
      "learning_rate": 7.862258916863795e-07,
      "loss": 0.0068,
      "step": 2816620
    },
    {
      "epoch": 4.609493136427014,
      "grad_norm": 0.03967463970184326,
      "learning_rate": 7.861599994728623e-07,
      "loss": 0.0068,
      "step": 2816640
    },
    {
      "epoch": 4.609525866865668,
      "grad_norm": 0.6996563076972961,
      "learning_rate": 7.860941072593452e-07,
      "loss": 0.0129,
      "step": 2816660
    },
    {
      "epoch": 4.609558597304321,
      "grad_norm": 0.1503295600414276,
      "learning_rate": 7.860282150458281e-07,
      "loss": 0.0079,
      "step": 2816680
    },
    {
      "epoch": 4.609591327742974,
      "grad_norm": 0.09125575423240662,
      "learning_rate": 7.85962322832311e-07,
      "loss": 0.0051,
      "step": 2816700
    },
    {
      "epoch": 4.609624058181628,
      "grad_norm": 0.17043279111385345,
      "learning_rate": 7.858964306187938e-07,
      "loss": 0.008,
      "step": 2816720
    },
    {
      "epoch": 4.609656788620281,
      "grad_norm": 0.0709809809923172,
      "learning_rate": 7.858305384052767e-07,
      "loss": 0.0117,
      "step": 2816740
    },
    {
      "epoch": 4.609689519058934,
      "grad_norm": 0.19646969437599182,
      "learning_rate": 7.857646461917595e-07,
      "loss": 0.0067,
      "step": 2816760
    },
    {
      "epoch": 4.609722249497588,
      "grad_norm": 0.21047928929328918,
      "learning_rate": 7.856987539782425e-07,
      "loss": 0.0093,
      "step": 2816780
    },
    {
      "epoch": 4.609754979936241,
      "grad_norm": 0.12567180395126343,
      "learning_rate": 7.856328617647253e-07,
      "loss": 0.0106,
      "step": 2816800
    },
    {
      "epoch": 4.609787710374895,
      "grad_norm": 0.09059786796569824,
      "learning_rate": 7.855669695512081e-07,
      "loss": 0.0092,
      "step": 2816820
    },
    {
      "epoch": 4.6098204408135475,
      "grad_norm": 0.06349384784698486,
      "learning_rate": 7.85501077337691e-07,
      "loss": 0.0065,
      "step": 2816840
    },
    {
      "epoch": 4.609853171252201,
      "grad_norm": 0.12155213207006454,
      "learning_rate": 7.854351851241739e-07,
      "loss": 0.0075,
      "step": 2816860
    },
    {
      "epoch": 4.609885901690855,
      "grad_norm": 0.2848190367221832,
      "learning_rate": 7.853692929106568e-07,
      "loss": 0.0122,
      "step": 2816880
    },
    {
      "epoch": 4.609918632129508,
      "grad_norm": 0.11995857208967209,
      "learning_rate": 7.853034006971396e-07,
      "loss": 0.0084,
      "step": 2816900
    },
    {
      "epoch": 4.609951362568161,
      "grad_norm": 0.10488434135913849,
      "learning_rate": 7.852375084836225e-07,
      "loss": 0.0046,
      "step": 2816920
    },
    {
      "epoch": 4.6099840930068146,
      "grad_norm": 0.253221333026886,
      "learning_rate": 7.851716162701055e-07,
      "loss": 0.0093,
      "step": 2816940
    },
    {
      "epoch": 4.610016823445468,
      "grad_norm": 0.10799448937177658,
      "learning_rate": 7.851057240565883e-07,
      "loss": 0.0063,
      "step": 2816960
    },
    {
      "epoch": 4.610049553884121,
      "grad_norm": 0.11443625390529633,
      "learning_rate": 7.850398318430711e-07,
      "loss": 0.0054,
      "step": 2816980
    },
    {
      "epoch": 4.6100822843227744,
      "grad_norm": 0.3895919919013977,
      "learning_rate": 7.84973939629554e-07,
      "loss": 0.0102,
      "step": 2817000
    },
    {
      "epoch": 4.610115014761428,
      "grad_norm": 0.06510404497385025,
      "learning_rate": 7.849080474160368e-07,
      "loss": 0.0148,
      "step": 2817020
    },
    {
      "epoch": 4.610147745200081,
      "grad_norm": 0.330314964056015,
      "learning_rate": 7.848421552025199e-07,
      "loss": 0.0104,
      "step": 2817040
    },
    {
      "epoch": 4.610180475638734,
      "grad_norm": 0.18182885646820068,
      "learning_rate": 7.847762629890027e-07,
      "loss": 0.0127,
      "step": 2817060
    },
    {
      "epoch": 4.610213206077388,
      "grad_norm": 0.18386399745941162,
      "learning_rate": 7.847103707754855e-07,
      "loss": 0.0128,
      "step": 2817080
    },
    {
      "epoch": 4.6102459365160415,
      "grad_norm": 0.35615089535713196,
      "learning_rate": 7.846444785619683e-07,
      "loss": 0.0132,
      "step": 2817100
    },
    {
      "epoch": 4.610278666954694,
      "grad_norm": 0.17414997518062592,
      "learning_rate": 7.845785863484514e-07,
      "loss": 0.01,
      "step": 2817120
    },
    {
      "epoch": 4.610311397393348,
      "grad_norm": 0.22635434567928314,
      "learning_rate": 7.845126941349342e-07,
      "loss": 0.009,
      "step": 2817140
    },
    {
      "epoch": 4.610344127832001,
      "grad_norm": 0.16933239996433258,
      "learning_rate": 7.844468019214171e-07,
      "loss": 0.0057,
      "step": 2817160
    },
    {
      "epoch": 4.610376858270655,
      "grad_norm": 0.14072725176811218,
      "learning_rate": 7.843809097078998e-07,
      "loss": 0.0097,
      "step": 2817180
    },
    {
      "epoch": 4.610409588709308,
      "grad_norm": 0.3444693982601166,
      "learning_rate": 7.843150174943826e-07,
      "loss": 0.0077,
      "step": 2817200
    },
    {
      "epoch": 4.610442319147961,
      "grad_norm": 0.7146698236465454,
      "learning_rate": 7.842491252808657e-07,
      "loss": 0.0149,
      "step": 2817220
    },
    {
      "epoch": 4.610475049586615,
      "grad_norm": 0.4215339422225952,
      "learning_rate": 7.841832330673485e-07,
      "loss": 0.0089,
      "step": 2817240
    },
    {
      "epoch": 4.610507780025268,
      "grad_norm": 0.20169617235660553,
      "learning_rate": 7.841173408538314e-07,
      "loss": 0.0072,
      "step": 2817260
    },
    {
      "epoch": 4.610540510463921,
      "grad_norm": 0.08464650064706802,
      "learning_rate": 7.840514486403142e-07,
      "loss": 0.0084,
      "step": 2817280
    },
    {
      "epoch": 4.610573240902575,
      "grad_norm": 0.4652743339538574,
      "learning_rate": 7.839855564267972e-07,
      "loss": 0.0097,
      "step": 2817300
    },
    {
      "epoch": 4.6106059713412275,
      "grad_norm": 0.250151664018631,
      "learning_rate": 7.8391966421328e-07,
      "loss": 0.0075,
      "step": 2817320
    },
    {
      "epoch": 4.610638701779881,
      "grad_norm": 0.18090341985225677,
      "learning_rate": 7.838537719997629e-07,
      "loss": 0.0096,
      "step": 2817340
    },
    {
      "epoch": 4.610671432218535,
      "grad_norm": 0.07067423313856125,
      "learning_rate": 7.837878797862457e-07,
      "loss": 0.0068,
      "step": 2817360
    },
    {
      "epoch": 4.610704162657188,
      "grad_norm": 0.23231250047683716,
      "learning_rate": 7.837219875727287e-07,
      "loss": 0.0092,
      "step": 2817380
    },
    {
      "epoch": 4.610736893095841,
      "grad_norm": 0.24591019749641418,
      "learning_rate": 7.836560953592115e-07,
      "loss": 0.0083,
      "step": 2817400
    },
    {
      "epoch": 4.610769623534495,
      "grad_norm": 0.37262627482414246,
      "learning_rate": 7.835902031456944e-07,
      "loss": 0.0082,
      "step": 2817420
    },
    {
      "epoch": 4.610802353973148,
      "grad_norm": 0.17037123441696167,
      "learning_rate": 7.835243109321772e-07,
      "loss": 0.0091,
      "step": 2817440
    },
    {
      "epoch": 4.610835084411801,
      "grad_norm": 0.16615858674049377,
      "learning_rate": 7.834584187186601e-07,
      "loss": 0.0059,
      "step": 2817460
    },
    {
      "epoch": 4.6108678148504545,
      "grad_norm": 0.4458678662776947,
      "learning_rate": 7.83392526505143e-07,
      "loss": 0.0066,
      "step": 2817480
    },
    {
      "epoch": 4.610900545289108,
      "grad_norm": 0.5952693819999695,
      "learning_rate": 7.833266342916259e-07,
      "loss": 0.0084,
      "step": 2817500
    },
    {
      "epoch": 4.610933275727762,
      "grad_norm": 0.5143022537231445,
      "learning_rate": 7.832607420781087e-07,
      "loss": 0.0087,
      "step": 2817520
    },
    {
      "epoch": 4.610966006166414,
      "grad_norm": 0.08932102471590042,
      "learning_rate": 7.831948498645915e-07,
      "loss": 0.0065,
      "step": 2817540
    },
    {
      "epoch": 4.610998736605068,
      "grad_norm": 0.20149312913417816,
      "learning_rate": 7.831289576510745e-07,
      "loss": 0.0084,
      "step": 2817560
    },
    {
      "epoch": 4.611031467043722,
      "grad_norm": 0.09571503847837448,
      "learning_rate": 7.830630654375573e-07,
      "loss": 0.0108,
      "step": 2817580
    },
    {
      "epoch": 4.611064197482374,
      "grad_norm": 0.24122057855129242,
      "learning_rate": 7.829971732240402e-07,
      "loss": 0.0089,
      "step": 2817600
    },
    {
      "epoch": 4.611096927921028,
      "grad_norm": 0.528997540473938,
      "learning_rate": 7.82931281010523e-07,
      "loss": 0.0081,
      "step": 2817620
    },
    {
      "epoch": 4.6111296583596815,
      "grad_norm": 0.16881448030471802,
      "learning_rate": 7.828653887970059e-07,
      "loss": 0.008,
      "step": 2817640
    },
    {
      "epoch": 4.611162388798334,
      "grad_norm": 0.0914013534784317,
      "learning_rate": 7.827994965834888e-07,
      "loss": 0.008,
      "step": 2817660
    },
    {
      "epoch": 4.611195119236988,
      "grad_norm": 0.17153193056583405,
      "learning_rate": 7.827336043699717e-07,
      "loss": 0.0094,
      "step": 2817680
    },
    {
      "epoch": 4.611227849675641,
      "grad_norm": 0.3137640357017517,
      "learning_rate": 7.826677121564545e-07,
      "loss": 0.0068,
      "step": 2817700
    },
    {
      "epoch": 4.611260580114295,
      "grad_norm": 0.12113769352436066,
      "learning_rate": 7.826018199429374e-07,
      "loss": 0.0089,
      "step": 2817720
    },
    {
      "epoch": 4.611293310552948,
      "grad_norm": 0.20777297019958496,
      "learning_rate": 7.825359277294203e-07,
      "loss": 0.0106,
      "step": 2817740
    },
    {
      "epoch": 4.611326040991601,
      "grad_norm": 0.3701155483722687,
      "learning_rate": 7.824700355159032e-07,
      "loss": 0.0065,
      "step": 2817760
    },
    {
      "epoch": 4.611358771430255,
      "grad_norm": 0.21744310855865479,
      "learning_rate": 7.82404143302386e-07,
      "loss": 0.0072,
      "step": 2817780
    },
    {
      "epoch": 4.6113915018689084,
      "grad_norm": 0.15684787929058075,
      "learning_rate": 7.823382510888689e-07,
      "loss": 0.009,
      "step": 2817800
    },
    {
      "epoch": 4.611424232307561,
      "grad_norm": 0.042281825095415115,
      "learning_rate": 7.822723588753518e-07,
      "loss": 0.0083,
      "step": 2817820
    },
    {
      "epoch": 4.611456962746215,
      "grad_norm": 0.24828894436359406,
      "learning_rate": 7.822064666618347e-07,
      "loss": 0.0105,
      "step": 2817840
    },
    {
      "epoch": 4.611489693184868,
      "grad_norm": 0.15057584643363953,
      "learning_rate": 7.821405744483175e-07,
      "loss": 0.011,
      "step": 2817860
    },
    {
      "epoch": 4.611522423623521,
      "grad_norm": 0.35691389441490173,
      "learning_rate": 7.820746822348003e-07,
      "loss": 0.0076,
      "step": 2817880
    },
    {
      "epoch": 4.611555154062175,
      "grad_norm": 0.24359899759292603,
      "learning_rate": 7.820087900212832e-07,
      "loss": 0.0076,
      "step": 2817900
    },
    {
      "epoch": 4.611587884500828,
      "grad_norm": 0.16554652154445648,
      "learning_rate": 7.819428978077661e-07,
      "loss": 0.0049,
      "step": 2817920
    },
    {
      "epoch": 4.611620614939481,
      "grad_norm": 0.2022927701473236,
      "learning_rate": 7.81877005594249e-07,
      "loss": 0.0078,
      "step": 2817940
    },
    {
      "epoch": 4.6116533453781345,
      "grad_norm": 0.353460431098938,
      "learning_rate": 7.818111133807318e-07,
      "loss": 0.0081,
      "step": 2817960
    },
    {
      "epoch": 4.611686075816788,
      "grad_norm": 0.1957828551530838,
      "learning_rate": 7.817452211672147e-07,
      "loss": 0.0093,
      "step": 2817980
    },
    {
      "epoch": 4.611718806255442,
      "grad_norm": 0.2891453802585602,
      "learning_rate": 7.816793289536976e-07,
      "loss": 0.0086,
      "step": 2818000
    },
    {
      "epoch": 4.611751536694094,
      "grad_norm": 0.08189277350902557,
      "learning_rate": 7.816134367401805e-07,
      "loss": 0.0106,
      "step": 2818020
    },
    {
      "epoch": 4.611784267132748,
      "grad_norm": 0.2600265145301819,
      "learning_rate": 7.815475445266633e-07,
      "loss": 0.0117,
      "step": 2818040
    },
    {
      "epoch": 4.611816997571402,
      "grad_norm": 0.33379727602005005,
      "learning_rate": 7.814816523131462e-07,
      "loss": 0.0118,
      "step": 2818060
    },
    {
      "epoch": 4.611849728010055,
      "grad_norm": 0.039845313876867294,
      "learning_rate": 7.81415760099629e-07,
      "loss": 0.0073,
      "step": 2818080
    },
    {
      "epoch": 4.611882458448708,
      "grad_norm": 0.31563785672187805,
      "learning_rate": 7.81349867886112e-07,
      "loss": 0.0071,
      "step": 2818100
    },
    {
      "epoch": 4.6119151888873615,
      "grad_norm": 0.14826060831546783,
      "learning_rate": 7.812839756725948e-07,
      "loss": 0.0063,
      "step": 2818120
    },
    {
      "epoch": 4.611947919326015,
      "grad_norm": 0.045394495129585266,
      "learning_rate": 7.812180834590777e-07,
      "loss": 0.0053,
      "step": 2818140
    },
    {
      "epoch": 4.611980649764668,
      "grad_norm": 0.1063794270157814,
      "learning_rate": 7.811521912455605e-07,
      "loss": 0.0101,
      "step": 2818160
    },
    {
      "epoch": 4.612013380203321,
      "grad_norm": 0.13965347409248352,
      "learning_rate": 7.810862990320435e-07,
      "loss": 0.0075,
      "step": 2818180
    },
    {
      "epoch": 4.612046110641975,
      "grad_norm": 0.30273571610450745,
      "learning_rate": 7.810204068185263e-07,
      "loss": 0.0058,
      "step": 2818200
    },
    {
      "epoch": 4.612078841080628,
      "grad_norm": 0.16031256318092346,
      "learning_rate": 7.809545146050091e-07,
      "loss": 0.0085,
      "step": 2818220
    },
    {
      "epoch": 4.612111571519281,
      "grad_norm": 0.09486351907253265,
      "learning_rate": 7.80888622391492e-07,
      "loss": 0.0076,
      "step": 2818240
    },
    {
      "epoch": 4.612144301957935,
      "grad_norm": 0.22714446485042572,
      "learning_rate": 7.80822730177975e-07,
      "loss": 0.0058,
      "step": 2818260
    },
    {
      "epoch": 4.6121770323965885,
      "grad_norm": 0.4032171070575714,
      "learning_rate": 7.807568379644578e-07,
      "loss": 0.0135,
      "step": 2818280
    },
    {
      "epoch": 4.612209762835241,
      "grad_norm": 0.19119885563850403,
      "learning_rate": 7.806909457509406e-07,
      "loss": 0.0059,
      "step": 2818300
    },
    {
      "epoch": 4.612242493273895,
      "grad_norm": 0.4884065091609955,
      "learning_rate": 7.806250535374235e-07,
      "loss": 0.0105,
      "step": 2818320
    },
    {
      "epoch": 4.612275223712548,
      "grad_norm": 0.11902786791324615,
      "learning_rate": 7.805591613239063e-07,
      "loss": 0.0129,
      "step": 2818340
    },
    {
      "epoch": 4.612307954151202,
      "grad_norm": 0.7605702877044678,
      "learning_rate": 7.804932691103894e-07,
      "loss": 0.009,
      "step": 2818360
    },
    {
      "epoch": 4.612340684589855,
      "grad_norm": 0.2797018587589264,
      "learning_rate": 7.804273768968722e-07,
      "loss": 0.0106,
      "step": 2818380
    },
    {
      "epoch": 4.612373415028508,
      "grad_norm": 0.09274394065141678,
      "learning_rate": 7.80361484683355e-07,
      "loss": 0.0078,
      "step": 2818400
    },
    {
      "epoch": 4.612406145467162,
      "grad_norm": 0.1829962581396103,
      "learning_rate": 7.802955924698378e-07,
      "loss": 0.0052,
      "step": 2818420
    },
    {
      "epoch": 4.612438875905815,
      "grad_norm": 0.12725380063056946,
      "learning_rate": 7.802297002563209e-07,
      "loss": 0.0084,
      "step": 2818440
    },
    {
      "epoch": 4.612471606344468,
      "grad_norm": 0.2899704873561859,
      "learning_rate": 7.801638080428037e-07,
      "loss": 0.0092,
      "step": 2818460
    },
    {
      "epoch": 4.612504336783122,
      "grad_norm": 0.09771876037120819,
      "learning_rate": 7.800979158292866e-07,
      "loss": 0.0099,
      "step": 2818480
    },
    {
      "epoch": 4.6125370672217745,
      "grad_norm": 0.0945819616317749,
      "learning_rate": 7.800320236157693e-07,
      "loss": 0.0084,
      "step": 2818500
    },
    {
      "epoch": 4.612569797660428,
      "grad_norm": 0.5477727651596069,
      "learning_rate": 7.799661314022521e-07,
      "loss": 0.0099,
      "step": 2818520
    },
    {
      "epoch": 4.612602528099082,
      "grad_norm": 0.12732279300689697,
      "learning_rate": 7.799002391887352e-07,
      "loss": 0.0115,
      "step": 2818540
    },
    {
      "epoch": 4.612635258537735,
      "grad_norm": 0.3366522490978241,
      "learning_rate": 7.79834346975218e-07,
      "loss": 0.0074,
      "step": 2818560
    },
    {
      "epoch": 4.612667988976388,
      "grad_norm": 0.2199275642633438,
      "learning_rate": 7.797684547617009e-07,
      "loss": 0.0087,
      "step": 2818580
    },
    {
      "epoch": 4.612700719415042,
      "grad_norm": 0.19303776323795319,
      "learning_rate": 7.797025625481837e-07,
      "loss": 0.0082,
      "step": 2818600
    },
    {
      "epoch": 4.612733449853695,
      "grad_norm": 0.23797792196273804,
      "learning_rate": 7.796366703346667e-07,
      "loss": 0.0083,
      "step": 2818620
    },
    {
      "epoch": 4.612766180292349,
      "grad_norm": 0.16875289380550385,
      "learning_rate": 7.795707781211495e-07,
      "loss": 0.0089,
      "step": 2818640
    },
    {
      "epoch": 4.6127989107310015,
      "grad_norm": 0.19374807178974152,
      "learning_rate": 7.795048859076324e-07,
      "loss": 0.0132,
      "step": 2818660
    },
    {
      "epoch": 4.612831641169655,
      "grad_norm": 0.2336595356464386,
      "learning_rate": 7.794389936941152e-07,
      "loss": 0.0117,
      "step": 2818680
    },
    {
      "epoch": 4.612864371608309,
      "grad_norm": 0.3482307195663452,
      "learning_rate": 7.793731014805982e-07,
      "loss": 0.0094,
      "step": 2818700
    },
    {
      "epoch": 4.612897102046961,
      "grad_norm": 0.2874804735183716,
      "learning_rate": 7.79307209267081e-07,
      "loss": 0.0155,
      "step": 2818720
    },
    {
      "epoch": 4.612929832485615,
      "grad_norm": 0.22504901885986328,
      "learning_rate": 7.792413170535639e-07,
      "loss": 0.0093,
      "step": 2818740
    },
    {
      "epoch": 4.6129625629242685,
      "grad_norm": 0.47680217027664185,
      "learning_rate": 7.791754248400467e-07,
      "loss": 0.0114,
      "step": 2818760
    },
    {
      "epoch": 4.612995293362921,
      "grad_norm": 0.3546527922153473,
      "learning_rate": 7.791095326265296e-07,
      "loss": 0.0133,
      "step": 2818780
    },
    {
      "epoch": 4.613028023801575,
      "grad_norm": 0.1801004260778427,
      "learning_rate": 7.790436404130125e-07,
      "loss": 0.007,
      "step": 2818800
    },
    {
      "epoch": 4.613060754240228,
      "grad_norm": 0.2167605608701706,
      "learning_rate": 7.789777481994954e-07,
      "loss": 0.0095,
      "step": 2818820
    },
    {
      "epoch": 4.613093484678882,
      "grad_norm": 0.22007325291633606,
      "learning_rate": 7.789118559859782e-07,
      "loss": 0.0096,
      "step": 2818840
    },
    {
      "epoch": 4.613126215117535,
      "grad_norm": 0.2875828146934509,
      "learning_rate": 7.78845963772461e-07,
      "loss": 0.0079,
      "step": 2818860
    },
    {
      "epoch": 4.613158945556188,
      "grad_norm": 0.2811184823513031,
      "learning_rate": 7.78780071558944e-07,
      "loss": 0.0072,
      "step": 2818880
    },
    {
      "epoch": 4.613191675994842,
      "grad_norm": 0.2859490215778351,
      "learning_rate": 7.787141793454268e-07,
      "loss": 0.0137,
      "step": 2818900
    },
    {
      "epoch": 4.6132244064334955,
      "grad_norm": 0.29413020610809326,
      "learning_rate": 7.786482871319097e-07,
      "loss": 0.0114,
      "step": 2818920
    },
    {
      "epoch": 4.613257136872148,
      "grad_norm": 0.17699959874153137,
      "learning_rate": 7.785823949183925e-07,
      "loss": 0.0072,
      "step": 2818940
    },
    {
      "epoch": 4.613289867310802,
      "grad_norm": 0.422888845205307,
      "learning_rate": 7.785165027048754e-07,
      "loss": 0.0075,
      "step": 2818960
    },
    {
      "epoch": 4.613322597749455,
      "grad_norm": 0.1293618232011795,
      "learning_rate": 7.784506104913583e-07,
      "loss": 0.0129,
      "step": 2818980
    },
    {
      "epoch": 4.613355328188108,
      "grad_norm": 0.16452261805534363,
      "learning_rate": 7.783847182778412e-07,
      "loss": 0.0149,
      "step": 2819000
    },
    {
      "epoch": 4.613388058626762,
      "grad_norm": 0.1333015114068985,
      "learning_rate": 7.78318826064324e-07,
      "loss": 0.0081,
      "step": 2819020
    },
    {
      "epoch": 4.613420789065415,
      "grad_norm": 0.16970884799957275,
      "learning_rate": 7.782529338508069e-07,
      "loss": 0.0076,
      "step": 2819040
    },
    {
      "epoch": 4.613453519504068,
      "grad_norm": 0.23895861208438873,
      "learning_rate": 7.781870416372898e-07,
      "loss": 0.0113,
      "step": 2819060
    },
    {
      "epoch": 4.613486249942722,
      "grad_norm": 0.602159857749939,
      "learning_rate": 7.781211494237727e-07,
      "loss": 0.0108,
      "step": 2819080
    },
    {
      "epoch": 4.613518980381375,
      "grad_norm": 0.5155415534973145,
      "learning_rate": 7.780552572102555e-07,
      "loss": 0.0072,
      "step": 2819100
    },
    {
      "epoch": 4.613551710820029,
      "grad_norm": 0.684647262096405,
      "learning_rate": 7.779893649967384e-07,
      "loss": 0.0092,
      "step": 2819120
    },
    {
      "epoch": 4.6135844412586815,
      "grad_norm": 0.06197405979037285,
      "learning_rate": 7.779234727832213e-07,
      "loss": 0.0065,
      "step": 2819140
    },
    {
      "epoch": 4.613617171697335,
      "grad_norm": 0.6198089718818665,
      "learning_rate": 7.778575805697042e-07,
      "loss": 0.0111,
      "step": 2819160
    },
    {
      "epoch": 4.613649902135989,
      "grad_norm": 0.21207685768604279,
      "learning_rate": 7.77791688356187e-07,
      "loss": 0.0075,
      "step": 2819180
    },
    {
      "epoch": 4.613682632574641,
      "grad_norm": 0.25234168767929077,
      "learning_rate": 7.777257961426698e-07,
      "loss": 0.0077,
      "step": 2819200
    },
    {
      "epoch": 4.613715363013295,
      "grad_norm": 0.3935604989528656,
      "learning_rate": 7.776599039291527e-07,
      "loss": 0.0147,
      "step": 2819220
    },
    {
      "epoch": 4.613748093451949,
      "grad_norm": 0.13943281769752502,
      "learning_rate": 7.775940117156356e-07,
      "loss": 0.0107,
      "step": 2819240
    },
    {
      "epoch": 4.613780823890602,
      "grad_norm": 0.08251699060201645,
      "learning_rate": 7.775281195021185e-07,
      "loss": 0.0095,
      "step": 2819260
    },
    {
      "epoch": 4.613813554329255,
      "grad_norm": 0.3183152675628662,
      "learning_rate": 7.774622272886013e-07,
      "loss": 0.0082,
      "step": 2819280
    },
    {
      "epoch": 4.6138462847679085,
      "grad_norm": 0.35005301237106323,
      "learning_rate": 7.773963350750842e-07,
      "loss": 0.0084,
      "step": 2819300
    },
    {
      "epoch": 4.613879015206562,
      "grad_norm": 0.1505315601825714,
      "learning_rate": 7.773304428615671e-07,
      "loss": 0.0109,
      "step": 2819320
    },
    {
      "epoch": 4.613911745645215,
      "grad_norm": 0.17326152324676514,
      "learning_rate": 7.7726455064805e-07,
      "loss": 0.0112,
      "step": 2819340
    },
    {
      "epoch": 4.613944476083868,
      "grad_norm": 0.22893039882183075,
      "learning_rate": 7.771986584345328e-07,
      "loss": 0.0094,
      "step": 2819360
    },
    {
      "epoch": 4.613977206522522,
      "grad_norm": 0.17652371525764465,
      "learning_rate": 7.771327662210157e-07,
      "loss": 0.0093,
      "step": 2819380
    },
    {
      "epoch": 4.614009936961175,
      "grad_norm": 0.31783047318458557,
      "learning_rate": 7.770668740074985e-07,
      "loss": 0.0137,
      "step": 2819400
    },
    {
      "epoch": 4.614042667399828,
      "grad_norm": 0.26031821966171265,
      "learning_rate": 7.770009817939815e-07,
      "loss": 0.0112,
      "step": 2819420
    },
    {
      "epoch": 4.614075397838482,
      "grad_norm": 0.06657637655735016,
      "learning_rate": 7.769350895804643e-07,
      "loss": 0.0066,
      "step": 2819440
    },
    {
      "epoch": 4.6141081282771355,
      "grad_norm": 0.12620024383068085,
      "learning_rate": 7.768691973669472e-07,
      "loss": 0.0089,
      "step": 2819460
    },
    {
      "epoch": 4.614140858715788,
      "grad_norm": 0.13296695053577423,
      "learning_rate": 7.7680330515343e-07,
      "loss": 0.0106,
      "step": 2819480
    },
    {
      "epoch": 4.614173589154442,
      "grad_norm": 0.1291854828596115,
      "learning_rate": 7.76737412939913e-07,
      "loss": 0.0113,
      "step": 2819500
    },
    {
      "epoch": 4.614206319593095,
      "grad_norm": 0.592040479183197,
      "learning_rate": 7.766715207263958e-07,
      "loss": 0.0067,
      "step": 2819520
    },
    {
      "epoch": 4.614239050031749,
      "grad_norm": 0.27636948227882385,
      "learning_rate": 7.766056285128786e-07,
      "loss": 0.0083,
      "step": 2819540
    },
    {
      "epoch": 4.614271780470402,
      "grad_norm": 0.25940626859664917,
      "learning_rate": 7.765397362993615e-07,
      "loss": 0.0093,
      "step": 2819560
    },
    {
      "epoch": 4.614304510909055,
      "grad_norm": 0.1833762228488922,
      "learning_rate": 7.764738440858446e-07,
      "loss": 0.0095,
      "step": 2819580
    },
    {
      "epoch": 4.614337241347709,
      "grad_norm": 0.5264918804168701,
      "learning_rate": 7.764079518723273e-07,
      "loss": 0.008,
      "step": 2819600
    },
    {
      "epoch": 4.6143699717863615,
      "grad_norm": 0.23236477375030518,
      "learning_rate": 7.763420596588101e-07,
      "loss": 0.0101,
      "step": 2819620
    },
    {
      "epoch": 4.614402702225015,
      "grad_norm": 0.03664378821849823,
      "learning_rate": 7.76276167445293e-07,
      "loss": 0.0094,
      "step": 2819640
    },
    {
      "epoch": 4.614435432663669,
      "grad_norm": 0.15802232921123505,
      "learning_rate": 7.762102752317758e-07,
      "loss": 0.007,
      "step": 2819660
    },
    {
      "epoch": 4.614468163102321,
      "grad_norm": 0.08893034607172012,
      "learning_rate": 7.761443830182589e-07,
      "loss": 0.0084,
      "step": 2819680
    },
    {
      "epoch": 4.614500893540975,
      "grad_norm": 0.5391750931739807,
      "learning_rate": 7.760784908047417e-07,
      "loss": 0.0099,
      "step": 2819700
    },
    {
      "epoch": 4.614533623979629,
      "grad_norm": 0.1280135214328766,
      "learning_rate": 7.760125985912245e-07,
      "loss": 0.0062,
      "step": 2819720
    },
    {
      "epoch": 4.614566354418282,
      "grad_norm": 0.4617941677570343,
      "learning_rate": 7.759467063777073e-07,
      "loss": 0.007,
      "step": 2819740
    },
    {
      "epoch": 4.614599084856935,
      "grad_norm": 0.29701247811317444,
      "learning_rate": 7.758808141641904e-07,
      "loss": 0.0079,
      "step": 2819760
    },
    {
      "epoch": 4.6146318152955885,
      "grad_norm": 0.33060479164123535,
      "learning_rate": 7.758149219506732e-07,
      "loss": 0.012,
      "step": 2819780
    },
    {
      "epoch": 4.614664545734242,
      "grad_norm": 0.21391282975673676,
      "learning_rate": 7.757490297371561e-07,
      "loss": 0.0057,
      "step": 2819800
    },
    {
      "epoch": 4.614697276172896,
      "grad_norm": 0.6196411848068237,
      "learning_rate": 7.756831375236388e-07,
      "loss": 0.0081,
      "step": 2819820
    },
    {
      "epoch": 4.614730006611548,
      "grad_norm": 0.28352078795433044,
      "learning_rate": 7.756172453101219e-07,
      "loss": 0.0078,
      "step": 2819840
    },
    {
      "epoch": 4.614762737050202,
      "grad_norm": 0.42407289147377014,
      "learning_rate": 7.755513530966047e-07,
      "loss": 0.0065,
      "step": 2819860
    },
    {
      "epoch": 4.614795467488856,
      "grad_norm": 0.25675201416015625,
      "learning_rate": 7.754854608830876e-07,
      "loss": 0.008,
      "step": 2819880
    },
    {
      "epoch": 4.614828197927508,
      "grad_norm": 0.5421342849731445,
      "learning_rate": 7.754195686695704e-07,
      "loss": 0.0103,
      "step": 2819900
    },
    {
      "epoch": 4.614860928366162,
      "grad_norm": 0.346336305141449,
      "learning_rate": 7.753536764560532e-07,
      "loss": 0.0128,
      "step": 2819920
    },
    {
      "epoch": 4.6148936588048155,
      "grad_norm": 0.10997852683067322,
      "learning_rate": 7.752877842425362e-07,
      "loss": 0.0103,
      "step": 2819940
    },
    {
      "epoch": 4.614926389243468,
      "grad_norm": 0.33059921860694885,
      "learning_rate": 7.75221892029019e-07,
      "loss": 0.007,
      "step": 2819960
    },
    {
      "epoch": 4.614959119682122,
      "grad_norm": 0.6313457489013672,
      "learning_rate": 7.751559998155019e-07,
      "loss": 0.0137,
      "step": 2819980
    },
    {
      "epoch": 4.614991850120775,
      "grad_norm": 0.7561143636703491,
      "learning_rate": 7.750901076019847e-07,
      "loss": 0.0136,
      "step": 2820000
    },
    {
      "epoch": 4.615024580559429,
      "grad_norm": 0.8336389064788818,
      "learning_rate": 7.750242153884677e-07,
      "loss": 0.0081,
      "step": 2820020
    },
    {
      "epoch": 4.615057310998082,
      "grad_norm": 0.791217029094696,
      "learning_rate": 7.749583231749505e-07,
      "loss": 0.0109,
      "step": 2820040
    },
    {
      "epoch": 4.615090041436735,
      "grad_norm": 0.23331128060817719,
      "learning_rate": 7.748924309614334e-07,
      "loss": 0.009,
      "step": 2820060
    },
    {
      "epoch": 4.615122771875389,
      "grad_norm": 0.5634559988975525,
      "learning_rate": 7.748265387479162e-07,
      "loss": 0.0073,
      "step": 2820080
    },
    {
      "epoch": 4.6151555023140425,
      "grad_norm": 0.21099740266799927,
      "learning_rate": 7.747606465343991e-07,
      "loss": 0.0077,
      "step": 2820100
    },
    {
      "epoch": 4.615188232752695,
      "grad_norm": 0.3838672935962677,
      "learning_rate": 7.74694754320882e-07,
      "loss": 0.0129,
      "step": 2820120
    },
    {
      "epoch": 4.615220963191349,
      "grad_norm": 0.34904447197914124,
      "learning_rate": 7.746288621073649e-07,
      "loss": 0.0101,
      "step": 2820140
    },
    {
      "epoch": 4.615253693630002,
      "grad_norm": 0.10991048067808151,
      "learning_rate": 7.745629698938477e-07,
      "loss": 0.0064,
      "step": 2820160
    },
    {
      "epoch": 4.615286424068655,
      "grad_norm": 0.40309035778045654,
      "learning_rate": 7.744970776803306e-07,
      "loss": 0.008,
      "step": 2820180
    },
    {
      "epoch": 4.615319154507309,
      "grad_norm": 0.2356194406747818,
      "learning_rate": 7.744311854668135e-07,
      "loss": 0.007,
      "step": 2820200
    },
    {
      "epoch": 4.615351884945962,
      "grad_norm": 0.3095290958881378,
      "learning_rate": 7.743652932532964e-07,
      "loss": 0.0109,
      "step": 2820220
    },
    {
      "epoch": 4.615384615384615,
      "grad_norm": 0.4960136413574219,
      "learning_rate": 7.742994010397792e-07,
      "loss": 0.0091,
      "step": 2820240
    },
    {
      "epoch": 4.615417345823269,
      "grad_norm": 0.1443593055009842,
      "learning_rate": 7.74233508826262e-07,
      "loss": 0.0124,
      "step": 2820260
    },
    {
      "epoch": 4.615450076261922,
      "grad_norm": 0.13526734709739685,
      "learning_rate": 7.74167616612745e-07,
      "loss": 0.0077,
      "step": 2820280
    },
    {
      "epoch": 4.615482806700576,
      "grad_norm": 0.41705864667892456,
      "learning_rate": 7.741017243992278e-07,
      "loss": 0.0073,
      "step": 2820300
    },
    {
      "epoch": 4.6155155371392285,
      "grad_norm": 0.516880989074707,
      "learning_rate": 7.740358321857107e-07,
      "loss": 0.009,
      "step": 2820320
    },
    {
      "epoch": 4.615548267577882,
      "grad_norm": 0.2712256610393524,
      "learning_rate": 7.739699399721935e-07,
      "loss": 0.0077,
      "step": 2820340
    },
    {
      "epoch": 4.615580998016536,
      "grad_norm": 0.23273813724517822,
      "learning_rate": 7.739040477586764e-07,
      "loss": 0.008,
      "step": 2820360
    },
    {
      "epoch": 4.615613728455189,
      "grad_norm": 0.1476796716451645,
      "learning_rate": 7.738381555451593e-07,
      "loss": 0.0086,
      "step": 2820380
    },
    {
      "epoch": 4.615646458893842,
      "grad_norm": 0.3194265067577362,
      "learning_rate": 7.737722633316422e-07,
      "loss": 0.0058,
      "step": 2820400
    },
    {
      "epoch": 4.6156791893324955,
      "grad_norm": 0.1533913016319275,
      "learning_rate": 7.73706371118125e-07,
      "loss": 0.0096,
      "step": 2820420
    },
    {
      "epoch": 4.615711919771149,
      "grad_norm": 0.12332333624362946,
      "learning_rate": 7.736404789046079e-07,
      "loss": 0.0116,
      "step": 2820440
    },
    {
      "epoch": 4.615744650209802,
      "grad_norm": 0.18423670530319214,
      "learning_rate": 7.735745866910908e-07,
      "loss": 0.0055,
      "step": 2820460
    },
    {
      "epoch": 4.615777380648455,
      "grad_norm": 0.2012968361377716,
      "learning_rate": 7.735086944775737e-07,
      "loss": 0.0082,
      "step": 2820480
    },
    {
      "epoch": 4.615810111087109,
      "grad_norm": 0.09031710773706436,
      "learning_rate": 7.734428022640565e-07,
      "loss": 0.0073,
      "step": 2820500
    },
    {
      "epoch": 4.615842841525762,
      "grad_norm": 0.3636254668235779,
      "learning_rate": 7.733769100505394e-07,
      "loss": 0.0106,
      "step": 2820520
    },
    {
      "epoch": 4.615875571964415,
      "grad_norm": 0.4066994786262512,
      "learning_rate": 7.733110178370222e-07,
      "loss": 0.0084,
      "step": 2820540
    },
    {
      "epoch": 4.615908302403069,
      "grad_norm": 0.6276490092277527,
      "learning_rate": 7.732451256235052e-07,
      "loss": 0.01,
      "step": 2820560
    },
    {
      "epoch": 4.6159410328417225,
      "grad_norm": 0.2884340286254883,
      "learning_rate": 7.73179233409988e-07,
      "loss": 0.013,
      "step": 2820580
    },
    {
      "epoch": 4.615973763280375,
      "grad_norm": 0.18228530883789062,
      "learning_rate": 7.731133411964708e-07,
      "loss": 0.0087,
      "step": 2820600
    },
    {
      "epoch": 4.616006493719029,
      "grad_norm": 0.04358036071062088,
      "learning_rate": 7.730474489829537e-07,
      "loss": 0.0067,
      "step": 2820620
    },
    {
      "epoch": 4.616039224157682,
      "grad_norm": 0.10977387428283691,
      "learning_rate": 7.729815567694366e-07,
      "loss": 0.0137,
      "step": 2820640
    },
    {
      "epoch": 4.616071954596335,
      "grad_norm": 0.2449573427438736,
      "learning_rate": 7.729156645559195e-07,
      "loss": 0.0097,
      "step": 2820660
    },
    {
      "epoch": 4.616104685034989,
      "grad_norm": 0.4770987331867218,
      "learning_rate": 7.728497723424023e-07,
      "loss": 0.0107,
      "step": 2820680
    },
    {
      "epoch": 4.616137415473642,
      "grad_norm": 0.6925582885742188,
      "learning_rate": 7.727838801288852e-07,
      "loss": 0.0096,
      "step": 2820700
    },
    {
      "epoch": 4.616170145912296,
      "grad_norm": 0.17903205752372742,
      "learning_rate": 7.727179879153681e-07,
      "loss": 0.0071,
      "step": 2820720
    },
    {
      "epoch": 4.616202876350949,
      "grad_norm": 0.2245214581489563,
      "learning_rate": 7.72652095701851e-07,
      "loss": 0.0087,
      "step": 2820740
    },
    {
      "epoch": 4.616235606789602,
      "grad_norm": 0.20048865675926208,
      "learning_rate": 7.725862034883338e-07,
      "loss": 0.0063,
      "step": 2820760
    },
    {
      "epoch": 4.616268337228256,
      "grad_norm": 0.1971864104270935,
      "learning_rate": 7.725203112748167e-07,
      "loss": 0.0108,
      "step": 2820780
    },
    {
      "epoch": 4.6163010676669085,
      "grad_norm": 0.3609793484210968,
      "learning_rate": 7.724544190612995e-07,
      "loss": 0.0078,
      "step": 2820800
    },
    {
      "epoch": 4.616333798105562,
      "grad_norm": 0.27197012305259705,
      "learning_rate": 7.723885268477825e-07,
      "loss": 0.0091,
      "step": 2820820
    },
    {
      "epoch": 4.616366528544216,
      "grad_norm": 0.2815014719963074,
      "learning_rate": 7.723226346342653e-07,
      "loss": 0.0068,
      "step": 2820840
    },
    {
      "epoch": 4.616399258982868,
      "grad_norm": 0.15758344531059265,
      "learning_rate": 7.722567424207482e-07,
      "loss": 0.0063,
      "step": 2820860
    },
    {
      "epoch": 4.616431989421522,
      "grad_norm": 0.15023493766784668,
      "learning_rate": 7.72190850207231e-07,
      "loss": 0.0086,
      "step": 2820880
    },
    {
      "epoch": 4.616464719860176,
      "grad_norm": 0.12857213616371155,
      "learning_rate": 7.721249579937141e-07,
      "loss": 0.0089,
      "step": 2820900
    },
    {
      "epoch": 4.616497450298829,
      "grad_norm": 0.11353643238544464,
      "learning_rate": 7.720590657801968e-07,
      "loss": 0.0058,
      "step": 2820920
    },
    {
      "epoch": 4.616530180737482,
      "grad_norm": 0.21179232001304626,
      "learning_rate": 7.719931735666796e-07,
      "loss": 0.0073,
      "step": 2820940
    },
    {
      "epoch": 4.6165629111761355,
      "grad_norm": 0.043710242956876755,
      "learning_rate": 7.719272813531625e-07,
      "loss": 0.0091,
      "step": 2820960
    },
    {
      "epoch": 4.616595641614789,
      "grad_norm": 0.2103765904903412,
      "learning_rate": 7.718613891396453e-07,
      "loss": 0.0141,
      "step": 2820980
    },
    {
      "epoch": 4.616628372053443,
      "grad_norm": 0.5052838921546936,
      "learning_rate": 7.717954969261284e-07,
      "loss": 0.0103,
      "step": 2821000
    },
    {
      "epoch": 4.616661102492095,
      "grad_norm": 0.24105088412761688,
      "learning_rate": 7.717296047126112e-07,
      "loss": 0.006,
      "step": 2821020
    },
    {
      "epoch": 4.616693832930749,
      "grad_norm": 0.24527229368686676,
      "learning_rate": 7.71663712499094e-07,
      "loss": 0.0105,
      "step": 2821040
    },
    {
      "epoch": 4.616726563369403,
      "grad_norm": 0.1837916374206543,
      "learning_rate": 7.715978202855768e-07,
      "loss": 0.0109,
      "step": 2821060
    },
    {
      "epoch": 4.616759293808055,
      "grad_norm": 0.07659454643726349,
      "learning_rate": 7.715319280720599e-07,
      "loss": 0.0106,
      "step": 2821080
    },
    {
      "epoch": 4.616792024246709,
      "grad_norm": 0.17887941002845764,
      "learning_rate": 7.714660358585427e-07,
      "loss": 0.0122,
      "step": 2821100
    },
    {
      "epoch": 4.6168247546853625,
      "grad_norm": 0.09834009408950806,
      "learning_rate": 7.714001436450256e-07,
      "loss": 0.0072,
      "step": 2821120
    },
    {
      "epoch": 4.616857485124015,
      "grad_norm": 0.28381800651550293,
      "learning_rate": 7.713342514315084e-07,
      "loss": 0.0068,
      "step": 2821140
    },
    {
      "epoch": 4.616890215562669,
      "grad_norm": 0.40582361817359924,
      "learning_rate": 7.712683592179914e-07,
      "loss": 0.0092,
      "step": 2821160
    },
    {
      "epoch": 4.616922946001322,
      "grad_norm": 0.5725240111351013,
      "learning_rate": 7.712024670044742e-07,
      "loss": 0.0113,
      "step": 2821180
    },
    {
      "epoch": 4.616955676439976,
      "grad_norm": 0.07992397248744965,
      "learning_rate": 7.711365747909571e-07,
      "loss": 0.0094,
      "step": 2821200
    },
    {
      "epoch": 4.616988406878629,
      "grad_norm": 0.15015926957130432,
      "learning_rate": 7.710706825774399e-07,
      "loss": 0.0092,
      "step": 2821220
    },
    {
      "epoch": 4.617021137317282,
      "grad_norm": 0.1444227397441864,
      "learning_rate": 7.710047903639227e-07,
      "loss": 0.0107,
      "step": 2821240
    },
    {
      "epoch": 4.617053867755936,
      "grad_norm": 0.20403698086738586,
      "learning_rate": 7.709388981504057e-07,
      "loss": 0.0057,
      "step": 2821260
    },
    {
      "epoch": 4.617086598194589,
      "grad_norm": 0.5080286264419556,
      "learning_rate": 7.708730059368885e-07,
      "loss": 0.0112,
      "step": 2821280
    },
    {
      "epoch": 4.617119328633242,
      "grad_norm": 0.09909732639789581,
      "learning_rate": 7.708071137233714e-07,
      "loss": 0.0083,
      "step": 2821300
    },
    {
      "epoch": 4.617152059071896,
      "grad_norm": 0.12081616371870041,
      "learning_rate": 7.707412215098542e-07,
      "loss": 0.0085,
      "step": 2821320
    },
    {
      "epoch": 4.617184789510549,
      "grad_norm": 0.2711997926235199,
      "learning_rate": 7.706753292963372e-07,
      "loss": 0.0147,
      "step": 2821340
    },
    {
      "epoch": 4.617217519949202,
      "grad_norm": 0.07832023501396179,
      "learning_rate": 7.7060943708282e-07,
      "loss": 0.0056,
      "step": 2821360
    },
    {
      "epoch": 4.617250250387856,
      "grad_norm": 0.286309152841568,
      "learning_rate": 7.705435448693029e-07,
      "loss": 0.0073,
      "step": 2821380
    },
    {
      "epoch": 4.617282980826509,
      "grad_norm": 0.07221000641584396,
      "learning_rate": 7.704776526557857e-07,
      "loss": 0.0119,
      "step": 2821400
    },
    {
      "epoch": 4.617315711265162,
      "grad_norm": 0.13200710713863373,
      "learning_rate": 7.704117604422686e-07,
      "loss": 0.0076,
      "step": 2821420
    },
    {
      "epoch": 4.6173484417038155,
      "grad_norm": 0.13264571130275726,
      "learning_rate": 7.703458682287515e-07,
      "loss": 0.0077,
      "step": 2821440
    },
    {
      "epoch": 4.617381172142469,
      "grad_norm": 0.10767406225204468,
      "learning_rate": 7.702799760152344e-07,
      "loss": 0.0065,
      "step": 2821460
    },
    {
      "epoch": 4.617413902581123,
      "grad_norm": 0.32481417059898376,
      "learning_rate": 7.702140838017172e-07,
      "loss": 0.0076,
      "step": 2821480
    },
    {
      "epoch": 4.617446633019775,
      "grad_norm": 0.384450763463974,
      "learning_rate": 7.701481915882001e-07,
      "loss": 0.0047,
      "step": 2821500
    },
    {
      "epoch": 4.617479363458429,
      "grad_norm": 0.23684631288051605,
      "learning_rate": 7.70082299374683e-07,
      "loss": 0.0074,
      "step": 2821520
    },
    {
      "epoch": 4.617512093897083,
      "grad_norm": 0.5658597350120544,
      "learning_rate": 7.700164071611659e-07,
      "loss": 0.0155,
      "step": 2821540
    },
    {
      "epoch": 4.617544824335736,
      "grad_norm": 0.17961136996746063,
      "learning_rate": 7.699505149476487e-07,
      "loss": 0.0073,
      "step": 2821560
    },
    {
      "epoch": 4.617577554774389,
      "grad_norm": 0.12268149852752686,
      "learning_rate": 7.698846227341315e-07,
      "loss": 0.0095,
      "step": 2821580
    },
    {
      "epoch": 4.6176102852130425,
      "grad_norm": 1.8526517152786255,
      "learning_rate": 7.698187305206145e-07,
      "loss": 0.0064,
      "step": 2821600
    },
    {
      "epoch": 4.617643015651696,
      "grad_norm": 0.1976800560951233,
      "learning_rate": 7.697528383070973e-07,
      "loss": 0.0122,
      "step": 2821620
    },
    {
      "epoch": 4.617675746090349,
      "grad_norm": 0.2785957157611847,
      "learning_rate": 7.696869460935802e-07,
      "loss": 0.0075,
      "step": 2821640
    },
    {
      "epoch": 4.617708476529002,
      "grad_norm": 0.22220851480960846,
      "learning_rate": 7.69621053880063e-07,
      "loss": 0.008,
      "step": 2821660
    },
    {
      "epoch": 4.617741206967656,
      "grad_norm": 0.26918330788612366,
      "learning_rate": 7.695551616665459e-07,
      "loss": 0.0058,
      "step": 2821680
    },
    {
      "epoch": 4.617773937406309,
      "grad_norm": 0.3728850185871124,
      "learning_rate": 7.694892694530288e-07,
      "loss": 0.0073,
      "step": 2821700
    },
    {
      "epoch": 4.617806667844962,
      "grad_norm": 0.1355637162923813,
      "learning_rate": 7.694233772395117e-07,
      "loss": 0.0089,
      "step": 2821720
    },
    {
      "epoch": 4.617839398283616,
      "grad_norm": 0.03211232274770737,
      "learning_rate": 7.693574850259945e-07,
      "loss": 0.0064,
      "step": 2821740
    },
    {
      "epoch": 4.6178721287222695,
      "grad_norm": 0.3044460117816925,
      "learning_rate": 7.692915928124774e-07,
      "loss": 0.0099,
      "step": 2821760
    },
    {
      "epoch": 4.617904859160922,
      "grad_norm": 0.28072917461395264,
      "learning_rate": 7.692257005989603e-07,
      "loss": 0.0067,
      "step": 2821780
    },
    {
      "epoch": 4.617937589599576,
      "grad_norm": 0.36443570256233215,
      "learning_rate": 7.691598083854432e-07,
      "loss": 0.0082,
      "step": 2821800
    },
    {
      "epoch": 4.617970320038229,
      "grad_norm": 0.08546185493469238,
      "learning_rate": 7.69093916171926e-07,
      "loss": 0.0139,
      "step": 2821820
    },
    {
      "epoch": 4.618003050476883,
      "grad_norm": 0.30184847116470337,
      "learning_rate": 7.690280239584089e-07,
      "loss": 0.0089,
      "step": 2821840
    },
    {
      "epoch": 4.618035780915536,
      "grad_norm": 0.2534646689891815,
      "learning_rate": 7.689621317448917e-07,
      "loss": 0.0089,
      "step": 2821860
    },
    {
      "epoch": 4.618068511354189,
      "grad_norm": 0.14219456911087036,
      "learning_rate": 7.688962395313747e-07,
      "loss": 0.0066,
      "step": 2821880
    },
    {
      "epoch": 4.618101241792843,
      "grad_norm": 0.14116665720939636,
      "learning_rate": 7.688303473178575e-07,
      "loss": 0.0057,
      "step": 2821900
    },
    {
      "epoch": 4.618133972231496,
      "grad_norm": 0.2258831262588501,
      "learning_rate": 7.687644551043403e-07,
      "loss": 0.0073,
      "step": 2821920
    },
    {
      "epoch": 4.618166702670149,
      "grad_norm": 0.031205998733639717,
      "learning_rate": 7.686985628908232e-07,
      "loss": 0.0097,
      "step": 2821940
    },
    {
      "epoch": 4.618199433108803,
      "grad_norm": 0.13594882190227509,
      "learning_rate": 7.686326706773061e-07,
      "loss": 0.0058,
      "step": 2821960
    },
    {
      "epoch": 4.6182321635474555,
      "grad_norm": 0.15938127040863037,
      "learning_rate": 7.68566778463789e-07,
      "loss": 0.0123,
      "step": 2821980
    },
    {
      "epoch": 4.618264893986109,
      "grad_norm": 0.28043732047080994,
      "learning_rate": 7.685008862502718e-07,
      "loss": 0.0085,
      "step": 2822000
    },
    {
      "epoch": 4.618297624424763,
      "grad_norm": 0.13143348693847656,
      "learning_rate": 7.684349940367547e-07,
      "loss": 0.0114,
      "step": 2822020
    },
    {
      "epoch": 4.618330354863416,
      "grad_norm": 0.13639813661575317,
      "learning_rate": 7.683691018232376e-07,
      "loss": 0.0097,
      "step": 2822040
    },
    {
      "epoch": 4.618363085302069,
      "grad_norm": 0.12505142390727997,
      "learning_rate": 7.683032096097205e-07,
      "loss": 0.0056,
      "step": 2822060
    },
    {
      "epoch": 4.6183958157407226,
      "grad_norm": 0.28127533197402954,
      "learning_rate": 7.682373173962033e-07,
      "loss": 0.011,
      "step": 2822080
    },
    {
      "epoch": 4.618428546179376,
      "grad_norm": 0.24111583828926086,
      "learning_rate": 7.681714251826862e-07,
      "loss": 0.0109,
      "step": 2822100
    },
    {
      "epoch": 4.618461276618029,
      "grad_norm": 0.09859953820705414,
      "learning_rate": 7.68105532969169e-07,
      "loss": 0.0136,
      "step": 2822120
    },
    {
      "epoch": 4.6184940070566824,
      "grad_norm": 0.3243582546710968,
      "learning_rate": 7.68039640755652e-07,
      "loss": 0.0054,
      "step": 2822140
    },
    {
      "epoch": 4.618526737495336,
      "grad_norm": 0.17000111937522888,
      "learning_rate": 7.679737485421348e-07,
      "loss": 0.01,
      "step": 2822160
    },
    {
      "epoch": 4.61855946793399,
      "grad_norm": 0.39467138051986694,
      "learning_rate": 7.679078563286177e-07,
      "loss": 0.0089,
      "step": 2822180
    },
    {
      "epoch": 4.618592198372642,
      "grad_norm": 0.28256985545158386,
      "learning_rate": 7.678419641151005e-07,
      "loss": 0.0053,
      "step": 2822200
    },
    {
      "epoch": 4.618624928811296,
      "grad_norm": 0.10763251036405563,
      "learning_rate": 7.677760719015836e-07,
      "loss": 0.0074,
      "step": 2822220
    },
    {
      "epoch": 4.6186576592499495,
      "grad_norm": 0.17980138957500458,
      "learning_rate": 7.677101796880663e-07,
      "loss": 0.0067,
      "step": 2822240
    },
    {
      "epoch": 4.618690389688602,
      "grad_norm": 0.16239389777183533,
      "learning_rate": 7.676442874745491e-07,
      "loss": 0.0092,
      "step": 2822260
    },
    {
      "epoch": 4.618723120127256,
      "grad_norm": 0.6221715211868286,
      "learning_rate": 7.67578395261032e-07,
      "loss": 0.0147,
      "step": 2822280
    },
    {
      "epoch": 4.618755850565909,
      "grad_norm": 0.21985654532909393,
      "learning_rate": 7.675125030475148e-07,
      "loss": 0.0118,
      "step": 2822300
    },
    {
      "epoch": 4.618788581004562,
      "grad_norm": 0.11290936917066574,
      "learning_rate": 7.674466108339979e-07,
      "loss": 0.011,
      "step": 2822320
    },
    {
      "epoch": 4.618821311443216,
      "grad_norm": 0.13952341675758362,
      "learning_rate": 7.673807186204807e-07,
      "loss": 0.0149,
      "step": 2822340
    },
    {
      "epoch": 4.618854041881869,
      "grad_norm": 0.24435101449489594,
      "learning_rate": 7.673148264069635e-07,
      "loss": 0.0088,
      "step": 2822360
    },
    {
      "epoch": 4.618886772320523,
      "grad_norm": 0.44804325699806213,
      "learning_rate": 7.672489341934463e-07,
      "loss": 0.0103,
      "step": 2822380
    },
    {
      "epoch": 4.618919502759176,
      "grad_norm": 0.25755998492240906,
      "learning_rate": 7.671830419799294e-07,
      "loss": 0.0069,
      "step": 2822400
    },
    {
      "epoch": 4.618952233197829,
      "grad_norm": 0.37685152888298035,
      "learning_rate": 7.671171497664122e-07,
      "loss": 0.0103,
      "step": 2822420
    },
    {
      "epoch": 4.618984963636483,
      "grad_norm": 0.27865636348724365,
      "learning_rate": 7.670512575528951e-07,
      "loss": 0.0074,
      "step": 2822440
    },
    {
      "epoch": 4.619017694075136,
      "grad_norm": 0.2526606619358063,
      "learning_rate": 7.669853653393779e-07,
      "loss": 0.0126,
      "step": 2822460
    },
    {
      "epoch": 4.619050424513789,
      "grad_norm": 0.7502354979515076,
      "learning_rate": 7.669194731258609e-07,
      "loss": 0.0132,
      "step": 2822480
    },
    {
      "epoch": 4.619083154952443,
      "grad_norm": 0.06143471598625183,
      "learning_rate": 7.668535809123437e-07,
      "loss": 0.0105,
      "step": 2822500
    },
    {
      "epoch": 4.619115885391096,
      "grad_norm": 0.2865391969680786,
      "learning_rate": 7.667876886988266e-07,
      "loss": 0.0105,
      "step": 2822520
    },
    {
      "epoch": 4.619148615829749,
      "grad_norm": 0.21817909181118011,
      "learning_rate": 7.667217964853094e-07,
      "loss": 0.0077,
      "step": 2822540
    },
    {
      "epoch": 4.619181346268403,
      "grad_norm": 0.2792390286922455,
      "learning_rate": 7.666559042717923e-07,
      "loss": 0.0099,
      "step": 2822560
    },
    {
      "epoch": 4.619214076707056,
      "grad_norm": 0.09805063158273697,
      "learning_rate": 7.665900120582752e-07,
      "loss": 0.0114,
      "step": 2822580
    },
    {
      "epoch": 4.619246807145709,
      "grad_norm": 0.03771897032856941,
      "learning_rate": 7.66524119844758e-07,
      "loss": 0.0073,
      "step": 2822600
    },
    {
      "epoch": 4.6192795375843625,
      "grad_norm": 0.19092050194740295,
      "learning_rate": 7.664582276312409e-07,
      "loss": 0.006,
      "step": 2822620
    },
    {
      "epoch": 4.619312268023016,
      "grad_norm": 0.3618412911891937,
      "learning_rate": 7.663923354177237e-07,
      "loss": 0.0084,
      "step": 2822640
    },
    {
      "epoch": 4.61934499846167,
      "grad_norm": 0.17079879343509674,
      "learning_rate": 7.663264432042067e-07,
      "loss": 0.0105,
      "step": 2822660
    },
    {
      "epoch": 4.619377728900322,
      "grad_norm": 0.32036155462265015,
      "learning_rate": 7.662605509906895e-07,
      "loss": 0.0113,
      "step": 2822680
    },
    {
      "epoch": 4.619410459338976,
      "grad_norm": 0.2605784237384796,
      "learning_rate": 7.661946587771724e-07,
      "loss": 0.0067,
      "step": 2822700
    },
    {
      "epoch": 4.61944318977763,
      "grad_norm": 0.1487230509519577,
      "learning_rate": 7.661287665636552e-07,
      "loss": 0.0069,
      "step": 2822720
    },
    {
      "epoch": 4.619475920216283,
      "grad_norm": 0.15302127599716187,
      "learning_rate": 7.660628743501381e-07,
      "loss": 0.0113,
      "step": 2822740
    },
    {
      "epoch": 4.619508650654936,
      "grad_norm": 0.23696237802505493,
      "learning_rate": 7.65996982136621e-07,
      "loss": 0.0059,
      "step": 2822760
    },
    {
      "epoch": 4.6195413810935895,
      "grad_norm": 0.2676827013492584,
      "learning_rate": 7.659310899231039e-07,
      "loss": 0.0117,
      "step": 2822780
    },
    {
      "epoch": 4.619574111532243,
      "grad_norm": 0.3965270221233368,
      "learning_rate": 7.658651977095867e-07,
      "loss": 0.0103,
      "step": 2822800
    },
    {
      "epoch": 4.619606841970896,
      "grad_norm": 0.15718486905097961,
      "learning_rate": 7.657993054960696e-07,
      "loss": 0.0054,
      "step": 2822820
    },
    {
      "epoch": 4.619639572409549,
      "grad_norm": 0.0740927904844284,
      "learning_rate": 7.657334132825525e-07,
      "loss": 0.0082,
      "step": 2822840
    },
    {
      "epoch": 4.619672302848203,
      "grad_norm": 0.18057185411453247,
      "learning_rate": 7.656675210690354e-07,
      "loss": 0.0119,
      "step": 2822860
    },
    {
      "epoch": 4.619705033286856,
      "grad_norm": 0.24128559231758118,
      "learning_rate": 7.656016288555182e-07,
      "loss": 0.0076,
      "step": 2822880
    },
    {
      "epoch": 4.619737763725509,
      "grad_norm": 0.11790355294942856,
      "learning_rate": 7.655357366420011e-07,
      "loss": 0.0101,
      "step": 2822900
    },
    {
      "epoch": 4.619770494164163,
      "grad_norm": 0.19755342602729797,
      "learning_rate": 7.65469844428484e-07,
      "loss": 0.0105,
      "step": 2822920
    },
    {
      "epoch": 4.6198032246028164,
      "grad_norm": 0.28634804487228394,
      "learning_rate": 7.654039522149669e-07,
      "loss": 0.0108,
      "step": 2822940
    },
    {
      "epoch": 4.619835955041469,
      "grad_norm": 0.3872687518596649,
      "learning_rate": 7.653380600014497e-07,
      "loss": 0.0088,
      "step": 2822960
    },
    {
      "epoch": 4.619868685480123,
      "grad_norm": 0.19708853960037231,
      "learning_rate": 7.652721677879325e-07,
      "loss": 0.0099,
      "step": 2822980
    },
    {
      "epoch": 4.619901415918776,
      "grad_norm": 0.10005810111761093,
      "learning_rate": 7.652062755744154e-07,
      "loss": 0.0075,
      "step": 2823000
    },
    {
      "epoch": 4.61993414635743,
      "grad_norm": 0.39528700709342957,
      "learning_rate": 7.651403833608983e-07,
      "loss": 0.0144,
      "step": 2823020
    },
    {
      "epoch": 4.619966876796083,
      "grad_norm": 0.5466679334640503,
      "learning_rate": 7.650744911473812e-07,
      "loss": 0.0101,
      "step": 2823040
    },
    {
      "epoch": 4.619999607234736,
      "grad_norm": 0.2349262684583664,
      "learning_rate": 7.65008598933864e-07,
      "loss": 0.0077,
      "step": 2823060
    },
    {
      "epoch": 4.62003233767339,
      "grad_norm": 0.16985641419887543,
      "learning_rate": 7.649427067203469e-07,
      "loss": 0.0079,
      "step": 2823080
    },
    {
      "epoch": 4.6200650681120425,
      "grad_norm": 0.06681902706623077,
      "learning_rate": 7.648768145068298e-07,
      "loss": 0.0066,
      "step": 2823100
    },
    {
      "epoch": 4.620097798550696,
      "grad_norm": 0.5612983703613281,
      "learning_rate": 7.648109222933127e-07,
      "loss": 0.0079,
      "step": 2823120
    },
    {
      "epoch": 4.62013052898935,
      "grad_norm": 0.13070769608020782,
      "learning_rate": 7.647450300797955e-07,
      "loss": 0.0076,
      "step": 2823140
    },
    {
      "epoch": 4.620163259428002,
      "grad_norm": 0.25108227133750916,
      "learning_rate": 7.646791378662784e-07,
      "loss": 0.0075,
      "step": 2823160
    },
    {
      "epoch": 4.620195989866656,
      "grad_norm": 0.10836881399154663,
      "learning_rate": 7.646132456527612e-07,
      "loss": 0.0077,
      "step": 2823180
    },
    {
      "epoch": 4.62022872030531,
      "grad_norm": 0.22740091383457184,
      "learning_rate": 7.645473534392442e-07,
      "loss": 0.0062,
      "step": 2823200
    },
    {
      "epoch": 4.620261450743963,
      "grad_norm": 0.6775224208831787,
      "learning_rate": 7.64481461225727e-07,
      "loss": 0.0087,
      "step": 2823220
    },
    {
      "epoch": 4.620294181182616,
      "grad_norm": 0.22359248995780945,
      "learning_rate": 7.644155690122099e-07,
      "loss": 0.0157,
      "step": 2823240
    },
    {
      "epoch": 4.6203269116212695,
      "grad_norm": 0.5305023789405823,
      "learning_rate": 7.643496767986927e-07,
      "loss": 0.0076,
      "step": 2823260
    },
    {
      "epoch": 4.620359642059923,
      "grad_norm": 0.2554190456867218,
      "learning_rate": 7.642837845851757e-07,
      "loss": 0.009,
      "step": 2823280
    },
    {
      "epoch": 4.620392372498577,
      "grad_norm": 0.09999942034482956,
      "learning_rate": 7.642178923716585e-07,
      "loss": 0.0186,
      "step": 2823300
    },
    {
      "epoch": 4.620425102937229,
      "grad_norm": 0.1902671754360199,
      "learning_rate": 7.641520001581413e-07,
      "loss": 0.0076,
      "step": 2823320
    },
    {
      "epoch": 4.620457833375883,
      "grad_norm": 0.12281478196382523,
      "learning_rate": 7.640861079446242e-07,
      "loss": 0.0094,
      "step": 2823340
    },
    {
      "epoch": 4.620490563814537,
      "grad_norm": 0.15575626492500305,
      "learning_rate": 7.640202157311071e-07,
      "loss": 0.0099,
      "step": 2823360
    },
    {
      "epoch": 4.620523294253189,
      "grad_norm": 0.5310176610946655,
      "learning_rate": 7.6395432351759e-07,
      "loss": 0.0129,
      "step": 2823380
    },
    {
      "epoch": 4.620556024691843,
      "grad_norm": 0.059993281960487366,
      "learning_rate": 7.638884313040728e-07,
      "loss": 0.0066,
      "step": 2823400
    },
    {
      "epoch": 4.6205887551304965,
      "grad_norm": 0.17459237575531006,
      "learning_rate": 7.638225390905557e-07,
      "loss": 0.0096,
      "step": 2823420
    },
    {
      "epoch": 4.620621485569149,
      "grad_norm": 0.24176368117332458,
      "learning_rate": 7.637566468770385e-07,
      "loss": 0.0105,
      "step": 2823440
    },
    {
      "epoch": 4.620654216007803,
      "grad_norm": 0.22592751681804657,
      "learning_rate": 7.636907546635215e-07,
      "loss": 0.0081,
      "step": 2823460
    },
    {
      "epoch": 4.620686946446456,
      "grad_norm": 0.17164260149002075,
      "learning_rate": 7.636248624500043e-07,
      "loss": 0.0074,
      "step": 2823480
    },
    {
      "epoch": 4.62071967688511,
      "grad_norm": 0.24274033308029175,
      "learning_rate": 7.635589702364872e-07,
      "loss": 0.0067,
      "step": 2823500
    },
    {
      "epoch": 4.620752407323763,
      "grad_norm": 0.6429798007011414,
      "learning_rate": 7.6349307802297e-07,
      "loss": 0.0084,
      "step": 2823520
    },
    {
      "epoch": 4.620785137762416,
      "grad_norm": 0.13461416959762573,
      "learning_rate": 7.634271858094531e-07,
      "loss": 0.0074,
      "step": 2823540
    },
    {
      "epoch": 4.62081786820107,
      "grad_norm": 0.2224999964237213,
      "learning_rate": 7.633612935959358e-07,
      "loss": 0.0071,
      "step": 2823560
    },
    {
      "epoch": 4.620850598639723,
      "grad_norm": 0.28471407294273376,
      "learning_rate": 7.632954013824187e-07,
      "loss": 0.0086,
      "step": 2823580
    },
    {
      "epoch": 4.620883329078376,
      "grad_norm": 0.15322867035865784,
      "learning_rate": 7.632295091689015e-07,
      "loss": 0.0054,
      "step": 2823600
    },
    {
      "epoch": 4.62091605951703,
      "grad_norm": 0.34590986371040344,
      "learning_rate": 7.631636169553843e-07,
      "loss": 0.0099,
      "step": 2823620
    },
    {
      "epoch": 4.620948789955683,
      "grad_norm": 0.17089343070983887,
      "learning_rate": 7.630977247418674e-07,
      "loss": 0.011,
      "step": 2823640
    },
    {
      "epoch": 4.620981520394336,
      "grad_norm": 0.05441839620471001,
      "learning_rate": 7.630318325283502e-07,
      "loss": 0.0079,
      "step": 2823660
    },
    {
      "epoch": 4.62101425083299,
      "grad_norm": 0.5586785078048706,
      "learning_rate": 7.62965940314833e-07,
      "loss": 0.0082,
      "step": 2823680
    },
    {
      "epoch": 4.621046981271643,
      "grad_norm": 0.2622586786746979,
      "learning_rate": 7.629000481013158e-07,
      "loss": 0.009,
      "step": 2823700
    },
    {
      "epoch": 4.621079711710296,
      "grad_norm": 0.15284812450408936,
      "learning_rate": 7.628341558877989e-07,
      "loss": 0.0134,
      "step": 2823720
    },
    {
      "epoch": 4.62111244214895,
      "grad_norm": 0.12907077372074127,
      "learning_rate": 7.627682636742817e-07,
      "loss": 0.0097,
      "step": 2823740
    },
    {
      "epoch": 4.621145172587603,
      "grad_norm": 0.5263092517852783,
      "learning_rate": 7.627023714607646e-07,
      "loss": 0.0071,
      "step": 2823760
    },
    {
      "epoch": 4.621177903026256,
      "grad_norm": 0.2260211557149887,
      "learning_rate": 7.626364792472474e-07,
      "loss": 0.0098,
      "step": 2823780
    },
    {
      "epoch": 4.6212106334649095,
      "grad_norm": 0.04861295223236084,
      "learning_rate": 7.625705870337304e-07,
      "loss": 0.0083,
      "step": 2823800
    },
    {
      "epoch": 4.621243363903563,
      "grad_norm": 0.25490516424179077,
      "learning_rate": 7.625046948202132e-07,
      "loss": 0.0098,
      "step": 2823820
    },
    {
      "epoch": 4.621276094342217,
      "grad_norm": 0.07097935676574707,
      "learning_rate": 7.624388026066961e-07,
      "loss": 0.0071,
      "step": 2823840
    },
    {
      "epoch": 4.621308824780869,
      "grad_norm": 0.1962963491678238,
      "learning_rate": 7.623729103931789e-07,
      "loss": 0.0073,
      "step": 2823860
    },
    {
      "epoch": 4.621341555219523,
      "grad_norm": 0.3700273036956787,
      "learning_rate": 7.623070181796618e-07,
      "loss": 0.0048,
      "step": 2823880
    },
    {
      "epoch": 4.6213742856581765,
      "grad_norm": 0.08273399621248245,
      "learning_rate": 7.622411259661447e-07,
      "loss": 0.0093,
      "step": 2823900
    },
    {
      "epoch": 4.62140701609683,
      "grad_norm": 0.2883513867855072,
      "learning_rate": 7.621752337526276e-07,
      "loss": 0.0095,
      "step": 2823920
    },
    {
      "epoch": 4.621439746535483,
      "grad_norm": 0.14719805121421814,
      "learning_rate": 7.621093415391104e-07,
      "loss": 0.0061,
      "step": 2823940
    },
    {
      "epoch": 4.621472476974136,
      "grad_norm": 0.3395567238330841,
      "learning_rate": 7.620434493255932e-07,
      "loss": 0.0142,
      "step": 2823960
    },
    {
      "epoch": 4.62150520741279,
      "grad_norm": 1.0849508047103882,
      "learning_rate": 7.619775571120762e-07,
      "loss": 0.0082,
      "step": 2823980
    },
    {
      "epoch": 4.621537937851443,
      "grad_norm": 0.2201738953590393,
      "learning_rate": 7.61911664898559e-07,
      "loss": 0.0087,
      "step": 2824000
    },
    {
      "epoch": 4.621570668290096,
      "grad_norm": 0.11415282636880875,
      "learning_rate": 7.618457726850419e-07,
      "loss": 0.0095,
      "step": 2824020
    },
    {
      "epoch": 4.62160339872875,
      "grad_norm": 0.08805975317955017,
      "learning_rate": 7.617798804715247e-07,
      "loss": 0.011,
      "step": 2824040
    },
    {
      "epoch": 4.621636129167403,
      "grad_norm": 0.12235507369041443,
      "learning_rate": 7.617139882580076e-07,
      "loss": 0.0089,
      "step": 2824060
    },
    {
      "epoch": 4.621668859606056,
      "grad_norm": 0.3326989710330963,
      "learning_rate": 7.616480960444905e-07,
      "loss": 0.0077,
      "step": 2824080
    },
    {
      "epoch": 4.62170159004471,
      "grad_norm": 0.13832026720046997,
      "learning_rate": 7.615822038309734e-07,
      "loss": 0.008,
      "step": 2824100
    },
    {
      "epoch": 4.621734320483363,
      "grad_norm": 0.22227603197097778,
      "learning_rate": 7.615163116174562e-07,
      "loss": 0.0077,
      "step": 2824120
    },
    {
      "epoch": 4.621767050922016,
      "grad_norm": 0.6506562232971191,
      "learning_rate": 7.614504194039391e-07,
      "loss": 0.0144,
      "step": 2824140
    },
    {
      "epoch": 4.62179978136067,
      "grad_norm": 0.1297825425863266,
      "learning_rate": 7.61384527190422e-07,
      "loss": 0.008,
      "step": 2824160
    },
    {
      "epoch": 4.621832511799323,
      "grad_norm": 0.037140969187021255,
      "learning_rate": 7.613186349769049e-07,
      "loss": 0.0048,
      "step": 2824180
    },
    {
      "epoch": 4.621865242237977,
      "grad_norm": 0.20213019847869873,
      "learning_rate": 7.612527427633877e-07,
      "loss": 0.0106,
      "step": 2824200
    },
    {
      "epoch": 4.62189797267663,
      "grad_norm": 0.10423468798398972,
      "learning_rate": 7.611868505498706e-07,
      "loss": 0.009,
      "step": 2824220
    },
    {
      "epoch": 4.621930703115283,
      "grad_norm": 0.19805824756622314,
      "learning_rate": 7.611209583363535e-07,
      "loss": 0.0167,
      "step": 2824240
    },
    {
      "epoch": 4.621963433553937,
      "grad_norm": 0.186740443110466,
      "learning_rate": 7.610550661228364e-07,
      "loss": 0.0076,
      "step": 2824260
    },
    {
      "epoch": 4.6219961639925895,
      "grad_norm": 0.07156704366207123,
      "learning_rate": 7.609891739093192e-07,
      "loss": 0.0078,
      "step": 2824280
    },
    {
      "epoch": 4.622028894431243,
      "grad_norm": 0.2150088995695114,
      "learning_rate": 7.60923281695802e-07,
      "loss": 0.0074,
      "step": 2824300
    },
    {
      "epoch": 4.622061624869897,
      "grad_norm": 0.3602868616580963,
      "learning_rate": 7.608573894822849e-07,
      "loss": 0.0068,
      "step": 2824320
    },
    {
      "epoch": 4.622094355308549,
      "grad_norm": 0.31506162881851196,
      "learning_rate": 7.607914972687678e-07,
      "loss": 0.0102,
      "step": 2824340
    },
    {
      "epoch": 4.622127085747203,
      "grad_norm": 0.6532564163208008,
      "learning_rate": 7.607256050552507e-07,
      "loss": 0.0091,
      "step": 2824360
    },
    {
      "epoch": 4.622159816185857,
      "grad_norm": 0.22417223453521729,
      "learning_rate": 7.606597128417335e-07,
      "loss": 0.0119,
      "step": 2824380
    },
    {
      "epoch": 4.62219254662451,
      "grad_norm": 0.22443531453609467,
      "learning_rate": 7.605938206282164e-07,
      "loss": 0.0076,
      "step": 2824400
    },
    {
      "epoch": 4.622225277063163,
      "grad_norm": 0.17465056478977203,
      "learning_rate": 7.605279284146993e-07,
      "loss": 0.0059,
      "step": 2824420
    },
    {
      "epoch": 4.6222580075018165,
      "grad_norm": 0.4441659450531006,
      "learning_rate": 7.604620362011822e-07,
      "loss": 0.0079,
      "step": 2824440
    },
    {
      "epoch": 4.62229073794047,
      "grad_norm": 0.2688797116279602,
      "learning_rate": 7.60396143987665e-07,
      "loss": 0.0093,
      "step": 2824460
    },
    {
      "epoch": 4.622323468379124,
      "grad_norm": 0.1985258162021637,
      "learning_rate": 7.603302517741479e-07,
      "loss": 0.0084,
      "step": 2824480
    },
    {
      "epoch": 4.622356198817776,
      "grad_norm": 0.15270955860614777,
      "learning_rate": 7.602643595606307e-07,
      "loss": 0.0077,
      "step": 2824500
    },
    {
      "epoch": 4.62238892925643,
      "grad_norm": 0.142287477850914,
      "learning_rate": 7.601984673471137e-07,
      "loss": 0.0072,
      "step": 2824520
    },
    {
      "epoch": 4.622421659695084,
      "grad_norm": 0.11529828608036041,
      "learning_rate": 7.601325751335965e-07,
      "loss": 0.0055,
      "step": 2824540
    },
    {
      "epoch": 4.622454390133736,
      "grad_norm": 0.07911036908626556,
      "learning_rate": 7.600666829200794e-07,
      "loss": 0.0102,
      "step": 2824560
    },
    {
      "epoch": 4.62248712057239,
      "grad_norm": 0.1707744598388672,
      "learning_rate": 7.600007907065622e-07,
      "loss": 0.0064,
      "step": 2824580
    },
    {
      "epoch": 4.6225198510110435,
      "grad_norm": 0.25093087553977966,
      "learning_rate": 7.599348984930452e-07,
      "loss": 0.0065,
      "step": 2824600
    },
    {
      "epoch": 4.622552581449696,
      "grad_norm": 0.14276455342769623,
      "learning_rate": 7.59869006279528e-07,
      "loss": 0.0068,
      "step": 2824620
    },
    {
      "epoch": 4.62258531188835,
      "grad_norm": 0.20246213674545288,
      "learning_rate": 7.598031140660108e-07,
      "loss": 0.0061,
      "step": 2824640
    },
    {
      "epoch": 4.622618042327003,
      "grad_norm": 0.19255241751670837,
      "learning_rate": 7.597372218524937e-07,
      "loss": 0.0071,
      "step": 2824660
    },
    {
      "epoch": 4.622650772765657,
      "grad_norm": 0.1232636347413063,
      "learning_rate": 7.596713296389766e-07,
      "loss": 0.0083,
      "step": 2824680
    },
    {
      "epoch": 4.62268350320431,
      "grad_norm": 0.26113927364349365,
      "learning_rate": 7.596054374254595e-07,
      "loss": 0.0092,
      "step": 2824700
    },
    {
      "epoch": 4.622716233642963,
      "grad_norm": 0.21316426992416382,
      "learning_rate": 7.595395452119423e-07,
      "loss": 0.0055,
      "step": 2824720
    },
    {
      "epoch": 4.622748964081617,
      "grad_norm": 1.7392979860305786,
      "learning_rate": 7.594736529984252e-07,
      "loss": 0.0092,
      "step": 2824740
    },
    {
      "epoch": 4.62278169452027,
      "grad_norm": 0.512057363986969,
      "learning_rate": 7.59407760784908e-07,
      "loss": 0.012,
      "step": 2824760
    },
    {
      "epoch": 4.622814424958923,
      "grad_norm": 0.10722336918115616,
      "learning_rate": 7.59341868571391e-07,
      "loss": 0.0106,
      "step": 2824780
    },
    {
      "epoch": 4.622847155397577,
      "grad_norm": 0.21414659917354584,
      "learning_rate": 7.592759763578738e-07,
      "loss": 0.0106,
      "step": 2824800
    },
    {
      "epoch": 4.62287988583623,
      "grad_norm": 0.07950719445943832,
      "learning_rate": 7.592100841443567e-07,
      "loss": 0.0078,
      "step": 2824820
    },
    {
      "epoch": 4.622912616274883,
      "grad_norm": 0.27794331312179565,
      "learning_rate": 7.591441919308395e-07,
      "loss": 0.0153,
      "step": 2824840
    },
    {
      "epoch": 4.622945346713537,
      "grad_norm": 0.32437142729759216,
      "learning_rate": 7.590782997173226e-07,
      "loss": 0.0092,
      "step": 2824860
    },
    {
      "epoch": 4.62297807715219,
      "grad_norm": 0.17266863584518433,
      "learning_rate": 7.590124075038054e-07,
      "loss": 0.0079,
      "step": 2824880
    },
    {
      "epoch": 4.623010807590843,
      "grad_norm": 0.4402201771736145,
      "learning_rate": 7.589465152902882e-07,
      "loss": 0.0097,
      "step": 2824900
    },
    {
      "epoch": 4.6230435380294965,
      "grad_norm": 0.29316410422325134,
      "learning_rate": 7.58880623076771e-07,
      "loss": 0.0089,
      "step": 2824920
    },
    {
      "epoch": 4.62307626846815,
      "grad_norm": 0.21436747908592224,
      "learning_rate": 7.588147308632538e-07,
      "loss": 0.0081,
      "step": 2824940
    },
    {
      "epoch": 4.623108998906804,
      "grad_norm": 0.26950058341026306,
      "learning_rate": 7.587488386497369e-07,
      "loss": 0.0139,
      "step": 2824960
    },
    {
      "epoch": 4.623141729345456,
      "grad_norm": 0.22382910549640656,
      "learning_rate": 7.586829464362198e-07,
      "loss": 0.0095,
      "step": 2824980
    },
    {
      "epoch": 4.62317445978411,
      "grad_norm": 0.6991683840751648,
      "learning_rate": 7.586170542227025e-07,
      "loss": 0.007,
      "step": 2825000
    },
    {
      "epoch": 4.623207190222764,
      "grad_norm": 0.35027211904525757,
      "learning_rate": 7.585511620091853e-07,
      "loss": 0.0107,
      "step": 2825020
    },
    {
      "epoch": 4.623239920661417,
      "grad_norm": 0.15347498655319214,
      "learning_rate": 7.584852697956684e-07,
      "loss": 0.0091,
      "step": 2825040
    },
    {
      "epoch": 4.62327265110007,
      "grad_norm": 0.16157807409763336,
      "learning_rate": 7.584193775821512e-07,
      "loss": 0.0102,
      "step": 2825060
    },
    {
      "epoch": 4.6233053815387235,
      "grad_norm": 0.15914195775985718,
      "learning_rate": 7.583534853686341e-07,
      "loss": 0.009,
      "step": 2825080
    },
    {
      "epoch": 4.623338111977377,
      "grad_norm": 0.09504330903291702,
      "learning_rate": 7.582875931551169e-07,
      "loss": 0.0075,
      "step": 2825100
    },
    {
      "epoch": 4.62337084241603,
      "grad_norm": 0.42486897110939026,
      "learning_rate": 7.582217009415999e-07,
      "loss": 0.0051,
      "step": 2825120
    },
    {
      "epoch": 4.623403572854683,
      "grad_norm": 0.26713499426841736,
      "learning_rate": 7.581558087280827e-07,
      "loss": 0.0119,
      "step": 2825140
    },
    {
      "epoch": 4.623436303293337,
      "grad_norm": 0.1849142462015152,
      "learning_rate": 7.580899165145656e-07,
      "loss": 0.0098,
      "step": 2825160
    },
    {
      "epoch": 4.62346903373199,
      "grad_norm": 0.12738725543022156,
      "learning_rate": 7.580240243010484e-07,
      "loss": 0.0093,
      "step": 2825180
    },
    {
      "epoch": 4.623501764170643,
      "grad_norm": 0.14484082162380219,
      "learning_rate": 7.579581320875313e-07,
      "loss": 0.0115,
      "step": 2825200
    },
    {
      "epoch": 4.623534494609297,
      "grad_norm": 0.2369282990694046,
      "learning_rate": 7.578922398740142e-07,
      "loss": 0.0095,
      "step": 2825220
    },
    {
      "epoch": 4.6235672250479505,
      "grad_norm": 0.14402516186237335,
      "learning_rate": 7.578263476604971e-07,
      "loss": 0.0105,
      "step": 2825240
    },
    {
      "epoch": 4.623599955486603,
      "grad_norm": 0.14082178473472595,
      "learning_rate": 7.577604554469799e-07,
      "loss": 0.0076,
      "step": 2825260
    },
    {
      "epoch": 4.623632685925257,
      "grad_norm": 0.4270813763141632,
      "learning_rate": 7.576945632334628e-07,
      "loss": 0.0081,
      "step": 2825280
    },
    {
      "epoch": 4.62366541636391,
      "grad_norm": 0.3495822548866272,
      "learning_rate": 7.576286710199457e-07,
      "loss": 0.0109,
      "step": 2825300
    },
    {
      "epoch": 4.623698146802563,
      "grad_norm": 0.20349204540252686,
      "learning_rate": 7.575627788064286e-07,
      "loss": 0.0069,
      "step": 2825320
    },
    {
      "epoch": 4.623730877241217,
      "grad_norm": 0.07448584586381912,
      "learning_rate": 7.574968865929114e-07,
      "loss": 0.0078,
      "step": 2825340
    },
    {
      "epoch": 4.62376360767987,
      "grad_norm": 0.3317684531211853,
      "learning_rate": 7.574309943793942e-07,
      "loss": 0.0125,
      "step": 2825360
    },
    {
      "epoch": 4.623796338118524,
      "grad_norm": 0.18198926746845245,
      "learning_rate": 7.573651021658771e-07,
      "loss": 0.0111,
      "step": 2825380
    },
    {
      "epoch": 4.623829068557177,
      "grad_norm": 0.1239921972155571,
      "learning_rate": 7.5729920995236e-07,
      "loss": 0.0112,
      "step": 2825400
    },
    {
      "epoch": 4.62386179899583,
      "grad_norm": 0.17258599400520325,
      "learning_rate": 7.572333177388429e-07,
      "loss": 0.0121,
      "step": 2825420
    },
    {
      "epoch": 4.623894529434484,
      "grad_norm": 0.22226844727993011,
      "learning_rate": 7.571674255253257e-07,
      "loss": 0.0079,
      "step": 2825440
    },
    {
      "epoch": 4.6239272598731365,
      "grad_norm": 0.13867899775505066,
      "learning_rate": 7.571015333118086e-07,
      "loss": 0.0097,
      "step": 2825460
    },
    {
      "epoch": 4.62395999031179,
      "grad_norm": 0.07526446133852005,
      "learning_rate": 7.570356410982915e-07,
      "loss": 0.0075,
      "step": 2825480
    },
    {
      "epoch": 4.623992720750444,
      "grad_norm": 0.35258135199546814,
      "learning_rate": 7.569697488847744e-07,
      "loss": 0.0081,
      "step": 2825500
    },
    {
      "epoch": 4.624025451189096,
      "grad_norm": 0.43620410561561584,
      "learning_rate": 7.569038566712572e-07,
      "loss": 0.0098,
      "step": 2825520
    },
    {
      "epoch": 4.62405818162775,
      "grad_norm": 0.3753573000431061,
      "learning_rate": 7.568379644577401e-07,
      "loss": 0.0086,
      "step": 2825540
    },
    {
      "epoch": 4.6240909120664035,
      "grad_norm": 0.10692904144525528,
      "learning_rate": 7.56772072244223e-07,
      "loss": 0.0086,
      "step": 2825560
    },
    {
      "epoch": 4.624123642505057,
      "grad_norm": 0.22113075852394104,
      "learning_rate": 7.567061800307059e-07,
      "loss": 0.0053,
      "step": 2825580
    },
    {
      "epoch": 4.62415637294371,
      "grad_norm": 0.14071805775165558,
      "learning_rate": 7.566402878171887e-07,
      "loss": 0.0094,
      "step": 2825600
    },
    {
      "epoch": 4.624189103382363,
      "grad_norm": 0.27912256121635437,
      "learning_rate": 7.565743956036716e-07,
      "loss": 0.0081,
      "step": 2825620
    },
    {
      "epoch": 4.624221833821017,
      "grad_norm": 0.7348927855491638,
      "learning_rate": 7.565085033901544e-07,
      "loss": 0.0125,
      "step": 2825640
    },
    {
      "epoch": 4.624254564259671,
      "grad_norm": 0.15178029239177704,
      "learning_rate": 7.564426111766374e-07,
      "loss": 0.0055,
      "step": 2825660
    },
    {
      "epoch": 4.624287294698323,
      "grad_norm": 0.1040153056383133,
      "learning_rate": 7.563767189631202e-07,
      "loss": 0.0094,
      "step": 2825680
    },
    {
      "epoch": 4.624320025136977,
      "grad_norm": 0.2221168875694275,
      "learning_rate": 7.56310826749603e-07,
      "loss": 0.0076,
      "step": 2825700
    },
    {
      "epoch": 4.6243527555756305,
      "grad_norm": 0.0968879982829094,
      "learning_rate": 7.562449345360859e-07,
      "loss": 0.0086,
      "step": 2825720
    },
    {
      "epoch": 4.624385486014283,
      "grad_norm": 0.3948308825492859,
      "learning_rate": 7.561790423225688e-07,
      "loss": 0.0107,
      "step": 2825740
    },
    {
      "epoch": 4.624418216452937,
      "grad_norm": 0.33827903866767883,
      "learning_rate": 7.561131501090517e-07,
      "loss": 0.0095,
      "step": 2825760
    },
    {
      "epoch": 4.62445094689159,
      "grad_norm": 0.2241736203432083,
      "learning_rate": 7.560472578955345e-07,
      "loss": 0.0065,
      "step": 2825780
    },
    {
      "epoch": 4.624483677330243,
      "grad_norm": 0.3285319209098816,
      "learning_rate": 7.559813656820174e-07,
      "loss": 0.0055,
      "step": 2825800
    },
    {
      "epoch": 4.624516407768897,
      "grad_norm": 0.25361374020576477,
      "learning_rate": 7.559154734685002e-07,
      "loss": 0.0075,
      "step": 2825820
    },
    {
      "epoch": 4.62454913820755,
      "grad_norm": 0.30504634976387024,
      "learning_rate": 7.558495812549832e-07,
      "loss": 0.0154,
      "step": 2825840
    },
    {
      "epoch": 4.624581868646204,
      "grad_norm": 0.07629939168691635,
      "learning_rate": 7.55783689041466e-07,
      "loss": 0.0074,
      "step": 2825860
    },
    {
      "epoch": 4.624614599084857,
      "grad_norm": 0.15313754975795746,
      "learning_rate": 7.557177968279489e-07,
      "loss": 0.0108,
      "step": 2825880
    },
    {
      "epoch": 4.62464732952351,
      "grad_norm": 0.07022973895072937,
      "learning_rate": 7.556519046144317e-07,
      "loss": 0.0099,
      "step": 2825900
    },
    {
      "epoch": 4.624680059962164,
      "grad_norm": 0.10829442739486694,
      "learning_rate": 7.555860124009147e-07,
      "loss": 0.0083,
      "step": 2825920
    },
    {
      "epoch": 4.624712790400817,
      "grad_norm": 0.22186735272407532,
      "learning_rate": 7.555201201873975e-07,
      "loss": 0.0059,
      "step": 2825940
    },
    {
      "epoch": 4.62474552083947,
      "grad_norm": 0.29876652359962463,
      "learning_rate": 7.554542279738804e-07,
      "loss": 0.0066,
      "step": 2825960
    },
    {
      "epoch": 4.624778251278124,
      "grad_norm": 0.18093816936016083,
      "learning_rate": 7.553883357603632e-07,
      "loss": 0.0105,
      "step": 2825980
    },
    {
      "epoch": 4.624810981716777,
      "grad_norm": 0.27240127325057983,
      "learning_rate": 7.553224435468461e-07,
      "loss": 0.0063,
      "step": 2826000
    },
    {
      "epoch": 4.62484371215543,
      "grad_norm": 0.4719482958316803,
      "learning_rate": 7.55256551333329e-07,
      "loss": 0.0098,
      "step": 2826020
    },
    {
      "epoch": 4.624876442594084,
      "grad_norm": 0.18363040685653687,
      "learning_rate": 7.551906591198118e-07,
      "loss": 0.011,
      "step": 2826040
    },
    {
      "epoch": 4.624909173032737,
      "grad_norm": 0.3440559506416321,
      "learning_rate": 7.551247669062947e-07,
      "loss": 0.0078,
      "step": 2826060
    },
    {
      "epoch": 4.62494190347139,
      "grad_norm": 0.6858449578285217,
      "learning_rate": 7.550588746927775e-07,
      "loss": 0.0118,
      "step": 2826080
    },
    {
      "epoch": 4.6249746339100435,
      "grad_norm": 0.07468916475772858,
      "learning_rate": 7.549929824792605e-07,
      "loss": 0.0089,
      "step": 2826100
    },
    {
      "epoch": 4.625007364348697,
      "grad_norm": 0.48429417610168457,
      "learning_rate": 7.549270902657433e-07,
      "loss": 0.0076,
      "step": 2826120
    },
    {
      "epoch": 4.625040094787351,
      "grad_norm": 0.20934928953647614,
      "learning_rate": 7.548611980522262e-07,
      "loss": 0.0104,
      "step": 2826140
    },
    {
      "epoch": 4.625072825226003,
      "grad_norm": 0.4298526644706726,
      "learning_rate": 7.54795305838709e-07,
      "loss": 0.0126,
      "step": 2826160
    },
    {
      "epoch": 4.625105555664657,
      "grad_norm": 1.3163785934448242,
      "learning_rate": 7.547294136251921e-07,
      "loss": 0.0114,
      "step": 2826180
    },
    {
      "epoch": 4.625138286103311,
      "grad_norm": 0.6774647831916809,
      "learning_rate": 7.546635214116749e-07,
      "loss": 0.0101,
      "step": 2826200
    },
    {
      "epoch": 4.625171016541964,
      "grad_norm": 0.1751115769147873,
      "learning_rate": 7.545976291981577e-07,
      "loss": 0.0074,
      "step": 2826220
    },
    {
      "epoch": 4.625203746980617,
      "grad_norm": 0.0950465202331543,
      "learning_rate": 7.545317369846405e-07,
      "loss": 0.0086,
      "step": 2826240
    },
    {
      "epoch": 4.6252364774192705,
      "grad_norm": 0.09747365117073059,
      "learning_rate": 7.544658447711234e-07,
      "loss": 0.0066,
      "step": 2826260
    },
    {
      "epoch": 4.625269207857924,
      "grad_norm": 0.05953250080347061,
      "learning_rate": 7.543999525576064e-07,
      "loss": 0.0074,
      "step": 2826280
    },
    {
      "epoch": 4.625301938296577,
      "grad_norm": 0.4746270775794983,
      "learning_rate": 7.543340603440893e-07,
      "loss": 0.0065,
      "step": 2826300
    },
    {
      "epoch": 4.62533466873523,
      "grad_norm": 0.23553042113780975,
      "learning_rate": 7.54268168130572e-07,
      "loss": 0.0109,
      "step": 2826320
    },
    {
      "epoch": 4.625367399173884,
      "grad_norm": 0.28865280747413635,
      "learning_rate": 7.542022759170548e-07,
      "loss": 0.011,
      "step": 2826340
    },
    {
      "epoch": 4.625400129612537,
      "grad_norm": 0.37788137793540955,
      "learning_rate": 7.541363837035379e-07,
      "loss": 0.007,
      "step": 2826360
    },
    {
      "epoch": 4.62543286005119,
      "grad_norm": 0.19495292007923126,
      "learning_rate": 7.540704914900207e-07,
      "loss": 0.0076,
      "step": 2826380
    },
    {
      "epoch": 4.625465590489844,
      "grad_norm": 0.27217715978622437,
      "learning_rate": 7.540045992765036e-07,
      "loss": 0.0082,
      "step": 2826400
    },
    {
      "epoch": 4.625498320928497,
      "grad_norm": 0.1963852047920227,
      "learning_rate": 7.539387070629864e-07,
      "loss": 0.009,
      "step": 2826420
    },
    {
      "epoch": 4.62553105136715,
      "grad_norm": 0.2154247760772705,
      "learning_rate": 7.538728148494694e-07,
      "loss": 0.0069,
      "step": 2826440
    },
    {
      "epoch": 4.625563781805804,
      "grad_norm": 0.21094176173210144,
      "learning_rate": 7.538069226359522e-07,
      "loss": 0.0152,
      "step": 2826460
    },
    {
      "epoch": 4.625596512244457,
      "grad_norm": 0.16630786657333374,
      "learning_rate": 7.537410304224351e-07,
      "loss": 0.0122,
      "step": 2826480
    },
    {
      "epoch": 4.625629242683111,
      "grad_norm": 0.22108040750026703,
      "learning_rate": 7.536751382089179e-07,
      "loss": 0.0089,
      "step": 2826500
    },
    {
      "epoch": 4.625661973121764,
      "grad_norm": 0.17706948518753052,
      "learning_rate": 7.536092459954008e-07,
      "loss": 0.0073,
      "step": 2826520
    },
    {
      "epoch": 4.625694703560417,
      "grad_norm": 0.2258898913860321,
      "learning_rate": 7.535433537818837e-07,
      "loss": 0.006,
      "step": 2826540
    },
    {
      "epoch": 4.625727433999071,
      "grad_norm": 0.21454091370105743,
      "learning_rate": 7.534774615683666e-07,
      "loss": 0.0085,
      "step": 2826560
    },
    {
      "epoch": 4.6257601644377235,
      "grad_norm": 0.4118954539299011,
      "learning_rate": 7.534115693548494e-07,
      "loss": 0.0103,
      "step": 2826580
    },
    {
      "epoch": 4.625792894876377,
      "grad_norm": 0.4604753255844116,
      "learning_rate": 7.533456771413323e-07,
      "loss": 0.0108,
      "step": 2826600
    },
    {
      "epoch": 4.625825625315031,
      "grad_norm": 0.10502917319536209,
      "learning_rate": 7.532797849278152e-07,
      "loss": 0.0057,
      "step": 2826620
    },
    {
      "epoch": 4.625858355753683,
      "grad_norm": 0.12845809757709503,
      "learning_rate": 7.532138927142981e-07,
      "loss": 0.0095,
      "step": 2826640
    },
    {
      "epoch": 4.625891086192337,
      "grad_norm": 0.31266865134239197,
      "learning_rate": 7.531480005007809e-07,
      "loss": 0.0093,
      "step": 2826660
    },
    {
      "epoch": 4.625923816630991,
      "grad_norm": 0.4678056240081787,
      "learning_rate": 7.530821082872637e-07,
      "loss": 0.0086,
      "step": 2826680
    },
    {
      "epoch": 4.625956547069644,
      "grad_norm": 0.11033051460981369,
      "learning_rate": 7.530162160737466e-07,
      "loss": 0.0067,
      "step": 2826700
    },
    {
      "epoch": 4.625989277508297,
      "grad_norm": 0.13947314023971558,
      "learning_rate": 7.529503238602295e-07,
      "loss": 0.0146,
      "step": 2826720
    },
    {
      "epoch": 4.6260220079469505,
      "grad_norm": 0.6120438575744629,
      "learning_rate": 7.528844316467124e-07,
      "loss": 0.0127,
      "step": 2826740
    },
    {
      "epoch": 4.626054738385604,
      "grad_norm": 0.08574601262807846,
      "learning_rate": 7.528185394331952e-07,
      "loss": 0.0094,
      "step": 2826760
    },
    {
      "epoch": 4.626087468824257,
      "grad_norm": 0.044134754687547684,
      "learning_rate": 7.527526472196781e-07,
      "loss": 0.0085,
      "step": 2826780
    },
    {
      "epoch": 4.62612019926291,
      "grad_norm": 0.3916335105895996,
      "learning_rate": 7.52686755006161e-07,
      "loss": 0.0099,
      "step": 2826800
    },
    {
      "epoch": 4.626152929701564,
      "grad_norm": 0.28872936964035034,
      "learning_rate": 7.526208627926439e-07,
      "loss": 0.0085,
      "step": 2826820
    },
    {
      "epoch": 4.626185660140218,
      "grad_norm": 0.23287561535835266,
      "learning_rate": 7.525549705791267e-07,
      "loss": 0.0104,
      "step": 2826840
    },
    {
      "epoch": 4.62621839057887,
      "grad_norm": 0.22931063175201416,
      "learning_rate": 7.524890783656096e-07,
      "loss": 0.0084,
      "step": 2826860
    },
    {
      "epoch": 4.626251121017524,
      "grad_norm": 0.4709726870059967,
      "learning_rate": 7.524231861520925e-07,
      "loss": 0.0084,
      "step": 2826880
    },
    {
      "epoch": 4.6262838514561775,
      "grad_norm": 0.2328256219625473,
      "learning_rate": 7.523572939385754e-07,
      "loss": 0.0065,
      "step": 2826900
    },
    {
      "epoch": 4.62631658189483,
      "grad_norm": 0.20207451283931732,
      "learning_rate": 7.522914017250582e-07,
      "loss": 0.0061,
      "step": 2826920
    },
    {
      "epoch": 4.626349312333484,
      "grad_norm": 0.16479118168354034,
      "learning_rate": 7.522255095115411e-07,
      "loss": 0.0109,
      "step": 2826940
    },
    {
      "epoch": 4.626382042772137,
      "grad_norm": 0.2555529475212097,
      "learning_rate": 7.521596172980239e-07,
      "loss": 0.0083,
      "step": 2826960
    },
    {
      "epoch": 4.62641477321079,
      "grad_norm": 0.07524871081113815,
      "learning_rate": 7.520937250845069e-07,
      "loss": 0.0079,
      "step": 2826980
    },
    {
      "epoch": 4.626447503649444,
      "grad_norm": 0.13871629536151886,
      "learning_rate": 7.520278328709897e-07,
      "loss": 0.0079,
      "step": 2827000
    },
    {
      "epoch": 4.626480234088097,
      "grad_norm": 0.12295980751514435,
      "learning_rate": 7.519619406574725e-07,
      "loss": 0.0094,
      "step": 2827020
    },
    {
      "epoch": 4.626512964526751,
      "grad_norm": 0.11292941123247147,
      "learning_rate": 7.518960484439554e-07,
      "loss": 0.0112,
      "step": 2827040
    },
    {
      "epoch": 4.626545694965404,
      "grad_norm": 0.37802785634994507,
      "learning_rate": 7.518301562304383e-07,
      "loss": 0.0079,
      "step": 2827060
    },
    {
      "epoch": 4.626578425404057,
      "grad_norm": 0.2807386517524719,
      "learning_rate": 7.517642640169212e-07,
      "loss": 0.0156,
      "step": 2827080
    },
    {
      "epoch": 4.626611155842711,
      "grad_norm": 0.20647291839122772,
      "learning_rate": 7.51698371803404e-07,
      "loss": 0.0101,
      "step": 2827100
    },
    {
      "epoch": 4.626643886281364,
      "grad_norm": 0.16588987410068512,
      "learning_rate": 7.516324795898869e-07,
      "loss": 0.009,
      "step": 2827120
    },
    {
      "epoch": 4.626676616720017,
      "grad_norm": 0.4148317873477936,
      "learning_rate": 7.515665873763697e-07,
      "loss": 0.0091,
      "step": 2827140
    },
    {
      "epoch": 4.626709347158671,
      "grad_norm": 0.1229967474937439,
      "learning_rate": 7.515006951628527e-07,
      "loss": 0.012,
      "step": 2827160
    },
    {
      "epoch": 4.626742077597324,
      "grad_norm": 0.3309846520423889,
      "learning_rate": 7.514348029493355e-07,
      "loss": 0.0062,
      "step": 2827180
    },
    {
      "epoch": 4.626774808035977,
      "grad_norm": 0.12247059494256973,
      "learning_rate": 7.513689107358184e-07,
      "loss": 0.0102,
      "step": 2827200
    },
    {
      "epoch": 4.6268075384746306,
      "grad_norm": 0.27694711089134216,
      "learning_rate": 7.513030185223012e-07,
      "loss": 0.0132,
      "step": 2827220
    },
    {
      "epoch": 4.626840268913284,
      "grad_norm": 0.2848144769668579,
      "learning_rate": 7.512371263087842e-07,
      "loss": 0.006,
      "step": 2827240
    },
    {
      "epoch": 4.626872999351937,
      "grad_norm": 0.2859288454055786,
      "learning_rate": 7.51171234095267e-07,
      "loss": 0.0107,
      "step": 2827260
    },
    {
      "epoch": 4.6269057297905904,
      "grad_norm": 0.6349548101425171,
      "learning_rate": 7.511053418817499e-07,
      "loss": 0.0073,
      "step": 2827280
    },
    {
      "epoch": 4.626938460229244,
      "grad_norm": 0.10772749781608582,
      "learning_rate": 7.510394496682327e-07,
      "loss": 0.008,
      "step": 2827300
    },
    {
      "epoch": 4.626971190667898,
      "grad_norm": 0.14133590459823608,
      "learning_rate": 7.509735574547157e-07,
      "loss": 0.0119,
      "step": 2827320
    },
    {
      "epoch": 4.62700392110655,
      "grad_norm": 0.16262094676494598,
      "learning_rate": 7.509076652411985e-07,
      "loss": 0.0157,
      "step": 2827340
    },
    {
      "epoch": 4.627036651545204,
      "grad_norm": 0.07841748744249344,
      "learning_rate": 7.508417730276813e-07,
      "loss": 0.0103,
      "step": 2827360
    },
    {
      "epoch": 4.6270693819838575,
      "grad_norm": 0.24360881745815277,
      "learning_rate": 7.507758808141642e-07,
      "loss": 0.0092,
      "step": 2827380
    },
    {
      "epoch": 4.627102112422511,
      "grad_norm": 0.40551120042800903,
      "learning_rate": 7.50709988600647e-07,
      "loss": 0.0093,
      "step": 2827400
    },
    {
      "epoch": 4.627134842861164,
      "grad_norm": 0.35985660552978516,
      "learning_rate": 7.5064409638713e-07,
      "loss": 0.0094,
      "step": 2827420
    },
    {
      "epoch": 4.627167573299817,
      "grad_norm": 0.19178687036037445,
      "learning_rate": 7.505782041736128e-07,
      "loss": 0.0073,
      "step": 2827440
    },
    {
      "epoch": 4.627200303738471,
      "grad_norm": 0.07868408411741257,
      "learning_rate": 7.505123119600957e-07,
      "loss": 0.0081,
      "step": 2827460
    },
    {
      "epoch": 4.627233034177124,
      "grad_norm": 0.2914423644542694,
      "learning_rate": 7.504464197465785e-07,
      "loss": 0.0078,
      "step": 2827480
    },
    {
      "epoch": 4.627265764615777,
      "grad_norm": 0.2252749651670456,
      "learning_rate": 7.503805275330616e-07,
      "loss": 0.0073,
      "step": 2827500
    },
    {
      "epoch": 4.627298495054431,
      "grad_norm": 0.2308351993560791,
      "learning_rate": 7.503146353195444e-07,
      "loss": 0.0047,
      "step": 2827520
    },
    {
      "epoch": 4.627331225493084,
      "grad_norm": 0.43189698457717896,
      "learning_rate": 7.502487431060272e-07,
      "loss": 0.0086,
      "step": 2827540
    },
    {
      "epoch": 4.627363955931737,
      "grad_norm": 0.05200217291712761,
      "learning_rate": 7.5018285089251e-07,
      "loss": 0.0083,
      "step": 2827560
    },
    {
      "epoch": 4.627396686370391,
      "grad_norm": 0.20523403584957123,
      "learning_rate": 7.501169586789929e-07,
      "loss": 0.0071,
      "step": 2827580
    },
    {
      "epoch": 4.627429416809044,
      "grad_norm": 0.20563560724258423,
      "learning_rate": 7.500510664654759e-07,
      "loss": 0.009,
      "step": 2827600
    },
    {
      "epoch": 4.627462147247697,
      "grad_norm": 0.21658222377300262,
      "learning_rate": 7.499851742519588e-07,
      "loss": 0.0095,
      "step": 2827620
    },
    {
      "epoch": 4.627494877686351,
      "grad_norm": 0.47837790846824646,
      "learning_rate": 7.499192820384415e-07,
      "loss": 0.011,
      "step": 2827640
    },
    {
      "epoch": 4.627527608125004,
      "grad_norm": 0.3395273685455322,
      "learning_rate": 7.498533898249243e-07,
      "loss": 0.0157,
      "step": 2827660
    },
    {
      "epoch": 4.627560338563658,
      "grad_norm": 0.4259309470653534,
      "learning_rate": 7.497874976114074e-07,
      "loss": 0.0107,
      "step": 2827680
    },
    {
      "epoch": 4.627593069002311,
      "grad_norm": 0.3606751263141632,
      "learning_rate": 7.497216053978903e-07,
      "loss": 0.0085,
      "step": 2827700
    },
    {
      "epoch": 4.627625799440964,
      "grad_norm": 0.15096917748451233,
      "learning_rate": 7.496557131843731e-07,
      "loss": 0.0077,
      "step": 2827720
    },
    {
      "epoch": 4.627658529879618,
      "grad_norm": 0.14617815613746643,
      "learning_rate": 7.495898209708559e-07,
      "loss": 0.0133,
      "step": 2827740
    },
    {
      "epoch": 4.6276912603182705,
      "grad_norm": 0.09984510391950607,
      "learning_rate": 7.495239287573389e-07,
      "loss": 0.0077,
      "step": 2827760
    },
    {
      "epoch": 4.627723990756924,
      "grad_norm": 0.22673963010311127,
      "learning_rate": 7.494580365438217e-07,
      "loss": 0.014,
      "step": 2827780
    },
    {
      "epoch": 4.627756721195578,
      "grad_norm": 0.7615424394607544,
      "learning_rate": 7.493921443303046e-07,
      "loss": 0.0073,
      "step": 2827800
    },
    {
      "epoch": 4.62778945163423,
      "grad_norm": 0.15619748830795288,
      "learning_rate": 7.493262521167874e-07,
      "loss": 0.007,
      "step": 2827820
    },
    {
      "epoch": 4.627822182072884,
      "grad_norm": 0.19280710816383362,
      "learning_rate": 7.492603599032703e-07,
      "loss": 0.0111,
      "step": 2827840
    },
    {
      "epoch": 4.627854912511538,
      "grad_norm": 0.14994333684444427,
      "learning_rate": 7.491944676897532e-07,
      "loss": 0.006,
      "step": 2827860
    },
    {
      "epoch": 4.627887642950191,
      "grad_norm": 0.16602303087711334,
      "learning_rate": 7.491285754762361e-07,
      "loss": 0.0103,
      "step": 2827880
    },
    {
      "epoch": 4.627920373388844,
      "grad_norm": 0.13423845171928406,
      "learning_rate": 7.490626832627189e-07,
      "loss": 0.0078,
      "step": 2827900
    },
    {
      "epoch": 4.6279531038274975,
      "grad_norm": 0.24610209465026855,
      "learning_rate": 7.489967910492018e-07,
      "loss": 0.0096,
      "step": 2827920
    },
    {
      "epoch": 4.627985834266151,
      "grad_norm": 0.08704624325037003,
      "learning_rate": 7.489308988356847e-07,
      "loss": 0.0073,
      "step": 2827940
    },
    {
      "epoch": 4.628018564704805,
      "grad_norm": 0.07378626614809036,
      "learning_rate": 7.488650066221676e-07,
      "loss": 0.0091,
      "step": 2827960
    },
    {
      "epoch": 4.628051295143457,
      "grad_norm": 0.0993487611413002,
      "learning_rate": 7.487991144086504e-07,
      "loss": 0.0089,
      "step": 2827980
    },
    {
      "epoch": 4.628084025582111,
      "grad_norm": 0.41031375527381897,
      "learning_rate": 7.487332221951333e-07,
      "loss": 0.0128,
      "step": 2828000
    },
    {
      "epoch": 4.6281167560207646,
      "grad_norm": 0.304733544588089,
      "learning_rate": 7.486673299816161e-07,
      "loss": 0.0119,
      "step": 2828020
    },
    {
      "epoch": 4.628149486459417,
      "grad_norm": 0.11018162965774536,
      "learning_rate": 7.48601437768099e-07,
      "loss": 0.007,
      "step": 2828040
    },
    {
      "epoch": 4.628182216898071,
      "grad_norm": 0.18189743161201477,
      "learning_rate": 7.485355455545819e-07,
      "loss": 0.0071,
      "step": 2828060
    },
    {
      "epoch": 4.6282149473367244,
      "grad_norm": 0.05220654606819153,
      "learning_rate": 7.484696533410647e-07,
      "loss": 0.008,
      "step": 2828080
    },
    {
      "epoch": 4.628247677775377,
      "grad_norm": 0.2799186110496521,
      "learning_rate": 7.484037611275476e-07,
      "loss": 0.0082,
      "step": 2828100
    },
    {
      "epoch": 4.628280408214031,
      "grad_norm": 0.41397738456726074,
      "learning_rate": 7.483378689140305e-07,
      "loss": 0.0086,
      "step": 2828120
    },
    {
      "epoch": 4.628313138652684,
      "grad_norm": 0.1564929038286209,
      "learning_rate": 7.482719767005134e-07,
      "loss": 0.0061,
      "step": 2828140
    },
    {
      "epoch": 4.628345869091338,
      "grad_norm": 0.16813154518604279,
      "learning_rate": 7.482060844869962e-07,
      "loss": 0.0115,
      "step": 2828160
    },
    {
      "epoch": 4.628378599529991,
      "grad_norm": 0.12264315038919449,
      "learning_rate": 7.481401922734791e-07,
      "loss": 0.0093,
      "step": 2828180
    },
    {
      "epoch": 4.628411329968644,
      "grad_norm": 0.07753323763608932,
      "learning_rate": 7.48074300059962e-07,
      "loss": 0.0155,
      "step": 2828200
    },
    {
      "epoch": 4.628444060407298,
      "grad_norm": 0.174595907330513,
      "learning_rate": 7.480084078464449e-07,
      "loss": 0.0102,
      "step": 2828220
    },
    {
      "epoch": 4.6284767908459505,
      "grad_norm": 0.25425034761428833,
      "learning_rate": 7.479425156329277e-07,
      "loss": 0.0104,
      "step": 2828240
    },
    {
      "epoch": 4.628509521284604,
      "grad_norm": 0.1569627970457077,
      "learning_rate": 7.478766234194106e-07,
      "loss": 0.0086,
      "step": 2828260
    },
    {
      "epoch": 4.628542251723258,
      "grad_norm": 0.409576416015625,
      "learning_rate": 7.478107312058934e-07,
      "loss": 0.0093,
      "step": 2828280
    },
    {
      "epoch": 4.628574982161911,
      "grad_norm": 0.23053035140037537,
      "learning_rate": 7.477448389923764e-07,
      "loss": 0.01,
      "step": 2828300
    },
    {
      "epoch": 4.628607712600564,
      "grad_norm": 0.34226781129837036,
      "learning_rate": 7.476789467788592e-07,
      "loss": 0.0062,
      "step": 2828320
    },
    {
      "epoch": 4.628640443039218,
      "grad_norm": 0.38645264506340027,
      "learning_rate": 7.47613054565342e-07,
      "loss": 0.0092,
      "step": 2828340
    },
    {
      "epoch": 4.628673173477871,
      "grad_norm": 0.2370053380727768,
      "learning_rate": 7.475471623518249e-07,
      "loss": 0.0085,
      "step": 2828360
    },
    {
      "epoch": 4.628705903916524,
      "grad_norm": 0.26977306604385376,
      "learning_rate": 7.474812701383078e-07,
      "loss": 0.0082,
      "step": 2828380
    },
    {
      "epoch": 4.6287386343551775,
      "grad_norm": 0.13719891011714935,
      "learning_rate": 7.474153779247907e-07,
      "loss": 0.0081,
      "step": 2828400
    },
    {
      "epoch": 4.628771364793831,
      "grad_norm": 0.1334693729877472,
      "learning_rate": 7.473494857112735e-07,
      "loss": 0.0099,
      "step": 2828420
    },
    {
      "epoch": 4.628804095232484,
      "grad_norm": 0.14199940860271454,
      "learning_rate": 7.472835934977564e-07,
      "loss": 0.0085,
      "step": 2828440
    },
    {
      "epoch": 4.628836825671137,
      "grad_norm": 0.3026154339313507,
      "learning_rate": 7.472177012842393e-07,
      "loss": 0.0112,
      "step": 2828460
    },
    {
      "epoch": 4.628869556109791,
      "grad_norm": 0.19336508214473724,
      "learning_rate": 7.471518090707222e-07,
      "loss": 0.0083,
      "step": 2828480
    },
    {
      "epoch": 4.628902286548445,
      "grad_norm": 0.09527548402547836,
      "learning_rate": 7.47085916857205e-07,
      "loss": 0.0104,
      "step": 2828500
    },
    {
      "epoch": 4.628935016987097,
      "grad_norm": 0.2517533302307129,
      "learning_rate": 7.470200246436879e-07,
      "loss": 0.0096,
      "step": 2828520
    },
    {
      "epoch": 4.628967747425751,
      "grad_norm": 0.17344629764556885,
      "learning_rate": 7.469541324301707e-07,
      "loss": 0.0064,
      "step": 2828540
    },
    {
      "epoch": 4.6290004778644045,
      "grad_norm": 0.23553891479969025,
      "learning_rate": 7.468882402166537e-07,
      "loss": 0.0055,
      "step": 2828560
    },
    {
      "epoch": 4.629033208303058,
      "grad_norm": 0.46836981177330017,
      "learning_rate": 7.468223480031365e-07,
      "loss": 0.0088,
      "step": 2828580
    },
    {
      "epoch": 4.629065938741711,
      "grad_norm": 0.05783849209547043,
      "learning_rate": 7.467564557896194e-07,
      "loss": 0.0063,
      "step": 2828600
    },
    {
      "epoch": 4.629098669180364,
      "grad_norm": 0.16863258183002472,
      "learning_rate": 7.466905635761022e-07,
      "loss": 0.0069,
      "step": 2828620
    },
    {
      "epoch": 4.629131399619018,
      "grad_norm": 0.09248092770576477,
      "learning_rate": 7.466246713625852e-07,
      "loss": 0.0089,
      "step": 2828640
    },
    {
      "epoch": 4.629164130057671,
      "grad_norm": 0.15528017282485962,
      "learning_rate": 7.46558779149068e-07,
      "loss": 0.0081,
      "step": 2828660
    },
    {
      "epoch": 4.629196860496324,
      "grad_norm": 0.18999142944812775,
      "learning_rate": 7.464928869355509e-07,
      "loss": 0.0093,
      "step": 2828680
    },
    {
      "epoch": 4.629229590934978,
      "grad_norm": 0.38010329008102417,
      "learning_rate": 7.464269947220337e-07,
      "loss": 0.0078,
      "step": 2828700
    },
    {
      "epoch": 4.629262321373631,
      "grad_norm": 0.08477628976106644,
      "learning_rate": 7.463611025085165e-07,
      "loss": 0.0096,
      "step": 2828720
    },
    {
      "epoch": 4.629295051812284,
      "grad_norm": 0.2092835009098053,
      "learning_rate": 7.462952102949995e-07,
      "loss": 0.0095,
      "step": 2828740
    },
    {
      "epoch": 4.629327782250938,
      "grad_norm": 0.24427379667758942,
      "learning_rate": 7.462293180814823e-07,
      "loss": 0.0117,
      "step": 2828760
    },
    {
      "epoch": 4.629360512689591,
      "grad_norm": 0.20803500711917877,
      "learning_rate": 7.461634258679652e-07,
      "loss": 0.0084,
      "step": 2828780
    },
    {
      "epoch": 4.629393243128244,
      "grad_norm": 0.3510304093360901,
      "learning_rate": 7.46097533654448e-07,
      "loss": 0.0089,
      "step": 2828800
    },
    {
      "epoch": 4.629425973566898,
      "grad_norm": 0.4522736668586731,
      "learning_rate": 7.460316414409311e-07,
      "loss": 0.0079,
      "step": 2828820
    },
    {
      "epoch": 4.629458704005551,
      "grad_norm": 0.05212769657373428,
      "learning_rate": 7.459657492274139e-07,
      "loss": 0.0079,
      "step": 2828840
    },
    {
      "epoch": 4.629491434444205,
      "grad_norm": 0.1437489539384842,
      "learning_rate": 7.458998570138967e-07,
      "loss": 0.0105,
      "step": 2828860
    },
    {
      "epoch": 4.629524164882858,
      "grad_norm": 0.38983583450317383,
      "learning_rate": 7.458339648003795e-07,
      "loss": 0.006,
      "step": 2828880
    },
    {
      "epoch": 4.629556895321511,
      "grad_norm": 0.1095639169216156,
      "learning_rate": 7.457680725868626e-07,
      "loss": 0.0085,
      "step": 2828900
    },
    {
      "epoch": 4.629589625760165,
      "grad_norm": 0.6243202090263367,
      "learning_rate": 7.457021803733454e-07,
      "loss": 0.0115,
      "step": 2828920
    },
    {
      "epoch": 4.6296223561988175,
      "grad_norm": 0.22725670039653778,
      "learning_rate": 7.456362881598283e-07,
      "loss": 0.0084,
      "step": 2828940
    },
    {
      "epoch": 4.629655086637471,
      "grad_norm": 0.2639215588569641,
      "learning_rate": 7.45570395946311e-07,
      "loss": 0.0108,
      "step": 2828960
    },
    {
      "epoch": 4.629687817076125,
      "grad_norm": 0.16574549674987793,
      "learning_rate": 7.455045037327939e-07,
      "loss": 0.0119,
      "step": 2828980
    },
    {
      "epoch": 4.629720547514777,
      "grad_norm": 0.15581613779067993,
      "learning_rate": 7.454386115192769e-07,
      "loss": 0.0035,
      "step": 2829000
    },
    {
      "epoch": 4.629753277953431,
      "grad_norm": 0.28262484073638916,
      "learning_rate": 7.453727193057598e-07,
      "loss": 0.013,
      "step": 2829020
    },
    {
      "epoch": 4.6297860083920845,
      "grad_norm": 0.4886854887008667,
      "learning_rate": 7.453068270922426e-07,
      "loss": 0.0104,
      "step": 2829040
    },
    {
      "epoch": 4.629818738830738,
      "grad_norm": 0.6436363458633423,
      "learning_rate": 7.452409348787254e-07,
      "loss": 0.0103,
      "step": 2829060
    },
    {
      "epoch": 4.629851469269391,
      "grad_norm": 0.19397373497486115,
      "learning_rate": 7.451750426652084e-07,
      "loss": 0.0125,
      "step": 2829080
    },
    {
      "epoch": 4.629884199708044,
      "grad_norm": 0.368360310792923,
      "learning_rate": 7.451091504516912e-07,
      "loss": 0.009,
      "step": 2829100
    },
    {
      "epoch": 4.629916930146698,
      "grad_norm": 0.1187778040766716,
      "learning_rate": 7.450432582381741e-07,
      "loss": 0.0093,
      "step": 2829120
    },
    {
      "epoch": 4.629949660585352,
      "grad_norm": 0.21964208781719208,
      "learning_rate": 7.449773660246569e-07,
      "loss": 0.0117,
      "step": 2829140
    },
    {
      "epoch": 4.629982391024004,
      "grad_norm": 0.5990733504295349,
      "learning_rate": 7.449114738111398e-07,
      "loss": 0.0066,
      "step": 2829160
    },
    {
      "epoch": 4.630015121462658,
      "grad_norm": 0.11306817084550858,
      "learning_rate": 7.448455815976227e-07,
      "loss": 0.0117,
      "step": 2829180
    },
    {
      "epoch": 4.6300478519013115,
      "grad_norm": 0.11938826739788055,
      "learning_rate": 7.447796893841056e-07,
      "loss": 0.0061,
      "step": 2829200
    },
    {
      "epoch": 4.630080582339964,
      "grad_norm": 0.11986574530601501,
      "learning_rate": 7.447137971705884e-07,
      "loss": 0.0068,
      "step": 2829220
    },
    {
      "epoch": 4.630113312778618,
      "grad_norm": 0.13900114595890045,
      "learning_rate": 7.446479049570713e-07,
      "loss": 0.0116,
      "step": 2829240
    },
    {
      "epoch": 4.630146043217271,
      "grad_norm": 0.16050215065479279,
      "learning_rate": 7.445820127435542e-07,
      "loss": 0.0077,
      "step": 2829260
    },
    {
      "epoch": 4.630178773655924,
      "grad_norm": 0.048273585736751556,
      "learning_rate": 7.445161205300371e-07,
      "loss": 0.0124,
      "step": 2829280
    },
    {
      "epoch": 4.630211504094578,
      "grad_norm": 0.23457463085651398,
      "learning_rate": 7.444502283165199e-07,
      "loss": 0.009,
      "step": 2829300
    },
    {
      "epoch": 4.630244234533231,
      "grad_norm": 0.19462233781814575,
      "learning_rate": 7.443843361030028e-07,
      "loss": 0.0078,
      "step": 2829320
    },
    {
      "epoch": 4.630276964971885,
      "grad_norm": 0.0846659392118454,
      "learning_rate": 7.443184438894857e-07,
      "loss": 0.0099,
      "step": 2829340
    },
    {
      "epoch": 4.630309695410538,
      "grad_norm": 0.46553388237953186,
      "learning_rate": 7.442525516759686e-07,
      "loss": 0.0088,
      "step": 2829360
    },
    {
      "epoch": 4.630342425849191,
      "grad_norm": 0.2502933144569397,
      "learning_rate": 7.441866594624514e-07,
      "loss": 0.0074,
      "step": 2829380
    },
    {
      "epoch": 4.630375156287845,
      "grad_norm": 0.19756002724170685,
      "learning_rate": 7.441207672489342e-07,
      "loss": 0.0099,
      "step": 2829400
    },
    {
      "epoch": 4.630407886726498,
      "grad_norm": 0.14988377690315247,
      "learning_rate": 7.440548750354171e-07,
      "loss": 0.0128,
      "step": 2829420
    },
    {
      "epoch": 4.630440617165151,
      "grad_norm": 0.1401759386062622,
      "learning_rate": 7.439889828219e-07,
      "loss": 0.0078,
      "step": 2829440
    },
    {
      "epoch": 4.630473347603805,
      "grad_norm": 0.17239294946193695,
      "learning_rate": 7.439230906083829e-07,
      "loss": 0.0085,
      "step": 2829460
    },
    {
      "epoch": 4.630506078042458,
      "grad_norm": 0.11854405701160431,
      "learning_rate": 7.438571983948657e-07,
      "loss": 0.0088,
      "step": 2829480
    },
    {
      "epoch": 4.630538808481111,
      "grad_norm": 0.3851431608200073,
      "learning_rate": 7.437913061813486e-07,
      "loss": 0.0079,
      "step": 2829500
    },
    {
      "epoch": 4.630571538919765,
      "grad_norm": 0.1868658810853958,
      "learning_rate": 7.437254139678315e-07,
      "loss": 0.0089,
      "step": 2829520
    },
    {
      "epoch": 4.630604269358418,
      "grad_norm": 0.04969516396522522,
      "learning_rate": 7.436595217543144e-07,
      "loss": 0.0096,
      "step": 2829540
    },
    {
      "epoch": 4.630636999797071,
      "grad_norm": 0.4634447395801544,
      "learning_rate": 7.435936295407972e-07,
      "loss": 0.0081,
      "step": 2829560
    },
    {
      "epoch": 4.6306697302357245,
      "grad_norm": 0.19526784121990204,
      "learning_rate": 7.435277373272801e-07,
      "loss": 0.014,
      "step": 2829580
    },
    {
      "epoch": 4.630702460674378,
      "grad_norm": 0.688572883605957,
      "learning_rate": 7.434618451137629e-07,
      "loss": 0.0105,
      "step": 2829600
    },
    {
      "epoch": 4.630735191113032,
      "grad_norm": 0.299628883600235,
      "learning_rate": 7.433959529002459e-07,
      "loss": 0.0091,
      "step": 2829620
    },
    {
      "epoch": 4.630767921551684,
      "grad_norm": 0.6340784430503845,
      "learning_rate": 7.433300606867287e-07,
      "loss": 0.0155,
      "step": 2829640
    },
    {
      "epoch": 4.630800651990338,
      "grad_norm": 0.21565383672714233,
      "learning_rate": 7.432641684732116e-07,
      "loss": 0.0096,
      "step": 2829660
    },
    {
      "epoch": 4.630833382428992,
      "grad_norm": 0.3085155785083771,
      "learning_rate": 7.431982762596944e-07,
      "loss": 0.0132,
      "step": 2829680
    },
    {
      "epoch": 4.630866112867644,
      "grad_norm": 1.3125330209732056,
      "learning_rate": 7.431323840461774e-07,
      "loss": 0.0081,
      "step": 2829700
    },
    {
      "epoch": 4.630898843306298,
      "grad_norm": 0.14571596682071686,
      "learning_rate": 7.430664918326602e-07,
      "loss": 0.0082,
      "step": 2829720
    },
    {
      "epoch": 4.6309315737449515,
      "grad_norm": 0.18167860805988312,
      "learning_rate": 7.43000599619143e-07,
      "loss": 0.0077,
      "step": 2829740
    },
    {
      "epoch": 4.630964304183605,
      "grad_norm": 0.10405171662569046,
      "learning_rate": 7.429347074056259e-07,
      "loss": 0.0084,
      "step": 2829760
    },
    {
      "epoch": 4.630997034622258,
      "grad_norm": 0.34123095870018005,
      "learning_rate": 7.428688151921088e-07,
      "loss": 0.0087,
      "step": 2829780
    },
    {
      "epoch": 4.631029765060911,
      "grad_norm": 0.2896413803100586,
      "learning_rate": 7.428029229785917e-07,
      "loss": 0.0104,
      "step": 2829800
    },
    {
      "epoch": 4.631062495499565,
      "grad_norm": 0.30252987146377563,
      "learning_rate": 7.427370307650745e-07,
      "loss": 0.0081,
      "step": 2829820
    },
    {
      "epoch": 4.631095225938218,
      "grad_norm": 0.18262459337711334,
      "learning_rate": 7.426711385515574e-07,
      "loss": 0.0084,
      "step": 2829840
    },
    {
      "epoch": 4.631127956376871,
      "grad_norm": 0.21369801461696625,
      "learning_rate": 7.426052463380402e-07,
      "loss": 0.0045,
      "step": 2829860
    },
    {
      "epoch": 4.631160686815525,
      "grad_norm": 0.13348737359046936,
      "learning_rate": 7.425393541245232e-07,
      "loss": 0.0086,
      "step": 2829880
    },
    {
      "epoch": 4.6311934172541775,
      "grad_norm": 0.3339661955833435,
      "learning_rate": 7.42473461911006e-07,
      "loss": 0.0119,
      "step": 2829900
    },
    {
      "epoch": 4.631226147692831,
      "grad_norm": 0.1716005951166153,
      "learning_rate": 7.424075696974889e-07,
      "loss": 0.0103,
      "step": 2829920
    },
    {
      "epoch": 4.631258878131485,
      "grad_norm": 0.42734482884407043,
      "learning_rate": 7.423416774839717e-07,
      "loss": 0.0133,
      "step": 2829940
    },
    {
      "epoch": 4.631291608570138,
      "grad_norm": 0.8024687767028809,
      "learning_rate": 7.422757852704547e-07,
      "loss": 0.0076,
      "step": 2829960
    },
    {
      "epoch": 4.631324339008791,
      "grad_norm": 0.1604515016078949,
      "learning_rate": 7.422098930569375e-07,
      "loss": 0.0073,
      "step": 2829980
    },
    {
      "epoch": 4.631357069447445,
      "grad_norm": 0.19569355249404907,
      "learning_rate": 7.421440008434204e-07,
      "loss": 0.0116,
      "step": 2830000
    },
    {
      "epoch": 4.631389799886098,
      "grad_norm": 0.07850942760705948,
      "learning_rate": 7.420781086299032e-07,
      "loss": 0.0059,
      "step": 2830020
    },
    {
      "epoch": 4.631422530324752,
      "grad_norm": 0.22396992146968842,
      "learning_rate": 7.42012216416386e-07,
      "loss": 0.0094,
      "step": 2830040
    },
    {
      "epoch": 4.6314552607634045,
      "grad_norm": 0.26428547501564026,
      "learning_rate": 7.41946324202869e-07,
      "loss": 0.009,
      "step": 2830060
    },
    {
      "epoch": 4.631487991202058,
      "grad_norm": 0.12317188829183578,
      "learning_rate": 7.418804319893518e-07,
      "loss": 0.0072,
      "step": 2830080
    },
    {
      "epoch": 4.631520721640712,
      "grad_norm": 0.33838531374931335,
      "learning_rate": 7.418145397758347e-07,
      "loss": 0.009,
      "step": 2830100
    },
    {
      "epoch": 4.631553452079364,
      "grad_norm": 0.27642759680747986,
      "learning_rate": 7.417486475623175e-07,
      "loss": 0.0092,
      "step": 2830120
    },
    {
      "epoch": 4.631586182518018,
      "grad_norm": 0.3375954031944275,
      "learning_rate": 7.416827553488006e-07,
      "loss": 0.0127,
      "step": 2830140
    },
    {
      "epoch": 4.631618912956672,
      "grad_norm": 0.3079012632369995,
      "learning_rate": 7.416168631352834e-07,
      "loss": 0.0068,
      "step": 2830160
    },
    {
      "epoch": 4.631651643395324,
      "grad_norm": 0.09152015298604965,
      "learning_rate": 7.415509709217662e-07,
      "loss": 0.0067,
      "step": 2830180
    },
    {
      "epoch": 4.631684373833978,
      "grad_norm": 0.2813502550125122,
      "learning_rate": 7.41485078708249e-07,
      "loss": 0.0092,
      "step": 2830200
    },
    {
      "epoch": 4.6317171042726315,
      "grad_norm": 0.16196253895759583,
      "learning_rate": 7.414191864947321e-07,
      "loss": 0.0075,
      "step": 2830220
    },
    {
      "epoch": 4.631749834711285,
      "grad_norm": 0.19650371372699738,
      "learning_rate": 7.413532942812149e-07,
      "loss": 0.0115,
      "step": 2830240
    },
    {
      "epoch": 4.631782565149938,
      "grad_norm": 0.6716778874397278,
      "learning_rate": 7.412874020676978e-07,
      "loss": 0.0099,
      "step": 2830260
    },
    {
      "epoch": 4.631815295588591,
      "grad_norm": 0.5960157513618469,
      "learning_rate": 7.412215098541805e-07,
      "loss": 0.0074,
      "step": 2830280
    },
    {
      "epoch": 4.631848026027245,
      "grad_norm": 0.3267790973186493,
      "learning_rate": 7.411556176406634e-07,
      "loss": 0.0075,
      "step": 2830300
    },
    {
      "epoch": 4.631880756465899,
      "grad_norm": 0.15361422300338745,
      "learning_rate": 7.410897254271464e-07,
      "loss": 0.0069,
      "step": 2830320
    },
    {
      "epoch": 4.631913486904551,
      "grad_norm": 0.19283245503902435,
      "learning_rate": 7.410238332136293e-07,
      "loss": 0.0098,
      "step": 2830340
    },
    {
      "epoch": 4.631946217343205,
      "grad_norm": 0.5161129236221313,
      "learning_rate": 7.409579410001121e-07,
      "loss": 0.0107,
      "step": 2830360
    },
    {
      "epoch": 4.6319789477818585,
      "grad_norm": 0.036317192018032074,
      "learning_rate": 7.40892048786595e-07,
      "loss": 0.012,
      "step": 2830380
    },
    {
      "epoch": 4.632011678220511,
      "grad_norm": 0.4963891804218292,
      "learning_rate": 7.408261565730779e-07,
      "loss": 0.0112,
      "step": 2830400
    },
    {
      "epoch": 4.632044408659165,
      "grad_norm": 0.10633919388055801,
      "learning_rate": 7.407602643595607e-07,
      "loss": 0.0075,
      "step": 2830420
    },
    {
      "epoch": 4.632077139097818,
      "grad_norm": 0.7740035057067871,
      "learning_rate": 7.406943721460436e-07,
      "loss": 0.0097,
      "step": 2830440
    },
    {
      "epoch": 4.632109869536471,
      "grad_norm": 0.1449396014213562,
      "learning_rate": 7.406284799325264e-07,
      "loss": 0.0077,
      "step": 2830460
    },
    {
      "epoch": 4.632142599975125,
      "grad_norm": 0.22265253961086273,
      "learning_rate": 7.405625877190093e-07,
      "loss": 0.0082,
      "step": 2830480
    },
    {
      "epoch": 4.632175330413778,
      "grad_norm": 0.10076668113470078,
      "learning_rate": 7.404966955054922e-07,
      "loss": 0.0099,
      "step": 2830500
    },
    {
      "epoch": 4.632208060852432,
      "grad_norm": 0.12826409935951233,
      "learning_rate": 7.404308032919751e-07,
      "loss": 0.007,
      "step": 2830520
    },
    {
      "epoch": 4.632240791291085,
      "grad_norm": 0.1569710075855255,
      "learning_rate": 7.403649110784579e-07,
      "loss": 0.0074,
      "step": 2830540
    },
    {
      "epoch": 4.632273521729738,
      "grad_norm": 0.06958885490894318,
      "learning_rate": 7.402990188649408e-07,
      "loss": 0.0047,
      "step": 2830560
    },
    {
      "epoch": 4.632306252168392,
      "grad_norm": 0.21727323532104492,
      "learning_rate": 7.402331266514237e-07,
      "loss": 0.0078,
      "step": 2830580
    },
    {
      "epoch": 4.632338982607045,
      "grad_norm": 0.14996762573719025,
      "learning_rate": 7.401672344379066e-07,
      "loss": 0.008,
      "step": 2830600
    },
    {
      "epoch": 4.632371713045698,
      "grad_norm": 0.5581235289573669,
      "learning_rate": 7.401013422243894e-07,
      "loss": 0.0101,
      "step": 2830620
    },
    {
      "epoch": 4.632404443484352,
      "grad_norm": 0.2522105276584625,
      "learning_rate": 7.400354500108723e-07,
      "loss": 0.0066,
      "step": 2830640
    },
    {
      "epoch": 4.632437173923005,
      "grad_norm": 0.08225854486227036,
      "learning_rate": 7.399695577973552e-07,
      "loss": 0.0068,
      "step": 2830660
    },
    {
      "epoch": 4.632469904361658,
      "grad_norm": 0.2195916622877121,
      "learning_rate": 7.399036655838381e-07,
      "loss": 0.0097,
      "step": 2830680
    },
    {
      "epoch": 4.6325026348003115,
      "grad_norm": 0.2470027506351471,
      "learning_rate": 7.398377733703209e-07,
      "loss": 0.0157,
      "step": 2830700
    },
    {
      "epoch": 4.632535365238965,
      "grad_norm": 0.2340352088212967,
      "learning_rate": 7.397718811568038e-07,
      "loss": 0.0084,
      "step": 2830720
    },
    {
      "epoch": 4.632568095677618,
      "grad_norm": 0.1940462738275528,
      "learning_rate": 7.397059889432866e-07,
      "loss": 0.0095,
      "step": 2830740
    },
    {
      "epoch": 4.632600826116271,
      "grad_norm": 0.5298738479614258,
      "learning_rate": 7.396400967297695e-07,
      "loss": 0.0077,
      "step": 2830760
    },
    {
      "epoch": 4.632633556554925,
      "grad_norm": 0.16817477345466614,
      "learning_rate": 7.395742045162524e-07,
      "loss": 0.0058,
      "step": 2830780
    },
    {
      "epoch": 4.632666286993579,
      "grad_norm": 0.08612088859081268,
      "learning_rate": 7.395083123027352e-07,
      "loss": 0.006,
      "step": 2830800
    },
    {
      "epoch": 4.632699017432231,
      "grad_norm": 0.18625520169734955,
      "learning_rate": 7.394424200892181e-07,
      "loss": 0.0087,
      "step": 2830820
    },
    {
      "epoch": 4.632731747870885,
      "grad_norm": 0.24275565147399902,
      "learning_rate": 7.39376527875701e-07,
      "loss": 0.0075,
      "step": 2830840
    },
    {
      "epoch": 4.6327644783095385,
      "grad_norm": 0.41819682717323303,
      "learning_rate": 7.393106356621839e-07,
      "loss": 0.0075,
      "step": 2830860
    },
    {
      "epoch": 4.632797208748192,
      "grad_norm": 0.41187673807144165,
      "learning_rate": 7.392447434486667e-07,
      "loss": 0.0079,
      "step": 2830880
    },
    {
      "epoch": 4.632829939186845,
      "grad_norm": 0.3723911643028259,
      "learning_rate": 7.391788512351496e-07,
      "loss": 0.0071,
      "step": 2830900
    },
    {
      "epoch": 4.632862669625498,
      "grad_norm": 0.08671167492866516,
      "learning_rate": 7.391129590216324e-07,
      "loss": 0.0085,
      "step": 2830920
    },
    {
      "epoch": 4.632895400064152,
      "grad_norm": 0.06834081560373306,
      "learning_rate": 7.390470668081154e-07,
      "loss": 0.008,
      "step": 2830940
    },
    {
      "epoch": 4.632928130502805,
      "grad_norm": 0.1257821023464203,
      "learning_rate": 7.389811745945982e-07,
      "loss": 0.008,
      "step": 2830960
    },
    {
      "epoch": 4.632960860941458,
      "grad_norm": 0.42177870869636536,
      "learning_rate": 7.389152823810811e-07,
      "loss": 0.01,
      "step": 2830980
    },
    {
      "epoch": 4.632993591380112,
      "grad_norm": 0.2479170560836792,
      "learning_rate": 7.388493901675639e-07,
      "loss": 0.0106,
      "step": 2831000
    },
    {
      "epoch": 4.633026321818765,
      "grad_norm": 0.522951066493988,
      "learning_rate": 7.387834979540469e-07,
      "loss": 0.0108,
      "step": 2831020
    },
    {
      "epoch": 4.633059052257418,
      "grad_norm": 0.27725642919540405,
      "learning_rate": 7.387176057405297e-07,
      "loss": 0.0071,
      "step": 2831040
    },
    {
      "epoch": 4.633091782696072,
      "grad_norm": 0.504968523979187,
      "learning_rate": 7.386517135270126e-07,
      "loss": 0.0073,
      "step": 2831060
    },
    {
      "epoch": 4.633124513134725,
      "grad_norm": 0.11717608571052551,
      "learning_rate": 7.385858213134954e-07,
      "loss": 0.0069,
      "step": 2831080
    },
    {
      "epoch": 4.633157243573378,
      "grad_norm": 0.25174710154533386,
      "learning_rate": 7.385199290999783e-07,
      "loss": 0.0095,
      "step": 2831100
    },
    {
      "epoch": 4.633189974012032,
      "grad_norm": 0.32980480790138245,
      "learning_rate": 7.384540368864612e-07,
      "loss": 0.0063,
      "step": 2831120
    },
    {
      "epoch": 4.633222704450685,
      "grad_norm": 0.1705312430858612,
      "learning_rate": 7.38388144672944e-07,
      "loss": 0.0097,
      "step": 2831140
    },
    {
      "epoch": 4.633255434889339,
      "grad_norm": 0.08757852017879486,
      "learning_rate": 7.383222524594269e-07,
      "loss": 0.0083,
      "step": 2831160
    },
    {
      "epoch": 4.633288165327992,
      "grad_norm": 0.1987811028957367,
      "learning_rate": 7.382563602459097e-07,
      "loss": 0.0076,
      "step": 2831180
    },
    {
      "epoch": 4.633320895766645,
      "grad_norm": 0.14285464584827423,
      "learning_rate": 7.381904680323927e-07,
      "loss": 0.0122,
      "step": 2831200
    },
    {
      "epoch": 4.633353626205299,
      "grad_norm": 0.14067260921001434,
      "learning_rate": 7.381245758188755e-07,
      "loss": 0.0119,
      "step": 2831220
    },
    {
      "epoch": 4.6333863566439515,
      "grad_norm": 0.2181876003742218,
      "learning_rate": 7.380586836053584e-07,
      "loss": 0.0064,
      "step": 2831240
    },
    {
      "epoch": 4.633419087082605,
      "grad_norm": 0.1678546518087387,
      "learning_rate": 7.379927913918412e-07,
      "loss": 0.011,
      "step": 2831260
    },
    {
      "epoch": 4.633451817521259,
      "grad_norm": 0.12273192405700684,
      "learning_rate": 7.379268991783242e-07,
      "loss": 0.0105,
      "step": 2831280
    },
    {
      "epoch": 4.633484547959911,
      "grad_norm": 0.13520395755767822,
      "learning_rate": 7.37861006964807e-07,
      "loss": 0.0128,
      "step": 2831300
    },
    {
      "epoch": 4.633517278398565,
      "grad_norm": 0.08105733245611191,
      "learning_rate": 7.377951147512899e-07,
      "loss": 0.0117,
      "step": 2831320
    },
    {
      "epoch": 4.633550008837219,
      "grad_norm": 0.3347785472869873,
      "learning_rate": 7.377292225377727e-07,
      "loss": 0.0091,
      "step": 2831340
    },
    {
      "epoch": 4.633582739275871,
      "grad_norm": 0.17322269082069397,
      "learning_rate": 7.376633303242556e-07,
      "loss": 0.0082,
      "step": 2831360
    },
    {
      "epoch": 4.633615469714525,
      "grad_norm": 0.21532364189624786,
      "learning_rate": 7.375974381107385e-07,
      "loss": 0.0098,
      "step": 2831380
    },
    {
      "epoch": 4.6336482001531785,
      "grad_norm": 0.15239067375659943,
      "learning_rate": 7.375315458972214e-07,
      "loss": 0.0111,
      "step": 2831400
    },
    {
      "epoch": 4.633680930591832,
      "grad_norm": 0.32355305552482605,
      "learning_rate": 7.374656536837042e-07,
      "loss": 0.0081,
      "step": 2831420
    },
    {
      "epoch": 4.633713661030485,
      "grad_norm": 0.16181093454360962,
      "learning_rate": 7.37399761470187e-07,
      "loss": 0.007,
      "step": 2831440
    },
    {
      "epoch": 4.633746391469138,
      "grad_norm": 0.07213500142097473,
      "learning_rate": 7.373338692566701e-07,
      "loss": 0.0059,
      "step": 2831460
    },
    {
      "epoch": 4.633779121907792,
      "grad_norm": 0.9852010607719421,
      "learning_rate": 7.372679770431529e-07,
      "loss": 0.0153,
      "step": 2831480
    },
    {
      "epoch": 4.6338118523464455,
      "grad_norm": 0.11564718931913376,
      "learning_rate": 7.372020848296357e-07,
      "loss": 0.0086,
      "step": 2831500
    },
    {
      "epoch": 4.633844582785098,
      "grad_norm": 0.10111407935619354,
      "learning_rate": 7.371361926161185e-07,
      "loss": 0.0087,
      "step": 2831520
    },
    {
      "epoch": 4.633877313223752,
      "grad_norm": 0.2787819504737854,
      "learning_rate": 7.370703004026016e-07,
      "loss": 0.0058,
      "step": 2831540
    },
    {
      "epoch": 4.633910043662405,
      "grad_norm": 0.16894394159317017,
      "learning_rate": 7.370044081890844e-07,
      "loss": 0.0088,
      "step": 2831560
    },
    {
      "epoch": 4.633942774101058,
      "grad_norm": 0.3121628165245056,
      "learning_rate": 7.369385159755673e-07,
      "loss": 0.0069,
      "step": 2831580
    },
    {
      "epoch": 4.633975504539712,
      "grad_norm": 0.21326208114624023,
      "learning_rate": 7.3687262376205e-07,
      "loss": 0.0126,
      "step": 2831600
    },
    {
      "epoch": 4.634008234978365,
      "grad_norm": 0.2582744359970093,
      "learning_rate": 7.368067315485329e-07,
      "loss": 0.0061,
      "step": 2831620
    },
    {
      "epoch": 4.634040965417018,
      "grad_norm": 0.23515775799751282,
      "learning_rate": 7.367408393350159e-07,
      "loss": 0.0122,
      "step": 2831640
    },
    {
      "epoch": 4.634073695855672,
      "grad_norm": 0.08699104934930801,
      "learning_rate": 7.366749471214988e-07,
      "loss": 0.0099,
      "step": 2831660
    },
    {
      "epoch": 4.634106426294325,
      "grad_norm": 0.40500587224960327,
      "learning_rate": 7.366090549079816e-07,
      "loss": 0.0124,
      "step": 2831680
    },
    {
      "epoch": 4.634139156732979,
      "grad_norm": 0.1326516568660736,
      "learning_rate": 7.365431626944645e-07,
      "loss": 0.0094,
      "step": 2831700
    },
    {
      "epoch": 4.6341718871716315,
      "grad_norm": 0.24843013286590576,
      "learning_rate": 7.364772704809474e-07,
      "loss": 0.0084,
      "step": 2831720
    },
    {
      "epoch": 4.634204617610285,
      "grad_norm": 0.16318020224571228,
      "learning_rate": 7.364113782674303e-07,
      "loss": 0.0099,
      "step": 2831740
    },
    {
      "epoch": 4.634237348048939,
      "grad_norm": 0.1603359878063202,
      "learning_rate": 7.363454860539131e-07,
      "loss": 0.0053,
      "step": 2831760
    },
    {
      "epoch": 4.634270078487592,
      "grad_norm": 0.5608963966369629,
      "learning_rate": 7.362795938403959e-07,
      "loss": 0.0102,
      "step": 2831780
    },
    {
      "epoch": 4.634302808926245,
      "grad_norm": 0.11979066580533981,
      "learning_rate": 7.362137016268788e-07,
      "loss": 0.0059,
      "step": 2831800
    },
    {
      "epoch": 4.634335539364899,
      "grad_norm": 0.16177880764007568,
      "learning_rate": 7.361478094133617e-07,
      "loss": 0.0063,
      "step": 2831820
    },
    {
      "epoch": 4.634368269803552,
      "grad_norm": 0.1131841316819191,
      "learning_rate": 7.360819171998446e-07,
      "loss": 0.0067,
      "step": 2831840
    },
    {
      "epoch": 4.634401000242205,
      "grad_norm": 0.15603205561637878,
      "learning_rate": 7.360160249863274e-07,
      "loss": 0.0076,
      "step": 2831860
    },
    {
      "epoch": 4.6344337306808585,
      "grad_norm": 0.16694463789463043,
      "learning_rate": 7.359501327728103e-07,
      "loss": 0.009,
      "step": 2831880
    },
    {
      "epoch": 4.634466461119512,
      "grad_norm": 0.05772612243890762,
      "learning_rate": 7.358842405592932e-07,
      "loss": 0.006,
      "step": 2831900
    },
    {
      "epoch": 4.634499191558165,
      "grad_norm": 0.2796197533607483,
      "learning_rate": 7.358183483457761e-07,
      "loss": 0.0059,
      "step": 2831920
    },
    {
      "epoch": 4.634531921996818,
      "grad_norm": 0.5323725342750549,
      "learning_rate": 7.357524561322589e-07,
      "loss": 0.0107,
      "step": 2831940
    },
    {
      "epoch": 4.634564652435472,
      "grad_norm": 0.371072381734848,
      "learning_rate": 7.356865639187418e-07,
      "loss": 0.0106,
      "step": 2831960
    },
    {
      "epoch": 4.634597382874126,
      "grad_norm": 0.20503337681293488,
      "learning_rate": 7.356206717052247e-07,
      "loss": 0.005,
      "step": 2831980
    },
    {
      "epoch": 4.634630113312778,
      "grad_norm": 0.22179175913333893,
      "learning_rate": 7.355547794917076e-07,
      "loss": 0.0088,
      "step": 2832000
    },
    {
      "epoch": 4.634662843751432,
      "grad_norm": 0.21877478063106537,
      "learning_rate": 7.354888872781904e-07,
      "loss": 0.0083,
      "step": 2832020
    },
    {
      "epoch": 4.6346955741900855,
      "grad_norm": 0.27229931950569153,
      "learning_rate": 7.354229950646733e-07,
      "loss": 0.0098,
      "step": 2832040
    },
    {
      "epoch": 4.634728304628739,
      "grad_norm": 0.17820149660110474,
      "learning_rate": 7.353571028511561e-07,
      "loss": 0.0077,
      "step": 2832060
    },
    {
      "epoch": 4.634761035067392,
      "grad_norm": 0.18093252182006836,
      "learning_rate": 7.352912106376391e-07,
      "loss": 0.0069,
      "step": 2832080
    },
    {
      "epoch": 4.634793765506045,
      "grad_norm": 0.13267706334590912,
      "learning_rate": 7.352253184241219e-07,
      "loss": 0.009,
      "step": 2832100
    },
    {
      "epoch": 4.634826495944699,
      "grad_norm": 1.4835517406463623,
      "learning_rate": 7.351594262106047e-07,
      "loss": 0.007,
      "step": 2832120
    },
    {
      "epoch": 4.634859226383352,
      "grad_norm": 0.19477497041225433,
      "learning_rate": 7.350935339970876e-07,
      "loss": 0.008,
      "step": 2832140
    },
    {
      "epoch": 4.634891956822005,
      "grad_norm": 0.27712482213974,
      "learning_rate": 7.350276417835705e-07,
      "loss": 0.0112,
      "step": 2832160
    },
    {
      "epoch": 4.634924687260659,
      "grad_norm": 0.4376581013202667,
      "learning_rate": 7.349617495700534e-07,
      "loss": 0.0077,
      "step": 2832180
    },
    {
      "epoch": 4.634957417699312,
      "grad_norm": 0.12812259793281555,
      "learning_rate": 7.348958573565362e-07,
      "loss": 0.011,
      "step": 2832200
    },
    {
      "epoch": 4.634990148137965,
      "grad_norm": 0.22493915259838104,
      "learning_rate": 7.348299651430191e-07,
      "loss": 0.0068,
      "step": 2832220
    },
    {
      "epoch": 4.635022878576619,
      "grad_norm": 0.22771978378295898,
      "learning_rate": 7.347640729295019e-07,
      "loss": 0.0064,
      "step": 2832240
    },
    {
      "epoch": 4.635055609015272,
      "grad_norm": 0.08851871639490128,
      "learning_rate": 7.346981807159849e-07,
      "loss": 0.0084,
      "step": 2832260
    },
    {
      "epoch": 4.635088339453925,
      "grad_norm": 0.8317915797233582,
      "learning_rate": 7.346322885024677e-07,
      "loss": 0.0167,
      "step": 2832280
    },
    {
      "epoch": 4.635121069892579,
      "grad_norm": 0.15205852687358856,
      "learning_rate": 7.345663962889506e-07,
      "loss": 0.009,
      "step": 2832300
    },
    {
      "epoch": 4.635153800331232,
      "grad_norm": 0.2080184370279312,
      "learning_rate": 7.345005040754334e-07,
      "loss": 0.0125,
      "step": 2832320
    },
    {
      "epoch": 4.635186530769886,
      "grad_norm": 0.34145697951316833,
      "learning_rate": 7.344346118619164e-07,
      "loss": 0.0159,
      "step": 2832340
    },
    {
      "epoch": 4.6352192612085386,
      "grad_norm": 0.22278252243995667,
      "learning_rate": 7.343687196483992e-07,
      "loss": 0.0082,
      "step": 2832360
    },
    {
      "epoch": 4.635251991647192,
      "grad_norm": 0.2732860743999481,
      "learning_rate": 7.343028274348821e-07,
      "loss": 0.0153,
      "step": 2832380
    },
    {
      "epoch": 4.635284722085846,
      "grad_norm": 0.05412375181913376,
      "learning_rate": 7.342369352213649e-07,
      "loss": 0.0077,
      "step": 2832400
    },
    {
      "epoch": 4.6353174525244984,
      "grad_norm": 0.2570793628692627,
      "learning_rate": 7.341710430078479e-07,
      "loss": 0.0106,
      "step": 2832420
    },
    {
      "epoch": 4.635350182963152,
      "grad_norm": 0.18856944143772125,
      "learning_rate": 7.341051507943307e-07,
      "loss": 0.0119,
      "step": 2832440
    },
    {
      "epoch": 4.635382913401806,
      "grad_norm": 0.09054360538721085,
      "learning_rate": 7.340392585808135e-07,
      "loss": 0.005,
      "step": 2832460
    },
    {
      "epoch": 4.635415643840458,
      "grad_norm": 0.4433083236217499,
      "learning_rate": 7.339733663672964e-07,
      "loss": 0.0125,
      "step": 2832480
    },
    {
      "epoch": 4.635448374279112,
      "grad_norm": 0.18984641134738922,
      "learning_rate": 7.339074741537792e-07,
      "loss": 0.0117,
      "step": 2832500
    },
    {
      "epoch": 4.6354811047177655,
      "grad_norm": 0.12101156264543533,
      "learning_rate": 7.338415819402622e-07,
      "loss": 0.01,
      "step": 2832520
    },
    {
      "epoch": 4.635513835156419,
      "grad_norm": 0.13700774312019348,
      "learning_rate": 7.33775689726745e-07,
      "loss": 0.0082,
      "step": 2832540
    },
    {
      "epoch": 4.635546565595072,
      "grad_norm": 0.13903997838497162,
      "learning_rate": 7.337097975132279e-07,
      "loss": 0.0117,
      "step": 2832560
    },
    {
      "epoch": 4.635579296033725,
      "grad_norm": 0.4967307150363922,
      "learning_rate": 7.336439052997107e-07,
      "loss": 0.0105,
      "step": 2832580
    },
    {
      "epoch": 4.635612026472379,
      "grad_norm": 0.21249127388000488,
      "learning_rate": 7.335780130861937e-07,
      "loss": 0.0079,
      "step": 2832600
    },
    {
      "epoch": 4.635644756911033,
      "grad_norm": 0.18437260389328003,
      "learning_rate": 7.335121208726765e-07,
      "loss": 0.0065,
      "step": 2832620
    },
    {
      "epoch": 4.635677487349685,
      "grad_norm": 0.07881230860948563,
      "learning_rate": 7.334462286591594e-07,
      "loss": 0.0096,
      "step": 2832640
    },
    {
      "epoch": 4.635710217788339,
      "grad_norm": 0.14885549247264862,
      "learning_rate": 7.333803364456422e-07,
      "loss": 0.0084,
      "step": 2832660
    },
    {
      "epoch": 4.6357429482269925,
      "grad_norm": 0.17206327617168427,
      "learning_rate": 7.333144442321251e-07,
      "loss": 0.0083,
      "step": 2832680
    },
    {
      "epoch": 4.635775678665645,
      "grad_norm": 0.08852085471153259,
      "learning_rate": 7.33248552018608e-07,
      "loss": 0.0078,
      "step": 2832700
    },
    {
      "epoch": 4.635808409104299,
      "grad_norm": 0.20485422015190125,
      "learning_rate": 7.331826598050909e-07,
      "loss": 0.0081,
      "step": 2832720
    },
    {
      "epoch": 4.635841139542952,
      "grad_norm": 0.12590374052524567,
      "learning_rate": 7.331167675915737e-07,
      "loss": 0.0103,
      "step": 2832740
    },
    {
      "epoch": 4.635873869981605,
      "grad_norm": 0.3036171793937683,
      "learning_rate": 7.330508753780565e-07,
      "loss": 0.0106,
      "step": 2832760
    },
    {
      "epoch": 4.635906600420259,
      "grad_norm": 0.11426118016242981,
      "learning_rate": 7.329849831645396e-07,
      "loss": 0.0102,
      "step": 2832780
    },
    {
      "epoch": 4.635939330858912,
      "grad_norm": 0.09900203347206116,
      "learning_rate": 7.329190909510224e-07,
      "loss": 0.0082,
      "step": 2832800
    },
    {
      "epoch": 4.635972061297566,
      "grad_norm": 0.20525150001049042,
      "learning_rate": 7.328531987375052e-07,
      "loss": 0.0121,
      "step": 2832820
    },
    {
      "epoch": 4.636004791736219,
      "grad_norm": 0.39949318766593933,
      "learning_rate": 7.32787306523988e-07,
      "loss": 0.0072,
      "step": 2832840
    },
    {
      "epoch": 4.636037522174872,
      "grad_norm": 0.030764106661081314,
      "learning_rate": 7.327214143104711e-07,
      "loss": 0.0072,
      "step": 2832860
    },
    {
      "epoch": 4.636070252613526,
      "grad_norm": 0.13704940676689148,
      "learning_rate": 7.326555220969539e-07,
      "loss": 0.0081,
      "step": 2832880
    },
    {
      "epoch": 4.6361029830521785,
      "grad_norm": 0.4782007932662964,
      "learning_rate": 7.325896298834368e-07,
      "loss": 0.0111,
      "step": 2832900
    },
    {
      "epoch": 4.636135713490832,
      "grad_norm": 0.2520146667957306,
      "learning_rate": 7.325237376699196e-07,
      "loss": 0.008,
      "step": 2832920
    },
    {
      "epoch": 4.636168443929486,
      "grad_norm": 0.21265141665935516,
      "learning_rate": 7.324578454564024e-07,
      "loss": 0.0068,
      "step": 2832940
    },
    {
      "epoch": 4.636201174368139,
      "grad_norm": 0.20030225813388824,
      "learning_rate": 7.323919532428854e-07,
      "loss": 0.0077,
      "step": 2832960
    },
    {
      "epoch": 4.636233904806792,
      "grad_norm": 0.13892251253128052,
      "learning_rate": 7.323260610293683e-07,
      "loss": 0.0112,
      "step": 2832980
    },
    {
      "epoch": 4.636266635245446,
      "grad_norm": 0.3087412118911743,
      "learning_rate": 7.322601688158511e-07,
      "loss": 0.0076,
      "step": 2833000
    },
    {
      "epoch": 4.636299365684099,
      "grad_norm": 0.4220561981201172,
      "learning_rate": 7.32194276602334e-07,
      "loss": 0.0071,
      "step": 2833020
    },
    {
      "epoch": 4.636332096122752,
      "grad_norm": 0.6939215064048767,
      "learning_rate": 7.321283843888169e-07,
      "loss": 0.0176,
      "step": 2833040
    },
    {
      "epoch": 4.6363648265614055,
      "grad_norm": 0.1837427020072937,
      "learning_rate": 7.320624921752998e-07,
      "loss": 0.0079,
      "step": 2833060
    },
    {
      "epoch": 4.636397557000059,
      "grad_norm": 0.41459643840789795,
      "learning_rate": 7.319965999617826e-07,
      "loss": 0.0077,
      "step": 2833080
    },
    {
      "epoch": 4.636430287438712,
      "grad_norm": 0.16581444442272186,
      "learning_rate": 7.319307077482655e-07,
      "loss": 0.0066,
      "step": 2833100
    },
    {
      "epoch": 4.636463017877365,
      "grad_norm": 0.03377851843833923,
      "learning_rate": 7.318648155347483e-07,
      "loss": 0.0077,
      "step": 2833120
    },
    {
      "epoch": 4.636495748316019,
      "grad_norm": 0.0740123987197876,
      "learning_rate": 7.317989233212312e-07,
      "loss": 0.0079,
      "step": 2833140
    },
    {
      "epoch": 4.6365284787546726,
      "grad_norm": 0.4403558671474457,
      "learning_rate": 7.317330311077141e-07,
      "loss": 0.0088,
      "step": 2833160
    },
    {
      "epoch": 4.636561209193325,
      "grad_norm": 0.2308308482170105,
      "learning_rate": 7.316671388941969e-07,
      "loss": 0.0058,
      "step": 2833180
    },
    {
      "epoch": 4.636593939631979,
      "grad_norm": 0.1848946362733841,
      "learning_rate": 7.316012466806798e-07,
      "loss": 0.0081,
      "step": 2833200
    },
    {
      "epoch": 4.6366266700706324,
      "grad_norm": 0.1439632773399353,
      "learning_rate": 7.315353544671627e-07,
      "loss": 0.0084,
      "step": 2833220
    },
    {
      "epoch": 4.636659400509286,
      "grad_norm": 0.2495974898338318,
      "learning_rate": 7.314694622536456e-07,
      "loss": 0.0081,
      "step": 2833240
    },
    {
      "epoch": 4.636692130947939,
      "grad_norm": 0.3227905035018921,
      "learning_rate": 7.314035700401284e-07,
      "loss": 0.009,
      "step": 2833260
    },
    {
      "epoch": 4.636724861386592,
      "grad_norm": 0.22173556685447693,
      "learning_rate": 7.313376778266113e-07,
      "loss": 0.0101,
      "step": 2833280
    },
    {
      "epoch": 4.636757591825246,
      "grad_norm": 0.3517071306705475,
      "learning_rate": 7.312717856130942e-07,
      "loss": 0.0079,
      "step": 2833300
    },
    {
      "epoch": 4.636790322263899,
      "grad_norm": 0.14484892785549164,
      "learning_rate": 7.312058933995771e-07,
      "loss": 0.0078,
      "step": 2833320
    },
    {
      "epoch": 4.636823052702552,
      "grad_norm": 0.27464690804481506,
      "learning_rate": 7.311400011860599e-07,
      "loss": 0.0057,
      "step": 2833340
    },
    {
      "epoch": 4.636855783141206,
      "grad_norm": 0.3458017110824585,
      "learning_rate": 7.310741089725428e-07,
      "loss": 0.0065,
      "step": 2833360
    },
    {
      "epoch": 4.6368885135798585,
      "grad_norm": 0.11668829619884491,
      "learning_rate": 7.310082167590256e-07,
      "loss": 0.0067,
      "step": 2833380
    },
    {
      "epoch": 4.636921244018512,
      "grad_norm": 0.13774429261684418,
      "learning_rate": 7.309423245455086e-07,
      "loss": 0.0086,
      "step": 2833400
    },
    {
      "epoch": 4.636953974457166,
      "grad_norm": 0.09102997183799744,
      "learning_rate": 7.308764323319914e-07,
      "loss": 0.0076,
      "step": 2833420
    },
    {
      "epoch": 4.636986704895819,
      "grad_norm": 0.23481690883636475,
      "learning_rate": 7.308105401184743e-07,
      "loss": 0.0066,
      "step": 2833440
    },
    {
      "epoch": 4.637019435334472,
      "grad_norm": 0.12837845087051392,
      "learning_rate": 7.307446479049571e-07,
      "loss": 0.0066,
      "step": 2833460
    },
    {
      "epoch": 4.637052165773126,
      "grad_norm": 0.2672879993915558,
      "learning_rate": 7.3067875569144e-07,
      "loss": 0.0079,
      "step": 2833480
    },
    {
      "epoch": 4.637084896211779,
      "grad_norm": 0.18421073257923126,
      "learning_rate": 7.306128634779229e-07,
      "loss": 0.0094,
      "step": 2833500
    },
    {
      "epoch": 4.637117626650433,
      "grad_norm": 0.5601575374603271,
      "learning_rate": 7.305469712644057e-07,
      "loss": 0.0104,
      "step": 2833520
    },
    {
      "epoch": 4.6371503570890855,
      "grad_norm": 0.14779438078403473,
      "learning_rate": 7.304810790508886e-07,
      "loss": 0.0061,
      "step": 2833540
    },
    {
      "epoch": 4.637183087527739,
      "grad_norm": 0.15418605506420135,
      "learning_rate": 7.304151868373714e-07,
      "loss": 0.0075,
      "step": 2833560
    },
    {
      "epoch": 4.637215817966393,
      "grad_norm": 0.5049749612808228,
      "learning_rate": 7.303492946238544e-07,
      "loss": 0.0117,
      "step": 2833580
    },
    {
      "epoch": 4.637248548405045,
      "grad_norm": 0.276639848947525,
      "learning_rate": 7.302834024103372e-07,
      "loss": 0.0076,
      "step": 2833600
    },
    {
      "epoch": 4.637281278843699,
      "grad_norm": 0.11181103438138962,
      "learning_rate": 7.302175101968201e-07,
      "loss": 0.0069,
      "step": 2833620
    },
    {
      "epoch": 4.637314009282353,
      "grad_norm": 0.04456084594130516,
      "learning_rate": 7.301516179833029e-07,
      "loss": 0.0061,
      "step": 2833640
    },
    {
      "epoch": 4.637346739721005,
      "grad_norm": 0.28431954979896545,
      "learning_rate": 7.300857257697859e-07,
      "loss": 0.0056,
      "step": 2833660
    },
    {
      "epoch": 4.637379470159659,
      "grad_norm": 0.49554333090782166,
      "learning_rate": 7.300198335562687e-07,
      "loss": 0.0076,
      "step": 2833680
    },
    {
      "epoch": 4.6374122005983125,
      "grad_norm": 0.33996424078941345,
      "learning_rate": 7.299539413427516e-07,
      "loss": 0.0097,
      "step": 2833700
    },
    {
      "epoch": 4.637444931036966,
      "grad_norm": 0.35776132345199585,
      "learning_rate": 7.298880491292344e-07,
      "loss": 0.0117,
      "step": 2833720
    },
    {
      "epoch": 4.637477661475619,
      "grad_norm": 0.14707514643669128,
      "learning_rate": 7.298221569157174e-07,
      "loss": 0.0082,
      "step": 2833740
    },
    {
      "epoch": 4.637510391914272,
      "grad_norm": 0.3243308365345001,
      "learning_rate": 7.297562647022002e-07,
      "loss": 0.0091,
      "step": 2833760
    },
    {
      "epoch": 4.637543122352926,
      "grad_norm": 0.14642402529716492,
      "learning_rate": 7.29690372488683e-07,
      "loss": 0.0089,
      "step": 2833780
    },
    {
      "epoch": 4.63757585279158,
      "grad_norm": 0.5420010089874268,
      "learning_rate": 7.296244802751659e-07,
      "loss": 0.0112,
      "step": 2833800
    },
    {
      "epoch": 4.637608583230232,
      "grad_norm": 0.18744175136089325,
      "learning_rate": 7.295585880616487e-07,
      "loss": 0.0072,
      "step": 2833820
    },
    {
      "epoch": 4.637641313668886,
      "grad_norm": 0.16687844693660736,
      "learning_rate": 7.294926958481317e-07,
      "loss": 0.0085,
      "step": 2833840
    },
    {
      "epoch": 4.6376740441075395,
      "grad_norm": 0.1076224222779274,
      "learning_rate": 7.294268036346145e-07,
      "loss": 0.0063,
      "step": 2833860
    },
    {
      "epoch": 4.637706774546192,
      "grad_norm": 0.1957852691411972,
      "learning_rate": 7.293609114210974e-07,
      "loss": 0.0073,
      "step": 2833880
    },
    {
      "epoch": 4.637739504984846,
      "grad_norm": 0.5137262344360352,
      "learning_rate": 7.292950192075802e-07,
      "loss": 0.0087,
      "step": 2833900
    },
    {
      "epoch": 4.637772235423499,
      "grad_norm": 0.11466086655855179,
      "learning_rate": 7.292291269940632e-07,
      "loss": 0.0095,
      "step": 2833920
    },
    {
      "epoch": 4.637804965862152,
      "grad_norm": 0.13057772815227509,
      "learning_rate": 7.29163234780546e-07,
      "loss": 0.0093,
      "step": 2833940
    },
    {
      "epoch": 4.637837696300806,
      "grad_norm": 0.09681588411331177,
      "learning_rate": 7.290973425670289e-07,
      "loss": 0.0072,
      "step": 2833960
    },
    {
      "epoch": 4.637870426739459,
      "grad_norm": 0.03497650846838951,
      "learning_rate": 7.290314503535117e-07,
      "loss": 0.0085,
      "step": 2833980
    },
    {
      "epoch": 4.637903157178113,
      "grad_norm": 0.22271528840065002,
      "learning_rate": 7.289655581399946e-07,
      "loss": 0.0105,
      "step": 2834000
    },
    {
      "epoch": 4.637935887616766,
      "grad_norm": 0.14978431165218353,
      "learning_rate": 7.288996659264775e-07,
      "loss": 0.0057,
      "step": 2834020
    },
    {
      "epoch": 4.637968618055419,
      "grad_norm": 0.166288360953331,
      "learning_rate": 7.288337737129604e-07,
      "loss": 0.0185,
      "step": 2834040
    },
    {
      "epoch": 4.638001348494073,
      "grad_norm": 0.2590852677822113,
      "learning_rate": 7.287678814994432e-07,
      "loss": 0.0096,
      "step": 2834060
    },
    {
      "epoch": 4.638034078932726,
      "grad_norm": 0.22883738577365875,
      "learning_rate": 7.28701989285926e-07,
      "loss": 0.0059,
      "step": 2834080
    },
    {
      "epoch": 4.638066809371379,
      "grad_norm": 0.09346037358045578,
      "learning_rate": 7.286360970724091e-07,
      "loss": 0.0079,
      "step": 2834100
    },
    {
      "epoch": 4.638099539810033,
      "grad_norm": 0.5506705641746521,
      "learning_rate": 7.28570204858892e-07,
      "loss": 0.0105,
      "step": 2834120
    },
    {
      "epoch": 4.638132270248686,
      "grad_norm": 0.16008029878139496,
      "learning_rate": 7.285043126453747e-07,
      "loss": 0.0098,
      "step": 2834140
    },
    {
      "epoch": 4.638165000687339,
      "grad_norm": 0.6104989647865295,
      "learning_rate": 7.284384204318575e-07,
      "loss": 0.0074,
      "step": 2834160
    },
    {
      "epoch": 4.6381977311259925,
      "grad_norm": 0.09212690591812134,
      "learning_rate": 7.283725282183406e-07,
      "loss": 0.0086,
      "step": 2834180
    },
    {
      "epoch": 4.638230461564646,
      "grad_norm": 0.057897746562957764,
      "learning_rate": 7.283066360048234e-07,
      "loss": 0.0071,
      "step": 2834200
    },
    {
      "epoch": 4.638263192003299,
      "grad_norm": 0.8472170829772949,
      "learning_rate": 7.282407437913063e-07,
      "loss": 0.0062,
      "step": 2834220
    },
    {
      "epoch": 4.638295922441952,
      "grad_norm": 0.051280807703733444,
      "learning_rate": 7.281748515777891e-07,
      "loss": 0.0084,
      "step": 2834240
    },
    {
      "epoch": 4.638328652880606,
      "grad_norm": 0.2849162817001343,
      "learning_rate": 7.281089593642719e-07,
      "loss": 0.0083,
      "step": 2834260
    },
    {
      "epoch": 4.63836138331926,
      "grad_norm": 0.2591773271560669,
      "learning_rate": 7.280430671507549e-07,
      "loss": 0.0111,
      "step": 2834280
    },
    {
      "epoch": 4.638394113757912,
      "grad_norm": 0.3466365933418274,
      "learning_rate": 7.279771749372378e-07,
      "loss": 0.0116,
      "step": 2834300
    },
    {
      "epoch": 4.638426844196566,
      "grad_norm": 0.11418347805738449,
      "learning_rate": 7.279112827237206e-07,
      "loss": 0.0096,
      "step": 2834320
    },
    {
      "epoch": 4.6384595746352195,
      "grad_norm": 0.25022411346435547,
      "learning_rate": 7.278453905102035e-07,
      "loss": 0.0095,
      "step": 2834340
    },
    {
      "epoch": 4.638492305073872,
      "grad_norm": 0.1870056390762329,
      "learning_rate": 7.277794982966864e-07,
      "loss": 0.006,
      "step": 2834360
    },
    {
      "epoch": 4.638525035512526,
      "grad_norm": 0.7079261541366577,
      "learning_rate": 7.277136060831693e-07,
      "loss": 0.0081,
      "step": 2834380
    },
    {
      "epoch": 4.638557765951179,
      "grad_norm": 0.20959791541099548,
      "learning_rate": 7.276477138696521e-07,
      "loss": 0.0084,
      "step": 2834400
    },
    {
      "epoch": 4.638590496389833,
      "grad_norm": 0.5047999024391174,
      "learning_rate": 7.27581821656135e-07,
      "loss": 0.0079,
      "step": 2834420
    },
    {
      "epoch": 4.638623226828486,
      "grad_norm": 0.11099646985530853,
      "learning_rate": 7.275159294426178e-07,
      "loss": 0.0062,
      "step": 2834440
    },
    {
      "epoch": 4.638655957267139,
      "grad_norm": 0.10284998267889023,
      "learning_rate": 7.274500372291008e-07,
      "loss": 0.0079,
      "step": 2834460
    },
    {
      "epoch": 4.638688687705793,
      "grad_norm": 0.38774922490119934,
      "learning_rate": 7.273841450155836e-07,
      "loss": 0.0127,
      "step": 2834480
    },
    {
      "epoch": 4.638721418144446,
      "grad_norm": 0.14878147840499878,
      "learning_rate": 7.273182528020664e-07,
      "loss": 0.0085,
      "step": 2834500
    },
    {
      "epoch": 4.638754148583099,
      "grad_norm": 0.17371709644794464,
      "learning_rate": 7.272523605885493e-07,
      "loss": 0.0075,
      "step": 2834520
    },
    {
      "epoch": 4.638786879021753,
      "grad_norm": 0.13619089126586914,
      "learning_rate": 7.271864683750322e-07,
      "loss": 0.0064,
      "step": 2834540
    },
    {
      "epoch": 4.6388196094604055,
      "grad_norm": 0.2467760145664215,
      "learning_rate": 7.271205761615151e-07,
      "loss": 0.0078,
      "step": 2834560
    },
    {
      "epoch": 4.638852339899059,
      "grad_norm": 0.3715772032737732,
      "learning_rate": 7.270546839479979e-07,
      "loss": 0.0103,
      "step": 2834580
    },
    {
      "epoch": 4.638885070337713,
      "grad_norm": 0.2261042445898056,
      "learning_rate": 7.269887917344808e-07,
      "loss": 0.0083,
      "step": 2834600
    },
    {
      "epoch": 4.638917800776366,
      "grad_norm": 0.3375796377658844,
      "learning_rate": 7.269228995209637e-07,
      "loss": 0.0129,
      "step": 2834620
    },
    {
      "epoch": 4.638950531215019,
      "grad_norm": 0.34365114569664,
      "learning_rate": 7.268570073074466e-07,
      "loss": 0.0076,
      "step": 2834640
    },
    {
      "epoch": 4.638983261653673,
      "grad_norm": 0.31749212741851807,
      "learning_rate": 7.267911150939294e-07,
      "loss": 0.0097,
      "step": 2834660
    },
    {
      "epoch": 4.639015992092326,
      "grad_norm": 0.06337042897939682,
      "learning_rate": 7.267252228804123e-07,
      "loss": 0.0056,
      "step": 2834680
    },
    {
      "epoch": 4.63904872253098,
      "grad_norm": 0.08660800009965897,
      "learning_rate": 7.266593306668951e-07,
      "loss": 0.0089,
      "step": 2834700
    },
    {
      "epoch": 4.6390814529696325,
      "grad_norm": 0.15224523842334747,
      "learning_rate": 7.265934384533781e-07,
      "loss": 0.0055,
      "step": 2834720
    },
    {
      "epoch": 4.639114183408286,
      "grad_norm": 0.2676660418510437,
      "learning_rate": 7.265275462398609e-07,
      "loss": 0.0071,
      "step": 2834740
    },
    {
      "epoch": 4.63914691384694,
      "grad_norm": 0.354619562625885,
      "learning_rate": 7.264616540263438e-07,
      "loss": 0.0091,
      "step": 2834760
    },
    {
      "epoch": 4.639179644285592,
      "grad_norm": 0.25829532742500305,
      "learning_rate": 7.263957618128266e-07,
      "loss": 0.0064,
      "step": 2834780
    },
    {
      "epoch": 4.639212374724246,
      "grad_norm": 0.2907257676124573,
      "learning_rate": 7.263298695993096e-07,
      "loss": 0.0075,
      "step": 2834800
    },
    {
      "epoch": 4.6392451051629,
      "grad_norm": 0.3777226507663727,
      "learning_rate": 7.262639773857924e-07,
      "loss": 0.0108,
      "step": 2834820
    },
    {
      "epoch": 4.639277835601552,
      "grad_norm": 0.2677113711833954,
      "learning_rate": 7.261980851722752e-07,
      "loss": 0.0084,
      "step": 2834840
    },
    {
      "epoch": 4.639310566040206,
      "grad_norm": 0.13657008111476898,
      "learning_rate": 7.261321929587581e-07,
      "loss": 0.0073,
      "step": 2834860
    },
    {
      "epoch": 4.6393432964788595,
      "grad_norm": 0.0960722416639328,
      "learning_rate": 7.260663007452409e-07,
      "loss": 0.0158,
      "step": 2834880
    },
    {
      "epoch": 4.639376026917513,
      "grad_norm": 0.04112900421023369,
      "learning_rate": 7.260004085317239e-07,
      "loss": 0.0099,
      "step": 2834900
    },
    {
      "epoch": 4.639408757356166,
      "grad_norm": 0.08480729907751083,
      "learning_rate": 7.259345163182067e-07,
      "loss": 0.0081,
      "step": 2834920
    },
    {
      "epoch": 4.639441487794819,
      "grad_norm": 0.1533171832561493,
      "learning_rate": 7.258686241046896e-07,
      "loss": 0.0079,
      "step": 2834940
    },
    {
      "epoch": 4.639474218233473,
      "grad_norm": 0.278452605009079,
      "learning_rate": 7.258027318911724e-07,
      "loss": 0.0108,
      "step": 2834960
    },
    {
      "epoch": 4.6395069486721265,
      "grad_norm": 0.3046162724494934,
      "learning_rate": 7.257368396776554e-07,
      "loss": 0.0077,
      "step": 2834980
    },
    {
      "epoch": 4.639539679110779,
      "grad_norm": 0.2266944944858551,
      "learning_rate": 7.256709474641382e-07,
      "loss": 0.0108,
      "step": 2835000
    },
    {
      "epoch": 4.639572409549433,
      "grad_norm": 0.6402357816696167,
      "learning_rate": 7.256050552506211e-07,
      "loss": 0.0124,
      "step": 2835020
    },
    {
      "epoch": 4.639605139988086,
      "grad_norm": 0.15707886219024658,
      "learning_rate": 7.255391630371039e-07,
      "loss": 0.006,
      "step": 2835040
    },
    {
      "epoch": 4.639637870426739,
      "grad_norm": 0.6252054572105408,
      "learning_rate": 7.254732708235869e-07,
      "loss": 0.0141,
      "step": 2835060
    },
    {
      "epoch": 4.639670600865393,
      "grad_norm": 0.05724436417222023,
      "learning_rate": 7.254073786100697e-07,
      "loss": 0.0087,
      "step": 2835080
    },
    {
      "epoch": 4.639703331304046,
      "grad_norm": 0.14649048447608948,
      "learning_rate": 7.253414863965526e-07,
      "loss": 0.0068,
      "step": 2835100
    },
    {
      "epoch": 4.639736061742699,
      "grad_norm": 0.2619636058807373,
      "learning_rate": 7.252755941830354e-07,
      "loss": 0.0111,
      "step": 2835120
    },
    {
      "epoch": 4.639768792181353,
      "grad_norm": 0.14757908880710602,
      "learning_rate": 7.252097019695182e-07,
      "loss": 0.0061,
      "step": 2835140
    },
    {
      "epoch": 4.639801522620006,
      "grad_norm": 0.5335220694541931,
      "learning_rate": 7.251438097560012e-07,
      "loss": 0.0122,
      "step": 2835160
    },
    {
      "epoch": 4.63983425305866,
      "grad_norm": 0.10653920471668243,
      "learning_rate": 7.25077917542484e-07,
      "loss": 0.006,
      "step": 2835180
    },
    {
      "epoch": 4.6398669834973125,
      "grad_norm": 0.3466768264770508,
      "learning_rate": 7.250120253289669e-07,
      "loss": 0.0074,
      "step": 2835200
    },
    {
      "epoch": 4.639899713935966,
      "grad_norm": 0.295016348361969,
      "learning_rate": 7.249461331154497e-07,
      "loss": 0.0074,
      "step": 2835220
    },
    {
      "epoch": 4.63993244437462,
      "grad_norm": 0.27576205134391785,
      "learning_rate": 7.248802409019327e-07,
      "loss": 0.0053,
      "step": 2835240
    },
    {
      "epoch": 4.639965174813273,
      "grad_norm": 0.34901630878448486,
      "learning_rate": 7.248143486884155e-07,
      "loss": 0.0085,
      "step": 2835260
    },
    {
      "epoch": 4.639997905251926,
      "grad_norm": 2.0521631240844727,
      "learning_rate": 7.247484564748984e-07,
      "loss": 0.008,
      "step": 2835280
    },
    {
      "epoch": 4.64003063569058,
      "grad_norm": 0.04333117976784706,
      "learning_rate": 7.246825642613812e-07,
      "loss": 0.0074,
      "step": 2835300
    },
    {
      "epoch": 4.640063366129233,
      "grad_norm": 0.103188157081604,
      "learning_rate": 7.246166720478641e-07,
      "loss": 0.01,
      "step": 2835320
    },
    {
      "epoch": 4.640096096567886,
      "grad_norm": 0.18609179556369781,
      "learning_rate": 7.24550779834347e-07,
      "loss": 0.0095,
      "step": 2835340
    },
    {
      "epoch": 4.6401288270065395,
      "grad_norm": 0.08610409498214722,
      "learning_rate": 7.244848876208299e-07,
      "loss": 0.0071,
      "step": 2835360
    },
    {
      "epoch": 4.640161557445193,
      "grad_norm": 0.17276331782341003,
      "learning_rate": 7.244189954073127e-07,
      "loss": 0.0073,
      "step": 2835380
    },
    {
      "epoch": 4.640194287883846,
      "grad_norm": 0.6453016400337219,
      "learning_rate": 7.243531031937956e-07,
      "loss": 0.015,
      "step": 2835400
    },
    {
      "epoch": 4.640227018322499,
      "grad_norm": 0.15525047481060028,
      "learning_rate": 7.242872109802786e-07,
      "loss": 0.0088,
      "step": 2835420
    },
    {
      "epoch": 4.640259748761153,
      "grad_norm": 0.07556536048650742,
      "learning_rate": 7.242213187667615e-07,
      "loss": 0.0124,
      "step": 2835440
    },
    {
      "epoch": 4.640292479199807,
      "grad_norm": 0.43620002269744873,
      "learning_rate": 7.241554265532442e-07,
      "loss": 0.0124,
      "step": 2835460
    },
    {
      "epoch": 4.640325209638459,
      "grad_norm": 0.33159124851226807,
      "learning_rate": 7.24089534339727e-07,
      "loss": 0.0141,
      "step": 2835480
    },
    {
      "epoch": 4.640357940077113,
      "grad_norm": 1.1790343523025513,
      "learning_rate": 7.240236421262101e-07,
      "loss": 0.0086,
      "step": 2835500
    },
    {
      "epoch": 4.6403906705157665,
      "grad_norm": 0.14296212792396545,
      "learning_rate": 7.23957749912693e-07,
      "loss": 0.0122,
      "step": 2835520
    },
    {
      "epoch": 4.64042340095442,
      "grad_norm": 0.15718549489974976,
      "learning_rate": 7.238918576991758e-07,
      "loss": 0.0079,
      "step": 2835540
    },
    {
      "epoch": 4.640456131393073,
      "grad_norm": 0.41150084137916565,
      "learning_rate": 7.238259654856586e-07,
      "loss": 0.0083,
      "step": 2835560
    },
    {
      "epoch": 4.640488861831726,
      "grad_norm": 0.04561137408018112,
      "learning_rate": 7.237600732721414e-07,
      "loss": 0.0069,
      "step": 2835580
    },
    {
      "epoch": 4.64052159227038,
      "grad_norm": 0.05594589188694954,
      "learning_rate": 7.236941810586244e-07,
      "loss": 0.0071,
      "step": 2835600
    },
    {
      "epoch": 4.640554322709033,
      "grad_norm": 0.27646398544311523,
      "learning_rate": 7.236282888451073e-07,
      "loss": 0.01,
      "step": 2835620
    },
    {
      "epoch": 4.640587053147686,
      "grad_norm": 0.2005196511745453,
      "learning_rate": 7.235623966315901e-07,
      "loss": 0.0063,
      "step": 2835640
    },
    {
      "epoch": 4.64061978358634,
      "grad_norm": 0.20179861783981323,
      "learning_rate": 7.23496504418073e-07,
      "loss": 0.0107,
      "step": 2835660
    },
    {
      "epoch": 4.640652514024993,
      "grad_norm": 0.2603480815887451,
      "learning_rate": 7.234306122045559e-07,
      "loss": 0.0078,
      "step": 2835680
    },
    {
      "epoch": 4.640685244463646,
      "grad_norm": 0.17008203268051147,
      "learning_rate": 7.233647199910388e-07,
      "loss": 0.0116,
      "step": 2835700
    },
    {
      "epoch": 4.6407179749023,
      "grad_norm": 0.07225700467824936,
      "learning_rate": 7.232988277775216e-07,
      "loss": 0.0062,
      "step": 2835720
    },
    {
      "epoch": 4.640750705340953,
      "grad_norm": 0.5960323214530945,
      "learning_rate": 7.232329355640045e-07,
      "loss": 0.0071,
      "step": 2835740
    },
    {
      "epoch": 4.640783435779606,
      "grad_norm": 0.06648604571819305,
      "learning_rate": 7.231670433504873e-07,
      "loss": 0.0051,
      "step": 2835760
    },
    {
      "epoch": 4.64081616621826,
      "grad_norm": 0.09953504800796509,
      "learning_rate": 7.231011511369703e-07,
      "loss": 0.0067,
      "step": 2835780
    },
    {
      "epoch": 4.640848896656913,
      "grad_norm": 0.3870025873184204,
      "learning_rate": 7.230352589234531e-07,
      "loss": 0.0091,
      "step": 2835800
    },
    {
      "epoch": 4.640881627095566,
      "grad_norm": 0.108797088265419,
      "learning_rate": 7.22969366709936e-07,
      "loss": 0.0078,
      "step": 2835820
    },
    {
      "epoch": 4.6409143575342195,
      "grad_norm": 0.049923717975616455,
      "learning_rate": 7.229034744964188e-07,
      "loss": 0.013,
      "step": 2835840
    },
    {
      "epoch": 4.640947087972873,
      "grad_norm": 0.11582561582326889,
      "learning_rate": 7.228375822829017e-07,
      "loss": 0.0072,
      "step": 2835860
    },
    {
      "epoch": 4.640979818411527,
      "grad_norm": 0.3178015649318695,
      "learning_rate": 7.227716900693846e-07,
      "loss": 0.0065,
      "step": 2835880
    },
    {
      "epoch": 4.641012548850179,
      "grad_norm": 0.3264292776584625,
      "learning_rate": 7.227057978558674e-07,
      "loss": 0.008,
      "step": 2835900
    },
    {
      "epoch": 4.641045279288833,
      "grad_norm": 0.19829660654067993,
      "learning_rate": 7.226399056423503e-07,
      "loss": 0.0145,
      "step": 2835920
    },
    {
      "epoch": 4.641078009727487,
      "grad_norm": 0.2670057713985443,
      "learning_rate": 7.225740134288332e-07,
      "loss": 0.0069,
      "step": 2835940
    },
    {
      "epoch": 4.641110740166139,
      "grad_norm": 0.08367078006267548,
      "learning_rate": 7.225081212153161e-07,
      "loss": 0.0107,
      "step": 2835960
    },
    {
      "epoch": 4.641143470604793,
      "grad_norm": 0.17029158771038055,
      "learning_rate": 7.224422290017989e-07,
      "loss": 0.0076,
      "step": 2835980
    },
    {
      "epoch": 4.6411762010434465,
      "grad_norm": 0.19995765388011932,
      "learning_rate": 7.223763367882818e-07,
      "loss": 0.0062,
      "step": 2836000
    },
    {
      "epoch": 4.641208931482099,
      "grad_norm": 0.1501673012971878,
      "learning_rate": 7.223104445747646e-07,
      "loss": 0.0068,
      "step": 2836020
    },
    {
      "epoch": 4.641241661920753,
      "grad_norm": 0.1109367236495018,
      "learning_rate": 7.222445523612476e-07,
      "loss": 0.0078,
      "step": 2836040
    },
    {
      "epoch": 4.641274392359406,
      "grad_norm": 0.391484797000885,
      "learning_rate": 7.221786601477304e-07,
      "loss": 0.0072,
      "step": 2836060
    },
    {
      "epoch": 4.64130712279806,
      "grad_norm": 0.07679048180580139,
      "learning_rate": 7.221127679342133e-07,
      "loss": 0.0079,
      "step": 2836080
    },
    {
      "epoch": 4.641339853236713,
      "grad_norm": 0.3691647946834564,
      "learning_rate": 7.220468757206961e-07,
      "loss": 0.0071,
      "step": 2836100
    },
    {
      "epoch": 4.641372583675366,
      "grad_norm": 0.24576488137245178,
      "learning_rate": 7.219809835071791e-07,
      "loss": 0.0087,
      "step": 2836120
    },
    {
      "epoch": 4.64140531411402,
      "grad_norm": 0.26676246523857117,
      "learning_rate": 7.219150912936619e-07,
      "loss": 0.0137,
      "step": 2836140
    },
    {
      "epoch": 4.6414380445526735,
      "grad_norm": 0.148586243391037,
      "learning_rate": 7.218491990801447e-07,
      "loss": 0.0098,
      "step": 2836160
    },
    {
      "epoch": 4.641470774991326,
      "grad_norm": 0.18102706968784332,
      "learning_rate": 7.217833068666276e-07,
      "loss": 0.006,
      "step": 2836180
    },
    {
      "epoch": 4.64150350542998,
      "grad_norm": 0.18306346237659454,
      "learning_rate": 7.217174146531104e-07,
      "loss": 0.0081,
      "step": 2836200
    },
    {
      "epoch": 4.641536235868633,
      "grad_norm": 0.15591087937355042,
      "learning_rate": 7.216515224395934e-07,
      "loss": 0.0111,
      "step": 2836220
    },
    {
      "epoch": 4.641568966307286,
      "grad_norm": 0.12166882306337357,
      "learning_rate": 7.215856302260762e-07,
      "loss": 0.0103,
      "step": 2836240
    },
    {
      "epoch": 4.64160169674594,
      "grad_norm": 0.11316165328025818,
      "learning_rate": 7.215197380125591e-07,
      "loss": 0.0066,
      "step": 2836260
    },
    {
      "epoch": 4.641634427184593,
      "grad_norm": 0.07375247031450272,
      "learning_rate": 7.214538457990419e-07,
      "loss": 0.01,
      "step": 2836280
    },
    {
      "epoch": 4.641667157623246,
      "grad_norm": 0.18866269290447235,
      "learning_rate": 7.213879535855249e-07,
      "loss": 0.0087,
      "step": 2836300
    },
    {
      "epoch": 4.6416998880619,
      "grad_norm": 0.279573917388916,
      "learning_rate": 7.213220613720077e-07,
      "loss": 0.0085,
      "step": 2836320
    },
    {
      "epoch": 4.641732618500553,
      "grad_norm": 0.26060575246810913,
      "learning_rate": 7.212561691584906e-07,
      "loss": 0.0106,
      "step": 2836340
    },
    {
      "epoch": 4.641765348939207,
      "grad_norm": 0.0963965356349945,
      "learning_rate": 7.211902769449734e-07,
      "loss": 0.0059,
      "step": 2836360
    },
    {
      "epoch": 4.6417980793778595,
      "grad_norm": 0.13699567317962646,
      "learning_rate": 7.211243847314564e-07,
      "loss": 0.008,
      "step": 2836380
    },
    {
      "epoch": 4.641830809816513,
      "grad_norm": 0.2300221472978592,
      "learning_rate": 7.210584925179392e-07,
      "loss": 0.0117,
      "step": 2836400
    },
    {
      "epoch": 4.641863540255167,
      "grad_norm": 0.28464359045028687,
      "learning_rate": 7.209926003044221e-07,
      "loss": 0.0105,
      "step": 2836420
    },
    {
      "epoch": 4.64189627069382,
      "grad_norm": 0.3024003505706787,
      "learning_rate": 7.209267080909049e-07,
      "loss": 0.0106,
      "step": 2836440
    },
    {
      "epoch": 4.641929001132473,
      "grad_norm": 0.20905601978302002,
      "learning_rate": 7.208608158773878e-07,
      "loss": 0.0072,
      "step": 2836460
    },
    {
      "epoch": 4.641961731571127,
      "grad_norm": 0.2489190250635147,
      "learning_rate": 7.207949236638707e-07,
      "loss": 0.0105,
      "step": 2836480
    },
    {
      "epoch": 4.64199446200978,
      "grad_norm": 0.48615795373916626,
      "learning_rate": 7.207290314503535e-07,
      "loss": 0.0098,
      "step": 2836500
    },
    {
      "epoch": 4.642027192448433,
      "grad_norm": 0.18538101017475128,
      "learning_rate": 7.206631392368364e-07,
      "loss": 0.0073,
      "step": 2836520
    },
    {
      "epoch": 4.6420599228870865,
      "grad_norm": 0.6869462132453918,
      "learning_rate": 7.205972470233192e-07,
      "loss": 0.0127,
      "step": 2836540
    },
    {
      "epoch": 4.64209265332574,
      "grad_norm": 0.47173231840133667,
      "learning_rate": 7.205313548098022e-07,
      "loss": 0.0094,
      "step": 2836560
    },
    {
      "epoch": 4.642125383764393,
      "grad_norm": 0.29799556732177734,
      "learning_rate": 7.20465462596285e-07,
      "loss": 0.0098,
      "step": 2836580
    },
    {
      "epoch": 4.642158114203046,
      "grad_norm": 0.6023598909378052,
      "learning_rate": 7.203995703827679e-07,
      "loss": 0.0089,
      "step": 2836600
    },
    {
      "epoch": 4.6421908446417,
      "grad_norm": 0.21274511516094208,
      "learning_rate": 7.203336781692507e-07,
      "loss": 0.0119,
      "step": 2836620
    },
    {
      "epoch": 4.6422235750803535,
      "grad_norm": 0.09326338022947311,
      "learning_rate": 7.202677859557336e-07,
      "loss": 0.0089,
      "step": 2836640
    },
    {
      "epoch": 4.642256305519006,
      "grad_norm": 0.21471984684467316,
      "learning_rate": 7.202018937422166e-07,
      "loss": 0.0122,
      "step": 2836660
    },
    {
      "epoch": 4.64228903595766,
      "grad_norm": 0.524306058883667,
      "learning_rate": 7.201360015286994e-07,
      "loss": 0.007,
      "step": 2836680
    },
    {
      "epoch": 4.642321766396313,
      "grad_norm": 0.3279040455818176,
      "learning_rate": 7.200701093151822e-07,
      "loss": 0.0114,
      "step": 2836700
    },
    {
      "epoch": 4.642354496834967,
      "grad_norm": 0.2030034363269806,
      "learning_rate": 7.200042171016651e-07,
      "loss": 0.0068,
      "step": 2836720
    },
    {
      "epoch": 4.64238722727362,
      "grad_norm": 0.261417955160141,
      "learning_rate": 7.199383248881481e-07,
      "loss": 0.0076,
      "step": 2836740
    },
    {
      "epoch": 4.642419957712273,
      "grad_norm": 0.16322551667690277,
      "learning_rate": 7.19872432674631e-07,
      "loss": 0.0064,
      "step": 2836760
    },
    {
      "epoch": 4.642452688150927,
      "grad_norm": 0.21612097322940826,
      "learning_rate": 7.198065404611137e-07,
      "loss": 0.0072,
      "step": 2836780
    },
    {
      "epoch": 4.64248541858958,
      "grad_norm": 0.1520823985338211,
      "learning_rate": 7.197406482475966e-07,
      "loss": 0.0102,
      "step": 2836800
    },
    {
      "epoch": 4.642518149028233,
      "grad_norm": 0.3164522349834442,
      "learning_rate": 7.196747560340796e-07,
      "loss": 0.0145,
      "step": 2836820
    },
    {
      "epoch": 4.642550879466887,
      "grad_norm": 0.2797585427761078,
      "learning_rate": 7.196088638205625e-07,
      "loss": 0.013,
      "step": 2836840
    },
    {
      "epoch": 4.6425836099055395,
      "grad_norm": 0.08285850286483765,
      "learning_rate": 7.195429716070453e-07,
      "loss": 0.0057,
      "step": 2836860
    },
    {
      "epoch": 4.642616340344193,
      "grad_norm": 0.1707715541124344,
      "learning_rate": 7.194770793935281e-07,
      "loss": 0.0094,
      "step": 2836880
    },
    {
      "epoch": 4.642649070782847,
      "grad_norm": 0.31281527876853943,
      "learning_rate": 7.194111871800109e-07,
      "loss": 0.0065,
      "step": 2836900
    },
    {
      "epoch": 4.6426818012215,
      "grad_norm": 0.1034112498164177,
      "learning_rate": 7.193452949664939e-07,
      "loss": 0.0093,
      "step": 2836920
    },
    {
      "epoch": 4.642714531660153,
      "grad_norm": 0.24097713828086853,
      "learning_rate": 7.192794027529768e-07,
      "loss": 0.0083,
      "step": 2836940
    },
    {
      "epoch": 4.642747262098807,
      "grad_norm": 0.2914678454399109,
      "learning_rate": 7.192135105394596e-07,
      "loss": 0.0129,
      "step": 2836960
    },
    {
      "epoch": 4.64277999253746,
      "grad_norm": 0.3617866635322571,
      "learning_rate": 7.191476183259425e-07,
      "loss": 0.0117,
      "step": 2836980
    },
    {
      "epoch": 4.642812722976114,
      "grad_norm": 0.15139487385749817,
      "learning_rate": 7.190817261124254e-07,
      "loss": 0.0102,
      "step": 2837000
    },
    {
      "epoch": 4.6428454534147665,
      "grad_norm": 0.18499691784381866,
      "learning_rate": 7.190158338989083e-07,
      "loss": 0.0086,
      "step": 2837020
    },
    {
      "epoch": 4.64287818385342,
      "grad_norm": 0.18212181329727173,
      "learning_rate": 7.189499416853911e-07,
      "loss": 0.0075,
      "step": 2837040
    },
    {
      "epoch": 4.642910914292074,
      "grad_norm": 0.09149257838726044,
      "learning_rate": 7.18884049471874e-07,
      "loss": 0.0086,
      "step": 2837060
    },
    {
      "epoch": 4.642943644730726,
      "grad_norm": 0.23264940083026886,
      "learning_rate": 7.188181572583569e-07,
      "loss": 0.0101,
      "step": 2837080
    },
    {
      "epoch": 4.64297637516938,
      "grad_norm": 0.37751466035842896,
      "learning_rate": 7.187522650448398e-07,
      "loss": 0.0077,
      "step": 2837100
    },
    {
      "epoch": 4.643009105608034,
      "grad_norm": 0.15582482516765594,
      "learning_rate": 7.186863728313226e-07,
      "loss": 0.005,
      "step": 2837120
    },
    {
      "epoch": 4.643041836046686,
      "grad_norm": 0.27824667096138,
      "learning_rate": 7.186204806178055e-07,
      "loss": 0.0103,
      "step": 2837140
    },
    {
      "epoch": 4.64307456648534,
      "grad_norm": 0.12336250394582748,
      "learning_rate": 7.185545884042883e-07,
      "loss": 0.0064,
      "step": 2837160
    },
    {
      "epoch": 4.6431072969239935,
      "grad_norm": 0.251468300819397,
      "learning_rate": 7.184886961907713e-07,
      "loss": 0.0075,
      "step": 2837180
    },
    {
      "epoch": 4.643140027362647,
      "grad_norm": 0.4614795446395874,
      "learning_rate": 7.184228039772541e-07,
      "loss": 0.011,
      "step": 2837200
    },
    {
      "epoch": 4.6431727578013,
      "grad_norm": 0.13961109519004822,
      "learning_rate": 7.183569117637369e-07,
      "loss": 0.0127,
      "step": 2837220
    },
    {
      "epoch": 4.643205488239953,
      "grad_norm": 0.19527274370193481,
      "learning_rate": 7.182910195502198e-07,
      "loss": 0.0112,
      "step": 2837240
    },
    {
      "epoch": 4.643238218678607,
      "grad_norm": 0.4722428321838379,
      "learning_rate": 7.182251273367027e-07,
      "loss": 0.0081,
      "step": 2837260
    },
    {
      "epoch": 4.64327094911726,
      "grad_norm": 0.305279403924942,
      "learning_rate": 7.181592351231856e-07,
      "loss": 0.0113,
      "step": 2837280
    },
    {
      "epoch": 4.643303679555913,
      "grad_norm": 0.12052670121192932,
      "learning_rate": 7.180933429096684e-07,
      "loss": 0.0067,
      "step": 2837300
    },
    {
      "epoch": 4.643336409994567,
      "grad_norm": 0.2188631147146225,
      "learning_rate": 7.180274506961513e-07,
      "loss": 0.0101,
      "step": 2837320
    },
    {
      "epoch": 4.6433691404332205,
      "grad_norm": 0.08288579434156418,
      "learning_rate": 7.179615584826341e-07,
      "loss": 0.008,
      "step": 2837340
    },
    {
      "epoch": 4.643401870871873,
      "grad_norm": 0.47591739892959595,
      "learning_rate": 7.178956662691171e-07,
      "loss": 0.0062,
      "step": 2837360
    },
    {
      "epoch": 4.643434601310527,
      "grad_norm": 0.3495086133480072,
      "learning_rate": 7.178297740555999e-07,
      "loss": 0.0063,
      "step": 2837380
    },
    {
      "epoch": 4.64346733174918,
      "grad_norm": 0.21028853952884674,
      "learning_rate": 7.177638818420828e-07,
      "loss": 0.0068,
      "step": 2837400
    },
    {
      "epoch": 4.643500062187833,
      "grad_norm": 0.16206388175487518,
      "learning_rate": 7.176979896285656e-07,
      "loss": 0.0065,
      "step": 2837420
    },
    {
      "epoch": 4.643532792626487,
      "grad_norm": 0.11546815186738968,
      "learning_rate": 7.176320974150486e-07,
      "loss": 0.0107,
      "step": 2837440
    },
    {
      "epoch": 4.64356552306514,
      "grad_norm": 0.12443370372056961,
      "learning_rate": 7.175662052015314e-07,
      "loss": 0.008,
      "step": 2837460
    },
    {
      "epoch": 4.643598253503793,
      "grad_norm": 0.29691454768180847,
      "learning_rate": 7.175003129880143e-07,
      "loss": 0.0077,
      "step": 2837480
    },
    {
      "epoch": 4.6436309839424466,
      "grad_norm": 0.2862386405467987,
      "learning_rate": 7.174344207744971e-07,
      "loss": 0.0114,
      "step": 2837500
    },
    {
      "epoch": 4.6436637143811,
      "grad_norm": 0.6086385250091553,
      "learning_rate": 7.1736852856098e-07,
      "loss": 0.0136,
      "step": 2837520
    },
    {
      "epoch": 4.643696444819754,
      "grad_norm": 0.10684176534414291,
      "learning_rate": 7.173026363474629e-07,
      "loss": 0.0074,
      "step": 2837540
    },
    {
      "epoch": 4.6437291752584064,
      "grad_norm": 0.23459240794181824,
      "learning_rate": 7.172367441339457e-07,
      "loss": 0.0076,
      "step": 2837560
    },
    {
      "epoch": 4.64376190569706,
      "grad_norm": 0.2944055497646332,
      "learning_rate": 7.171708519204286e-07,
      "loss": 0.0138,
      "step": 2837580
    },
    {
      "epoch": 4.643794636135714,
      "grad_norm": 0.38999179005622864,
      "learning_rate": 7.171049597069114e-07,
      "loss": 0.01,
      "step": 2837600
    },
    {
      "epoch": 4.643827366574367,
      "grad_norm": 0.07393199950456619,
      "learning_rate": 7.170390674933944e-07,
      "loss": 0.0092,
      "step": 2837620
    },
    {
      "epoch": 4.64386009701302,
      "grad_norm": 0.10159802436828613,
      "learning_rate": 7.169731752798772e-07,
      "loss": 0.0085,
      "step": 2837640
    },
    {
      "epoch": 4.6438928274516735,
      "grad_norm": 0.159776508808136,
      "learning_rate": 7.169072830663601e-07,
      "loss": 0.0072,
      "step": 2837660
    },
    {
      "epoch": 4.643925557890327,
      "grad_norm": 0.16284379363059998,
      "learning_rate": 7.168413908528429e-07,
      "loss": 0.009,
      "step": 2837680
    },
    {
      "epoch": 4.64395828832898,
      "grad_norm": 0.11458000540733337,
      "learning_rate": 7.167754986393259e-07,
      "loss": 0.0103,
      "step": 2837700
    },
    {
      "epoch": 4.643991018767633,
      "grad_norm": 0.21365152299404144,
      "learning_rate": 7.167096064258087e-07,
      "loss": 0.0091,
      "step": 2837720
    },
    {
      "epoch": 4.644023749206287,
      "grad_norm": 0.14555543661117554,
      "learning_rate": 7.166437142122916e-07,
      "loss": 0.0095,
      "step": 2837740
    },
    {
      "epoch": 4.64405647964494,
      "grad_norm": 0.18135438859462738,
      "learning_rate": 7.165778219987744e-07,
      "loss": 0.0085,
      "step": 2837760
    },
    {
      "epoch": 4.644089210083593,
      "grad_norm": 0.20311906933784485,
      "learning_rate": 7.165119297852573e-07,
      "loss": 0.0098,
      "step": 2837780
    },
    {
      "epoch": 4.644121940522247,
      "grad_norm": 0.3606777489185333,
      "learning_rate": 7.164460375717402e-07,
      "loss": 0.0063,
      "step": 2837800
    },
    {
      "epoch": 4.6441546709609005,
      "grad_norm": 0.25276175141334534,
      "learning_rate": 7.163801453582231e-07,
      "loss": 0.0126,
      "step": 2837820
    },
    {
      "epoch": 4.644187401399553,
      "grad_norm": 0.10096805542707443,
      "learning_rate": 7.163142531447059e-07,
      "loss": 0.0095,
      "step": 2837840
    },
    {
      "epoch": 4.644220131838207,
      "grad_norm": 0.35847124457359314,
      "learning_rate": 7.162483609311887e-07,
      "loss": 0.0124,
      "step": 2837860
    },
    {
      "epoch": 4.64425286227686,
      "grad_norm": 0.1343802809715271,
      "learning_rate": 7.161824687176717e-07,
      "loss": 0.0147,
      "step": 2837880
    },
    {
      "epoch": 4.644285592715514,
      "grad_norm": 0.35760027170181274,
      "learning_rate": 7.161165765041545e-07,
      "loss": 0.008,
      "step": 2837900
    },
    {
      "epoch": 4.644318323154167,
      "grad_norm": 0.20599138736724854,
      "learning_rate": 7.160506842906374e-07,
      "loss": 0.0121,
      "step": 2837920
    },
    {
      "epoch": 4.64435105359282,
      "grad_norm": 0.38817375898361206,
      "learning_rate": 7.159847920771202e-07,
      "loss": 0.0098,
      "step": 2837940
    },
    {
      "epoch": 4.644383784031474,
      "grad_norm": 0.41103315353393555,
      "learning_rate": 7.159188998636033e-07,
      "loss": 0.0118,
      "step": 2837960
    },
    {
      "epoch": 4.644416514470127,
      "grad_norm": 0.49777236580848694,
      "learning_rate": 7.158530076500861e-07,
      "loss": 0.0136,
      "step": 2837980
    },
    {
      "epoch": 4.64444924490878,
      "grad_norm": 0.6628390550613403,
      "learning_rate": 7.157871154365689e-07,
      "loss": 0.0087,
      "step": 2838000
    },
    {
      "epoch": 4.644481975347434,
      "grad_norm": 0.6866778135299683,
      "learning_rate": 7.157212232230517e-07,
      "loss": 0.0061,
      "step": 2838020
    },
    {
      "epoch": 4.6445147057860865,
      "grad_norm": 0.21546733379364014,
      "learning_rate": 7.156553310095346e-07,
      "loss": 0.0091,
      "step": 2838040
    },
    {
      "epoch": 4.64454743622474,
      "grad_norm": 0.3044622838497162,
      "learning_rate": 7.155894387960176e-07,
      "loss": 0.0102,
      "step": 2838060
    },
    {
      "epoch": 4.644580166663394,
      "grad_norm": 0.14087660610675812,
      "learning_rate": 7.155235465825005e-07,
      "loss": 0.0106,
      "step": 2838080
    },
    {
      "epoch": 4.644612897102047,
      "grad_norm": 0.17088012397289276,
      "learning_rate": 7.154576543689832e-07,
      "loss": 0.0118,
      "step": 2838100
    },
    {
      "epoch": 4.6446456275407,
      "grad_norm": 0.3114587962627411,
      "learning_rate": 7.153917621554661e-07,
      "loss": 0.011,
      "step": 2838120
    },
    {
      "epoch": 4.644678357979354,
      "grad_norm": 0.25049883127212524,
      "learning_rate": 7.153258699419491e-07,
      "loss": 0.0141,
      "step": 2838140
    },
    {
      "epoch": 4.644711088418007,
      "grad_norm": 0.19536025822162628,
      "learning_rate": 7.15259977728432e-07,
      "loss": 0.0117,
      "step": 2838160
    },
    {
      "epoch": 4.644743818856661,
      "grad_norm": 0.047885846346616745,
      "learning_rate": 7.151940855149148e-07,
      "loss": 0.011,
      "step": 2838180
    },
    {
      "epoch": 4.6447765492953135,
      "grad_norm": 0.27120786905288696,
      "learning_rate": 7.151281933013977e-07,
      "loss": 0.0067,
      "step": 2838200
    },
    {
      "epoch": 4.644809279733967,
      "grad_norm": 0.3148104250431061,
      "learning_rate": 7.150623010878804e-07,
      "loss": 0.0086,
      "step": 2838220
    },
    {
      "epoch": 4.644842010172621,
      "grad_norm": 0.22591246664524078,
      "learning_rate": 7.149964088743634e-07,
      "loss": 0.0141,
      "step": 2838240
    },
    {
      "epoch": 4.644874740611273,
      "grad_norm": 0.11346018314361572,
      "learning_rate": 7.149305166608463e-07,
      "loss": 0.0083,
      "step": 2838260
    },
    {
      "epoch": 4.644907471049927,
      "grad_norm": 0.26825207471847534,
      "learning_rate": 7.148646244473291e-07,
      "loss": 0.0094,
      "step": 2838280
    },
    {
      "epoch": 4.6449402014885806,
      "grad_norm": 0.19981108605861664,
      "learning_rate": 7.14798732233812e-07,
      "loss": 0.0056,
      "step": 2838300
    },
    {
      "epoch": 4.644972931927233,
      "grad_norm": 0.25393861532211304,
      "learning_rate": 7.147328400202949e-07,
      "loss": 0.0084,
      "step": 2838320
    },
    {
      "epoch": 4.645005662365887,
      "grad_norm": 0.21348458528518677,
      "learning_rate": 7.146669478067778e-07,
      "loss": 0.0094,
      "step": 2838340
    },
    {
      "epoch": 4.6450383928045405,
      "grad_norm": 0.4095039665699005,
      "learning_rate": 7.146010555932606e-07,
      "loss": 0.0087,
      "step": 2838360
    },
    {
      "epoch": 4.645071123243194,
      "grad_norm": 0.3512613773345947,
      "learning_rate": 7.145351633797435e-07,
      "loss": 0.0086,
      "step": 2838380
    },
    {
      "epoch": 4.645103853681847,
      "grad_norm": 0.39129534363746643,
      "learning_rate": 7.144692711662264e-07,
      "loss": 0.0107,
      "step": 2838400
    },
    {
      "epoch": 4.6451365841205,
      "grad_norm": 0.06907796859741211,
      "learning_rate": 7.144033789527093e-07,
      "loss": 0.0075,
      "step": 2838420
    },
    {
      "epoch": 4.645169314559154,
      "grad_norm": 0.6450933814048767,
      "learning_rate": 7.143374867391921e-07,
      "loss": 0.0091,
      "step": 2838440
    },
    {
      "epoch": 4.6452020449978075,
      "grad_norm": 0.2715335488319397,
      "learning_rate": 7.14271594525675e-07,
      "loss": 0.009,
      "step": 2838460
    },
    {
      "epoch": 4.64523477543646,
      "grad_norm": 0.21643933653831482,
      "learning_rate": 7.142057023121578e-07,
      "loss": 0.0118,
      "step": 2838480
    },
    {
      "epoch": 4.645267505875114,
      "grad_norm": 0.3682987689971924,
      "learning_rate": 7.141398100986408e-07,
      "loss": 0.0082,
      "step": 2838500
    },
    {
      "epoch": 4.645300236313767,
      "grad_norm": 0.16637638211250305,
      "learning_rate": 7.140739178851236e-07,
      "loss": 0.0124,
      "step": 2838520
    },
    {
      "epoch": 4.64533296675242,
      "grad_norm": 0.07809415459632874,
      "learning_rate": 7.140080256716064e-07,
      "loss": 0.007,
      "step": 2838540
    },
    {
      "epoch": 4.645365697191074,
      "grad_norm": 0.3705071210861206,
      "learning_rate": 7.139421334580893e-07,
      "loss": 0.0085,
      "step": 2838560
    },
    {
      "epoch": 4.645398427629727,
      "grad_norm": 0.1445753127336502,
      "learning_rate": 7.138762412445722e-07,
      "loss": 0.0108,
      "step": 2838580
    },
    {
      "epoch": 4.64543115806838,
      "grad_norm": 0.5020989775657654,
      "learning_rate": 7.138103490310551e-07,
      "loss": 0.0138,
      "step": 2838600
    },
    {
      "epoch": 4.645463888507034,
      "grad_norm": 0.1987595409154892,
      "learning_rate": 7.137444568175379e-07,
      "loss": 0.0124,
      "step": 2838620
    },
    {
      "epoch": 4.645496618945687,
      "grad_norm": 0.10933397710323334,
      "learning_rate": 7.136785646040208e-07,
      "loss": 0.0095,
      "step": 2838640
    },
    {
      "epoch": 4.645529349384341,
      "grad_norm": 0.6469707489013672,
      "learning_rate": 7.136126723905036e-07,
      "loss": 0.0128,
      "step": 2838660
    },
    {
      "epoch": 4.6455620798229935,
      "grad_norm": 0.10108425468206406,
      "learning_rate": 7.135467801769866e-07,
      "loss": 0.0098,
      "step": 2838680
    },
    {
      "epoch": 4.645594810261647,
      "grad_norm": 0.11653096228837967,
      "learning_rate": 7.134808879634694e-07,
      "loss": 0.007,
      "step": 2838700
    },
    {
      "epoch": 4.645627540700301,
      "grad_norm": 0.16993367671966553,
      "learning_rate": 7.134149957499523e-07,
      "loss": 0.0097,
      "step": 2838720
    },
    {
      "epoch": 4.645660271138954,
      "grad_norm": 0.2605898678302765,
      "learning_rate": 7.133491035364351e-07,
      "loss": 0.0106,
      "step": 2838740
    },
    {
      "epoch": 4.645693001577607,
      "grad_norm": 0.5397416949272156,
      "learning_rate": 7.132832113229181e-07,
      "loss": 0.011,
      "step": 2838760
    },
    {
      "epoch": 4.645725732016261,
      "grad_norm": 0.22955070436000824,
      "learning_rate": 7.132173191094009e-07,
      "loss": 0.0124,
      "step": 2838780
    },
    {
      "epoch": 4.645758462454914,
      "grad_norm": 0.1973719298839569,
      "learning_rate": 7.131514268958838e-07,
      "loss": 0.0092,
      "step": 2838800
    },
    {
      "epoch": 4.645791192893567,
      "grad_norm": 0.15389105677604675,
      "learning_rate": 7.130855346823666e-07,
      "loss": 0.0078,
      "step": 2838820
    },
    {
      "epoch": 4.6458239233322205,
      "grad_norm": 0.40728244185447693,
      "learning_rate": 7.130196424688496e-07,
      "loss": 0.0114,
      "step": 2838840
    },
    {
      "epoch": 4.645856653770874,
      "grad_norm": 0.058170318603515625,
      "learning_rate": 7.129537502553324e-07,
      "loss": 0.0073,
      "step": 2838860
    },
    {
      "epoch": 4.645889384209527,
      "grad_norm": 0.5917882919311523,
      "learning_rate": 7.128878580418152e-07,
      "loss": 0.0117,
      "step": 2838880
    },
    {
      "epoch": 4.64592211464818,
      "grad_norm": 0.2966257333755493,
      "learning_rate": 7.128219658282981e-07,
      "loss": 0.0077,
      "step": 2838900
    },
    {
      "epoch": 4.645954845086834,
      "grad_norm": 0.15760254859924316,
      "learning_rate": 7.127560736147809e-07,
      "loss": 0.0097,
      "step": 2838920
    },
    {
      "epoch": 4.645987575525488,
      "grad_norm": 0.28441593050956726,
      "learning_rate": 7.126901814012639e-07,
      "loss": 0.0069,
      "step": 2838940
    },
    {
      "epoch": 4.64602030596414,
      "grad_norm": 0.11864791810512543,
      "learning_rate": 7.126242891877467e-07,
      "loss": 0.0085,
      "step": 2838960
    },
    {
      "epoch": 4.646053036402794,
      "grad_norm": 0.525440514087677,
      "learning_rate": 7.125583969742296e-07,
      "loss": 0.012,
      "step": 2838980
    },
    {
      "epoch": 4.6460857668414475,
      "grad_norm": 0.2102201282978058,
      "learning_rate": 7.124925047607124e-07,
      "loss": 0.0105,
      "step": 2839000
    },
    {
      "epoch": 4.6461184972801,
      "grad_norm": 0.11046864092350006,
      "learning_rate": 7.124266125471954e-07,
      "loss": 0.0068,
      "step": 2839020
    },
    {
      "epoch": 4.646151227718754,
      "grad_norm": 0.27051350474357605,
      "learning_rate": 7.123607203336782e-07,
      "loss": 0.01,
      "step": 2839040
    },
    {
      "epoch": 4.646183958157407,
      "grad_norm": 0.13696126639842987,
      "learning_rate": 7.122948281201611e-07,
      "loss": 0.0079,
      "step": 2839060
    },
    {
      "epoch": 4.646216688596061,
      "grad_norm": 0.10750117152929306,
      "learning_rate": 7.122289359066439e-07,
      "loss": 0.0116,
      "step": 2839080
    },
    {
      "epoch": 4.646249419034714,
      "grad_norm": 0.07230043411254883,
      "learning_rate": 7.121630436931268e-07,
      "loss": 0.0079,
      "step": 2839100
    },
    {
      "epoch": 4.646282149473367,
      "grad_norm": 0.2894628047943115,
      "learning_rate": 7.120971514796097e-07,
      "loss": 0.0071,
      "step": 2839120
    },
    {
      "epoch": 4.646314879912021,
      "grad_norm": 0.8384697437286377,
      "learning_rate": 7.120312592660926e-07,
      "loss": 0.0139,
      "step": 2839140
    },
    {
      "epoch": 4.646347610350674,
      "grad_norm": 0.1143505647778511,
      "learning_rate": 7.119653670525754e-07,
      "loss": 0.0108,
      "step": 2839160
    },
    {
      "epoch": 4.646380340789327,
      "grad_norm": 0.090347059071064,
      "learning_rate": 7.118994748390583e-07,
      "loss": 0.0092,
      "step": 2839180
    },
    {
      "epoch": 4.646413071227981,
      "grad_norm": 0.178871750831604,
      "learning_rate": 7.118335826255412e-07,
      "loss": 0.0087,
      "step": 2839200
    },
    {
      "epoch": 4.6464458016666335,
      "grad_norm": 0.8909909129142761,
      "learning_rate": 7.11767690412024e-07,
      "loss": 0.0111,
      "step": 2839220
    },
    {
      "epoch": 4.646478532105287,
      "grad_norm": 0.3031543493270874,
      "learning_rate": 7.117017981985069e-07,
      "loss": 0.0095,
      "step": 2839240
    },
    {
      "epoch": 4.646511262543941,
      "grad_norm": 0.14515262842178345,
      "learning_rate": 7.116359059849897e-07,
      "loss": 0.0093,
      "step": 2839260
    },
    {
      "epoch": 4.646543992982594,
      "grad_norm": 0.3151368796825409,
      "learning_rate": 7.115700137714728e-07,
      "loss": 0.0141,
      "step": 2839280
    },
    {
      "epoch": 4.646576723421247,
      "grad_norm": 0.2644893527030945,
      "learning_rate": 7.115041215579556e-07,
      "loss": 0.0077,
      "step": 2839300
    },
    {
      "epoch": 4.6466094538599005,
      "grad_norm": 0.08909415453672409,
      "learning_rate": 7.114382293444384e-07,
      "loss": 0.0099,
      "step": 2839320
    },
    {
      "epoch": 4.646642184298554,
      "grad_norm": 0.13076506555080414,
      "learning_rate": 7.113723371309212e-07,
      "loss": 0.0065,
      "step": 2839340
    },
    {
      "epoch": 4.646674914737208,
      "grad_norm": 0.22885891795158386,
      "learning_rate": 7.113064449174041e-07,
      "loss": 0.0095,
      "step": 2839360
    },
    {
      "epoch": 4.64670764517586,
      "grad_norm": 0.07881946116685867,
      "learning_rate": 7.112405527038871e-07,
      "loss": 0.0081,
      "step": 2839380
    },
    {
      "epoch": 4.646740375614514,
      "grad_norm": 0.2928169369697571,
      "learning_rate": 7.1117466049037e-07,
      "loss": 0.0054,
      "step": 2839400
    },
    {
      "epoch": 4.646773106053168,
      "grad_norm": 0.31284093856811523,
      "learning_rate": 7.111087682768527e-07,
      "loss": 0.0078,
      "step": 2839420
    },
    {
      "epoch": 4.64680583649182,
      "grad_norm": 0.2573620080947876,
      "learning_rate": 7.110428760633356e-07,
      "loss": 0.0101,
      "step": 2839440
    },
    {
      "epoch": 4.646838566930474,
      "grad_norm": 0.13623593747615814,
      "learning_rate": 7.109769838498186e-07,
      "loss": 0.0124,
      "step": 2839460
    },
    {
      "epoch": 4.6468712973691275,
      "grad_norm": 0.1006256714463234,
      "learning_rate": 7.109110916363015e-07,
      "loss": 0.007,
      "step": 2839480
    },
    {
      "epoch": 4.64690402780778,
      "grad_norm": 0.4267595112323761,
      "learning_rate": 7.108451994227843e-07,
      "loss": 0.0106,
      "step": 2839500
    },
    {
      "epoch": 4.646936758246434,
      "grad_norm": 0.09062504023313522,
      "learning_rate": 7.107793072092672e-07,
      "loss": 0.009,
      "step": 2839520
    },
    {
      "epoch": 4.646969488685087,
      "grad_norm": 0.2050883024930954,
      "learning_rate": 7.107134149957499e-07,
      "loss": 0.0105,
      "step": 2839540
    },
    {
      "epoch": 4.647002219123741,
      "grad_norm": 0.33168283104896545,
      "learning_rate": 7.10647522782233e-07,
      "loss": 0.01,
      "step": 2839560
    },
    {
      "epoch": 4.647034949562394,
      "grad_norm": 0.14413334429264069,
      "learning_rate": 7.105816305687158e-07,
      "loss": 0.0116,
      "step": 2839580
    },
    {
      "epoch": 4.647067680001047,
      "grad_norm": 0.192308709025383,
      "learning_rate": 7.105157383551986e-07,
      "loss": 0.0085,
      "step": 2839600
    },
    {
      "epoch": 4.647100410439701,
      "grad_norm": 0.1595979630947113,
      "learning_rate": 7.104498461416815e-07,
      "loss": 0.0109,
      "step": 2839620
    },
    {
      "epoch": 4.6471331408783545,
      "grad_norm": 0.10920526832342148,
      "learning_rate": 7.103839539281644e-07,
      "loss": 0.0094,
      "step": 2839640
    },
    {
      "epoch": 4.647165871317007,
      "grad_norm": 0.3330923318862915,
      "learning_rate": 7.103180617146473e-07,
      "loss": 0.008,
      "step": 2839660
    },
    {
      "epoch": 4.647198601755661,
      "grad_norm": 0.2204095721244812,
      "learning_rate": 7.102521695011301e-07,
      "loss": 0.0069,
      "step": 2839680
    },
    {
      "epoch": 4.647231332194314,
      "grad_norm": 0.1263846457004547,
      "learning_rate": 7.10186277287613e-07,
      "loss": 0.0124,
      "step": 2839700
    },
    {
      "epoch": 4.647264062632967,
      "grad_norm": 0.4263325333595276,
      "learning_rate": 7.101203850740959e-07,
      "loss": 0.0074,
      "step": 2839720
    },
    {
      "epoch": 4.647296793071621,
      "grad_norm": 0.12188146263360977,
      "learning_rate": 7.100544928605788e-07,
      "loss": 0.0106,
      "step": 2839740
    },
    {
      "epoch": 4.647329523510274,
      "grad_norm": 0.21195268630981445,
      "learning_rate": 7.099886006470616e-07,
      "loss": 0.0155,
      "step": 2839760
    },
    {
      "epoch": 4.647362253948927,
      "grad_norm": 0.3479328155517578,
      "learning_rate": 7.099227084335445e-07,
      "loss": 0.0107,
      "step": 2839780
    },
    {
      "epoch": 4.647394984387581,
      "grad_norm": 0.2876014709472656,
      "learning_rate": 7.098568162200273e-07,
      "loss": 0.0091,
      "step": 2839800
    },
    {
      "epoch": 4.647427714826234,
      "grad_norm": 0.06739377230405807,
      "learning_rate": 7.097909240065103e-07,
      "loss": 0.0059,
      "step": 2839820
    },
    {
      "epoch": 4.647460445264888,
      "grad_norm": 0.3822426497936249,
      "learning_rate": 7.097250317929931e-07,
      "loss": 0.01,
      "step": 2839840
    },
    {
      "epoch": 4.6474931757035405,
      "grad_norm": 0.12433391064405441,
      "learning_rate": 7.09659139579476e-07,
      "loss": 0.0113,
      "step": 2839860
    },
    {
      "epoch": 4.647525906142194,
      "grad_norm": 0.10894379019737244,
      "learning_rate": 7.095932473659588e-07,
      "loss": 0.0072,
      "step": 2839880
    },
    {
      "epoch": 4.647558636580848,
      "grad_norm": 0.11686782538890839,
      "learning_rate": 7.095273551524418e-07,
      "loss": 0.0088,
      "step": 2839900
    },
    {
      "epoch": 4.647591367019501,
      "grad_norm": 0.7935518622398376,
      "learning_rate": 7.094614629389246e-07,
      "loss": 0.0095,
      "step": 2839920
    },
    {
      "epoch": 4.647624097458154,
      "grad_norm": 0.5566849708557129,
      "learning_rate": 7.093955707254074e-07,
      "loss": 0.0106,
      "step": 2839940
    },
    {
      "epoch": 4.647656827896808,
      "grad_norm": 0.1498892605304718,
      "learning_rate": 7.093296785118903e-07,
      "loss": 0.012,
      "step": 2839960
    },
    {
      "epoch": 4.647689558335461,
      "grad_norm": 0.2312759906053543,
      "learning_rate": 7.092637862983731e-07,
      "loss": 0.0095,
      "step": 2839980
    },
    {
      "epoch": 4.647722288774114,
      "grad_norm": 0.221000537276268,
      "learning_rate": 7.091978940848561e-07,
      "loss": 0.0094,
      "step": 2840000
    },
    {
      "epoch": 4.6477550192127675,
      "grad_norm": 0.2819872796535492,
      "learning_rate": 7.091320018713389e-07,
      "loss": 0.014,
      "step": 2840020
    },
    {
      "epoch": 4.647787749651421,
      "grad_norm": 0.16179698705673218,
      "learning_rate": 7.090661096578218e-07,
      "loss": 0.0057,
      "step": 2840040
    },
    {
      "epoch": 4.647820480090074,
      "grad_norm": 0.06991040706634521,
      "learning_rate": 7.090002174443046e-07,
      "loss": 0.0059,
      "step": 2840060
    },
    {
      "epoch": 4.647853210528727,
      "grad_norm": 0.18968571722507477,
      "learning_rate": 7.089343252307876e-07,
      "loss": 0.01,
      "step": 2840080
    },
    {
      "epoch": 4.647885940967381,
      "grad_norm": 0.1895953118801117,
      "learning_rate": 7.088684330172704e-07,
      "loss": 0.0088,
      "step": 2840100
    },
    {
      "epoch": 4.6479186714060345,
      "grad_norm": 0.104023776948452,
      "learning_rate": 7.088025408037533e-07,
      "loss": 0.0109,
      "step": 2840120
    },
    {
      "epoch": 4.647951401844687,
      "grad_norm": 0.41633591055870056,
      "learning_rate": 7.087366485902361e-07,
      "loss": 0.0073,
      "step": 2840140
    },
    {
      "epoch": 4.647984132283341,
      "grad_norm": 0.2345455437898636,
      "learning_rate": 7.086707563767191e-07,
      "loss": 0.011,
      "step": 2840160
    },
    {
      "epoch": 4.648016862721994,
      "grad_norm": 0.28434762358665466,
      "learning_rate": 7.086048641632019e-07,
      "loss": 0.0113,
      "step": 2840180
    },
    {
      "epoch": 4.648049593160648,
      "grad_norm": 0.42905905842781067,
      "learning_rate": 7.085389719496848e-07,
      "loss": 0.0069,
      "step": 2840200
    },
    {
      "epoch": 4.648082323599301,
      "grad_norm": 0.09608280658721924,
      "learning_rate": 7.084730797361676e-07,
      "loss": 0.01,
      "step": 2840220
    },
    {
      "epoch": 4.648115054037954,
      "grad_norm": 0.1747988909482956,
      "learning_rate": 7.084071875226504e-07,
      "loss": 0.0125,
      "step": 2840240
    },
    {
      "epoch": 4.648147784476608,
      "grad_norm": 0.3755089044570923,
      "learning_rate": 7.083412953091334e-07,
      "loss": 0.0087,
      "step": 2840260
    },
    {
      "epoch": 4.648180514915261,
      "grad_norm": 0.20178039371967316,
      "learning_rate": 7.082754030956162e-07,
      "loss": 0.0096,
      "step": 2840280
    },
    {
      "epoch": 4.648213245353914,
      "grad_norm": 0.16280022263526917,
      "learning_rate": 7.082095108820991e-07,
      "loss": 0.0097,
      "step": 2840300
    },
    {
      "epoch": 4.648245975792568,
      "grad_norm": 0.03907116875052452,
      "learning_rate": 7.081436186685819e-07,
      "loss": 0.0124,
      "step": 2840320
    },
    {
      "epoch": 4.6482787062312205,
      "grad_norm": 0.44978436827659607,
      "learning_rate": 7.080777264550649e-07,
      "loss": 0.014,
      "step": 2840340
    },
    {
      "epoch": 4.648311436669874,
      "grad_norm": 0.13208942115306854,
      "learning_rate": 7.080118342415477e-07,
      "loss": 0.0071,
      "step": 2840360
    },
    {
      "epoch": 4.648344167108528,
      "grad_norm": 0.6975515484809875,
      "learning_rate": 7.079459420280306e-07,
      "loss": 0.0106,
      "step": 2840380
    },
    {
      "epoch": 4.648376897547181,
      "grad_norm": 0.1996307075023651,
      "learning_rate": 7.078800498145134e-07,
      "loss": 0.0104,
      "step": 2840400
    },
    {
      "epoch": 4.648409627985834,
      "grad_norm": 0.10821159929037094,
      "learning_rate": 7.078141576009963e-07,
      "loss": 0.013,
      "step": 2840420
    },
    {
      "epoch": 4.648442358424488,
      "grad_norm": 0.08619029074907303,
      "learning_rate": 7.077482653874792e-07,
      "loss": 0.0104,
      "step": 2840440
    },
    {
      "epoch": 4.648475088863141,
      "grad_norm": 0.4230000674724579,
      "learning_rate": 7.076823731739621e-07,
      "loss": 0.0076,
      "step": 2840460
    },
    {
      "epoch": 4.648507819301794,
      "grad_norm": 0.1712597906589508,
      "learning_rate": 7.076164809604449e-07,
      "loss": 0.0108,
      "step": 2840480
    },
    {
      "epoch": 4.6485405497404475,
      "grad_norm": 0.16146767139434814,
      "learning_rate": 7.075505887469278e-07,
      "loss": 0.0086,
      "step": 2840500
    },
    {
      "epoch": 4.648573280179101,
      "grad_norm": 0.09893772751092911,
      "learning_rate": 7.074846965334107e-07,
      "loss": 0.0082,
      "step": 2840520
    },
    {
      "epoch": 4.648606010617755,
      "grad_norm": 0.08203372359275818,
      "learning_rate": 7.074188043198936e-07,
      "loss": 0.0063,
      "step": 2840540
    },
    {
      "epoch": 4.648638741056407,
      "grad_norm": 0.27553999423980713,
      "learning_rate": 7.073529121063764e-07,
      "loss": 0.0091,
      "step": 2840560
    },
    {
      "epoch": 4.648671471495061,
      "grad_norm": 0.2237914353609085,
      "learning_rate": 7.072870198928592e-07,
      "loss": 0.0083,
      "step": 2840580
    },
    {
      "epoch": 4.648704201933715,
      "grad_norm": 0.1524144560098648,
      "learning_rate": 7.072211276793423e-07,
      "loss": 0.0081,
      "step": 2840600
    },
    {
      "epoch": 4.648736932372367,
      "grad_norm": 0.5932735204696655,
      "learning_rate": 7.071552354658251e-07,
      "loss": 0.0089,
      "step": 2840620
    },
    {
      "epoch": 4.648769662811021,
      "grad_norm": 0.05821433290839195,
      "learning_rate": 7.070893432523079e-07,
      "loss": 0.0078,
      "step": 2840640
    },
    {
      "epoch": 4.6488023932496745,
      "grad_norm": 0.3002844452857971,
      "learning_rate": 7.070234510387907e-07,
      "loss": 0.0089,
      "step": 2840660
    },
    {
      "epoch": 4.648835123688327,
      "grad_norm": 0.40203583240509033,
      "learning_rate": 7.069575588252736e-07,
      "loss": 0.0086,
      "step": 2840680
    },
    {
      "epoch": 4.648867854126981,
      "grad_norm": 0.21182848513126373,
      "learning_rate": 7.068916666117566e-07,
      "loss": 0.0076,
      "step": 2840700
    },
    {
      "epoch": 4.648900584565634,
      "grad_norm": 0.1189960166811943,
      "learning_rate": 7.068257743982395e-07,
      "loss": 0.0075,
      "step": 2840720
    },
    {
      "epoch": 4.648933315004288,
      "grad_norm": 0.6046736240386963,
      "learning_rate": 7.067598821847222e-07,
      "loss": 0.008,
      "step": 2840740
    },
    {
      "epoch": 4.648966045442941,
      "grad_norm": 0.25197482109069824,
      "learning_rate": 7.066939899712051e-07,
      "loss": 0.0083,
      "step": 2840760
    },
    {
      "epoch": 4.648998775881594,
      "grad_norm": 0.3763327896595001,
      "learning_rate": 7.066280977576881e-07,
      "loss": 0.0103,
      "step": 2840780
    },
    {
      "epoch": 4.649031506320248,
      "grad_norm": 0.7796284556388855,
      "learning_rate": 7.06562205544171e-07,
      "loss": 0.0078,
      "step": 2840800
    },
    {
      "epoch": 4.6490642367589015,
      "grad_norm": 0.26865464448928833,
      "learning_rate": 7.064963133306538e-07,
      "loss": 0.0097,
      "step": 2840820
    },
    {
      "epoch": 4.649096967197554,
      "grad_norm": 0.30532702803611755,
      "learning_rate": 7.064304211171367e-07,
      "loss": 0.0117,
      "step": 2840840
    },
    {
      "epoch": 4.649129697636208,
      "grad_norm": 0.3044511675834656,
      "learning_rate": 7.063645289036194e-07,
      "loss": 0.0071,
      "step": 2840860
    },
    {
      "epoch": 4.649162428074861,
      "grad_norm": 0.5839623212814331,
      "learning_rate": 7.062986366901025e-07,
      "loss": 0.0087,
      "step": 2840880
    },
    {
      "epoch": 4.649195158513514,
      "grad_norm": 0.32005077600479126,
      "learning_rate": 7.062327444765853e-07,
      "loss": 0.0079,
      "step": 2840900
    },
    {
      "epoch": 4.649227888952168,
      "grad_norm": 0.24217186868190765,
      "learning_rate": 7.061668522630681e-07,
      "loss": 0.0064,
      "step": 2840920
    },
    {
      "epoch": 4.649260619390821,
      "grad_norm": 0.17616616189479828,
      "learning_rate": 7.06100960049551e-07,
      "loss": 0.0144,
      "step": 2840940
    },
    {
      "epoch": 4.649293349829474,
      "grad_norm": 0.39069560170173645,
      "learning_rate": 7.060350678360339e-07,
      "loss": 0.0134,
      "step": 2840960
    },
    {
      "epoch": 4.6493260802681275,
      "grad_norm": 0.16572794318199158,
      "learning_rate": 7.059691756225168e-07,
      "loss": 0.0128,
      "step": 2840980
    },
    {
      "epoch": 4.649358810706781,
      "grad_norm": 0.31067565083503723,
      "learning_rate": 7.059032834089996e-07,
      "loss": 0.0091,
      "step": 2841000
    },
    {
      "epoch": 4.649391541145435,
      "grad_norm": 0.2720547020435333,
      "learning_rate": 7.058373911954825e-07,
      "loss": 0.0089,
      "step": 2841020
    },
    {
      "epoch": 4.649424271584087,
      "grad_norm": 0.1433020383119583,
      "learning_rate": 7.057714989819654e-07,
      "loss": 0.0066,
      "step": 2841040
    },
    {
      "epoch": 4.649457002022741,
      "grad_norm": 0.4160463511943817,
      "learning_rate": 7.057056067684483e-07,
      "loss": 0.0094,
      "step": 2841060
    },
    {
      "epoch": 4.649489732461395,
      "grad_norm": 0.15496563911437988,
      "learning_rate": 7.056397145549311e-07,
      "loss": 0.0073,
      "step": 2841080
    },
    {
      "epoch": 4.649522462900048,
      "grad_norm": 0.20620104670524597,
      "learning_rate": 7.05573822341414e-07,
      "loss": 0.0075,
      "step": 2841100
    },
    {
      "epoch": 4.649555193338701,
      "grad_norm": 0.05360908433794975,
      "learning_rate": 7.055079301278968e-07,
      "loss": 0.0084,
      "step": 2841120
    },
    {
      "epoch": 4.6495879237773545,
      "grad_norm": 0.10047069191932678,
      "learning_rate": 7.054420379143798e-07,
      "loss": 0.0077,
      "step": 2841140
    },
    {
      "epoch": 4.649620654216008,
      "grad_norm": 0.2926647663116455,
      "learning_rate": 7.053761457008626e-07,
      "loss": 0.008,
      "step": 2841160
    },
    {
      "epoch": 4.649653384654661,
      "grad_norm": 0.1666788011789322,
      "learning_rate": 7.053102534873455e-07,
      "loss": 0.0137,
      "step": 2841180
    },
    {
      "epoch": 4.649686115093314,
      "grad_norm": 0.1524830013513565,
      "learning_rate": 7.052443612738283e-07,
      "loss": 0.0081,
      "step": 2841200
    },
    {
      "epoch": 4.649718845531968,
      "grad_norm": 0.2724709212779999,
      "learning_rate": 7.051784690603113e-07,
      "loss": 0.0169,
      "step": 2841220
    },
    {
      "epoch": 4.649751575970621,
      "grad_norm": 0.08552028983831406,
      "learning_rate": 7.051125768467941e-07,
      "loss": 0.0098,
      "step": 2841240
    },
    {
      "epoch": 4.649784306409274,
      "grad_norm": 0.40068766474723816,
      "learning_rate": 7.05046684633277e-07,
      "loss": 0.0102,
      "step": 2841260
    },
    {
      "epoch": 4.649817036847928,
      "grad_norm": 0.18018838763237,
      "learning_rate": 7.049807924197598e-07,
      "loss": 0.0089,
      "step": 2841280
    },
    {
      "epoch": 4.6498497672865815,
      "grad_norm": 0.22533509135246277,
      "learning_rate": 7.049149002062426e-07,
      "loss": 0.012,
      "step": 2841300
    },
    {
      "epoch": 4.649882497725234,
      "grad_norm": 0.14190176129341125,
      "learning_rate": 7.048490079927256e-07,
      "loss": 0.0073,
      "step": 2841320
    },
    {
      "epoch": 4.649915228163888,
      "grad_norm": 0.23091751337051392,
      "learning_rate": 7.047831157792084e-07,
      "loss": 0.0085,
      "step": 2841340
    },
    {
      "epoch": 4.649947958602541,
      "grad_norm": 0.267903208732605,
      "learning_rate": 7.047172235656913e-07,
      "loss": 0.0077,
      "step": 2841360
    },
    {
      "epoch": 4.649980689041195,
      "grad_norm": 0.1495705097913742,
      "learning_rate": 7.046513313521741e-07,
      "loss": 0.0063,
      "step": 2841380
    },
    {
      "epoch": 4.650013419479848,
      "grad_norm": 0.1990288645029068,
      "learning_rate": 7.045854391386571e-07,
      "loss": 0.0115,
      "step": 2841400
    },
    {
      "epoch": 4.650046149918501,
      "grad_norm": 0.18946276605129242,
      "learning_rate": 7.045195469251399e-07,
      "loss": 0.0098,
      "step": 2841420
    },
    {
      "epoch": 4.650078880357155,
      "grad_norm": 0.06453649699687958,
      "learning_rate": 7.044536547116228e-07,
      "loss": 0.0119,
      "step": 2841440
    },
    {
      "epoch": 4.650111610795808,
      "grad_norm": 0.6529415845870972,
      "learning_rate": 7.043877624981056e-07,
      "loss": 0.0097,
      "step": 2841460
    },
    {
      "epoch": 4.650144341234461,
      "grad_norm": 0.2657635509967804,
      "learning_rate": 7.043218702845886e-07,
      "loss": 0.0093,
      "step": 2841480
    },
    {
      "epoch": 4.650177071673115,
      "grad_norm": 0.19936330616474152,
      "learning_rate": 7.042559780710714e-07,
      "loss": 0.0077,
      "step": 2841500
    },
    {
      "epoch": 4.6502098021117675,
      "grad_norm": 0.33732885122299194,
      "learning_rate": 7.041900858575543e-07,
      "loss": 0.0074,
      "step": 2841520
    },
    {
      "epoch": 4.650242532550421,
      "grad_norm": 0.25723564624786377,
      "learning_rate": 7.041241936440371e-07,
      "loss": 0.0093,
      "step": 2841540
    },
    {
      "epoch": 4.650275262989075,
      "grad_norm": 0.3107035160064697,
      "learning_rate": 7.0405830143052e-07,
      "loss": 0.0109,
      "step": 2841560
    },
    {
      "epoch": 4.650307993427728,
      "grad_norm": 0.284651517868042,
      "learning_rate": 7.039924092170029e-07,
      "loss": 0.0119,
      "step": 2841580
    },
    {
      "epoch": 4.650340723866381,
      "grad_norm": 0.2562916874885559,
      "learning_rate": 7.039265170034857e-07,
      "loss": 0.0055,
      "step": 2841600
    },
    {
      "epoch": 4.650373454305035,
      "grad_norm": 0.2828681766986847,
      "learning_rate": 7.038606247899686e-07,
      "loss": 0.0099,
      "step": 2841620
    },
    {
      "epoch": 4.650406184743688,
      "grad_norm": 0.33522218465805054,
      "learning_rate": 7.037947325764514e-07,
      "loss": 0.0093,
      "step": 2841640
    },
    {
      "epoch": 4.650438915182342,
      "grad_norm": 0.1612653285264969,
      "learning_rate": 7.037288403629344e-07,
      "loss": 0.0062,
      "step": 2841660
    },
    {
      "epoch": 4.6504716456209945,
      "grad_norm": 0.10411684960126877,
      "learning_rate": 7.036629481494172e-07,
      "loss": 0.0095,
      "step": 2841680
    },
    {
      "epoch": 4.650504376059648,
      "grad_norm": 0.5059912204742432,
      "learning_rate": 7.035970559359001e-07,
      "loss": 0.0085,
      "step": 2841700
    },
    {
      "epoch": 4.650537106498302,
      "grad_norm": 0.3277578055858612,
      "learning_rate": 7.035311637223829e-07,
      "loss": 0.0105,
      "step": 2841720
    },
    {
      "epoch": 4.650569836936954,
      "grad_norm": 0.5412042737007141,
      "learning_rate": 7.034652715088658e-07,
      "loss": 0.0117,
      "step": 2841740
    },
    {
      "epoch": 4.650602567375608,
      "grad_norm": 0.19102828204631805,
      "learning_rate": 7.033993792953487e-07,
      "loss": 0.0075,
      "step": 2841760
    },
    {
      "epoch": 4.6506352978142615,
      "grad_norm": 0.250805139541626,
      "learning_rate": 7.033334870818316e-07,
      "loss": 0.0078,
      "step": 2841780
    },
    {
      "epoch": 4.650668028252914,
      "grad_norm": 0.3503893315792084,
      "learning_rate": 7.032675948683144e-07,
      "loss": 0.0109,
      "step": 2841800
    },
    {
      "epoch": 4.650700758691568,
      "grad_norm": 0.13982751965522766,
      "learning_rate": 7.032017026547973e-07,
      "loss": 0.0105,
      "step": 2841820
    },
    {
      "epoch": 4.650733489130221,
      "grad_norm": 0.06421054154634476,
      "learning_rate": 7.031358104412802e-07,
      "loss": 0.0094,
      "step": 2841840
    },
    {
      "epoch": 4.650766219568875,
      "grad_norm": 0.15456509590148926,
      "learning_rate": 7.030699182277631e-07,
      "loss": 0.007,
      "step": 2841860
    },
    {
      "epoch": 4.650798950007528,
      "grad_norm": 0.11976266652345657,
      "learning_rate": 7.030040260142459e-07,
      "loss": 0.005,
      "step": 2841880
    },
    {
      "epoch": 4.650831680446181,
      "grad_norm": 0.5671204924583435,
      "learning_rate": 7.029381338007288e-07,
      "loss": 0.0083,
      "step": 2841900
    },
    {
      "epoch": 4.650864410884835,
      "grad_norm": 0.275394469499588,
      "learning_rate": 7.028722415872118e-07,
      "loss": 0.0104,
      "step": 2841920
    },
    {
      "epoch": 4.650897141323488,
      "grad_norm": 0.13009652495384216,
      "learning_rate": 7.028063493736947e-07,
      "loss": 0.0081,
      "step": 2841940
    },
    {
      "epoch": 4.650929871762141,
      "grad_norm": 0.21717427670955658,
      "learning_rate": 7.027404571601774e-07,
      "loss": 0.0106,
      "step": 2841960
    },
    {
      "epoch": 4.650962602200795,
      "grad_norm": 0.15107686817646027,
      "learning_rate": 7.026745649466602e-07,
      "loss": 0.0064,
      "step": 2841980
    },
    {
      "epoch": 4.650995332639448,
      "grad_norm": 0.26674315333366394,
      "learning_rate": 7.026086727331431e-07,
      "loss": 0.0084,
      "step": 2842000
    },
    {
      "epoch": 4.651028063078101,
      "grad_norm": 0.24101632833480835,
      "learning_rate": 7.025427805196261e-07,
      "loss": 0.0064,
      "step": 2842020
    },
    {
      "epoch": 4.651060793516755,
      "grad_norm": 0.12246677279472351,
      "learning_rate": 7.02476888306109e-07,
      "loss": 0.0103,
      "step": 2842040
    },
    {
      "epoch": 4.651093523955408,
      "grad_norm": 0.2356279045343399,
      "learning_rate": 7.024109960925917e-07,
      "loss": 0.0117,
      "step": 2842060
    },
    {
      "epoch": 4.651126254394061,
      "grad_norm": 0.3957814574241638,
      "learning_rate": 7.023451038790746e-07,
      "loss": 0.0062,
      "step": 2842080
    },
    {
      "epoch": 4.651158984832715,
      "grad_norm": 0.19637762010097504,
      "learning_rate": 7.022792116655576e-07,
      "loss": 0.0077,
      "step": 2842100
    },
    {
      "epoch": 4.651191715271368,
      "grad_norm": 0.26797109842300415,
      "learning_rate": 7.022133194520405e-07,
      "loss": 0.0135,
      "step": 2842120
    },
    {
      "epoch": 4.651224445710021,
      "grad_norm": 0.1995750367641449,
      "learning_rate": 7.021474272385233e-07,
      "loss": 0.0068,
      "step": 2842140
    },
    {
      "epoch": 4.6512571761486745,
      "grad_norm": 0.08979910612106323,
      "learning_rate": 7.020815350250062e-07,
      "loss": 0.0081,
      "step": 2842160
    },
    {
      "epoch": 4.651289906587328,
      "grad_norm": 0.32656028866767883,
      "learning_rate": 7.020156428114889e-07,
      "loss": 0.0093,
      "step": 2842180
    },
    {
      "epoch": 4.651322637025982,
      "grad_norm": 0.4735218584537506,
      "learning_rate": 7.01949750597972e-07,
      "loss": 0.011,
      "step": 2842200
    },
    {
      "epoch": 4.651355367464634,
      "grad_norm": 0.21054847538471222,
      "learning_rate": 7.018838583844548e-07,
      "loss": 0.0088,
      "step": 2842220
    },
    {
      "epoch": 4.651388097903288,
      "grad_norm": 0.20837146043777466,
      "learning_rate": 7.018179661709377e-07,
      "loss": 0.0123,
      "step": 2842240
    },
    {
      "epoch": 4.651420828341942,
      "grad_norm": 0.05953960493206978,
      "learning_rate": 7.017520739574205e-07,
      "loss": 0.0081,
      "step": 2842260
    },
    {
      "epoch": 4.651453558780595,
      "grad_norm": 0.46839794516563416,
      "learning_rate": 7.016861817439035e-07,
      "loss": 0.0056,
      "step": 2842280
    },
    {
      "epoch": 4.651486289219248,
      "grad_norm": 0.06671489030122757,
      "learning_rate": 7.016202895303863e-07,
      "loss": 0.0085,
      "step": 2842300
    },
    {
      "epoch": 4.6515190196579015,
      "grad_norm": 0.28798550367355347,
      "learning_rate": 7.015543973168691e-07,
      "loss": 0.0116,
      "step": 2842320
    },
    {
      "epoch": 4.651551750096555,
      "grad_norm": 0.0969688892364502,
      "learning_rate": 7.01488505103352e-07,
      "loss": 0.0107,
      "step": 2842340
    },
    {
      "epoch": 4.651584480535208,
      "grad_norm": 0.24647903442382812,
      "learning_rate": 7.014226128898349e-07,
      "loss": 0.014,
      "step": 2842360
    },
    {
      "epoch": 4.651617210973861,
      "grad_norm": 0.20406754314899445,
      "learning_rate": 7.013567206763178e-07,
      "loss": 0.0124,
      "step": 2842380
    },
    {
      "epoch": 4.651649941412515,
      "grad_norm": 0.14108218252658844,
      "learning_rate": 7.012908284628006e-07,
      "loss": 0.0069,
      "step": 2842400
    },
    {
      "epoch": 4.651682671851168,
      "grad_norm": 0.2625983953475952,
      "learning_rate": 7.012249362492835e-07,
      "loss": 0.0086,
      "step": 2842420
    },
    {
      "epoch": 4.651715402289821,
      "grad_norm": 0.102048359811306,
      "learning_rate": 7.011590440357663e-07,
      "loss": 0.0069,
      "step": 2842440
    },
    {
      "epoch": 4.651748132728475,
      "grad_norm": 0.4114503562450409,
      "learning_rate": 7.010931518222493e-07,
      "loss": 0.0113,
      "step": 2842460
    },
    {
      "epoch": 4.6517808631671285,
      "grad_norm": 0.14138007164001465,
      "learning_rate": 7.010272596087321e-07,
      "loss": 0.0074,
      "step": 2842480
    },
    {
      "epoch": 4.651813593605781,
      "grad_norm": 0.0707009881734848,
      "learning_rate": 7.00961367395215e-07,
      "loss": 0.0061,
      "step": 2842500
    },
    {
      "epoch": 4.651846324044435,
      "grad_norm": 0.21529345214366913,
      "learning_rate": 7.008954751816978e-07,
      "loss": 0.0143,
      "step": 2842520
    },
    {
      "epoch": 4.651879054483088,
      "grad_norm": 0.1563708782196045,
      "learning_rate": 7.008295829681808e-07,
      "loss": 0.0056,
      "step": 2842540
    },
    {
      "epoch": 4.651911784921742,
      "grad_norm": 0.13215439021587372,
      "learning_rate": 7.007636907546636e-07,
      "loss": 0.0062,
      "step": 2842560
    },
    {
      "epoch": 4.651944515360395,
      "grad_norm": 0.054630596190690994,
      "learning_rate": 7.006977985411465e-07,
      "loss": 0.0087,
      "step": 2842580
    },
    {
      "epoch": 4.651977245799048,
      "grad_norm": 0.23776914179325104,
      "learning_rate": 7.006319063276293e-07,
      "loss": 0.0098,
      "step": 2842600
    },
    {
      "epoch": 4.652009976237702,
      "grad_norm": 0.4278497099876404,
      "learning_rate": 7.005660141141121e-07,
      "loss": 0.0108,
      "step": 2842620
    },
    {
      "epoch": 4.6520427066763546,
      "grad_norm": 0.39795055985450745,
      "learning_rate": 7.005001219005951e-07,
      "loss": 0.008,
      "step": 2842640
    },
    {
      "epoch": 4.652075437115008,
      "grad_norm": 0.21431410312652588,
      "learning_rate": 7.004342296870779e-07,
      "loss": 0.0089,
      "step": 2842660
    },
    {
      "epoch": 4.652108167553662,
      "grad_norm": 0.4812978208065033,
      "learning_rate": 7.003683374735608e-07,
      "loss": 0.0069,
      "step": 2842680
    },
    {
      "epoch": 4.6521408979923144,
      "grad_norm": 0.08332306146621704,
      "learning_rate": 7.003024452600436e-07,
      "loss": 0.0113,
      "step": 2842700
    },
    {
      "epoch": 4.652173628430968,
      "grad_norm": 0.09928907454013824,
      "learning_rate": 7.002365530465266e-07,
      "loss": 0.0111,
      "step": 2842720
    },
    {
      "epoch": 4.652206358869622,
      "grad_norm": 0.33359602093696594,
      "learning_rate": 7.001706608330094e-07,
      "loss": 0.0113,
      "step": 2842740
    },
    {
      "epoch": 4.652239089308275,
      "grad_norm": 0.02607547491788864,
      "learning_rate": 7.001047686194923e-07,
      "loss": 0.0058,
      "step": 2842760
    },
    {
      "epoch": 4.652271819746928,
      "grad_norm": 0.20787298679351807,
      "learning_rate": 7.000388764059751e-07,
      "loss": 0.0083,
      "step": 2842780
    },
    {
      "epoch": 4.6523045501855815,
      "grad_norm": 0.09969864040613174,
      "learning_rate": 6.999729841924581e-07,
      "loss": 0.0073,
      "step": 2842800
    },
    {
      "epoch": 4.652337280624235,
      "grad_norm": 0.18845266103744507,
      "learning_rate": 6.999070919789409e-07,
      "loss": 0.0064,
      "step": 2842820
    },
    {
      "epoch": 4.652370011062889,
      "grad_norm": 0.14398148655891418,
      "learning_rate": 6.998411997654238e-07,
      "loss": 0.0091,
      "step": 2842840
    },
    {
      "epoch": 4.652402741501541,
      "grad_norm": 0.12188152223825455,
      "learning_rate": 6.997753075519066e-07,
      "loss": 0.0112,
      "step": 2842860
    },
    {
      "epoch": 4.652435471940195,
      "grad_norm": 0.39862483739852905,
      "learning_rate": 6.997094153383895e-07,
      "loss": 0.0114,
      "step": 2842880
    },
    {
      "epoch": 4.652468202378849,
      "grad_norm": 0.3175473213195801,
      "learning_rate": 6.996435231248724e-07,
      "loss": 0.0076,
      "step": 2842900
    },
    {
      "epoch": 4.652500932817501,
      "grad_norm": 0.31554245948791504,
      "learning_rate": 6.995776309113553e-07,
      "loss": 0.0081,
      "step": 2842920
    },
    {
      "epoch": 4.652533663256155,
      "grad_norm": 0.3049429655075073,
      "learning_rate": 6.995117386978381e-07,
      "loss": 0.0085,
      "step": 2842940
    },
    {
      "epoch": 4.6525663936948085,
      "grad_norm": 0.31806933879852295,
      "learning_rate": 6.994458464843209e-07,
      "loss": 0.0111,
      "step": 2842960
    },
    {
      "epoch": 4.652599124133461,
      "grad_norm": 0.07959591597318649,
      "learning_rate": 6.993799542708039e-07,
      "loss": 0.0099,
      "step": 2842980
    },
    {
      "epoch": 4.652631854572115,
      "grad_norm": 0.1070776879787445,
      "learning_rate": 6.993140620572867e-07,
      "loss": 0.0069,
      "step": 2843000
    },
    {
      "epoch": 4.652664585010768,
      "grad_norm": 0.18152852356433868,
      "learning_rate": 6.992481698437696e-07,
      "loss": 0.0109,
      "step": 2843020
    },
    {
      "epoch": 4.652697315449422,
      "grad_norm": 0.1768290400505066,
      "learning_rate": 6.991822776302524e-07,
      "loss": 0.0113,
      "step": 2843040
    },
    {
      "epoch": 4.652730045888075,
      "grad_norm": 0.13887903094291687,
      "learning_rate": 6.991163854167353e-07,
      "loss": 0.0047,
      "step": 2843060
    },
    {
      "epoch": 4.652762776326728,
      "grad_norm": 0.27411484718322754,
      "learning_rate": 6.990504932032182e-07,
      "loss": 0.0096,
      "step": 2843080
    },
    {
      "epoch": 4.652795506765382,
      "grad_norm": 0.7905029058456421,
      "learning_rate": 6.989846009897011e-07,
      "loss": 0.0079,
      "step": 2843100
    },
    {
      "epoch": 4.6528282372040355,
      "grad_norm": 0.3086993098258972,
      "learning_rate": 6.989187087761839e-07,
      "loss": 0.0108,
      "step": 2843120
    },
    {
      "epoch": 4.652860967642688,
      "grad_norm": 0.22778183221817017,
      "learning_rate": 6.988528165626668e-07,
      "loss": 0.013,
      "step": 2843140
    },
    {
      "epoch": 4.652893698081342,
      "grad_norm": 0.2551414966583252,
      "learning_rate": 6.987869243491497e-07,
      "loss": 0.0084,
      "step": 2843160
    },
    {
      "epoch": 4.652926428519995,
      "grad_norm": 0.1115734651684761,
      "learning_rate": 6.987210321356326e-07,
      "loss": 0.0104,
      "step": 2843180
    },
    {
      "epoch": 4.652959158958648,
      "grad_norm": 0.3322785794734955,
      "learning_rate": 6.986551399221154e-07,
      "loss": 0.0056,
      "step": 2843200
    },
    {
      "epoch": 4.652991889397302,
      "grad_norm": 0.16033372282981873,
      "learning_rate": 6.985892477085983e-07,
      "loss": 0.0078,
      "step": 2843220
    },
    {
      "epoch": 4.653024619835955,
      "grad_norm": 0.1772293895483017,
      "learning_rate": 6.985233554950813e-07,
      "loss": 0.0098,
      "step": 2843240
    },
    {
      "epoch": 4.653057350274608,
      "grad_norm": 0.08981727808713913,
      "learning_rate": 6.984574632815642e-07,
      "loss": 0.0094,
      "step": 2843260
    },
    {
      "epoch": 4.653090080713262,
      "grad_norm": 0.24011807143688202,
      "learning_rate": 6.983915710680469e-07,
      "loss": 0.0114,
      "step": 2843280
    },
    {
      "epoch": 4.653122811151915,
      "grad_norm": 0.22735393047332764,
      "learning_rate": 6.983256788545297e-07,
      "loss": 0.0087,
      "step": 2843300
    },
    {
      "epoch": 4.653155541590569,
      "grad_norm": 0.13264857232570648,
      "learning_rate": 6.982597866410126e-07,
      "loss": 0.0062,
      "step": 2843320
    },
    {
      "epoch": 4.6531882720292215,
      "grad_norm": 0.1420024335384369,
      "learning_rate": 6.981938944274956e-07,
      "loss": 0.0114,
      "step": 2843340
    },
    {
      "epoch": 4.653221002467875,
      "grad_norm": 0.15734408795833588,
      "learning_rate": 6.981280022139785e-07,
      "loss": 0.0064,
      "step": 2843360
    },
    {
      "epoch": 4.653253732906529,
      "grad_norm": 0.23489485681056976,
      "learning_rate": 6.980621100004612e-07,
      "loss": 0.0094,
      "step": 2843380
    },
    {
      "epoch": 4.653286463345181,
      "grad_norm": 0.5164090394973755,
      "learning_rate": 6.979962177869441e-07,
      "loss": 0.0086,
      "step": 2843400
    },
    {
      "epoch": 4.653319193783835,
      "grad_norm": 0.24410216510295868,
      "learning_rate": 6.979303255734271e-07,
      "loss": 0.0078,
      "step": 2843420
    },
    {
      "epoch": 4.653351924222489,
      "grad_norm": 0.17428763210773468,
      "learning_rate": 6.9786443335991e-07,
      "loss": 0.0082,
      "step": 2843440
    },
    {
      "epoch": 4.653384654661142,
      "grad_norm": 0.2809545397758484,
      "learning_rate": 6.977985411463928e-07,
      "loss": 0.011,
      "step": 2843460
    },
    {
      "epoch": 4.653417385099795,
      "grad_norm": 1.1146405935287476,
      "learning_rate": 6.977326489328757e-07,
      "loss": 0.0149,
      "step": 2843480
    },
    {
      "epoch": 4.6534501155384485,
      "grad_norm": 0.4522925615310669,
      "learning_rate": 6.976667567193584e-07,
      "loss": 0.0078,
      "step": 2843500
    },
    {
      "epoch": 4.653482845977102,
      "grad_norm": 0.11284159868955612,
      "learning_rate": 6.976008645058415e-07,
      "loss": 0.0071,
      "step": 2843520
    },
    {
      "epoch": 4.653515576415755,
      "grad_norm": 0.16578735411167145,
      "learning_rate": 6.975349722923243e-07,
      "loss": 0.0097,
      "step": 2843540
    },
    {
      "epoch": 4.653548306854408,
      "grad_norm": 0.30979248881340027,
      "learning_rate": 6.974690800788072e-07,
      "loss": 0.0102,
      "step": 2843560
    },
    {
      "epoch": 4.653581037293062,
      "grad_norm": 0.47317934036254883,
      "learning_rate": 6.9740318786529e-07,
      "loss": 0.0079,
      "step": 2843580
    },
    {
      "epoch": 4.653613767731715,
      "grad_norm": 0.47513440251350403,
      "learning_rate": 6.97337295651773e-07,
      "loss": 0.0076,
      "step": 2843600
    },
    {
      "epoch": 4.653646498170368,
      "grad_norm": 0.4052164554595947,
      "learning_rate": 6.972714034382558e-07,
      "loss": 0.0062,
      "step": 2843620
    },
    {
      "epoch": 4.653679228609022,
      "grad_norm": 0.24721769988536835,
      "learning_rate": 6.972055112247386e-07,
      "loss": 0.0092,
      "step": 2843640
    },
    {
      "epoch": 4.653711959047675,
      "grad_norm": 0.03194749355316162,
      "learning_rate": 6.971396190112215e-07,
      "loss": 0.0093,
      "step": 2843660
    },
    {
      "epoch": 4.653744689486328,
      "grad_norm": 0.2115267664194107,
      "learning_rate": 6.970737267977044e-07,
      "loss": 0.0077,
      "step": 2843680
    },
    {
      "epoch": 4.653777419924982,
      "grad_norm": 0.07243166863918304,
      "learning_rate": 6.970078345841873e-07,
      "loss": 0.0102,
      "step": 2843700
    },
    {
      "epoch": 4.653810150363635,
      "grad_norm": 0.12321524322032928,
      "learning_rate": 6.969419423706701e-07,
      "loss": 0.0073,
      "step": 2843720
    },
    {
      "epoch": 4.653842880802289,
      "grad_norm": 0.40062910318374634,
      "learning_rate": 6.96876050157153e-07,
      "loss": 0.0079,
      "step": 2843740
    },
    {
      "epoch": 4.653875611240942,
      "grad_norm": 0.08393508195877075,
      "learning_rate": 6.968101579436358e-07,
      "loss": 0.0075,
      "step": 2843760
    },
    {
      "epoch": 4.653908341679595,
      "grad_norm": 0.39773890376091003,
      "learning_rate": 6.967442657301188e-07,
      "loss": 0.0106,
      "step": 2843780
    },
    {
      "epoch": 4.653941072118249,
      "grad_norm": 0.25684356689453125,
      "learning_rate": 6.966783735166016e-07,
      "loss": 0.0088,
      "step": 2843800
    },
    {
      "epoch": 4.6539738025569015,
      "grad_norm": 0.21636363863945007,
      "learning_rate": 6.966124813030845e-07,
      "loss": 0.0099,
      "step": 2843820
    },
    {
      "epoch": 4.654006532995555,
      "grad_norm": 0.3998441696166992,
      "learning_rate": 6.965465890895673e-07,
      "loss": 0.0073,
      "step": 2843840
    },
    {
      "epoch": 4.654039263434209,
      "grad_norm": 0.16060787439346313,
      "learning_rate": 6.964806968760503e-07,
      "loss": 0.0073,
      "step": 2843860
    },
    {
      "epoch": 4.654071993872861,
      "grad_norm": 0.5478277802467346,
      "learning_rate": 6.964148046625331e-07,
      "loss": 0.0093,
      "step": 2843880
    },
    {
      "epoch": 4.654104724311515,
      "grad_norm": 0.4637604355812073,
      "learning_rate": 6.96348912449016e-07,
      "loss": 0.0124,
      "step": 2843900
    },
    {
      "epoch": 4.654137454750169,
      "grad_norm": 0.08816993236541748,
      "learning_rate": 6.962830202354988e-07,
      "loss": 0.0083,
      "step": 2843920
    },
    {
      "epoch": 4.654170185188822,
      "grad_norm": 0.1688355952501297,
      "learning_rate": 6.962171280219817e-07,
      "loss": 0.0097,
      "step": 2843940
    },
    {
      "epoch": 4.654202915627475,
      "grad_norm": 0.257778137922287,
      "learning_rate": 6.961512358084646e-07,
      "loss": 0.0102,
      "step": 2843960
    },
    {
      "epoch": 4.6542356460661285,
      "grad_norm": 0.2252199500799179,
      "learning_rate": 6.960853435949474e-07,
      "loss": 0.0058,
      "step": 2843980
    },
    {
      "epoch": 4.654268376504782,
      "grad_norm": 0.13197512924671173,
      "learning_rate": 6.960194513814303e-07,
      "loss": 0.0085,
      "step": 2844000
    },
    {
      "epoch": 4.654301106943436,
      "grad_norm": 0.367380827665329,
      "learning_rate": 6.959535591679131e-07,
      "loss": 0.0075,
      "step": 2844020
    },
    {
      "epoch": 4.654333837382088,
      "grad_norm": 0.1479509323835373,
      "learning_rate": 6.958876669543961e-07,
      "loss": 0.0111,
      "step": 2844040
    },
    {
      "epoch": 4.654366567820742,
      "grad_norm": 0.12474257498979568,
      "learning_rate": 6.958217747408789e-07,
      "loss": 0.0084,
      "step": 2844060
    },
    {
      "epoch": 4.654399298259396,
      "grad_norm": 0.43845027685165405,
      "learning_rate": 6.957558825273618e-07,
      "loss": 0.0116,
      "step": 2844080
    },
    {
      "epoch": 4.654432028698048,
      "grad_norm": 0.19109387695789337,
      "learning_rate": 6.956899903138446e-07,
      "loss": 0.007,
      "step": 2844100
    },
    {
      "epoch": 4.654464759136702,
      "grad_norm": 0.3273235559463501,
      "learning_rate": 6.956240981003276e-07,
      "loss": 0.0089,
      "step": 2844120
    },
    {
      "epoch": 4.6544974895753555,
      "grad_norm": 0.23896485567092896,
      "learning_rate": 6.955582058868104e-07,
      "loss": 0.0085,
      "step": 2844140
    },
    {
      "epoch": 4.654530220014008,
      "grad_norm": 0.21492017805576324,
      "learning_rate": 6.954923136732933e-07,
      "loss": 0.0089,
      "step": 2844160
    },
    {
      "epoch": 4.654562950452662,
      "grad_norm": 0.6680179834365845,
      "learning_rate": 6.954264214597761e-07,
      "loss": 0.0133,
      "step": 2844180
    },
    {
      "epoch": 4.654595680891315,
      "grad_norm": 0.08711621910333633,
      "learning_rate": 6.95360529246259e-07,
      "loss": 0.0137,
      "step": 2844200
    },
    {
      "epoch": 4.654628411329969,
      "grad_norm": 0.19645394384860992,
      "learning_rate": 6.952946370327419e-07,
      "loss": 0.0085,
      "step": 2844220
    },
    {
      "epoch": 4.654661141768622,
      "grad_norm": 0.3260928690433502,
      "learning_rate": 6.952287448192248e-07,
      "loss": 0.0082,
      "step": 2844240
    },
    {
      "epoch": 4.654693872207275,
      "grad_norm": 0.4709586501121521,
      "learning_rate": 6.951628526057076e-07,
      "loss": 0.0091,
      "step": 2844260
    },
    {
      "epoch": 4.654726602645929,
      "grad_norm": 0.45559439063072205,
      "learning_rate": 6.950969603921904e-07,
      "loss": 0.0116,
      "step": 2844280
    },
    {
      "epoch": 4.6547593330845825,
      "grad_norm": 0.27388453483581543,
      "learning_rate": 6.950310681786734e-07,
      "loss": 0.006,
      "step": 2844300
    },
    {
      "epoch": 4.654792063523235,
      "grad_norm": 0.1546599566936493,
      "learning_rate": 6.949651759651562e-07,
      "loss": 0.0079,
      "step": 2844320
    },
    {
      "epoch": 4.654824793961889,
      "grad_norm": 0.07998240739107132,
      "learning_rate": 6.948992837516391e-07,
      "loss": 0.009,
      "step": 2844340
    },
    {
      "epoch": 4.654857524400542,
      "grad_norm": 0.47529885172843933,
      "learning_rate": 6.948333915381219e-07,
      "loss": 0.0083,
      "step": 2844360
    },
    {
      "epoch": 4.654890254839195,
      "grad_norm": 0.2803876996040344,
      "learning_rate": 6.947674993246048e-07,
      "loss": 0.0077,
      "step": 2844380
    },
    {
      "epoch": 4.654922985277849,
      "grad_norm": 0.13975901901721954,
      "learning_rate": 6.947016071110877e-07,
      "loss": 0.0068,
      "step": 2844400
    },
    {
      "epoch": 4.654955715716502,
      "grad_norm": 0.32844480872154236,
      "learning_rate": 6.946357148975706e-07,
      "loss": 0.0122,
      "step": 2844420
    },
    {
      "epoch": 4.654988446155155,
      "grad_norm": 0.20086011290550232,
      "learning_rate": 6.945698226840534e-07,
      "loss": 0.0111,
      "step": 2844440
    },
    {
      "epoch": 4.6550211765938085,
      "grad_norm": 0.14419810473918915,
      "learning_rate": 6.945039304705363e-07,
      "loss": 0.0112,
      "step": 2844460
    },
    {
      "epoch": 4.655053907032462,
      "grad_norm": 0.3105163872241974,
      "learning_rate": 6.944380382570192e-07,
      "loss": 0.0093,
      "step": 2844480
    },
    {
      "epoch": 4.655086637471116,
      "grad_norm": 0.09745343774557114,
      "learning_rate": 6.943721460435021e-07,
      "loss": 0.0132,
      "step": 2844500
    },
    {
      "epoch": 4.655119367909768,
      "grad_norm": 0.6562327146530151,
      "learning_rate": 6.943062538299849e-07,
      "loss": 0.0121,
      "step": 2844520
    },
    {
      "epoch": 4.655152098348422,
      "grad_norm": 0.3119506239891052,
      "learning_rate": 6.942403616164678e-07,
      "loss": 0.0089,
      "step": 2844540
    },
    {
      "epoch": 4.655184828787076,
      "grad_norm": 0.04144806042313576,
      "learning_rate": 6.941744694029508e-07,
      "loss": 0.0074,
      "step": 2844560
    },
    {
      "epoch": 4.655217559225729,
      "grad_norm": 0.2394094467163086,
      "learning_rate": 6.941085771894337e-07,
      "loss": 0.0098,
      "step": 2844580
    },
    {
      "epoch": 4.655250289664382,
      "grad_norm": 0.2914894223213196,
      "learning_rate": 6.940426849759164e-07,
      "loss": 0.0086,
      "step": 2844600
    },
    {
      "epoch": 4.6552830201030355,
      "grad_norm": 0.15213610231876373,
      "learning_rate": 6.939767927623992e-07,
      "loss": 0.0124,
      "step": 2844620
    },
    {
      "epoch": 4.655315750541689,
      "grad_norm": 0.15255749225616455,
      "learning_rate": 6.939109005488821e-07,
      "loss": 0.0085,
      "step": 2844640
    },
    {
      "epoch": 4.655348480980342,
      "grad_norm": 0.26083436608314514,
      "learning_rate": 6.938450083353652e-07,
      "loss": 0.0062,
      "step": 2844660
    },
    {
      "epoch": 4.655381211418995,
      "grad_norm": 0.08868908137083054,
      "learning_rate": 6.93779116121848e-07,
      "loss": 0.0087,
      "step": 2844680
    },
    {
      "epoch": 4.655413941857649,
      "grad_norm": 0.15208324790000916,
      "learning_rate": 6.937132239083308e-07,
      "loss": 0.0084,
      "step": 2844700
    },
    {
      "epoch": 4.655446672296302,
      "grad_norm": 0.14175637066364288,
      "learning_rate": 6.936473316948136e-07,
      "loss": 0.006,
      "step": 2844720
    },
    {
      "epoch": 4.655479402734955,
      "grad_norm": 0.12254966795444489,
      "learning_rate": 6.935814394812966e-07,
      "loss": 0.012,
      "step": 2844740
    },
    {
      "epoch": 4.655512133173609,
      "grad_norm": 0.3628183901309967,
      "learning_rate": 6.935155472677795e-07,
      "loss": 0.0077,
      "step": 2844760
    },
    {
      "epoch": 4.6555448636122625,
      "grad_norm": 0.32105568051338196,
      "learning_rate": 6.934496550542623e-07,
      "loss": 0.0078,
      "step": 2844780
    },
    {
      "epoch": 4.655577594050915,
      "grad_norm": 0.23471032083034515,
      "learning_rate": 6.933837628407452e-07,
      "loss": 0.013,
      "step": 2844800
    },
    {
      "epoch": 4.655610324489569,
      "grad_norm": 0.1868188977241516,
      "learning_rate": 6.933178706272279e-07,
      "loss": 0.0075,
      "step": 2844820
    },
    {
      "epoch": 4.655643054928222,
      "grad_norm": 0.2694726288318634,
      "learning_rate": 6.93251978413711e-07,
      "loss": 0.0097,
      "step": 2844840
    },
    {
      "epoch": 4.655675785366876,
      "grad_norm": 0.13735538721084595,
      "learning_rate": 6.931860862001938e-07,
      "loss": 0.0078,
      "step": 2844860
    },
    {
      "epoch": 4.655708515805529,
      "grad_norm": 0.14160795509815216,
      "learning_rate": 6.931201939866767e-07,
      "loss": 0.0067,
      "step": 2844880
    },
    {
      "epoch": 4.655741246244182,
      "grad_norm": 0.16201888024806976,
      "learning_rate": 6.930543017731595e-07,
      "loss": 0.0068,
      "step": 2844900
    },
    {
      "epoch": 4.655773976682836,
      "grad_norm": 0.42240840196609497,
      "learning_rate": 6.929884095596425e-07,
      "loss": 0.0167,
      "step": 2844920
    },
    {
      "epoch": 4.655806707121489,
      "grad_norm": 0.12199867516756058,
      "learning_rate": 6.929225173461253e-07,
      "loss": 0.0115,
      "step": 2844940
    },
    {
      "epoch": 4.655839437560142,
      "grad_norm": 0.13478325307369232,
      "learning_rate": 6.928566251326082e-07,
      "loss": 0.0061,
      "step": 2844960
    },
    {
      "epoch": 4.655872167998796,
      "grad_norm": 0.06251536309719086,
      "learning_rate": 6.92790732919091e-07,
      "loss": 0.0063,
      "step": 2844980
    },
    {
      "epoch": 4.6559048984374485,
      "grad_norm": 0.09557919204235077,
      "learning_rate": 6.92724840705574e-07,
      "loss": 0.0109,
      "step": 2845000
    },
    {
      "epoch": 4.655937628876102,
      "grad_norm": 0.10499243438243866,
      "learning_rate": 6.926589484920568e-07,
      "loss": 0.0078,
      "step": 2845020
    },
    {
      "epoch": 4.655970359314756,
      "grad_norm": 0.1960291713476181,
      "learning_rate": 6.925930562785396e-07,
      "loss": 0.0141,
      "step": 2845040
    },
    {
      "epoch": 4.656003089753409,
      "grad_norm": 0.1275099217891693,
      "learning_rate": 6.925271640650225e-07,
      "loss": 0.0081,
      "step": 2845060
    },
    {
      "epoch": 4.656035820192062,
      "grad_norm": 0.3810443580150604,
      "learning_rate": 6.924612718515053e-07,
      "loss": 0.011,
      "step": 2845080
    },
    {
      "epoch": 4.656068550630716,
      "grad_norm": 0.7039669752120972,
      "learning_rate": 6.923953796379883e-07,
      "loss": 0.0115,
      "step": 2845100
    },
    {
      "epoch": 4.656101281069369,
      "grad_norm": 0.18291783332824707,
      "learning_rate": 6.923294874244711e-07,
      "loss": 0.0094,
      "step": 2845120
    },
    {
      "epoch": 4.656134011508022,
      "grad_norm": 0.11399155855178833,
      "learning_rate": 6.92263595210954e-07,
      "loss": 0.0061,
      "step": 2845140
    },
    {
      "epoch": 4.6561667419466755,
      "grad_norm": 0.21043458580970764,
      "learning_rate": 6.921977029974368e-07,
      "loss": 0.0093,
      "step": 2845160
    },
    {
      "epoch": 4.656199472385329,
      "grad_norm": 0.226165309548378,
      "learning_rate": 6.921318107839198e-07,
      "loss": 0.0091,
      "step": 2845180
    },
    {
      "epoch": 4.656232202823983,
      "grad_norm": 0.5094510912895203,
      "learning_rate": 6.920659185704026e-07,
      "loss": 0.0061,
      "step": 2845200
    },
    {
      "epoch": 4.656264933262635,
      "grad_norm": 0.2947397828102112,
      "learning_rate": 6.920000263568855e-07,
      "loss": 0.0076,
      "step": 2845220
    },
    {
      "epoch": 4.656297663701289,
      "grad_norm": 0.17259830236434937,
      "learning_rate": 6.919341341433683e-07,
      "loss": 0.0073,
      "step": 2845240
    },
    {
      "epoch": 4.6563303941399425,
      "grad_norm": 0.12786413729190826,
      "learning_rate": 6.918682419298513e-07,
      "loss": 0.0056,
      "step": 2845260
    },
    {
      "epoch": 4.656363124578595,
      "grad_norm": 0.33997440338134766,
      "learning_rate": 6.918023497163341e-07,
      "loss": 0.0112,
      "step": 2845280
    },
    {
      "epoch": 4.656395855017249,
      "grad_norm": 0.14723411202430725,
      "learning_rate": 6.91736457502817e-07,
      "loss": 0.0108,
      "step": 2845300
    },
    {
      "epoch": 4.656428585455902,
      "grad_norm": 0.12059136480093002,
      "learning_rate": 6.916705652892998e-07,
      "loss": 0.0116,
      "step": 2845320
    },
    {
      "epoch": 4.656461315894555,
      "grad_norm": 0.07523854076862335,
      "learning_rate": 6.916046730757826e-07,
      "loss": 0.0068,
      "step": 2845340
    },
    {
      "epoch": 4.656494046333209,
      "grad_norm": 0.1681085228919983,
      "learning_rate": 6.915387808622656e-07,
      "loss": 0.0123,
      "step": 2845360
    },
    {
      "epoch": 4.656526776771862,
      "grad_norm": 0.15322355926036835,
      "learning_rate": 6.914728886487484e-07,
      "loss": 0.0089,
      "step": 2845380
    },
    {
      "epoch": 4.656559507210516,
      "grad_norm": 0.10158468782901764,
      "learning_rate": 6.914069964352313e-07,
      "loss": 0.0134,
      "step": 2845400
    },
    {
      "epoch": 4.656592237649169,
      "grad_norm": 0.29967740178108215,
      "learning_rate": 6.913411042217141e-07,
      "loss": 0.016,
      "step": 2845420
    },
    {
      "epoch": 4.656624968087822,
      "grad_norm": 0.43239879608154297,
      "learning_rate": 6.912752120081971e-07,
      "loss": 0.008,
      "step": 2845440
    },
    {
      "epoch": 4.656657698526476,
      "grad_norm": 0.09760042279958725,
      "learning_rate": 6.912093197946799e-07,
      "loss": 0.0113,
      "step": 2845460
    },
    {
      "epoch": 4.656690428965129,
      "grad_norm": 0.505065381526947,
      "learning_rate": 6.911434275811628e-07,
      "loss": 0.009,
      "step": 2845480
    },
    {
      "epoch": 4.656723159403782,
      "grad_norm": 0.2178201824426651,
      "learning_rate": 6.910775353676456e-07,
      "loss": 0.011,
      "step": 2845500
    },
    {
      "epoch": 4.656755889842436,
      "grad_norm": 0.5439257025718689,
      "learning_rate": 6.910116431541285e-07,
      "loss": 0.0082,
      "step": 2845520
    },
    {
      "epoch": 4.656788620281089,
      "grad_norm": 0.15833412110805511,
      "learning_rate": 6.909457509406114e-07,
      "loss": 0.0112,
      "step": 2845540
    },
    {
      "epoch": 4.656821350719742,
      "grad_norm": 0.09392672032117844,
      "learning_rate": 6.908798587270943e-07,
      "loss": 0.0096,
      "step": 2845560
    },
    {
      "epoch": 4.656854081158396,
      "grad_norm": 0.18183736503124237,
      "learning_rate": 6.908139665135771e-07,
      "loss": 0.0088,
      "step": 2845580
    },
    {
      "epoch": 4.656886811597049,
      "grad_norm": 0.11915278434753418,
      "learning_rate": 6.9074807430006e-07,
      "loss": 0.0081,
      "step": 2845600
    },
    {
      "epoch": 4.656919542035702,
      "grad_norm": 0.17352241277694702,
      "learning_rate": 6.906821820865429e-07,
      "loss": 0.007,
      "step": 2845620
    },
    {
      "epoch": 4.6569522724743555,
      "grad_norm": 0.2136872410774231,
      "learning_rate": 6.906162898730258e-07,
      "loss": 0.0131,
      "step": 2845640
    },
    {
      "epoch": 4.656985002913009,
      "grad_norm": 0.279218465089798,
      "learning_rate": 6.905503976595086e-07,
      "loss": 0.0096,
      "step": 2845660
    },
    {
      "epoch": 4.657017733351663,
      "grad_norm": 0.21613040566444397,
      "learning_rate": 6.904845054459914e-07,
      "loss": 0.0063,
      "step": 2845680
    },
    {
      "epoch": 4.657050463790315,
      "grad_norm": 0.1507381647825241,
      "learning_rate": 6.904186132324744e-07,
      "loss": 0.0078,
      "step": 2845700
    },
    {
      "epoch": 4.657083194228969,
      "grad_norm": 0.293834388256073,
      "learning_rate": 6.903527210189572e-07,
      "loss": 0.0051,
      "step": 2845720
    },
    {
      "epoch": 4.657115924667623,
      "grad_norm": 0.2572629749774933,
      "learning_rate": 6.902868288054401e-07,
      "loss": 0.0124,
      "step": 2845740
    },
    {
      "epoch": 4.657148655106276,
      "grad_norm": 0.2836958169937134,
      "learning_rate": 6.902209365919229e-07,
      "loss": 0.0077,
      "step": 2845760
    },
    {
      "epoch": 4.657181385544929,
      "grad_norm": 0.22675657272338867,
      "learning_rate": 6.901550443784058e-07,
      "loss": 0.0108,
      "step": 2845780
    },
    {
      "epoch": 4.6572141159835825,
      "grad_norm": 0.046002428978681564,
      "learning_rate": 6.900891521648887e-07,
      "loss": 0.0082,
      "step": 2845800
    },
    {
      "epoch": 4.657246846422236,
      "grad_norm": 0.0904969573020935,
      "learning_rate": 6.900232599513716e-07,
      "loss": 0.0085,
      "step": 2845820
    },
    {
      "epoch": 4.657279576860889,
      "grad_norm": 0.12184714525938034,
      "learning_rate": 6.899573677378544e-07,
      "loss": 0.0072,
      "step": 2845840
    },
    {
      "epoch": 4.657312307299542,
      "grad_norm": 0.31658151745796204,
      "learning_rate": 6.898914755243373e-07,
      "loss": 0.0123,
      "step": 2845860
    },
    {
      "epoch": 4.657345037738196,
      "grad_norm": 0.1690037101507187,
      "learning_rate": 6.898255833108203e-07,
      "loss": 0.0078,
      "step": 2845880
    },
    {
      "epoch": 4.657377768176849,
      "grad_norm": 0.13464953005313873,
      "learning_rate": 6.897596910973032e-07,
      "loss": 0.0096,
      "step": 2845900
    },
    {
      "epoch": 4.657410498615502,
      "grad_norm": 0.33099791407585144,
      "learning_rate": 6.896937988837859e-07,
      "loss": 0.0085,
      "step": 2845920
    },
    {
      "epoch": 4.657443229054156,
      "grad_norm": 0.05872562900185585,
      "learning_rate": 6.896279066702688e-07,
      "loss": 0.0093,
      "step": 2845940
    },
    {
      "epoch": 4.6574759594928095,
      "grad_norm": 0.2415725439786911,
      "learning_rate": 6.895620144567516e-07,
      "loss": 0.0092,
      "step": 2845960
    },
    {
      "epoch": 4.657508689931462,
      "grad_norm": 0.15776993334293365,
      "learning_rate": 6.894961222432347e-07,
      "loss": 0.0099,
      "step": 2845980
    },
    {
      "epoch": 4.657541420370116,
      "grad_norm": 0.5614669919013977,
      "learning_rate": 6.894302300297175e-07,
      "loss": 0.0101,
      "step": 2846000
    },
    {
      "epoch": 4.657574150808769,
      "grad_norm": 0.15594437718391418,
      "learning_rate": 6.893643378162003e-07,
      "loss": 0.0089,
      "step": 2846020
    },
    {
      "epoch": 4.657606881247423,
      "grad_norm": 0.1563175767660141,
      "learning_rate": 6.892984456026831e-07,
      "loss": 0.0083,
      "step": 2846040
    },
    {
      "epoch": 4.657639611686076,
      "grad_norm": 0.40257370471954346,
      "learning_rate": 6.892325533891661e-07,
      "loss": 0.0075,
      "step": 2846060
    },
    {
      "epoch": 4.657672342124729,
      "grad_norm": 0.2550152540206909,
      "learning_rate": 6.89166661175649e-07,
      "loss": 0.0081,
      "step": 2846080
    },
    {
      "epoch": 4.657705072563383,
      "grad_norm": 0.40063080191612244,
      "learning_rate": 6.891007689621318e-07,
      "loss": 0.0092,
      "step": 2846100
    },
    {
      "epoch": 4.6577378030020355,
      "grad_norm": 0.5879892110824585,
      "learning_rate": 6.890348767486147e-07,
      "loss": 0.0083,
      "step": 2846120
    },
    {
      "epoch": 4.657770533440689,
      "grad_norm": 0.19373953342437744,
      "learning_rate": 6.889689845350976e-07,
      "loss": 0.013,
      "step": 2846140
    },
    {
      "epoch": 4.657803263879343,
      "grad_norm": 0.3526958227157593,
      "learning_rate": 6.889030923215805e-07,
      "loss": 0.0084,
      "step": 2846160
    },
    {
      "epoch": 4.657835994317995,
      "grad_norm": 0.09650325775146484,
      "learning_rate": 6.888372001080633e-07,
      "loss": 0.0112,
      "step": 2846180
    },
    {
      "epoch": 4.657868724756649,
      "grad_norm": 0.1255643367767334,
      "learning_rate": 6.887713078945462e-07,
      "loss": 0.0087,
      "step": 2846200
    },
    {
      "epoch": 4.657901455195303,
      "grad_norm": 0.4149143397808075,
      "learning_rate": 6.88705415681029e-07,
      "loss": 0.0127,
      "step": 2846220
    },
    {
      "epoch": 4.657934185633956,
      "grad_norm": 0.29712003469467163,
      "learning_rate": 6.88639523467512e-07,
      "loss": 0.0115,
      "step": 2846240
    },
    {
      "epoch": 4.657966916072609,
      "grad_norm": 0.13808894157409668,
      "learning_rate": 6.885736312539948e-07,
      "loss": 0.0091,
      "step": 2846260
    },
    {
      "epoch": 4.6579996465112625,
      "grad_norm": 0.38123491406440735,
      "learning_rate": 6.885077390404777e-07,
      "loss": 0.0136,
      "step": 2846280
    },
    {
      "epoch": 4.658032376949916,
      "grad_norm": 0.1372329741716385,
      "learning_rate": 6.884418468269605e-07,
      "loss": 0.0117,
      "step": 2846300
    },
    {
      "epoch": 4.65806510738857,
      "grad_norm": 0.2625948190689087,
      "learning_rate": 6.883759546134435e-07,
      "loss": 0.0122,
      "step": 2846320
    },
    {
      "epoch": 4.658097837827222,
      "grad_norm": 0.20431150496006012,
      "learning_rate": 6.883100623999263e-07,
      "loss": 0.0101,
      "step": 2846340
    },
    {
      "epoch": 4.658130568265876,
      "grad_norm": 0.5506258010864258,
      "learning_rate": 6.882441701864091e-07,
      "loss": 0.0102,
      "step": 2846360
    },
    {
      "epoch": 4.65816329870453,
      "grad_norm": 0.13531190156936646,
      "learning_rate": 6.88178277972892e-07,
      "loss": 0.0065,
      "step": 2846380
    },
    {
      "epoch": 4.658196029143182,
      "grad_norm": 0.1334415227174759,
      "learning_rate": 6.881123857593748e-07,
      "loss": 0.0073,
      "step": 2846400
    },
    {
      "epoch": 4.658228759581836,
      "grad_norm": 0.3550303280353546,
      "learning_rate": 6.880464935458578e-07,
      "loss": 0.0047,
      "step": 2846420
    },
    {
      "epoch": 4.6582614900204895,
      "grad_norm": 0.25216928124427795,
      "learning_rate": 6.879806013323406e-07,
      "loss": 0.0065,
      "step": 2846440
    },
    {
      "epoch": 4.658294220459142,
      "grad_norm": 0.3390065133571625,
      "learning_rate": 6.879147091188235e-07,
      "loss": 0.0083,
      "step": 2846460
    },
    {
      "epoch": 4.658326950897796,
      "grad_norm": 0.23102372884750366,
      "learning_rate": 6.878488169053063e-07,
      "loss": 0.0071,
      "step": 2846480
    },
    {
      "epoch": 4.658359681336449,
      "grad_norm": 0.11676692217588425,
      "learning_rate": 6.877829246917893e-07,
      "loss": 0.0053,
      "step": 2846500
    },
    {
      "epoch": 4.658392411775103,
      "grad_norm": 0.07234469056129456,
      "learning_rate": 6.877170324782721e-07,
      "loss": 0.013,
      "step": 2846520
    },
    {
      "epoch": 4.658425142213756,
      "grad_norm": 0.11377400904893875,
      "learning_rate": 6.87651140264755e-07,
      "loss": 0.0079,
      "step": 2846540
    },
    {
      "epoch": 4.658457872652409,
      "grad_norm": 0.2727956771850586,
      "learning_rate": 6.875852480512378e-07,
      "loss": 0.0068,
      "step": 2846560
    },
    {
      "epoch": 4.658490603091063,
      "grad_norm": 0.23368103802204132,
      "learning_rate": 6.875193558377208e-07,
      "loss": 0.0085,
      "step": 2846580
    },
    {
      "epoch": 4.658523333529716,
      "grad_norm": 0.6970131397247314,
      "learning_rate": 6.874534636242036e-07,
      "loss": 0.0087,
      "step": 2846600
    },
    {
      "epoch": 4.658556063968369,
      "grad_norm": 0.25848832726478577,
      "learning_rate": 6.873875714106865e-07,
      "loss": 0.0066,
      "step": 2846620
    },
    {
      "epoch": 4.658588794407023,
      "grad_norm": 0.4470854103565216,
      "learning_rate": 6.873216791971693e-07,
      "loss": 0.0121,
      "step": 2846640
    },
    {
      "epoch": 4.658621524845676,
      "grad_norm": 0.17985601723194122,
      "learning_rate": 6.872557869836521e-07,
      "loss": 0.0112,
      "step": 2846660
    },
    {
      "epoch": 4.658654255284329,
      "grad_norm": 0.1444675773382187,
      "learning_rate": 6.871898947701351e-07,
      "loss": 0.0126,
      "step": 2846680
    },
    {
      "epoch": 4.658686985722983,
      "grad_norm": 0.33103427290916443,
      "learning_rate": 6.871240025566179e-07,
      "loss": 0.0088,
      "step": 2846700
    },
    {
      "epoch": 4.658719716161636,
      "grad_norm": 0.05988895520567894,
      "learning_rate": 6.870581103431008e-07,
      "loss": 0.007,
      "step": 2846720
    },
    {
      "epoch": 4.658752446600289,
      "grad_norm": 0.10871370881795883,
      "learning_rate": 6.869922181295836e-07,
      "loss": 0.0097,
      "step": 2846740
    },
    {
      "epoch": 4.658785177038943,
      "grad_norm": 0.08171688765287399,
      "learning_rate": 6.869263259160666e-07,
      "loss": 0.0082,
      "step": 2846760
    },
    {
      "epoch": 4.658817907477596,
      "grad_norm": 0.2634064555168152,
      "learning_rate": 6.868604337025494e-07,
      "loss": 0.0105,
      "step": 2846780
    },
    {
      "epoch": 4.658850637916249,
      "grad_norm": 0.23195841908454895,
      "learning_rate": 6.867945414890323e-07,
      "loss": 0.0091,
      "step": 2846800
    },
    {
      "epoch": 4.6588833683549025,
      "grad_norm": 0.2095983475446701,
      "learning_rate": 6.867286492755151e-07,
      "loss": 0.006,
      "step": 2846820
    },
    {
      "epoch": 4.658916098793556,
      "grad_norm": 0.2395653873682022,
      "learning_rate": 6.86662757061998e-07,
      "loss": 0.0053,
      "step": 2846840
    },
    {
      "epoch": 4.65894882923221,
      "grad_norm": 0.4044256806373596,
      "learning_rate": 6.865968648484809e-07,
      "loss": 0.0084,
      "step": 2846860
    },
    {
      "epoch": 4.658981559670862,
      "grad_norm": 0.23210491240024567,
      "learning_rate": 6.865309726349638e-07,
      "loss": 0.0127,
      "step": 2846880
    },
    {
      "epoch": 4.659014290109516,
      "grad_norm": 0.10239148139953613,
      "learning_rate": 6.864650804214466e-07,
      "loss": 0.0082,
      "step": 2846900
    },
    {
      "epoch": 4.6590470205481695,
      "grad_norm": 0.14798715710639954,
      "learning_rate": 6.863991882079295e-07,
      "loss": 0.0053,
      "step": 2846920
    },
    {
      "epoch": 4.659079750986823,
      "grad_norm": 0.33257514238357544,
      "learning_rate": 6.863332959944124e-07,
      "loss": 0.0102,
      "step": 2846940
    },
    {
      "epoch": 4.659112481425476,
      "grad_norm": 0.2968124449253082,
      "learning_rate": 6.862674037808953e-07,
      "loss": 0.0061,
      "step": 2846960
    },
    {
      "epoch": 4.659145211864129,
      "grad_norm": 0.3162476420402527,
      "learning_rate": 6.862015115673781e-07,
      "loss": 0.0059,
      "step": 2846980
    },
    {
      "epoch": 4.659177942302783,
      "grad_norm": 0.30954959988594055,
      "learning_rate": 6.86135619353861e-07,
      "loss": 0.0091,
      "step": 2847000
    },
    {
      "epoch": 4.659210672741436,
      "grad_norm": 0.4207581579685211,
      "learning_rate": 6.860697271403439e-07,
      "loss": 0.0105,
      "step": 2847020
    },
    {
      "epoch": 4.659243403180089,
      "grad_norm": 0.8382421135902405,
      "learning_rate": 6.860038349268267e-07,
      "loss": 0.0115,
      "step": 2847040
    },
    {
      "epoch": 4.659276133618743,
      "grad_norm": 0.16159550845623016,
      "learning_rate": 6.859379427133096e-07,
      "loss": 0.0077,
      "step": 2847060
    },
    {
      "epoch": 4.659308864057396,
      "grad_norm": 0.05676446855068207,
      "learning_rate": 6.858720504997924e-07,
      "loss": 0.007,
      "step": 2847080
    },
    {
      "epoch": 4.659341594496049,
      "grad_norm": 0.23695306479930878,
      "learning_rate": 6.858061582862753e-07,
      "loss": 0.007,
      "step": 2847100
    },
    {
      "epoch": 4.659374324934703,
      "grad_norm": 0.09468978643417358,
      "learning_rate": 6.857402660727582e-07,
      "loss": 0.0126,
      "step": 2847120
    },
    {
      "epoch": 4.659407055373356,
      "grad_norm": 0.39711517095565796,
      "learning_rate": 6.856743738592411e-07,
      "loss": 0.0098,
      "step": 2847140
    },
    {
      "epoch": 4.659439785812009,
      "grad_norm": 0.3296378552913666,
      "learning_rate": 6.856084816457239e-07,
      "loss": 0.0115,
      "step": 2847160
    },
    {
      "epoch": 4.659472516250663,
      "grad_norm": 0.27747562527656555,
      "learning_rate": 6.855425894322068e-07,
      "loss": 0.0105,
      "step": 2847180
    },
    {
      "epoch": 4.659505246689316,
      "grad_norm": 0.5305343866348267,
      "learning_rate": 6.854766972186898e-07,
      "loss": 0.0083,
      "step": 2847200
    },
    {
      "epoch": 4.65953797712797,
      "grad_norm": 0.08699651062488556,
      "learning_rate": 6.854108050051727e-07,
      "loss": 0.0111,
      "step": 2847220
    },
    {
      "epoch": 4.659570707566623,
      "grad_norm": 0.13675081729888916,
      "learning_rate": 6.853449127916554e-07,
      "loss": 0.0078,
      "step": 2847240
    },
    {
      "epoch": 4.659603438005276,
      "grad_norm": 0.0883190706372261,
      "learning_rate": 6.852790205781383e-07,
      "loss": 0.0061,
      "step": 2847260
    },
    {
      "epoch": 4.65963616844393,
      "grad_norm": 0.14289431273937225,
      "learning_rate": 6.852131283646211e-07,
      "loss": 0.0056,
      "step": 2847280
    },
    {
      "epoch": 4.6596688988825825,
      "grad_norm": 0.04756869375705719,
      "learning_rate": 6.851472361511042e-07,
      "loss": 0.0065,
      "step": 2847300
    },
    {
      "epoch": 4.659701629321236,
      "grad_norm": 0.10501784086227417,
      "learning_rate": 6.85081343937587e-07,
      "loss": 0.0077,
      "step": 2847320
    },
    {
      "epoch": 4.65973435975989,
      "grad_norm": 0.19147150218486786,
      "learning_rate": 6.850154517240699e-07,
      "loss": 0.0097,
      "step": 2847340
    },
    {
      "epoch": 4.659767090198542,
      "grad_norm": 0.15517784655094147,
      "learning_rate": 6.849495595105526e-07,
      "loss": 0.0092,
      "step": 2847360
    },
    {
      "epoch": 4.659799820637196,
      "grad_norm": 0.2812705636024475,
      "learning_rate": 6.848836672970357e-07,
      "loss": 0.0094,
      "step": 2847380
    },
    {
      "epoch": 4.65983255107585,
      "grad_norm": 0.12667116522789001,
      "learning_rate": 6.848177750835185e-07,
      "loss": 0.0083,
      "step": 2847400
    },
    {
      "epoch": 4.659865281514503,
      "grad_norm": 0.1302473098039627,
      "learning_rate": 6.847518828700013e-07,
      "loss": 0.0071,
      "step": 2847420
    },
    {
      "epoch": 4.659898011953156,
      "grad_norm": 0.44185692071914673,
      "learning_rate": 6.846859906564842e-07,
      "loss": 0.0068,
      "step": 2847440
    },
    {
      "epoch": 4.6599307423918095,
      "grad_norm": 0.4989844560623169,
      "learning_rate": 6.846200984429671e-07,
      "loss": 0.0083,
      "step": 2847460
    },
    {
      "epoch": 4.659963472830463,
      "grad_norm": 0.3256266415119171,
      "learning_rate": 6.8455420622945e-07,
      "loss": 0.0099,
      "step": 2847480
    },
    {
      "epoch": 4.659996203269117,
      "grad_norm": 0.18184269964694977,
      "learning_rate": 6.844883140159328e-07,
      "loss": 0.0092,
      "step": 2847500
    },
    {
      "epoch": 4.660028933707769,
      "grad_norm": 0.15978208184242249,
      "learning_rate": 6.844224218024157e-07,
      "loss": 0.012,
      "step": 2847520
    },
    {
      "epoch": 4.660061664146423,
      "grad_norm": 0.17032170295715332,
      "learning_rate": 6.843565295888985e-07,
      "loss": 0.0111,
      "step": 2847540
    },
    {
      "epoch": 4.660094394585077,
      "grad_norm": 0.740186870098114,
      "learning_rate": 6.842906373753815e-07,
      "loss": 0.0095,
      "step": 2847560
    },
    {
      "epoch": 4.660127125023729,
      "grad_norm": 0.36445894837379456,
      "learning_rate": 6.842247451618643e-07,
      "loss": 0.0091,
      "step": 2847580
    },
    {
      "epoch": 4.660159855462383,
      "grad_norm": 0.45970162749290466,
      "learning_rate": 6.841588529483472e-07,
      "loss": 0.0088,
      "step": 2847600
    },
    {
      "epoch": 4.6601925859010365,
      "grad_norm": 0.126989483833313,
      "learning_rate": 6.8409296073483e-07,
      "loss": 0.0101,
      "step": 2847620
    },
    {
      "epoch": 4.660225316339689,
      "grad_norm": 0.7874076962471008,
      "learning_rate": 6.84027068521313e-07,
      "loss": 0.0129,
      "step": 2847640
    },
    {
      "epoch": 4.660258046778343,
      "grad_norm": 0.1424713134765625,
      "learning_rate": 6.839611763077958e-07,
      "loss": 0.011,
      "step": 2847660
    },
    {
      "epoch": 4.660290777216996,
      "grad_norm": 0.10762955248355865,
      "learning_rate": 6.838952840942787e-07,
      "loss": 0.0061,
      "step": 2847680
    },
    {
      "epoch": 4.66032350765565,
      "grad_norm": 0.19277726113796234,
      "learning_rate": 6.838293918807615e-07,
      "loss": 0.008,
      "step": 2847700
    },
    {
      "epoch": 4.660356238094303,
      "grad_norm": 0.38131558895111084,
      "learning_rate": 6.837634996672443e-07,
      "loss": 0.0088,
      "step": 2847720
    },
    {
      "epoch": 4.660388968532956,
      "grad_norm": 0.27956080436706543,
      "learning_rate": 6.836976074537273e-07,
      "loss": 0.0049,
      "step": 2847740
    },
    {
      "epoch": 4.66042169897161,
      "grad_norm": 0.1160229817032814,
      "learning_rate": 6.836317152402101e-07,
      "loss": 0.0094,
      "step": 2847760
    },
    {
      "epoch": 4.660454429410263,
      "grad_norm": 0.15816548466682434,
      "learning_rate": 6.83565823026693e-07,
      "loss": 0.0057,
      "step": 2847780
    },
    {
      "epoch": 4.660487159848916,
      "grad_norm": 0.14548516273498535,
      "learning_rate": 6.834999308131758e-07,
      "loss": 0.0131,
      "step": 2847800
    },
    {
      "epoch": 4.66051989028757,
      "grad_norm": 0.15829679369926453,
      "learning_rate": 6.834340385996588e-07,
      "loss": 0.0118,
      "step": 2847820
    },
    {
      "epoch": 4.660552620726223,
      "grad_norm": 0.090055912733078,
      "learning_rate": 6.833681463861416e-07,
      "loss": 0.0078,
      "step": 2847840
    },
    {
      "epoch": 4.660585351164876,
      "grad_norm": 0.40677645802497864,
      "learning_rate": 6.833022541726245e-07,
      "loss": 0.0069,
      "step": 2847860
    },
    {
      "epoch": 4.66061808160353,
      "grad_norm": 0.22058777511119843,
      "learning_rate": 6.832363619591073e-07,
      "loss": 0.0089,
      "step": 2847880
    },
    {
      "epoch": 4.660650812042183,
      "grad_norm": 0.1915706843137741,
      "learning_rate": 6.831704697455903e-07,
      "loss": 0.0086,
      "step": 2847900
    },
    {
      "epoch": 4.660683542480836,
      "grad_norm": 0.11685549467802048,
      "learning_rate": 6.831045775320731e-07,
      "loss": 0.0097,
      "step": 2847920
    },
    {
      "epoch": 4.6607162729194895,
      "grad_norm": 0.46597129106521606,
      "learning_rate": 6.83038685318556e-07,
      "loss": 0.0066,
      "step": 2847940
    },
    {
      "epoch": 4.660749003358143,
      "grad_norm": 0.18439915776252747,
      "learning_rate": 6.829727931050388e-07,
      "loss": 0.0091,
      "step": 2847960
    },
    {
      "epoch": 4.660781733796797,
      "grad_norm": 0.13287535309791565,
      "learning_rate": 6.829069008915217e-07,
      "loss": 0.0077,
      "step": 2847980
    },
    {
      "epoch": 4.660814464235449,
      "grad_norm": 0.24867330491542816,
      "learning_rate": 6.828410086780046e-07,
      "loss": 0.0127,
      "step": 2848000
    },
    {
      "epoch": 4.660847194674103,
      "grad_norm": 0.16539058089256287,
      "learning_rate": 6.827751164644875e-07,
      "loss": 0.0093,
      "step": 2848020
    },
    {
      "epoch": 4.660879925112757,
      "grad_norm": 0.41961169242858887,
      "learning_rate": 6.827092242509703e-07,
      "loss": 0.008,
      "step": 2848040
    },
    {
      "epoch": 4.660912655551409,
      "grad_norm": 0.17275741696357727,
      "learning_rate": 6.826433320374531e-07,
      "loss": 0.0128,
      "step": 2848060
    },
    {
      "epoch": 4.660945385990063,
      "grad_norm": 0.06068376079201698,
      "learning_rate": 6.825774398239361e-07,
      "loss": 0.0069,
      "step": 2848080
    },
    {
      "epoch": 4.6609781164287165,
      "grad_norm": 0.28757375478744507,
      "learning_rate": 6.825115476104189e-07,
      "loss": 0.0083,
      "step": 2848100
    },
    {
      "epoch": 4.66101084686737,
      "grad_norm": 0.32253095507621765,
      "learning_rate": 6.824456553969018e-07,
      "loss": 0.0128,
      "step": 2848120
    },
    {
      "epoch": 4.661043577306023,
      "grad_norm": 0.12984542548656464,
      "learning_rate": 6.823797631833846e-07,
      "loss": 0.0098,
      "step": 2848140
    },
    {
      "epoch": 4.661076307744676,
      "grad_norm": 0.4026806652545929,
      "learning_rate": 6.823138709698675e-07,
      "loss": 0.0065,
      "step": 2848160
    },
    {
      "epoch": 4.66110903818333,
      "grad_norm": 0.09331844002008438,
      "learning_rate": 6.822479787563504e-07,
      "loss": 0.0087,
      "step": 2848180
    },
    {
      "epoch": 4.661141768621983,
      "grad_norm": 0.4494714140892029,
      "learning_rate": 6.821820865428333e-07,
      "loss": 0.0114,
      "step": 2848200
    },
    {
      "epoch": 4.661174499060636,
      "grad_norm": 0.21550330519676208,
      "learning_rate": 6.821161943293161e-07,
      "loss": 0.0096,
      "step": 2848220
    },
    {
      "epoch": 4.66120722949929,
      "grad_norm": 0.30333608388900757,
      "learning_rate": 6.82050302115799e-07,
      "loss": 0.005,
      "step": 2848240
    },
    {
      "epoch": 4.661239959937943,
      "grad_norm": 0.11492154002189636,
      "learning_rate": 6.819844099022819e-07,
      "loss": 0.0074,
      "step": 2848260
    },
    {
      "epoch": 4.661272690376596,
      "grad_norm": 0.04406863451004028,
      "learning_rate": 6.819185176887648e-07,
      "loss": 0.0114,
      "step": 2848280
    },
    {
      "epoch": 4.66130542081525,
      "grad_norm": 0.5977094173431396,
      "learning_rate": 6.818526254752476e-07,
      "loss": 0.0098,
      "step": 2848300
    },
    {
      "epoch": 4.661338151253903,
      "grad_norm": 0.13658392429351807,
      "learning_rate": 6.817867332617305e-07,
      "loss": 0.0066,
      "step": 2848320
    },
    {
      "epoch": 4.661370881692556,
      "grad_norm": 0.19342240691184998,
      "learning_rate": 6.817208410482134e-07,
      "loss": 0.0051,
      "step": 2848340
    },
    {
      "epoch": 4.66140361213121,
      "grad_norm": 0.08047047257423401,
      "learning_rate": 6.816549488346963e-07,
      "loss": 0.0093,
      "step": 2848360
    },
    {
      "epoch": 4.661436342569863,
      "grad_norm": 0.21541094779968262,
      "learning_rate": 6.815890566211791e-07,
      "loss": 0.008,
      "step": 2848380
    },
    {
      "epoch": 4.661469073008517,
      "grad_norm": 0.17944049835205078,
      "learning_rate": 6.815231644076619e-07,
      "loss": 0.0084,
      "step": 2848400
    },
    {
      "epoch": 4.66150180344717,
      "grad_norm": 0.11624753475189209,
      "learning_rate": 6.814572721941448e-07,
      "loss": 0.0082,
      "step": 2848420
    },
    {
      "epoch": 4.661534533885823,
      "grad_norm": 0.31837940216064453,
      "learning_rate": 6.813913799806278e-07,
      "loss": 0.0064,
      "step": 2848440
    },
    {
      "epoch": 4.661567264324477,
      "grad_norm": 0.20792491734027863,
      "learning_rate": 6.813254877671106e-07,
      "loss": 0.0072,
      "step": 2848460
    },
    {
      "epoch": 4.6615999947631295,
      "grad_norm": 0.2044624537229538,
      "learning_rate": 6.812595955535934e-07,
      "loss": 0.0081,
      "step": 2848480
    },
    {
      "epoch": 4.661632725201783,
      "grad_norm": 0.24155758321285248,
      "learning_rate": 6.811937033400763e-07,
      "loss": 0.0106,
      "step": 2848500
    },
    {
      "epoch": 4.661665455640437,
      "grad_norm": 0.05735854059457779,
      "learning_rate": 6.811278111265593e-07,
      "loss": 0.0065,
      "step": 2848520
    },
    {
      "epoch": 4.661698186079089,
      "grad_norm": 0.24364593625068665,
      "learning_rate": 6.810619189130422e-07,
      "loss": 0.0118,
      "step": 2848540
    },
    {
      "epoch": 4.661730916517743,
      "grad_norm": 0.10757305473089218,
      "learning_rate": 6.809960266995249e-07,
      "loss": 0.0071,
      "step": 2848560
    },
    {
      "epoch": 4.661763646956397,
      "grad_norm": 0.16005635261535645,
      "learning_rate": 6.809301344860078e-07,
      "loss": 0.0082,
      "step": 2848580
    },
    {
      "epoch": 4.66179637739505,
      "grad_norm": 0.42834895849227905,
      "learning_rate": 6.808642422724906e-07,
      "loss": 0.0054,
      "step": 2848600
    },
    {
      "epoch": 4.661829107833703,
      "grad_norm": 0.09960830956697464,
      "learning_rate": 6.807983500589737e-07,
      "loss": 0.009,
      "step": 2848620
    },
    {
      "epoch": 4.6618618382723565,
      "grad_norm": 0.10131968557834625,
      "learning_rate": 6.807324578454565e-07,
      "loss": 0.0072,
      "step": 2848640
    },
    {
      "epoch": 4.66189456871101,
      "grad_norm": 0.1426944136619568,
      "learning_rate": 6.806665656319394e-07,
      "loss": 0.0098,
      "step": 2848660
    },
    {
      "epoch": 4.661927299149664,
      "grad_norm": 0.41303080320358276,
      "learning_rate": 6.806006734184221e-07,
      "loss": 0.0077,
      "step": 2848680
    },
    {
      "epoch": 4.661960029588316,
      "grad_norm": 0.2687167227268219,
      "learning_rate": 6.805347812049052e-07,
      "loss": 0.0081,
      "step": 2848700
    },
    {
      "epoch": 4.66199276002697,
      "grad_norm": 0.0932195633649826,
      "learning_rate": 6.80468888991388e-07,
      "loss": 0.0084,
      "step": 2848720
    },
    {
      "epoch": 4.6620254904656235,
      "grad_norm": 0.20799635350704193,
      "learning_rate": 6.804029967778708e-07,
      "loss": 0.0091,
      "step": 2848740
    },
    {
      "epoch": 4.662058220904276,
      "grad_norm": 0.18423083424568176,
      "learning_rate": 6.803371045643537e-07,
      "loss": 0.0106,
      "step": 2848760
    },
    {
      "epoch": 4.66209095134293,
      "grad_norm": 0.27225035429000854,
      "learning_rate": 6.802712123508366e-07,
      "loss": 0.0125,
      "step": 2848780
    },
    {
      "epoch": 4.662123681781583,
      "grad_norm": 0.06709166616201401,
      "learning_rate": 6.802053201373195e-07,
      "loss": 0.009,
      "step": 2848800
    },
    {
      "epoch": 4.662156412220236,
      "grad_norm": 0.1499486267566681,
      "learning_rate": 6.801394279238023e-07,
      "loss": 0.0051,
      "step": 2848820
    },
    {
      "epoch": 4.66218914265889,
      "grad_norm": 0.6284943222999573,
      "learning_rate": 6.800735357102852e-07,
      "loss": 0.0107,
      "step": 2848840
    },
    {
      "epoch": 4.662221873097543,
      "grad_norm": 0.14698763191699982,
      "learning_rate": 6.80007643496768e-07,
      "loss": 0.0075,
      "step": 2848860
    },
    {
      "epoch": 4.662254603536197,
      "grad_norm": 0.14649136364459991,
      "learning_rate": 6.79941751283251e-07,
      "loss": 0.0095,
      "step": 2848880
    },
    {
      "epoch": 4.66228733397485,
      "grad_norm": 0.28870537877082825,
      "learning_rate": 6.798758590697338e-07,
      "loss": 0.0086,
      "step": 2848900
    },
    {
      "epoch": 4.662320064413503,
      "grad_norm": 0.22480690479278564,
      "learning_rate": 6.798099668562167e-07,
      "loss": 0.0116,
      "step": 2848920
    },
    {
      "epoch": 4.662352794852157,
      "grad_norm": 0.5806401968002319,
      "learning_rate": 6.797440746426995e-07,
      "loss": 0.0112,
      "step": 2848940
    },
    {
      "epoch": 4.66238552529081,
      "grad_norm": 0.09840027987957001,
      "learning_rate": 6.796781824291825e-07,
      "loss": 0.0181,
      "step": 2848960
    },
    {
      "epoch": 4.662418255729463,
      "grad_norm": 0.07949917763471603,
      "learning_rate": 6.796122902156653e-07,
      "loss": 0.0073,
      "step": 2848980
    },
    {
      "epoch": 4.662450986168117,
      "grad_norm": 0.10520989447832108,
      "learning_rate": 6.795463980021482e-07,
      "loss": 0.0065,
      "step": 2849000
    },
    {
      "epoch": 4.66248371660677,
      "grad_norm": 0.21910080313682556,
      "learning_rate": 6.79480505788631e-07,
      "loss": 0.0101,
      "step": 2849020
    },
    {
      "epoch": 4.662516447045423,
      "grad_norm": 0.2877531945705414,
      "learning_rate": 6.794146135751138e-07,
      "loss": 0.0125,
      "step": 2849040
    },
    {
      "epoch": 4.662549177484077,
      "grad_norm": 0.07366207242012024,
      "learning_rate": 6.793487213615968e-07,
      "loss": 0.0074,
      "step": 2849060
    },
    {
      "epoch": 4.66258190792273,
      "grad_norm": 0.3451683223247528,
      "learning_rate": 6.792828291480796e-07,
      "loss": 0.0088,
      "step": 2849080
    },
    {
      "epoch": 4.662614638361383,
      "grad_norm": 0.2200060486793518,
      "learning_rate": 6.792169369345625e-07,
      "loss": 0.0071,
      "step": 2849100
    },
    {
      "epoch": 4.6626473688000365,
      "grad_norm": 0.13375040888786316,
      "learning_rate": 6.791510447210453e-07,
      "loss": 0.0082,
      "step": 2849120
    },
    {
      "epoch": 4.66268009923869,
      "grad_norm": 0.19668257236480713,
      "learning_rate": 6.790851525075283e-07,
      "loss": 0.009,
      "step": 2849140
    },
    {
      "epoch": 4.662712829677344,
      "grad_norm": 0.20285560190677643,
      "learning_rate": 6.790192602940111e-07,
      "loss": 0.0081,
      "step": 2849160
    },
    {
      "epoch": 4.662745560115996,
      "grad_norm": 0.6076560616493225,
      "learning_rate": 6.78953368080494e-07,
      "loss": 0.0079,
      "step": 2849180
    },
    {
      "epoch": 4.66277829055465,
      "grad_norm": 0.49065956473350525,
      "learning_rate": 6.788874758669768e-07,
      "loss": 0.0092,
      "step": 2849200
    },
    {
      "epoch": 4.662811020993304,
      "grad_norm": 0.32437264919281006,
      "learning_rate": 6.788215836534598e-07,
      "loss": 0.0109,
      "step": 2849220
    },
    {
      "epoch": 4.662843751431957,
      "grad_norm": 0.22828194499015808,
      "learning_rate": 6.787556914399426e-07,
      "loss": 0.007,
      "step": 2849240
    },
    {
      "epoch": 4.66287648187061,
      "grad_norm": 0.30640363693237305,
      "learning_rate": 6.786897992264255e-07,
      "loss": 0.0071,
      "step": 2849260
    },
    {
      "epoch": 4.6629092123092635,
      "grad_norm": 0.14620521664619446,
      "learning_rate": 6.786239070129083e-07,
      "loss": 0.0121,
      "step": 2849280
    },
    {
      "epoch": 4.662941942747917,
      "grad_norm": 0.18852849304676056,
      "learning_rate": 6.785580147993912e-07,
      "loss": 0.005,
      "step": 2849300
    },
    {
      "epoch": 4.66297467318657,
      "grad_norm": 0.4486391246318817,
      "learning_rate": 6.784921225858741e-07,
      "loss": 0.0068,
      "step": 2849320
    },
    {
      "epoch": 4.663007403625223,
      "grad_norm": 0.2743198275566101,
      "learning_rate": 6.78426230372357e-07,
      "loss": 0.0134,
      "step": 2849340
    },
    {
      "epoch": 4.663040134063877,
      "grad_norm": 0.21390558779239655,
      "learning_rate": 6.783603381588398e-07,
      "loss": 0.0088,
      "step": 2849360
    },
    {
      "epoch": 4.66307286450253,
      "grad_norm": 0.5805826187133789,
      "learning_rate": 6.782944459453226e-07,
      "loss": 0.0097,
      "step": 2849380
    },
    {
      "epoch": 4.663105594941183,
      "grad_norm": 0.1467423290014267,
      "learning_rate": 6.782285537318056e-07,
      "loss": 0.0099,
      "step": 2849400
    },
    {
      "epoch": 4.663138325379837,
      "grad_norm": 0.5577430725097656,
      "learning_rate": 6.781626615182884e-07,
      "loss": 0.0116,
      "step": 2849420
    },
    {
      "epoch": 4.6631710558184905,
      "grad_norm": 0.09546799957752228,
      "learning_rate": 6.780967693047713e-07,
      "loss": 0.0109,
      "step": 2849440
    },
    {
      "epoch": 4.663203786257143,
      "grad_norm": 0.2258131355047226,
      "learning_rate": 6.780308770912541e-07,
      "loss": 0.0081,
      "step": 2849460
    },
    {
      "epoch": 4.663236516695797,
      "grad_norm": 0.7710134983062744,
      "learning_rate": 6.77964984877737e-07,
      "loss": 0.0104,
      "step": 2849480
    },
    {
      "epoch": 4.66326924713445,
      "grad_norm": 0.4002075493335724,
      "learning_rate": 6.778990926642199e-07,
      "loss": 0.0085,
      "step": 2849500
    },
    {
      "epoch": 4.663301977573103,
      "grad_norm": 0.29638564586639404,
      "learning_rate": 6.778332004507028e-07,
      "loss": 0.0102,
      "step": 2849520
    },
    {
      "epoch": 4.663334708011757,
      "grad_norm": 0.34056752920150757,
      "learning_rate": 6.777673082371856e-07,
      "loss": 0.0118,
      "step": 2849540
    },
    {
      "epoch": 4.66336743845041,
      "grad_norm": 0.08505385369062424,
      "learning_rate": 6.777014160236685e-07,
      "loss": 0.0082,
      "step": 2849560
    },
    {
      "epoch": 4.663400168889064,
      "grad_norm": 0.63749760389328,
      "learning_rate": 6.776355238101514e-07,
      "loss": 0.0121,
      "step": 2849580
    },
    {
      "epoch": 4.6634328993277165,
      "grad_norm": 0.10105713456869125,
      "learning_rate": 6.775696315966343e-07,
      "loss": 0.0136,
      "step": 2849600
    },
    {
      "epoch": 4.66346562976637,
      "grad_norm": 0.6706452965736389,
      "learning_rate": 6.775037393831171e-07,
      "loss": 0.0104,
      "step": 2849620
    },
    {
      "epoch": 4.663498360205024,
      "grad_norm": 0.11809176951646805,
      "learning_rate": 6.774378471696e-07,
      "loss": 0.0106,
      "step": 2849640
    },
    {
      "epoch": 4.663531090643676,
      "grad_norm": 0.2528870403766632,
      "learning_rate": 6.773719549560829e-07,
      "loss": 0.0074,
      "step": 2849660
    },
    {
      "epoch": 4.66356382108233,
      "grad_norm": 0.12366071343421936,
      "learning_rate": 6.773060627425658e-07,
      "loss": 0.0087,
      "step": 2849680
    },
    {
      "epoch": 4.663596551520984,
      "grad_norm": 0.6070687174797058,
      "learning_rate": 6.772401705290486e-07,
      "loss": 0.0086,
      "step": 2849700
    },
    {
      "epoch": 4.663629281959636,
      "grad_norm": 0.06585665047168732,
      "learning_rate": 6.771742783155314e-07,
      "loss": 0.0117,
      "step": 2849720
    },
    {
      "epoch": 4.66366201239829,
      "grad_norm": 0.14964614808559418,
      "learning_rate": 6.771083861020143e-07,
      "loss": 0.0086,
      "step": 2849740
    },
    {
      "epoch": 4.6636947428369435,
      "grad_norm": 0.09448954463005066,
      "learning_rate": 6.770424938884974e-07,
      "loss": 0.0121,
      "step": 2849760
    },
    {
      "epoch": 4.663727473275597,
      "grad_norm": 0.14035801589488983,
      "learning_rate": 6.769766016749801e-07,
      "loss": 0.0084,
      "step": 2849780
    },
    {
      "epoch": 4.66376020371425,
      "grad_norm": 0.11072129011154175,
      "learning_rate": 6.769107094614629e-07,
      "loss": 0.01,
      "step": 2849800
    },
    {
      "epoch": 4.663792934152903,
      "grad_norm": 0.43633604049682617,
      "learning_rate": 6.768448172479458e-07,
      "loss": 0.0103,
      "step": 2849820
    },
    {
      "epoch": 4.663825664591557,
      "grad_norm": 0.2966322898864746,
      "learning_rate": 6.767789250344288e-07,
      "loss": 0.0131,
      "step": 2849840
    },
    {
      "epoch": 4.663858395030211,
      "grad_norm": 0.29925087094306946,
      "learning_rate": 6.767130328209117e-07,
      "loss": 0.0073,
      "step": 2849860
    },
    {
      "epoch": 4.663891125468863,
      "grad_norm": 0.7368812561035156,
      "learning_rate": 6.766471406073944e-07,
      "loss": 0.0065,
      "step": 2849880
    },
    {
      "epoch": 4.663923855907517,
      "grad_norm": 0.42592477798461914,
      "learning_rate": 6.765812483938773e-07,
      "loss": 0.0159,
      "step": 2849900
    },
    {
      "epoch": 4.6639565863461705,
      "grad_norm": 0.5519406199455261,
      "learning_rate": 6.765153561803601e-07,
      "loss": 0.0127,
      "step": 2849920
    },
    {
      "epoch": 4.663989316784823,
      "grad_norm": 0.20044134557247162,
      "learning_rate": 6.764494639668432e-07,
      "loss": 0.008,
      "step": 2849940
    },
    {
      "epoch": 4.664022047223477,
      "grad_norm": 0.10420014709234238,
      "learning_rate": 6.76383571753326e-07,
      "loss": 0.0088,
      "step": 2849960
    },
    {
      "epoch": 4.66405477766213,
      "grad_norm": 0.3912297785282135,
      "learning_rate": 6.763176795398089e-07,
      "loss": 0.0085,
      "step": 2849980
    },
    {
      "epoch": 4.664087508100783,
      "grad_norm": 0.17726443707942963,
      "learning_rate": 6.762517873262916e-07,
      "loss": 0.0078,
      "step": 2850000
    },
    {
      "epoch": 4.664087508100783,
      "eval_loss": 0.005722072906792164,
      "eval_runtime": 6474.6533,
      "eval_samples_per_second": 158.751,
      "eval_steps_per_second": 15.875,
      "eval_sts-dev_pearson_cosine": 0.9870701957725454,
      "eval_sts-dev_spearman_cosine": 0.8969048544219508,
      "step": 2850000
    },
    {
      "epoch": 4.664120238539437,
      "grad_norm": 0.05325780436396599,
      "learning_rate": 6.761858951127747e-07,
      "loss": 0.011,
      "step": 2850020
    },
    {
      "epoch": 4.66415296897809,
      "grad_norm": 0.4465458393096924,
      "learning_rate": 6.761200028992575e-07,
      "loss": 0.0113,
      "step": 2850040
    },
    {
      "epoch": 4.664185699416744,
      "grad_norm": 0.2930390238761902,
      "learning_rate": 6.760541106857404e-07,
      "loss": 0.009,
      "step": 2850060
    },
    {
      "epoch": 4.664218429855397,
      "grad_norm": 0.13754254579544067,
      "learning_rate": 6.759882184722232e-07,
      "loss": 0.0078,
      "step": 2850080
    },
    {
      "epoch": 4.66425116029405,
      "grad_norm": 0.2693522274494171,
      "learning_rate": 6.759223262587062e-07,
      "loss": 0.01,
      "step": 2850100
    },
    {
      "epoch": 4.664283890732704,
      "grad_norm": 0.25282585620880127,
      "learning_rate": 6.75856434045189e-07,
      "loss": 0.0102,
      "step": 2850120
    },
    {
      "epoch": 4.664316621171357,
      "grad_norm": 0.1242341548204422,
      "learning_rate": 6.757905418316718e-07,
      "loss": 0.0058,
      "step": 2850140
    },
    {
      "epoch": 4.66434935161001,
      "grad_norm": 0.2087043970823288,
      "learning_rate": 6.757246496181547e-07,
      "loss": 0.0092,
      "step": 2850160
    },
    {
      "epoch": 4.664382082048664,
      "grad_norm": 0.24146077036857605,
      "learning_rate": 6.756587574046375e-07,
      "loss": 0.0079,
      "step": 2850180
    },
    {
      "epoch": 4.664414812487317,
      "grad_norm": 0.0332949236035347,
      "learning_rate": 6.755928651911205e-07,
      "loss": 0.0109,
      "step": 2850200
    },
    {
      "epoch": 4.66444754292597,
      "grad_norm": 0.050028569996356964,
      "learning_rate": 6.755269729776033e-07,
      "loss": 0.0066,
      "step": 2850220
    },
    {
      "epoch": 4.664480273364624,
      "grad_norm": 0.24394269287586212,
      "learning_rate": 6.754610807640862e-07,
      "loss": 0.0093,
      "step": 2850240
    },
    {
      "epoch": 4.664513003803277,
      "grad_norm": 0.3344416320323944,
      "learning_rate": 6.75395188550569e-07,
      "loss": 0.0112,
      "step": 2850260
    },
    {
      "epoch": 4.66454573424193,
      "grad_norm": 0.17504020035266876,
      "learning_rate": 6.75329296337052e-07,
      "loss": 0.0087,
      "step": 2850280
    },
    {
      "epoch": 4.6645784646805835,
      "grad_norm": 0.12943652272224426,
      "learning_rate": 6.752634041235348e-07,
      "loss": 0.0075,
      "step": 2850300
    },
    {
      "epoch": 4.664611195119237,
      "grad_norm": 0.17274950444698334,
      "learning_rate": 6.751975119100177e-07,
      "loss": 0.0085,
      "step": 2850320
    },
    {
      "epoch": 4.664643925557891,
      "grad_norm": 0.14685994386672974,
      "learning_rate": 6.751316196965005e-07,
      "loss": 0.0092,
      "step": 2850340
    },
    {
      "epoch": 4.664676655996543,
      "grad_norm": 0.15836042165756226,
      "learning_rate": 6.750657274829834e-07,
      "loss": 0.0074,
      "step": 2850360
    },
    {
      "epoch": 4.664709386435197,
      "grad_norm": 0.14245744049549103,
      "learning_rate": 6.749998352694663e-07,
      "loss": 0.0069,
      "step": 2850380
    },
    {
      "epoch": 4.6647421168738505,
      "grad_norm": 0.12687872350215912,
      "learning_rate": 6.749339430559492e-07,
      "loss": 0.0098,
      "step": 2850400
    },
    {
      "epoch": 4.664774847312504,
      "grad_norm": 0.12184837460517883,
      "learning_rate": 6.74868050842432e-07,
      "loss": 0.0069,
      "step": 2850420
    },
    {
      "epoch": 4.664807577751157,
      "grad_norm": 0.33557137846946716,
      "learning_rate": 6.748021586289148e-07,
      "loss": 0.0099,
      "step": 2850440
    },
    {
      "epoch": 4.66484030818981,
      "grad_norm": 0.14165319502353668,
      "learning_rate": 6.747362664153978e-07,
      "loss": 0.0098,
      "step": 2850460
    },
    {
      "epoch": 4.664873038628464,
      "grad_norm": 0.06613567471504211,
      "learning_rate": 6.746703742018806e-07,
      "loss": 0.0056,
      "step": 2850480
    },
    {
      "epoch": 4.664905769067117,
      "grad_norm": 0.4094208776950836,
      "learning_rate": 6.746044819883635e-07,
      "loss": 0.0078,
      "step": 2850500
    },
    {
      "epoch": 4.66493849950577,
      "grad_norm": 0.29041963815689087,
      "learning_rate": 6.745385897748463e-07,
      "loss": 0.0123,
      "step": 2850520
    },
    {
      "epoch": 4.664971229944424,
      "grad_norm": 0.07391154021024704,
      "learning_rate": 6.744726975613293e-07,
      "loss": 0.0071,
      "step": 2850540
    },
    {
      "epoch": 4.665003960383077,
      "grad_norm": 0.607366681098938,
      "learning_rate": 6.744068053478121e-07,
      "loss": 0.0108,
      "step": 2850560
    },
    {
      "epoch": 4.66503669082173,
      "grad_norm": 0.3712601959705353,
      "learning_rate": 6.74340913134295e-07,
      "loss": 0.0115,
      "step": 2850580
    },
    {
      "epoch": 4.665069421260384,
      "grad_norm": 0.6636248826980591,
      "learning_rate": 6.742750209207778e-07,
      "loss": 0.009,
      "step": 2850600
    },
    {
      "epoch": 4.665102151699037,
      "grad_norm": 0.5896273851394653,
      "learning_rate": 6.742091287072607e-07,
      "loss": 0.0133,
      "step": 2850620
    },
    {
      "epoch": 4.66513488213769,
      "grad_norm": 0.45212477445602417,
      "learning_rate": 6.741432364937436e-07,
      "loss": 0.0101,
      "step": 2850640
    },
    {
      "epoch": 4.665167612576344,
      "grad_norm": 0.508520245552063,
      "learning_rate": 6.740773442802265e-07,
      "loss": 0.0075,
      "step": 2850660
    },
    {
      "epoch": 4.665200343014997,
      "grad_norm": 0.7202695608139038,
      "learning_rate": 6.740114520667093e-07,
      "loss": 0.0087,
      "step": 2850680
    },
    {
      "epoch": 4.665233073453651,
      "grad_norm": 0.22778193652629852,
      "learning_rate": 6.739455598531922e-07,
      "loss": 0.0097,
      "step": 2850700
    },
    {
      "epoch": 4.665265803892304,
      "grad_norm": 0.09026754647493362,
      "learning_rate": 6.738796676396751e-07,
      "loss": 0.0102,
      "step": 2850720
    },
    {
      "epoch": 4.665298534330957,
      "grad_norm": 0.4586034119129181,
      "learning_rate": 6.73813775426158e-07,
      "loss": 0.0095,
      "step": 2850740
    },
    {
      "epoch": 4.665331264769611,
      "grad_norm": 0.5421486496925354,
      "learning_rate": 6.737478832126408e-07,
      "loss": 0.0056,
      "step": 2850760
    },
    {
      "epoch": 4.6653639952082635,
      "grad_norm": 0.2220943719148636,
      "learning_rate": 6.736819909991236e-07,
      "loss": 0.0059,
      "step": 2850780
    },
    {
      "epoch": 4.665396725646917,
      "grad_norm": 0.16443116962909698,
      "learning_rate": 6.736160987856065e-07,
      "loss": 0.0084,
      "step": 2850800
    },
    {
      "epoch": 4.665429456085571,
      "grad_norm": 0.3530905544757843,
      "learning_rate": 6.735502065720894e-07,
      "loss": 0.0114,
      "step": 2850820
    },
    {
      "epoch": 4.665462186524223,
      "grad_norm": 0.09324897825717926,
      "learning_rate": 6.734843143585723e-07,
      "loss": 0.0135,
      "step": 2850840
    },
    {
      "epoch": 4.665494916962877,
      "grad_norm": 0.24576793611049652,
      "learning_rate": 6.734184221450551e-07,
      "loss": 0.0064,
      "step": 2850860
    },
    {
      "epoch": 4.665527647401531,
      "grad_norm": 0.5062402486801147,
      "learning_rate": 6.73352529931538e-07,
      "loss": 0.0122,
      "step": 2850880
    },
    {
      "epoch": 4.665560377840184,
      "grad_norm": 0.36680445075035095,
      "learning_rate": 6.732866377180209e-07,
      "loss": 0.0081,
      "step": 2850900
    },
    {
      "epoch": 4.665593108278837,
      "grad_norm": 0.39762553572654724,
      "learning_rate": 6.732207455045038e-07,
      "loss": 0.0091,
      "step": 2850920
    },
    {
      "epoch": 4.6656258387174905,
      "grad_norm": 0.12885557115077972,
      "learning_rate": 6.731548532909866e-07,
      "loss": 0.0101,
      "step": 2850940
    },
    {
      "epoch": 4.665658569156144,
      "grad_norm": 0.07210783660411835,
      "learning_rate": 6.730889610774695e-07,
      "loss": 0.0088,
      "step": 2850960
    },
    {
      "epoch": 4.665691299594797,
      "grad_norm": 0.43809470534324646,
      "learning_rate": 6.730230688639524e-07,
      "loss": 0.0093,
      "step": 2850980
    },
    {
      "epoch": 4.66572403003345,
      "grad_norm": 0.17748939990997314,
      "learning_rate": 6.729571766504353e-07,
      "loss": 0.0067,
      "step": 2851000
    },
    {
      "epoch": 4.665756760472104,
      "grad_norm": 0.1929987668991089,
      "learning_rate": 6.728912844369181e-07,
      "loss": 0.0087,
      "step": 2851020
    },
    {
      "epoch": 4.665789490910758,
      "grad_norm": 0.31877973675727844,
      "learning_rate": 6.72825392223401e-07,
      "loss": 0.0072,
      "step": 2851040
    },
    {
      "epoch": 4.66582222134941,
      "grad_norm": 0.03338951990008354,
      "learning_rate": 6.727595000098838e-07,
      "loss": 0.0087,
      "step": 2851060
    },
    {
      "epoch": 4.665854951788064,
      "grad_norm": 0.3952098488807678,
      "learning_rate": 6.726936077963669e-07,
      "loss": 0.0128,
      "step": 2851080
    },
    {
      "epoch": 4.6658876822267175,
      "grad_norm": 0.14171580970287323,
      "learning_rate": 6.726277155828496e-07,
      "loss": 0.008,
      "step": 2851100
    },
    {
      "epoch": 4.66592041266537,
      "grad_norm": 0.36517956852912903,
      "learning_rate": 6.725618233693324e-07,
      "loss": 0.0119,
      "step": 2851120
    },
    {
      "epoch": 4.665953143104024,
      "grad_norm": 0.22121848165988922,
      "learning_rate": 6.724959311558153e-07,
      "loss": 0.0087,
      "step": 2851140
    },
    {
      "epoch": 4.665985873542677,
      "grad_norm": 0.5368264317512512,
      "learning_rate": 6.724300389422983e-07,
      "loss": 0.0126,
      "step": 2851160
    },
    {
      "epoch": 4.66601860398133,
      "grad_norm": 0.3285931646823883,
      "learning_rate": 6.723641467287812e-07,
      "loss": 0.0087,
      "step": 2851180
    },
    {
      "epoch": 4.666051334419984,
      "grad_norm": 0.11602196842432022,
      "learning_rate": 6.722982545152639e-07,
      "loss": 0.0085,
      "step": 2851200
    },
    {
      "epoch": 4.666084064858637,
      "grad_norm": 0.27259352803230286,
      "learning_rate": 6.722323623017468e-07,
      "loss": 0.0125,
      "step": 2851220
    },
    {
      "epoch": 4.666116795297291,
      "grad_norm": 0.4202776551246643,
      "learning_rate": 6.721664700882296e-07,
      "loss": 0.0075,
      "step": 2851240
    },
    {
      "epoch": 4.6661495257359435,
      "grad_norm": 0.23996059596538544,
      "learning_rate": 6.721005778747127e-07,
      "loss": 0.0074,
      "step": 2851260
    },
    {
      "epoch": 4.666182256174597,
      "grad_norm": 0.2461242824792862,
      "learning_rate": 6.720346856611955e-07,
      "loss": 0.008,
      "step": 2851280
    },
    {
      "epoch": 4.666214986613251,
      "grad_norm": 0.23863889276981354,
      "learning_rate": 6.719687934476784e-07,
      "loss": 0.0061,
      "step": 2851300
    },
    {
      "epoch": 4.666247717051904,
      "grad_norm": 0.26499995589256287,
      "learning_rate": 6.719029012341611e-07,
      "loss": 0.008,
      "step": 2851320
    },
    {
      "epoch": 4.666280447490557,
      "grad_norm": 0.19539009034633636,
      "learning_rate": 6.718370090206442e-07,
      "loss": 0.0106,
      "step": 2851340
    },
    {
      "epoch": 4.666313177929211,
      "grad_norm": 0.47042223811149597,
      "learning_rate": 6.71771116807127e-07,
      "loss": 0.012,
      "step": 2851360
    },
    {
      "epoch": 4.666345908367864,
      "grad_norm": 0.32289624214172363,
      "learning_rate": 6.717052245936099e-07,
      "loss": 0.0095,
      "step": 2851380
    },
    {
      "epoch": 4.666378638806517,
      "grad_norm": 0.2045758068561554,
      "learning_rate": 6.716393323800927e-07,
      "loss": 0.0067,
      "step": 2851400
    },
    {
      "epoch": 4.6664113692451705,
      "grad_norm": 0.04503203183412552,
      "learning_rate": 6.715734401665757e-07,
      "loss": 0.0061,
      "step": 2851420
    },
    {
      "epoch": 4.666444099683824,
      "grad_norm": 0.2239483892917633,
      "learning_rate": 6.715075479530585e-07,
      "loss": 0.0083,
      "step": 2851440
    },
    {
      "epoch": 4.666476830122477,
      "grad_norm": 0.22577130794525146,
      "learning_rate": 6.714416557395413e-07,
      "loss": 0.0085,
      "step": 2851460
    },
    {
      "epoch": 4.66650956056113,
      "grad_norm": 0.6593191027641296,
      "learning_rate": 6.713757635260242e-07,
      "loss": 0.0101,
      "step": 2851480
    },
    {
      "epoch": 4.666542290999784,
      "grad_norm": 0.23831807076931,
      "learning_rate": 6.71309871312507e-07,
      "loss": 0.0067,
      "step": 2851500
    },
    {
      "epoch": 4.666575021438438,
      "grad_norm": 0.33587878942489624,
      "learning_rate": 6.7124397909899e-07,
      "loss": 0.0107,
      "step": 2851520
    },
    {
      "epoch": 4.66660775187709,
      "grad_norm": 0.12417399138212204,
      "learning_rate": 6.711780868854728e-07,
      "loss": 0.0141,
      "step": 2851540
    },
    {
      "epoch": 4.666640482315744,
      "grad_norm": 0.543660581111908,
      "learning_rate": 6.711121946719557e-07,
      "loss": 0.0145,
      "step": 2851560
    },
    {
      "epoch": 4.6666732127543975,
      "grad_norm": 0.19036608934402466,
      "learning_rate": 6.710463024584385e-07,
      "loss": 0.0107,
      "step": 2851580
    },
    {
      "epoch": 4.666705943193051,
      "grad_norm": 0.07698541134595871,
      "learning_rate": 6.709804102449215e-07,
      "loss": 0.0103,
      "step": 2851600
    },
    {
      "epoch": 4.666738673631704,
      "grad_norm": 0.18862232565879822,
      "learning_rate": 6.709145180314043e-07,
      "loss": 0.0054,
      "step": 2851620
    },
    {
      "epoch": 4.666771404070357,
      "grad_norm": 0.1948370635509491,
      "learning_rate": 6.708486258178872e-07,
      "loss": 0.0136,
      "step": 2851640
    },
    {
      "epoch": 4.666804134509011,
      "grad_norm": 0.23914092779159546,
      "learning_rate": 6.7078273360437e-07,
      "loss": 0.0073,
      "step": 2851660
    },
    {
      "epoch": 4.666836864947664,
      "grad_norm": 0.1647815704345703,
      "learning_rate": 6.707168413908529e-07,
      "loss": 0.0132,
      "step": 2851680
    },
    {
      "epoch": 4.666869595386317,
      "grad_norm": 0.22609201073646545,
      "learning_rate": 6.706509491773358e-07,
      "loss": 0.0101,
      "step": 2851700
    },
    {
      "epoch": 4.666902325824971,
      "grad_norm": 0.38710981607437134,
      "learning_rate": 6.705850569638187e-07,
      "loss": 0.0101,
      "step": 2851720
    },
    {
      "epoch": 4.666935056263624,
      "grad_norm": 0.42040538787841797,
      "learning_rate": 6.705191647503015e-07,
      "loss": 0.0096,
      "step": 2851740
    },
    {
      "epoch": 4.666967786702277,
      "grad_norm": 0.14676901698112488,
      "learning_rate": 6.704532725367843e-07,
      "loss": 0.0066,
      "step": 2851760
    },
    {
      "epoch": 4.667000517140931,
      "grad_norm": 1.0488945245742798,
      "learning_rate": 6.703873803232673e-07,
      "loss": 0.0089,
      "step": 2851780
    },
    {
      "epoch": 4.667033247579584,
      "grad_norm": 0.2397497594356537,
      "learning_rate": 6.703214881097501e-07,
      "loss": 0.0109,
      "step": 2851800
    },
    {
      "epoch": 4.667065978018237,
      "grad_norm": 0.042926859110593796,
      "learning_rate": 6.70255595896233e-07,
      "loss": 0.0068,
      "step": 2851820
    },
    {
      "epoch": 4.667098708456891,
      "grad_norm": 0.18871574103832245,
      "learning_rate": 6.701897036827158e-07,
      "loss": 0.0102,
      "step": 2851840
    },
    {
      "epoch": 4.667131438895544,
      "grad_norm": 0.23274314403533936,
      "learning_rate": 6.701238114691988e-07,
      "loss": 0.0078,
      "step": 2851860
    },
    {
      "epoch": 4.667164169334198,
      "grad_norm": 0.033328525722026825,
      "learning_rate": 6.700579192556816e-07,
      "loss": 0.0055,
      "step": 2851880
    },
    {
      "epoch": 4.667196899772851,
      "grad_norm": 0.45006412267684937,
      "learning_rate": 6.699920270421645e-07,
      "loss": 0.0097,
      "step": 2851900
    },
    {
      "epoch": 4.667229630211504,
      "grad_norm": 0.1658530980348587,
      "learning_rate": 6.699261348286473e-07,
      "loss": 0.0089,
      "step": 2851920
    },
    {
      "epoch": 4.667262360650158,
      "grad_norm": 0.10487993061542511,
      "learning_rate": 6.698602426151302e-07,
      "loss": 0.0086,
      "step": 2851940
    },
    {
      "epoch": 4.6672950910888105,
      "grad_norm": 0.15600192546844482,
      "learning_rate": 6.697943504016131e-07,
      "loss": 0.007,
      "step": 2851960
    },
    {
      "epoch": 4.667327821527464,
      "grad_norm": 0.20383982360363007,
      "learning_rate": 6.69728458188096e-07,
      "loss": 0.0118,
      "step": 2851980
    },
    {
      "epoch": 4.667360551966118,
      "grad_norm": 0.7081028819084167,
      "learning_rate": 6.696625659745788e-07,
      "loss": 0.0113,
      "step": 2852000
    },
    {
      "epoch": 4.66739328240477,
      "grad_norm": 0.29253625869750977,
      "learning_rate": 6.695966737610617e-07,
      "loss": 0.0197,
      "step": 2852020
    },
    {
      "epoch": 4.667426012843424,
      "grad_norm": 0.20691312849521637,
      "learning_rate": 6.695307815475446e-07,
      "loss": 0.0116,
      "step": 2852040
    },
    {
      "epoch": 4.6674587432820775,
      "grad_norm": 0.2699487507343292,
      "learning_rate": 6.694648893340275e-07,
      "loss": 0.0116,
      "step": 2852060
    },
    {
      "epoch": 4.667491473720731,
      "grad_norm": 0.2352142184972763,
      "learning_rate": 6.693989971205103e-07,
      "loss": 0.0074,
      "step": 2852080
    },
    {
      "epoch": 4.667524204159384,
      "grad_norm": 0.22173793613910675,
      "learning_rate": 6.693331049069931e-07,
      "loss": 0.0062,
      "step": 2852100
    },
    {
      "epoch": 4.667556934598037,
      "grad_norm": 0.16724109649658203,
      "learning_rate": 6.69267212693476e-07,
      "loss": 0.0088,
      "step": 2852120
    },
    {
      "epoch": 4.667589665036691,
      "grad_norm": 0.3697716295719147,
      "learning_rate": 6.692013204799589e-07,
      "loss": 0.0129,
      "step": 2852140
    },
    {
      "epoch": 4.667622395475345,
      "grad_norm": 0.49862948060035706,
      "learning_rate": 6.691354282664418e-07,
      "loss": 0.0084,
      "step": 2852160
    },
    {
      "epoch": 4.667655125913997,
      "grad_norm": 0.07743280380964279,
      "learning_rate": 6.690695360529246e-07,
      "loss": 0.0085,
      "step": 2852180
    },
    {
      "epoch": 4.667687856352651,
      "grad_norm": 0.09814199060201645,
      "learning_rate": 6.690036438394075e-07,
      "loss": 0.0082,
      "step": 2852200
    },
    {
      "epoch": 4.6677205867913045,
      "grad_norm": 0.856835663318634,
      "learning_rate": 6.689377516258904e-07,
      "loss": 0.0115,
      "step": 2852220
    },
    {
      "epoch": 4.667753317229957,
      "grad_norm": 0.20607121288776398,
      "learning_rate": 6.688718594123733e-07,
      "loss": 0.0048,
      "step": 2852240
    },
    {
      "epoch": 4.667786047668611,
      "grad_norm": 0.16063247621059418,
      "learning_rate": 6.688059671988561e-07,
      "loss": 0.0098,
      "step": 2852260
    },
    {
      "epoch": 4.667818778107264,
      "grad_norm": 0.13205265998840332,
      "learning_rate": 6.68740074985339e-07,
      "loss": 0.0059,
      "step": 2852280
    },
    {
      "epoch": 4.667851508545917,
      "grad_norm": 0.19817571341991425,
      "learning_rate": 6.686741827718219e-07,
      "loss": 0.0072,
      "step": 2852300
    },
    {
      "epoch": 4.667884238984571,
      "grad_norm": 0.17498193681240082,
      "learning_rate": 6.686082905583048e-07,
      "loss": 0.0092,
      "step": 2852320
    },
    {
      "epoch": 4.667916969423224,
      "grad_norm": 0.26774880290031433,
      "learning_rate": 6.685423983447876e-07,
      "loss": 0.0081,
      "step": 2852340
    },
    {
      "epoch": 4.667949699861878,
      "grad_norm": 0.15895700454711914,
      "learning_rate": 6.684765061312705e-07,
      "loss": 0.014,
      "step": 2852360
    },
    {
      "epoch": 4.667982430300531,
      "grad_norm": 0.3731091320514679,
      "learning_rate": 6.684106139177533e-07,
      "loss": 0.0115,
      "step": 2852380
    },
    {
      "epoch": 4.668015160739184,
      "grad_norm": 0.1905640810728073,
      "learning_rate": 6.683447217042364e-07,
      "loss": 0.0078,
      "step": 2852400
    },
    {
      "epoch": 4.668047891177838,
      "grad_norm": 0.208515465259552,
      "learning_rate": 6.682788294907191e-07,
      "loss": 0.0086,
      "step": 2852420
    },
    {
      "epoch": 4.668080621616491,
      "grad_norm": 0.1445857286453247,
      "learning_rate": 6.682129372772019e-07,
      "loss": 0.0115,
      "step": 2852440
    },
    {
      "epoch": 4.668113352055144,
      "grad_norm": 0.08955053985118866,
      "learning_rate": 6.681470450636848e-07,
      "loss": 0.0104,
      "step": 2852460
    },
    {
      "epoch": 4.668146082493798,
      "grad_norm": 0.43886491656303406,
      "learning_rate": 6.680811528501679e-07,
      "loss": 0.0123,
      "step": 2852480
    },
    {
      "epoch": 4.668178812932451,
      "grad_norm": 0.2639051079750061,
      "learning_rate": 6.680152606366507e-07,
      "loss": 0.0082,
      "step": 2852500
    },
    {
      "epoch": 4.668211543371104,
      "grad_norm": 0.15923500061035156,
      "learning_rate": 6.679493684231334e-07,
      "loss": 0.0073,
      "step": 2852520
    },
    {
      "epoch": 4.668244273809758,
      "grad_norm": 0.6364296674728394,
      "learning_rate": 6.678834762096163e-07,
      "loss": 0.0092,
      "step": 2852540
    },
    {
      "epoch": 4.668277004248411,
      "grad_norm": 0.09403364360332489,
      "learning_rate": 6.678175839960991e-07,
      "loss": 0.0068,
      "step": 2852560
    },
    {
      "epoch": 4.668309734687064,
      "grad_norm": 0.08773303031921387,
      "learning_rate": 6.677516917825822e-07,
      "loss": 0.0084,
      "step": 2852580
    },
    {
      "epoch": 4.6683424651257175,
      "grad_norm": 0.4755482077598572,
      "learning_rate": 6.67685799569065e-07,
      "loss": 0.0071,
      "step": 2852600
    },
    {
      "epoch": 4.668375195564371,
      "grad_norm": 0.4806281626224518,
      "learning_rate": 6.676199073555479e-07,
      "loss": 0.0131,
      "step": 2852620
    },
    {
      "epoch": 4.668407926003025,
      "grad_norm": 0.05618557706475258,
      "learning_rate": 6.675540151420306e-07,
      "loss": 0.0084,
      "step": 2852640
    },
    {
      "epoch": 4.668440656441677,
      "grad_norm": 0.19918037950992584,
      "learning_rate": 6.674881229285137e-07,
      "loss": 0.009,
      "step": 2852660
    },
    {
      "epoch": 4.668473386880331,
      "grad_norm": 0.544215202331543,
      "learning_rate": 6.674222307149965e-07,
      "loss": 0.0169,
      "step": 2852680
    },
    {
      "epoch": 4.668506117318985,
      "grad_norm": 0.12122796475887299,
      "learning_rate": 6.673563385014794e-07,
      "loss": 0.0097,
      "step": 2852700
    },
    {
      "epoch": 4.668538847757637,
      "grad_norm": 0.13185395300388336,
      "learning_rate": 6.672904462879622e-07,
      "loss": 0.0077,
      "step": 2852720
    },
    {
      "epoch": 4.668571578196291,
      "grad_norm": 0.31556379795074463,
      "learning_rate": 6.672245540744452e-07,
      "loss": 0.0072,
      "step": 2852740
    },
    {
      "epoch": 4.6686043086349445,
      "grad_norm": 0.18831366300582886,
      "learning_rate": 6.67158661860928e-07,
      "loss": 0.0085,
      "step": 2852760
    },
    {
      "epoch": 4.668637039073598,
      "grad_norm": 0.23760651051998138,
      "learning_rate": 6.670927696474109e-07,
      "loss": 0.0061,
      "step": 2852780
    },
    {
      "epoch": 4.668669769512251,
      "grad_norm": 0.09381631016731262,
      "learning_rate": 6.670268774338937e-07,
      "loss": 0.0089,
      "step": 2852800
    },
    {
      "epoch": 4.668702499950904,
      "grad_norm": 0.15481439232826233,
      "learning_rate": 6.669609852203765e-07,
      "loss": 0.0101,
      "step": 2852820
    },
    {
      "epoch": 4.668735230389558,
      "grad_norm": 0.10426273196935654,
      "learning_rate": 6.668950930068595e-07,
      "loss": 0.0069,
      "step": 2852840
    },
    {
      "epoch": 4.668767960828211,
      "grad_norm": 0.15141510963439941,
      "learning_rate": 6.668292007933423e-07,
      "loss": 0.0074,
      "step": 2852860
    },
    {
      "epoch": 4.668800691266864,
      "grad_norm": 0.3518960475921631,
      "learning_rate": 6.667633085798252e-07,
      "loss": 0.0062,
      "step": 2852880
    },
    {
      "epoch": 4.668833421705518,
      "grad_norm": 0.065827876329422,
      "learning_rate": 6.66697416366308e-07,
      "loss": 0.0059,
      "step": 2852900
    },
    {
      "epoch": 4.668866152144171,
      "grad_norm": 0.323080450296402,
      "learning_rate": 6.66631524152791e-07,
      "loss": 0.0119,
      "step": 2852920
    },
    {
      "epoch": 4.668898882582824,
      "grad_norm": 0.3626675605773926,
      "learning_rate": 6.665656319392738e-07,
      "loss": 0.0117,
      "step": 2852940
    },
    {
      "epoch": 4.668931613021478,
      "grad_norm": 0.5162507891654968,
      "learning_rate": 6.664997397257567e-07,
      "loss": 0.0083,
      "step": 2852960
    },
    {
      "epoch": 4.668964343460131,
      "grad_norm": 0.0917024314403534,
      "learning_rate": 6.664338475122395e-07,
      "loss": 0.0085,
      "step": 2852980
    },
    {
      "epoch": 4.668997073898784,
      "grad_norm": 0.09153428673744202,
      "learning_rate": 6.663679552987224e-07,
      "loss": 0.006,
      "step": 2853000
    },
    {
      "epoch": 4.669029804337438,
      "grad_norm": 0.07033930718898773,
      "learning_rate": 6.663020630852053e-07,
      "loss": 0.0063,
      "step": 2853020
    },
    {
      "epoch": 4.669062534776091,
      "grad_norm": 0.31418895721435547,
      "learning_rate": 6.662361708716882e-07,
      "loss": 0.0123,
      "step": 2853040
    },
    {
      "epoch": 4.669095265214745,
      "grad_norm": 0.2529838979244232,
      "learning_rate": 6.66170278658171e-07,
      "loss": 0.0106,
      "step": 2853060
    },
    {
      "epoch": 4.6691279956533975,
      "grad_norm": 0.22605103254318237,
      "learning_rate": 6.661043864446539e-07,
      "loss": 0.0075,
      "step": 2853080
    },
    {
      "epoch": 4.669160726092051,
      "grad_norm": 0.08659342676401138,
      "learning_rate": 6.660384942311368e-07,
      "loss": 0.0109,
      "step": 2853100
    },
    {
      "epoch": 4.669193456530705,
      "grad_norm": 0.05909210816025734,
      "learning_rate": 6.659726020176197e-07,
      "loss": 0.006,
      "step": 2853120
    },
    {
      "epoch": 4.669226186969357,
      "grad_norm": 0.10603515058755875,
      "learning_rate": 6.659067098041025e-07,
      "loss": 0.0059,
      "step": 2853140
    },
    {
      "epoch": 4.669258917408011,
      "grad_norm": 0.20405571162700653,
      "learning_rate": 6.658408175905853e-07,
      "loss": 0.0072,
      "step": 2853160
    },
    {
      "epoch": 4.669291647846665,
      "grad_norm": 0.09714227169752121,
      "learning_rate": 6.657749253770683e-07,
      "loss": 0.0082,
      "step": 2853180
    },
    {
      "epoch": 4.669324378285317,
      "grad_norm": 0.1735645830631256,
      "learning_rate": 6.657090331635511e-07,
      "loss": 0.0107,
      "step": 2853200
    },
    {
      "epoch": 4.669357108723971,
      "grad_norm": 0.2992245852947235,
      "learning_rate": 6.65643140950034e-07,
      "loss": 0.0104,
      "step": 2853220
    },
    {
      "epoch": 4.6693898391626245,
      "grad_norm": 0.1722942292690277,
      "learning_rate": 6.655772487365168e-07,
      "loss": 0.0066,
      "step": 2853240
    },
    {
      "epoch": 4.669422569601278,
      "grad_norm": 0.26098984479904175,
      "learning_rate": 6.655113565229997e-07,
      "loss": 0.011,
      "step": 2853260
    },
    {
      "epoch": 4.669455300039931,
      "grad_norm": 0.1904473453760147,
      "learning_rate": 6.654454643094826e-07,
      "loss": 0.008,
      "step": 2853280
    },
    {
      "epoch": 4.669488030478584,
      "grad_norm": 0.213898703455925,
      "learning_rate": 6.653795720959655e-07,
      "loss": 0.0084,
      "step": 2853300
    },
    {
      "epoch": 4.669520760917238,
      "grad_norm": 0.4056510031223297,
      "learning_rate": 6.653136798824483e-07,
      "loss": 0.0065,
      "step": 2853320
    },
    {
      "epoch": 4.669553491355892,
      "grad_norm": 0.5397504568099976,
      "learning_rate": 6.652477876689312e-07,
      "loss": 0.0098,
      "step": 2853340
    },
    {
      "epoch": 4.669586221794544,
      "grad_norm": 0.10569236427545547,
      "learning_rate": 6.651818954554141e-07,
      "loss": 0.0111,
      "step": 2853360
    },
    {
      "epoch": 4.669618952233198,
      "grad_norm": 0.029775485396385193,
      "learning_rate": 6.65116003241897e-07,
      "loss": 0.0077,
      "step": 2853380
    },
    {
      "epoch": 4.6696516826718515,
      "grad_norm": 0.18665656447410583,
      "learning_rate": 6.650501110283798e-07,
      "loss": 0.0071,
      "step": 2853400
    },
    {
      "epoch": 4.669684413110504,
      "grad_norm": 0.22090229392051697,
      "learning_rate": 6.649842188148627e-07,
      "loss": 0.0136,
      "step": 2853420
    },
    {
      "epoch": 4.669717143549158,
      "grad_norm": 0.4485616385936737,
      "learning_rate": 6.649183266013455e-07,
      "loss": 0.0082,
      "step": 2853440
    },
    {
      "epoch": 4.669749873987811,
      "grad_norm": 0.42610788345336914,
      "learning_rate": 6.648524343878285e-07,
      "loss": 0.0084,
      "step": 2853460
    },
    {
      "epoch": 4.669782604426464,
      "grad_norm": 0.16963176429271698,
      "learning_rate": 6.647865421743113e-07,
      "loss": 0.0062,
      "step": 2853480
    },
    {
      "epoch": 4.669815334865118,
      "grad_norm": 0.11978515982627869,
      "learning_rate": 6.647206499607941e-07,
      "loss": 0.0101,
      "step": 2853500
    },
    {
      "epoch": 4.669848065303771,
      "grad_norm": 0.03767362982034683,
      "learning_rate": 6.64654757747277e-07,
      "loss": 0.0107,
      "step": 2853520
    },
    {
      "epoch": 4.669880795742425,
      "grad_norm": 0.3182889223098755,
      "learning_rate": 6.645888655337599e-07,
      "loss": 0.0061,
      "step": 2853540
    },
    {
      "epoch": 4.669913526181078,
      "grad_norm": 0.13663627207279205,
      "learning_rate": 6.645229733202428e-07,
      "loss": 0.0061,
      "step": 2853560
    },
    {
      "epoch": 4.669946256619731,
      "grad_norm": 0.17651180922985077,
      "learning_rate": 6.644570811067256e-07,
      "loss": 0.0074,
      "step": 2853580
    },
    {
      "epoch": 4.669978987058385,
      "grad_norm": 0.053391341120004654,
      "learning_rate": 6.643911888932085e-07,
      "loss": 0.0101,
      "step": 2853600
    },
    {
      "epoch": 4.670011717497038,
      "grad_norm": 0.21252548694610596,
      "learning_rate": 6.643252966796914e-07,
      "loss": 0.009,
      "step": 2853620
    },
    {
      "epoch": 4.670044447935691,
      "grad_norm": 0.2833159863948822,
      "learning_rate": 6.642594044661743e-07,
      "loss": 0.0082,
      "step": 2853640
    },
    {
      "epoch": 4.670077178374345,
      "grad_norm": 0.09335320442914963,
      "learning_rate": 6.641935122526571e-07,
      "loss": 0.008,
      "step": 2853660
    },
    {
      "epoch": 4.670109908812998,
      "grad_norm": 0.10523881018161774,
      "learning_rate": 6.6412762003914e-07,
      "loss": 0.0075,
      "step": 2853680
    },
    {
      "epoch": 4.670142639251651,
      "grad_norm": 0.1738256812095642,
      "learning_rate": 6.640617278256228e-07,
      "loss": 0.0071,
      "step": 2853700
    },
    {
      "epoch": 4.670175369690305,
      "grad_norm": 0.5896351337432861,
      "learning_rate": 6.639958356121059e-07,
      "loss": 0.0081,
      "step": 2853720
    },
    {
      "epoch": 4.670208100128958,
      "grad_norm": 0.32558897137641907,
      "learning_rate": 6.639299433985886e-07,
      "loss": 0.008,
      "step": 2853740
    },
    {
      "epoch": 4.670240830567611,
      "grad_norm": 0.3096584975719452,
      "learning_rate": 6.638640511850715e-07,
      "loss": 0.0113,
      "step": 2853760
    },
    {
      "epoch": 4.6702735610062645,
      "grad_norm": 0.13282208144664764,
      "learning_rate": 6.637981589715543e-07,
      "loss": 0.0136,
      "step": 2853780
    },
    {
      "epoch": 4.670306291444918,
      "grad_norm": 0.22110843658447266,
      "learning_rate": 6.637322667580374e-07,
      "loss": 0.0112,
      "step": 2853800
    },
    {
      "epoch": 4.670339021883572,
      "grad_norm": 0.09553968161344528,
      "learning_rate": 6.636663745445202e-07,
      "loss": 0.016,
      "step": 2853820
    },
    {
      "epoch": 4.670371752322224,
      "grad_norm": 0.1361275464296341,
      "learning_rate": 6.636004823310029e-07,
      "loss": 0.0074,
      "step": 2853840
    },
    {
      "epoch": 4.670404482760878,
      "grad_norm": 0.09355844557285309,
      "learning_rate": 6.635345901174858e-07,
      "loss": 0.0093,
      "step": 2853860
    },
    {
      "epoch": 4.6704372131995315,
      "grad_norm": 0.23354673385620117,
      "learning_rate": 6.634686979039688e-07,
      "loss": 0.0098,
      "step": 2853880
    },
    {
      "epoch": 4.670469943638185,
      "grad_norm": 0.13485586643218994,
      "learning_rate": 6.634028056904517e-07,
      "loss": 0.0089,
      "step": 2853900
    },
    {
      "epoch": 4.670502674076838,
      "grad_norm": 0.6557831168174744,
      "learning_rate": 6.633369134769345e-07,
      "loss": 0.0105,
      "step": 2853920
    },
    {
      "epoch": 4.670535404515491,
      "grad_norm": 0.4264722466468811,
      "learning_rate": 6.632710212634174e-07,
      "loss": 0.0085,
      "step": 2853940
    },
    {
      "epoch": 4.670568134954145,
      "grad_norm": 0.11615193635225296,
      "learning_rate": 6.632051290499001e-07,
      "loss": 0.0072,
      "step": 2853960
    },
    {
      "epoch": 4.670600865392798,
      "grad_norm": 0.21171583235263824,
      "learning_rate": 6.631392368363832e-07,
      "loss": 0.006,
      "step": 2853980
    },
    {
      "epoch": 4.670633595831451,
      "grad_norm": 0.17067746818065643,
      "learning_rate": 6.63073344622866e-07,
      "loss": 0.0127,
      "step": 2854000
    },
    {
      "epoch": 4.670666326270105,
      "grad_norm": 0.1702585518360138,
      "learning_rate": 6.630074524093489e-07,
      "loss": 0.0095,
      "step": 2854020
    },
    {
      "epoch": 4.670699056708758,
      "grad_norm": 0.25488871335983276,
      "learning_rate": 6.629415601958317e-07,
      "loss": 0.007,
      "step": 2854040
    },
    {
      "epoch": 4.670731787147411,
      "grad_norm": 0.12969321012496948,
      "learning_rate": 6.628756679823147e-07,
      "loss": 0.0137,
      "step": 2854060
    },
    {
      "epoch": 4.670764517586065,
      "grad_norm": 0.4629165828227997,
      "learning_rate": 6.628097757687975e-07,
      "loss": 0.0069,
      "step": 2854080
    },
    {
      "epoch": 4.670797248024718,
      "grad_norm": 1.0116056203842163,
      "learning_rate": 6.627438835552804e-07,
      "loss": 0.0129,
      "step": 2854100
    },
    {
      "epoch": 4.670829978463371,
      "grad_norm": 0.2324039191007614,
      "learning_rate": 6.626779913417632e-07,
      "loss": 0.0093,
      "step": 2854120
    },
    {
      "epoch": 4.670862708902025,
      "grad_norm": 0.30283764004707336,
      "learning_rate": 6.62612099128246e-07,
      "loss": 0.0125,
      "step": 2854140
    },
    {
      "epoch": 4.670895439340678,
      "grad_norm": 0.43562906980514526,
      "learning_rate": 6.62546206914729e-07,
      "loss": 0.0106,
      "step": 2854160
    },
    {
      "epoch": 4.670928169779331,
      "grad_norm": 0.142672598361969,
      "learning_rate": 6.624803147012118e-07,
      "loss": 0.0072,
      "step": 2854180
    },
    {
      "epoch": 4.670960900217985,
      "grad_norm": 0.06612961739301682,
      "learning_rate": 6.624144224876947e-07,
      "loss": 0.0059,
      "step": 2854200
    },
    {
      "epoch": 4.670993630656638,
      "grad_norm": 0.30504223704338074,
      "learning_rate": 6.623485302741775e-07,
      "loss": 0.0106,
      "step": 2854220
    },
    {
      "epoch": 4.671026361095292,
      "grad_norm": 0.07256726175546646,
      "learning_rate": 6.622826380606605e-07,
      "loss": 0.0107,
      "step": 2854240
    },
    {
      "epoch": 4.6710590915339445,
      "grad_norm": 0.3265087902545929,
      "learning_rate": 6.622167458471433e-07,
      "loss": 0.0073,
      "step": 2854260
    },
    {
      "epoch": 4.671091821972598,
      "grad_norm": 0.48237594962120056,
      "learning_rate": 6.621508536336262e-07,
      "loss": 0.0102,
      "step": 2854280
    },
    {
      "epoch": 4.671124552411252,
      "grad_norm": 0.4103287160396576,
      "learning_rate": 6.62084961420109e-07,
      "loss": 0.0102,
      "step": 2854300
    },
    {
      "epoch": 4.671157282849904,
      "grad_norm": 0.4242338538169861,
      "learning_rate": 6.62019069206592e-07,
      "loss": 0.0075,
      "step": 2854320
    },
    {
      "epoch": 4.671190013288558,
      "grad_norm": 0.3743169605731964,
      "learning_rate": 6.619531769930748e-07,
      "loss": 0.0092,
      "step": 2854340
    },
    {
      "epoch": 4.671222743727212,
      "grad_norm": 0.48136013746261597,
      "learning_rate": 6.618872847795577e-07,
      "loss": 0.0098,
      "step": 2854360
    },
    {
      "epoch": 4.671255474165864,
      "grad_norm": 0.19433043897151947,
      "learning_rate": 6.618213925660405e-07,
      "loss": 0.0098,
      "step": 2854380
    },
    {
      "epoch": 4.671288204604518,
      "grad_norm": 0.10211841762065887,
      "learning_rate": 6.617555003525234e-07,
      "loss": 0.0063,
      "step": 2854400
    },
    {
      "epoch": 4.6713209350431715,
      "grad_norm": 0.21326331794261932,
      "learning_rate": 6.616896081390063e-07,
      "loss": 0.007,
      "step": 2854420
    },
    {
      "epoch": 4.671353665481825,
      "grad_norm": 0.07856731861829758,
      "learning_rate": 6.616237159254892e-07,
      "loss": 0.0085,
      "step": 2854440
    },
    {
      "epoch": 4.671386395920478,
      "grad_norm": 0.20403516292572021,
      "learning_rate": 6.61557823711972e-07,
      "loss": 0.0052,
      "step": 2854460
    },
    {
      "epoch": 4.671419126359131,
      "grad_norm": 0.11795070767402649,
      "learning_rate": 6.614919314984548e-07,
      "loss": 0.0087,
      "step": 2854480
    },
    {
      "epoch": 4.671451856797785,
      "grad_norm": 0.2989160120487213,
      "learning_rate": 6.614260392849378e-07,
      "loss": 0.0077,
      "step": 2854500
    },
    {
      "epoch": 4.671484587236439,
      "grad_norm": 0.3858105540275574,
      "learning_rate": 6.613601470714206e-07,
      "loss": 0.0091,
      "step": 2854520
    },
    {
      "epoch": 4.671517317675091,
      "grad_norm": 0.13777975738048553,
      "learning_rate": 6.612942548579035e-07,
      "loss": 0.009,
      "step": 2854540
    },
    {
      "epoch": 4.671550048113745,
      "grad_norm": 0.1365286409854889,
      "learning_rate": 6.612283626443863e-07,
      "loss": 0.0073,
      "step": 2854560
    },
    {
      "epoch": 4.6715827785523985,
      "grad_norm": 0.25290241837501526,
      "learning_rate": 6.611624704308692e-07,
      "loss": 0.01,
      "step": 2854580
    },
    {
      "epoch": 4.671615508991051,
      "grad_norm": 0.17217668890953064,
      "learning_rate": 6.610965782173521e-07,
      "loss": 0.0082,
      "step": 2854600
    },
    {
      "epoch": 4.671648239429705,
      "grad_norm": 0.1825399547815323,
      "learning_rate": 6.61030686003835e-07,
      "loss": 0.0102,
      "step": 2854620
    },
    {
      "epoch": 4.671680969868358,
      "grad_norm": 0.16729700565338135,
      "learning_rate": 6.609647937903178e-07,
      "loss": 0.0092,
      "step": 2854640
    },
    {
      "epoch": 4.671713700307011,
      "grad_norm": 0.339609295129776,
      "learning_rate": 6.608989015768007e-07,
      "loss": 0.0102,
      "step": 2854660
    },
    {
      "epoch": 4.671746430745665,
      "grad_norm": 0.33513545989990234,
      "learning_rate": 6.608330093632836e-07,
      "loss": 0.0071,
      "step": 2854680
    },
    {
      "epoch": 4.671779161184318,
      "grad_norm": 0.26381349563598633,
      "learning_rate": 6.607671171497665e-07,
      "loss": 0.0062,
      "step": 2854700
    },
    {
      "epoch": 4.671811891622972,
      "grad_norm": 0.41130104660987854,
      "learning_rate": 6.607012249362493e-07,
      "loss": 0.0107,
      "step": 2854720
    },
    {
      "epoch": 4.6718446220616245,
      "grad_norm": 0.33600565791130066,
      "learning_rate": 6.606353327227322e-07,
      "loss": 0.0076,
      "step": 2854740
    },
    {
      "epoch": 4.671877352500278,
      "grad_norm": 0.11839111894369125,
      "learning_rate": 6.605694405092151e-07,
      "loss": 0.0089,
      "step": 2854760
    },
    {
      "epoch": 4.671910082938932,
      "grad_norm": 0.26585057377815247,
      "learning_rate": 6.60503548295698e-07,
      "loss": 0.009,
      "step": 2854780
    },
    {
      "epoch": 4.671942813377585,
      "grad_norm": 0.1392957866191864,
      "learning_rate": 6.604376560821808e-07,
      "loss": 0.0062,
      "step": 2854800
    },
    {
      "epoch": 4.671975543816238,
      "grad_norm": 0.3620067834854126,
      "learning_rate": 6.603717638686636e-07,
      "loss": 0.0064,
      "step": 2854820
    },
    {
      "epoch": 4.672008274254892,
      "grad_norm": 0.07419993728399277,
      "learning_rate": 6.603058716551465e-07,
      "loss": 0.0061,
      "step": 2854840
    },
    {
      "epoch": 4.672041004693545,
      "grad_norm": 0.19176030158996582,
      "learning_rate": 6.602399794416294e-07,
      "loss": 0.009,
      "step": 2854860
    },
    {
      "epoch": 4.672073735132198,
      "grad_norm": 0.163535013794899,
      "learning_rate": 6.601740872281123e-07,
      "loss": 0.011,
      "step": 2854880
    },
    {
      "epoch": 4.6721064655708515,
      "grad_norm": 0.12850545346736908,
      "learning_rate": 6.601081950145951e-07,
      "loss": 0.0122,
      "step": 2854900
    },
    {
      "epoch": 4.672139196009505,
      "grad_norm": 0.18991819024085999,
      "learning_rate": 6.60042302801078e-07,
      "loss": 0.0091,
      "step": 2854920
    },
    {
      "epoch": 4.672171926448158,
      "grad_norm": 0.11098118871450424,
      "learning_rate": 6.599764105875609e-07,
      "loss": 0.0129,
      "step": 2854940
    },
    {
      "epoch": 4.672204656886811,
      "grad_norm": 0.15937544405460358,
      "learning_rate": 6.599105183740438e-07,
      "loss": 0.0077,
      "step": 2854960
    },
    {
      "epoch": 4.672237387325465,
      "grad_norm": 0.35286062955856323,
      "learning_rate": 6.598446261605266e-07,
      "loss": 0.0091,
      "step": 2854980
    },
    {
      "epoch": 4.672270117764119,
      "grad_norm": 0.17565655708312988,
      "learning_rate": 6.597787339470095e-07,
      "loss": 0.0095,
      "step": 2855000
    },
    {
      "epoch": 4.672302848202771,
      "grad_norm": 0.5791624188423157,
      "learning_rate": 6.597128417334923e-07,
      "loss": 0.0112,
      "step": 2855020
    },
    {
      "epoch": 4.672335578641425,
      "grad_norm": 0.2090310901403427,
      "learning_rate": 6.596469495199754e-07,
      "loss": 0.0057,
      "step": 2855040
    },
    {
      "epoch": 4.6723683090800785,
      "grad_norm": 0.19457903504371643,
      "learning_rate": 6.595810573064581e-07,
      "loss": 0.0156,
      "step": 2855060
    },
    {
      "epoch": 4.672401039518732,
      "grad_norm": 0.3483066260814667,
      "learning_rate": 6.59515165092941e-07,
      "loss": 0.0076,
      "step": 2855080
    },
    {
      "epoch": 4.672433769957385,
      "grad_norm": 0.22419314086437225,
      "learning_rate": 6.594492728794238e-07,
      "loss": 0.0086,
      "step": 2855100
    },
    {
      "epoch": 4.672466500396038,
      "grad_norm": 0.054489780217409134,
      "learning_rate": 6.593833806659069e-07,
      "loss": 0.0051,
      "step": 2855120
    },
    {
      "epoch": 4.672499230834692,
      "grad_norm": 0.1565432846546173,
      "learning_rate": 6.593174884523897e-07,
      "loss": 0.0069,
      "step": 2855140
    },
    {
      "epoch": 4.672531961273345,
      "grad_norm": 0.3294634222984314,
      "learning_rate": 6.592515962388724e-07,
      "loss": 0.0115,
      "step": 2855160
    },
    {
      "epoch": 4.672564691711998,
      "grad_norm": 0.29659003019332886,
      "learning_rate": 6.591857040253553e-07,
      "loss": 0.0082,
      "step": 2855180
    },
    {
      "epoch": 4.672597422150652,
      "grad_norm": 0.11560454219579697,
      "learning_rate": 6.591198118118383e-07,
      "loss": 0.0111,
      "step": 2855200
    },
    {
      "epoch": 4.672630152589305,
      "grad_norm": 0.3907489478588104,
      "learning_rate": 6.590539195983212e-07,
      "loss": 0.0095,
      "step": 2855220
    },
    {
      "epoch": 4.672662883027958,
      "grad_norm": 0.12656214833259583,
      "learning_rate": 6.58988027384804e-07,
      "loss": 0.009,
      "step": 2855240
    },
    {
      "epoch": 4.672695613466612,
      "grad_norm": 0.21099962294101715,
      "learning_rate": 6.589221351712869e-07,
      "loss": 0.0109,
      "step": 2855260
    },
    {
      "epoch": 4.672728343905265,
      "grad_norm": 0.4930080473423004,
      "learning_rate": 6.588562429577696e-07,
      "loss": 0.0144,
      "step": 2855280
    },
    {
      "epoch": 4.672761074343918,
      "grad_norm": 0.7129439115524292,
      "learning_rate": 6.587903507442527e-07,
      "loss": 0.0096,
      "step": 2855300
    },
    {
      "epoch": 4.672793804782572,
      "grad_norm": 0.16283206641674042,
      "learning_rate": 6.587244585307355e-07,
      "loss": 0.0064,
      "step": 2855320
    },
    {
      "epoch": 4.672826535221225,
      "grad_norm": 0.12713897228240967,
      "learning_rate": 6.586585663172184e-07,
      "loss": 0.0078,
      "step": 2855340
    },
    {
      "epoch": 4.672859265659879,
      "grad_norm": 0.07002687454223633,
      "learning_rate": 6.585926741037012e-07,
      "loss": 0.0075,
      "step": 2855360
    },
    {
      "epoch": 4.672891996098532,
      "grad_norm": 0.11542045325040817,
      "learning_rate": 6.585267818901842e-07,
      "loss": 0.0046,
      "step": 2855380
    },
    {
      "epoch": 4.672924726537185,
      "grad_norm": 0.205338254570961,
      "learning_rate": 6.58460889676667e-07,
      "loss": 0.0084,
      "step": 2855400
    },
    {
      "epoch": 4.672957456975839,
      "grad_norm": 0.24344989657402039,
      "learning_rate": 6.583949974631499e-07,
      "loss": 0.0083,
      "step": 2855420
    },
    {
      "epoch": 4.6729901874144915,
      "grad_norm": 0.4317764937877655,
      "learning_rate": 6.583291052496327e-07,
      "loss": 0.0091,
      "step": 2855440
    },
    {
      "epoch": 4.673022917853145,
      "grad_norm": 0.22581811249256134,
      "learning_rate": 6.582632130361156e-07,
      "loss": 0.006,
      "step": 2855460
    },
    {
      "epoch": 4.673055648291799,
      "grad_norm": 0.09479884803295135,
      "learning_rate": 6.581973208225985e-07,
      "loss": 0.0065,
      "step": 2855480
    },
    {
      "epoch": 4.673088378730451,
      "grad_norm": 0.3677390217781067,
      "learning_rate": 6.581314286090814e-07,
      "loss": 0.0082,
      "step": 2855500
    },
    {
      "epoch": 4.673121109169105,
      "grad_norm": 1.6809353828430176,
      "learning_rate": 6.580655363955642e-07,
      "loss": 0.0075,
      "step": 2855520
    },
    {
      "epoch": 4.6731538396077585,
      "grad_norm": 0.09863222390413284,
      "learning_rate": 6.57999644182047e-07,
      "loss": 0.0118,
      "step": 2855540
    },
    {
      "epoch": 4.673186570046412,
      "grad_norm": 0.5621077418327332,
      "learning_rate": 6.5793375196853e-07,
      "loss": 0.0103,
      "step": 2855560
    },
    {
      "epoch": 4.673219300485065,
      "grad_norm": 0.5512149930000305,
      "learning_rate": 6.578678597550128e-07,
      "loss": 0.0086,
      "step": 2855580
    },
    {
      "epoch": 4.673252030923718,
      "grad_norm": 0.1614590883255005,
      "learning_rate": 6.578019675414957e-07,
      "loss": 0.0075,
      "step": 2855600
    },
    {
      "epoch": 4.673284761362372,
      "grad_norm": 0.26604241132736206,
      "learning_rate": 6.577360753279785e-07,
      "loss": 0.0112,
      "step": 2855620
    },
    {
      "epoch": 4.673317491801025,
      "grad_norm": 0.13026732206344604,
      "learning_rate": 6.576701831144615e-07,
      "loss": 0.0083,
      "step": 2855640
    },
    {
      "epoch": 4.673350222239678,
      "grad_norm": 0.5032438039779663,
      "learning_rate": 6.576042909009443e-07,
      "loss": 0.0109,
      "step": 2855660
    },
    {
      "epoch": 4.673382952678332,
      "grad_norm": 0.11941352486610413,
      "learning_rate": 6.575383986874272e-07,
      "loss": 0.0156,
      "step": 2855680
    },
    {
      "epoch": 4.6734156831169855,
      "grad_norm": 0.24824732542037964,
      "learning_rate": 6.5747250647391e-07,
      "loss": 0.0099,
      "step": 2855700
    },
    {
      "epoch": 4.673448413555638,
      "grad_norm": 0.21278172731399536,
      "learning_rate": 6.574066142603929e-07,
      "loss": 0.0093,
      "step": 2855720
    },
    {
      "epoch": 4.673481143994292,
      "grad_norm": 0.2211969494819641,
      "learning_rate": 6.573407220468758e-07,
      "loss": 0.0085,
      "step": 2855740
    },
    {
      "epoch": 4.673513874432945,
      "grad_norm": 0.16826489567756653,
      "learning_rate": 6.572748298333587e-07,
      "loss": 0.0094,
      "step": 2855760
    },
    {
      "epoch": 4.673546604871598,
      "grad_norm": 0.41166195273399353,
      "learning_rate": 6.572089376198415e-07,
      "loss": 0.0066,
      "step": 2855780
    },
    {
      "epoch": 4.673579335310252,
      "grad_norm": 0.2248762995004654,
      "learning_rate": 6.571430454063244e-07,
      "loss": 0.0078,
      "step": 2855800
    },
    {
      "epoch": 4.673612065748905,
      "grad_norm": 0.49245503544807434,
      "learning_rate": 6.570771531928073e-07,
      "loss": 0.01,
      "step": 2855820
    },
    {
      "epoch": 4.673644796187558,
      "grad_norm": 0.08110959827899933,
      "learning_rate": 6.570112609792902e-07,
      "loss": 0.0062,
      "step": 2855840
    },
    {
      "epoch": 4.673677526626212,
      "grad_norm": 0.21707819402217865,
      "learning_rate": 6.56945368765773e-07,
      "loss": 0.0069,
      "step": 2855860
    },
    {
      "epoch": 4.673710257064865,
      "grad_norm": 0.6954935193061829,
      "learning_rate": 6.568794765522558e-07,
      "loss": 0.0103,
      "step": 2855880
    },
    {
      "epoch": 4.673742987503519,
      "grad_norm": 0.4460708796977997,
      "learning_rate": 6.568135843387387e-07,
      "loss": 0.0098,
      "step": 2855900
    },
    {
      "epoch": 4.6737757179421715,
      "grad_norm": 0.33623990416526794,
      "learning_rate": 6.567476921252216e-07,
      "loss": 0.0151,
      "step": 2855920
    },
    {
      "epoch": 4.673808448380825,
      "grad_norm": 0.41151925921440125,
      "learning_rate": 6.566817999117045e-07,
      "loss": 0.0083,
      "step": 2855940
    },
    {
      "epoch": 4.673841178819479,
      "grad_norm": 0.40992653369903564,
      "learning_rate": 6.566159076981873e-07,
      "loss": 0.0093,
      "step": 2855960
    },
    {
      "epoch": 4.673873909258132,
      "grad_norm": 0.017478447407484055,
      "learning_rate": 6.565500154846702e-07,
      "loss": 0.0075,
      "step": 2855980
    },
    {
      "epoch": 4.673906639696785,
      "grad_norm": 0.22805340588092804,
      "learning_rate": 6.564841232711531e-07,
      "loss": 0.0099,
      "step": 2856000
    },
    {
      "epoch": 4.673939370135439,
      "grad_norm": 0.13072659075260162,
      "learning_rate": 6.56418231057636e-07,
      "loss": 0.0139,
      "step": 2856020
    },
    {
      "epoch": 4.673972100574092,
      "grad_norm": 0.1921313852071762,
      "learning_rate": 6.563523388441188e-07,
      "loss": 0.0099,
      "step": 2856040
    },
    {
      "epoch": 4.674004831012745,
      "grad_norm": 0.07537172734737396,
      "learning_rate": 6.562864466306017e-07,
      "loss": 0.01,
      "step": 2856060
    },
    {
      "epoch": 4.6740375614513985,
      "grad_norm": 0.17704293131828308,
      "learning_rate": 6.562205544170846e-07,
      "loss": 0.0082,
      "step": 2856080
    },
    {
      "epoch": 4.674070291890052,
      "grad_norm": 0.31135913729667664,
      "learning_rate": 6.561546622035675e-07,
      "loss": 0.006,
      "step": 2856100
    },
    {
      "epoch": 4.674103022328705,
      "grad_norm": 0.17611470818519592,
      "learning_rate": 6.560887699900503e-07,
      "loss": 0.0065,
      "step": 2856120
    },
    {
      "epoch": 4.674135752767358,
      "grad_norm": 0.5358037948608398,
      "learning_rate": 6.560228777765332e-07,
      "loss": 0.0069,
      "step": 2856140
    },
    {
      "epoch": 4.674168483206012,
      "grad_norm": 0.32562658190727234,
      "learning_rate": 6.55956985563016e-07,
      "loss": 0.008,
      "step": 2856160
    },
    {
      "epoch": 4.674201213644666,
      "grad_norm": 0.05660036578774452,
      "learning_rate": 6.55891093349499e-07,
      "loss": 0.0068,
      "step": 2856180
    },
    {
      "epoch": 4.674233944083318,
      "grad_norm": 0.23638780415058136,
      "learning_rate": 6.558252011359818e-07,
      "loss": 0.0086,
      "step": 2856200
    },
    {
      "epoch": 4.674266674521972,
      "grad_norm": 0.7479780316352844,
      "learning_rate": 6.557593089224646e-07,
      "loss": 0.0098,
      "step": 2856220
    },
    {
      "epoch": 4.6742994049606255,
      "grad_norm": 0.15246985852718353,
      "learning_rate": 6.556934167089475e-07,
      "loss": 0.0081,
      "step": 2856240
    },
    {
      "epoch": 4.674332135399279,
      "grad_norm": 0.17111335694789886,
      "learning_rate": 6.556275244954304e-07,
      "loss": 0.0168,
      "step": 2856260
    },
    {
      "epoch": 4.674364865837932,
      "grad_norm": 0.1627313494682312,
      "learning_rate": 6.555616322819133e-07,
      "loss": 0.0135,
      "step": 2856280
    },
    {
      "epoch": 4.674397596276585,
      "grad_norm": 0.12414462119340897,
      "learning_rate": 6.554957400683961e-07,
      "loss": 0.0056,
      "step": 2856300
    },
    {
      "epoch": 4.674430326715239,
      "grad_norm": 0.30817127227783203,
      "learning_rate": 6.55429847854879e-07,
      "loss": 0.0102,
      "step": 2856320
    },
    {
      "epoch": 4.674463057153892,
      "grad_norm": 0.15300163626670837,
      "learning_rate": 6.553639556413618e-07,
      "loss": 0.0071,
      "step": 2856340
    },
    {
      "epoch": 4.674495787592545,
      "grad_norm": 0.12693552672863007,
      "learning_rate": 6.552980634278449e-07,
      "loss": 0.0062,
      "step": 2856360
    },
    {
      "epoch": 4.674528518031199,
      "grad_norm": 0.4891641139984131,
      "learning_rate": 6.552321712143276e-07,
      "loss": 0.0087,
      "step": 2856380
    },
    {
      "epoch": 4.6745612484698515,
      "grad_norm": 0.45542436838150024,
      "learning_rate": 6.551662790008105e-07,
      "loss": 0.009,
      "step": 2856400
    },
    {
      "epoch": 4.674593978908505,
      "grad_norm": 0.15776582062244415,
      "learning_rate": 6.551003867872933e-07,
      "loss": 0.0097,
      "step": 2856420
    },
    {
      "epoch": 4.674626709347159,
      "grad_norm": 0.328262060880661,
      "learning_rate": 6.550344945737764e-07,
      "loss": 0.0095,
      "step": 2856440
    },
    {
      "epoch": 4.674659439785812,
      "grad_norm": 0.3384854793548584,
      "learning_rate": 6.549686023602592e-07,
      "loss": 0.0078,
      "step": 2856460
    },
    {
      "epoch": 4.674692170224465,
      "grad_norm": 0.16505597531795502,
      "learning_rate": 6.549027101467421e-07,
      "loss": 0.0109,
      "step": 2856480
    },
    {
      "epoch": 4.674724900663119,
      "grad_norm": 0.15668465197086334,
      "learning_rate": 6.548368179332248e-07,
      "loss": 0.0071,
      "step": 2856500
    },
    {
      "epoch": 4.674757631101772,
      "grad_norm": 0.3902633786201477,
      "learning_rate": 6.547709257197079e-07,
      "loss": 0.0122,
      "step": 2856520
    },
    {
      "epoch": 4.674790361540426,
      "grad_norm": 0.07927117496728897,
      "learning_rate": 6.547050335061907e-07,
      "loss": 0.0092,
      "step": 2856540
    },
    {
      "epoch": 4.6748230919790785,
      "grad_norm": 0.3045062720775604,
      "learning_rate": 6.546391412926735e-07,
      "loss": 0.011,
      "step": 2856560
    },
    {
      "epoch": 4.674855822417732,
      "grad_norm": 0.196701779961586,
      "learning_rate": 6.545732490791564e-07,
      "loss": 0.0086,
      "step": 2856580
    },
    {
      "epoch": 4.674888552856386,
      "grad_norm": 0.17776654660701752,
      "learning_rate": 6.545073568656391e-07,
      "loss": 0.007,
      "step": 2856600
    },
    {
      "epoch": 4.674921283295038,
      "grad_norm": 0.10300799459218979,
      "learning_rate": 6.544414646521222e-07,
      "loss": 0.0101,
      "step": 2856620
    },
    {
      "epoch": 4.674954013733692,
      "grad_norm": 0.25723543763160706,
      "learning_rate": 6.54375572438605e-07,
      "loss": 0.0077,
      "step": 2856640
    },
    {
      "epoch": 4.674986744172346,
      "grad_norm": 0.09870720654726028,
      "learning_rate": 6.543096802250879e-07,
      "loss": 0.0051,
      "step": 2856660
    },
    {
      "epoch": 4.675019474610998,
      "grad_norm": 0.45079994201660156,
      "learning_rate": 6.542437880115707e-07,
      "loss": 0.0139,
      "step": 2856680
    },
    {
      "epoch": 4.675052205049652,
      "grad_norm": 0.32718625664711,
      "learning_rate": 6.541778957980537e-07,
      "loss": 0.0058,
      "step": 2856700
    },
    {
      "epoch": 4.6750849354883055,
      "grad_norm": 0.1637478470802307,
      "learning_rate": 6.541120035845365e-07,
      "loss": 0.0121,
      "step": 2856720
    },
    {
      "epoch": 4.675117665926959,
      "grad_norm": 0.3325302302837372,
      "learning_rate": 6.540461113710194e-07,
      "loss": 0.0153,
      "step": 2856740
    },
    {
      "epoch": 4.675150396365612,
      "grad_norm": 0.16929514706134796,
      "learning_rate": 6.539802191575022e-07,
      "loss": 0.0116,
      "step": 2856760
    },
    {
      "epoch": 4.675183126804265,
      "grad_norm": 0.12149519473314285,
      "learning_rate": 6.539143269439851e-07,
      "loss": 0.0102,
      "step": 2856780
    },
    {
      "epoch": 4.675215857242919,
      "grad_norm": 0.2835010290145874,
      "learning_rate": 6.53848434730468e-07,
      "loss": 0.0086,
      "step": 2856800
    },
    {
      "epoch": 4.675248587681573,
      "grad_norm": 0.1138448715209961,
      "learning_rate": 6.537825425169509e-07,
      "loss": 0.0065,
      "step": 2856820
    },
    {
      "epoch": 4.675281318120225,
      "grad_norm": 0.19979241490364075,
      "learning_rate": 6.537166503034337e-07,
      "loss": 0.0082,
      "step": 2856840
    },
    {
      "epoch": 4.675314048558879,
      "grad_norm": 0.2677597105503082,
      "learning_rate": 6.536507580899165e-07,
      "loss": 0.0081,
      "step": 2856860
    },
    {
      "epoch": 4.6753467789975325,
      "grad_norm": 0.16107086837291718,
      "learning_rate": 6.535848658763995e-07,
      "loss": 0.0093,
      "step": 2856880
    },
    {
      "epoch": 4.675379509436185,
      "grad_norm": 0.1973646730184555,
      "learning_rate": 6.535189736628823e-07,
      "loss": 0.0061,
      "step": 2856900
    },
    {
      "epoch": 4.675412239874839,
      "grad_norm": 0.5253831148147583,
      "learning_rate": 6.534530814493652e-07,
      "loss": 0.0156,
      "step": 2856920
    },
    {
      "epoch": 4.675444970313492,
      "grad_norm": 0.3544369637966156,
      "learning_rate": 6.53387189235848e-07,
      "loss": 0.0096,
      "step": 2856940
    },
    {
      "epoch": 4.675477700752145,
      "grad_norm": 0.7598322033882141,
      "learning_rate": 6.53321297022331e-07,
      "loss": 0.0098,
      "step": 2856960
    },
    {
      "epoch": 4.675510431190799,
      "grad_norm": 0.19207604229450226,
      "learning_rate": 6.532554048088138e-07,
      "loss": 0.0096,
      "step": 2856980
    },
    {
      "epoch": 4.675543161629452,
      "grad_norm": 0.1873839795589447,
      "learning_rate": 6.531895125952967e-07,
      "loss": 0.0061,
      "step": 2857000
    },
    {
      "epoch": 4.675575892068106,
      "grad_norm": 0.2697988450527191,
      "learning_rate": 6.531236203817795e-07,
      "loss": 0.0061,
      "step": 2857020
    },
    {
      "epoch": 4.675608622506759,
      "grad_norm": 0.376372754573822,
      "learning_rate": 6.530577281682624e-07,
      "loss": 0.0106,
      "step": 2857040
    },
    {
      "epoch": 4.675641352945412,
      "grad_norm": 0.4060238301753998,
      "learning_rate": 6.529918359547453e-07,
      "loss": 0.0124,
      "step": 2857060
    },
    {
      "epoch": 4.675674083384066,
      "grad_norm": 0.04278375953435898,
      "learning_rate": 6.529259437412282e-07,
      "loss": 0.0073,
      "step": 2857080
    },
    {
      "epoch": 4.6757068138227185,
      "grad_norm": 0.11754611134529114,
      "learning_rate": 6.52860051527711e-07,
      "loss": 0.0051,
      "step": 2857100
    },
    {
      "epoch": 4.675739544261372,
      "grad_norm": 0.13141246140003204,
      "learning_rate": 6.527941593141939e-07,
      "loss": 0.0102,
      "step": 2857120
    },
    {
      "epoch": 4.675772274700026,
      "grad_norm": 0.12965382635593414,
      "learning_rate": 6.527282671006768e-07,
      "loss": 0.0103,
      "step": 2857140
    },
    {
      "epoch": 4.675805005138679,
      "grad_norm": 0.0911824107170105,
      "learning_rate": 6.526623748871597e-07,
      "loss": 0.0191,
      "step": 2857160
    },
    {
      "epoch": 4.675837735577332,
      "grad_norm": 0.1680697351694107,
      "learning_rate": 6.525964826736425e-07,
      "loss": 0.007,
      "step": 2857180
    },
    {
      "epoch": 4.6758704660159855,
      "grad_norm": 0.13101817667484283,
      "learning_rate": 6.525305904601253e-07,
      "loss": 0.0114,
      "step": 2857200
    },
    {
      "epoch": 4.675903196454639,
      "grad_norm": 0.2564651668071747,
      "learning_rate": 6.524646982466082e-07,
      "loss": 0.0113,
      "step": 2857220
    },
    {
      "epoch": 4.675935926893292,
      "grad_norm": 0.30916714668273926,
      "learning_rate": 6.523988060330911e-07,
      "loss": 0.0081,
      "step": 2857240
    },
    {
      "epoch": 4.675968657331945,
      "grad_norm": 0.4703247845172882,
      "learning_rate": 6.52332913819574e-07,
      "loss": 0.0064,
      "step": 2857260
    },
    {
      "epoch": 4.676001387770599,
      "grad_norm": 0.12117281556129456,
      "learning_rate": 6.522670216060568e-07,
      "loss": 0.0114,
      "step": 2857280
    },
    {
      "epoch": 4.676034118209252,
      "grad_norm": 0.28652504086494446,
      "learning_rate": 6.522011293925397e-07,
      "loss": 0.007,
      "step": 2857300
    },
    {
      "epoch": 4.676066848647905,
      "grad_norm": 0.4147661030292511,
      "learning_rate": 6.521352371790226e-07,
      "loss": 0.0084,
      "step": 2857320
    },
    {
      "epoch": 4.676099579086559,
      "grad_norm": 0.11080945283174515,
      "learning_rate": 6.520693449655055e-07,
      "loss": 0.0085,
      "step": 2857340
    },
    {
      "epoch": 4.6761323095252125,
      "grad_norm": 0.38089415431022644,
      "learning_rate": 6.520034527519883e-07,
      "loss": 0.0098,
      "step": 2857360
    },
    {
      "epoch": 4.676165039963865,
      "grad_norm": 0.135267972946167,
      "learning_rate": 6.519375605384712e-07,
      "loss": 0.0099,
      "step": 2857380
    },
    {
      "epoch": 4.676197770402519,
      "grad_norm": 0.16531459987163544,
      "learning_rate": 6.518716683249541e-07,
      "loss": 0.0077,
      "step": 2857400
    },
    {
      "epoch": 4.676230500841172,
      "grad_norm": 0.32327377796173096,
      "learning_rate": 6.51805776111437e-07,
      "loss": 0.0093,
      "step": 2857420
    },
    {
      "epoch": 4.676263231279826,
      "grad_norm": 0.0906020775437355,
      "learning_rate": 6.517398838979198e-07,
      "loss": 0.0109,
      "step": 2857440
    },
    {
      "epoch": 4.676295961718479,
      "grad_norm": 0.7176901698112488,
      "learning_rate": 6.516739916844027e-07,
      "loss": 0.0105,
      "step": 2857460
    },
    {
      "epoch": 4.676328692157132,
      "grad_norm": 0.22195924818515778,
      "learning_rate": 6.516080994708855e-07,
      "loss": 0.0068,
      "step": 2857480
    },
    {
      "epoch": 4.676361422595786,
      "grad_norm": 0.08062024414539337,
      "learning_rate": 6.515422072573685e-07,
      "loss": 0.0063,
      "step": 2857500
    },
    {
      "epoch": 4.676394153034439,
      "grad_norm": 0.20319470763206482,
      "learning_rate": 6.514763150438513e-07,
      "loss": 0.0102,
      "step": 2857520
    },
    {
      "epoch": 4.676426883473092,
      "grad_norm": 0.341194748878479,
      "learning_rate": 6.514104228303341e-07,
      "loss": 0.009,
      "step": 2857540
    },
    {
      "epoch": 4.676459613911746,
      "grad_norm": 0.06649820506572723,
      "learning_rate": 6.51344530616817e-07,
      "loss": 0.0136,
      "step": 2857560
    },
    {
      "epoch": 4.6764923443503985,
      "grad_norm": 0.10536087304353714,
      "learning_rate": 6.512786384032999e-07,
      "loss": 0.0093,
      "step": 2857580
    },
    {
      "epoch": 4.676525074789052,
      "grad_norm": 0.1218496561050415,
      "learning_rate": 6.512127461897828e-07,
      "loss": 0.0092,
      "step": 2857600
    },
    {
      "epoch": 4.676557805227706,
      "grad_norm": 0.33905768394470215,
      "learning_rate": 6.511468539762656e-07,
      "loss": 0.0082,
      "step": 2857620
    },
    {
      "epoch": 4.676590535666359,
      "grad_norm": 0.17347154021263123,
      "learning_rate": 6.510809617627485e-07,
      "loss": 0.0069,
      "step": 2857640
    },
    {
      "epoch": 4.676623266105012,
      "grad_norm": 0.3459698259830475,
      "learning_rate": 6.510150695492313e-07,
      "loss": 0.0127,
      "step": 2857660
    },
    {
      "epoch": 4.676655996543666,
      "grad_norm": 0.4328261613845825,
      "learning_rate": 6.509491773357144e-07,
      "loss": 0.0111,
      "step": 2857680
    },
    {
      "epoch": 4.676688726982319,
      "grad_norm": 0.20591223239898682,
      "learning_rate": 6.508832851221971e-07,
      "loss": 0.0121,
      "step": 2857700
    },
    {
      "epoch": 4.676721457420973,
      "grad_norm": 0.22696051001548767,
      "learning_rate": 6.5081739290868e-07,
      "loss": 0.0097,
      "step": 2857720
    },
    {
      "epoch": 4.6767541878596255,
      "grad_norm": 0.09913438558578491,
      "learning_rate": 6.507515006951628e-07,
      "loss": 0.0076,
      "step": 2857740
    },
    {
      "epoch": 4.676786918298279,
      "grad_norm": 0.2243761569261551,
      "learning_rate": 6.506856084816459e-07,
      "loss": 0.0065,
      "step": 2857760
    },
    {
      "epoch": 4.676819648736933,
      "grad_norm": 0.27968525886535645,
      "learning_rate": 6.506197162681287e-07,
      "loss": 0.009,
      "step": 2857780
    },
    {
      "epoch": 4.676852379175585,
      "grad_norm": 0.25485190749168396,
      "learning_rate": 6.505538240546116e-07,
      "loss": 0.007,
      "step": 2857800
    },
    {
      "epoch": 4.676885109614239,
      "grad_norm": 0.5505574345588684,
      "learning_rate": 6.504879318410943e-07,
      "loss": 0.0072,
      "step": 2857820
    },
    {
      "epoch": 4.676917840052893,
      "grad_norm": 0.4033352732658386,
      "learning_rate": 6.504220396275774e-07,
      "loss": 0.0102,
      "step": 2857840
    },
    {
      "epoch": 4.676950570491545,
      "grad_norm": 0.62871253490448,
      "learning_rate": 6.503561474140602e-07,
      "loss": 0.0197,
      "step": 2857860
    },
    {
      "epoch": 4.676983300930199,
      "grad_norm": 0.35926494002342224,
      "learning_rate": 6.50290255200543e-07,
      "loss": 0.0076,
      "step": 2857880
    },
    {
      "epoch": 4.6770160313688525,
      "grad_norm": 0.7342106103897095,
      "learning_rate": 6.502243629870259e-07,
      "loss": 0.0123,
      "step": 2857900
    },
    {
      "epoch": 4.677048761807506,
      "grad_norm": 0.2394617646932602,
      "learning_rate": 6.501584707735086e-07,
      "loss": 0.013,
      "step": 2857920
    },
    {
      "epoch": 4.677081492246159,
      "grad_norm": 0.23038123548030853,
      "learning_rate": 6.500925785599917e-07,
      "loss": 0.01,
      "step": 2857940
    },
    {
      "epoch": 4.677114222684812,
      "grad_norm": 0.3042190372943878,
      "learning_rate": 6.500266863464745e-07,
      "loss": 0.0094,
      "step": 2857960
    },
    {
      "epoch": 4.677146953123466,
      "grad_norm": 0.1362687647342682,
      "learning_rate": 6.499607941329574e-07,
      "loss": 0.0132,
      "step": 2857980
    },
    {
      "epoch": 4.6771796835621195,
      "grad_norm": 0.1354917585849762,
      "learning_rate": 6.498949019194402e-07,
      "loss": 0.0065,
      "step": 2858000
    },
    {
      "epoch": 4.677212414000772,
      "grad_norm": 0.11149872839450836,
      "learning_rate": 6.498290097059232e-07,
      "loss": 0.0069,
      "step": 2858020
    },
    {
      "epoch": 4.677245144439426,
      "grad_norm": 0.13180382549762726,
      "learning_rate": 6.49763117492406e-07,
      "loss": 0.0071,
      "step": 2858040
    },
    {
      "epoch": 4.6772778748780794,
      "grad_norm": 0.11679449677467346,
      "learning_rate": 6.496972252788889e-07,
      "loss": 0.0114,
      "step": 2858060
    },
    {
      "epoch": 4.677310605316732,
      "grad_norm": 0.32677093148231506,
      "learning_rate": 6.496313330653717e-07,
      "loss": 0.0073,
      "step": 2858080
    },
    {
      "epoch": 4.677343335755386,
      "grad_norm": 0.08628237247467041,
      "learning_rate": 6.495654408518546e-07,
      "loss": 0.0092,
      "step": 2858100
    },
    {
      "epoch": 4.677376066194039,
      "grad_norm": 0.15973681211471558,
      "learning_rate": 6.494995486383375e-07,
      "loss": 0.0108,
      "step": 2858120
    },
    {
      "epoch": 4.677408796632692,
      "grad_norm": 0.3736589550971985,
      "learning_rate": 6.494336564248204e-07,
      "loss": 0.0112,
      "step": 2858140
    },
    {
      "epoch": 4.677441527071346,
      "grad_norm": 0.09548608213663101,
      "learning_rate": 6.493677642113032e-07,
      "loss": 0.0102,
      "step": 2858160
    },
    {
      "epoch": 4.677474257509999,
      "grad_norm": 0.1795346438884735,
      "learning_rate": 6.493018719977861e-07,
      "loss": 0.0057,
      "step": 2858180
    },
    {
      "epoch": 4.677506987948653,
      "grad_norm": 0.17413221299648285,
      "learning_rate": 6.49235979784269e-07,
      "loss": 0.0097,
      "step": 2858200
    },
    {
      "epoch": 4.6775397183873055,
      "grad_norm": 0.2432987093925476,
      "learning_rate": 6.491700875707519e-07,
      "loss": 0.0058,
      "step": 2858220
    },
    {
      "epoch": 4.677572448825959,
      "grad_norm": 0.10823315382003784,
      "learning_rate": 6.491041953572347e-07,
      "loss": 0.007,
      "step": 2858240
    },
    {
      "epoch": 4.677605179264613,
      "grad_norm": 0.06293651461601257,
      "learning_rate": 6.490383031437175e-07,
      "loss": 0.0071,
      "step": 2858260
    },
    {
      "epoch": 4.677637909703266,
      "grad_norm": 0.12626862525939941,
      "learning_rate": 6.489724109302005e-07,
      "loss": 0.0082,
      "step": 2858280
    },
    {
      "epoch": 4.677670640141919,
      "grad_norm": 0.1660589873790741,
      "learning_rate": 6.489065187166833e-07,
      "loss": 0.0082,
      "step": 2858300
    },
    {
      "epoch": 4.677703370580573,
      "grad_norm": 0.09722290933132172,
      "learning_rate": 6.488406265031662e-07,
      "loss": 0.0066,
      "step": 2858320
    },
    {
      "epoch": 4.677736101019226,
      "grad_norm": 0.11857781559228897,
      "learning_rate": 6.48774734289649e-07,
      "loss": 0.0058,
      "step": 2858340
    },
    {
      "epoch": 4.677768831457879,
      "grad_norm": 0.20162156224250793,
      "learning_rate": 6.487088420761319e-07,
      "loss": 0.0094,
      "step": 2858360
    },
    {
      "epoch": 4.6778015618965325,
      "grad_norm": 0.5353652238845825,
      "learning_rate": 6.486429498626148e-07,
      "loss": 0.0086,
      "step": 2858380
    },
    {
      "epoch": 4.677834292335186,
      "grad_norm": 0.20052598416805267,
      "learning_rate": 6.485770576490977e-07,
      "loss": 0.0137,
      "step": 2858400
    },
    {
      "epoch": 4.677867022773839,
      "grad_norm": 0.21204224228858948,
      "learning_rate": 6.485111654355805e-07,
      "loss": 0.0099,
      "step": 2858420
    },
    {
      "epoch": 4.677899753212492,
      "grad_norm": 0.23672913014888763,
      "learning_rate": 6.484452732220634e-07,
      "loss": 0.0092,
      "step": 2858440
    },
    {
      "epoch": 4.677932483651146,
      "grad_norm": 0.06758372485637665,
      "learning_rate": 6.483793810085463e-07,
      "loss": 0.0113,
      "step": 2858460
    },
    {
      "epoch": 4.6779652140898,
      "grad_norm": 0.6620960235595703,
      "learning_rate": 6.483134887950292e-07,
      "loss": 0.0061,
      "step": 2858480
    },
    {
      "epoch": 4.677997944528452,
      "grad_norm": 0.053009238094091415,
      "learning_rate": 6.48247596581512e-07,
      "loss": 0.009,
      "step": 2858500
    },
    {
      "epoch": 4.678030674967106,
      "grad_norm": 0.05717121809720993,
      "learning_rate": 6.481817043679949e-07,
      "loss": 0.0085,
      "step": 2858520
    },
    {
      "epoch": 4.6780634054057595,
      "grad_norm": 0.10753000527620316,
      "learning_rate": 6.481158121544777e-07,
      "loss": 0.0091,
      "step": 2858540
    },
    {
      "epoch": 4.678096135844413,
      "grad_norm": 0.33441999554634094,
      "learning_rate": 6.480499199409606e-07,
      "loss": 0.0092,
      "step": 2858560
    },
    {
      "epoch": 4.678128866283066,
      "grad_norm": 0.11009557545185089,
      "learning_rate": 6.479840277274435e-07,
      "loss": 0.0105,
      "step": 2858580
    },
    {
      "epoch": 4.678161596721719,
      "grad_norm": 0.37367841601371765,
      "learning_rate": 6.479181355139263e-07,
      "loss": 0.0117,
      "step": 2858600
    },
    {
      "epoch": 4.678194327160373,
      "grad_norm": 0.11247435212135315,
      "learning_rate": 6.478522433004092e-07,
      "loss": 0.0095,
      "step": 2858620
    },
    {
      "epoch": 4.678227057599026,
      "grad_norm": 0.13662034273147583,
      "learning_rate": 6.477863510868921e-07,
      "loss": 0.0115,
      "step": 2858640
    },
    {
      "epoch": 4.678259788037679,
      "grad_norm": 0.2428424060344696,
      "learning_rate": 6.47720458873375e-07,
      "loss": 0.0101,
      "step": 2858660
    },
    {
      "epoch": 4.678292518476333,
      "grad_norm": 0.4384848475456238,
      "learning_rate": 6.476545666598578e-07,
      "loss": 0.0089,
      "step": 2858680
    },
    {
      "epoch": 4.678325248914986,
      "grad_norm": 0.08791298419237137,
      "learning_rate": 6.475886744463407e-07,
      "loss": 0.0079,
      "step": 2858700
    },
    {
      "epoch": 4.678357979353639,
      "grad_norm": 0.11403415352106094,
      "learning_rate": 6.475227822328236e-07,
      "loss": 0.0058,
      "step": 2858720
    },
    {
      "epoch": 4.678390709792293,
      "grad_norm": 0.41354310512542725,
      "learning_rate": 6.474568900193065e-07,
      "loss": 0.011,
      "step": 2858740
    },
    {
      "epoch": 4.678423440230946,
      "grad_norm": 0.28506380319595337,
      "learning_rate": 6.473909978057893e-07,
      "loss": 0.0074,
      "step": 2858760
    },
    {
      "epoch": 4.678456170669599,
      "grad_norm": 0.10998164117336273,
      "learning_rate": 6.473251055922722e-07,
      "loss": 0.0125,
      "step": 2858780
    },
    {
      "epoch": 4.678488901108253,
      "grad_norm": 0.26825249195098877,
      "learning_rate": 6.47259213378755e-07,
      "loss": 0.0079,
      "step": 2858800
    },
    {
      "epoch": 4.678521631546906,
      "grad_norm": 0.19879010319709778,
      "learning_rate": 6.47193321165238e-07,
      "loss": 0.0049,
      "step": 2858820
    },
    {
      "epoch": 4.678554361985559,
      "grad_norm": 0.351227730512619,
      "learning_rate": 6.471274289517208e-07,
      "loss": 0.0061,
      "step": 2858840
    },
    {
      "epoch": 4.678587092424213,
      "grad_norm": 0.5366283059120178,
      "learning_rate": 6.470615367382037e-07,
      "loss": 0.0088,
      "step": 2858860
    },
    {
      "epoch": 4.678619822862866,
      "grad_norm": 0.2302703559398651,
      "learning_rate": 6.469956445246865e-07,
      "loss": 0.0055,
      "step": 2858880
    },
    {
      "epoch": 4.67865255330152,
      "grad_norm": 0.1441214680671692,
      "learning_rate": 6.469297523111694e-07,
      "loss": 0.0061,
      "step": 2858900
    },
    {
      "epoch": 4.6786852837401725,
      "grad_norm": 0.5120086073875427,
      "learning_rate": 6.468638600976523e-07,
      "loss": 0.0118,
      "step": 2858920
    },
    {
      "epoch": 4.678718014178826,
      "grad_norm": 0.2926185429096222,
      "learning_rate": 6.467979678841351e-07,
      "loss": 0.0081,
      "step": 2858940
    },
    {
      "epoch": 4.67875074461748,
      "grad_norm": 0.36781907081604004,
      "learning_rate": 6.46732075670618e-07,
      "loss": 0.012,
      "step": 2858960
    },
    {
      "epoch": 4.678783475056132,
      "grad_norm": 0.09710243344306946,
      "learning_rate": 6.466661834571008e-07,
      "loss": 0.0085,
      "step": 2858980
    },
    {
      "epoch": 4.678816205494786,
      "grad_norm": 0.0915462076663971,
      "learning_rate": 6.466002912435839e-07,
      "loss": 0.0073,
      "step": 2859000
    },
    {
      "epoch": 4.6788489359334395,
      "grad_norm": 0.14563168585300446,
      "learning_rate": 6.465343990300666e-07,
      "loss": 0.0101,
      "step": 2859020
    },
    {
      "epoch": 4.678881666372092,
      "grad_norm": 0.13244900107383728,
      "learning_rate": 6.464685068165495e-07,
      "loss": 0.0081,
      "step": 2859040
    },
    {
      "epoch": 4.678914396810746,
      "grad_norm": 0.25302809476852417,
      "learning_rate": 6.464026146030323e-07,
      "loss": 0.0073,
      "step": 2859060
    },
    {
      "epoch": 4.678947127249399,
      "grad_norm": 0.1739465594291687,
      "learning_rate": 6.463367223895154e-07,
      "loss": 0.0117,
      "step": 2859080
    },
    {
      "epoch": 4.678979857688053,
      "grad_norm": 0.341572642326355,
      "learning_rate": 6.462708301759982e-07,
      "loss": 0.0068,
      "step": 2859100
    },
    {
      "epoch": 4.679012588126706,
      "grad_norm": 0.11826424300670624,
      "learning_rate": 6.462049379624811e-07,
      "loss": 0.0102,
      "step": 2859120
    },
    {
      "epoch": 4.679045318565359,
      "grad_norm": 0.11697059869766235,
      "learning_rate": 6.461390457489638e-07,
      "loss": 0.0112,
      "step": 2859140
    },
    {
      "epoch": 4.679078049004013,
      "grad_norm": 0.15642474591732025,
      "learning_rate": 6.460731535354469e-07,
      "loss": 0.0073,
      "step": 2859160
    },
    {
      "epoch": 4.6791107794426665,
      "grad_norm": 0.481663316488266,
      "learning_rate": 6.460072613219297e-07,
      "loss": 0.0082,
      "step": 2859180
    },
    {
      "epoch": 4.679143509881319,
      "grad_norm": 0.14506825804710388,
      "learning_rate": 6.459413691084126e-07,
      "loss": 0.0084,
      "step": 2859200
    },
    {
      "epoch": 4.679176240319973,
      "grad_norm": 0.12883228063583374,
      "learning_rate": 6.458754768948954e-07,
      "loss": 0.0082,
      "step": 2859220
    },
    {
      "epoch": 4.679208970758626,
      "grad_norm": 0.1479421705007553,
      "learning_rate": 6.458095846813781e-07,
      "loss": 0.0115,
      "step": 2859240
    },
    {
      "epoch": 4.679241701197279,
      "grad_norm": 0.2826393246650696,
      "learning_rate": 6.457436924678612e-07,
      "loss": 0.0105,
      "step": 2859260
    },
    {
      "epoch": 4.679274431635933,
      "grad_norm": 0.04653886705636978,
      "learning_rate": 6.45677800254344e-07,
      "loss": 0.0068,
      "step": 2859280
    },
    {
      "epoch": 4.679307162074586,
      "grad_norm": 0.17127825319766998,
      "learning_rate": 6.456119080408269e-07,
      "loss": 0.0075,
      "step": 2859300
    },
    {
      "epoch": 4.679339892513239,
      "grad_norm": 0.11343280225992203,
      "learning_rate": 6.455460158273097e-07,
      "loss": 0.0105,
      "step": 2859320
    },
    {
      "epoch": 4.679372622951893,
      "grad_norm": 0.19526346027851105,
      "learning_rate": 6.454801236137927e-07,
      "loss": 0.009,
      "step": 2859340
    },
    {
      "epoch": 4.679405353390546,
      "grad_norm": 0.2135026454925537,
      "learning_rate": 6.454142314002755e-07,
      "loss": 0.0058,
      "step": 2859360
    },
    {
      "epoch": 4.6794380838292,
      "grad_norm": 0.8464729189872742,
      "learning_rate": 6.453483391867584e-07,
      "loss": 0.0074,
      "step": 2859380
    },
    {
      "epoch": 4.6794708142678525,
      "grad_norm": 0.21169008314609528,
      "learning_rate": 6.452824469732412e-07,
      "loss": 0.0076,
      "step": 2859400
    },
    {
      "epoch": 4.679503544706506,
      "grad_norm": 0.1339852511882782,
      "learning_rate": 6.452165547597241e-07,
      "loss": 0.0055,
      "step": 2859420
    },
    {
      "epoch": 4.67953627514516,
      "grad_norm": 0.22971715033054352,
      "learning_rate": 6.45150662546207e-07,
      "loss": 0.0074,
      "step": 2859440
    },
    {
      "epoch": 4.679569005583813,
      "grad_norm": 0.18444857001304626,
      "learning_rate": 6.450847703326899e-07,
      "loss": 0.0093,
      "step": 2859460
    },
    {
      "epoch": 4.679601736022466,
      "grad_norm": 0.9717901945114136,
      "learning_rate": 6.450188781191727e-07,
      "loss": 0.0125,
      "step": 2859480
    },
    {
      "epoch": 4.67963446646112,
      "grad_norm": 0.4197535514831543,
      "learning_rate": 6.449529859056556e-07,
      "loss": 0.0076,
      "step": 2859500
    },
    {
      "epoch": 4.679667196899773,
      "grad_norm": 0.6056621074676514,
      "learning_rate": 6.448870936921385e-07,
      "loss": 0.0068,
      "step": 2859520
    },
    {
      "epoch": 4.679699927338426,
      "grad_norm": 0.036173511296510696,
      "learning_rate": 6.448212014786214e-07,
      "loss": 0.0094,
      "step": 2859540
    },
    {
      "epoch": 4.6797326577770795,
      "grad_norm": 0.10602259635925293,
      "learning_rate": 6.447553092651042e-07,
      "loss": 0.0076,
      "step": 2859560
    },
    {
      "epoch": 4.679765388215733,
      "grad_norm": 0.7816535234451294,
      "learning_rate": 6.44689417051587e-07,
      "loss": 0.0099,
      "step": 2859580
    },
    {
      "epoch": 4.679798118654386,
      "grad_norm": 0.3123925030231476,
      "learning_rate": 6.4462352483807e-07,
      "loss": 0.0101,
      "step": 2859600
    },
    {
      "epoch": 4.679830849093039,
      "grad_norm": 0.0805009976029396,
      "learning_rate": 6.445576326245528e-07,
      "loss": 0.0091,
      "step": 2859620
    },
    {
      "epoch": 4.679863579531693,
      "grad_norm": 0.4481775462627411,
      "learning_rate": 6.444917404110357e-07,
      "loss": 0.0098,
      "step": 2859640
    },
    {
      "epoch": 4.679896309970347,
      "grad_norm": 0.3531368672847748,
      "learning_rate": 6.444258481975185e-07,
      "loss": 0.0116,
      "step": 2859660
    },
    {
      "epoch": 4.679929040408999,
      "grad_norm": 0.37254950404167175,
      "learning_rate": 6.443599559840014e-07,
      "loss": 0.0073,
      "step": 2859680
    },
    {
      "epoch": 4.679961770847653,
      "grad_norm": 0.2012796550989151,
      "learning_rate": 6.442940637704843e-07,
      "loss": 0.0119,
      "step": 2859700
    },
    {
      "epoch": 4.6799945012863065,
      "grad_norm": 0.16364777088165283,
      "learning_rate": 6.442281715569672e-07,
      "loss": 0.0063,
      "step": 2859720
    },
    {
      "epoch": 4.68002723172496,
      "grad_norm": 0.4114892780780792,
      "learning_rate": 6.4416227934345e-07,
      "loss": 0.0081,
      "step": 2859740
    },
    {
      "epoch": 4.680059962163613,
      "grad_norm": 0.15895827114582062,
      "learning_rate": 6.440963871299329e-07,
      "loss": 0.0064,
      "step": 2859760
    },
    {
      "epoch": 4.680092692602266,
      "grad_norm": 0.16745169460773468,
      "learning_rate": 6.440304949164158e-07,
      "loss": 0.007,
      "step": 2859780
    },
    {
      "epoch": 4.68012542304092,
      "grad_norm": 0.5195353627204895,
      "learning_rate": 6.439646027028987e-07,
      "loss": 0.0057,
      "step": 2859800
    },
    {
      "epoch": 4.680158153479573,
      "grad_norm": 0.1457633525133133,
      "learning_rate": 6.438987104893815e-07,
      "loss": 0.0075,
      "step": 2859820
    },
    {
      "epoch": 4.680190883918226,
      "grad_norm": 0.3172648549079895,
      "learning_rate": 6.438328182758644e-07,
      "loss": 0.0087,
      "step": 2859840
    },
    {
      "epoch": 4.68022361435688,
      "grad_norm": 0.5860899686813354,
      "learning_rate": 6.437669260623472e-07,
      "loss": 0.014,
      "step": 2859860
    },
    {
      "epoch": 4.6802563447955325,
      "grad_norm": 0.1895585060119629,
      "learning_rate": 6.437010338488302e-07,
      "loss": 0.0091,
      "step": 2859880
    },
    {
      "epoch": 4.680289075234186,
      "grad_norm": 0.22528821229934692,
      "learning_rate": 6.43635141635313e-07,
      "loss": 0.0079,
      "step": 2859900
    },
    {
      "epoch": 4.68032180567284,
      "grad_norm": 0.2229175567626953,
      "learning_rate": 6.435692494217958e-07,
      "loss": 0.0084,
      "step": 2859920
    },
    {
      "epoch": 4.680354536111493,
      "grad_norm": 0.37183892726898193,
      "learning_rate": 6.435033572082787e-07,
      "loss": 0.0089,
      "step": 2859940
    },
    {
      "epoch": 4.680387266550146,
      "grad_norm": 0.23207855224609375,
      "learning_rate": 6.434374649947616e-07,
      "loss": 0.0112,
      "step": 2859960
    },
    {
      "epoch": 4.6804199969888,
      "grad_norm": 0.37205249071121216,
      "learning_rate": 6.433715727812445e-07,
      "loss": 0.0123,
      "step": 2859980
    },
    {
      "epoch": 4.680452727427453,
      "grad_norm": 0.3336389362812042,
      "learning_rate": 6.433056805677273e-07,
      "loss": 0.0089,
      "step": 2860000
    },
    {
      "epoch": 4.680485457866107,
      "grad_norm": 0.27656373381614685,
      "learning_rate": 6.432397883542102e-07,
      "loss": 0.007,
      "step": 2860020
    },
    {
      "epoch": 4.6805181883047595,
      "grad_norm": 0.4032841920852661,
      "learning_rate": 6.431738961406931e-07,
      "loss": 0.0077,
      "step": 2860040
    },
    {
      "epoch": 4.680550918743413,
      "grad_norm": 0.2988932132720947,
      "learning_rate": 6.43108003927176e-07,
      "loss": 0.0066,
      "step": 2860060
    },
    {
      "epoch": 4.680583649182067,
      "grad_norm": 0.36923274397850037,
      "learning_rate": 6.430421117136588e-07,
      "loss": 0.0113,
      "step": 2860080
    },
    {
      "epoch": 4.680616379620719,
      "grad_norm": 0.11243940144777298,
      "learning_rate": 6.429762195001417e-07,
      "loss": 0.0064,
      "step": 2860100
    },
    {
      "epoch": 4.680649110059373,
      "grad_norm": 0.46076229214668274,
      "learning_rate": 6.429103272866245e-07,
      "loss": 0.0084,
      "step": 2860120
    },
    {
      "epoch": 4.680681840498027,
      "grad_norm": 0.5752364993095398,
      "learning_rate": 6.428444350731075e-07,
      "loss": 0.0109,
      "step": 2860140
    },
    {
      "epoch": 4.680714570936679,
      "grad_norm": 0.15534614026546478,
      "learning_rate": 6.427785428595903e-07,
      "loss": 0.0108,
      "step": 2860160
    },
    {
      "epoch": 4.680747301375333,
      "grad_norm": 0.23310938477516174,
      "learning_rate": 6.427126506460732e-07,
      "loss": 0.0087,
      "step": 2860180
    },
    {
      "epoch": 4.6807800318139865,
      "grad_norm": 0.30191147327423096,
      "learning_rate": 6.42646758432556e-07,
      "loss": 0.0092,
      "step": 2860200
    },
    {
      "epoch": 4.68081276225264,
      "grad_norm": 0.08729063719511032,
      "learning_rate": 6.425808662190391e-07,
      "loss": 0.009,
      "step": 2860220
    },
    {
      "epoch": 4.680845492691293,
      "grad_norm": 0.337910920381546,
      "learning_rate": 6.425149740055218e-07,
      "loss": 0.0069,
      "step": 2860240
    },
    {
      "epoch": 4.680878223129946,
      "grad_norm": 0.13756349682807922,
      "learning_rate": 6.424490817920046e-07,
      "loss": 0.0167,
      "step": 2860260
    },
    {
      "epoch": 4.6809109535686,
      "grad_norm": 0.2449144423007965,
      "learning_rate": 6.423831895784875e-07,
      "loss": 0.0068,
      "step": 2860280
    },
    {
      "epoch": 4.680943684007253,
      "grad_norm": 0.33498522639274597,
      "learning_rate": 6.423172973649703e-07,
      "loss": 0.0077,
      "step": 2860300
    },
    {
      "epoch": 4.680976414445906,
      "grad_norm": 0.3043404817581177,
      "learning_rate": 6.422514051514534e-07,
      "loss": 0.0046,
      "step": 2860320
    },
    {
      "epoch": 4.68100914488456,
      "grad_norm": 0.26897192001342773,
      "learning_rate": 6.421855129379361e-07,
      "loss": 0.007,
      "step": 2860340
    },
    {
      "epoch": 4.6810418753232135,
      "grad_norm": 0.1409844160079956,
      "learning_rate": 6.42119620724419e-07,
      "loss": 0.008,
      "step": 2860360
    },
    {
      "epoch": 4.681074605761866,
      "grad_norm": 0.30959901213645935,
      "learning_rate": 6.420537285109018e-07,
      "loss": 0.0079,
      "step": 2860380
    },
    {
      "epoch": 4.68110733620052,
      "grad_norm": 0.58678138256073,
      "learning_rate": 6.419878362973849e-07,
      "loss": 0.0059,
      "step": 2860400
    },
    {
      "epoch": 4.681140066639173,
      "grad_norm": 0.14000006020069122,
      "learning_rate": 6.419219440838677e-07,
      "loss": 0.0079,
      "step": 2860420
    },
    {
      "epoch": 4.681172797077826,
      "grad_norm": 0.10277509689331055,
      "learning_rate": 6.418560518703506e-07,
      "loss": 0.0087,
      "step": 2860440
    },
    {
      "epoch": 4.68120552751648,
      "grad_norm": 0.12306158244609833,
      "learning_rate": 6.417901596568333e-07,
      "loss": 0.0077,
      "step": 2860460
    },
    {
      "epoch": 4.681238257955133,
      "grad_norm": 0.6759548783302307,
      "learning_rate": 6.417242674433164e-07,
      "loss": 0.0085,
      "step": 2860480
    },
    {
      "epoch": 4.681270988393786,
      "grad_norm": 0.11063963174819946,
      "learning_rate": 6.416583752297992e-07,
      "loss": 0.0059,
      "step": 2860500
    },
    {
      "epoch": 4.68130371883244,
      "grad_norm": 0.10390666872262955,
      "learning_rate": 6.415924830162821e-07,
      "loss": 0.0085,
      "step": 2860520
    },
    {
      "epoch": 4.681336449271093,
      "grad_norm": 0.04990300536155701,
      "learning_rate": 6.415265908027649e-07,
      "loss": 0.0089,
      "step": 2860540
    },
    {
      "epoch": 4.681369179709747,
      "grad_norm": 0.10012544691562653,
      "learning_rate": 6.414606985892476e-07,
      "loss": 0.0082,
      "step": 2860560
    },
    {
      "epoch": 4.6814019101483995,
      "grad_norm": 0.22865226864814758,
      "learning_rate": 6.413948063757307e-07,
      "loss": 0.0111,
      "step": 2860580
    },
    {
      "epoch": 4.681434640587053,
      "grad_norm": 0.11030419170856476,
      "learning_rate": 6.413289141622135e-07,
      "loss": 0.0081,
      "step": 2860600
    },
    {
      "epoch": 4.681467371025707,
      "grad_norm": 0.4817741811275482,
      "learning_rate": 6.412630219486964e-07,
      "loss": 0.0091,
      "step": 2860620
    },
    {
      "epoch": 4.68150010146436,
      "grad_norm": 0.22964642941951752,
      "learning_rate": 6.411971297351792e-07,
      "loss": 0.0083,
      "step": 2860640
    },
    {
      "epoch": 4.681532831903013,
      "grad_norm": 0.430641770362854,
      "learning_rate": 6.411312375216622e-07,
      "loss": 0.01,
      "step": 2860660
    },
    {
      "epoch": 4.6815655623416665,
      "grad_norm": 0.8681677579879761,
      "learning_rate": 6.41065345308145e-07,
      "loss": 0.0073,
      "step": 2860680
    },
    {
      "epoch": 4.68159829278032,
      "grad_norm": 0.23708488047122955,
      "learning_rate": 6.409994530946279e-07,
      "loss": 0.0099,
      "step": 2860700
    },
    {
      "epoch": 4.681631023218973,
      "grad_norm": 0.13660840690135956,
      "learning_rate": 6.409335608811107e-07,
      "loss": 0.0098,
      "step": 2860720
    },
    {
      "epoch": 4.681663753657626,
      "grad_norm": 0.1282363384962082,
      "learning_rate": 6.408676686675936e-07,
      "loss": 0.0094,
      "step": 2860740
    },
    {
      "epoch": 4.68169648409628,
      "grad_norm": 0.13979217410087585,
      "learning_rate": 6.408017764540765e-07,
      "loss": 0.008,
      "step": 2860760
    },
    {
      "epoch": 4.681729214534933,
      "grad_norm": 0.18852365016937256,
      "learning_rate": 6.407358842405594e-07,
      "loss": 0.0092,
      "step": 2860780
    },
    {
      "epoch": 4.681761944973586,
      "grad_norm": 0.23892587423324585,
      "learning_rate": 6.406699920270422e-07,
      "loss": 0.0111,
      "step": 2860800
    },
    {
      "epoch": 4.68179467541224,
      "grad_norm": 0.11345665156841278,
      "learning_rate": 6.406040998135251e-07,
      "loss": 0.0088,
      "step": 2860820
    },
    {
      "epoch": 4.6818274058508935,
      "grad_norm": 0.22166499495506287,
      "learning_rate": 6.40538207600008e-07,
      "loss": 0.013,
      "step": 2860840
    },
    {
      "epoch": 4.681860136289546,
      "grad_norm": 0.13932867348194122,
      "learning_rate": 6.404723153864909e-07,
      "loss": 0.0124,
      "step": 2860860
    },
    {
      "epoch": 4.6818928667282,
      "grad_norm": 0.3834681510925293,
      "learning_rate": 6.404064231729737e-07,
      "loss": 0.0063,
      "step": 2860880
    },
    {
      "epoch": 4.681925597166853,
      "grad_norm": 0.3614981472492218,
      "learning_rate": 6.403405309594566e-07,
      "loss": 0.0098,
      "step": 2860900
    },
    {
      "epoch": 4.681958327605507,
      "grad_norm": 0.1674429029226303,
      "learning_rate": 6.402746387459395e-07,
      "loss": 0.0122,
      "step": 2860920
    },
    {
      "epoch": 4.68199105804416,
      "grad_norm": 0.11029510945081711,
      "learning_rate": 6.402087465324223e-07,
      "loss": 0.0085,
      "step": 2860940
    },
    {
      "epoch": 4.682023788482813,
      "grad_norm": 0.09684499353170395,
      "learning_rate": 6.401428543189052e-07,
      "loss": 0.0101,
      "step": 2860960
    },
    {
      "epoch": 4.682056518921467,
      "grad_norm": 0.3964356780052185,
      "learning_rate": 6.40076962105388e-07,
      "loss": 0.007,
      "step": 2860980
    },
    {
      "epoch": 4.68208924936012,
      "grad_norm": 0.26930153369903564,
      "learning_rate": 6.400110698918709e-07,
      "loss": 0.0075,
      "step": 2861000
    },
    {
      "epoch": 4.682121979798773,
      "grad_norm": 0.3589152693748474,
      "learning_rate": 6.399451776783538e-07,
      "loss": 0.0086,
      "step": 2861020
    },
    {
      "epoch": 4.682154710237427,
      "grad_norm": 0.08939936757087708,
      "learning_rate": 6.398792854648367e-07,
      "loss": 0.009,
      "step": 2861040
    },
    {
      "epoch": 4.6821874406760795,
      "grad_norm": 0.13179481029510498,
      "learning_rate": 6.398133932513195e-07,
      "loss": 0.0119,
      "step": 2861060
    },
    {
      "epoch": 4.682220171114733,
      "grad_norm": 0.250707745552063,
      "learning_rate": 6.397475010378024e-07,
      "loss": 0.0094,
      "step": 2861080
    },
    {
      "epoch": 4.682252901553387,
      "grad_norm": 0.3370344042778015,
      "learning_rate": 6.396816088242853e-07,
      "loss": 0.0065,
      "step": 2861100
    },
    {
      "epoch": 4.68228563199204,
      "grad_norm": 0.1001051515340805,
      "learning_rate": 6.396157166107682e-07,
      "loss": 0.0089,
      "step": 2861120
    },
    {
      "epoch": 4.682318362430693,
      "grad_norm": 0.3924318552017212,
      "learning_rate": 6.39549824397251e-07,
      "loss": 0.0142,
      "step": 2861140
    },
    {
      "epoch": 4.682351092869347,
      "grad_norm": 0.2605344355106354,
      "learning_rate": 6.394839321837339e-07,
      "loss": 0.0084,
      "step": 2861160
    },
    {
      "epoch": 4.682383823308,
      "grad_norm": 0.06799934059381485,
      "learning_rate": 6.394180399702167e-07,
      "loss": 0.0098,
      "step": 2861180
    },
    {
      "epoch": 4.682416553746654,
      "grad_norm": 0.46214571595191956,
      "learning_rate": 6.393521477566997e-07,
      "loss": 0.0065,
      "step": 2861200
    },
    {
      "epoch": 4.6824492841853065,
      "grad_norm": 0.4733525812625885,
      "learning_rate": 6.392862555431825e-07,
      "loss": 0.0116,
      "step": 2861220
    },
    {
      "epoch": 4.68248201462396,
      "grad_norm": 0.18349212408065796,
      "learning_rate": 6.392203633296654e-07,
      "loss": 0.0069,
      "step": 2861240
    },
    {
      "epoch": 4.682514745062614,
      "grad_norm": 0.25565338134765625,
      "learning_rate": 6.391544711161482e-07,
      "loss": 0.0074,
      "step": 2861260
    },
    {
      "epoch": 4.682547475501266,
      "grad_norm": 0.3500077724456787,
      "learning_rate": 6.390885789026311e-07,
      "loss": 0.0082,
      "step": 2861280
    },
    {
      "epoch": 4.68258020593992,
      "grad_norm": 0.30310261249542236,
      "learning_rate": 6.39022686689114e-07,
      "loss": 0.0116,
      "step": 2861300
    },
    {
      "epoch": 4.682612936378574,
      "grad_norm": 0.1709805577993393,
      "learning_rate": 6.389567944755968e-07,
      "loss": 0.0083,
      "step": 2861320
    },
    {
      "epoch": 4.682645666817226,
      "grad_norm": 0.15466614067554474,
      "learning_rate": 6.388909022620797e-07,
      "loss": 0.0074,
      "step": 2861340
    },
    {
      "epoch": 4.68267839725588,
      "grad_norm": 0.18044306337833405,
      "learning_rate": 6.388250100485626e-07,
      "loss": 0.0086,
      "step": 2861360
    },
    {
      "epoch": 4.6827111276945335,
      "grad_norm": 0.16586390137672424,
      "learning_rate": 6.387591178350455e-07,
      "loss": 0.0101,
      "step": 2861380
    },
    {
      "epoch": 4.682743858133187,
      "grad_norm": 0.4946339428424835,
      "learning_rate": 6.386932256215283e-07,
      "loss": 0.0128,
      "step": 2861400
    },
    {
      "epoch": 4.68277658857184,
      "grad_norm": 0.18732523918151855,
      "learning_rate": 6.386273334080112e-07,
      "loss": 0.0082,
      "step": 2861420
    },
    {
      "epoch": 4.682809319010493,
      "grad_norm": 0.12081001698970795,
      "learning_rate": 6.38561441194494e-07,
      "loss": 0.0084,
      "step": 2861440
    },
    {
      "epoch": 4.682842049449147,
      "grad_norm": 0.19376911222934723,
      "learning_rate": 6.38495548980977e-07,
      "loss": 0.0042,
      "step": 2861460
    },
    {
      "epoch": 4.6828747798878005,
      "grad_norm": 0.11970312148332596,
      "learning_rate": 6.384296567674598e-07,
      "loss": 0.0105,
      "step": 2861480
    },
    {
      "epoch": 4.682907510326453,
      "grad_norm": 0.1734791398048401,
      "learning_rate": 6.383637645539427e-07,
      "loss": 0.0094,
      "step": 2861500
    },
    {
      "epoch": 4.682940240765107,
      "grad_norm": 0.3087301254272461,
      "learning_rate": 6.382978723404255e-07,
      "loss": 0.0059,
      "step": 2861520
    },
    {
      "epoch": 4.68297297120376,
      "grad_norm": 0.18819333612918854,
      "learning_rate": 6.382319801269086e-07,
      "loss": 0.0102,
      "step": 2861540
    },
    {
      "epoch": 4.683005701642413,
      "grad_norm": 0.4178875982761383,
      "learning_rate": 6.381660879133913e-07,
      "loss": 0.0103,
      "step": 2861560
    },
    {
      "epoch": 4.683038432081067,
      "grad_norm": 0.24196791648864746,
      "learning_rate": 6.381001956998742e-07,
      "loss": 0.009,
      "step": 2861580
    },
    {
      "epoch": 4.68307116251972,
      "grad_norm": 0.06623777747154236,
      "learning_rate": 6.38034303486357e-07,
      "loss": 0.0072,
      "step": 2861600
    },
    {
      "epoch": 4.683103892958373,
      "grad_norm": 0.25108879804611206,
      "learning_rate": 6.379684112728398e-07,
      "loss": 0.0102,
      "step": 2861620
    },
    {
      "epoch": 4.683136623397027,
      "grad_norm": 0.16529612243175507,
      "learning_rate": 6.379025190593229e-07,
      "loss": 0.0113,
      "step": 2861640
    },
    {
      "epoch": 4.68316935383568,
      "grad_norm": 0.4840613007545471,
      "learning_rate": 6.378366268458056e-07,
      "loss": 0.0084,
      "step": 2861660
    },
    {
      "epoch": 4.683202084274334,
      "grad_norm": 0.4539400637149811,
      "learning_rate": 6.377707346322885e-07,
      "loss": 0.0097,
      "step": 2861680
    },
    {
      "epoch": 4.6832348147129865,
      "grad_norm": 0.076900415122509,
      "learning_rate": 6.377048424187713e-07,
      "loss": 0.009,
      "step": 2861700
    },
    {
      "epoch": 4.68326754515164,
      "grad_norm": 0.4196835458278656,
      "learning_rate": 6.376389502052544e-07,
      "loss": 0.008,
      "step": 2861720
    },
    {
      "epoch": 4.683300275590294,
      "grad_norm": 0.2657935619354248,
      "learning_rate": 6.375730579917372e-07,
      "loss": 0.0106,
      "step": 2861740
    },
    {
      "epoch": 4.683333006028946,
      "grad_norm": 0.16128304600715637,
      "learning_rate": 6.375071657782201e-07,
      "loss": 0.0125,
      "step": 2861760
    },
    {
      "epoch": 4.6833657364676,
      "grad_norm": 0.21844463050365448,
      "learning_rate": 6.374412735647028e-07,
      "loss": 0.0094,
      "step": 2861780
    },
    {
      "epoch": 4.683398466906254,
      "grad_norm": 0.1696626991033554,
      "learning_rate": 6.373753813511859e-07,
      "loss": 0.0081,
      "step": 2861800
    },
    {
      "epoch": 4.683431197344907,
      "grad_norm": 0.3288632929325104,
      "learning_rate": 6.373094891376687e-07,
      "loss": 0.0087,
      "step": 2861820
    },
    {
      "epoch": 4.68346392778356,
      "grad_norm": 0.10244308412075043,
      "learning_rate": 6.372435969241516e-07,
      "loss": 0.0056,
      "step": 2861840
    },
    {
      "epoch": 4.6834966582222135,
      "grad_norm": 0.2854279577732086,
      "learning_rate": 6.371777047106344e-07,
      "loss": 0.0138,
      "step": 2861860
    },
    {
      "epoch": 4.683529388660867,
      "grad_norm": 0.17434921860694885,
      "learning_rate": 6.371118124971172e-07,
      "loss": 0.009,
      "step": 2861880
    },
    {
      "epoch": 4.68356211909952,
      "grad_norm": 0.02999500185251236,
      "learning_rate": 6.370459202836002e-07,
      "loss": 0.0068,
      "step": 2861900
    },
    {
      "epoch": 4.683594849538173,
      "grad_norm": 0.24119937419891357,
      "learning_rate": 6.369800280700831e-07,
      "loss": 0.0094,
      "step": 2861920
    },
    {
      "epoch": 4.683627579976827,
      "grad_norm": 0.6159149408340454,
      "learning_rate": 6.369141358565659e-07,
      "loss": 0.0107,
      "step": 2861940
    },
    {
      "epoch": 4.68366031041548,
      "grad_norm": 0.05846262723207474,
      "learning_rate": 6.368482436430487e-07,
      "loss": 0.0072,
      "step": 2861960
    },
    {
      "epoch": 4.683693040854133,
      "grad_norm": 0.17440061271190643,
      "learning_rate": 6.367823514295317e-07,
      "loss": 0.006,
      "step": 2861980
    },
    {
      "epoch": 4.683725771292787,
      "grad_norm": 0.4564395844936371,
      "learning_rate": 6.367164592160145e-07,
      "loss": 0.0095,
      "step": 2862000
    },
    {
      "epoch": 4.6837585017314405,
      "grad_norm": 0.2921714186668396,
      "learning_rate": 6.366505670024974e-07,
      "loss": 0.0074,
      "step": 2862020
    },
    {
      "epoch": 4.683791232170093,
      "grad_norm": 0.1732223778963089,
      "learning_rate": 6.365846747889802e-07,
      "loss": 0.0073,
      "step": 2862040
    },
    {
      "epoch": 4.683823962608747,
      "grad_norm": 0.3691983222961426,
      "learning_rate": 6.365187825754632e-07,
      "loss": 0.014,
      "step": 2862060
    },
    {
      "epoch": 4.6838566930474,
      "grad_norm": 0.3374065160751343,
      "learning_rate": 6.36452890361946e-07,
      "loss": 0.009,
      "step": 2862080
    },
    {
      "epoch": 4.683889423486054,
      "grad_norm": 0.14119280874729156,
      "learning_rate": 6.363869981484289e-07,
      "loss": 0.0087,
      "step": 2862100
    },
    {
      "epoch": 4.683922153924707,
      "grad_norm": 0.07603226602077484,
      "learning_rate": 6.363211059349117e-07,
      "loss": 0.0091,
      "step": 2862120
    },
    {
      "epoch": 4.68395488436336,
      "grad_norm": 0.143312469124794,
      "learning_rate": 6.362552137213946e-07,
      "loss": 0.0066,
      "step": 2862140
    },
    {
      "epoch": 4.683987614802014,
      "grad_norm": 0.2667658030986786,
      "learning_rate": 6.361893215078775e-07,
      "loss": 0.0092,
      "step": 2862160
    },
    {
      "epoch": 4.684020345240667,
      "grad_norm": 0.2822442352771759,
      "learning_rate": 6.361234292943604e-07,
      "loss": 0.0077,
      "step": 2862180
    },
    {
      "epoch": 4.68405307567932,
      "grad_norm": 0.09456244856119156,
      "learning_rate": 6.360575370808432e-07,
      "loss": 0.0081,
      "step": 2862200
    },
    {
      "epoch": 4.684085806117974,
      "grad_norm": 0.23378059267997742,
      "learning_rate": 6.359916448673261e-07,
      "loss": 0.0105,
      "step": 2862220
    },
    {
      "epoch": 4.6841185365566265,
      "grad_norm": 0.17085622251033783,
      "learning_rate": 6.35925752653809e-07,
      "loss": 0.0123,
      "step": 2862240
    },
    {
      "epoch": 4.68415126699528,
      "grad_norm": 0.2803540527820587,
      "learning_rate": 6.358598604402919e-07,
      "loss": 0.009,
      "step": 2862260
    },
    {
      "epoch": 4.684183997433934,
      "grad_norm": 0.2054474651813507,
      "learning_rate": 6.357939682267747e-07,
      "loss": 0.0091,
      "step": 2862280
    },
    {
      "epoch": 4.684216727872587,
      "grad_norm": 0.147678941488266,
      "learning_rate": 6.357280760132575e-07,
      "loss": 0.0087,
      "step": 2862300
    },
    {
      "epoch": 4.68424945831124,
      "grad_norm": 0.2540234327316284,
      "learning_rate": 6.356621837997404e-07,
      "loss": 0.0089,
      "step": 2862320
    },
    {
      "epoch": 4.6842821887498935,
      "grad_norm": 0.07433784753084183,
      "learning_rate": 6.355962915862233e-07,
      "loss": 0.0082,
      "step": 2862340
    },
    {
      "epoch": 4.684314919188547,
      "grad_norm": 0.28014639019966125,
      "learning_rate": 6.355303993727062e-07,
      "loss": 0.0057,
      "step": 2862360
    },
    {
      "epoch": 4.684347649627201,
      "grad_norm": 0.15861546993255615,
      "learning_rate": 6.35464507159189e-07,
      "loss": 0.0092,
      "step": 2862380
    },
    {
      "epoch": 4.6843803800658534,
      "grad_norm": 0.17832864820957184,
      "learning_rate": 6.353986149456719e-07,
      "loss": 0.0113,
      "step": 2862400
    },
    {
      "epoch": 4.684413110504507,
      "grad_norm": 0.11501172184944153,
      "learning_rate": 6.353327227321548e-07,
      "loss": 0.0062,
      "step": 2862420
    },
    {
      "epoch": 4.684445840943161,
      "grad_norm": 0.14101989567279816,
      "learning_rate": 6.352668305186377e-07,
      "loss": 0.0071,
      "step": 2862440
    },
    {
      "epoch": 4.684478571381813,
      "grad_norm": 0.16429339349269867,
      "learning_rate": 6.352009383051205e-07,
      "loss": 0.0129,
      "step": 2862460
    },
    {
      "epoch": 4.684511301820467,
      "grad_norm": 0.15450714528560638,
      "learning_rate": 6.351350460916034e-07,
      "loss": 0.0084,
      "step": 2862480
    },
    {
      "epoch": 4.6845440322591205,
      "grad_norm": 0.10733684152364731,
      "learning_rate": 6.350691538780863e-07,
      "loss": 0.0128,
      "step": 2862500
    },
    {
      "epoch": 4.684576762697773,
      "grad_norm": 0.2741357982158661,
      "learning_rate": 6.350032616645692e-07,
      "loss": 0.0074,
      "step": 2862520
    },
    {
      "epoch": 4.684609493136427,
      "grad_norm": 0.08553438633680344,
      "learning_rate": 6.34937369451052e-07,
      "loss": 0.0096,
      "step": 2862540
    },
    {
      "epoch": 4.68464222357508,
      "grad_norm": 0.13850191235542297,
      "learning_rate": 6.348714772375349e-07,
      "loss": 0.0126,
      "step": 2862560
    },
    {
      "epoch": 4.684674954013734,
      "grad_norm": 0.12253706157207489,
      "learning_rate": 6.348055850240177e-07,
      "loss": 0.0112,
      "step": 2862580
    },
    {
      "epoch": 4.684707684452387,
      "grad_norm": 0.47383448481559753,
      "learning_rate": 6.347396928105007e-07,
      "loss": 0.0075,
      "step": 2862600
    },
    {
      "epoch": 4.68474041489104,
      "grad_norm": 0.2250877469778061,
      "learning_rate": 6.346738005969835e-07,
      "loss": 0.0103,
      "step": 2862620
    },
    {
      "epoch": 4.684773145329694,
      "grad_norm": 0.09647909551858902,
      "learning_rate": 6.346079083834663e-07,
      "loss": 0.0058,
      "step": 2862640
    },
    {
      "epoch": 4.6848058757683475,
      "grad_norm": 0.22742430865764618,
      "learning_rate": 6.345420161699492e-07,
      "loss": 0.012,
      "step": 2862660
    },
    {
      "epoch": 4.684838606207,
      "grad_norm": 0.3451676666736603,
      "learning_rate": 6.344761239564321e-07,
      "loss": 0.0086,
      "step": 2862680
    },
    {
      "epoch": 4.684871336645654,
      "grad_norm": 0.09749644249677658,
      "learning_rate": 6.34410231742915e-07,
      "loss": 0.0082,
      "step": 2862700
    },
    {
      "epoch": 4.684904067084307,
      "grad_norm": 0.023521194234490395,
      "learning_rate": 6.343443395293978e-07,
      "loss": 0.0083,
      "step": 2862720
    },
    {
      "epoch": 4.68493679752296,
      "grad_norm": 0.1635308563709259,
      "learning_rate": 6.342784473158807e-07,
      "loss": 0.0115,
      "step": 2862740
    },
    {
      "epoch": 4.684969527961614,
      "grad_norm": 0.05427306517958641,
      "learning_rate": 6.342125551023635e-07,
      "loss": 0.0075,
      "step": 2862760
    },
    {
      "epoch": 4.685002258400267,
      "grad_norm": 0.19785098731517792,
      "learning_rate": 6.341466628888465e-07,
      "loss": 0.0111,
      "step": 2862780
    },
    {
      "epoch": 4.68503498883892,
      "grad_norm": 0.3074626326560974,
      "learning_rate": 6.340807706753293e-07,
      "loss": 0.0078,
      "step": 2862800
    },
    {
      "epoch": 4.685067719277574,
      "grad_norm": 0.10740292072296143,
      "learning_rate": 6.340148784618122e-07,
      "loss": 0.0092,
      "step": 2862820
    },
    {
      "epoch": 4.685100449716227,
      "grad_norm": 0.06584161520004272,
      "learning_rate": 6.33948986248295e-07,
      "loss": 0.0093,
      "step": 2862840
    },
    {
      "epoch": 4.685133180154881,
      "grad_norm": 0.2701568901538849,
      "learning_rate": 6.338830940347781e-07,
      "loss": 0.0073,
      "step": 2862860
    },
    {
      "epoch": 4.6851659105935335,
      "grad_norm": 0.15247181057929993,
      "learning_rate": 6.338172018212608e-07,
      "loss": 0.0089,
      "step": 2862880
    },
    {
      "epoch": 4.685198641032187,
      "grad_norm": 0.2799564301967621,
      "learning_rate": 6.337513096077437e-07,
      "loss": 0.0122,
      "step": 2862900
    },
    {
      "epoch": 4.685231371470841,
      "grad_norm": 0.18701115250587463,
      "learning_rate": 6.336854173942265e-07,
      "loss": 0.0079,
      "step": 2862920
    },
    {
      "epoch": 4.685264101909494,
      "grad_norm": 0.7753162980079651,
      "learning_rate": 6.336195251807096e-07,
      "loss": 0.01,
      "step": 2862940
    },
    {
      "epoch": 4.685296832348147,
      "grad_norm": 0.3727237284183502,
      "learning_rate": 6.335536329671924e-07,
      "loss": 0.0079,
      "step": 2862960
    },
    {
      "epoch": 4.685329562786801,
      "grad_norm": 0.10273556411266327,
      "learning_rate": 6.334877407536751e-07,
      "loss": 0.0104,
      "step": 2862980
    },
    {
      "epoch": 4.685362293225454,
      "grad_norm": 0.1982754021883011,
      "learning_rate": 6.33421848540158e-07,
      "loss": 0.0086,
      "step": 2863000
    },
    {
      "epoch": 4.685395023664107,
      "grad_norm": 0.0910758227109909,
      "learning_rate": 6.333559563266408e-07,
      "loss": 0.0087,
      "step": 2863020
    },
    {
      "epoch": 4.6854277541027605,
      "grad_norm": 0.17275159060955048,
      "learning_rate": 6.332900641131239e-07,
      "loss": 0.0064,
      "step": 2863040
    },
    {
      "epoch": 4.685460484541414,
      "grad_norm": 0.7374129295349121,
      "learning_rate": 6.332241718996067e-07,
      "loss": 0.0107,
      "step": 2863060
    },
    {
      "epoch": 4.685493214980067,
      "grad_norm": 0.21413268148899078,
      "learning_rate": 6.331582796860896e-07,
      "loss": 0.0069,
      "step": 2863080
    },
    {
      "epoch": 4.68552594541872,
      "grad_norm": 0.1599273532629013,
      "learning_rate": 6.330923874725723e-07,
      "loss": 0.0126,
      "step": 2863100
    },
    {
      "epoch": 4.685558675857374,
      "grad_norm": 0.12817950546741486,
      "learning_rate": 6.330264952590554e-07,
      "loss": 0.0095,
      "step": 2863120
    },
    {
      "epoch": 4.6855914062960276,
      "grad_norm": 0.1116250529885292,
      "learning_rate": 6.329606030455382e-07,
      "loss": 0.0079,
      "step": 2863140
    },
    {
      "epoch": 4.68562413673468,
      "grad_norm": 0.3652753531932831,
      "learning_rate": 6.328947108320211e-07,
      "loss": 0.0081,
      "step": 2863160
    },
    {
      "epoch": 4.685656867173334,
      "grad_norm": 0.10530415922403336,
      "learning_rate": 6.328288186185039e-07,
      "loss": 0.0072,
      "step": 2863180
    },
    {
      "epoch": 4.6856895976119874,
      "grad_norm": 0.1146899089217186,
      "learning_rate": 6.327629264049867e-07,
      "loss": 0.0066,
      "step": 2863200
    },
    {
      "epoch": 4.68572232805064,
      "grad_norm": 0.23809204995632172,
      "learning_rate": 6.326970341914697e-07,
      "loss": 0.0081,
      "step": 2863220
    },
    {
      "epoch": 4.685755058489294,
      "grad_norm": 0.08874700218439102,
      "learning_rate": 6.326311419779526e-07,
      "loss": 0.0047,
      "step": 2863240
    },
    {
      "epoch": 4.685787788927947,
      "grad_norm": 0.24469612538814545,
      "learning_rate": 6.325652497644354e-07,
      "loss": 0.0096,
      "step": 2863260
    },
    {
      "epoch": 4.685820519366601,
      "grad_norm": 0.26404985785484314,
      "learning_rate": 6.324993575509183e-07,
      "loss": 0.0096,
      "step": 2863280
    },
    {
      "epoch": 4.685853249805254,
      "grad_norm": 0.48494213819503784,
      "learning_rate": 6.324334653374012e-07,
      "loss": 0.0071,
      "step": 2863300
    },
    {
      "epoch": 4.685885980243907,
      "grad_norm": 0.12543544173240662,
      "learning_rate": 6.32367573123884e-07,
      "loss": 0.0061,
      "step": 2863320
    },
    {
      "epoch": 4.685918710682561,
      "grad_norm": 0.20905201137065887,
      "learning_rate": 6.323016809103669e-07,
      "loss": 0.0064,
      "step": 2863340
    },
    {
      "epoch": 4.6859514411212135,
      "grad_norm": 0.04429033398628235,
      "learning_rate": 6.322357886968497e-07,
      "loss": 0.0088,
      "step": 2863360
    },
    {
      "epoch": 4.685984171559867,
      "grad_norm": 0.08191274851560593,
      "learning_rate": 6.321698964833327e-07,
      "loss": 0.0107,
      "step": 2863380
    },
    {
      "epoch": 4.686016901998521,
      "grad_norm": 0.29308345913887024,
      "learning_rate": 6.321040042698155e-07,
      "loss": 0.0103,
      "step": 2863400
    },
    {
      "epoch": 4.686049632437173,
      "grad_norm": 0.15476778149604797,
      "learning_rate": 6.320381120562984e-07,
      "loss": 0.0069,
      "step": 2863420
    },
    {
      "epoch": 4.686082362875827,
      "grad_norm": 0.2747238874435425,
      "learning_rate": 6.319722198427812e-07,
      "loss": 0.0102,
      "step": 2863440
    },
    {
      "epoch": 4.686115093314481,
      "grad_norm": 0.07788965106010437,
      "learning_rate": 6.319063276292641e-07,
      "loss": 0.0105,
      "step": 2863460
    },
    {
      "epoch": 4.686147823753134,
      "grad_norm": 0.16412939131259918,
      "learning_rate": 6.31840435415747e-07,
      "loss": 0.0109,
      "step": 2863480
    },
    {
      "epoch": 4.686180554191787,
      "grad_norm": 0.13996392488479614,
      "learning_rate": 6.317745432022299e-07,
      "loss": 0.0073,
      "step": 2863500
    },
    {
      "epoch": 4.6862132846304405,
      "grad_norm": 0.12323417514562607,
      "learning_rate": 6.317086509887127e-07,
      "loss": 0.0068,
      "step": 2863520
    },
    {
      "epoch": 4.686246015069094,
      "grad_norm": 0.2381913959980011,
      "learning_rate": 6.316427587751956e-07,
      "loss": 0.0091,
      "step": 2863540
    },
    {
      "epoch": 4.686278745507748,
      "grad_norm": 0.4213969111442566,
      "learning_rate": 6.315768665616785e-07,
      "loss": 0.006,
      "step": 2863560
    },
    {
      "epoch": 4.6863114759464,
      "grad_norm": 0.10288471728563309,
      "learning_rate": 6.315109743481614e-07,
      "loss": 0.0057,
      "step": 2863580
    },
    {
      "epoch": 4.686344206385054,
      "grad_norm": 0.11157941818237305,
      "learning_rate": 6.314450821346442e-07,
      "loss": 0.0051,
      "step": 2863600
    },
    {
      "epoch": 4.686376936823708,
      "grad_norm": 0.6841742396354675,
      "learning_rate": 6.31379189921127e-07,
      "loss": 0.0096,
      "step": 2863620
    },
    {
      "epoch": 4.68640966726236,
      "grad_norm": 0.14865128695964813,
      "learning_rate": 6.313132977076099e-07,
      "loss": 0.0073,
      "step": 2863640
    },
    {
      "epoch": 4.686442397701014,
      "grad_norm": 0.3991040289402008,
      "learning_rate": 6.312474054940928e-07,
      "loss": 0.0067,
      "step": 2863660
    },
    {
      "epoch": 4.6864751281396675,
      "grad_norm": 0.08190912753343582,
      "learning_rate": 6.311815132805757e-07,
      "loss": 0.0103,
      "step": 2863680
    },
    {
      "epoch": 4.68650785857832,
      "grad_norm": 0.23124761879444122,
      "learning_rate": 6.311156210670585e-07,
      "loss": 0.0079,
      "step": 2863700
    },
    {
      "epoch": 4.686540589016974,
      "grad_norm": 0.3053925037384033,
      "learning_rate": 6.310497288535414e-07,
      "loss": 0.0084,
      "step": 2863720
    },
    {
      "epoch": 4.686573319455627,
      "grad_norm": 0.18335001170635223,
      "learning_rate": 6.309838366400243e-07,
      "loss": 0.0066,
      "step": 2863740
    },
    {
      "epoch": 4.686606049894281,
      "grad_norm": 0.38148826360702515,
      "learning_rate": 6.309179444265072e-07,
      "loss": 0.0127,
      "step": 2863760
    },
    {
      "epoch": 4.686638780332934,
      "grad_norm": 0.23859654366970062,
      "learning_rate": 6.3085205221299e-07,
      "loss": 0.0109,
      "step": 2863780
    },
    {
      "epoch": 4.686671510771587,
      "grad_norm": 0.2904040813446045,
      "learning_rate": 6.307861599994729e-07,
      "loss": 0.0139,
      "step": 2863800
    },
    {
      "epoch": 4.686704241210241,
      "grad_norm": 0.13816618919372559,
      "learning_rate": 6.307202677859558e-07,
      "loss": 0.0066,
      "step": 2863820
    },
    {
      "epoch": 4.6867369716488945,
      "grad_norm": 0.21376632153987885,
      "learning_rate": 6.306543755724387e-07,
      "loss": 0.0131,
      "step": 2863840
    },
    {
      "epoch": 4.686769702087547,
      "grad_norm": 0.11202681064605713,
      "learning_rate": 6.305884833589215e-07,
      "loss": 0.0131,
      "step": 2863860
    },
    {
      "epoch": 4.686802432526201,
      "grad_norm": 0.16955026984214783,
      "learning_rate": 6.305225911454044e-07,
      "loss": 0.0101,
      "step": 2863880
    },
    {
      "epoch": 4.686835162964854,
      "grad_norm": 0.2286645770072937,
      "learning_rate": 6.304566989318872e-07,
      "loss": 0.0124,
      "step": 2863900
    },
    {
      "epoch": 4.686867893403507,
      "grad_norm": 0.36271345615386963,
      "learning_rate": 6.303908067183702e-07,
      "loss": 0.0111,
      "step": 2863920
    },
    {
      "epoch": 4.686900623842161,
      "grad_norm": 0.35719189047813416,
      "learning_rate": 6.30324914504853e-07,
      "loss": 0.0084,
      "step": 2863940
    },
    {
      "epoch": 4.686933354280814,
      "grad_norm": 0.16675716638565063,
      "learning_rate": 6.302590222913359e-07,
      "loss": 0.0082,
      "step": 2863960
    },
    {
      "epoch": 4.686966084719467,
      "grad_norm": 0.26601332426071167,
      "learning_rate": 6.301931300778187e-07,
      "loss": 0.0051,
      "step": 2863980
    },
    {
      "epoch": 4.686998815158121,
      "grad_norm": 0.2198629230260849,
      "learning_rate": 6.301272378643016e-07,
      "loss": 0.0124,
      "step": 2864000
    },
    {
      "epoch": 4.687031545596774,
      "grad_norm": 0.2509188652038574,
      "learning_rate": 6.300613456507845e-07,
      "loss": 0.0069,
      "step": 2864020
    },
    {
      "epoch": 4.687064276035428,
      "grad_norm": 0.18699991703033447,
      "learning_rate": 6.299954534372673e-07,
      "loss": 0.0083,
      "step": 2864040
    },
    {
      "epoch": 4.6870970064740805,
      "grad_norm": 0.16774439811706543,
      "learning_rate": 6.299295612237502e-07,
      "loss": 0.0089,
      "step": 2864060
    },
    {
      "epoch": 4.687129736912734,
      "grad_norm": 0.42400598526000977,
      "learning_rate": 6.29863669010233e-07,
      "loss": 0.012,
      "step": 2864080
    },
    {
      "epoch": 4.687162467351388,
      "grad_norm": 0.4064062237739563,
      "learning_rate": 6.29797776796716e-07,
      "loss": 0.0071,
      "step": 2864100
    },
    {
      "epoch": 4.687195197790041,
      "grad_norm": 0.6711173057556152,
      "learning_rate": 6.297318845831988e-07,
      "loss": 0.009,
      "step": 2864120
    },
    {
      "epoch": 4.687227928228694,
      "grad_norm": 0.28403523564338684,
      "learning_rate": 6.296659923696817e-07,
      "loss": 0.0098,
      "step": 2864140
    },
    {
      "epoch": 4.6872606586673475,
      "grad_norm": 0.15194739401340485,
      "learning_rate": 6.296001001561645e-07,
      "loss": 0.0092,
      "step": 2864160
    },
    {
      "epoch": 4.687293389106001,
      "grad_norm": 0.15823112428188324,
      "learning_rate": 6.295342079426476e-07,
      "loss": 0.0071,
      "step": 2864180
    },
    {
      "epoch": 4.687326119544654,
      "grad_norm": 0.1805398166179657,
      "learning_rate": 6.294683157291303e-07,
      "loss": 0.0081,
      "step": 2864200
    },
    {
      "epoch": 4.687358849983307,
      "grad_norm": 0.42034122347831726,
      "learning_rate": 6.294024235156132e-07,
      "loss": 0.0125,
      "step": 2864220
    },
    {
      "epoch": 4.687391580421961,
      "grad_norm": 0.08872534334659576,
      "learning_rate": 6.29336531302096e-07,
      "loss": 0.0075,
      "step": 2864240
    },
    {
      "epoch": 4.687424310860614,
      "grad_norm": 0.37667176127433777,
      "learning_rate": 6.292706390885791e-07,
      "loss": 0.0088,
      "step": 2864260
    },
    {
      "epoch": 4.687457041299267,
      "grad_norm": 0.23022831976413727,
      "learning_rate": 6.292047468750619e-07,
      "loss": 0.0083,
      "step": 2864280
    },
    {
      "epoch": 4.687489771737921,
      "grad_norm": 0.177168607711792,
      "learning_rate": 6.291388546615446e-07,
      "loss": 0.0074,
      "step": 2864300
    },
    {
      "epoch": 4.6875225021765745,
      "grad_norm": 0.1020418033003807,
      "learning_rate": 6.290729624480275e-07,
      "loss": 0.0111,
      "step": 2864320
    },
    {
      "epoch": 4.687555232615227,
      "grad_norm": 0.6063222885131836,
      "learning_rate": 6.290070702345103e-07,
      "loss": 0.0068,
      "step": 2864340
    },
    {
      "epoch": 4.687587963053881,
      "grad_norm": 0.16777795553207397,
      "learning_rate": 6.289411780209934e-07,
      "loss": 0.0085,
      "step": 2864360
    },
    {
      "epoch": 4.687620693492534,
      "grad_norm": 0.2683669626712799,
      "learning_rate": 6.288752858074762e-07,
      "loss": 0.0063,
      "step": 2864380
    },
    {
      "epoch": 4.687653423931188,
      "grad_norm": 0.5688700079917908,
      "learning_rate": 6.288093935939591e-07,
      "loss": 0.0076,
      "step": 2864400
    },
    {
      "epoch": 4.687686154369841,
      "grad_norm": 0.5080305337905884,
      "learning_rate": 6.287435013804418e-07,
      "loss": 0.0078,
      "step": 2864420
    },
    {
      "epoch": 4.687718884808494,
      "grad_norm": 0.13802111148834229,
      "learning_rate": 6.286776091669249e-07,
      "loss": 0.0109,
      "step": 2864440
    },
    {
      "epoch": 4.687751615247148,
      "grad_norm": 0.07225062698125839,
      "learning_rate": 6.286117169534077e-07,
      "loss": 0.0101,
      "step": 2864460
    },
    {
      "epoch": 4.687784345685801,
      "grad_norm": 0.1839805543422699,
      "learning_rate": 6.285458247398906e-07,
      "loss": 0.0079,
      "step": 2864480
    },
    {
      "epoch": 4.687817076124454,
      "grad_norm": 0.195512592792511,
      "learning_rate": 6.284799325263734e-07,
      "loss": 0.0099,
      "step": 2864500
    },
    {
      "epoch": 4.687849806563108,
      "grad_norm": 0.3951163589954376,
      "learning_rate": 6.284140403128563e-07,
      "loss": 0.0112,
      "step": 2864520
    },
    {
      "epoch": 4.6878825370017605,
      "grad_norm": 0.1341238170862198,
      "learning_rate": 6.283481480993392e-07,
      "loss": 0.0121,
      "step": 2864540
    },
    {
      "epoch": 4.687915267440414,
      "grad_norm": 0.4545306861400604,
      "learning_rate": 6.282822558858221e-07,
      "loss": 0.0095,
      "step": 2864560
    },
    {
      "epoch": 4.687947997879068,
      "grad_norm": 0.2713151276111603,
      "learning_rate": 6.282163636723049e-07,
      "loss": 0.0093,
      "step": 2864580
    },
    {
      "epoch": 4.687980728317721,
      "grad_norm": 0.05538865551352501,
      "learning_rate": 6.281504714587878e-07,
      "loss": 0.0102,
      "step": 2864600
    },
    {
      "epoch": 4.688013458756374,
      "grad_norm": 0.23406723141670227,
      "learning_rate": 6.280845792452707e-07,
      "loss": 0.0059,
      "step": 2864620
    },
    {
      "epoch": 4.688046189195028,
      "grad_norm": 0.20355133712291718,
      "learning_rate": 6.280186870317536e-07,
      "loss": 0.0143,
      "step": 2864640
    },
    {
      "epoch": 4.688078919633681,
      "grad_norm": 0.15337586402893066,
      "learning_rate": 6.279527948182364e-07,
      "loss": 0.0079,
      "step": 2864660
    },
    {
      "epoch": 4.688111650072335,
      "grad_norm": 0.13107940554618835,
      "learning_rate": 6.278869026047192e-07,
      "loss": 0.0084,
      "step": 2864680
    },
    {
      "epoch": 4.6881443805109875,
      "grad_norm": 0.04133051261305809,
      "learning_rate": 6.278210103912022e-07,
      "loss": 0.0067,
      "step": 2864700
    },
    {
      "epoch": 4.688177110949641,
      "grad_norm": 0.25274020433425903,
      "learning_rate": 6.27755118177685e-07,
      "loss": 0.0088,
      "step": 2864720
    },
    {
      "epoch": 4.688209841388295,
      "grad_norm": 0.1464998424053192,
      "learning_rate": 6.276892259641679e-07,
      "loss": 0.0085,
      "step": 2864740
    },
    {
      "epoch": 4.688242571826947,
      "grad_norm": 0.14006741344928741,
      "learning_rate": 6.276233337506507e-07,
      "loss": 0.0123,
      "step": 2864760
    },
    {
      "epoch": 4.688275302265601,
      "grad_norm": 0.08520650863647461,
      "learning_rate": 6.275574415371336e-07,
      "loss": 0.0085,
      "step": 2864780
    },
    {
      "epoch": 4.688308032704255,
      "grad_norm": 0.2566435635089874,
      "learning_rate": 6.274915493236165e-07,
      "loss": 0.0116,
      "step": 2864800
    },
    {
      "epoch": 4.688340763142907,
      "grad_norm": 0.04023677110671997,
      "learning_rate": 6.274256571100994e-07,
      "loss": 0.0054,
      "step": 2864820
    },
    {
      "epoch": 4.688373493581561,
      "grad_norm": 0.14000865817070007,
      "learning_rate": 6.273597648965822e-07,
      "loss": 0.007,
      "step": 2864840
    },
    {
      "epoch": 4.6884062240202145,
      "grad_norm": 0.4335118234157562,
      "learning_rate": 6.272938726830651e-07,
      "loss": 0.0059,
      "step": 2864860
    },
    {
      "epoch": 4.688438954458867,
      "grad_norm": 0.04116715118288994,
      "learning_rate": 6.27227980469548e-07,
      "loss": 0.0066,
      "step": 2864880
    },
    {
      "epoch": 4.688471684897521,
      "grad_norm": 0.13825342059135437,
      "learning_rate": 6.271620882560309e-07,
      "loss": 0.0071,
      "step": 2864900
    },
    {
      "epoch": 4.688504415336174,
      "grad_norm": 0.4366210699081421,
      "learning_rate": 6.270961960425137e-07,
      "loss": 0.0085,
      "step": 2864920
    },
    {
      "epoch": 4.688537145774828,
      "grad_norm": 0.4600781202316284,
      "learning_rate": 6.270303038289966e-07,
      "loss": 0.0052,
      "step": 2864940
    },
    {
      "epoch": 4.688569876213481,
      "grad_norm": 0.40669578313827515,
      "learning_rate": 6.269644116154794e-07,
      "loss": 0.0057,
      "step": 2864960
    },
    {
      "epoch": 4.688602606652134,
      "grad_norm": 0.20963428914546967,
      "learning_rate": 6.268985194019624e-07,
      "loss": 0.007,
      "step": 2864980
    },
    {
      "epoch": 4.688635337090788,
      "grad_norm": 0.32602638006210327,
      "learning_rate": 6.268326271884452e-07,
      "loss": 0.0094,
      "step": 2865000
    },
    {
      "epoch": 4.688668067529441,
      "grad_norm": 0.28410497307777405,
      "learning_rate": 6.26766734974928e-07,
      "loss": 0.0073,
      "step": 2865020
    },
    {
      "epoch": 4.688700797968094,
      "grad_norm": 0.23941396176815033,
      "learning_rate": 6.267008427614109e-07,
      "loss": 0.0065,
      "step": 2865040
    },
    {
      "epoch": 4.688733528406748,
      "grad_norm": 0.300325870513916,
      "learning_rate": 6.266349505478938e-07,
      "loss": 0.0098,
      "step": 2865060
    },
    {
      "epoch": 4.688766258845401,
      "grad_norm": 0.16796906292438507,
      "learning_rate": 6.265690583343767e-07,
      "loss": 0.0084,
      "step": 2865080
    },
    {
      "epoch": 4.688798989284054,
      "grad_norm": 0.16379167139530182,
      "learning_rate": 6.265031661208595e-07,
      "loss": 0.0086,
      "step": 2865100
    },
    {
      "epoch": 4.688831719722708,
      "grad_norm": 0.42947763204574585,
      "learning_rate": 6.264372739073424e-07,
      "loss": 0.0069,
      "step": 2865120
    },
    {
      "epoch": 4.688864450161361,
      "grad_norm": 0.14869160950183868,
      "learning_rate": 6.263713816938253e-07,
      "loss": 0.0123,
      "step": 2865140
    },
    {
      "epoch": 4.688897180600014,
      "grad_norm": 0.24280895292758942,
      "learning_rate": 6.263054894803082e-07,
      "loss": 0.0097,
      "step": 2865160
    },
    {
      "epoch": 4.6889299110386675,
      "grad_norm": 0.14877751469612122,
      "learning_rate": 6.26239597266791e-07,
      "loss": 0.0086,
      "step": 2865180
    },
    {
      "epoch": 4.688962641477321,
      "grad_norm": 0.1430426388978958,
      "learning_rate": 6.261737050532739e-07,
      "loss": 0.0162,
      "step": 2865200
    },
    {
      "epoch": 4.688995371915975,
      "grad_norm": 0.12011908739805222,
      "learning_rate": 6.261078128397567e-07,
      "loss": 0.0059,
      "step": 2865220
    },
    {
      "epoch": 4.689028102354627,
      "grad_norm": 0.264081209897995,
      "learning_rate": 6.260419206262397e-07,
      "loss": 0.0056,
      "step": 2865240
    },
    {
      "epoch": 4.689060832793281,
      "grad_norm": 0.15987999737262726,
      "learning_rate": 6.259760284127225e-07,
      "loss": 0.0127,
      "step": 2865260
    },
    {
      "epoch": 4.689093563231935,
      "grad_norm": 0.17150461673736572,
      "learning_rate": 6.259101361992054e-07,
      "loss": 0.0092,
      "step": 2865280
    },
    {
      "epoch": 4.689126293670588,
      "grad_norm": 0.24761933088302612,
      "learning_rate": 6.258442439856882e-07,
      "loss": 0.0064,
      "step": 2865300
    },
    {
      "epoch": 4.689159024109241,
      "grad_norm": 0.22808538377285004,
      "learning_rate": 6.257783517721712e-07,
      "loss": 0.0108,
      "step": 2865320
    },
    {
      "epoch": 4.6891917545478945,
      "grad_norm": 0.05282331258058548,
      "learning_rate": 6.25712459558654e-07,
      "loss": 0.0072,
      "step": 2865340
    },
    {
      "epoch": 4.689224484986548,
      "grad_norm": 0.21870936453342438,
      "learning_rate": 6.256465673451368e-07,
      "loss": 0.0079,
      "step": 2865360
    },
    {
      "epoch": 4.689257215425201,
      "grad_norm": 0.292755663394928,
      "learning_rate": 6.255806751316197e-07,
      "loss": 0.0088,
      "step": 2865380
    },
    {
      "epoch": 4.689289945863854,
      "grad_norm": 0.16068010032176971,
      "learning_rate": 6.255147829181025e-07,
      "loss": 0.0102,
      "step": 2865400
    },
    {
      "epoch": 4.689322676302508,
      "grad_norm": 0.8678787350654602,
      "learning_rate": 6.254488907045855e-07,
      "loss": 0.0088,
      "step": 2865420
    },
    {
      "epoch": 4.689355406741161,
      "grad_norm": 0.13670511543750763,
      "learning_rate": 6.253829984910683e-07,
      "loss": 0.0064,
      "step": 2865440
    },
    {
      "epoch": 4.689388137179814,
      "grad_norm": 0.1353076845407486,
      "learning_rate": 6.253171062775512e-07,
      "loss": 0.0074,
      "step": 2865460
    },
    {
      "epoch": 4.689420867618468,
      "grad_norm": 0.10065265744924545,
      "learning_rate": 6.25251214064034e-07,
      "loss": 0.009,
      "step": 2865480
    },
    {
      "epoch": 4.6894535980571215,
      "grad_norm": 0.2682209610939026,
      "learning_rate": 6.251853218505171e-07,
      "loss": 0.0095,
      "step": 2865500
    },
    {
      "epoch": 4.689486328495774,
      "grad_norm": 0.1785210520029068,
      "learning_rate": 6.251194296369998e-07,
      "loss": 0.008,
      "step": 2865520
    },
    {
      "epoch": 4.689519058934428,
      "grad_norm": 0.24993586540222168,
      "learning_rate": 6.250535374234827e-07,
      "loss": 0.0065,
      "step": 2865540
    },
    {
      "epoch": 4.689551789373081,
      "grad_norm": 0.26868414878845215,
      "learning_rate": 6.249876452099656e-07,
      "loss": 0.0077,
      "step": 2865560
    },
    {
      "epoch": 4.689584519811735,
      "grad_norm": 0.5250013470649719,
      "learning_rate": 6.249217529964485e-07,
      "loss": 0.0099,
      "step": 2865580
    },
    {
      "epoch": 4.689617250250388,
      "grad_norm": 0.23237764835357666,
      "learning_rate": 6.248558607829313e-07,
      "loss": 0.0103,
      "step": 2865600
    },
    {
      "epoch": 4.689649980689041,
      "grad_norm": 0.15240348875522614,
      "learning_rate": 6.247899685694142e-07,
      "loss": 0.0097,
      "step": 2865620
    },
    {
      "epoch": 4.689682711127695,
      "grad_norm": 0.19906429946422577,
      "learning_rate": 6.24724076355897e-07,
      "loss": 0.0057,
      "step": 2865640
    },
    {
      "epoch": 4.689715441566348,
      "grad_norm": 0.35694482922554016,
      "learning_rate": 6.2465818414238e-07,
      "loss": 0.0071,
      "step": 2865660
    },
    {
      "epoch": 4.689748172005001,
      "grad_norm": 0.13543188571929932,
      "learning_rate": 6.245922919288628e-07,
      "loss": 0.0103,
      "step": 2865680
    },
    {
      "epoch": 4.689780902443655,
      "grad_norm": 0.40087902545928955,
      "learning_rate": 6.245263997153457e-07,
      "loss": 0.0104,
      "step": 2865700
    },
    {
      "epoch": 4.6898136328823075,
      "grad_norm": 0.08591108024120331,
      "learning_rate": 6.244605075018286e-07,
      "loss": 0.0053,
      "step": 2865720
    },
    {
      "epoch": 4.689846363320961,
      "grad_norm": 0.2752959132194519,
      "learning_rate": 6.243946152883114e-07,
      "loss": 0.0072,
      "step": 2865740
    },
    {
      "epoch": 4.689879093759615,
      "grad_norm": 0.12797001004219055,
      "learning_rate": 6.243287230747943e-07,
      "loss": 0.0117,
      "step": 2865760
    },
    {
      "epoch": 4.689911824198268,
      "grad_norm": 0.2135746031999588,
      "learning_rate": 6.242628308612772e-07,
      "loss": 0.0091,
      "step": 2865780
    },
    {
      "epoch": 4.689944554636921,
      "grad_norm": 0.3574100434780121,
      "learning_rate": 6.241969386477601e-07,
      "loss": 0.0094,
      "step": 2865800
    },
    {
      "epoch": 4.6899772850755745,
      "grad_norm": 0.0547386035323143,
      "learning_rate": 6.241310464342429e-07,
      "loss": 0.0134,
      "step": 2865820
    },
    {
      "epoch": 4.690010015514228,
      "grad_norm": 0.25528720021247864,
      "learning_rate": 6.240651542207258e-07,
      "loss": 0.0085,
      "step": 2865840
    },
    {
      "epoch": 4.690042745952882,
      "grad_norm": 0.29123327136039734,
      "learning_rate": 6.239992620072086e-07,
      "loss": 0.0075,
      "step": 2865860
    },
    {
      "epoch": 4.690075476391534,
      "grad_norm": 0.21023567020893097,
      "learning_rate": 6.239333697936916e-07,
      "loss": 0.0148,
      "step": 2865880
    },
    {
      "epoch": 4.690108206830188,
      "grad_norm": 0.13931488990783691,
      "learning_rate": 6.238674775801744e-07,
      "loss": 0.0101,
      "step": 2865900
    },
    {
      "epoch": 4.690140937268842,
      "grad_norm": 0.08126410841941833,
      "learning_rate": 6.238015853666573e-07,
      "loss": 0.0092,
      "step": 2865920
    },
    {
      "epoch": 4.690173667707494,
      "grad_norm": 0.4143362045288086,
      "learning_rate": 6.237356931531401e-07,
      "loss": 0.0066,
      "step": 2865940
    },
    {
      "epoch": 4.690206398146148,
      "grad_norm": 0.09600174427032471,
      "learning_rate": 6.236698009396231e-07,
      "loss": 0.008,
      "step": 2865960
    },
    {
      "epoch": 4.6902391285848015,
      "grad_norm": 0.1968461275100708,
      "learning_rate": 6.236039087261059e-07,
      "loss": 0.0084,
      "step": 2865980
    },
    {
      "epoch": 4.690271859023454,
      "grad_norm": 0.3727334439754486,
      "learning_rate": 6.235380165125888e-07,
      "loss": 0.0088,
      "step": 2866000
    },
    {
      "epoch": 4.690304589462108,
      "grad_norm": 0.24461965262889862,
      "learning_rate": 6.234721242990716e-07,
      "loss": 0.0063,
      "step": 2866020
    },
    {
      "epoch": 4.690337319900761,
      "grad_norm": 0.3463640809059143,
      "learning_rate": 6.234062320855544e-07,
      "loss": 0.0077,
      "step": 2866040
    },
    {
      "epoch": 4.690370050339415,
      "grad_norm": 0.16677293181419373,
      "learning_rate": 6.233403398720374e-07,
      "loss": 0.0074,
      "step": 2866060
    },
    {
      "epoch": 4.690402780778068,
      "grad_norm": 0.2616611421108246,
      "learning_rate": 6.232744476585202e-07,
      "loss": 0.0097,
      "step": 2866080
    },
    {
      "epoch": 4.690435511216721,
      "grad_norm": 0.42774468660354614,
      "learning_rate": 6.232085554450031e-07,
      "loss": 0.0097,
      "step": 2866100
    },
    {
      "epoch": 4.690468241655375,
      "grad_norm": 0.20440427958965302,
      "learning_rate": 6.231426632314859e-07,
      "loss": 0.0101,
      "step": 2866120
    },
    {
      "epoch": 4.6905009720940285,
      "grad_norm": 0.3437079191207886,
      "learning_rate": 6.230767710179689e-07,
      "loss": 0.0069,
      "step": 2866140
    },
    {
      "epoch": 4.690533702532681,
      "grad_norm": 0.4336283206939697,
      "learning_rate": 6.230108788044517e-07,
      "loss": 0.0102,
      "step": 2866160
    },
    {
      "epoch": 4.690566432971335,
      "grad_norm": 0.14868567883968353,
      "learning_rate": 6.229449865909346e-07,
      "loss": 0.0124,
      "step": 2866180
    },
    {
      "epoch": 4.690599163409988,
      "grad_norm": 0.11318144202232361,
      "learning_rate": 6.228790943774174e-07,
      "loss": 0.0056,
      "step": 2866200
    },
    {
      "epoch": 4.690631893848641,
      "grad_norm": 0.3524346947669983,
      "learning_rate": 6.228132021639004e-07,
      "loss": 0.0058,
      "step": 2866220
    },
    {
      "epoch": 4.690664624287295,
      "grad_norm": 0.20691633224487305,
      "learning_rate": 6.227473099503832e-07,
      "loss": 0.006,
      "step": 2866240
    },
    {
      "epoch": 4.690697354725948,
      "grad_norm": 0.21930642426013947,
      "learning_rate": 6.226814177368662e-07,
      "loss": 0.0073,
      "step": 2866260
    },
    {
      "epoch": 4.690730085164601,
      "grad_norm": 0.40953364968299866,
      "learning_rate": 6.226155255233489e-07,
      "loss": 0.0093,
      "step": 2866280
    },
    {
      "epoch": 4.690762815603255,
      "grad_norm": 0.15155282616615295,
      "learning_rate": 6.225496333098318e-07,
      "loss": 0.0082,
      "step": 2866300
    },
    {
      "epoch": 4.690795546041908,
      "grad_norm": 0.4159529507160187,
      "learning_rate": 6.224837410963147e-07,
      "loss": 0.0138,
      "step": 2866320
    },
    {
      "epoch": 4.690828276480562,
      "grad_norm": 0.30924028158187866,
      "learning_rate": 6.224178488827976e-07,
      "loss": 0.01,
      "step": 2866340
    },
    {
      "epoch": 4.6908610069192145,
      "grad_norm": 0.158943772315979,
      "learning_rate": 6.223519566692805e-07,
      "loss": 0.006,
      "step": 2866360
    },
    {
      "epoch": 4.690893737357868,
      "grad_norm": 0.07995965331792831,
      "learning_rate": 6.222860644557633e-07,
      "loss": 0.0066,
      "step": 2866380
    },
    {
      "epoch": 4.690926467796522,
      "grad_norm": 0.2430373728275299,
      "learning_rate": 6.222201722422462e-07,
      "loss": 0.0105,
      "step": 2866400
    },
    {
      "epoch": 4.690959198235174,
      "grad_norm": 0.28091567754745483,
      "learning_rate": 6.22154280028729e-07,
      "loss": 0.0098,
      "step": 2866420
    },
    {
      "epoch": 4.690991928673828,
      "grad_norm": 0.5608746409416199,
      "learning_rate": 6.22088387815212e-07,
      "loss": 0.0113,
      "step": 2866440
    },
    {
      "epoch": 4.691024659112482,
      "grad_norm": 0.3911965787410736,
      "learning_rate": 6.220224956016948e-07,
      "loss": 0.0082,
      "step": 2866460
    },
    {
      "epoch": 4.691057389551135,
      "grad_norm": 0.11885134130716324,
      "learning_rate": 6.219566033881777e-07,
      "loss": 0.0082,
      "step": 2866480
    },
    {
      "epoch": 4.691090119989788,
      "grad_norm": 0.16228806972503662,
      "learning_rate": 6.218907111746605e-07,
      "loss": 0.0063,
      "step": 2866500
    },
    {
      "epoch": 4.6911228504284415,
      "grad_norm": 0.11837798357009888,
      "learning_rate": 6.218248189611434e-07,
      "loss": 0.0096,
      "step": 2866520
    },
    {
      "epoch": 4.691155580867095,
      "grad_norm": 0.2616599500179291,
      "learning_rate": 6.217589267476263e-07,
      "loss": 0.0099,
      "step": 2866540
    },
    {
      "epoch": 4.691188311305748,
      "grad_norm": 0.25195926427841187,
      "learning_rate": 6.216930345341092e-07,
      "loss": 0.0172,
      "step": 2866560
    },
    {
      "epoch": 4.691221041744401,
      "grad_norm": 0.087679423391819,
      "learning_rate": 6.21627142320592e-07,
      "loss": 0.0086,
      "step": 2866580
    },
    {
      "epoch": 4.691253772183055,
      "grad_norm": 0.4311419129371643,
      "learning_rate": 6.215612501070749e-07,
      "loss": 0.009,
      "step": 2866600
    },
    {
      "epoch": 4.691286502621708,
      "grad_norm": 0.21390846371650696,
      "learning_rate": 6.214953578935578e-07,
      "loss": 0.0093,
      "step": 2866620
    },
    {
      "epoch": 4.691319233060361,
      "grad_norm": 0.30164504051208496,
      "learning_rate": 6.214294656800407e-07,
      "loss": 0.0124,
      "step": 2866640
    },
    {
      "epoch": 4.691351963499015,
      "grad_norm": 0.1448296457529068,
      "learning_rate": 6.213635734665235e-07,
      "loss": 0.0082,
      "step": 2866660
    },
    {
      "epoch": 4.691384693937668,
      "grad_norm": 0.2950791120529175,
      "learning_rate": 6.212976812530063e-07,
      "loss": 0.0072,
      "step": 2866680
    },
    {
      "epoch": 4.691417424376321,
      "grad_norm": 0.2426394373178482,
      "learning_rate": 6.212317890394893e-07,
      "loss": 0.0094,
      "step": 2866700
    },
    {
      "epoch": 4.691450154814975,
      "grad_norm": 0.27310532331466675,
      "learning_rate": 6.211658968259721e-07,
      "loss": 0.0061,
      "step": 2866720
    },
    {
      "epoch": 4.691482885253628,
      "grad_norm": 0.31711897253990173,
      "learning_rate": 6.21100004612455e-07,
      "loss": 0.0087,
      "step": 2866740
    },
    {
      "epoch": 4.691515615692282,
      "grad_norm": 0.17055556178092957,
      "learning_rate": 6.210341123989378e-07,
      "loss": 0.0098,
      "step": 2866760
    },
    {
      "epoch": 4.691548346130935,
      "grad_norm": 0.20647671818733215,
      "learning_rate": 6.209682201854207e-07,
      "loss": 0.0069,
      "step": 2866780
    },
    {
      "epoch": 4.691581076569588,
      "grad_norm": 0.321544885635376,
      "learning_rate": 6.209023279719036e-07,
      "loss": 0.0082,
      "step": 2866800
    },
    {
      "epoch": 4.691613807008242,
      "grad_norm": 0.060760390013456345,
      "learning_rate": 6.208364357583865e-07,
      "loss": 0.0061,
      "step": 2866820
    },
    {
      "epoch": 4.6916465374468945,
      "grad_norm": 0.4447089731693268,
      "learning_rate": 6.207705435448693e-07,
      "loss": 0.01,
      "step": 2866840
    },
    {
      "epoch": 4.691679267885548,
      "grad_norm": 0.13593867421150208,
      "learning_rate": 6.207046513313522e-07,
      "loss": 0.0079,
      "step": 2866860
    },
    {
      "epoch": 4.691711998324202,
      "grad_norm": 0.2289845049381256,
      "learning_rate": 6.206387591178351e-07,
      "loss": 0.0054,
      "step": 2866880
    },
    {
      "epoch": 4.691744728762854,
      "grad_norm": 0.296091765165329,
      "learning_rate": 6.20572866904318e-07,
      "loss": 0.0152,
      "step": 2866900
    },
    {
      "epoch": 4.691777459201508,
      "grad_norm": 0.22938455641269684,
      "learning_rate": 6.205069746908009e-07,
      "loss": 0.0071,
      "step": 2866920
    },
    {
      "epoch": 4.691810189640162,
      "grad_norm": 0.08773519843816757,
      "learning_rate": 6.204410824772837e-07,
      "loss": 0.0089,
      "step": 2866940
    },
    {
      "epoch": 4.691842920078815,
      "grad_norm": 0.12717920541763306,
      "learning_rate": 6.203751902637665e-07,
      "loss": 0.0079,
      "step": 2866960
    },
    {
      "epoch": 4.691875650517468,
      "grad_norm": 0.5488839149475098,
      "learning_rate": 6.203092980502495e-07,
      "loss": 0.0104,
      "step": 2866980
    },
    {
      "epoch": 4.6919083809561215,
      "grad_norm": 0.5800207853317261,
      "learning_rate": 6.202434058367323e-07,
      "loss": 0.0064,
      "step": 2867000
    },
    {
      "epoch": 4.691941111394775,
      "grad_norm": 0.3737303912639618,
      "learning_rate": 6.201775136232153e-07,
      "loss": 0.0082,
      "step": 2867020
    },
    {
      "epoch": 4.691973841833429,
      "grad_norm": 0.34121474623680115,
      "learning_rate": 6.201116214096981e-07,
      "loss": 0.013,
      "step": 2867040
    },
    {
      "epoch": 4.692006572272081,
      "grad_norm": 0.2715771496295929,
      "learning_rate": 6.200457291961809e-07,
      "loss": 0.0112,
      "step": 2867060
    },
    {
      "epoch": 4.692039302710735,
      "grad_norm": 0.30529657006263733,
      "learning_rate": 6.199798369826638e-07,
      "loss": 0.0131,
      "step": 2867080
    },
    {
      "epoch": 4.692072033149389,
      "grad_norm": 0.053334929049015045,
      "learning_rate": 6.199139447691467e-07,
      "loss": 0.0072,
      "step": 2867100
    },
    {
      "epoch": 4.692104763588041,
      "grad_norm": 0.05838409811258316,
      "learning_rate": 6.198480525556296e-07,
      "loss": 0.008,
      "step": 2867120
    },
    {
      "epoch": 4.692137494026695,
      "grad_norm": 0.4020497500896454,
      "learning_rate": 6.197821603421124e-07,
      "loss": 0.0093,
      "step": 2867140
    },
    {
      "epoch": 4.6921702244653485,
      "grad_norm": 0.2734583020210266,
      "learning_rate": 6.197162681285953e-07,
      "loss": 0.0076,
      "step": 2867160
    },
    {
      "epoch": 4.692202954904001,
      "grad_norm": 0.18985219299793243,
      "learning_rate": 6.196503759150781e-07,
      "loss": 0.0086,
      "step": 2867180
    },
    {
      "epoch": 4.692235685342655,
      "grad_norm": 0.13188722729682922,
      "learning_rate": 6.195844837015611e-07,
      "loss": 0.0088,
      "step": 2867200
    },
    {
      "epoch": 4.692268415781308,
      "grad_norm": 0.37167075276374817,
      "learning_rate": 6.195185914880439e-07,
      "loss": 0.0084,
      "step": 2867220
    },
    {
      "epoch": 4.692301146219962,
      "grad_norm": 0.09426826983690262,
      "learning_rate": 6.194526992745268e-07,
      "loss": 0.0053,
      "step": 2867240
    },
    {
      "epoch": 4.692333876658615,
      "grad_norm": 0.13004696369171143,
      "learning_rate": 6.193868070610096e-07,
      "loss": 0.0102,
      "step": 2867260
    },
    {
      "epoch": 4.692366607097268,
      "grad_norm": 0.4179815351963043,
      "learning_rate": 6.193209148474926e-07,
      "loss": 0.0104,
      "step": 2867280
    },
    {
      "epoch": 4.692399337535922,
      "grad_norm": 0.19503161311149597,
      "learning_rate": 6.192550226339754e-07,
      "loss": 0.0088,
      "step": 2867300
    },
    {
      "epoch": 4.6924320679745755,
      "grad_norm": 0.4190264046192169,
      "learning_rate": 6.191891304204583e-07,
      "loss": 0.0098,
      "step": 2867320
    },
    {
      "epoch": 4.692464798413228,
      "grad_norm": 0.22470331192016602,
      "learning_rate": 6.191232382069411e-07,
      "loss": 0.0098,
      "step": 2867340
    },
    {
      "epoch": 4.692497528851882,
      "grad_norm": 0.16723202168941498,
      "learning_rate": 6.190573459934241e-07,
      "loss": 0.0081,
      "step": 2867360
    },
    {
      "epoch": 4.692530259290535,
      "grad_norm": 0.30019405484199524,
      "learning_rate": 6.189914537799069e-07,
      "loss": 0.0133,
      "step": 2867380
    },
    {
      "epoch": 4.692562989729188,
      "grad_norm": 0.2424800992012024,
      "learning_rate": 6.189255615663897e-07,
      "loss": 0.0091,
      "step": 2867400
    },
    {
      "epoch": 4.692595720167842,
      "grad_norm": 0.172664076089859,
      "learning_rate": 6.188596693528726e-07,
      "loss": 0.014,
      "step": 2867420
    },
    {
      "epoch": 4.692628450606495,
      "grad_norm": 0.09432036429643631,
      "learning_rate": 6.187937771393554e-07,
      "loss": 0.0075,
      "step": 2867440
    },
    {
      "epoch": 4.692661181045148,
      "grad_norm": 0.11546976119279861,
      "learning_rate": 6.187278849258384e-07,
      "loss": 0.0081,
      "step": 2867460
    },
    {
      "epoch": 4.6926939114838015,
      "grad_norm": 0.037471704185009,
      "learning_rate": 6.186619927123212e-07,
      "loss": 0.0082,
      "step": 2867480
    },
    {
      "epoch": 4.692726641922455,
      "grad_norm": 0.16083821654319763,
      "learning_rate": 6.185961004988041e-07,
      "loss": 0.0095,
      "step": 2867500
    },
    {
      "epoch": 4.692759372361109,
      "grad_norm": 0.06489869952201843,
      "learning_rate": 6.185302082852869e-07,
      "loss": 0.009,
      "step": 2867520
    },
    {
      "epoch": 4.6927921027997614,
      "grad_norm": 0.1672532558441162,
      "learning_rate": 6.184643160717699e-07,
      "loss": 0.0043,
      "step": 2867540
    },
    {
      "epoch": 4.692824833238415,
      "grad_norm": 0.34120333194732666,
      "learning_rate": 6.183984238582527e-07,
      "loss": 0.0058,
      "step": 2867560
    },
    {
      "epoch": 4.692857563677069,
      "grad_norm": 0.25339096784591675,
      "learning_rate": 6.183325316447357e-07,
      "loss": 0.0076,
      "step": 2867580
    },
    {
      "epoch": 4.692890294115722,
      "grad_norm": 0.16129839420318604,
      "learning_rate": 6.182666394312184e-07,
      "loss": 0.0066,
      "step": 2867600
    },
    {
      "epoch": 4.692923024554375,
      "grad_norm": 0.2691323757171631,
      "learning_rate": 6.182007472177013e-07,
      "loss": 0.007,
      "step": 2867620
    },
    {
      "epoch": 4.6929557549930285,
      "grad_norm": 0.28563782572746277,
      "learning_rate": 6.181348550041842e-07,
      "loss": 0.0085,
      "step": 2867640
    },
    {
      "epoch": 4.692988485431682,
      "grad_norm": 0.05765514075756073,
      "learning_rate": 6.180689627906671e-07,
      "loss": 0.0144,
      "step": 2867660
    },
    {
      "epoch": 4.693021215870335,
      "grad_norm": 0.3557284474372864,
      "learning_rate": 6.1800307057715e-07,
      "loss": 0.009,
      "step": 2867680
    },
    {
      "epoch": 4.693053946308988,
      "grad_norm": 0.30918315052986145,
      "learning_rate": 6.179371783636329e-07,
      "loss": 0.0074,
      "step": 2867700
    },
    {
      "epoch": 4.693086676747642,
      "grad_norm": 0.295393168926239,
      "learning_rate": 6.178712861501157e-07,
      "loss": 0.005,
      "step": 2867720
    },
    {
      "epoch": 4.693119407186295,
      "grad_norm": 0.11889388412237167,
      "learning_rate": 6.178053939365985e-07,
      "loss": 0.0088,
      "step": 2867740
    },
    {
      "epoch": 4.693152137624948,
      "grad_norm": 0.1625925749540329,
      "learning_rate": 6.177395017230815e-07,
      "loss": 0.006,
      "step": 2867760
    },
    {
      "epoch": 4.693184868063602,
      "grad_norm": 0.09458836913108826,
      "learning_rate": 6.176736095095643e-07,
      "loss": 0.0106,
      "step": 2867780
    },
    {
      "epoch": 4.6932175985022555,
      "grad_norm": 0.16403919458389282,
      "learning_rate": 6.176077172960472e-07,
      "loss": 0.0089,
      "step": 2867800
    },
    {
      "epoch": 4.693250328940908,
      "grad_norm": 0.18380789458751678,
      "learning_rate": 6.1754182508253e-07,
      "loss": 0.0083,
      "step": 2867820
    },
    {
      "epoch": 4.693283059379562,
      "grad_norm": 0.15618260204792023,
      "learning_rate": 6.174759328690129e-07,
      "loss": 0.0112,
      "step": 2867840
    },
    {
      "epoch": 4.693315789818215,
      "grad_norm": 0.2954695224761963,
      "learning_rate": 6.174100406554958e-07,
      "loss": 0.0079,
      "step": 2867860
    },
    {
      "epoch": 4.693348520256868,
      "grad_norm": 0.29367345571517944,
      "learning_rate": 6.173441484419787e-07,
      "loss": 0.0072,
      "step": 2867880
    },
    {
      "epoch": 4.693381250695522,
      "grad_norm": 0.3204153776168823,
      "learning_rate": 6.172782562284615e-07,
      "loss": 0.008,
      "step": 2867900
    },
    {
      "epoch": 4.693413981134175,
      "grad_norm": 0.054406337440013885,
      "learning_rate": 6.172123640149444e-07,
      "loss": 0.007,
      "step": 2867920
    },
    {
      "epoch": 4.693446711572829,
      "grad_norm": 0.14407819509506226,
      "learning_rate": 6.171464718014273e-07,
      "loss": 0.01,
      "step": 2867940
    },
    {
      "epoch": 4.693479442011482,
      "grad_norm": 0.13857907056808472,
      "learning_rate": 6.170805795879102e-07,
      "loss": 0.0068,
      "step": 2867960
    },
    {
      "epoch": 4.693512172450135,
      "grad_norm": 0.24629682302474976,
      "learning_rate": 6.17014687374393e-07,
      "loss": 0.0076,
      "step": 2867980
    },
    {
      "epoch": 4.693544902888789,
      "grad_norm": 0.15111255645751953,
      "learning_rate": 6.169487951608759e-07,
      "loss": 0.0101,
      "step": 2868000
    },
    {
      "epoch": 4.6935776333274415,
      "grad_norm": 0.43291687965393066,
      "learning_rate": 6.168829029473588e-07,
      "loss": 0.0126,
      "step": 2868020
    },
    {
      "epoch": 4.693610363766095,
      "grad_norm": 0.22824686765670776,
      "learning_rate": 6.168170107338417e-07,
      "loss": 0.0068,
      "step": 2868040
    },
    {
      "epoch": 4.693643094204749,
      "grad_norm": 0.1969786286354065,
      "learning_rate": 6.167511185203245e-07,
      "loss": 0.0105,
      "step": 2868060
    },
    {
      "epoch": 4.693675824643401,
      "grad_norm": 0.10888920724391937,
      "learning_rate": 6.166852263068073e-07,
      "loss": 0.0105,
      "step": 2868080
    },
    {
      "epoch": 4.693708555082055,
      "grad_norm": 0.26378709077835083,
      "learning_rate": 6.166193340932902e-07,
      "loss": 0.0089,
      "step": 2868100
    },
    {
      "epoch": 4.693741285520709,
      "grad_norm": 0.1699480265378952,
      "learning_rate": 6.165534418797731e-07,
      "loss": 0.0074,
      "step": 2868120
    },
    {
      "epoch": 4.693774015959362,
      "grad_norm": 0.20490580797195435,
      "learning_rate": 6.16487549666256e-07,
      "loss": 0.0077,
      "step": 2868140
    },
    {
      "epoch": 4.693806746398015,
      "grad_norm": 0.20647475123405457,
      "learning_rate": 6.164216574527388e-07,
      "loss": 0.0065,
      "step": 2868160
    },
    {
      "epoch": 4.6938394768366685,
      "grad_norm": 0.21070565283298492,
      "learning_rate": 6.163557652392217e-07,
      "loss": 0.0122,
      "step": 2868180
    },
    {
      "epoch": 4.693872207275322,
      "grad_norm": 0.29075565934181213,
      "learning_rate": 6.162898730257046e-07,
      "loss": 0.0067,
      "step": 2868200
    },
    {
      "epoch": 4.693904937713976,
      "grad_norm": 0.3018055260181427,
      "learning_rate": 6.162239808121875e-07,
      "loss": 0.0098,
      "step": 2868220
    },
    {
      "epoch": 4.693937668152628,
      "grad_norm": 0.21289652585983276,
      "learning_rate": 6.161580885986704e-07,
      "loss": 0.0072,
      "step": 2868240
    },
    {
      "epoch": 4.693970398591282,
      "grad_norm": 0.3420996069908142,
      "learning_rate": 6.160921963851533e-07,
      "loss": 0.0076,
      "step": 2868260
    },
    {
      "epoch": 4.6940031290299356,
      "grad_norm": 0.4178297519683838,
      "learning_rate": 6.16026304171636e-07,
      "loss": 0.0094,
      "step": 2868280
    },
    {
      "epoch": 4.694035859468588,
      "grad_norm": 0.11933430284261703,
      "learning_rate": 6.15960411958119e-07,
      "loss": 0.0096,
      "step": 2868300
    },
    {
      "epoch": 4.694068589907242,
      "grad_norm": 0.0943358838558197,
      "learning_rate": 6.158945197446018e-07,
      "loss": 0.008,
      "step": 2868320
    },
    {
      "epoch": 4.6941013203458954,
      "grad_norm": 0.4598388969898224,
      "learning_rate": 6.158286275310848e-07,
      "loss": 0.0105,
      "step": 2868340
    },
    {
      "epoch": 4.694134050784548,
      "grad_norm": 0.2889692485332489,
      "learning_rate": 6.157627353175676e-07,
      "loss": 0.0083,
      "step": 2868360
    },
    {
      "epoch": 4.694166781223202,
      "grad_norm": 0.23431655764579773,
      "learning_rate": 6.156968431040505e-07,
      "loss": 0.0075,
      "step": 2868380
    },
    {
      "epoch": 4.694199511661855,
      "grad_norm": 0.32733291387557983,
      "learning_rate": 6.156309508905333e-07,
      "loss": 0.0055,
      "step": 2868400
    },
    {
      "epoch": 4.694232242100509,
      "grad_norm": 0.28280529379844666,
      "learning_rate": 6.155650586770162e-07,
      "loss": 0.0071,
      "step": 2868420
    },
    {
      "epoch": 4.694264972539162,
      "grad_norm": 0.28662678599357605,
      "learning_rate": 6.154991664634991e-07,
      "loss": 0.0088,
      "step": 2868440
    },
    {
      "epoch": 4.694297702977815,
      "grad_norm": 0.10199834406375885,
      "learning_rate": 6.154332742499819e-07,
      "loss": 0.0099,
      "step": 2868460
    },
    {
      "epoch": 4.694330433416469,
      "grad_norm": 0.31447166204452515,
      "learning_rate": 6.153673820364648e-07,
      "loss": 0.0069,
      "step": 2868480
    },
    {
      "epoch": 4.694363163855122,
      "grad_norm": 0.08816909044981003,
      "learning_rate": 6.153014898229476e-07,
      "loss": 0.0045,
      "step": 2868500
    },
    {
      "epoch": 4.694395894293775,
      "grad_norm": 0.17263972759246826,
      "learning_rate": 6.152355976094306e-07,
      "loss": 0.0061,
      "step": 2868520
    },
    {
      "epoch": 4.694428624732429,
      "grad_norm": 1.0775145292282104,
      "learning_rate": 6.151697053959134e-07,
      "loss": 0.0101,
      "step": 2868540
    },
    {
      "epoch": 4.694461355171082,
      "grad_norm": 0.16225865483283997,
      "learning_rate": 6.151038131823963e-07,
      "loss": 0.0085,
      "step": 2868560
    },
    {
      "epoch": 4.694494085609735,
      "grad_norm": 0.10176751017570496,
      "learning_rate": 6.150379209688791e-07,
      "loss": 0.0088,
      "step": 2868580
    },
    {
      "epoch": 4.694526816048389,
      "grad_norm": 0.26196596026420593,
      "learning_rate": 6.149720287553621e-07,
      "loss": 0.0102,
      "step": 2868600
    },
    {
      "epoch": 4.694559546487042,
      "grad_norm": 0.1485971212387085,
      "learning_rate": 6.149061365418449e-07,
      "loss": 0.006,
      "step": 2868620
    },
    {
      "epoch": 4.694592276925695,
      "grad_norm": 0.6317641735076904,
      "learning_rate": 6.148402443283278e-07,
      "loss": 0.0087,
      "step": 2868640
    },
    {
      "epoch": 4.6946250073643485,
      "grad_norm": 0.12363158911466599,
      "learning_rate": 6.147743521148106e-07,
      "loss": 0.0063,
      "step": 2868660
    },
    {
      "epoch": 4.694657737803002,
      "grad_norm": 0.18088680505752563,
      "learning_rate": 6.147084599012936e-07,
      "loss": 0.0101,
      "step": 2868680
    },
    {
      "epoch": 4.694690468241656,
      "grad_norm": 0.20781147480010986,
      "learning_rate": 6.146425676877764e-07,
      "loss": 0.0075,
      "step": 2868700
    },
    {
      "epoch": 4.694723198680308,
      "grad_norm": 0.4183058738708496,
      "learning_rate": 6.145766754742592e-07,
      "loss": 0.0102,
      "step": 2868720
    },
    {
      "epoch": 4.694755929118962,
      "grad_norm": 0.4585557281970978,
      "learning_rate": 6.145107832607421e-07,
      "loss": 0.0107,
      "step": 2868740
    },
    {
      "epoch": 4.694788659557616,
      "grad_norm": 0.09099416434764862,
      "learning_rate": 6.144448910472249e-07,
      "loss": 0.0067,
      "step": 2868760
    },
    {
      "epoch": 4.694821389996269,
      "grad_norm": 0.06315001845359802,
      "learning_rate": 6.143789988337079e-07,
      "loss": 0.0075,
      "step": 2868780
    },
    {
      "epoch": 4.694854120434922,
      "grad_norm": 0.39938750863075256,
      "learning_rate": 6.143131066201907e-07,
      "loss": 0.0066,
      "step": 2868800
    },
    {
      "epoch": 4.6948868508735755,
      "grad_norm": 0.28575748205184937,
      "learning_rate": 6.142472144066736e-07,
      "loss": 0.0088,
      "step": 2868820
    },
    {
      "epoch": 4.694919581312229,
      "grad_norm": 0.2080216109752655,
      "learning_rate": 6.141813221931564e-07,
      "loss": 0.0065,
      "step": 2868840
    },
    {
      "epoch": 4.694952311750882,
      "grad_norm": 0.24942196905612946,
      "learning_rate": 6.141154299796394e-07,
      "loss": 0.0069,
      "step": 2868860
    },
    {
      "epoch": 4.694985042189535,
      "grad_norm": 0.05942980572581291,
      "learning_rate": 6.140495377661222e-07,
      "loss": 0.0084,
      "step": 2868880
    },
    {
      "epoch": 4.695017772628189,
      "grad_norm": 0.046149954199790955,
      "learning_rate": 6.139836455526052e-07,
      "loss": 0.0116,
      "step": 2868900
    },
    {
      "epoch": 4.695050503066842,
      "grad_norm": 0.33186855912208557,
      "learning_rate": 6.13917753339088e-07,
      "loss": 0.008,
      "step": 2868920
    },
    {
      "epoch": 4.695083233505495,
      "grad_norm": 0.3302929401397705,
      "learning_rate": 6.138518611255708e-07,
      "loss": 0.0118,
      "step": 2868940
    },
    {
      "epoch": 4.695115963944149,
      "grad_norm": 0.473380982875824,
      "learning_rate": 6.137859689120537e-07,
      "loss": 0.0117,
      "step": 2868960
    },
    {
      "epoch": 4.6951486943828025,
      "grad_norm": 0.61751389503479,
      "learning_rate": 6.137200766985366e-07,
      "loss": 0.0064,
      "step": 2868980
    },
    {
      "epoch": 4.695181424821455,
      "grad_norm": 0.39551636576652527,
      "learning_rate": 6.136541844850195e-07,
      "loss": 0.0094,
      "step": 2869000
    },
    {
      "epoch": 4.695214155260109,
      "grad_norm": 0.12737500667572021,
      "learning_rate": 6.135882922715024e-07,
      "loss": 0.0096,
      "step": 2869020
    },
    {
      "epoch": 4.695246885698762,
      "grad_norm": 0.06382176280021667,
      "learning_rate": 6.135224000579852e-07,
      "loss": 0.0068,
      "step": 2869040
    },
    {
      "epoch": 4.695279616137416,
      "grad_norm": 0.1741749346256256,
      "learning_rate": 6.13456507844468e-07,
      "loss": 0.0069,
      "step": 2869060
    },
    {
      "epoch": 4.695312346576069,
      "grad_norm": 0.11709106713533401,
      "learning_rate": 6.13390615630951e-07,
      "loss": 0.0123,
      "step": 2869080
    },
    {
      "epoch": 4.695345077014722,
      "grad_norm": 0.19628259539604187,
      "learning_rate": 6.133247234174338e-07,
      "loss": 0.0064,
      "step": 2869100
    },
    {
      "epoch": 4.695377807453376,
      "grad_norm": 0.39058777689933777,
      "learning_rate": 6.132588312039167e-07,
      "loss": 0.0085,
      "step": 2869120
    },
    {
      "epoch": 4.695410537892029,
      "grad_norm": 0.39414122700691223,
      "learning_rate": 6.131929389903995e-07,
      "loss": 0.0059,
      "step": 2869140
    },
    {
      "epoch": 4.695443268330682,
      "grad_norm": 0.22194361686706543,
      "learning_rate": 6.131270467768824e-07,
      "loss": 0.0065,
      "step": 2869160
    },
    {
      "epoch": 4.695475998769336,
      "grad_norm": 0.15839599072933197,
      "learning_rate": 6.130611545633653e-07,
      "loss": 0.0081,
      "step": 2869180
    },
    {
      "epoch": 4.6955087292079885,
      "grad_norm": 0.235359787940979,
      "learning_rate": 6.129952623498482e-07,
      "loss": 0.0089,
      "step": 2869200
    },
    {
      "epoch": 4.695541459646642,
      "grad_norm": 0.146538645029068,
      "learning_rate": 6.12929370136331e-07,
      "loss": 0.0094,
      "step": 2869220
    },
    {
      "epoch": 4.695574190085296,
      "grad_norm": 0.3695545196533203,
      "learning_rate": 6.128634779228139e-07,
      "loss": 0.0102,
      "step": 2869240
    },
    {
      "epoch": 4.695606920523949,
      "grad_norm": 0.2423611730337143,
      "learning_rate": 6.127975857092968e-07,
      "loss": 0.0088,
      "step": 2869260
    },
    {
      "epoch": 4.695639650962602,
      "grad_norm": 0.15235798060894012,
      "learning_rate": 6.127316934957797e-07,
      "loss": 0.0059,
      "step": 2869280
    },
    {
      "epoch": 4.6956723814012555,
      "grad_norm": 0.14943790435791016,
      "learning_rate": 6.126658012822625e-07,
      "loss": 0.0139,
      "step": 2869300
    },
    {
      "epoch": 4.695705111839909,
      "grad_norm": 0.15010999143123627,
      "learning_rate": 6.125999090687454e-07,
      "loss": 0.0132,
      "step": 2869320
    },
    {
      "epoch": 4.695737842278562,
      "grad_norm": 0.3952946364879608,
      "learning_rate": 6.125340168552283e-07,
      "loss": 0.0112,
      "step": 2869340
    },
    {
      "epoch": 4.695770572717215,
      "grad_norm": 0.3771965503692627,
      "learning_rate": 6.124681246417112e-07,
      "loss": 0.0067,
      "step": 2869360
    },
    {
      "epoch": 4.695803303155869,
      "grad_norm": 0.10240430384874344,
      "learning_rate": 6.12402232428194e-07,
      "loss": 0.0104,
      "step": 2869380
    },
    {
      "epoch": 4.695836033594523,
      "grad_norm": 0.42777860164642334,
      "learning_rate": 6.123363402146768e-07,
      "loss": 0.0071,
      "step": 2869400
    },
    {
      "epoch": 4.695868764033175,
      "grad_norm": 0.32562488317489624,
      "learning_rate": 6.122704480011597e-07,
      "loss": 0.0105,
      "step": 2869420
    },
    {
      "epoch": 4.695901494471829,
      "grad_norm": 0.20096565783023834,
      "learning_rate": 6.122045557876426e-07,
      "loss": 0.0072,
      "step": 2869440
    },
    {
      "epoch": 4.6959342249104825,
      "grad_norm": 0.5195758938789368,
      "learning_rate": 6.121386635741255e-07,
      "loss": 0.0144,
      "step": 2869460
    },
    {
      "epoch": 4.695966955349135,
      "grad_norm": 0.16477294266223907,
      "learning_rate": 6.120727713606083e-07,
      "loss": 0.0075,
      "step": 2869480
    },
    {
      "epoch": 4.695999685787789,
      "grad_norm": 0.206168070435524,
      "learning_rate": 6.120068791470912e-07,
      "loss": 0.0077,
      "step": 2869500
    },
    {
      "epoch": 4.696032416226442,
      "grad_norm": 0.1556277573108673,
      "learning_rate": 6.119409869335741e-07,
      "loss": 0.0159,
      "step": 2869520
    },
    {
      "epoch": 4.696065146665095,
      "grad_norm": 0.23873257637023926,
      "learning_rate": 6.11875094720057e-07,
      "loss": 0.0069,
      "step": 2869540
    },
    {
      "epoch": 4.696097877103749,
      "grad_norm": 0.2216513305902481,
      "learning_rate": 6.118092025065399e-07,
      "loss": 0.0092,
      "step": 2869560
    },
    {
      "epoch": 4.696130607542402,
      "grad_norm": 0.4564570188522339,
      "learning_rate": 6.117433102930228e-07,
      "loss": 0.008,
      "step": 2869580
    },
    {
      "epoch": 4.696163337981056,
      "grad_norm": 0.36146456003189087,
      "learning_rate": 6.116774180795055e-07,
      "loss": 0.0081,
      "step": 2869600
    },
    {
      "epoch": 4.696196068419709,
      "grad_norm": 0.1565759778022766,
      "learning_rate": 6.116115258659885e-07,
      "loss": 0.009,
      "step": 2869620
    },
    {
      "epoch": 4.696228798858362,
      "grad_norm": 0.0657198429107666,
      "learning_rate": 6.115456336524713e-07,
      "loss": 0.009,
      "step": 2869640
    },
    {
      "epoch": 4.696261529297016,
      "grad_norm": 0.3673158586025238,
      "learning_rate": 6.114797414389543e-07,
      "loss": 0.0081,
      "step": 2869660
    },
    {
      "epoch": 4.696294259735669,
      "grad_norm": 0.44330549240112305,
      "learning_rate": 6.114138492254371e-07,
      "loss": 0.0079,
      "step": 2869680
    },
    {
      "epoch": 4.696326990174322,
      "grad_norm": 0.347706139087677,
      "learning_rate": 6.1134795701192e-07,
      "loss": 0.0074,
      "step": 2869700
    },
    {
      "epoch": 4.696359720612976,
      "grad_norm": 0.6393921971321106,
      "learning_rate": 6.112820647984028e-07,
      "loss": 0.0097,
      "step": 2869720
    },
    {
      "epoch": 4.696392451051629,
      "grad_norm": 0.39272603392601013,
      "learning_rate": 6.112161725848858e-07,
      "loss": 0.0083,
      "step": 2869740
    },
    {
      "epoch": 4.696425181490282,
      "grad_norm": 0.15281687676906586,
      "learning_rate": 6.111502803713686e-07,
      "loss": 0.0073,
      "step": 2869760
    },
    {
      "epoch": 4.696457911928936,
      "grad_norm": 0.10061618685722351,
      "learning_rate": 6.110843881578514e-07,
      "loss": 0.0114,
      "step": 2869780
    },
    {
      "epoch": 4.696490642367589,
      "grad_norm": 0.03896845132112503,
      "learning_rate": 6.110184959443343e-07,
      "loss": 0.0166,
      "step": 2869800
    },
    {
      "epoch": 4.696523372806242,
      "grad_norm": 0.331215500831604,
      "learning_rate": 6.109526037308171e-07,
      "loss": 0.0118,
      "step": 2869820
    },
    {
      "epoch": 4.6965561032448955,
      "grad_norm": 0.11038427799940109,
      "learning_rate": 6.108867115173001e-07,
      "loss": 0.0108,
      "step": 2869840
    },
    {
      "epoch": 4.696588833683549,
      "grad_norm": 0.1464075744152069,
      "learning_rate": 6.108208193037829e-07,
      "loss": 0.0074,
      "step": 2869860
    },
    {
      "epoch": 4.696621564122203,
      "grad_norm": 0.2676286995410919,
      "learning_rate": 6.107549270902658e-07,
      "loss": 0.0096,
      "step": 2869880
    },
    {
      "epoch": 4.696654294560855,
      "grad_norm": 0.08982084691524506,
      "learning_rate": 6.106890348767486e-07,
      "loss": 0.0086,
      "step": 2869900
    },
    {
      "epoch": 4.696687024999509,
      "grad_norm": 0.236440971493721,
      "learning_rate": 6.106231426632316e-07,
      "loss": 0.0076,
      "step": 2869920
    },
    {
      "epoch": 4.696719755438163,
      "grad_norm": 0.4572685658931732,
      "learning_rate": 6.105572504497144e-07,
      "loss": 0.011,
      "step": 2869940
    },
    {
      "epoch": 4.696752485876816,
      "grad_norm": 0.09288202226161957,
      "learning_rate": 6.104913582361973e-07,
      "loss": 0.0068,
      "step": 2869960
    },
    {
      "epoch": 4.696785216315469,
      "grad_norm": 0.6436052918434143,
      "learning_rate": 6.104254660226801e-07,
      "loss": 0.0116,
      "step": 2869980
    },
    {
      "epoch": 4.6968179467541225,
      "grad_norm": 0.15920290350914001,
      "learning_rate": 6.103595738091631e-07,
      "loss": 0.0073,
      "step": 2870000
    },
    {
      "epoch": 4.696850677192776,
      "grad_norm": 0.21084706485271454,
      "learning_rate": 6.102936815956459e-07,
      "loss": 0.0124,
      "step": 2870020
    },
    {
      "epoch": 4.696883407631429,
      "grad_norm": 0.25672808289527893,
      "learning_rate": 6.102277893821288e-07,
      "loss": 0.0063,
      "step": 2870040
    },
    {
      "epoch": 4.696916138070082,
      "grad_norm": 0.3418445885181427,
      "learning_rate": 6.101618971686116e-07,
      "loss": 0.0133,
      "step": 2870060
    },
    {
      "epoch": 4.696948868508736,
      "grad_norm": 0.256422758102417,
      "learning_rate": 6.100960049550944e-07,
      "loss": 0.0055,
      "step": 2870080
    },
    {
      "epoch": 4.696981598947389,
      "grad_norm": 0.36910280585289,
      "learning_rate": 6.100301127415774e-07,
      "loss": 0.0058,
      "step": 2870100
    },
    {
      "epoch": 4.697014329386042,
      "grad_norm": 0.07897727191448212,
      "learning_rate": 6.099642205280602e-07,
      "loss": 0.0104,
      "step": 2870120
    },
    {
      "epoch": 4.697047059824696,
      "grad_norm": 0.6577675342559814,
      "learning_rate": 6.098983283145431e-07,
      "loss": 0.0104,
      "step": 2870140
    },
    {
      "epoch": 4.697079790263349,
      "grad_norm": 0.357841819524765,
      "learning_rate": 6.098324361010259e-07,
      "loss": 0.0079,
      "step": 2870160
    },
    {
      "epoch": 4.697112520702002,
      "grad_norm": 0.16127550601959229,
      "learning_rate": 6.097665438875089e-07,
      "loss": 0.0131,
      "step": 2870180
    },
    {
      "epoch": 4.697145251140656,
      "grad_norm": 0.352061003446579,
      "learning_rate": 6.097006516739917e-07,
      "loss": 0.0109,
      "step": 2870200
    },
    {
      "epoch": 4.697177981579309,
      "grad_norm": 0.3172348141670227,
      "learning_rate": 6.096347594604747e-07,
      "loss": 0.0109,
      "step": 2870220
    },
    {
      "epoch": 4.697210712017963,
      "grad_norm": 0.34618014097213745,
      "learning_rate": 6.095688672469575e-07,
      "loss": 0.0068,
      "step": 2870240
    },
    {
      "epoch": 4.697243442456616,
      "grad_norm": 0.1741000860929489,
      "learning_rate": 6.095029750334403e-07,
      "loss": 0.0094,
      "step": 2870260
    },
    {
      "epoch": 4.697276172895269,
      "grad_norm": 0.15418674051761627,
      "learning_rate": 6.094370828199232e-07,
      "loss": 0.0058,
      "step": 2870280
    },
    {
      "epoch": 4.697308903333923,
      "grad_norm": 0.2153802514076233,
      "learning_rate": 6.093711906064061e-07,
      "loss": 0.0128,
      "step": 2870300
    },
    {
      "epoch": 4.6973416337725755,
      "grad_norm": 0.2361900806427002,
      "learning_rate": 6.09305298392889e-07,
      "loss": 0.0161,
      "step": 2870320
    },
    {
      "epoch": 4.697374364211229,
      "grad_norm": 0.09916580468416214,
      "learning_rate": 6.092394061793719e-07,
      "loss": 0.0073,
      "step": 2870340
    },
    {
      "epoch": 4.697407094649883,
      "grad_norm": 0.40502414107322693,
      "learning_rate": 6.091735139658547e-07,
      "loss": 0.0062,
      "step": 2870360
    },
    {
      "epoch": 4.697439825088535,
      "grad_norm": 0.2026340663433075,
      "learning_rate": 6.091076217523376e-07,
      "loss": 0.0072,
      "step": 2870380
    },
    {
      "epoch": 4.697472555527189,
      "grad_norm": 0.33311358094215393,
      "learning_rate": 6.090417295388205e-07,
      "loss": 0.0067,
      "step": 2870400
    },
    {
      "epoch": 4.697505285965843,
      "grad_norm": 0.43171387910842896,
      "learning_rate": 6.089758373253034e-07,
      "loss": 0.0063,
      "step": 2870420
    },
    {
      "epoch": 4.697538016404496,
      "grad_norm": 0.23400214314460754,
      "learning_rate": 6.089099451117862e-07,
      "loss": 0.0109,
      "step": 2870440
    },
    {
      "epoch": 4.697570746843149,
      "grad_norm": 0.20976628363132477,
      "learning_rate": 6.08844052898269e-07,
      "loss": 0.0086,
      "step": 2870460
    },
    {
      "epoch": 4.6976034772818025,
      "grad_norm": 0.48324349522590637,
      "learning_rate": 6.087781606847519e-07,
      "loss": 0.008,
      "step": 2870480
    },
    {
      "epoch": 4.697636207720456,
      "grad_norm": 0.36872896552085876,
      "learning_rate": 6.087122684712348e-07,
      "loss": 0.0146,
      "step": 2870500
    },
    {
      "epoch": 4.69766893815911,
      "grad_norm": 0.1992362141609192,
      "learning_rate": 6.086463762577177e-07,
      "loss": 0.007,
      "step": 2870520
    },
    {
      "epoch": 4.697701668597762,
      "grad_norm": 0.12953943014144897,
      "learning_rate": 6.085804840442005e-07,
      "loss": 0.0057,
      "step": 2870540
    },
    {
      "epoch": 4.697734399036416,
      "grad_norm": 0.2723124623298645,
      "learning_rate": 6.085145918306834e-07,
      "loss": 0.0109,
      "step": 2870560
    },
    {
      "epoch": 4.69776712947507,
      "grad_norm": 0.20118063688278198,
      "learning_rate": 6.084486996171663e-07,
      "loss": 0.0136,
      "step": 2870580
    },
    {
      "epoch": 4.697799859913722,
      "grad_norm": 0.04487694054841995,
      "learning_rate": 6.083828074036492e-07,
      "loss": 0.005,
      "step": 2870600
    },
    {
      "epoch": 4.697832590352376,
      "grad_norm": 0.20562781393527985,
      "learning_rate": 6.08316915190132e-07,
      "loss": 0.0092,
      "step": 2870620
    },
    {
      "epoch": 4.6978653207910295,
      "grad_norm": 0.14467613399028778,
      "learning_rate": 6.082510229766149e-07,
      "loss": 0.0098,
      "step": 2870640
    },
    {
      "epoch": 4.697898051229682,
      "grad_norm": 0.4074481129646301,
      "learning_rate": 6.081851307630978e-07,
      "loss": 0.0049,
      "step": 2870660
    },
    {
      "epoch": 4.697930781668336,
      "grad_norm": 0.10334080457687378,
      "learning_rate": 6.081192385495807e-07,
      "loss": 0.0058,
      "step": 2870680
    },
    {
      "epoch": 4.697963512106989,
      "grad_norm": 0.11406590789556503,
      "learning_rate": 6.080533463360635e-07,
      "loss": 0.0089,
      "step": 2870700
    },
    {
      "epoch": 4.697996242545643,
      "grad_norm": 0.27317705750465393,
      "learning_rate": 6.079874541225464e-07,
      "loss": 0.0059,
      "step": 2870720
    },
    {
      "epoch": 4.698028972984296,
      "grad_norm": 0.17753776907920837,
      "learning_rate": 6.079215619090292e-07,
      "loss": 0.0076,
      "step": 2870740
    },
    {
      "epoch": 4.698061703422949,
      "grad_norm": 0.28494998812675476,
      "learning_rate": 6.078556696955122e-07,
      "loss": 0.0083,
      "step": 2870760
    },
    {
      "epoch": 4.698094433861603,
      "grad_norm": 0.11585795879364014,
      "learning_rate": 6.07789777481995e-07,
      "loss": 0.0141,
      "step": 2870780
    },
    {
      "epoch": 4.698127164300256,
      "grad_norm": 0.18203584849834442,
      "learning_rate": 6.077238852684778e-07,
      "loss": 0.0103,
      "step": 2870800
    },
    {
      "epoch": 4.698159894738909,
      "grad_norm": 0.13903455436229706,
      "learning_rate": 6.076579930549607e-07,
      "loss": 0.0103,
      "step": 2870820
    },
    {
      "epoch": 4.698192625177563,
      "grad_norm": 0.11795113980770111,
      "learning_rate": 6.075921008414436e-07,
      "loss": 0.0118,
      "step": 2870840
    },
    {
      "epoch": 4.698225355616216,
      "grad_norm": 0.6161412596702576,
      "learning_rate": 6.075262086279265e-07,
      "loss": 0.0076,
      "step": 2870860
    },
    {
      "epoch": 4.698258086054869,
      "grad_norm": 0.23205667734146118,
      "learning_rate": 6.074603164144094e-07,
      "loss": 0.0101,
      "step": 2870880
    },
    {
      "epoch": 4.698290816493523,
      "grad_norm": 0.16550374031066895,
      "learning_rate": 6.073944242008923e-07,
      "loss": 0.0126,
      "step": 2870900
    },
    {
      "epoch": 4.698323546932176,
      "grad_norm": 0.5997921824455261,
      "learning_rate": 6.07328531987375e-07,
      "loss": 0.0117,
      "step": 2870920
    },
    {
      "epoch": 4.698356277370829,
      "grad_norm": 0.144754558801651,
      "learning_rate": 6.07262639773858e-07,
      "loss": 0.0076,
      "step": 2870940
    },
    {
      "epoch": 4.6983890078094825,
      "grad_norm": 0.15950319170951843,
      "learning_rate": 6.071967475603408e-07,
      "loss": 0.0068,
      "step": 2870960
    },
    {
      "epoch": 4.698421738248136,
      "grad_norm": 0.24090202152729034,
      "learning_rate": 6.071308553468238e-07,
      "loss": 0.0066,
      "step": 2870980
    },
    {
      "epoch": 4.698454468686789,
      "grad_norm": 0.7454840540885925,
      "learning_rate": 6.070649631333066e-07,
      "loss": 0.0098,
      "step": 2871000
    },
    {
      "epoch": 4.698487199125442,
      "grad_norm": 0.32983091473579407,
      "learning_rate": 6.069990709197895e-07,
      "loss": 0.0082,
      "step": 2871020
    },
    {
      "epoch": 4.698519929564096,
      "grad_norm": 0.06781593710184097,
      "learning_rate": 6.069331787062723e-07,
      "loss": 0.0092,
      "step": 2871040
    },
    {
      "epoch": 4.69855266000275,
      "grad_norm": 0.2459125816822052,
      "learning_rate": 6.068672864927553e-07,
      "loss": 0.0078,
      "step": 2871060
    },
    {
      "epoch": 4.698585390441402,
      "grad_norm": 0.22210651636123657,
      "learning_rate": 6.068013942792381e-07,
      "loss": 0.0078,
      "step": 2871080
    },
    {
      "epoch": 4.698618120880056,
      "grad_norm": 0.4906156659126282,
      "learning_rate": 6.06735502065721e-07,
      "loss": 0.0067,
      "step": 2871100
    },
    {
      "epoch": 4.6986508513187095,
      "grad_norm": 0.31015631556510925,
      "learning_rate": 6.066696098522038e-07,
      "loss": 0.0145,
      "step": 2871120
    },
    {
      "epoch": 4.698683581757363,
      "grad_norm": 0.30743154883384705,
      "learning_rate": 6.066037176386866e-07,
      "loss": 0.0093,
      "step": 2871140
    },
    {
      "epoch": 4.698716312196016,
      "grad_norm": 0.3764552175998688,
      "learning_rate": 6.065378254251696e-07,
      "loss": 0.0081,
      "step": 2871160
    },
    {
      "epoch": 4.698749042634669,
      "grad_norm": 0.1975562423467636,
      "learning_rate": 6.064719332116524e-07,
      "loss": 0.0084,
      "step": 2871180
    },
    {
      "epoch": 4.698781773073323,
      "grad_norm": 0.23903273046016693,
      "learning_rate": 6.064060409981353e-07,
      "loss": 0.0102,
      "step": 2871200
    },
    {
      "epoch": 4.698814503511976,
      "grad_norm": 0.06939796358346939,
      "learning_rate": 6.063401487846181e-07,
      "loss": 0.0098,
      "step": 2871220
    },
    {
      "epoch": 4.698847233950629,
      "grad_norm": 0.15983843803405762,
      "learning_rate": 6.062742565711011e-07,
      "loss": 0.0089,
      "step": 2871240
    },
    {
      "epoch": 4.698879964389283,
      "grad_norm": 0.1857726126909256,
      "learning_rate": 6.062083643575839e-07,
      "loss": 0.0098,
      "step": 2871260
    },
    {
      "epoch": 4.698912694827936,
      "grad_norm": 0.14607390761375427,
      "learning_rate": 6.061424721440668e-07,
      "loss": 0.0052,
      "step": 2871280
    },
    {
      "epoch": 4.698945425266589,
      "grad_norm": 0.2949248254299164,
      "learning_rate": 6.060765799305496e-07,
      "loss": 0.0088,
      "step": 2871300
    },
    {
      "epoch": 4.698978155705243,
      "grad_norm": 0.19862480461597443,
      "learning_rate": 6.060106877170326e-07,
      "loss": 0.0094,
      "step": 2871320
    },
    {
      "epoch": 4.699010886143896,
      "grad_norm": 0.10665372759103775,
      "learning_rate": 6.059447955035154e-07,
      "loss": 0.0092,
      "step": 2871340
    },
    {
      "epoch": 4.699043616582549,
      "grad_norm": 0.16901080310344696,
      "learning_rate": 6.058789032899983e-07,
      "loss": 0.0143,
      "step": 2871360
    },
    {
      "epoch": 4.699076347021203,
      "grad_norm": 0.32695913314819336,
      "learning_rate": 6.058130110764811e-07,
      "loss": 0.0093,
      "step": 2871380
    },
    {
      "epoch": 4.699109077459856,
      "grad_norm": 0.22312133014202118,
      "learning_rate": 6.05747118862964e-07,
      "loss": 0.0087,
      "step": 2871400
    },
    {
      "epoch": 4.69914180789851,
      "grad_norm": 0.32682937383651733,
      "learning_rate": 6.056812266494469e-07,
      "loss": 0.007,
      "step": 2871420
    },
    {
      "epoch": 4.699174538337163,
      "grad_norm": 0.0681745707988739,
      "learning_rate": 6.056153344359297e-07,
      "loss": 0.0083,
      "step": 2871440
    },
    {
      "epoch": 4.699207268775816,
      "grad_norm": 0.2878730297088623,
      "learning_rate": 6.055494422224126e-07,
      "loss": 0.0098,
      "step": 2871460
    },
    {
      "epoch": 4.69923999921447,
      "grad_norm": 0.36267971992492676,
      "learning_rate": 6.054835500088954e-07,
      "loss": 0.0094,
      "step": 2871480
    },
    {
      "epoch": 4.6992727296531225,
      "grad_norm": 0.520895779132843,
      "learning_rate": 6.054176577953784e-07,
      "loss": 0.0091,
      "step": 2871500
    },
    {
      "epoch": 4.699305460091776,
      "grad_norm": 0.10647136718034744,
      "learning_rate": 6.053517655818612e-07,
      "loss": 0.0062,
      "step": 2871520
    },
    {
      "epoch": 4.69933819053043,
      "grad_norm": 0.10343358665704727,
      "learning_rate": 6.052858733683442e-07,
      "loss": 0.0088,
      "step": 2871540
    },
    {
      "epoch": 4.699370920969082,
      "grad_norm": 0.26120540499687195,
      "learning_rate": 6.05219981154827e-07,
      "loss": 0.0106,
      "step": 2871560
    },
    {
      "epoch": 4.699403651407736,
      "grad_norm": 0.3863040804862976,
      "learning_rate": 6.051540889413098e-07,
      "loss": 0.0088,
      "step": 2871580
    },
    {
      "epoch": 4.69943638184639,
      "grad_norm": 0.2030130922794342,
      "learning_rate": 6.050881967277927e-07,
      "loss": 0.0069,
      "step": 2871600
    },
    {
      "epoch": 4.699469112285043,
      "grad_norm": 0.2718752920627594,
      "learning_rate": 6.050223045142756e-07,
      "loss": 0.0079,
      "step": 2871620
    },
    {
      "epoch": 4.699501842723696,
      "grad_norm": 0.18950410187244415,
      "learning_rate": 6.049564123007585e-07,
      "loss": 0.007,
      "step": 2871640
    },
    {
      "epoch": 4.6995345731623495,
      "grad_norm": 0.20766647160053253,
      "learning_rate": 6.048905200872414e-07,
      "loss": 0.0099,
      "step": 2871660
    },
    {
      "epoch": 4.699567303601003,
      "grad_norm": 0.2313564121723175,
      "learning_rate": 6.048246278737242e-07,
      "loss": 0.0099,
      "step": 2871680
    },
    {
      "epoch": 4.699600034039657,
      "grad_norm": 0.1550072729587555,
      "learning_rate": 6.047587356602071e-07,
      "loss": 0.0103,
      "step": 2871700
    },
    {
      "epoch": 4.699632764478309,
      "grad_norm": 0.19046542048454285,
      "learning_rate": 6.0469284344669e-07,
      "loss": 0.0097,
      "step": 2871720
    },
    {
      "epoch": 4.699665494916963,
      "grad_norm": 0.8718087673187256,
      "learning_rate": 6.046269512331729e-07,
      "loss": 0.0063,
      "step": 2871740
    },
    {
      "epoch": 4.6996982253556165,
      "grad_norm": 0.3582015931606293,
      "learning_rate": 6.045610590196557e-07,
      "loss": 0.0086,
      "step": 2871760
    },
    {
      "epoch": 4.699730955794269,
      "grad_norm": 0.32319000363349915,
      "learning_rate": 6.044951668061385e-07,
      "loss": 0.0097,
      "step": 2871780
    },
    {
      "epoch": 4.699763686232923,
      "grad_norm": 0.20904406905174255,
      "learning_rate": 6.044292745926214e-07,
      "loss": 0.0091,
      "step": 2871800
    },
    {
      "epoch": 4.699796416671576,
      "grad_norm": 0.1531713306903839,
      "learning_rate": 6.043633823791043e-07,
      "loss": 0.0069,
      "step": 2871820
    },
    {
      "epoch": 4.699829147110229,
      "grad_norm": 0.33057665824890137,
      "learning_rate": 6.042974901655872e-07,
      "loss": 0.009,
      "step": 2871840
    },
    {
      "epoch": 4.699861877548883,
      "grad_norm": 0.729756236076355,
      "learning_rate": 6.0423159795207e-07,
      "loss": 0.0097,
      "step": 2871860
    },
    {
      "epoch": 4.699894607987536,
      "grad_norm": 0.4155237078666687,
      "learning_rate": 6.041657057385529e-07,
      "loss": 0.0095,
      "step": 2871880
    },
    {
      "epoch": 4.69992733842619,
      "grad_norm": 0.42480844259262085,
      "learning_rate": 6.040998135250358e-07,
      "loss": 0.0061,
      "step": 2871900
    },
    {
      "epoch": 4.699960068864843,
      "grad_norm": 1.0545040369033813,
      "learning_rate": 6.040339213115187e-07,
      "loss": 0.0094,
      "step": 2871920
    },
    {
      "epoch": 4.699992799303496,
      "grad_norm": 0.2539799213409424,
      "learning_rate": 6.039680290980015e-07,
      "loss": 0.0078,
      "step": 2871940
    },
    {
      "epoch": 4.70002552974215,
      "grad_norm": 0.4177887439727783,
      "learning_rate": 6.039021368844844e-07,
      "loss": 0.013,
      "step": 2871960
    },
    {
      "epoch": 4.700058260180803,
      "grad_norm": 0.2486230880022049,
      "learning_rate": 6.038362446709673e-07,
      "loss": 0.0151,
      "step": 2871980
    },
    {
      "epoch": 4.700090990619456,
      "grad_norm": 0.11942052096128464,
      "learning_rate": 6.037703524574502e-07,
      "loss": 0.0079,
      "step": 2872000
    },
    {
      "epoch": 4.70012372105811,
      "grad_norm": 0.5977516174316406,
      "learning_rate": 6.03704460243933e-07,
      "loss": 0.0108,
      "step": 2872020
    },
    {
      "epoch": 4.700156451496763,
      "grad_norm": 0.4737125039100647,
      "learning_rate": 6.036385680304159e-07,
      "loss": 0.0069,
      "step": 2872040
    },
    {
      "epoch": 4.700189181935416,
      "grad_norm": 0.24208663403987885,
      "learning_rate": 6.035726758168987e-07,
      "loss": 0.0104,
      "step": 2872060
    },
    {
      "epoch": 4.70022191237407,
      "grad_norm": 0.20267345011234283,
      "learning_rate": 6.035067836033817e-07,
      "loss": 0.0088,
      "step": 2872080
    },
    {
      "epoch": 4.700254642812723,
      "grad_norm": 0.09730631858110428,
      "learning_rate": 6.034408913898645e-07,
      "loss": 0.0143,
      "step": 2872100
    },
    {
      "epoch": 4.700287373251376,
      "grad_norm": 0.3469763696193695,
      "learning_rate": 6.033749991763473e-07,
      "loss": 0.0103,
      "step": 2872120
    },
    {
      "epoch": 4.7003201036900295,
      "grad_norm": 0.1974148452281952,
      "learning_rate": 6.033091069628302e-07,
      "loss": 0.0096,
      "step": 2872140
    },
    {
      "epoch": 4.700352834128683,
      "grad_norm": 0.3470705449581146,
      "learning_rate": 6.032432147493131e-07,
      "loss": 0.009,
      "step": 2872160
    },
    {
      "epoch": 4.700385564567337,
      "grad_norm": 0.15000803768634796,
      "learning_rate": 6.03177322535796e-07,
      "loss": 0.0069,
      "step": 2872180
    },
    {
      "epoch": 4.700418295005989,
      "grad_norm": 0.35631290078163147,
      "learning_rate": 6.031114303222789e-07,
      "loss": 0.0076,
      "step": 2872200
    },
    {
      "epoch": 4.700451025444643,
      "grad_norm": 0.5328320264816284,
      "learning_rate": 6.030455381087618e-07,
      "loss": 0.0099,
      "step": 2872220
    },
    {
      "epoch": 4.700483755883297,
      "grad_norm": 0.5150817632675171,
      "learning_rate": 6.029796458952445e-07,
      "loss": 0.0087,
      "step": 2872240
    },
    {
      "epoch": 4.70051648632195,
      "grad_norm": 0.11853775382041931,
      "learning_rate": 6.029137536817275e-07,
      "loss": 0.012,
      "step": 2872260
    },
    {
      "epoch": 4.700549216760603,
      "grad_norm": 0.17833255231380463,
      "learning_rate": 6.028478614682103e-07,
      "loss": 0.0071,
      "step": 2872280
    },
    {
      "epoch": 4.7005819471992565,
      "grad_norm": 0.3765448033809662,
      "learning_rate": 6.027819692546933e-07,
      "loss": 0.0066,
      "step": 2872300
    },
    {
      "epoch": 4.70061467763791,
      "grad_norm": 0.3748709559440613,
      "learning_rate": 6.027160770411761e-07,
      "loss": 0.0117,
      "step": 2872320
    },
    {
      "epoch": 4.700647408076563,
      "grad_norm": 0.26939788460731506,
      "learning_rate": 6.02650184827659e-07,
      "loss": 0.0077,
      "step": 2872340
    },
    {
      "epoch": 4.700680138515216,
      "grad_norm": 0.14117464423179626,
      "learning_rate": 6.025842926141418e-07,
      "loss": 0.0077,
      "step": 2872360
    },
    {
      "epoch": 4.70071286895387,
      "grad_norm": 0.6927456855773926,
      "learning_rate": 6.025184004006248e-07,
      "loss": 0.0091,
      "step": 2872380
    },
    {
      "epoch": 4.700745599392523,
      "grad_norm": 0.30950555205345154,
      "learning_rate": 6.024525081871076e-07,
      "loss": 0.009,
      "step": 2872400
    },
    {
      "epoch": 4.700778329831176,
      "grad_norm": 0.16534613072872162,
      "learning_rate": 6.023866159735905e-07,
      "loss": 0.008,
      "step": 2872420
    },
    {
      "epoch": 4.70081106026983,
      "grad_norm": 0.24110691249370575,
      "learning_rate": 6.023207237600733e-07,
      "loss": 0.0082,
      "step": 2872440
    },
    {
      "epoch": 4.7008437907084835,
      "grad_norm": 0.15547767281532288,
      "learning_rate": 6.022548315465561e-07,
      "loss": 0.0086,
      "step": 2872460
    },
    {
      "epoch": 4.700876521147136,
      "grad_norm": 0.17529690265655518,
      "learning_rate": 6.021889393330391e-07,
      "loss": 0.0093,
      "step": 2872480
    },
    {
      "epoch": 4.70090925158579,
      "grad_norm": 0.7923692464828491,
      "learning_rate": 6.021230471195219e-07,
      "loss": 0.0131,
      "step": 2872500
    },
    {
      "epoch": 4.700941982024443,
      "grad_norm": 0.10146883130073547,
      "learning_rate": 6.020571549060048e-07,
      "loss": 0.0081,
      "step": 2872520
    },
    {
      "epoch": 4.700974712463096,
      "grad_norm": 0.20513693988323212,
      "learning_rate": 6.019912626924876e-07,
      "loss": 0.0108,
      "step": 2872540
    },
    {
      "epoch": 4.70100744290175,
      "grad_norm": 0.3061791956424713,
      "learning_rate": 6.019253704789706e-07,
      "loss": 0.0149,
      "step": 2872560
    },
    {
      "epoch": 4.701040173340403,
      "grad_norm": 0.4319052994251251,
      "learning_rate": 6.018594782654534e-07,
      "loss": 0.0091,
      "step": 2872580
    },
    {
      "epoch": 4.701072903779057,
      "grad_norm": 0.1302354633808136,
      "learning_rate": 6.017935860519363e-07,
      "loss": 0.0067,
      "step": 2872600
    },
    {
      "epoch": 4.7011056342177096,
      "grad_norm": 0.17218096554279327,
      "learning_rate": 6.017276938384191e-07,
      "loss": 0.0106,
      "step": 2872620
    },
    {
      "epoch": 4.701138364656363,
      "grad_norm": 0.14181554317474365,
      "learning_rate": 6.016618016249021e-07,
      "loss": 0.0097,
      "step": 2872640
    },
    {
      "epoch": 4.701171095095017,
      "grad_norm": 0.29086750745773315,
      "learning_rate": 6.015959094113849e-07,
      "loss": 0.0084,
      "step": 2872660
    },
    {
      "epoch": 4.7012038255336694,
      "grad_norm": 0.30072328448295593,
      "learning_rate": 6.015300171978678e-07,
      "loss": 0.0076,
      "step": 2872680
    },
    {
      "epoch": 4.701236555972323,
      "grad_norm": 0.5088242888450623,
      "learning_rate": 6.014641249843506e-07,
      "loss": 0.0159,
      "step": 2872700
    },
    {
      "epoch": 4.701269286410977,
      "grad_norm": 0.5547548532485962,
      "learning_rate": 6.013982327708335e-07,
      "loss": 0.0124,
      "step": 2872720
    },
    {
      "epoch": 4.701302016849629,
      "grad_norm": 0.08989983797073364,
      "learning_rate": 6.013323405573164e-07,
      "loss": 0.0067,
      "step": 2872740
    },
    {
      "epoch": 4.701334747288283,
      "grad_norm": 0.09609673917293549,
      "learning_rate": 6.012664483437993e-07,
      "loss": 0.0117,
      "step": 2872760
    },
    {
      "epoch": 4.7013674777269365,
      "grad_norm": 0.47828394174575806,
      "learning_rate": 6.012005561302821e-07,
      "loss": 0.0111,
      "step": 2872780
    },
    {
      "epoch": 4.70140020816559,
      "grad_norm": 0.098853200674057,
      "learning_rate": 6.011346639167649e-07,
      "loss": 0.0084,
      "step": 2872800
    },
    {
      "epoch": 4.701432938604243,
      "grad_norm": 0.15781095623970032,
      "learning_rate": 6.010687717032479e-07,
      "loss": 0.0053,
      "step": 2872820
    },
    {
      "epoch": 4.701465669042896,
      "grad_norm": 0.39841005206108093,
      "learning_rate": 6.010028794897307e-07,
      "loss": 0.0116,
      "step": 2872840
    },
    {
      "epoch": 4.70149839948155,
      "grad_norm": 0.24400340020656586,
      "learning_rate": 6.009369872762137e-07,
      "loss": 0.0093,
      "step": 2872860
    },
    {
      "epoch": 4.701531129920204,
      "grad_norm": 0.08825458586215973,
      "learning_rate": 6.008710950626965e-07,
      "loss": 0.0061,
      "step": 2872880
    },
    {
      "epoch": 4.701563860358856,
      "grad_norm": 0.37416359782218933,
      "learning_rate": 6.008052028491793e-07,
      "loss": 0.0142,
      "step": 2872900
    },
    {
      "epoch": 4.70159659079751,
      "grad_norm": 0.11857008934020996,
      "learning_rate": 6.007393106356622e-07,
      "loss": 0.0101,
      "step": 2872920
    },
    {
      "epoch": 4.7016293212361635,
      "grad_norm": 0.20740605890750885,
      "learning_rate": 6.006734184221451e-07,
      "loss": 0.0118,
      "step": 2872940
    },
    {
      "epoch": 4.701662051674816,
      "grad_norm": 0.38176560401916504,
      "learning_rate": 6.00607526208628e-07,
      "loss": 0.0067,
      "step": 2872960
    },
    {
      "epoch": 4.70169478211347,
      "grad_norm": 0.4668216109275818,
      "learning_rate": 6.005416339951109e-07,
      "loss": 0.0079,
      "step": 2872980
    },
    {
      "epoch": 4.701727512552123,
      "grad_norm": 0.09399951249361038,
      "learning_rate": 6.004757417815937e-07,
      "loss": 0.0113,
      "step": 2873000
    },
    {
      "epoch": 4.701760242990776,
      "grad_norm": 0.18620657920837402,
      "learning_rate": 6.004098495680766e-07,
      "loss": 0.0071,
      "step": 2873020
    },
    {
      "epoch": 4.70179297342943,
      "grad_norm": 0.11248413473367691,
      "learning_rate": 6.003439573545595e-07,
      "loss": 0.011,
      "step": 2873040
    },
    {
      "epoch": 4.701825703868083,
      "grad_norm": 0.21402189135551453,
      "learning_rate": 6.002780651410424e-07,
      "loss": 0.0109,
      "step": 2873060
    },
    {
      "epoch": 4.701858434306737,
      "grad_norm": 0.028009120374917984,
      "learning_rate": 6.002121729275252e-07,
      "loss": 0.0089,
      "step": 2873080
    },
    {
      "epoch": 4.70189116474539,
      "grad_norm": 0.5880385637283325,
      "learning_rate": 6.001462807140081e-07,
      "loss": 0.0129,
      "step": 2873100
    },
    {
      "epoch": 4.701923895184043,
      "grad_norm": 0.23818625509738922,
      "learning_rate": 6.000803885004909e-07,
      "loss": 0.0079,
      "step": 2873120
    },
    {
      "epoch": 4.701956625622697,
      "grad_norm": 0.48725616931915283,
      "learning_rate": 6.000144962869739e-07,
      "loss": 0.012,
      "step": 2873140
    },
    {
      "epoch": 4.70198935606135,
      "grad_norm": 0.21299846470355988,
      "learning_rate": 5.999486040734567e-07,
      "loss": 0.007,
      "step": 2873160
    },
    {
      "epoch": 4.702022086500003,
      "grad_norm": 0.24866214394569397,
      "learning_rate": 5.998827118599395e-07,
      "loss": 0.0065,
      "step": 2873180
    },
    {
      "epoch": 4.702054816938657,
      "grad_norm": 0.22855128347873688,
      "learning_rate": 5.998168196464224e-07,
      "loss": 0.0122,
      "step": 2873200
    },
    {
      "epoch": 4.70208754737731,
      "grad_norm": 0.10724303126335144,
      "learning_rate": 5.997509274329053e-07,
      "loss": 0.0085,
      "step": 2873220
    },
    {
      "epoch": 4.702120277815963,
      "grad_norm": 0.37813839316368103,
      "learning_rate": 5.996850352193882e-07,
      "loss": 0.0098,
      "step": 2873240
    },
    {
      "epoch": 4.702153008254617,
      "grad_norm": 0.3819822371006012,
      "learning_rate": 5.99619143005871e-07,
      "loss": 0.009,
      "step": 2873260
    },
    {
      "epoch": 4.70218573869327,
      "grad_norm": 0.1252981722354889,
      "learning_rate": 5.995532507923539e-07,
      "loss": 0.0125,
      "step": 2873280
    },
    {
      "epoch": 4.702218469131923,
      "grad_norm": 0.10387428849935532,
      "learning_rate": 5.994873585788368e-07,
      "loss": 0.0099,
      "step": 2873300
    },
    {
      "epoch": 4.7022511995705765,
      "grad_norm": 0.05967177078127861,
      "learning_rate": 5.994214663653197e-07,
      "loss": 0.0096,
      "step": 2873320
    },
    {
      "epoch": 4.70228393000923,
      "grad_norm": 0.30202949047088623,
      "learning_rate": 5.993555741518025e-07,
      "loss": 0.0076,
      "step": 2873340
    },
    {
      "epoch": 4.702316660447884,
      "grad_norm": 0.2029903531074524,
      "learning_rate": 5.992896819382854e-07,
      "loss": 0.0106,
      "step": 2873360
    },
    {
      "epoch": 4.702349390886536,
      "grad_norm": 0.2744433283805847,
      "learning_rate": 5.992237897247682e-07,
      "loss": 0.009,
      "step": 2873380
    },
    {
      "epoch": 4.70238212132519,
      "grad_norm": 0.5907597541809082,
      "learning_rate": 5.991578975112512e-07,
      "loss": 0.0086,
      "step": 2873400
    },
    {
      "epoch": 4.7024148517638436,
      "grad_norm": 0.17257195711135864,
      "learning_rate": 5.99092005297734e-07,
      "loss": 0.009,
      "step": 2873420
    },
    {
      "epoch": 4.702447582202497,
      "grad_norm": 0.22263304889202118,
      "learning_rate": 5.990261130842169e-07,
      "loss": 0.0111,
      "step": 2873440
    },
    {
      "epoch": 4.70248031264115,
      "grad_norm": 0.08926045894622803,
      "learning_rate": 5.989602208706997e-07,
      "loss": 0.0105,
      "step": 2873460
    },
    {
      "epoch": 4.7025130430798034,
      "grad_norm": 0.12053705006837845,
      "learning_rate": 5.988943286571826e-07,
      "loss": 0.0088,
      "step": 2873480
    },
    {
      "epoch": 4.702545773518457,
      "grad_norm": 0.19992020726203918,
      "learning_rate": 5.988284364436655e-07,
      "loss": 0.0088,
      "step": 2873500
    },
    {
      "epoch": 4.70257850395711,
      "grad_norm": 0.11430328339338303,
      "learning_rate": 5.987625442301484e-07,
      "loss": 0.0054,
      "step": 2873520
    },
    {
      "epoch": 4.702611234395763,
      "grad_norm": 0.09045188128948212,
      "learning_rate": 5.986966520166313e-07,
      "loss": 0.0071,
      "step": 2873540
    },
    {
      "epoch": 4.702643964834417,
      "grad_norm": 0.1334235966205597,
      "learning_rate": 5.98630759803114e-07,
      "loss": 0.0045,
      "step": 2873560
    },
    {
      "epoch": 4.70267669527307,
      "grad_norm": 0.1014861986041069,
      "learning_rate": 5.98564867589597e-07,
      "loss": 0.0078,
      "step": 2873580
    },
    {
      "epoch": 4.702709425711723,
      "grad_norm": 0.210230752825737,
      "learning_rate": 5.984989753760798e-07,
      "loss": 0.0087,
      "step": 2873600
    },
    {
      "epoch": 4.702742156150377,
      "grad_norm": 0.22391864657402039,
      "learning_rate": 5.984330831625628e-07,
      "loss": 0.0077,
      "step": 2873620
    },
    {
      "epoch": 4.70277488658903,
      "grad_norm": 0.22641555964946747,
      "learning_rate": 5.983671909490456e-07,
      "loss": 0.0084,
      "step": 2873640
    },
    {
      "epoch": 4.702807617027683,
      "grad_norm": 0.16599160432815552,
      "learning_rate": 5.983012987355285e-07,
      "loss": 0.012,
      "step": 2873660
    },
    {
      "epoch": 4.702840347466337,
      "grad_norm": 0.1314598172903061,
      "learning_rate": 5.982354065220113e-07,
      "loss": 0.0091,
      "step": 2873680
    },
    {
      "epoch": 4.70287307790499,
      "grad_norm": 0.18995662033557892,
      "learning_rate": 5.981695143084943e-07,
      "loss": 0.0096,
      "step": 2873700
    },
    {
      "epoch": 4.702905808343644,
      "grad_norm": 0.21184390783309937,
      "learning_rate": 5.981036220949771e-07,
      "loss": 0.0075,
      "step": 2873720
    },
    {
      "epoch": 4.702938538782297,
      "grad_norm": 0.2798842787742615,
      "learning_rate": 5.9803772988146e-07,
      "loss": 0.009,
      "step": 2873740
    },
    {
      "epoch": 4.70297126922095,
      "grad_norm": 0.277277410030365,
      "learning_rate": 5.979718376679428e-07,
      "loss": 0.0123,
      "step": 2873760
    },
    {
      "epoch": 4.703003999659604,
      "grad_norm": 0.1511518508195877,
      "learning_rate": 5.979059454544257e-07,
      "loss": 0.0101,
      "step": 2873780
    },
    {
      "epoch": 4.7030367300982565,
      "grad_norm": 0.24759729206562042,
      "learning_rate": 5.978400532409086e-07,
      "loss": 0.0104,
      "step": 2873800
    },
    {
      "epoch": 4.70306946053691,
      "grad_norm": 0.19255217909812927,
      "learning_rate": 5.977741610273914e-07,
      "loss": 0.0081,
      "step": 2873820
    },
    {
      "epoch": 4.703102190975564,
      "grad_norm": 0.13534867763519287,
      "learning_rate": 5.977082688138743e-07,
      "loss": 0.0088,
      "step": 2873840
    },
    {
      "epoch": 4.703134921414216,
      "grad_norm": 0.08807729929685593,
      "learning_rate": 5.976423766003571e-07,
      "loss": 0.0057,
      "step": 2873860
    },
    {
      "epoch": 4.70316765185287,
      "grad_norm": 0.35668474435806274,
      "learning_rate": 5.975764843868401e-07,
      "loss": 0.0059,
      "step": 2873880
    },
    {
      "epoch": 4.703200382291524,
      "grad_norm": 0.35723039507865906,
      "learning_rate": 5.975105921733229e-07,
      "loss": 0.007,
      "step": 2873900
    },
    {
      "epoch": 4.703233112730177,
      "grad_norm": 0.527004063129425,
      "learning_rate": 5.974446999598058e-07,
      "loss": 0.0117,
      "step": 2873920
    },
    {
      "epoch": 4.70326584316883,
      "grad_norm": 1.0026870965957642,
      "learning_rate": 5.973788077462886e-07,
      "loss": 0.0084,
      "step": 2873940
    },
    {
      "epoch": 4.7032985736074835,
      "grad_norm": 0.09358768910169601,
      "learning_rate": 5.973129155327716e-07,
      "loss": 0.0077,
      "step": 2873960
    },
    {
      "epoch": 4.703331304046137,
      "grad_norm": 0.15783657133579254,
      "learning_rate": 5.972470233192544e-07,
      "loss": 0.0063,
      "step": 2873980
    },
    {
      "epoch": 4.70336403448479,
      "grad_norm": 0.27113398909568787,
      "learning_rate": 5.971811311057373e-07,
      "loss": 0.0093,
      "step": 2874000
    },
    {
      "epoch": 4.703396764923443,
      "grad_norm": 0.0759674459695816,
      "learning_rate": 5.971152388922201e-07,
      "loss": 0.0056,
      "step": 2874020
    },
    {
      "epoch": 4.703429495362097,
      "grad_norm": 0.5096589922904968,
      "learning_rate": 5.97049346678703e-07,
      "loss": 0.0103,
      "step": 2874040
    },
    {
      "epoch": 4.703462225800751,
      "grad_norm": 0.29257771372795105,
      "learning_rate": 5.969834544651859e-07,
      "loss": 0.0087,
      "step": 2874060
    },
    {
      "epoch": 4.703494956239403,
      "grad_norm": 0.4351623058319092,
      "learning_rate": 5.969175622516688e-07,
      "loss": 0.0079,
      "step": 2874080
    },
    {
      "epoch": 4.703527686678057,
      "grad_norm": 0.16879719495773315,
      "learning_rate": 5.968516700381516e-07,
      "loss": 0.0065,
      "step": 2874100
    },
    {
      "epoch": 4.7035604171167105,
      "grad_norm": 0.11446390300989151,
      "learning_rate": 5.967857778246345e-07,
      "loss": 0.0104,
      "step": 2874120
    },
    {
      "epoch": 4.703593147555363,
      "grad_norm": 0.21986563503742218,
      "learning_rate": 5.967198856111174e-07,
      "loss": 0.0061,
      "step": 2874140
    },
    {
      "epoch": 4.703625877994017,
      "grad_norm": 0.3669503927230835,
      "learning_rate": 5.966539933976002e-07,
      "loss": 0.0104,
      "step": 2874160
    },
    {
      "epoch": 4.70365860843267,
      "grad_norm": 0.054887838661670685,
      "learning_rate": 5.965881011840832e-07,
      "loss": 0.008,
      "step": 2874180
    },
    {
      "epoch": 4.703691338871323,
      "grad_norm": 0.09298902750015259,
      "learning_rate": 5.96522208970566e-07,
      "loss": 0.0081,
      "step": 2874200
    },
    {
      "epoch": 4.703724069309977,
      "grad_norm": 0.5920215249061584,
      "learning_rate": 5.964563167570488e-07,
      "loss": 0.0106,
      "step": 2874220
    },
    {
      "epoch": 4.70375679974863,
      "grad_norm": 0.13809333741664886,
      "learning_rate": 5.963904245435317e-07,
      "loss": 0.0085,
      "step": 2874240
    },
    {
      "epoch": 4.703789530187284,
      "grad_norm": 0.07271520048379898,
      "learning_rate": 5.963245323300146e-07,
      "loss": 0.0095,
      "step": 2874260
    },
    {
      "epoch": 4.703822260625937,
      "grad_norm": 0.15878278017044067,
      "learning_rate": 5.962586401164975e-07,
      "loss": 0.0091,
      "step": 2874280
    },
    {
      "epoch": 4.70385499106459,
      "grad_norm": 0.4373532235622406,
      "learning_rate": 5.961927479029804e-07,
      "loss": 0.0079,
      "step": 2874300
    },
    {
      "epoch": 4.703887721503244,
      "grad_norm": 0.26137617230415344,
      "learning_rate": 5.961268556894632e-07,
      "loss": 0.0123,
      "step": 2874320
    },
    {
      "epoch": 4.703920451941897,
      "grad_norm": 0.12394294142723083,
      "learning_rate": 5.960609634759461e-07,
      "loss": 0.0066,
      "step": 2874340
    },
    {
      "epoch": 4.70395318238055,
      "grad_norm": 0.06265883892774582,
      "learning_rate": 5.95995071262429e-07,
      "loss": 0.0105,
      "step": 2874360
    },
    {
      "epoch": 4.703985912819204,
      "grad_norm": 0.545513927936554,
      "learning_rate": 5.959291790489119e-07,
      "loss": 0.0123,
      "step": 2874380
    },
    {
      "epoch": 4.704018643257857,
      "grad_norm": 0.3140319585800171,
      "learning_rate": 5.958632868353947e-07,
      "loss": 0.0109,
      "step": 2874400
    },
    {
      "epoch": 4.70405137369651,
      "grad_norm": 0.27937066555023193,
      "learning_rate": 5.957973946218776e-07,
      "loss": 0.0073,
      "step": 2874420
    },
    {
      "epoch": 4.7040841041351635,
      "grad_norm": 0.27261167764663696,
      "learning_rate": 5.957315024083604e-07,
      "loss": 0.0093,
      "step": 2874440
    },
    {
      "epoch": 4.704116834573817,
      "grad_norm": 0.3040045201778412,
      "learning_rate": 5.956656101948434e-07,
      "loss": 0.0074,
      "step": 2874460
    },
    {
      "epoch": 4.70414956501247,
      "grad_norm": 0.23538444936275482,
      "learning_rate": 5.955997179813262e-07,
      "loss": 0.0097,
      "step": 2874480
    },
    {
      "epoch": 4.704182295451123,
      "grad_norm": 0.2564723789691925,
      "learning_rate": 5.95533825767809e-07,
      "loss": 0.0102,
      "step": 2874500
    },
    {
      "epoch": 4.704215025889777,
      "grad_norm": 0.12285888195037842,
      "learning_rate": 5.954679335542919e-07,
      "loss": 0.0078,
      "step": 2874520
    },
    {
      "epoch": 4.704247756328431,
      "grad_norm": 1.9468687772750854,
      "learning_rate": 5.954020413407748e-07,
      "loss": 0.0094,
      "step": 2874540
    },
    {
      "epoch": 4.704280486767083,
      "grad_norm": 0.4281236529350281,
      "learning_rate": 5.953361491272577e-07,
      "loss": 0.0148,
      "step": 2874560
    },
    {
      "epoch": 4.704313217205737,
      "grad_norm": 0.3371824622154236,
      "learning_rate": 5.952702569137405e-07,
      "loss": 0.0101,
      "step": 2874580
    },
    {
      "epoch": 4.7043459476443905,
      "grad_norm": 0.32788437604904175,
      "learning_rate": 5.952043647002234e-07,
      "loss": 0.008,
      "step": 2874600
    },
    {
      "epoch": 4.704378678083044,
      "grad_norm": 0.2958071529865265,
      "learning_rate": 5.951384724867063e-07,
      "loss": 0.0072,
      "step": 2874620
    },
    {
      "epoch": 4.704411408521697,
      "grad_norm": 0.341111958026886,
      "learning_rate": 5.950725802731892e-07,
      "loss": 0.0063,
      "step": 2874640
    },
    {
      "epoch": 4.70444413896035,
      "grad_norm": 0.1932387501001358,
      "learning_rate": 5.95006688059672e-07,
      "loss": 0.0091,
      "step": 2874660
    },
    {
      "epoch": 4.704476869399004,
      "grad_norm": 0.26692792773246765,
      "learning_rate": 5.949407958461549e-07,
      "loss": 0.0072,
      "step": 2874680
    },
    {
      "epoch": 4.704509599837657,
      "grad_norm": 0.5132808685302734,
      "learning_rate": 5.948749036326377e-07,
      "loss": 0.0134,
      "step": 2874700
    },
    {
      "epoch": 4.70454233027631,
      "grad_norm": 0.08086442202329636,
      "learning_rate": 5.948090114191207e-07,
      "loss": 0.0075,
      "step": 2874720
    },
    {
      "epoch": 4.704575060714964,
      "grad_norm": 0.2128736823797226,
      "learning_rate": 5.947431192056035e-07,
      "loss": 0.0079,
      "step": 2874740
    },
    {
      "epoch": 4.704607791153617,
      "grad_norm": 0.1322290599346161,
      "learning_rate": 5.946772269920864e-07,
      "loss": 0.0065,
      "step": 2874760
    },
    {
      "epoch": 4.70464052159227,
      "grad_norm": 0.6040713787078857,
      "learning_rate": 5.946113347785692e-07,
      "loss": 0.0095,
      "step": 2874780
    },
    {
      "epoch": 4.704673252030924,
      "grad_norm": 0.10626999288797379,
      "learning_rate": 5.945454425650522e-07,
      "loss": 0.0118,
      "step": 2874800
    },
    {
      "epoch": 4.704705982469577,
      "grad_norm": 0.1302889883518219,
      "learning_rate": 5.94479550351535e-07,
      "loss": 0.0102,
      "step": 2874820
    },
    {
      "epoch": 4.70473871290823,
      "grad_norm": 0.452129065990448,
      "learning_rate": 5.94413658138018e-07,
      "loss": 0.0078,
      "step": 2874840
    },
    {
      "epoch": 4.704771443346884,
      "grad_norm": 0.20409832894802094,
      "learning_rate": 5.943477659245008e-07,
      "loss": 0.0102,
      "step": 2874860
    },
    {
      "epoch": 4.704804173785537,
      "grad_norm": 0.10726164281368256,
      "learning_rate": 5.942818737109836e-07,
      "loss": 0.0074,
      "step": 2874880
    },
    {
      "epoch": 4.704836904224191,
      "grad_norm": 0.2318817377090454,
      "learning_rate": 5.942159814974665e-07,
      "loss": 0.0068,
      "step": 2874900
    },
    {
      "epoch": 4.704869634662844,
      "grad_norm": 0.2736678123474121,
      "learning_rate": 5.941500892839493e-07,
      "loss": 0.0082,
      "step": 2874920
    },
    {
      "epoch": 4.704902365101497,
      "grad_norm": 0.22583693265914917,
      "learning_rate": 5.940841970704323e-07,
      "loss": 0.0055,
      "step": 2874940
    },
    {
      "epoch": 4.704935095540151,
      "grad_norm": 0.20969466865062714,
      "learning_rate": 5.940183048569151e-07,
      "loss": 0.0097,
      "step": 2874960
    },
    {
      "epoch": 4.7049678259788035,
      "grad_norm": 0.2779971659183502,
      "learning_rate": 5.93952412643398e-07,
      "loss": 0.0077,
      "step": 2874980
    },
    {
      "epoch": 4.705000556417457,
      "grad_norm": 0.25789445638656616,
      "learning_rate": 5.938865204298808e-07,
      "loss": 0.0099,
      "step": 2875000
    },
    {
      "epoch": 4.705033286856111,
      "grad_norm": 0.12135731428861618,
      "learning_rate": 5.938206282163638e-07,
      "loss": 0.009,
      "step": 2875020
    },
    {
      "epoch": 4.705066017294763,
      "grad_norm": 0.3244633674621582,
      "learning_rate": 5.937547360028466e-07,
      "loss": 0.0112,
      "step": 2875040
    },
    {
      "epoch": 4.705098747733417,
      "grad_norm": 0.15339674055576324,
      "learning_rate": 5.936888437893295e-07,
      "loss": 0.0097,
      "step": 2875060
    },
    {
      "epoch": 4.705131478172071,
      "grad_norm": 0.07840536534786224,
      "learning_rate": 5.936229515758123e-07,
      "loss": 0.0083,
      "step": 2875080
    },
    {
      "epoch": 4.705164208610724,
      "grad_norm": 0.20732928812503815,
      "learning_rate": 5.935570593622953e-07,
      "loss": 0.0087,
      "step": 2875100
    },
    {
      "epoch": 4.705196939049377,
      "grad_norm": 0.13983069360256195,
      "learning_rate": 5.934911671487781e-07,
      "loss": 0.0082,
      "step": 2875120
    },
    {
      "epoch": 4.7052296694880305,
      "grad_norm": 0.24251757562160492,
      "learning_rate": 5.93425274935261e-07,
      "loss": 0.0072,
      "step": 2875140
    },
    {
      "epoch": 4.705262399926684,
      "grad_norm": 0.47569575905799866,
      "learning_rate": 5.933593827217438e-07,
      "loss": 0.0073,
      "step": 2875160
    },
    {
      "epoch": 4.705295130365338,
      "grad_norm": 0.23874293267726898,
      "learning_rate": 5.932934905082266e-07,
      "loss": 0.0069,
      "step": 2875180
    },
    {
      "epoch": 4.70532786080399,
      "grad_norm": 0.18166333436965942,
      "learning_rate": 5.932275982947096e-07,
      "loss": 0.009,
      "step": 2875200
    },
    {
      "epoch": 4.705360591242644,
      "grad_norm": 0.09802109003067017,
      "learning_rate": 5.931617060811924e-07,
      "loss": 0.0093,
      "step": 2875220
    },
    {
      "epoch": 4.7053933216812975,
      "grad_norm": 0.34540215134620667,
      "learning_rate": 5.930958138676753e-07,
      "loss": 0.0057,
      "step": 2875240
    },
    {
      "epoch": 4.70542605211995,
      "grad_norm": 0.12643882632255554,
      "learning_rate": 5.930299216541581e-07,
      "loss": 0.0079,
      "step": 2875260
    },
    {
      "epoch": 4.705458782558604,
      "grad_norm": 0.4047749638557434,
      "learning_rate": 5.929640294406411e-07,
      "loss": 0.0094,
      "step": 2875280
    },
    {
      "epoch": 4.705491512997257,
      "grad_norm": 0.12496007233858109,
      "learning_rate": 5.928981372271239e-07,
      "loss": 0.0117,
      "step": 2875300
    },
    {
      "epoch": 4.70552424343591,
      "grad_norm": 0.28593742847442627,
      "learning_rate": 5.928322450136068e-07,
      "loss": 0.0089,
      "step": 2875320
    },
    {
      "epoch": 4.705556973874564,
      "grad_norm": 0.14881521463394165,
      "learning_rate": 5.927663528000896e-07,
      "loss": 0.0074,
      "step": 2875340
    },
    {
      "epoch": 4.705589704313217,
      "grad_norm": 0.30127090215682983,
      "learning_rate": 5.927004605865725e-07,
      "loss": 0.0088,
      "step": 2875360
    },
    {
      "epoch": 4.705622434751871,
      "grad_norm": 0.31415632367134094,
      "learning_rate": 5.926345683730554e-07,
      "loss": 0.0071,
      "step": 2875380
    },
    {
      "epoch": 4.705655165190524,
      "grad_norm": 0.40636685490608215,
      "learning_rate": 5.925686761595383e-07,
      "loss": 0.0131,
      "step": 2875400
    },
    {
      "epoch": 4.705687895629177,
      "grad_norm": 0.10414925217628479,
      "learning_rate": 5.925027839460211e-07,
      "loss": 0.0073,
      "step": 2875420
    },
    {
      "epoch": 4.705720626067831,
      "grad_norm": 0.11967118829488754,
      "learning_rate": 5.92436891732504e-07,
      "loss": 0.0094,
      "step": 2875440
    },
    {
      "epoch": 4.7057533565064835,
      "grad_norm": 0.29762744903564453,
      "learning_rate": 5.923709995189869e-07,
      "loss": 0.006,
      "step": 2875460
    },
    {
      "epoch": 4.705786086945137,
      "grad_norm": 0.2624250650405884,
      "learning_rate": 5.923051073054698e-07,
      "loss": 0.0124,
      "step": 2875480
    },
    {
      "epoch": 4.705818817383791,
      "grad_norm": 0.4119493067264557,
      "learning_rate": 5.922392150919527e-07,
      "loss": 0.0103,
      "step": 2875500
    },
    {
      "epoch": 4.705851547822444,
      "grad_norm": 0.09761128574609756,
      "learning_rate": 5.921733228784356e-07,
      "loss": 0.0119,
      "step": 2875520
    },
    {
      "epoch": 4.705884278261097,
      "grad_norm": 0.16587339341640472,
      "learning_rate": 5.921074306649184e-07,
      "loss": 0.0079,
      "step": 2875540
    },
    {
      "epoch": 4.705917008699751,
      "grad_norm": 0.14506089687347412,
      "learning_rate": 5.920415384514012e-07,
      "loss": 0.0115,
      "step": 2875560
    },
    {
      "epoch": 4.705949739138404,
      "grad_norm": 0.21517781913280487,
      "learning_rate": 5.919756462378841e-07,
      "loss": 0.0083,
      "step": 2875580
    },
    {
      "epoch": 4.705982469577057,
      "grad_norm": 0.18576712906360626,
      "learning_rate": 5.91909754024367e-07,
      "loss": 0.0079,
      "step": 2875600
    },
    {
      "epoch": 4.7060152000157105,
      "grad_norm": 0.18804673850536346,
      "learning_rate": 5.918438618108499e-07,
      "loss": 0.01,
      "step": 2875620
    },
    {
      "epoch": 4.706047930454364,
      "grad_norm": 0.16964150965213776,
      "learning_rate": 5.917779695973327e-07,
      "loss": 0.014,
      "step": 2875640
    },
    {
      "epoch": 4.706080660893017,
      "grad_norm": 0.07765834778547287,
      "learning_rate": 5.917120773838156e-07,
      "loss": 0.0076,
      "step": 2875660
    },
    {
      "epoch": 4.70611339133167,
      "grad_norm": 0.0905630886554718,
      "learning_rate": 5.916461851702985e-07,
      "loss": 0.009,
      "step": 2875680
    },
    {
      "epoch": 4.706146121770324,
      "grad_norm": 0.20765143632888794,
      "learning_rate": 5.915802929567814e-07,
      "loss": 0.008,
      "step": 2875700
    },
    {
      "epoch": 4.706178852208978,
      "grad_norm": 0.10297054052352905,
      "learning_rate": 5.915144007432642e-07,
      "loss": 0.0072,
      "step": 2875720
    },
    {
      "epoch": 4.70621158264763,
      "grad_norm": 0.34062522649765015,
      "learning_rate": 5.914485085297471e-07,
      "loss": 0.007,
      "step": 2875740
    },
    {
      "epoch": 4.706244313086284,
      "grad_norm": 0.3105497658252716,
      "learning_rate": 5.9138261631623e-07,
      "loss": 0.0102,
      "step": 2875760
    },
    {
      "epoch": 4.7062770435249375,
      "grad_norm": 0.16234759986400604,
      "learning_rate": 5.913167241027129e-07,
      "loss": 0.0065,
      "step": 2875780
    },
    {
      "epoch": 4.706309773963591,
      "grad_norm": 0.19725854694843292,
      "learning_rate": 5.912508318891957e-07,
      "loss": 0.0146,
      "step": 2875800
    },
    {
      "epoch": 4.706342504402244,
      "grad_norm": 0.28691890835762024,
      "learning_rate": 5.911849396756786e-07,
      "loss": 0.0073,
      "step": 2875820
    },
    {
      "epoch": 4.706375234840897,
      "grad_norm": 0.2460031360387802,
      "learning_rate": 5.911190474621614e-07,
      "loss": 0.0083,
      "step": 2875840
    },
    {
      "epoch": 4.706407965279551,
      "grad_norm": 0.14646998047828674,
      "learning_rate": 5.910531552486443e-07,
      "loss": 0.0125,
      "step": 2875860
    },
    {
      "epoch": 4.706440695718204,
      "grad_norm": 0.09761708974838257,
      "learning_rate": 5.909872630351272e-07,
      "loss": 0.0093,
      "step": 2875880
    },
    {
      "epoch": 4.706473426156857,
      "grad_norm": 0.15018995106220245,
      "learning_rate": 5.9092137082161e-07,
      "loss": 0.0104,
      "step": 2875900
    },
    {
      "epoch": 4.706506156595511,
      "grad_norm": 0.15364685654640198,
      "learning_rate": 5.908554786080929e-07,
      "loss": 0.0064,
      "step": 2875920
    },
    {
      "epoch": 4.706538887034164,
      "grad_norm": 0.17189925909042358,
      "learning_rate": 5.907895863945758e-07,
      "loss": 0.0099,
      "step": 2875940
    },
    {
      "epoch": 4.706571617472817,
      "grad_norm": 0.3097516596317291,
      "learning_rate": 5.907236941810587e-07,
      "loss": 0.0095,
      "step": 2875960
    },
    {
      "epoch": 4.706604347911471,
      "grad_norm": 0.5397460460662842,
      "learning_rate": 5.906578019675415e-07,
      "loss": 0.0085,
      "step": 2875980
    },
    {
      "epoch": 4.706637078350124,
      "grad_norm": 0.10869944840669632,
      "learning_rate": 5.905919097540244e-07,
      "loss": 0.0092,
      "step": 2876000
    },
    {
      "epoch": 4.706669808788777,
      "grad_norm": 0.1122046411037445,
      "learning_rate": 5.905260175405072e-07,
      "loss": 0.0106,
      "step": 2876020
    },
    {
      "epoch": 4.706702539227431,
      "grad_norm": 0.24685703217983246,
      "learning_rate": 5.904601253269902e-07,
      "loss": 0.0054,
      "step": 2876040
    },
    {
      "epoch": 4.706735269666084,
      "grad_norm": 0.21456539630889893,
      "learning_rate": 5.90394233113473e-07,
      "loss": 0.0059,
      "step": 2876060
    },
    {
      "epoch": 4.706768000104738,
      "grad_norm": 0.6320807933807373,
      "learning_rate": 5.903283408999559e-07,
      "loss": 0.0122,
      "step": 2876080
    },
    {
      "epoch": 4.7068007305433905,
      "grad_norm": 0.08599532395601273,
      "learning_rate": 5.902624486864387e-07,
      "loss": 0.0055,
      "step": 2876100
    },
    {
      "epoch": 4.706833460982044,
      "grad_norm": 0.2951785922050476,
      "learning_rate": 5.901965564729217e-07,
      "loss": 0.0083,
      "step": 2876120
    },
    {
      "epoch": 4.706866191420698,
      "grad_norm": 0.19083434343338013,
      "learning_rate": 5.901306642594045e-07,
      "loss": 0.0138,
      "step": 2876140
    },
    {
      "epoch": 4.70689892185935,
      "grad_norm": 0.1759132742881775,
      "learning_rate": 5.900647720458875e-07,
      "loss": 0.0072,
      "step": 2876160
    },
    {
      "epoch": 4.706931652298004,
      "grad_norm": 0.20440620183944702,
      "learning_rate": 5.899988798323703e-07,
      "loss": 0.0054,
      "step": 2876180
    },
    {
      "epoch": 4.706964382736658,
      "grad_norm": 0.5515329837799072,
      "learning_rate": 5.899329876188531e-07,
      "loss": 0.0108,
      "step": 2876200
    },
    {
      "epoch": 4.70699711317531,
      "grad_norm": 0.4507683515548706,
      "learning_rate": 5.89867095405336e-07,
      "loss": 0.0109,
      "step": 2876220
    },
    {
      "epoch": 4.707029843613964,
      "grad_norm": 0.48720863461494446,
      "learning_rate": 5.898012031918188e-07,
      "loss": 0.0112,
      "step": 2876240
    },
    {
      "epoch": 4.7070625740526175,
      "grad_norm": 0.31646549701690674,
      "learning_rate": 5.897353109783018e-07,
      "loss": 0.0081,
      "step": 2876260
    },
    {
      "epoch": 4.707095304491271,
      "grad_norm": 0.09447461366653442,
      "learning_rate": 5.896694187647846e-07,
      "loss": 0.007,
      "step": 2876280
    },
    {
      "epoch": 4.707128034929924,
      "grad_norm": 0.3341684639453888,
      "learning_rate": 5.896035265512675e-07,
      "loss": 0.0075,
      "step": 2876300
    },
    {
      "epoch": 4.707160765368577,
      "grad_norm": 0.23955298960208893,
      "learning_rate": 5.895376343377503e-07,
      "loss": 0.0073,
      "step": 2876320
    },
    {
      "epoch": 4.707193495807231,
      "grad_norm": 0.3287907838821411,
      "learning_rate": 5.894717421242333e-07,
      "loss": 0.0086,
      "step": 2876340
    },
    {
      "epoch": 4.707226226245885,
      "grad_norm": 0.15881480276584625,
      "learning_rate": 5.894058499107161e-07,
      "loss": 0.0096,
      "step": 2876360
    },
    {
      "epoch": 4.707258956684537,
      "grad_norm": 0.10442402958869934,
      "learning_rate": 5.89339957697199e-07,
      "loss": 0.0065,
      "step": 2876380
    },
    {
      "epoch": 4.707291687123191,
      "grad_norm": 0.094544418156147,
      "learning_rate": 5.892740654836818e-07,
      "loss": 0.0065,
      "step": 2876400
    },
    {
      "epoch": 4.7073244175618445,
      "grad_norm": 0.1986072063446045,
      "learning_rate": 5.892081732701648e-07,
      "loss": 0.0105,
      "step": 2876420
    },
    {
      "epoch": 4.707357148000497,
      "grad_norm": 0.23383909463882446,
      "learning_rate": 5.891422810566476e-07,
      "loss": 0.0081,
      "step": 2876440
    },
    {
      "epoch": 4.707389878439151,
      "grad_norm": 0.36153024435043335,
      "learning_rate": 5.890763888431305e-07,
      "loss": 0.0091,
      "step": 2876460
    },
    {
      "epoch": 4.707422608877804,
      "grad_norm": 0.5211845636367798,
      "learning_rate": 5.890104966296133e-07,
      "loss": 0.0074,
      "step": 2876480
    },
    {
      "epoch": 4.707455339316457,
      "grad_norm": 0.35999050736427307,
      "learning_rate": 5.889446044160962e-07,
      "loss": 0.0095,
      "step": 2876500
    },
    {
      "epoch": 4.707488069755111,
      "grad_norm": 0.2636079788208008,
      "learning_rate": 5.888787122025791e-07,
      "loss": 0.0102,
      "step": 2876520
    },
    {
      "epoch": 4.707520800193764,
      "grad_norm": 0.43439650535583496,
      "learning_rate": 5.88812819989062e-07,
      "loss": 0.0109,
      "step": 2876540
    },
    {
      "epoch": 4.707553530632418,
      "grad_norm": 0.14508768916130066,
      "learning_rate": 5.887469277755448e-07,
      "loss": 0.006,
      "step": 2876560
    },
    {
      "epoch": 4.707586261071071,
      "grad_norm": 0.2552540898323059,
      "learning_rate": 5.886810355620276e-07,
      "loss": 0.008,
      "step": 2876580
    },
    {
      "epoch": 4.707618991509724,
      "grad_norm": 0.9823420643806458,
      "learning_rate": 5.886151433485106e-07,
      "loss": 0.0101,
      "step": 2876600
    },
    {
      "epoch": 4.707651721948378,
      "grad_norm": 0.2288535088300705,
      "learning_rate": 5.885492511349934e-07,
      "loss": 0.0071,
      "step": 2876620
    },
    {
      "epoch": 4.707684452387031,
      "grad_norm": 0.03906724229454994,
      "learning_rate": 5.884833589214763e-07,
      "loss": 0.0056,
      "step": 2876640
    },
    {
      "epoch": 4.707717182825684,
      "grad_norm": 0.21984808146953583,
      "learning_rate": 5.884174667079591e-07,
      "loss": 0.008,
      "step": 2876660
    },
    {
      "epoch": 4.707749913264338,
      "grad_norm": 0.17002785205841064,
      "learning_rate": 5.88351574494442e-07,
      "loss": 0.0075,
      "step": 2876680
    },
    {
      "epoch": 4.707782643702991,
      "grad_norm": 0.4726554751396179,
      "learning_rate": 5.882856822809249e-07,
      "loss": 0.0086,
      "step": 2876700
    },
    {
      "epoch": 4.707815374141644,
      "grad_norm": 0.1382865607738495,
      "learning_rate": 5.882197900674078e-07,
      "loss": 0.0098,
      "step": 2876720
    },
    {
      "epoch": 4.707848104580298,
      "grad_norm": 0.18873955309391022,
      "learning_rate": 5.881538978538906e-07,
      "loss": 0.0074,
      "step": 2876740
    },
    {
      "epoch": 4.707880835018951,
      "grad_norm": 0.44207826256752014,
      "learning_rate": 5.880880056403735e-07,
      "loss": 0.0104,
      "step": 2876760
    },
    {
      "epoch": 4.707913565457604,
      "grad_norm": 0.17585228383541107,
      "learning_rate": 5.880221134268564e-07,
      "loss": 0.0109,
      "step": 2876780
    },
    {
      "epoch": 4.7079462958962575,
      "grad_norm": 0.309247761964798,
      "learning_rate": 5.879562212133393e-07,
      "loss": 0.0099,
      "step": 2876800
    },
    {
      "epoch": 4.707979026334911,
      "grad_norm": 0.21035577356815338,
      "learning_rate": 5.878903289998222e-07,
      "loss": 0.0073,
      "step": 2876820
    },
    {
      "epoch": 4.708011756773565,
      "grad_norm": 0.23026643693447113,
      "learning_rate": 5.878244367863051e-07,
      "loss": 0.0095,
      "step": 2876840
    },
    {
      "epoch": 4.708044487212217,
      "grad_norm": 0.6183945536613464,
      "learning_rate": 5.877585445727879e-07,
      "loss": 0.0083,
      "step": 2876860
    },
    {
      "epoch": 4.708077217650871,
      "grad_norm": 0.20363089442253113,
      "learning_rate": 5.876926523592707e-07,
      "loss": 0.0076,
      "step": 2876880
    },
    {
      "epoch": 4.7081099480895245,
      "grad_norm": 0.11504021286964417,
      "learning_rate": 5.876267601457536e-07,
      "loss": 0.0069,
      "step": 2876900
    },
    {
      "epoch": 4.708142678528177,
      "grad_norm": 0.1726406067609787,
      "learning_rate": 5.875608679322365e-07,
      "loss": 0.0069,
      "step": 2876920
    },
    {
      "epoch": 4.708175408966831,
      "grad_norm": 0.14476829767227173,
      "learning_rate": 5.874949757187194e-07,
      "loss": 0.011,
      "step": 2876940
    },
    {
      "epoch": 4.708208139405484,
      "grad_norm": 0.17731893062591553,
      "learning_rate": 5.874290835052022e-07,
      "loss": 0.0085,
      "step": 2876960
    },
    {
      "epoch": 4.708240869844138,
      "grad_norm": 0.09394554048776627,
      "learning_rate": 5.873631912916851e-07,
      "loss": 0.0081,
      "step": 2876980
    },
    {
      "epoch": 4.708273600282791,
      "grad_norm": 0.2297811508178711,
      "learning_rate": 5.87297299078168e-07,
      "loss": 0.0101,
      "step": 2877000
    },
    {
      "epoch": 4.708306330721444,
      "grad_norm": 0.08173152059316635,
      "learning_rate": 5.872314068646509e-07,
      "loss": 0.0111,
      "step": 2877020
    },
    {
      "epoch": 4.708339061160098,
      "grad_norm": 0.15388265252113342,
      "learning_rate": 5.871655146511337e-07,
      "loss": 0.0143,
      "step": 2877040
    },
    {
      "epoch": 4.708371791598751,
      "grad_norm": 0.10014599561691284,
      "learning_rate": 5.870996224376166e-07,
      "loss": 0.0072,
      "step": 2877060
    },
    {
      "epoch": 4.708404522037404,
      "grad_norm": 0.19897997379302979,
      "learning_rate": 5.870337302240995e-07,
      "loss": 0.0081,
      "step": 2877080
    },
    {
      "epoch": 4.708437252476058,
      "grad_norm": 0.06744460016489029,
      "learning_rate": 5.869678380105824e-07,
      "loss": 0.0095,
      "step": 2877100
    },
    {
      "epoch": 4.7084699829147105,
      "grad_norm": 0.175197035074234,
      "learning_rate": 5.869019457970652e-07,
      "loss": 0.007,
      "step": 2877120
    },
    {
      "epoch": 4.708502713353364,
      "grad_norm": 0.11535198986530304,
      "learning_rate": 5.868360535835481e-07,
      "loss": 0.0089,
      "step": 2877140
    },
    {
      "epoch": 4.708535443792018,
      "grad_norm": 0.2293396294116974,
      "learning_rate": 5.867701613700309e-07,
      "loss": 0.0149,
      "step": 2877160
    },
    {
      "epoch": 4.708568174230671,
      "grad_norm": 0.14195184409618378,
      "learning_rate": 5.867042691565139e-07,
      "loss": 0.0077,
      "step": 2877180
    },
    {
      "epoch": 4.708600904669324,
      "grad_norm": 0.18828922510147095,
      "learning_rate": 5.866383769429967e-07,
      "loss": 0.0089,
      "step": 2877200
    },
    {
      "epoch": 4.708633635107978,
      "grad_norm": 0.08339490741491318,
      "learning_rate": 5.865724847294795e-07,
      "loss": 0.0099,
      "step": 2877220
    },
    {
      "epoch": 4.708666365546631,
      "grad_norm": 0.3931458592414856,
      "learning_rate": 5.865065925159624e-07,
      "loss": 0.0106,
      "step": 2877240
    },
    {
      "epoch": 4.708699095985285,
      "grad_norm": 0.28664299845695496,
      "learning_rate": 5.864407003024453e-07,
      "loss": 0.0053,
      "step": 2877260
    },
    {
      "epoch": 4.7087318264239375,
      "grad_norm": 0.42366889119148254,
      "learning_rate": 5.863748080889282e-07,
      "loss": 0.0093,
      "step": 2877280
    },
    {
      "epoch": 4.708764556862591,
      "grad_norm": 0.219172865152359,
      "learning_rate": 5.86308915875411e-07,
      "loss": 0.008,
      "step": 2877300
    },
    {
      "epoch": 4.708797287301245,
      "grad_norm": 0.17992663383483887,
      "learning_rate": 5.862430236618939e-07,
      "loss": 0.0053,
      "step": 2877320
    },
    {
      "epoch": 4.708830017739897,
      "grad_norm": 0.20303837954998016,
      "learning_rate": 5.861771314483767e-07,
      "loss": 0.0078,
      "step": 2877340
    },
    {
      "epoch": 4.708862748178551,
      "grad_norm": 0.501660168170929,
      "learning_rate": 5.861112392348597e-07,
      "loss": 0.01,
      "step": 2877360
    },
    {
      "epoch": 4.708895478617205,
      "grad_norm": 0.3038819432258606,
      "learning_rate": 5.860453470213425e-07,
      "loss": 0.0072,
      "step": 2877380
    },
    {
      "epoch": 4.708928209055857,
      "grad_norm": 0.12363612651824951,
      "learning_rate": 5.859794548078254e-07,
      "loss": 0.0121,
      "step": 2877400
    },
    {
      "epoch": 4.708960939494511,
      "grad_norm": 0.2483423799276352,
      "learning_rate": 5.859135625943082e-07,
      "loss": 0.0084,
      "step": 2877420
    },
    {
      "epoch": 4.7089936699331645,
      "grad_norm": 0.11593484878540039,
      "learning_rate": 5.858476703807912e-07,
      "loss": 0.0112,
      "step": 2877440
    },
    {
      "epoch": 4.709026400371818,
      "grad_norm": 0.2149745225906372,
      "learning_rate": 5.85781778167274e-07,
      "loss": 0.0101,
      "step": 2877460
    },
    {
      "epoch": 4.709059130810471,
      "grad_norm": 0.11505626142024994,
      "learning_rate": 5.85715885953757e-07,
      "loss": 0.0103,
      "step": 2877480
    },
    {
      "epoch": 4.709091861249124,
      "grad_norm": 0.5053471922874451,
      "learning_rate": 5.856499937402398e-07,
      "loss": 0.0072,
      "step": 2877500
    },
    {
      "epoch": 4.709124591687778,
      "grad_norm": 0.3631034195423126,
      "learning_rate": 5.855841015267227e-07,
      "loss": 0.0093,
      "step": 2877520
    },
    {
      "epoch": 4.709157322126432,
      "grad_norm": 0.20058386027812958,
      "learning_rate": 5.855182093132055e-07,
      "loss": 0.0104,
      "step": 2877540
    },
    {
      "epoch": 4.709190052565084,
      "grad_norm": 0.09538798779249191,
      "learning_rate": 5.854523170996883e-07,
      "loss": 0.0074,
      "step": 2877560
    },
    {
      "epoch": 4.709222783003738,
      "grad_norm": 0.1948811411857605,
      "learning_rate": 5.853864248861713e-07,
      "loss": 0.009,
      "step": 2877580
    },
    {
      "epoch": 4.7092555134423915,
      "grad_norm": 0.19837529957294464,
      "learning_rate": 5.853205326726541e-07,
      "loss": 0.0116,
      "step": 2877600
    },
    {
      "epoch": 4.709288243881044,
      "grad_norm": 0.2098395973443985,
      "learning_rate": 5.85254640459137e-07,
      "loss": 0.009,
      "step": 2877620
    },
    {
      "epoch": 4.709320974319698,
      "grad_norm": 0.4090921878814697,
      "learning_rate": 5.851887482456198e-07,
      "loss": 0.0085,
      "step": 2877640
    },
    {
      "epoch": 4.709353704758351,
      "grad_norm": 0.2552693784236908,
      "learning_rate": 5.851228560321028e-07,
      "loss": 0.0081,
      "step": 2877660
    },
    {
      "epoch": 4.709386435197004,
      "grad_norm": 0.2615673840045929,
      "learning_rate": 5.850569638185856e-07,
      "loss": 0.0111,
      "step": 2877680
    },
    {
      "epoch": 4.709419165635658,
      "grad_norm": 0.2562805712223053,
      "learning_rate": 5.849910716050685e-07,
      "loss": 0.01,
      "step": 2877700
    },
    {
      "epoch": 4.709451896074311,
      "grad_norm": 0.23494015634059906,
      "learning_rate": 5.849251793915513e-07,
      "loss": 0.0093,
      "step": 2877720
    },
    {
      "epoch": 4.709484626512965,
      "grad_norm": 0.11957921087741852,
      "learning_rate": 5.848592871780343e-07,
      "loss": 0.0094,
      "step": 2877740
    },
    {
      "epoch": 4.7095173569516176,
      "grad_norm": 0.20330312848091125,
      "learning_rate": 5.847933949645171e-07,
      "loss": 0.0057,
      "step": 2877760
    },
    {
      "epoch": 4.709550087390271,
      "grad_norm": 0.5962415933609009,
      "learning_rate": 5.84727502751e-07,
      "loss": 0.0108,
      "step": 2877780
    },
    {
      "epoch": 4.709582817828925,
      "grad_norm": 0.2622130513191223,
      "learning_rate": 5.846616105374828e-07,
      "loss": 0.0092,
      "step": 2877800
    },
    {
      "epoch": 4.709615548267578,
      "grad_norm": 0.1742650270462036,
      "learning_rate": 5.845957183239657e-07,
      "loss": 0.0069,
      "step": 2877820
    },
    {
      "epoch": 4.709648278706231,
      "grad_norm": 0.1479969173669815,
      "learning_rate": 5.845298261104486e-07,
      "loss": 0.0048,
      "step": 2877840
    },
    {
      "epoch": 4.709681009144885,
      "grad_norm": 0.6959113478660583,
      "learning_rate": 5.844639338969315e-07,
      "loss": 0.0113,
      "step": 2877860
    },
    {
      "epoch": 4.709713739583538,
      "grad_norm": 0.2163812667131424,
      "learning_rate": 5.843980416834143e-07,
      "loss": 0.0106,
      "step": 2877880
    },
    {
      "epoch": 4.709746470022191,
      "grad_norm": 0.15603035688400269,
      "learning_rate": 5.843321494698971e-07,
      "loss": 0.0067,
      "step": 2877900
    },
    {
      "epoch": 4.7097792004608445,
      "grad_norm": 0.1149008646607399,
      "learning_rate": 5.842662572563801e-07,
      "loss": 0.0112,
      "step": 2877920
    },
    {
      "epoch": 4.709811930899498,
      "grad_norm": 0.1375533491373062,
      "learning_rate": 5.842003650428629e-07,
      "loss": 0.0076,
      "step": 2877940
    },
    {
      "epoch": 4.709844661338151,
      "grad_norm": 0.25209593772888184,
      "learning_rate": 5.841344728293458e-07,
      "loss": 0.0107,
      "step": 2877960
    },
    {
      "epoch": 4.709877391776804,
      "grad_norm": 0.09210256487131119,
      "learning_rate": 5.840685806158286e-07,
      "loss": 0.0082,
      "step": 2877980
    },
    {
      "epoch": 4.709910122215458,
      "grad_norm": 0.15439565479755402,
      "learning_rate": 5.840026884023115e-07,
      "loss": 0.0132,
      "step": 2878000
    },
    {
      "epoch": 4.709942852654112,
      "grad_norm": 0.4870499074459076,
      "learning_rate": 5.839367961887944e-07,
      "loss": 0.0097,
      "step": 2878020
    },
    {
      "epoch": 4.709975583092764,
      "grad_norm": 0.31381961703300476,
      "learning_rate": 5.838709039752773e-07,
      "loss": 0.0071,
      "step": 2878040
    },
    {
      "epoch": 4.710008313531418,
      "grad_norm": 0.26911836862564087,
      "learning_rate": 5.838050117617601e-07,
      "loss": 0.0085,
      "step": 2878060
    },
    {
      "epoch": 4.7100410439700715,
      "grad_norm": 0.31907767057418823,
      "learning_rate": 5.83739119548243e-07,
      "loss": 0.0068,
      "step": 2878080
    },
    {
      "epoch": 4.710073774408725,
      "grad_norm": 0.17680491507053375,
      "learning_rate": 5.836732273347259e-07,
      "loss": 0.0081,
      "step": 2878100
    },
    {
      "epoch": 4.710106504847378,
      "grad_norm": 0.24584782123565674,
      "learning_rate": 5.836073351212088e-07,
      "loss": 0.0097,
      "step": 2878120
    },
    {
      "epoch": 4.710139235286031,
      "grad_norm": 0.17598849534988403,
      "learning_rate": 5.835414429076917e-07,
      "loss": 0.0132,
      "step": 2878140
    },
    {
      "epoch": 4.710171965724685,
      "grad_norm": 0.2117188572883606,
      "learning_rate": 5.834755506941746e-07,
      "loss": 0.0091,
      "step": 2878160
    },
    {
      "epoch": 4.710204696163338,
      "grad_norm": 0.2593519389629364,
      "learning_rate": 5.834096584806574e-07,
      "loss": 0.0063,
      "step": 2878180
    },
    {
      "epoch": 4.710237426601991,
      "grad_norm": 0.09582995623350143,
      "learning_rate": 5.833437662671403e-07,
      "loss": 0.0115,
      "step": 2878200
    },
    {
      "epoch": 4.710270157040645,
      "grad_norm": 0.2711115777492523,
      "learning_rate": 5.832778740536231e-07,
      "loss": 0.0098,
      "step": 2878220
    },
    {
      "epoch": 4.710302887479298,
      "grad_norm": 0.6056827306747437,
      "learning_rate": 5.83211981840106e-07,
      "loss": 0.0082,
      "step": 2878240
    },
    {
      "epoch": 4.710335617917951,
      "grad_norm": 0.2493213415145874,
      "learning_rate": 5.831460896265889e-07,
      "loss": 0.0088,
      "step": 2878260
    },
    {
      "epoch": 4.710368348356605,
      "grad_norm": 0.16257041692733765,
      "learning_rate": 5.830801974130717e-07,
      "loss": 0.0058,
      "step": 2878280
    },
    {
      "epoch": 4.710401078795258,
      "grad_norm": 0.20760458707809448,
      "learning_rate": 5.830143051995546e-07,
      "loss": 0.0091,
      "step": 2878300
    },
    {
      "epoch": 4.710433809233911,
      "grad_norm": 0.08270817995071411,
      "learning_rate": 5.829484129860375e-07,
      "loss": 0.0061,
      "step": 2878320
    },
    {
      "epoch": 4.710466539672565,
      "grad_norm": 0.31495213508605957,
      "learning_rate": 5.828825207725204e-07,
      "loss": 0.0072,
      "step": 2878340
    },
    {
      "epoch": 4.710499270111218,
      "grad_norm": 0.3023379445075989,
      "learning_rate": 5.828166285590032e-07,
      "loss": 0.0075,
      "step": 2878360
    },
    {
      "epoch": 4.710532000549872,
      "grad_norm": 0.2686935365200043,
      "learning_rate": 5.827507363454861e-07,
      "loss": 0.0135,
      "step": 2878380
    },
    {
      "epoch": 4.710564730988525,
      "grad_norm": 0.21441344916820526,
      "learning_rate": 5.82684844131969e-07,
      "loss": 0.0073,
      "step": 2878400
    },
    {
      "epoch": 4.710597461427178,
      "grad_norm": 0.3513038158416748,
      "learning_rate": 5.826189519184519e-07,
      "loss": 0.012,
      "step": 2878420
    },
    {
      "epoch": 4.710630191865832,
      "grad_norm": 0.5151432156562805,
      "learning_rate": 5.825530597049347e-07,
      "loss": 0.0104,
      "step": 2878440
    },
    {
      "epoch": 4.7106629223044845,
      "grad_norm": 0.21928352117538452,
      "learning_rate": 5.824871674914176e-07,
      "loss": 0.0145,
      "step": 2878460
    },
    {
      "epoch": 4.710695652743138,
      "grad_norm": 0.16218850016593933,
      "learning_rate": 5.824212752779004e-07,
      "loss": 0.0075,
      "step": 2878480
    },
    {
      "epoch": 4.710728383181792,
      "grad_norm": 0.4405553936958313,
      "learning_rate": 5.823553830643834e-07,
      "loss": 0.011,
      "step": 2878500
    },
    {
      "epoch": 4.710761113620444,
      "grad_norm": 0.2078704982995987,
      "learning_rate": 5.822894908508662e-07,
      "loss": 0.0077,
      "step": 2878520
    },
    {
      "epoch": 4.710793844059098,
      "grad_norm": 0.10253532230854034,
      "learning_rate": 5.82223598637349e-07,
      "loss": 0.0092,
      "step": 2878540
    },
    {
      "epoch": 4.7108265744977516,
      "grad_norm": 0.07688984274864197,
      "learning_rate": 5.821577064238319e-07,
      "loss": 0.0088,
      "step": 2878560
    },
    {
      "epoch": 4.710859304936405,
      "grad_norm": 0.13542093336582184,
      "learning_rate": 5.820918142103148e-07,
      "loss": 0.0063,
      "step": 2878580
    },
    {
      "epoch": 4.710892035375058,
      "grad_norm": 0.11580736935138702,
      "learning_rate": 5.820259219967977e-07,
      "loss": 0.0084,
      "step": 2878600
    },
    {
      "epoch": 4.7109247658137114,
      "grad_norm": 0.3859480619430542,
      "learning_rate": 5.819600297832805e-07,
      "loss": 0.0072,
      "step": 2878620
    },
    {
      "epoch": 4.710957496252365,
      "grad_norm": 0.2694021463394165,
      "learning_rate": 5.818941375697634e-07,
      "loss": 0.0071,
      "step": 2878640
    },
    {
      "epoch": 4.710990226691018,
      "grad_norm": 0.30892837047576904,
      "learning_rate": 5.818282453562462e-07,
      "loss": 0.0087,
      "step": 2878660
    },
    {
      "epoch": 4.711022957129671,
      "grad_norm": 0.11287941038608551,
      "learning_rate": 5.817623531427292e-07,
      "loss": 0.0066,
      "step": 2878680
    },
    {
      "epoch": 4.711055687568325,
      "grad_norm": 0.11785531789064407,
      "learning_rate": 5.81696460929212e-07,
      "loss": 0.0065,
      "step": 2878700
    },
    {
      "epoch": 4.7110884180069785,
      "grad_norm": 0.11320363730192184,
      "learning_rate": 5.816305687156949e-07,
      "loss": 0.0059,
      "step": 2878720
    },
    {
      "epoch": 4.711121148445631,
      "grad_norm": 0.293064683675766,
      "learning_rate": 5.815646765021777e-07,
      "loss": 0.0116,
      "step": 2878740
    },
    {
      "epoch": 4.711153878884285,
      "grad_norm": 0.28605586290359497,
      "learning_rate": 5.814987842886607e-07,
      "loss": 0.0111,
      "step": 2878760
    },
    {
      "epoch": 4.711186609322938,
      "grad_norm": 0.06891168653964996,
      "learning_rate": 5.814328920751435e-07,
      "loss": 0.0107,
      "step": 2878780
    },
    {
      "epoch": 4.711219339761591,
      "grad_norm": 0.44420716166496277,
      "learning_rate": 5.813669998616265e-07,
      "loss": 0.0109,
      "step": 2878800
    },
    {
      "epoch": 4.711252070200245,
      "grad_norm": 0.10231531411409378,
      "learning_rate": 5.813011076481093e-07,
      "loss": 0.0095,
      "step": 2878820
    },
    {
      "epoch": 4.711284800638898,
      "grad_norm": 0.23813752830028534,
      "learning_rate": 5.812352154345922e-07,
      "loss": 0.0081,
      "step": 2878840
    },
    {
      "epoch": 4.711317531077551,
      "grad_norm": 0.2852095663547516,
      "learning_rate": 5.81169323221075e-07,
      "loss": 0.0124,
      "step": 2878860
    },
    {
      "epoch": 4.711350261516205,
      "grad_norm": 0.5614913105964661,
      "learning_rate": 5.811034310075579e-07,
      "loss": 0.0111,
      "step": 2878880
    },
    {
      "epoch": 4.711382991954858,
      "grad_norm": 0.02186853066086769,
      "learning_rate": 5.810375387940408e-07,
      "loss": 0.0114,
      "step": 2878900
    },
    {
      "epoch": 4.711415722393512,
      "grad_norm": 0.07626524567604065,
      "learning_rate": 5.809716465805236e-07,
      "loss": 0.0138,
      "step": 2878920
    },
    {
      "epoch": 4.7114484528321645,
      "grad_norm": 0.11190002411603928,
      "learning_rate": 5.809057543670065e-07,
      "loss": 0.0111,
      "step": 2878940
    },
    {
      "epoch": 4.711481183270818,
      "grad_norm": 0.1936495006084442,
      "learning_rate": 5.808398621534893e-07,
      "loss": 0.009,
      "step": 2878960
    },
    {
      "epoch": 4.711513913709472,
      "grad_norm": 0.1470455676317215,
      "learning_rate": 5.807739699399723e-07,
      "loss": 0.0088,
      "step": 2878980
    },
    {
      "epoch": 4.711546644148125,
      "grad_norm": 0.28912705183029175,
      "learning_rate": 5.807080777264551e-07,
      "loss": 0.0087,
      "step": 2879000
    },
    {
      "epoch": 4.711579374586778,
      "grad_norm": 0.3171740770339966,
      "learning_rate": 5.80642185512938e-07,
      "loss": 0.0088,
      "step": 2879020
    },
    {
      "epoch": 4.711612105025432,
      "grad_norm": 0.12650206685066223,
      "learning_rate": 5.805762932994208e-07,
      "loss": 0.0097,
      "step": 2879040
    },
    {
      "epoch": 4.711644835464085,
      "grad_norm": 0.3894311189651489,
      "learning_rate": 5.805104010859038e-07,
      "loss": 0.0076,
      "step": 2879060
    },
    {
      "epoch": 4.711677565902738,
      "grad_norm": 0.23901838064193726,
      "learning_rate": 5.804445088723866e-07,
      "loss": 0.0077,
      "step": 2879080
    },
    {
      "epoch": 4.7117102963413915,
      "grad_norm": 0.18670019507408142,
      "learning_rate": 5.803786166588695e-07,
      "loss": 0.0092,
      "step": 2879100
    },
    {
      "epoch": 4.711743026780045,
      "grad_norm": 0.18741793930530548,
      "learning_rate": 5.803127244453523e-07,
      "loss": 0.0063,
      "step": 2879120
    },
    {
      "epoch": 4.711775757218698,
      "grad_norm": 0.6856445670127869,
      "learning_rate": 5.802468322318352e-07,
      "loss": 0.0136,
      "step": 2879140
    },
    {
      "epoch": 4.711808487657351,
      "grad_norm": 0.4022383987903595,
      "learning_rate": 5.801809400183181e-07,
      "loss": 0.0119,
      "step": 2879160
    },
    {
      "epoch": 4.711841218096005,
      "grad_norm": 0.12064250558614731,
      "learning_rate": 5.80115047804801e-07,
      "loss": 0.0067,
      "step": 2879180
    },
    {
      "epoch": 4.711873948534659,
      "grad_norm": 0.06724359095096588,
      "learning_rate": 5.800491555912838e-07,
      "loss": 0.0095,
      "step": 2879200
    },
    {
      "epoch": 4.711906678973311,
      "grad_norm": 0.24491816759109497,
      "learning_rate": 5.799832633777666e-07,
      "loss": 0.0154,
      "step": 2879220
    },
    {
      "epoch": 4.711939409411965,
      "grad_norm": 0.3314473032951355,
      "learning_rate": 5.799173711642496e-07,
      "loss": 0.0094,
      "step": 2879240
    },
    {
      "epoch": 4.7119721398506185,
      "grad_norm": 0.06983863562345505,
      "learning_rate": 5.798514789507324e-07,
      "loss": 0.0097,
      "step": 2879260
    },
    {
      "epoch": 4.712004870289272,
      "grad_norm": 0.07387269288301468,
      "learning_rate": 5.797855867372153e-07,
      "loss": 0.0157,
      "step": 2879280
    },
    {
      "epoch": 4.712037600727925,
      "grad_norm": 0.8100377917289734,
      "learning_rate": 5.797196945236981e-07,
      "loss": 0.0058,
      "step": 2879300
    },
    {
      "epoch": 4.712070331166578,
      "grad_norm": 0.10277893394231796,
      "learning_rate": 5.79653802310181e-07,
      "loss": 0.011,
      "step": 2879320
    },
    {
      "epoch": 4.712103061605232,
      "grad_norm": 0.1648162305355072,
      "learning_rate": 5.795879100966639e-07,
      "loss": 0.0079,
      "step": 2879340
    },
    {
      "epoch": 4.712135792043885,
      "grad_norm": 0.39096322655677795,
      "learning_rate": 5.795220178831468e-07,
      "loss": 0.0084,
      "step": 2879360
    },
    {
      "epoch": 4.712168522482538,
      "grad_norm": 0.029556898400187492,
      "learning_rate": 5.794561256696296e-07,
      "loss": 0.0114,
      "step": 2879380
    },
    {
      "epoch": 4.712201252921192,
      "grad_norm": 0.2608828842639923,
      "learning_rate": 5.793902334561125e-07,
      "loss": 0.0071,
      "step": 2879400
    },
    {
      "epoch": 4.712233983359845,
      "grad_norm": 0.19202162325382233,
      "learning_rate": 5.793243412425954e-07,
      "loss": 0.0104,
      "step": 2879420
    },
    {
      "epoch": 4.712266713798498,
      "grad_norm": 0.08128221333026886,
      "learning_rate": 5.792584490290783e-07,
      "loss": 0.0058,
      "step": 2879440
    },
    {
      "epoch": 4.712299444237152,
      "grad_norm": 0.17179274559020996,
      "learning_rate": 5.791925568155612e-07,
      "loss": 0.0075,
      "step": 2879460
    },
    {
      "epoch": 4.712332174675805,
      "grad_norm": 0.10119852423667908,
      "learning_rate": 5.791266646020441e-07,
      "loss": 0.0056,
      "step": 2879480
    },
    {
      "epoch": 4.712364905114458,
      "grad_norm": 0.24165765941143036,
      "learning_rate": 5.790607723885269e-07,
      "loss": 0.0065,
      "step": 2879500
    },
    {
      "epoch": 4.712397635553112,
      "grad_norm": 0.07742255926132202,
      "learning_rate": 5.789948801750098e-07,
      "loss": 0.016,
      "step": 2879520
    },
    {
      "epoch": 4.712430365991765,
      "grad_norm": 0.0550060048699379,
      "learning_rate": 5.789289879614926e-07,
      "loss": 0.0114,
      "step": 2879540
    },
    {
      "epoch": 4.712463096430419,
      "grad_norm": 0.28349003195762634,
      "learning_rate": 5.788630957479756e-07,
      "loss": 0.0071,
      "step": 2879560
    },
    {
      "epoch": 4.7124958268690715,
      "grad_norm": 0.1238083690404892,
      "learning_rate": 5.787972035344584e-07,
      "loss": 0.0103,
      "step": 2879580
    },
    {
      "epoch": 4.712528557307725,
      "grad_norm": 0.46445780992507935,
      "learning_rate": 5.787313113209412e-07,
      "loss": 0.0085,
      "step": 2879600
    },
    {
      "epoch": 4.712561287746379,
      "grad_norm": 0.305147260427475,
      "learning_rate": 5.786654191074241e-07,
      "loss": 0.0142,
      "step": 2879620
    },
    {
      "epoch": 4.712594018185031,
      "grad_norm": 0.1852574497461319,
      "learning_rate": 5.78599526893907e-07,
      "loss": 0.0113,
      "step": 2879640
    },
    {
      "epoch": 4.712626748623685,
      "grad_norm": 0.22359469532966614,
      "learning_rate": 5.785336346803899e-07,
      "loss": 0.0091,
      "step": 2879660
    },
    {
      "epoch": 4.712659479062339,
      "grad_norm": 0.16282406449317932,
      "learning_rate": 5.784677424668727e-07,
      "loss": 0.006,
      "step": 2879680
    },
    {
      "epoch": 4.712692209500991,
      "grad_norm": 0.3224097192287445,
      "learning_rate": 5.784018502533556e-07,
      "loss": 0.0112,
      "step": 2879700
    },
    {
      "epoch": 4.712724939939645,
      "grad_norm": 0.1961192488670349,
      "learning_rate": 5.783359580398385e-07,
      "loss": 0.0083,
      "step": 2879720
    },
    {
      "epoch": 4.7127576703782985,
      "grad_norm": 0.3018355667591095,
      "learning_rate": 5.782700658263214e-07,
      "loss": 0.0123,
      "step": 2879740
    },
    {
      "epoch": 4.712790400816952,
      "grad_norm": 0.3278736174106598,
      "learning_rate": 5.782041736128042e-07,
      "loss": 0.0087,
      "step": 2879760
    },
    {
      "epoch": 4.712823131255605,
      "grad_norm": 0.18466341495513916,
      "learning_rate": 5.781382813992871e-07,
      "loss": 0.0088,
      "step": 2879780
    },
    {
      "epoch": 4.712855861694258,
      "grad_norm": 0.07286735624074936,
      "learning_rate": 5.780723891857699e-07,
      "loss": 0.0059,
      "step": 2879800
    },
    {
      "epoch": 4.712888592132912,
      "grad_norm": 0.12547463178634644,
      "learning_rate": 5.780064969722529e-07,
      "loss": 0.0056,
      "step": 2879820
    },
    {
      "epoch": 4.712921322571566,
      "grad_norm": 0.5093119144439697,
      "learning_rate": 5.779406047587357e-07,
      "loss": 0.0114,
      "step": 2879840
    },
    {
      "epoch": 4.712954053010218,
      "grad_norm": 0.15818309783935547,
      "learning_rate": 5.778747125452186e-07,
      "loss": 0.0046,
      "step": 2879860
    },
    {
      "epoch": 4.712986783448872,
      "grad_norm": 0.05377332493662834,
      "learning_rate": 5.778088203317014e-07,
      "loss": 0.0075,
      "step": 2879880
    },
    {
      "epoch": 4.7130195138875255,
      "grad_norm": 0.14539696276187897,
      "learning_rate": 5.777429281181844e-07,
      "loss": 0.0092,
      "step": 2879900
    },
    {
      "epoch": 4.713052244326178,
      "grad_norm": 0.10742547363042831,
      "learning_rate": 5.776770359046672e-07,
      "loss": 0.0128,
      "step": 2879920
    },
    {
      "epoch": 4.713084974764832,
      "grad_norm": 0.3488852083683014,
      "learning_rate": 5.7761114369115e-07,
      "loss": 0.0098,
      "step": 2879940
    },
    {
      "epoch": 4.713117705203485,
      "grad_norm": 0.1441798061132431,
      "learning_rate": 5.775452514776329e-07,
      "loss": 0.0087,
      "step": 2879960
    },
    {
      "epoch": 4.713150435642138,
      "grad_norm": 0.4030080735683441,
      "learning_rate": 5.774793592641157e-07,
      "loss": 0.01,
      "step": 2879980
    },
    {
      "epoch": 4.713183166080792,
      "grad_norm": 0.15378686785697937,
      "learning_rate": 5.774134670505987e-07,
      "loss": 0.0096,
      "step": 2880000
    },
    {
      "epoch": 4.713215896519445,
      "grad_norm": 0.2248207926750183,
      "learning_rate": 5.773475748370815e-07,
      "loss": 0.0096,
      "step": 2880020
    },
    {
      "epoch": 4.713248626958099,
      "grad_norm": 0.1409333348274231,
      "learning_rate": 5.772816826235645e-07,
      "loss": 0.0074,
      "step": 2880040
    },
    {
      "epoch": 4.713281357396752,
      "grad_norm": 0.3445693850517273,
      "learning_rate": 5.772157904100472e-07,
      "loss": 0.0074,
      "step": 2880060
    },
    {
      "epoch": 4.713314087835405,
      "grad_norm": 0.2552925646305084,
      "learning_rate": 5.771498981965302e-07,
      "loss": 0.0075,
      "step": 2880080
    },
    {
      "epoch": 4.713346818274059,
      "grad_norm": 0.31803885102272034,
      "learning_rate": 5.77084005983013e-07,
      "loss": 0.0086,
      "step": 2880100
    },
    {
      "epoch": 4.7133795487127115,
      "grad_norm": 0.1021486222743988,
      "learning_rate": 5.77018113769496e-07,
      "loss": 0.009,
      "step": 2880120
    },
    {
      "epoch": 4.713412279151365,
      "grad_norm": 0.06523101031780243,
      "learning_rate": 5.769522215559788e-07,
      "loss": 0.0066,
      "step": 2880140
    },
    {
      "epoch": 4.713445009590019,
      "grad_norm": 0.19459311664104462,
      "learning_rate": 5.768863293424617e-07,
      "loss": 0.009,
      "step": 2880160
    },
    {
      "epoch": 4.713477740028672,
      "grad_norm": 0.23418524861335754,
      "learning_rate": 5.768204371289445e-07,
      "loss": 0.0046,
      "step": 2880180
    },
    {
      "epoch": 4.713510470467325,
      "grad_norm": 0.1322125643491745,
      "learning_rate": 5.767545449154274e-07,
      "loss": 0.0089,
      "step": 2880200
    },
    {
      "epoch": 4.713543200905979,
      "grad_norm": 0.3683862090110779,
      "learning_rate": 5.766886527019103e-07,
      "loss": 0.0063,
      "step": 2880220
    },
    {
      "epoch": 4.713575931344632,
      "grad_norm": 0.19788798689842224,
      "learning_rate": 5.766227604883932e-07,
      "loss": 0.0069,
      "step": 2880240
    },
    {
      "epoch": 4.713608661783285,
      "grad_norm": 0.18000252544879913,
      "learning_rate": 5.76556868274876e-07,
      "loss": 0.0141,
      "step": 2880260
    },
    {
      "epoch": 4.7136413922219385,
      "grad_norm": 0.17123186588287354,
      "learning_rate": 5.764909760613588e-07,
      "loss": 0.0071,
      "step": 2880280
    },
    {
      "epoch": 4.713674122660592,
      "grad_norm": 1.0368660688400269,
      "learning_rate": 5.764250838478418e-07,
      "loss": 0.0073,
      "step": 2880300
    },
    {
      "epoch": 4.713706853099245,
      "grad_norm": 0.19674129784107208,
      "learning_rate": 5.763591916343246e-07,
      "loss": 0.0046,
      "step": 2880320
    },
    {
      "epoch": 4.713739583537898,
      "grad_norm": 0.08450570702552795,
      "learning_rate": 5.762932994208075e-07,
      "loss": 0.0129,
      "step": 2880340
    },
    {
      "epoch": 4.713772313976552,
      "grad_norm": 0.13415920734405518,
      "learning_rate": 5.762274072072903e-07,
      "loss": 0.0092,
      "step": 2880360
    },
    {
      "epoch": 4.7138050444152055,
      "grad_norm": 0.16383594274520874,
      "learning_rate": 5.761615149937733e-07,
      "loss": 0.0069,
      "step": 2880380
    },
    {
      "epoch": 4.713837774853858,
      "grad_norm": 0.30457422137260437,
      "learning_rate": 5.760956227802561e-07,
      "loss": 0.0064,
      "step": 2880400
    },
    {
      "epoch": 4.713870505292512,
      "grad_norm": 0.21137022972106934,
      "learning_rate": 5.76029730566739e-07,
      "loss": 0.007,
      "step": 2880420
    },
    {
      "epoch": 4.713903235731165,
      "grad_norm": 1.7366060018539429,
      "learning_rate": 5.759638383532218e-07,
      "loss": 0.0131,
      "step": 2880440
    },
    {
      "epoch": 4.713935966169819,
      "grad_norm": 0.26713186502456665,
      "learning_rate": 5.758979461397047e-07,
      "loss": 0.0124,
      "step": 2880460
    },
    {
      "epoch": 4.713968696608472,
      "grad_norm": 0.29449570178985596,
      "learning_rate": 5.758320539261876e-07,
      "loss": 0.0094,
      "step": 2880480
    },
    {
      "epoch": 4.714001427047125,
      "grad_norm": 0.1795584261417389,
      "learning_rate": 5.757661617126705e-07,
      "loss": 0.0093,
      "step": 2880500
    },
    {
      "epoch": 4.714034157485779,
      "grad_norm": 0.13679970800876617,
      "learning_rate": 5.757002694991533e-07,
      "loss": 0.0129,
      "step": 2880520
    },
    {
      "epoch": 4.714066887924432,
      "grad_norm": 0.05834786221385002,
      "learning_rate": 5.756343772856362e-07,
      "loss": 0.0058,
      "step": 2880540
    },
    {
      "epoch": 4.714099618363085,
      "grad_norm": 0.22515642642974854,
      "learning_rate": 5.755684850721191e-07,
      "loss": 0.0101,
      "step": 2880560
    },
    {
      "epoch": 4.714132348801739,
      "grad_norm": 0.30341947078704834,
      "learning_rate": 5.75502592858602e-07,
      "loss": 0.0115,
      "step": 2880580
    },
    {
      "epoch": 4.7141650792403915,
      "grad_norm": 0.45542165637016296,
      "learning_rate": 5.754367006450848e-07,
      "loss": 0.0084,
      "step": 2880600
    },
    {
      "epoch": 4.714197809679045,
      "grad_norm": 0.5163219571113586,
      "learning_rate": 5.753708084315676e-07,
      "loss": 0.0083,
      "step": 2880620
    },
    {
      "epoch": 4.714230540117699,
      "grad_norm": 0.23279671370983124,
      "learning_rate": 5.753049162180505e-07,
      "loss": 0.0101,
      "step": 2880640
    },
    {
      "epoch": 4.714263270556352,
      "grad_norm": 0.21537455916404724,
      "learning_rate": 5.752390240045334e-07,
      "loss": 0.0104,
      "step": 2880660
    },
    {
      "epoch": 4.714296000995005,
      "grad_norm": 0.18653610348701477,
      "learning_rate": 5.751731317910163e-07,
      "loss": 0.015,
      "step": 2880680
    },
    {
      "epoch": 4.714328731433659,
      "grad_norm": 0.1017436608672142,
      "learning_rate": 5.751072395774992e-07,
      "loss": 0.012,
      "step": 2880700
    },
    {
      "epoch": 4.714361461872312,
      "grad_norm": 0.23699937760829926,
      "learning_rate": 5.75041347363982e-07,
      "loss": 0.0138,
      "step": 2880720
    },
    {
      "epoch": 4.714394192310966,
      "grad_norm": 0.26029399037361145,
      "learning_rate": 5.749754551504649e-07,
      "loss": 0.0112,
      "step": 2880740
    },
    {
      "epoch": 4.7144269227496185,
      "grad_norm": 0.10545779764652252,
      "learning_rate": 5.749095629369478e-07,
      "loss": 0.0072,
      "step": 2880760
    },
    {
      "epoch": 4.714459653188272,
      "grad_norm": 0.1211342066526413,
      "learning_rate": 5.748436707234307e-07,
      "loss": 0.0097,
      "step": 2880780
    },
    {
      "epoch": 4.714492383626926,
      "grad_norm": 0.2287946194410324,
      "learning_rate": 5.747777785099136e-07,
      "loss": 0.0067,
      "step": 2880800
    },
    {
      "epoch": 4.714525114065578,
      "grad_norm": 0.38298648595809937,
      "learning_rate": 5.747118862963964e-07,
      "loss": 0.0095,
      "step": 2880820
    },
    {
      "epoch": 4.714557844504232,
      "grad_norm": 0.21196788549423218,
      "learning_rate": 5.746459940828793e-07,
      "loss": 0.0063,
      "step": 2880840
    },
    {
      "epoch": 4.714590574942886,
      "grad_norm": 0.135134756565094,
      "learning_rate": 5.745801018693621e-07,
      "loss": 0.0076,
      "step": 2880860
    },
    {
      "epoch": 4.714623305381538,
      "grad_norm": 0.36692479252815247,
      "learning_rate": 5.745142096558451e-07,
      "loss": 0.0107,
      "step": 2880880
    },
    {
      "epoch": 4.714656035820192,
      "grad_norm": 0.2564661502838135,
      "learning_rate": 5.744483174423279e-07,
      "loss": 0.0103,
      "step": 2880900
    },
    {
      "epoch": 4.7146887662588455,
      "grad_norm": 0.280809611082077,
      "learning_rate": 5.743824252288108e-07,
      "loss": 0.0084,
      "step": 2880920
    },
    {
      "epoch": 4.714721496697499,
      "grad_norm": 0.022628964856266975,
      "learning_rate": 5.743165330152936e-07,
      "loss": 0.0064,
      "step": 2880940
    },
    {
      "epoch": 4.714754227136152,
      "grad_norm": 0.28708192706108093,
      "learning_rate": 5.742506408017765e-07,
      "loss": 0.0128,
      "step": 2880960
    },
    {
      "epoch": 4.714786957574805,
      "grad_norm": 0.08800652623176575,
      "learning_rate": 5.741847485882594e-07,
      "loss": 0.01,
      "step": 2880980
    },
    {
      "epoch": 4.714819688013459,
      "grad_norm": 0.08588602393865585,
      "learning_rate": 5.741188563747422e-07,
      "loss": 0.0084,
      "step": 2881000
    },
    {
      "epoch": 4.714852418452113,
      "grad_norm": 0.30104655027389526,
      "learning_rate": 5.740529641612251e-07,
      "loss": 0.0065,
      "step": 2881020
    },
    {
      "epoch": 4.714885148890765,
      "grad_norm": 0.0847262591123581,
      "learning_rate": 5.73987071947708e-07,
      "loss": 0.0089,
      "step": 2881040
    },
    {
      "epoch": 4.714917879329419,
      "grad_norm": 0.4080430269241333,
      "learning_rate": 5.739211797341909e-07,
      "loss": 0.0092,
      "step": 2881060
    },
    {
      "epoch": 4.7149506097680725,
      "grad_norm": 0.13797496259212494,
      "learning_rate": 5.738552875206737e-07,
      "loss": 0.0102,
      "step": 2881080
    },
    {
      "epoch": 4.714983340206725,
      "grad_norm": 0.33567243814468384,
      "learning_rate": 5.737893953071566e-07,
      "loss": 0.0083,
      "step": 2881100
    },
    {
      "epoch": 4.715016070645379,
      "grad_norm": 0.3278988003730774,
      "learning_rate": 5.737235030936394e-07,
      "loss": 0.011,
      "step": 2881120
    },
    {
      "epoch": 4.715048801084032,
      "grad_norm": 0.07463957369327545,
      "learning_rate": 5.736576108801224e-07,
      "loss": 0.01,
      "step": 2881140
    },
    {
      "epoch": 4.715081531522685,
      "grad_norm": 0.2935037612915039,
      "learning_rate": 5.735917186666052e-07,
      "loss": 0.0085,
      "step": 2881160
    },
    {
      "epoch": 4.715114261961339,
      "grad_norm": 0.28629547357559204,
      "learning_rate": 5.735258264530881e-07,
      "loss": 0.0157,
      "step": 2881180
    },
    {
      "epoch": 4.715146992399992,
      "grad_norm": 0.083673857152462,
      "learning_rate": 5.734599342395709e-07,
      "loss": 0.0094,
      "step": 2881200
    },
    {
      "epoch": 4.715179722838646,
      "grad_norm": 0.22608783841133118,
      "learning_rate": 5.733940420260539e-07,
      "loss": 0.0084,
      "step": 2881220
    },
    {
      "epoch": 4.7152124532772985,
      "grad_norm": 0.11394407600164413,
      "learning_rate": 5.733281498125367e-07,
      "loss": 0.0096,
      "step": 2881240
    },
    {
      "epoch": 4.715245183715952,
      "grad_norm": 0.157036691904068,
      "learning_rate": 5.732622575990196e-07,
      "loss": 0.0071,
      "step": 2881260
    },
    {
      "epoch": 4.715277914154606,
      "grad_norm": 0.13724417984485626,
      "learning_rate": 5.731963653855024e-07,
      "loss": 0.0075,
      "step": 2881280
    },
    {
      "epoch": 4.715310644593259,
      "grad_norm": 0.5240848660469055,
      "learning_rate": 5.731304731719852e-07,
      "loss": 0.0075,
      "step": 2881300
    },
    {
      "epoch": 4.715343375031912,
      "grad_norm": 0.19739484786987305,
      "learning_rate": 5.730645809584682e-07,
      "loss": 0.012,
      "step": 2881320
    },
    {
      "epoch": 4.715376105470566,
      "grad_norm": 0.1930397003889084,
      "learning_rate": 5.72998688744951e-07,
      "loss": 0.0116,
      "step": 2881340
    },
    {
      "epoch": 4.715408835909219,
      "grad_norm": 0.5907820463180542,
      "learning_rate": 5.72932796531434e-07,
      "loss": 0.0115,
      "step": 2881360
    },
    {
      "epoch": 4.715441566347872,
      "grad_norm": 0.3221253454685211,
      "learning_rate": 5.728669043179167e-07,
      "loss": 0.0107,
      "step": 2881380
    },
    {
      "epoch": 4.7154742967865255,
      "grad_norm": 0.4881010353565216,
      "learning_rate": 5.728010121043997e-07,
      "loss": 0.0104,
      "step": 2881400
    },
    {
      "epoch": 4.715507027225179,
      "grad_norm": 0.47917789220809937,
      "learning_rate": 5.727351198908825e-07,
      "loss": 0.0093,
      "step": 2881420
    },
    {
      "epoch": 4.715539757663832,
      "grad_norm": 0.10923447459936142,
      "learning_rate": 5.726692276773655e-07,
      "loss": 0.0093,
      "step": 2881440
    },
    {
      "epoch": 4.715572488102485,
      "grad_norm": 0.12541259825229645,
      "learning_rate": 5.726033354638483e-07,
      "loss": 0.0076,
      "step": 2881460
    },
    {
      "epoch": 4.715605218541139,
      "grad_norm": 0.5178916454315186,
      "learning_rate": 5.725374432503312e-07,
      "loss": 0.008,
      "step": 2881480
    },
    {
      "epoch": 4.715637948979793,
      "grad_norm": 0.1145160123705864,
      "learning_rate": 5.72471551036814e-07,
      "loss": 0.0138,
      "step": 2881500
    },
    {
      "epoch": 4.715670679418445,
      "grad_norm": 0.15813815593719482,
      "learning_rate": 5.724056588232969e-07,
      "loss": 0.0077,
      "step": 2881520
    },
    {
      "epoch": 4.715703409857099,
      "grad_norm": 0.256640762090683,
      "learning_rate": 5.723397666097798e-07,
      "loss": 0.0094,
      "step": 2881540
    },
    {
      "epoch": 4.7157361402957525,
      "grad_norm": 0.36125582456588745,
      "learning_rate": 5.722738743962627e-07,
      "loss": 0.0073,
      "step": 2881560
    },
    {
      "epoch": 4.715768870734405,
      "grad_norm": 0.21760405600070953,
      "learning_rate": 5.722079821827455e-07,
      "loss": 0.0065,
      "step": 2881580
    },
    {
      "epoch": 4.715801601173059,
      "grad_norm": 0.12975983321666718,
      "learning_rate": 5.721420899692283e-07,
      "loss": 0.0104,
      "step": 2881600
    },
    {
      "epoch": 4.715834331611712,
      "grad_norm": 0.17994751036167145,
      "learning_rate": 5.720761977557113e-07,
      "loss": 0.0082,
      "step": 2881620
    },
    {
      "epoch": 4.715867062050366,
      "grad_norm": 0.5395767688751221,
      "learning_rate": 5.720103055421941e-07,
      "loss": 0.0096,
      "step": 2881640
    },
    {
      "epoch": 4.715899792489019,
      "grad_norm": 0.177559033036232,
      "learning_rate": 5.71944413328677e-07,
      "loss": 0.0072,
      "step": 2881660
    },
    {
      "epoch": 4.715932522927672,
      "grad_norm": 0.22619856894016266,
      "learning_rate": 5.718785211151598e-07,
      "loss": 0.0098,
      "step": 2881680
    },
    {
      "epoch": 4.715965253366326,
      "grad_norm": 0.37124961614608765,
      "learning_rate": 5.718126289016428e-07,
      "loss": 0.0061,
      "step": 2881700
    },
    {
      "epoch": 4.715997983804979,
      "grad_norm": 0.22478023171424866,
      "learning_rate": 5.717467366881256e-07,
      "loss": 0.0124,
      "step": 2881720
    },
    {
      "epoch": 4.716030714243632,
      "grad_norm": 0.11893413960933685,
      "learning_rate": 5.716808444746085e-07,
      "loss": 0.0108,
      "step": 2881740
    },
    {
      "epoch": 4.716063444682286,
      "grad_norm": 0.13974250853061676,
      "learning_rate": 5.716149522610913e-07,
      "loss": 0.0081,
      "step": 2881760
    },
    {
      "epoch": 4.7160961751209385,
      "grad_norm": 0.7697696089744568,
      "learning_rate": 5.715490600475742e-07,
      "loss": 0.0126,
      "step": 2881780
    },
    {
      "epoch": 4.716128905559592,
      "grad_norm": 0.09861000627279282,
      "learning_rate": 5.714831678340571e-07,
      "loss": 0.0062,
      "step": 2881800
    },
    {
      "epoch": 4.716161635998246,
      "grad_norm": 0.22340720891952515,
      "learning_rate": 5.7141727562054e-07,
      "loss": 0.0123,
      "step": 2881820
    },
    {
      "epoch": 4.716194366436899,
      "grad_norm": 0.21775157749652863,
      "learning_rate": 5.713513834070228e-07,
      "loss": 0.0072,
      "step": 2881840
    },
    {
      "epoch": 4.716227096875552,
      "grad_norm": 0.4115571677684784,
      "learning_rate": 5.712854911935057e-07,
      "loss": 0.007,
      "step": 2881860
    },
    {
      "epoch": 4.716259827314206,
      "grad_norm": 0.2734723389148712,
      "learning_rate": 5.712195989799886e-07,
      "loss": 0.0078,
      "step": 2881880
    },
    {
      "epoch": 4.716292557752859,
      "grad_norm": 0.10451822727918625,
      "learning_rate": 5.711537067664715e-07,
      "loss": 0.0081,
      "step": 2881900
    },
    {
      "epoch": 4.716325288191513,
      "grad_norm": 0.06693794578313828,
      "learning_rate": 5.710878145529543e-07,
      "loss": 0.0054,
      "step": 2881920
    },
    {
      "epoch": 4.7163580186301655,
      "grad_norm": 0.16978691518306732,
      "learning_rate": 5.710219223394371e-07,
      "loss": 0.0113,
      "step": 2881940
    },
    {
      "epoch": 4.716390749068819,
      "grad_norm": 0.11659692227840424,
      "learning_rate": 5.7095603012592e-07,
      "loss": 0.0085,
      "step": 2881960
    },
    {
      "epoch": 4.716423479507473,
      "grad_norm": 0.1497407704591751,
      "learning_rate": 5.708901379124029e-07,
      "loss": 0.0046,
      "step": 2881980
    },
    {
      "epoch": 4.716456209946125,
      "grad_norm": 0.25235480070114136,
      "learning_rate": 5.708242456988858e-07,
      "loss": 0.0158,
      "step": 2882000
    },
    {
      "epoch": 4.716488940384779,
      "grad_norm": 0.19431741535663605,
      "learning_rate": 5.707583534853687e-07,
      "loss": 0.0068,
      "step": 2882020
    },
    {
      "epoch": 4.7165216708234325,
      "grad_norm": 0.42569008469581604,
      "learning_rate": 5.706924612718515e-07,
      "loss": 0.011,
      "step": 2882040
    },
    {
      "epoch": 4.716554401262085,
      "grad_norm": 0.2595410645008087,
      "learning_rate": 5.706265690583344e-07,
      "loss": 0.0085,
      "step": 2882060
    },
    {
      "epoch": 4.716587131700739,
      "grad_norm": 0.15604257583618164,
      "learning_rate": 5.705606768448173e-07,
      "loss": 0.0083,
      "step": 2882080
    },
    {
      "epoch": 4.716619862139392,
      "grad_norm": 0.2104945331811905,
      "learning_rate": 5.704947846313002e-07,
      "loss": 0.0116,
      "step": 2882100
    },
    {
      "epoch": 4.716652592578046,
      "grad_norm": 0.14759953320026398,
      "learning_rate": 5.704288924177831e-07,
      "loss": 0.0088,
      "step": 2882120
    },
    {
      "epoch": 4.716685323016699,
      "grad_norm": 0.24802547693252563,
      "learning_rate": 5.703630002042659e-07,
      "loss": 0.0125,
      "step": 2882140
    },
    {
      "epoch": 4.716718053455352,
      "grad_norm": 0.4060456156730652,
      "learning_rate": 5.702971079907488e-07,
      "loss": 0.0089,
      "step": 2882160
    },
    {
      "epoch": 4.716750783894006,
      "grad_norm": 0.2271062582731247,
      "learning_rate": 5.702312157772316e-07,
      "loss": 0.0122,
      "step": 2882180
    },
    {
      "epoch": 4.7167835143326595,
      "grad_norm": 0.40036532282829285,
      "learning_rate": 5.701653235637146e-07,
      "loss": 0.0082,
      "step": 2882200
    },
    {
      "epoch": 4.716816244771312,
      "grad_norm": 0.1473008245229721,
      "learning_rate": 5.700994313501974e-07,
      "loss": 0.0064,
      "step": 2882220
    },
    {
      "epoch": 4.716848975209966,
      "grad_norm": 0.29366904497146606,
      "learning_rate": 5.700335391366803e-07,
      "loss": 0.0093,
      "step": 2882240
    },
    {
      "epoch": 4.716881705648619,
      "grad_norm": 0.21047793328762054,
      "learning_rate": 5.699676469231631e-07,
      "loss": 0.0081,
      "step": 2882260
    },
    {
      "epoch": 4.716914436087272,
      "grad_norm": 0.24362754821777344,
      "learning_rate": 5.699017547096461e-07,
      "loss": 0.0054,
      "step": 2882280
    },
    {
      "epoch": 4.716947166525926,
      "grad_norm": 0.18770642578601837,
      "learning_rate": 5.698358624961289e-07,
      "loss": 0.008,
      "step": 2882300
    },
    {
      "epoch": 4.716979896964579,
      "grad_norm": 0.22225861251354218,
      "learning_rate": 5.697699702826117e-07,
      "loss": 0.0095,
      "step": 2882320
    },
    {
      "epoch": 4.717012627403232,
      "grad_norm": 0.2054116427898407,
      "learning_rate": 5.697040780690946e-07,
      "loss": 0.0082,
      "step": 2882340
    },
    {
      "epoch": 4.717045357841886,
      "grad_norm": 0.45903638005256653,
      "learning_rate": 5.696381858555775e-07,
      "loss": 0.0125,
      "step": 2882360
    },
    {
      "epoch": 4.717078088280539,
      "grad_norm": 0.331073522567749,
      "learning_rate": 5.695722936420604e-07,
      "loss": 0.011,
      "step": 2882380
    },
    {
      "epoch": 4.717110818719193,
      "grad_norm": 0.23621951043605804,
      "learning_rate": 5.695064014285432e-07,
      "loss": 0.0078,
      "step": 2882400
    },
    {
      "epoch": 4.7171435491578455,
      "grad_norm": 0.25648927688598633,
      "learning_rate": 5.694405092150261e-07,
      "loss": 0.0138,
      "step": 2882420
    },
    {
      "epoch": 4.717176279596499,
      "grad_norm": 0.35557475686073303,
      "learning_rate": 5.693746170015089e-07,
      "loss": 0.0075,
      "step": 2882440
    },
    {
      "epoch": 4.717209010035153,
      "grad_norm": 0.3352958559989929,
      "learning_rate": 5.693087247879919e-07,
      "loss": 0.0099,
      "step": 2882460
    },
    {
      "epoch": 4.717241740473806,
      "grad_norm": 0.169221892952919,
      "learning_rate": 5.692428325744747e-07,
      "loss": 0.0062,
      "step": 2882480
    },
    {
      "epoch": 4.717274470912459,
      "grad_norm": 0.24420218169689178,
      "learning_rate": 5.691769403609576e-07,
      "loss": 0.0067,
      "step": 2882500
    },
    {
      "epoch": 4.717307201351113,
      "grad_norm": 0.1537090688943863,
      "learning_rate": 5.691110481474404e-07,
      "loss": 0.0116,
      "step": 2882520
    },
    {
      "epoch": 4.717339931789766,
      "grad_norm": 0.1843172311782837,
      "learning_rate": 5.690451559339234e-07,
      "loss": 0.0051,
      "step": 2882540
    },
    {
      "epoch": 4.717372662228419,
      "grad_norm": 0.29833388328552246,
      "learning_rate": 5.689792637204062e-07,
      "loss": 0.011,
      "step": 2882560
    },
    {
      "epoch": 4.7174053926670725,
      "grad_norm": 0.21169637143611908,
      "learning_rate": 5.689133715068891e-07,
      "loss": 0.0065,
      "step": 2882580
    },
    {
      "epoch": 4.717438123105726,
      "grad_norm": 0.7045865058898926,
      "learning_rate": 5.688474792933719e-07,
      "loss": 0.0129,
      "step": 2882600
    },
    {
      "epoch": 4.717470853544379,
      "grad_norm": 0.1514793038368225,
      "learning_rate": 5.687815870798547e-07,
      "loss": 0.0133,
      "step": 2882620
    },
    {
      "epoch": 4.717503583983032,
      "grad_norm": 0.15752795338630676,
      "learning_rate": 5.687156948663377e-07,
      "loss": 0.0094,
      "step": 2882640
    },
    {
      "epoch": 4.717536314421686,
      "grad_norm": 0.1400291919708252,
      "learning_rate": 5.686498026528205e-07,
      "loss": 0.0096,
      "step": 2882660
    },
    {
      "epoch": 4.71756904486034,
      "grad_norm": 0.5553449988365173,
      "learning_rate": 5.685839104393035e-07,
      "loss": 0.0105,
      "step": 2882680
    },
    {
      "epoch": 4.717601775298992,
      "grad_norm": 0.17425189912319183,
      "learning_rate": 5.685180182257862e-07,
      "loss": 0.0065,
      "step": 2882700
    },
    {
      "epoch": 4.717634505737646,
      "grad_norm": 0.3866671919822693,
      "learning_rate": 5.684521260122692e-07,
      "loss": 0.0083,
      "step": 2882720
    },
    {
      "epoch": 4.7176672361762995,
      "grad_norm": 0.6210099458694458,
      "learning_rate": 5.68386233798752e-07,
      "loss": 0.0089,
      "step": 2882740
    },
    {
      "epoch": 4.717699966614953,
      "grad_norm": 0.18779030442237854,
      "learning_rate": 5.68320341585235e-07,
      "loss": 0.0065,
      "step": 2882760
    },
    {
      "epoch": 4.717732697053606,
      "grad_norm": 0.38797691464424133,
      "learning_rate": 5.682544493717178e-07,
      "loss": 0.0078,
      "step": 2882780
    },
    {
      "epoch": 4.717765427492259,
      "grad_norm": 0.20260949432849884,
      "learning_rate": 5.681885571582007e-07,
      "loss": 0.0081,
      "step": 2882800
    },
    {
      "epoch": 4.717798157930913,
      "grad_norm": 0.23481960594654083,
      "learning_rate": 5.681226649446835e-07,
      "loss": 0.0061,
      "step": 2882820
    },
    {
      "epoch": 4.717830888369566,
      "grad_norm": 0.4597063362598419,
      "learning_rate": 5.680567727311664e-07,
      "loss": 0.0054,
      "step": 2882840
    },
    {
      "epoch": 4.717863618808219,
      "grad_norm": 0.14009903371334076,
      "learning_rate": 5.679908805176493e-07,
      "loss": 0.0063,
      "step": 2882860
    },
    {
      "epoch": 4.717896349246873,
      "grad_norm": 0.2806073725223541,
      "learning_rate": 5.679249883041322e-07,
      "loss": 0.0082,
      "step": 2882880
    },
    {
      "epoch": 4.7179290796855256,
      "grad_norm": 0.15082287788391113,
      "learning_rate": 5.67859096090615e-07,
      "loss": 0.0106,
      "step": 2882900
    },
    {
      "epoch": 4.717961810124179,
      "grad_norm": 0.6258817911148071,
      "learning_rate": 5.677932038770979e-07,
      "loss": 0.0097,
      "step": 2882920
    },
    {
      "epoch": 4.717994540562833,
      "grad_norm": 0.34492942690849304,
      "learning_rate": 5.677273116635808e-07,
      "loss": 0.0072,
      "step": 2882940
    },
    {
      "epoch": 4.718027271001486,
      "grad_norm": 0.27029111981391907,
      "learning_rate": 5.676614194500637e-07,
      "loss": 0.0085,
      "step": 2882960
    },
    {
      "epoch": 4.718060001440139,
      "grad_norm": 0.4406522512435913,
      "learning_rate": 5.675955272365465e-07,
      "loss": 0.0086,
      "step": 2882980
    },
    {
      "epoch": 4.718092731878793,
      "grad_norm": 0.3459632694721222,
      "learning_rate": 5.675296350230293e-07,
      "loss": 0.008,
      "step": 2883000
    },
    {
      "epoch": 4.718125462317446,
      "grad_norm": 0.1121153011918068,
      "learning_rate": 5.674637428095123e-07,
      "loss": 0.0124,
      "step": 2883020
    },
    {
      "epoch": 4.718158192756099,
      "grad_norm": 0.18482118844985962,
      "learning_rate": 5.673978505959951e-07,
      "loss": 0.0141,
      "step": 2883040
    },
    {
      "epoch": 4.7181909231947525,
      "grad_norm": 0.2713988721370697,
      "learning_rate": 5.67331958382478e-07,
      "loss": 0.0076,
      "step": 2883060
    },
    {
      "epoch": 4.718223653633406,
      "grad_norm": 0.047488968819379807,
      "learning_rate": 5.672660661689608e-07,
      "loss": 0.005,
      "step": 2883080
    },
    {
      "epoch": 4.71825638407206,
      "grad_norm": 0.09522654116153717,
      "learning_rate": 5.672001739554437e-07,
      "loss": 0.0109,
      "step": 2883100
    },
    {
      "epoch": 4.718289114510712,
      "grad_norm": 0.5341848134994507,
      "learning_rate": 5.671342817419266e-07,
      "loss": 0.0118,
      "step": 2883120
    },
    {
      "epoch": 4.718321844949366,
      "grad_norm": 0.2716148793697357,
      "learning_rate": 5.670683895284095e-07,
      "loss": 0.0092,
      "step": 2883140
    },
    {
      "epoch": 4.71835457538802,
      "grad_norm": 0.2433413863182068,
      "learning_rate": 5.670024973148923e-07,
      "loss": 0.0078,
      "step": 2883160
    },
    {
      "epoch": 4.718387305826672,
      "grad_norm": 0.17521555721759796,
      "learning_rate": 5.669366051013752e-07,
      "loss": 0.0074,
      "step": 2883180
    },
    {
      "epoch": 4.718420036265326,
      "grad_norm": 0.13125582039356232,
      "learning_rate": 5.668707128878581e-07,
      "loss": 0.0078,
      "step": 2883200
    },
    {
      "epoch": 4.7184527667039795,
      "grad_norm": 0.519938051700592,
      "learning_rate": 5.66804820674341e-07,
      "loss": 0.0119,
      "step": 2883220
    },
    {
      "epoch": 4.718485497142632,
      "grad_norm": 0.23908481001853943,
      "learning_rate": 5.667389284608238e-07,
      "loss": 0.0089,
      "step": 2883240
    },
    {
      "epoch": 4.718518227581286,
      "grad_norm": 0.24545271694660187,
      "learning_rate": 5.666730362473067e-07,
      "loss": 0.0085,
      "step": 2883260
    },
    {
      "epoch": 4.718550958019939,
      "grad_norm": 0.23166203498840332,
      "learning_rate": 5.666071440337896e-07,
      "loss": 0.0088,
      "step": 2883280
    },
    {
      "epoch": 4.718583688458593,
      "grad_norm": 0.13725607097148895,
      "learning_rate": 5.665412518202725e-07,
      "loss": 0.0123,
      "step": 2883300
    },
    {
      "epoch": 4.718616418897246,
      "grad_norm": 0.13326013088226318,
      "learning_rate": 5.664753596067553e-07,
      "loss": 0.011,
      "step": 2883320
    },
    {
      "epoch": 4.718649149335899,
      "grad_norm": 0.3657783567905426,
      "learning_rate": 5.664094673932382e-07,
      "loss": 0.0096,
      "step": 2883340
    },
    {
      "epoch": 4.718681879774553,
      "grad_norm": 0.3820805549621582,
      "learning_rate": 5.66343575179721e-07,
      "loss": 0.0094,
      "step": 2883360
    },
    {
      "epoch": 4.7187146102132065,
      "grad_norm": 0.12935975193977356,
      "learning_rate": 5.662776829662039e-07,
      "loss": 0.008,
      "step": 2883380
    },
    {
      "epoch": 4.718747340651859,
      "grad_norm": 0.28146421909332275,
      "learning_rate": 5.662117907526868e-07,
      "loss": 0.0074,
      "step": 2883400
    },
    {
      "epoch": 4.718780071090513,
      "grad_norm": 0.20283176004886627,
      "learning_rate": 5.661458985391697e-07,
      "loss": 0.0058,
      "step": 2883420
    },
    {
      "epoch": 4.718812801529166,
      "grad_norm": 0.18036137521266937,
      "learning_rate": 5.660800063256526e-07,
      "loss": 0.0073,
      "step": 2883440
    },
    {
      "epoch": 4.718845531967819,
      "grad_norm": 0.17601574957370758,
      "learning_rate": 5.660141141121354e-07,
      "loss": 0.0082,
      "step": 2883460
    },
    {
      "epoch": 4.718878262406473,
      "grad_norm": 0.6030893921852112,
      "learning_rate": 5.659482218986183e-07,
      "loss": 0.0105,
      "step": 2883480
    },
    {
      "epoch": 4.718910992845126,
      "grad_norm": 0.36399757862091064,
      "learning_rate": 5.658823296851012e-07,
      "loss": 0.0137,
      "step": 2883500
    },
    {
      "epoch": 4.718943723283779,
      "grad_norm": 0.06502784043550491,
      "learning_rate": 5.658164374715841e-07,
      "loss": 0.0085,
      "step": 2883520
    },
    {
      "epoch": 4.718976453722433,
      "grad_norm": 0.1915953904390335,
      "learning_rate": 5.657505452580669e-07,
      "loss": 0.0077,
      "step": 2883540
    },
    {
      "epoch": 4.719009184161086,
      "grad_norm": 0.12441661953926086,
      "learning_rate": 5.656846530445498e-07,
      "loss": 0.0088,
      "step": 2883560
    },
    {
      "epoch": 4.71904191459974,
      "grad_norm": 0.16894394159317017,
      "learning_rate": 5.656187608310326e-07,
      "loss": 0.0063,
      "step": 2883580
    },
    {
      "epoch": 4.7190746450383925,
      "grad_norm": 0.5714960694313049,
      "learning_rate": 5.655528686175156e-07,
      "loss": 0.0088,
      "step": 2883600
    },
    {
      "epoch": 4.719107375477046,
      "grad_norm": 0.14469873905181885,
      "learning_rate": 5.654869764039984e-07,
      "loss": 0.0099,
      "step": 2883620
    },
    {
      "epoch": 4.7191401059157,
      "grad_norm": 0.317931592464447,
      "learning_rate": 5.654210841904813e-07,
      "loss": 0.0058,
      "step": 2883640
    },
    {
      "epoch": 4.719172836354353,
      "grad_norm": 0.15822826325893402,
      "learning_rate": 5.653551919769641e-07,
      "loss": 0.0105,
      "step": 2883660
    },
    {
      "epoch": 4.719205566793006,
      "grad_norm": 0.28162944316864014,
      "learning_rate": 5.65289299763447e-07,
      "loss": 0.01,
      "step": 2883680
    },
    {
      "epoch": 4.7192382972316596,
      "grad_norm": 0.21099503338336945,
      "learning_rate": 5.652234075499299e-07,
      "loss": 0.0099,
      "step": 2883700
    },
    {
      "epoch": 4.719271027670313,
      "grad_norm": 0.12111803144216537,
      "learning_rate": 5.651575153364127e-07,
      "loss": 0.0074,
      "step": 2883720
    },
    {
      "epoch": 4.719303758108966,
      "grad_norm": 0.6353453993797302,
      "learning_rate": 5.650916231228956e-07,
      "loss": 0.0068,
      "step": 2883740
    },
    {
      "epoch": 4.7193364885476194,
      "grad_norm": 0.14834064245224,
      "learning_rate": 5.650257309093784e-07,
      "loss": 0.0082,
      "step": 2883760
    },
    {
      "epoch": 4.719369218986273,
      "grad_norm": 0.10125479847192764,
      "learning_rate": 5.649598386958614e-07,
      "loss": 0.009,
      "step": 2883780
    },
    {
      "epoch": 4.719401949424926,
      "grad_norm": 0.10955614596605301,
      "learning_rate": 5.648939464823442e-07,
      "loss": 0.0101,
      "step": 2883800
    },
    {
      "epoch": 4.719434679863579,
      "grad_norm": 0.05039114132523537,
      "learning_rate": 5.648280542688271e-07,
      "loss": 0.0101,
      "step": 2883820
    },
    {
      "epoch": 4.719467410302233,
      "grad_norm": 0.2261469066143036,
      "learning_rate": 5.647621620553099e-07,
      "loss": 0.0057,
      "step": 2883840
    },
    {
      "epoch": 4.7195001407408865,
      "grad_norm": 0.2797686755657196,
      "learning_rate": 5.646962698417929e-07,
      "loss": 0.0082,
      "step": 2883860
    },
    {
      "epoch": 4.719532871179539,
      "grad_norm": 0.08158828318119049,
      "learning_rate": 5.646303776282757e-07,
      "loss": 0.0072,
      "step": 2883880
    },
    {
      "epoch": 4.719565601618193,
      "grad_norm": 0.08093726634979248,
      "learning_rate": 5.645644854147586e-07,
      "loss": 0.0055,
      "step": 2883900
    },
    {
      "epoch": 4.719598332056846,
      "grad_norm": 0.05204018950462341,
      "learning_rate": 5.644985932012414e-07,
      "loss": 0.0099,
      "step": 2883920
    },
    {
      "epoch": 4.7196310624955,
      "grad_norm": 0.16195744276046753,
      "learning_rate": 5.644327009877244e-07,
      "loss": 0.0054,
      "step": 2883940
    },
    {
      "epoch": 4.719663792934153,
      "grad_norm": 0.19414468109607697,
      "learning_rate": 5.643668087742072e-07,
      "loss": 0.0121,
      "step": 2883960
    },
    {
      "epoch": 4.719696523372806,
      "grad_norm": 0.25638943910598755,
      "learning_rate": 5.6430091656069e-07,
      "loss": 0.0084,
      "step": 2883980
    },
    {
      "epoch": 4.71972925381146,
      "grad_norm": 0.21477766335010529,
      "learning_rate": 5.64235024347173e-07,
      "loss": 0.0084,
      "step": 2884000
    },
    {
      "epoch": 4.719761984250113,
      "grad_norm": 0.5234763026237488,
      "learning_rate": 5.641691321336557e-07,
      "loss": 0.011,
      "step": 2884020
    },
    {
      "epoch": 4.719794714688766,
      "grad_norm": 0.3071725070476532,
      "learning_rate": 5.641032399201387e-07,
      "loss": 0.0084,
      "step": 2884040
    },
    {
      "epoch": 4.71982744512742,
      "grad_norm": 0.07934371381998062,
      "learning_rate": 5.640373477066215e-07,
      "loss": 0.0073,
      "step": 2884060
    },
    {
      "epoch": 4.7198601755660725,
      "grad_norm": 0.16595995426177979,
      "learning_rate": 5.639714554931045e-07,
      "loss": 0.0074,
      "step": 2884080
    },
    {
      "epoch": 4.719892906004726,
      "grad_norm": 0.12046041339635849,
      "learning_rate": 5.639055632795873e-07,
      "loss": 0.0068,
      "step": 2884100
    },
    {
      "epoch": 4.71992563644338,
      "grad_norm": 0.1533583402633667,
      "learning_rate": 5.638396710660702e-07,
      "loss": 0.009,
      "step": 2884120
    },
    {
      "epoch": 4.719958366882033,
      "grad_norm": 0.214695543050766,
      "learning_rate": 5.63773778852553e-07,
      "loss": 0.0089,
      "step": 2884140
    },
    {
      "epoch": 4.719991097320686,
      "grad_norm": 0.17949211597442627,
      "learning_rate": 5.63707886639036e-07,
      "loss": 0.0142,
      "step": 2884160
    },
    {
      "epoch": 4.72002382775934,
      "grad_norm": 0.1225183755159378,
      "learning_rate": 5.636419944255188e-07,
      "loss": 0.0086,
      "step": 2884180
    },
    {
      "epoch": 4.720056558197993,
      "grad_norm": 0.3459804654121399,
      "learning_rate": 5.635761022120017e-07,
      "loss": 0.0079,
      "step": 2884200
    },
    {
      "epoch": 4.720089288636647,
      "grad_norm": 0.14414329826831818,
      "learning_rate": 5.635102099984845e-07,
      "loss": 0.0099,
      "step": 2884220
    },
    {
      "epoch": 4.7201220190752995,
      "grad_norm": 0.10825509577989578,
      "learning_rate": 5.634443177849674e-07,
      "loss": 0.0086,
      "step": 2884240
    },
    {
      "epoch": 4.720154749513953,
      "grad_norm": 0.22318816184997559,
      "learning_rate": 5.633784255714503e-07,
      "loss": 0.0076,
      "step": 2884260
    },
    {
      "epoch": 4.720187479952607,
      "grad_norm": 0.09255383908748627,
      "learning_rate": 5.633125333579332e-07,
      "loss": 0.0088,
      "step": 2884280
    },
    {
      "epoch": 4.720220210391259,
      "grad_norm": 0.27317869663238525,
      "learning_rate": 5.63246641144416e-07,
      "loss": 0.0114,
      "step": 2884300
    },
    {
      "epoch": 4.720252940829913,
      "grad_norm": 0.3287498652935028,
      "learning_rate": 5.631807489308988e-07,
      "loss": 0.0059,
      "step": 2884320
    },
    {
      "epoch": 4.720285671268567,
      "grad_norm": 0.10482292622327805,
      "learning_rate": 5.631148567173818e-07,
      "loss": 0.0098,
      "step": 2884340
    },
    {
      "epoch": 4.720318401707219,
      "grad_norm": 0.2084556221961975,
      "learning_rate": 5.630489645038646e-07,
      "loss": 0.0084,
      "step": 2884360
    },
    {
      "epoch": 4.720351132145873,
      "grad_norm": 0.46415260434150696,
      "learning_rate": 5.629830722903475e-07,
      "loss": 0.0096,
      "step": 2884380
    },
    {
      "epoch": 4.7203838625845265,
      "grad_norm": 0.2782838046550751,
      "learning_rate": 5.629171800768303e-07,
      "loss": 0.0097,
      "step": 2884400
    },
    {
      "epoch": 4.72041659302318,
      "grad_norm": 0.15144996345043182,
      "learning_rate": 5.628512878633132e-07,
      "loss": 0.006,
      "step": 2884420
    },
    {
      "epoch": 4.720449323461833,
      "grad_norm": 0.10707443207502365,
      "learning_rate": 5.627853956497961e-07,
      "loss": 0.0063,
      "step": 2884440
    },
    {
      "epoch": 4.720482053900486,
      "grad_norm": 0.2358146458864212,
      "learning_rate": 5.62719503436279e-07,
      "loss": 0.0112,
      "step": 2884460
    },
    {
      "epoch": 4.72051478433914,
      "grad_norm": 0.36612367630004883,
      "learning_rate": 5.626536112227618e-07,
      "loss": 0.0094,
      "step": 2884480
    },
    {
      "epoch": 4.720547514777793,
      "grad_norm": 0.4125925600528717,
      "learning_rate": 5.625877190092447e-07,
      "loss": 0.0067,
      "step": 2884500
    },
    {
      "epoch": 4.720580245216446,
      "grad_norm": 0.24370074272155762,
      "learning_rate": 5.625218267957276e-07,
      "loss": 0.0098,
      "step": 2884520
    },
    {
      "epoch": 4.7206129756551,
      "grad_norm": 0.2367318868637085,
      "learning_rate": 5.624559345822105e-07,
      "loss": 0.0109,
      "step": 2884540
    },
    {
      "epoch": 4.7206457060937534,
      "grad_norm": 0.1458050161600113,
      "learning_rate": 5.623900423686933e-07,
      "loss": 0.0101,
      "step": 2884560
    },
    {
      "epoch": 4.720678436532406,
      "grad_norm": 0.5575414896011353,
      "learning_rate": 5.623241501551762e-07,
      "loss": 0.0065,
      "step": 2884580
    },
    {
      "epoch": 4.72071116697106,
      "grad_norm": 0.1937388777732849,
      "learning_rate": 5.622582579416591e-07,
      "loss": 0.0135,
      "step": 2884600
    },
    {
      "epoch": 4.720743897409713,
      "grad_norm": 0.2696463167667389,
      "learning_rate": 5.62192365728142e-07,
      "loss": 0.0132,
      "step": 2884620
    },
    {
      "epoch": 4.720776627848366,
      "grad_norm": 0.26761582493782043,
      "learning_rate": 5.621264735146248e-07,
      "loss": 0.0081,
      "step": 2884640
    },
    {
      "epoch": 4.72080935828702,
      "grad_norm": 0.4275341033935547,
      "learning_rate": 5.620605813011078e-07,
      "loss": 0.0106,
      "step": 2884660
    },
    {
      "epoch": 4.720842088725673,
      "grad_norm": 0.028207264840602875,
      "learning_rate": 5.619946890875905e-07,
      "loss": 0.0131,
      "step": 2884680
    },
    {
      "epoch": 4.720874819164326,
      "grad_norm": 0.3900908827781677,
      "learning_rate": 5.619287968740734e-07,
      "loss": 0.0087,
      "step": 2884700
    },
    {
      "epoch": 4.7209075496029795,
      "grad_norm": 0.13370643556118011,
      "learning_rate": 5.618629046605563e-07,
      "loss": 0.0075,
      "step": 2884720
    },
    {
      "epoch": 4.720940280041633,
      "grad_norm": 0.22136352956295013,
      "learning_rate": 5.617970124470392e-07,
      "loss": 0.0131,
      "step": 2884740
    },
    {
      "epoch": 4.720973010480287,
      "grad_norm": 0.24148552119731903,
      "learning_rate": 5.617311202335221e-07,
      "loss": 0.0073,
      "step": 2884760
    },
    {
      "epoch": 4.721005740918939,
      "grad_norm": 0.36996546387672424,
      "learning_rate": 5.616652280200049e-07,
      "loss": 0.0096,
      "step": 2884780
    },
    {
      "epoch": 4.721038471357593,
      "grad_norm": 0.12509450316429138,
      "learning_rate": 5.615993358064878e-07,
      "loss": 0.0109,
      "step": 2884800
    },
    {
      "epoch": 4.721071201796247,
      "grad_norm": 0.24459852278232574,
      "learning_rate": 5.615334435929707e-07,
      "loss": 0.0096,
      "step": 2884820
    },
    {
      "epoch": 4.7211039322349,
      "grad_norm": 0.20422430336475372,
      "learning_rate": 5.614675513794536e-07,
      "loss": 0.0068,
      "step": 2884840
    },
    {
      "epoch": 4.721136662673553,
      "grad_norm": 0.11095291376113892,
      "learning_rate": 5.614016591659364e-07,
      "loss": 0.01,
      "step": 2884860
    },
    {
      "epoch": 4.7211693931122065,
      "grad_norm": 0.16325530409812927,
      "learning_rate": 5.613357669524193e-07,
      "loss": 0.0087,
      "step": 2884880
    },
    {
      "epoch": 4.72120212355086,
      "grad_norm": 0.3849259614944458,
      "learning_rate": 5.612698747389021e-07,
      "loss": 0.0065,
      "step": 2884900
    },
    {
      "epoch": 4.721234853989513,
      "grad_norm": 0.4149826765060425,
      "learning_rate": 5.612039825253851e-07,
      "loss": 0.0105,
      "step": 2884920
    },
    {
      "epoch": 4.721267584428166,
      "grad_norm": 0.22437913715839386,
      "learning_rate": 5.611380903118679e-07,
      "loss": 0.0115,
      "step": 2884940
    },
    {
      "epoch": 4.72130031486682,
      "grad_norm": 0.19035527110099792,
      "learning_rate": 5.610721980983508e-07,
      "loss": 0.0094,
      "step": 2884960
    },
    {
      "epoch": 4.721333045305473,
      "grad_norm": 0.32332420349121094,
      "learning_rate": 5.610063058848336e-07,
      "loss": 0.008,
      "step": 2884980
    },
    {
      "epoch": 4.721365775744126,
      "grad_norm": 0.18655166029930115,
      "learning_rate": 5.609404136713166e-07,
      "loss": 0.0063,
      "step": 2885000
    },
    {
      "epoch": 4.72139850618278,
      "grad_norm": 0.4576910734176636,
      "learning_rate": 5.608745214577994e-07,
      "loss": 0.009,
      "step": 2885020
    },
    {
      "epoch": 4.7214312366214335,
      "grad_norm": 0.10284062474966049,
      "learning_rate": 5.608086292442822e-07,
      "loss": 0.0064,
      "step": 2885040
    },
    {
      "epoch": 4.721463967060086,
      "grad_norm": 0.13972635567188263,
      "learning_rate": 5.607427370307651e-07,
      "loss": 0.0096,
      "step": 2885060
    },
    {
      "epoch": 4.72149669749874,
      "grad_norm": 0.6908965706825256,
      "learning_rate": 5.606768448172479e-07,
      "loss": 0.0089,
      "step": 2885080
    },
    {
      "epoch": 4.721529427937393,
      "grad_norm": 0.3362223505973816,
      "learning_rate": 5.606109526037309e-07,
      "loss": 0.0066,
      "step": 2885100
    },
    {
      "epoch": 4.721562158376047,
      "grad_norm": 0.6278431415557861,
      "learning_rate": 5.605450603902137e-07,
      "loss": 0.0084,
      "step": 2885120
    },
    {
      "epoch": 4.7215948888147,
      "grad_norm": 0.43710875511169434,
      "learning_rate": 5.604791681766966e-07,
      "loss": 0.0169,
      "step": 2885140
    },
    {
      "epoch": 4.721627619253353,
      "grad_norm": 0.32241013646125793,
      "learning_rate": 5.604132759631794e-07,
      "loss": 0.0072,
      "step": 2885160
    },
    {
      "epoch": 4.721660349692007,
      "grad_norm": 0.1642940491437912,
      "learning_rate": 5.603473837496624e-07,
      "loss": 0.0109,
      "step": 2885180
    },
    {
      "epoch": 4.72169308013066,
      "grad_norm": 0.379264235496521,
      "learning_rate": 5.602814915361452e-07,
      "loss": 0.0088,
      "step": 2885200
    },
    {
      "epoch": 4.721725810569313,
      "grad_norm": 0.388208270072937,
      "learning_rate": 5.602155993226281e-07,
      "loss": 0.0127,
      "step": 2885220
    },
    {
      "epoch": 4.721758541007967,
      "grad_norm": 0.08538281172513962,
      "learning_rate": 5.601497071091109e-07,
      "loss": 0.0069,
      "step": 2885240
    },
    {
      "epoch": 4.7217912714466195,
      "grad_norm": 0.3885166049003601,
      "learning_rate": 5.600838148955939e-07,
      "loss": 0.0079,
      "step": 2885260
    },
    {
      "epoch": 4.721824001885273,
      "grad_norm": 0.17587833106517792,
      "learning_rate": 5.600179226820767e-07,
      "loss": 0.0082,
      "step": 2885280
    },
    {
      "epoch": 4.721856732323927,
      "grad_norm": 0.12377253919839859,
      "learning_rate": 5.599520304685596e-07,
      "loss": 0.0058,
      "step": 2885300
    },
    {
      "epoch": 4.72188946276258,
      "grad_norm": 0.47860902547836304,
      "learning_rate": 5.598861382550425e-07,
      "loss": 0.0104,
      "step": 2885320
    },
    {
      "epoch": 4.721922193201233,
      "grad_norm": 0.7815877795219421,
      "learning_rate": 5.598202460415252e-07,
      "loss": 0.0068,
      "step": 2885340
    },
    {
      "epoch": 4.721954923639887,
      "grad_norm": 0.5194416046142578,
      "learning_rate": 5.597543538280082e-07,
      "loss": 0.0107,
      "step": 2885360
    },
    {
      "epoch": 4.72198765407854,
      "grad_norm": 0.10027047246694565,
      "learning_rate": 5.59688461614491e-07,
      "loss": 0.0092,
      "step": 2885380
    },
    {
      "epoch": 4.722020384517194,
      "grad_norm": 0.1424262821674347,
      "learning_rate": 5.59622569400974e-07,
      "loss": 0.0121,
      "step": 2885400
    },
    {
      "epoch": 4.7220531149558465,
      "grad_norm": 0.2206159085035324,
      "learning_rate": 5.595566771874568e-07,
      "loss": 0.0066,
      "step": 2885420
    },
    {
      "epoch": 4.7220858453945,
      "grad_norm": 0.25665611028671265,
      "learning_rate": 5.594907849739397e-07,
      "loss": 0.0092,
      "step": 2885440
    },
    {
      "epoch": 4.722118575833154,
      "grad_norm": 0.22021155059337616,
      "learning_rate": 5.594248927604225e-07,
      "loss": 0.009,
      "step": 2885460
    },
    {
      "epoch": 4.722151306271806,
      "grad_norm": 0.26519253849983215,
      "learning_rate": 5.593590005469055e-07,
      "loss": 0.0101,
      "step": 2885480
    },
    {
      "epoch": 4.72218403671046,
      "grad_norm": 0.22709526121616364,
      "learning_rate": 5.592931083333883e-07,
      "loss": 0.0087,
      "step": 2885500
    },
    {
      "epoch": 4.7222167671491135,
      "grad_norm": 0.11332133412361145,
      "learning_rate": 5.592272161198712e-07,
      "loss": 0.0088,
      "step": 2885520
    },
    {
      "epoch": 4.722249497587766,
      "grad_norm": 0.19776195287704468,
      "learning_rate": 5.59161323906354e-07,
      "loss": 0.0065,
      "step": 2885540
    },
    {
      "epoch": 4.72228222802642,
      "grad_norm": 0.13827316462993622,
      "learning_rate": 5.590954316928369e-07,
      "loss": 0.0109,
      "step": 2885560
    },
    {
      "epoch": 4.722314958465073,
      "grad_norm": 0.5896400809288025,
      "learning_rate": 5.590295394793198e-07,
      "loss": 0.0132,
      "step": 2885580
    },
    {
      "epoch": 4.722347688903727,
      "grad_norm": 0.2677578628063202,
      "learning_rate": 5.589636472658027e-07,
      "loss": 0.0088,
      "step": 2885600
    },
    {
      "epoch": 4.72238041934238,
      "grad_norm": 0.07644987106323242,
      "learning_rate": 5.588977550522855e-07,
      "loss": 0.0101,
      "step": 2885620
    },
    {
      "epoch": 4.722413149781033,
      "grad_norm": 0.25543534755706787,
      "learning_rate": 5.588318628387684e-07,
      "loss": 0.0067,
      "step": 2885640
    },
    {
      "epoch": 4.722445880219687,
      "grad_norm": 0.20279525220394135,
      "learning_rate": 5.587659706252513e-07,
      "loss": 0.0057,
      "step": 2885660
    },
    {
      "epoch": 4.7224786106583405,
      "grad_norm": 0.4580244719982147,
      "learning_rate": 5.587000784117342e-07,
      "loss": 0.0092,
      "step": 2885680
    },
    {
      "epoch": 4.722511341096993,
      "grad_norm": 0.21632331609725952,
      "learning_rate": 5.58634186198217e-07,
      "loss": 0.0098,
      "step": 2885700
    },
    {
      "epoch": 4.722544071535647,
      "grad_norm": 0.39293748140335083,
      "learning_rate": 5.585682939846998e-07,
      "loss": 0.0081,
      "step": 2885720
    },
    {
      "epoch": 4.7225768019743,
      "grad_norm": 0.10960079729557037,
      "learning_rate": 5.585024017711827e-07,
      "loss": 0.0107,
      "step": 2885740
    },
    {
      "epoch": 4.722609532412953,
      "grad_norm": 0.34624341130256653,
      "learning_rate": 5.584365095576656e-07,
      "loss": 0.0082,
      "step": 2885760
    },
    {
      "epoch": 4.722642262851607,
      "grad_norm": 0.08726144582033157,
      "learning_rate": 5.583706173441485e-07,
      "loss": 0.0064,
      "step": 2885780
    },
    {
      "epoch": 4.72267499329026,
      "grad_norm": 0.3542908728122711,
      "learning_rate": 5.583047251306313e-07,
      "loss": 0.0138,
      "step": 2885800
    },
    {
      "epoch": 4.722707723728913,
      "grad_norm": 0.32852718234062195,
      "learning_rate": 5.582388329171142e-07,
      "loss": 0.0078,
      "step": 2885820
    },
    {
      "epoch": 4.722740454167567,
      "grad_norm": 0.20401938259601593,
      "learning_rate": 5.581729407035971e-07,
      "loss": 0.0071,
      "step": 2885840
    },
    {
      "epoch": 4.72277318460622,
      "grad_norm": 0.12040188163518906,
      "learning_rate": 5.5810704849008e-07,
      "loss": 0.0072,
      "step": 2885860
    },
    {
      "epoch": 4.722805915044874,
      "grad_norm": 0.5835700035095215,
      "learning_rate": 5.580411562765628e-07,
      "loss": 0.0117,
      "step": 2885880
    },
    {
      "epoch": 4.7228386454835265,
      "grad_norm": 0.1670692265033722,
      "learning_rate": 5.579752640630457e-07,
      "loss": 0.007,
      "step": 2885900
    },
    {
      "epoch": 4.72287137592218,
      "grad_norm": 0.4179621636867523,
      "learning_rate": 5.579093718495286e-07,
      "loss": 0.0101,
      "step": 2885920
    },
    {
      "epoch": 4.722904106360834,
      "grad_norm": 0.24961639940738678,
      "learning_rate": 5.578434796360115e-07,
      "loss": 0.0096,
      "step": 2885940
    },
    {
      "epoch": 4.722936836799487,
      "grad_norm": 0.405366986989975,
      "learning_rate": 5.577775874224943e-07,
      "loss": 0.0126,
      "step": 2885960
    },
    {
      "epoch": 4.72296956723814,
      "grad_norm": 0.24108301103115082,
      "learning_rate": 5.577116952089773e-07,
      "loss": 0.0066,
      "step": 2885980
    },
    {
      "epoch": 4.723002297676794,
      "grad_norm": 0.34860631823539734,
      "learning_rate": 5.5764580299546e-07,
      "loss": 0.0091,
      "step": 2886000
    },
    {
      "epoch": 4.723035028115447,
      "grad_norm": 0.1724473536014557,
      "learning_rate": 5.57579910781943e-07,
      "loss": 0.0082,
      "step": 2886020
    },
    {
      "epoch": 4.7230677585541,
      "grad_norm": 0.3118572533130646,
      "learning_rate": 5.575140185684258e-07,
      "loss": 0.0089,
      "step": 2886040
    },
    {
      "epoch": 4.7231004889927535,
      "grad_norm": 0.286823034286499,
      "learning_rate": 5.574481263549087e-07,
      "loss": 0.0059,
      "step": 2886060
    },
    {
      "epoch": 4.723133219431407,
      "grad_norm": 0.24204154312610626,
      "learning_rate": 5.573822341413916e-07,
      "loss": 0.0063,
      "step": 2886080
    },
    {
      "epoch": 4.72316594987006,
      "grad_norm": 0.5032978057861328,
      "learning_rate": 5.573163419278744e-07,
      "loss": 0.0094,
      "step": 2886100
    },
    {
      "epoch": 4.723198680308713,
      "grad_norm": 0.20619380474090576,
      "learning_rate": 5.572504497143573e-07,
      "loss": 0.0118,
      "step": 2886120
    },
    {
      "epoch": 4.723231410747367,
      "grad_norm": 0.15917500853538513,
      "learning_rate": 5.571845575008402e-07,
      "loss": 0.0112,
      "step": 2886140
    },
    {
      "epoch": 4.723264141186021,
      "grad_norm": 0.12074078619480133,
      "learning_rate": 5.571186652873231e-07,
      "loss": 0.0103,
      "step": 2886160
    },
    {
      "epoch": 4.723296871624673,
      "grad_norm": 0.12788675725460052,
      "learning_rate": 5.570527730738059e-07,
      "loss": 0.011,
      "step": 2886180
    },
    {
      "epoch": 4.723329602063327,
      "grad_norm": 0.15168212354183197,
      "learning_rate": 5.569868808602888e-07,
      "loss": 0.0056,
      "step": 2886200
    },
    {
      "epoch": 4.7233623325019805,
      "grad_norm": 0.27177610993385315,
      "learning_rate": 5.569209886467716e-07,
      "loss": 0.0061,
      "step": 2886220
    },
    {
      "epoch": 4.723395062940633,
      "grad_norm": 0.5539473295211792,
      "learning_rate": 5.568550964332546e-07,
      "loss": 0.0082,
      "step": 2886240
    },
    {
      "epoch": 4.723427793379287,
      "grad_norm": 0.19021721184253693,
      "learning_rate": 5.567892042197374e-07,
      "loss": 0.0114,
      "step": 2886260
    },
    {
      "epoch": 4.72346052381794,
      "grad_norm": 0.3923638164997101,
      "learning_rate": 5.567233120062203e-07,
      "loss": 0.0082,
      "step": 2886280
    },
    {
      "epoch": 4.723493254256594,
      "grad_norm": 0.15859326720237732,
      "learning_rate": 5.566574197927031e-07,
      "loss": 0.0106,
      "step": 2886300
    },
    {
      "epoch": 4.723525984695247,
      "grad_norm": 0.04717039316892624,
      "learning_rate": 5.565915275791861e-07,
      "loss": 0.0086,
      "step": 2886320
    },
    {
      "epoch": 4.7235587151339,
      "grad_norm": 0.20797887444496155,
      "learning_rate": 5.565256353656689e-07,
      "loss": 0.0137,
      "step": 2886340
    },
    {
      "epoch": 4.723591445572554,
      "grad_norm": 0.13681374490261078,
      "learning_rate": 5.564597431521517e-07,
      "loss": 0.0088,
      "step": 2886360
    },
    {
      "epoch": 4.7236241760112065,
      "grad_norm": 0.2087278664112091,
      "learning_rate": 5.563938509386346e-07,
      "loss": 0.0085,
      "step": 2886380
    },
    {
      "epoch": 4.72365690644986,
      "grad_norm": 0.2694397270679474,
      "learning_rate": 5.563279587251174e-07,
      "loss": 0.0096,
      "step": 2886400
    },
    {
      "epoch": 4.723689636888514,
      "grad_norm": 0.5164416432380676,
      "learning_rate": 5.562620665116004e-07,
      "loss": 0.01,
      "step": 2886420
    },
    {
      "epoch": 4.723722367327166,
      "grad_norm": 0.1775926649570465,
      "learning_rate": 5.561961742980832e-07,
      "loss": 0.012,
      "step": 2886440
    },
    {
      "epoch": 4.72375509776582,
      "grad_norm": 0.30251365900039673,
      "learning_rate": 5.561302820845661e-07,
      "loss": 0.0076,
      "step": 2886460
    },
    {
      "epoch": 4.723787828204474,
      "grad_norm": 0.08314705640077591,
      "learning_rate": 5.560643898710489e-07,
      "loss": 0.0065,
      "step": 2886480
    },
    {
      "epoch": 4.723820558643127,
      "grad_norm": 0.15748295187950134,
      "learning_rate": 5.559984976575319e-07,
      "loss": 0.007,
      "step": 2886500
    },
    {
      "epoch": 4.72385328908178,
      "grad_norm": 0.2900693118572235,
      "learning_rate": 5.559326054440147e-07,
      "loss": 0.0119,
      "step": 2886520
    },
    {
      "epoch": 4.7238860195204335,
      "grad_norm": 0.05379028245806694,
      "learning_rate": 5.558667132304976e-07,
      "loss": 0.0062,
      "step": 2886540
    },
    {
      "epoch": 4.723918749959087,
      "grad_norm": 0.3131224811077118,
      "learning_rate": 5.558008210169804e-07,
      "loss": 0.0103,
      "step": 2886560
    },
    {
      "epoch": 4.723951480397741,
      "grad_norm": 0.15057699382305145,
      "learning_rate": 5.557349288034634e-07,
      "loss": 0.0059,
      "step": 2886580
    },
    {
      "epoch": 4.723984210836393,
      "grad_norm": 0.5442071557044983,
      "learning_rate": 5.556690365899462e-07,
      "loss": 0.0106,
      "step": 2886600
    },
    {
      "epoch": 4.724016941275047,
      "grad_norm": 0.12252587825059891,
      "learning_rate": 5.556031443764291e-07,
      "loss": 0.0102,
      "step": 2886620
    },
    {
      "epoch": 4.724049671713701,
      "grad_norm": 0.20900948345661163,
      "learning_rate": 5.55537252162912e-07,
      "loss": 0.007,
      "step": 2886640
    },
    {
      "epoch": 4.724082402152353,
      "grad_norm": 0.3403238356113434,
      "learning_rate": 5.554713599493948e-07,
      "loss": 0.0079,
      "step": 2886660
    },
    {
      "epoch": 4.724115132591007,
      "grad_norm": 0.352855384349823,
      "learning_rate": 5.554054677358777e-07,
      "loss": 0.0108,
      "step": 2886680
    },
    {
      "epoch": 4.7241478630296605,
      "grad_norm": 0.08805296570062637,
      "learning_rate": 5.553395755223605e-07,
      "loss": 0.0086,
      "step": 2886700
    },
    {
      "epoch": 4.724180593468313,
      "grad_norm": 0.13765402138233185,
      "learning_rate": 5.552736833088435e-07,
      "loss": 0.0061,
      "step": 2886720
    },
    {
      "epoch": 4.724213323906967,
      "grad_norm": 0.3899939954280853,
      "learning_rate": 5.552077910953263e-07,
      "loss": 0.007,
      "step": 2886740
    },
    {
      "epoch": 4.72424605434562,
      "grad_norm": 0.3281562626361847,
      "learning_rate": 5.551418988818092e-07,
      "loss": 0.0159,
      "step": 2886760
    },
    {
      "epoch": 4.724278784784274,
      "grad_norm": 0.30212053656578064,
      "learning_rate": 5.55076006668292e-07,
      "loss": 0.0138,
      "step": 2886780
    },
    {
      "epoch": 4.724311515222927,
      "grad_norm": 0.0761428102850914,
      "learning_rate": 5.55010114454775e-07,
      "loss": 0.0073,
      "step": 2886800
    },
    {
      "epoch": 4.72434424566158,
      "grad_norm": 0.3729889988899231,
      "learning_rate": 5.549442222412578e-07,
      "loss": 0.0151,
      "step": 2886820
    },
    {
      "epoch": 4.724376976100234,
      "grad_norm": 0.4145776331424713,
      "learning_rate": 5.548783300277407e-07,
      "loss": 0.0059,
      "step": 2886840
    },
    {
      "epoch": 4.7244097065388875,
      "grad_norm": 0.22056761384010315,
      "learning_rate": 5.548124378142235e-07,
      "loss": 0.0132,
      "step": 2886860
    },
    {
      "epoch": 4.72444243697754,
      "grad_norm": 0.2164786010980606,
      "learning_rate": 5.547465456007064e-07,
      "loss": 0.0076,
      "step": 2886880
    },
    {
      "epoch": 4.724475167416194,
      "grad_norm": 0.2670139968395233,
      "learning_rate": 5.546806533871893e-07,
      "loss": 0.0094,
      "step": 2886900
    },
    {
      "epoch": 4.724507897854847,
      "grad_norm": 0.3456018269062042,
      "learning_rate": 5.546147611736722e-07,
      "loss": 0.0135,
      "step": 2886920
    },
    {
      "epoch": 4.7245406282935,
      "grad_norm": 0.20775893330574036,
      "learning_rate": 5.54548868960155e-07,
      "loss": 0.0079,
      "step": 2886940
    },
    {
      "epoch": 4.724573358732154,
      "grad_norm": 0.4557020962238312,
      "learning_rate": 5.544829767466379e-07,
      "loss": 0.008,
      "step": 2886960
    },
    {
      "epoch": 4.724606089170807,
      "grad_norm": 0.13130821287631989,
      "learning_rate": 5.544170845331208e-07,
      "loss": 0.0086,
      "step": 2886980
    },
    {
      "epoch": 4.72463881960946,
      "grad_norm": 0.0965922623872757,
      "learning_rate": 5.543511923196037e-07,
      "loss": 0.0103,
      "step": 2887000
    },
    {
      "epoch": 4.724671550048114,
      "grad_norm": 0.5718206763267517,
      "learning_rate": 5.542853001060865e-07,
      "loss": 0.007,
      "step": 2887020
    },
    {
      "epoch": 4.724704280486767,
      "grad_norm": 0.22885902225971222,
      "learning_rate": 5.542194078925693e-07,
      "loss": 0.0073,
      "step": 2887040
    },
    {
      "epoch": 4.724737010925421,
      "grad_norm": 0.34919390082359314,
      "learning_rate": 5.541535156790522e-07,
      "loss": 0.0072,
      "step": 2887060
    },
    {
      "epoch": 4.7247697413640735,
      "grad_norm": 0.45522111654281616,
      "learning_rate": 5.540876234655351e-07,
      "loss": 0.0106,
      "step": 2887080
    },
    {
      "epoch": 4.724802471802727,
      "grad_norm": 0.1319565623998642,
      "learning_rate": 5.54021731252018e-07,
      "loss": 0.007,
      "step": 2887100
    },
    {
      "epoch": 4.724835202241381,
      "grad_norm": 0.16253390908241272,
      "learning_rate": 5.539558390385008e-07,
      "loss": 0.0068,
      "step": 2887120
    },
    {
      "epoch": 4.724867932680034,
      "grad_norm": 0.8395935297012329,
      "learning_rate": 5.538899468249837e-07,
      "loss": 0.0101,
      "step": 2887140
    },
    {
      "epoch": 4.724900663118687,
      "grad_norm": 0.044596776366233826,
      "learning_rate": 5.538240546114666e-07,
      "loss": 0.0059,
      "step": 2887160
    },
    {
      "epoch": 4.7249333935573405,
      "grad_norm": 0.1280689537525177,
      "learning_rate": 5.537581623979495e-07,
      "loss": 0.0056,
      "step": 2887180
    },
    {
      "epoch": 4.724966123995994,
      "grad_norm": 0.11557570844888687,
      "learning_rate": 5.536922701844323e-07,
      "loss": 0.0071,
      "step": 2887200
    },
    {
      "epoch": 4.724998854434647,
      "grad_norm": 0.11082910746335983,
      "learning_rate": 5.536263779709152e-07,
      "loss": 0.0095,
      "step": 2887220
    },
    {
      "epoch": 4.7250315848733,
      "grad_norm": 0.21797692775726318,
      "learning_rate": 5.535604857573981e-07,
      "loss": 0.007,
      "step": 2887240
    },
    {
      "epoch": 4.725064315311954,
      "grad_norm": 0.04122895374894142,
      "learning_rate": 5.53494593543881e-07,
      "loss": 0.0087,
      "step": 2887260
    },
    {
      "epoch": 4.725097045750607,
      "grad_norm": 0.2434690147638321,
      "learning_rate": 5.534287013303638e-07,
      "loss": 0.0089,
      "step": 2887280
    },
    {
      "epoch": 4.72512977618926,
      "grad_norm": 0.20971858501434326,
      "learning_rate": 5.533628091168468e-07,
      "loss": 0.0078,
      "step": 2887300
    },
    {
      "epoch": 4.725162506627914,
      "grad_norm": 0.4102672338485718,
      "learning_rate": 5.532969169033295e-07,
      "loss": 0.011,
      "step": 2887320
    },
    {
      "epoch": 4.7251952370665675,
      "grad_norm": 0.1695772409439087,
      "learning_rate": 5.532310246898125e-07,
      "loss": 0.006,
      "step": 2887340
    },
    {
      "epoch": 4.72522796750522,
      "grad_norm": 0.40605878829956055,
      "learning_rate": 5.531651324762953e-07,
      "loss": 0.0071,
      "step": 2887360
    },
    {
      "epoch": 4.725260697943874,
      "grad_norm": 0.08286780118942261,
      "learning_rate": 5.530992402627783e-07,
      "loss": 0.0079,
      "step": 2887380
    },
    {
      "epoch": 4.725293428382527,
      "grad_norm": 0.09495864063501358,
      "learning_rate": 5.530333480492611e-07,
      "loss": 0.0079,
      "step": 2887400
    },
    {
      "epoch": 4.725326158821181,
      "grad_norm": 0.13111193478107452,
      "learning_rate": 5.529674558357439e-07,
      "loss": 0.0087,
      "step": 2887420
    },
    {
      "epoch": 4.725358889259834,
      "grad_norm": 0.05162528157234192,
      "learning_rate": 5.529015636222268e-07,
      "loss": 0.0114,
      "step": 2887440
    },
    {
      "epoch": 4.725391619698487,
      "grad_norm": 0.08672823011875153,
      "learning_rate": 5.528356714087097e-07,
      "loss": 0.0076,
      "step": 2887460
    },
    {
      "epoch": 4.725424350137141,
      "grad_norm": 0.48149850964546204,
      "learning_rate": 5.527697791951926e-07,
      "loss": 0.0079,
      "step": 2887480
    },
    {
      "epoch": 4.725457080575794,
      "grad_norm": 0.4613944888114929,
      "learning_rate": 5.527038869816754e-07,
      "loss": 0.0075,
      "step": 2887500
    },
    {
      "epoch": 4.725489811014447,
      "grad_norm": 0.12438629567623138,
      "learning_rate": 5.526379947681583e-07,
      "loss": 0.012,
      "step": 2887520
    },
    {
      "epoch": 4.725522541453101,
      "grad_norm": 0.21650023758411407,
      "learning_rate": 5.525721025546411e-07,
      "loss": 0.0073,
      "step": 2887540
    },
    {
      "epoch": 4.7255552718917535,
      "grad_norm": 0.2176991105079651,
      "learning_rate": 5.525062103411241e-07,
      "loss": 0.0061,
      "step": 2887560
    },
    {
      "epoch": 4.725588002330407,
      "grad_norm": 0.22759932279586792,
      "learning_rate": 5.524403181276069e-07,
      "loss": 0.0057,
      "step": 2887580
    },
    {
      "epoch": 4.725620732769061,
      "grad_norm": 0.05628257617354393,
      "learning_rate": 5.523744259140898e-07,
      "loss": 0.0059,
      "step": 2887600
    },
    {
      "epoch": 4.725653463207714,
      "grad_norm": 0.3055567443370819,
      "learning_rate": 5.523085337005726e-07,
      "loss": 0.0073,
      "step": 2887620
    },
    {
      "epoch": 4.725686193646367,
      "grad_norm": 0.20666725933551788,
      "learning_rate": 5.522426414870556e-07,
      "loss": 0.0095,
      "step": 2887640
    },
    {
      "epoch": 4.725718924085021,
      "grad_norm": 0.3367808163166046,
      "learning_rate": 5.521767492735384e-07,
      "loss": 0.007,
      "step": 2887660
    },
    {
      "epoch": 4.725751654523674,
      "grad_norm": 0.3465708792209625,
      "learning_rate": 5.521108570600213e-07,
      "loss": 0.007,
      "step": 2887680
    },
    {
      "epoch": 4.725784384962327,
      "grad_norm": 0.029911883175373077,
      "learning_rate": 5.520449648465041e-07,
      "loss": 0.0104,
      "step": 2887700
    },
    {
      "epoch": 4.7258171154009805,
      "grad_norm": 0.21423786878585815,
      "learning_rate": 5.519790726329869e-07,
      "loss": 0.0042,
      "step": 2887720
    },
    {
      "epoch": 4.725849845839634,
      "grad_norm": 0.08661363273859024,
      "learning_rate": 5.519131804194699e-07,
      "loss": 0.0078,
      "step": 2887740
    },
    {
      "epoch": 4.725882576278288,
      "grad_norm": 0.27429288625717163,
      "learning_rate": 5.518472882059527e-07,
      "loss": 0.0095,
      "step": 2887760
    },
    {
      "epoch": 4.72591530671694,
      "grad_norm": 0.2339688539505005,
      "learning_rate": 5.517813959924356e-07,
      "loss": 0.0076,
      "step": 2887780
    },
    {
      "epoch": 4.725948037155594,
      "grad_norm": 0.13864561915397644,
      "learning_rate": 5.517155037789184e-07,
      "loss": 0.0062,
      "step": 2887800
    },
    {
      "epoch": 4.725980767594248,
      "grad_norm": 0.18431155383586884,
      "learning_rate": 5.516496115654014e-07,
      "loss": 0.0082,
      "step": 2887820
    },
    {
      "epoch": 4.7260134980329,
      "grad_norm": 0.2567521929740906,
      "learning_rate": 5.515837193518842e-07,
      "loss": 0.0158,
      "step": 2887840
    },
    {
      "epoch": 4.726046228471554,
      "grad_norm": 0.1491008698940277,
      "learning_rate": 5.515178271383671e-07,
      "loss": 0.006,
      "step": 2887860
    },
    {
      "epoch": 4.7260789589102075,
      "grad_norm": 0.21110938489437103,
      "learning_rate": 5.514519349248499e-07,
      "loss": 0.0074,
      "step": 2887880
    },
    {
      "epoch": 4.72611168934886,
      "grad_norm": 0.1130935400724411,
      "learning_rate": 5.513860427113329e-07,
      "loss": 0.007,
      "step": 2887900
    },
    {
      "epoch": 4.726144419787514,
      "grad_norm": 0.4488492012023926,
      "learning_rate": 5.513201504978157e-07,
      "loss": 0.0075,
      "step": 2887920
    },
    {
      "epoch": 4.726177150226167,
      "grad_norm": 0.6478412747383118,
      "learning_rate": 5.512542582842986e-07,
      "loss": 0.0075,
      "step": 2887940
    },
    {
      "epoch": 4.726209880664821,
      "grad_norm": 0.22958901524543762,
      "learning_rate": 5.511883660707815e-07,
      "loss": 0.0087,
      "step": 2887960
    },
    {
      "epoch": 4.726242611103474,
      "grad_norm": 0.07317330688238144,
      "learning_rate": 5.511224738572643e-07,
      "loss": 0.0123,
      "step": 2887980
    },
    {
      "epoch": 4.726275341542127,
      "grad_norm": 0.272874653339386,
      "learning_rate": 5.510565816437472e-07,
      "loss": 0.0063,
      "step": 2888000
    },
    {
      "epoch": 4.726308071980781,
      "grad_norm": 0.3259044587612152,
      "learning_rate": 5.509906894302301e-07,
      "loss": 0.0081,
      "step": 2888020
    },
    {
      "epoch": 4.726340802419434,
      "grad_norm": 0.2319961041212082,
      "learning_rate": 5.50924797216713e-07,
      "loss": 0.0115,
      "step": 2888040
    },
    {
      "epoch": 4.726373532858087,
      "grad_norm": 0.14403504133224487,
      "learning_rate": 5.508589050031959e-07,
      "loss": 0.0057,
      "step": 2888060
    },
    {
      "epoch": 4.726406263296741,
      "grad_norm": 0.49330389499664307,
      "learning_rate": 5.507930127896787e-07,
      "loss": 0.007,
      "step": 2888080
    },
    {
      "epoch": 4.726438993735394,
      "grad_norm": 0.043728768825531006,
      "learning_rate": 5.507271205761615e-07,
      "loss": 0.0131,
      "step": 2888100
    },
    {
      "epoch": 4.726471724174047,
      "grad_norm": 0.37395429611206055,
      "learning_rate": 5.506612283626445e-07,
      "loss": 0.0181,
      "step": 2888120
    },
    {
      "epoch": 4.726504454612701,
      "grad_norm": 0.10165441781282425,
      "learning_rate": 5.505953361491273e-07,
      "loss": 0.009,
      "step": 2888140
    },
    {
      "epoch": 4.726537185051354,
      "grad_norm": 0.13674622774124146,
      "learning_rate": 5.505294439356102e-07,
      "loss": 0.0043,
      "step": 2888160
    },
    {
      "epoch": 4.726569915490007,
      "grad_norm": 0.12530040740966797,
      "learning_rate": 5.50463551722093e-07,
      "loss": 0.0085,
      "step": 2888180
    },
    {
      "epoch": 4.7266026459286605,
      "grad_norm": 0.16883428394794464,
      "learning_rate": 5.503976595085759e-07,
      "loss": 0.0074,
      "step": 2888200
    },
    {
      "epoch": 4.726635376367314,
      "grad_norm": 0.4214889407157898,
      "learning_rate": 5.503317672950588e-07,
      "loss": 0.011,
      "step": 2888220
    },
    {
      "epoch": 4.726668106805968,
      "grad_norm": 0.1618986576795578,
      "learning_rate": 5.502658750815417e-07,
      "loss": 0.0074,
      "step": 2888240
    },
    {
      "epoch": 4.72670083724462,
      "grad_norm": 0.8580792546272278,
      "learning_rate": 5.501999828680245e-07,
      "loss": 0.0174,
      "step": 2888260
    },
    {
      "epoch": 4.726733567683274,
      "grad_norm": 0.42030054330825806,
      "learning_rate": 5.501340906545074e-07,
      "loss": 0.0073,
      "step": 2888280
    },
    {
      "epoch": 4.726766298121928,
      "grad_norm": 0.27326858043670654,
      "learning_rate": 5.500681984409903e-07,
      "loss": 0.0052,
      "step": 2888300
    },
    {
      "epoch": 4.726799028560581,
      "grad_norm": 0.10629221051931381,
      "learning_rate": 5.500023062274732e-07,
      "loss": 0.0094,
      "step": 2888320
    },
    {
      "epoch": 4.726831758999234,
      "grad_norm": 0.6817014217376709,
      "learning_rate": 5.49936414013956e-07,
      "loss": 0.0083,
      "step": 2888340
    },
    {
      "epoch": 4.7268644894378875,
      "grad_norm": 0.26039212942123413,
      "learning_rate": 5.498705218004389e-07,
      "loss": 0.0103,
      "step": 2888360
    },
    {
      "epoch": 4.726897219876541,
      "grad_norm": 0.22913599014282227,
      "learning_rate": 5.498046295869217e-07,
      "loss": 0.0073,
      "step": 2888380
    },
    {
      "epoch": 4.726929950315194,
      "grad_norm": 0.24758540093898773,
      "learning_rate": 5.497387373734047e-07,
      "loss": 0.0075,
      "step": 2888400
    },
    {
      "epoch": 4.726962680753847,
      "grad_norm": 0.3092201352119446,
      "learning_rate": 5.496728451598875e-07,
      "loss": 0.0105,
      "step": 2888420
    },
    {
      "epoch": 4.726995411192501,
      "grad_norm": 0.3445433974266052,
      "learning_rate": 5.496069529463703e-07,
      "loss": 0.0108,
      "step": 2888440
    },
    {
      "epoch": 4.727028141631154,
      "grad_norm": 0.1113368570804596,
      "learning_rate": 5.495410607328532e-07,
      "loss": 0.006,
      "step": 2888460
    },
    {
      "epoch": 4.727060872069807,
      "grad_norm": 0.19260956346988678,
      "learning_rate": 5.494751685193361e-07,
      "loss": 0.0064,
      "step": 2888480
    },
    {
      "epoch": 4.727093602508461,
      "grad_norm": 0.2547893524169922,
      "learning_rate": 5.49409276305819e-07,
      "loss": 0.0064,
      "step": 2888500
    },
    {
      "epoch": 4.7271263329471145,
      "grad_norm": 0.2544797360897064,
      "learning_rate": 5.493433840923018e-07,
      "loss": 0.0071,
      "step": 2888520
    },
    {
      "epoch": 4.727159063385767,
      "grad_norm": 0.345589280128479,
      "learning_rate": 5.492774918787847e-07,
      "loss": 0.0118,
      "step": 2888540
    },
    {
      "epoch": 4.727191793824421,
      "grad_norm": 0.28636011481285095,
      "learning_rate": 5.492115996652676e-07,
      "loss": 0.0065,
      "step": 2888560
    },
    {
      "epoch": 4.727224524263074,
      "grad_norm": 0.1533733755350113,
      "learning_rate": 5.491457074517505e-07,
      "loss": 0.0105,
      "step": 2888580
    },
    {
      "epoch": 4.727257254701728,
      "grad_norm": 0.1438675969839096,
      "learning_rate": 5.490798152382333e-07,
      "loss": 0.009,
      "step": 2888600
    },
    {
      "epoch": 4.727289985140381,
      "grad_norm": 0.43250495195388794,
      "learning_rate": 5.490139230247163e-07,
      "loss": 0.0092,
      "step": 2888620
    },
    {
      "epoch": 4.727322715579034,
      "grad_norm": 0.30646175146102905,
      "learning_rate": 5.48948030811199e-07,
      "loss": 0.0119,
      "step": 2888640
    },
    {
      "epoch": 4.727355446017688,
      "grad_norm": 0.21379372477531433,
      "learning_rate": 5.48882138597682e-07,
      "loss": 0.0091,
      "step": 2888660
    },
    {
      "epoch": 4.727388176456341,
      "grad_norm": 0.15738148987293243,
      "learning_rate": 5.488162463841648e-07,
      "loss": 0.0111,
      "step": 2888680
    },
    {
      "epoch": 4.727420906894994,
      "grad_norm": 0.30787941813468933,
      "learning_rate": 5.487503541706478e-07,
      "loss": 0.0114,
      "step": 2888700
    },
    {
      "epoch": 4.727453637333648,
      "grad_norm": 0.07639873772859573,
      "learning_rate": 5.486844619571306e-07,
      "loss": 0.0094,
      "step": 2888720
    },
    {
      "epoch": 4.7274863677723005,
      "grad_norm": 0.11208552867174149,
      "learning_rate": 5.486185697436134e-07,
      "loss": 0.0081,
      "step": 2888740
    },
    {
      "epoch": 4.727519098210954,
      "grad_norm": 0.269589364528656,
      "learning_rate": 5.485526775300963e-07,
      "loss": 0.0097,
      "step": 2888760
    },
    {
      "epoch": 4.727551828649608,
      "grad_norm": 0.31097695231437683,
      "learning_rate": 5.484867853165792e-07,
      "loss": 0.0067,
      "step": 2888780
    },
    {
      "epoch": 4.727584559088261,
      "grad_norm": 0.4663618206977844,
      "learning_rate": 5.484208931030621e-07,
      "loss": 0.0101,
      "step": 2888800
    },
    {
      "epoch": 4.727617289526914,
      "grad_norm": 0.2088467925786972,
      "learning_rate": 5.483550008895449e-07,
      "loss": 0.0093,
      "step": 2888820
    },
    {
      "epoch": 4.7276500199655676,
      "grad_norm": 0.22928547859191895,
      "learning_rate": 5.482891086760278e-07,
      "loss": 0.0105,
      "step": 2888840
    },
    {
      "epoch": 4.727682750404221,
      "grad_norm": 0.6951708197593689,
      "learning_rate": 5.482232164625106e-07,
      "loss": 0.0121,
      "step": 2888860
    },
    {
      "epoch": 4.727715480842875,
      "grad_norm": 0.13283362984657288,
      "learning_rate": 5.481573242489936e-07,
      "loss": 0.0094,
      "step": 2888880
    },
    {
      "epoch": 4.7277482112815274,
      "grad_norm": 0.24378471076488495,
      "learning_rate": 5.480914320354764e-07,
      "loss": 0.0053,
      "step": 2888900
    },
    {
      "epoch": 4.727780941720181,
      "grad_norm": 0.11043231189250946,
      "learning_rate": 5.480255398219593e-07,
      "loss": 0.0091,
      "step": 2888920
    },
    {
      "epoch": 4.727813672158835,
      "grad_norm": 0.16441987454891205,
      "learning_rate": 5.479596476084421e-07,
      "loss": 0.0103,
      "step": 2888940
    },
    {
      "epoch": 4.727846402597487,
      "grad_norm": 0.12202922999858856,
      "learning_rate": 5.478937553949251e-07,
      "loss": 0.0047,
      "step": 2888960
    },
    {
      "epoch": 4.727879133036141,
      "grad_norm": 0.10042107850313187,
      "learning_rate": 5.478278631814079e-07,
      "loss": 0.0114,
      "step": 2888980
    },
    {
      "epoch": 4.7279118634747945,
      "grad_norm": 0.08015210181474686,
      "learning_rate": 5.477619709678908e-07,
      "loss": 0.009,
      "step": 2889000
    },
    {
      "epoch": 4.727944593913447,
      "grad_norm": 0.09613878279924393,
      "learning_rate": 5.476960787543736e-07,
      "loss": 0.0094,
      "step": 2889020
    },
    {
      "epoch": 4.727977324352101,
      "grad_norm": 0.3328718841075897,
      "learning_rate": 5.476301865408565e-07,
      "loss": 0.0127,
      "step": 2889040
    },
    {
      "epoch": 4.728010054790754,
      "grad_norm": 0.13995465636253357,
      "learning_rate": 5.475642943273394e-07,
      "loss": 0.0098,
      "step": 2889060
    },
    {
      "epoch": 4.728042785229408,
      "grad_norm": 0.1443740725517273,
      "learning_rate": 5.474984021138222e-07,
      "loss": 0.0088,
      "step": 2889080
    },
    {
      "epoch": 4.728075515668061,
      "grad_norm": 0.10912521183490753,
      "learning_rate": 5.474325099003051e-07,
      "loss": 0.0073,
      "step": 2889100
    },
    {
      "epoch": 4.728108246106714,
      "grad_norm": 0.15131628513336182,
      "learning_rate": 5.473666176867879e-07,
      "loss": 0.0119,
      "step": 2889120
    },
    {
      "epoch": 4.728140976545368,
      "grad_norm": 0.21596521139144897,
      "learning_rate": 5.473007254732709e-07,
      "loss": 0.0093,
      "step": 2889140
    },
    {
      "epoch": 4.728173706984021,
      "grad_norm": 0.10243608802556992,
      "learning_rate": 5.472348332597537e-07,
      "loss": 0.0098,
      "step": 2889160
    },
    {
      "epoch": 4.728206437422674,
      "grad_norm": 0.3263906240463257,
      "learning_rate": 5.471689410462366e-07,
      "loss": 0.0108,
      "step": 2889180
    },
    {
      "epoch": 4.728239167861328,
      "grad_norm": 0.31159356236457825,
      "learning_rate": 5.471030488327194e-07,
      "loss": 0.0068,
      "step": 2889200
    },
    {
      "epoch": 4.728271898299981,
      "grad_norm": 0.8639858961105347,
      "learning_rate": 5.470371566192024e-07,
      "loss": 0.0129,
      "step": 2889220
    },
    {
      "epoch": 4.728304628738634,
      "grad_norm": 0.5118829011917114,
      "learning_rate": 5.469712644056852e-07,
      "loss": 0.0108,
      "step": 2889240
    },
    {
      "epoch": 4.728337359177288,
      "grad_norm": 0.17523066699504852,
      "learning_rate": 5.469053721921681e-07,
      "loss": 0.0075,
      "step": 2889260
    },
    {
      "epoch": 4.728370089615941,
      "grad_norm": 0.24343201518058777,
      "learning_rate": 5.46839479978651e-07,
      "loss": 0.0126,
      "step": 2889280
    },
    {
      "epoch": 4.728402820054594,
      "grad_norm": 0.5956030488014221,
      "learning_rate": 5.467735877651338e-07,
      "loss": 0.011,
      "step": 2889300
    },
    {
      "epoch": 4.728435550493248,
      "grad_norm": 0.11400413513183594,
      "learning_rate": 5.467076955516167e-07,
      "loss": 0.0115,
      "step": 2889320
    },
    {
      "epoch": 4.728468280931901,
      "grad_norm": 0.3205644190311432,
      "learning_rate": 5.466418033380996e-07,
      "loss": 0.0087,
      "step": 2889340
    },
    {
      "epoch": 4.728501011370554,
      "grad_norm": 0.13118980824947357,
      "learning_rate": 5.465759111245825e-07,
      "loss": 0.0075,
      "step": 2889360
    },
    {
      "epoch": 4.7285337418092075,
      "grad_norm": 0.7806079387664795,
      "learning_rate": 5.465100189110654e-07,
      "loss": 0.0071,
      "step": 2889380
    },
    {
      "epoch": 4.728566472247861,
      "grad_norm": 0.23088853061199188,
      "learning_rate": 5.464441266975482e-07,
      "loss": 0.0093,
      "step": 2889400
    },
    {
      "epoch": 4.728599202686515,
      "grad_norm": 0.2673187255859375,
      "learning_rate": 5.46378234484031e-07,
      "loss": 0.0116,
      "step": 2889420
    },
    {
      "epoch": 4.728631933125167,
      "grad_norm": 0.17410004138946533,
      "learning_rate": 5.46312342270514e-07,
      "loss": 0.0093,
      "step": 2889440
    },
    {
      "epoch": 4.728664663563821,
      "grad_norm": 0.2864301800727844,
      "learning_rate": 5.462464500569968e-07,
      "loss": 0.0085,
      "step": 2889460
    },
    {
      "epoch": 4.728697394002475,
      "grad_norm": 0.3847077190876007,
      "learning_rate": 5.461805578434797e-07,
      "loss": 0.0072,
      "step": 2889480
    },
    {
      "epoch": 4.728730124441128,
      "grad_norm": 0.27267852425575256,
      "learning_rate": 5.461146656299625e-07,
      "loss": 0.01,
      "step": 2889500
    },
    {
      "epoch": 4.728762854879781,
      "grad_norm": 0.24557161331176758,
      "learning_rate": 5.460487734164454e-07,
      "loss": 0.0096,
      "step": 2889520
    },
    {
      "epoch": 4.7287955853184345,
      "grad_norm": 0.3944295346736908,
      "learning_rate": 5.459828812029283e-07,
      "loss": 0.0109,
      "step": 2889540
    },
    {
      "epoch": 4.728828315757088,
      "grad_norm": 0.3697099983692169,
      "learning_rate": 5.459169889894112e-07,
      "loss": 0.0064,
      "step": 2889560
    },
    {
      "epoch": 4.728861046195741,
      "grad_norm": 0.1681239753961563,
      "learning_rate": 5.45851096775894e-07,
      "loss": 0.0107,
      "step": 2889580
    },
    {
      "epoch": 4.728893776634394,
      "grad_norm": 0.34619736671447754,
      "learning_rate": 5.457852045623769e-07,
      "loss": 0.0062,
      "step": 2889600
    },
    {
      "epoch": 4.728926507073048,
      "grad_norm": 0.051951486617326736,
      "learning_rate": 5.457193123488598e-07,
      "loss": 0.007,
      "step": 2889620
    },
    {
      "epoch": 4.728959237511701,
      "grad_norm": 0.19699372351169586,
      "learning_rate": 5.456534201353427e-07,
      "loss": 0.0102,
      "step": 2889640
    },
    {
      "epoch": 4.728991967950354,
      "grad_norm": 0.23719756305217743,
      "learning_rate": 5.455875279218255e-07,
      "loss": 0.0143,
      "step": 2889660
    },
    {
      "epoch": 4.729024698389008,
      "grad_norm": 0.21840240061283112,
      "learning_rate": 5.455216357083084e-07,
      "loss": 0.0103,
      "step": 2889680
    },
    {
      "epoch": 4.7290574288276614,
      "grad_norm": 0.2918063998222351,
      "learning_rate": 5.454557434947912e-07,
      "loss": 0.0088,
      "step": 2889700
    },
    {
      "epoch": 4.729090159266314,
      "grad_norm": 0.1313139647245407,
      "learning_rate": 5.453898512812742e-07,
      "loss": 0.0088,
      "step": 2889720
    },
    {
      "epoch": 4.729122889704968,
      "grad_norm": 0.12530522048473358,
      "learning_rate": 5.45323959067757e-07,
      "loss": 0.0086,
      "step": 2889740
    },
    {
      "epoch": 4.729155620143621,
      "grad_norm": 0.0530252680182457,
      "learning_rate": 5.452580668542398e-07,
      "loss": 0.006,
      "step": 2889760
    },
    {
      "epoch": 4.729188350582275,
      "grad_norm": 0.36554116010665894,
      "learning_rate": 5.451921746407227e-07,
      "loss": 0.0076,
      "step": 2889780
    },
    {
      "epoch": 4.729221081020928,
      "grad_norm": 0.681560218334198,
      "learning_rate": 5.451262824272056e-07,
      "loss": 0.0101,
      "step": 2889800
    },
    {
      "epoch": 4.729253811459581,
      "grad_norm": 0.1921055018901825,
      "learning_rate": 5.450603902136885e-07,
      "loss": 0.0074,
      "step": 2889820
    },
    {
      "epoch": 4.729286541898235,
      "grad_norm": 0.16013380885124207,
      "learning_rate": 5.449944980001713e-07,
      "loss": 0.0119,
      "step": 2889840
    },
    {
      "epoch": 4.7293192723368875,
      "grad_norm": 0.20858493447303772,
      "learning_rate": 5.449286057866542e-07,
      "loss": 0.0092,
      "step": 2889860
    },
    {
      "epoch": 4.729352002775541,
      "grad_norm": 0.1123705729842186,
      "learning_rate": 5.448627135731371e-07,
      "loss": 0.0103,
      "step": 2889880
    },
    {
      "epoch": 4.729384733214195,
      "grad_norm": 0.23400941491127014,
      "learning_rate": 5.4479682135962e-07,
      "loss": 0.0097,
      "step": 2889900
    },
    {
      "epoch": 4.729417463652847,
      "grad_norm": 0.3108063042163849,
      "learning_rate": 5.447309291461028e-07,
      "loss": 0.0054,
      "step": 2889920
    },
    {
      "epoch": 4.729450194091501,
      "grad_norm": 0.16607044637203217,
      "learning_rate": 5.446650369325858e-07,
      "loss": 0.0079,
      "step": 2889940
    },
    {
      "epoch": 4.729482924530155,
      "grad_norm": 0.28260114789009094,
      "learning_rate": 5.445991447190685e-07,
      "loss": 0.012,
      "step": 2889960
    },
    {
      "epoch": 4.729515654968808,
      "grad_norm": 0.42954063415527344,
      "learning_rate": 5.445332525055515e-07,
      "loss": 0.0066,
      "step": 2889980
    },
    {
      "epoch": 4.729548385407461,
      "grad_norm": 0.49181896448135376,
      "learning_rate": 5.444673602920343e-07,
      "loss": 0.0136,
      "step": 2890000
    },
    {
      "epoch": 4.7295811158461145,
      "grad_norm": 0.1718907207250595,
      "learning_rate": 5.444014680785173e-07,
      "loss": 0.0058,
      "step": 2890020
    },
    {
      "epoch": 4.729613846284768,
      "grad_norm": 0.6355220675468445,
      "learning_rate": 5.443355758650001e-07,
      "loss": 0.0108,
      "step": 2890040
    },
    {
      "epoch": 4.729646576723422,
      "grad_norm": 0.038942981511354446,
      "learning_rate": 5.44269683651483e-07,
      "loss": 0.0067,
      "step": 2890060
    },
    {
      "epoch": 4.729679307162074,
      "grad_norm": 0.1957528442144394,
      "learning_rate": 5.442037914379658e-07,
      "loss": 0.0071,
      "step": 2890080
    },
    {
      "epoch": 4.729712037600728,
      "grad_norm": 0.1642007976770401,
      "learning_rate": 5.441378992244488e-07,
      "loss": 0.0107,
      "step": 2890100
    },
    {
      "epoch": 4.729744768039382,
      "grad_norm": 0.10922002792358398,
      "learning_rate": 5.440720070109316e-07,
      "loss": 0.0116,
      "step": 2890120
    },
    {
      "epoch": 4.729777498478034,
      "grad_norm": 0.14190171658992767,
      "learning_rate": 5.440061147974144e-07,
      "loss": 0.0084,
      "step": 2890140
    },
    {
      "epoch": 4.729810228916688,
      "grad_norm": 0.31047746539115906,
      "learning_rate": 5.439402225838973e-07,
      "loss": 0.0059,
      "step": 2890160
    },
    {
      "epoch": 4.7298429593553415,
      "grad_norm": 0.16121543943881989,
      "learning_rate": 5.438743303703801e-07,
      "loss": 0.009,
      "step": 2890180
    },
    {
      "epoch": 4.729875689793994,
      "grad_norm": 0.16530947387218475,
      "learning_rate": 5.438084381568631e-07,
      "loss": 0.0091,
      "step": 2890200
    },
    {
      "epoch": 4.729908420232648,
      "grad_norm": 0.13842816650867462,
      "learning_rate": 5.437425459433459e-07,
      "loss": 0.0097,
      "step": 2890220
    },
    {
      "epoch": 4.729941150671301,
      "grad_norm": 0.0846894159913063,
      "learning_rate": 5.436766537298288e-07,
      "loss": 0.0054,
      "step": 2890240
    },
    {
      "epoch": 4.729973881109955,
      "grad_norm": 0.28156226873397827,
      "learning_rate": 5.436107615163116e-07,
      "loss": 0.0071,
      "step": 2890260
    },
    {
      "epoch": 4.730006611548608,
      "grad_norm": 0.3067091703414917,
      "learning_rate": 5.435448693027946e-07,
      "loss": 0.0083,
      "step": 2890280
    },
    {
      "epoch": 4.730039341987261,
      "grad_norm": 0.187666118144989,
      "learning_rate": 5.434789770892774e-07,
      "loss": 0.0101,
      "step": 2890300
    },
    {
      "epoch": 4.730072072425915,
      "grad_norm": 0.12626013159751892,
      "learning_rate": 5.434130848757603e-07,
      "loss": 0.0136,
      "step": 2890320
    },
    {
      "epoch": 4.7301048028645685,
      "grad_norm": 0.20487543940544128,
      "learning_rate": 5.433471926622431e-07,
      "loss": 0.007,
      "step": 2890340
    },
    {
      "epoch": 4.730137533303221,
      "grad_norm": 0.17368432879447937,
      "learning_rate": 5.43281300448726e-07,
      "loss": 0.0114,
      "step": 2890360
    },
    {
      "epoch": 4.730170263741875,
      "grad_norm": 0.2000264674425125,
      "learning_rate": 5.432154082352089e-07,
      "loss": 0.0072,
      "step": 2890380
    },
    {
      "epoch": 4.730202994180528,
      "grad_norm": 0.4471757411956787,
      "learning_rate": 5.431495160216918e-07,
      "loss": 0.0136,
      "step": 2890400
    },
    {
      "epoch": 4.730235724619181,
      "grad_norm": 0.3090949058532715,
      "learning_rate": 5.430836238081746e-07,
      "loss": 0.0092,
      "step": 2890420
    },
    {
      "epoch": 4.730268455057835,
      "grad_norm": 0.1923859417438507,
      "learning_rate": 5.430177315946574e-07,
      "loss": 0.0121,
      "step": 2890440
    },
    {
      "epoch": 4.730301185496488,
      "grad_norm": 0.43409183621406555,
      "learning_rate": 5.429518393811404e-07,
      "loss": 0.008,
      "step": 2890460
    },
    {
      "epoch": 4.730333915935141,
      "grad_norm": 0.23062875866889954,
      "learning_rate": 5.428859471676232e-07,
      "loss": 0.0103,
      "step": 2890480
    },
    {
      "epoch": 4.730366646373795,
      "grad_norm": 0.05963776633143425,
      "learning_rate": 5.428200549541061e-07,
      "loss": 0.0063,
      "step": 2890500
    },
    {
      "epoch": 4.730399376812448,
      "grad_norm": 0.10610140115022659,
      "learning_rate": 5.427541627405889e-07,
      "loss": 0.0104,
      "step": 2890520
    },
    {
      "epoch": 4.730432107251102,
      "grad_norm": 0.16944536566734314,
      "learning_rate": 5.426882705270719e-07,
      "loss": 0.0058,
      "step": 2890540
    },
    {
      "epoch": 4.7304648376897545,
      "grad_norm": 0.24113450944423676,
      "learning_rate": 5.426223783135547e-07,
      "loss": 0.0077,
      "step": 2890560
    },
    {
      "epoch": 4.730497568128408,
      "grad_norm": 0.16434191167354584,
      "learning_rate": 5.425564861000376e-07,
      "loss": 0.0089,
      "step": 2890580
    },
    {
      "epoch": 4.730530298567062,
      "grad_norm": 0.3040705919265747,
      "learning_rate": 5.424905938865205e-07,
      "loss": 0.0105,
      "step": 2890600
    },
    {
      "epoch": 4.730563029005714,
      "grad_norm": 0.13449445366859436,
      "learning_rate": 5.424247016730033e-07,
      "loss": 0.0092,
      "step": 2890620
    },
    {
      "epoch": 4.730595759444368,
      "grad_norm": 0.13416655361652374,
      "learning_rate": 5.423588094594862e-07,
      "loss": 0.019,
      "step": 2890640
    },
    {
      "epoch": 4.7306284898830215,
      "grad_norm": 0.21205811202526093,
      "learning_rate": 5.422929172459691e-07,
      "loss": 0.0071,
      "step": 2890660
    },
    {
      "epoch": 4.730661220321675,
      "grad_norm": 0.049915604293346405,
      "learning_rate": 5.42227025032452e-07,
      "loss": 0.0106,
      "step": 2890680
    },
    {
      "epoch": 4.730693950760328,
      "grad_norm": 0.34115129709243774,
      "learning_rate": 5.421611328189349e-07,
      "loss": 0.0126,
      "step": 2890700
    },
    {
      "epoch": 4.730726681198981,
      "grad_norm": 0.10367700457572937,
      "learning_rate": 5.420952406054177e-07,
      "loss": 0.0146,
      "step": 2890720
    },
    {
      "epoch": 4.730759411637635,
      "grad_norm": 0.4608266055583954,
      "learning_rate": 5.420293483919006e-07,
      "loss": 0.0093,
      "step": 2890740
    },
    {
      "epoch": 4.730792142076288,
      "grad_norm": 0.11873514205217361,
      "learning_rate": 5.419634561783835e-07,
      "loss": 0.0074,
      "step": 2890760
    },
    {
      "epoch": 4.730824872514941,
      "grad_norm": 0.4425317049026489,
      "learning_rate": 5.418975639648664e-07,
      "loss": 0.005,
      "step": 2890780
    },
    {
      "epoch": 4.730857602953595,
      "grad_norm": 0.0593758150935173,
      "learning_rate": 5.418316717513492e-07,
      "loss": 0.0073,
      "step": 2890800
    },
    {
      "epoch": 4.730890333392248,
      "grad_norm": 0.1469401717185974,
      "learning_rate": 5.41765779537832e-07,
      "loss": 0.0063,
      "step": 2890820
    },
    {
      "epoch": 4.730923063830901,
      "grad_norm": 0.28412994742393494,
      "learning_rate": 5.416998873243149e-07,
      "loss": 0.0138,
      "step": 2890840
    },
    {
      "epoch": 4.730955794269555,
      "grad_norm": 0.2246505618095398,
      "learning_rate": 5.416339951107978e-07,
      "loss": 0.0079,
      "step": 2890860
    },
    {
      "epoch": 4.730988524708208,
      "grad_norm": 0.3037707209587097,
      "learning_rate": 5.415681028972807e-07,
      "loss": 0.0112,
      "step": 2890880
    },
    {
      "epoch": 4.731021255146861,
      "grad_norm": 0.6620509028434753,
      "learning_rate": 5.415022106837635e-07,
      "loss": 0.0098,
      "step": 2890900
    },
    {
      "epoch": 4.731053985585515,
      "grad_norm": 0.25594228506088257,
      "learning_rate": 5.414363184702464e-07,
      "loss": 0.0106,
      "step": 2890920
    },
    {
      "epoch": 4.731086716024168,
      "grad_norm": 0.16385532915592194,
      "learning_rate": 5.413704262567293e-07,
      "loss": 0.0089,
      "step": 2890940
    },
    {
      "epoch": 4.731119446462822,
      "grad_norm": 0.05599859729409218,
      "learning_rate": 5.413045340432122e-07,
      "loss": 0.0103,
      "step": 2890960
    },
    {
      "epoch": 4.731152176901475,
      "grad_norm": 0.4008377194404602,
      "learning_rate": 5.41238641829695e-07,
      "loss": 0.0087,
      "step": 2890980
    },
    {
      "epoch": 4.731184907340128,
      "grad_norm": 0.4071158170700073,
      "learning_rate": 5.411727496161779e-07,
      "loss": 0.0126,
      "step": 2891000
    },
    {
      "epoch": 4.731217637778782,
      "grad_norm": 0.19693833589553833,
      "learning_rate": 5.411068574026607e-07,
      "loss": 0.01,
      "step": 2891020
    },
    {
      "epoch": 4.7312503682174345,
      "grad_norm": 0.06903964281082153,
      "learning_rate": 5.410409651891437e-07,
      "loss": 0.0111,
      "step": 2891040
    },
    {
      "epoch": 4.731283098656088,
      "grad_norm": 0.4825596511363983,
      "learning_rate": 5.409750729756265e-07,
      "loss": 0.012,
      "step": 2891060
    },
    {
      "epoch": 4.731315829094742,
      "grad_norm": 0.08525335043668747,
      "learning_rate": 5.409091807621094e-07,
      "loss": 0.0092,
      "step": 2891080
    },
    {
      "epoch": 4.731348559533394,
      "grad_norm": 0.22923986613750458,
      "learning_rate": 5.408432885485922e-07,
      "loss": 0.007,
      "step": 2891100
    },
    {
      "epoch": 4.731381289972048,
      "grad_norm": 0.07322406023740768,
      "learning_rate": 5.407773963350751e-07,
      "loss": 0.0081,
      "step": 2891120
    },
    {
      "epoch": 4.731414020410702,
      "grad_norm": 0.20957456529140472,
      "learning_rate": 5.40711504121558e-07,
      "loss": 0.0102,
      "step": 2891140
    },
    {
      "epoch": 4.731446750849355,
      "grad_norm": 0.37718915939331055,
      "learning_rate": 5.406456119080408e-07,
      "loss": 0.007,
      "step": 2891160
    },
    {
      "epoch": 4.731479481288008,
      "grad_norm": 0.2819029688835144,
      "learning_rate": 5.405797196945237e-07,
      "loss": 0.0113,
      "step": 2891180
    },
    {
      "epoch": 4.7315122117266615,
      "grad_norm": 0.1927233338356018,
      "learning_rate": 5.405138274810066e-07,
      "loss": 0.0063,
      "step": 2891200
    },
    {
      "epoch": 4.731544942165315,
      "grad_norm": 0.158366397023201,
      "learning_rate": 5.404479352674895e-07,
      "loss": 0.0093,
      "step": 2891220
    },
    {
      "epoch": 4.731577672603969,
      "grad_norm": 0.03647337853908539,
      "learning_rate": 5.403820430539723e-07,
      "loss": 0.0066,
      "step": 2891240
    },
    {
      "epoch": 4.731610403042621,
      "grad_norm": 0.3466925621032715,
      "learning_rate": 5.403161508404553e-07,
      "loss": 0.0049,
      "step": 2891260
    },
    {
      "epoch": 4.731643133481275,
      "grad_norm": 0.1815386563539505,
      "learning_rate": 5.40250258626938e-07,
      "loss": 0.0088,
      "step": 2891280
    },
    {
      "epoch": 4.731675863919929,
      "grad_norm": 0.5533978939056396,
      "learning_rate": 5.40184366413421e-07,
      "loss": 0.0088,
      "step": 2891300
    },
    {
      "epoch": 4.731708594358581,
      "grad_norm": null,
      "learning_rate": 5.401184741999038e-07,
      "loss": 0.011,
      "step": 2891320
    },
    {
      "epoch": 4.731741324797235,
      "grad_norm": 0.4392642378807068,
      "learning_rate": 5.400525819863868e-07,
      "loss": 0.009,
      "step": 2891340
    },
    {
      "epoch": 4.7317740552358885,
      "grad_norm": 0.3996480703353882,
      "learning_rate": 5.399866897728696e-07,
      "loss": 0.0076,
      "step": 2891360
    },
    {
      "epoch": 4.731806785674541,
      "grad_norm": 0.22241884469985962,
      "learning_rate": 5.399207975593525e-07,
      "loss": 0.0103,
      "step": 2891380
    },
    {
      "epoch": 4.731839516113195,
      "grad_norm": 0.31463971734046936,
      "learning_rate": 5.398549053458353e-07,
      "loss": 0.0083,
      "step": 2891400
    },
    {
      "epoch": 4.731872246551848,
      "grad_norm": 0.17641933262348175,
      "learning_rate": 5.397890131323183e-07,
      "loss": 0.0102,
      "step": 2891420
    },
    {
      "epoch": 4.731904976990502,
      "grad_norm": 0.34159913659095764,
      "learning_rate": 5.397231209188011e-07,
      "loss": 0.0089,
      "step": 2891440
    },
    {
      "epoch": 4.731937707429155,
      "grad_norm": 1.0561144351959229,
      "learning_rate": 5.39657228705284e-07,
      "loss": 0.0097,
      "step": 2891460
    },
    {
      "epoch": 4.731970437867808,
      "grad_norm": 0.11808066815137863,
      "learning_rate": 5.395913364917668e-07,
      "loss": 0.009,
      "step": 2891480
    },
    {
      "epoch": 4.732003168306462,
      "grad_norm": 0.22333437204360962,
      "learning_rate": 5.395254442782496e-07,
      "loss": 0.0086,
      "step": 2891500
    },
    {
      "epoch": 4.732035898745115,
      "grad_norm": 0.0751795619726181,
      "learning_rate": 5.394595520647326e-07,
      "loss": 0.0087,
      "step": 2891520
    },
    {
      "epoch": 4.732068629183768,
      "grad_norm": 0.37947842478752136,
      "learning_rate": 5.393936598512154e-07,
      "loss": 0.0092,
      "step": 2891540
    },
    {
      "epoch": 4.732101359622422,
      "grad_norm": 0.07346262037754059,
      "learning_rate": 5.393277676376983e-07,
      "loss": 0.0104,
      "step": 2891560
    },
    {
      "epoch": 4.732134090061075,
      "grad_norm": 0.1040198877453804,
      "learning_rate": 5.392618754241811e-07,
      "loss": 0.0075,
      "step": 2891580
    },
    {
      "epoch": 4.732166820499728,
      "grad_norm": 0.1250365525484085,
      "learning_rate": 5.391959832106641e-07,
      "loss": 0.0078,
      "step": 2891600
    },
    {
      "epoch": 4.732199550938382,
      "grad_norm": 0.08267715573310852,
      "learning_rate": 5.391300909971469e-07,
      "loss": 0.0066,
      "step": 2891620
    },
    {
      "epoch": 4.732232281377035,
      "grad_norm": 0.22676807641983032,
      "learning_rate": 5.390641987836298e-07,
      "loss": 0.0076,
      "step": 2891640
    },
    {
      "epoch": 4.732265011815688,
      "grad_norm": 0.22894994914531708,
      "learning_rate": 5.389983065701126e-07,
      "loss": 0.0073,
      "step": 2891660
    },
    {
      "epoch": 4.7322977422543415,
      "grad_norm": 0.9769931435585022,
      "learning_rate": 5.389324143565956e-07,
      "loss": 0.0099,
      "step": 2891680
    },
    {
      "epoch": 4.732330472692995,
      "grad_norm": 0.2947225570678711,
      "learning_rate": 5.388665221430784e-07,
      "loss": 0.0102,
      "step": 2891700
    },
    {
      "epoch": 4.732363203131649,
      "grad_norm": 0.2689589560031891,
      "learning_rate": 5.388006299295613e-07,
      "loss": 0.0164,
      "step": 2891720
    },
    {
      "epoch": 4.732395933570301,
      "grad_norm": 0.24326921999454498,
      "learning_rate": 5.387347377160441e-07,
      "loss": 0.0076,
      "step": 2891740
    },
    {
      "epoch": 4.732428664008955,
      "grad_norm": 0.6474157571792603,
      "learning_rate": 5.38668845502527e-07,
      "loss": 0.0071,
      "step": 2891760
    },
    {
      "epoch": 4.732461394447609,
      "grad_norm": 0.34340575337409973,
      "learning_rate": 5.386029532890099e-07,
      "loss": 0.0105,
      "step": 2891780
    },
    {
      "epoch": 4.732494124886262,
      "grad_norm": 0.2876526713371277,
      "learning_rate": 5.385370610754927e-07,
      "loss": 0.0089,
      "step": 2891800
    },
    {
      "epoch": 4.732526855324915,
      "grad_norm": 0.22597141563892365,
      "learning_rate": 5.384711688619757e-07,
      "loss": 0.0087,
      "step": 2891820
    },
    {
      "epoch": 4.7325595857635685,
      "grad_norm": 0.4054345190525055,
      "learning_rate": 5.384052766484584e-07,
      "loss": 0.011,
      "step": 2891840
    },
    {
      "epoch": 4.732592316202222,
      "grad_norm": 0.20734195411205292,
      "learning_rate": 5.383393844349414e-07,
      "loss": 0.0122,
      "step": 2891860
    },
    {
      "epoch": 4.732625046640875,
      "grad_norm": 0.06677156686782837,
      "learning_rate": 5.382734922214242e-07,
      "loss": 0.0105,
      "step": 2891880
    },
    {
      "epoch": 4.732657777079528,
      "grad_norm": 0.06664425879716873,
      "learning_rate": 5.382076000079072e-07,
      "loss": 0.0065,
      "step": 2891900
    },
    {
      "epoch": 4.732690507518182,
      "grad_norm": 0.24412134289741516,
      "learning_rate": 5.3814170779439e-07,
      "loss": 0.0087,
      "step": 2891920
    },
    {
      "epoch": 4.732723237956835,
      "grad_norm": 0.21343643963336945,
      "learning_rate": 5.380758155808728e-07,
      "loss": 0.0088,
      "step": 2891940
    },
    {
      "epoch": 4.732755968395488,
      "grad_norm": 0.2390671968460083,
      "learning_rate": 5.380099233673557e-07,
      "loss": 0.009,
      "step": 2891960
    },
    {
      "epoch": 4.732788698834142,
      "grad_norm": 0.23015670478343964,
      "learning_rate": 5.379440311538386e-07,
      "loss": 0.0095,
      "step": 2891980
    },
    {
      "epoch": 4.7328214292727955,
      "grad_norm": 0.2182329297065735,
      "learning_rate": 5.378781389403215e-07,
      "loss": 0.0082,
      "step": 2892000
    },
    {
      "epoch": 4.732854159711448,
      "grad_norm": 0.12623877823352814,
      "learning_rate": 5.378122467268044e-07,
      "loss": 0.0095,
      "step": 2892020
    },
    {
      "epoch": 4.732886890150102,
      "grad_norm": 0.25119614601135254,
      "learning_rate": 5.377463545132872e-07,
      "loss": 0.0085,
      "step": 2892040
    },
    {
      "epoch": 4.732919620588755,
      "grad_norm": 0.1651189774274826,
      "learning_rate": 5.376804622997701e-07,
      "loss": 0.0072,
      "step": 2892060
    },
    {
      "epoch": 4.732952351027409,
      "grad_norm": 0.10078022629022598,
      "learning_rate": 5.37614570086253e-07,
      "loss": 0.0075,
      "step": 2892080
    },
    {
      "epoch": 4.732985081466062,
      "grad_norm": 0.10003644227981567,
      "learning_rate": 5.375486778727359e-07,
      "loss": 0.0082,
      "step": 2892100
    },
    {
      "epoch": 4.733017811904715,
      "grad_norm": 0.3965168595314026,
      "learning_rate": 5.374827856592187e-07,
      "loss": 0.0098,
      "step": 2892120
    },
    {
      "epoch": 4.733050542343369,
      "grad_norm": 0.19947455823421478,
      "learning_rate": 5.374168934457015e-07,
      "loss": 0.0093,
      "step": 2892140
    },
    {
      "epoch": 4.733083272782022,
      "grad_norm": 0.5440276861190796,
      "learning_rate": 5.373510012321844e-07,
      "loss": 0.0126,
      "step": 2892160
    },
    {
      "epoch": 4.733116003220675,
      "grad_norm": 0.1619100719690323,
      "learning_rate": 5.372851090186673e-07,
      "loss": 0.0094,
      "step": 2892180
    },
    {
      "epoch": 4.733148733659329,
      "grad_norm": 0.430123507976532,
      "learning_rate": 5.372192168051502e-07,
      "loss": 0.0096,
      "step": 2892200
    },
    {
      "epoch": 4.7331814640979815,
      "grad_norm": 0.3317508399486542,
      "learning_rate": 5.37153324591633e-07,
      "loss": 0.0173,
      "step": 2892220
    },
    {
      "epoch": 4.733214194536635,
      "grad_norm": 0.348111093044281,
      "learning_rate": 5.370874323781159e-07,
      "loss": 0.0061,
      "step": 2892240
    },
    {
      "epoch": 4.733246924975289,
      "grad_norm": 0.09602256864309311,
      "learning_rate": 5.370215401645988e-07,
      "loss": 0.0104,
      "step": 2892260
    },
    {
      "epoch": 4.733279655413942,
      "grad_norm": 0.09809938073158264,
      "learning_rate": 5.369556479510817e-07,
      "loss": 0.0096,
      "step": 2892280
    },
    {
      "epoch": 4.733312385852595,
      "grad_norm": 0.15714792907238007,
      "learning_rate": 5.368897557375645e-07,
      "loss": 0.0132,
      "step": 2892300
    },
    {
      "epoch": 4.7333451162912485,
      "grad_norm": 0.06653938442468643,
      "learning_rate": 5.368238635240474e-07,
      "loss": 0.0184,
      "step": 2892320
    },
    {
      "epoch": 4.733377846729902,
      "grad_norm": 0.15304094552993774,
      "learning_rate": 5.367579713105303e-07,
      "loss": 0.0063,
      "step": 2892340
    },
    {
      "epoch": 4.733410577168555,
      "grad_norm": 0.09177307784557343,
      "learning_rate": 5.366920790970132e-07,
      "loss": 0.0089,
      "step": 2892360
    },
    {
      "epoch": 4.733443307607208,
      "grad_norm": 0.31476983428001404,
      "learning_rate": 5.36626186883496e-07,
      "loss": 0.0126,
      "step": 2892380
    },
    {
      "epoch": 4.733476038045862,
      "grad_norm": 0.1952521950006485,
      "learning_rate": 5.365602946699789e-07,
      "loss": 0.0102,
      "step": 2892400
    },
    {
      "epoch": 4.733508768484516,
      "grad_norm": 0.47224414348602295,
      "learning_rate": 5.364944024564617e-07,
      "loss": 0.0118,
      "step": 2892420
    },
    {
      "epoch": 4.733541498923168,
      "grad_norm": 0.11714809387922287,
      "learning_rate": 5.364285102429447e-07,
      "loss": 0.0054,
      "step": 2892440
    },
    {
      "epoch": 4.733574229361822,
      "grad_norm": 0.5056970119476318,
      "learning_rate": 5.363626180294275e-07,
      "loss": 0.0086,
      "step": 2892460
    },
    {
      "epoch": 4.7336069598004755,
      "grad_norm": 0.1271485686302185,
      "learning_rate": 5.362967258159105e-07,
      "loss": 0.0055,
      "step": 2892480
    },
    {
      "epoch": 4.733639690239128,
      "grad_norm": 0.12113980203866959,
      "learning_rate": 5.362308336023932e-07,
      "loss": 0.0093,
      "step": 2892500
    },
    {
      "epoch": 4.733672420677782,
      "grad_norm": 0.2139124572277069,
      "learning_rate": 5.361649413888761e-07,
      "loss": 0.0075,
      "step": 2892520
    },
    {
      "epoch": 4.733705151116435,
      "grad_norm": 0.5031926035881042,
      "learning_rate": 5.36099049175359e-07,
      "loss": 0.0095,
      "step": 2892540
    },
    {
      "epoch": 4.733737881555088,
      "grad_norm": 0.15111519396305084,
      "learning_rate": 5.360331569618419e-07,
      "loss": 0.0121,
      "step": 2892560
    },
    {
      "epoch": 4.733770611993742,
      "grad_norm": 0.18240363895893097,
      "learning_rate": 5.359672647483248e-07,
      "loss": 0.0086,
      "step": 2892580
    },
    {
      "epoch": 4.733803342432395,
      "grad_norm": 0.19484128057956696,
      "learning_rate": 5.359013725348075e-07,
      "loss": 0.0095,
      "step": 2892600
    },
    {
      "epoch": 4.733836072871049,
      "grad_norm": 0.1109924167394638,
      "learning_rate": 5.358354803212905e-07,
      "loss": 0.0058,
      "step": 2892620
    },
    {
      "epoch": 4.733868803309702,
      "grad_norm": 0.19387999176979065,
      "learning_rate": 5.357695881077733e-07,
      "loss": 0.0081,
      "step": 2892640
    },
    {
      "epoch": 4.733901533748355,
      "grad_norm": 0.13614718616008759,
      "learning_rate": 5.357036958942563e-07,
      "loss": 0.01,
      "step": 2892660
    },
    {
      "epoch": 4.733934264187009,
      "grad_norm": 0.32895585894584656,
      "learning_rate": 5.356378036807391e-07,
      "loss": 0.0084,
      "step": 2892680
    },
    {
      "epoch": 4.733966994625662,
      "grad_norm": 0.11341771483421326,
      "learning_rate": 5.35571911467222e-07,
      "loss": 0.0053,
      "step": 2892700
    },
    {
      "epoch": 4.733999725064315,
      "grad_norm": 0.41877511143684387,
      "learning_rate": 5.355060192537048e-07,
      "loss": 0.0081,
      "step": 2892720
    },
    {
      "epoch": 4.734032455502969,
      "grad_norm": 0.17897017300128937,
      "learning_rate": 5.354401270401878e-07,
      "loss": 0.0066,
      "step": 2892740
    },
    {
      "epoch": 4.734065185941622,
      "grad_norm": 0.17672963440418243,
      "learning_rate": 5.353742348266706e-07,
      "loss": 0.0076,
      "step": 2892760
    },
    {
      "epoch": 4.734097916380275,
      "grad_norm": 0.7978132963180542,
      "learning_rate": 5.353083426131535e-07,
      "loss": 0.0071,
      "step": 2892780
    },
    {
      "epoch": 4.734130646818929,
      "grad_norm": 0.5484955906867981,
      "learning_rate": 5.352424503996363e-07,
      "loss": 0.0106,
      "step": 2892800
    },
    {
      "epoch": 4.734163377257582,
      "grad_norm": 0.19968341290950775,
      "learning_rate": 5.351765581861191e-07,
      "loss": 0.01,
      "step": 2892820
    },
    {
      "epoch": 4.734196107696235,
      "grad_norm": 0.11690309643745422,
      "learning_rate": 5.351106659726021e-07,
      "loss": 0.0059,
      "step": 2892840
    },
    {
      "epoch": 4.7342288381348885,
      "grad_norm": 0.23296613991260529,
      "learning_rate": 5.350447737590849e-07,
      "loss": 0.0085,
      "step": 2892860
    },
    {
      "epoch": 4.734261568573542,
      "grad_norm": 0.08450501412153244,
      "learning_rate": 5.349788815455678e-07,
      "loss": 0.0056,
      "step": 2892880
    },
    {
      "epoch": 4.734294299012196,
      "grad_norm": 0.31793922185897827,
      "learning_rate": 5.349129893320506e-07,
      "loss": 0.0151,
      "step": 2892900
    },
    {
      "epoch": 4.734327029450848,
      "grad_norm": 0.2038327157497406,
      "learning_rate": 5.348470971185336e-07,
      "loss": 0.0071,
      "step": 2892920
    },
    {
      "epoch": 4.734359759889502,
      "grad_norm": 0.11686641722917557,
      "learning_rate": 5.347812049050164e-07,
      "loss": 0.0069,
      "step": 2892940
    },
    {
      "epoch": 4.734392490328156,
      "grad_norm": 0.34793174266815186,
      "learning_rate": 5.347153126914993e-07,
      "loss": 0.0082,
      "step": 2892960
    },
    {
      "epoch": 4.734425220766809,
      "grad_norm": 0.11739295721054077,
      "learning_rate": 5.346494204779821e-07,
      "loss": 0.0061,
      "step": 2892980
    },
    {
      "epoch": 4.734457951205462,
      "grad_norm": 0.23261895775794983,
      "learning_rate": 5.345835282644651e-07,
      "loss": 0.0095,
      "step": 2893000
    },
    {
      "epoch": 4.7344906816441155,
      "grad_norm": 0.12194899469614029,
      "learning_rate": 5.345176360509479e-07,
      "loss": 0.0079,
      "step": 2893020
    },
    {
      "epoch": 4.734523412082769,
      "grad_norm": 0.39030036330223083,
      "learning_rate": 5.344517438374308e-07,
      "loss": 0.0102,
      "step": 2893040
    },
    {
      "epoch": 4.734556142521422,
      "grad_norm": 0.07958755642175674,
      "learning_rate": 5.343858516239136e-07,
      "loss": 0.0096,
      "step": 2893060
    },
    {
      "epoch": 4.734588872960075,
      "grad_norm": 0.10859259217977524,
      "learning_rate": 5.343199594103965e-07,
      "loss": 0.0091,
      "step": 2893080
    },
    {
      "epoch": 4.734621603398729,
      "grad_norm": 0.6319004893302917,
      "learning_rate": 5.342540671968794e-07,
      "loss": 0.0073,
      "step": 2893100
    },
    {
      "epoch": 4.734654333837382,
      "grad_norm": 0.48236286640167236,
      "learning_rate": 5.341881749833623e-07,
      "loss": 0.0109,
      "step": 2893120
    },
    {
      "epoch": 4.734687064276035,
      "grad_norm": 0.2811356782913208,
      "learning_rate": 5.341222827698452e-07,
      "loss": 0.0123,
      "step": 2893140
    },
    {
      "epoch": 4.734719794714689,
      "grad_norm": 0.40700286626815796,
      "learning_rate": 5.340563905563279e-07,
      "loss": 0.0087,
      "step": 2893160
    },
    {
      "epoch": 4.734752525153342,
      "grad_norm": 0.16635888814926147,
      "learning_rate": 5.339904983428109e-07,
      "loss": 0.0082,
      "step": 2893180
    },
    {
      "epoch": 4.734785255591995,
      "grad_norm": 0.6501713991165161,
      "learning_rate": 5.339246061292937e-07,
      "loss": 0.0084,
      "step": 2893200
    },
    {
      "epoch": 4.734817986030649,
      "grad_norm": 0.16046346724033356,
      "learning_rate": 5.338587139157767e-07,
      "loss": 0.0067,
      "step": 2893220
    },
    {
      "epoch": 4.734850716469302,
      "grad_norm": 0.09069208800792694,
      "learning_rate": 5.337928217022595e-07,
      "loss": 0.0064,
      "step": 2893240
    },
    {
      "epoch": 4.734883446907956,
      "grad_norm": 0.1974799931049347,
      "learning_rate": 5.337269294887423e-07,
      "loss": 0.0101,
      "step": 2893260
    },
    {
      "epoch": 4.734916177346609,
      "grad_norm": 0.37914836406707764,
      "learning_rate": 5.336610372752252e-07,
      "loss": 0.0082,
      "step": 2893280
    },
    {
      "epoch": 4.734948907785262,
      "grad_norm": 0.12825345993041992,
      "learning_rate": 5.335951450617081e-07,
      "loss": 0.011,
      "step": 2893300
    },
    {
      "epoch": 4.734981638223916,
      "grad_norm": 0.30945420265197754,
      "learning_rate": 5.33529252848191e-07,
      "loss": 0.0103,
      "step": 2893320
    },
    {
      "epoch": 4.7350143686625685,
      "grad_norm": 1.2341359853744507,
      "learning_rate": 5.334633606346739e-07,
      "loss": 0.0109,
      "step": 2893340
    },
    {
      "epoch": 4.735047099101222,
      "grad_norm": 0.3856630027294159,
      "learning_rate": 5.333974684211567e-07,
      "loss": 0.0082,
      "step": 2893360
    },
    {
      "epoch": 4.735079829539876,
      "grad_norm": 0.06781721115112305,
      "learning_rate": 5.333315762076396e-07,
      "loss": 0.0093,
      "step": 2893380
    },
    {
      "epoch": 4.735112559978528,
      "grad_norm": 0.1440732181072235,
      "learning_rate": 5.332656839941225e-07,
      "loss": 0.0086,
      "step": 2893400
    },
    {
      "epoch": 4.735145290417182,
      "grad_norm": 0.12405794113874435,
      "learning_rate": 5.331997917806054e-07,
      "loss": 0.0053,
      "step": 2893420
    },
    {
      "epoch": 4.735178020855836,
      "grad_norm": 0.1037370041012764,
      "learning_rate": 5.331338995670882e-07,
      "loss": 0.0115,
      "step": 2893440
    },
    {
      "epoch": 4.735210751294489,
      "grad_norm": 0.20439349114894867,
      "learning_rate": 5.33068007353571e-07,
      "loss": 0.0123,
      "step": 2893460
    },
    {
      "epoch": 4.735243481733142,
      "grad_norm": 0.35377803444862366,
      "learning_rate": 5.330021151400539e-07,
      "loss": 0.0094,
      "step": 2893480
    },
    {
      "epoch": 4.7352762121717955,
      "grad_norm": 0.29064640402793884,
      "learning_rate": 5.329362229265368e-07,
      "loss": 0.0062,
      "step": 2893500
    },
    {
      "epoch": 4.735308942610449,
      "grad_norm": 0.3477827310562134,
      "learning_rate": 5.328703307130197e-07,
      "loss": 0.0077,
      "step": 2893520
    },
    {
      "epoch": 4.735341673049103,
      "grad_norm": 0.24824148416519165,
      "learning_rate": 5.328044384995025e-07,
      "loss": 0.0105,
      "step": 2893540
    },
    {
      "epoch": 4.735374403487755,
      "grad_norm": 0.18956401944160461,
      "learning_rate": 5.327385462859854e-07,
      "loss": 0.0066,
      "step": 2893560
    },
    {
      "epoch": 4.735407133926409,
      "grad_norm": 0.28107917308807373,
      "learning_rate": 5.326726540724683e-07,
      "loss": 0.0094,
      "step": 2893580
    },
    {
      "epoch": 4.735439864365063,
      "grad_norm": 0.12032666057348251,
      "learning_rate": 5.326067618589512e-07,
      "loss": 0.0105,
      "step": 2893600
    },
    {
      "epoch": 4.735472594803715,
      "grad_norm": 0.1474296897649765,
      "learning_rate": 5.32540869645434e-07,
      "loss": 0.0113,
      "step": 2893620
    },
    {
      "epoch": 4.735505325242369,
      "grad_norm": 0.11285010725259781,
      "learning_rate": 5.324749774319169e-07,
      "loss": 0.0088,
      "step": 2893640
    },
    {
      "epoch": 4.7355380556810225,
      "grad_norm": 0.13496622443199158,
      "learning_rate": 5.324090852183998e-07,
      "loss": 0.01,
      "step": 2893660
    },
    {
      "epoch": 4.735570786119675,
      "grad_norm": 0.19384676218032837,
      "learning_rate": 5.323431930048827e-07,
      "loss": 0.01,
      "step": 2893680
    },
    {
      "epoch": 4.735603516558329,
      "grad_norm": 0.37939879298210144,
      "learning_rate": 5.322773007913655e-07,
      "loss": 0.0064,
      "step": 2893700
    },
    {
      "epoch": 4.735636246996982,
      "grad_norm": 0.4020061492919922,
      "learning_rate": 5.322114085778484e-07,
      "loss": 0.0118,
      "step": 2893720
    },
    {
      "epoch": 4.735668977435636,
      "grad_norm": 0.35251033306121826,
      "learning_rate": 5.321455163643312e-07,
      "loss": 0.0089,
      "step": 2893740
    },
    {
      "epoch": 4.735701707874289,
      "grad_norm": 0.21243105828762054,
      "learning_rate": 5.320796241508142e-07,
      "loss": 0.0099,
      "step": 2893760
    },
    {
      "epoch": 4.735734438312942,
      "grad_norm": 0.491440087556839,
      "learning_rate": 5.32013731937297e-07,
      "loss": 0.0113,
      "step": 2893780
    },
    {
      "epoch": 4.735767168751596,
      "grad_norm": 0.2831399142742157,
      "learning_rate": 5.3194783972378e-07,
      "loss": 0.0098,
      "step": 2893800
    },
    {
      "epoch": 4.735799899190249,
      "grad_norm": 0.4474663734436035,
      "learning_rate": 5.318819475102627e-07,
      "loss": 0.0091,
      "step": 2893820
    },
    {
      "epoch": 4.735832629628902,
      "grad_norm": 0.5205405354499817,
      "learning_rate": 5.318160552967456e-07,
      "loss": 0.006,
      "step": 2893840
    },
    {
      "epoch": 4.735865360067556,
      "grad_norm": 0.23140046000480652,
      "learning_rate": 5.317501630832285e-07,
      "loss": 0.0098,
      "step": 2893860
    },
    {
      "epoch": 4.735898090506209,
      "grad_norm": 0.32079967856407166,
      "learning_rate": 5.316842708697114e-07,
      "loss": 0.0046,
      "step": 2893880
    },
    {
      "epoch": 4.735930820944862,
      "grad_norm": 0.30984073877334595,
      "learning_rate": 5.316183786561943e-07,
      "loss": 0.0066,
      "step": 2893900
    },
    {
      "epoch": 4.735963551383516,
      "grad_norm": 0.13637173175811768,
      "learning_rate": 5.31552486442677e-07,
      "loss": 0.0084,
      "step": 2893920
    },
    {
      "epoch": 4.735996281822169,
      "grad_norm": 0.09773062914609909,
      "learning_rate": 5.3148659422916e-07,
      "loss": 0.0071,
      "step": 2893940
    },
    {
      "epoch": 4.736029012260822,
      "grad_norm": 0.0963854193687439,
      "learning_rate": 5.314207020156428e-07,
      "loss": 0.008,
      "step": 2893960
    },
    {
      "epoch": 4.7360617426994756,
      "grad_norm": 0.2605332136154175,
      "learning_rate": 5.313548098021258e-07,
      "loss": 0.0105,
      "step": 2893980
    },
    {
      "epoch": 4.736094473138129,
      "grad_norm": 0.31557318568229675,
      "learning_rate": 5.312889175886086e-07,
      "loss": 0.0078,
      "step": 2894000
    },
    {
      "epoch": 4.736127203576782,
      "grad_norm": 0.2156752198934555,
      "learning_rate": 5.312230253750915e-07,
      "loss": 0.0065,
      "step": 2894020
    },
    {
      "epoch": 4.7361599340154354,
      "grad_norm": 0.14511848986148834,
      "learning_rate": 5.311571331615743e-07,
      "loss": 0.0093,
      "step": 2894040
    },
    {
      "epoch": 4.736192664454089,
      "grad_norm": 0.4463392198085785,
      "learning_rate": 5.310912409480573e-07,
      "loss": 0.0068,
      "step": 2894060
    },
    {
      "epoch": 4.736225394892743,
      "grad_norm": 0.44070807099342346,
      "learning_rate": 5.310253487345401e-07,
      "loss": 0.0094,
      "step": 2894080
    },
    {
      "epoch": 4.736258125331395,
      "grad_norm": 0.1597115695476532,
      "learning_rate": 5.30959456521023e-07,
      "loss": 0.0134,
      "step": 2894100
    },
    {
      "epoch": 4.736290855770049,
      "grad_norm": 0.35406485199928284,
      "learning_rate": 5.308935643075058e-07,
      "loss": 0.0074,
      "step": 2894120
    },
    {
      "epoch": 4.7363235862087025,
      "grad_norm": 0.2078949511051178,
      "learning_rate": 5.308276720939887e-07,
      "loss": 0.0088,
      "step": 2894140
    },
    {
      "epoch": 4.736356316647356,
      "grad_norm": 0.12392345815896988,
      "learning_rate": 5.307617798804716e-07,
      "loss": 0.0099,
      "step": 2894160
    },
    {
      "epoch": 4.736389047086009,
      "grad_norm": 0.2398359775543213,
      "learning_rate": 5.306958876669544e-07,
      "loss": 0.0063,
      "step": 2894180
    },
    {
      "epoch": 4.736421777524662,
      "grad_norm": 0.24949631094932556,
      "learning_rate": 5.306299954534373e-07,
      "loss": 0.0102,
      "step": 2894200
    },
    {
      "epoch": 4.736454507963316,
      "grad_norm": 0.1423109769821167,
      "learning_rate": 5.305641032399201e-07,
      "loss": 0.0098,
      "step": 2894220
    },
    {
      "epoch": 4.736487238401969,
      "grad_norm": 0.2515485882759094,
      "learning_rate": 5.304982110264031e-07,
      "loss": 0.0073,
      "step": 2894240
    },
    {
      "epoch": 4.736519968840622,
      "grad_norm": 0.33617955446243286,
      "learning_rate": 5.304323188128859e-07,
      "loss": 0.0084,
      "step": 2894260
    },
    {
      "epoch": 4.736552699279276,
      "grad_norm": 0.12065677344799042,
      "learning_rate": 5.303664265993688e-07,
      "loss": 0.0077,
      "step": 2894280
    },
    {
      "epoch": 4.736585429717929,
      "grad_norm": 0.2066422402858734,
      "learning_rate": 5.303005343858516e-07,
      "loss": 0.0106,
      "step": 2894300
    },
    {
      "epoch": 4.736618160156582,
      "grad_norm": 0.4159987270832062,
      "learning_rate": 5.302346421723346e-07,
      "loss": 0.0104,
      "step": 2894320
    },
    {
      "epoch": 4.736650890595236,
      "grad_norm": 0.18733803927898407,
      "learning_rate": 5.301687499588174e-07,
      "loss": 0.0076,
      "step": 2894340
    },
    {
      "epoch": 4.736683621033889,
      "grad_norm": 0.06978525221347809,
      "learning_rate": 5.301028577453003e-07,
      "loss": 0.0085,
      "step": 2894360
    },
    {
      "epoch": 4.736716351472542,
      "grad_norm": 0.332696795463562,
      "learning_rate": 5.300369655317831e-07,
      "loss": 0.0096,
      "step": 2894380
    },
    {
      "epoch": 4.736749081911196,
      "grad_norm": 0.27677443623542786,
      "learning_rate": 5.29971073318266e-07,
      "loss": 0.0092,
      "step": 2894400
    },
    {
      "epoch": 4.736781812349849,
      "grad_norm": 0.5249496698379517,
      "learning_rate": 5.299051811047489e-07,
      "loss": 0.0085,
      "step": 2894420
    },
    {
      "epoch": 4.736814542788503,
      "grad_norm": 0.14502716064453125,
      "learning_rate": 5.298392888912318e-07,
      "loss": 0.0096,
      "step": 2894440
    },
    {
      "epoch": 4.736847273227156,
      "grad_norm": 0.33346444368362427,
      "learning_rate": 5.297733966777147e-07,
      "loss": 0.0111,
      "step": 2894460
    },
    {
      "epoch": 4.736880003665809,
      "grad_norm": 0.23304839432239532,
      "learning_rate": 5.297075044641974e-07,
      "loss": 0.0106,
      "step": 2894480
    },
    {
      "epoch": 4.736912734104463,
      "grad_norm": 0.1431005299091339,
      "learning_rate": 5.296416122506804e-07,
      "loss": 0.0062,
      "step": 2894500
    },
    {
      "epoch": 4.7369454645431155,
      "grad_norm": 0.5680925846099854,
      "learning_rate": 5.295757200371632e-07,
      "loss": 0.0096,
      "step": 2894520
    },
    {
      "epoch": 4.736978194981769,
      "grad_norm": 0.16698609292507172,
      "learning_rate": 5.295098278236462e-07,
      "loss": 0.0095,
      "step": 2894540
    },
    {
      "epoch": 4.737010925420423,
      "grad_norm": 0.35368043184280396,
      "learning_rate": 5.29443935610129e-07,
      "loss": 0.0072,
      "step": 2894560
    },
    {
      "epoch": 4.737043655859075,
      "grad_norm": 0.16739313304424286,
      "learning_rate": 5.293780433966118e-07,
      "loss": 0.006,
      "step": 2894580
    },
    {
      "epoch": 4.737076386297729,
      "grad_norm": 0.2650548219680786,
      "learning_rate": 5.293121511830947e-07,
      "loss": 0.0119,
      "step": 2894600
    },
    {
      "epoch": 4.737109116736383,
      "grad_norm": 0.43098917603492737,
      "learning_rate": 5.292462589695776e-07,
      "loss": 0.0079,
      "step": 2894620
    },
    {
      "epoch": 4.737141847175036,
      "grad_norm": 0.3096799850463867,
      "learning_rate": 5.291803667560605e-07,
      "loss": 0.0104,
      "step": 2894640
    },
    {
      "epoch": 4.737174577613689,
      "grad_norm": 0.13169358670711517,
      "learning_rate": 5.291144745425434e-07,
      "loss": 0.0075,
      "step": 2894660
    },
    {
      "epoch": 4.7372073080523425,
      "grad_norm": 0.22951234877109528,
      "learning_rate": 5.290485823290262e-07,
      "loss": 0.0077,
      "step": 2894680
    },
    {
      "epoch": 4.737240038490996,
      "grad_norm": 0.25339964032173157,
      "learning_rate": 5.289826901155091e-07,
      "loss": 0.009,
      "step": 2894700
    },
    {
      "epoch": 4.73727276892965,
      "grad_norm": 0.03738116845488548,
      "learning_rate": 5.28916797901992e-07,
      "loss": 0.0104,
      "step": 2894720
    },
    {
      "epoch": 4.737305499368302,
      "grad_norm": 0.18618960678577423,
      "learning_rate": 5.288509056884749e-07,
      "loss": 0.0093,
      "step": 2894740
    },
    {
      "epoch": 4.737338229806956,
      "grad_norm": 0.22726406157016754,
      "learning_rate": 5.287850134749577e-07,
      "loss": 0.0118,
      "step": 2894760
    },
    {
      "epoch": 4.7373709602456096,
      "grad_norm": 0.1900615394115448,
      "learning_rate": 5.287191212614406e-07,
      "loss": 0.0101,
      "step": 2894780
    },
    {
      "epoch": 4.737403690684262,
      "grad_norm": 0.26740843057632446,
      "learning_rate": 5.286532290479234e-07,
      "loss": 0.0084,
      "step": 2894800
    },
    {
      "epoch": 4.737436421122916,
      "grad_norm": 0.33864080905914307,
      "learning_rate": 5.285873368344064e-07,
      "loss": 0.0162,
      "step": 2894820
    },
    {
      "epoch": 4.7374691515615694,
      "grad_norm": 0.24440108239650726,
      "learning_rate": 5.285214446208892e-07,
      "loss": 0.0121,
      "step": 2894840
    },
    {
      "epoch": 4.737501882000222,
      "grad_norm": 0.06060131639242172,
      "learning_rate": 5.28455552407372e-07,
      "loss": 0.0087,
      "step": 2894860
    },
    {
      "epoch": 4.737534612438876,
      "grad_norm": 0.09759750217199326,
      "learning_rate": 5.283896601938549e-07,
      "loss": 0.0101,
      "step": 2894880
    },
    {
      "epoch": 4.737567342877529,
      "grad_norm": 0.1584828943014145,
      "learning_rate": 5.283237679803378e-07,
      "loss": 0.0073,
      "step": 2894900
    },
    {
      "epoch": 4.737600073316183,
      "grad_norm": 0.2812560796737671,
      "learning_rate": 5.282578757668207e-07,
      "loss": 0.0106,
      "step": 2894920
    },
    {
      "epoch": 4.737632803754836,
      "grad_norm": 0.20273713767528534,
      "learning_rate": 5.281919835533035e-07,
      "loss": 0.0078,
      "step": 2894940
    },
    {
      "epoch": 4.737665534193489,
      "grad_norm": 0.16305208206176758,
      "learning_rate": 5.281260913397864e-07,
      "loss": 0.0106,
      "step": 2894960
    },
    {
      "epoch": 4.737698264632143,
      "grad_norm": 0.23865340650081635,
      "learning_rate": 5.280601991262693e-07,
      "loss": 0.0078,
      "step": 2894980
    },
    {
      "epoch": 4.737730995070796,
      "grad_norm": 0.06143025681376457,
      "learning_rate": 5.279943069127522e-07,
      "loss": 0.0083,
      "step": 2895000
    },
    {
      "epoch": 4.737763725509449,
      "grad_norm": 0.13138826191425323,
      "learning_rate": 5.27928414699235e-07,
      "loss": 0.0091,
      "step": 2895020
    },
    {
      "epoch": 4.737796455948103,
      "grad_norm": 0.14344744384288788,
      "learning_rate": 5.278625224857179e-07,
      "loss": 0.0073,
      "step": 2895040
    },
    {
      "epoch": 4.737829186386756,
      "grad_norm": 0.19353121519088745,
      "learning_rate": 5.277966302722007e-07,
      "loss": 0.0094,
      "step": 2895060
    },
    {
      "epoch": 4.737861916825409,
      "grad_norm": 0.1468622386455536,
      "learning_rate": 5.277307380586837e-07,
      "loss": 0.0107,
      "step": 2895080
    },
    {
      "epoch": 4.737894647264063,
      "grad_norm": 0.28477826714515686,
      "learning_rate": 5.276648458451665e-07,
      "loss": 0.0082,
      "step": 2895100
    },
    {
      "epoch": 4.737927377702716,
      "grad_norm": 0.19627177715301514,
      "learning_rate": 5.275989536316495e-07,
      "loss": 0.0125,
      "step": 2895120
    },
    {
      "epoch": 4.737960108141369,
      "grad_norm": 0.2677963972091675,
      "learning_rate": 5.275330614181322e-07,
      "loss": 0.0084,
      "step": 2895140
    },
    {
      "epoch": 4.7379928385800225,
      "grad_norm": 0.16047348082065582,
      "learning_rate": 5.274671692046152e-07,
      "loss": 0.0086,
      "step": 2895160
    },
    {
      "epoch": 4.738025569018676,
      "grad_norm": 0.3317684233188629,
      "learning_rate": 5.27401276991098e-07,
      "loss": 0.0063,
      "step": 2895180
    },
    {
      "epoch": 4.73805829945733,
      "grad_norm": 0.48661577701568604,
      "learning_rate": 5.27335384777581e-07,
      "loss": 0.0133,
      "step": 2895200
    },
    {
      "epoch": 4.738091029895982,
      "grad_norm": 0.16435173153877258,
      "learning_rate": 5.272694925640638e-07,
      "loss": 0.0087,
      "step": 2895220
    },
    {
      "epoch": 4.738123760334636,
      "grad_norm": 0.2623057961463928,
      "learning_rate": 5.272036003505465e-07,
      "loss": 0.007,
      "step": 2895240
    },
    {
      "epoch": 4.73815649077329,
      "grad_norm": 0.34768545627593994,
      "learning_rate": 5.271377081370295e-07,
      "loss": 0.0102,
      "step": 2895260
    },
    {
      "epoch": 4.738189221211942,
      "grad_norm": 0.18597297370433807,
      "learning_rate": 5.270718159235123e-07,
      "loss": 0.0087,
      "step": 2895280
    },
    {
      "epoch": 4.738221951650596,
      "grad_norm": 0.4548680782318115,
      "learning_rate": 5.270059237099953e-07,
      "loss": 0.0123,
      "step": 2895300
    },
    {
      "epoch": 4.7382546820892495,
      "grad_norm": 0.26866811513900757,
      "learning_rate": 5.269400314964781e-07,
      "loss": 0.009,
      "step": 2895320
    },
    {
      "epoch": 4.738287412527903,
      "grad_norm": 0.404565691947937,
      "learning_rate": 5.26874139282961e-07,
      "loss": 0.0108,
      "step": 2895340
    },
    {
      "epoch": 4.738320142966556,
      "grad_norm": 0.14488297700881958,
      "learning_rate": 5.268082470694438e-07,
      "loss": 0.0081,
      "step": 2895360
    },
    {
      "epoch": 4.738352873405209,
      "grad_norm": 0.30528396368026733,
      "learning_rate": 5.267423548559268e-07,
      "loss": 0.0101,
      "step": 2895380
    },
    {
      "epoch": 4.738385603843863,
      "grad_norm": 0.12772014737129211,
      "learning_rate": 5.266764626424096e-07,
      "loss": 0.0063,
      "step": 2895400
    },
    {
      "epoch": 4.738418334282516,
      "grad_norm": 0.13767418265342712,
      "learning_rate": 5.266105704288925e-07,
      "loss": 0.0074,
      "step": 2895420
    },
    {
      "epoch": 4.738451064721169,
      "grad_norm": 0.07151336967945099,
      "learning_rate": 5.265446782153753e-07,
      "loss": 0.0052,
      "step": 2895440
    },
    {
      "epoch": 4.738483795159823,
      "grad_norm": 0.4597952365875244,
      "learning_rate": 5.264787860018582e-07,
      "loss": 0.0072,
      "step": 2895460
    },
    {
      "epoch": 4.738516525598476,
      "grad_norm": 0.203005850315094,
      "learning_rate": 5.264128937883411e-07,
      "loss": 0.0085,
      "step": 2895480
    },
    {
      "epoch": 4.738549256037129,
      "grad_norm": 0.1072978749871254,
      "learning_rate": 5.26347001574824e-07,
      "loss": 0.0151,
      "step": 2895500
    },
    {
      "epoch": 4.738581986475783,
      "grad_norm": 0.44010716676712036,
      "learning_rate": 5.262811093613068e-07,
      "loss": 0.0145,
      "step": 2895520
    },
    {
      "epoch": 4.738614716914436,
      "grad_norm": 0.21154893934726715,
      "learning_rate": 5.262152171477896e-07,
      "loss": 0.0086,
      "step": 2895540
    },
    {
      "epoch": 4.738647447353089,
      "grad_norm": 0.2136712372303009,
      "learning_rate": 5.261493249342726e-07,
      "loss": 0.0106,
      "step": 2895560
    },
    {
      "epoch": 4.738680177791743,
      "grad_norm": 0.22864486277103424,
      "learning_rate": 5.260834327207554e-07,
      "loss": 0.0118,
      "step": 2895580
    },
    {
      "epoch": 4.738712908230396,
      "grad_norm": 0.12380201369524002,
      "learning_rate": 5.260175405072383e-07,
      "loss": 0.0084,
      "step": 2895600
    },
    {
      "epoch": 4.73874563866905,
      "grad_norm": 0.14708620309829712,
      "learning_rate": 5.259516482937211e-07,
      "loss": 0.0059,
      "step": 2895620
    },
    {
      "epoch": 4.738778369107703,
      "grad_norm": 0.15775154531002045,
      "learning_rate": 5.258857560802041e-07,
      "loss": 0.0088,
      "step": 2895640
    },
    {
      "epoch": 4.738811099546356,
      "grad_norm": 0.4854590892791748,
      "learning_rate": 5.258198638666869e-07,
      "loss": 0.0083,
      "step": 2895660
    },
    {
      "epoch": 4.73884382998501,
      "grad_norm": 0.3780113756656647,
      "learning_rate": 5.257539716531698e-07,
      "loss": 0.0122,
      "step": 2895680
    },
    {
      "epoch": 4.7388765604236625,
      "grad_norm": 0.45428988337516785,
      "learning_rate": 5.256880794396526e-07,
      "loss": 0.0098,
      "step": 2895700
    },
    {
      "epoch": 4.738909290862316,
      "grad_norm": 0.3417983651161194,
      "learning_rate": 5.256221872261355e-07,
      "loss": 0.0063,
      "step": 2895720
    },
    {
      "epoch": 4.73894202130097,
      "grad_norm": 0.3208330571651459,
      "learning_rate": 5.255562950126184e-07,
      "loss": 0.0104,
      "step": 2895740
    },
    {
      "epoch": 4.738974751739622,
      "grad_norm": 0.17330139875411987,
      "learning_rate": 5.254904027991013e-07,
      "loss": 0.0055,
      "step": 2895760
    },
    {
      "epoch": 4.739007482178276,
      "grad_norm": 0.3308632969856262,
      "learning_rate": 5.254245105855842e-07,
      "loss": 0.0125,
      "step": 2895780
    },
    {
      "epoch": 4.7390402126169295,
      "grad_norm": 0.4380488693714142,
      "learning_rate": 5.25358618372067e-07,
      "loss": 0.008,
      "step": 2895800
    },
    {
      "epoch": 4.739072943055583,
      "grad_norm": 0.0984436497092247,
      "learning_rate": 5.252927261585499e-07,
      "loss": 0.0083,
      "step": 2895820
    },
    {
      "epoch": 4.739105673494236,
      "grad_norm": 0.1823337972164154,
      "learning_rate": 5.252268339450328e-07,
      "loss": 0.0112,
      "step": 2895840
    },
    {
      "epoch": 4.739138403932889,
      "grad_norm": 0.09909548610448837,
      "learning_rate": 5.251609417315157e-07,
      "loss": 0.0074,
      "step": 2895860
    },
    {
      "epoch": 4.739171134371543,
      "grad_norm": 0.46160340309143066,
      "learning_rate": 5.250950495179985e-07,
      "loss": 0.0105,
      "step": 2895880
    },
    {
      "epoch": 4.739203864810197,
      "grad_norm": 0.07109510153532028,
      "learning_rate": 5.250291573044813e-07,
      "loss": 0.0076,
      "step": 2895900
    },
    {
      "epoch": 4.739236595248849,
      "grad_norm": 0.18095539510250092,
      "learning_rate": 5.249632650909642e-07,
      "loss": 0.009,
      "step": 2895920
    },
    {
      "epoch": 4.739269325687503,
      "grad_norm": 0.35351088643074036,
      "learning_rate": 5.248973728774471e-07,
      "loss": 0.0139,
      "step": 2895940
    },
    {
      "epoch": 4.7393020561261565,
      "grad_norm": 0.1542242467403412,
      "learning_rate": 5.2483148066393e-07,
      "loss": 0.0073,
      "step": 2895960
    },
    {
      "epoch": 4.739334786564809,
      "grad_norm": 0.3688180446624756,
      "learning_rate": 5.247655884504129e-07,
      "loss": 0.0068,
      "step": 2895980
    },
    {
      "epoch": 4.739367517003463,
      "grad_norm": 0.5737252235412598,
      "learning_rate": 5.246996962368957e-07,
      "loss": 0.014,
      "step": 2896000
    },
    {
      "epoch": 4.739400247442116,
      "grad_norm": 0.1412144899368286,
      "learning_rate": 5.246338040233786e-07,
      "loss": 0.0101,
      "step": 2896020
    },
    {
      "epoch": 4.739432977880769,
      "grad_norm": 0.16548530757427216,
      "learning_rate": 5.245679118098615e-07,
      "loss": 0.0151,
      "step": 2896040
    },
    {
      "epoch": 4.739465708319423,
      "grad_norm": 0.14387980103492737,
      "learning_rate": 5.245020195963444e-07,
      "loss": 0.0071,
      "step": 2896060
    },
    {
      "epoch": 4.739498438758076,
      "grad_norm": 0.22197188436985016,
      "learning_rate": 5.244361273828272e-07,
      "loss": 0.0112,
      "step": 2896080
    },
    {
      "epoch": 4.73953116919673,
      "grad_norm": 0.3220261037349701,
      "learning_rate": 5.243702351693101e-07,
      "loss": 0.0102,
      "step": 2896100
    },
    {
      "epoch": 4.739563899635383,
      "grad_norm": 0.1059572622179985,
      "learning_rate": 5.243043429557929e-07,
      "loss": 0.0083,
      "step": 2896120
    },
    {
      "epoch": 4.739596630074036,
      "grad_norm": 0.27591270208358765,
      "learning_rate": 5.242384507422759e-07,
      "loss": 0.011,
      "step": 2896140
    },
    {
      "epoch": 4.73962936051269,
      "grad_norm": 0.14691162109375,
      "learning_rate": 5.241725585287587e-07,
      "loss": 0.0101,
      "step": 2896160
    },
    {
      "epoch": 4.739662090951343,
      "grad_norm": 0.5463811755180359,
      "learning_rate": 5.241066663152416e-07,
      "loss": 0.0095,
      "step": 2896180
    },
    {
      "epoch": 4.739694821389996,
      "grad_norm": 0.16893205046653748,
      "learning_rate": 5.240407741017244e-07,
      "loss": 0.007,
      "step": 2896200
    },
    {
      "epoch": 4.73972755182865,
      "grad_norm": 0.1713573932647705,
      "learning_rate": 5.239748818882073e-07,
      "loss": 0.0048,
      "step": 2896220
    },
    {
      "epoch": 4.739760282267303,
      "grad_norm": 0.2610630989074707,
      "learning_rate": 5.239089896746902e-07,
      "loss": 0.0119,
      "step": 2896240
    },
    {
      "epoch": 4.739793012705956,
      "grad_norm": 0.16255523264408112,
      "learning_rate": 5.23843097461173e-07,
      "loss": 0.0069,
      "step": 2896260
    },
    {
      "epoch": 4.73982574314461,
      "grad_norm": 0.16706284880638123,
      "learning_rate": 5.237772052476559e-07,
      "loss": 0.0088,
      "step": 2896280
    },
    {
      "epoch": 4.739858473583263,
      "grad_norm": 0.41838741302490234,
      "learning_rate": 5.237113130341388e-07,
      "loss": 0.0081,
      "step": 2896300
    },
    {
      "epoch": 4.739891204021916,
      "grad_norm": 0.20918875932693481,
      "learning_rate": 5.236454208206217e-07,
      "loss": 0.0124,
      "step": 2896320
    },
    {
      "epoch": 4.7399239344605695,
      "grad_norm": 0.1784161478281021,
      "learning_rate": 5.235795286071045e-07,
      "loss": 0.008,
      "step": 2896340
    },
    {
      "epoch": 4.739956664899223,
      "grad_norm": 0.14332304894924164,
      "learning_rate": 5.235136363935874e-07,
      "loss": 0.0075,
      "step": 2896360
    },
    {
      "epoch": 4.739989395337877,
      "grad_norm": 0.5197494626045227,
      "learning_rate": 5.234477441800702e-07,
      "loss": 0.0127,
      "step": 2896380
    },
    {
      "epoch": 4.740022125776529,
      "grad_norm": 0.15175968408584595,
      "learning_rate": 5.233818519665532e-07,
      "loss": 0.0106,
      "step": 2896400
    },
    {
      "epoch": 4.740054856215183,
      "grad_norm": 0.27420127391815186,
      "learning_rate": 5.23315959753036e-07,
      "loss": 0.0068,
      "step": 2896420
    },
    {
      "epoch": 4.740087586653837,
      "grad_norm": 0.4775042235851288,
      "learning_rate": 5.23250067539519e-07,
      "loss": 0.007,
      "step": 2896440
    },
    {
      "epoch": 4.74012031709249,
      "grad_norm": 0.19436484575271606,
      "learning_rate": 5.231841753260017e-07,
      "loss": 0.0056,
      "step": 2896460
    },
    {
      "epoch": 4.740153047531143,
      "grad_norm": 0.1957654058933258,
      "learning_rate": 5.231182831124847e-07,
      "loss": 0.009,
      "step": 2896480
    },
    {
      "epoch": 4.7401857779697965,
      "grad_norm": 0.09734369069337845,
      "learning_rate": 5.230523908989675e-07,
      "loss": 0.0097,
      "step": 2896500
    },
    {
      "epoch": 4.74021850840845,
      "grad_norm": 0.09530279785394669,
      "learning_rate": 5.229864986854505e-07,
      "loss": 0.0123,
      "step": 2896520
    },
    {
      "epoch": 4.740251238847103,
      "grad_norm": 0.1465832144021988,
      "learning_rate": 5.229206064719333e-07,
      "loss": 0.0116,
      "step": 2896540
    },
    {
      "epoch": 4.740283969285756,
      "grad_norm": 0.38344520330429077,
      "learning_rate": 5.22854714258416e-07,
      "loss": 0.009,
      "step": 2896560
    },
    {
      "epoch": 4.74031669972441,
      "grad_norm": 0.2040494978427887,
      "learning_rate": 5.22788822044899e-07,
      "loss": 0.0117,
      "step": 2896580
    },
    {
      "epoch": 4.740349430163063,
      "grad_norm": 0.19821412861347198,
      "learning_rate": 5.227229298313818e-07,
      "loss": 0.0076,
      "step": 2896600
    },
    {
      "epoch": 4.740382160601716,
      "grad_norm": 0.11280956864356995,
      "learning_rate": 5.226570376178648e-07,
      "loss": 0.0064,
      "step": 2896620
    },
    {
      "epoch": 4.74041489104037,
      "grad_norm": 0.1866627037525177,
      "learning_rate": 5.225911454043476e-07,
      "loss": 0.0079,
      "step": 2896640
    },
    {
      "epoch": 4.740447621479023,
      "grad_norm": 0.08332020044326782,
      "learning_rate": 5.225252531908305e-07,
      "loss": 0.0094,
      "step": 2896660
    },
    {
      "epoch": 4.740480351917676,
      "grad_norm": 0.2946195602416992,
      "learning_rate": 5.224593609773133e-07,
      "loss": 0.0132,
      "step": 2896680
    },
    {
      "epoch": 4.74051308235633,
      "grad_norm": 0.09716730564832687,
      "learning_rate": 5.223934687637963e-07,
      "loss": 0.0055,
      "step": 2896700
    },
    {
      "epoch": 4.740545812794983,
      "grad_norm": 0.32095959782600403,
      "learning_rate": 5.223275765502791e-07,
      "loss": 0.0124,
      "step": 2896720
    },
    {
      "epoch": 4.740578543233636,
      "grad_norm": 0.24492131173610687,
      "learning_rate": 5.22261684336762e-07,
      "loss": 0.0068,
      "step": 2896740
    },
    {
      "epoch": 4.74061127367229,
      "grad_norm": 0.4424957036972046,
      "learning_rate": 5.221957921232448e-07,
      "loss": 0.0078,
      "step": 2896760
    },
    {
      "epoch": 4.740644004110943,
      "grad_norm": 0.11478355526924133,
      "learning_rate": 5.221298999097277e-07,
      "loss": 0.011,
      "step": 2896780
    },
    {
      "epoch": 4.740676734549597,
      "grad_norm": 0.10008063167333603,
      "learning_rate": 5.220640076962106e-07,
      "loss": 0.0053,
      "step": 2896800
    },
    {
      "epoch": 4.7407094649882495,
      "grad_norm": 0.12506522238254547,
      "learning_rate": 5.219981154826935e-07,
      "loss": 0.0055,
      "step": 2896820
    },
    {
      "epoch": 4.740742195426903,
      "grad_norm": 0.1868334859609604,
      "learning_rate": 5.219322232691763e-07,
      "loss": 0.0061,
      "step": 2896840
    },
    {
      "epoch": 4.740774925865557,
      "grad_norm": 0.1992611289024353,
      "learning_rate": 5.218663310556591e-07,
      "loss": 0.0103,
      "step": 2896860
    },
    {
      "epoch": 4.740807656304209,
      "grad_norm": 0.12547782063484192,
      "learning_rate": 5.218004388421421e-07,
      "loss": 0.0079,
      "step": 2896880
    },
    {
      "epoch": 4.740840386742863,
      "grad_norm": 0.15690124034881592,
      "learning_rate": 5.217345466286249e-07,
      "loss": 0.0101,
      "step": 2896900
    },
    {
      "epoch": 4.740873117181517,
      "grad_norm": 0.2507460415363312,
      "learning_rate": 5.216686544151078e-07,
      "loss": 0.0088,
      "step": 2896920
    },
    {
      "epoch": 4.740905847620169,
      "grad_norm": 0.2158999890089035,
      "learning_rate": 5.216027622015906e-07,
      "loss": 0.0108,
      "step": 2896940
    },
    {
      "epoch": 4.740938578058823,
      "grad_norm": 0.060006674379110336,
      "learning_rate": 5.215368699880736e-07,
      "loss": 0.0075,
      "step": 2896960
    },
    {
      "epoch": 4.7409713084974765,
      "grad_norm": 0.14853951334953308,
      "learning_rate": 5.214709777745564e-07,
      "loss": 0.0077,
      "step": 2896980
    },
    {
      "epoch": 4.74100403893613,
      "grad_norm": 0.3671928346157074,
      "learning_rate": 5.214050855610393e-07,
      "loss": 0.0101,
      "step": 2897000
    },
    {
      "epoch": 4.741036769374783,
      "grad_norm": 0.22587591409683228,
      "learning_rate": 5.213391933475221e-07,
      "loss": 0.008,
      "step": 2897020
    },
    {
      "epoch": 4.741069499813436,
      "grad_norm": 0.31979605555534363,
      "learning_rate": 5.21273301134005e-07,
      "loss": 0.0111,
      "step": 2897040
    },
    {
      "epoch": 4.74110223025209,
      "grad_norm": 0.16383709013462067,
      "learning_rate": 5.212074089204879e-07,
      "loss": 0.008,
      "step": 2897060
    },
    {
      "epoch": 4.741134960690744,
      "grad_norm": 0.14956708252429962,
      "learning_rate": 5.211415167069708e-07,
      "loss": 0.0083,
      "step": 2897080
    },
    {
      "epoch": 4.741167691129396,
      "grad_norm": 0.15021534264087677,
      "learning_rate": 5.210756244934537e-07,
      "loss": 0.0087,
      "step": 2897100
    },
    {
      "epoch": 4.74120042156805,
      "grad_norm": 0.22526906430721283,
      "learning_rate": 5.210097322799365e-07,
      "loss": 0.006,
      "step": 2897120
    },
    {
      "epoch": 4.7412331520067035,
      "grad_norm": 0.1232089102268219,
      "learning_rate": 5.209438400664194e-07,
      "loss": 0.0085,
      "step": 2897140
    },
    {
      "epoch": 4.741265882445356,
      "grad_norm": 0.5463264584541321,
      "learning_rate": 5.208779478529023e-07,
      "loss": 0.013,
      "step": 2897160
    },
    {
      "epoch": 4.74129861288401,
      "grad_norm": 0.12612588703632355,
      "learning_rate": 5.208120556393852e-07,
      "loss": 0.0126,
      "step": 2897180
    },
    {
      "epoch": 4.741331343322663,
      "grad_norm": 0.7855693697929382,
      "learning_rate": 5.207461634258681e-07,
      "loss": 0.0111,
      "step": 2897200
    },
    {
      "epoch": 4.741364073761316,
      "grad_norm": 0.26714953780174255,
      "learning_rate": 5.206802712123508e-07,
      "loss": 0.0077,
      "step": 2897220
    },
    {
      "epoch": 4.74139680419997,
      "grad_norm": 0.20978960394859314,
      "learning_rate": 5.206143789988337e-07,
      "loss": 0.0086,
      "step": 2897240
    },
    {
      "epoch": 4.741429534638623,
      "grad_norm": 0.05545389652252197,
      "learning_rate": 5.205484867853166e-07,
      "loss": 0.0058,
      "step": 2897260
    },
    {
      "epoch": 4.741462265077277,
      "grad_norm": 0.26261773705482483,
      "learning_rate": 5.204825945717995e-07,
      "loss": 0.0111,
      "step": 2897280
    },
    {
      "epoch": 4.74149499551593,
      "grad_norm": 0.11222316324710846,
      "learning_rate": 5.204167023582824e-07,
      "loss": 0.0066,
      "step": 2897300
    },
    {
      "epoch": 4.741527725954583,
      "grad_norm": 0.07283303886651993,
      "learning_rate": 5.203508101447652e-07,
      "loss": 0.0077,
      "step": 2897320
    },
    {
      "epoch": 4.741560456393237,
      "grad_norm": 0.4124566614627838,
      "learning_rate": 5.202849179312481e-07,
      "loss": 0.0068,
      "step": 2897340
    },
    {
      "epoch": 4.74159318683189,
      "grad_norm": 0.14642204344272614,
      "learning_rate": 5.20219025717731e-07,
      "loss": 0.0128,
      "step": 2897360
    },
    {
      "epoch": 4.741625917270543,
      "grad_norm": 0.1574452519416809,
      "learning_rate": 5.201531335042139e-07,
      "loss": 0.0056,
      "step": 2897380
    },
    {
      "epoch": 4.741658647709197,
      "grad_norm": 0.7776914238929749,
      "learning_rate": 5.200872412906967e-07,
      "loss": 0.0109,
      "step": 2897400
    },
    {
      "epoch": 4.74169137814785,
      "grad_norm": 0.2802261412143707,
      "learning_rate": 5.200213490771796e-07,
      "loss": 0.0078,
      "step": 2897420
    },
    {
      "epoch": 4.741724108586503,
      "grad_norm": 0.13470682501792908,
      "learning_rate": 5.199554568636624e-07,
      "loss": 0.0119,
      "step": 2897440
    },
    {
      "epoch": 4.7417568390251565,
      "grad_norm": 0.1232018694281578,
      "learning_rate": 5.198895646501454e-07,
      "loss": 0.01,
      "step": 2897460
    },
    {
      "epoch": 4.74178956946381,
      "grad_norm": 0.23017412424087524,
      "learning_rate": 5.198236724366282e-07,
      "loss": 0.0078,
      "step": 2897480
    },
    {
      "epoch": 4.741822299902463,
      "grad_norm": 0.2714514136314392,
      "learning_rate": 5.197577802231111e-07,
      "loss": 0.0147,
      "step": 2897500
    },
    {
      "epoch": 4.741855030341116,
      "grad_norm": 0.33595043420791626,
      "learning_rate": 5.196918880095939e-07,
      "loss": 0.0079,
      "step": 2897520
    },
    {
      "epoch": 4.74188776077977,
      "grad_norm": 0.17693422734737396,
      "learning_rate": 5.196259957960769e-07,
      "loss": 0.0061,
      "step": 2897540
    },
    {
      "epoch": 4.741920491218424,
      "grad_norm": 0.2115623950958252,
      "learning_rate": 5.195601035825597e-07,
      "loss": 0.011,
      "step": 2897560
    },
    {
      "epoch": 4.741953221657076,
      "grad_norm": 0.08262208849191666,
      "learning_rate": 5.194942113690425e-07,
      "loss": 0.0119,
      "step": 2897580
    },
    {
      "epoch": 4.74198595209573,
      "grad_norm": 0.15851296484470367,
      "learning_rate": 5.194283191555254e-07,
      "loss": 0.0123,
      "step": 2897600
    },
    {
      "epoch": 4.7420186825343835,
      "grad_norm": 0.5701403021812439,
      "learning_rate": 5.193624269420083e-07,
      "loss": 0.01,
      "step": 2897620
    },
    {
      "epoch": 4.742051412973037,
      "grad_norm": 0.20715108513832092,
      "learning_rate": 5.192965347284912e-07,
      "loss": 0.0076,
      "step": 2897640
    },
    {
      "epoch": 4.74208414341169,
      "grad_norm": 0.13785423338413239,
      "learning_rate": 5.19230642514974e-07,
      "loss": 0.0073,
      "step": 2897660
    },
    {
      "epoch": 4.742116873850343,
      "grad_norm": 0.13486982882022858,
      "learning_rate": 5.191647503014569e-07,
      "loss": 0.0091,
      "step": 2897680
    },
    {
      "epoch": 4.742149604288997,
      "grad_norm": 0.06759374588727951,
      "learning_rate": 5.190988580879397e-07,
      "loss": 0.0086,
      "step": 2897700
    },
    {
      "epoch": 4.74218233472765,
      "grad_norm": 0.409412145614624,
      "learning_rate": 5.190329658744227e-07,
      "loss": 0.0107,
      "step": 2897720
    },
    {
      "epoch": 4.742215065166303,
      "grad_norm": 0.13083717226982117,
      "learning_rate": 5.189670736609055e-07,
      "loss": 0.0078,
      "step": 2897740
    },
    {
      "epoch": 4.742247795604957,
      "grad_norm": 0.5487589240074158,
      "learning_rate": 5.189011814473885e-07,
      "loss": 0.0061,
      "step": 2897760
    },
    {
      "epoch": 4.74228052604361,
      "grad_norm": 0.11035338044166565,
      "learning_rate": 5.188352892338712e-07,
      "loss": 0.0063,
      "step": 2897780
    },
    {
      "epoch": 4.742313256482263,
      "grad_norm": 0.195616215467453,
      "learning_rate": 5.187693970203542e-07,
      "loss": 0.0092,
      "step": 2897800
    },
    {
      "epoch": 4.742345986920917,
      "grad_norm": 0.07452674955129623,
      "learning_rate": 5.18703504806837e-07,
      "loss": 0.0074,
      "step": 2897820
    },
    {
      "epoch": 4.74237871735957,
      "grad_norm": 0.1545329988002777,
      "learning_rate": 5.1863761259332e-07,
      "loss": 0.0125,
      "step": 2897840
    },
    {
      "epoch": 4.742411447798223,
      "grad_norm": 0.20178745687007904,
      "learning_rate": 5.185717203798028e-07,
      "loss": 0.0132,
      "step": 2897860
    },
    {
      "epoch": 4.742444178236877,
      "grad_norm": 0.15931351482868195,
      "learning_rate": 5.185058281662855e-07,
      "loss": 0.0067,
      "step": 2897880
    },
    {
      "epoch": 4.74247690867553,
      "grad_norm": 0.04597140848636627,
      "learning_rate": 5.184399359527685e-07,
      "loss": 0.0067,
      "step": 2897900
    },
    {
      "epoch": 4.742509639114184,
      "grad_norm": 0.1099361777305603,
      "learning_rate": 5.183740437392513e-07,
      "loss": 0.0083,
      "step": 2897920
    },
    {
      "epoch": 4.742542369552837,
      "grad_norm": 0.5196269154548645,
      "learning_rate": 5.183081515257343e-07,
      "loss": 0.0081,
      "step": 2897940
    },
    {
      "epoch": 4.74257509999149,
      "grad_norm": 0.3013865351676941,
      "learning_rate": 5.182422593122171e-07,
      "loss": 0.007,
      "step": 2897960
    },
    {
      "epoch": 4.742607830430144,
      "grad_norm": 0.36440837383270264,
      "learning_rate": 5.181763670987e-07,
      "loss": 0.0173,
      "step": 2897980
    },
    {
      "epoch": 4.7426405608687965,
      "grad_norm": 0.7471657395362854,
      "learning_rate": 5.181104748851828e-07,
      "loss": 0.0051,
      "step": 2898000
    },
    {
      "epoch": 4.74267329130745,
      "grad_norm": 0.37732604146003723,
      "learning_rate": 5.180445826716658e-07,
      "loss": 0.0104,
      "step": 2898020
    },
    {
      "epoch": 4.742706021746104,
      "grad_norm": 0.4561825692653656,
      "learning_rate": 5.179786904581486e-07,
      "loss": 0.0075,
      "step": 2898040
    },
    {
      "epoch": 4.742738752184756,
      "grad_norm": 0.2661181092262268,
      "learning_rate": 5.179127982446315e-07,
      "loss": 0.0088,
      "step": 2898060
    },
    {
      "epoch": 4.74277148262341,
      "grad_norm": 0.27337563037872314,
      "learning_rate": 5.178469060311143e-07,
      "loss": 0.0131,
      "step": 2898080
    },
    {
      "epoch": 4.742804213062064,
      "grad_norm": 0.19244328141212463,
      "learning_rate": 5.177810138175972e-07,
      "loss": 0.0059,
      "step": 2898100
    },
    {
      "epoch": 4.742836943500717,
      "grad_norm": 0.04334944114089012,
      "learning_rate": 5.177151216040801e-07,
      "loss": 0.0076,
      "step": 2898120
    },
    {
      "epoch": 4.74286967393937,
      "grad_norm": 0.1804277002811432,
      "learning_rate": 5.17649229390563e-07,
      "loss": 0.01,
      "step": 2898140
    },
    {
      "epoch": 4.7429024043780235,
      "grad_norm": 0.2778392434120178,
      "learning_rate": 5.175833371770458e-07,
      "loss": 0.0093,
      "step": 2898160
    },
    {
      "epoch": 4.742935134816677,
      "grad_norm": 0.19381549954414368,
      "learning_rate": 5.175174449635287e-07,
      "loss": 0.0069,
      "step": 2898180
    },
    {
      "epoch": 4.742967865255331,
      "grad_norm": 0.18452994525432587,
      "learning_rate": 5.174515527500116e-07,
      "loss": 0.0098,
      "step": 2898200
    },
    {
      "epoch": 4.743000595693983,
      "grad_norm": 0.31783348321914673,
      "learning_rate": 5.173856605364945e-07,
      "loss": 0.0066,
      "step": 2898220
    },
    {
      "epoch": 4.743033326132637,
      "grad_norm": 0.10451322793960571,
      "learning_rate": 5.173197683229773e-07,
      "loss": 0.009,
      "step": 2898240
    },
    {
      "epoch": 4.7430660565712905,
      "grad_norm": 0.2710789740085602,
      "learning_rate": 5.172538761094601e-07,
      "loss": 0.0118,
      "step": 2898260
    },
    {
      "epoch": 4.743098787009943,
      "grad_norm": 0.16626925766468048,
      "learning_rate": 5.171879838959431e-07,
      "loss": 0.0099,
      "step": 2898280
    },
    {
      "epoch": 4.743131517448597,
      "grad_norm": 0.35656866431236267,
      "learning_rate": 5.171220916824259e-07,
      "loss": 0.0073,
      "step": 2898300
    },
    {
      "epoch": 4.74316424788725,
      "grad_norm": 0.1186249703168869,
      "learning_rate": 5.170561994689088e-07,
      "loss": 0.008,
      "step": 2898320
    },
    {
      "epoch": 4.743196978325903,
      "grad_norm": 0.4300723969936371,
      "learning_rate": 5.169903072553916e-07,
      "loss": 0.0097,
      "step": 2898340
    },
    {
      "epoch": 4.743229708764557,
      "grad_norm": 0.15069080889225006,
      "learning_rate": 5.169244150418745e-07,
      "loss": 0.0083,
      "step": 2898360
    },
    {
      "epoch": 4.74326243920321,
      "grad_norm": 0.13028301298618317,
      "learning_rate": 5.168585228283574e-07,
      "loss": 0.0091,
      "step": 2898380
    },
    {
      "epoch": 4.743295169641864,
      "grad_norm": 0.09111078083515167,
      "learning_rate": 5.167926306148403e-07,
      "loss": 0.0081,
      "step": 2898400
    },
    {
      "epoch": 4.743327900080517,
      "grad_norm": 0.33618804812431335,
      "learning_rate": 5.167267384013232e-07,
      "loss": 0.0117,
      "step": 2898420
    },
    {
      "epoch": 4.74336063051917,
      "grad_norm": 0.2335672229528427,
      "learning_rate": 5.16660846187806e-07,
      "loss": 0.0132,
      "step": 2898440
    },
    {
      "epoch": 4.743393360957824,
      "grad_norm": 0.3929334878921509,
      "learning_rate": 5.165949539742889e-07,
      "loss": 0.0102,
      "step": 2898460
    },
    {
      "epoch": 4.7434260913964765,
      "grad_norm": 0.07228387147188187,
      "learning_rate": 5.165290617607718e-07,
      "loss": 0.0076,
      "step": 2898480
    },
    {
      "epoch": 4.74345882183513,
      "grad_norm": 0.3155097961425781,
      "learning_rate": 5.164631695472547e-07,
      "loss": 0.0067,
      "step": 2898500
    },
    {
      "epoch": 4.743491552273784,
      "grad_norm": 0.15790753066539764,
      "learning_rate": 5.163972773337376e-07,
      "loss": 0.0064,
      "step": 2898520
    },
    {
      "epoch": 4.743524282712437,
      "grad_norm": 0.17747023701667786,
      "learning_rate": 5.163313851202203e-07,
      "loss": 0.0081,
      "step": 2898540
    },
    {
      "epoch": 4.74355701315109,
      "grad_norm": 0.7188204526901245,
      "learning_rate": 5.162654929067033e-07,
      "loss": 0.0106,
      "step": 2898560
    },
    {
      "epoch": 4.743589743589744,
      "grad_norm": 0.05480460450053215,
      "learning_rate": 5.161996006931861e-07,
      "loss": 0.0087,
      "step": 2898580
    },
    {
      "epoch": 4.743622474028397,
      "grad_norm": 0.18472178280353546,
      "learning_rate": 5.16133708479669e-07,
      "loss": 0.0104,
      "step": 2898600
    },
    {
      "epoch": 4.74365520446705,
      "grad_norm": 0.12418734282255173,
      "learning_rate": 5.160678162661519e-07,
      "loss": 0.0046,
      "step": 2898620
    },
    {
      "epoch": 4.7436879349057035,
      "grad_norm": 0.18378585577011108,
      "learning_rate": 5.160019240526347e-07,
      "loss": 0.0078,
      "step": 2898640
    },
    {
      "epoch": 4.743720665344357,
      "grad_norm": 0.18939638137817383,
      "learning_rate": 5.159360318391176e-07,
      "loss": 0.0104,
      "step": 2898660
    },
    {
      "epoch": 4.74375339578301,
      "grad_norm": 0.13310225307941437,
      "learning_rate": 5.158701396256005e-07,
      "loss": 0.0076,
      "step": 2898680
    },
    {
      "epoch": 4.743786126221663,
      "grad_norm": 0.15454715490341187,
      "learning_rate": 5.158042474120834e-07,
      "loss": 0.0084,
      "step": 2898700
    },
    {
      "epoch": 4.743818856660317,
      "grad_norm": 0.26478031277656555,
      "learning_rate": 5.157383551985662e-07,
      "loss": 0.0105,
      "step": 2898720
    },
    {
      "epoch": 4.743851587098971,
      "grad_norm": 0.3011002242565155,
      "learning_rate": 5.156724629850491e-07,
      "loss": 0.0089,
      "step": 2898740
    },
    {
      "epoch": 4.743884317537623,
      "grad_norm": 0.21448636054992676,
      "learning_rate": 5.156065707715319e-07,
      "loss": 0.0065,
      "step": 2898760
    },
    {
      "epoch": 4.743917047976277,
      "grad_norm": 0.1681929975748062,
      "learning_rate": 5.155406785580149e-07,
      "loss": 0.0088,
      "step": 2898780
    },
    {
      "epoch": 4.7439497784149305,
      "grad_norm": 0.1918550729751587,
      "learning_rate": 5.154747863444977e-07,
      "loss": 0.0082,
      "step": 2898800
    },
    {
      "epoch": 4.743982508853584,
      "grad_norm": 0.2742134928703308,
      "learning_rate": 5.154088941309806e-07,
      "loss": 0.0079,
      "step": 2898820
    },
    {
      "epoch": 4.744015239292237,
      "grad_norm": 0.1220599040389061,
      "learning_rate": 5.153430019174634e-07,
      "loss": 0.0075,
      "step": 2898840
    },
    {
      "epoch": 4.74404796973089,
      "grad_norm": 0.08207859843969345,
      "learning_rate": 5.152771097039464e-07,
      "loss": 0.0082,
      "step": 2898860
    },
    {
      "epoch": 4.744080700169544,
      "grad_norm": 0.13247719407081604,
      "learning_rate": 5.152112174904292e-07,
      "loss": 0.0093,
      "step": 2898880
    },
    {
      "epoch": 4.744113430608197,
      "grad_norm": 0.15377651154994965,
      "learning_rate": 5.15145325276912e-07,
      "loss": 0.01,
      "step": 2898900
    },
    {
      "epoch": 4.74414616104685,
      "grad_norm": 0.06884174793958664,
      "learning_rate": 5.150794330633949e-07,
      "loss": 0.0069,
      "step": 2898920
    },
    {
      "epoch": 4.744178891485504,
      "grad_norm": 0.07031123340129852,
      "learning_rate": 5.150135408498778e-07,
      "loss": 0.0074,
      "step": 2898940
    },
    {
      "epoch": 4.744211621924157,
      "grad_norm": 0.08853061497211456,
      "learning_rate": 5.149476486363607e-07,
      "loss": 0.0065,
      "step": 2898960
    },
    {
      "epoch": 4.74424435236281,
      "grad_norm": 0.3154246211051941,
      "learning_rate": 5.148817564228435e-07,
      "loss": 0.0099,
      "step": 2898980
    },
    {
      "epoch": 4.744277082801464,
      "grad_norm": 0.33488842844963074,
      "learning_rate": 5.148158642093264e-07,
      "loss": 0.0138,
      "step": 2899000
    },
    {
      "epoch": 4.744309813240117,
      "grad_norm": 0.25609907507896423,
      "learning_rate": 5.147499719958092e-07,
      "loss": 0.0081,
      "step": 2899020
    },
    {
      "epoch": 4.74434254367877,
      "grad_norm": 0.4919160306453705,
      "learning_rate": 5.146840797822922e-07,
      "loss": 0.0097,
      "step": 2899040
    },
    {
      "epoch": 4.744375274117424,
      "grad_norm": 0.3339466154575348,
      "learning_rate": 5.14618187568775e-07,
      "loss": 0.0088,
      "step": 2899060
    },
    {
      "epoch": 4.744408004556077,
      "grad_norm": 0.23340921103954315,
      "learning_rate": 5.14552295355258e-07,
      "loss": 0.0082,
      "step": 2899080
    },
    {
      "epoch": 4.744440734994731,
      "grad_norm": 0.15270498394966125,
      "learning_rate": 5.144864031417407e-07,
      "loss": 0.0085,
      "step": 2899100
    },
    {
      "epoch": 4.7444734654333836,
      "grad_norm": 0.5590353012084961,
      "learning_rate": 5.144205109282237e-07,
      "loss": 0.0072,
      "step": 2899120
    },
    {
      "epoch": 4.744506195872037,
      "grad_norm": 0.23156414926052094,
      "learning_rate": 5.143546187147065e-07,
      "loss": 0.0093,
      "step": 2899140
    },
    {
      "epoch": 4.744538926310691,
      "grad_norm": 0.15257513523101807,
      "learning_rate": 5.142887265011895e-07,
      "loss": 0.0078,
      "step": 2899160
    },
    {
      "epoch": 4.7445716567493434,
      "grad_norm": 0.5633097290992737,
      "learning_rate": 5.142228342876723e-07,
      "loss": 0.0139,
      "step": 2899180
    },
    {
      "epoch": 4.744604387187997,
      "grad_norm": 0.07985083758831024,
      "learning_rate": 5.14156942074155e-07,
      "loss": 0.0074,
      "step": 2899200
    },
    {
      "epoch": 4.744637117626651,
      "grad_norm": 0.29755446314811707,
      "learning_rate": 5.14091049860638e-07,
      "loss": 0.012,
      "step": 2899220
    },
    {
      "epoch": 4.744669848065303,
      "grad_norm": 0.13729281723499298,
      "learning_rate": 5.140251576471208e-07,
      "loss": 0.01,
      "step": 2899240
    },
    {
      "epoch": 4.744702578503957,
      "grad_norm": 0.35139814019203186,
      "learning_rate": 5.139592654336038e-07,
      "loss": 0.0112,
      "step": 2899260
    },
    {
      "epoch": 4.7447353089426105,
      "grad_norm": 0.16039782762527466,
      "learning_rate": 5.138933732200866e-07,
      "loss": 0.0064,
      "step": 2899280
    },
    {
      "epoch": 4.744768039381264,
      "grad_norm": 0.15716983377933502,
      "learning_rate": 5.138274810065695e-07,
      "loss": 0.0086,
      "step": 2899300
    },
    {
      "epoch": 4.744800769819917,
      "grad_norm": 0.09194693714380264,
      "learning_rate": 5.137615887930523e-07,
      "loss": 0.0071,
      "step": 2899320
    },
    {
      "epoch": 4.74483350025857,
      "grad_norm": 0.2825368344783783,
      "learning_rate": 5.136956965795353e-07,
      "loss": 0.0078,
      "step": 2899340
    },
    {
      "epoch": 4.744866230697224,
      "grad_norm": 0.09766354411840439,
      "learning_rate": 5.136298043660181e-07,
      "loss": 0.0054,
      "step": 2899360
    },
    {
      "epoch": 4.744898961135878,
      "grad_norm": 0.22113145887851715,
      "learning_rate": 5.13563912152501e-07,
      "loss": 0.012,
      "step": 2899380
    },
    {
      "epoch": 4.74493169157453,
      "grad_norm": 0.24477346241474152,
      "learning_rate": 5.134980199389838e-07,
      "loss": 0.0108,
      "step": 2899400
    },
    {
      "epoch": 4.744964422013184,
      "grad_norm": 0.5551080703735352,
      "learning_rate": 5.134321277254667e-07,
      "loss": 0.0079,
      "step": 2899420
    },
    {
      "epoch": 4.7449971524518375,
      "grad_norm": 0.02838221751153469,
      "learning_rate": 5.133662355119496e-07,
      "loss": 0.0086,
      "step": 2899440
    },
    {
      "epoch": 4.74502988289049,
      "grad_norm": 0.389541357755661,
      "learning_rate": 5.133003432984325e-07,
      "loss": 0.011,
      "step": 2899460
    },
    {
      "epoch": 4.745062613329144,
      "grad_norm": 0.12661926448345184,
      "learning_rate": 5.132344510849153e-07,
      "loss": 0.0066,
      "step": 2899480
    },
    {
      "epoch": 4.745095343767797,
      "grad_norm": 0.23670555651187897,
      "learning_rate": 5.131685588713982e-07,
      "loss": 0.0075,
      "step": 2899500
    },
    {
      "epoch": 4.74512807420645,
      "grad_norm": 0.15853890776634216,
      "learning_rate": 5.131026666578811e-07,
      "loss": 0.0127,
      "step": 2899520
    },
    {
      "epoch": 4.745160804645104,
      "grad_norm": 0.16887825727462769,
      "learning_rate": 5.13036774444364e-07,
      "loss": 0.0109,
      "step": 2899540
    },
    {
      "epoch": 4.745193535083757,
      "grad_norm": 0.30552446842193604,
      "learning_rate": 5.129708822308468e-07,
      "loss": 0.0075,
      "step": 2899560
    },
    {
      "epoch": 4.745226265522411,
      "grad_norm": 0.410649836063385,
      "learning_rate": 5.129049900173296e-07,
      "loss": 0.0099,
      "step": 2899580
    },
    {
      "epoch": 4.745258995961064,
      "grad_norm": 0.28848639130592346,
      "learning_rate": 5.128390978038126e-07,
      "loss": 0.0132,
      "step": 2899600
    },
    {
      "epoch": 4.745291726399717,
      "grad_norm": 0.24631871283054352,
      "learning_rate": 5.127732055902954e-07,
      "loss": 0.0109,
      "step": 2899620
    },
    {
      "epoch": 4.745324456838371,
      "grad_norm": 0.1534643918275833,
      "learning_rate": 5.127073133767783e-07,
      "loss": 0.0069,
      "step": 2899640
    },
    {
      "epoch": 4.745357187277024,
      "grad_norm": 0.24404922127723694,
      "learning_rate": 5.126414211632611e-07,
      "loss": 0.0084,
      "step": 2899660
    },
    {
      "epoch": 4.745389917715677,
      "grad_norm": 0.13649781048297882,
      "learning_rate": 5.12575528949744e-07,
      "loss": 0.0079,
      "step": 2899680
    },
    {
      "epoch": 4.745422648154331,
      "grad_norm": 0.11232324689626694,
      "learning_rate": 5.125096367362269e-07,
      "loss": 0.0076,
      "step": 2899700
    },
    {
      "epoch": 4.745455378592984,
      "grad_norm": 0.34927231073379517,
      "learning_rate": 5.124437445227098e-07,
      "loss": 0.0055,
      "step": 2899720
    },
    {
      "epoch": 4.745488109031637,
      "grad_norm": 0.1835113763809204,
      "learning_rate": 5.123778523091927e-07,
      "loss": 0.0093,
      "step": 2899740
    },
    {
      "epoch": 4.745520839470291,
      "grad_norm": 0.2433110624551773,
      "learning_rate": 5.123119600956755e-07,
      "loss": 0.0073,
      "step": 2899760
    },
    {
      "epoch": 4.745553569908944,
      "grad_norm": 0.38412001729011536,
      "learning_rate": 5.122460678821584e-07,
      "loss": 0.0068,
      "step": 2899780
    },
    {
      "epoch": 4.745586300347597,
      "grad_norm": 0.20487768948078156,
      "learning_rate": 5.121801756686413e-07,
      "loss": 0.0143,
      "step": 2899800
    },
    {
      "epoch": 4.7456190307862505,
      "grad_norm": 0.45650914311408997,
      "learning_rate": 5.121142834551242e-07,
      "loss": 0.0118,
      "step": 2899820
    },
    {
      "epoch": 4.745651761224904,
      "grad_norm": 0.07159324735403061,
      "learning_rate": 5.120483912416071e-07,
      "loss": 0.0096,
      "step": 2899840
    },
    {
      "epoch": 4.745684491663558,
      "grad_norm": 0.14322079718112946,
      "learning_rate": 5.119824990280899e-07,
      "loss": 0.0067,
      "step": 2899860
    },
    {
      "epoch": 4.74571722210221,
      "grad_norm": 0.4786401689052582,
      "learning_rate": 5.119166068145728e-07,
      "loss": 0.0082,
      "step": 2899880
    },
    {
      "epoch": 4.745749952540864,
      "grad_norm": 0.34735116362571716,
      "learning_rate": 5.118507146010556e-07,
      "loss": 0.0078,
      "step": 2899900
    },
    {
      "epoch": 4.7457826829795176,
      "grad_norm": 0.08432141691446304,
      "learning_rate": 5.117848223875386e-07,
      "loss": 0.0125,
      "step": 2899920
    },
    {
      "epoch": 4.74581541341817,
      "grad_norm": 0.20033331215381622,
      "learning_rate": 5.117189301740214e-07,
      "loss": 0.0091,
      "step": 2899940
    },
    {
      "epoch": 4.745848143856824,
      "grad_norm": 0.26854705810546875,
      "learning_rate": 5.116530379605042e-07,
      "loss": 0.0058,
      "step": 2899960
    },
    {
      "epoch": 4.7458808742954774,
      "grad_norm": 0.11681495606899261,
      "learning_rate": 5.115871457469871e-07,
      "loss": 0.0073,
      "step": 2899980
    },
    {
      "epoch": 4.745913604734131,
      "grad_norm": 0.22725076973438263,
      "learning_rate": 5.1152125353347e-07,
      "loss": 0.0095,
      "step": 2900000
    },
    {
      "epoch": 4.745913604734131,
      "eval_loss": 0.005675988271832466,
      "eval_runtime": 6465.9352,
      "eval_samples_per_second": 158.965,
      "eval_steps_per_second": 15.897,
      "eval_sts-dev_pearson_cosine": 0.9871440518702194,
      "eval_sts-dev_spearman_cosine": 0.896904787967336,
      "step": 2900000
    },
    {
      "epoch": 4.745946335172784,
      "grad_norm": 0.24045556783676147,
      "learning_rate": 5.114553613199529e-07,
      "loss": 0.0095,
      "step": 2900020
    },
    {
      "epoch": 4.745979065611437,
      "grad_norm": 0.43294599652290344,
      "learning_rate": 5.113894691064357e-07,
      "loss": 0.0114,
      "step": 2900040
    },
    {
      "epoch": 4.746011796050091,
      "grad_norm": 0.22224842011928558,
      "learning_rate": 5.113235768929186e-07,
      "loss": 0.0066,
      "step": 2900060
    },
    {
      "epoch": 4.746044526488744,
      "grad_norm": 0.6444379091262817,
      "learning_rate": 5.112576846794015e-07,
      "loss": 0.011,
      "step": 2900080
    },
    {
      "epoch": 4.746077256927397,
      "grad_norm": 0.555580198764801,
      "learning_rate": 5.111917924658844e-07,
      "loss": 0.0125,
      "step": 2900100
    },
    {
      "epoch": 4.746109987366051,
      "grad_norm": 0.28333956003189087,
      "learning_rate": 5.111259002523672e-07,
      "loss": 0.0072,
      "step": 2900120
    },
    {
      "epoch": 4.7461427178047035,
      "grad_norm": 0.1504555344581604,
      "learning_rate": 5.110600080388501e-07,
      "loss": 0.0072,
      "step": 2900140
    },
    {
      "epoch": 4.746175448243357,
      "grad_norm": 0.12903180718421936,
      "learning_rate": 5.109941158253329e-07,
      "loss": 0.0067,
      "step": 2900160
    },
    {
      "epoch": 4.746208178682011,
      "grad_norm": 0.21365898847579956,
      "learning_rate": 5.109282236118159e-07,
      "loss": 0.0116,
      "step": 2900180
    },
    {
      "epoch": 4.746240909120664,
      "grad_norm": 0.3960973024368286,
      "learning_rate": 5.108623313982987e-07,
      "loss": 0.0096,
      "step": 2900200
    },
    {
      "epoch": 4.746273639559317,
      "grad_norm": 0.16105647385120392,
      "learning_rate": 5.107964391847816e-07,
      "loss": 0.0117,
      "step": 2900220
    },
    {
      "epoch": 4.746306369997971,
      "grad_norm": 0.6902644038200378,
      "learning_rate": 5.107305469712644e-07,
      "loss": 0.0088,
      "step": 2900240
    },
    {
      "epoch": 4.746339100436624,
      "grad_norm": 0.13545458018779755,
      "learning_rate": 5.106646547577474e-07,
      "loss": 0.0073,
      "step": 2900260
    },
    {
      "epoch": 4.746371830875278,
      "grad_norm": 0.10288278758525848,
      "learning_rate": 5.105987625442302e-07,
      "loss": 0.0113,
      "step": 2900280
    },
    {
      "epoch": 4.7464045613139305,
      "grad_norm": 0.24016565084457397,
      "learning_rate": 5.10532870330713e-07,
      "loss": 0.0074,
      "step": 2900300
    },
    {
      "epoch": 4.746437291752584,
      "grad_norm": 0.6696129441261292,
      "learning_rate": 5.104669781171959e-07,
      "loss": 0.0088,
      "step": 2900320
    },
    {
      "epoch": 4.746470022191238,
      "grad_norm": 0.18602831661701202,
      "learning_rate": 5.104010859036787e-07,
      "loss": 0.01,
      "step": 2900340
    },
    {
      "epoch": 4.74650275262989,
      "grad_norm": 0.14619053900241852,
      "learning_rate": 5.103351936901617e-07,
      "loss": 0.0082,
      "step": 2900360
    },
    {
      "epoch": 4.746535483068544,
      "grad_norm": 0.1584060937166214,
      "learning_rate": 5.102693014766445e-07,
      "loss": 0.01,
      "step": 2900380
    },
    {
      "epoch": 4.746568213507198,
      "grad_norm": 0.43227437138557434,
      "learning_rate": 5.102034092631275e-07,
      "loss": 0.0082,
      "step": 2900400
    },
    {
      "epoch": 4.74660094394585,
      "grad_norm": 0.24638161063194275,
      "learning_rate": 5.101375170496102e-07,
      "loss": 0.0069,
      "step": 2900420
    },
    {
      "epoch": 4.746633674384504,
      "grad_norm": 0.22949066758155823,
      "learning_rate": 5.100716248360932e-07,
      "loss": 0.0099,
      "step": 2900440
    },
    {
      "epoch": 4.7466664048231575,
      "grad_norm": 0.1807272732257843,
      "learning_rate": 5.10005732622576e-07,
      "loss": 0.0088,
      "step": 2900460
    },
    {
      "epoch": 4.746699135261811,
      "grad_norm": 0.3059768080711365,
      "learning_rate": 5.09939840409059e-07,
      "loss": 0.0122,
      "step": 2900480
    },
    {
      "epoch": 4.746731865700464,
      "grad_norm": 0.09077759832143784,
      "learning_rate": 5.098739481955418e-07,
      "loss": 0.0109,
      "step": 2900500
    },
    {
      "epoch": 4.746764596139117,
      "grad_norm": 0.22707098722457886,
      "learning_rate": 5.098080559820247e-07,
      "loss": 0.0125,
      "step": 2900520
    },
    {
      "epoch": 4.746797326577771,
      "grad_norm": 0.24026022851467133,
      "learning_rate": 5.097421637685075e-07,
      "loss": 0.0118,
      "step": 2900540
    },
    {
      "epoch": 4.746830057016425,
      "grad_norm": 0.14227750897407532,
      "learning_rate": 5.096762715549904e-07,
      "loss": 0.0117,
      "step": 2900560
    },
    {
      "epoch": 4.746862787455077,
      "grad_norm": 0.4791277050971985,
      "learning_rate": 5.096103793414733e-07,
      "loss": 0.0067,
      "step": 2900580
    },
    {
      "epoch": 4.746895517893731,
      "grad_norm": 0.292228102684021,
      "learning_rate": 5.095444871279562e-07,
      "loss": 0.0071,
      "step": 2900600
    },
    {
      "epoch": 4.7469282483323845,
      "grad_norm": 0.28141361474990845,
      "learning_rate": 5.09478594914439e-07,
      "loss": 0.01,
      "step": 2900620
    },
    {
      "epoch": 4.746960978771037,
      "grad_norm": 0.255989134311676,
      "learning_rate": 5.094127027009218e-07,
      "loss": 0.0067,
      "step": 2900640
    },
    {
      "epoch": 4.746993709209691,
      "grad_norm": 0.08022238314151764,
      "learning_rate": 5.093468104874048e-07,
      "loss": 0.009,
      "step": 2900660
    },
    {
      "epoch": 4.747026439648344,
      "grad_norm": 0.11728131771087646,
      "learning_rate": 5.092809182738876e-07,
      "loss": 0.0111,
      "step": 2900680
    },
    {
      "epoch": 4.747059170086997,
      "grad_norm": 0.306412935256958,
      "learning_rate": 5.092150260603705e-07,
      "loss": 0.0074,
      "step": 2900700
    },
    {
      "epoch": 4.747091900525651,
      "grad_norm": 0.4368477463722229,
      "learning_rate": 5.091491338468533e-07,
      "loss": 0.0088,
      "step": 2900720
    },
    {
      "epoch": 4.747124630964304,
      "grad_norm": 0.22782167792320251,
      "learning_rate": 5.090832416333363e-07,
      "loss": 0.011,
      "step": 2900740
    },
    {
      "epoch": 4.747157361402958,
      "grad_norm": 0.3943880796432495,
      "learning_rate": 5.090173494198191e-07,
      "loss": 0.0132,
      "step": 2900760
    },
    {
      "epoch": 4.747190091841611,
      "grad_norm": 0.3552427589893341,
      "learning_rate": 5.08951457206302e-07,
      "loss": 0.0144,
      "step": 2900780
    },
    {
      "epoch": 4.747222822280264,
      "grad_norm": 0.20271967351436615,
      "learning_rate": 5.088855649927848e-07,
      "loss": 0.0071,
      "step": 2900800
    },
    {
      "epoch": 4.747255552718918,
      "grad_norm": 0.04812243580818176,
      "learning_rate": 5.088196727792677e-07,
      "loss": 0.007,
      "step": 2900820
    },
    {
      "epoch": 4.747288283157571,
      "grad_norm": 0.3300231695175171,
      "learning_rate": 5.087537805657506e-07,
      "loss": 0.0167,
      "step": 2900840
    },
    {
      "epoch": 4.747321013596224,
      "grad_norm": 0.18348170816898346,
      "learning_rate": 5.086878883522335e-07,
      "loss": 0.0112,
      "step": 2900860
    },
    {
      "epoch": 4.747353744034878,
      "grad_norm": 0.19573278725147247,
      "learning_rate": 5.086219961387163e-07,
      "loss": 0.0068,
      "step": 2900880
    },
    {
      "epoch": 4.747386474473531,
      "grad_norm": 0.16543947160243988,
      "learning_rate": 5.085561039251992e-07,
      "loss": 0.0061,
      "step": 2900900
    },
    {
      "epoch": 4.747419204912184,
      "grad_norm": 0.08738645166158676,
      "learning_rate": 5.084902117116821e-07,
      "loss": 0.0089,
      "step": 2900920
    },
    {
      "epoch": 4.7474519353508375,
      "grad_norm": 0.18144823610782623,
      "learning_rate": 5.08424319498165e-07,
      "loss": 0.0077,
      "step": 2900940
    },
    {
      "epoch": 4.747484665789491,
      "grad_norm": 0.22901424765586853,
      "learning_rate": 5.083584272846478e-07,
      "loss": 0.009,
      "step": 2900960
    },
    {
      "epoch": 4.747517396228144,
      "grad_norm": 0.19329409301280975,
      "learning_rate": 5.082925350711306e-07,
      "loss": 0.0095,
      "step": 2900980
    },
    {
      "epoch": 4.747550126666797,
      "grad_norm": 0.6869046092033386,
      "learning_rate": 5.082266428576135e-07,
      "loss": 0.0093,
      "step": 2901000
    },
    {
      "epoch": 4.747582857105451,
      "grad_norm": 0.1145821213722229,
      "learning_rate": 5.081607506440964e-07,
      "loss": 0.0086,
      "step": 2901020
    },
    {
      "epoch": 4.747615587544105,
      "grad_norm": 0.23917168378829956,
      "learning_rate": 5.080948584305793e-07,
      "loss": 0.0081,
      "step": 2901040
    },
    {
      "epoch": 4.747648317982757,
      "grad_norm": 0.14869675040245056,
      "learning_rate": 5.080289662170622e-07,
      "loss": 0.0081,
      "step": 2901060
    },
    {
      "epoch": 4.747681048421411,
      "grad_norm": 0.3568425476551056,
      "learning_rate": 5.07963074003545e-07,
      "loss": 0.0073,
      "step": 2901080
    },
    {
      "epoch": 4.7477137788600645,
      "grad_norm": 0.193755105137825,
      "learning_rate": 5.078971817900279e-07,
      "loss": 0.0118,
      "step": 2901100
    },
    {
      "epoch": 4.747746509298718,
      "grad_norm": 0.2406352460384369,
      "learning_rate": 5.078312895765108e-07,
      "loss": 0.0164,
      "step": 2901120
    },
    {
      "epoch": 4.747779239737371,
      "grad_norm": 0.11070297658443451,
      "learning_rate": 5.077653973629937e-07,
      "loss": 0.0096,
      "step": 2901140
    },
    {
      "epoch": 4.747811970176024,
      "grad_norm": 0.17708078026771545,
      "learning_rate": 5.076995051494766e-07,
      "loss": 0.0107,
      "step": 2901160
    },
    {
      "epoch": 4.747844700614678,
      "grad_norm": 0.1308024823665619,
      "learning_rate": 5.076336129359594e-07,
      "loss": 0.009,
      "step": 2901180
    },
    {
      "epoch": 4.747877431053331,
      "grad_norm": 0.11561889201402664,
      "learning_rate": 5.075677207224423e-07,
      "loss": 0.0055,
      "step": 2901200
    },
    {
      "epoch": 4.747910161491984,
      "grad_norm": 0.23567664623260498,
      "learning_rate": 5.075018285089251e-07,
      "loss": 0.0094,
      "step": 2901220
    },
    {
      "epoch": 4.747942891930638,
      "grad_norm": 0.10528496652841568,
      "learning_rate": 5.074359362954081e-07,
      "loss": 0.0078,
      "step": 2901240
    },
    {
      "epoch": 4.747975622369291,
      "grad_norm": 0.18316590785980225,
      "learning_rate": 5.073700440818909e-07,
      "loss": 0.0115,
      "step": 2901260
    },
    {
      "epoch": 4.748008352807944,
      "grad_norm": 0.33840519189834595,
      "learning_rate": 5.073041518683737e-07,
      "loss": 0.0145,
      "step": 2901280
    },
    {
      "epoch": 4.748041083246598,
      "grad_norm": 0.3066152334213257,
      "learning_rate": 5.072382596548566e-07,
      "loss": 0.0112,
      "step": 2901300
    },
    {
      "epoch": 4.748073813685251,
      "grad_norm": 0.03477469086647034,
      "learning_rate": 5.071723674413395e-07,
      "loss": 0.0073,
      "step": 2901320
    },
    {
      "epoch": 4.748106544123904,
      "grad_norm": 0.1622181385755539,
      "learning_rate": 5.071064752278224e-07,
      "loss": 0.014,
      "step": 2901340
    },
    {
      "epoch": 4.748139274562558,
      "grad_norm": 0.39754584431648254,
      "learning_rate": 5.070405830143052e-07,
      "loss": 0.0073,
      "step": 2901360
    },
    {
      "epoch": 4.748172005001211,
      "grad_norm": 0.38923802971839905,
      "learning_rate": 5.069746908007881e-07,
      "loss": 0.0096,
      "step": 2901380
    },
    {
      "epoch": 4.748204735439864,
      "grad_norm": 0.21336998045444489,
      "learning_rate": 5.06908798587271e-07,
      "loss": 0.0093,
      "step": 2901400
    },
    {
      "epoch": 4.748237465878518,
      "grad_norm": 0.22045868635177612,
      "learning_rate": 5.068429063737539e-07,
      "loss": 0.0056,
      "step": 2901420
    },
    {
      "epoch": 4.748270196317171,
      "grad_norm": 0.14484359323978424,
      "learning_rate": 5.067770141602367e-07,
      "loss": 0.0075,
      "step": 2901440
    },
    {
      "epoch": 4.748302926755825,
      "grad_norm": 0.3658052384853363,
      "learning_rate": 5.067111219467196e-07,
      "loss": 0.0078,
      "step": 2901460
    },
    {
      "epoch": 4.7483356571944775,
      "grad_norm": 0.253810852766037,
      "learning_rate": 5.066452297332024e-07,
      "loss": 0.0094,
      "step": 2901480
    },
    {
      "epoch": 4.748368387633131,
      "grad_norm": 0.23627151548862457,
      "learning_rate": 5.065793375196854e-07,
      "loss": 0.0132,
      "step": 2901500
    },
    {
      "epoch": 4.748401118071785,
      "grad_norm": 0.4741074740886688,
      "learning_rate": 5.065134453061682e-07,
      "loss": 0.0072,
      "step": 2901520
    },
    {
      "epoch": 4.748433848510437,
      "grad_norm": 0.19911731779575348,
      "learning_rate": 5.064475530926511e-07,
      "loss": 0.011,
      "step": 2901540
    },
    {
      "epoch": 4.748466578949091,
      "grad_norm": 0.06168742477893829,
      "learning_rate": 5.063816608791339e-07,
      "loss": 0.0089,
      "step": 2901560
    },
    {
      "epoch": 4.748499309387745,
      "grad_norm": 0.09212896227836609,
      "learning_rate": 5.063157686656169e-07,
      "loss": 0.007,
      "step": 2901580
    },
    {
      "epoch": 4.748532039826397,
      "grad_norm": 0.4040491580963135,
      "learning_rate": 5.062498764520997e-07,
      "loss": 0.0094,
      "step": 2901600
    },
    {
      "epoch": 4.748564770265051,
      "grad_norm": 0.4276021718978882,
      "learning_rate": 5.061839842385825e-07,
      "loss": 0.0069,
      "step": 2901620
    },
    {
      "epoch": 4.7485975007037045,
      "grad_norm": 0.11216387152671814,
      "learning_rate": 5.061180920250654e-07,
      "loss": 0.0068,
      "step": 2901640
    },
    {
      "epoch": 4.748630231142358,
      "grad_norm": 0.08509171009063721,
      "learning_rate": 5.060521998115482e-07,
      "loss": 0.0118,
      "step": 2901660
    },
    {
      "epoch": 4.748662961581011,
      "grad_norm": 0.26537802815437317,
      "learning_rate": 5.059863075980312e-07,
      "loss": 0.0105,
      "step": 2901680
    },
    {
      "epoch": 4.748695692019664,
      "grad_norm": 0.22774934768676758,
      "learning_rate": 5.05920415384514e-07,
      "loss": 0.0142,
      "step": 2901700
    },
    {
      "epoch": 4.748728422458318,
      "grad_norm": 0.41495341062545776,
      "learning_rate": 5.05854523170997e-07,
      "loss": 0.0109,
      "step": 2901720
    },
    {
      "epoch": 4.7487611528969715,
      "grad_norm": 0.42762842774391174,
      "learning_rate": 5.057886309574797e-07,
      "loss": 0.0086,
      "step": 2901740
    },
    {
      "epoch": 4.748793883335624,
      "grad_norm": 0.6619755029678345,
      "learning_rate": 5.057227387439627e-07,
      "loss": 0.008,
      "step": 2901760
    },
    {
      "epoch": 4.748826613774278,
      "grad_norm": 0.284084290266037,
      "learning_rate": 5.056568465304455e-07,
      "loss": 0.0114,
      "step": 2901780
    },
    {
      "epoch": 4.748859344212931,
      "grad_norm": 0.4565339684486389,
      "learning_rate": 5.055909543169285e-07,
      "loss": 0.0162,
      "step": 2901800
    },
    {
      "epoch": 4.748892074651584,
      "grad_norm": 0.15943081676959991,
      "learning_rate": 5.055250621034113e-07,
      "loss": 0.0102,
      "step": 2901820
    },
    {
      "epoch": 4.748924805090238,
      "grad_norm": 0.2436670958995819,
      "learning_rate": 5.054591698898942e-07,
      "loss": 0.0132,
      "step": 2901840
    },
    {
      "epoch": 4.748957535528891,
      "grad_norm": 0.34652528166770935,
      "learning_rate": 5.05393277676377e-07,
      "loss": 0.011,
      "step": 2901860
    },
    {
      "epoch": 4.748990265967544,
      "grad_norm": 0.1652042716741562,
      "learning_rate": 5.053273854628599e-07,
      "loss": 0.0098,
      "step": 2901880
    },
    {
      "epoch": 4.749022996406198,
      "grad_norm": 0.1079360768198967,
      "learning_rate": 5.052614932493428e-07,
      "loss": 0.0093,
      "step": 2901900
    },
    {
      "epoch": 4.749055726844851,
      "grad_norm": 0.08533395081758499,
      "learning_rate": 5.051956010358257e-07,
      "loss": 0.0118,
      "step": 2901920
    },
    {
      "epoch": 4.749088457283505,
      "grad_norm": 0.12852239608764648,
      "learning_rate": 5.051297088223085e-07,
      "loss": 0.0075,
      "step": 2901940
    },
    {
      "epoch": 4.7491211877221575,
      "grad_norm": 0.1740310788154602,
      "learning_rate": 5.050638166087913e-07,
      "loss": 0.0112,
      "step": 2901960
    },
    {
      "epoch": 4.749153918160811,
      "grad_norm": 0.2386401891708374,
      "learning_rate": 5.049979243952743e-07,
      "loss": 0.0084,
      "step": 2901980
    },
    {
      "epoch": 4.749186648599465,
      "grad_norm": 0.31021395325660706,
      "learning_rate": 5.049320321817571e-07,
      "loss": 0.0084,
      "step": 2902000
    },
    {
      "epoch": 4.749219379038118,
      "grad_norm": 0.13776370882987976,
      "learning_rate": 5.0486613996824e-07,
      "loss": 0.006,
      "step": 2902020
    },
    {
      "epoch": 4.749252109476771,
      "grad_norm": 0.16497324407100677,
      "learning_rate": 5.048002477547228e-07,
      "loss": 0.0097,
      "step": 2902040
    },
    {
      "epoch": 4.749284839915425,
      "grad_norm": 0.1572277843952179,
      "learning_rate": 5.047343555412058e-07,
      "loss": 0.0073,
      "step": 2902060
    },
    {
      "epoch": 4.749317570354078,
      "grad_norm": 0.31659963726997375,
      "learning_rate": 5.046684633276886e-07,
      "loss": 0.0108,
      "step": 2902080
    },
    {
      "epoch": 4.749350300792731,
      "grad_norm": 0.1288912445306778,
      "learning_rate": 5.046025711141715e-07,
      "loss": 0.0108,
      "step": 2902100
    },
    {
      "epoch": 4.7493830312313845,
      "grad_norm": 0.6764057874679565,
      "learning_rate": 5.045366789006543e-07,
      "loss": 0.0133,
      "step": 2902120
    },
    {
      "epoch": 4.749415761670038,
      "grad_norm": 0.2134820520877838,
      "learning_rate": 5.044707866871372e-07,
      "loss": 0.009,
      "step": 2902140
    },
    {
      "epoch": 4.749448492108691,
      "grad_norm": 0.41764435172080994,
      "learning_rate": 5.044048944736201e-07,
      "loss": 0.011,
      "step": 2902160
    },
    {
      "epoch": 4.749481222547344,
      "grad_norm": 0.13584184646606445,
      "learning_rate": 5.04339002260103e-07,
      "loss": 0.0074,
      "step": 2902180
    },
    {
      "epoch": 4.749513952985998,
      "grad_norm": 0.2638784348964691,
      "learning_rate": 5.042731100465858e-07,
      "loss": 0.0087,
      "step": 2902200
    },
    {
      "epoch": 4.749546683424652,
      "grad_norm": 0.23204465210437775,
      "learning_rate": 5.042072178330687e-07,
      "loss": 0.0128,
      "step": 2902220
    },
    {
      "epoch": 4.749579413863304,
      "grad_norm": 0.13387753069400787,
      "learning_rate": 5.041413256195516e-07,
      "loss": 0.0071,
      "step": 2902240
    },
    {
      "epoch": 4.749612144301958,
      "grad_norm": 0.9102874398231506,
      "learning_rate": 5.040754334060345e-07,
      "loss": 0.0066,
      "step": 2902260
    },
    {
      "epoch": 4.7496448747406115,
      "grad_norm": 0.3961056172847748,
      "learning_rate": 5.040095411925173e-07,
      "loss": 0.0126,
      "step": 2902280
    },
    {
      "epoch": 4.749677605179265,
      "grad_norm": 0.10236209630966187,
      "learning_rate": 5.039436489790001e-07,
      "loss": 0.0078,
      "step": 2902300
    },
    {
      "epoch": 4.749710335617918,
      "grad_norm": 0.18886423110961914,
      "learning_rate": 5.03877756765483e-07,
      "loss": 0.0073,
      "step": 2902320
    },
    {
      "epoch": 4.749743066056571,
      "grad_norm": 0.11596758663654327,
      "learning_rate": 5.038118645519659e-07,
      "loss": 0.0119,
      "step": 2902340
    },
    {
      "epoch": 4.749775796495225,
      "grad_norm": 0.11111018061637878,
      "learning_rate": 5.037459723384488e-07,
      "loss": 0.0102,
      "step": 2902360
    },
    {
      "epoch": 4.749808526933878,
      "grad_norm": 0.25523534417152405,
      "learning_rate": 5.036800801249317e-07,
      "loss": 0.0082,
      "step": 2902380
    },
    {
      "epoch": 4.749841257372531,
      "grad_norm": 0.16311906278133392,
      "learning_rate": 5.036141879114145e-07,
      "loss": 0.011,
      "step": 2902400
    },
    {
      "epoch": 4.749873987811185,
      "grad_norm": 0.0845714882016182,
      "learning_rate": 5.035482956978974e-07,
      "loss": 0.0094,
      "step": 2902420
    },
    {
      "epoch": 4.749906718249838,
      "grad_norm": 0.1347236931324005,
      "learning_rate": 5.034824034843803e-07,
      "loss": 0.0103,
      "step": 2902440
    },
    {
      "epoch": 4.749939448688491,
      "grad_norm": 0.23663412034511566,
      "learning_rate": 5.034165112708632e-07,
      "loss": 0.0113,
      "step": 2902460
    },
    {
      "epoch": 4.749972179127145,
      "grad_norm": 0.32654544711112976,
      "learning_rate": 5.033506190573461e-07,
      "loss": 0.0095,
      "step": 2902480
    },
    {
      "epoch": 4.750004909565798,
      "grad_norm": 0.15237441658973694,
      "learning_rate": 5.032847268438289e-07,
      "loss": 0.0092,
      "step": 2902500
    },
    {
      "epoch": 4.750037640004451,
      "grad_norm": 0.19383683800697327,
      "learning_rate": 5.032188346303118e-07,
      "loss": 0.0101,
      "step": 2902520
    },
    {
      "epoch": 4.750070370443105,
      "grad_norm": 0.4269125163555145,
      "learning_rate": 5.031529424167946e-07,
      "loss": 0.0108,
      "step": 2902540
    },
    {
      "epoch": 4.750103100881758,
      "grad_norm": 0.06545543670654297,
      "learning_rate": 5.030870502032776e-07,
      "loss": 0.0121,
      "step": 2902560
    },
    {
      "epoch": 4.750135831320412,
      "grad_norm": 0.0715719535946846,
      "learning_rate": 5.030211579897604e-07,
      "loss": 0.007,
      "step": 2902580
    },
    {
      "epoch": 4.7501685617590645,
      "grad_norm": 0.30648794770240784,
      "learning_rate": 5.029552657762433e-07,
      "loss": 0.007,
      "step": 2902600
    },
    {
      "epoch": 4.750201292197718,
      "grad_norm": 0.07149756699800491,
      "learning_rate": 5.028893735627261e-07,
      "loss": 0.0063,
      "step": 2902620
    },
    {
      "epoch": 4.750234022636372,
      "grad_norm": 0.12585477530956268,
      "learning_rate": 5.02823481349209e-07,
      "loss": 0.0076,
      "step": 2902640
    },
    {
      "epoch": 4.750266753075024,
      "grad_norm": 0.5074957609176636,
      "learning_rate": 5.027575891356919e-07,
      "loss": 0.009,
      "step": 2902660
    },
    {
      "epoch": 4.750299483513678,
      "grad_norm": 0.23903316259384155,
      "learning_rate": 5.026916969221747e-07,
      "loss": 0.0056,
      "step": 2902680
    },
    {
      "epoch": 4.750332213952332,
      "grad_norm": 0.34810733795166016,
      "learning_rate": 5.026258047086576e-07,
      "loss": 0.0099,
      "step": 2902700
    },
    {
      "epoch": 4.750364944390984,
      "grad_norm": 0.6141855716705322,
      "learning_rate": 5.025599124951405e-07,
      "loss": 0.0125,
      "step": 2902720
    },
    {
      "epoch": 4.750397674829638,
      "grad_norm": 0.5003551244735718,
      "learning_rate": 5.024940202816234e-07,
      "loss": 0.0099,
      "step": 2902740
    },
    {
      "epoch": 4.7504304052682915,
      "grad_norm": 0.3145962059497833,
      "learning_rate": 5.024281280681062e-07,
      "loss": 0.009,
      "step": 2902760
    },
    {
      "epoch": 4.750463135706945,
      "grad_norm": 0.17335301637649536,
      "learning_rate": 5.023622358545891e-07,
      "loss": 0.0086,
      "step": 2902780
    },
    {
      "epoch": 4.750495866145598,
      "grad_norm": 0.13776405155658722,
      "learning_rate": 5.022963436410719e-07,
      "loss": 0.0101,
      "step": 2902800
    },
    {
      "epoch": 4.750528596584251,
      "grad_norm": 0.18514959514141083,
      "learning_rate": 5.022304514275549e-07,
      "loss": 0.0106,
      "step": 2902820
    },
    {
      "epoch": 4.750561327022905,
      "grad_norm": 0.25173482298851013,
      "learning_rate": 5.021645592140377e-07,
      "loss": 0.012,
      "step": 2902840
    },
    {
      "epoch": 4.750594057461558,
      "grad_norm": 0.0939604640007019,
      "learning_rate": 5.020986670005206e-07,
      "loss": 0.0107,
      "step": 2902860
    },
    {
      "epoch": 4.750626787900211,
      "grad_norm": 0.3335212469100952,
      "learning_rate": 5.020327747870034e-07,
      "loss": 0.0123,
      "step": 2902880
    },
    {
      "epoch": 4.750659518338865,
      "grad_norm": 0.27583086490631104,
      "learning_rate": 5.019668825734864e-07,
      "loss": 0.0089,
      "step": 2902900
    },
    {
      "epoch": 4.7506922487775185,
      "grad_norm": 0.21579298377037048,
      "learning_rate": 5.019009903599692e-07,
      "loss": 0.0102,
      "step": 2902920
    },
    {
      "epoch": 4.750724979216171,
      "grad_norm": 0.2734173536300659,
      "learning_rate": 5.018350981464521e-07,
      "loss": 0.0175,
      "step": 2902940
    },
    {
      "epoch": 4.750757709654825,
      "grad_norm": 0.10789225250482559,
      "learning_rate": 5.017692059329349e-07,
      "loss": 0.0153,
      "step": 2902960
    },
    {
      "epoch": 4.750790440093478,
      "grad_norm": 0.4351055920124054,
      "learning_rate": 5.017033137194177e-07,
      "loss": 0.0084,
      "step": 2902980
    },
    {
      "epoch": 4.750823170532131,
      "grad_norm": 0.2161707580089569,
      "learning_rate": 5.016374215059007e-07,
      "loss": 0.0117,
      "step": 2903000
    },
    {
      "epoch": 4.750855900970785,
      "grad_norm": 0.14389953017234802,
      "learning_rate": 5.015715292923835e-07,
      "loss": 0.0084,
      "step": 2903020
    },
    {
      "epoch": 4.750888631409438,
      "grad_norm": 0.21333669126033783,
      "learning_rate": 5.015056370788665e-07,
      "loss": 0.0098,
      "step": 2903040
    },
    {
      "epoch": 4.750921361848091,
      "grad_norm": 0.12306956201791763,
      "learning_rate": 5.014397448653492e-07,
      "loss": 0.0099,
      "step": 2903060
    },
    {
      "epoch": 4.750954092286745,
      "grad_norm": 0.4070049524307251,
      "learning_rate": 5.013738526518322e-07,
      "loss": 0.0056,
      "step": 2903080
    },
    {
      "epoch": 4.750986822725398,
      "grad_norm": 0.08375727385282516,
      "learning_rate": 5.01307960438315e-07,
      "loss": 0.0077,
      "step": 2903100
    },
    {
      "epoch": 4.751019553164052,
      "grad_norm": 0.12074797600507736,
      "learning_rate": 5.01242068224798e-07,
      "loss": 0.0085,
      "step": 2903120
    },
    {
      "epoch": 4.7510522836027045,
      "grad_norm": 0.2600650191307068,
      "learning_rate": 5.011761760112808e-07,
      "loss": 0.0097,
      "step": 2903140
    },
    {
      "epoch": 4.751085014041358,
      "grad_norm": 0.20417745411396027,
      "learning_rate": 5.011102837977637e-07,
      "loss": 0.0071,
      "step": 2903160
    },
    {
      "epoch": 4.751117744480012,
      "grad_norm": 0.1453264057636261,
      "learning_rate": 5.010443915842465e-07,
      "loss": 0.0131,
      "step": 2903180
    },
    {
      "epoch": 4.751150474918665,
      "grad_norm": 0.30171892046928406,
      "learning_rate": 5.009784993707294e-07,
      "loss": 0.0069,
      "step": 2903200
    },
    {
      "epoch": 4.751183205357318,
      "grad_norm": 0.1563800424337387,
      "learning_rate": 5.009126071572123e-07,
      "loss": 0.0097,
      "step": 2903220
    },
    {
      "epoch": 4.751215935795972,
      "grad_norm": 0.08497898280620575,
      "learning_rate": 5.008467149436952e-07,
      "loss": 0.008,
      "step": 2903240
    },
    {
      "epoch": 4.751248666234625,
      "grad_norm": 0.21634316444396973,
      "learning_rate": 5.00780822730178e-07,
      "loss": 0.0093,
      "step": 2903260
    },
    {
      "epoch": 4.751281396673278,
      "grad_norm": 0.27720898389816284,
      "learning_rate": 5.007149305166609e-07,
      "loss": 0.0102,
      "step": 2903280
    },
    {
      "epoch": 4.7513141271119315,
      "grad_norm": 0.11648198217153549,
      "learning_rate": 5.006490383031438e-07,
      "loss": 0.0077,
      "step": 2903300
    },
    {
      "epoch": 4.751346857550585,
      "grad_norm": 0.14553554356098175,
      "learning_rate": 5.005831460896267e-07,
      "loss": 0.0056,
      "step": 2903320
    },
    {
      "epoch": 4.751379587989238,
      "grad_norm": 0.19290250539779663,
      "learning_rate": 5.005172538761095e-07,
      "loss": 0.0086,
      "step": 2903340
    },
    {
      "epoch": 4.751412318427891,
      "grad_norm": 0.2493903487920761,
      "learning_rate": 5.004513616625923e-07,
      "loss": 0.0088,
      "step": 2903360
    },
    {
      "epoch": 4.751445048866545,
      "grad_norm": 0.06041808798909187,
      "learning_rate": 5.003854694490753e-07,
      "loss": 0.0092,
      "step": 2903380
    },
    {
      "epoch": 4.7514777793051985,
      "grad_norm": 0.1261676698923111,
      "learning_rate": 5.003195772355581e-07,
      "loss": 0.0074,
      "step": 2903400
    },
    {
      "epoch": 4.751510509743851,
      "grad_norm": 0.055116020143032074,
      "learning_rate": 5.00253685022041e-07,
      "loss": 0.0082,
      "step": 2903420
    },
    {
      "epoch": 4.751543240182505,
      "grad_norm": 0.17240765690803528,
      "learning_rate": 5.001877928085238e-07,
      "loss": 0.0079,
      "step": 2903440
    },
    {
      "epoch": 4.751575970621158,
      "grad_norm": 0.08359862118959427,
      "learning_rate": 5.001219005950067e-07,
      "loss": 0.0122,
      "step": 2903460
    },
    {
      "epoch": 4.751608701059812,
      "grad_norm": 0.42704373598098755,
      "learning_rate": 5.000560083814896e-07,
      "loss": 0.0103,
      "step": 2903480
    },
    {
      "epoch": 4.751641431498465,
      "grad_norm": 0.3451541066169739,
      "learning_rate": 4.999901161679725e-07,
      "loss": 0.0095,
      "step": 2903500
    },
    {
      "epoch": 4.751674161937118,
      "grad_norm": 0.13103421032428741,
      "learning_rate": 4.999242239544553e-07,
      "loss": 0.0054,
      "step": 2903520
    },
    {
      "epoch": 4.751706892375772,
      "grad_norm": 0.12305992841720581,
      "learning_rate": 4.998583317409382e-07,
      "loss": 0.0065,
      "step": 2903540
    },
    {
      "epoch": 4.751739622814425,
      "grad_norm": 0.21992868185043335,
      "learning_rate": 4.997924395274211e-07,
      "loss": 0.0073,
      "step": 2903560
    },
    {
      "epoch": 4.751772353253078,
      "grad_norm": 0.2337794005870819,
      "learning_rate": 4.99726547313904e-07,
      "loss": 0.0096,
      "step": 2903580
    },
    {
      "epoch": 4.751805083691732,
      "grad_norm": 0.2498699128627777,
      "learning_rate": 4.996606551003868e-07,
      "loss": 0.012,
      "step": 2903600
    },
    {
      "epoch": 4.7518378141303845,
      "grad_norm": 0.3542528450489044,
      "learning_rate": 4.995947628868697e-07,
      "loss": 0.0096,
      "step": 2903620
    },
    {
      "epoch": 4.751870544569038,
      "grad_norm": 0.1411762237548828,
      "learning_rate": 4.995288706733525e-07,
      "loss": 0.007,
      "step": 2903640
    },
    {
      "epoch": 4.751903275007692,
      "grad_norm": 0.11089984327554703,
      "learning_rate": 4.994629784598354e-07,
      "loss": 0.0068,
      "step": 2903660
    },
    {
      "epoch": 4.751936005446345,
      "grad_norm": 0.3748339116573334,
      "learning_rate": 4.993970862463183e-07,
      "loss": 0.0099,
      "step": 2903680
    },
    {
      "epoch": 4.751968735884998,
      "grad_norm": 0.2584686577320099,
      "learning_rate": 4.993311940328012e-07,
      "loss": 0.0072,
      "step": 2903700
    },
    {
      "epoch": 4.752001466323652,
      "grad_norm": 0.22095192968845367,
      "learning_rate": 4.99265301819284e-07,
      "loss": 0.0069,
      "step": 2903720
    },
    {
      "epoch": 4.752034196762305,
      "grad_norm": 0.09113840758800507,
      "learning_rate": 4.991994096057669e-07,
      "loss": 0.0072,
      "step": 2903740
    },
    {
      "epoch": 4.752066927200959,
      "grad_norm": 0.20978513360023499,
      "learning_rate": 4.991335173922498e-07,
      "loss": 0.0079,
      "step": 2903760
    },
    {
      "epoch": 4.7520996576396115,
      "grad_norm": 0.19784122705459595,
      "learning_rate": 4.990676251787327e-07,
      "loss": 0.0089,
      "step": 2903780
    },
    {
      "epoch": 4.752132388078265,
      "grad_norm": 0.19757790863513947,
      "learning_rate": 4.990017329652156e-07,
      "loss": 0.0129,
      "step": 2903800
    },
    {
      "epoch": 4.752165118516919,
      "grad_norm": 0.08572860807180405,
      "learning_rate": 4.989358407516984e-07,
      "loss": 0.0079,
      "step": 2903820
    },
    {
      "epoch": 4.752197848955571,
      "grad_norm": 0.30302953720092773,
      "learning_rate": 4.988699485381813e-07,
      "loss": 0.0126,
      "step": 2903840
    },
    {
      "epoch": 4.752230579394225,
      "grad_norm": 0.2554236352443695,
      "learning_rate": 4.988040563246641e-07,
      "loss": 0.0119,
      "step": 2903860
    },
    {
      "epoch": 4.752263309832879,
      "grad_norm": 0.26318779587745667,
      "learning_rate": 4.987381641111471e-07,
      "loss": 0.0068,
      "step": 2903880
    },
    {
      "epoch": 4.752296040271531,
      "grad_norm": 0.08452843129634857,
      "learning_rate": 4.986722718976299e-07,
      "loss": 0.0098,
      "step": 2903900
    },
    {
      "epoch": 4.752328770710185,
      "grad_norm": 0.40227261185646057,
      "learning_rate": 4.986063796841128e-07,
      "loss": 0.0146,
      "step": 2903920
    },
    {
      "epoch": 4.7523615011488385,
      "grad_norm": 0.19826972484588623,
      "learning_rate": 4.985404874705956e-07,
      "loss": 0.0149,
      "step": 2903940
    },
    {
      "epoch": 4.752394231587492,
      "grad_norm": 0.22522200644016266,
      "learning_rate": 4.984745952570786e-07,
      "loss": 0.0105,
      "step": 2903960
    },
    {
      "epoch": 4.752426962026145,
      "grad_norm": 0.2013653814792633,
      "learning_rate": 4.984087030435614e-07,
      "loss": 0.0078,
      "step": 2903980
    },
    {
      "epoch": 4.752459692464798,
      "grad_norm": 0.25872668623924255,
      "learning_rate": 4.983428108300442e-07,
      "loss": 0.0107,
      "step": 2904000
    },
    {
      "epoch": 4.752492422903452,
      "grad_norm": 0.1958310902118683,
      "learning_rate": 4.982769186165271e-07,
      "loss": 0.0071,
      "step": 2904020
    },
    {
      "epoch": 4.752525153342106,
      "grad_norm": 0.1252102553844452,
      "learning_rate": 4.9821102640301e-07,
      "loss": 0.0088,
      "step": 2904040
    },
    {
      "epoch": 4.752557883780758,
      "grad_norm": 0.8004398345947266,
      "learning_rate": 4.981451341894929e-07,
      "loss": 0.0114,
      "step": 2904060
    },
    {
      "epoch": 4.752590614219412,
      "grad_norm": 0.14470766484737396,
      "learning_rate": 4.980792419759757e-07,
      "loss": 0.007,
      "step": 2904080
    },
    {
      "epoch": 4.7526233446580655,
      "grad_norm": 0.2231057584285736,
      "learning_rate": 4.980133497624586e-07,
      "loss": 0.0087,
      "step": 2904100
    },
    {
      "epoch": 4.752656075096718,
      "grad_norm": 0.18814396858215332,
      "learning_rate": 4.979474575489414e-07,
      "loss": 0.0052,
      "step": 2904120
    },
    {
      "epoch": 4.752688805535372,
      "grad_norm": 0.19266697764396667,
      "learning_rate": 4.978815653354244e-07,
      "loss": 0.008,
      "step": 2904140
    },
    {
      "epoch": 4.752721535974025,
      "grad_norm": 0.28233903646469116,
      "learning_rate": 4.978156731219072e-07,
      "loss": 0.0113,
      "step": 2904160
    },
    {
      "epoch": 4.752754266412678,
      "grad_norm": 0.5057500600814819,
      "learning_rate": 4.977497809083901e-07,
      "loss": 0.0132,
      "step": 2904180
    },
    {
      "epoch": 4.752786996851332,
      "grad_norm": 0.08929824084043503,
      "learning_rate": 4.976838886948729e-07,
      "loss": 0.0074,
      "step": 2904200
    },
    {
      "epoch": 4.752819727289985,
      "grad_norm": 0.6454092264175415,
      "learning_rate": 4.976179964813559e-07,
      "loss": 0.0117,
      "step": 2904220
    },
    {
      "epoch": 4.752852457728639,
      "grad_norm": 0.06526128947734833,
      "learning_rate": 4.975521042678387e-07,
      "loss": 0.0081,
      "step": 2904240
    },
    {
      "epoch": 4.7528851881672916,
      "grad_norm": 0.1641065627336502,
      "learning_rate": 4.974862120543217e-07,
      "loss": 0.0052,
      "step": 2904260
    },
    {
      "epoch": 4.752917918605945,
      "grad_norm": 0.27446162700653076,
      "learning_rate": 4.974203198408044e-07,
      "loss": 0.0073,
      "step": 2904280
    },
    {
      "epoch": 4.752950649044599,
      "grad_norm": 0.4440431594848633,
      "learning_rate": 4.973544276272873e-07,
      "loss": 0.0065,
      "step": 2904300
    },
    {
      "epoch": 4.7529833794832514,
      "grad_norm": 0.13842910528182983,
      "learning_rate": 4.972885354137702e-07,
      "loss": 0.0076,
      "step": 2904320
    },
    {
      "epoch": 4.753016109921905,
      "grad_norm": 0.23419836163520813,
      "learning_rate": 4.97222643200253e-07,
      "loss": 0.0096,
      "step": 2904340
    },
    {
      "epoch": 4.753048840360559,
      "grad_norm": 0.09369773417711258,
      "learning_rate": 4.97156750986736e-07,
      "loss": 0.0079,
      "step": 2904360
    },
    {
      "epoch": 4.753081570799212,
      "grad_norm": 0.08783557265996933,
      "learning_rate": 4.970908587732187e-07,
      "loss": 0.0081,
      "step": 2904380
    },
    {
      "epoch": 4.753114301237865,
      "grad_norm": 0.22179967164993286,
      "learning_rate": 4.970249665597017e-07,
      "loss": 0.0067,
      "step": 2904400
    },
    {
      "epoch": 4.7531470316765185,
      "grad_norm": 0.5272637605667114,
      "learning_rate": 4.969590743461845e-07,
      "loss": 0.0092,
      "step": 2904420
    },
    {
      "epoch": 4.753179762115172,
      "grad_norm": 0.40808436274528503,
      "learning_rate": 4.968931821326675e-07,
      "loss": 0.0075,
      "step": 2904440
    },
    {
      "epoch": 4.753212492553825,
      "grad_norm": 0.3358314037322998,
      "learning_rate": 4.968272899191503e-07,
      "loss": 0.0122,
      "step": 2904460
    },
    {
      "epoch": 4.753245222992478,
      "grad_norm": 0.3734337389469147,
      "learning_rate": 4.967613977056332e-07,
      "loss": 0.0089,
      "step": 2904480
    },
    {
      "epoch": 4.753277953431132,
      "grad_norm": 0.2932779788970947,
      "learning_rate": 4.96695505492116e-07,
      "loss": 0.0137,
      "step": 2904500
    },
    {
      "epoch": 4.753310683869785,
      "grad_norm": 0.2252805083990097,
      "learning_rate": 4.966296132785989e-07,
      "loss": 0.0098,
      "step": 2904520
    },
    {
      "epoch": 4.753343414308438,
      "grad_norm": 0.1332932412624359,
      "learning_rate": 4.965637210650818e-07,
      "loss": 0.0074,
      "step": 2904540
    },
    {
      "epoch": 4.753376144747092,
      "grad_norm": 0.043876972049474716,
      "learning_rate": 4.964978288515647e-07,
      "loss": 0.0057,
      "step": 2904560
    },
    {
      "epoch": 4.7534088751857455,
      "grad_norm": 0.10459547489881516,
      "learning_rate": 4.964319366380475e-07,
      "loss": 0.0103,
      "step": 2904580
    },
    {
      "epoch": 4.753441605624398,
      "grad_norm": 0.17612721025943756,
      "learning_rate": 4.963660444245304e-07,
      "loss": 0.0073,
      "step": 2904600
    },
    {
      "epoch": 4.753474336063052,
      "grad_norm": 0.3602244555950165,
      "learning_rate": 4.963001522110133e-07,
      "loss": 0.0122,
      "step": 2904620
    },
    {
      "epoch": 4.753507066501705,
      "grad_norm": 0.17795568704605103,
      "learning_rate": 4.962342599974962e-07,
      "loss": 0.008,
      "step": 2904640
    },
    {
      "epoch": 4.753539796940359,
      "grad_norm": 0.149434432387352,
      "learning_rate": 4.96168367783979e-07,
      "loss": 0.0088,
      "step": 2904660
    },
    {
      "epoch": 4.753572527379012,
      "grad_norm": 0.2379610389471054,
      "learning_rate": 4.961024755704618e-07,
      "loss": 0.0093,
      "step": 2904680
    },
    {
      "epoch": 4.753605257817665,
      "grad_norm": 0.1975172460079193,
      "learning_rate": 4.960365833569448e-07,
      "loss": 0.0074,
      "step": 2904700
    },
    {
      "epoch": 4.753637988256319,
      "grad_norm": 0.05887603014707565,
      "learning_rate": 4.959706911434276e-07,
      "loss": 0.0075,
      "step": 2904720
    },
    {
      "epoch": 4.753670718694972,
      "grad_norm": 0.48254281282424927,
      "learning_rate": 4.959047989299105e-07,
      "loss": 0.0108,
      "step": 2904740
    },
    {
      "epoch": 4.753703449133625,
      "grad_norm": 0.14938075840473175,
      "learning_rate": 4.958389067163933e-07,
      "loss": 0.0072,
      "step": 2904760
    },
    {
      "epoch": 4.753736179572279,
      "grad_norm": 0.10623003542423248,
      "learning_rate": 4.957730145028762e-07,
      "loss": 0.0063,
      "step": 2904780
    },
    {
      "epoch": 4.7537689100109315,
      "grad_norm": 0.27868860960006714,
      "learning_rate": 4.957071222893591e-07,
      "loss": 0.0101,
      "step": 2904800
    },
    {
      "epoch": 4.753801640449585,
      "grad_norm": 0.24863553047180176,
      "learning_rate": 4.95641230075842e-07,
      "loss": 0.0083,
      "step": 2904820
    },
    {
      "epoch": 4.753834370888239,
      "grad_norm": 0.2213066965341568,
      "learning_rate": 4.955753378623248e-07,
      "loss": 0.0079,
      "step": 2904840
    },
    {
      "epoch": 4.753867101326892,
      "grad_norm": 0.10229825228452682,
      "learning_rate": 4.955094456488077e-07,
      "loss": 0.0103,
      "step": 2904860
    },
    {
      "epoch": 4.753899831765545,
      "grad_norm": 0.11917027086019516,
      "learning_rate": 4.954435534352906e-07,
      "loss": 0.0082,
      "step": 2904880
    },
    {
      "epoch": 4.753932562204199,
      "grad_norm": 0.36599650979042053,
      "learning_rate": 4.953776612217735e-07,
      "loss": 0.0094,
      "step": 2904900
    },
    {
      "epoch": 4.753965292642852,
      "grad_norm": 0.14109653234481812,
      "learning_rate": 4.953117690082564e-07,
      "loss": 0.0084,
      "step": 2904920
    },
    {
      "epoch": 4.753998023081506,
      "grad_norm": 0.2379172146320343,
      "learning_rate": 4.952458767947392e-07,
      "loss": 0.0096,
      "step": 2904940
    },
    {
      "epoch": 4.7540307535201585,
      "grad_norm": 0.21268543601036072,
      "learning_rate": 4.95179984581222e-07,
      "loss": 0.0057,
      "step": 2904960
    },
    {
      "epoch": 4.754063483958812,
      "grad_norm": 0.22070156037807465,
      "learning_rate": 4.95114092367705e-07,
      "loss": 0.007,
      "step": 2904980
    },
    {
      "epoch": 4.754096214397466,
      "grad_norm": 0.8952364325523376,
      "learning_rate": 4.950482001541878e-07,
      "loss": 0.0074,
      "step": 2905000
    },
    {
      "epoch": 4.754128944836118,
      "grad_norm": 0.10374363511800766,
      "learning_rate": 4.949823079406708e-07,
      "loss": 0.0073,
      "step": 2905020
    },
    {
      "epoch": 4.754161675274772,
      "grad_norm": 0.1427561640739441,
      "learning_rate": 4.949164157271535e-07,
      "loss": 0.0081,
      "step": 2905040
    },
    {
      "epoch": 4.7541944057134256,
      "grad_norm": 0.2506016194820404,
      "learning_rate": 4.948505235136364e-07,
      "loss": 0.0147,
      "step": 2905060
    },
    {
      "epoch": 4.754227136152078,
      "grad_norm": 0.7175345420837402,
      "learning_rate": 4.947846313001193e-07,
      "loss": 0.0117,
      "step": 2905080
    },
    {
      "epoch": 4.754259866590732,
      "grad_norm": 0.22301767766475677,
      "learning_rate": 4.947187390866022e-07,
      "loss": 0.0097,
      "step": 2905100
    },
    {
      "epoch": 4.7542925970293854,
      "grad_norm": 0.19337373971939087,
      "learning_rate": 4.946528468730851e-07,
      "loss": 0.009,
      "step": 2905120
    },
    {
      "epoch": 4.754325327468039,
      "grad_norm": 0.15199269354343414,
      "learning_rate": 4.945869546595679e-07,
      "loss": 0.0087,
      "step": 2905140
    },
    {
      "epoch": 4.754358057906692,
      "grad_norm": 0.39790135622024536,
      "learning_rate": 4.945210624460508e-07,
      "loss": 0.0097,
      "step": 2905160
    },
    {
      "epoch": 4.754390788345345,
      "grad_norm": 0.48441237211227417,
      "learning_rate": 4.944551702325336e-07,
      "loss": 0.0114,
      "step": 2905180
    },
    {
      "epoch": 4.754423518783999,
      "grad_norm": 0.07056286931037903,
      "learning_rate": 4.943892780190166e-07,
      "loss": 0.0119,
      "step": 2905200
    },
    {
      "epoch": 4.7544562492226525,
      "grad_norm": 0.2232394516468048,
      "learning_rate": 4.943233858054994e-07,
      "loss": 0.0062,
      "step": 2905220
    },
    {
      "epoch": 4.754488979661305,
      "grad_norm": 0.18113715946674347,
      "learning_rate": 4.942574935919823e-07,
      "loss": 0.0118,
      "step": 2905240
    },
    {
      "epoch": 4.754521710099959,
      "grad_norm": 0.23262034356594086,
      "learning_rate": 4.941916013784651e-07,
      "loss": 0.0105,
      "step": 2905260
    },
    {
      "epoch": 4.754554440538612,
      "grad_norm": 0.13369770348072052,
      "learning_rate": 4.941257091649481e-07,
      "loss": 0.0055,
      "step": 2905280
    },
    {
      "epoch": 4.754587170977265,
      "grad_norm": 0.40158867835998535,
      "learning_rate": 4.940598169514309e-07,
      "loss": 0.0091,
      "step": 2905300
    },
    {
      "epoch": 4.754619901415919,
      "grad_norm": 0.12851549685001373,
      "learning_rate": 4.939939247379138e-07,
      "loss": 0.0091,
      "step": 2905320
    },
    {
      "epoch": 4.754652631854572,
      "grad_norm": 0.3907189667224884,
      "learning_rate": 4.939280325243966e-07,
      "loss": 0.0108,
      "step": 2905340
    },
    {
      "epoch": 4.754685362293225,
      "grad_norm": 0.28730663657188416,
      "learning_rate": 4.938621403108796e-07,
      "loss": 0.013,
      "step": 2905360
    },
    {
      "epoch": 4.754718092731879,
      "grad_norm": 0.3079506754875183,
      "learning_rate": 4.937962480973624e-07,
      "loss": 0.007,
      "step": 2905380
    },
    {
      "epoch": 4.754750823170532,
      "grad_norm": 0.31332942843437195,
      "learning_rate": 4.937303558838452e-07,
      "loss": 0.0071,
      "step": 2905400
    },
    {
      "epoch": 4.754783553609186,
      "grad_norm": 0.3745948374271393,
      "learning_rate": 4.936644636703281e-07,
      "loss": 0.0126,
      "step": 2905420
    },
    {
      "epoch": 4.7548162840478385,
      "grad_norm": 0.2872415781021118,
      "learning_rate": 4.935985714568109e-07,
      "loss": 0.0116,
      "step": 2905440
    },
    {
      "epoch": 4.754849014486492,
      "grad_norm": 0.07559596002101898,
      "learning_rate": 4.935326792432939e-07,
      "loss": 0.0082,
      "step": 2905460
    },
    {
      "epoch": 4.754881744925146,
      "grad_norm": 0.36868783831596375,
      "learning_rate": 4.934667870297767e-07,
      "loss": 0.0061,
      "step": 2905480
    },
    {
      "epoch": 4.754914475363799,
      "grad_norm": 0.11350003629922867,
      "learning_rate": 4.934008948162596e-07,
      "loss": 0.0098,
      "step": 2905500
    },
    {
      "epoch": 4.754947205802452,
      "grad_norm": 0.2084493488073349,
      "learning_rate": 4.933350026027424e-07,
      "loss": 0.0085,
      "step": 2905520
    },
    {
      "epoch": 4.754979936241106,
      "grad_norm": 0.23818960785865784,
      "learning_rate": 4.932691103892254e-07,
      "loss": 0.0067,
      "step": 2905540
    },
    {
      "epoch": 4.755012666679759,
      "grad_norm": 0.07872121781110764,
      "learning_rate": 4.932032181757082e-07,
      "loss": 0.0089,
      "step": 2905560
    },
    {
      "epoch": 4.755045397118412,
      "grad_norm": 0.19534020125865936,
      "learning_rate": 4.931373259621912e-07,
      "loss": 0.0067,
      "step": 2905580
    },
    {
      "epoch": 4.7550781275570655,
      "grad_norm": 0.18183813989162445,
      "learning_rate": 4.930714337486739e-07,
      "loss": 0.0065,
      "step": 2905600
    },
    {
      "epoch": 4.755110857995719,
      "grad_norm": 0.0753457248210907,
      "learning_rate": 4.930055415351568e-07,
      "loss": 0.0081,
      "step": 2905620
    },
    {
      "epoch": 4.755143588434372,
      "grad_norm": 0.14663048088550568,
      "learning_rate": 4.929396493216397e-07,
      "loss": 0.0056,
      "step": 2905640
    },
    {
      "epoch": 4.755176318873025,
      "grad_norm": 0.16361230611801147,
      "learning_rate": 4.928737571081226e-07,
      "loss": 0.0059,
      "step": 2905660
    },
    {
      "epoch": 4.755209049311679,
      "grad_norm": 0.028865061700344086,
      "learning_rate": 4.928078648946055e-07,
      "loss": 0.0062,
      "step": 2905680
    },
    {
      "epoch": 4.755241779750333,
      "grad_norm": 0.626335620880127,
      "learning_rate": 4.927419726810882e-07,
      "loss": 0.0093,
      "step": 2905700
    },
    {
      "epoch": 4.755274510188985,
      "grad_norm": 0.19159357249736786,
      "learning_rate": 4.926760804675712e-07,
      "loss": 0.0074,
      "step": 2905720
    },
    {
      "epoch": 4.755307240627639,
      "grad_norm": 0.30229684710502625,
      "learning_rate": 4.92610188254054e-07,
      "loss": 0.0094,
      "step": 2905740
    },
    {
      "epoch": 4.7553399710662925,
      "grad_norm": 0.438880056142807,
      "learning_rate": 4.92544296040537e-07,
      "loss": 0.006,
      "step": 2905760
    },
    {
      "epoch": 4.755372701504946,
      "grad_norm": 0.21875008940696716,
      "learning_rate": 4.924784038270198e-07,
      "loss": 0.0069,
      "step": 2905780
    },
    {
      "epoch": 4.755405431943599,
      "grad_norm": 0.1674872189760208,
      "learning_rate": 4.924125116135027e-07,
      "loss": 0.0125,
      "step": 2905800
    },
    {
      "epoch": 4.755438162382252,
      "grad_norm": 0.14307284355163574,
      "learning_rate": 4.923466193999855e-07,
      "loss": 0.0073,
      "step": 2905820
    },
    {
      "epoch": 4.755470892820906,
      "grad_norm": 0.1399061232805252,
      "learning_rate": 4.922807271864684e-07,
      "loss": 0.0092,
      "step": 2905840
    },
    {
      "epoch": 4.755503623259559,
      "grad_norm": 0.23783543705940247,
      "learning_rate": 4.922148349729513e-07,
      "loss": 0.006,
      "step": 2905860
    },
    {
      "epoch": 4.755536353698212,
      "grad_norm": 0.20798726379871368,
      "learning_rate": 4.921489427594342e-07,
      "loss": 0.0081,
      "step": 2905880
    },
    {
      "epoch": 4.755569084136866,
      "grad_norm": 0.30878859758377075,
      "learning_rate": 4.92083050545917e-07,
      "loss": 0.0112,
      "step": 2905900
    },
    {
      "epoch": 4.755601814575519,
      "grad_norm": 0.03231111168861389,
      "learning_rate": 4.920171583323999e-07,
      "loss": 0.0081,
      "step": 2905920
    },
    {
      "epoch": 4.755634545014172,
      "grad_norm": 0.056552544236183167,
      "learning_rate": 4.919512661188828e-07,
      "loss": 0.0125,
      "step": 2905940
    },
    {
      "epoch": 4.755667275452826,
      "grad_norm": 0.5388473868370056,
      "learning_rate": 4.918853739053657e-07,
      "loss": 0.0068,
      "step": 2905960
    },
    {
      "epoch": 4.755700005891479,
      "grad_norm": 0.24681219458580017,
      "learning_rate": 4.918194816918485e-07,
      "loss": 0.007,
      "step": 2905980
    },
    {
      "epoch": 4.755732736330132,
      "grad_norm": 0.7440474033355713,
      "learning_rate": 4.917535894783314e-07,
      "loss": 0.0131,
      "step": 2906000
    },
    {
      "epoch": 4.755765466768786,
      "grad_norm": 0.04408685863018036,
      "learning_rate": 4.916876972648143e-07,
      "loss": 0.0088,
      "step": 2906020
    },
    {
      "epoch": 4.755798197207439,
      "grad_norm": 0.21234384179115295,
      "learning_rate": 4.916218050512971e-07,
      "loss": 0.0056,
      "step": 2906040
    },
    {
      "epoch": 4.755830927646092,
      "grad_norm": 0.04926782473921776,
      "learning_rate": 4.9155591283778e-07,
      "loss": 0.0082,
      "step": 2906060
    },
    {
      "epoch": 4.7558636580847455,
      "grad_norm": 0.09458215534687042,
      "learning_rate": 4.914900206242628e-07,
      "loss": 0.008,
      "step": 2906080
    },
    {
      "epoch": 4.755896388523399,
      "grad_norm": 0.3589341342449188,
      "learning_rate": 4.914241284107457e-07,
      "loss": 0.0095,
      "step": 2906100
    },
    {
      "epoch": 4.755929118962053,
      "grad_norm": 0.1423366367816925,
      "learning_rate": 4.913582361972286e-07,
      "loss": 0.0092,
      "step": 2906120
    },
    {
      "epoch": 4.755961849400705,
      "grad_norm": 0.7036816477775574,
      "learning_rate": 4.912923439837115e-07,
      "loss": 0.0074,
      "step": 2906140
    },
    {
      "epoch": 4.755994579839359,
      "grad_norm": 0.35077965259552,
      "learning_rate": 4.912264517701943e-07,
      "loss": 0.0093,
      "step": 2906160
    },
    {
      "epoch": 4.756027310278013,
      "grad_norm": 0.20404312014579773,
      "learning_rate": 4.911605595566772e-07,
      "loss": 0.0084,
      "step": 2906180
    },
    {
      "epoch": 4.756060040716665,
      "grad_norm": 0.31641122698783875,
      "learning_rate": 4.910946673431601e-07,
      "loss": 0.0092,
      "step": 2906200
    },
    {
      "epoch": 4.756092771155319,
      "grad_norm": 0.26847538352012634,
      "learning_rate": 4.91028775129643e-07,
      "loss": 0.008,
      "step": 2906220
    },
    {
      "epoch": 4.7561255015939725,
      "grad_norm": 0.8698229789733887,
      "learning_rate": 4.909628829161259e-07,
      "loss": 0.0048,
      "step": 2906240
    },
    {
      "epoch": 4.756158232032625,
      "grad_norm": 0.23814243078231812,
      "learning_rate": 4.908969907026087e-07,
      "loss": 0.0086,
      "step": 2906260
    },
    {
      "epoch": 4.756190962471279,
      "grad_norm": 0.3738871216773987,
      "learning_rate": 4.908310984890915e-07,
      "loss": 0.0099,
      "step": 2906280
    },
    {
      "epoch": 4.756223692909932,
      "grad_norm": 0.07983621954917908,
      "learning_rate": 4.907652062755745e-07,
      "loss": 0.0059,
      "step": 2906300
    },
    {
      "epoch": 4.756256423348586,
      "grad_norm": 0.13909830152988434,
      "learning_rate": 4.906993140620573e-07,
      "loss": 0.0102,
      "step": 2906320
    },
    {
      "epoch": 4.756289153787239,
      "grad_norm": 0.1515766829252243,
      "learning_rate": 4.906334218485403e-07,
      "loss": 0.0103,
      "step": 2906340
    },
    {
      "epoch": 4.756321884225892,
      "grad_norm": 0.22775274515151978,
      "learning_rate": 4.90567529635023e-07,
      "loss": 0.0094,
      "step": 2906360
    },
    {
      "epoch": 4.756354614664546,
      "grad_norm": 0.0818076804280281,
      "learning_rate": 4.90501637421506e-07,
      "loss": 0.0097,
      "step": 2906380
    },
    {
      "epoch": 4.7563873451031995,
      "grad_norm": 0.23601073026657104,
      "learning_rate": 4.904357452079888e-07,
      "loss": 0.0068,
      "step": 2906400
    },
    {
      "epoch": 4.756420075541852,
      "grad_norm": 0.1873856782913208,
      "learning_rate": 4.903698529944717e-07,
      "loss": 0.0083,
      "step": 2906420
    },
    {
      "epoch": 4.756452805980506,
      "grad_norm": 0.2280447781085968,
      "learning_rate": 4.903039607809546e-07,
      "loss": 0.0118,
      "step": 2906440
    },
    {
      "epoch": 4.756485536419159,
      "grad_norm": 0.15399174392223358,
      "learning_rate": 4.902380685674374e-07,
      "loss": 0.0111,
      "step": 2906460
    },
    {
      "epoch": 4.756518266857812,
      "grad_norm": 0.31639835238456726,
      "learning_rate": 4.901721763539203e-07,
      "loss": 0.0085,
      "step": 2906480
    },
    {
      "epoch": 4.756550997296466,
      "grad_norm": 0.11178641021251678,
      "learning_rate": 4.901062841404031e-07,
      "loss": 0.0073,
      "step": 2906500
    },
    {
      "epoch": 4.756583727735119,
      "grad_norm": 0.4844193756580353,
      "learning_rate": 4.900403919268861e-07,
      "loss": 0.0096,
      "step": 2906520
    },
    {
      "epoch": 4.756616458173772,
      "grad_norm": 0.0964106023311615,
      "learning_rate": 4.899744997133689e-07,
      "loss": 0.0109,
      "step": 2906540
    },
    {
      "epoch": 4.756649188612426,
      "grad_norm": 0.367643266916275,
      "learning_rate": 4.899086074998518e-07,
      "loss": 0.0075,
      "step": 2906560
    },
    {
      "epoch": 4.756681919051079,
      "grad_norm": 0.25405454635620117,
      "learning_rate": 4.898427152863346e-07,
      "loss": 0.0055,
      "step": 2906580
    },
    {
      "epoch": 4.756714649489733,
      "grad_norm": 0.26174837350845337,
      "learning_rate": 4.897768230728176e-07,
      "loss": 0.0111,
      "step": 2906600
    },
    {
      "epoch": 4.7567473799283855,
      "grad_norm": 0.08207985013723373,
      "learning_rate": 4.897109308593004e-07,
      "loss": 0.0114,
      "step": 2906620
    },
    {
      "epoch": 4.756780110367039,
      "grad_norm": 0.04237702116370201,
      "learning_rate": 4.896450386457833e-07,
      "loss": 0.0122,
      "step": 2906640
    },
    {
      "epoch": 4.756812840805693,
      "grad_norm": 0.4220101833343506,
      "learning_rate": 4.895791464322661e-07,
      "loss": 0.0072,
      "step": 2906660
    },
    {
      "epoch": 4.756845571244346,
      "grad_norm": 0.21185462176799774,
      "learning_rate": 4.895132542187491e-07,
      "loss": 0.0076,
      "step": 2906680
    },
    {
      "epoch": 4.756878301682999,
      "grad_norm": 0.37528276443481445,
      "learning_rate": 4.894473620052319e-07,
      "loss": 0.0093,
      "step": 2906700
    },
    {
      "epoch": 4.756911032121653,
      "grad_norm": 0.36052221059799194,
      "learning_rate": 4.893814697917147e-07,
      "loss": 0.008,
      "step": 2906720
    },
    {
      "epoch": 4.756943762560306,
      "grad_norm": 0.21413655579090118,
      "learning_rate": 4.893155775781976e-07,
      "loss": 0.0112,
      "step": 2906740
    },
    {
      "epoch": 4.756976492998959,
      "grad_norm": 0.6153175830841064,
      "learning_rate": 4.892496853646804e-07,
      "loss": 0.0089,
      "step": 2906760
    },
    {
      "epoch": 4.7570092234376125,
      "grad_norm": 0.3024601638317108,
      "learning_rate": 4.891837931511634e-07,
      "loss": 0.0134,
      "step": 2906780
    },
    {
      "epoch": 4.757041953876266,
      "grad_norm": 0.170431986451149,
      "learning_rate": 4.891179009376462e-07,
      "loss": 0.008,
      "step": 2906800
    },
    {
      "epoch": 4.757074684314919,
      "grad_norm": 0.09880591183900833,
      "learning_rate": 4.890520087241291e-07,
      "loss": 0.0088,
      "step": 2906820
    },
    {
      "epoch": 4.757107414753572,
      "grad_norm": 0.15298882126808167,
      "learning_rate": 4.889861165106119e-07,
      "loss": 0.0145,
      "step": 2906840
    },
    {
      "epoch": 4.757140145192226,
      "grad_norm": 0.3655071258544922,
      "learning_rate": 4.889202242970949e-07,
      "loss": 0.0093,
      "step": 2906860
    },
    {
      "epoch": 4.7571728756308795,
      "grad_norm": 0.4117289185523987,
      "learning_rate": 4.888543320835777e-07,
      "loss": 0.0095,
      "step": 2906880
    },
    {
      "epoch": 4.757205606069532,
      "grad_norm": 0.16197237372398376,
      "learning_rate": 4.887884398700607e-07,
      "loss": 0.007,
      "step": 2906900
    },
    {
      "epoch": 4.757238336508186,
      "grad_norm": 0.16666719317436218,
      "learning_rate": 4.887225476565434e-07,
      "loss": 0.0108,
      "step": 2906920
    },
    {
      "epoch": 4.757271066946839,
      "grad_norm": 0.16941271722316742,
      "learning_rate": 4.886566554430263e-07,
      "loss": 0.0084,
      "step": 2906940
    },
    {
      "epoch": 4.757303797385493,
      "grad_norm": 0.20628909766674042,
      "learning_rate": 4.885907632295092e-07,
      "loss": 0.0152,
      "step": 2906960
    },
    {
      "epoch": 4.757336527824146,
      "grad_norm": 0.47894084453582764,
      "learning_rate": 4.885248710159921e-07,
      "loss": 0.0084,
      "step": 2906980
    },
    {
      "epoch": 4.757369258262799,
      "grad_norm": 0.22542481124401093,
      "learning_rate": 4.88458978802475e-07,
      "loss": 0.0071,
      "step": 2907000
    },
    {
      "epoch": 4.757401988701453,
      "grad_norm": 0.24389348924160004,
      "learning_rate": 4.883930865889577e-07,
      "loss": 0.0094,
      "step": 2907020
    },
    {
      "epoch": 4.757434719140106,
      "grad_norm": 0.1617102473974228,
      "learning_rate": 4.883271943754407e-07,
      "loss": 0.0065,
      "step": 2907040
    },
    {
      "epoch": 4.757467449578759,
      "grad_norm": 0.46895864605903625,
      "learning_rate": 4.882613021619235e-07,
      "loss": 0.0151,
      "step": 2907060
    },
    {
      "epoch": 4.757500180017413,
      "grad_norm": 0.3596085011959076,
      "learning_rate": 4.881954099484065e-07,
      "loss": 0.0076,
      "step": 2907080
    },
    {
      "epoch": 4.7575329104560655,
      "grad_norm": 0.4890593886375427,
      "learning_rate": 4.881295177348893e-07,
      "loss": 0.0117,
      "step": 2907100
    },
    {
      "epoch": 4.757565640894719,
      "grad_norm": 0.12368714064359665,
      "learning_rate": 4.880636255213722e-07,
      "loss": 0.0064,
      "step": 2907120
    },
    {
      "epoch": 4.757598371333373,
      "grad_norm": 0.14192882180213928,
      "learning_rate": 4.87997733307855e-07,
      "loss": 0.0064,
      "step": 2907140
    },
    {
      "epoch": 4.757631101772026,
      "grad_norm": 0.233210489153862,
      "learning_rate": 4.879318410943379e-07,
      "loss": 0.0078,
      "step": 2907160
    },
    {
      "epoch": 4.757663832210679,
      "grad_norm": 0.1733945608139038,
      "learning_rate": 4.878659488808208e-07,
      "loss": 0.0092,
      "step": 2907180
    },
    {
      "epoch": 4.757696562649333,
      "grad_norm": 0.24637643992900848,
      "learning_rate": 4.878000566673037e-07,
      "loss": 0.0069,
      "step": 2907200
    },
    {
      "epoch": 4.757729293087986,
      "grad_norm": 0.16660219430923462,
      "learning_rate": 4.877341644537865e-07,
      "loss": 0.0093,
      "step": 2907220
    },
    {
      "epoch": 4.75776202352664,
      "grad_norm": 0.5141524076461792,
      "learning_rate": 4.876682722402694e-07,
      "loss": 0.0101,
      "step": 2907240
    },
    {
      "epoch": 4.7577947539652925,
      "grad_norm": 0.5500384569168091,
      "learning_rate": 4.876023800267523e-07,
      "loss": 0.0132,
      "step": 2907260
    },
    {
      "epoch": 4.757827484403946,
      "grad_norm": 0.27102941274642944,
      "learning_rate": 4.875364878132352e-07,
      "loss": 0.0078,
      "step": 2907280
    },
    {
      "epoch": 4.7578602148426,
      "grad_norm": 0.10013381391763687,
      "learning_rate": 4.87470595599718e-07,
      "loss": 0.0082,
      "step": 2907300
    },
    {
      "epoch": 4.757892945281252,
      "grad_norm": 0.09518546611070633,
      "learning_rate": 4.874047033862009e-07,
      "loss": 0.0122,
      "step": 2907320
    },
    {
      "epoch": 4.757925675719906,
      "grad_norm": 0.17748425900936127,
      "learning_rate": 4.873388111726838e-07,
      "loss": 0.01,
      "step": 2907340
    },
    {
      "epoch": 4.75795840615856,
      "grad_norm": 0.636594831943512,
      "learning_rate": 4.872729189591667e-07,
      "loss": 0.0141,
      "step": 2907360
    },
    {
      "epoch": 4.757991136597212,
      "grad_norm": 0.07046147435903549,
      "learning_rate": 4.872070267456495e-07,
      "loss": 0.0051,
      "step": 2907380
    },
    {
      "epoch": 4.758023867035866,
      "grad_norm": 0.0764988586306572,
      "learning_rate": 4.871411345321323e-07,
      "loss": 0.0125,
      "step": 2907400
    },
    {
      "epoch": 4.7580565974745195,
      "grad_norm": 0.0473376028239727,
      "learning_rate": 4.870752423186152e-07,
      "loss": 0.0082,
      "step": 2907420
    },
    {
      "epoch": 4.758089327913173,
      "grad_norm": 0.12269983440637589,
      "learning_rate": 4.870093501050981e-07,
      "loss": 0.005,
      "step": 2907440
    },
    {
      "epoch": 4.758122058351826,
      "grad_norm": 0.18738195300102234,
      "learning_rate": 4.86943457891581e-07,
      "loss": 0.0077,
      "step": 2907460
    },
    {
      "epoch": 4.758154788790479,
      "grad_norm": 0.20942290127277374,
      "learning_rate": 4.868775656780638e-07,
      "loss": 0.0133,
      "step": 2907480
    },
    {
      "epoch": 4.758187519229133,
      "grad_norm": 0.28222113847732544,
      "learning_rate": 4.868116734645467e-07,
      "loss": 0.0089,
      "step": 2907500
    },
    {
      "epoch": 4.758220249667786,
      "grad_norm": 0.3201099634170532,
      "learning_rate": 4.867457812510296e-07,
      "loss": 0.0102,
      "step": 2907520
    },
    {
      "epoch": 4.758252980106439,
      "grad_norm": 0.5041457414627075,
      "learning_rate": 4.866798890375125e-07,
      "loss": 0.0078,
      "step": 2907540
    },
    {
      "epoch": 4.758285710545093,
      "grad_norm": 0.08852488547563553,
      "learning_rate": 4.866139968239954e-07,
      "loss": 0.0081,
      "step": 2907560
    },
    {
      "epoch": 4.7583184409837465,
      "grad_norm": 0.19360165297985077,
      "learning_rate": 4.865481046104782e-07,
      "loss": 0.0055,
      "step": 2907580
    },
    {
      "epoch": 4.758351171422399,
      "grad_norm": 0.4313243329524994,
      "learning_rate": 4.86482212396961e-07,
      "loss": 0.0111,
      "step": 2907600
    },
    {
      "epoch": 4.758383901861053,
      "grad_norm": 0.16743209958076477,
      "learning_rate": 4.86416320183444e-07,
      "loss": 0.0121,
      "step": 2907620
    },
    {
      "epoch": 4.758416632299706,
      "grad_norm": 0.1371583640575409,
      "learning_rate": 4.863504279699268e-07,
      "loss": 0.0063,
      "step": 2907640
    },
    {
      "epoch": 4.758449362738359,
      "grad_norm": 0.23785153031349182,
      "learning_rate": 4.862845357564098e-07,
      "loss": 0.0093,
      "step": 2907660
    },
    {
      "epoch": 4.758482093177013,
      "grad_norm": 0.1898960918188095,
      "learning_rate": 4.862186435428925e-07,
      "loss": 0.012,
      "step": 2907680
    },
    {
      "epoch": 4.758514823615666,
      "grad_norm": 0.1464754194021225,
      "learning_rate": 4.861527513293755e-07,
      "loss": 0.0131,
      "step": 2907700
    },
    {
      "epoch": 4.758547554054319,
      "grad_norm": 0.15413779020309448,
      "learning_rate": 4.860868591158583e-07,
      "loss": 0.008,
      "step": 2907720
    },
    {
      "epoch": 4.7585802844929725,
      "grad_norm": 0.7249818444252014,
      "learning_rate": 4.860209669023413e-07,
      "loss": 0.0099,
      "step": 2907740
    },
    {
      "epoch": 4.758613014931626,
      "grad_norm": 0.30787110328674316,
      "learning_rate": 4.859550746888241e-07,
      "loss": 0.011,
      "step": 2907760
    },
    {
      "epoch": 4.75864574537028,
      "grad_norm": 0.08354717493057251,
      "learning_rate": 4.858891824753069e-07,
      "loss": 0.0071,
      "step": 2907780
    },
    {
      "epoch": 4.758678475808932,
      "grad_norm": 0.13852046430110931,
      "learning_rate": 4.858232902617898e-07,
      "loss": 0.0091,
      "step": 2907800
    },
    {
      "epoch": 4.758711206247586,
      "grad_norm": 0.6314594745635986,
      "learning_rate": 4.857573980482726e-07,
      "loss": 0.0134,
      "step": 2907820
    },
    {
      "epoch": 4.75874393668624,
      "grad_norm": 0.299983948469162,
      "learning_rate": 4.856915058347556e-07,
      "loss": 0.0094,
      "step": 2907840
    },
    {
      "epoch": 4.758776667124893,
      "grad_norm": 0.15487295389175415,
      "learning_rate": 4.856256136212384e-07,
      "loss": 0.0071,
      "step": 2907860
    },
    {
      "epoch": 4.758809397563546,
      "grad_norm": 0.43333181738853455,
      "learning_rate": 4.855597214077213e-07,
      "loss": 0.009,
      "step": 2907880
    },
    {
      "epoch": 4.7588421280021995,
      "grad_norm": 0.2335597574710846,
      "learning_rate": 4.854938291942041e-07,
      "loss": 0.0101,
      "step": 2907900
    },
    {
      "epoch": 4.758874858440853,
      "grad_norm": 0.4066708981990814,
      "learning_rate": 4.854279369806871e-07,
      "loss": 0.0102,
      "step": 2907920
    },
    {
      "epoch": 4.758907588879506,
      "grad_norm": 0.27554094791412354,
      "learning_rate": 4.853620447671699e-07,
      "loss": 0.0093,
      "step": 2907940
    },
    {
      "epoch": 4.758940319318159,
      "grad_norm": 0.6630487442016602,
      "learning_rate": 4.852961525536528e-07,
      "loss": 0.0131,
      "step": 2907960
    },
    {
      "epoch": 4.758973049756813,
      "grad_norm": 0.11886638402938843,
      "learning_rate": 4.852302603401356e-07,
      "loss": 0.0056,
      "step": 2907980
    },
    {
      "epoch": 4.759005780195466,
      "grad_norm": 0.21116718649864197,
      "learning_rate": 4.851643681266186e-07,
      "loss": 0.01,
      "step": 2908000
    },
    {
      "epoch": 4.759038510634119,
      "grad_norm": 0.1874769777059555,
      "learning_rate": 4.850984759131014e-07,
      "loss": 0.0099,
      "step": 2908020
    },
    {
      "epoch": 4.759071241072773,
      "grad_norm": 0.24609962105751038,
      "learning_rate": 4.850325836995843e-07,
      "loss": 0.0071,
      "step": 2908040
    },
    {
      "epoch": 4.7591039715114265,
      "grad_norm": 0.44044172763824463,
      "learning_rate": 4.849666914860671e-07,
      "loss": 0.0081,
      "step": 2908060
    },
    {
      "epoch": 4.759136701950079,
      "grad_norm": 0.24034424126148224,
      "learning_rate": 4.849007992725499e-07,
      "loss": 0.0084,
      "step": 2908080
    },
    {
      "epoch": 4.759169432388733,
      "grad_norm": 0.0645841732621193,
      "learning_rate": 4.848349070590329e-07,
      "loss": 0.0079,
      "step": 2908100
    },
    {
      "epoch": 4.759202162827386,
      "grad_norm": 0.09714239835739136,
      "learning_rate": 4.847690148455157e-07,
      "loss": 0.0072,
      "step": 2908120
    },
    {
      "epoch": 4.75923489326604,
      "grad_norm": 0.07668459415435791,
      "learning_rate": 4.847031226319986e-07,
      "loss": 0.0077,
      "step": 2908140
    },
    {
      "epoch": 4.759267623704693,
      "grad_norm": 0.4867205321788788,
      "learning_rate": 4.846372304184814e-07,
      "loss": 0.0091,
      "step": 2908160
    },
    {
      "epoch": 4.759300354143346,
      "grad_norm": 0.177628293633461,
      "learning_rate": 4.845713382049644e-07,
      "loss": 0.0155,
      "step": 2908180
    },
    {
      "epoch": 4.759333084582,
      "grad_norm": 0.16206969320774078,
      "learning_rate": 4.845054459914472e-07,
      "loss": 0.0069,
      "step": 2908200
    },
    {
      "epoch": 4.759365815020653,
      "grad_norm": 0.140359029173851,
      "learning_rate": 4.844395537779302e-07,
      "loss": 0.0098,
      "step": 2908220
    },
    {
      "epoch": 4.759398545459306,
      "grad_norm": 0.1917945146560669,
      "learning_rate": 4.843736615644129e-07,
      "loss": 0.0071,
      "step": 2908240
    },
    {
      "epoch": 4.75943127589796,
      "grad_norm": 0.14825789630413055,
      "learning_rate": 4.843077693508959e-07,
      "loss": 0.0079,
      "step": 2908260
    },
    {
      "epoch": 4.7594640063366125,
      "grad_norm": 0.0855991542339325,
      "learning_rate": 4.842418771373787e-07,
      "loss": 0.0064,
      "step": 2908280
    },
    {
      "epoch": 4.759496736775266,
      "grad_norm": 0.2585071325302124,
      "learning_rate": 4.841759849238616e-07,
      "loss": 0.0148,
      "step": 2908300
    },
    {
      "epoch": 4.75952946721392,
      "grad_norm": 0.38105902075767517,
      "learning_rate": 4.841100927103445e-07,
      "loss": 0.0078,
      "step": 2908320
    },
    {
      "epoch": 4.759562197652573,
      "grad_norm": 0.16930365562438965,
      "learning_rate": 4.840442004968273e-07,
      "loss": 0.0109,
      "step": 2908340
    },
    {
      "epoch": 4.759594928091226,
      "grad_norm": 0.2809271514415741,
      "learning_rate": 4.839783082833102e-07,
      "loss": 0.0066,
      "step": 2908360
    },
    {
      "epoch": 4.75962765852988,
      "grad_norm": 0.06369827687740326,
      "learning_rate": 4.83912416069793e-07,
      "loss": 0.0135,
      "step": 2908380
    },
    {
      "epoch": 4.759660388968533,
      "grad_norm": 0.4053388833999634,
      "learning_rate": 4.83846523856276e-07,
      "loss": 0.0097,
      "step": 2908400
    },
    {
      "epoch": 4.759693119407187,
      "grad_norm": 0.3036854863166809,
      "learning_rate": 4.837806316427588e-07,
      "loss": 0.0066,
      "step": 2908420
    },
    {
      "epoch": 4.7597258498458395,
      "grad_norm": 0.10612844675779343,
      "learning_rate": 4.837147394292417e-07,
      "loss": 0.0075,
      "step": 2908440
    },
    {
      "epoch": 4.759758580284493,
      "grad_norm": 0.12936651706695557,
      "learning_rate": 4.836488472157245e-07,
      "loss": 0.0099,
      "step": 2908460
    },
    {
      "epoch": 4.759791310723147,
      "grad_norm": 0.18581654131412506,
      "learning_rate": 4.835829550022075e-07,
      "loss": 0.008,
      "step": 2908480
    },
    {
      "epoch": 4.759824041161799,
      "grad_norm": 0.46416518092155457,
      "learning_rate": 4.835170627886903e-07,
      "loss": 0.0115,
      "step": 2908500
    },
    {
      "epoch": 4.759856771600453,
      "grad_norm": 0.34486857056617737,
      "learning_rate": 4.834511705751732e-07,
      "loss": 0.0138,
      "step": 2908520
    },
    {
      "epoch": 4.7598895020391065,
      "grad_norm": 0.1504497528076172,
      "learning_rate": 4.83385278361656e-07,
      "loss": 0.01,
      "step": 2908540
    },
    {
      "epoch": 4.759922232477759,
      "grad_norm": 0.12039251625537872,
      "learning_rate": 4.833193861481389e-07,
      "loss": 0.0093,
      "step": 2908560
    },
    {
      "epoch": 4.759954962916413,
      "grad_norm": 0.18432939052581787,
      "learning_rate": 4.832534939346218e-07,
      "loss": 0.01,
      "step": 2908580
    },
    {
      "epoch": 4.759987693355066,
      "grad_norm": 0.1196577399969101,
      "learning_rate": 4.831876017211047e-07,
      "loss": 0.0065,
      "step": 2908600
    },
    {
      "epoch": 4.76002042379372,
      "grad_norm": 0.15979087352752686,
      "learning_rate": 4.831217095075875e-07,
      "loss": 0.0105,
      "step": 2908620
    },
    {
      "epoch": 4.760053154232373,
      "grad_norm": 0.1797873079776764,
      "learning_rate": 4.830558172940704e-07,
      "loss": 0.0128,
      "step": 2908640
    },
    {
      "epoch": 4.760085884671026,
      "grad_norm": 0.13562917709350586,
      "learning_rate": 4.829899250805533e-07,
      "loss": 0.0089,
      "step": 2908660
    },
    {
      "epoch": 4.76011861510968,
      "grad_norm": 0.19182750582695007,
      "learning_rate": 4.829240328670362e-07,
      "loss": 0.0166,
      "step": 2908680
    },
    {
      "epoch": 4.7601513455483335,
      "grad_norm": 0.2136392742395401,
      "learning_rate": 4.82858140653519e-07,
      "loss": 0.008,
      "step": 2908700
    },
    {
      "epoch": 4.760184075986986,
      "grad_norm": 0.19580741226673126,
      "learning_rate": 4.827922484400019e-07,
      "loss": 0.0058,
      "step": 2908720
    },
    {
      "epoch": 4.76021680642564,
      "grad_norm": 0.09262107312679291,
      "learning_rate": 4.827263562264847e-07,
      "loss": 0.0076,
      "step": 2908740
    },
    {
      "epoch": 4.760249536864293,
      "grad_norm": 0.3448905944824219,
      "learning_rate": 4.826604640129676e-07,
      "loss": 0.0077,
      "step": 2908760
    },
    {
      "epoch": 4.760282267302946,
      "grad_norm": 0.10823202133178711,
      "learning_rate": 4.825945717994505e-07,
      "loss": 0.0099,
      "step": 2908780
    },
    {
      "epoch": 4.7603149977416,
      "grad_norm": 0.6589642763137817,
      "learning_rate": 4.825286795859333e-07,
      "loss": 0.0128,
      "step": 2908800
    },
    {
      "epoch": 4.760347728180253,
      "grad_norm": 0.6392871141433716,
      "learning_rate": 4.824627873724162e-07,
      "loss": 0.0079,
      "step": 2908820
    },
    {
      "epoch": 4.760380458618906,
      "grad_norm": 0.30232861638069153,
      "learning_rate": 4.823968951588991e-07,
      "loss": 0.0092,
      "step": 2908840
    },
    {
      "epoch": 4.76041318905756,
      "grad_norm": 0.1695401519536972,
      "learning_rate": 4.82331002945382e-07,
      "loss": 0.0133,
      "step": 2908860
    },
    {
      "epoch": 4.760445919496213,
      "grad_norm": 0.32483792304992676,
      "learning_rate": 4.822651107318649e-07,
      "loss": 0.0086,
      "step": 2908880
    },
    {
      "epoch": 4.760478649934867,
      "grad_norm": 0.3178039491176605,
      "learning_rate": 4.821992185183477e-07,
      "loss": 0.0101,
      "step": 2908900
    },
    {
      "epoch": 4.7605113803735195,
      "grad_norm": 0.13958798348903656,
      "learning_rate": 4.821333263048306e-07,
      "loss": 0.0086,
      "step": 2908920
    },
    {
      "epoch": 4.760544110812173,
      "grad_norm": 0.0644560232758522,
      "learning_rate": 4.820674340913135e-07,
      "loss": 0.005,
      "step": 2908940
    },
    {
      "epoch": 4.760576841250827,
      "grad_norm": 0.34563881158828735,
      "learning_rate": 4.820015418777963e-07,
      "loss": 0.0083,
      "step": 2908960
    },
    {
      "epoch": 4.760609571689479,
      "grad_norm": 0.26725855469703674,
      "learning_rate": 4.819356496642793e-07,
      "loss": 0.0112,
      "step": 2908980
    },
    {
      "epoch": 4.760642302128133,
      "grad_norm": 0.11919988691806793,
      "learning_rate": 4.81869757450762e-07,
      "loss": 0.0111,
      "step": 2909000
    },
    {
      "epoch": 4.760675032566787,
      "grad_norm": 0.2642157971858978,
      "learning_rate": 4.81803865237245e-07,
      "loss": 0.0129,
      "step": 2909020
    },
    {
      "epoch": 4.76070776300544,
      "grad_norm": 0.16620862483978271,
      "learning_rate": 4.817379730237278e-07,
      "loss": 0.007,
      "step": 2909040
    },
    {
      "epoch": 4.760740493444093,
      "grad_norm": 0.1941358894109726,
      "learning_rate": 4.816720808102108e-07,
      "loss": 0.0074,
      "step": 2909060
    },
    {
      "epoch": 4.7607732238827465,
      "grad_norm": 0.38376519083976746,
      "learning_rate": 4.816061885966936e-07,
      "loss": 0.0088,
      "step": 2909080
    },
    {
      "epoch": 4.7608059543214,
      "grad_norm": 0.14514096081256866,
      "learning_rate": 4.815402963831764e-07,
      "loss": 0.0067,
      "step": 2909100
    },
    {
      "epoch": 4.760838684760053,
      "grad_norm": 0.06593893468379974,
      "learning_rate": 4.814744041696593e-07,
      "loss": 0.0063,
      "step": 2909120
    },
    {
      "epoch": 4.760871415198706,
      "grad_norm": 0.305594801902771,
      "learning_rate": 4.814085119561422e-07,
      "loss": 0.0078,
      "step": 2909140
    },
    {
      "epoch": 4.76090414563736,
      "grad_norm": 0.23730866611003876,
      "learning_rate": 4.813426197426251e-07,
      "loss": 0.0103,
      "step": 2909160
    },
    {
      "epoch": 4.760936876076013,
      "grad_norm": 0.11286617070436478,
      "learning_rate": 4.812767275291079e-07,
      "loss": 0.0106,
      "step": 2909180
    },
    {
      "epoch": 4.760969606514666,
      "grad_norm": 0.27865567803382874,
      "learning_rate": 4.812108353155908e-07,
      "loss": 0.0097,
      "step": 2909200
    },
    {
      "epoch": 4.76100233695332,
      "grad_norm": 0.2013314962387085,
      "learning_rate": 4.811449431020736e-07,
      "loss": 0.0085,
      "step": 2909220
    },
    {
      "epoch": 4.7610350673919735,
      "grad_norm": 0.23092778027057648,
      "learning_rate": 4.810790508885566e-07,
      "loss": 0.0111,
      "step": 2909240
    },
    {
      "epoch": 4.761067797830626,
      "grad_norm": 0.29951533675193787,
      "learning_rate": 4.810131586750394e-07,
      "loss": 0.009,
      "step": 2909260
    },
    {
      "epoch": 4.76110052826928,
      "grad_norm": 0.24863339960575104,
      "learning_rate": 4.809472664615223e-07,
      "loss": 0.0095,
      "step": 2909280
    },
    {
      "epoch": 4.761133258707933,
      "grad_norm": 0.1567145138978958,
      "learning_rate": 4.808813742480051e-07,
      "loss": 0.0077,
      "step": 2909300
    },
    {
      "epoch": 4.761165989146587,
      "grad_norm": 0.1909080296754837,
      "learning_rate": 4.808154820344881e-07,
      "loss": 0.0081,
      "step": 2909320
    },
    {
      "epoch": 4.76119871958524,
      "grad_norm": 0.173333540558815,
      "learning_rate": 4.807495898209709e-07,
      "loss": 0.0061,
      "step": 2909340
    },
    {
      "epoch": 4.761231450023893,
      "grad_norm": 0.8111938238143921,
      "learning_rate": 4.806836976074538e-07,
      "loss": 0.0093,
      "step": 2909360
    },
    {
      "epoch": 4.761264180462547,
      "grad_norm": 0.4617081880569458,
      "learning_rate": 4.806178053939366e-07,
      "loss": 0.007,
      "step": 2909380
    },
    {
      "epoch": 4.7612969109011996,
      "grad_norm": 0.42270371317863464,
      "learning_rate": 4.805519131804194e-07,
      "loss": 0.0078,
      "step": 2909400
    },
    {
      "epoch": 4.761329641339853,
      "grad_norm": 0.26633307337760925,
      "learning_rate": 4.804860209669024e-07,
      "loss": 0.0087,
      "step": 2909420
    },
    {
      "epoch": 4.761362371778507,
      "grad_norm": 0.20042026042938232,
      "learning_rate": 4.804201287533852e-07,
      "loss": 0.0071,
      "step": 2909440
    },
    {
      "epoch": 4.7613951022171594,
      "grad_norm": 0.30539005994796753,
      "learning_rate": 4.803542365398681e-07,
      "loss": 0.0153,
      "step": 2909460
    },
    {
      "epoch": 4.761427832655813,
      "grad_norm": 0.32170283794403076,
      "learning_rate": 4.802883443263509e-07,
      "loss": 0.0062,
      "step": 2909480
    },
    {
      "epoch": 4.761460563094467,
      "grad_norm": 0.05264664441347122,
      "learning_rate": 4.802224521128339e-07,
      "loss": 0.0127,
      "step": 2909500
    },
    {
      "epoch": 4.76149329353312,
      "grad_norm": 0.47625693678855896,
      "learning_rate": 4.801565598993167e-07,
      "loss": 0.0056,
      "step": 2909520
    },
    {
      "epoch": 4.761526023971773,
      "grad_norm": 0.1573847383260727,
      "learning_rate": 4.800906676857997e-07,
      "loss": 0.0104,
      "step": 2909540
    },
    {
      "epoch": 4.7615587544104265,
      "grad_norm": 0.3134688138961792,
      "learning_rate": 4.800247754722824e-07,
      "loss": 0.0048,
      "step": 2909560
    },
    {
      "epoch": 4.76159148484908,
      "grad_norm": 0.17153286933898926,
      "learning_rate": 4.799588832587654e-07,
      "loss": 0.008,
      "step": 2909580
    },
    {
      "epoch": 4.761624215287734,
      "grad_norm": 0.16914702951908112,
      "learning_rate": 4.798929910452482e-07,
      "loss": 0.006,
      "step": 2909600
    },
    {
      "epoch": 4.761656945726386,
      "grad_norm": 0.39756274223327637,
      "learning_rate": 4.798270988317311e-07,
      "loss": 0.0095,
      "step": 2909620
    },
    {
      "epoch": 4.76168967616504,
      "grad_norm": 0.460610568523407,
      "learning_rate": 4.79761206618214e-07,
      "loss": 0.0059,
      "step": 2909640
    },
    {
      "epoch": 4.761722406603694,
      "grad_norm": 0.22270849347114563,
      "learning_rate": 4.796953144046968e-07,
      "loss": 0.0089,
      "step": 2909660
    },
    {
      "epoch": 4.761755137042346,
      "grad_norm": 0.31628546118736267,
      "learning_rate": 4.796294221911797e-07,
      "loss": 0.0071,
      "step": 2909680
    },
    {
      "epoch": 4.761787867481,
      "grad_norm": 0.15709877014160156,
      "learning_rate": 4.795635299776626e-07,
      "loss": 0.0086,
      "step": 2909700
    },
    {
      "epoch": 4.7618205979196535,
      "grad_norm": 0.1239573210477829,
      "learning_rate": 4.794976377641455e-07,
      "loss": 0.0093,
      "step": 2909720
    },
    {
      "epoch": 4.761853328358306,
      "grad_norm": 0.31392282247543335,
      "learning_rate": 4.794317455506284e-07,
      "loss": 0.0064,
      "step": 2909740
    },
    {
      "epoch": 4.76188605879696,
      "grad_norm": 0.0742032378911972,
      "learning_rate": 4.793658533371112e-07,
      "loss": 0.0061,
      "step": 2909760
    },
    {
      "epoch": 4.761918789235613,
      "grad_norm": 0.3604649603366852,
      "learning_rate": 4.79299961123594e-07,
      "loss": 0.007,
      "step": 2909780
    },
    {
      "epoch": 4.761951519674267,
      "grad_norm": 0.20536166429519653,
      "learning_rate": 4.79234068910077e-07,
      "loss": 0.0135,
      "step": 2909800
    },
    {
      "epoch": 4.76198425011292,
      "grad_norm": 0.23677237331867218,
      "learning_rate": 4.791681766965598e-07,
      "loss": 0.0105,
      "step": 2909820
    },
    {
      "epoch": 4.762016980551573,
      "grad_norm": 0.15538530051708221,
      "learning_rate": 4.791022844830427e-07,
      "loss": 0.0063,
      "step": 2909840
    },
    {
      "epoch": 4.762049710990227,
      "grad_norm": 0.11249200254678726,
      "learning_rate": 4.790363922695255e-07,
      "loss": 0.0095,
      "step": 2909860
    },
    {
      "epoch": 4.7620824414288805,
      "grad_norm": 0.23994149267673492,
      "learning_rate": 4.789705000560084e-07,
      "loss": 0.007,
      "step": 2909880
    },
    {
      "epoch": 4.762115171867533,
      "grad_norm": 0.13852712512016296,
      "learning_rate": 4.789046078424913e-07,
      "loss": 0.0069,
      "step": 2909900
    },
    {
      "epoch": 4.762147902306187,
      "grad_norm": 0.3864697813987732,
      "learning_rate": 4.788387156289742e-07,
      "loss": 0.0089,
      "step": 2909920
    },
    {
      "epoch": 4.76218063274484,
      "grad_norm": 0.27582257986068726,
      "learning_rate": 4.78772823415457e-07,
      "loss": 0.0085,
      "step": 2909940
    },
    {
      "epoch": 4.762213363183493,
      "grad_norm": 0.5193983316421509,
      "learning_rate": 4.787069312019399e-07,
      "loss": 0.008,
      "step": 2909960
    },
    {
      "epoch": 4.762246093622147,
      "grad_norm": 0.18219245970249176,
      "learning_rate": 4.786410389884228e-07,
      "loss": 0.0088,
      "step": 2909980
    },
    {
      "epoch": 4.7622788240608,
      "grad_norm": 0.16473527252674103,
      "learning_rate": 4.785751467749057e-07,
      "loss": 0.0049,
      "step": 2910000
    },
    {
      "epoch": 4.762311554499453,
      "grad_norm": 0.4513140618801117,
      "learning_rate": 4.785092545613885e-07,
      "loss": 0.0096,
      "step": 2910020
    },
    {
      "epoch": 4.762344284938107,
      "grad_norm": 0.30747994780540466,
      "learning_rate": 4.784433623478714e-07,
      "loss": 0.0123,
      "step": 2910040
    },
    {
      "epoch": 4.76237701537676,
      "grad_norm": 0.6175994873046875,
      "learning_rate": 4.783774701343542e-07,
      "loss": 0.0105,
      "step": 2910060
    },
    {
      "epoch": 4.762409745815414,
      "grad_norm": 0.35934922099113464,
      "learning_rate": 4.783115779208372e-07,
      "loss": 0.0099,
      "step": 2910080
    },
    {
      "epoch": 4.7624424762540665,
      "grad_norm": 0.06097117066383362,
      "learning_rate": 4.7824568570732e-07,
      "loss": 0.0064,
      "step": 2910100
    },
    {
      "epoch": 4.76247520669272,
      "grad_norm": 0.22126740217208862,
      "learning_rate": 4.781797934938028e-07,
      "loss": 0.0095,
      "step": 2910120
    },
    {
      "epoch": 4.762507937131374,
      "grad_norm": 0.21364593505859375,
      "learning_rate": 4.781139012802857e-07,
      "loss": 0.0095,
      "step": 2910140
    },
    {
      "epoch": 4.762540667570027,
      "grad_norm": 1.016498327255249,
      "learning_rate": 4.780480090667686e-07,
      "loss": 0.0095,
      "step": 2910160
    },
    {
      "epoch": 4.76257339800868,
      "grad_norm": 0.21656090021133423,
      "learning_rate": 4.779821168532515e-07,
      "loss": 0.0103,
      "step": 2910180
    },
    {
      "epoch": 4.7626061284473336,
      "grad_norm": 0.07081945985555649,
      "learning_rate": 4.779162246397344e-07,
      "loss": 0.0058,
      "step": 2910200
    },
    {
      "epoch": 4.762638858885987,
      "grad_norm": 0.2756471335887909,
      "learning_rate": 4.778503324262172e-07,
      "loss": 0.0053,
      "step": 2910220
    },
    {
      "epoch": 4.76267158932464,
      "grad_norm": 0.1353866159915924,
      "learning_rate": 4.777844402127001e-07,
      "loss": 0.0083,
      "step": 2910240
    },
    {
      "epoch": 4.7627043197632934,
      "grad_norm": 0.12779328227043152,
      "learning_rate": 4.77718547999183e-07,
      "loss": 0.0075,
      "step": 2910260
    },
    {
      "epoch": 4.762737050201947,
      "grad_norm": 0.19907613098621368,
      "learning_rate": 4.776526557856658e-07,
      "loss": 0.0083,
      "step": 2910280
    },
    {
      "epoch": 4.7627697806406,
      "grad_norm": 0.31101685762405396,
      "learning_rate": 4.775867635721488e-07,
      "loss": 0.0061,
      "step": 2910300
    },
    {
      "epoch": 4.762802511079253,
      "grad_norm": 0.24431352317333221,
      "learning_rate": 4.775208713586315e-07,
      "loss": 0.0114,
      "step": 2910320
    },
    {
      "epoch": 4.762835241517907,
      "grad_norm": 0.21899905800819397,
      "learning_rate": 4.774549791451145e-07,
      "loss": 0.0076,
      "step": 2910340
    },
    {
      "epoch": 4.7628679719565605,
      "grad_norm": 0.19091461598873138,
      "learning_rate": 4.773890869315973e-07,
      "loss": 0.0068,
      "step": 2910360
    },
    {
      "epoch": 4.762900702395213,
      "grad_norm": 0.06689449399709702,
      "learning_rate": 4.773231947180803e-07,
      "loss": 0.0099,
      "step": 2910380
    },
    {
      "epoch": 4.762933432833867,
      "grad_norm": 0.28611597418785095,
      "learning_rate": 4.772573025045631e-07,
      "loss": 0.0171,
      "step": 2910400
    },
    {
      "epoch": 4.76296616327252,
      "grad_norm": 0.15831616520881653,
      "learning_rate": 4.77191410291046e-07,
      "loss": 0.0088,
      "step": 2910420
    },
    {
      "epoch": 4.762998893711173,
      "grad_norm": 0.16877494752407074,
      "learning_rate": 4.771255180775288e-07,
      "loss": 0.0081,
      "step": 2910440
    },
    {
      "epoch": 4.763031624149827,
      "grad_norm": 0.2286522388458252,
      "learning_rate": 4.770596258640118e-07,
      "loss": 0.0122,
      "step": 2910460
    },
    {
      "epoch": 4.76306435458848,
      "grad_norm": 0.3107379972934723,
      "learning_rate": 4.769937336504946e-07,
      "loss": 0.0091,
      "step": 2910480
    },
    {
      "epoch": 4.763097085027134,
      "grad_norm": 0.3647734820842743,
      "learning_rate": 4.769278414369774e-07,
      "loss": 0.0096,
      "step": 2910500
    },
    {
      "epoch": 4.763129815465787,
      "grad_norm": 0.18109214305877686,
      "learning_rate": 4.768619492234603e-07,
      "loss": 0.0123,
      "step": 2910520
    },
    {
      "epoch": 4.76316254590444,
      "grad_norm": 0.23607471585273743,
      "learning_rate": 4.7679605700994313e-07,
      "loss": 0.0083,
      "step": 2910540
    },
    {
      "epoch": 4.763195276343094,
      "grad_norm": 0.1773940771818161,
      "learning_rate": 4.767301647964261e-07,
      "loss": 0.0091,
      "step": 2910560
    },
    {
      "epoch": 4.7632280067817465,
      "grad_norm": 0.18587322533130646,
      "learning_rate": 4.766642725829089e-07,
      "loss": 0.0101,
      "step": 2910580
    },
    {
      "epoch": 4.7632607372204,
      "grad_norm": 0.11377783119678497,
      "learning_rate": 4.7659838036939183e-07,
      "loss": 0.0055,
      "step": 2910600
    },
    {
      "epoch": 4.763293467659054,
      "grad_norm": 0.20824186503887177,
      "learning_rate": 4.765324881558747e-07,
      "loss": 0.0066,
      "step": 2910620
    },
    {
      "epoch": 4.763326198097706,
      "grad_norm": 0.08041724562644958,
      "learning_rate": 4.7646659594235757e-07,
      "loss": 0.0096,
      "step": 2910640
    },
    {
      "epoch": 4.76335892853636,
      "grad_norm": 0.186222106218338,
      "learning_rate": 4.764007037288404e-07,
      "loss": 0.008,
      "step": 2910660
    },
    {
      "epoch": 4.763391658975014,
      "grad_norm": 0.1743597388267517,
      "learning_rate": 4.763348115153233e-07,
      "loss": 0.0086,
      "step": 2910680
    },
    {
      "epoch": 4.763424389413667,
      "grad_norm": 0.22963453829288483,
      "learning_rate": 4.7626891930180616e-07,
      "loss": 0.0082,
      "step": 2910700
    },
    {
      "epoch": 4.76345711985232,
      "grad_norm": 0.0821441113948822,
      "learning_rate": 4.76203027088289e-07,
      "loss": 0.012,
      "step": 2910720
    },
    {
      "epoch": 4.7634898502909735,
      "grad_norm": 0.23439477384090424,
      "learning_rate": 4.761371348747719e-07,
      "loss": 0.008,
      "step": 2910740
    },
    {
      "epoch": 4.763522580729627,
      "grad_norm": 0.27443307638168335,
      "learning_rate": 4.7607124266125475e-07,
      "loss": 0.0134,
      "step": 2910760
    },
    {
      "epoch": 4.763555311168281,
      "grad_norm": 0.16909104585647583,
      "learning_rate": 4.7600535044773765e-07,
      "loss": 0.0061,
      "step": 2910780
    },
    {
      "epoch": 4.763588041606933,
      "grad_norm": 0.15564310550689697,
      "learning_rate": 4.759394582342205e-07,
      "loss": 0.0085,
      "step": 2910800
    },
    {
      "epoch": 4.763620772045587,
      "grad_norm": 0.0659450963139534,
      "learning_rate": 4.758735660207034e-07,
      "loss": 0.0072,
      "step": 2910820
    },
    {
      "epoch": 4.763653502484241,
      "grad_norm": 0.3794856369495392,
      "learning_rate": 4.7580767380718624e-07,
      "loss": 0.0082,
      "step": 2910840
    },
    {
      "epoch": 4.763686232922893,
      "grad_norm": 0.35104575753211975,
      "learning_rate": 4.7574178159366914e-07,
      "loss": 0.0074,
      "step": 2910860
    },
    {
      "epoch": 4.763718963361547,
      "grad_norm": 0.5558693408966064,
      "learning_rate": 4.75675889380152e-07,
      "loss": 0.0081,
      "step": 2910880
    },
    {
      "epoch": 4.7637516938002005,
      "grad_norm": 0.23654694855213165,
      "learning_rate": 4.756099971666349e-07,
      "loss": 0.0086,
      "step": 2910900
    },
    {
      "epoch": 4.763784424238853,
      "grad_norm": 0.1870764046907425,
      "learning_rate": 4.7554410495311773e-07,
      "loss": 0.0091,
      "step": 2910920
    },
    {
      "epoch": 4.763817154677507,
      "grad_norm": 0.7687212824821472,
      "learning_rate": 4.754782127396006e-07,
      "loss": 0.0148,
      "step": 2910940
    },
    {
      "epoch": 4.76384988511616,
      "grad_norm": 0.2895471453666687,
      "learning_rate": 4.7541232052608347e-07,
      "loss": 0.0069,
      "step": 2910960
    },
    {
      "epoch": 4.763882615554814,
      "grad_norm": 0.12898975610733032,
      "learning_rate": 4.753464283125663e-07,
      "loss": 0.0061,
      "step": 2910980
    },
    {
      "epoch": 4.763915345993467,
      "grad_norm": 0.2134508490562439,
      "learning_rate": 4.752805360990492e-07,
      "loss": 0.0062,
      "step": 2911000
    },
    {
      "epoch": 4.76394807643212,
      "grad_norm": 0.212564155459404,
      "learning_rate": 4.7521464388553206e-07,
      "loss": 0.0085,
      "step": 2911020
    },
    {
      "epoch": 4.763980806870774,
      "grad_norm": 0.2867933213710785,
      "learning_rate": 4.7514875167201496e-07,
      "loss": 0.0083,
      "step": 2911040
    },
    {
      "epoch": 4.7640135373094274,
      "grad_norm": 0.33684977889060974,
      "learning_rate": 4.750828594584978e-07,
      "loss": 0.0129,
      "step": 2911060
    },
    {
      "epoch": 4.76404626774808,
      "grad_norm": 0.6102349758148193,
      "learning_rate": 4.750169672449807e-07,
      "loss": 0.0093,
      "step": 2911080
    },
    {
      "epoch": 4.764078998186734,
      "grad_norm": 0.35392048954963684,
      "learning_rate": 4.7495107503146355e-07,
      "loss": 0.0078,
      "step": 2911100
    },
    {
      "epoch": 4.764111728625387,
      "grad_norm": 0.3964489698410034,
      "learning_rate": 4.7488518281794645e-07,
      "loss": 0.0085,
      "step": 2911120
    },
    {
      "epoch": 4.76414445906404,
      "grad_norm": 0.15979032218456268,
      "learning_rate": 4.748192906044293e-07,
      "loss": 0.0087,
      "step": 2911140
    },
    {
      "epoch": 4.764177189502694,
      "grad_norm": 0.10701077431440353,
      "learning_rate": 4.7475339839091214e-07,
      "loss": 0.0101,
      "step": 2911160
    },
    {
      "epoch": 4.764209919941347,
      "grad_norm": 0.1995028257369995,
      "learning_rate": 4.7468750617739504e-07,
      "loss": 0.0058,
      "step": 2911180
    },
    {
      "epoch": 4.76424265038,
      "grad_norm": 0.23627296090126038,
      "learning_rate": 4.746216139638779e-07,
      "loss": 0.0073,
      "step": 2911200
    },
    {
      "epoch": 4.7642753808186535,
      "grad_norm": 0.13277995586395264,
      "learning_rate": 4.7455572175036084e-07,
      "loss": 0.0081,
      "step": 2911220
    },
    {
      "epoch": 4.764308111257307,
      "grad_norm": 0.36975663900375366,
      "learning_rate": 4.7448982953684363e-07,
      "loss": 0.0069,
      "step": 2911240
    },
    {
      "epoch": 4.764340841695961,
      "grad_norm": 0.3436235189437866,
      "learning_rate": 4.744239373233266e-07,
      "loss": 0.0106,
      "step": 2911260
    },
    {
      "epoch": 4.764373572134613,
      "grad_norm": 0.3170328438282013,
      "learning_rate": 4.7435804510980943e-07,
      "loss": 0.0109,
      "step": 2911280
    },
    {
      "epoch": 4.764406302573267,
      "grad_norm": 0.25243285298347473,
      "learning_rate": 4.742921528962923e-07,
      "loss": 0.0051,
      "step": 2911300
    },
    {
      "epoch": 4.764439033011921,
      "grad_norm": 0.0784883052110672,
      "learning_rate": 4.7422626068277517e-07,
      "loss": 0.0101,
      "step": 2911320
    },
    {
      "epoch": 4.764471763450574,
      "grad_norm": 0.09930095076560974,
      "learning_rate": 4.7416036846925807e-07,
      "loss": 0.0074,
      "step": 2911340
    },
    {
      "epoch": 4.764504493889227,
      "grad_norm": 0.16524089872837067,
      "learning_rate": 4.740944762557409e-07,
      "loss": 0.0079,
      "step": 2911360
    },
    {
      "epoch": 4.7645372243278805,
      "grad_norm": 0.09597864747047424,
      "learning_rate": 4.7402858404222376e-07,
      "loss": 0.0097,
      "step": 2911380
    },
    {
      "epoch": 4.764569954766534,
      "grad_norm": 0.4743620455265045,
      "learning_rate": 4.7396269182870666e-07,
      "loss": 0.0071,
      "step": 2911400
    },
    {
      "epoch": 4.764602685205187,
      "grad_norm": 0.31191736459732056,
      "learning_rate": 4.738967996151895e-07,
      "loss": 0.008,
      "step": 2911420
    },
    {
      "epoch": 4.76463541564384,
      "grad_norm": 0.5687426328659058,
      "learning_rate": 4.738309074016724e-07,
      "loss": 0.0062,
      "step": 2911440
    },
    {
      "epoch": 4.764668146082494,
      "grad_norm": 0.16275952756404877,
      "learning_rate": 4.7376501518815525e-07,
      "loss": 0.0113,
      "step": 2911460
    },
    {
      "epoch": 4.764700876521147,
      "grad_norm": 0.25865793228149414,
      "learning_rate": 4.7369912297463815e-07,
      "loss": 0.009,
      "step": 2911480
    },
    {
      "epoch": 4.7647336069598,
      "grad_norm": 0.505750298500061,
      "learning_rate": 4.73633230761121e-07,
      "loss": 0.0119,
      "step": 2911500
    },
    {
      "epoch": 4.764766337398454,
      "grad_norm": 0.3386879563331604,
      "learning_rate": 4.735673385476039e-07,
      "loss": 0.0116,
      "step": 2911520
    },
    {
      "epoch": 4.7647990678371075,
      "grad_norm": 0.2462935745716095,
      "learning_rate": 4.7350144633408674e-07,
      "loss": 0.0052,
      "step": 2911540
    },
    {
      "epoch": 4.76483179827576,
      "grad_norm": 0.2127884328365326,
      "learning_rate": 4.7343555412056964e-07,
      "loss": 0.0129,
      "step": 2911560
    },
    {
      "epoch": 4.764864528714414,
      "grad_norm": 0.2960675060749054,
      "learning_rate": 4.733696619070525e-07,
      "loss": 0.01,
      "step": 2911580
    },
    {
      "epoch": 4.764897259153067,
      "grad_norm": 0.10427433252334595,
      "learning_rate": 4.7330376969353533e-07,
      "loss": 0.0062,
      "step": 2911600
    },
    {
      "epoch": 4.764929989591721,
      "grad_norm": 0.16774354875087738,
      "learning_rate": 4.732378774800182e-07,
      "loss": 0.014,
      "step": 2911620
    },
    {
      "epoch": 4.764962720030374,
      "grad_norm": 0.467877060174942,
      "learning_rate": 4.7317198526650107e-07,
      "loss": 0.0111,
      "step": 2911640
    },
    {
      "epoch": 4.764995450469027,
      "grad_norm": 0.7473065257072449,
      "learning_rate": 4.7310609305298397e-07,
      "loss": 0.008,
      "step": 2911660
    },
    {
      "epoch": 4.765028180907681,
      "grad_norm": 0.3440553545951843,
      "learning_rate": 4.730402008394668e-07,
      "loss": 0.0075,
      "step": 2911680
    },
    {
      "epoch": 4.765060911346334,
      "grad_norm": 0.34077706933021545,
      "learning_rate": 4.729743086259497e-07,
      "loss": 0.0069,
      "step": 2911700
    },
    {
      "epoch": 4.765093641784987,
      "grad_norm": 0.11180305480957031,
      "learning_rate": 4.7290841641243256e-07,
      "loss": 0.008,
      "step": 2911720
    },
    {
      "epoch": 4.765126372223641,
      "grad_norm": 0.3136288821697235,
      "learning_rate": 4.7284252419891546e-07,
      "loss": 0.0079,
      "step": 2911740
    },
    {
      "epoch": 4.7651591026622935,
      "grad_norm": 0.11490954458713531,
      "learning_rate": 4.727766319853983e-07,
      "loss": 0.0064,
      "step": 2911760
    },
    {
      "epoch": 4.765191833100947,
      "grad_norm": 0.27905094623565674,
      "learning_rate": 4.727107397718812e-07,
      "loss": 0.01,
      "step": 2911780
    },
    {
      "epoch": 4.765224563539601,
      "grad_norm": 0.34027111530303955,
      "learning_rate": 4.7264484755836405e-07,
      "loss": 0.0114,
      "step": 2911800
    },
    {
      "epoch": 4.765257293978254,
      "grad_norm": 0.7959257960319519,
      "learning_rate": 4.725789553448469e-07,
      "loss": 0.0078,
      "step": 2911820
    },
    {
      "epoch": 4.765290024416907,
      "grad_norm": 0.29381972551345825,
      "learning_rate": 4.725130631313298e-07,
      "loss": 0.0098,
      "step": 2911840
    },
    {
      "epoch": 4.765322754855561,
      "grad_norm": 0.06696967780590057,
      "learning_rate": 4.7244717091781264e-07,
      "loss": 0.0073,
      "step": 2911860
    },
    {
      "epoch": 4.765355485294214,
      "grad_norm": 0.10790734738111496,
      "learning_rate": 4.723812787042956e-07,
      "loss": 0.0075,
      "step": 2911880
    },
    {
      "epoch": 4.765388215732868,
      "grad_norm": 0.06730904430150986,
      "learning_rate": 4.723153864907784e-07,
      "loss": 0.0052,
      "step": 2911900
    },
    {
      "epoch": 4.7654209461715205,
      "grad_norm": 0.6962462663650513,
      "learning_rate": 4.7224949427726133e-07,
      "loss": 0.0088,
      "step": 2911920
    },
    {
      "epoch": 4.765453676610174,
      "grad_norm": 0.14769063889980316,
      "learning_rate": 4.721836020637442e-07,
      "loss": 0.0077,
      "step": 2911940
    },
    {
      "epoch": 4.765486407048828,
      "grad_norm": 0.14379118382930756,
      "learning_rate": 4.721177098502271e-07,
      "loss": 0.0123,
      "step": 2911960
    },
    {
      "epoch": 4.76551913748748,
      "grad_norm": 0.3292561173439026,
      "learning_rate": 4.720518176367099e-07,
      "loss": 0.0057,
      "step": 2911980
    },
    {
      "epoch": 4.765551867926134,
      "grad_norm": 0.10901518911123276,
      "learning_rate": 4.719859254231928e-07,
      "loss": 0.0112,
      "step": 2912000
    },
    {
      "epoch": 4.7655845983647875,
      "grad_norm": 0.06484252214431763,
      "learning_rate": 4.7192003320967567e-07,
      "loss": 0.0073,
      "step": 2912020
    },
    {
      "epoch": 4.76561732880344,
      "grad_norm": 0.5029749870300293,
      "learning_rate": 4.718541409961585e-07,
      "loss": 0.0118,
      "step": 2912040
    },
    {
      "epoch": 4.765650059242094,
      "grad_norm": 0.3500111401081085,
      "learning_rate": 4.717882487826414e-07,
      "loss": 0.0077,
      "step": 2912060
    },
    {
      "epoch": 4.765682789680747,
      "grad_norm": 0.1213492751121521,
      "learning_rate": 4.7172235656912426e-07,
      "loss": 0.0087,
      "step": 2912080
    },
    {
      "epoch": 4.765715520119401,
      "grad_norm": 0.37129855155944824,
      "learning_rate": 4.7165646435560716e-07,
      "loss": 0.0087,
      "step": 2912100
    },
    {
      "epoch": 4.765748250558054,
      "grad_norm": 0.2378143072128296,
      "learning_rate": 4.7159057214209e-07,
      "loss": 0.0111,
      "step": 2912120
    },
    {
      "epoch": 4.765780980996707,
      "grad_norm": 0.10691603273153305,
      "learning_rate": 4.715246799285729e-07,
      "loss": 0.0058,
      "step": 2912140
    },
    {
      "epoch": 4.765813711435361,
      "grad_norm": 0.3339346647262573,
      "learning_rate": 4.7145878771505575e-07,
      "loss": 0.008,
      "step": 2912160
    },
    {
      "epoch": 4.765846441874014,
      "grad_norm": 0.09291216731071472,
      "learning_rate": 4.7139289550153864e-07,
      "loss": 0.0064,
      "step": 2912180
    },
    {
      "epoch": 4.765879172312667,
      "grad_norm": 0.08671583235263824,
      "learning_rate": 4.713270032880215e-07,
      "loss": 0.009,
      "step": 2912200
    },
    {
      "epoch": 4.765911902751321,
      "grad_norm": 0.42607471346855164,
      "learning_rate": 4.712611110745044e-07,
      "loss": 0.0072,
      "step": 2912220
    },
    {
      "epoch": 4.765944633189974,
      "grad_norm": 0.06643245369195938,
      "learning_rate": 4.7119521886098723e-07,
      "loss": 0.0115,
      "step": 2912240
    },
    {
      "epoch": 4.765977363628627,
      "grad_norm": 0.13034285604953766,
      "learning_rate": 4.711293266474701e-07,
      "loss": 0.0089,
      "step": 2912260
    },
    {
      "epoch": 4.766010094067281,
      "grad_norm": 0.1615382581949234,
      "learning_rate": 4.71063434433953e-07,
      "loss": 0.0058,
      "step": 2912280
    },
    {
      "epoch": 4.766042824505934,
      "grad_norm": 0.23491132259368896,
      "learning_rate": 4.709975422204358e-07,
      "loss": 0.0085,
      "step": 2912300
    },
    {
      "epoch": 4.766075554944587,
      "grad_norm": 0.33309367299079895,
      "learning_rate": 4.709316500069187e-07,
      "loss": 0.0112,
      "step": 2912320
    },
    {
      "epoch": 4.766108285383241,
      "grad_norm": 0.2773313820362091,
      "learning_rate": 4.7086575779340157e-07,
      "loss": 0.0093,
      "step": 2912340
    },
    {
      "epoch": 4.766141015821894,
      "grad_norm": 0.35889551043510437,
      "learning_rate": 4.7079986557988447e-07,
      "loss": 0.0091,
      "step": 2912360
    },
    {
      "epoch": 4.766173746260547,
      "grad_norm": 0.48838096857070923,
      "learning_rate": 4.707339733663673e-07,
      "loss": 0.0096,
      "step": 2912380
    },
    {
      "epoch": 4.7662064766992005,
      "grad_norm": 0.08786685764789581,
      "learning_rate": 4.706680811528502e-07,
      "loss": 0.0127,
      "step": 2912400
    },
    {
      "epoch": 4.766239207137854,
      "grad_norm": 0.06915681064128876,
      "learning_rate": 4.7060218893933306e-07,
      "loss": 0.0072,
      "step": 2912420
    },
    {
      "epoch": 4.766271937576508,
      "grad_norm": 0.4240359365940094,
      "learning_rate": 4.7053629672581596e-07,
      "loss": 0.0147,
      "step": 2912440
    },
    {
      "epoch": 4.76630466801516,
      "grad_norm": 0.15649786591529846,
      "learning_rate": 4.704704045122988e-07,
      "loss": 0.0078,
      "step": 2912460
    },
    {
      "epoch": 4.766337398453814,
      "grad_norm": 0.3113357126712799,
      "learning_rate": 4.7040451229878165e-07,
      "loss": 0.0067,
      "step": 2912480
    },
    {
      "epoch": 4.766370128892468,
      "grad_norm": 0.583415150642395,
      "learning_rate": 4.7033862008526455e-07,
      "loss": 0.0106,
      "step": 2912500
    },
    {
      "epoch": 4.766402859331121,
      "grad_norm": 0.2055424600839615,
      "learning_rate": 4.702727278717474e-07,
      "loss": 0.0066,
      "step": 2912520
    },
    {
      "epoch": 4.766435589769774,
      "grad_norm": 0.5290478467941284,
      "learning_rate": 4.7020683565823034e-07,
      "loss": 0.0088,
      "step": 2912540
    },
    {
      "epoch": 4.7664683202084275,
      "grad_norm": 0.11579179018735886,
      "learning_rate": 4.7014094344471313e-07,
      "loss": 0.0093,
      "step": 2912560
    },
    {
      "epoch": 4.766501050647081,
      "grad_norm": 0.14780092239379883,
      "learning_rate": 4.700750512311961e-07,
      "loss": 0.0093,
      "step": 2912580
    },
    {
      "epoch": 4.766533781085734,
      "grad_norm": 0.11102592945098877,
      "learning_rate": 4.7000915901767893e-07,
      "loss": 0.0074,
      "step": 2912600
    },
    {
      "epoch": 4.766566511524387,
      "grad_norm": 0.3426526188850403,
      "learning_rate": 4.6994326680416183e-07,
      "loss": 0.0122,
      "step": 2912620
    },
    {
      "epoch": 4.766599241963041,
      "grad_norm": 0.18456286191940308,
      "learning_rate": 4.698773745906447e-07,
      "loss": 0.0057,
      "step": 2912640
    },
    {
      "epoch": 4.766631972401694,
      "grad_norm": 0.2704450190067291,
      "learning_rate": 4.698114823771276e-07,
      "loss": 0.006,
      "step": 2912660
    },
    {
      "epoch": 4.766664702840347,
      "grad_norm": 0.15422353148460388,
      "learning_rate": 4.697455901636104e-07,
      "loss": 0.01,
      "step": 2912680
    },
    {
      "epoch": 4.766697433279001,
      "grad_norm": 0.26188817620277405,
      "learning_rate": 4.6967969795009327e-07,
      "loss": 0.014,
      "step": 2912700
    },
    {
      "epoch": 4.7667301637176545,
      "grad_norm": 0.24800898134708405,
      "learning_rate": 4.6961380573657616e-07,
      "loss": 0.0106,
      "step": 2912720
    },
    {
      "epoch": 4.766762894156307,
      "grad_norm": 0.11261550337076187,
      "learning_rate": 4.69547913523059e-07,
      "loss": 0.0129,
      "step": 2912740
    },
    {
      "epoch": 4.766795624594961,
      "grad_norm": 0.1710432916879654,
      "learning_rate": 4.694820213095419e-07,
      "loss": 0.0067,
      "step": 2912760
    },
    {
      "epoch": 4.766828355033614,
      "grad_norm": 0.20503297448158264,
      "learning_rate": 4.6941612909602475e-07,
      "loss": 0.0088,
      "step": 2912780
    },
    {
      "epoch": 4.766861085472268,
      "grad_norm": 0.09192900359630585,
      "learning_rate": 4.6935023688250765e-07,
      "loss": 0.0081,
      "step": 2912800
    },
    {
      "epoch": 4.766893815910921,
      "grad_norm": 0.09931278973817825,
      "learning_rate": 4.692843446689905e-07,
      "loss": 0.007,
      "step": 2912820
    },
    {
      "epoch": 4.766926546349574,
      "grad_norm": 0.11961611360311508,
      "learning_rate": 4.692184524554734e-07,
      "loss": 0.0061,
      "step": 2912840
    },
    {
      "epoch": 4.766959276788228,
      "grad_norm": 0.08453816175460815,
      "learning_rate": 4.6915256024195624e-07,
      "loss": 0.0114,
      "step": 2912860
    },
    {
      "epoch": 4.7669920072268805,
      "grad_norm": 0.28029003739356995,
      "learning_rate": 4.6908666802843914e-07,
      "loss": 0.008,
      "step": 2912880
    },
    {
      "epoch": 4.767024737665534,
      "grad_norm": 0.16341464221477509,
      "learning_rate": 4.69020775814922e-07,
      "loss": 0.0049,
      "step": 2912900
    },
    {
      "epoch": 4.767057468104188,
      "grad_norm": 0.13917283713817596,
      "learning_rate": 4.6895488360140483e-07,
      "loss": 0.0085,
      "step": 2912920
    },
    {
      "epoch": 4.76709019854284,
      "grad_norm": 0.6419938206672668,
      "learning_rate": 4.6888899138788773e-07,
      "loss": 0.0077,
      "step": 2912940
    },
    {
      "epoch": 4.767122928981494,
      "grad_norm": 0.0904550701379776,
      "learning_rate": 4.688230991743706e-07,
      "loss": 0.0065,
      "step": 2912960
    },
    {
      "epoch": 4.767155659420148,
      "grad_norm": 0.5182085633277893,
      "learning_rate": 4.687572069608535e-07,
      "loss": 0.0137,
      "step": 2912980
    },
    {
      "epoch": 4.767188389858801,
      "grad_norm": 0.12775255739688873,
      "learning_rate": 4.686913147473363e-07,
      "loss": 0.0053,
      "step": 2913000
    },
    {
      "epoch": 4.767221120297454,
      "grad_norm": 0.03934219479560852,
      "learning_rate": 4.686254225338192e-07,
      "loss": 0.0057,
      "step": 2913020
    },
    {
      "epoch": 4.7672538507361075,
      "grad_norm": 0.3361552655696869,
      "learning_rate": 4.6855953032030207e-07,
      "loss": 0.009,
      "step": 2913040
    },
    {
      "epoch": 4.767286581174761,
      "grad_norm": 0.11753220111131668,
      "learning_rate": 4.6849363810678496e-07,
      "loss": 0.008,
      "step": 2913060
    },
    {
      "epoch": 4.767319311613415,
      "grad_norm": 0.38496094942092896,
      "learning_rate": 4.684277458932678e-07,
      "loss": 0.01,
      "step": 2913080
    },
    {
      "epoch": 4.767352042052067,
      "grad_norm": 0.21713687479496002,
      "learning_rate": 4.683618536797507e-07,
      "loss": 0.0082,
      "step": 2913100
    },
    {
      "epoch": 4.767384772490721,
      "grad_norm": 0.09907609969377518,
      "learning_rate": 4.6829596146623355e-07,
      "loss": 0.0123,
      "step": 2913120
    },
    {
      "epoch": 4.767417502929375,
      "grad_norm": 0.2346886396408081,
      "learning_rate": 4.682300692527164e-07,
      "loss": 0.012,
      "step": 2913140
    },
    {
      "epoch": 4.767450233368027,
      "grad_norm": 0.06030314415693283,
      "learning_rate": 4.681641770391993e-07,
      "loss": 0.0077,
      "step": 2913160
    },
    {
      "epoch": 4.767482963806681,
      "grad_norm": 0.573657751083374,
      "learning_rate": 4.6809828482568214e-07,
      "loss": 0.0088,
      "step": 2913180
    },
    {
      "epoch": 4.7675156942453345,
      "grad_norm": 0.16242508590221405,
      "learning_rate": 4.680323926121651e-07,
      "loss": 0.0098,
      "step": 2913200
    },
    {
      "epoch": 4.767548424683987,
      "grad_norm": 0.1119847521185875,
      "learning_rate": 4.679665003986479e-07,
      "loss": 0.007,
      "step": 2913220
    },
    {
      "epoch": 4.767581155122641,
      "grad_norm": 0.04914698377251625,
      "learning_rate": 4.6790060818513084e-07,
      "loss": 0.0056,
      "step": 2913240
    },
    {
      "epoch": 4.767613885561294,
      "grad_norm": 0.08557619154453278,
      "learning_rate": 4.678347159716137e-07,
      "loss": 0.0063,
      "step": 2913260
    },
    {
      "epoch": 4.767646615999948,
      "grad_norm": 0.3160204291343689,
      "learning_rate": 4.677688237580966e-07,
      "loss": 0.0054,
      "step": 2913280
    },
    {
      "epoch": 4.767679346438601,
      "grad_norm": 0.151772141456604,
      "learning_rate": 4.6770293154457943e-07,
      "loss": 0.0079,
      "step": 2913300
    },
    {
      "epoch": 4.767712076877254,
      "grad_norm": 0.1372039020061493,
      "learning_rate": 4.6763703933106233e-07,
      "loss": 0.0105,
      "step": 2913320
    },
    {
      "epoch": 4.767744807315908,
      "grad_norm": 0.2506568133831024,
      "learning_rate": 4.6757114711754517e-07,
      "loss": 0.0126,
      "step": 2913340
    },
    {
      "epoch": 4.7677775377545615,
      "grad_norm": 0.22120866179466248,
      "learning_rate": 4.67505254904028e-07,
      "loss": 0.0104,
      "step": 2913360
    },
    {
      "epoch": 4.767810268193214,
      "grad_norm": 0.13155674934387207,
      "learning_rate": 4.674393626905109e-07,
      "loss": 0.0063,
      "step": 2913380
    },
    {
      "epoch": 4.767842998631868,
      "grad_norm": 0.3039529621601105,
      "learning_rate": 4.6737347047699376e-07,
      "loss": 0.0116,
      "step": 2913400
    },
    {
      "epoch": 4.767875729070521,
      "grad_norm": 0.2969569265842438,
      "learning_rate": 4.6730757826347666e-07,
      "loss": 0.0116,
      "step": 2913420
    },
    {
      "epoch": 4.767908459509174,
      "grad_norm": 0.20557864010334015,
      "learning_rate": 4.672416860499595e-07,
      "loss": 0.0077,
      "step": 2913440
    },
    {
      "epoch": 4.767941189947828,
      "grad_norm": 0.6656205654144287,
      "learning_rate": 4.671757938364424e-07,
      "loss": 0.012,
      "step": 2913460
    },
    {
      "epoch": 4.767973920386481,
      "grad_norm": 0.17276005446910858,
      "learning_rate": 4.6710990162292525e-07,
      "loss": 0.0073,
      "step": 2913480
    },
    {
      "epoch": 4.768006650825134,
      "grad_norm": 0.11795692890882492,
      "learning_rate": 4.6704400940940815e-07,
      "loss": 0.0062,
      "step": 2913500
    },
    {
      "epoch": 4.768039381263788,
      "grad_norm": 0.20698145031929016,
      "learning_rate": 4.66978117195891e-07,
      "loss": 0.0084,
      "step": 2913520
    },
    {
      "epoch": 4.768072111702441,
      "grad_norm": 0.2693518102169037,
      "learning_rate": 4.669122249823739e-07,
      "loss": 0.0071,
      "step": 2913540
    },
    {
      "epoch": 4.768104842141095,
      "grad_norm": 0.36447465419769287,
      "learning_rate": 4.6684633276885674e-07,
      "loss": 0.0078,
      "step": 2913560
    },
    {
      "epoch": 4.7681375725797475,
      "grad_norm": 0.2693975269794464,
      "learning_rate": 4.667804405553396e-07,
      "loss": 0.009,
      "step": 2913580
    },
    {
      "epoch": 4.768170303018401,
      "grad_norm": 0.22085140645503998,
      "learning_rate": 4.667145483418225e-07,
      "loss": 0.0098,
      "step": 2913600
    },
    {
      "epoch": 4.768203033457055,
      "grad_norm": 0.3156439960002899,
      "learning_rate": 4.6664865612830533e-07,
      "loss": 0.009,
      "step": 2913620
    },
    {
      "epoch": 4.768235763895707,
      "grad_norm": 0.11800326406955719,
      "learning_rate": 4.6658276391478823e-07,
      "loss": 0.0123,
      "step": 2913640
    },
    {
      "epoch": 4.768268494334361,
      "grad_norm": 0.22763003408908844,
      "learning_rate": 4.665168717012711e-07,
      "loss": 0.0098,
      "step": 2913660
    },
    {
      "epoch": 4.7683012247730145,
      "grad_norm": 0.24669861793518066,
      "learning_rate": 4.6645097948775397e-07,
      "loss": 0.0069,
      "step": 2913680
    },
    {
      "epoch": 4.768333955211668,
      "grad_norm": 0.29520276188850403,
      "learning_rate": 4.663850872742368e-07,
      "loss": 0.007,
      "step": 2913700
    },
    {
      "epoch": 4.768366685650321,
      "grad_norm": 0.19397777318954468,
      "learning_rate": 4.663191950607197e-07,
      "loss": 0.0089,
      "step": 2913720
    },
    {
      "epoch": 4.768399416088974,
      "grad_norm": 0.40569618344306946,
      "learning_rate": 4.6625330284720256e-07,
      "loss": 0.009,
      "step": 2913740
    },
    {
      "epoch": 4.768432146527628,
      "grad_norm": 0.1401165872812271,
      "learning_rate": 4.6618741063368546e-07,
      "loss": 0.0105,
      "step": 2913760
    },
    {
      "epoch": 4.768464876966281,
      "grad_norm": 0.12793822586536407,
      "learning_rate": 4.661215184201683e-07,
      "loss": 0.0083,
      "step": 2913780
    },
    {
      "epoch": 4.768497607404934,
      "grad_norm": 0.1988399177789688,
      "learning_rate": 4.6605562620665115e-07,
      "loss": 0.0081,
      "step": 2913800
    },
    {
      "epoch": 4.768530337843588,
      "grad_norm": 1.0472068786621094,
      "learning_rate": 4.6598973399313405e-07,
      "loss": 0.0114,
      "step": 2913820
    },
    {
      "epoch": 4.768563068282241,
      "grad_norm": 0.1213177889585495,
      "learning_rate": 4.659238417796169e-07,
      "loss": 0.0097,
      "step": 2913840
    },
    {
      "epoch": 4.768595798720894,
      "grad_norm": 0.2325160801410675,
      "learning_rate": 4.6585794956609985e-07,
      "loss": 0.0078,
      "step": 2913860
    },
    {
      "epoch": 4.768628529159548,
      "grad_norm": 0.08197686076164246,
      "learning_rate": 4.6579205735258264e-07,
      "loss": 0.0089,
      "step": 2913880
    },
    {
      "epoch": 4.768661259598201,
      "grad_norm": 0.15685983002185822,
      "learning_rate": 4.657261651390656e-07,
      "loss": 0.0082,
      "step": 2913900
    },
    {
      "epoch": 4.768693990036854,
      "grad_norm": 0.08907095342874527,
      "learning_rate": 4.6566027292554844e-07,
      "loss": 0.0072,
      "step": 2913920
    },
    {
      "epoch": 4.768726720475508,
      "grad_norm": 0.09224404394626617,
      "learning_rate": 4.6559438071203134e-07,
      "loss": 0.0064,
      "step": 2913940
    },
    {
      "epoch": 4.768759450914161,
      "grad_norm": 0.31128373742103577,
      "learning_rate": 4.655284884985142e-07,
      "loss": 0.0084,
      "step": 2913960
    },
    {
      "epoch": 4.768792181352815,
      "grad_norm": 0.35110288858413696,
      "learning_rate": 4.654625962849971e-07,
      "loss": 0.0099,
      "step": 2913980
    },
    {
      "epoch": 4.768824911791468,
      "grad_norm": 0.32589849829673767,
      "learning_rate": 4.653967040714799e-07,
      "loss": 0.0102,
      "step": 2914000
    },
    {
      "epoch": 4.768857642230121,
      "grad_norm": 0.5264707803726196,
      "learning_rate": 4.6533081185796277e-07,
      "loss": 0.008,
      "step": 2914020
    },
    {
      "epoch": 4.768890372668775,
      "grad_norm": 0.11667625606060028,
      "learning_rate": 4.6526491964444567e-07,
      "loss": 0.016,
      "step": 2914040
    },
    {
      "epoch": 4.7689231031074275,
      "grad_norm": 0.616491973400116,
      "learning_rate": 4.651990274309285e-07,
      "loss": 0.0077,
      "step": 2914060
    },
    {
      "epoch": 4.768955833546081,
      "grad_norm": 0.9426963925361633,
      "learning_rate": 4.651331352174114e-07,
      "loss": 0.0076,
      "step": 2914080
    },
    {
      "epoch": 4.768988563984735,
      "grad_norm": 0.2484705001115799,
      "learning_rate": 4.6506724300389426e-07,
      "loss": 0.0113,
      "step": 2914100
    },
    {
      "epoch": 4.769021294423387,
      "grad_norm": 0.14978310465812683,
      "learning_rate": 4.6500135079037716e-07,
      "loss": 0.0054,
      "step": 2914120
    },
    {
      "epoch": 4.769054024862041,
      "grad_norm": 0.12490851432085037,
      "learning_rate": 4.6493545857686e-07,
      "loss": 0.0062,
      "step": 2914140
    },
    {
      "epoch": 4.769086755300695,
      "grad_norm": 0.7807475328445435,
      "learning_rate": 4.648695663633429e-07,
      "loss": 0.0116,
      "step": 2914160
    },
    {
      "epoch": 4.769119485739348,
      "grad_norm": 0.20102635025978088,
      "learning_rate": 4.6480367414982575e-07,
      "loss": 0.0088,
      "step": 2914180
    },
    {
      "epoch": 4.769152216178001,
      "grad_norm": 0.14554071426391602,
      "learning_rate": 4.6473778193630865e-07,
      "loss": 0.0109,
      "step": 2914200
    },
    {
      "epoch": 4.7691849466166545,
      "grad_norm": 0.08142401278018951,
      "learning_rate": 4.646718897227915e-07,
      "loss": 0.0083,
      "step": 2914220
    },
    {
      "epoch": 4.769217677055308,
      "grad_norm": 0.16433009505271912,
      "learning_rate": 4.6460599750927434e-07,
      "loss": 0.0058,
      "step": 2914240
    },
    {
      "epoch": 4.769250407493962,
      "grad_norm": 0.14930854737758636,
      "learning_rate": 4.6454010529575724e-07,
      "loss": 0.009,
      "step": 2914260
    },
    {
      "epoch": 4.769283137932614,
      "grad_norm": 0.2759564220905304,
      "learning_rate": 4.644742130822401e-07,
      "loss": 0.0085,
      "step": 2914280
    },
    {
      "epoch": 4.769315868371268,
      "grad_norm": 0.08336325734853745,
      "learning_rate": 4.64408320868723e-07,
      "loss": 0.0092,
      "step": 2914300
    },
    {
      "epoch": 4.769348598809922,
      "grad_norm": 0.48706042766571045,
      "learning_rate": 4.643424286552058e-07,
      "loss": 0.0097,
      "step": 2914320
    },
    {
      "epoch": 4.769381329248574,
      "grad_norm": 0.26530495285987854,
      "learning_rate": 4.642765364416887e-07,
      "loss": 0.0095,
      "step": 2914340
    },
    {
      "epoch": 4.769414059687228,
      "grad_norm": 0.40252432227134705,
      "learning_rate": 4.6421064422817157e-07,
      "loss": 0.0075,
      "step": 2914360
    },
    {
      "epoch": 4.7694467901258815,
      "grad_norm": 0.16264595091342926,
      "learning_rate": 4.6414475201465447e-07,
      "loss": 0.0056,
      "step": 2914380
    },
    {
      "epoch": 4.769479520564534,
      "grad_norm": 0.1857873797416687,
      "learning_rate": 4.640788598011373e-07,
      "loss": 0.0075,
      "step": 2914400
    },
    {
      "epoch": 4.769512251003188,
      "grad_norm": 0.061863768845796585,
      "learning_rate": 4.640129675876202e-07,
      "loss": 0.0061,
      "step": 2914420
    },
    {
      "epoch": 4.769544981441841,
      "grad_norm": 0.35849547386169434,
      "learning_rate": 4.6394707537410306e-07,
      "loss": 0.0116,
      "step": 2914440
    },
    {
      "epoch": 4.769577711880495,
      "grad_norm": 0.5033536553382874,
      "learning_rate": 4.638811831605859e-07,
      "loss": 0.0073,
      "step": 2914460
    },
    {
      "epoch": 4.769610442319148,
      "grad_norm": 0.3247295320034027,
      "learning_rate": 4.638152909470688e-07,
      "loss": 0.0083,
      "step": 2914480
    },
    {
      "epoch": 4.769643172757801,
      "grad_norm": 0.8264042735099792,
      "learning_rate": 4.6374939873355165e-07,
      "loss": 0.0045,
      "step": 2914500
    },
    {
      "epoch": 4.769675903196455,
      "grad_norm": 0.12758709490299225,
      "learning_rate": 4.636835065200346e-07,
      "loss": 0.0057,
      "step": 2914520
    },
    {
      "epoch": 4.769708633635108,
      "grad_norm": 0.04265430569648743,
      "learning_rate": 4.636176143065174e-07,
      "loss": 0.0072,
      "step": 2914540
    },
    {
      "epoch": 4.769741364073761,
      "grad_norm": 0.1584022343158722,
      "learning_rate": 4.6355172209300034e-07,
      "loss": 0.0081,
      "step": 2914560
    },
    {
      "epoch": 4.769774094512415,
      "grad_norm": 0.6301811933517456,
      "learning_rate": 4.634858298794832e-07,
      "loss": 0.0088,
      "step": 2914580
    },
    {
      "epoch": 4.769806824951068,
      "grad_norm": 0.1309116929769516,
      "learning_rate": 4.634199376659661e-07,
      "loss": 0.0075,
      "step": 2914600
    },
    {
      "epoch": 4.769839555389721,
      "grad_norm": 0.041390325874090195,
      "learning_rate": 4.6335404545244893e-07,
      "loss": 0.0083,
      "step": 2914620
    },
    {
      "epoch": 4.769872285828375,
      "grad_norm": 0.20143817365169525,
      "learning_rate": 4.6328815323893183e-07,
      "loss": 0.0077,
      "step": 2914640
    },
    {
      "epoch": 4.769905016267028,
      "grad_norm": 0.11797702312469482,
      "learning_rate": 4.632222610254147e-07,
      "loss": 0.0098,
      "step": 2914660
    },
    {
      "epoch": 4.769937746705681,
      "grad_norm": 0.23155485093593597,
      "learning_rate": 4.631563688118975e-07,
      "loss": 0.0057,
      "step": 2914680
    },
    {
      "epoch": 4.7699704771443345,
      "grad_norm": 0.13625986874103546,
      "learning_rate": 4.630904765983804e-07,
      "loss": 0.007,
      "step": 2914700
    },
    {
      "epoch": 4.770003207582988,
      "grad_norm": 0.1462291032075882,
      "learning_rate": 4.6302458438486327e-07,
      "loss": 0.0058,
      "step": 2914720
    },
    {
      "epoch": 4.770035938021642,
      "grad_norm": 0.321096807718277,
      "learning_rate": 4.6295869217134617e-07,
      "loss": 0.0084,
      "step": 2914740
    },
    {
      "epoch": 4.770068668460294,
      "grad_norm": 0.2609212100505829,
      "learning_rate": 4.62892799957829e-07,
      "loss": 0.0073,
      "step": 2914760
    },
    {
      "epoch": 4.770101398898948,
      "grad_norm": 0.3734600841999054,
      "learning_rate": 4.628269077443119e-07,
      "loss": 0.0104,
      "step": 2914780
    },
    {
      "epoch": 4.770134129337602,
      "grad_norm": 0.38258907198905945,
      "learning_rate": 4.6276101553079476e-07,
      "loss": 0.0072,
      "step": 2914800
    },
    {
      "epoch": 4.770166859776255,
      "grad_norm": 0.36020633578300476,
      "learning_rate": 4.6269512331727766e-07,
      "loss": 0.0076,
      "step": 2914820
    },
    {
      "epoch": 4.770199590214908,
      "grad_norm": 0.2863582372665405,
      "learning_rate": 4.626292311037605e-07,
      "loss": 0.0078,
      "step": 2914840
    },
    {
      "epoch": 4.7702323206535615,
      "grad_norm": 0.16055551171302795,
      "learning_rate": 4.625633388902434e-07,
      "loss": 0.0075,
      "step": 2914860
    },
    {
      "epoch": 4.770265051092215,
      "grad_norm": 0.32777804136276245,
      "learning_rate": 4.6249744667672625e-07,
      "loss": 0.0083,
      "step": 2914880
    },
    {
      "epoch": 4.770297781530868,
      "grad_norm": 0.21160298585891724,
      "learning_rate": 4.624315544632091e-07,
      "loss": 0.0093,
      "step": 2914900
    },
    {
      "epoch": 4.770330511969521,
      "grad_norm": 0.1627112329006195,
      "learning_rate": 4.62365662249692e-07,
      "loss": 0.0089,
      "step": 2914920
    },
    {
      "epoch": 4.770363242408175,
      "grad_norm": 0.09194677323102951,
      "learning_rate": 4.6229977003617483e-07,
      "loss": 0.0112,
      "step": 2914940
    },
    {
      "epoch": 4.770395972846828,
      "grad_norm": 0.1391477882862091,
      "learning_rate": 4.6223387782265773e-07,
      "loss": 0.0086,
      "step": 2914960
    },
    {
      "epoch": 4.770428703285481,
      "grad_norm": 0.3371409475803375,
      "learning_rate": 4.621679856091406e-07,
      "loss": 0.0106,
      "step": 2914980
    },
    {
      "epoch": 4.770461433724135,
      "grad_norm": 0.22626695036888123,
      "learning_rate": 4.621020933956235e-07,
      "loss": 0.0106,
      "step": 2915000
    },
    {
      "epoch": 4.7704941641627885,
      "grad_norm": 0.41808104515075684,
      "learning_rate": 4.620362011821063e-07,
      "loss": 0.0068,
      "step": 2915020
    },
    {
      "epoch": 4.770526894601441,
      "grad_norm": 0.6938797235488892,
      "learning_rate": 4.619703089685892e-07,
      "loss": 0.0091,
      "step": 2915040
    },
    {
      "epoch": 4.770559625040095,
      "grad_norm": 0.4409048855304718,
      "learning_rate": 4.6190441675507207e-07,
      "loss": 0.0064,
      "step": 2915060
    },
    {
      "epoch": 4.770592355478748,
      "grad_norm": 0.5754901170730591,
      "learning_rate": 4.6183852454155497e-07,
      "loss": 0.0103,
      "step": 2915080
    },
    {
      "epoch": 4.770625085917401,
      "grad_norm": 0.2323669046163559,
      "learning_rate": 4.617726323280378e-07,
      "loss": 0.0126,
      "step": 2915100
    },
    {
      "epoch": 4.770657816356055,
      "grad_norm": 0.264213889837265,
      "learning_rate": 4.6170674011452066e-07,
      "loss": 0.0088,
      "step": 2915120
    },
    {
      "epoch": 4.770690546794708,
      "grad_norm": 0.036387261003255844,
      "learning_rate": 4.6164084790100356e-07,
      "loss": 0.0086,
      "step": 2915140
    },
    {
      "epoch": 4.770723277233362,
      "grad_norm": 0.2495492398738861,
      "learning_rate": 4.615749556874864e-07,
      "loss": 0.007,
      "step": 2915160
    },
    {
      "epoch": 4.770756007672015,
      "grad_norm": 0.4496332108974457,
      "learning_rate": 4.6150906347396935e-07,
      "loss": 0.0096,
      "step": 2915180
    },
    {
      "epoch": 4.770788738110668,
      "grad_norm": 0.3770444393157959,
      "learning_rate": 4.6144317126045215e-07,
      "loss": 0.0085,
      "step": 2915200
    },
    {
      "epoch": 4.770821468549322,
      "grad_norm": 0.03327647224068642,
      "learning_rate": 4.613772790469351e-07,
      "loss": 0.0095,
      "step": 2915220
    },
    {
      "epoch": 4.7708541989879745,
      "grad_norm": 0.31967923045158386,
      "learning_rate": 4.6131138683341794e-07,
      "loss": 0.0103,
      "step": 2915240
    },
    {
      "epoch": 4.770886929426628,
      "grad_norm": 0.2678768038749695,
      "learning_rate": 4.6124549461990084e-07,
      "loss": 0.0067,
      "step": 2915260
    },
    {
      "epoch": 4.770919659865282,
      "grad_norm": 0.2902805805206299,
      "learning_rate": 4.611796024063837e-07,
      "loss": 0.0129,
      "step": 2915280
    },
    {
      "epoch": 4.770952390303934,
      "grad_norm": 0.4520697593688965,
      "learning_rate": 4.611137101928666e-07,
      "loss": 0.0104,
      "step": 2915300
    },
    {
      "epoch": 4.770985120742588,
      "grad_norm": 0.34063273668289185,
      "learning_rate": 4.6104781797934943e-07,
      "loss": 0.0081,
      "step": 2915320
    },
    {
      "epoch": 4.7710178511812416,
      "grad_norm": 0.25818750262260437,
      "learning_rate": 4.609819257658323e-07,
      "loss": 0.0083,
      "step": 2915340
    },
    {
      "epoch": 4.771050581619895,
      "grad_norm": 0.10468664765357971,
      "learning_rate": 4.609160335523152e-07,
      "loss": 0.0068,
      "step": 2915360
    },
    {
      "epoch": 4.771083312058548,
      "grad_norm": 0.44073957204818726,
      "learning_rate": 4.60850141338798e-07,
      "loss": 0.0092,
      "step": 2915380
    },
    {
      "epoch": 4.7711160424972014,
      "grad_norm": 0.20995812118053436,
      "learning_rate": 4.607842491252809e-07,
      "loss": 0.0074,
      "step": 2915400
    },
    {
      "epoch": 4.771148772935855,
      "grad_norm": 0.4121682047843933,
      "learning_rate": 4.6071835691176377e-07,
      "loss": 0.0079,
      "step": 2915420
    },
    {
      "epoch": 4.771181503374509,
      "grad_norm": 0.2416699379682541,
      "learning_rate": 4.6065246469824666e-07,
      "loss": 0.0057,
      "step": 2915440
    },
    {
      "epoch": 4.771214233813161,
      "grad_norm": 0.6740080714225769,
      "learning_rate": 4.605865724847295e-07,
      "loss": 0.0095,
      "step": 2915460
    },
    {
      "epoch": 4.771246964251815,
      "grad_norm": 0.20245708525180817,
      "learning_rate": 4.605206802712124e-07,
      "loss": 0.0082,
      "step": 2915480
    },
    {
      "epoch": 4.7712796946904685,
      "grad_norm": 0.09333144873380661,
      "learning_rate": 4.6045478805769525e-07,
      "loss": 0.0065,
      "step": 2915500
    },
    {
      "epoch": 4.771312425129121,
      "grad_norm": 0.22460272908210754,
      "learning_rate": 4.6038889584417815e-07,
      "loss": 0.0061,
      "step": 2915520
    },
    {
      "epoch": 4.771345155567775,
      "grad_norm": 0.09905107319355011,
      "learning_rate": 4.60323003630661e-07,
      "loss": 0.0056,
      "step": 2915540
    },
    {
      "epoch": 4.771377886006428,
      "grad_norm": 0.08465602248907089,
      "learning_rate": 4.6025711141714384e-07,
      "loss": 0.0094,
      "step": 2915560
    },
    {
      "epoch": 4.771410616445081,
      "grad_norm": 0.43525826930999756,
      "learning_rate": 4.6019121920362674e-07,
      "loss": 0.0083,
      "step": 2915580
    },
    {
      "epoch": 4.771443346883735,
      "grad_norm": 0.3019159734249115,
      "learning_rate": 4.601253269901096e-07,
      "loss": 0.0096,
      "step": 2915600
    },
    {
      "epoch": 4.771476077322388,
      "grad_norm": 0.5188356041908264,
      "learning_rate": 4.600594347765925e-07,
      "loss": 0.0074,
      "step": 2915620
    },
    {
      "epoch": 4.771508807761042,
      "grad_norm": 0.14119848608970642,
      "learning_rate": 4.5999354256307533e-07,
      "loss": 0.0114,
      "step": 2915640
    },
    {
      "epoch": 4.771541538199695,
      "grad_norm": 0.2062174528837204,
      "learning_rate": 4.5992765034955823e-07,
      "loss": 0.0045,
      "step": 2915660
    },
    {
      "epoch": 4.771574268638348,
      "grad_norm": 0.16668948531150818,
      "learning_rate": 4.598617581360411e-07,
      "loss": 0.0063,
      "step": 2915680
    },
    {
      "epoch": 4.771606999077002,
      "grad_norm": 0.09647133946418762,
      "learning_rate": 4.59795865922524e-07,
      "loss": 0.0082,
      "step": 2915700
    },
    {
      "epoch": 4.771639729515655,
      "grad_norm": 0.3507884442806244,
      "learning_rate": 4.597299737090068e-07,
      "loss": 0.0079,
      "step": 2915720
    },
    {
      "epoch": 4.771672459954308,
      "grad_norm": 0.16936029493808746,
      "learning_rate": 4.596640814954897e-07,
      "loss": 0.0057,
      "step": 2915740
    },
    {
      "epoch": 4.771705190392962,
      "grad_norm": 0.0736633837223053,
      "learning_rate": 4.5959818928197256e-07,
      "loss": 0.009,
      "step": 2915760
    },
    {
      "epoch": 4.771737920831615,
      "grad_norm": 0.3709423243999481,
      "learning_rate": 4.595322970684554e-07,
      "loss": 0.0086,
      "step": 2915780
    },
    {
      "epoch": 4.771770651270268,
      "grad_norm": 0.49624934792518616,
      "learning_rate": 4.594664048549383e-07,
      "loss": 0.0083,
      "step": 2915800
    },
    {
      "epoch": 4.771803381708922,
      "grad_norm": 0.20740601420402527,
      "learning_rate": 4.5940051264142115e-07,
      "loss": 0.0078,
      "step": 2915820
    },
    {
      "epoch": 4.771836112147575,
      "grad_norm": 0.19839335978031158,
      "learning_rate": 4.593346204279041e-07,
      "loss": 0.0079,
      "step": 2915840
    },
    {
      "epoch": 4.771868842586228,
      "grad_norm": 0.13303621113300323,
      "learning_rate": 4.592687282143869e-07,
      "loss": 0.0047,
      "step": 2915860
    },
    {
      "epoch": 4.7719015730248815,
      "grad_norm": 0.041734546422958374,
      "learning_rate": 4.5920283600086985e-07,
      "loss": 0.0126,
      "step": 2915880
    },
    {
      "epoch": 4.771934303463535,
      "grad_norm": 0.1456591933965683,
      "learning_rate": 4.591369437873527e-07,
      "loss": 0.0066,
      "step": 2915900
    },
    {
      "epoch": 4.771967033902189,
      "grad_norm": 0.1509215533733368,
      "learning_rate": 4.590710515738356e-07,
      "loss": 0.0097,
      "step": 2915920
    },
    {
      "epoch": 4.771999764340841,
      "grad_norm": 0.15479570627212524,
      "learning_rate": 4.5900515936031844e-07,
      "loss": 0.0107,
      "step": 2915940
    },
    {
      "epoch": 4.772032494779495,
      "grad_norm": 0.33256256580352783,
      "learning_rate": 4.5893926714680134e-07,
      "loss": 0.0084,
      "step": 2915960
    },
    {
      "epoch": 4.772065225218149,
      "grad_norm": 0.2727104425430298,
      "learning_rate": 4.588733749332842e-07,
      "loss": 0.0105,
      "step": 2915980
    },
    {
      "epoch": 4.772097955656802,
      "grad_norm": 0.28067678213119507,
      "learning_rate": 4.5880748271976703e-07,
      "loss": 0.0088,
      "step": 2916000
    },
    {
      "epoch": 4.772130686095455,
      "grad_norm": 0.3804613947868347,
      "learning_rate": 4.5874159050624993e-07,
      "loss": 0.0079,
      "step": 2916020
    },
    {
      "epoch": 4.7721634165341085,
      "grad_norm": 0.14760571718215942,
      "learning_rate": 4.586756982927328e-07,
      "loss": 0.0076,
      "step": 2916040
    },
    {
      "epoch": 4.772196146972762,
      "grad_norm": 0.26725533604621887,
      "learning_rate": 4.5860980607921567e-07,
      "loss": 0.0117,
      "step": 2916060
    },
    {
      "epoch": 4.772228877411415,
      "grad_norm": 0.1176057755947113,
      "learning_rate": 4.585439138656985e-07,
      "loss": 0.0118,
      "step": 2916080
    },
    {
      "epoch": 4.772261607850068,
      "grad_norm": 0.49482420086860657,
      "learning_rate": 4.584780216521814e-07,
      "loss": 0.0125,
      "step": 2916100
    },
    {
      "epoch": 4.772294338288722,
      "grad_norm": 0.1625133752822876,
      "learning_rate": 4.5841212943866426e-07,
      "loss": 0.0069,
      "step": 2916120
    },
    {
      "epoch": 4.772327068727375,
      "grad_norm": 0.11957839131355286,
      "learning_rate": 4.5834623722514716e-07,
      "loss": 0.0094,
      "step": 2916140
    },
    {
      "epoch": 4.772359799166028,
      "grad_norm": 0.28984832763671875,
      "learning_rate": 4.5828034501163e-07,
      "loss": 0.0075,
      "step": 2916160
    },
    {
      "epoch": 4.772392529604682,
      "grad_norm": 0.19064311683177948,
      "learning_rate": 4.582144527981129e-07,
      "loss": 0.0084,
      "step": 2916180
    },
    {
      "epoch": 4.7724252600433354,
      "grad_norm": 0.18574167788028717,
      "learning_rate": 4.5814856058459575e-07,
      "loss": 0.0111,
      "step": 2916200
    },
    {
      "epoch": 4.772457990481988,
      "grad_norm": 0.2950156033039093,
      "learning_rate": 4.580826683710786e-07,
      "loss": 0.0131,
      "step": 2916220
    },
    {
      "epoch": 4.772490720920642,
      "grad_norm": 0.1474531888961792,
      "learning_rate": 4.580167761575615e-07,
      "loss": 0.0124,
      "step": 2916240
    },
    {
      "epoch": 4.772523451359295,
      "grad_norm": 0.10378841310739517,
      "learning_rate": 4.5795088394404434e-07,
      "loss": 0.0103,
      "step": 2916260
    },
    {
      "epoch": 4.772556181797949,
      "grad_norm": 0.4764099717140198,
      "learning_rate": 4.5788499173052724e-07,
      "loss": 0.0081,
      "step": 2916280
    },
    {
      "epoch": 4.772588912236602,
      "grad_norm": 0.10204626619815826,
      "learning_rate": 4.578190995170101e-07,
      "loss": 0.0083,
      "step": 2916300
    },
    {
      "epoch": 4.772621642675255,
      "grad_norm": 0.18285419046878815,
      "learning_rate": 4.57753207303493e-07,
      "loss": 0.0066,
      "step": 2916320
    },
    {
      "epoch": 4.772654373113909,
      "grad_norm": 0.15091344714164734,
      "learning_rate": 4.5768731508997583e-07,
      "loss": 0.0106,
      "step": 2916340
    },
    {
      "epoch": 4.7726871035525615,
      "grad_norm": 0.37228772044181824,
      "learning_rate": 4.5762142287645873e-07,
      "loss": 0.0105,
      "step": 2916360
    },
    {
      "epoch": 4.772719833991215,
      "grad_norm": 0.14800702035427094,
      "learning_rate": 4.5755553066294157e-07,
      "loss": 0.0121,
      "step": 2916380
    },
    {
      "epoch": 4.772752564429869,
      "grad_norm": 0.11165277659893036,
      "learning_rate": 4.5748963844942447e-07,
      "loss": 0.0047,
      "step": 2916400
    },
    {
      "epoch": 4.772785294868521,
      "grad_norm": 0.2311457097530365,
      "learning_rate": 4.574237462359073e-07,
      "loss": 0.014,
      "step": 2916420
    },
    {
      "epoch": 4.772818025307175,
      "grad_norm": 0.627482533454895,
      "learning_rate": 4.5735785402239016e-07,
      "loss": 0.0105,
      "step": 2916440
    },
    {
      "epoch": 4.772850755745829,
      "grad_norm": 0.6578072309494019,
      "learning_rate": 4.5729196180887306e-07,
      "loss": 0.0128,
      "step": 2916460
    },
    {
      "epoch": 4.772883486184482,
      "grad_norm": 0.05079278349876404,
      "learning_rate": 4.572260695953559e-07,
      "loss": 0.0084,
      "step": 2916480
    },
    {
      "epoch": 4.772916216623135,
      "grad_norm": 0.18218302726745605,
      "learning_rate": 4.5716017738183886e-07,
      "loss": 0.006,
      "step": 2916500
    },
    {
      "epoch": 4.7729489470617885,
      "grad_norm": 0.5915852189064026,
      "learning_rate": 4.5709428516832165e-07,
      "loss": 0.0088,
      "step": 2916520
    },
    {
      "epoch": 4.772981677500442,
      "grad_norm": 0.31441906094551086,
      "learning_rate": 4.570283929548046e-07,
      "loss": 0.0102,
      "step": 2916540
    },
    {
      "epoch": 4.773014407939095,
      "grad_norm": 0.5069561004638672,
      "learning_rate": 4.5696250074128745e-07,
      "loss": 0.0063,
      "step": 2916560
    },
    {
      "epoch": 4.773047138377748,
      "grad_norm": 0.16669873893260956,
      "learning_rate": 4.5689660852777035e-07,
      "loss": 0.0064,
      "step": 2916580
    },
    {
      "epoch": 4.773079868816402,
      "grad_norm": 0.18991971015930176,
      "learning_rate": 4.568307163142532e-07,
      "loss": 0.0092,
      "step": 2916600
    },
    {
      "epoch": 4.773112599255056,
      "grad_norm": 0.6738064289093018,
      "learning_rate": 4.567648241007361e-07,
      "loss": 0.0141,
      "step": 2916620
    },
    {
      "epoch": 4.773145329693708,
      "grad_norm": 0.6156085133552551,
      "learning_rate": 4.5669893188721894e-07,
      "loss": 0.0105,
      "step": 2916640
    },
    {
      "epoch": 4.773178060132362,
      "grad_norm": 0.08238247036933899,
      "learning_rate": 4.5663303967370184e-07,
      "loss": 0.0162,
      "step": 2916660
    },
    {
      "epoch": 4.7732107905710155,
      "grad_norm": 0.13365551829338074,
      "learning_rate": 4.565671474601847e-07,
      "loss": 0.0099,
      "step": 2916680
    },
    {
      "epoch": 4.773243521009668,
      "grad_norm": 0.3456289768218994,
      "learning_rate": 4.565012552466675e-07,
      "loss": 0.009,
      "step": 2916700
    },
    {
      "epoch": 4.773276251448322,
      "grad_norm": 0.20979854464530945,
      "learning_rate": 4.564353630331504e-07,
      "loss": 0.0094,
      "step": 2916720
    },
    {
      "epoch": 4.773308981886975,
      "grad_norm": 0.367815226316452,
      "learning_rate": 4.5636947081963327e-07,
      "loss": 0.0151,
      "step": 2916740
    },
    {
      "epoch": 4.773341712325628,
      "grad_norm": 0.4340604841709137,
      "learning_rate": 4.5630357860611617e-07,
      "loss": 0.0105,
      "step": 2916760
    },
    {
      "epoch": 4.773374442764282,
      "grad_norm": 0.6043197512626648,
      "learning_rate": 4.56237686392599e-07,
      "loss": 0.0087,
      "step": 2916780
    },
    {
      "epoch": 4.773407173202935,
      "grad_norm": 0.27209702134132385,
      "learning_rate": 4.561717941790819e-07,
      "loss": 0.0093,
      "step": 2916800
    },
    {
      "epoch": 4.773439903641589,
      "grad_norm": 0.09711670130491257,
      "learning_rate": 4.5610590196556476e-07,
      "loss": 0.0074,
      "step": 2916820
    },
    {
      "epoch": 4.773472634080242,
      "grad_norm": 0.16291789710521698,
      "learning_rate": 4.5604000975204766e-07,
      "loss": 0.0059,
      "step": 2916840
    },
    {
      "epoch": 4.773505364518895,
      "grad_norm": 0.11947457492351532,
      "learning_rate": 4.559741175385305e-07,
      "loss": 0.0072,
      "step": 2916860
    },
    {
      "epoch": 4.773538094957549,
      "grad_norm": 0.1263853758573532,
      "learning_rate": 4.559082253250134e-07,
      "loss": 0.01,
      "step": 2916880
    },
    {
      "epoch": 4.773570825396202,
      "grad_norm": 0.5037650465965271,
      "learning_rate": 4.5584233311149625e-07,
      "loss": 0.0131,
      "step": 2916900
    },
    {
      "epoch": 4.773603555834855,
      "grad_norm": 0.41997572779655457,
      "learning_rate": 4.557764408979791e-07,
      "loss": 0.0084,
      "step": 2916920
    },
    {
      "epoch": 4.773636286273509,
      "grad_norm": 0.18155306577682495,
      "learning_rate": 4.55710548684462e-07,
      "loss": 0.0087,
      "step": 2916940
    },
    {
      "epoch": 4.773669016712162,
      "grad_norm": 0.07753609865903854,
      "learning_rate": 4.5564465647094484e-07,
      "loss": 0.0094,
      "step": 2916960
    },
    {
      "epoch": 4.773701747150815,
      "grad_norm": 0.2993801236152649,
      "learning_rate": 4.5557876425742774e-07,
      "loss": 0.0058,
      "step": 2916980
    },
    {
      "epoch": 4.773734477589469,
      "grad_norm": 0.3308747410774231,
      "learning_rate": 4.555128720439106e-07,
      "loss": 0.0063,
      "step": 2917000
    },
    {
      "epoch": 4.773767208028122,
      "grad_norm": 0.1583910882472992,
      "learning_rate": 4.554469798303935e-07,
      "loss": 0.0101,
      "step": 2917020
    },
    {
      "epoch": 4.773799938466775,
      "grad_norm": 0.14312294125556946,
      "learning_rate": 4.553810876168763e-07,
      "loss": 0.0084,
      "step": 2917040
    },
    {
      "epoch": 4.7738326689054285,
      "grad_norm": 0.2515525221824646,
      "learning_rate": 4.553151954033592e-07,
      "loss": 0.0086,
      "step": 2917060
    },
    {
      "epoch": 4.773865399344082,
      "grad_norm": 0.09989209473133087,
      "learning_rate": 4.5524930318984207e-07,
      "loss": 0.0089,
      "step": 2917080
    },
    {
      "epoch": 4.773898129782736,
      "grad_norm": 0.3433532416820526,
      "learning_rate": 4.55183410976325e-07,
      "loss": 0.011,
      "step": 2917100
    },
    {
      "epoch": 4.773930860221388,
      "grad_norm": 0.07649543136358261,
      "learning_rate": 4.551175187628078e-07,
      "loss": 0.0087,
      "step": 2917120
    },
    {
      "epoch": 4.773963590660042,
      "grad_norm": 0.11306028068065643,
      "learning_rate": 4.5505162654929066e-07,
      "loss": 0.0073,
      "step": 2917140
    },
    {
      "epoch": 4.7739963210986955,
      "grad_norm": 0.18876533210277557,
      "learning_rate": 4.549857343357736e-07,
      "loss": 0.0119,
      "step": 2917160
    },
    {
      "epoch": 4.774029051537349,
      "grad_norm": 0.3647780120372772,
      "learning_rate": 4.549198421222564e-07,
      "loss": 0.0071,
      "step": 2917180
    },
    {
      "epoch": 4.774061781976002,
      "grad_norm": 0.4752744436264038,
      "learning_rate": 4.5485394990873936e-07,
      "loss": 0.0078,
      "step": 2917200
    },
    {
      "epoch": 4.774094512414655,
      "grad_norm": 0.1796974241733551,
      "learning_rate": 4.547880576952222e-07,
      "loss": 0.012,
      "step": 2917220
    },
    {
      "epoch": 4.774127242853309,
      "grad_norm": 0.22745312750339508,
      "learning_rate": 4.547221654817051e-07,
      "loss": 0.009,
      "step": 2917240
    },
    {
      "epoch": 4.774159973291962,
      "grad_norm": 0.3799748718738556,
      "learning_rate": 4.5465627326818794e-07,
      "loss": 0.0066,
      "step": 2917260
    },
    {
      "epoch": 4.774192703730615,
      "grad_norm": 0.11563164740800858,
      "learning_rate": 4.5459038105467084e-07,
      "loss": 0.0074,
      "step": 2917280
    },
    {
      "epoch": 4.774225434169269,
      "grad_norm": 0.3267687261104584,
      "learning_rate": 4.545244888411537e-07,
      "loss": 0.0117,
      "step": 2917300
    },
    {
      "epoch": 4.774258164607922,
      "grad_norm": 0.44004327058792114,
      "learning_rate": 4.544585966276366e-07,
      "loss": 0.0125,
      "step": 2917320
    },
    {
      "epoch": 4.774290895046575,
      "grad_norm": 0.0960729643702507,
      "learning_rate": 4.5439270441411943e-07,
      "loss": 0.0053,
      "step": 2917340
    },
    {
      "epoch": 4.774323625485229,
      "grad_norm": 0.10310707241296768,
      "learning_rate": 4.543268122006023e-07,
      "loss": 0.0059,
      "step": 2917360
    },
    {
      "epoch": 4.774356355923882,
      "grad_norm": 0.345321387052536,
      "learning_rate": 4.542609199870852e-07,
      "loss": 0.01,
      "step": 2917380
    },
    {
      "epoch": 4.774389086362535,
      "grad_norm": 0.09066513180732727,
      "learning_rate": 4.54195027773568e-07,
      "loss": 0.0193,
      "step": 2917400
    },
    {
      "epoch": 4.774421816801189,
      "grad_norm": 0.08741975575685501,
      "learning_rate": 4.541291355600509e-07,
      "loss": 0.0077,
      "step": 2917420
    },
    {
      "epoch": 4.774454547239842,
      "grad_norm": 0.24207186698913574,
      "learning_rate": 4.5406324334653377e-07,
      "loss": 0.013,
      "step": 2917440
    },
    {
      "epoch": 4.774487277678496,
      "grad_norm": 0.07693002372980118,
      "learning_rate": 4.5399735113301667e-07,
      "loss": 0.0066,
      "step": 2917460
    },
    {
      "epoch": 4.774520008117149,
      "grad_norm": 0.04715383052825928,
      "learning_rate": 4.539314589194995e-07,
      "loss": 0.0068,
      "step": 2917480
    },
    {
      "epoch": 4.774552738555802,
      "grad_norm": 0.19220201671123505,
      "learning_rate": 4.538655667059824e-07,
      "loss": 0.0081,
      "step": 2917500
    },
    {
      "epoch": 4.774585468994456,
      "grad_norm": 0.3883844316005707,
      "learning_rate": 4.5379967449246526e-07,
      "loss": 0.0101,
      "step": 2917520
    },
    {
      "epoch": 4.7746181994331085,
      "grad_norm": 0.5081191658973694,
      "learning_rate": 4.5373378227894815e-07,
      "loss": 0.0076,
      "step": 2917540
    },
    {
      "epoch": 4.774650929871762,
      "grad_norm": 0.17198292911052704,
      "learning_rate": 4.53667890065431e-07,
      "loss": 0.0076,
      "step": 2917560
    },
    {
      "epoch": 4.774683660310416,
      "grad_norm": 0.1998380571603775,
      "learning_rate": 4.5360199785191385e-07,
      "loss": 0.0054,
      "step": 2917580
    },
    {
      "epoch": 4.774716390749068,
      "grad_norm": 0.23090209066867828,
      "learning_rate": 4.5353610563839674e-07,
      "loss": 0.0109,
      "step": 2917600
    },
    {
      "epoch": 4.774749121187722,
      "grad_norm": 0.09495361149311066,
      "learning_rate": 4.534702134248796e-07,
      "loss": 0.0078,
      "step": 2917620
    },
    {
      "epoch": 4.774781851626376,
      "grad_norm": 0.25334569811820984,
      "learning_rate": 4.534043212113625e-07,
      "loss": 0.0078,
      "step": 2917640
    },
    {
      "epoch": 4.774814582065029,
      "grad_norm": 0.1378639191389084,
      "learning_rate": 4.5333842899784533e-07,
      "loss": 0.0093,
      "step": 2917660
    },
    {
      "epoch": 4.774847312503682,
      "grad_norm": 0.052801463752985,
      "learning_rate": 4.5327253678432823e-07,
      "loss": 0.0069,
      "step": 2917680
    },
    {
      "epoch": 4.7748800429423355,
      "grad_norm": 0.1104934811592102,
      "learning_rate": 4.532066445708111e-07,
      "loss": 0.0117,
      "step": 2917700
    },
    {
      "epoch": 4.774912773380989,
      "grad_norm": 0.10678375512361526,
      "learning_rate": 4.5314075235729403e-07,
      "loss": 0.0039,
      "step": 2917720
    },
    {
      "epoch": 4.774945503819643,
      "grad_norm": 0.22188489139080048,
      "learning_rate": 4.530748601437768e-07,
      "loss": 0.0071,
      "step": 2917740
    },
    {
      "epoch": 4.774978234258295,
      "grad_norm": 0.03331325203180313,
      "learning_rate": 4.530089679302598e-07,
      "loss": 0.0104,
      "step": 2917760
    },
    {
      "epoch": 4.775010964696949,
      "grad_norm": 0.25028008222579956,
      "learning_rate": 4.5294307571674257e-07,
      "loss": 0.0111,
      "step": 2917780
    },
    {
      "epoch": 4.775043695135603,
      "grad_norm": 0.1452641487121582,
      "learning_rate": 4.528771835032254e-07,
      "loss": 0.0093,
      "step": 2917800
    },
    {
      "epoch": 4.775076425574255,
      "grad_norm": 0.05793112516403198,
      "learning_rate": 4.5281129128970836e-07,
      "loss": 0.0124,
      "step": 2917820
    },
    {
      "epoch": 4.775109156012909,
      "grad_norm": 0.165476992726326,
      "learning_rate": 4.5274539907619116e-07,
      "loss": 0.0141,
      "step": 2917840
    },
    {
      "epoch": 4.7751418864515625,
      "grad_norm": 0.11453550308942795,
      "learning_rate": 4.526795068626741e-07,
      "loss": 0.0112,
      "step": 2917860
    },
    {
      "epoch": 4.775174616890215,
      "grad_norm": 1.0059232711791992,
      "learning_rate": 4.5261361464915695e-07,
      "loss": 0.0112,
      "step": 2917880
    },
    {
      "epoch": 4.775207347328869,
      "grad_norm": 0.5531512498855591,
      "learning_rate": 4.5254772243563985e-07,
      "loss": 0.0086,
      "step": 2917900
    },
    {
      "epoch": 4.775240077767522,
      "grad_norm": 0.06594899296760559,
      "learning_rate": 4.524818302221227e-07,
      "loss": 0.0077,
      "step": 2917920
    },
    {
      "epoch": 4.775272808206176,
      "grad_norm": 0.09444310516119003,
      "learning_rate": 4.524159380086056e-07,
      "loss": 0.0074,
      "step": 2917940
    },
    {
      "epoch": 4.775305538644829,
      "grad_norm": 0.1698816567659378,
      "learning_rate": 4.5235004579508844e-07,
      "loss": 0.0118,
      "step": 2917960
    },
    {
      "epoch": 4.775338269083482,
      "grad_norm": 0.13729768991470337,
      "learning_rate": 4.5228415358157134e-07,
      "loss": 0.0052,
      "step": 2917980
    },
    {
      "epoch": 4.775370999522136,
      "grad_norm": 0.16091658174991608,
      "learning_rate": 4.522182613680542e-07,
      "loss": 0.0079,
      "step": 2918000
    },
    {
      "epoch": 4.775403729960789,
      "grad_norm": 0.22040370106697083,
      "learning_rate": 4.5215236915453703e-07,
      "loss": 0.0082,
      "step": 2918020
    },
    {
      "epoch": 4.775436460399442,
      "grad_norm": 0.23860368132591248,
      "learning_rate": 4.5208647694101993e-07,
      "loss": 0.0085,
      "step": 2918040
    },
    {
      "epoch": 4.775469190838096,
      "grad_norm": 0.4142180383205414,
      "learning_rate": 4.520205847275028e-07,
      "loss": 0.0095,
      "step": 2918060
    },
    {
      "epoch": 4.775501921276749,
      "grad_norm": 0.21035589277744293,
      "learning_rate": 4.519546925139857e-07,
      "loss": 0.0105,
      "step": 2918080
    },
    {
      "epoch": 4.775534651715402,
      "grad_norm": 0.22722665965557098,
      "learning_rate": 4.518888003004685e-07,
      "loss": 0.0065,
      "step": 2918100
    },
    {
      "epoch": 4.775567382154056,
      "grad_norm": 0.09842482209205627,
      "learning_rate": 4.518229080869514e-07,
      "loss": 0.0068,
      "step": 2918120
    },
    {
      "epoch": 4.775600112592709,
      "grad_norm": 0.3066284954547882,
      "learning_rate": 4.5175701587343426e-07,
      "loss": 0.0099,
      "step": 2918140
    },
    {
      "epoch": 4.775632843031362,
      "grad_norm": 0.08398663252592087,
      "learning_rate": 4.5169112365991716e-07,
      "loss": 0.0087,
      "step": 2918160
    },
    {
      "epoch": 4.7756655734700155,
      "grad_norm": 0.14172060787677765,
      "learning_rate": 4.516252314464e-07,
      "loss": 0.0076,
      "step": 2918180
    },
    {
      "epoch": 4.775698303908669,
      "grad_norm": 0.16395790874958038,
      "learning_rate": 4.515593392328829e-07,
      "loss": 0.0114,
      "step": 2918200
    },
    {
      "epoch": 4.775731034347322,
      "grad_norm": 0.1926024705171585,
      "learning_rate": 4.5149344701936575e-07,
      "loss": 0.0104,
      "step": 2918220
    },
    {
      "epoch": 4.775763764785975,
      "grad_norm": 0.32314735651016235,
      "learning_rate": 4.514275548058486e-07,
      "loss": 0.0078,
      "step": 2918240
    },
    {
      "epoch": 4.775796495224629,
      "grad_norm": 0.12956254184246063,
      "learning_rate": 4.513616625923315e-07,
      "loss": 0.0074,
      "step": 2918260
    },
    {
      "epoch": 4.775829225663283,
      "grad_norm": 0.10444148629903793,
      "learning_rate": 4.5129577037881434e-07,
      "loss": 0.0067,
      "step": 2918280
    },
    {
      "epoch": 4.775861956101935,
      "grad_norm": 0.18499130010604858,
      "learning_rate": 4.5122987816529724e-07,
      "loss": 0.0081,
      "step": 2918300
    },
    {
      "epoch": 4.775894686540589,
      "grad_norm": 0.5495142936706543,
      "learning_rate": 4.511639859517801e-07,
      "loss": 0.0079,
      "step": 2918320
    },
    {
      "epoch": 4.7759274169792425,
      "grad_norm": 0.24793168902397156,
      "learning_rate": 4.51098093738263e-07,
      "loss": 0.0064,
      "step": 2918340
    },
    {
      "epoch": 4.775960147417896,
      "grad_norm": 0.1500595659017563,
      "learning_rate": 4.5103220152474583e-07,
      "loss": 0.0079,
      "step": 2918360
    },
    {
      "epoch": 4.775992877856549,
      "grad_norm": 0.11327902227640152,
      "learning_rate": 4.509663093112288e-07,
      "loss": 0.0107,
      "step": 2918380
    },
    {
      "epoch": 4.776025608295202,
      "grad_norm": 0.09761520475149155,
      "learning_rate": 4.509004170977116e-07,
      "loss": 0.0056,
      "step": 2918400
    },
    {
      "epoch": 4.776058338733856,
      "grad_norm": 0.42005011439323425,
      "learning_rate": 4.508345248841945e-07,
      "loss": 0.0128,
      "step": 2918420
    },
    {
      "epoch": 4.776091069172509,
      "grad_norm": 0.08284623920917511,
      "learning_rate": 4.507686326706773e-07,
      "loss": 0.0052,
      "step": 2918440
    },
    {
      "epoch": 4.776123799611162,
      "grad_norm": 0.5485924482345581,
      "learning_rate": 4.5070274045716016e-07,
      "loss": 0.0158,
      "step": 2918460
    },
    {
      "epoch": 4.776156530049816,
      "grad_norm": 0.20665447413921356,
      "learning_rate": 4.506368482436431e-07,
      "loss": 0.0098,
      "step": 2918480
    },
    {
      "epoch": 4.776189260488469,
      "grad_norm": 0.1883290559053421,
      "learning_rate": 4.505709560301259e-07,
      "loss": 0.0083,
      "step": 2918500
    },
    {
      "epoch": 4.776221990927122,
      "grad_norm": 0.1346105933189392,
      "learning_rate": 4.5050506381660886e-07,
      "loss": 0.0089,
      "step": 2918520
    },
    {
      "epoch": 4.776254721365776,
      "grad_norm": 0.45810118317604065,
      "learning_rate": 4.504391716030917e-07,
      "loss": 0.0149,
      "step": 2918540
    },
    {
      "epoch": 4.776287451804429,
      "grad_norm": 0.19078074395656586,
      "learning_rate": 4.503732793895746e-07,
      "loss": 0.0092,
      "step": 2918560
    },
    {
      "epoch": 4.776320182243082,
      "grad_norm": 0.20286597311496735,
      "learning_rate": 4.5030738717605745e-07,
      "loss": 0.0086,
      "step": 2918580
    },
    {
      "epoch": 4.776352912681736,
      "grad_norm": 0.21841523051261902,
      "learning_rate": 4.5024149496254035e-07,
      "loss": 0.0108,
      "step": 2918600
    },
    {
      "epoch": 4.776385643120389,
      "grad_norm": 0.2731076180934906,
      "learning_rate": 4.501756027490232e-07,
      "loss": 0.0096,
      "step": 2918620
    },
    {
      "epoch": 4.776418373559043,
      "grad_norm": 0.19822785258293152,
      "learning_rate": 4.501097105355061e-07,
      "loss": 0.007,
      "step": 2918640
    },
    {
      "epoch": 4.776451103997696,
      "grad_norm": 0.5176676511764526,
      "learning_rate": 4.5004381832198894e-07,
      "loss": 0.0109,
      "step": 2918660
    },
    {
      "epoch": 4.776483834436349,
      "grad_norm": 0.14634136855602264,
      "learning_rate": 4.499779261084718e-07,
      "loss": 0.0081,
      "step": 2918680
    },
    {
      "epoch": 4.776516564875003,
      "grad_norm": 0.3949355483055115,
      "learning_rate": 4.499120338949547e-07,
      "loss": 0.0081,
      "step": 2918700
    },
    {
      "epoch": 4.7765492953136555,
      "grad_norm": 0.42753922939300537,
      "learning_rate": 4.4984614168143753e-07,
      "loss": 0.0087,
      "step": 2918720
    },
    {
      "epoch": 4.776582025752309,
      "grad_norm": 0.41996222734451294,
      "learning_rate": 4.4978024946792043e-07,
      "loss": 0.0101,
      "step": 2918740
    },
    {
      "epoch": 4.776614756190963,
      "grad_norm": 0.326499879360199,
      "learning_rate": 4.4971435725440327e-07,
      "loss": 0.0114,
      "step": 2918760
    },
    {
      "epoch": 4.776647486629615,
      "grad_norm": 0.31079813838005066,
      "learning_rate": 4.4964846504088617e-07,
      "loss": 0.0096,
      "step": 2918780
    },
    {
      "epoch": 4.776680217068269,
      "grad_norm": 0.41831842064857483,
      "learning_rate": 4.49582572827369e-07,
      "loss": 0.0128,
      "step": 2918800
    },
    {
      "epoch": 4.7767129475069225,
      "grad_norm": 0.14220073819160461,
      "learning_rate": 4.495166806138519e-07,
      "loss": 0.0094,
      "step": 2918820
    },
    {
      "epoch": 4.776745677945576,
      "grad_norm": 0.16880644857883453,
      "learning_rate": 4.4945078840033476e-07,
      "loss": 0.0095,
      "step": 2918840
    },
    {
      "epoch": 4.776778408384229,
      "grad_norm": 0.2013767808675766,
      "learning_rate": 4.4938489618681766e-07,
      "loss": 0.0083,
      "step": 2918860
    },
    {
      "epoch": 4.776811138822882,
      "grad_norm": 0.19736722111701965,
      "learning_rate": 4.493190039733005e-07,
      "loss": 0.0111,
      "step": 2918880
    },
    {
      "epoch": 4.776843869261536,
      "grad_norm": 0.07671204209327698,
      "learning_rate": 4.4925311175978335e-07,
      "loss": 0.0091,
      "step": 2918900
    },
    {
      "epoch": 4.77687659970019,
      "grad_norm": 0.14840227365493774,
      "learning_rate": 4.4918721954626625e-07,
      "loss": 0.0088,
      "step": 2918920
    },
    {
      "epoch": 4.776909330138842,
      "grad_norm": 0.1426369696855545,
      "learning_rate": 4.491213273327491e-07,
      "loss": 0.0091,
      "step": 2918940
    },
    {
      "epoch": 4.776942060577496,
      "grad_norm": 0.23869961500167847,
      "learning_rate": 4.49055435119232e-07,
      "loss": 0.0076,
      "step": 2918960
    },
    {
      "epoch": 4.7769747910161495,
      "grad_norm": 0.3384799659252167,
      "learning_rate": 4.4898954290571484e-07,
      "loss": 0.0122,
      "step": 2918980
    },
    {
      "epoch": 4.777007521454802,
      "grad_norm": 0.3967968225479126,
      "learning_rate": 4.4892365069219774e-07,
      "loss": 0.0085,
      "step": 2919000
    },
    {
      "epoch": 4.777040251893456,
      "grad_norm": 0.11669587343931198,
      "learning_rate": 4.488577584786806e-07,
      "loss": 0.0079,
      "step": 2919020
    },
    {
      "epoch": 4.777072982332109,
      "grad_norm": 0.41248100996017456,
      "learning_rate": 4.4879186626516353e-07,
      "loss": 0.0085,
      "step": 2919040
    },
    {
      "epoch": 4.777105712770762,
      "grad_norm": 0.3696613907814026,
      "learning_rate": 4.4872597405164633e-07,
      "loss": 0.0116,
      "step": 2919060
    },
    {
      "epoch": 4.777138443209416,
      "grad_norm": 0.12686820328235626,
      "learning_rate": 4.486600818381293e-07,
      "loss": 0.0095,
      "step": 2919080
    },
    {
      "epoch": 4.777171173648069,
      "grad_norm": 0.348896861076355,
      "learning_rate": 4.4859418962461207e-07,
      "loss": 0.0078,
      "step": 2919100
    },
    {
      "epoch": 4.777203904086723,
      "grad_norm": 0.24343541264533997,
      "learning_rate": 4.485282974110949e-07,
      "loss": 0.0067,
      "step": 2919120
    },
    {
      "epoch": 4.777236634525376,
      "grad_norm": 0.1280553787946701,
      "learning_rate": 4.4846240519757787e-07,
      "loss": 0.0104,
      "step": 2919140
    },
    {
      "epoch": 4.777269364964029,
      "grad_norm": 0.28877729177474976,
      "learning_rate": 4.4839651298406066e-07,
      "loss": 0.0098,
      "step": 2919160
    },
    {
      "epoch": 4.777302095402683,
      "grad_norm": 0.3746238350868225,
      "learning_rate": 4.483306207705436e-07,
      "loss": 0.0108,
      "step": 2919180
    },
    {
      "epoch": 4.777334825841336,
      "grad_norm": 0.2720566689968109,
      "learning_rate": 4.4826472855702646e-07,
      "loss": 0.01,
      "step": 2919200
    },
    {
      "epoch": 4.777367556279989,
      "grad_norm": 0.14894816279411316,
      "learning_rate": 4.4819883634350936e-07,
      "loss": 0.0059,
      "step": 2919220
    },
    {
      "epoch": 4.777400286718643,
      "grad_norm": 0.12153265625238419,
      "learning_rate": 4.481329441299922e-07,
      "loss": 0.0104,
      "step": 2919240
    },
    {
      "epoch": 4.777433017157296,
      "grad_norm": 0.7429874539375305,
      "learning_rate": 4.480670519164751e-07,
      "loss": 0.0095,
      "step": 2919260
    },
    {
      "epoch": 4.777465747595949,
      "grad_norm": 0.07664480805397034,
      "learning_rate": 4.4800115970295795e-07,
      "loss": 0.0094,
      "step": 2919280
    },
    {
      "epoch": 4.777498478034603,
      "grad_norm": 0.09730717539787292,
      "learning_rate": 4.4793526748944085e-07,
      "loss": 0.0052,
      "step": 2919300
    },
    {
      "epoch": 4.777531208473256,
      "grad_norm": 0.48207247257232666,
      "learning_rate": 4.478693752759237e-07,
      "loss": 0.015,
      "step": 2919320
    },
    {
      "epoch": 4.777563938911909,
      "grad_norm": 0.11847028136253357,
      "learning_rate": 4.4780348306240654e-07,
      "loss": 0.008,
      "step": 2919340
    },
    {
      "epoch": 4.7775966693505625,
      "grad_norm": 0.18378300964832306,
      "learning_rate": 4.4773759084888944e-07,
      "loss": 0.0041,
      "step": 2919360
    },
    {
      "epoch": 4.777629399789216,
      "grad_norm": 0.09492730349302292,
      "learning_rate": 4.476716986353723e-07,
      "loss": 0.0069,
      "step": 2919380
    },
    {
      "epoch": 4.77766213022787,
      "grad_norm": 0.10589432716369629,
      "learning_rate": 4.476058064218552e-07,
      "loss": 0.0055,
      "step": 2919400
    },
    {
      "epoch": 4.777694860666522,
      "grad_norm": 0.34551700949668884,
      "learning_rate": 4.47539914208338e-07,
      "loss": 0.0074,
      "step": 2919420
    },
    {
      "epoch": 4.777727591105176,
      "grad_norm": 0.23635654151439667,
      "learning_rate": 4.474740219948209e-07,
      "loss": 0.0111,
      "step": 2919440
    },
    {
      "epoch": 4.77776032154383,
      "grad_norm": 0.3380567133426666,
      "learning_rate": 4.4740812978130377e-07,
      "loss": 0.0104,
      "step": 2919460
    },
    {
      "epoch": 4.777793051982483,
      "grad_norm": 0.182887002825737,
      "learning_rate": 4.4734223756778667e-07,
      "loss": 0.0096,
      "step": 2919480
    },
    {
      "epoch": 4.777825782421136,
      "grad_norm": 0.6082505583763123,
      "learning_rate": 4.472763453542695e-07,
      "loss": 0.0092,
      "step": 2919500
    },
    {
      "epoch": 4.7778585128597895,
      "grad_norm": 0.08958855271339417,
      "learning_rate": 4.472104531407524e-07,
      "loss": 0.0051,
      "step": 2919520
    },
    {
      "epoch": 4.777891243298443,
      "grad_norm": 0.1010604202747345,
      "learning_rate": 4.4714456092723526e-07,
      "loss": 0.0121,
      "step": 2919540
    },
    {
      "epoch": 4.777923973737096,
      "grad_norm": 0.2115166038274765,
      "learning_rate": 4.470786687137181e-07,
      "loss": 0.0079,
      "step": 2919560
    },
    {
      "epoch": 4.777956704175749,
      "grad_norm": 0.05674510449171066,
      "learning_rate": 4.47012776500201e-07,
      "loss": 0.0123,
      "step": 2919580
    },
    {
      "epoch": 4.777989434614403,
      "grad_norm": 0.060610126703977585,
      "learning_rate": 4.4694688428668385e-07,
      "loss": 0.0064,
      "step": 2919600
    },
    {
      "epoch": 4.778022165053056,
      "grad_norm": 0.31121113896369934,
      "learning_rate": 4.4688099207316675e-07,
      "loss": 0.0065,
      "step": 2919620
    },
    {
      "epoch": 4.778054895491709,
      "grad_norm": 0.20498166978359222,
      "learning_rate": 4.468150998596496e-07,
      "loss": 0.0069,
      "step": 2919640
    },
    {
      "epoch": 4.778087625930363,
      "grad_norm": 0.15128733217716217,
      "learning_rate": 4.467492076461325e-07,
      "loss": 0.0079,
      "step": 2919660
    },
    {
      "epoch": 4.778120356369016,
      "grad_norm": 0.3452553451061249,
      "learning_rate": 4.4668331543261534e-07,
      "loss": 0.0084,
      "step": 2919680
    },
    {
      "epoch": 4.778153086807669,
      "grad_norm": 0.2819562554359436,
      "learning_rate": 4.466174232190983e-07,
      "loss": 0.0079,
      "step": 2919700
    },
    {
      "epoch": 4.778185817246323,
      "grad_norm": 0.257962167263031,
      "learning_rate": 4.465515310055811e-07,
      "loss": 0.0103,
      "step": 2919720
    },
    {
      "epoch": 4.778218547684976,
      "grad_norm": 0.13268224895000458,
      "learning_rate": 4.4648563879206403e-07,
      "loss": 0.0098,
      "step": 2919740
    },
    {
      "epoch": 4.778251278123629,
      "grad_norm": 0.8762550354003906,
      "learning_rate": 4.464197465785468e-07,
      "loss": 0.0072,
      "step": 2919760
    },
    {
      "epoch": 4.778284008562283,
      "grad_norm": 0.08721053600311279,
      "learning_rate": 4.4635385436502967e-07,
      "loss": 0.0095,
      "step": 2919780
    },
    {
      "epoch": 4.778316739000936,
      "grad_norm": 0.21074321866035461,
      "learning_rate": 4.462879621515126e-07,
      "loss": 0.0069,
      "step": 2919800
    },
    {
      "epoch": 4.77834946943959,
      "grad_norm": 0.6413492560386658,
      "learning_rate": 4.462220699379954e-07,
      "loss": 0.008,
      "step": 2919820
    },
    {
      "epoch": 4.7783821998782425,
      "grad_norm": 0.30167731642723083,
      "learning_rate": 4.4615617772447837e-07,
      "loss": 0.0075,
      "step": 2919840
    },
    {
      "epoch": 4.778414930316896,
      "grad_norm": 0.19424767792224884,
      "learning_rate": 4.460902855109612e-07,
      "loss": 0.0102,
      "step": 2919860
    },
    {
      "epoch": 4.77844766075555,
      "grad_norm": 0.05824859067797661,
      "learning_rate": 4.460243932974441e-07,
      "loss": 0.008,
      "step": 2919880
    },
    {
      "epoch": 4.778480391194202,
      "grad_norm": 0.09671935439109802,
      "learning_rate": 4.4595850108392696e-07,
      "loss": 0.0066,
      "step": 2919900
    },
    {
      "epoch": 4.778513121632856,
      "grad_norm": 0.3813316822052002,
      "learning_rate": 4.4589260887040985e-07,
      "loss": 0.0084,
      "step": 2919920
    },
    {
      "epoch": 4.77854585207151,
      "grad_norm": 0.12553824484348297,
      "learning_rate": 4.458267166568927e-07,
      "loss": 0.0097,
      "step": 2919940
    },
    {
      "epoch": 4.778578582510162,
      "grad_norm": 0.08463028818368912,
      "learning_rate": 4.457608244433756e-07,
      "loss": 0.0114,
      "step": 2919960
    },
    {
      "epoch": 4.778611312948816,
      "grad_norm": 0.2735491394996643,
      "learning_rate": 4.4569493222985844e-07,
      "loss": 0.0088,
      "step": 2919980
    },
    {
      "epoch": 4.7786440433874695,
      "grad_norm": 0.1414402425289154,
      "learning_rate": 4.456290400163413e-07,
      "loss": 0.007,
      "step": 2920000
    },
    {
      "epoch": 4.778676773826123,
      "grad_norm": 0.17505985498428345,
      "learning_rate": 4.455631478028242e-07,
      "loss": 0.0086,
      "step": 2920020
    },
    {
      "epoch": 4.778709504264776,
      "grad_norm": 0.11675633490085602,
      "learning_rate": 4.4549725558930703e-07,
      "loss": 0.0068,
      "step": 2920040
    },
    {
      "epoch": 4.778742234703429,
      "grad_norm": 0.10179305076599121,
      "learning_rate": 4.4543136337578993e-07,
      "loss": 0.0067,
      "step": 2920060
    },
    {
      "epoch": 4.778774965142083,
      "grad_norm": 0.3804328739643097,
      "learning_rate": 4.453654711622728e-07,
      "loss": 0.0096,
      "step": 2920080
    },
    {
      "epoch": 4.778807695580737,
      "grad_norm": 0.09667100012302399,
      "learning_rate": 4.452995789487557e-07,
      "loss": 0.0088,
      "step": 2920100
    },
    {
      "epoch": 4.778840426019389,
      "grad_norm": 0.42053958773612976,
      "learning_rate": 4.452336867352385e-07,
      "loss": 0.0072,
      "step": 2920120
    },
    {
      "epoch": 4.778873156458043,
      "grad_norm": 0.22935853898525238,
      "learning_rate": 4.451677945217214e-07,
      "loss": 0.0081,
      "step": 2920140
    },
    {
      "epoch": 4.7789058868966965,
      "grad_norm": 0.14894358813762665,
      "learning_rate": 4.4510190230820427e-07,
      "loss": 0.0122,
      "step": 2920160
    },
    {
      "epoch": 4.778938617335349,
      "grad_norm": 0.2105776071548462,
      "learning_rate": 4.4503601009468716e-07,
      "loss": 0.0083,
      "step": 2920180
    },
    {
      "epoch": 4.778971347774003,
      "grad_norm": 0.12359974533319473,
      "learning_rate": 4.4497011788117e-07,
      "loss": 0.0093,
      "step": 2920200
    },
    {
      "epoch": 4.779004078212656,
      "grad_norm": 0.23530885577201843,
      "learning_rate": 4.4490422566765286e-07,
      "loss": 0.0083,
      "step": 2920220
    },
    {
      "epoch": 4.779036808651309,
      "grad_norm": 0.16375337541103363,
      "learning_rate": 4.4483833345413575e-07,
      "loss": 0.0115,
      "step": 2920240
    },
    {
      "epoch": 4.779069539089963,
      "grad_norm": 0.16488827764987946,
      "learning_rate": 4.447724412406186e-07,
      "loss": 0.007,
      "step": 2920260
    },
    {
      "epoch": 4.779102269528616,
      "grad_norm": 0.16772966086864471,
      "learning_rate": 4.447065490271015e-07,
      "loss": 0.0088,
      "step": 2920280
    },
    {
      "epoch": 4.77913499996727,
      "grad_norm": 0.23790283501148224,
      "learning_rate": 4.4464065681358434e-07,
      "loss": 0.0099,
      "step": 2920300
    },
    {
      "epoch": 4.779167730405923,
      "grad_norm": 0.11494739353656769,
      "learning_rate": 4.4457476460006724e-07,
      "loss": 0.0086,
      "step": 2920320
    },
    {
      "epoch": 4.779200460844576,
      "grad_norm": 0.4537331163883209,
      "learning_rate": 4.445088723865501e-07,
      "loss": 0.0081,
      "step": 2920340
    },
    {
      "epoch": 4.77923319128323,
      "grad_norm": 0.26481491327285767,
      "learning_rate": 4.4444298017303304e-07,
      "loss": 0.0075,
      "step": 2920360
    },
    {
      "epoch": 4.779265921721883,
      "grad_norm": 0.22746576368808746,
      "learning_rate": 4.4437708795951583e-07,
      "loss": 0.008,
      "step": 2920380
    },
    {
      "epoch": 4.779298652160536,
      "grad_norm": 0.48729169368743896,
      "learning_rate": 4.443111957459988e-07,
      "loss": 0.0103,
      "step": 2920400
    },
    {
      "epoch": 4.77933138259919,
      "grad_norm": 0.2853600084781647,
      "learning_rate": 4.442453035324816e-07,
      "loss": 0.005,
      "step": 2920420
    },
    {
      "epoch": 4.779364113037843,
      "grad_norm": 0.14278659224510193,
      "learning_rate": 4.441794113189644e-07,
      "loss": 0.005,
      "step": 2920440
    },
    {
      "epoch": 4.779396843476496,
      "grad_norm": 0.3527369499206543,
      "learning_rate": 4.441135191054474e-07,
      "loss": 0.0147,
      "step": 2920460
    },
    {
      "epoch": 4.7794295739151496,
      "grad_norm": 0.21111316978931427,
      "learning_rate": 4.4404762689193017e-07,
      "loss": 0.0057,
      "step": 2920480
    },
    {
      "epoch": 4.779462304353803,
      "grad_norm": 0.05116581544280052,
      "learning_rate": 4.439817346784131e-07,
      "loss": 0.0076,
      "step": 2920500
    },
    {
      "epoch": 4.779495034792456,
      "grad_norm": 0.12218736112117767,
      "learning_rate": 4.4391584246489596e-07,
      "loss": 0.0116,
      "step": 2920520
    },
    {
      "epoch": 4.7795277652311094,
      "grad_norm": 0.4851311445236206,
      "learning_rate": 4.4384995025137886e-07,
      "loss": 0.0059,
      "step": 2920540
    },
    {
      "epoch": 4.779560495669763,
      "grad_norm": 0.2460104376077652,
      "learning_rate": 4.437840580378617e-07,
      "loss": 0.0093,
      "step": 2920560
    },
    {
      "epoch": 4.779593226108417,
      "grad_norm": 0.05599560961127281,
      "learning_rate": 4.437181658243446e-07,
      "loss": 0.0067,
      "step": 2920580
    },
    {
      "epoch": 4.779625956547069,
      "grad_norm": 0.20391714572906494,
      "learning_rate": 4.4365227361082745e-07,
      "loss": 0.0081,
      "step": 2920600
    },
    {
      "epoch": 4.779658686985723,
      "grad_norm": 0.2600761353969574,
      "learning_rate": 4.4358638139731035e-07,
      "loss": 0.0076,
      "step": 2920620
    },
    {
      "epoch": 4.7796914174243765,
      "grad_norm": 0.17798206210136414,
      "learning_rate": 4.435204891837932e-07,
      "loss": 0.0074,
      "step": 2920640
    },
    {
      "epoch": 4.77972414786303,
      "grad_norm": 0.2569849491119385,
      "learning_rate": 4.4345459697027604e-07,
      "loss": 0.009,
      "step": 2920660
    },
    {
      "epoch": 4.779756878301683,
      "grad_norm": 0.28119951486587524,
      "learning_rate": 4.4338870475675894e-07,
      "loss": 0.0093,
      "step": 2920680
    },
    {
      "epoch": 4.779789608740336,
      "grad_norm": 0.08457250893115997,
      "learning_rate": 4.433228125432418e-07,
      "loss": 0.0062,
      "step": 2920700
    },
    {
      "epoch": 4.77982233917899,
      "grad_norm": 0.3132476806640625,
      "learning_rate": 4.432569203297247e-07,
      "loss": 0.0132,
      "step": 2920720
    },
    {
      "epoch": 4.779855069617643,
      "grad_norm": 0.13249853253364563,
      "learning_rate": 4.4319102811620753e-07,
      "loss": 0.0139,
      "step": 2920740
    },
    {
      "epoch": 4.779887800056296,
      "grad_norm": 0.1356513500213623,
      "learning_rate": 4.4312513590269043e-07,
      "loss": 0.0078,
      "step": 2920760
    },
    {
      "epoch": 4.77992053049495,
      "grad_norm": 0.30596837401390076,
      "learning_rate": 4.430592436891733e-07,
      "loss": 0.0081,
      "step": 2920780
    },
    {
      "epoch": 4.779953260933603,
      "grad_norm": 0.25938236713409424,
      "learning_rate": 4.4299335147565617e-07,
      "loss": 0.0102,
      "step": 2920800
    },
    {
      "epoch": 4.779985991372256,
      "grad_norm": 0.11914510279893875,
      "learning_rate": 4.42927459262139e-07,
      "loss": 0.0087,
      "step": 2920820
    },
    {
      "epoch": 4.78001872181091,
      "grad_norm": 0.08917221426963806,
      "learning_rate": 4.428615670486219e-07,
      "loss": 0.0046,
      "step": 2920840
    },
    {
      "epoch": 4.780051452249563,
      "grad_norm": 0.13769696652889252,
      "learning_rate": 4.4279567483510476e-07,
      "loss": 0.0096,
      "step": 2920860
    },
    {
      "epoch": 4.780084182688216,
      "grad_norm": 0.2974490821361542,
      "learning_rate": 4.427297826215876e-07,
      "loss": 0.007,
      "step": 2920880
    },
    {
      "epoch": 4.78011691312687,
      "grad_norm": 0.1740206480026245,
      "learning_rate": 4.426638904080705e-07,
      "loss": 0.0078,
      "step": 2920900
    },
    {
      "epoch": 4.780149643565523,
      "grad_norm": 0.5279476046562195,
      "learning_rate": 4.4259799819455335e-07,
      "loss": 0.011,
      "step": 2920920
    },
    {
      "epoch": 4.780182374004177,
      "grad_norm": 0.32448610663414,
      "learning_rate": 4.4253210598103625e-07,
      "loss": 0.0152,
      "step": 2920940
    },
    {
      "epoch": 4.78021510444283,
      "grad_norm": 0.07577278465032578,
      "learning_rate": 4.424662137675191e-07,
      "loss": 0.0098,
      "step": 2920960
    },
    {
      "epoch": 4.780247834881483,
      "grad_norm": 0.20008531212806702,
      "learning_rate": 4.42400321554002e-07,
      "loss": 0.0072,
      "step": 2920980
    },
    {
      "epoch": 4.780280565320137,
      "grad_norm": 0.2147674858570099,
      "learning_rate": 4.4233442934048484e-07,
      "loss": 0.0126,
      "step": 2921000
    },
    {
      "epoch": 4.7803132957587895,
      "grad_norm": 0.24821442365646362,
      "learning_rate": 4.422685371269678e-07,
      "loss": 0.0074,
      "step": 2921020
    },
    {
      "epoch": 4.780346026197443,
      "grad_norm": 0.2429310530424118,
      "learning_rate": 4.422026449134506e-07,
      "loss": 0.01,
      "step": 2921040
    },
    {
      "epoch": 4.780378756636097,
      "grad_norm": 0.2056184858083725,
      "learning_rate": 4.4213675269993354e-07,
      "loss": 0.0076,
      "step": 2921060
    },
    {
      "epoch": 4.780411487074749,
      "grad_norm": 0.3093951642513275,
      "learning_rate": 4.4207086048641633e-07,
      "loss": 0.0116,
      "step": 2921080
    },
    {
      "epoch": 4.780444217513403,
      "grad_norm": 0.3453107476234436,
      "learning_rate": 4.420049682728992e-07,
      "loss": 0.007,
      "step": 2921100
    },
    {
      "epoch": 4.780476947952057,
      "grad_norm": 0.3393671214580536,
      "learning_rate": 4.4193907605938213e-07,
      "loss": 0.0082,
      "step": 2921120
    },
    {
      "epoch": 4.78050967839071,
      "grad_norm": 0.7107150554656982,
      "learning_rate": 4.418731838458649e-07,
      "loss": 0.0085,
      "step": 2921140
    },
    {
      "epoch": 4.780542408829363,
      "grad_norm": 0.12799668312072754,
      "learning_rate": 4.4180729163234787e-07,
      "loss": 0.0077,
      "step": 2921160
    },
    {
      "epoch": 4.7805751392680165,
      "grad_norm": 0.1240854263305664,
      "learning_rate": 4.417413994188307e-07,
      "loss": 0.0051,
      "step": 2921180
    },
    {
      "epoch": 4.78060786970667,
      "grad_norm": 0.23883770406246185,
      "learning_rate": 4.416755072053136e-07,
      "loss": 0.0084,
      "step": 2921200
    },
    {
      "epoch": 4.780640600145323,
      "grad_norm": 0.23525917530059814,
      "learning_rate": 4.4160961499179646e-07,
      "loss": 0.0064,
      "step": 2921220
    },
    {
      "epoch": 4.780673330583976,
      "grad_norm": 0.15542782843112946,
      "learning_rate": 4.4154372277827936e-07,
      "loss": 0.0085,
      "step": 2921240
    },
    {
      "epoch": 4.78070606102263,
      "grad_norm": 0.21133974194526672,
      "learning_rate": 4.414778305647622e-07,
      "loss": 0.0056,
      "step": 2921260
    },
    {
      "epoch": 4.7807387914612836,
      "grad_norm": 0.05605185031890869,
      "learning_rate": 4.414119383512451e-07,
      "loss": 0.0101,
      "step": 2921280
    },
    {
      "epoch": 4.780771521899936,
      "grad_norm": 0.33152997493743896,
      "learning_rate": 4.4134604613772795e-07,
      "loss": 0.01,
      "step": 2921300
    },
    {
      "epoch": 4.78080425233859,
      "grad_norm": 0.17304342985153198,
      "learning_rate": 4.412801539242108e-07,
      "loss": 0.0055,
      "step": 2921320
    },
    {
      "epoch": 4.7808369827772434,
      "grad_norm": 0.4198736846446991,
      "learning_rate": 4.412142617106937e-07,
      "loss": 0.009,
      "step": 2921340
    },
    {
      "epoch": 4.780869713215896,
      "grad_norm": 0.19455602765083313,
      "learning_rate": 4.4114836949717654e-07,
      "loss": 0.009,
      "step": 2921360
    },
    {
      "epoch": 4.78090244365455,
      "grad_norm": 0.10111545771360397,
      "learning_rate": 4.4108247728365944e-07,
      "loss": 0.0056,
      "step": 2921380
    },
    {
      "epoch": 4.780935174093203,
      "grad_norm": 0.2239096462726593,
      "learning_rate": 4.410165850701423e-07,
      "loss": 0.0093,
      "step": 2921400
    },
    {
      "epoch": 4.780967904531856,
      "grad_norm": 0.23079591989517212,
      "learning_rate": 4.409506928566252e-07,
      "loss": 0.0068,
      "step": 2921420
    },
    {
      "epoch": 4.78100063497051,
      "grad_norm": 0.3894209563732147,
      "learning_rate": 4.4088480064310803e-07,
      "loss": 0.0062,
      "step": 2921440
    },
    {
      "epoch": 4.781033365409163,
      "grad_norm": 0.3630657494068146,
      "learning_rate": 4.408189084295909e-07,
      "loss": 0.0169,
      "step": 2921460
    },
    {
      "epoch": 4.781066095847817,
      "grad_norm": 0.31260746717453003,
      "learning_rate": 4.4075301621607377e-07,
      "loss": 0.0089,
      "step": 2921480
    },
    {
      "epoch": 4.7810988262864695,
      "grad_norm": 0.1703566312789917,
      "learning_rate": 4.4068712400255667e-07,
      "loss": 0.0075,
      "step": 2921500
    },
    {
      "epoch": 4.781131556725123,
      "grad_norm": 0.2690122425556183,
      "learning_rate": 4.406212317890395e-07,
      "loss": 0.0066,
      "step": 2921520
    },
    {
      "epoch": 4.781164287163777,
      "grad_norm": 0.1006413996219635,
      "learning_rate": 4.4055533957552236e-07,
      "loss": 0.0082,
      "step": 2921540
    },
    {
      "epoch": 4.78119701760243,
      "grad_norm": 0.11889079958200455,
      "learning_rate": 4.4048944736200526e-07,
      "loss": 0.0075,
      "step": 2921560
    },
    {
      "epoch": 4.781229748041083,
      "grad_norm": 0.15359361469745636,
      "learning_rate": 4.404235551484881e-07,
      "loss": 0.0066,
      "step": 2921580
    },
    {
      "epoch": 4.781262478479737,
      "grad_norm": 0.17108480632305145,
      "learning_rate": 4.40357662934971e-07,
      "loss": 0.0077,
      "step": 2921600
    },
    {
      "epoch": 4.78129520891839,
      "grad_norm": 0.49161994457244873,
      "learning_rate": 4.4029177072145385e-07,
      "loss": 0.0098,
      "step": 2921620
    },
    {
      "epoch": 4.781327939357043,
      "grad_norm": 0.22353459894657135,
      "learning_rate": 4.4022587850793675e-07,
      "loss": 0.0137,
      "step": 2921640
    },
    {
      "epoch": 4.7813606697956965,
      "grad_norm": 0.043711259961128235,
      "learning_rate": 4.401599862944196e-07,
      "loss": 0.0116,
      "step": 2921660
    },
    {
      "epoch": 4.78139340023435,
      "grad_norm": 0.9196081757545471,
      "learning_rate": 4.4009409408090255e-07,
      "loss": 0.0101,
      "step": 2921680
    },
    {
      "epoch": 4.781426130673003,
      "grad_norm": 0.6388161778450012,
      "learning_rate": 4.4002820186738534e-07,
      "loss": 0.007,
      "step": 2921700
    },
    {
      "epoch": 4.781458861111656,
      "grad_norm": 0.1309867799282074,
      "learning_rate": 4.399623096538683e-07,
      "loss": 0.0109,
      "step": 2921720
    },
    {
      "epoch": 4.78149159155031,
      "grad_norm": 0.2741287350654602,
      "learning_rate": 4.3989641744035114e-07,
      "loss": 0.0066,
      "step": 2921740
    },
    {
      "epoch": 4.781524321988964,
      "grad_norm": 0.16220834851264954,
      "learning_rate": 4.3983052522683393e-07,
      "loss": 0.0086,
      "step": 2921760
    },
    {
      "epoch": 4.781557052427616,
      "grad_norm": 0.691099226474762,
      "learning_rate": 4.397646330133169e-07,
      "loss": 0.0089,
      "step": 2921780
    },
    {
      "epoch": 4.78158978286627,
      "grad_norm": 0.16014903783798218,
      "learning_rate": 4.3969874079979967e-07,
      "loss": 0.0074,
      "step": 2921800
    },
    {
      "epoch": 4.7816225133049235,
      "grad_norm": 0.11609107255935669,
      "learning_rate": 4.396328485862826e-07,
      "loss": 0.0114,
      "step": 2921820
    },
    {
      "epoch": 4.781655243743577,
      "grad_norm": 0.05756695196032524,
      "learning_rate": 4.3956695637276547e-07,
      "loss": 0.0065,
      "step": 2921840
    },
    {
      "epoch": 4.78168797418223,
      "grad_norm": 0.09647053480148315,
      "learning_rate": 4.3950106415924837e-07,
      "loss": 0.0114,
      "step": 2921860
    },
    {
      "epoch": 4.781720704620883,
      "grad_norm": 0.40817904472351074,
      "learning_rate": 4.394351719457312e-07,
      "loss": 0.0074,
      "step": 2921880
    },
    {
      "epoch": 4.781753435059537,
      "grad_norm": 0.06474919617176056,
      "learning_rate": 4.393692797322141e-07,
      "loss": 0.0064,
      "step": 2921900
    },
    {
      "epoch": 4.78178616549819,
      "grad_norm": 0.29442596435546875,
      "learning_rate": 4.3930338751869696e-07,
      "loss": 0.0095,
      "step": 2921920
    },
    {
      "epoch": 4.781818895936843,
      "grad_norm": 0.28134745359420776,
      "learning_rate": 4.3923749530517986e-07,
      "loss": 0.0092,
      "step": 2921940
    },
    {
      "epoch": 4.781851626375497,
      "grad_norm": 0.1374400109052658,
      "learning_rate": 4.391716030916627e-07,
      "loss": 0.0058,
      "step": 2921960
    },
    {
      "epoch": 4.78188435681415,
      "grad_norm": 0.3470079302787781,
      "learning_rate": 4.3910571087814555e-07,
      "loss": 0.0093,
      "step": 2921980
    },
    {
      "epoch": 4.781917087252803,
      "grad_norm": 0.5222901105880737,
      "learning_rate": 4.3903981866462845e-07,
      "loss": 0.0098,
      "step": 2922000
    },
    {
      "epoch": 4.781949817691457,
      "grad_norm": 0.0978434607386589,
      "learning_rate": 4.389739264511113e-07,
      "loss": 0.0087,
      "step": 2922020
    },
    {
      "epoch": 4.78198254813011,
      "grad_norm": 0.1036185696721077,
      "learning_rate": 4.389080342375942e-07,
      "loss": 0.0096,
      "step": 2922040
    },
    {
      "epoch": 4.782015278568763,
      "grad_norm": 0.1606694906949997,
      "learning_rate": 4.3884214202407704e-07,
      "loss": 0.0066,
      "step": 2922060
    },
    {
      "epoch": 4.782048009007417,
      "grad_norm": 0.21805010735988617,
      "learning_rate": 4.3877624981055993e-07,
      "loss": 0.0108,
      "step": 2922080
    },
    {
      "epoch": 4.78208073944607,
      "grad_norm": 0.2333139330148697,
      "learning_rate": 4.387103575970428e-07,
      "loss": 0.0066,
      "step": 2922100
    },
    {
      "epoch": 4.782113469884724,
      "grad_norm": 0.0938548818230629,
      "learning_rate": 4.386444653835257e-07,
      "loss": 0.0083,
      "step": 2922120
    },
    {
      "epoch": 4.782146200323377,
      "grad_norm": 0.30085545778274536,
      "learning_rate": 4.385785731700085e-07,
      "loss": 0.0093,
      "step": 2922140
    },
    {
      "epoch": 4.78217893076203,
      "grad_norm": 0.2811630368232727,
      "learning_rate": 4.385126809564914e-07,
      "loss": 0.0067,
      "step": 2922160
    },
    {
      "epoch": 4.782211661200684,
      "grad_norm": 0.4631563723087311,
      "learning_rate": 4.3844678874297427e-07,
      "loss": 0.0069,
      "step": 2922180
    },
    {
      "epoch": 4.7822443916393365,
      "grad_norm": 0.4585838317871094,
      "learning_rate": 4.383808965294571e-07,
      "loss": 0.0088,
      "step": 2922200
    },
    {
      "epoch": 4.78227712207799,
      "grad_norm": 0.18122601509094238,
      "learning_rate": 4.3831500431594e-07,
      "loss": 0.0094,
      "step": 2922220
    },
    {
      "epoch": 4.782309852516644,
      "grad_norm": 0.19339723885059357,
      "learning_rate": 4.3824911210242286e-07,
      "loss": 0.0068,
      "step": 2922240
    },
    {
      "epoch": 4.782342582955296,
      "grad_norm": 0.43115144968032837,
      "learning_rate": 4.3818321988890576e-07,
      "loss": 0.0087,
      "step": 2922260
    },
    {
      "epoch": 4.78237531339395,
      "grad_norm": 0.12055456638336182,
      "learning_rate": 4.381173276753886e-07,
      "loss": 0.0066,
      "step": 2922280
    },
    {
      "epoch": 4.7824080438326035,
      "grad_norm": 0.16703251004219055,
      "learning_rate": 4.380514354618715e-07,
      "loss": 0.0086,
      "step": 2922300
    },
    {
      "epoch": 4.782440774271257,
      "grad_norm": 0.1673266738653183,
      "learning_rate": 4.3798554324835435e-07,
      "loss": 0.0099,
      "step": 2922320
    },
    {
      "epoch": 4.78247350470991,
      "grad_norm": 0.24857400357723236,
      "learning_rate": 4.379196510348373e-07,
      "loss": 0.0117,
      "step": 2922340
    },
    {
      "epoch": 4.782506235148563,
      "grad_norm": 0.2576824724674225,
      "learning_rate": 4.378537588213201e-07,
      "loss": 0.0061,
      "step": 2922360
    },
    {
      "epoch": 4.782538965587217,
      "grad_norm": 0.43521571159362793,
      "learning_rate": 4.3778786660780304e-07,
      "loss": 0.0107,
      "step": 2922380
    },
    {
      "epoch": 4.782571696025871,
      "grad_norm": 0.20000286400318146,
      "learning_rate": 4.377219743942859e-07,
      "loss": 0.0097,
      "step": 2922400
    },
    {
      "epoch": 4.782604426464523,
      "grad_norm": 0.5769566893577576,
      "learning_rate": 4.376560821807687e-07,
      "loss": 0.0143,
      "step": 2922420
    },
    {
      "epoch": 4.782637156903177,
      "grad_norm": 0.31780198216438293,
      "learning_rate": 4.3759018996725163e-07,
      "loss": 0.0089,
      "step": 2922440
    },
    {
      "epoch": 4.7826698873418305,
      "grad_norm": 0.2219054251909256,
      "learning_rate": 4.375242977537344e-07,
      "loss": 0.0064,
      "step": 2922460
    },
    {
      "epoch": 4.782702617780483,
      "grad_norm": 0.3300013840198517,
      "learning_rate": 4.374584055402174e-07,
      "loss": 0.0092,
      "step": 2922480
    },
    {
      "epoch": 4.782735348219137,
      "grad_norm": 0.6325190663337708,
      "learning_rate": 4.373925133267002e-07,
      "loss": 0.0091,
      "step": 2922500
    },
    {
      "epoch": 4.78276807865779,
      "grad_norm": 0.18825089931488037,
      "learning_rate": 4.373266211131831e-07,
      "loss": 0.0071,
      "step": 2922520
    },
    {
      "epoch": 4.782800809096443,
      "grad_norm": 0.3573507070541382,
      "learning_rate": 4.3726072889966597e-07,
      "loss": 0.0056,
      "step": 2922540
    },
    {
      "epoch": 4.782833539535097,
      "grad_norm": 0.7051209807395935,
      "learning_rate": 4.3719483668614886e-07,
      "loss": 0.0066,
      "step": 2922560
    },
    {
      "epoch": 4.78286626997375,
      "grad_norm": 0.5142536163330078,
      "learning_rate": 4.371289444726317e-07,
      "loss": 0.0077,
      "step": 2922580
    },
    {
      "epoch": 4.782899000412404,
      "grad_norm": 0.29120558500289917,
      "learning_rate": 4.370630522591146e-07,
      "loss": 0.0121,
      "step": 2922600
    },
    {
      "epoch": 4.782931730851057,
      "grad_norm": 0.2988658845424652,
      "learning_rate": 4.3699716004559745e-07,
      "loss": 0.0108,
      "step": 2922620
    },
    {
      "epoch": 4.78296446128971,
      "grad_norm": 0.11554199457168579,
      "learning_rate": 4.369312678320803e-07,
      "loss": 0.0083,
      "step": 2922640
    },
    {
      "epoch": 4.782997191728364,
      "grad_norm": 0.3336023688316345,
      "learning_rate": 4.368653756185632e-07,
      "loss": 0.0142,
      "step": 2922660
    },
    {
      "epoch": 4.7830299221670165,
      "grad_norm": 0.13421232998371124,
      "learning_rate": 4.3679948340504604e-07,
      "loss": 0.0084,
      "step": 2922680
    },
    {
      "epoch": 4.78306265260567,
      "grad_norm": 0.9692349433898926,
      "learning_rate": 4.3673359119152894e-07,
      "loss": 0.0119,
      "step": 2922700
    },
    {
      "epoch": 4.783095383044324,
      "grad_norm": 0.15932995080947876,
      "learning_rate": 4.366676989780118e-07,
      "loss": 0.005,
      "step": 2922720
    },
    {
      "epoch": 4.783128113482977,
      "grad_norm": 0.266853392124176,
      "learning_rate": 4.366018067644947e-07,
      "loss": 0.0101,
      "step": 2922740
    },
    {
      "epoch": 4.78316084392163,
      "grad_norm": 0.40457797050476074,
      "learning_rate": 4.3653591455097753e-07,
      "loss": 0.0095,
      "step": 2922760
    },
    {
      "epoch": 4.783193574360284,
      "grad_norm": 0.12852711975574493,
      "learning_rate": 4.3647002233746043e-07,
      "loss": 0.011,
      "step": 2922780
    },
    {
      "epoch": 4.783226304798937,
      "grad_norm": 0.1966455578804016,
      "learning_rate": 4.364041301239433e-07,
      "loss": 0.0076,
      "step": 2922800
    },
    {
      "epoch": 4.78325903523759,
      "grad_norm": 0.32201963663101196,
      "learning_rate": 4.363382379104262e-07,
      "loss": 0.0092,
      "step": 2922820
    },
    {
      "epoch": 4.7832917656762435,
      "grad_norm": 0.21463406085968018,
      "learning_rate": 4.36272345696909e-07,
      "loss": 0.0057,
      "step": 2922840
    },
    {
      "epoch": 4.783324496114897,
      "grad_norm": 0.2613139748573303,
      "learning_rate": 4.3620645348339187e-07,
      "loss": 0.0109,
      "step": 2922860
    },
    {
      "epoch": 4.78335722655355,
      "grad_norm": 0.1988186538219452,
      "learning_rate": 4.3614056126987477e-07,
      "loss": 0.0078,
      "step": 2922880
    },
    {
      "epoch": 4.783389956992203,
      "grad_norm": 0.26184898614883423,
      "learning_rate": 4.360746690563576e-07,
      "loss": 0.0086,
      "step": 2922900
    },
    {
      "epoch": 4.783422687430857,
      "grad_norm": 0.23308734595775604,
      "learning_rate": 4.360087768428405e-07,
      "loss": 0.006,
      "step": 2922920
    },
    {
      "epoch": 4.783455417869511,
      "grad_norm": 0.14308036863803864,
      "learning_rate": 4.3594288462932336e-07,
      "loss": 0.0097,
      "step": 2922940
    },
    {
      "epoch": 4.783488148308163,
      "grad_norm": 0.10787306725978851,
      "learning_rate": 4.3587699241580625e-07,
      "loss": 0.0079,
      "step": 2922960
    },
    {
      "epoch": 4.783520878746817,
      "grad_norm": 0.12013059109449387,
      "learning_rate": 4.358111002022891e-07,
      "loss": 0.0084,
      "step": 2922980
    },
    {
      "epoch": 4.7835536091854705,
      "grad_norm": 0.052557554095983505,
      "learning_rate": 4.3574520798877205e-07,
      "loss": 0.0106,
      "step": 2923000
    },
    {
      "epoch": 4.783586339624124,
      "grad_norm": 0.30571073293685913,
      "learning_rate": 4.3567931577525484e-07,
      "loss": 0.0105,
      "step": 2923020
    },
    {
      "epoch": 4.783619070062777,
      "grad_norm": 0.20318853855133057,
      "learning_rate": 4.356134235617378e-07,
      "loss": 0.0071,
      "step": 2923040
    },
    {
      "epoch": 4.78365180050143,
      "grad_norm": 0.20131026208400726,
      "learning_rate": 4.3554753134822064e-07,
      "loss": 0.0062,
      "step": 2923060
    },
    {
      "epoch": 4.783684530940084,
      "grad_norm": 0.45070552825927734,
      "learning_rate": 4.3548163913470343e-07,
      "loss": 0.0102,
      "step": 2923080
    },
    {
      "epoch": 4.783717261378737,
      "grad_norm": 0.10432118922472,
      "learning_rate": 4.354157469211864e-07,
      "loss": 0.0102,
      "step": 2923100
    },
    {
      "epoch": 4.78374999181739,
      "grad_norm": 0.09226013720035553,
      "learning_rate": 4.353498547076692e-07,
      "loss": 0.0087,
      "step": 2923120
    },
    {
      "epoch": 4.783782722256044,
      "grad_norm": 0.11840567737817764,
      "learning_rate": 4.3528396249415213e-07,
      "loss": 0.0097,
      "step": 2923140
    },
    {
      "epoch": 4.7838154526946965,
      "grad_norm": 0.3103337585926056,
      "learning_rate": 4.35218070280635e-07,
      "loss": 0.0111,
      "step": 2923160
    },
    {
      "epoch": 4.78384818313335,
      "grad_norm": 0.2627403438091278,
      "learning_rate": 4.3515217806711787e-07,
      "loss": 0.0132,
      "step": 2923180
    },
    {
      "epoch": 4.783880913572004,
      "grad_norm": 0.31290513277053833,
      "learning_rate": 4.350862858536007e-07,
      "loss": 0.011,
      "step": 2923200
    },
    {
      "epoch": 4.783913644010657,
      "grad_norm": 0.11689218133687973,
      "learning_rate": 4.350203936400836e-07,
      "loss": 0.009,
      "step": 2923220
    },
    {
      "epoch": 4.78394637444931,
      "grad_norm": 0.11932392418384552,
      "learning_rate": 4.3495450142656646e-07,
      "loss": 0.0076,
      "step": 2923240
    },
    {
      "epoch": 4.783979104887964,
      "grad_norm": 0.4471605718135834,
      "learning_rate": 4.3488860921304936e-07,
      "loss": 0.0092,
      "step": 2923260
    },
    {
      "epoch": 4.784011835326617,
      "grad_norm": 0.04577448591589928,
      "learning_rate": 4.348227169995322e-07,
      "loss": 0.0127,
      "step": 2923280
    },
    {
      "epoch": 4.784044565765271,
      "grad_norm": 0.18664422631263733,
      "learning_rate": 4.3475682478601505e-07,
      "loss": 0.0139,
      "step": 2923300
    },
    {
      "epoch": 4.7840772962039235,
      "grad_norm": 0.10776995867490768,
      "learning_rate": 4.3469093257249795e-07,
      "loss": 0.0096,
      "step": 2923320
    },
    {
      "epoch": 4.784110026642577,
      "grad_norm": 0.5092338919639587,
      "learning_rate": 4.346250403589808e-07,
      "loss": 0.0074,
      "step": 2923340
    },
    {
      "epoch": 4.784142757081231,
      "grad_norm": 0.39602699875831604,
      "learning_rate": 4.345591481454637e-07,
      "loss": 0.0113,
      "step": 2923360
    },
    {
      "epoch": 4.784175487519883,
      "grad_norm": 0.5513781309127808,
      "learning_rate": 4.3449325593194654e-07,
      "loss": 0.0086,
      "step": 2923380
    },
    {
      "epoch": 4.784208217958537,
      "grad_norm": 0.32051882147789,
      "learning_rate": 4.3442736371842944e-07,
      "loss": 0.0098,
      "step": 2923400
    },
    {
      "epoch": 4.784240948397191,
      "grad_norm": 0.37403470277786255,
      "learning_rate": 4.343614715049123e-07,
      "loss": 0.0108,
      "step": 2923420
    },
    {
      "epoch": 4.784273678835843,
      "grad_norm": 0.2298969179391861,
      "learning_rate": 4.342955792913952e-07,
      "loss": 0.008,
      "step": 2923440
    },
    {
      "epoch": 4.784306409274497,
      "grad_norm": 0.33619529008865356,
      "learning_rate": 4.3422968707787803e-07,
      "loss": 0.009,
      "step": 2923460
    },
    {
      "epoch": 4.7843391397131505,
      "grad_norm": 0.43696990609169006,
      "learning_rate": 4.3416379486436093e-07,
      "loss": 0.0061,
      "step": 2923480
    },
    {
      "epoch": 4.784371870151804,
      "grad_norm": 0.8199614882469177,
      "learning_rate": 4.340979026508438e-07,
      "loss": 0.0164,
      "step": 2923500
    },
    {
      "epoch": 4.784404600590457,
      "grad_norm": 0.11271177232265472,
      "learning_rate": 4.340320104373266e-07,
      "loss": 0.0092,
      "step": 2923520
    },
    {
      "epoch": 4.78443733102911,
      "grad_norm": 0.09380408376455307,
      "learning_rate": 4.339661182238095e-07,
      "loss": 0.0118,
      "step": 2923540
    },
    {
      "epoch": 4.784470061467764,
      "grad_norm": 0.18835526704788208,
      "learning_rate": 4.3390022601029236e-07,
      "loss": 0.01,
      "step": 2923560
    },
    {
      "epoch": 4.784502791906418,
      "grad_norm": 0.3089790940284729,
      "learning_rate": 4.3383433379677526e-07,
      "loss": 0.0048,
      "step": 2923580
    },
    {
      "epoch": 4.78453552234507,
      "grad_norm": 0.0675484910607338,
      "learning_rate": 4.337684415832581e-07,
      "loss": 0.0096,
      "step": 2923600
    },
    {
      "epoch": 4.784568252783724,
      "grad_norm": 0.41230496764183044,
      "learning_rate": 4.33702549369741e-07,
      "loss": 0.0071,
      "step": 2923620
    },
    {
      "epoch": 4.7846009832223775,
      "grad_norm": 0.17692530155181885,
      "learning_rate": 4.3363665715622385e-07,
      "loss": 0.0118,
      "step": 2923640
    },
    {
      "epoch": 4.78463371366103,
      "grad_norm": 0.26544129848480225,
      "learning_rate": 4.335707649427068e-07,
      "loss": 0.013,
      "step": 2923660
    },
    {
      "epoch": 4.784666444099684,
      "grad_norm": 0.3673763871192932,
      "learning_rate": 4.335048727291896e-07,
      "loss": 0.0098,
      "step": 2923680
    },
    {
      "epoch": 4.784699174538337,
      "grad_norm": 0.5861169695854187,
      "learning_rate": 4.3343898051567255e-07,
      "loss": 0.0128,
      "step": 2923700
    },
    {
      "epoch": 4.78473190497699,
      "grad_norm": 0.1075226441025734,
      "learning_rate": 4.333730883021554e-07,
      "loss": 0.006,
      "step": 2923720
    },
    {
      "epoch": 4.784764635415644,
      "grad_norm": 0.32867512106895447,
      "learning_rate": 4.333071960886382e-07,
      "loss": 0.0081,
      "step": 2923740
    },
    {
      "epoch": 4.784797365854297,
      "grad_norm": 0.10613822937011719,
      "learning_rate": 4.3324130387512114e-07,
      "loss": 0.0073,
      "step": 2923760
    },
    {
      "epoch": 4.784830096292951,
      "grad_norm": 0.14568835496902466,
      "learning_rate": 4.3317541166160393e-07,
      "loss": 0.0127,
      "step": 2923780
    },
    {
      "epoch": 4.784862826731604,
      "grad_norm": 0.13953781127929688,
      "learning_rate": 4.331095194480869e-07,
      "loss": 0.0101,
      "step": 2923800
    },
    {
      "epoch": 4.784895557170257,
      "grad_norm": 0.10615075379610062,
      "learning_rate": 4.3304362723456973e-07,
      "loss": 0.0099,
      "step": 2923820
    },
    {
      "epoch": 4.784928287608911,
      "grad_norm": 0.13961780071258545,
      "learning_rate": 4.329777350210526e-07,
      "loss": 0.0077,
      "step": 2923840
    },
    {
      "epoch": 4.784961018047564,
      "grad_norm": 0.14785537123680115,
      "learning_rate": 4.3291184280753547e-07,
      "loss": 0.0087,
      "step": 2923860
    },
    {
      "epoch": 4.784993748486217,
      "grad_norm": 0.21440908312797546,
      "learning_rate": 4.3284595059401837e-07,
      "loss": 0.0053,
      "step": 2923880
    },
    {
      "epoch": 4.785026478924871,
      "grad_norm": 0.18524372577667236,
      "learning_rate": 4.327800583805012e-07,
      "loss": 0.0098,
      "step": 2923900
    },
    {
      "epoch": 4.785059209363524,
      "grad_norm": 0.11865551024675369,
      "learning_rate": 4.327141661669841e-07,
      "loss": 0.0105,
      "step": 2923920
    },
    {
      "epoch": 4.785091939802177,
      "grad_norm": 0.16179296374320984,
      "learning_rate": 4.3264827395346696e-07,
      "loss": 0.0143,
      "step": 2923940
    },
    {
      "epoch": 4.7851246702408305,
      "grad_norm": 0.12487304210662842,
      "learning_rate": 4.325823817399498e-07,
      "loss": 0.0118,
      "step": 2923960
    },
    {
      "epoch": 4.785157400679484,
      "grad_norm": 0.02573518082499504,
      "learning_rate": 4.325164895264327e-07,
      "loss": 0.0084,
      "step": 2923980
    },
    {
      "epoch": 4.785190131118137,
      "grad_norm": 0.11747679859399796,
      "learning_rate": 4.3245059731291555e-07,
      "loss": 0.0123,
      "step": 2924000
    },
    {
      "epoch": 4.78522286155679,
      "grad_norm": 0.2611743211746216,
      "learning_rate": 4.3238470509939845e-07,
      "loss": 0.0083,
      "step": 2924020
    },
    {
      "epoch": 4.785255591995444,
      "grad_norm": 0.1370992213487625,
      "learning_rate": 4.323188128858813e-07,
      "loss": 0.0144,
      "step": 2924040
    },
    {
      "epoch": 4.785288322434098,
      "grad_norm": 0.18655043840408325,
      "learning_rate": 4.322529206723642e-07,
      "loss": 0.0077,
      "step": 2924060
    },
    {
      "epoch": 4.78532105287275,
      "grad_norm": 0.11130163073539734,
      "learning_rate": 4.3218702845884704e-07,
      "loss": 0.0069,
      "step": 2924080
    },
    {
      "epoch": 4.785353783311404,
      "grad_norm": 0.10368010401725769,
      "learning_rate": 4.3212113624532994e-07,
      "loss": 0.0062,
      "step": 2924100
    },
    {
      "epoch": 4.7853865137500575,
      "grad_norm": 0.35188430547714233,
      "learning_rate": 4.320552440318128e-07,
      "loss": 0.0089,
      "step": 2924120
    },
    {
      "epoch": 4.78541924418871,
      "grad_norm": 0.5267128944396973,
      "learning_rate": 4.319893518182957e-07,
      "loss": 0.0089,
      "step": 2924140
    },
    {
      "epoch": 4.785451974627364,
      "grad_norm": 0.11308514326810837,
      "learning_rate": 4.3192345960477853e-07,
      "loss": 0.006,
      "step": 2924160
    },
    {
      "epoch": 4.785484705066017,
      "grad_norm": 0.3999614417552948,
      "learning_rate": 4.3185756739126137e-07,
      "loss": 0.0082,
      "step": 2924180
    },
    {
      "epoch": 4.785517435504671,
      "grad_norm": 0.13851390779018402,
      "learning_rate": 4.3179167517774427e-07,
      "loss": 0.0063,
      "step": 2924200
    },
    {
      "epoch": 4.785550165943324,
      "grad_norm": 0.44668346643447876,
      "learning_rate": 4.317257829642271e-07,
      "loss": 0.0087,
      "step": 2924220
    },
    {
      "epoch": 4.785582896381977,
      "grad_norm": 0.49338725209236145,
      "learning_rate": 4.3165989075071e-07,
      "loss": 0.0093,
      "step": 2924240
    },
    {
      "epoch": 4.785615626820631,
      "grad_norm": 0.1118362694978714,
      "learning_rate": 4.3159399853719286e-07,
      "loss": 0.008,
      "step": 2924260
    },
    {
      "epoch": 4.785648357259284,
      "grad_norm": 0.13400913774967194,
      "learning_rate": 4.3152810632367576e-07,
      "loss": 0.0057,
      "step": 2924280
    },
    {
      "epoch": 4.785681087697937,
      "grad_norm": 0.05807802081108093,
      "learning_rate": 4.314622141101586e-07,
      "loss": 0.0084,
      "step": 2924300
    },
    {
      "epoch": 4.785713818136591,
      "grad_norm": 0.19802695512771606,
      "learning_rate": 4.3139632189664156e-07,
      "loss": 0.0066,
      "step": 2924320
    },
    {
      "epoch": 4.7857465485752435,
      "grad_norm": 0.17861072719097137,
      "learning_rate": 4.3133042968312435e-07,
      "loss": 0.0067,
      "step": 2924340
    },
    {
      "epoch": 4.785779279013897,
      "grad_norm": 0.09747844189405441,
      "learning_rate": 4.312645374696073e-07,
      "loss": 0.0078,
      "step": 2924360
    },
    {
      "epoch": 4.785812009452551,
      "grad_norm": 0.35660070180892944,
      "learning_rate": 4.3119864525609015e-07,
      "loss": 0.0166,
      "step": 2924380
    },
    {
      "epoch": 4.785844739891204,
      "grad_norm": 0.31829410791397095,
      "learning_rate": 4.3113275304257294e-07,
      "loss": 0.0062,
      "step": 2924400
    },
    {
      "epoch": 4.785877470329857,
      "grad_norm": 0.2197660505771637,
      "learning_rate": 4.310668608290559e-07,
      "loss": 0.007,
      "step": 2924420
    },
    {
      "epoch": 4.785910200768511,
      "grad_norm": 0.3121151030063629,
      "learning_rate": 4.310009686155387e-07,
      "loss": 0.0078,
      "step": 2924440
    },
    {
      "epoch": 4.785942931207164,
      "grad_norm": 0.3178395628929138,
      "learning_rate": 4.3093507640202163e-07,
      "loss": 0.0066,
      "step": 2924460
    },
    {
      "epoch": 4.785975661645818,
      "grad_norm": 0.2827948331832886,
      "learning_rate": 4.308691841885045e-07,
      "loss": 0.0092,
      "step": 2924480
    },
    {
      "epoch": 4.7860083920844705,
      "grad_norm": 0.4349994361400604,
      "learning_rate": 4.308032919749874e-07,
      "loss": 0.0098,
      "step": 2924500
    },
    {
      "epoch": 4.786041122523124,
      "grad_norm": 0.7589407563209534,
      "learning_rate": 4.307373997614702e-07,
      "loss": 0.0092,
      "step": 2924520
    },
    {
      "epoch": 4.786073852961778,
      "grad_norm": 0.6043528914451599,
      "learning_rate": 4.306715075479531e-07,
      "loss": 0.0095,
      "step": 2924540
    },
    {
      "epoch": 4.78610658340043,
      "grad_norm": 0.21280407905578613,
      "learning_rate": 4.3060561533443597e-07,
      "loss": 0.0055,
      "step": 2924560
    },
    {
      "epoch": 4.786139313839084,
      "grad_norm": 0.3177518844604492,
      "learning_rate": 4.3053972312091887e-07,
      "loss": 0.0084,
      "step": 2924580
    },
    {
      "epoch": 4.786172044277738,
      "grad_norm": 0.25125324726104736,
      "learning_rate": 4.304738309074017e-07,
      "loss": 0.0074,
      "step": 2924600
    },
    {
      "epoch": 4.78620477471639,
      "grad_norm": 0.15824738144874573,
      "learning_rate": 4.3040793869388456e-07,
      "loss": 0.0082,
      "step": 2924620
    },
    {
      "epoch": 4.786237505155044,
      "grad_norm": 0.6080935597419739,
      "learning_rate": 4.3034204648036746e-07,
      "loss": 0.0099,
      "step": 2924640
    },
    {
      "epoch": 4.7862702355936975,
      "grad_norm": 0.08447325974702835,
      "learning_rate": 4.302761542668503e-07,
      "loss": 0.0114,
      "step": 2924660
    },
    {
      "epoch": 4.786302966032351,
      "grad_norm": 0.08667796850204468,
      "learning_rate": 4.302102620533332e-07,
      "loss": 0.0084,
      "step": 2924680
    },
    {
      "epoch": 4.786335696471004,
      "grad_norm": 0.4783006012439728,
      "learning_rate": 4.3014436983981605e-07,
      "loss": 0.0096,
      "step": 2924700
    },
    {
      "epoch": 4.786368426909657,
      "grad_norm": 0.22787323594093323,
      "learning_rate": 4.3007847762629895e-07,
      "loss": 0.0069,
      "step": 2924720
    },
    {
      "epoch": 4.786401157348311,
      "grad_norm": 0.12159457057714462,
      "learning_rate": 4.300125854127818e-07,
      "loss": 0.0064,
      "step": 2924740
    },
    {
      "epoch": 4.7864338877869645,
      "grad_norm": 0.09200042486190796,
      "learning_rate": 4.299466931992647e-07,
      "loss": 0.0071,
      "step": 2924760
    },
    {
      "epoch": 4.786466618225617,
      "grad_norm": 0.17473462224006653,
      "learning_rate": 4.2988080098574753e-07,
      "loss": 0.0073,
      "step": 2924780
    },
    {
      "epoch": 4.786499348664271,
      "grad_norm": 0.4495925307273865,
      "learning_rate": 4.2981490877223043e-07,
      "loss": 0.0112,
      "step": 2924800
    },
    {
      "epoch": 4.786532079102924,
      "grad_norm": 0.5716789364814758,
      "learning_rate": 4.297490165587133e-07,
      "loss": 0.0081,
      "step": 2924820
    },
    {
      "epoch": 4.786564809541577,
      "grad_norm": 0.09533654153347015,
      "learning_rate": 4.296831243451961e-07,
      "loss": 0.009,
      "step": 2924840
    },
    {
      "epoch": 4.786597539980231,
      "grad_norm": 0.32845765352249146,
      "learning_rate": 4.29617232131679e-07,
      "loss": 0.0067,
      "step": 2924860
    },
    {
      "epoch": 4.786630270418884,
      "grad_norm": 0.2307298630475998,
      "learning_rate": 4.2955133991816187e-07,
      "loss": 0.0089,
      "step": 2924880
    },
    {
      "epoch": 4.786663000857537,
      "grad_norm": 0.2363795042037964,
      "learning_rate": 4.2948544770464477e-07,
      "loss": 0.0096,
      "step": 2924900
    },
    {
      "epoch": 4.786695731296191,
      "grad_norm": 0.3055238127708435,
      "learning_rate": 4.294195554911276e-07,
      "loss": 0.0137,
      "step": 2924920
    },
    {
      "epoch": 4.786728461734844,
      "grad_norm": 0.2538474500179291,
      "learning_rate": 4.293536632776105e-07,
      "loss": 0.0106,
      "step": 2924940
    },
    {
      "epoch": 4.786761192173498,
      "grad_norm": 0.3551723062992096,
      "learning_rate": 4.2928777106409336e-07,
      "loss": 0.0073,
      "step": 2924960
    },
    {
      "epoch": 4.7867939226121505,
      "grad_norm": 0.31452545523643494,
      "learning_rate": 4.292218788505763e-07,
      "loss": 0.0073,
      "step": 2924980
    },
    {
      "epoch": 4.786826653050804,
      "grad_norm": 0.06386488676071167,
      "learning_rate": 4.291559866370591e-07,
      "loss": 0.0065,
      "step": 2925000
    },
    {
      "epoch": 4.786859383489458,
      "grad_norm": 0.11113537847995758,
      "learning_rate": 4.2909009442354205e-07,
      "loss": 0.0073,
      "step": 2925020
    },
    {
      "epoch": 4.786892113928111,
      "grad_norm": 0.21029065549373627,
      "learning_rate": 4.290242022100249e-07,
      "loss": 0.005,
      "step": 2925040
    },
    {
      "epoch": 4.786924844366764,
      "grad_norm": 0.35777193307876587,
      "learning_rate": 4.289583099965078e-07,
      "loss": 0.0067,
      "step": 2925060
    },
    {
      "epoch": 4.786957574805418,
      "grad_norm": 0.127212792634964,
      "learning_rate": 4.2889241778299064e-07,
      "loss": 0.01,
      "step": 2925080
    },
    {
      "epoch": 4.786990305244071,
      "grad_norm": 0.18059903383255005,
      "learning_rate": 4.2882652556947344e-07,
      "loss": 0.0061,
      "step": 2925100
    },
    {
      "epoch": 4.787023035682724,
      "grad_norm": 0.1318233609199524,
      "learning_rate": 4.287606333559564e-07,
      "loss": 0.0124,
      "step": 2925120
    },
    {
      "epoch": 4.7870557661213775,
      "grad_norm": 0.18096143007278442,
      "learning_rate": 4.2869474114243923e-07,
      "loss": 0.0055,
      "step": 2925140
    },
    {
      "epoch": 4.787088496560031,
      "grad_norm": 0.2711535692214966,
      "learning_rate": 4.2862884892892213e-07,
      "loss": 0.0089,
      "step": 2925160
    },
    {
      "epoch": 4.787121226998684,
      "grad_norm": 0.2063862383365631,
      "learning_rate": 4.28562956715405e-07,
      "loss": 0.0113,
      "step": 2925180
    },
    {
      "epoch": 4.787153957437337,
      "grad_norm": 0.21779952943325043,
      "learning_rate": 4.284970645018879e-07,
      "loss": 0.0084,
      "step": 2925200
    },
    {
      "epoch": 4.787186687875991,
      "grad_norm": 0.4069277048110962,
      "learning_rate": 4.284311722883707e-07,
      "loss": 0.0085,
      "step": 2925220
    },
    {
      "epoch": 4.787219418314645,
      "grad_norm": 0.4565700888633728,
      "learning_rate": 4.283652800748536e-07,
      "loss": 0.0088,
      "step": 2925240
    },
    {
      "epoch": 4.787252148753297,
      "grad_norm": 0.2191799134016037,
      "learning_rate": 4.2829938786133647e-07,
      "loss": 0.0078,
      "step": 2925260
    },
    {
      "epoch": 4.787284879191951,
      "grad_norm": 0.28508198261260986,
      "learning_rate": 4.2823349564781936e-07,
      "loss": 0.0081,
      "step": 2925280
    },
    {
      "epoch": 4.7873176096306045,
      "grad_norm": 0.2091524302959442,
      "learning_rate": 4.281676034343022e-07,
      "loss": 0.0061,
      "step": 2925300
    },
    {
      "epoch": 4.787350340069258,
      "grad_norm": 0.07589610666036606,
      "learning_rate": 4.2810171122078506e-07,
      "loss": 0.0076,
      "step": 2925320
    },
    {
      "epoch": 4.787383070507911,
      "grad_norm": 0.2144984006881714,
      "learning_rate": 4.2803581900726795e-07,
      "loss": 0.0067,
      "step": 2925340
    },
    {
      "epoch": 4.787415800946564,
      "grad_norm": 0.19580864906311035,
      "learning_rate": 4.279699267937508e-07,
      "loss": 0.0094,
      "step": 2925360
    },
    {
      "epoch": 4.787448531385218,
      "grad_norm": 0.10148454457521439,
      "learning_rate": 4.279040345802337e-07,
      "loss": 0.0097,
      "step": 2925380
    },
    {
      "epoch": 4.787481261823871,
      "grad_norm": 0.16774415969848633,
      "learning_rate": 4.2783814236671654e-07,
      "loss": 0.0069,
      "step": 2925400
    },
    {
      "epoch": 4.787513992262524,
      "grad_norm": 0.3187316060066223,
      "learning_rate": 4.2777225015319944e-07,
      "loss": 0.0099,
      "step": 2925420
    },
    {
      "epoch": 4.787546722701178,
      "grad_norm": 0.41255834698677063,
      "learning_rate": 4.277063579396823e-07,
      "loss": 0.0079,
      "step": 2925440
    },
    {
      "epoch": 4.787579453139831,
      "grad_norm": 0.06583935767412186,
      "learning_rate": 4.276404657261652e-07,
      "loss": 0.0076,
      "step": 2925460
    },
    {
      "epoch": 4.787612183578484,
      "grad_norm": 0.2946469485759735,
      "learning_rate": 4.2757457351264803e-07,
      "loss": 0.0092,
      "step": 2925480
    },
    {
      "epoch": 4.787644914017138,
      "grad_norm": 0.15447628498077393,
      "learning_rate": 4.2750868129913093e-07,
      "loss": 0.0066,
      "step": 2925500
    },
    {
      "epoch": 4.787677644455791,
      "grad_norm": 0.14999566972255707,
      "learning_rate": 4.274427890856138e-07,
      "loss": 0.0073,
      "step": 2925520
    },
    {
      "epoch": 4.787710374894444,
      "grad_norm": 0.07781080901622772,
      "learning_rate": 4.273768968720966e-07,
      "loss": 0.0072,
      "step": 2925540
    },
    {
      "epoch": 4.787743105333098,
      "grad_norm": 0.3101525902748108,
      "learning_rate": 4.273110046585795e-07,
      "loss": 0.0098,
      "step": 2925560
    },
    {
      "epoch": 4.787775835771751,
      "grad_norm": 0.24435561895370483,
      "learning_rate": 4.2724511244506237e-07,
      "loss": 0.0113,
      "step": 2925580
    },
    {
      "epoch": 4.787808566210405,
      "grad_norm": 0.20380717515945435,
      "learning_rate": 4.2717922023154526e-07,
      "loss": 0.01,
      "step": 2925600
    },
    {
      "epoch": 4.7878412966490576,
      "grad_norm": 0.11091296374797821,
      "learning_rate": 4.271133280180281e-07,
      "loss": 0.0089,
      "step": 2925620
    },
    {
      "epoch": 4.787874027087711,
      "grad_norm": 0.17545710504055023,
      "learning_rate": 4.2704743580451106e-07,
      "loss": 0.0103,
      "step": 2925640
    },
    {
      "epoch": 4.787906757526365,
      "grad_norm": 0.29834720492362976,
      "learning_rate": 4.2698154359099385e-07,
      "loss": 0.0101,
      "step": 2925660
    },
    {
      "epoch": 4.7879394879650174,
      "grad_norm": 0.1748506873846054,
      "learning_rate": 4.269156513774768e-07,
      "loss": 0.0079,
      "step": 2925680
    },
    {
      "epoch": 4.787972218403671,
      "grad_norm": 0.08415146172046661,
      "learning_rate": 4.2684975916395965e-07,
      "loss": 0.0064,
      "step": 2925700
    },
    {
      "epoch": 4.788004948842325,
      "grad_norm": 0.05773304030299187,
      "learning_rate": 4.2678386695044255e-07,
      "loss": 0.0064,
      "step": 2925720
    },
    {
      "epoch": 4.788037679280977,
      "grad_norm": 0.1399536281824112,
      "learning_rate": 4.267179747369254e-07,
      "loss": 0.0075,
      "step": 2925740
    },
    {
      "epoch": 4.788070409719631,
      "grad_norm": 0.058973971754312515,
      "learning_rate": 4.2665208252340824e-07,
      "loss": 0.0071,
      "step": 2925760
    },
    {
      "epoch": 4.7881031401582845,
      "grad_norm": 0.13739898800849915,
      "learning_rate": 4.2658619030989114e-07,
      "loss": 0.0076,
      "step": 2925780
    },
    {
      "epoch": 4.788135870596938,
      "grad_norm": 0.07253680378198624,
      "learning_rate": 4.26520298096374e-07,
      "loss": 0.0137,
      "step": 2925800
    },
    {
      "epoch": 4.788168601035591,
      "grad_norm": 0.26539871096611023,
      "learning_rate": 4.264544058828569e-07,
      "loss": 0.0068,
      "step": 2925820
    },
    {
      "epoch": 4.788201331474244,
      "grad_norm": 0.17061486840248108,
      "learning_rate": 4.2638851366933973e-07,
      "loss": 0.0097,
      "step": 2925840
    },
    {
      "epoch": 4.788234061912898,
      "grad_norm": 0.27035269141197205,
      "learning_rate": 4.2632262145582263e-07,
      "loss": 0.0109,
      "step": 2925860
    },
    {
      "epoch": 4.788266792351551,
      "grad_norm": 0.20173656940460205,
      "learning_rate": 4.262567292423055e-07,
      "loss": 0.0117,
      "step": 2925880
    },
    {
      "epoch": 4.788299522790204,
      "grad_norm": 0.11357325315475464,
      "learning_rate": 4.2619083702878837e-07,
      "loss": 0.0096,
      "step": 2925900
    },
    {
      "epoch": 4.788332253228858,
      "grad_norm": 0.2355208843946457,
      "learning_rate": 4.261249448152712e-07,
      "loss": 0.0054,
      "step": 2925920
    },
    {
      "epoch": 4.7883649836675115,
      "grad_norm": 0.3362443745136261,
      "learning_rate": 4.260590526017541e-07,
      "loss": 0.0062,
      "step": 2925940
    },
    {
      "epoch": 4.788397714106164,
      "grad_norm": 0.27232667803764343,
      "learning_rate": 4.2599316038823696e-07,
      "loss": 0.0102,
      "step": 2925960
    },
    {
      "epoch": 4.788430444544818,
      "grad_norm": 0.08475127071142197,
      "learning_rate": 4.259272681747198e-07,
      "loss": 0.0107,
      "step": 2925980
    },
    {
      "epoch": 4.788463174983471,
      "grad_norm": 0.2691633999347687,
      "learning_rate": 4.258613759612027e-07,
      "loss": 0.0104,
      "step": 2926000
    },
    {
      "epoch": 4.788495905422124,
      "grad_norm": 0.05555916205048561,
      "learning_rate": 4.2579548374768555e-07,
      "loss": 0.0077,
      "step": 2926020
    },
    {
      "epoch": 4.788528635860778,
      "grad_norm": 0.1750338077545166,
      "learning_rate": 4.2572959153416845e-07,
      "loss": 0.0072,
      "step": 2926040
    },
    {
      "epoch": 4.788561366299431,
      "grad_norm": 0.12272033840417862,
      "learning_rate": 4.256636993206513e-07,
      "loss": 0.0066,
      "step": 2926060
    },
    {
      "epoch": 4.788594096738084,
      "grad_norm": 0.3013497292995453,
      "learning_rate": 4.255978071071342e-07,
      "loss": 0.0094,
      "step": 2926080
    },
    {
      "epoch": 4.788626827176738,
      "grad_norm": 0.30359768867492676,
      "learning_rate": 4.2553191489361704e-07,
      "loss": 0.0101,
      "step": 2926100
    },
    {
      "epoch": 4.788659557615391,
      "grad_norm": 0.29166603088378906,
      "learning_rate": 4.2546602268009994e-07,
      "loss": 0.0093,
      "step": 2926120
    },
    {
      "epoch": 4.788692288054045,
      "grad_norm": 0.07772265374660492,
      "learning_rate": 4.254001304665828e-07,
      "loss": 0.0066,
      "step": 2926140
    },
    {
      "epoch": 4.7887250184926975,
      "grad_norm": 0.2306300401687622,
      "learning_rate": 4.253342382530657e-07,
      "loss": 0.009,
      "step": 2926160
    },
    {
      "epoch": 4.788757748931351,
      "grad_norm": 0.48583754897117615,
      "learning_rate": 4.2526834603954853e-07,
      "loss": 0.0072,
      "step": 2926180
    },
    {
      "epoch": 4.788790479370005,
      "grad_norm": 0.28801867365837097,
      "learning_rate": 4.252024538260314e-07,
      "loss": 0.0111,
      "step": 2926200
    },
    {
      "epoch": 4.788823209808658,
      "grad_norm": 0.17639927566051483,
      "learning_rate": 4.2513656161251427e-07,
      "loss": 0.0099,
      "step": 2926220
    },
    {
      "epoch": 4.788855940247311,
      "grad_norm": 0.32268908619880676,
      "learning_rate": 4.250706693989971e-07,
      "loss": 0.0105,
      "step": 2926240
    },
    {
      "epoch": 4.788888670685965,
      "grad_norm": 0.09617670625448227,
      "learning_rate": 4.2500477718548e-07,
      "loss": 0.0088,
      "step": 2926260
    },
    {
      "epoch": 4.788921401124618,
      "grad_norm": 0.2128458172082901,
      "learning_rate": 4.2493888497196286e-07,
      "loss": 0.0044,
      "step": 2926280
    },
    {
      "epoch": 4.788954131563271,
      "grad_norm": 0.27410241961479187,
      "learning_rate": 4.248729927584458e-07,
      "loss": 0.0058,
      "step": 2926300
    },
    {
      "epoch": 4.7889868620019245,
      "grad_norm": 0.1585058718919754,
      "learning_rate": 4.248071005449286e-07,
      "loss": 0.0155,
      "step": 2926320
    },
    {
      "epoch": 4.789019592440578,
      "grad_norm": 0.21339662373065948,
      "learning_rate": 4.2474120833141156e-07,
      "loss": 0.0083,
      "step": 2926340
    },
    {
      "epoch": 4.789052322879231,
      "grad_norm": 0.5901629328727722,
      "learning_rate": 4.246753161178944e-07,
      "loss": 0.0076,
      "step": 2926360
    },
    {
      "epoch": 4.789085053317884,
      "grad_norm": 0.0920063853263855,
      "learning_rate": 4.246094239043773e-07,
      "loss": 0.0065,
      "step": 2926380
    },
    {
      "epoch": 4.789117783756538,
      "grad_norm": 0.28597187995910645,
      "learning_rate": 4.2454353169086015e-07,
      "loss": 0.0088,
      "step": 2926400
    },
    {
      "epoch": 4.7891505141951916,
      "grad_norm": 0.19343805313110352,
      "learning_rate": 4.24477639477343e-07,
      "loss": 0.0114,
      "step": 2926420
    },
    {
      "epoch": 4.789183244633844,
      "grad_norm": 0.4331241250038147,
      "learning_rate": 4.244117472638259e-07,
      "loss": 0.0149,
      "step": 2926440
    },
    {
      "epoch": 4.789215975072498,
      "grad_norm": 0.09225848317146301,
      "learning_rate": 4.2434585505030874e-07,
      "loss": 0.0077,
      "step": 2926460
    },
    {
      "epoch": 4.7892487055111514,
      "grad_norm": 0.37273648381233215,
      "learning_rate": 4.2427996283679164e-07,
      "loss": 0.0072,
      "step": 2926480
    },
    {
      "epoch": 4.789281435949805,
      "grad_norm": 0.06410367041826248,
      "learning_rate": 4.242140706232745e-07,
      "loss": 0.0075,
      "step": 2926500
    },
    {
      "epoch": 4.789314166388458,
      "grad_norm": 0.12571997940540314,
      "learning_rate": 4.241481784097574e-07,
      "loss": 0.0081,
      "step": 2926520
    },
    {
      "epoch": 4.789346896827111,
      "grad_norm": 0.1004025787115097,
      "learning_rate": 4.240822861962402e-07,
      "loss": 0.0063,
      "step": 2926540
    },
    {
      "epoch": 4.789379627265765,
      "grad_norm": 0.4688309133052826,
      "learning_rate": 4.240163939827231e-07,
      "loss": 0.0079,
      "step": 2926560
    },
    {
      "epoch": 4.789412357704418,
      "grad_norm": 0.3838026821613312,
      "learning_rate": 4.2395050176920597e-07,
      "loss": 0.0125,
      "step": 2926580
    },
    {
      "epoch": 4.789445088143071,
      "grad_norm": 0.27601271867752075,
      "learning_rate": 4.2388460955568887e-07,
      "loss": 0.0081,
      "step": 2926600
    },
    {
      "epoch": 4.789477818581725,
      "grad_norm": 0.11326097697019577,
      "learning_rate": 4.238187173421717e-07,
      "loss": 0.0068,
      "step": 2926620
    },
    {
      "epoch": 4.7895105490203775,
      "grad_norm": 0.19474837183952332,
      "learning_rate": 4.2375282512865456e-07,
      "loss": 0.008,
      "step": 2926640
    },
    {
      "epoch": 4.789543279459031,
      "grad_norm": 0.2770911455154419,
      "learning_rate": 4.2368693291513746e-07,
      "loss": 0.007,
      "step": 2926660
    },
    {
      "epoch": 4.789576009897685,
      "grad_norm": 0.4809427559375763,
      "learning_rate": 4.236210407016203e-07,
      "loss": 0.0119,
      "step": 2926680
    },
    {
      "epoch": 4.789608740336338,
      "grad_norm": 0.1627136766910553,
      "learning_rate": 4.235551484881032e-07,
      "loss": 0.0073,
      "step": 2926700
    },
    {
      "epoch": 4.789641470774991,
      "grad_norm": 0.19086670875549316,
      "learning_rate": 4.2348925627458605e-07,
      "loss": 0.0096,
      "step": 2926720
    },
    {
      "epoch": 4.789674201213645,
      "grad_norm": 0.16684380173683167,
      "learning_rate": 4.2342336406106895e-07,
      "loss": 0.007,
      "step": 2926740
    },
    {
      "epoch": 4.789706931652298,
      "grad_norm": 0.26315411925315857,
      "learning_rate": 4.233574718475518e-07,
      "loss": 0.0098,
      "step": 2926760
    },
    {
      "epoch": 4.789739662090952,
      "grad_norm": 0.09039801359176636,
      "learning_rate": 4.232915796340347e-07,
      "loss": 0.0078,
      "step": 2926780
    },
    {
      "epoch": 4.7897723925296045,
      "grad_norm": 0.16321244835853577,
      "learning_rate": 4.2322568742051754e-07,
      "loss": 0.0082,
      "step": 2926800
    },
    {
      "epoch": 4.789805122968258,
      "grad_norm": 0.2897203266620636,
      "learning_rate": 4.2315979520700044e-07,
      "loss": 0.0126,
      "step": 2926820
    },
    {
      "epoch": 4.789837853406912,
      "grad_norm": 0.35311493277549744,
      "learning_rate": 4.230939029934833e-07,
      "loss": 0.0122,
      "step": 2926840
    },
    {
      "epoch": 4.789870583845564,
      "grad_norm": 0.08059163391590118,
      "learning_rate": 4.2302801077996613e-07,
      "loss": 0.0074,
      "step": 2926860
    },
    {
      "epoch": 4.789903314284218,
      "grad_norm": 0.360645055770874,
      "learning_rate": 4.22962118566449e-07,
      "loss": 0.0088,
      "step": 2926880
    },
    {
      "epoch": 4.789936044722872,
      "grad_norm": 0.6265836358070374,
      "learning_rate": 4.2289622635293187e-07,
      "loss": 0.0073,
      "step": 2926900
    },
    {
      "epoch": 4.789968775161524,
      "grad_norm": 0.3872666656970978,
      "learning_rate": 4.2283033413941477e-07,
      "loss": 0.0107,
      "step": 2926920
    },
    {
      "epoch": 4.790001505600178,
      "grad_norm": 0.30782362818717957,
      "learning_rate": 4.227644419258976e-07,
      "loss": 0.0082,
      "step": 2926940
    },
    {
      "epoch": 4.7900342360388315,
      "grad_norm": 0.4134596586227417,
      "learning_rate": 4.2269854971238057e-07,
      "loss": 0.0057,
      "step": 2926960
    },
    {
      "epoch": 4.790066966477485,
      "grad_norm": 0.13310179114341736,
      "learning_rate": 4.2263265749886336e-07,
      "loss": 0.0089,
      "step": 2926980
    },
    {
      "epoch": 4.790099696916138,
      "grad_norm": 0.21106860041618347,
      "learning_rate": 4.225667652853463e-07,
      "loss": 0.0111,
      "step": 2927000
    },
    {
      "epoch": 4.790132427354791,
      "grad_norm": 0.10072840750217438,
      "learning_rate": 4.2250087307182916e-07,
      "loss": 0.013,
      "step": 2927020
    },
    {
      "epoch": 4.790165157793445,
      "grad_norm": 0.39574941992759705,
      "learning_rate": 4.2243498085831206e-07,
      "loss": 0.0103,
      "step": 2927040
    },
    {
      "epoch": 4.790197888232099,
      "grad_norm": 0.1641601026058197,
      "learning_rate": 4.223690886447949e-07,
      "loss": 0.0076,
      "step": 2927060
    },
    {
      "epoch": 4.790230618670751,
      "grad_norm": 0.09478645026683807,
      "learning_rate": 4.2230319643127775e-07,
      "loss": 0.0089,
      "step": 2927080
    },
    {
      "epoch": 4.790263349109405,
      "grad_norm": 0.406084269285202,
      "learning_rate": 4.2223730421776065e-07,
      "loss": 0.0048,
      "step": 2927100
    },
    {
      "epoch": 4.7902960795480585,
      "grad_norm": 0.5486388802528381,
      "learning_rate": 4.221714120042435e-07,
      "loss": 0.01,
      "step": 2927120
    },
    {
      "epoch": 4.790328809986711,
      "grad_norm": 0.20650871098041534,
      "learning_rate": 4.221055197907264e-07,
      "loss": 0.0114,
      "step": 2927140
    },
    {
      "epoch": 4.790361540425365,
      "grad_norm": 0.1572391837835312,
      "learning_rate": 4.2203962757720923e-07,
      "loss": 0.009,
      "step": 2927160
    },
    {
      "epoch": 4.790394270864018,
      "grad_norm": 0.16753099858760834,
      "learning_rate": 4.2197373536369213e-07,
      "loss": 0.0054,
      "step": 2927180
    },
    {
      "epoch": 4.790427001302671,
      "grad_norm": 0.1723332405090332,
      "learning_rate": 4.21907843150175e-07,
      "loss": 0.0066,
      "step": 2927200
    },
    {
      "epoch": 4.790459731741325,
      "grad_norm": 0.5110821723937988,
      "learning_rate": 4.218419509366579e-07,
      "loss": 0.0165,
      "step": 2927220
    },
    {
      "epoch": 4.790492462179978,
      "grad_norm": 0.45097798109054565,
      "learning_rate": 4.217760587231407e-07,
      "loss": 0.0087,
      "step": 2927240
    },
    {
      "epoch": 4.790525192618632,
      "grad_norm": 0.11564276367425919,
      "learning_rate": 4.217101665096236e-07,
      "loss": 0.0092,
      "step": 2927260
    },
    {
      "epoch": 4.790557923057285,
      "grad_norm": 0.403781533241272,
      "learning_rate": 4.2164427429610647e-07,
      "loss": 0.0097,
      "step": 2927280
    },
    {
      "epoch": 4.790590653495938,
      "grad_norm": 0.23012298345565796,
      "learning_rate": 4.215783820825893e-07,
      "loss": 0.0071,
      "step": 2927300
    },
    {
      "epoch": 4.790623383934592,
      "grad_norm": 0.2846992015838623,
      "learning_rate": 4.215124898690722e-07,
      "loss": 0.0093,
      "step": 2927320
    },
    {
      "epoch": 4.7906561143732445,
      "grad_norm": 0.6475239992141724,
      "learning_rate": 4.2144659765555506e-07,
      "loss": 0.011,
      "step": 2927340
    },
    {
      "epoch": 4.790688844811898,
      "grad_norm": 0.15087100863456726,
      "learning_rate": 4.2138070544203796e-07,
      "loss": 0.0091,
      "step": 2927360
    },
    {
      "epoch": 4.790721575250552,
      "grad_norm": 0.17057612538337708,
      "learning_rate": 4.213148132285208e-07,
      "loss": 0.009,
      "step": 2927380
    },
    {
      "epoch": 4.790754305689205,
      "grad_norm": 0.22864027321338654,
      "learning_rate": 4.212489210150037e-07,
      "loss": 0.0127,
      "step": 2927400
    },
    {
      "epoch": 4.790787036127858,
      "grad_norm": 0.1062176525592804,
      "learning_rate": 4.2118302880148655e-07,
      "loss": 0.0148,
      "step": 2927420
    },
    {
      "epoch": 4.7908197665665115,
      "grad_norm": 0.07640223950147629,
      "learning_rate": 4.2111713658796944e-07,
      "loss": 0.0069,
      "step": 2927440
    },
    {
      "epoch": 4.790852497005165,
      "grad_norm": 0.07714203000068665,
      "learning_rate": 4.210512443744523e-07,
      "loss": 0.0073,
      "step": 2927460
    },
    {
      "epoch": 4.790885227443818,
      "grad_norm": 0.2570449113845825,
      "learning_rate": 4.209853521609352e-07,
      "loss": 0.0077,
      "step": 2927480
    },
    {
      "epoch": 4.790917957882471,
      "grad_norm": 0.15388937294483185,
      "learning_rate": 4.2091945994741803e-07,
      "loss": 0.0124,
      "step": 2927500
    },
    {
      "epoch": 4.790950688321125,
      "grad_norm": 0.09888748079538345,
      "learning_rate": 4.208535677339009e-07,
      "loss": 0.007,
      "step": 2927520
    },
    {
      "epoch": 4.790983418759778,
      "grad_norm": 0.08121247589588165,
      "learning_rate": 4.207876755203838e-07,
      "loss": 0.0061,
      "step": 2927540
    },
    {
      "epoch": 4.791016149198431,
      "grad_norm": 0.16727736592292786,
      "learning_rate": 4.207217833068666e-07,
      "loss": 0.0095,
      "step": 2927560
    },
    {
      "epoch": 4.791048879637085,
      "grad_norm": 0.5514323711395264,
      "learning_rate": 4.206558910933495e-07,
      "loss": 0.0112,
      "step": 2927580
    },
    {
      "epoch": 4.7910816100757385,
      "grad_norm": 0.2656940817832947,
      "learning_rate": 4.2058999887983237e-07,
      "loss": 0.0072,
      "step": 2927600
    },
    {
      "epoch": 4.791114340514391,
      "grad_norm": 0.28126683831214905,
      "learning_rate": 4.205241066663153e-07,
      "loss": 0.0092,
      "step": 2927620
    },
    {
      "epoch": 4.791147070953045,
      "grad_norm": 0.6462852954864502,
      "learning_rate": 4.204582144527981e-07,
      "loss": 0.0163,
      "step": 2927640
    },
    {
      "epoch": 4.791179801391698,
      "grad_norm": 0.43303945660591125,
      "learning_rate": 4.2039232223928106e-07,
      "loss": 0.0082,
      "step": 2927660
    },
    {
      "epoch": 4.791212531830352,
      "grad_norm": 0.7508000135421753,
      "learning_rate": 4.203264300257639e-07,
      "loss": 0.0128,
      "step": 2927680
    },
    {
      "epoch": 4.791245262269005,
      "grad_norm": 0.13108627498149872,
      "learning_rate": 4.202605378122468e-07,
      "loss": 0.0108,
      "step": 2927700
    },
    {
      "epoch": 4.791277992707658,
      "grad_norm": 0.1676800400018692,
      "learning_rate": 4.2019464559872965e-07,
      "loss": 0.0087,
      "step": 2927720
    },
    {
      "epoch": 4.791310723146312,
      "grad_norm": 0.2015901654958725,
      "learning_rate": 4.201287533852125e-07,
      "loss": 0.0067,
      "step": 2927740
    },
    {
      "epoch": 4.791343453584965,
      "grad_norm": 0.362557977437973,
      "learning_rate": 4.200628611716954e-07,
      "loss": 0.0056,
      "step": 2927760
    },
    {
      "epoch": 4.791376184023618,
      "grad_norm": 0.44511446356773376,
      "learning_rate": 4.1999696895817824e-07,
      "loss": 0.0118,
      "step": 2927780
    },
    {
      "epoch": 4.791408914462272,
      "grad_norm": 0.11420852690935135,
      "learning_rate": 4.1993107674466114e-07,
      "loss": 0.0123,
      "step": 2927800
    },
    {
      "epoch": 4.7914416449009245,
      "grad_norm": 0.2132316380739212,
      "learning_rate": 4.19865184531144e-07,
      "loss": 0.0088,
      "step": 2927820
    },
    {
      "epoch": 4.791474375339578,
      "grad_norm": 0.04429030790925026,
      "learning_rate": 4.197992923176269e-07,
      "loss": 0.0095,
      "step": 2927840
    },
    {
      "epoch": 4.791507105778232,
      "grad_norm": 0.15069805085659027,
      "learning_rate": 4.1973340010410973e-07,
      "loss": 0.011,
      "step": 2927860
    },
    {
      "epoch": 4.791539836216885,
      "grad_norm": 0.28715458512306213,
      "learning_rate": 4.1966750789059263e-07,
      "loss": 0.0099,
      "step": 2927880
    },
    {
      "epoch": 4.791572566655538,
      "grad_norm": 0.34048017859458923,
      "learning_rate": 4.196016156770755e-07,
      "loss": 0.0097,
      "step": 2927900
    },
    {
      "epoch": 4.791605297094192,
      "grad_norm": 0.08315862715244293,
      "learning_rate": 4.195357234635584e-07,
      "loss": 0.0107,
      "step": 2927920
    },
    {
      "epoch": 4.791638027532845,
      "grad_norm": 0.13563181459903717,
      "learning_rate": 4.194698312500412e-07,
      "loss": 0.0111,
      "step": 2927940
    },
    {
      "epoch": 4.791670757971499,
      "grad_norm": 0.10829159617424011,
      "learning_rate": 4.1940393903652407e-07,
      "loss": 0.0087,
      "step": 2927960
    },
    {
      "epoch": 4.7917034884101515,
      "grad_norm": 0.23094606399536133,
      "learning_rate": 4.1933804682300696e-07,
      "loss": 0.0052,
      "step": 2927980
    },
    {
      "epoch": 4.791736218848805,
      "grad_norm": 0.2316390872001648,
      "learning_rate": 4.192721546094898e-07,
      "loss": 0.0097,
      "step": 2928000
    },
    {
      "epoch": 4.791768949287459,
      "grad_norm": 0.7299609184265137,
      "learning_rate": 4.192062623959727e-07,
      "loss": 0.0079,
      "step": 2928020
    },
    {
      "epoch": 4.791801679726111,
      "grad_norm": 0.33898845314979553,
      "learning_rate": 4.1914037018245555e-07,
      "loss": 0.0091,
      "step": 2928040
    },
    {
      "epoch": 4.791834410164765,
      "grad_norm": 0.266792356967926,
      "learning_rate": 4.1907447796893845e-07,
      "loss": 0.0079,
      "step": 2928060
    },
    {
      "epoch": 4.791867140603419,
      "grad_norm": 0.34292054176330566,
      "learning_rate": 4.190085857554213e-07,
      "loss": 0.01,
      "step": 2928080
    },
    {
      "epoch": 4.791899871042071,
      "grad_norm": 0.3148901164531708,
      "learning_rate": 4.189426935419042e-07,
      "loss": 0.0105,
      "step": 2928100
    },
    {
      "epoch": 4.791932601480725,
      "grad_norm": 0.21637508273124695,
      "learning_rate": 4.1887680132838704e-07,
      "loss": 0.0063,
      "step": 2928120
    },
    {
      "epoch": 4.7919653319193785,
      "grad_norm": 0.06829830259084702,
      "learning_rate": 4.1881090911486994e-07,
      "loss": 0.0079,
      "step": 2928140
    },
    {
      "epoch": 4.791998062358032,
      "grad_norm": 0.21352650225162506,
      "learning_rate": 4.187450169013528e-07,
      "loss": 0.0087,
      "step": 2928160
    },
    {
      "epoch": 4.792030792796685,
      "grad_norm": 0.24280698597431183,
      "learning_rate": 4.1867912468783563e-07,
      "loss": 0.0057,
      "step": 2928180
    },
    {
      "epoch": 4.792063523235338,
      "grad_norm": 0.12319181114435196,
      "learning_rate": 4.1861323247431853e-07,
      "loss": 0.0076,
      "step": 2928200
    },
    {
      "epoch": 4.792096253673992,
      "grad_norm": 0.3786942958831787,
      "learning_rate": 4.185473402608014e-07,
      "loss": 0.0082,
      "step": 2928220
    },
    {
      "epoch": 4.7921289841126455,
      "grad_norm": 0.15549419820308685,
      "learning_rate": 4.184814480472843e-07,
      "loss": 0.0076,
      "step": 2928240
    },
    {
      "epoch": 4.792161714551298,
      "grad_norm": 0.08971266448497772,
      "learning_rate": 4.184155558337671e-07,
      "loss": 0.0058,
      "step": 2928260
    },
    {
      "epoch": 4.792194444989952,
      "grad_norm": 0.1009313091635704,
      "learning_rate": 4.1834966362025007e-07,
      "loss": 0.0074,
      "step": 2928280
    },
    {
      "epoch": 4.792227175428605,
      "grad_norm": 0.14119784533977509,
      "learning_rate": 4.1828377140673286e-07,
      "loss": 0.0057,
      "step": 2928300
    },
    {
      "epoch": 4.792259905867258,
      "grad_norm": 0.1943017989397049,
      "learning_rate": 4.182178791932158e-07,
      "loss": 0.0085,
      "step": 2928320
    },
    {
      "epoch": 4.792292636305912,
      "grad_norm": 0.17442820966243744,
      "learning_rate": 4.1815198697969866e-07,
      "loss": 0.0104,
      "step": 2928340
    },
    {
      "epoch": 4.792325366744565,
      "grad_norm": 0.2454039454460144,
      "learning_rate": 4.1808609476618156e-07,
      "loss": 0.0068,
      "step": 2928360
    },
    {
      "epoch": 4.792358097183218,
      "grad_norm": 0.2633742392063141,
      "learning_rate": 4.180202025526644e-07,
      "loss": 0.0077,
      "step": 2928380
    },
    {
      "epoch": 4.792390827621872,
      "grad_norm": 0.27688705921173096,
      "learning_rate": 4.1795431033914725e-07,
      "loss": 0.0068,
      "step": 2928400
    },
    {
      "epoch": 4.792423558060525,
      "grad_norm": 0.3906473219394684,
      "learning_rate": 4.1788841812563015e-07,
      "loss": 0.011,
      "step": 2928420
    },
    {
      "epoch": 4.792456288499179,
      "grad_norm": 0.37205690145492554,
      "learning_rate": 4.17822525912113e-07,
      "loss": 0.0078,
      "step": 2928440
    },
    {
      "epoch": 4.7924890189378315,
      "grad_norm": 0.1614156812429428,
      "learning_rate": 4.177566336985959e-07,
      "loss": 0.0086,
      "step": 2928460
    },
    {
      "epoch": 4.792521749376485,
      "grad_norm": 0.31054994463920593,
      "learning_rate": 4.1769074148507874e-07,
      "loss": 0.0075,
      "step": 2928480
    },
    {
      "epoch": 4.792554479815139,
      "grad_norm": 0.2714556157588959,
      "learning_rate": 4.1762484927156164e-07,
      "loss": 0.008,
      "step": 2928500
    },
    {
      "epoch": 4.792587210253792,
      "grad_norm": 0.20279242098331451,
      "learning_rate": 4.175589570580445e-07,
      "loss": 0.0071,
      "step": 2928520
    },
    {
      "epoch": 4.792619940692445,
      "grad_norm": 0.113980233669281,
      "learning_rate": 4.174930648445274e-07,
      "loss": 0.009,
      "step": 2928540
    },
    {
      "epoch": 4.792652671131099,
      "grad_norm": 0.07495122402906418,
      "learning_rate": 4.1742717263101023e-07,
      "loss": 0.0081,
      "step": 2928560
    },
    {
      "epoch": 4.792685401569752,
      "grad_norm": 0.05902794748544693,
      "learning_rate": 4.1736128041749313e-07,
      "loss": 0.008,
      "step": 2928580
    },
    {
      "epoch": 4.792718132008405,
      "grad_norm": 0.06406020373106003,
      "learning_rate": 4.1729538820397597e-07,
      "loss": 0.0113,
      "step": 2928600
    },
    {
      "epoch": 4.7927508624470585,
      "grad_norm": 0.15313512086868286,
      "learning_rate": 4.172294959904588e-07,
      "loss": 0.0092,
      "step": 2928620
    },
    {
      "epoch": 4.792783592885712,
      "grad_norm": 0.07838717103004456,
      "learning_rate": 4.171636037769417e-07,
      "loss": 0.0101,
      "step": 2928640
    },
    {
      "epoch": 4.792816323324365,
      "grad_norm": 0.20114442706108093,
      "learning_rate": 4.1709771156342456e-07,
      "loss": 0.013,
      "step": 2928660
    },
    {
      "epoch": 4.792849053763018,
      "grad_norm": 0.24765922129154205,
      "learning_rate": 4.1703181934990746e-07,
      "loss": 0.0085,
      "step": 2928680
    },
    {
      "epoch": 4.792881784201672,
      "grad_norm": 0.17401620745658875,
      "learning_rate": 4.169659271363903e-07,
      "loss": 0.0085,
      "step": 2928700
    },
    {
      "epoch": 4.792914514640326,
      "grad_norm": 0.11435898393392563,
      "learning_rate": 4.169000349228732e-07,
      "loss": 0.01,
      "step": 2928720
    },
    {
      "epoch": 4.792947245078978,
      "grad_norm": 0.24169950187206268,
      "learning_rate": 4.1683414270935605e-07,
      "loss": 0.007,
      "step": 2928740
    },
    {
      "epoch": 4.792979975517632,
      "grad_norm": 0.018906516954302788,
      "learning_rate": 4.1676825049583895e-07,
      "loss": 0.008,
      "step": 2928760
    },
    {
      "epoch": 4.7930127059562855,
      "grad_norm": 0.20491264760494232,
      "learning_rate": 4.167023582823218e-07,
      "loss": 0.0058,
      "step": 2928780
    },
    {
      "epoch": 4.793045436394938,
      "grad_norm": 0.20490363240242004,
      "learning_rate": 4.166364660688047e-07,
      "loss": 0.0071,
      "step": 2928800
    },
    {
      "epoch": 4.793078166833592,
      "grad_norm": 0.06432054936885834,
      "learning_rate": 4.1657057385528754e-07,
      "loss": 0.0066,
      "step": 2928820
    },
    {
      "epoch": 4.793110897272245,
      "grad_norm": 0.26648229360580444,
      "learning_rate": 4.165046816417704e-07,
      "loss": 0.0088,
      "step": 2928840
    },
    {
      "epoch": 4.793143627710899,
      "grad_norm": 0.3982987403869629,
      "learning_rate": 4.164387894282533e-07,
      "loss": 0.0097,
      "step": 2928860
    },
    {
      "epoch": 4.793176358149552,
      "grad_norm": 0.1613924354314804,
      "learning_rate": 4.1637289721473613e-07,
      "loss": 0.0076,
      "step": 2928880
    },
    {
      "epoch": 4.793209088588205,
      "grad_norm": 0.4440184533596039,
      "learning_rate": 4.1630700500121903e-07,
      "loss": 0.0073,
      "step": 2928900
    },
    {
      "epoch": 4.793241819026859,
      "grad_norm": 0.13615916669368744,
      "learning_rate": 4.1624111278770187e-07,
      "loss": 0.0065,
      "step": 2928920
    },
    {
      "epoch": 4.793274549465512,
      "grad_norm": 0.15344758331775665,
      "learning_rate": 4.161752205741848e-07,
      "loss": 0.0124,
      "step": 2928940
    },
    {
      "epoch": 4.793307279904165,
      "grad_norm": 0.2568964660167694,
      "learning_rate": 4.161093283606676e-07,
      "loss": 0.0086,
      "step": 2928960
    },
    {
      "epoch": 4.793340010342819,
      "grad_norm": 0.3417373597621918,
      "learning_rate": 4.1604343614715057e-07,
      "loss": 0.0063,
      "step": 2928980
    },
    {
      "epoch": 4.7933727407814715,
      "grad_norm": 0.24261029064655304,
      "learning_rate": 4.159775439336334e-07,
      "loss": 0.0102,
      "step": 2929000
    },
    {
      "epoch": 4.793405471220125,
      "grad_norm": 0.39399874210357666,
      "learning_rate": 4.159116517201163e-07,
      "loss": 0.0103,
      "step": 2929020
    },
    {
      "epoch": 4.793438201658779,
      "grad_norm": 0.28914228081703186,
      "learning_rate": 4.1584575950659916e-07,
      "loss": 0.0074,
      "step": 2929040
    },
    {
      "epoch": 4.793470932097432,
      "grad_norm": 0.11392562091350555,
      "learning_rate": 4.15779867293082e-07,
      "loss": 0.006,
      "step": 2929060
    },
    {
      "epoch": 4.793503662536085,
      "grad_norm": 0.14450150728225708,
      "learning_rate": 4.157139750795649e-07,
      "loss": 0.0084,
      "step": 2929080
    },
    {
      "epoch": 4.7935363929747385,
      "grad_norm": 0.3450169861316681,
      "learning_rate": 4.1564808286604775e-07,
      "loss": 0.0068,
      "step": 2929100
    },
    {
      "epoch": 4.793569123413392,
      "grad_norm": 0.2747012972831726,
      "learning_rate": 4.1558219065253065e-07,
      "loss": 0.0092,
      "step": 2929120
    },
    {
      "epoch": 4.793601853852046,
      "grad_norm": 0.335578054189682,
      "learning_rate": 4.155162984390135e-07,
      "loss": 0.0074,
      "step": 2929140
    },
    {
      "epoch": 4.793634584290698,
      "grad_norm": 0.6357631683349609,
      "learning_rate": 4.154504062254964e-07,
      "loss": 0.0126,
      "step": 2929160
    },
    {
      "epoch": 4.793667314729352,
      "grad_norm": 0.3856200873851776,
      "learning_rate": 4.1538451401197924e-07,
      "loss": 0.0082,
      "step": 2929180
    },
    {
      "epoch": 4.793700045168006,
      "grad_norm": 0.08717940002679825,
      "learning_rate": 4.1531862179846214e-07,
      "loss": 0.0045,
      "step": 2929200
    },
    {
      "epoch": 4.793732775606658,
      "grad_norm": 0.2618257403373718,
      "learning_rate": 4.15252729584945e-07,
      "loss": 0.0128,
      "step": 2929220
    },
    {
      "epoch": 4.793765506045312,
      "grad_norm": 0.21944132447242737,
      "learning_rate": 4.151868373714279e-07,
      "loss": 0.0077,
      "step": 2929240
    },
    {
      "epoch": 4.7937982364839655,
      "grad_norm": 0.13206607103347778,
      "learning_rate": 4.151209451579107e-07,
      "loss": 0.0193,
      "step": 2929260
    },
    {
      "epoch": 4.793830966922618,
      "grad_norm": 0.10640571266412735,
      "learning_rate": 4.1505505294439357e-07,
      "loss": 0.0079,
      "step": 2929280
    },
    {
      "epoch": 4.793863697361272,
      "grad_norm": 0.23011009395122528,
      "learning_rate": 4.1498916073087647e-07,
      "loss": 0.0061,
      "step": 2929300
    },
    {
      "epoch": 4.793896427799925,
      "grad_norm": 0.2839908301830292,
      "learning_rate": 4.149232685173593e-07,
      "loss": 0.011,
      "step": 2929320
    },
    {
      "epoch": 4.793929158238579,
      "grad_norm": 0.07333464175462723,
      "learning_rate": 4.148573763038422e-07,
      "loss": 0.0074,
      "step": 2929340
    },
    {
      "epoch": 4.793961888677232,
      "grad_norm": 0.1520335078239441,
      "learning_rate": 4.1479148409032506e-07,
      "loss": 0.0094,
      "step": 2929360
    },
    {
      "epoch": 4.793994619115885,
      "grad_norm": 0.4109843075275421,
      "learning_rate": 4.1472559187680796e-07,
      "loss": 0.0108,
      "step": 2929380
    },
    {
      "epoch": 4.794027349554539,
      "grad_norm": 0.325359970331192,
      "learning_rate": 4.146596996632908e-07,
      "loss": 0.0095,
      "step": 2929400
    },
    {
      "epoch": 4.7940600799931925,
      "grad_norm": 0.5642929673194885,
      "learning_rate": 4.145938074497737e-07,
      "loss": 0.0097,
      "step": 2929420
    },
    {
      "epoch": 4.794092810431845,
      "grad_norm": 0.4406154155731201,
      "learning_rate": 4.1452791523625655e-07,
      "loss": 0.0089,
      "step": 2929440
    },
    {
      "epoch": 4.794125540870499,
      "grad_norm": 0.08624840527772903,
      "learning_rate": 4.1446202302273945e-07,
      "loss": 0.0096,
      "step": 2929460
    },
    {
      "epoch": 4.794158271309152,
      "grad_norm": 0.47759222984313965,
      "learning_rate": 4.143961308092223e-07,
      "loss": 0.0089,
      "step": 2929480
    },
    {
      "epoch": 4.794191001747805,
      "grad_norm": 0.21263262629508972,
      "learning_rate": 4.1433023859570514e-07,
      "loss": 0.0082,
      "step": 2929500
    },
    {
      "epoch": 4.794223732186459,
      "grad_norm": 0.08768674731254578,
      "learning_rate": 4.1426434638218804e-07,
      "loss": 0.0079,
      "step": 2929520
    },
    {
      "epoch": 4.794256462625112,
      "grad_norm": 0.35816752910614014,
      "learning_rate": 4.141984541686709e-07,
      "loss": 0.0099,
      "step": 2929540
    },
    {
      "epoch": 4.794289193063765,
      "grad_norm": 0.04833203926682472,
      "learning_rate": 4.141325619551538e-07,
      "loss": 0.0104,
      "step": 2929560
    },
    {
      "epoch": 4.794321923502419,
      "grad_norm": 0.34578773379325867,
      "learning_rate": 4.140666697416366e-07,
      "loss": 0.0082,
      "step": 2929580
    },
    {
      "epoch": 4.794354653941072,
      "grad_norm": 0.11639057844877243,
      "learning_rate": 4.140007775281196e-07,
      "loss": 0.0061,
      "step": 2929600
    },
    {
      "epoch": 4.794387384379726,
      "grad_norm": 0.1303928792476654,
      "learning_rate": 4.1393488531460237e-07,
      "loss": 0.0109,
      "step": 2929620
    },
    {
      "epoch": 4.7944201148183785,
      "grad_norm": 0.2146904617547989,
      "learning_rate": 4.138689931010853e-07,
      "loss": 0.0093,
      "step": 2929640
    },
    {
      "epoch": 4.794452845257032,
      "grad_norm": 0.18582335114479065,
      "learning_rate": 4.1380310088756817e-07,
      "loss": 0.0061,
      "step": 2929660
    },
    {
      "epoch": 4.794485575695686,
      "grad_norm": 0.116926409304142,
      "learning_rate": 4.1373720867405107e-07,
      "loss": 0.0083,
      "step": 2929680
    },
    {
      "epoch": 4.794518306134339,
      "grad_norm": 0.17236493527889252,
      "learning_rate": 4.136713164605339e-07,
      "loss": 0.0061,
      "step": 2929700
    },
    {
      "epoch": 4.794551036572992,
      "grad_norm": 0.11391796916723251,
      "learning_rate": 4.1360542424701676e-07,
      "loss": 0.007,
      "step": 2929720
    },
    {
      "epoch": 4.794583767011646,
      "grad_norm": 0.3403957784175873,
      "learning_rate": 4.1353953203349966e-07,
      "loss": 0.0074,
      "step": 2929740
    },
    {
      "epoch": 4.794616497450299,
      "grad_norm": 0.10538357496261597,
      "learning_rate": 4.134736398199825e-07,
      "loss": 0.0077,
      "step": 2929760
    },
    {
      "epoch": 4.794649227888952,
      "grad_norm": 0.1527577042579651,
      "learning_rate": 4.134077476064654e-07,
      "loss": 0.0098,
      "step": 2929780
    },
    {
      "epoch": 4.7946819583276055,
      "grad_norm": 0.20042642951011658,
      "learning_rate": 4.1334185539294825e-07,
      "loss": 0.0067,
      "step": 2929800
    },
    {
      "epoch": 4.794714688766259,
      "grad_norm": 0.18495486676692963,
      "learning_rate": 4.1327596317943114e-07,
      "loss": 0.0084,
      "step": 2929820
    },
    {
      "epoch": 4.794747419204912,
      "grad_norm": 0.1665346324443817,
      "learning_rate": 4.13210070965914e-07,
      "loss": 0.0122,
      "step": 2929840
    },
    {
      "epoch": 4.794780149643565,
      "grad_norm": 0.27272990345954895,
      "learning_rate": 4.131441787523969e-07,
      "loss": 0.0059,
      "step": 2929860
    },
    {
      "epoch": 4.794812880082219,
      "grad_norm": 0.22537186741828918,
      "learning_rate": 4.1307828653887973e-07,
      "loss": 0.0088,
      "step": 2929880
    },
    {
      "epoch": 4.7948456105208725,
      "grad_norm": 0.5469530820846558,
      "learning_rate": 4.1301239432536263e-07,
      "loss": 0.0092,
      "step": 2929900
    },
    {
      "epoch": 4.794878340959525,
      "grad_norm": 0.09505423903465271,
      "learning_rate": 4.129465021118455e-07,
      "loss": 0.0048,
      "step": 2929920
    },
    {
      "epoch": 4.794911071398179,
      "grad_norm": 0.31283754110336304,
      "learning_rate": 4.128806098983283e-07,
      "loss": 0.0114,
      "step": 2929940
    },
    {
      "epoch": 4.794943801836832,
      "grad_norm": 0.1884375810623169,
      "learning_rate": 4.128147176848112e-07,
      "loss": 0.0066,
      "step": 2929960
    },
    {
      "epoch": 4.794976532275486,
      "grad_norm": 0.18970803916454315,
      "learning_rate": 4.1274882547129407e-07,
      "loss": 0.006,
      "step": 2929980
    },
    {
      "epoch": 4.795009262714139,
      "grad_norm": 0.3270924389362335,
      "learning_rate": 4.1268293325777697e-07,
      "loss": 0.0062,
      "step": 2930000
    },
    {
      "epoch": 4.795041993152792,
      "grad_norm": 0.22631491720676422,
      "learning_rate": 4.126170410442598e-07,
      "loss": 0.0074,
      "step": 2930020
    },
    {
      "epoch": 4.795074723591446,
      "grad_norm": 0.09032760560512543,
      "learning_rate": 4.125511488307427e-07,
      "loss": 0.0135,
      "step": 2930040
    },
    {
      "epoch": 4.795107454030099,
      "grad_norm": 0.3318028748035431,
      "learning_rate": 4.1248525661722556e-07,
      "loss": 0.008,
      "step": 2930060
    },
    {
      "epoch": 4.795140184468752,
      "grad_norm": 0.1927068531513214,
      "learning_rate": 4.1241936440370845e-07,
      "loss": 0.0083,
      "step": 2930080
    },
    {
      "epoch": 4.795172914907406,
      "grad_norm": 0.2837267816066742,
      "learning_rate": 4.123534721901913e-07,
      "loss": 0.0129,
      "step": 2930100
    },
    {
      "epoch": 4.7952056453460585,
      "grad_norm": 0.045120202004909515,
      "learning_rate": 4.122875799766742e-07,
      "loss": 0.0057,
      "step": 2930120
    },
    {
      "epoch": 4.795238375784712,
      "grad_norm": 0.6085950136184692,
      "learning_rate": 4.1222168776315704e-07,
      "loss": 0.0113,
      "step": 2930140
    },
    {
      "epoch": 4.795271106223366,
      "grad_norm": 0.35903799533843994,
      "learning_rate": 4.121557955496399e-07,
      "loss": 0.0099,
      "step": 2930160
    },
    {
      "epoch": 4.795303836662019,
      "grad_norm": 0.215763121843338,
      "learning_rate": 4.120899033361228e-07,
      "loss": 0.0071,
      "step": 2930180
    },
    {
      "epoch": 4.795336567100672,
      "grad_norm": 0.5178500413894653,
      "learning_rate": 4.1202401112260563e-07,
      "loss": 0.0101,
      "step": 2930200
    },
    {
      "epoch": 4.795369297539326,
      "grad_norm": 0.24899227917194366,
      "learning_rate": 4.1195811890908853e-07,
      "loss": 0.009,
      "step": 2930220
    },
    {
      "epoch": 4.795402027977979,
      "grad_norm": 0.10063997656106949,
      "learning_rate": 4.118922266955714e-07,
      "loss": 0.0066,
      "step": 2930240
    },
    {
      "epoch": 4.795434758416632,
      "grad_norm": 0.21412591636180878,
      "learning_rate": 4.1182633448205433e-07,
      "loss": 0.0056,
      "step": 2930260
    },
    {
      "epoch": 4.7954674888552855,
      "grad_norm": 0.22965723276138306,
      "learning_rate": 4.117604422685371e-07,
      "loss": 0.0113,
      "step": 2930280
    },
    {
      "epoch": 4.795500219293939,
      "grad_norm": 0.22175933420658112,
      "learning_rate": 4.116945500550201e-07,
      "loss": 0.0119,
      "step": 2930300
    },
    {
      "epoch": 4.795532949732593,
      "grad_norm": 0.11470332741737366,
      "learning_rate": 4.116286578415029e-07,
      "loss": 0.011,
      "step": 2930320
    },
    {
      "epoch": 4.795565680171245,
      "grad_norm": 0.315843403339386,
      "learning_rate": 4.115627656279858e-07,
      "loss": 0.0066,
      "step": 2930340
    },
    {
      "epoch": 4.795598410609899,
      "grad_norm": 0.1722717136144638,
      "learning_rate": 4.1149687341446866e-07,
      "loss": 0.013,
      "step": 2930360
    },
    {
      "epoch": 4.795631141048553,
      "grad_norm": 0.3710172474384308,
      "learning_rate": 4.114309812009515e-07,
      "loss": 0.0065,
      "step": 2930380
    },
    {
      "epoch": 4.795663871487205,
      "grad_norm": 0.7745072841644287,
      "learning_rate": 4.113650889874344e-07,
      "loss": 0.0088,
      "step": 2930400
    },
    {
      "epoch": 4.795696601925859,
      "grad_norm": 0.061166681349277496,
      "learning_rate": 4.1129919677391725e-07,
      "loss": 0.0097,
      "step": 2930420
    },
    {
      "epoch": 4.7957293323645125,
      "grad_norm": 0.1209743469953537,
      "learning_rate": 4.1123330456040015e-07,
      "loss": 0.0054,
      "step": 2930440
    },
    {
      "epoch": 4.795762062803165,
      "grad_norm": 0.17431014776229858,
      "learning_rate": 4.11167412346883e-07,
      "loss": 0.0114,
      "step": 2930460
    },
    {
      "epoch": 4.795794793241819,
      "grad_norm": 0.35216808319091797,
      "learning_rate": 4.111015201333659e-07,
      "loss": 0.0068,
      "step": 2930480
    },
    {
      "epoch": 4.795827523680472,
      "grad_norm": 0.14406150579452515,
      "learning_rate": 4.1103562791984874e-07,
      "loss": 0.0089,
      "step": 2930500
    },
    {
      "epoch": 4.795860254119126,
      "grad_norm": 0.5119308829307556,
      "learning_rate": 4.1096973570633164e-07,
      "loss": 0.0089,
      "step": 2930520
    },
    {
      "epoch": 4.795892984557779,
      "grad_norm": 0.2671065628528595,
      "learning_rate": 4.109038434928145e-07,
      "loss": 0.0105,
      "step": 2930540
    },
    {
      "epoch": 4.795925714996432,
      "grad_norm": 0.1473907083272934,
      "learning_rate": 4.108379512792974e-07,
      "loss": 0.0115,
      "step": 2930560
    },
    {
      "epoch": 4.795958445435086,
      "grad_norm": 0.294842928647995,
      "learning_rate": 4.1077205906578023e-07,
      "loss": 0.0058,
      "step": 2930580
    },
    {
      "epoch": 4.7959911758737395,
      "grad_norm": 0.06495724618434906,
      "learning_rate": 4.107061668522631e-07,
      "loss": 0.0064,
      "step": 2930600
    },
    {
      "epoch": 4.796023906312392,
      "grad_norm": 0.21410126984119415,
      "learning_rate": 4.10640274638746e-07,
      "loss": 0.0058,
      "step": 2930620
    },
    {
      "epoch": 4.796056636751046,
      "grad_norm": 0.1314733773469925,
      "learning_rate": 4.105743824252288e-07,
      "loss": 0.0117,
      "step": 2930640
    },
    {
      "epoch": 4.796089367189699,
      "grad_norm": 0.415411114692688,
      "learning_rate": 4.105084902117117e-07,
      "loss": 0.0081,
      "step": 2930660
    },
    {
      "epoch": 4.796122097628352,
      "grad_norm": 0.1458994597196579,
      "learning_rate": 4.1044259799819456e-07,
      "loss": 0.0097,
      "step": 2930680
    },
    {
      "epoch": 4.796154828067006,
      "grad_norm": 0.34695807099342346,
      "learning_rate": 4.1037670578467746e-07,
      "loss": 0.0078,
      "step": 2930700
    },
    {
      "epoch": 4.796187558505659,
      "grad_norm": 0.08909265697002411,
      "learning_rate": 4.103108135711603e-07,
      "loss": 0.0078,
      "step": 2930720
    },
    {
      "epoch": 4.796220288944312,
      "grad_norm": 0.21646945178508759,
      "learning_rate": 4.102449213576432e-07,
      "loss": 0.0131,
      "step": 2930740
    },
    {
      "epoch": 4.7962530193829656,
      "grad_norm": 0.08883952349424362,
      "learning_rate": 4.1017902914412605e-07,
      "loss": 0.0099,
      "step": 2930760
    },
    {
      "epoch": 4.796285749821619,
      "grad_norm": 0.14136387407779694,
      "learning_rate": 4.1011313693060895e-07,
      "loss": 0.0086,
      "step": 2930780
    },
    {
      "epoch": 4.796318480260273,
      "grad_norm": 0.06707955151796341,
      "learning_rate": 4.100472447170918e-07,
      "loss": 0.009,
      "step": 2930800
    },
    {
      "epoch": 4.7963512106989254,
      "grad_norm": 1.9242117404937744,
      "learning_rate": 4.0998135250357464e-07,
      "loss": 0.0108,
      "step": 2930820
    },
    {
      "epoch": 4.796383941137579,
      "grad_norm": 0.06906045973300934,
      "learning_rate": 4.0991546029005754e-07,
      "loss": 0.0078,
      "step": 2930840
    },
    {
      "epoch": 4.796416671576233,
      "grad_norm": 0.1997065246105194,
      "learning_rate": 4.098495680765404e-07,
      "loss": 0.0063,
      "step": 2930860
    },
    {
      "epoch": 4.796449402014886,
      "grad_norm": 0.7779517769813538,
      "learning_rate": 4.097836758630233e-07,
      "loss": 0.0121,
      "step": 2930880
    },
    {
      "epoch": 4.796482132453539,
      "grad_norm": 0.610027015209198,
      "learning_rate": 4.0971778364950613e-07,
      "loss": 0.0095,
      "step": 2930900
    },
    {
      "epoch": 4.7965148628921925,
      "grad_norm": 0.21736259758472443,
      "learning_rate": 4.096518914359891e-07,
      "loss": 0.0084,
      "step": 2930920
    },
    {
      "epoch": 4.796547593330846,
      "grad_norm": 0.11846853047609329,
      "learning_rate": 4.095859992224719e-07,
      "loss": 0.0111,
      "step": 2930940
    },
    {
      "epoch": 4.796580323769499,
      "grad_norm": 0.14386139810085297,
      "learning_rate": 4.0952010700895483e-07,
      "loss": 0.0105,
      "step": 2930960
    },
    {
      "epoch": 4.796613054208152,
      "grad_norm": 0.3693894147872925,
      "learning_rate": 4.0945421479543767e-07,
      "loss": 0.0064,
      "step": 2930980
    },
    {
      "epoch": 4.796645784646806,
      "grad_norm": 0.09436965733766556,
      "learning_rate": 4.0938832258192057e-07,
      "loss": 0.0084,
      "step": 2931000
    },
    {
      "epoch": 4.796678515085459,
      "grad_norm": 0.22285884618759155,
      "learning_rate": 4.093224303684034e-07,
      "loss": 0.0116,
      "step": 2931020
    },
    {
      "epoch": 4.796711245524112,
      "grad_norm": 0.050936631858348846,
      "learning_rate": 4.0925653815488626e-07,
      "loss": 0.0063,
      "step": 2931040
    },
    {
      "epoch": 4.796743975962766,
      "grad_norm": 0.21021723747253418,
      "learning_rate": 4.0919064594136916e-07,
      "loss": 0.0085,
      "step": 2931060
    },
    {
      "epoch": 4.7967767064014195,
      "grad_norm": 0.11975719779729843,
      "learning_rate": 4.09124753727852e-07,
      "loss": 0.0083,
      "step": 2931080
    },
    {
      "epoch": 4.796809436840072,
      "grad_norm": 0.29420098662376404,
      "learning_rate": 4.090588615143349e-07,
      "loss": 0.01,
      "step": 2931100
    },
    {
      "epoch": 4.796842167278726,
      "grad_norm": 0.2928578853607178,
      "learning_rate": 4.0899296930081775e-07,
      "loss": 0.0112,
      "step": 2931120
    },
    {
      "epoch": 4.796874897717379,
      "grad_norm": 0.553466260433197,
      "learning_rate": 4.0892707708730065e-07,
      "loss": 0.0102,
      "step": 2931140
    },
    {
      "epoch": 4.796907628156033,
      "grad_norm": 0.43091267347335815,
      "learning_rate": 4.088611848737835e-07,
      "loss": 0.0101,
      "step": 2931160
    },
    {
      "epoch": 4.796940358594686,
      "grad_norm": 0.1079161986708641,
      "learning_rate": 4.087952926602664e-07,
      "loss": 0.01,
      "step": 2931180
    },
    {
      "epoch": 4.796973089033339,
      "grad_norm": 0.14744046330451965,
      "learning_rate": 4.0872940044674924e-07,
      "loss": 0.0106,
      "step": 2931200
    },
    {
      "epoch": 4.797005819471993,
      "grad_norm": 0.25354230403900146,
      "learning_rate": 4.0866350823323214e-07,
      "loss": 0.0068,
      "step": 2931220
    },
    {
      "epoch": 4.797038549910646,
      "grad_norm": 0.6391776204109192,
      "learning_rate": 4.08597616019715e-07,
      "loss": 0.0105,
      "step": 2931240
    },
    {
      "epoch": 4.797071280349299,
      "grad_norm": 0.14830364286899567,
      "learning_rate": 4.0853172380619783e-07,
      "loss": 0.0101,
      "step": 2931260
    },
    {
      "epoch": 4.797104010787953,
      "grad_norm": 0.5299868583679199,
      "learning_rate": 4.0846583159268073e-07,
      "loss": 0.0072,
      "step": 2931280
    },
    {
      "epoch": 4.7971367412266055,
      "grad_norm": 0.32980164885520935,
      "learning_rate": 4.0839993937916357e-07,
      "loss": 0.0064,
      "step": 2931300
    },
    {
      "epoch": 4.797169471665259,
      "grad_norm": 0.22293365001678467,
      "learning_rate": 4.0833404716564647e-07,
      "loss": 0.0099,
      "step": 2931320
    },
    {
      "epoch": 4.797202202103913,
      "grad_norm": 0.6897536516189575,
      "learning_rate": 4.082681549521293e-07,
      "loss": 0.0135,
      "step": 2931340
    },
    {
      "epoch": 4.797234932542566,
      "grad_norm": 0.15547387301921844,
      "learning_rate": 4.082022627386122e-07,
      "loss": 0.0095,
      "step": 2931360
    },
    {
      "epoch": 4.797267662981219,
      "grad_norm": 0.2260289490222931,
      "learning_rate": 4.0813637052509506e-07,
      "loss": 0.0077,
      "step": 2931380
    },
    {
      "epoch": 4.797300393419873,
      "grad_norm": 0.2992944121360779,
      "learning_rate": 4.0807047831157796e-07,
      "loss": 0.0102,
      "step": 2931400
    },
    {
      "epoch": 4.797333123858526,
      "grad_norm": 0.08181983232498169,
      "learning_rate": 4.080045860980608e-07,
      "loss": 0.0088,
      "step": 2931420
    },
    {
      "epoch": 4.79736585429718,
      "grad_norm": 0.9128310680389404,
      "learning_rate": 4.079386938845437e-07,
      "loss": 0.0119,
      "step": 2931440
    },
    {
      "epoch": 4.7973985847358325,
      "grad_norm": 0.1762111783027649,
      "learning_rate": 4.0787280167102655e-07,
      "loss": 0.007,
      "step": 2931460
    },
    {
      "epoch": 4.797431315174486,
      "grad_norm": 1.1680982112884521,
      "learning_rate": 4.078069094575094e-07,
      "loss": 0.0114,
      "step": 2931480
    },
    {
      "epoch": 4.79746404561314,
      "grad_norm": 0.20057742297649384,
      "learning_rate": 4.077410172439923e-07,
      "loss": 0.0082,
      "step": 2931500
    },
    {
      "epoch": 4.797496776051792,
      "grad_norm": 0.5559405088424683,
      "learning_rate": 4.0767512503047514e-07,
      "loss": 0.0068,
      "step": 2931520
    },
    {
      "epoch": 4.797529506490446,
      "grad_norm": 0.6552597880363464,
      "learning_rate": 4.0760923281695804e-07,
      "loss": 0.0107,
      "step": 2931540
    },
    {
      "epoch": 4.7975622369290996,
      "grad_norm": 0.16915805637836456,
      "learning_rate": 4.075433406034409e-07,
      "loss": 0.0097,
      "step": 2931560
    },
    {
      "epoch": 4.797594967367752,
      "grad_norm": 0.29398584365844727,
      "learning_rate": 4.0747744838992384e-07,
      "loss": 0.0094,
      "step": 2931580
    },
    {
      "epoch": 4.797627697806406,
      "grad_norm": 0.3295447826385498,
      "learning_rate": 4.0741155617640663e-07,
      "loss": 0.0133,
      "step": 2931600
    },
    {
      "epoch": 4.7976604282450594,
      "grad_norm": 0.2224087119102478,
      "learning_rate": 4.073456639628896e-07,
      "loss": 0.0088,
      "step": 2931620
    },
    {
      "epoch": 4.797693158683713,
      "grad_norm": 0.25849834084510803,
      "learning_rate": 4.072797717493724e-07,
      "loss": 0.0065,
      "step": 2931640
    },
    {
      "epoch": 4.797725889122366,
      "grad_norm": 0.40891629457473755,
      "learning_rate": 4.072138795358553e-07,
      "loss": 0.01,
      "step": 2931660
    },
    {
      "epoch": 4.797758619561019,
      "grad_norm": 0.6350089907646179,
      "learning_rate": 4.0714798732233817e-07,
      "loss": 0.0117,
      "step": 2931680
    },
    {
      "epoch": 4.797791349999673,
      "grad_norm": 0.5466383099555969,
      "learning_rate": 4.07082095108821e-07,
      "loss": 0.0095,
      "step": 2931700
    },
    {
      "epoch": 4.7978240804383265,
      "grad_norm": 0.16527342796325684,
      "learning_rate": 4.070162028953039e-07,
      "loss": 0.0084,
      "step": 2931720
    },
    {
      "epoch": 4.797856810876979,
      "grad_norm": 0.17854705452919006,
      "learning_rate": 4.0695031068178676e-07,
      "loss": 0.0112,
      "step": 2931740
    },
    {
      "epoch": 4.797889541315633,
      "grad_norm": 0.48097023367881775,
      "learning_rate": 4.0688441846826966e-07,
      "loss": 0.0068,
      "step": 2931760
    },
    {
      "epoch": 4.797922271754286,
      "grad_norm": 0.9184167981147766,
      "learning_rate": 4.068185262547525e-07,
      "loss": 0.0104,
      "step": 2931780
    },
    {
      "epoch": 4.797955002192939,
      "grad_norm": 0.289608895778656,
      "learning_rate": 4.067526340412354e-07,
      "loss": 0.009,
      "step": 2931800
    },
    {
      "epoch": 4.797987732631593,
      "grad_norm": 0.2456321269273758,
      "learning_rate": 4.0668674182771825e-07,
      "loss": 0.0079,
      "step": 2931820
    },
    {
      "epoch": 4.798020463070246,
      "grad_norm": 0.21903349459171295,
      "learning_rate": 4.0662084961420115e-07,
      "loss": 0.0107,
      "step": 2931840
    },
    {
      "epoch": 4.798053193508899,
      "grad_norm": 0.2152811735868454,
      "learning_rate": 4.06554957400684e-07,
      "loss": 0.012,
      "step": 2931860
    },
    {
      "epoch": 4.798085923947553,
      "grad_norm": 0.3382057845592499,
      "learning_rate": 4.064890651871669e-07,
      "loss": 0.0089,
      "step": 2931880
    },
    {
      "epoch": 4.798118654386206,
      "grad_norm": 0.2736210823059082,
      "learning_rate": 4.0642317297364974e-07,
      "loss": 0.0102,
      "step": 2931900
    },
    {
      "epoch": 4.79815138482486,
      "grad_norm": 0.11760559678077698,
      "learning_rate": 4.063572807601326e-07,
      "loss": 0.009,
      "step": 2931920
    },
    {
      "epoch": 4.7981841152635125,
      "grad_norm": 0.4307207465171814,
      "learning_rate": 4.062913885466155e-07,
      "loss": 0.0053,
      "step": 2931940
    },
    {
      "epoch": 4.798216845702166,
      "grad_norm": 0.3178778886795044,
      "learning_rate": 4.062254963330983e-07,
      "loss": 0.0064,
      "step": 2931960
    },
    {
      "epoch": 4.79824957614082,
      "grad_norm": 0.10966890305280685,
      "learning_rate": 4.061596041195812e-07,
      "loss": 0.0075,
      "step": 2931980
    },
    {
      "epoch": 4.798282306579472,
      "grad_norm": 0.3739388585090637,
      "learning_rate": 4.0609371190606407e-07,
      "loss": 0.0076,
      "step": 2932000
    },
    {
      "epoch": 4.798315037018126,
      "grad_norm": 0.11995867639780045,
      "learning_rate": 4.0602781969254697e-07,
      "loss": 0.0085,
      "step": 2932020
    },
    {
      "epoch": 4.79834776745678,
      "grad_norm": 0.18882526457309723,
      "learning_rate": 4.059619274790298e-07,
      "loss": 0.0076,
      "step": 2932040
    },
    {
      "epoch": 4.798380497895433,
      "grad_norm": 0.21661856770515442,
      "learning_rate": 4.058960352655127e-07,
      "loss": 0.0056,
      "step": 2932060
    },
    {
      "epoch": 4.798413228334086,
      "grad_norm": 0.1726808398962021,
      "learning_rate": 4.0583014305199556e-07,
      "loss": 0.0091,
      "step": 2932080
    },
    {
      "epoch": 4.7984459587727395,
      "grad_norm": 0.16012558341026306,
      "learning_rate": 4.0576425083847846e-07,
      "loss": 0.0083,
      "step": 2932100
    },
    {
      "epoch": 4.798478689211393,
      "grad_norm": 0.17008168995380402,
      "learning_rate": 4.056983586249613e-07,
      "loss": 0.0088,
      "step": 2932120
    },
    {
      "epoch": 4.798511419650046,
      "grad_norm": 0.08984890580177307,
      "learning_rate": 4.0563246641144415e-07,
      "loss": 0.0078,
      "step": 2932140
    },
    {
      "epoch": 4.798544150088699,
      "grad_norm": 0.05208098515868187,
      "learning_rate": 4.0556657419792705e-07,
      "loss": 0.01,
      "step": 2932160
    },
    {
      "epoch": 4.798576880527353,
      "grad_norm": 0.19288209080696106,
      "learning_rate": 4.055006819844099e-07,
      "loss": 0.0102,
      "step": 2932180
    },
    {
      "epoch": 4.798609610966006,
      "grad_norm": 0.10790230333805084,
      "learning_rate": 4.054347897708928e-07,
      "loss": 0.0085,
      "step": 2932200
    },
    {
      "epoch": 4.798642341404659,
      "grad_norm": 0.4284634292125702,
      "learning_rate": 4.0536889755737564e-07,
      "loss": 0.007,
      "step": 2932220
    },
    {
      "epoch": 4.798675071843313,
      "grad_norm": 0.27560049295425415,
      "learning_rate": 4.053030053438586e-07,
      "loss": 0.0073,
      "step": 2932240
    },
    {
      "epoch": 4.7987078022819665,
      "grad_norm": 0.08271890133619308,
      "learning_rate": 4.052371131303414e-07,
      "loss": 0.0132,
      "step": 2932260
    },
    {
      "epoch": 4.798740532720619,
      "grad_norm": 0.10615359246730804,
      "learning_rate": 4.0517122091682433e-07,
      "loss": 0.0069,
      "step": 2932280
    },
    {
      "epoch": 4.798773263159273,
      "grad_norm": 0.09659673273563385,
      "learning_rate": 4.051053287033072e-07,
      "loss": 0.0102,
      "step": 2932300
    },
    {
      "epoch": 4.798805993597926,
      "grad_norm": 0.14746195077896118,
      "learning_rate": 4.050394364897901e-07,
      "loss": 0.0118,
      "step": 2932320
    },
    {
      "epoch": 4.79883872403658,
      "grad_norm": 0.16973043978214264,
      "learning_rate": 4.049735442762729e-07,
      "loss": 0.0088,
      "step": 2932340
    },
    {
      "epoch": 4.798871454475233,
      "grad_norm": 0.23651933670043945,
      "learning_rate": 4.0490765206275577e-07,
      "loss": 0.0102,
      "step": 2932360
    },
    {
      "epoch": 4.798904184913886,
      "grad_norm": 0.07906977087259293,
      "learning_rate": 4.0484175984923867e-07,
      "loss": 0.0095,
      "step": 2932380
    },
    {
      "epoch": 4.79893691535254,
      "grad_norm": 0.09059014916419983,
      "learning_rate": 4.047758676357215e-07,
      "loss": 0.0084,
      "step": 2932400
    },
    {
      "epoch": 4.798969645791193,
      "grad_norm": 0.32300177216529846,
      "learning_rate": 4.047099754222044e-07,
      "loss": 0.0059,
      "step": 2932420
    },
    {
      "epoch": 4.799002376229846,
      "grad_norm": 0.3770592212677002,
      "learning_rate": 4.0464408320868726e-07,
      "loss": 0.0108,
      "step": 2932440
    },
    {
      "epoch": 4.7990351066685,
      "grad_norm": 0.39566588401794434,
      "learning_rate": 4.0457819099517015e-07,
      "loss": 0.0099,
      "step": 2932460
    },
    {
      "epoch": 4.7990678371071525,
      "grad_norm": 0.48079347610473633,
      "learning_rate": 4.04512298781653e-07,
      "loss": 0.0056,
      "step": 2932480
    },
    {
      "epoch": 4.799100567545806,
      "grad_norm": 0.273805171251297,
      "learning_rate": 4.044464065681359e-07,
      "loss": 0.012,
      "step": 2932500
    },
    {
      "epoch": 4.79913329798446,
      "grad_norm": 0.7898498773574829,
      "learning_rate": 4.0438051435461874e-07,
      "loss": 0.0094,
      "step": 2932520
    },
    {
      "epoch": 4.799166028423113,
      "grad_norm": 0.3677093982696533,
      "learning_rate": 4.0431462214110164e-07,
      "loss": 0.0083,
      "step": 2932540
    },
    {
      "epoch": 4.799198758861766,
      "grad_norm": 0.13284778594970703,
      "learning_rate": 4.042487299275845e-07,
      "loss": 0.0086,
      "step": 2932560
    },
    {
      "epoch": 4.7992314893004195,
      "grad_norm": 0.09561720490455627,
      "learning_rate": 4.0418283771406733e-07,
      "loss": 0.0125,
      "step": 2932580
    },
    {
      "epoch": 4.799264219739073,
      "grad_norm": 0.07997897267341614,
      "learning_rate": 4.0411694550055023e-07,
      "loss": 0.0096,
      "step": 2932600
    },
    {
      "epoch": 4.799296950177727,
      "grad_norm": 0.32688087224960327,
      "learning_rate": 4.040510532870331e-07,
      "loss": 0.011,
      "step": 2932620
    },
    {
      "epoch": 4.799329680616379,
      "grad_norm": 0.160883367061615,
      "learning_rate": 4.03985161073516e-07,
      "loss": 0.0064,
      "step": 2932640
    },
    {
      "epoch": 4.799362411055033,
      "grad_norm": 0.38055020570755005,
      "learning_rate": 4.039192688599988e-07,
      "loss": 0.0123,
      "step": 2932660
    },
    {
      "epoch": 4.799395141493687,
      "grad_norm": 0.6673622131347656,
      "learning_rate": 4.038533766464817e-07,
      "loss": 0.007,
      "step": 2932680
    },
    {
      "epoch": 4.799427871932339,
      "grad_norm": 0.2817523181438446,
      "learning_rate": 4.0378748443296457e-07,
      "loss": 0.0106,
      "step": 2932700
    },
    {
      "epoch": 4.799460602370993,
      "grad_norm": 0.2091946005821228,
      "learning_rate": 4.0372159221944747e-07,
      "loss": 0.0103,
      "step": 2932720
    },
    {
      "epoch": 4.7994933328096465,
      "grad_norm": 0.3601156175136566,
      "learning_rate": 4.036557000059303e-07,
      "loss": 0.011,
      "step": 2932740
    },
    {
      "epoch": 4.799526063248299,
      "grad_norm": 0.12078798562288284,
      "learning_rate": 4.035898077924132e-07,
      "loss": 0.0109,
      "step": 2932760
    },
    {
      "epoch": 4.799558793686953,
      "grad_norm": 0.2801172733306885,
      "learning_rate": 4.0352391557889606e-07,
      "loss": 0.0069,
      "step": 2932780
    },
    {
      "epoch": 4.799591524125606,
      "grad_norm": 0.1730499565601349,
      "learning_rate": 4.034580233653789e-07,
      "loss": 0.0126,
      "step": 2932800
    },
    {
      "epoch": 4.79962425456426,
      "grad_norm": 0.07602741569280624,
      "learning_rate": 4.033921311518618e-07,
      "loss": 0.0048,
      "step": 2932820
    },
    {
      "epoch": 4.799656985002913,
      "grad_norm": 0.11878054589033127,
      "learning_rate": 4.0332623893834465e-07,
      "loss": 0.0103,
      "step": 2932840
    },
    {
      "epoch": 4.799689715441566,
      "grad_norm": 0.25130435824394226,
      "learning_rate": 4.0326034672482754e-07,
      "loss": 0.0104,
      "step": 2932860
    },
    {
      "epoch": 4.79972244588022,
      "grad_norm": 0.2532009482383728,
      "learning_rate": 4.031944545113104e-07,
      "loss": 0.011,
      "step": 2932880
    },
    {
      "epoch": 4.7997551763188735,
      "grad_norm": 0.03933492302894592,
      "learning_rate": 4.0312856229779334e-07,
      "loss": 0.0067,
      "step": 2932900
    },
    {
      "epoch": 4.799787906757526,
      "grad_norm": 0.18029828369617462,
      "learning_rate": 4.0306267008427613e-07,
      "loss": 0.0093,
      "step": 2932920
    },
    {
      "epoch": 4.79982063719618,
      "grad_norm": 0.16963590681552887,
      "learning_rate": 4.029967778707591e-07,
      "loss": 0.0094,
      "step": 2932940
    },
    {
      "epoch": 4.799853367634833,
      "grad_norm": 0.16106732189655304,
      "learning_rate": 4.0293088565724193e-07,
      "loss": 0.0117,
      "step": 2932960
    },
    {
      "epoch": 4.799886098073486,
      "grad_norm": 0.23087982833385468,
      "learning_rate": 4.0286499344372483e-07,
      "loss": 0.0084,
      "step": 2932980
    },
    {
      "epoch": 4.79991882851214,
      "grad_norm": 0.1241309866309166,
      "learning_rate": 4.027991012302077e-07,
      "loss": 0.0124,
      "step": 2933000
    },
    {
      "epoch": 4.799951558950793,
      "grad_norm": 0.12461204081773758,
      "learning_rate": 4.027332090166905e-07,
      "loss": 0.0064,
      "step": 2933020
    },
    {
      "epoch": 4.799984289389446,
      "grad_norm": 0.30391234159469604,
      "learning_rate": 4.026673168031734e-07,
      "loss": 0.0068,
      "step": 2933040
    },
    {
      "epoch": 4.8000170198281,
      "grad_norm": 0.05729534104466438,
      "learning_rate": 4.0260142458965626e-07,
      "loss": 0.0088,
      "step": 2933060
    },
    {
      "epoch": 4.800049750266753,
      "grad_norm": 0.09511849284172058,
      "learning_rate": 4.0253553237613916e-07,
      "loss": 0.0075,
      "step": 2933080
    },
    {
      "epoch": 4.800082480705407,
      "grad_norm": 0.42041051387786865,
      "learning_rate": 4.02469640162622e-07,
      "loss": 0.0068,
      "step": 2933100
    },
    {
      "epoch": 4.8001152111440595,
      "grad_norm": 0.3957888185977936,
      "learning_rate": 4.024037479491049e-07,
      "loss": 0.0077,
      "step": 2933120
    },
    {
      "epoch": 4.800147941582713,
      "grad_norm": 0.6462854743003845,
      "learning_rate": 4.0233785573558775e-07,
      "loss": 0.0104,
      "step": 2933140
    },
    {
      "epoch": 4.800180672021367,
      "grad_norm": 0.2902851402759552,
      "learning_rate": 4.0227196352207065e-07,
      "loss": 0.0094,
      "step": 2933160
    },
    {
      "epoch": 4.80021340246002,
      "grad_norm": 0.13359816372394562,
      "learning_rate": 4.022060713085535e-07,
      "loss": 0.009,
      "step": 2933180
    },
    {
      "epoch": 4.800246132898673,
      "grad_norm": 0.20994789898395538,
      "learning_rate": 4.021401790950364e-07,
      "loss": 0.0106,
      "step": 2933200
    },
    {
      "epoch": 4.800278863337327,
      "grad_norm": 0.1482890099287033,
      "learning_rate": 4.0207428688151924e-07,
      "loss": 0.0104,
      "step": 2933220
    },
    {
      "epoch": 4.80031159377598,
      "grad_norm": 0.07009938359260559,
      "learning_rate": 4.020083946680021e-07,
      "loss": 0.0112,
      "step": 2933240
    },
    {
      "epoch": 4.800344324214633,
      "grad_norm": 0.3183797299861908,
      "learning_rate": 4.01942502454485e-07,
      "loss": 0.008,
      "step": 2933260
    },
    {
      "epoch": 4.8003770546532865,
      "grad_norm": 0.23330193758010864,
      "learning_rate": 4.0187661024096783e-07,
      "loss": 0.0067,
      "step": 2933280
    },
    {
      "epoch": 4.80040978509194,
      "grad_norm": 0.25636667013168335,
      "learning_rate": 4.0181071802745073e-07,
      "loss": 0.0102,
      "step": 2933300
    },
    {
      "epoch": 4.800442515530593,
      "grad_norm": 0.14201141893863678,
      "learning_rate": 4.017448258139336e-07,
      "loss": 0.01,
      "step": 2933320
    },
    {
      "epoch": 4.800475245969246,
      "grad_norm": 0.08469970524311066,
      "learning_rate": 4.016789336004165e-07,
      "loss": 0.0086,
      "step": 2933340
    },
    {
      "epoch": 4.8005079764079,
      "grad_norm": 0.05074632912874222,
      "learning_rate": 4.016130413868993e-07,
      "loss": 0.0096,
      "step": 2933360
    },
    {
      "epoch": 4.8005407068465535,
      "grad_norm": 0.2796543538570404,
      "learning_rate": 4.015471491733822e-07,
      "loss": 0.0052,
      "step": 2933380
    },
    {
      "epoch": 4.800573437285206,
      "grad_norm": 0.4206191897392273,
      "learning_rate": 4.0148125695986506e-07,
      "loss": 0.0097,
      "step": 2933400
    },
    {
      "epoch": 4.80060616772386,
      "grad_norm": 0.13740138709545135,
      "learning_rate": 4.0141536474634796e-07,
      "loss": 0.0094,
      "step": 2933420
    },
    {
      "epoch": 4.800638898162513,
      "grad_norm": 0.22863329946994781,
      "learning_rate": 4.013494725328308e-07,
      "loss": 0.0081,
      "step": 2933440
    },
    {
      "epoch": 4.800671628601166,
      "grad_norm": 0.05735132470726967,
      "learning_rate": 4.0128358031931376e-07,
      "loss": 0.0069,
      "step": 2933460
    },
    {
      "epoch": 4.80070435903982,
      "grad_norm": 0.2195909321308136,
      "learning_rate": 4.0121768810579655e-07,
      "loss": 0.0082,
      "step": 2933480
    },
    {
      "epoch": 4.800737089478473,
      "grad_norm": 0.31239914894104004,
      "learning_rate": 4.011517958922794e-07,
      "loss": 0.0133,
      "step": 2933500
    },
    {
      "epoch": 4.800769819917127,
      "grad_norm": 0.21792106330394745,
      "learning_rate": 4.010859036787623e-07,
      "loss": 0.0093,
      "step": 2933520
    },
    {
      "epoch": 4.80080255035578,
      "grad_norm": 0.33471643924713135,
      "learning_rate": 4.0102001146524514e-07,
      "loss": 0.0074,
      "step": 2933540
    },
    {
      "epoch": 4.800835280794433,
      "grad_norm": 0.13361623883247375,
      "learning_rate": 4.009541192517281e-07,
      "loss": 0.0047,
      "step": 2933560
    },
    {
      "epoch": 4.800868011233087,
      "grad_norm": 0.35619908571243286,
      "learning_rate": 4.008882270382109e-07,
      "loss": 0.0094,
      "step": 2933580
    },
    {
      "epoch": 4.8009007416717395,
      "grad_norm": 0.35761770606040955,
      "learning_rate": 4.0082233482469384e-07,
      "loss": 0.0128,
      "step": 2933600
    },
    {
      "epoch": 4.800933472110393,
      "grad_norm": 0.5988785624504089,
      "learning_rate": 4.007564426111767e-07,
      "loss": 0.0103,
      "step": 2933620
    },
    {
      "epoch": 4.800966202549047,
      "grad_norm": 0.056824300438165665,
      "learning_rate": 4.006905503976596e-07,
      "loss": 0.0059,
      "step": 2933640
    },
    {
      "epoch": 4.800998932987699,
      "grad_norm": 0.21530681848526,
      "learning_rate": 4.0062465818414243e-07,
      "loss": 0.0139,
      "step": 2933660
    },
    {
      "epoch": 4.801031663426353,
      "grad_norm": 0.22023209929466248,
      "learning_rate": 4.005587659706253e-07,
      "loss": 0.009,
      "step": 2933680
    },
    {
      "epoch": 4.801064393865007,
      "grad_norm": 0.12102140486240387,
      "learning_rate": 4.0049287375710817e-07,
      "loss": 0.0068,
      "step": 2933700
    },
    {
      "epoch": 4.80109712430366,
      "grad_norm": 0.5772862434387207,
      "learning_rate": 4.00426981543591e-07,
      "loss": 0.0076,
      "step": 2933720
    },
    {
      "epoch": 4.801129854742313,
      "grad_norm": 0.1620953381061554,
      "learning_rate": 4.003610893300739e-07,
      "loss": 0.0079,
      "step": 2933740
    },
    {
      "epoch": 4.8011625851809665,
      "grad_norm": 0.19655980169773102,
      "learning_rate": 4.0029519711655676e-07,
      "loss": 0.0093,
      "step": 2933760
    },
    {
      "epoch": 4.80119531561962,
      "grad_norm": 0.2255883365869522,
      "learning_rate": 4.0022930490303966e-07,
      "loss": 0.014,
      "step": 2933780
    },
    {
      "epoch": 4.801228046058274,
      "grad_norm": 0.1708987057209015,
      "learning_rate": 4.001634126895225e-07,
      "loss": 0.0086,
      "step": 2933800
    },
    {
      "epoch": 4.801260776496926,
      "grad_norm": 0.11528122425079346,
      "learning_rate": 4.000975204760054e-07,
      "loss": 0.0083,
      "step": 2933820
    },
    {
      "epoch": 4.80129350693558,
      "grad_norm": 0.35859763622283936,
      "learning_rate": 4.0003162826248825e-07,
      "loss": 0.0103,
      "step": 2933840
    },
    {
      "epoch": 4.801326237374234,
      "grad_norm": 0.10322003066539764,
      "learning_rate": 3.9996573604897115e-07,
      "loss": 0.007,
      "step": 2933860
    },
    {
      "epoch": 4.801358967812886,
      "grad_norm": 0.5068353414535522,
      "learning_rate": 3.99899843835454e-07,
      "loss": 0.0099,
      "step": 2933880
    },
    {
      "epoch": 4.80139169825154,
      "grad_norm": 0.041990406811237335,
      "learning_rate": 3.998339516219369e-07,
      "loss": 0.0077,
      "step": 2933900
    },
    {
      "epoch": 4.8014244286901935,
      "grad_norm": 0.20292000472545624,
      "learning_rate": 3.9976805940841974e-07,
      "loss": 0.0093,
      "step": 2933920
    },
    {
      "epoch": 4.801457159128846,
      "grad_norm": 0.3075336515903473,
      "learning_rate": 3.997021671949026e-07,
      "loss": 0.0084,
      "step": 2933940
    },
    {
      "epoch": 4.8014898895675,
      "grad_norm": 0.10230718553066254,
      "learning_rate": 3.996362749813855e-07,
      "loss": 0.0095,
      "step": 2933960
    },
    {
      "epoch": 4.801522620006153,
      "grad_norm": 0.1148032695055008,
      "learning_rate": 3.9957038276786833e-07,
      "loss": 0.0065,
      "step": 2933980
    },
    {
      "epoch": 4.801555350444807,
      "grad_norm": 0.17225076258182526,
      "learning_rate": 3.9950449055435123e-07,
      "loss": 0.0075,
      "step": 2934000
    },
    {
      "epoch": 4.80158808088346,
      "grad_norm": 0.35082557797431946,
      "learning_rate": 3.9943859834083407e-07,
      "loss": 0.0067,
      "step": 2934020
    },
    {
      "epoch": 4.801620811322113,
      "grad_norm": 0.11846055090427399,
      "learning_rate": 3.9937270612731697e-07,
      "loss": 0.0087,
      "step": 2934040
    },
    {
      "epoch": 4.801653541760767,
      "grad_norm": 0.6281827092170715,
      "learning_rate": 3.993068139137998e-07,
      "loss": 0.0093,
      "step": 2934060
    },
    {
      "epoch": 4.8016862721994205,
      "grad_norm": 0.13455186784267426,
      "learning_rate": 3.992409217002827e-07,
      "loss": 0.0106,
      "step": 2934080
    },
    {
      "epoch": 4.801719002638073,
      "grad_norm": 0.11093965172767639,
      "learning_rate": 3.9917502948676556e-07,
      "loss": 0.0065,
      "step": 2934100
    },
    {
      "epoch": 4.801751733076727,
      "grad_norm": 0.25179681181907654,
      "learning_rate": 3.991091372732485e-07,
      "loss": 0.009,
      "step": 2934120
    },
    {
      "epoch": 4.80178446351538,
      "grad_norm": 0.16843333840370178,
      "learning_rate": 3.990432450597313e-07,
      "loss": 0.0103,
      "step": 2934140
    },
    {
      "epoch": 4.801817193954033,
      "grad_norm": 0.3444859981536865,
      "learning_rate": 3.9897735284621415e-07,
      "loss": 0.0127,
      "step": 2934160
    },
    {
      "epoch": 4.801849924392687,
      "grad_norm": 0.12487521022558212,
      "learning_rate": 3.989114606326971e-07,
      "loss": 0.0104,
      "step": 2934180
    },
    {
      "epoch": 4.80188265483134,
      "grad_norm": 0.1788770854473114,
      "learning_rate": 3.988455684191799e-07,
      "loss": 0.0076,
      "step": 2934200
    },
    {
      "epoch": 4.801915385269993,
      "grad_norm": 0.17601247131824493,
      "learning_rate": 3.9877967620566285e-07,
      "loss": 0.0114,
      "step": 2934220
    },
    {
      "epoch": 4.8019481157086465,
      "grad_norm": 0.3717549443244934,
      "learning_rate": 3.9871378399214564e-07,
      "loss": 0.0122,
      "step": 2934240
    },
    {
      "epoch": 4.8019808461473,
      "grad_norm": 0.15829116106033325,
      "learning_rate": 3.986478917786286e-07,
      "loss": 0.0112,
      "step": 2934260
    },
    {
      "epoch": 4.802013576585954,
      "grad_norm": 0.4196717441082001,
      "learning_rate": 3.9858199956511144e-07,
      "loss": 0.0088,
      "step": 2934280
    },
    {
      "epoch": 4.802046307024606,
      "grad_norm": 0.6771427392959595,
      "learning_rate": 3.9851610735159433e-07,
      "loss": 0.0052,
      "step": 2934300
    },
    {
      "epoch": 4.80207903746326,
      "grad_norm": 0.16407641768455505,
      "learning_rate": 3.984502151380772e-07,
      "loss": 0.0092,
      "step": 2934320
    },
    {
      "epoch": 4.802111767901914,
      "grad_norm": 0.07344445586204529,
      "learning_rate": 3.983843229245601e-07,
      "loss": 0.0067,
      "step": 2934340
    },
    {
      "epoch": 4.802144498340567,
      "grad_norm": 0.18850429356098175,
      "learning_rate": 3.983184307110429e-07,
      "loss": 0.0076,
      "step": 2934360
    },
    {
      "epoch": 4.80217722877922,
      "grad_norm": 0.07887548208236694,
      "learning_rate": 3.9825253849752577e-07,
      "loss": 0.0061,
      "step": 2934380
    },
    {
      "epoch": 4.8022099592178735,
      "grad_norm": 0.1997375190258026,
      "learning_rate": 3.9818664628400867e-07,
      "loss": 0.0081,
      "step": 2934400
    },
    {
      "epoch": 4.802242689656527,
      "grad_norm": 0.2836458086967468,
      "learning_rate": 3.981207540704915e-07,
      "loss": 0.0085,
      "step": 2934420
    },
    {
      "epoch": 4.80227542009518,
      "grad_norm": 0.28607532382011414,
      "learning_rate": 3.980548618569744e-07,
      "loss": 0.0117,
      "step": 2934440
    },
    {
      "epoch": 4.802308150533833,
      "grad_norm": 0.4597225487232208,
      "learning_rate": 3.9798896964345726e-07,
      "loss": 0.0055,
      "step": 2934460
    },
    {
      "epoch": 4.802340880972487,
      "grad_norm": 0.2636304795742035,
      "learning_rate": 3.9792307742994016e-07,
      "loss": 0.0082,
      "step": 2934480
    },
    {
      "epoch": 4.80237361141114,
      "grad_norm": 0.13799923658370972,
      "learning_rate": 3.97857185216423e-07,
      "loss": 0.0085,
      "step": 2934500
    },
    {
      "epoch": 4.802406341849793,
      "grad_norm": 0.1749616414308548,
      "learning_rate": 3.977912930029059e-07,
      "loss": 0.0095,
      "step": 2934520
    },
    {
      "epoch": 4.802439072288447,
      "grad_norm": 0.12837986648082733,
      "learning_rate": 3.9772540078938875e-07,
      "loss": 0.0086,
      "step": 2934540
    },
    {
      "epoch": 4.8024718027271005,
      "grad_norm": 0.06285722553730011,
      "learning_rate": 3.9765950857587165e-07,
      "loss": 0.019,
      "step": 2934560
    },
    {
      "epoch": 4.802504533165753,
      "grad_norm": 0.2508827745914459,
      "learning_rate": 3.975936163623545e-07,
      "loss": 0.0105,
      "step": 2934580
    },
    {
      "epoch": 4.802537263604407,
      "grad_norm": 0.1830568015575409,
      "learning_rate": 3.9752772414883734e-07,
      "loss": 0.0049,
      "step": 2934600
    },
    {
      "epoch": 4.80256999404306,
      "grad_norm": 0.1214786097407341,
      "learning_rate": 3.9746183193532024e-07,
      "loss": 0.0069,
      "step": 2934620
    },
    {
      "epoch": 4.802602724481714,
      "grad_norm": 0.07492689788341522,
      "learning_rate": 3.973959397218031e-07,
      "loss": 0.0075,
      "step": 2934640
    },
    {
      "epoch": 4.802635454920367,
      "grad_norm": 0.3554542660713196,
      "learning_rate": 3.97330047508286e-07,
      "loss": 0.0096,
      "step": 2934660
    },
    {
      "epoch": 4.80266818535902,
      "grad_norm": 0.23181883990764618,
      "learning_rate": 3.972641552947688e-07,
      "loss": 0.0124,
      "step": 2934680
    },
    {
      "epoch": 4.802700915797674,
      "grad_norm": 0.18526391685009003,
      "learning_rate": 3.971982630812517e-07,
      "loss": 0.0096,
      "step": 2934700
    },
    {
      "epoch": 4.802733646236327,
      "grad_norm": 0.48838773369789124,
      "learning_rate": 3.9713237086773457e-07,
      "loss": 0.008,
      "step": 2934720
    },
    {
      "epoch": 4.80276637667498,
      "grad_norm": 0.12333289533853531,
      "learning_rate": 3.9706647865421747e-07,
      "loss": 0.0068,
      "step": 2934740
    },
    {
      "epoch": 4.802799107113634,
      "grad_norm": 0.5778325200080872,
      "learning_rate": 3.970005864407003e-07,
      "loss": 0.0088,
      "step": 2934760
    },
    {
      "epoch": 4.8028318375522865,
      "grad_norm": 0.1016642227768898,
      "learning_rate": 3.9693469422718326e-07,
      "loss": 0.0144,
      "step": 2934780
    },
    {
      "epoch": 4.80286456799094,
      "grad_norm": 0.1726200431585312,
      "learning_rate": 3.9686880201366606e-07,
      "loss": 0.0108,
      "step": 2934800
    },
    {
      "epoch": 4.802897298429594,
      "grad_norm": 0.4465177655220032,
      "learning_rate": 3.968029098001489e-07,
      "loss": 0.0101,
      "step": 2934820
    },
    {
      "epoch": 4.802930028868247,
      "grad_norm": 0.253704309463501,
      "learning_rate": 3.9673701758663185e-07,
      "loss": 0.0086,
      "step": 2934840
    },
    {
      "epoch": 4.8029627593069,
      "grad_norm": 0.18550756573677063,
      "learning_rate": 3.9667112537311465e-07,
      "loss": 0.0075,
      "step": 2934860
    },
    {
      "epoch": 4.802995489745554,
      "grad_norm": 0.07669675350189209,
      "learning_rate": 3.966052331595976e-07,
      "loss": 0.0091,
      "step": 2934880
    },
    {
      "epoch": 4.803028220184207,
      "grad_norm": 0.22982288897037506,
      "learning_rate": 3.965393409460804e-07,
      "loss": 0.0088,
      "step": 2934900
    },
    {
      "epoch": 4.80306095062286,
      "grad_norm": 0.1728811115026474,
      "learning_rate": 3.9647344873256334e-07,
      "loss": 0.0073,
      "step": 2934920
    },
    {
      "epoch": 4.8030936810615135,
      "grad_norm": 0.11635499447584152,
      "learning_rate": 3.964075565190462e-07,
      "loss": 0.0116,
      "step": 2934940
    },
    {
      "epoch": 4.803126411500167,
      "grad_norm": 0.1739778369665146,
      "learning_rate": 3.963416643055291e-07,
      "loss": 0.0075,
      "step": 2934960
    },
    {
      "epoch": 4.803159141938821,
      "grad_norm": 0.1408717781305313,
      "learning_rate": 3.9627577209201193e-07,
      "loss": 0.0079,
      "step": 2934980
    },
    {
      "epoch": 4.803191872377473,
      "grad_norm": 0.07796802371740341,
      "learning_rate": 3.9620987987849483e-07,
      "loss": 0.0081,
      "step": 2935000
    },
    {
      "epoch": 4.803224602816127,
      "grad_norm": 0.13734273612499237,
      "learning_rate": 3.961439876649777e-07,
      "loss": 0.0076,
      "step": 2935020
    },
    {
      "epoch": 4.8032573332547805,
      "grad_norm": 0.214896559715271,
      "learning_rate": 3.960780954514605e-07,
      "loss": 0.0077,
      "step": 2935040
    },
    {
      "epoch": 4.803290063693433,
      "grad_norm": 0.09568563848733902,
      "learning_rate": 3.960122032379434e-07,
      "loss": 0.014,
      "step": 2935060
    },
    {
      "epoch": 4.803322794132087,
      "grad_norm": 0.24693907797336578,
      "learning_rate": 3.9594631102442627e-07,
      "loss": 0.0077,
      "step": 2935080
    },
    {
      "epoch": 4.80335552457074,
      "grad_norm": 0.22837845981121063,
      "learning_rate": 3.9588041881090917e-07,
      "loss": 0.0083,
      "step": 2935100
    },
    {
      "epoch": 4.803388255009393,
      "grad_norm": 0.7466441988945007,
      "learning_rate": 3.95814526597392e-07,
      "loss": 0.0079,
      "step": 2935120
    },
    {
      "epoch": 4.803420985448047,
      "grad_norm": 0.14442718029022217,
      "learning_rate": 3.957486343838749e-07,
      "loss": 0.0053,
      "step": 2935140
    },
    {
      "epoch": 4.8034537158867,
      "grad_norm": 0.4659397304058075,
      "learning_rate": 3.9568274217035776e-07,
      "loss": 0.0099,
      "step": 2935160
    },
    {
      "epoch": 4.803486446325354,
      "grad_norm": 0.10411517322063446,
      "learning_rate": 3.9561684995684065e-07,
      "loss": 0.0052,
      "step": 2935180
    },
    {
      "epoch": 4.803519176764007,
      "grad_norm": 0.42354220151901245,
      "learning_rate": 3.955509577433235e-07,
      "loss": 0.009,
      "step": 2935200
    },
    {
      "epoch": 4.80355190720266,
      "grad_norm": 0.3151852488517761,
      "learning_rate": 3.954850655298064e-07,
      "loss": 0.01,
      "step": 2935220
    },
    {
      "epoch": 4.803584637641314,
      "grad_norm": 0.47434744238853455,
      "learning_rate": 3.9541917331628924e-07,
      "loss": 0.0092,
      "step": 2935240
    },
    {
      "epoch": 4.803617368079967,
      "grad_norm": 0.12861280143260956,
      "learning_rate": 3.953532811027721e-07,
      "loss": 0.0082,
      "step": 2935260
    },
    {
      "epoch": 4.80365009851862,
      "grad_norm": 0.44876033067703247,
      "learning_rate": 3.95287388889255e-07,
      "loss": 0.0123,
      "step": 2935280
    },
    {
      "epoch": 4.803682828957274,
      "grad_norm": 0.19317924976348877,
      "learning_rate": 3.9522149667573783e-07,
      "loss": 0.0124,
      "step": 2935300
    },
    {
      "epoch": 4.803715559395927,
      "grad_norm": 0.35812950134277344,
      "learning_rate": 3.9515560446222073e-07,
      "loss": 0.008,
      "step": 2935320
    },
    {
      "epoch": 4.80374828983458,
      "grad_norm": 0.20299604535102844,
      "learning_rate": 3.950897122487036e-07,
      "loss": 0.0085,
      "step": 2935340
    },
    {
      "epoch": 4.803781020273234,
      "grad_norm": 0.03056342713534832,
      "learning_rate": 3.950238200351865e-07,
      "loss": 0.005,
      "step": 2935360
    },
    {
      "epoch": 4.803813750711887,
      "grad_norm": 0.24814963340759277,
      "learning_rate": 3.949579278216693e-07,
      "loss": 0.0058,
      "step": 2935380
    },
    {
      "epoch": 4.80384648115054,
      "grad_norm": 0.1844492256641388,
      "learning_rate": 3.948920356081522e-07,
      "loss": 0.0072,
      "step": 2935400
    },
    {
      "epoch": 4.8038792115891935,
      "grad_norm": 0.16275691986083984,
      "learning_rate": 3.9482614339463507e-07,
      "loss": 0.0096,
      "step": 2935420
    },
    {
      "epoch": 4.803911942027847,
      "grad_norm": 0.3394128680229187,
      "learning_rate": 3.94760251181118e-07,
      "loss": 0.01,
      "step": 2935440
    },
    {
      "epoch": 4.803944672466501,
      "grad_norm": 0.13523364067077637,
      "learning_rate": 3.946943589676008e-07,
      "loss": 0.0078,
      "step": 2935460
    },
    {
      "epoch": 4.803977402905153,
      "grad_norm": 0.1781972199678421,
      "learning_rate": 3.9462846675408366e-07,
      "loss": 0.0092,
      "step": 2935480
    },
    {
      "epoch": 4.804010133343807,
      "grad_norm": 0.3587665259838104,
      "learning_rate": 3.945625745405666e-07,
      "loss": 0.0078,
      "step": 2935500
    },
    {
      "epoch": 4.804042863782461,
      "grad_norm": 0.4816136658191681,
      "learning_rate": 3.944966823270494e-07,
      "loss": 0.0113,
      "step": 2935520
    },
    {
      "epoch": 4.804075594221114,
      "grad_norm": 0.13645431399345398,
      "learning_rate": 3.9443079011353235e-07,
      "loss": 0.0056,
      "step": 2935540
    },
    {
      "epoch": 4.804108324659767,
      "grad_norm": 0.0706421360373497,
      "learning_rate": 3.9436489790001514e-07,
      "loss": 0.0061,
      "step": 2935560
    },
    {
      "epoch": 4.8041410550984205,
      "grad_norm": 0.04351707920432091,
      "learning_rate": 3.942990056864981e-07,
      "loss": 0.0066,
      "step": 2935580
    },
    {
      "epoch": 4.804173785537074,
      "grad_norm": 0.5303276181221008,
      "learning_rate": 3.9423311347298094e-07,
      "loss": 0.0111,
      "step": 2935600
    },
    {
      "epoch": 4.804206515975727,
      "grad_norm": 0.07070109248161316,
      "learning_rate": 3.9416722125946384e-07,
      "loss": 0.0079,
      "step": 2935620
    },
    {
      "epoch": 4.80423924641438,
      "grad_norm": 0.1687021404504776,
      "learning_rate": 3.941013290459467e-07,
      "loss": 0.0128,
      "step": 2935640
    },
    {
      "epoch": 4.804271976853034,
      "grad_norm": 0.33319875597953796,
      "learning_rate": 3.940354368324296e-07,
      "loss": 0.0094,
      "step": 2935660
    },
    {
      "epoch": 4.804304707291687,
      "grad_norm": 0.0714314877986908,
      "learning_rate": 3.9396954461891243e-07,
      "loss": 0.009,
      "step": 2935680
    },
    {
      "epoch": 4.80433743773034,
      "grad_norm": 0.2287033051252365,
      "learning_rate": 3.939036524053953e-07,
      "loss": 0.0073,
      "step": 2935700
    },
    {
      "epoch": 4.804370168168994,
      "grad_norm": 0.2134716808795929,
      "learning_rate": 3.938377601918782e-07,
      "loss": 0.0094,
      "step": 2935720
    },
    {
      "epoch": 4.8044028986076475,
      "grad_norm": 0.05257756635546684,
      "learning_rate": 3.93771867978361e-07,
      "loss": 0.0083,
      "step": 2935740
    },
    {
      "epoch": 4.8044356290463,
      "grad_norm": 0.16728562116622925,
      "learning_rate": 3.937059757648439e-07,
      "loss": 0.0101,
      "step": 2935760
    },
    {
      "epoch": 4.804468359484954,
      "grad_norm": 0.19074666500091553,
      "learning_rate": 3.9364008355132676e-07,
      "loss": 0.0094,
      "step": 2935780
    },
    {
      "epoch": 4.804501089923607,
      "grad_norm": 0.2079717367887497,
      "learning_rate": 3.9357419133780966e-07,
      "loss": 0.0075,
      "step": 2935800
    },
    {
      "epoch": 4.804533820362261,
      "grad_norm": 0.06348897516727448,
      "learning_rate": 3.935082991242925e-07,
      "loss": 0.0074,
      "step": 2935820
    },
    {
      "epoch": 4.804566550800914,
      "grad_norm": 0.1724773347377777,
      "learning_rate": 3.934424069107754e-07,
      "loss": 0.0092,
      "step": 2935840
    },
    {
      "epoch": 4.804599281239567,
      "grad_norm": 0.21594901382923126,
      "learning_rate": 3.9337651469725825e-07,
      "loss": 0.0115,
      "step": 2935860
    },
    {
      "epoch": 4.804632011678221,
      "grad_norm": 0.4175368845462799,
      "learning_rate": 3.9331062248374115e-07,
      "loss": 0.0082,
      "step": 2935880
    },
    {
      "epoch": 4.8046647421168736,
      "grad_norm": 0.08269928395748138,
      "learning_rate": 3.93244730270224e-07,
      "loss": 0.0075,
      "step": 2935900
    },
    {
      "epoch": 4.804697472555527,
      "grad_norm": 0.25959333777427673,
      "learning_rate": 3.9317883805670684e-07,
      "loss": 0.0088,
      "step": 2935920
    },
    {
      "epoch": 4.804730202994181,
      "grad_norm": 0.22899730503559113,
      "learning_rate": 3.9311294584318974e-07,
      "loss": 0.011,
      "step": 2935940
    },
    {
      "epoch": 4.8047629334328334,
      "grad_norm": 0.3179056644439697,
      "learning_rate": 3.930470536296726e-07,
      "loss": 0.0091,
      "step": 2935960
    },
    {
      "epoch": 4.804795663871487,
      "grad_norm": 0.4099753201007843,
      "learning_rate": 3.929811614161555e-07,
      "loss": 0.0083,
      "step": 2935980
    },
    {
      "epoch": 4.804828394310141,
      "grad_norm": 0.12489500641822815,
      "learning_rate": 3.9291526920263833e-07,
      "loss": 0.0059,
      "step": 2936000
    },
    {
      "epoch": 4.804861124748794,
      "grad_norm": 0.1564083993434906,
      "learning_rate": 3.9284937698912123e-07,
      "loss": 0.007,
      "step": 2936020
    },
    {
      "epoch": 4.804893855187447,
      "grad_norm": 0.4357587993144989,
      "learning_rate": 3.927834847756041e-07,
      "loss": 0.0088,
      "step": 2936040
    },
    {
      "epoch": 4.8049265856261005,
      "grad_norm": 0.1453380137681961,
      "learning_rate": 3.9271759256208697e-07,
      "loss": 0.0061,
      "step": 2936060
    },
    {
      "epoch": 4.804959316064754,
      "grad_norm": 0.05090675130486488,
      "learning_rate": 3.926517003485698e-07,
      "loss": 0.0076,
      "step": 2936080
    },
    {
      "epoch": 4.804992046503408,
      "grad_norm": 0.07517879456281662,
      "learning_rate": 3.9258580813505277e-07,
      "loss": 0.008,
      "step": 2936100
    },
    {
      "epoch": 4.80502477694206,
      "grad_norm": 0.37833157181739807,
      "learning_rate": 3.9251991592153556e-07,
      "loss": 0.0086,
      "step": 2936120
    },
    {
      "epoch": 4.805057507380714,
      "grad_norm": 0.30185720324516296,
      "learning_rate": 3.924540237080184e-07,
      "loss": 0.0127,
      "step": 2936140
    },
    {
      "epoch": 4.805090237819368,
      "grad_norm": 0.1273375302553177,
      "learning_rate": 3.9238813149450136e-07,
      "loss": 0.0061,
      "step": 2936160
    },
    {
      "epoch": 4.80512296825802,
      "grad_norm": 0.31062841415405273,
      "learning_rate": 3.9232223928098415e-07,
      "loss": 0.0086,
      "step": 2936180
    },
    {
      "epoch": 4.805155698696674,
      "grad_norm": 0.24769341945648193,
      "learning_rate": 3.922563470674671e-07,
      "loss": 0.0069,
      "step": 2936200
    },
    {
      "epoch": 4.8051884291353275,
      "grad_norm": 0.23480412364006042,
      "learning_rate": 3.921904548539499e-07,
      "loss": 0.0129,
      "step": 2936220
    },
    {
      "epoch": 4.80522115957398,
      "grad_norm": 0.21037770807743073,
      "learning_rate": 3.9212456264043285e-07,
      "loss": 0.0102,
      "step": 2936240
    },
    {
      "epoch": 4.805253890012634,
      "grad_norm": 0.17578493058681488,
      "learning_rate": 3.920586704269157e-07,
      "loss": 0.0142,
      "step": 2936260
    },
    {
      "epoch": 4.805286620451287,
      "grad_norm": 0.23912188410758972,
      "learning_rate": 3.919927782133986e-07,
      "loss": 0.0067,
      "step": 2936280
    },
    {
      "epoch": 4.805319350889941,
      "grad_norm": 0.42386800050735474,
      "learning_rate": 3.9192688599988144e-07,
      "loss": 0.0138,
      "step": 2936300
    },
    {
      "epoch": 4.805352081328594,
      "grad_norm": 0.8413519263267517,
      "learning_rate": 3.9186099378636434e-07,
      "loss": 0.005,
      "step": 2936320
    },
    {
      "epoch": 4.805384811767247,
      "grad_norm": 0.09023813158273697,
      "learning_rate": 3.917951015728472e-07,
      "loss": 0.0116,
      "step": 2936340
    },
    {
      "epoch": 4.805417542205901,
      "grad_norm": 0.15987379848957062,
      "learning_rate": 3.9172920935933003e-07,
      "loss": 0.0094,
      "step": 2936360
    },
    {
      "epoch": 4.805450272644554,
      "grad_norm": 0.3006698787212372,
      "learning_rate": 3.9166331714581293e-07,
      "loss": 0.0087,
      "step": 2936380
    },
    {
      "epoch": 4.805483003083207,
      "grad_norm": 0.4213709533214569,
      "learning_rate": 3.9159742493229577e-07,
      "loss": 0.0121,
      "step": 2936400
    },
    {
      "epoch": 4.805515733521861,
      "grad_norm": 0.3735020160675049,
      "learning_rate": 3.9153153271877867e-07,
      "loss": 0.0156,
      "step": 2936420
    },
    {
      "epoch": 4.805548463960514,
      "grad_norm": 0.18442881107330322,
      "learning_rate": 3.914656405052615e-07,
      "loss": 0.0045,
      "step": 2936440
    },
    {
      "epoch": 4.805581194399167,
      "grad_norm": 0.22697128355503082,
      "learning_rate": 3.913997482917444e-07,
      "loss": 0.0081,
      "step": 2936460
    },
    {
      "epoch": 4.805613924837821,
      "grad_norm": 0.1783420890569687,
      "learning_rate": 3.9133385607822726e-07,
      "loss": 0.0114,
      "step": 2936480
    },
    {
      "epoch": 4.805646655276474,
      "grad_norm": 0.22867275774478912,
      "learning_rate": 3.9126796386471016e-07,
      "loss": 0.0095,
      "step": 2936500
    },
    {
      "epoch": 4.805679385715127,
      "grad_norm": 0.20416560769081116,
      "learning_rate": 3.91202071651193e-07,
      "loss": 0.0057,
      "step": 2936520
    },
    {
      "epoch": 4.805712116153781,
      "grad_norm": 0.2075957953929901,
      "learning_rate": 3.911361794376759e-07,
      "loss": 0.0093,
      "step": 2936540
    },
    {
      "epoch": 4.805744846592434,
      "grad_norm": 0.35552167892456055,
      "learning_rate": 3.9107028722415875e-07,
      "loss": 0.0061,
      "step": 2936560
    },
    {
      "epoch": 4.805777577031087,
      "grad_norm": 0.0979313775897026,
      "learning_rate": 3.910043950106416e-07,
      "loss": 0.009,
      "step": 2936580
    },
    {
      "epoch": 4.8058103074697405,
      "grad_norm": 0.15794071555137634,
      "learning_rate": 3.909385027971245e-07,
      "loss": 0.0088,
      "step": 2936600
    },
    {
      "epoch": 4.805843037908394,
      "grad_norm": 0.23300430178642273,
      "learning_rate": 3.9087261058360734e-07,
      "loss": 0.0102,
      "step": 2936620
    },
    {
      "epoch": 4.805875768347048,
      "grad_norm": 0.2511867880821228,
      "learning_rate": 3.9080671837009024e-07,
      "loss": 0.0091,
      "step": 2936640
    },
    {
      "epoch": 4.8059084987857,
      "grad_norm": 0.17072951793670654,
      "learning_rate": 3.907408261565731e-07,
      "loss": 0.0087,
      "step": 2936660
    },
    {
      "epoch": 4.805941229224354,
      "grad_norm": 0.29339399933815,
      "learning_rate": 3.90674933943056e-07,
      "loss": 0.0062,
      "step": 2936680
    },
    {
      "epoch": 4.8059739596630076,
      "grad_norm": 0.13559691607952118,
      "learning_rate": 3.9060904172953883e-07,
      "loss": 0.0114,
      "step": 2936700
    },
    {
      "epoch": 4.806006690101661,
      "grad_norm": 0.13456618785858154,
      "learning_rate": 3.905431495160217e-07,
      "loss": 0.006,
      "step": 2936720
    },
    {
      "epoch": 4.806039420540314,
      "grad_norm": 0.09436380863189697,
      "learning_rate": 3.9047725730250457e-07,
      "loss": 0.0097,
      "step": 2936740
    },
    {
      "epoch": 4.8060721509789674,
      "grad_norm": 0.09376152604818344,
      "learning_rate": 3.904113650889875e-07,
      "loss": 0.0093,
      "step": 2936760
    },
    {
      "epoch": 4.806104881417621,
      "grad_norm": 0.346437007188797,
      "learning_rate": 3.903454728754703e-07,
      "loss": 0.0149,
      "step": 2936780
    },
    {
      "epoch": 4.806137611856274,
      "grad_norm": 0.3566286861896515,
      "learning_rate": 3.9027958066195316e-07,
      "loss": 0.0085,
      "step": 2936800
    },
    {
      "epoch": 4.806170342294927,
      "grad_norm": 0.37248894572257996,
      "learning_rate": 3.902136884484361e-07,
      "loss": 0.0068,
      "step": 2936820
    },
    {
      "epoch": 4.806203072733581,
      "grad_norm": 0.22440052032470703,
      "learning_rate": 3.901477962349189e-07,
      "loss": 0.0084,
      "step": 2936840
    },
    {
      "epoch": 4.806235803172234,
      "grad_norm": 0.40977177023887634,
      "learning_rate": 3.9008190402140186e-07,
      "loss": 0.0102,
      "step": 2936860
    },
    {
      "epoch": 4.806268533610887,
      "grad_norm": 0.29118838906288147,
      "learning_rate": 3.9001601180788465e-07,
      "loss": 0.0089,
      "step": 2936880
    },
    {
      "epoch": 4.806301264049541,
      "grad_norm": 0.21412289142608643,
      "learning_rate": 3.899501195943676e-07,
      "loss": 0.0082,
      "step": 2936900
    },
    {
      "epoch": 4.806333994488194,
      "grad_norm": 0.1364559680223465,
      "learning_rate": 3.8988422738085045e-07,
      "loss": 0.0108,
      "step": 2936920
    },
    {
      "epoch": 4.806366724926847,
      "grad_norm": 0.10769961029291153,
      "learning_rate": 3.8981833516733335e-07,
      "loss": 0.0065,
      "step": 2936940
    },
    {
      "epoch": 4.806399455365501,
      "grad_norm": 0.1280730664730072,
      "learning_rate": 3.897524429538162e-07,
      "loss": 0.0161,
      "step": 2936960
    },
    {
      "epoch": 4.806432185804154,
      "grad_norm": 0.45876482129096985,
      "learning_rate": 3.896865507402991e-07,
      "loss": 0.0108,
      "step": 2936980
    },
    {
      "epoch": 4.806464916242808,
      "grad_norm": 0.14307814836502075,
      "learning_rate": 3.8962065852678193e-07,
      "loss": 0.0071,
      "step": 2937000
    },
    {
      "epoch": 4.806497646681461,
      "grad_norm": 0.7081889510154724,
      "learning_rate": 3.895547663132648e-07,
      "loss": 0.0106,
      "step": 2937020
    },
    {
      "epoch": 4.806530377120114,
      "grad_norm": 0.10376714915037155,
      "learning_rate": 3.894888740997477e-07,
      "loss": 0.0094,
      "step": 2937040
    },
    {
      "epoch": 4.806563107558768,
      "grad_norm": 0.06611006706953049,
      "learning_rate": 3.894229818862305e-07,
      "loss": 0.0104,
      "step": 2937060
    },
    {
      "epoch": 4.8065958379974205,
      "grad_norm": 0.25180941820144653,
      "learning_rate": 3.893570896727134e-07,
      "loss": 0.0045,
      "step": 2937080
    },
    {
      "epoch": 4.806628568436074,
      "grad_norm": 0.3714272975921631,
      "learning_rate": 3.8929119745919627e-07,
      "loss": 0.0084,
      "step": 2937100
    },
    {
      "epoch": 4.806661298874728,
      "grad_norm": 0.4192812740802765,
      "learning_rate": 3.8922530524567917e-07,
      "loss": 0.0076,
      "step": 2937120
    },
    {
      "epoch": 4.80669402931338,
      "grad_norm": 0.18704527616500854,
      "learning_rate": 3.89159413032162e-07,
      "loss": 0.0082,
      "step": 2937140
    },
    {
      "epoch": 4.806726759752034,
      "grad_norm": 0.33763283491134644,
      "learning_rate": 3.890935208186449e-07,
      "loss": 0.0139,
      "step": 2937160
    },
    {
      "epoch": 4.806759490190688,
      "grad_norm": 0.14632101356983185,
      "learning_rate": 3.8902762860512776e-07,
      "loss": 0.0077,
      "step": 2937180
    },
    {
      "epoch": 4.806792220629341,
      "grad_norm": 0.040237993001937866,
      "learning_rate": 3.8896173639161066e-07,
      "loss": 0.0081,
      "step": 2937200
    },
    {
      "epoch": 4.806824951067994,
      "grad_norm": 0.20074869692325592,
      "learning_rate": 3.888958441780935e-07,
      "loss": 0.0097,
      "step": 2937220
    },
    {
      "epoch": 4.8068576815066475,
      "grad_norm": 0.05009596794843674,
      "learning_rate": 3.8882995196457635e-07,
      "loss": 0.0078,
      "step": 2937240
    },
    {
      "epoch": 4.806890411945301,
      "grad_norm": 0.14409101009368896,
      "learning_rate": 3.8876405975105925e-07,
      "loss": 0.014,
      "step": 2937260
    },
    {
      "epoch": 4.806923142383955,
      "grad_norm": 0.17569872736930847,
      "learning_rate": 3.886981675375421e-07,
      "loss": 0.0124,
      "step": 2937280
    },
    {
      "epoch": 4.806955872822607,
      "grad_norm": 0.1514010727405548,
      "learning_rate": 3.88632275324025e-07,
      "loss": 0.0166,
      "step": 2937300
    },
    {
      "epoch": 4.806988603261261,
      "grad_norm": 0.45134198665618896,
      "learning_rate": 3.8856638311050784e-07,
      "loss": 0.0106,
      "step": 2937320
    },
    {
      "epoch": 4.807021333699915,
      "grad_norm": 0.20691804587841034,
      "learning_rate": 3.8850049089699073e-07,
      "loss": 0.0081,
      "step": 2937340
    },
    {
      "epoch": 4.807054064138567,
      "grad_norm": 0.36754557490348816,
      "learning_rate": 3.884345986834736e-07,
      "loss": 0.0119,
      "step": 2937360
    },
    {
      "epoch": 4.807086794577221,
      "grad_norm": 0.23703782260417938,
      "learning_rate": 3.883687064699565e-07,
      "loss": 0.0131,
      "step": 2937380
    },
    {
      "epoch": 4.8071195250158745,
      "grad_norm": 0.4337459206581116,
      "learning_rate": 3.883028142564393e-07,
      "loss": 0.0094,
      "step": 2937400
    },
    {
      "epoch": 4.807152255454527,
      "grad_norm": 0.14571654796600342,
      "learning_rate": 3.882369220429223e-07,
      "loss": 0.0064,
      "step": 2937420
    },
    {
      "epoch": 4.807184985893181,
      "grad_norm": 0.19469508528709412,
      "learning_rate": 3.8817102982940507e-07,
      "loss": 0.0067,
      "step": 2937440
    },
    {
      "epoch": 4.807217716331834,
      "grad_norm": 0.14775916934013367,
      "learning_rate": 3.881051376158879e-07,
      "loss": 0.0084,
      "step": 2937460
    },
    {
      "epoch": 4.807250446770488,
      "grad_norm": 0.5494769215583801,
      "learning_rate": 3.8803924540237087e-07,
      "loss": 0.0092,
      "step": 2937480
    },
    {
      "epoch": 4.807283177209141,
      "grad_norm": 0.1804351806640625,
      "learning_rate": 3.8797335318885366e-07,
      "loss": 0.0078,
      "step": 2937500
    },
    {
      "epoch": 4.807315907647794,
      "grad_norm": 0.19421698153018951,
      "learning_rate": 3.879074609753366e-07,
      "loss": 0.0134,
      "step": 2937520
    },
    {
      "epoch": 4.807348638086448,
      "grad_norm": 0.1063244566321373,
      "learning_rate": 3.878415687618194e-07,
      "loss": 0.0103,
      "step": 2937540
    },
    {
      "epoch": 4.8073813685251015,
      "grad_norm": 0.2287554144859314,
      "learning_rate": 3.8777567654830235e-07,
      "loss": 0.0121,
      "step": 2937560
    },
    {
      "epoch": 4.807414098963754,
      "grad_norm": 0.15948541462421417,
      "learning_rate": 3.877097843347852e-07,
      "loss": 0.0078,
      "step": 2937580
    },
    {
      "epoch": 4.807446829402408,
      "grad_norm": 0.10655618458986282,
      "learning_rate": 3.876438921212681e-07,
      "loss": 0.0095,
      "step": 2937600
    },
    {
      "epoch": 4.807479559841061,
      "grad_norm": 0.07626532763242722,
      "learning_rate": 3.8757799990775094e-07,
      "loss": 0.0095,
      "step": 2937620
    },
    {
      "epoch": 4.807512290279714,
      "grad_norm": 0.09852396696805954,
      "learning_rate": 3.8751210769423384e-07,
      "loss": 0.013,
      "step": 2937640
    },
    {
      "epoch": 4.807545020718368,
      "grad_norm": 0.14585690200328827,
      "learning_rate": 3.874462154807167e-07,
      "loss": 0.0069,
      "step": 2937660
    },
    {
      "epoch": 4.807577751157021,
      "grad_norm": 0.22510136663913727,
      "learning_rate": 3.8738032326719953e-07,
      "loss": 0.0113,
      "step": 2937680
    },
    {
      "epoch": 4.807610481595674,
      "grad_norm": 0.08770741522312164,
      "learning_rate": 3.8731443105368243e-07,
      "loss": 0.0059,
      "step": 2937700
    },
    {
      "epoch": 4.8076432120343275,
      "grad_norm": 0.1337803304195404,
      "learning_rate": 3.872485388401653e-07,
      "loss": 0.0078,
      "step": 2937720
    },
    {
      "epoch": 4.807675942472981,
      "grad_norm": 0.8581263422966003,
      "learning_rate": 3.871826466266482e-07,
      "loss": 0.0118,
      "step": 2937740
    },
    {
      "epoch": 4.807708672911635,
      "grad_norm": 0.10244078934192657,
      "learning_rate": 3.87116754413131e-07,
      "loss": 0.0091,
      "step": 2937760
    },
    {
      "epoch": 4.807741403350287,
      "grad_norm": 0.35304221510887146,
      "learning_rate": 3.870508621996139e-07,
      "loss": 0.0082,
      "step": 2937780
    },
    {
      "epoch": 4.807774133788941,
      "grad_norm": 0.09502445161342621,
      "learning_rate": 3.8698496998609677e-07,
      "loss": 0.0083,
      "step": 2937800
    },
    {
      "epoch": 4.807806864227595,
      "grad_norm": 0.302439421415329,
      "learning_rate": 3.8691907777257966e-07,
      "loss": 0.0147,
      "step": 2937820
    },
    {
      "epoch": 4.807839594666247,
      "grad_norm": 0.4771813154220581,
      "learning_rate": 3.868531855590625e-07,
      "loss": 0.0053,
      "step": 2937840
    },
    {
      "epoch": 4.807872325104901,
      "grad_norm": 0.10869934409856796,
      "learning_rate": 3.867872933455454e-07,
      "loss": 0.0059,
      "step": 2937860
    },
    {
      "epoch": 4.8079050555435545,
      "grad_norm": 0.547260046005249,
      "learning_rate": 3.8672140113202825e-07,
      "loss": 0.0151,
      "step": 2937880
    },
    {
      "epoch": 4.807937785982208,
      "grad_norm": 0.06844352930784225,
      "learning_rate": 3.866555089185111e-07,
      "loss": 0.0064,
      "step": 2937900
    },
    {
      "epoch": 4.807970516420861,
      "grad_norm": 0.27414262294769287,
      "learning_rate": 3.86589616704994e-07,
      "loss": 0.0083,
      "step": 2937920
    },
    {
      "epoch": 4.808003246859514,
      "grad_norm": 0.12606573104858398,
      "learning_rate": 3.8652372449147684e-07,
      "loss": 0.0125,
      "step": 2937940
    },
    {
      "epoch": 4.808035977298168,
      "grad_norm": 0.08150380849838257,
      "learning_rate": 3.8645783227795974e-07,
      "loss": 0.0097,
      "step": 2937960
    },
    {
      "epoch": 4.808068707736821,
      "grad_norm": 0.2946767210960388,
      "learning_rate": 3.863919400644426e-07,
      "loss": 0.0067,
      "step": 2937980
    },
    {
      "epoch": 4.808101438175474,
      "grad_norm": 0.07626999914646149,
      "learning_rate": 3.863260478509255e-07,
      "loss": 0.0098,
      "step": 2938000
    },
    {
      "epoch": 4.808134168614128,
      "grad_norm": 0.1533856987953186,
      "learning_rate": 3.8626015563740833e-07,
      "loss": 0.0094,
      "step": 2938020
    },
    {
      "epoch": 4.808166899052781,
      "grad_norm": 0.29243502020835876,
      "learning_rate": 3.8619426342389123e-07,
      "loss": 0.0119,
      "step": 2938040
    },
    {
      "epoch": 4.808199629491434,
      "grad_norm": 0.2609873414039612,
      "learning_rate": 3.861283712103741e-07,
      "loss": 0.0084,
      "step": 2938060
    },
    {
      "epoch": 4.808232359930088,
      "grad_norm": 0.20065027475357056,
      "learning_rate": 3.8606247899685703e-07,
      "loss": 0.0091,
      "step": 2938080
    },
    {
      "epoch": 4.808265090368741,
      "grad_norm": 0.14128468930721283,
      "learning_rate": 3.859965867833398e-07,
      "loss": 0.0108,
      "step": 2938100
    },
    {
      "epoch": 4.808297820807394,
      "grad_norm": 0.0663137212395668,
      "learning_rate": 3.8593069456982267e-07,
      "loss": 0.0088,
      "step": 2938120
    },
    {
      "epoch": 4.808330551246048,
      "grad_norm": 0.24043573439121246,
      "learning_rate": 3.858648023563056e-07,
      "loss": 0.0061,
      "step": 2938140
    },
    {
      "epoch": 4.808363281684701,
      "grad_norm": 0.1353292316198349,
      "learning_rate": 3.857989101427884e-07,
      "loss": 0.0067,
      "step": 2938160
    },
    {
      "epoch": 4.808396012123355,
      "grad_norm": 0.13791576027870178,
      "learning_rate": 3.8573301792927136e-07,
      "loss": 0.0087,
      "step": 2938180
    },
    {
      "epoch": 4.808428742562008,
      "grad_norm": 0.3687251806259155,
      "learning_rate": 3.856671257157542e-07,
      "loss": 0.0119,
      "step": 2938200
    },
    {
      "epoch": 4.808461473000661,
      "grad_norm": 0.21804718673229218,
      "learning_rate": 3.856012335022371e-07,
      "loss": 0.0095,
      "step": 2938220
    },
    {
      "epoch": 4.808494203439315,
      "grad_norm": 0.20296259224414825,
      "learning_rate": 3.8553534128871995e-07,
      "loss": 0.0102,
      "step": 2938240
    },
    {
      "epoch": 4.8085269338779675,
      "grad_norm": 0.44738781452178955,
      "learning_rate": 3.8546944907520285e-07,
      "loss": 0.008,
      "step": 2938260
    },
    {
      "epoch": 4.808559664316621,
      "grad_norm": 0.101310133934021,
      "learning_rate": 3.854035568616857e-07,
      "loss": 0.0108,
      "step": 2938280
    },
    {
      "epoch": 4.808592394755275,
      "grad_norm": 0.21099072694778442,
      "learning_rate": 3.853376646481686e-07,
      "loss": 0.0098,
      "step": 2938300
    },
    {
      "epoch": 4.808625125193927,
      "grad_norm": 0.2119901329278946,
      "learning_rate": 3.8527177243465144e-07,
      "loss": 0.0091,
      "step": 2938320
    },
    {
      "epoch": 4.808657855632581,
      "grad_norm": 0.23203620314598083,
      "learning_rate": 3.852058802211343e-07,
      "loss": 0.0076,
      "step": 2938340
    },
    {
      "epoch": 4.808690586071235,
      "grad_norm": 0.3549902141094208,
      "learning_rate": 3.851399880076172e-07,
      "loss": 0.0088,
      "step": 2938360
    },
    {
      "epoch": 4.808723316509888,
      "grad_norm": 0.11016172915697098,
      "learning_rate": 3.8507409579410003e-07,
      "loss": 0.0101,
      "step": 2938380
    },
    {
      "epoch": 4.808756046948541,
      "grad_norm": 0.18212890625,
      "learning_rate": 3.8500820358058293e-07,
      "loss": 0.0092,
      "step": 2938400
    },
    {
      "epoch": 4.8087887773871945,
      "grad_norm": 0.20400556921958923,
      "learning_rate": 3.849423113670658e-07,
      "loss": 0.0091,
      "step": 2938420
    },
    {
      "epoch": 4.808821507825848,
      "grad_norm": 0.2507539391517639,
      "learning_rate": 3.8487641915354867e-07,
      "loss": 0.0086,
      "step": 2938440
    },
    {
      "epoch": 4.808854238264502,
      "grad_norm": 0.29097655415534973,
      "learning_rate": 3.848105269400315e-07,
      "loss": 0.0123,
      "step": 2938460
    },
    {
      "epoch": 4.808886968703154,
      "grad_norm": 0.47703438997268677,
      "learning_rate": 3.847446347265144e-07,
      "loss": 0.0103,
      "step": 2938480
    },
    {
      "epoch": 4.808919699141808,
      "grad_norm": 0.09888910502195358,
      "learning_rate": 3.8467874251299726e-07,
      "loss": 0.0079,
      "step": 2938500
    },
    {
      "epoch": 4.8089524295804615,
      "grad_norm": 0.2349223494529724,
      "learning_rate": 3.8461285029948016e-07,
      "loss": 0.0053,
      "step": 2938520
    },
    {
      "epoch": 4.808985160019114,
      "grad_norm": 0.17057408392429352,
      "learning_rate": 3.84546958085963e-07,
      "loss": 0.0127,
      "step": 2938540
    },
    {
      "epoch": 4.809017890457768,
      "grad_norm": 0.21292363107204437,
      "learning_rate": 3.8448106587244585e-07,
      "loss": 0.0056,
      "step": 2938560
    },
    {
      "epoch": 4.809050620896421,
      "grad_norm": 0.041682012379169464,
      "learning_rate": 3.8441517365892875e-07,
      "loss": 0.0106,
      "step": 2938580
    },
    {
      "epoch": 4.809083351335074,
      "grad_norm": 0.24195562303066254,
      "learning_rate": 3.843492814454116e-07,
      "loss": 0.0088,
      "step": 2938600
    },
    {
      "epoch": 4.809116081773728,
      "grad_norm": 0.09120117127895355,
      "learning_rate": 3.842833892318945e-07,
      "loss": 0.0114,
      "step": 2938620
    },
    {
      "epoch": 4.809148812212381,
      "grad_norm": 0.30764517188072205,
      "learning_rate": 3.8421749701837734e-07,
      "loss": 0.008,
      "step": 2938640
    },
    {
      "epoch": 4.809181542651035,
      "grad_norm": 0.15226517617702484,
      "learning_rate": 3.8415160480486024e-07,
      "loss": 0.01,
      "step": 2938660
    },
    {
      "epoch": 4.809214273089688,
      "grad_norm": 0.1862020045518875,
      "learning_rate": 3.840857125913431e-07,
      "loss": 0.0121,
      "step": 2938680
    },
    {
      "epoch": 4.809247003528341,
      "grad_norm": 0.36596953868865967,
      "learning_rate": 3.84019820377826e-07,
      "loss": 0.0076,
      "step": 2938700
    },
    {
      "epoch": 4.809279733966995,
      "grad_norm": 0.124326691031456,
      "learning_rate": 3.8395392816430883e-07,
      "loss": 0.0114,
      "step": 2938720
    },
    {
      "epoch": 4.809312464405648,
      "grad_norm": 0.3528420329093933,
      "learning_rate": 3.838880359507918e-07,
      "loss": 0.0087,
      "step": 2938740
    },
    {
      "epoch": 4.809345194844301,
      "grad_norm": 0.14954252541065216,
      "learning_rate": 3.8382214373727457e-07,
      "loss": 0.0085,
      "step": 2938760
    },
    {
      "epoch": 4.809377925282955,
      "grad_norm": 0.36462724208831787,
      "learning_rate": 3.837562515237574e-07,
      "loss": 0.0104,
      "step": 2938780
    },
    {
      "epoch": 4.809410655721608,
      "grad_norm": 0.6979787349700928,
      "learning_rate": 3.8369035931024037e-07,
      "loss": 0.0132,
      "step": 2938800
    },
    {
      "epoch": 4.809443386160261,
      "grad_norm": 0.3100774884223938,
      "learning_rate": 3.8362446709672316e-07,
      "loss": 0.0095,
      "step": 2938820
    },
    {
      "epoch": 4.809476116598915,
      "grad_norm": 0.14222516119480133,
      "learning_rate": 3.835585748832061e-07,
      "loss": 0.0089,
      "step": 2938840
    },
    {
      "epoch": 4.809508847037568,
      "grad_norm": 0.1761493682861328,
      "learning_rate": 3.8349268266968896e-07,
      "loss": 0.0111,
      "step": 2938860
    },
    {
      "epoch": 4.809541577476221,
      "grad_norm": 0.18071256577968597,
      "learning_rate": 3.8342679045617186e-07,
      "loss": 0.0067,
      "step": 2938880
    },
    {
      "epoch": 4.8095743079148745,
      "grad_norm": 0.36652040481567383,
      "learning_rate": 3.833608982426547e-07,
      "loss": 0.0074,
      "step": 2938900
    },
    {
      "epoch": 4.809607038353528,
      "grad_norm": 0.16072972118854523,
      "learning_rate": 3.832950060291376e-07,
      "loss": 0.0096,
      "step": 2938920
    },
    {
      "epoch": 4.809639768792182,
      "grad_norm": 0.06976917386054993,
      "learning_rate": 3.8322911381562045e-07,
      "loss": 0.0091,
      "step": 2938940
    },
    {
      "epoch": 4.809672499230834,
      "grad_norm": 0.34467947483062744,
      "learning_rate": 3.8316322160210335e-07,
      "loss": 0.0091,
      "step": 2938960
    },
    {
      "epoch": 4.809705229669488,
      "grad_norm": 0.09021669626235962,
      "learning_rate": 3.830973293885862e-07,
      "loss": 0.0079,
      "step": 2938980
    },
    {
      "epoch": 4.809737960108142,
      "grad_norm": 0.08552246540784836,
      "learning_rate": 3.8303143717506904e-07,
      "loss": 0.0088,
      "step": 2939000
    },
    {
      "epoch": 4.809770690546795,
      "grad_norm": 0.6786630153656006,
      "learning_rate": 3.8296554496155194e-07,
      "loss": 0.0108,
      "step": 2939020
    },
    {
      "epoch": 4.809803420985448,
      "grad_norm": 0.13955147564411163,
      "learning_rate": 3.828996527480348e-07,
      "loss": 0.0089,
      "step": 2939040
    },
    {
      "epoch": 4.8098361514241015,
      "grad_norm": 0.12415963411331177,
      "learning_rate": 3.828337605345177e-07,
      "loss": 0.0168,
      "step": 2939060
    },
    {
      "epoch": 4.809868881862755,
      "grad_norm": 0.35537734627723694,
      "learning_rate": 3.8276786832100053e-07,
      "loss": 0.0085,
      "step": 2939080
    },
    {
      "epoch": 4.809901612301408,
      "grad_norm": 0.27809932827949524,
      "learning_rate": 3.827019761074834e-07,
      "loss": 0.0097,
      "step": 2939100
    },
    {
      "epoch": 4.809934342740061,
      "grad_norm": 0.08718466758728027,
      "learning_rate": 3.8263608389396627e-07,
      "loss": 0.0107,
      "step": 2939120
    },
    {
      "epoch": 4.809967073178715,
      "grad_norm": 0.21189920604228973,
      "learning_rate": 3.8257019168044917e-07,
      "loss": 0.0068,
      "step": 2939140
    },
    {
      "epoch": 4.809999803617368,
      "grad_norm": 0.24931970238685608,
      "learning_rate": 3.82504299466932e-07,
      "loss": 0.0112,
      "step": 2939160
    },
    {
      "epoch": 4.810032534056021,
      "grad_norm": 0.16050679981708527,
      "learning_rate": 3.824384072534149e-07,
      "loss": 0.01,
      "step": 2939180
    },
    {
      "epoch": 4.810065264494675,
      "grad_norm": 0.29935595393180847,
      "learning_rate": 3.8237251503989776e-07,
      "loss": 0.0128,
      "step": 2939200
    },
    {
      "epoch": 4.8100979949333285,
      "grad_norm": 0.4017779231071472,
      "learning_rate": 3.823066228263806e-07,
      "loss": 0.0069,
      "step": 2939220
    },
    {
      "epoch": 4.810130725371981,
      "grad_norm": 0.2019418627023697,
      "learning_rate": 3.822407306128635e-07,
      "loss": 0.0079,
      "step": 2939240
    },
    {
      "epoch": 4.810163455810635,
      "grad_norm": 0.2512267529964447,
      "learning_rate": 3.8217483839934635e-07,
      "loss": 0.0067,
      "step": 2939260
    },
    {
      "epoch": 4.810196186249288,
      "grad_norm": 0.9400070309638977,
      "learning_rate": 3.8210894618582925e-07,
      "loss": 0.0103,
      "step": 2939280
    },
    {
      "epoch": 4.810228916687942,
      "grad_norm": 0.05968225374817848,
      "learning_rate": 3.820430539723121e-07,
      "loss": 0.0096,
      "step": 2939300
    },
    {
      "epoch": 4.810261647126595,
      "grad_norm": 0.09400835633277893,
      "learning_rate": 3.81977161758795e-07,
      "loss": 0.0114,
      "step": 2939320
    },
    {
      "epoch": 4.810294377565248,
      "grad_norm": 0.18463999032974243,
      "learning_rate": 3.8191126954527784e-07,
      "loss": 0.0082,
      "step": 2939340
    },
    {
      "epoch": 4.810327108003902,
      "grad_norm": 0.19705158472061157,
      "learning_rate": 3.8184537733176074e-07,
      "loss": 0.0126,
      "step": 2939360
    },
    {
      "epoch": 4.8103598384425545,
      "grad_norm": 0.11144381761550903,
      "learning_rate": 3.817794851182436e-07,
      "loss": 0.0095,
      "step": 2939380
    },
    {
      "epoch": 4.810392568881208,
      "grad_norm": 0.24843591451644897,
      "learning_rate": 3.8171359290472653e-07,
      "loss": 0.0126,
      "step": 2939400
    },
    {
      "epoch": 4.810425299319862,
      "grad_norm": 0.13869987428188324,
      "learning_rate": 3.816477006912093e-07,
      "loss": 0.0109,
      "step": 2939420
    },
    {
      "epoch": 4.810458029758514,
      "grad_norm": 0.21240080893039703,
      "learning_rate": 3.8158180847769217e-07,
      "loss": 0.0102,
      "step": 2939440
    },
    {
      "epoch": 4.810490760197168,
      "grad_norm": 0.46576163172721863,
      "learning_rate": 3.815159162641751e-07,
      "loss": 0.0055,
      "step": 2939460
    },
    {
      "epoch": 4.810523490635822,
      "grad_norm": 0.41637805104255676,
      "learning_rate": 3.814500240506579e-07,
      "loss": 0.0116,
      "step": 2939480
    },
    {
      "epoch": 4.810556221074475,
      "grad_norm": 0.380197674036026,
      "learning_rate": 3.8138413183714087e-07,
      "loss": 0.0109,
      "step": 2939500
    },
    {
      "epoch": 4.810588951513128,
      "grad_norm": 0.382375031709671,
      "learning_rate": 3.813182396236237e-07,
      "loss": 0.0104,
      "step": 2939520
    },
    {
      "epoch": 4.8106216819517815,
      "grad_norm": 0.17758920788764954,
      "learning_rate": 3.812523474101066e-07,
      "loss": 0.0089,
      "step": 2939540
    },
    {
      "epoch": 4.810654412390435,
      "grad_norm": 0.39742642641067505,
      "learning_rate": 3.8118645519658946e-07,
      "loss": 0.01,
      "step": 2939560
    },
    {
      "epoch": 4.810687142829088,
      "grad_norm": 0.33384570479393005,
      "learning_rate": 3.8112056298307236e-07,
      "loss": 0.0099,
      "step": 2939580
    },
    {
      "epoch": 4.810719873267741,
      "grad_norm": 0.33281993865966797,
      "learning_rate": 3.810546707695552e-07,
      "loss": 0.0081,
      "step": 2939600
    },
    {
      "epoch": 4.810752603706395,
      "grad_norm": 0.08668994903564453,
      "learning_rate": 3.809887785560381e-07,
      "loss": 0.007,
      "step": 2939620
    },
    {
      "epoch": 4.810785334145049,
      "grad_norm": 0.26447245478630066,
      "learning_rate": 3.8092288634252095e-07,
      "loss": 0.0153,
      "step": 2939640
    },
    {
      "epoch": 4.810818064583701,
      "grad_norm": 0.2129480391740799,
      "learning_rate": 3.808569941290038e-07,
      "loss": 0.0155,
      "step": 2939660
    },
    {
      "epoch": 4.810850795022355,
      "grad_norm": 0.28362709283828735,
      "learning_rate": 3.807911019154867e-07,
      "loss": 0.0099,
      "step": 2939680
    },
    {
      "epoch": 4.8108835254610085,
      "grad_norm": 0.4107059836387634,
      "learning_rate": 3.8072520970196954e-07,
      "loss": 0.0075,
      "step": 2939700
    },
    {
      "epoch": 4.810916255899661,
      "grad_norm": 0.24234583973884583,
      "learning_rate": 3.8065931748845243e-07,
      "loss": 0.013,
      "step": 2939720
    },
    {
      "epoch": 4.810948986338315,
      "grad_norm": 0.40649688243865967,
      "learning_rate": 3.805934252749353e-07,
      "loss": 0.0106,
      "step": 2939740
    },
    {
      "epoch": 4.810981716776968,
      "grad_norm": 0.08114767074584961,
      "learning_rate": 3.805275330614182e-07,
      "loss": 0.0137,
      "step": 2939760
    },
    {
      "epoch": 4.811014447215621,
      "grad_norm": 0.13681161403656006,
      "learning_rate": 3.80461640847901e-07,
      "loss": 0.0104,
      "step": 2939780
    },
    {
      "epoch": 4.811047177654275,
      "grad_norm": 0.1476663202047348,
      "learning_rate": 3.803957486343839e-07,
      "loss": 0.0095,
      "step": 2939800
    },
    {
      "epoch": 4.811079908092928,
      "grad_norm": 0.2584725022315979,
      "learning_rate": 3.8032985642086677e-07,
      "loss": 0.0097,
      "step": 2939820
    },
    {
      "epoch": 4.811112638531582,
      "grad_norm": 0.1935884654521942,
      "learning_rate": 3.8026396420734967e-07,
      "loss": 0.0087,
      "step": 2939840
    },
    {
      "epoch": 4.811145368970235,
      "grad_norm": 0.4652481973171234,
      "learning_rate": 3.801980719938325e-07,
      "loss": 0.0085,
      "step": 2939860
    },
    {
      "epoch": 4.811178099408888,
      "grad_norm": 0.05025971680879593,
      "learning_rate": 3.8013217978031536e-07,
      "loss": 0.0072,
      "step": 2939880
    },
    {
      "epoch": 4.811210829847542,
      "grad_norm": 0.09259670227766037,
      "learning_rate": 3.8006628756679826e-07,
      "loss": 0.0137,
      "step": 2939900
    },
    {
      "epoch": 4.811243560286195,
      "grad_norm": 0.1900153011083603,
      "learning_rate": 3.800003953532811e-07,
      "loss": 0.008,
      "step": 2939920
    },
    {
      "epoch": 4.811276290724848,
      "grad_norm": 0.11946801096200943,
      "learning_rate": 3.79934503139764e-07,
      "loss": 0.0086,
      "step": 2939940
    },
    {
      "epoch": 4.811309021163502,
      "grad_norm": 0.18842126429080963,
      "learning_rate": 3.7986861092624685e-07,
      "loss": 0.009,
      "step": 2939960
    },
    {
      "epoch": 4.811341751602155,
      "grad_norm": 0.1379772424697876,
      "learning_rate": 3.7980271871272974e-07,
      "loss": 0.0106,
      "step": 2939980
    },
    {
      "epoch": 4.811374482040808,
      "grad_norm": 0.08755412697792053,
      "learning_rate": 3.797368264992126e-07,
      "loss": 0.0067,
      "step": 2940000
    },
    {
      "epoch": 4.811407212479462,
      "grad_norm": 0.3098999559879303,
      "learning_rate": 3.796709342856955e-07,
      "loss": 0.0091,
      "step": 2940020
    },
    {
      "epoch": 4.811439942918115,
      "grad_norm": 0.2846682369709015,
      "learning_rate": 3.7960504207217833e-07,
      "loss": 0.008,
      "step": 2940040
    },
    {
      "epoch": 4.811472673356768,
      "grad_norm": 0.23997315764427185,
      "learning_rate": 3.795391498586613e-07,
      "loss": 0.0082,
      "step": 2940060
    },
    {
      "epoch": 4.8115054037954215,
      "grad_norm": 0.09857232868671417,
      "learning_rate": 3.794732576451441e-07,
      "loss": 0.0063,
      "step": 2940080
    },
    {
      "epoch": 4.811538134234075,
      "grad_norm": 0.27614858746528625,
      "learning_rate": 3.794073654316269e-07,
      "loss": 0.0085,
      "step": 2940100
    },
    {
      "epoch": 4.811570864672729,
      "grad_norm": 0.08049279451370239,
      "learning_rate": 3.793414732181099e-07,
      "loss": 0.0111,
      "step": 2940120
    },
    {
      "epoch": 4.811603595111381,
      "grad_norm": 0.2892627418041229,
      "learning_rate": 3.7927558100459267e-07,
      "loss": 0.006,
      "step": 2940140
    },
    {
      "epoch": 4.811636325550035,
      "grad_norm": 0.21487364172935486,
      "learning_rate": 3.792096887910756e-07,
      "loss": 0.0131,
      "step": 2940160
    },
    {
      "epoch": 4.8116690559886885,
      "grad_norm": 0.9192444682121277,
      "learning_rate": 3.7914379657755847e-07,
      "loss": 0.0127,
      "step": 2940180
    },
    {
      "epoch": 4.811701786427342,
      "grad_norm": 0.06013013422489166,
      "learning_rate": 3.7907790436404136e-07,
      "loss": 0.0084,
      "step": 2940200
    },
    {
      "epoch": 4.811734516865995,
      "grad_norm": 0.5451342463493347,
      "learning_rate": 3.790120121505242e-07,
      "loss": 0.0064,
      "step": 2940220
    },
    {
      "epoch": 4.811767247304648,
      "grad_norm": 0.11979173868894577,
      "learning_rate": 3.789461199370071e-07,
      "loss": 0.0092,
      "step": 2940240
    },
    {
      "epoch": 4.811799977743302,
      "grad_norm": 0.21849556267261505,
      "learning_rate": 3.7888022772348995e-07,
      "loss": 0.0087,
      "step": 2940260
    },
    {
      "epoch": 4.811832708181955,
      "grad_norm": 0.23717036843299866,
      "learning_rate": 3.7881433550997285e-07,
      "loss": 0.0082,
      "step": 2940280
    },
    {
      "epoch": 4.811865438620608,
      "grad_norm": 0.5784087777137756,
      "learning_rate": 3.787484432964557e-07,
      "loss": 0.0111,
      "step": 2940300
    },
    {
      "epoch": 4.811898169059262,
      "grad_norm": 0.39220061898231506,
      "learning_rate": 3.7868255108293854e-07,
      "loss": 0.0104,
      "step": 2940320
    },
    {
      "epoch": 4.811930899497915,
      "grad_norm": 0.17281927168369293,
      "learning_rate": 3.7861665886942144e-07,
      "loss": 0.0076,
      "step": 2940340
    },
    {
      "epoch": 4.811963629936568,
      "grad_norm": 0.291171669960022,
      "learning_rate": 3.785507666559043e-07,
      "loss": 0.0077,
      "step": 2940360
    },
    {
      "epoch": 4.811996360375222,
      "grad_norm": 0.3514297902584076,
      "learning_rate": 3.784848744423872e-07,
      "loss": 0.0109,
      "step": 2940380
    },
    {
      "epoch": 4.812029090813875,
      "grad_norm": 0.20687390863895416,
      "learning_rate": 3.7841898222887003e-07,
      "loss": 0.0094,
      "step": 2940400
    },
    {
      "epoch": 4.812061821252528,
      "grad_norm": 0.1945321261882782,
      "learning_rate": 3.7835309001535293e-07,
      "loss": 0.0068,
      "step": 2940420
    },
    {
      "epoch": 4.812094551691182,
      "grad_norm": 0.14628057181835175,
      "learning_rate": 3.782871978018358e-07,
      "loss": 0.0057,
      "step": 2940440
    },
    {
      "epoch": 4.812127282129835,
      "grad_norm": 0.08070842176675797,
      "learning_rate": 3.782213055883187e-07,
      "loss": 0.0065,
      "step": 2940460
    },
    {
      "epoch": 4.812160012568489,
      "grad_norm": 0.2614264190196991,
      "learning_rate": 3.781554133748015e-07,
      "loss": 0.0122,
      "step": 2940480
    },
    {
      "epoch": 4.812192743007142,
      "grad_norm": 0.3718982934951782,
      "learning_rate": 3.780895211612844e-07,
      "loss": 0.0061,
      "step": 2940500
    },
    {
      "epoch": 4.812225473445795,
      "grad_norm": 0.09902697801589966,
      "learning_rate": 3.7802362894776726e-07,
      "loss": 0.0093,
      "step": 2940520
    },
    {
      "epoch": 4.812258203884449,
      "grad_norm": 0.03315519541501999,
      "learning_rate": 3.779577367342501e-07,
      "loss": 0.0094,
      "step": 2940540
    },
    {
      "epoch": 4.8122909343231015,
      "grad_norm": 0.303218811750412,
      "learning_rate": 3.77891844520733e-07,
      "loss": 0.0104,
      "step": 2940560
    },
    {
      "epoch": 4.812323664761755,
      "grad_norm": 0.32367902994155884,
      "learning_rate": 3.7782595230721585e-07,
      "loss": 0.0082,
      "step": 2940580
    },
    {
      "epoch": 4.812356395200409,
      "grad_norm": 0.1805872768163681,
      "learning_rate": 3.7776006009369875e-07,
      "loss": 0.0094,
      "step": 2940600
    },
    {
      "epoch": 4.812389125639061,
      "grad_norm": 0.4305148422718048,
      "learning_rate": 3.776941678801816e-07,
      "loss": 0.0092,
      "step": 2940620
    },
    {
      "epoch": 4.812421856077715,
      "grad_norm": 0.33729028701782227,
      "learning_rate": 3.776282756666645e-07,
      "loss": 0.0075,
      "step": 2940640
    },
    {
      "epoch": 4.812454586516369,
      "grad_norm": 0.0963616743683815,
      "learning_rate": 3.7756238345314734e-07,
      "loss": 0.0062,
      "step": 2940660
    },
    {
      "epoch": 4.812487316955022,
      "grad_norm": 0.2313770204782486,
      "learning_rate": 3.7749649123963024e-07,
      "loss": 0.0078,
      "step": 2940680
    },
    {
      "epoch": 4.812520047393675,
      "grad_norm": 0.1087614893913269,
      "learning_rate": 3.774305990261131e-07,
      "loss": 0.0091,
      "step": 2940700
    },
    {
      "epoch": 4.8125527778323285,
      "grad_norm": 0.1807357370853424,
      "learning_rate": 3.7736470681259604e-07,
      "loss": 0.0101,
      "step": 2940720
    },
    {
      "epoch": 4.812585508270982,
      "grad_norm": 0.19380304217338562,
      "learning_rate": 3.7729881459907883e-07,
      "loss": 0.0045,
      "step": 2940740
    },
    {
      "epoch": 4.812618238709636,
      "grad_norm": 0.1884603053331375,
      "learning_rate": 3.772329223855617e-07,
      "loss": 0.0095,
      "step": 2940760
    },
    {
      "epoch": 4.812650969148288,
      "grad_norm": 0.2381746768951416,
      "learning_rate": 3.7716703017204463e-07,
      "loss": 0.0062,
      "step": 2940780
    },
    {
      "epoch": 4.812683699586942,
      "grad_norm": 0.4590415060520172,
      "learning_rate": 3.771011379585274e-07,
      "loss": 0.0071,
      "step": 2940800
    },
    {
      "epoch": 4.812716430025596,
      "grad_norm": 0.21316425502300262,
      "learning_rate": 3.7703524574501037e-07,
      "loss": 0.0078,
      "step": 2940820
    },
    {
      "epoch": 4.812749160464248,
      "grad_norm": 0.18989551067352295,
      "learning_rate": 3.769693535314932e-07,
      "loss": 0.0086,
      "step": 2940840
    },
    {
      "epoch": 4.812781890902902,
      "grad_norm": 0.666608989238739,
      "learning_rate": 3.769034613179761e-07,
      "loss": 0.0068,
      "step": 2940860
    },
    {
      "epoch": 4.8128146213415555,
      "grad_norm": 0.11647534370422363,
      "learning_rate": 3.7683756910445896e-07,
      "loss": 0.0116,
      "step": 2940880
    },
    {
      "epoch": 4.812847351780208,
      "grad_norm": 0.08183619379997253,
      "learning_rate": 3.7677167689094186e-07,
      "loss": 0.0123,
      "step": 2940900
    },
    {
      "epoch": 4.812880082218862,
      "grad_norm": 0.22830268740653992,
      "learning_rate": 3.767057846774247e-07,
      "loss": 0.0103,
      "step": 2940920
    },
    {
      "epoch": 4.812912812657515,
      "grad_norm": 0.07691741734743118,
      "learning_rate": 3.766398924639076e-07,
      "loss": 0.0093,
      "step": 2940940
    },
    {
      "epoch": 4.812945543096169,
      "grad_norm": 0.09844774007797241,
      "learning_rate": 3.7657400025039045e-07,
      "loss": 0.008,
      "step": 2940960
    },
    {
      "epoch": 4.812978273534822,
      "grad_norm": 0.13714365661144257,
      "learning_rate": 3.765081080368733e-07,
      "loss": 0.0054,
      "step": 2940980
    },
    {
      "epoch": 4.813011003973475,
      "grad_norm": 0.13672566413879395,
      "learning_rate": 3.764422158233562e-07,
      "loss": 0.0123,
      "step": 2941000
    },
    {
      "epoch": 4.813043734412129,
      "grad_norm": 0.21794059872627258,
      "learning_rate": 3.7637632360983904e-07,
      "loss": 0.009,
      "step": 2941020
    },
    {
      "epoch": 4.8130764648507816,
      "grad_norm": 0.15533733367919922,
      "learning_rate": 3.7631043139632194e-07,
      "loss": 0.0054,
      "step": 2941040
    },
    {
      "epoch": 4.813109195289435,
      "grad_norm": 0.15202103555202484,
      "learning_rate": 3.762445391828048e-07,
      "loss": 0.0069,
      "step": 2941060
    },
    {
      "epoch": 4.813141925728089,
      "grad_norm": 0.1586187779903412,
      "learning_rate": 3.761786469692877e-07,
      "loss": 0.0106,
      "step": 2941080
    },
    {
      "epoch": 4.813174656166742,
      "grad_norm": 0.16624557971954346,
      "learning_rate": 3.7611275475577053e-07,
      "loss": 0.0085,
      "step": 2941100
    },
    {
      "epoch": 4.813207386605395,
      "grad_norm": 0.11256198585033417,
      "learning_rate": 3.7604686254225343e-07,
      "loss": 0.0131,
      "step": 2941120
    },
    {
      "epoch": 4.813240117044049,
      "grad_norm": 0.21818123757839203,
      "learning_rate": 3.7598097032873627e-07,
      "loss": 0.0091,
      "step": 2941140
    },
    {
      "epoch": 4.813272847482702,
      "grad_norm": 0.23062552511692047,
      "learning_rate": 3.7591507811521917e-07,
      "loss": 0.0086,
      "step": 2941160
    },
    {
      "epoch": 4.813305577921355,
      "grad_norm": 0.20331059396266937,
      "learning_rate": 3.75849185901702e-07,
      "loss": 0.0067,
      "step": 2941180
    },
    {
      "epoch": 4.8133383083600085,
      "grad_norm": 0.2755067050457001,
      "learning_rate": 3.7578329368818486e-07,
      "loss": 0.0083,
      "step": 2941200
    },
    {
      "epoch": 4.813371038798662,
      "grad_norm": 0.7474236488342285,
      "learning_rate": 3.7571740147466776e-07,
      "loss": 0.0088,
      "step": 2941220
    },
    {
      "epoch": 4.813403769237315,
      "grad_norm": 0.318361759185791,
      "learning_rate": 3.756515092611506e-07,
      "loss": 0.0099,
      "step": 2941240
    },
    {
      "epoch": 4.813436499675968,
      "grad_norm": 0.26710182428359985,
      "learning_rate": 3.755856170476335e-07,
      "loss": 0.0074,
      "step": 2941260
    },
    {
      "epoch": 4.813469230114622,
      "grad_norm": 0.3362926244735718,
      "learning_rate": 3.7551972483411635e-07,
      "loss": 0.0109,
      "step": 2941280
    },
    {
      "epoch": 4.813501960553276,
      "grad_norm": 0.1094202995300293,
      "learning_rate": 3.7545383262059925e-07,
      "loss": 0.0104,
      "step": 2941300
    },
    {
      "epoch": 4.813534690991928,
      "grad_norm": 0.9367557764053345,
      "learning_rate": 3.753879404070821e-07,
      "loss": 0.0105,
      "step": 2941320
    },
    {
      "epoch": 4.813567421430582,
      "grad_norm": 0.18537746369838715,
      "learning_rate": 3.75322048193565e-07,
      "loss": 0.0101,
      "step": 2941340
    },
    {
      "epoch": 4.8136001518692355,
      "grad_norm": 0.21285772323608398,
      "learning_rate": 3.7525615598004784e-07,
      "loss": 0.0108,
      "step": 2941360
    },
    {
      "epoch": 4.813632882307889,
      "grad_norm": 0.16841155290603638,
      "learning_rate": 3.751902637665308e-07,
      "loss": 0.0077,
      "step": 2941380
    },
    {
      "epoch": 4.813665612746542,
      "grad_norm": 0.11015556007623672,
      "learning_rate": 3.751243715530136e-07,
      "loss": 0.0072,
      "step": 2941400
    },
    {
      "epoch": 4.813698343185195,
      "grad_norm": 0.21651947498321533,
      "learning_rate": 3.7505847933949643e-07,
      "loss": 0.0077,
      "step": 2941420
    },
    {
      "epoch": 4.813731073623849,
      "grad_norm": 0.3811735510826111,
      "learning_rate": 3.749925871259794e-07,
      "loss": 0.0094,
      "step": 2941440
    },
    {
      "epoch": 4.813763804062502,
      "grad_norm": 0.5819616317749023,
      "learning_rate": 3.749266949124622e-07,
      "loss": 0.0127,
      "step": 2941460
    },
    {
      "epoch": 4.813796534501155,
      "grad_norm": 0.19831594824790955,
      "learning_rate": 3.748608026989451e-07,
      "loss": 0.0128,
      "step": 2941480
    },
    {
      "epoch": 4.813829264939809,
      "grad_norm": 0.07342901825904846,
      "learning_rate": 3.7479491048542797e-07,
      "loss": 0.0081,
      "step": 2941500
    },
    {
      "epoch": 4.813861995378462,
      "grad_norm": 0.1468856930732727,
      "learning_rate": 3.7472901827191087e-07,
      "loss": 0.0081,
      "step": 2941520
    },
    {
      "epoch": 4.813894725817115,
      "grad_norm": 0.35443538427352905,
      "learning_rate": 3.746631260583937e-07,
      "loss": 0.0087,
      "step": 2941540
    },
    {
      "epoch": 4.813927456255769,
      "grad_norm": 0.3971465229988098,
      "learning_rate": 3.745972338448766e-07,
      "loss": 0.009,
      "step": 2941560
    },
    {
      "epoch": 4.813960186694422,
      "grad_norm": 0.6746159195899963,
      "learning_rate": 3.7453134163135946e-07,
      "loss": 0.0079,
      "step": 2941580
    },
    {
      "epoch": 4.813992917133075,
      "grad_norm": 0.1975381076335907,
      "learning_rate": 3.7446544941784236e-07,
      "loss": 0.0115,
      "step": 2941600
    },
    {
      "epoch": 4.814025647571729,
      "grad_norm": 0.35258543491363525,
      "learning_rate": 3.743995572043252e-07,
      "loss": 0.0095,
      "step": 2941620
    },
    {
      "epoch": 4.814058378010382,
      "grad_norm": 0.3776912987232208,
      "learning_rate": 3.7433366499080805e-07,
      "loss": 0.0113,
      "step": 2941640
    },
    {
      "epoch": 4.814091108449036,
      "grad_norm": 0.29046931862831116,
      "learning_rate": 3.7426777277729095e-07,
      "loss": 0.0092,
      "step": 2941660
    },
    {
      "epoch": 4.814123838887689,
      "grad_norm": 0.07686648517847061,
      "learning_rate": 3.742018805637738e-07,
      "loss": 0.0077,
      "step": 2941680
    },
    {
      "epoch": 4.814156569326342,
      "grad_norm": 0.16788692772388458,
      "learning_rate": 3.741359883502567e-07,
      "loss": 0.009,
      "step": 2941700
    },
    {
      "epoch": 4.814189299764996,
      "grad_norm": 0.16183407604694366,
      "learning_rate": 3.7407009613673954e-07,
      "loss": 0.0102,
      "step": 2941720
    },
    {
      "epoch": 4.8142220302036485,
      "grad_norm": 0.16632753610610962,
      "learning_rate": 3.7400420392322244e-07,
      "loss": 0.0141,
      "step": 2941740
    },
    {
      "epoch": 4.814254760642302,
      "grad_norm": 0.3706085979938507,
      "learning_rate": 3.739383117097053e-07,
      "loss": 0.0117,
      "step": 2941760
    },
    {
      "epoch": 4.814287491080956,
      "grad_norm": 0.12224315851926804,
      "learning_rate": 3.738724194961882e-07,
      "loss": 0.0075,
      "step": 2941780
    },
    {
      "epoch": 4.814320221519608,
      "grad_norm": 0.2912810146808624,
      "learning_rate": 3.73806527282671e-07,
      "loss": 0.0079,
      "step": 2941800
    },
    {
      "epoch": 4.814352951958262,
      "grad_norm": 0.24669405817985535,
      "learning_rate": 3.737406350691539e-07,
      "loss": 0.0087,
      "step": 2941820
    },
    {
      "epoch": 4.8143856823969156,
      "grad_norm": 0.0906059667468071,
      "learning_rate": 3.7367474285563677e-07,
      "loss": 0.0077,
      "step": 2941840
    },
    {
      "epoch": 4.814418412835569,
      "grad_norm": 0.1691335290670395,
      "learning_rate": 3.7360885064211967e-07,
      "loss": 0.0085,
      "step": 2941860
    },
    {
      "epoch": 4.814451143274222,
      "grad_norm": 0.2520807981491089,
      "learning_rate": 3.735429584286025e-07,
      "loss": 0.0099,
      "step": 2941880
    },
    {
      "epoch": 4.8144838737128755,
      "grad_norm": 0.21029812097549438,
      "learning_rate": 3.7347706621508536e-07,
      "loss": 0.0094,
      "step": 2941900
    },
    {
      "epoch": 4.814516604151529,
      "grad_norm": 0.12009173631668091,
      "learning_rate": 3.7341117400156826e-07,
      "loss": 0.0098,
      "step": 2941920
    },
    {
      "epoch": 4.814549334590183,
      "grad_norm": 0.08079250901937485,
      "learning_rate": 3.733452817880511e-07,
      "loss": 0.0068,
      "step": 2941940
    },
    {
      "epoch": 4.814582065028835,
      "grad_norm": 0.20068390667438507,
      "learning_rate": 3.73279389574534e-07,
      "loss": 0.0094,
      "step": 2941960
    },
    {
      "epoch": 4.814614795467489,
      "grad_norm": 0.2578514516353607,
      "learning_rate": 3.7321349736101685e-07,
      "loss": 0.0069,
      "step": 2941980
    },
    {
      "epoch": 4.8146475259061425,
      "grad_norm": 0.26328667998313904,
      "learning_rate": 3.7314760514749975e-07,
      "loss": 0.0084,
      "step": 2942000
    },
    {
      "epoch": 4.814680256344795,
      "grad_norm": 0.1299738734960556,
      "learning_rate": 3.730817129339826e-07,
      "loss": 0.0083,
      "step": 2942020
    },
    {
      "epoch": 4.814712986783449,
      "grad_norm": 0.2792460024356842,
      "learning_rate": 3.7301582072046554e-07,
      "loss": 0.005,
      "step": 2942040
    },
    {
      "epoch": 4.814745717222102,
      "grad_norm": 0.5450021028518677,
      "learning_rate": 3.7294992850694834e-07,
      "loss": 0.0078,
      "step": 2942060
    },
    {
      "epoch": 4.814778447660755,
      "grad_norm": 0.18314911425113678,
      "learning_rate": 3.728840362934313e-07,
      "loss": 0.0057,
      "step": 2942080
    },
    {
      "epoch": 4.814811178099409,
      "grad_norm": 0.13894589245319366,
      "learning_rate": 3.7281814407991413e-07,
      "loss": 0.0062,
      "step": 2942100
    },
    {
      "epoch": 4.814843908538062,
      "grad_norm": 0.020586874336004257,
      "learning_rate": 3.7275225186639693e-07,
      "loss": 0.0095,
      "step": 2942120
    },
    {
      "epoch": 4.814876638976716,
      "grad_norm": 0.2935928702354431,
      "learning_rate": 3.726863596528799e-07,
      "loss": 0.0102,
      "step": 2942140
    },
    {
      "epoch": 4.814909369415369,
      "grad_norm": 0.12849141657352448,
      "learning_rate": 3.726204674393627e-07,
      "loss": 0.0112,
      "step": 2942160
    },
    {
      "epoch": 4.814942099854022,
      "grad_norm": 0.0815609022974968,
      "learning_rate": 3.725545752258456e-07,
      "loss": 0.0093,
      "step": 2942180
    },
    {
      "epoch": 4.814974830292676,
      "grad_norm": 0.18688927590847015,
      "learning_rate": 3.7248868301232847e-07,
      "loss": 0.0124,
      "step": 2942200
    },
    {
      "epoch": 4.815007560731329,
      "grad_norm": 0.2465154230594635,
      "learning_rate": 3.7242279079881137e-07,
      "loss": 0.0086,
      "step": 2942220
    },
    {
      "epoch": 4.815040291169982,
      "grad_norm": 0.08405815809965134,
      "learning_rate": 3.723568985852942e-07,
      "loss": 0.007,
      "step": 2942240
    },
    {
      "epoch": 4.815073021608636,
      "grad_norm": 0.28855034708976746,
      "learning_rate": 3.722910063717771e-07,
      "loss": 0.0081,
      "step": 2942260
    },
    {
      "epoch": 4.815105752047289,
      "grad_norm": 0.03202744573354721,
      "learning_rate": 3.7222511415825996e-07,
      "loss": 0.0044,
      "step": 2942280
    },
    {
      "epoch": 4.815138482485942,
      "grad_norm": 0.10014121979475021,
      "learning_rate": 3.7215922194474285e-07,
      "loss": 0.0119,
      "step": 2942300
    },
    {
      "epoch": 4.815171212924596,
      "grad_norm": 0.3752635717391968,
      "learning_rate": 3.720933297312257e-07,
      "loss": 0.0103,
      "step": 2942320
    },
    {
      "epoch": 4.815203943363249,
      "grad_norm": 0.3129575550556183,
      "learning_rate": 3.7202743751770855e-07,
      "loss": 0.0054,
      "step": 2942340
    },
    {
      "epoch": 4.815236673801902,
      "grad_norm": 0.1324184536933899,
      "learning_rate": 3.7196154530419144e-07,
      "loss": 0.0059,
      "step": 2942360
    },
    {
      "epoch": 4.8152694042405555,
      "grad_norm": 0.16160733997821808,
      "learning_rate": 3.718956530906743e-07,
      "loss": 0.0114,
      "step": 2942380
    },
    {
      "epoch": 4.815302134679209,
      "grad_norm": 0.15717843174934387,
      "learning_rate": 3.718297608771572e-07,
      "loss": 0.0063,
      "step": 2942400
    },
    {
      "epoch": 4.815334865117863,
      "grad_norm": 0.25200632214546204,
      "learning_rate": 3.7176386866364003e-07,
      "loss": 0.0099,
      "step": 2942420
    },
    {
      "epoch": 4.815367595556515,
      "grad_norm": 0.08089805394411087,
      "learning_rate": 3.7169797645012293e-07,
      "loss": 0.0098,
      "step": 2942440
    },
    {
      "epoch": 4.815400325995169,
      "grad_norm": 0.43002232909202576,
      "learning_rate": 3.716320842366058e-07,
      "loss": 0.0145,
      "step": 2942460
    },
    {
      "epoch": 4.815433056433823,
      "grad_norm": 0.18349778652191162,
      "learning_rate": 3.715661920230887e-07,
      "loss": 0.008,
      "step": 2942480
    },
    {
      "epoch": 4.815465786872475,
      "grad_norm": 0.24889829754829407,
      "learning_rate": 3.715002998095715e-07,
      "loss": 0.0103,
      "step": 2942500
    },
    {
      "epoch": 4.815498517311129,
      "grad_norm": 0.21392761170864105,
      "learning_rate": 3.714344075960544e-07,
      "loss": 0.0116,
      "step": 2942520
    },
    {
      "epoch": 4.8155312477497825,
      "grad_norm": 0.21665625274181366,
      "learning_rate": 3.7136851538253727e-07,
      "loss": 0.009,
      "step": 2942540
    },
    {
      "epoch": 4.815563978188436,
      "grad_norm": 0.20695337653160095,
      "learning_rate": 3.713026231690201e-07,
      "loss": 0.0097,
      "step": 2942560
    },
    {
      "epoch": 4.815596708627089,
      "grad_norm": 0.15451353788375854,
      "learning_rate": 3.71236730955503e-07,
      "loss": 0.0084,
      "step": 2942580
    },
    {
      "epoch": 4.815629439065742,
      "grad_norm": 0.12143919616937637,
      "learning_rate": 3.7117083874198586e-07,
      "loss": 0.0113,
      "step": 2942600
    },
    {
      "epoch": 4.815662169504396,
      "grad_norm": 0.2648110091686249,
      "learning_rate": 3.7110494652846876e-07,
      "loss": 0.0164,
      "step": 2942620
    },
    {
      "epoch": 4.815694899943049,
      "grad_norm": 0.2126757949590683,
      "learning_rate": 3.710390543149516e-07,
      "loss": 0.008,
      "step": 2942640
    },
    {
      "epoch": 4.815727630381702,
      "grad_norm": 0.18092338740825653,
      "learning_rate": 3.709731621014345e-07,
      "loss": 0.0094,
      "step": 2942660
    },
    {
      "epoch": 4.815760360820356,
      "grad_norm": 0.1554567515850067,
      "learning_rate": 3.7090726988791735e-07,
      "loss": 0.0071,
      "step": 2942680
    },
    {
      "epoch": 4.815793091259009,
      "grad_norm": 0.3436216115951538,
      "learning_rate": 3.708413776744003e-07,
      "loss": 0.0085,
      "step": 2942700
    },
    {
      "epoch": 4.815825821697662,
      "grad_norm": 0.2792770266532898,
      "learning_rate": 3.707754854608831e-07,
      "loss": 0.0112,
      "step": 2942720
    },
    {
      "epoch": 4.815858552136316,
      "grad_norm": 0.19438689947128296,
      "learning_rate": 3.7070959324736604e-07,
      "loss": 0.0071,
      "step": 2942740
    },
    {
      "epoch": 4.815891282574969,
      "grad_norm": 1.0437439680099487,
      "learning_rate": 3.706437010338489e-07,
      "loss": 0.0134,
      "step": 2942760
    },
    {
      "epoch": 4.815924013013622,
      "grad_norm": 0.14361152052879333,
      "learning_rate": 3.705778088203317e-07,
      "loss": 0.01,
      "step": 2942780
    },
    {
      "epoch": 4.815956743452276,
      "grad_norm": 0.26808983087539673,
      "learning_rate": 3.7051191660681463e-07,
      "loss": 0.0066,
      "step": 2942800
    },
    {
      "epoch": 4.815989473890929,
      "grad_norm": 0.1116304025053978,
      "learning_rate": 3.704460243932975e-07,
      "loss": 0.0088,
      "step": 2942820
    },
    {
      "epoch": 4.816022204329583,
      "grad_norm": 0.5347395539283752,
      "learning_rate": 3.703801321797804e-07,
      "loss": 0.0106,
      "step": 2942840
    },
    {
      "epoch": 4.8160549347682355,
      "grad_norm": 0.29859164357185364,
      "learning_rate": 3.703142399662632e-07,
      "loss": 0.0071,
      "step": 2942860
    },
    {
      "epoch": 4.816087665206889,
      "grad_norm": 0.2587645947933197,
      "learning_rate": 3.702483477527461e-07,
      "loss": 0.0059,
      "step": 2942880
    },
    {
      "epoch": 4.816120395645543,
      "grad_norm": 0.1816222220659256,
      "learning_rate": 3.7018245553922896e-07,
      "loss": 0.0069,
      "step": 2942900
    },
    {
      "epoch": 4.816153126084195,
      "grad_norm": 0.7699034214019775,
      "learning_rate": 3.7011656332571186e-07,
      "loss": 0.0085,
      "step": 2942920
    },
    {
      "epoch": 4.816185856522849,
      "grad_norm": 0.4267028272151947,
      "learning_rate": 3.700506711121947e-07,
      "loss": 0.0098,
      "step": 2942940
    },
    {
      "epoch": 4.816218586961503,
      "grad_norm": 0.09735938161611557,
      "learning_rate": 3.699847788986776e-07,
      "loss": 0.0103,
      "step": 2942960
    },
    {
      "epoch": 4.816251317400155,
      "grad_norm": 0.6868951320648193,
      "learning_rate": 3.6991888668516045e-07,
      "loss": 0.0094,
      "step": 2942980
    },
    {
      "epoch": 4.816284047838809,
      "grad_norm": 0.12123966217041016,
      "learning_rate": 3.698529944716433e-07,
      "loss": 0.0077,
      "step": 2943000
    },
    {
      "epoch": 4.8163167782774625,
      "grad_norm": 0.265471875667572,
      "learning_rate": 3.697871022581262e-07,
      "loss": 0.0078,
      "step": 2943020
    },
    {
      "epoch": 4.816349508716116,
      "grad_norm": 0.06715607643127441,
      "learning_rate": 3.6972121004460904e-07,
      "loss": 0.0056,
      "step": 2943040
    },
    {
      "epoch": 4.816382239154769,
      "grad_norm": 0.2725849151611328,
      "learning_rate": 3.6965531783109194e-07,
      "loss": 0.0056,
      "step": 2943060
    },
    {
      "epoch": 4.816414969593422,
      "grad_norm": 0.15698784589767456,
      "learning_rate": 3.695894256175748e-07,
      "loss": 0.0053,
      "step": 2943080
    },
    {
      "epoch": 4.816447700032076,
      "grad_norm": 0.1363879293203354,
      "learning_rate": 3.695235334040577e-07,
      "loss": 0.0074,
      "step": 2943100
    },
    {
      "epoch": 4.81648043047073,
      "grad_norm": 0.1926809549331665,
      "learning_rate": 3.6945764119054053e-07,
      "loss": 0.0096,
      "step": 2943120
    },
    {
      "epoch": 4.816513160909382,
      "grad_norm": 0.1635306179523468,
      "learning_rate": 3.6939174897702343e-07,
      "loss": 0.0099,
      "step": 2943140
    },
    {
      "epoch": 4.816545891348036,
      "grad_norm": 0.14369456470012665,
      "learning_rate": 3.693258567635063e-07,
      "loss": 0.0115,
      "step": 2943160
    },
    {
      "epoch": 4.8165786217866895,
      "grad_norm": 0.3581548035144806,
      "learning_rate": 3.692599645499892e-07,
      "loss": 0.0099,
      "step": 2943180
    },
    {
      "epoch": 4.816611352225342,
      "grad_norm": 0.09086707979440689,
      "learning_rate": 3.69194072336472e-07,
      "loss": 0.0086,
      "step": 2943200
    },
    {
      "epoch": 4.816644082663996,
      "grad_norm": 0.3129216134548187,
      "learning_rate": 3.6912818012295487e-07,
      "loss": 0.0107,
      "step": 2943220
    },
    {
      "epoch": 4.816676813102649,
      "grad_norm": 0.48108649253845215,
      "learning_rate": 3.6906228790943776e-07,
      "loss": 0.0081,
      "step": 2943240
    },
    {
      "epoch": 4.816709543541302,
      "grad_norm": 0.11174385249614716,
      "learning_rate": 3.689963956959206e-07,
      "loss": 0.0117,
      "step": 2943260
    },
    {
      "epoch": 4.816742273979956,
      "grad_norm": 0.1438000202178955,
      "learning_rate": 3.689305034824035e-07,
      "loss": 0.0081,
      "step": 2943280
    },
    {
      "epoch": 4.816775004418609,
      "grad_norm": 0.37548118829727173,
      "learning_rate": 3.6886461126888635e-07,
      "loss": 0.0072,
      "step": 2943300
    },
    {
      "epoch": 4.816807734857263,
      "grad_norm": 0.1976553052663803,
      "learning_rate": 3.6879871905536925e-07,
      "loss": 0.0082,
      "step": 2943320
    },
    {
      "epoch": 4.816840465295916,
      "grad_norm": 0.23441486060619354,
      "learning_rate": 3.687328268418521e-07,
      "loss": 0.0105,
      "step": 2943340
    },
    {
      "epoch": 4.816873195734569,
      "grad_norm": 0.4556220769882202,
      "learning_rate": 3.6866693462833505e-07,
      "loss": 0.0132,
      "step": 2943360
    },
    {
      "epoch": 4.816905926173223,
      "grad_norm": 1.088521122932434,
      "learning_rate": 3.6860104241481784e-07,
      "loss": 0.0079,
      "step": 2943380
    },
    {
      "epoch": 4.816938656611876,
      "grad_norm": 0.0631309300661087,
      "learning_rate": 3.685351502013008e-07,
      "loss": 0.0062,
      "step": 2943400
    },
    {
      "epoch": 4.816971387050529,
      "grad_norm": 0.7466771602630615,
      "learning_rate": 3.6846925798778364e-07,
      "loss": 0.0065,
      "step": 2943420
    },
    {
      "epoch": 4.817004117489183,
      "grad_norm": 0.05430157855153084,
      "learning_rate": 3.6840336577426643e-07,
      "loss": 0.0079,
      "step": 2943440
    },
    {
      "epoch": 4.817036847927836,
      "grad_norm": 0.15275856852531433,
      "learning_rate": 3.683374735607494e-07,
      "loss": 0.0052,
      "step": 2943460
    },
    {
      "epoch": 4.817069578366489,
      "grad_norm": 0.20830172300338745,
      "learning_rate": 3.6827158134723223e-07,
      "loss": 0.0102,
      "step": 2943480
    },
    {
      "epoch": 4.817102308805143,
      "grad_norm": 0.08162366598844528,
      "learning_rate": 3.6820568913371513e-07,
      "loss": 0.0076,
      "step": 2943500
    },
    {
      "epoch": 4.817135039243796,
      "grad_norm": 0.37157177925109863,
      "learning_rate": 3.6813979692019797e-07,
      "loss": 0.0074,
      "step": 2943520
    },
    {
      "epoch": 4.817167769682449,
      "grad_norm": 0.32252007722854614,
      "learning_rate": 3.6807390470668087e-07,
      "loss": 0.0088,
      "step": 2943540
    },
    {
      "epoch": 4.8172005001211025,
      "grad_norm": 0.14372879266738892,
      "learning_rate": 3.680080124931637e-07,
      "loss": 0.0098,
      "step": 2943560
    },
    {
      "epoch": 4.817233230559756,
      "grad_norm": 0.2861858308315277,
      "learning_rate": 3.679421202796466e-07,
      "loss": 0.0073,
      "step": 2943580
    },
    {
      "epoch": 4.81726596099841,
      "grad_norm": 0.12084071338176727,
      "learning_rate": 3.6787622806612946e-07,
      "loss": 0.0102,
      "step": 2943600
    },
    {
      "epoch": 4.817298691437062,
      "grad_norm": 0.08017268031835556,
      "learning_rate": 3.6781033585261236e-07,
      "loss": 0.011,
      "step": 2943620
    },
    {
      "epoch": 4.817331421875716,
      "grad_norm": 0.2689592242240906,
      "learning_rate": 3.677444436390952e-07,
      "loss": 0.0081,
      "step": 2943640
    },
    {
      "epoch": 4.8173641523143695,
      "grad_norm": 0.23922400176525116,
      "learning_rate": 3.6767855142557805e-07,
      "loss": 0.0082,
      "step": 2943660
    },
    {
      "epoch": 4.817396882753023,
      "grad_norm": 0.25721508264541626,
      "learning_rate": 3.6761265921206095e-07,
      "loss": 0.0074,
      "step": 2943680
    },
    {
      "epoch": 4.817429613191676,
      "grad_norm": 0.48068591952323914,
      "learning_rate": 3.675467669985438e-07,
      "loss": 0.0117,
      "step": 2943700
    },
    {
      "epoch": 4.817462343630329,
      "grad_norm": 0.23415572941303253,
      "learning_rate": 3.674808747850267e-07,
      "loss": 0.0045,
      "step": 2943720
    },
    {
      "epoch": 4.817495074068983,
      "grad_norm": 0.2806638181209564,
      "learning_rate": 3.6741498257150954e-07,
      "loss": 0.0066,
      "step": 2943740
    },
    {
      "epoch": 4.817527804507636,
      "grad_norm": 0.29350659251213074,
      "learning_rate": 3.6734909035799244e-07,
      "loss": 0.0101,
      "step": 2943760
    },
    {
      "epoch": 4.817560534946289,
      "grad_norm": Infinity,
      "learning_rate": 3.672831981444753e-07,
      "loss": 0.0162,
      "step": 2943780
    },
    {
      "epoch": 4.817593265384943,
      "grad_norm": 0.12046915292739868,
      "learning_rate": 3.672173059309582e-07,
      "loss": 0.0101,
      "step": 2943800
    },
    {
      "epoch": 4.817625995823596,
      "grad_norm": 0.42232581973075867,
      "learning_rate": 3.6715141371744103e-07,
      "loss": 0.0108,
      "step": 2943820
    },
    {
      "epoch": 4.817658726262249,
      "grad_norm": 0.4997991919517517,
      "learning_rate": 3.6708552150392393e-07,
      "loss": 0.0092,
      "step": 2943840
    },
    {
      "epoch": 4.817691456700903,
      "grad_norm": 0.18270589411258698,
      "learning_rate": 3.6701962929040677e-07,
      "loss": 0.0065,
      "step": 2943860
    },
    {
      "epoch": 4.817724187139556,
      "grad_norm": 0.22971147298812866,
      "learning_rate": 3.669537370768896e-07,
      "loss": 0.0059,
      "step": 2943880
    },
    {
      "epoch": 4.817756917578209,
      "grad_norm": 0.20032382011413574,
      "learning_rate": 3.668878448633725e-07,
      "loss": 0.0122,
      "step": 2943900
    },
    {
      "epoch": 4.817789648016863,
      "grad_norm": 0.5004416108131409,
      "learning_rate": 3.6682195264985536e-07,
      "loss": 0.008,
      "step": 2943920
    },
    {
      "epoch": 4.817822378455516,
      "grad_norm": 0.22144357860088348,
      "learning_rate": 3.6675606043633826e-07,
      "loss": 0.0117,
      "step": 2943940
    },
    {
      "epoch": 4.817855108894169,
      "grad_norm": 0.15781371295452118,
      "learning_rate": 3.666901682228211e-07,
      "loss": 0.0081,
      "step": 2943960
    },
    {
      "epoch": 4.817887839332823,
      "grad_norm": 0.07618115097284317,
      "learning_rate": 3.66624276009304e-07,
      "loss": 0.0133,
      "step": 2943980
    },
    {
      "epoch": 4.817920569771476,
      "grad_norm": 0.6292930245399475,
      "learning_rate": 3.6655838379578685e-07,
      "loss": 0.0234,
      "step": 2944000
    },
    {
      "epoch": 4.81795330021013,
      "grad_norm": 0.11909734457731247,
      "learning_rate": 3.664924915822698e-07,
      "loss": 0.0084,
      "step": 2944020
    },
    {
      "epoch": 4.8179860306487825,
      "grad_norm": 0.12590055167675018,
      "learning_rate": 3.664265993687526e-07,
      "loss": 0.0064,
      "step": 2944040
    },
    {
      "epoch": 4.818018761087436,
      "grad_norm": 0.08420601487159729,
      "learning_rate": 3.6636070715523555e-07,
      "loss": 0.0113,
      "step": 2944060
    },
    {
      "epoch": 4.81805149152609,
      "grad_norm": 0.04057265445590019,
      "learning_rate": 3.662948149417184e-07,
      "loss": 0.0135,
      "step": 2944080
    },
    {
      "epoch": 4.818084221964742,
      "grad_norm": 0.41425222158432007,
      "learning_rate": 3.662289227282012e-07,
      "loss": 0.0097,
      "step": 2944100
    },
    {
      "epoch": 4.818116952403396,
      "grad_norm": 0.4701859951019287,
      "learning_rate": 3.6616303051468414e-07,
      "loss": 0.0116,
      "step": 2944120
    },
    {
      "epoch": 4.81814968284205,
      "grad_norm": 0.06706009805202484,
      "learning_rate": 3.66097138301167e-07,
      "loss": 0.0082,
      "step": 2944140
    },
    {
      "epoch": 4.818182413280702,
      "grad_norm": 0.03827277198433876,
      "learning_rate": 3.660312460876499e-07,
      "loss": 0.0094,
      "step": 2944160
    },
    {
      "epoch": 4.818215143719356,
      "grad_norm": 0.413639098405838,
      "learning_rate": 3.659653538741327e-07,
      "loss": 0.0101,
      "step": 2944180
    },
    {
      "epoch": 4.8182478741580095,
      "grad_norm": 0.13325625658035278,
      "learning_rate": 3.658994616606156e-07,
      "loss": 0.0093,
      "step": 2944200
    },
    {
      "epoch": 4.818280604596663,
      "grad_norm": 0.6269598007202148,
      "learning_rate": 3.6583356944709847e-07,
      "loss": 0.0063,
      "step": 2944220
    },
    {
      "epoch": 4.818313335035316,
      "grad_norm": 0.03461875766515732,
      "learning_rate": 3.6576767723358137e-07,
      "loss": 0.0069,
      "step": 2944240
    },
    {
      "epoch": 4.818346065473969,
      "grad_norm": 0.25486883521080017,
      "learning_rate": 3.657017850200642e-07,
      "loss": 0.0089,
      "step": 2944260
    },
    {
      "epoch": 4.818378795912623,
      "grad_norm": 0.4037541151046753,
      "learning_rate": 3.656358928065471e-07,
      "loss": 0.012,
      "step": 2944280
    },
    {
      "epoch": 4.818411526351277,
      "grad_norm": 0.24027806520462036,
      "learning_rate": 3.6557000059302996e-07,
      "loss": 0.0072,
      "step": 2944300
    },
    {
      "epoch": 4.818444256789929,
      "grad_norm": 0.21845488250255585,
      "learning_rate": 3.655041083795128e-07,
      "loss": 0.0108,
      "step": 2944320
    },
    {
      "epoch": 4.818476987228583,
      "grad_norm": 0.42386358976364136,
      "learning_rate": 3.654382161659957e-07,
      "loss": 0.0085,
      "step": 2944340
    },
    {
      "epoch": 4.8185097176672365,
      "grad_norm": 0.45061537623405457,
      "learning_rate": 3.6537232395247855e-07,
      "loss": 0.0076,
      "step": 2944360
    },
    {
      "epoch": 4.818542448105889,
      "grad_norm": 0.5414597392082214,
      "learning_rate": 3.6530643173896145e-07,
      "loss": 0.0107,
      "step": 2944380
    },
    {
      "epoch": 4.818575178544543,
      "grad_norm": 0.18857093155384064,
      "learning_rate": 3.652405395254443e-07,
      "loss": 0.0097,
      "step": 2944400
    },
    {
      "epoch": 4.818607908983196,
      "grad_norm": 0.23311112821102142,
      "learning_rate": 3.651746473119272e-07,
      "loss": 0.0086,
      "step": 2944420
    },
    {
      "epoch": 4.818640639421849,
      "grad_norm": 0.1430414617061615,
      "learning_rate": 3.6510875509841004e-07,
      "loss": 0.0092,
      "step": 2944440
    },
    {
      "epoch": 4.818673369860503,
      "grad_norm": 0.12532086670398712,
      "learning_rate": 3.6504286288489294e-07,
      "loss": 0.0072,
      "step": 2944460
    },
    {
      "epoch": 4.818706100299156,
      "grad_norm": 0.18275459110736847,
      "learning_rate": 3.649769706713758e-07,
      "loss": 0.0118,
      "step": 2944480
    },
    {
      "epoch": 4.81873883073781,
      "grad_norm": 0.20747360587120056,
      "learning_rate": 3.649110784578587e-07,
      "loss": 0.008,
      "step": 2944500
    },
    {
      "epoch": 4.8187715611764625,
      "grad_norm": 0.1801379770040512,
      "learning_rate": 3.648451862443415e-07,
      "loss": 0.0068,
      "step": 2944520
    },
    {
      "epoch": 4.818804291615116,
      "grad_norm": 0.23334628343582153,
      "learning_rate": 3.6477929403082437e-07,
      "loss": 0.0161,
      "step": 2944540
    },
    {
      "epoch": 4.81883702205377,
      "grad_norm": 0.6483046412467957,
      "learning_rate": 3.6471340181730727e-07,
      "loss": 0.014,
      "step": 2944560
    },
    {
      "epoch": 4.818869752492423,
      "grad_norm": 0.2271505743265152,
      "learning_rate": 3.646475096037901e-07,
      "loss": 0.0077,
      "step": 2944580
    },
    {
      "epoch": 4.818902482931076,
      "grad_norm": 0.12377816438674927,
      "learning_rate": 3.64581617390273e-07,
      "loss": 0.0083,
      "step": 2944600
    },
    {
      "epoch": 4.81893521336973,
      "grad_norm": 0.17506492137908936,
      "learning_rate": 3.6451572517675586e-07,
      "loss": 0.0074,
      "step": 2944620
    },
    {
      "epoch": 4.818967943808383,
      "grad_norm": 0.028063273057341576,
      "learning_rate": 3.6444983296323876e-07,
      "loss": 0.0104,
      "step": 2944640
    },
    {
      "epoch": 4.819000674247036,
      "grad_norm": 0.1165187656879425,
      "learning_rate": 3.643839407497216e-07,
      "loss": 0.0057,
      "step": 2944660
    },
    {
      "epoch": 4.8190334046856895,
      "grad_norm": 0.22690320014953613,
      "learning_rate": 3.6431804853620455e-07,
      "loss": 0.0089,
      "step": 2944680
    },
    {
      "epoch": 4.819066135124343,
      "grad_norm": 0.19084961712360382,
      "learning_rate": 3.6425215632268735e-07,
      "loss": 0.0118,
      "step": 2944700
    },
    {
      "epoch": 4.819098865562996,
      "grad_norm": 0.18240895867347717,
      "learning_rate": 3.641862641091703e-07,
      "loss": 0.0077,
      "step": 2944720
    },
    {
      "epoch": 4.819131596001649,
      "grad_norm": 0.0889265164732933,
      "learning_rate": 3.6412037189565314e-07,
      "loss": 0.0087,
      "step": 2944740
    },
    {
      "epoch": 4.819164326440303,
      "grad_norm": 0.3099649250507355,
      "learning_rate": 3.6405447968213594e-07,
      "loss": 0.0084,
      "step": 2944760
    },
    {
      "epoch": 4.819197056878957,
      "grad_norm": 0.3835918605327606,
      "learning_rate": 3.639885874686189e-07,
      "loss": 0.0092,
      "step": 2944780
    },
    {
      "epoch": 4.819229787317609,
      "grad_norm": 0.25236231088638306,
      "learning_rate": 3.6392269525510173e-07,
      "loss": 0.0073,
      "step": 2944800
    },
    {
      "epoch": 4.819262517756263,
      "grad_norm": 0.16384881734848022,
      "learning_rate": 3.6385680304158463e-07,
      "loss": 0.0082,
      "step": 2944820
    },
    {
      "epoch": 4.8192952481949165,
      "grad_norm": 0.2970975339412689,
      "learning_rate": 3.637909108280675e-07,
      "loss": 0.0129,
      "step": 2944840
    },
    {
      "epoch": 4.81932797863357,
      "grad_norm": 0.20380201935768127,
      "learning_rate": 3.637250186145504e-07,
      "loss": 0.0071,
      "step": 2944860
    },
    {
      "epoch": 4.819360709072223,
      "grad_norm": 0.21406176686286926,
      "learning_rate": 3.636591264010332e-07,
      "loss": 0.0069,
      "step": 2944880
    },
    {
      "epoch": 4.819393439510876,
      "grad_norm": 0.26595816016197205,
      "learning_rate": 3.635932341875161e-07,
      "loss": 0.0101,
      "step": 2944900
    },
    {
      "epoch": 4.81942616994953,
      "grad_norm": 0.116258405148983,
      "learning_rate": 3.6352734197399897e-07,
      "loss": 0.0082,
      "step": 2944920
    },
    {
      "epoch": 4.819458900388183,
      "grad_norm": 0.12471572309732437,
      "learning_rate": 3.6346144976048187e-07,
      "loss": 0.0127,
      "step": 2944940
    },
    {
      "epoch": 4.819491630826836,
      "grad_norm": 0.17611493170261383,
      "learning_rate": 3.633955575469647e-07,
      "loss": 0.0081,
      "step": 2944960
    },
    {
      "epoch": 4.81952436126549,
      "grad_norm": 0.15269897878170013,
      "learning_rate": 3.6332966533344756e-07,
      "loss": 0.0097,
      "step": 2944980
    },
    {
      "epoch": 4.819557091704143,
      "grad_norm": 0.2131667137145996,
      "learning_rate": 3.6326377311993046e-07,
      "loss": 0.0115,
      "step": 2945000
    },
    {
      "epoch": 4.819589822142796,
      "grad_norm": 0.3639662563800812,
      "learning_rate": 3.631978809064133e-07,
      "loss": 0.0108,
      "step": 2945020
    },
    {
      "epoch": 4.81962255258145,
      "grad_norm": 0.22297987341880798,
      "learning_rate": 3.631319886928962e-07,
      "loss": 0.0106,
      "step": 2945040
    },
    {
      "epoch": 4.819655283020103,
      "grad_norm": 0.19006024301052094,
      "learning_rate": 3.6306609647937905e-07,
      "loss": 0.0072,
      "step": 2945060
    },
    {
      "epoch": 4.819688013458756,
      "grad_norm": 0.18022486567497253,
      "learning_rate": 3.6300020426586194e-07,
      "loss": 0.0064,
      "step": 2945080
    },
    {
      "epoch": 4.81972074389741,
      "grad_norm": 0.3885089159011841,
      "learning_rate": 3.629343120523448e-07,
      "loss": 0.0102,
      "step": 2945100
    },
    {
      "epoch": 4.819753474336063,
      "grad_norm": 0.2857125997543335,
      "learning_rate": 3.628684198388277e-07,
      "loss": 0.0105,
      "step": 2945120
    },
    {
      "epoch": 4.819786204774717,
      "grad_norm": 0.4221154749393463,
      "learning_rate": 3.6280252762531053e-07,
      "loss": 0.0109,
      "step": 2945140
    },
    {
      "epoch": 4.81981893521337,
      "grad_norm": 0.3069709241390228,
      "learning_rate": 3.6273663541179343e-07,
      "loss": 0.0113,
      "step": 2945160
    },
    {
      "epoch": 4.819851665652023,
      "grad_norm": 0.23100751638412476,
      "learning_rate": 3.626707431982763e-07,
      "loss": 0.0101,
      "step": 2945180
    },
    {
      "epoch": 4.819884396090677,
      "grad_norm": 0.2697038948535919,
      "learning_rate": 3.626048509847591e-07,
      "loss": 0.0062,
      "step": 2945200
    },
    {
      "epoch": 4.8199171265293295,
      "grad_norm": 0.4727693796157837,
      "learning_rate": 3.62538958771242e-07,
      "loss": 0.0092,
      "step": 2945220
    },
    {
      "epoch": 4.819949856967983,
      "grad_norm": 0.5741121768951416,
      "learning_rate": 3.6247306655772487e-07,
      "loss": 0.0115,
      "step": 2945240
    },
    {
      "epoch": 4.819982587406637,
      "grad_norm": 0.18023793399333954,
      "learning_rate": 3.6240717434420777e-07,
      "loss": 0.0116,
      "step": 2945260
    },
    {
      "epoch": 4.820015317845289,
      "grad_norm": 0.07637850940227509,
      "learning_rate": 3.623412821306906e-07,
      "loss": 0.0122,
      "step": 2945280
    },
    {
      "epoch": 4.820048048283943,
      "grad_norm": 0.2786213457584381,
      "learning_rate": 3.622753899171735e-07,
      "loss": 0.0143,
      "step": 2945300
    },
    {
      "epoch": 4.8200807787225965,
      "grad_norm": 0.1912812441587448,
      "learning_rate": 3.6220949770365636e-07,
      "loss": 0.007,
      "step": 2945320
    },
    {
      "epoch": 4.82011350916125,
      "grad_norm": 0.24247364699840546,
      "learning_rate": 3.621436054901393e-07,
      "loss": 0.0113,
      "step": 2945340
    },
    {
      "epoch": 4.820146239599903,
      "grad_norm": 0.13507135212421417,
      "learning_rate": 3.620777132766221e-07,
      "loss": 0.009,
      "step": 2945360
    },
    {
      "epoch": 4.820178970038556,
      "grad_norm": 0.1962457150220871,
      "learning_rate": 3.6201182106310505e-07,
      "loss": 0.012,
      "step": 2945380
    },
    {
      "epoch": 4.82021170047721,
      "grad_norm": 0.2295706570148468,
      "learning_rate": 3.619459288495879e-07,
      "loss": 0.0081,
      "step": 2945400
    },
    {
      "epoch": 4.820244430915864,
      "grad_norm": 0.276044636964798,
      "learning_rate": 3.618800366360707e-07,
      "loss": 0.0148,
      "step": 2945420
    },
    {
      "epoch": 4.820277161354516,
      "grad_norm": 0.7203112244606018,
      "learning_rate": 3.6181414442255364e-07,
      "loss": 0.0105,
      "step": 2945440
    },
    {
      "epoch": 4.82030989179317,
      "grad_norm": 0.11904989928007126,
      "learning_rate": 3.617482522090365e-07,
      "loss": 0.0114,
      "step": 2945460
    },
    {
      "epoch": 4.8203426222318235,
      "grad_norm": 0.3761960566043854,
      "learning_rate": 3.616823599955194e-07,
      "loss": 0.0091,
      "step": 2945480
    },
    {
      "epoch": 4.820375352670476,
      "grad_norm": 0.15282386541366577,
      "learning_rate": 3.6161646778200223e-07,
      "loss": 0.0086,
      "step": 2945500
    },
    {
      "epoch": 4.82040808310913,
      "grad_norm": 0.23388119041919708,
      "learning_rate": 3.6155057556848513e-07,
      "loss": 0.0107,
      "step": 2945520
    },
    {
      "epoch": 4.820440813547783,
      "grad_norm": 0.3400766849517822,
      "learning_rate": 3.61484683354968e-07,
      "loss": 0.0092,
      "step": 2945540
    },
    {
      "epoch": 4.820473543986436,
      "grad_norm": 0.2518720030784607,
      "learning_rate": 3.614187911414509e-07,
      "loss": 0.0079,
      "step": 2945560
    },
    {
      "epoch": 4.82050627442509,
      "grad_norm": 0.1410277634859085,
      "learning_rate": 3.613528989279337e-07,
      "loss": 0.0076,
      "step": 2945580
    },
    {
      "epoch": 4.820539004863743,
      "grad_norm": 0.08820336312055588,
      "learning_rate": 3.612870067144166e-07,
      "loss": 0.0083,
      "step": 2945600
    },
    {
      "epoch": 4.820571735302397,
      "grad_norm": 0.21173082292079926,
      "learning_rate": 3.6122111450089946e-07,
      "loss": 0.0092,
      "step": 2945620
    },
    {
      "epoch": 4.82060446574105,
      "grad_norm": 0.2528550922870636,
      "learning_rate": 3.611552222873823e-07,
      "loss": 0.0071,
      "step": 2945640
    },
    {
      "epoch": 4.820637196179703,
      "grad_norm": 0.14556333422660828,
      "learning_rate": 3.610893300738652e-07,
      "loss": 0.0075,
      "step": 2945660
    },
    {
      "epoch": 4.820669926618357,
      "grad_norm": 0.14917218685150146,
      "learning_rate": 3.6102343786034805e-07,
      "loss": 0.0076,
      "step": 2945680
    },
    {
      "epoch": 4.8207026570570095,
      "grad_norm": 0.08833616226911545,
      "learning_rate": 3.6095754564683095e-07,
      "loss": 0.0071,
      "step": 2945700
    },
    {
      "epoch": 4.820735387495663,
      "grad_norm": 0.19779863953590393,
      "learning_rate": 3.608916534333138e-07,
      "loss": 0.0053,
      "step": 2945720
    },
    {
      "epoch": 4.820768117934317,
      "grad_norm": 0.2582097351551056,
      "learning_rate": 3.608257612197967e-07,
      "loss": 0.0102,
      "step": 2945740
    },
    {
      "epoch": 4.82080084837297,
      "grad_norm": 0.24222062528133392,
      "learning_rate": 3.6075986900627954e-07,
      "loss": 0.0086,
      "step": 2945760
    },
    {
      "epoch": 4.820833578811623,
      "grad_norm": 0.49635136127471924,
      "learning_rate": 3.6069397679276244e-07,
      "loss": 0.009,
      "step": 2945780
    },
    {
      "epoch": 4.820866309250277,
      "grad_norm": 0.17599353194236755,
      "learning_rate": 3.606280845792453e-07,
      "loss": 0.0107,
      "step": 2945800
    },
    {
      "epoch": 4.82089903968893,
      "grad_norm": 0.41668328642845154,
      "learning_rate": 3.605621923657282e-07,
      "loss": 0.014,
      "step": 2945820
    },
    {
      "epoch": 4.820931770127583,
      "grad_norm": 0.13292910158634186,
      "learning_rate": 3.6049630015221103e-07,
      "loss": 0.0053,
      "step": 2945840
    },
    {
      "epoch": 4.8209645005662365,
      "grad_norm": 0.30605873465538025,
      "learning_rate": 3.604304079386939e-07,
      "loss": 0.0074,
      "step": 2945860
    },
    {
      "epoch": 4.82099723100489,
      "grad_norm": 0.30947092175483704,
      "learning_rate": 3.603645157251768e-07,
      "loss": 0.0116,
      "step": 2945880
    },
    {
      "epoch": 4.821029961443543,
      "grad_norm": 0.2634613811969757,
      "learning_rate": 3.602986235116596e-07,
      "loss": 0.0057,
      "step": 2945900
    },
    {
      "epoch": 4.821062691882196,
      "grad_norm": 0.09203311055898666,
      "learning_rate": 3.602327312981425e-07,
      "loss": 0.0103,
      "step": 2945920
    },
    {
      "epoch": 4.82109542232085,
      "grad_norm": 0.44312477111816406,
      "learning_rate": 3.6016683908462536e-07,
      "loss": 0.0075,
      "step": 2945940
    },
    {
      "epoch": 4.821128152759504,
      "grad_norm": 0.7577270865440369,
      "learning_rate": 3.601009468711083e-07,
      "loss": 0.009,
      "step": 2945960
    },
    {
      "epoch": 4.821160883198156,
      "grad_norm": 0.12335231900215149,
      "learning_rate": 3.600350546575911e-07,
      "loss": 0.0101,
      "step": 2945980
    },
    {
      "epoch": 4.82119361363681,
      "grad_norm": 0.8441154956817627,
      "learning_rate": 3.5996916244407406e-07,
      "loss": 0.016,
      "step": 2946000
    },
    {
      "epoch": 4.8212263440754635,
      "grad_norm": 0.5493485331535339,
      "learning_rate": 3.5990327023055685e-07,
      "loss": 0.0109,
      "step": 2946020
    },
    {
      "epoch": 4.821259074514117,
      "grad_norm": 0.2785769999027252,
      "learning_rate": 3.598373780170398e-07,
      "loss": 0.0103,
      "step": 2946040
    },
    {
      "epoch": 4.82129180495277,
      "grad_norm": 0.4898773431777954,
      "learning_rate": 3.5977148580352265e-07,
      "loss": 0.007,
      "step": 2946060
    },
    {
      "epoch": 4.821324535391423,
      "grad_norm": 0.3096892535686493,
      "learning_rate": 3.5970559359000544e-07,
      "loss": 0.0051,
      "step": 2946080
    },
    {
      "epoch": 4.821357265830077,
      "grad_norm": 0.1283935159444809,
      "learning_rate": 3.596397013764884e-07,
      "loss": 0.009,
      "step": 2946100
    },
    {
      "epoch": 4.82138999626873,
      "grad_norm": 0.09227221459150314,
      "learning_rate": 3.5957380916297124e-07,
      "loss": 0.0115,
      "step": 2946120
    },
    {
      "epoch": 4.821422726707383,
      "grad_norm": 0.39137592911720276,
      "learning_rate": 3.5950791694945414e-07,
      "loss": 0.0095,
      "step": 2946140
    },
    {
      "epoch": 4.821455457146037,
      "grad_norm": 0.34767115116119385,
      "learning_rate": 3.59442024735937e-07,
      "loss": 0.0094,
      "step": 2946160
    },
    {
      "epoch": 4.8214881875846896,
      "grad_norm": 0.3582099378108978,
      "learning_rate": 3.593761325224199e-07,
      "loss": 0.0074,
      "step": 2946180
    },
    {
      "epoch": 4.821520918023343,
      "grad_norm": 0.4175015091896057,
      "learning_rate": 3.5931024030890273e-07,
      "loss": 0.0076,
      "step": 2946200
    },
    {
      "epoch": 4.821553648461997,
      "grad_norm": 0.39233672618865967,
      "learning_rate": 3.5924434809538563e-07,
      "loss": 0.0158,
      "step": 2946220
    },
    {
      "epoch": 4.82158637890065,
      "grad_norm": 0.05813451111316681,
      "learning_rate": 3.5917845588186847e-07,
      "loss": 0.0094,
      "step": 2946240
    },
    {
      "epoch": 4.821619109339303,
      "grad_norm": 0.6538957357406616,
      "learning_rate": 3.5911256366835137e-07,
      "loss": 0.0104,
      "step": 2946260
    },
    {
      "epoch": 4.821651839777957,
      "grad_norm": 0.19083522260189056,
      "learning_rate": 3.590466714548342e-07,
      "loss": 0.0057,
      "step": 2946280
    },
    {
      "epoch": 4.82168457021661,
      "grad_norm": 0.44717007875442505,
      "learning_rate": 3.5898077924131706e-07,
      "loss": 0.0059,
      "step": 2946300
    },
    {
      "epoch": 4.821717300655264,
      "grad_norm": 0.2730582058429718,
      "learning_rate": 3.5891488702779996e-07,
      "loss": 0.0149,
      "step": 2946320
    },
    {
      "epoch": 4.8217500310939165,
      "grad_norm": 0.2801489233970642,
      "learning_rate": 3.588489948142828e-07,
      "loss": 0.0107,
      "step": 2946340
    },
    {
      "epoch": 4.82178276153257,
      "grad_norm": 0.11149362474679947,
      "learning_rate": 3.587831026007657e-07,
      "loss": 0.0137,
      "step": 2946360
    },
    {
      "epoch": 4.821815491971224,
      "grad_norm": 0.25206688046455383,
      "learning_rate": 3.5871721038724855e-07,
      "loss": 0.0085,
      "step": 2946380
    },
    {
      "epoch": 4.821848222409876,
      "grad_norm": 0.3974838852882385,
      "learning_rate": 3.5865131817373145e-07,
      "loss": 0.0078,
      "step": 2946400
    },
    {
      "epoch": 4.82188095284853,
      "grad_norm": 0.354245662689209,
      "learning_rate": 3.585854259602143e-07,
      "loss": 0.0092,
      "step": 2946420
    },
    {
      "epoch": 4.821913683287184,
      "grad_norm": 0.36406180262565613,
      "learning_rate": 3.585195337466972e-07,
      "loss": 0.0073,
      "step": 2946440
    },
    {
      "epoch": 4.821946413725836,
      "grad_norm": 0.21500253677368164,
      "learning_rate": 3.5845364153318004e-07,
      "loss": 0.0081,
      "step": 2946460
    },
    {
      "epoch": 4.82197914416449,
      "grad_norm": 0.6424959897994995,
      "learning_rate": 3.5838774931966294e-07,
      "loss": 0.0075,
      "step": 2946480
    },
    {
      "epoch": 4.8220118746031435,
      "grad_norm": 0.2260180413722992,
      "learning_rate": 3.583218571061458e-07,
      "loss": 0.0079,
      "step": 2946500
    },
    {
      "epoch": 4.822044605041797,
      "grad_norm": 0.39425453543663025,
      "learning_rate": 3.5825596489262863e-07,
      "loss": 0.0094,
      "step": 2946520
    },
    {
      "epoch": 4.82207733548045,
      "grad_norm": 0.2890354096889496,
      "learning_rate": 3.5819007267911153e-07,
      "loss": 0.0098,
      "step": 2946540
    },
    {
      "epoch": 4.822110065919103,
      "grad_norm": 0.3916480243206024,
      "learning_rate": 3.5812418046559437e-07,
      "loss": 0.0125,
      "step": 2946560
    },
    {
      "epoch": 4.822142796357757,
      "grad_norm": 0.42247825860977173,
      "learning_rate": 3.5805828825207727e-07,
      "loss": 0.0097,
      "step": 2946580
    },
    {
      "epoch": 4.822175526796411,
      "grad_norm": 0.20374272763729095,
      "learning_rate": 3.579923960385601e-07,
      "loss": 0.0111,
      "step": 2946600
    },
    {
      "epoch": 4.822208257235063,
      "grad_norm": 0.7844800353050232,
      "learning_rate": 3.5792650382504307e-07,
      "loss": 0.0078,
      "step": 2946620
    },
    {
      "epoch": 4.822240987673717,
      "grad_norm": 0.38967689871788025,
      "learning_rate": 3.5786061161152586e-07,
      "loss": 0.0117,
      "step": 2946640
    },
    {
      "epoch": 4.8222737181123705,
      "grad_norm": 0.25965169072151184,
      "learning_rate": 3.577947193980088e-07,
      "loss": 0.0101,
      "step": 2946660
    },
    {
      "epoch": 4.822306448551023,
      "grad_norm": 0.16605804860591888,
      "learning_rate": 3.577288271844916e-07,
      "loss": 0.0071,
      "step": 2946680
    },
    {
      "epoch": 4.822339178989677,
      "grad_norm": 0.3604851961135864,
      "learning_rate": 3.5766293497097456e-07,
      "loss": 0.009,
      "step": 2946700
    },
    {
      "epoch": 4.82237190942833,
      "grad_norm": 0.14569984376430511,
      "learning_rate": 3.575970427574574e-07,
      "loss": 0.0059,
      "step": 2946720
    },
    {
      "epoch": 4.822404639866983,
      "grad_norm": 0.8777541518211365,
      "learning_rate": 3.575311505439402e-07,
      "loss": 0.01,
      "step": 2946740
    },
    {
      "epoch": 4.822437370305637,
      "grad_norm": 0.43959078192710876,
      "learning_rate": 3.5746525833042315e-07,
      "loss": 0.0102,
      "step": 2946760
    },
    {
      "epoch": 4.82247010074429,
      "grad_norm": 0.11736493557691574,
      "learning_rate": 3.57399366116906e-07,
      "loss": 0.0045,
      "step": 2946780
    },
    {
      "epoch": 4.822502831182944,
      "grad_norm": 0.08049336820840836,
      "learning_rate": 3.573334739033889e-07,
      "loss": 0.0074,
      "step": 2946800
    },
    {
      "epoch": 4.822535561621597,
      "grad_norm": 0.07345517724752426,
      "learning_rate": 3.5726758168987174e-07,
      "loss": 0.0104,
      "step": 2946820
    },
    {
      "epoch": 4.82256829206025,
      "grad_norm": 0.5550037622451782,
      "learning_rate": 3.5720168947635464e-07,
      "loss": 0.0096,
      "step": 2946840
    },
    {
      "epoch": 4.822601022498904,
      "grad_norm": 0.1089261993765831,
      "learning_rate": 3.571357972628375e-07,
      "loss": 0.0082,
      "step": 2946860
    },
    {
      "epoch": 4.822633752937557,
      "grad_norm": 0.17549102008342743,
      "learning_rate": 3.570699050493204e-07,
      "loss": 0.009,
      "step": 2946880
    },
    {
      "epoch": 4.82266648337621,
      "grad_norm": 0.22772449254989624,
      "learning_rate": 3.570040128358032e-07,
      "loss": 0.0068,
      "step": 2946900
    },
    {
      "epoch": 4.822699213814864,
      "grad_norm": 0.11541421711444855,
      "learning_rate": 3.569381206222861e-07,
      "loss": 0.0078,
      "step": 2946920
    },
    {
      "epoch": 4.822731944253517,
      "grad_norm": 0.1092866063117981,
      "learning_rate": 3.5687222840876897e-07,
      "loss": 0.0085,
      "step": 2946940
    },
    {
      "epoch": 4.82276467469217,
      "grad_norm": 0.40052980184555054,
      "learning_rate": 3.568063361952518e-07,
      "loss": 0.0092,
      "step": 2946960
    },
    {
      "epoch": 4.8227974051308236,
      "grad_norm": 0.16121962666511536,
      "learning_rate": 3.567404439817347e-07,
      "loss": 0.0079,
      "step": 2946980
    },
    {
      "epoch": 4.822830135569477,
      "grad_norm": 0.17913001775741577,
      "learning_rate": 3.5667455176821756e-07,
      "loss": 0.0072,
      "step": 2947000
    },
    {
      "epoch": 4.82286286600813,
      "grad_norm": 0.39227181673049927,
      "learning_rate": 3.5660865955470046e-07,
      "loss": 0.0098,
      "step": 2947020
    },
    {
      "epoch": 4.8228955964467835,
      "grad_norm": 0.6670281887054443,
      "learning_rate": 3.565427673411833e-07,
      "loss": 0.011,
      "step": 2947040
    },
    {
      "epoch": 4.822928326885437,
      "grad_norm": 0.276359498500824,
      "learning_rate": 3.564768751276662e-07,
      "loss": 0.0074,
      "step": 2947060
    },
    {
      "epoch": 4.822961057324091,
      "grad_norm": 0.13603909313678741,
      "learning_rate": 3.5641098291414905e-07,
      "loss": 0.0069,
      "step": 2947080
    },
    {
      "epoch": 4.822993787762743,
      "grad_norm": 0.03151952847838402,
      "learning_rate": 3.5634509070063195e-07,
      "loss": 0.0102,
      "step": 2947100
    },
    {
      "epoch": 4.823026518201397,
      "grad_norm": 0.44262146949768066,
      "learning_rate": 3.562791984871148e-07,
      "loss": 0.0071,
      "step": 2947120
    },
    {
      "epoch": 4.8230592486400505,
      "grad_norm": 0.1315731704235077,
      "learning_rate": 3.562133062735977e-07,
      "loss": 0.0079,
      "step": 2947140
    },
    {
      "epoch": 4.823091979078703,
      "grad_norm": 0.29972895979881287,
      "learning_rate": 3.5614741406008054e-07,
      "loss": 0.0089,
      "step": 2947160
    },
    {
      "epoch": 4.823124709517357,
      "grad_norm": 0.15007196366786957,
      "learning_rate": 3.560815218465634e-07,
      "loss": 0.0101,
      "step": 2947180
    },
    {
      "epoch": 4.82315743995601,
      "grad_norm": 0.29500022530555725,
      "learning_rate": 3.560156296330463e-07,
      "loss": 0.008,
      "step": 2947200
    },
    {
      "epoch": 4.823190170394664,
      "grad_norm": 0.19510741531848907,
      "learning_rate": 3.559497374195291e-07,
      "loss": 0.0079,
      "step": 2947220
    },
    {
      "epoch": 4.823222900833317,
      "grad_norm": 0.11677072197198868,
      "learning_rate": 3.55883845206012e-07,
      "loss": 0.0079,
      "step": 2947240
    },
    {
      "epoch": 4.82325563127197,
      "grad_norm": 0.25999462604522705,
      "learning_rate": 3.5581795299249487e-07,
      "loss": 0.0116,
      "step": 2947260
    },
    {
      "epoch": 4.823288361710624,
      "grad_norm": 0.3572327196598053,
      "learning_rate": 3.557520607789778e-07,
      "loss": 0.0073,
      "step": 2947280
    },
    {
      "epoch": 4.823321092149277,
      "grad_norm": 0.18101626634597778,
      "learning_rate": 3.556861685654606e-07,
      "loss": 0.0089,
      "step": 2947300
    },
    {
      "epoch": 4.82335382258793,
      "grad_norm": 0.16777348518371582,
      "learning_rate": 3.5562027635194357e-07,
      "loss": 0.0077,
      "step": 2947320
    },
    {
      "epoch": 4.823386553026584,
      "grad_norm": 0.4059177339076996,
      "learning_rate": 3.5555438413842636e-07,
      "loss": 0.0061,
      "step": 2947340
    },
    {
      "epoch": 4.8234192834652365,
      "grad_norm": 0.13109581172466278,
      "learning_rate": 3.554884919249093e-07,
      "loss": 0.0066,
      "step": 2947360
    },
    {
      "epoch": 4.82345201390389,
      "grad_norm": 0.10479173064231873,
      "learning_rate": 3.5542259971139216e-07,
      "loss": 0.0089,
      "step": 2947380
    },
    {
      "epoch": 4.823484744342544,
      "grad_norm": 0.3113134503364563,
      "learning_rate": 3.5535670749787495e-07,
      "loss": 0.01,
      "step": 2947400
    },
    {
      "epoch": 4.823517474781197,
      "grad_norm": 0.0706457644701004,
      "learning_rate": 3.552908152843579e-07,
      "loss": 0.0139,
      "step": 2947420
    },
    {
      "epoch": 4.82355020521985,
      "grad_norm": 0.0797475278377533,
      "learning_rate": 3.5522492307084075e-07,
      "loss": 0.009,
      "step": 2947440
    },
    {
      "epoch": 4.823582935658504,
      "grad_norm": 0.24294881522655487,
      "learning_rate": 3.5515903085732364e-07,
      "loss": 0.0105,
      "step": 2947460
    },
    {
      "epoch": 4.823615666097157,
      "grad_norm": 0.3734537959098816,
      "learning_rate": 3.550931386438065e-07,
      "loss": 0.0083,
      "step": 2947480
    },
    {
      "epoch": 4.823648396535811,
      "grad_norm": 0.30491477251052856,
      "learning_rate": 3.550272464302894e-07,
      "loss": 0.0145,
      "step": 2947500
    },
    {
      "epoch": 4.8236811269744635,
      "grad_norm": 0.056949421763420105,
      "learning_rate": 3.5496135421677223e-07,
      "loss": 0.0077,
      "step": 2947520
    },
    {
      "epoch": 4.823713857413117,
      "grad_norm": 0.5629301071166992,
      "learning_rate": 3.5489546200325513e-07,
      "loss": 0.0189,
      "step": 2947540
    },
    {
      "epoch": 4.823746587851771,
      "grad_norm": 0.3525124192237854,
      "learning_rate": 3.54829569789738e-07,
      "loss": 0.0114,
      "step": 2947560
    },
    {
      "epoch": 4.823779318290423,
      "grad_norm": 0.06920620054006577,
      "learning_rate": 3.547636775762209e-07,
      "loss": 0.0113,
      "step": 2947580
    },
    {
      "epoch": 4.823812048729077,
      "grad_norm": 0.4096451699733734,
      "learning_rate": 3.546977853627037e-07,
      "loss": 0.0083,
      "step": 2947600
    },
    {
      "epoch": 4.823844779167731,
      "grad_norm": 0.13499101996421814,
      "learning_rate": 3.5463189314918657e-07,
      "loss": 0.008,
      "step": 2947620
    },
    {
      "epoch": 4.823877509606383,
      "grad_norm": 0.642406702041626,
      "learning_rate": 3.5456600093566947e-07,
      "loss": 0.0079,
      "step": 2947640
    },
    {
      "epoch": 4.823910240045037,
      "grad_norm": 0.49474450945854187,
      "learning_rate": 3.545001087221523e-07,
      "loss": 0.0078,
      "step": 2947660
    },
    {
      "epoch": 4.8239429704836905,
      "grad_norm": 0.09839484095573425,
      "learning_rate": 3.544342165086352e-07,
      "loss": 0.0065,
      "step": 2947680
    },
    {
      "epoch": 4.823975700922344,
      "grad_norm": 0.1888267993927002,
      "learning_rate": 3.5436832429511806e-07,
      "loss": 0.0066,
      "step": 2947700
    },
    {
      "epoch": 4.824008431360997,
      "grad_norm": 0.03359558805823326,
      "learning_rate": 3.5430243208160095e-07,
      "loss": 0.0059,
      "step": 2947720
    },
    {
      "epoch": 4.82404116179965,
      "grad_norm": 0.27813833951950073,
      "learning_rate": 3.542365398680838e-07,
      "loss": 0.0047,
      "step": 2947740
    },
    {
      "epoch": 4.824073892238304,
      "grad_norm": 0.277588427066803,
      "learning_rate": 3.541706476545667e-07,
      "loss": 0.0082,
      "step": 2947760
    },
    {
      "epoch": 4.824106622676958,
      "grad_norm": 0.062378522008657455,
      "learning_rate": 3.5410475544104954e-07,
      "loss": 0.01,
      "step": 2947780
    },
    {
      "epoch": 4.82413935311561,
      "grad_norm": 0.1254274547100067,
      "learning_rate": 3.5403886322753244e-07,
      "loss": 0.0086,
      "step": 2947800
    },
    {
      "epoch": 4.824172083554264,
      "grad_norm": 0.22476069629192352,
      "learning_rate": 3.539729710140153e-07,
      "loss": 0.0126,
      "step": 2947820
    },
    {
      "epoch": 4.8242048139929175,
      "grad_norm": 0.11047230660915375,
      "learning_rate": 3.5390707880049813e-07,
      "loss": 0.0071,
      "step": 2947840
    },
    {
      "epoch": 4.82423754443157,
      "grad_norm": 0.2829236388206482,
      "learning_rate": 3.5384118658698103e-07,
      "loss": 0.0081,
      "step": 2947860
    },
    {
      "epoch": 4.824270274870224,
      "grad_norm": 0.0569646880030632,
      "learning_rate": 3.537752943734639e-07,
      "loss": 0.0096,
      "step": 2947880
    },
    {
      "epoch": 4.824303005308877,
      "grad_norm": 0.4688439965248108,
      "learning_rate": 3.537094021599468e-07,
      "loss": 0.0082,
      "step": 2947900
    },
    {
      "epoch": 4.82433573574753,
      "grad_norm": 0.29641416668891907,
      "learning_rate": 3.536435099464296e-07,
      "loss": 0.0082,
      "step": 2947920
    },
    {
      "epoch": 4.824368466186184,
      "grad_norm": 0.11156447231769562,
      "learning_rate": 3.535776177329126e-07,
      "loss": 0.0093,
      "step": 2947940
    },
    {
      "epoch": 4.824401196624837,
      "grad_norm": 0.18926171958446503,
      "learning_rate": 3.5351172551939537e-07,
      "loss": 0.0119,
      "step": 2947960
    },
    {
      "epoch": 4.824433927063491,
      "grad_norm": 0.19886337220668793,
      "learning_rate": 3.534458333058783e-07,
      "loss": 0.0097,
      "step": 2947980
    },
    {
      "epoch": 4.8244666575021435,
      "grad_norm": 0.3881106972694397,
      "learning_rate": 3.533799410923611e-07,
      "loss": 0.0099,
      "step": 2948000
    },
    {
      "epoch": 4.824499387940797,
      "grad_norm": 0.15432694554328918,
      "learning_rate": 3.5331404887884406e-07,
      "loss": 0.0109,
      "step": 2948020
    },
    {
      "epoch": 4.824532118379451,
      "grad_norm": 0.13637880980968475,
      "learning_rate": 3.532481566653269e-07,
      "loss": 0.0092,
      "step": 2948040
    },
    {
      "epoch": 4.824564848818104,
      "grad_norm": 0.16547228395938873,
      "learning_rate": 3.531822644518097e-07,
      "loss": 0.007,
      "step": 2948060
    },
    {
      "epoch": 4.824597579256757,
      "grad_norm": 0.16816411912441254,
      "learning_rate": 3.5311637223829265e-07,
      "loss": 0.0094,
      "step": 2948080
    },
    {
      "epoch": 4.824630309695411,
      "grad_norm": 0.22073501348495483,
      "learning_rate": 3.530504800247755e-07,
      "loss": 0.0082,
      "step": 2948100
    },
    {
      "epoch": 4.824663040134064,
      "grad_norm": 0.05204782262444496,
      "learning_rate": 3.529845878112584e-07,
      "loss": 0.0086,
      "step": 2948120
    },
    {
      "epoch": 4.824695770572717,
      "grad_norm": 0.1680080145597458,
      "learning_rate": 3.5291869559774124e-07,
      "loss": 0.0115,
      "step": 2948140
    },
    {
      "epoch": 4.8247285010113705,
      "grad_norm": 0.2764942944049835,
      "learning_rate": 3.5285280338422414e-07,
      "loss": 0.0061,
      "step": 2948160
    },
    {
      "epoch": 4.824761231450024,
      "grad_norm": 0.2725341320037842,
      "learning_rate": 3.52786911170707e-07,
      "loss": 0.0057,
      "step": 2948180
    },
    {
      "epoch": 4.824793961888677,
      "grad_norm": 0.4848732650279999,
      "learning_rate": 3.527210189571899e-07,
      "loss": 0.0118,
      "step": 2948200
    },
    {
      "epoch": 4.82482669232733,
      "grad_norm": 0.2964155077934265,
      "learning_rate": 3.5265512674367273e-07,
      "loss": 0.0066,
      "step": 2948220
    },
    {
      "epoch": 4.824859422765984,
      "grad_norm": 0.3031408488750458,
      "learning_rate": 3.5258923453015563e-07,
      "loss": 0.0108,
      "step": 2948240
    },
    {
      "epoch": 4.824892153204638,
      "grad_norm": 0.1315399706363678,
      "learning_rate": 3.525233423166385e-07,
      "loss": 0.0061,
      "step": 2948260
    },
    {
      "epoch": 4.82492488364329,
      "grad_norm": 0.20103871822357178,
      "learning_rate": 3.524574501031213e-07,
      "loss": 0.0083,
      "step": 2948280
    },
    {
      "epoch": 4.824957614081944,
      "grad_norm": 0.10970759391784668,
      "learning_rate": 3.523915578896042e-07,
      "loss": 0.008,
      "step": 2948300
    },
    {
      "epoch": 4.8249903445205975,
      "grad_norm": 0.206806480884552,
      "learning_rate": 3.5232566567608706e-07,
      "loss": 0.0082,
      "step": 2948320
    },
    {
      "epoch": 4.825023074959251,
      "grad_norm": 0.11174990236759186,
      "learning_rate": 3.5225977346256996e-07,
      "loss": 0.0089,
      "step": 2948340
    },
    {
      "epoch": 4.825055805397904,
      "grad_norm": 0.21613584458827972,
      "learning_rate": 3.521938812490528e-07,
      "loss": 0.012,
      "step": 2948360
    },
    {
      "epoch": 4.825088535836557,
      "grad_norm": 0.26883623003959656,
      "learning_rate": 3.521279890355357e-07,
      "loss": 0.011,
      "step": 2948380
    },
    {
      "epoch": 4.825121266275211,
      "grad_norm": 0.20095063745975494,
      "learning_rate": 3.5206209682201855e-07,
      "loss": 0.0059,
      "step": 2948400
    },
    {
      "epoch": 4.825153996713864,
      "grad_norm": 0.15133489668369293,
      "learning_rate": 3.5199620460850145e-07,
      "loss": 0.0117,
      "step": 2948420
    },
    {
      "epoch": 4.825186727152517,
      "grad_norm": 0.3119630217552185,
      "learning_rate": 3.519303123949843e-07,
      "loss": 0.0042,
      "step": 2948440
    },
    {
      "epoch": 4.825219457591171,
      "grad_norm": 0.1989409476518631,
      "learning_rate": 3.518644201814672e-07,
      "loss": 0.0082,
      "step": 2948460
    },
    {
      "epoch": 4.825252188029824,
      "grad_norm": 0.3884802758693695,
      "learning_rate": 3.5179852796795004e-07,
      "loss": 0.0106,
      "step": 2948480
    },
    {
      "epoch": 4.825284918468477,
      "grad_norm": 0.2333248406648636,
      "learning_rate": 3.517326357544329e-07,
      "loss": 0.0104,
      "step": 2948500
    },
    {
      "epoch": 4.825317648907131,
      "grad_norm": 0.12745513021945953,
      "learning_rate": 3.516667435409158e-07,
      "loss": 0.0082,
      "step": 2948520
    },
    {
      "epoch": 4.825350379345784,
      "grad_norm": 0.06972113996744156,
      "learning_rate": 3.5160085132739863e-07,
      "loss": 0.0103,
      "step": 2948540
    },
    {
      "epoch": 4.825383109784437,
      "grad_norm": 0.3894155025482178,
      "learning_rate": 3.5153495911388153e-07,
      "loss": 0.0057,
      "step": 2948560
    },
    {
      "epoch": 4.825415840223091,
      "grad_norm": 0.1682005524635315,
      "learning_rate": 3.514690669003644e-07,
      "loss": 0.0075,
      "step": 2948580
    },
    {
      "epoch": 4.825448570661744,
      "grad_norm": 0.17227981984615326,
      "learning_rate": 3.5140317468684733e-07,
      "loss": 0.0064,
      "step": 2948600
    },
    {
      "epoch": 4.825481301100397,
      "grad_norm": 0.10027208924293518,
      "learning_rate": 3.513372824733301e-07,
      "loss": 0.0116,
      "step": 2948620
    },
    {
      "epoch": 4.825514031539051,
      "grad_norm": 0.20043529570102692,
      "learning_rate": 3.5127139025981307e-07,
      "loss": 0.0072,
      "step": 2948640
    },
    {
      "epoch": 4.825546761977704,
      "grad_norm": 0.23075735569000244,
      "learning_rate": 3.5120549804629586e-07,
      "loss": 0.0102,
      "step": 2948660
    },
    {
      "epoch": 4.825579492416358,
      "grad_norm": 0.39850157499313354,
      "learning_rate": 3.511396058327788e-07,
      "loss": 0.011,
      "step": 2948680
    },
    {
      "epoch": 4.8256122228550105,
      "grad_norm": 0.13236890733242035,
      "learning_rate": 3.5107371361926166e-07,
      "loss": 0.0076,
      "step": 2948700
    },
    {
      "epoch": 4.825644953293664,
      "grad_norm": 0.2151828110218048,
      "learning_rate": 3.5100782140574445e-07,
      "loss": 0.0079,
      "step": 2948720
    },
    {
      "epoch": 4.825677683732318,
      "grad_norm": 0.12380610406398773,
      "learning_rate": 3.509419291922274e-07,
      "loss": 0.0101,
      "step": 2948740
    },
    {
      "epoch": 4.82571041417097,
      "grad_norm": 0.05505774915218353,
      "learning_rate": 3.5087603697871025e-07,
      "loss": 0.0078,
      "step": 2948760
    },
    {
      "epoch": 4.825743144609624,
      "grad_norm": 1.055846095085144,
      "learning_rate": 3.5081014476519315e-07,
      "loss": 0.0086,
      "step": 2948780
    },
    {
      "epoch": 4.8257758750482775,
      "grad_norm": 0.13263961672782898,
      "learning_rate": 3.50744252551676e-07,
      "loss": 0.0053,
      "step": 2948800
    },
    {
      "epoch": 4.82580860548693,
      "grad_norm": 0.18207402527332306,
      "learning_rate": 3.506783603381589e-07,
      "loss": 0.0056,
      "step": 2948820
    },
    {
      "epoch": 4.825841335925584,
      "grad_norm": 0.22356809675693512,
      "learning_rate": 3.5061246812464174e-07,
      "loss": 0.0096,
      "step": 2948840
    },
    {
      "epoch": 4.825874066364237,
      "grad_norm": 0.2376183718442917,
      "learning_rate": 3.5054657591112464e-07,
      "loss": 0.0142,
      "step": 2948860
    },
    {
      "epoch": 4.825906796802891,
      "grad_norm": 0.4117146134376526,
      "learning_rate": 3.504806836976075e-07,
      "loss": 0.0133,
      "step": 2948880
    },
    {
      "epoch": 4.825939527241544,
      "grad_norm": 0.1318877786397934,
      "learning_rate": 3.504147914840904e-07,
      "loss": 0.0054,
      "step": 2948900
    },
    {
      "epoch": 4.825972257680197,
      "grad_norm": 0.17954759299755096,
      "learning_rate": 3.5034889927057323e-07,
      "loss": 0.0065,
      "step": 2948920
    },
    {
      "epoch": 4.826004988118851,
      "grad_norm": 0.2812696695327759,
      "learning_rate": 3.5028300705705607e-07,
      "loss": 0.0094,
      "step": 2948940
    },
    {
      "epoch": 4.8260377185575045,
      "grad_norm": 0.07240735739469528,
      "learning_rate": 3.5021711484353897e-07,
      "loss": 0.0087,
      "step": 2948960
    },
    {
      "epoch": 4.826070448996157,
      "grad_norm": 0.4632457196712494,
      "learning_rate": 3.501512226300218e-07,
      "loss": 0.0083,
      "step": 2948980
    },
    {
      "epoch": 4.826103179434811,
      "grad_norm": 0.2123105823993683,
      "learning_rate": 3.500853304165047e-07,
      "loss": 0.0094,
      "step": 2949000
    },
    {
      "epoch": 4.826135909873464,
      "grad_norm": 0.23347876965999603,
      "learning_rate": 3.5001943820298756e-07,
      "loss": 0.0073,
      "step": 2949020
    },
    {
      "epoch": 4.826168640312117,
      "grad_norm": 0.73927241563797,
      "learning_rate": 3.4995354598947046e-07,
      "loss": 0.0112,
      "step": 2949040
    },
    {
      "epoch": 4.826201370750771,
      "grad_norm": 0.33494582772254944,
      "learning_rate": 3.498876537759533e-07,
      "loss": 0.0065,
      "step": 2949060
    },
    {
      "epoch": 4.826234101189424,
      "grad_norm": 0.6519561409950256,
      "learning_rate": 3.498217615624362e-07,
      "loss": 0.0147,
      "step": 2949080
    },
    {
      "epoch": 4.826266831628077,
      "grad_norm": 0.2426670342683792,
      "learning_rate": 3.4975586934891905e-07,
      "loss": 0.0097,
      "step": 2949100
    },
    {
      "epoch": 4.826299562066731,
      "grad_norm": 0.29019850492477417,
      "learning_rate": 3.4968997713540195e-07,
      "loss": 0.0108,
      "step": 2949120
    },
    {
      "epoch": 4.826332292505384,
      "grad_norm": 0.674347460269928,
      "learning_rate": 3.496240849218848e-07,
      "loss": 0.0142,
      "step": 2949140
    },
    {
      "epoch": 4.826365022944038,
      "grad_norm": 0.06466080993413925,
      "learning_rate": 3.4955819270836764e-07,
      "loss": 0.0091,
      "step": 2949160
    },
    {
      "epoch": 4.8263977533826905,
      "grad_norm": 0.16659972071647644,
      "learning_rate": 3.4949230049485054e-07,
      "loss": 0.0063,
      "step": 2949180
    },
    {
      "epoch": 4.826430483821344,
      "grad_norm": 0.18853852152824402,
      "learning_rate": 3.494264082813334e-07,
      "loss": 0.0103,
      "step": 2949200
    },
    {
      "epoch": 4.826463214259998,
      "grad_norm": 0.18552733957767487,
      "learning_rate": 3.493605160678163e-07,
      "loss": 0.0082,
      "step": 2949220
    },
    {
      "epoch": 4.826495944698651,
      "grad_norm": 0.12615816295146942,
      "learning_rate": 3.4929462385429913e-07,
      "loss": 0.0101,
      "step": 2949240
    },
    {
      "epoch": 4.826528675137304,
      "grad_norm": 0.44378671050071716,
      "learning_rate": 3.492287316407821e-07,
      "loss": 0.01,
      "step": 2949260
    },
    {
      "epoch": 4.826561405575958,
      "grad_norm": 0.4821806252002716,
      "learning_rate": 3.4916283942726487e-07,
      "loss": 0.0107,
      "step": 2949280
    },
    {
      "epoch": 4.826594136014611,
      "grad_norm": 0.06154334917664528,
      "learning_rate": 3.490969472137478e-07,
      "loss": 0.0062,
      "step": 2949300
    },
    {
      "epoch": 4.826626866453264,
      "grad_norm": 0.5230048894882202,
      "learning_rate": 3.490310550002306e-07,
      "loss": 0.0094,
      "step": 2949320
    },
    {
      "epoch": 4.8266595968919175,
      "grad_norm": 0.18147535622119904,
      "learning_rate": 3.4896516278671357e-07,
      "loss": 0.0079,
      "step": 2949340
    },
    {
      "epoch": 4.826692327330571,
      "grad_norm": 0.29554983973503113,
      "learning_rate": 3.488992705731964e-07,
      "loss": 0.0064,
      "step": 2949360
    },
    {
      "epoch": 4.826725057769224,
      "grad_norm": 0.38163062930107117,
      "learning_rate": 3.488333783596792e-07,
      "loss": 0.0118,
      "step": 2949380
    },
    {
      "epoch": 4.826757788207877,
      "grad_norm": 0.2522904872894287,
      "learning_rate": 3.4876748614616216e-07,
      "loss": 0.0084,
      "step": 2949400
    },
    {
      "epoch": 4.826790518646531,
      "grad_norm": 0.1178741529583931,
      "learning_rate": 3.48701593932645e-07,
      "loss": 0.007,
      "step": 2949420
    },
    {
      "epoch": 4.826823249085185,
      "grad_norm": 0.07287108898162842,
      "learning_rate": 3.486357017191279e-07,
      "loss": 0.0073,
      "step": 2949440
    },
    {
      "epoch": 4.826855979523837,
      "grad_norm": 0.16237382590770721,
      "learning_rate": 3.4856980950561075e-07,
      "loss": 0.0089,
      "step": 2949460
    },
    {
      "epoch": 4.826888709962491,
      "grad_norm": 0.28478947281837463,
      "learning_rate": 3.4850391729209365e-07,
      "loss": 0.0118,
      "step": 2949480
    },
    {
      "epoch": 4.8269214404011445,
      "grad_norm": 0.3628142178058624,
      "learning_rate": 3.484380250785765e-07,
      "loss": 0.0104,
      "step": 2949500
    },
    {
      "epoch": 4.826954170839798,
      "grad_norm": 0.7551847100257874,
      "learning_rate": 3.483721328650594e-07,
      "loss": 0.0097,
      "step": 2949520
    },
    {
      "epoch": 4.826986901278451,
      "grad_norm": 0.2968420386314392,
      "learning_rate": 3.4830624065154224e-07,
      "loss": 0.0107,
      "step": 2949540
    },
    {
      "epoch": 4.827019631717104,
      "grad_norm": 0.4511142075061798,
      "learning_rate": 3.4824034843802513e-07,
      "loss": 0.0078,
      "step": 2949560
    },
    {
      "epoch": 4.827052362155758,
      "grad_norm": 0.14342664182186127,
      "learning_rate": 3.48174456224508e-07,
      "loss": 0.0071,
      "step": 2949580
    },
    {
      "epoch": 4.827085092594411,
      "grad_norm": 0.2973208427429199,
      "learning_rate": 3.481085640109908e-07,
      "loss": 0.0071,
      "step": 2949600
    },
    {
      "epoch": 4.827117823033064,
      "grad_norm": 0.11245398968458176,
      "learning_rate": 3.480426717974737e-07,
      "loss": 0.0082,
      "step": 2949620
    },
    {
      "epoch": 4.827150553471718,
      "grad_norm": 0.5231921672821045,
      "learning_rate": 3.4797677958395657e-07,
      "loss": 0.012,
      "step": 2949640
    },
    {
      "epoch": 4.8271832839103705,
      "grad_norm": 0.41503408551216125,
      "learning_rate": 3.4791088737043947e-07,
      "loss": 0.015,
      "step": 2949660
    },
    {
      "epoch": 4.827216014349024,
      "grad_norm": 0.5726024508476257,
      "learning_rate": 3.478449951569223e-07,
      "loss": 0.0092,
      "step": 2949680
    },
    {
      "epoch": 4.827248744787678,
      "grad_norm": 0.20712143182754517,
      "learning_rate": 3.477791029434052e-07,
      "loss": 0.0109,
      "step": 2949700
    },
    {
      "epoch": 4.827281475226331,
      "grad_norm": 0.11167089641094208,
      "learning_rate": 3.4771321072988806e-07,
      "loss": 0.0065,
      "step": 2949720
    },
    {
      "epoch": 4.827314205664984,
      "grad_norm": 0.21780040860176086,
      "learning_rate": 3.4764731851637096e-07,
      "loss": 0.0095,
      "step": 2949740
    },
    {
      "epoch": 4.827346936103638,
      "grad_norm": 0.2934460937976837,
      "learning_rate": 3.475814263028538e-07,
      "loss": 0.0046,
      "step": 2949760
    },
    {
      "epoch": 4.827379666542291,
      "grad_norm": 0.08540353178977966,
      "learning_rate": 3.475155340893367e-07,
      "loss": 0.0077,
      "step": 2949780
    },
    {
      "epoch": 4.827412396980945,
      "grad_norm": 0.37701040506362915,
      "learning_rate": 3.4744964187581955e-07,
      "loss": 0.0087,
      "step": 2949800
    },
    {
      "epoch": 4.8274451274195975,
      "grad_norm": 0.16379672288894653,
      "learning_rate": 3.473837496623024e-07,
      "loss": 0.0076,
      "step": 2949820
    },
    {
      "epoch": 4.827477857858251,
      "grad_norm": 0.417027086019516,
      "learning_rate": 3.473178574487853e-07,
      "loss": 0.013,
      "step": 2949840
    },
    {
      "epoch": 4.827510588296905,
      "grad_norm": 0.24855004251003265,
      "learning_rate": 3.4725196523526814e-07,
      "loss": 0.0094,
      "step": 2949860
    },
    {
      "epoch": 4.827543318735557,
      "grad_norm": 0.3979376256465912,
      "learning_rate": 3.4718607302175103e-07,
      "loss": 0.0083,
      "step": 2949880
    },
    {
      "epoch": 4.827576049174211,
      "grad_norm": 0.8753040432929993,
      "learning_rate": 3.471201808082339e-07,
      "loss": 0.0128,
      "step": 2949900
    },
    {
      "epoch": 4.827608779612865,
      "grad_norm": 0.12948112189769745,
      "learning_rate": 3.4705428859471683e-07,
      "loss": 0.0068,
      "step": 2949920
    },
    {
      "epoch": 4.827641510051517,
      "grad_norm": 0.2640741765499115,
      "learning_rate": 3.469883963811996e-07,
      "loss": 0.0084,
      "step": 2949940
    },
    {
      "epoch": 4.827674240490171,
      "grad_norm": 0.11903025209903717,
      "learning_rate": 3.469225041676826e-07,
      "loss": 0.0096,
      "step": 2949960
    },
    {
      "epoch": 4.8277069709288245,
      "grad_norm": 0.16890844702720642,
      "learning_rate": 3.468566119541654e-07,
      "loss": 0.0066,
      "step": 2949980
    },
    {
      "epoch": 4.827739701367478,
      "grad_norm": 0.25176098942756653,
      "learning_rate": 3.467907197406483e-07,
      "loss": 0.0096,
      "step": 2950000
    },
    {
      "epoch": 4.827739701367478,
      "eval_loss": 0.005642736796289682,
      "eval_runtime": 6450.8153,
      "eval_samples_per_second": 159.338,
      "eval_steps_per_second": 15.934,
      "eval_sts-dev_pearson_cosine": 0.987238025198012,
      "eval_sts-dev_spearman_cosine": 0.8969905155347518,
      "step": 2950000
    },
    {
      "epoch": 4.827772431806131,
      "grad_norm": 0.27036967873573303,
      "learning_rate": 3.4672482752713117e-07,
      "loss": 0.0076,
      "step": 2950020
    },
    {
      "epoch": 4.827805162244784,
      "grad_norm": 0.09227314591407776,
      "learning_rate": 3.4665893531361396e-07,
      "loss": 0.0112,
      "step": 2950040
    },
    {
      "epoch": 4.827837892683438,
      "grad_norm": 0.29387080669403076,
      "learning_rate": 3.465930431000969e-07,
      "loss": 0.0105,
      "step": 2950060
    },
    {
      "epoch": 4.827870623122091,
      "grad_norm": 0.14582878351211548,
      "learning_rate": 3.4652715088657976e-07,
      "loss": 0.0113,
      "step": 2950080
    },
    {
      "epoch": 4.827903353560744,
      "grad_norm": 0.07679710537195206,
      "learning_rate": 3.4646125867306265e-07,
      "loss": 0.0052,
      "step": 2950100
    },
    {
      "epoch": 4.827936083999398,
      "grad_norm": 0.38155511021614075,
      "learning_rate": 3.463953664595455e-07,
      "loss": 0.0091,
      "step": 2950120
    },
    {
      "epoch": 4.8279688144380515,
      "grad_norm": 0.1265341192483902,
      "learning_rate": 3.463294742460284e-07,
      "loss": 0.0089,
      "step": 2950140
    },
    {
      "epoch": 4.828001544876704,
      "grad_norm": 0.08343230932950974,
      "learning_rate": 3.4626358203251124e-07,
      "loss": 0.0117,
      "step": 2950160
    },
    {
      "epoch": 4.828034275315358,
      "grad_norm": 0.13914136588573456,
      "learning_rate": 3.4619768981899414e-07,
      "loss": 0.0081,
      "step": 2950180
    },
    {
      "epoch": 4.828067005754011,
      "grad_norm": 0.17915841937065125,
      "learning_rate": 3.46131797605477e-07,
      "loss": 0.0101,
      "step": 2950200
    },
    {
      "epoch": 4.828099736192664,
      "grad_norm": 0.19786055386066437,
      "learning_rate": 3.460659053919599e-07,
      "loss": 0.0052,
      "step": 2950220
    },
    {
      "epoch": 4.828132466631318,
      "grad_norm": 0.3126099407672882,
      "learning_rate": 3.4600001317844273e-07,
      "loss": 0.0077,
      "step": 2950240
    },
    {
      "epoch": 4.828165197069971,
      "grad_norm": 0.7198933959007263,
      "learning_rate": 3.4593412096492563e-07,
      "loss": 0.015,
      "step": 2950260
    },
    {
      "epoch": 4.828197927508624,
      "grad_norm": 0.1489195078611374,
      "learning_rate": 3.458682287514085e-07,
      "loss": 0.0053,
      "step": 2950280
    },
    {
      "epoch": 4.828230657947278,
      "grad_norm": 0.10561149567365646,
      "learning_rate": 3.458023365378913e-07,
      "loss": 0.0074,
      "step": 2950300
    },
    {
      "epoch": 4.828263388385931,
      "grad_norm": 0.10883068293333054,
      "learning_rate": 3.457364443243742e-07,
      "loss": 0.0047,
      "step": 2950320
    },
    {
      "epoch": 4.828296118824585,
      "grad_norm": 0.15585407614707947,
      "learning_rate": 3.4567055211085707e-07,
      "loss": 0.0078,
      "step": 2950340
    },
    {
      "epoch": 4.8283288492632375,
      "grad_norm": 0.16974224150180817,
      "learning_rate": 3.4560465989733997e-07,
      "loss": 0.0082,
      "step": 2950360
    },
    {
      "epoch": 4.828361579701891,
      "grad_norm": 0.10672885179519653,
      "learning_rate": 3.455387676838228e-07,
      "loss": 0.0096,
      "step": 2950380
    },
    {
      "epoch": 4.828394310140545,
      "grad_norm": 0.11548753827810287,
      "learning_rate": 3.454728754703057e-07,
      "loss": 0.0072,
      "step": 2950400
    },
    {
      "epoch": 4.828427040579198,
      "grad_norm": 0.17242231965065002,
      "learning_rate": 3.4540698325678855e-07,
      "loss": 0.0051,
      "step": 2950420
    },
    {
      "epoch": 4.828459771017851,
      "grad_norm": 0.2569628655910492,
      "learning_rate": 3.4534109104327145e-07,
      "loss": 0.0096,
      "step": 2950440
    },
    {
      "epoch": 4.8284925014565045,
      "grad_norm": 0.19982881844043732,
      "learning_rate": 3.452751988297543e-07,
      "loss": 0.0103,
      "step": 2950460
    },
    {
      "epoch": 4.828525231895158,
      "grad_norm": 0.1746070384979248,
      "learning_rate": 3.452093066162372e-07,
      "loss": 0.0093,
      "step": 2950480
    },
    {
      "epoch": 4.828557962333811,
      "grad_norm": 0.3904995918273926,
      "learning_rate": 3.4514341440272004e-07,
      "loss": 0.0082,
      "step": 2950500
    },
    {
      "epoch": 4.828590692772464,
      "grad_norm": 0.4597606956958771,
      "learning_rate": 3.450775221892029e-07,
      "loss": 0.0076,
      "step": 2950520
    },
    {
      "epoch": 4.828623423211118,
      "grad_norm": 0.27761468291282654,
      "learning_rate": 3.450116299756858e-07,
      "loss": 0.0082,
      "step": 2950540
    },
    {
      "epoch": 4.828656153649771,
      "grad_norm": 0.20672853291034698,
      "learning_rate": 3.4494573776216863e-07,
      "loss": 0.0069,
      "step": 2950560
    },
    {
      "epoch": 4.828688884088424,
      "grad_norm": 0.1967267394065857,
      "learning_rate": 3.448798455486516e-07,
      "loss": 0.006,
      "step": 2950580
    },
    {
      "epoch": 4.828721614527078,
      "grad_norm": 0.1900126039981842,
      "learning_rate": 3.448139533351344e-07,
      "loss": 0.0089,
      "step": 2950600
    },
    {
      "epoch": 4.8287543449657315,
      "grad_norm": 0.17847630381584167,
      "learning_rate": 3.4474806112161733e-07,
      "loss": 0.0055,
      "step": 2950620
    },
    {
      "epoch": 4.828787075404384,
      "grad_norm": 0.15596920251846313,
      "learning_rate": 3.446821689081002e-07,
      "loss": 0.0078,
      "step": 2950640
    },
    {
      "epoch": 4.828819805843038,
      "grad_norm": 0.1825411170721054,
      "learning_rate": 3.4461627669458307e-07,
      "loss": 0.0066,
      "step": 2950660
    },
    {
      "epoch": 4.828852536281691,
      "grad_norm": 0.04670141264796257,
      "learning_rate": 3.445503844810659e-07,
      "loss": 0.0114,
      "step": 2950680
    },
    {
      "epoch": 4.828885266720345,
      "grad_norm": 0.3857196569442749,
      "learning_rate": 3.444844922675488e-07,
      "loss": 0.0085,
      "step": 2950700
    },
    {
      "epoch": 4.828917997158998,
      "grad_norm": 0.16080410778522491,
      "learning_rate": 3.4441860005403166e-07,
      "loss": 0.0079,
      "step": 2950720
    },
    {
      "epoch": 4.828950727597651,
      "grad_norm": 0.06337089836597443,
      "learning_rate": 3.443527078405145e-07,
      "loss": 0.0104,
      "step": 2950740
    },
    {
      "epoch": 4.828983458036305,
      "grad_norm": 0.1747446060180664,
      "learning_rate": 3.442868156269974e-07,
      "loss": 0.0071,
      "step": 2950760
    },
    {
      "epoch": 4.829016188474958,
      "grad_norm": 0.12588618695735931,
      "learning_rate": 3.4422092341348025e-07,
      "loss": 0.0103,
      "step": 2950780
    },
    {
      "epoch": 4.829048918913611,
      "grad_norm": 0.20067624747753143,
      "learning_rate": 3.4415503119996315e-07,
      "loss": 0.0049,
      "step": 2950800
    },
    {
      "epoch": 4.829081649352265,
      "grad_norm": 0.8340334296226501,
      "learning_rate": 3.44089138986446e-07,
      "loss": 0.0087,
      "step": 2950820
    },
    {
      "epoch": 4.8291143797909175,
      "grad_norm": 0.2614576816558838,
      "learning_rate": 3.440232467729289e-07,
      "loss": 0.0079,
      "step": 2950840
    },
    {
      "epoch": 4.829147110229571,
      "grad_norm": 0.20701716840267181,
      "learning_rate": 3.4395735455941174e-07,
      "loss": 0.0154,
      "step": 2950860
    },
    {
      "epoch": 4.829179840668225,
      "grad_norm": 0.14928004145622253,
      "learning_rate": 3.4389146234589464e-07,
      "loss": 0.0074,
      "step": 2950880
    },
    {
      "epoch": 4.829212571106878,
      "grad_norm": 0.11064799875020981,
      "learning_rate": 3.438255701323775e-07,
      "loss": 0.0053,
      "step": 2950900
    },
    {
      "epoch": 4.829245301545531,
      "grad_norm": 0.13636597990989685,
      "learning_rate": 3.437596779188604e-07,
      "loss": 0.0068,
      "step": 2950920
    },
    {
      "epoch": 4.829278031984185,
      "grad_norm": 0.28406643867492676,
      "learning_rate": 3.4369378570534323e-07,
      "loss": 0.0073,
      "step": 2950940
    },
    {
      "epoch": 4.829310762422838,
      "grad_norm": 0.36486679315567017,
      "learning_rate": 3.436278934918261e-07,
      "loss": 0.0114,
      "step": 2950960
    },
    {
      "epoch": 4.829343492861492,
      "grad_norm": 0.07351726293563843,
      "learning_rate": 3.4356200127830897e-07,
      "loss": 0.0064,
      "step": 2950980
    },
    {
      "epoch": 4.8293762233001445,
      "grad_norm": 0.17997796833515167,
      "learning_rate": 3.434961090647918e-07,
      "loss": 0.0054,
      "step": 2951000
    },
    {
      "epoch": 4.829408953738798,
      "grad_norm": 0.1950509250164032,
      "learning_rate": 3.434302168512747e-07,
      "loss": 0.0079,
      "step": 2951020
    },
    {
      "epoch": 4.829441684177452,
      "grad_norm": 0.3495820164680481,
      "learning_rate": 3.4336432463775756e-07,
      "loss": 0.0059,
      "step": 2951040
    },
    {
      "epoch": 4.829474414616104,
      "grad_norm": 0.15099206566810608,
      "learning_rate": 3.4329843242424046e-07,
      "loss": 0.0072,
      "step": 2951060
    },
    {
      "epoch": 4.829507145054758,
      "grad_norm": 0.33394554257392883,
      "learning_rate": 3.432325402107233e-07,
      "loss": 0.011,
      "step": 2951080
    },
    {
      "epoch": 4.829539875493412,
      "grad_norm": 0.5551614165306091,
      "learning_rate": 3.431666479972062e-07,
      "loss": 0.0083,
      "step": 2951100
    },
    {
      "epoch": 4.829572605932064,
      "grad_norm": 0.5289484262466431,
      "learning_rate": 3.4310075578368905e-07,
      "loss": 0.0105,
      "step": 2951120
    },
    {
      "epoch": 4.829605336370718,
      "grad_norm": 0.03588976711034775,
      "learning_rate": 3.4303486357017195e-07,
      "loss": 0.0081,
      "step": 2951140
    },
    {
      "epoch": 4.8296380668093715,
      "grad_norm": 0.07755138725042343,
      "learning_rate": 3.429689713566548e-07,
      "loss": 0.0069,
      "step": 2951160
    },
    {
      "epoch": 4.829670797248025,
      "grad_norm": 0.19411912560462952,
      "learning_rate": 3.4290307914313764e-07,
      "loss": 0.0119,
      "step": 2951180
    },
    {
      "epoch": 4.829703527686678,
      "grad_norm": 0.15317761898040771,
      "learning_rate": 3.4283718692962054e-07,
      "loss": 0.0121,
      "step": 2951200
    },
    {
      "epoch": 4.829736258125331,
      "grad_norm": 0.5660214424133301,
      "learning_rate": 3.427712947161034e-07,
      "loss": 0.0075,
      "step": 2951220
    },
    {
      "epoch": 4.829768988563985,
      "grad_norm": 0.1484103798866272,
      "learning_rate": 3.4270540250258634e-07,
      "loss": 0.0078,
      "step": 2951240
    },
    {
      "epoch": 4.8298017190026385,
      "grad_norm": 0.31899046897888184,
      "learning_rate": 3.4263951028906913e-07,
      "loss": 0.0065,
      "step": 2951260
    },
    {
      "epoch": 4.829834449441291,
      "grad_norm": 0.21979062259197235,
      "learning_rate": 3.425736180755521e-07,
      "loss": 0.0064,
      "step": 2951280
    },
    {
      "epoch": 4.829867179879945,
      "grad_norm": 0.19212955236434937,
      "learning_rate": 3.4250772586203493e-07,
      "loss": 0.0096,
      "step": 2951300
    },
    {
      "epoch": 4.829899910318598,
      "grad_norm": 0.18645107746124268,
      "learning_rate": 3.424418336485178e-07,
      "loss": 0.0088,
      "step": 2951320
    },
    {
      "epoch": 4.829932640757251,
      "grad_norm": 0.0779072642326355,
      "learning_rate": 3.4237594143500067e-07,
      "loss": 0.0097,
      "step": 2951340
    },
    {
      "epoch": 4.829965371195905,
      "grad_norm": 0.09231880307197571,
      "learning_rate": 3.4231004922148357e-07,
      "loss": 0.0106,
      "step": 2951360
    },
    {
      "epoch": 4.829998101634558,
      "grad_norm": 0.1563594788312912,
      "learning_rate": 3.422441570079664e-07,
      "loss": 0.0085,
      "step": 2951380
    },
    {
      "epoch": 4.830030832073211,
      "grad_norm": 0.06394143402576447,
      "learning_rate": 3.4217826479444926e-07,
      "loss": 0.0056,
      "step": 2951400
    },
    {
      "epoch": 4.830063562511865,
      "grad_norm": 0.1534208208322525,
      "learning_rate": 3.4211237258093216e-07,
      "loss": 0.0112,
      "step": 2951420
    },
    {
      "epoch": 4.830096292950518,
      "grad_norm": 0.08085942268371582,
      "learning_rate": 3.42046480367415e-07,
      "loss": 0.0127,
      "step": 2951440
    },
    {
      "epoch": 4.830129023389172,
      "grad_norm": 0.5317350029945374,
      "learning_rate": 3.419805881538979e-07,
      "loss": 0.0099,
      "step": 2951460
    },
    {
      "epoch": 4.8301617538278245,
      "grad_norm": 0.08406726270914078,
      "learning_rate": 3.4191469594038075e-07,
      "loss": 0.0101,
      "step": 2951480
    },
    {
      "epoch": 4.830194484266478,
      "grad_norm": 0.2810288965702057,
      "learning_rate": 3.4184880372686365e-07,
      "loss": 0.0076,
      "step": 2951500
    },
    {
      "epoch": 4.830227214705132,
      "grad_norm": 0.17041215300559998,
      "learning_rate": 3.417829115133465e-07,
      "loss": 0.0119,
      "step": 2951520
    },
    {
      "epoch": 4.830259945143785,
      "grad_norm": 0.2729162871837616,
      "learning_rate": 3.417170192998294e-07,
      "loss": 0.0081,
      "step": 2951540
    },
    {
      "epoch": 4.830292675582438,
      "grad_norm": 0.27998438477516174,
      "learning_rate": 3.4165112708631224e-07,
      "loss": 0.0088,
      "step": 2951560
    },
    {
      "epoch": 4.830325406021092,
      "grad_norm": 0.4012381434440613,
      "learning_rate": 3.4158523487279514e-07,
      "loss": 0.0092,
      "step": 2951580
    },
    {
      "epoch": 4.830358136459745,
      "grad_norm": 0.11352231353521347,
      "learning_rate": 3.41519342659278e-07,
      "loss": 0.0087,
      "step": 2951600
    },
    {
      "epoch": 4.830390866898398,
      "grad_norm": 0.07285946607589722,
      "learning_rate": 3.4145345044576083e-07,
      "loss": 0.0063,
      "step": 2951620
    },
    {
      "epoch": 4.8304235973370515,
      "grad_norm": 0.41154783964157104,
      "learning_rate": 3.413875582322437e-07,
      "loss": 0.0086,
      "step": 2951640
    },
    {
      "epoch": 4.830456327775705,
      "grad_norm": 0.291377991437912,
      "learning_rate": 3.4132166601872657e-07,
      "loss": 0.0065,
      "step": 2951660
    },
    {
      "epoch": 4.830489058214358,
      "grad_norm": 0.18887537717819214,
      "learning_rate": 3.4125577380520947e-07,
      "loss": 0.0082,
      "step": 2951680
    },
    {
      "epoch": 4.830521788653011,
      "grad_norm": 0.17607556283473969,
      "learning_rate": 3.411898815916923e-07,
      "loss": 0.0112,
      "step": 2951700
    },
    {
      "epoch": 4.830554519091665,
      "grad_norm": 0.25570183992385864,
      "learning_rate": 3.411239893781752e-07,
      "loss": 0.0101,
      "step": 2951720
    },
    {
      "epoch": 4.830587249530318,
      "grad_norm": 0.3410813808441162,
      "learning_rate": 3.4105809716465806e-07,
      "loss": 0.0087,
      "step": 2951740
    },
    {
      "epoch": 4.830619979968971,
      "grad_norm": 0.4744952321052551,
      "learning_rate": 3.4099220495114096e-07,
      "loss": 0.0083,
      "step": 2951760
    },
    {
      "epoch": 4.830652710407625,
      "grad_norm": 0.48032164573669434,
      "learning_rate": 3.409263127376238e-07,
      "loss": 0.0104,
      "step": 2951780
    },
    {
      "epoch": 4.8306854408462785,
      "grad_norm": 0.13769781589508057,
      "learning_rate": 3.408604205241067e-07,
      "loss": 0.011,
      "step": 2951800
    },
    {
      "epoch": 4.830718171284931,
      "grad_norm": 0.11107297986745834,
      "learning_rate": 3.4079452831058955e-07,
      "loss": 0.0075,
      "step": 2951820
    },
    {
      "epoch": 4.830750901723585,
      "grad_norm": 0.29300612211227417,
      "learning_rate": 3.407286360970724e-07,
      "loss": 0.0111,
      "step": 2951840
    },
    {
      "epoch": 4.830783632162238,
      "grad_norm": 0.35322144627571106,
      "learning_rate": 3.406627438835553e-07,
      "loss": 0.0052,
      "step": 2951860
    },
    {
      "epoch": 4.830816362600892,
      "grad_norm": 0.15414468944072723,
      "learning_rate": 3.4059685167003814e-07,
      "loss": 0.007,
      "step": 2951880
    },
    {
      "epoch": 4.830849093039545,
      "grad_norm": 0.24116751551628113,
      "learning_rate": 3.405309594565211e-07,
      "loss": 0.0074,
      "step": 2951900
    },
    {
      "epoch": 4.830881823478198,
      "grad_norm": 0.028752805665135384,
      "learning_rate": 3.404650672430039e-07,
      "loss": 0.0089,
      "step": 2951920
    },
    {
      "epoch": 4.830914553916852,
      "grad_norm": 0.14278832077980042,
      "learning_rate": 3.4039917502948683e-07,
      "loss": 0.0131,
      "step": 2951940
    },
    {
      "epoch": 4.830947284355505,
      "grad_norm": 0.24551719427108765,
      "learning_rate": 3.403332828159697e-07,
      "loss": 0.009,
      "step": 2951960
    },
    {
      "epoch": 4.830980014794158,
      "grad_norm": 0.35394954681396484,
      "learning_rate": 3.402673906024526e-07,
      "loss": 0.0116,
      "step": 2951980
    },
    {
      "epoch": 4.831012745232812,
      "grad_norm": 0.0848781168460846,
      "learning_rate": 3.402014983889354e-07,
      "loss": 0.0112,
      "step": 2952000
    },
    {
      "epoch": 4.8310454756714645,
      "grad_norm": 0.3184346854686737,
      "learning_rate": 3.401356061754183e-07,
      "loss": 0.006,
      "step": 2952020
    },
    {
      "epoch": 4.831078206110118,
      "grad_norm": 0.17097359895706177,
      "learning_rate": 3.4006971396190117e-07,
      "loss": 0.0122,
      "step": 2952040
    },
    {
      "epoch": 4.831110936548772,
      "grad_norm": 0.15346556901931763,
      "learning_rate": 3.40003821748384e-07,
      "loss": 0.008,
      "step": 2952060
    },
    {
      "epoch": 4.831143666987425,
      "grad_norm": 0.14419874548912048,
      "learning_rate": 3.399379295348669e-07,
      "loss": 0.0085,
      "step": 2952080
    },
    {
      "epoch": 4.831176397426078,
      "grad_norm": 0.21313601732254028,
      "learning_rate": 3.3987203732134976e-07,
      "loss": 0.0057,
      "step": 2952100
    },
    {
      "epoch": 4.831209127864732,
      "grad_norm": 0.4470219612121582,
      "learning_rate": 3.3980614510783266e-07,
      "loss": 0.0088,
      "step": 2952120
    },
    {
      "epoch": 4.831241858303385,
      "grad_norm": 0.2313113659620285,
      "learning_rate": 3.397402528943155e-07,
      "loss": 0.0091,
      "step": 2952140
    },
    {
      "epoch": 4.831274588742039,
      "grad_norm": 0.14958631992340088,
      "learning_rate": 3.396743606807984e-07,
      "loss": 0.0038,
      "step": 2952160
    },
    {
      "epoch": 4.8313073191806915,
      "grad_norm": 0.5182599425315857,
      "learning_rate": 3.3960846846728125e-07,
      "loss": 0.0132,
      "step": 2952180
    },
    {
      "epoch": 4.831340049619345,
      "grad_norm": 0.5104654431343079,
      "learning_rate": 3.3954257625376414e-07,
      "loss": 0.0101,
      "step": 2952200
    },
    {
      "epoch": 4.831372780057999,
      "grad_norm": 0.25968173146247864,
      "learning_rate": 3.39476684040247e-07,
      "loss": 0.014,
      "step": 2952220
    },
    {
      "epoch": 4.831405510496651,
      "grad_norm": 0.1675935536623001,
      "learning_rate": 3.394107918267299e-07,
      "loss": 0.0084,
      "step": 2952240
    },
    {
      "epoch": 4.831438240935305,
      "grad_norm": 0.1296352744102478,
      "learning_rate": 3.3934489961321273e-07,
      "loss": 0.0096,
      "step": 2952260
    },
    {
      "epoch": 4.8314709713739585,
      "grad_norm": 0.34860333800315857,
      "learning_rate": 3.392790073996956e-07,
      "loss": 0.0133,
      "step": 2952280
    },
    {
      "epoch": 4.831503701812611,
      "grad_norm": 0.21423913538455963,
      "learning_rate": 3.392131151861785e-07,
      "loss": 0.0078,
      "step": 2952300
    },
    {
      "epoch": 4.831536432251265,
      "grad_norm": 0.06205219775438309,
      "learning_rate": 3.391472229726613e-07,
      "loss": 0.0094,
      "step": 2952320
    },
    {
      "epoch": 4.831569162689918,
      "grad_norm": 0.21018967032432556,
      "learning_rate": 3.390813307591442e-07,
      "loss": 0.0097,
      "step": 2952340
    },
    {
      "epoch": 4.831601893128572,
      "grad_norm": 0.11903219670057297,
      "learning_rate": 3.3901543854562707e-07,
      "loss": 0.0089,
      "step": 2952360
    },
    {
      "epoch": 4.831634623567225,
      "grad_norm": 0.28787198662757874,
      "learning_rate": 3.3894954633210997e-07,
      "loss": 0.0103,
      "step": 2952380
    },
    {
      "epoch": 4.831667354005878,
      "grad_norm": 0.21127693355083466,
      "learning_rate": 3.388836541185928e-07,
      "loss": 0.0125,
      "step": 2952400
    },
    {
      "epoch": 4.831700084444532,
      "grad_norm": 0.42833587527275085,
      "learning_rate": 3.388177619050757e-07,
      "loss": 0.0095,
      "step": 2952420
    },
    {
      "epoch": 4.8317328148831855,
      "grad_norm": 0.2254733145236969,
      "learning_rate": 3.3875186969155856e-07,
      "loss": 0.0093,
      "step": 2952440
    },
    {
      "epoch": 4.831765545321838,
      "grad_norm": 0.4904417395591736,
      "learning_rate": 3.3868597747804146e-07,
      "loss": 0.0092,
      "step": 2952460
    },
    {
      "epoch": 4.831798275760492,
      "grad_norm": 0.12140364199876785,
      "learning_rate": 3.386200852645243e-07,
      "loss": 0.0088,
      "step": 2952480
    },
    {
      "epoch": 4.831831006199145,
      "grad_norm": 0.2078533172607422,
      "learning_rate": 3.3855419305100715e-07,
      "loss": 0.0063,
      "step": 2952500
    },
    {
      "epoch": 4.831863736637798,
      "grad_norm": 0.3005254566669464,
      "learning_rate": 3.3848830083749005e-07,
      "loss": 0.0068,
      "step": 2952520
    },
    {
      "epoch": 4.831896467076452,
      "grad_norm": 0.25193971395492554,
      "learning_rate": 3.384224086239729e-07,
      "loss": 0.0078,
      "step": 2952540
    },
    {
      "epoch": 4.831929197515105,
      "grad_norm": 0.2884140610694885,
      "learning_rate": 3.3835651641045584e-07,
      "loss": 0.0111,
      "step": 2952560
    },
    {
      "epoch": 4.831961927953758,
      "grad_norm": 0.05661292374134064,
      "learning_rate": 3.3829062419693864e-07,
      "loss": 0.009,
      "step": 2952580
    },
    {
      "epoch": 4.831994658392412,
      "grad_norm": 0.11177825182676315,
      "learning_rate": 3.382247319834216e-07,
      "loss": 0.0135,
      "step": 2952600
    },
    {
      "epoch": 4.832027388831065,
      "grad_norm": 0.1785723865032196,
      "learning_rate": 3.3815883976990443e-07,
      "loss": 0.0109,
      "step": 2952620
    },
    {
      "epoch": 4.832060119269719,
      "grad_norm": 0.2515038251876831,
      "learning_rate": 3.3809294755638733e-07,
      "loss": 0.0138,
      "step": 2952640
    },
    {
      "epoch": 4.8320928497083715,
      "grad_norm": 0.4574081301689148,
      "learning_rate": 3.380270553428702e-07,
      "loss": 0.0095,
      "step": 2952660
    },
    {
      "epoch": 4.832125580147025,
      "grad_norm": 0.33210375905036926,
      "learning_rate": 3.379611631293531e-07,
      "loss": 0.0167,
      "step": 2952680
    },
    {
      "epoch": 4.832158310585679,
      "grad_norm": 0.1367591768503189,
      "learning_rate": 3.378952709158359e-07,
      "loss": 0.0074,
      "step": 2952700
    },
    {
      "epoch": 4.832191041024332,
      "grad_norm": 0.39096012711524963,
      "learning_rate": 3.3782937870231877e-07,
      "loss": 0.0056,
      "step": 2952720
    },
    {
      "epoch": 4.832223771462985,
      "grad_norm": 0.32385024428367615,
      "learning_rate": 3.3776348648880166e-07,
      "loss": 0.011,
      "step": 2952740
    },
    {
      "epoch": 4.832256501901639,
      "grad_norm": 0.13439297676086426,
      "learning_rate": 3.376975942752845e-07,
      "loss": 0.0059,
      "step": 2952760
    },
    {
      "epoch": 4.832289232340292,
      "grad_norm": 0.14448335766792297,
      "learning_rate": 3.376317020617674e-07,
      "loss": 0.0106,
      "step": 2952780
    },
    {
      "epoch": 4.832321962778945,
      "grad_norm": 0.2627899944782257,
      "learning_rate": 3.3756580984825025e-07,
      "loss": 0.0089,
      "step": 2952800
    },
    {
      "epoch": 4.8323546932175985,
      "grad_norm": 0.5729426145553589,
      "learning_rate": 3.3749991763473315e-07,
      "loss": 0.0096,
      "step": 2952820
    },
    {
      "epoch": 4.832387423656252,
      "grad_norm": 0.410153865814209,
      "learning_rate": 3.37434025421216e-07,
      "loss": 0.0104,
      "step": 2952840
    },
    {
      "epoch": 4.832420154094905,
      "grad_norm": 0.42759591341018677,
      "learning_rate": 3.373681332076989e-07,
      "loss": 0.0111,
      "step": 2952860
    },
    {
      "epoch": 4.832452884533558,
      "grad_norm": 0.19668728113174438,
      "learning_rate": 3.3730224099418174e-07,
      "loss": 0.0088,
      "step": 2952880
    },
    {
      "epoch": 4.832485614972212,
      "grad_norm": 0.29178327322006226,
      "learning_rate": 3.3723634878066464e-07,
      "loss": 0.0114,
      "step": 2952900
    },
    {
      "epoch": 4.832518345410866,
      "grad_norm": 0.09786578267812729,
      "learning_rate": 3.371704565671475e-07,
      "loss": 0.0058,
      "step": 2952920
    },
    {
      "epoch": 4.832551075849518,
      "grad_norm": 0.24350862205028534,
      "learning_rate": 3.3710456435363033e-07,
      "loss": 0.0082,
      "step": 2952940
    },
    {
      "epoch": 4.832583806288172,
      "grad_norm": 0.22710858285427094,
      "learning_rate": 3.3703867214011323e-07,
      "loss": 0.0054,
      "step": 2952960
    },
    {
      "epoch": 4.8326165367268255,
      "grad_norm": 0.9265910983085632,
      "learning_rate": 3.369727799265961e-07,
      "loss": 0.012,
      "step": 2952980
    },
    {
      "epoch": 4.832649267165479,
      "grad_norm": 0.5560019612312317,
      "learning_rate": 3.36906887713079e-07,
      "loss": 0.0089,
      "step": 2953000
    },
    {
      "epoch": 4.832681997604132,
      "grad_norm": 0.2796874940395355,
      "learning_rate": 3.368409954995618e-07,
      "loss": 0.0097,
      "step": 2953020
    },
    {
      "epoch": 4.832714728042785,
      "grad_norm": 0.2543424665927887,
      "learning_rate": 3.367751032860447e-07,
      "loss": 0.0097,
      "step": 2953040
    },
    {
      "epoch": 4.832747458481439,
      "grad_norm": 0.2752823531627655,
      "learning_rate": 3.3670921107252757e-07,
      "loss": 0.0096,
      "step": 2953060
    },
    {
      "epoch": 4.832780188920092,
      "grad_norm": 0.25502341985702515,
      "learning_rate": 3.3664331885901046e-07,
      "loss": 0.0065,
      "step": 2953080
    },
    {
      "epoch": 4.832812919358745,
      "grad_norm": 0.47024616599082947,
      "learning_rate": 3.365774266454933e-07,
      "loss": 0.0113,
      "step": 2953100
    },
    {
      "epoch": 4.832845649797399,
      "grad_norm": 0.11679384857416153,
      "learning_rate": 3.365115344319762e-07,
      "loss": 0.0083,
      "step": 2953120
    },
    {
      "epoch": 4.8328783802360515,
      "grad_norm": 0.06024599447846413,
      "learning_rate": 3.3644564221845905e-07,
      "loss": 0.0085,
      "step": 2953140
    },
    {
      "epoch": 4.832911110674705,
      "grad_norm": 0.2130410075187683,
      "learning_rate": 3.363797500049419e-07,
      "loss": 0.0091,
      "step": 2953160
    },
    {
      "epoch": 4.832943841113359,
      "grad_norm": 0.2300024926662445,
      "learning_rate": 3.363138577914248e-07,
      "loss": 0.0066,
      "step": 2953180
    },
    {
      "epoch": 4.832976571552012,
      "grad_norm": 0.251091867685318,
      "learning_rate": 3.3624796557790764e-07,
      "loss": 0.0108,
      "step": 2953200
    },
    {
      "epoch": 4.833009301990665,
      "grad_norm": 0.2768876552581787,
      "learning_rate": 3.361820733643906e-07,
      "loss": 0.0071,
      "step": 2953220
    },
    {
      "epoch": 4.833042032429319,
      "grad_norm": 0.10931962728500366,
      "learning_rate": 3.361161811508734e-07,
      "loss": 0.0113,
      "step": 2953240
    },
    {
      "epoch": 4.833074762867972,
      "grad_norm": 0.10891042649745941,
      "learning_rate": 3.3605028893735634e-07,
      "loss": 0.0098,
      "step": 2953260
    },
    {
      "epoch": 4.833107493306625,
      "grad_norm": 0.32893431186676025,
      "learning_rate": 3.359843967238392e-07,
      "loss": 0.012,
      "step": 2953280
    },
    {
      "epoch": 4.8331402237452785,
      "grad_norm": 0.2721210718154907,
      "learning_rate": 3.359185045103221e-07,
      "loss": 0.0132,
      "step": 2953300
    },
    {
      "epoch": 4.833172954183932,
      "grad_norm": 0.09465702623128891,
      "learning_rate": 3.3585261229680493e-07,
      "loss": 0.0113,
      "step": 2953320
    },
    {
      "epoch": 4.833205684622586,
      "grad_norm": 0.19871786236763,
      "learning_rate": 3.3578672008328783e-07,
      "loss": 0.0109,
      "step": 2953340
    },
    {
      "epoch": 4.833238415061238,
      "grad_norm": 0.10477520525455475,
      "learning_rate": 3.3572082786977067e-07,
      "loss": 0.0063,
      "step": 2953360
    },
    {
      "epoch": 4.833271145499892,
      "grad_norm": 0.14047418534755707,
      "learning_rate": 3.356549356562535e-07,
      "loss": 0.0068,
      "step": 2953380
    },
    {
      "epoch": 4.833303875938546,
      "grad_norm": 0.27155348658561707,
      "learning_rate": 3.355890434427364e-07,
      "loss": 0.0098,
      "step": 2953400
    },
    {
      "epoch": 4.833336606377198,
      "grad_norm": 0.05289740115404129,
      "learning_rate": 3.3552315122921926e-07,
      "loss": 0.0064,
      "step": 2953420
    },
    {
      "epoch": 4.833369336815852,
      "grad_norm": 0.3950234651565552,
      "learning_rate": 3.3545725901570216e-07,
      "loss": 0.0083,
      "step": 2953440
    },
    {
      "epoch": 4.8334020672545055,
      "grad_norm": 0.15144337713718414,
      "learning_rate": 3.35391366802185e-07,
      "loss": 0.0087,
      "step": 2953460
    },
    {
      "epoch": 4.833434797693158,
      "grad_norm": 0.10668153315782547,
      "learning_rate": 3.353254745886679e-07,
      "loss": 0.0122,
      "step": 2953480
    },
    {
      "epoch": 4.833467528131812,
      "grad_norm": 0.22158832848072052,
      "learning_rate": 3.3525958237515075e-07,
      "loss": 0.0065,
      "step": 2953500
    },
    {
      "epoch": 4.833500258570465,
      "grad_norm": 0.2612106502056122,
      "learning_rate": 3.3519369016163365e-07,
      "loss": 0.0071,
      "step": 2953520
    },
    {
      "epoch": 4.833532989009119,
      "grad_norm": 0.1989685595035553,
      "learning_rate": 3.351277979481165e-07,
      "loss": 0.0085,
      "step": 2953540
    },
    {
      "epoch": 4.833565719447772,
      "grad_norm": 0.19531720876693726,
      "learning_rate": 3.350619057345994e-07,
      "loss": 0.0082,
      "step": 2953560
    },
    {
      "epoch": 4.833598449886425,
      "grad_norm": 0.31153059005737305,
      "learning_rate": 3.3499601352108224e-07,
      "loss": 0.012,
      "step": 2953580
    },
    {
      "epoch": 4.833631180325079,
      "grad_norm": 0.07074568420648575,
      "learning_rate": 3.349301213075651e-07,
      "loss": 0.0201,
      "step": 2953600
    },
    {
      "epoch": 4.8336639107637325,
      "grad_norm": 0.290587455034256,
      "learning_rate": 3.34864229094048e-07,
      "loss": 0.0102,
      "step": 2953620
    },
    {
      "epoch": 4.833696641202385,
      "grad_norm": 0.12898798286914825,
      "learning_rate": 3.3479833688053083e-07,
      "loss": 0.0089,
      "step": 2953640
    },
    {
      "epoch": 4.833729371641039,
      "grad_norm": 0.10250025987625122,
      "learning_rate": 3.3473244466701373e-07,
      "loss": 0.0108,
      "step": 2953660
    },
    {
      "epoch": 4.833762102079692,
      "grad_norm": 0.17314207553863525,
      "learning_rate": 3.346665524534966e-07,
      "loss": 0.0141,
      "step": 2953680
    },
    {
      "epoch": 4.833794832518345,
      "grad_norm": 0.13963374495506287,
      "learning_rate": 3.3460066023997947e-07,
      "loss": 0.0128,
      "step": 2953700
    },
    {
      "epoch": 4.833827562956999,
      "grad_norm": 0.38014376163482666,
      "learning_rate": 3.345347680264623e-07,
      "loss": 0.0086,
      "step": 2953720
    },
    {
      "epoch": 4.833860293395652,
      "grad_norm": 0.20700697600841522,
      "learning_rate": 3.344688758129452e-07,
      "loss": 0.009,
      "step": 2953740
    },
    {
      "epoch": 4.833893023834305,
      "grad_norm": 0.2105361372232437,
      "learning_rate": 3.3440298359942806e-07,
      "loss": 0.0082,
      "step": 2953760
    },
    {
      "epoch": 4.833925754272959,
      "grad_norm": 0.27576231956481934,
      "learning_rate": 3.3433709138591096e-07,
      "loss": 0.0091,
      "step": 2953780
    },
    {
      "epoch": 4.833958484711612,
      "grad_norm": 0.12333759665489197,
      "learning_rate": 3.342711991723938e-07,
      "loss": 0.0085,
      "step": 2953800
    },
    {
      "epoch": 4.833991215150266,
      "grad_norm": 0.19497744739055634,
      "learning_rate": 3.3420530695887665e-07,
      "loss": 0.0085,
      "step": 2953820
    },
    {
      "epoch": 4.8340239455889185,
      "grad_norm": 0.8318945169448853,
      "learning_rate": 3.3413941474535955e-07,
      "loss": 0.012,
      "step": 2953840
    },
    {
      "epoch": 4.834056676027572,
      "grad_norm": 0.0357029065489769,
      "learning_rate": 3.340735225318424e-07,
      "loss": 0.0053,
      "step": 2953860
    },
    {
      "epoch": 4.834089406466226,
      "grad_norm": 0.26843494176864624,
      "learning_rate": 3.3400763031832535e-07,
      "loss": 0.008,
      "step": 2953880
    },
    {
      "epoch": 4.834122136904879,
      "grad_norm": 0.18174265325069427,
      "learning_rate": 3.3394173810480814e-07,
      "loss": 0.0085,
      "step": 2953900
    },
    {
      "epoch": 4.834154867343532,
      "grad_norm": 0.23663093149662018,
      "learning_rate": 3.338758458912911e-07,
      "loss": 0.0095,
      "step": 2953920
    },
    {
      "epoch": 4.8341875977821855,
      "grad_norm": 0.18093708157539368,
      "learning_rate": 3.3380995367777394e-07,
      "loss": 0.0097,
      "step": 2953940
    },
    {
      "epoch": 4.834220328220839,
      "grad_norm": 0.11155570298433304,
      "learning_rate": 3.3374406146425684e-07,
      "loss": 0.0105,
      "step": 2953960
    },
    {
      "epoch": 4.834253058659492,
      "grad_norm": 1.3330254554748535,
      "learning_rate": 3.336781692507397e-07,
      "loss": 0.0094,
      "step": 2953980
    },
    {
      "epoch": 4.834285789098145,
      "grad_norm": 0.23083411157131195,
      "learning_rate": 3.336122770372226e-07,
      "loss": 0.0102,
      "step": 2954000
    },
    {
      "epoch": 4.834318519536799,
      "grad_norm": 0.11692778021097183,
      "learning_rate": 3.335463848237054e-07,
      "loss": 0.0063,
      "step": 2954020
    },
    {
      "epoch": 4.834351249975452,
      "grad_norm": 0.1647365391254425,
      "learning_rate": 3.3348049261018827e-07,
      "loss": 0.0061,
      "step": 2954040
    },
    {
      "epoch": 4.834383980414105,
      "grad_norm": 0.08511954545974731,
      "learning_rate": 3.3341460039667117e-07,
      "loss": 0.0106,
      "step": 2954060
    },
    {
      "epoch": 4.834416710852759,
      "grad_norm": 0.09081882238388062,
      "learning_rate": 3.33348708183154e-07,
      "loss": 0.0048,
      "step": 2954080
    },
    {
      "epoch": 4.8344494412914125,
      "grad_norm": 0.3922671675682068,
      "learning_rate": 3.332828159696369e-07,
      "loss": 0.0091,
      "step": 2954100
    },
    {
      "epoch": 4.834482171730065,
      "grad_norm": 0.0873589813709259,
      "learning_rate": 3.3321692375611976e-07,
      "loss": 0.0117,
      "step": 2954120
    },
    {
      "epoch": 4.834514902168719,
      "grad_norm": 0.22068245708942413,
      "learning_rate": 3.3315103154260266e-07,
      "loss": 0.0073,
      "step": 2954140
    },
    {
      "epoch": 4.834547632607372,
      "grad_norm": 0.11629842221736908,
      "learning_rate": 3.330851393290855e-07,
      "loss": 0.0077,
      "step": 2954160
    },
    {
      "epoch": 4.834580363046026,
      "grad_norm": 0.3061184585094452,
      "learning_rate": 3.330192471155684e-07,
      "loss": 0.0069,
      "step": 2954180
    },
    {
      "epoch": 4.834613093484679,
      "grad_norm": 0.3044080436229706,
      "learning_rate": 3.3295335490205125e-07,
      "loss": 0.0091,
      "step": 2954200
    },
    {
      "epoch": 4.834645823923332,
      "grad_norm": 0.29201313853263855,
      "learning_rate": 3.3288746268853415e-07,
      "loss": 0.0063,
      "step": 2954220
    },
    {
      "epoch": 4.834678554361986,
      "grad_norm": 0.22126424312591553,
      "learning_rate": 3.32821570475017e-07,
      "loss": 0.0135,
      "step": 2954240
    },
    {
      "epoch": 4.834711284800639,
      "grad_norm": 0.19555950164794922,
      "learning_rate": 3.3275567826149984e-07,
      "loss": 0.0098,
      "step": 2954260
    },
    {
      "epoch": 4.834744015239292,
      "grad_norm": 0.06795953214168549,
      "learning_rate": 3.3268978604798274e-07,
      "loss": 0.0079,
      "step": 2954280
    },
    {
      "epoch": 4.834776745677946,
      "grad_norm": 0.06137193366885185,
      "learning_rate": 3.326238938344656e-07,
      "loss": 0.0091,
      "step": 2954300
    },
    {
      "epoch": 4.8348094761165985,
      "grad_norm": 0.10707667469978333,
      "learning_rate": 3.325580016209485e-07,
      "loss": 0.0094,
      "step": 2954320
    },
    {
      "epoch": 4.834842206555252,
      "grad_norm": 0.26296839118003845,
      "learning_rate": 3.3249210940743133e-07,
      "loss": 0.0083,
      "step": 2954340
    },
    {
      "epoch": 4.834874936993906,
      "grad_norm": 0.08361858129501343,
      "learning_rate": 3.324262171939142e-07,
      "loss": 0.0121,
      "step": 2954360
    },
    {
      "epoch": 4.834907667432559,
      "grad_norm": 0.3433185815811157,
      "learning_rate": 3.3236032498039707e-07,
      "loss": 0.0083,
      "step": 2954380
    },
    {
      "epoch": 4.834940397871212,
      "grad_norm": 0.19552911818027496,
      "learning_rate": 3.3229443276687997e-07,
      "loss": 0.0072,
      "step": 2954400
    },
    {
      "epoch": 4.834973128309866,
      "grad_norm": 0.1893293857574463,
      "learning_rate": 3.322285405533628e-07,
      "loss": 0.0071,
      "step": 2954420
    },
    {
      "epoch": 4.835005858748519,
      "grad_norm": 0.3614337146282196,
      "learning_rate": 3.321626483398457e-07,
      "loss": 0.0108,
      "step": 2954440
    },
    {
      "epoch": 4.835038589187173,
      "grad_norm": 0.12693005800247192,
      "learning_rate": 3.3209675612632856e-07,
      "loss": 0.0052,
      "step": 2954460
    },
    {
      "epoch": 4.8350713196258255,
      "grad_norm": 0.0587705560028553,
      "learning_rate": 3.320308639128114e-07,
      "loss": 0.0138,
      "step": 2954480
    },
    {
      "epoch": 4.835104050064479,
      "grad_norm": 0.14976531267166138,
      "learning_rate": 3.319649716992943e-07,
      "loss": 0.006,
      "step": 2954500
    },
    {
      "epoch": 4.835136780503133,
      "grad_norm": 0.16470853984355927,
      "learning_rate": 3.3189907948577715e-07,
      "loss": 0.0058,
      "step": 2954520
    },
    {
      "epoch": 4.835169510941785,
      "grad_norm": 0.622587263584137,
      "learning_rate": 3.318331872722601e-07,
      "loss": 0.0072,
      "step": 2954540
    },
    {
      "epoch": 4.835202241380439,
      "grad_norm": 0.1574978232383728,
      "learning_rate": 3.317672950587429e-07,
      "loss": 0.006,
      "step": 2954560
    },
    {
      "epoch": 4.835234971819093,
      "grad_norm": 0.15639182925224304,
      "learning_rate": 3.3170140284522584e-07,
      "loss": 0.0073,
      "step": 2954580
    },
    {
      "epoch": 4.835267702257745,
      "grad_norm": 0.13133707642555237,
      "learning_rate": 3.316355106317087e-07,
      "loss": 0.0084,
      "step": 2954600
    },
    {
      "epoch": 4.835300432696399,
      "grad_norm": 0.04880976676940918,
      "learning_rate": 3.315696184181916e-07,
      "loss": 0.0066,
      "step": 2954620
    },
    {
      "epoch": 4.8353331631350525,
      "grad_norm": 0.37587830424308777,
      "learning_rate": 3.3150372620467443e-07,
      "loss": 0.0079,
      "step": 2954640
    },
    {
      "epoch": 4.835365893573706,
      "grad_norm": 0.14124305546283722,
      "learning_rate": 3.3143783399115733e-07,
      "loss": 0.0073,
      "step": 2954660
    },
    {
      "epoch": 4.835398624012359,
      "grad_norm": 0.254814088344574,
      "learning_rate": 3.313719417776402e-07,
      "loss": 0.0078,
      "step": 2954680
    },
    {
      "epoch": 4.835431354451012,
      "grad_norm": 0.3696926236152649,
      "learning_rate": 3.31306049564123e-07,
      "loss": 0.0115,
      "step": 2954700
    },
    {
      "epoch": 4.835464084889666,
      "grad_norm": 0.13250723481178284,
      "learning_rate": 3.312401573506059e-07,
      "loss": 0.0075,
      "step": 2954720
    },
    {
      "epoch": 4.835496815328319,
      "grad_norm": 0.621295690536499,
      "learning_rate": 3.3117426513708877e-07,
      "loss": 0.0092,
      "step": 2954740
    },
    {
      "epoch": 4.835529545766972,
      "grad_norm": 0.3070800304412842,
      "learning_rate": 3.3110837292357167e-07,
      "loss": 0.0106,
      "step": 2954760
    },
    {
      "epoch": 4.835562276205626,
      "grad_norm": 0.16747605800628662,
      "learning_rate": 3.310424807100545e-07,
      "loss": 0.0064,
      "step": 2954780
    },
    {
      "epoch": 4.835595006644279,
      "grad_norm": 0.34298333525657654,
      "learning_rate": 3.309765884965374e-07,
      "loss": 0.0145,
      "step": 2954800
    },
    {
      "epoch": 4.835627737082932,
      "grad_norm": 0.45884770154953003,
      "learning_rate": 3.3091069628302026e-07,
      "loss": 0.0116,
      "step": 2954820
    },
    {
      "epoch": 4.835660467521586,
      "grad_norm": 0.5187869071960449,
      "learning_rate": 3.3084480406950316e-07,
      "loss": 0.0093,
      "step": 2954840
    },
    {
      "epoch": 4.835693197960239,
      "grad_norm": 0.2228711098432541,
      "learning_rate": 3.30778911855986e-07,
      "loss": 0.0106,
      "step": 2954860
    },
    {
      "epoch": 4.835725928398892,
      "grad_norm": 0.15967436134815216,
      "learning_rate": 3.307130196424689e-07,
      "loss": 0.0104,
      "step": 2954880
    },
    {
      "epoch": 4.835758658837546,
      "grad_norm": 0.09415313601493835,
      "learning_rate": 3.3064712742895175e-07,
      "loss": 0.0081,
      "step": 2954900
    },
    {
      "epoch": 4.835791389276199,
      "grad_norm": 0.1688614785671234,
      "learning_rate": 3.305812352154346e-07,
      "loss": 0.008,
      "step": 2954920
    },
    {
      "epoch": 4.835824119714852,
      "grad_norm": 0.16407136619091034,
      "learning_rate": 3.305153430019175e-07,
      "loss": 0.0115,
      "step": 2954940
    },
    {
      "epoch": 4.8358568501535055,
      "grad_norm": 0.4986652433872223,
      "learning_rate": 3.3044945078840034e-07,
      "loss": 0.0087,
      "step": 2954960
    },
    {
      "epoch": 4.835889580592159,
      "grad_norm": 0.07890333235263824,
      "learning_rate": 3.3038355857488323e-07,
      "loss": 0.0085,
      "step": 2954980
    },
    {
      "epoch": 4.835922311030813,
      "grad_norm": 0.18311281502246857,
      "learning_rate": 3.303176663613661e-07,
      "loss": 0.0072,
      "step": 2955000
    },
    {
      "epoch": 4.835955041469465,
      "grad_norm": 0.21320755779743195,
      "learning_rate": 3.30251774147849e-07,
      "loss": 0.0076,
      "step": 2955020
    },
    {
      "epoch": 4.835987771908119,
      "grad_norm": 0.10241682082414627,
      "learning_rate": 3.301858819343318e-07,
      "loss": 0.0064,
      "step": 2955040
    },
    {
      "epoch": 4.836020502346773,
      "grad_norm": 0.40571829676628113,
      "learning_rate": 3.301199897208147e-07,
      "loss": 0.0066,
      "step": 2955060
    },
    {
      "epoch": 4.836053232785426,
      "grad_norm": 0.24079103767871857,
      "learning_rate": 3.3005409750729757e-07,
      "loss": 0.0089,
      "step": 2955080
    },
    {
      "epoch": 4.836085963224079,
      "grad_norm": 0.03622501716017723,
      "learning_rate": 3.2998820529378047e-07,
      "loss": 0.0101,
      "step": 2955100
    },
    {
      "epoch": 4.8361186936627325,
      "grad_norm": 0.39853066205978394,
      "learning_rate": 3.299223130802633e-07,
      "loss": 0.0118,
      "step": 2955120
    },
    {
      "epoch": 4.836151424101386,
      "grad_norm": 0.19329553842544556,
      "learning_rate": 3.2985642086674616e-07,
      "loss": 0.0098,
      "step": 2955140
    },
    {
      "epoch": 4.836184154540039,
      "grad_norm": 0.18486297130584717,
      "learning_rate": 3.2979052865322906e-07,
      "loss": 0.0091,
      "step": 2955160
    },
    {
      "epoch": 4.836216884978692,
      "grad_norm": 0.12516388297080994,
      "learning_rate": 3.297246364397119e-07,
      "loss": 0.0099,
      "step": 2955180
    },
    {
      "epoch": 4.836249615417346,
      "grad_norm": 0.09609971195459366,
      "learning_rate": 3.2965874422619485e-07,
      "loss": 0.0117,
      "step": 2955200
    },
    {
      "epoch": 4.836282345855999,
      "grad_norm": 0.22135892510414124,
      "learning_rate": 3.2959285201267765e-07,
      "loss": 0.0062,
      "step": 2955220
    },
    {
      "epoch": 4.836315076294652,
      "grad_norm": 0.24982252717018127,
      "learning_rate": 3.295269597991606e-07,
      "loss": 0.0081,
      "step": 2955240
    },
    {
      "epoch": 4.836347806733306,
      "grad_norm": 0.20643240213394165,
      "learning_rate": 3.2946106758564344e-07,
      "loss": 0.0085,
      "step": 2955260
    },
    {
      "epoch": 4.8363805371719595,
      "grad_norm": 0.3034592270851135,
      "learning_rate": 3.2939517537212634e-07,
      "loss": 0.0109,
      "step": 2955280
    },
    {
      "epoch": 4.836413267610612,
      "grad_norm": 0.39009928703308105,
      "learning_rate": 3.293292831586092e-07,
      "loss": 0.0064,
      "step": 2955300
    },
    {
      "epoch": 4.836445998049266,
      "grad_norm": 0.2923569083213806,
      "learning_rate": 3.292633909450921e-07,
      "loss": 0.0102,
      "step": 2955320
    },
    {
      "epoch": 4.836478728487919,
      "grad_norm": 0.4758468270301819,
      "learning_rate": 3.2919749873157493e-07,
      "loss": 0.0108,
      "step": 2955340
    },
    {
      "epoch": 4.836511458926573,
      "grad_norm": 0.23463018238544464,
      "learning_rate": 3.291316065180578e-07,
      "loss": 0.0086,
      "step": 2955360
    },
    {
      "epoch": 4.836544189365226,
      "grad_norm": 0.9589948654174805,
      "learning_rate": 3.290657143045407e-07,
      "loss": 0.0094,
      "step": 2955380
    },
    {
      "epoch": 4.836576919803879,
      "grad_norm": 0.37398654222488403,
      "learning_rate": 3.289998220910235e-07,
      "loss": 0.0082,
      "step": 2955400
    },
    {
      "epoch": 4.836609650242533,
      "grad_norm": 0.697841227054596,
      "learning_rate": 3.289339298775064e-07,
      "loss": 0.0083,
      "step": 2955420
    },
    {
      "epoch": 4.836642380681186,
      "grad_norm": 0.1536671668291092,
      "learning_rate": 3.2886803766398927e-07,
      "loss": 0.0084,
      "step": 2955440
    },
    {
      "epoch": 4.836675111119839,
      "grad_norm": 0.11501704901456833,
      "learning_rate": 3.2880214545047216e-07,
      "loss": 0.0072,
      "step": 2955460
    },
    {
      "epoch": 4.836707841558493,
      "grad_norm": 0.2344219982624054,
      "learning_rate": 3.28736253236955e-07,
      "loss": 0.0058,
      "step": 2955480
    },
    {
      "epoch": 4.8367405719971455,
      "grad_norm": 0.27573227882385254,
      "learning_rate": 3.286703610234379e-07,
      "loss": 0.0098,
      "step": 2955500
    },
    {
      "epoch": 4.836773302435799,
      "grad_norm": 0.11690475046634674,
      "learning_rate": 3.2860446880992075e-07,
      "loss": 0.008,
      "step": 2955520
    },
    {
      "epoch": 4.836806032874453,
      "grad_norm": 0.2916664779186249,
      "learning_rate": 3.2853857659640365e-07,
      "loss": 0.0061,
      "step": 2955540
    },
    {
      "epoch": 4.836838763313106,
      "grad_norm": 0.3125062584877014,
      "learning_rate": 3.284726843828865e-07,
      "loss": 0.0083,
      "step": 2955560
    },
    {
      "epoch": 4.836871493751759,
      "grad_norm": 0.2872719168663025,
      "learning_rate": 3.2840679216936934e-07,
      "loss": 0.0086,
      "step": 2955580
    },
    {
      "epoch": 4.8369042241904125,
      "grad_norm": 0.16123951971530914,
      "learning_rate": 3.2834089995585224e-07,
      "loss": 0.0133,
      "step": 2955600
    },
    {
      "epoch": 4.836936954629066,
      "grad_norm": 0.3347000181674957,
      "learning_rate": 3.282750077423351e-07,
      "loss": 0.0046,
      "step": 2955620
    },
    {
      "epoch": 4.83696968506772,
      "grad_norm": 0.13007155060768127,
      "learning_rate": 3.28209115528818e-07,
      "loss": 0.0059,
      "step": 2955640
    },
    {
      "epoch": 4.837002415506372,
      "grad_norm": 0.4191306233406067,
      "learning_rate": 3.2814322331530083e-07,
      "loss": 0.0078,
      "step": 2955660
    },
    {
      "epoch": 4.837035145945026,
      "grad_norm": 0.30419841408729553,
      "learning_rate": 3.2807733110178373e-07,
      "loss": 0.0174,
      "step": 2955680
    },
    {
      "epoch": 4.83706787638368,
      "grad_norm": 0.5978856682777405,
      "learning_rate": 3.280114388882666e-07,
      "loss": 0.0102,
      "step": 2955700
    },
    {
      "epoch": 4.837100606822332,
      "grad_norm": 0.4431253671646118,
      "learning_rate": 3.279455466747495e-07,
      "loss": 0.0083,
      "step": 2955720
    },
    {
      "epoch": 4.837133337260986,
      "grad_norm": 0.5424325466156006,
      "learning_rate": 3.278796544612323e-07,
      "loss": 0.0116,
      "step": 2955740
    },
    {
      "epoch": 4.8371660676996395,
      "grad_norm": 0.11420880258083344,
      "learning_rate": 3.278137622477152e-07,
      "loss": 0.0165,
      "step": 2955760
    },
    {
      "epoch": 4.837198798138292,
      "grad_norm": 0.2261643409729004,
      "learning_rate": 3.2774787003419806e-07,
      "loss": 0.0072,
      "step": 2955780
    },
    {
      "epoch": 4.837231528576946,
      "grad_norm": 0.39706024527549744,
      "learning_rate": 3.276819778206809e-07,
      "loss": 0.0067,
      "step": 2955800
    },
    {
      "epoch": 4.837264259015599,
      "grad_norm": 0.10948880016803741,
      "learning_rate": 3.276160856071638e-07,
      "loss": 0.0063,
      "step": 2955820
    },
    {
      "epoch": 4.837296989454253,
      "grad_norm": 0.2063513994216919,
      "learning_rate": 3.2755019339364665e-07,
      "loss": 0.006,
      "step": 2955840
    },
    {
      "epoch": 4.837329719892906,
      "grad_norm": 0.2724912166595459,
      "learning_rate": 3.274843011801296e-07,
      "loss": 0.0087,
      "step": 2955860
    },
    {
      "epoch": 4.837362450331559,
      "grad_norm": 0.1637793481349945,
      "learning_rate": 3.274184089666124e-07,
      "loss": 0.0104,
      "step": 2955880
    },
    {
      "epoch": 4.837395180770213,
      "grad_norm": 1.0190430879592896,
      "learning_rate": 3.2735251675309535e-07,
      "loss": 0.0108,
      "step": 2955900
    },
    {
      "epoch": 4.8374279112088665,
      "grad_norm": 0.215607151389122,
      "learning_rate": 3.272866245395782e-07,
      "loss": 0.0056,
      "step": 2955920
    },
    {
      "epoch": 4.837460641647519,
      "grad_norm": 0.26491764187812805,
      "learning_rate": 3.272207323260611e-07,
      "loss": 0.0089,
      "step": 2955940
    },
    {
      "epoch": 4.837493372086173,
      "grad_norm": 0.13799771666526794,
      "learning_rate": 3.2715484011254394e-07,
      "loss": 0.0065,
      "step": 2955960
    },
    {
      "epoch": 4.837526102524826,
      "grad_norm": 0.08229438215494156,
      "learning_rate": 3.2708894789902684e-07,
      "loss": 0.0069,
      "step": 2955980
    },
    {
      "epoch": 4.837558832963479,
      "grad_norm": 0.0686449185013771,
      "learning_rate": 3.270230556855097e-07,
      "loss": 0.0079,
      "step": 2956000
    },
    {
      "epoch": 4.837591563402133,
      "grad_norm": 0.1306886374950409,
      "learning_rate": 3.2695716347199253e-07,
      "loss": 0.0042,
      "step": 2956020
    },
    {
      "epoch": 4.837624293840786,
      "grad_norm": 0.1981515735387802,
      "learning_rate": 3.2689127125847543e-07,
      "loss": 0.0123,
      "step": 2956040
    },
    {
      "epoch": 4.837657024279439,
      "grad_norm": 0.35597261786460876,
      "learning_rate": 3.268253790449583e-07,
      "loss": 0.0116,
      "step": 2956060
    },
    {
      "epoch": 4.837689754718093,
      "grad_norm": 0.2555411756038666,
      "learning_rate": 3.2675948683144117e-07,
      "loss": 0.0082,
      "step": 2956080
    },
    {
      "epoch": 4.837722485156746,
      "grad_norm": 0.3194204866886139,
      "learning_rate": 3.26693594617924e-07,
      "loss": 0.0084,
      "step": 2956100
    },
    {
      "epoch": 4.8377552155954,
      "grad_norm": 0.08700136840343475,
      "learning_rate": 3.266277024044069e-07,
      "loss": 0.0088,
      "step": 2956120
    },
    {
      "epoch": 4.8377879460340525,
      "grad_norm": 0.07917027920484543,
      "learning_rate": 3.2656181019088976e-07,
      "loss": 0.0064,
      "step": 2956140
    },
    {
      "epoch": 4.837820676472706,
      "grad_norm": 0.10799281299114227,
      "learning_rate": 3.2649591797737266e-07,
      "loss": 0.0074,
      "step": 2956160
    },
    {
      "epoch": 4.83785340691136,
      "grad_norm": 0.8545514941215515,
      "learning_rate": 3.264300257638555e-07,
      "loss": 0.0157,
      "step": 2956180
    },
    {
      "epoch": 4.837886137350012,
      "grad_norm": 0.373990535736084,
      "learning_rate": 3.263641335503384e-07,
      "loss": 0.008,
      "step": 2956200
    },
    {
      "epoch": 4.837918867788666,
      "grad_norm": 0.09264751523733139,
      "learning_rate": 3.2629824133682125e-07,
      "loss": 0.0079,
      "step": 2956220
    },
    {
      "epoch": 4.83795159822732,
      "grad_norm": 0.2772723138332367,
      "learning_rate": 3.262323491233041e-07,
      "loss": 0.0089,
      "step": 2956240
    },
    {
      "epoch": 4.837984328665973,
      "grad_norm": 0.35349422693252563,
      "learning_rate": 3.26166456909787e-07,
      "loss": 0.0133,
      "step": 2956260
    },
    {
      "epoch": 4.838017059104626,
      "grad_norm": 0.42872750759124756,
      "learning_rate": 3.2610056469626984e-07,
      "loss": 0.01,
      "step": 2956280
    },
    {
      "epoch": 4.8380497895432795,
      "grad_norm": 0.3102889358997345,
      "learning_rate": 3.2603467248275274e-07,
      "loss": 0.0083,
      "step": 2956300
    },
    {
      "epoch": 4.838082519981933,
      "grad_norm": 0.5459870100021362,
      "learning_rate": 3.259687802692356e-07,
      "loss": 0.01,
      "step": 2956320
    },
    {
      "epoch": 4.838115250420586,
      "grad_norm": 0.10370032489299774,
      "learning_rate": 3.259028880557185e-07,
      "loss": 0.0073,
      "step": 2956340
    },
    {
      "epoch": 4.838147980859239,
      "grad_norm": 0.7769711017608643,
      "learning_rate": 3.2583699584220133e-07,
      "loss": 0.0128,
      "step": 2956360
    },
    {
      "epoch": 4.838180711297893,
      "grad_norm": 0.49611857533454895,
      "learning_rate": 3.2577110362868423e-07,
      "loss": 0.0104,
      "step": 2956380
    },
    {
      "epoch": 4.838213441736546,
      "grad_norm": 0.2589914798736572,
      "learning_rate": 3.2570521141516707e-07,
      "loss": 0.0076,
      "step": 2956400
    },
    {
      "epoch": 4.838246172175199,
      "grad_norm": 0.7228131890296936,
      "learning_rate": 3.2563931920164997e-07,
      "loss": 0.0084,
      "step": 2956420
    },
    {
      "epoch": 4.838278902613853,
      "grad_norm": 0.04365387558937073,
      "learning_rate": 3.255734269881328e-07,
      "loss": 0.0074,
      "step": 2956440
    },
    {
      "epoch": 4.8383116330525064,
      "grad_norm": 0.2164659947156906,
      "learning_rate": 3.2550753477461566e-07,
      "loss": 0.0093,
      "step": 2956460
    },
    {
      "epoch": 4.838344363491159,
      "grad_norm": 0.21626834571361542,
      "learning_rate": 3.2544164256109856e-07,
      "loss": 0.0081,
      "step": 2956480
    },
    {
      "epoch": 4.838377093929813,
      "grad_norm": 0.14355388283729553,
      "learning_rate": 3.253757503475814e-07,
      "loss": 0.0094,
      "step": 2956500
    },
    {
      "epoch": 4.838409824368466,
      "grad_norm": 0.2881017327308655,
      "learning_rate": 3.2530985813406436e-07,
      "loss": 0.007,
      "step": 2956520
    },
    {
      "epoch": 4.83844255480712,
      "grad_norm": 0.2175857424736023,
      "learning_rate": 3.2524396592054715e-07,
      "loss": 0.0106,
      "step": 2956540
    },
    {
      "epoch": 4.838475285245773,
      "grad_norm": 0.04441063851118088,
      "learning_rate": 3.251780737070301e-07,
      "loss": 0.0075,
      "step": 2956560
    },
    {
      "epoch": 4.838508015684426,
      "grad_norm": 0.13796336948871613,
      "learning_rate": 3.2511218149351295e-07,
      "loss": 0.0058,
      "step": 2956580
    },
    {
      "epoch": 4.83854074612308,
      "grad_norm": 0.3587503433227539,
      "learning_rate": 3.2504628927999585e-07,
      "loss": 0.0122,
      "step": 2956600
    },
    {
      "epoch": 4.8385734765617325,
      "grad_norm": 0.16425246000289917,
      "learning_rate": 3.249803970664787e-07,
      "loss": 0.0135,
      "step": 2956620
    },
    {
      "epoch": 4.838606207000386,
      "grad_norm": 0.2141776829957962,
      "learning_rate": 3.249145048529616e-07,
      "loss": 0.0052,
      "step": 2956640
    },
    {
      "epoch": 4.83863893743904,
      "grad_norm": 0.5271403789520264,
      "learning_rate": 3.2484861263944444e-07,
      "loss": 0.0084,
      "step": 2956660
    },
    {
      "epoch": 4.838671667877692,
      "grad_norm": 0.1691323220729828,
      "learning_rate": 3.247827204259273e-07,
      "loss": 0.0062,
      "step": 2956680
    },
    {
      "epoch": 4.838704398316346,
      "grad_norm": 0.2136615365743637,
      "learning_rate": 3.247168282124102e-07,
      "loss": 0.0154,
      "step": 2956700
    },
    {
      "epoch": 4.838737128755,
      "grad_norm": 0.1424962282180786,
      "learning_rate": 3.2465093599889303e-07,
      "loss": 0.0115,
      "step": 2956720
    },
    {
      "epoch": 4.838769859193653,
      "grad_norm": 0.08386575430631638,
      "learning_rate": 3.245850437853759e-07,
      "loss": 0.0144,
      "step": 2956740
    },
    {
      "epoch": 4.838802589632306,
      "grad_norm": 0.10762334614992142,
      "learning_rate": 3.2451915157185877e-07,
      "loss": 0.0069,
      "step": 2956760
    },
    {
      "epoch": 4.8388353200709595,
      "grad_norm": 0.16929520666599274,
      "learning_rate": 3.2445325935834167e-07,
      "loss": 0.0108,
      "step": 2956780
    },
    {
      "epoch": 4.838868050509613,
      "grad_norm": 0.3543485105037689,
      "learning_rate": 3.243873671448245e-07,
      "loss": 0.0078,
      "step": 2956800
    },
    {
      "epoch": 4.838900780948267,
      "grad_norm": 0.2821173071861267,
      "learning_rate": 3.243214749313074e-07,
      "loss": 0.0091,
      "step": 2956820
    },
    {
      "epoch": 4.838933511386919,
      "grad_norm": 0.86981201171875,
      "learning_rate": 3.2425558271779026e-07,
      "loss": 0.0129,
      "step": 2956840
    },
    {
      "epoch": 4.838966241825573,
      "grad_norm": 0.04705292358994484,
      "learning_rate": 3.2418969050427316e-07,
      "loss": 0.0076,
      "step": 2956860
    },
    {
      "epoch": 4.838998972264227,
      "grad_norm": 0.24773217737674713,
      "learning_rate": 3.24123798290756e-07,
      "loss": 0.0106,
      "step": 2956880
    },
    {
      "epoch": 4.839031702702879,
      "grad_norm": 0.1921522617340088,
      "learning_rate": 3.2405790607723885e-07,
      "loss": 0.0081,
      "step": 2956900
    },
    {
      "epoch": 4.839064433141533,
      "grad_norm": 0.23274756968021393,
      "learning_rate": 3.2399201386372175e-07,
      "loss": 0.0083,
      "step": 2956920
    },
    {
      "epoch": 4.8390971635801865,
      "grad_norm": 0.42284679412841797,
      "learning_rate": 3.239261216502046e-07,
      "loss": 0.007,
      "step": 2956940
    },
    {
      "epoch": 4.839129894018839,
      "grad_norm": 0.18091996014118195,
      "learning_rate": 3.238602294366875e-07,
      "loss": 0.0065,
      "step": 2956960
    },
    {
      "epoch": 4.839162624457493,
      "grad_norm": 0.3995554447174072,
      "learning_rate": 3.2379433722317034e-07,
      "loss": 0.0126,
      "step": 2956980
    },
    {
      "epoch": 4.839195354896146,
      "grad_norm": 0.06891784071922302,
      "learning_rate": 3.2372844500965324e-07,
      "loss": 0.0075,
      "step": 2957000
    },
    {
      "epoch": 4.8392280853348,
      "grad_norm": 0.12742607295513153,
      "learning_rate": 3.236625527961361e-07,
      "loss": 0.0054,
      "step": 2957020
    },
    {
      "epoch": 4.839260815773453,
      "grad_norm": 0.32340168952941895,
      "learning_rate": 3.23596660582619e-07,
      "loss": 0.0116,
      "step": 2957040
    },
    {
      "epoch": 4.839293546212106,
      "grad_norm": 0.16888444125652313,
      "learning_rate": 3.235307683691018e-07,
      "loss": 0.0065,
      "step": 2957060
    },
    {
      "epoch": 4.83932627665076,
      "grad_norm": 0.07529760897159576,
      "learning_rate": 3.234648761555847e-07,
      "loss": 0.0064,
      "step": 2957080
    },
    {
      "epoch": 4.8393590070894135,
      "grad_norm": 0.36949434876441956,
      "learning_rate": 3.2339898394206757e-07,
      "loss": 0.0098,
      "step": 2957100
    },
    {
      "epoch": 4.839391737528066,
      "grad_norm": 0.18453651666641235,
      "learning_rate": 3.233330917285504e-07,
      "loss": 0.0105,
      "step": 2957120
    },
    {
      "epoch": 4.83942446796672,
      "grad_norm": 0.2725875973701477,
      "learning_rate": 3.232671995150333e-07,
      "loss": 0.0068,
      "step": 2957140
    },
    {
      "epoch": 4.839457198405373,
      "grad_norm": 0.3464377820491791,
      "learning_rate": 3.2320130730151616e-07,
      "loss": 0.0092,
      "step": 2957160
    },
    {
      "epoch": 4.839489928844026,
      "grad_norm": 0.3132546842098236,
      "learning_rate": 3.231354150879991e-07,
      "loss": 0.0143,
      "step": 2957180
    },
    {
      "epoch": 4.83952265928268,
      "grad_norm": 0.22367112338542938,
      "learning_rate": 3.230695228744819e-07,
      "loss": 0.0059,
      "step": 2957200
    },
    {
      "epoch": 4.839555389721333,
      "grad_norm": 0.3922022879123688,
      "learning_rate": 3.2300363066096486e-07,
      "loss": 0.0099,
      "step": 2957220
    },
    {
      "epoch": 4.839588120159986,
      "grad_norm": 0.0682772547006607,
      "learning_rate": 3.229377384474477e-07,
      "loss": 0.0113,
      "step": 2957240
    },
    {
      "epoch": 4.83962085059864,
      "grad_norm": 0.42563262581825256,
      "learning_rate": 3.228718462339306e-07,
      "loss": 0.0059,
      "step": 2957260
    },
    {
      "epoch": 4.839653581037293,
      "grad_norm": 0.1937764585018158,
      "learning_rate": 3.2280595402041345e-07,
      "loss": 0.0073,
      "step": 2957280
    },
    {
      "epoch": 4.839686311475947,
      "grad_norm": 0.030215011909604073,
      "learning_rate": 3.2274006180689634e-07,
      "loss": 0.0133,
      "step": 2957300
    },
    {
      "epoch": 4.8397190419145995,
      "grad_norm": 0.16723570227622986,
      "learning_rate": 3.226741695933792e-07,
      "loss": 0.007,
      "step": 2957320
    },
    {
      "epoch": 4.839751772353253,
      "grad_norm": 0.2621612846851349,
      "learning_rate": 3.2260827737986203e-07,
      "loss": 0.0063,
      "step": 2957340
    },
    {
      "epoch": 4.839784502791907,
      "grad_norm": 0.45685410499572754,
      "learning_rate": 3.2254238516634493e-07,
      "loss": 0.0123,
      "step": 2957360
    },
    {
      "epoch": 4.83981723323056,
      "grad_norm": 0.2221459001302719,
      "learning_rate": 3.224764929528278e-07,
      "loss": 0.0065,
      "step": 2957380
    },
    {
      "epoch": 4.839849963669213,
      "grad_norm": 0.22128964960575104,
      "learning_rate": 3.224106007393107e-07,
      "loss": 0.009,
      "step": 2957400
    },
    {
      "epoch": 4.8398826941078665,
      "grad_norm": 0.18483486771583557,
      "learning_rate": 3.223447085257935e-07,
      "loss": 0.0091,
      "step": 2957420
    },
    {
      "epoch": 4.83991542454652,
      "grad_norm": 0.2198699414730072,
      "learning_rate": 3.222788163122764e-07,
      "loss": 0.007,
      "step": 2957440
    },
    {
      "epoch": 4.839948154985173,
      "grad_norm": 0.18320150673389435,
      "learning_rate": 3.2221292409875927e-07,
      "loss": 0.0079,
      "step": 2957460
    },
    {
      "epoch": 4.839980885423826,
      "grad_norm": 0.26018446683883667,
      "learning_rate": 3.2214703188524217e-07,
      "loss": 0.0066,
      "step": 2957480
    },
    {
      "epoch": 4.84001361586248,
      "grad_norm": 0.27104857563972473,
      "learning_rate": 3.22081139671725e-07,
      "loss": 0.0081,
      "step": 2957500
    },
    {
      "epoch": 4.840046346301133,
      "grad_norm": 0.11137069016695023,
      "learning_rate": 3.220152474582079e-07,
      "loss": 0.0102,
      "step": 2957520
    },
    {
      "epoch": 4.840079076739786,
      "grad_norm": 0.09280826896429062,
      "learning_rate": 3.2194935524469076e-07,
      "loss": 0.009,
      "step": 2957540
    },
    {
      "epoch": 4.84011180717844,
      "grad_norm": 0.23639334738254547,
      "learning_rate": 3.218834630311736e-07,
      "loss": 0.0111,
      "step": 2957560
    },
    {
      "epoch": 4.8401445376170935,
      "grad_norm": 0.8621789216995239,
      "learning_rate": 3.218175708176565e-07,
      "loss": 0.0101,
      "step": 2957580
    },
    {
      "epoch": 4.840177268055746,
      "grad_norm": 0.25325438380241394,
      "learning_rate": 3.2175167860413935e-07,
      "loss": 0.0089,
      "step": 2957600
    },
    {
      "epoch": 4.8402099984944,
      "grad_norm": 0.41689395904541016,
      "learning_rate": 3.2168578639062224e-07,
      "loss": 0.0082,
      "step": 2957620
    },
    {
      "epoch": 4.840242728933053,
      "grad_norm": 0.16271762549877167,
      "learning_rate": 3.216198941771051e-07,
      "loss": 0.0094,
      "step": 2957640
    },
    {
      "epoch": 4.840275459371706,
      "grad_norm": 0.1098024845123291,
      "learning_rate": 3.21554001963588e-07,
      "loss": 0.0092,
      "step": 2957660
    },
    {
      "epoch": 4.84030818981036,
      "grad_norm": 0.1390860378742218,
      "learning_rate": 3.2148810975007083e-07,
      "loss": 0.0049,
      "step": 2957680
    },
    {
      "epoch": 4.840340920249013,
      "grad_norm": 0.38488495349884033,
      "learning_rate": 3.2142221753655373e-07,
      "loss": 0.0074,
      "step": 2957700
    },
    {
      "epoch": 4.840373650687667,
      "grad_norm": 0.1431606560945511,
      "learning_rate": 3.213563253230366e-07,
      "loss": 0.0076,
      "step": 2957720
    },
    {
      "epoch": 4.84040638112632,
      "grad_norm": 0.17946404218673706,
      "learning_rate": 3.2129043310951953e-07,
      "loss": 0.0066,
      "step": 2957740
    },
    {
      "epoch": 4.840439111564973,
      "grad_norm": 0.251090943813324,
      "learning_rate": 3.212245408960023e-07,
      "loss": 0.0084,
      "step": 2957760
    },
    {
      "epoch": 4.840471842003627,
      "grad_norm": 0.6764624118804932,
      "learning_rate": 3.2115864868248517e-07,
      "loss": 0.0117,
      "step": 2957780
    },
    {
      "epoch": 4.8405045724422795,
      "grad_norm": 0.15254034101963043,
      "learning_rate": 3.2109275646896807e-07,
      "loss": 0.0147,
      "step": 2957800
    },
    {
      "epoch": 4.840537302880933,
      "grad_norm": 0.20990467071533203,
      "learning_rate": 3.210268642554509e-07,
      "loss": 0.0076,
      "step": 2957820
    },
    {
      "epoch": 4.840570033319587,
      "grad_norm": 0.33510851860046387,
      "learning_rate": 3.2096097204193386e-07,
      "loss": 0.0073,
      "step": 2957840
    },
    {
      "epoch": 4.840602763758239,
      "grad_norm": 0.12493935227394104,
      "learning_rate": 3.2089507982841666e-07,
      "loss": 0.0075,
      "step": 2957860
    },
    {
      "epoch": 4.840635494196893,
      "grad_norm": 0.2351738065481186,
      "learning_rate": 3.208291876148996e-07,
      "loss": 0.0077,
      "step": 2957880
    },
    {
      "epoch": 4.840668224635547,
      "grad_norm": 0.2980858087539673,
      "learning_rate": 3.2076329540138245e-07,
      "loss": 0.0094,
      "step": 2957900
    },
    {
      "epoch": 4.8407009550742,
      "grad_norm": 0.223854199051857,
      "learning_rate": 3.2069740318786535e-07,
      "loss": 0.0062,
      "step": 2957920
    },
    {
      "epoch": 4.840733685512853,
      "grad_norm": 0.2738150954246521,
      "learning_rate": 3.206315109743482e-07,
      "loss": 0.0097,
      "step": 2957940
    },
    {
      "epoch": 4.8407664159515065,
      "grad_norm": 0.28029051423072815,
      "learning_rate": 3.205656187608311e-07,
      "loss": 0.0061,
      "step": 2957960
    },
    {
      "epoch": 4.84079914639016,
      "grad_norm": 0.35373249650001526,
      "learning_rate": 3.2049972654731394e-07,
      "loss": 0.0115,
      "step": 2957980
    },
    {
      "epoch": 4.840831876828814,
      "grad_norm": 0.31330570578575134,
      "learning_rate": 3.204338343337968e-07,
      "loss": 0.0093,
      "step": 2958000
    },
    {
      "epoch": 4.840864607267466,
      "grad_norm": 0.25846943259239197,
      "learning_rate": 3.203679421202797e-07,
      "loss": 0.0078,
      "step": 2958020
    },
    {
      "epoch": 4.84089733770612,
      "grad_norm": 0.23113420605659485,
      "learning_rate": 3.2030204990676253e-07,
      "loss": 0.0158,
      "step": 2958040
    },
    {
      "epoch": 4.840930068144774,
      "grad_norm": 0.21099603176116943,
      "learning_rate": 3.2023615769324543e-07,
      "loss": 0.0103,
      "step": 2958060
    },
    {
      "epoch": 4.840962798583426,
      "grad_norm": 0.0594782792031765,
      "learning_rate": 3.201702654797283e-07,
      "loss": 0.0116,
      "step": 2958080
    },
    {
      "epoch": 4.84099552902208,
      "grad_norm": 0.07906071841716766,
      "learning_rate": 3.201043732662112e-07,
      "loss": 0.0156,
      "step": 2958100
    },
    {
      "epoch": 4.8410282594607335,
      "grad_norm": 0.09031858295202255,
      "learning_rate": 3.20038481052694e-07,
      "loss": 0.0082,
      "step": 2958120
    },
    {
      "epoch": 4.841060989899386,
      "grad_norm": 0.16438095271587372,
      "learning_rate": 3.199725888391769e-07,
      "loss": 0.0088,
      "step": 2958140
    },
    {
      "epoch": 4.84109372033804,
      "grad_norm": 0.18261325359344482,
      "learning_rate": 3.1990669662565976e-07,
      "loss": 0.0073,
      "step": 2958160
    },
    {
      "epoch": 4.841126450776693,
      "grad_norm": 0.1834186166524887,
      "learning_rate": 3.1984080441214266e-07,
      "loss": 0.0062,
      "step": 2958180
    },
    {
      "epoch": 4.841159181215347,
      "grad_norm": 0.052335601300001144,
      "learning_rate": 3.197749121986255e-07,
      "loss": 0.005,
      "step": 2958200
    },
    {
      "epoch": 4.841191911654,
      "grad_norm": 0.37086448073387146,
      "learning_rate": 3.1970901998510835e-07,
      "loss": 0.0092,
      "step": 2958220
    },
    {
      "epoch": 4.841224642092653,
      "grad_norm": 0.19439738988876343,
      "learning_rate": 3.1964312777159125e-07,
      "loss": 0.0085,
      "step": 2958240
    },
    {
      "epoch": 4.841257372531307,
      "grad_norm": 0.2079899162054062,
      "learning_rate": 3.195772355580741e-07,
      "loss": 0.0102,
      "step": 2958260
    },
    {
      "epoch": 4.84129010296996,
      "grad_norm": 0.17384299635887146,
      "learning_rate": 3.19511343344557e-07,
      "loss": 0.0115,
      "step": 2958280
    },
    {
      "epoch": 4.841322833408613,
      "grad_norm": 0.11287063360214233,
      "learning_rate": 3.1944545113103984e-07,
      "loss": 0.0109,
      "step": 2958300
    },
    {
      "epoch": 4.841355563847267,
      "grad_norm": 0.13709503412246704,
      "learning_rate": 3.1937955891752274e-07,
      "loss": 0.0068,
      "step": 2958320
    },
    {
      "epoch": 4.84138829428592,
      "grad_norm": 0.08240266889333725,
      "learning_rate": 3.193136667040056e-07,
      "loss": 0.0079,
      "step": 2958340
    },
    {
      "epoch": 4.841421024724573,
      "grad_norm": 0.0966607928276062,
      "learning_rate": 3.192477744904885e-07,
      "loss": 0.0097,
      "step": 2958360
    },
    {
      "epoch": 4.841453755163227,
      "grad_norm": 0.32237470149993896,
      "learning_rate": 3.1918188227697133e-07,
      "loss": 0.012,
      "step": 2958380
    },
    {
      "epoch": 4.84148648560188,
      "grad_norm": 0.14845247566699982,
      "learning_rate": 3.191159900634543e-07,
      "loss": 0.009,
      "step": 2958400
    },
    {
      "epoch": 4.841519216040533,
      "grad_norm": 0.27002888917922974,
      "learning_rate": 3.190500978499371e-07,
      "loss": 0.0099,
      "step": 2958420
    },
    {
      "epoch": 4.8415519464791865,
      "grad_norm": 0.08512789756059647,
      "learning_rate": 3.189842056364199e-07,
      "loss": 0.0075,
      "step": 2958440
    },
    {
      "epoch": 4.84158467691784,
      "grad_norm": 0.18444721400737762,
      "learning_rate": 3.189183134229028e-07,
      "loss": 0.0069,
      "step": 2958460
    },
    {
      "epoch": 4.841617407356494,
      "grad_norm": 0.27485391497612,
      "learning_rate": 3.1885242120938566e-07,
      "loss": 0.0151,
      "step": 2958480
    },
    {
      "epoch": 4.841650137795146,
      "grad_norm": 0.3419675827026367,
      "learning_rate": 3.187865289958686e-07,
      "loss": 0.0074,
      "step": 2958500
    },
    {
      "epoch": 4.8416828682338,
      "grad_norm": 0.2703480124473572,
      "learning_rate": 3.187206367823514e-07,
      "loss": 0.0178,
      "step": 2958520
    },
    {
      "epoch": 4.841715598672454,
      "grad_norm": 0.28464481234550476,
      "learning_rate": 3.1865474456883436e-07,
      "loss": 0.0103,
      "step": 2958540
    },
    {
      "epoch": 4.841748329111107,
      "grad_norm": 0.1417859047651291,
      "learning_rate": 3.185888523553172e-07,
      "loss": 0.0113,
      "step": 2958560
    },
    {
      "epoch": 4.84178105954976,
      "grad_norm": 0.20406733453273773,
      "learning_rate": 3.185229601418001e-07,
      "loss": 0.0101,
      "step": 2958580
    },
    {
      "epoch": 4.8418137899884135,
      "grad_norm": 0.10347097367048264,
      "learning_rate": 3.1845706792828295e-07,
      "loss": 0.0095,
      "step": 2958600
    },
    {
      "epoch": 4.841846520427067,
      "grad_norm": 0.17502304911613464,
      "learning_rate": 3.1839117571476585e-07,
      "loss": 0.0094,
      "step": 2958620
    },
    {
      "epoch": 4.84187925086572,
      "grad_norm": 0.10178134590387344,
      "learning_rate": 3.183252835012487e-07,
      "loss": 0.006,
      "step": 2958640
    },
    {
      "epoch": 4.841911981304373,
      "grad_norm": 0.16954514384269714,
      "learning_rate": 3.182593912877316e-07,
      "loss": 0.0127,
      "step": 2958660
    },
    {
      "epoch": 4.841944711743027,
      "grad_norm": 0.08362708240747452,
      "learning_rate": 3.1819349907421444e-07,
      "loss": 0.0075,
      "step": 2958680
    },
    {
      "epoch": 4.84197744218168,
      "grad_norm": 0.3825191855430603,
      "learning_rate": 3.181276068606973e-07,
      "loss": 0.0118,
      "step": 2958700
    },
    {
      "epoch": 4.842010172620333,
      "grad_norm": 0.2372675985097885,
      "learning_rate": 3.180617146471802e-07,
      "loss": 0.0084,
      "step": 2958720
    },
    {
      "epoch": 4.842042903058987,
      "grad_norm": 0.1452251374721527,
      "learning_rate": 3.1799582243366303e-07,
      "loss": 0.0068,
      "step": 2958740
    },
    {
      "epoch": 4.8420756334976405,
      "grad_norm": 0.08490094542503357,
      "learning_rate": 3.1792993022014593e-07,
      "loss": 0.007,
      "step": 2958760
    },
    {
      "epoch": 4.842108363936293,
      "grad_norm": 0.22383524477481842,
      "learning_rate": 3.1786403800662877e-07,
      "loss": 0.0069,
      "step": 2958780
    },
    {
      "epoch": 4.842141094374947,
      "grad_norm": 0.14670617878437042,
      "learning_rate": 3.1779814579311167e-07,
      "loss": 0.0069,
      "step": 2958800
    },
    {
      "epoch": 4.8421738248136,
      "grad_norm": 0.27560949325561523,
      "learning_rate": 3.177322535795945e-07,
      "loss": 0.0097,
      "step": 2958820
    },
    {
      "epoch": 4.842206555252254,
      "grad_norm": 0.20779846608638763,
      "learning_rate": 3.176663613660774e-07,
      "loss": 0.0085,
      "step": 2958840
    },
    {
      "epoch": 4.842239285690907,
      "grad_norm": 0.2776464819908142,
      "learning_rate": 3.1760046915256026e-07,
      "loss": 0.0059,
      "step": 2958860
    },
    {
      "epoch": 4.84227201612956,
      "grad_norm": 0.17173342406749725,
      "learning_rate": 3.1753457693904316e-07,
      "loss": 0.0097,
      "step": 2958880
    },
    {
      "epoch": 4.842304746568214,
      "grad_norm": 0.15732255578041077,
      "learning_rate": 3.17468684725526e-07,
      "loss": 0.0113,
      "step": 2958900
    },
    {
      "epoch": 4.842337477006867,
      "grad_norm": 0.12556618452072144,
      "learning_rate": 3.1740279251200885e-07,
      "loss": 0.0085,
      "step": 2958920
    },
    {
      "epoch": 4.84237020744552,
      "grad_norm": 0.3009001612663269,
      "learning_rate": 3.1733690029849175e-07,
      "loss": 0.01,
      "step": 2958940
    },
    {
      "epoch": 4.842402937884174,
      "grad_norm": 0.5157320499420166,
      "learning_rate": 3.172710080849746e-07,
      "loss": 0.0097,
      "step": 2958960
    },
    {
      "epoch": 4.8424356683228265,
      "grad_norm": 0.47156062722206116,
      "learning_rate": 3.172051158714575e-07,
      "loss": 0.0076,
      "step": 2958980
    },
    {
      "epoch": 4.84246839876148,
      "grad_norm": 0.05644388496875763,
      "learning_rate": 3.1713922365794034e-07,
      "loss": 0.0064,
      "step": 2959000
    },
    {
      "epoch": 4.842501129200134,
      "grad_norm": 0.08826380223035812,
      "learning_rate": 3.1707333144442324e-07,
      "loss": 0.0084,
      "step": 2959020
    },
    {
      "epoch": 4.842533859638787,
      "grad_norm": 0.27609625458717346,
      "learning_rate": 3.170074392309061e-07,
      "loss": 0.0065,
      "step": 2959040
    },
    {
      "epoch": 4.84256659007744,
      "grad_norm": 0.11324741691350937,
      "learning_rate": 3.1694154701738904e-07,
      "loss": 0.0106,
      "step": 2959060
    },
    {
      "epoch": 4.8425993205160935,
      "grad_norm": 0.39070966839790344,
      "learning_rate": 3.1687565480387183e-07,
      "loss": 0.0079,
      "step": 2959080
    },
    {
      "epoch": 4.842632050954747,
      "grad_norm": 0.26388391852378845,
      "learning_rate": 3.168097625903548e-07,
      "loss": 0.011,
      "step": 2959100
    },
    {
      "epoch": 4.842664781393401,
      "grad_norm": 0.13426589965820312,
      "learning_rate": 3.1674387037683757e-07,
      "loss": 0.0066,
      "step": 2959120
    },
    {
      "epoch": 4.842697511832053,
      "grad_norm": 0.31437718868255615,
      "learning_rate": 3.166779781633204e-07,
      "loss": 0.0074,
      "step": 2959140
    },
    {
      "epoch": 4.842730242270707,
      "grad_norm": 0.11685808002948761,
      "learning_rate": 3.1661208594980337e-07,
      "loss": 0.008,
      "step": 2959160
    },
    {
      "epoch": 4.842762972709361,
      "grad_norm": 0.15205615758895874,
      "learning_rate": 3.1654619373628616e-07,
      "loss": 0.0146,
      "step": 2959180
    },
    {
      "epoch": 4.842795703148013,
      "grad_norm": 0.24824407696723938,
      "learning_rate": 3.164803015227691e-07,
      "loss": 0.0103,
      "step": 2959200
    },
    {
      "epoch": 4.842828433586667,
      "grad_norm": 0.06753268092870712,
      "learning_rate": 3.1641440930925196e-07,
      "loss": 0.0082,
      "step": 2959220
    },
    {
      "epoch": 4.8428611640253205,
      "grad_norm": 0.2130994349718094,
      "learning_rate": 3.1634851709573486e-07,
      "loss": 0.0116,
      "step": 2959240
    },
    {
      "epoch": 4.842893894463973,
      "grad_norm": 0.8874017000198364,
      "learning_rate": 3.162826248822177e-07,
      "loss": 0.0077,
      "step": 2959260
    },
    {
      "epoch": 4.842926624902627,
      "grad_norm": 0.32205474376678467,
      "learning_rate": 3.162167326687006e-07,
      "loss": 0.0103,
      "step": 2959280
    },
    {
      "epoch": 4.84295935534128,
      "grad_norm": 0.05260760709643364,
      "learning_rate": 3.1615084045518345e-07,
      "loss": 0.0119,
      "step": 2959300
    },
    {
      "epoch": 4.842992085779934,
      "grad_norm": 0.13451625406742096,
      "learning_rate": 3.1608494824166635e-07,
      "loss": 0.0201,
      "step": 2959320
    },
    {
      "epoch": 4.843024816218587,
      "grad_norm": 0.14393870532512665,
      "learning_rate": 3.160190560281492e-07,
      "loss": 0.0119,
      "step": 2959340
    },
    {
      "epoch": 4.84305754665724,
      "grad_norm": 0.29333049058914185,
      "learning_rate": 3.1595316381463204e-07,
      "loss": 0.0038,
      "step": 2959360
    },
    {
      "epoch": 4.843090277095894,
      "grad_norm": 0.17226676642894745,
      "learning_rate": 3.1588727160111494e-07,
      "loss": 0.0094,
      "step": 2959380
    },
    {
      "epoch": 4.843123007534547,
      "grad_norm": 0.28882357478141785,
      "learning_rate": 3.158213793875978e-07,
      "loss": 0.0095,
      "step": 2959400
    },
    {
      "epoch": 4.8431557379732,
      "grad_norm": 0.1279512494802475,
      "learning_rate": 3.157554871740807e-07,
      "loss": 0.0054,
      "step": 2959420
    },
    {
      "epoch": 4.843188468411854,
      "grad_norm": 0.09970983117818832,
      "learning_rate": 3.156895949605635e-07,
      "loss": 0.0122,
      "step": 2959440
    },
    {
      "epoch": 4.843221198850507,
      "grad_norm": 0.286475270986557,
      "learning_rate": 3.156237027470464e-07,
      "loss": 0.0061,
      "step": 2959460
    },
    {
      "epoch": 4.84325392928916,
      "grad_norm": 0.2755148708820343,
      "learning_rate": 3.1555781053352927e-07,
      "loss": 0.0109,
      "step": 2959480
    },
    {
      "epoch": 4.843286659727814,
      "grad_norm": 0.3587472140789032,
      "learning_rate": 3.1549191832001217e-07,
      "loss": 0.0114,
      "step": 2959500
    },
    {
      "epoch": 4.843319390166467,
      "grad_norm": 0.31734129786491394,
      "learning_rate": 3.15426026106495e-07,
      "loss": 0.0138,
      "step": 2959520
    },
    {
      "epoch": 4.84335212060512,
      "grad_norm": 0.10179363191127777,
      "learning_rate": 3.153601338929779e-07,
      "loss": 0.0083,
      "step": 2959540
    },
    {
      "epoch": 4.843384851043774,
      "grad_norm": 0.214837446808815,
      "learning_rate": 3.1529424167946076e-07,
      "loss": 0.0114,
      "step": 2959560
    },
    {
      "epoch": 4.843417581482427,
      "grad_norm": 0.5642401576042175,
      "learning_rate": 3.152283494659436e-07,
      "loss": 0.0075,
      "step": 2959580
    },
    {
      "epoch": 4.84345031192108,
      "grad_norm": 0.36256590485572815,
      "learning_rate": 3.151624572524265e-07,
      "loss": 0.0071,
      "step": 2959600
    },
    {
      "epoch": 4.8434830423597335,
      "grad_norm": 0.4176355004310608,
      "learning_rate": 3.1509656503890935e-07,
      "loss": 0.0121,
      "step": 2959620
    },
    {
      "epoch": 4.843515772798387,
      "grad_norm": 0.1985679417848587,
      "learning_rate": 3.1503067282539225e-07,
      "loss": 0.0108,
      "step": 2959640
    },
    {
      "epoch": 4.843548503237041,
      "grad_norm": 0.4760288596153259,
      "learning_rate": 3.149647806118751e-07,
      "loss": 0.0078,
      "step": 2959660
    },
    {
      "epoch": 4.843581233675693,
      "grad_norm": 0.20140545070171356,
      "learning_rate": 3.14898888398358e-07,
      "loss": 0.009,
      "step": 2959680
    },
    {
      "epoch": 4.843613964114347,
      "grad_norm": 0.39931365847587585,
      "learning_rate": 3.1483299618484084e-07,
      "loss": 0.0099,
      "step": 2959700
    },
    {
      "epoch": 4.843646694553001,
      "grad_norm": 0.288263738155365,
      "learning_rate": 3.147671039713238e-07,
      "loss": 0.0071,
      "step": 2959720
    },
    {
      "epoch": 4.843679424991654,
      "grad_norm": 0.1678793579339981,
      "learning_rate": 3.147012117578066e-07,
      "loss": 0.008,
      "step": 2959740
    },
    {
      "epoch": 4.843712155430307,
      "grad_norm": 0.11144664138555527,
      "learning_rate": 3.1463531954428953e-07,
      "loss": 0.0103,
      "step": 2959760
    },
    {
      "epoch": 4.8437448858689605,
      "grad_norm": 0.45961305499076843,
      "learning_rate": 3.145694273307723e-07,
      "loss": 0.0071,
      "step": 2959780
    },
    {
      "epoch": 4.843777616307614,
      "grad_norm": 0.046408142894506454,
      "learning_rate": 3.1450353511725517e-07,
      "loss": 0.0059,
      "step": 2959800
    },
    {
      "epoch": 4.843810346746267,
      "grad_norm": 0.3402092158794403,
      "learning_rate": 3.144376429037381e-07,
      "loss": 0.0103,
      "step": 2959820
    },
    {
      "epoch": 4.84384307718492,
      "grad_norm": 0.22358746826648712,
      "learning_rate": 3.143717506902209e-07,
      "loss": 0.01,
      "step": 2959840
    },
    {
      "epoch": 4.843875807623574,
      "grad_norm": 0.2993541955947876,
      "learning_rate": 3.1430585847670387e-07,
      "loss": 0.0107,
      "step": 2959860
    },
    {
      "epoch": 4.843908538062227,
      "grad_norm": 0.2373906970024109,
      "learning_rate": 3.142399662631867e-07,
      "loss": 0.0108,
      "step": 2959880
    },
    {
      "epoch": 4.84394126850088,
      "grad_norm": 0.1862836629152298,
      "learning_rate": 3.141740740496696e-07,
      "loss": 0.0092,
      "step": 2959900
    },
    {
      "epoch": 4.843973998939534,
      "grad_norm": 0.16331563889980316,
      "learning_rate": 3.1410818183615246e-07,
      "loss": 0.0059,
      "step": 2959920
    },
    {
      "epoch": 4.844006729378187,
      "grad_norm": 0.2764451205730438,
      "learning_rate": 3.1404228962263535e-07,
      "loss": 0.0067,
      "step": 2959940
    },
    {
      "epoch": 4.84403945981684,
      "grad_norm": 0.2708236277103424,
      "learning_rate": 3.139763974091182e-07,
      "loss": 0.0067,
      "step": 2959960
    },
    {
      "epoch": 4.844072190255494,
      "grad_norm": 0.6279857158660889,
      "learning_rate": 3.139105051956011e-07,
      "loss": 0.0072,
      "step": 2959980
    },
    {
      "epoch": 4.844104920694147,
      "grad_norm": 0.09451604634523392,
      "learning_rate": 3.1384461298208394e-07,
      "loss": 0.0108,
      "step": 2960000
    },
    {
      "epoch": 4.844137651132801,
      "grad_norm": 0.21889103949069977,
      "learning_rate": 3.137787207685668e-07,
      "loss": 0.0106,
      "step": 2960020
    },
    {
      "epoch": 4.844170381571454,
      "grad_norm": 0.2420988231897354,
      "learning_rate": 3.137128285550497e-07,
      "loss": 0.0095,
      "step": 2960040
    },
    {
      "epoch": 4.844203112010107,
      "grad_norm": 0.20553581416606903,
      "learning_rate": 3.1364693634153253e-07,
      "loss": 0.0085,
      "step": 2960060
    },
    {
      "epoch": 4.844235842448761,
      "grad_norm": 0.20001181960105896,
      "learning_rate": 3.1358104412801543e-07,
      "loss": 0.0066,
      "step": 2960080
    },
    {
      "epoch": 4.8442685728874135,
      "grad_norm": 0.1927487850189209,
      "learning_rate": 3.135151519144983e-07,
      "loss": 0.0083,
      "step": 2960100
    },
    {
      "epoch": 4.844301303326067,
      "grad_norm": 0.3741307258605957,
      "learning_rate": 3.134492597009812e-07,
      "loss": 0.0076,
      "step": 2960120
    },
    {
      "epoch": 4.844334033764721,
      "grad_norm": 0.06624247878789902,
      "learning_rate": 3.13383367487464e-07,
      "loss": 0.0078,
      "step": 2960140
    },
    {
      "epoch": 4.844366764203373,
      "grad_norm": 0.20236992835998535,
      "learning_rate": 3.133174752739469e-07,
      "loss": 0.0075,
      "step": 2960160
    },
    {
      "epoch": 4.844399494642027,
      "grad_norm": 0.24490006268024445,
      "learning_rate": 3.1325158306042977e-07,
      "loss": 0.0112,
      "step": 2960180
    },
    {
      "epoch": 4.844432225080681,
      "grad_norm": 0.19187498092651367,
      "learning_rate": 3.1318569084691267e-07,
      "loss": 0.0057,
      "step": 2960200
    },
    {
      "epoch": 4.844464955519334,
      "grad_norm": 0.12373993545770645,
      "learning_rate": 3.131197986333955e-07,
      "loss": 0.0097,
      "step": 2960220
    },
    {
      "epoch": 4.844497685957987,
      "grad_norm": 0.13564492762088776,
      "learning_rate": 3.1305390641987836e-07,
      "loss": 0.0054,
      "step": 2960240
    },
    {
      "epoch": 4.8445304163966405,
      "grad_norm": 0.28897589445114136,
      "learning_rate": 3.1298801420636125e-07,
      "loss": 0.0056,
      "step": 2960260
    },
    {
      "epoch": 4.844563146835294,
      "grad_norm": 0.15719331800937653,
      "learning_rate": 3.129221219928441e-07,
      "loss": 0.008,
      "step": 2960280
    },
    {
      "epoch": 4.844595877273948,
      "grad_norm": 0.22326593101024628,
      "learning_rate": 3.12856229779327e-07,
      "loss": 0.0087,
      "step": 2960300
    },
    {
      "epoch": 4.8446286077126,
      "grad_norm": 0.1694994568824768,
      "learning_rate": 3.1279033756580984e-07,
      "loss": 0.0043,
      "step": 2960320
    },
    {
      "epoch": 4.844661338151254,
      "grad_norm": 0.3116649091243744,
      "learning_rate": 3.1272444535229274e-07,
      "loss": 0.008,
      "step": 2960340
    },
    {
      "epoch": 4.844694068589908,
      "grad_norm": 0.17635133862495422,
      "learning_rate": 3.126585531387756e-07,
      "loss": 0.0105,
      "step": 2960360
    },
    {
      "epoch": 4.84472679902856,
      "grad_norm": 0.09746293723583221,
      "learning_rate": 3.1259266092525854e-07,
      "loss": 0.0065,
      "step": 2960380
    },
    {
      "epoch": 4.844759529467214,
      "grad_norm": 0.12790462374687195,
      "learning_rate": 3.1252676871174133e-07,
      "loss": 0.0073,
      "step": 2960400
    },
    {
      "epoch": 4.8447922599058675,
      "grad_norm": 0.17521163821220398,
      "learning_rate": 3.1246087649822423e-07,
      "loss": 0.0107,
      "step": 2960420
    },
    {
      "epoch": 4.84482499034452,
      "grad_norm": 0.28589582443237305,
      "learning_rate": 3.123949842847071e-07,
      "loss": 0.0116,
      "step": 2960440
    },
    {
      "epoch": 4.844857720783174,
      "grad_norm": 0.10882816463708878,
      "learning_rate": 3.1232909207119e-07,
      "loss": 0.0093,
      "step": 2960460
    },
    {
      "epoch": 4.844890451221827,
      "grad_norm": 0.2381850630044937,
      "learning_rate": 3.122631998576729e-07,
      "loss": 0.0111,
      "step": 2960480
    },
    {
      "epoch": 4.844923181660481,
      "grad_norm": 0.16445307433605194,
      "learning_rate": 3.121973076441557e-07,
      "loss": 0.0085,
      "step": 2960500
    },
    {
      "epoch": 4.844955912099134,
      "grad_norm": 0.14468730986118317,
      "learning_rate": 3.121314154306386e-07,
      "loss": 0.0084,
      "step": 2960520
    },
    {
      "epoch": 4.844988642537787,
      "grad_norm": 0.38892659544944763,
      "learning_rate": 3.1206552321712146e-07,
      "loss": 0.0096,
      "step": 2960540
    },
    {
      "epoch": 4.845021372976441,
      "grad_norm": 0.12863051891326904,
      "learning_rate": 3.119996310036043e-07,
      "loss": 0.0058,
      "step": 2960560
    },
    {
      "epoch": 4.8450541034150945,
      "grad_norm": 0.9245417714118958,
      "learning_rate": 3.119337387900872e-07,
      "loss": 0.0094,
      "step": 2960580
    },
    {
      "epoch": 4.845086833853747,
      "grad_norm": 0.25197601318359375,
      "learning_rate": 3.1186784657657005e-07,
      "loss": 0.0171,
      "step": 2960600
    },
    {
      "epoch": 4.845119564292401,
      "grad_norm": 0.15713809430599213,
      "learning_rate": 3.1180195436305295e-07,
      "loss": 0.0102,
      "step": 2960620
    },
    {
      "epoch": 4.845152294731054,
      "grad_norm": 0.302738755941391,
      "learning_rate": 3.117360621495358e-07,
      "loss": 0.011,
      "step": 2960640
    },
    {
      "epoch": 4.845185025169707,
      "grad_norm": 0.2237437218427658,
      "learning_rate": 3.116701699360187e-07,
      "loss": 0.012,
      "step": 2960660
    },
    {
      "epoch": 4.845217755608361,
      "grad_norm": 0.19073082506656647,
      "learning_rate": 3.1160427772250154e-07,
      "loss": 0.007,
      "step": 2960680
    },
    {
      "epoch": 4.845250486047014,
      "grad_norm": 0.17103663086891174,
      "learning_rate": 3.1153838550898444e-07,
      "loss": 0.0054,
      "step": 2960700
    },
    {
      "epoch": 4.845283216485667,
      "grad_norm": 0.5198752880096436,
      "learning_rate": 3.114724932954673e-07,
      "loss": 0.0117,
      "step": 2960720
    },
    {
      "epoch": 4.8453159469243205,
      "grad_norm": 0.4271058142185211,
      "learning_rate": 3.114066010819502e-07,
      "loss": 0.0082,
      "step": 2960740
    },
    {
      "epoch": 4.845348677362974,
      "grad_norm": 0.46476006507873535,
      "learning_rate": 3.113407088684331e-07,
      "loss": 0.0103,
      "step": 2960760
    },
    {
      "epoch": 4.845381407801628,
      "grad_norm": 0.1039324626326561,
      "learning_rate": 3.112748166549159e-07,
      "loss": 0.0179,
      "step": 2960780
    },
    {
      "epoch": 4.84541413824028,
      "grad_norm": 0.1914423257112503,
      "learning_rate": 3.112089244413988e-07,
      "loss": 0.0095,
      "step": 2960800
    },
    {
      "epoch": 4.845446868678934,
      "grad_norm": 0.2614240050315857,
      "learning_rate": 3.111430322278817e-07,
      "loss": 0.0095,
      "step": 2960820
    },
    {
      "epoch": 4.845479599117588,
      "grad_norm": 0.29646801948547363,
      "learning_rate": 3.110771400143645e-07,
      "loss": 0.0081,
      "step": 2960840
    },
    {
      "epoch": 4.84551232955624,
      "grad_norm": 0.25344598293304443,
      "learning_rate": 3.110112478008474e-07,
      "loss": 0.005,
      "step": 2960860
    },
    {
      "epoch": 4.845545059994894,
      "grad_norm": 0.11108309030532837,
      "learning_rate": 3.1094535558733026e-07,
      "loss": 0.0122,
      "step": 2960880
    },
    {
      "epoch": 4.8455777904335475,
      "grad_norm": 0.1381135731935501,
      "learning_rate": 3.1087946337381316e-07,
      "loss": 0.0085,
      "step": 2960900
    },
    {
      "epoch": 4.845610520872201,
      "grad_norm": 0.15817391872406006,
      "learning_rate": 3.10813571160296e-07,
      "loss": 0.0065,
      "step": 2960920
    },
    {
      "epoch": 4.845643251310854,
      "grad_norm": 0.08724395185709,
      "learning_rate": 3.107476789467789e-07,
      "loss": 0.012,
      "step": 2960940
    },
    {
      "epoch": 4.845675981749507,
      "grad_norm": 0.2929629981517792,
      "learning_rate": 3.1068178673326175e-07,
      "loss": 0.0082,
      "step": 2960960
    },
    {
      "epoch": 4.845708712188161,
      "grad_norm": 0.14647415280342102,
      "learning_rate": 3.1061589451974465e-07,
      "loss": 0.0083,
      "step": 2960980
    },
    {
      "epoch": 4.845741442626814,
      "grad_norm": 0.10156691074371338,
      "learning_rate": 3.105500023062275e-07,
      "loss": 0.0099,
      "step": 2961000
    },
    {
      "epoch": 4.845774173065467,
      "grad_norm": 0.30650654435157776,
      "learning_rate": 3.1048411009271034e-07,
      "loss": 0.0092,
      "step": 2961020
    },
    {
      "epoch": 4.845806903504121,
      "grad_norm": 0.22749483585357666,
      "learning_rate": 3.1041821787919324e-07,
      "loss": 0.0074,
      "step": 2961040
    },
    {
      "epoch": 4.845839633942774,
      "grad_norm": 0.2620181143283844,
      "learning_rate": 3.103523256656761e-07,
      "loss": 0.0074,
      "step": 2961060
    },
    {
      "epoch": 4.845872364381427,
      "grad_norm": 0.05901780351996422,
      "learning_rate": 3.10286433452159e-07,
      "loss": 0.0121,
      "step": 2961080
    },
    {
      "epoch": 4.845905094820081,
      "grad_norm": 0.13133230805397034,
      "learning_rate": 3.1022054123864183e-07,
      "loss": 0.008,
      "step": 2961100
    },
    {
      "epoch": 4.845937825258734,
      "grad_norm": 0.5975486636161804,
      "learning_rate": 3.1015464902512473e-07,
      "loss": 0.0062,
      "step": 2961120
    },
    {
      "epoch": 4.845970555697387,
      "grad_norm": 0.1612476110458374,
      "learning_rate": 3.1008875681160763e-07,
      "loss": 0.0095,
      "step": 2961140
    },
    {
      "epoch": 4.846003286136041,
      "grad_norm": 0.15266850590705872,
      "learning_rate": 3.1002286459809047e-07,
      "loss": 0.0083,
      "step": 2961160
    },
    {
      "epoch": 4.846036016574694,
      "grad_norm": 0.1493145376443863,
      "learning_rate": 3.0995697238457337e-07,
      "loss": 0.0064,
      "step": 2961180
    },
    {
      "epoch": 4.846068747013348,
      "grad_norm": 0.09863442927598953,
      "learning_rate": 3.098910801710562e-07,
      "loss": 0.0067,
      "step": 2961200
    },
    {
      "epoch": 4.846101477452001,
      "grad_norm": 0.09460631012916565,
      "learning_rate": 3.0982518795753906e-07,
      "loss": 0.009,
      "step": 2961220
    },
    {
      "epoch": 4.846134207890654,
      "grad_norm": 0.1642158329486847,
      "learning_rate": 3.0975929574402196e-07,
      "loss": 0.007,
      "step": 2961240
    },
    {
      "epoch": 4.846166938329308,
      "grad_norm": 0.49933964014053345,
      "learning_rate": 3.096934035305048e-07,
      "loss": 0.0073,
      "step": 2961260
    },
    {
      "epoch": 4.8461996687679605,
      "grad_norm": 0.1274200975894928,
      "learning_rate": 3.096275113169877e-07,
      "loss": 0.0084,
      "step": 2961280
    },
    {
      "epoch": 4.846232399206614,
      "grad_norm": 0.2662647068500519,
      "learning_rate": 3.0956161910347055e-07,
      "loss": 0.0088,
      "step": 2961300
    },
    {
      "epoch": 4.846265129645268,
      "grad_norm": 0.09105244278907776,
      "learning_rate": 3.0949572688995345e-07,
      "loss": 0.0061,
      "step": 2961320
    },
    {
      "epoch": 4.84629786008392,
      "grad_norm": 0.21578669548034668,
      "learning_rate": 3.094298346764363e-07,
      "loss": 0.0097,
      "step": 2961340
    },
    {
      "epoch": 4.846330590522574,
      "grad_norm": 0.19617362320423126,
      "learning_rate": 3.093639424629192e-07,
      "loss": 0.0114,
      "step": 2961360
    },
    {
      "epoch": 4.846363320961228,
      "grad_norm": 0.2032059133052826,
      "learning_rate": 3.0929805024940204e-07,
      "loss": 0.0119,
      "step": 2961380
    },
    {
      "epoch": 4.846396051399881,
      "grad_norm": 0.22661004960536957,
      "learning_rate": 3.0923215803588494e-07,
      "loss": 0.0085,
      "step": 2961400
    },
    {
      "epoch": 4.846428781838534,
      "grad_norm": 0.08743822574615479,
      "learning_rate": 3.0916626582236784e-07,
      "loss": 0.0064,
      "step": 2961420
    },
    {
      "epoch": 4.8464615122771875,
      "grad_norm": 0.20667827129364014,
      "learning_rate": 3.0910037360885063e-07,
      "loss": 0.0097,
      "step": 2961440
    },
    {
      "epoch": 4.846494242715841,
      "grad_norm": 0.24214054644107819,
      "learning_rate": 3.0903448139533353e-07,
      "loss": 0.0084,
      "step": 2961460
    },
    {
      "epoch": 4.846526973154495,
      "grad_norm": 0.13564176857471466,
      "learning_rate": 3.089685891818164e-07,
      "loss": 0.0077,
      "step": 2961480
    },
    {
      "epoch": 4.846559703593147,
      "grad_norm": 0.17877674102783203,
      "learning_rate": 3.0890269696829927e-07,
      "loss": 0.011,
      "step": 2961500
    },
    {
      "epoch": 4.846592434031801,
      "grad_norm": 0.1400563269853592,
      "learning_rate": 3.0883680475478217e-07,
      "loss": 0.0102,
      "step": 2961520
    },
    {
      "epoch": 4.8466251644704545,
      "grad_norm": 0.19641932845115662,
      "learning_rate": 3.08770912541265e-07,
      "loss": 0.0091,
      "step": 2961540
    },
    {
      "epoch": 4.846657894909107,
      "grad_norm": 0.18809948861598969,
      "learning_rate": 3.087050203277479e-07,
      "loss": 0.0114,
      "step": 2961560
    },
    {
      "epoch": 4.846690625347761,
      "grad_norm": 0.14679689705371857,
      "learning_rate": 3.0863912811423076e-07,
      "loss": 0.0078,
      "step": 2961580
    },
    {
      "epoch": 4.8467233557864144,
      "grad_norm": 0.3618767261505127,
      "learning_rate": 3.0857323590071366e-07,
      "loss": 0.0069,
      "step": 2961600
    },
    {
      "epoch": 4.846756086225067,
      "grad_norm": 0.24195939302444458,
      "learning_rate": 3.085073436871965e-07,
      "loss": 0.0116,
      "step": 2961620
    },
    {
      "epoch": 4.846788816663721,
      "grad_norm": 0.3467182219028473,
      "learning_rate": 3.084414514736794e-07,
      "loss": 0.009,
      "step": 2961640
    },
    {
      "epoch": 4.846821547102374,
      "grad_norm": 0.17558921873569489,
      "learning_rate": 3.0837555926016225e-07,
      "loss": 0.008,
      "step": 2961660
    },
    {
      "epoch": 4.846854277541028,
      "grad_norm": 0.24631696939468384,
      "learning_rate": 3.083096670466451e-07,
      "loss": 0.0079,
      "step": 2961680
    },
    {
      "epoch": 4.846887007979681,
      "grad_norm": 0.125189408659935,
      "learning_rate": 3.08243774833128e-07,
      "loss": 0.0051,
      "step": 2961700
    },
    {
      "epoch": 4.846919738418334,
      "grad_norm": 0.13802357017993927,
      "learning_rate": 3.0817788261961084e-07,
      "loss": 0.0077,
      "step": 2961720
    },
    {
      "epoch": 4.846952468856988,
      "grad_norm": 0.36429324746131897,
      "learning_rate": 3.0811199040609374e-07,
      "loss": 0.0088,
      "step": 2961740
    },
    {
      "epoch": 4.846985199295641,
      "grad_norm": 0.30748921632766724,
      "learning_rate": 3.0804609819257664e-07,
      "loss": 0.0076,
      "step": 2961760
    },
    {
      "epoch": 4.847017929734294,
      "grad_norm": 0.21771256625652313,
      "learning_rate": 3.079802059790595e-07,
      "loss": 0.0088,
      "step": 2961780
    },
    {
      "epoch": 4.847050660172948,
      "grad_norm": 0.10532154142856598,
      "learning_rate": 3.079143137655424e-07,
      "loss": 0.0077,
      "step": 2961800
    },
    {
      "epoch": 4.847083390611601,
      "grad_norm": 0.38390442728996277,
      "learning_rate": 3.078484215520252e-07,
      "loss": 0.0108,
      "step": 2961820
    },
    {
      "epoch": 4.847116121050254,
      "grad_norm": 0.07398602366447449,
      "learning_rate": 3.077825293385081e-07,
      "loss": 0.0044,
      "step": 2961840
    },
    {
      "epoch": 4.847148851488908,
      "grad_norm": 0.13061214983463287,
      "learning_rate": 3.0771663712499097e-07,
      "loss": 0.0129,
      "step": 2961860
    },
    {
      "epoch": 4.847181581927561,
      "grad_norm": 0.2521747946739197,
      "learning_rate": 3.076507449114738e-07,
      "loss": 0.0056,
      "step": 2961880
    },
    {
      "epoch": 4.847214312366214,
      "grad_norm": 0.31970155239105225,
      "learning_rate": 3.075848526979567e-07,
      "loss": 0.0149,
      "step": 2961900
    },
    {
      "epoch": 4.8472470428048675,
      "grad_norm": 0.24451975524425507,
      "learning_rate": 3.0751896048443956e-07,
      "loss": 0.0057,
      "step": 2961920
    },
    {
      "epoch": 4.847279773243521,
      "grad_norm": 0.24537432193756104,
      "learning_rate": 3.0745306827092246e-07,
      "loss": 0.0092,
      "step": 2961940
    },
    {
      "epoch": 4.847312503682175,
      "grad_norm": 0.0908001959323883,
      "learning_rate": 3.073871760574053e-07,
      "loss": 0.0082,
      "step": 2961960
    },
    {
      "epoch": 4.847345234120827,
      "grad_norm": 0.051746122539043427,
      "learning_rate": 3.073212838438882e-07,
      "loss": 0.0117,
      "step": 2961980
    },
    {
      "epoch": 4.847377964559481,
      "grad_norm": 0.31110700964927673,
      "learning_rate": 3.0725539163037105e-07,
      "loss": 0.0114,
      "step": 2962000
    },
    {
      "epoch": 4.847410694998135,
      "grad_norm": 1.1101555824279785,
      "learning_rate": 3.0718949941685395e-07,
      "loss": 0.0114,
      "step": 2962020
    },
    {
      "epoch": 4.847443425436788,
      "grad_norm": 0.09863195568323135,
      "learning_rate": 3.071236072033368e-07,
      "loss": 0.0056,
      "step": 2962040
    },
    {
      "epoch": 4.847476155875441,
      "grad_norm": 0.45642808079719543,
      "learning_rate": 3.070577149898197e-07,
      "loss": 0.0096,
      "step": 2962060
    },
    {
      "epoch": 4.8475088863140945,
      "grad_norm": 1.5545554161071777,
      "learning_rate": 3.069918227763026e-07,
      "loss": 0.0101,
      "step": 2962080
    },
    {
      "epoch": 4.847541616752748,
      "grad_norm": 0.2529120147228241,
      "learning_rate": 3.069259305627854e-07,
      "loss": 0.0082,
      "step": 2962100
    },
    {
      "epoch": 4.847574347191401,
      "grad_norm": 0.1775381714105606,
      "learning_rate": 3.068600383492683e-07,
      "loss": 0.0084,
      "step": 2962120
    },
    {
      "epoch": 4.847607077630054,
      "grad_norm": 0.27950519323349,
      "learning_rate": 3.067941461357512e-07,
      "loss": 0.0142,
      "step": 2962140
    },
    {
      "epoch": 4.847639808068708,
      "grad_norm": 0.12699009478092194,
      "learning_rate": 3.06728253922234e-07,
      "loss": 0.0128,
      "step": 2962160
    },
    {
      "epoch": 4.847672538507361,
      "grad_norm": 0.18127992749214172,
      "learning_rate": 3.066623617087169e-07,
      "loss": 0.008,
      "step": 2962180
    },
    {
      "epoch": 4.847705268946014,
      "grad_norm": 0.3884655833244324,
      "learning_rate": 3.0659646949519977e-07,
      "loss": 0.0164,
      "step": 2962200
    },
    {
      "epoch": 4.847737999384668,
      "grad_norm": 0.12440982460975647,
      "learning_rate": 3.0653057728168267e-07,
      "loss": 0.0098,
      "step": 2962220
    },
    {
      "epoch": 4.8477707298233215,
      "grad_norm": 0.13301655650138855,
      "learning_rate": 3.064646850681655e-07,
      "loss": 0.0092,
      "step": 2962240
    },
    {
      "epoch": 4.847803460261974,
      "grad_norm": 0.17617569863796234,
      "learning_rate": 3.063987928546484e-07,
      "loss": 0.0068,
      "step": 2962260
    },
    {
      "epoch": 4.847836190700628,
      "grad_norm": 0.13941165804862976,
      "learning_rate": 3.0633290064113126e-07,
      "loss": 0.0082,
      "step": 2962280
    },
    {
      "epoch": 4.847868921139281,
      "grad_norm": 0.08738169074058533,
      "learning_rate": 3.0626700842761416e-07,
      "loss": 0.0084,
      "step": 2962300
    },
    {
      "epoch": 4.847901651577934,
      "grad_norm": 0.29272422194480896,
      "learning_rate": 3.06201116214097e-07,
      "loss": 0.0069,
      "step": 2962320
    },
    {
      "epoch": 4.847934382016588,
      "grad_norm": 0.46677300333976746,
      "learning_rate": 3.0613522400057985e-07,
      "loss": 0.0114,
      "step": 2962340
    },
    {
      "epoch": 4.847967112455241,
      "grad_norm": 0.0813572108745575,
      "learning_rate": 3.0606933178706275e-07,
      "loss": 0.0068,
      "step": 2962360
    },
    {
      "epoch": 4.847999842893895,
      "grad_norm": 0.18841563165187836,
      "learning_rate": 3.060034395735456e-07,
      "loss": 0.009,
      "step": 2962380
    },
    {
      "epoch": 4.848032573332548,
      "grad_norm": 0.11204120516777039,
      "learning_rate": 3.059375473600285e-07,
      "loss": 0.0081,
      "step": 2962400
    },
    {
      "epoch": 4.848065303771201,
      "grad_norm": 0.13478192687034607,
      "learning_rate": 3.058716551465114e-07,
      "loss": 0.0124,
      "step": 2962420
    },
    {
      "epoch": 4.848098034209855,
      "grad_norm": 0.13703520596027374,
      "learning_rate": 3.0580576293299423e-07,
      "loss": 0.0085,
      "step": 2962440
    },
    {
      "epoch": 4.8481307646485075,
      "grad_norm": 0.1545219123363495,
      "learning_rate": 3.0573987071947713e-07,
      "loss": 0.0049,
      "step": 2962460
    },
    {
      "epoch": 4.848163495087161,
      "grad_norm": 0.2235231250524521,
      "learning_rate": 3.0567397850596e-07,
      "loss": 0.0095,
      "step": 2962480
    },
    {
      "epoch": 4.848196225525815,
      "grad_norm": 0.2596921920776367,
      "learning_rate": 3.056080862924429e-07,
      "loss": 0.0136,
      "step": 2962500
    },
    {
      "epoch": 4.848228955964467,
      "grad_norm": 0.22936365008354187,
      "learning_rate": 3.055421940789257e-07,
      "loss": 0.0089,
      "step": 2962520
    },
    {
      "epoch": 4.848261686403121,
      "grad_norm": 0.22241929173469543,
      "learning_rate": 3.0547630186540857e-07,
      "loss": 0.0092,
      "step": 2962540
    },
    {
      "epoch": 4.8482944168417745,
      "grad_norm": 0.4731543958187103,
      "learning_rate": 3.0541040965189147e-07,
      "loss": 0.0153,
      "step": 2962560
    },
    {
      "epoch": 4.848327147280428,
      "grad_norm": 0.13578039407730103,
      "learning_rate": 3.053445174383743e-07,
      "loss": 0.0076,
      "step": 2962580
    },
    {
      "epoch": 4.848359877719081,
      "grad_norm": 0.5040589570999146,
      "learning_rate": 3.052786252248572e-07,
      "loss": 0.0093,
      "step": 2962600
    },
    {
      "epoch": 4.848392608157734,
      "grad_norm": 0.24310697615146637,
      "learning_rate": 3.0521273301134006e-07,
      "loss": 0.0097,
      "step": 2962620
    },
    {
      "epoch": 4.848425338596388,
      "grad_norm": 0.17399629950523376,
      "learning_rate": 3.0514684079782295e-07,
      "loss": 0.0083,
      "step": 2962640
    },
    {
      "epoch": 4.848458069035042,
      "grad_norm": 0.2600955367088318,
      "learning_rate": 3.050809485843058e-07,
      "loss": 0.0091,
      "step": 2962660
    },
    {
      "epoch": 4.848490799473694,
      "grad_norm": 0.4375019669532776,
      "learning_rate": 3.050150563707887e-07,
      "loss": 0.013,
      "step": 2962680
    },
    {
      "epoch": 4.848523529912348,
      "grad_norm": 0.32452425360679626,
      "learning_rate": 3.0494916415727154e-07,
      "loss": 0.0064,
      "step": 2962700
    },
    {
      "epoch": 4.8485562603510015,
      "grad_norm": 0.11133982241153717,
      "learning_rate": 3.0488327194375444e-07,
      "loss": 0.0072,
      "step": 2962720
    },
    {
      "epoch": 4.848588990789654,
      "grad_norm": 0.3555870056152344,
      "learning_rate": 3.0481737973023734e-07,
      "loss": 0.0043,
      "step": 2962740
    },
    {
      "epoch": 4.848621721228308,
      "grad_norm": 0.4552474915981293,
      "learning_rate": 3.0475148751672013e-07,
      "loss": 0.0078,
      "step": 2962760
    },
    {
      "epoch": 4.848654451666961,
      "grad_norm": 0.31428393721580505,
      "learning_rate": 3.0468559530320303e-07,
      "loss": 0.0078,
      "step": 2962780
    },
    {
      "epoch": 4.848687182105614,
      "grad_norm": 0.09849727153778076,
      "learning_rate": 3.0461970308968593e-07,
      "loss": 0.0069,
      "step": 2962800
    },
    {
      "epoch": 4.848719912544268,
      "grad_norm": 0.14597047865390778,
      "learning_rate": 3.045538108761688e-07,
      "loss": 0.0109,
      "step": 2962820
    },
    {
      "epoch": 4.848752642982921,
      "grad_norm": 0.28813156485557556,
      "learning_rate": 3.044879186626517e-07,
      "loss": 0.0063,
      "step": 2962840
    },
    {
      "epoch": 4.848785373421575,
      "grad_norm": 0.08982778340578079,
      "learning_rate": 3.044220264491345e-07,
      "loss": 0.007,
      "step": 2962860
    },
    {
      "epoch": 4.848818103860228,
      "grad_norm": 0.1826416403055191,
      "learning_rate": 3.043561342356174e-07,
      "loss": 0.0066,
      "step": 2962880
    },
    {
      "epoch": 4.848850834298881,
      "grad_norm": 0.47096380591392517,
      "learning_rate": 3.0429024202210027e-07,
      "loss": 0.0072,
      "step": 2962900
    },
    {
      "epoch": 4.848883564737535,
      "grad_norm": 0.210013285279274,
      "learning_rate": 3.0422434980858316e-07,
      "loss": 0.0062,
      "step": 2962920
    },
    {
      "epoch": 4.848916295176188,
      "grad_norm": 0.3300536274909973,
      "learning_rate": 3.04158457595066e-07,
      "loss": 0.0064,
      "step": 2962940
    },
    {
      "epoch": 4.848949025614841,
      "grad_norm": 0.23751702904701233,
      "learning_rate": 3.040925653815489e-07,
      "loss": 0.0086,
      "step": 2962960
    },
    {
      "epoch": 4.848981756053495,
      "grad_norm": 0.1039871871471405,
      "learning_rate": 3.0402667316803175e-07,
      "loss": 0.0097,
      "step": 2962980
    },
    {
      "epoch": 4.849014486492148,
      "grad_norm": 0.09622583538293839,
      "learning_rate": 3.039607809545146e-07,
      "loss": 0.0082,
      "step": 2963000
    },
    {
      "epoch": 4.849047216930801,
      "grad_norm": 0.11600421369075775,
      "learning_rate": 3.038948887409975e-07,
      "loss": 0.0097,
      "step": 2963020
    },
    {
      "epoch": 4.849079947369455,
      "grad_norm": 0.34576335549354553,
      "learning_rate": 3.0382899652748034e-07,
      "loss": 0.0081,
      "step": 2963040
    },
    {
      "epoch": 4.849112677808108,
      "grad_norm": 0.26817038655281067,
      "learning_rate": 3.0376310431396324e-07,
      "loss": 0.013,
      "step": 2963060
    },
    {
      "epoch": 4.849145408246761,
      "grad_norm": 0.32913368940353394,
      "learning_rate": 3.0369721210044614e-07,
      "loss": 0.0066,
      "step": 2963080
    },
    {
      "epoch": 4.8491781386854145,
      "grad_norm": 0.10065802931785583,
      "learning_rate": 3.03631319886929e-07,
      "loss": 0.0139,
      "step": 2963100
    },
    {
      "epoch": 4.849210869124068,
      "grad_norm": 0.06929312646389008,
      "learning_rate": 3.035654276734119e-07,
      "loss": 0.0062,
      "step": 2963120
    },
    {
      "epoch": 4.849243599562722,
      "grad_norm": 0.12479934096336365,
      "learning_rate": 3.0349953545989473e-07,
      "loss": 0.0105,
      "step": 2963140
    },
    {
      "epoch": 4.849276330001374,
      "grad_norm": 0.11748754978179932,
      "learning_rate": 3.0343364324637763e-07,
      "loss": 0.0068,
      "step": 2963160
    },
    {
      "epoch": 4.849309060440028,
      "grad_norm": 0.07815000414848328,
      "learning_rate": 3.033677510328605e-07,
      "loss": 0.0066,
      "step": 2963180
    },
    {
      "epoch": 4.849341790878682,
      "grad_norm": 0.20489756762981415,
      "learning_rate": 3.033018588193433e-07,
      "loss": 0.0067,
      "step": 2963200
    },
    {
      "epoch": 4.849374521317335,
      "grad_norm": 0.1846083402633667,
      "learning_rate": 3.032359666058262e-07,
      "loss": 0.0068,
      "step": 2963220
    },
    {
      "epoch": 4.849407251755988,
      "grad_norm": 0.14044257998466492,
      "learning_rate": 3.0317007439230906e-07,
      "loss": 0.0066,
      "step": 2963240
    },
    {
      "epoch": 4.8494399821946415,
      "grad_norm": 0.2022424191236496,
      "learning_rate": 3.0310418217879196e-07,
      "loss": 0.0092,
      "step": 2963260
    },
    {
      "epoch": 4.849472712633295,
      "grad_norm": 0.18127982318401337,
      "learning_rate": 3.030382899652748e-07,
      "loss": 0.0107,
      "step": 2963280
    },
    {
      "epoch": 4.849505443071948,
      "grad_norm": 0.3753588795661926,
      "learning_rate": 3.029723977517577e-07,
      "loss": 0.0086,
      "step": 2963300
    },
    {
      "epoch": 4.849538173510601,
      "grad_norm": 0.11707260459661484,
      "learning_rate": 3.0290650553824055e-07,
      "loss": 0.0093,
      "step": 2963320
    },
    {
      "epoch": 4.849570903949255,
      "grad_norm": 0.18222729861736298,
      "learning_rate": 3.0284061332472345e-07,
      "loss": 0.0106,
      "step": 2963340
    },
    {
      "epoch": 4.849603634387908,
      "grad_norm": 0.24529097974300385,
      "learning_rate": 3.027747211112063e-07,
      "loss": 0.0097,
      "step": 2963360
    },
    {
      "epoch": 4.849636364826561,
      "grad_norm": 0.08656805008649826,
      "learning_rate": 3.027088288976892e-07,
      "loss": 0.0137,
      "step": 2963380
    },
    {
      "epoch": 4.849669095265215,
      "grad_norm": 0.4753991365432739,
      "learning_rate": 3.026429366841721e-07,
      "loss": 0.0089,
      "step": 2963400
    },
    {
      "epoch": 4.849701825703868,
      "grad_norm": 0.2807387709617615,
      "learning_rate": 3.025770444706549e-07,
      "loss": 0.007,
      "step": 2963420
    },
    {
      "epoch": 4.849734556142521,
      "grad_norm": 0.06749133765697479,
      "learning_rate": 3.025111522571378e-07,
      "loss": 0.0093,
      "step": 2963440
    },
    {
      "epoch": 4.849767286581175,
      "grad_norm": 0.22102700173854828,
      "learning_rate": 3.024452600436207e-07,
      "loss": 0.0072,
      "step": 2963460
    },
    {
      "epoch": 4.849800017019828,
      "grad_norm": 0.13646404445171356,
      "learning_rate": 3.0237936783010353e-07,
      "loss": 0.012,
      "step": 2963480
    },
    {
      "epoch": 4.849832747458482,
      "grad_norm": 0.11395810544490814,
      "learning_rate": 3.0231347561658643e-07,
      "loss": 0.0101,
      "step": 2963500
    },
    {
      "epoch": 4.849865477897135,
      "grad_norm": 0.14586472511291504,
      "learning_rate": 3.022475834030693e-07,
      "loss": 0.0147,
      "step": 2963520
    },
    {
      "epoch": 4.849898208335788,
      "grad_norm": 0.08197437971830368,
      "learning_rate": 3.0218169118955217e-07,
      "loss": 0.006,
      "step": 2963540
    },
    {
      "epoch": 4.849930938774442,
      "grad_norm": 0.2369142472743988,
      "learning_rate": 3.02115798976035e-07,
      "loss": 0.0078,
      "step": 2963560
    },
    {
      "epoch": 4.8499636692130945,
      "grad_norm": 0.48806309700012207,
      "learning_rate": 3.020499067625179e-07,
      "loss": 0.0083,
      "step": 2963580
    },
    {
      "epoch": 4.849996399651748,
      "grad_norm": 0.14576438069343567,
      "learning_rate": 3.0198401454900076e-07,
      "loss": 0.0096,
      "step": 2963600
    },
    {
      "epoch": 4.850029130090402,
      "grad_norm": 0.09674292057752609,
      "learning_rate": 3.0191812233548366e-07,
      "loss": 0.0081,
      "step": 2963620
    },
    {
      "epoch": 4.850061860529054,
      "grad_norm": 0.12506188452243805,
      "learning_rate": 3.018522301219665e-07,
      "loss": 0.0084,
      "step": 2963640
    },
    {
      "epoch": 4.850094590967708,
      "grad_norm": 0.1621474176645279,
      "learning_rate": 3.0178633790844935e-07,
      "loss": 0.0107,
      "step": 2963660
    },
    {
      "epoch": 4.850127321406362,
      "grad_norm": 0.12511391937732697,
      "learning_rate": 3.0172044569493225e-07,
      "loss": 0.0077,
      "step": 2963680
    },
    {
      "epoch": 4.850160051845015,
      "grad_norm": 0.20797426998615265,
      "learning_rate": 3.016545534814151e-07,
      "loss": 0.0084,
      "step": 2963700
    },
    {
      "epoch": 4.850192782283668,
      "grad_norm": 0.29632383584976196,
      "learning_rate": 3.01588661267898e-07,
      "loss": 0.0098,
      "step": 2963720
    },
    {
      "epoch": 4.8502255127223215,
      "grad_norm": 0.08905439078807831,
      "learning_rate": 3.015227690543809e-07,
      "loss": 0.0062,
      "step": 2963740
    },
    {
      "epoch": 4.850258243160975,
      "grad_norm": 0.17374981939792633,
      "learning_rate": 3.0145687684086374e-07,
      "loss": 0.0078,
      "step": 2963760
    },
    {
      "epoch": 4.850290973599628,
      "grad_norm": 0.20315606892108917,
      "learning_rate": 3.0139098462734664e-07,
      "loss": 0.0133,
      "step": 2963780
    },
    {
      "epoch": 4.850323704038281,
      "grad_norm": 0.37296271324157715,
      "learning_rate": 3.013250924138295e-07,
      "loss": 0.0073,
      "step": 2963800
    },
    {
      "epoch": 4.850356434476935,
      "grad_norm": 0.30712810158729553,
      "learning_rate": 3.012592002003124e-07,
      "loss": 0.0101,
      "step": 2963820
    },
    {
      "epoch": 4.850389164915589,
      "grad_norm": 0.19724971055984497,
      "learning_rate": 3.0119330798679523e-07,
      "loss": 0.0084,
      "step": 2963840
    },
    {
      "epoch": 4.850421895354241,
      "grad_norm": 0.21333293616771698,
      "learning_rate": 3.0112741577327807e-07,
      "loss": 0.0087,
      "step": 2963860
    },
    {
      "epoch": 4.850454625792895,
      "grad_norm": 0.5752369165420532,
      "learning_rate": 3.0106152355976097e-07,
      "loss": 0.0053,
      "step": 2963880
    },
    {
      "epoch": 4.8504873562315485,
      "grad_norm": 0.30631300806999207,
      "learning_rate": 3.009956313462438e-07,
      "loss": 0.0072,
      "step": 2963900
    },
    {
      "epoch": 4.850520086670201,
      "grad_norm": 0.14450643956661224,
      "learning_rate": 3.009297391327267e-07,
      "loss": 0.0061,
      "step": 2963920
    },
    {
      "epoch": 4.850552817108855,
      "grad_norm": 0.4839438199996948,
      "learning_rate": 3.0086384691920956e-07,
      "loss": 0.0157,
      "step": 2963940
    },
    {
      "epoch": 4.850585547547508,
      "grad_norm": 0.18801230192184448,
      "learning_rate": 3.0079795470569246e-07,
      "loss": 0.0092,
      "step": 2963960
    },
    {
      "epoch": 4.850618277986161,
      "grad_norm": 0.1179315522313118,
      "learning_rate": 3.007320624921753e-07,
      "loss": 0.0067,
      "step": 2963980
    },
    {
      "epoch": 4.850651008424815,
      "grad_norm": 0.3603702485561371,
      "learning_rate": 3.006661702786582e-07,
      "loss": 0.0074,
      "step": 2964000
    },
    {
      "epoch": 4.850683738863468,
      "grad_norm": 0.313740074634552,
      "learning_rate": 3.0060027806514105e-07,
      "loss": 0.009,
      "step": 2964020
    },
    {
      "epoch": 4.850716469302122,
      "grad_norm": 0.3007229268550873,
      "learning_rate": 3.0053438585162395e-07,
      "loss": 0.009,
      "step": 2964040
    },
    {
      "epoch": 4.850749199740775,
      "grad_norm": 0.5241007804870605,
      "learning_rate": 3.0046849363810685e-07,
      "loss": 0.0155,
      "step": 2964060
    },
    {
      "epoch": 4.850781930179428,
      "grad_norm": 0.19256004691123962,
      "learning_rate": 3.0040260142458964e-07,
      "loss": 0.0137,
      "step": 2964080
    },
    {
      "epoch": 4.850814660618082,
      "grad_norm": 0.15497250854969025,
      "learning_rate": 3.0033670921107254e-07,
      "loss": 0.0064,
      "step": 2964100
    },
    {
      "epoch": 4.850847391056735,
      "grad_norm": 0.1551317423582077,
      "learning_rate": 3.0027081699755544e-07,
      "loss": 0.0099,
      "step": 2964120
    },
    {
      "epoch": 4.850880121495388,
      "grad_norm": 0.25164172053337097,
      "learning_rate": 3.002049247840383e-07,
      "loss": 0.0071,
      "step": 2964140
    },
    {
      "epoch": 4.850912851934042,
      "grad_norm": 0.04214128851890564,
      "learning_rate": 3.001390325705212e-07,
      "loss": 0.0083,
      "step": 2964160
    },
    {
      "epoch": 4.850945582372695,
      "grad_norm": 0.15326623618602753,
      "learning_rate": 3.0007314035700403e-07,
      "loss": 0.0065,
      "step": 2964180
    },
    {
      "epoch": 4.850978312811348,
      "grad_norm": 0.12289983034133911,
      "learning_rate": 3.000072481434869e-07,
      "loss": 0.007,
      "step": 2964200
    },
    {
      "epoch": 4.8510110432500015,
      "grad_norm": 0.09832412004470825,
      "learning_rate": 2.9994135592996977e-07,
      "loss": 0.0105,
      "step": 2964220
    },
    {
      "epoch": 4.851043773688655,
      "grad_norm": 0.2815203070640564,
      "learning_rate": 2.9987546371645267e-07,
      "loss": 0.0148,
      "step": 2964240
    },
    {
      "epoch": 4.851076504127308,
      "grad_norm": 0.2939893305301666,
      "learning_rate": 2.998095715029355e-07,
      "loss": 0.0088,
      "step": 2964260
    },
    {
      "epoch": 4.851109234565961,
      "grad_norm": 0.11267119646072388,
      "learning_rate": 2.997436792894184e-07,
      "loss": 0.0066,
      "step": 2964280
    },
    {
      "epoch": 4.851141965004615,
      "grad_norm": 0.08730605244636536,
      "learning_rate": 2.9967778707590126e-07,
      "loss": 0.0101,
      "step": 2964300
    },
    {
      "epoch": 4.851174695443269,
      "grad_norm": 0.11381522566080093,
      "learning_rate": 2.996118948623841e-07,
      "loss": 0.0088,
      "step": 2964320
    },
    {
      "epoch": 4.851207425881921,
      "grad_norm": 0.4198201894760132,
      "learning_rate": 2.99546002648867e-07,
      "loss": 0.012,
      "step": 2964340
    },
    {
      "epoch": 4.851240156320575,
      "grad_norm": 0.15201102197170258,
      "learning_rate": 2.9948011043534985e-07,
      "loss": 0.008,
      "step": 2964360
    },
    {
      "epoch": 4.8512728867592285,
      "grad_norm": 0.2096145749092102,
      "learning_rate": 2.9941421822183275e-07,
      "loss": 0.01,
      "step": 2964380
    },
    {
      "epoch": 4.851305617197882,
      "grad_norm": 0.08530107885599136,
      "learning_rate": 2.9934832600831565e-07,
      "loss": 0.0067,
      "step": 2964400
    },
    {
      "epoch": 4.851338347636535,
      "grad_norm": 0.4626341164112091,
      "learning_rate": 2.992824337947985e-07,
      "loss": 0.0077,
      "step": 2964420
    },
    {
      "epoch": 4.851371078075188,
      "grad_norm": 0.33075597882270813,
      "learning_rate": 2.992165415812814e-07,
      "loss": 0.0073,
      "step": 2964440
    },
    {
      "epoch": 4.851403808513842,
      "grad_norm": 0.3234458565711975,
      "learning_rate": 2.9915064936776424e-07,
      "loss": 0.0099,
      "step": 2964460
    },
    {
      "epoch": 4.851436538952495,
      "grad_norm": 0.22592061758041382,
      "learning_rate": 2.9908475715424713e-07,
      "loss": 0.0095,
      "step": 2964480
    },
    {
      "epoch": 4.851469269391148,
      "grad_norm": 0.20172624289989471,
      "learning_rate": 2.9901886494073e-07,
      "loss": 0.0112,
      "step": 2964500
    },
    {
      "epoch": 4.851501999829802,
      "grad_norm": 0.5933688879013062,
      "learning_rate": 2.989529727272128e-07,
      "loss": 0.0071,
      "step": 2964520
    },
    {
      "epoch": 4.851534730268455,
      "grad_norm": 0.2576257586479187,
      "learning_rate": 2.988870805136957e-07,
      "loss": 0.0066,
      "step": 2964540
    },
    {
      "epoch": 4.851567460707108,
      "grad_norm": 0.24522380530834198,
      "learning_rate": 2.9882118830017857e-07,
      "loss": 0.01,
      "step": 2964560
    },
    {
      "epoch": 4.851600191145762,
      "grad_norm": 0.25886082649230957,
      "learning_rate": 2.9875529608666147e-07,
      "loss": 0.0105,
      "step": 2964580
    },
    {
      "epoch": 4.851632921584415,
      "grad_norm": 0.1926063448190689,
      "learning_rate": 2.986894038731443e-07,
      "loss": 0.0091,
      "step": 2964600
    },
    {
      "epoch": 4.851665652023068,
      "grad_norm": 0.1539556086063385,
      "learning_rate": 2.986235116596272e-07,
      "loss": 0.0109,
      "step": 2964620
    },
    {
      "epoch": 4.851698382461722,
      "grad_norm": 0.11254774779081345,
      "learning_rate": 2.9855761944611006e-07,
      "loss": 0.0088,
      "step": 2964640
    },
    {
      "epoch": 4.851731112900375,
      "grad_norm": 0.24492155015468597,
      "learning_rate": 2.9849172723259296e-07,
      "loss": 0.0062,
      "step": 2964660
    },
    {
      "epoch": 4.851763843339029,
      "grad_norm": 0.14046047627925873,
      "learning_rate": 2.984258350190758e-07,
      "loss": 0.0095,
      "step": 2964680
    },
    {
      "epoch": 4.851796573777682,
      "grad_norm": 0.4638362228870392,
      "learning_rate": 2.983599428055587e-07,
      "loss": 0.0091,
      "step": 2964700
    },
    {
      "epoch": 4.851829304216335,
      "grad_norm": 0.10535569489002228,
      "learning_rate": 2.982940505920416e-07,
      "loss": 0.0081,
      "step": 2964720
    },
    {
      "epoch": 4.851862034654989,
      "grad_norm": 0.13276490569114685,
      "learning_rate": 2.982281583785244e-07,
      "loss": 0.005,
      "step": 2964740
    },
    {
      "epoch": 4.8518947650936415,
      "grad_norm": 0.2995795011520386,
      "learning_rate": 2.981622661650073e-07,
      "loss": 0.0095,
      "step": 2964760
    },
    {
      "epoch": 4.851927495532295,
      "grad_norm": 0.0776757001876831,
      "learning_rate": 2.980963739514902e-07,
      "loss": 0.0075,
      "step": 2964780
    },
    {
      "epoch": 4.851960225970949,
      "grad_norm": 0.22020985186100006,
      "learning_rate": 2.9803048173797304e-07,
      "loss": 0.0078,
      "step": 2964800
    },
    {
      "epoch": 4.851992956409601,
      "grad_norm": 0.2625323534011841,
      "learning_rate": 2.9796458952445593e-07,
      "loss": 0.0115,
      "step": 2964820
    },
    {
      "epoch": 4.852025686848255,
      "grad_norm": 0.10380013287067413,
      "learning_rate": 2.978986973109388e-07,
      "loss": 0.0053,
      "step": 2964840
    },
    {
      "epoch": 4.852058417286909,
      "grad_norm": 0.25902059674263,
      "learning_rate": 2.978328050974217e-07,
      "loss": 0.0076,
      "step": 2964860
    },
    {
      "epoch": 4.852091147725562,
      "grad_norm": 0.35704854130744934,
      "learning_rate": 2.977669128839045e-07,
      "loss": 0.0053,
      "step": 2964880
    },
    {
      "epoch": 4.852123878164215,
      "grad_norm": 0.08524299412965775,
      "learning_rate": 2.977010206703874e-07,
      "loss": 0.0084,
      "step": 2964900
    },
    {
      "epoch": 4.8521566086028685,
      "grad_norm": 0.4582674205303192,
      "learning_rate": 2.9763512845687027e-07,
      "loss": 0.0116,
      "step": 2964920
    },
    {
      "epoch": 4.852189339041522,
      "grad_norm": 0.3802984952926636,
      "learning_rate": 2.9756923624335317e-07,
      "loss": 0.0114,
      "step": 2964940
    },
    {
      "epoch": 4.852222069480176,
      "grad_norm": 0.5168309807777405,
      "learning_rate": 2.97503344029836e-07,
      "loss": 0.0096,
      "step": 2964960
    },
    {
      "epoch": 4.852254799918828,
      "grad_norm": 0.17135199904441833,
      "learning_rate": 2.9743745181631886e-07,
      "loss": 0.0086,
      "step": 2964980
    },
    {
      "epoch": 4.852287530357482,
      "grad_norm": 0.17556844651699066,
      "learning_rate": 2.9737155960280176e-07,
      "loss": 0.0085,
      "step": 2965000
    },
    {
      "epoch": 4.8523202607961355,
      "grad_norm": 0.36481258273124695,
      "learning_rate": 2.973056673892846e-07,
      "loss": 0.0078,
      "step": 2965020
    },
    {
      "epoch": 4.852352991234788,
      "grad_norm": 0.12161599099636078,
      "learning_rate": 2.972397751757675e-07,
      "loss": 0.0115,
      "step": 2965040
    },
    {
      "epoch": 4.852385721673442,
      "grad_norm": 0.5021352171897888,
      "learning_rate": 2.971738829622504e-07,
      "loss": 0.0074,
      "step": 2965060
    },
    {
      "epoch": 4.852418452112095,
      "grad_norm": 0.2153840810060501,
      "learning_rate": 2.9710799074873324e-07,
      "loss": 0.0058,
      "step": 2965080
    },
    {
      "epoch": 4.852451182550748,
      "grad_norm": 0.22542792558670044,
      "learning_rate": 2.9704209853521614e-07,
      "loss": 0.0084,
      "step": 2965100
    },
    {
      "epoch": 4.852483912989402,
      "grad_norm": 0.28533199429512024,
      "learning_rate": 2.96976206321699e-07,
      "loss": 0.0052,
      "step": 2965120
    },
    {
      "epoch": 4.852516643428055,
      "grad_norm": 0.12234672904014587,
      "learning_rate": 2.969103141081819e-07,
      "loss": 0.0094,
      "step": 2965140
    },
    {
      "epoch": 4.852549373866709,
      "grad_norm": 0.2322532832622528,
      "learning_rate": 2.9684442189466473e-07,
      "loss": 0.01,
      "step": 2965160
    },
    {
      "epoch": 4.852582104305362,
      "grad_norm": 0.39090755581855774,
      "learning_rate": 2.9677852968114763e-07,
      "loss": 0.0117,
      "step": 2965180
    },
    {
      "epoch": 4.852614834744015,
      "grad_norm": 0.12708324193954468,
      "learning_rate": 2.967126374676305e-07,
      "loss": 0.0045,
      "step": 2965200
    },
    {
      "epoch": 4.852647565182669,
      "grad_norm": 0.3294204771518707,
      "learning_rate": 2.966467452541133e-07,
      "loss": 0.0051,
      "step": 2965220
    },
    {
      "epoch": 4.852680295621322,
      "grad_norm": 0.6636956334114075,
      "learning_rate": 2.965808530405962e-07,
      "loss": 0.0097,
      "step": 2965240
    },
    {
      "epoch": 4.852713026059975,
      "grad_norm": 0.049602750688791275,
      "learning_rate": 2.9651496082707907e-07,
      "loss": 0.0079,
      "step": 2965260
    },
    {
      "epoch": 4.852745756498629,
      "grad_norm": 0.0790586769580841,
      "learning_rate": 2.9644906861356197e-07,
      "loss": 0.0079,
      "step": 2965280
    },
    {
      "epoch": 4.852778486937282,
      "grad_norm": 0.17197085916996002,
      "learning_rate": 2.963831764000448e-07,
      "loss": 0.0121,
      "step": 2965300
    },
    {
      "epoch": 4.852811217375935,
      "grad_norm": 0.36914563179016113,
      "learning_rate": 2.963172841865277e-07,
      "loss": 0.0083,
      "step": 2965320
    },
    {
      "epoch": 4.852843947814589,
      "grad_norm": 0.39260372519493103,
      "learning_rate": 2.9625139197301056e-07,
      "loss": 0.0082,
      "step": 2965340
    },
    {
      "epoch": 4.852876678253242,
      "grad_norm": 0.3643431067466736,
      "learning_rate": 2.9618549975949345e-07,
      "loss": 0.0087,
      "step": 2965360
    },
    {
      "epoch": 4.852909408691895,
      "grad_norm": 0.20289114117622375,
      "learning_rate": 2.9611960754597635e-07,
      "loss": 0.0078,
      "step": 2965380
    },
    {
      "epoch": 4.8529421391305485,
      "grad_norm": 0.07630118727684021,
      "learning_rate": 2.960537153324592e-07,
      "loss": 0.0099,
      "step": 2965400
    },
    {
      "epoch": 4.852974869569202,
      "grad_norm": 0.4038783609867096,
      "learning_rate": 2.9598782311894204e-07,
      "loss": 0.0077,
      "step": 2965420
    },
    {
      "epoch": 4.853007600007856,
      "grad_norm": 0.11181918531656265,
      "learning_rate": 2.9592193090542494e-07,
      "loss": 0.0084,
      "step": 2965440
    },
    {
      "epoch": 4.853040330446508,
      "grad_norm": 0.1475902497768402,
      "learning_rate": 2.958560386919078e-07,
      "loss": 0.0111,
      "step": 2965460
    },
    {
      "epoch": 4.853073060885162,
      "grad_norm": 0.5727526545524597,
      "learning_rate": 2.957901464783907e-07,
      "loss": 0.0069,
      "step": 2965480
    },
    {
      "epoch": 4.853105791323816,
      "grad_norm": 0.1307404637336731,
      "learning_rate": 2.9572425426487353e-07,
      "loss": 0.0135,
      "step": 2965500
    },
    {
      "epoch": 4.853138521762468,
      "grad_norm": 0.07960803806781769,
      "learning_rate": 2.9565836205135643e-07,
      "loss": 0.0064,
      "step": 2965520
    },
    {
      "epoch": 4.853171252201122,
      "grad_norm": 0.4159232974052429,
      "learning_rate": 2.955924698378393e-07,
      "loss": 0.0119,
      "step": 2965540
    },
    {
      "epoch": 4.8532039826397755,
      "grad_norm": 0.10648531466722488,
      "learning_rate": 2.955265776243222e-07,
      "loss": 0.0105,
      "step": 2965560
    },
    {
      "epoch": 4.853236713078429,
      "grad_norm": 0.130503311753273,
      "learning_rate": 2.95460685410805e-07,
      "loss": 0.0134,
      "step": 2965580
    },
    {
      "epoch": 4.853269443517082,
      "grad_norm": 0.20267002284526825,
      "learning_rate": 2.953947931972879e-07,
      "loss": 0.0097,
      "step": 2965600
    },
    {
      "epoch": 4.853302173955735,
      "grad_norm": 0.1358010619878769,
      "learning_rate": 2.9532890098377076e-07,
      "loss": 0.0068,
      "step": 2965620
    },
    {
      "epoch": 4.853334904394389,
      "grad_norm": 0.16853365302085876,
      "learning_rate": 2.952630087702536e-07,
      "loss": 0.0074,
      "step": 2965640
    },
    {
      "epoch": 4.853367634833042,
      "grad_norm": 0.17155233025550842,
      "learning_rate": 2.951971165567365e-07,
      "loss": 0.0092,
      "step": 2965660
    },
    {
      "epoch": 4.853400365271695,
      "grad_norm": 0.2110990732908249,
      "learning_rate": 2.9513122434321935e-07,
      "loss": 0.008,
      "step": 2965680
    },
    {
      "epoch": 4.853433095710349,
      "grad_norm": 0.14280876517295837,
      "learning_rate": 2.9506533212970225e-07,
      "loss": 0.0067,
      "step": 2965700
    },
    {
      "epoch": 4.853465826149002,
      "grad_norm": 0.20699737966060638,
      "learning_rate": 2.9499943991618515e-07,
      "loss": 0.0125,
      "step": 2965720
    },
    {
      "epoch": 4.853498556587655,
      "grad_norm": 0.17679384350776672,
      "learning_rate": 2.94933547702668e-07,
      "loss": 0.0114,
      "step": 2965740
    },
    {
      "epoch": 4.853531287026309,
      "grad_norm": 0.4523947536945343,
      "learning_rate": 2.948676554891509e-07,
      "loss": 0.0077,
      "step": 2965760
    },
    {
      "epoch": 4.853564017464962,
      "grad_norm": 0.0823526605963707,
      "learning_rate": 2.9480176327563374e-07,
      "loss": 0.0083,
      "step": 2965780
    },
    {
      "epoch": 4.853596747903615,
      "grad_norm": 0.2061510980129242,
      "learning_rate": 2.9473587106211664e-07,
      "loss": 0.0131,
      "step": 2965800
    },
    {
      "epoch": 4.853629478342269,
      "grad_norm": 0.0948793888092041,
      "learning_rate": 2.946699788485995e-07,
      "loss": 0.0087,
      "step": 2965820
    },
    {
      "epoch": 4.853662208780922,
      "grad_norm": 0.4490361511707306,
      "learning_rate": 2.946040866350824e-07,
      "loss": 0.0135,
      "step": 2965840
    },
    {
      "epoch": 4.853694939219576,
      "grad_norm": 0.18266570568084717,
      "learning_rate": 2.9453819442156523e-07,
      "loss": 0.0068,
      "step": 2965860
    },
    {
      "epoch": 4.8537276696582285,
      "grad_norm": 0.43954864144325256,
      "learning_rate": 2.944723022080481e-07,
      "loss": 0.0125,
      "step": 2965880
    },
    {
      "epoch": 4.853760400096882,
      "grad_norm": 0.9348216652870178,
      "learning_rate": 2.94406409994531e-07,
      "loss": 0.0097,
      "step": 2965900
    },
    {
      "epoch": 4.853793130535536,
      "grad_norm": 0.3238333761692047,
      "learning_rate": 2.943405177810138e-07,
      "loss": 0.0079,
      "step": 2965920
    },
    {
      "epoch": 4.8538258609741884,
      "grad_norm": 0.22699369490146637,
      "learning_rate": 2.942746255674967e-07,
      "loss": 0.0077,
      "step": 2965940
    },
    {
      "epoch": 4.853858591412842,
      "grad_norm": 0.13481296598911285,
      "learning_rate": 2.9420873335397956e-07,
      "loss": 0.0089,
      "step": 2965960
    },
    {
      "epoch": 4.853891321851496,
      "grad_norm": 0.11829031258821487,
      "learning_rate": 2.9414284114046246e-07,
      "loss": 0.0061,
      "step": 2965980
    },
    {
      "epoch": 4.853924052290148,
      "grad_norm": 0.1415194272994995,
      "learning_rate": 2.940769489269453e-07,
      "loss": 0.0078,
      "step": 2966000
    },
    {
      "epoch": 4.853956782728802,
      "grad_norm": 0.32077717781066895,
      "learning_rate": 2.940110567134282e-07,
      "loss": 0.0135,
      "step": 2966020
    },
    {
      "epoch": 4.8539895131674555,
      "grad_norm": 0.08325157314538956,
      "learning_rate": 2.939451644999111e-07,
      "loss": 0.0081,
      "step": 2966040
    },
    {
      "epoch": 4.854022243606109,
      "grad_norm": 0.2456008344888687,
      "learning_rate": 2.9387927228639395e-07,
      "loss": 0.0083,
      "step": 2966060
    },
    {
      "epoch": 4.854054974044762,
      "grad_norm": 0.1699988692998886,
      "learning_rate": 2.938133800728768e-07,
      "loss": 0.0086,
      "step": 2966080
    },
    {
      "epoch": 4.854087704483415,
      "grad_norm": 0.37153705954551697,
      "learning_rate": 2.937474878593597e-07,
      "loss": 0.0085,
      "step": 2966100
    },
    {
      "epoch": 4.854120434922069,
      "grad_norm": 0.09555268287658691,
      "learning_rate": 2.9368159564584254e-07,
      "loss": 0.006,
      "step": 2966120
    },
    {
      "epoch": 4.854153165360723,
      "grad_norm": 0.1480616331100464,
      "learning_rate": 2.9361570343232544e-07,
      "loss": 0.0045,
      "step": 2966140
    },
    {
      "epoch": 4.854185895799375,
      "grad_norm": 0.1432189792394638,
      "learning_rate": 2.935498112188083e-07,
      "loss": 0.0076,
      "step": 2966160
    },
    {
      "epoch": 4.854218626238029,
      "grad_norm": 0.30731382966041565,
      "learning_rate": 2.934839190052912e-07,
      "loss": 0.0094,
      "step": 2966180
    },
    {
      "epoch": 4.8542513566766825,
      "grad_norm": 0.3091989755630493,
      "learning_rate": 2.9341802679177403e-07,
      "loss": 0.0076,
      "step": 2966200
    },
    {
      "epoch": 4.854284087115335,
      "grad_norm": 0.1902206391096115,
      "learning_rate": 2.9335213457825693e-07,
      "loss": 0.0079,
      "step": 2966220
    },
    {
      "epoch": 4.854316817553989,
      "grad_norm": 0.21767070889472961,
      "learning_rate": 2.9328624236473977e-07,
      "loss": 0.0068,
      "step": 2966240
    },
    {
      "epoch": 4.854349547992642,
      "grad_norm": 0.6309219598770142,
      "learning_rate": 2.9322035015122267e-07,
      "loss": 0.0102,
      "step": 2966260
    },
    {
      "epoch": 4.854382278431295,
      "grad_norm": 0.08030984550714493,
      "learning_rate": 2.931544579377055e-07,
      "loss": 0.0095,
      "step": 2966280
    },
    {
      "epoch": 4.854415008869949,
      "grad_norm": 0.27024003863334656,
      "learning_rate": 2.9308856572418836e-07,
      "loss": 0.0119,
      "step": 2966300
    },
    {
      "epoch": 4.854447739308602,
      "grad_norm": 0.18301990628242493,
      "learning_rate": 2.9302267351067126e-07,
      "loss": 0.0109,
      "step": 2966320
    },
    {
      "epoch": 4.854480469747256,
      "grad_norm": 0.4943307936191559,
      "learning_rate": 2.929567812971541e-07,
      "loss": 0.011,
      "step": 2966340
    },
    {
      "epoch": 4.854513200185909,
      "grad_norm": 0.26678943634033203,
      "learning_rate": 2.92890889083637e-07,
      "loss": 0.0101,
      "step": 2966360
    },
    {
      "epoch": 4.854545930624562,
      "grad_norm": 0.3896110951900482,
      "learning_rate": 2.928249968701199e-07,
      "loss": 0.0077,
      "step": 2966380
    },
    {
      "epoch": 4.854578661063216,
      "grad_norm": 0.5066872835159302,
      "learning_rate": 2.9275910465660275e-07,
      "loss": 0.0061,
      "step": 2966400
    },
    {
      "epoch": 4.854611391501869,
      "grad_norm": 0.5499862432479858,
      "learning_rate": 2.9269321244308565e-07,
      "loss": 0.0103,
      "step": 2966420
    },
    {
      "epoch": 4.854644121940522,
      "grad_norm": 0.25056129693984985,
      "learning_rate": 2.926273202295685e-07,
      "loss": 0.0068,
      "step": 2966440
    },
    {
      "epoch": 4.854676852379176,
      "grad_norm": 0.3815852999687195,
      "learning_rate": 2.925614280160514e-07,
      "loss": 0.0087,
      "step": 2966460
    },
    {
      "epoch": 4.854709582817829,
      "grad_norm": 0.28399986028671265,
      "learning_rate": 2.9249553580253424e-07,
      "loss": 0.0071,
      "step": 2966480
    },
    {
      "epoch": 4.854742313256482,
      "grad_norm": 0.13436660170555115,
      "learning_rate": 2.9242964358901714e-07,
      "loss": 0.0091,
      "step": 2966500
    },
    {
      "epoch": 4.854775043695136,
      "grad_norm": 0.35633447766304016,
      "learning_rate": 2.923637513755e-07,
      "loss": 0.0078,
      "step": 2966520
    },
    {
      "epoch": 4.854807774133789,
      "grad_norm": 0.2301570177078247,
      "learning_rate": 2.9229785916198283e-07,
      "loss": 0.0092,
      "step": 2966540
    },
    {
      "epoch": 4.854840504572442,
      "grad_norm": 0.36079320311546326,
      "learning_rate": 2.9223196694846573e-07,
      "loss": 0.0053,
      "step": 2966560
    },
    {
      "epoch": 4.8548732350110955,
      "grad_norm": 0.1810765117406845,
      "learning_rate": 2.9216607473494857e-07,
      "loss": 0.0055,
      "step": 2966580
    },
    {
      "epoch": 4.854905965449749,
      "grad_norm": 0.21136018633842468,
      "learning_rate": 2.9210018252143147e-07,
      "loss": 0.0069,
      "step": 2966600
    },
    {
      "epoch": 4.854938695888403,
      "grad_norm": 0.2025907039642334,
      "learning_rate": 2.920342903079143e-07,
      "loss": 0.0076,
      "step": 2966620
    },
    {
      "epoch": 4.854971426327055,
      "grad_norm": 0.11410945653915405,
      "learning_rate": 2.919683980943972e-07,
      "loss": 0.0092,
      "step": 2966640
    },
    {
      "epoch": 4.855004156765709,
      "grad_norm": 0.14601102471351624,
      "learning_rate": 2.9190250588088006e-07,
      "loss": 0.0105,
      "step": 2966660
    },
    {
      "epoch": 4.8550368872043626,
      "grad_norm": 0.2678123116493225,
      "learning_rate": 2.9183661366736296e-07,
      "loss": 0.007,
      "step": 2966680
    },
    {
      "epoch": 4.855069617643016,
      "grad_norm": 0.11357888579368591,
      "learning_rate": 2.9177072145384586e-07,
      "loss": 0.0069,
      "step": 2966700
    },
    {
      "epoch": 4.855102348081669,
      "grad_norm": 0.09187064319849014,
      "learning_rate": 2.917048292403287e-07,
      "loss": 0.0087,
      "step": 2966720
    },
    {
      "epoch": 4.8551350785203224,
      "grad_norm": 0.46528226137161255,
      "learning_rate": 2.9163893702681155e-07,
      "loss": 0.0103,
      "step": 2966740
    },
    {
      "epoch": 4.855167808958976,
      "grad_norm": 0.1675763726234436,
      "learning_rate": 2.9157304481329445e-07,
      "loss": 0.0071,
      "step": 2966760
    },
    {
      "epoch": 4.855200539397629,
      "grad_norm": 0.410175085067749,
      "learning_rate": 2.915071525997773e-07,
      "loss": 0.0108,
      "step": 2966780
    },
    {
      "epoch": 4.855233269836282,
      "grad_norm": 0.37785691022872925,
      "learning_rate": 2.914412603862602e-07,
      "loss": 0.0102,
      "step": 2966800
    },
    {
      "epoch": 4.855266000274936,
      "grad_norm": 0.1556910276412964,
      "learning_rate": 2.9137536817274304e-07,
      "loss": 0.0088,
      "step": 2966820
    },
    {
      "epoch": 4.855298730713589,
      "grad_norm": 0.11583907157182693,
      "learning_rate": 2.9130947595922594e-07,
      "loss": 0.0097,
      "step": 2966840
    },
    {
      "epoch": 4.855331461152242,
      "grad_norm": 0.19050003588199615,
      "learning_rate": 2.912435837457088e-07,
      "loss": 0.0109,
      "step": 2966860
    },
    {
      "epoch": 4.855364191590896,
      "grad_norm": 0.14695386588573456,
      "learning_rate": 2.911776915321917e-07,
      "loss": 0.0121,
      "step": 2966880
    },
    {
      "epoch": 4.855396922029549,
      "grad_norm": 0.3052975535392761,
      "learning_rate": 2.911117993186745e-07,
      "loss": 0.0095,
      "step": 2966900
    },
    {
      "epoch": 4.855429652468202,
      "grad_norm": 0.3295999765396118,
      "learning_rate": 2.910459071051574e-07,
      "loss": 0.0114,
      "step": 2966920
    },
    {
      "epoch": 4.855462382906856,
      "grad_norm": 0.21781592071056366,
      "learning_rate": 2.9098001489164027e-07,
      "loss": 0.0108,
      "step": 2966940
    },
    {
      "epoch": 4.855495113345509,
      "grad_norm": 0.4169417917728424,
      "learning_rate": 2.909141226781231e-07,
      "loss": 0.0069,
      "step": 2966960
    },
    {
      "epoch": 4.855527843784162,
      "grad_norm": 0.3203785717487335,
      "learning_rate": 2.90848230464606e-07,
      "loss": 0.0068,
      "step": 2966980
    },
    {
      "epoch": 4.855560574222816,
      "grad_norm": 0.17957274615764618,
      "learning_rate": 2.9078233825108886e-07,
      "loss": 0.0061,
      "step": 2967000
    },
    {
      "epoch": 4.855593304661469,
      "grad_norm": 0.48945721983909607,
      "learning_rate": 2.9071644603757176e-07,
      "loss": 0.0055,
      "step": 2967020
    },
    {
      "epoch": 4.855626035100123,
      "grad_norm": 0.21174369752407074,
      "learning_rate": 2.9065055382405466e-07,
      "loss": 0.0074,
      "step": 2967040
    },
    {
      "epoch": 4.8556587655387755,
      "grad_norm": 0.17014287412166595,
      "learning_rate": 2.905846616105375e-07,
      "loss": 0.0075,
      "step": 2967060
    },
    {
      "epoch": 4.855691495977429,
      "grad_norm": 0.08185213804244995,
      "learning_rate": 2.905187693970204e-07,
      "loss": 0.0106,
      "step": 2967080
    },
    {
      "epoch": 4.855724226416083,
      "grad_norm": 0.09030675888061523,
      "learning_rate": 2.9045287718350325e-07,
      "loss": 0.0082,
      "step": 2967100
    },
    {
      "epoch": 4.855756956854735,
      "grad_norm": 0.15080367028713226,
      "learning_rate": 2.9038698496998615e-07,
      "loss": 0.007,
      "step": 2967120
    },
    {
      "epoch": 4.855789687293389,
      "grad_norm": 0.1171150952577591,
      "learning_rate": 2.90321092756469e-07,
      "loss": 0.0088,
      "step": 2967140
    },
    {
      "epoch": 4.855822417732043,
      "grad_norm": 0.5004011988639832,
      "learning_rate": 2.902552005429519e-07,
      "loss": 0.0142,
      "step": 2967160
    },
    {
      "epoch": 4.855855148170695,
      "grad_norm": 0.4592587351799011,
      "learning_rate": 2.9018930832943474e-07,
      "loss": 0.0074,
      "step": 2967180
    },
    {
      "epoch": 4.855887878609349,
      "grad_norm": 0.21609477698802948,
      "learning_rate": 2.901234161159176e-07,
      "loss": 0.0092,
      "step": 2967200
    },
    {
      "epoch": 4.8559206090480025,
      "grad_norm": 0.09478163719177246,
      "learning_rate": 2.900575239024005e-07,
      "loss": 0.0092,
      "step": 2967220
    },
    {
      "epoch": 4.855953339486656,
      "grad_norm": 0.2003038376569748,
      "learning_rate": 2.899916316888833e-07,
      "loss": 0.0087,
      "step": 2967240
    },
    {
      "epoch": 4.855986069925309,
      "grad_norm": 0.16489283740520477,
      "learning_rate": 2.899257394753662e-07,
      "loss": 0.0132,
      "step": 2967260
    },
    {
      "epoch": 4.856018800363962,
      "grad_norm": 0.33285200595855713,
      "learning_rate": 2.8985984726184907e-07,
      "loss": 0.0068,
      "step": 2967280
    },
    {
      "epoch": 4.856051530802616,
      "grad_norm": 0.11196223646402359,
      "learning_rate": 2.8979395504833197e-07,
      "loss": 0.0067,
      "step": 2967300
    },
    {
      "epoch": 4.85608426124127,
      "grad_norm": 0.3208491802215576,
      "learning_rate": 2.897280628348148e-07,
      "loss": 0.0125,
      "step": 2967320
    },
    {
      "epoch": 4.856116991679922,
      "grad_norm": 0.13819828629493713,
      "learning_rate": 2.896621706212977e-07,
      "loss": 0.0099,
      "step": 2967340
    },
    {
      "epoch": 4.856149722118576,
      "grad_norm": 0.15396066009998322,
      "learning_rate": 2.895962784077806e-07,
      "loss": 0.0068,
      "step": 2967360
    },
    {
      "epoch": 4.8561824525572295,
      "grad_norm": 0.14829768240451813,
      "learning_rate": 2.8953038619426346e-07,
      "loss": 0.0074,
      "step": 2967380
    },
    {
      "epoch": 4.856215182995882,
      "grad_norm": 0.4618702232837677,
      "learning_rate": 2.894644939807463e-07,
      "loss": 0.0081,
      "step": 2967400
    },
    {
      "epoch": 4.856247913434536,
      "grad_norm": 0.32644200325012207,
      "learning_rate": 2.893986017672292e-07,
      "loss": 0.0083,
      "step": 2967420
    },
    {
      "epoch": 4.856280643873189,
      "grad_norm": 0.2338092029094696,
      "learning_rate": 2.8933270955371205e-07,
      "loss": 0.0091,
      "step": 2967440
    },
    {
      "epoch": 4.856313374311842,
      "grad_norm": 0.2340599000453949,
      "learning_rate": 2.8926681734019494e-07,
      "loss": 0.0089,
      "step": 2967460
    },
    {
      "epoch": 4.856346104750496,
      "grad_norm": 0.11261368542909622,
      "learning_rate": 2.892009251266778e-07,
      "loss": 0.0084,
      "step": 2967480
    },
    {
      "epoch": 4.856378835189149,
      "grad_norm": 0.28490111231803894,
      "learning_rate": 2.891350329131607e-07,
      "loss": 0.0076,
      "step": 2967500
    },
    {
      "epoch": 4.856411565627803,
      "grad_norm": 0.18768368661403656,
      "learning_rate": 2.8906914069964353e-07,
      "loss": 0.0066,
      "step": 2967520
    },
    {
      "epoch": 4.856444296066456,
      "grad_norm": 0.08283090591430664,
      "learning_rate": 2.8900324848612643e-07,
      "loss": 0.0115,
      "step": 2967540
    },
    {
      "epoch": 4.856477026505109,
      "grad_norm": 0.07991362363100052,
      "learning_rate": 2.889373562726093e-07,
      "loss": 0.0102,
      "step": 2967560
    },
    {
      "epoch": 4.856509756943763,
      "grad_norm": 0.19798077642917633,
      "learning_rate": 2.888714640590922e-07,
      "loss": 0.0075,
      "step": 2967580
    },
    {
      "epoch": 4.856542487382416,
      "grad_norm": 0.2715608477592468,
      "learning_rate": 2.88805571845575e-07,
      "loss": 0.0107,
      "step": 2967600
    },
    {
      "epoch": 4.856575217821069,
      "grad_norm": 0.43206849694252014,
      "learning_rate": 2.8873967963205787e-07,
      "loss": 0.0096,
      "step": 2967620
    },
    {
      "epoch": 4.856607948259723,
      "grad_norm": 0.09647534787654877,
      "learning_rate": 2.8867378741854077e-07,
      "loss": 0.0066,
      "step": 2967640
    },
    {
      "epoch": 4.856640678698376,
      "grad_norm": 0.42708757519721985,
      "learning_rate": 2.886078952050236e-07,
      "loss": 0.0111,
      "step": 2967660
    },
    {
      "epoch": 4.856673409137029,
      "grad_norm": 0.2907687723636627,
      "learning_rate": 2.885420029915065e-07,
      "loss": 0.0123,
      "step": 2967680
    },
    {
      "epoch": 4.8567061395756825,
      "grad_norm": 0.16961674392223358,
      "learning_rate": 2.884761107779894e-07,
      "loss": 0.0088,
      "step": 2967700
    },
    {
      "epoch": 4.856738870014336,
      "grad_norm": 0.34168970584869385,
      "learning_rate": 2.8841021856447226e-07,
      "loss": 0.0063,
      "step": 2967720
    },
    {
      "epoch": 4.856771600452989,
      "grad_norm": 0.31583836674690247,
      "learning_rate": 2.8834432635095515e-07,
      "loss": 0.0083,
      "step": 2967740
    },
    {
      "epoch": 4.856804330891642,
      "grad_norm": 0.29725176095962524,
      "learning_rate": 2.88278434137438e-07,
      "loss": 0.0086,
      "step": 2967760
    },
    {
      "epoch": 4.856837061330296,
      "grad_norm": 0.4148620069026947,
      "learning_rate": 2.882125419239209e-07,
      "loss": 0.0113,
      "step": 2967780
    },
    {
      "epoch": 4.85686979176895,
      "grad_norm": 0.3377062976360321,
      "learning_rate": 2.8814664971040374e-07,
      "loss": 0.0073,
      "step": 2967800
    },
    {
      "epoch": 4.856902522207602,
      "grad_norm": 0.2684851884841919,
      "learning_rate": 2.8808075749688664e-07,
      "loss": 0.0114,
      "step": 2967820
    },
    {
      "epoch": 4.856935252646256,
      "grad_norm": 0.3663768768310547,
      "learning_rate": 2.880148652833695e-07,
      "loss": 0.0073,
      "step": 2967840
    },
    {
      "epoch": 4.8569679830849095,
      "grad_norm": 0.09121496975421906,
      "learning_rate": 2.8794897306985233e-07,
      "loss": 0.007,
      "step": 2967860
    },
    {
      "epoch": 4.857000713523563,
      "grad_norm": 0.40493515133857727,
      "learning_rate": 2.8788308085633523e-07,
      "loss": 0.009,
      "step": 2967880
    },
    {
      "epoch": 4.857033443962216,
      "grad_norm": 0.41320163011550903,
      "learning_rate": 2.878171886428181e-07,
      "loss": 0.0072,
      "step": 2967900
    },
    {
      "epoch": 4.857066174400869,
      "grad_norm": 0.17523667216300964,
      "learning_rate": 2.87751296429301e-07,
      "loss": 0.0083,
      "step": 2967920
    },
    {
      "epoch": 4.857098904839523,
      "grad_norm": 0.10072236508131027,
      "learning_rate": 2.876854042157838e-07,
      "loss": 0.0088,
      "step": 2967940
    },
    {
      "epoch": 4.857131635278176,
      "grad_norm": 0.12435917556285858,
      "learning_rate": 2.876195120022667e-07,
      "loss": 0.0062,
      "step": 2967960
    },
    {
      "epoch": 4.857164365716829,
      "grad_norm": 0.19889196753501892,
      "learning_rate": 2.875536197887496e-07,
      "loss": 0.009,
      "step": 2967980
    },
    {
      "epoch": 4.857197096155483,
      "grad_norm": 0.23750674724578857,
      "learning_rate": 2.8748772757523246e-07,
      "loss": 0.0098,
      "step": 2968000
    },
    {
      "epoch": 4.857229826594136,
      "grad_norm": 0.10601484775543213,
      "learning_rate": 2.8742183536171536e-07,
      "loss": 0.0072,
      "step": 2968020
    },
    {
      "epoch": 4.857262557032789,
      "grad_norm": 0.14799311757087708,
      "learning_rate": 2.873559431481982e-07,
      "loss": 0.0105,
      "step": 2968040
    },
    {
      "epoch": 4.857295287471443,
      "grad_norm": 0.1478084921836853,
      "learning_rate": 2.8729005093468105e-07,
      "loss": 0.0108,
      "step": 2968060
    },
    {
      "epoch": 4.857328017910096,
      "grad_norm": 0.08224143832921982,
      "learning_rate": 2.8722415872116395e-07,
      "loss": 0.0057,
      "step": 2968080
    },
    {
      "epoch": 4.857360748348749,
      "grad_norm": 0.14290477335453033,
      "learning_rate": 2.871582665076468e-07,
      "loss": 0.0069,
      "step": 2968100
    },
    {
      "epoch": 4.857393478787403,
      "grad_norm": 0.1622423529624939,
      "learning_rate": 2.870923742941297e-07,
      "loss": 0.0092,
      "step": 2968120
    },
    {
      "epoch": 4.857426209226056,
      "grad_norm": 0.2973186671733856,
      "learning_rate": 2.8702648208061254e-07,
      "loss": 0.0075,
      "step": 2968140
    },
    {
      "epoch": 4.85745893966471,
      "grad_norm": 0.04701034724712372,
      "learning_rate": 2.8696058986709544e-07,
      "loss": 0.0131,
      "step": 2968160
    },
    {
      "epoch": 4.857491670103363,
      "grad_norm": 0.06189524754881859,
      "learning_rate": 2.868946976535783e-07,
      "loss": 0.0081,
      "step": 2968180
    },
    {
      "epoch": 4.857524400542016,
      "grad_norm": 0.09679681807756424,
      "learning_rate": 2.868288054400612e-07,
      "loss": 0.0065,
      "step": 2968200
    },
    {
      "epoch": 4.85755713098067,
      "grad_norm": 0.17581802606582642,
      "learning_rate": 2.8676291322654403e-07,
      "loss": 0.0125,
      "step": 2968220
    },
    {
      "epoch": 4.8575898614193225,
      "grad_norm": 0.11417096108198166,
      "learning_rate": 2.8669702101302693e-07,
      "loss": 0.0087,
      "step": 2968240
    },
    {
      "epoch": 4.857622591857976,
      "grad_norm": 0.28688380122184753,
      "learning_rate": 2.866311287995098e-07,
      "loss": 0.0075,
      "step": 2968260
    },
    {
      "epoch": 4.85765532229663,
      "grad_norm": 0.2640077471733093,
      "learning_rate": 2.865652365859926e-07,
      "loss": 0.0079,
      "step": 2968280
    },
    {
      "epoch": 4.857688052735282,
      "grad_norm": 0.1655002385377884,
      "learning_rate": 2.864993443724755e-07,
      "loss": 0.0154,
      "step": 2968300
    },
    {
      "epoch": 4.857720783173936,
      "grad_norm": 0.1760914921760559,
      "learning_rate": 2.8643345215895837e-07,
      "loss": 0.008,
      "step": 2968320
    },
    {
      "epoch": 4.85775351361259,
      "grad_norm": 0.08670014142990112,
      "learning_rate": 2.8636755994544126e-07,
      "loss": 0.0102,
      "step": 2968340
    },
    {
      "epoch": 4.857786244051243,
      "grad_norm": 0.06918036937713623,
      "learning_rate": 2.8630166773192416e-07,
      "loss": 0.0087,
      "step": 2968360
    },
    {
      "epoch": 4.857818974489896,
      "grad_norm": 0.08287307620048523,
      "learning_rate": 2.86235775518407e-07,
      "loss": 0.0078,
      "step": 2968380
    },
    {
      "epoch": 4.8578517049285495,
      "grad_norm": 0.3143309950828552,
      "learning_rate": 2.861698833048899e-07,
      "loss": 0.0099,
      "step": 2968400
    },
    {
      "epoch": 4.857884435367203,
      "grad_norm": 0.11471416056156158,
      "learning_rate": 2.8610399109137275e-07,
      "loss": 0.0112,
      "step": 2968420
    },
    {
      "epoch": 4.857917165805856,
      "grad_norm": 0.2648417353630066,
      "learning_rate": 2.8603809887785565e-07,
      "loss": 0.0084,
      "step": 2968440
    },
    {
      "epoch": 4.857949896244509,
      "grad_norm": 0.3471396565437317,
      "learning_rate": 2.859722066643385e-07,
      "loss": 0.0097,
      "step": 2968460
    },
    {
      "epoch": 4.857982626683163,
      "grad_norm": 0.11012496799230576,
      "learning_rate": 2.859063144508214e-07,
      "loss": 0.0108,
      "step": 2968480
    },
    {
      "epoch": 4.8580153571218165,
      "grad_norm": 0.20026877522468567,
      "learning_rate": 2.8584042223730424e-07,
      "loss": 0.0094,
      "step": 2968500
    },
    {
      "epoch": 4.858048087560469,
      "grad_norm": 0.06378895789384842,
      "learning_rate": 2.857745300237871e-07,
      "loss": 0.0121,
      "step": 2968520
    },
    {
      "epoch": 4.858080817999123,
      "grad_norm": 0.44485870003700256,
      "learning_rate": 2.8570863781027e-07,
      "loss": 0.0095,
      "step": 2968540
    },
    {
      "epoch": 4.858113548437776,
      "grad_norm": 0.28295350074768066,
      "learning_rate": 2.8564274559675283e-07,
      "loss": 0.0088,
      "step": 2968560
    },
    {
      "epoch": 4.858146278876429,
      "grad_norm": 0.4119013845920563,
      "learning_rate": 2.8557685338323573e-07,
      "loss": 0.0106,
      "step": 2968580
    },
    {
      "epoch": 4.858179009315083,
      "grad_norm": 0.34066733717918396,
      "learning_rate": 2.855109611697186e-07,
      "loss": 0.0127,
      "step": 2968600
    },
    {
      "epoch": 4.858211739753736,
      "grad_norm": 0.12709441781044006,
      "learning_rate": 2.8544506895620147e-07,
      "loss": 0.0094,
      "step": 2968620
    },
    {
      "epoch": 4.858244470192389,
      "grad_norm": 0.7196319103240967,
      "learning_rate": 2.8537917674268437e-07,
      "loss": 0.0104,
      "step": 2968640
    },
    {
      "epoch": 4.858277200631043,
      "grad_norm": 0.12961581349372864,
      "learning_rate": 2.853132845291672e-07,
      "loss": 0.0047,
      "step": 2968660
    },
    {
      "epoch": 4.858309931069696,
      "grad_norm": 0.2559824585914612,
      "learning_rate": 2.852473923156501e-07,
      "loss": 0.0085,
      "step": 2968680
    },
    {
      "epoch": 4.85834266150835,
      "grad_norm": 0.0601518452167511,
      "learning_rate": 2.8518150010213296e-07,
      "loss": 0.0124,
      "step": 2968700
    },
    {
      "epoch": 4.8583753919470025,
      "grad_norm": 0.5222382545471191,
      "learning_rate": 2.851156078886158e-07,
      "loss": 0.009,
      "step": 2968720
    },
    {
      "epoch": 4.858408122385656,
      "grad_norm": 0.4867263734340668,
      "learning_rate": 2.850497156750987e-07,
      "loss": 0.0078,
      "step": 2968740
    },
    {
      "epoch": 4.85844085282431,
      "grad_norm": 0.25210416316986084,
      "learning_rate": 2.8498382346158155e-07,
      "loss": 0.007,
      "step": 2968760
    },
    {
      "epoch": 4.858473583262963,
      "grad_norm": 0.3123336434364319,
      "learning_rate": 2.8491793124806445e-07,
      "loss": 0.008,
      "step": 2968780
    },
    {
      "epoch": 4.858506313701616,
      "grad_norm": 0.11602932214736938,
      "learning_rate": 2.848520390345473e-07,
      "loss": 0.0079,
      "step": 2968800
    },
    {
      "epoch": 4.85853904414027,
      "grad_norm": 0.29331740736961365,
      "learning_rate": 2.847861468210302e-07,
      "loss": 0.0125,
      "step": 2968820
    },
    {
      "epoch": 4.858571774578923,
      "grad_norm": 0.6066727638244629,
      "learning_rate": 2.8472025460751304e-07,
      "loss": 0.0109,
      "step": 2968840
    },
    {
      "epoch": 4.858604505017576,
      "grad_norm": 0.18558111786842346,
      "learning_rate": 2.8465436239399594e-07,
      "loss": 0.0084,
      "step": 2968860
    },
    {
      "epoch": 4.8586372354562295,
      "grad_norm": 0.03992561250925064,
      "learning_rate": 2.845884701804788e-07,
      "loss": 0.0118,
      "step": 2968880
    },
    {
      "epoch": 4.858669965894883,
      "grad_norm": 0.10476086288690567,
      "learning_rate": 2.845225779669617e-07,
      "loss": 0.012,
      "step": 2968900
    },
    {
      "epoch": 4.858702696333536,
      "grad_norm": 0.06885475665330887,
      "learning_rate": 2.8445668575344453e-07,
      "loss": 0.0091,
      "step": 2968920
    },
    {
      "epoch": 4.858735426772189,
      "grad_norm": 0.3173058032989502,
      "learning_rate": 2.843907935399274e-07,
      "loss": 0.007,
      "step": 2968940
    },
    {
      "epoch": 4.858768157210843,
      "grad_norm": 0.16519306600093842,
      "learning_rate": 2.8432490132641027e-07,
      "loss": 0.0058,
      "step": 2968960
    },
    {
      "epoch": 4.858800887649497,
      "grad_norm": 0.27957379817962646,
      "learning_rate": 2.842590091128931e-07,
      "loss": 0.008,
      "step": 2968980
    },
    {
      "epoch": 4.858833618088149,
      "grad_norm": 0.2564348578453064,
      "learning_rate": 2.84193116899376e-07,
      "loss": 0.0103,
      "step": 2969000
    },
    {
      "epoch": 4.858866348526803,
      "grad_norm": 0.11684417724609375,
      "learning_rate": 2.841272246858589e-07,
      "loss": 0.0106,
      "step": 2969020
    },
    {
      "epoch": 4.8588990789654565,
      "grad_norm": 0.2678757309913635,
      "learning_rate": 2.8406133247234176e-07,
      "loss": 0.0067,
      "step": 2969040
    },
    {
      "epoch": 4.85893180940411,
      "grad_norm": 0.1271389126777649,
      "learning_rate": 2.8399544025882466e-07,
      "loss": 0.0099,
      "step": 2969060
    },
    {
      "epoch": 4.858964539842763,
      "grad_norm": 0.0575842559337616,
      "learning_rate": 2.839295480453075e-07,
      "loss": 0.0103,
      "step": 2969080
    },
    {
      "epoch": 4.858997270281416,
      "grad_norm": 0.12540285289287567,
      "learning_rate": 2.838636558317904e-07,
      "loss": 0.0117,
      "step": 2969100
    },
    {
      "epoch": 4.85903000072007,
      "grad_norm": 0.17065054178237915,
      "learning_rate": 2.8379776361827325e-07,
      "loss": 0.0099,
      "step": 2969120
    },
    {
      "epoch": 4.859062731158723,
      "grad_norm": 0.11257701367139816,
      "learning_rate": 2.8373187140475615e-07,
      "loss": 0.0084,
      "step": 2969140
    },
    {
      "epoch": 4.859095461597376,
      "grad_norm": 0.1572791188955307,
      "learning_rate": 2.83665979191239e-07,
      "loss": 0.013,
      "step": 2969160
    },
    {
      "epoch": 4.85912819203603,
      "grad_norm": 0.38349196314811707,
      "learning_rate": 2.8360008697772184e-07,
      "loss": 0.0117,
      "step": 2969180
    },
    {
      "epoch": 4.859160922474683,
      "grad_norm": 0.37047097086906433,
      "learning_rate": 2.8353419476420474e-07,
      "loss": 0.0126,
      "step": 2969200
    },
    {
      "epoch": 4.859193652913336,
      "grad_norm": 0.5056372880935669,
      "learning_rate": 2.834683025506876e-07,
      "loss": 0.0084,
      "step": 2969220
    },
    {
      "epoch": 4.85922638335199,
      "grad_norm": 0.21816864609718323,
      "learning_rate": 2.834024103371705e-07,
      "loss": 0.0061,
      "step": 2969240
    },
    {
      "epoch": 4.859259113790643,
      "grad_norm": 0.2023480236530304,
      "learning_rate": 2.8333651812365333e-07,
      "loss": 0.0079,
      "step": 2969260
    },
    {
      "epoch": 4.859291844229296,
      "grad_norm": 0.14884956181049347,
      "learning_rate": 2.832706259101362e-07,
      "loss": 0.0094,
      "step": 2969280
    },
    {
      "epoch": 4.85932457466795,
      "grad_norm": 0.12852971255779266,
      "learning_rate": 2.832047336966191e-07,
      "loss": 0.0066,
      "step": 2969300
    },
    {
      "epoch": 4.859357305106603,
      "grad_norm": 0.21987780928611755,
      "learning_rate": 2.8313884148310197e-07,
      "loss": 0.0088,
      "step": 2969320
    },
    {
      "epoch": 4.859390035545257,
      "grad_norm": 0.21156223118305206,
      "learning_rate": 2.8307294926958487e-07,
      "loss": 0.0076,
      "step": 2969340
    },
    {
      "epoch": 4.8594227659839095,
      "grad_norm": 0.28817465901374817,
      "learning_rate": 2.830070570560677e-07,
      "loss": 0.008,
      "step": 2969360
    },
    {
      "epoch": 4.859455496422563,
      "grad_norm": 0.17063912749290466,
      "learning_rate": 2.829411648425506e-07,
      "loss": 0.0122,
      "step": 2969380
    },
    {
      "epoch": 4.859488226861217,
      "grad_norm": 0.44260573387145996,
      "learning_rate": 2.8287527262903346e-07,
      "loss": 0.0118,
      "step": 2969400
    },
    {
      "epoch": 4.859520957299869,
      "grad_norm": 0.07540460675954819,
      "learning_rate": 2.828093804155163e-07,
      "loss": 0.0084,
      "step": 2969420
    },
    {
      "epoch": 4.859553687738523,
      "grad_norm": 0.15393085777759552,
      "learning_rate": 2.827434882019992e-07,
      "loss": 0.0074,
      "step": 2969440
    },
    {
      "epoch": 4.859586418177177,
      "grad_norm": 0.18200959265232086,
      "learning_rate": 2.8267759598848205e-07,
      "loss": 0.0073,
      "step": 2969460
    },
    {
      "epoch": 4.859619148615829,
      "grad_norm": 0.17498019337654114,
      "learning_rate": 2.8261170377496495e-07,
      "loss": 0.0057,
      "step": 2969480
    },
    {
      "epoch": 4.859651879054483,
      "grad_norm": 0.16041646897792816,
      "learning_rate": 2.825458115614478e-07,
      "loss": 0.0104,
      "step": 2969500
    },
    {
      "epoch": 4.8596846094931365,
      "grad_norm": 0.12602491676807404,
      "learning_rate": 2.824799193479307e-07,
      "loss": 0.0086,
      "step": 2969520
    },
    {
      "epoch": 4.85971733993179,
      "grad_norm": 0.2797148823738098,
      "learning_rate": 2.8241402713441354e-07,
      "loss": 0.0086,
      "step": 2969540
    },
    {
      "epoch": 4.859750070370443,
      "grad_norm": 0.7292520403862,
      "learning_rate": 2.8234813492089643e-07,
      "loss": 0.0145,
      "step": 2969560
    },
    {
      "epoch": 4.859782800809096,
      "grad_norm": 0.3041531443595886,
      "learning_rate": 2.822822427073793e-07,
      "loss": 0.0104,
      "step": 2969580
    },
    {
      "epoch": 4.85981553124775,
      "grad_norm": 0.3960912525653839,
      "learning_rate": 2.822163504938622e-07,
      "loss": 0.013,
      "step": 2969600
    },
    {
      "epoch": 4.859848261686404,
      "grad_norm": 0.7055997252464294,
      "learning_rate": 2.82150458280345e-07,
      "loss": 0.0093,
      "step": 2969620
    },
    {
      "epoch": 4.859880992125056,
      "grad_norm": 0.2022118717432022,
      "learning_rate": 2.8208456606682787e-07,
      "loss": 0.0087,
      "step": 2969640
    },
    {
      "epoch": 4.85991372256371,
      "grad_norm": 0.1341456025838852,
      "learning_rate": 2.8201867385331077e-07,
      "loss": 0.009,
      "step": 2969660
    },
    {
      "epoch": 4.8599464530023635,
      "grad_norm": 0.06643863767385483,
      "learning_rate": 2.8195278163979367e-07,
      "loss": 0.0101,
      "step": 2969680
    },
    {
      "epoch": 4.859979183441016,
      "grad_norm": 0.17362169921398163,
      "learning_rate": 2.818868894262765e-07,
      "loss": 0.0084,
      "step": 2969700
    },
    {
      "epoch": 4.86001191387967,
      "grad_norm": 0.5911511778831482,
      "learning_rate": 2.818209972127594e-07,
      "loss": 0.0103,
      "step": 2969720
    },
    {
      "epoch": 4.860044644318323,
      "grad_norm": 0.2367183119058609,
      "learning_rate": 2.8175510499924226e-07,
      "loss": 0.0055,
      "step": 2969740
    },
    {
      "epoch": 4.860077374756976,
      "grad_norm": 0.28270652890205383,
      "learning_rate": 2.8168921278572516e-07,
      "loss": 0.0075,
      "step": 2969760
    },
    {
      "epoch": 4.86011010519563,
      "grad_norm": 0.23090602457523346,
      "learning_rate": 2.81623320572208e-07,
      "loss": 0.0115,
      "step": 2969780
    },
    {
      "epoch": 4.860142835634283,
      "grad_norm": 0.24367278814315796,
      "learning_rate": 2.815574283586909e-07,
      "loss": 0.0107,
      "step": 2969800
    },
    {
      "epoch": 4.860175566072937,
      "grad_norm": 0.13842125236988068,
      "learning_rate": 2.8149153614517375e-07,
      "loss": 0.0104,
      "step": 2969820
    },
    {
      "epoch": 4.86020829651159,
      "grad_norm": 0.3006432056427002,
      "learning_rate": 2.814256439316566e-07,
      "loss": 0.0072,
      "step": 2969840
    },
    {
      "epoch": 4.860241026950243,
      "grad_norm": 0.14647965133190155,
      "learning_rate": 2.813597517181395e-07,
      "loss": 0.0097,
      "step": 2969860
    },
    {
      "epoch": 4.860273757388897,
      "grad_norm": 0.47294241189956665,
      "learning_rate": 2.8129385950462234e-07,
      "loss": 0.0072,
      "step": 2969880
    },
    {
      "epoch": 4.8603064878275495,
      "grad_norm": 0.26666855812072754,
      "learning_rate": 2.8122796729110523e-07,
      "loss": 0.0125,
      "step": 2969900
    },
    {
      "epoch": 4.860339218266203,
      "grad_norm": 0.11927377432584763,
      "learning_rate": 2.811620750775881e-07,
      "loss": 0.0055,
      "step": 2969920
    },
    {
      "epoch": 4.860371948704857,
      "grad_norm": 0.23627662658691406,
      "learning_rate": 2.81096182864071e-07,
      "loss": 0.0078,
      "step": 2969940
    },
    {
      "epoch": 4.86040467914351,
      "grad_norm": 0.16881144046783447,
      "learning_rate": 2.810302906505539e-07,
      "loss": 0.0077,
      "step": 2969960
    },
    {
      "epoch": 4.860437409582163,
      "grad_norm": 0.19185563921928406,
      "learning_rate": 2.809643984370367e-07,
      "loss": 0.0113,
      "step": 2969980
    },
    {
      "epoch": 4.860470140020817,
      "grad_norm": 0.15707963705062866,
      "learning_rate": 2.808985062235196e-07,
      "loss": 0.0139,
      "step": 2970000
    },
    {
      "epoch": 4.86050287045947,
      "grad_norm": 0.2573550045490265,
      "learning_rate": 2.8083261401000247e-07,
      "loss": 0.0061,
      "step": 2970020
    },
    {
      "epoch": 4.860535600898123,
      "grad_norm": 0.1627444624900818,
      "learning_rate": 2.8076672179648537e-07,
      "loss": 0.0065,
      "step": 2970040
    },
    {
      "epoch": 4.8605683313367765,
      "grad_norm": 0.08620460331439972,
      "learning_rate": 2.807008295829682e-07,
      "loss": 0.0093,
      "step": 2970060
    },
    {
      "epoch": 4.86060106177543,
      "grad_norm": 0.3323158621788025,
      "learning_rate": 2.8063493736945106e-07,
      "loss": 0.0071,
      "step": 2970080
    },
    {
      "epoch": 4.860633792214083,
      "grad_norm": 0.22113017737865448,
      "learning_rate": 2.8056904515593396e-07,
      "loss": 0.0079,
      "step": 2970100
    },
    {
      "epoch": 4.860666522652736,
      "grad_norm": 0.059641990810632706,
      "learning_rate": 2.805031529424168e-07,
      "loss": 0.0076,
      "step": 2970120
    },
    {
      "epoch": 4.86069925309139,
      "grad_norm": 0.2305717170238495,
      "learning_rate": 2.804372607288997e-07,
      "loss": 0.0097,
      "step": 2970140
    },
    {
      "epoch": 4.8607319835300435,
      "grad_norm": 0.20717668533325195,
      "learning_rate": 2.8037136851538254e-07,
      "loss": 0.0059,
      "step": 2970160
    },
    {
      "epoch": 4.860764713968696,
      "grad_norm": 0.15103161334991455,
      "learning_rate": 2.8030547630186544e-07,
      "loss": 0.0077,
      "step": 2970180
    },
    {
      "epoch": 4.86079744440735,
      "grad_norm": 0.5624228715896606,
      "learning_rate": 2.802395840883483e-07,
      "loss": 0.01,
      "step": 2970200
    },
    {
      "epoch": 4.860830174846003,
      "grad_norm": 0.2819860577583313,
      "learning_rate": 2.801736918748312e-07,
      "loss": 0.01,
      "step": 2970220
    },
    {
      "epoch": 4.860862905284657,
      "grad_norm": 0.2554054260253906,
      "learning_rate": 2.8010779966131403e-07,
      "loss": 0.0103,
      "step": 2970240
    },
    {
      "epoch": 4.86089563572331,
      "grad_norm": 0.16250881552696228,
      "learning_rate": 2.8004190744779693e-07,
      "loss": 0.0115,
      "step": 2970260
    },
    {
      "epoch": 4.860928366161963,
      "grad_norm": 0.17335250973701477,
      "learning_rate": 2.799760152342798e-07,
      "loss": 0.0088,
      "step": 2970280
    },
    {
      "epoch": 4.860961096600617,
      "grad_norm": 0.11846641451120377,
      "learning_rate": 2.799101230207626e-07,
      "loss": 0.0094,
      "step": 2970300
    },
    {
      "epoch": 4.86099382703927,
      "grad_norm": 0.2792820930480957,
      "learning_rate": 2.798442308072455e-07,
      "loss": 0.0102,
      "step": 2970320
    },
    {
      "epoch": 4.861026557477923,
      "grad_norm": 0.8489162921905518,
      "learning_rate": 2.797783385937284e-07,
      "loss": 0.0107,
      "step": 2970340
    },
    {
      "epoch": 4.861059287916577,
      "grad_norm": 0.44188591837882996,
      "learning_rate": 2.7971244638021127e-07,
      "loss": 0.0078,
      "step": 2970360
    },
    {
      "epoch": 4.8610920183552295,
      "grad_norm": 0.23840031027793884,
      "learning_rate": 2.7964655416669416e-07,
      "loss": 0.0073,
      "step": 2970380
    },
    {
      "epoch": 4.861124748793883,
      "grad_norm": 0.3177501857280731,
      "learning_rate": 2.79580661953177e-07,
      "loss": 0.0067,
      "step": 2970400
    },
    {
      "epoch": 4.861157479232537,
      "grad_norm": 0.12350919097661972,
      "learning_rate": 2.795147697396599e-07,
      "loss": 0.0065,
      "step": 2970420
    },
    {
      "epoch": 4.86119020967119,
      "grad_norm": 0.283340185880661,
      "learning_rate": 2.7944887752614275e-07,
      "loss": 0.0153,
      "step": 2970440
    },
    {
      "epoch": 4.861222940109843,
      "grad_norm": 0.1377834528684616,
      "learning_rate": 2.7938298531262565e-07,
      "loss": 0.009,
      "step": 2970460
    },
    {
      "epoch": 4.861255670548497,
      "grad_norm": 0.1558442860841751,
      "learning_rate": 2.793170930991085e-07,
      "loss": 0.0073,
      "step": 2970480
    },
    {
      "epoch": 4.86128840098715,
      "grad_norm": 0.15792372822761536,
      "learning_rate": 2.7925120088559134e-07,
      "loss": 0.0107,
      "step": 2970500
    },
    {
      "epoch": 4.861321131425804,
      "grad_norm": 0.5754217505455017,
      "learning_rate": 2.7918530867207424e-07,
      "loss": 0.0068,
      "step": 2970520
    },
    {
      "epoch": 4.8613538618644565,
      "grad_norm": 0.11847210675477982,
      "learning_rate": 2.791194164585571e-07,
      "loss": 0.012,
      "step": 2970540
    },
    {
      "epoch": 4.86138659230311,
      "grad_norm": 0.05835137143731117,
      "learning_rate": 2.7905352424504e-07,
      "loss": 0.0099,
      "step": 2970560
    },
    {
      "epoch": 4.861419322741764,
      "grad_norm": 0.4236838221549988,
      "learning_rate": 2.7898763203152283e-07,
      "loss": 0.0085,
      "step": 2970580
    },
    {
      "epoch": 4.861452053180416,
      "grad_norm": 0.2221245914697647,
      "learning_rate": 2.7892173981800573e-07,
      "loss": 0.0134,
      "step": 2970600
    },
    {
      "epoch": 4.86148478361907,
      "grad_norm": 0.21284431219100952,
      "learning_rate": 2.7885584760448863e-07,
      "loss": 0.0099,
      "step": 2970620
    },
    {
      "epoch": 4.861517514057724,
      "grad_norm": 0.5215046405792236,
      "learning_rate": 2.787899553909715e-07,
      "loss": 0.0104,
      "step": 2970640
    },
    {
      "epoch": 4.861550244496376,
      "grad_norm": 0.24360539019107819,
      "learning_rate": 2.787240631774544e-07,
      "loss": 0.007,
      "step": 2970660
    },
    {
      "epoch": 4.86158297493503,
      "grad_norm": 0.2739470601081848,
      "learning_rate": 2.786581709639372e-07,
      "loss": 0.012,
      "step": 2970680
    },
    {
      "epoch": 4.8616157053736835,
      "grad_norm": 0.5006498694419861,
      "learning_rate": 2.785922787504201e-07,
      "loss": 0.0144,
      "step": 2970700
    },
    {
      "epoch": 4.861648435812337,
      "grad_norm": 0.19696089625358582,
      "learning_rate": 2.7852638653690296e-07,
      "loss": 0.0061,
      "step": 2970720
    },
    {
      "epoch": 4.86168116625099,
      "grad_norm": 0.29387718439102173,
      "learning_rate": 2.784604943233858e-07,
      "loss": 0.0101,
      "step": 2970740
    },
    {
      "epoch": 4.861713896689643,
      "grad_norm": 0.19968263804912567,
      "learning_rate": 2.783946021098687e-07,
      "loss": 0.0082,
      "step": 2970760
    },
    {
      "epoch": 4.861746627128297,
      "grad_norm": 0.14934176206588745,
      "learning_rate": 2.7832870989635155e-07,
      "loss": 0.0086,
      "step": 2970780
    },
    {
      "epoch": 4.861779357566951,
      "grad_norm": 0.35289663076400757,
      "learning_rate": 2.7826281768283445e-07,
      "loss": 0.0113,
      "step": 2970800
    },
    {
      "epoch": 4.861812088005603,
      "grad_norm": 0.10983811318874359,
      "learning_rate": 2.781969254693173e-07,
      "loss": 0.0087,
      "step": 2970820
    },
    {
      "epoch": 4.861844818444257,
      "grad_norm": 0.23839449882507324,
      "learning_rate": 2.781310332558002e-07,
      "loss": 0.0077,
      "step": 2970840
    },
    {
      "epoch": 4.8618775488829105,
      "grad_norm": 0.2572537660598755,
      "learning_rate": 2.7806514104228304e-07,
      "loss": 0.0086,
      "step": 2970860
    },
    {
      "epoch": 4.861910279321563,
      "grad_norm": 0.14995160698890686,
      "learning_rate": 2.7799924882876594e-07,
      "loss": 0.0127,
      "step": 2970880
    },
    {
      "epoch": 4.861943009760217,
      "grad_norm": 0.16474799811840057,
      "learning_rate": 2.779333566152488e-07,
      "loss": 0.0074,
      "step": 2970900
    },
    {
      "epoch": 4.86197574019887,
      "grad_norm": 0.024498214945197105,
      "learning_rate": 2.778674644017317e-07,
      "loss": 0.0094,
      "step": 2970920
    },
    {
      "epoch": 4.862008470637523,
      "grad_norm": 0.19970783591270447,
      "learning_rate": 2.7780157218821453e-07,
      "loss": 0.0093,
      "step": 2970940
    },
    {
      "epoch": 4.862041201076177,
      "grad_norm": 0.5142446756362915,
      "learning_rate": 2.777356799746974e-07,
      "loss": 0.0105,
      "step": 2970960
    },
    {
      "epoch": 4.86207393151483,
      "grad_norm": 0.17894308269023895,
      "learning_rate": 2.776697877611803e-07,
      "loss": 0.0098,
      "step": 2970980
    },
    {
      "epoch": 4.862106661953484,
      "grad_norm": 0.11967950314283371,
      "learning_rate": 2.7760389554766317e-07,
      "loss": 0.0099,
      "step": 2971000
    },
    {
      "epoch": 4.8621393923921365,
      "grad_norm": 0.5106878876686096,
      "learning_rate": 2.77538003334146e-07,
      "loss": 0.0125,
      "step": 2971020
    },
    {
      "epoch": 4.86217212283079,
      "grad_norm": 0.2756935656070709,
      "learning_rate": 2.774721111206289e-07,
      "loss": 0.0068,
      "step": 2971040
    },
    {
      "epoch": 4.862204853269444,
      "grad_norm": 0.1396588534116745,
      "learning_rate": 2.7740621890711176e-07,
      "loss": 0.0112,
      "step": 2971060
    },
    {
      "epoch": 4.862237583708097,
      "grad_norm": 0.22184091806411743,
      "learning_rate": 2.7734032669359466e-07,
      "loss": 0.0076,
      "step": 2971080
    },
    {
      "epoch": 4.86227031414675,
      "grad_norm": 0.14078402519226074,
      "learning_rate": 2.772744344800775e-07,
      "loss": 0.0081,
      "step": 2971100
    },
    {
      "epoch": 4.862303044585404,
      "grad_norm": 0.15767799317836761,
      "learning_rate": 2.772085422665604e-07,
      "loss": 0.0065,
      "step": 2971120
    },
    {
      "epoch": 4.862335775024057,
      "grad_norm": 0.17423804104328156,
      "learning_rate": 2.7714265005304325e-07,
      "loss": 0.0097,
      "step": 2971140
    },
    {
      "epoch": 4.86236850546271,
      "grad_norm": 0.3664477467536926,
      "learning_rate": 2.770767578395261e-07,
      "loss": 0.0081,
      "step": 2971160
    },
    {
      "epoch": 4.8624012359013635,
      "grad_norm": 0.496629536151886,
      "learning_rate": 2.77010865626009e-07,
      "loss": 0.0129,
      "step": 2971180
    },
    {
      "epoch": 4.862433966340017,
      "grad_norm": 0.10102303326129913,
      "learning_rate": 2.7694497341249184e-07,
      "loss": 0.0082,
      "step": 2971200
    },
    {
      "epoch": 4.86246669677867,
      "grad_norm": 0.43859612941741943,
      "learning_rate": 2.7687908119897474e-07,
      "loss": 0.0076,
      "step": 2971220
    },
    {
      "epoch": 4.862499427217323,
      "grad_norm": 0.10406498610973358,
      "learning_rate": 2.768131889854576e-07,
      "loss": 0.0069,
      "step": 2971240
    },
    {
      "epoch": 4.862532157655977,
      "grad_norm": 0.10925368964672089,
      "learning_rate": 2.767472967719405e-07,
      "loss": 0.0113,
      "step": 2971260
    },
    {
      "epoch": 4.862564888094631,
      "grad_norm": 0.20798610150814056,
      "learning_rate": 2.766814045584234e-07,
      "loss": 0.0111,
      "step": 2971280
    },
    {
      "epoch": 4.862597618533283,
      "grad_norm": 0.3514702022075653,
      "learning_rate": 2.7661551234490623e-07,
      "loss": 0.0086,
      "step": 2971300
    },
    {
      "epoch": 4.862630348971937,
      "grad_norm": 0.3480203449726105,
      "learning_rate": 2.765496201313891e-07,
      "loss": 0.0103,
      "step": 2971320
    },
    {
      "epoch": 4.8626630794105905,
      "grad_norm": 0.26469671726226807,
      "learning_rate": 2.7648372791787197e-07,
      "loss": 0.0063,
      "step": 2971340
    },
    {
      "epoch": 4.862695809849243,
      "grad_norm": 0.23787660896778107,
      "learning_rate": 2.7641783570435487e-07,
      "loss": 0.0066,
      "step": 2971360
    },
    {
      "epoch": 4.862728540287897,
      "grad_norm": 0.16670125722885132,
      "learning_rate": 2.763519434908377e-07,
      "loss": 0.0093,
      "step": 2971380
    },
    {
      "epoch": 4.86276127072655,
      "grad_norm": 0.12054618448019028,
      "learning_rate": 2.7628605127732056e-07,
      "loss": 0.0113,
      "step": 2971400
    },
    {
      "epoch": 4.862794001165204,
      "grad_norm": 0.17899668216705322,
      "learning_rate": 2.7622015906380346e-07,
      "loss": 0.0078,
      "step": 2971420
    },
    {
      "epoch": 4.862826731603857,
      "grad_norm": 0.0967985987663269,
      "learning_rate": 2.761542668502863e-07,
      "loss": 0.0074,
      "step": 2971440
    },
    {
      "epoch": 4.86285946204251,
      "grad_norm": 0.1878926157951355,
      "learning_rate": 2.760883746367692e-07,
      "loss": 0.0075,
      "step": 2971460
    },
    {
      "epoch": 4.862892192481164,
      "grad_norm": 0.2882152199745178,
      "learning_rate": 2.7602248242325205e-07,
      "loss": 0.008,
      "step": 2971480
    },
    {
      "epoch": 4.862924922919817,
      "grad_norm": 0.1139766126871109,
      "learning_rate": 2.7595659020973495e-07,
      "loss": 0.0083,
      "step": 2971500
    },
    {
      "epoch": 4.86295765335847,
      "grad_norm": 0.18133863806724548,
      "learning_rate": 2.758906979962178e-07,
      "loss": 0.0093,
      "step": 2971520
    },
    {
      "epoch": 4.862990383797124,
      "grad_norm": 0.16069065034389496,
      "learning_rate": 2.758248057827007e-07,
      "loss": 0.0072,
      "step": 2971540
    },
    {
      "epoch": 4.8630231142357765,
      "grad_norm": 0.3051408529281616,
      "learning_rate": 2.7575891356918354e-07,
      "loss": 0.0102,
      "step": 2971560
    },
    {
      "epoch": 4.86305584467443,
      "grad_norm": 0.1926138997077942,
      "learning_rate": 2.7569302135566644e-07,
      "loss": 0.0089,
      "step": 2971580
    },
    {
      "epoch": 4.863088575113084,
      "grad_norm": 0.15545961260795593,
      "learning_rate": 2.756271291421493e-07,
      "loss": 0.0073,
      "step": 2971600
    },
    {
      "epoch": 4.863121305551737,
      "grad_norm": 0.0844564288854599,
      "learning_rate": 2.7556123692863213e-07,
      "loss": 0.0076,
      "step": 2971620
    },
    {
      "epoch": 4.86315403599039,
      "grad_norm": 0.09552435576915741,
      "learning_rate": 2.7549534471511503e-07,
      "loss": 0.0096,
      "step": 2971640
    },
    {
      "epoch": 4.863186766429044,
      "grad_norm": 0.19405195116996765,
      "learning_rate": 2.754294525015979e-07,
      "loss": 0.0066,
      "step": 2971660
    },
    {
      "epoch": 4.863219496867697,
      "grad_norm": 0.15196353197097778,
      "learning_rate": 2.7536356028808077e-07,
      "loss": 0.0111,
      "step": 2971680
    },
    {
      "epoch": 4.863252227306351,
      "grad_norm": 0.2782425880432129,
      "learning_rate": 2.7529766807456367e-07,
      "loss": 0.0079,
      "step": 2971700
    },
    {
      "epoch": 4.8632849577450035,
      "grad_norm": 0.545411229133606,
      "learning_rate": 2.752317758610465e-07,
      "loss": 0.0109,
      "step": 2971720
    },
    {
      "epoch": 4.863317688183657,
      "grad_norm": 0.10175842046737671,
      "learning_rate": 2.751658836475294e-07,
      "loss": 0.0084,
      "step": 2971740
    },
    {
      "epoch": 4.863350418622311,
      "grad_norm": 0.653856635093689,
      "learning_rate": 2.7509999143401226e-07,
      "loss": 0.0088,
      "step": 2971760
    },
    {
      "epoch": 4.863383149060963,
      "grad_norm": 0.2097720354795456,
      "learning_rate": 2.7503409922049516e-07,
      "loss": 0.0115,
      "step": 2971780
    },
    {
      "epoch": 4.863415879499617,
      "grad_norm": 0.3105829060077667,
      "learning_rate": 2.74968207006978e-07,
      "loss": 0.0087,
      "step": 2971800
    },
    {
      "epoch": 4.8634486099382706,
      "grad_norm": 0.11228615045547485,
      "learning_rate": 2.7490231479346085e-07,
      "loss": 0.0065,
      "step": 2971820
    },
    {
      "epoch": 4.863481340376923,
      "grad_norm": 0.3580942153930664,
      "learning_rate": 2.7483642257994375e-07,
      "loss": 0.0083,
      "step": 2971840
    },
    {
      "epoch": 4.863514070815577,
      "grad_norm": 0.3198804259300232,
      "learning_rate": 2.747705303664266e-07,
      "loss": 0.0056,
      "step": 2971860
    },
    {
      "epoch": 4.8635468012542304,
      "grad_norm": 0.38337442278862,
      "learning_rate": 2.747046381529095e-07,
      "loss": 0.0084,
      "step": 2971880
    },
    {
      "epoch": 4.863579531692884,
      "grad_norm": 0.30604681372642517,
      "learning_rate": 2.7463874593939234e-07,
      "loss": 0.012,
      "step": 2971900
    },
    {
      "epoch": 4.863612262131537,
      "grad_norm": 0.1880607008934021,
      "learning_rate": 2.7457285372587524e-07,
      "loss": 0.0073,
      "step": 2971920
    },
    {
      "epoch": 4.86364499257019,
      "grad_norm": 0.300216406583786,
      "learning_rate": 2.7450696151235813e-07,
      "loss": 0.0094,
      "step": 2971940
    },
    {
      "epoch": 4.863677723008844,
      "grad_norm": 0.03937374800443649,
      "learning_rate": 2.74441069298841e-07,
      "loss": 0.0136,
      "step": 2971960
    },
    {
      "epoch": 4.8637104534474975,
      "grad_norm": 0.18077869713306427,
      "learning_rate": 2.743751770853239e-07,
      "loss": 0.0101,
      "step": 2971980
    },
    {
      "epoch": 4.86374318388615,
      "grad_norm": 0.3166465163230896,
      "learning_rate": 2.743092848718067e-07,
      "loss": 0.0127,
      "step": 2972000
    },
    {
      "epoch": 4.863775914324804,
      "grad_norm": 0.3798801302909851,
      "learning_rate": 2.742433926582896e-07,
      "loss": 0.012,
      "step": 2972020
    },
    {
      "epoch": 4.863808644763457,
      "grad_norm": 0.17791499197483063,
      "learning_rate": 2.7417750044477247e-07,
      "loss": 0.0064,
      "step": 2972040
    },
    {
      "epoch": 4.86384137520211,
      "grad_norm": 0.18031997978687286,
      "learning_rate": 2.741116082312553e-07,
      "loss": 0.0104,
      "step": 2972060
    },
    {
      "epoch": 4.863874105640764,
      "grad_norm": 0.027516597881913185,
      "learning_rate": 2.740457160177382e-07,
      "loss": 0.007,
      "step": 2972080
    },
    {
      "epoch": 4.863906836079417,
      "grad_norm": 0.19380664825439453,
      "learning_rate": 2.7397982380422106e-07,
      "loss": 0.0075,
      "step": 2972100
    },
    {
      "epoch": 4.86393956651807,
      "grad_norm": 0.14127692580223083,
      "learning_rate": 2.7391393159070396e-07,
      "loss": 0.0071,
      "step": 2972120
    },
    {
      "epoch": 4.863972296956724,
      "grad_norm": 0.1022561565041542,
      "learning_rate": 2.738480393771868e-07,
      "loss": 0.0059,
      "step": 2972140
    },
    {
      "epoch": 4.864005027395377,
      "grad_norm": 1.1210681200027466,
      "learning_rate": 2.737821471636697e-07,
      "loss": 0.0107,
      "step": 2972160
    },
    {
      "epoch": 4.864037757834031,
      "grad_norm": 0.15169480443000793,
      "learning_rate": 2.7371625495015255e-07,
      "loss": 0.0108,
      "step": 2972180
    },
    {
      "epoch": 4.8640704882726835,
      "grad_norm": 0.08604280650615692,
      "learning_rate": 2.7365036273663545e-07,
      "loss": 0.0083,
      "step": 2972200
    },
    {
      "epoch": 4.864103218711337,
      "grad_norm": 0.10439512878656387,
      "learning_rate": 2.735844705231183e-07,
      "loss": 0.0116,
      "step": 2972220
    },
    {
      "epoch": 4.864135949149991,
      "grad_norm": 1.27406644821167,
      "learning_rate": 2.735185783096012e-07,
      "loss": 0.0104,
      "step": 2972240
    },
    {
      "epoch": 4.864168679588644,
      "grad_norm": 0.9624457359313965,
      "learning_rate": 2.7345268609608404e-07,
      "loss": 0.0107,
      "step": 2972260
    },
    {
      "epoch": 4.864201410027297,
      "grad_norm": 0.34115245938301086,
      "learning_rate": 2.733867938825669e-07,
      "loss": 0.0074,
      "step": 2972280
    },
    {
      "epoch": 4.864234140465951,
      "grad_norm": 0.1764465868473053,
      "learning_rate": 2.733209016690498e-07,
      "loss": 0.0108,
      "step": 2972300
    },
    {
      "epoch": 4.864266870904604,
      "grad_norm": 0.17013391852378845,
      "learning_rate": 2.732550094555327e-07,
      "loss": 0.0074,
      "step": 2972320
    },
    {
      "epoch": 4.864299601343257,
      "grad_norm": 0.2998577952384949,
      "learning_rate": 2.731891172420155e-07,
      "loss": 0.0089,
      "step": 2972340
    },
    {
      "epoch": 4.8643323317819105,
      "grad_norm": 0.25215768814086914,
      "learning_rate": 2.731232250284984e-07,
      "loss": 0.006,
      "step": 2972360
    },
    {
      "epoch": 4.864365062220564,
      "grad_norm": 0.10512716323137283,
      "learning_rate": 2.7305733281498127e-07,
      "loss": 0.0148,
      "step": 2972380
    },
    {
      "epoch": 4.864397792659217,
      "grad_norm": 0.31861549615859985,
      "learning_rate": 2.7299144060146417e-07,
      "loss": 0.0058,
      "step": 2972400
    },
    {
      "epoch": 4.86443052309787,
      "grad_norm": 0.1097160130739212,
      "learning_rate": 2.72925548387947e-07,
      "loss": 0.0068,
      "step": 2972420
    },
    {
      "epoch": 4.864463253536524,
      "grad_norm": 0.24873706698417664,
      "learning_rate": 2.728596561744299e-07,
      "loss": 0.0192,
      "step": 2972440
    },
    {
      "epoch": 4.864495983975178,
      "grad_norm": 0.7859789729118347,
      "learning_rate": 2.7279376396091276e-07,
      "loss": 0.0145,
      "step": 2972460
    },
    {
      "epoch": 4.86452871441383,
      "grad_norm": 0.10741063952445984,
      "learning_rate": 2.727278717473956e-07,
      "loss": 0.0089,
      "step": 2972480
    },
    {
      "epoch": 4.864561444852484,
      "grad_norm": 0.0372655987739563,
      "learning_rate": 2.726619795338785e-07,
      "loss": 0.006,
      "step": 2972500
    },
    {
      "epoch": 4.8645941752911375,
      "grad_norm": 0.43271318078041077,
      "learning_rate": 2.7259608732036135e-07,
      "loss": 0.011,
      "step": 2972520
    },
    {
      "epoch": 4.864626905729791,
      "grad_norm": 0.22154510021209717,
      "learning_rate": 2.7253019510684424e-07,
      "loss": 0.0104,
      "step": 2972540
    },
    {
      "epoch": 4.864659636168444,
      "grad_norm": 0.2144664227962494,
      "learning_rate": 2.724643028933271e-07,
      "loss": 0.0068,
      "step": 2972560
    },
    {
      "epoch": 4.864692366607097,
      "grad_norm": 0.17697250843048096,
      "learning_rate": 2.7239841067981e-07,
      "loss": 0.0087,
      "step": 2972580
    },
    {
      "epoch": 4.864725097045751,
      "grad_norm": 0.46543580293655396,
      "learning_rate": 2.723325184662929e-07,
      "loss": 0.0084,
      "step": 2972600
    },
    {
      "epoch": 4.864757827484404,
      "grad_norm": 0.17102578282356262,
      "learning_rate": 2.7226662625277573e-07,
      "loss": 0.0096,
      "step": 2972620
    },
    {
      "epoch": 4.864790557923057,
      "grad_norm": 0.10118935257196426,
      "learning_rate": 2.7220073403925863e-07,
      "loss": 0.0091,
      "step": 2972640
    },
    {
      "epoch": 4.864823288361711,
      "grad_norm": 0.7996261119842529,
      "learning_rate": 2.721348418257415e-07,
      "loss": 0.0103,
      "step": 2972660
    },
    {
      "epoch": 4.864856018800364,
      "grad_norm": 0.754118025302887,
      "learning_rate": 2.720689496122244e-07,
      "loss": 0.0095,
      "step": 2972680
    },
    {
      "epoch": 4.864888749239017,
      "grad_norm": 0.27961182594299316,
      "learning_rate": 2.720030573987072e-07,
      "loss": 0.0189,
      "step": 2972700
    },
    {
      "epoch": 4.864921479677671,
      "grad_norm": 0.030742457136511803,
      "learning_rate": 2.7193716518519007e-07,
      "loss": 0.0112,
      "step": 2972720
    },
    {
      "epoch": 4.864954210116324,
      "grad_norm": 0.34400683641433716,
      "learning_rate": 2.7187127297167297e-07,
      "loss": 0.0084,
      "step": 2972740
    },
    {
      "epoch": 4.864986940554977,
      "grad_norm": 0.109958216547966,
      "learning_rate": 2.718053807581558e-07,
      "loss": 0.0076,
      "step": 2972760
    },
    {
      "epoch": 4.865019670993631,
      "grad_norm": 0.22445662319660187,
      "learning_rate": 2.717394885446387e-07,
      "loss": 0.006,
      "step": 2972780
    },
    {
      "epoch": 4.865052401432284,
      "grad_norm": 0.19557493925094604,
      "learning_rate": 2.7167359633112156e-07,
      "loss": 0.008,
      "step": 2972800
    },
    {
      "epoch": 4.865085131870938,
      "grad_norm": 0.5556544661521912,
      "learning_rate": 2.7160770411760445e-07,
      "loss": 0.012,
      "step": 2972820
    },
    {
      "epoch": 4.8651178623095905,
      "grad_norm": 0.0814497098326683,
      "learning_rate": 2.715418119040873e-07,
      "loss": 0.0056,
      "step": 2972840
    },
    {
      "epoch": 4.865150592748244,
      "grad_norm": 0.23369894921779633,
      "learning_rate": 2.714759196905702e-07,
      "loss": 0.0086,
      "step": 2972860
    },
    {
      "epoch": 4.865183323186898,
      "grad_norm": 0.09467752277851105,
      "learning_rate": 2.7141002747705304e-07,
      "loss": 0.01,
      "step": 2972880
    },
    {
      "epoch": 4.86521605362555,
      "grad_norm": 0.12066566944122314,
      "learning_rate": 2.7134413526353594e-07,
      "loss": 0.0061,
      "step": 2972900
    },
    {
      "epoch": 4.865248784064204,
      "grad_norm": 0.1905631273984909,
      "learning_rate": 2.712782430500188e-07,
      "loss": 0.0079,
      "step": 2972920
    },
    {
      "epoch": 4.865281514502858,
      "grad_norm": 0.1688040792942047,
      "learning_rate": 2.7121235083650163e-07,
      "loss": 0.0071,
      "step": 2972940
    },
    {
      "epoch": 4.86531424494151,
      "grad_norm": 0.15829609334468842,
      "learning_rate": 2.7114645862298453e-07,
      "loss": 0.0112,
      "step": 2972960
    },
    {
      "epoch": 4.865346975380164,
      "grad_norm": 0.17180857062339783,
      "learning_rate": 2.7108056640946743e-07,
      "loss": 0.0067,
      "step": 2972980
    },
    {
      "epoch": 4.8653797058188175,
      "grad_norm": 0.12909966707229614,
      "learning_rate": 2.710146741959503e-07,
      "loss": 0.0106,
      "step": 2973000
    },
    {
      "epoch": 4.865412436257471,
      "grad_norm": 0.09216829389333725,
      "learning_rate": 2.709487819824332e-07,
      "loss": 0.0067,
      "step": 2973020
    },
    {
      "epoch": 4.865445166696124,
      "grad_norm": 0.5147160291671753,
      "learning_rate": 2.70882889768916e-07,
      "loss": 0.0109,
      "step": 2973040
    },
    {
      "epoch": 4.865477897134777,
      "grad_norm": 0.2600277364253998,
      "learning_rate": 2.708169975553989e-07,
      "loss": 0.0103,
      "step": 2973060
    },
    {
      "epoch": 4.865510627573431,
      "grad_norm": 0.21164269745349884,
      "learning_rate": 2.7075110534188176e-07,
      "loss": 0.0047,
      "step": 2973080
    },
    {
      "epoch": 4.865543358012084,
      "grad_norm": 0.21415886282920837,
      "learning_rate": 2.7068521312836466e-07,
      "loss": 0.0098,
      "step": 2973100
    },
    {
      "epoch": 4.865576088450737,
      "grad_norm": 0.6411895751953125,
      "learning_rate": 2.706193209148475e-07,
      "loss": 0.0078,
      "step": 2973120
    },
    {
      "epoch": 4.865608818889391,
      "grad_norm": 0.11968117952346802,
      "learning_rate": 2.7055342870133035e-07,
      "loss": 0.009,
      "step": 2973140
    },
    {
      "epoch": 4.8656415493280445,
      "grad_norm": 0.3921404182910919,
      "learning_rate": 2.7048753648781325e-07,
      "loss": 0.0102,
      "step": 2973160
    },
    {
      "epoch": 4.865674279766697,
      "grad_norm": 0.12818004190921783,
      "learning_rate": 2.704216442742961e-07,
      "loss": 0.0066,
      "step": 2973180
    },
    {
      "epoch": 4.865707010205351,
      "grad_norm": 0.5766049027442932,
      "learning_rate": 2.70355752060779e-07,
      "loss": 0.0108,
      "step": 2973200
    },
    {
      "epoch": 4.865739740644004,
      "grad_norm": 0.6330618858337402,
      "learning_rate": 2.7028985984726184e-07,
      "loss": 0.0073,
      "step": 2973220
    },
    {
      "epoch": 4.865772471082657,
      "grad_norm": 0.39153915643692017,
      "learning_rate": 2.7022396763374474e-07,
      "loss": 0.0071,
      "step": 2973240
    },
    {
      "epoch": 4.865805201521311,
      "grad_norm": 0.3764156103134155,
      "learning_rate": 2.7015807542022764e-07,
      "loss": 0.0072,
      "step": 2973260
    },
    {
      "epoch": 4.865837931959964,
      "grad_norm": 0.2633163630962372,
      "learning_rate": 2.700921832067105e-07,
      "loss": 0.0102,
      "step": 2973280
    },
    {
      "epoch": 4.865870662398617,
      "grad_norm": 0.33474481105804443,
      "learning_rate": 2.700262909931934e-07,
      "loss": 0.0097,
      "step": 2973300
    },
    {
      "epoch": 4.865903392837271,
      "grad_norm": 0.19038020074367523,
      "learning_rate": 2.6996039877967623e-07,
      "loss": 0.0126,
      "step": 2973320
    },
    {
      "epoch": 4.865936123275924,
      "grad_norm": 0.125974640250206,
      "learning_rate": 2.6989450656615913e-07,
      "loss": 0.0108,
      "step": 2973340
    },
    {
      "epoch": 4.865968853714578,
      "grad_norm": 0.174307182431221,
      "learning_rate": 2.69828614352642e-07,
      "loss": 0.0072,
      "step": 2973360
    },
    {
      "epoch": 4.8660015841532305,
      "grad_norm": 0.524469256401062,
      "learning_rate": 2.697627221391248e-07,
      "loss": 0.0112,
      "step": 2973380
    },
    {
      "epoch": 4.866034314591884,
      "grad_norm": 0.2331124097108841,
      "learning_rate": 2.696968299256077e-07,
      "loss": 0.0074,
      "step": 2973400
    },
    {
      "epoch": 4.866067045030538,
      "grad_norm": 0.060809426009655,
      "learning_rate": 2.6963093771209056e-07,
      "loss": 0.0068,
      "step": 2973420
    },
    {
      "epoch": 4.866099775469191,
      "grad_norm": 0.31866878271102905,
      "learning_rate": 2.6956504549857346e-07,
      "loss": 0.0066,
      "step": 2973440
    },
    {
      "epoch": 4.866132505907844,
      "grad_norm": 0.4007963240146637,
      "learning_rate": 2.694991532850563e-07,
      "loss": 0.0096,
      "step": 2973460
    },
    {
      "epoch": 4.866165236346498,
      "grad_norm": 0.10716280341148376,
      "learning_rate": 2.694332610715392e-07,
      "loss": 0.0067,
      "step": 2973480
    },
    {
      "epoch": 4.866197966785151,
      "grad_norm": 0.10823625326156616,
      "learning_rate": 2.6936736885802205e-07,
      "loss": 0.012,
      "step": 2973500
    },
    {
      "epoch": 4.866230697223804,
      "grad_norm": 0.19457344710826874,
      "learning_rate": 2.6930147664450495e-07,
      "loss": 0.0096,
      "step": 2973520
    },
    {
      "epoch": 4.8662634276624575,
      "grad_norm": 0.45473238825798035,
      "learning_rate": 2.6923558443098785e-07,
      "loss": 0.0171,
      "step": 2973540
    },
    {
      "epoch": 4.866296158101111,
      "grad_norm": 0.24954834580421448,
      "learning_rate": 2.691696922174707e-07,
      "loss": 0.0095,
      "step": 2973560
    },
    {
      "epoch": 4.866328888539764,
      "grad_norm": 0.09102555364370346,
      "learning_rate": 2.691038000039536e-07,
      "loss": 0.0094,
      "step": 2973580
    },
    {
      "epoch": 4.866361618978417,
      "grad_norm": 0.2509768009185791,
      "learning_rate": 2.690379077904364e-07,
      "loss": 0.0101,
      "step": 2973600
    },
    {
      "epoch": 4.866394349417071,
      "grad_norm": 0.21593481302261353,
      "learning_rate": 2.689720155769193e-07,
      "loss": 0.0118,
      "step": 2973620
    },
    {
      "epoch": 4.8664270798557245,
      "grad_norm": 0.29265207052230835,
      "learning_rate": 2.689061233634022e-07,
      "loss": 0.0115,
      "step": 2973640
    },
    {
      "epoch": 4.866459810294377,
      "grad_norm": 0.2200394570827484,
      "learning_rate": 2.6884023114988503e-07,
      "loss": 0.0069,
      "step": 2973660
    },
    {
      "epoch": 4.866492540733031,
      "grad_norm": 0.24619907140731812,
      "learning_rate": 2.6877433893636793e-07,
      "loss": 0.007,
      "step": 2973680
    },
    {
      "epoch": 4.866525271171684,
      "grad_norm": 0.19082768261432648,
      "learning_rate": 2.6870844672285077e-07,
      "loss": 0.0079,
      "step": 2973700
    },
    {
      "epoch": 4.866558001610338,
      "grad_norm": 0.09945713728666306,
      "learning_rate": 2.6864255450933367e-07,
      "loss": 0.0089,
      "step": 2973720
    },
    {
      "epoch": 4.866590732048991,
      "grad_norm": 0.13268819451332092,
      "learning_rate": 2.685766622958165e-07,
      "loss": 0.0072,
      "step": 2973740
    },
    {
      "epoch": 4.866623462487644,
      "grad_norm": 0.348400354385376,
      "learning_rate": 2.685107700822994e-07,
      "loss": 0.007,
      "step": 2973760
    },
    {
      "epoch": 4.866656192926298,
      "grad_norm": 0.3016430735588074,
      "learning_rate": 2.6844487786878226e-07,
      "loss": 0.0069,
      "step": 2973780
    },
    {
      "epoch": 4.866688923364951,
      "grad_norm": 0.2329166829586029,
      "learning_rate": 2.6837898565526516e-07,
      "loss": 0.0138,
      "step": 2973800
    },
    {
      "epoch": 4.866721653803604,
      "grad_norm": 0.1848885715007782,
      "learning_rate": 2.68313093441748e-07,
      "loss": 0.0101,
      "step": 2973820
    },
    {
      "epoch": 4.866754384242258,
      "grad_norm": 0.05483170598745346,
      "learning_rate": 2.6824720122823085e-07,
      "loss": 0.0063,
      "step": 2973840
    },
    {
      "epoch": 4.8667871146809105,
      "grad_norm": 0.3165088891983032,
      "learning_rate": 2.6818130901471375e-07,
      "loss": 0.0065,
      "step": 2973860
    },
    {
      "epoch": 4.866819845119564,
      "grad_norm": 0.0944908931851387,
      "learning_rate": 2.681154168011966e-07,
      "loss": 0.0107,
      "step": 2973880
    },
    {
      "epoch": 4.866852575558218,
      "grad_norm": 0.14792081713676453,
      "learning_rate": 2.680495245876795e-07,
      "loss": 0.0073,
      "step": 2973900
    },
    {
      "epoch": 4.866885305996871,
      "grad_norm": 0.6988896727561951,
      "learning_rate": 2.679836323741624e-07,
      "loss": 0.0113,
      "step": 2973920
    },
    {
      "epoch": 4.866918036435524,
      "grad_norm": 0.19992852210998535,
      "learning_rate": 2.6791774016064524e-07,
      "loss": 0.0087,
      "step": 2973940
    },
    {
      "epoch": 4.866950766874178,
      "grad_norm": 0.34024935960769653,
      "learning_rate": 2.6785184794712814e-07,
      "loss": 0.0088,
      "step": 2973960
    },
    {
      "epoch": 4.866983497312831,
      "grad_norm": 0.5681858658790588,
      "learning_rate": 2.67785955733611e-07,
      "loss": 0.0117,
      "step": 2973980
    },
    {
      "epoch": 4.867016227751485,
      "grad_norm": 0.137161523103714,
      "learning_rate": 2.677200635200939e-07,
      "loss": 0.0045,
      "step": 2974000
    },
    {
      "epoch": 4.8670489581901375,
      "grad_norm": 0.1457575261592865,
      "learning_rate": 2.6765417130657673e-07,
      "loss": 0.0114,
      "step": 2974020
    },
    {
      "epoch": 4.867081688628791,
      "grad_norm": 0.10900214314460754,
      "learning_rate": 2.6758827909305957e-07,
      "loss": 0.0095,
      "step": 2974040
    },
    {
      "epoch": 4.867114419067445,
      "grad_norm": 0.13000072538852692,
      "learning_rate": 2.6752238687954247e-07,
      "loss": 0.0057,
      "step": 2974060
    },
    {
      "epoch": 4.867147149506097,
      "grad_norm": 0.07901060581207275,
      "learning_rate": 2.674564946660253e-07,
      "loss": 0.009,
      "step": 2974080
    },
    {
      "epoch": 4.867179879944751,
      "grad_norm": 0.18732298910617828,
      "learning_rate": 2.673906024525082e-07,
      "loss": 0.011,
      "step": 2974100
    },
    {
      "epoch": 4.867212610383405,
      "grad_norm": 0.2024056762456894,
      "learning_rate": 2.6732471023899106e-07,
      "loss": 0.01,
      "step": 2974120
    },
    {
      "epoch": 4.867245340822057,
      "grad_norm": 0.4304051399230957,
      "learning_rate": 2.6725881802547396e-07,
      "loss": 0.0101,
      "step": 2974140
    },
    {
      "epoch": 4.867278071260711,
      "grad_norm": 0.12562112510204315,
      "learning_rate": 2.671929258119568e-07,
      "loss": 0.0111,
      "step": 2974160
    },
    {
      "epoch": 4.8673108016993645,
      "grad_norm": 0.6226401925086975,
      "learning_rate": 2.671270335984397e-07,
      "loss": 0.0077,
      "step": 2974180
    },
    {
      "epoch": 4.867343532138018,
      "grad_norm": 0.13205663859844208,
      "learning_rate": 2.670611413849226e-07,
      "loss": 0.01,
      "step": 2974200
    },
    {
      "epoch": 4.867376262576671,
      "grad_norm": 0.5484534502029419,
      "learning_rate": 2.6699524917140545e-07,
      "loss": 0.0145,
      "step": 2974220
    },
    {
      "epoch": 4.867408993015324,
      "grad_norm": 0.3925907611846924,
      "learning_rate": 2.6692935695788835e-07,
      "loss": 0.0099,
      "step": 2974240
    },
    {
      "epoch": 4.867441723453978,
      "grad_norm": 0.4824051856994629,
      "learning_rate": 2.6686346474437114e-07,
      "loss": 0.0081,
      "step": 2974260
    },
    {
      "epoch": 4.867474453892632,
      "grad_norm": 0.27044421434402466,
      "learning_rate": 2.6679757253085404e-07,
      "loss": 0.0065,
      "step": 2974280
    },
    {
      "epoch": 4.867507184331284,
      "grad_norm": 0.25053590536117554,
      "learning_rate": 2.6673168031733694e-07,
      "loss": 0.0088,
      "step": 2974300
    },
    {
      "epoch": 4.867539914769938,
      "grad_norm": 0.15031562745571136,
      "learning_rate": 2.666657881038198e-07,
      "loss": 0.0053,
      "step": 2974320
    },
    {
      "epoch": 4.8675726452085915,
      "grad_norm": 0.2902300953865051,
      "learning_rate": 2.665998958903027e-07,
      "loss": 0.0081,
      "step": 2974340
    },
    {
      "epoch": 4.867605375647244,
      "grad_norm": 0.3874639868736267,
      "learning_rate": 2.665340036767855e-07,
      "loss": 0.0067,
      "step": 2974360
    },
    {
      "epoch": 4.867638106085898,
      "grad_norm": 0.1458580046892166,
      "learning_rate": 2.664681114632684e-07,
      "loss": 0.0122,
      "step": 2974380
    },
    {
      "epoch": 4.867670836524551,
      "grad_norm": 0.8924538493156433,
      "learning_rate": 2.6640221924975127e-07,
      "loss": 0.0085,
      "step": 2974400
    },
    {
      "epoch": 4.867703566963204,
      "grad_norm": 0.3468701243400574,
      "learning_rate": 2.6633632703623417e-07,
      "loss": 0.0132,
      "step": 2974420
    },
    {
      "epoch": 4.867736297401858,
      "grad_norm": 0.19565877318382263,
      "learning_rate": 2.66270434822717e-07,
      "loss": 0.0132,
      "step": 2974440
    },
    {
      "epoch": 4.867769027840511,
      "grad_norm": 0.20851930975914001,
      "learning_rate": 2.662045426091999e-07,
      "loss": 0.0124,
      "step": 2974460
    },
    {
      "epoch": 4.867801758279165,
      "grad_norm": 0.13456861674785614,
      "learning_rate": 2.6613865039568276e-07,
      "loss": 0.0087,
      "step": 2974480
    },
    {
      "epoch": 4.8678344887178175,
      "grad_norm": 0.283698171377182,
      "learning_rate": 2.660727581821656e-07,
      "loss": 0.0077,
      "step": 2974500
    },
    {
      "epoch": 4.867867219156471,
      "grad_norm": 0.30595847964286804,
      "learning_rate": 2.660068659686485e-07,
      "loss": 0.0189,
      "step": 2974520
    },
    {
      "epoch": 4.867899949595125,
      "grad_norm": 0.18721093237400055,
      "learning_rate": 2.6594097375513135e-07,
      "loss": 0.0077,
      "step": 2974540
    },
    {
      "epoch": 4.867932680033777,
      "grad_norm": 0.16100069880485535,
      "learning_rate": 2.6587508154161425e-07,
      "loss": 0.0097,
      "step": 2974560
    },
    {
      "epoch": 4.867965410472431,
      "grad_norm": 0.37596267461776733,
      "learning_rate": 2.6580918932809715e-07,
      "loss": 0.006,
      "step": 2974580
    },
    {
      "epoch": 4.867998140911085,
      "grad_norm": 0.23506119847297668,
      "learning_rate": 2.6574329711458e-07,
      "loss": 0.0061,
      "step": 2974600
    },
    {
      "epoch": 4.868030871349738,
      "grad_norm": 0.19492025673389435,
      "learning_rate": 2.656774049010629e-07,
      "loss": 0.0061,
      "step": 2974620
    },
    {
      "epoch": 4.868063601788391,
      "grad_norm": 0.3068375885486603,
      "learning_rate": 2.6561151268754574e-07,
      "loss": 0.007,
      "step": 2974640
    },
    {
      "epoch": 4.8680963322270445,
      "grad_norm": 0.06739740073680878,
      "learning_rate": 2.6554562047402863e-07,
      "loss": 0.009,
      "step": 2974660
    },
    {
      "epoch": 4.868129062665698,
      "grad_norm": 0.25646278262138367,
      "learning_rate": 2.654797282605115e-07,
      "loss": 0.005,
      "step": 2974680
    },
    {
      "epoch": 4.868161793104351,
      "grad_norm": 0.0679374411702156,
      "learning_rate": 2.654138360469943e-07,
      "loss": 0.0141,
      "step": 2974700
    },
    {
      "epoch": 4.868194523543004,
      "grad_norm": 0.5738673806190491,
      "learning_rate": 2.653479438334772e-07,
      "loss": 0.0054,
      "step": 2974720
    },
    {
      "epoch": 4.868227253981658,
      "grad_norm": 0.1627117097377777,
      "learning_rate": 2.6528205161996007e-07,
      "loss": 0.0084,
      "step": 2974740
    },
    {
      "epoch": 4.868259984420311,
      "grad_norm": 0.11708791553974152,
      "learning_rate": 2.6521615940644297e-07,
      "loss": 0.0056,
      "step": 2974760
    },
    {
      "epoch": 4.868292714858964,
      "grad_norm": 0.1510394960641861,
      "learning_rate": 2.651502671929258e-07,
      "loss": 0.0071,
      "step": 2974780
    },
    {
      "epoch": 4.868325445297618,
      "grad_norm": 0.17929503321647644,
      "learning_rate": 2.650843749794087e-07,
      "loss": 0.0092,
      "step": 2974800
    },
    {
      "epoch": 4.8683581757362715,
      "grad_norm": 0.15161101520061493,
      "learning_rate": 2.6501848276589156e-07,
      "loss": 0.0061,
      "step": 2974820
    },
    {
      "epoch": 4.868390906174924,
      "grad_norm": 0.25069546699523926,
      "learning_rate": 2.6495259055237446e-07,
      "loss": 0.0063,
      "step": 2974840
    },
    {
      "epoch": 4.868423636613578,
      "grad_norm": 0.05085541680455208,
      "learning_rate": 2.6488669833885735e-07,
      "loss": 0.0097,
      "step": 2974860
    },
    {
      "epoch": 4.868456367052231,
      "grad_norm": 0.15823134779930115,
      "learning_rate": 2.648208061253402e-07,
      "loss": 0.0068,
      "step": 2974880
    },
    {
      "epoch": 4.868489097490885,
      "grad_norm": 0.2125615030527115,
      "learning_rate": 2.647549139118231e-07,
      "loss": 0.0061,
      "step": 2974900
    },
    {
      "epoch": 4.868521827929538,
      "grad_norm": 0.19994890689849854,
      "learning_rate": 2.646890216983059e-07,
      "loss": 0.0064,
      "step": 2974920
    },
    {
      "epoch": 4.868554558368191,
      "grad_norm": 0.12598203122615814,
      "learning_rate": 2.646231294847888e-07,
      "loss": 0.0139,
      "step": 2974940
    },
    {
      "epoch": 4.868587288806845,
      "grad_norm": 0.14504623413085938,
      "learning_rate": 2.645572372712717e-07,
      "loss": 0.007,
      "step": 2974960
    },
    {
      "epoch": 4.868620019245498,
      "grad_norm": 0.06738494336605072,
      "learning_rate": 2.6449134505775453e-07,
      "loss": 0.0091,
      "step": 2974980
    },
    {
      "epoch": 4.868652749684151,
      "grad_norm": 0.10043821483850479,
      "learning_rate": 2.6442545284423743e-07,
      "loss": 0.0076,
      "step": 2975000
    },
    {
      "epoch": 4.868685480122805,
      "grad_norm": 0.3709566295146942,
      "learning_rate": 2.643595606307203e-07,
      "loss": 0.0084,
      "step": 2975020
    },
    {
      "epoch": 4.8687182105614575,
      "grad_norm": 0.1642940640449524,
      "learning_rate": 2.642936684172032e-07,
      "loss": 0.0088,
      "step": 2975040
    },
    {
      "epoch": 4.868750941000111,
      "grad_norm": 0.09464152902364731,
      "learning_rate": 2.64227776203686e-07,
      "loss": 0.0068,
      "step": 2975060
    },
    {
      "epoch": 4.868783671438765,
      "grad_norm": 0.5787826776504517,
      "learning_rate": 2.641618839901689e-07,
      "loss": 0.0068,
      "step": 2975080
    },
    {
      "epoch": 4.868816401877418,
      "grad_norm": 0.7209465503692627,
      "learning_rate": 2.6409599177665177e-07,
      "loss": 0.0117,
      "step": 2975100
    },
    {
      "epoch": 4.868849132316071,
      "grad_norm": 0.40193334221839905,
      "learning_rate": 2.6403009956313467e-07,
      "loss": 0.009,
      "step": 2975120
    },
    {
      "epoch": 4.868881862754725,
      "grad_norm": 0.22930286824703217,
      "learning_rate": 2.639642073496175e-07,
      "loss": 0.0074,
      "step": 2975140
    },
    {
      "epoch": 4.868914593193378,
      "grad_norm": 0.021561602130532265,
      "learning_rate": 2.6389831513610036e-07,
      "loss": 0.0081,
      "step": 2975160
    },
    {
      "epoch": 4.868947323632032,
      "grad_norm": 0.2866385579109192,
      "learning_rate": 2.6383242292258326e-07,
      "loss": 0.0106,
      "step": 2975180
    },
    {
      "epoch": 4.8689800540706845,
      "grad_norm": 0.527350902557373,
      "learning_rate": 2.637665307090661e-07,
      "loss": 0.0067,
      "step": 2975200
    },
    {
      "epoch": 4.869012784509338,
      "grad_norm": 0.2500993609428406,
      "learning_rate": 2.63700638495549e-07,
      "loss": 0.0061,
      "step": 2975220
    },
    {
      "epoch": 4.869045514947992,
      "grad_norm": 0.17011921107769012,
      "learning_rate": 2.636347462820319e-07,
      "loss": 0.0068,
      "step": 2975240
    },
    {
      "epoch": 4.869078245386644,
      "grad_norm": 0.869968593120575,
      "learning_rate": 2.6356885406851474e-07,
      "loss": 0.0071,
      "step": 2975260
    },
    {
      "epoch": 4.869110975825298,
      "grad_norm": 0.09369827806949615,
      "learning_rate": 2.6350296185499764e-07,
      "loss": 0.0098,
      "step": 2975280
    },
    {
      "epoch": 4.8691437062639515,
      "grad_norm": 0.5424683094024658,
      "learning_rate": 2.634370696414805e-07,
      "loss": 0.008,
      "step": 2975300
    },
    {
      "epoch": 4.869176436702604,
      "grad_norm": 0.26104867458343506,
      "learning_rate": 2.633711774279634e-07,
      "loss": 0.0088,
      "step": 2975320
    },
    {
      "epoch": 4.869209167141258,
      "grad_norm": 0.29929667711257935,
      "learning_rate": 2.6330528521444623e-07,
      "loss": 0.0066,
      "step": 2975340
    },
    {
      "epoch": 4.869241897579911,
      "grad_norm": 0.2883651852607727,
      "learning_rate": 2.632393930009291e-07,
      "loss": 0.0102,
      "step": 2975360
    },
    {
      "epoch": 4.869274628018565,
      "grad_norm": 0.07934574782848358,
      "learning_rate": 2.63173500787412e-07,
      "loss": 0.0095,
      "step": 2975380
    },
    {
      "epoch": 4.869307358457218,
      "grad_norm": 0.28018444776535034,
      "learning_rate": 2.631076085738948e-07,
      "loss": 0.0103,
      "step": 2975400
    },
    {
      "epoch": 4.869340088895871,
      "grad_norm": 0.1667650043964386,
      "learning_rate": 2.630417163603777e-07,
      "loss": 0.0077,
      "step": 2975420
    },
    {
      "epoch": 4.869372819334525,
      "grad_norm": 0.1649952381849289,
      "learning_rate": 2.6297582414686057e-07,
      "loss": 0.0123,
      "step": 2975440
    },
    {
      "epoch": 4.8694055497731785,
      "grad_norm": 0.3503516912460327,
      "learning_rate": 2.6290993193334346e-07,
      "loss": 0.0108,
      "step": 2975460
    },
    {
      "epoch": 4.869438280211831,
      "grad_norm": 0.08242617547512054,
      "learning_rate": 2.628440397198263e-07,
      "loss": 0.0086,
      "step": 2975480
    },
    {
      "epoch": 4.869471010650485,
      "grad_norm": 0.1981872171163559,
      "learning_rate": 2.627781475063092e-07,
      "loss": 0.0098,
      "step": 2975500
    },
    {
      "epoch": 4.869503741089138,
      "grad_norm": 0.29767075181007385,
      "learning_rate": 2.627122552927921e-07,
      "loss": 0.0108,
      "step": 2975520
    },
    {
      "epoch": 4.869536471527791,
      "grad_norm": 0.08865536749362946,
      "learning_rate": 2.6264636307927495e-07,
      "loss": 0.015,
      "step": 2975540
    },
    {
      "epoch": 4.869569201966445,
      "grad_norm": 0.27320149540901184,
      "learning_rate": 2.6258047086575785e-07,
      "loss": 0.0094,
      "step": 2975560
    },
    {
      "epoch": 4.869601932405098,
      "grad_norm": 0.24681882560253143,
      "learning_rate": 2.6251457865224064e-07,
      "loss": 0.0081,
      "step": 2975580
    },
    {
      "epoch": 4.869634662843751,
      "grad_norm": 0.1940983086824417,
      "learning_rate": 2.6244868643872354e-07,
      "loss": 0.008,
      "step": 2975600
    },
    {
      "epoch": 4.869667393282405,
      "grad_norm": 0.21239295601844788,
      "learning_rate": 2.6238279422520644e-07,
      "loss": 0.0074,
      "step": 2975620
    },
    {
      "epoch": 4.869700123721058,
      "grad_norm": 0.08172117173671722,
      "learning_rate": 2.623169020116893e-07,
      "loss": 0.0057,
      "step": 2975640
    },
    {
      "epoch": 4.869732854159712,
      "grad_norm": 0.21848078072071075,
      "learning_rate": 2.622510097981722e-07,
      "loss": 0.0117,
      "step": 2975660
    },
    {
      "epoch": 4.8697655845983645,
      "grad_norm": 0.2960084080696106,
      "learning_rate": 2.6218511758465503e-07,
      "loss": 0.0127,
      "step": 2975680
    },
    {
      "epoch": 4.869798315037018,
      "grad_norm": 0.33667606115341187,
      "learning_rate": 2.6211922537113793e-07,
      "loss": 0.0122,
      "step": 2975700
    },
    {
      "epoch": 4.869831045475672,
      "grad_norm": 0.1362069845199585,
      "learning_rate": 2.620533331576208e-07,
      "loss": 0.0061,
      "step": 2975720
    },
    {
      "epoch": 4.869863775914325,
      "grad_norm": 0.04589814692735672,
      "learning_rate": 2.619874409441037e-07,
      "loss": 0.0079,
      "step": 2975740
    },
    {
      "epoch": 4.869896506352978,
      "grad_norm": 0.10445794463157654,
      "learning_rate": 2.619215487305865e-07,
      "loss": 0.0074,
      "step": 2975760
    },
    {
      "epoch": 4.869929236791632,
      "grad_norm": 0.40084561705589294,
      "learning_rate": 2.618556565170694e-07,
      "loss": 0.0082,
      "step": 2975780
    },
    {
      "epoch": 4.869961967230285,
      "grad_norm": 0.09502540528774261,
      "learning_rate": 2.6178976430355226e-07,
      "loss": 0.0052,
      "step": 2975800
    },
    {
      "epoch": 4.869994697668938,
      "grad_norm": 0.4019016921520233,
      "learning_rate": 2.617238720900351e-07,
      "loss": 0.0087,
      "step": 2975820
    },
    {
      "epoch": 4.8700274281075915,
      "grad_norm": 0.08166234940290451,
      "learning_rate": 2.61657979876518e-07,
      "loss": 0.0092,
      "step": 2975840
    },
    {
      "epoch": 4.870060158546245,
      "grad_norm": 0.060755521059036255,
      "learning_rate": 2.6159208766300085e-07,
      "loss": 0.0069,
      "step": 2975860
    },
    {
      "epoch": 4.870092888984898,
      "grad_norm": 0.12687844038009644,
      "learning_rate": 2.6152619544948375e-07,
      "loss": 0.0115,
      "step": 2975880
    },
    {
      "epoch": 4.870125619423551,
      "grad_norm": 0.14330172538757324,
      "learning_rate": 2.6146030323596665e-07,
      "loss": 0.013,
      "step": 2975900
    },
    {
      "epoch": 4.870158349862205,
      "grad_norm": 0.1667405515909195,
      "learning_rate": 2.613944110224495e-07,
      "loss": 0.0075,
      "step": 2975920
    },
    {
      "epoch": 4.870191080300859,
      "grad_norm": 0.7348321080207825,
      "learning_rate": 2.613285188089324e-07,
      "loss": 0.014,
      "step": 2975940
    },
    {
      "epoch": 4.870223810739511,
      "grad_norm": 0.13593129813671112,
      "learning_rate": 2.6126262659541524e-07,
      "loss": 0.0073,
      "step": 2975960
    },
    {
      "epoch": 4.870256541178165,
      "grad_norm": 0.28225451707839966,
      "learning_rate": 2.6119673438189814e-07,
      "loss": 0.0071,
      "step": 2975980
    },
    {
      "epoch": 4.8702892716168185,
      "grad_norm": 0.3733426630496979,
      "learning_rate": 2.61130842168381e-07,
      "loss": 0.0095,
      "step": 2976000
    },
    {
      "epoch": 4.870322002055471,
      "grad_norm": 0.2097308188676834,
      "learning_rate": 2.6106494995486383e-07,
      "loss": 0.0139,
      "step": 2976020
    },
    {
      "epoch": 4.870354732494125,
      "grad_norm": 0.2608908414840698,
      "learning_rate": 2.6099905774134673e-07,
      "loss": 0.0068,
      "step": 2976040
    },
    {
      "epoch": 4.870387462932778,
      "grad_norm": 0.10704482346773148,
      "learning_rate": 2.609331655278296e-07,
      "loss": 0.008,
      "step": 2976060
    },
    {
      "epoch": 4.870420193371432,
      "grad_norm": 0.23663073778152466,
      "learning_rate": 2.6086727331431247e-07,
      "loss": 0.0116,
      "step": 2976080
    },
    {
      "epoch": 4.870452923810085,
      "grad_norm": 0.4770720899105072,
      "learning_rate": 2.608013811007953e-07,
      "loss": 0.0065,
      "step": 2976100
    },
    {
      "epoch": 4.870485654248738,
      "grad_norm": 0.23604312539100647,
      "learning_rate": 2.607354888872782e-07,
      "loss": 0.0083,
      "step": 2976120
    },
    {
      "epoch": 4.870518384687392,
      "grad_norm": 0.5433186292648315,
      "learning_rate": 2.6066959667376106e-07,
      "loss": 0.0083,
      "step": 2976140
    },
    {
      "epoch": 4.8705511151260446,
      "grad_norm": 0.3709152936935425,
      "learning_rate": 2.6060370446024396e-07,
      "loss": 0.0086,
      "step": 2976160
    },
    {
      "epoch": 4.870583845564698,
      "grad_norm": 0.24728895723819733,
      "learning_rate": 2.6053781224672686e-07,
      "loss": 0.0107,
      "step": 2976180
    },
    {
      "epoch": 4.870616576003352,
      "grad_norm": 0.162174791097641,
      "learning_rate": 2.604719200332097e-07,
      "loss": 0.0122,
      "step": 2976200
    },
    {
      "epoch": 4.8706493064420044,
      "grad_norm": 0.27673041820526123,
      "learning_rate": 2.604060278196926e-07,
      "loss": 0.0085,
      "step": 2976220
    },
    {
      "epoch": 4.870682036880658,
      "grad_norm": 0.396798312664032,
      "learning_rate": 2.603401356061754e-07,
      "loss": 0.0066,
      "step": 2976240
    },
    {
      "epoch": 4.870714767319312,
      "grad_norm": 0.4499491751194,
      "learning_rate": 2.602742433926583e-07,
      "loss": 0.0122,
      "step": 2976260
    },
    {
      "epoch": 4.870747497757965,
      "grad_norm": 0.32180947065353394,
      "learning_rate": 2.602083511791412e-07,
      "loss": 0.01,
      "step": 2976280
    },
    {
      "epoch": 4.870780228196618,
      "grad_norm": 0.09437162429094315,
      "learning_rate": 2.6014245896562404e-07,
      "loss": 0.0128,
      "step": 2976300
    },
    {
      "epoch": 4.8708129586352715,
      "grad_norm": 0.16118378937244415,
      "learning_rate": 2.6007656675210694e-07,
      "loss": 0.0074,
      "step": 2976320
    },
    {
      "epoch": 4.870845689073925,
      "grad_norm": 0.24085859954357147,
      "learning_rate": 2.600106745385898e-07,
      "loss": 0.0096,
      "step": 2976340
    },
    {
      "epoch": 4.870878419512579,
      "grad_norm": 0.28605592250823975,
      "learning_rate": 2.599447823250727e-07,
      "loss": 0.0137,
      "step": 2976360
    },
    {
      "epoch": 4.870911149951231,
      "grad_norm": 0.09207385033369064,
      "learning_rate": 2.5987889011155553e-07,
      "loss": 0.0062,
      "step": 2976380
    },
    {
      "epoch": 4.870943880389885,
      "grad_norm": 0.32788074016571045,
      "learning_rate": 2.5981299789803843e-07,
      "loss": 0.0098,
      "step": 2976400
    },
    {
      "epoch": 4.870976610828539,
      "grad_norm": 0.36984243988990784,
      "learning_rate": 2.5974710568452127e-07,
      "loss": 0.0064,
      "step": 2976420
    },
    {
      "epoch": 4.871009341267191,
      "grad_norm": 0.3081687390804291,
      "learning_rate": 2.5968121347100417e-07,
      "loss": 0.0095,
      "step": 2976440
    },
    {
      "epoch": 4.871042071705845,
      "grad_norm": 0.145968496799469,
      "learning_rate": 2.59615321257487e-07,
      "loss": 0.0122,
      "step": 2976460
    },
    {
      "epoch": 4.8710748021444985,
      "grad_norm": 0.1501801759004593,
      "learning_rate": 2.5954942904396986e-07,
      "loss": 0.0075,
      "step": 2976480
    },
    {
      "epoch": 4.871107532583151,
      "grad_norm": 0.09318947046995163,
      "learning_rate": 2.5948353683045276e-07,
      "loss": 0.0098,
      "step": 2976500
    },
    {
      "epoch": 4.871140263021805,
      "grad_norm": 0.3182470202445984,
      "learning_rate": 2.594176446169356e-07,
      "loss": 0.0064,
      "step": 2976520
    },
    {
      "epoch": 4.871172993460458,
      "grad_norm": 0.1788882166147232,
      "learning_rate": 2.593517524034185e-07,
      "loss": 0.0107,
      "step": 2976540
    },
    {
      "epoch": 4.871205723899112,
      "grad_norm": 0.21972763538360596,
      "learning_rate": 2.592858601899014e-07,
      "loss": 0.0081,
      "step": 2976560
    },
    {
      "epoch": 4.871238454337765,
      "grad_norm": 0.36290866136550903,
      "learning_rate": 2.5921996797638425e-07,
      "loss": 0.0122,
      "step": 2976580
    },
    {
      "epoch": 4.871271184776418,
      "grad_norm": 0.07466769963502884,
      "learning_rate": 2.5915407576286715e-07,
      "loss": 0.0123,
      "step": 2976600
    },
    {
      "epoch": 4.871303915215072,
      "grad_norm": 1.019478678703308,
      "learning_rate": 2.5908818354935e-07,
      "loss": 0.0084,
      "step": 2976620
    },
    {
      "epoch": 4.8713366456537255,
      "grad_norm": 0.3436906039714813,
      "learning_rate": 2.590222913358329e-07,
      "loss": 0.0103,
      "step": 2976640
    },
    {
      "epoch": 4.871369376092378,
      "grad_norm": 0.10568703711032867,
      "learning_rate": 2.5895639912231574e-07,
      "loss": 0.0091,
      "step": 2976660
    },
    {
      "epoch": 4.871402106531032,
      "grad_norm": 0.13342086970806122,
      "learning_rate": 2.588905069087986e-07,
      "loss": 0.0088,
      "step": 2976680
    },
    {
      "epoch": 4.871434836969685,
      "grad_norm": 0.12254342436790466,
      "learning_rate": 2.588246146952815e-07,
      "loss": 0.0131,
      "step": 2976700
    },
    {
      "epoch": 4.871467567408338,
      "grad_norm": 0.2244790941476822,
      "learning_rate": 2.5875872248176433e-07,
      "loss": 0.0098,
      "step": 2976720
    },
    {
      "epoch": 4.871500297846992,
      "grad_norm": 0.3546527922153473,
      "learning_rate": 2.586928302682472e-07,
      "loss": 0.0093,
      "step": 2976740
    },
    {
      "epoch": 4.871533028285645,
      "grad_norm": 0.1778349131345749,
      "learning_rate": 2.5862693805473007e-07,
      "loss": 0.0083,
      "step": 2976760
    },
    {
      "epoch": 4.871565758724298,
      "grad_norm": 0.13476653397083282,
      "learning_rate": 2.5856104584121297e-07,
      "loss": 0.0109,
      "step": 2976780
    },
    {
      "epoch": 4.871598489162952,
      "grad_norm": 0.15672045946121216,
      "learning_rate": 2.584951536276958e-07,
      "loss": 0.0089,
      "step": 2976800
    },
    {
      "epoch": 4.871631219601605,
      "grad_norm": 0.24287620186805725,
      "learning_rate": 2.584292614141787e-07,
      "loss": 0.0095,
      "step": 2976820
    },
    {
      "epoch": 4.871663950040259,
      "grad_norm": 0.24583742022514343,
      "learning_rate": 2.583633692006616e-07,
      "loss": 0.0108,
      "step": 2976840
    },
    {
      "epoch": 4.8716966804789115,
      "grad_norm": 0.149788960814476,
      "learning_rate": 2.5829747698714446e-07,
      "loss": 0.0132,
      "step": 2976860
    },
    {
      "epoch": 4.871729410917565,
      "grad_norm": 0.14555929601192474,
      "learning_rate": 2.5823158477362736e-07,
      "loss": 0.0059,
      "step": 2976880
    },
    {
      "epoch": 4.871762141356219,
      "grad_norm": 0.3622661828994751,
      "learning_rate": 2.5816569256011015e-07,
      "loss": 0.0078,
      "step": 2976900
    },
    {
      "epoch": 4.871794871794872,
      "grad_norm": 0.07760830968618393,
      "learning_rate": 2.5809980034659305e-07,
      "loss": 0.006,
      "step": 2976920
    },
    {
      "epoch": 4.871827602233525,
      "grad_norm": 0.0671437531709671,
      "learning_rate": 2.5803390813307595e-07,
      "loss": 0.0109,
      "step": 2976940
    },
    {
      "epoch": 4.8718603326721786,
      "grad_norm": 0.37343257665634155,
      "learning_rate": 2.579680159195588e-07,
      "loss": 0.0076,
      "step": 2976960
    },
    {
      "epoch": 4.871893063110832,
      "grad_norm": 0.3036470115184784,
      "learning_rate": 2.579021237060417e-07,
      "loss": 0.0083,
      "step": 2976980
    },
    {
      "epoch": 4.871925793549485,
      "grad_norm": 0.23405903577804565,
      "learning_rate": 2.5783623149252454e-07,
      "loss": 0.0112,
      "step": 2977000
    },
    {
      "epoch": 4.8719585239881384,
      "grad_norm": 0.07034149020910263,
      "learning_rate": 2.5777033927900744e-07,
      "loss": 0.0065,
      "step": 2977020
    },
    {
      "epoch": 4.871991254426792,
      "grad_norm": 0.1098397821187973,
      "learning_rate": 2.577044470654903e-07,
      "loss": 0.0098,
      "step": 2977040
    },
    {
      "epoch": 4.872023984865445,
      "grad_norm": 0.42130178213119507,
      "learning_rate": 2.576385548519732e-07,
      "loss": 0.0056,
      "step": 2977060
    },
    {
      "epoch": 4.872056715304098,
      "grad_norm": 0.11528177559375763,
      "learning_rate": 2.57572662638456e-07,
      "loss": 0.0067,
      "step": 2977080
    },
    {
      "epoch": 4.872089445742752,
      "grad_norm": 0.20398275554180145,
      "learning_rate": 2.575067704249389e-07,
      "loss": 0.0077,
      "step": 2977100
    },
    {
      "epoch": 4.8721221761814055,
      "grad_norm": 0.412872850894928,
      "learning_rate": 2.5744087821142177e-07,
      "loss": 0.0103,
      "step": 2977120
    },
    {
      "epoch": 4.872154906620058,
      "grad_norm": 0.13919009268283844,
      "learning_rate": 2.573749859979046e-07,
      "loss": 0.0076,
      "step": 2977140
    },
    {
      "epoch": 4.872187637058712,
      "grad_norm": 0.3108750283718109,
      "learning_rate": 2.573090937843875e-07,
      "loss": 0.008,
      "step": 2977160
    },
    {
      "epoch": 4.872220367497365,
      "grad_norm": 0.15000298619270325,
      "learning_rate": 2.5724320157087036e-07,
      "loss": 0.0081,
      "step": 2977180
    },
    {
      "epoch": 4.872253097936019,
      "grad_norm": 0.1333097368478775,
      "learning_rate": 2.5717730935735326e-07,
      "loss": 0.0104,
      "step": 2977200
    },
    {
      "epoch": 4.872285828374672,
      "grad_norm": 0.14019916951656342,
      "learning_rate": 2.5711141714383616e-07,
      "loss": 0.0134,
      "step": 2977220
    },
    {
      "epoch": 4.872318558813325,
      "grad_norm": 0.06331498175859451,
      "learning_rate": 2.57045524930319e-07,
      "loss": 0.0071,
      "step": 2977240
    },
    {
      "epoch": 4.872351289251979,
      "grad_norm": 0.11684204638004303,
      "learning_rate": 2.569796327168019e-07,
      "loss": 0.0095,
      "step": 2977260
    },
    {
      "epoch": 4.872384019690632,
      "grad_norm": 0.2662906348705292,
      "learning_rate": 2.5691374050328475e-07,
      "loss": 0.0119,
      "step": 2977280
    },
    {
      "epoch": 4.872416750129285,
      "grad_norm": 0.6413434147834778,
      "learning_rate": 2.5684784828976764e-07,
      "loss": 0.014,
      "step": 2977300
    },
    {
      "epoch": 4.872449480567939,
      "grad_norm": 0.23749853670597076,
      "learning_rate": 2.567819560762505e-07,
      "loss": 0.0096,
      "step": 2977320
    },
    {
      "epoch": 4.8724822110065915,
      "grad_norm": 0.7188751697540283,
      "learning_rate": 2.5671606386273334e-07,
      "loss": 0.0118,
      "step": 2977340
    },
    {
      "epoch": 4.872514941445245,
      "grad_norm": 0.12516337633132935,
      "learning_rate": 2.5665017164921623e-07,
      "loss": 0.0082,
      "step": 2977360
    },
    {
      "epoch": 4.872547671883899,
      "grad_norm": 0.34835973381996155,
      "learning_rate": 2.565842794356991e-07,
      "loss": 0.0085,
      "step": 2977380
    },
    {
      "epoch": 4.872580402322552,
      "grad_norm": 0.3672325909137726,
      "learning_rate": 2.56518387222182e-07,
      "loss": 0.0095,
      "step": 2977400
    },
    {
      "epoch": 4.872613132761205,
      "grad_norm": 0.2518356144428253,
      "learning_rate": 2.564524950086648e-07,
      "loss": 0.0059,
      "step": 2977420
    },
    {
      "epoch": 4.872645863199859,
      "grad_norm": 0.13134583830833435,
      "learning_rate": 2.563866027951477e-07,
      "loss": 0.0075,
      "step": 2977440
    },
    {
      "epoch": 4.872678593638512,
      "grad_norm": 0.10388389974832535,
      "learning_rate": 2.5632071058163057e-07,
      "loss": 0.0077,
      "step": 2977460
    },
    {
      "epoch": 4.872711324077165,
      "grad_norm": 0.06831702589988708,
      "learning_rate": 2.5625481836811347e-07,
      "loss": 0.0064,
      "step": 2977480
    },
    {
      "epoch": 4.8727440545158185,
      "grad_norm": 0.1348126381635666,
      "learning_rate": 2.5618892615459637e-07,
      "loss": 0.0168,
      "step": 2977500
    },
    {
      "epoch": 4.872776784954472,
      "grad_norm": 0.34882232546806335,
      "learning_rate": 2.561230339410792e-07,
      "loss": 0.0137,
      "step": 2977520
    },
    {
      "epoch": 4.872809515393126,
      "grad_norm": 0.2139221876859665,
      "learning_rate": 2.560571417275621e-07,
      "loss": 0.0083,
      "step": 2977540
    },
    {
      "epoch": 4.872842245831778,
      "grad_norm": 0.18512190878391266,
      "learning_rate": 2.5599124951404496e-07,
      "loss": 0.0105,
      "step": 2977560
    },
    {
      "epoch": 4.872874976270432,
      "grad_norm": 0.27354732155799866,
      "learning_rate": 2.559253573005278e-07,
      "loss": 0.0065,
      "step": 2977580
    },
    {
      "epoch": 4.872907706709086,
      "grad_norm": 0.18241237103939056,
      "learning_rate": 2.558594650870107e-07,
      "loss": 0.0099,
      "step": 2977600
    },
    {
      "epoch": 4.872940437147738,
      "grad_norm": 0.07775158435106277,
      "learning_rate": 2.5579357287349355e-07,
      "loss": 0.0085,
      "step": 2977620
    },
    {
      "epoch": 4.872973167586392,
      "grad_norm": 0.3336402475833893,
      "learning_rate": 2.5572768065997644e-07,
      "loss": 0.0134,
      "step": 2977640
    },
    {
      "epoch": 4.8730058980250455,
      "grad_norm": 0.12093312293291092,
      "learning_rate": 2.556617884464593e-07,
      "loss": 0.0066,
      "step": 2977660
    },
    {
      "epoch": 4.873038628463698,
      "grad_norm": 0.23203933238983154,
      "learning_rate": 2.555958962329422e-07,
      "loss": 0.0113,
      "step": 2977680
    },
    {
      "epoch": 4.873071358902352,
      "grad_norm": 0.18077987432479858,
      "learning_rate": 2.5553000401942503e-07,
      "loss": 0.0091,
      "step": 2977700
    },
    {
      "epoch": 4.873104089341005,
      "grad_norm": 0.2448493093252182,
      "learning_rate": 2.5546411180590793e-07,
      "loss": 0.0119,
      "step": 2977720
    },
    {
      "epoch": 4.873136819779659,
      "grad_norm": 0.13117104768753052,
      "learning_rate": 2.553982195923908e-07,
      "loss": 0.0113,
      "step": 2977740
    },
    {
      "epoch": 4.873169550218312,
      "grad_norm": 0.5735179781913757,
      "learning_rate": 2.553323273788737e-07,
      "loss": 0.0092,
      "step": 2977760
    },
    {
      "epoch": 4.873202280656965,
      "grad_norm": 0.09955109655857086,
      "learning_rate": 2.552664351653565e-07,
      "loss": 0.0079,
      "step": 2977780
    },
    {
      "epoch": 4.873235011095619,
      "grad_norm": 0.47169071435928345,
      "learning_rate": 2.5520054295183937e-07,
      "loss": 0.0112,
      "step": 2977800
    },
    {
      "epoch": 4.8732677415342724,
      "grad_norm": 0.0998549833893776,
      "learning_rate": 2.5513465073832227e-07,
      "loss": 0.0082,
      "step": 2977820
    },
    {
      "epoch": 4.873300471972925,
      "grad_norm": 0.28231281042099,
      "learning_rate": 2.550687585248051e-07,
      "loss": 0.0089,
      "step": 2977840
    },
    {
      "epoch": 4.873333202411579,
      "grad_norm": 0.22653020918369293,
      "learning_rate": 2.55002866311288e-07,
      "loss": 0.0058,
      "step": 2977860
    },
    {
      "epoch": 4.873365932850232,
      "grad_norm": 0.37721359729766846,
      "learning_rate": 2.549369740977709e-07,
      "loss": 0.013,
      "step": 2977880
    },
    {
      "epoch": 4.873398663288885,
      "grad_norm": 0.3702906370162964,
      "learning_rate": 2.5487108188425375e-07,
      "loss": 0.0099,
      "step": 2977900
    },
    {
      "epoch": 4.873431393727539,
      "grad_norm": 0.1543670892715454,
      "learning_rate": 2.5480518967073665e-07,
      "loss": 0.0087,
      "step": 2977920
    },
    {
      "epoch": 4.873464124166192,
      "grad_norm": 0.25325751304626465,
      "learning_rate": 2.547392974572195e-07,
      "loss": 0.0084,
      "step": 2977940
    },
    {
      "epoch": 4.873496854604845,
      "grad_norm": 0.13743610680103302,
      "learning_rate": 2.546734052437024e-07,
      "loss": 0.0106,
      "step": 2977960
    },
    {
      "epoch": 4.8735295850434985,
      "grad_norm": 0.24595680832862854,
      "learning_rate": 2.5460751303018524e-07,
      "loss": 0.0091,
      "step": 2977980
    },
    {
      "epoch": 4.873562315482152,
      "grad_norm": 0.315371572971344,
      "learning_rate": 2.5454162081666814e-07,
      "loss": 0.0112,
      "step": 2978000
    },
    {
      "epoch": 4.873595045920806,
      "grad_norm": 0.3169068396091461,
      "learning_rate": 2.54475728603151e-07,
      "loss": 0.0045,
      "step": 2978020
    },
    {
      "epoch": 4.873627776359458,
      "grad_norm": 0.16200503706932068,
      "learning_rate": 2.5440983638963383e-07,
      "loss": 0.009,
      "step": 2978040
    },
    {
      "epoch": 4.873660506798112,
      "grad_norm": 0.07460090517997742,
      "learning_rate": 2.5434394417611673e-07,
      "loss": 0.0085,
      "step": 2978060
    },
    {
      "epoch": 4.873693237236766,
      "grad_norm": 0.09270059317350388,
      "learning_rate": 2.542780519625996e-07,
      "loss": 0.0072,
      "step": 2978080
    },
    {
      "epoch": 4.873725967675419,
      "grad_norm": 0.11076920479536057,
      "learning_rate": 2.542121597490825e-07,
      "loss": 0.0059,
      "step": 2978100
    },
    {
      "epoch": 4.873758698114072,
      "grad_norm": 0.21383066475391388,
      "learning_rate": 2.541462675355653e-07,
      "loss": 0.0074,
      "step": 2978120
    },
    {
      "epoch": 4.8737914285527255,
      "grad_norm": 0.0551939494907856,
      "learning_rate": 2.540803753220482e-07,
      "loss": 0.0078,
      "step": 2978140
    },
    {
      "epoch": 4.873824158991379,
      "grad_norm": 0.10860501974821091,
      "learning_rate": 2.540144831085311e-07,
      "loss": 0.0129,
      "step": 2978160
    },
    {
      "epoch": 4.873856889430032,
      "grad_norm": 0.15395456552505493,
      "learning_rate": 2.5394859089501396e-07,
      "loss": 0.008,
      "step": 2978180
    },
    {
      "epoch": 4.873889619868685,
      "grad_norm": 0.29978159070014954,
      "learning_rate": 2.5388269868149686e-07,
      "loss": 0.008,
      "step": 2978200
    },
    {
      "epoch": 4.873922350307339,
      "grad_norm": 0.22343987226486206,
      "learning_rate": 2.538168064679797e-07,
      "loss": 0.0089,
      "step": 2978220
    },
    {
      "epoch": 4.873955080745992,
      "grad_norm": 0.5260987281799316,
      "learning_rate": 2.5375091425446255e-07,
      "loss": 0.0088,
      "step": 2978240
    },
    {
      "epoch": 4.873987811184645,
      "grad_norm": 0.1333930790424347,
      "learning_rate": 2.5368502204094545e-07,
      "loss": 0.0087,
      "step": 2978260
    },
    {
      "epoch": 4.874020541623299,
      "grad_norm": 0.4974242150783539,
      "learning_rate": 2.536191298274283e-07,
      "loss": 0.0081,
      "step": 2978280
    },
    {
      "epoch": 4.8740532720619525,
      "grad_norm": 0.23195013403892517,
      "learning_rate": 2.535532376139112e-07,
      "loss": 0.0094,
      "step": 2978300
    },
    {
      "epoch": 4.874086002500605,
      "grad_norm": 0.11064247786998749,
      "learning_rate": 2.5348734540039404e-07,
      "loss": 0.0081,
      "step": 2978320
    },
    {
      "epoch": 4.874118732939259,
      "grad_norm": 0.2704833149909973,
      "learning_rate": 2.5342145318687694e-07,
      "loss": 0.0068,
      "step": 2978340
    },
    {
      "epoch": 4.874151463377912,
      "grad_norm": 0.28315088152885437,
      "learning_rate": 2.533555609733598e-07,
      "loss": 0.0095,
      "step": 2978360
    },
    {
      "epoch": 4.874184193816566,
      "grad_norm": 0.16990475356578827,
      "learning_rate": 2.532896687598427e-07,
      "loss": 0.005,
      "step": 2978380
    },
    {
      "epoch": 4.874216924255219,
      "grad_norm": 0.2726764380931854,
      "learning_rate": 2.5322377654632553e-07,
      "loss": 0.0083,
      "step": 2978400
    },
    {
      "epoch": 4.874249654693872,
      "grad_norm": 0.1823383867740631,
      "learning_rate": 2.5315788433280843e-07,
      "loss": 0.008,
      "step": 2978420
    },
    {
      "epoch": 4.874282385132526,
      "grad_norm": 0.3774617612361908,
      "learning_rate": 2.530919921192913e-07,
      "loss": 0.0105,
      "step": 2978440
    },
    {
      "epoch": 4.874315115571179,
      "grad_norm": 0.2730368971824646,
      "learning_rate": 2.530260999057741e-07,
      "loss": 0.0122,
      "step": 2978460
    },
    {
      "epoch": 4.874347846009832,
      "grad_norm": 0.07216149568557739,
      "learning_rate": 2.52960207692257e-07,
      "loss": 0.0135,
      "step": 2978480
    },
    {
      "epoch": 4.874380576448486,
      "grad_norm": 0.2500540614128113,
      "learning_rate": 2.5289431547873986e-07,
      "loss": 0.0072,
      "step": 2978500
    },
    {
      "epoch": 4.8744133068871385,
      "grad_norm": 0.2028142660856247,
      "learning_rate": 2.5282842326522276e-07,
      "loss": 0.0071,
      "step": 2978520
    },
    {
      "epoch": 4.874446037325792,
      "grad_norm": 0.14896206557750702,
      "learning_rate": 2.5276253105170566e-07,
      "loss": 0.0093,
      "step": 2978540
    },
    {
      "epoch": 4.874478767764446,
      "grad_norm": 0.11620979011058807,
      "learning_rate": 2.526966388381885e-07,
      "loss": 0.011,
      "step": 2978560
    },
    {
      "epoch": 4.874511498203099,
      "grad_norm": 0.3899555206298828,
      "learning_rate": 2.526307466246714e-07,
      "loss": 0.0043,
      "step": 2978580
    },
    {
      "epoch": 4.874544228641752,
      "grad_norm": 0.15026918053627014,
      "learning_rate": 2.5256485441115425e-07,
      "loss": 0.0064,
      "step": 2978600
    },
    {
      "epoch": 4.874576959080406,
      "grad_norm": 0.14403337240219116,
      "learning_rate": 2.5249896219763715e-07,
      "loss": 0.0087,
      "step": 2978620
    },
    {
      "epoch": 4.874609689519059,
      "grad_norm": 0.2595601975917816,
      "learning_rate": 2.5243306998412e-07,
      "loss": 0.0114,
      "step": 2978640
    },
    {
      "epoch": 4.874642419957713,
      "grad_norm": 0.37102749943733215,
      "learning_rate": 2.523671777706029e-07,
      "loss": 0.0083,
      "step": 2978660
    },
    {
      "epoch": 4.8746751503963655,
      "grad_norm": 0.07937426120042801,
      "learning_rate": 2.5230128555708574e-07,
      "loss": 0.0097,
      "step": 2978680
    },
    {
      "epoch": 4.874707880835019,
      "grad_norm": 0.15066418051719666,
      "learning_rate": 2.522353933435686e-07,
      "loss": 0.0062,
      "step": 2978700
    },
    {
      "epoch": 4.874740611273673,
      "grad_norm": 0.25769877433776855,
      "learning_rate": 2.521695011300515e-07,
      "loss": 0.0094,
      "step": 2978720
    },
    {
      "epoch": 4.874773341712325,
      "grad_norm": 0.3242827355861664,
      "learning_rate": 2.5210360891653433e-07,
      "loss": 0.0078,
      "step": 2978740
    },
    {
      "epoch": 4.874806072150979,
      "grad_norm": 0.2090320736169815,
      "learning_rate": 2.5203771670301723e-07,
      "loss": 0.0086,
      "step": 2978760
    },
    {
      "epoch": 4.8748388025896325,
      "grad_norm": 0.16366715729236603,
      "learning_rate": 2.519718244895001e-07,
      "loss": 0.0095,
      "step": 2978780
    },
    {
      "epoch": 4.874871533028285,
      "grad_norm": 0.18011131882667542,
      "learning_rate": 2.5190593227598297e-07,
      "loss": 0.0079,
      "step": 2978800
    },
    {
      "epoch": 4.874904263466939,
      "grad_norm": 0.1440035104751587,
      "learning_rate": 2.5184004006246587e-07,
      "loss": 0.0108,
      "step": 2978820
    },
    {
      "epoch": 4.874936993905592,
      "grad_norm": 0.37426429986953735,
      "learning_rate": 2.517741478489487e-07,
      "loss": 0.0131,
      "step": 2978840
    },
    {
      "epoch": 4.874969724344246,
      "grad_norm": 0.404158353805542,
      "learning_rate": 2.517082556354316e-07,
      "loss": 0.0078,
      "step": 2978860
    },
    {
      "epoch": 4.875002454782899,
      "grad_norm": 0.3450871706008911,
      "learning_rate": 2.5164236342191446e-07,
      "loss": 0.0056,
      "step": 2978880
    },
    {
      "epoch": 4.875035185221552,
      "grad_norm": 0.3936315178871155,
      "learning_rate": 2.515764712083973e-07,
      "loss": 0.0092,
      "step": 2978900
    },
    {
      "epoch": 4.875067915660206,
      "grad_norm": 0.3012465536594391,
      "learning_rate": 2.515105789948802e-07,
      "loss": 0.0072,
      "step": 2978920
    },
    {
      "epoch": 4.8751006460988595,
      "grad_norm": 0.22088903188705444,
      "learning_rate": 2.5144468678136305e-07,
      "loss": 0.0096,
      "step": 2978940
    },
    {
      "epoch": 4.875133376537512,
      "grad_norm": 0.7249981164932251,
      "learning_rate": 2.5137879456784595e-07,
      "loss": 0.0116,
      "step": 2978960
    },
    {
      "epoch": 4.875166106976166,
      "grad_norm": 0.10011444240808487,
      "learning_rate": 2.513129023543288e-07,
      "loss": 0.0088,
      "step": 2978980
    },
    {
      "epoch": 4.875198837414819,
      "grad_norm": 0.25219297409057617,
      "learning_rate": 2.512470101408117e-07,
      "loss": 0.0119,
      "step": 2979000
    },
    {
      "epoch": 4.875231567853472,
      "grad_norm": 0.5402117371559143,
      "learning_rate": 2.5118111792729454e-07,
      "loss": 0.0088,
      "step": 2979020
    },
    {
      "epoch": 4.875264298292126,
      "grad_norm": 0.09019343554973602,
      "learning_rate": 2.5111522571377744e-07,
      "loss": 0.0085,
      "step": 2979040
    },
    {
      "epoch": 4.875297028730779,
      "grad_norm": 0.1712462604045868,
      "learning_rate": 2.510493335002603e-07,
      "loss": 0.0044,
      "step": 2979060
    },
    {
      "epoch": 4.875329759169432,
      "grad_norm": 0.14753979444503784,
      "learning_rate": 2.509834412867432e-07,
      "loss": 0.0082,
      "step": 2979080
    },
    {
      "epoch": 4.875362489608086,
      "grad_norm": 0.08715608716011047,
      "learning_rate": 2.5091754907322603e-07,
      "loss": 0.0069,
      "step": 2979100
    },
    {
      "epoch": 4.875395220046739,
      "grad_norm": 0.1823815405368805,
      "learning_rate": 2.5085165685970887e-07,
      "loss": 0.0061,
      "step": 2979120
    },
    {
      "epoch": 4.875427950485393,
      "grad_norm": 0.18143150210380554,
      "learning_rate": 2.5078576464619177e-07,
      "loss": 0.0104,
      "step": 2979140
    },
    {
      "epoch": 4.8754606809240455,
      "grad_norm": 0.085673987865448,
      "learning_rate": 2.507198724326746e-07,
      "loss": 0.0078,
      "step": 2979160
    },
    {
      "epoch": 4.875493411362699,
      "grad_norm": 0.0961107686161995,
      "learning_rate": 2.506539802191575e-07,
      "loss": 0.0078,
      "step": 2979180
    },
    {
      "epoch": 4.875526141801353,
      "grad_norm": 0.37919414043426514,
      "learning_rate": 2.505880880056404e-07,
      "loss": 0.0107,
      "step": 2979200
    },
    {
      "epoch": 4.875558872240005,
      "grad_norm": 0.24049168825149536,
      "learning_rate": 2.5052219579212326e-07,
      "loss": 0.0058,
      "step": 2979220
    },
    {
      "epoch": 4.875591602678659,
      "grad_norm": 0.1868581771850586,
      "learning_rate": 2.5045630357860616e-07,
      "loss": 0.0062,
      "step": 2979240
    },
    {
      "epoch": 4.875624333117313,
      "grad_norm": 0.3175710141658783,
      "learning_rate": 2.50390411365089e-07,
      "loss": 0.0048,
      "step": 2979260
    },
    {
      "epoch": 4.875657063555966,
      "grad_norm": 0.21013253927230835,
      "learning_rate": 2.503245191515719e-07,
      "loss": 0.0069,
      "step": 2979280
    },
    {
      "epoch": 4.875689793994619,
      "grad_norm": 0.08789859712123871,
      "learning_rate": 2.5025862693805475e-07,
      "loss": 0.0101,
      "step": 2979300
    },
    {
      "epoch": 4.8757225244332725,
      "grad_norm": 0.24666562676429749,
      "learning_rate": 2.5019273472453765e-07,
      "loss": 0.0113,
      "step": 2979320
    },
    {
      "epoch": 4.875755254871926,
      "grad_norm": 0.15223337709903717,
      "learning_rate": 2.501268425110205e-07,
      "loss": 0.0113,
      "step": 2979340
    },
    {
      "epoch": 4.875787985310579,
      "grad_norm": 0.17092983424663544,
      "learning_rate": 2.5006095029750334e-07,
      "loss": 0.0129,
      "step": 2979360
    },
    {
      "epoch": 4.875820715749232,
      "grad_norm": 0.17757640779018402,
      "learning_rate": 2.4999505808398624e-07,
      "loss": 0.0131,
      "step": 2979380
    },
    {
      "epoch": 4.875853446187886,
      "grad_norm": 0.4526233673095703,
      "learning_rate": 2.499291658704691e-07,
      "loss": 0.0093,
      "step": 2979400
    },
    {
      "epoch": 4.875886176626539,
      "grad_norm": 0.03555192053318024,
      "learning_rate": 2.49863273656952e-07,
      "loss": 0.0056,
      "step": 2979420
    },
    {
      "epoch": 4.875918907065192,
      "grad_norm": 0.22180554270744324,
      "learning_rate": 2.497973814434348e-07,
      "loss": 0.0067,
      "step": 2979440
    },
    {
      "epoch": 4.875951637503846,
      "grad_norm": 0.19426988065242767,
      "learning_rate": 2.497314892299177e-07,
      "loss": 0.0085,
      "step": 2979460
    },
    {
      "epoch": 4.8759843679424995,
      "grad_norm": 0.10506373643875122,
      "learning_rate": 2.496655970164006e-07,
      "loss": 0.0103,
      "step": 2979480
    },
    {
      "epoch": 4.876017098381152,
      "grad_norm": 0.13528399169445038,
      "learning_rate": 2.4959970480288347e-07,
      "loss": 0.0125,
      "step": 2979500
    },
    {
      "epoch": 4.876049828819806,
      "grad_norm": 0.29480379819869995,
      "learning_rate": 2.4953381258936637e-07,
      "loss": 0.0121,
      "step": 2979520
    },
    {
      "epoch": 4.876082559258459,
      "grad_norm": 0.40909484028816223,
      "learning_rate": 2.494679203758492e-07,
      "loss": 0.0087,
      "step": 2979540
    },
    {
      "epoch": 4.876115289697113,
      "grad_norm": 0.08283913880586624,
      "learning_rate": 2.4940202816233206e-07,
      "loss": 0.0089,
      "step": 2979560
    },
    {
      "epoch": 4.876148020135766,
      "grad_norm": 0.09508207440376282,
      "learning_rate": 2.4933613594881496e-07,
      "loss": 0.0075,
      "step": 2979580
    },
    {
      "epoch": 4.876180750574419,
      "grad_norm": 0.10416298359632492,
      "learning_rate": 2.492702437352978e-07,
      "loss": 0.0076,
      "step": 2979600
    },
    {
      "epoch": 4.876213481013073,
      "grad_norm": 0.22439990937709808,
      "learning_rate": 2.492043515217807e-07,
      "loss": 0.0099,
      "step": 2979620
    },
    {
      "epoch": 4.8762462114517255,
      "grad_norm": 0.504030168056488,
      "learning_rate": 2.4913845930826355e-07,
      "loss": 0.0108,
      "step": 2979640
    },
    {
      "epoch": 4.876278941890379,
      "grad_norm": 0.13357646763324738,
      "learning_rate": 2.4907256709474645e-07,
      "loss": 0.0094,
      "step": 2979660
    },
    {
      "epoch": 4.876311672329033,
      "grad_norm": 0.5757948160171509,
      "learning_rate": 2.490066748812293e-07,
      "loss": 0.011,
      "step": 2979680
    },
    {
      "epoch": 4.876344402767685,
      "grad_norm": 0.2698615491390228,
      "learning_rate": 2.489407826677122e-07,
      "loss": 0.0086,
      "step": 2979700
    },
    {
      "epoch": 4.876377133206339,
      "grad_norm": 0.1769709438085556,
      "learning_rate": 2.4887489045419504e-07,
      "loss": 0.0107,
      "step": 2979720
    },
    {
      "epoch": 4.876409863644993,
      "grad_norm": 0.5799171328544617,
      "learning_rate": 2.4880899824067793e-07,
      "loss": 0.0063,
      "step": 2979740
    },
    {
      "epoch": 4.876442594083646,
      "grad_norm": 0.25447407364845276,
      "learning_rate": 2.4874310602716083e-07,
      "loss": 0.0068,
      "step": 2979760
    },
    {
      "epoch": 4.876475324522299,
      "grad_norm": 0.16597798466682434,
      "learning_rate": 2.486772138136436e-07,
      "loss": 0.0084,
      "step": 2979780
    },
    {
      "epoch": 4.8765080549609525,
      "grad_norm": 0.08876105397939682,
      "learning_rate": 2.486113216001265e-07,
      "loss": 0.0068,
      "step": 2979800
    },
    {
      "epoch": 4.876540785399606,
      "grad_norm": 0.11562392115592957,
      "learning_rate": 2.4854542938660937e-07,
      "loss": 0.0093,
      "step": 2979820
    },
    {
      "epoch": 4.87657351583826,
      "grad_norm": 0.12089255452156067,
      "learning_rate": 2.4847953717309227e-07,
      "loss": 0.0101,
      "step": 2979840
    },
    {
      "epoch": 4.876606246276912,
      "grad_norm": 0.5510668158531189,
      "learning_rate": 2.4841364495957517e-07,
      "loss": 0.0142,
      "step": 2979860
    },
    {
      "epoch": 4.876638976715566,
      "grad_norm": 0.10266754776239395,
      "learning_rate": 2.48347752746058e-07,
      "loss": 0.0094,
      "step": 2979880
    },
    {
      "epoch": 4.87667170715422,
      "grad_norm": 0.08655276894569397,
      "learning_rate": 2.482818605325409e-07,
      "loss": 0.0084,
      "step": 2979900
    },
    {
      "epoch": 4.876704437592872,
      "grad_norm": 0.20545415580272675,
      "learning_rate": 2.4821596831902376e-07,
      "loss": 0.0147,
      "step": 2979920
    },
    {
      "epoch": 4.876737168031526,
      "grad_norm": 0.20400041341781616,
      "learning_rate": 2.4815007610550666e-07,
      "loss": 0.01,
      "step": 2979940
    },
    {
      "epoch": 4.8767698984701795,
      "grad_norm": 0.13994742929935455,
      "learning_rate": 2.480841838919895e-07,
      "loss": 0.006,
      "step": 2979960
    },
    {
      "epoch": 4.876802628908832,
      "grad_norm": 0.18325339257717133,
      "learning_rate": 2.480182916784724e-07,
      "loss": 0.0065,
      "step": 2979980
    },
    {
      "epoch": 4.876835359347486,
      "grad_norm": 0.28169772028923035,
      "learning_rate": 2.4795239946495524e-07,
      "loss": 0.0107,
      "step": 2980000
    },
    {
      "epoch": 4.876868089786139,
      "grad_norm": 0.19613951444625854,
      "learning_rate": 2.478865072514381e-07,
      "loss": 0.0093,
      "step": 2980020
    },
    {
      "epoch": 4.876900820224793,
      "grad_norm": 0.21001820266246796,
      "learning_rate": 2.47820615037921e-07,
      "loss": 0.0064,
      "step": 2980040
    },
    {
      "epoch": 4.876933550663446,
      "grad_norm": 0.08866991102695465,
      "learning_rate": 2.4775472282440383e-07,
      "loss": 0.011,
      "step": 2980060
    },
    {
      "epoch": 4.876966281102099,
      "grad_norm": 0.10280097275972366,
      "learning_rate": 2.4768883061088673e-07,
      "loss": 0.0105,
      "step": 2980080
    },
    {
      "epoch": 4.876999011540753,
      "grad_norm": 0.1589762568473816,
      "learning_rate": 2.476229383973696e-07,
      "loss": 0.0068,
      "step": 2980100
    },
    {
      "epoch": 4.8770317419794065,
      "grad_norm": 0.2988801598548889,
      "learning_rate": 2.475570461838525e-07,
      "loss": 0.0097,
      "step": 2980120
    },
    {
      "epoch": 4.877064472418059,
      "grad_norm": 0.5010254383087158,
      "learning_rate": 2.474911539703354e-07,
      "loss": 0.0112,
      "step": 2980140
    },
    {
      "epoch": 4.877097202856713,
      "grad_norm": 0.0888252928853035,
      "learning_rate": 2.474252617568182e-07,
      "loss": 0.0076,
      "step": 2980160
    },
    {
      "epoch": 4.877129933295366,
      "grad_norm": 0.5165941715240479,
      "learning_rate": 2.473593695433011e-07,
      "loss": 0.0078,
      "step": 2980180
    },
    {
      "epoch": 4.877162663734019,
      "grad_norm": 0.5112105011940002,
      "learning_rate": 2.4729347732978397e-07,
      "loss": 0.0077,
      "step": 2980200
    },
    {
      "epoch": 4.877195394172673,
      "grad_norm": 0.08008722960948944,
      "learning_rate": 2.472275851162668e-07,
      "loss": 0.0089,
      "step": 2980220
    },
    {
      "epoch": 4.877228124611326,
      "grad_norm": 0.12470460683107376,
      "learning_rate": 2.471616929027497e-07,
      "loss": 0.0089,
      "step": 2980240
    },
    {
      "epoch": 4.877260855049979,
      "grad_norm": 0.408695250749588,
      "learning_rate": 2.4709580068923256e-07,
      "loss": 0.0094,
      "step": 2980260
    },
    {
      "epoch": 4.877293585488633,
      "grad_norm": 0.2044762521982193,
      "learning_rate": 2.4702990847571545e-07,
      "loss": 0.0074,
      "step": 2980280
    },
    {
      "epoch": 4.877326315927286,
      "grad_norm": 0.35987916588783264,
      "learning_rate": 2.469640162621983e-07,
      "loss": 0.0072,
      "step": 2980300
    },
    {
      "epoch": 4.87735904636594,
      "grad_norm": 0.11388687044382095,
      "learning_rate": 2.468981240486812e-07,
      "loss": 0.0074,
      "step": 2980320
    },
    {
      "epoch": 4.8773917768045925,
      "grad_norm": 0.04702668637037277,
      "learning_rate": 2.4683223183516404e-07,
      "loss": 0.0077,
      "step": 2980340
    },
    {
      "epoch": 4.877424507243246,
      "grad_norm": 0.13444951176643372,
      "learning_rate": 2.4676633962164694e-07,
      "loss": 0.0084,
      "step": 2980360
    },
    {
      "epoch": 4.8774572376819,
      "grad_norm": 0.18306633830070496,
      "learning_rate": 2.467004474081298e-07,
      "loss": 0.012,
      "step": 2980380
    },
    {
      "epoch": 4.877489968120553,
      "grad_norm": 0.1499456763267517,
      "learning_rate": 2.466345551946127e-07,
      "loss": 0.0057,
      "step": 2980400
    },
    {
      "epoch": 4.877522698559206,
      "grad_norm": 0.09074506163597107,
      "learning_rate": 2.465686629810956e-07,
      "loss": 0.0074,
      "step": 2980420
    },
    {
      "epoch": 4.8775554289978595,
      "grad_norm": 0.24714688956737518,
      "learning_rate": 2.465027707675784e-07,
      "loss": 0.0103,
      "step": 2980440
    },
    {
      "epoch": 4.877588159436513,
      "grad_norm": 0.15072445571422577,
      "learning_rate": 2.464368785540613e-07,
      "loss": 0.0089,
      "step": 2980460
    },
    {
      "epoch": 4.877620889875166,
      "grad_norm": 0.6461003422737122,
      "learning_rate": 2.463709863405441e-07,
      "loss": 0.0128,
      "step": 2980480
    },
    {
      "epoch": 4.877653620313819,
      "grad_norm": 0.18487298488616943,
      "learning_rate": 2.46305094127027e-07,
      "loss": 0.009,
      "step": 2980500
    },
    {
      "epoch": 4.877686350752473,
      "grad_norm": 0.11126559227705002,
      "learning_rate": 2.462392019135099e-07,
      "loss": 0.0151,
      "step": 2980520
    },
    {
      "epoch": 4.877719081191126,
      "grad_norm": 0.33391159772872925,
      "learning_rate": 2.4617330969999277e-07,
      "loss": 0.0122,
      "step": 2980540
    },
    {
      "epoch": 4.877751811629779,
      "grad_norm": 0.27424731850624084,
      "learning_rate": 2.4610741748647566e-07,
      "loss": 0.0059,
      "step": 2980560
    },
    {
      "epoch": 4.877784542068433,
      "grad_norm": 0.38337329030036926,
      "learning_rate": 2.460415252729585e-07,
      "loss": 0.0086,
      "step": 2980580
    },
    {
      "epoch": 4.8778172725070865,
      "grad_norm": 0.07247576117515564,
      "learning_rate": 2.459756330594414e-07,
      "loss": 0.0067,
      "step": 2980600
    },
    {
      "epoch": 4.877850002945739,
      "grad_norm": 0.08513256162405014,
      "learning_rate": 2.4590974084592425e-07,
      "loss": 0.0082,
      "step": 2980620
    },
    {
      "epoch": 4.877882733384393,
      "grad_norm": 0.21513384580612183,
      "learning_rate": 2.4584384863240715e-07,
      "loss": 0.0102,
      "step": 2980640
    },
    {
      "epoch": 4.877915463823046,
      "grad_norm": 0.06551908701658249,
      "learning_rate": 2.4577795641889e-07,
      "loss": 0.0067,
      "step": 2980660
    },
    {
      "epoch": 4.877948194261699,
      "grad_norm": 0.27881887555122375,
      "learning_rate": 2.4571206420537284e-07,
      "loss": 0.0062,
      "step": 2980680
    },
    {
      "epoch": 4.877980924700353,
      "grad_norm": 0.13029196858406067,
      "learning_rate": 2.4564617199185574e-07,
      "loss": 0.0098,
      "step": 2980700
    },
    {
      "epoch": 4.878013655139006,
      "grad_norm": 0.3971535563468933,
      "learning_rate": 2.455802797783386e-07,
      "loss": 0.0099,
      "step": 2980720
    },
    {
      "epoch": 4.87804638557766,
      "grad_norm": 0.3165484070777893,
      "learning_rate": 2.455143875648215e-07,
      "loss": 0.0113,
      "step": 2980740
    },
    {
      "epoch": 4.878079116016313,
      "grad_norm": 0.16971004009246826,
      "learning_rate": 2.4544849535130433e-07,
      "loss": 0.0056,
      "step": 2980760
    },
    {
      "epoch": 4.878111846454966,
      "grad_norm": 0.12118789553642273,
      "learning_rate": 2.4538260313778723e-07,
      "loss": 0.0099,
      "step": 2980780
    },
    {
      "epoch": 4.87814457689362,
      "grad_norm": 0.2512423098087311,
      "learning_rate": 2.4531671092427013e-07,
      "loss": 0.01,
      "step": 2980800
    },
    {
      "epoch": 4.8781773073322725,
      "grad_norm": 0.0763263925909996,
      "learning_rate": 2.45250818710753e-07,
      "loss": 0.0054,
      "step": 2980820
    },
    {
      "epoch": 4.878210037770926,
      "grad_norm": 0.3086966276168823,
      "learning_rate": 2.4518492649723587e-07,
      "loss": 0.0068,
      "step": 2980840
    },
    {
      "epoch": 4.87824276820958,
      "grad_norm": 0.14005069434642792,
      "learning_rate": 2.451190342837187e-07,
      "loss": 0.0059,
      "step": 2980860
    },
    {
      "epoch": 4.878275498648232,
      "grad_norm": 0.16137473285198212,
      "learning_rate": 2.4505314207020156e-07,
      "loss": 0.0068,
      "step": 2980880
    },
    {
      "epoch": 4.878308229086886,
      "grad_norm": 0.24399283528327942,
      "learning_rate": 2.4498724985668446e-07,
      "loss": 0.0068,
      "step": 2980900
    },
    {
      "epoch": 4.87834095952554,
      "grad_norm": 0.21026241779327393,
      "learning_rate": 2.449213576431673e-07,
      "loss": 0.0063,
      "step": 2980920
    },
    {
      "epoch": 4.878373689964193,
      "grad_norm": 0.10676512122154236,
      "learning_rate": 2.448554654296502e-07,
      "loss": 0.014,
      "step": 2980940
    },
    {
      "epoch": 4.878406420402846,
      "grad_norm": 0.20188069343566895,
      "learning_rate": 2.4478957321613305e-07,
      "loss": 0.0059,
      "step": 2980960
    },
    {
      "epoch": 4.8784391508414995,
      "grad_norm": 0.08942439407110214,
      "learning_rate": 2.4472368100261595e-07,
      "loss": 0.0076,
      "step": 2980980
    },
    {
      "epoch": 4.878471881280153,
      "grad_norm": 0.3109988868236542,
      "learning_rate": 2.446577887890988e-07,
      "loss": 0.0106,
      "step": 2981000
    },
    {
      "epoch": 4.878504611718807,
      "grad_norm": 0.33307814598083496,
      "learning_rate": 2.445918965755817e-07,
      "loss": 0.0081,
      "step": 2981020
    },
    {
      "epoch": 4.878537342157459,
      "grad_norm": 0.1310957670211792,
      "learning_rate": 2.4452600436206454e-07,
      "loss": 0.0096,
      "step": 2981040
    },
    {
      "epoch": 4.878570072596113,
      "grad_norm": 0.12296126037836075,
      "learning_rate": 2.4446011214854744e-07,
      "loss": 0.0077,
      "step": 2981060
    },
    {
      "epoch": 4.878602803034767,
      "grad_norm": 0.2323615550994873,
      "learning_rate": 2.4439421993503034e-07,
      "loss": 0.0078,
      "step": 2981080
    },
    {
      "epoch": 4.878635533473419,
      "grad_norm": 0.1405099481344223,
      "learning_rate": 2.4432832772151313e-07,
      "loss": 0.0088,
      "step": 2981100
    },
    {
      "epoch": 4.878668263912073,
      "grad_norm": 0.2527371346950531,
      "learning_rate": 2.4426243550799603e-07,
      "loss": 0.0147,
      "step": 2981120
    },
    {
      "epoch": 4.8787009943507265,
      "grad_norm": 0.35148054361343384,
      "learning_rate": 2.441965432944789e-07,
      "loss": 0.011,
      "step": 2981140
    },
    {
      "epoch": 4.878733724789379,
      "grad_norm": 0.22304807603359222,
      "learning_rate": 2.441306510809618e-07,
      "loss": 0.0078,
      "step": 2981160
    },
    {
      "epoch": 4.878766455228033,
      "grad_norm": 0.19004102051258087,
      "learning_rate": 2.4406475886744467e-07,
      "loss": 0.0095,
      "step": 2981180
    },
    {
      "epoch": 4.878799185666686,
      "grad_norm": 0.18588383495807648,
      "learning_rate": 2.439988666539275e-07,
      "loss": 0.0069,
      "step": 2981200
    },
    {
      "epoch": 4.87883191610534,
      "grad_norm": 0.1450946033000946,
      "learning_rate": 2.439329744404104e-07,
      "loss": 0.0058,
      "step": 2981220
    },
    {
      "epoch": 4.878864646543993,
      "grad_norm": 0.15143324434757233,
      "learning_rate": 2.4386708222689326e-07,
      "loss": 0.0109,
      "step": 2981240
    },
    {
      "epoch": 4.878897376982646,
      "grad_norm": 0.21240073442459106,
      "learning_rate": 2.4380119001337616e-07,
      "loss": 0.0057,
      "step": 2981260
    },
    {
      "epoch": 4.8789301074213,
      "grad_norm": 0.0648191049695015,
      "learning_rate": 2.43735297799859e-07,
      "loss": 0.0061,
      "step": 2981280
    },
    {
      "epoch": 4.878962837859953,
      "grad_norm": 0.23136408627033234,
      "learning_rate": 2.436694055863419e-07,
      "loss": 0.0072,
      "step": 2981300
    },
    {
      "epoch": 4.878995568298606,
      "grad_norm": 0.21324050426483154,
      "learning_rate": 2.4360351337282475e-07,
      "loss": 0.0084,
      "step": 2981320
    },
    {
      "epoch": 4.87902829873726,
      "grad_norm": 0.2695453464984894,
      "learning_rate": 2.435376211593076e-07,
      "loss": 0.0056,
      "step": 2981340
    },
    {
      "epoch": 4.879061029175913,
      "grad_norm": 0.13589683175086975,
      "learning_rate": 2.434717289457905e-07,
      "loss": 0.009,
      "step": 2981360
    },
    {
      "epoch": 4.879093759614566,
      "grad_norm": 0.1969769150018692,
      "learning_rate": 2.4340583673227334e-07,
      "loss": 0.0103,
      "step": 2981380
    },
    {
      "epoch": 4.87912649005322,
      "grad_norm": 0.17901499569416046,
      "learning_rate": 2.4333994451875624e-07,
      "loss": 0.0088,
      "step": 2981400
    },
    {
      "epoch": 4.879159220491873,
      "grad_norm": 0.41585952043533325,
      "learning_rate": 2.432740523052391e-07,
      "loss": 0.0115,
      "step": 2981420
    },
    {
      "epoch": 4.879191950930526,
      "grad_norm": 0.19506391882896423,
      "learning_rate": 2.43208160091722e-07,
      "loss": 0.0121,
      "step": 2981440
    },
    {
      "epoch": 4.8792246813691795,
      "grad_norm": 0.3729628622531891,
      "learning_rate": 2.431422678782049e-07,
      "loss": 0.012,
      "step": 2981460
    },
    {
      "epoch": 4.879257411807833,
      "grad_norm": 0.5637041926383972,
      "learning_rate": 2.4307637566468773e-07,
      "loss": 0.0132,
      "step": 2981480
    },
    {
      "epoch": 4.879290142246487,
      "grad_norm": null,
      "learning_rate": 2.430104834511706e-07,
      "loss": 0.0115,
      "step": 2981500
    },
    {
      "epoch": 4.879322872685139,
      "grad_norm": 0.5122292041778564,
      "learning_rate": 2.4294459123765347e-07,
      "loss": 0.0088,
      "step": 2981520
    },
    {
      "epoch": 4.879355603123793,
      "grad_norm": 0.29763123393058777,
      "learning_rate": 2.428786990241363e-07,
      "loss": 0.0081,
      "step": 2981540
    },
    {
      "epoch": 4.879388333562447,
      "grad_norm": 0.33404964208602905,
      "learning_rate": 2.428128068106192e-07,
      "loss": 0.0074,
      "step": 2981560
    },
    {
      "epoch": 4.8794210640011,
      "grad_norm": 0.2509804368019104,
      "learning_rate": 2.4274691459710206e-07,
      "loss": 0.0092,
      "step": 2981580
    },
    {
      "epoch": 4.879453794439753,
      "grad_norm": 0.20697806775569916,
      "learning_rate": 2.4268102238358496e-07,
      "loss": 0.0096,
      "step": 2981600
    },
    {
      "epoch": 4.8794865248784065,
      "grad_norm": 0.26913750171661377,
      "learning_rate": 2.426151301700678e-07,
      "loss": 0.008,
      "step": 2981620
    },
    {
      "epoch": 4.87951925531706,
      "grad_norm": 0.3338887691497803,
      "learning_rate": 2.425492379565507e-07,
      "loss": 0.011,
      "step": 2981640
    },
    {
      "epoch": 4.879551985755713,
      "grad_norm": 0.1793706864118576,
      "learning_rate": 2.4248334574303355e-07,
      "loss": 0.0085,
      "step": 2981660
    },
    {
      "epoch": 4.879584716194366,
      "grad_norm": 0.13796979188919067,
      "learning_rate": 2.4241745352951645e-07,
      "loss": 0.0095,
      "step": 2981680
    },
    {
      "epoch": 4.87961744663302,
      "grad_norm": 0.15479792654514313,
      "learning_rate": 2.423515613159993e-07,
      "loss": 0.0108,
      "step": 2981700
    },
    {
      "epoch": 4.879650177071673,
      "grad_norm": 0.09393756836652756,
      "learning_rate": 2.422856691024822e-07,
      "loss": 0.009,
      "step": 2981720
    },
    {
      "epoch": 4.879682907510326,
      "grad_norm": 0.2712618410587311,
      "learning_rate": 2.422197768889651e-07,
      "loss": 0.008,
      "step": 2981740
    },
    {
      "epoch": 4.87971563794898,
      "grad_norm": 0.17298771440982819,
      "learning_rate": 2.4215388467544794e-07,
      "loss": 0.0067,
      "step": 2981760
    },
    {
      "epoch": 4.8797483683876335,
      "grad_norm": 0.1432284712791443,
      "learning_rate": 2.420879924619308e-07,
      "loss": 0.0105,
      "step": 2981780
    },
    {
      "epoch": 4.879781098826286,
      "grad_norm": 0.2791730463504791,
      "learning_rate": 2.4202210024841363e-07,
      "loss": 0.0072,
      "step": 2981800
    },
    {
      "epoch": 4.87981382926494,
      "grad_norm": 0.1400907337665558,
      "learning_rate": 2.419562080348965e-07,
      "loss": 0.0069,
      "step": 2981820
    },
    {
      "epoch": 4.879846559703593,
      "grad_norm": 0.10133448243141174,
      "learning_rate": 2.418903158213794e-07,
      "loss": 0.0079,
      "step": 2981840
    },
    {
      "epoch": 4.879879290142247,
      "grad_norm": 0.8264322280883789,
      "learning_rate": 2.4182442360786227e-07,
      "loss": 0.0111,
      "step": 2981860
    },
    {
      "epoch": 4.8799120205809,
      "grad_norm": 0.15796661376953125,
      "learning_rate": 2.4175853139434517e-07,
      "loss": 0.0065,
      "step": 2981880
    },
    {
      "epoch": 4.879944751019553,
      "grad_norm": 0.4264799952507019,
      "learning_rate": 2.41692639180828e-07,
      "loss": 0.0116,
      "step": 2981900
    },
    {
      "epoch": 4.879977481458207,
      "grad_norm": 0.1177067756652832,
      "learning_rate": 2.416267469673109e-07,
      "loss": 0.0087,
      "step": 2981920
    },
    {
      "epoch": 4.88001021189686,
      "grad_norm": 0.5022420883178711,
      "learning_rate": 2.4156085475379376e-07,
      "loss": 0.0098,
      "step": 2981940
    },
    {
      "epoch": 4.880042942335513,
      "grad_norm": 0.2778897285461426,
      "learning_rate": 2.4149496254027666e-07,
      "loss": 0.0088,
      "step": 2981960
    },
    {
      "epoch": 4.880075672774167,
      "grad_norm": 0.06483712047338486,
      "learning_rate": 2.414290703267595e-07,
      "loss": 0.0064,
      "step": 2981980
    },
    {
      "epoch": 4.8801084032128195,
      "grad_norm": 0.08386769890785217,
      "learning_rate": 2.4136317811324235e-07,
      "loss": 0.0052,
      "step": 2982000
    },
    {
      "epoch": 4.880141133651473,
      "grad_norm": 0.24181245267391205,
      "learning_rate": 2.4129728589972525e-07,
      "loss": 0.0095,
      "step": 2982020
    },
    {
      "epoch": 4.880173864090127,
      "grad_norm": 0.3238442540168762,
      "learning_rate": 2.412313936862081e-07,
      "loss": 0.0098,
      "step": 2982040
    },
    {
      "epoch": 4.88020659452878,
      "grad_norm": 0.26298192143440247,
      "learning_rate": 2.41165501472691e-07,
      "loss": 0.0062,
      "step": 2982060
    },
    {
      "epoch": 4.880239324967433,
      "grad_norm": 0.07153478264808655,
      "learning_rate": 2.4109960925917384e-07,
      "loss": 0.0111,
      "step": 2982080
    },
    {
      "epoch": 4.8802720554060866,
      "grad_norm": 0.19648216664791107,
      "learning_rate": 2.4103371704565674e-07,
      "loss": 0.0134,
      "step": 2982100
    },
    {
      "epoch": 4.88030478584474,
      "grad_norm": 0.2005302906036377,
      "learning_rate": 2.4096782483213963e-07,
      "loss": 0.01,
      "step": 2982120
    },
    {
      "epoch": 4.880337516283393,
      "grad_norm": 0.16321852803230286,
      "learning_rate": 2.409019326186225e-07,
      "loss": 0.007,
      "step": 2982140
    },
    {
      "epoch": 4.8803702467220464,
      "grad_norm": 0.06864325702190399,
      "learning_rate": 2.408360404051054e-07,
      "loss": 0.0063,
      "step": 2982160
    },
    {
      "epoch": 4.8804029771607,
      "grad_norm": 0.15681192278862,
      "learning_rate": 2.407701481915882e-07,
      "loss": 0.0072,
      "step": 2982180
    },
    {
      "epoch": 4.880435707599354,
      "grad_norm": 0.10885351896286011,
      "learning_rate": 2.407042559780711e-07,
      "loss": 0.0072,
      "step": 2982200
    },
    {
      "epoch": 4.880468438038006,
      "grad_norm": 0.2946925163269043,
      "learning_rate": 2.4063836376455397e-07,
      "loss": 0.004,
      "step": 2982220
    },
    {
      "epoch": 4.88050116847666,
      "grad_norm": 0.11639832705259323,
      "learning_rate": 2.405724715510368e-07,
      "loss": 0.0053,
      "step": 2982240
    },
    {
      "epoch": 4.8805338989153135,
      "grad_norm": 0.3353174030780792,
      "learning_rate": 2.405065793375197e-07,
      "loss": 0.0129,
      "step": 2982260
    },
    {
      "epoch": 4.880566629353966,
      "grad_norm": 0.068353071808815,
      "learning_rate": 2.4044068712400256e-07,
      "loss": 0.0083,
      "step": 2982280
    },
    {
      "epoch": 4.88059935979262,
      "grad_norm": 0.3179481029510498,
      "learning_rate": 2.4037479491048546e-07,
      "loss": 0.0086,
      "step": 2982300
    },
    {
      "epoch": 4.880632090231273,
      "grad_norm": 0.28890514373779297,
      "learning_rate": 2.403089026969683e-07,
      "loss": 0.0099,
      "step": 2982320
    },
    {
      "epoch": 4.880664820669926,
      "grad_norm": 0.1399533450603485,
      "learning_rate": 2.402430104834512e-07,
      "loss": 0.0095,
      "step": 2982340
    },
    {
      "epoch": 4.88069755110858,
      "grad_norm": 0.14002645015716553,
      "learning_rate": 2.4017711826993405e-07,
      "loss": 0.0084,
      "step": 2982360
    },
    {
      "epoch": 4.880730281547233,
      "grad_norm": 0.10682487487792969,
      "learning_rate": 2.4011122605641694e-07,
      "loss": 0.009,
      "step": 2982380
    },
    {
      "epoch": 4.880763011985887,
      "grad_norm": 0.11714588850736618,
      "learning_rate": 2.4004533384289984e-07,
      "loss": 0.0072,
      "step": 2982400
    },
    {
      "epoch": 4.88079574242454,
      "grad_norm": 0.16236357390880585,
      "learning_rate": 2.399794416293827e-07,
      "loss": 0.0084,
      "step": 2982420
    },
    {
      "epoch": 4.880828472863193,
      "grad_norm": 0.2140989750623703,
      "learning_rate": 2.3991354941586553e-07,
      "loss": 0.0095,
      "step": 2982440
    },
    {
      "epoch": 4.880861203301847,
      "grad_norm": 0.12488236278295517,
      "learning_rate": 2.398476572023484e-07,
      "loss": 0.0067,
      "step": 2982460
    },
    {
      "epoch": 4.8808939337405,
      "grad_norm": 0.14281511306762695,
      "learning_rate": 2.397817649888313e-07,
      "loss": 0.0073,
      "step": 2982480
    },
    {
      "epoch": 4.880926664179153,
      "grad_norm": 0.2836042046546936,
      "learning_rate": 2.397158727753142e-07,
      "loss": 0.0097,
      "step": 2982500
    },
    {
      "epoch": 4.880959394617807,
      "grad_norm": 0.1880740374326706,
      "learning_rate": 2.39649980561797e-07,
      "loss": 0.0083,
      "step": 2982520
    },
    {
      "epoch": 4.88099212505646,
      "grad_norm": 0.23714514076709747,
      "learning_rate": 2.395840883482799e-07,
      "loss": 0.0076,
      "step": 2982540
    },
    {
      "epoch": 4.881024855495113,
      "grad_norm": 0.1629914492368698,
      "learning_rate": 2.3951819613476277e-07,
      "loss": 0.0078,
      "step": 2982560
    },
    {
      "epoch": 4.881057585933767,
      "grad_norm": 0.6409802436828613,
      "learning_rate": 2.3945230392124567e-07,
      "loss": 0.012,
      "step": 2982580
    },
    {
      "epoch": 4.88109031637242,
      "grad_norm": 0.620994508266449,
      "learning_rate": 2.393864117077285e-07,
      "loss": 0.0091,
      "step": 2982600
    },
    {
      "epoch": 4.881123046811073,
      "grad_norm": 0.21663087606430054,
      "learning_rate": 2.393205194942114e-07,
      "loss": 0.0081,
      "step": 2982620
    },
    {
      "epoch": 4.8811557772497265,
      "grad_norm": 0.2338237166404724,
      "learning_rate": 2.3925462728069426e-07,
      "loss": 0.0068,
      "step": 2982640
    },
    {
      "epoch": 4.88118850768838,
      "grad_norm": 0.37587571144104004,
      "learning_rate": 2.391887350671771e-07,
      "loss": 0.0079,
      "step": 2982660
    },
    {
      "epoch": 4.881221238127034,
      "grad_norm": 0.19110356271266937,
      "learning_rate": 2.3912284285366e-07,
      "loss": 0.0067,
      "step": 2982680
    },
    {
      "epoch": 4.881253968565686,
      "grad_norm": 0.40238675475120544,
      "learning_rate": 2.3905695064014285e-07,
      "loss": 0.0097,
      "step": 2982700
    },
    {
      "epoch": 4.88128669900434,
      "grad_norm": 0.1397203654050827,
      "learning_rate": 2.3899105842662574e-07,
      "loss": 0.0065,
      "step": 2982720
    },
    {
      "epoch": 4.881319429442994,
      "grad_norm": 0.27888208627700806,
      "learning_rate": 2.389251662131086e-07,
      "loss": 0.0068,
      "step": 2982740
    },
    {
      "epoch": 4.881352159881647,
      "grad_norm": 0.17553207278251648,
      "learning_rate": 2.388592739995915e-07,
      "loss": 0.0117,
      "step": 2982760
    },
    {
      "epoch": 4.8813848903203,
      "grad_norm": 0.17617326974868774,
      "learning_rate": 2.387933817860744e-07,
      "loss": 0.01,
      "step": 2982780
    },
    {
      "epoch": 4.8814176207589535,
      "grad_norm": 0.2634008824825287,
      "learning_rate": 2.3872748957255723e-07,
      "loss": 0.0121,
      "step": 2982800
    },
    {
      "epoch": 4.881450351197607,
      "grad_norm": 0.16511298716068268,
      "learning_rate": 2.3866159735904013e-07,
      "loss": 0.0076,
      "step": 2982820
    },
    {
      "epoch": 4.88148308163626,
      "grad_norm": 0.09835108369588852,
      "learning_rate": 2.38595705145523e-07,
      "loss": 0.0096,
      "step": 2982840
    },
    {
      "epoch": 4.881515812074913,
      "grad_norm": 0.3233101963996887,
      "learning_rate": 2.385298129320059e-07,
      "loss": 0.0074,
      "step": 2982860
    },
    {
      "epoch": 4.881548542513567,
      "grad_norm": 0.11377187818288803,
      "learning_rate": 2.384639207184887e-07,
      "loss": 0.0079,
      "step": 2982880
    },
    {
      "epoch": 4.88158127295222,
      "grad_norm": 0.14717532694339752,
      "learning_rate": 2.3839802850497157e-07,
      "loss": 0.0087,
      "step": 2982900
    },
    {
      "epoch": 4.881614003390873,
      "grad_norm": 0.12143260985612869,
      "learning_rate": 2.3833213629145444e-07,
      "loss": 0.0081,
      "step": 2982920
    },
    {
      "epoch": 4.881646733829527,
      "grad_norm": 0.22777840495109558,
      "learning_rate": 2.3826624407793734e-07,
      "loss": 0.0112,
      "step": 2982940
    },
    {
      "epoch": 4.8816794642681804,
      "grad_norm": 0.14204591512680054,
      "learning_rate": 2.382003518644202e-07,
      "loss": 0.014,
      "step": 2982960
    },
    {
      "epoch": 4.881712194706833,
      "grad_norm": 0.10479346662759781,
      "learning_rate": 2.3813445965090308e-07,
      "loss": 0.0062,
      "step": 2982980
    },
    {
      "epoch": 4.881744925145487,
      "grad_norm": 0.13850517570972443,
      "learning_rate": 2.3806856743738595e-07,
      "loss": 0.0116,
      "step": 2983000
    },
    {
      "epoch": 4.88177765558414,
      "grad_norm": 0.21276196837425232,
      "learning_rate": 2.3800267522386883e-07,
      "loss": 0.007,
      "step": 2983020
    },
    {
      "epoch": 4.881810386022794,
      "grad_norm": 0.27718785405158997,
      "learning_rate": 2.379367830103517e-07,
      "loss": 0.0073,
      "step": 2983040
    },
    {
      "epoch": 4.881843116461447,
      "grad_norm": 0.07513317465782166,
      "learning_rate": 2.3787089079683457e-07,
      "loss": 0.0055,
      "step": 2983060
    },
    {
      "epoch": 4.8818758469001,
      "grad_norm": 0.13710299134254456,
      "learning_rate": 2.3780499858331744e-07,
      "loss": 0.0064,
      "step": 2983080
    },
    {
      "epoch": 4.881908577338754,
      "grad_norm": 0.1690201610326767,
      "learning_rate": 2.377391063698003e-07,
      "loss": 0.0086,
      "step": 2983100
    },
    {
      "epoch": 4.8819413077774065,
      "grad_norm": 0.1322706639766693,
      "learning_rate": 2.3767321415628316e-07,
      "loss": 0.0071,
      "step": 2983120
    },
    {
      "epoch": 4.88197403821606,
      "grad_norm": 0.26805025339126587,
      "learning_rate": 2.3760732194276603e-07,
      "loss": 0.0149,
      "step": 2983140
    },
    {
      "epoch": 4.882006768654714,
      "grad_norm": 0.3569667935371399,
      "learning_rate": 2.375414297292489e-07,
      "loss": 0.0087,
      "step": 2983160
    },
    {
      "epoch": 4.882039499093366,
      "grad_norm": 0.11729971319437027,
      "learning_rate": 2.3747553751573178e-07,
      "loss": 0.0076,
      "step": 2983180
    },
    {
      "epoch": 4.88207222953202,
      "grad_norm": 0.11570242047309875,
      "learning_rate": 2.3740964530221465e-07,
      "loss": 0.0077,
      "step": 2983200
    },
    {
      "epoch": 4.882104959970674,
      "grad_norm": 0.2578567564487457,
      "learning_rate": 2.3734375308869752e-07,
      "loss": 0.0088,
      "step": 2983220
    },
    {
      "epoch": 4.882137690409327,
      "grad_norm": 0.14671018719673157,
      "learning_rate": 2.3727786087518042e-07,
      "loss": 0.009,
      "step": 2983240
    },
    {
      "epoch": 4.88217042084798,
      "grad_norm": 0.14987815916538239,
      "learning_rate": 2.372119686616633e-07,
      "loss": 0.0079,
      "step": 2983260
    },
    {
      "epoch": 4.8822031512866335,
      "grad_norm": 0.2760320007801056,
      "learning_rate": 2.3714607644814616e-07,
      "loss": 0.0115,
      "step": 2983280
    },
    {
      "epoch": 4.882235881725287,
      "grad_norm": 0.1361774355173111,
      "learning_rate": 2.3708018423462903e-07,
      "loss": 0.0058,
      "step": 2983300
    },
    {
      "epoch": 4.882268612163941,
      "grad_norm": 0.25598421692848206,
      "learning_rate": 2.3701429202111188e-07,
      "loss": 0.0055,
      "step": 2983320
    },
    {
      "epoch": 4.882301342602593,
      "grad_norm": 0.16120655834674835,
      "learning_rate": 2.3694839980759475e-07,
      "loss": 0.0087,
      "step": 2983340
    },
    {
      "epoch": 4.882334073041247,
      "grad_norm": 1.0395311117172241,
      "learning_rate": 2.3688250759407762e-07,
      "loss": 0.0096,
      "step": 2983360
    },
    {
      "epoch": 4.882366803479901,
      "grad_norm": 0.1649366170167923,
      "learning_rate": 2.368166153805605e-07,
      "loss": 0.0132,
      "step": 2983380
    },
    {
      "epoch": 4.882399533918553,
      "grad_norm": 0.1562265306711197,
      "learning_rate": 2.3675072316704337e-07,
      "loss": 0.0085,
      "step": 2983400
    },
    {
      "epoch": 4.882432264357207,
      "grad_norm": 0.34842318296432495,
      "learning_rate": 2.3668483095352624e-07,
      "loss": 0.0069,
      "step": 2983420
    },
    {
      "epoch": 4.8824649947958605,
      "grad_norm": 0.320865660905838,
      "learning_rate": 2.366189387400091e-07,
      "loss": 0.0083,
      "step": 2983440
    },
    {
      "epoch": 4.882497725234513,
      "grad_norm": 0.3050936460494995,
      "learning_rate": 2.3655304652649199e-07,
      "loss": 0.0066,
      "step": 2983460
    },
    {
      "epoch": 4.882530455673167,
      "grad_norm": 0.3380778133869171,
      "learning_rate": 2.3648715431297486e-07,
      "loss": 0.0071,
      "step": 2983480
    },
    {
      "epoch": 4.88256318611182,
      "grad_norm": 0.2779146730899811,
      "learning_rate": 2.3642126209945773e-07,
      "loss": 0.0065,
      "step": 2983500
    },
    {
      "epoch": 4.882595916550474,
      "grad_norm": 0.3350469768047333,
      "learning_rate": 2.363553698859406e-07,
      "loss": 0.0089,
      "step": 2983520
    },
    {
      "epoch": 4.882628646989127,
      "grad_norm": 0.35950717329978943,
      "learning_rate": 2.3628947767242345e-07,
      "loss": 0.009,
      "step": 2983540
    },
    {
      "epoch": 4.88266137742778,
      "grad_norm": 0.3171549141407013,
      "learning_rate": 2.3622358545890632e-07,
      "loss": 0.0123,
      "step": 2983560
    },
    {
      "epoch": 4.882694107866434,
      "grad_norm": 0.20706962049007416,
      "learning_rate": 2.361576932453892e-07,
      "loss": 0.0066,
      "step": 2983580
    },
    {
      "epoch": 4.882726838305087,
      "grad_norm": 0.32129818201065063,
      "learning_rate": 2.360918010318721e-07,
      "loss": 0.0079,
      "step": 2983600
    },
    {
      "epoch": 4.88275956874374,
      "grad_norm": 0.14588695764541626,
      "learning_rate": 2.3602590881835496e-07,
      "loss": 0.0107,
      "step": 2983620
    },
    {
      "epoch": 4.882792299182394,
      "grad_norm": 0.22248341143131256,
      "learning_rate": 2.3596001660483783e-07,
      "loss": 0.0106,
      "step": 2983640
    },
    {
      "epoch": 4.882825029621047,
      "grad_norm": 1.3819680213928223,
      "learning_rate": 2.358941243913207e-07,
      "loss": 0.0096,
      "step": 2983660
    },
    {
      "epoch": 4.8828577600597,
      "grad_norm": 0.233585387468338,
      "learning_rate": 2.3582823217780358e-07,
      "loss": 0.0103,
      "step": 2983680
    },
    {
      "epoch": 4.882890490498354,
      "grad_norm": 0.15572106838226318,
      "learning_rate": 2.3576233996428645e-07,
      "loss": 0.0085,
      "step": 2983700
    },
    {
      "epoch": 4.882923220937007,
      "grad_norm": 0.20248739421367645,
      "learning_rate": 2.3569644775076932e-07,
      "loss": 0.008,
      "step": 2983720
    },
    {
      "epoch": 4.88295595137566,
      "grad_norm": 0.21428951621055603,
      "learning_rate": 2.356305555372522e-07,
      "loss": 0.0092,
      "step": 2983740
    },
    {
      "epoch": 4.882988681814314,
      "grad_norm": 0.16504578292369843,
      "learning_rate": 2.3556466332373504e-07,
      "loss": 0.0074,
      "step": 2983760
    },
    {
      "epoch": 4.883021412252967,
      "grad_norm": 0.3000166714191437,
      "learning_rate": 2.354987711102179e-07,
      "loss": 0.0077,
      "step": 2983780
    },
    {
      "epoch": 4.88305414269162,
      "grad_norm": 0.24056382477283478,
      "learning_rate": 2.3543287889670078e-07,
      "loss": 0.0092,
      "step": 2983800
    },
    {
      "epoch": 4.8830868731302735,
      "grad_norm": 0.25108057260513306,
      "learning_rate": 2.3536698668318366e-07,
      "loss": 0.0116,
      "step": 2983820
    },
    {
      "epoch": 4.883119603568927,
      "grad_norm": 0.22657106816768646,
      "learning_rate": 2.3530109446966653e-07,
      "loss": 0.0067,
      "step": 2983840
    },
    {
      "epoch": 4.883152334007581,
      "grad_norm": 0.10692808777093887,
      "learning_rate": 2.352352022561494e-07,
      "loss": 0.0097,
      "step": 2983860
    },
    {
      "epoch": 4.883185064446233,
      "grad_norm": 0.37538689374923706,
      "learning_rate": 2.3516931004263227e-07,
      "loss": 0.0098,
      "step": 2983880
    },
    {
      "epoch": 4.883217794884887,
      "grad_norm": 0.40850621461868286,
      "learning_rate": 2.3510341782911517e-07,
      "loss": 0.0099,
      "step": 2983900
    },
    {
      "epoch": 4.8832505253235405,
      "grad_norm": 0.20222002267837524,
      "learning_rate": 2.3503752561559804e-07,
      "loss": 0.0066,
      "step": 2983920
    },
    {
      "epoch": 4.883283255762194,
      "grad_norm": 0.4227839410305023,
      "learning_rate": 2.3497163340208092e-07,
      "loss": 0.0095,
      "step": 2983940
    },
    {
      "epoch": 4.883315986200847,
      "grad_norm": 0.0657753273844719,
      "learning_rate": 2.349057411885638e-07,
      "loss": 0.008,
      "step": 2983960
    },
    {
      "epoch": 4.8833487166395,
      "grad_norm": 0.4433133900165558,
      "learning_rate": 2.3483984897504663e-07,
      "loss": 0.006,
      "step": 2983980
    },
    {
      "epoch": 4.883381447078154,
      "grad_norm": 0.03878781571984291,
      "learning_rate": 2.347739567615295e-07,
      "loss": 0.0096,
      "step": 2984000
    },
    {
      "epoch": 4.883414177516807,
      "grad_norm": 0.2589414119720459,
      "learning_rate": 2.3470806454801238e-07,
      "loss": 0.0121,
      "step": 2984020
    },
    {
      "epoch": 4.88344690795546,
      "grad_norm": 0.4321964681148529,
      "learning_rate": 2.3464217233449525e-07,
      "loss": 0.0075,
      "step": 2984040
    },
    {
      "epoch": 4.883479638394114,
      "grad_norm": 0.1184002161026001,
      "learning_rate": 2.3457628012097812e-07,
      "loss": 0.009,
      "step": 2984060
    },
    {
      "epoch": 4.883512368832767,
      "grad_norm": 0.1274499148130417,
      "learning_rate": 2.34510387907461e-07,
      "loss": 0.011,
      "step": 2984080
    },
    {
      "epoch": 4.88354509927142,
      "grad_norm": 0.15872223675251007,
      "learning_rate": 2.3444449569394387e-07,
      "loss": 0.0113,
      "step": 2984100
    },
    {
      "epoch": 4.883577829710074,
      "grad_norm": 0.2725577652454376,
      "learning_rate": 2.3437860348042674e-07,
      "loss": 0.0089,
      "step": 2984120
    },
    {
      "epoch": 4.883610560148727,
      "grad_norm": 0.16186922788619995,
      "learning_rate": 2.343127112669096e-07,
      "loss": 0.007,
      "step": 2984140
    },
    {
      "epoch": 4.88364329058738,
      "grad_norm": 0.3177868723869324,
      "learning_rate": 2.3424681905339248e-07,
      "loss": 0.012,
      "step": 2984160
    },
    {
      "epoch": 4.883676021026034,
      "grad_norm": 0.37353843450546265,
      "learning_rate": 2.3418092683987535e-07,
      "loss": 0.0125,
      "step": 2984180
    },
    {
      "epoch": 4.883708751464687,
      "grad_norm": 0.2323242574930191,
      "learning_rate": 2.341150346263582e-07,
      "loss": 0.0066,
      "step": 2984200
    },
    {
      "epoch": 4.883741481903341,
      "grad_norm": 0.03006848692893982,
      "learning_rate": 2.3404914241284107e-07,
      "loss": 0.0062,
      "step": 2984220
    },
    {
      "epoch": 4.883774212341994,
      "grad_norm": 0.12602275609970093,
      "learning_rate": 2.3398325019932394e-07,
      "loss": 0.008,
      "step": 2984240
    },
    {
      "epoch": 4.883806942780647,
      "grad_norm": 0.05819947272539139,
      "learning_rate": 2.3391735798580684e-07,
      "loss": 0.0078,
      "step": 2984260
    },
    {
      "epoch": 4.883839673219301,
      "grad_norm": 0.18803101778030396,
      "learning_rate": 2.3385146577228971e-07,
      "loss": 0.0058,
      "step": 2984280
    },
    {
      "epoch": 4.8838724036579535,
      "grad_norm": 0.3101366460323334,
      "learning_rate": 2.3378557355877259e-07,
      "loss": 0.0072,
      "step": 2984300
    },
    {
      "epoch": 4.883905134096607,
      "grad_norm": 0.10265059024095535,
      "learning_rate": 2.3371968134525546e-07,
      "loss": 0.0089,
      "step": 2984320
    },
    {
      "epoch": 4.883937864535261,
      "grad_norm": 0.16519278287887573,
      "learning_rate": 2.3365378913173833e-07,
      "loss": 0.0108,
      "step": 2984340
    },
    {
      "epoch": 4.883970594973913,
      "grad_norm": 0.324087530374527,
      "learning_rate": 2.335878969182212e-07,
      "loss": 0.0123,
      "step": 2984360
    },
    {
      "epoch": 4.884003325412567,
      "grad_norm": 0.09916792809963226,
      "learning_rate": 2.3352200470470407e-07,
      "loss": 0.0071,
      "step": 2984380
    },
    {
      "epoch": 4.884036055851221,
      "grad_norm": 0.5024336576461792,
      "learning_rate": 2.3345611249118695e-07,
      "loss": 0.0148,
      "step": 2984400
    },
    {
      "epoch": 4.884068786289874,
      "grad_norm": 0.10781753808259964,
      "learning_rate": 2.333902202776698e-07,
      "loss": 0.0079,
      "step": 2984420
    },
    {
      "epoch": 4.884101516728527,
      "grad_norm": 0.44045403599739075,
      "learning_rate": 2.3332432806415266e-07,
      "loss": 0.0077,
      "step": 2984440
    },
    {
      "epoch": 4.8841342471671805,
      "grad_norm": 0.10305844992399216,
      "learning_rate": 2.3325843585063554e-07,
      "loss": 0.0079,
      "step": 2984460
    },
    {
      "epoch": 4.884166977605834,
      "grad_norm": 0.23782840371131897,
      "learning_rate": 2.331925436371184e-07,
      "loss": 0.0091,
      "step": 2984480
    },
    {
      "epoch": 4.884199708044488,
      "grad_norm": 0.09061404317617416,
      "learning_rate": 2.3312665142360128e-07,
      "loss": 0.0088,
      "step": 2984500
    },
    {
      "epoch": 4.88423243848314,
      "grad_norm": 0.20651476085186005,
      "learning_rate": 2.3306075921008415e-07,
      "loss": 0.0066,
      "step": 2984520
    },
    {
      "epoch": 4.884265168921794,
      "grad_norm": 0.1915401816368103,
      "learning_rate": 2.3299486699656703e-07,
      "loss": 0.0098,
      "step": 2984540
    },
    {
      "epoch": 4.884297899360448,
      "grad_norm": 0.21267685294151306,
      "learning_rate": 2.3292897478304992e-07,
      "loss": 0.0087,
      "step": 2984560
    },
    {
      "epoch": 4.8843306297991,
      "grad_norm": 0.31262755393981934,
      "learning_rate": 2.328630825695328e-07,
      "loss": 0.008,
      "step": 2984580
    },
    {
      "epoch": 4.884363360237754,
      "grad_norm": 0.14412793517112732,
      "learning_rate": 2.3279719035601567e-07,
      "loss": 0.0076,
      "step": 2984600
    },
    {
      "epoch": 4.8843960906764075,
      "grad_norm": 0.16730761528015137,
      "learning_rate": 2.3273129814249854e-07,
      "loss": 0.0049,
      "step": 2984620
    },
    {
      "epoch": 4.88442882111506,
      "grad_norm": 0.2131226658821106,
      "learning_rate": 2.3266540592898139e-07,
      "loss": 0.0082,
      "step": 2984640
    },
    {
      "epoch": 4.884461551553714,
      "grad_norm": 0.40386927127838135,
      "learning_rate": 2.3259951371546426e-07,
      "loss": 0.0096,
      "step": 2984660
    },
    {
      "epoch": 4.884494281992367,
      "grad_norm": 0.09982438385486603,
      "learning_rate": 2.3253362150194713e-07,
      "loss": 0.0058,
      "step": 2984680
    },
    {
      "epoch": 4.884527012431021,
      "grad_norm": 0.21875682473182678,
      "learning_rate": 2.3246772928843e-07,
      "loss": 0.0102,
      "step": 2984700
    },
    {
      "epoch": 4.884559742869674,
      "grad_norm": 0.05228963866829872,
      "learning_rate": 2.3240183707491287e-07,
      "loss": 0.0124,
      "step": 2984720
    },
    {
      "epoch": 4.884592473308327,
      "grad_norm": 0.24167490005493164,
      "learning_rate": 2.3233594486139575e-07,
      "loss": 0.0078,
      "step": 2984740
    },
    {
      "epoch": 4.884625203746981,
      "grad_norm": 0.32146933674812317,
      "learning_rate": 2.3227005264787862e-07,
      "loss": 0.0092,
      "step": 2984760
    },
    {
      "epoch": 4.884657934185634,
      "grad_norm": 0.25262466073036194,
      "learning_rate": 2.322041604343615e-07,
      "loss": 0.0058,
      "step": 2984780
    },
    {
      "epoch": 4.884690664624287,
      "grad_norm": 0.057565100491046906,
      "learning_rate": 2.3213826822084436e-07,
      "loss": 0.0073,
      "step": 2984800
    },
    {
      "epoch": 4.884723395062941,
      "grad_norm": 0.12181105464696884,
      "learning_rate": 2.3207237600732723e-07,
      "loss": 0.0044,
      "step": 2984820
    },
    {
      "epoch": 4.884756125501594,
      "grad_norm": 0.3665713965892792,
      "learning_rate": 2.320064837938101e-07,
      "loss": 0.0104,
      "step": 2984840
    },
    {
      "epoch": 4.884788855940247,
      "grad_norm": 0.28674906492233276,
      "learning_rate": 2.3194059158029295e-07,
      "loss": 0.0121,
      "step": 2984860
    },
    {
      "epoch": 4.884821586378901,
      "grad_norm": 0.18539877235889435,
      "learning_rate": 2.3187469936677582e-07,
      "loss": 0.0093,
      "step": 2984880
    },
    {
      "epoch": 4.884854316817554,
      "grad_norm": 0.08622495830059052,
      "learning_rate": 2.318088071532587e-07,
      "loss": 0.0066,
      "step": 2984900
    },
    {
      "epoch": 4.884887047256207,
      "grad_norm": 0.1149536743760109,
      "learning_rate": 2.317429149397416e-07,
      "loss": 0.01,
      "step": 2984920
    },
    {
      "epoch": 4.8849197776948605,
      "grad_norm": 0.2552909553050995,
      "learning_rate": 2.3167702272622447e-07,
      "loss": 0.0093,
      "step": 2984940
    },
    {
      "epoch": 4.884952508133514,
      "grad_norm": 0.09704767167568207,
      "learning_rate": 2.3161113051270734e-07,
      "loss": 0.0098,
      "step": 2984960
    },
    {
      "epoch": 4.884985238572168,
      "grad_norm": 0.13233555853366852,
      "learning_rate": 2.315452382991902e-07,
      "loss": 0.0094,
      "step": 2984980
    },
    {
      "epoch": 4.88501796901082,
      "grad_norm": 0.20387788116931915,
      "learning_rate": 2.3147934608567308e-07,
      "loss": 0.0071,
      "step": 2985000
    },
    {
      "epoch": 4.885050699449474,
      "grad_norm": 0.21869449317455292,
      "learning_rate": 2.3141345387215596e-07,
      "loss": 0.0077,
      "step": 2985020
    },
    {
      "epoch": 4.885083429888128,
      "grad_norm": 0.2312602996826172,
      "learning_rate": 2.3134756165863883e-07,
      "loss": 0.0139,
      "step": 2985040
    },
    {
      "epoch": 4.885116160326781,
      "grad_norm": 0.3591526746749878,
      "learning_rate": 2.312816694451217e-07,
      "loss": 0.0129,
      "step": 2985060
    },
    {
      "epoch": 4.885148890765434,
      "grad_norm": 0.5111612677574158,
      "learning_rate": 2.3121577723160455e-07,
      "loss": 0.0065,
      "step": 2985080
    },
    {
      "epoch": 4.8851816212040875,
      "grad_norm": 0.6461215615272522,
      "learning_rate": 2.3114988501808742e-07,
      "loss": 0.0113,
      "step": 2985100
    },
    {
      "epoch": 4.885214351642741,
      "grad_norm": 0.34505385160446167,
      "learning_rate": 2.310839928045703e-07,
      "loss": 0.0101,
      "step": 2985120
    },
    {
      "epoch": 4.885247082081394,
      "grad_norm": 0.3288528323173523,
      "learning_rate": 2.3101810059105316e-07,
      "loss": 0.0094,
      "step": 2985140
    },
    {
      "epoch": 4.885279812520047,
      "grad_norm": 0.08195269852876663,
      "learning_rate": 2.3095220837753603e-07,
      "loss": 0.0081,
      "step": 2985160
    },
    {
      "epoch": 4.885312542958701,
      "grad_norm": 0.2521063983440399,
      "learning_rate": 2.308863161640189e-07,
      "loss": 0.009,
      "step": 2985180
    },
    {
      "epoch": 4.885345273397354,
      "grad_norm": 0.24231167137622833,
      "learning_rate": 2.3082042395050178e-07,
      "loss": 0.0115,
      "step": 2985200
    },
    {
      "epoch": 4.885378003836007,
      "grad_norm": 0.2680596709251404,
      "learning_rate": 2.3075453173698468e-07,
      "loss": 0.0092,
      "step": 2985220
    },
    {
      "epoch": 4.885410734274661,
      "grad_norm": 0.6590563058853149,
      "learning_rate": 2.3068863952346755e-07,
      "loss": 0.0098,
      "step": 2985240
    },
    {
      "epoch": 4.885443464713314,
      "grad_norm": 0.36142176389694214,
      "learning_rate": 2.3062274730995042e-07,
      "loss": 0.009,
      "step": 2985260
    },
    {
      "epoch": 4.885476195151967,
      "grad_norm": 0.43215397000312805,
      "learning_rate": 2.305568550964333e-07,
      "loss": 0.0078,
      "step": 2985280
    },
    {
      "epoch": 4.885508925590621,
      "grad_norm": 0.16905860602855682,
      "learning_rate": 2.3049096288291614e-07,
      "loss": 0.0081,
      "step": 2985300
    },
    {
      "epoch": 4.885541656029274,
      "grad_norm": 0.20354488492012024,
      "learning_rate": 2.30425070669399e-07,
      "loss": 0.0087,
      "step": 2985320
    },
    {
      "epoch": 4.885574386467927,
      "grad_norm": 0.20984609425067902,
      "learning_rate": 2.3035917845588188e-07,
      "loss": 0.0063,
      "step": 2985340
    },
    {
      "epoch": 4.885607116906581,
      "grad_norm": 0.21141208708286285,
      "learning_rate": 2.3029328624236475e-07,
      "loss": 0.0113,
      "step": 2985360
    },
    {
      "epoch": 4.885639847345234,
      "grad_norm": 0.5932124257087708,
      "learning_rate": 2.3022739402884763e-07,
      "loss": 0.0092,
      "step": 2985380
    },
    {
      "epoch": 4.885672577783888,
      "grad_norm": 0.09629056602716446,
      "learning_rate": 2.301615018153305e-07,
      "loss": 0.0051,
      "step": 2985400
    },
    {
      "epoch": 4.885705308222541,
      "grad_norm": 0.1517808437347412,
      "learning_rate": 2.3009560960181337e-07,
      "loss": 0.0102,
      "step": 2985420
    },
    {
      "epoch": 4.885738038661194,
      "grad_norm": 0.21692460775375366,
      "learning_rate": 2.3002971738829624e-07,
      "loss": 0.0088,
      "step": 2985440
    },
    {
      "epoch": 4.885770769099848,
      "grad_norm": 0.397643506526947,
      "learning_rate": 2.2996382517477912e-07,
      "loss": 0.0055,
      "step": 2985460
    },
    {
      "epoch": 4.8858034995385005,
      "grad_norm": 0.4405592978000641,
      "learning_rate": 2.29897932961262e-07,
      "loss": 0.0085,
      "step": 2985480
    },
    {
      "epoch": 4.885836229977154,
      "grad_norm": 0.4022451937198639,
      "learning_rate": 2.2983204074774486e-07,
      "loss": 0.0077,
      "step": 2985500
    },
    {
      "epoch": 4.885868960415808,
      "grad_norm": 0.968856692314148,
      "learning_rate": 2.297661485342277e-07,
      "loss": 0.0122,
      "step": 2985520
    },
    {
      "epoch": 4.88590169085446,
      "grad_norm": 0.23175351321697235,
      "learning_rate": 2.2970025632071058e-07,
      "loss": 0.0064,
      "step": 2985540
    },
    {
      "epoch": 4.885934421293114,
      "grad_norm": 0.2860807478427887,
      "learning_rate": 2.2963436410719345e-07,
      "loss": 0.011,
      "step": 2985560
    },
    {
      "epoch": 4.8859671517317675,
      "grad_norm": 0.28379806876182556,
      "learning_rate": 2.2956847189367635e-07,
      "loss": 0.0086,
      "step": 2985580
    },
    {
      "epoch": 4.885999882170421,
      "grad_norm": 0.4797396659851074,
      "learning_rate": 2.2950257968015922e-07,
      "loss": 0.0081,
      "step": 2985600
    },
    {
      "epoch": 4.886032612609074,
      "grad_norm": 0.1333416998386383,
      "learning_rate": 2.294366874666421e-07,
      "loss": 0.0087,
      "step": 2985620
    },
    {
      "epoch": 4.886065343047727,
      "grad_norm": 0.13016538321971893,
      "learning_rate": 2.2937079525312496e-07,
      "loss": 0.0064,
      "step": 2985640
    },
    {
      "epoch": 4.886098073486381,
      "grad_norm": 0.032760340720415115,
      "learning_rate": 2.2930490303960784e-07,
      "loss": 0.0103,
      "step": 2985660
    },
    {
      "epoch": 4.886130803925035,
      "grad_norm": 0.15987905859947205,
      "learning_rate": 2.292390108260907e-07,
      "loss": 0.0064,
      "step": 2985680
    },
    {
      "epoch": 4.886163534363687,
      "grad_norm": 0.29448235034942627,
      "learning_rate": 2.2917311861257358e-07,
      "loss": 0.0136,
      "step": 2985700
    },
    {
      "epoch": 4.886196264802341,
      "grad_norm": 0.11500199139118195,
      "learning_rate": 2.2910722639905645e-07,
      "loss": 0.0104,
      "step": 2985720
    },
    {
      "epoch": 4.8862289952409945,
      "grad_norm": 0.14748793840408325,
      "learning_rate": 2.290413341855393e-07,
      "loss": 0.0084,
      "step": 2985740
    },
    {
      "epoch": 4.886261725679647,
      "grad_norm": 0.3132834732532501,
      "learning_rate": 2.2897544197202217e-07,
      "loss": 0.0132,
      "step": 2985760
    },
    {
      "epoch": 4.886294456118301,
      "grad_norm": 0.2161935269832611,
      "learning_rate": 2.2890954975850504e-07,
      "loss": 0.0071,
      "step": 2985780
    },
    {
      "epoch": 4.886327186556954,
      "grad_norm": 0.20865336060523987,
      "learning_rate": 2.2884365754498791e-07,
      "loss": 0.0074,
      "step": 2985800
    },
    {
      "epoch": 4.886359916995607,
      "grad_norm": 0.17243242263793945,
      "learning_rate": 2.2877776533147079e-07,
      "loss": 0.0087,
      "step": 2985820
    },
    {
      "epoch": 4.886392647434261,
      "grad_norm": 0.1405704766511917,
      "learning_rate": 2.2871187311795366e-07,
      "loss": 0.0108,
      "step": 2985840
    },
    {
      "epoch": 4.886425377872914,
      "grad_norm": 0.2609110474586487,
      "learning_rate": 2.2864598090443653e-07,
      "loss": 0.0104,
      "step": 2985860
    },
    {
      "epoch": 4.886458108311568,
      "grad_norm": 0.08043044805526733,
      "learning_rate": 2.2858008869091943e-07,
      "loss": 0.0069,
      "step": 2985880
    },
    {
      "epoch": 4.886490838750221,
      "grad_norm": 0.22835661470890045,
      "learning_rate": 2.285141964774023e-07,
      "loss": 0.0093,
      "step": 2985900
    },
    {
      "epoch": 4.886523569188874,
      "grad_norm": 0.1338776797056198,
      "learning_rate": 2.2844830426388517e-07,
      "loss": 0.0127,
      "step": 2985920
    },
    {
      "epoch": 4.886556299627528,
      "grad_norm": 0.13107062876224518,
      "learning_rate": 2.2838241205036805e-07,
      "loss": 0.0093,
      "step": 2985940
    },
    {
      "epoch": 4.886589030066181,
      "grad_norm": 0.2791058123111725,
      "learning_rate": 2.2831651983685092e-07,
      "loss": 0.0068,
      "step": 2985960
    },
    {
      "epoch": 4.886621760504834,
      "grad_norm": 0.10791180282831192,
      "learning_rate": 2.2825062762333376e-07,
      "loss": 0.012,
      "step": 2985980
    },
    {
      "epoch": 4.886654490943488,
      "grad_norm": 0.10559762269258499,
      "learning_rate": 2.2818473540981664e-07,
      "loss": 0.0076,
      "step": 2986000
    },
    {
      "epoch": 4.886687221382141,
      "grad_norm": 0.11100797355175018,
      "learning_rate": 2.281188431962995e-07,
      "loss": 0.0072,
      "step": 2986020
    },
    {
      "epoch": 4.886719951820794,
      "grad_norm": 0.053690243512392044,
      "learning_rate": 2.2805295098278238e-07,
      "loss": 0.0091,
      "step": 2986040
    },
    {
      "epoch": 4.886752682259448,
      "grad_norm": 0.2504412531852722,
      "learning_rate": 2.2798705876926525e-07,
      "loss": 0.0089,
      "step": 2986060
    },
    {
      "epoch": 4.886785412698101,
      "grad_norm": 0.25010716915130615,
      "learning_rate": 2.2792116655574812e-07,
      "loss": 0.0079,
      "step": 2986080
    },
    {
      "epoch": 4.886818143136754,
      "grad_norm": 0.16792844235897064,
      "learning_rate": 2.27855274342231e-07,
      "loss": 0.0106,
      "step": 2986100
    },
    {
      "epoch": 4.8868508735754075,
      "grad_norm": 0.092814601957798,
      "learning_rate": 2.2778938212871387e-07,
      "loss": 0.0102,
      "step": 2986120
    },
    {
      "epoch": 4.886883604014061,
      "grad_norm": 0.3614407479763031,
      "learning_rate": 2.2772348991519674e-07,
      "loss": 0.0088,
      "step": 2986140
    },
    {
      "epoch": 4.886916334452715,
      "grad_norm": 0.3153885006904602,
      "learning_rate": 2.276575977016796e-07,
      "loss": 0.0101,
      "step": 2986160
    },
    {
      "epoch": 4.886949064891367,
      "grad_norm": 0.34481456875801086,
      "learning_rate": 2.275917054881625e-07,
      "loss": 0.0069,
      "step": 2986180
    },
    {
      "epoch": 4.886981795330021,
      "grad_norm": 0.1447654664516449,
      "learning_rate": 2.2752581327464533e-07,
      "loss": 0.0098,
      "step": 2986200
    },
    {
      "epoch": 4.887014525768675,
      "grad_norm": 0.2396470010280609,
      "learning_rate": 2.274599210611282e-07,
      "loss": 0.0131,
      "step": 2986220
    },
    {
      "epoch": 4.887047256207328,
      "grad_norm": 0.24173232913017273,
      "learning_rate": 2.273940288476111e-07,
      "loss": 0.01,
      "step": 2986240
    },
    {
      "epoch": 4.887079986645981,
      "grad_norm": 0.2008914351463318,
      "learning_rate": 2.2732813663409397e-07,
      "loss": 0.0084,
      "step": 2986260
    },
    {
      "epoch": 4.8871127170846345,
      "grad_norm": 0.20760774612426758,
      "learning_rate": 2.2726224442057684e-07,
      "loss": 0.0104,
      "step": 2986280
    },
    {
      "epoch": 4.887145447523288,
      "grad_norm": 0.8544594645500183,
      "learning_rate": 2.2719635220705972e-07,
      "loss": 0.0132,
      "step": 2986300
    },
    {
      "epoch": 4.887178177961941,
      "grad_norm": 0.20383164286613464,
      "learning_rate": 2.271304599935426e-07,
      "loss": 0.0068,
      "step": 2986320
    },
    {
      "epoch": 4.887210908400594,
      "grad_norm": 0.30290108919143677,
      "learning_rate": 2.2706456778002546e-07,
      "loss": 0.0061,
      "step": 2986340
    },
    {
      "epoch": 4.887243638839248,
      "grad_norm": 0.375671923160553,
      "learning_rate": 2.2699867556650833e-07,
      "loss": 0.0109,
      "step": 2986360
    },
    {
      "epoch": 4.887276369277901,
      "grad_norm": 0.11614176630973816,
      "learning_rate": 2.269327833529912e-07,
      "loss": 0.0113,
      "step": 2986380
    },
    {
      "epoch": 4.887309099716554,
      "grad_norm": 0.24844489991664886,
      "learning_rate": 2.2686689113947408e-07,
      "loss": 0.01,
      "step": 2986400
    },
    {
      "epoch": 4.887341830155208,
      "grad_norm": 0.22856716811656952,
      "learning_rate": 2.2680099892595692e-07,
      "loss": 0.0089,
      "step": 2986420
    },
    {
      "epoch": 4.887374560593861,
      "grad_norm": 0.6923092007637024,
      "learning_rate": 2.267351067124398e-07,
      "loss": 0.0099,
      "step": 2986440
    },
    {
      "epoch": 4.887407291032514,
      "grad_norm": 0.2513810396194458,
      "learning_rate": 2.2666921449892267e-07,
      "loss": 0.0105,
      "step": 2986460
    },
    {
      "epoch": 4.887440021471168,
      "grad_norm": 0.13904406130313873,
      "learning_rate": 2.2660332228540554e-07,
      "loss": 0.0089,
      "step": 2986480
    },
    {
      "epoch": 4.887472751909821,
      "grad_norm": 0.11500868946313858,
      "learning_rate": 2.265374300718884e-07,
      "loss": 0.0084,
      "step": 2986500
    },
    {
      "epoch": 4.887505482348475,
      "grad_norm": 0.6630894541740417,
      "learning_rate": 2.2647153785837128e-07,
      "loss": 0.009,
      "step": 2986520
    },
    {
      "epoch": 4.887538212787128,
      "grad_norm": 0.0830419585108757,
      "learning_rate": 2.2640564564485418e-07,
      "loss": 0.0087,
      "step": 2986540
    },
    {
      "epoch": 4.887570943225781,
      "grad_norm": 0.35598933696746826,
      "learning_rate": 2.2633975343133705e-07,
      "loss": 0.0055,
      "step": 2986560
    },
    {
      "epoch": 4.887603673664435,
      "grad_norm": 0.2839401662349701,
      "learning_rate": 2.2627386121781993e-07,
      "loss": 0.0054,
      "step": 2986580
    },
    {
      "epoch": 4.8876364041030875,
      "grad_norm": 0.10646529495716095,
      "learning_rate": 2.262079690043028e-07,
      "loss": 0.0055,
      "step": 2986600
    },
    {
      "epoch": 4.887669134541741,
      "grad_norm": 0.27928367257118225,
      "learning_rate": 2.2614207679078567e-07,
      "loss": 0.0105,
      "step": 2986620
    },
    {
      "epoch": 4.887701864980395,
      "grad_norm": 0.15401379764080048,
      "learning_rate": 2.2607618457726852e-07,
      "loss": 0.0079,
      "step": 2986640
    },
    {
      "epoch": 4.887734595419047,
      "grad_norm": 0.9218854308128357,
      "learning_rate": 2.260102923637514e-07,
      "loss": 0.0113,
      "step": 2986660
    },
    {
      "epoch": 4.887767325857701,
      "grad_norm": 0.2957843840122223,
      "learning_rate": 2.2594440015023426e-07,
      "loss": 0.0063,
      "step": 2986680
    },
    {
      "epoch": 4.887800056296355,
      "grad_norm": 0.3777185380458832,
      "learning_rate": 2.2587850793671713e-07,
      "loss": 0.0162,
      "step": 2986700
    },
    {
      "epoch": 4.887832786735008,
      "grad_norm": 0.177394837141037,
      "learning_rate": 2.258126157232e-07,
      "loss": 0.0081,
      "step": 2986720
    },
    {
      "epoch": 4.887865517173661,
      "grad_norm": 0.8092839121818542,
      "learning_rate": 2.2574672350968288e-07,
      "loss": 0.0079,
      "step": 2986740
    },
    {
      "epoch": 4.8878982476123145,
      "grad_norm": 0.23949719965457916,
      "learning_rate": 2.2568083129616575e-07,
      "loss": 0.0086,
      "step": 2986760
    },
    {
      "epoch": 4.887930978050968,
      "grad_norm": 0.32396215200424194,
      "learning_rate": 2.2561493908264862e-07,
      "loss": 0.0094,
      "step": 2986780
    },
    {
      "epoch": 4.887963708489621,
      "grad_norm": 0.11086579412221909,
      "learning_rate": 2.255490468691315e-07,
      "loss": 0.0121,
      "step": 2986800
    },
    {
      "epoch": 4.887996438928274,
      "grad_norm": 0.16850653290748596,
      "learning_rate": 2.254831546556144e-07,
      "loss": 0.0088,
      "step": 2986820
    },
    {
      "epoch": 4.888029169366928,
      "grad_norm": 0.2486824095249176,
      "learning_rate": 2.2541726244209726e-07,
      "loss": 0.0093,
      "step": 2986840
    },
    {
      "epoch": 4.888061899805582,
      "grad_norm": 0.5161120891571045,
      "learning_rate": 2.2535137022858008e-07,
      "loss": 0.0122,
      "step": 2986860
    },
    {
      "epoch": 4.888094630244234,
      "grad_norm": 0.21292249858379364,
      "learning_rate": 2.2528547801506295e-07,
      "loss": 0.0066,
      "step": 2986880
    },
    {
      "epoch": 4.888127360682888,
      "grad_norm": 0.15834908187389374,
      "learning_rate": 2.2521958580154585e-07,
      "loss": 0.0098,
      "step": 2986900
    },
    {
      "epoch": 4.8881600911215415,
      "grad_norm": 0.30032211542129517,
      "learning_rate": 2.2515369358802873e-07,
      "loss": 0.0061,
      "step": 2986920
    },
    {
      "epoch": 4.888192821560194,
      "grad_norm": 0.15416546165943146,
      "learning_rate": 2.250878013745116e-07,
      "loss": 0.0095,
      "step": 2986940
    },
    {
      "epoch": 4.888225551998848,
      "grad_norm": 0.6193450093269348,
      "learning_rate": 2.2502190916099447e-07,
      "loss": 0.0055,
      "step": 2986960
    },
    {
      "epoch": 4.888258282437501,
      "grad_norm": 0.6627268195152283,
      "learning_rate": 2.2495601694747734e-07,
      "loss": 0.0075,
      "step": 2986980
    },
    {
      "epoch": 4.888291012876154,
      "grad_norm": 0.0832928940653801,
      "learning_rate": 2.2489012473396021e-07,
      "loss": 0.012,
      "step": 2987000
    },
    {
      "epoch": 4.888323743314808,
      "grad_norm": 0.0732264295220375,
      "learning_rate": 2.2482423252044309e-07,
      "loss": 0.007,
      "step": 2987020
    },
    {
      "epoch": 4.888356473753461,
      "grad_norm": 0.1651841402053833,
      "learning_rate": 2.2475834030692596e-07,
      "loss": 0.0065,
      "step": 2987040
    },
    {
      "epoch": 4.888389204192115,
      "grad_norm": 0.40484946966171265,
      "learning_rate": 2.2469244809340883e-07,
      "loss": 0.0076,
      "step": 2987060
    },
    {
      "epoch": 4.888421934630768,
      "grad_norm": 0.2756250500679016,
      "learning_rate": 2.2462655587989168e-07,
      "loss": 0.0099,
      "step": 2987080
    },
    {
      "epoch": 4.888454665069421,
      "grad_norm": 0.15591420233249664,
      "learning_rate": 2.2456066366637455e-07,
      "loss": 0.0091,
      "step": 2987100
    },
    {
      "epoch": 4.888487395508075,
      "grad_norm": 0.03818786144256592,
      "learning_rate": 2.2449477145285742e-07,
      "loss": 0.0115,
      "step": 2987120
    },
    {
      "epoch": 4.888520125946728,
      "grad_norm": 0.22003291547298431,
      "learning_rate": 2.244288792393403e-07,
      "loss": 0.0086,
      "step": 2987140
    },
    {
      "epoch": 4.888552856385381,
      "grad_norm": 0.28289732336997986,
      "learning_rate": 2.2436298702582316e-07,
      "loss": 0.0117,
      "step": 2987160
    },
    {
      "epoch": 4.888585586824035,
      "grad_norm": 0.29574868083000183,
      "learning_rate": 2.2429709481230604e-07,
      "loss": 0.0064,
      "step": 2987180
    },
    {
      "epoch": 4.888618317262688,
      "grad_norm": 0.5819180011749268,
      "learning_rate": 2.2423120259878893e-07,
      "loss": 0.0088,
      "step": 2987200
    },
    {
      "epoch": 4.888651047701341,
      "grad_norm": 0.0948299914598465,
      "learning_rate": 2.241653103852718e-07,
      "loss": 0.0068,
      "step": 2987220
    },
    {
      "epoch": 4.8886837781399946,
      "grad_norm": 0.5261148810386658,
      "learning_rate": 2.2409941817175468e-07,
      "loss": 0.0064,
      "step": 2987240
    },
    {
      "epoch": 4.888716508578648,
      "grad_norm": 0.1501539945602417,
      "learning_rate": 2.2403352595823755e-07,
      "loss": 0.0076,
      "step": 2987260
    },
    {
      "epoch": 4.888749239017301,
      "grad_norm": 0.21131736040115356,
      "learning_rate": 2.2396763374472042e-07,
      "loss": 0.0123,
      "step": 2987280
    },
    {
      "epoch": 4.8887819694559544,
      "grad_norm": 0.12497502565383911,
      "learning_rate": 2.2390174153120327e-07,
      "loss": 0.0071,
      "step": 2987300
    },
    {
      "epoch": 4.888814699894608,
      "grad_norm": 0.1268712878227234,
      "learning_rate": 2.2383584931768614e-07,
      "loss": 0.0078,
      "step": 2987320
    },
    {
      "epoch": 4.888847430333262,
      "grad_norm": 0.439725399017334,
      "learning_rate": 2.23769957104169e-07,
      "loss": 0.0078,
      "step": 2987340
    },
    {
      "epoch": 4.888880160771914,
      "grad_norm": 0.15746057033538818,
      "learning_rate": 2.2370406489065188e-07,
      "loss": 0.0061,
      "step": 2987360
    },
    {
      "epoch": 4.888912891210568,
      "grad_norm": 0.09126362949609756,
      "learning_rate": 2.2363817267713476e-07,
      "loss": 0.0072,
      "step": 2987380
    },
    {
      "epoch": 4.8889456216492215,
      "grad_norm": 0.3535265028476715,
      "learning_rate": 2.2357228046361763e-07,
      "loss": 0.0079,
      "step": 2987400
    },
    {
      "epoch": 4.888978352087875,
      "grad_norm": 0.5465719103813171,
      "learning_rate": 2.235063882501005e-07,
      "loss": 0.0088,
      "step": 2987420
    },
    {
      "epoch": 4.889011082526528,
      "grad_norm": 0.09921952337026596,
      "learning_rate": 2.2344049603658337e-07,
      "loss": 0.0085,
      "step": 2987440
    },
    {
      "epoch": 4.889043812965181,
      "grad_norm": 0.3835447132587433,
      "learning_rate": 2.2337460382306625e-07,
      "loss": 0.0075,
      "step": 2987460
    },
    {
      "epoch": 4.889076543403835,
      "grad_norm": 0.18774864077568054,
      "learning_rate": 2.2330871160954914e-07,
      "loss": 0.0112,
      "step": 2987480
    },
    {
      "epoch": 4.889109273842488,
      "grad_norm": 0.5068871974945068,
      "learning_rate": 2.2324281939603202e-07,
      "loss": 0.011,
      "step": 2987500
    },
    {
      "epoch": 4.889142004281141,
      "grad_norm": 0.10605653375387192,
      "learning_rate": 2.2317692718251484e-07,
      "loss": 0.0056,
      "step": 2987520
    },
    {
      "epoch": 4.889174734719795,
      "grad_norm": 0.1806134283542633,
      "learning_rate": 2.231110349689977e-07,
      "loss": 0.0081,
      "step": 2987540
    },
    {
      "epoch": 4.889207465158448,
      "grad_norm": 0.1653912216424942,
      "learning_rate": 2.230451427554806e-07,
      "loss": 0.0054,
      "step": 2987560
    },
    {
      "epoch": 4.889240195597101,
      "grad_norm": 0.30599409341812134,
      "learning_rate": 2.2297925054196348e-07,
      "loss": 0.0076,
      "step": 2987580
    },
    {
      "epoch": 4.889272926035755,
      "grad_norm": 0.19282951951026917,
      "learning_rate": 2.2291335832844635e-07,
      "loss": 0.0077,
      "step": 2987600
    },
    {
      "epoch": 4.889305656474408,
      "grad_norm": 0.2281521111726761,
      "learning_rate": 2.2284746611492922e-07,
      "loss": 0.012,
      "step": 2987620
    },
    {
      "epoch": 4.889338386913061,
      "grad_norm": 0.10405424237251282,
      "learning_rate": 2.227815739014121e-07,
      "loss": 0.0079,
      "step": 2987640
    },
    {
      "epoch": 4.889371117351715,
      "grad_norm": 0.4535329341888428,
      "learning_rate": 2.2271568168789497e-07,
      "loss": 0.0084,
      "step": 2987660
    },
    {
      "epoch": 4.889403847790368,
      "grad_norm": 0.17655664682388306,
      "learning_rate": 2.2264978947437784e-07,
      "loss": 0.0095,
      "step": 2987680
    },
    {
      "epoch": 4.889436578229022,
      "grad_norm": 0.11166229099035263,
      "learning_rate": 2.225838972608607e-07,
      "loss": 0.0106,
      "step": 2987700
    },
    {
      "epoch": 4.889469308667675,
      "grad_norm": 0.284379780292511,
      "learning_rate": 2.2251800504734358e-07,
      "loss": 0.0083,
      "step": 2987720
    },
    {
      "epoch": 4.889502039106328,
      "grad_norm": 0.05394423007965088,
      "learning_rate": 2.2245211283382643e-07,
      "loss": 0.0097,
      "step": 2987740
    },
    {
      "epoch": 4.889534769544982,
      "grad_norm": 0.24555528163909912,
      "learning_rate": 2.223862206203093e-07,
      "loss": 0.0133,
      "step": 2987760
    },
    {
      "epoch": 4.8895674999836345,
      "grad_norm": 0.06505759060382843,
      "learning_rate": 2.2232032840679217e-07,
      "loss": 0.0101,
      "step": 2987780
    },
    {
      "epoch": 4.889600230422288,
      "grad_norm": 0.10031872242689133,
      "learning_rate": 2.2225443619327504e-07,
      "loss": 0.0056,
      "step": 2987800
    },
    {
      "epoch": 4.889632960860942,
      "grad_norm": 0.4052025377750397,
      "learning_rate": 2.2218854397975792e-07,
      "loss": 0.0079,
      "step": 2987820
    },
    {
      "epoch": 4.889665691299594,
      "grad_norm": 0.17619498074054718,
      "learning_rate": 2.221226517662408e-07,
      "loss": 0.0122,
      "step": 2987840
    },
    {
      "epoch": 4.889698421738248,
      "grad_norm": 0.34475892782211304,
      "learning_rate": 2.220567595527237e-07,
      "loss": 0.0136,
      "step": 2987860
    },
    {
      "epoch": 4.889731152176902,
      "grad_norm": 0.4740011692047119,
      "learning_rate": 2.2199086733920656e-07,
      "loss": 0.0139,
      "step": 2987880
    },
    {
      "epoch": 4.889763882615555,
      "grad_norm": 0.07459508627653122,
      "learning_rate": 2.2192497512568943e-07,
      "loss": 0.0132,
      "step": 2987900
    },
    {
      "epoch": 4.889796613054208,
      "grad_norm": 0.11795680224895477,
      "learning_rate": 2.218590829121723e-07,
      "loss": 0.0083,
      "step": 2987920
    },
    {
      "epoch": 4.8898293434928615,
      "grad_norm": 0.22366459667682648,
      "learning_rate": 2.2179319069865518e-07,
      "loss": 0.0057,
      "step": 2987940
    },
    {
      "epoch": 4.889862073931515,
      "grad_norm": 0.12231405824422836,
      "learning_rate": 2.2172729848513802e-07,
      "loss": 0.0079,
      "step": 2987960
    },
    {
      "epoch": 4.889894804370169,
      "grad_norm": 0.1598593294620514,
      "learning_rate": 2.216614062716209e-07,
      "loss": 0.0094,
      "step": 2987980
    },
    {
      "epoch": 4.889927534808821,
      "grad_norm": 0.1547584980726242,
      "learning_rate": 2.2159551405810377e-07,
      "loss": 0.0092,
      "step": 2988000
    },
    {
      "epoch": 4.889960265247475,
      "grad_norm": 0.22866088151931763,
      "learning_rate": 2.2152962184458664e-07,
      "loss": 0.0066,
      "step": 2988020
    },
    {
      "epoch": 4.8899929956861286,
      "grad_norm": 0.18969736993312836,
      "learning_rate": 2.214637296310695e-07,
      "loss": 0.0086,
      "step": 2988040
    },
    {
      "epoch": 4.890025726124781,
      "grad_norm": 0.24041755497455597,
      "learning_rate": 2.2139783741755238e-07,
      "loss": 0.0066,
      "step": 2988060
    },
    {
      "epoch": 4.890058456563435,
      "grad_norm": 0.08819909393787384,
      "learning_rate": 2.2133194520403525e-07,
      "loss": 0.0095,
      "step": 2988080
    },
    {
      "epoch": 4.8900911870020884,
      "grad_norm": 0.3804437518119812,
      "learning_rate": 2.2126605299051813e-07,
      "loss": 0.0083,
      "step": 2988100
    },
    {
      "epoch": 4.890123917440741,
      "grad_norm": 0.24251839518547058,
      "learning_rate": 2.21200160777001e-07,
      "loss": 0.0129,
      "step": 2988120
    },
    {
      "epoch": 4.890156647879395,
      "grad_norm": 0.40167751908302307,
      "learning_rate": 2.211342685634839e-07,
      "loss": 0.0096,
      "step": 2988140
    },
    {
      "epoch": 4.890189378318048,
      "grad_norm": 0.13878999650478363,
      "learning_rate": 2.2106837634996677e-07,
      "loss": 0.0118,
      "step": 2988160
    },
    {
      "epoch": 4.890222108756702,
      "grad_norm": 0.554427444934845,
      "learning_rate": 2.210024841364496e-07,
      "loss": 0.0101,
      "step": 2988180
    },
    {
      "epoch": 4.890254839195355,
      "grad_norm": 0.2823978662490845,
      "learning_rate": 2.2093659192293246e-07,
      "loss": 0.0066,
      "step": 2988200
    },
    {
      "epoch": 4.890287569634008,
      "grad_norm": 0.14494504034519196,
      "learning_rate": 2.2087069970941536e-07,
      "loss": 0.0094,
      "step": 2988220
    },
    {
      "epoch": 4.890320300072662,
      "grad_norm": 0.3693685829639435,
      "learning_rate": 2.2080480749589823e-07,
      "loss": 0.0147,
      "step": 2988240
    },
    {
      "epoch": 4.8903530305113145,
      "grad_norm": 0.22763390839099884,
      "learning_rate": 2.207389152823811e-07,
      "loss": 0.0073,
      "step": 2988260
    },
    {
      "epoch": 4.890385760949968,
      "grad_norm": 0.06329190731048584,
      "learning_rate": 2.2067302306886397e-07,
      "loss": 0.0068,
      "step": 2988280
    },
    {
      "epoch": 4.890418491388622,
      "grad_norm": 0.05448480322957039,
      "learning_rate": 2.2060713085534685e-07,
      "loss": 0.0185,
      "step": 2988300
    },
    {
      "epoch": 4.890451221827275,
      "grad_norm": 0.5564145445823669,
      "learning_rate": 2.2054123864182972e-07,
      "loss": 0.0109,
      "step": 2988320
    },
    {
      "epoch": 4.890483952265928,
      "grad_norm": 0.31372320652008057,
      "learning_rate": 2.204753464283126e-07,
      "loss": 0.0112,
      "step": 2988340
    },
    {
      "epoch": 4.890516682704582,
      "grad_norm": 0.35838761925697327,
      "learning_rate": 2.2040945421479546e-07,
      "loss": 0.0094,
      "step": 2988360
    },
    {
      "epoch": 4.890549413143235,
      "grad_norm": 0.29394668340682983,
      "learning_rate": 2.2034356200127834e-07,
      "loss": 0.0097,
      "step": 2988380
    },
    {
      "epoch": 4.890582143581888,
      "grad_norm": 0.37377187609672546,
      "learning_rate": 2.2027766978776118e-07,
      "loss": 0.0196,
      "step": 2988400
    },
    {
      "epoch": 4.8906148740205415,
      "grad_norm": 0.3711406886577606,
      "learning_rate": 2.2021177757424405e-07,
      "loss": 0.007,
      "step": 2988420
    },
    {
      "epoch": 4.890647604459195,
      "grad_norm": 0.4632764756679535,
      "learning_rate": 2.2014588536072692e-07,
      "loss": 0.0123,
      "step": 2988440
    },
    {
      "epoch": 4.890680334897848,
      "grad_norm": 0.20785681903362274,
      "learning_rate": 2.200799931472098e-07,
      "loss": 0.0104,
      "step": 2988460
    },
    {
      "epoch": 4.890713065336501,
      "grad_norm": 0.2807207703590393,
      "learning_rate": 2.2001410093369267e-07,
      "loss": 0.0092,
      "step": 2988480
    },
    {
      "epoch": 4.890745795775155,
      "grad_norm": 0.06379824876785278,
      "learning_rate": 2.1994820872017557e-07,
      "loss": 0.007,
      "step": 2988500
    },
    {
      "epoch": 4.890778526213809,
      "grad_norm": 0.07233443111181259,
      "learning_rate": 2.1988231650665844e-07,
      "loss": 0.0084,
      "step": 2988520
    },
    {
      "epoch": 4.890811256652461,
      "grad_norm": 0.24792709946632385,
      "learning_rate": 2.198164242931413e-07,
      "loss": 0.0087,
      "step": 2988540
    },
    {
      "epoch": 4.890843987091115,
      "grad_norm": 0.0717078447341919,
      "learning_rate": 2.1975053207962418e-07,
      "loss": 0.005,
      "step": 2988560
    },
    {
      "epoch": 4.8908767175297685,
      "grad_norm": 0.197430819272995,
      "learning_rate": 2.1968463986610706e-07,
      "loss": 0.0104,
      "step": 2988580
    },
    {
      "epoch": 4.890909447968422,
      "grad_norm": 0.5060579776763916,
      "learning_rate": 2.1961874765258993e-07,
      "loss": 0.0083,
      "step": 2988600
    },
    {
      "epoch": 4.890942178407075,
      "grad_norm": 0.20166164636611938,
      "learning_rate": 2.1955285543907277e-07,
      "loss": 0.0073,
      "step": 2988620
    },
    {
      "epoch": 4.890974908845728,
      "grad_norm": 0.06969856470823288,
      "learning_rate": 2.1948696322555565e-07,
      "loss": 0.0076,
      "step": 2988640
    },
    {
      "epoch": 4.891007639284382,
      "grad_norm": 0.26888129115104675,
      "learning_rate": 2.1942107101203852e-07,
      "loss": 0.0111,
      "step": 2988660
    },
    {
      "epoch": 4.891040369723035,
      "grad_norm": 0.18136662244796753,
      "learning_rate": 2.193551787985214e-07,
      "loss": 0.0091,
      "step": 2988680
    },
    {
      "epoch": 4.891073100161688,
      "grad_norm": 0.0932861790060997,
      "learning_rate": 2.1928928658500426e-07,
      "loss": 0.0082,
      "step": 2988700
    },
    {
      "epoch": 4.891105830600342,
      "grad_norm": 0.25700920820236206,
      "learning_rate": 2.1922339437148713e-07,
      "loss": 0.0092,
      "step": 2988720
    },
    {
      "epoch": 4.891138561038995,
      "grad_norm": 0.3833460211753845,
      "learning_rate": 2.1915750215797e-07,
      "loss": 0.0115,
      "step": 2988740
    },
    {
      "epoch": 4.891171291477648,
      "grad_norm": 0.21115508675575256,
      "learning_rate": 2.1909160994445288e-07,
      "loss": 0.0108,
      "step": 2988760
    },
    {
      "epoch": 4.891204021916302,
      "grad_norm": 0.13281364738941193,
      "learning_rate": 2.1902571773093575e-07,
      "loss": 0.0073,
      "step": 2988780
    },
    {
      "epoch": 4.891236752354955,
      "grad_norm": 0.2028016746044159,
      "learning_rate": 2.1895982551741865e-07,
      "loss": 0.0103,
      "step": 2988800
    },
    {
      "epoch": 4.891269482793608,
      "grad_norm": 0.3600612282752991,
      "learning_rate": 2.1889393330390152e-07,
      "loss": 0.0078,
      "step": 2988820
    },
    {
      "epoch": 4.891302213232262,
      "grad_norm": 0.3086715042591095,
      "learning_rate": 2.1882804109038434e-07,
      "loss": 0.0064,
      "step": 2988840
    },
    {
      "epoch": 4.891334943670915,
      "grad_norm": 0.7639320492744446,
      "learning_rate": 2.187621488768672e-07,
      "loss": 0.0079,
      "step": 2988860
    },
    {
      "epoch": 4.891367674109569,
      "grad_norm": 0.5670461654663086,
      "learning_rate": 2.186962566633501e-07,
      "loss": 0.0125,
      "step": 2988880
    },
    {
      "epoch": 4.891400404548222,
      "grad_norm": 0.24634337425231934,
      "learning_rate": 2.1863036444983298e-07,
      "loss": 0.0093,
      "step": 2988900
    },
    {
      "epoch": 4.891433134986875,
      "grad_norm": 0.22201891243457794,
      "learning_rate": 2.1856447223631586e-07,
      "loss": 0.0126,
      "step": 2988920
    },
    {
      "epoch": 4.891465865425529,
      "grad_norm": 0.3069538176059723,
      "learning_rate": 2.1849858002279873e-07,
      "loss": 0.0114,
      "step": 2988940
    },
    {
      "epoch": 4.8914985958641815,
      "grad_norm": 0.08374916017055511,
      "learning_rate": 2.184326878092816e-07,
      "loss": 0.0063,
      "step": 2988960
    },
    {
      "epoch": 4.891531326302835,
      "grad_norm": 0.2028244584798813,
      "learning_rate": 2.1836679559576447e-07,
      "loss": 0.0064,
      "step": 2988980
    },
    {
      "epoch": 4.891564056741489,
      "grad_norm": 0.18272626399993896,
      "learning_rate": 2.1830090338224734e-07,
      "loss": 0.0122,
      "step": 2989000
    },
    {
      "epoch": 4.891596787180141,
      "grad_norm": 0.22509494423866272,
      "learning_rate": 2.1823501116873022e-07,
      "loss": 0.0086,
      "step": 2989020
    },
    {
      "epoch": 4.891629517618795,
      "grad_norm": 0.3318214416503906,
      "learning_rate": 2.181691189552131e-07,
      "loss": 0.0098,
      "step": 2989040
    },
    {
      "epoch": 4.8916622480574485,
      "grad_norm": 0.10926920920610428,
      "learning_rate": 2.1810322674169593e-07,
      "loss": 0.0071,
      "step": 2989060
    },
    {
      "epoch": 4.891694978496102,
      "grad_norm": 0.17979705333709717,
      "learning_rate": 2.180373345281788e-07,
      "loss": 0.0056,
      "step": 2989080
    },
    {
      "epoch": 4.891727708934755,
      "grad_norm": 0.29968762397766113,
      "learning_rate": 2.1797144231466168e-07,
      "loss": 0.0092,
      "step": 2989100
    },
    {
      "epoch": 4.891760439373408,
      "grad_norm": 0.3402875065803528,
      "learning_rate": 2.1790555010114455e-07,
      "loss": 0.0102,
      "step": 2989120
    },
    {
      "epoch": 4.891793169812062,
      "grad_norm": 0.11558941751718521,
      "learning_rate": 2.1783965788762742e-07,
      "loss": 0.0076,
      "step": 2989140
    },
    {
      "epoch": 4.891825900250716,
      "grad_norm": 0.4043107330799103,
      "learning_rate": 2.1777376567411032e-07,
      "loss": 0.0102,
      "step": 2989160
    },
    {
      "epoch": 4.891858630689368,
      "grad_norm": 0.19241507351398468,
      "learning_rate": 2.177078734605932e-07,
      "loss": 0.0083,
      "step": 2989180
    },
    {
      "epoch": 4.891891361128022,
      "grad_norm": 0.11478407680988312,
      "learning_rate": 2.1764198124707606e-07,
      "loss": 0.0081,
      "step": 2989200
    },
    {
      "epoch": 4.8919240915666755,
      "grad_norm": 0.18442806601524353,
      "learning_rate": 2.1757608903355894e-07,
      "loss": 0.0124,
      "step": 2989220
    },
    {
      "epoch": 4.891956822005328,
      "grad_norm": 0.11611629277467728,
      "learning_rate": 2.175101968200418e-07,
      "loss": 0.0063,
      "step": 2989240
    },
    {
      "epoch": 4.891989552443982,
      "grad_norm": 0.3191470801830292,
      "learning_rate": 2.1744430460652468e-07,
      "loss": 0.0095,
      "step": 2989260
    },
    {
      "epoch": 4.892022282882635,
      "grad_norm": 0.16583868861198425,
      "learning_rate": 2.1737841239300753e-07,
      "loss": 0.0135,
      "step": 2989280
    },
    {
      "epoch": 4.892055013321288,
      "grad_norm": 0.07326497882604599,
      "learning_rate": 2.173125201794904e-07,
      "loss": 0.0088,
      "step": 2989300
    },
    {
      "epoch": 4.892087743759942,
      "grad_norm": 0.2002515196800232,
      "learning_rate": 2.1724662796597327e-07,
      "loss": 0.0064,
      "step": 2989320
    },
    {
      "epoch": 4.892120474198595,
      "grad_norm": 0.10551665723323822,
      "learning_rate": 2.1718073575245614e-07,
      "loss": 0.0149,
      "step": 2989340
    },
    {
      "epoch": 4.892153204637249,
      "grad_norm": 0.18177586793899536,
      "learning_rate": 2.1711484353893901e-07,
      "loss": 0.01,
      "step": 2989360
    },
    {
      "epoch": 4.892185935075902,
      "grad_norm": 0.4583502411842346,
      "learning_rate": 2.170489513254219e-07,
      "loss": 0.0084,
      "step": 2989380
    },
    {
      "epoch": 4.892218665514555,
      "grad_norm": 0.16541028022766113,
      "learning_rate": 2.1698305911190476e-07,
      "loss": 0.0079,
      "step": 2989400
    },
    {
      "epoch": 4.892251395953209,
      "grad_norm": 0.25721558928489685,
      "learning_rate": 2.1691716689838763e-07,
      "loss": 0.014,
      "step": 2989420
    },
    {
      "epoch": 4.892284126391862,
      "grad_norm": 0.22817373275756836,
      "learning_rate": 2.168512746848705e-07,
      "loss": 0.0121,
      "step": 2989440
    },
    {
      "epoch": 4.892316856830515,
      "grad_norm": 0.567774772644043,
      "learning_rate": 2.167853824713534e-07,
      "loss": 0.0085,
      "step": 2989460
    },
    {
      "epoch": 4.892349587269169,
      "grad_norm": 0.0576559342443943,
      "learning_rate": 2.1671949025783627e-07,
      "loss": 0.0067,
      "step": 2989480
    },
    {
      "epoch": 4.892382317707822,
      "grad_norm": 0.16161799430847168,
      "learning_rate": 2.166535980443191e-07,
      "loss": 0.0116,
      "step": 2989500
    },
    {
      "epoch": 4.892415048146475,
      "grad_norm": 0.09231380373239517,
      "learning_rate": 2.1658770583080197e-07,
      "loss": 0.014,
      "step": 2989520
    },
    {
      "epoch": 4.892447778585129,
      "grad_norm": 0.137358620762825,
      "learning_rate": 2.1652181361728486e-07,
      "loss": 0.0093,
      "step": 2989540
    },
    {
      "epoch": 4.892480509023782,
      "grad_norm": 0.18250887095928192,
      "learning_rate": 2.1645592140376774e-07,
      "loss": 0.0073,
      "step": 2989560
    },
    {
      "epoch": 4.892513239462435,
      "grad_norm": 0.5166969895362854,
      "learning_rate": 2.163900291902506e-07,
      "loss": 0.0072,
      "step": 2989580
    },
    {
      "epoch": 4.8925459699010885,
      "grad_norm": 0.3755926489830017,
      "learning_rate": 2.1632413697673348e-07,
      "loss": 0.0062,
      "step": 2989600
    },
    {
      "epoch": 4.892578700339742,
      "grad_norm": 0.114737868309021,
      "learning_rate": 2.1625824476321635e-07,
      "loss": 0.0086,
      "step": 2989620
    },
    {
      "epoch": 4.892611430778396,
      "grad_norm": 0.10380109399557114,
      "learning_rate": 2.1619235254969922e-07,
      "loss": 0.0087,
      "step": 2989640
    },
    {
      "epoch": 4.892644161217048,
      "grad_norm": 0.21578043699264526,
      "learning_rate": 2.161264603361821e-07,
      "loss": 0.008,
      "step": 2989660
    },
    {
      "epoch": 4.892676891655702,
      "grad_norm": 0.03741520270705223,
      "learning_rate": 2.1606056812266497e-07,
      "loss": 0.0051,
      "step": 2989680
    },
    {
      "epoch": 4.892709622094356,
      "grad_norm": 0.10755316913127899,
      "learning_rate": 2.1599467590914784e-07,
      "loss": 0.005,
      "step": 2989700
    },
    {
      "epoch": 4.892742352533008,
      "grad_norm": 0.1246211975812912,
      "learning_rate": 2.1592878369563069e-07,
      "loss": 0.0152,
      "step": 2989720
    },
    {
      "epoch": 4.892775082971662,
      "grad_norm": 0.4260617196559906,
      "learning_rate": 2.1586289148211356e-07,
      "loss": 0.0056,
      "step": 2989740
    },
    {
      "epoch": 4.8928078134103155,
      "grad_norm": 0.13883453607559204,
      "learning_rate": 2.1579699926859643e-07,
      "loss": 0.0079,
      "step": 2989760
    },
    {
      "epoch": 4.892840543848969,
      "grad_norm": 0.4495850205421448,
      "learning_rate": 2.157311070550793e-07,
      "loss": 0.0066,
      "step": 2989780
    },
    {
      "epoch": 4.892873274287622,
      "grad_norm": 0.16862542927265167,
      "learning_rate": 2.1566521484156217e-07,
      "loss": 0.0088,
      "step": 2989800
    },
    {
      "epoch": 4.892906004726275,
      "grad_norm": 0.39865028858184814,
      "learning_rate": 2.1559932262804507e-07,
      "loss": 0.0077,
      "step": 2989820
    },
    {
      "epoch": 4.892938735164929,
      "grad_norm": 0.16562961041927338,
      "learning_rate": 2.1553343041452795e-07,
      "loss": 0.0087,
      "step": 2989840
    },
    {
      "epoch": 4.892971465603582,
      "grad_norm": 0.11764205992221832,
      "learning_rate": 2.1546753820101082e-07,
      "loss": 0.0102,
      "step": 2989860
    },
    {
      "epoch": 4.893004196042235,
      "grad_norm": 0.3460850417613983,
      "learning_rate": 2.154016459874937e-07,
      "loss": 0.0122,
      "step": 2989880
    },
    {
      "epoch": 4.893036926480889,
      "grad_norm": 0.1688191443681717,
      "learning_rate": 2.1533575377397656e-07,
      "loss": 0.0075,
      "step": 2989900
    },
    {
      "epoch": 4.8930696569195415,
      "grad_norm": 0.17957660555839539,
      "learning_rate": 2.1526986156045943e-07,
      "loss": 0.0093,
      "step": 2989920
    },
    {
      "epoch": 4.893102387358195,
      "grad_norm": 0.20181813836097717,
      "learning_rate": 2.1520396934694228e-07,
      "loss": 0.01,
      "step": 2989940
    },
    {
      "epoch": 4.893135117796849,
      "grad_norm": 0.13044923543930054,
      "learning_rate": 2.1513807713342515e-07,
      "loss": 0.0065,
      "step": 2989960
    },
    {
      "epoch": 4.893167848235502,
      "grad_norm": 0.21353955566883087,
      "learning_rate": 2.1507218491990802e-07,
      "loss": 0.0075,
      "step": 2989980
    },
    {
      "epoch": 4.893200578674155,
      "grad_norm": 0.17090241611003876,
      "learning_rate": 2.150062927063909e-07,
      "loss": 0.0084,
      "step": 2990000
    },
    {
      "epoch": 4.893233309112809,
      "grad_norm": 0.17502157390117645,
      "learning_rate": 2.1494040049287377e-07,
      "loss": 0.007,
      "step": 2990020
    },
    {
      "epoch": 4.893266039551462,
      "grad_norm": 0.05678999423980713,
      "learning_rate": 2.1487450827935664e-07,
      "loss": 0.0094,
      "step": 2990040
    },
    {
      "epoch": 4.893298769990116,
      "grad_norm": 0.2111140936613083,
      "learning_rate": 2.148086160658395e-07,
      "loss": 0.0096,
      "step": 2990060
    },
    {
      "epoch": 4.8933315004287685,
      "grad_norm": 0.07202260941267014,
      "learning_rate": 2.1474272385232238e-07,
      "loss": 0.0071,
      "step": 2990080
    },
    {
      "epoch": 4.893364230867422,
      "grad_norm": 0.13858386874198914,
      "learning_rate": 2.1467683163880526e-07,
      "loss": 0.0085,
      "step": 2990100
    },
    {
      "epoch": 4.893396961306076,
      "grad_norm": 0.280139684677124,
      "learning_rate": 2.1461093942528815e-07,
      "loss": 0.0065,
      "step": 2990120
    },
    {
      "epoch": 4.893429691744728,
      "grad_norm": 0.1528703272342682,
      "learning_rate": 2.1454504721177103e-07,
      "loss": 0.0084,
      "step": 2990140
    },
    {
      "epoch": 4.893462422183382,
      "grad_norm": 0.30241072177886963,
      "learning_rate": 2.144791549982539e-07,
      "loss": 0.0083,
      "step": 2990160
    },
    {
      "epoch": 4.893495152622036,
      "grad_norm": 0.13987010717391968,
      "learning_rate": 2.1441326278473672e-07,
      "loss": 0.0079,
      "step": 2990180
    },
    {
      "epoch": 4.893527883060688,
      "grad_norm": 0.5931046009063721,
      "learning_rate": 2.1434737057121962e-07,
      "loss": 0.0074,
      "step": 2990200
    },
    {
      "epoch": 4.893560613499342,
      "grad_norm": 0.18534313142299652,
      "learning_rate": 2.142814783577025e-07,
      "loss": 0.0121,
      "step": 2990220
    },
    {
      "epoch": 4.8935933439379955,
      "grad_norm": 0.36335697770118713,
      "learning_rate": 2.1421558614418536e-07,
      "loss": 0.0073,
      "step": 2990240
    },
    {
      "epoch": 4.893626074376649,
      "grad_norm": 0.09204475581645966,
      "learning_rate": 2.1414969393066823e-07,
      "loss": 0.009,
      "step": 2990260
    },
    {
      "epoch": 4.893658804815302,
      "grad_norm": 0.31495964527130127,
      "learning_rate": 2.140838017171511e-07,
      "loss": 0.0087,
      "step": 2990280
    },
    {
      "epoch": 4.893691535253955,
      "grad_norm": 0.26069384813308716,
      "learning_rate": 2.1401790950363398e-07,
      "loss": 0.0067,
      "step": 2990300
    },
    {
      "epoch": 4.893724265692609,
      "grad_norm": 0.5697429180145264,
      "learning_rate": 2.1395201729011685e-07,
      "loss": 0.0099,
      "step": 2990320
    },
    {
      "epoch": 4.893756996131263,
      "grad_norm": 0.04992422088980675,
      "learning_rate": 2.1388612507659972e-07,
      "loss": 0.0118,
      "step": 2990340
    },
    {
      "epoch": 4.893789726569915,
      "grad_norm": 0.3349084258079529,
      "learning_rate": 2.138202328630826e-07,
      "loss": 0.0127,
      "step": 2990360
    },
    {
      "epoch": 4.893822457008569,
      "grad_norm": 0.4152313768863678,
      "learning_rate": 2.1375434064956547e-07,
      "loss": 0.0095,
      "step": 2990380
    },
    {
      "epoch": 4.8938551874472225,
      "grad_norm": 0.48262202739715576,
      "learning_rate": 2.136884484360483e-07,
      "loss": 0.0082,
      "step": 2990400
    },
    {
      "epoch": 4.893887917885875,
      "grad_norm": 0.0945592001080513,
      "learning_rate": 2.1362255622253118e-07,
      "loss": 0.0079,
      "step": 2990420
    },
    {
      "epoch": 4.893920648324529,
      "grad_norm": 0.2051117718219757,
      "learning_rate": 2.1355666400901406e-07,
      "loss": 0.0055,
      "step": 2990440
    },
    {
      "epoch": 4.893953378763182,
      "grad_norm": 0.5217483639717102,
      "learning_rate": 2.1349077179549693e-07,
      "loss": 0.0072,
      "step": 2990460
    },
    {
      "epoch": 4.893986109201835,
      "grad_norm": 0.2499174326658249,
      "learning_rate": 2.1342487958197983e-07,
      "loss": 0.0131,
      "step": 2990480
    },
    {
      "epoch": 4.894018839640489,
      "grad_norm": 0.2681530714035034,
      "learning_rate": 2.133589873684627e-07,
      "loss": 0.008,
      "step": 2990500
    },
    {
      "epoch": 4.894051570079142,
      "grad_norm": 0.22714099287986755,
      "learning_rate": 2.1329309515494557e-07,
      "loss": 0.0089,
      "step": 2990520
    },
    {
      "epoch": 4.894084300517796,
      "grad_norm": 0.1521923542022705,
      "learning_rate": 2.1322720294142844e-07,
      "loss": 0.0113,
      "step": 2990540
    },
    {
      "epoch": 4.894117030956449,
      "grad_norm": 0.07678902894258499,
      "learning_rate": 2.1316131072791131e-07,
      "loss": 0.0081,
      "step": 2990560
    },
    {
      "epoch": 4.894149761395102,
      "grad_norm": 0.1382492184638977,
      "learning_rate": 2.1309541851439419e-07,
      "loss": 0.0085,
      "step": 2990580
    },
    {
      "epoch": 4.894182491833756,
      "grad_norm": 0.5669499635696411,
      "learning_rate": 2.1302952630087706e-07,
      "loss": 0.0098,
      "step": 2990600
    },
    {
      "epoch": 4.894215222272409,
      "grad_norm": 0.0675918459892273,
      "learning_rate": 2.129636340873599e-07,
      "loss": 0.0069,
      "step": 2990620
    },
    {
      "epoch": 4.894247952711062,
      "grad_norm": 0.11577796936035156,
      "learning_rate": 2.1289774187384278e-07,
      "loss": 0.0109,
      "step": 2990640
    },
    {
      "epoch": 4.894280683149716,
      "grad_norm": 0.14069782197475433,
      "learning_rate": 2.1283184966032565e-07,
      "loss": 0.0067,
      "step": 2990660
    },
    {
      "epoch": 4.894313413588369,
      "grad_norm": 0.13814173638820648,
      "learning_rate": 2.1276595744680852e-07,
      "loss": 0.0117,
      "step": 2990680
    },
    {
      "epoch": 4.894346144027022,
      "grad_norm": 0.22563831508159637,
      "learning_rate": 2.127000652332914e-07,
      "loss": 0.0108,
      "step": 2990700
    },
    {
      "epoch": 4.8943788744656755,
      "grad_norm": 0.11384548991918564,
      "learning_rate": 2.1263417301977426e-07,
      "loss": 0.0088,
      "step": 2990720
    },
    {
      "epoch": 4.894411604904329,
      "grad_norm": 0.3015444874763489,
      "learning_rate": 2.1256828080625714e-07,
      "loss": 0.0069,
      "step": 2990740
    },
    {
      "epoch": 4.894444335342982,
      "grad_norm": 0.20336534082889557,
      "learning_rate": 2.1250238859274e-07,
      "loss": 0.0113,
      "step": 2990760
    },
    {
      "epoch": 4.894477065781635,
      "grad_norm": 0.31018146872520447,
      "learning_rate": 2.124364963792229e-07,
      "loss": 0.0093,
      "step": 2990780
    },
    {
      "epoch": 4.894509796220289,
      "grad_norm": 0.20359279215335846,
      "learning_rate": 2.1237060416570578e-07,
      "loss": 0.0073,
      "step": 2990800
    },
    {
      "epoch": 4.894542526658943,
      "grad_norm": 0.15558649599552155,
      "learning_rate": 2.1230471195218865e-07,
      "loss": 0.0067,
      "step": 2990820
    },
    {
      "epoch": 4.894575257097595,
      "grad_norm": 0.16371329128742218,
      "learning_rate": 2.122388197386715e-07,
      "loss": 0.0076,
      "step": 2990840
    },
    {
      "epoch": 4.894607987536249,
      "grad_norm": 0.09647687524557114,
      "learning_rate": 2.1217292752515437e-07,
      "loss": 0.0097,
      "step": 2990860
    },
    {
      "epoch": 4.8946407179749025,
      "grad_norm": 0.26011866331100464,
      "learning_rate": 2.1210703531163724e-07,
      "loss": 0.0108,
      "step": 2990880
    },
    {
      "epoch": 4.894673448413556,
      "grad_norm": 0.12050513178110123,
      "learning_rate": 2.120411430981201e-07,
      "loss": 0.0106,
      "step": 2990900
    },
    {
      "epoch": 4.894706178852209,
      "grad_norm": 0.13854444026947021,
      "learning_rate": 2.1197525088460299e-07,
      "loss": 0.0072,
      "step": 2990920
    },
    {
      "epoch": 4.894738909290862,
      "grad_norm": 0.5457478761672974,
      "learning_rate": 2.1190935867108586e-07,
      "loss": 0.011,
      "step": 2990940
    },
    {
      "epoch": 4.894771639729516,
      "grad_norm": 0.23450763523578644,
      "learning_rate": 2.1184346645756873e-07,
      "loss": 0.0066,
      "step": 2990960
    },
    {
      "epoch": 4.894804370168169,
      "grad_norm": 0.3127564489841461,
      "learning_rate": 2.117775742440516e-07,
      "loss": 0.0099,
      "step": 2990980
    },
    {
      "epoch": 4.894837100606822,
      "grad_norm": 0.3244864046573639,
      "learning_rate": 2.1171168203053447e-07,
      "loss": 0.0089,
      "step": 2991000
    },
    {
      "epoch": 4.894869831045476,
      "grad_norm": 0.3950243592262268,
      "learning_rate": 2.1164578981701735e-07,
      "loss": 0.0092,
      "step": 2991020
    },
    {
      "epoch": 4.894902561484129,
      "grad_norm": 0.3619001507759094,
      "learning_rate": 2.1157989760350022e-07,
      "loss": 0.01,
      "step": 2991040
    },
    {
      "epoch": 4.894935291922782,
      "grad_norm": 0.24012450873851776,
      "learning_rate": 2.1151400538998306e-07,
      "loss": 0.0124,
      "step": 2991060
    },
    {
      "epoch": 4.894968022361436,
      "grad_norm": 0.3679068982601166,
      "learning_rate": 2.1144811317646594e-07,
      "loss": 0.0107,
      "step": 2991080
    },
    {
      "epoch": 4.895000752800089,
      "grad_norm": 0.3164949119091034,
      "learning_rate": 2.113822209629488e-07,
      "loss": 0.0096,
      "step": 2991100
    },
    {
      "epoch": 4.895033483238742,
      "grad_norm": 0.09984958171844482,
      "learning_rate": 2.1131632874943168e-07,
      "loss": 0.0056,
      "step": 2991120
    },
    {
      "epoch": 4.895066213677396,
      "grad_norm": 0.25229525566101074,
      "learning_rate": 2.1125043653591458e-07,
      "loss": 0.0055,
      "step": 2991140
    },
    {
      "epoch": 4.895098944116049,
      "grad_norm": 0.07388772070407867,
      "learning_rate": 2.1118454432239745e-07,
      "loss": 0.0077,
      "step": 2991160
    },
    {
      "epoch": 4.895131674554702,
      "grad_norm": 0.24483272433280945,
      "learning_rate": 2.1111865210888032e-07,
      "loss": 0.0098,
      "step": 2991180
    },
    {
      "epoch": 4.895164404993356,
      "grad_norm": 0.11653918772935867,
      "learning_rate": 2.110527598953632e-07,
      "loss": 0.0087,
      "step": 2991200
    },
    {
      "epoch": 4.895197135432009,
      "grad_norm": 0.07814453542232513,
      "learning_rate": 2.1098686768184607e-07,
      "loss": 0.0141,
      "step": 2991220
    },
    {
      "epoch": 4.895229865870663,
      "grad_norm": 0.18671821057796478,
      "learning_rate": 2.1092097546832894e-07,
      "loss": 0.0078,
      "step": 2991240
    },
    {
      "epoch": 4.8952625963093155,
      "grad_norm": 0.08005337417125702,
      "learning_rate": 2.108550832548118e-07,
      "loss": 0.0059,
      "step": 2991260
    },
    {
      "epoch": 4.895295326747969,
      "grad_norm": 0.37553054094314575,
      "learning_rate": 2.1078919104129466e-07,
      "loss": 0.0087,
      "step": 2991280
    },
    {
      "epoch": 4.895328057186623,
      "grad_norm": 0.5370950102806091,
      "learning_rate": 2.1072329882777753e-07,
      "loss": 0.0112,
      "step": 2991300
    },
    {
      "epoch": 4.895360787625275,
      "grad_norm": 0.17308717966079712,
      "learning_rate": 2.106574066142604e-07,
      "loss": 0.0077,
      "step": 2991320
    },
    {
      "epoch": 4.895393518063929,
      "grad_norm": 0.17400683462619781,
      "learning_rate": 2.1059151440074327e-07,
      "loss": 0.0115,
      "step": 2991340
    },
    {
      "epoch": 4.895426248502583,
      "grad_norm": 0.1746993362903595,
      "learning_rate": 2.1052562218722614e-07,
      "loss": 0.007,
      "step": 2991360
    },
    {
      "epoch": 4.895458978941235,
      "grad_norm": 0.5271400213241577,
      "learning_rate": 2.1045972997370902e-07,
      "loss": 0.0108,
      "step": 2991380
    },
    {
      "epoch": 4.895491709379889,
      "grad_norm": 0.23677986860275269,
      "learning_rate": 2.103938377601919e-07,
      "loss": 0.0057,
      "step": 2991400
    },
    {
      "epoch": 4.8955244398185425,
      "grad_norm": 1.1835129261016846,
      "learning_rate": 2.1032794554667476e-07,
      "loss": 0.0113,
      "step": 2991420
    },
    {
      "epoch": 4.895557170257196,
      "grad_norm": 0.14808019995689392,
      "learning_rate": 2.1026205333315766e-07,
      "loss": 0.0136,
      "step": 2991440
    },
    {
      "epoch": 4.895589900695849,
      "grad_norm": 0.34322091937065125,
      "learning_rate": 2.1019616111964053e-07,
      "loss": 0.0111,
      "step": 2991460
    },
    {
      "epoch": 4.895622631134502,
      "grad_norm": 0.2615988850593567,
      "learning_rate": 2.101302689061234e-07,
      "loss": 0.0132,
      "step": 2991480
    },
    {
      "epoch": 4.895655361573156,
      "grad_norm": 0.25611162185668945,
      "learning_rate": 2.1006437669260625e-07,
      "loss": 0.0093,
      "step": 2991500
    },
    {
      "epoch": 4.8956880920118095,
      "grad_norm": 0.11698036640882492,
      "learning_rate": 2.0999848447908912e-07,
      "loss": 0.0112,
      "step": 2991520
    },
    {
      "epoch": 4.895720822450462,
      "grad_norm": 0.2752000689506531,
      "learning_rate": 2.09932592265572e-07,
      "loss": 0.0096,
      "step": 2991540
    },
    {
      "epoch": 4.895753552889116,
      "grad_norm": 0.08817233145236969,
      "learning_rate": 2.0986670005205487e-07,
      "loss": 0.0088,
      "step": 2991560
    },
    {
      "epoch": 4.895786283327769,
      "grad_norm": 0.12277613580226898,
      "learning_rate": 2.0980080783853774e-07,
      "loss": 0.0068,
      "step": 2991580
    },
    {
      "epoch": 4.895819013766422,
      "grad_norm": 0.0815541073679924,
      "learning_rate": 2.097349156250206e-07,
      "loss": 0.0091,
      "step": 2991600
    },
    {
      "epoch": 4.895851744205076,
      "grad_norm": 0.38419049978256226,
      "learning_rate": 2.0966902341150348e-07,
      "loss": 0.008,
      "step": 2991620
    },
    {
      "epoch": 4.895884474643729,
      "grad_norm": 0.06063327193260193,
      "learning_rate": 2.0960313119798635e-07,
      "loss": 0.006,
      "step": 2991640
    },
    {
      "epoch": 4.895917205082382,
      "grad_norm": 0.2421085685491562,
      "learning_rate": 2.0953723898446923e-07,
      "loss": 0.0079,
      "step": 2991660
    },
    {
      "epoch": 4.895949935521036,
      "grad_norm": 0.14661027491092682,
      "learning_rate": 2.094713467709521e-07,
      "loss": 0.0134,
      "step": 2991680
    },
    {
      "epoch": 4.895982665959689,
      "grad_norm": 0.2523405849933624,
      "learning_rate": 2.0940545455743497e-07,
      "loss": 0.0093,
      "step": 2991700
    },
    {
      "epoch": 4.896015396398343,
      "grad_norm": 0.08698763698339462,
      "learning_rate": 2.0933956234391782e-07,
      "loss": 0.0097,
      "step": 2991720
    },
    {
      "epoch": 4.8960481268369955,
      "grad_norm": 0.3835562765598297,
      "learning_rate": 2.092736701304007e-07,
      "loss": 0.0087,
      "step": 2991740
    },
    {
      "epoch": 4.896080857275649,
      "grad_norm": 0.18766425549983978,
      "learning_rate": 2.0920777791688356e-07,
      "loss": 0.0074,
      "step": 2991760
    },
    {
      "epoch": 4.896113587714303,
      "grad_norm": 0.22097201645374298,
      "learning_rate": 2.0914188570336643e-07,
      "loss": 0.0106,
      "step": 2991780
    },
    {
      "epoch": 4.896146318152956,
      "grad_norm": 0.29076719284057617,
      "learning_rate": 2.0907599348984933e-07,
      "loss": 0.0107,
      "step": 2991800
    },
    {
      "epoch": 4.896179048591609,
      "grad_norm": 0.1274532824754715,
      "learning_rate": 2.090101012763322e-07,
      "loss": 0.0065,
      "step": 2991820
    },
    {
      "epoch": 4.896211779030263,
      "grad_norm": 0.0912146121263504,
      "learning_rate": 2.0894420906281508e-07,
      "loss": 0.0111,
      "step": 2991840
    },
    {
      "epoch": 4.896244509468916,
      "grad_norm": 0.5217247605323792,
      "learning_rate": 2.0887831684929795e-07,
      "loss": 0.0078,
      "step": 2991860
    },
    {
      "epoch": 4.896277239907569,
      "grad_norm": 0.19598586857318878,
      "learning_rate": 2.0881242463578082e-07,
      "loss": 0.0072,
      "step": 2991880
    },
    {
      "epoch": 4.8963099703462225,
      "grad_norm": 0.1784643679857254,
      "learning_rate": 2.087465324222637e-07,
      "loss": 0.007,
      "step": 2991900
    },
    {
      "epoch": 4.896342700784876,
      "grad_norm": 0.2738475799560547,
      "learning_rate": 2.0868064020874656e-07,
      "loss": 0.0104,
      "step": 2991920
    },
    {
      "epoch": 4.896375431223529,
      "grad_norm": 0.49463051557540894,
      "learning_rate": 2.086147479952294e-07,
      "loss": 0.01,
      "step": 2991940
    },
    {
      "epoch": 4.896408161662182,
      "grad_norm": 0.4794742166996002,
      "learning_rate": 2.0854885578171228e-07,
      "loss": 0.0064,
      "step": 2991960
    },
    {
      "epoch": 4.896440892100836,
      "grad_norm": 0.1943829208612442,
      "learning_rate": 2.0848296356819515e-07,
      "loss": 0.0104,
      "step": 2991980
    },
    {
      "epoch": 4.89647362253949,
      "grad_norm": 0.4321903884410858,
      "learning_rate": 2.0841707135467803e-07,
      "loss": 0.0088,
      "step": 2992000
    },
    {
      "epoch": 4.896506352978142,
      "grad_norm": 0.3818175792694092,
      "learning_rate": 2.083511791411609e-07,
      "loss": 0.0089,
      "step": 2992020
    },
    {
      "epoch": 4.896539083416796,
      "grad_norm": 0.14851677417755127,
      "learning_rate": 2.0828528692764377e-07,
      "loss": 0.0099,
      "step": 2992040
    },
    {
      "epoch": 4.8965718138554495,
      "grad_norm": 0.17622046172618866,
      "learning_rate": 2.0821939471412664e-07,
      "loss": 0.0089,
      "step": 2992060
    },
    {
      "epoch": 4.896604544294103,
      "grad_norm": 0.3615775406360626,
      "learning_rate": 2.0815350250060951e-07,
      "loss": 0.0074,
      "step": 2992080
    },
    {
      "epoch": 4.896637274732756,
      "grad_norm": 0.30790773034095764,
      "learning_rate": 2.080876102870924e-07,
      "loss": 0.0106,
      "step": 2992100
    },
    {
      "epoch": 4.896670005171409,
      "grad_norm": 0.21077217161655426,
      "learning_rate": 2.0802171807357528e-07,
      "loss": 0.0086,
      "step": 2992120
    },
    {
      "epoch": 4.896702735610063,
      "grad_norm": 0.1573176383972168,
      "learning_rate": 2.0795582586005816e-07,
      "loss": 0.0108,
      "step": 2992140
    },
    {
      "epoch": 4.896735466048716,
      "grad_norm": 0.13968122005462646,
      "learning_rate": 2.07889933646541e-07,
      "loss": 0.0069,
      "step": 2992160
    },
    {
      "epoch": 4.896768196487369,
      "grad_norm": 0.1825522482395172,
      "learning_rate": 2.0782404143302387e-07,
      "loss": 0.006,
      "step": 2992180
    },
    {
      "epoch": 4.896800926926023,
      "grad_norm": 0.35727113485336304,
      "learning_rate": 2.0775814921950675e-07,
      "loss": 0.0105,
      "step": 2992200
    },
    {
      "epoch": 4.896833657364676,
      "grad_norm": 0.061425160616636276,
      "learning_rate": 2.0769225700598962e-07,
      "loss": 0.0093,
      "step": 2992220
    },
    {
      "epoch": 4.896866387803329,
      "grad_norm": 0.3308657705783844,
      "learning_rate": 2.076263647924725e-07,
      "loss": 0.0121,
      "step": 2992240
    },
    {
      "epoch": 4.896899118241983,
      "grad_norm": 0.08472762256860733,
      "learning_rate": 2.0756047257895536e-07,
      "loss": 0.0057,
      "step": 2992260
    },
    {
      "epoch": 4.896931848680636,
      "grad_norm": 0.08426956087350845,
      "learning_rate": 2.0749458036543823e-07,
      "loss": 0.0091,
      "step": 2992280
    },
    {
      "epoch": 4.896964579119289,
      "grad_norm": 0.12174499034881592,
      "learning_rate": 2.074286881519211e-07,
      "loss": 0.0079,
      "step": 2992300
    },
    {
      "epoch": 4.896997309557943,
      "grad_norm": 0.1641109138727188,
      "learning_rate": 2.0736279593840398e-07,
      "loss": 0.0075,
      "step": 2992320
    },
    {
      "epoch": 4.897030039996596,
      "grad_norm": 0.45087745785713196,
      "learning_rate": 2.0729690372488685e-07,
      "loss": 0.0059,
      "step": 2992340
    },
    {
      "epoch": 4.89706277043525,
      "grad_norm": 0.5085625648498535,
      "learning_rate": 2.0723101151136972e-07,
      "loss": 0.0085,
      "step": 2992360
    },
    {
      "epoch": 4.8970955008739026,
      "grad_norm": 0.07552465051412582,
      "learning_rate": 2.0716511929785257e-07,
      "loss": 0.0106,
      "step": 2992380
    },
    {
      "epoch": 4.897128231312556,
      "grad_norm": 0.13653132319450378,
      "learning_rate": 2.0709922708433544e-07,
      "loss": 0.0081,
      "step": 2992400
    },
    {
      "epoch": 4.89716096175121,
      "grad_norm": 0.171201691031456,
      "learning_rate": 2.070333348708183e-07,
      "loss": 0.0075,
      "step": 2992420
    },
    {
      "epoch": 4.8971936921898624,
      "grad_norm": 0.13618046045303345,
      "learning_rate": 2.0696744265730119e-07,
      "loss": 0.0055,
      "step": 2992440
    },
    {
      "epoch": 4.897226422628516,
      "grad_norm": 0.08953143656253815,
      "learning_rate": 2.0690155044378408e-07,
      "loss": 0.0083,
      "step": 2992460
    },
    {
      "epoch": 4.89725915306717,
      "grad_norm": 0.1092449352145195,
      "learning_rate": 2.0683565823026696e-07,
      "loss": 0.0093,
      "step": 2992480
    },
    {
      "epoch": 4.897291883505822,
      "grad_norm": 0.12191760540008545,
      "learning_rate": 2.0676976601674983e-07,
      "loss": 0.0061,
      "step": 2992500
    },
    {
      "epoch": 4.897324613944476,
      "grad_norm": 0.12307963520288467,
      "learning_rate": 2.067038738032327e-07,
      "loss": 0.011,
      "step": 2992520
    },
    {
      "epoch": 4.8973573443831295,
      "grad_norm": 0.28990739583969116,
      "learning_rate": 2.0663798158971557e-07,
      "loss": 0.0073,
      "step": 2992540
    },
    {
      "epoch": 4.897390074821783,
      "grad_norm": 0.21283487975597382,
      "learning_rate": 2.0657208937619844e-07,
      "loss": 0.0071,
      "step": 2992560
    },
    {
      "epoch": 4.897422805260436,
      "grad_norm": 0.05371633917093277,
      "learning_rate": 2.0650619716268132e-07,
      "loss": 0.0048,
      "step": 2992580
    },
    {
      "epoch": 4.897455535699089,
      "grad_norm": 0.0698624923825264,
      "learning_rate": 2.0644030494916416e-07,
      "loss": 0.0086,
      "step": 2992600
    },
    {
      "epoch": 4.897488266137743,
      "grad_norm": 0.33307600021362305,
      "learning_rate": 2.0637441273564703e-07,
      "loss": 0.0076,
      "step": 2992620
    },
    {
      "epoch": 4.897520996576397,
      "grad_norm": 0.04408973827958107,
      "learning_rate": 2.063085205221299e-07,
      "loss": 0.0073,
      "step": 2992640
    },
    {
      "epoch": 4.897553727015049,
      "grad_norm": 0.4690968990325928,
      "learning_rate": 2.0624262830861278e-07,
      "loss": 0.0076,
      "step": 2992660
    },
    {
      "epoch": 4.897586457453703,
      "grad_norm": 0.4832441508769989,
      "learning_rate": 2.0617673609509565e-07,
      "loss": 0.0096,
      "step": 2992680
    },
    {
      "epoch": 4.8976191878923565,
      "grad_norm": 0.1335911899805069,
      "learning_rate": 2.0611084388157852e-07,
      "loss": 0.0086,
      "step": 2992700
    },
    {
      "epoch": 4.897651918331009,
      "grad_norm": 0.3245905339717865,
      "learning_rate": 2.060449516680614e-07,
      "loss": 0.0097,
      "step": 2992720
    },
    {
      "epoch": 4.897684648769663,
      "grad_norm": 0.12399137020111084,
      "learning_rate": 2.0597905945454427e-07,
      "loss": 0.0103,
      "step": 2992740
    },
    {
      "epoch": 4.897717379208316,
      "grad_norm": 0.2590150535106659,
      "learning_rate": 2.0591316724102717e-07,
      "loss": 0.0097,
      "step": 2992760
    },
    {
      "epoch": 4.897750109646969,
      "grad_norm": 0.17157137393951416,
      "learning_rate": 2.0584727502751004e-07,
      "loss": 0.007,
      "step": 2992780
    },
    {
      "epoch": 4.897782840085623,
      "grad_norm": 0.20046111941337585,
      "learning_rate": 2.057813828139929e-07,
      "loss": 0.0064,
      "step": 2992800
    },
    {
      "epoch": 4.897815570524276,
      "grad_norm": 0.0460643544793129,
      "learning_rate": 2.0571549060047575e-07,
      "loss": 0.0085,
      "step": 2992820
    },
    {
      "epoch": 4.89784830096293,
      "grad_norm": 0.1857457011938095,
      "learning_rate": 2.0564959838695863e-07,
      "loss": 0.0081,
      "step": 2992840
    },
    {
      "epoch": 4.897881031401583,
      "grad_norm": 0.049092650413513184,
      "learning_rate": 2.055837061734415e-07,
      "loss": 0.0058,
      "step": 2992860
    },
    {
      "epoch": 4.897913761840236,
      "grad_norm": 0.369010329246521,
      "learning_rate": 2.0551781395992437e-07,
      "loss": 0.0078,
      "step": 2992880
    },
    {
      "epoch": 4.89794649227889,
      "grad_norm": 0.10736706107854843,
      "learning_rate": 2.0545192174640724e-07,
      "loss": 0.0081,
      "step": 2992900
    },
    {
      "epoch": 4.8979792227175425,
      "grad_norm": 0.15054018795490265,
      "learning_rate": 2.0538602953289012e-07,
      "loss": 0.0128,
      "step": 2992920
    },
    {
      "epoch": 4.898011953156196,
      "grad_norm": 0.1546928584575653,
      "learning_rate": 2.05320137319373e-07,
      "loss": 0.0069,
      "step": 2992940
    },
    {
      "epoch": 4.89804468359485,
      "grad_norm": 0.07691002637147903,
      "learning_rate": 2.0525424510585586e-07,
      "loss": 0.009,
      "step": 2992960
    },
    {
      "epoch": 4.898077414033503,
      "grad_norm": 0.11220812797546387,
      "learning_rate": 2.0518835289233873e-07,
      "loss": 0.0068,
      "step": 2992980
    },
    {
      "epoch": 4.898110144472156,
      "grad_norm": 0.2401989847421646,
      "learning_rate": 2.051224606788216e-07,
      "loss": 0.0082,
      "step": 2993000
    },
    {
      "epoch": 4.89814287491081,
      "grad_norm": 0.18077217042446136,
      "learning_rate": 2.0505656846530448e-07,
      "loss": 0.0114,
      "step": 2993020
    },
    {
      "epoch": 4.898175605349463,
      "grad_norm": 0.20689691603183746,
      "learning_rate": 2.0499067625178732e-07,
      "loss": 0.0086,
      "step": 2993040
    },
    {
      "epoch": 4.898208335788116,
      "grad_norm": 0.31696438789367676,
      "learning_rate": 2.049247840382702e-07,
      "loss": 0.0092,
      "step": 2993060
    },
    {
      "epoch": 4.8982410662267695,
      "grad_norm": 0.07359038293361664,
      "learning_rate": 2.0485889182475307e-07,
      "loss": 0.0109,
      "step": 2993080
    },
    {
      "epoch": 4.898273796665423,
      "grad_norm": 0.08954030275344849,
      "learning_rate": 2.0479299961123594e-07,
      "loss": 0.0082,
      "step": 2993100
    },
    {
      "epoch": 4.898306527104076,
      "grad_norm": 0.5298746824264526,
      "learning_rate": 2.0472710739771884e-07,
      "loss": 0.0064,
      "step": 2993120
    },
    {
      "epoch": 4.898339257542729,
      "grad_norm": 0.0284865889698267,
      "learning_rate": 2.046612151842017e-07,
      "loss": 0.0094,
      "step": 2993140
    },
    {
      "epoch": 4.898371987981383,
      "grad_norm": 0.20110087096691132,
      "learning_rate": 2.0459532297068458e-07,
      "loss": 0.0087,
      "step": 2993160
    },
    {
      "epoch": 4.8984047184200366,
      "grad_norm": 0.18413637578487396,
      "learning_rate": 2.0452943075716745e-07,
      "loss": 0.0094,
      "step": 2993180
    },
    {
      "epoch": 4.898437448858689,
      "grad_norm": 0.15598556399345398,
      "learning_rate": 2.0446353854365032e-07,
      "loss": 0.0091,
      "step": 2993200
    },
    {
      "epoch": 4.898470179297343,
      "grad_norm": 0.3353777527809143,
      "learning_rate": 2.043976463301332e-07,
      "loss": 0.0115,
      "step": 2993220
    },
    {
      "epoch": 4.8985029097359964,
      "grad_norm": 0.2176186591386795,
      "learning_rate": 2.0433175411661607e-07,
      "loss": 0.0171,
      "step": 2993240
    },
    {
      "epoch": 4.89853564017465,
      "grad_norm": 0.19885623455047607,
      "learning_rate": 2.0426586190309891e-07,
      "loss": 0.0056,
      "step": 2993260
    },
    {
      "epoch": 4.898568370613303,
      "grad_norm": 0.11650308966636658,
      "learning_rate": 2.0419996968958179e-07,
      "loss": 0.0082,
      "step": 2993280
    },
    {
      "epoch": 4.898601101051956,
      "grad_norm": 0.7260613441467285,
      "learning_rate": 2.0413407747606466e-07,
      "loss": 0.0124,
      "step": 2993300
    },
    {
      "epoch": 4.89863383149061,
      "grad_norm": 0.12623663246631622,
      "learning_rate": 2.0406818526254753e-07,
      "loss": 0.0063,
      "step": 2993320
    },
    {
      "epoch": 4.898666561929263,
      "grad_norm": 0.40458163619041443,
      "learning_rate": 2.040022930490304e-07,
      "loss": 0.0121,
      "step": 2993340
    },
    {
      "epoch": 4.898699292367916,
      "grad_norm": 0.24027447402477264,
      "learning_rate": 2.0393640083551327e-07,
      "loss": 0.0058,
      "step": 2993360
    },
    {
      "epoch": 4.89873202280657,
      "grad_norm": 0.1566172093153,
      "learning_rate": 2.0387050862199615e-07,
      "loss": 0.0068,
      "step": 2993380
    },
    {
      "epoch": 4.8987647532452225,
      "grad_norm": 0.18995283544063568,
      "learning_rate": 2.0380461640847902e-07,
      "loss": 0.0113,
      "step": 2993400
    },
    {
      "epoch": 4.898797483683876,
      "grad_norm": 0.06925056129693985,
      "learning_rate": 2.0373872419496192e-07,
      "loss": 0.0072,
      "step": 2993420
    },
    {
      "epoch": 4.89883021412253,
      "grad_norm": 0.3083687722682953,
      "learning_rate": 2.036728319814448e-07,
      "loss": 0.0061,
      "step": 2993440
    },
    {
      "epoch": 4.898862944561183,
      "grad_norm": 0.16065119206905365,
      "learning_rate": 2.0360693976792766e-07,
      "loss": 0.0119,
      "step": 2993460
    },
    {
      "epoch": 4.898895674999836,
      "grad_norm": 0.12076446413993835,
      "learning_rate": 2.035410475544105e-07,
      "loss": 0.0108,
      "step": 2993480
    },
    {
      "epoch": 4.89892840543849,
      "grad_norm": 0.6572320461273193,
      "learning_rate": 2.0347515534089338e-07,
      "loss": 0.0137,
      "step": 2993500
    },
    {
      "epoch": 4.898961135877143,
      "grad_norm": 0.3654496669769287,
      "learning_rate": 2.0340926312737625e-07,
      "loss": 0.0097,
      "step": 2993520
    },
    {
      "epoch": 4.898993866315797,
      "grad_norm": 0.11777473986148834,
      "learning_rate": 2.0334337091385912e-07,
      "loss": 0.0087,
      "step": 2993540
    },
    {
      "epoch": 4.8990265967544495,
      "grad_norm": 0.259549081325531,
      "learning_rate": 2.03277478700342e-07,
      "loss": 0.0148,
      "step": 2993560
    },
    {
      "epoch": 4.899059327193103,
      "grad_norm": 0.4909631013870239,
      "learning_rate": 2.0321158648682487e-07,
      "loss": 0.0086,
      "step": 2993580
    },
    {
      "epoch": 4.899092057631757,
      "grad_norm": 0.48022645711898804,
      "learning_rate": 2.0314569427330774e-07,
      "loss": 0.0082,
      "step": 2993600
    },
    {
      "epoch": 4.899124788070409,
      "grad_norm": 0.3413563370704651,
      "learning_rate": 2.030798020597906e-07,
      "loss": 0.0083,
      "step": 2993620
    },
    {
      "epoch": 4.899157518509063,
      "grad_norm": 0.8152590990066528,
      "learning_rate": 2.0301390984627348e-07,
      "loss": 0.0093,
      "step": 2993640
    },
    {
      "epoch": 4.899190248947717,
      "grad_norm": 0.29105284810066223,
      "learning_rate": 2.0294801763275636e-07,
      "loss": 0.0103,
      "step": 2993660
    },
    {
      "epoch": 4.899222979386369,
      "grad_norm": 0.291150838136673,
      "learning_rate": 2.0288212541923923e-07,
      "loss": 0.0111,
      "step": 2993680
    },
    {
      "epoch": 4.899255709825023,
      "grad_norm": 0.14654657244682312,
      "learning_rate": 2.0281623320572207e-07,
      "loss": 0.0088,
      "step": 2993700
    },
    {
      "epoch": 4.8992884402636765,
      "grad_norm": 0.08396071940660477,
      "learning_rate": 2.0275034099220495e-07,
      "loss": 0.0097,
      "step": 2993720
    },
    {
      "epoch": 4.89932117070233,
      "grad_norm": 0.2985244393348694,
      "learning_rate": 2.0268444877868782e-07,
      "loss": 0.0068,
      "step": 2993740
    },
    {
      "epoch": 4.899353901140983,
      "grad_norm": 0.10789044201374054,
      "learning_rate": 2.026185565651707e-07,
      "loss": 0.0117,
      "step": 2993760
    },
    {
      "epoch": 4.899386631579636,
      "grad_norm": 0.070771723985672,
      "learning_rate": 2.025526643516536e-07,
      "loss": 0.0096,
      "step": 2993780
    },
    {
      "epoch": 4.89941936201829,
      "grad_norm": 0.4770706593990326,
      "learning_rate": 2.0248677213813646e-07,
      "loss": 0.0097,
      "step": 2993800
    },
    {
      "epoch": 4.899452092456944,
      "grad_norm": 0.7833620309829712,
      "learning_rate": 2.0242087992461933e-07,
      "loss": 0.0084,
      "step": 2993820
    },
    {
      "epoch": 4.899484822895596,
      "grad_norm": 0.5417248010635376,
      "learning_rate": 2.023549877111022e-07,
      "loss": 0.0095,
      "step": 2993840
    },
    {
      "epoch": 4.89951755333425,
      "grad_norm": 0.3513528108596802,
      "learning_rate": 2.0228909549758508e-07,
      "loss": 0.0076,
      "step": 2993860
    },
    {
      "epoch": 4.8995502837729035,
      "grad_norm": 0.22781309485435486,
      "learning_rate": 2.0222320328406795e-07,
      "loss": 0.0089,
      "step": 2993880
    },
    {
      "epoch": 4.899583014211556,
      "grad_norm": 0.2607986629009247,
      "learning_rate": 2.0215731107055082e-07,
      "loss": 0.0084,
      "step": 2993900
    },
    {
      "epoch": 4.89961574465021,
      "grad_norm": 0.11778971552848816,
      "learning_rate": 2.0209141885703367e-07,
      "loss": 0.006,
      "step": 2993920
    },
    {
      "epoch": 4.899648475088863,
      "grad_norm": 0.17631365358829498,
      "learning_rate": 2.0202552664351654e-07,
      "loss": 0.0065,
      "step": 2993940
    },
    {
      "epoch": 4.899681205527516,
      "grad_norm": 0.08620088547468185,
      "learning_rate": 2.019596344299994e-07,
      "loss": 0.0091,
      "step": 2993960
    },
    {
      "epoch": 4.89971393596617,
      "grad_norm": 0.36976712942123413,
      "learning_rate": 2.0189374221648228e-07,
      "loss": 0.0103,
      "step": 2993980
    },
    {
      "epoch": 4.899746666404823,
      "grad_norm": 0.0593169666826725,
      "learning_rate": 2.0182785000296516e-07,
      "loss": 0.0091,
      "step": 2994000
    },
    {
      "epoch": 4.899779396843477,
      "grad_norm": 0.1380254626274109,
      "learning_rate": 2.0176195778944803e-07,
      "loss": 0.0071,
      "step": 2994020
    },
    {
      "epoch": 4.89981212728213,
      "grad_norm": 0.4341060519218445,
      "learning_rate": 2.016960655759309e-07,
      "loss": 0.0101,
      "step": 2994040
    },
    {
      "epoch": 4.899844857720783,
      "grad_norm": 0.22610323131084442,
      "learning_rate": 2.0163017336241377e-07,
      "loss": 0.0074,
      "step": 2994060
    },
    {
      "epoch": 4.899877588159437,
      "grad_norm": 0.11397767812013626,
      "learning_rate": 2.0156428114889667e-07,
      "loss": 0.0097,
      "step": 2994080
    },
    {
      "epoch": 4.89991031859809,
      "grad_norm": 0.14258234202861786,
      "learning_rate": 2.0149838893537954e-07,
      "loss": 0.0067,
      "step": 2994100
    },
    {
      "epoch": 4.899943049036743,
      "grad_norm": 0.7435815930366516,
      "learning_rate": 2.0143249672186241e-07,
      "loss": 0.0108,
      "step": 2994120
    },
    {
      "epoch": 4.899975779475397,
      "grad_norm": 0.2543966770172119,
      "learning_rate": 2.0136660450834526e-07,
      "loss": 0.01,
      "step": 2994140
    },
    {
      "epoch": 4.90000850991405,
      "grad_norm": 0.6704211831092834,
      "learning_rate": 2.0130071229482813e-07,
      "loss": 0.0168,
      "step": 2994160
    },
    {
      "epoch": 4.900041240352703,
      "grad_norm": 0.0591023713350296,
      "learning_rate": 2.01234820081311e-07,
      "loss": 0.0109,
      "step": 2994180
    },
    {
      "epoch": 4.9000739707913565,
      "grad_norm": 0.1264571100473404,
      "learning_rate": 2.0116892786779388e-07,
      "loss": 0.0077,
      "step": 2994200
    },
    {
      "epoch": 4.90010670123001,
      "grad_norm": 0.2914210557937622,
      "learning_rate": 2.0110303565427675e-07,
      "loss": 0.0101,
      "step": 2994220
    },
    {
      "epoch": 4.900139431668663,
      "grad_norm": 0.2300473004579544,
      "learning_rate": 2.0103714344075962e-07,
      "loss": 0.0108,
      "step": 2994240
    },
    {
      "epoch": 4.900172162107316,
      "grad_norm": 0.2934533357620239,
      "learning_rate": 2.009712512272425e-07,
      "loss": 0.01,
      "step": 2994260
    },
    {
      "epoch": 4.90020489254597,
      "grad_norm": 0.4044573903083801,
      "learning_rate": 2.0090535901372536e-07,
      "loss": 0.0121,
      "step": 2994280
    },
    {
      "epoch": 4.900237622984624,
      "grad_norm": 0.22253696620464325,
      "learning_rate": 2.0083946680020824e-07,
      "loss": 0.008,
      "step": 2994300
    },
    {
      "epoch": 4.900270353423276,
      "grad_norm": 0.12714944779872894,
      "learning_rate": 2.007735745866911e-07,
      "loss": 0.0099,
      "step": 2994320
    },
    {
      "epoch": 4.90030308386193,
      "grad_norm": 0.2676784098148346,
      "learning_rate": 2.0070768237317398e-07,
      "loss": 0.008,
      "step": 2994340
    },
    {
      "epoch": 4.9003358143005835,
      "grad_norm": 0.4802510440349579,
      "learning_rate": 2.0064179015965688e-07,
      "loss": 0.0102,
      "step": 2994360
    },
    {
      "epoch": 4.900368544739236,
      "grad_norm": 0.09423253685235977,
      "learning_rate": 2.005758979461397e-07,
      "loss": 0.0111,
      "step": 2994380
    },
    {
      "epoch": 4.90040127517789,
      "grad_norm": 0.21987555921077728,
      "learning_rate": 2.0051000573262257e-07,
      "loss": 0.0092,
      "step": 2994400
    },
    {
      "epoch": 4.900434005616543,
      "grad_norm": 0.23608048260211945,
      "learning_rate": 2.0044411351910544e-07,
      "loss": 0.0095,
      "step": 2994420
    },
    {
      "epoch": 4.900466736055197,
      "grad_norm": 0.16142059862613678,
      "learning_rate": 2.0037822130558834e-07,
      "loss": 0.0109,
      "step": 2994440
    },
    {
      "epoch": 4.90049946649385,
      "grad_norm": 0.1782018095254898,
      "learning_rate": 2.0031232909207121e-07,
      "loss": 0.0074,
      "step": 2994460
    },
    {
      "epoch": 4.900532196932503,
      "grad_norm": 0.24025797843933105,
      "learning_rate": 2.0024643687855409e-07,
      "loss": 0.0089,
      "step": 2994480
    },
    {
      "epoch": 4.900564927371157,
      "grad_norm": 0.10177136212587357,
      "learning_rate": 2.0018054466503696e-07,
      "loss": 0.0148,
      "step": 2994500
    },
    {
      "epoch": 4.90059765780981,
      "grad_norm": 0.25451821088790894,
      "learning_rate": 2.0011465245151983e-07,
      "loss": 0.0065,
      "step": 2994520
    },
    {
      "epoch": 4.900630388248463,
      "grad_norm": 0.11227624863386154,
      "learning_rate": 2.000487602380027e-07,
      "loss": 0.0066,
      "step": 2994540
    },
    {
      "epoch": 4.900663118687117,
      "grad_norm": 0.14157673716545105,
      "learning_rate": 1.9998286802448557e-07,
      "loss": 0.0064,
      "step": 2994560
    },
    {
      "epoch": 4.9006958491257695,
      "grad_norm": 0.2422824651002884,
      "learning_rate": 1.9991697581096845e-07,
      "loss": 0.0132,
      "step": 2994580
    },
    {
      "epoch": 4.900728579564423,
      "grad_norm": 0.2597411870956421,
      "learning_rate": 1.998510835974513e-07,
      "loss": 0.0093,
      "step": 2994600
    },
    {
      "epoch": 4.900761310003077,
      "grad_norm": 0.11805783957242966,
      "learning_rate": 1.9978519138393416e-07,
      "loss": 0.0073,
      "step": 2994620
    },
    {
      "epoch": 4.90079404044173,
      "grad_norm": 0.21097759902477264,
      "learning_rate": 1.9971929917041704e-07,
      "loss": 0.0094,
      "step": 2994640
    },
    {
      "epoch": 4.900826770880383,
      "grad_norm": 0.3302353024482727,
      "learning_rate": 1.996534069568999e-07,
      "loss": 0.0083,
      "step": 2994660
    },
    {
      "epoch": 4.900859501319037,
      "grad_norm": 0.4212840497493744,
      "learning_rate": 1.9958751474338278e-07,
      "loss": 0.0086,
      "step": 2994680
    },
    {
      "epoch": 4.90089223175769,
      "grad_norm": 0.39387205243110657,
      "learning_rate": 1.9952162252986565e-07,
      "loss": 0.0079,
      "step": 2994700
    },
    {
      "epoch": 4.900924962196344,
      "grad_norm": 0.37727952003479004,
      "learning_rate": 1.9945573031634855e-07,
      "loss": 0.0078,
      "step": 2994720
    },
    {
      "epoch": 4.9009576926349965,
      "grad_norm": 0.9221972823143005,
      "learning_rate": 1.9938983810283142e-07,
      "loss": 0.01,
      "step": 2994740
    },
    {
      "epoch": 4.90099042307365,
      "grad_norm": 0.26877865195274353,
      "learning_rate": 1.993239458893143e-07,
      "loss": 0.012,
      "step": 2994760
    },
    {
      "epoch": 4.901023153512304,
      "grad_norm": 0.1398041695356369,
      "learning_rate": 1.9925805367579717e-07,
      "loss": 0.0071,
      "step": 2994780
    },
    {
      "epoch": 4.901055883950956,
      "grad_norm": 0.4671473801136017,
      "learning_rate": 1.9919216146228004e-07,
      "loss": 0.0091,
      "step": 2994800
    },
    {
      "epoch": 4.90108861438961,
      "grad_norm": 0.21014182269573212,
      "learning_rate": 1.9912626924876288e-07,
      "loss": 0.005,
      "step": 2994820
    },
    {
      "epoch": 4.901121344828264,
      "grad_norm": 0.14398759603500366,
      "learning_rate": 1.9906037703524576e-07,
      "loss": 0.0065,
      "step": 2994840
    },
    {
      "epoch": 4.901154075266916,
      "grad_norm": 0.3697816729545593,
      "learning_rate": 1.9899448482172863e-07,
      "loss": 0.0125,
      "step": 2994860
    },
    {
      "epoch": 4.90118680570557,
      "grad_norm": 0.18074731528759003,
      "learning_rate": 1.989285926082115e-07,
      "loss": 0.007,
      "step": 2994880
    },
    {
      "epoch": 4.9012195361442235,
      "grad_norm": 0.13707032799720764,
      "learning_rate": 1.9886270039469437e-07,
      "loss": 0.0083,
      "step": 2994900
    },
    {
      "epoch": 4.901252266582877,
      "grad_norm": 0.17214041948318481,
      "learning_rate": 1.9879680818117725e-07,
      "loss": 0.0071,
      "step": 2994920
    },
    {
      "epoch": 4.90128499702153,
      "grad_norm": 0.28121423721313477,
      "learning_rate": 1.9873091596766012e-07,
      "loss": 0.0081,
      "step": 2994940
    },
    {
      "epoch": 4.901317727460183,
      "grad_norm": 0.08937733620405197,
      "learning_rate": 1.98665023754143e-07,
      "loss": 0.0116,
      "step": 2994960
    },
    {
      "epoch": 4.901350457898837,
      "grad_norm": 0.06090295687317848,
      "learning_rate": 1.9859913154062586e-07,
      "loss": 0.0084,
      "step": 2994980
    },
    {
      "epoch": 4.9013831883374905,
      "grad_norm": 0.46798792481422424,
      "learning_rate": 1.9853323932710873e-07,
      "loss": 0.0101,
      "step": 2995000
    },
    {
      "epoch": 4.901415918776143,
      "grad_norm": 0.35898441076278687,
      "learning_rate": 1.9846734711359163e-07,
      "loss": 0.0131,
      "step": 2995020
    },
    {
      "epoch": 4.901448649214797,
      "grad_norm": 0.455949068069458,
      "learning_rate": 1.9840145490007445e-07,
      "loss": 0.0122,
      "step": 2995040
    },
    {
      "epoch": 4.90148137965345,
      "grad_norm": 0.257089763879776,
      "learning_rate": 1.9833556268655732e-07,
      "loss": 0.0108,
      "step": 2995060
    },
    {
      "epoch": 4.901514110092103,
      "grad_norm": 0.3859023153781891,
      "learning_rate": 1.982696704730402e-07,
      "loss": 0.0111,
      "step": 2995080
    },
    {
      "epoch": 4.901546840530757,
      "grad_norm": 0.03751028701663017,
      "learning_rate": 1.982037782595231e-07,
      "loss": 0.0087,
      "step": 2995100
    },
    {
      "epoch": 4.90157957096941,
      "grad_norm": 0.33676663041114807,
      "learning_rate": 1.9813788604600597e-07,
      "loss": 0.018,
      "step": 2995120
    },
    {
      "epoch": 4.901612301408063,
      "grad_norm": 0.22795997560024261,
      "learning_rate": 1.9807199383248884e-07,
      "loss": 0.009,
      "step": 2995140
    },
    {
      "epoch": 4.901645031846717,
      "grad_norm": 0.16503581404685974,
      "learning_rate": 1.980061016189717e-07,
      "loss": 0.0108,
      "step": 2995160
    },
    {
      "epoch": 4.90167776228537,
      "grad_norm": 0.2281339168548584,
      "learning_rate": 1.9794020940545458e-07,
      "loss": 0.0077,
      "step": 2995180
    },
    {
      "epoch": 4.901710492724024,
      "grad_norm": 0.5329616665840149,
      "learning_rate": 1.9787431719193745e-07,
      "loss": 0.0071,
      "step": 2995200
    },
    {
      "epoch": 4.9017432231626765,
      "grad_norm": 0.17180122435092926,
      "learning_rate": 1.9780842497842033e-07,
      "loss": 0.0129,
      "step": 2995220
    },
    {
      "epoch": 4.90177595360133,
      "grad_norm": 0.10629046708345413,
      "learning_rate": 1.977425327649032e-07,
      "loss": 0.0069,
      "step": 2995240
    },
    {
      "epoch": 4.901808684039984,
      "grad_norm": 0.14556171000003815,
      "learning_rate": 1.9767664055138604e-07,
      "loss": 0.0114,
      "step": 2995260
    },
    {
      "epoch": 4.901841414478637,
      "grad_norm": 0.3547905683517456,
      "learning_rate": 1.9761074833786892e-07,
      "loss": 0.011,
      "step": 2995280
    },
    {
      "epoch": 4.90187414491729,
      "grad_norm": 0.43807581067085266,
      "learning_rate": 1.975448561243518e-07,
      "loss": 0.0086,
      "step": 2995300
    },
    {
      "epoch": 4.901906875355944,
      "grad_norm": 0.20355141162872314,
      "learning_rate": 1.9747896391083466e-07,
      "loss": 0.0099,
      "step": 2995320
    },
    {
      "epoch": 4.901939605794597,
      "grad_norm": 0.3010587990283966,
      "learning_rate": 1.9741307169731753e-07,
      "loss": 0.0054,
      "step": 2995340
    },
    {
      "epoch": 4.90197233623325,
      "grad_norm": 0.6854092478752136,
      "learning_rate": 1.973471794838004e-07,
      "loss": 0.0082,
      "step": 2995360
    },
    {
      "epoch": 4.9020050666719035,
      "grad_norm": 0.05885498598217964,
      "learning_rate": 1.972812872702833e-07,
      "loss": 0.0086,
      "step": 2995380
    },
    {
      "epoch": 4.902037797110557,
      "grad_norm": 0.16089363396167755,
      "learning_rate": 1.9721539505676618e-07,
      "loss": 0.0118,
      "step": 2995400
    },
    {
      "epoch": 4.90207052754921,
      "grad_norm": 0.39426636695861816,
      "learning_rate": 1.9714950284324905e-07,
      "loss": 0.0115,
      "step": 2995420
    },
    {
      "epoch": 4.902103257987863,
      "grad_norm": 0.17840108275413513,
      "learning_rate": 1.9708361062973192e-07,
      "loss": 0.0068,
      "step": 2995440
    },
    {
      "epoch": 4.902135988426517,
      "grad_norm": 0.19808414578437805,
      "learning_rate": 1.970177184162148e-07,
      "loss": 0.0077,
      "step": 2995460
    },
    {
      "epoch": 4.902168718865171,
      "grad_norm": 0.2510191798210144,
      "learning_rate": 1.9695182620269764e-07,
      "loss": 0.0149,
      "step": 2995480
    },
    {
      "epoch": 4.902201449303823,
      "grad_norm": 0.33209165930747986,
      "learning_rate": 1.968859339891805e-07,
      "loss": 0.0111,
      "step": 2995500
    },
    {
      "epoch": 4.902234179742477,
      "grad_norm": 0.4561310410499573,
      "learning_rate": 1.9682004177566338e-07,
      "loss": 0.0088,
      "step": 2995520
    },
    {
      "epoch": 4.9022669101811305,
      "grad_norm": 0.40314435958862305,
      "learning_rate": 1.9675414956214625e-07,
      "loss": 0.0077,
      "step": 2995540
    },
    {
      "epoch": 4.902299640619784,
      "grad_norm": 0.22892388701438904,
      "learning_rate": 1.9668825734862913e-07,
      "loss": 0.0092,
      "step": 2995560
    },
    {
      "epoch": 4.902332371058437,
      "grad_norm": 0.08687985688447952,
      "learning_rate": 1.96622365135112e-07,
      "loss": 0.0092,
      "step": 2995580
    },
    {
      "epoch": 4.90236510149709,
      "grad_norm": 0.3263748288154602,
      "learning_rate": 1.9655647292159487e-07,
      "loss": 0.0108,
      "step": 2995600
    },
    {
      "epoch": 4.902397831935744,
      "grad_norm": 0.3221384584903717,
      "learning_rate": 1.9649058070807774e-07,
      "loss": 0.0087,
      "step": 2995620
    },
    {
      "epoch": 4.902430562374397,
      "grad_norm": 0.13197031617164612,
      "learning_rate": 1.9642468849456061e-07,
      "loss": 0.0095,
      "step": 2995640
    },
    {
      "epoch": 4.90246329281305,
      "grad_norm": 0.3079846203327179,
      "learning_rate": 1.9635879628104349e-07,
      "loss": 0.0111,
      "step": 2995660
    },
    {
      "epoch": 4.902496023251704,
      "grad_norm": 0.17014974355697632,
      "learning_rate": 1.9629290406752639e-07,
      "loss": 0.0064,
      "step": 2995680
    },
    {
      "epoch": 4.902528753690357,
      "grad_norm": 0.09975293278694153,
      "learning_rate": 1.962270118540092e-07,
      "loss": 0.0069,
      "step": 2995700
    },
    {
      "epoch": 4.90256148412901,
      "grad_norm": 0.3977971076965332,
      "learning_rate": 1.9616111964049208e-07,
      "loss": 0.0079,
      "step": 2995720
    },
    {
      "epoch": 4.902594214567664,
      "grad_norm": 0.42186471819877625,
      "learning_rate": 1.9609522742697495e-07,
      "loss": 0.009,
      "step": 2995740
    },
    {
      "epoch": 4.902626945006317,
      "grad_norm": 0.31408095359802246,
      "learning_rate": 1.9602933521345785e-07,
      "loss": 0.0098,
      "step": 2995760
    },
    {
      "epoch": 4.90265967544497,
      "grad_norm": 0.10439396649599075,
      "learning_rate": 1.9596344299994072e-07,
      "loss": 0.0069,
      "step": 2995780
    },
    {
      "epoch": 4.902692405883624,
      "grad_norm": 0.3260338008403778,
      "learning_rate": 1.958975507864236e-07,
      "loss": 0.007,
      "step": 2995800
    },
    {
      "epoch": 4.902725136322277,
      "grad_norm": 0.20977887511253357,
      "learning_rate": 1.9583165857290646e-07,
      "loss": 0.0094,
      "step": 2995820
    },
    {
      "epoch": 4.90275786676093,
      "grad_norm": 0.16930361092090607,
      "learning_rate": 1.9576576635938934e-07,
      "loss": 0.0103,
      "step": 2995840
    },
    {
      "epoch": 4.9027905971995835,
      "grad_norm": 0.24005460739135742,
      "learning_rate": 1.956998741458722e-07,
      "loss": 0.0118,
      "step": 2995860
    },
    {
      "epoch": 4.902823327638237,
      "grad_norm": 0.6610881090164185,
      "learning_rate": 1.9563398193235508e-07,
      "loss": 0.0175,
      "step": 2995880
    },
    {
      "epoch": 4.902856058076891,
      "grad_norm": 0.0872587040066719,
      "learning_rate": 1.9556808971883795e-07,
      "loss": 0.0066,
      "step": 2995900
    },
    {
      "epoch": 4.902888788515543,
      "grad_norm": 0.4652341604232788,
      "learning_rate": 1.955021975053208e-07,
      "loss": 0.0065,
      "step": 2995920
    },
    {
      "epoch": 4.902921518954197,
      "grad_norm": 0.05440724641084671,
      "learning_rate": 1.9543630529180367e-07,
      "loss": 0.0078,
      "step": 2995940
    },
    {
      "epoch": 4.902954249392851,
      "grad_norm": 0.22477565705776215,
      "learning_rate": 1.9537041307828654e-07,
      "loss": 0.0154,
      "step": 2995960
    },
    {
      "epoch": 4.902986979831503,
      "grad_norm": 0.07497189939022064,
      "learning_rate": 1.9530452086476941e-07,
      "loss": 0.0064,
      "step": 2995980
    },
    {
      "epoch": 4.903019710270157,
      "grad_norm": 0.2716159224510193,
      "learning_rate": 1.9523862865125229e-07,
      "loss": 0.0082,
      "step": 2996000
    },
    {
      "epoch": 4.9030524407088105,
      "grad_norm": 0.16523826122283936,
      "learning_rate": 1.9517273643773516e-07,
      "loss": 0.0098,
      "step": 2996020
    },
    {
      "epoch": 4.903085171147463,
      "grad_norm": 0.32847779989242554,
      "learning_rate": 1.9510684422421806e-07,
      "loss": 0.0093,
      "step": 2996040
    },
    {
      "epoch": 4.903117901586117,
      "grad_norm": 0.7273933291435242,
      "learning_rate": 1.9504095201070093e-07,
      "loss": 0.0109,
      "step": 2996060
    },
    {
      "epoch": 4.90315063202477,
      "grad_norm": 0.12707319855690002,
      "learning_rate": 1.949750597971838e-07,
      "loss": 0.0064,
      "step": 2996080
    },
    {
      "epoch": 4.903183362463424,
      "grad_norm": 0.09561090171337128,
      "learning_rate": 1.9490916758366667e-07,
      "loss": 0.0131,
      "step": 2996100
    },
    {
      "epoch": 4.903216092902077,
      "grad_norm": 0.2553345859050751,
      "learning_rate": 1.9484327537014954e-07,
      "loss": 0.0139,
      "step": 2996120
    },
    {
      "epoch": 4.90324882334073,
      "grad_norm": 0.04691917449235916,
      "learning_rate": 1.947773831566324e-07,
      "loss": 0.0083,
      "step": 2996140
    },
    {
      "epoch": 4.903281553779384,
      "grad_norm": 0.41208603978157043,
      "learning_rate": 1.9471149094311526e-07,
      "loss": 0.0059,
      "step": 2996160
    },
    {
      "epoch": 4.9033142842180375,
      "grad_norm": 0.30933240056037903,
      "learning_rate": 1.9464559872959813e-07,
      "loss": 0.0075,
      "step": 2996180
    },
    {
      "epoch": 4.90334701465669,
      "grad_norm": 0.4959988594055176,
      "learning_rate": 1.94579706516081e-07,
      "loss": 0.0097,
      "step": 2996200
    },
    {
      "epoch": 4.903379745095344,
      "grad_norm": 0.13754546642303467,
      "learning_rate": 1.9451381430256388e-07,
      "loss": 0.0168,
      "step": 2996220
    },
    {
      "epoch": 4.903412475533997,
      "grad_norm": 0.11360816657543182,
      "learning_rate": 1.9444792208904675e-07,
      "loss": 0.0092,
      "step": 2996240
    },
    {
      "epoch": 4.90344520597265,
      "grad_norm": 0.5981605052947998,
      "learning_rate": 1.9438202987552962e-07,
      "loss": 0.0134,
      "step": 2996260
    },
    {
      "epoch": 4.903477936411304,
      "grad_norm": 0.46628597378730774,
      "learning_rate": 1.943161376620125e-07,
      "loss": 0.006,
      "step": 2996280
    },
    {
      "epoch": 4.903510666849957,
      "grad_norm": 0.7903955578804016,
      "learning_rate": 1.9425024544849537e-07,
      "loss": 0.0116,
      "step": 2996300
    },
    {
      "epoch": 4.90354339728861,
      "grad_norm": 0.2740292251110077,
      "learning_rate": 1.9418435323497824e-07,
      "loss": 0.0102,
      "step": 2996320
    },
    {
      "epoch": 4.903576127727264,
      "grad_norm": 0.30621665716171265,
      "learning_rate": 1.9411846102146114e-07,
      "loss": 0.0064,
      "step": 2996340
    },
    {
      "epoch": 4.903608858165917,
      "grad_norm": 0.194960355758667,
      "learning_rate": 1.9405256880794396e-07,
      "loss": 0.0072,
      "step": 2996360
    },
    {
      "epoch": 4.903641588604571,
      "grad_norm": 0.40627339482307434,
      "learning_rate": 1.9398667659442683e-07,
      "loss": 0.0118,
      "step": 2996380
    },
    {
      "epoch": 4.9036743190432235,
      "grad_norm": 0.10457399487495422,
      "learning_rate": 1.939207843809097e-07,
      "loss": 0.0065,
      "step": 2996400
    },
    {
      "epoch": 4.903707049481877,
      "grad_norm": 0.24462586641311646,
      "learning_rate": 1.938548921673926e-07,
      "loss": 0.0079,
      "step": 2996420
    },
    {
      "epoch": 4.903739779920531,
      "grad_norm": 0.034463636577129364,
      "learning_rate": 1.9378899995387547e-07,
      "loss": 0.0076,
      "step": 2996440
    },
    {
      "epoch": 4.903772510359184,
      "grad_norm": 0.2816936671733856,
      "learning_rate": 1.9372310774035834e-07,
      "loss": 0.0098,
      "step": 2996460
    },
    {
      "epoch": 4.903805240797837,
      "grad_norm": 0.3390730023384094,
      "learning_rate": 1.9365721552684122e-07,
      "loss": 0.0111,
      "step": 2996480
    },
    {
      "epoch": 4.903837971236491,
      "grad_norm": 0.09210067242383957,
      "learning_rate": 1.935913233133241e-07,
      "loss": 0.0072,
      "step": 2996500
    },
    {
      "epoch": 4.903870701675144,
      "grad_norm": 0.08745211362838745,
      "learning_rate": 1.9352543109980696e-07,
      "loss": 0.0089,
      "step": 2996520
    },
    {
      "epoch": 4.903903432113797,
      "grad_norm": 0.2575069069862366,
      "learning_rate": 1.9345953888628983e-07,
      "loss": 0.0093,
      "step": 2996540
    },
    {
      "epoch": 4.9039361625524505,
      "grad_norm": 0.17293763160705566,
      "learning_rate": 1.933936466727727e-07,
      "loss": 0.0063,
      "step": 2996560
    },
    {
      "epoch": 4.903968892991104,
      "grad_norm": 0.5082973837852478,
      "learning_rate": 1.9332775445925555e-07,
      "loss": 0.0114,
      "step": 2996580
    },
    {
      "epoch": 4.904001623429757,
      "grad_norm": 0.17849692702293396,
      "learning_rate": 1.9326186224573842e-07,
      "loss": 0.007,
      "step": 2996600
    },
    {
      "epoch": 4.90403435386841,
      "grad_norm": 0.0411096028983593,
      "learning_rate": 1.931959700322213e-07,
      "loss": 0.007,
      "step": 2996620
    },
    {
      "epoch": 4.904067084307064,
      "grad_norm": 0.41693785786628723,
      "learning_rate": 1.9313007781870417e-07,
      "loss": 0.0096,
      "step": 2996640
    },
    {
      "epoch": 4.9040998147457175,
      "grad_norm": 0.4375293552875519,
      "learning_rate": 1.9306418560518704e-07,
      "loss": 0.0107,
      "step": 2996660
    },
    {
      "epoch": 4.90413254518437,
      "grad_norm": 0.3173350393772125,
      "learning_rate": 1.929982933916699e-07,
      "loss": 0.0104,
      "step": 2996680
    },
    {
      "epoch": 4.904165275623024,
      "grad_norm": 0.1497977077960968,
      "learning_rate": 1.929324011781528e-07,
      "loss": 0.0059,
      "step": 2996700
    },
    {
      "epoch": 4.904198006061677,
      "grad_norm": 0.3559274673461914,
      "learning_rate": 1.9286650896463568e-07,
      "loss": 0.0079,
      "step": 2996720
    },
    {
      "epoch": 4.904230736500331,
      "grad_norm": 0.20565569400787354,
      "learning_rate": 1.9280061675111855e-07,
      "loss": 0.0065,
      "step": 2996740
    },
    {
      "epoch": 4.904263466938984,
      "grad_norm": 0.2224562019109726,
      "learning_rate": 1.9273472453760143e-07,
      "loss": 0.0126,
      "step": 2996760
    },
    {
      "epoch": 4.904296197377637,
      "grad_norm": 0.14201074838638306,
      "learning_rate": 1.926688323240843e-07,
      "loss": 0.0068,
      "step": 2996780
    },
    {
      "epoch": 4.904328927816291,
      "grad_norm": 0.2925357222557068,
      "learning_rate": 1.9260294011056714e-07,
      "loss": 0.0086,
      "step": 2996800
    },
    {
      "epoch": 4.904361658254944,
      "grad_norm": 0.32107821106910706,
      "learning_rate": 1.9253704789705002e-07,
      "loss": 0.0075,
      "step": 2996820
    },
    {
      "epoch": 4.904394388693597,
      "grad_norm": 0.13405731320381165,
      "learning_rate": 1.924711556835329e-07,
      "loss": 0.0087,
      "step": 2996840
    },
    {
      "epoch": 4.904427119132251,
      "grad_norm": 1.2684264183044434,
      "learning_rate": 1.9240526347001576e-07,
      "loss": 0.0099,
      "step": 2996860
    },
    {
      "epoch": 4.9044598495709035,
      "grad_norm": 0.3200218975543976,
      "learning_rate": 1.9233937125649863e-07,
      "loss": 0.0115,
      "step": 2996880
    },
    {
      "epoch": 4.904492580009557,
      "grad_norm": 0.062205720692873,
      "learning_rate": 1.922734790429815e-07,
      "loss": 0.0098,
      "step": 2996900
    },
    {
      "epoch": 4.904525310448211,
      "grad_norm": 0.522127091884613,
      "learning_rate": 1.9220758682946438e-07,
      "loss": 0.0091,
      "step": 2996920
    },
    {
      "epoch": 4.904558040886864,
      "grad_norm": 0.22458450496196747,
      "learning_rate": 1.9214169461594725e-07,
      "loss": 0.0086,
      "step": 2996940
    },
    {
      "epoch": 4.904590771325517,
      "grad_norm": 0.19162781536579132,
      "learning_rate": 1.9207580240243012e-07,
      "loss": 0.01,
      "step": 2996960
    },
    {
      "epoch": 4.904623501764171,
      "grad_norm": 0.16298557817935944,
      "learning_rate": 1.92009910188913e-07,
      "loss": 0.0063,
      "step": 2996980
    },
    {
      "epoch": 4.904656232202824,
      "grad_norm": 0.36353689432144165,
      "learning_rate": 1.919440179753959e-07,
      "loss": 0.0101,
      "step": 2997000
    },
    {
      "epoch": 4.904688962641478,
      "grad_norm": 0.5182934403419495,
      "learning_rate": 1.918781257618787e-07,
      "loss": 0.0133,
      "step": 2997020
    },
    {
      "epoch": 4.9047216930801305,
      "grad_norm": 0.3583191931247711,
      "learning_rate": 1.9181223354836158e-07,
      "loss": 0.0105,
      "step": 2997040
    },
    {
      "epoch": 4.904754423518784,
      "grad_norm": 0.12500517070293427,
      "learning_rate": 1.9174634133484448e-07,
      "loss": 0.0065,
      "step": 2997060
    },
    {
      "epoch": 4.904787153957438,
      "grad_norm": 0.11134335398674011,
      "learning_rate": 1.9168044912132735e-07,
      "loss": 0.0136,
      "step": 2997080
    },
    {
      "epoch": 4.90481988439609,
      "grad_norm": 0.05262070894241333,
      "learning_rate": 1.9161455690781022e-07,
      "loss": 0.0053,
      "step": 2997100
    },
    {
      "epoch": 4.904852614834744,
      "grad_norm": 0.2258267104625702,
      "learning_rate": 1.915486646942931e-07,
      "loss": 0.0083,
      "step": 2997120
    },
    {
      "epoch": 4.904885345273398,
      "grad_norm": 0.1575499176979065,
      "learning_rate": 1.9148277248077597e-07,
      "loss": 0.0109,
      "step": 2997140
    },
    {
      "epoch": 4.90491807571205,
      "grad_norm": 0.23291291296482086,
      "learning_rate": 1.9141688026725884e-07,
      "loss": 0.0097,
      "step": 2997160
    },
    {
      "epoch": 4.904950806150704,
      "grad_norm": 0.09737614542245865,
      "learning_rate": 1.913509880537417e-07,
      "loss": 0.0089,
      "step": 2997180
    },
    {
      "epoch": 4.9049835365893575,
      "grad_norm": 0.21194687485694885,
      "learning_rate": 1.9128509584022458e-07,
      "loss": 0.0063,
      "step": 2997200
    },
    {
      "epoch": 4.905016267028011,
      "grad_norm": 0.259874165058136,
      "learning_rate": 1.9121920362670746e-07,
      "loss": 0.0086,
      "step": 2997220
    },
    {
      "epoch": 4.905048997466664,
      "grad_norm": 0.3157272934913635,
      "learning_rate": 1.911533114131903e-07,
      "loss": 0.0093,
      "step": 2997240
    },
    {
      "epoch": 4.905081727905317,
      "grad_norm": 0.4843786060810089,
      "learning_rate": 1.9108741919967317e-07,
      "loss": 0.0088,
      "step": 2997260
    },
    {
      "epoch": 4.905114458343971,
      "grad_norm": 0.261942595243454,
      "learning_rate": 1.9102152698615605e-07,
      "loss": 0.0091,
      "step": 2997280
    },
    {
      "epoch": 4.905147188782624,
      "grad_norm": 0.1246170774102211,
      "learning_rate": 1.9095563477263892e-07,
      "loss": 0.0066,
      "step": 2997300
    },
    {
      "epoch": 4.905179919221277,
      "grad_norm": 0.16462033987045288,
      "learning_rate": 1.908897425591218e-07,
      "loss": 0.0094,
      "step": 2997320
    },
    {
      "epoch": 4.905212649659931,
      "grad_norm": 0.25038421154022217,
      "learning_rate": 1.9082385034560466e-07,
      "loss": 0.0065,
      "step": 2997340
    },
    {
      "epoch": 4.9052453800985845,
      "grad_norm": 0.06237122043967247,
      "learning_rate": 1.9075795813208756e-07,
      "loss": 0.0119,
      "step": 2997360
    },
    {
      "epoch": 4.905278110537237,
      "grad_norm": 0.40860840678215027,
      "learning_rate": 1.9069206591857043e-07,
      "loss": 0.0052,
      "step": 2997380
    },
    {
      "epoch": 4.905310840975891,
      "grad_norm": 0.4065164029598236,
      "learning_rate": 1.906261737050533e-07,
      "loss": 0.0096,
      "step": 2997400
    },
    {
      "epoch": 4.905343571414544,
      "grad_norm": 0.23284855484962463,
      "learning_rate": 1.9056028149153618e-07,
      "loss": 0.0118,
      "step": 2997420
    },
    {
      "epoch": 4.905376301853197,
      "grad_norm": 0.5064099431037903,
      "learning_rate": 1.9049438927801905e-07,
      "loss": 0.009,
      "step": 2997440
    },
    {
      "epoch": 4.905409032291851,
      "grad_norm": 0.4812495708465576,
      "learning_rate": 1.904284970645019e-07,
      "loss": 0.011,
      "step": 2997460
    },
    {
      "epoch": 4.905441762730504,
      "grad_norm": 0.3629545271396637,
      "learning_rate": 1.9036260485098477e-07,
      "loss": 0.0048,
      "step": 2997480
    },
    {
      "epoch": 4.905474493169157,
      "grad_norm": 0.2446315884590149,
      "learning_rate": 1.9029671263746764e-07,
      "loss": 0.0102,
      "step": 2997500
    },
    {
      "epoch": 4.9055072236078106,
      "grad_norm": 0.2143768072128296,
      "learning_rate": 1.902308204239505e-07,
      "loss": 0.0083,
      "step": 2997520
    },
    {
      "epoch": 4.905539954046464,
      "grad_norm": 0.391440749168396,
      "learning_rate": 1.9016492821043338e-07,
      "loss": 0.0076,
      "step": 2997540
    },
    {
      "epoch": 4.905572684485118,
      "grad_norm": 0.19984795153141022,
      "learning_rate": 1.9009903599691626e-07,
      "loss": 0.0071,
      "step": 2997560
    },
    {
      "epoch": 4.9056054149237704,
      "grad_norm": 2.1289310455322266,
      "learning_rate": 1.9003314378339913e-07,
      "loss": 0.0099,
      "step": 2997580
    },
    {
      "epoch": 4.905638145362424,
      "grad_norm": 0.30259329080581665,
      "learning_rate": 1.89967251569882e-07,
      "loss": 0.0152,
      "step": 2997600
    },
    {
      "epoch": 4.905670875801078,
      "grad_norm": 0.139905646443367,
      "learning_rate": 1.8990135935636487e-07,
      "loss": 0.0142,
      "step": 2997620
    },
    {
      "epoch": 4.905703606239731,
      "grad_norm": 0.11671604961156845,
      "learning_rate": 1.8983546714284774e-07,
      "loss": 0.0051,
      "step": 2997640
    },
    {
      "epoch": 4.905736336678384,
      "grad_norm": 0.12008649855852127,
      "learning_rate": 1.8976957492933064e-07,
      "loss": 0.0098,
      "step": 2997660
    },
    {
      "epoch": 4.9057690671170375,
      "grad_norm": 0.3329390585422516,
      "learning_rate": 1.8970368271581346e-07,
      "loss": 0.0074,
      "step": 2997680
    },
    {
      "epoch": 4.905801797555691,
      "grad_norm": 0.17776191234588623,
      "learning_rate": 1.8963779050229633e-07,
      "loss": 0.0088,
      "step": 2997700
    },
    {
      "epoch": 4.905834527994344,
      "grad_norm": 0.06125796586275101,
      "learning_rate": 1.8957189828877923e-07,
      "loss": 0.0064,
      "step": 2997720
    },
    {
      "epoch": 4.905867258432997,
      "grad_norm": 0.2562260925769806,
      "learning_rate": 1.895060060752621e-07,
      "loss": 0.0123,
      "step": 2997740
    },
    {
      "epoch": 4.905899988871651,
      "grad_norm": 0.6803785562515259,
      "learning_rate": 1.8944011386174498e-07,
      "loss": 0.0108,
      "step": 2997760
    },
    {
      "epoch": 4.905932719310304,
      "grad_norm": 0.23126792907714844,
      "learning_rate": 1.8937422164822785e-07,
      "loss": 0.0104,
      "step": 2997780
    },
    {
      "epoch": 4.905965449748957,
      "grad_norm": 0.08752806484699249,
      "learning_rate": 1.8930832943471072e-07,
      "loss": 0.0078,
      "step": 2997800
    },
    {
      "epoch": 4.905998180187611,
      "grad_norm": 0.18540607392787933,
      "learning_rate": 1.892424372211936e-07,
      "loss": 0.0138,
      "step": 2997820
    },
    {
      "epoch": 4.9060309106262645,
      "grad_norm": 0.2897200286388397,
      "learning_rate": 1.8917654500767647e-07,
      "loss": 0.0097,
      "step": 2997840
    },
    {
      "epoch": 4.906063641064917,
      "grad_norm": 0.31222355365753174,
      "learning_rate": 1.8911065279415934e-07,
      "loss": 0.0089,
      "step": 2997860
    },
    {
      "epoch": 4.906096371503571,
      "grad_norm": 0.3353097140789032,
      "learning_rate": 1.890447605806422e-07,
      "loss": 0.0066,
      "step": 2997880
    },
    {
      "epoch": 4.906129101942224,
      "grad_norm": 0.20932318270206451,
      "learning_rate": 1.8897886836712506e-07,
      "loss": 0.0093,
      "step": 2997900
    },
    {
      "epoch": 4.906161832380878,
      "grad_norm": 0.09027759730815887,
      "learning_rate": 1.8891297615360793e-07,
      "loss": 0.0108,
      "step": 2997920
    },
    {
      "epoch": 4.906194562819531,
      "grad_norm": 0.22603246569633484,
      "learning_rate": 1.888470839400908e-07,
      "loss": 0.0065,
      "step": 2997940
    },
    {
      "epoch": 4.906227293258184,
      "grad_norm": 0.28822192549705505,
      "learning_rate": 1.8878119172657367e-07,
      "loss": 0.0089,
      "step": 2997960
    },
    {
      "epoch": 4.906260023696838,
      "grad_norm": 0.1210978776216507,
      "learning_rate": 1.8871529951305654e-07,
      "loss": 0.0077,
      "step": 2997980
    },
    {
      "epoch": 4.906292754135491,
      "grad_norm": 0.04967230185866356,
      "learning_rate": 1.8864940729953942e-07,
      "loss": 0.0057,
      "step": 2998000
    },
    {
      "epoch": 4.906325484574144,
      "grad_norm": 0.32396161556243896,
      "learning_rate": 1.8858351508602231e-07,
      "loss": 0.0111,
      "step": 2998020
    },
    {
      "epoch": 4.906358215012798,
      "grad_norm": 0.27590644359588623,
      "learning_rate": 1.8851762287250519e-07,
      "loss": 0.0085,
      "step": 2998040
    },
    {
      "epoch": 4.9063909454514505,
      "grad_norm": 0.11127808690071106,
      "learning_rate": 1.8845173065898806e-07,
      "loss": 0.0086,
      "step": 2998060
    },
    {
      "epoch": 4.906423675890104,
      "grad_norm": 0.5792940258979797,
      "learning_rate": 1.8838583844547093e-07,
      "loss": 0.0146,
      "step": 2998080
    },
    {
      "epoch": 4.906456406328758,
      "grad_norm": 0.4346119165420532,
      "learning_rate": 1.883199462319538e-07,
      "loss": 0.0085,
      "step": 2998100
    },
    {
      "epoch": 4.906489136767411,
      "grad_norm": 0.22344447672367096,
      "learning_rate": 1.8825405401843665e-07,
      "loss": 0.0048,
      "step": 2998120
    },
    {
      "epoch": 4.906521867206064,
      "grad_norm": 0.24538548290729523,
      "learning_rate": 1.8818816180491952e-07,
      "loss": 0.0079,
      "step": 2998140
    },
    {
      "epoch": 4.906554597644718,
      "grad_norm": 0.37630853056907654,
      "learning_rate": 1.881222695914024e-07,
      "loss": 0.0066,
      "step": 2998160
    },
    {
      "epoch": 4.906587328083371,
      "grad_norm": 1.2202869653701782,
      "learning_rate": 1.8805637737788526e-07,
      "loss": 0.0105,
      "step": 2998180
    },
    {
      "epoch": 4.906620058522025,
      "grad_norm": 0.5311963558197021,
      "learning_rate": 1.8799048516436814e-07,
      "loss": 0.0109,
      "step": 2998200
    },
    {
      "epoch": 4.9066527889606775,
      "grad_norm": 0.19258658587932587,
      "learning_rate": 1.87924592950851e-07,
      "loss": 0.0084,
      "step": 2998220
    },
    {
      "epoch": 4.906685519399331,
      "grad_norm": 0.049175288528203964,
      "learning_rate": 1.8785870073733388e-07,
      "loss": 0.0081,
      "step": 2998240
    },
    {
      "epoch": 4.906718249837985,
      "grad_norm": 0.5083742141723633,
      "learning_rate": 1.8779280852381675e-07,
      "loss": 0.0141,
      "step": 2998260
    },
    {
      "epoch": 4.906750980276637,
      "grad_norm": 0.15662001073360443,
      "learning_rate": 1.8772691631029963e-07,
      "loss": 0.0087,
      "step": 2998280
    },
    {
      "epoch": 4.906783710715291,
      "grad_norm": 0.3273596465587616,
      "learning_rate": 1.876610240967825e-07,
      "loss": 0.0104,
      "step": 2998300
    },
    {
      "epoch": 4.9068164411539446,
      "grad_norm": 0.6009982824325562,
      "learning_rate": 1.875951318832654e-07,
      "loss": 0.0133,
      "step": 2998320
    },
    {
      "epoch": 4.906849171592597,
      "grad_norm": 0.1650203913450241,
      "learning_rate": 1.8752923966974821e-07,
      "loss": 0.012,
      "step": 2998340
    },
    {
      "epoch": 4.906881902031251,
      "grad_norm": 0.46438267827033997,
      "learning_rate": 1.874633474562311e-07,
      "loss": 0.0112,
      "step": 2998360
    },
    {
      "epoch": 4.9069146324699044,
      "grad_norm": 0.13420306146144867,
      "learning_rate": 1.8739745524271399e-07,
      "loss": 0.0064,
      "step": 2998380
    },
    {
      "epoch": 4.906947362908558,
      "grad_norm": 0.14810782670974731,
      "learning_rate": 1.8733156302919686e-07,
      "loss": 0.0079,
      "step": 2998400
    },
    {
      "epoch": 4.906980093347211,
      "grad_norm": 0.17213039100170135,
      "learning_rate": 1.8726567081567973e-07,
      "loss": 0.0097,
      "step": 2998420
    },
    {
      "epoch": 4.907012823785864,
      "grad_norm": 0.17313304543495178,
      "learning_rate": 1.871997786021626e-07,
      "loss": 0.0091,
      "step": 2998440
    },
    {
      "epoch": 4.907045554224518,
      "grad_norm": 0.17061324417591095,
      "learning_rate": 1.8713388638864547e-07,
      "loss": 0.0055,
      "step": 2998460
    },
    {
      "epoch": 4.9070782846631715,
      "grad_norm": 0.17710310220718384,
      "learning_rate": 1.8706799417512835e-07,
      "loss": 0.0105,
      "step": 2998480
    },
    {
      "epoch": 4.907111015101824,
      "grad_norm": 0.4591001272201538,
      "learning_rate": 1.8700210196161122e-07,
      "loss": 0.0088,
      "step": 2998500
    },
    {
      "epoch": 4.907143745540478,
      "grad_norm": 0.11163241416215897,
      "learning_rate": 1.869362097480941e-07,
      "loss": 0.0095,
      "step": 2998520
    },
    {
      "epoch": 4.907176475979131,
      "grad_norm": 0.14314773678779602,
      "learning_rate": 1.8687031753457696e-07,
      "loss": 0.012,
      "step": 2998540
    },
    {
      "epoch": 4.907209206417784,
      "grad_norm": 0.3512091040611267,
      "learning_rate": 1.8680442532105983e-07,
      "loss": 0.0112,
      "step": 2998560
    },
    {
      "epoch": 4.907241936856438,
      "grad_norm": 0.08758005499839783,
      "learning_rate": 1.8673853310754268e-07,
      "loss": 0.0078,
      "step": 2998580
    },
    {
      "epoch": 4.907274667295091,
      "grad_norm": 0.2498650699853897,
      "learning_rate": 1.8667264089402555e-07,
      "loss": 0.0068,
      "step": 2998600
    },
    {
      "epoch": 4.907307397733744,
      "grad_norm": 0.2575839161872864,
      "learning_rate": 1.8660674868050842e-07,
      "loss": 0.0086,
      "step": 2998620
    },
    {
      "epoch": 4.907340128172398,
      "grad_norm": 0.3482362627983093,
      "learning_rate": 1.865408564669913e-07,
      "loss": 0.0082,
      "step": 2998640
    },
    {
      "epoch": 4.907372858611051,
      "grad_norm": 0.2257772833108902,
      "learning_rate": 1.8647496425347417e-07,
      "loss": 0.0068,
      "step": 2998660
    },
    {
      "epoch": 4.907405589049705,
      "grad_norm": 0.07833382487297058,
      "learning_rate": 1.8640907203995707e-07,
      "loss": 0.0096,
      "step": 2998680
    },
    {
      "epoch": 4.9074383194883575,
      "grad_norm": 0.04938574135303497,
      "learning_rate": 1.8634317982643994e-07,
      "loss": 0.0079,
      "step": 2998700
    },
    {
      "epoch": 4.907471049927011,
      "grad_norm": 0.5755889415740967,
      "learning_rate": 1.862772876129228e-07,
      "loss": 0.0088,
      "step": 2998720
    },
    {
      "epoch": 4.907503780365665,
      "grad_norm": 0.2016623467206955,
      "learning_rate": 1.8621139539940568e-07,
      "loss": 0.0078,
      "step": 2998740
    },
    {
      "epoch": 4.907536510804318,
      "grad_norm": 0.3486713767051697,
      "learning_rate": 1.8614550318588856e-07,
      "loss": 0.0118,
      "step": 2998760
    },
    {
      "epoch": 4.907569241242971,
      "grad_norm": 0.17071714997291565,
      "learning_rate": 1.8607961097237143e-07,
      "loss": 0.0089,
      "step": 2998780
    },
    {
      "epoch": 4.907601971681625,
      "grad_norm": 0.22292885184288025,
      "learning_rate": 1.8601371875885427e-07,
      "loss": 0.0078,
      "step": 2998800
    },
    {
      "epoch": 4.907634702120278,
      "grad_norm": 0.1492515653371811,
      "learning_rate": 1.8594782654533715e-07,
      "loss": 0.0063,
      "step": 2998820
    },
    {
      "epoch": 4.907667432558931,
      "grad_norm": 0.13720226287841797,
      "learning_rate": 1.8588193433182002e-07,
      "loss": 0.0086,
      "step": 2998840
    },
    {
      "epoch": 4.9077001629975845,
      "grad_norm": 0.2504735291004181,
      "learning_rate": 1.858160421183029e-07,
      "loss": 0.013,
      "step": 2998860
    },
    {
      "epoch": 4.907732893436238,
      "grad_norm": 0.03163125365972519,
      "learning_rate": 1.8575014990478576e-07,
      "loss": 0.0081,
      "step": 2998880
    },
    {
      "epoch": 4.907765623874891,
      "grad_norm": 0.24823631346225739,
      "learning_rate": 1.8568425769126863e-07,
      "loss": 0.0086,
      "step": 2998900
    },
    {
      "epoch": 4.907798354313544,
      "grad_norm": 0.6083200573921204,
      "learning_rate": 1.856183654777515e-07,
      "loss": 0.0076,
      "step": 2998920
    },
    {
      "epoch": 4.907831084752198,
      "grad_norm": 0.2783571183681488,
      "learning_rate": 1.8555247326423438e-07,
      "loss": 0.0157,
      "step": 2998940
    },
    {
      "epoch": 4.907863815190852,
      "grad_norm": 0.17061960697174072,
      "learning_rate": 1.8548658105071725e-07,
      "loss": 0.0141,
      "step": 2998960
    },
    {
      "epoch": 4.907896545629504,
      "grad_norm": 0.17337709665298462,
      "learning_rate": 1.8542068883720015e-07,
      "loss": 0.0083,
      "step": 2998980
    },
    {
      "epoch": 4.907929276068158,
      "grad_norm": 0.1698598861694336,
      "learning_rate": 1.8535479662368302e-07,
      "loss": 0.0065,
      "step": 2999000
    },
    {
      "epoch": 4.9079620065068115,
      "grad_norm": 0.18275615572929382,
      "learning_rate": 1.8528890441016584e-07,
      "loss": 0.0065,
      "step": 2999020
    },
    {
      "epoch": 4.907994736945464,
      "grad_norm": 0.11366479098796844,
      "learning_rate": 1.8522301219664874e-07,
      "loss": 0.0063,
      "step": 2999040
    },
    {
      "epoch": 4.908027467384118,
      "grad_norm": 0.16895902156829834,
      "learning_rate": 1.851571199831316e-07,
      "loss": 0.0063,
      "step": 2999060
    },
    {
      "epoch": 4.908060197822771,
      "grad_norm": 0.17279204726219177,
      "learning_rate": 1.8509122776961448e-07,
      "loss": 0.0094,
      "step": 2999080
    },
    {
      "epoch": 4.908092928261425,
      "grad_norm": 0.2931792736053467,
      "learning_rate": 1.8502533555609735e-07,
      "loss": 0.0121,
      "step": 2999100
    },
    {
      "epoch": 4.908125658700078,
      "grad_norm": 0.11863509565591812,
      "learning_rate": 1.8495944334258023e-07,
      "loss": 0.0081,
      "step": 2999120
    },
    {
      "epoch": 4.908158389138731,
      "grad_norm": 0.3353395164012909,
      "learning_rate": 1.848935511290631e-07,
      "loss": 0.0073,
      "step": 2999140
    },
    {
      "epoch": 4.908191119577385,
      "grad_norm": 0.1568915992975235,
      "learning_rate": 1.8482765891554597e-07,
      "loss": 0.0084,
      "step": 2999160
    },
    {
      "epoch": 4.908223850016038,
      "grad_norm": 0.16367167234420776,
      "learning_rate": 1.8476176670202884e-07,
      "loss": 0.0117,
      "step": 2999180
    },
    {
      "epoch": 4.908256580454691,
      "grad_norm": 0.21334926784038544,
      "learning_rate": 1.8469587448851171e-07,
      "loss": 0.0085,
      "step": 2999200
    },
    {
      "epoch": 4.908289310893345,
      "grad_norm": 0.0800040066242218,
      "learning_rate": 1.846299822749946e-07,
      "loss": 0.0067,
      "step": 2999220
    },
    {
      "epoch": 4.9083220413319975,
      "grad_norm": 0.11575808376073837,
      "learning_rate": 1.8456409006147743e-07,
      "loss": 0.0081,
      "step": 2999240
    },
    {
      "epoch": 4.908354771770651,
      "grad_norm": 0.08249025046825409,
      "learning_rate": 1.844981978479603e-07,
      "loss": 0.0076,
      "step": 2999260
    },
    {
      "epoch": 4.908387502209305,
      "grad_norm": 0.3665996193885803,
      "learning_rate": 1.8443230563444318e-07,
      "loss": 0.0073,
      "step": 2999280
    },
    {
      "epoch": 4.908420232647958,
      "grad_norm": 0.1563984900712967,
      "learning_rate": 1.8436641342092605e-07,
      "loss": 0.0101,
      "step": 2999300
    },
    {
      "epoch": 4.908452963086611,
      "grad_norm": 0.43215620517730713,
      "learning_rate": 1.8430052120740892e-07,
      "loss": 0.0103,
      "step": 2999320
    },
    {
      "epoch": 4.9084856935252645,
      "grad_norm": 0.6912580728530884,
      "learning_rate": 1.8423462899389182e-07,
      "loss": 0.0079,
      "step": 2999340
    },
    {
      "epoch": 4.908518423963918,
      "grad_norm": 0.3281220495700836,
      "learning_rate": 1.841687367803747e-07,
      "loss": 0.0103,
      "step": 2999360
    },
    {
      "epoch": 4.908551154402572,
      "grad_norm": 0.4047064185142517,
      "learning_rate": 1.8410284456685756e-07,
      "loss": 0.0072,
      "step": 2999380
    },
    {
      "epoch": 4.908583884841224,
      "grad_norm": 0.3656286895275116,
      "learning_rate": 1.8403695235334044e-07,
      "loss": 0.0097,
      "step": 2999400
    },
    {
      "epoch": 4.908616615279878,
      "grad_norm": 0.17506171762943268,
      "learning_rate": 1.839710601398233e-07,
      "loss": 0.0072,
      "step": 2999420
    },
    {
      "epoch": 4.908649345718532,
      "grad_norm": 0.15818504989147186,
      "learning_rate": 1.8390516792630618e-07,
      "loss": 0.0085,
      "step": 2999440
    },
    {
      "epoch": 4.908682076157184,
      "grad_norm": 0.3344809114933014,
      "learning_rate": 1.8383927571278903e-07,
      "loss": 0.0064,
      "step": 2999460
    },
    {
      "epoch": 4.908714806595838,
      "grad_norm": 0.2518274784088135,
      "learning_rate": 1.837733834992719e-07,
      "loss": 0.0099,
      "step": 2999480
    },
    {
      "epoch": 4.9087475370344915,
      "grad_norm": 0.5480097532272339,
      "learning_rate": 1.8370749128575477e-07,
      "loss": 0.0067,
      "step": 2999500
    },
    {
      "epoch": 4.908780267473144,
      "grad_norm": 0.15028399229049683,
      "learning_rate": 1.8364159907223764e-07,
      "loss": 0.0067,
      "step": 2999520
    },
    {
      "epoch": 4.908812997911798,
      "grad_norm": 0.21437053382396698,
      "learning_rate": 1.8357570685872051e-07,
      "loss": 0.0081,
      "step": 2999540
    },
    {
      "epoch": 4.908845728350451,
      "grad_norm": 0.3289978802204132,
      "learning_rate": 1.8350981464520339e-07,
      "loss": 0.0112,
      "step": 2999560
    },
    {
      "epoch": 4.908878458789105,
      "grad_norm": 0.24074068665504456,
      "learning_rate": 1.8344392243168626e-07,
      "loss": 0.0075,
      "step": 2999580
    },
    {
      "epoch": 4.908911189227758,
      "grad_norm": 0.076177217066288,
      "learning_rate": 1.8337803021816913e-07,
      "loss": 0.0072,
      "step": 2999600
    },
    {
      "epoch": 4.908943919666411,
      "grad_norm": 0.356669545173645,
      "learning_rate": 1.83312138004652e-07,
      "loss": 0.0087,
      "step": 2999620
    },
    {
      "epoch": 4.908976650105065,
      "grad_norm": 0.12307946383953094,
      "learning_rate": 1.832462457911349e-07,
      "loss": 0.0088,
      "step": 2999640
    },
    {
      "epoch": 4.9090093805437185,
      "grad_norm": 0.2739991843700409,
      "learning_rate": 1.8318035357761777e-07,
      "loss": 0.0067,
      "step": 2999660
    },
    {
      "epoch": 4.909042110982371,
      "grad_norm": 0.15191128849983215,
      "learning_rate": 1.831144613641006e-07,
      "loss": 0.01,
      "step": 2999680
    },
    {
      "epoch": 4.909074841421025,
      "grad_norm": 0.20649951696395874,
      "learning_rate": 1.830485691505835e-07,
      "loss": 0.0079,
      "step": 2999700
    },
    {
      "epoch": 4.909107571859678,
      "grad_norm": 0.2803933024406433,
      "learning_rate": 1.8298267693706636e-07,
      "loss": 0.0075,
      "step": 2999720
    },
    {
      "epoch": 4.909140302298331,
      "grad_norm": 0.10668504983186722,
      "learning_rate": 1.8291678472354924e-07,
      "loss": 0.009,
      "step": 2999740
    },
    {
      "epoch": 4.909173032736985,
      "grad_norm": 0.14742827415466309,
      "learning_rate": 1.828508925100321e-07,
      "loss": 0.0069,
      "step": 2999760
    },
    {
      "epoch": 4.909205763175638,
      "grad_norm": 0.05630781501531601,
      "learning_rate": 1.8278500029651498e-07,
      "loss": 0.008,
      "step": 2999780
    },
    {
      "epoch": 4.909238493614291,
      "grad_norm": 0.17789852619171143,
      "learning_rate": 1.8271910808299785e-07,
      "loss": 0.0071,
      "step": 2999800
    },
    {
      "epoch": 4.909271224052945,
      "grad_norm": 0.0775759294629097,
      "learning_rate": 1.8265321586948072e-07,
      "loss": 0.01,
      "step": 2999820
    },
    {
      "epoch": 4.909303954491598,
      "grad_norm": 0.4177946150302887,
      "learning_rate": 1.825873236559636e-07,
      "loss": 0.0072,
      "step": 2999840
    },
    {
      "epoch": 4.909336684930252,
      "grad_norm": 0.19334246218204498,
      "learning_rate": 1.8252143144244647e-07,
      "loss": 0.0102,
      "step": 2999860
    },
    {
      "epoch": 4.9093694153689045,
      "grad_norm": 0.20296651124954224,
      "learning_rate": 1.8245553922892934e-07,
      "loss": 0.0094,
      "step": 2999880
    },
    {
      "epoch": 4.909402145807558,
      "grad_norm": 0.1378864049911499,
      "learning_rate": 1.8238964701541219e-07,
      "loss": 0.0101,
      "step": 2999900
    },
    {
      "epoch": 4.909434876246212,
      "grad_norm": 0.2555203437805176,
      "learning_rate": 1.8232375480189506e-07,
      "loss": 0.0075,
      "step": 2999920
    },
    {
      "epoch": 4.909467606684865,
      "grad_norm": 0.2937532365322113,
      "learning_rate": 1.8225786258837793e-07,
      "loss": 0.0109,
      "step": 2999940
    },
    {
      "epoch": 4.909500337123518,
      "grad_norm": 0.23130889236927032,
      "learning_rate": 1.821919703748608e-07,
      "loss": 0.0063,
      "step": 2999960
    },
    {
      "epoch": 4.909533067562172,
      "grad_norm": 0.19384616613388062,
      "learning_rate": 1.8212607816134367e-07,
      "loss": 0.0085,
      "step": 2999980
    },
    {
      "epoch": 4.909565798000825,
      "grad_norm": 0.28622713685035706,
      "learning_rate": 1.8206018594782657e-07,
      "loss": 0.0067,
      "step": 3000000
    },
    {
      "epoch": 4.909565798000825,
      "eval_loss": 0.0056177228689193726,
      "eval_runtime": 6450.4082,
      "eval_samples_per_second": 159.348,
      "eval_steps_per_second": 15.935,
      "eval_sts-dev_pearson_cosine": 0.987302107081129,
      "eval_sts-dev_spearman_cosine": 0.8970525129964229,
      "step": 3000000
    },
    {
      "epoch": 4.909598528439478,
      "grad_norm": 0.34989845752716064,
      "learning_rate": 1.8199429373430944e-07,
      "loss": 0.0109,
      "step": 3000020
    },
    {
      "epoch": 4.9096312588781315,
      "grad_norm": 0.23162508010864258,
      "learning_rate": 1.8192840152079232e-07,
      "loss": 0.0072,
      "step": 3000040
    },
    {
      "epoch": 4.909663989316785,
      "grad_norm": 0.10102127492427826,
      "learning_rate": 1.818625093072752e-07,
      "loss": 0.0059,
      "step": 3000060
    },
    {
      "epoch": 4.909696719755438,
      "grad_norm": 0.057110801339149475,
      "learning_rate": 1.8179661709375806e-07,
      "loss": 0.0048,
      "step": 3000080
    },
    {
      "epoch": 4.909729450194091,
      "grad_norm": 0.22109369933605194,
      "learning_rate": 1.8173072488024093e-07,
      "loss": 0.0107,
      "step": 3000100
    },
    {
      "epoch": 4.909762180632745,
      "grad_norm": 0.21503987908363342,
      "learning_rate": 1.8166483266672378e-07,
      "loss": 0.0089,
      "step": 3000120
    },
    {
      "epoch": 4.9097949110713985,
      "grad_norm": 0.154851034283638,
      "learning_rate": 1.8159894045320665e-07,
      "loss": 0.0106,
      "step": 3000140
    },
    {
      "epoch": 4.909827641510051,
      "grad_norm": 0.2200055718421936,
      "learning_rate": 1.8153304823968952e-07,
      "loss": 0.0109,
      "step": 3000160
    },
    {
      "epoch": 4.909860371948705,
      "grad_norm": 0.2919582724571228,
      "learning_rate": 1.814671560261724e-07,
      "loss": 0.0116,
      "step": 3000180
    },
    {
      "epoch": 4.909893102387358,
      "grad_norm": 0.10739720612764359,
      "learning_rate": 1.8140126381265527e-07,
      "loss": 0.0087,
      "step": 3000200
    },
    {
      "epoch": 4.909925832826012,
      "grad_norm": 0.17273423075675964,
      "learning_rate": 1.8133537159913814e-07,
      "loss": 0.0091,
      "step": 3000220
    },
    {
      "epoch": 4.909958563264665,
      "grad_norm": 0.20961853861808777,
      "learning_rate": 1.81269479385621e-07,
      "loss": 0.008,
      "step": 3000240
    },
    {
      "epoch": 4.909991293703318,
      "grad_norm": 0.15974333882331848,
      "learning_rate": 1.8120358717210388e-07,
      "loss": 0.0057,
      "step": 3000260
    },
    {
      "epoch": 4.910024024141972,
      "grad_norm": 0.2646768391132355,
      "learning_rate": 1.8113769495858676e-07,
      "loss": 0.0092,
      "step": 3000280
    },
    {
      "epoch": 4.910056754580625,
      "grad_norm": 0.13610553741455078,
      "learning_rate": 1.8107180274506965e-07,
      "loss": 0.0078,
      "step": 3000300
    },
    {
      "epoch": 4.910089485019278,
      "grad_norm": 0.23317046463489532,
      "learning_rate": 1.8100591053155253e-07,
      "loss": 0.0114,
      "step": 3000320
    },
    {
      "epoch": 4.910122215457932,
      "grad_norm": 0.16513784229755402,
      "learning_rate": 1.8094001831803534e-07,
      "loss": 0.0085,
      "step": 3000340
    },
    {
      "epoch": 4.9101549458965845,
      "grad_norm": 0.08283018320798874,
      "learning_rate": 1.8087412610451824e-07,
      "loss": 0.0099,
      "step": 3000360
    },
    {
      "epoch": 4.910187676335238,
      "grad_norm": 0.05634380877017975,
      "learning_rate": 1.8080823389100112e-07,
      "loss": 0.0084,
      "step": 3000380
    },
    {
      "epoch": 4.910220406773892,
      "grad_norm": 0.09792030602693558,
      "learning_rate": 1.80742341677484e-07,
      "loss": 0.0057,
      "step": 3000400
    },
    {
      "epoch": 4.910253137212545,
      "grad_norm": 0.18557241559028625,
      "learning_rate": 1.8067644946396686e-07,
      "loss": 0.0098,
      "step": 3000420
    },
    {
      "epoch": 4.910285867651198,
      "grad_norm": 0.3026755452156067,
      "learning_rate": 1.8061055725044973e-07,
      "loss": 0.0093,
      "step": 3000440
    },
    {
      "epoch": 4.910318598089852,
      "grad_norm": 0.13613489270210266,
      "learning_rate": 1.805446650369326e-07,
      "loss": 0.0087,
      "step": 3000460
    },
    {
      "epoch": 4.910351328528505,
      "grad_norm": 0.06403353065252304,
      "learning_rate": 1.8047877282341548e-07,
      "loss": 0.0105,
      "step": 3000480
    },
    {
      "epoch": 4.910384058967158,
      "grad_norm": 0.20394115149974823,
      "learning_rate": 1.8041288060989835e-07,
      "loss": 0.0064,
      "step": 3000500
    },
    {
      "epoch": 4.9104167894058115,
      "grad_norm": 0.35138070583343506,
      "learning_rate": 1.8034698839638122e-07,
      "loss": 0.0099,
      "step": 3000520
    },
    {
      "epoch": 4.910449519844465,
      "grad_norm": 0.18729397654533386,
      "learning_rate": 1.802810961828641e-07,
      "loss": 0.0109,
      "step": 3000540
    },
    {
      "epoch": 4.910482250283119,
      "grad_norm": 0.4004448354244232,
      "learning_rate": 1.8021520396934694e-07,
      "loss": 0.0107,
      "step": 3000560
    },
    {
      "epoch": 4.910514980721771,
      "grad_norm": 0.19548767805099487,
      "learning_rate": 1.801493117558298e-07,
      "loss": 0.0098,
      "step": 3000580
    },
    {
      "epoch": 4.910547711160425,
      "grad_norm": 0.20815657079219818,
      "learning_rate": 1.8008341954231268e-07,
      "loss": 0.0072,
      "step": 3000600
    },
    {
      "epoch": 4.910580441599079,
      "grad_norm": 0.21739880740642548,
      "learning_rate": 1.8001752732879555e-07,
      "loss": 0.0093,
      "step": 3000620
    },
    {
      "epoch": 4.910613172037731,
      "grad_norm": 0.4159053564071655,
      "learning_rate": 1.7995163511527843e-07,
      "loss": 0.0107,
      "step": 3000640
    },
    {
      "epoch": 4.910645902476385,
      "grad_norm": 0.10953740775585175,
      "learning_rate": 1.7988574290176132e-07,
      "loss": 0.005,
      "step": 3000660
    },
    {
      "epoch": 4.9106786329150385,
      "grad_norm": 0.13367600739002228,
      "learning_rate": 1.798198506882442e-07,
      "loss": 0.0044,
      "step": 3000680
    },
    {
      "epoch": 4.910711363353691,
      "grad_norm": 0.10856945067644119,
      "learning_rate": 1.7975395847472707e-07,
      "loss": 0.006,
      "step": 3000700
    },
    {
      "epoch": 4.910744093792345,
      "grad_norm": 0.3341350257396698,
      "learning_rate": 1.7968806626120994e-07,
      "loss": 0.0138,
      "step": 3000720
    },
    {
      "epoch": 4.910776824230998,
      "grad_norm": 0.22920623421669006,
      "learning_rate": 1.7962217404769281e-07,
      "loss": 0.0107,
      "step": 3000740
    },
    {
      "epoch": 4.910809554669652,
      "grad_norm": 0.42151957750320435,
      "learning_rate": 1.7955628183417569e-07,
      "loss": 0.0098,
      "step": 3000760
    },
    {
      "epoch": 4.910842285108305,
      "grad_norm": 0.1162283644080162,
      "learning_rate": 1.7949038962065853e-07,
      "loss": 0.0073,
      "step": 3000780
    },
    {
      "epoch": 4.910875015546958,
      "grad_norm": 0.19520600140094757,
      "learning_rate": 1.794244974071414e-07,
      "loss": 0.0069,
      "step": 3000800
    },
    {
      "epoch": 4.910907745985612,
      "grad_norm": 0.13322027027606964,
      "learning_rate": 1.7935860519362428e-07,
      "loss": 0.0076,
      "step": 3000820
    },
    {
      "epoch": 4.9109404764242655,
      "grad_norm": 0.26676174998283386,
      "learning_rate": 1.7929271298010715e-07,
      "loss": 0.0099,
      "step": 3000840
    },
    {
      "epoch": 4.910973206862918,
      "grad_norm": 0.356316477060318,
      "learning_rate": 1.7922682076659002e-07,
      "loss": 0.0066,
      "step": 3000860
    },
    {
      "epoch": 4.911005937301572,
      "grad_norm": 0.13229338824748993,
      "learning_rate": 1.791609285530729e-07,
      "loss": 0.0108,
      "step": 3000880
    },
    {
      "epoch": 4.911038667740225,
      "grad_norm": 0.24639146029949188,
      "learning_rate": 1.7909503633955576e-07,
      "loss": 0.0061,
      "step": 3000900
    },
    {
      "epoch": 4.911071398178878,
      "grad_norm": 0.20719219744205475,
      "learning_rate": 1.7902914412603864e-07,
      "loss": 0.0101,
      "step": 3000920
    },
    {
      "epoch": 4.911104128617532,
      "grad_norm": 0.08625542372465134,
      "learning_rate": 1.7896325191252153e-07,
      "loss": 0.0048,
      "step": 3000940
    },
    {
      "epoch": 4.911136859056185,
      "grad_norm": 0.4103091061115265,
      "learning_rate": 1.788973596990044e-07,
      "loss": 0.0077,
      "step": 3000960
    },
    {
      "epoch": 4.911169589494838,
      "grad_norm": 0.15754950046539307,
      "learning_rate": 1.7883146748548728e-07,
      "loss": 0.0107,
      "step": 3000980
    },
    {
      "epoch": 4.9112023199334915,
      "grad_norm": 0.12808923423290253,
      "learning_rate": 1.787655752719701e-07,
      "loss": 0.0091,
      "step": 3001000
    },
    {
      "epoch": 4.911235050372145,
      "grad_norm": 0.32104337215423584,
      "learning_rate": 1.78699683058453e-07,
      "loss": 0.0076,
      "step": 3001020
    },
    {
      "epoch": 4.911267780810799,
      "grad_norm": 0.1255754828453064,
      "learning_rate": 1.7863379084493587e-07,
      "loss": 0.0042,
      "step": 3001040
    },
    {
      "epoch": 4.911300511249451,
      "grad_norm": 0.33224448561668396,
      "learning_rate": 1.7856789863141874e-07,
      "loss": 0.0062,
      "step": 3001060
    },
    {
      "epoch": 4.911333241688105,
      "grad_norm": 0.29148736596107483,
      "learning_rate": 1.785020064179016e-07,
      "loss": 0.0094,
      "step": 3001080
    },
    {
      "epoch": 4.911365972126759,
      "grad_norm": 0.17180654406547546,
      "learning_rate": 1.7843611420438448e-07,
      "loss": 0.0087,
      "step": 3001100
    },
    {
      "epoch": 4.911398702565412,
      "grad_norm": 0.21667519211769104,
      "learning_rate": 1.7837022199086736e-07,
      "loss": 0.0117,
      "step": 3001120
    },
    {
      "epoch": 4.911431433004065,
      "grad_norm": 0.5336283445358276,
      "learning_rate": 1.7830432977735023e-07,
      "loss": 0.0102,
      "step": 3001140
    },
    {
      "epoch": 4.9114641634427185,
      "grad_norm": 0.11219443380832672,
      "learning_rate": 1.782384375638331e-07,
      "loss": 0.0075,
      "step": 3001160
    },
    {
      "epoch": 4.911496893881372,
      "grad_norm": 0.17649413645267487,
      "learning_rate": 1.7817254535031597e-07,
      "loss": 0.0067,
      "step": 3001180
    },
    {
      "epoch": 4.911529624320025,
      "grad_norm": 0.2575319707393646,
      "learning_rate": 1.7810665313679885e-07,
      "loss": 0.0089,
      "step": 3001200
    },
    {
      "epoch": 4.911562354758678,
      "grad_norm": 0.4053303003311157,
      "learning_rate": 1.780407609232817e-07,
      "loss": 0.0086,
      "step": 3001220
    },
    {
      "epoch": 4.911595085197332,
      "grad_norm": 0.16105584800243378,
      "learning_rate": 1.7797486870976456e-07,
      "loss": 0.006,
      "step": 3001240
    },
    {
      "epoch": 4.911627815635985,
      "grad_norm": 0.3305214047431946,
      "learning_rate": 1.7790897649624743e-07,
      "loss": 0.0092,
      "step": 3001260
    },
    {
      "epoch": 4.911660546074638,
      "grad_norm": 0.2965112030506134,
      "learning_rate": 1.778430842827303e-07,
      "loss": 0.012,
      "step": 3001280
    },
    {
      "epoch": 4.911693276513292,
      "grad_norm": 0.2728721797466278,
      "learning_rate": 1.7777719206921318e-07,
      "loss": 0.0136,
      "step": 3001300
    },
    {
      "epoch": 4.9117260069519455,
      "grad_norm": 0.10194065421819687,
      "learning_rate": 1.7771129985569608e-07,
      "loss": 0.0086,
      "step": 3001320
    },
    {
      "epoch": 4.911758737390598,
      "grad_norm": 0.13422109186649323,
      "learning_rate": 1.7764540764217895e-07,
      "loss": 0.0101,
      "step": 3001340
    },
    {
      "epoch": 4.911791467829252,
      "grad_norm": 0.5031956434249878,
      "learning_rate": 1.7757951542866182e-07,
      "loss": 0.0102,
      "step": 3001360
    },
    {
      "epoch": 4.911824198267905,
      "grad_norm": 0.30830416083335876,
      "learning_rate": 1.775136232151447e-07,
      "loss": 0.0075,
      "step": 3001380
    },
    {
      "epoch": 4.911856928706559,
      "grad_norm": 0.23909860849380493,
      "learning_rate": 1.7744773100162757e-07,
      "loss": 0.0071,
      "step": 3001400
    },
    {
      "epoch": 4.911889659145212,
      "grad_norm": 0.16356836259365082,
      "learning_rate": 1.7738183878811044e-07,
      "loss": 0.0127,
      "step": 3001420
    },
    {
      "epoch": 4.911922389583865,
      "grad_norm": 0.11582586914300919,
      "learning_rate": 1.7731594657459328e-07,
      "loss": 0.0059,
      "step": 3001440
    },
    {
      "epoch": 4.911955120022519,
      "grad_norm": 0.23433072865009308,
      "learning_rate": 1.7725005436107616e-07,
      "loss": 0.0074,
      "step": 3001460
    },
    {
      "epoch": 4.911987850461172,
      "grad_norm": 0.28481778502464294,
      "learning_rate": 1.7718416214755903e-07,
      "loss": 0.008,
      "step": 3001480
    },
    {
      "epoch": 4.912020580899825,
      "grad_norm": 0.4383212924003601,
      "learning_rate": 1.771182699340419e-07,
      "loss": 0.0099,
      "step": 3001500
    },
    {
      "epoch": 4.912053311338479,
      "grad_norm": 0.47005680203437805,
      "learning_rate": 1.7705237772052477e-07,
      "loss": 0.0067,
      "step": 3001520
    },
    {
      "epoch": 4.9120860417771315,
      "grad_norm": 0.5888749957084656,
      "learning_rate": 1.7698648550700764e-07,
      "loss": 0.0127,
      "step": 3001540
    },
    {
      "epoch": 4.912118772215785,
      "grad_norm": 0.14595842361450195,
      "learning_rate": 1.7692059329349052e-07,
      "loss": 0.0066,
      "step": 3001560
    },
    {
      "epoch": 4.912151502654439,
      "grad_norm": 0.1577431857585907,
      "learning_rate": 1.768547010799734e-07,
      "loss": 0.0068,
      "step": 3001580
    },
    {
      "epoch": 4.912184233093092,
      "grad_norm": 0.4029512107372284,
      "learning_rate": 1.767888088664563e-07,
      "loss": 0.0101,
      "step": 3001600
    },
    {
      "epoch": 4.912216963531745,
      "grad_norm": 0.23278746008872986,
      "learning_rate": 1.7672291665293916e-07,
      "loss": 0.0104,
      "step": 3001620
    },
    {
      "epoch": 4.912249693970399,
      "grad_norm": 0.1585782915353775,
      "learning_rate": 1.7665702443942203e-07,
      "loss": 0.0084,
      "step": 3001640
    },
    {
      "epoch": 4.912282424409052,
      "grad_norm": 0.2549182176589966,
      "learning_rate": 1.7659113222590485e-07,
      "loss": 0.0077,
      "step": 3001660
    },
    {
      "epoch": 4.912315154847706,
      "grad_norm": 0.10236645489931107,
      "learning_rate": 1.7652524001238775e-07,
      "loss": 0.0082,
      "step": 3001680
    },
    {
      "epoch": 4.9123478852863585,
      "grad_norm": 0.3365037739276886,
      "learning_rate": 1.7645934779887062e-07,
      "loss": 0.0073,
      "step": 3001700
    },
    {
      "epoch": 4.912380615725012,
      "grad_norm": 0.2516639530658722,
      "learning_rate": 1.763934555853535e-07,
      "loss": 0.0092,
      "step": 3001720
    },
    {
      "epoch": 4.912413346163666,
      "grad_norm": 0.09896698594093323,
      "learning_rate": 1.7632756337183637e-07,
      "loss": 0.0089,
      "step": 3001740
    },
    {
      "epoch": 4.912446076602318,
      "grad_norm": 0.3413615822792053,
      "learning_rate": 1.7626167115831924e-07,
      "loss": 0.0061,
      "step": 3001760
    },
    {
      "epoch": 4.912478807040972,
      "grad_norm": 0.1259920597076416,
      "learning_rate": 1.761957789448021e-07,
      "loss": 0.0089,
      "step": 3001780
    },
    {
      "epoch": 4.9125115374796255,
      "grad_norm": 0.16755591332912445,
      "learning_rate": 1.7612988673128498e-07,
      "loss": 0.0057,
      "step": 3001800
    },
    {
      "epoch": 4.912544267918278,
      "grad_norm": 0.2784323990345001,
      "learning_rate": 1.7606399451776785e-07,
      "loss": 0.0063,
      "step": 3001820
    },
    {
      "epoch": 4.912576998356932,
      "grad_norm": 0.12132170796394348,
      "learning_rate": 1.7599810230425073e-07,
      "loss": 0.01,
      "step": 3001840
    },
    {
      "epoch": 4.912609728795585,
      "grad_norm": 0.26750263571739197,
      "learning_rate": 1.759322100907336e-07,
      "loss": 0.008,
      "step": 3001860
    },
    {
      "epoch": 4.912642459234239,
      "grad_norm": 0.12962765991687775,
      "learning_rate": 1.7586631787721644e-07,
      "loss": 0.0098,
      "step": 3001880
    },
    {
      "epoch": 4.912675189672892,
      "grad_norm": 0.15285059809684753,
      "learning_rate": 1.7580042566369932e-07,
      "loss": 0.0061,
      "step": 3001900
    },
    {
      "epoch": 4.912707920111545,
      "grad_norm": 0.6410858035087585,
      "learning_rate": 1.757345334501822e-07,
      "loss": 0.0091,
      "step": 3001920
    },
    {
      "epoch": 4.912740650550199,
      "grad_norm": 0.22851036489009857,
      "learning_rate": 1.7566864123666506e-07,
      "loss": 0.0107,
      "step": 3001940
    },
    {
      "epoch": 4.912773380988852,
      "grad_norm": 0.45023053884506226,
      "learning_rate": 1.7560274902314793e-07,
      "loss": 0.0116,
      "step": 3001960
    },
    {
      "epoch": 4.912806111427505,
      "grad_norm": 0.4075668752193451,
      "learning_rate": 1.7553685680963083e-07,
      "loss": 0.0134,
      "step": 3001980
    },
    {
      "epoch": 4.912838841866159,
      "grad_norm": 0.2557026147842407,
      "learning_rate": 1.754709645961137e-07,
      "loss": 0.0098,
      "step": 3002000
    },
    {
      "epoch": 4.912871572304812,
      "grad_norm": 0.22941705584526062,
      "learning_rate": 1.7540507238259657e-07,
      "loss": 0.009,
      "step": 3002020
    },
    {
      "epoch": 4.912904302743465,
      "grad_norm": 0.30533844232559204,
      "learning_rate": 1.7533918016907945e-07,
      "loss": 0.009,
      "step": 3002040
    },
    {
      "epoch": 4.912937033182119,
      "grad_norm": 0.3774746060371399,
      "learning_rate": 1.7527328795556232e-07,
      "loss": 0.0112,
      "step": 3002060
    },
    {
      "epoch": 4.912969763620772,
      "grad_norm": 0.2264176607131958,
      "learning_rate": 1.752073957420452e-07,
      "loss": 0.0071,
      "step": 3002080
    },
    {
      "epoch": 4.913002494059425,
      "grad_norm": 0.14213256537914276,
      "learning_rate": 1.7514150352852804e-07,
      "loss": 0.0089,
      "step": 3002100
    },
    {
      "epoch": 4.913035224498079,
      "grad_norm": 0.47993746399879456,
      "learning_rate": 1.750756113150109e-07,
      "loss": 0.0111,
      "step": 3002120
    },
    {
      "epoch": 4.913067954936732,
      "grad_norm": 0.09962626546621323,
      "learning_rate": 1.7500971910149378e-07,
      "loss": 0.011,
      "step": 3002140
    },
    {
      "epoch": 4.913100685375385,
      "grad_norm": 0.18328708410263062,
      "learning_rate": 1.7494382688797665e-07,
      "loss": 0.0073,
      "step": 3002160
    },
    {
      "epoch": 4.9131334158140385,
      "grad_norm": 0.5630474090576172,
      "learning_rate": 1.7487793467445952e-07,
      "loss": 0.0088,
      "step": 3002180
    },
    {
      "epoch": 4.913166146252692,
      "grad_norm": 0.12261354923248291,
      "learning_rate": 1.748120424609424e-07,
      "loss": 0.0121,
      "step": 3002200
    },
    {
      "epoch": 4.913198876691346,
      "grad_norm": 0.16855813562870026,
      "learning_rate": 1.7474615024742527e-07,
      "loss": 0.008,
      "step": 3002220
    },
    {
      "epoch": 4.913231607129998,
      "grad_norm": 0.3629336357116699,
      "learning_rate": 1.7468025803390814e-07,
      "loss": 0.0113,
      "step": 3002240
    },
    {
      "epoch": 4.913264337568652,
      "grad_norm": 0.19307005405426025,
      "learning_rate": 1.7461436582039104e-07,
      "loss": 0.0086,
      "step": 3002260
    },
    {
      "epoch": 4.913297068007306,
      "grad_norm": 0.30610787868499756,
      "learning_rate": 1.745484736068739e-07,
      "loss": 0.012,
      "step": 3002280
    },
    {
      "epoch": 4.913329798445959,
      "grad_norm": 0.25993338227272034,
      "learning_rate": 1.7448258139335678e-07,
      "loss": 0.0076,
      "step": 3002300
    },
    {
      "epoch": 4.913362528884612,
      "grad_norm": 0.15893155336380005,
      "learning_rate": 1.744166891798396e-07,
      "loss": 0.0086,
      "step": 3002320
    },
    {
      "epoch": 4.9133952593232655,
      "grad_norm": 0.10774611681699753,
      "learning_rate": 1.743507969663225e-07,
      "loss": 0.0077,
      "step": 3002340
    },
    {
      "epoch": 4.913427989761919,
      "grad_norm": 0.3449542224407196,
      "learning_rate": 1.7428490475280537e-07,
      "loss": 0.0067,
      "step": 3002360
    },
    {
      "epoch": 4.913460720200572,
      "grad_norm": 0.2493341565132141,
      "learning_rate": 1.7421901253928825e-07,
      "loss": 0.01,
      "step": 3002380
    },
    {
      "epoch": 4.913493450639225,
      "grad_norm": 0.6636980772018433,
      "learning_rate": 1.7415312032577112e-07,
      "loss": 0.0092,
      "step": 3002400
    },
    {
      "epoch": 4.913526181077879,
      "grad_norm": 0.29804500937461853,
      "learning_rate": 1.74087228112254e-07,
      "loss": 0.0106,
      "step": 3002420
    },
    {
      "epoch": 4.913558911516532,
      "grad_norm": 0.48562654852867126,
      "learning_rate": 1.7402133589873686e-07,
      "loss": 0.0111,
      "step": 3002440
    },
    {
      "epoch": 4.913591641955185,
      "grad_norm": 0.2331424355506897,
      "learning_rate": 1.7395544368521973e-07,
      "loss": 0.0081,
      "step": 3002460
    },
    {
      "epoch": 4.913624372393839,
      "grad_norm": 0.09263011068105698,
      "learning_rate": 1.738895514717026e-07,
      "loss": 0.0083,
      "step": 3002480
    },
    {
      "epoch": 4.9136571028324925,
      "grad_norm": 0.09717609733343124,
      "learning_rate": 1.7382365925818548e-07,
      "loss": 0.0064,
      "step": 3002500
    },
    {
      "epoch": 4.913689833271145,
      "grad_norm": 0.4612996280193329,
      "learning_rate": 1.7375776704466835e-07,
      "loss": 0.008,
      "step": 3002520
    },
    {
      "epoch": 4.913722563709799,
      "grad_norm": 0.32506513595581055,
      "learning_rate": 1.736918748311512e-07,
      "loss": 0.0086,
      "step": 3002540
    },
    {
      "epoch": 4.913755294148452,
      "grad_norm": 0.1728040874004364,
      "learning_rate": 1.7362598261763407e-07,
      "loss": 0.0074,
      "step": 3002560
    },
    {
      "epoch": 4.913788024587106,
      "grad_norm": 0.2462771236896515,
      "learning_rate": 1.7356009040411694e-07,
      "loss": 0.0094,
      "step": 3002580
    },
    {
      "epoch": 4.913820755025759,
      "grad_norm": 0.23728348314762115,
      "learning_rate": 1.734941981905998e-07,
      "loss": 0.0049,
      "step": 3002600
    },
    {
      "epoch": 4.913853485464412,
      "grad_norm": 0.9278337955474854,
      "learning_rate": 1.734283059770827e-07,
      "loss": 0.0096,
      "step": 3002620
    },
    {
      "epoch": 4.913886215903066,
      "grad_norm": 0.06540780514478683,
      "learning_rate": 1.7336241376356558e-07,
      "loss": 0.0077,
      "step": 3002640
    },
    {
      "epoch": 4.9139189463417186,
      "grad_norm": 0.026111409068107605,
      "learning_rate": 1.7329652155004846e-07,
      "loss": 0.0074,
      "step": 3002660
    },
    {
      "epoch": 4.913951676780372,
      "grad_norm": 0.2573150396347046,
      "learning_rate": 1.7323062933653133e-07,
      "loss": 0.008,
      "step": 3002680
    },
    {
      "epoch": 4.913984407219026,
      "grad_norm": 0.14317405223846436,
      "learning_rate": 1.731647371230142e-07,
      "loss": 0.0085,
      "step": 3002700
    },
    {
      "epoch": 4.9140171376576784,
      "grad_norm": 0.41260409355163574,
      "learning_rate": 1.7309884490949707e-07,
      "loss": 0.0109,
      "step": 3002720
    },
    {
      "epoch": 4.914049868096332,
      "grad_norm": 0.36090242862701416,
      "learning_rate": 1.7303295269597994e-07,
      "loss": 0.0075,
      "step": 3002740
    },
    {
      "epoch": 4.914082598534986,
      "grad_norm": 0.26440876722335815,
      "learning_rate": 1.7296706048246282e-07,
      "loss": 0.0097,
      "step": 3002760
    },
    {
      "epoch": 4.914115328973639,
      "grad_norm": 0.1063821092247963,
      "learning_rate": 1.7290116826894566e-07,
      "loss": 0.0098,
      "step": 3002780
    },
    {
      "epoch": 4.914148059412292,
      "grad_norm": 0.19393721222877502,
      "learning_rate": 1.7283527605542853e-07,
      "loss": 0.0072,
      "step": 3002800
    },
    {
      "epoch": 4.9141807898509455,
      "grad_norm": 0.20839262008666992,
      "learning_rate": 1.727693838419114e-07,
      "loss": 0.0115,
      "step": 3002820
    },
    {
      "epoch": 4.914213520289599,
      "grad_norm": 0.13900190591812134,
      "learning_rate": 1.7270349162839428e-07,
      "loss": 0.011,
      "step": 3002840
    },
    {
      "epoch": 4.914246250728253,
      "grad_norm": 0.2583838701248169,
      "learning_rate": 1.7263759941487715e-07,
      "loss": 0.0088,
      "step": 3002860
    },
    {
      "epoch": 4.914278981166905,
      "grad_norm": 0.13141949474811554,
      "learning_rate": 1.7257170720136002e-07,
      "loss": 0.0087,
      "step": 3002880
    },
    {
      "epoch": 4.914311711605559,
      "grad_norm": 0.15565653145313263,
      "learning_rate": 1.725058149878429e-07,
      "loss": 0.0083,
      "step": 3002900
    },
    {
      "epoch": 4.914344442044213,
      "grad_norm": 0.1088850349187851,
      "learning_rate": 1.724399227743258e-07,
      "loss": 0.0091,
      "step": 3002920
    },
    {
      "epoch": 4.914377172482865,
      "grad_norm": 0.11262842267751694,
      "learning_rate": 1.7237403056080866e-07,
      "loss": 0.013,
      "step": 3002940
    },
    {
      "epoch": 4.914409902921519,
      "grad_norm": 0.6848103404045105,
      "learning_rate": 1.7230813834729154e-07,
      "loss": 0.0115,
      "step": 3002960
    },
    {
      "epoch": 4.9144426333601725,
      "grad_norm": 0.19082333147525787,
      "learning_rate": 1.722422461337744e-07,
      "loss": 0.0107,
      "step": 3002980
    },
    {
      "epoch": 4.914475363798825,
      "grad_norm": 0.22265075147151947,
      "learning_rate": 1.7217635392025725e-07,
      "loss": 0.0099,
      "step": 3003000
    },
    {
      "epoch": 4.914508094237479,
      "grad_norm": 0.06314728409051895,
      "learning_rate": 1.7211046170674013e-07,
      "loss": 0.009,
      "step": 3003020
    },
    {
      "epoch": 4.914540824676132,
      "grad_norm": 0.30276110768318176,
      "learning_rate": 1.72044569493223e-07,
      "loss": 0.0069,
      "step": 3003040
    },
    {
      "epoch": 4.914573555114786,
      "grad_norm": 0.24650275707244873,
      "learning_rate": 1.7197867727970587e-07,
      "loss": 0.0086,
      "step": 3003060
    },
    {
      "epoch": 4.914606285553439,
      "grad_norm": 0.504036545753479,
      "learning_rate": 1.7191278506618874e-07,
      "loss": 0.0111,
      "step": 3003080
    },
    {
      "epoch": 4.914639015992092,
      "grad_norm": 0.12409187108278275,
      "learning_rate": 1.7184689285267161e-07,
      "loss": 0.0083,
      "step": 3003100
    },
    {
      "epoch": 4.914671746430746,
      "grad_norm": 0.3560740053653717,
      "learning_rate": 1.7178100063915449e-07,
      "loss": 0.0118,
      "step": 3003120
    },
    {
      "epoch": 4.9147044768693995,
      "grad_norm": 0.3251993656158447,
      "learning_rate": 1.7171510842563736e-07,
      "loss": 0.0069,
      "step": 3003140
    },
    {
      "epoch": 4.914737207308052,
      "grad_norm": 0.2716420590877533,
      "learning_rate": 1.7164921621212023e-07,
      "loss": 0.0083,
      "step": 3003160
    },
    {
      "epoch": 4.914769937746706,
      "grad_norm": 0.5615635514259338,
      "learning_rate": 1.715833239986031e-07,
      "loss": 0.0084,
      "step": 3003180
    },
    {
      "epoch": 4.914802668185359,
      "grad_norm": 0.24045732617378235,
      "learning_rate": 1.7151743178508598e-07,
      "loss": 0.0112,
      "step": 3003200
    },
    {
      "epoch": 4.914835398624012,
      "grad_norm": 0.14176344871520996,
      "learning_rate": 1.7145153957156882e-07,
      "loss": 0.008,
      "step": 3003220
    },
    {
      "epoch": 4.914868129062666,
      "grad_norm": 0.2512308955192566,
      "learning_rate": 1.713856473580517e-07,
      "loss": 0.0092,
      "step": 3003240
    },
    {
      "epoch": 4.914900859501319,
      "grad_norm": 0.9704944491386414,
      "learning_rate": 1.7131975514453456e-07,
      "loss": 0.0097,
      "step": 3003260
    },
    {
      "epoch": 4.914933589939972,
      "grad_norm": 0.1260119527578354,
      "learning_rate": 1.7125386293101746e-07,
      "loss": 0.0059,
      "step": 3003280
    },
    {
      "epoch": 4.914966320378626,
      "grad_norm": 0.1495051085948944,
      "learning_rate": 1.7118797071750034e-07,
      "loss": 0.0105,
      "step": 3003300
    },
    {
      "epoch": 4.914999050817279,
      "grad_norm": 0.13539423048496246,
      "learning_rate": 1.711220785039832e-07,
      "loss": 0.0089,
      "step": 3003320
    },
    {
      "epoch": 4.915031781255933,
      "grad_norm": 0.17511971294879913,
      "learning_rate": 1.7105618629046608e-07,
      "loss": 0.0068,
      "step": 3003340
    },
    {
      "epoch": 4.9150645116945855,
      "grad_norm": 0.5138208866119385,
      "learning_rate": 1.7099029407694895e-07,
      "loss": 0.008,
      "step": 3003360
    },
    {
      "epoch": 4.915097242133239,
      "grad_norm": 0.28686490654945374,
      "learning_rate": 1.7092440186343182e-07,
      "loss": 0.0129,
      "step": 3003380
    },
    {
      "epoch": 4.915129972571893,
      "grad_norm": 0.09121683984994888,
      "learning_rate": 1.708585096499147e-07,
      "loss": 0.0078,
      "step": 3003400
    },
    {
      "epoch": 4.915162703010545,
      "grad_norm": 0.3413040339946747,
      "learning_rate": 1.7079261743639757e-07,
      "loss": 0.0089,
      "step": 3003420
    },
    {
      "epoch": 4.915195433449199,
      "grad_norm": 0.42208606004714966,
      "learning_rate": 1.7072672522288041e-07,
      "loss": 0.0122,
      "step": 3003440
    },
    {
      "epoch": 4.9152281638878526,
      "grad_norm": 0.3229964077472687,
      "learning_rate": 1.7066083300936329e-07,
      "loss": 0.0095,
      "step": 3003460
    },
    {
      "epoch": 4.915260894326506,
      "grad_norm": 0.2824017107486725,
      "learning_rate": 1.7059494079584616e-07,
      "loss": 0.0105,
      "step": 3003480
    },
    {
      "epoch": 4.915293624765159,
      "grad_norm": 0.7497119903564453,
      "learning_rate": 1.7052904858232903e-07,
      "loss": 0.0071,
      "step": 3003500
    },
    {
      "epoch": 4.9153263552038124,
      "grad_norm": 0.3303194046020508,
      "learning_rate": 1.704631563688119e-07,
      "loss": 0.0103,
      "step": 3003520
    },
    {
      "epoch": 4.915359085642466,
      "grad_norm": 0.25864487886428833,
      "learning_rate": 1.7039726415529477e-07,
      "loss": 0.0103,
      "step": 3003540
    },
    {
      "epoch": 4.915391816081119,
      "grad_norm": 0.310251921415329,
      "learning_rate": 1.7033137194177765e-07,
      "loss": 0.0047,
      "step": 3003560
    },
    {
      "epoch": 4.915424546519772,
      "grad_norm": 0.6056621670722961,
      "learning_rate": 1.7026547972826054e-07,
      "loss": 0.0102,
      "step": 3003580
    },
    {
      "epoch": 4.915457276958426,
      "grad_norm": 0.14804832637310028,
      "learning_rate": 1.7019958751474342e-07,
      "loss": 0.0087,
      "step": 3003600
    },
    {
      "epoch": 4.915490007397079,
      "grad_norm": 0.27133500576019287,
      "learning_rate": 1.701336953012263e-07,
      "loss": 0.0133,
      "step": 3003620
    },
    {
      "epoch": 4.915522737835732,
      "grad_norm": 0.27343621850013733,
      "learning_rate": 1.7006780308770916e-07,
      "loss": 0.0121,
      "step": 3003640
    },
    {
      "epoch": 4.915555468274386,
      "grad_norm": 0.15203404426574707,
      "learning_rate": 1.70001910874192e-07,
      "loss": 0.0083,
      "step": 3003660
    },
    {
      "epoch": 4.915588198713039,
      "grad_norm": 0.4125838279724121,
      "learning_rate": 1.6993601866067488e-07,
      "loss": 0.016,
      "step": 3003680
    },
    {
      "epoch": 4.915620929151692,
      "grad_norm": 0.35153308510780334,
      "learning_rate": 1.6987012644715775e-07,
      "loss": 0.0113,
      "step": 3003700
    },
    {
      "epoch": 4.915653659590346,
      "grad_norm": 0.2723871171474457,
      "learning_rate": 1.6980423423364062e-07,
      "loss": 0.0078,
      "step": 3003720
    },
    {
      "epoch": 4.915686390028999,
      "grad_norm": 0.23674292862415314,
      "learning_rate": 1.697383420201235e-07,
      "loss": 0.0091,
      "step": 3003740
    },
    {
      "epoch": 4.915719120467653,
      "grad_norm": 0.41921982169151306,
      "learning_rate": 1.6967244980660637e-07,
      "loss": 0.0125,
      "step": 3003760
    },
    {
      "epoch": 4.915751850906306,
      "grad_norm": 0.1160096675157547,
      "learning_rate": 1.6960655759308924e-07,
      "loss": 0.0085,
      "step": 3003780
    },
    {
      "epoch": 4.915784581344959,
      "grad_norm": 0.11067534238100052,
      "learning_rate": 1.695406653795721e-07,
      "loss": 0.0064,
      "step": 3003800
    },
    {
      "epoch": 4.915817311783613,
      "grad_norm": 0.40032678842544556,
      "learning_rate": 1.6947477316605498e-07,
      "loss": 0.0093,
      "step": 3003820
    },
    {
      "epoch": 4.9158500422222655,
      "grad_norm": 0.06784001737833023,
      "learning_rate": 1.6940888095253786e-07,
      "loss": 0.0069,
      "step": 3003840
    },
    {
      "epoch": 4.915882772660919,
      "grad_norm": 0.3528553545475006,
      "learning_rate": 1.6934298873902073e-07,
      "loss": 0.0076,
      "step": 3003860
    },
    {
      "epoch": 4.915915503099573,
      "grad_norm": 0.36572661995887756,
      "learning_rate": 1.6927709652550357e-07,
      "loss": 0.0072,
      "step": 3003880
    },
    {
      "epoch": 4.915948233538225,
      "grad_norm": 0.3803468942642212,
      "learning_rate": 1.6921120431198645e-07,
      "loss": 0.0073,
      "step": 3003900
    },
    {
      "epoch": 4.915980963976879,
      "grad_norm": 0.17552372813224792,
      "learning_rate": 1.6914531209846932e-07,
      "loss": 0.0049,
      "step": 3003920
    },
    {
      "epoch": 4.916013694415533,
      "grad_norm": 0.1751602739095688,
      "learning_rate": 1.6907941988495222e-07,
      "loss": 0.0083,
      "step": 3003940
    },
    {
      "epoch": 4.916046424854186,
      "grad_norm": 0.2956320643424988,
      "learning_rate": 1.690135276714351e-07,
      "loss": 0.0068,
      "step": 3003960
    },
    {
      "epoch": 4.916079155292839,
      "grad_norm": 0.21592873334884644,
      "learning_rate": 1.6894763545791796e-07,
      "loss": 0.0111,
      "step": 3003980
    },
    {
      "epoch": 4.9161118857314925,
      "grad_norm": 0.1479930579662323,
      "learning_rate": 1.6888174324440083e-07,
      "loss": 0.0088,
      "step": 3004000
    },
    {
      "epoch": 4.916144616170146,
      "grad_norm": 0.12588150799274445,
      "learning_rate": 1.688158510308837e-07,
      "loss": 0.0066,
      "step": 3004020
    },
    {
      "epoch": 4.9161773466088,
      "grad_norm": 0.5072426795959473,
      "learning_rate": 1.6874995881736658e-07,
      "loss": 0.012,
      "step": 3004040
    },
    {
      "epoch": 4.916210077047452,
      "grad_norm": 0.40564587712287903,
      "learning_rate": 1.6868406660384945e-07,
      "loss": 0.0093,
      "step": 3004060
    },
    {
      "epoch": 4.916242807486106,
      "grad_norm": 0.2956129014492035,
      "learning_rate": 1.6861817439033232e-07,
      "loss": 0.0102,
      "step": 3004080
    },
    {
      "epoch": 4.91627553792476,
      "grad_norm": 0.2072306126356125,
      "learning_rate": 1.6855228217681517e-07,
      "loss": 0.0105,
      "step": 3004100
    },
    {
      "epoch": 4.916308268363412,
      "grad_norm": 0.2568643391132355,
      "learning_rate": 1.6848638996329804e-07,
      "loss": 0.0099,
      "step": 3004120
    },
    {
      "epoch": 4.916340998802066,
      "grad_norm": 0.16293753683567047,
      "learning_rate": 1.684204977497809e-07,
      "loss": 0.0073,
      "step": 3004140
    },
    {
      "epoch": 4.9163737292407195,
      "grad_norm": 0.27007943391799927,
      "learning_rate": 1.6835460553626378e-07,
      "loss": 0.0104,
      "step": 3004160
    },
    {
      "epoch": 4.916406459679372,
      "grad_norm": 0.2271588295698166,
      "learning_rate": 1.6828871332274665e-07,
      "loss": 0.0138,
      "step": 3004180
    },
    {
      "epoch": 4.916439190118026,
      "grad_norm": 0.12719957530498505,
      "learning_rate": 1.6822282110922953e-07,
      "loss": 0.0071,
      "step": 3004200
    },
    {
      "epoch": 4.916471920556679,
      "grad_norm": 0.787930965423584,
      "learning_rate": 1.681569288957124e-07,
      "loss": 0.0107,
      "step": 3004220
    },
    {
      "epoch": 4.916504650995333,
      "grad_norm": 0.11607484519481659,
      "learning_rate": 1.680910366821953e-07,
      "loss": 0.0185,
      "step": 3004240
    },
    {
      "epoch": 4.916537381433986,
      "grad_norm": 0.099179707467556,
      "learning_rate": 1.6802514446867817e-07,
      "loss": 0.0103,
      "step": 3004260
    },
    {
      "epoch": 4.916570111872639,
      "grad_norm": 0.158877894282341,
      "learning_rate": 1.6795925225516104e-07,
      "loss": 0.0053,
      "step": 3004280
    },
    {
      "epoch": 4.916602842311293,
      "grad_norm": 0.08238190412521362,
      "learning_rate": 1.6789336004164391e-07,
      "loss": 0.0062,
      "step": 3004300
    },
    {
      "epoch": 4.9166355727499464,
      "grad_norm": 0.11481856554746628,
      "learning_rate": 1.6782746782812676e-07,
      "loss": 0.009,
      "step": 3004320
    },
    {
      "epoch": 4.916668303188599,
      "grad_norm": 0.5039712190628052,
      "learning_rate": 1.6776157561460963e-07,
      "loss": 0.0095,
      "step": 3004340
    },
    {
      "epoch": 4.916701033627253,
      "grad_norm": 0.09937357157468796,
      "learning_rate": 1.676956834010925e-07,
      "loss": 0.0093,
      "step": 3004360
    },
    {
      "epoch": 4.916733764065906,
      "grad_norm": 0.13773399591445923,
      "learning_rate": 1.6762979118757538e-07,
      "loss": 0.0058,
      "step": 3004380
    },
    {
      "epoch": 4.916766494504559,
      "grad_norm": 0.14793610572814941,
      "learning_rate": 1.6756389897405825e-07,
      "loss": 0.0067,
      "step": 3004400
    },
    {
      "epoch": 4.916799224943213,
      "grad_norm": 0.6379891633987427,
      "learning_rate": 1.6749800676054112e-07,
      "loss": 0.0123,
      "step": 3004420
    },
    {
      "epoch": 4.916831955381866,
      "grad_norm": 0.31536808609962463,
      "learning_rate": 1.67432114547024e-07,
      "loss": 0.0067,
      "step": 3004440
    },
    {
      "epoch": 4.916864685820519,
      "grad_norm": 0.2139313519001007,
      "learning_rate": 1.6736622233350686e-07,
      "loss": 0.0062,
      "step": 3004460
    },
    {
      "epoch": 4.9168974162591725,
      "grad_norm": 0.6365814208984375,
      "learning_rate": 1.6730033011998974e-07,
      "loss": 0.007,
      "step": 3004480
    },
    {
      "epoch": 4.916930146697826,
      "grad_norm": 0.3800780773162842,
      "learning_rate": 1.672344379064726e-07,
      "loss": 0.0076,
      "step": 3004500
    },
    {
      "epoch": 4.91696287713648,
      "grad_norm": 0.04883579537272453,
      "learning_rate": 1.6716854569295548e-07,
      "loss": 0.009,
      "step": 3004520
    },
    {
      "epoch": 4.916995607575132,
      "grad_norm": 0.3616543412208557,
      "learning_rate": 1.6710265347943833e-07,
      "loss": 0.0128,
      "step": 3004540
    },
    {
      "epoch": 4.917028338013786,
      "grad_norm": 0.14314965903759003,
      "learning_rate": 1.670367612659212e-07,
      "loss": 0.0111,
      "step": 3004560
    },
    {
      "epoch": 4.91706106845244,
      "grad_norm": 0.10200273245573044,
      "learning_rate": 1.6697086905240407e-07,
      "loss": 0.0078,
      "step": 3004580
    },
    {
      "epoch": 4.917093798891093,
      "grad_norm": 0.08575249463319778,
      "learning_rate": 1.6690497683888697e-07,
      "loss": 0.0082,
      "step": 3004600
    },
    {
      "epoch": 4.917126529329746,
      "grad_norm": 0.24897365272045135,
      "learning_rate": 1.6683908462536984e-07,
      "loss": 0.0066,
      "step": 3004620
    },
    {
      "epoch": 4.9171592597683995,
      "grad_norm": 0.06749521195888519,
      "learning_rate": 1.667731924118527e-07,
      "loss": 0.0047,
      "step": 3004640
    },
    {
      "epoch": 4.917191990207053,
      "grad_norm": 0.2123921811580658,
      "learning_rate": 1.6670730019833559e-07,
      "loss": 0.0088,
      "step": 3004660
    },
    {
      "epoch": 4.917224720645706,
      "grad_norm": 0.07380060106515884,
      "learning_rate": 1.6664140798481846e-07,
      "loss": 0.0114,
      "step": 3004680
    },
    {
      "epoch": 4.917257451084359,
      "grad_norm": 0.13036881387233734,
      "learning_rate": 1.6657551577130133e-07,
      "loss": 0.0094,
      "step": 3004700
    },
    {
      "epoch": 4.917290181523013,
      "grad_norm": 0.6285980343818665,
      "learning_rate": 1.665096235577842e-07,
      "loss": 0.008,
      "step": 3004720
    },
    {
      "epoch": 4.917322911961666,
      "grad_norm": 0.23006278276443481,
      "learning_rate": 1.6644373134426707e-07,
      "loss": 0.0086,
      "step": 3004740
    },
    {
      "epoch": 4.917355642400319,
      "grad_norm": 0.17997829616069794,
      "learning_rate": 1.6637783913074992e-07,
      "loss": 0.0113,
      "step": 3004760
    },
    {
      "epoch": 4.917388372838973,
      "grad_norm": 0.06448768824338913,
      "learning_rate": 1.663119469172328e-07,
      "loss": 0.0089,
      "step": 3004780
    },
    {
      "epoch": 4.9174211032776265,
      "grad_norm": 0.3470745086669922,
      "learning_rate": 1.6624605470371566e-07,
      "loss": 0.0133,
      "step": 3004800
    },
    {
      "epoch": 4.917453833716279,
      "grad_norm": 0.20323815941810608,
      "learning_rate": 1.6618016249019854e-07,
      "loss": 0.0057,
      "step": 3004820
    },
    {
      "epoch": 4.917486564154933,
      "grad_norm": 0.06159568950533867,
      "learning_rate": 1.661142702766814e-07,
      "loss": 0.0084,
      "step": 3004840
    },
    {
      "epoch": 4.917519294593586,
      "grad_norm": 0.48952382802963257,
      "learning_rate": 1.6604837806316428e-07,
      "loss": 0.009,
      "step": 3004860
    },
    {
      "epoch": 4.917552025032239,
      "grad_norm": 0.10140369832515717,
      "learning_rate": 1.6598248584964715e-07,
      "loss": 0.009,
      "step": 3004880
    },
    {
      "epoch": 4.917584755470893,
      "grad_norm": 0.19168496131896973,
      "learning_rate": 1.6591659363613005e-07,
      "loss": 0.0098,
      "step": 3004900
    },
    {
      "epoch": 4.917617485909546,
      "grad_norm": 0.39012181758880615,
      "learning_rate": 1.6585070142261292e-07,
      "loss": 0.0071,
      "step": 3004920
    },
    {
      "epoch": 4.9176502163482,
      "grad_norm": 0.2933002710342407,
      "learning_rate": 1.657848092090958e-07,
      "loss": 0.0102,
      "step": 3004940
    },
    {
      "epoch": 4.917682946786853,
      "grad_norm": 0.18553553521633148,
      "learning_rate": 1.6571891699557867e-07,
      "loss": 0.0084,
      "step": 3004960
    },
    {
      "epoch": 4.917715677225506,
      "grad_norm": 0.34314194321632385,
      "learning_rate": 1.656530247820615e-07,
      "loss": 0.0068,
      "step": 3004980
    },
    {
      "epoch": 4.91774840766416,
      "grad_norm": 0.15633906424045563,
      "learning_rate": 1.6558713256854438e-07,
      "loss": 0.0075,
      "step": 3005000
    },
    {
      "epoch": 4.9177811381028125,
      "grad_norm": 1.1345196962356567,
      "learning_rate": 1.6552124035502726e-07,
      "loss": 0.0116,
      "step": 3005020
    },
    {
      "epoch": 4.917813868541466,
      "grad_norm": 0.06911404430866241,
      "learning_rate": 1.6545534814151013e-07,
      "loss": 0.0131,
      "step": 3005040
    },
    {
      "epoch": 4.91784659898012,
      "grad_norm": 0.6717687845230103,
      "learning_rate": 1.65389455927993e-07,
      "loss": 0.0122,
      "step": 3005060
    },
    {
      "epoch": 4.917879329418772,
      "grad_norm": 0.08237743377685547,
      "learning_rate": 1.6532356371447587e-07,
      "loss": 0.0108,
      "step": 3005080
    },
    {
      "epoch": 4.917912059857426,
      "grad_norm": 0.05053747445344925,
      "learning_rate": 1.6525767150095874e-07,
      "loss": 0.0109,
      "step": 3005100
    },
    {
      "epoch": 4.91794479029608,
      "grad_norm": 0.1378462314605713,
      "learning_rate": 1.6519177928744162e-07,
      "loss": 0.0089,
      "step": 3005120
    },
    {
      "epoch": 4.917977520734733,
      "grad_norm": 0.08383706957101822,
      "learning_rate": 1.651258870739245e-07,
      "loss": 0.0087,
      "step": 3005140
    },
    {
      "epoch": 4.918010251173386,
      "grad_norm": 0.2430076152086258,
      "learning_rate": 1.6505999486040736e-07,
      "loss": 0.0092,
      "step": 3005160
    },
    {
      "epoch": 4.9180429816120395,
      "grad_norm": 0.15842880308628082,
      "learning_rate": 1.6499410264689023e-07,
      "loss": 0.0078,
      "step": 3005180
    },
    {
      "epoch": 4.918075712050693,
      "grad_norm": 0.3467820882797241,
      "learning_rate": 1.6492821043337308e-07,
      "loss": 0.0118,
      "step": 3005200
    },
    {
      "epoch": 4.918108442489347,
      "grad_norm": 0.19475480914115906,
      "learning_rate": 1.6486231821985595e-07,
      "loss": 0.0051,
      "step": 3005220
    },
    {
      "epoch": 4.918141172927999,
      "grad_norm": 0.1394285410642624,
      "learning_rate": 1.6479642600633882e-07,
      "loss": 0.0113,
      "step": 3005240
    },
    {
      "epoch": 4.918173903366653,
      "grad_norm": 0.25916680693626404,
      "learning_rate": 1.6473053379282172e-07,
      "loss": 0.0087,
      "step": 3005260
    },
    {
      "epoch": 4.9182066338053065,
      "grad_norm": 0.41503840684890747,
      "learning_rate": 1.646646415793046e-07,
      "loss": 0.0111,
      "step": 3005280
    },
    {
      "epoch": 4.918239364243959,
      "grad_norm": 0.18392574787139893,
      "learning_rate": 1.6459874936578747e-07,
      "loss": 0.0075,
      "step": 3005300
    },
    {
      "epoch": 4.918272094682613,
      "grad_norm": 0.2626127302646637,
      "learning_rate": 1.6453285715227034e-07,
      "loss": 0.0096,
      "step": 3005320
    },
    {
      "epoch": 4.918304825121266,
      "grad_norm": 0.5015925168991089,
      "learning_rate": 1.644669649387532e-07,
      "loss": 0.0074,
      "step": 3005340
    },
    {
      "epoch": 4.918337555559919,
      "grad_norm": 0.3104066848754883,
      "learning_rate": 1.6440107272523608e-07,
      "loss": 0.014,
      "step": 3005360
    },
    {
      "epoch": 4.918370285998573,
      "grad_norm": 0.05627058446407318,
      "learning_rate": 1.6433518051171895e-07,
      "loss": 0.0078,
      "step": 3005380
    },
    {
      "epoch": 4.918403016437226,
      "grad_norm": 0.2791900038719177,
      "learning_rate": 1.6426928829820183e-07,
      "loss": 0.0065,
      "step": 3005400
    },
    {
      "epoch": 4.91843574687588,
      "grad_norm": 0.21012164652347565,
      "learning_rate": 1.6420339608468467e-07,
      "loss": 0.0106,
      "step": 3005420
    },
    {
      "epoch": 4.918468477314533,
      "grad_norm": 0.18354587256908417,
      "learning_rate": 1.6413750387116754e-07,
      "loss": 0.0061,
      "step": 3005440
    },
    {
      "epoch": 4.918501207753186,
      "grad_norm": 0.07689463347196579,
      "learning_rate": 1.6407161165765042e-07,
      "loss": 0.0065,
      "step": 3005460
    },
    {
      "epoch": 4.91853393819184,
      "grad_norm": 0.2017897665500641,
      "learning_rate": 1.640057194441333e-07,
      "loss": 0.0085,
      "step": 3005480
    },
    {
      "epoch": 4.918566668630493,
      "grad_norm": 0.3094118535518646,
      "learning_rate": 1.6393982723061616e-07,
      "loss": 0.0108,
      "step": 3005500
    },
    {
      "epoch": 4.918599399069146,
      "grad_norm": 0.09620586782693863,
      "learning_rate": 1.6387393501709903e-07,
      "loss": 0.0046,
      "step": 3005520
    },
    {
      "epoch": 4.9186321295078,
      "grad_norm": 0.13684527575969696,
      "learning_rate": 1.638080428035819e-07,
      "loss": 0.0062,
      "step": 3005540
    },
    {
      "epoch": 4.918664859946453,
      "grad_norm": 0.12134482711553574,
      "learning_rate": 1.637421505900648e-07,
      "loss": 0.0081,
      "step": 3005560
    },
    {
      "epoch": 4.918697590385106,
      "grad_norm": 0.10585165023803711,
      "learning_rate": 1.6367625837654768e-07,
      "loss": 0.0072,
      "step": 3005580
    },
    {
      "epoch": 4.91873032082376,
      "grad_norm": 0.2190445363521576,
      "learning_rate": 1.6361036616303055e-07,
      "loss": 0.0073,
      "step": 3005600
    },
    {
      "epoch": 4.918763051262413,
      "grad_norm": 0.1609097570180893,
      "learning_rate": 1.6354447394951342e-07,
      "loss": 0.0055,
      "step": 3005620
    },
    {
      "epoch": 4.918795781701066,
      "grad_norm": 0.19121041893959045,
      "learning_rate": 1.6347858173599626e-07,
      "loss": 0.0108,
      "step": 3005640
    },
    {
      "epoch": 4.9188285121397195,
      "grad_norm": 0.19639275968074799,
      "learning_rate": 1.6341268952247914e-07,
      "loss": 0.0054,
      "step": 3005660
    },
    {
      "epoch": 4.918861242578373,
      "grad_norm": 0.5596309900283813,
      "learning_rate": 1.63346797308962e-07,
      "loss": 0.0093,
      "step": 3005680
    },
    {
      "epoch": 4.918893973017027,
      "grad_norm": 0.31927916407585144,
      "learning_rate": 1.6328090509544488e-07,
      "loss": 0.0076,
      "step": 3005700
    },
    {
      "epoch": 4.918926703455679,
      "grad_norm": 0.2532520592212677,
      "learning_rate": 1.6321501288192775e-07,
      "loss": 0.0087,
      "step": 3005720
    },
    {
      "epoch": 4.918959433894333,
      "grad_norm": 0.5631970763206482,
      "learning_rate": 1.6314912066841063e-07,
      "loss": 0.0097,
      "step": 3005740
    },
    {
      "epoch": 4.918992164332987,
      "grad_norm": 0.18828214704990387,
      "learning_rate": 1.630832284548935e-07,
      "loss": 0.0052,
      "step": 3005760
    },
    {
      "epoch": 4.91902489477164,
      "grad_norm": 0.18177002668380737,
      "learning_rate": 1.6301733624137637e-07,
      "loss": 0.0092,
      "step": 3005780
    },
    {
      "epoch": 4.919057625210293,
      "grad_norm": 0.19570475816726685,
      "learning_rate": 1.6295144402785924e-07,
      "loss": 0.0071,
      "step": 3005800
    },
    {
      "epoch": 4.9190903556489465,
      "grad_norm": 0.36761555075645447,
      "learning_rate": 1.6288555181434211e-07,
      "loss": 0.0081,
      "step": 3005820
    },
    {
      "epoch": 4.9191230860876,
      "grad_norm": 1.2311251163482666,
      "learning_rate": 1.6281965960082499e-07,
      "loss": 0.0113,
      "step": 3005840
    },
    {
      "epoch": 4.919155816526253,
      "grad_norm": 0.33906587958335876,
      "learning_rate": 1.6275376738730783e-07,
      "loss": 0.0075,
      "step": 3005860
    },
    {
      "epoch": 4.919188546964906,
      "grad_norm": 0.16415467858314514,
      "learning_rate": 1.626878751737907e-07,
      "loss": 0.0067,
      "step": 3005880
    },
    {
      "epoch": 4.91922127740356,
      "grad_norm": 0.260121613740921,
      "learning_rate": 1.6262198296027358e-07,
      "loss": 0.0077,
      "step": 3005900
    },
    {
      "epoch": 4.919254007842213,
      "grad_norm": 0.36455240845680237,
      "learning_rate": 1.6255609074675647e-07,
      "loss": 0.007,
      "step": 3005920
    },
    {
      "epoch": 4.919286738280866,
      "grad_norm": 0.14322645962238312,
      "learning_rate": 1.6249019853323935e-07,
      "loss": 0.009,
      "step": 3005940
    },
    {
      "epoch": 4.91931946871952,
      "grad_norm": 0.08638697862625122,
      "learning_rate": 1.6242430631972222e-07,
      "loss": 0.0071,
      "step": 3005960
    },
    {
      "epoch": 4.9193521991581735,
      "grad_norm": 0.07456068694591522,
      "learning_rate": 1.623584141062051e-07,
      "loss": 0.0104,
      "step": 3005980
    },
    {
      "epoch": 4.919384929596826,
      "grad_norm": 0.10237160325050354,
      "learning_rate": 1.6229252189268796e-07,
      "loss": 0.0078,
      "step": 3006000
    },
    {
      "epoch": 4.91941766003548,
      "grad_norm": 0.6057575941085815,
      "learning_rate": 1.6222662967917083e-07,
      "loss": 0.0087,
      "step": 3006020
    },
    {
      "epoch": 4.919450390474133,
      "grad_norm": 0.10968569666147232,
      "learning_rate": 1.621607374656537e-07,
      "loss": 0.0087,
      "step": 3006040
    },
    {
      "epoch": 4.919483120912787,
      "grad_norm": 0.7691395878791809,
      "learning_rate": 1.6209484525213658e-07,
      "loss": 0.0081,
      "step": 3006060
    },
    {
      "epoch": 4.91951585135144,
      "grad_norm": 0.1699162870645523,
      "learning_rate": 1.6202895303861942e-07,
      "loss": 0.0095,
      "step": 3006080
    },
    {
      "epoch": 4.919548581790093,
      "grad_norm": 0.10828350484371185,
      "learning_rate": 1.619630608251023e-07,
      "loss": 0.0124,
      "step": 3006100
    },
    {
      "epoch": 4.919581312228747,
      "grad_norm": 0.17064891755580902,
      "learning_rate": 1.6189716861158517e-07,
      "loss": 0.0097,
      "step": 3006120
    },
    {
      "epoch": 4.9196140426673995,
      "grad_norm": 0.45287084579467773,
      "learning_rate": 1.6183127639806804e-07,
      "loss": 0.0133,
      "step": 3006140
    },
    {
      "epoch": 4.919646773106053,
      "grad_norm": 0.3945062756538391,
      "learning_rate": 1.617653841845509e-07,
      "loss": 0.0121,
      "step": 3006160
    },
    {
      "epoch": 4.919679503544707,
      "grad_norm": 0.06971580535173416,
      "learning_rate": 1.6169949197103378e-07,
      "loss": 0.0089,
      "step": 3006180
    },
    {
      "epoch": 4.919712233983359,
      "grad_norm": 0.2816828489303589,
      "learning_rate": 1.6163359975751666e-07,
      "loss": 0.011,
      "step": 3006200
    },
    {
      "epoch": 4.919744964422013,
      "grad_norm": 0.24612872302532196,
      "learning_rate": 1.6156770754399956e-07,
      "loss": 0.0127,
      "step": 3006220
    },
    {
      "epoch": 4.919777694860667,
      "grad_norm": 0.3205249607563019,
      "learning_rate": 1.6150181533048243e-07,
      "loss": 0.009,
      "step": 3006240
    },
    {
      "epoch": 4.91981042529932,
      "grad_norm": 0.30251818895339966,
      "learning_rate": 1.614359231169653e-07,
      "loss": 0.0137,
      "step": 3006260
    },
    {
      "epoch": 4.919843155737973,
      "grad_norm": 0.253188818693161,
      "learning_rate": 1.6137003090344817e-07,
      "loss": 0.0061,
      "step": 3006280
    },
    {
      "epoch": 4.9198758861766265,
      "grad_norm": 0.32476574182510376,
      "learning_rate": 1.6130413868993102e-07,
      "loss": 0.0076,
      "step": 3006300
    },
    {
      "epoch": 4.91990861661528,
      "grad_norm": 0.16271960735321045,
      "learning_rate": 1.612382464764139e-07,
      "loss": 0.0087,
      "step": 3006320
    },
    {
      "epoch": 4.919941347053934,
      "grad_norm": 0.16766135394573212,
      "learning_rate": 1.6117235426289676e-07,
      "loss": 0.0066,
      "step": 3006340
    },
    {
      "epoch": 4.919974077492586,
      "grad_norm": 0.1664971113204956,
      "learning_rate": 1.6110646204937963e-07,
      "loss": 0.0071,
      "step": 3006360
    },
    {
      "epoch": 4.92000680793124,
      "grad_norm": 0.1432943493127823,
      "learning_rate": 1.610405698358625e-07,
      "loss": 0.0101,
      "step": 3006380
    },
    {
      "epoch": 4.920039538369894,
      "grad_norm": 0.11016131192445755,
      "learning_rate": 1.6097467762234538e-07,
      "loss": 0.0123,
      "step": 3006400
    },
    {
      "epoch": 4.920072268808546,
      "grad_norm": 0.20631593465805054,
      "learning_rate": 1.6090878540882825e-07,
      "loss": 0.0095,
      "step": 3006420
    },
    {
      "epoch": 4.9201049992472,
      "grad_norm": 0.23410238325595856,
      "learning_rate": 1.6084289319531112e-07,
      "loss": 0.0074,
      "step": 3006440
    },
    {
      "epoch": 4.9201377296858535,
      "grad_norm": 0.26364198327064514,
      "learning_rate": 1.60777000981794e-07,
      "loss": 0.0088,
      "step": 3006460
    },
    {
      "epoch": 4.920170460124506,
      "grad_norm": 0.19224558770656586,
      "learning_rate": 1.6071110876827687e-07,
      "loss": 0.0114,
      "step": 3006480
    },
    {
      "epoch": 4.92020319056316,
      "grad_norm": 0.16268353164196014,
      "learning_rate": 1.6064521655475976e-07,
      "loss": 0.0102,
      "step": 3006500
    },
    {
      "epoch": 4.920235921001813,
      "grad_norm": 0.20422014594078064,
      "learning_rate": 1.6057932434124258e-07,
      "loss": 0.0093,
      "step": 3006520
    },
    {
      "epoch": 4.920268651440467,
      "grad_norm": 0.09125401079654694,
      "learning_rate": 1.6051343212772546e-07,
      "loss": 0.0084,
      "step": 3006540
    },
    {
      "epoch": 4.92030138187912,
      "grad_norm": 0.1674714982509613,
      "learning_rate": 1.6044753991420833e-07,
      "loss": 0.01,
      "step": 3006560
    },
    {
      "epoch": 4.920334112317773,
      "grad_norm": 0.09497527778148651,
      "learning_rate": 1.6038164770069123e-07,
      "loss": 0.0103,
      "step": 3006580
    },
    {
      "epoch": 4.920366842756427,
      "grad_norm": 0.5670620799064636,
      "learning_rate": 1.603157554871741e-07,
      "loss": 0.0137,
      "step": 3006600
    },
    {
      "epoch": 4.92039957319508,
      "grad_norm": 0.16085636615753174,
      "learning_rate": 1.6024986327365697e-07,
      "loss": 0.0075,
      "step": 3006620
    },
    {
      "epoch": 4.920432303633733,
      "grad_norm": 0.0677703395485878,
      "learning_rate": 1.6018397106013984e-07,
      "loss": 0.0102,
      "step": 3006640
    },
    {
      "epoch": 4.920465034072387,
      "grad_norm": 0.21003174781799316,
      "learning_rate": 1.6011807884662272e-07,
      "loss": 0.0117,
      "step": 3006660
    },
    {
      "epoch": 4.92049776451104,
      "grad_norm": 0.33368444442749023,
      "learning_rate": 1.600521866331056e-07,
      "loss": 0.0067,
      "step": 3006680
    },
    {
      "epoch": 4.920530494949693,
      "grad_norm": 0.4853258728981018,
      "learning_rate": 1.5998629441958846e-07,
      "loss": 0.0105,
      "step": 3006700
    },
    {
      "epoch": 4.920563225388347,
      "grad_norm": 1.185360074043274,
      "learning_rate": 1.5992040220607133e-07,
      "loss": 0.0188,
      "step": 3006720
    },
    {
      "epoch": 4.920595955827,
      "grad_norm": 0.28521767258644104,
      "learning_rate": 1.5985450999255418e-07,
      "loss": 0.0084,
      "step": 3006740
    },
    {
      "epoch": 4.920628686265653,
      "grad_norm": 0.22358866035938263,
      "learning_rate": 1.5978861777903705e-07,
      "loss": 0.0124,
      "step": 3006760
    },
    {
      "epoch": 4.920661416704307,
      "grad_norm": 0.12417981773614883,
      "learning_rate": 1.5972272556551992e-07,
      "loss": 0.0089,
      "step": 3006780
    },
    {
      "epoch": 4.92069414714296,
      "grad_norm": 0.2720228433609009,
      "learning_rate": 1.596568333520028e-07,
      "loss": 0.0088,
      "step": 3006800
    },
    {
      "epoch": 4.920726877581613,
      "grad_norm": 0.11872243881225586,
      "learning_rate": 1.5959094113848567e-07,
      "loss": 0.0086,
      "step": 3006820
    },
    {
      "epoch": 4.9207596080202665,
      "grad_norm": 0.18299081921577454,
      "learning_rate": 1.5952504892496854e-07,
      "loss": 0.0119,
      "step": 3006840
    },
    {
      "epoch": 4.92079233845892,
      "grad_norm": 0.1320319026708603,
      "learning_rate": 1.594591567114514e-07,
      "loss": 0.0097,
      "step": 3006860
    },
    {
      "epoch": 4.920825068897574,
      "grad_norm": 0.11526089161634445,
      "learning_rate": 1.593932644979343e-07,
      "loss": 0.0084,
      "step": 3006880
    },
    {
      "epoch": 4.920857799336226,
      "grad_norm": 0.14063018560409546,
      "learning_rate": 1.5932737228441718e-07,
      "loss": 0.0103,
      "step": 3006900
    },
    {
      "epoch": 4.92089052977488,
      "grad_norm": 0.03946467861533165,
      "learning_rate": 1.5926148007090005e-07,
      "loss": 0.0091,
      "step": 3006920
    },
    {
      "epoch": 4.9209232602135335,
      "grad_norm": 0.5593546032905579,
      "learning_rate": 1.5919558785738292e-07,
      "loss": 0.0098,
      "step": 3006940
    },
    {
      "epoch": 4.920955990652187,
      "grad_norm": 0.4752638638019562,
      "learning_rate": 1.591296956438658e-07,
      "loss": 0.0133,
      "step": 3006960
    },
    {
      "epoch": 4.92098872109084,
      "grad_norm": 0.09053744375705719,
      "learning_rate": 1.5906380343034864e-07,
      "loss": 0.0088,
      "step": 3006980
    },
    {
      "epoch": 4.921021451529493,
      "grad_norm": 0.2677040696144104,
      "learning_rate": 1.5899791121683151e-07,
      "loss": 0.0066,
      "step": 3007000
    },
    {
      "epoch": 4.921054181968147,
      "grad_norm": 0.1470761001110077,
      "learning_rate": 1.5893201900331439e-07,
      "loss": 0.012,
      "step": 3007020
    },
    {
      "epoch": 4.9210869124068,
      "grad_norm": 0.1060667335987091,
      "learning_rate": 1.5886612678979726e-07,
      "loss": 0.0123,
      "step": 3007040
    },
    {
      "epoch": 4.921119642845453,
      "grad_norm": 0.3361579477787018,
      "learning_rate": 1.5880023457628013e-07,
      "loss": 0.0081,
      "step": 3007060
    },
    {
      "epoch": 4.921152373284107,
      "grad_norm": 0.25300759077072144,
      "learning_rate": 1.58734342362763e-07,
      "loss": 0.0116,
      "step": 3007080
    },
    {
      "epoch": 4.92118510372276,
      "grad_norm": 0.514013946056366,
      "learning_rate": 1.5866845014924587e-07,
      "loss": 0.008,
      "step": 3007100
    },
    {
      "epoch": 4.921217834161413,
      "grad_norm": 0.11978234350681305,
      "learning_rate": 1.5860255793572875e-07,
      "loss": 0.0086,
      "step": 3007120
    },
    {
      "epoch": 4.921250564600067,
      "grad_norm": 0.07298175245523453,
      "learning_rate": 1.5853666572221162e-07,
      "loss": 0.0096,
      "step": 3007140
    },
    {
      "epoch": 4.92128329503872,
      "grad_norm": 0.518710196018219,
      "learning_rate": 1.5847077350869452e-07,
      "loss": 0.0102,
      "step": 3007160
    },
    {
      "epoch": 4.921316025477373,
      "grad_norm": 0.1846354901790619,
      "learning_rate": 1.584048812951774e-07,
      "loss": 0.0085,
      "step": 3007180
    },
    {
      "epoch": 4.921348755916027,
      "grad_norm": 0.2213342934846878,
      "learning_rate": 1.583389890816602e-07,
      "loss": 0.0069,
      "step": 3007200
    },
    {
      "epoch": 4.92138148635468,
      "grad_norm": 0.022235898301005363,
      "learning_rate": 1.5827309686814308e-07,
      "loss": 0.0074,
      "step": 3007220
    },
    {
      "epoch": 4.921414216793334,
      "grad_norm": 0.4220229685306549,
      "learning_rate": 1.5820720465462598e-07,
      "loss": 0.007,
      "step": 3007240
    },
    {
      "epoch": 4.921446947231987,
      "grad_norm": 0.21286018192768097,
      "learning_rate": 1.5814131244110885e-07,
      "loss": 0.0086,
      "step": 3007260
    },
    {
      "epoch": 4.92147967767064,
      "grad_norm": 0.14993451535701752,
      "learning_rate": 1.5807542022759172e-07,
      "loss": 0.0124,
      "step": 3007280
    },
    {
      "epoch": 4.921512408109294,
      "grad_norm": 0.030235683545470238,
      "learning_rate": 1.580095280140746e-07,
      "loss": 0.0078,
      "step": 3007300
    },
    {
      "epoch": 4.9215451385479465,
      "grad_norm": 0.11189303547143936,
      "learning_rate": 1.5794363580055747e-07,
      "loss": 0.0085,
      "step": 3007320
    },
    {
      "epoch": 4.9215778689866,
      "grad_norm": 0.0732499361038208,
      "learning_rate": 1.5787774358704034e-07,
      "loss": 0.0088,
      "step": 3007340
    },
    {
      "epoch": 4.921610599425254,
      "grad_norm": 0.16442649066448212,
      "learning_rate": 1.578118513735232e-07,
      "loss": 0.0078,
      "step": 3007360
    },
    {
      "epoch": 4.921643329863906,
      "grad_norm": 0.16465000808238983,
      "learning_rate": 1.5774595916000608e-07,
      "loss": 0.0116,
      "step": 3007380
    },
    {
      "epoch": 4.92167606030256,
      "grad_norm": 0.2227407991886139,
      "learning_rate": 1.5768006694648896e-07,
      "loss": 0.0066,
      "step": 3007400
    },
    {
      "epoch": 4.921708790741214,
      "grad_norm": 0.25091931223869324,
      "learning_rate": 1.576141747329718e-07,
      "loss": 0.008,
      "step": 3007420
    },
    {
      "epoch": 4.921741521179867,
      "grad_norm": 0.4633380174636841,
      "learning_rate": 1.5754828251945467e-07,
      "loss": 0.008,
      "step": 3007440
    },
    {
      "epoch": 4.92177425161852,
      "grad_norm": 0.6370421648025513,
      "learning_rate": 1.5748239030593755e-07,
      "loss": 0.0068,
      "step": 3007460
    },
    {
      "epoch": 4.9218069820571735,
      "grad_norm": 0.19979798793792725,
      "learning_rate": 1.5741649809242042e-07,
      "loss": 0.0085,
      "step": 3007480
    },
    {
      "epoch": 4.921839712495827,
      "grad_norm": 0.30831024050712585,
      "learning_rate": 1.573506058789033e-07,
      "loss": 0.008,
      "step": 3007500
    },
    {
      "epoch": 4.921872442934481,
      "grad_norm": 0.09062986820936203,
      "learning_rate": 1.5728471366538616e-07,
      "loss": 0.0135,
      "step": 3007520
    },
    {
      "epoch": 4.921905173373133,
      "grad_norm": 0.3416285812854767,
      "learning_rate": 1.5721882145186906e-07,
      "loss": 0.0083,
      "step": 3007540
    },
    {
      "epoch": 4.921937903811787,
      "grad_norm": 0.18633906543254852,
      "learning_rate": 1.5715292923835193e-07,
      "loss": 0.0078,
      "step": 3007560
    },
    {
      "epoch": 4.921970634250441,
      "grad_norm": 0.23815670609474182,
      "learning_rate": 1.570870370248348e-07,
      "loss": 0.0071,
      "step": 3007580
    },
    {
      "epoch": 4.922003364689093,
      "grad_norm": 0.4349905848503113,
      "learning_rate": 1.5702114481131768e-07,
      "loss": 0.0073,
      "step": 3007600
    },
    {
      "epoch": 4.922036095127747,
      "grad_norm": 0.09802058339118958,
      "learning_rate": 1.5695525259780055e-07,
      "loss": 0.0118,
      "step": 3007620
    },
    {
      "epoch": 4.9220688255664005,
      "grad_norm": 0.3425418436527252,
      "learning_rate": 1.568893603842834e-07,
      "loss": 0.0066,
      "step": 3007640
    },
    {
      "epoch": 4.922101556005053,
      "grad_norm": 0.21084775030612946,
      "learning_rate": 1.5682346817076627e-07,
      "loss": 0.0073,
      "step": 3007660
    },
    {
      "epoch": 4.922134286443707,
      "grad_norm": 0.12118595838546753,
      "learning_rate": 1.5675757595724914e-07,
      "loss": 0.006,
      "step": 3007680
    },
    {
      "epoch": 4.92216701688236,
      "grad_norm": 0.524675190448761,
      "learning_rate": 1.56691683743732e-07,
      "loss": 0.012,
      "step": 3007700
    },
    {
      "epoch": 4.922199747321014,
      "grad_norm": 0.06765157729387283,
      "learning_rate": 1.5662579153021488e-07,
      "loss": 0.0095,
      "step": 3007720
    },
    {
      "epoch": 4.922232477759667,
      "grad_norm": 0.5851996541023254,
      "learning_rate": 1.5655989931669776e-07,
      "loss": 0.0103,
      "step": 3007740
    },
    {
      "epoch": 4.92226520819832,
      "grad_norm": 0.4798332154750824,
      "learning_rate": 1.5649400710318063e-07,
      "loss": 0.0127,
      "step": 3007760
    },
    {
      "epoch": 4.922297938636974,
      "grad_norm": 0.4214032292366028,
      "learning_rate": 1.564281148896635e-07,
      "loss": 0.0108,
      "step": 3007780
    },
    {
      "epoch": 4.922330669075627,
      "grad_norm": 0.1631898283958435,
      "learning_rate": 1.5636222267614637e-07,
      "loss": 0.0067,
      "step": 3007800
    },
    {
      "epoch": 4.92236339951428,
      "grad_norm": 0.12048445641994476,
      "learning_rate": 1.5629633046262927e-07,
      "loss": 0.0075,
      "step": 3007820
    },
    {
      "epoch": 4.922396129952934,
      "grad_norm": 0.4250115752220154,
      "learning_rate": 1.5623043824911212e-07,
      "loss": 0.0166,
      "step": 3007840
    },
    {
      "epoch": 4.922428860391587,
      "grad_norm": 0.14591850340366364,
      "learning_rate": 1.56164546035595e-07,
      "loss": 0.0068,
      "step": 3007860
    },
    {
      "epoch": 4.92246159083024,
      "grad_norm": 0.23190121352672577,
      "learning_rate": 1.5609865382207786e-07,
      "loss": 0.0084,
      "step": 3007880
    },
    {
      "epoch": 4.922494321268894,
      "grad_norm": 0.1302529126405716,
      "learning_rate": 1.5603276160856073e-07,
      "loss": 0.0055,
      "step": 3007900
    },
    {
      "epoch": 4.922527051707547,
      "grad_norm": 0.13331635296344757,
      "learning_rate": 1.559668693950436e-07,
      "loss": 0.0108,
      "step": 3007920
    },
    {
      "epoch": 4.9225597821462,
      "grad_norm": 0.3812551498413086,
      "learning_rate": 1.5590097718152648e-07,
      "loss": 0.0094,
      "step": 3007940
    },
    {
      "epoch": 4.9225925125848535,
      "grad_norm": 0.1861514300107956,
      "learning_rate": 1.5583508496800935e-07,
      "loss": 0.0062,
      "step": 3007960
    },
    {
      "epoch": 4.922625243023507,
      "grad_norm": 0.24484466016292572,
      "learning_rate": 1.5576919275449222e-07,
      "loss": 0.0081,
      "step": 3007980
    },
    {
      "epoch": 4.922657973462161,
      "grad_norm": 0.2912002205848694,
      "learning_rate": 1.557033005409751e-07,
      "loss": 0.0065,
      "step": 3008000
    },
    {
      "epoch": 4.922690703900813,
      "grad_norm": 0.16401943564414978,
      "learning_rate": 1.5563740832745794e-07,
      "loss": 0.0077,
      "step": 3008020
    },
    {
      "epoch": 4.922723434339467,
      "grad_norm": 0.11037454009056091,
      "learning_rate": 1.5557151611394084e-07,
      "loss": 0.0096,
      "step": 3008040
    },
    {
      "epoch": 4.922756164778121,
      "grad_norm": 0.5038198232650757,
      "learning_rate": 1.555056239004237e-07,
      "loss": 0.0099,
      "step": 3008060
    },
    {
      "epoch": 4.922788895216773,
      "grad_norm": 0.16429948806762695,
      "learning_rate": 1.5543973168690658e-07,
      "loss": 0.0085,
      "step": 3008080
    },
    {
      "epoch": 4.922821625655427,
      "grad_norm": 0.41935598850250244,
      "learning_rate": 1.5537383947338945e-07,
      "loss": 0.0081,
      "step": 3008100
    },
    {
      "epoch": 4.9228543560940805,
      "grad_norm": 0.20888136327266693,
      "learning_rate": 1.5530794725987233e-07,
      "loss": 0.0151,
      "step": 3008120
    },
    {
      "epoch": 4.922887086532734,
      "grad_norm": 0.3841549754142761,
      "learning_rate": 1.5524205504635517e-07,
      "loss": 0.008,
      "step": 3008140
    },
    {
      "epoch": 4.922919816971387,
      "grad_norm": 0.12250446528196335,
      "learning_rate": 1.5517616283283804e-07,
      "loss": 0.0044,
      "step": 3008160
    },
    {
      "epoch": 4.92295254741004,
      "grad_norm": 0.2568581998348236,
      "learning_rate": 1.5511027061932091e-07,
      "loss": 0.0116,
      "step": 3008180
    },
    {
      "epoch": 4.922985277848694,
      "grad_norm": 0.25224992632865906,
      "learning_rate": 1.5504437840580381e-07,
      "loss": 0.0085,
      "step": 3008200
    },
    {
      "epoch": 4.923018008287347,
      "grad_norm": 0.5039229393005371,
      "learning_rate": 1.5497848619228669e-07,
      "loss": 0.0088,
      "step": 3008220
    },
    {
      "epoch": 4.923050738726,
      "grad_norm": 0.23865611851215363,
      "learning_rate": 1.5491259397876953e-07,
      "loss": 0.0154,
      "step": 3008240
    },
    {
      "epoch": 4.923083469164654,
      "grad_norm": 0.07887767255306244,
      "learning_rate": 1.548467017652524e-07,
      "loss": 0.0086,
      "step": 3008260
    },
    {
      "epoch": 4.923116199603307,
      "grad_norm": 0.09775952994823456,
      "learning_rate": 1.5478080955173528e-07,
      "loss": 0.0054,
      "step": 3008280
    },
    {
      "epoch": 4.92314893004196,
      "grad_norm": 0.04750275984406471,
      "learning_rate": 1.5471491733821815e-07,
      "loss": 0.0094,
      "step": 3008300
    },
    {
      "epoch": 4.923181660480614,
      "grad_norm": 0.21108566224575043,
      "learning_rate": 1.5464902512470102e-07,
      "loss": 0.0075,
      "step": 3008320
    },
    {
      "epoch": 4.923214390919267,
      "grad_norm": 0.10365397483110428,
      "learning_rate": 1.5458313291118392e-07,
      "loss": 0.0105,
      "step": 3008340
    },
    {
      "epoch": 4.92324712135792,
      "grad_norm": 0.15397073328495026,
      "learning_rate": 1.5451724069766676e-07,
      "loss": 0.0065,
      "step": 3008360
    },
    {
      "epoch": 4.923279851796574,
      "grad_norm": 0.20216920971870422,
      "learning_rate": 1.5445134848414964e-07,
      "loss": 0.0059,
      "step": 3008380
    },
    {
      "epoch": 4.923312582235227,
      "grad_norm": 0.1616630256175995,
      "learning_rate": 1.543854562706325e-07,
      "loss": 0.0058,
      "step": 3008400
    },
    {
      "epoch": 4.923345312673881,
      "grad_norm": 0.22018298506736755,
      "learning_rate": 1.5431956405711538e-07,
      "loss": 0.0086,
      "step": 3008420
    },
    {
      "epoch": 4.923378043112534,
      "grad_norm": 0.12289177626371384,
      "learning_rate": 1.5425367184359825e-07,
      "loss": 0.0089,
      "step": 3008440
    },
    {
      "epoch": 4.923410773551187,
      "grad_norm": 0.06334380060434341,
      "learning_rate": 1.5418777963008112e-07,
      "loss": 0.0075,
      "step": 3008460
    },
    {
      "epoch": 4.923443503989841,
      "grad_norm": 0.037290748208761215,
      "learning_rate": 1.54121887416564e-07,
      "loss": 0.0083,
      "step": 3008480
    },
    {
      "epoch": 4.9234762344284935,
      "grad_norm": 0.15656624734401703,
      "learning_rate": 1.5405599520304687e-07,
      "loss": 0.0091,
      "step": 3008500
    },
    {
      "epoch": 4.923508964867147,
      "grad_norm": 0.22223056852817535,
      "learning_rate": 1.5399010298952974e-07,
      "loss": 0.0081,
      "step": 3008520
    },
    {
      "epoch": 4.923541695305801,
      "grad_norm": 0.11285948008298874,
      "learning_rate": 1.539242107760126e-07,
      "loss": 0.0059,
      "step": 3008540
    },
    {
      "epoch": 4.923574425744453,
      "grad_norm": 0.14075520634651184,
      "learning_rate": 1.5385831856249548e-07,
      "loss": 0.0128,
      "step": 3008560
    },
    {
      "epoch": 4.923607156183107,
      "grad_norm": 0.1257801204919815,
      "learning_rate": 1.5379242634897836e-07,
      "loss": 0.008,
      "step": 3008580
    },
    {
      "epoch": 4.9236398866217606,
      "grad_norm": 0.4880807399749756,
      "learning_rate": 1.5372653413546123e-07,
      "loss": 0.0074,
      "step": 3008600
    },
    {
      "epoch": 4.923672617060414,
      "grad_norm": 0.09949826449155807,
      "learning_rate": 1.536606419219441e-07,
      "loss": 0.0051,
      "step": 3008620
    },
    {
      "epoch": 4.923705347499067,
      "grad_norm": 0.32185137271881104,
      "learning_rate": 1.5359474970842697e-07,
      "loss": 0.0088,
      "step": 3008640
    },
    {
      "epoch": 4.9237380779377204,
      "grad_norm": 0.35682642459869385,
      "learning_rate": 1.5352885749490985e-07,
      "loss": 0.0112,
      "step": 3008660
    },
    {
      "epoch": 4.923770808376374,
      "grad_norm": 0.14389120042324066,
      "learning_rate": 1.534629652813927e-07,
      "loss": 0.0052,
      "step": 3008680
    },
    {
      "epoch": 4.923803538815028,
      "grad_norm": 0.13746361434459686,
      "learning_rate": 1.533970730678756e-07,
      "loss": 0.0072,
      "step": 3008700
    },
    {
      "epoch": 4.92383626925368,
      "grad_norm": 0.30973517894744873,
      "learning_rate": 1.5333118085435846e-07,
      "loss": 0.0137,
      "step": 3008720
    },
    {
      "epoch": 4.923868999692334,
      "grad_norm": 0.10151772946119308,
      "learning_rate": 1.5326528864084133e-07,
      "loss": 0.0095,
      "step": 3008740
    },
    {
      "epoch": 4.9239017301309875,
      "grad_norm": 0.13274569809436798,
      "learning_rate": 1.531993964273242e-07,
      "loss": 0.0078,
      "step": 3008760
    },
    {
      "epoch": 4.92393446056964,
      "grad_norm": 0.10365954786539078,
      "learning_rate": 1.5313350421380708e-07,
      "loss": 0.0068,
      "step": 3008780
    },
    {
      "epoch": 4.923967191008294,
      "grad_norm": 0.0671040341258049,
      "learning_rate": 1.5306761200028992e-07,
      "loss": 0.0109,
      "step": 3008800
    },
    {
      "epoch": 4.923999921446947,
      "grad_norm": 0.10015817731618881,
      "learning_rate": 1.530017197867728e-07,
      "loss": 0.0083,
      "step": 3008820
    },
    {
      "epoch": 4.9240326518856,
      "grad_norm": 0.3096180558204651,
      "learning_rate": 1.529358275732557e-07,
      "loss": 0.0076,
      "step": 3008840
    },
    {
      "epoch": 4.924065382324254,
      "grad_norm": 0.07742032408714294,
      "learning_rate": 1.5286993535973857e-07,
      "loss": 0.0087,
      "step": 3008860
    },
    {
      "epoch": 4.924098112762907,
      "grad_norm": 0.328991562128067,
      "learning_rate": 1.5280404314622144e-07,
      "loss": 0.012,
      "step": 3008880
    },
    {
      "epoch": 4.924130843201561,
      "grad_norm": 0.08899883925914764,
      "learning_rate": 1.5273815093270428e-07,
      "loss": 0.0066,
      "step": 3008900
    },
    {
      "epoch": 4.924163573640214,
      "grad_norm": 0.11526565253734589,
      "learning_rate": 1.5267225871918716e-07,
      "loss": 0.0075,
      "step": 3008920
    },
    {
      "epoch": 4.924196304078867,
      "grad_norm": 0.4691227078437805,
      "learning_rate": 1.5260636650567003e-07,
      "loss": 0.006,
      "step": 3008940
    },
    {
      "epoch": 4.924229034517521,
      "grad_norm": 0.392519474029541,
      "learning_rate": 1.525404742921529e-07,
      "loss": 0.0104,
      "step": 3008960
    },
    {
      "epoch": 4.924261764956174,
      "grad_norm": 0.1479579210281372,
      "learning_rate": 1.5247458207863577e-07,
      "loss": 0.01,
      "step": 3008980
    },
    {
      "epoch": 4.924294495394827,
      "grad_norm": 0.4094185531139374,
      "learning_rate": 1.5240868986511867e-07,
      "loss": 0.014,
      "step": 3009000
    },
    {
      "epoch": 4.924327225833481,
      "grad_norm": 0.10894495248794556,
      "learning_rate": 1.5234279765160152e-07,
      "loss": 0.0062,
      "step": 3009020
    },
    {
      "epoch": 4.924359956272134,
      "grad_norm": 0.29267042875289917,
      "learning_rate": 1.522769054380844e-07,
      "loss": 0.0068,
      "step": 3009040
    },
    {
      "epoch": 4.924392686710787,
      "grad_norm": 0.11196155101060867,
      "learning_rate": 1.5221101322456726e-07,
      "loss": 0.0106,
      "step": 3009060
    },
    {
      "epoch": 4.924425417149441,
      "grad_norm": 0.1836215853691101,
      "learning_rate": 1.5214512101105013e-07,
      "loss": 0.015,
      "step": 3009080
    },
    {
      "epoch": 4.924458147588094,
      "grad_norm": 0.13829626142978668,
      "learning_rate": 1.52079228797533e-07,
      "loss": 0.0079,
      "step": 3009100
    },
    {
      "epoch": 4.924490878026747,
      "grad_norm": 0.7023220062255859,
      "learning_rate": 1.5201333658401588e-07,
      "loss": 0.0081,
      "step": 3009120
    },
    {
      "epoch": 4.9245236084654005,
      "grad_norm": 0.2764721214771271,
      "learning_rate": 1.5194744437049875e-07,
      "loss": 0.0082,
      "step": 3009140
    },
    {
      "epoch": 4.924556338904054,
      "grad_norm": 0.1604871302843094,
      "learning_rate": 1.5188155215698162e-07,
      "loss": 0.0053,
      "step": 3009160
    },
    {
      "epoch": 4.924589069342708,
      "grad_norm": 0.28684699535369873,
      "learning_rate": 1.518156599434645e-07,
      "loss": 0.0057,
      "step": 3009180
    },
    {
      "epoch": 4.92462179978136,
      "grad_norm": 0.06945961713790894,
      "learning_rate": 1.5174976772994737e-07,
      "loss": 0.0107,
      "step": 3009200
    },
    {
      "epoch": 4.924654530220014,
      "grad_norm": 0.27653294801712036,
      "learning_rate": 1.5168387551643024e-07,
      "loss": 0.0077,
      "step": 3009220
    },
    {
      "epoch": 4.924687260658668,
      "grad_norm": 0.22456811368465424,
      "learning_rate": 1.516179833029131e-07,
      "loss": 0.0095,
      "step": 3009240
    },
    {
      "epoch": 4.924719991097321,
      "grad_norm": 0.29944899678230286,
      "learning_rate": 1.5155209108939598e-07,
      "loss": 0.0088,
      "step": 3009260
    },
    {
      "epoch": 4.924752721535974,
      "grad_norm": 0.26917731761932373,
      "learning_rate": 1.5148619887587885e-07,
      "loss": 0.0064,
      "step": 3009280
    },
    {
      "epoch": 4.9247854519746275,
      "grad_norm": 0.30578747391700745,
      "learning_rate": 1.5142030666236173e-07,
      "loss": 0.0062,
      "step": 3009300
    },
    {
      "epoch": 4.924818182413281,
      "grad_norm": 0.3108123242855072,
      "learning_rate": 1.513544144488446e-07,
      "loss": 0.0055,
      "step": 3009320
    },
    {
      "epoch": 4.924850912851934,
      "grad_norm": 0.13179561495780945,
      "learning_rate": 1.5128852223532744e-07,
      "loss": 0.0122,
      "step": 3009340
    },
    {
      "epoch": 4.924883643290587,
      "grad_norm": 0.06305216997861862,
      "learning_rate": 1.5122263002181034e-07,
      "loss": 0.0091,
      "step": 3009360
    },
    {
      "epoch": 4.924916373729241,
      "grad_norm": 0.14383763074874878,
      "learning_rate": 1.5115673780829321e-07,
      "loss": 0.0085,
      "step": 3009380
    },
    {
      "epoch": 4.924949104167894,
      "grad_norm": 0.27285057306289673,
      "learning_rate": 1.5109084559477609e-07,
      "loss": 0.0084,
      "step": 3009400
    },
    {
      "epoch": 4.924981834606547,
      "grad_norm": 0.23588281869888306,
      "learning_rate": 1.5102495338125896e-07,
      "loss": 0.0092,
      "step": 3009420
    },
    {
      "epoch": 4.925014565045201,
      "grad_norm": 0.17450697720050812,
      "learning_rate": 1.5095906116774183e-07,
      "loss": 0.0138,
      "step": 3009440
    },
    {
      "epoch": 4.9250472954838544,
      "grad_norm": 0.077414870262146,
      "learning_rate": 1.5089316895422468e-07,
      "loss": 0.01,
      "step": 3009460
    },
    {
      "epoch": 4.925080025922507,
      "grad_norm": 0.2671974003314972,
      "learning_rate": 1.5082727674070755e-07,
      "loss": 0.0094,
      "step": 3009480
    },
    {
      "epoch": 4.925112756361161,
      "grad_norm": 0.29399728775024414,
      "learning_rate": 1.5076138452719045e-07,
      "loss": 0.0167,
      "step": 3009500
    },
    {
      "epoch": 4.925145486799814,
      "grad_norm": 0.18495619297027588,
      "learning_rate": 1.5069549231367332e-07,
      "loss": 0.0067,
      "step": 3009520
    },
    {
      "epoch": 4.925178217238467,
      "grad_norm": 0.3969683349132538,
      "learning_rate": 1.506296001001562e-07,
      "loss": 0.0081,
      "step": 3009540
    },
    {
      "epoch": 4.925210947677121,
      "grad_norm": 0.15994079411029816,
      "learning_rate": 1.5056370788663904e-07,
      "loss": 0.0081,
      "step": 3009560
    },
    {
      "epoch": 4.925243678115774,
      "grad_norm": 0.14567731320858002,
      "learning_rate": 1.504978156731219e-07,
      "loss": 0.0085,
      "step": 3009580
    },
    {
      "epoch": 4.925276408554428,
      "grad_norm": 0.10714764147996902,
      "learning_rate": 1.5043192345960478e-07,
      "loss": 0.0071,
      "step": 3009600
    },
    {
      "epoch": 4.9253091389930805,
      "grad_norm": 0.18625716865062714,
      "learning_rate": 1.5036603124608765e-07,
      "loss": 0.0107,
      "step": 3009620
    },
    {
      "epoch": 4.925341869431734,
      "grad_norm": 0.28514185547828674,
      "learning_rate": 1.5030013903257052e-07,
      "loss": 0.0104,
      "step": 3009640
    },
    {
      "epoch": 4.925374599870388,
      "grad_norm": 0.11916164308786392,
      "learning_rate": 1.5023424681905342e-07,
      "loss": 0.0096,
      "step": 3009660
    },
    {
      "epoch": 4.92540733030904,
      "grad_norm": 0.2584425210952759,
      "learning_rate": 1.5016835460553627e-07,
      "loss": 0.0096,
      "step": 3009680
    },
    {
      "epoch": 4.925440060747694,
      "grad_norm": 0.19507406651973724,
      "learning_rate": 1.5010246239201914e-07,
      "loss": 0.0082,
      "step": 3009700
    },
    {
      "epoch": 4.925472791186348,
      "grad_norm": 0.24949786067008972,
      "learning_rate": 1.5003657017850201e-07,
      "loss": 0.0071,
      "step": 3009720
    },
    {
      "epoch": 4.925505521625,
      "grad_norm": 0.15938417613506317,
      "learning_rate": 1.4997067796498489e-07,
      "loss": 0.0129,
      "step": 3009740
    },
    {
      "epoch": 4.925538252063654,
      "grad_norm": 0.25551289319992065,
      "learning_rate": 1.4990478575146776e-07,
      "loss": 0.0067,
      "step": 3009760
    },
    {
      "epoch": 4.9255709825023075,
      "grad_norm": 0.1521642804145813,
      "learning_rate": 1.4983889353795063e-07,
      "loss": 0.0083,
      "step": 3009780
    },
    {
      "epoch": 4.925603712940961,
      "grad_norm": 0.3777952492237091,
      "learning_rate": 1.497730013244335e-07,
      "loss": 0.0108,
      "step": 3009800
    },
    {
      "epoch": 4.925636443379614,
      "grad_norm": 0.4644712805747986,
      "learning_rate": 1.4970710911091637e-07,
      "loss": 0.0107,
      "step": 3009820
    },
    {
      "epoch": 4.925669173818267,
      "grad_norm": 0.5433611869812012,
      "learning_rate": 1.4964121689739925e-07,
      "loss": 0.0124,
      "step": 3009840
    },
    {
      "epoch": 4.925701904256921,
      "grad_norm": 0.14909037947654724,
      "learning_rate": 1.4957532468388212e-07,
      "loss": 0.0096,
      "step": 3009860
    },
    {
      "epoch": 4.925734634695575,
      "grad_norm": 0.08327383548021317,
      "learning_rate": 1.49509432470365e-07,
      "loss": 0.0115,
      "step": 3009880
    },
    {
      "epoch": 4.925767365134227,
      "grad_norm": 0.0999077707529068,
      "learning_rate": 1.4944354025684786e-07,
      "loss": 0.0094,
      "step": 3009900
    },
    {
      "epoch": 4.925800095572881,
      "grad_norm": 0.3881290853023529,
      "learning_rate": 1.4937764804333073e-07,
      "loss": 0.008,
      "step": 3009920
    },
    {
      "epoch": 4.9258328260115345,
      "grad_norm": 0.24567370116710663,
      "learning_rate": 1.493117558298136e-07,
      "loss": 0.014,
      "step": 3009940
    },
    {
      "epoch": 4.925865556450187,
      "grad_norm": 0.09713827073574066,
      "learning_rate": 1.4924586361629648e-07,
      "loss": 0.0101,
      "step": 3009960
    },
    {
      "epoch": 4.925898286888841,
      "grad_norm": 0.2821720540523529,
      "learning_rate": 1.4917997140277935e-07,
      "loss": 0.0123,
      "step": 3009980
    },
    {
      "epoch": 4.925931017327494,
      "grad_norm": 0.18105681240558624,
      "learning_rate": 1.491140791892622e-07,
      "loss": 0.0074,
      "step": 3010000
    },
    {
      "epoch": 4.925963747766147,
      "grad_norm": 0.42302921414375305,
      "learning_rate": 1.490481869757451e-07,
      "loss": 0.0105,
      "step": 3010020
    },
    {
      "epoch": 4.925996478204801,
      "grad_norm": 0.17532604932785034,
      "learning_rate": 1.4898229476222797e-07,
      "loss": 0.0066,
      "step": 3010040
    },
    {
      "epoch": 4.926029208643454,
      "grad_norm": 0.16245914995670319,
      "learning_rate": 1.4891640254871084e-07,
      "loss": 0.0103,
      "step": 3010060
    },
    {
      "epoch": 4.926061939082108,
      "grad_norm": 0.0804172083735466,
      "learning_rate": 1.488505103351937e-07,
      "loss": 0.0077,
      "step": 3010080
    },
    {
      "epoch": 4.926094669520761,
      "grad_norm": 0.18611325323581696,
      "learning_rate": 1.4878461812167658e-07,
      "loss": 0.0085,
      "step": 3010100
    },
    {
      "epoch": 4.926127399959414,
      "grad_norm": 0.1257610321044922,
      "learning_rate": 1.4871872590815943e-07,
      "loss": 0.0069,
      "step": 3010120
    },
    {
      "epoch": 4.926160130398068,
      "grad_norm": 0.7188437581062317,
      "learning_rate": 1.486528336946423e-07,
      "loss": 0.008,
      "step": 3010140
    },
    {
      "epoch": 4.926192860836721,
      "grad_norm": 0.16773393750190735,
      "learning_rate": 1.485869414811252e-07,
      "loss": 0.0108,
      "step": 3010160
    },
    {
      "epoch": 4.926225591275374,
      "grad_norm": 0.0882725790143013,
      "learning_rate": 1.4852104926760807e-07,
      "loss": 0.009,
      "step": 3010180
    },
    {
      "epoch": 4.926258321714028,
      "grad_norm": 0.24204985797405243,
      "learning_rate": 1.4845515705409094e-07,
      "loss": 0.0072,
      "step": 3010200
    },
    {
      "epoch": 4.926291052152681,
      "grad_norm": 0.26111412048339844,
      "learning_rate": 1.4838926484057382e-07,
      "loss": 0.0049,
      "step": 3010220
    },
    {
      "epoch": 4.926323782591334,
      "grad_norm": 0.4057582914829254,
      "learning_rate": 1.4832337262705666e-07,
      "loss": 0.009,
      "step": 3010240
    },
    {
      "epoch": 4.926356513029988,
      "grad_norm": 0.2201676219701767,
      "learning_rate": 1.4825748041353953e-07,
      "loss": 0.0091,
      "step": 3010260
    },
    {
      "epoch": 4.926389243468641,
      "grad_norm": 0.5155790448188782,
      "learning_rate": 1.481915882000224e-07,
      "loss": 0.0089,
      "step": 3010280
    },
    {
      "epoch": 4.926421973907294,
      "grad_norm": 0.3111552298069,
      "learning_rate": 1.4812569598650528e-07,
      "loss": 0.0093,
      "step": 3010300
    },
    {
      "epoch": 4.9264547043459475,
      "grad_norm": 0.5562880635261536,
      "learning_rate": 1.4805980377298818e-07,
      "loss": 0.0101,
      "step": 3010320
    },
    {
      "epoch": 4.926487434784601,
      "grad_norm": 0.19283835589885712,
      "learning_rate": 1.4799391155947102e-07,
      "loss": 0.0118,
      "step": 3010340
    },
    {
      "epoch": 4.926520165223255,
      "grad_norm": 0.1247137263417244,
      "learning_rate": 1.479280193459539e-07,
      "loss": 0.0055,
      "step": 3010360
    },
    {
      "epoch": 4.926552895661907,
      "grad_norm": 0.5767096877098083,
      "learning_rate": 1.4786212713243677e-07,
      "loss": 0.0077,
      "step": 3010380
    },
    {
      "epoch": 4.926585626100561,
      "grad_norm": 0.2422497570514679,
      "learning_rate": 1.4779623491891964e-07,
      "loss": 0.0078,
      "step": 3010400
    },
    {
      "epoch": 4.9266183565392145,
      "grad_norm": 0.19380322098731995,
      "learning_rate": 1.477303427054025e-07,
      "loss": 0.0116,
      "step": 3010420
    },
    {
      "epoch": 4.926651086977868,
      "grad_norm": 0.20360539853572845,
      "learning_rate": 1.4766445049188538e-07,
      "loss": 0.0077,
      "step": 3010440
    },
    {
      "epoch": 4.926683817416521,
      "grad_norm": 0.20783250033855438,
      "learning_rate": 1.4759855827836825e-07,
      "loss": 0.0118,
      "step": 3010460
    },
    {
      "epoch": 4.926716547855174,
      "grad_norm": 0.09833772480487823,
      "learning_rate": 1.4753266606485113e-07,
      "loss": 0.0075,
      "step": 3010480
    },
    {
      "epoch": 4.926749278293828,
      "grad_norm": 0.4040161967277527,
      "learning_rate": 1.47466773851334e-07,
      "loss": 0.0111,
      "step": 3010500
    },
    {
      "epoch": 4.926782008732481,
      "grad_norm": 0.15229906141757965,
      "learning_rate": 1.4740088163781687e-07,
      "loss": 0.0101,
      "step": 3010520
    },
    {
      "epoch": 4.926814739171134,
      "grad_norm": 0.13600191473960876,
      "learning_rate": 1.4733498942429974e-07,
      "loss": 0.0088,
      "step": 3010540
    },
    {
      "epoch": 4.926847469609788,
      "grad_norm": 0.11342618614435196,
      "learning_rate": 1.4726909721078261e-07,
      "loss": 0.0097,
      "step": 3010560
    },
    {
      "epoch": 4.926880200048441,
      "grad_norm": 0.15639129281044006,
      "learning_rate": 1.472032049972655e-07,
      "loss": 0.009,
      "step": 3010580
    },
    {
      "epoch": 4.926912930487094,
      "grad_norm": 0.6161435842514038,
      "learning_rate": 1.4713731278374836e-07,
      "loss": 0.0087,
      "step": 3010600
    },
    {
      "epoch": 4.926945660925748,
      "grad_norm": 0.47678902745246887,
      "learning_rate": 1.4707142057023123e-07,
      "loss": 0.0175,
      "step": 3010620
    },
    {
      "epoch": 4.926978391364401,
      "grad_norm": 0.36831262707710266,
      "learning_rate": 1.470055283567141e-07,
      "loss": 0.0097,
      "step": 3010640
    },
    {
      "epoch": 4.927011121803054,
      "grad_norm": 0.15253205597400665,
      "learning_rate": 1.4693963614319698e-07,
      "loss": 0.0067,
      "step": 3010660
    },
    {
      "epoch": 4.927043852241708,
      "grad_norm": 0.20169436931610107,
      "learning_rate": 1.4687374392967985e-07,
      "loss": 0.0099,
      "step": 3010680
    },
    {
      "epoch": 4.927076582680361,
      "grad_norm": 0.2253054529428482,
      "learning_rate": 1.4680785171616272e-07,
      "loss": 0.0091,
      "step": 3010700
    },
    {
      "epoch": 4.927109313119015,
      "grad_norm": 0.346110463142395,
      "learning_rate": 1.467419595026456e-07,
      "loss": 0.0073,
      "step": 3010720
    },
    {
      "epoch": 4.927142043557668,
      "grad_norm": 0.5463959574699402,
      "learning_rate": 1.4667606728912846e-07,
      "loss": 0.0089,
      "step": 3010740
    },
    {
      "epoch": 4.927174773996321,
      "grad_norm": 0.3376013934612274,
      "learning_rate": 1.4661017507561134e-07,
      "loss": 0.0067,
      "step": 3010760
    },
    {
      "epoch": 4.927207504434975,
      "grad_norm": 0.44345396757125854,
      "learning_rate": 1.4654428286209418e-07,
      "loss": 0.0073,
      "step": 3010780
    },
    {
      "epoch": 4.9272402348736275,
      "grad_norm": 0.1304401308298111,
      "learning_rate": 1.4647839064857705e-07,
      "loss": 0.0122,
      "step": 3010800
    },
    {
      "epoch": 4.927272965312281,
      "grad_norm": 0.17196252942085266,
      "learning_rate": 1.4641249843505995e-07,
      "loss": 0.0095,
      "step": 3010820
    },
    {
      "epoch": 4.927305695750935,
      "grad_norm": 0.09214027225971222,
      "learning_rate": 1.4634660622154282e-07,
      "loss": 0.0108,
      "step": 3010840
    },
    {
      "epoch": 4.927338426189587,
      "grad_norm": 0.29300662875175476,
      "learning_rate": 1.462807140080257e-07,
      "loss": 0.0114,
      "step": 3010860
    },
    {
      "epoch": 4.927371156628241,
      "grad_norm": 0.1342736929655075,
      "learning_rate": 1.4621482179450857e-07,
      "loss": 0.0112,
      "step": 3010880
    },
    {
      "epoch": 4.927403887066895,
      "grad_norm": 0.16032367944717407,
      "learning_rate": 1.4614892958099141e-07,
      "loss": 0.0098,
      "step": 3010900
    },
    {
      "epoch": 4.927436617505548,
      "grad_norm": 0.4033379554748535,
      "learning_rate": 1.4608303736747429e-07,
      "loss": 0.0115,
      "step": 3010920
    },
    {
      "epoch": 4.927469347944201,
      "grad_norm": 0.10804273188114166,
      "learning_rate": 1.4601714515395716e-07,
      "loss": 0.0148,
      "step": 3010940
    },
    {
      "epoch": 4.9275020783828545,
      "grad_norm": 0.16022180020809174,
      "learning_rate": 1.4595125294044003e-07,
      "loss": 0.0105,
      "step": 3010960
    },
    {
      "epoch": 4.927534808821508,
      "grad_norm": 0.20001329481601715,
      "learning_rate": 1.4588536072692293e-07,
      "loss": 0.0086,
      "step": 3010980
    },
    {
      "epoch": 4.927567539260161,
      "grad_norm": 0.20669373869895935,
      "learning_rate": 1.4581946851340577e-07,
      "loss": 0.0098,
      "step": 3011000
    },
    {
      "epoch": 4.927600269698814,
      "grad_norm": 0.18053460121154785,
      "learning_rate": 1.4575357629988865e-07,
      "loss": 0.009,
      "step": 3011020
    },
    {
      "epoch": 4.927633000137468,
      "grad_norm": 0.4962546229362488,
      "learning_rate": 1.4568768408637152e-07,
      "loss": 0.0094,
      "step": 3011040
    },
    {
      "epoch": 4.927665730576122,
      "grad_norm": 0.16259713470935822,
      "learning_rate": 1.456217918728544e-07,
      "loss": 0.0087,
      "step": 3011060
    },
    {
      "epoch": 4.927698461014774,
      "grad_norm": 0.4838978350162506,
      "learning_rate": 1.4555589965933726e-07,
      "loss": 0.0125,
      "step": 3011080
    },
    {
      "epoch": 4.927731191453428,
      "grad_norm": 0.33233559131622314,
      "learning_rate": 1.4549000744582013e-07,
      "loss": 0.0089,
      "step": 3011100
    },
    {
      "epoch": 4.9277639218920815,
      "grad_norm": 0.23448161780834198,
      "learning_rate": 1.45424115232303e-07,
      "loss": 0.0111,
      "step": 3011120
    },
    {
      "epoch": 4.927796652330734,
      "grad_norm": 0.29478785395622253,
      "learning_rate": 1.4535822301878588e-07,
      "loss": 0.0073,
      "step": 3011140
    },
    {
      "epoch": 4.927829382769388,
      "grad_norm": 0.3245968222618103,
      "learning_rate": 1.4529233080526875e-07,
      "loss": 0.0067,
      "step": 3011160
    },
    {
      "epoch": 4.927862113208041,
      "grad_norm": 0.31060269474983215,
      "learning_rate": 1.4522643859175162e-07,
      "loss": 0.0079,
      "step": 3011180
    },
    {
      "epoch": 4.927894843646694,
      "grad_norm": 0.1292445808649063,
      "learning_rate": 1.451605463782345e-07,
      "loss": 0.0063,
      "step": 3011200
    },
    {
      "epoch": 4.927927574085348,
      "grad_norm": 0.28910139203071594,
      "learning_rate": 1.4509465416471737e-07,
      "loss": 0.0079,
      "step": 3011220
    },
    {
      "epoch": 4.927960304524001,
      "grad_norm": 0.8678796887397766,
      "learning_rate": 1.4502876195120024e-07,
      "loss": 0.0072,
      "step": 3011240
    },
    {
      "epoch": 4.927993034962655,
      "grad_norm": 0.15338994562625885,
      "learning_rate": 1.449628697376831e-07,
      "loss": 0.0096,
      "step": 3011260
    },
    {
      "epoch": 4.9280257654013075,
      "grad_norm": 0.31183844804763794,
      "learning_rate": 1.4489697752416598e-07,
      "loss": 0.0148,
      "step": 3011280
    },
    {
      "epoch": 4.928058495839961,
      "grad_norm": 0.11689455062150955,
      "learning_rate": 1.4483108531064886e-07,
      "loss": 0.0104,
      "step": 3011300
    },
    {
      "epoch": 4.928091226278615,
      "grad_norm": 0.39097660779953003,
      "learning_rate": 1.4476519309713173e-07,
      "loss": 0.0074,
      "step": 3011320
    },
    {
      "epoch": 4.928123956717268,
      "grad_norm": 0.06004250794649124,
      "learning_rate": 1.446993008836146e-07,
      "loss": 0.0088,
      "step": 3011340
    },
    {
      "epoch": 4.928156687155921,
      "grad_norm": 0.26209452748298645,
      "learning_rate": 1.4463340867009747e-07,
      "loss": 0.0057,
      "step": 3011360
    },
    {
      "epoch": 4.928189417594575,
      "grad_norm": 0.08279021084308624,
      "learning_rate": 1.4456751645658034e-07,
      "loss": 0.0063,
      "step": 3011380
    },
    {
      "epoch": 4.928222148033228,
      "grad_norm": 0.3522491455078125,
      "learning_rate": 1.4450162424306322e-07,
      "loss": 0.0088,
      "step": 3011400
    },
    {
      "epoch": 4.928254878471881,
      "grad_norm": 0.18751190602779388,
      "learning_rate": 1.444357320295461e-07,
      "loss": 0.0099,
      "step": 3011420
    },
    {
      "epoch": 4.9282876089105345,
      "grad_norm": 0.17820651829242706,
      "learning_rate": 1.4436983981602893e-07,
      "loss": 0.0083,
      "step": 3011440
    },
    {
      "epoch": 4.928320339349188,
      "grad_norm": 0.29519718885421753,
      "learning_rate": 1.443039476025118e-07,
      "loss": 0.0082,
      "step": 3011460
    },
    {
      "epoch": 4.928353069787841,
      "grad_norm": 0.310871422290802,
      "learning_rate": 1.442380553889947e-07,
      "loss": 0.0087,
      "step": 3011480
    },
    {
      "epoch": 4.928385800226494,
      "grad_norm": 0.10269417613744736,
      "learning_rate": 1.4417216317547758e-07,
      "loss": 0.0096,
      "step": 3011500
    },
    {
      "epoch": 4.928418530665148,
      "grad_norm": 0.15392407774925232,
      "learning_rate": 1.4410627096196045e-07,
      "loss": 0.0093,
      "step": 3011520
    },
    {
      "epoch": 4.928451261103802,
      "grad_norm": 0.22645263373851776,
      "learning_rate": 1.4404037874844332e-07,
      "loss": 0.0112,
      "step": 3011540
    },
    {
      "epoch": 4.928483991542454,
      "grad_norm": 0.22589252889156342,
      "learning_rate": 1.4397448653492617e-07,
      "loss": 0.0079,
      "step": 3011560
    },
    {
      "epoch": 4.928516721981108,
      "grad_norm": 0.22049793601036072,
      "learning_rate": 1.4390859432140904e-07,
      "loss": 0.0118,
      "step": 3011580
    },
    {
      "epoch": 4.9285494524197615,
      "grad_norm": 0.22214704751968384,
      "learning_rate": 1.438427021078919e-07,
      "loss": 0.0066,
      "step": 3011600
    },
    {
      "epoch": 4.928582182858415,
      "grad_norm": 0.11217854171991348,
      "learning_rate": 1.437768098943748e-07,
      "loss": 0.0069,
      "step": 3011620
    },
    {
      "epoch": 4.928614913297068,
      "grad_norm": 0.2707735300064087,
      "learning_rate": 1.4371091768085768e-07,
      "loss": 0.0093,
      "step": 3011640
    },
    {
      "epoch": 4.928647643735721,
      "grad_norm": 0.07889346033334732,
      "learning_rate": 1.4364502546734053e-07,
      "loss": 0.0089,
      "step": 3011660
    },
    {
      "epoch": 4.928680374174375,
      "grad_norm": 0.4276917576789856,
      "learning_rate": 1.435791332538234e-07,
      "loss": 0.0112,
      "step": 3011680
    },
    {
      "epoch": 4.928713104613028,
      "grad_norm": 0.41183799505233765,
      "learning_rate": 1.4351324104030627e-07,
      "loss": 0.0107,
      "step": 3011700
    },
    {
      "epoch": 4.928745835051681,
      "grad_norm": 0.0778965950012207,
      "learning_rate": 1.4344734882678914e-07,
      "loss": 0.0072,
      "step": 3011720
    },
    {
      "epoch": 4.928778565490335,
      "grad_norm": 0.13748233020305634,
      "learning_rate": 1.4338145661327202e-07,
      "loss": 0.0102,
      "step": 3011740
    },
    {
      "epoch": 4.928811295928988,
      "grad_norm": 0.26014554500579834,
      "learning_rate": 1.433155643997549e-07,
      "loss": 0.0077,
      "step": 3011760
    },
    {
      "epoch": 4.928844026367641,
      "grad_norm": 0.1307421773672104,
      "learning_rate": 1.4324967218623776e-07,
      "loss": 0.0087,
      "step": 3011780
    },
    {
      "epoch": 4.928876756806295,
      "grad_norm": 0.272845596075058,
      "learning_rate": 1.4318377997272063e-07,
      "loss": 0.0085,
      "step": 3011800
    },
    {
      "epoch": 4.928909487244948,
      "grad_norm": 0.06911775469779968,
      "learning_rate": 1.431178877592035e-07,
      "loss": 0.0143,
      "step": 3011820
    },
    {
      "epoch": 4.928942217683601,
      "grad_norm": 0.4383230209350586,
      "learning_rate": 1.4305199554568638e-07,
      "loss": 0.008,
      "step": 3011840
    },
    {
      "epoch": 4.928974948122255,
      "grad_norm": 0.15177829563617706,
      "learning_rate": 1.4298610333216925e-07,
      "loss": 0.011,
      "step": 3011860
    },
    {
      "epoch": 4.929007678560908,
      "grad_norm": 0.17073413729667664,
      "learning_rate": 1.4292021111865212e-07,
      "loss": 0.0052,
      "step": 3011880
    },
    {
      "epoch": 4.929040408999562,
      "grad_norm": 0.3766842782497406,
      "learning_rate": 1.42854318905135e-07,
      "loss": 0.0077,
      "step": 3011900
    },
    {
      "epoch": 4.929073139438215,
      "grad_norm": 0.36133500933647156,
      "learning_rate": 1.4278842669161786e-07,
      "loss": 0.0142,
      "step": 3011920
    },
    {
      "epoch": 4.929105869876868,
      "grad_norm": 0.18897460401058197,
      "learning_rate": 1.4272253447810074e-07,
      "loss": 0.0062,
      "step": 3011940
    },
    {
      "epoch": 4.929138600315522,
      "grad_norm": 0.4192621111869812,
      "learning_rate": 1.426566422645836e-07,
      "loss": 0.0085,
      "step": 3011960
    },
    {
      "epoch": 4.9291713307541745,
      "grad_norm": 0.5423459410667419,
      "learning_rate": 1.4259075005106648e-07,
      "loss": 0.0067,
      "step": 3011980
    },
    {
      "epoch": 4.929204061192828,
      "grad_norm": 0.27729547023773193,
      "learning_rate": 1.4252485783754935e-07,
      "loss": 0.0105,
      "step": 3012000
    },
    {
      "epoch": 4.929236791631482,
      "grad_norm": 0.1329946666955948,
      "learning_rate": 1.4245896562403222e-07,
      "loss": 0.0057,
      "step": 3012020
    },
    {
      "epoch": 4.929269522070134,
      "grad_norm": 0.19477321207523346,
      "learning_rate": 1.423930734105151e-07,
      "loss": 0.0098,
      "step": 3012040
    },
    {
      "epoch": 4.929302252508788,
      "grad_norm": 0.17233382165431976,
      "learning_rate": 1.4232718119699797e-07,
      "loss": 0.0121,
      "step": 3012060
    },
    {
      "epoch": 4.9293349829474415,
      "grad_norm": 0.4491221308708191,
      "learning_rate": 1.4226128898348084e-07,
      "loss": 0.0051,
      "step": 3012080
    },
    {
      "epoch": 4.929367713386095,
      "grad_norm": 0.6798502802848816,
      "learning_rate": 1.421953967699637e-07,
      "loss": 0.0103,
      "step": 3012100
    },
    {
      "epoch": 4.929400443824748,
      "grad_norm": 0.22469666600227356,
      "learning_rate": 1.4212950455644656e-07,
      "loss": 0.011,
      "step": 3012120
    },
    {
      "epoch": 4.929433174263401,
      "grad_norm": 0.05449707806110382,
      "learning_rate": 1.4206361234292946e-07,
      "loss": 0.0071,
      "step": 3012140
    },
    {
      "epoch": 4.929465904702055,
      "grad_norm": 0.12206131219863892,
      "learning_rate": 1.4199772012941233e-07,
      "loss": 0.0037,
      "step": 3012160
    },
    {
      "epoch": 4.929498635140709,
      "grad_norm": 0.1563018411397934,
      "learning_rate": 1.419318279158952e-07,
      "loss": 0.0064,
      "step": 3012180
    },
    {
      "epoch": 4.929531365579361,
      "grad_norm": 0.12252838909626007,
      "learning_rate": 1.4186593570237807e-07,
      "loss": 0.0075,
      "step": 3012200
    },
    {
      "epoch": 4.929564096018015,
      "grad_norm": 0.04205423220992088,
      "learning_rate": 1.4180004348886092e-07,
      "loss": 0.0078,
      "step": 3012220
    },
    {
      "epoch": 4.9295968264566685,
      "grad_norm": 0.33081045746803284,
      "learning_rate": 1.417341512753438e-07,
      "loss": 0.0102,
      "step": 3012240
    },
    {
      "epoch": 4.929629556895321,
      "grad_norm": 0.23875388503074646,
      "learning_rate": 1.4166825906182666e-07,
      "loss": 0.0057,
      "step": 3012260
    },
    {
      "epoch": 4.929662287333975,
      "grad_norm": 0.2901466488838196,
      "learning_rate": 1.4160236684830956e-07,
      "loss": 0.0083,
      "step": 3012280
    },
    {
      "epoch": 4.929695017772628,
      "grad_norm": 0.35844480991363525,
      "learning_rate": 1.4153647463479243e-07,
      "loss": 0.0047,
      "step": 3012300
    },
    {
      "epoch": 4.929727748211281,
      "grad_norm": 0.12364887446165085,
      "learning_rate": 1.414705824212753e-07,
      "loss": 0.0068,
      "step": 3012320
    },
    {
      "epoch": 4.929760478649935,
      "grad_norm": 0.2580866515636444,
      "learning_rate": 1.4140469020775815e-07,
      "loss": 0.0078,
      "step": 3012340
    },
    {
      "epoch": 4.929793209088588,
      "grad_norm": 0.1983906775712967,
      "learning_rate": 1.4133879799424102e-07,
      "loss": 0.0113,
      "step": 3012360
    },
    {
      "epoch": 4.929825939527242,
      "grad_norm": 0.5629165172576904,
      "learning_rate": 1.412729057807239e-07,
      "loss": 0.0194,
      "step": 3012380
    },
    {
      "epoch": 4.929858669965895,
      "grad_norm": 0.21212777495384216,
      "learning_rate": 1.4120701356720677e-07,
      "loss": 0.0102,
      "step": 3012400
    },
    {
      "epoch": 4.929891400404548,
      "grad_norm": 0.19712381064891815,
      "learning_rate": 1.4114112135368964e-07,
      "loss": 0.0078,
      "step": 3012420
    },
    {
      "epoch": 4.929924130843202,
      "grad_norm": 0.14196045696735382,
      "learning_rate": 1.410752291401725e-07,
      "loss": 0.0066,
      "step": 3012440
    },
    {
      "epoch": 4.929956861281855,
      "grad_norm": 0.07528378814458847,
      "learning_rate": 1.4100933692665538e-07,
      "loss": 0.0105,
      "step": 3012460
    },
    {
      "epoch": 4.929989591720508,
      "grad_norm": 0.2781950831413269,
      "learning_rate": 1.4094344471313826e-07,
      "loss": 0.0097,
      "step": 3012480
    },
    {
      "epoch": 4.930022322159162,
      "grad_norm": 0.391521155834198,
      "learning_rate": 1.4087755249962113e-07,
      "loss": 0.0121,
      "step": 3012500
    },
    {
      "epoch": 4.930055052597815,
      "grad_norm": 0.24859173595905304,
      "learning_rate": 1.40811660286104e-07,
      "loss": 0.0107,
      "step": 3012520
    },
    {
      "epoch": 4.930087783036468,
      "grad_norm": 0.25247490406036377,
      "learning_rate": 1.4074576807258687e-07,
      "loss": 0.0115,
      "step": 3012540
    },
    {
      "epoch": 4.930120513475122,
      "grad_norm": 0.22544214129447937,
      "learning_rate": 1.4067987585906974e-07,
      "loss": 0.0083,
      "step": 3012560
    },
    {
      "epoch": 4.930153243913775,
      "grad_norm": 0.2353086918592453,
      "learning_rate": 1.4061398364555262e-07,
      "loss": 0.0059,
      "step": 3012580
    },
    {
      "epoch": 4.930185974352428,
      "grad_norm": 0.2185610681772232,
      "learning_rate": 1.405480914320355e-07,
      "loss": 0.0101,
      "step": 3012600
    },
    {
      "epoch": 4.9302187047910815,
      "grad_norm": 0.20847897231578827,
      "learning_rate": 1.4048219921851836e-07,
      "loss": 0.0062,
      "step": 3012620
    },
    {
      "epoch": 4.930251435229735,
      "grad_norm": 0.8483183979988098,
      "learning_rate": 1.4041630700500123e-07,
      "loss": 0.0113,
      "step": 3012640
    },
    {
      "epoch": 4.930284165668389,
      "grad_norm": 0.41482269763946533,
      "learning_rate": 1.403504147914841e-07,
      "loss": 0.0075,
      "step": 3012660
    },
    {
      "epoch": 4.930316896107041,
      "grad_norm": 0.1712677925825119,
      "learning_rate": 1.4028452257796698e-07,
      "loss": 0.0089,
      "step": 3012680
    },
    {
      "epoch": 4.930349626545695,
      "grad_norm": 0.2606922388076782,
      "learning_rate": 1.4021863036444985e-07,
      "loss": 0.0108,
      "step": 3012700
    },
    {
      "epoch": 4.930382356984349,
      "grad_norm": 0.4338923394680023,
      "learning_rate": 1.4015273815093272e-07,
      "loss": 0.0111,
      "step": 3012720
    },
    {
      "epoch": 4.930415087423001,
      "grad_norm": 0.11754544079303741,
      "learning_rate": 1.400868459374156e-07,
      "loss": 0.0101,
      "step": 3012740
    },
    {
      "epoch": 4.930447817861655,
      "grad_norm": 0.09966934472322464,
      "learning_rate": 1.4002095372389847e-07,
      "loss": 0.0131,
      "step": 3012760
    },
    {
      "epoch": 4.9304805483003085,
      "grad_norm": 0.13664987683296204,
      "learning_rate": 1.399550615103813e-07,
      "loss": 0.007,
      "step": 3012780
    },
    {
      "epoch": 4.930513278738962,
      "grad_norm": 0.11898849159479141,
      "learning_rate": 1.398891692968642e-07,
      "loss": 0.0084,
      "step": 3012800
    },
    {
      "epoch": 4.930546009177615,
      "grad_norm": 0.13113375008106232,
      "learning_rate": 1.3982327708334708e-07,
      "loss": 0.0086,
      "step": 3012820
    },
    {
      "epoch": 4.930578739616268,
      "grad_norm": 0.19282783567905426,
      "learning_rate": 1.3975738486982995e-07,
      "loss": 0.0097,
      "step": 3012840
    },
    {
      "epoch": 4.930611470054922,
      "grad_norm": 0.21803800761699677,
      "learning_rate": 1.3969149265631283e-07,
      "loss": 0.0069,
      "step": 3012860
    },
    {
      "epoch": 4.930644200493575,
      "grad_norm": 0.19493131339550018,
      "learning_rate": 1.3962560044279567e-07,
      "loss": 0.0061,
      "step": 3012880
    },
    {
      "epoch": 4.930676930932228,
      "grad_norm": 0.5247625112533569,
      "learning_rate": 1.3955970822927854e-07,
      "loss": 0.0117,
      "step": 3012900
    },
    {
      "epoch": 4.930709661370882,
      "grad_norm": 0.32704028487205505,
      "learning_rate": 1.3949381601576142e-07,
      "loss": 0.0119,
      "step": 3012920
    },
    {
      "epoch": 4.9307423918095346,
      "grad_norm": 0.25175902247428894,
      "learning_rate": 1.3942792380224431e-07,
      "loss": 0.0099,
      "step": 3012940
    },
    {
      "epoch": 4.930775122248188,
      "grad_norm": 0.13602744042873383,
      "learning_rate": 1.393620315887272e-07,
      "loss": 0.007,
      "step": 3012960
    },
    {
      "epoch": 4.930807852686842,
      "grad_norm": 0.39337825775146484,
      "learning_rate": 1.3929613937521006e-07,
      "loss": 0.0078,
      "step": 3012980
    },
    {
      "epoch": 4.930840583125495,
      "grad_norm": 0.4208945035934448,
      "learning_rate": 1.392302471616929e-07,
      "loss": 0.0102,
      "step": 3013000
    },
    {
      "epoch": 4.930873313564148,
      "grad_norm": 0.24603351950645447,
      "learning_rate": 1.3916435494817578e-07,
      "loss": 0.007,
      "step": 3013020
    },
    {
      "epoch": 4.930906044002802,
      "grad_norm": 0.47323280572891235,
      "learning_rate": 1.3909846273465865e-07,
      "loss": 0.0077,
      "step": 3013040
    },
    {
      "epoch": 4.930938774441455,
      "grad_norm": 0.030326908454298973,
      "learning_rate": 1.3903257052114152e-07,
      "loss": 0.0107,
      "step": 3013060
    },
    {
      "epoch": 4.930971504880109,
      "grad_norm": 0.1345134973526001,
      "learning_rate": 1.389666783076244e-07,
      "loss": 0.0102,
      "step": 3013080
    },
    {
      "epoch": 4.9310042353187615,
      "grad_norm": 0.1095234826207161,
      "learning_rate": 1.3890078609410727e-07,
      "loss": 0.0065,
      "step": 3013100
    },
    {
      "epoch": 4.931036965757415,
      "grad_norm": 0.14779652655124664,
      "learning_rate": 1.3883489388059014e-07,
      "loss": 0.0065,
      "step": 3013120
    },
    {
      "epoch": 4.931069696196069,
      "grad_norm": 0.1304863691329956,
      "learning_rate": 1.38769001667073e-07,
      "loss": 0.0084,
      "step": 3013140
    },
    {
      "epoch": 4.931102426634721,
      "grad_norm": 0.3192600607872009,
      "learning_rate": 1.3870310945355588e-07,
      "loss": 0.011,
      "step": 3013160
    },
    {
      "epoch": 4.931135157073375,
      "grad_norm": 0.5765520930290222,
      "learning_rate": 1.3863721724003875e-07,
      "loss": 0.0081,
      "step": 3013180
    },
    {
      "epoch": 4.931167887512029,
      "grad_norm": 0.4013100862503052,
      "learning_rate": 1.3857132502652163e-07,
      "loss": 0.0069,
      "step": 3013200
    },
    {
      "epoch": 4.931200617950681,
      "grad_norm": 0.12111278623342514,
      "learning_rate": 1.385054328130045e-07,
      "loss": 0.0099,
      "step": 3013220
    },
    {
      "epoch": 4.931233348389335,
      "grad_norm": 0.21548351645469666,
      "learning_rate": 1.3843954059948737e-07,
      "loss": 0.0101,
      "step": 3013240
    },
    {
      "epoch": 4.9312660788279885,
      "grad_norm": 0.04927147552371025,
      "learning_rate": 1.3837364838597024e-07,
      "loss": 0.0121,
      "step": 3013260
    },
    {
      "epoch": 4.931298809266642,
      "grad_norm": 0.40939393639564514,
      "learning_rate": 1.3830775617245311e-07,
      "loss": 0.0151,
      "step": 3013280
    },
    {
      "epoch": 4.931331539705295,
      "grad_norm": 0.06333599239587784,
      "learning_rate": 1.3824186395893599e-07,
      "loss": 0.0144,
      "step": 3013300
    },
    {
      "epoch": 4.931364270143948,
      "grad_norm": 0.2285657525062561,
      "learning_rate": 1.3817597174541886e-07,
      "loss": 0.0059,
      "step": 3013320
    },
    {
      "epoch": 4.931397000582602,
      "grad_norm": 0.15383349359035492,
      "learning_rate": 1.3811007953190173e-07,
      "loss": 0.0111,
      "step": 3013340
    },
    {
      "epoch": 4.931429731021256,
      "grad_norm": 0.10829859226942062,
      "learning_rate": 1.380441873183846e-07,
      "loss": 0.012,
      "step": 3013360
    },
    {
      "epoch": 4.931462461459908,
      "grad_norm": 0.2902471423149109,
      "learning_rate": 1.3797829510486747e-07,
      "loss": 0.0133,
      "step": 3013380
    },
    {
      "epoch": 4.931495191898562,
      "grad_norm": 0.1834101527929306,
      "learning_rate": 1.3791240289135035e-07,
      "loss": 0.0085,
      "step": 3013400
    },
    {
      "epoch": 4.9315279223372155,
      "grad_norm": 0.0893532931804657,
      "learning_rate": 1.3784651067783322e-07,
      "loss": 0.0076,
      "step": 3013420
    },
    {
      "epoch": 4.931560652775868,
      "grad_norm": 0.46753960847854614,
      "learning_rate": 1.3778061846431606e-07,
      "loss": 0.0088,
      "step": 3013440
    },
    {
      "epoch": 4.931593383214522,
      "grad_norm": 0.3230770230293274,
      "learning_rate": 1.3771472625079896e-07,
      "loss": 0.0078,
      "step": 3013460
    },
    {
      "epoch": 4.931626113653175,
      "grad_norm": 0.2378605604171753,
      "learning_rate": 1.3764883403728183e-07,
      "loss": 0.0096,
      "step": 3013480
    },
    {
      "epoch": 4.931658844091828,
      "grad_norm": 0.31273603439331055,
      "learning_rate": 1.375829418237647e-07,
      "loss": 0.0114,
      "step": 3013500
    },
    {
      "epoch": 4.931691574530482,
      "grad_norm": 0.1680597960948944,
      "learning_rate": 1.3751704961024758e-07,
      "loss": 0.0112,
      "step": 3013520
    },
    {
      "epoch": 4.931724304969135,
      "grad_norm": 0.3715960681438446,
      "learning_rate": 1.3745115739673042e-07,
      "loss": 0.0073,
      "step": 3013540
    },
    {
      "epoch": 4.931757035407789,
      "grad_norm": 0.6028141379356384,
      "learning_rate": 1.373852651832133e-07,
      "loss": 0.0099,
      "step": 3013560
    },
    {
      "epoch": 4.931789765846442,
      "grad_norm": 0.185479074716568,
      "learning_rate": 1.3731937296969617e-07,
      "loss": 0.0082,
      "step": 3013580
    },
    {
      "epoch": 4.931822496285095,
      "grad_norm": 0.08489720523357391,
      "learning_rate": 1.3725348075617907e-07,
      "loss": 0.0095,
      "step": 3013600
    },
    {
      "epoch": 4.931855226723749,
      "grad_norm": 0.292873352766037,
      "learning_rate": 1.3718758854266194e-07,
      "loss": 0.0064,
      "step": 3013620
    },
    {
      "epoch": 4.931887957162402,
      "grad_norm": 0.4580234885215759,
      "learning_rate": 1.371216963291448e-07,
      "loss": 0.0087,
      "step": 3013640
    },
    {
      "epoch": 4.931920687601055,
      "grad_norm": 0.14835388958454132,
      "learning_rate": 1.3705580411562766e-07,
      "loss": 0.0083,
      "step": 3013660
    },
    {
      "epoch": 4.931953418039709,
      "grad_norm": 0.17098307609558105,
      "learning_rate": 1.3698991190211053e-07,
      "loss": 0.0056,
      "step": 3013680
    },
    {
      "epoch": 4.931986148478362,
      "grad_norm": 0.04206293821334839,
      "learning_rate": 1.369240196885934e-07,
      "loss": 0.0118,
      "step": 3013700
    },
    {
      "epoch": 4.932018878917015,
      "grad_norm": 0.41818127036094666,
      "learning_rate": 1.3685812747507627e-07,
      "loss": 0.0081,
      "step": 3013720
    },
    {
      "epoch": 4.9320516093556686,
      "grad_norm": 0.3339046537876129,
      "learning_rate": 1.3679223526155915e-07,
      "loss": 0.012,
      "step": 3013740
    },
    {
      "epoch": 4.932084339794322,
      "grad_norm": 0.15477845072746277,
      "learning_rate": 1.3672634304804202e-07,
      "loss": 0.0056,
      "step": 3013760
    },
    {
      "epoch": 4.932117070232975,
      "grad_norm": 0.1639210730791092,
      "learning_rate": 1.366604508345249e-07,
      "loss": 0.0043,
      "step": 3013780
    },
    {
      "epoch": 4.9321498006716284,
      "grad_norm": 0.2611981928348541,
      "learning_rate": 1.3659455862100776e-07,
      "loss": 0.0077,
      "step": 3013800
    },
    {
      "epoch": 4.932182531110282,
      "grad_norm": 0.2342759221792221,
      "learning_rate": 1.3652866640749063e-07,
      "loss": 0.0079,
      "step": 3013820
    },
    {
      "epoch": 4.932215261548936,
      "grad_norm": 0.13515225052833557,
      "learning_rate": 1.364627741939735e-07,
      "loss": 0.0058,
      "step": 3013840
    },
    {
      "epoch": 4.932247991987588,
      "grad_norm": 0.21037712693214417,
      "learning_rate": 1.3639688198045638e-07,
      "loss": 0.0068,
      "step": 3013860
    },
    {
      "epoch": 4.932280722426242,
      "grad_norm": 0.12436074018478394,
      "learning_rate": 1.3633098976693925e-07,
      "loss": 0.0076,
      "step": 3013880
    },
    {
      "epoch": 4.9323134528648955,
      "grad_norm": 0.16752398014068604,
      "learning_rate": 1.3626509755342212e-07,
      "loss": 0.015,
      "step": 3013900
    },
    {
      "epoch": 4.932346183303549,
      "grad_norm": 0.35145753622055054,
      "learning_rate": 1.36199205339905e-07,
      "loss": 0.0098,
      "step": 3013920
    },
    {
      "epoch": 4.932378913742202,
      "grad_norm": 0.3623933792114258,
      "learning_rate": 1.3613331312638787e-07,
      "loss": 0.0124,
      "step": 3013940
    },
    {
      "epoch": 4.932411644180855,
      "grad_norm": 0.35547974705696106,
      "learning_rate": 1.3606742091287074e-07,
      "loss": 0.0108,
      "step": 3013960
    },
    {
      "epoch": 4.932444374619509,
      "grad_norm": 0.3916459381580353,
      "learning_rate": 1.360015286993536e-07,
      "loss": 0.0085,
      "step": 3013980
    },
    {
      "epoch": 4.932477105058162,
      "grad_norm": 0.2222907692193985,
      "learning_rate": 1.3593563648583648e-07,
      "loss": 0.0073,
      "step": 3014000
    },
    {
      "epoch": 4.932509835496815,
      "grad_norm": 0.2994226813316345,
      "learning_rate": 1.3586974427231935e-07,
      "loss": 0.0061,
      "step": 3014020
    },
    {
      "epoch": 4.932542565935469,
      "grad_norm": 0.17618131637573242,
      "learning_rate": 1.3580385205880223e-07,
      "loss": 0.0097,
      "step": 3014040
    },
    {
      "epoch": 4.932575296374122,
      "grad_norm": 0.18065598607063293,
      "learning_rate": 1.357379598452851e-07,
      "loss": 0.005,
      "step": 3014060
    },
    {
      "epoch": 4.932608026812775,
      "grad_norm": 0.2488720566034317,
      "learning_rate": 1.3567206763176797e-07,
      "loss": 0.0062,
      "step": 3014080
    },
    {
      "epoch": 4.932640757251429,
      "grad_norm": 0.38802117109298706,
      "learning_rate": 1.3560617541825082e-07,
      "loss": 0.0106,
      "step": 3014100
    },
    {
      "epoch": 4.932673487690082,
      "grad_norm": 0.1905769556760788,
      "learning_rate": 1.3554028320473372e-07,
      "loss": 0.0051,
      "step": 3014120
    },
    {
      "epoch": 4.932706218128735,
      "grad_norm": 0.26437270641326904,
      "learning_rate": 1.354743909912166e-07,
      "loss": 0.0121,
      "step": 3014140
    },
    {
      "epoch": 4.932738948567389,
      "grad_norm": 0.3312450647354126,
      "learning_rate": 1.3540849877769946e-07,
      "loss": 0.0097,
      "step": 3014160
    },
    {
      "epoch": 4.932771679006042,
      "grad_norm": 0.12755320966243744,
      "learning_rate": 1.3534260656418233e-07,
      "loss": 0.0117,
      "step": 3014180
    },
    {
      "epoch": 4.932804409444695,
      "grad_norm": 0.7429043650627136,
      "learning_rate": 1.3527671435066518e-07,
      "loss": 0.0066,
      "step": 3014200
    },
    {
      "epoch": 4.932837139883349,
      "grad_norm": 0.161823108792305,
      "learning_rate": 1.3521082213714805e-07,
      "loss": 0.0064,
      "step": 3014220
    },
    {
      "epoch": 4.932869870322002,
      "grad_norm": 0.04951066896319389,
      "learning_rate": 1.3514492992363092e-07,
      "loss": 0.008,
      "step": 3014240
    },
    {
      "epoch": 4.932902600760656,
      "grad_norm": 0.12093713879585266,
      "learning_rate": 1.3507903771011382e-07,
      "loss": 0.0086,
      "step": 3014260
    },
    {
      "epoch": 4.9329353311993085,
      "grad_norm": 0.37322765588760376,
      "learning_rate": 1.350131454965967e-07,
      "loss": 0.0092,
      "step": 3014280
    },
    {
      "epoch": 4.932968061637962,
      "grad_norm": 0.12428306043148041,
      "learning_rate": 1.3494725328307956e-07,
      "loss": 0.0068,
      "step": 3014300
    },
    {
      "epoch": 4.933000792076616,
      "grad_norm": 0.06914253532886505,
      "learning_rate": 1.348813610695624e-07,
      "loss": 0.0055,
      "step": 3014320
    },
    {
      "epoch": 4.933033522515268,
      "grad_norm": 0.2967973053455353,
      "learning_rate": 1.3481546885604528e-07,
      "loss": 0.011,
      "step": 3014340
    },
    {
      "epoch": 4.933066252953922,
      "grad_norm": 0.2426663488149643,
      "learning_rate": 1.3474957664252815e-07,
      "loss": 0.0065,
      "step": 3014360
    },
    {
      "epoch": 4.933098983392576,
      "grad_norm": 0.05309182032942772,
      "learning_rate": 1.3468368442901103e-07,
      "loss": 0.0084,
      "step": 3014380
    },
    {
      "epoch": 4.933131713831228,
      "grad_norm": 0.1806117445230484,
      "learning_rate": 1.3461779221549392e-07,
      "loss": 0.016,
      "step": 3014400
    },
    {
      "epoch": 4.933164444269882,
      "grad_norm": 0.26753661036491394,
      "learning_rate": 1.345519000019768e-07,
      "loss": 0.0053,
      "step": 3014420
    },
    {
      "epoch": 4.9331971747085355,
      "grad_norm": 0.4706580638885498,
      "learning_rate": 1.3448600778845964e-07,
      "loss": 0.0068,
      "step": 3014440
    },
    {
      "epoch": 4.933229905147189,
      "grad_norm": 0.05414522439241409,
      "learning_rate": 1.3442011557494251e-07,
      "loss": 0.0049,
      "step": 3014460
    },
    {
      "epoch": 4.933262635585842,
      "grad_norm": 0.16253113746643066,
      "learning_rate": 1.3435422336142539e-07,
      "loss": 0.0079,
      "step": 3014480
    },
    {
      "epoch": 4.933295366024495,
      "grad_norm": 0.14201775193214417,
      "learning_rate": 1.3428833114790826e-07,
      "loss": 0.0085,
      "step": 3014500
    },
    {
      "epoch": 4.933328096463149,
      "grad_norm": 0.039237797260284424,
      "learning_rate": 1.3422243893439113e-07,
      "loss": 0.0072,
      "step": 3014520
    },
    {
      "epoch": 4.9333608269018026,
      "grad_norm": 0.10419846326112747,
      "learning_rate": 1.34156546720874e-07,
      "loss": 0.007,
      "step": 3014540
    },
    {
      "epoch": 4.933393557340455,
      "grad_norm": 0.14147943258285522,
      "learning_rate": 1.3409065450735688e-07,
      "loss": 0.0099,
      "step": 3014560
    },
    {
      "epoch": 4.933426287779109,
      "grad_norm": 0.19868633151054382,
      "learning_rate": 1.3402476229383975e-07,
      "loss": 0.0147,
      "step": 3014580
    },
    {
      "epoch": 4.9334590182177624,
      "grad_norm": 0.2088228315114975,
      "learning_rate": 1.3395887008032262e-07,
      "loss": 0.0081,
      "step": 3014600
    },
    {
      "epoch": 4.933491748656415,
      "grad_norm": 0.18711978197097778,
      "learning_rate": 1.338929778668055e-07,
      "loss": 0.0076,
      "step": 3014620
    },
    {
      "epoch": 4.933524479095069,
      "grad_norm": 0.16150116920471191,
      "learning_rate": 1.3382708565328836e-07,
      "loss": 0.0054,
      "step": 3014640
    },
    {
      "epoch": 4.933557209533722,
      "grad_norm": 0.38150134682655334,
      "learning_rate": 1.3376119343977124e-07,
      "loss": 0.0081,
      "step": 3014660
    },
    {
      "epoch": 4.933589939972375,
      "grad_norm": 0.5961165428161621,
      "learning_rate": 1.336953012262541e-07,
      "loss": 0.013,
      "step": 3014680
    },
    {
      "epoch": 4.933622670411029,
      "grad_norm": 0.18198956549167633,
      "learning_rate": 1.3362940901273698e-07,
      "loss": 0.0127,
      "step": 3014700
    },
    {
      "epoch": 4.933655400849682,
      "grad_norm": 0.28230082988739014,
      "learning_rate": 1.3356351679921985e-07,
      "loss": 0.0082,
      "step": 3014720
    },
    {
      "epoch": 4.933688131288336,
      "grad_norm": 0.2243734747171402,
      "learning_rate": 1.3349762458570272e-07,
      "loss": 0.0103,
      "step": 3014740
    },
    {
      "epoch": 4.9337208617269885,
      "grad_norm": 0.11054396629333496,
      "learning_rate": 1.3343173237218557e-07,
      "loss": 0.0064,
      "step": 3014760
    },
    {
      "epoch": 4.933753592165642,
      "grad_norm": 0.18955247104167938,
      "learning_rate": 1.3336584015866847e-07,
      "loss": 0.0071,
      "step": 3014780
    },
    {
      "epoch": 4.933786322604296,
      "grad_norm": 0.16598409414291382,
      "learning_rate": 1.3329994794515134e-07,
      "loss": 0.0068,
      "step": 3014800
    },
    {
      "epoch": 4.933819053042949,
      "grad_norm": 0.04990515857934952,
      "learning_rate": 1.332340557316342e-07,
      "loss": 0.012,
      "step": 3014820
    },
    {
      "epoch": 4.933851783481602,
      "grad_norm": 0.25570639967918396,
      "learning_rate": 1.3316816351811708e-07,
      "loss": 0.0091,
      "step": 3014840
    },
    {
      "epoch": 4.933884513920256,
      "grad_norm": 0.32859987020492554,
      "learning_rate": 1.3310227130459996e-07,
      "loss": 0.013,
      "step": 3014860
    },
    {
      "epoch": 4.933917244358909,
      "grad_norm": 0.18407903611660004,
      "learning_rate": 1.330363790910828e-07,
      "loss": 0.0063,
      "step": 3014880
    },
    {
      "epoch": 4.933949974797562,
      "grad_norm": 0.12467031925916672,
      "learning_rate": 1.3297048687756567e-07,
      "loss": 0.008,
      "step": 3014900
    },
    {
      "epoch": 4.9339827052362155,
      "grad_norm": 0.18206389248371124,
      "learning_rate": 1.3290459466404857e-07,
      "loss": 0.01,
      "step": 3014920
    },
    {
      "epoch": 4.934015435674869,
      "grad_norm": 0.12447644770145416,
      "learning_rate": 1.3283870245053144e-07,
      "loss": 0.0078,
      "step": 3014940
    },
    {
      "epoch": 4.934048166113522,
      "grad_norm": 0.5574976205825806,
      "learning_rate": 1.3277281023701432e-07,
      "loss": 0.0092,
      "step": 3014960
    },
    {
      "epoch": 4.934080896552175,
      "grad_norm": 0.23049964010715485,
      "learning_rate": 1.3270691802349716e-07,
      "loss": 0.0084,
      "step": 3014980
    },
    {
      "epoch": 4.934113626990829,
      "grad_norm": 0.6461589336395264,
      "learning_rate": 1.3264102580998003e-07,
      "loss": 0.0112,
      "step": 3015000
    },
    {
      "epoch": 4.934146357429483,
      "grad_norm": 0.10651905834674835,
      "learning_rate": 1.325751335964629e-07,
      "loss": 0.0074,
      "step": 3015020
    },
    {
      "epoch": 4.934179087868135,
      "grad_norm": 0.16478897631168365,
      "learning_rate": 1.3250924138294578e-07,
      "loss": 0.0064,
      "step": 3015040
    },
    {
      "epoch": 4.934211818306789,
      "grad_norm": 0.1359001249074936,
      "learning_rate": 1.3244334916942868e-07,
      "loss": 0.0132,
      "step": 3015060
    },
    {
      "epoch": 4.9342445487454425,
      "grad_norm": 0.2905917763710022,
      "learning_rate": 1.3237745695591155e-07,
      "loss": 0.0061,
      "step": 3015080
    },
    {
      "epoch": 4.934277279184096,
      "grad_norm": 0.3610745668411255,
      "learning_rate": 1.323115647423944e-07,
      "loss": 0.0089,
      "step": 3015100
    },
    {
      "epoch": 4.934310009622749,
      "grad_norm": 0.393857479095459,
      "learning_rate": 1.3224567252887727e-07,
      "loss": 0.0068,
      "step": 3015120
    },
    {
      "epoch": 4.934342740061402,
      "grad_norm": 0.46550318598747253,
      "learning_rate": 1.3217978031536014e-07,
      "loss": 0.0109,
      "step": 3015140
    },
    {
      "epoch": 4.934375470500056,
      "grad_norm": 0.1839524507522583,
      "learning_rate": 1.32113888101843e-07,
      "loss": 0.0058,
      "step": 3015160
    },
    {
      "epoch": 4.934408200938709,
      "grad_norm": 0.21960760653018951,
      "learning_rate": 1.3204799588832588e-07,
      "loss": 0.0089,
      "step": 3015180
    },
    {
      "epoch": 4.934440931377362,
      "grad_norm": 0.10038159042596817,
      "learning_rate": 1.3198210367480876e-07,
      "loss": 0.0088,
      "step": 3015200
    },
    {
      "epoch": 4.934473661816016,
      "grad_norm": 0.42191290855407715,
      "learning_rate": 1.3191621146129163e-07,
      "loss": 0.0114,
      "step": 3015220
    },
    {
      "epoch": 4.934506392254669,
      "grad_norm": 0.4808647334575653,
      "learning_rate": 1.318503192477745e-07,
      "loss": 0.0095,
      "step": 3015240
    },
    {
      "epoch": 4.934539122693322,
      "grad_norm": 0.11650975048542023,
      "learning_rate": 1.3178442703425737e-07,
      "loss": 0.0099,
      "step": 3015260
    },
    {
      "epoch": 4.934571853131976,
      "grad_norm": 0.24947614967823029,
      "learning_rate": 1.3171853482074024e-07,
      "loss": 0.0101,
      "step": 3015280
    },
    {
      "epoch": 4.934604583570629,
      "grad_norm": 0.2885558307170868,
      "learning_rate": 1.3165264260722312e-07,
      "loss": 0.0108,
      "step": 3015300
    },
    {
      "epoch": 4.934637314009282,
      "grad_norm": 0.17207343876361847,
      "learning_rate": 1.31586750393706e-07,
      "loss": 0.0132,
      "step": 3015320
    },
    {
      "epoch": 4.934670044447936,
      "grad_norm": 0.1687934547662735,
      "learning_rate": 1.3152085818018886e-07,
      "loss": 0.0049,
      "step": 3015340
    },
    {
      "epoch": 4.934702774886589,
      "grad_norm": 0.25352221727371216,
      "learning_rate": 1.3145496596667173e-07,
      "loss": 0.0144,
      "step": 3015360
    },
    {
      "epoch": 4.934735505325243,
      "grad_norm": 0.1032627671957016,
      "learning_rate": 1.313890737531546e-07,
      "loss": 0.0099,
      "step": 3015380
    },
    {
      "epoch": 4.934768235763896,
      "grad_norm": 0.05786272510886192,
      "learning_rate": 1.3132318153963748e-07,
      "loss": 0.0075,
      "step": 3015400
    },
    {
      "epoch": 4.934800966202549,
      "grad_norm": 0.438037246465683,
      "learning_rate": 1.3125728932612032e-07,
      "loss": 0.0151,
      "step": 3015420
    },
    {
      "epoch": 4.934833696641203,
      "grad_norm": 0.0677756667137146,
      "learning_rate": 1.3119139711260322e-07,
      "loss": 0.0081,
      "step": 3015440
    },
    {
      "epoch": 4.9348664270798555,
      "grad_norm": 0.1869591772556305,
      "learning_rate": 1.311255048990861e-07,
      "loss": 0.0101,
      "step": 3015460
    },
    {
      "epoch": 4.934899157518509,
      "grad_norm": 0.15028390288352966,
      "learning_rate": 1.3105961268556896e-07,
      "loss": 0.0129,
      "step": 3015480
    },
    {
      "epoch": 4.934931887957163,
      "grad_norm": 0.26403117179870605,
      "learning_rate": 1.3099372047205184e-07,
      "loss": 0.0093,
      "step": 3015500
    },
    {
      "epoch": 4.934964618395815,
      "grad_norm": 0.16583982110023499,
      "learning_rate": 1.309278282585347e-07,
      "loss": 0.0086,
      "step": 3015520
    },
    {
      "epoch": 4.934997348834469,
      "grad_norm": 0.14213623106479645,
      "learning_rate": 1.3086193604501755e-07,
      "loss": 0.0093,
      "step": 3015540
    },
    {
      "epoch": 4.9350300792731225,
      "grad_norm": 0.06333758682012558,
      "learning_rate": 1.3079604383150043e-07,
      "loss": 0.0069,
      "step": 3015560
    },
    {
      "epoch": 4.935062809711776,
      "grad_norm": 0.09809109568595886,
      "learning_rate": 1.3073015161798333e-07,
      "loss": 0.0101,
      "step": 3015580
    },
    {
      "epoch": 4.935095540150429,
      "grad_norm": 0.2540725767612457,
      "learning_rate": 1.306642594044662e-07,
      "loss": 0.0074,
      "step": 3015600
    },
    {
      "epoch": 4.935128270589082,
      "grad_norm": 0.1512194126844406,
      "learning_rate": 1.3059836719094907e-07,
      "loss": 0.0092,
      "step": 3015620
    },
    {
      "epoch": 4.935161001027736,
      "grad_norm": 0.2518012225627899,
      "learning_rate": 1.3053247497743192e-07,
      "loss": 0.0093,
      "step": 3015640
    },
    {
      "epoch": 4.935193731466389,
      "grad_norm": 0.11610927432775497,
      "learning_rate": 1.304665827639148e-07,
      "loss": 0.0063,
      "step": 3015660
    },
    {
      "epoch": 4.935226461905042,
      "grad_norm": 0.152916818857193,
      "learning_rate": 1.3040069055039766e-07,
      "loss": 0.0116,
      "step": 3015680
    },
    {
      "epoch": 4.935259192343696,
      "grad_norm": 0.18302340805530548,
      "learning_rate": 1.3033479833688053e-07,
      "loss": 0.0074,
      "step": 3015700
    },
    {
      "epoch": 4.9352919227823495,
      "grad_norm": 0.20148396492004395,
      "learning_rate": 1.3026890612336343e-07,
      "loss": 0.0122,
      "step": 3015720
    },
    {
      "epoch": 4.935324653221002,
      "grad_norm": 0.23017382621765137,
      "learning_rate": 1.302030139098463e-07,
      "loss": 0.0121,
      "step": 3015740
    },
    {
      "epoch": 4.935357383659656,
      "grad_norm": 0.15487638115882874,
      "learning_rate": 1.3013712169632915e-07,
      "loss": 0.0121,
      "step": 3015760
    },
    {
      "epoch": 4.935390114098309,
      "grad_norm": 0.2958003282546997,
      "learning_rate": 1.3007122948281202e-07,
      "loss": 0.0081,
      "step": 3015780
    },
    {
      "epoch": 4.935422844536962,
      "grad_norm": 0.049876902252435684,
      "learning_rate": 1.300053372692949e-07,
      "loss": 0.0088,
      "step": 3015800
    },
    {
      "epoch": 4.935455574975616,
      "grad_norm": 0.35598424077033997,
      "learning_rate": 1.2993944505577776e-07,
      "loss": 0.0078,
      "step": 3015820
    },
    {
      "epoch": 4.935488305414269,
      "grad_norm": 0.22851531207561493,
      "learning_rate": 1.2987355284226064e-07,
      "loss": 0.0078,
      "step": 3015840
    },
    {
      "epoch": 4.935521035852922,
      "grad_norm": 0.12541413307189941,
      "learning_rate": 1.298076606287435e-07,
      "loss": 0.0062,
      "step": 3015860
    },
    {
      "epoch": 4.935553766291576,
      "grad_norm": 0.040817324072122574,
      "learning_rate": 1.2974176841522638e-07,
      "loss": 0.0099,
      "step": 3015880
    },
    {
      "epoch": 4.935586496730229,
      "grad_norm": 0.184347003698349,
      "learning_rate": 1.2967587620170925e-07,
      "loss": 0.0081,
      "step": 3015900
    },
    {
      "epoch": 4.935619227168883,
      "grad_norm": 0.1214563325047493,
      "learning_rate": 1.2960998398819212e-07,
      "loss": 0.0088,
      "step": 3015920
    },
    {
      "epoch": 4.9356519576075355,
      "grad_norm": 0.1659061312675476,
      "learning_rate": 1.29544091774675e-07,
      "loss": 0.0101,
      "step": 3015940
    },
    {
      "epoch": 4.935684688046189,
      "grad_norm": 0.15748214721679688,
      "learning_rate": 1.2947819956115787e-07,
      "loss": 0.0093,
      "step": 3015960
    },
    {
      "epoch": 4.935717418484843,
      "grad_norm": 0.11767157912254333,
      "learning_rate": 1.2941230734764074e-07,
      "loss": 0.0084,
      "step": 3015980
    },
    {
      "epoch": 4.935750148923496,
      "grad_norm": 0.1552717089653015,
      "learning_rate": 1.293464151341236e-07,
      "loss": 0.0054,
      "step": 3016000
    },
    {
      "epoch": 4.935782879362149,
      "grad_norm": 0.5929316878318787,
      "learning_rate": 1.2928052292060649e-07,
      "loss": 0.0099,
      "step": 3016020
    },
    {
      "epoch": 4.935815609800803,
      "grad_norm": 0.10875183343887329,
      "learning_rate": 1.2921463070708936e-07,
      "loss": 0.0067,
      "step": 3016040
    },
    {
      "epoch": 4.935848340239456,
      "grad_norm": 0.3546421527862549,
      "learning_rate": 1.2914873849357223e-07,
      "loss": 0.0058,
      "step": 3016060
    },
    {
      "epoch": 4.935881070678109,
      "grad_norm": 0.35307776927948,
      "learning_rate": 1.2908284628005507e-07,
      "loss": 0.0058,
      "step": 3016080
    },
    {
      "epoch": 4.9359138011167625,
      "grad_norm": 0.17108365893363953,
      "learning_rate": 1.2901695406653797e-07,
      "loss": 0.0131,
      "step": 3016100
    },
    {
      "epoch": 4.935946531555416,
      "grad_norm": 0.30318203568458557,
      "learning_rate": 1.2895106185302085e-07,
      "loss": 0.0114,
      "step": 3016120
    },
    {
      "epoch": 4.935979261994069,
      "grad_norm": 0.48861163854599,
      "learning_rate": 1.2888516963950372e-07,
      "loss": 0.0109,
      "step": 3016140
    },
    {
      "epoch": 4.936011992432722,
      "grad_norm": 0.415495365858078,
      "learning_rate": 1.288192774259866e-07,
      "loss": 0.0107,
      "step": 3016160
    },
    {
      "epoch": 4.936044722871376,
      "grad_norm": 0.3931451141834259,
      "learning_rate": 1.2875338521246946e-07,
      "loss": 0.0074,
      "step": 3016180
    },
    {
      "epoch": 4.93607745331003,
      "grad_norm": 0.3306106626987457,
      "learning_rate": 1.286874929989523e-07,
      "loss": 0.0096,
      "step": 3016200
    },
    {
      "epoch": 4.936110183748682,
      "grad_norm": 0.3712133467197418,
      "learning_rate": 1.2862160078543518e-07,
      "loss": 0.0105,
      "step": 3016220
    },
    {
      "epoch": 4.936142914187336,
      "grad_norm": 0.07836274802684784,
      "learning_rate": 1.2855570857191808e-07,
      "loss": 0.0114,
      "step": 3016240
    },
    {
      "epoch": 4.9361756446259895,
      "grad_norm": 0.20727400481700897,
      "learning_rate": 1.2848981635840095e-07,
      "loss": 0.0067,
      "step": 3016260
    },
    {
      "epoch": 4.936208375064643,
      "grad_norm": 0.1244615763425827,
      "learning_rate": 1.2842392414488382e-07,
      "loss": 0.0085,
      "step": 3016280
    },
    {
      "epoch": 4.936241105503296,
      "grad_norm": 0.8255248069763184,
      "learning_rate": 1.2835803193136667e-07,
      "loss": 0.0099,
      "step": 3016300
    },
    {
      "epoch": 4.936273835941949,
      "grad_norm": 0.27605965733528137,
      "learning_rate": 1.2829213971784954e-07,
      "loss": 0.0066,
      "step": 3016320
    },
    {
      "epoch": 4.936306566380603,
      "grad_norm": 0.5322117209434509,
      "learning_rate": 1.282262475043324e-07,
      "loss": 0.0072,
      "step": 3016340
    },
    {
      "epoch": 4.936339296819256,
      "grad_norm": 0.31074827909469604,
      "learning_rate": 1.2816035529081528e-07,
      "loss": 0.0114,
      "step": 3016360
    },
    {
      "epoch": 4.936372027257909,
      "grad_norm": 0.4934127926826477,
      "learning_rate": 1.2809446307729818e-07,
      "loss": 0.0114,
      "step": 3016380
    },
    {
      "epoch": 4.936404757696563,
      "grad_norm": 0.4478696882724762,
      "learning_rate": 1.2802857086378105e-07,
      "loss": 0.009,
      "step": 3016400
    },
    {
      "epoch": 4.9364374881352155,
      "grad_norm": 0.18552108108997345,
      "learning_rate": 1.279626786502639e-07,
      "loss": 0.0073,
      "step": 3016420
    },
    {
      "epoch": 4.936470218573869,
      "grad_norm": 0.23152832686901093,
      "learning_rate": 1.2789678643674677e-07,
      "loss": 0.0092,
      "step": 3016440
    },
    {
      "epoch": 4.936502949012523,
      "grad_norm": 0.1488075852394104,
      "learning_rate": 1.2783089422322964e-07,
      "loss": 0.0066,
      "step": 3016460
    },
    {
      "epoch": 4.936535679451176,
      "grad_norm": 0.2362280935049057,
      "learning_rate": 1.2776500200971252e-07,
      "loss": 0.0111,
      "step": 3016480
    },
    {
      "epoch": 4.936568409889829,
      "grad_norm": 0.6251819133758545,
      "learning_rate": 1.276991097961954e-07,
      "loss": 0.0092,
      "step": 3016500
    },
    {
      "epoch": 4.936601140328483,
      "grad_norm": 0.27197930216789246,
      "learning_rate": 1.2763321758267826e-07,
      "loss": 0.0067,
      "step": 3016520
    },
    {
      "epoch": 4.936633870767136,
      "grad_norm": 0.35507747530937195,
      "learning_rate": 1.2756732536916113e-07,
      "loss": 0.0109,
      "step": 3016540
    },
    {
      "epoch": 4.93666660120579,
      "grad_norm": 0.13242892920970917,
      "learning_rate": 1.27501433155644e-07,
      "loss": 0.0082,
      "step": 3016560
    },
    {
      "epoch": 4.9366993316444425,
      "grad_norm": 0.7720656991004944,
      "learning_rate": 1.2743554094212688e-07,
      "loss": 0.012,
      "step": 3016580
    },
    {
      "epoch": 4.936732062083096,
      "grad_norm": 0.048324111849069595,
      "learning_rate": 1.2736964872860975e-07,
      "loss": 0.0071,
      "step": 3016600
    },
    {
      "epoch": 4.93676479252175,
      "grad_norm": 0.4061864912509918,
      "learning_rate": 1.2730375651509262e-07,
      "loss": 0.0114,
      "step": 3016620
    },
    {
      "epoch": 4.936797522960402,
      "grad_norm": 0.2734362483024597,
      "learning_rate": 1.272378643015755e-07,
      "loss": 0.0067,
      "step": 3016640
    },
    {
      "epoch": 4.936830253399056,
      "grad_norm": 0.917487621307373,
      "learning_rate": 1.2717197208805837e-07,
      "loss": 0.0156,
      "step": 3016660
    },
    {
      "epoch": 4.93686298383771,
      "grad_norm": 0.229413703083992,
      "learning_rate": 1.2710607987454124e-07,
      "loss": 0.0105,
      "step": 3016680
    },
    {
      "epoch": 4.936895714276362,
      "grad_norm": 0.07264355570077896,
      "learning_rate": 1.270401876610241e-07,
      "loss": 0.0057,
      "step": 3016700
    },
    {
      "epoch": 4.936928444715016,
      "grad_norm": 0.3638443052768707,
      "learning_rate": 1.2697429544750698e-07,
      "loss": 0.0159,
      "step": 3016720
    },
    {
      "epoch": 4.9369611751536695,
      "grad_norm": 0.10902320593595505,
      "learning_rate": 1.2690840323398985e-07,
      "loss": 0.0091,
      "step": 3016740
    },
    {
      "epoch": 4.936993905592323,
      "grad_norm": 0.1506611555814743,
      "learning_rate": 1.2684251102047273e-07,
      "loss": 0.0089,
      "step": 3016760
    },
    {
      "epoch": 4.937026636030976,
      "grad_norm": 0.17431259155273438,
      "learning_rate": 1.267766188069556e-07,
      "loss": 0.005,
      "step": 3016780
    },
    {
      "epoch": 4.937059366469629,
      "grad_norm": 0.20556260645389557,
      "learning_rate": 1.2671072659343847e-07,
      "loss": 0.0079,
      "step": 3016800
    },
    {
      "epoch": 4.937092096908283,
      "grad_norm": 0.18033427000045776,
      "learning_rate": 1.2664483437992134e-07,
      "loss": 0.007,
      "step": 3016820
    },
    {
      "epoch": 4.937124827346937,
      "grad_norm": 0.0358278714120388,
      "learning_rate": 1.2657894216640421e-07,
      "loss": 0.0064,
      "step": 3016840
    },
    {
      "epoch": 4.937157557785589,
      "grad_norm": 0.05586564168334007,
      "learning_rate": 1.2651304995288706e-07,
      "loss": 0.0119,
      "step": 3016860
    },
    {
      "epoch": 4.937190288224243,
      "grad_norm": 0.061971526592969894,
      "learning_rate": 1.2644715773936993e-07,
      "loss": 0.0079,
      "step": 3016880
    },
    {
      "epoch": 4.9372230186628965,
      "grad_norm": 0.2931075692176819,
      "learning_rate": 1.2638126552585283e-07,
      "loss": 0.0082,
      "step": 3016900
    },
    {
      "epoch": 4.937255749101549,
      "grad_norm": 0.3309386968612671,
      "learning_rate": 1.263153733123357e-07,
      "loss": 0.0112,
      "step": 3016920
    },
    {
      "epoch": 4.937288479540203,
      "grad_norm": 0.04362645372748375,
      "learning_rate": 1.2624948109881857e-07,
      "loss": 0.0067,
      "step": 3016940
    },
    {
      "epoch": 4.937321209978856,
      "grad_norm": 0.4162111282348633,
      "learning_rate": 1.2618358888530145e-07,
      "loss": 0.007,
      "step": 3016960
    },
    {
      "epoch": 4.937353940417509,
      "grad_norm": 0.2995110750198364,
      "learning_rate": 1.261176966717843e-07,
      "loss": 0.0107,
      "step": 3016980
    },
    {
      "epoch": 4.937386670856163,
      "grad_norm": 0.4763730764389038,
      "learning_rate": 1.2605180445826716e-07,
      "loss": 0.0078,
      "step": 3017000
    },
    {
      "epoch": 4.937419401294816,
      "grad_norm": 0.2746164798736572,
      "learning_rate": 1.2598591224475004e-07,
      "loss": 0.0146,
      "step": 3017020
    },
    {
      "epoch": 4.93745213173347,
      "grad_norm": 0.2305631786584854,
      "learning_rate": 1.2592002003123294e-07,
      "loss": 0.0077,
      "step": 3017040
    },
    {
      "epoch": 4.937484862172123,
      "grad_norm": 0.1719505935907364,
      "learning_rate": 1.258541278177158e-07,
      "loss": 0.0067,
      "step": 3017060
    },
    {
      "epoch": 4.937517592610776,
      "grad_norm": 0.11928284168243408,
      "learning_rate": 1.2578823560419865e-07,
      "loss": 0.0076,
      "step": 3017080
    },
    {
      "epoch": 4.93755032304943,
      "grad_norm": 0.14364024996757507,
      "learning_rate": 1.2572234339068153e-07,
      "loss": 0.0087,
      "step": 3017100
    },
    {
      "epoch": 4.9375830534880825,
      "grad_norm": 0.2677859961986542,
      "learning_rate": 1.256564511771644e-07,
      "loss": 0.0096,
      "step": 3017120
    },
    {
      "epoch": 4.937615783926736,
      "grad_norm": 0.23589754104614258,
      "learning_rate": 1.2559055896364727e-07,
      "loss": 0.0074,
      "step": 3017140
    },
    {
      "epoch": 4.93764851436539,
      "grad_norm": 0.13434357941150665,
      "learning_rate": 1.2552466675013014e-07,
      "loss": 0.008,
      "step": 3017160
    },
    {
      "epoch": 4.937681244804043,
      "grad_norm": 0.04360496252775192,
      "learning_rate": 1.2545877453661301e-07,
      "loss": 0.0091,
      "step": 3017180
    },
    {
      "epoch": 4.937713975242696,
      "grad_norm": 0.3149009346961975,
      "learning_rate": 1.2539288232309589e-07,
      "loss": 0.0135,
      "step": 3017200
    },
    {
      "epoch": 4.9377467056813495,
      "grad_norm": 0.31786343455314636,
      "learning_rate": 1.2532699010957876e-07,
      "loss": 0.0071,
      "step": 3017220
    },
    {
      "epoch": 4.937779436120003,
      "grad_norm": 0.376534104347229,
      "learning_rate": 1.2526109789606163e-07,
      "loss": 0.0086,
      "step": 3017240
    },
    {
      "epoch": 4.937812166558656,
      "grad_norm": 0.22775886952877045,
      "learning_rate": 1.251952056825445e-07,
      "loss": 0.0104,
      "step": 3017260
    },
    {
      "epoch": 4.937844896997309,
      "grad_norm": 0.23930788040161133,
      "learning_rate": 1.2512931346902737e-07,
      "loss": 0.0077,
      "step": 3017280
    },
    {
      "epoch": 4.937877627435963,
      "grad_norm": 0.31284841895103455,
      "learning_rate": 1.2506342125551025e-07,
      "loss": 0.0077,
      "step": 3017300
    },
    {
      "epoch": 4.937910357874616,
      "grad_norm": 0.32275086641311646,
      "learning_rate": 1.2499752904199312e-07,
      "loss": 0.0069,
      "step": 3017320
    },
    {
      "epoch": 4.937943088313269,
      "grad_norm": 0.18634375929832458,
      "learning_rate": 1.24931636828476e-07,
      "loss": 0.011,
      "step": 3017340
    },
    {
      "epoch": 4.937975818751923,
      "grad_norm": 0.32566556334495544,
      "learning_rate": 1.2486574461495886e-07,
      "loss": 0.0054,
      "step": 3017360
    },
    {
      "epoch": 4.9380085491905765,
      "grad_norm": 0.2385212928056717,
      "learning_rate": 1.2479985240144173e-07,
      "loss": 0.0065,
      "step": 3017380
    },
    {
      "epoch": 4.938041279629229,
      "grad_norm": 0.042191967368125916,
      "learning_rate": 1.247339601879246e-07,
      "loss": 0.0097,
      "step": 3017400
    },
    {
      "epoch": 4.938074010067883,
      "grad_norm": 0.6360923647880554,
      "learning_rate": 1.2466806797440748e-07,
      "loss": 0.0123,
      "step": 3017420
    },
    {
      "epoch": 4.938106740506536,
      "grad_norm": 0.2944320738315582,
      "learning_rate": 1.2460217576089035e-07,
      "loss": 0.0139,
      "step": 3017440
    },
    {
      "epoch": 4.93813947094519,
      "grad_norm": 0.08523579686880112,
      "learning_rate": 1.2453628354737322e-07,
      "loss": 0.0091,
      "step": 3017460
    },
    {
      "epoch": 4.938172201383843,
      "grad_norm": 0.37534329295158386,
      "learning_rate": 1.244703913338561e-07,
      "loss": 0.0109,
      "step": 3017480
    },
    {
      "epoch": 4.938204931822496,
      "grad_norm": 0.1882425993680954,
      "learning_rate": 1.2440449912033897e-07,
      "loss": 0.0079,
      "step": 3017500
    },
    {
      "epoch": 4.93823766226115,
      "grad_norm": 0.20539100468158722,
      "learning_rate": 1.243386069068218e-07,
      "loss": 0.0086,
      "step": 3017520
    },
    {
      "epoch": 4.938270392699803,
      "grad_norm": 0.314700186252594,
      "learning_rate": 1.2427271469330468e-07,
      "loss": 0.0107,
      "step": 3017540
    },
    {
      "epoch": 4.938303123138456,
      "grad_norm": 0.10309112071990967,
      "learning_rate": 1.2420682247978758e-07,
      "loss": 0.0038,
      "step": 3017560
    },
    {
      "epoch": 4.93833585357711,
      "grad_norm": 0.40563833713531494,
      "learning_rate": 1.2414093026627046e-07,
      "loss": 0.0081,
      "step": 3017580
    },
    {
      "epoch": 4.9383685840157625,
      "grad_norm": 0.20199596881866455,
      "learning_rate": 1.2407503805275333e-07,
      "loss": 0.0059,
      "step": 3017600
    },
    {
      "epoch": 4.938401314454416,
      "grad_norm": 0.44941088557243347,
      "learning_rate": 1.240091458392362e-07,
      "loss": 0.0106,
      "step": 3017620
    },
    {
      "epoch": 4.93843404489307,
      "grad_norm": 0.17365297675132751,
      "learning_rate": 1.2394325362571905e-07,
      "loss": 0.0084,
      "step": 3017640
    },
    {
      "epoch": 4.938466775331723,
      "grad_norm": 0.162326380610466,
      "learning_rate": 1.2387736141220192e-07,
      "loss": 0.0083,
      "step": 3017660
    },
    {
      "epoch": 4.938499505770376,
      "grad_norm": 0.06627697497606277,
      "learning_rate": 1.238114691986848e-07,
      "loss": 0.0096,
      "step": 3017680
    },
    {
      "epoch": 4.93853223620903,
      "grad_norm": 0.272253155708313,
      "learning_rate": 1.237455769851677e-07,
      "loss": 0.0078,
      "step": 3017700
    },
    {
      "epoch": 4.938564966647683,
      "grad_norm": 0.06103316694498062,
      "learning_rate": 1.2367968477165056e-07,
      "loss": 0.0195,
      "step": 3017720
    },
    {
      "epoch": 4.938597697086337,
      "grad_norm": 0.22402323782444,
      "learning_rate": 1.236137925581334e-07,
      "loss": 0.0084,
      "step": 3017740
    },
    {
      "epoch": 4.9386304275249895,
      "grad_norm": 0.20031605660915375,
      "learning_rate": 1.2354790034461628e-07,
      "loss": 0.0134,
      "step": 3017760
    },
    {
      "epoch": 4.938663157963643,
      "grad_norm": 0.2062121480703354,
      "learning_rate": 1.2348200813109915e-07,
      "loss": 0.0097,
      "step": 3017780
    },
    {
      "epoch": 4.938695888402297,
      "grad_norm": 0.12323369085788727,
      "learning_rate": 1.2341611591758202e-07,
      "loss": 0.0073,
      "step": 3017800
    },
    {
      "epoch": 4.938728618840949,
      "grad_norm": 0.14816811680793762,
      "learning_rate": 1.233502237040649e-07,
      "loss": 0.0095,
      "step": 3017820
    },
    {
      "epoch": 4.938761349279603,
      "grad_norm": 0.23591914772987366,
      "learning_rate": 1.232843314905478e-07,
      "loss": 0.0075,
      "step": 3017840
    },
    {
      "epoch": 4.938794079718257,
      "grad_norm": 0.33172449469566345,
      "learning_rate": 1.2321843927703064e-07,
      "loss": 0.0094,
      "step": 3017860
    },
    {
      "epoch": 4.938826810156909,
      "grad_norm": 0.19721901416778564,
      "learning_rate": 1.231525470635135e-07,
      "loss": 0.008,
      "step": 3017880
    },
    {
      "epoch": 4.938859540595563,
      "grad_norm": 0.13730233907699585,
      "learning_rate": 1.2308665484999638e-07,
      "loss": 0.0085,
      "step": 3017900
    },
    {
      "epoch": 4.9388922710342165,
      "grad_norm": 0.34453073143959045,
      "learning_rate": 1.2302076263647925e-07,
      "loss": 0.0063,
      "step": 3017920
    },
    {
      "epoch": 4.93892500147287,
      "grad_norm": 0.2748241424560547,
      "learning_rate": 1.2295487042296213e-07,
      "loss": 0.0097,
      "step": 3017940
    },
    {
      "epoch": 4.938957731911523,
      "grad_norm": 0.48668619990348816,
      "learning_rate": 1.22888978209445e-07,
      "loss": 0.0099,
      "step": 3017960
    },
    {
      "epoch": 4.938990462350176,
      "grad_norm": 0.1689927726984024,
      "learning_rate": 1.2282308599592787e-07,
      "loss": 0.0083,
      "step": 3017980
    },
    {
      "epoch": 4.93902319278883,
      "grad_norm": 0.08758216351270676,
      "learning_rate": 1.2275719378241074e-07,
      "loss": 0.0118,
      "step": 3018000
    },
    {
      "epoch": 4.9390559232274835,
      "grad_norm": 0.24861201643943787,
      "learning_rate": 1.2269130156889362e-07,
      "loss": 0.0071,
      "step": 3018020
    },
    {
      "epoch": 4.939088653666136,
      "grad_norm": 0.23659473657608032,
      "learning_rate": 1.226254093553765e-07,
      "loss": 0.0083,
      "step": 3018040
    },
    {
      "epoch": 4.93912138410479,
      "grad_norm": 0.450744092464447,
      "learning_rate": 1.2255951714185936e-07,
      "loss": 0.0077,
      "step": 3018060
    },
    {
      "epoch": 4.939154114543443,
      "grad_norm": 0.2174275666475296,
      "learning_rate": 1.2249362492834223e-07,
      "loss": 0.0067,
      "step": 3018080
    },
    {
      "epoch": 4.939186844982096,
      "grad_norm": 0.21697291731834412,
      "learning_rate": 1.224277327148251e-07,
      "loss": 0.0167,
      "step": 3018100
    },
    {
      "epoch": 4.93921957542075,
      "grad_norm": 0.23253031075000763,
      "learning_rate": 1.2236184050130798e-07,
      "loss": 0.0053,
      "step": 3018120
    },
    {
      "epoch": 4.939252305859403,
      "grad_norm": 0.3747693598270416,
      "learning_rate": 1.2229594828779085e-07,
      "loss": 0.0088,
      "step": 3018140
    },
    {
      "epoch": 4.939285036298056,
      "grad_norm": 0.16284961998462677,
      "learning_rate": 1.2223005607427372e-07,
      "loss": 0.0107,
      "step": 3018160
    },
    {
      "epoch": 4.93931776673671,
      "grad_norm": 0.5799518823623657,
      "learning_rate": 1.2216416386075657e-07,
      "loss": 0.0093,
      "step": 3018180
    },
    {
      "epoch": 4.939350497175363,
      "grad_norm": 0.12065254151821136,
      "learning_rate": 1.2209827164723944e-07,
      "loss": 0.0077,
      "step": 3018200
    },
    {
      "epoch": 4.939383227614017,
      "grad_norm": 0.13047389686107635,
      "learning_rate": 1.2203237943372234e-07,
      "loss": 0.0097,
      "step": 3018220
    },
    {
      "epoch": 4.9394159580526695,
      "grad_norm": 0.23893727362155914,
      "learning_rate": 1.219664872202052e-07,
      "loss": 0.0116,
      "step": 3018240
    },
    {
      "epoch": 4.939448688491323,
      "grad_norm": 0.2520751953125,
      "learning_rate": 1.2190059500668808e-07,
      "loss": 0.0089,
      "step": 3018260
    },
    {
      "epoch": 4.939481418929977,
      "grad_norm": 0.4079207479953766,
      "learning_rate": 1.2183470279317095e-07,
      "loss": 0.0059,
      "step": 3018280
    },
    {
      "epoch": 4.93951414936863,
      "grad_norm": 0.0591549389064312,
      "learning_rate": 1.217688105796538e-07,
      "loss": 0.0092,
      "step": 3018300
    },
    {
      "epoch": 4.939546879807283,
      "grad_norm": 0.29386192560195923,
      "learning_rate": 1.2170291836613667e-07,
      "loss": 0.0116,
      "step": 3018320
    },
    {
      "epoch": 4.939579610245937,
      "grad_norm": 0.16869176924228668,
      "learning_rate": 1.2163702615261954e-07,
      "loss": 0.0061,
      "step": 3018340
    },
    {
      "epoch": 4.93961234068459,
      "grad_norm": 0.6378579139709473,
      "learning_rate": 1.2157113393910244e-07,
      "loss": 0.0074,
      "step": 3018360
    },
    {
      "epoch": 4.939645071123243,
      "grad_norm": 0.5988211631774902,
      "learning_rate": 1.215052417255853e-07,
      "loss": 0.0076,
      "step": 3018380
    },
    {
      "epoch": 4.9396778015618965,
      "grad_norm": 0.19469910860061646,
      "learning_rate": 1.2143934951206816e-07,
      "loss": 0.0131,
      "step": 3018400
    },
    {
      "epoch": 4.93971053200055,
      "grad_norm": 0.09041135758161545,
      "learning_rate": 1.2137345729855103e-07,
      "loss": 0.011,
      "step": 3018420
    },
    {
      "epoch": 4.939743262439203,
      "grad_norm": 0.31536856293678284,
      "learning_rate": 1.213075650850339e-07,
      "loss": 0.0095,
      "step": 3018440
    },
    {
      "epoch": 4.939775992877856,
      "grad_norm": 0.20263472199440002,
      "learning_rate": 1.2124167287151677e-07,
      "loss": 0.0087,
      "step": 3018460
    },
    {
      "epoch": 4.93980872331651,
      "grad_norm": 0.5420315861701965,
      "learning_rate": 1.2117578065799965e-07,
      "loss": 0.0075,
      "step": 3018480
    },
    {
      "epoch": 4.939841453755164,
      "grad_norm": 0.059185147285461426,
      "learning_rate": 1.2110988844448255e-07,
      "loss": 0.007,
      "step": 3018500
    },
    {
      "epoch": 4.939874184193816,
      "grad_norm": 0.15281781554222107,
      "learning_rate": 1.210439962309654e-07,
      "loss": 0.0056,
      "step": 3018520
    },
    {
      "epoch": 4.93990691463247,
      "grad_norm": 0.0769152045249939,
      "learning_rate": 1.2097810401744826e-07,
      "loss": 0.0101,
      "step": 3018540
    },
    {
      "epoch": 4.9399396450711235,
      "grad_norm": 0.3554757833480835,
      "learning_rate": 1.2091221180393114e-07,
      "loss": 0.0112,
      "step": 3018560
    },
    {
      "epoch": 4.939972375509777,
      "grad_norm": 0.24133047461509705,
      "learning_rate": 1.20846319590414e-07,
      "loss": 0.0083,
      "step": 3018580
    },
    {
      "epoch": 4.94000510594843,
      "grad_norm": 0.24860523641109467,
      "learning_rate": 1.2078042737689688e-07,
      "loss": 0.0083,
      "step": 3018600
    },
    {
      "epoch": 4.940037836387083,
      "grad_norm": 0.12719902396202087,
      "learning_rate": 1.2071453516337975e-07,
      "loss": 0.0077,
      "step": 3018620
    },
    {
      "epoch": 4.940070566825737,
      "grad_norm": 0.08771146833896637,
      "learning_rate": 1.2064864294986262e-07,
      "loss": 0.0087,
      "step": 3018640
    },
    {
      "epoch": 4.94010329726439,
      "grad_norm": 0.19541670382022858,
      "learning_rate": 1.205827507363455e-07,
      "loss": 0.0087,
      "step": 3018660
    },
    {
      "epoch": 4.940136027703043,
      "grad_norm": 0.1264968067407608,
      "learning_rate": 1.2051685852282837e-07,
      "loss": 0.006,
      "step": 3018680
    },
    {
      "epoch": 4.940168758141697,
      "grad_norm": 0.20388534665107727,
      "learning_rate": 1.2045096630931124e-07,
      "loss": 0.008,
      "step": 3018700
    },
    {
      "epoch": 4.94020148858035,
      "grad_norm": 0.07176262885332108,
      "learning_rate": 1.203850740957941e-07,
      "loss": 0.0092,
      "step": 3018720
    },
    {
      "epoch": 4.940234219019003,
      "grad_norm": 0.25779664516448975,
      "learning_rate": 1.2031918188227698e-07,
      "loss": 0.0101,
      "step": 3018740
    },
    {
      "epoch": 4.940266949457657,
      "grad_norm": 0.12813059985637665,
      "learning_rate": 1.2025328966875986e-07,
      "loss": 0.0082,
      "step": 3018760
    },
    {
      "epoch": 4.94029967989631,
      "grad_norm": 0.2250480204820633,
      "learning_rate": 1.2018739745524273e-07,
      "loss": 0.0069,
      "step": 3018780
    },
    {
      "epoch": 4.940332410334963,
      "grad_norm": 0.445504367351532,
      "learning_rate": 1.201215052417256e-07,
      "loss": 0.0112,
      "step": 3018800
    },
    {
      "epoch": 4.940365140773617,
      "grad_norm": 0.17640897631645203,
      "learning_rate": 1.2005561302820847e-07,
      "loss": 0.0059,
      "step": 3018820
    },
    {
      "epoch": 4.94039787121227,
      "grad_norm": 0.03656652197241783,
      "learning_rate": 1.1998972081469134e-07,
      "loss": 0.0091,
      "step": 3018840
    },
    {
      "epoch": 4.940430601650923,
      "grad_norm": 0.23078985512256622,
      "learning_rate": 1.199238286011742e-07,
      "loss": 0.0083,
      "step": 3018860
    },
    {
      "epoch": 4.9404633320895766,
      "grad_norm": 0.32452070713043213,
      "learning_rate": 1.198579363876571e-07,
      "loss": 0.0061,
      "step": 3018880
    },
    {
      "epoch": 4.94049606252823,
      "grad_norm": 0.35286471247673035,
      "learning_rate": 1.1979204417413996e-07,
      "loss": 0.0083,
      "step": 3018900
    },
    {
      "epoch": 4.940528792966884,
      "grad_norm": 0.30020764470100403,
      "learning_rate": 1.1972615196062283e-07,
      "loss": 0.008,
      "step": 3018920
    },
    {
      "epoch": 4.9405615234055364,
      "grad_norm": 0.12479835003614426,
      "learning_rate": 1.196602597471057e-07,
      "loss": 0.0093,
      "step": 3018940
    },
    {
      "epoch": 4.94059425384419,
      "grad_norm": 0.30113646388053894,
      "learning_rate": 1.1959436753358855e-07,
      "loss": 0.0083,
      "step": 3018960
    },
    {
      "epoch": 4.940626984282844,
      "grad_norm": 0.1541772335767746,
      "learning_rate": 1.1952847532007142e-07,
      "loss": 0.0101,
      "step": 3018980
    },
    {
      "epoch": 4.940659714721496,
      "grad_norm": 0.18464547395706177,
      "learning_rate": 1.194625831065543e-07,
      "loss": 0.0069,
      "step": 3019000
    },
    {
      "epoch": 4.94069244516015,
      "grad_norm": 0.11596234887838364,
      "learning_rate": 1.193966908930372e-07,
      "loss": 0.0078,
      "step": 3019020
    },
    {
      "epoch": 4.9407251755988035,
      "grad_norm": 0.1366923600435257,
      "learning_rate": 1.1933079867952007e-07,
      "loss": 0.0068,
      "step": 3019040
    },
    {
      "epoch": 4.940757906037456,
      "grad_norm": 0.2301221787929535,
      "learning_rate": 1.1926490646600294e-07,
      "loss": 0.0123,
      "step": 3019060
    },
    {
      "epoch": 4.94079063647611,
      "grad_norm": 0.2562614381313324,
      "learning_rate": 1.1919901425248578e-07,
      "loss": 0.0116,
      "step": 3019080
    },
    {
      "epoch": 4.940823366914763,
      "grad_norm": 0.09117817878723145,
      "learning_rate": 1.1913312203896867e-07,
      "loss": 0.0124,
      "step": 3019100
    },
    {
      "epoch": 4.940856097353417,
      "grad_norm": 0.2050323486328125,
      "learning_rate": 1.1906722982545154e-07,
      "loss": 0.0135,
      "step": 3019120
    },
    {
      "epoch": 4.94088882779207,
      "grad_norm": 0.10815407335758209,
      "learning_rate": 1.1900133761193441e-07,
      "loss": 0.0105,
      "step": 3019140
    },
    {
      "epoch": 4.940921558230723,
      "grad_norm": 0.22050124406814575,
      "learning_rate": 1.1893544539841728e-07,
      "loss": 0.0063,
      "step": 3019160
    },
    {
      "epoch": 4.940954288669377,
      "grad_norm": 0.03836977481842041,
      "learning_rate": 1.1886955318490014e-07,
      "loss": 0.005,
      "step": 3019180
    },
    {
      "epoch": 4.9409870191080305,
      "grad_norm": 0.32362934947013855,
      "learning_rate": 1.1880366097138302e-07,
      "loss": 0.0099,
      "step": 3019200
    },
    {
      "epoch": 4.941019749546683,
      "grad_norm": 0.162373349070549,
      "learning_rate": 1.1873776875786589e-07,
      "loss": 0.0071,
      "step": 3019220
    },
    {
      "epoch": 4.941052479985337,
      "grad_norm": 0.2208104133605957,
      "learning_rate": 1.1867187654434876e-07,
      "loss": 0.0086,
      "step": 3019240
    },
    {
      "epoch": 4.94108521042399,
      "grad_norm": 0.07938092201948166,
      "learning_rate": 1.1860598433083165e-07,
      "loss": 0.0067,
      "step": 3019260
    },
    {
      "epoch": 4.941117940862643,
      "grad_norm": 0.32256144285202026,
      "learning_rate": 1.1854009211731452e-07,
      "loss": 0.012,
      "step": 3019280
    },
    {
      "epoch": 4.941150671301297,
      "grad_norm": 0.19308507442474365,
      "learning_rate": 1.1847419990379738e-07,
      "loss": 0.008,
      "step": 3019300
    },
    {
      "epoch": 4.94118340173995,
      "grad_norm": 0.34566766023635864,
      "learning_rate": 1.1840830769028025e-07,
      "loss": 0.0094,
      "step": 3019320
    },
    {
      "epoch": 4.941216132178603,
      "grad_norm": 0.24422194063663483,
      "learning_rate": 1.1834241547676312e-07,
      "loss": 0.0124,
      "step": 3019340
    },
    {
      "epoch": 4.941248862617257,
      "grad_norm": 0.27313995361328125,
      "learning_rate": 1.1827652326324599e-07,
      "loss": 0.0142,
      "step": 3019360
    },
    {
      "epoch": 4.94128159305591,
      "grad_norm": 0.06045207008719444,
      "learning_rate": 1.1821063104972886e-07,
      "loss": 0.0048,
      "step": 3019380
    },
    {
      "epoch": 4.941314323494564,
      "grad_norm": 0.18878354132175446,
      "learning_rate": 1.1814473883621172e-07,
      "loss": 0.0106,
      "step": 3019400
    },
    {
      "epoch": 4.9413470539332165,
      "grad_norm": 0.26806434988975525,
      "learning_rate": 1.180788466226946e-07,
      "loss": 0.0067,
      "step": 3019420
    },
    {
      "epoch": 4.94137978437187,
      "grad_norm": 0.332334041595459,
      "learning_rate": 1.1801295440917748e-07,
      "loss": 0.0095,
      "step": 3019440
    },
    {
      "epoch": 4.941412514810524,
      "grad_norm": 0.8244640231132507,
      "learning_rate": 1.1794706219566035e-07,
      "loss": 0.007,
      "step": 3019460
    },
    {
      "epoch": 4.941445245249177,
      "grad_norm": 0.2623034715652466,
      "learning_rate": 1.1788116998214323e-07,
      "loss": 0.0059,
      "step": 3019480
    },
    {
      "epoch": 4.94147797568783,
      "grad_norm": 0.07071609795093536,
      "learning_rate": 1.178152777686261e-07,
      "loss": 0.0087,
      "step": 3019500
    },
    {
      "epoch": 4.941510706126484,
      "grad_norm": 0.21779313683509827,
      "learning_rate": 1.1774938555510896e-07,
      "loss": 0.0066,
      "step": 3019520
    },
    {
      "epoch": 4.941543436565137,
      "grad_norm": 0.27862465381622314,
      "learning_rate": 1.1768349334159183e-07,
      "loss": 0.0095,
      "step": 3019540
    },
    {
      "epoch": 4.94157616700379,
      "grad_norm": 0.24115155637264252,
      "learning_rate": 1.176176011280747e-07,
      "loss": 0.0138,
      "step": 3019560
    },
    {
      "epoch": 4.9416088974424435,
      "grad_norm": 0.22387568652629852,
      "learning_rate": 1.1755170891455759e-07,
      "loss": 0.0072,
      "step": 3019580
    },
    {
      "epoch": 4.941641627881097,
      "grad_norm": 0.2415192425251007,
      "learning_rate": 1.1748581670104046e-07,
      "loss": 0.0101,
      "step": 3019600
    },
    {
      "epoch": 4.94167435831975,
      "grad_norm": 0.24729444086551666,
      "learning_rate": 1.1741992448752332e-07,
      "loss": 0.0129,
      "step": 3019620
    },
    {
      "epoch": 4.941707088758403,
      "grad_norm": 0.3456156551837921,
      "learning_rate": 1.1735403227400619e-07,
      "loss": 0.0078,
      "step": 3019640
    },
    {
      "epoch": 4.941739819197057,
      "grad_norm": 0.2915481925010681,
      "learning_rate": 1.1728814006048906e-07,
      "loss": 0.0093,
      "step": 3019660
    },
    {
      "epoch": 4.9417725496357106,
      "grad_norm": 0.5882177352905273,
      "learning_rate": 1.1722224784697193e-07,
      "loss": 0.0085,
      "step": 3019680
    },
    {
      "epoch": 4.941805280074363,
      "grad_norm": 0.12641113996505737,
      "learning_rate": 1.171563556334548e-07,
      "loss": 0.0084,
      "step": 3019700
    },
    {
      "epoch": 4.941838010513017,
      "grad_norm": 0.30077189207077026,
      "learning_rate": 1.1709046341993768e-07,
      "loss": 0.0124,
      "step": 3019720
    },
    {
      "epoch": 4.9418707409516704,
      "grad_norm": 0.3121144771575928,
      "learning_rate": 1.1702457120642054e-07,
      "loss": 0.0097,
      "step": 3019740
    },
    {
      "epoch": 4.941903471390324,
      "grad_norm": 0.27987441420555115,
      "learning_rate": 1.1695867899290342e-07,
      "loss": 0.0095,
      "step": 3019760
    },
    {
      "epoch": 4.941936201828977,
      "grad_norm": 0.16350944340229034,
      "learning_rate": 1.1689278677938629e-07,
      "loss": 0.0061,
      "step": 3019780
    },
    {
      "epoch": 4.94196893226763,
      "grad_norm": 0.15242590010166168,
      "learning_rate": 1.1682689456586917e-07,
      "loss": 0.0052,
      "step": 3019800
    },
    {
      "epoch": 4.942001662706284,
      "grad_norm": 0.115016408264637,
      "learning_rate": 1.1676100235235204e-07,
      "loss": 0.0107,
      "step": 3019820
    },
    {
      "epoch": 4.942034393144937,
      "grad_norm": 0.31516125798225403,
      "learning_rate": 1.166951101388349e-07,
      "loss": 0.0068,
      "step": 3019840
    },
    {
      "epoch": 4.94206712358359,
      "grad_norm": 0.11462514102458954,
      "learning_rate": 1.1662921792531777e-07,
      "loss": 0.0099,
      "step": 3019860
    },
    {
      "epoch": 4.942099854022244,
      "grad_norm": 0.1001313328742981,
      "learning_rate": 1.1656332571180064e-07,
      "loss": 0.0101,
      "step": 3019880
    },
    {
      "epoch": 4.9421325844608965,
      "grad_norm": 0.24638664722442627,
      "learning_rate": 1.1649743349828351e-07,
      "loss": 0.0116,
      "step": 3019900
    },
    {
      "epoch": 4.94216531489955,
      "grad_norm": 0.22354990243911743,
      "learning_rate": 1.164315412847664e-07,
      "loss": 0.0068,
      "step": 3019920
    },
    {
      "epoch": 4.942198045338204,
      "grad_norm": 0.1361810714006424,
      "learning_rate": 1.1636564907124927e-07,
      "loss": 0.007,
      "step": 3019940
    },
    {
      "epoch": 4.942230775776857,
      "grad_norm": 0.12735214829444885,
      "learning_rate": 1.1629975685773213e-07,
      "loss": 0.0067,
      "step": 3019960
    },
    {
      "epoch": 4.94226350621551,
      "grad_norm": 0.4726976454257965,
      "learning_rate": 1.16233864644215e-07,
      "loss": 0.0082,
      "step": 3019980
    },
    {
      "epoch": 4.942296236654164,
      "grad_norm": 0.0271624568849802,
      "learning_rate": 1.1616797243069787e-07,
      "loss": 0.0099,
      "step": 3020000
    },
    {
      "epoch": 4.942328967092817,
      "grad_norm": 0.20887775719165802,
      "learning_rate": 1.1610208021718075e-07,
      "loss": 0.0085,
      "step": 3020020
    },
    {
      "epoch": 4.942361697531471,
      "grad_norm": 0.45158466696739197,
      "learning_rate": 1.1603618800366362e-07,
      "loss": 0.0068,
      "step": 3020040
    },
    {
      "epoch": 4.9423944279701235,
      "grad_norm": 0.17303182184696198,
      "learning_rate": 1.1597029579014648e-07,
      "loss": 0.0086,
      "step": 3020060
    },
    {
      "epoch": 4.942427158408777,
      "grad_norm": 0.42562273144721985,
      "learning_rate": 1.1590440357662935e-07,
      "loss": 0.0085,
      "step": 3020080
    },
    {
      "epoch": 4.942459888847431,
      "grad_norm": 0.4254678785800934,
      "learning_rate": 1.1583851136311223e-07,
      "loss": 0.0095,
      "step": 3020100
    },
    {
      "epoch": 4.942492619286083,
      "grad_norm": 0.1253858059644699,
      "learning_rate": 1.157726191495951e-07,
      "loss": 0.0078,
      "step": 3020120
    },
    {
      "epoch": 4.942525349724737,
      "grad_norm": 0.5458925366401672,
      "learning_rate": 1.1570672693607798e-07,
      "loss": 0.012,
      "step": 3020140
    },
    {
      "epoch": 4.942558080163391,
      "grad_norm": 0.15978659689426422,
      "learning_rate": 1.1564083472256085e-07,
      "loss": 0.0112,
      "step": 3020160
    },
    {
      "epoch": 4.942590810602043,
      "grad_norm": 0.2016354501247406,
      "learning_rate": 1.1557494250904371e-07,
      "loss": 0.0124,
      "step": 3020180
    },
    {
      "epoch": 4.942623541040697,
      "grad_norm": 0.2503999173641205,
      "learning_rate": 1.1550905029552658e-07,
      "loss": 0.0094,
      "step": 3020200
    },
    {
      "epoch": 4.9426562714793505,
      "grad_norm": 0.2835578918457031,
      "learning_rate": 1.1544315808200945e-07,
      "loss": 0.0084,
      "step": 3020220
    },
    {
      "epoch": 4.942689001918004,
      "grad_norm": 0.3538045585155487,
      "learning_rate": 1.1537726586849234e-07,
      "loss": 0.0128,
      "step": 3020240
    },
    {
      "epoch": 4.942721732356657,
      "grad_norm": 0.22337807714939117,
      "learning_rate": 1.1531137365497521e-07,
      "loss": 0.0127,
      "step": 3020260
    },
    {
      "epoch": 4.94275446279531,
      "grad_norm": 0.2123454064130783,
      "learning_rate": 1.1524548144145807e-07,
      "loss": 0.0111,
      "step": 3020280
    },
    {
      "epoch": 4.942787193233964,
      "grad_norm": 0.19508416950702667,
      "learning_rate": 1.1517958922794094e-07,
      "loss": 0.0107,
      "step": 3020300
    },
    {
      "epoch": 4.942819923672617,
      "grad_norm": 0.22738249599933624,
      "learning_rate": 1.1511369701442381e-07,
      "loss": 0.008,
      "step": 3020320
    },
    {
      "epoch": 4.94285265411127,
      "grad_norm": 0.2558386027812958,
      "learning_rate": 1.1504780480090669e-07,
      "loss": 0.0075,
      "step": 3020340
    },
    {
      "epoch": 4.942885384549924,
      "grad_norm": 0.3386920094490051,
      "learning_rate": 1.1498191258738956e-07,
      "loss": 0.0106,
      "step": 3020360
    },
    {
      "epoch": 4.9429181149885775,
      "grad_norm": 0.1453796923160553,
      "learning_rate": 1.1491602037387243e-07,
      "loss": 0.0058,
      "step": 3020380
    },
    {
      "epoch": 4.94295084542723,
      "grad_norm": 0.4180026650428772,
      "learning_rate": 1.1485012816035529e-07,
      "loss": 0.0082,
      "step": 3020400
    },
    {
      "epoch": 4.942983575865884,
      "grad_norm": 0.11815783381462097,
      "learning_rate": 1.1478423594683817e-07,
      "loss": 0.0099,
      "step": 3020420
    },
    {
      "epoch": 4.943016306304537,
      "grad_norm": 0.11546974629163742,
      "learning_rate": 1.1471834373332105e-07,
      "loss": 0.0045,
      "step": 3020440
    },
    {
      "epoch": 4.94304903674319,
      "grad_norm": 0.08104739338159561,
      "learning_rate": 1.1465245151980392e-07,
      "loss": 0.0058,
      "step": 3020460
    },
    {
      "epoch": 4.943081767181844,
      "grad_norm": 0.17711903154850006,
      "learning_rate": 1.1458655930628679e-07,
      "loss": 0.0075,
      "step": 3020480
    },
    {
      "epoch": 4.943114497620497,
      "grad_norm": 0.09691667556762695,
      "learning_rate": 1.1452066709276965e-07,
      "loss": 0.0081,
      "step": 3020500
    },
    {
      "epoch": 4.94314722805915,
      "grad_norm": 0.23388098180294037,
      "learning_rate": 1.1445477487925252e-07,
      "loss": 0.0089,
      "step": 3020520
    },
    {
      "epoch": 4.943179958497804,
      "grad_norm": 0.29046231508255005,
      "learning_rate": 1.1438888266573539e-07,
      "loss": 0.0113,
      "step": 3020540
    },
    {
      "epoch": 4.943212688936457,
      "grad_norm": 0.21609310805797577,
      "learning_rate": 1.1432299045221827e-07,
      "loss": 0.0112,
      "step": 3020560
    },
    {
      "epoch": 4.943245419375111,
      "grad_norm": 0.1854257434606552,
      "learning_rate": 1.1425709823870115e-07,
      "loss": 0.0137,
      "step": 3020580
    },
    {
      "epoch": 4.9432781498137635,
      "grad_norm": 0.14049817621707916,
      "learning_rate": 1.1419120602518402e-07,
      "loss": 0.0074,
      "step": 3020600
    },
    {
      "epoch": 4.943310880252417,
      "grad_norm": 0.33049359917640686,
      "learning_rate": 1.1412531381166688e-07,
      "loss": 0.0085,
      "step": 3020620
    },
    {
      "epoch": 4.943343610691071,
      "grad_norm": 0.1812598705291748,
      "learning_rate": 1.1405942159814975e-07,
      "loss": 0.0085,
      "step": 3020640
    },
    {
      "epoch": 4.943376341129724,
      "grad_norm": 0.6867936849594116,
      "learning_rate": 1.1399352938463263e-07,
      "loss": 0.0097,
      "step": 3020660
    },
    {
      "epoch": 4.943409071568377,
      "grad_norm": 0.21913114190101624,
      "learning_rate": 1.139276371711155e-07,
      "loss": 0.0071,
      "step": 3020680
    },
    {
      "epoch": 4.9434418020070305,
      "grad_norm": 0.5879226922988892,
      "learning_rate": 1.1386174495759837e-07,
      "loss": 0.0093,
      "step": 3020700
    },
    {
      "epoch": 4.943474532445684,
      "grad_norm": 0.5224425792694092,
      "learning_rate": 1.1379585274408126e-07,
      "loss": 0.0109,
      "step": 3020720
    },
    {
      "epoch": 4.943507262884337,
      "grad_norm": 0.1528007686138153,
      "learning_rate": 1.137299605305641e-07,
      "loss": 0.0095,
      "step": 3020740
    },
    {
      "epoch": 4.94353999332299,
      "grad_norm": 0.3313981294631958,
      "learning_rate": 1.1366406831704699e-07,
      "loss": 0.0101,
      "step": 3020760
    },
    {
      "epoch": 4.943572723761644,
      "grad_norm": 0.12346088886260986,
      "learning_rate": 1.1359817610352986e-07,
      "loss": 0.0093,
      "step": 3020780
    },
    {
      "epoch": 4.943605454200297,
      "grad_norm": 0.11627190560102463,
      "learning_rate": 1.1353228389001273e-07,
      "loss": 0.0092,
      "step": 3020800
    },
    {
      "epoch": 4.94363818463895,
      "grad_norm": 0.16249579191207886,
      "learning_rate": 1.134663916764956e-07,
      "loss": 0.0077,
      "step": 3020820
    },
    {
      "epoch": 4.943670915077604,
      "grad_norm": 0.49747201800346375,
      "learning_rate": 1.1340049946297846e-07,
      "loss": 0.011,
      "step": 3020840
    },
    {
      "epoch": 4.9437036455162575,
      "grad_norm": 0.0919092521071434,
      "learning_rate": 1.1333460724946133e-07,
      "loss": 0.0055,
      "step": 3020860
    },
    {
      "epoch": 4.94373637595491,
      "grad_norm": 0.20221653580665588,
      "learning_rate": 1.132687150359442e-07,
      "loss": 0.0093,
      "step": 3020880
    },
    {
      "epoch": 4.943769106393564,
      "grad_norm": 0.20575189590454102,
      "learning_rate": 1.1320282282242709e-07,
      "loss": 0.0077,
      "step": 3020900
    },
    {
      "epoch": 4.943801836832217,
      "grad_norm": 0.4957508146762848,
      "learning_rate": 1.1313693060890996e-07,
      "loss": 0.0093,
      "step": 3020920
    },
    {
      "epoch": 4.943834567270871,
      "grad_norm": 0.19778071343898773,
      "learning_rate": 1.1307103839539284e-07,
      "loss": 0.0085,
      "step": 3020940
    },
    {
      "epoch": 4.943867297709524,
      "grad_norm": 0.2716713547706604,
      "learning_rate": 1.130051461818757e-07,
      "loss": 0.0073,
      "step": 3020960
    },
    {
      "epoch": 4.943900028148177,
      "grad_norm": 0.14803113043308258,
      "learning_rate": 1.1293925396835857e-07,
      "loss": 0.0063,
      "step": 3020980
    },
    {
      "epoch": 4.943932758586831,
      "grad_norm": 0.15100839734077454,
      "learning_rate": 1.1287336175484144e-07,
      "loss": 0.0071,
      "step": 3021000
    },
    {
      "epoch": 4.943965489025484,
      "grad_norm": 0.07451808452606201,
      "learning_rate": 1.1280746954132431e-07,
      "loss": 0.0058,
      "step": 3021020
    },
    {
      "epoch": 4.943998219464137,
      "grad_norm": 0.139835387468338,
      "learning_rate": 1.127415773278072e-07,
      "loss": 0.0069,
      "step": 3021040
    },
    {
      "epoch": 4.944030949902791,
      "grad_norm": 0.21059858798980713,
      "learning_rate": 1.1267568511429004e-07,
      "loss": 0.0079,
      "step": 3021060
    },
    {
      "epoch": 4.9440636803414435,
      "grad_norm": 0.11947575211524963,
      "learning_rate": 1.1260979290077293e-07,
      "loss": 0.0104,
      "step": 3021080
    },
    {
      "epoch": 4.944096410780097,
      "grad_norm": 0.15512940287590027,
      "learning_rate": 1.125439006872558e-07,
      "loss": 0.0085,
      "step": 3021100
    },
    {
      "epoch": 4.944129141218751,
      "grad_norm": 0.06395679712295532,
      "learning_rate": 1.1247800847373867e-07,
      "loss": 0.0136,
      "step": 3021120
    },
    {
      "epoch": 4.944161871657404,
      "grad_norm": 0.1192580908536911,
      "learning_rate": 1.1241211626022154e-07,
      "loss": 0.0069,
      "step": 3021140
    },
    {
      "epoch": 4.944194602096057,
      "grad_norm": 0.38537707924842834,
      "learning_rate": 1.1234622404670441e-07,
      "loss": 0.0104,
      "step": 3021160
    },
    {
      "epoch": 4.944227332534711,
      "grad_norm": 0.8342443704605103,
      "learning_rate": 1.1228033183318727e-07,
      "loss": 0.0143,
      "step": 3021180
    },
    {
      "epoch": 4.944260062973364,
      "grad_norm": 0.1406041830778122,
      "learning_rate": 1.1221443961967015e-07,
      "loss": 0.0066,
      "step": 3021200
    },
    {
      "epoch": 4.944292793412018,
      "grad_norm": 0.11528202146291733,
      "learning_rate": 1.1214854740615302e-07,
      "loss": 0.0083,
      "step": 3021220
    },
    {
      "epoch": 4.9443255238506705,
      "grad_norm": 0.12666085362434387,
      "learning_rate": 1.120826551926359e-07,
      "loss": 0.0058,
      "step": 3021240
    },
    {
      "epoch": 4.944358254289324,
      "grad_norm": 0.11911123245954514,
      "learning_rate": 1.1201676297911878e-07,
      "loss": 0.0081,
      "step": 3021260
    },
    {
      "epoch": 4.944390984727978,
      "grad_norm": 0.23890943825244904,
      "learning_rate": 1.1195087076560163e-07,
      "loss": 0.006,
      "step": 3021280
    },
    {
      "epoch": 4.94442371516663,
      "grad_norm": 0.15805748105049133,
      "learning_rate": 1.118849785520845e-07,
      "loss": 0.0084,
      "step": 3021300
    },
    {
      "epoch": 4.944456445605284,
      "grad_norm": 0.2733471989631653,
      "learning_rate": 1.1181908633856738e-07,
      "loss": 0.0062,
      "step": 3021320
    },
    {
      "epoch": 4.944489176043938,
      "grad_norm": 0.5175146460533142,
      "learning_rate": 1.1175319412505025e-07,
      "loss": 0.0099,
      "step": 3021340
    },
    {
      "epoch": 4.94452190648259,
      "grad_norm": 0.17967793345451355,
      "learning_rate": 1.1168730191153312e-07,
      "loss": 0.0085,
      "step": 3021360
    },
    {
      "epoch": 4.944554636921244,
      "grad_norm": 0.49987465143203735,
      "learning_rate": 1.1162140969801601e-07,
      "loss": 0.0112,
      "step": 3021380
    },
    {
      "epoch": 4.9445873673598975,
      "grad_norm": 0.13130135834217072,
      "learning_rate": 1.1155551748449885e-07,
      "loss": 0.0088,
      "step": 3021400
    },
    {
      "epoch": 4.944620097798551,
      "grad_norm": 0.09531449526548386,
      "learning_rate": 1.1148962527098174e-07,
      "loss": 0.0104,
      "step": 3021420
    },
    {
      "epoch": 4.944652828237204,
      "grad_norm": 0.12001679837703705,
      "learning_rate": 1.1142373305746461e-07,
      "loss": 0.0079,
      "step": 3021440
    },
    {
      "epoch": 4.944685558675857,
      "grad_norm": 0.28664684295654297,
      "learning_rate": 1.1135784084394748e-07,
      "loss": 0.0092,
      "step": 3021460
    },
    {
      "epoch": 4.944718289114511,
      "grad_norm": 0.3257289230823517,
      "learning_rate": 1.1129194863043036e-07,
      "loss": 0.008,
      "step": 3021480
    },
    {
      "epoch": 4.9447510195531645,
      "grad_norm": 0.08270015567541122,
      "learning_rate": 1.1122605641691321e-07,
      "loss": 0.0047,
      "step": 3021500
    },
    {
      "epoch": 4.944783749991817,
      "grad_norm": 0.2927548587322235,
      "learning_rate": 1.1116016420339609e-07,
      "loss": 0.0074,
      "step": 3021520
    },
    {
      "epoch": 4.944816480430471,
      "grad_norm": 0.2532920837402344,
      "learning_rate": 1.1109427198987896e-07,
      "loss": 0.0118,
      "step": 3021540
    },
    {
      "epoch": 4.944849210869124,
      "grad_norm": 0.16663573682308197,
      "learning_rate": 1.1102837977636184e-07,
      "loss": 0.0089,
      "step": 3021560
    },
    {
      "epoch": 4.944881941307777,
      "grad_norm": 0.1019570454955101,
      "learning_rate": 1.1096248756284472e-07,
      "loss": 0.0072,
      "step": 3021580
    },
    {
      "epoch": 4.944914671746431,
      "grad_norm": 0.7078198194503784,
      "learning_rate": 1.1089659534932759e-07,
      "loss": 0.0113,
      "step": 3021600
    },
    {
      "epoch": 4.944947402185084,
      "grad_norm": 0.4863314628601074,
      "learning_rate": 1.1083070313581045e-07,
      "loss": 0.0099,
      "step": 3021620
    },
    {
      "epoch": 4.944980132623737,
      "grad_norm": 0.10898107290267944,
      "learning_rate": 1.1076481092229332e-07,
      "loss": 0.0125,
      "step": 3021640
    },
    {
      "epoch": 4.945012863062391,
      "grad_norm": 0.17437753081321716,
      "learning_rate": 1.1069891870877619e-07,
      "loss": 0.0119,
      "step": 3021660
    },
    {
      "epoch": 4.945045593501044,
      "grad_norm": 0.06884622573852539,
      "learning_rate": 1.1063302649525906e-07,
      "loss": 0.01,
      "step": 3021680
    },
    {
      "epoch": 4.945078323939698,
      "grad_norm": 0.1962399035692215,
      "learning_rate": 1.1056713428174195e-07,
      "loss": 0.011,
      "step": 3021700
    },
    {
      "epoch": 4.9451110543783505,
      "grad_norm": 0.06943728029727936,
      "learning_rate": 1.105012420682248e-07,
      "loss": 0.0082,
      "step": 3021720
    },
    {
      "epoch": 4.945143784817004,
      "grad_norm": 0.05093300715088844,
      "learning_rate": 1.1043534985470768e-07,
      "loss": 0.0082,
      "step": 3021740
    },
    {
      "epoch": 4.945176515255658,
      "grad_norm": 0.17983102798461914,
      "learning_rate": 1.1036945764119055e-07,
      "loss": 0.008,
      "step": 3021760
    },
    {
      "epoch": 4.94520924569431,
      "grad_norm": 0.40882542729377747,
      "learning_rate": 1.1030356542767342e-07,
      "loss": 0.0069,
      "step": 3021780
    },
    {
      "epoch": 4.945241976132964,
      "grad_norm": 0.31125766038894653,
      "learning_rate": 1.102376732141563e-07,
      "loss": 0.0089,
      "step": 3021800
    },
    {
      "epoch": 4.945274706571618,
      "grad_norm": 0.38067108392715454,
      "learning_rate": 1.1017178100063917e-07,
      "loss": 0.0087,
      "step": 3021820
    },
    {
      "epoch": 4.945307437010271,
      "grad_norm": 0.15790335834026337,
      "learning_rate": 1.1010588878712203e-07,
      "loss": 0.0064,
      "step": 3021840
    },
    {
      "epoch": 4.945340167448924,
      "grad_norm": 0.05662946403026581,
      "learning_rate": 1.100399965736049e-07,
      "loss": 0.0063,
      "step": 3021860
    },
    {
      "epoch": 4.9453728978875775,
      "grad_norm": 0.08957403898239136,
      "learning_rate": 1.0997410436008778e-07,
      "loss": 0.0059,
      "step": 3021880
    },
    {
      "epoch": 4.945405628326231,
      "grad_norm": 0.31895768642425537,
      "learning_rate": 1.0990821214657066e-07,
      "loss": 0.0073,
      "step": 3021900
    },
    {
      "epoch": 4.945438358764884,
      "grad_norm": 0.1315527707338333,
      "learning_rate": 1.0984231993305353e-07,
      "loss": 0.0082,
      "step": 3021920
    },
    {
      "epoch": 4.945471089203537,
      "grad_norm": 0.38224726915359497,
      "learning_rate": 1.0977642771953639e-07,
      "loss": 0.0081,
      "step": 3021940
    },
    {
      "epoch": 4.945503819642191,
      "grad_norm": 0.4458223879337311,
      "learning_rate": 1.0971053550601926e-07,
      "loss": 0.0091,
      "step": 3021960
    },
    {
      "epoch": 4.945536550080844,
      "grad_norm": 0.06231391802430153,
      "learning_rate": 1.0964464329250213e-07,
      "loss": 0.0093,
      "step": 3021980
    },
    {
      "epoch": 4.945569280519497,
      "grad_norm": 0.14038443565368652,
      "learning_rate": 1.09578751078985e-07,
      "loss": 0.0061,
      "step": 3022000
    },
    {
      "epoch": 4.945602010958151,
      "grad_norm": 0.2456555813550949,
      "learning_rate": 1.0951285886546788e-07,
      "loss": 0.0121,
      "step": 3022020
    },
    {
      "epoch": 4.9456347413968045,
      "grad_norm": 0.13366349041461945,
      "learning_rate": 1.0944696665195076e-07,
      "loss": 0.0136,
      "step": 3022040
    },
    {
      "epoch": 4.945667471835457,
      "grad_norm": 0.01773313619196415,
      "learning_rate": 1.093810744384336e-07,
      "loss": 0.0104,
      "step": 3022060
    },
    {
      "epoch": 4.945700202274111,
      "grad_norm": 0.23691634833812714,
      "learning_rate": 1.0931518222491649e-07,
      "loss": 0.0069,
      "step": 3022080
    },
    {
      "epoch": 4.945732932712764,
      "grad_norm": 0.2674444317817688,
      "learning_rate": 1.0924929001139936e-07,
      "loss": 0.0114,
      "step": 3022100
    },
    {
      "epoch": 4.945765663151418,
      "grad_norm": 0.1790759414434433,
      "learning_rate": 1.0918339779788224e-07,
      "loss": 0.0085,
      "step": 3022120
    },
    {
      "epoch": 4.945798393590071,
      "grad_norm": 0.2539917230606079,
      "learning_rate": 1.0911750558436511e-07,
      "loss": 0.0072,
      "step": 3022140
    },
    {
      "epoch": 4.945831124028724,
      "grad_norm": 0.20894359052181244,
      "learning_rate": 1.0905161337084797e-07,
      "loss": 0.0074,
      "step": 3022160
    },
    {
      "epoch": 4.945863854467378,
      "grad_norm": 0.17135627567768097,
      "learning_rate": 1.0898572115733084e-07,
      "loss": 0.0091,
      "step": 3022180
    },
    {
      "epoch": 4.945896584906031,
      "grad_norm": 0.30240052938461304,
      "learning_rate": 1.0891982894381371e-07,
      "loss": 0.0097,
      "step": 3022200
    },
    {
      "epoch": 4.945929315344684,
      "grad_norm": 0.17244938015937805,
      "learning_rate": 1.088539367302966e-07,
      "loss": 0.0083,
      "step": 3022220
    },
    {
      "epoch": 4.945962045783338,
      "grad_norm": 0.3053593039512634,
      "learning_rate": 1.0878804451677947e-07,
      "loss": 0.0093,
      "step": 3022240
    },
    {
      "epoch": 4.9459947762219905,
      "grad_norm": 0.3081477880477905,
      "learning_rate": 1.0872215230326234e-07,
      "loss": 0.0099,
      "step": 3022260
    },
    {
      "epoch": 4.946027506660644,
      "grad_norm": 0.21510517597198486,
      "learning_rate": 1.086562600897452e-07,
      "loss": 0.0097,
      "step": 3022280
    },
    {
      "epoch": 4.946060237099298,
      "grad_norm": 0.265575110912323,
      "learning_rate": 1.0859036787622807e-07,
      "loss": 0.0071,
      "step": 3022300
    },
    {
      "epoch": 4.946092967537951,
      "grad_norm": 0.19922484457492828,
      "learning_rate": 1.0852447566271094e-07,
      "loss": 0.0075,
      "step": 3022320
    },
    {
      "epoch": 4.946125697976604,
      "grad_norm": 0.42578163743019104,
      "learning_rate": 1.0845858344919382e-07,
      "loss": 0.0098,
      "step": 3022340
    },
    {
      "epoch": 4.9461584284152575,
      "grad_norm": 0.1610025316476822,
      "learning_rate": 1.083926912356767e-07,
      "loss": 0.0053,
      "step": 3022360
    },
    {
      "epoch": 4.946191158853911,
      "grad_norm": 0.18235071003437042,
      "learning_rate": 1.0832679902215955e-07,
      "loss": 0.01,
      "step": 3022380
    },
    {
      "epoch": 4.946223889292565,
      "grad_norm": 0.24552881717681885,
      "learning_rate": 1.0826090680864243e-07,
      "loss": 0.0077,
      "step": 3022400
    },
    {
      "epoch": 4.946256619731217,
      "grad_norm": 0.16268710792064667,
      "learning_rate": 1.081950145951253e-07,
      "loss": 0.0077,
      "step": 3022420
    },
    {
      "epoch": 4.946289350169871,
      "grad_norm": 0.08017025887966156,
      "learning_rate": 1.0812912238160818e-07,
      "loss": 0.0095,
      "step": 3022440
    },
    {
      "epoch": 4.946322080608525,
      "grad_norm": 0.051516685634851456,
      "learning_rate": 1.0806323016809105e-07,
      "loss": 0.0066,
      "step": 3022460
    },
    {
      "epoch": 4.946354811047177,
      "grad_norm": 0.11698038876056671,
      "learning_rate": 1.0799733795457392e-07,
      "loss": 0.0084,
      "step": 3022480
    },
    {
      "epoch": 4.946387541485831,
      "grad_norm": 0.6207768321037292,
      "learning_rate": 1.0793144574105678e-07,
      "loss": 0.0109,
      "step": 3022500
    },
    {
      "epoch": 4.9464202719244845,
      "grad_norm": 0.1590922623872757,
      "learning_rate": 1.0786555352753965e-07,
      "loss": 0.0062,
      "step": 3022520
    },
    {
      "epoch": 4.946453002363137,
      "grad_norm": 0.12693679332733154,
      "learning_rate": 1.0779966131402254e-07,
      "loss": 0.0077,
      "step": 3022540
    },
    {
      "epoch": 4.946485732801791,
      "grad_norm": 0.20414766669273376,
      "learning_rate": 1.0773376910050541e-07,
      "loss": 0.008,
      "step": 3022560
    },
    {
      "epoch": 4.946518463240444,
      "grad_norm": 0.2968772053718567,
      "learning_rate": 1.0766787688698828e-07,
      "loss": 0.0071,
      "step": 3022580
    },
    {
      "epoch": 4.946551193679098,
      "grad_norm": 0.12673306465148926,
      "learning_rate": 1.0760198467347114e-07,
      "loss": 0.0121,
      "step": 3022600
    },
    {
      "epoch": 4.946583924117751,
      "grad_norm": 0.22301563620567322,
      "learning_rate": 1.0753609245995401e-07,
      "loss": 0.0061,
      "step": 3022620
    },
    {
      "epoch": 4.946616654556404,
      "grad_norm": 0.39753976464271545,
      "learning_rate": 1.0747020024643688e-07,
      "loss": 0.0117,
      "step": 3022640
    },
    {
      "epoch": 4.946649384995058,
      "grad_norm": 0.26797470450401306,
      "learning_rate": 1.0740430803291976e-07,
      "loss": 0.0068,
      "step": 3022660
    },
    {
      "epoch": 4.9466821154337115,
      "grad_norm": 0.8488182425498962,
      "learning_rate": 1.0733841581940263e-07,
      "loss": 0.0115,
      "step": 3022680
    },
    {
      "epoch": 4.946714845872364,
      "grad_norm": 0.06676717102527618,
      "learning_rate": 1.0727252360588551e-07,
      "loss": 0.0051,
      "step": 3022700
    },
    {
      "epoch": 4.946747576311018,
      "grad_norm": 0.3532976806163788,
      "learning_rate": 1.0720663139236836e-07,
      "loss": 0.0065,
      "step": 3022720
    },
    {
      "epoch": 4.946780306749671,
      "grad_norm": 0.10870880633592606,
      "learning_rate": 1.0714073917885124e-07,
      "loss": 0.0103,
      "step": 3022740
    },
    {
      "epoch": 4.946813037188324,
      "grad_norm": 0.33185088634490967,
      "learning_rate": 1.0707484696533412e-07,
      "loss": 0.0093,
      "step": 3022760
    },
    {
      "epoch": 4.946845767626978,
      "grad_norm": 0.40908730030059814,
      "learning_rate": 1.0700895475181699e-07,
      "loss": 0.0097,
      "step": 3022780
    },
    {
      "epoch": 4.946878498065631,
      "grad_norm": 0.14340758323669434,
      "learning_rate": 1.0694306253829986e-07,
      "loss": 0.0077,
      "step": 3022800
    },
    {
      "epoch": 4.946911228504284,
      "grad_norm": 0.1056957021355629,
      "learning_rate": 1.0687717032478273e-07,
      "loss": 0.0101,
      "step": 3022820
    },
    {
      "epoch": 4.946943958942938,
      "grad_norm": 0.15736186504364014,
      "learning_rate": 1.0681127811126559e-07,
      "loss": 0.0075,
      "step": 3022840
    },
    {
      "epoch": 4.946976689381591,
      "grad_norm": 0.0740230605006218,
      "learning_rate": 1.0674538589774846e-07,
      "loss": 0.0085,
      "step": 3022860
    },
    {
      "epoch": 4.947009419820245,
      "grad_norm": 0.1273147165775299,
      "learning_rate": 1.0667949368423135e-07,
      "loss": 0.0066,
      "step": 3022880
    },
    {
      "epoch": 4.9470421502588975,
      "grad_norm": 0.21513871848583221,
      "learning_rate": 1.0661360147071422e-07,
      "loss": 0.0053,
      "step": 3022900
    },
    {
      "epoch": 4.947074880697551,
      "grad_norm": 0.1096484437584877,
      "learning_rate": 1.0654770925719709e-07,
      "loss": 0.0079,
      "step": 3022920
    },
    {
      "epoch": 4.947107611136205,
      "grad_norm": 0.08031592518091202,
      "learning_rate": 1.0648181704367995e-07,
      "loss": 0.0076,
      "step": 3022940
    },
    {
      "epoch": 4.947140341574858,
      "grad_norm": 0.22608719766139984,
      "learning_rate": 1.0641592483016282e-07,
      "loss": 0.0068,
      "step": 3022960
    },
    {
      "epoch": 4.947173072013511,
      "grad_norm": 0.07734367996454239,
      "learning_rate": 1.063500326166457e-07,
      "loss": 0.0123,
      "step": 3022980
    },
    {
      "epoch": 4.947205802452165,
      "grad_norm": 0.07180522382259369,
      "learning_rate": 1.0628414040312857e-07,
      "loss": 0.0109,
      "step": 3023000
    },
    {
      "epoch": 4.947238532890818,
      "grad_norm": 0.24120932817459106,
      "learning_rate": 1.0621824818961145e-07,
      "loss": 0.0082,
      "step": 3023020
    },
    {
      "epoch": 4.947271263329471,
      "grad_norm": 0.2748117744922638,
      "learning_rate": 1.0615235597609433e-07,
      "loss": 0.0045,
      "step": 3023040
    },
    {
      "epoch": 4.9473039937681245,
      "grad_norm": 0.2533004879951477,
      "learning_rate": 1.0608646376257718e-07,
      "loss": 0.0082,
      "step": 3023060
    },
    {
      "epoch": 4.947336724206778,
      "grad_norm": 0.17398113012313843,
      "learning_rate": 1.0602057154906006e-07,
      "loss": 0.0164,
      "step": 3023080
    },
    {
      "epoch": 4.947369454645431,
      "grad_norm": 0.4380827248096466,
      "learning_rate": 1.0595467933554293e-07,
      "loss": 0.0069,
      "step": 3023100
    },
    {
      "epoch": 4.947402185084084,
      "grad_norm": 0.05959795415401459,
      "learning_rate": 1.058887871220258e-07,
      "loss": 0.0123,
      "step": 3023120
    },
    {
      "epoch": 4.947434915522738,
      "grad_norm": 0.04855090007185936,
      "learning_rate": 1.0582289490850867e-07,
      "loss": 0.0125,
      "step": 3023140
    },
    {
      "epoch": 4.9474676459613915,
      "grad_norm": 0.9667418003082275,
      "learning_rate": 1.0575700269499153e-07,
      "loss": 0.0123,
      "step": 3023160
    },
    {
      "epoch": 4.947500376400044,
      "grad_norm": 0.20722739398479462,
      "learning_rate": 1.056911104814744e-07,
      "loss": 0.0054,
      "step": 3023180
    },
    {
      "epoch": 4.947533106838698,
      "grad_norm": 0.20627325773239136,
      "learning_rate": 1.0562521826795729e-07,
      "loss": 0.0108,
      "step": 3023200
    },
    {
      "epoch": 4.947565837277351,
      "grad_norm": 0.12504960596561432,
      "learning_rate": 1.0555932605444016e-07,
      "loss": 0.0075,
      "step": 3023220
    },
    {
      "epoch": 4.947598567716004,
      "grad_norm": 0.27768221497535706,
      "learning_rate": 1.0549343384092303e-07,
      "loss": 0.0076,
      "step": 3023240
    },
    {
      "epoch": 4.947631298154658,
      "grad_norm": 0.915198564529419,
      "learning_rate": 1.054275416274059e-07,
      "loss": 0.0107,
      "step": 3023260
    },
    {
      "epoch": 4.947664028593311,
      "grad_norm": 0.12374421954154968,
      "learning_rate": 1.0536164941388876e-07,
      "loss": 0.0054,
      "step": 3023280
    },
    {
      "epoch": 4.947696759031965,
      "grad_norm": 0.4656098186969757,
      "learning_rate": 1.0529575720037164e-07,
      "loss": 0.0129,
      "step": 3023300
    },
    {
      "epoch": 4.947729489470618,
      "grad_norm": 0.0677335187792778,
      "learning_rate": 1.0522986498685451e-07,
      "loss": 0.011,
      "step": 3023320
    },
    {
      "epoch": 4.947762219909271,
      "grad_norm": 0.3098924458026886,
      "learning_rate": 1.0516397277333738e-07,
      "loss": 0.011,
      "step": 3023340
    },
    {
      "epoch": 4.947794950347925,
      "grad_norm": 0.28996542096138,
      "learning_rate": 1.0509808055982027e-07,
      "loss": 0.0102,
      "step": 3023360
    },
    {
      "epoch": 4.9478276807865775,
      "grad_norm": 0.43434062600135803,
      "learning_rate": 1.0503218834630312e-07,
      "loss": 0.0076,
      "step": 3023380
    },
    {
      "epoch": 4.947860411225231,
      "grad_norm": 0.08111132681369781,
      "learning_rate": 1.04966296132786e-07,
      "loss": 0.0061,
      "step": 3023400
    },
    {
      "epoch": 4.947893141663885,
      "grad_norm": 0.12320119142532349,
      "learning_rate": 1.0490040391926887e-07,
      "loss": 0.0053,
      "step": 3023420
    },
    {
      "epoch": 4.947925872102537,
      "grad_norm": 0.3900890052318573,
      "learning_rate": 1.0483451170575174e-07,
      "loss": 0.0097,
      "step": 3023440
    },
    {
      "epoch": 4.947958602541191,
      "grad_norm": 0.2758760452270508,
      "learning_rate": 1.0476861949223461e-07,
      "loss": 0.0118,
      "step": 3023460
    },
    {
      "epoch": 4.947991332979845,
      "grad_norm": 0.11646132171154022,
      "learning_rate": 1.0470272727871749e-07,
      "loss": 0.0099,
      "step": 3023480
    },
    {
      "epoch": 4.948024063418498,
      "grad_norm": 0.1931479424238205,
      "learning_rate": 1.0463683506520034e-07,
      "loss": 0.0117,
      "step": 3023500
    },
    {
      "epoch": 4.948056793857151,
      "grad_norm": 0.09420191496610641,
      "learning_rate": 1.0457094285168322e-07,
      "loss": 0.0107,
      "step": 3023520
    },
    {
      "epoch": 4.9480895242958045,
      "grad_norm": 0.12714660167694092,
      "learning_rate": 1.045050506381661e-07,
      "loss": 0.0067,
      "step": 3023540
    },
    {
      "epoch": 4.948122254734458,
      "grad_norm": 0.16699115931987762,
      "learning_rate": 1.0443915842464897e-07,
      "loss": 0.0091,
      "step": 3023560
    },
    {
      "epoch": 4.948154985173112,
      "grad_norm": 0.08191874623298645,
      "learning_rate": 1.0437326621113185e-07,
      "loss": 0.0104,
      "step": 3023580
    },
    {
      "epoch": 4.948187715611764,
      "grad_norm": 0.18110720813274384,
      "learning_rate": 1.043073739976147e-07,
      "loss": 0.0066,
      "step": 3023600
    },
    {
      "epoch": 4.948220446050418,
      "grad_norm": 0.3071032166481018,
      "learning_rate": 1.0424148178409758e-07,
      "loss": 0.0074,
      "step": 3023620
    },
    {
      "epoch": 4.948253176489072,
      "grad_norm": 0.42247411608695984,
      "learning_rate": 1.0417558957058045e-07,
      "loss": 0.007,
      "step": 3023640
    },
    {
      "epoch": 4.948285906927724,
      "grad_norm": 0.06676317751407623,
      "learning_rate": 1.0410969735706332e-07,
      "loss": 0.0083,
      "step": 3023660
    },
    {
      "epoch": 4.948318637366378,
      "grad_norm": 0.24697245657444,
      "learning_rate": 1.040438051435462e-07,
      "loss": 0.0111,
      "step": 3023680
    },
    {
      "epoch": 4.9483513678050315,
      "grad_norm": 0.2432565987110138,
      "learning_rate": 1.0397791293002908e-07,
      "loss": 0.0046,
      "step": 3023700
    },
    {
      "epoch": 4.948384098243684,
      "grad_norm": 0.23093439638614655,
      "learning_rate": 1.0391202071651194e-07,
      "loss": 0.0084,
      "step": 3023720
    },
    {
      "epoch": 4.948416828682338,
      "grad_norm": 0.11827093362808228,
      "learning_rate": 1.0384612850299481e-07,
      "loss": 0.0099,
      "step": 3023740
    },
    {
      "epoch": 4.948449559120991,
      "grad_norm": 0.1478128284215927,
      "learning_rate": 1.0378023628947768e-07,
      "loss": 0.0096,
      "step": 3023760
    },
    {
      "epoch": 4.948482289559645,
      "grad_norm": 0.04628349840641022,
      "learning_rate": 1.0371434407596055e-07,
      "loss": 0.0059,
      "step": 3023780
    },
    {
      "epoch": 4.948515019998298,
      "grad_norm": 0.13195116817951202,
      "learning_rate": 1.0364845186244343e-07,
      "loss": 0.009,
      "step": 3023800
    },
    {
      "epoch": 4.948547750436951,
      "grad_norm": 0.6713454127311707,
      "learning_rate": 1.0358255964892628e-07,
      "loss": 0.0129,
      "step": 3023820
    },
    {
      "epoch": 4.948580480875605,
      "grad_norm": 0.1934041827917099,
      "learning_rate": 1.0351666743540916e-07,
      "loss": 0.014,
      "step": 3023840
    },
    {
      "epoch": 4.9486132113142585,
      "grad_norm": 0.28013160824775696,
      "learning_rate": 1.0345077522189204e-07,
      "loss": 0.008,
      "step": 3023860
    },
    {
      "epoch": 4.948645941752911,
      "grad_norm": 0.20961430668830872,
      "learning_rate": 1.0338488300837491e-07,
      "loss": 0.0061,
      "step": 3023880
    },
    {
      "epoch": 4.948678672191565,
      "grad_norm": 0.12449696660041809,
      "learning_rate": 1.0331899079485779e-07,
      "loss": 0.0079,
      "step": 3023900
    },
    {
      "epoch": 4.948711402630218,
      "grad_norm": 0.26241907477378845,
      "learning_rate": 1.0325309858134066e-07,
      "loss": 0.0084,
      "step": 3023920
    },
    {
      "epoch": 4.948744133068871,
      "grad_norm": 0.8292502164840698,
      "learning_rate": 1.0318720636782352e-07,
      "loss": 0.0116,
      "step": 3023940
    },
    {
      "epoch": 4.948776863507525,
      "grad_norm": 0.17010609805583954,
      "learning_rate": 1.0312131415430639e-07,
      "loss": 0.0099,
      "step": 3023960
    },
    {
      "epoch": 4.948809593946178,
      "grad_norm": 0.7305743098258972,
      "learning_rate": 1.0305542194078926e-07,
      "loss": 0.0093,
      "step": 3023980
    },
    {
      "epoch": 4.948842324384831,
      "grad_norm": 0.20726744830608368,
      "learning_rate": 1.0298952972727213e-07,
      "loss": 0.0071,
      "step": 3024000
    },
    {
      "epoch": 4.9488750548234846,
      "grad_norm": 0.11819738894701004,
      "learning_rate": 1.0292363751375502e-07,
      "loss": 0.0103,
      "step": 3024020
    },
    {
      "epoch": 4.948907785262138,
      "grad_norm": 0.30812788009643555,
      "learning_rate": 1.0285774530023788e-07,
      "loss": 0.0062,
      "step": 3024040
    },
    {
      "epoch": 4.948940515700792,
      "grad_norm": 0.1350553184747696,
      "learning_rate": 1.0279185308672075e-07,
      "loss": 0.0065,
      "step": 3024060
    },
    {
      "epoch": 4.9489732461394444,
      "grad_norm": 0.08282439410686493,
      "learning_rate": 1.0272596087320362e-07,
      "loss": 0.0074,
      "step": 3024080
    },
    {
      "epoch": 4.949005976578098,
      "grad_norm": 0.0582948736846447,
      "learning_rate": 1.026600686596865e-07,
      "loss": 0.0083,
      "step": 3024100
    },
    {
      "epoch": 4.949038707016752,
      "grad_norm": 0.13709454238414764,
      "learning_rate": 1.0259417644616937e-07,
      "loss": 0.0138,
      "step": 3024120
    },
    {
      "epoch": 4.949071437455405,
      "grad_norm": 0.1663469821214676,
      "learning_rate": 1.0252828423265224e-07,
      "loss": 0.0092,
      "step": 3024140
    },
    {
      "epoch": 4.949104167894058,
      "grad_norm": 0.04345402121543884,
      "learning_rate": 1.024623920191351e-07,
      "loss": 0.0113,
      "step": 3024160
    },
    {
      "epoch": 4.9491368983327115,
      "grad_norm": 0.10462066531181335,
      "learning_rate": 1.0239649980561797e-07,
      "loss": 0.0097,
      "step": 3024180
    },
    {
      "epoch": 4.949169628771365,
      "grad_norm": 0.3627048134803772,
      "learning_rate": 1.0233060759210085e-07,
      "loss": 0.0087,
      "step": 3024200
    },
    {
      "epoch": 4.949202359210018,
      "grad_norm": 0.11981577426195145,
      "learning_rate": 1.0226471537858373e-07,
      "loss": 0.0091,
      "step": 3024220
    },
    {
      "epoch": 4.949235089648671,
      "grad_norm": 0.08938875049352646,
      "learning_rate": 1.021988231650666e-07,
      "loss": 0.0083,
      "step": 3024240
    },
    {
      "epoch": 4.949267820087325,
      "grad_norm": 0.36822405457496643,
      "learning_rate": 1.0213293095154946e-07,
      "loss": 0.0089,
      "step": 3024260
    },
    {
      "epoch": 4.949300550525978,
      "grad_norm": 0.30225664377212524,
      "learning_rate": 1.0206703873803233e-07,
      "loss": 0.0091,
      "step": 3024280
    },
    {
      "epoch": 4.949333280964631,
      "grad_norm": 0.14178849756717682,
      "learning_rate": 1.020011465245152e-07,
      "loss": 0.0062,
      "step": 3024300
    },
    {
      "epoch": 4.949366011403285,
      "grad_norm": 0.14984816312789917,
      "learning_rate": 1.0193525431099807e-07,
      "loss": 0.008,
      "step": 3024320
    },
    {
      "epoch": 4.9493987418419385,
      "grad_norm": 0.5121399164199829,
      "learning_rate": 1.0186936209748096e-07,
      "loss": 0.007,
      "step": 3024340
    },
    {
      "epoch": 4.949431472280591,
      "grad_norm": 0.25266149640083313,
      "learning_rate": 1.0180346988396383e-07,
      "loss": 0.0105,
      "step": 3024360
    },
    {
      "epoch": 4.949464202719245,
      "grad_norm": 0.15761348605155945,
      "learning_rate": 1.0173757767044669e-07,
      "loss": 0.0077,
      "step": 3024380
    },
    {
      "epoch": 4.949496933157898,
      "grad_norm": 0.3827621638774872,
      "learning_rate": 1.0167168545692956e-07,
      "loss": 0.0062,
      "step": 3024400
    },
    {
      "epoch": 4.949529663596552,
      "grad_norm": 0.624075710773468,
      "learning_rate": 1.0160579324341243e-07,
      "loss": 0.0106,
      "step": 3024420
    },
    {
      "epoch": 4.949562394035205,
      "grad_norm": 0.18302340805530548,
      "learning_rate": 1.015399010298953e-07,
      "loss": 0.0079,
      "step": 3024440
    },
    {
      "epoch": 4.949595124473858,
      "grad_norm": 0.20209373533725739,
      "learning_rate": 1.0147400881637818e-07,
      "loss": 0.0064,
      "step": 3024460
    },
    {
      "epoch": 4.949627854912512,
      "grad_norm": 0.07387012988328934,
      "learning_rate": 1.0140811660286104e-07,
      "loss": 0.007,
      "step": 3024480
    },
    {
      "epoch": 4.949660585351165,
      "grad_norm": 0.07018167525529861,
      "learning_rate": 1.0134222438934391e-07,
      "loss": 0.009,
      "step": 3024500
    },
    {
      "epoch": 4.949693315789818,
      "grad_norm": 0.09000611305236816,
      "learning_rate": 1.012763321758268e-07,
      "loss": 0.0061,
      "step": 3024520
    },
    {
      "epoch": 4.949726046228472,
      "grad_norm": 0.2626665234565735,
      "learning_rate": 1.0121043996230967e-07,
      "loss": 0.01,
      "step": 3024540
    },
    {
      "epoch": 4.9497587766671245,
      "grad_norm": 0.05982331931591034,
      "learning_rate": 1.0114454774879254e-07,
      "loss": 0.0121,
      "step": 3024560
    },
    {
      "epoch": 4.949791507105778,
      "grad_norm": 0.14088058471679688,
      "learning_rate": 1.0107865553527541e-07,
      "loss": 0.0076,
      "step": 3024580
    },
    {
      "epoch": 4.949824237544432,
      "grad_norm": 0.23862501978874207,
      "learning_rate": 1.0101276332175827e-07,
      "loss": 0.0068,
      "step": 3024600
    },
    {
      "epoch": 4.949856967983085,
      "grad_norm": 0.2693299949169159,
      "learning_rate": 1.0094687110824114e-07,
      "loss": 0.0089,
      "step": 3024620
    },
    {
      "epoch": 4.949889698421738,
      "grad_norm": 0.28808748722076416,
      "learning_rate": 1.0088097889472401e-07,
      "loss": 0.008,
      "step": 3024640
    },
    {
      "epoch": 4.949922428860392,
      "grad_norm": 0.36277180910110474,
      "learning_rate": 1.0081508668120689e-07,
      "loss": 0.008,
      "step": 3024660
    },
    {
      "epoch": 4.949955159299045,
      "grad_norm": 0.24646145105361938,
      "learning_rate": 1.0074919446768977e-07,
      "loss": 0.0133,
      "step": 3024680
    },
    {
      "epoch": 4.949987889737698,
      "grad_norm": 0.55218905210495,
      "learning_rate": 1.0068330225417263e-07,
      "loss": 0.0083,
      "step": 3024700
    },
    {
      "epoch": 4.9500206201763515,
      "grad_norm": 0.20254212617874146,
      "learning_rate": 1.006174100406555e-07,
      "loss": 0.0095,
      "step": 3024720
    },
    {
      "epoch": 4.950053350615005,
      "grad_norm": 0.07693873345851898,
      "learning_rate": 1.0055151782713837e-07,
      "loss": 0.0081,
      "step": 3024740
    },
    {
      "epoch": 4.950086081053659,
      "grad_norm": 0.44138601422309875,
      "learning_rate": 1.0048562561362125e-07,
      "loss": 0.0133,
      "step": 3024760
    },
    {
      "epoch": 4.950118811492311,
      "grad_norm": 0.2450859546661377,
      "learning_rate": 1.0041973340010412e-07,
      "loss": 0.0075,
      "step": 3024780
    },
    {
      "epoch": 4.950151541930965,
      "grad_norm": 0.116053007543087,
      "learning_rate": 1.0035384118658699e-07,
      "loss": 0.0069,
      "step": 3024800
    },
    {
      "epoch": 4.9501842723696186,
      "grad_norm": 0.07379913330078125,
      "learning_rate": 1.0028794897306985e-07,
      "loss": 0.0119,
      "step": 3024820
    },
    {
      "epoch": 4.950217002808271,
      "grad_norm": 0.41347360610961914,
      "learning_rate": 1.0022205675955272e-07,
      "loss": 0.0087,
      "step": 3024840
    },
    {
      "epoch": 4.950249733246925,
      "grad_norm": 0.2880818247795105,
      "learning_rate": 1.0015616454603561e-07,
      "loss": 0.0064,
      "step": 3024860
    },
    {
      "epoch": 4.9502824636855784,
      "grad_norm": 0.2629128694534302,
      "learning_rate": 1.0009027233251848e-07,
      "loss": 0.0104,
      "step": 3024880
    },
    {
      "epoch": 4.950315194124231,
      "grad_norm": 0.30993083119392395,
      "learning_rate": 1.0002438011900135e-07,
      "loss": 0.0116,
      "step": 3024900
    },
    {
      "epoch": 4.950347924562885,
      "grad_norm": 0.42166396975517273,
      "learning_rate": 9.995848790548422e-08,
      "loss": 0.0074,
      "step": 3024920
    },
    {
      "epoch": 4.950380655001538,
      "grad_norm": 0.3612227737903595,
      "learning_rate": 9.989259569196708e-08,
      "loss": 0.0118,
      "step": 3024940
    },
    {
      "epoch": 4.950413385440192,
      "grad_norm": 0.21782036125659943,
      "learning_rate": 9.982670347844995e-08,
      "loss": 0.0101,
      "step": 3024960
    },
    {
      "epoch": 4.950446115878845,
      "grad_norm": 0.07381843030452728,
      "learning_rate": 9.976081126493283e-08,
      "loss": 0.0121,
      "step": 3024980
    },
    {
      "epoch": 4.950478846317498,
      "grad_norm": 0.0874895304441452,
      "learning_rate": 9.969491905141571e-08,
      "loss": 0.0083,
      "step": 3025000
    },
    {
      "epoch": 4.950511576756152,
      "grad_norm": 0.26257455348968506,
      "learning_rate": 9.962902683789858e-08,
      "loss": 0.0085,
      "step": 3025020
    },
    {
      "epoch": 4.950544307194805,
      "grad_norm": 0.18461723625659943,
      "learning_rate": 9.956313462438144e-08,
      "loss": 0.0087,
      "step": 3025040
    },
    {
      "epoch": 4.950577037633458,
      "grad_norm": 0.27819159626960754,
      "learning_rate": 9.949724241086431e-08,
      "loss": 0.0077,
      "step": 3025060
    },
    {
      "epoch": 4.950609768072112,
      "grad_norm": 0.26393455266952515,
      "learning_rate": 9.943135019734719e-08,
      "loss": 0.0095,
      "step": 3025080
    },
    {
      "epoch": 4.950642498510765,
      "grad_norm": 0.27981555461883545,
      "learning_rate": 9.936545798383006e-08,
      "loss": 0.011,
      "step": 3025100
    },
    {
      "epoch": 4.950675228949418,
      "grad_norm": 0.6395490765571594,
      "learning_rate": 9.929956577031293e-08,
      "loss": 0.0093,
      "step": 3025120
    },
    {
      "epoch": 4.950707959388072,
      "grad_norm": 0.049840085208415985,
      "learning_rate": 9.923367355679582e-08,
      "loss": 0.0068,
      "step": 3025140
    },
    {
      "epoch": 4.950740689826725,
      "grad_norm": 0.1085650846362114,
      "learning_rate": 9.916778134327866e-08,
      "loss": 0.0085,
      "step": 3025160
    },
    {
      "epoch": 4.950773420265378,
      "grad_norm": 0.07312735170125961,
      "learning_rate": 9.910188912976155e-08,
      "loss": 0.0078,
      "step": 3025180
    },
    {
      "epoch": 4.9508061507040315,
      "grad_norm": 0.08898832648992538,
      "learning_rate": 9.903599691624442e-08,
      "loss": 0.0114,
      "step": 3025200
    },
    {
      "epoch": 4.950838881142685,
      "grad_norm": 0.2647188603878021,
      "learning_rate": 9.897010470272729e-08,
      "loss": 0.0064,
      "step": 3025220
    },
    {
      "epoch": 4.950871611581339,
      "grad_norm": 0.3689456880092621,
      "learning_rate": 9.890421248921016e-08,
      "loss": 0.0083,
      "step": 3025240
    },
    {
      "epoch": 4.950904342019991,
      "grad_norm": 0.08195456862449646,
      "learning_rate": 9.883832027569302e-08,
      "loss": 0.0119,
      "step": 3025260
    },
    {
      "epoch": 4.950937072458645,
      "grad_norm": 0.4950374364852905,
      "learning_rate": 9.87724280621759e-08,
      "loss": 0.0101,
      "step": 3025280
    },
    {
      "epoch": 4.950969802897299,
      "grad_norm": 0.2371930032968521,
      "learning_rate": 9.870653584865877e-08,
      "loss": 0.0072,
      "step": 3025300
    },
    {
      "epoch": 4.951002533335952,
      "grad_norm": 0.5540652871131897,
      "learning_rate": 9.864064363514165e-08,
      "loss": 0.0058,
      "step": 3025320
    },
    {
      "epoch": 4.951035263774605,
      "grad_norm": 0.12541644275188446,
      "learning_rate": 9.857475142162452e-08,
      "loss": 0.0065,
      "step": 3025340
    },
    {
      "epoch": 4.9510679942132585,
      "grad_norm": 0.1753178983926773,
      "learning_rate": 9.85088592081074e-08,
      "loss": 0.0097,
      "step": 3025360
    },
    {
      "epoch": 4.951100724651912,
      "grad_norm": 0.3687335252761841,
      "learning_rate": 9.844296699459025e-08,
      "loss": 0.0107,
      "step": 3025380
    },
    {
      "epoch": 4.951133455090565,
      "grad_norm": 0.3974122405052185,
      "learning_rate": 9.837707478107313e-08,
      "loss": 0.0081,
      "step": 3025400
    },
    {
      "epoch": 4.951166185529218,
      "grad_norm": 0.115725576877594,
      "learning_rate": 9.8311182567556e-08,
      "loss": 0.0107,
      "step": 3025420
    },
    {
      "epoch": 4.951198915967872,
      "grad_norm": 0.2067299783229828,
      "learning_rate": 9.824529035403887e-08,
      "loss": 0.0148,
      "step": 3025440
    },
    {
      "epoch": 4.951231646406525,
      "grad_norm": 0.16602666676044464,
      "learning_rate": 9.817939814052174e-08,
      "loss": 0.0104,
      "step": 3025460
    },
    {
      "epoch": 4.951264376845178,
      "grad_norm": 0.02127201296389103,
      "learning_rate": 9.81135059270046e-08,
      "loss": 0.0088,
      "step": 3025480
    },
    {
      "epoch": 4.951297107283832,
      "grad_norm": 0.15719854831695557,
      "learning_rate": 9.804761371348747e-08,
      "loss": 0.0079,
      "step": 3025500
    },
    {
      "epoch": 4.9513298377224855,
      "grad_norm": 0.29837852716445923,
      "learning_rate": 9.798172149997036e-08,
      "loss": 0.009,
      "step": 3025520
    },
    {
      "epoch": 4.951362568161138,
      "grad_norm": 0.32980257272720337,
      "learning_rate": 9.791582928645323e-08,
      "loss": 0.0107,
      "step": 3025540
    },
    {
      "epoch": 4.951395298599792,
      "grad_norm": 0.2477579414844513,
      "learning_rate": 9.78499370729361e-08,
      "loss": 0.0076,
      "step": 3025560
    },
    {
      "epoch": 4.951428029038445,
      "grad_norm": 0.3497005105018616,
      "learning_rate": 9.778404485941898e-08,
      "loss": 0.0085,
      "step": 3025580
    },
    {
      "epoch": 4.951460759477099,
      "grad_norm": 0.35501208901405334,
      "learning_rate": 9.771815264590183e-08,
      "loss": 0.0113,
      "step": 3025600
    },
    {
      "epoch": 4.951493489915752,
      "grad_norm": 0.10406246781349182,
      "learning_rate": 9.765226043238471e-08,
      "loss": 0.0096,
      "step": 3025620
    },
    {
      "epoch": 4.951526220354405,
      "grad_norm": 0.19709856808185577,
      "learning_rate": 9.758636821886758e-08,
      "loss": 0.0073,
      "step": 3025640
    },
    {
      "epoch": 4.951558950793059,
      "grad_norm": 0.336578369140625,
      "learning_rate": 9.752047600535046e-08,
      "loss": 0.0058,
      "step": 3025660
    },
    {
      "epoch": 4.951591681231712,
      "grad_norm": 0.14341410994529724,
      "learning_rate": 9.745458379183334e-08,
      "loss": 0.0065,
      "step": 3025680
    },
    {
      "epoch": 4.951624411670365,
      "grad_norm": 0.07310325652360916,
      "learning_rate": 9.73886915783162e-08,
      "loss": 0.0088,
      "step": 3025700
    },
    {
      "epoch": 4.951657142109019,
      "grad_norm": 0.1801261454820633,
      "learning_rate": 9.732279936479907e-08,
      "loss": 0.0069,
      "step": 3025720
    },
    {
      "epoch": 4.9516898725476715,
      "grad_norm": 0.19029419124126434,
      "learning_rate": 9.725690715128194e-08,
      "loss": 0.0049,
      "step": 3025740
    },
    {
      "epoch": 4.951722602986325,
      "grad_norm": 0.1374468356370926,
      "learning_rate": 9.719101493776481e-08,
      "loss": 0.0065,
      "step": 3025760
    },
    {
      "epoch": 4.951755333424979,
      "grad_norm": 0.18563443422317505,
      "learning_rate": 9.712512272424768e-08,
      "loss": 0.0054,
      "step": 3025780
    },
    {
      "epoch": 4.951788063863632,
      "grad_norm": 0.12112051248550415,
      "learning_rate": 9.705923051073057e-08,
      "loss": 0.0099,
      "step": 3025800
    },
    {
      "epoch": 4.951820794302285,
      "grad_norm": 0.36382895708084106,
      "learning_rate": 9.699333829721341e-08,
      "loss": 0.0069,
      "step": 3025820
    },
    {
      "epoch": 4.9518535247409385,
      "grad_norm": 0.22309522330760956,
      "learning_rate": 9.69274460836963e-08,
      "loss": 0.0101,
      "step": 3025840
    },
    {
      "epoch": 4.951886255179592,
      "grad_norm": 0.24831755459308624,
      "learning_rate": 9.686155387017917e-08,
      "loss": 0.0108,
      "step": 3025860
    },
    {
      "epoch": 4.951918985618246,
      "grad_norm": 0.09259375184774399,
      "learning_rate": 9.679566165666204e-08,
      "loss": 0.0068,
      "step": 3025880
    },
    {
      "epoch": 4.951951716056898,
      "grad_norm": 0.2756533920764923,
      "learning_rate": 9.672976944314492e-08,
      "loss": 0.0108,
      "step": 3025900
    },
    {
      "epoch": 4.951984446495552,
      "grad_norm": 0.18767675757408142,
      "learning_rate": 9.666387722962777e-08,
      "loss": 0.0075,
      "step": 3025920
    },
    {
      "epoch": 4.952017176934206,
      "grad_norm": 0.31978607177734375,
      "learning_rate": 9.659798501611065e-08,
      "loss": 0.0068,
      "step": 3025940
    },
    {
      "epoch": 4.952049907372858,
      "grad_norm": 0.12991081178188324,
      "learning_rate": 9.653209280259352e-08,
      "loss": 0.008,
      "step": 3025960
    },
    {
      "epoch": 4.952082637811512,
      "grad_norm": 0.1347893625497818,
      "learning_rate": 9.64662005890764e-08,
      "loss": 0.0108,
      "step": 3025980
    },
    {
      "epoch": 4.9521153682501655,
      "grad_norm": 0.6900700926780701,
      "learning_rate": 9.640030837555928e-08,
      "loss": 0.012,
      "step": 3026000
    },
    {
      "epoch": 4.952148098688818,
      "grad_norm": 0.028205065056681633,
      "learning_rate": 9.633441616204215e-08,
      "loss": 0.0081,
      "step": 3026020
    },
    {
      "epoch": 4.952180829127472,
      "grad_norm": 0.25711581110954285,
      "learning_rate": 9.626852394852501e-08,
      "loss": 0.0088,
      "step": 3026040
    },
    {
      "epoch": 4.952213559566125,
      "grad_norm": 0.15696930885314941,
      "learning_rate": 9.620263173500788e-08,
      "loss": 0.0076,
      "step": 3026060
    },
    {
      "epoch": 4.952246290004779,
      "grad_norm": 0.08140293508768082,
      "learning_rate": 9.613673952149075e-08,
      "loss": 0.0118,
      "step": 3026080
    },
    {
      "epoch": 4.952279020443432,
      "grad_norm": 0.26920047402381897,
      "learning_rate": 9.607084730797362e-08,
      "loss": 0.0084,
      "step": 3026100
    },
    {
      "epoch": 4.952311750882085,
      "grad_norm": 0.29638510942459106,
      "learning_rate": 9.60049550944565e-08,
      "loss": 0.0108,
      "step": 3026120
    },
    {
      "epoch": 4.952344481320739,
      "grad_norm": 0.21757028996944427,
      "learning_rate": 9.593906288093935e-08,
      "loss": 0.0103,
      "step": 3026140
    },
    {
      "epoch": 4.9523772117593925,
      "grad_norm": 0.07600332796573639,
      "learning_rate": 9.587317066742224e-08,
      "loss": 0.0081,
      "step": 3026160
    },
    {
      "epoch": 4.952409942198045,
      "grad_norm": 0.26918500661849976,
      "learning_rate": 9.580727845390511e-08,
      "loss": 0.0073,
      "step": 3026180
    },
    {
      "epoch": 4.952442672636699,
      "grad_norm": 0.3076312243938446,
      "learning_rate": 9.574138624038798e-08,
      "loss": 0.0106,
      "step": 3026200
    },
    {
      "epoch": 4.952475403075352,
      "grad_norm": 0.3832632899284363,
      "learning_rate": 9.567549402687086e-08,
      "loss": 0.0056,
      "step": 3026220
    },
    {
      "epoch": 4.952508133514005,
      "grad_norm": 0.4232690632343292,
      "learning_rate": 9.560960181335373e-08,
      "loss": 0.0068,
      "step": 3026240
    },
    {
      "epoch": 4.952540863952659,
      "grad_norm": 0.20273204147815704,
      "learning_rate": 9.554370959983659e-08,
      "loss": 0.0066,
      "step": 3026260
    },
    {
      "epoch": 4.952573594391312,
      "grad_norm": 0.06752309948205948,
      "learning_rate": 9.547781738631946e-08,
      "loss": 0.0075,
      "step": 3026280
    },
    {
      "epoch": 4.952606324829965,
      "grad_norm": 0.2516675293445587,
      "learning_rate": 9.541192517280233e-08,
      "loss": 0.01,
      "step": 3026300
    },
    {
      "epoch": 4.952639055268619,
      "grad_norm": 0.2993113398551941,
      "learning_rate": 9.534603295928522e-08,
      "loss": 0.007,
      "step": 3026320
    },
    {
      "epoch": 4.952671785707272,
      "grad_norm": 0.05603186413645744,
      "learning_rate": 9.528014074576809e-08,
      "loss": 0.0104,
      "step": 3026340
    },
    {
      "epoch": 4.952704516145926,
      "grad_norm": 0.11870402842760086,
      "learning_rate": 9.521424853225095e-08,
      "loss": 0.0128,
      "step": 3026360
    },
    {
      "epoch": 4.9527372465845785,
      "grad_norm": 0.14112329483032227,
      "learning_rate": 9.514835631873382e-08,
      "loss": 0.0104,
      "step": 3026380
    },
    {
      "epoch": 4.952769977023232,
      "grad_norm": 0.40955641865730286,
      "learning_rate": 9.508246410521669e-08,
      "loss": 0.0105,
      "step": 3026400
    },
    {
      "epoch": 4.952802707461886,
      "grad_norm": 0.28631874918937683,
      "learning_rate": 9.501657189169956e-08,
      "loss": 0.0071,
      "step": 3026420
    },
    {
      "epoch": 4.952835437900538,
      "grad_norm": 0.1418038308620453,
      "learning_rate": 9.495067967818244e-08,
      "loss": 0.0055,
      "step": 3026440
    },
    {
      "epoch": 4.952868168339192,
      "grad_norm": 0.10459451377391815,
      "learning_rate": 9.488478746466532e-08,
      "loss": 0.0101,
      "step": 3026460
    },
    {
      "epoch": 4.952900898777846,
      "grad_norm": 0.1335335522890091,
      "learning_rate": 9.481889525114817e-08,
      "loss": 0.0105,
      "step": 3026480
    },
    {
      "epoch": 4.952933629216499,
      "grad_norm": 0.15645967423915863,
      "learning_rate": 9.475300303763105e-08,
      "loss": 0.0096,
      "step": 3026500
    },
    {
      "epoch": 4.952966359655152,
      "grad_norm": 0.33503225445747375,
      "learning_rate": 9.468711082411392e-08,
      "loss": 0.0105,
      "step": 3026520
    },
    {
      "epoch": 4.9529990900938055,
      "grad_norm": 0.1730666607618332,
      "learning_rate": 9.46212186105968e-08,
      "loss": 0.0075,
      "step": 3026540
    },
    {
      "epoch": 4.953031820532459,
      "grad_norm": 0.1326158046722412,
      "learning_rate": 9.455532639707967e-08,
      "loss": 0.0085,
      "step": 3026560
    },
    {
      "epoch": 4.953064550971112,
      "grad_norm": 0.14994901418685913,
      "learning_rate": 9.448943418356253e-08,
      "loss": 0.0089,
      "step": 3026580
    },
    {
      "epoch": 4.953097281409765,
      "grad_norm": 0.25281527638435364,
      "learning_rate": 9.44235419700454e-08,
      "loss": 0.0111,
      "step": 3026600
    },
    {
      "epoch": 4.953130011848419,
      "grad_norm": 0.17277200520038605,
      "learning_rate": 9.435764975652827e-08,
      "loss": 0.0129,
      "step": 3026620
    },
    {
      "epoch": 4.953162742287072,
      "grad_norm": 0.17141102254390717,
      "learning_rate": 9.429175754301116e-08,
      "loss": 0.0085,
      "step": 3026640
    },
    {
      "epoch": 4.953195472725725,
      "grad_norm": 0.0943988636136055,
      "learning_rate": 9.422586532949403e-08,
      "loss": 0.0081,
      "step": 3026660
    },
    {
      "epoch": 4.953228203164379,
      "grad_norm": 0.12459678947925568,
      "learning_rate": 9.41599731159769e-08,
      "loss": 0.0125,
      "step": 3026680
    },
    {
      "epoch": 4.953260933603032,
      "grad_norm": 0.2813187837600708,
      "learning_rate": 9.409408090245976e-08,
      "loss": 0.0095,
      "step": 3026700
    },
    {
      "epoch": 4.953293664041685,
      "grad_norm": 0.15203452110290527,
      "learning_rate": 9.402818868894263e-08,
      "loss": 0.0095,
      "step": 3026720
    },
    {
      "epoch": 4.953326394480339,
      "grad_norm": 0.18390220403671265,
      "learning_rate": 9.39622964754255e-08,
      "loss": 0.0056,
      "step": 3026740
    },
    {
      "epoch": 4.953359124918992,
      "grad_norm": 0.34327390789985657,
      "learning_rate": 9.389640426190838e-08,
      "loss": 0.012,
      "step": 3026760
    },
    {
      "epoch": 4.953391855357646,
      "grad_norm": 0.2514878511428833,
      "learning_rate": 9.383051204839125e-08,
      "loss": 0.0084,
      "step": 3026780
    },
    {
      "epoch": 4.953424585796299,
      "grad_norm": 0.42283353209495544,
      "learning_rate": 9.376461983487411e-08,
      "loss": 0.0119,
      "step": 3026800
    },
    {
      "epoch": 4.953457316234952,
      "grad_norm": 0.18254487216472626,
      "learning_rate": 9.369872762135699e-08,
      "loss": 0.0063,
      "step": 3026820
    },
    {
      "epoch": 4.953490046673606,
      "grad_norm": 0.1638326495885849,
      "learning_rate": 9.363283540783986e-08,
      "loss": 0.0083,
      "step": 3026840
    },
    {
      "epoch": 4.9535227771122585,
      "grad_norm": 1.0561959743499756,
      "learning_rate": 9.356694319432274e-08,
      "loss": 0.0139,
      "step": 3026860
    },
    {
      "epoch": 4.953555507550912,
      "grad_norm": 0.36820513010025024,
      "learning_rate": 9.350105098080561e-08,
      "loss": 0.007,
      "step": 3026880
    },
    {
      "epoch": 4.953588237989566,
      "grad_norm": 0.11905122548341751,
      "learning_rate": 9.343515876728848e-08,
      "loss": 0.0065,
      "step": 3026900
    },
    {
      "epoch": 4.953620968428218,
      "grad_norm": 0.18015748262405396,
      "learning_rate": 9.336926655377134e-08,
      "loss": 0.0098,
      "step": 3026920
    },
    {
      "epoch": 4.953653698866872,
      "grad_norm": 0.36548537015914917,
      "learning_rate": 9.330337434025421e-08,
      "loss": 0.0091,
      "step": 3026940
    },
    {
      "epoch": 4.953686429305526,
      "grad_norm": 0.18292014300823212,
      "learning_rate": 9.323748212673708e-08,
      "loss": 0.0068,
      "step": 3026960
    },
    {
      "epoch": 4.953719159744179,
      "grad_norm": 0.17967692017555237,
      "learning_rate": 9.317158991321997e-08,
      "loss": 0.0065,
      "step": 3026980
    },
    {
      "epoch": 4.953751890182832,
      "grad_norm": 0.13081631064414978,
      "learning_rate": 9.310569769970284e-08,
      "loss": 0.0092,
      "step": 3027000
    },
    {
      "epoch": 4.9537846206214855,
      "grad_norm": 0.1505996733903885,
      "learning_rate": 9.303980548618571e-08,
      "loss": 0.0082,
      "step": 3027020
    },
    {
      "epoch": 4.953817351060139,
      "grad_norm": 0.24906407296657562,
      "learning_rate": 9.297391327266857e-08,
      "loss": 0.0119,
      "step": 3027040
    },
    {
      "epoch": 4.953850081498793,
      "grad_norm": 0.30398425459861755,
      "learning_rate": 9.290802105915144e-08,
      "loss": 0.0107,
      "step": 3027060
    },
    {
      "epoch": 4.953882811937445,
      "grad_norm": 0.14968545734882355,
      "learning_rate": 9.284212884563432e-08,
      "loss": 0.0114,
      "step": 3027080
    },
    {
      "epoch": 4.953915542376099,
      "grad_norm": 0.13294482231140137,
      "learning_rate": 9.277623663211719e-08,
      "loss": 0.0094,
      "step": 3027100
    },
    {
      "epoch": 4.953948272814753,
      "grad_norm": 0.7963147759437561,
      "learning_rate": 9.271034441860007e-08,
      "loss": 0.0124,
      "step": 3027120
    },
    {
      "epoch": 4.953981003253405,
      "grad_norm": 0.4117301404476166,
      "learning_rate": 9.264445220508292e-08,
      "loss": 0.0114,
      "step": 3027140
    },
    {
      "epoch": 4.954013733692059,
      "grad_norm": 0.2754729390144348,
      "learning_rate": 9.25785599915658e-08,
      "loss": 0.0085,
      "step": 3027160
    },
    {
      "epoch": 4.9540464641307125,
      "grad_norm": 0.13267134130001068,
      "learning_rate": 9.251266777804868e-08,
      "loss": 0.0076,
      "step": 3027180
    },
    {
      "epoch": 4.954079194569365,
      "grad_norm": 0.24987027049064636,
      "learning_rate": 9.244677556453155e-08,
      "loss": 0.0064,
      "step": 3027200
    },
    {
      "epoch": 4.954111925008019,
      "grad_norm": 0.09918911755084991,
      "learning_rate": 9.238088335101442e-08,
      "loss": 0.0092,
      "step": 3027220
    },
    {
      "epoch": 4.954144655446672,
      "grad_norm": 0.43919068574905396,
      "learning_rate": 9.23149911374973e-08,
      "loss": 0.0096,
      "step": 3027240
    },
    {
      "epoch": 4.954177385885326,
      "grad_norm": 0.5404871106147766,
      "learning_rate": 9.224909892398015e-08,
      "loss": 0.0098,
      "step": 3027260
    },
    {
      "epoch": 4.954210116323979,
      "grad_norm": 0.11881574988365173,
      "learning_rate": 9.218320671046302e-08,
      "loss": 0.0073,
      "step": 3027280
    },
    {
      "epoch": 4.954242846762632,
      "grad_norm": 0.15067920088768005,
      "learning_rate": 9.211731449694591e-08,
      "loss": 0.0077,
      "step": 3027300
    },
    {
      "epoch": 4.954275577201286,
      "grad_norm": 0.26678040623664856,
      "learning_rate": 9.205142228342878e-08,
      "loss": 0.0084,
      "step": 3027320
    },
    {
      "epoch": 4.9543083076399395,
      "grad_norm": 0.0743870660662651,
      "learning_rate": 9.198553006991165e-08,
      "loss": 0.0103,
      "step": 3027340
    },
    {
      "epoch": 4.954341038078592,
      "grad_norm": 0.2463361620903015,
      "learning_rate": 9.191963785639451e-08,
      "loss": 0.0082,
      "step": 3027360
    },
    {
      "epoch": 4.954373768517246,
      "grad_norm": 0.07325637340545654,
      "learning_rate": 9.185374564287738e-08,
      "loss": 0.0083,
      "step": 3027380
    },
    {
      "epoch": 4.954406498955899,
      "grad_norm": 0.4375081956386566,
      "learning_rate": 9.178785342936026e-08,
      "loss": 0.0115,
      "step": 3027400
    },
    {
      "epoch": 4.954439229394552,
      "grad_norm": 0.38202857971191406,
      "learning_rate": 9.172196121584313e-08,
      "loss": 0.01,
      "step": 3027420
    },
    {
      "epoch": 4.954471959833206,
      "grad_norm": 0.4986676871776581,
      "learning_rate": 9.1656069002326e-08,
      "loss": 0.013,
      "step": 3027440
    },
    {
      "epoch": 4.954504690271859,
      "grad_norm": 0.11384076625108719,
      "learning_rate": 9.159017678880889e-08,
      "loss": 0.0056,
      "step": 3027460
    },
    {
      "epoch": 4.954537420710512,
      "grad_norm": 0.06963701546192169,
      "learning_rate": 9.152428457529175e-08,
      "loss": 0.0087,
      "step": 3027480
    },
    {
      "epoch": 4.9545701511491655,
      "grad_norm": 0.18354688584804535,
      "learning_rate": 9.145839236177462e-08,
      "loss": 0.0143,
      "step": 3027500
    },
    {
      "epoch": 4.954602881587819,
      "grad_norm": 0.12117356806993484,
      "learning_rate": 9.139250014825749e-08,
      "loss": 0.0104,
      "step": 3027520
    },
    {
      "epoch": 4.954635612026473,
      "grad_norm": 0.5746939182281494,
      "learning_rate": 9.132660793474036e-08,
      "loss": 0.0077,
      "step": 3027540
    },
    {
      "epoch": 4.954668342465125,
      "grad_norm": 0.4436991810798645,
      "learning_rate": 9.126071572122323e-08,
      "loss": 0.0096,
      "step": 3027560
    },
    {
      "epoch": 4.954701072903779,
      "grad_norm": 0.6727918386459351,
      "learning_rate": 9.119482350770609e-08,
      "loss": 0.0102,
      "step": 3027580
    },
    {
      "epoch": 4.954733803342433,
      "grad_norm": 0.4183325171470642,
      "learning_rate": 9.112893129418896e-08,
      "loss": 0.0071,
      "step": 3027600
    },
    {
      "epoch": 4.954766533781086,
      "grad_norm": 0.16487284004688263,
      "learning_rate": 9.106303908067184e-08,
      "loss": 0.0083,
      "step": 3027620
    },
    {
      "epoch": 4.954799264219739,
      "grad_norm": 0.1944827139377594,
      "learning_rate": 9.099714686715472e-08,
      "loss": 0.0079,
      "step": 3027640
    },
    {
      "epoch": 4.9548319946583925,
      "grad_norm": 0.14931628108024597,
      "learning_rate": 9.09312546536376e-08,
      "loss": 0.0097,
      "step": 3027660
    },
    {
      "epoch": 4.954864725097046,
      "grad_norm": 0.09392768889665604,
      "learning_rate": 9.086536244012047e-08,
      "loss": 0.009,
      "step": 3027680
    },
    {
      "epoch": 4.954897455535699,
      "grad_norm": 0.14382457733154297,
      "learning_rate": 9.079947022660333e-08,
      "loss": 0.0075,
      "step": 3027700
    },
    {
      "epoch": 4.954930185974352,
      "grad_norm": 0.11297143995761871,
      "learning_rate": 9.07335780130862e-08,
      "loss": 0.0097,
      "step": 3027720
    },
    {
      "epoch": 4.954962916413006,
      "grad_norm": 0.6772368550300598,
      "learning_rate": 9.066768579956907e-08,
      "loss": 0.0105,
      "step": 3027740
    },
    {
      "epoch": 4.954995646851659,
      "grad_norm": 0.31792235374450684,
      "learning_rate": 9.060179358605194e-08,
      "loss": 0.0111,
      "step": 3027760
    },
    {
      "epoch": 4.955028377290312,
      "grad_norm": 0.16962456703186035,
      "learning_rate": 9.053590137253483e-08,
      "loss": 0.0077,
      "step": 3027780
    },
    {
      "epoch": 4.955061107728966,
      "grad_norm": 0.10894522070884705,
      "learning_rate": 9.047000915901767e-08,
      "loss": 0.0112,
      "step": 3027800
    },
    {
      "epoch": 4.9550938381676195,
      "grad_norm": 0.19749115407466888,
      "learning_rate": 9.040411694550056e-08,
      "loss": 0.0089,
      "step": 3027820
    },
    {
      "epoch": 4.955126568606272,
      "grad_norm": 0.8136406540870667,
      "learning_rate": 9.033822473198343e-08,
      "loss": 0.0113,
      "step": 3027840
    },
    {
      "epoch": 4.955159299044926,
      "grad_norm": 0.11672570556402206,
      "learning_rate": 9.02723325184663e-08,
      "loss": 0.0061,
      "step": 3027860
    },
    {
      "epoch": 4.955192029483579,
      "grad_norm": 0.1693124771118164,
      "learning_rate": 9.020644030494917e-08,
      "loss": 0.0141,
      "step": 3027880
    },
    {
      "epoch": 4.955224759922232,
      "grad_norm": 0.23092295229434967,
      "learning_rate": 9.014054809143205e-08,
      "loss": 0.0157,
      "step": 3027900
    },
    {
      "epoch": 4.955257490360886,
      "grad_norm": 0.08870284259319305,
      "learning_rate": 9.00746558779149e-08,
      "loss": 0.0091,
      "step": 3027920
    },
    {
      "epoch": 4.955290220799539,
      "grad_norm": Infinity,
      "learning_rate": 9.000876366439778e-08,
      "loss": 0.0082,
      "step": 3027940
    },
    {
      "epoch": 4.955322951238193,
      "grad_norm": 0.18820591270923615,
      "learning_rate": 8.994287145088066e-08,
      "loss": 0.0097,
      "step": 3027960
    },
    {
      "epoch": 4.955355681676846,
      "grad_norm": 0.4054929316043854,
      "learning_rate": 8.987697923736353e-08,
      "loss": 0.0121,
      "step": 3027980
    },
    {
      "epoch": 4.955388412115499,
      "grad_norm": 0.261733740568161,
      "learning_rate": 8.981108702384641e-08,
      "loss": 0.0111,
      "step": 3028000
    },
    {
      "epoch": 4.955421142554153,
      "grad_norm": 0.0836111530661583,
      "learning_rate": 8.974519481032927e-08,
      "loss": 0.006,
      "step": 3028020
    },
    {
      "epoch": 4.9554538729928055,
      "grad_norm": 0.5143616199493408,
      "learning_rate": 8.967930259681214e-08,
      "loss": 0.0068,
      "step": 3028040
    },
    {
      "epoch": 4.955486603431459,
      "grad_norm": 0.21885134279727936,
      "learning_rate": 8.961341038329501e-08,
      "loss": 0.0122,
      "step": 3028060
    },
    {
      "epoch": 4.955519333870113,
      "grad_norm": 0.20927290618419647,
      "learning_rate": 8.954751816977788e-08,
      "loss": 0.0097,
      "step": 3028080
    },
    {
      "epoch": 4.955552064308765,
      "grad_norm": 0.06461480259895325,
      "learning_rate": 8.948162595626077e-08,
      "loss": 0.0071,
      "step": 3028100
    },
    {
      "epoch": 4.955584794747419,
      "grad_norm": 0.26104477047920227,
      "learning_rate": 8.941573374274364e-08,
      "loss": 0.0063,
      "step": 3028120
    },
    {
      "epoch": 4.955617525186073,
      "grad_norm": 0.10074503719806671,
      "learning_rate": 8.93498415292265e-08,
      "loss": 0.0075,
      "step": 3028140
    },
    {
      "epoch": 4.955650255624726,
      "grad_norm": 0.10916604101657867,
      "learning_rate": 8.928394931570937e-08,
      "loss": 0.0083,
      "step": 3028160
    },
    {
      "epoch": 4.955682986063379,
      "grad_norm": 0.05364402011036873,
      "learning_rate": 8.921805710219224e-08,
      "loss": 0.0102,
      "step": 3028180
    },
    {
      "epoch": 4.9557157165020325,
      "grad_norm": 0.26550358533859253,
      "learning_rate": 8.915216488867511e-08,
      "loss": 0.0063,
      "step": 3028200
    },
    {
      "epoch": 4.955748446940686,
      "grad_norm": 0.485630601644516,
      "learning_rate": 8.908627267515799e-08,
      "loss": 0.0129,
      "step": 3028220
    },
    {
      "epoch": 4.95578117737934,
      "grad_norm": 0.43220996856689453,
      "learning_rate": 8.902038046164085e-08,
      "loss": 0.0112,
      "step": 3028240
    },
    {
      "epoch": 4.955813907817992,
      "grad_norm": 0.2514667510986328,
      "learning_rate": 8.895448824812372e-08,
      "loss": 0.0156,
      "step": 3028260
    },
    {
      "epoch": 4.955846638256646,
      "grad_norm": 0.25863540172576904,
      "learning_rate": 8.888859603460659e-08,
      "loss": 0.0079,
      "step": 3028280
    },
    {
      "epoch": 4.9558793686952995,
      "grad_norm": 0.08743809908628464,
      "learning_rate": 8.882270382108947e-08,
      "loss": 0.0103,
      "step": 3028300
    },
    {
      "epoch": 4.955912099133952,
      "grad_norm": 0.04366679489612579,
      "learning_rate": 8.875681160757235e-08,
      "loss": 0.0101,
      "step": 3028320
    },
    {
      "epoch": 4.955944829572606,
      "grad_norm": 0.1858128011226654,
      "learning_rate": 8.869091939405522e-08,
      "loss": 0.0112,
      "step": 3028340
    },
    {
      "epoch": 4.955977560011259,
      "grad_norm": 0.17292222380638123,
      "learning_rate": 8.862502718053808e-08,
      "loss": 0.0112,
      "step": 3028360
    },
    {
      "epoch": 4.956010290449912,
      "grad_norm": 0.1830710619688034,
      "learning_rate": 8.855913496702095e-08,
      "loss": 0.0076,
      "step": 3028380
    },
    {
      "epoch": 4.956043020888566,
      "grad_norm": 0.3247552514076233,
      "learning_rate": 8.849324275350382e-08,
      "loss": 0.0063,
      "step": 3028400
    },
    {
      "epoch": 4.956075751327219,
      "grad_norm": 0.10513941943645477,
      "learning_rate": 8.84273505399867e-08,
      "loss": 0.0092,
      "step": 3028420
    },
    {
      "epoch": 4.956108481765873,
      "grad_norm": 0.34173065423965454,
      "learning_rate": 8.836145832646958e-08,
      "loss": 0.0108,
      "step": 3028440
    },
    {
      "epoch": 4.956141212204526,
      "grad_norm": 0.3090701997280121,
      "learning_rate": 8.829556611295243e-08,
      "loss": 0.0063,
      "step": 3028460
    },
    {
      "epoch": 4.956173942643179,
      "grad_norm": 0.16662520170211792,
      "learning_rate": 8.822967389943531e-08,
      "loss": 0.008,
      "step": 3028480
    },
    {
      "epoch": 4.956206673081833,
      "grad_norm": 0.5397864580154419,
      "learning_rate": 8.816378168591818e-08,
      "loss": 0.0085,
      "step": 3028500
    },
    {
      "epoch": 4.956239403520486,
      "grad_norm": 0.2879144847393036,
      "learning_rate": 8.809788947240105e-08,
      "loss": 0.0107,
      "step": 3028520
    },
    {
      "epoch": 4.956272133959139,
      "grad_norm": 0.1237836629152298,
      "learning_rate": 8.803199725888393e-08,
      "loss": 0.0056,
      "step": 3028540
    },
    {
      "epoch": 4.956304864397793,
      "grad_norm": 0.43097174167633057,
      "learning_rate": 8.79661050453668e-08,
      "loss": 0.0108,
      "step": 3028560
    },
    {
      "epoch": 4.956337594836446,
      "grad_norm": 0.2998628616333008,
      "learning_rate": 8.790021283184966e-08,
      "loss": 0.0089,
      "step": 3028580
    },
    {
      "epoch": 4.956370325275099,
      "grad_norm": 0.39694640040397644,
      "learning_rate": 8.783432061833253e-08,
      "loss": 0.01,
      "step": 3028600
    },
    {
      "epoch": 4.956403055713753,
      "grad_norm": 0.047373656183481216,
      "learning_rate": 8.776842840481542e-08,
      "loss": 0.0103,
      "step": 3028620
    },
    {
      "epoch": 4.956435786152406,
      "grad_norm": 0.24802753329277039,
      "learning_rate": 8.770253619129829e-08,
      "loss": 0.0096,
      "step": 3028640
    },
    {
      "epoch": 4.956468516591059,
      "grad_norm": 0.1668720245361328,
      "learning_rate": 8.763664397778116e-08,
      "loss": 0.0092,
      "step": 3028660
    },
    {
      "epoch": 4.9565012470297125,
      "grad_norm": 0.2851370871067047,
      "learning_rate": 8.757075176426402e-08,
      "loss": 0.0102,
      "step": 3028680
    },
    {
      "epoch": 4.956533977468366,
      "grad_norm": 0.2048337161540985,
      "learning_rate": 8.750485955074689e-08,
      "loss": 0.009,
      "step": 3028700
    },
    {
      "epoch": 4.95656670790702,
      "grad_norm": 0.4336582124233246,
      "learning_rate": 8.743896733722976e-08,
      "loss": 0.0081,
      "step": 3028720
    },
    {
      "epoch": 4.956599438345672,
      "grad_norm": 0.3059075176715851,
      "learning_rate": 8.737307512371263e-08,
      "loss": 0.0067,
      "step": 3028740
    },
    {
      "epoch": 4.956632168784326,
      "grad_norm": 0.050517819821834564,
      "learning_rate": 8.730718291019552e-08,
      "loss": 0.0064,
      "step": 3028760
    },
    {
      "epoch": 4.95666489922298,
      "grad_norm": 0.17880694568157196,
      "learning_rate": 8.724129069667839e-08,
      "loss": 0.0077,
      "step": 3028780
    },
    {
      "epoch": 4.956697629661633,
      "grad_norm": 0.12790367007255554,
      "learning_rate": 8.717539848316125e-08,
      "loss": 0.0058,
      "step": 3028800
    },
    {
      "epoch": 4.956730360100286,
      "grad_norm": 0.19628293812274933,
      "learning_rate": 8.710950626964412e-08,
      "loss": 0.0071,
      "step": 3028820
    },
    {
      "epoch": 4.9567630905389395,
      "grad_norm": 0.26077163219451904,
      "learning_rate": 8.7043614056127e-08,
      "loss": 0.0099,
      "step": 3028840
    },
    {
      "epoch": 4.956795820977593,
      "grad_norm": 0.39062315225601196,
      "learning_rate": 8.697772184260987e-08,
      "loss": 0.0161,
      "step": 3028860
    },
    {
      "epoch": 4.956828551416246,
      "grad_norm": 0.4188731610774994,
      "learning_rate": 8.691182962909274e-08,
      "loss": 0.0097,
      "step": 3028880
    },
    {
      "epoch": 4.956861281854899,
      "grad_norm": 0.2895861566066742,
      "learning_rate": 8.68459374155756e-08,
      "loss": 0.0085,
      "step": 3028900
    },
    {
      "epoch": 4.956894012293553,
      "grad_norm": 0.07914607226848602,
      "learning_rate": 8.678004520205847e-08,
      "loss": 0.0047,
      "step": 3028920
    },
    {
      "epoch": 4.956926742732206,
      "grad_norm": 0.28292644023895264,
      "learning_rate": 8.671415298854136e-08,
      "loss": 0.0093,
      "step": 3028940
    },
    {
      "epoch": 4.956959473170859,
      "grad_norm": 0.12462959438562393,
      "learning_rate": 8.664826077502423e-08,
      "loss": 0.0086,
      "step": 3028960
    },
    {
      "epoch": 4.956992203609513,
      "grad_norm": 0.13820210099220276,
      "learning_rate": 8.65823685615071e-08,
      "loss": 0.0077,
      "step": 3028980
    },
    {
      "epoch": 4.9570249340481665,
      "grad_norm": 0.1177186369895935,
      "learning_rate": 8.651647634798997e-08,
      "loss": 0.0114,
      "step": 3029000
    },
    {
      "epoch": 4.957057664486819,
      "grad_norm": 0.24540890753269196,
      "learning_rate": 8.645058413447283e-08,
      "loss": 0.0165,
      "step": 3029020
    },
    {
      "epoch": 4.957090394925473,
      "grad_norm": 0.24622172117233276,
      "learning_rate": 8.63846919209557e-08,
      "loss": 0.0095,
      "step": 3029040
    },
    {
      "epoch": 4.957123125364126,
      "grad_norm": 0.15364386141300201,
      "learning_rate": 8.631879970743857e-08,
      "loss": 0.0108,
      "step": 3029060
    },
    {
      "epoch": 4.95715585580278,
      "grad_norm": 0.2946050763130188,
      "learning_rate": 8.625290749392145e-08,
      "loss": 0.0089,
      "step": 3029080
    },
    {
      "epoch": 4.957188586241433,
      "grad_norm": 0.5905908942222595,
      "learning_rate": 8.618701528040433e-08,
      "loss": 0.0107,
      "step": 3029100
    },
    {
      "epoch": 4.957221316680086,
      "grad_norm": 0.07364870607852936,
      "learning_rate": 8.61211230668872e-08,
      "loss": 0.0073,
      "step": 3029120
    },
    {
      "epoch": 4.95725404711874,
      "grad_norm": 0.060673829168081284,
      "learning_rate": 8.605523085337006e-08,
      "loss": 0.0082,
      "step": 3029140
    },
    {
      "epoch": 4.9572867775573926,
      "grad_norm": 0.09855704009532928,
      "learning_rate": 8.598933863985294e-08,
      "loss": 0.0071,
      "step": 3029160
    },
    {
      "epoch": 4.957319507996046,
      "grad_norm": 0.2440040409564972,
      "learning_rate": 8.592344642633581e-08,
      "loss": 0.006,
      "step": 3029180
    },
    {
      "epoch": 4.9573522384347,
      "grad_norm": 0.43613940477371216,
      "learning_rate": 8.585755421281868e-08,
      "loss": 0.0067,
      "step": 3029200
    },
    {
      "epoch": 4.9573849688733524,
      "grad_norm": 0.40040454268455505,
      "learning_rate": 8.579166199930155e-08,
      "loss": 0.008,
      "step": 3029220
    },
    {
      "epoch": 4.957417699312006,
      "grad_norm": 0.126700758934021,
      "learning_rate": 8.572576978578441e-08,
      "loss": 0.008,
      "step": 3029240
    },
    {
      "epoch": 4.95745042975066,
      "grad_norm": 0.19489185512065887,
      "learning_rate": 8.565987757226728e-08,
      "loss": 0.0118,
      "step": 3029260
    },
    {
      "epoch": 4.957483160189313,
      "grad_norm": 0.34918802976608276,
      "learning_rate": 8.559398535875017e-08,
      "loss": 0.0072,
      "step": 3029280
    },
    {
      "epoch": 4.957515890627966,
      "grad_norm": 0.14549104869365692,
      "learning_rate": 8.552809314523304e-08,
      "loss": 0.0054,
      "step": 3029300
    },
    {
      "epoch": 4.9575486210666195,
      "grad_norm": 0.21356399357318878,
      "learning_rate": 8.546220093171591e-08,
      "loss": 0.0078,
      "step": 3029320
    },
    {
      "epoch": 4.957581351505273,
      "grad_norm": 0.18525682389736176,
      "learning_rate": 8.539630871819878e-08,
      "loss": 0.0188,
      "step": 3029340
    },
    {
      "epoch": 4.957614081943926,
      "grad_norm": 0.12146325409412384,
      "learning_rate": 8.533041650468164e-08,
      "loss": 0.0088,
      "step": 3029360
    },
    {
      "epoch": 4.957646812382579,
      "grad_norm": 0.13568344712257385,
      "learning_rate": 8.526452429116452e-08,
      "loss": 0.0114,
      "step": 3029380
    },
    {
      "epoch": 4.957679542821233,
      "grad_norm": 0.1481996327638626,
      "learning_rate": 8.519863207764739e-08,
      "loss": 0.0064,
      "step": 3029400
    },
    {
      "epoch": 4.957712273259887,
      "grad_norm": 0.3074704110622406,
      "learning_rate": 8.513273986413027e-08,
      "loss": 0.0115,
      "step": 3029420
    },
    {
      "epoch": 4.957745003698539,
      "grad_norm": 0.3576644957065582,
      "learning_rate": 8.506684765061314e-08,
      "loss": 0.006,
      "step": 3029440
    },
    {
      "epoch": 4.957777734137193,
      "grad_norm": 0.27081477642059326,
      "learning_rate": 8.5000955437096e-08,
      "loss": 0.012,
      "step": 3029460
    },
    {
      "epoch": 4.9578104645758465,
      "grad_norm": 0.23929080367088318,
      "learning_rate": 8.493506322357888e-08,
      "loss": 0.0073,
      "step": 3029480
    },
    {
      "epoch": 4.957843195014499,
      "grad_norm": 0.15712596476078033,
      "learning_rate": 8.486917101006175e-08,
      "loss": 0.0096,
      "step": 3029500
    },
    {
      "epoch": 4.957875925453153,
      "grad_norm": 0.13412372767925262,
      "learning_rate": 8.480327879654462e-08,
      "loss": 0.0055,
      "step": 3029520
    },
    {
      "epoch": 4.957908655891806,
      "grad_norm": 0.18446685373783112,
      "learning_rate": 8.473738658302749e-08,
      "loss": 0.0093,
      "step": 3029540
    },
    {
      "epoch": 4.957941386330459,
      "grad_norm": 0.08827622979879379,
      "learning_rate": 8.467149436951036e-08,
      "loss": 0.0081,
      "step": 3029560
    },
    {
      "epoch": 4.957974116769113,
      "grad_norm": 0.2902250587940216,
      "learning_rate": 8.460560215599322e-08,
      "loss": 0.01,
      "step": 3029580
    },
    {
      "epoch": 4.958006847207766,
      "grad_norm": 0.07248656451702118,
      "learning_rate": 8.453970994247611e-08,
      "loss": 0.0086,
      "step": 3029600
    },
    {
      "epoch": 4.95803957764642,
      "grad_norm": 0.0721508264541626,
      "learning_rate": 8.447381772895898e-08,
      "loss": 0.0072,
      "step": 3029620
    },
    {
      "epoch": 4.958072308085073,
      "grad_norm": 0.17616356909275055,
      "learning_rate": 8.440792551544185e-08,
      "loss": 0.0063,
      "step": 3029640
    },
    {
      "epoch": 4.958105038523726,
      "grad_norm": 0.1602826714515686,
      "learning_rate": 8.434203330192472e-08,
      "loss": 0.0115,
      "step": 3029660
    },
    {
      "epoch": 4.95813776896238,
      "grad_norm": 0.4333759546279907,
      "learning_rate": 8.427614108840758e-08,
      "loss": 0.0123,
      "step": 3029680
    },
    {
      "epoch": 4.958170499401033,
      "grad_norm": 0.2590952515602112,
      "learning_rate": 8.421024887489046e-08,
      "loss": 0.0087,
      "step": 3029700
    },
    {
      "epoch": 4.958203229839686,
      "grad_norm": 0.31788206100463867,
      "learning_rate": 8.414435666137333e-08,
      "loss": 0.0086,
      "step": 3029720
    },
    {
      "epoch": 4.95823596027834,
      "grad_norm": 0.12107701599597931,
      "learning_rate": 8.40784644478562e-08,
      "loss": 0.0076,
      "step": 3029740
    },
    {
      "epoch": 4.958268690716993,
      "grad_norm": 0.046520549803972244,
      "learning_rate": 8.401257223433908e-08,
      "loss": 0.0047,
      "step": 3029760
    },
    {
      "epoch": 4.958301421155646,
      "grad_norm": 0.0682079866528511,
      "learning_rate": 8.394668002082196e-08,
      "loss": 0.0096,
      "step": 3029780
    },
    {
      "epoch": 4.9583341515943,
      "grad_norm": 0.06455274671316147,
      "learning_rate": 8.388078780730482e-08,
      "loss": 0.0116,
      "step": 3029800
    },
    {
      "epoch": 4.958366882032953,
      "grad_norm": 0.26632535457611084,
      "learning_rate": 8.381489559378769e-08,
      "loss": 0.0089,
      "step": 3029820
    },
    {
      "epoch": 4.958399612471606,
      "grad_norm": 0.2981663942337036,
      "learning_rate": 8.374900338027056e-08,
      "loss": 0.0087,
      "step": 3029840
    },
    {
      "epoch": 4.9584323429102595,
      "grad_norm": 0.18692854046821594,
      "learning_rate": 8.368311116675343e-08,
      "loss": 0.0074,
      "step": 3029860
    },
    {
      "epoch": 4.958465073348913,
      "grad_norm": 0.3102743625640869,
      "learning_rate": 8.36172189532363e-08,
      "loss": 0.0086,
      "step": 3029880
    },
    {
      "epoch": 4.958497803787567,
      "grad_norm": 0.09442421048879623,
      "learning_rate": 8.355132673971916e-08,
      "loss": 0.0092,
      "step": 3029900
    },
    {
      "epoch": 4.958530534226219,
      "grad_norm": 0.15310253202915192,
      "learning_rate": 8.348543452620204e-08,
      "loss": 0.0079,
      "step": 3029920
    },
    {
      "epoch": 4.958563264664873,
      "grad_norm": 0.1327432543039322,
      "learning_rate": 8.341954231268492e-08,
      "loss": 0.0089,
      "step": 3029940
    },
    {
      "epoch": 4.9585959951035266,
      "grad_norm": 0.4952487647533417,
      "learning_rate": 8.335365009916779e-08,
      "loss": 0.0066,
      "step": 3029960
    },
    {
      "epoch": 4.95862872554218,
      "grad_norm": 0.16361060738563538,
      "learning_rate": 8.328775788565066e-08,
      "loss": 0.0068,
      "step": 3029980
    },
    {
      "epoch": 4.958661455980833,
      "grad_norm": 0.15604615211486816,
      "learning_rate": 8.322186567213354e-08,
      "loss": 0.0057,
      "step": 3030000
    },
    {
      "epoch": 4.9586941864194864,
      "grad_norm": 0.26413312554359436,
      "learning_rate": 8.31559734586164e-08,
      "loss": 0.008,
      "step": 3030020
    },
    {
      "epoch": 4.95872691685814,
      "grad_norm": 0.0829077810049057,
      "learning_rate": 8.309008124509927e-08,
      "loss": 0.0116,
      "step": 3030040
    },
    {
      "epoch": 4.958759647296793,
      "grad_norm": 0.5095623135566711,
      "learning_rate": 8.302418903158214e-08,
      "loss": 0.0092,
      "step": 3030060
    },
    {
      "epoch": 4.958792377735446,
      "grad_norm": 0.15456622838974,
      "learning_rate": 8.295829681806503e-08,
      "loss": 0.006,
      "step": 3030080
    },
    {
      "epoch": 4.9588251081741,
      "grad_norm": 0.17919866740703583,
      "learning_rate": 8.28924046045479e-08,
      "loss": 0.0073,
      "step": 3030100
    },
    {
      "epoch": 4.958857838612753,
      "grad_norm": 0.1473049372434616,
      "learning_rate": 8.282651239103076e-08,
      "loss": 0.0079,
      "step": 3030120
    },
    {
      "epoch": 4.958890569051406,
      "grad_norm": 0.33086326718330383,
      "learning_rate": 8.276062017751363e-08,
      "loss": 0.0103,
      "step": 3030140
    },
    {
      "epoch": 4.95892329949006,
      "grad_norm": 0.017018714919686317,
      "learning_rate": 8.26947279639965e-08,
      "loss": 0.0111,
      "step": 3030160
    },
    {
      "epoch": 4.958956029928713,
      "grad_norm": 0.383232980966568,
      "learning_rate": 8.262883575047937e-08,
      "loss": 0.0112,
      "step": 3030180
    },
    {
      "epoch": 4.958988760367366,
      "grad_norm": 0.34508922696113586,
      "learning_rate": 8.256294353696224e-08,
      "loss": 0.0084,
      "step": 3030200
    },
    {
      "epoch": 4.95902149080602,
      "grad_norm": 0.2535078823566437,
      "learning_rate": 8.249705132344512e-08,
      "loss": 0.0066,
      "step": 3030220
    },
    {
      "epoch": 4.959054221244673,
      "grad_norm": 0.08895131200551987,
      "learning_rate": 8.243115910992798e-08,
      "loss": 0.0093,
      "step": 3030240
    },
    {
      "epoch": 4.959086951683327,
      "grad_norm": 0.2849123179912567,
      "learning_rate": 8.236526689641086e-08,
      "loss": 0.0103,
      "step": 3030260
    },
    {
      "epoch": 4.95911968212198,
      "grad_norm": 0.18978086113929749,
      "learning_rate": 8.229937468289373e-08,
      "loss": 0.008,
      "step": 3030280
    },
    {
      "epoch": 4.959152412560633,
      "grad_norm": 0.04818093776702881,
      "learning_rate": 8.22334824693766e-08,
      "loss": 0.0055,
      "step": 3030300
    },
    {
      "epoch": 4.959185142999287,
      "grad_norm": 0.2653253674507141,
      "learning_rate": 8.216759025585948e-08,
      "loss": 0.0097,
      "step": 3030320
    },
    {
      "epoch": 4.9592178734379395,
      "grad_norm": 0.24564525485038757,
      "learning_rate": 8.210169804234234e-08,
      "loss": 0.0108,
      "step": 3030340
    },
    {
      "epoch": 4.959250603876593,
      "grad_norm": 0.05727481096982956,
      "learning_rate": 8.203580582882521e-08,
      "loss": 0.0068,
      "step": 3030360
    },
    {
      "epoch": 4.959283334315247,
      "grad_norm": 0.3499942123889923,
      "learning_rate": 8.196991361530808e-08,
      "loss": 0.0086,
      "step": 3030380
    },
    {
      "epoch": 4.959316064753899,
      "grad_norm": 0.7536646723747253,
      "learning_rate": 8.190402140179095e-08,
      "loss": 0.0125,
      "step": 3030400
    },
    {
      "epoch": 4.959348795192553,
      "grad_norm": 0.3992544710636139,
      "learning_rate": 8.183812918827384e-08,
      "loss": 0.0071,
      "step": 3030420
    },
    {
      "epoch": 4.959381525631207,
      "grad_norm": 0.15236473083496094,
      "learning_rate": 8.177223697475671e-08,
      "loss": 0.0059,
      "step": 3030440
    },
    {
      "epoch": 4.95941425606986,
      "grad_norm": 0.14574269950389862,
      "learning_rate": 8.170634476123957e-08,
      "loss": 0.0062,
      "step": 3030460
    },
    {
      "epoch": 4.959446986508513,
      "grad_norm": 0.12267977744340897,
      "learning_rate": 8.164045254772244e-08,
      "loss": 0.0118,
      "step": 3030480
    },
    {
      "epoch": 4.9594797169471665,
      "grad_norm": 0.2884412109851837,
      "learning_rate": 8.157456033420531e-08,
      "loss": 0.0055,
      "step": 3030500
    },
    {
      "epoch": 4.95951244738582,
      "grad_norm": 0.0948605164885521,
      "learning_rate": 8.150866812068818e-08,
      "loss": 0.008,
      "step": 3030520
    },
    {
      "epoch": 4.959545177824474,
      "grad_norm": 0.22693778574466705,
      "learning_rate": 8.144277590717106e-08,
      "loss": 0.0082,
      "step": 3030540
    },
    {
      "epoch": 4.959577908263126,
      "grad_norm": 0.21300646662712097,
      "learning_rate": 8.137688369365392e-08,
      "loss": 0.0096,
      "step": 3030560
    },
    {
      "epoch": 4.95961063870178,
      "grad_norm": 0.168130025267601,
      "learning_rate": 8.131099148013679e-08,
      "loss": 0.0095,
      "step": 3030580
    },
    {
      "epoch": 4.959643369140434,
      "grad_norm": 0.15367305278778076,
      "learning_rate": 8.124509926661967e-08,
      "loss": 0.0087,
      "step": 3030600
    },
    {
      "epoch": 4.959676099579086,
      "grad_norm": 0.10970155149698257,
      "learning_rate": 8.117920705310255e-08,
      "loss": 0.0111,
      "step": 3030620
    },
    {
      "epoch": 4.95970883001774,
      "grad_norm": 0.19893667101860046,
      "learning_rate": 8.111331483958542e-08,
      "loss": 0.0059,
      "step": 3030640
    },
    {
      "epoch": 4.9597415604563935,
      "grad_norm": 0.1631266325712204,
      "learning_rate": 8.104742262606829e-08,
      "loss": 0.0087,
      "step": 3030660
    },
    {
      "epoch": 4.959774290895046,
      "grad_norm": 0.1622423678636551,
      "learning_rate": 8.098153041255115e-08,
      "loss": 0.0068,
      "step": 3030680
    },
    {
      "epoch": 4.9598070213337,
      "grad_norm": 0.1984805315732956,
      "learning_rate": 8.091563819903402e-08,
      "loss": 0.007,
      "step": 3030700
    },
    {
      "epoch": 4.959839751772353,
      "grad_norm": 0.14564292132854462,
      "learning_rate": 8.084974598551689e-08,
      "loss": 0.0066,
      "step": 3030720
    },
    {
      "epoch": 4.959872482211007,
      "grad_norm": 0.06045587733387947,
      "learning_rate": 8.078385377199978e-08,
      "loss": 0.0064,
      "step": 3030740
    },
    {
      "epoch": 4.95990521264966,
      "grad_norm": 0.18400366604328156,
      "learning_rate": 8.071796155848265e-08,
      "loss": 0.0086,
      "step": 3030760
    },
    {
      "epoch": 4.959937943088313,
      "grad_norm": 0.4550939202308655,
      "learning_rate": 8.065206934496551e-08,
      "loss": 0.0126,
      "step": 3030780
    },
    {
      "epoch": 4.959970673526967,
      "grad_norm": 0.34659960865974426,
      "learning_rate": 8.058617713144838e-08,
      "loss": 0.0072,
      "step": 3030800
    },
    {
      "epoch": 4.96000340396562,
      "grad_norm": 0.43683841824531555,
      "learning_rate": 8.052028491793125e-08,
      "loss": 0.009,
      "step": 3030820
    },
    {
      "epoch": 4.960036134404273,
      "grad_norm": 0.31314370036125183,
      "learning_rate": 8.045439270441413e-08,
      "loss": 0.0097,
      "step": 3030840
    },
    {
      "epoch": 4.960068864842927,
      "grad_norm": 0.10531086474657059,
      "learning_rate": 8.0388500490897e-08,
      "loss": 0.008,
      "step": 3030860
    },
    {
      "epoch": 4.96010159528158,
      "grad_norm": 0.15949583053588867,
      "learning_rate": 8.032260827737988e-08,
      "loss": 0.0061,
      "step": 3030880
    },
    {
      "epoch": 4.960134325720233,
      "grad_norm": 0.16999897360801697,
      "learning_rate": 8.025671606386273e-08,
      "loss": 0.0067,
      "step": 3030900
    },
    {
      "epoch": 4.960167056158887,
      "grad_norm": 0.20255790650844574,
      "learning_rate": 8.019082385034561e-08,
      "loss": 0.0071,
      "step": 3030920
    },
    {
      "epoch": 4.96019978659754,
      "grad_norm": 0.2866120934486389,
      "learning_rate": 8.012493163682849e-08,
      "loss": 0.007,
      "step": 3030940
    },
    {
      "epoch": 4.960232517036193,
      "grad_norm": 0.12586748600006104,
      "learning_rate": 8.005903942331136e-08,
      "loss": 0.0139,
      "step": 3030960
    },
    {
      "epoch": 4.9602652474748465,
      "grad_norm": 0.30336496233940125,
      "learning_rate": 7.999314720979423e-08,
      "loss": 0.0055,
      "step": 3030980
    },
    {
      "epoch": 4.9602979779135,
      "grad_norm": 0.1269441545009613,
      "learning_rate": 7.992725499627709e-08,
      "loss": 0.0084,
      "step": 3031000
    },
    {
      "epoch": 4.960330708352153,
      "grad_norm": 0.1846684366464615,
      "learning_rate": 7.986136278275996e-08,
      "loss": 0.0075,
      "step": 3031020
    },
    {
      "epoch": 4.960363438790806,
      "grad_norm": 0.5396947264671326,
      "learning_rate": 7.979547056924283e-08,
      "loss": 0.0109,
      "step": 3031040
    },
    {
      "epoch": 4.96039616922946,
      "grad_norm": 0.1581123024225235,
      "learning_rate": 7.97295783557257e-08,
      "loss": 0.0111,
      "step": 3031060
    },
    {
      "epoch": 4.960428899668114,
      "grad_norm": 0.05418509989976883,
      "learning_rate": 7.966368614220859e-08,
      "loss": 0.0082,
      "step": 3031080
    },
    {
      "epoch": 4.960461630106766,
      "grad_norm": 0.15110917389392853,
      "learning_rate": 7.959779392869146e-08,
      "loss": 0.0087,
      "step": 3031100
    },
    {
      "epoch": 4.96049436054542,
      "grad_norm": 0.41061514616012573,
      "learning_rate": 7.953190171517432e-08,
      "loss": 0.0088,
      "step": 3031120
    },
    {
      "epoch": 4.9605270909840735,
      "grad_norm": 0.4089813530445099,
      "learning_rate": 7.946600950165719e-08,
      "loss": 0.0083,
      "step": 3031140
    },
    {
      "epoch": 4.960559821422727,
      "grad_norm": 0.03676099702715874,
      "learning_rate": 7.940011728814007e-08,
      "loss": 0.005,
      "step": 3031160
    },
    {
      "epoch": 4.96059255186138,
      "grad_norm": 0.08821085840463638,
      "learning_rate": 7.933422507462294e-08,
      "loss": 0.0104,
      "step": 3031180
    },
    {
      "epoch": 4.960625282300033,
      "grad_norm": 0.2729184627532959,
      "learning_rate": 7.926833286110581e-08,
      "loss": 0.0075,
      "step": 3031200
    },
    {
      "epoch": 4.960658012738687,
      "grad_norm": 0.19796407222747803,
      "learning_rate": 7.92024406475887e-08,
      "loss": 0.0092,
      "step": 3031220
    },
    {
      "epoch": 4.96069074317734,
      "grad_norm": 0.10085959732532501,
      "learning_rate": 7.913654843407154e-08,
      "loss": 0.0078,
      "step": 3031240
    },
    {
      "epoch": 4.960723473615993,
      "grad_norm": 0.36958035826683044,
      "learning_rate": 7.907065622055443e-08,
      "loss": 0.0086,
      "step": 3031260
    },
    {
      "epoch": 4.960756204054647,
      "grad_norm": 0.3559732735157013,
      "learning_rate": 7.90047640070373e-08,
      "loss": 0.0113,
      "step": 3031280
    },
    {
      "epoch": 4.9607889344933,
      "grad_norm": 0.08180872350931168,
      "learning_rate": 7.893887179352017e-08,
      "loss": 0.0082,
      "step": 3031300
    },
    {
      "epoch": 4.960821664931953,
      "grad_norm": 0.14637190103530884,
      "learning_rate": 7.887297958000304e-08,
      "loss": 0.0094,
      "step": 3031320
    },
    {
      "epoch": 4.960854395370607,
      "grad_norm": 0.24351239204406738,
      "learning_rate": 7.88070873664859e-08,
      "loss": 0.0114,
      "step": 3031340
    },
    {
      "epoch": 4.96088712580926,
      "grad_norm": 0.578309953212738,
      "learning_rate": 7.874119515296877e-08,
      "loss": 0.0118,
      "step": 3031360
    },
    {
      "epoch": 4.960919856247913,
      "grad_norm": 0.28263434767723083,
      "learning_rate": 7.867530293945165e-08,
      "loss": 0.0077,
      "step": 3031380
    },
    {
      "epoch": 4.960952586686567,
      "grad_norm": 0.21504351496696472,
      "learning_rate": 7.860941072593453e-08,
      "loss": 0.0065,
      "step": 3031400
    },
    {
      "epoch": 4.96098531712522,
      "grad_norm": 0.2658558189868927,
      "learning_rate": 7.85435185124174e-08,
      "loss": 0.0081,
      "step": 3031420
    },
    {
      "epoch": 4.961018047563874,
      "grad_norm": 0.1058368980884552,
      "learning_rate": 7.847762629890027e-08,
      "loss": 0.0088,
      "step": 3031440
    },
    {
      "epoch": 4.961050778002527,
      "grad_norm": 0.2662588357925415,
      "learning_rate": 7.841173408538313e-08,
      "loss": 0.0107,
      "step": 3031460
    },
    {
      "epoch": 4.96108350844118,
      "grad_norm": 0.31600379943847656,
      "learning_rate": 7.8345841871866e-08,
      "loss": 0.0157,
      "step": 3031480
    },
    {
      "epoch": 4.961116238879834,
      "grad_norm": 0.6106686592102051,
      "learning_rate": 7.827994965834888e-08,
      "loss": 0.0091,
      "step": 3031500
    },
    {
      "epoch": 4.9611489693184865,
      "grad_norm": 0.1538247913122177,
      "learning_rate": 7.821405744483175e-08,
      "loss": 0.0071,
      "step": 3031520
    },
    {
      "epoch": 4.96118169975714,
      "grad_norm": 0.1075969785451889,
      "learning_rate": 7.814816523131464e-08,
      "loss": 0.0091,
      "step": 3031540
    },
    {
      "epoch": 4.961214430195794,
      "grad_norm": 0.16414687037467957,
      "learning_rate": 7.80822730177975e-08,
      "loss": 0.0064,
      "step": 3031560
    },
    {
      "epoch": 4.961247160634446,
      "grad_norm": 0.3625313341617584,
      "learning_rate": 7.801638080428037e-08,
      "loss": 0.0106,
      "step": 3031580
    },
    {
      "epoch": 4.9612798910731,
      "grad_norm": 0.6244266033172607,
      "learning_rate": 7.795048859076324e-08,
      "loss": 0.0081,
      "step": 3031600
    },
    {
      "epoch": 4.961312621511754,
      "grad_norm": 0.24228627979755402,
      "learning_rate": 7.788459637724611e-08,
      "loss": 0.0109,
      "step": 3031620
    },
    {
      "epoch": 4.961345351950407,
      "grad_norm": 0.27875766158103943,
      "learning_rate": 7.781870416372897e-08,
      "loss": 0.0112,
      "step": 3031640
    },
    {
      "epoch": 4.96137808238906,
      "grad_norm": 0.17571237683296204,
      "learning_rate": 7.775281195021185e-08,
      "loss": 0.0065,
      "step": 3031660
    },
    {
      "epoch": 4.9614108128277135,
      "grad_norm": 0.6582340598106384,
      "learning_rate": 7.768691973669473e-08,
      "loss": 0.009,
      "step": 3031680
    },
    {
      "epoch": 4.961443543266367,
      "grad_norm": 0.2425205409526825,
      "learning_rate": 7.762102752317759e-08,
      "loss": 0.0053,
      "step": 3031700
    },
    {
      "epoch": 4.961476273705021,
      "grad_norm": 0.25462573766708374,
      "learning_rate": 7.755513530966046e-08,
      "loss": 0.0049,
      "step": 3031720
    },
    {
      "epoch": 4.961509004143673,
      "grad_norm": 0.3116094470024109,
      "learning_rate": 7.748924309614334e-08,
      "loss": 0.0104,
      "step": 3031740
    },
    {
      "epoch": 4.961541734582327,
      "grad_norm": 0.32003816962242126,
      "learning_rate": 7.74233508826262e-08,
      "loss": 0.0134,
      "step": 3031760
    },
    {
      "epoch": 4.9615744650209805,
      "grad_norm": 0.4167097210884094,
      "learning_rate": 7.735745866910907e-08,
      "loss": 0.0071,
      "step": 3031780
    },
    {
      "epoch": 4.961607195459633,
      "grad_norm": 0.29396966099739075,
      "learning_rate": 7.729156645559196e-08,
      "loss": 0.0091,
      "step": 3031800
    },
    {
      "epoch": 4.961639925898287,
      "grad_norm": 0.1775890290737152,
      "learning_rate": 7.722567424207482e-08,
      "loss": 0.0135,
      "step": 3031820
    },
    {
      "epoch": 4.96167265633694,
      "grad_norm": 0.30112484097480774,
      "learning_rate": 7.715978202855769e-08,
      "loss": 0.0089,
      "step": 3031840
    },
    {
      "epoch": 4.961705386775593,
      "grad_norm": 0.3578796684741974,
      "learning_rate": 7.709388981504056e-08,
      "loss": 0.0134,
      "step": 3031860
    },
    {
      "epoch": 4.961738117214247,
      "grad_norm": 0.05718670412898064,
      "learning_rate": 7.702799760152343e-08,
      "loss": 0.0082,
      "step": 3031880
    },
    {
      "epoch": 4.9617708476529,
      "grad_norm": 0.2455190122127533,
      "learning_rate": 7.69621053880063e-08,
      "loss": 0.0086,
      "step": 3031900
    },
    {
      "epoch": 4.961803578091554,
      "grad_norm": 0.30175966024398804,
      "learning_rate": 7.689621317448918e-08,
      "loss": 0.0079,
      "step": 3031920
    },
    {
      "epoch": 4.961836308530207,
      "grad_norm": 0.4059460759162903,
      "learning_rate": 7.683032096097205e-08,
      "loss": 0.0047,
      "step": 3031940
    },
    {
      "epoch": 4.96186903896886,
      "grad_norm": 0.2604277729988098,
      "learning_rate": 7.676442874745492e-08,
      "loss": 0.0082,
      "step": 3031960
    },
    {
      "epoch": 4.961901769407514,
      "grad_norm": 0.3473999500274658,
      "learning_rate": 7.66985365339378e-08,
      "loss": 0.0089,
      "step": 3031980
    },
    {
      "epoch": 4.961934499846167,
      "grad_norm": 0.06819787621498108,
      "learning_rate": 7.663264432042067e-08,
      "loss": 0.0083,
      "step": 3032000
    },
    {
      "epoch": 4.96196723028482,
      "grad_norm": 0.29840585589408875,
      "learning_rate": 7.656675210690354e-08,
      "loss": 0.0077,
      "step": 3032020
    },
    {
      "epoch": 4.961999960723474,
      "grad_norm": 0.3631645739078522,
      "learning_rate": 7.65008598933864e-08,
      "loss": 0.0095,
      "step": 3032040
    },
    {
      "epoch": 4.962032691162127,
      "grad_norm": 0.132854163646698,
      "learning_rate": 7.643496767986928e-08,
      "loss": 0.0097,
      "step": 3032060
    },
    {
      "epoch": 4.96206542160078,
      "grad_norm": 0.13730554282665253,
      "learning_rate": 7.636907546635214e-08,
      "loss": 0.0101,
      "step": 3032080
    },
    {
      "epoch": 4.962098152039434,
      "grad_norm": 0.2301538735628128,
      "learning_rate": 7.630318325283501e-08,
      "loss": 0.0099,
      "step": 3032100
    },
    {
      "epoch": 4.962130882478087,
      "grad_norm": 0.08184773474931717,
      "learning_rate": 7.623729103931789e-08,
      "loss": 0.0091,
      "step": 3032120
    },
    {
      "epoch": 4.96216361291674,
      "grad_norm": 0.1567377895116806,
      "learning_rate": 7.617139882580076e-08,
      "loss": 0.0082,
      "step": 3032140
    },
    {
      "epoch": 4.9621963433553935,
      "grad_norm": 0.08065436035394669,
      "learning_rate": 7.610550661228363e-08,
      "loss": 0.007,
      "step": 3032160
    },
    {
      "epoch": 4.962229073794047,
      "grad_norm": 0.0417904295027256,
      "learning_rate": 7.60396143987665e-08,
      "loss": 0.0096,
      "step": 3032180
    },
    {
      "epoch": 4.962261804232701,
      "grad_norm": 0.132042795419693,
      "learning_rate": 7.597372218524937e-08,
      "loss": 0.0084,
      "step": 3032200
    },
    {
      "epoch": 4.962294534671353,
      "grad_norm": 0.29225561022758484,
      "learning_rate": 7.590782997173225e-08,
      "loss": 0.0127,
      "step": 3032220
    },
    {
      "epoch": 4.962327265110007,
      "grad_norm": 0.1960645318031311,
      "learning_rate": 7.584193775821512e-08,
      "loss": 0.0056,
      "step": 3032240
    },
    {
      "epoch": 4.962359995548661,
      "grad_norm": 0.38871508836746216,
      "learning_rate": 7.577604554469799e-08,
      "loss": 0.0113,
      "step": 3032260
    },
    {
      "epoch": 4.962392725987314,
      "grad_norm": 0.2947736978530884,
      "learning_rate": 7.571015333118086e-08,
      "loss": 0.0066,
      "step": 3032280
    },
    {
      "epoch": 4.962425456425967,
      "grad_norm": 0.1380302608013153,
      "learning_rate": 7.564426111766372e-08,
      "loss": 0.0057,
      "step": 3032300
    },
    {
      "epoch": 4.9624581868646205,
      "grad_norm": 0.7014492154121399,
      "learning_rate": 7.557836890414661e-08,
      "loss": 0.0126,
      "step": 3032320
    },
    {
      "epoch": 4.962490917303274,
      "grad_norm": 0.10249501466751099,
      "learning_rate": 7.551247669062948e-08,
      "loss": 0.0125,
      "step": 3032340
    },
    {
      "epoch": 4.962523647741927,
      "grad_norm": 0.021258778870105743,
      "learning_rate": 7.544658447711234e-08,
      "loss": 0.0073,
      "step": 3032360
    },
    {
      "epoch": 4.96255637818058,
      "grad_norm": 0.17879636585712433,
      "learning_rate": 7.538069226359522e-08,
      "loss": 0.0071,
      "step": 3032380
    },
    {
      "epoch": 4.962589108619234,
      "grad_norm": 0.44800862669944763,
      "learning_rate": 7.53148000500781e-08,
      "loss": 0.007,
      "step": 3032400
    },
    {
      "epoch": 4.962621839057887,
      "grad_norm": 0.6644524335861206,
      "learning_rate": 7.524890783656095e-08,
      "loss": 0.0089,
      "step": 3032420
    },
    {
      "epoch": 4.96265456949654,
      "grad_norm": 0.11579503864049911,
      "learning_rate": 7.518301562304383e-08,
      "loss": 0.0069,
      "step": 3032440
    },
    {
      "epoch": 4.962687299935194,
      "grad_norm": 0.18207304179668427,
      "learning_rate": 7.511712340952671e-08,
      "loss": 0.0115,
      "step": 3032460
    },
    {
      "epoch": 4.9627200303738475,
      "grad_norm": 0.20104549825191498,
      "learning_rate": 7.505123119600957e-08,
      "loss": 0.0096,
      "step": 3032480
    },
    {
      "epoch": 4.9627527608125,
      "grad_norm": 0.3102944493293762,
      "learning_rate": 7.498533898249244e-08,
      "loss": 0.005,
      "step": 3032500
    },
    {
      "epoch": 4.962785491251154,
      "grad_norm": 0.21595311164855957,
      "learning_rate": 7.491944676897531e-08,
      "loss": 0.0095,
      "step": 3032520
    },
    {
      "epoch": 4.962818221689807,
      "grad_norm": 0.2062329649925232,
      "learning_rate": 7.485355455545819e-08,
      "loss": 0.0119,
      "step": 3032540
    },
    {
      "epoch": 4.96285095212846,
      "grad_norm": 0.30775731801986694,
      "learning_rate": 7.478766234194106e-08,
      "loss": 0.0047,
      "step": 3032560
    },
    {
      "epoch": 4.962883682567114,
      "grad_norm": 0.11456836014986038,
      "learning_rate": 7.472177012842393e-08,
      "loss": 0.0071,
      "step": 3032580
    },
    {
      "epoch": 4.962916413005767,
      "grad_norm": 0.1257649064064026,
      "learning_rate": 7.46558779149068e-08,
      "loss": 0.0059,
      "step": 3032600
    },
    {
      "epoch": 4.962949143444421,
      "grad_norm": 0.09084438532590866,
      "learning_rate": 7.458998570138968e-08,
      "loss": 0.0108,
      "step": 3032620
    },
    {
      "epoch": 4.9629818738830735,
      "grad_norm": 0.15320312976837158,
      "learning_rate": 7.452409348787255e-08,
      "loss": 0.0065,
      "step": 3032640
    },
    {
      "epoch": 4.963014604321727,
      "grad_norm": 0.18585659563541412,
      "learning_rate": 7.445820127435542e-08,
      "loss": 0.0114,
      "step": 3032660
    },
    {
      "epoch": 4.963047334760381,
      "grad_norm": 0.5892913341522217,
      "learning_rate": 7.439230906083829e-08,
      "loss": 0.008,
      "step": 3032680
    },
    {
      "epoch": 4.963080065199033,
      "grad_norm": 0.3468201458454132,
      "learning_rate": 7.432641684732115e-08,
      "loss": 0.0097,
      "step": 3032700
    },
    {
      "epoch": 4.963112795637687,
      "grad_norm": 0.1418018639087677,
      "learning_rate": 7.426052463380404e-08,
      "loss": 0.0077,
      "step": 3032720
    },
    {
      "epoch": 4.963145526076341,
      "grad_norm": 0.23711219429969788,
      "learning_rate": 7.419463242028691e-08,
      "loss": 0.0149,
      "step": 3032740
    },
    {
      "epoch": 4.963178256514993,
      "grad_norm": 0.10056106746196747,
      "learning_rate": 7.412874020676977e-08,
      "loss": 0.0085,
      "step": 3032760
    },
    {
      "epoch": 4.963210986953647,
      "grad_norm": 0.16396386921405792,
      "learning_rate": 7.406284799325264e-08,
      "loss": 0.0104,
      "step": 3032780
    },
    {
      "epoch": 4.9632437173923005,
      "grad_norm": 0.3775128424167633,
      "learning_rate": 7.399695577973551e-08,
      "loss": 0.0073,
      "step": 3032800
    },
    {
      "epoch": 4.963276447830954,
      "grad_norm": 0.40056902170181274,
      "learning_rate": 7.393106356621838e-08,
      "loss": 0.0115,
      "step": 3032820
    },
    {
      "epoch": 4.963309178269607,
      "grad_norm": 0.3342050015926361,
      "learning_rate": 7.386517135270126e-08,
      "loss": 0.0074,
      "step": 3032840
    },
    {
      "epoch": 4.96334190870826,
      "grad_norm": 0.14264927804470062,
      "learning_rate": 7.379927913918413e-08,
      "loss": 0.0064,
      "step": 3032860
    },
    {
      "epoch": 4.963374639146914,
      "grad_norm": 0.29073700308799744,
      "learning_rate": 7.3733386925667e-08,
      "loss": 0.0103,
      "step": 3032880
    },
    {
      "epoch": 4.963407369585568,
      "grad_norm": 0.1294071525335312,
      "learning_rate": 7.366749471214987e-08,
      "loss": 0.0074,
      "step": 3032900
    },
    {
      "epoch": 4.96344010002422,
      "grad_norm": 0.41307806968688965,
      "learning_rate": 7.360160249863274e-08,
      "loss": 0.0098,
      "step": 3032920
    },
    {
      "epoch": 4.963472830462874,
      "grad_norm": 0.0617758184671402,
      "learning_rate": 7.353571028511562e-08,
      "loss": 0.0077,
      "step": 3032940
    },
    {
      "epoch": 4.9635055609015275,
      "grad_norm": 0.05287259817123413,
      "learning_rate": 7.346981807159849e-08,
      "loss": 0.0074,
      "step": 3032960
    },
    {
      "epoch": 4.96353829134018,
      "grad_norm": 0.1490681767463684,
      "learning_rate": 7.340392585808136e-08,
      "loss": 0.0099,
      "step": 3032980
    },
    {
      "epoch": 4.963571021778834,
      "grad_norm": 0.3359588086605072,
      "learning_rate": 7.333803364456423e-08,
      "loss": 0.0116,
      "step": 3033000
    },
    {
      "epoch": 4.963603752217487,
      "grad_norm": 0.2970808148384094,
      "learning_rate": 7.327214143104709e-08,
      "loss": 0.0111,
      "step": 3033020
    },
    {
      "epoch": 4.96363648265614,
      "grad_norm": 0.1743844449520111,
      "learning_rate": 7.320624921752998e-08,
      "loss": 0.0092,
      "step": 3033040
    },
    {
      "epoch": 4.963669213094794,
      "grad_norm": 0.04901425540447235,
      "learning_rate": 7.314035700401285e-08,
      "loss": 0.0074,
      "step": 3033060
    },
    {
      "epoch": 4.963701943533447,
      "grad_norm": 0.08115937560796738,
      "learning_rate": 7.307446479049571e-08,
      "loss": 0.0089,
      "step": 3033080
    },
    {
      "epoch": 4.963734673972101,
      "grad_norm": 0.040780097246170044,
      "learning_rate": 7.300857257697858e-08,
      "loss": 0.0058,
      "step": 3033100
    },
    {
      "epoch": 4.963767404410754,
      "grad_norm": 0.3841213583946228,
      "learning_rate": 7.294268036346146e-08,
      "loss": 0.0091,
      "step": 3033120
    },
    {
      "epoch": 4.963800134849407,
      "grad_norm": 0.17429423332214355,
      "learning_rate": 7.287678814994432e-08,
      "loss": 0.011,
      "step": 3033140
    },
    {
      "epoch": 4.963832865288061,
      "grad_norm": 0.43542325496673584,
      "learning_rate": 7.28108959364272e-08,
      "loss": 0.0074,
      "step": 3033160
    },
    {
      "epoch": 4.963865595726714,
      "grad_norm": 0.13135771453380585,
      "learning_rate": 7.274500372291007e-08,
      "loss": 0.0089,
      "step": 3033180
    },
    {
      "epoch": 4.963898326165367,
      "grad_norm": 0.0320725291967392,
      "learning_rate": 7.267911150939294e-08,
      "loss": 0.009,
      "step": 3033200
    },
    {
      "epoch": 4.963931056604021,
      "grad_norm": 0.3960350453853607,
      "learning_rate": 7.261321929587581e-08,
      "loss": 0.0093,
      "step": 3033220
    },
    {
      "epoch": 4.963963787042674,
      "grad_norm": 0.20881512761116028,
      "learning_rate": 7.254732708235868e-08,
      "loss": 0.0061,
      "step": 3033240
    },
    {
      "epoch": 4.963996517481327,
      "grad_norm": 0.10095315426588058,
      "learning_rate": 7.248143486884156e-08,
      "loss": 0.0078,
      "step": 3033260
    },
    {
      "epoch": 4.964029247919981,
      "grad_norm": 0.2098844051361084,
      "learning_rate": 7.241554265532443e-08,
      "loss": 0.0119,
      "step": 3033280
    },
    {
      "epoch": 4.964061978358634,
      "grad_norm": 0.309160441160202,
      "learning_rate": 7.23496504418073e-08,
      "loss": 0.0088,
      "step": 3033300
    },
    {
      "epoch": 4.964094708797287,
      "grad_norm": 0.24178661406040192,
      "learning_rate": 7.228375822829017e-08,
      "loss": 0.0102,
      "step": 3033320
    },
    {
      "epoch": 4.9641274392359405,
      "grad_norm": 0.18003836274147034,
      "learning_rate": 7.221786601477304e-08,
      "loss": 0.0049,
      "step": 3033340
    },
    {
      "epoch": 4.964160169674594,
      "grad_norm": 0.43960022926330566,
      "learning_rate": 7.21519738012559e-08,
      "loss": 0.0094,
      "step": 3033360
    },
    {
      "epoch": 4.964192900113248,
      "grad_norm": 0.3522465229034424,
      "learning_rate": 7.208608158773879e-08,
      "loss": 0.0089,
      "step": 3033380
    },
    {
      "epoch": 4.9642256305519,
      "grad_norm": 0.2968254089355469,
      "learning_rate": 7.202018937422166e-08,
      "loss": 0.007,
      "step": 3033400
    },
    {
      "epoch": 4.964258360990554,
      "grad_norm": 0.12571078538894653,
      "learning_rate": 7.195429716070452e-08,
      "loss": 0.0172,
      "step": 3033420
    },
    {
      "epoch": 4.9642910914292075,
      "grad_norm": 0.1266453117132187,
      "learning_rate": 7.18884049471874e-08,
      "loss": 0.0058,
      "step": 3033440
    },
    {
      "epoch": 4.964323821867861,
      "grad_norm": 0.31274545192718506,
      "learning_rate": 7.182251273367026e-08,
      "loss": 0.0077,
      "step": 3033460
    },
    {
      "epoch": 4.964356552306514,
      "grad_norm": 0.18891477584838867,
      "learning_rate": 7.175662052015314e-08,
      "loss": 0.0077,
      "step": 3033480
    },
    {
      "epoch": 4.964389282745167,
      "grad_norm": 0.1462581902742386,
      "learning_rate": 7.169072830663601e-08,
      "loss": 0.0082,
      "step": 3033500
    },
    {
      "epoch": 4.964422013183821,
      "grad_norm": 0.1810852736234665,
      "learning_rate": 7.162483609311888e-08,
      "loss": 0.0088,
      "step": 3033520
    },
    {
      "epoch": 4.964454743622474,
      "grad_norm": 0.45127585530281067,
      "learning_rate": 7.155894387960175e-08,
      "loss": 0.0097,
      "step": 3033540
    },
    {
      "epoch": 4.964487474061127,
      "grad_norm": 0.5171652436256409,
      "learning_rate": 7.149305166608462e-08,
      "loss": 0.0106,
      "step": 3033560
    },
    {
      "epoch": 4.964520204499781,
      "grad_norm": 0.20765608549118042,
      "learning_rate": 7.14271594525675e-08,
      "loss": 0.0103,
      "step": 3033580
    },
    {
      "epoch": 4.964552934938434,
      "grad_norm": 0.11505355685949326,
      "learning_rate": 7.136126723905037e-08,
      "loss": 0.0068,
      "step": 3033600
    },
    {
      "epoch": 4.964585665377087,
      "grad_norm": 0.5109423995018005,
      "learning_rate": 7.129537502553324e-08,
      "loss": 0.0099,
      "step": 3033620
    },
    {
      "epoch": 4.964618395815741,
      "grad_norm": 0.26826032996177673,
      "learning_rate": 7.122948281201611e-08,
      "loss": 0.0108,
      "step": 3033640
    },
    {
      "epoch": 4.964651126254394,
      "grad_norm": 0.17260228097438812,
      "learning_rate": 7.116359059849898e-08,
      "loss": 0.009,
      "step": 3033660
    },
    {
      "epoch": 4.964683856693047,
      "grad_norm": 0.14305479824543,
      "learning_rate": 7.109769838498184e-08,
      "loss": 0.0052,
      "step": 3033680
    },
    {
      "epoch": 4.964716587131701,
      "grad_norm": 0.40200212597846985,
      "learning_rate": 7.103180617146473e-08,
      "loss": 0.0083,
      "step": 3033700
    },
    {
      "epoch": 4.964749317570354,
      "grad_norm": 0.335121214389801,
      "learning_rate": 7.09659139579476e-08,
      "loss": 0.0101,
      "step": 3033720
    },
    {
      "epoch": 4.964782048009008,
      "grad_norm": 0.18922585248947144,
      "learning_rate": 7.090002174443046e-08,
      "loss": 0.0097,
      "step": 3033740
    },
    {
      "epoch": 4.964814778447661,
      "grad_norm": 0.20662245154380798,
      "learning_rate": 7.083412953091333e-08,
      "loss": 0.008,
      "step": 3033760
    },
    {
      "epoch": 4.964847508886314,
      "grad_norm": 0.29758313298225403,
      "learning_rate": 7.076823731739622e-08,
      "loss": 0.0108,
      "step": 3033780
    },
    {
      "epoch": 4.964880239324968,
      "grad_norm": 0.1145041286945343,
      "learning_rate": 7.070234510387908e-08,
      "loss": 0.0101,
      "step": 3033800
    },
    {
      "epoch": 4.9649129697636205,
      "grad_norm": 0.25440722703933716,
      "learning_rate": 7.063645289036195e-08,
      "loss": 0.0083,
      "step": 3033820
    },
    {
      "epoch": 4.964945700202274,
      "grad_norm": 0.15545031428337097,
      "learning_rate": 7.057056067684482e-08,
      "loss": 0.0083,
      "step": 3033840
    },
    {
      "epoch": 4.964978430640928,
      "grad_norm": 0.2534407377243042,
      "learning_rate": 7.050466846332769e-08,
      "loss": 0.0071,
      "step": 3033860
    },
    {
      "epoch": 4.96501116107958,
      "grad_norm": 0.1311320960521698,
      "learning_rate": 7.043877624981056e-08,
      "loss": 0.0073,
      "step": 3033880
    },
    {
      "epoch": 4.965043891518234,
      "grad_norm": 0.1575445979833603,
      "learning_rate": 7.037288403629344e-08,
      "loss": 0.0111,
      "step": 3033900
    },
    {
      "epoch": 4.965076621956888,
      "grad_norm": 0.2319912314414978,
      "learning_rate": 7.030699182277631e-08,
      "loss": 0.0082,
      "step": 3033920
    },
    {
      "epoch": 4.965109352395541,
      "grad_norm": 0.07749255746603012,
      "learning_rate": 7.024109960925918e-08,
      "loss": 0.0117,
      "step": 3033940
    },
    {
      "epoch": 4.965142082834194,
      "grad_norm": 0.08107028156518936,
      "learning_rate": 7.017520739574205e-08,
      "loss": 0.0106,
      "step": 3033960
    },
    {
      "epoch": 4.9651748132728475,
      "grad_norm": 0.6627620458602905,
      "learning_rate": 7.010931518222492e-08,
      "loss": 0.0145,
      "step": 3033980
    },
    {
      "epoch": 4.965207543711501,
      "grad_norm": 0.2922111451625824,
      "learning_rate": 7.00434229687078e-08,
      "loss": 0.0112,
      "step": 3034000
    },
    {
      "epoch": 4.965240274150154,
      "grad_norm": 0.18464691936969757,
      "learning_rate": 6.997753075519066e-08,
      "loss": 0.0098,
      "step": 3034020
    },
    {
      "epoch": 4.965273004588807,
      "grad_norm": 0.5354151129722595,
      "learning_rate": 6.991163854167354e-08,
      "loss": 0.0103,
      "step": 3034040
    },
    {
      "epoch": 4.965305735027461,
      "grad_norm": 0.1642039716243744,
      "learning_rate": 6.984574632815641e-08,
      "loss": 0.0184,
      "step": 3034060
    },
    {
      "epoch": 4.965338465466115,
      "grad_norm": 0.030349791049957275,
      "learning_rate": 6.977985411463927e-08,
      "loss": 0.0071,
      "step": 3034080
    },
    {
      "epoch": 4.965371195904767,
      "grad_norm": 0.4023372530937195,
      "learning_rate": 6.971396190112216e-08,
      "loss": 0.0079,
      "step": 3034100
    },
    {
      "epoch": 4.965403926343421,
      "grad_norm": 0.6278373003005981,
      "learning_rate": 6.964806968760503e-08,
      "loss": 0.008,
      "step": 3034120
    },
    {
      "epoch": 4.9654366567820745,
      "grad_norm": 0.1683361530303955,
      "learning_rate": 6.958217747408789e-08,
      "loss": 0.0122,
      "step": 3034140
    },
    {
      "epoch": 4.965469387220727,
      "grad_norm": 0.31493091583251953,
      "learning_rate": 6.951628526057076e-08,
      "loss": 0.0057,
      "step": 3034160
    },
    {
      "epoch": 4.965502117659381,
      "grad_norm": 0.12212833017110825,
      "learning_rate": 6.945039304705363e-08,
      "loss": 0.0065,
      "step": 3034180
    },
    {
      "epoch": 4.965534848098034,
      "grad_norm": 0.04713444039225578,
      "learning_rate": 6.93845008335365e-08,
      "loss": 0.0108,
      "step": 3034200
    },
    {
      "epoch": 4.965567578536687,
      "grad_norm": 0.16667801141738892,
      "learning_rate": 6.931860862001938e-08,
      "loss": 0.0085,
      "step": 3034220
    },
    {
      "epoch": 4.965600308975341,
      "grad_norm": 0.1621648222208023,
      "learning_rate": 6.925271640650225e-08,
      "loss": 0.0088,
      "step": 3034240
    },
    {
      "epoch": 4.965633039413994,
      "grad_norm": 0.0520307831466198,
      "learning_rate": 6.918682419298512e-08,
      "loss": 0.0091,
      "step": 3034260
    },
    {
      "epoch": 4.965665769852648,
      "grad_norm": 0.20204174518585205,
      "learning_rate": 6.912093197946799e-08,
      "loss": 0.0078,
      "step": 3034280
    },
    {
      "epoch": 4.9656985002913006,
      "grad_norm": 0.23684106767177582,
      "learning_rate": 6.905503976595087e-08,
      "loss": 0.0095,
      "step": 3034300
    },
    {
      "epoch": 4.965731230729954,
      "grad_norm": 0.2978985607624054,
      "learning_rate": 6.898914755243374e-08,
      "loss": 0.0124,
      "step": 3034320
    },
    {
      "epoch": 4.965763961168608,
      "grad_norm": 0.3329305946826935,
      "learning_rate": 6.892325533891661e-08,
      "loss": 0.005,
      "step": 3034340
    },
    {
      "epoch": 4.965796691607261,
      "grad_norm": 0.13144104182720184,
      "learning_rate": 6.885736312539948e-08,
      "loss": 0.0062,
      "step": 3034360
    },
    {
      "epoch": 4.965829422045914,
      "grad_norm": 0.20532771944999695,
      "learning_rate": 6.879147091188235e-08,
      "loss": 0.0058,
      "step": 3034380
    },
    {
      "epoch": 4.965862152484568,
      "grad_norm": 0.1358995884656906,
      "learning_rate": 6.872557869836521e-08,
      "loss": 0.012,
      "step": 3034400
    },
    {
      "epoch": 4.965894882923221,
      "grad_norm": 0.35995998978614807,
      "learning_rate": 6.865968648484808e-08,
      "loss": 0.0115,
      "step": 3034420
    },
    {
      "epoch": 4.965927613361874,
      "grad_norm": 0.115811288356781,
      "learning_rate": 6.859379427133097e-08,
      "loss": 0.0133,
      "step": 3034440
    },
    {
      "epoch": 4.9659603438005275,
      "grad_norm": 0.15399521589279175,
      "learning_rate": 6.852790205781383e-08,
      "loss": 0.014,
      "step": 3034460
    },
    {
      "epoch": 4.965993074239181,
      "grad_norm": 0.09093088656663895,
      "learning_rate": 6.84620098442967e-08,
      "loss": 0.0096,
      "step": 3034480
    },
    {
      "epoch": 4.966025804677834,
      "grad_norm": 0.3736291825771332,
      "learning_rate": 6.839611763077957e-08,
      "loss": 0.0076,
      "step": 3034500
    },
    {
      "epoch": 4.966058535116487,
      "grad_norm": 0.14250145852565765,
      "learning_rate": 6.833022541726244e-08,
      "loss": 0.0054,
      "step": 3034520
    },
    {
      "epoch": 4.966091265555141,
      "grad_norm": 0.21090056002140045,
      "learning_rate": 6.826433320374532e-08,
      "loss": 0.0096,
      "step": 3034540
    },
    {
      "epoch": 4.966123995993795,
      "grad_norm": 0.17372028529644012,
      "learning_rate": 6.819844099022819e-08,
      "loss": 0.0134,
      "step": 3034560
    },
    {
      "epoch": 4.966156726432447,
      "grad_norm": 0.5611087083816528,
      "learning_rate": 6.813254877671106e-08,
      "loss": 0.0111,
      "step": 3034580
    },
    {
      "epoch": 4.966189456871101,
      "grad_norm": 0.21870197355747223,
      "learning_rate": 6.806665656319393e-08,
      "loss": 0.0086,
      "step": 3034600
    },
    {
      "epoch": 4.9662221873097545,
      "grad_norm": 0.18729673326015472,
      "learning_rate": 6.80007643496768e-08,
      "loss": 0.0058,
      "step": 3034620
    },
    {
      "epoch": 4.966254917748408,
      "grad_norm": 0.13099218904972076,
      "learning_rate": 6.793487213615968e-08,
      "loss": 0.0088,
      "step": 3034640
    },
    {
      "epoch": 4.966287648187061,
      "grad_norm": 0.39013057947158813,
      "learning_rate": 6.786897992264255e-08,
      "loss": 0.0176,
      "step": 3034660
    },
    {
      "epoch": 4.966320378625714,
      "grad_norm": 0.321289598941803,
      "learning_rate": 6.780308770912541e-08,
      "loss": 0.0092,
      "step": 3034680
    },
    {
      "epoch": 4.966353109064368,
      "grad_norm": 0.23587742447853088,
      "learning_rate": 6.77371954956083e-08,
      "loss": 0.0074,
      "step": 3034700
    },
    {
      "epoch": 4.966385839503021,
      "grad_norm": 0.15808115899562836,
      "learning_rate": 6.767130328209117e-08,
      "loss": 0.0106,
      "step": 3034720
    },
    {
      "epoch": 4.966418569941674,
      "grad_norm": 0.1480543613433838,
      "learning_rate": 6.760541106857402e-08,
      "loss": 0.0074,
      "step": 3034740
    },
    {
      "epoch": 4.966451300380328,
      "grad_norm": 0.4036460518836975,
      "learning_rate": 6.753951885505691e-08,
      "loss": 0.0102,
      "step": 3034760
    },
    {
      "epoch": 4.966484030818981,
      "grad_norm": 0.5657215118408203,
      "learning_rate": 6.747362664153978e-08,
      "loss": 0.0108,
      "step": 3034780
    },
    {
      "epoch": 4.966516761257634,
      "grad_norm": 0.21265916526317596,
      "learning_rate": 6.740773442802264e-08,
      "loss": 0.0139,
      "step": 3034800
    },
    {
      "epoch": 4.966549491696288,
      "grad_norm": 0.17700408399105072,
      "learning_rate": 6.734184221450551e-08,
      "loss": 0.0153,
      "step": 3034820
    },
    {
      "epoch": 4.966582222134941,
      "grad_norm": 0.5448933243751526,
      "learning_rate": 6.72759500009884e-08,
      "loss": 0.0123,
      "step": 3034840
    },
    {
      "epoch": 4.966614952573594,
      "grad_norm": 0.35046082735061646,
      "learning_rate": 6.721005778747126e-08,
      "loss": 0.0076,
      "step": 3034860
    },
    {
      "epoch": 4.966647683012248,
      "grad_norm": 0.3014521300792694,
      "learning_rate": 6.714416557395413e-08,
      "loss": 0.0074,
      "step": 3034880
    },
    {
      "epoch": 4.966680413450901,
      "grad_norm": 0.08174676448106766,
      "learning_rate": 6.7078273360437e-08,
      "loss": 0.006,
      "step": 3034900
    },
    {
      "epoch": 4.966713143889555,
      "grad_norm": 0.16235212981700897,
      "learning_rate": 6.701238114691987e-08,
      "loss": 0.0102,
      "step": 3034920
    },
    {
      "epoch": 4.966745874328208,
      "grad_norm": 0.4075527787208557,
      "learning_rate": 6.694648893340275e-08,
      "loss": 0.0084,
      "step": 3034940
    },
    {
      "epoch": 4.966778604766861,
      "grad_norm": 0.32445228099823,
      "learning_rate": 6.688059671988562e-08,
      "loss": 0.0077,
      "step": 3034960
    },
    {
      "epoch": 4.966811335205515,
      "grad_norm": 0.18607182800769806,
      "learning_rate": 6.681470450636849e-08,
      "loss": 0.014,
      "step": 3034980
    },
    {
      "epoch": 4.9668440656441675,
      "grad_norm": 0.1817379593849182,
      "learning_rate": 6.674881229285136e-08,
      "loss": 0.0113,
      "step": 3035000
    },
    {
      "epoch": 4.966876796082821,
      "grad_norm": 0.7700070142745972,
      "learning_rate": 6.668292007933423e-08,
      "loss": 0.0084,
      "step": 3035020
    },
    {
      "epoch": 4.966909526521475,
      "grad_norm": 0.1726561188697815,
      "learning_rate": 6.66170278658171e-08,
      "loss": 0.0072,
      "step": 3035040
    },
    {
      "epoch": 4.966942256960127,
      "grad_norm": 0.36422479152679443,
      "learning_rate": 6.655113565229998e-08,
      "loss": 0.0085,
      "step": 3035060
    },
    {
      "epoch": 4.966974987398781,
      "grad_norm": 0.17850902676582336,
      "learning_rate": 6.648524343878284e-08,
      "loss": 0.0095,
      "step": 3035080
    },
    {
      "epoch": 4.9670077178374346,
      "grad_norm": 0.35468316078186035,
      "learning_rate": 6.641935122526572e-08,
      "loss": 0.005,
      "step": 3035100
    },
    {
      "epoch": 4.967040448276088,
      "grad_norm": 0.8415959477424622,
      "learning_rate": 6.635345901174858e-08,
      "loss": 0.0073,
      "step": 3035120
    },
    {
      "epoch": 4.967073178714741,
      "grad_norm": 0.2788582146167755,
      "learning_rate": 6.628756679823145e-08,
      "loss": 0.008,
      "step": 3035140
    },
    {
      "epoch": 4.9671059091533944,
      "grad_norm": 0.1897071897983551,
      "learning_rate": 6.622167458471434e-08,
      "loss": 0.0063,
      "step": 3035160
    },
    {
      "epoch": 4.967138639592048,
      "grad_norm": 0.4465271830558777,
      "learning_rate": 6.61557823711972e-08,
      "loss": 0.0084,
      "step": 3035180
    },
    {
      "epoch": 4.967171370030702,
      "grad_norm": 0.4111594557762146,
      "learning_rate": 6.608989015768007e-08,
      "loss": 0.0104,
      "step": 3035200
    },
    {
      "epoch": 4.967204100469354,
      "grad_norm": 0.30512139201164246,
      "learning_rate": 6.602399794416294e-08,
      "loss": 0.0143,
      "step": 3035220
    },
    {
      "epoch": 4.967236830908008,
      "grad_norm": 0.03621181845664978,
      "learning_rate": 6.595810573064581e-08,
      "loss": 0.0069,
      "step": 3035240
    },
    {
      "epoch": 4.9672695613466615,
      "grad_norm": 0.14046502113342285,
      "learning_rate": 6.589221351712869e-08,
      "loss": 0.0072,
      "step": 3035260
    },
    {
      "epoch": 4.967302291785314,
      "grad_norm": 0.2828349769115448,
      "learning_rate": 6.582632130361156e-08,
      "loss": 0.0088,
      "step": 3035280
    },
    {
      "epoch": 4.967335022223968,
      "grad_norm": 0.16795462369918823,
      "learning_rate": 6.576042909009443e-08,
      "loss": 0.0094,
      "step": 3035300
    },
    {
      "epoch": 4.967367752662621,
      "grad_norm": 0.24254566431045532,
      "learning_rate": 6.56945368765773e-08,
      "loss": 0.0102,
      "step": 3035320
    },
    {
      "epoch": 4.967400483101274,
      "grad_norm": 0.06310630589723587,
      "learning_rate": 6.562864466306016e-08,
      "loss": 0.0095,
      "step": 3035340
    },
    {
      "epoch": 4.967433213539928,
      "grad_norm": 0.162303626537323,
      "learning_rate": 6.556275244954305e-08,
      "loss": 0.0091,
      "step": 3035360
    },
    {
      "epoch": 4.967465943978581,
      "grad_norm": 0.1956232488155365,
      "learning_rate": 6.549686023602592e-08,
      "loss": 0.0057,
      "step": 3035380
    },
    {
      "epoch": 4.967498674417235,
      "grad_norm": 0.19991643726825714,
      "learning_rate": 6.543096802250878e-08,
      "loss": 0.0083,
      "step": 3035400
    },
    {
      "epoch": 4.967531404855888,
      "grad_norm": 0.6856396198272705,
      "learning_rate": 6.536507580899166e-08,
      "loss": 0.0081,
      "step": 3035420
    },
    {
      "epoch": 4.967564135294541,
      "grad_norm": 0.37740159034729004,
      "learning_rate": 6.529918359547453e-08,
      "loss": 0.0093,
      "step": 3035440
    },
    {
      "epoch": 4.967596865733195,
      "grad_norm": 0.17038178443908691,
      "learning_rate": 6.52332913819574e-08,
      "loss": 0.0093,
      "step": 3035460
    },
    {
      "epoch": 4.9676295961718475,
      "grad_norm": 0.16361887753009796,
      "learning_rate": 6.516739916844027e-08,
      "loss": 0.0115,
      "step": 3035480
    },
    {
      "epoch": 4.967662326610501,
      "grad_norm": 0.2887957692146301,
      "learning_rate": 6.510150695492315e-08,
      "loss": 0.0129,
      "step": 3035500
    },
    {
      "epoch": 4.967695057049155,
      "grad_norm": 0.2279084324836731,
      "learning_rate": 6.503561474140601e-08,
      "loss": 0.0082,
      "step": 3035520
    },
    {
      "epoch": 4.967727787487808,
      "grad_norm": 0.18120405077934265,
      "learning_rate": 6.496972252788888e-08,
      "loss": 0.0056,
      "step": 3035540
    },
    {
      "epoch": 4.967760517926461,
      "grad_norm": 0.13739800453186035,
      "learning_rate": 6.490383031437175e-08,
      "loss": 0.0065,
      "step": 3035560
    },
    {
      "epoch": 4.967793248365115,
      "grad_norm": 0.7076867818832397,
      "learning_rate": 6.483793810085463e-08,
      "loss": 0.014,
      "step": 3035580
    },
    {
      "epoch": 4.967825978803768,
      "grad_norm": 0.15442055463790894,
      "learning_rate": 6.47720458873375e-08,
      "loss": 0.0084,
      "step": 3035600
    },
    {
      "epoch": 4.967858709242421,
      "grad_norm": 0.2643348276615143,
      "learning_rate": 6.470615367382037e-08,
      "loss": 0.0125,
      "step": 3035620
    },
    {
      "epoch": 4.9678914396810745,
      "grad_norm": 0.21827660501003265,
      "learning_rate": 6.464026146030324e-08,
      "loss": 0.0086,
      "step": 3035640
    },
    {
      "epoch": 4.967924170119728,
      "grad_norm": 0.13209308683872223,
      "learning_rate": 6.457436924678611e-08,
      "loss": 0.0051,
      "step": 3035660
    },
    {
      "epoch": 4.967956900558381,
      "grad_norm": 0.214199036359787,
      "learning_rate": 6.450847703326899e-08,
      "loss": 0.0057,
      "step": 3035680
    },
    {
      "epoch": 4.967989630997034,
      "grad_norm": 0.1379254013299942,
      "learning_rate": 6.444258481975186e-08,
      "loss": 0.008,
      "step": 3035700
    },
    {
      "epoch": 4.968022361435688,
      "grad_norm": 0.6322200298309326,
      "learning_rate": 6.437669260623473e-08,
      "loss": 0.009,
      "step": 3035720
    },
    {
      "epoch": 4.968055091874342,
      "grad_norm": 0.43031471967697144,
      "learning_rate": 6.431080039271759e-08,
      "loss": 0.0078,
      "step": 3035740
    },
    {
      "epoch": 4.968087822312994,
      "grad_norm": 0.2808707654476166,
      "learning_rate": 6.424490817920048e-08,
      "loss": 0.0101,
      "step": 3035760
    },
    {
      "epoch": 4.968120552751648,
      "grad_norm": 0.7152608036994934,
      "learning_rate": 6.417901596568333e-08,
      "loss": 0.0082,
      "step": 3035780
    },
    {
      "epoch": 4.9681532831903015,
      "grad_norm": 0.21691972017288208,
      "learning_rate": 6.41131237521662e-08,
      "loss": 0.007,
      "step": 3035800
    },
    {
      "epoch": 4.968186013628955,
      "grad_norm": 0.3815610706806183,
      "learning_rate": 6.404723153864909e-08,
      "loss": 0.0075,
      "step": 3035820
    },
    {
      "epoch": 4.968218744067608,
      "grad_norm": 0.34479814767837524,
      "learning_rate": 6.398133932513195e-08,
      "loss": 0.0077,
      "step": 3035840
    },
    {
      "epoch": 4.968251474506261,
      "grad_norm": 0.3797944486141205,
      "learning_rate": 6.391544711161482e-08,
      "loss": 0.0107,
      "step": 3035860
    },
    {
      "epoch": 4.968284204944915,
      "grad_norm": 0.24701014161109924,
      "learning_rate": 6.38495548980977e-08,
      "loss": 0.0088,
      "step": 3035880
    },
    {
      "epoch": 4.968316935383568,
      "grad_norm": 1.313890814781189,
      "learning_rate": 6.378366268458057e-08,
      "loss": 0.0164,
      "step": 3035900
    },
    {
      "epoch": 4.968349665822221,
      "grad_norm": 0.13965895771980286,
      "learning_rate": 6.371777047106344e-08,
      "loss": 0.0079,
      "step": 3035920
    },
    {
      "epoch": 4.968382396260875,
      "grad_norm": 0.26362860202789307,
      "learning_rate": 6.365187825754631e-08,
      "loss": 0.0086,
      "step": 3035940
    },
    {
      "epoch": 4.968415126699528,
      "grad_norm": 0.7899229526519775,
      "learning_rate": 6.358598604402918e-08,
      "loss": 0.0158,
      "step": 3035960
    },
    {
      "epoch": 4.968447857138181,
      "grad_norm": 0.17496837675571442,
      "learning_rate": 6.352009383051205e-08,
      "loss": 0.0066,
      "step": 3035980
    },
    {
      "epoch": 4.968480587576835,
      "grad_norm": 0.29080477356910706,
      "learning_rate": 6.345420161699493e-08,
      "loss": 0.0079,
      "step": 3036000
    },
    {
      "epoch": 4.968513318015488,
      "grad_norm": 0.12103064358234406,
      "learning_rate": 6.33883094034778e-08,
      "loss": 0.0055,
      "step": 3036020
    },
    {
      "epoch": 4.968546048454141,
      "grad_norm": 0.22222654521465302,
      "learning_rate": 6.332241718996067e-08,
      "loss": 0.0078,
      "step": 3036040
    },
    {
      "epoch": 4.968578778892795,
      "grad_norm": 0.2688487768173218,
      "learning_rate": 6.325652497644353e-08,
      "loss": 0.0082,
      "step": 3036060
    },
    {
      "epoch": 4.968611509331448,
      "grad_norm": 0.14783254265785217,
      "learning_rate": 6.319063276292642e-08,
      "loss": 0.0115,
      "step": 3036080
    },
    {
      "epoch": 4.968644239770102,
      "grad_norm": 0.09882152080535889,
      "learning_rate": 6.312474054940929e-08,
      "loss": 0.0072,
      "step": 3036100
    },
    {
      "epoch": 4.9686769702087545,
      "grad_norm": 0.0688188299536705,
      "learning_rate": 6.305884833589215e-08,
      "loss": 0.0067,
      "step": 3036120
    },
    {
      "epoch": 4.968709700647408,
      "grad_norm": 0.1999320238828659,
      "learning_rate": 6.299295612237502e-08,
      "loss": 0.0096,
      "step": 3036140
    },
    {
      "epoch": 4.968742431086062,
      "grad_norm": 0.2258286029100418,
      "learning_rate": 6.29270639088579e-08,
      "loss": 0.0104,
      "step": 3036160
    },
    {
      "epoch": 4.968775161524714,
      "grad_norm": 0.11810744553804398,
      "learning_rate": 6.286117169534076e-08,
      "loss": 0.0053,
      "step": 3036180
    },
    {
      "epoch": 4.968807891963368,
      "grad_norm": 0.20043431222438812,
      "learning_rate": 6.279527948182363e-08,
      "loss": 0.0086,
      "step": 3036200
    },
    {
      "epoch": 4.968840622402022,
      "grad_norm": 0.3138939142227173,
      "learning_rate": 6.272938726830651e-08,
      "loss": 0.008,
      "step": 3036220
    },
    {
      "epoch": 4.968873352840674,
      "grad_norm": 0.14034177362918854,
      "learning_rate": 6.266349505478938e-08,
      "loss": 0.0102,
      "step": 3036240
    },
    {
      "epoch": 4.968906083279328,
      "grad_norm": 0.46321943402290344,
      "learning_rate": 6.259760284127225e-08,
      "loss": 0.0074,
      "step": 3036260
    },
    {
      "epoch": 4.9689388137179815,
      "grad_norm": 0.44339293241500854,
      "learning_rate": 6.253171062775512e-08,
      "loss": 0.0105,
      "step": 3036280
    },
    {
      "epoch": 4.968971544156635,
      "grad_norm": 0.29448533058166504,
      "learning_rate": 6.2465818414238e-08,
      "loss": 0.0087,
      "step": 3036300
    },
    {
      "epoch": 4.969004274595288,
      "grad_norm": 0.3075951337814331,
      "learning_rate": 6.239992620072087e-08,
      "loss": 0.006,
      "step": 3036320
    },
    {
      "epoch": 4.969037005033941,
      "grad_norm": 0.22217296063899994,
      "learning_rate": 6.233403398720374e-08,
      "loss": 0.0054,
      "step": 3036340
    },
    {
      "epoch": 4.969069735472595,
      "grad_norm": 0.26833415031433105,
      "learning_rate": 6.226814177368661e-08,
      "loss": 0.0114,
      "step": 3036360
    },
    {
      "epoch": 4.969102465911249,
      "grad_norm": 0.2626507580280304,
      "learning_rate": 6.220224956016948e-08,
      "loss": 0.0101,
      "step": 3036380
    },
    {
      "epoch": 4.969135196349901,
      "grad_norm": 0.14123402535915375,
      "learning_rate": 6.213635734665234e-08,
      "loss": 0.0072,
      "step": 3036400
    },
    {
      "epoch": 4.969167926788555,
      "grad_norm": 0.15511387586593628,
      "learning_rate": 6.207046513313523e-08,
      "loss": 0.0046,
      "step": 3036420
    },
    {
      "epoch": 4.9692006572272085,
      "grad_norm": 0.43605104088783264,
      "learning_rate": 6.20045729196181e-08,
      "loss": 0.0098,
      "step": 3036440
    },
    {
      "epoch": 4.969233387665861,
      "grad_norm": 0.23060156404972076,
      "learning_rate": 6.193868070610096e-08,
      "loss": 0.0071,
      "step": 3036460
    },
    {
      "epoch": 4.969266118104515,
      "grad_norm": 0.7183700203895569,
      "learning_rate": 6.187278849258384e-08,
      "loss": 0.0094,
      "step": 3036480
    },
    {
      "epoch": 4.969298848543168,
      "grad_norm": 0.17912127077579498,
      "learning_rate": 6.18068962790667e-08,
      "loss": 0.0057,
      "step": 3036500
    },
    {
      "epoch": 4.969331578981821,
      "grad_norm": 0.10578494518995285,
      "learning_rate": 6.174100406554957e-08,
      "loss": 0.0088,
      "step": 3036520
    },
    {
      "epoch": 4.969364309420475,
      "grad_norm": 0.5043386220932007,
      "learning_rate": 6.167511185203245e-08,
      "loss": 0.0087,
      "step": 3036540
    },
    {
      "epoch": 4.969397039859128,
      "grad_norm": 0.16816765069961548,
      "learning_rate": 6.160921963851532e-08,
      "loss": 0.0061,
      "step": 3036560
    },
    {
      "epoch": 4.969429770297782,
      "grad_norm": 0.296115905046463,
      "learning_rate": 6.154332742499819e-08,
      "loss": 0.0101,
      "step": 3036580
    },
    {
      "epoch": 4.969462500736435,
      "grad_norm": 0.08290154486894608,
      "learning_rate": 6.147743521148106e-08,
      "loss": 0.0068,
      "step": 3036600
    },
    {
      "epoch": 4.969495231175088,
      "grad_norm": 0.23040182888507843,
      "learning_rate": 6.141154299796394e-08,
      "loss": 0.0082,
      "step": 3036620
    },
    {
      "epoch": 4.969527961613742,
      "grad_norm": 0.36222562193870544,
      "learning_rate": 6.134565078444681e-08,
      "loss": 0.0126,
      "step": 3036640
    },
    {
      "epoch": 4.969560692052395,
      "grad_norm": 0.16608421504497528,
      "learning_rate": 6.127975857092968e-08,
      "loss": 0.0124,
      "step": 3036660
    },
    {
      "epoch": 4.969593422491048,
      "grad_norm": 0.25730958580970764,
      "learning_rate": 6.121386635741255e-08,
      "loss": 0.0089,
      "step": 3036680
    },
    {
      "epoch": 4.969626152929702,
      "grad_norm": 0.21403959393501282,
      "learning_rate": 6.114797414389542e-08,
      "loss": 0.0067,
      "step": 3036700
    },
    {
      "epoch": 4.969658883368355,
      "grad_norm": 0.43492379784584045,
      "learning_rate": 6.108208193037828e-08,
      "loss": 0.0088,
      "step": 3036720
    },
    {
      "epoch": 4.969691613807008,
      "grad_norm": 0.2508710026741028,
      "learning_rate": 6.101618971686117e-08,
      "loss": 0.01,
      "step": 3036740
    },
    {
      "epoch": 4.969724344245662,
      "grad_norm": 0.15366323292255402,
      "learning_rate": 6.095029750334404e-08,
      "loss": 0.0076,
      "step": 3036760
    },
    {
      "epoch": 4.969757074684315,
      "grad_norm": 0.22422991693019867,
      "learning_rate": 6.08844052898269e-08,
      "loss": 0.0067,
      "step": 3036780
    },
    {
      "epoch": 4.969789805122968,
      "grad_norm": 0.3959352374076843,
      "learning_rate": 6.081851307630977e-08,
      "loss": 0.0096,
      "step": 3036800
    },
    {
      "epoch": 4.9698225355616215,
      "grad_norm": 0.157046377658844,
      "learning_rate": 6.075262086279266e-08,
      "loss": 0.0097,
      "step": 3036820
    },
    {
      "epoch": 4.969855266000275,
      "grad_norm": 0.19525621831417084,
      "learning_rate": 6.068672864927552e-08,
      "loss": 0.0051,
      "step": 3036840
    },
    {
      "epoch": 4.969887996438929,
      "grad_norm": 0.07839556783437729,
      "learning_rate": 6.062083643575839e-08,
      "loss": 0.0071,
      "step": 3036860
    },
    {
      "epoch": 4.969920726877581,
      "grad_norm": 0.10530595481395721,
      "learning_rate": 6.055494422224127e-08,
      "loss": 0.0109,
      "step": 3036880
    },
    {
      "epoch": 4.969953457316235,
      "grad_norm": 0.3948482275009155,
      "learning_rate": 6.048905200872413e-08,
      "loss": 0.0131,
      "step": 3036900
    },
    {
      "epoch": 4.9699861877548885,
      "grad_norm": 0.3071405589580536,
      "learning_rate": 6.0423159795207e-08,
      "loss": 0.0093,
      "step": 3036920
    },
    {
      "epoch": 4.970018918193541,
      "grad_norm": 0.37680456042289734,
      "learning_rate": 6.035726758168988e-08,
      "loss": 0.0088,
      "step": 3036940
    },
    {
      "epoch": 4.970051648632195,
      "grad_norm": 0.15625151991844177,
      "learning_rate": 6.029137536817275e-08,
      "loss": 0.0076,
      "step": 3036960
    },
    {
      "epoch": 4.970084379070848,
      "grad_norm": 0.12165545672178268,
      "learning_rate": 6.022548315465562e-08,
      "loss": 0.0132,
      "step": 3036980
    },
    {
      "epoch": 4.970117109509502,
      "grad_norm": 0.14681661128997803,
      "learning_rate": 6.015959094113849e-08,
      "loss": 0.007,
      "step": 3037000
    },
    {
      "epoch": 4.970149839948155,
      "grad_norm": 0.2703403830528259,
      "learning_rate": 6.009369872762136e-08,
      "loss": 0.0074,
      "step": 3037020
    },
    {
      "epoch": 4.970182570386808,
      "grad_norm": 0.1538366824388504,
      "learning_rate": 6.002780651410424e-08,
      "loss": 0.0072,
      "step": 3037040
    },
    {
      "epoch": 4.970215300825462,
      "grad_norm": 0.345279723405838,
      "learning_rate": 5.99619143005871e-08,
      "loss": 0.0078,
      "step": 3037060
    },
    {
      "epoch": 4.970248031264115,
      "grad_norm": 0.4589640200138092,
      "learning_rate": 5.989602208706998e-08,
      "loss": 0.0064,
      "step": 3037080
    },
    {
      "epoch": 4.970280761702768,
      "grad_norm": 0.3128109872341156,
      "learning_rate": 5.983012987355285e-08,
      "loss": 0.0104,
      "step": 3037100
    },
    {
      "epoch": 4.970313492141422,
      "grad_norm": 0.07452297955751419,
      "learning_rate": 5.976423766003571e-08,
      "loss": 0.007,
      "step": 3037120
    },
    {
      "epoch": 4.9703462225800745,
      "grad_norm": 0.14271444082260132,
      "learning_rate": 5.96983454465186e-08,
      "loss": 0.0106,
      "step": 3037140
    },
    {
      "epoch": 4.970378953018728,
      "grad_norm": 0.26905176043510437,
      "learning_rate": 5.963245323300147e-08,
      "loss": 0.0084,
      "step": 3037160
    },
    {
      "epoch": 4.970411683457382,
      "grad_norm": 0.24224825203418732,
      "learning_rate": 5.9566561019484334e-08,
      "loss": 0.0126,
      "step": 3037180
    },
    {
      "epoch": 4.970444413896035,
      "grad_norm": 0.11887548118829727,
      "learning_rate": 5.9500668805967206e-08,
      "loss": 0.0106,
      "step": 3037200
    },
    {
      "epoch": 4.970477144334688,
      "grad_norm": 0.24838902056217194,
      "learning_rate": 5.943477659245007e-08,
      "loss": 0.0109,
      "step": 3037220
    },
    {
      "epoch": 4.970509874773342,
      "grad_norm": 0.10027103871107101,
      "learning_rate": 5.9368884378932944e-08,
      "loss": 0.0106,
      "step": 3037240
    },
    {
      "epoch": 4.970542605211995,
      "grad_norm": 0.09598155319690704,
      "learning_rate": 5.930299216541582e-08,
      "loss": 0.0123,
      "step": 3037260
    },
    {
      "epoch": 4.970575335650649,
      "grad_norm": 0.4054844081401825,
      "learning_rate": 5.923709995189869e-08,
      "loss": 0.0098,
      "step": 3037280
    },
    {
      "epoch": 4.9706080660893015,
      "grad_norm": 0.10220714658498764,
      "learning_rate": 5.917120773838156e-08,
      "loss": 0.0096,
      "step": 3037300
    },
    {
      "epoch": 4.970640796527955,
      "grad_norm": 0.17986641824245453,
      "learning_rate": 5.910531552486443e-08,
      "loss": 0.0104,
      "step": 3037320
    },
    {
      "epoch": 4.970673526966609,
      "grad_norm": 0.36628231406211853,
      "learning_rate": 5.90394233113473e-08,
      "loss": 0.0102,
      "step": 3037340
    },
    {
      "epoch": 4.970706257405261,
      "grad_norm": 0.34778642654418945,
      "learning_rate": 5.8973531097830177e-08,
      "loss": 0.0079,
      "step": 3037360
    },
    {
      "epoch": 4.970738987843915,
      "grad_norm": 0.16703595221042633,
      "learning_rate": 5.890763888431305e-08,
      "loss": 0.0118,
      "step": 3037380
    },
    {
      "epoch": 4.970771718282569,
      "grad_norm": 0.26974090933799744,
      "learning_rate": 5.8841746670795914e-08,
      "loss": 0.0082,
      "step": 3037400
    },
    {
      "epoch": 4.970804448721221,
      "grad_norm": 0.28964272141456604,
      "learning_rate": 5.877585445727879e-08,
      "loss": 0.0059,
      "step": 3037420
    },
    {
      "epoch": 4.970837179159875,
      "grad_norm": 0.34582334756851196,
      "learning_rate": 5.870996224376166e-08,
      "loss": 0.0104,
      "step": 3037440
    },
    {
      "epoch": 4.9708699095985285,
      "grad_norm": 0.08289329707622528,
      "learning_rate": 5.864407003024453e-08,
      "loss": 0.0059,
      "step": 3037460
    },
    {
      "epoch": 4.970902640037182,
      "grad_norm": 0.7002419233322144,
      "learning_rate": 5.85781778167274e-08,
      "loss": 0.0093,
      "step": 3037480
    },
    {
      "epoch": 4.970935370475835,
      "grad_norm": 0.3136667311191559,
      "learning_rate": 5.851228560321027e-08,
      "loss": 0.0088,
      "step": 3037500
    },
    {
      "epoch": 4.970968100914488,
      "grad_norm": 0.07287249714136124,
      "learning_rate": 5.8446393389693147e-08,
      "loss": 0.0082,
      "step": 3037520
    },
    {
      "epoch": 4.971000831353142,
      "grad_norm": 0.11345004290342331,
      "learning_rate": 5.838050117617602e-08,
      "loss": 0.0076,
      "step": 3037540
    },
    {
      "epoch": 4.971033561791796,
      "grad_norm": 0.1637621521949768,
      "learning_rate": 5.8314608962658884e-08,
      "loss": 0.0075,
      "step": 3037560
    },
    {
      "epoch": 4.971066292230448,
      "grad_norm": 0.10609894245862961,
      "learning_rate": 5.8248716749141756e-08,
      "loss": 0.0082,
      "step": 3037580
    },
    {
      "epoch": 4.971099022669102,
      "grad_norm": 0.4063374102115631,
      "learning_rate": 5.8182824535624635e-08,
      "loss": 0.0071,
      "step": 3037600
    },
    {
      "epoch": 4.9711317531077555,
      "grad_norm": 0.4222318232059479,
      "learning_rate": 5.81169323221075e-08,
      "loss": 0.0066,
      "step": 3037620
    },
    {
      "epoch": 4.971164483546408,
      "grad_norm": 0.3124258816242218,
      "learning_rate": 5.805104010859037e-08,
      "loss": 0.0116,
      "step": 3037640
    },
    {
      "epoch": 4.971197213985062,
      "grad_norm": 0.28922784328460693,
      "learning_rate": 5.798514789507324e-08,
      "loss": 0.0101,
      "step": 3037660
    },
    {
      "epoch": 4.971229944423715,
      "grad_norm": 0.16460826992988586,
      "learning_rate": 5.791925568155612e-08,
      "loss": 0.0063,
      "step": 3037680
    },
    {
      "epoch": 4.971262674862368,
      "grad_norm": 0.09369343519210815,
      "learning_rate": 5.785336346803899e-08,
      "loss": 0.0106,
      "step": 3037700
    },
    {
      "epoch": 4.971295405301022,
      "grad_norm": 0.22126130759716034,
      "learning_rate": 5.7787471254521854e-08,
      "loss": 0.0059,
      "step": 3037720
    },
    {
      "epoch": 4.971328135739675,
      "grad_norm": 0.19282890856266022,
      "learning_rate": 5.7721579041004726e-08,
      "loss": 0.0106,
      "step": 3037740
    },
    {
      "epoch": 4.971360866178329,
      "grad_norm": 0.09377485513687134,
      "learning_rate": 5.7655686827487605e-08,
      "loss": 0.0057,
      "step": 3037760
    },
    {
      "epoch": 4.9713935966169815,
      "grad_norm": 0.10112237930297852,
      "learning_rate": 5.758979461397047e-08,
      "loss": 0.0074,
      "step": 3037780
    },
    {
      "epoch": 4.971426327055635,
      "grad_norm": 0.09763947129249573,
      "learning_rate": 5.752390240045334e-08,
      "loss": 0.0101,
      "step": 3037800
    },
    {
      "epoch": 4.971459057494289,
      "grad_norm": 0.10511849820613861,
      "learning_rate": 5.7458010186936215e-08,
      "loss": 0.0051,
      "step": 3037820
    },
    {
      "epoch": 4.971491787932942,
      "grad_norm": 0.41507378220558167,
      "learning_rate": 5.739211797341909e-08,
      "loss": 0.005,
      "step": 3037840
    },
    {
      "epoch": 4.971524518371595,
      "grad_norm": 0.23077784478664398,
      "learning_rate": 5.732622575990196e-08,
      "loss": 0.0089,
      "step": 3037860
    },
    {
      "epoch": 4.971557248810249,
      "grad_norm": 0.5195485353469849,
      "learning_rate": 5.7260333546384825e-08,
      "loss": 0.0127,
      "step": 3037880
    },
    {
      "epoch": 4.971589979248902,
      "grad_norm": 0.1699405312538147,
      "learning_rate": 5.7194441332867697e-08,
      "loss": 0.0093,
      "step": 3037900
    },
    {
      "epoch": 4.971622709687555,
      "grad_norm": 0.23415260016918182,
      "learning_rate": 5.7128549119350575e-08,
      "loss": 0.0149,
      "step": 3037920
    },
    {
      "epoch": 4.9716554401262085,
      "grad_norm": 0.08914615958929062,
      "learning_rate": 5.706265690583344e-08,
      "loss": 0.0092,
      "step": 3037940
    },
    {
      "epoch": 4.971688170564862,
      "grad_norm": 0.15021833777427673,
      "learning_rate": 5.699676469231631e-08,
      "loss": 0.0073,
      "step": 3037960
    },
    {
      "epoch": 4.971720901003515,
      "grad_norm": 0.16610971093177795,
      "learning_rate": 5.6930872478799185e-08,
      "loss": 0.0084,
      "step": 3037980
    },
    {
      "epoch": 4.971753631442168,
      "grad_norm": 0.5869117975234985,
      "learning_rate": 5.686498026528205e-08,
      "loss": 0.0092,
      "step": 3038000
    },
    {
      "epoch": 4.971786361880822,
      "grad_norm": 0.3220359683036804,
      "learning_rate": 5.679908805176493e-08,
      "loss": 0.0058,
      "step": 3038020
    },
    {
      "epoch": 4.971819092319476,
      "grad_norm": 0.3807973265647888,
      "learning_rate": 5.67331958382478e-08,
      "loss": 0.0088,
      "step": 3038040
    },
    {
      "epoch": 4.971851822758128,
      "grad_norm": 0.12942476570606232,
      "learning_rate": 5.666730362473067e-08,
      "loss": 0.0094,
      "step": 3038060
    },
    {
      "epoch": 4.971884553196782,
      "grad_norm": 0.0898619070649147,
      "learning_rate": 5.6601411411213545e-08,
      "loss": 0.0111,
      "step": 3038080
    },
    {
      "epoch": 4.9719172836354355,
      "grad_norm": 0.4571109116077423,
      "learning_rate": 5.653551919769642e-08,
      "loss": 0.0128,
      "step": 3038100
    },
    {
      "epoch": 4.971950014074089,
      "grad_norm": 0.3593156337738037,
      "learning_rate": 5.646962698417928e-08,
      "loss": 0.0078,
      "step": 3038120
    },
    {
      "epoch": 4.971982744512742,
      "grad_norm": 0.41136595606803894,
      "learning_rate": 5.6403734770662155e-08,
      "loss": 0.0059,
      "step": 3038140
    },
    {
      "epoch": 4.972015474951395,
      "grad_norm": 0.09806523472070694,
      "learning_rate": 5.633784255714502e-08,
      "loss": 0.0105,
      "step": 3038160
    },
    {
      "epoch": 4.972048205390049,
      "grad_norm": 0.3402602970600128,
      "learning_rate": 5.62719503436279e-08,
      "loss": 0.0133,
      "step": 3038180
    },
    {
      "epoch": 4.972080935828702,
      "grad_norm": 0.0577692873775959,
      "learning_rate": 5.620605813011077e-08,
      "loss": 0.0138,
      "step": 3038200
    },
    {
      "epoch": 4.972113666267355,
      "grad_norm": 0.3993951380252838,
      "learning_rate": 5.614016591659364e-08,
      "loss": 0.0162,
      "step": 3038220
    },
    {
      "epoch": 4.972146396706009,
      "grad_norm": 0.36838361620903015,
      "learning_rate": 5.607427370307651e-08,
      "loss": 0.0111,
      "step": 3038240
    },
    {
      "epoch": 4.972179127144662,
      "grad_norm": 0.36150923371315,
      "learning_rate": 5.600838148955939e-08,
      "loss": 0.0106,
      "step": 3038260
    },
    {
      "epoch": 4.972211857583315,
      "grad_norm": 0.14304077625274658,
      "learning_rate": 5.594248927604225e-08,
      "loss": 0.0061,
      "step": 3038280
    },
    {
      "epoch": 4.972244588021969,
      "grad_norm": 0.5513508319854736,
      "learning_rate": 5.5876597062525125e-08,
      "loss": 0.0105,
      "step": 3038300
    },
    {
      "epoch": 4.972277318460622,
      "grad_norm": 0.3173109292984009,
      "learning_rate": 5.5810704849008004e-08,
      "loss": 0.0077,
      "step": 3038320
    },
    {
      "epoch": 4.972310048899275,
      "grad_norm": 0.20328620076179504,
      "learning_rate": 5.574481263549087e-08,
      "loss": 0.0066,
      "step": 3038340
    },
    {
      "epoch": 4.972342779337929,
      "grad_norm": 0.04083627089858055,
      "learning_rate": 5.567892042197374e-08,
      "loss": 0.0112,
      "step": 3038360
    },
    {
      "epoch": 4.972375509776582,
      "grad_norm": 0.11351964622735977,
      "learning_rate": 5.561302820845661e-08,
      "loss": 0.0116,
      "step": 3038380
    },
    {
      "epoch": 4.972408240215236,
      "grad_norm": 0.3155456781387329,
      "learning_rate": 5.554713599493948e-08,
      "loss": 0.0072,
      "step": 3038400
    },
    {
      "epoch": 4.972440970653889,
      "grad_norm": 0.15839236974716187,
      "learning_rate": 5.548124378142236e-08,
      "loss": 0.0147,
      "step": 3038420
    },
    {
      "epoch": 4.972473701092542,
      "grad_norm": 0.06542955338954926,
      "learning_rate": 5.541535156790522e-08,
      "loss": 0.0085,
      "step": 3038440
    },
    {
      "epoch": 4.972506431531196,
      "grad_norm": 0.3010751008987427,
      "learning_rate": 5.5349459354388095e-08,
      "loss": 0.0088,
      "step": 3038460
    },
    {
      "epoch": 4.9725391619698485,
      "grad_norm": 0.4029387831687927,
      "learning_rate": 5.5283567140870974e-08,
      "loss": 0.0074,
      "step": 3038480
    },
    {
      "epoch": 4.972571892408502,
      "grad_norm": 0.14690685272216797,
      "learning_rate": 5.521767492735384e-08,
      "loss": 0.0103,
      "step": 3038500
    },
    {
      "epoch": 4.972604622847156,
      "grad_norm": 0.10258176922798157,
      "learning_rate": 5.515178271383671e-08,
      "loss": 0.0071,
      "step": 3038520
    },
    {
      "epoch": 4.972637353285808,
      "grad_norm": 0.3778148293495178,
      "learning_rate": 5.5085890500319584e-08,
      "loss": 0.0124,
      "step": 3038540
    },
    {
      "epoch": 4.972670083724462,
      "grad_norm": 0.41247880458831787,
      "learning_rate": 5.501999828680245e-08,
      "loss": 0.0106,
      "step": 3038560
    },
    {
      "epoch": 4.9727028141631155,
      "grad_norm": 0.23748207092285156,
      "learning_rate": 5.495410607328533e-08,
      "loss": 0.014,
      "step": 3038580
    },
    {
      "epoch": 4.972735544601768,
      "grad_norm": 0.12450902909040451,
      "learning_rate": 5.4888213859768193e-08,
      "loss": 0.0064,
      "step": 3038600
    },
    {
      "epoch": 4.972768275040422,
      "grad_norm": 0.22788818180561066,
      "learning_rate": 5.4822321646251066e-08,
      "loss": 0.0098,
      "step": 3038620
    },
    {
      "epoch": 4.972801005479075,
      "grad_norm": 0.28376680612564087,
      "learning_rate": 5.475642943273394e-08,
      "loss": 0.0101,
      "step": 3038640
    },
    {
      "epoch": 4.972833735917729,
      "grad_norm": 0.13760246336460114,
      "learning_rate": 5.46905372192168e-08,
      "loss": 0.0117,
      "step": 3038660
    },
    {
      "epoch": 4.972866466356382,
      "grad_norm": 0.15576912462711334,
      "learning_rate": 5.462464500569968e-08,
      "loss": 0.0093,
      "step": 3038680
    },
    {
      "epoch": 4.972899196795035,
      "grad_norm": 0.27572187781333923,
      "learning_rate": 5.4558752792182554e-08,
      "loss": 0.0097,
      "step": 3038700
    },
    {
      "epoch": 4.972931927233689,
      "grad_norm": 0.15037231147289276,
      "learning_rate": 5.449286057866542e-08,
      "loss": 0.0074,
      "step": 3038720
    },
    {
      "epoch": 4.9729646576723425,
      "grad_norm": 0.2538489103317261,
      "learning_rate": 5.44269683651483e-08,
      "loss": 0.0073,
      "step": 3038740
    },
    {
      "epoch": 4.972997388110995,
      "grad_norm": 0.22981052100658417,
      "learning_rate": 5.436107615163117e-08,
      "loss": 0.0088,
      "step": 3038760
    },
    {
      "epoch": 4.973030118549649,
      "grad_norm": 0.09245992451906204,
      "learning_rate": 5.4295183938114036e-08,
      "loss": 0.0133,
      "step": 3038780
    },
    {
      "epoch": 4.973062848988302,
      "grad_norm": 0.16431599855422974,
      "learning_rate": 5.422929172459691e-08,
      "loss": 0.0109,
      "step": 3038800
    },
    {
      "epoch": 4.973095579426955,
      "grad_norm": 0.17677412927150726,
      "learning_rate": 5.416339951107977e-08,
      "loss": 0.0115,
      "step": 3038820
    },
    {
      "epoch": 4.973128309865609,
      "grad_norm": 0.11440333724021912,
      "learning_rate": 5.409750729756265e-08,
      "loss": 0.0095,
      "step": 3038840
    },
    {
      "epoch": 4.973161040304262,
      "grad_norm": 0.157029390335083,
      "learning_rate": 5.4031615084045524e-08,
      "loss": 0.0065,
      "step": 3038860
    },
    {
      "epoch": 4.973193770742915,
      "grad_norm": 0.37324297428131104,
      "learning_rate": 5.396572287052839e-08,
      "loss": 0.0099,
      "step": 3038880
    },
    {
      "epoch": 4.973226501181569,
      "grad_norm": 0.10824678838253021,
      "learning_rate": 5.389983065701127e-08,
      "loss": 0.0087,
      "step": 3038900
    },
    {
      "epoch": 4.973259231620222,
      "grad_norm": 0.33117198944091797,
      "learning_rate": 5.383393844349414e-08,
      "loss": 0.0102,
      "step": 3038920
    },
    {
      "epoch": 4.973291962058876,
      "grad_norm": 0.22833865880966187,
      "learning_rate": 5.3768046229977006e-08,
      "loss": 0.0091,
      "step": 3038940
    },
    {
      "epoch": 4.9733246924975285,
      "grad_norm": 0.2099480777978897,
      "learning_rate": 5.370215401645988e-08,
      "loss": 0.0094,
      "step": 3038960
    },
    {
      "epoch": 4.973357422936182,
      "grad_norm": 0.12085980176925659,
      "learning_rate": 5.3636261802942757e-08,
      "loss": 0.0074,
      "step": 3038980
    },
    {
      "epoch": 4.973390153374836,
      "grad_norm": 0.0549188107252121,
      "learning_rate": 5.357036958942562e-08,
      "loss": 0.0076,
      "step": 3039000
    },
    {
      "epoch": 4.973422883813489,
      "grad_norm": 0.15140075981616974,
      "learning_rate": 5.3504477375908494e-08,
      "loss": 0.006,
      "step": 3039020
    },
    {
      "epoch": 4.973455614252142,
      "grad_norm": 0.13647224009037018,
      "learning_rate": 5.3438585162391366e-08,
      "loss": 0.0095,
      "step": 3039040
    },
    {
      "epoch": 4.973488344690796,
      "grad_norm": 0.17624865472316742,
      "learning_rate": 5.337269294887423e-08,
      "loss": 0.0095,
      "step": 3039060
    },
    {
      "epoch": 4.973521075129449,
      "grad_norm": 0.2616407871246338,
      "learning_rate": 5.330680073535711e-08,
      "loss": 0.0113,
      "step": 3039080
    },
    {
      "epoch": 4.973553805568102,
      "grad_norm": 0.12857672572135925,
      "learning_rate": 5.3240908521839976e-08,
      "loss": 0.0074,
      "step": 3039100
    },
    {
      "epoch": 4.9735865360067555,
      "grad_norm": 0.19373469054698944,
      "learning_rate": 5.317501630832285e-08,
      "loss": 0.0122,
      "step": 3039120
    },
    {
      "epoch": 4.973619266445409,
      "grad_norm": 0.13602392375469208,
      "learning_rate": 5.310912409480573e-08,
      "loss": 0.0187,
      "step": 3039140
    },
    {
      "epoch": 4.973651996884062,
      "grad_norm": 0.24112795293331146,
      "learning_rate": 5.304323188128859e-08,
      "loss": 0.0092,
      "step": 3039160
    },
    {
      "epoch": 4.973684727322715,
      "grad_norm": 0.14148573577404022,
      "learning_rate": 5.2977339667771464e-08,
      "loss": 0.0088,
      "step": 3039180
    },
    {
      "epoch": 4.973717457761369,
      "grad_norm": 0.07158145308494568,
      "learning_rate": 5.2911447454254336e-08,
      "loss": 0.0077,
      "step": 3039200
    },
    {
      "epoch": 4.973750188200023,
      "grad_norm": 0.1643878072500229,
      "learning_rate": 5.28455552407372e-08,
      "loss": 0.0076,
      "step": 3039220
    },
    {
      "epoch": 4.973782918638675,
      "grad_norm": 0.12729355692863464,
      "learning_rate": 5.277966302722008e-08,
      "loss": 0.0105,
      "step": 3039240
    },
    {
      "epoch": 4.973815649077329,
      "grad_norm": 0.13389189541339874,
      "learning_rate": 5.271377081370295e-08,
      "loss": 0.0077,
      "step": 3039260
    },
    {
      "epoch": 4.9738483795159825,
      "grad_norm": 0.1928502470254898,
      "learning_rate": 5.264787860018582e-08,
      "loss": 0.0048,
      "step": 3039280
    },
    {
      "epoch": 4.973881109954636,
      "grad_norm": 0.15291893482208252,
      "learning_rate": 5.258198638666869e-08,
      "loss": 0.007,
      "step": 3039300
    },
    {
      "epoch": 4.973913840393289,
      "grad_norm": 0.2272556722164154,
      "learning_rate": 5.251609417315156e-08,
      "loss": 0.0106,
      "step": 3039320
    },
    {
      "epoch": 4.973946570831942,
      "grad_norm": 0.12120447307825089,
      "learning_rate": 5.2450201959634434e-08,
      "loss": 0.0069,
      "step": 3039340
    },
    {
      "epoch": 4.973979301270596,
      "grad_norm": 0.15077100694179535,
      "learning_rate": 5.2384309746117307e-08,
      "loss": 0.0072,
      "step": 3039360
    },
    {
      "epoch": 4.974012031709249,
      "grad_norm": 0.7029426097869873,
      "learning_rate": 5.231841753260017e-08,
      "loss": 0.0081,
      "step": 3039380
    },
    {
      "epoch": 4.974044762147902,
      "grad_norm": 0.04709282144904137,
      "learning_rate": 5.225252531908305e-08,
      "loss": 0.0067,
      "step": 3039400
    },
    {
      "epoch": 4.974077492586556,
      "grad_norm": 0.24874785542488098,
      "learning_rate": 5.218663310556592e-08,
      "loss": 0.011,
      "step": 3039420
    },
    {
      "epoch": 4.9741102230252086,
      "grad_norm": 0.12314523756504059,
      "learning_rate": 5.212074089204879e-08,
      "loss": 0.0142,
      "step": 3039440
    },
    {
      "epoch": 4.974142953463862,
      "grad_norm": 0.5108463168144226,
      "learning_rate": 5.205484867853166e-08,
      "loss": 0.0118,
      "step": 3039460
    },
    {
      "epoch": 4.974175683902516,
      "grad_norm": 0.3500494062900543,
      "learning_rate": 5.198895646501454e-08,
      "loss": 0.0058,
      "step": 3039480
    },
    {
      "epoch": 4.974208414341169,
      "grad_norm": 0.2084457278251648,
      "learning_rate": 5.1923064251497405e-08,
      "loss": 0.006,
      "step": 3039500
    },
    {
      "epoch": 4.974241144779822,
      "grad_norm": 0.15455938875675201,
      "learning_rate": 5.185717203798028e-08,
      "loss": 0.0102,
      "step": 3039520
    },
    {
      "epoch": 4.974273875218476,
      "grad_norm": 0.226912721991539,
      "learning_rate": 5.179127982446314e-08,
      "loss": 0.0045,
      "step": 3039540
    },
    {
      "epoch": 4.974306605657129,
      "grad_norm": 0.18139950931072235,
      "learning_rate": 5.172538761094602e-08,
      "loss": 0.0082,
      "step": 3039560
    },
    {
      "epoch": 4.974339336095783,
      "grad_norm": 0.10605736821889877,
      "learning_rate": 5.165949539742889e-08,
      "loss": 0.0078,
      "step": 3039580
    },
    {
      "epoch": 4.9743720665344355,
      "grad_norm": 0.21616199612617493,
      "learning_rate": 5.159360318391176e-08,
      "loss": 0.01,
      "step": 3039600
    },
    {
      "epoch": 4.974404796973089,
      "grad_norm": 0.2900235056877136,
      "learning_rate": 5.152771097039463e-08,
      "loss": 0.0088,
      "step": 3039620
    },
    {
      "epoch": 4.974437527411743,
      "grad_norm": 0.38476094603538513,
      "learning_rate": 5.146181875687751e-08,
      "loss": 0.01,
      "step": 3039640
    },
    {
      "epoch": 4.974470257850395,
      "grad_norm": 0.1552906483411789,
      "learning_rate": 5.1395926543360375e-08,
      "loss": 0.0088,
      "step": 3039660
    },
    {
      "epoch": 4.974502988289049,
      "grad_norm": 0.12541401386260986,
      "learning_rate": 5.133003432984325e-08,
      "loss": 0.0099,
      "step": 3039680
    },
    {
      "epoch": 4.974535718727703,
      "grad_norm": 0.1098821610212326,
      "learning_rate": 5.126414211632612e-08,
      "loss": 0.007,
      "step": 3039700
    },
    {
      "epoch": 4.974568449166355,
      "grad_norm": 0.5945320725440979,
      "learning_rate": 5.1198249902808984e-08,
      "loss": 0.0063,
      "step": 3039720
    },
    {
      "epoch": 4.974601179605009,
      "grad_norm": 0.198453888297081,
      "learning_rate": 5.113235768929186e-08,
      "loss": 0.0101,
      "step": 3039740
    },
    {
      "epoch": 4.9746339100436625,
      "grad_norm": 0.101128488779068,
      "learning_rate": 5.106646547577473e-08,
      "loss": 0.012,
      "step": 3039760
    },
    {
      "epoch": 4.974666640482316,
      "grad_norm": 0.13227605819702148,
      "learning_rate": 5.10005732622576e-08,
      "loss": 0.017,
      "step": 3039780
    },
    {
      "epoch": 4.974699370920969,
      "grad_norm": 0.1527564823627472,
      "learning_rate": 5.093468104874048e-08,
      "loss": 0.0113,
      "step": 3039800
    },
    {
      "epoch": 4.974732101359622,
      "grad_norm": 0.24240607023239136,
      "learning_rate": 5.0868788835223345e-08,
      "loss": 0.0062,
      "step": 3039820
    },
    {
      "epoch": 4.974764831798276,
      "grad_norm": 0.2515093982219696,
      "learning_rate": 5.080289662170622e-08,
      "loss": 0.0064,
      "step": 3039840
    },
    {
      "epoch": 4.97479756223693,
      "grad_norm": 0.22181864082813263,
      "learning_rate": 5.073700440818909e-08,
      "loss": 0.0094,
      "step": 3039860
    },
    {
      "epoch": 4.974830292675582,
      "grad_norm": 0.1868668794631958,
      "learning_rate": 5.0671112194671955e-08,
      "loss": 0.0064,
      "step": 3039880
    },
    {
      "epoch": 4.974863023114236,
      "grad_norm": 0.1912659853696823,
      "learning_rate": 5.060521998115483e-08,
      "loss": 0.0076,
      "step": 3039900
    },
    {
      "epoch": 4.9748957535528895,
      "grad_norm": 0.14896899461746216,
      "learning_rate": 5.0539327767637705e-08,
      "loss": 0.0146,
      "step": 3039920
    },
    {
      "epoch": 4.974928483991542,
      "grad_norm": 0.6037896871566772,
      "learning_rate": 5.047343555412057e-08,
      "loss": 0.013,
      "step": 3039940
    },
    {
      "epoch": 4.974961214430196,
      "grad_norm": 0.8512625694274902,
      "learning_rate": 5.040754334060344e-08,
      "loss": 0.0124,
      "step": 3039960
    },
    {
      "epoch": 4.974993944868849,
      "grad_norm": 0.39619842171669006,
      "learning_rate": 5.0341651127086315e-08,
      "loss": 0.008,
      "step": 3039980
    },
    {
      "epoch": 4.975026675307502,
      "grad_norm": 0.16367898881435394,
      "learning_rate": 5.027575891356919e-08,
      "loss": 0.0092,
      "step": 3040000
    },
    {
      "epoch": 4.975059405746156,
      "grad_norm": 0.31266260147094727,
      "learning_rate": 5.020986670005206e-08,
      "loss": 0.0108,
      "step": 3040020
    },
    {
      "epoch": 4.975092136184809,
      "grad_norm": 0.12089644372463226,
      "learning_rate": 5.0143974486534925e-08,
      "loss": 0.0093,
      "step": 3040040
    },
    {
      "epoch": 4.975124866623463,
      "grad_norm": 0.22163063287734985,
      "learning_rate": 5.0078082273017803e-08,
      "loss": 0.007,
      "step": 3040060
    },
    {
      "epoch": 4.975157597062116,
      "grad_norm": 0.11481862515211105,
      "learning_rate": 5.0012190059500676e-08,
      "loss": 0.0082,
      "step": 3040080
    },
    {
      "epoch": 4.975190327500769,
      "grad_norm": 0.344179630279541,
      "learning_rate": 4.994629784598354e-08,
      "loss": 0.01,
      "step": 3040100
    },
    {
      "epoch": 4.975223057939423,
      "grad_norm": 0.10753260552883148,
      "learning_rate": 4.988040563246641e-08,
      "loss": 0.0088,
      "step": 3040120
    },
    {
      "epoch": 4.9752557883780755,
      "grad_norm": 0.23043008148670197,
      "learning_rate": 4.981451341894929e-08,
      "loss": 0.0083,
      "step": 3040140
    },
    {
      "epoch": 4.975288518816729,
      "grad_norm": 0.22415108978748322,
      "learning_rate": 4.974862120543216e-08,
      "loss": 0.0072,
      "step": 3040160
    },
    {
      "epoch": 4.975321249255383,
      "grad_norm": 0.08083994686603546,
      "learning_rate": 4.968272899191503e-08,
      "loss": 0.0072,
      "step": 3040180
    },
    {
      "epoch": 4.975353979694036,
      "grad_norm": 0.3011557161808014,
      "learning_rate": 4.961683677839791e-08,
      "loss": 0.0078,
      "step": 3040200
    },
    {
      "epoch": 4.975386710132689,
      "grad_norm": 0.18771882355213165,
      "learning_rate": 4.9550944564880774e-08,
      "loss": 0.0104,
      "step": 3040220
    },
    {
      "epoch": 4.9754194405713426,
      "grad_norm": 0.2899252772331238,
      "learning_rate": 4.9485052351363646e-08,
      "loss": 0.0068,
      "step": 3040240
    },
    {
      "epoch": 4.975452171009996,
      "grad_norm": 0.21844032406806946,
      "learning_rate": 4.941916013784651e-08,
      "loss": 0.0086,
      "step": 3040260
    },
    {
      "epoch": 4.975484901448649,
      "grad_norm": 0.1887378692626953,
      "learning_rate": 4.935326792432938e-08,
      "loss": 0.0087,
      "step": 3040280
    },
    {
      "epoch": 4.9755176318873024,
      "grad_norm": 0.42872121930122375,
      "learning_rate": 4.928737571081226e-08,
      "loss": 0.0146,
      "step": 3040300
    },
    {
      "epoch": 4.975550362325956,
      "grad_norm": 0.11161193996667862,
      "learning_rate": 4.922148349729513e-08,
      "loss": 0.0076,
      "step": 3040320
    },
    {
      "epoch": 4.975583092764609,
      "grad_norm": 0.12112969905138016,
      "learning_rate": 4.9155591283778e-08,
      "loss": 0.0099,
      "step": 3040340
    },
    {
      "epoch": 4.975615823203262,
      "grad_norm": 0.2344026118516922,
      "learning_rate": 4.908969907026087e-08,
      "loss": 0.0111,
      "step": 3040360
    },
    {
      "epoch": 4.975648553641916,
      "grad_norm": 0.07067857682704926,
      "learning_rate": 4.902380685674374e-08,
      "loss": 0.0087,
      "step": 3040380
    },
    {
      "epoch": 4.9756812840805695,
      "grad_norm": 0.17580737173557281,
      "learning_rate": 4.8957914643226616e-08,
      "loss": 0.0086,
      "step": 3040400
    },
    {
      "epoch": 4.975714014519222,
      "grad_norm": 0.2559756338596344,
      "learning_rate": 4.889202242970949e-08,
      "loss": 0.009,
      "step": 3040420
    },
    {
      "epoch": 4.975746744957876,
      "grad_norm": 0.3979536294937134,
      "learning_rate": 4.8826130216192353e-08,
      "loss": 0.0104,
      "step": 3040440
    },
    {
      "epoch": 4.975779475396529,
      "grad_norm": 0.24214595556259155,
      "learning_rate": 4.876023800267523e-08,
      "loss": 0.0128,
      "step": 3040460
    },
    {
      "epoch": 4.975812205835183,
      "grad_norm": 0.3628435730934143,
      "learning_rate": 4.86943457891581e-08,
      "loss": 0.0077,
      "step": 3040480
    },
    {
      "epoch": 4.975844936273836,
      "grad_norm": 0.3826395273208618,
      "learning_rate": 4.862845357564097e-08,
      "loss": 0.0097,
      "step": 3040500
    },
    {
      "epoch": 4.975877666712489,
      "grad_norm": 0.10797431319952011,
      "learning_rate": 4.856256136212384e-08,
      "loss": 0.0083,
      "step": 3040520
    },
    {
      "epoch": 4.975910397151143,
      "grad_norm": 0.3003489673137665,
      "learning_rate": 4.849666914860671e-08,
      "loss": 0.0066,
      "step": 3040540
    },
    {
      "epoch": 4.975943127589796,
      "grad_norm": 0.0997643917798996,
      "learning_rate": 4.8430776935089586e-08,
      "loss": 0.0061,
      "step": 3040560
    },
    {
      "epoch": 4.975975858028449,
      "grad_norm": 0.12732504308223724,
      "learning_rate": 4.836488472157246e-08,
      "loss": 0.0166,
      "step": 3040580
    },
    {
      "epoch": 4.976008588467103,
      "grad_norm": 0.24218910932540894,
      "learning_rate": 4.8298992508055324e-08,
      "loss": 0.0078,
      "step": 3040600
    },
    {
      "epoch": 4.9760413189057555,
      "grad_norm": 0.37009403109550476,
      "learning_rate": 4.82331002945382e-08,
      "loss": 0.0047,
      "step": 3040620
    },
    {
      "epoch": 4.976074049344409,
      "grad_norm": 0.7548190355300903,
      "learning_rate": 4.8167208081021074e-08,
      "loss": 0.009,
      "step": 3040640
    },
    {
      "epoch": 4.976106779783063,
      "grad_norm": 0.08516842126846313,
      "learning_rate": 4.810131586750394e-08,
      "loss": 0.0064,
      "step": 3040660
    },
    {
      "epoch": 4.976139510221716,
      "grad_norm": 0.4025213420391083,
      "learning_rate": 4.803542365398681e-08,
      "loss": 0.0133,
      "step": 3040680
    },
    {
      "epoch": 4.976172240660369,
      "grad_norm": 0.07689543813467026,
      "learning_rate": 4.796953144046968e-08,
      "loss": 0.0084,
      "step": 3040700
    },
    {
      "epoch": 4.976204971099023,
      "grad_norm": 0.17941918969154358,
      "learning_rate": 4.7903639226952556e-08,
      "loss": 0.0069,
      "step": 3040720
    },
    {
      "epoch": 4.976237701537676,
      "grad_norm": 0.3218672275543213,
      "learning_rate": 4.783774701343543e-08,
      "loss": 0.0055,
      "step": 3040740
    },
    {
      "epoch": 4.97627043197633,
      "grad_norm": 0.2768596112728119,
      "learning_rate": 4.7771854799918294e-08,
      "loss": 0.0058,
      "step": 3040760
    },
    {
      "epoch": 4.9763031624149825,
      "grad_norm": 0.5794670581817627,
      "learning_rate": 4.7705962586401166e-08,
      "loss": 0.0069,
      "step": 3040780
    },
    {
      "epoch": 4.976335892853636,
      "grad_norm": 0.09301067143678665,
      "learning_rate": 4.7640070372884044e-08,
      "loss": 0.0124,
      "step": 3040800
    },
    {
      "epoch": 4.97636862329229,
      "grad_norm": 0.12274441123008728,
      "learning_rate": 4.757417815936691e-08,
      "loss": 0.0076,
      "step": 3040820
    },
    {
      "epoch": 4.976401353730942,
      "grad_norm": 0.1561744511127472,
      "learning_rate": 4.750828594584978e-08,
      "loss": 0.0116,
      "step": 3040840
    },
    {
      "epoch": 4.976434084169596,
      "grad_norm": 0.09915725886821747,
      "learning_rate": 4.744239373233266e-08,
      "loss": 0.0123,
      "step": 3040860
    },
    {
      "epoch": 4.97646681460825,
      "grad_norm": 0.10463768988847733,
      "learning_rate": 4.7376501518815526e-08,
      "loss": 0.0061,
      "step": 3040880
    },
    {
      "epoch": 4.976499545046902,
      "grad_norm": 0.08760683983564377,
      "learning_rate": 4.73106093052984e-08,
      "loss": 0.008,
      "step": 3040900
    },
    {
      "epoch": 4.976532275485556,
      "grad_norm": 0.11291436851024628,
      "learning_rate": 4.7244717091781264e-08,
      "loss": 0.0092,
      "step": 3040920
    },
    {
      "epoch": 4.9765650059242095,
      "grad_norm": 0.2907308042049408,
      "learning_rate": 4.7178824878264136e-08,
      "loss": 0.008,
      "step": 3040940
    },
    {
      "epoch": 4.976597736362863,
      "grad_norm": 0.16854584217071533,
      "learning_rate": 4.7112932664747015e-08,
      "loss": 0.014,
      "step": 3040960
    },
    {
      "epoch": 4.976630466801516,
      "grad_norm": 0.32461026310920715,
      "learning_rate": 4.704704045122988e-08,
      "loss": 0.0067,
      "step": 3040980
    },
    {
      "epoch": 4.976663197240169,
      "grad_norm": 0.21427150070667267,
      "learning_rate": 4.698114823771275e-08,
      "loss": 0.0103,
      "step": 3041000
    },
    {
      "epoch": 4.976695927678823,
      "grad_norm": 0.2803642451763153,
      "learning_rate": 4.6915256024195624e-08,
      "loss": 0.0098,
      "step": 3041020
    },
    {
      "epoch": 4.9767286581174766,
      "grad_norm": 0.3349551856517792,
      "learning_rate": 4.6849363810678496e-08,
      "loss": 0.008,
      "step": 3041040
    },
    {
      "epoch": 4.976761388556129,
      "grad_norm": 0.6807068586349487,
      "learning_rate": 4.678347159716137e-08,
      "loss": 0.0099,
      "step": 3041060
    },
    {
      "epoch": 4.976794118994783,
      "grad_norm": 0.0726468563079834,
      "learning_rate": 4.671757938364424e-08,
      "loss": 0.0068,
      "step": 3041080
    },
    {
      "epoch": 4.9768268494334365,
      "grad_norm": 0.0895659327507019,
      "learning_rate": 4.6651687170127106e-08,
      "loss": 0.0091,
      "step": 3041100
    },
    {
      "epoch": 4.976859579872089,
      "grad_norm": 0.22960296273231506,
      "learning_rate": 4.6585794956609985e-08,
      "loss": 0.0086,
      "step": 3041120
    },
    {
      "epoch": 4.976892310310743,
      "grad_norm": 0.3380849063396454,
      "learning_rate": 4.651990274309286e-08,
      "loss": 0.0075,
      "step": 3041140
    },
    {
      "epoch": 4.976925040749396,
      "grad_norm": 0.35737210512161255,
      "learning_rate": 4.645401052957572e-08,
      "loss": 0.0078,
      "step": 3041160
    },
    {
      "epoch": 4.976957771188049,
      "grad_norm": 0.18191783130168915,
      "learning_rate": 4.6388118316058594e-08,
      "loss": 0.0137,
      "step": 3041180
    },
    {
      "epoch": 4.976990501626703,
      "grad_norm": 0.21970033645629883,
      "learning_rate": 4.632222610254146e-08,
      "loss": 0.0073,
      "step": 3041200
    },
    {
      "epoch": 4.977023232065356,
      "grad_norm": 0.04740386828780174,
      "learning_rate": 4.625633388902434e-08,
      "loss": 0.0057,
      "step": 3041220
    },
    {
      "epoch": 4.97705596250401,
      "grad_norm": 0.03768547251820564,
      "learning_rate": 4.619044167550721e-08,
      "loss": 0.0089,
      "step": 3041240
    },
    {
      "epoch": 4.9770886929426625,
      "grad_norm": 0.152800515294075,
      "learning_rate": 4.6124549461990076e-08,
      "loss": 0.0084,
      "step": 3041260
    },
    {
      "epoch": 4.977121423381316,
      "grad_norm": 0.27310359477996826,
      "learning_rate": 4.6058657248472955e-08,
      "loss": 0.0109,
      "step": 3041280
    },
    {
      "epoch": 4.97715415381997,
      "grad_norm": 0.1248772069811821,
      "learning_rate": 4.599276503495583e-08,
      "loss": 0.0108,
      "step": 3041300
    },
    {
      "epoch": 4.977186884258623,
      "grad_norm": 0.06556016951799393,
      "learning_rate": 4.592687282143869e-08,
      "loss": 0.0061,
      "step": 3041320
    },
    {
      "epoch": 4.977219614697276,
      "grad_norm": 0.5016734600067139,
      "learning_rate": 4.5860980607921565e-08,
      "loss": 0.0141,
      "step": 3041340
    },
    {
      "epoch": 4.97725234513593,
      "grad_norm": 0.22249546647071838,
      "learning_rate": 4.579508839440444e-08,
      "loss": 0.0076,
      "step": 3041360
    },
    {
      "epoch": 4.977285075574583,
      "grad_norm": 0.19347606599330902,
      "learning_rate": 4.572919618088731e-08,
      "loss": 0.0092,
      "step": 3041380
    },
    {
      "epoch": 4.977317806013236,
      "grad_norm": 0.3409079611301422,
      "learning_rate": 4.566330396737018e-08,
      "loss": 0.0075,
      "step": 3041400
    },
    {
      "epoch": 4.9773505364518895,
      "grad_norm": 0.202893927693367,
      "learning_rate": 4.5597411753853046e-08,
      "loss": 0.0116,
      "step": 3041420
    },
    {
      "epoch": 4.977383266890543,
      "grad_norm": 0.19379831850528717,
      "learning_rate": 4.553151954033592e-08,
      "loss": 0.0078,
      "step": 3041440
    },
    {
      "epoch": 4.977415997329196,
      "grad_norm": 0.8826495409011841,
      "learning_rate": 4.54656273268188e-08,
      "loss": 0.0079,
      "step": 3041460
    },
    {
      "epoch": 4.977448727767849,
      "grad_norm": 0.1750362664461136,
      "learning_rate": 4.539973511330166e-08,
      "loss": 0.0081,
      "step": 3041480
    },
    {
      "epoch": 4.977481458206503,
      "grad_norm": 0.12005489319562912,
      "learning_rate": 4.5333842899784535e-08,
      "loss": 0.0064,
      "step": 3041500
    },
    {
      "epoch": 4.977514188645157,
      "grad_norm": 0.25747764110565186,
      "learning_rate": 4.5267950686267413e-08,
      "loss": 0.0095,
      "step": 3041520
    },
    {
      "epoch": 4.977546919083809,
      "grad_norm": 0.1216650977730751,
      "learning_rate": 4.520205847275028e-08,
      "loss": 0.0072,
      "step": 3041540
    },
    {
      "epoch": 4.977579649522463,
      "grad_norm": 0.21197478473186493,
      "learning_rate": 4.513616625923315e-08,
      "loss": 0.0102,
      "step": 3041560
    },
    {
      "epoch": 4.9776123799611165,
      "grad_norm": 0.17172038555145264,
      "learning_rate": 4.507027404571602e-08,
      "loss": 0.0078,
      "step": 3041580
    },
    {
      "epoch": 4.977645110399769,
      "grad_norm": 0.6580822467803955,
      "learning_rate": 4.500438183219889e-08,
      "loss": 0.0099,
      "step": 3041600
    },
    {
      "epoch": 4.977677840838423,
      "grad_norm": 0.12282455712556839,
      "learning_rate": 4.493848961868177e-08,
      "loss": 0.009,
      "step": 3041620
    },
    {
      "epoch": 4.977710571277076,
      "grad_norm": 0.24132569134235382,
      "learning_rate": 4.487259740516463e-08,
      "loss": 0.0138,
      "step": 3041640
    },
    {
      "epoch": 4.97774330171573,
      "grad_norm": 0.22415044903755188,
      "learning_rate": 4.4806705191647505e-08,
      "loss": 0.0076,
      "step": 3041660
    },
    {
      "epoch": 4.977776032154383,
      "grad_norm": 0.18655063211917877,
      "learning_rate": 4.4740812978130384e-08,
      "loss": 0.014,
      "step": 3041680
    },
    {
      "epoch": 4.977808762593036,
      "grad_norm": 0.10604675114154816,
      "learning_rate": 4.467492076461325e-08,
      "loss": 0.0073,
      "step": 3041700
    },
    {
      "epoch": 4.97784149303169,
      "grad_norm": 0.1383410543203354,
      "learning_rate": 4.460902855109612e-08,
      "loss": 0.0075,
      "step": 3041720
    },
    {
      "epoch": 4.977874223470343,
      "grad_norm": 0.8669168949127197,
      "learning_rate": 4.454313633757899e-08,
      "loss": 0.0099,
      "step": 3041740
    },
    {
      "epoch": 4.977906953908996,
      "grad_norm": 0.4002757668495178,
      "learning_rate": 4.447724412406186e-08,
      "loss": 0.0111,
      "step": 3041760
    },
    {
      "epoch": 4.97793968434765,
      "grad_norm": 0.6722601056098938,
      "learning_rate": 4.441135191054474e-08,
      "loss": 0.0074,
      "step": 3041780
    },
    {
      "epoch": 4.9779724147863025,
      "grad_norm": 0.18914709985256195,
      "learning_rate": 4.434545969702761e-08,
      "loss": 0.0089,
      "step": 3041800
    },
    {
      "epoch": 4.978005145224956,
      "grad_norm": 0.16400444507598877,
      "learning_rate": 4.4279567483510475e-08,
      "loss": 0.0066,
      "step": 3041820
    },
    {
      "epoch": 4.97803787566361,
      "grad_norm": 0.31610944867134094,
      "learning_rate": 4.421367526999335e-08,
      "loss": 0.0125,
      "step": 3041840
    },
    {
      "epoch": 4.978070606102263,
      "grad_norm": 0.21429981291294098,
      "learning_rate": 4.414778305647621e-08,
      "loss": 0.0051,
      "step": 3041860
    },
    {
      "epoch": 4.978103336540916,
      "grad_norm": 0.14734956622123718,
      "learning_rate": 4.408189084295909e-08,
      "loss": 0.0099,
      "step": 3041880
    },
    {
      "epoch": 4.97813606697957,
      "grad_norm": 0.18568706512451172,
      "learning_rate": 4.4015998629441963e-08,
      "loss": 0.0105,
      "step": 3041900
    },
    {
      "epoch": 4.978168797418223,
      "grad_norm": 0.6664280295372009,
      "learning_rate": 4.395010641592483e-08,
      "loss": 0.0091,
      "step": 3041920
    },
    {
      "epoch": 4.978201527856877,
      "grad_norm": 0.13730834424495697,
      "learning_rate": 4.388421420240771e-08,
      "loss": 0.0064,
      "step": 3041940
    },
    {
      "epoch": 4.9782342582955295,
      "grad_norm": 0.6916875243186951,
      "learning_rate": 4.381832198889058e-08,
      "loss": 0.0084,
      "step": 3041960
    },
    {
      "epoch": 4.978266988734183,
      "grad_norm": 0.2655527591705322,
      "learning_rate": 4.3752429775373445e-08,
      "loss": 0.0071,
      "step": 3041980
    },
    {
      "epoch": 4.978299719172837,
      "grad_norm": 0.1153474673628807,
      "learning_rate": 4.368653756185632e-08,
      "loss": 0.0074,
      "step": 3042000
    },
    {
      "epoch": 4.978332449611489,
      "grad_norm": 0.08902823179960251,
      "learning_rate": 4.3620645348339196e-08,
      "loss": 0.0101,
      "step": 3042020
    },
    {
      "epoch": 4.978365180050143,
      "grad_norm": 0.32349124550819397,
      "learning_rate": 4.355475313482206e-08,
      "loss": 0.0075,
      "step": 3042040
    },
    {
      "epoch": 4.9783979104887965,
      "grad_norm": 0.10143890231847763,
      "learning_rate": 4.3488860921304934e-08,
      "loss": 0.0083,
      "step": 3042060
    },
    {
      "epoch": 4.978430640927449,
      "grad_norm": 0.041942428797483444,
      "learning_rate": 4.34229687077878e-08,
      "loss": 0.0093,
      "step": 3042080
    },
    {
      "epoch": 4.978463371366103,
      "grad_norm": 0.21089425683021545,
      "learning_rate": 4.335707649427068e-08,
      "loss": 0.0066,
      "step": 3042100
    },
    {
      "epoch": 4.978496101804756,
      "grad_norm": 0.45784056186676025,
      "learning_rate": 4.329118428075355e-08,
      "loss": 0.013,
      "step": 3042120
    },
    {
      "epoch": 4.97852883224341,
      "grad_norm": 0.12239033728837967,
      "learning_rate": 4.3225292067236415e-08,
      "loss": 0.0095,
      "step": 3042140
    },
    {
      "epoch": 4.978561562682063,
      "grad_norm": 0.14844796061515808,
      "learning_rate": 4.315939985371929e-08,
      "loss": 0.0069,
      "step": 3042160
    },
    {
      "epoch": 4.978594293120716,
      "grad_norm": 0.3919703960418701,
      "learning_rate": 4.3093507640202166e-08,
      "loss": 0.0091,
      "step": 3042180
    },
    {
      "epoch": 4.97862702355937,
      "grad_norm": 0.0859341099858284,
      "learning_rate": 4.302761542668503e-08,
      "loss": 0.0086,
      "step": 3042200
    },
    {
      "epoch": 4.9786597539980235,
      "grad_norm": 0.122103251516819,
      "learning_rate": 4.2961723213167904e-08,
      "loss": 0.0103,
      "step": 3042220
    },
    {
      "epoch": 4.978692484436676,
      "grad_norm": 0.3470103442668915,
      "learning_rate": 4.2895830999650776e-08,
      "loss": 0.0076,
      "step": 3042240
    },
    {
      "epoch": 4.97872521487533,
      "grad_norm": 0.19283583760261536,
      "learning_rate": 4.282993878613364e-08,
      "loss": 0.0098,
      "step": 3042260
    },
    {
      "epoch": 4.978757945313983,
      "grad_norm": 0.16480375826358795,
      "learning_rate": 4.276404657261652e-08,
      "loss": 0.0078,
      "step": 3042280
    },
    {
      "epoch": 4.978790675752636,
      "grad_norm": 0.1694180816411972,
      "learning_rate": 4.269815435909939e-08,
      "loss": 0.0082,
      "step": 3042300
    },
    {
      "epoch": 4.97882340619129,
      "grad_norm": 0.0851340964436531,
      "learning_rate": 4.263226214558226e-08,
      "loss": 0.007,
      "step": 3042320
    },
    {
      "epoch": 4.978856136629943,
      "grad_norm": 0.17098602652549744,
      "learning_rate": 4.2566369932065136e-08,
      "loss": 0.0097,
      "step": 3042340
    },
    {
      "epoch": 4.978888867068596,
      "grad_norm": 0.6515308618545532,
      "learning_rate": 4.2500477718548e-08,
      "loss": 0.0072,
      "step": 3042360
    },
    {
      "epoch": 4.97892159750725,
      "grad_norm": 0.22586767375469208,
      "learning_rate": 4.2434585505030874e-08,
      "loss": 0.0073,
      "step": 3042380
    },
    {
      "epoch": 4.978954327945903,
      "grad_norm": 0.19819070398807526,
      "learning_rate": 4.2368693291513746e-08,
      "loss": 0.0137,
      "step": 3042400
    },
    {
      "epoch": 4.978987058384557,
      "grad_norm": 0.29363521933555603,
      "learning_rate": 4.230280107799661e-08,
      "loss": 0.0161,
      "step": 3042420
    },
    {
      "epoch": 4.9790197888232095,
      "grad_norm": 0.17253725230693817,
      "learning_rate": 4.223690886447949e-08,
      "loss": 0.0087,
      "step": 3042440
    },
    {
      "epoch": 4.979052519261863,
      "grad_norm": 0.19543573260307312,
      "learning_rate": 4.217101665096236e-08,
      "loss": 0.0081,
      "step": 3042460
    },
    {
      "epoch": 4.979085249700517,
      "grad_norm": 0.22498907148838043,
      "learning_rate": 4.210512443744523e-08,
      "loss": 0.0069,
      "step": 3042480
    },
    {
      "epoch": 4.97911798013917,
      "grad_norm": 0.4996851980686188,
      "learning_rate": 4.20392322239281e-08,
      "loss": 0.009,
      "step": 3042500
    },
    {
      "epoch": 4.979150710577823,
      "grad_norm": 0.06217855215072632,
      "learning_rate": 4.197334001041098e-08,
      "loss": 0.0068,
      "step": 3042520
    },
    {
      "epoch": 4.979183441016477,
      "grad_norm": 0.2797577381134033,
      "learning_rate": 4.1907447796893844e-08,
      "loss": 0.0079,
      "step": 3042540
    },
    {
      "epoch": 4.97921617145513,
      "grad_norm": 0.29238152503967285,
      "learning_rate": 4.1841555583376716e-08,
      "loss": 0.0067,
      "step": 3042560
    },
    {
      "epoch": 4.979248901893783,
      "grad_norm": 0.2042490839958191,
      "learning_rate": 4.177566336985958e-08,
      "loss": 0.0106,
      "step": 3042580
    },
    {
      "epoch": 4.9792816323324365,
      "grad_norm": 0.341161847114563,
      "learning_rate": 4.170977115634246e-08,
      "loss": 0.0087,
      "step": 3042600
    },
    {
      "epoch": 4.97931436277109,
      "grad_norm": 0.2689494490623474,
      "learning_rate": 4.164387894282533e-08,
      "loss": 0.0078,
      "step": 3042620
    },
    {
      "epoch": 4.979347093209743,
      "grad_norm": 0.1262413114309311,
      "learning_rate": 4.15779867293082e-08,
      "loss": 0.01,
      "step": 3042640
    },
    {
      "epoch": 4.979379823648396,
      "grad_norm": 0.14485730230808258,
      "learning_rate": 4.151209451579107e-08,
      "loss": 0.0116,
      "step": 3042660
    },
    {
      "epoch": 4.97941255408705,
      "grad_norm": 0.12841551005840302,
      "learning_rate": 4.144620230227395e-08,
      "loss": 0.0102,
      "step": 3042680
    },
    {
      "epoch": 4.979445284525704,
      "grad_norm": 0.12032908201217651,
      "learning_rate": 4.1380310088756814e-08,
      "loss": 0.0057,
      "step": 3042700
    },
    {
      "epoch": 4.979478014964356,
      "grad_norm": 0.1906641125679016,
      "learning_rate": 4.1314417875239686e-08,
      "loss": 0.0092,
      "step": 3042720
    },
    {
      "epoch": 4.97951074540301,
      "grad_norm": 0.19118429720401764,
      "learning_rate": 4.124852566172256e-08,
      "loss": 0.0089,
      "step": 3042740
    },
    {
      "epoch": 4.9795434758416635,
      "grad_norm": 0.15696462988853455,
      "learning_rate": 4.118263344820543e-08,
      "loss": 0.006,
      "step": 3042760
    },
    {
      "epoch": 4.979576206280317,
      "grad_norm": 0.1673498898744583,
      "learning_rate": 4.11167412346883e-08,
      "loss": 0.0057,
      "step": 3042780
    },
    {
      "epoch": 4.97960893671897,
      "grad_norm": 0.18903464078903198,
      "learning_rate": 4.105084902117117e-08,
      "loss": 0.0132,
      "step": 3042800
    },
    {
      "epoch": 4.979641667157623,
      "grad_norm": 0.15102863311767578,
      "learning_rate": 4.098495680765404e-08,
      "loss": 0.0059,
      "step": 3042820
    },
    {
      "epoch": 4.979674397596277,
      "grad_norm": 0.3146485388278961,
      "learning_rate": 4.091906459413692e-08,
      "loss": 0.0098,
      "step": 3042840
    },
    {
      "epoch": 4.97970712803493,
      "grad_norm": 0.125590518116951,
      "learning_rate": 4.0853172380619784e-08,
      "loss": 0.0101,
      "step": 3042860
    },
    {
      "epoch": 4.979739858473583,
      "grad_norm": 0.0792890265583992,
      "learning_rate": 4.0787280167102656e-08,
      "loss": 0.0082,
      "step": 3042880
    },
    {
      "epoch": 4.979772588912237,
      "grad_norm": 0.2250816822052002,
      "learning_rate": 4.072138795358553e-08,
      "loss": 0.0105,
      "step": 3042900
    },
    {
      "epoch": 4.9798053193508895,
      "grad_norm": 0.23493440449237823,
      "learning_rate": 4.0655495740068394e-08,
      "loss": 0.0132,
      "step": 3042920
    },
    {
      "epoch": 4.979838049789543,
      "grad_norm": 0.2773035764694214,
      "learning_rate": 4.058960352655127e-08,
      "loss": 0.0061,
      "step": 3042940
    },
    {
      "epoch": 4.979870780228197,
      "grad_norm": 0.18542946875095367,
      "learning_rate": 4.0523711313034145e-08,
      "loss": 0.0082,
      "step": 3042960
    },
    {
      "epoch": 4.97990351066685,
      "grad_norm": 0.3423975110054016,
      "learning_rate": 4.045781909951701e-08,
      "loss": 0.0091,
      "step": 3042980
    },
    {
      "epoch": 4.979936241105503,
      "grad_norm": 0.3944704234600067,
      "learning_rate": 4.039192688599989e-08,
      "loss": 0.0115,
      "step": 3043000
    },
    {
      "epoch": 4.979968971544157,
      "grad_norm": 0.1768326610326767,
      "learning_rate": 4.0326034672482754e-08,
      "loss": 0.0052,
      "step": 3043020
    },
    {
      "epoch": 4.98000170198281,
      "grad_norm": 0.5841648578643799,
      "learning_rate": 4.0260142458965626e-08,
      "loss": 0.0089,
      "step": 3043040
    },
    {
      "epoch": 4.980034432421463,
      "grad_norm": 0.17411018908023834,
      "learning_rate": 4.01942502454485e-08,
      "loss": 0.0098,
      "step": 3043060
    },
    {
      "epoch": 4.9800671628601165,
      "grad_norm": 0.27035006880760193,
      "learning_rate": 4.0128358031931364e-08,
      "loss": 0.0088,
      "step": 3043080
    },
    {
      "epoch": 4.98009989329877,
      "grad_norm": 0.16440047323703766,
      "learning_rate": 4.006246581841424e-08,
      "loss": 0.0082,
      "step": 3043100
    },
    {
      "epoch": 4.980132623737424,
      "grad_norm": 0.22652028501033783,
      "learning_rate": 3.9996573604897115e-08,
      "loss": 0.0048,
      "step": 3043120
    },
    {
      "epoch": 4.980165354176076,
      "grad_norm": 0.3842911422252655,
      "learning_rate": 3.993068139137998e-08,
      "loss": 0.0142,
      "step": 3043140
    },
    {
      "epoch": 4.98019808461473,
      "grad_norm": 0.26427996158599854,
      "learning_rate": 3.986478917786285e-08,
      "loss": 0.0067,
      "step": 3043160
    },
    {
      "epoch": 4.980230815053384,
      "grad_norm": 0.09996619820594788,
      "learning_rate": 3.979889696434573e-08,
      "loss": 0.013,
      "step": 3043180
    },
    {
      "epoch": 4.980263545492036,
      "grad_norm": 0.057041604071855545,
      "learning_rate": 3.9733004750828597e-08,
      "loss": 0.0147,
      "step": 3043200
    },
    {
      "epoch": 4.98029627593069,
      "grad_norm": 0.1515396386384964,
      "learning_rate": 3.966711253731147e-08,
      "loss": 0.0083,
      "step": 3043220
    },
    {
      "epoch": 4.9803290063693435,
      "grad_norm": 0.1539645791053772,
      "learning_rate": 3.960122032379435e-08,
      "loss": 0.0074,
      "step": 3043240
    },
    {
      "epoch": 4.980361736807996,
      "grad_norm": 0.11602749675512314,
      "learning_rate": 3.953532811027721e-08,
      "loss": 0.0066,
      "step": 3043260
    },
    {
      "epoch": 4.98039446724665,
      "grad_norm": 0.1531141847372055,
      "learning_rate": 3.9469435896760085e-08,
      "loss": 0.0065,
      "step": 3043280
    },
    {
      "epoch": 4.980427197685303,
      "grad_norm": 0.16119422018527985,
      "learning_rate": 3.940354368324295e-08,
      "loss": 0.0067,
      "step": 3043300
    },
    {
      "epoch": 4.980459928123957,
      "grad_norm": 0.4065711796283722,
      "learning_rate": 3.933765146972582e-08,
      "loss": 0.0071,
      "step": 3043320
    },
    {
      "epoch": 4.98049265856261,
      "grad_norm": 0.1874316781759262,
      "learning_rate": 3.92717592562087e-08,
      "loss": 0.0133,
      "step": 3043340
    },
    {
      "epoch": 4.980525389001263,
      "grad_norm": 0.10278207808732986,
      "learning_rate": 3.920586704269157e-08,
      "loss": 0.0075,
      "step": 3043360
    },
    {
      "epoch": 4.980558119439917,
      "grad_norm": 0.15253080427646637,
      "learning_rate": 3.913997482917444e-08,
      "loss": 0.0133,
      "step": 3043380
    },
    {
      "epoch": 4.9805908498785705,
      "grad_norm": 0.3052820861339569,
      "learning_rate": 3.907408261565732e-08,
      "loss": 0.0095,
      "step": 3043400
    },
    {
      "epoch": 4.980623580317223,
      "grad_norm": 0.15057727694511414,
      "learning_rate": 3.900819040214018e-08,
      "loss": 0.0101,
      "step": 3043420
    },
    {
      "epoch": 4.980656310755877,
      "grad_norm": 0.3375340402126312,
      "learning_rate": 3.8942298188623055e-08,
      "loss": 0.0089,
      "step": 3043440
    },
    {
      "epoch": 4.98068904119453,
      "grad_norm": 0.11950517445802689,
      "learning_rate": 3.887640597510593e-08,
      "loss": 0.0053,
      "step": 3043460
    },
    {
      "epoch": 4.980721771633183,
      "grad_norm": 0.1896970272064209,
      "learning_rate": 3.881051376158879e-08,
      "loss": 0.0092,
      "step": 3043480
    },
    {
      "epoch": 4.980754502071837,
      "grad_norm": 0.2817823886871338,
      "learning_rate": 3.874462154807167e-08,
      "loss": 0.0127,
      "step": 3043500
    },
    {
      "epoch": 4.98078723251049,
      "grad_norm": 0.14946354925632477,
      "learning_rate": 3.867872933455454e-08,
      "loss": 0.0099,
      "step": 3043520
    },
    {
      "epoch": 4.980819962949143,
      "grad_norm": 0.1464676856994629,
      "learning_rate": 3.861283712103741e-08,
      "loss": 0.007,
      "step": 3043540
    },
    {
      "epoch": 4.980852693387797,
      "grad_norm": 0.11331632733345032,
      "learning_rate": 3.854694490752028e-08,
      "loss": 0.0056,
      "step": 3043560
    },
    {
      "epoch": 4.98088542382645,
      "grad_norm": 0.1687680184841156,
      "learning_rate": 3.848105269400315e-08,
      "loss": 0.0057,
      "step": 3043580
    },
    {
      "epoch": 4.980918154265104,
      "grad_norm": 0.15542976558208466,
      "learning_rate": 3.8415160480486025e-08,
      "loss": 0.0085,
      "step": 3043600
    },
    {
      "epoch": 4.9809508847037565,
      "grad_norm": 0.3882175385951996,
      "learning_rate": 3.83492682669689e-08,
      "loss": 0.0127,
      "step": 3043620
    },
    {
      "epoch": 4.98098361514241,
      "grad_norm": 0.12010905891656876,
      "learning_rate": 3.828337605345177e-08,
      "loss": 0.0083,
      "step": 3043640
    },
    {
      "epoch": 4.981016345581064,
      "grad_norm": 0.46410948038101196,
      "learning_rate": 3.821748383993464e-08,
      "loss": 0.0107,
      "step": 3043660
    },
    {
      "epoch": 4.981049076019717,
      "grad_norm": 0.25136151909828186,
      "learning_rate": 3.815159162641751e-08,
      "loss": 0.0083,
      "step": 3043680
    },
    {
      "epoch": 4.98108180645837,
      "grad_norm": 0.19325830042362213,
      "learning_rate": 3.808569941290038e-08,
      "loss": 0.009,
      "step": 3043700
    },
    {
      "epoch": 4.9811145368970235,
      "grad_norm": 0.1951567530632019,
      "learning_rate": 3.801980719938325e-08,
      "loss": 0.0113,
      "step": 3043720
    },
    {
      "epoch": 4.981147267335677,
      "grad_norm": 0.33044493198394775,
      "learning_rate": 3.7953914985866123e-08,
      "loss": 0.0051,
      "step": 3043740
    },
    {
      "epoch": 4.98117999777433,
      "grad_norm": 0.4867604970932007,
      "learning_rate": 3.7888022772348995e-08,
      "loss": 0.007,
      "step": 3043760
    },
    {
      "epoch": 4.981212728212983,
      "grad_norm": 0.3042554259300232,
      "learning_rate": 3.782213055883186e-08,
      "loss": 0.0097,
      "step": 3043780
    },
    {
      "epoch": 4.981245458651637,
      "grad_norm": 0.2597859501838684,
      "learning_rate": 3.775623834531474e-08,
      "loss": 0.0062,
      "step": 3043800
    },
    {
      "epoch": 4.98127818909029,
      "grad_norm": 0.27988338470458984,
      "learning_rate": 3.769034613179761e-08,
      "loss": 0.0073,
      "step": 3043820
    },
    {
      "epoch": 4.981310919528943,
      "grad_norm": 0.3890140652656555,
      "learning_rate": 3.762445391828048e-08,
      "loss": 0.0093,
      "step": 3043840
    },
    {
      "epoch": 4.981343649967597,
      "grad_norm": 0.17015330493450165,
      "learning_rate": 3.7558561704763356e-08,
      "loss": 0.0073,
      "step": 3043860
    },
    {
      "epoch": 4.9813763804062505,
      "grad_norm": 0.25173601508140564,
      "learning_rate": 3.749266949124622e-08,
      "loss": 0.007,
      "step": 3043880
    },
    {
      "epoch": 4.981409110844903,
      "grad_norm": 0.345535546541214,
      "learning_rate": 3.7426777277729093e-08,
      "loss": 0.0101,
      "step": 3043900
    },
    {
      "epoch": 4.981441841283557,
      "grad_norm": 0.25582242012023926,
      "learning_rate": 3.7360885064211966e-08,
      "loss": 0.0132,
      "step": 3043920
    },
    {
      "epoch": 4.98147457172221,
      "grad_norm": 0.06460364907979965,
      "learning_rate": 3.729499285069484e-08,
      "loss": 0.0069,
      "step": 3043940
    },
    {
      "epoch": 4.981507302160864,
      "grad_norm": 0.10708117485046387,
      "learning_rate": 3.722910063717771e-08,
      "loss": 0.0087,
      "step": 3043960
    },
    {
      "epoch": 4.981540032599517,
      "grad_norm": 0.22217372059822083,
      "learning_rate": 3.7163208423660575e-08,
      "loss": 0.0075,
      "step": 3043980
    },
    {
      "epoch": 4.98157276303817,
      "grad_norm": 0.5703679323196411,
      "learning_rate": 3.7097316210143454e-08,
      "loss": 0.0069,
      "step": 3044000
    },
    {
      "epoch": 4.981605493476824,
      "grad_norm": 0.23090860247612,
      "learning_rate": 3.703142399662632e-08,
      "loss": 0.0103,
      "step": 3044020
    },
    {
      "epoch": 4.981638223915477,
      "grad_norm": 0.5166571736335754,
      "learning_rate": 3.696553178310919e-08,
      "loss": 0.0075,
      "step": 3044040
    },
    {
      "epoch": 4.98167095435413,
      "grad_norm": 0.16250674426555634,
      "learning_rate": 3.6899639569592064e-08,
      "loss": 0.0075,
      "step": 3044060
    },
    {
      "epoch": 4.981703684792784,
      "grad_norm": 0.14161482453346252,
      "learning_rate": 3.6833747356074936e-08,
      "loss": 0.0105,
      "step": 3044080
    },
    {
      "epoch": 4.9817364152314365,
      "grad_norm": 0.08325452357530594,
      "learning_rate": 3.676785514255781e-08,
      "loss": 0.0071,
      "step": 3044100
    },
    {
      "epoch": 4.98176914567009,
      "grad_norm": 0.29468780755996704,
      "learning_rate": 3.670196292904068e-08,
      "loss": 0.0051,
      "step": 3044120
    },
    {
      "epoch": 4.981801876108744,
      "grad_norm": 0.15417161583900452,
      "learning_rate": 3.6636070715523545e-08,
      "loss": 0.005,
      "step": 3044140
    },
    {
      "epoch": 4.981834606547397,
      "grad_norm": 0.11596237868070602,
      "learning_rate": 3.6570178502006424e-08,
      "loss": 0.0079,
      "step": 3044160
    },
    {
      "epoch": 4.98186733698605,
      "grad_norm": 0.13388441503047943,
      "learning_rate": 3.650428628848929e-08,
      "loss": 0.0056,
      "step": 3044180
    },
    {
      "epoch": 4.981900067424704,
      "grad_norm": 0.09707681089639664,
      "learning_rate": 3.643839407497216e-08,
      "loss": 0.0076,
      "step": 3044200
    },
    {
      "epoch": 4.981932797863357,
      "grad_norm": 0.0902780145406723,
      "learning_rate": 3.6372501861455034e-08,
      "loss": 0.0134,
      "step": 3044220
    },
    {
      "epoch": 4.981965528302011,
      "grad_norm": 0.13426008820533752,
      "learning_rate": 3.6306609647937906e-08,
      "loss": 0.0124,
      "step": 3044240
    },
    {
      "epoch": 4.9819982587406635,
      "grad_norm": 0.29123562574386597,
      "learning_rate": 3.624071743442078e-08,
      "loss": 0.0087,
      "step": 3044260
    },
    {
      "epoch": 4.982030989179317,
      "grad_norm": 0.40690499544143677,
      "learning_rate": 3.617482522090365e-08,
      "loss": 0.0103,
      "step": 3044280
    },
    {
      "epoch": 4.982063719617971,
      "grad_norm": 0.15141968429088593,
      "learning_rate": 3.610893300738652e-08,
      "loss": 0.0088,
      "step": 3044300
    },
    {
      "epoch": 4.982096450056623,
      "grad_norm": 0.05521561950445175,
      "learning_rate": 3.6043040793869394e-08,
      "loss": 0.0091,
      "step": 3044320
    },
    {
      "epoch": 4.982129180495277,
      "grad_norm": 0.23973041772842407,
      "learning_rate": 3.597714858035226e-08,
      "loss": 0.0072,
      "step": 3044340
    },
    {
      "epoch": 4.982161910933931,
      "grad_norm": 0.15418897569179535,
      "learning_rate": 3.591125636683513e-08,
      "loss": 0.0101,
      "step": 3044360
    },
    {
      "epoch": 4.982194641372583,
      "grad_norm": 0.21346986293792725,
      "learning_rate": 3.5845364153318004e-08,
      "loss": 0.0086,
      "step": 3044380
    },
    {
      "epoch": 4.982227371811237,
      "grad_norm": 0.21583221852779388,
      "learning_rate": 3.5779471939800876e-08,
      "loss": 0.0067,
      "step": 3044400
    },
    {
      "epoch": 4.9822601022498905,
      "grad_norm": 0.5053102970123291,
      "learning_rate": 3.571357972628375e-08,
      "loss": 0.0082,
      "step": 3044420
    },
    {
      "epoch": 4.982292832688544,
      "grad_norm": 0.19418896734714508,
      "learning_rate": 3.564768751276662e-08,
      "loss": 0.0083,
      "step": 3044440
    },
    {
      "epoch": 4.982325563127197,
      "grad_norm": 0.6651666760444641,
      "learning_rate": 3.558179529924949e-08,
      "loss": 0.01,
      "step": 3044460
    },
    {
      "epoch": 4.98235829356585,
      "grad_norm": 0.12944696843624115,
      "learning_rate": 3.5515903085732364e-08,
      "loss": 0.0093,
      "step": 3044480
    },
    {
      "epoch": 4.982391024004504,
      "grad_norm": 0.19287358224391937,
      "learning_rate": 3.545001087221523e-08,
      "loss": 0.0114,
      "step": 3044500
    },
    {
      "epoch": 4.982423754443157,
      "grad_norm": 0.0901363417506218,
      "learning_rate": 3.538411865869811e-08,
      "loss": 0.0089,
      "step": 3044520
    },
    {
      "epoch": 4.98245648488181,
      "grad_norm": 0.19336923956871033,
      "learning_rate": 3.5318226445180974e-08,
      "loss": 0.0082,
      "step": 3044540
    },
    {
      "epoch": 4.982489215320464,
      "grad_norm": 0.3952568769454956,
      "learning_rate": 3.5252334231663846e-08,
      "loss": 0.0075,
      "step": 3044560
    },
    {
      "epoch": 4.982521945759117,
      "grad_norm": 0.13051077723503113,
      "learning_rate": 3.518644201814672e-08,
      "loss": 0.0052,
      "step": 3044580
    },
    {
      "epoch": 4.98255467619777,
      "grad_norm": 0.24474643170833588,
      "learning_rate": 3.512054980462959e-08,
      "loss": 0.0111,
      "step": 3044600
    },
    {
      "epoch": 4.982587406636424,
      "grad_norm": 0.31786635518074036,
      "learning_rate": 3.505465759111246e-08,
      "loss": 0.0063,
      "step": 3044620
    },
    {
      "epoch": 4.982620137075077,
      "grad_norm": 0.11472940444946289,
      "learning_rate": 3.498876537759533e-08,
      "loss": 0.0101,
      "step": 3044640
    },
    {
      "epoch": 4.98265286751373,
      "grad_norm": 0.33388084173202515,
      "learning_rate": 3.4922873164078207e-08,
      "loss": 0.0091,
      "step": 3044660
    },
    {
      "epoch": 4.982685597952384,
      "grad_norm": 0.27075353264808655,
      "learning_rate": 3.485698095056108e-08,
      "loss": 0.0117,
      "step": 3044680
    },
    {
      "epoch": 4.982718328391037,
      "grad_norm": 0.145011767745018,
      "learning_rate": 3.4791088737043944e-08,
      "loss": 0.0067,
      "step": 3044700
    },
    {
      "epoch": 4.98275105882969,
      "grad_norm": 0.057411305606365204,
      "learning_rate": 3.4725196523526816e-08,
      "loss": 0.0083,
      "step": 3044720
    },
    {
      "epoch": 4.9827837892683435,
      "grad_norm": 0.24935156106948853,
      "learning_rate": 3.465930431000969e-08,
      "loss": 0.007,
      "step": 3044740
    },
    {
      "epoch": 4.982816519706997,
      "grad_norm": 0.061907004565000534,
      "learning_rate": 3.459341209649256e-08,
      "loss": 0.0078,
      "step": 3044760
    },
    {
      "epoch": 4.982849250145651,
      "grad_norm": 0.19264818727970123,
      "learning_rate": 3.452751988297543e-08,
      "loss": 0.0089,
      "step": 3044780
    },
    {
      "epoch": 4.982881980584303,
      "grad_norm": 0.3271157145500183,
      "learning_rate": 3.4461627669458305e-08,
      "loss": 0.0159,
      "step": 3044800
    },
    {
      "epoch": 4.982914711022957,
      "grad_norm": 0.04605651646852493,
      "learning_rate": 3.439573545594118e-08,
      "loss": 0.0079,
      "step": 3044820
    },
    {
      "epoch": 4.982947441461611,
      "grad_norm": 0.6190038919448853,
      "learning_rate": 3.432984324242404e-08,
      "loss": 0.0067,
      "step": 3044840
    },
    {
      "epoch": 4.982980171900264,
      "grad_norm": 0.4691438376903534,
      "learning_rate": 3.4263951028906914e-08,
      "loss": 0.0061,
      "step": 3044860
    },
    {
      "epoch": 4.983012902338917,
      "grad_norm": 0.22297392785549164,
      "learning_rate": 3.4198058815389786e-08,
      "loss": 0.0073,
      "step": 3044880
    },
    {
      "epoch": 4.9830456327775705,
      "grad_norm": 0.1036643236875534,
      "learning_rate": 3.413216660187266e-08,
      "loss": 0.0089,
      "step": 3044900
    },
    {
      "epoch": 4.983078363216224,
      "grad_norm": 0.30474019050598145,
      "learning_rate": 3.406627438835553e-08,
      "loss": 0.0051,
      "step": 3044920
    },
    {
      "epoch": 4.983111093654877,
      "grad_norm": 0.2600606381893158,
      "learning_rate": 3.40003821748384e-08,
      "loss": 0.0058,
      "step": 3044940
    },
    {
      "epoch": 4.98314382409353,
      "grad_norm": 0.6300320029258728,
      "learning_rate": 3.3934489961321275e-08,
      "loss": 0.01,
      "step": 3044960
    },
    {
      "epoch": 4.983176554532184,
      "grad_norm": 0.3043811321258545,
      "learning_rate": 3.386859774780415e-08,
      "loss": 0.0066,
      "step": 3044980
    },
    {
      "epoch": 4.983209284970837,
      "grad_norm": 0.2944820523262024,
      "learning_rate": 3.380270553428701e-08,
      "loss": 0.0118,
      "step": 3045000
    },
    {
      "epoch": 4.98324201540949,
      "grad_norm": 0.32613736391067505,
      "learning_rate": 3.373681332076989e-08,
      "loss": 0.0089,
      "step": 3045020
    },
    {
      "epoch": 4.983274745848144,
      "grad_norm": 0.12369238585233688,
      "learning_rate": 3.3670921107252757e-08,
      "loss": 0.0099,
      "step": 3045040
    },
    {
      "epoch": 4.9833074762867975,
      "grad_norm": 0.2029752880334854,
      "learning_rate": 3.360502889373563e-08,
      "loss": 0.0069,
      "step": 3045060
    },
    {
      "epoch": 4.98334020672545,
      "grad_norm": 0.25049954652786255,
      "learning_rate": 3.35391366802185e-08,
      "loss": 0.0084,
      "step": 3045080
    },
    {
      "epoch": 4.983372937164104,
      "grad_norm": 0.08175645023584366,
      "learning_rate": 3.347324446670137e-08,
      "loss": 0.0056,
      "step": 3045100
    },
    {
      "epoch": 4.983405667602757,
      "grad_norm": 0.31045612692832947,
      "learning_rate": 3.3407352253184245e-08,
      "loss": 0.011,
      "step": 3045120
    },
    {
      "epoch": 4.983438398041411,
      "grad_norm": 0.13728225231170654,
      "learning_rate": 3.334146003966712e-08,
      "loss": 0.0097,
      "step": 3045140
    },
    {
      "epoch": 4.983471128480064,
      "grad_norm": 0.18972600996494293,
      "learning_rate": 3.327556782614999e-08,
      "loss": 0.005,
      "step": 3045160
    },
    {
      "epoch": 4.983503858918717,
      "grad_norm": 0.31173306703567505,
      "learning_rate": 3.320967561263286e-08,
      "loss": 0.0079,
      "step": 3045180
    },
    {
      "epoch": 4.983536589357371,
      "grad_norm": 0.07411058247089386,
      "learning_rate": 3.3143783399115727e-08,
      "loss": 0.0088,
      "step": 3045200
    },
    {
      "epoch": 4.983569319796024,
      "grad_norm": 0.23308561742305756,
      "learning_rate": 3.30778911855986e-08,
      "loss": 0.0131,
      "step": 3045220
    },
    {
      "epoch": 4.983602050234677,
      "grad_norm": 0.08062155544757843,
      "learning_rate": 3.301199897208147e-08,
      "loss": 0.0082,
      "step": 3045240
    },
    {
      "epoch": 4.983634780673331,
      "grad_norm": 0.26064184308052063,
      "learning_rate": 3.294610675856434e-08,
      "loss": 0.0081,
      "step": 3045260
    },
    {
      "epoch": 4.9836675111119835,
      "grad_norm": 0.21250948309898376,
      "learning_rate": 3.2880214545047215e-08,
      "loss": 0.0055,
      "step": 3045280
    },
    {
      "epoch": 4.983700241550637,
      "grad_norm": 0.3739489018917084,
      "learning_rate": 3.281432233153008e-08,
      "loss": 0.0094,
      "step": 3045300
    },
    {
      "epoch": 4.983732971989291,
      "grad_norm": 0.16169093549251556,
      "learning_rate": 3.274843011801296e-08,
      "loss": 0.0078,
      "step": 3045320
    },
    {
      "epoch": 4.983765702427944,
      "grad_norm": 0.1677159070968628,
      "learning_rate": 3.268253790449583e-08,
      "loss": 0.007,
      "step": 3045340
    },
    {
      "epoch": 4.983798432866597,
      "grad_norm": 0.3300597369670868,
      "learning_rate": 3.26166456909787e-08,
      "loss": 0.0117,
      "step": 3045360
    },
    {
      "epoch": 4.9838311633052506,
      "grad_norm": 0.3832571506500244,
      "learning_rate": 3.2550753477461576e-08,
      "loss": 0.008,
      "step": 3045380
    },
    {
      "epoch": 4.983863893743904,
      "grad_norm": 1.029617428779602,
      "learning_rate": 3.248486126394444e-08,
      "loss": 0.0113,
      "step": 3045400
    },
    {
      "epoch": 4.983896624182558,
      "grad_norm": 0.19446410238742828,
      "learning_rate": 3.241896905042731e-08,
      "loss": 0.0089,
      "step": 3045420
    },
    {
      "epoch": 4.9839293546212105,
      "grad_norm": 0.25395405292510986,
      "learning_rate": 3.2353076836910185e-08,
      "loss": 0.0115,
      "step": 3045440
    },
    {
      "epoch": 4.983962085059864,
      "grad_norm": 0.41767874360084534,
      "learning_rate": 3.228718462339306e-08,
      "loss": 0.0063,
      "step": 3045460
    },
    {
      "epoch": 4.983994815498518,
      "grad_norm": 0.14774572849273682,
      "learning_rate": 3.222129240987593e-08,
      "loss": 0.0069,
      "step": 3045480
    },
    {
      "epoch": 4.98402754593717,
      "grad_norm": 0.6253758072853088,
      "learning_rate": 3.2155400196358795e-08,
      "loss": 0.0145,
      "step": 3045500
    },
    {
      "epoch": 4.984060276375824,
      "grad_norm": 0.3058575689792633,
      "learning_rate": 3.208950798284167e-08,
      "loss": 0.0083,
      "step": 3045520
    },
    {
      "epoch": 4.9840930068144775,
      "grad_norm": 0.3765605092048645,
      "learning_rate": 3.2023615769324546e-08,
      "loss": 0.0065,
      "step": 3045540
    },
    {
      "epoch": 4.98412573725313,
      "grad_norm": 0.12935785949230194,
      "learning_rate": 3.195772355580741e-08,
      "loss": 0.0077,
      "step": 3045560
    },
    {
      "epoch": 4.984158467691784,
      "grad_norm": 0.23172034323215485,
      "learning_rate": 3.189183134229028e-08,
      "loss": 0.0125,
      "step": 3045580
    },
    {
      "epoch": 4.984191198130437,
      "grad_norm": 0.12256480753421783,
      "learning_rate": 3.1825939128773155e-08,
      "loss": 0.0089,
      "step": 3045600
    },
    {
      "epoch": 4.984223928569091,
      "grad_norm": 0.1272526979446411,
      "learning_rate": 3.176004691525603e-08,
      "loss": 0.0062,
      "step": 3045620
    },
    {
      "epoch": 4.984256659007744,
      "grad_norm": 0.34739768505096436,
      "learning_rate": 3.16941547017389e-08,
      "loss": 0.0098,
      "step": 3045640
    },
    {
      "epoch": 4.984289389446397,
      "grad_norm": 0.2360409051179886,
      "learning_rate": 3.1628262488221765e-08,
      "loss": 0.0077,
      "step": 3045660
    },
    {
      "epoch": 4.984322119885051,
      "grad_norm": 0.10758192092180252,
      "learning_rate": 3.1562370274704644e-08,
      "loss": 0.0072,
      "step": 3045680
    },
    {
      "epoch": 4.9843548503237045,
      "grad_norm": 0.3140852153301239,
      "learning_rate": 3.149647806118751e-08,
      "loss": 0.0064,
      "step": 3045700
    },
    {
      "epoch": 4.984387580762357,
      "grad_norm": 0.3436545729637146,
      "learning_rate": 3.143058584767038e-08,
      "loss": 0.0112,
      "step": 3045720
    },
    {
      "epoch": 4.984420311201011,
      "grad_norm": 0.2761402428150177,
      "learning_rate": 3.1364693634153253e-08,
      "loss": 0.0091,
      "step": 3045740
    },
    {
      "epoch": 4.984453041639664,
      "grad_norm": 0.08444192260503769,
      "learning_rate": 3.1298801420636125e-08,
      "loss": 0.0114,
      "step": 3045760
    },
    {
      "epoch": 4.984485772078317,
      "grad_norm": 0.10578323900699615,
      "learning_rate": 3.1232909207119e-08,
      "loss": 0.0099,
      "step": 3045780
    },
    {
      "epoch": 4.984518502516971,
      "grad_norm": 0.4329977333545685,
      "learning_rate": 3.116701699360187e-08,
      "loss": 0.0078,
      "step": 3045800
    },
    {
      "epoch": 4.984551232955624,
      "grad_norm": 0.5889375805854797,
      "learning_rate": 3.110112478008474e-08,
      "loss": 0.0136,
      "step": 3045820
    },
    {
      "epoch": 4.984583963394277,
      "grad_norm": 0.27036502957344055,
      "learning_rate": 3.1035232566567614e-08,
      "loss": 0.0076,
      "step": 3045840
    },
    {
      "epoch": 4.984616693832931,
      "grad_norm": 0.33746010065078735,
      "learning_rate": 3.096934035305048e-08,
      "loss": 0.0062,
      "step": 3045860
    },
    {
      "epoch": 4.984649424271584,
      "grad_norm": 0.4106680750846863,
      "learning_rate": 3.090344813953335e-08,
      "loss": 0.0102,
      "step": 3045880
    },
    {
      "epoch": 4.984682154710238,
      "grad_norm": 0.14779134094715118,
      "learning_rate": 3.0837555926016224e-08,
      "loss": 0.0088,
      "step": 3045900
    },
    {
      "epoch": 4.9847148851488905,
      "grad_norm": 0.09602797776460648,
      "learning_rate": 3.0771663712499096e-08,
      "loss": 0.0061,
      "step": 3045920
    },
    {
      "epoch": 4.984747615587544,
      "grad_norm": 0.34857863187789917,
      "learning_rate": 3.070577149898197e-08,
      "loss": 0.0078,
      "step": 3045940
    },
    {
      "epoch": 4.984780346026198,
      "grad_norm": 0.2910025715827942,
      "learning_rate": 3.063987928546484e-08,
      "loss": 0.0071,
      "step": 3045960
    },
    {
      "epoch": 4.984813076464851,
      "grad_norm": 0.2979585826396942,
      "learning_rate": 3.057398707194771e-08,
      "loss": 0.0123,
      "step": 3045980
    },
    {
      "epoch": 4.984845806903504,
      "grad_norm": 0.3396587669849396,
      "learning_rate": 3.0508094858430584e-08,
      "loss": 0.0061,
      "step": 3046000
    },
    {
      "epoch": 4.984878537342158,
      "grad_norm": 0.10871902853250504,
      "learning_rate": 3.044220264491345e-08,
      "loss": 0.0097,
      "step": 3046020
    },
    {
      "epoch": 4.984911267780811,
      "grad_norm": 0.09849989414215088,
      "learning_rate": 3.037631043139633e-08,
      "loss": 0.0048,
      "step": 3046040
    },
    {
      "epoch": 4.984943998219464,
      "grad_norm": 0.19746249914169312,
      "learning_rate": 3.0310418217879194e-08,
      "loss": 0.0117,
      "step": 3046060
    },
    {
      "epoch": 4.9849767286581175,
      "grad_norm": 0.35276320576667786,
      "learning_rate": 3.0244526004362066e-08,
      "loss": 0.0102,
      "step": 3046080
    },
    {
      "epoch": 4.985009459096771,
      "grad_norm": 0.17628246545791626,
      "learning_rate": 3.017863379084494e-08,
      "loss": 0.0079,
      "step": 3046100
    },
    {
      "epoch": 4.985042189535424,
      "grad_norm": 0.178323894739151,
      "learning_rate": 3.011274157732781e-08,
      "loss": 0.006,
      "step": 3046120
    },
    {
      "epoch": 4.985074919974077,
      "grad_norm": 0.30693957209587097,
      "learning_rate": 3.004684936381068e-08,
      "loss": 0.008,
      "step": 3046140
    },
    {
      "epoch": 4.985107650412731,
      "grad_norm": 0.3633768558502197,
      "learning_rate": 2.998095715029355e-08,
      "loss": 0.0094,
      "step": 3046160
    },
    {
      "epoch": 4.985140380851385,
      "grad_norm": 0.22679466009140015,
      "learning_rate": 2.9915064936776426e-08,
      "loss": 0.0078,
      "step": 3046180
    },
    {
      "epoch": 4.985173111290037,
      "grad_norm": 0.14293043315410614,
      "learning_rate": 2.98491727232593e-08,
      "loss": 0.0126,
      "step": 3046200
    },
    {
      "epoch": 4.985205841728691,
      "grad_norm": 0.8119642734527588,
      "learning_rate": 2.9783280509742167e-08,
      "loss": 0.0115,
      "step": 3046220
    },
    {
      "epoch": 4.9852385721673445,
      "grad_norm": 0.07759642601013184,
      "learning_rate": 2.9717388296225036e-08,
      "loss": 0.0061,
      "step": 3046240
    },
    {
      "epoch": 4.985271302605997,
      "grad_norm": 0.5362138152122498,
      "learning_rate": 2.965149608270791e-08,
      "loss": 0.0128,
      "step": 3046260
    },
    {
      "epoch": 4.985304033044651,
      "grad_norm": 0.3304845690727234,
      "learning_rate": 2.958560386919078e-08,
      "loss": 0.0094,
      "step": 3046280
    },
    {
      "epoch": 4.985336763483304,
      "grad_norm": 0.06168528273701668,
      "learning_rate": 2.951971165567365e-08,
      "loss": 0.0074,
      "step": 3046300
    },
    {
      "epoch": 4.985369493921958,
      "grad_norm": 0.3238176703453064,
      "learning_rate": 2.9453819442156524e-08,
      "loss": 0.0109,
      "step": 3046320
    },
    {
      "epoch": 4.985402224360611,
      "grad_norm": 0.3975849151611328,
      "learning_rate": 2.9387927228639396e-08,
      "loss": 0.0064,
      "step": 3046340
    },
    {
      "epoch": 4.985434954799264,
      "grad_norm": 0.12583531439304352,
      "learning_rate": 2.9322035015122265e-08,
      "loss": 0.0124,
      "step": 3046360
    },
    {
      "epoch": 4.985467685237918,
      "grad_norm": 0.6369640827178955,
      "learning_rate": 2.9256142801605134e-08,
      "loss": 0.0071,
      "step": 3046380
    },
    {
      "epoch": 4.9855004156765705,
      "grad_norm": 0.2996685206890106,
      "learning_rate": 2.919025058808801e-08,
      "loss": 0.0093,
      "step": 3046400
    },
    {
      "epoch": 4.985533146115224,
      "grad_norm": 0.1876370906829834,
      "learning_rate": 2.9124358374570878e-08,
      "loss": 0.0081,
      "step": 3046420
    },
    {
      "epoch": 4.985565876553878,
      "grad_norm": 0.23570510745048523,
      "learning_rate": 2.905846616105375e-08,
      "loss": 0.0057,
      "step": 3046440
    },
    {
      "epoch": 4.98559860699253,
      "grad_norm": 0.3452880382537842,
      "learning_rate": 2.899257394753662e-08,
      "loss": 0.0101,
      "step": 3046460
    },
    {
      "epoch": 4.985631337431184,
      "grad_norm": 0.19819247722625732,
      "learning_rate": 2.8926681734019494e-08,
      "loss": 0.0119,
      "step": 3046480
    },
    {
      "epoch": 4.985664067869838,
      "grad_norm": 0.37088242173194885,
      "learning_rate": 2.8860789520502363e-08,
      "loss": 0.0072,
      "step": 3046500
    },
    {
      "epoch": 4.985696798308491,
      "grad_norm": 0.23880164325237274,
      "learning_rate": 2.8794897306985235e-08,
      "loss": 0.0059,
      "step": 3046520
    },
    {
      "epoch": 4.985729528747144,
      "grad_norm": 0.11469337344169617,
      "learning_rate": 2.8729005093468107e-08,
      "loss": 0.0126,
      "step": 3046540
    },
    {
      "epoch": 4.9857622591857975,
      "grad_norm": 0.35581573843955994,
      "learning_rate": 2.866311287995098e-08,
      "loss": 0.0142,
      "step": 3046560
    },
    {
      "epoch": 4.985794989624451,
      "grad_norm": 0.24429793655872345,
      "learning_rate": 2.8597220666433848e-08,
      "loss": 0.0094,
      "step": 3046580
    },
    {
      "epoch": 4.985827720063105,
      "grad_norm": 0.2955690026283264,
      "learning_rate": 2.853132845291672e-08,
      "loss": 0.0067,
      "step": 3046600
    },
    {
      "epoch": 4.985860450501757,
      "grad_norm": 0.10757895559072495,
      "learning_rate": 2.8465436239399592e-08,
      "loss": 0.0089,
      "step": 3046620
    },
    {
      "epoch": 4.985893180940411,
      "grad_norm": 0.2303171008825302,
      "learning_rate": 2.8399544025882465e-08,
      "loss": 0.0067,
      "step": 3046640
    },
    {
      "epoch": 4.985925911379065,
      "grad_norm": 0.18748261034488678,
      "learning_rate": 2.8333651812365333e-08,
      "loss": 0.0087,
      "step": 3046660
    },
    {
      "epoch": 4.985958641817717,
      "grad_norm": 0.627952516078949,
      "learning_rate": 2.826775959884821e-08,
      "loss": 0.0095,
      "step": 3046680
    },
    {
      "epoch": 4.985991372256371,
      "grad_norm": 0.3346107006072998,
      "learning_rate": 2.8201867385331078e-08,
      "loss": 0.0118,
      "step": 3046700
    },
    {
      "epoch": 4.9860241026950245,
      "grad_norm": 0.24177567660808563,
      "learning_rate": 2.813597517181395e-08,
      "loss": 0.0105,
      "step": 3046720
    },
    {
      "epoch": 4.986056833133677,
      "grad_norm": 0.14580009877681732,
      "learning_rate": 2.807008295829682e-08,
      "loss": 0.0145,
      "step": 3046740
    },
    {
      "epoch": 4.986089563572331,
      "grad_norm": 0.0693332627415657,
      "learning_rate": 2.8004190744779694e-08,
      "loss": 0.0095,
      "step": 3046760
    },
    {
      "epoch": 4.986122294010984,
      "grad_norm": 0.16914722323417664,
      "learning_rate": 2.7938298531262563e-08,
      "loss": 0.0099,
      "step": 3046780
    },
    {
      "epoch": 4.986155024449638,
      "grad_norm": 0.06932799518108368,
      "learning_rate": 2.7872406317745435e-08,
      "loss": 0.0075,
      "step": 3046800
    },
    {
      "epoch": 4.986187754888291,
      "grad_norm": 0.21614763140678406,
      "learning_rate": 2.7806514104228304e-08,
      "loss": 0.0115,
      "step": 3046820
    },
    {
      "epoch": 4.986220485326944,
      "grad_norm": 0.14621776342391968,
      "learning_rate": 2.774062189071118e-08,
      "loss": 0.009,
      "step": 3046840
    },
    {
      "epoch": 4.986253215765598,
      "grad_norm": 0.5006498694419861,
      "learning_rate": 2.7674729677194048e-08,
      "loss": 0.0089,
      "step": 3046860
    },
    {
      "epoch": 4.9862859462042515,
      "grad_norm": 0.2479582577943802,
      "learning_rate": 2.760883746367692e-08,
      "loss": 0.0094,
      "step": 3046880
    },
    {
      "epoch": 4.986318676642904,
      "grad_norm": 0.4802417457103729,
      "learning_rate": 2.7542945250159792e-08,
      "loss": 0.0084,
      "step": 3046900
    },
    {
      "epoch": 4.986351407081558,
      "grad_norm": 0.46188169717788696,
      "learning_rate": 2.7477053036642664e-08,
      "loss": 0.0126,
      "step": 3046920
    },
    {
      "epoch": 4.986384137520211,
      "grad_norm": 0.4828215539455414,
      "learning_rate": 2.7411160823125533e-08,
      "loss": 0.0055,
      "step": 3046940
    },
    {
      "epoch": 4.986416867958864,
      "grad_norm": 0.3610619604587555,
      "learning_rate": 2.73452686096084e-08,
      "loss": 0.0068,
      "step": 3046960
    },
    {
      "epoch": 4.986449598397518,
      "grad_norm": 0.20077545940876007,
      "learning_rate": 2.7279376396091277e-08,
      "loss": 0.0095,
      "step": 3046980
    },
    {
      "epoch": 4.986482328836171,
      "grad_norm": 0.3514716327190399,
      "learning_rate": 2.721348418257415e-08,
      "loss": 0.0063,
      "step": 3047000
    },
    {
      "epoch": 4.986515059274824,
      "grad_norm": 0.22314496338367462,
      "learning_rate": 2.7147591969057018e-08,
      "loss": 0.0107,
      "step": 3047020
    },
    {
      "epoch": 4.986547789713478,
      "grad_norm": 0.1432703733444214,
      "learning_rate": 2.7081699755539887e-08,
      "loss": 0.0087,
      "step": 3047040
    },
    {
      "epoch": 4.986580520152131,
      "grad_norm": 0.40424618124961853,
      "learning_rate": 2.7015807542022762e-08,
      "loss": 0.0083,
      "step": 3047060
    },
    {
      "epoch": 4.986613250590785,
      "grad_norm": 0.22927162051200867,
      "learning_rate": 2.6949915328505634e-08,
      "loss": 0.0088,
      "step": 3047080
    },
    {
      "epoch": 4.9866459810294375,
      "grad_norm": 0.1899224817752838,
      "learning_rate": 2.6884023114988503e-08,
      "loss": 0.0081,
      "step": 3047100
    },
    {
      "epoch": 4.986678711468091,
      "grad_norm": 0.1568487286567688,
      "learning_rate": 2.6818130901471378e-08,
      "loss": 0.0054,
      "step": 3047120
    },
    {
      "epoch": 4.986711441906745,
      "grad_norm": 0.17271244525909424,
      "learning_rate": 2.6752238687954247e-08,
      "loss": 0.0081,
      "step": 3047140
    },
    {
      "epoch": 4.986744172345398,
      "grad_norm": 0.4875097870826721,
      "learning_rate": 2.6686346474437116e-08,
      "loss": 0.0086,
      "step": 3047160
    },
    {
      "epoch": 4.986776902784051,
      "grad_norm": 0.20704331994056702,
      "learning_rate": 2.6620454260919988e-08,
      "loss": 0.0104,
      "step": 3047180
    },
    {
      "epoch": 4.9868096332227045,
      "grad_norm": 0.12642309069633484,
      "learning_rate": 2.6554562047402863e-08,
      "loss": 0.0104,
      "step": 3047200
    },
    {
      "epoch": 4.986842363661358,
      "grad_norm": 0.15007194876670837,
      "learning_rate": 2.6488669833885732e-08,
      "loss": 0.0108,
      "step": 3047220
    },
    {
      "epoch": 4.986875094100011,
      "grad_norm": 0.2383260875940323,
      "learning_rate": 2.64227776203686e-08,
      "loss": 0.009,
      "step": 3047240
    },
    {
      "epoch": 4.986907824538664,
      "grad_norm": 0.07664153724908829,
      "learning_rate": 2.6356885406851476e-08,
      "loss": 0.0123,
      "step": 3047260
    },
    {
      "epoch": 4.986940554977318,
      "grad_norm": 0.18220271170139313,
      "learning_rate": 2.6290993193334345e-08,
      "loss": 0.0089,
      "step": 3047280
    },
    {
      "epoch": 4.986973285415971,
      "grad_norm": 0.24988310039043427,
      "learning_rate": 2.6225100979817217e-08,
      "loss": 0.0086,
      "step": 3047300
    },
    {
      "epoch": 4.987006015854624,
      "grad_norm": 0.29484692215919495,
      "learning_rate": 2.6159208766300086e-08,
      "loss": 0.0101,
      "step": 3047320
    },
    {
      "epoch": 4.987038746293278,
      "grad_norm": 0.12124003469944,
      "learning_rate": 2.609331655278296e-08,
      "loss": 0.0082,
      "step": 3047340
    },
    {
      "epoch": 4.9870714767319315,
      "grad_norm": 0.27664947509765625,
      "learning_rate": 2.602742433926583e-08,
      "loss": 0.0104,
      "step": 3047360
    },
    {
      "epoch": 4.987104207170584,
      "grad_norm": 0.2805964946746826,
      "learning_rate": 2.5961532125748702e-08,
      "loss": 0.0081,
      "step": 3047380
    },
    {
      "epoch": 4.987136937609238,
      "grad_norm": 0.21494990587234497,
      "learning_rate": 2.589563991223157e-08,
      "loss": 0.0111,
      "step": 3047400
    },
    {
      "epoch": 4.987169668047891,
      "grad_norm": 0.36240154504776,
      "learning_rate": 2.5829747698714447e-08,
      "loss": 0.0129,
      "step": 3047420
    },
    {
      "epoch": 4.987202398486545,
      "grad_norm": 0.3523830771446228,
      "learning_rate": 2.5763855485197315e-08,
      "loss": 0.0105,
      "step": 3047440
    },
    {
      "epoch": 4.987235128925198,
      "grad_norm": 0.07496163249015808,
      "learning_rate": 2.5697963271680187e-08,
      "loss": 0.0069,
      "step": 3047460
    },
    {
      "epoch": 4.987267859363851,
      "grad_norm": 0.5551964640617371,
      "learning_rate": 2.563207105816306e-08,
      "loss": 0.0095,
      "step": 3047480
    },
    {
      "epoch": 4.987300589802505,
      "grad_norm": 0.23513181507587433,
      "learning_rate": 2.556617884464593e-08,
      "loss": 0.0104,
      "step": 3047500
    },
    {
      "epoch": 4.987333320241158,
      "grad_norm": 0.27431678771972656,
      "learning_rate": 2.55002866311288e-08,
      "loss": 0.0103,
      "step": 3047520
    },
    {
      "epoch": 4.987366050679811,
      "grad_norm": 0.17759685218334198,
      "learning_rate": 2.5434394417611672e-08,
      "loss": 0.0088,
      "step": 3047540
    },
    {
      "epoch": 4.987398781118465,
      "grad_norm": 0.35480353236198425,
      "learning_rate": 2.5368502204094545e-08,
      "loss": 0.0073,
      "step": 3047560
    },
    {
      "epoch": 4.9874315115571175,
      "grad_norm": 0.11558535695075989,
      "learning_rate": 2.5302609990577417e-08,
      "loss": 0.0072,
      "step": 3047580
    },
    {
      "epoch": 4.987464241995771,
      "grad_norm": 0.21475787460803986,
      "learning_rate": 2.5236717777060285e-08,
      "loss": 0.0116,
      "step": 3047600
    },
    {
      "epoch": 4.987496972434425,
      "grad_norm": 0.09258720278739929,
      "learning_rate": 2.5170825563543158e-08,
      "loss": 0.0073,
      "step": 3047620
    },
    {
      "epoch": 4.987529702873078,
      "grad_norm": 0.11381718516349792,
      "learning_rate": 2.510493335002603e-08,
      "loss": 0.0129,
      "step": 3047640
    },
    {
      "epoch": 4.987562433311731,
      "grad_norm": 0.062132690101861954,
      "learning_rate": 2.5039041136508902e-08,
      "loss": 0.0064,
      "step": 3047660
    },
    {
      "epoch": 4.987595163750385,
      "grad_norm": 0.12244096398353577,
      "learning_rate": 2.497314892299177e-08,
      "loss": 0.0066,
      "step": 3047680
    },
    {
      "epoch": 4.987627894189038,
      "grad_norm": 0.18258516490459442,
      "learning_rate": 2.4907256709474646e-08,
      "loss": 0.0113,
      "step": 3047700
    },
    {
      "epoch": 4.987660624627691,
      "grad_norm": 0.28335079550743103,
      "learning_rate": 2.4841364495957515e-08,
      "loss": 0.0117,
      "step": 3047720
    },
    {
      "epoch": 4.9876933550663445,
      "grad_norm": 0.17975492775440216,
      "learning_rate": 2.4775472282440387e-08,
      "loss": 0.006,
      "step": 3047740
    },
    {
      "epoch": 4.987726085504998,
      "grad_norm": 0.08824651688337326,
      "learning_rate": 2.4709580068923256e-08,
      "loss": 0.0078,
      "step": 3047760
    },
    {
      "epoch": 4.987758815943652,
      "grad_norm": 0.23298271000385284,
      "learning_rate": 2.464368785540613e-08,
      "loss": 0.0089,
      "step": 3047780
    },
    {
      "epoch": 4.987791546382304,
      "grad_norm": 0.2960350811481476,
      "learning_rate": 2.4577795641889e-08,
      "loss": 0.0092,
      "step": 3047800
    },
    {
      "epoch": 4.987824276820958,
      "grad_norm": 0.4773657023906708,
      "learning_rate": 2.451190342837187e-08,
      "loss": 0.0114,
      "step": 3047820
    },
    {
      "epoch": 4.987857007259612,
      "grad_norm": 0.32488682866096497,
      "learning_rate": 2.4446011214854744e-08,
      "loss": 0.0098,
      "step": 3047840
    },
    {
      "epoch": 4.987889737698264,
      "grad_norm": 0.07483410835266113,
      "learning_rate": 2.4380119001337616e-08,
      "loss": 0.009,
      "step": 3047860
    },
    {
      "epoch": 4.987922468136918,
      "grad_norm": 0.20821253955364227,
      "learning_rate": 2.4314226787820485e-08,
      "loss": 0.0066,
      "step": 3047880
    },
    {
      "epoch": 4.9879551985755715,
      "grad_norm": 0.10367777198553085,
      "learning_rate": 2.4248334574303354e-08,
      "loss": 0.0066,
      "step": 3047900
    },
    {
      "epoch": 4.987987929014224,
      "grad_norm": 0.39796897768974304,
      "learning_rate": 2.418244236078623e-08,
      "loss": 0.0087,
      "step": 3047920
    },
    {
      "epoch": 4.988020659452878,
      "grad_norm": 0.3320204019546509,
      "learning_rate": 2.41165501472691e-08,
      "loss": 0.0126,
      "step": 3047940
    },
    {
      "epoch": 4.988053389891531,
      "grad_norm": 0.42263999581336975,
      "learning_rate": 2.405065793375197e-08,
      "loss": 0.0084,
      "step": 3047960
    },
    {
      "epoch": 4.988086120330185,
      "grad_norm": 0.09373805671930313,
      "learning_rate": 2.398476572023484e-08,
      "loss": 0.0161,
      "step": 3047980
    },
    {
      "epoch": 4.988118850768838,
      "grad_norm": 0.4125632345676422,
      "learning_rate": 2.3918873506717714e-08,
      "loss": 0.008,
      "step": 3048000
    },
    {
      "epoch": 4.988151581207491,
      "grad_norm": 0.07820999622344971,
      "learning_rate": 2.3852981293200583e-08,
      "loss": 0.0124,
      "step": 3048020
    },
    {
      "epoch": 4.988184311646145,
      "grad_norm": 0.12141314148902893,
      "learning_rate": 2.3787089079683455e-08,
      "loss": 0.0125,
      "step": 3048040
    },
    {
      "epoch": 4.988217042084798,
      "grad_norm": 0.1632343977689743,
      "learning_rate": 2.372119686616633e-08,
      "loss": 0.007,
      "step": 3048060
    },
    {
      "epoch": 4.988249772523451,
      "grad_norm": 0.3293195068836212,
      "learning_rate": 2.36553046526492e-08,
      "loss": 0.0081,
      "step": 3048080
    },
    {
      "epoch": 4.988282502962105,
      "grad_norm": 0.14862987399101257,
      "learning_rate": 2.3589412439132068e-08,
      "loss": 0.0065,
      "step": 3048100
    },
    {
      "epoch": 4.988315233400758,
      "grad_norm": 0.17294619977474213,
      "learning_rate": 2.352352022561494e-08,
      "loss": 0.0136,
      "step": 3048120
    },
    {
      "epoch": 4.988347963839411,
      "grad_norm": 0.10426007211208344,
      "learning_rate": 2.3457628012097812e-08,
      "loss": 0.0086,
      "step": 3048140
    },
    {
      "epoch": 4.988380694278065,
      "grad_norm": 0.2787163257598877,
      "learning_rate": 2.3391735798580684e-08,
      "loss": 0.0087,
      "step": 3048160
    },
    {
      "epoch": 4.988413424716718,
      "grad_norm": 0.11161000281572342,
      "learning_rate": 2.3325843585063553e-08,
      "loss": 0.0064,
      "step": 3048180
    },
    {
      "epoch": 4.988446155155371,
      "grad_norm": 0.2514868676662445,
      "learning_rate": 2.325995137154643e-08,
      "loss": 0.0079,
      "step": 3048200
    },
    {
      "epoch": 4.9884788855940245,
      "grad_norm": 0.23968113958835602,
      "learning_rate": 2.3194059158029297e-08,
      "loss": 0.0069,
      "step": 3048220
    },
    {
      "epoch": 4.988511616032678,
      "grad_norm": 0.15760955214500427,
      "learning_rate": 2.312816694451217e-08,
      "loss": 0.0111,
      "step": 3048240
    },
    {
      "epoch": 4.988544346471332,
      "grad_norm": 0.2539837956428528,
      "learning_rate": 2.3062274730995038e-08,
      "loss": 0.008,
      "step": 3048260
    },
    {
      "epoch": 4.988577076909984,
      "grad_norm": 0.19417092204093933,
      "learning_rate": 2.2996382517477913e-08,
      "loss": 0.0064,
      "step": 3048280
    },
    {
      "epoch": 4.988609807348638,
      "grad_norm": 0.20205903053283691,
      "learning_rate": 2.2930490303960782e-08,
      "loss": 0.0099,
      "step": 3048300
    },
    {
      "epoch": 4.988642537787292,
      "grad_norm": 0.8112949728965759,
      "learning_rate": 2.2864598090443654e-08,
      "loss": 0.013,
      "step": 3048320
    },
    {
      "epoch": 4.988675268225945,
      "grad_norm": 0.40014955401420593,
      "learning_rate": 2.2798705876926523e-08,
      "loss": 0.0132,
      "step": 3048340
    },
    {
      "epoch": 4.988707998664598,
      "grad_norm": 0.22033922374248505,
      "learning_rate": 2.27328136634094e-08,
      "loss": 0.0075,
      "step": 3048360
    },
    {
      "epoch": 4.9887407291032515,
      "grad_norm": 0.18497028946876526,
      "learning_rate": 2.2666921449892267e-08,
      "loss": 0.0076,
      "step": 3048380
    },
    {
      "epoch": 4.988773459541905,
      "grad_norm": 0.22512663900852203,
      "learning_rate": 2.260102923637514e-08,
      "loss": 0.0098,
      "step": 3048400
    },
    {
      "epoch": 4.988806189980558,
      "grad_norm": 0.1766863763332367,
      "learning_rate": 2.253513702285801e-08,
      "loss": 0.0093,
      "step": 3048420
    },
    {
      "epoch": 4.988838920419211,
      "grad_norm": 0.2021438628435135,
      "learning_rate": 2.2469244809340884e-08,
      "loss": 0.0126,
      "step": 3048440
    },
    {
      "epoch": 4.988871650857865,
      "grad_norm": 0.07126086205244064,
      "learning_rate": 2.2403352595823752e-08,
      "loss": 0.0072,
      "step": 3048460
    },
    {
      "epoch": 4.988904381296518,
      "grad_norm": 0.331292986869812,
      "learning_rate": 2.2337460382306625e-08,
      "loss": 0.0069,
      "step": 3048480
    },
    {
      "epoch": 4.988937111735171,
      "grad_norm": 0.3706141412258148,
      "learning_rate": 2.2271568168789497e-08,
      "loss": 0.0099,
      "step": 3048500
    },
    {
      "epoch": 4.988969842173825,
      "grad_norm": 0.4586288034915924,
      "learning_rate": 2.220567595527237e-08,
      "loss": 0.0132,
      "step": 3048520
    },
    {
      "epoch": 4.9890025726124785,
      "grad_norm": 0.24584780633449554,
      "learning_rate": 2.2139783741755237e-08,
      "loss": 0.0094,
      "step": 3048540
    },
    {
      "epoch": 4.989035303051131,
      "grad_norm": 0.046241339296102524,
      "learning_rate": 2.2073891528238106e-08,
      "loss": 0.006,
      "step": 3048560
    },
    {
      "epoch": 4.989068033489785,
      "grad_norm": 0.14703482389450073,
      "learning_rate": 2.2007999314720982e-08,
      "loss": 0.0103,
      "step": 3048580
    },
    {
      "epoch": 4.989100763928438,
      "grad_norm": 0.17712435126304626,
      "learning_rate": 2.1942107101203854e-08,
      "loss": 0.0082,
      "step": 3048600
    },
    {
      "epoch": 4.989133494367092,
      "grad_norm": 0.380209356546402,
      "learning_rate": 2.1876214887686723e-08,
      "loss": 0.0086,
      "step": 3048620
    },
    {
      "epoch": 4.989166224805745,
      "grad_norm": 0.38760560750961304,
      "learning_rate": 2.1810322674169598e-08,
      "loss": 0.0074,
      "step": 3048640
    },
    {
      "epoch": 4.989198955244398,
      "grad_norm": 0.2869046628475189,
      "learning_rate": 2.1744430460652467e-08,
      "loss": 0.0077,
      "step": 3048660
    },
    {
      "epoch": 4.989231685683052,
      "grad_norm": 0.2606484889984131,
      "learning_rate": 2.167853824713534e-08,
      "loss": 0.0086,
      "step": 3048680
    },
    {
      "epoch": 4.989264416121705,
      "grad_norm": 0.21292448043823242,
      "learning_rate": 2.1612646033618208e-08,
      "loss": 0.0119,
      "step": 3048700
    },
    {
      "epoch": 4.989297146560358,
      "grad_norm": 0.08367625623941422,
      "learning_rate": 2.1546753820101083e-08,
      "loss": 0.012,
      "step": 3048720
    },
    {
      "epoch": 4.989329876999012,
      "grad_norm": 0.3359600305557251,
      "learning_rate": 2.1480861606583952e-08,
      "loss": 0.009,
      "step": 3048740
    },
    {
      "epoch": 4.9893626074376645,
      "grad_norm": 0.25830814242362976,
      "learning_rate": 2.141496939306682e-08,
      "loss": 0.0078,
      "step": 3048760
    },
    {
      "epoch": 4.989395337876318,
      "grad_norm": 0.1509622186422348,
      "learning_rate": 2.1349077179549696e-08,
      "loss": 0.009,
      "step": 3048780
    },
    {
      "epoch": 4.989428068314972,
      "grad_norm": 0.1093667596578598,
      "learning_rate": 2.1283184966032568e-08,
      "loss": 0.0081,
      "step": 3048800
    },
    {
      "epoch": 4.989460798753625,
      "grad_norm": 0.760634183883667,
      "learning_rate": 2.1217292752515437e-08,
      "loss": 0.0104,
      "step": 3048820
    },
    {
      "epoch": 4.989493529192278,
      "grad_norm": 0.1481359899044037,
      "learning_rate": 2.1151400538998306e-08,
      "loss": 0.0056,
      "step": 3048840
    },
    {
      "epoch": 4.9895262596309315,
      "grad_norm": 0.42629244923591614,
      "learning_rate": 2.108550832548118e-08,
      "loss": 0.009,
      "step": 3048860
    },
    {
      "epoch": 4.989558990069585,
      "grad_norm": 0.3406875729560852,
      "learning_rate": 2.101961611196405e-08,
      "loss": 0.0117,
      "step": 3048880
    },
    {
      "epoch": 4.989591720508239,
      "grad_norm": 0.04112851247191429,
      "learning_rate": 2.0953723898446922e-08,
      "loss": 0.0103,
      "step": 3048900
    },
    {
      "epoch": 4.989624450946891,
      "grad_norm": 0.09209651499986649,
      "learning_rate": 2.088783168492979e-08,
      "loss": 0.006,
      "step": 3048920
    },
    {
      "epoch": 4.989657181385545,
      "grad_norm": 0.25123223662376404,
      "learning_rate": 2.0821939471412666e-08,
      "loss": 0.0102,
      "step": 3048940
    },
    {
      "epoch": 4.989689911824199,
      "grad_norm": 0.4373430907726288,
      "learning_rate": 2.0756047257895535e-08,
      "loss": 0.0066,
      "step": 3048960
    },
    {
      "epoch": 4.989722642262851,
      "grad_norm": 0.13187932968139648,
      "learning_rate": 2.0690155044378407e-08,
      "loss": 0.0068,
      "step": 3048980
    },
    {
      "epoch": 4.989755372701505,
      "grad_norm": 0.18552832305431366,
      "learning_rate": 2.062426283086128e-08,
      "loss": 0.0119,
      "step": 3049000
    },
    {
      "epoch": 4.9897881031401585,
      "grad_norm": 0.06728304922580719,
      "learning_rate": 2.055837061734415e-08,
      "loss": 0.0068,
      "step": 3049020
    },
    {
      "epoch": 4.989820833578811,
      "grad_norm": 0.1823519915342331,
      "learning_rate": 2.049247840382702e-08,
      "loss": 0.0068,
      "step": 3049040
    },
    {
      "epoch": 4.989853564017465,
      "grad_norm": 0.20956867933273315,
      "learning_rate": 2.0426586190309892e-08,
      "loss": 0.0065,
      "step": 3049060
    },
    {
      "epoch": 4.989886294456118,
      "grad_norm": 0.15570102632045746,
      "learning_rate": 2.0360693976792764e-08,
      "loss": 0.0142,
      "step": 3049080
    },
    {
      "epoch": 4.989919024894772,
      "grad_norm": 0.053139712661504745,
      "learning_rate": 2.0294801763275636e-08,
      "loss": 0.0083,
      "step": 3049100
    },
    {
      "epoch": 4.989951755333425,
      "grad_norm": 0.25133606791496277,
      "learning_rate": 2.0228909549758505e-08,
      "loss": 0.0086,
      "step": 3049120
    },
    {
      "epoch": 4.989984485772078,
      "grad_norm": 0.09466930478811264,
      "learning_rate": 2.0163017336241377e-08,
      "loss": 0.0078,
      "step": 3049140
    },
    {
      "epoch": 4.990017216210732,
      "grad_norm": 0.6208592057228088,
      "learning_rate": 2.009712512272425e-08,
      "loss": 0.0085,
      "step": 3049160
    },
    {
      "epoch": 4.990049946649385,
      "grad_norm": 0.30714187026023865,
      "learning_rate": 2.003123290920712e-08,
      "loss": 0.0073,
      "step": 3049180
    },
    {
      "epoch": 4.990082677088038,
      "grad_norm": 0.2666182219982147,
      "learning_rate": 1.996534069568999e-08,
      "loss": 0.0088,
      "step": 3049200
    },
    {
      "epoch": 4.990115407526692,
      "grad_norm": 0.2163715809583664,
      "learning_rate": 1.9899448482172866e-08,
      "loss": 0.01,
      "step": 3049220
    },
    {
      "epoch": 4.990148137965345,
      "grad_norm": 0.05508524179458618,
      "learning_rate": 1.9833556268655734e-08,
      "loss": 0.0083,
      "step": 3049240
    },
    {
      "epoch": 4.990180868403998,
      "grad_norm": 0.2766801118850708,
      "learning_rate": 1.9767664055138606e-08,
      "loss": 0.0088,
      "step": 3049260
    },
    {
      "epoch": 4.990213598842652,
      "grad_norm": 0.17524170875549316,
      "learning_rate": 1.9701771841621475e-08,
      "loss": 0.0104,
      "step": 3049280
    },
    {
      "epoch": 4.990246329281305,
      "grad_norm": 0.09450899064540863,
      "learning_rate": 1.963587962810435e-08,
      "loss": 0.0117,
      "step": 3049300
    },
    {
      "epoch": 4.990279059719958,
      "grad_norm": 0.20227451622486115,
      "learning_rate": 1.956998741458722e-08,
      "loss": 0.0137,
      "step": 3049320
    },
    {
      "epoch": 4.990311790158612,
      "grad_norm": 0.06763710826635361,
      "learning_rate": 1.950409520107009e-08,
      "loss": 0.0124,
      "step": 3049340
    },
    {
      "epoch": 4.990344520597265,
      "grad_norm": 0.09136936068534851,
      "learning_rate": 1.9438202987552964e-08,
      "loss": 0.0042,
      "step": 3049360
    },
    {
      "epoch": 4.990377251035918,
      "grad_norm": 0.13416238129138947,
      "learning_rate": 1.9372310774035836e-08,
      "loss": 0.008,
      "step": 3049380
    },
    {
      "epoch": 4.9904099814745715,
      "grad_norm": 0.23583343625068665,
      "learning_rate": 1.9306418560518704e-08,
      "loss": 0.0078,
      "step": 3049400
    },
    {
      "epoch": 4.990442711913225,
      "grad_norm": 0.5988490581512451,
      "learning_rate": 1.9240526347001577e-08,
      "loss": 0.0111,
      "step": 3049420
    },
    {
      "epoch": 4.990475442351879,
      "grad_norm": 0.5437216758728027,
      "learning_rate": 1.917463413348445e-08,
      "loss": 0.0112,
      "step": 3049440
    },
    {
      "epoch": 4.990508172790531,
      "grad_norm": 0.1933337152004242,
      "learning_rate": 1.910874191996732e-08,
      "loss": 0.0087,
      "step": 3049460
    },
    {
      "epoch": 4.990540903229185,
      "grad_norm": 0.18000543117523193,
      "learning_rate": 1.904284970645019e-08,
      "loss": 0.0092,
      "step": 3049480
    },
    {
      "epoch": 4.990573633667839,
      "grad_norm": 0.09354403614997864,
      "learning_rate": 1.8976957492933062e-08,
      "loss": 0.0086,
      "step": 3049500
    },
    {
      "epoch": 4.990606364106492,
      "grad_norm": 0.19732676446437836,
      "learning_rate": 1.891106527941593e-08,
      "loss": 0.013,
      "step": 3049520
    },
    {
      "epoch": 4.990639094545145,
      "grad_norm": 0.165324866771698,
      "learning_rate": 1.8845173065898806e-08,
      "loss": 0.0055,
      "step": 3049540
    },
    {
      "epoch": 4.9906718249837985,
      "grad_norm": 0.03559288755059242,
      "learning_rate": 1.8779280852381678e-08,
      "loss": 0.0065,
      "step": 3049560
    },
    {
      "epoch": 4.990704555422452,
      "grad_norm": 0.542645275592804,
      "learning_rate": 1.8713388638864547e-08,
      "loss": 0.0098,
      "step": 3049580
    },
    {
      "epoch": 4.990737285861105,
      "grad_norm": 0.34450194239616394,
      "learning_rate": 1.864749642534742e-08,
      "loss": 0.0086,
      "step": 3049600
    },
    {
      "epoch": 4.990770016299758,
      "grad_norm": 0.23197613656520844,
      "learning_rate": 1.8581604211830288e-08,
      "loss": 0.0076,
      "step": 3049620
    },
    {
      "epoch": 4.990802746738412,
      "grad_norm": 0.25508415699005127,
      "learning_rate": 1.851571199831316e-08,
      "loss": 0.0106,
      "step": 3049640
    },
    {
      "epoch": 4.990835477177065,
      "grad_norm": 0.8617072105407715,
      "learning_rate": 1.8449819784796032e-08,
      "loss": 0.0106,
      "step": 3049660
    },
    {
      "epoch": 4.990868207615718,
      "grad_norm": 0.1792154610157013,
      "learning_rate": 1.8383927571278904e-08,
      "loss": 0.0058,
      "step": 3049680
    },
    {
      "epoch": 4.990900938054372,
      "grad_norm": 0.21799954771995544,
      "learning_rate": 1.8318035357761773e-08,
      "loss": 0.007,
      "step": 3049700
    },
    {
      "epoch": 4.990933668493025,
      "grad_norm": 0.24585816264152527,
      "learning_rate": 1.8252143144244645e-08,
      "loss": 0.0076,
      "step": 3049720
    },
    {
      "epoch": 4.990966398931678,
      "grad_norm": 0.22395801544189453,
      "learning_rate": 1.8186250930727517e-08,
      "loss": 0.0074,
      "step": 3049740
    },
    {
      "epoch": 4.990999129370332,
      "grad_norm": 0.23739242553710938,
      "learning_rate": 1.812035871721039e-08,
      "loss": 0.0079,
      "step": 3049760
    },
    {
      "epoch": 4.991031859808985,
      "grad_norm": 0.30162501335144043,
      "learning_rate": 1.805446650369326e-08,
      "loss": 0.0078,
      "step": 3049780
    },
    {
      "epoch": 4.991064590247639,
      "grad_norm": 0.2519603967666626,
      "learning_rate": 1.798857429017613e-08,
      "loss": 0.0102,
      "step": 3049800
    },
    {
      "epoch": 4.991097320686292,
      "grad_norm": 0.10880164802074432,
      "learning_rate": 1.7922682076659002e-08,
      "loss": 0.0124,
      "step": 3049820
    },
    {
      "epoch": 4.991130051124945,
      "grad_norm": 0.30568620562553406,
      "learning_rate": 1.7856789863141874e-08,
      "loss": 0.0072,
      "step": 3049840
    },
    {
      "epoch": 4.991162781563599,
      "grad_norm": 0.04420638084411621,
      "learning_rate": 1.7790897649624746e-08,
      "loss": 0.0073,
      "step": 3049860
    },
    {
      "epoch": 4.9911955120022515,
      "grad_norm": 0.11346281319856644,
      "learning_rate": 1.7725005436107615e-08,
      "loss": 0.0101,
      "step": 3049880
    },
    {
      "epoch": 4.991228242440905,
      "grad_norm": 0.07321349531412125,
      "learning_rate": 1.7659113222590487e-08,
      "loss": 0.0061,
      "step": 3049900
    },
    {
      "epoch": 4.991260972879559,
      "grad_norm": 0.21245436370372772,
      "learning_rate": 1.759322100907336e-08,
      "loss": 0.0088,
      "step": 3049920
    },
    {
      "epoch": 4.991293703318211,
      "grad_norm": 0.41524919867515564,
      "learning_rate": 1.752732879555623e-08,
      "loss": 0.0081,
      "step": 3049940
    },
    {
      "epoch": 4.991326433756865,
      "grad_norm": 0.1290081888437271,
      "learning_rate": 1.7461436582039103e-08,
      "loss": 0.0083,
      "step": 3049960
    },
    {
      "epoch": 4.991359164195519,
      "grad_norm": 0.5018594264984131,
      "learning_rate": 1.7395544368521972e-08,
      "loss": 0.0074,
      "step": 3049980
    },
    {
      "epoch": 4.991391894634172,
      "grad_norm": 0.2993618845939636,
      "learning_rate": 1.7329652155004844e-08,
      "loss": 0.0148,
      "step": 3050000
    },
    {
      "epoch": 4.991391894634172,
      "eval_loss": 0.005603602156043053,
      "eval_runtime": 6462.4941,
      "eval_samples_per_second": 159.05,
      "eval_steps_per_second": 15.905,
      "eval_sts-dev_pearson_cosine": 0.9873383884873678,
      "eval_sts-dev_spearman_cosine": 0.8970643702607825,
      "step": 3050000
    },
    {
      "epoch": 4.991424625072825,
      "grad_norm": 0.17731571197509766,
      "learning_rate": 1.7263759941487716e-08,
      "loss": 0.0068,
      "step": 3050020
    },
    {
      "epoch": 4.9914573555114785,
      "grad_norm": 0.19488200545310974,
      "learning_rate": 1.719786772797059e-08,
      "loss": 0.0061,
      "step": 3050040
    },
    {
      "epoch": 4.991490085950132,
      "grad_norm": 0.15551809966564178,
      "learning_rate": 1.7131975514453457e-08,
      "loss": 0.0091,
      "step": 3050060
    },
    {
      "epoch": 4.991522816388786,
      "grad_norm": 0.5330217480659485,
      "learning_rate": 1.706608330093633e-08,
      "loss": 0.016,
      "step": 3050080
    },
    {
      "epoch": 4.991555546827438,
      "grad_norm": 0.4977000057697296,
      "learning_rate": 1.70001910874192e-08,
      "loss": 0.0071,
      "step": 3050100
    },
    {
      "epoch": 4.991588277266092,
      "grad_norm": 0.07106135040521622,
      "learning_rate": 1.6934298873902073e-08,
      "loss": 0.0071,
      "step": 3050120
    },
    {
      "epoch": 4.991621007704746,
      "grad_norm": 0.33896079659461975,
      "learning_rate": 1.6868406660384946e-08,
      "loss": 0.0071,
      "step": 3050140
    },
    {
      "epoch": 4.991653738143398,
      "grad_norm": 0.09601061791181564,
      "learning_rate": 1.6802514446867814e-08,
      "loss": 0.0108,
      "step": 3050160
    },
    {
      "epoch": 4.991686468582052,
      "grad_norm": 0.2234678566455841,
      "learning_rate": 1.6736622233350686e-08,
      "loss": 0.0095,
      "step": 3050180
    },
    {
      "epoch": 4.9917191990207055,
      "grad_norm": 0.2722260653972626,
      "learning_rate": 1.667073001983356e-08,
      "loss": 0.0081,
      "step": 3050200
    },
    {
      "epoch": 4.991751929459358,
      "grad_norm": 0.20471957325935364,
      "learning_rate": 1.660483780631643e-08,
      "loss": 0.0071,
      "step": 3050220
    },
    {
      "epoch": 4.991784659898012,
      "grad_norm": 0.5368012189865112,
      "learning_rate": 1.65389455927993e-08,
      "loss": 0.0087,
      "step": 3050240
    },
    {
      "epoch": 4.991817390336665,
      "grad_norm": 0.2491707056760788,
      "learning_rate": 1.647305337928217e-08,
      "loss": 0.006,
      "step": 3050260
    },
    {
      "epoch": 4.991850120775319,
      "grad_norm": 0.3604153096675873,
      "learning_rate": 1.640716116576504e-08,
      "loss": 0.0089,
      "step": 3050280
    },
    {
      "epoch": 4.991882851213972,
      "grad_norm": 0.31244564056396484,
      "learning_rate": 1.6341268952247916e-08,
      "loss": 0.0126,
      "step": 3050300
    },
    {
      "epoch": 4.991915581652625,
      "grad_norm": 0.42827144265174866,
      "learning_rate": 1.6275376738730788e-08,
      "loss": 0.0113,
      "step": 3050320
    },
    {
      "epoch": 4.991948312091279,
      "grad_norm": 0.1308504343032837,
      "learning_rate": 1.6209484525213657e-08,
      "loss": 0.0099,
      "step": 3050340
    },
    {
      "epoch": 4.9919810425299325,
      "grad_norm": 0.10768485069274902,
      "learning_rate": 1.614359231169653e-08,
      "loss": 0.0098,
      "step": 3050360
    },
    {
      "epoch": 4.992013772968585,
      "grad_norm": 0.14353105425834656,
      "learning_rate": 1.6077700098179397e-08,
      "loss": 0.0083,
      "step": 3050380
    },
    {
      "epoch": 4.992046503407239,
      "grad_norm": 0.1082788035273552,
      "learning_rate": 1.6011807884662273e-08,
      "loss": 0.0084,
      "step": 3050400
    },
    {
      "epoch": 4.992079233845892,
      "grad_norm": 0.1958274096250534,
      "learning_rate": 1.594591567114514e-08,
      "loss": 0.0103,
      "step": 3050420
    },
    {
      "epoch": 4.992111964284545,
      "grad_norm": 0.69608074426651,
      "learning_rate": 1.5880023457628014e-08,
      "loss": 0.0089,
      "step": 3050440
    },
    {
      "epoch": 4.992144694723199,
      "grad_norm": 0.10879989713430405,
      "learning_rate": 1.5814131244110883e-08,
      "loss": 0.0074,
      "step": 3050460
    },
    {
      "epoch": 4.992177425161852,
      "grad_norm": 0.05544976517558098,
      "learning_rate": 1.5748239030593755e-08,
      "loss": 0.0105,
      "step": 3050480
    },
    {
      "epoch": 4.992210155600505,
      "grad_norm": 0.10306558012962341,
      "learning_rate": 1.5682346817076627e-08,
      "loss": 0.0079,
      "step": 3050500
    },
    {
      "epoch": 4.9922428860391586,
      "grad_norm": 0.2136370688676834,
      "learning_rate": 1.56164546035595e-08,
      "loss": 0.0086,
      "step": 3050520
    },
    {
      "epoch": 4.992275616477812,
      "grad_norm": 0.18026994168758392,
      "learning_rate": 1.555056239004237e-08,
      "loss": 0.0106,
      "step": 3050540
    },
    {
      "epoch": 4.992308346916466,
      "grad_norm": 0.2024390995502472,
      "learning_rate": 1.548467017652524e-08,
      "loss": 0.0087,
      "step": 3050560
    },
    {
      "epoch": 4.9923410773551185,
      "grad_norm": 0.03261428326368332,
      "learning_rate": 1.5418777963008112e-08,
      "loss": 0.0066,
      "step": 3050580
    },
    {
      "epoch": 4.992373807793772,
      "grad_norm": 0.23684997856616974,
      "learning_rate": 1.5352885749490984e-08,
      "loss": 0.0057,
      "step": 3050600
    },
    {
      "epoch": 4.992406538232426,
      "grad_norm": 0.49335673451423645,
      "learning_rate": 1.5286993535973856e-08,
      "loss": 0.0088,
      "step": 3050620
    },
    {
      "epoch": 4.992439268671078,
      "grad_norm": 0.6029931306838989,
      "learning_rate": 1.5221101322456725e-08,
      "loss": 0.0116,
      "step": 3050640
    },
    {
      "epoch": 4.992471999109732,
      "grad_norm": 0.13806700706481934,
      "learning_rate": 1.5155209108939597e-08,
      "loss": 0.0102,
      "step": 3050660
    },
    {
      "epoch": 4.9925047295483855,
      "grad_norm": 0.17981232702732086,
      "learning_rate": 1.508931689542247e-08,
      "loss": 0.0059,
      "step": 3050680
    },
    {
      "epoch": 4.992537459987039,
      "grad_norm": 0.3832506239414215,
      "learning_rate": 1.502342468190534e-08,
      "loss": 0.0072,
      "step": 3050700
    },
    {
      "epoch": 4.992570190425692,
      "grad_norm": 0.2264840006828308,
      "learning_rate": 1.4957532468388213e-08,
      "loss": 0.0065,
      "step": 3050720
    },
    {
      "epoch": 4.992602920864345,
      "grad_norm": 0.37566280364990234,
      "learning_rate": 1.4891640254871084e-08,
      "loss": 0.0079,
      "step": 3050740
    },
    {
      "epoch": 4.992635651302999,
      "grad_norm": 0.3240143358707428,
      "learning_rate": 1.4825748041353956e-08,
      "loss": 0.0104,
      "step": 3050760
    },
    {
      "epoch": 4.992668381741652,
      "grad_norm": 0.8425189256668091,
      "learning_rate": 1.4759855827836824e-08,
      "loss": 0.0094,
      "step": 3050780
    },
    {
      "epoch": 4.992701112180305,
      "grad_norm": 0.2426244020462036,
      "learning_rate": 1.4693963614319698e-08,
      "loss": 0.0098,
      "step": 3050800
    },
    {
      "epoch": 4.992733842618959,
      "grad_norm": 0.22526469826698303,
      "learning_rate": 1.4628071400802567e-08,
      "loss": 0.0088,
      "step": 3050820
    },
    {
      "epoch": 4.992766573057612,
      "grad_norm": 0.21312358975410461,
      "learning_rate": 1.4562179187285439e-08,
      "loss": 0.0103,
      "step": 3050840
    },
    {
      "epoch": 4.992799303496265,
      "grad_norm": 0.1990414410829544,
      "learning_rate": 1.449628697376831e-08,
      "loss": 0.0056,
      "step": 3050860
    },
    {
      "epoch": 4.992832033934919,
      "grad_norm": 0.10808276385068893,
      "learning_rate": 1.4430394760251182e-08,
      "loss": 0.0066,
      "step": 3050880
    },
    {
      "epoch": 4.992864764373572,
      "grad_norm": 0.6063545942306519,
      "learning_rate": 1.4364502546734054e-08,
      "loss": 0.0141,
      "step": 3050900
    },
    {
      "epoch": 4.992897494812225,
      "grad_norm": 0.3092478811740875,
      "learning_rate": 1.4298610333216924e-08,
      "loss": 0.0113,
      "step": 3050920
    },
    {
      "epoch": 4.992930225250879,
      "grad_norm": 0.1373162418603897,
      "learning_rate": 1.4232718119699796e-08,
      "loss": 0.01,
      "step": 3050940
    },
    {
      "epoch": 4.992962955689532,
      "grad_norm": 0.6661431193351746,
      "learning_rate": 1.4166825906182667e-08,
      "loss": 0.0139,
      "step": 3050960
    },
    {
      "epoch": 4.992995686128186,
      "grad_norm": 0.1460890918970108,
      "learning_rate": 1.4100933692665539e-08,
      "loss": 0.0056,
      "step": 3050980
    },
    {
      "epoch": 4.993028416566839,
      "grad_norm": 0.19699488580226898,
      "learning_rate": 1.403504147914841e-08,
      "loss": 0.0112,
      "step": 3051000
    },
    {
      "epoch": 4.993061147005492,
      "grad_norm": 0.5267894268035889,
      "learning_rate": 1.3969149265631281e-08,
      "loss": 0.0073,
      "step": 3051020
    },
    {
      "epoch": 4.993093877444146,
      "grad_norm": 0.144102543592453,
      "learning_rate": 1.3903257052114152e-08,
      "loss": 0.011,
      "step": 3051040
    },
    {
      "epoch": 4.9931266078827985,
      "grad_norm": 0.2901037335395813,
      "learning_rate": 1.3837364838597024e-08,
      "loss": 0.006,
      "step": 3051060
    },
    {
      "epoch": 4.993159338321452,
      "grad_norm": 0.11053960770368576,
      "learning_rate": 1.3771472625079896e-08,
      "loss": 0.0165,
      "step": 3051080
    },
    {
      "epoch": 4.993192068760106,
      "grad_norm": 0.14801239967346191,
      "learning_rate": 1.3705580411562766e-08,
      "loss": 0.0075,
      "step": 3051100
    },
    {
      "epoch": 4.993224799198758,
      "grad_norm": 0.2739085555076599,
      "learning_rate": 1.3639688198045638e-08,
      "loss": 0.0082,
      "step": 3051120
    },
    {
      "epoch": 4.993257529637412,
      "grad_norm": 0.17271704971790314,
      "learning_rate": 1.3573795984528509e-08,
      "loss": 0.0096,
      "step": 3051140
    },
    {
      "epoch": 4.993290260076066,
      "grad_norm": 0.27431201934814453,
      "learning_rate": 1.3507903771011381e-08,
      "loss": 0.0147,
      "step": 3051160
    },
    {
      "epoch": 4.993322990514719,
      "grad_norm": 0.24918293952941895,
      "learning_rate": 1.3442011557494251e-08,
      "loss": 0.0139,
      "step": 3051180
    },
    {
      "epoch": 4.993355720953372,
      "grad_norm": 0.03544791415333748,
      "learning_rate": 1.3376119343977124e-08,
      "loss": 0.0071,
      "step": 3051200
    },
    {
      "epoch": 4.9933884513920255,
      "grad_norm": 0.33454006910324097,
      "learning_rate": 1.3310227130459994e-08,
      "loss": 0.0108,
      "step": 3051220
    },
    {
      "epoch": 4.993421181830679,
      "grad_norm": 0.361062616109848,
      "learning_rate": 1.3244334916942866e-08,
      "loss": 0.0082,
      "step": 3051240
    },
    {
      "epoch": 4.993453912269333,
      "grad_norm": 0.21895816922187805,
      "learning_rate": 1.3178442703425738e-08,
      "loss": 0.0061,
      "step": 3051260
    },
    {
      "epoch": 4.993486642707985,
      "grad_norm": 0.379179447889328,
      "learning_rate": 1.3112550489908609e-08,
      "loss": 0.0063,
      "step": 3051280
    },
    {
      "epoch": 4.993519373146639,
      "grad_norm": 0.048244085162878036,
      "learning_rate": 1.304665827639148e-08,
      "loss": 0.0079,
      "step": 3051300
    },
    {
      "epoch": 4.993552103585293,
      "grad_norm": 0.08962858468294144,
      "learning_rate": 1.2980766062874351e-08,
      "loss": 0.0086,
      "step": 3051320
    },
    {
      "epoch": 4.993584834023945,
      "grad_norm": 0.21191614866256714,
      "learning_rate": 1.2914873849357223e-08,
      "loss": 0.0087,
      "step": 3051340
    },
    {
      "epoch": 4.993617564462599,
      "grad_norm": 0.13271880149841309,
      "learning_rate": 1.2848981635840094e-08,
      "loss": 0.009,
      "step": 3051360
    },
    {
      "epoch": 4.9936502949012525,
      "grad_norm": 0.4329031705856323,
      "learning_rate": 1.2783089422322966e-08,
      "loss": 0.0071,
      "step": 3051380
    },
    {
      "epoch": 4.993683025339905,
      "grad_norm": 0.2771724760532379,
      "learning_rate": 1.2717197208805836e-08,
      "loss": 0.0097,
      "step": 3051400
    },
    {
      "epoch": 4.993715755778559,
      "grad_norm": 0.1532466560602188,
      "learning_rate": 1.2651304995288708e-08,
      "loss": 0.0096,
      "step": 3051420
    },
    {
      "epoch": 4.993748486217212,
      "grad_norm": 0.18686753511428833,
      "learning_rate": 1.2585412781771579e-08,
      "loss": 0.0104,
      "step": 3051440
    },
    {
      "epoch": 4.993781216655866,
      "grad_norm": 0.06270421296358109,
      "learning_rate": 1.2519520568254451e-08,
      "loss": 0.0112,
      "step": 3051460
    },
    {
      "epoch": 4.993813947094519,
      "grad_norm": 0.27408814430236816,
      "learning_rate": 1.2453628354737323e-08,
      "loss": 0.0111,
      "step": 3051480
    },
    {
      "epoch": 4.993846677533172,
      "grad_norm": 0.2642090618610382,
      "learning_rate": 1.2387736141220193e-08,
      "loss": 0.0078,
      "step": 3051500
    },
    {
      "epoch": 4.993879407971826,
      "grad_norm": 0.1377049684524536,
      "learning_rate": 1.2321843927703065e-08,
      "loss": 0.0101,
      "step": 3051520
    },
    {
      "epoch": 4.993912138410479,
      "grad_norm": 0.22479739785194397,
      "learning_rate": 1.2255951714185934e-08,
      "loss": 0.0104,
      "step": 3051540
    },
    {
      "epoch": 4.993944868849132,
      "grad_norm": 0.13601553440093994,
      "learning_rate": 1.2190059500668808e-08,
      "loss": 0.0076,
      "step": 3051560
    },
    {
      "epoch": 4.993977599287786,
      "grad_norm": 0.36138027906417847,
      "learning_rate": 1.2124167287151677e-08,
      "loss": 0.0073,
      "step": 3051580
    },
    {
      "epoch": 4.994010329726439,
      "grad_norm": 0.3657243251800537,
      "learning_rate": 1.205827507363455e-08,
      "loss": 0.0079,
      "step": 3051600
    },
    {
      "epoch": 4.994043060165092,
      "grad_norm": 0.34776145219802856,
      "learning_rate": 1.199238286011742e-08,
      "loss": 0.0076,
      "step": 3051620
    },
    {
      "epoch": 4.994075790603746,
      "grad_norm": 0.15103065967559814,
      "learning_rate": 1.1926490646600291e-08,
      "loss": 0.0106,
      "step": 3051640
    },
    {
      "epoch": 4.994108521042399,
      "grad_norm": 0.28877872228622437,
      "learning_rate": 1.1860598433083165e-08,
      "loss": 0.0092,
      "step": 3051660
    },
    {
      "epoch": 4.994141251481052,
      "grad_norm": 0.25436076521873474,
      "learning_rate": 1.1794706219566034e-08,
      "loss": 0.0073,
      "step": 3051680
    },
    {
      "epoch": 4.9941739819197055,
      "grad_norm": 0.371671587228775,
      "learning_rate": 1.1728814006048906e-08,
      "loss": 0.0073,
      "step": 3051700
    },
    {
      "epoch": 4.994206712358359,
      "grad_norm": 0.13899722695350647,
      "learning_rate": 1.1662921792531777e-08,
      "loss": 0.0104,
      "step": 3051720
    },
    {
      "epoch": 4.994239442797013,
      "grad_norm": 0.21151386201381683,
      "learning_rate": 1.1597029579014649e-08,
      "loss": 0.0118,
      "step": 3051740
    },
    {
      "epoch": 4.994272173235665,
      "grad_norm": 0.3355351984500885,
      "learning_rate": 1.1531137365497519e-08,
      "loss": 0.008,
      "step": 3051760
    },
    {
      "epoch": 4.994304903674319,
      "grad_norm": 0.3128271996974945,
      "learning_rate": 1.1465245151980391e-08,
      "loss": 0.0092,
      "step": 3051780
    },
    {
      "epoch": 4.994337634112973,
      "grad_norm": 0.22864244878292084,
      "learning_rate": 1.1399352938463262e-08,
      "loss": 0.0086,
      "step": 3051800
    },
    {
      "epoch": 4.994370364551626,
      "grad_norm": 0.47213035821914673,
      "learning_rate": 1.1333460724946134e-08,
      "loss": 0.0069,
      "step": 3051820
    },
    {
      "epoch": 4.994403094990279,
      "grad_norm": 0.20550203323364258,
      "learning_rate": 1.1267568511429006e-08,
      "loss": 0.0113,
      "step": 3051840
    },
    {
      "epoch": 4.9944358254289325,
      "grad_norm": 0.15218518674373627,
      "learning_rate": 1.1201676297911876e-08,
      "loss": 0.0068,
      "step": 3051860
    },
    {
      "epoch": 4.994468555867586,
      "grad_norm": 0.12660250067710876,
      "learning_rate": 1.1135784084394748e-08,
      "loss": 0.0125,
      "step": 3051880
    },
    {
      "epoch": 4.994501286306239,
      "grad_norm": 0.0762350931763649,
      "learning_rate": 1.1069891870877619e-08,
      "loss": 0.0094,
      "step": 3051900
    },
    {
      "epoch": 4.994534016744892,
      "grad_norm": 0.24695506691932678,
      "learning_rate": 1.1003999657360491e-08,
      "loss": 0.0147,
      "step": 3051920
    },
    {
      "epoch": 4.994566747183546,
      "grad_norm": 0.2083587944507599,
      "learning_rate": 1.0938107443843361e-08,
      "loss": 0.0113,
      "step": 3051940
    },
    {
      "epoch": 4.994599477622199,
      "grad_norm": 0.1765962839126587,
      "learning_rate": 1.0872215230326233e-08,
      "loss": 0.0058,
      "step": 3051960
    },
    {
      "epoch": 4.994632208060852,
      "grad_norm": 0.2056344747543335,
      "learning_rate": 1.0806323016809104e-08,
      "loss": 0.0086,
      "step": 3051980
    },
    {
      "epoch": 4.994664938499506,
      "grad_norm": 0.1972634196281433,
      "learning_rate": 1.0740430803291976e-08,
      "loss": 0.0063,
      "step": 3052000
    },
    {
      "epoch": 4.9946976689381595,
      "grad_norm": 0.22950510680675507,
      "learning_rate": 1.0674538589774848e-08,
      "loss": 0.0054,
      "step": 3052020
    },
    {
      "epoch": 4.994730399376812,
      "grad_norm": 0.14235572516918182,
      "learning_rate": 1.0608646376257718e-08,
      "loss": 0.0069,
      "step": 3052040
    },
    {
      "epoch": 4.994763129815466,
      "grad_norm": 0.13458718359470367,
      "learning_rate": 1.054275416274059e-08,
      "loss": 0.0132,
      "step": 3052060
    },
    {
      "epoch": 4.994795860254119,
      "grad_norm": 0.0702388659119606,
      "learning_rate": 1.0476861949223461e-08,
      "loss": 0.0074,
      "step": 3052080
    },
    {
      "epoch": 4.994828590692773,
      "grad_norm": 0.18264244496822357,
      "learning_rate": 1.0410969735706333e-08,
      "loss": 0.012,
      "step": 3052100
    },
    {
      "epoch": 4.994861321131426,
      "grad_norm": 0.10839882493019104,
      "learning_rate": 1.0345077522189204e-08,
      "loss": 0.0067,
      "step": 3052120
    },
    {
      "epoch": 4.994894051570079,
      "grad_norm": 0.1369980424642563,
      "learning_rate": 1.0279185308672076e-08,
      "loss": 0.0061,
      "step": 3052140
    },
    {
      "epoch": 4.994926782008733,
      "grad_norm": 0.14782148599624634,
      "learning_rate": 1.0213293095154946e-08,
      "loss": 0.0083,
      "step": 3052160
    },
    {
      "epoch": 4.994959512447386,
      "grad_norm": 0.11967557668685913,
      "learning_rate": 1.0147400881637818e-08,
      "loss": 0.0079,
      "step": 3052180
    },
    {
      "epoch": 4.994992242886039,
      "grad_norm": 0.21575310826301575,
      "learning_rate": 1.0081508668120689e-08,
      "loss": 0.0139,
      "step": 3052200
    },
    {
      "epoch": 4.995024973324693,
      "grad_norm": 0.05967309698462486,
      "learning_rate": 1.001561645460356e-08,
      "loss": 0.0085,
      "step": 3052220
    },
    {
      "epoch": 4.9950577037633455,
      "grad_norm": 0.08954409509897232,
      "learning_rate": 9.949724241086433e-09,
      "loss": 0.0058,
      "step": 3052240
    },
    {
      "epoch": 4.995090434201999,
      "grad_norm": 0.3305841386318207,
      "learning_rate": 9.883832027569303e-09,
      "loss": 0.0095,
      "step": 3052260
    },
    {
      "epoch": 4.995123164640653,
      "grad_norm": 0.1928456574678421,
      "learning_rate": 9.817939814052175e-09,
      "loss": 0.0056,
      "step": 3052280
    },
    {
      "epoch": 4.995155895079306,
      "grad_norm": 0.34562602639198303,
      "learning_rate": 9.752047600535046e-09,
      "loss": 0.0135,
      "step": 3052300
    },
    {
      "epoch": 4.995188625517959,
      "grad_norm": 0.1134842038154602,
      "learning_rate": 9.686155387017918e-09,
      "loss": 0.0067,
      "step": 3052320
    },
    {
      "epoch": 4.9952213559566125,
      "grad_norm": 0.14060702919960022,
      "learning_rate": 9.620263173500788e-09,
      "loss": 0.0086,
      "step": 3052340
    },
    {
      "epoch": 4.995254086395266,
      "grad_norm": 0.13432815670967102,
      "learning_rate": 9.55437095998366e-09,
      "loss": 0.0063,
      "step": 3052360
    },
    {
      "epoch": 4.995286816833919,
      "grad_norm": 0.3002874255180359,
      "learning_rate": 9.488478746466531e-09,
      "loss": 0.0084,
      "step": 3052380
    },
    {
      "epoch": 4.995319547272572,
      "grad_norm": 0.15223905444145203,
      "learning_rate": 9.422586532949403e-09,
      "loss": 0.012,
      "step": 3052400
    },
    {
      "epoch": 4.995352277711226,
      "grad_norm": 0.25481557846069336,
      "learning_rate": 9.356694319432273e-09,
      "loss": 0.005,
      "step": 3052420
    },
    {
      "epoch": 4.99538500814988,
      "grad_norm": 0.12044460326433182,
      "learning_rate": 9.290802105915144e-09,
      "loss": 0.0086,
      "step": 3052440
    },
    {
      "epoch": 4.995417738588532,
      "grad_norm": 0.10649710148572922,
      "learning_rate": 9.224909892398016e-09,
      "loss": 0.0144,
      "step": 3052460
    },
    {
      "epoch": 4.995450469027186,
      "grad_norm": 0.07478730380535126,
      "learning_rate": 9.159017678880886e-09,
      "loss": 0.0105,
      "step": 3052480
    },
    {
      "epoch": 4.9954831994658395,
      "grad_norm": 0.3872843086719513,
      "learning_rate": 9.093125465363758e-09,
      "loss": 0.0067,
      "step": 3052500
    },
    {
      "epoch": 4.995515929904492,
      "grad_norm": 0.18575124442577362,
      "learning_rate": 9.02723325184663e-09,
      "loss": 0.0127,
      "step": 3052520
    },
    {
      "epoch": 4.995548660343146,
      "grad_norm": 0.15005959570407867,
      "learning_rate": 8.961341038329501e-09,
      "loss": 0.0086,
      "step": 3052540
    },
    {
      "epoch": 4.995581390781799,
      "grad_norm": 0.2542118430137634,
      "learning_rate": 8.895448824812373e-09,
      "loss": 0.0095,
      "step": 3052560
    },
    {
      "epoch": 4.995614121220452,
      "grad_norm": 0.1425999402999878,
      "learning_rate": 8.829556611295244e-09,
      "loss": 0.0055,
      "step": 3052580
    },
    {
      "epoch": 4.995646851659106,
      "grad_norm": 0.14754413068294525,
      "learning_rate": 8.763664397778116e-09,
      "loss": 0.0072,
      "step": 3052600
    },
    {
      "epoch": 4.995679582097759,
      "grad_norm": 0.36960774660110474,
      "learning_rate": 8.697772184260986e-09,
      "loss": 0.0101,
      "step": 3052620
    },
    {
      "epoch": 4.995712312536413,
      "grad_norm": 0.13889066874980927,
      "learning_rate": 8.631879970743858e-09,
      "loss": 0.0073,
      "step": 3052640
    },
    {
      "epoch": 4.995745042975066,
      "grad_norm": 0.9828826189041138,
      "learning_rate": 8.565987757226729e-09,
      "loss": 0.0089,
      "step": 3052660
    },
    {
      "epoch": 4.995777773413719,
      "grad_norm": 0.08100149780511856,
      "learning_rate": 8.5000955437096e-09,
      "loss": 0.0075,
      "step": 3052680
    },
    {
      "epoch": 4.995810503852373,
      "grad_norm": 0.26573440432548523,
      "learning_rate": 8.434203330192473e-09,
      "loss": 0.0084,
      "step": 3052700
    },
    {
      "epoch": 4.995843234291026,
      "grad_norm": 0.6065970063209534,
      "learning_rate": 8.368311116675343e-09,
      "loss": 0.0082,
      "step": 3052720
    },
    {
      "epoch": 4.995875964729679,
      "grad_norm": 0.7506900429725647,
      "learning_rate": 8.302418903158215e-09,
      "loss": 0.0131,
      "step": 3052740
    },
    {
      "epoch": 4.995908695168333,
      "grad_norm": 0.16221880912780762,
      "learning_rate": 8.236526689641086e-09,
      "loss": 0.0097,
      "step": 3052760
    },
    {
      "epoch": 4.995941425606986,
      "grad_norm": 0.28389522433280945,
      "learning_rate": 8.170634476123958e-09,
      "loss": 0.01,
      "step": 3052780
    },
    {
      "epoch": 4.995974156045639,
      "grad_norm": 0.25008249282836914,
      "learning_rate": 8.104742262606828e-09,
      "loss": 0.0098,
      "step": 3052800
    },
    {
      "epoch": 4.996006886484293,
      "grad_norm": 0.2159232795238495,
      "learning_rate": 8.038850049089699e-09,
      "loss": 0.0121,
      "step": 3052820
    },
    {
      "epoch": 4.996039616922946,
      "grad_norm": 0.24361926317214966,
      "learning_rate": 7.97295783557257e-09,
      "loss": 0.0077,
      "step": 3052840
    },
    {
      "epoch": 4.996072347361599,
      "grad_norm": 0.13767528533935547,
      "learning_rate": 7.907065622055441e-09,
      "loss": 0.0085,
      "step": 3052860
    },
    {
      "epoch": 4.9961050778002525,
      "grad_norm": 0.20899523794651031,
      "learning_rate": 7.841173408538313e-09,
      "loss": 0.0072,
      "step": 3052880
    },
    {
      "epoch": 4.996137808238906,
      "grad_norm": 0.2754974067211151,
      "learning_rate": 7.775281195021185e-09,
      "loss": 0.0077,
      "step": 3052900
    },
    {
      "epoch": 4.99617053867756,
      "grad_norm": 0.33314236998558044,
      "learning_rate": 7.709388981504056e-09,
      "loss": 0.0092,
      "step": 3052920
    },
    {
      "epoch": 4.996203269116212,
      "grad_norm": 0.5066682696342468,
      "learning_rate": 7.643496767986928e-09,
      "loss": 0.0111,
      "step": 3052940
    },
    {
      "epoch": 4.996235999554866,
      "grad_norm": 0.2631250321865082,
      "learning_rate": 7.577604554469798e-09,
      "loss": 0.0082,
      "step": 3052960
    },
    {
      "epoch": 4.99626872999352,
      "grad_norm": 0.19571974873542786,
      "learning_rate": 7.51171234095267e-09,
      "loss": 0.0055,
      "step": 3052980
    },
    {
      "epoch": 4.996301460432173,
      "grad_norm": 0.14783340692520142,
      "learning_rate": 7.445820127435542e-09,
      "loss": 0.0075,
      "step": 3053000
    },
    {
      "epoch": 4.996334190870826,
      "grad_norm": 0.5584259033203125,
      "learning_rate": 7.379927913918412e-09,
      "loss": 0.0088,
      "step": 3053020
    },
    {
      "epoch": 4.9963669213094795,
      "grad_norm": 0.12400220334529877,
      "learning_rate": 7.3140357004012835e-09,
      "loss": 0.0059,
      "step": 3053040
    },
    {
      "epoch": 4.996399651748133,
      "grad_norm": 0.20438992977142334,
      "learning_rate": 7.248143486884155e-09,
      "loss": 0.0076,
      "step": 3053060
    },
    {
      "epoch": 4.996432382186786,
      "grad_norm": 0.0881170928478241,
      "learning_rate": 7.182251273367027e-09,
      "loss": 0.0102,
      "step": 3053080
    },
    {
      "epoch": 4.996465112625439,
      "grad_norm": 0.3521125316619873,
      "learning_rate": 7.116359059849898e-09,
      "loss": 0.0094,
      "step": 3053100
    },
    {
      "epoch": 4.996497843064093,
      "grad_norm": 0.16668370366096497,
      "learning_rate": 7.050466846332769e-09,
      "loss": 0.0063,
      "step": 3053120
    },
    {
      "epoch": 4.996530573502746,
      "grad_norm": 0.22956949472427368,
      "learning_rate": 6.984574632815641e-09,
      "loss": 0.0098,
      "step": 3053140
    },
    {
      "epoch": 4.996563303941399,
      "grad_norm": 0.5596901178359985,
      "learning_rate": 6.918682419298512e-09,
      "loss": 0.0124,
      "step": 3053160
    },
    {
      "epoch": 4.996596034380053,
      "grad_norm": 0.29365894198417664,
      "learning_rate": 6.852790205781383e-09,
      "loss": 0.0102,
      "step": 3053180
    },
    {
      "epoch": 4.996628764818706,
      "grad_norm": 0.12793123722076416,
      "learning_rate": 6.7868979922642545e-09,
      "loss": 0.006,
      "step": 3053200
    },
    {
      "epoch": 4.996661495257359,
      "grad_norm": 0.8544225692749023,
      "learning_rate": 6.721005778747126e-09,
      "loss": 0.0141,
      "step": 3053220
    },
    {
      "epoch": 4.996694225696013,
      "grad_norm": 0.0998915508389473,
      "learning_rate": 6.655113565229997e-09,
      "loss": 0.0058,
      "step": 3053240
    },
    {
      "epoch": 4.996726956134666,
      "grad_norm": 0.16226132214069366,
      "learning_rate": 6.589221351712869e-09,
      "loss": 0.0095,
      "step": 3053260
    },
    {
      "epoch": 4.99675968657332,
      "grad_norm": 0.14385418593883514,
      "learning_rate": 6.52332913819574e-09,
      "loss": 0.009,
      "step": 3053280
    },
    {
      "epoch": 4.996792417011973,
      "grad_norm": 0.2117605209350586,
      "learning_rate": 6.457436924678612e-09,
      "loss": 0.0054,
      "step": 3053300
    },
    {
      "epoch": 4.996825147450626,
      "grad_norm": 0.2990286350250244,
      "learning_rate": 6.391544711161483e-09,
      "loss": 0.0134,
      "step": 3053320
    },
    {
      "epoch": 4.99685787788928,
      "grad_norm": 0.1693309247493744,
      "learning_rate": 6.325652497644354e-09,
      "loss": 0.0069,
      "step": 3053340
    },
    {
      "epoch": 4.9968906083279325,
      "grad_norm": 0.305146187543869,
      "learning_rate": 6.2597602841272254e-09,
      "loss": 0.0116,
      "step": 3053360
    },
    {
      "epoch": 4.996923338766586,
      "grad_norm": 0.11951867491006851,
      "learning_rate": 6.193868070610097e-09,
      "loss": 0.0095,
      "step": 3053380
    },
    {
      "epoch": 4.99695606920524,
      "grad_norm": 0.13218189775943756,
      "learning_rate": 6.127975857092967e-09,
      "loss": 0.0073,
      "step": 3053400
    },
    {
      "epoch": 4.996988799643892,
      "grad_norm": 0.3103165328502655,
      "learning_rate": 6.062083643575838e-09,
      "loss": 0.0091,
      "step": 3053420
    },
    {
      "epoch": 4.997021530082546,
      "grad_norm": 0.18335023522377014,
      "learning_rate": 5.99619143005871e-09,
      "loss": 0.0081,
      "step": 3053440
    },
    {
      "epoch": 4.9970542605212,
      "grad_norm": 0.2476745992898941,
      "learning_rate": 5.930299216541583e-09,
      "loss": 0.0121,
      "step": 3053460
    },
    {
      "epoch": 4.997086990959853,
      "grad_norm": 0.2661646902561188,
      "learning_rate": 5.864407003024453e-09,
      "loss": 0.006,
      "step": 3053480
    },
    {
      "epoch": 4.997119721398506,
      "grad_norm": 0.3807969391345978,
      "learning_rate": 5.798514789507324e-09,
      "loss": 0.0068,
      "step": 3053500
    },
    {
      "epoch": 4.9971524518371595,
      "grad_norm": 0.1335931271314621,
      "learning_rate": 5.7326225759901956e-09,
      "loss": 0.0076,
      "step": 3053520
    },
    {
      "epoch": 4.997185182275813,
      "grad_norm": 0.18341733515262604,
      "learning_rate": 5.666730362473067e-09,
      "loss": 0.0079,
      "step": 3053540
    },
    {
      "epoch": 4.997217912714467,
      "grad_norm": 0.05619221553206444,
      "learning_rate": 5.600838148955938e-09,
      "loss": 0.0103,
      "step": 3053560
    },
    {
      "epoch": 4.997250643153119,
      "grad_norm": 0.41854599118232727,
      "learning_rate": 5.534945935438809e-09,
      "loss": 0.0083,
      "step": 3053580
    },
    {
      "epoch": 4.997283373591773,
      "grad_norm": 0.145529642701149,
      "learning_rate": 5.469053721921681e-09,
      "loss": 0.0091,
      "step": 3053600
    },
    {
      "epoch": 4.997316104030427,
      "grad_norm": 0.4152219891548157,
      "learning_rate": 5.403161508404552e-09,
      "loss": 0.0095,
      "step": 3053620
    },
    {
      "epoch": 4.997348834469079,
      "grad_norm": 0.16488318145275116,
      "learning_rate": 5.337269294887424e-09,
      "loss": 0.0103,
      "step": 3053640
    },
    {
      "epoch": 4.997381564907733,
      "grad_norm": 0.25196945667266846,
      "learning_rate": 5.271377081370295e-09,
      "loss": 0.0077,
      "step": 3053660
    },
    {
      "epoch": 4.9974142953463865,
      "grad_norm": 0.42564520239830017,
      "learning_rate": 5.2054848678531665e-09,
      "loss": 0.0133,
      "step": 3053680
    },
    {
      "epoch": 4.997447025785039,
      "grad_norm": 0.20427848398685455,
      "learning_rate": 5.139592654336038e-09,
      "loss": 0.0089,
      "step": 3053700
    },
    {
      "epoch": 4.997479756223693,
      "grad_norm": 0.3916069269180298,
      "learning_rate": 5.073700440818909e-09,
      "loss": 0.0073,
      "step": 3053720
    },
    {
      "epoch": 4.997512486662346,
      "grad_norm": 0.10989989340305328,
      "learning_rate": 5.00780822730178e-09,
      "loss": 0.0143,
      "step": 3053740
    },
    {
      "epoch": 4.997545217101,
      "grad_norm": 0.34534862637519836,
      "learning_rate": 4.941916013784652e-09,
      "loss": 0.0072,
      "step": 3053760
    },
    {
      "epoch": 4.997577947539653,
      "grad_norm": 0.13977843523025513,
      "learning_rate": 4.876023800267523e-09,
      "loss": 0.0079,
      "step": 3053780
    },
    {
      "epoch": 4.997610677978306,
      "grad_norm": 0.3564583659172058,
      "learning_rate": 4.810131586750394e-09,
      "loss": 0.0051,
      "step": 3053800
    },
    {
      "epoch": 4.99764340841696,
      "grad_norm": 0.5447453856468201,
      "learning_rate": 4.7442393732332654e-09,
      "loss": 0.01,
      "step": 3053820
    },
    {
      "epoch": 4.997676138855613,
      "grad_norm": 0.20563305914402008,
      "learning_rate": 4.678347159716137e-09,
      "loss": 0.0065,
      "step": 3053840
    },
    {
      "epoch": 4.997708869294266,
      "grad_norm": 0.3355635702610016,
      "learning_rate": 4.612454946199008e-09,
      "loss": 0.013,
      "step": 3053860
    },
    {
      "epoch": 4.99774159973292,
      "grad_norm": 0.0754399225115776,
      "learning_rate": 4.546562732681879e-09,
      "loss": 0.0084,
      "step": 3053880
    },
    {
      "epoch": 4.997774330171573,
      "grad_norm": 0.05428875610232353,
      "learning_rate": 4.4806705191647505e-09,
      "loss": 0.0115,
      "step": 3053900
    },
    {
      "epoch": 4.997807060610226,
      "grad_norm": 0.2064291387796402,
      "learning_rate": 4.414778305647622e-09,
      "loss": 0.0067,
      "step": 3053920
    },
    {
      "epoch": 4.99783979104888,
      "grad_norm": 0.19158685207366943,
      "learning_rate": 4.348886092130493e-09,
      "loss": 0.0097,
      "step": 3053940
    },
    {
      "epoch": 4.997872521487533,
      "grad_norm": 0.06894483417272568,
      "learning_rate": 4.282993878613364e-09,
      "loss": 0.0055,
      "step": 3053960
    },
    {
      "epoch": 4.997905251926186,
      "grad_norm": 0.2014828622341156,
      "learning_rate": 4.217101665096236e-09,
      "loss": 0.0081,
      "step": 3053980
    },
    {
      "epoch": 4.9979379823648395,
      "grad_norm": 0.08351288735866547,
      "learning_rate": 4.151209451579108e-09,
      "loss": 0.0114,
      "step": 3054000
    },
    {
      "epoch": 4.997970712803493,
      "grad_norm": 0.31623372435569763,
      "learning_rate": 4.085317238061979e-09,
      "loss": 0.0068,
      "step": 3054020
    },
    {
      "epoch": 4.998003443242146,
      "grad_norm": 0.21149137616157532,
      "learning_rate": 4.019425024544849e-09,
      "loss": 0.0071,
      "step": 3054040
    },
    {
      "epoch": 4.998036173680799,
      "grad_norm": 0.15456794202327728,
      "learning_rate": 3.953532811027721e-09,
      "loss": 0.0055,
      "step": 3054060
    },
    {
      "epoch": 4.998068904119453,
      "grad_norm": 0.1390150487422943,
      "learning_rate": 3.887640597510593e-09,
      "loss": 0.0088,
      "step": 3054080
    },
    {
      "epoch": 4.998101634558107,
      "grad_norm": 0.4199603497982025,
      "learning_rate": 3.821748383993464e-09,
      "loss": 0.0092,
      "step": 3054100
    },
    {
      "epoch": 4.998134364996759,
      "grad_norm": 0.42689141631126404,
      "learning_rate": 3.755856170476335e-09,
      "loss": 0.01,
      "step": 3054120
    },
    {
      "epoch": 4.998167095435413,
      "grad_norm": 0.17293597757816315,
      "learning_rate": 3.689963956959206e-09,
      "loss": 0.0084,
      "step": 3054140
    },
    {
      "epoch": 4.9981998258740665,
      "grad_norm": 0.17841610312461853,
      "learning_rate": 3.6240717434420774e-09,
      "loss": 0.0106,
      "step": 3054160
    },
    {
      "epoch": 4.99823255631272,
      "grad_norm": 0.2354593575000763,
      "learning_rate": 3.558179529924949e-09,
      "loss": 0.0119,
      "step": 3054180
    },
    {
      "epoch": 4.998265286751373,
      "grad_norm": 0.3654727637767792,
      "learning_rate": 3.4922873164078203e-09,
      "loss": 0.0067,
      "step": 3054200
    },
    {
      "epoch": 4.998298017190026,
      "grad_norm": 0.24084268510341644,
      "learning_rate": 3.4263951028906916e-09,
      "loss": 0.0058,
      "step": 3054220
    },
    {
      "epoch": 4.99833074762868,
      "grad_norm": 0.255683958530426,
      "learning_rate": 3.360502889373563e-09,
      "loss": 0.0088,
      "step": 3054240
    },
    {
      "epoch": 4.998363478067333,
      "grad_norm": 0.2788543105125427,
      "learning_rate": 3.2946106758564345e-09,
      "loss": 0.0093,
      "step": 3054260
    },
    {
      "epoch": 4.998396208505986,
      "grad_norm": 0.07449142634868622,
      "learning_rate": 3.228718462339306e-09,
      "loss": 0.0083,
      "step": 3054280
    },
    {
      "epoch": 4.99842893894464,
      "grad_norm": 0.3153328001499176,
      "learning_rate": 3.162826248822177e-09,
      "loss": 0.0078,
      "step": 3054300
    },
    {
      "epoch": 4.998461669383293,
      "grad_norm": 0.1822890192270279,
      "learning_rate": 3.0969340353050483e-09,
      "loss": 0.0072,
      "step": 3054320
    },
    {
      "epoch": 4.998494399821946,
      "grad_norm": 0.12463314831256866,
      "learning_rate": 3.031041821787919e-09,
      "loss": 0.0103,
      "step": 3054340
    },
    {
      "epoch": 4.9985271302606,
      "grad_norm": 0.3436444401741028,
      "learning_rate": 2.9651496082707913e-09,
      "loss": 0.0078,
      "step": 3054360
    },
    {
      "epoch": 4.998559860699253,
      "grad_norm": 0.14927464723587036,
      "learning_rate": 2.899257394753662e-09,
      "loss": 0.0083,
      "step": 3054380
    },
    {
      "epoch": 4.998592591137906,
      "grad_norm": 0.5869548320770264,
      "learning_rate": 2.8333651812365334e-09,
      "loss": 0.0067,
      "step": 3054400
    },
    {
      "epoch": 4.99862532157656,
      "grad_norm": 0.1930483877658844,
      "learning_rate": 2.7674729677194047e-09,
      "loss": 0.0064,
      "step": 3054420
    },
    {
      "epoch": 4.998658052015213,
      "grad_norm": 0.054728537797927856,
      "learning_rate": 2.701580754202276e-09,
      "loss": 0.0087,
      "step": 3054440
    },
    {
      "epoch": 4.998690782453867,
      "grad_norm": 0.20806308090686798,
      "learning_rate": 2.6356885406851476e-09,
      "loss": 0.0099,
      "step": 3054460
    },
    {
      "epoch": 4.99872351289252,
      "grad_norm": 0.7260101437568665,
      "learning_rate": 2.569796327168019e-09,
      "loss": 0.0094,
      "step": 3054480
    },
    {
      "epoch": 4.998756243331173,
      "grad_norm": 0.21157774329185486,
      "learning_rate": 2.50390411365089e-09,
      "loss": 0.013,
      "step": 3054500
    },
    {
      "epoch": 4.998788973769827,
      "grad_norm": 0.35763779282569885,
      "learning_rate": 2.4380119001337614e-09,
      "loss": 0.0107,
      "step": 3054520
    },
    {
      "epoch": 4.9988217042084795,
      "grad_norm": 0.33622559905052185,
      "learning_rate": 2.3721196866166327e-09,
      "loss": 0.0132,
      "step": 3054540
    },
    {
      "epoch": 4.998854434647133,
      "grad_norm": 0.04926412180066109,
      "learning_rate": 2.306227473099504e-09,
      "loss": 0.0071,
      "step": 3054560
    },
    {
      "epoch": 4.998887165085787,
      "grad_norm": 0.1781860888004303,
      "learning_rate": 2.2403352595823752e-09,
      "loss": 0.01,
      "step": 3054580
    },
    {
      "epoch": 4.998919895524439,
      "grad_norm": 0.14451469480991364,
      "learning_rate": 2.1744430460652465e-09,
      "loss": 0.0079,
      "step": 3054600
    },
    {
      "epoch": 4.998952625963093,
      "grad_norm": 0.15787675976753235,
      "learning_rate": 2.108550832548118e-09,
      "loss": 0.0109,
      "step": 3054620
    },
    {
      "epoch": 4.998985356401747,
      "grad_norm": 0.1113947257399559,
      "learning_rate": 2.0426586190309895e-09,
      "loss": 0.0072,
      "step": 3054640
    },
    {
      "epoch": 4.9990180868404,
      "grad_norm": 0.18731898069381714,
      "learning_rate": 1.9767664055138603e-09,
      "loss": 0.0068,
      "step": 3054660
    },
    {
      "epoch": 4.999050817279053,
      "grad_norm": 0.33259543776512146,
      "learning_rate": 1.910874191996732e-09,
      "loss": 0.0072,
      "step": 3054680
    },
    {
      "epoch": 4.9990835477177065,
      "grad_norm": 0.25671687722206116,
      "learning_rate": 1.844981978479603e-09,
      "loss": 0.0068,
      "step": 3054700
    },
    {
      "epoch": 4.99911627815636,
      "grad_norm": 0.36367955803871155,
      "learning_rate": 1.7790897649624745e-09,
      "loss": 0.0157,
      "step": 3054720
    },
    {
      "epoch": 4.999149008595014,
      "grad_norm": 0.14779183268547058,
      "learning_rate": 1.7131975514453458e-09,
      "loss": 0.0049,
      "step": 3054740
    },
    {
      "epoch": 4.999181739033666,
      "grad_norm": 0.1452620029449463,
      "learning_rate": 1.6473053379282173e-09,
      "loss": 0.0089,
      "step": 3054760
    },
    {
      "epoch": 4.99921446947232,
      "grad_norm": 0.07405725121498108,
      "learning_rate": 1.5814131244110885e-09,
      "loss": 0.0063,
      "step": 3054780
    },
    {
      "epoch": 4.9992471999109735,
      "grad_norm": 0.19524379074573517,
      "learning_rate": 1.5155209108939596e-09,
      "loss": 0.0133,
      "step": 3054800
    },
    {
      "epoch": 4.999279930349626,
      "grad_norm": 0.6077598333358765,
      "learning_rate": 1.449628697376831e-09,
      "loss": 0.0135,
      "step": 3054820
    },
    {
      "epoch": 4.99931266078828,
      "grad_norm": 0.22663642466068268,
      "learning_rate": 1.3837364838597023e-09,
      "loss": 0.0089,
      "step": 3054840
    },
    {
      "epoch": 4.999345391226933,
      "grad_norm": 0.09186320751905441,
      "learning_rate": 1.3178442703425738e-09,
      "loss": 0.0071,
      "step": 3054860
    },
    {
      "epoch": 4.999378121665586,
      "grad_norm": 0.07401777803897858,
      "learning_rate": 1.251952056825445e-09,
      "loss": 0.0163,
      "step": 3054880
    },
    {
      "epoch": 4.99941085210424,
      "grad_norm": 0.3739292621612549,
      "learning_rate": 1.1860598433083164e-09,
      "loss": 0.0105,
      "step": 3054900
    },
    {
      "epoch": 4.999443582542893,
      "grad_norm": 0.2341659963130951,
      "learning_rate": 1.1201676297911876e-09,
      "loss": 0.0106,
      "step": 3054920
    },
    {
      "epoch": 4.999476312981547,
      "grad_norm": 0.22062651813030243,
      "learning_rate": 1.054275416274059e-09,
      "loss": 0.0096,
      "step": 3054940
    },
    {
      "epoch": 4.9995090434202,
      "grad_norm": 0.6675089597702026,
      "learning_rate": 9.883832027569302e-10,
      "loss": 0.0082,
      "step": 3054960
    },
    {
      "epoch": 4.999541773858853,
      "grad_norm": 0.33209356665611267,
      "learning_rate": 9.224909892398015e-10,
      "loss": 0.0098,
      "step": 3054980
    },
    {
      "epoch": 4.999574504297507,
      "grad_norm": 0.495362251996994,
      "learning_rate": 8.565987757226729e-10,
      "loss": 0.0092,
      "step": 3055000
    },
    {
      "epoch": 4.99960723473616,
      "grad_norm": 0.058977968990802765,
      "learning_rate": 7.907065622055443e-10,
      "loss": 0.0077,
      "step": 3055020
    },
    {
      "epoch": 4.999639965174813,
      "grad_norm": 1.2834782600402832,
      "learning_rate": 7.248143486884155e-10,
      "loss": 0.0087,
      "step": 3055040
    },
    {
      "epoch": 4.999672695613467,
      "grad_norm": 0.45841720700263977,
      "learning_rate": 6.589221351712869e-10,
      "loss": 0.0138,
      "step": 3055060
    },
    {
      "epoch": 4.99970542605212,
      "grad_norm": 0.24076008796691895,
      "learning_rate": 5.930299216541582e-10,
      "loss": 0.0066,
      "step": 3055080
    },
    {
      "epoch": 4.999738156490773,
      "grad_norm": 0.2838570475578308,
      "learning_rate": 5.271377081370295e-10,
      "loss": 0.0101,
      "step": 3055100
    },
    {
      "epoch": 4.999770886929427,
      "grad_norm": 0.016580713912844658,
      "learning_rate": 4.6124549461990076e-10,
      "loss": 0.0066,
      "step": 3055120
    },
    {
      "epoch": 4.99980361736808,
      "grad_norm": 0.5820105671882629,
      "learning_rate": 3.9535328110277214e-10,
      "loss": 0.01,
      "step": 3055140
    },
    {
      "epoch": 4.999836347806733,
      "grad_norm": 0.08986205607652664,
      "learning_rate": 3.2946106758564345e-10,
      "loss": 0.0091,
      "step": 3055160
    },
    {
      "epoch": 4.9998690782453865,
      "grad_norm": 0.1514086276292801,
      "learning_rate": 2.6356885406851477e-10,
      "loss": 0.0076,
      "step": 3055180
    },
    {
      "epoch": 4.99990180868404,
      "grad_norm": 0.24212081730365753,
      "learning_rate": 1.9767664055138607e-10,
      "loss": 0.0062,
      "step": 3055200
    },
    {
      "epoch": 4.999934539122694,
      "grad_norm": 0.12429498881101608,
      "learning_rate": 1.3178442703425739e-10,
      "loss": 0.0097,
      "step": 3055220
    },
    {
      "epoch": 4.999967269561346,
      "grad_norm": 0.7568228244781494,
      "learning_rate": 6.589221351712869e-11,
      "loss": 0.016,
      "step": 3055240
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.056983429938554764,
      "learning_rate": 0.0,
      "loss": 0.0069,
      "step": 3055260
    }
  ],
  "logging_steps": 20,
  "max_steps": 3055260,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 50000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}
